author    Dimitry Andric <dim@FreeBSD.org>  2022-07-03 14:10:23 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2022-07-03 14:10:23 +0000
commit    145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree      1d56ae694a6de602e348dd80165cf881a36600ed /llvm
parent    ecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
download  src-145449b1e420787bb99721a429341fa6be3adfb6.tar.gz
          src-145449b1e420787bb99721a429341fa6be3adfb6.zip
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm-c/Core.h | 46
-rw-r--r--  llvm/include/llvm-c/DisassemblerTypes.h | 16
-rw-r--r--  llvm/include/llvm-c/Object.h | 32
-rw-r--r--  llvm/include/llvm-c/Orc.h | 110
-rw-r--r--  llvm/include/llvm-c/TargetMachine.h | 4
-rw-r--r--  llvm/include/llvm-c/Transforms/Coroutines.h | 56
-rw-r--r--  llvm/include/llvm-c/Transforms/IPO.h | 3
-rw-r--r--  llvm/include/llvm-c/Transforms/PassManagerBuilder.h | 6
-rw-r--r--  llvm/include/llvm-c/Transforms/Scalar.h | 3
-rw-r--r--  llvm/include/llvm-c/blake3.h | 79
-rw-r--r--  llvm/include/llvm/ADT/APFloat.h | 3
-rw-r--r--  llvm/include/llvm/ADT/APInt.h | 58
-rw-r--r--  llvm/include/llvm/ADT/AddressRanges.h | 79
-rw-r--r--  llvm/include/llvm/ADT/ArrayRef.h | 41
-rw-r--r--  llvm/include/llvm/ADT/BitmaskEnum.h | 12
-rw-r--r--  llvm/include/llvm/ADT/BreadthFirstIterator.h | 2
-rw-r--r--  llvm/include/llvm/ADT/DenseMap.h | 1
-rw-r--r--  llvm/include/llvm/ADT/EpochTracker.h | 4
-rw-r--r--  llvm/include/llvm/ADT/EquivalenceClasses.h | 3
-rw-r--r--  llvm/include/llvm/ADT/FloatingPointMode.h | 28
-rw-r--r--  llvm/include/llvm/ADT/FoldingSet.h | 55
-rw-r--r--  llvm/include/llvm/ADT/GenericCycleImpl.h | 54
-rw-r--r--  llvm/include/llvm/ADT/GenericCycleInfo.h | 18
-rw-r--r--  llvm/include/llvm/ADT/IntervalMap.h | 34
-rw-r--r--  llvm/include/llvm/ADT/IntrusiveRefCntPtr.h | 4
-rw-r--r--  llvm/include/llvm/ADT/Optional.h | 195
-rw-r--r--  llvm/include/llvm/ADT/PointerIntPair.h | 10
-rw-r--r--  llvm/include/llvm/ADT/PointerSumType.h | 5
-rw-r--r--  llvm/include/llvm/ADT/PointerUnion.h | 74
-rw-r--r--  llvm/include/llvm/ADT/SCCIterator.h | 11
-rw-r--r--  llvm/include/llvm/ADT/STLExtras.h | 56
-rw-r--r--  llvm/include/llvm/ADT/SmallVector.h | 5
-rw-r--r--  llvm/include/llvm/ADT/Statistic.h | 38
-rw-r--r--  llvm/include/llvm/ADT/StringRef.h | 4
-rw-r--r--  llvm/include/llvm/ADT/Triple.h | 91
-rw-r--r--  llvm/include/llvm/ADT/edit_distance.h | 38
-rw-r--r--  llvm/include/llvm/Analysis/AliasAnalysis.h | 12
-rw-r--r--  llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/AliasSetTracker.h | 7
-rw-r--r--  llvm/include/llvm/Analysis/AssumeBundleQueries.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/BasicAliasAnalysis.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h | 24
-rw-r--r--  llvm/include/llvm/Analysis/BranchProbabilityInfo.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/CFGPrinter.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/CGSCCPassManager.h | 24
-rw-r--r--  llvm/include/llvm/Analysis/CallGraph.h | 5
-rw-r--r--  llvm/include/llvm/Analysis/CallPrinter.h | 14
-rw-r--r--  llvm/include/llvm/Analysis/CaptureTracking.h | 35
-rw-r--r--  llvm/include/llvm/Analysis/CmpInstAnalysis.h | 37
-rw-r--r--  llvm/include/llvm/Analysis/CodeMetrics.h | 7
-rw-r--r--  llvm/include/llvm/Analysis/ConstantFolding.h | 36
-rw-r--r--  llvm/include/llvm/Analysis/ConstraintSystem.h | 19
-rw-r--r--  llvm/include/llvm/Analysis/DDG.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/DDGPrinter.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/DOTGraphTraitsPass.h | 195
-rw-r--r--  llvm/include/llvm/Analysis/Delinearization.h | 15
-rw-r--r--  llvm/include/llvm/Analysis/DependenceAnalysis.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/DivergenceAnalysis.h | 8
-rw-r--r--  llvm/include/llvm/Analysis/DomPrinter.h | 118
-rw-r--r--  llvm/include/llvm/Analysis/DomTreeUpdater.h | 43
-rw-r--r--  llvm/include/llvm/Analysis/DominanceFrontierImpl.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/EHPersonalities.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h | 49
-rw-r--r--  llvm/include/llvm/Analysis/GlobalsModRef.h | 11
-rw-r--r--  llvm/include/llvm/Analysis/IRSimilarityIdentifier.h | 41
-rw-r--r--  llvm/include/llvm/Analysis/IVDescriptors.h | 52
-rw-r--r--  llvm/include/llvm/Analysis/IVUsers.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/InlineAdvisor.h | 51
-rw-r--r--  llvm/include/llvm/Analysis/InlineCost.h | 14
-rw-r--r--  llvm/include/llvm/Analysis/InlineModelFeatureMaps.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/InlineOrder.h | 99
-rw-r--r--  llvm/include/llvm/Analysis/InstSimplifyFolder.h | 141
-rw-r--r--  llvm/include/llvm/Analysis/InstructionSimplify.h | 94
-rw-r--r--  llvm/include/llvm/Analysis/IntervalIterator.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/LazyCallGraph.h | 11
-rw-r--r--  llvm/include/llvm/Analysis/LazyValueInfo.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/Loads.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/LoopAccessAnalysis.h | 91
-rw-r--r--  llvm/include/llvm/Analysis/LoopAnalysisManager.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/LoopCacheAnalysis.h | 24
-rw-r--r--  llvm/include/llvm/Analysis/LoopInfo.h | 20
-rw-r--r--  llvm/include/llvm/Analysis/LoopInfoImpl.h | 15
-rw-r--r--  llvm/include/llvm/Analysis/LoopPass.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h | 7
-rw-r--r--  llvm/include/llvm/Analysis/MLInlineAdvisor.h | 36
-rw-r--r--  llvm/include/llvm/Analysis/MLModelRunner.h | 21
-rw-r--r--  llvm/include/llvm/Analysis/MemoryBuiltins.h | 33
-rw-r--r--  llvm/include/llvm/Analysis/MemoryLocation.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/MemorySSA.h | 48
-rw-r--r--  llvm/include/llvm/Analysis/MemorySSAUpdater.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h | 7
-rw-r--r--  llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/MustExecute.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/NoInferenceModelRunner.h | 12
-rw-r--r--  llvm/include/llvm/Analysis/ObjCARCUtil.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/OverflowInstAnalysis.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/PhiValues.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/PostDominators.h | 5
-rw-r--r--  llvm/include/llvm/Analysis/ProfileSummaryInfo.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/PtrUseVisitor.h | 11
-rw-r--r--  llvm/include/llvm/Analysis/RegionInfo.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/RegionInfoImpl.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/RegionIterator.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/RegionPass.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/RegionPrinter.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/ReleaseModeModelRunner.h | 44
-rw-r--r--  llvm/include/llvm/Analysis/ReplayInlineAdvisor.h | 14
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolution.h | 137
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h | 7
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h | 9
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/ScalarFuncs.def | 117
-rw-r--r--  llvm/include/llvm/Analysis/SparsePropagation.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/StackLifetime.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/SyncDependenceAnalysis.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/SyntheticCountsUtils.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/TargetFolder.h | 162
-rw-r--r--  llvm/include/llvm/Analysis/TargetLibraryInfo.h | 14
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h | 139
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 78
-rw-r--r--  llvm/include/llvm/Analysis/TensorSpec.h | 132
-rw-r--r--  llvm/include/llvm/Analysis/TypeMetadataUtils.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/Utils/TFUtils.h | 102
-rw-r--r--  llvm/include/llvm/Analysis/ValueLattice.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/ValueTracking.h | 40
-rw-r--r--  llvm/include/llvm/Analysis/VectorUtils.h | 30
-rw-r--r--  llvm/include/llvm/AsmParser/LLLexer.h | 2
-rw-r--r--  llvm/include/llvm/AsmParser/LLParser.h | 23
-rw-r--r--  llvm/include/llvm/AsmParser/LLToken.h | 98
-rw-r--r--  llvm/include/llvm/AsmParser/Parser.h | 4
-rw-r--r--  llvm/include/llvm/BinaryFormat/COFF.h | 5
-rw-r--r--  llvm/include/llvm/BinaryFormat/DXContainer.h | 131
-rw-r--r--  llvm/include/llvm/BinaryFormat/Dwarf.h | 4
-rw-r--r--  llvm/include/llvm/BinaryFormat/DynamicTags.def | 1
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELF.h | 90
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def | 62
-rw-r--r--  llvm/include/llvm/BinaryFormat/GOFF.h | 33
-rw-r--r--  llvm/include/llvm/BinaryFormat/MachO.h | 45
-rw-r--r--  llvm/include/llvm/BinaryFormat/Magic.h | 3
-rw-r--r--  llvm/include/llvm/BinaryFormat/Swift.def | 7
-rw-r--r--  llvm/include/llvm/BinaryFormat/Wasm.h | 22
-rw-r--r--  llvm/include/llvm/BinaryFormat/XCOFF.h | 30
-rw-r--r--  llvm/include/llvm/Bitcode/BitcodeAnalyzer.h | 3
-rw-r--r--  llvm/include/llvm/Bitcode/BitcodeReader.h | 7
-rw-r--r--  llvm/include/llvm/Bitcode/BitcodeWriter.h | 2
-rw-r--r--  llvm/include/llvm/Bitcode/BitcodeWriterPass.h | 1
-rw-r--r--  llvm/include/llvm/Bitcode/LLVMBitCodes.h | 27
-rw-r--r--  llvm/include/llvm/Bitstream/BitCodeEnums.h | 90
-rw-r--r--  llvm/include/llvm/Bitstream/BitCodes.h | 71
-rw-r--r--  llvm/include/llvm/Bitstream/BitstreamReader.h | 50
-rw-r--r--  llvm/include/llvm/Bitstream/BitstreamWriter.h | 21
-rw-r--r--  llvm/include/llvm/CodeGen/AccelTable.h | 8
-rw-r--r--  llvm/include/llvm/CodeGen/Analysis.h | 5
-rw-r--r--  llvm/include/llvm/CodeGen/AsmPrinter.h | 36
-rw-r--r--  llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h | 109
-rw-r--r--  llvm/include/llvm/CodeGen/BasicTTIImpl.h | 248
-rw-r--r--  llvm/include/llvm/CodeGen/CFIFixup.h | 38
-rw-r--r--  llvm/include/llvm/CodeGen/CalcSpillWeights.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/CallingConvLower.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/CodeGenCommonISel.h | 8
-rw-r--r--  llvm/include/llvm/CodeGen/CodeGenPassBuilder.h | 11
-rw-r--r--  llvm/include/llvm/CodeGen/CommandFlags.h | 14
-rw-r--r--  llvm/include/llvm/CodeGen/DFAPacketizer.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h | 94
-rw-r--r--  llvm/include/llvm/CodeGen/FastISel.h | 11
-rw-r--r--  llvm/include/llvm/CodeGen/FaultMaps.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 5
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/Combiner.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 46
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h | 32
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h | 9
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h | 9
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h | 5
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h | 8
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h | 8
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h | 20
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h | 38
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h | 13
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/Localizer.h | 5
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h | 42
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h | 40
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 61
-rw-r--r--  llvm/include/llvm/CodeGen/ISDOpcodes.h | 77
-rw-r--r--  llvm/include/llvm/CodeGen/IntrinsicLowering.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/LiveInterval.h | 14
-rw-r--r--  llvm/include/llvm/CodeGen/LiveIntervalUnion.h | 14
-rw-r--r--  llvm/include/llvm/CodeGen/LiveIntervals.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/LivePhysRegs.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/LiveRangeCalc.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/LiveRangeEdit.h | 10
-rw-r--r--  llvm/include/llvm/CodeGen/LiveRegMatrix.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/LiveStacks.h | 6
-rw-r--r--  llvm/include/llvm/CodeGen/LiveVariables.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/MIRFSDiscriminator.h | 21
-rw-r--r--  llvm/include/llvm/CodeGen/MIRParser/MIRParser.h | 11
-rw-r--r--  llvm/include/llvm/CodeGen/MIRSampleProfile.h | 28
-rw-r--r--  llvm/include/llvm/CodeGen/MIRYamlMapping.h | 19
-rw-r--r--  llvm/include/llvm/CodeGen/MachineBasicBlock.h | 35
-rw-r--r--  llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MachineCombinerPattern.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineCycleAnalysis.h | 26
-rw-r--r--  llvm/include/llvm/CodeGen/MachineDominators.h | 5
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFrameInfo.h | 34
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFunction.h | 67
-rw-r--r--  llvm/include/llvm/CodeGen/MachineInstr.h | 45
-rw-r--r--  llvm/include/llvm/CodeGen/MachineLoopInfo.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/MachineMemOperand.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/MachineModuleInfo.h | 71
-rw-r--r--  llvm/include/llvm/CodeGen/MachineOperand.h | 15
-rw-r--r--  llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/MachineOutliner.h | 138
-rw-r--r--  llvm/include/llvm/CodeGen/MachinePassManager.h | 6
-rw-r--r--  llvm/include/llvm/CodeGen/MachinePassRegistry.def | 3
-rw-r--r--  llvm/include/llvm/CodeGen/MachinePipeliner.h | 20
-rw-r--r--  llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 33
-rw-r--r--  llvm/include/llvm/CodeGen/MachineSSAContext.h | 10
-rw-r--r--  llvm/include/llvm/CodeGen/MachineScheduler.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineStableHash.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/ModuloSchedule.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/PBQP/ReductionRules.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/Passes.h | 17
-rw-r--r--  llvm/include/llvm/CodeGen/PseudoSourceValue.h | 19
-rw-r--r--  llvm/include/llvm/CodeGen/RDFGraph.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/RegAllocPBQP.h | 17
-rw-r--r--  llvm/include/llvm/CodeGen/Register.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/RegisterBank.h (renamed from llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h) | 6
-rw-r--r--  llvm/include/llvm/CodeGen/RegisterBankInfo.h (renamed from llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h) | 16
-rw-r--r--  llvm/include/llvm/CodeGen/RegisterClassInfo.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/RegisterPressure.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/RegisterScavenging.h | 20
-rw-r--r--  llvm/include/llvm/CodeGen/RegisterUsageInfo.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/ReplaceWithVeclib.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/ScheduleDAG.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAG.h | 222
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGISel.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 178
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/SlotIndexes.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/StackMaps.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/StackProtector.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/TailDuplicator.h | 5
-rw-r--r--  llvm/include/llvm/CodeGen/TargetCallingConv.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/TargetFrameLowering.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/TargetInstrInfo.h | 26
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 284
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h | 11
-rw-r--r--  llvm/include/llvm/CodeGen/TargetPassConfig.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 47
-rw-r--r--  llvm/include/llvm/CodeGen/TargetSubtargetInfo.h | 10
-rw-r--r--  llvm/include/llvm/CodeGen/TileShapeInfo.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/ValueTypes.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/ValueTypes.td | 403
-rw-r--r--  llvm/include/llvm/DWARFLinker/DWARFLinker.h | 61
-rw-r--r--  llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h | 8
-rw-r--r--  llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h | 10
-rw-r--r--  llvm/include/llvm/DWARFLinker/DWARFStreamer.h | 3
-rw-r--r--  llvm/include/llvm/DWP/DWPStringPool.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h | 10
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h | 10
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h | 10
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h | 4
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/EnumTables.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/Formatters.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h | 4
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/Line.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/RecordName.h | 7
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h | 4
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h | 4
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h | 10
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/DIContext.h | 10
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h | 4
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h | 7
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 36
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h | 7
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h | 9
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h | 11
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 9
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 14
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h | 67
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 30
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 21
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h | 28
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h | 81
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h | 17
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/LineEntry.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/LookupResult.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/Range.h | 130
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/StringTable.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h | 4
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h | 10
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h | 14
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h | 22
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h (renamed from llvm/tools/llvm-pdbutil/FormatUtil.h) | 14
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h | 14
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h | 14
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h | 10
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h | 9
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h (renamed from llvm/tools/llvm-pdbutil/InputFile.h) | 81
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h (renamed from llvm/tools/llvm-pdbutil/LinePrinter.h) | 52
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h | 13
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h | 7
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h | 7
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h | 4
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h | 12
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h | 9
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h | 14
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h | 13
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h | 16
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h | 7
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h | 15
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBContext.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h | 11
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h | 9
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/PDBTypes.h | 3
-rw-r--r--  llvm/include/llvm/DebugInfo/PDB/UDTLayout.h | 1
-rw-r--r--  llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h | 51
-rw-r--r--  llvm/include/llvm/DebugInfo/Symbolize/Markup.h | 120
-rw-r--r--  llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h | 76
-rw-r--r--  llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h (renamed from llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h) | 6
-rw-r--r--  llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h | 103
-rw-r--r--  llvm/include/llvm/Debuginfod/DIFetcher.h | 34
-rw-r--r--  llvm/include/llvm/Debuginfod/HTTPClient.h | 44
-rw-r--r--  llvm/include/llvm/Demangle/Demangle.h | 4
-rw-r--r--  llvm/include/llvm/Demangle/ItaniumDemangle.h | 2287
-rw-r--r--  llvm/include/llvm/Demangle/ItaniumNodes.def | 95
-rw-r--r--  llvm/include/llvm/Demangle/Utility.h | 114
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h | 35
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h | 21
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h | 27
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h | 10
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h | 339
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/riscv.h | 17
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h | 9
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/Core.h | 21
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h | 3
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h | 3
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h | 20
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h | 146
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h | 115
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h | 39
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h | 13
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/Speculation.h | 9
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h | 7
-rw-r--r--  llvm/include/llvm/FileCheck/FileCheck.h | 8
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMP.td | 279
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 129
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPContext.h | 8
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 261
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 12
-rw-r--r--  llvm/include/llvm/FuzzMutate/FuzzerCLI.h | 27
-rw-r--r--  llvm/include/llvm/FuzzMutate/IRMutator.h | 26
-rw-r--r--  llvm/include/llvm/FuzzMutate/OpDescriptor.h | 6
-rw-r--r--  llvm/include/llvm/FuzzMutate/RandomIRBuilder.h | 13
-rw-r--r--  llvm/include/llvm/IR/AbstractCallSite.h | 6
-rw-r--r--  llvm/include/llvm/IR/Argument.h | 1
-rw-r--r--  llvm/include/llvm/IR/Assumptions.h | 4
-rw-r--r--  llvm/include/llvm/IR/Attributes.h | 51
-rw-r--r--  llvm/include/llvm/IR/Attributes.td | 23
-rw-r--r--  llvm/include/llvm/IR/AutoUpgrade.h | 16
-rw-r--r--  llvm/include/llvm/IR/BasicBlock.h | 9
-rw-r--r--  llvm/include/llvm/IR/CFG.h | 1
-rw-r--r--  llvm/include/llvm/IR/ConstantFold.h (renamed from llvm/lib/IR/ConstantFold.h) | 17
-rw-r--r--  llvm/include/llvm/IR/ConstantFolder.h | 178
-rw-r--r--  llvm/include/llvm/IR/ConstantRange.h | 3
-rw-r--r--  llvm/include/llvm/IR/Constants.h | 15
-rw-r--r--  llvm/include/llvm/IR/DIBuilder.h | 23
-rw-r--r--  llvm/include/llvm/IR/DataLayout.h | 2
-rw-r--r--  llvm/include/llvm/IR/DebugInfoMetadata.h | 183
-rw-r--r--  llvm/include/llvm/IR/DerivedTypes.h | 9
-rw-r--r--  llvm/include/llvm/IR/DiagnosticInfo.h | 20
-rw-r--r--  llvm/include/llvm/IR/Dominators.h | 4
-rw-r--r--  llvm/include/llvm/IR/FMF.h | 121
-rw-r--r--  llvm/include/llvm/IR/FPEnv.h | 19
-rw-r--r--  llvm/include/llvm/IR/FixedMetadataKinds.def | 2
-rw-r--r--  llvm/include/llvm/IR/Function.h | 23
-rw-r--r--  llvm/include/llvm/IR/GCStrategy.h | 17
-rw-r--r--  llvm/include/llvm/IR/GlobalIFunc.h | 5
-rw-r--r--  llvm/include/llvm/IR/GlobalObject.h | 5
-rw-r--r--  llvm/include/llvm/IR/GlobalValue.h | 52
-rw-r--r--  llvm/include/llvm/IR/IRBuilder.h | 303
-rw-r--r--  llvm/include/llvm/IR/IRBuilderFolder.h | 71
-rw-r--r--  llvm/include/llvm/IR/InlineAsm.h | 25
-rw-r--r--  llvm/include/llvm/IR/InstVisitor.h | 3
-rw-r--r--  llvm/include/llvm/IR/InstrTypes.h | 50
-rw-r--r--  llvm/include/llvm/IR/Instruction.h | 1
-rw-r--r--  llvm/include/llvm/IR/Instructions.h | 57
-rw-r--r--  llvm/include/llvm/IR/IntrinsicInst.h | 71
-rw-r--r--  llvm/include/llvm/IR/Intrinsics.h | 14
-rw-r--r--  llvm/include/llvm/IR/Intrinsics.td | 420
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td | 166
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 528
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsARM.td | 194
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsBPF.td | 20
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsDirectX.td | 20
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsHexagon.td | 13
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsMips.td | 1342
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsNVVM.td | 1449
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsPowerPC.td | 746
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsRISCV.td | 589
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsSPIRV.td | 31
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsSystemZ.td | 56
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsVE.td | 15
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsVEVL.gen.td | 2470
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 22
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsX86.td | 2332
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsXCore.td | 8
-rw-r--r--  llvm/include/llvm/IR/LLVMContext.h | 22
-rw-r--r--  llvm/include/llvm/IR/LegacyPassManagers.h | 8
-rw-r--r--  llvm/include/llvm/IR/MDBuilder.h | 4
-rw-r--r--  llvm/include/llvm/IR/MatrixBuilder.h | 20
-rw-r--r--  llvm/include/llvm/IR/Metadata.h | 158
-rw-r--r--  llvm/include/llvm/IR/Module.h | 22
-rw-r--r--  llvm/include/llvm/IR/NoFolder.h | 164
-rw-r--r--  llvm/include/llvm/IR/Operator.h | 100
-rw-r--r--  llvm/include/llvm/IR/PatternMatch.h | 116
-rw-r--r--  llvm/include/llvm/IR/RuntimeLibcalls.def | 16
-rw-r--r--  llvm/include/llvm/IR/Statepoint.h | 5
-rw-r--r--  llvm/include/llvm/IR/Type.h | 18
-rw-r--r--  llvm/include/llvm/IR/User.h | 4
-rw-r--r--  llvm/include/llvm/IR/VPIntrinsics.def | 158
-rw-r--r--  llvm/include/llvm/IR/ValueMap.h | 6
-rw-r--r--  llvm/include/llvm/IR/VectorBuilder.h | 99
-rw-r--r--  llvm/include/llvm/IRReader/IRReader.h | 4
-rw-r--r--  llvm/include/llvm/InitializePasses.h | 38
-rw-r--r--  llvm/include/llvm/InterfaceStub/ELFObjHandler.h | 11
-rw-r--r--  llvm/include/llvm/InterfaceStub/IFSHandler.h | 6
-rw-r--r--  llvm/include/llvm/InterfaceStub/IFSStub.h | 5
-rw-r--r--  llvm/include/llvm/LTO/Config.h | 10
-rw-r--r--  llvm/include/llvm/LTO/LTO.h | 13
-rw-r--r--  llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h | 2
-rw-r--r--  llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h | 7
-rw-r--r--  llvm/include/llvm/LinkAllPasses.h | 28
-rw-r--r--  llvm/include/llvm/Linker/IRMover.h | 7
-rw-r--r--  llvm/include/llvm/MC/ConstantPools.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCAsmBackend.h | 8
-rw-r--r--  llvm/include/llvm/MC/MCAsmInfo.h | 22
-rw-r--r--  llvm/include/llvm/MC/MCAssembler.h | 19
-rw-r--r--  llvm/include/llvm/MC/MCCodeView.h | 13
-rw-r--r--  llvm/include/llvm/MC/MCContext.h | 1432
-rw-r--r--  llvm/include/llvm/MC/MCDXContainerStreamer.h | 49
-rw-r--r--  llvm/include/llvm/MC/MCDXContainerWriter.h | 45
-rw-r--r--  llvm/include/llvm/MC/MCDecoderOps.h (renamed from llvm/include/llvm/MC/MCFixedLenDisassembler.h) | 8
-rw-r--r--  llvm/include/llvm/MC/MCDirectives.h | 1
-rw-r--r--  llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h | 28
-rw-r--r--  llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h | 5
-rw-r--r--  llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h | 5
-rw-r--r--  llvm/include/llvm/MC/MCDwarf.h | 11
-rw-r--r--  llvm/include/llvm/MC/MCELFStreamer.h | 9
-rw-r--r--  llvm/include/llvm/MC/MCFragment.h | 7
-rw-r--r--  llvm/include/llvm/MC/MCInstrAnalysis.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCInstrDesc.h | 10
-rw-r--r--  llvm/include/llvm/MC/MCInstrInfo.h | 1
-rw-r--r--  llvm/include/llvm/MC/MCLinkerOptimizationHint.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCMachObjectWriter.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCObjectFileInfo.h | 16
-rw-r--r--  llvm/include/llvm/MC/MCObjectStreamer.h | 8
-rw-r--r--  llvm/include/llvm/MC/MCObjectWriter.h | 12
-rw-r--r--  llvm/include/llvm/MC/MCParser/MCAsmLexer.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCParser/MCAsmParser.h | 6
-rw-r--r--  llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h | 12
-rw-r--r--  llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h | 15
-rw-r--r--  llvm/include/llvm/MC/MCPseudoProbe.h | 16
-rw-r--r--  llvm/include/llvm/MC/MCRegisterInfo.h | 8
-rw-r--r--  llvm/include/llvm/MC/MCSPIRVObjectWriter.h | 40
-rw-r--r--  llvm/include/llvm/MC/MCSPIRVStreamer.h | 50
-rw-r--r--  llvm/include/llvm/MC/MCSection.h | 8
-rw-r--r--  llvm/include/llvm/MC/MCSectionCOFF.h | 6
-rw-r--r--  llvm/include/llvm/MC/MCSectionDXContainer.h | 38
-rw-r--r--  llvm/include/llvm/MC/MCSectionELF.h | 8
-rw-r--r--  llvm/include/llvm/MC/MCSectionGOFF.h | 15
-rw-r--r--  llvm/include/llvm/MC/MCSectionMachO.h | 4
-rw-r--r--  llvm/include/llvm/MC/MCSectionSPIRV.h | 41
-rw-r--r--  llvm/include/llvm/MC/MCSectionWasm.h | 4
-rw-r--r--  llvm/include/llvm/MC/MCSectionXCOFF.h | 29
-rw-r--r--  llvm/include/llvm/MC/MCStreamer.h | 124
-rw-r--r--  llvm/include/llvm/MC/MCSubtargetInfo.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCSymbol.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCSymbolWasm.h | 14
-rw-r--r--  llvm/include/llvm/MC/MCSymbolXCOFF.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCTargetOptions.h | 21
-rw-r--r--  llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h | 3
-rw-r--r--  llvm/include/llvm/MC/MCValue.h | 1
-rw-r--r--  llvm/include/llvm/MC/MCWin64EH.h | 8
-rw-r--r--  llvm/include/llvm/MC/MCWinCOFFStreamer.h | 20
-rw-r--r--  llvm/include/llvm/MC/MCWinEH.h | 10
-rw-r--r--  llvm/include/llvm/MC/MCXCOFFStreamer.h | 4
-rw-r--r--  llvm/include/llvm/MC/SectionKind.h | 7
-rw-r--r--  llvm/include/llvm/MC/StringTableBuilder.h | 1
-rw-r--r--  llvm/include/llvm/MC/SubtargetFeature.h | 3
-rw-r--r--  llvm/include/llvm/MC/TargetRegistry.h | 56
-rw-r--r--  llvm/include/llvm/MCA/CustomBehaviour.h | 5
-rw-r--r--  llvm/include/llvm/MCA/IncrementalSourceMgr.h | 92
-rw-r--r--  llvm/include/llvm/MCA/InstrBuilder.h | 30
-rw-r--r--  llvm/include/llvm/MCA/Instruction.h | 45
-rw-r--r--  llvm/include/llvm/MCA/Pipeline.h | 12
-rw-r--r--  llvm/include/llvm/MCA/SourceMgr.h | 57
-rw-r--r--  llvm/include/llvm/MCA/Stages/EntryStage.h | 3
-rw-r--r--  llvm/include/llvm/MCA/Stages/Stage.h | 13
-rw-r--r--  llvm/include/llvm/ObjCopy/COFF/COFFConfig.h (renamed from llvm/tools/llvm-objcopy/COFF/COFFConfig.h) | 6
-rw-r--r--  llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h (renamed from llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h) | 9
-rw-r--r--  llvm/include/llvm/ObjCopy/CommonConfig.h (renamed from llvm/tools/llvm-objcopy/CommonConfig.h) | 21
-rw-r--r--  llvm/include/llvm/ObjCopy/ConfigManager.h | 50
-rw-r--r--  llvm/include/llvm/ObjCopy/ELF/ELFConfig.h (renamed from llvm/tools/llvm-objcopy/ELF/ELFConfig.h) | 6
-rw-r--r--  llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h (renamed from llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h) | 19
-rw-r--r--  llvm/include/llvm/ObjCopy/MachO/MachOConfig.h (renamed from llvm/tools/llvm-objcopy/MachO/MachOConfig.h) | 9
-rw-r--r--  llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h (renamed from llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h) | 12
-rw-r--r--  llvm/include/llvm/ObjCopy/MultiFormatConfig.h (renamed from llvm/tools/llvm-objcopy/MultiFormatConfig.h) | 8
-rw-r--r--  llvm/include/llvm/ObjCopy/ObjCopy.h | 42
-rw-r--r--  llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h | 21
-rw-r--r--  llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h | 35
-rw-r--r--  llvm/include/llvm/ObjCopy/wasm/WasmConfig.h (renamed from llvm/tools/llvm-objcopy/wasm/WasmConfig.h) | 6
-rw-r--r--  llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h (renamed from llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h) | 9
-rw-r--r--  llvm/include/llvm/Object/Archive.h | 12
-rw-r--r--  llvm/include/llvm/Object/ArchiveWriter.h | 5
-rw-r--r--  llvm/include/llvm/Object/Binary.h | 6
-rw-r--r--  llvm/include/llvm/Object/COFF.h | 12
-rw-r--r--  llvm/include/llvm/Object/COFFImportFile.h | 3
-rw-r--r--  llvm/include/llvm/Object/COFFModuleDefinition.h | 2
-rw-r--r--  llvm/include/llvm/Object/DXContainer.h | 124
-rw-r--r--  llvm/include/llvm/Object/Decompressor.h | 6
-rw-r--r--  llvm/include/llvm/Object/ELF.h | 2
-rw-r--r--  llvm/include/llvm/Object/ELFObjectFile.h | 32
-rw-r--r--  llvm/include/llvm/Object/ELFTypes.h | 12
-rw-r--r--  llvm/include/llvm/Object/Error.h | 1
-rw-r--r--  llvm/include/llvm/Object/IRObjectFile.h | 1
-rw-r--r--  llvm/include/llvm/Object/MachO.h | 130
-rw-r--r--  llvm/include/llvm/Object/MachOUniversal.h | 2
-rw-r--r--  llvm/include/llvm/Object/MachOUniversalWriter.h | 13
-rw-r--r--  llvm/include/llvm/Object/ObjectFile.h | 13
-rw-r--r--  llvm/include/llvm/Object/OffloadBinary.h | 169
-rw-r--r--  llvm/include/llvm/Object/RelocationResolver.h | 15
-rw-r--r--  llvm/include/llvm/Object/SymbolicFile.h | 8
-rw-r--r--  llvm/include/llvm/Object/TapiFile.h | 15
-rw-r--r--  llvm/include/llvm/Object/TapiUniversal.h | 6
-rw-r--r--  llvm/include/llvm/Object/Wasm.h | 1
-rw-r--r--  llvm/include/llvm/Object/WindowsResource.h | 2
-rw-r--r--  llvm/include/llvm/Object/XCOFFObjectFile.h | 15
-rw-r--r--  llvm/include/llvm/ObjectYAML/DXContainerYAML.h | 101
-rw-r--r--  llvm/include/llvm/ObjectYAML/ELFYAML.h | 45
-rw-r--r--  llvm/include/llvm/ObjectYAML/MachOYAML.h | 1
-rw-r--r--  llvm/include/llvm/ObjectYAML/ObjectYAML.h | 4
-rw-r--r--  llvm/include/llvm/ObjectYAML/OffloadYAML.h | 79
-rw-r--r--  llvm/include/llvm/ObjectYAML/WasmYAML.h | 22
-rw-r--r--  llvm/include/llvm/ObjectYAML/yaml2obj.h | 11
-rw-r--r--  llvm/include/llvm/Option/ArgList.h | 14
-rw-r--r--  llvm/include/llvm/Pass.h | 10
-rw-r--r--  llvm/include/llvm/Passes/PassBuilder.h | 43
-rw-r--r--  llvm/include/llvm/Passes/StandardInstrumentations.h | 31
-rw-r--r--  llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h | 4
-rw-r--r--  llvm/include/llvm/ProfileData/GCOV.h | 4
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProf.h | 41
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfCorrelator.h | 9
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfData.inc | 4
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfReader.h | 49
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfWriter.h | 34
-rw-r--r--  llvm/include/llvm/ProfileData/MIBEntryDef.inc | 47
-rw-r--r--  llvm/include/llvm/ProfileData/MemProf.h | 613
-rw-r--r--  llvm/include/llvm/ProfileData/MemProfData.inc | 143
-rw-r--r--  llvm/include/llvm/ProfileData/RawMemProfReader.h | 127
-rw-r--r--  llvm/include/llvm/ProfileData/SampleProf.h | 97
-rw-r--r--  llvm/include/llvm/ProfileData/SampleProfReader.h | 17
-rw-r--r--  llvm/include/llvm/ProfileData/SampleProfWriter.h | 4
-rw-r--r--  llvm/include/llvm/Remarks/RemarkSerializer.h | 1
-rw-r--r--  llvm/include/llvm/Support/AArch64TargetParser.def | 55
-rw-r--r--  llvm/include/llvm/Support/AMDHSAKernelDescriptor.h | 14
-rw-r--r--  llvm/include/llvm/Support/ARMBuildAttributes.h | 39
-rw-r--r--  llvm/include/llvm/Support/ARMTargetParser.def | 8
-rw-r--r--  llvm/include/llvm/Support/ARMWinEH.h | 5
-rw-r--r--  llvm/include/llvm/Support/Alignment.h | 73
-rw-r--r--  llvm/include/llvm/Support/Allocator.h | 7
-rw-r--r--  llvm/include/llvm/Support/BLAKE3.h | 124
-rw-r--r--  llvm/include/llvm/Support/Base64.h | 1
-rw-r--r--  llvm/include/llvm/Support/BinaryStreamArray.h | 2
-rw-r--r--  llvm/include/llvm/Support/BinaryStreamRef.h | 6
-rw-r--r--  llvm/include/llvm/Support/BranchProbability.h | 1
-rw-r--r--  llvm/include/llvm/Support/CSKYAttributeParser.h | 43
-rw-r--r--  llvm/include/llvm/Support/CSKYAttributes.h | 95
-rw-r--r--  llvm/include/llvm/Support/CSKYTargetParser.def | 524
-rw-r--r--  llvm/include/llvm/Support/CSKYTargetParser.h | 203
-rw-r--r--  llvm/include/llvm/Support/Casting.h | 769
-rw-r--r--  llvm/include/llvm/Support/CodeGen.h | 36
-rw-r--r--  llvm/include/llvm/Support/CommandLine.h | 290
-rw-r--r--  llvm/include/llvm/Support/Compiler.h | 64
-rw-r--r--  llvm/include/llvm/Support/Compression.h | 4
-rw-r--r--  llvm/include/llvm/Support/ConvertUTF.h | 21
-rw-r--r--  llvm/include/llvm/Support/CrashRecoveryContext.h | 3
-rw-r--r--  llvm/include/llvm/Support/Debug.h | 4
-rw-r--r--  llvm/include/llvm/Support/Errno.h | 1
-rw-r--r--  llvm/include/llvm/Support/Error.h | 4
-rw-r--r--  llvm/include/llvm/Support/ErrorHandling.h | 25
-rw-r--r--  llvm/include/llvm/Support/FileUtilities.h | 21
-rw-r--r--  llvm/include/llvm/Support/FormatProviders.h | 2
-rw-r--r--  llvm/include/llvm/Support/FormatVariadic.h | 2
-rw-r--r--  llvm/include/llvm/Support/HashBuilder.h | 7
-rw-r--r--  llvm/include/llvm/Support/Host.h | 1
-rw-r--r--  llvm/include/llvm/Support/KnownBits.h | 8
-rw-r--r--  llvm/include/llvm/Support/LowLevelTypeImpl.h | 12
-rw-r--r--  llvm/include/llvm/Support/MD5.h | 29
-rw-r--r--  llvm/include/llvm/Support/MachineValueType.h | 439
-rw-r--r--  llvm/include/llvm/Support/MathExtras.h | 52
-rw-r--r--  llvm/include/llvm/Support/Parallel.h | 4
-rw-r--r--  llvm/include/llvm/Support/Path.h | 1
-rw-r--r--  llvm/include/llvm/Support/PluginLoader.h | 6
-rw-r--r--  llvm/include/llvm/Support/Printable.h | 8
-rw-r--r--  llvm/include/llvm/Support/Process.h | 1
-rw-r--r--  llvm/include/llvm/Support/Program.h | 2
-rw-r--r--  llvm/include/llvm/Support/RISCVISAInfo.h | 2
-rw-r--r--  llvm/include/llvm/Support/RWMutex.h | 4
-rw-r--r--  llvm/include/llvm/Support/SHA1.h | 13
-rw-r--r--  llvm/include/llvm/Support/SHA256.h | 13
-rw-r--r--  llvm/include/llvm/Support/ScopedPrinter.h | 7
-rw-r--r--  llvm/include/llvm/Support/Signals.h | 1
-rw-r--r--  llvm/include/llvm/Support/Signposts.h | 2
-rw-r--r--  llvm/include/llvm/Support/SourceMgr.h | 30
-rw-r--r--  llvm/include/llvm/Support/TargetOpcodes.def | 6
-rw-r--r--  llvm/include/llvm/Support/TargetParser.h | 14
-rw-r--r--  llvm/include/llvm/Support/ThreadPool.h | 97
-rw-r--r--  llvm/include/llvm/Support/Threading.h | 22
-rw-r--r--  llvm/include/llvm/Support/TrigramIndex.h | 2
-rw-r--r--  llvm/include/llvm/Support/TypeSize.h | 19
-rw-r--r--  llvm/include/llvm/Support/Unicode.h | 42
-rw-r--r--  llvm/include/llvm/Support/VersionTuple.h | 14
-rw-r--r--  llvm/include/llvm/Support/VirtualFileSystem.h | 140
-rw-r--r--  llvm/include/llvm/Support/Win64EH.h | 36
-rw-r--r--  llvm/include/llvm/Support/WithColor.h | 18
-rw-r--r--  llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h | 2
-rw-r--r--  llvm/include/llvm/Support/X86TargetParser.def | 72
-rw-r--r--  llvm/include/llvm/Support/YAMLParser.h | 1
-rw-r--r--  llvm/include/llvm/Support/YAMLTraits.h | 67
-rw-r--r--  llvm/include/llvm/Support/circular_raw_ostream.h | 11
-rw-r--r--  llvm/include/llvm/Support/raw_sha1_ostream.h | 2
-rw-r--r--  llvm/include/llvm/TableGen/Parser.h | 34
-rw-r--r--  llvm/include/llvm/TableGen/Record.h | 305
-rw-r--r--  llvm/include/llvm/Target/CGPassBuilderOption.h | 1
-rw-r--r--  llvm/include/llvm/Target/GenericOpcodes.td | 13
-rw-r--r--  llvm/include/llvm/Target/GlobalISel/Combine.td | 73
-rw-r--r--  llvm/include/llvm/Target/Target.td | 54
-rw-r--r--  llvm/include/llvm/Target/TargetLoweringObjectFile.h | 3
-rw-r--r--  llvm/include/llvm/Target/TargetMachine.h | 22
-rw-r--r--  llvm/include/llvm/Target/TargetOptions.h | 33
-rw-r--r--  llvm/include/llvm/Target/TargetSelectionDAG.td | 138
-rw-r--r--  llvm/include/llvm/Testing/Support/SupportHelpers.h | 8
-rw-r--r--  llvm/include/llvm/TextAPI/Symbol.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Coroutines.h | 37
-rw-r--r--  llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h | 30
-rw-r--r--  llvm/include/llvm/Transforms/Coroutines/CoroEarly.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/IPO.h | 7
-rw-r--r--  llvm/include/llvm/Transforms/IPO/AlwaysInliner.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Attributor.h | 514
-rw-r--r--  llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h | 41
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/IPO/FunctionAttrs.h | 17
-rw-r--r--  llvm/include/llvm/Transforms/IPO/GlobalDCE.h | 9
-rw-r--r--  llvm/include/llvm/Transforms/IPO/IROutliner.h | 41
-rw-r--r--  llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Inliner.h | 8
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Internalize.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ModuleInliner.h | 9
-rw-r--r--  llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h | 13
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleContextTracker.h | 114
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleProfile.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h | 12
-rw-r--r--  llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/InstCombine/InstCombine.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation.h | 39
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h | 110
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h | 45
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/CGProfile.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h | 12
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h | 7
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h | 11
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Scalar.h | 22
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/BDCE.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/DCE.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/Float2Int.h | 11
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/GVN.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/GuardWidening.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h | 8
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/JumpThreading.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LICM.h | 60
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h | 8
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopDeletion.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopFlatten.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopInterchange.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopPassManager.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopPredication.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopRotation.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopSink.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LowerAtomicPass.h (renamed from llvm/include/llvm/Transforms/Scalar/LowerAtomic.h) | 13
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/SCCP.h | 18
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/Scalarizer.h | 21
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h | 9
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/Sink.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h | 131
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Utils.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h | 7
-rw-r--r--  llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h | 17
-rw-r--r--  llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Utils/BuildLibCalls.h | 69
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h | 10
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 15
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CtorUtils.h | 8
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Debugify.h | 38
-rw-r--r--  llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Evaluator.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/FunctionComparator.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/GlobalStatus.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Local.h | 20
-rw-r--r--  llvm/include/llvm/Transforms/Utils/LoopUtils.h | 45
-rw-r--r--  llvm/include/llvm/Transforms/Utils/LoopVersioning.h | 7
-rw-r--r--  llvm/include/llvm/Transforms/Utils/LowerAtomic.h | 37
-rw-r--r--  llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h | 28
-rw-r--r--  llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h | 24
-rw-r--r--  llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h | 82
-rw-r--r--  llvm/include/llvm/Transforms/Utils/MisExpect.h | 77
-rw-r--r--  llvm/include/llvm/Transforms/Utils/ModuleUtils.h | 11
-rw-r--r--  llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/PredicateInfo.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SCCPSolver.h | 42
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h | 26
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SampleProfileInference.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h | 8
-rw-r--r--  llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h | 24
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h | 7
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h | 12
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SizeOpts.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SplitModule.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/UnrollLoop.h | 9
-rw-r--r--  llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h | 31
-rw-r--r--  llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h | 4
-rw-r--r--  llvm/include/llvm/WindowsDriver/MSVCPaths.h | 107
-rw-r--r--  llvm/include/llvm/WindowsDriver/MSVCSetupApi.h | 523
-rw-r--r--  llvm/include/llvm/module.modulemap | 7
-rw-r--r--  llvm/lib/Analysis/AliasAnalysis.cpp | 25
-rw-r--r--  llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 110
-rw-r--r--  llvm/lib/Analysis/AliasSetTracker.cpp | 33
-rw-r--r--  llvm/lib/Analysis/Analysis.cpp | 16
-rw-r--r--  llvm/lib/Analysis/AssumeBundleQueries.cpp | 4
-rw-r--r--  llvm/lib/Analysis/AssumptionCache.cpp | 4
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 75
-rw-r--r--  llvm/lib/Analysis/BlockFrequencyInfo.cpp | 1
-rw-r--r--  llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 5
-rw-r--r--  llvm/lib/Analysis/BranchProbabilityInfo.cpp | 17
-rw-r--r--  llvm/lib/Analysis/CFG.cpp | 6
-rw-r--r--  llvm/lib/Analysis/CFGPrinter.cpp | 2
-rw-r--r--  llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp | 4
-rw-r--r--  llvm/lib/Analysis/CFLGraph.h | 4
-rw-r--r--  llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 14
-rw-r--r--  llvm/lib/Analysis/CGSCCPassManager.cpp | 39
-rw-r--r--  llvm/lib/Analysis/CallGraph.cpp | 4
-rw-r--r--  llvm/lib/Analysis/CallGraphSCCPass.cpp | 7
-rw-r--r--  llvm/lib/Analysis/CallPrinter.cpp | 106
-rw-r--r--  llvm/lib/Analysis/CaptureTracking.cpp | 331
-rw-r--r--  llvm/lib/Analysis/CmpInstAnalysis.cpp | 16
-rw-r--r--  llvm/lib/Analysis/CodeMetrics.cpp | 15
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp | 266
-rw-r--r--  llvm/lib/Analysis/ConstraintSystem.cpp | 1
-rw-r--r--  llvm/lib/Analysis/CostModel.cpp | 4
-rw-r--r--  llvm/lib/Analysis/CycleAnalysis.cpp | 6
-rw-r--r--  llvm/lib/Analysis/DDG.cpp | 9
-rw-r--r--  llvm/lib/Analysis/DDGPrinter.cpp | 4
-rw-r--r--  llvm/lib/Analysis/Delinearization.cpp | 40
-rw-r--r--  llvm/lib/Analysis/DemandedBits.cpp | 6
-rw-r--r--  llvm/lib/Analysis/DependenceAnalysis.cpp | 102
-rw-r--r--  llvm/lib/Analysis/DependenceGraphBuilder.cpp | 1
-rw-r--r--  llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp | 8
-rw-r--r--  llvm/lib/Analysis/DivergenceAnalysis.cpp | 3
-rw-r--r--  llvm/lib/Analysis/DomPrinter.cpp | 305
-rw-r--r--  llvm/lib/Analysis/DomTreeUpdater.cpp | 93
-rw-r--r--  llvm/lib/Analysis/DominanceFrontier.cpp | 1
-rw-r--r--  llvm/lib/Analysis/EHPersonalities.cpp | 6
-rw-r--r--  llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp | 233
-rw-r--r--  llvm/lib/Analysis/GlobalsModRef.cpp | 51
-rw-r--r--  llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 96
-rw-r--r--  llvm/lib/Analysis/IVDescriptors.cpp | 266
-rw-r--r--  llvm/lib/Analysis/IVUsers.cpp | 6
-rw-r--r--  llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp | 13
-rw-r--r--  llvm/lib/Analysis/InlineAdvisor.cpp | 102
-rw-r--r--  llvm/lib/Analysis/InlineCost.cpp | 178
-rw-r--r--  llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp | 25
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 1310
-rw-r--r--  llvm/lib/Analysis/Interval.cpp | 1
-rw-r--r--  llvm/lib/Analysis/LazyCallGraph.cpp | 8
-rw-r--r--  llvm/lib/Analysis/LazyValueInfo.cpp | 32
-rw-r--r--  llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp | 1
-rw-r--r--  llvm/lib/Analysis/Lint.cpp | 220
-rw-r--r--  llvm/lib/Analysis/Loads.cpp | 9
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp | 486
-rw-r--r--  llvm/lib/Analysis/LoopAnalysisManager.cpp | 3
-rw-r--r--  llvm/lib/Analysis/LoopCacheAnalysis.cpp | 129
-rw-r--r--  llvm/lib/Analysis/LoopInfo.cpp | 11
-rw-r--r--  llvm/lib/Analysis/LoopNestAnalysis.cpp | 3
-rw-r--r--  llvm/lib/Analysis/LoopPass.cpp | 8
-rw-r--r--  llvm/lib/Analysis/LoopUnrollAnalyzer.cpp | 11
-rw-r--r--  llvm/lib/Analysis/MLInlineAdvisor.cpp | 141
-rw-r--r--  llvm/lib/Analysis/MemDepPrinter.cpp | 1
-rw-r--r--  llvm/lib/Analysis/MemDerefPrinter.cpp | 3
-rw-r--r--  llvm/lib/Analysis/MemoryBuiltins.cpp | 524
-rw-r--r--  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 64
-rw-r--r--  llvm/lib/Analysis/MemoryLocation.cpp | 2
-rw-r--r--  llvm/lib/Analysis/MemorySSA.cpp | 36
-rw-r--r--  llvm/lib/Analysis/MemorySSAUpdater.cpp | 23
-rw-r--r--  llvm/lib/Analysis/ModelUnderTrainingRunner.cpp | 29
-rw-r--r--  llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp | 2
-rw-r--r--  llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 15
-rw-r--r--  llvm/lib/Analysis/MustExecute.cpp | 7
-rw-r--r--  llvm/lib/Analysis/NoInferenceModelRunner.cpp | 16
-rw-r--r--  llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp | 2
-rw-r--r--  llvm/lib/Analysis/OptimizationRemarkEmitter.cpp | 4
-rw-r--r--  llvm/lib/Analysis/OverflowInstAnalysis.cpp | 1
-rw-r--r--  llvm/lib/Analysis/PHITransAddr.cpp | 9
-rw-r--r--  llvm/lib/Analysis/ProfileSummaryInfo.cpp | 13
-rw-r--r--  llvm/lib/Analysis/PtrUseVisitor.cpp | 1
-rw-r--r--  llvm/lib/Analysis/RegionInfo.cpp | 1
-rw-r--r--  llvm/lib/Analysis/RegionPass.cpp | 8
-rw-r--r--  llvm/lib/Analysis/RegionPrinter.cpp | 69
-rw-r--r--  llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 22
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 1323
-rw-r--r--  llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 1
-rw-r--r--  llvm/lib/Analysis/ScalarEvolutionDivision.cpp | 2
-rw-r--r--  llvm/lib/Analysis/ScalarEvolutionNormalization.cpp | 1
-rw-r--r--  llvm/lib/Analysis/ScopedNoAliasAA.cpp | 1
-rw-r--r--  llvm/lib/Analysis/StackLifetime.cpp | 7
-rw-r--r--  llvm/lib/Analysis/StackSafetyAnalysis.cpp | 5
-rw-r--r--  llvm/lib/Analysis/StratifiedSets.h | 6
-rw-r--r--  llvm/lib/Analysis/SyncDependenceAnalysis.cpp | 8
-rw-r--r--  llvm/lib/Analysis/SyntheticCountsUtils.cpp | 7
-rw-r--r--  llvm/lib/Analysis/TFUtils.cpp | 163
-rw-r--r--  llvm/lib/Analysis/TargetLibraryInfo.cpp | 12
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp | 70
-rw-r--r--  llvm/lib/Analysis/TensorSpec.cpp | 144
-rw-r--r--  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 1
-rw-r--r--  llvm/lib/Analysis/TypeMetadataUtils.cpp | 1
-rw-r--r--  llvm/lib/Analysis/VFABIDemangling.cpp | 2
-rw-r--r--  llvm/lib/Analysis/ValueLatticeUtils.cpp | 9
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 509
-rw-r--r--  llvm/lib/Analysis/VectorUtils.cpp | 132
-rw-r--r--  llvm/lib/AsmParser/LLLexer.cpp | 94
-rw-r--r--  llvm/lib/AsmParser/LLParser.cpp | 225
-rw-r--r--  llvm/lib/AsmParser/Parser.cpp | 2
-rw-r--r--  llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 4
-rw-r--r--  llvm/lib/BinaryFormat/COFF.cpp | 57
-rw-r--r--  llvm/lib/BinaryFormat/Magic.cpp | 14
-rw-r--r--  llvm/lib/BinaryFormat/Wasm.cpp | 29
-rw-r--r--  llvm/lib/Bitcode/Reader/BitReader.cpp | 1
-rw-r--r--  llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 16
-rw-r--r--  llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 1774
-rw-r--r--  llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 90
-rw-r--r--  llvm/lib/Bitcode/Reader/MetadataLoader.h | 6
-rw-r--r--  llvm/lib/Bitcode/Reader/ValueList.cpp | 195
-rw-r--r--  llvm/lib/Bitcode/Reader/ValueList.h | 61
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 90
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp | 1
-rw-r--r--  llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 136
-rw-r--r--  llvm/lib/Bitstream/Reader/BitstreamReader.cpp | 57
-rw-r--r--  llvm/lib/CodeGen/Analysis.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 460
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 123
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 70
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 23
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 1
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 60
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 19
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 28
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/WasmException.h | 5
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 58
-rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp | 332
-rw-r--r--  llvm/lib/CodeGen/BasicBlockSections.cpp | 181
-rw-r--r--  llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 144
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.h | 1
-rw-r--r--  llvm/lib/CodeGen/BranchRelaxation.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/BreakFalseDeps.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/CFIFixup.cpp | 225
-rw-r--r--  llvm/lib/CodeGen/CFIInstrInserter.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/CalcSpillWeights.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/CallingConvLower.cpp | 16
-rw-r--r--llvm/lib/CodeGen/CodeGen.cpp4
-rw-r--r--llvm/lib/CodeGen/CodeGenCommonISel.cpp34
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp192
-rw-r--r--llvm/lib/CodeGen/CommandFlags.cpp33
-rw-r--r--llvm/lib/CodeGen/DFAPacketizer.cpp2
-rw-r--r--llvm/lib/CodeGen/DeadMachineInstructionElim.cpp1
-rw-r--r--llvm/lib/CodeGen/DetectDeadLanes.cpp20
-rw-r--r--llvm/lib/CodeGen/EHContGuardCatchret.cpp2
-rw-r--r--llvm/lib/CodeGen/EarlyIfConversion.cpp7
-rw-r--r--llvm/lib/CodeGen/ExpandMemCmp.cpp14
-rw-r--r--llvm/lib/CodeGen/ExpandPostRAPseudos.cpp10
-rw-r--r--llvm/lib/CodeGen/ExpandReductions.cpp2
-rw-r--r--llvm/lib/CodeGen/ExpandVectorPredication.cpp27
-rw-r--r--llvm/lib/CodeGen/FEntryInserter.cpp3
-rw-r--r--llvm/lib/CodeGen/FaultMaps.cpp2
-rw-r--r--llvm/lib/CodeGen/FinalizeISel.cpp2
-rw-r--r--llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp12
-rw-r--r--llvm/lib/CodeGen/GCMetadata.cpp3
-rw-r--r--llvm/lib/CodeGen/GCRootLowering.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp45
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CallLowering.cpp10
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Combiner.cpp8
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp313
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp41
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp1
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp58
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp10
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp11
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp7
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp21
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Legalizer.cpp6
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp150
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp10
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp7
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Localizer.cpp1
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp56
-rw-r--r--llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp8
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp154
-rw-r--r--llvm/lib/CodeGen/GlobalMerge.cpp14
-rw-r--r--llvm/lib/CodeGen/HardwareLoops.cpp3
-rw-r--r--llvm/lib/CodeGen/IfConversion.cpp4
-rw-r--r--llvm/lib/CodeGen/IndirectBrExpandPass.cpp6
-rw-r--r--llvm/lib/CodeGen/InlineSpiller.cpp14
-rw-r--r--llvm/lib/CodeGen/InterferenceCache.h2
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp2
-rw-r--r--llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp22
-rw-r--r--llvm/lib/CodeGen/JMCInstrumenter.cpp233
-rw-r--r--llvm/lib/CodeGen/LLVMTargetMachine.cpp38
-rw-r--r--llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp3
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp625
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h142
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp15
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h11
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp22
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.cpp29
-rw-r--r--llvm/lib/CodeGen/LiveInterval.cpp19
-rw-r--r--llvm/lib/CodeGen/LiveIntervalCalc.cpp11
-rw-r--r--llvm/lib/CodeGen/LiveIntervalUnion.cpp15
-rw-r--r--llvm/lib/CodeGen/LiveIntervals.cpp14
-rw-r--r--llvm/lib/CodeGen/LiveRangeCalc.cpp2
-rw-r--r--llvm/lib/CodeGen/LiveRangeEdit.cpp2
-rw-r--r--llvm/lib/CodeGen/LiveRangeShrink.cpp1
-rw-r--r--llvm/lib/CodeGen/LiveRegMatrix.cpp17
-rw-r--r--llvm/lib/CodeGen/LiveStacks.cpp5
-rw-r--r--llvm/lib/CodeGen/LocalStackSlotAllocation.cpp19
-rw-r--r--llvm/lib/CodeGen/LowLevelType.cpp1
-rw-r--r--llvm/lib/CodeGen/LowerEmuTLS.cpp1
-rw-r--r--llvm/lib/CodeGen/MIRCanonicalizerPass.cpp10
-rw-r--r--llvm/lib/CodeGen/MIRFSDiscriminator.cpp7
-rw-r--r--llvm/lib/CodeGen/MIRNamerPass.cpp4
-rw-r--r--llvm/lib/CodeGen/MIRParser/MILexer.cpp3
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIParser.cpp74
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIRParser.cpp30
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp32
-rw-r--r--llvm/lib/CodeGen/MIRSampleProfile.cpp8
-rw-r--r--llvm/lib/CodeGen/MIRVRegNamerUtils.cpp1
-rw-r--r--llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp119
-rw-r--r--llvm/lib/CodeGen/MachineBasicBlock.cpp27
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp15
-rw-r--r--llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineCSE.cpp6
-rw-r--r--llvm/lib/CodeGen/MachineCheckDebugify.cpp18
-rw-r--r--llvm/lib/CodeGen/MachineCombiner.cpp3
-rw-r--r--llvm/lib/CodeGen/MachineCopyPropagation.cpp426
-rw-r--r--llvm/lib/CodeGen/MachineCycleAnalysis.cpp110
-rw-r--r--llvm/lib/CodeGen/MachineDebugify.cpp3
-rw-r--r--llvm/lib/CodeGen/MachineDominanceFrontier.cpp3
-rw-r--r--llvm/lib/CodeGen/MachineDominators.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineFunction.cpp114
-rw-r--r--llvm/lib/CodeGen/MachineFunctionPass.cpp1
-rw-r--r--llvm/lib/CodeGen/MachineFunctionSplitter.cpp9
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp49
-rw-r--r--llvm/lib/CodeGen/MachineInstrBundle.cpp5
-rw-r--r--llvm/lib/CodeGen/MachineLICM.cpp20
-rw-r--r--llvm/lib/CodeGen/MachineLoopInfo.cpp5
-rw-r--r--llvm/lib/CodeGen/MachineLoopUtils.cpp20
-rw-r--r--llvm/lib/CodeGen/MachineModuleInfo.cpp218
-rw-r--r--llvm/lib/CodeGen/MachineOperand.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp53
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp133
-rw-r--r--llvm/lib/CodeGen/MachineRegisterInfo.cpp16
-rw-r--r--llvm/lib/CodeGen/MachineSSAContext.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp15
-rw-r--r--llvm/lib/CodeGen/MachineSink.cpp290
-rw-r--r--llvm/lib/CodeGen/MachineStableHash.cpp56
-rw-r--r--llvm/lib/CodeGen/MachineStripDebug.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp86
-rw-r--r--llvm/lib/CodeGen/MacroFusion.cpp3
-rw-r--r--llvm/lib/CodeGen/ModuloSchedule.cpp88
-rw-r--r--llvm/lib/CodeGen/NonRelocatableStringpool.cpp4
-rw-r--r--llvm/lib/CodeGen/OptimizePHIs.cpp1
-rw-r--r--llvm/lib/CodeGen/PHIElimination.cpp2
-rw-r--r--llvm/lib/CodeGen/ParallelCG.cpp3
-rw-r--r--llvm/lib/CodeGen/PatchableFunction.cpp4
-rw-r--r--llvm/lib/CodeGen/PeepholeOptimizer.cpp16
-rw-r--r--llvm/lib/CodeGen/PostRAHazardRecognizer.cpp12
-rw-r--r--llvm/lib/CodeGen/PostRASchedulerList.cpp8
-rw-r--r--llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp2
-rw-r--r--llvm/lib/CodeGen/ProcessImplicitDefs.cpp11
-rw-r--r--llvm/lib/CodeGen/PrologEpilogInserter.cpp184
-rw-r--r--llvm/lib/CodeGen/PseudoProbeInserter.cpp4
-rw-r--r--llvm/lib/CodeGen/PseudoSourceValue.cpp46
-rw-r--r--llvm/lib/CodeGen/RDFGraph.cpp16
-rw-r--r--llvm/lib/CodeGen/RDFLiveness.cpp6
-rw-r--r--llvm/lib/CodeGen/ReachingDefAnalysis.cpp14
-rw-r--r--llvm/lib/CodeGen/RegAllocBase.cpp9
-rw-r--r--llvm/lib/CodeGen/RegAllocBase.h10
-rw-r--r--llvm/lib/CodeGen/RegAllocBasic.cpp33
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp38
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.h48
-rw-r--r--llvm/lib/CodeGen/RegAllocFast.cpp36
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.cpp564
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.h187
-rw-r--r--llvm/lib/CodeGen/RegAllocPBQP.cpp1
-rw-r--r--llvm/lib/CodeGen/RegAllocScore.cpp22
-rw-r--r--llvm/lib/CodeGen/RegAllocScore.h19
-rw-r--r--llvm/lib/CodeGen/RegUsageInfoCollector.cpp5
-rw-r--r--llvm/lib/CodeGen/RegUsageInfoPropagate.cpp3
-rw-r--r--llvm/lib/CodeGen/RegisterBank.cpp (renamed from llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp)2
-rw-r--r--llvm/lib/CodeGen/RegisterBankInfo.cpp (renamed from llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp)7
-rw-r--r--llvm/lib/CodeGen/RegisterClassInfo.cpp19
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp2
-rw-r--r--llvm/lib/CodeGen/RegisterScavenging.cpp2
-rw-r--r--llvm/lib/CodeGen/RegisterUsageInfo.cpp2
-rw-r--r--llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp3
-rw-r--r--llvm/lib/CodeGen/RenameIndependentSubregs.cpp2
-rw-r--r--llvm/lib/CodeGen/ReplaceWithVeclib.cpp5
-rw-r--r--llvm/lib/CodeGen/SafeStack.cpp57
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.cpp1
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.h2
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp10
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGPrinter.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectOptimize.cpp989
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp1969
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp40
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp28
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp22
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp262
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp24
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp388
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp92
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h13
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp46
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp936
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp14
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h1
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp42
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp19
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp860
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp491
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h26
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp12
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp36
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp28
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp59
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp888
-rw-r--r--llvm/lib/CodeGen/ShadowStackGCLowering.cpp3
-rw-r--r--llvm/lib/CodeGen/SjLjEHPrepare.cpp2
-rw-r--r--llvm/lib/CodeGen/SplitKit.cpp89
-rw-r--r--llvm/lib/CodeGen/SplitKit.h23
-rw-r--r--llvm/lib/CodeGen/StackColoring.cpp10
-rw-r--r--llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp2
-rw-r--r--llvm/lib/CodeGen/StackMaps.cpp4
-rw-r--r--llvm/lib/CodeGen/StackProtector.cpp4
-rw-r--r--llvm/lib/CodeGen/StackSlotColoring.cpp1
-rw-r--r--llvm/lib/CodeGen/TailDuplication.cpp4
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp23
-rw-r--r--llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp9
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp14
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp209
-rw-r--r--llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp91
-rw-r--r--llvm/lib/CodeGen/TargetOptionsImpl.cpp1
-rw-r--r--llvm/lib/CodeGen/TargetPassConfig.cpp80
-rw-r--r--llvm/lib/CodeGen/TargetRegisterInfo.cpp3
-rw-r--r--llvm/lib/CodeGen/TargetSchedule.cpp1
-rw-r--r--llvm/lib/CodeGen/TargetSubtargetInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp72
-rw-r--r--llvm/lib/CodeGen/TypePromotion.cpp137
-rw-r--r--llvm/lib/CodeGen/UnreachableBlockElim.cpp14
-rw-r--r--llvm/lib/CodeGen/VLIWMachineScheduler.cpp10
-rw-r--r--llvm/lib/CodeGen/ValueTypes.cpp15
-rw-r--r--llvm/lib/CodeGen/WasmEHPrepare.cpp12
-rw-r--r--llvm/lib/CodeGen/WinEHPrepare.cpp6
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinker.cpp150
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp6
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp1
-rw-r--r--llvm/lib/DWARFLinker/DWARFStreamer.cpp49
-rw-r--r--llvm/lib/DWP/DWP.cpp17
-rw-r--r--llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp9
-rw-r--r--llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp75
-rw-r--r--llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp5
-rw-r--r--llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp33
-rw-r--r--llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp8
-rw-r--r--llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp2
-rw-r--r--llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp4
-rw-r--r--llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp3
-rw-r--r--llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/Formatters.cpp4
-rw-r--r--llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp10
-rw-r--r--llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp5
-rw-r--r--llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp11
-rw-r--r--llvm/lib/DebugInfo/CodeView/RecordName.cpp6
-rw-r--r--llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp2
-rw-r--r--llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp5
-rw-r--r--llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp4
-rw-r--r--llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp2
-rw-r--r--llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp6
-rw-r--r--llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp5
-rw-r--r--llvm/lib/DebugInfo/CodeView/TypeHashing.cpp3
-rw-r--r--llvm/lib/DebugInfo/CodeView/TypeIndex.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp38
-rw-r--r--llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp6
-rw-r--r--llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp5
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp1
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp3
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFContext.cpp127
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp4
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp1
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp15
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp13
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp3
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp14
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp12
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp7
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDie.cpp648
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp5
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp608
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp119
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp1
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp70
-rw-r--r--llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp29
-rw-r--r--llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp79
-rw-r--r--llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp14
-rw-r--r--llvm/lib/DebugInfo/GSYM/GsymCreator.cpp6
-rw-r--r--llvm/lib/DebugInfo/GSYM/GsymReader.cpp2
-rw-r--r--llvm/lib/DebugInfo/GSYM/InlineInfo.cpp16
-rw-r--r--llvm/lib/DebugInfo/GSYM/LookupResult.cpp3
-rw-r--r--llvm/lib/DebugInfo/GSYM/Range.cpp123
-rw-r--r--llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp12
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp13
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp (renamed from llvm/tools/llvm-pdbutil/FormatUtil.cpp)55
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp9
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/HashTable.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/InputFile.cpp (renamed from llvm/tools/llvm-pdbutil/InputFile.cpp)107
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp (renamed from llvm/tools/llvm-pdbutil/LinePrinter.cpp)47
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp7
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp8
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp5
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp9
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp6
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp10
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp68
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp5
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp26
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp9
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp20
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp5
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp5
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp13
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp5
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp21
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp7
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp15
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp5
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp6
-rw-r--r--llvm/lib/DebugInfo/PDB/PDB.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBContext.cpp9
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBExtras.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbol.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp10
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp3
-rw-r--r--llvm/lib/DebugInfo/PDB/UDTLayout.cpp3
-rw-r--r--llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp57
-rw-r--r--llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp6
-rw-r--r--llvm/lib/DebugInfo/Symbolize/Markup.cpp202
-rw-r--r--llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp143
-rw-r--r--llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp10
-rw-r--r--llvm/lib/DebugInfo/Symbolize/Symbolize.cpp316
-rw-r--r--llvm/lib/Debuginfod/DIFetcher.cpp28
-rw-r--r--llvm/lib/Debuginfod/Debuginfod.cpp63
-rw-r--r--llvm/lib/Debuginfod/HTTPClient.cpp88
-rw-r--r--llvm/lib/Demangle/Demangle.cpp2
-rw-r--r--llvm/lib/Demangle/ItaniumDemangle.cpp58
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangle.cpp37
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangleNodes.cpp4
-rw-r--r--llvm/lib/Demangle/RustDemangle.cpp58
-rw-r--r--llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp5
-rw-r--r--llvm/lib/ExecutionEngine/Interpreter/Interpreter.h2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp117
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp564
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h53
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp317
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp72
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp26
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLink.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp45
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp493
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp7
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/aarch64.cpp52
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/riscv.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp1
-rw-r--r--llvm/lib/ExecutionEngine/MCJIT/MCJIT.h3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp1
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Core.cpp76
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp7
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp88
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp8
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp16
-rw-r--r--llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp1
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LLJIT.cpp56
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Layer.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp433
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp152
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp14
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp171
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp252
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Speculation.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp8
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp9
-rw-r--r--llvm/lib/ExecutionEngine/SectionMemoryManager.cpp2
-rw-r--r--llvm/lib/FileCheck/FileCheck.cpp28
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPContext.cpp5
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp1143
-rw-r--r--llvm/lib/FuzzMutate/FuzzerCLI.cpp48
-rw-r--r--llvm/lib/FuzzMutate/IRMutator.cpp56
-rw-r--r--llvm/lib/FuzzMutate/Operations.cpp17
-rw-r--r--llvm/lib/FuzzMutate/RandomIRBuilder.cpp16
-rw-r--r--llvm/lib/IR/AbstractCallSite.cpp1
-rw-r--r--llvm/lib/IR/AsmWriter.cpp32
-rw-r--r--llvm/lib/IR/Assumptions.cpp1
-rw-r--r--llvm/lib/IR/AttributeImpl.h2
-rw-r--r--llvm/lib/IR/Attributes.cpp181
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp188
-rw-r--r--llvm/lib/IR/BasicBlock.cpp6
-rw-r--r--llvm/lib/IR/BuiltinGCs.cpp2
-rw-r--r--llvm/lib/IR/ConstantFold.cpp36
-rw-r--r--llvm/lib/IR/ConstantRange.cpp77
-rw-r--r--llvm/lib/IR/Constants.cpp132
-rw-r--r--llvm/lib/IR/ConstantsContext.h37
-rw-r--r--llvm/lib/IR/Core.cpp40
-rw-r--r--llvm/lib/IR/DIBuilder.cpp22
-rw-r--r--llvm/lib/IR/DebugInfoMetadata.cpp188
-rw-r--r--llvm/lib/IR/DiagnosticHandler.cpp9
-rw-r--r--llvm/lib/IR/DiagnosticInfo.cpp11
-rw-r--r--llvm/lib/IR/Dominators.cpp1
-rw-r--r--llvm/lib/IR/FPEnv.cpp45
-rw-r--r--llvm/lib/IR/Function.cpp123
-rw-r--r--llvm/lib/IR/GVMaterializer.cpp2
-rw-r--r--llvm/lib/IR/Globals.cpp25
-rw-r--r--llvm/lib/IR/IRBuilder.cpp178
-rw-r--r--llvm/lib/IR/Instruction.cpp5
-rw-r--r--llvm/lib/IR/Instructions.cpp60
-rw-r--r--llvm/lib/IR/IntrinsicInst.cpp107
-rw-r--r--llvm/lib/IR/LLVMContext.cpp37
-rw-r--r--llvm/lib/IR/LLVMContextImpl.cpp20
-rw-r--r--llvm/lib/IR/LLVMContextImpl.h34
-rw-r--r--llvm/lib/IR/LegacyPassManager.cpp14
-rw-r--r--llvm/lib/IR/MDBuilder.cpp8
-rw-r--r--llvm/lib/IR/Mangler.cpp2
-rw-r--r--llvm/lib/IR/Metadata.cpp174
-rw-r--r--llvm/lib/IR/Module.cpp33
-rw-r--r--llvm/lib/IR/Pass.cpp10
-rw-r--r--llvm/lib/IR/ReplaceConstant.cpp1
-rw-r--r--llvm/lib/IR/SafepointIRVerifier.cpp11
-rw-r--r--llvm/lib/IR/Use.cpp4
-rw-r--r--llvm/lib/IR/User.cpp12
-rw-r--r--llvm/lib/IR/Value.cpp17
-rw-r--r--llvm/lib/IR/VectorBuilder.cpp103
-rw-r--r--llvm/lib/IR/Verifier.cpp3888
-rw-r--r--llvm/lib/InterfaceStub/ELFObjHandler.cpp139
-rw-r--r--llvm/lib/InterfaceStub/IFSHandler.cpp48
-rw-r--r--llvm/lib/InterfaceStub/IFSStub.cpp2
-rw-r--r--llvm/lib/LTO/LTO.cpp106
-rw-r--r--llvm/lib/LTO/LTOBackend.cpp46
-rw-r--r--llvm/lib/LTO/LTOCodeGenerator.cpp6
-rw-r--r--llvm/lib/LTO/LTOModule.cpp2
-rw-r--r--llvm/lib/LTO/SummaryBasedOptimizations.cpp2
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp51
-rw-r--r--llvm/lib/LineEditor/LineEditor.cpp4
-rw-r--r--llvm/lib/Linker/IRMover.cpp69
-rw-r--r--llvm/lib/Linker/LinkModules.cpp13
-rw-r--r--llvm/lib/MC/ConstantPools.cpp25
-rw-r--r--llvm/lib/MC/ELFObjectWriter.cpp50
-rw-r--r--llvm/lib/MC/MCAsmBackend.cpp10
-rw-r--r--llvm/lib/MC/MCAsmInfo.cpp5
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp204
-rw-r--r--llvm/lib/MC/MCAssembler.cpp15
-rw-r--r--llvm/lib/MC/MCCodeView.cpp7
-rw-r--r--llvm/lib/MC/MCContext.cpp90
-rw-r--r--llvm/lib/MC/MCDXContainerStreamer.cpp31
-rw-r--r--llvm/lib/MC/MCDXContainerWriter.cpp143
-rw-r--r--llvm/lib/MC/MCDisassembler/Disassembler.cpp1
-rw-r--r--llvm/lib/MC/MCDisassembler/Disassembler.h2
-rw-r--r--llvm/lib/MC/MCDisassembler/MCDisassembler.cpp17
-rw-r--r--llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp16
-rw-r--r--llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp2
-rw-r--r--llvm/lib/MC/MCDwarf.cpp55
-rw-r--r--llvm/lib/MC/MCELFStreamer.cpp25
-rw-r--r--llvm/lib/MC/MCExpr.cpp6
-rw-r--r--llvm/lib/MC/MCFragment.cpp2
-rw-r--r--llvm/lib/MC/MCInstPrinter.cpp1
-rw-r--r--llvm/lib/MC/MCInstrAnalysis.cpp7
-rw-r--r--llvm/lib/MC/MCInstrDesc.cpp1
-rw-r--r--llvm/lib/MC/MCMachOStreamer.cpp33
-rw-r--r--llvm/lib/MC/MCNullStreamer.cpp18
-rw-r--r--llvm/lib/MC/MCObjectFileInfo.cpp52
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp10
-rw-r--r--llvm/lib/MC/MCObjectWriter.cpp4
-rw-r--r--llvm/lib/MC/MCParser/AsmLexer.cpp8
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp83
-rw-r--r--llvm/lib/MC/MCParser/COFFAsmParser.cpp45
-rw-r--r--llvm/lib/MC/MCParser/COFFMasmParser.cpp27
-rw-r--r--llvm/lib/MC/MCParser/DarwinAsmParser.cpp19
-rw-r--r--llvm/lib/MC/MCParser/ELFAsmParser.cpp33
-rw-r--r--llvm/lib/MC/MCParser/GOFFAsmParser.cpp11
-rw-r--r--llvm/lib/MC/MCParser/MCAsmLexer.cpp1
-rw-r--r--llvm/lib/MC/MCParser/MCAsmParser.cpp2
-rw-r--r--llvm/lib/MC/MCParser/MCAsmParserExtension.cpp2
-rw-r--r--llvm/lib/MC/MCParser/MasmParser.cpp139
-rw-r--r--llvm/lib/MC/MCParser/WasmAsmParser.cpp15
-rw-r--r--llvm/lib/MC/MCParser/XCOFFAsmParser.cpp9
-rw-r--r--llvm/lib/MC/MCPseudoProbe.cpp176
-rw-r--r--llvm/lib/MC/MCRegisterInfo.cpp11
-rw-r--r--llvm/lib/MC/MCSPIRVStreamer.cpp45
-rw-r--r--llvm/lib/MC/MCSchedule.cpp4
-rw-r--r--llvm/lib/MC/MCSection.cpp2
-rw-r--r--llvm/lib/MC/MCSectionCOFF.cpp12
-rw-r--r--llvm/lib/MC/MCSectionDXContainer.cpp15
-rw-r--r--llvm/lib/MC/MCSectionELF.cpp15
-rw-r--r--llvm/lib/MC/MCSectionMachO.cpp17
-rw-r--r--llvm/lib/MC/MCSectionWasm.cpp5
-rw-r--r--llvm/lib/MC/MCSectionXCOFF.cpp10
-rw-r--r--llvm/lib/MC/MCStreamer.cpp117
-rw-r--r--llvm/lib/MC/MCSymbol.cpp1
-rw-r--r--llvm/lib/MC/MCSymbolELF.cpp1
-rw-r--r--llvm/lib/MC/MCTargetOptions.cpp9
-rw-r--r--llvm/lib/MC/MCTargetOptionsCommandFlags.cpp19
-rw-r--r--llvm/lib/MC/MCWasmStreamer.cpp18
-rw-r--r--llvm/lib/MC/MCWin64EH.cpp1320
-rw-r--r--llvm/lib/MC/MCWinCOFFStreamer.cpp43
-rw-r--r--llvm/lib/MC/MCWinEH.cpp9
-rw-r--r--llvm/lib/MC/MCXCOFFStreamer.cpp5
-rw-r--r--llvm/lib/MC/MachObjectWriter.cpp25
-rw-r--r--llvm/lib/MC/SPIRVObjectWriter.cpp76
-rw-r--r--llvm/lib/MC/SubtargetFeature.cpp4
-rw-r--r--llvm/lib/MC/TargetRegistry.cpp4
-rw-r--r--llvm/lib/MC/WasmObjectWriter.cpp135
-rw-r--r--llvm/lib/MC/WinCOFFObjectWriter.cpp51
-rw-r--r--llvm/lib/MC/XCOFFObjectWriter.cpp480
-rw-r--r--llvm/lib/MCA/CustomBehaviour.cpp2
-rw-r--r--llvm/lib/MCA/HardwareUnits/LSUnit.cpp32
-rw-r--r--llvm/lib/MCA/IncrementalSourceMgr.cpp51
-rw-r--r--llvm/lib/MCA/InstrBuilder.cpp96
-rw-r--r--llvm/lib/MCA/Instruction.cpp12
-rw-r--r--llvm/lib/MCA/Pipeline.cpp15
-rw-r--r--llvm/lib/MCA/Stages/DispatchStage.cpp6
-rw-r--r--llvm/lib/MCA/Stages/EntryStage.cpp23
-rw-r--r--llvm/lib/MCA/Stages/ExecuteStage.cpp4
-rw-r--r--llvm/lib/MCA/Stages/InOrderIssueStage.cpp11
-rw-r--r--llvm/lib/MCA/Stages/Stage.cpp1
-rw-r--r--llvm/lib/ObjCopy/Archive.cpp110
-rw-r--r--llvm/lib/ObjCopy/Archive.h (renamed from llvm/tools/llvm-objcopy/llvm-objcopy.h)25
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp (renamed from llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp)56
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFObject.cpp (renamed from llvm/tools/llvm-objcopy/COFF/Object.cpp)4
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFObject.h (renamed from llvm/tools/llvm-objcopy/COFF/Object.h)9
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFReader.cpp (renamed from llvm/tools/llvm-objcopy/COFF/Reader.cpp)6
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFReader.h (renamed from llvm/tools/llvm-objcopy/COFF/Reader.h)8
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFWriter.cpp (renamed from llvm/tools/llvm-objcopy/COFF/Writer.cpp)27
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFWriter.h (renamed from llvm/tools/llvm-objcopy/COFF/Writer.h)10
-rw-r--r--llvm/lib/ObjCopy/CommonConfig.cpp50
-rw-r--r--llvm/lib/ObjCopy/ConfigManager.cpp97
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp (renamed from llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp)58
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObject.cpp (renamed from llvm/tools/llvm-objcopy/ELF/Object.cpp)121
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObject.h (renamed from llvm/tools/llvm-objcopy/ELF/Object.h)39
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp (renamed from llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp)0
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h (renamed from llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h)12
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp (renamed from llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp)77
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOObject.cpp (renamed from llvm/tools/llvm-objcopy/MachO/Object.cpp)4
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOObject.h (renamed from llvm/tools/llvm-objcopy/MachO/Object.h)8
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOReader.cpp (renamed from llvm/tools/llvm-objcopy/MachO/MachOReader.cpp)2
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOReader.h (renamed from llvm/tools/llvm-objcopy/MachO/MachOReader.h)9
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOWriter.cpp (renamed from llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp)162
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOWriter.h (renamed from llvm/tools/llvm-objcopy/MachO/MachOWriter.h)9
-rw-r--r--llvm/lib/ObjCopy/ObjCopy.cpp90
-rw-r--r--llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp45
-rw-r--r--llvm/lib/ObjCopy/XCOFF/XCOFFObject.h48
-rw-r--r--llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp101
-rw-r--r--llvm/lib/ObjCopy/XCOFF/XCOFFReader.h35
-rw-r--r--llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp125
-rw-r--r--llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h48
-rw-r--r--llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp (renamed from llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp)32
-rw-r--r--llvm/lib/ObjCopy/wasm/WasmObject.cpp (renamed from llvm/tools/llvm-objcopy/wasm/Object.cpp)4
-rw-r--r--llvm/lib/ObjCopy/wasm/WasmObject.h (renamed from llvm/tools/llvm-objcopy/wasm/Object.h)8
-rw-r--r--llvm/lib/ObjCopy/wasm/WasmReader.cpp (renamed from llvm/tools/llvm-objcopy/wasm/Reader.cpp)10
-rw-r--r--llvm/lib/ObjCopy/wasm/WasmReader.h (renamed from llvm/tools/llvm-objcopy/wasm/Reader.h)10
-rw-r--r--llvm/lib/ObjCopy/wasm/WasmWriter.cpp (renamed from llvm/tools/llvm-objcopy/wasm/Writer.cpp)4
-rw-r--r--llvm/lib/ObjCopy/wasm/WasmWriter.h (renamed from llvm/tools/llvm-objcopy/wasm/Writer.h)10
-rw-r--r--llvm/lib/Object/Archive.cpp18
-rw-r--r--llvm/lib/Object/ArchiveWriter.cpp234
-rw-r--r--llvm/lib/Object/Binary.cpp7
-rw-r--r--llvm/lib/Object/COFFImportFile.cpp4
-rw-r--r--llvm/lib/Object/COFFModuleDefinition.cpp2
-rw-r--r--llvm/lib/Object/COFFObjectFile.cpp161
-rw-r--r--llvm/lib/Object/DXContainer.cpp111
-rw-r--r--llvm/lib/Object/Decompressor.cpp2
-rw-r--r--llvm/lib/Object/ELF.cpp29
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp87
-rw-r--r--llvm/lib/Object/Error.cpp2
-rw-r--r--llvm/lib/Object/IRObjectFile.cpp16
-rw-r--r--llvm/lib/Object/IRSymtab.cpp1
-rw-r--r--llvm/lib/Object/MachOObjectFile.cpp229
-rw-r--r--llvm/lib/Object/MachOUniversal.cpp6
-rw-r--r--llvm/lib/Object/MachOUniversalWriter.cpp12
-rw-r--r--llvm/lib/Object/ModuleSymbolTable.cpp3
-rw-r--r--llvm/lib/Object/Object.cpp2
-rw-r--r--llvm/lib/Object/ObjectFile.cpp15
-rw-r--r--llvm/lib/Object/OffloadBinary.cpp164
-rw-r--r--llvm/lib/Object/RecordStreamer.h8
-rw-r--r--llvm/lib/Object/RelocationResolver.cpp45
-rw-r--r--llvm/lib/Object/SymbolicFile.cpp9
-rw-r--r--llvm/lib/Object/TapiFile.cpp6
-rw-r--r--llvm/lib/Object/TapiUniversal.cpp5
-rw-r--r--llvm/lib/Object/WasmObjectFile.cpp123
-rw-r--r--llvm/lib/Object/WindowsResource.cpp2
-rw-r--r--llvm/lib/Object/XCOFFObjectFile.cpp27
-rw-r--r--llvm/lib/ObjectYAML/COFFEmitter.cpp7
-rw-r--r--llvm/lib/ObjectYAML/COFFYAML.cpp3
-rw-r--r--llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp1
-rw-r--r--llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp5
-rw-r--r--llvm/lib/ObjectYAML/DWARFEmitter.cpp6
-rw-r--r--llvm/lib/ObjectYAML/DWARFYAML.cpp2
-rw-r--r--llvm/lib/ObjectYAML/DXContainerEmitter.cpp190
-rw-r--r--llvm/lib/ObjectYAML/DXContainerYAML.cpp61
-rw-r--r--llvm/lib/ObjectYAML/ELFEmitter.cpp43
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp48
-rw-r--r--llvm/lib/ObjectYAML/MachOEmitter.cpp22
-rw-r--r--llvm/lib/ObjectYAML/MachOYAML.cpp9
-rw-r--r--llvm/lib/ObjectYAML/MinidumpEmitter.cpp2
-rw-r--r--llvm/lib/ObjectYAML/ObjectYAML.cpp7
-rw-r--r--llvm/lib/ObjectYAML/OffloadEmitter.cpp68
-rw-r--r--llvm/lib/ObjectYAML/OffloadYAML.cpp78
-rw-r--r--llvm/lib/ObjectYAML/WasmEmitter.cpp62
-rw-r--r--llvm/lib/ObjectYAML/WasmYAML.cpp69
-rw-r--r--llvm/lib/ObjectYAML/XCOFFEmitter.cpp162
-rw-r--r--llvm/lib/ObjectYAML/yaml2obj.cpp4
-rw-r--r--llvm/lib/Option/ArgList.cpp7
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp41
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp229
-rw-r--r--llvm/lib/Passes/PassRegistry.def54
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp84
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMapping.cpp12
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp4
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp8
-rw-r--r--llvm/lib/ProfileData/GCOV.cpp8
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp81
-rw-r--r--llvm/lib/ProfileData/InstrProfCorrelator.cpp11
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp146
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp143
-rw-r--r--llvm/lib/ProfileData/MemProf.cpp110
-rw-r--r--llvm/lib/ProfileData/ProfileSummaryBuilder.cpp32
-rw-r--r--llvm/lib/ProfileData/RawMemProfReader.cpp543
-rw-r--r--llvm/lib/ProfileData/SampleProf.cpp32
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp31
-rw-r--r--llvm/lib/ProfileData/SampleProfWriter.cpp30
-rw-r--r--llvm/lib/Remarks/BitstreamRemarkSerializer.cpp1
-rw-r--r--llvm/lib/Remarks/RemarkLinker.cpp7
-rw-r--r--llvm/lib/Remarks/RemarkParser.cpp2
-rw-r--r--llvm/lib/Remarks/YAMLRemarkSerializer.cpp10
-rw-r--r--llvm/lib/Support/AArch64TargetParser.cpp64
-rw-r--r--llvm/lib/Support/APFixedPoint.cpp20
-rw-r--r--llvm/lib/Support/APFloat.cpp9
-rw-r--r--llvm/lib/Support/APInt.cpp126
-rw-r--r--llvm/lib/Support/ARMAttributeParser.cpp2
-rw-r--r--llvm/lib/Support/ARMWinEH.cpp21
-rw-r--r--llvm/lib/Support/AddressRanges.cpp59
-rw-r--r--llvm/lib/Support/BLAKE3/LICENSE330
-rw-r--r--llvm/lib/Support/BLAKE3/README.md296
-rw-r--r--llvm/lib/Support/BLAKE3/blake3.c627
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx2.c326
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S1826
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S1817
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm1828
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx512.c1207
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S2601
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S2615
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm2634
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_dispatch.c277
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_impl.h312
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_neon.c356
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_portable.c160
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse2.c566
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S2307
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S2332
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm2350
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse41.c560
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S2044
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S2069
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm2089
-rw-r--r--llvm/lib/Support/BinaryStreamWriter.cpp10
-rw-r--r--llvm/lib/Support/CSKYAttributeParser.cpp155
-rw-r--r--llvm/lib/Support/CSKYAttributes.cpp33
-rw-r--r--llvm/lib/Support/CSKYTargetParser.cpp181
-rw-r--r--llvm/lib/Support/CodeGenCoverage.cpp2
-rw-r--r--llvm/lib/Support/CommandLine.cpp94
-rw-r--r--llvm/lib/Support/Compression.cpp12
-rw-r--r--llvm/lib/Support/ConvertUTFWrapper.cpp102
-rw-r--r--llvm/lib/Support/CrashRecoveryContext.cpp20
-rw-r--r--llvm/lib/Support/Debug.cpp2
-rw-r--r--llvm/lib/Support/DebugCounter.cpp3
-rw-r--r--llvm/lib/Support/DeltaAlgorithm.cpp3
-rw-r--r--llvm/lib/Support/DynamicLibrary.cpp7
-rw-r--r--llvm/lib/Support/Errno.cpp3
-rw-r--r--llvm/lib/Support/ErrorHandling.cpp5
-rw-r--r--llvm/lib/Support/FileUtilities.cpp66
-rw-r--r--llvm/lib/Support/FoldingSet.cpp48
-rw-r--r--llvm/lib/Support/FormatVariadic.cpp2
-rw-r--r--llvm/lib/Support/Host.cpp115
-rw-r--r--llvm/lib/Support/ItaniumManglingCanonicalizer.cpp14
-rw-r--r--llvm/lib/Support/JSON.cpp20
-rw-r--r--llvm/lib/Support/KnownBits.cpp12
-rw-r--r--llvm/lib/Support/LineIterator.cpp2
-rw-r--r--llvm/lib/Support/MD5.cpp14
-rw-r--r--llvm/lib/Support/MathExtras.cpp2
-rw-r--r--llvm/lib/Support/Memory.cpp1
-rw-r--r--llvm/lib/Support/MemoryBuffer.cpp13
-rw-r--r--llvm/lib/Support/NativeFormatting.cpp10
-rw-r--r--llvm/lib/Support/Parallel.cpp10
-rw-r--r--llvm/lib/Support/Path.cpp16
-rw-r--r--llvm/lib/Support/Process.cpp2
-rw-r--r--llvm/lib/Support/Program.cpp1
-rw-r--r--llvm/lib/Support/RISCVISAInfo.cpp106
-rw-r--r--llvm/lib/Support/SHA1.cpp21
-rw-r--r--llvm/lib/Support/SHA256.cpp21
-rw-r--r--llvm/lib/Support/ScopedPrinter.cpp9
-rw-r--r--llvm/lib/Support/Signals.cpp18
-rw-r--r--llvm/lib/Support/Signposts.cpp5
-rw-r--r--llvm/lib/Support/SourceMgr.cpp16
-rw-r--r--llvm/lib/Support/SpecialCaseList.cpp2
-rw-r--r--llvm/lib/Support/Statistic.cpp6
-rw-r--r--llvm/lib/Support/StringMap.cpp76
-rw-r--r--llvm/lib/Support/StringRef.cpp7
-rw-r--r--llvm/lib/Support/TargetParser.cpp27
-rw-r--r--llvm/lib/Support/ThreadPool.cpp171
-rw-r--r--llvm/lib/Support/TrigramIndex.cpp1
-rw-r--r--llvm/lib/Support/Triple.cpp194
-rw-r--r--llvm/lib/Support/TypeSize.cpp5
-rw-r--r--llvm/lib/Support/Unicode.cpp452
-rw-r--r--llvm/lib/Support/UnicodeNameToCodepoint.cpp551
-rw-r--r--llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp20911
-rw-r--r--llvm/lib/Support/Unix/COM.inc2
-rw-r--r--llvm/lib/Support/Unix/Memory.inc1
-rw-r--r--llvm/lib/Support/Unix/Path.inc24
-rw-r--r--llvm/lib/Support/Unix/Process.inc39
-rw-r--r--llvm/lib/Support/Unix/Signals.inc12
-rw-r--r--llvm/lib/Support/Unix/ThreadLocal.inc12
-rw-r--r--llvm/lib/Support/Unix/Threading.inc37
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp592
-rw-r--r--llvm/lib/Support/Windows/Path.inc2
-rw-r--r--llvm/lib/Support/Windows/Process.inc10
-rw-r--r--llvm/lib/Support/Windows/Program.inc3
-rw-r--r--llvm/lib/Support/Windows/Signals.inc54
-rw-r--r--llvm/lib/Support/Windows/Threading.inc8
-rw-r--r--llvm/lib/Support/WithColor.cpp20
-rw-r--r--llvm/lib/Support/YAMLParser.cpp71
-rw-r--r--llvm/lib/Support/Z3Solver.cpp8
-rw-r--r--llvm/lib/Support/raw_ostream.cpp4
-rw-r--r--llvm/lib/Support/regcomp.c26
-rw-r--r--llvm/lib/Support/regengine.inc39
-rw-r--r--llvm/lib/Support/xxhash.cpp1
-rw-r--r--llvm/lib/TableGen/Error.cpp4
-rw-r--r--llvm/lib/TableGen/Parser.cpp39
-rw-r--r--llvm/lib/TableGen/Record.cpp493
-rw-r--r--llvm/lib/TableGen/TGLexer.cpp7
-rw-r--r--llvm/lib/TableGen/TGLexer.h5
-rw-r--r--llvm/lib/TableGen/TGParser.cpp247
-rw-r--r--llvm/lib/TableGen/TGParser.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64.h3
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td113
-rw-r--r--llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp1
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp49
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td6
-rw-r--r--llvm/lib/Target/AArch64/AArch64CollectLOH.cpp6
-rw-r--r--llvm/lib/Target/AArch64/AArch64Combine.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp34
-rw-r--r--llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp11
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp1098
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.h19
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp294
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp4023
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h83
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrAtomics.td37
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td257
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp608
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.h56
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td876
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp201
-rw-r--r--llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp236
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp49
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h30
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp82
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineScheduler.h33
-rw-r--r--llvm/lib/Target/AArch64/AArch64MacroFusion.cpp15
-rw-r--r--llvm/lib/Target/AArch64/AArch64PerfectShuffle.h13169
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp114
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.h5
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.td51
-rw-r--r--llvm/lib/Target/AArch64/AArch64SLSHardening.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td73
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td583
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA55.td127
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA64FX.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedAmpere1.td1136
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td25
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedPredExynos.td5
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedPredicates.td149
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedTSV110.td3
-rw-r--r--llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp34
-rw-r--r--llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTagging.cpp203
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp1
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp37
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h365
-rw-r--r--llvm/lib/Target/AArch64/AArch64SystemOperands.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp36
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp383
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h24
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp134
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp580
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h8
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp6
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h3
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp38
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp590
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp12
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp6
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp8
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp9
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp26
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp7
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp38
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h11
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp4
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp31
-rw-r--r--llvm/lib/Target/AArch64/SMEInstrFormats.td538
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td378
-rw-r--r--llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp10
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h13
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td280
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp144
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp95
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAttributes.def31
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp266
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp50
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td66
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp64
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGISel.td29
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp91
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp439
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h22
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp253
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h17
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp401
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp457
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp78
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp770
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h31
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td158
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp824
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h17
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibFunc.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp27
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp50
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h20
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp215
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp64
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp663
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp140
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp26
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp152
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td168
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp166
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp158
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h42
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp88
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp54
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h23
-rw-r--r--llvm/lib/Target/AMDGPU/AMDKernelCodeT.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp1146
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td891
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td546
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp470
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h87
-rw-r--r--llvm/lib/Target/AMDGPU/EXPInstructions.td79
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td1038
-rw-r--r--llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp901
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h25
-rw-r--r--llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp29
-rw-r--r--llvm/lib/Target/AMDGPU/GCNProcessors.td28
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.h2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp356
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h36
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h349
-rw-r--r--llvm/lib/Target/AMDGPU/LDSDIRInstructions.td116
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp29
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp257
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h17
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h56
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h1
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp165
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td618
-rw-r--r--llvm/lib/Target/AMDGPU/R600.h2
-rw-r--r--llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/R600FrameLowering.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp183
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.cpp19
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/R600MachineCFGStructurizer.cpp (renamed from llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp)152
-rw-r--r--llvm/lib/Target/AMDGPU/R600Packetizer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/R600Subtarget.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/R600Subtarget.h16
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetMachine.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetMachine.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp58
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h196
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp189
-rw-r--r--llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp230
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp1927
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h24
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp77
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp354
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrFormats.td83
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp667
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h68
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td625
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td244
-rw-r--r--llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp842
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp42
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp33
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp126
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h179
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp58
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp484
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegister.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp251
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp110
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp125
-rw-r--r--llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp603
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h28
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td127
-rw-r--r--llvm/lib/Target/AMDGPU/SISchedule.td65
-rw-r--r--llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp435
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp42
-rw-r--r--llvm/lib/Target/AMDGPU/SMInstructions.td410
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td425
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp314
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h56
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp686
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h133
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h38
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp (renamed from llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp)102
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h51
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h4
-rw-r--r--llvm/lib/Target/AMDGPU/VIInstrFormats.td2
-rw-r--r--llvm/lib/Target/AMDGPU/VINTERPInstructions.td180
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td380
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td626
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td453
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td671
-rw-r--r--llvm/lib/Target/AMDGPU/VOPCInstructions.td873
-rw-r--r--llvm/lib/Target/AMDGPU/VOPDInstructions.td159
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td658
-rw-r--r--llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/ARC/ARCMachineFunctionInfo.h6
-rw-r--r--llvm/lib/Target/ARC/ARCOptAddrMode.cpp8
-rw-r--r--llvm/lib/Target/ARC/ARCTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/ARC/ARCTargetMachine.h2
-rw-r--r--llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp78
-rw-r--r--llvm/lib/Target/ARM/A15SDOptimizer.cpp3
-rw-r--r--llvm/lib/Target/ARM/ARM.h2
-rw-r--r--llvm/lib/Target/ARM/ARM.td163
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.cpp143
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp227
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h29
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp26
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.h39
-rw-r--r--llvm/lib/Target/ARM/ARMBlockPlacement.cpp3
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.td21
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp17
-rw-r--r--llvm/lib/Target/ARM/ARMFastISel.cpp45
-rw-r--r--llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp432
-rw-r--r--llvm/lib/Target/ARM/ARMFrameLowering.cpp846
-rw-r--r--llvm/lib/Target/ARM/ARMFrameLowering.h1
-rw-r--r--llvm/lib/Target/ARM/ARMHazardRecognizer.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp35
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp413
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h12
-rw-r--r--llvm/lib/Target/ARM/ARMInstrFormats.td26
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td27
-rw-r--r--llvm/lib/Target/ARM/ARMInstrMVE.td89
-rw-r--r--llvm/lib/Target/ARM/ARMInstrNEON.td3
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td7
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td96
-rw-r--r--llvm/lib/Target/ARM/ARMInstructionSelector.cpp16
-rw-r--r--llvm/lib/Target/ARM/ARMLegalizerInfo.cpp1
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp12
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/ARM/ARMMachineFunctionInfo.h8
-rw-r--r--llvm/lib/Target/ARM/ARMParallelDSP.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp7
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterBankInfo.h2
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMSLSHardening.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMSelectionDAGInfo.h1
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.cpp43
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h476
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.cpp28
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.h2
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp50
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp317
-rw-r--r--llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp1287
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp132
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp12
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp6
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp29
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp227
-rw-r--r--llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp57
-rw-r--r--llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp7
-rw-r--r--llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp14
-rw-r--r--llvm/lib/Target/ARM/MVEVPTBlockPass.cpp3
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp758
-rw-r--r--llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp6
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.cpp6
-rw-r--r--llvm/lib/Target/ARM/Thumb2SizeReduction.cpp32
-rw-r--r--llvm/lib/Target/ARM/ThumbRegisterInfo.cpp50
-rw-r--r--llvm/lib/Target/AVR/AVR.h4
-rw-r--r--llvm/lib/Target/AVR/AVRAsmPrinter.cpp43
-rw-r--r--llvm/lib/Target/AVR/AVRCallingConv.td4
-rw-r--r--llvm/lib/Target/AVR/AVRDevices.td165
-rw-r--r--llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp446
-rw-r--r--llvm/lib/Target/AVR/AVRFrameLowering.cpp123
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.cpp160
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.h3
-rw-r--r--llvm/lib/Target/AVR/AVRInstrFormats.td4
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.cpp23
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.td97
-rw-r--r--llvm/lib/Target/AVR/AVRMachineFunctionInfo.h7
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.cpp34
-rw-r--r--llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp144
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.h12
-rw-r--r--llvm/lib/Target/AVR/AVRTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp17
-rw-r--r--llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp200
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp1
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h3
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp1
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp1
-rw-r--r--llvm/lib/Target/BPF/BPF.h2
-rw-r--r--llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp50
-rw-r--r--llvm/lib/Target/BPF/BPFAdjustOpt.cpp10
-rw-r--r--llvm/lib/Target/BPF/BPFCORE.h2
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.cpp3
-rw-r--r--llvm/lib/Target/BPF/BPFInstrFormats.td1
-rw-r--r--llvm/lib/Target/BPF/BPFInstrInfo.cpp3
-rw-r--r--llvm/lib/Target/BPF/BPFInstrInfo.td2
-rw-r--r--llvm/lib/Target/BPF/BPFMIChecking.cpp1
-rw-r--r--llvm/lib/Target/BPF/BPFMIPeephole.cpp7
-rw-r--r--llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp24
-rw-r--r--llvm/lib/Target/BPF/BPFPreserveDIType.cpp1
-rw-r--r--llvm/lib/Target/BPF/BPFTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/BPF/BPFTargetMachine.h2
-rw-r--r--llvm/lib/Target/BPF/BPFTargetTransformInfo.h9
-rw-r--r--llvm/lib/Target/BPF/BTF.def1
-rw-r--r--llvm/lib/Target/BPF/BTF.h10
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.cpp197
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.h26
-rw-r--r--llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp16
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp1
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp6
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h3
-rw-r--r--llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp271
-rw-r--r--llvm/lib/Target/CSKY/CSKY.h2
-rw-r--r--llvm/lib/Target/CSKY/CSKY.td523
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp105
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.h14
-rw-r--r--llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp6
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.cpp23
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp219
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.cpp180
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.h6
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrAlias.td38
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormats.td2
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.cpp9
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.h2
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.td32
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td131
-rw-r--r--llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h11
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp20
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.td14
-rw-r--r--llvm/lib/Target/CSKY/CSKYSubtarget.cpp33
-rw-r--r--llvm/lib/Target/CSKY/CSKYSubtarget.h102
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetMachine.cpp12
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp25
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetObjectFile.h24
-rw-r--r--llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp553
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp184
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h13
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp110
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp335
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h148
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp68
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp161
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h12
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp2
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp88
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp143
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h110
-rw-r--r--llvm/lib/Target/DirectX/DXIL.td144
-rw-r--r--llvm/lib/Target/DirectX/DXILConstants.h25
-rw-r--r--llvm/lib/Target/DirectX/DXILOpLowering.cpp265
-rw-r--r--llvm/lib/Target/DirectX/DXILPointerType.cpp66
-rw-r--r--llvm/lib/Target/DirectX/DXILPointerType.h52
-rw-r--r--llvm/lib/Target/DirectX/DXILPrepare.cpp184
-rw-r--r--llvm/lib/Target/DirectX/DXILStubs.td18
-rw-r--r--llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp121
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp2963
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h82
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp1147
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h308
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp100
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h37
-rw-r--r--llvm/lib/Target/DirectX/DirectX.h43
-rw-r--r--llvm/lib/Target/DirectX/DirectX.td54
-rw-r--r--llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp57
-rw-r--r--llvm/lib/Target/DirectX/DirectXFrameLowering.h35
-rw-r--r--llvm/lib/Target/DirectX/DirectXInstrInfo.cpp20
-rw-r--r--llvm/lib/Target/DirectX/DirectXInstrInfo.h30
-rw-r--r--llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp24
-rw-r--r--llvm/lib/Target/DirectX/DirectXRegisterInfo.h28
-rw-r--r--llvm/lib/Target/DirectX/DirectXSubtarget.cpp29
-rw-r--r--llvm/lib/Target/DirectX/DirectXSubtarget.h56
-rw-r--r--llvm/lib/Target/DirectX/DirectXTargetLowering.h31
-rw-r--r--llvm/lib/Target/DirectX/DirectXTargetMachine.cpp144
-rw-r--r--llvm/lib/Target/DirectX/DirectXTargetMachine.h51
-rw-r--r--llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h39
-rw-r--r--llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp28
-rw-r--r--llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h24
-rw-r--r--llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp152
-rw-r--r--llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h29
-rw-r--r--llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp119
-rw-r--r--llvm/lib/Target/DirectX/PointerTypeAnalysis.h43
-rw-r--r--llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp30
-rw-r--r--llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h18
-rw-r--r--llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/BitTracker.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp160
-rw-r--r--llvm/lib/Target/Hexagon/HexagonArch.h31
-rw-r--r--llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp84
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp22
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp18
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepArch.h88
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepDecoders.inc44
-rw-r--r--llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp61
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenInsert.cpp51
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp10
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp11
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.cpp101
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.h2
-rwxr-xr-xllvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp145
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp65
-rw-r--r--llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatterns.td6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatternsHVX.td19
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPeephole.cpp28
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPseudo.td22
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSubtarget.cpp58
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSubtarget.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp75
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h10
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVExtract.cpp12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp25
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp6
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp1
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp1
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp1
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp1
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp25
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp4
-rw-r--r--llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp4
-rw-r--r--llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp45
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.cpp6
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.cpp4
-rw-r--r--llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h4
-rw-r--r--llvm/lib/Target/Lanai/LanaiTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/Lanai/LanaiTargetMachine.h2
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h3
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp1
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp556
-rw-r--r--llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp145
-rw-r--r--llvm/lib/Target/LoongArch/LoongArch.h38
-rw-r--r--llvm/lib/Target/LoongArch/LoongArch.td139
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp48
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h46
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchCallingConv.td23
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td177
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td188
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td241
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp55
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFrameLowering.h38
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp132
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h55
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp531
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.h95
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrFormats.td404
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp49
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.h36
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.td730
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp66
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h57
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp115
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h50
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td161
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp54
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSubtarget.h89
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp118
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.h46
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp68
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h63
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp40
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h44
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp64
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp63
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h49
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp34
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h30
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp127
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp114
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h54
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp51
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h30
-rw-r--r--llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp30
-rw-r--r--llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h21
-rw-r--r--llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp1
-rw-r--r--llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp618
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp98
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.h12
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp4
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h2
-rw-r--r--llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kExpandPseudo.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp30
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.cpp107
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.h2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrArithmetic.td717
-rw-r--r--llvm/lib/Target/M68k/M68kInstrBits.td75
-rw-r--r--llvm/lib/Target/M68k/M68kInstrControl.td166
-rw-r--r--llvm/lib/Target/M68k/M68kInstrData.td653
-rw-r--r--llvm/lib/Target/M68k/M68kInstrFormats.td136
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.cpp53
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.td106
-rw-r--r--llvm/lib/Target/M68k/M68kInstrShiftRotate.td54
-rw-r--r--llvm/lib/Target/M68k/M68kMachineFunction.cpp7
-rw-r--r--llvm/lib/Target/M68k/M68kMachineFunction.h9
-rw-r--r--llvm/lib/Target/M68k/M68kRegisterInfo.cpp1
-rw-r--r--llvm/lib/Target/M68k/M68kRegisterInfo.h8
-rw-r--r--llvm/lib/Target/M68k/M68kSubtarget.h2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp452
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp1
-rw-r--r--llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp14
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp3
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp3
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h1
-rw-r--r--llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp3
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelLowering.cpp34
-rw-r--r--llvm/lib/Target/MSP430/MSP430InstrInfo.cpp3
-rw-r--r--llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/MSP430/MSP430TargetMachine.cpp6
-rw-r--r--llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp17
-rw-r--r--llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp866
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp13
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp162
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h50
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp8
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp36
-rw-r--r--llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td4
-rw-r--r--llvm/lib/Target/Mips/MicroMipsInstrFPU.td28
-rw-r--r--llvm/lib/Target/Mips/MicroMipsInstrInfo.td5
-rw-r--r--llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips.h2
-rw-r--r--llvm/lib/Target/Mips/Mips.td6
-rw-r--r--llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips16RegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips32r6InstrInfo.td2
-rw-r--r--llvm/lib/Target/Mips/MipsAsmPrinter.cpp28
-rw-r--r--llvm/lib/Target/Mips/MipsBranchExpansion.cpp44
-rw-r--r--llvm/lib/Target/Mips/MipsCallLowering.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsCombine.td15
-rw-r--r--llvm/lib/Target/Mips/MipsConstantIslandPass.cpp6
-rw-r--r--llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsExpandPseudo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsFastISel.cpp18
-rw-r--r--llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.cpp98
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.h10
-rw-r--r--llvm/lib/Target/Mips/MipsInstrInfo.cpp44
-rw-r--r--llvm/lib/Target/Mips/MipsInstrInfo.h13
-rw-r--r--llvm/lib/Target/Mips/MipsInstrInfo.td4
-rw-r--r--llvm/lib/Target/Mips/MipsLegalizerInfo.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsMachineFunction.cpp9
-rw-r--r--llvm/lib/Target/Mips/MipsMachineFunction.h5
-rw-r--r--llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp3
-rw-r--r--llvm/lib/Target/Mips/MipsOptimizePICCall.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsOs16.cpp1
-rw-r--r--llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp148
-rw-r--r--llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp8
-rw-r--r--llvm/lib/Target/Mips/MipsRegisterBankInfo.h2
-rw-r--r--llvm/lib/Target/Mips/MipsSEFrameLowering.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp36
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.cpp24
-rw-r--r--llvm/lib/Target/Mips/MipsSERegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsScheduleGeneric.td8
-rw-r--r--llvm/lib/Target/Mips/MipsSubtarget.cpp15
-rw-r--r--llvm/lib/Target/Mips/MipsSubtarget.h11
-rw-r--r--llvm/lib/Target/Mips/MipsTargetMachine.cpp15
-rw-r--r--llvm/lib/Target/Mips/MipsTargetMachine.h2
-rw-r--r--llvm/lib/Target/Mips/MipsTargetStreamer.h2
-rw-r--r--llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp17
-rw-r--r--llvm/lib/Target/Mips/MipsTargetTransformInfo.h40
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp11
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp133
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp17
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp45
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp520
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.h21
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td53
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td352
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp3
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp119
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp8
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp65
-rw-r--r--llvm/lib/Target/NVPTX/NVVMReflect.cpp12
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp63
-rw-r--r--llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp98
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h4
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h4
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp13
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp23
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/P10InstrResources.td8
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td10
-rw-r--r--llvm/lib/Target/PowerPC/PPC.h8
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td12
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp88
-rw-r--r--llvm/lib/Target/PowerPC/PPCBack2BackFusion.def2
-rw-r--r--llvm/lib/Target/PowerPC/PPCCTRLoops.cpp421
-rw-r--r--llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp185
-rw-r--r--llvm/lib/Target/PowerPC/PPCCallingConv.td22
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp42
-rw-r--r--llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp149
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp14
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp485
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h43
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td19
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp91
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.h99
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td543
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrMMA.td628
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrP10.td (renamed from llvm/lib/Target/PowerPC/PPCInstrPrefix.td)1062
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td76
-rw-r--r--llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp37
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp171
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.td655
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td106
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP10.td2
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP9.td3
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h4
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp34
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetStreamer.h1
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp20
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h7
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/README_P9.txt9
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp172
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp118
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp10
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h6
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp25
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h57
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp101
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h5
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp10
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp8
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp1
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp1
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp6
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp179
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h10
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp16
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h6
-rw-r--r--llvm/lib/Target/RISCV/RISCV.h11
-rw-r--r--llvm/lib/Target/RISCV/RISCV.td108
-rw-r--r--llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp21
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp11
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp468
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.h5
-rw-r--r--llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp26
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp970
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h20
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp4004
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h115
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp1514
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp206
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.h26
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td325
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoA.td30
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoD.td239
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoF.td327
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoM.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td57
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td987
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td575
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td1227
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZb.td264
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td245
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td71
-rw-r--r--llvm/lib/Target/RISCV/RISCVMCInstLower.cpp13
-rw-r--r--llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp37
-rw-r--r--llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h35
-rw-r--r--llvm/lib/Target/RISCV/RISCVMacroFusion.cpp67
-rw-r--r--llvm/lib/Target/RISCV/RISCVMacroFusion.h28
-rw-r--r--llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp382
-rw-r--r--llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp280
-rw-r--r--llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp179
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp7
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp15
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td36
-rw-r--r--llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp275
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSiFive7.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVScheduleB.td206
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.cpp58
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h100
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp84
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.h10
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp231
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h106
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp63
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp1072
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h739
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp556
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h94
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp34
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h29
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp132
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp102
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h52
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp25
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp18
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h28
-rw-r--r--llvm/lib/Target/SPIRV/SPIRV.h34
-rw-r--r--llvm/lib/Target/SPIRV/SPIRV.td43
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp348
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp223
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVCallLowering.h50
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp433
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEnums.td51
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVFrameLowering.h39
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp459
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h174
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp45
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVISelLowering.h47
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstrFormats.td31
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp195
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstrInfo.h54
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstrInfo.td732
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp1268
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp301
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h36
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp58
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVMCInstLower.h29
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp250
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h137
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp440
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp47
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h38
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td15
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp32
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h36
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td39
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp68
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVSubtarget.h93
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp186
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVTargetMachine.h47
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h45
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h44
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVUtils.cpp207
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVUtils.h83
-rw-r--r--llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp28
-rw-r--r--llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h21
-rw-r--r--llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp124
-rw-r--r--llvm/lib/Target/Sparc/DelaySlotFiller.cpp11
-rw-r--r--llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp185
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp23
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp7
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h12
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp15
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp16
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h7
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/Sparc/SparcCallingConv.td2
-rw-r--r--llvm/lib/Target/Sparc/SparcFrameLowering.cpp31
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.cpp172
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.h10
-rw-r--r--llvm/lib/Target/Sparc/SparcInstr64Bit.td23
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.td123
-rw-r--r--llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/Sparc/SparcTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/Sparc/SparcTargetObjectFile.h2
-rw-r--r--llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp12
-rw-r--r--llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp139
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp10
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp325
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.h21
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZElimCompare.cpp16
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp90
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.h3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp9
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp443
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h42
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp49
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.h9
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.td32
-rw-r--r--llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZProcessors.td3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp5
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.h6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSchedule.td4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZScheduleZ13.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZScheduleZ14.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZScheduleZ15.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZScheduleZ16.td1728
-rw-r--r--llvm/lib/Target/SystemZ/SystemZScheduleZ196.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZShortenInst.cpp14
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.cpp20
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.h4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp12
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetMachine.h2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetStreamer.h1
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp49
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h8
-rw-r--r--llvm/lib/Target/TargetIntrinsicInfo.cpp8
-rw-r--r--llvm/lib/Target/TargetLoweringObjectFile.cpp2
-rw-r--r--llvm/lib/Target/TargetMachine.cpp17
-rw-r--r--llvm/lib/Target/TargetMachineC.cpp8
-rw-r--r--llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp1
-rw-r--r--llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp126
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h16
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp1
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp1
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h3
-rw-r--r--llvm/lib/Target/VE/VE.h4
-rw-r--r--llvm/lib/Target/VE/VECustomDAG.cpp514
-rw-r--r--llvm/lib/Target/VE/VECustomDAG.h144
-rw-r--r--llvm/lib/Target/VE/VEISelDAGToDAG.cpp37
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.cpp281
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.h29
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.cpp7
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.td50
-rw-r--r--llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td54
-rw-r--r--llvm/lib/Target/VE/VEInstrIntrinsicVL.td3
-rw-r--r--llvm/lib/Target/VE/VEInstrPatternsVec.td43
-rw-r--r--llvm/lib/Target/VE/VEMachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/VE/VEMachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/VE/VERegisterInfo.td4
-rw-r--r--llvm/lib/Target/VE/VETargetMachine.cpp7
-rw-r--r--llvm/lib/Target/VE/VETargetMachine.h2
-rw-r--r--llvm/lib/Target/VE/VETargetTransformInfo.h66
-rw-r--r--llvm/lib/Target/VE/VVPISelLowering.cpp443
-rw-r--r--llvm/lib/Target/VE/VVPInstrInfo.td111
-rw-r--r--llvm/lib/Target/VE/VVPInstrPatternsVec.td358
-rw-r--r--llvm/lib/Target/VE/VVPNodes.def89
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp18
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp95
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h9
-rw-r--r--llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp1
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp6
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h5
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssembly.h4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssembly.td4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp207
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp54
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp46
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td22
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td16
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td16
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td6
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td131
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp74
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h13
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp7
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp126
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp42
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp513
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86Operand.h36
-rw-r--r--llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp77
-rw-r--r--llvm/lib/Target/X86/MCA/X86CustomBehaviour.h2
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp8
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp105
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp22
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h3
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp165
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h54
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp4
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp134
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h1
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp91
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h26
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp (renamed from llvm/include/llvm/IR/AttributesAMDGPU.td)10
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp21
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp1
-rw-r--r--llvm/lib/Target/X86/X86.h4
-rw-r--r--llvm/lib/Target/X86/X86.td279
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.cpp96
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.h5
-rw-r--r--llvm/lib/Target/X86/X86AvoidTrailingCall.cpp7
-rw-r--r--llvm/lib/Target/X86/X86CallingConv.cpp2
-rw-r--r--llvm/lib/Target/X86/X86CmovConversion.cpp27
-rw-r--r--llvm/lib/Target/X86/X86DiscriminateMemOps.cpp3
-rw-r--r--llvm/lib/Target/X86/X86DomainReassignment.cpp14
-rw-r--r--llvm/lib/Target/X86/X86ExpandPseudo.cpp11
-rw-r--r--llvm/lib/Target/X86/X86FastISel.cpp133
-rw-r--r--llvm/lib/Target/X86/X86FastPreTileConfig.cpp709
-rw-r--r--llvm/lib/Target/X86/X86FastTileConfig.cpp293
-rw-r--r--llvm/lib/Target/X86/X86FixupLEAs.cpp3
-rw-r--r--llvm/lib/Target/X86/X86FloatingPoint.cpp26
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp136
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.h7
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp282
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp3225
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h58
-rw-r--r--llvm/lib/Target/X86/X86IndirectThunks.cpp1
-rw-r--r--llvm/lib/Target/X86/X86InsertPrefetch.cpp1
-rw-r--r--llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp49
-rw-r--r--llvm/lib/Target/X86/X86InstrAMX.td18
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td131
-rw-r--r--llvm/lib/Target/X86/X86InstrArithmetic.td8
-rw-r--r--llvm/lib/Target/X86/X86InstrCMovSetCC.td8
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td85
-rw-r--r--llvm/lib/Target/X86/X86InstrControl.td4
-rw-r--r--llvm/lib/Target/X86/X86InstrFPStack.td22
-rw-r--r--llvm/lib/Target/X86/X86InstrFoldTables.cpp4
-rw-r--r--llvm/lib/Target/X86/X86InstrFormats.td6
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td1
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp851
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h18
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td111
-rw-r--r--llvm/lib/Target/X86/X86InstrMMX.td4
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td68
-rw-r--r--llvm/lib/Target/X86/X86InstrSystem.td16
-rw-r--r--llvm/lib/Target/X86/X86InstrTSX.td2
-rw-r--r--llvm/lib/Target/X86/X86InstrVecCompiler.td6
-rw-r--r--llvm/lib/Target/X86/X86InstrXOP.td4
-rw-r--r--llvm/lib/Target/X86/X86InstructionSelector.cpp16
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h12
-rw-r--r--llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp3
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp1
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXType.cpp181
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp41
-rw-r--r--llvm/lib/Target/X86/X86MachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/X86/X86MachineFunctionInfo.h10
-rw-r--r--llvm/lib/Target/X86/X86MacroFusion.cpp1
-rw-r--r--llvm/lib/Target/X86/X86PadShortFunction.cpp11
-rw-r--r--llvm/lib/Target/X86/X86PartialReduction.cpp35
-rw-r--r--llvm/lib/Target/X86/X86PreAMXConfig.cpp56
-rw-r--r--llvm/lib/Target/X86/X86PreTileConfig.cpp53
-rw-r--r--llvm/lib/Target/X86/X86RegisterBankInfo.cpp7
-rw-r--r--llvm/lib/Target/X86/X86RegisterBankInfo.h2
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.cpp62
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.h12
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.td15
-rw-r--r--llvm/lib/Target/X86/X86SchedBroadwell.td20
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td20
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td20
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td40
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td26
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td32
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td4
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td6
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td106
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver2.td86
-rw-r--r--llvm/lib/Target/X86/X86SelectionDAGInfo.cpp39
-rw-r--r--llvm/lib/Target/X86/X86SelectionDAGInfo.h2
-rw-r--r--llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp31
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.cpp12
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h629
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp51
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.h2
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp290
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h21
-rw-r--r--llvm/lib/Target/X86/X86TileConfig.cpp15
-rw-r--r--llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp286
-rw-r--r--llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h3
-rw-r--r--llvm/lib/Target/XCore/XCore.h1
-rw-r--r--llvm/lib/Target/XCore/XCoreAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreISelLowering.cpp36
-rw-r--r--llvm/lib/Target/XCore/XCoreInstrInfo.td2
-rw-r--r--llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp7
-rw-r--r--llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/XCore/XCoreTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/XCore/XCoreTargetMachine.h4
-rw-r--r--llvm/lib/Testing/Support/Annotations.cpp4
-rw-r--r--llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp28
-rw-r--r--llvm/lib/ToolDrivers/llvm-lib/Options.td16
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp92
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h46
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp86
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroCleanup.cpp81
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp24
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroEarly.cpp79
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroElide.cpp125
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp177
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInternal.h47
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroSplit.cpp377
-rw-r--r--llvm/lib/Transforms/Coroutines/Coroutines.cpp193
-rw-r--r--llvm/lib/Transforms/IPO/AlwaysInliner.cpp47
-rw-r--r--llvm/lib/Transforms/IPO/ArgumentPromotion.cpp1139
-rw-r--r--llvm/lib/Transforms/IPO/Attributor.cpp462
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp2008
-rw-r--r--llvm/lib/Transforms/IPO/BlockExtractor.cpp11
-rw-r--r--llvm/lib/Transforms/IPO/CalledValuePropagation.cpp6
-rw-r--r--llvm/lib/Transforms/IPO/ConstantMerge.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/CrossDSOCFI.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp578
-rw-r--r--llvm/lib/Transforms/IPO/ExtractGV.cpp1
-rw-r--r--llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/FunctionAttrs.cpp175
-rw-r--r--llvm/lib/Transforms/IPO/FunctionImport.cpp19
-rw-r--r--llvm/lib/Transforms/IPO/FunctionSpecialization.cpp501
-rw-r--r--llvm/lib/Transforms/IPO/GlobalDCE.cpp34
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp177
-rw-r--r--llvm/lib/Transforms/IPO/GlobalSplit.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/HotColdSplitting.cpp26
-rw-r--r--llvm/lib/Transforms/IPO/IPO.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/IROutliner.cpp326
-rw-r--r--llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/InlineSimple.cpp8
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp111
-rw-r--r--llvm/lib/Transforms/IPO/Internalize.cpp3
-rw-r--r--llvm/lib/Transforms/IPO/LoopExtractor.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/LowerTypeTests.cpp21
-rw-r--r--llvm/lib/Transforms/IPO/MergeFunctions.cpp48
-rw-r--r--llvm/lib/Transforms/IPO/ModuleInliner.cpp25
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp255
-rw-r--r--llvm/lib/Transforms/IPO/PartialInlining.cpp16
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp295
-rw-r--r--llvm/lib/Transforms/IPO/PruneEH.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/SCCP.cpp1
-rw-r--r--llvm/lib/Transforms/IPO/SampleContextTracker.cpp123
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp293
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfileProbe.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp8
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp134
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp115
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp1037
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp1
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp383
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp185
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp874
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h41
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp13
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp344
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp14
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp127
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp913
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp164
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp202
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp157
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp533
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp400
-rw-r--r--llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp5
-rw-r--r--llvm/lib/Transforms/Instrumentation/CGProfile.cpp3
-rw-r--r--llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp78
-rw-r--r--llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp237
-rw-r--r--llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp59
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp521
-rw-r--r--llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp73
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp9
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp59
-rw-r--r--llvm/lib/Transforms/Instrumentation/Instrumentation.cpp10
-rw-r--r--llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h109
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfiler.cpp54
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp155
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp176
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp65
-rw-r--r--llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp6
-rw-r--r--llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp23
-rw-r--r--llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp84
-rw-r--r--llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp7
-rw-r--r--llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h2
-rw-r--r--llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc1
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARC.cpp1
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARC.h1
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp5
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp9
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/ADCE.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/ConstantHoisting.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/ConstraintElimination.cpp754
-rw-r--r--llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp110
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp157
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp138
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp25
-rw-r--r--llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/Float2Int.cpp207
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp231
-rw-r--r--llvm/lib/Transforms/Scalar/GVNHoist.cpp16
-rw-r--r--llvm/lib/Transforms/Scalar/GVNSink.cpp30
-rw-r--r--llvm/lib/Transforms/Scalar/GuardWidening.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp34
-rw-r--r--llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp72
-rw-r--r--llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp142
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp482
-rw-r--r--llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp26
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDeletion.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDistribute.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFlatten.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFuse.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp64
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp200
-rw-r--r--llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPassManager.cpp10
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPredication.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRerollPass.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRotation.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp26
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSink.cpp91
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp664
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp30
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp57
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnswitch.cpp1774
-rw-r--r--llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp (renamed from llvm/lib/Transforms/Scalar/LowerAtomic.cpp)84
-rw-r--r--llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp57
-rw-r--r--llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp187
-rw-r--r--llvm/lib/Transforms/Scalar/MergeICmps.cpp59
-rw-r--r--llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp46
-rw-r--r--llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/Reassociate.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/Reg2Mem.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp489
-rw-r--r--llvm/lib/Transforms/Scalar/SCCP.cpp105
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp75
-rw-r--r--llvm/lib/Transforms/Scalar/Scalar.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/Scalarizer.cpp103
-rw-r--r--llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp121
-rw-r--r--llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/Sink.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp19
-rw-r--r--llvm/lib/Transforms/Scalar/StructurizeCFG.cpp67
-rw-r--r--llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp306
-rw-r--r--llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/AddDiscriminators.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp15
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp406
-rw-r--r--llvm/lib/Transforms/Utils/CallGraphUpdater.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/CallPromotionUtils.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp106
-rw-r--r--llvm/lib/Transforms/Utils/CloneModule.cpp5
-rw-r--r--llvm/lib/Transforms/Utils/CodeExtractor.cpp24
-rw-r--r--llvm/lib/Transforms/Utils/CodeLayout.cpp28
-rw-r--r--llvm/lib/Transforms/Utils/CtorUtils.cpp65
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp184
-rw-r--r--llvm/lib/Transforms/Utils/DemoteRegToStack.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/Evaluator.cpp109
-rw-r--r--llvm/lib/Transforms/Utils/FixIrreducible.cpp9
-rw-r--r--llvm/lib/Transforms/Utils/FunctionImportUtils.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/GlobalStatus.cpp32
-rw-r--r--llvm/lib/Transforms/Utils/InjectTLIMappings.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp77
-rw-r--r--llvm/lib/Transforms/Utils/IntegerDivision.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/LCSSA.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp77
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp122
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp19
-rw-r--r--llvm/lib/Transforms/Utils/LoopSimplify.cpp9
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp47
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp85
-rw-r--r--llvm/lib/Transforms/Utils/LoopVersioning.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/LowerAtomic.cpp93
-rw-r--r--llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp (renamed from llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp)65
-rw-r--r--llvm/lib/Transforms/Utils/LowerInvoke.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp217
-rw-r--r--llvm/lib/Transforms/Utils/LowerSwitch.cpp43
-rw-r--r--llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp195
-rw-r--r--llvm/lib/Transforms/Utils/MisExpect.cpp249
-rw-r--r--llvm/lib/Transforms/Utils/ModuleUtils.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/PredicateInfo.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp27
-rw-r--r--llvm/lib/Transforms/Utils/SCCPSolver.cpp204
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdater.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/SampleProfileInference.cpp394
-rw-r--r--llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/SanitizerStats.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp258
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp818
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyIndVar.cpp18
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp665
-rw-r--r--llvm/lib/Transforms/Utils/SizeOpts.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/StripGCRelocates.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/SymbolRewriter.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/UnifyLoopExits.cpp48
-rw-r--r--llvm/lib/Transforms/Utils/Utils.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/VNCoercion.cpp124
-rw-r--r--llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp19
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp133
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h22
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2149
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp4364
-rw-r--r--llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h12
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp1161
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h592
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp135
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h10
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h44
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp248
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPredicator.h74
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp840
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanSLP.cpp15
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp114
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.h16
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanValue.h24
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp55
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp379
-rw-r--r--llvm/lib/Transforms/Vectorize/Vectorize.cpp1
-rw-r--r--llvm/lib/WindowsDriver/MSVCPaths.cpp719
-rw-r--r--llvm/lib/WindowsManifest/WindowsManifestMerger.cpp4
-rw-r--r--llvm/lib/XRay/FDRTraceWriter.cpp2
-rw-r--r--llvm/tools/bugpoint/CrashDebugger.cpp4
-rw-r--r--llvm/tools/bugpoint/ExecutionDriver.cpp8
-rw-r--r--llvm/tools/bugpoint/OptimizerDriver.cpp2
-rw-r--r--llvm/tools/bugpoint/bugpoint.cpp12
-rw-r--r--llvm/tools/llc/llc.cpp42
-rw-r--r--llvm/tools/lli/lli.cpp158
-rw-r--r--llvm/tools/llvm-ar/llvm-ar.cpp169
-rw-r--r--llvm/tools/llvm-cov/CodeCoverage.cpp68
-rw-r--r--llvm/tools/llvm-cov/CoverageViewOptions.h2
-rw-r--r--llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp42
-rw-r--r--llvm/tools/llvm-cov/TestingSupport.cpp1
-rw-r--r--llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp2
-rw-r--r--llvm/tools/llvm-cxxfilt/Opts.td2
-rw-r--r--llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp2
-rw-r--r--llvm/tools/llvm-dis/llvm-dis.cpp12
-rw-r--r--llvm/tools/llvm-dwarfdump/Statistics.cpp18
-rw-r--r--llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp93
-rw-r--r--llvm/tools/llvm-dwp/llvm-dwp.cpp17
-rw-r--r--llvm/tools/llvm-extract/llvm-extract.cpp21
-rw-r--r--llvm/tools/llvm-link/llvm-link.cpp24
-rw-r--r--llvm/tools/llvm-lto/llvm-lto.cpp16
-rw-r--r--llvm/tools/llvm-lto2/llvm-lto2.cpp57
-rw-r--r--llvm/tools/llvm-mc/llvm-mc.cpp4
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.cpp9
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionInfoView.cpp4
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionView.h3
-rw-r--r--llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp8
-rw-r--r--llvm/tools/llvm-mca/llvm-mca.cpp31
-rw-r--r--llvm/tools/llvm-modextract/llvm-modextract.cpp1
-rw-r--r--llvm/tools/llvm-nm/Opts.td11
-rw-r--r--llvm/tools/llvm-nm/llvm-nm.cpp956
-rw-r--r--llvm/tools/llvm-objcopy/BitcodeStripOpts.td8
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOptions.cpp (renamed from llvm/tools/llvm-objcopy/ConfigManager.cpp)222
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOptions.h (renamed from llvm/tools/llvm-objcopy/ConfigManager.h)38
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOpts.td6
-rw-r--r--llvm/tools/llvm-objcopy/llvm-objcopy.cpp227
-rw-r--r--llvm/tools/llvm-objdump/COFFDump.cpp32
-rw-r--r--llvm/tools/llvm-objdump/ELFDump.cpp8
-rw-r--r--llvm/tools/llvm-objdump/MachODump.cpp69
-rw-r--r--llvm/tools/llvm-objdump/MachODump.h1
-rw-r--r--llvm/tools/llvm-objdump/ObjdumpOpts.td9
-rw-r--r--llvm/tools/llvm-objdump/OffloadDump.cpp102
-rw-r--r--llvm/tools/llvm-objdump/OffloadDump.h22
-rw-r--r--llvm/tools/llvm-objdump/OtoolOpts.td1
-rw-r--r--llvm/tools/llvm-objdump/SourcePrinter.cpp2
-rw-r--r--llvm/tools/llvm-objdump/SourcePrinter.h1
-rw-r--r--llvm/tools/llvm-objdump/XCOFFDump.cpp2
-rw-r--r--llvm/tools/llvm-objdump/llvm-objdump.cpp125
-rw-r--r--llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp12
-rw-r--r--llvm/tools/llvm-pdbutil/BytesOutputStyle.h2
-rw-r--r--llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp398
-rw-r--r--llvm/tools/llvm-pdbutil/DumpOutputStyle.h2
-rw-r--r--llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp9
-rw-r--r--llvm/tools/llvm-pdbutil/ExplainOutputStyle.h3
-rw-r--r--llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp10
-rw-r--r--llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp7
-rw-r--r--llvm/tools/llvm-pdbutil/OutputStyle.h5
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp4
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp3
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp3
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp1
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp3
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp3
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp5
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp4
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp4
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp6
-rw-r--r--llvm/tools/llvm-pdbutil/StreamUtil.cpp4
-rw-r--r--llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp6
-rw-r--r--llvm/tools/llvm-pdbutil/TypeReferenceTracker.h3
-rw-r--r--llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp145
-rw-r--r--llvm/tools/llvm-pdbutil/llvm-pdbutil.h3
-rw-r--r--llvm/tools/llvm-profdata/llvm-profdata.cpp107
-rw-r--r--llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp176
-rw-r--r--llvm/tools/llvm-readobj/ARMWinEHPrinter.h3
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp196
-rw-r--r--llvm/tools/llvm-readobj/MachODumper.cpp53
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.h58
-rw-r--r--llvm/tools/llvm-readobj/Opts.td3
-rw-r--r--llvm/tools/llvm-readobj/WasmDumper.cpp14
-rw-r--r--llvm/tools/llvm-readobj/XCOFFDumper.cpp253
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.cpp53
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.h5
-rw-r--r--llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp19
-rw-r--r--llvm/tools/llvm-sim/llvm-sim.cpp5
-rw-r--r--llvm/tools/llvm-stress/llvm-stress.cpp101
-rw-r--r--llvm/tools/llvm-strings/llvm-strings.cpp3
-rw-r--r--llvm/tools/llvm-symbolizer/Opts.td6
-rw-r--r--llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp210
-rw-r--r--llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp1
-rw-r--r--llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp1
-rw-r--r--llvm/tools/llvm-xray/func-id-helper.cpp1
-rw-r--r--llvm/tools/llvm-xray/func-id-helper.h1
-rw-r--r--llvm/tools/llvm-xray/xray-graph-diff.cpp1
-rw-r--r--llvm/tools/opt/NewPMDriver.cpp70
-rw-r--r--llvm/tools/opt/NewPMDriver.h5
-rw-r--r--llvm/tools/opt/PassPrinters.cpp212
-rw-r--r--llvm/tools/opt/PassPrinters.h40
-rw-r--r--llvm/tools/opt/opt.cpp122
-rw-r--r--llvm/utils/TableGen/AsmMatcherEmitter.cpp3
-rw-r--r--llvm/utils/TableGen/AsmWriterEmitter.cpp23
-rw-r--r--llvm/utils/TableGen/AsmWriterInst.cpp1
-rw-r--r--llvm/utils/TableGen/Attributes.cpp3
-rw-r--r--llvm/utils/TableGen/CallingConvEmitter.cpp139
-rw-r--r--llvm/utils/TableGen/CodeBeadsGen.cpp137
-rw-r--r--llvm/utils/TableGen/CodeEmitterGen.cpp250
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.cpp14
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.h1
-rw-r--r--llvm/utils/TableGen/CodeGenInstruction.cpp6
-rw-r--r--llvm/utils/TableGen/CodeGenInstruction.h3
-rw-r--r--llvm/utils/TableGen/CodeGenIntrinsics.h5
-rw-r--r--llvm/utils/TableGen/CodeGenMapTable.cpp2
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.cpp38
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.h36
-rw-r--r--llvm/utils/TableGen/CodeGenSchedule.cpp1
-rw-r--r--llvm/utils/TableGen/CodeGenSchedule.h3
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.cpp24
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.h7
-rw-r--r--llvm/utils/TableGen/DAGISelEmitter.cpp1
-rw-r--r--llvm/utils/TableGen/DAGISelMatcherEmitter.cpp2
-rw-r--r--llvm/utils/TableGen/DAGISelMatcherGen.cpp3
-rw-r--r--llvm/utils/TableGen/DFAEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/DFAPacketizerEmitter.cpp2
-rw-r--r--llvm/utils/TableGen/DXILEmitter.cpp374
-rw-r--r--llvm/utils/TableGen/DecoderEmitter.cpp (renamed from llvm/utils/TableGen/FixedLenDecoderEmitter.cpp)805
-rw-r--r--llvm/utils/TableGen/DirectiveEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/DisassemblerEmitter.cpp26
-rw-r--r--llvm/utils/TableGen/ExegesisEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/FastISelEmitter.cpp2
-rw-r--r--llvm/utils/TableGen/GICombinerEmitter.cpp17
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp4
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchTree.h4
-rw-r--r--llvm/utils/TableGen/GlobalISelEmitter.cpp23
-rw-r--r--llvm/utils/TableGen/InstrInfoEmitter.cpp51
-rw-r--r--llvm/utils/TableGen/IntrinsicEmitter.cpp81
-rw-r--r--llvm/utils/TableGen/OptParserEmitter.cpp2
-rw-r--r--llvm/utils/TableGen/OptRSTEmitter.cpp29
-rw-r--r--llvm/utils/TableGen/PseudoLoweringEmitter.cpp3
-rw-r--r--llvm/utils/TableGen/RegisterBankEmitter.cpp8
-rw-r--r--llvm/utils/TableGen/RegisterInfoEmitter.cpp120
-rw-r--r--llvm/utils/TableGen/SearchableTableEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/SequenceToOffsetTable.h16
-rw-r--r--llvm/utils/TableGen/SubtargetEmitter.cpp53
-rw-r--r--llvm/utils/TableGen/SubtargetFeatureInfo.cpp66
-rw-r--r--llvm/utils/TableGen/TableGen.cpp20
-rw-r--r--llvm/utils/TableGen/TableGenBackends.h3
-rw-r--r--llvm/utils/TableGen/VarLenCodeEmitterGen.cpp487
-rw-r--r--llvm/utils/TableGen/VarLenCodeEmitterGen.h66
-rw-r--r--llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp18
-rw-r--r--llvm/utils/TableGen/X86DisassemblerTables.cpp34
-rw-r--r--llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp75
-rw-r--r--llvm/utils/TableGen/X86FoldTablesEmitter.cpp266
-rw-r--r--llvm/utils/TableGen/X86MnemonicTables.cpp94
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.cpp192
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.h77
3334 files changed, 242006 insertions, 89156 deletions
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 09d80841fa5d..2abc29851cd9 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -549,6 +549,13 @@ LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C);
void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard);
/**
+ * Set whether the given context is in opaque pointer mode.
+ *
+ * @see LLVMContext::setOpaquePointers()
+ */
+void LLVMContextSetOpaquePointers(LLVMContextRef C, LLVMBool OpaquePointers);
+
+/**
* Destroy a context instance.
*
* This should be called for every call to LLVMContextCreate() or memory
@@ -1391,9 +1398,9 @@ LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy);
*/
/**
- * Obtain the type of elements within a sequential type.
+ * Obtain the element type of an array or vector type.
*
- * This works on array, vector, and pointer types.
+ * This currently also works for pointer types, but this usage is deprecated.
*
* @see llvm::SequentialType::getElementType()
*/
@@ -1443,6 +1450,22 @@ unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy);
LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace);
/**
+ * Determine whether a pointer is opaque.
+ *
+ * True if this is an instance of an opaque PointerType.
+ *
+ * @see llvm::Type::isOpaquePointerTy()
+ */
+LLVMBool LLVMPointerTypeIsOpaque(LLVMTypeRef Ty);
+
+/**
+ * Create an opaque pointer type in a context.
+ *
+ * @see llvm::PointerType::get()
+ */
+LLVMTypeRef LLVMPointerTypeInContext(LLVMContextRef C, unsigned AddressSpace);
+
+/**
* Obtain the address space of a pointer type.
*
* This only works on types that represent pointers.
@@ -2089,11 +2112,23 @@ LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
unsigned Count);
/**
+ * Get element of a constant aggregate (struct, array or vector) at the
+ * specified index. Returns null if the index is out of range, or it's not
+ * possible to determine the element (e.g., because the constant is a
+ * constant expression.)
+ *
+ * @see llvm::Constant::getAggregateElement()
+ */
+LLVMValueRef LLVMGetAggregateElement(LLVMValueRef C, unsigned Idx);
+
+/**
* Get an element at specified index as a constant.
*
* @see ConstantDataSequential::getElementAsConstant()
*/
-LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx);
+LLVM_ATTRIBUTE_C_DEPRECATED(
+ LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx),
+ "Use LLVMGetAggregateElement instead");
/**
* Create a ConstantVector from values.
@@ -2203,8 +2238,6 @@ LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
LLVMValueRef VectorBConstant,
LLVMValueRef MaskConstant);
-LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
- unsigned NumIdx);
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
LLVMValueRef ElementValueConstant,
unsigned *IdxList, unsigned NumIdx);
@@ -3978,6 +4011,9 @@ LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val,
LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/
LLVMTypeRef DestTy, const char *Name);
+LLVMOpcode LLVMGetCastOpcode(LLVMValueRef Src, LLVMBool SrcIsSigned,
+ LLVMTypeRef DestTy, LLVMBool DestIsSigned);
+
/* Comparisons */
LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op,
LLVMValueRef LHS, LLVMValueRef RHS,
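
For illustration, a minimal sketch of the new Core.h entry points above (opaque pointer mode plus LLVMGetAggregateElement); this is an editor's example, not part of the diff, and assumes a build linked against LLVM:

#include <llvm-c/Core.h>
#include <stdio.h>

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMContextSetOpaquePointers(Ctx, 1); /* opt in to opaque pointer mode */

  LLVMTypeRef Ptr = LLVMPointerTypeInContext(Ctx, /*AddressSpace=*/0);
  printf("opaque? %d\n", (int)LLVMPointerTypeIsOpaque(Ptr));

  /* LLVMGetAggregateElement replaces the now-deprecated
     LLVMGetElementAsConstant and returns null for out-of-range indices. */
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMValueRef Elts[2] = {LLVMConstInt(I32, 1, 0), LLVMConstInt(I32, 2, 0)};
  LLVMValueRef Arr = LLVMConstArray(I32, Elts, 2);
  LLVMValueRef E1 = LLVMGetAggregateElement(Arr, 1); /* i32 2 */
  (void)E1;

  LLVMContextDispose(Ctx);
  return 0;
}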
diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h
index 53baaef11033..6999a350ec91 100644
--- a/llvm/include/llvm-c/DisassemblerTypes.h
+++ b/llvm/include/llvm-c/DisassemblerTypes.h
@@ -38,15 +38,15 @@ typedef void *LLVMDisasmContextRef;
* one operand with symbolic information. To determine the symbolic operand
* information for each operand, the bytes for the specific operand in the
* instruction are specified by the Offset parameter and its byte width is the
- * size parameter. For instructions sets with fixed widths and one symbolic
- * operand per instruction, the Offset parameter will be zero and Size parameter
- * will be the instruction width. The information is returned in TagBuf and is
- * Triple specific with its specific information defined by the value of
- * TagType for that Triple. If symbolic information is returned the function
- * returns 1, otherwise it returns 0.
+ * OpSize parameter. For instruction sets with fixed widths and one symbolic
+ * operand per instruction, the Offset parameter will be zero and InstSize
+ * parameter will be the instruction width. The information is returned in
+ * TagBuf and is Triple specific with its specific information defined by the
+ * value of TagType for that Triple. If symbolic information is returned the
+ * function returns 1, otherwise it returns 0.
*/
-typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC,
- uint64_t Offset, uint64_t Size,
+typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset,
+ uint64_t OpSize, uint64_t InstSize,
int TagType, void *TagBuf);
/**
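
A sketch of a callback matching the widened signature above; myOpInfo is a hypothetical name, and returning 0 reports that no symbolic information is available:

#include <llvm-c/DisassemblerTypes.h>
#include <stdint.h>

/* OpSize is the operand's byte width, InstSize the full instruction width;
   existing callbacks must add the extra parameter when updating. */
static int myOpInfo(void *DisInfo, uint64_t PC, uint64_t Offset,
                    uint64_t OpSize, uint64_t InstSize, int TagType,
                    void *TagBuf) {
  (void)DisInfo; (void)PC; (void)Offset; (void)OpSize; (void)InstSize;
  (void)TagType; (void)TagBuf;
  return 0; /* no symbolic operand info */
}

static LLVMOpInfoCallback Cb = myOpInfo;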
diff --git a/llvm/include/llvm-c/Object.h b/llvm/include/llvm-c/Object.h
index 9a9596aaa08c..f422c1ad224d 100644
--- a/llvm/include/llvm-c/Object.h
+++ b/llvm/include/llvm-c/Object.h
@@ -38,21 +38,23 @@ typedef struct LLVMOpaqueSymbolIterator *LLVMSymbolIteratorRef;
typedef struct LLVMOpaqueRelocationIterator *LLVMRelocationIteratorRef;
typedef enum {
- LLVMBinaryTypeArchive, /**< Archive file. */
- LLVMBinaryTypeMachOUniversalBinary, /**< Mach-O Universal Binary file. */
- LLVMBinaryTypeCOFFImportFile, /**< COFF Import file. */
- LLVMBinaryTypeIR, /**< LLVM IR. */
- LLVMBinaryTypeWinRes, /**< Windows resource (.res) file. */
- LLVMBinaryTypeCOFF, /**< COFF Object file. */
- LLVMBinaryTypeELF32L, /**< ELF 32-bit, little endian. */
- LLVMBinaryTypeELF32B, /**< ELF 32-bit, big endian. */
- LLVMBinaryTypeELF64L, /**< ELF 64-bit, little endian. */
- LLVMBinaryTypeELF64B, /**< ELF 64-bit, big endian. */
- LLVMBinaryTypeMachO32L, /**< MachO 32-bit, little endian. */
- LLVMBinaryTypeMachO32B, /**< MachO 32-bit, big endian. */
- LLVMBinaryTypeMachO64L, /**< MachO 64-bit, little endian. */
- LLVMBinaryTypeMachO64B, /**< MachO 64-bit, big endian. */
- LLVMBinaryTypeWasm, /**< Web Assembly. */
+ LLVMBinaryTypeArchive, /**< Archive file. */
+ LLVMBinaryTypeMachOUniversalBinary, /**< Mach-O Universal Binary file. */
+ LLVMBinaryTypeCOFFImportFile, /**< COFF Import file. */
+ LLVMBinaryTypeIR, /**< LLVM IR. */
+ LLVMBinaryTypeWinRes, /**< Windows resource (.res) file. */
+ LLVMBinaryTypeCOFF, /**< COFF Object file. */
+ LLVMBinaryTypeELF32L, /**< ELF 32-bit, little endian. */
+ LLVMBinaryTypeELF32B, /**< ELF 32-bit, big endian. */
+ LLVMBinaryTypeELF64L, /**< ELF 64-bit, little endian. */
+ LLVMBinaryTypeELF64B, /**< ELF 64-bit, big endian. */
+ LLVMBinaryTypeMachO32L, /**< MachO 32-bit, little endian. */
+ LLVMBinaryTypeMachO32B, /**< MachO 32-bit, big endian. */
+ LLVMBinaryTypeMachO64L, /**< MachO 64-bit, little endian. */
+ LLVMBinaryTypeMachO64B, /**< MachO 64-bit, big endian. */
+ LLVMBinaryTypeWasm, /**< Web Assembly. */
+ LLVMBinaryTypeOffload, /**< Offloading fatbinary. */
+
} LLVMBinaryType;
/**
diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h
index e2f30b7cdf45..0dcfb06865aa 100644
--- a/llvm/include/llvm-c/Orc.h
+++ b/llvm/include/llvm-c/Orc.h
@@ -54,6 +54,7 @@ typedef uint64_t LLVMOrcExecutorAddress;
* Represents generic linkage flags for a symbol definition.
*/
typedef enum {
+ LLVMJITSymbolGenericFlagsNone = 0,
LLVMJITSymbolGenericFlagsExported = 1U << 0,
LLVMJITSymbolGenericFlagsWeak = 1U << 1,
LLVMJITSymbolGenericFlagsCallable = 1U << 2,
@@ -122,13 +123,13 @@ typedef LLVMOrcCSymbolFlagsMapPair *LLVMOrcCSymbolFlagsMapPairs;
typedef struct {
LLVMOrcSymbolStringPoolEntryRef Name;
LLVMJITEvaluatedSymbol Sym;
-} LLVMJITCSymbolMapPair;
+} LLVMOrcCSymbolMapPair;
/**
* Represents a list of (SymbolStringPtr, JITEvaluatedSymbol) pairs that can be
* used to construct a SymbolMap.
*/
-typedef LLVMJITCSymbolMapPair *LLVMOrcCSymbolMapPairs;
+typedef LLVMOrcCSymbolMapPair *LLVMOrcCSymbolMapPairs;
/**
* Represents a SymbolAliasMapEntry
@@ -203,6 +204,22 @@ typedef enum {
} LLVMOrcJITDylibLookupFlags;
/**
+ * An element type for a JITDylib search order.
+ */
+typedef struct {
+ LLVMOrcJITDylibRef JD;
+ LLVMOrcJITDylibLookupFlags JDLookupFlags;
+} LLVMOrcCJITDylibSearchOrderElement;
+
+/**
+ * A JITDylib search order.
+ *
+ * The list is terminated with an element containing a null pointer for the JD
+ * field.
+ */
+typedef LLVMOrcCJITDylibSearchOrderElement *LLVMOrcCJITDylibSearchOrder;
+
+/**
* Symbol lookup flags for lookup sets. This should be kept in sync with
* llvm::orc::SymbolLookupFlags.
*/
@@ -341,6 +358,14 @@ typedef LLVMErrorRef (*LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction)(
LLVMOrcCLookupSet LookupSet, size_t LookupSetSize);
/**
+ * Disposer for a custom generator.
+ *
+ * Will be called by ORC when the JITDylib that the generator is attached to
+ * is destroyed.
+ */
+typedef void (*LLVMOrcDisposeCAPIDefinitionGeneratorFunction)(void *Ctx);
+
+/**
* Predicate function for SymbolStringPoolEntries.
*/
typedef int (*LLVMOrcSymbolPredicate)(void *Ctx,
@@ -495,6 +520,58 @@ LLVMOrcSymbolStringPoolEntryRef
LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name);
/**
+ * Callback type for ExecutionSession lookups.
+ *
+ * If Err is LLVMErrorSuccess then Result will contain a pointer to a
+ * list of (SymbolStringPtr, JITEvaluatedSymbol) pairs of length NumPairs.
+ *
+ * If Err is a failure value then Result and Ctx are undefined and should
+ * not be accessed. The Callback is responsible for handling the error
+ * value (e.g. by calling LLVMGetErrorMessage + LLVMDisposeErrorMessage).
+ *
+ * The caller retains ownership of the Result array and will release all
+ * contained symbol names. Clients are responsible for retaining any symbol
+ * names that they wish to hold after the function returns.
+ */
+typedef void (*LLVMOrcExecutionSessionLookupHandleResultFunction)(
+ LLVMErrorRef Err, LLVMOrcCSymbolMapPairs Result, size_t NumPairs,
+ void *Ctx);
+
+/**
+ * Look up symbols in an execution session.
+ *
+ * This is a wrapper around the general ExecutionSession::lookup function.
+ *
+ * The SearchOrder argument contains a list of (JITDylibs, JITDylibSearchFlags)
+ * pairs that describe the search order. The JITDylibs will be searched in the
+ * given order to try to find the symbols in the Symbols argument.
+ *
+ * The Symbols argument should contain a null-terminated array of
+ * (SymbolStringPtr, SymbolLookupFlags) pairs describing the symbols to be
+ * searched for. This function takes ownership of the elements of the Symbols
+ * array. The Name fields of the Symbols elements are taken to have been
+ * retained by the client for this function. The client should *not* release the
+ * Name fields, but is still responsible for destroying the array itself.
+ *
+ * The HandleResult function will be called once all searched for symbols have
+ * been found, or an error occurs. The HandleResult function will be passed an
+ * LLVMErrorRef indicating success or failure, and (on success) a
+ * null-terminated LLVMOrcCSymbolMapPairs array containing the function result,
+ * and the Ctx value passed to the lookup function.
+ *
+ * The client is fully responsible for managing the lifetime of the Ctx object.
+ * A common idiom is to allocate the context prior to the lookup and deallocate
+ * it in the handler.
+ *
+ * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE!
+ */
+void LLVMOrcExecutionSessionLookup(
+ LLVMOrcExecutionSessionRef ES, LLVMOrcLookupKind K,
+ LLVMOrcCJITDylibSearchOrder SearchOrder, size_t SearchOrderSize,
+ LLVMOrcCLookupSet Symbols, size_t SymbolsSize,
+ LLVMOrcExecutionSessionLookupHandleResultFunction HandleResult, void *Ctx);
+
+/**
* Increments the ref-count for a SymbolStringPool entry.
*/
void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S);
@@ -504,6 +581,11 @@ void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S);
*/
void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S);
+/**
+ * Return the c-string for the given symbol. This string will remain valid until
+ * the entry is freed (once all LLVMOrcSymbolStringPoolEntryRefs have been
+ * released).
+ */
const char *LLVMOrcSymbolStringPoolEntryStr(LLVMOrcSymbolStringPoolEntryRef S);
/**
@@ -547,7 +629,7 @@ void LLVMOrcDisposeMaterializationUnit(LLVMOrcMaterializationUnitRef MU);
* unit. This function takes ownership of the elements of the Syms array. The
* Name fields of the array elements are taken to have been retained for this
* function. The client should *not* release the elements of the array, but is
- * still responsible for destroyingthe array itself.
+ * still responsible for destroying the array itself.
*
* The InitSym argument indicates whether or not this MaterializationUnit
* contains static initializers. If there are no static initializers (the common
@@ -701,7 +783,7 @@ LLVMOrcMaterializationResponsibilityGetRequestedSymbols(
*/
void LLVMOrcDisposeSymbols(LLVMOrcSymbolStringPoolEntryRef *Symbols);
-/*
+/**
* Notifies the target JITDylib that the given symbols have been resolved.
* This will update the given symbols' addresses in the JITDylib, and notify
* any pending queries on the given symbols of their resolution. The given
@@ -901,9 +983,27 @@ void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD,
/**
* Create a custom generator.
+ *
+ * The F argument will be used to implement the DefinitionGenerator's
+ * tryToGenerate method (see
+ * LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction).
+ *
+ * Ctx is a context object that will be passed to F. This argument is
+ * permitted to be null.
+ *
+ * Dispose is the disposal function for Ctx. This argument is permitted to be
+ * null (in which case the client is responsible for the lifetime of Ctx).
*/
LLVMOrcDefinitionGeneratorRef LLVMOrcCreateCustomCAPIDefinitionGenerator(
- LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx);
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx,
+ LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose);
+
+/**
+ * Continue a lookup that was suspended in a generator (see
+ * LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction).
+ */
+void LLVMOrcLookupStateContinueLookup(LLVMOrcLookupStateRef S,
+ LLVMErrorRef Err);
/**
* Get a DynamicLibrarySearchGenerator that will reflect process symbols into
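
A sketch of the new asynchronous lookup, assuming an already-constructed LLJIT instance (LLVMOrcLLJITRef from llvm-c/LLJIT.h); handleResult and lookupMain are illustrative names, and error handling is kept minimal:

#include <llvm-c/Error.h>
#include <llvm-c/LLJIT.h>
#include <llvm-c/Orc.h>
#include <stdio.h>

static void handleResult(LLVMErrorRef Err, LLVMOrcCSymbolMapPairs Result,
                         size_t NumPairs, void *Ctx) {
  (void)Ctx;
  if (Err) { /* the handler must consume the error value */
    char *Msg = LLVMGetErrorMessage(Err);
    fprintf(stderr, "lookup failed: %s\n", Msg);
    LLVMDisposeErrorMessage(Msg);
    return;
  }
  for (size_t I = 0; I != NumPairs; ++I) /* Result stays owned by the caller */
    printf("%s -> 0x%llx\n", LLVMOrcSymbolStringPoolEntryStr(Result[I].Name),
           (unsigned long long)Result[I].Sym.Address);
}

static void lookupMain(LLVMOrcLLJITRef J) {
  LLVMOrcCJITDylibSearchOrderElement SO[1] = {
      {LLVMOrcLLJITGetMainJITDylib(J),
       LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly}};
  /* Ownership of the interned Name passes to the lookup. */
  LLVMOrcCLookupSetElement Syms[1] = {
      {LLVMOrcLLJITMangleAndIntern(J, "main"),
       LLVMOrcSymbolLookupFlagsRequiredSymbol}};
  LLVMOrcExecutionSessionLookup(LLVMOrcLLJITGetExecutionSession(J),
                                LLVMOrcLookupKindStatic, SO, 1, Syms, 1,
                                handleResult, NULL);
}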
diff --git a/llvm/include/llvm-c/TargetMachine.h b/llvm/include/llvm-c/TargetMachine.h
index 23c8c63ff0b4..bfbe1421a356 100644
--- a/llvm/include/llvm-c/TargetMachine.h
+++ b/llvm/include/llvm-c/TargetMachine.h
@@ -136,7 +136,9 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
wraps several c++ only classes (among them a file stream). Returns any
error in ErrorMessage. Use LLVMDisposeMessage to dispose the message. */
LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
- char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage);
+ const char *Filename,
+ LLVMCodeGenFileType codegen,
+ char **ErrorMessage);
/** Compile the LLVM IR stored in \p M and store the result in \p OutMemBuf. */
LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, LLVMModuleRef M,
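
With the const-qualified parameter, callers may now pass a string literal directly; a small sketch, assuming TM and M were created elsewhere:

#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
#include <stdio.h>

static void emitObject(LLVMTargetMachineRef TM, LLVMModuleRef M) {
  char *Err = NULL;
  /* "out.o" is accepted directly now; previously it needed a non-const buffer. */
  if (LLVMTargetMachineEmitToFile(TM, M, "out.o", LLVMObjectFile, &Err)) {
    fprintf(stderr, "emit failed: %s\n", Err);
    LLVMDisposeMessage(Err);
  }
}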
diff --git a/llvm/include/llvm-c/Transforms/Coroutines.h b/llvm/include/llvm-c/Transforms/Coroutines.h
deleted file mode 100644
index 03b6822033c9..000000000000
--- a/llvm/include/llvm-c/Transforms/Coroutines.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*===-- Coroutines.h - Coroutines Library C Interface -----------*- C++ -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMCoroutines.a, which *|
-|* implements various scalar transformations of the LLVM IR. *|
-|* *|
-|* Many exotic languages can interoperate with C code but have a harder time *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_TRANSFORMS_COROUTINES_H
-#define LLVM_C_TRANSFORMS_COROUTINES_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-#include "llvm-c/Transforms/PassManagerBuilder.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCTransformsCoroutines Coroutine transformations
- * @ingroup LLVMCTransforms
- *
- * @{
- */
-
-/** See llvm::createCoroEarlyLegacyPass function. */
-void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM);
-
-/** See llvm::createCoroSplitLegacyPass function. */
-void LLVMAddCoroSplitPass(LLVMPassManagerRef PM);
-
-/** See llvm::createCoroElideLegacyPass function. */
-void LLVMAddCoroElidePass(LLVMPassManagerRef PM);
-
-/** See llvm::createCoroCleanupLegacyPass function. */
-void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM);
-
-/** See llvm::addCoroutinePassesToExtensionPoints. */
-void LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(LLVMPassManagerBuilderRef PMB);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
diff --git a/llvm/include/llvm-c/Transforms/IPO.h b/llvm/include/llvm-c/Transforms/IPO.h
index 3f2cadf32366..c806156281bd 100644
--- a/llvm/include/llvm-c/Transforms/IPO.h
+++ b/llvm/include/llvm-c/Transforms/IPO.h
@@ -27,9 +27,6 @@ LLVM_C_EXTERN_C_BEGIN
* @{
*/
-/** See llvm::createArgumentPromotionPass function. */
-void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM);
-
/** See llvm::createConstantMergePass function. */
void LLVMAddConstantMergePass(LLVMPassManagerRef PM);
diff --git a/llvm/include/llvm-c/Transforms/PassManagerBuilder.h b/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
index 6e13e18e063b..3ba75440129a 100644
--- a/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -72,12 +72,6 @@ void
LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM);
-/** See llvm::PassManagerBuilder::populateLTOPassManager. */
-void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
- LLVMPassManagerRef PM,
- LLVMBool Internalize,
- LLVMBool RunInliner);
-
/**
* @}
*/
diff --git a/llvm/include/llvm-c/Transforms/Scalar.h b/llvm/include/llvm-c/Transforms/Scalar.h
index ba142508bbe4..1d0944799710 100644
--- a/llvm/include/llvm-c/Transforms/Scalar.h
+++ b/llvm/include/llvm-c/Transforms/Scalar.h
@@ -94,9 +94,6 @@ void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM);
/** See llvm::createLoopUnrollAndJamPass function. */
void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM);
-/** See llvm::createLoopUnswitchPass function. */
-void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM);
-
/** See llvm::createLowerAtomicPass function. */
void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM);
diff --git a/llvm/include/llvm-c/blake3.h b/llvm/include/llvm-c/blake3.h
new file mode 100644
index 000000000000..679477c3aa7f
--- /dev/null
+++ b/llvm/include/llvm-c/blake3.h
@@ -0,0 +1,79 @@
+/*===-- llvm-c/blake3.h - BLAKE3 C Interface ----------------------*- C -*-===*\
+|* *|
+|* Released into the public domain with CC0 1.0 *|
+|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info. *|
+|* SPDX-License-Identifier: CC0-1.0 *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to LLVM's BLAKE3 implementation. *|
+|* Original BLAKE3 C API: https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c *|
+|* *|
+|* Symbols are prefixed with 'llvm' to avoid a potential conflict with *|
+|* another BLAKE3 version within the same program. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_BLAKE3_H
+#define LLVM_C_BLAKE3_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LLVM_BLAKE3_VERSION_STRING "1.3.1"
+#define LLVM_BLAKE3_KEY_LEN 32
+#define LLVM_BLAKE3_OUT_LEN 32
+#define LLVM_BLAKE3_BLOCK_LEN 64
+#define LLVM_BLAKE3_CHUNK_LEN 1024
+#define LLVM_BLAKE3_MAX_DEPTH 54
+
+// This struct is a private implementation detail. It has to be here because
+// it's part of llvm_blake3_hasher below.
+typedef struct {
+ uint32_t cv[8];
+ uint64_t chunk_counter;
+ uint8_t buf[LLVM_BLAKE3_BLOCK_LEN];
+ uint8_t buf_len;
+ uint8_t blocks_compressed;
+ uint8_t flags;
+} llvm_blake3_chunk_state;
+
+typedef struct {
+ uint32_t key[8];
+ llvm_blake3_chunk_state chunk;
+ uint8_t cv_stack_len;
+ // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
+ // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
+ // requires a 4th entry, rather than merging everything down to 1, because we
+ // don't know whether more input is coming. This is different from how the
+ // reference implementation does things.
+ uint8_t cv_stack[(LLVM_BLAKE3_MAX_DEPTH + 1) * LLVM_BLAKE3_OUT_LEN];
+} llvm_blake3_hasher;
+
+const char *llvm_blake3_version(void);
+void llvm_blake3_hasher_init(llvm_blake3_hasher *self);
+void llvm_blake3_hasher_init_keyed(llvm_blake3_hasher *self,
+ const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
+void llvm_blake3_hasher_init_derive_key(llvm_blake3_hasher *self,
+ const char *context);
+void llvm_blake3_hasher_init_derive_key_raw(llvm_blake3_hasher *self,
+ const void *context,
+ size_t context_len);
+void llvm_blake3_hasher_update(llvm_blake3_hasher *self, const void *input,
+ size_t input_len);
+void llvm_blake3_hasher_finalize(const llvm_blake3_hasher *self, uint8_t *out,
+ size_t out_len);
+void llvm_blake3_hasher_finalize_seek(const llvm_blake3_hasher *self,
+ uint64_t seek, uint8_t *out,
+ size_t out_len);
+void llvm_blake3_hasher_reset(llvm_blake3_hasher *self);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LLVM_C_BLAKE3_H */
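
A minimal sketch of the declared hashing flow (init/update/finalize); the output length is the caller's choice, with LLVM_BLAKE3_OUT_LEN as the default digest size:

#include "llvm-c/blake3.h"
#include <stdio.h>

int main(void) {
  llvm_blake3_hasher H;
  llvm_blake3_hasher_init(&H);

  const char Msg[] = "hello";
  llvm_blake3_hasher_update(&H, Msg, sizeof(Msg) - 1);

  uint8_t Out[LLVM_BLAKE3_OUT_LEN];
  llvm_blake3_hasher_finalize(&H, Out, sizeof(Out)); /* reusable via _reset */
  for (size_t I = 0; I != sizeof(Out); ++I)
    printf("%02x", Out[I]);
  printf("\n");
  return 0;
}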
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 17b57de7b0aa..cdedb6ece992 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -155,7 +155,8 @@ struct APFloatBase {
S_IEEEdouble,
S_x87DoubleExtended,
S_IEEEquad,
- S_PPCDoubleDouble
+ S_PPCDoubleDouble,
+ S_MaxSemantics = S_PPCDoubleDouble
};
static const llvm::fltSemantics &EnumToSemantics(Semantics S);
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index b1fc85d3c09d..4155cb260a2a 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -486,7 +486,7 @@ public:
return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
}
- /// Return true if this APInt value contains a sequence of ones with
+ /// Return true if this APInt value contains a non-empty sequence of ones with
/// the remainder zero.
bool isShiftedMask() const {
if (isSingleWord())
@@ -496,6 +496,23 @@ public:
return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
}
+ /// Return true if this APInt value contains a non-empty sequence of ones with
+ /// the remainder zero. If true, \p MaskIdx will specify the index of the
+ /// lowest set bit and \p MaskLen is updated to specify the length of the
+ /// mask; otherwise neither is updated.
+ bool isShiftedMask(unsigned &MaskIdx, unsigned &MaskLen) const {
+ if (isSingleWord())
+ return isShiftedMask_64(U.VAL, MaskIdx, MaskLen);
+ unsigned Ones = countPopulationSlowCase();
+ unsigned LeadZ = countLeadingZerosSlowCase();
+ unsigned TrailZ = countTrailingZerosSlowCase();
+ if ((Ones + LeadZ + TrailZ) != BitWidth)
+ return false;
+ MaskLen = Ones;
+ MaskIdx = TrailZ;
+ return true;
+ }
+
/// Compute an APInt containing numBits highbits from this APInt.
///
/// Get an APInt with the same BitWidth as this APInt, just zero mask the low
@@ -1201,7 +1218,7 @@ public:
/// Truncate to new width.
///
/// Truncate the APInt to a specified width. It is an error to specify a width
- /// that is greater than or equal to the current width.
+ /// that is greater than the current width.
APInt trunc(unsigned width) const;
/// Truncate to new width with unsigned saturation.
@@ -1221,7 +1238,7 @@ public:
///
/// This operation sign extends the APInt to a new width. If the high order
/// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
- /// It is an error to specify a width that is less than or equal to the
+ /// It is an error to specify a width that is less than the
/// current width.
APInt sext(unsigned width) const;
@@ -1229,7 +1246,7 @@ public:
///
/// This operation zero extends the APInt to a new width. The high order bits
/// are filled with 0 bits. It is an error to specify a width that is less
- /// than or equal to the current width.
+ /// than the current width.
APInt zext(unsigned width) const;
/// Sign extend or truncate to width
@@ -1244,24 +1261,6 @@ public:
/// extended, truncated, or left alone to make it that width.
APInt zextOrTrunc(unsigned width) const;
- /// Truncate to width
- ///
- /// Make this APInt have the bit width given by \p width. The value is
- /// truncated or left alone to make it that width.
- APInt truncOrSelf(unsigned width) const;
-
- /// Sign extend or truncate to width
- ///
- /// Make this APInt have the bit width given by \p width. The value is sign
- /// extended, or left alone to make it that width.
- APInt sextOrSelf(unsigned width) const;
-
- /// Zero extend or truncate to width
- ///
- /// Make this APInt have the bit width given by \p width. The value is zero
- /// extended, or left alone to make it that width.
- APInt zextOrSelf(unsigned width) const;
-
/// @}
/// \name Bit Manipulation Operators
/// @{
@@ -1489,6 +1488,11 @@ public:
/// equivalent of the string given by \p str.
static unsigned getBitsNeeded(StringRef str, uint8_t radix);
+ /// Get the bits that are sufficient to represent the string value. This may
+ /// over estimate the amount of bits required, but it does not require
+ /// parsing the value in the string.
+ static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
+
/// The APInt version of the countLeadingZeros functions in
/// MathExtras.h.
///
@@ -2235,12 +2239,16 @@ Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
/// by \param A to \param NewBitWidth bits.
///
+/// MatchAnyBits: (Default)
/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
-/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other.
///
-/// TODO: Do we need a mode where all bits must be set when merging down?
-APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
+/// MatchAllBits:
+/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
+/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0001
+/// A.getBitwidth() or NewBitWidth must be a whole multiple of the other.
+APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth,
+ bool MatchAllBits = false);
} // namespace APIntOps
// See friend declaration above. This additional declaration is required in
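
A sketch exercising the two APInt additions above, reusing the documented example values (demo is an illustrative name):

#include "llvm/ADT/APInt.h"
using namespace llvm;

void demo() {
  // isShiftedMask with out-parameters: 0b00011000 has its lowest set bit at
  // index 3 and a run of 2 ones.
  APInt Mask(8, 0b00011000);
  unsigned MaskIdx = 0, MaskLen = 0;
  bool IsMask = Mask.isShiftedMask(MaskIdx, MaskLen); // true, Idx=3, Len=2
  (void)IsMask;

  // ScaleBitMask narrowing: any-bit vs. all-bits matching, per the comment.
  APInt A(8, 0b00011011);
  APInt Any = APIntOps::ScaleBitMask(A, 4);                        // 0b0111
  APInt All = APIntOps::ScaleBitMask(A, 4, /*MatchAllBits=*/true); // 0b0001
  (void)Any; (void)All;
}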
diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h
new file mode 100644
index 000000000000..1953680d5222
--- /dev/null
+++ b/llvm/include/llvm/ADT/AddressRanges.h
@@ -0,0 +1,79 @@
+//===- AddressRanges.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ADDRESSRANGES_H
+#define LLVM_ADT_ADDRESSRANGES_H
+
+#include "llvm/ADT/Optional.h"
+#include <cassert>
+#include <stdint.h>
+#include <vector>
+
+namespace llvm {
+
+/// A class that represents an address range. The range is specified using
+/// a start and an end address: [Start, End).
+class AddressRange {
+public:
+ AddressRange() {}
+ AddressRange(uint64_t S, uint64_t E) : Start(S), End(E) {
+ assert(Start <= End);
+ }
+ uint64_t start() const { return Start; }
+ uint64_t end() const { return End; }
+ uint64_t size() const { return End - Start; }
+ bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; }
+ bool intersects(const AddressRange &R) const {
+ return Start < R.End && R.Start < End;
+ }
+ bool operator==(const AddressRange &R) const {
+ return Start == R.Start && End == R.End;
+ }
+ bool operator!=(const AddressRange &R) const { return !(*this == R); }
+ bool operator<(const AddressRange &R) const {
+ return std::make_pair(Start, End) < std::make_pair(R.Start, R.End);
+ }
+
+private:
+ uint64_t Start = 0;
+ uint64_t End = 0;
+};
+
+/// The AddressRanges class helps normalize address range collections.
+/// This class keeps a sorted vector of AddressRange objects and can perform
+/// insertions and searches efficiently. The address ranges are always sorted
+/// and never contain any invalid or empty address ranges. Intersecting
+/// address ranges are combined during insertion.
+class AddressRanges {
+protected:
+ using Collection = std::vector<AddressRange>;
+ Collection Ranges;
+
+public:
+ void clear() { Ranges.clear(); }
+ bool empty() const { return Ranges.empty(); }
+ bool contains(uint64_t Addr) const;
+ bool contains(AddressRange Range) const;
+ Optional<AddressRange> getRangeThatContains(uint64_t Addr) const;
+ void insert(AddressRange Range);
+ void reserve(size_t Capacity) { Ranges.reserve(Capacity); }
+ size_t size() const { return Ranges.size(); }
+ bool operator==(const AddressRanges &RHS) const {
+ return Ranges == RHS.Ranges;
+ }
+ const AddressRange &operator[](size_t i) const {
+ assert(i < Ranges.size());
+ return Ranges[i];
+ }
+ Collection::const_iterator begin() const { return Ranges.begin(); }
+ Collection::const_iterator end() const { return Ranges.end(); }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_ADDRESSRANGES_H
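
A short sketch of the coalescing behavior described above; intersecting insertions merge into one normalized range:

#include "llvm/ADT/AddressRanges.h"
using namespace llvm;

void demo() {
  AddressRanges Ranges;
  Ranges.insert(AddressRange(0x1000, 0x2000));
  Ranges.insert(AddressRange(0x1800, 0x3000)); // intersects: merged on insert
  // Ranges.size() == 1 and the single range covers [0x1000, 0x3000).
  bool Inside = Ranges.contains(0x2abc);                        // true
  Optional<AddressRange> R = Ranges.getRangeThatContains(0x10); // None
  (void)Inside; (void)R;
}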
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index b6896395dae8..ee35a5686fc4 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -25,6 +25,7 @@
#include <vector>
namespace llvm {
+ template<typename T> class LLVM_NODISCARD MutableArrayRef;
/// ArrayRef - Represent a constant reference to an array (0 or more elements
/// consecutively in memory), i.e. a start pointer and a length. It allows
@@ -175,10 +176,10 @@ namespace llvm {
}
// copy - Allocate copy in Allocator and return ArrayRef<T> to it.
- template <typename Allocator> ArrayRef<T> copy(Allocator &A) {
+ template <typename Allocator> MutableArrayRef<T> copy(Allocator &A) {
T *Buff = A.template Allocate<T>(Length);
std::uninitialized_copy(begin(), end(), Buff);
- return ArrayRef<T>(Buff, Length);
+ return MutableArrayRef<T>(Buff, Length);
}
/// equals - Check for element-wise equality.
@@ -539,6 +540,42 @@ namespace llvm {
return MutableArrayRef<T>(data, length);
}
+ /// Construct a MutableArrayRef from a SmallVector.
+ template <typename T>
+ MutableArrayRef<T> makeMutableArrayRef(SmallVectorImpl<T> &Vec) {
+ return Vec;
+ }
+
+ /// Construct a MutableArrayRef from a SmallVector.
+ template <typename T, unsigned N>
+ MutableArrayRef<T> makeMutableArrayRef(SmallVector<T, N> &Vec) {
+ return Vec;
+ }
+
+ /// Construct a MutableArrayRef from a std::vector.
+ template<typename T>
+ MutableArrayRef<T> makeMutableArrayRef(std::vector<T> &Vec) {
+ return Vec;
+ }
+
+ /// Construct a MutableArrayRef from a std::array.
+ template <typename T, std::size_t N>
+ MutableArrayRef<T> makeMutableArrayRef(std::array<T, N> &Arr) {
+ return Arr;
+ }
+
+ /// Construct a MutableArrayRef from a MutableArrayRef (no-op) (const)
+ template <typename T>
+ MutableArrayRef<T> makeMutableArrayRef(const MutableArrayRef<T> &Vec) {
+ return Vec;
+ }
+
+ /// Construct a MutableArrayRef from a C array.
+ template<typename T, size_t N>
+ MutableArrayRef<T> makeMutableArrayRef(T (&Arr)[N]) {
+ return MutableArrayRef<T>(Arr);
+ }
+
/// @}
/// @name ArrayRef Comparison Operators
/// @{
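
A sketch of the new makeMutableArrayRef deduction helpers; one mutable view type serves all of the supported containers:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>
using namespace llvm;

static void zeroAll(MutableArrayRef<int> A) {
  for (int &V : A)
    V = 0; // mutation is visible in the underlying container
}

void demo() {
  SmallVector<int, 4> SV = {1, 2, 3};
  std::vector<int> V = {4, 5, 6};
  int C[3] = {7, 8, 9};
  zeroAll(makeMutableArrayRef(SV));
  zeroAll(makeMutableArrayRef(V));
  zeroAll(makeMutableArrayRef(C));
}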
diff --git a/llvm/include/llvm/ADT/BitmaskEnum.h b/llvm/include/llvm/ADT/BitmaskEnum.h
index 89e5508e08e1..205da1240d44 100644
--- a/llvm/include/llvm/ADT/BitmaskEnum.h
+++ b/llvm/include/llvm/ADT/BitmaskEnum.h
@@ -77,7 +77,7 @@ namespace BitmaskEnumDetail {
/// Get a bitmask with 1s in all places up to the high-order bit of E's largest
/// value.
-template <typename E> std::underlying_type_t<E> Mask() {
+template <typename E> constexpr std::underlying_type_t<E> Mask() {
// On overflow, NextPowerOf2 returns zero with the type uint64_t, so
// subtracting 1 gives us the mask with all bits set, like we want.
return NextPowerOf2(static_cast<std::underlying_type_t<E>>(
@@ -87,7 +87,7 @@ template <typename E> std::underlying_type_t<E> Mask() {
/// Check that Val is in range for E, and return Val cast to E's underlying
/// type.
-template <typename E> std::underlying_type_t<E> Underlying(E Val) {
+template <typename E> constexpr std::underlying_type_t<E> Underlying(E Val) {
auto U = static_cast<std::underlying_type_t<E>>(Val);
assert(U >= 0 && "Negative enum values are not allowed.");
assert(U <= Mask<E>() && "Enum value too large (or largest val too small?)");
@@ -99,22 +99,22 @@ constexpr unsigned bitWidth(uint64_t Value) {
}
template <typename E, typename = std::enable_if_t<is_bitmask_enum<E>::value>>
-E operator~(E Val) {
+constexpr E operator~(E Val) {
return static_cast<E>(~Underlying(Val) & Mask<E>());
}
template <typename E, typename = std::enable_if_t<is_bitmask_enum<E>::value>>
-E operator|(E LHS, E RHS) {
+constexpr E operator|(E LHS, E RHS) {
return static_cast<E>(Underlying(LHS) | Underlying(RHS));
}
template <typename E, typename = std::enable_if_t<is_bitmask_enum<E>::value>>
-E operator&(E LHS, E RHS) {
+constexpr E operator&(E LHS, E RHS) {
return static_cast<E>(Underlying(LHS) & Underlying(RHS));
}
template <typename E, typename = std::enable_if_t<is_bitmask_enum<E>::value>>
-E operator^(E LHS, E RHS) {
+constexpr E operator^(E LHS, E RHS) {
return static_cast<E>(Underlying(LHS) ^ Underlying(RHS));
}
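
Because the operators are now constexpr, bitmask-enum combinations can participate in constant expressions; a sketch with a hypothetical Flags enum:

#include "llvm/ADT/BitmaskEnum.h"

enum class Flags {
  None = 0,
  A = 1 << 0,
  B = 1 << 1,
  C = 1 << 2,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/C)
};
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); // pull the operators into scope

constexpr Flags AB = Flags::A | Flags::B;          // now a constant expression
static_assert((AB & Flags::B) == Flags::B, "constexpr bitmask ops");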
diff --git a/llvm/include/llvm/ADT/BreadthFirstIterator.h b/llvm/include/llvm/ADT/BreadthFirstIterator.h
index 1312b5f91e83..807b0a92c48c 100644
--- a/llvm/include/llvm/ADT/BreadthFirstIterator.h
+++ b/llvm/include/llvm/ADT/BreadthFirstIterator.h
@@ -80,7 +80,7 @@ private:
inline void toNext() {
Optional<QueueElement> Head = VisitQueue.front();
- QueueElement H = Head.getValue();
+ QueueElement H = *Head;
NodeRef Node = H.first;
Optional<ChildItTy> &ChildIt = H.second;
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 7673b66ca42a..c14414c46419 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -137,6 +137,7 @@ public:
}
}
assert(NumEntries == 0 && "Node count imbalance!");
+ (void)NumEntries;
}
setNumEntries(0);
setNumTombstones(0);
diff --git a/llvm/include/llvm/ADT/EpochTracker.h b/llvm/include/llvm/ADT/EpochTracker.h
index b06888494466..b46989bc5111 100644
--- a/llvm/include/llvm/ADT/EpochTracker.h
+++ b/llvm/include/llvm/ADT/EpochTracker.h
@@ -34,10 +34,10 @@ namespace llvm {
/// is still valid.
///
class DebugEpochBase {
- uint64_t Epoch;
+ uint64_t Epoch = 0;
public:
- DebugEpochBase() : Epoch(0) {}
+ DebugEpochBase() = default;
/// Calling incrementEpoch invalidates all handles pointing into the
/// calling instance.
diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h
index f12b683ead2d..4f98b84cf97d 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -161,7 +161,8 @@ public:
//
/// iterator* - Provides a way to iterate over all values in the set.
- using iterator = typename std::set<ECValue>::const_iterator;
+ using iterator =
+ typename std::set<ECValue, ECValueComparator>::const_iterator;
iterator begin() const { return TheMapping.begin(); }
iterator end() const { return TheMapping.end(); }
diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h
index 9cc69b8a8344..59ccea1f9d44 100644
--- a/llvm/include/llvm/ADT/FloatingPointMode.h
+++ b/llvm/include/llvm/ADT/FloatingPointMode.h
@@ -7,7 +7,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// Utilities for dealing with flags related to floating point mode controls.
+/// Utilities for dealing with flags related to floating point properties and
+/// mode controls.
///
//===----------------------------------------------------------------------===//
@@ -193,4 +194,29 @@ void DenormalMode::print(raw_ostream &OS) const {
}
+/// Floating-point class tests, as supported by the 'is_fpclass' intrinsic. An
+/// actual test may be an OR combination of the basic tests.
+enum FPClassTest {
+ fcSNan = 0x0001,
+ fcQNan = 0x0002,
+ fcNegInf = 0x0004,
+ fcNegNormal = 0x0008,
+ fcNegSubnormal = 0x0010,
+ fcNegZero = 0x0020,
+ fcPosZero = 0x0040,
+ fcPosSubnormal = 0x0080,
+ fcPosNormal = 0x0100,
+ fcPosInf = 0x0200,
+
+ fcNan = fcSNan | fcQNan,
+ fcInf = fcPosInf | fcNegInf,
+ fcNormal = fcPosNormal | fcNegNormal,
+ fcSubnormal = fcPosSubnormal | fcNegSubnormal,
+ fcZero = fcPosZero | fcNegZero,
+ fcPosFinite = fcPosNormal | fcPosSubnormal | fcPosZero,
+ fcNegFinite = fcNegNormal | fcNegSubnormal | fcNegZero,
+ fcFinite = fcPosFinite | fcNegFinite,
+ fcAllFlags = fcNan | fcInf | fcFinite
+};
+
#endif // LLVM_ADT_FLOATINGPOINTMODE_H
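FPClassTest is a bitmask: the composite values let one mask answer several class queries with a single AND. A hedged sketch of host-side use; classify and isAnyOf are illustrative helpers, and std::fpclassify cannot distinguish signaling from quiet NaNs:

#include "llvm/ADT/FloatingPointMode.h"
#include <cmath>

// Map a host double onto one basic FPClassTest bit (sketch only).
static llvm::FPClassTest classify(double D) {
  switch (std::fpclassify(D)) {
  case FP_NAN:       return llvm::fcQNan;
  case FP_INFINITE:  return std::signbit(D) ? llvm::fcNegInf : llvm::fcPosInf;
  case FP_NORMAL:    return std::signbit(D) ? llvm::fcNegNormal
                                            : llvm::fcPosNormal;
  case FP_SUBNORMAL: return std::signbit(D) ? llvm::fcNegSubnormal
                                            : llvm::fcPosSubnormal;
  default:           return std::signbit(D) ? llvm::fcNegZero : llvm::fcPosZero;
  }
}

// Composite masks make multi-class queries a single AND.
static bool isAnyOf(llvm::FPClassTest Mask, double D) {
  return (classify(D) & Mask) != 0;
}
// isAnyOf(llvm::fcFinite, 1.0) -> true; isAnyOf(llvm::fcZero, -0.0) -> true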
diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h
index a8707f0ee81e..ec276d41da80 100644
--- a/llvm/include/llvm/ADT/FoldingSet.h
+++ b/llvm/include/llvm/ADT/FoldingSet.h
@@ -16,12 +16,14 @@
#ifndef LLVM_ADT_FOLDINGSET_H
#define LLVM_ADT_FOLDINGSET_H
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <type_traits>
#include <utility>
namespace llvm {
@@ -255,8 +257,8 @@ template<typename T> struct DefaultFoldingSetTrait {
/// through template specialization the behavior can be tailored for specific
/// types. Combined with the FoldingSetNodeWrapper class, one can add objects
/// to FoldingSets that were not originally designed to have that behavior.
-template<typename T> struct FoldingSetTrait
- : public DefaultFoldingSetTrait<T> {};
+template <typename T, typename Enable = void>
+struct FoldingSetTrait : public DefaultFoldingSetTrait<T> {};
/// DefaultContextualFoldingSetTrait - Like DefaultFoldingSetTrait, but
/// for ContextualFoldingSets.
@@ -293,7 +295,9 @@ public:
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
/// used to lookup the node in the FoldingSetBase.
- unsigned ComputeHash() const;
+ unsigned ComputeHash() const {
+ return static_cast<unsigned>(hash_combine_range(Data, Data + Size));
+ }
bool operator==(FoldingSetNodeIDRef) const;
@@ -323,13 +327,33 @@ public:
: Bits(Ref.getData(), Ref.getData() + Ref.getSize()) {}
/// Add* - Add various data types to Bit data.
- void AddPointer(const void *Ptr);
- void AddInteger(signed I);
- void AddInteger(unsigned I);
- void AddInteger(long I);
- void AddInteger(unsigned long I);
- void AddInteger(long long I);
- void AddInteger(unsigned long long I);
+ void AddPointer(const void *Ptr) {
+ // Note: this adds pointers to the hash using sizes and endianness that
+ // depend on the host. It doesn't matter, however, because hashing on
+ // pointer values is inherently unstable. Nothing should depend on the
+ // ordering of nodes in the folding set.
+ static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long),
+ "unexpected pointer size");
+ AddInteger(reinterpret_cast<uintptr_t>(Ptr));
+ }
+ void AddInteger(signed I) { Bits.push_back(I); }
+ void AddInteger(unsigned I) { Bits.push_back(I); }
+ void AddInteger(long I) { AddInteger((unsigned long)I); }
+ void AddInteger(unsigned long I) {
+ if (sizeof(long) == sizeof(int))
+ AddInteger(unsigned(I));
+ else if (sizeof(long) == sizeof(long long)) {
+ AddInteger((unsigned long long)I);
+ } else {
+ llvm_unreachable("unexpected sizeof(long)");
+ }
+ }
+ void AddInteger(long long I) { AddInteger((unsigned long long)I); }
+ void AddInteger(unsigned long long I) {
+ AddInteger(unsigned(I));
+ AddInteger(unsigned(I >> 32));
+ }
+
void AddBoolean(bool B) { AddInteger(B ? 1U : 0U); }
void AddString(StringRef String);
void AddNodeID(const FoldingSetNodeID &ID);
@@ -343,7 +367,9 @@ public:
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used
/// to lookup the node in the FoldingSetBase.
- unsigned ComputeHash() const;
+ unsigned ComputeHash() const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
+ }
/// operator== - Used to compare two nodes to each other.
bool operator==(const FoldingSetNodeID &RHS) const;
@@ -803,6 +829,13 @@ struct FoldingSetTrait<std::pair<T1, T2>> {
}
};
+template <typename T>
+struct FoldingSetTrait<T, typename std::enable_if_t<std::is_enum<T>::value>> {
+ static void Profile(const T &X, FoldingSetNodeID &ID) {
+ ID.AddInteger(static_cast<typename std::underlying_type_t<T>>(X));
+ }
+};
+
} // end namespace llvm
#endif // LLVM_ADT_FOLDINGSET_H
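The new enable_if'd specialization means any enumeration type now has a usable FoldingSetTrait out of the box. A small sketch, with Color and profileKey as illustrative names:

#include "llvm/ADT/FoldingSet.h"

enum class Color : unsigned { Red = 1, Green = 2 }; // illustrative enum

// With the enable_if'd specialization above, enums profile directly; a
// hand-written FoldingSetTrait is no longer needed for this case.
void profileKey(llvm::FoldingSetNodeID &ID, Color C, const void *Owner) {
  llvm::FoldingSetTrait<Color>::Profile(C, ID); // adds the underlying value
  ID.AddPointer(Owner);                         // mix in another key part
}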
diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
index d443f9e21a47..ea2847f8c8ee 100644
--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -66,6 +66,44 @@ void GenericCycle<ContextT>::getExitBlocks(
}
}
+template <typename ContextT>
+auto GenericCycle<ContextT>::getCyclePreheader() const -> BlockT * {
+ BlockT *Predecessor = getCyclePredecessor();
+ if (!Predecessor)
+ return nullptr;
+
+ assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!");
+
+ if (succ_size(Predecessor) != 1)
+ return nullptr;
+
+ // Make sure we are allowed to hoist instructions into the predecessor.
+ if (!Predecessor->isLegalToHoistInto())
+ return nullptr;
+
+ return Predecessor;
+}
+
+template <typename ContextT>
+auto GenericCycle<ContextT>::getCyclePredecessor() const -> BlockT * {
+ if (!isReducible())
+ return nullptr;
+
+ BlockT *Out = nullptr;
+
+ // Loop over the predecessors of the header node...
+ BlockT *Header = getHeader();
+ for (const auto Pred : predecessors(Header)) {
+ if (!contains(Pred)) {
+ if (Out && Out != Pred)
+ return nullptr;
+ Out = Pred;
+ }
+ }
+
+ return Out;
+}
+
/// \brief Helper class for computing cycle information.
template <typename ContextT> class GenericCycleInfoCompute {
using BlockT = typename ContextT::BlockT;
@@ -267,8 +305,8 @@ void GenericCycleInfoCompute<ContextT>::dfs(BlockT *EntryBlock) {
DFSTreeStack.emplace_back(TraverseStack.size());
llvm::append_range(TraverseStack, successors(Block));
- LLVM_ATTRIBUTE_UNUSED
bool Added = BlockDFSInfo.try_emplace(Block, ++Counter).second;
+ (void)Added;
assert(Added);
BlockPreorder.push_back(Block);
LLVM_DEBUG(errs() << " preorder number: " << Counter << "\n");
@@ -326,6 +364,19 @@ auto GenericCycleInfo<ContextT>::getCycle(const BlockT *Block) const
return nullptr;
}
+/// \brief Get the depth of the innermost cycle containing a given block.
+///
+/// \returns the depth for the innermost cycle containing \p Block or 0 if it is
+/// not contained in any cycle.
+template <typename ContextT>
+unsigned GenericCycleInfo<ContextT>::getCycleDepth(const BlockT *Block) const {
+ CycleT *Cycle = getCycle(Block);
+ if (!Cycle)
+ return 0;
+ return Cycle->getDepth();
+}
+
+#ifndef NDEBUG
/// \brief Validate the internal consistency of the cycle tree.
///
/// Note that this does \em not check that cycles are really cycles in the CFG,
@@ -391,6 +442,7 @@ bool GenericCycleInfo<ContextT>::validateTree() const {
return true;
}
+#endif
/// \brief Print the cycle info.
template <typename ContextT>
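In short, getCyclePredecessor() finds the unique entering block of a reducible cycle, and getCyclePreheader() additionally requires that block to have a single successor and to be legal to hoist into. A hedged sketch of the intended guard, where insertHoisted is a hypothetical helper and the template only type-checks at instantiation:

template <typename CycleT, typename InstT>
bool tryHoist(const CycleT &Cycle, InstT *I) {
  if (auto *Preheader = Cycle.getCyclePreheader()) {
    // The preheader dominates the cycle, has the header as its sole
    // successor, and isLegalToHoistInto() held, so inserting here is safe.
    insertHoisted(Preheader, I); // hypothetical insertion helper
    return true;
  }
  return false; // irreducible, several entering blocks, or unsafe target
}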
diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
index d5f9cd9142ac..970664b85715 100644
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -100,6 +100,10 @@ public:
BlockT *getHeader() const { return Entries[0]; }
+  const SmallVectorImpl<BlockT *> &getEntries() const {
+ return Entries;
+ }
+
/// \brief Return whether \p Block is an entry block of the cycle.
bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); }
@@ -124,6 +128,16 @@ public:
/// branched to.
void getExitBlocks(SmallVectorImpl<BlockT *> &TmpStorage) const;
+  /// Return the preheader block for this cycle. Per docs/LoopTerminology.rst,
+  /// a preheader is well-defined only for a reducible cycle: it is the unique
+  /// entering block, and its only edge goes to the entry block. Returns null
+  /// for irreducible cycles.
+ BlockT *getCyclePreheader() const;
+
+ /// If the cycle has exactly one entry with exactly one predecessor, return
+ /// it, otherwise return nullptr.
+ BlockT *getCyclePredecessor() const;
+
/// Iteration over child cycles.
//@{
using const_child_iterator_base =
@@ -178,6 +192,7 @@ public:
iterator_range<const_entry_iterator> entries() const {
return llvm::make_range(Entries.begin(), Entries.end());
}
+ //@}
Printable printEntries(const ContextT &Ctx) const {
return Printable([this, &Ctx](raw_ostream &Out) {
@@ -238,6 +253,7 @@ public:
const ContextT &getSSAContext() const { return Context; }
CycleT *getCycle(const BlockT *Block) const;
+ unsigned getCycleDepth(const BlockT *Block) const;
CycleT *getTopLevelParentCycle(const BlockT *Block) const;
/// Move \p Child to \p NewParent by manipulating Children vectors.
@@ -248,7 +264,9 @@ public:
/// Methods for debug and self-test.
//@{
+#ifndef NDEBUG
bool validateTree() const;
+#endif
void print(raw_ostream &Out) const;
void dump() const { print(dbgs()); }
//@}
diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h
index 368ed46f98d2..57f02df252c0 100644
--- a/llvm/include/llvm/ADT/IntervalMap.h
+++ b/llvm/include/llvm/ADT/IntervalMap.h
@@ -106,13 +106,10 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/bit.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/RecyclingAllocator.h"
#include <algorithm>
#include <cassert>
-#include <cstdint>
#include <iterator>
#include <new>
#include <utility>
@@ -969,7 +966,10 @@ public:
private:
// The root data is either a RootLeaf or a RootBranchData instance.
- AlignedCharArrayUnion<RootLeaf, RootBranchData> data;
+ union {
+ RootLeaf leaf;
+ RootBranchData branchData;
+ };
// Tree height.
// 0: Leaves in root.
@@ -983,25 +983,22 @@ private:
// Allocator used for creating external nodes.
Allocator &allocator;
- /// Represent data as a node type without breaking aliasing rules.
- template <typename T> T &dataAs() const { return *bit_cast<T *>(&data); }
-
const RootLeaf &rootLeaf() const {
assert(!branched() && "Cannot access leaf data in branched root");
- return dataAs<RootLeaf>();
+ return leaf;
}
RootLeaf &rootLeaf() {
assert(!branched() && "Cannot access leaf data in branched root");
- return dataAs<RootLeaf>();
+ return leaf;
}
- RootBranchData &rootBranchData() const {
+ const RootBranchData &rootBranchData() const {
assert(branched() && "Cannot access branch data in non-branched root");
- return dataAs<RootBranchData>();
+ return branchData;
}
RootBranchData &rootBranchData() {
assert(branched() && "Cannot access branch data in non-branched root");
- return dataAs<RootBranchData>();
+ return branchData;
}
const RootBranch &rootBranch() const { return rootBranchData().node; }
@@ -1042,11 +1039,20 @@ private:
public:
explicit IntervalMap(Allocator &a) : height(0), rootSize(0), allocator(a) {
- assert((uintptr_t(&data) & (alignof(RootLeaf) - 1)) == 0 &&
- "Insufficient alignment");
new(&rootLeaf()) RootLeaf();
}
+ // The default copy/move constructors and assignment operators would perform
+ // a shallow copy, leading to an incorrect internal state. To prevent
+ // accidental use, explicitly delete these operators.
+ // If necessary, implement them to perform a deep copy.
+ IntervalMap(const IntervalMap &Other) = delete;
+ IntervalMap(IntervalMap &&Other) = delete;
+ // Note: these are already implicitly deleted, because RootLeaf (union
+ // member) has a non-trivial assignment operator (because of std::pair).
+ IntervalMap &operator=(const IntervalMap &Other) = delete;
+ IntervalMap &operator=(IntervalMap &&Other) = delete;
+
~IntervalMap() {
clear();
rootLeaf().~RootLeaf();
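The deleted operations close a real hole: the root union owns tree nodes backed by the allocator, so the implicit memberwise copy would have produced two maps aliasing one tree. A short sketch of the now-rejected pattern and the alternative:

#include "llvm/ADT/IntervalMap.h"

void demo() {
  using Map = llvm::IntervalMap<unsigned, unsigned>;
  Map::Allocator Alloc;
  Map M(Alloc);
  M.insert(10, 20, 1); // map keys [10, 20] to value 1

  // Map M2 = M;       // now ill-formed: copying would alias tree nodes
  Map &View = M;       // share by reference (or pointer) instead
  (void)View;
}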
diff --git a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
index 975535bb5676..e41eb0639ce3 100644
--- a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -84,7 +84,7 @@ protected:
#ifndef NDEBUG
~RefCountedBase() {
assert(RefCount == 0 &&
- "Destruction occured when there are still references to this.");
+ "Destruction occurred when there are still references to this.");
}
#else
// Default the destructor in release builds; a trivial destructor may enable
@@ -115,7 +115,7 @@ protected:
#ifndef NDEBUG
~ThreadSafeRefCountedBase() {
assert(RefCount == 0 &&
- "Destruction occured when there are still references to this.");
+ "Destruction occurred when there are still references to this.");
}
#else
// Default the destructor in release builds; a trivial destructor may enable
diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h
index e047b0fc6514..d1615d903e98 100644
--- a/llvm/include/llvm/ADT/Optional.h
+++ b/llvm/include/llvm/ADT/Optional.h
@@ -60,85 +60,96 @@ template <typename T,
class OptionalStorage {
union {
char empty;
- T value;
+ T val;
};
- bool hasVal;
+ bool hasVal = false;
public:
~OptionalStorage() { reset(); }
- constexpr OptionalStorage() noexcept : empty(), hasVal(false) {}
+ constexpr OptionalStorage() noexcept : empty() {}
constexpr OptionalStorage(OptionalStorage const &other) : OptionalStorage() {
- if (other.hasValue()) {
- emplace(other.value);
+ if (other.has_value()) {
+ emplace(other.val);
}
}
constexpr OptionalStorage(OptionalStorage &&other) : OptionalStorage() {
- if (other.hasValue()) {
- emplace(std::move(other.value));
+ if (other.has_value()) {
+ emplace(std::move(other.val));
}
}
template <class... Args>
- constexpr explicit OptionalStorage(in_place_t, Args &&... args)
- : value(std::forward<Args>(args)...), hasVal(true) {}
+ constexpr explicit OptionalStorage(in_place_t, Args &&...args)
+ : val(std::forward<Args>(args)...), hasVal(true) {}
void reset() noexcept {
if (hasVal) {
- value.~T();
+ val.~T();
hasVal = false;
}
}
+ constexpr bool has_value() const noexcept { return hasVal; }
constexpr bool hasValue() const noexcept { return hasVal; }
- T &getValue() LLVM_LVALUE_FUNCTION noexcept {
+ T &value() &noexcept {
+ assert(hasVal);
+ return val;
+ }
+ T &getValue() &noexcept {
+ assert(hasVal);
+ return val;
+ }
+ constexpr T const &value() const &noexcept {
assert(hasVal);
- return value;
+ return val;
}
- constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
+ constexpr T const &getValue() const &noexcept {
assert(hasVal);
- return value;
+ return val;
}
-#if LLVM_HAS_RVALUE_REFERENCE_THIS
- T &&getValue() && noexcept {
+ T &&value() &&noexcept {
assert(hasVal);
- return std::move(value);
+ return std::move(val);
+ }
+ T &&getValue() &&noexcept {
+ assert(hasVal);
+ return std::move(val);
}
-#endif
- template <class... Args> void emplace(Args &&... args) {
+ template <class... Args> void emplace(Args &&...args) {
reset();
- ::new ((void *)std::addressof(value)) T(std::forward<Args>(args)...);
+ ::new ((void *)std::addressof(val)) T(std::forward<Args>(args)...);
hasVal = true;
}
OptionalStorage &operator=(T const &y) {
- if (hasValue()) {
- value = y;
+ if (has_value()) {
+ val = y;
} else {
- ::new ((void *)std::addressof(value)) T(y);
+ ::new ((void *)std::addressof(val)) T(y);
hasVal = true;
}
return *this;
}
OptionalStorage &operator=(T &&y) {
- if (hasValue()) {
- value = std::move(y);
+ if (has_value()) {
+ val = std::move(y);
} else {
- ::new ((void *)std::addressof(value)) T(std::move(y));
+ ::new ((void *)std::addressof(val)) T(std::move(y));
hasVal = true;
}
return *this;
}
OptionalStorage &operator=(OptionalStorage const &other) {
- if (other.hasValue()) {
- if (hasValue()) {
- value = other.value;
+ if (other.has_value()) {
+ if (has_value()) {
+ val = other.val;
} else {
- ::new ((void *)std::addressof(value)) T(other.value);
+ ::new ((void *)std::addressof(val)) T(other.val);
hasVal = true;
}
} else {
@@ -148,11 +159,11 @@ public:
}
OptionalStorage &operator=(OptionalStorage &&other) {
- if (other.hasValue()) {
- if (hasValue()) {
- value = std::move(other.value);
+ if (other.has_value()) {
+ if (has_value()) {
+ val = std::move(other.val);
} else {
- ::new ((void *)std::addressof(value)) T(std::move(other.value));
+ ::new ((void *)std::addressof(val)) T(std::move(other.val));
hasVal = true;
}
} else {
@@ -165,7 +176,7 @@ public:
template <typename T> class OptionalStorage<T, true> {
union {
char empty;
- T value;
+ T val;
};
bool hasVal = false;
@@ -181,53 +192,64 @@ public:
OptionalStorage &operator=(OptionalStorage &&other) = default;
template <class... Args>
- constexpr explicit OptionalStorage(in_place_t, Args &&... args)
- : value(std::forward<Args>(args)...), hasVal(true) {}
+ constexpr explicit OptionalStorage(in_place_t, Args &&...args)
+ : val(std::forward<Args>(args)...), hasVal(true) {}
void reset() noexcept {
if (hasVal) {
- value.~T();
+ val.~T();
hasVal = false;
}
}
+ constexpr bool has_value() const noexcept { return hasVal; }
constexpr bool hasValue() const noexcept { return hasVal; }
- T &getValue() LLVM_LVALUE_FUNCTION noexcept {
+ T &value() &noexcept {
+ assert(hasVal);
+ return val;
+ }
+ T &getValue() &noexcept {
assert(hasVal);
- return value;
+ return val;
}
- constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
+ constexpr T const &value() const &noexcept {
assert(hasVal);
- return value;
+ return val;
}
-#if LLVM_HAS_RVALUE_REFERENCE_THIS
- T &&getValue() && noexcept {
+ constexpr T const &getValue() const &noexcept {
assert(hasVal);
- return std::move(value);
+ return val;
+ }
+ T &&value() &&noexcept {
+ assert(hasVal);
+ return std::move(val);
+ }
+ T &&getValue() &&noexcept {
+ assert(hasVal);
+ return std::move(val);
}
-#endif
- template <class... Args> void emplace(Args &&... args) {
+ template <class... Args> void emplace(Args &&...args) {
reset();
- ::new ((void *)std::addressof(value)) T(std::forward<Args>(args)...);
+ ::new ((void *)std::addressof(val)) T(std::forward<Args>(args)...);
hasVal = true;
}
OptionalStorage &operator=(T const &y) {
- if (hasValue()) {
- value = y;
+ if (has_value()) {
+ val = y;
} else {
- ::new ((void *)std::addressof(value)) T(y);
+ ::new ((void *)std::addressof(val)) T(y);
hasVal = true;
}
return *this;
}
OptionalStorage &operator=(T &&y) {
- if (hasValue()) {
- value = std::move(y);
+ if (has_value()) {
+ val = std::move(y);
} else {
- ::new ((void *)std::addressof(value)) T(std::move(y));
+ ::new ((void *)std::addressof(val)) T(std::move(y));
hasVal = true;
}
return *this;
@@ -278,52 +300,55 @@ public:
void reset() { Storage.reset(); }
- constexpr const T *getPointer() const { return &Storage.getValue(); }
- T *getPointer() { return &Storage.getValue(); }
- constexpr const T &getValue() const LLVM_LVALUE_FUNCTION {
- return Storage.getValue();
- }
- T &getValue() LLVM_LVALUE_FUNCTION { return Storage.getValue(); }
+ constexpr const T *getPointer() const { return &Storage.value(); }
+ T *getPointer() { return &Storage.value(); }
+ constexpr const T &value() const & { return Storage.value(); }
+ constexpr const T &getValue() const & { return Storage.value(); }
+ T &value() & { return Storage.value(); }
+ T &getValue() & { return Storage.value(); }
- constexpr explicit operator bool() const { return hasValue(); }
- constexpr bool hasValue() const { return Storage.hasValue(); }
+ constexpr explicit operator bool() const { return has_value(); }
+ constexpr bool has_value() const { return Storage.has_value(); }
+ constexpr bool hasValue() const { return Storage.has_value(); }
constexpr const T *operator->() const { return getPointer(); }
T *operator->() { return getPointer(); }
- constexpr const T &operator*() const LLVM_LVALUE_FUNCTION {
- return getValue();
- }
- T &operator*() LLVM_LVALUE_FUNCTION { return getValue(); }
+ constexpr const T &operator*() const & { return value(); }
+ T &operator*() & { return value(); }
- template <typename U>
- constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION {
- return hasValue() ? getValue() : std::forward<U>(value);
+ template <typename U> constexpr T value_or(U &&alt) const & {
+ return has_value() ? value() : std::forward<U>(alt);
+ }
+ template <typename U> constexpr T getValueOr(U &&alt) const & {
+ return has_value() ? value() : std::forward<U>(alt);
}
/// Apply a function to the value if present; otherwise return None.
template <class Function>
- auto map(const Function &F) const LLVM_LVALUE_FUNCTION
- -> Optional<decltype(F(getValue()))> {
- if (*this) return F(getValue());
+ auto map(const Function &F) const & -> Optional<decltype(F(value()))> {
+ if (*this)
+ return F(value());
return None;
}
-#if LLVM_HAS_RVALUE_REFERENCE_THIS
- T &&getValue() && { return std::move(Storage.getValue()); }
- T &&operator*() && { return std::move(Storage.getValue()); }
+ T &&value() && { return std::move(Storage.value()); }
+ T &&getValue() && { return std::move(Storage.value()); }
+ T &&operator*() && { return std::move(Storage.value()); }
- template <typename U>
- T getValueOr(U &&value) && {
- return hasValue() ? std::move(getValue()) : std::forward<U>(value);
+ template <typename U> T value_or(U &&alt) && {
+ return has_value() ? std::move(value()) : std::forward<U>(alt);
+ }
+ template <typename U> T getValueOr(U &&alt) && {
+ return has_value() ? std::move(value()) : std::forward<U>(alt);
}
/// Apply a function to the value if present; otherwise return None.
template <class Function>
- auto map(const Function &F) &&
- -> Optional<decltype(F(std::move(*this).getValue()))> {
- if (*this) return F(std::move(*this).getValue());
+ auto map(const Function &F)
+ && -> Optional<decltype(F(std::move(*this).value()))> {
+ if (*this)
+ return F(std::move(*this).value());
return None;
}
-#endif
};
template <class T> llvm::hash_code hash_value(const Optional<T> &O) {
@@ -334,7 +359,7 @@ template <typename T, typename U>
constexpr bool operator==(const Optional<T> &X, const Optional<U> &Y) {
if (X && Y)
return *X == *Y;
- return X.hasValue() == Y.hasValue();
+ return X.has_value() == Y.has_value();
}
template <typename T, typename U>
@@ -346,7 +371,7 @@ template <typename T, typename U>
constexpr bool operator<(const Optional<T> &X, const Optional<U> &Y) {
if (X && Y)
return *X < *Y;
- return X.hasValue() < Y.hasValue();
+ return X.has_value() < Y.has_value();
}
template <typename T, typename U>
@@ -389,7 +414,7 @@ template <typename T> constexpr bool operator<(const Optional<T> &, NoneType) {
}
template <typename T> constexpr bool operator<(NoneType, const Optional<T> &X) {
- return X.hasValue();
+ return X.has_value();
}
template <typename T>
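The net effect is std::optional-compatible naming: has_value/value/value_or now sit alongside the legacy hasValue/getValue/getValueOr spellings, easing a later migration. A minimal sketch:

#include "llvm/ADT/Optional.h"

unsigned demo(llvm::Optional<unsigned> O) {
  if (O.has_value())      // new std::optional-style spelling
    return *O;            // operator* is unchanged
  return O.value_or(0u);  // new name for getValueOr(0u)
}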
diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h
index b7ddf8855605..7d10b2a6dd14 100644
--- a/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/llvm/include/llvm/ADT/PointerIntPair.h
@@ -61,19 +61,19 @@ public:
IntType getInt() const { return (IntType)Info::getInt(Value); }
- void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
+ void setPointer(PointerTy PtrVal) & {
Value = Info::updatePointer(Value, PtrVal);
}
- void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION {
+ void setInt(IntType IntVal) & {
Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal));
}
- void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION {
+ void initWithPointer(PointerTy PtrVal) & {
Value = Info::updatePointer(0, PtrVal);
}
- void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION {
+ void setPointerAndInt(PointerTy PtrVal, IntType IntVal) & {
Value = Info::updateInt(Info::updatePointer(0, PtrVal),
static_cast<intptr_t>(IntVal));
}
@@ -91,7 +91,7 @@ public:
void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }
- void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION {
+ void setFromOpaqueValue(void *Val) & {
Value = reinterpret_cast<intptr_t>(Val);
}
diff --git a/llvm/include/llvm/ADT/PointerSumType.h b/llvm/include/llvm/ADT/PointerSumType.h
index a7ef774e205e..57f045035a78 100644
--- a/llvm/include/llvm/ADT/PointerSumType.h
+++ b/llvm/include/llvm/ADT/PointerSumType.h
@@ -272,11 +272,12 @@ struct DenseMapInfo<PointerSumType<TagT, MemberTs...>> {
using SomePointerInfo = DenseMapInfo<SomePointerT>;
static inline SumType getEmptyKey() {
- return SumType::create<SomeTag>(SomePointerInfo::getEmptyKey());
+ return SumType::template create<SomeTag>(SomePointerInfo::getEmptyKey());
}
static inline SumType getTombstoneKey() {
- return SumType::create<SomeTag>(SomePointerInfo::getTombstoneKey());
+ return SumType::template create<SomeTag>(
+ SomePointerInfo::getTombstoneKey());
}
static unsigned getHashValue(const SumType &Arg) {
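The 'template' keyword added in these calls is required because SumType depends on the surrounding template parameters, so the compiler cannot assume create names a member template. A self-contained illustration, with Wrap and f as illustrative names:

template <typename T> struct Wrap {
  template <typename U> static U make() { return U(); }
};

template <typename T> int f() {
  // return Wrap<T>::make<int>();       // ill-formed: '<' parses as less-than
  return Wrap<T>::template make<int>(); // 'template' disambiguator required
}

int use() { return f<double>(); }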
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h
index 04d566bbc75e..f01db09dd765 100644
--- a/llvm/include/llvm/ADT/PointerUnion.h
+++ b/llvm/include/llvm/ADT/PointerUnion.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <algorithm>
#include <cassert>
@@ -87,6 +88,9 @@ namespace pointer_union_detail {
};
}
+// This is a forward declaration of CastInfoPointerUnionImpl.
+// Refer to its definition below for further details.
+template <typename... PTs> struct CastInfoPointerUnionImpl;
/// A discriminated union of two or more pointer types, with the discriminator
/// in the low bit of the pointer.
///
@@ -122,6 +126,11 @@ class PointerUnion
using First = TypeAtIndex<0, PTs...>;
using Base = typename PointerUnion::PointerUnionMembers;
+ /// This is needed to give the CastInfo implementation below access
+ /// to protected members.
+ /// Refer to its definition for further details.
+ friend struct CastInfoPointerUnionImpl<PTs...>;
+
public:
PointerUnion() = default;
@@ -134,25 +143,24 @@ public:
explicit operator bool() const { return !isNull(); }
+ // FIXME: Replace the uses of is(), get() and dyn_cast() with
+  // isa<T>, cast<T>, and llvm::dyn_cast<T>.
+
/// Test if the Union currently holds the type matching T.
- template <typename T> bool is() const {
- return this->Val.getInt() == FirstIndexOfType<T, PTs...>::value;
- }
+ template <typename T> inline bool is() const { return isa<T>(*this); }
/// Returns the value of the specified pointer type.
///
/// If the specified pointer type is incorrect, assert.
- template <typename T> T get() const {
- assert(is<T>() && "Invalid accessor called");
- return PointerLikeTypeTraits<T>::getFromVoidPointer(this->Val.getPointer());
+ template <typename T> inline T get() const {
+ assert(isa<T>(*this) && "Invalid accessor called");
+ return cast<T>(*this);
}
/// Returns the current pointer if it is of the specified pointer type,
/// otherwise returns null.
- template <typename T> T dyn_cast() const {
- if (is<T>())
- return get<T>();
- return T();
+ template <typename T> inline T dyn_cast() const {
+ return llvm::dyn_cast<T>(*this);
}
/// If the union is set to the first pointer type get an address pointing to
@@ -205,6 +213,52 @@ bool operator<(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) {
return lhs.getOpaqueValue() < rhs.getOpaqueValue();
}
+/// We can't (at least, at this moment with C++14) declare CastInfo
+/// as a friend of PointerUnion like this:
+/// ```
+/// template<typename To>
+/// friend struct CastInfo<To, PointerUnion<PTs...>>;
+/// ```
+/// The compiler complains 'Partial specialization cannot be declared as a
+/// friend'.
+/// So we define this struct to be a bridge between CastInfo and
+/// PointerUnion.
+template <typename... PTs> struct CastInfoPointerUnionImpl {
+ using From = PointerUnion<PTs...>;
+
+ template <typename To> static inline bool isPossible(From &F) {
+ return F.Val.getInt() == FirstIndexOfType<To, PTs...>::value;
+ }
+
+ template <typename To> static To doCast(From &F) {
+    assert(isPossible<To>(F) && "cast to an incompatible type!");
+ return PointerLikeTypeTraits<To>::getFromVoidPointer(F.Val.getPointer());
+ }
+};
+
+// Specialization of CastInfo for PointerUnion
+template <typename To, typename... PTs>
+struct CastInfo<To, PointerUnion<PTs...>>
+ : public DefaultDoCastIfPossible<To, PointerUnion<PTs...>,
+ CastInfo<To, PointerUnion<PTs...>>> {
+ using From = PointerUnion<PTs...>;
+ using Impl = CastInfoPointerUnionImpl<PTs...>;
+
+ static inline bool isPossible(From &f) {
+ return Impl::template isPossible<To>(f);
+ }
+
+ static To doCast(From &f) { return Impl::template doCast<To>(f); }
+
+ static inline To castFailed() { return To(); }
+};
+
+template <typename To, typename... PTs>
+struct CastInfo<To, const PointerUnion<PTs...>>
+ : public ConstStrippingForwardingCast<To, const PointerUnion<PTs...>,
+ CastInfo<To, PointerUnion<PTs...>>> {
+};
+
// Teach SmallPtrSet that PointerUnion is "basically a pointer", that has
// # low bits available = min(PT1bits,PT2bits)-1.
template <typename ...PTs>
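With this CastInfo specialization, the generic llvm casting functions work directly on PointerUnion, and the member is()/get()/dyn_cast() become thin shims over them. A usage sketch:

#include "llvm/ADT/PointerUnion.h"

void demo(int *IP, float *FP) {
  llvm::PointerUnion<int *, float *> PU = IP;
  bool B = llvm::isa<int *>(PU);           // replaces PU.is<int *>()
  int *I = llvm::cast<int *>(PU);          // replaces PU.get<int *>()
  float *F = llvm::dyn_cast<float *>(PU);  // nullptr when the type mismatches
  (void)B; (void)I; (void)F; (void)FP;
}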
diff --git a/llvm/include/llvm/ADT/SCCIterator.h b/llvm/include/llvm/ADT/SCCIterator.h
index ad35e09f0f74..e4035a02b5f5 100644
--- a/llvm/include/llvm/ADT/SCCIterator.h
+++ b/llvm/include/llvm/ADT/SCCIterator.h
@@ -348,9 +348,14 @@ scc_member_iterator<GraphT, GT>::scc_member_iterator(
NodeInfoMap[Edge->Target].Visited = false;
std::queue<NodeType *> Queue;
- for (auto &Node : NodeInfoMap)
- if (Node.second.Visited)
- Queue.push(Node.first);
+  // Initialize the queue with MST roots. Note that walking through SortedEdges
+ // instead of NodeInfoMap ensures an ordered deterministic push.
+ for (auto *Edge : SortedEdges) {
+ if (NodeInfoMap[Edge->Source].Visited) {
+ Queue.push(Edge->Source);
+ NodeInfoMap[Edge->Source].Visited = false;
+ }
+ }
while (!Queue.empty()) {
auto *Node = Queue.front();
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index e2972f4f902a..0efa96e69a8c 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -129,7 +129,7 @@ struct function_traits<ReturnType (ClassType::*)(Args...) const, false> {
/// Overload for class function types.
template <typename ClassType, typename ReturnType, typename... Args>
struct function_traits<ReturnType (ClassType::*)(Args...), false>
- : function_traits<ReturnType (ClassType::*)(Args...) const> {};
+ : public function_traits<ReturnType (ClassType::*)(Args...) const> {};
/// Overload for non-class function types.
template <typename ReturnType, typename... Args>
struct function_traits<ReturnType (*)(Args...), false> {
@@ -143,6 +143,9 @@ struct function_traits<ReturnType (*)(Args...), false> {
template <size_t i>
using arg_t = typename std::tuple_element<i, std::tuple<Args...>>::type;
};
+template <typename ReturnType, typename... Args>
+struct function_traits<ReturnType (*const)(Args...), false>
+ : public function_traits<ReturnType (*)(Args...)> {};
/// Overload for non-class function type references.
template <typename ReturnType, typename... Args>
struct function_traits<ReturnType (&)(Args...), false>
@@ -203,6 +206,17 @@ struct FirstIndexOfType<T, T, Us...> : std::integral_constant<size_t, 0> {};
template <size_t I, typename... Ts>
using TypeAtIndex = std::tuple_element_t<I, std::tuple<Ts...>>;
+/// Helper which adds two underlying types of enumeration type.
+/// Implicit conversion to a common type is accepted.
+template <typename EnumTy1, typename EnumTy2,
+ typename UT1 = std::enable_if_t<std::is_enum<EnumTy1>::value,
+ std::underlying_type_t<EnumTy1>>,
+ typename UT2 = std::enable_if_t<std::is_enum<EnumTy2>::value,
+ std::underlying_type_t<EnumTy2>>>
+constexpr auto addEnumValues(EnumTy1 LHS, EnumTy2 RHS) {
+ return static_cast<UT1>(LHS) + static_cast<UT2>(RHS);
+}
+
//===----------------------------------------------------------------------===//
// Extra additions to <iterator>
//===----------------------------------------------------------------------===//
@@ -268,6 +282,13 @@ template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N = 1) {
adl_end(RangeOrContainer));
}
+/// Return a range covering \p RangeOrContainer with the last N elements
+/// excluded.
+template <typename T> auto drop_end(T &&RangeOrContainer, size_t N = 1) {
+ return make_range(adl_begin(RangeOrContainer),
+ std::prev(adl_end(RangeOrContainer), N));
+}
+
// mapped_iterator - This is a simple iterator adapter that causes a function to
// be applied whenever operator* is invoked on the iterator.
@@ -423,6 +444,16 @@ public:
findNextValid();
return *this;
}
+
+ decltype(auto) operator*() const {
+ assert(BaseT::wrapped() != End && "Cannot dereference end iterator!");
+ return BaseT::operator*();
+ }
+
+ decltype(auto) operator->() const {
+ assert(BaseT::wrapped() != End && "Cannot dereference end iterator!");
+ return BaseT::operator->();
+ }
};
/// Specialization of filter_iterator_base for forward iteration only.
@@ -1160,13 +1191,15 @@ public:
}
/// Compare this range with another.
- template <typename OtherT> bool operator==(const OtherT &other) const {
- return size() ==
- static_cast<size_t>(std::distance(other.begin(), other.end())) &&
- std::equal(begin(), end(), other.begin());
+ template <typename OtherT>
+ friend bool operator==(const indexed_accessor_range_base &lhs,
+ const OtherT &rhs) {
+ return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
}
- template <typename OtherT> bool operator!=(const OtherT &other) const {
- return !(*this == other);
+ template <typename OtherT>
+ friend bool operator!=(const indexed_accessor_range_base &lhs,
+ const OtherT &rhs) {
+ return !(lhs == rhs);
}
/// Return the size of this range.
@@ -1650,6 +1683,15 @@ bool is_contained(R &&Range, const E &Element) {
return std::find(adl_begin(Range), adl_end(Range), Element) != adl_end(Range);
}
+template <typename T>
+constexpr bool is_contained(std::initializer_list<T> Set, T Value) {
+ // TODO: Use std::find when we switch to C++20.
+ for (T V : Set)
+ if (V == Value)
+ return true;
+ return false;
+}
+
/// Wrapper function around std::is_sorted to check if elements in a range \p R
/// are sorted with respect to a comparator \p C.
template <typename R, typename Compare> bool is_sorted(R &&Range, Compare C) {
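Three small utilities arrive in this hunk: drop_end mirrors drop_begin, addEnumValues adds two enums over their underlying types, and the initializer_list overload of is_contained is constexpr so it can back static_asserts. A sketch, with Base, Offset, and sumAllButLast as illustrative names:

#include "llvm/ADT/STLExtras.h"
#include <vector>

enum class Base : unsigned { Ten = 10 };     // illustrative enums
enum class Offset : unsigned { Three = 3 };

int sumAllButLast(const std::vector<int> &V) {
  int Sum = 0;
  for (int X : llvm::drop_end(V)) // every element except the last
    Sum += X;
  return Sum;
}

// Mixed-enum arithmetic over the underlying types, usable in constexpr:
static_assert(llvm::addEnumValues(Base::Ten, Offset::Three) == 13u, "");
// And the new constexpr is_contained works in constant expressions too:
static_assert(llvm::is_contained({1, 2, 3}, 2), "membership at compile time");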
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index a4a790323a6b..e34702bdbb3c 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
///
-/// /file
+/// \file
/// This file defines the SmallVector class.
///
//===----------------------------------------------------------------------===//
@@ -949,6 +949,9 @@ public:
return std::lexicographical_compare(this->begin(), this->end(),
RHS.begin(), RHS.end());
}
+ bool operator>(const SmallVectorImpl &RHS) const { return RHS < *this; }
+ bool operator<=(const SmallVectorImpl &RHS) const { return !(*this > RHS); }
+ bool operator>=(const SmallVectorImpl &RHS) const { return !(*this < RHS); }
};
template <typename T>
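operator< already existed; the patch completes the relational set so SmallVectors compare lexicographically, like std::vector. For example:

#include "llvm/ADT/SmallVector.h"
#include <cassert>

void demo() {
  llvm::SmallVector<int, 4> A = {1, 2, 3};
  llvm::SmallVector<int, 4> B = {1, 2, 4};
  // Lexicographic, mirroring std::vector's relational operators.
  assert(A < B && B > A && A <= A && B >= A);
}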
diff --git a/llvm/include/llvm/ADT/Statistic.h b/llvm/include/llvm/ADT/Statistic.h
index c39e161bcbcd..6c195cc44990 100644
--- a/llvm/include/llvm/ADT/Statistic.h
+++ b/llvm/include/llvm/ADT/Statistic.h
@@ -53,7 +53,7 @@ public:
const char *const Name;
const char *const Desc;
- std::atomic<unsigned> Value;
+ std::atomic<uint64_t> Value;
std::atomic<bool> Initialized;
constexpr TrackingStatistic(const char *DebugType, const char *Name,
@@ -65,12 +65,12 @@ public:
const char *getName() const { return Name; }
const char *getDesc() const { return Desc; }
- unsigned getValue() const { return Value.load(std::memory_order_relaxed); }
+ uint64_t getValue() const { return Value.load(std::memory_order_relaxed); }
// Allow use of this class as the value itself.
- operator unsigned() const { return getValue(); }
+ operator uint64_t() const { return getValue(); }
- const TrackingStatistic &operator=(unsigned Val) {
+ const TrackingStatistic &operator=(uint64_t Val) {
Value.store(Val, std::memory_order_relaxed);
return init();
}
@@ -80,7 +80,7 @@ public:
return init();
}
- unsigned operator++(int) {
+ uint64_t operator++(int) {
init();
return Value.fetch_add(1, std::memory_order_relaxed);
}
@@ -90,27 +90,27 @@ public:
return init();
}
- unsigned operator--(int) {
+ uint64_t operator--(int) {
init();
return Value.fetch_sub(1, std::memory_order_relaxed);
}
- const TrackingStatistic &operator+=(unsigned V) {
+ const TrackingStatistic &operator+=(uint64_t V) {
if (V == 0)
return *this;
Value.fetch_add(V, std::memory_order_relaxed);
return init();
}
- const TrackingStatistic &operator-=(unsigned V) {
+ const TrackingStatistic &operator-=(uint64_t V) {
if (V == 0)
return *this;
Value.fetch_sub(V, std::memory_order_relaxed);
return init();
}
- void updateMax(unsigned V) {
- unsigned PrevMax = Value.load(std::memory_order_relaxed);
+ void updateMax(uint64_t V) {
+ uint64_t PrevMax = Value.load(std::memory_order_relaxed);
// Keep trying to update max until we succeed or another thread produces
// a bigger max than us.
while (V > PrevMax && !Value.compare_exchange_weak(
@@ -134,26 +134,26 @@ public:
NoopStatistic(const char * /*DebugType*/, const char * /*Name*/,
const char * /*Desc*/) {}
- unsigned getValue() const { return 0; }
+ uint64_t getValue() const { return 0; }
// Allow use of this class as the value itself.
- operator unsigned() const { return 0; }
+ operator uint64_t() const { return 0; }
- const NoopStatistic &operator=(unsigned Val) { return *this; }
+ const NoopStatistic &operator=(uint64_t Val) { return *this; }
const NoopStatistic &operator++() { return *this; }
- unsigned operator++(int) { return 0; }
+ uint64_t operator++(int) { return 0; }
const NoopStatistic &operator--() { return *this; }
- unsigned operator--(int) { return 0; }
+ uint64_t operator--(int) { return 0; }
- const NoopStatistic &operator+=(const unsigned &V) { return *this; }
+ const NoopStatistic &operator+=(const uint64_t &V) { return *this; }
- const NoopStatistic &operator-=(const unsigned &V) { return *this; }
+ const NoopStatistic &operator-=(const uint64_t &V) { return *this; }
- void updateMax(unsigned V) {}
+ void updateMax(uint64_t V) {}
};
#if LLVM_ENABLE_STATS
@@ -200,7 +200,7 @@ void PrintStatisticsJSON(raw_ostream &OS);
/// during its execution. It will return the value at the point that it is
/// read. However, it will prevent new statistics from registering until it
/// completes.
-const std::vector<std::pair<StringRef, unsigned>> GetStatistics();
+const std::vector<std::pair<StringRef, uint64_t>> GetStatistics();
/// Reset the statistics. This can be used to zero and de-register the
/// statistics in order to measure a compilation.
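Widening the counters to uint64_t keeps long-running compilations from wrapping at 2^32; the STATISTIC macro itself is unchanged. A sketch, with NumBytes and account as illustrative names:

#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "demo"
STATISTIC(NumBytes, "Number of bytes processed"); // unchanged macro

void account(uint64_t N) {
  NumBytes += N;          // no longer wraps once totals exceed UINT32_MAX
  NumBytes.updateMax(N);  // max tracking is 64-bit as well
}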
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 118def2f43e1..80ba47dd619c 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -240,6 +240,10 @@ namespace llvm {
unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) const;
+ LLVM_NODISCARD unsigned
+ edit_distance_insensitive(StringRef Other, bool AllowReplacements = true,
+ unsigned MaxEditDistance = 0) const;
+
/// str - Get the contents as an std::string.
LLVM_NODISCARD
std::string str() const {
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index 42277c013035..9d85a28fbf04 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -56,7 +56,10 @@ public:
bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian)
csky, // CSKY: csky
+ dxil, // DXIL 32-bit DirectX bytecode
hexagon, // Hexagon: hexagon
+ loongarch32, // LoongArch (32-bit): loongarch32
+ loongarch64, // LoongArch (64-bit): loongarch64
m68k, // M68k: Motorola 680x0 family
mips, // MIPS: mips, mipsallegrex, mipsr6
mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el
@@ -146,7 +149,15 @@ public:
MipsSubArch_r6,
- PPCSubArch_spe
+ PPCSubArch_spe,
+
+ // SPIR-V sub-arch corresponds to its version.
+ SPIRVSubArch_v10,
+ SPIRVSubArch_v11,
+ SPIRVSubArch_v12,
+ SPIRVSubArch_v13,
+ SPIRVSubArch_v14,
+ SPIRVSubArch_v15,
};
enum VendorType {
UnknownVendor,
@@ -195,9 +206,11 @@ public:
NVCL, // NVIDIA OpenCL
AMDHSA, // AMD HSA Runtime
PS4,
+ PS5,
ELFIAMCU,
TvOS, // Apple tvOS
WatchOS, // Apple watchOS
+ DriverKit, // Apple DriverKit
Mesa3D,
Contiki,
AMDPAL, // AMD PAL Runtime
@@ -205,7 +218,8 @@ public:
Hurd, // GNU/Hurd
WASI, // Experimental WebAssembly OS
Emscripten,
- LastOSType = Emscripten
+ ShaderModel, // DirectX ShaderModel
+ LastOSType = ShaderModel
};
enum EnvironmentType {
UnknownEnvironment,
@@ -232,15 +246,35 @@ public:
CoreCLR,
Simulator, // Simulator variants of other systems, e.g., Apple's iOS
MacABI, // Mac Catalyst variant of Apple's iOS deployment target.
- LastEnvironmentType = MacABI
+
+ // Shader Stages
+ Pixel,
+ Vertex,
+ Geometry,
+ Hull,
+ Domain,
+ Compute,
+ Library,
+ RayGeneration,
+ Intersection,
+ AnyHit,
+ ClosestHit,
+ Miss,
+ Callable,
+ Mesh,
+ Amplification,
+
+ LastEnvironmentType = Amplification
};
enum ObjectFormatType {
UnknownObjectFormat,
COFF,
+ DXContainer,
ELF,
GOFF,
MachO,
+ SPIRV,
Wasm,
XCOFF,
};
@@ -360,6 +394,9 @@ public:
/// with WatchOS or generic triples.
VersionTuple getWatchOSVersion() const;
+ /// Parse the version number as with getOSVersion.
+ VersionTuple getDriverKitVersion() const;
+
/// @}
/// @name Direct Component Access
/// @{
@@ -462,11 +499,14 @@ public:
return getSubArch() == Triple::ARMSubArch_v7k;
}
+ /// Is this an Apple DriverKit triple.
+ bool isDriverKit() const { return getOS() == Triple::DriverKit; }
+
bool isOSzOS() const { return getOS() == Triple::ZOS; }
- /// Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
+ /// Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, or DriverKit).
bool isOSDarwin() const {
- return isMacOSX() || isiOS() || isWatchOS();
+ return isMacOSX() || isiOS() || isWatchOS() || isDriverKit();
}
bool isSimulatorEnvironment() const {
@@ -640,19 +680,23 @@ public:
return getObjectFormat() == Triple::XCOFF;
}
- /// Tests whether the target is the PS4 CPU
- bool isPS4CPU() const {
+ /// Tests whether the target is the PS4 platform.
+ bool isPS4() const {
return getArch() == Triple::x86_64 &&
getVendor() == Triple::SCEI &&
getOS() == Triple::PS4;
}
- /// Tests whether the target is the PS4 platform
- bool isPS4() const {
- return getVendor() == Triple::SCEI &&
- getOS() == Triple::PS4;
+ /// Tests whether the target is the PS5 platform.
+ bool isPS5() const {
+ return getArch() == Triple::x86_64 &&
+ getVendor() == Triple::SCEI &&
+ getOS() == Triple::PS5;
}
+ /// Tests whether the target is the PS4 or PS5 platform.
+ bool isPS() const { return isPS4() || isPS5(); }
+
/// Tests whether the target is Android
bool isAndroid() const { return getEnvironment() == Triple::Android; }
@@ -676,6 +720,11 @@ public:
getEnvironment() == Triple::MuslX32;
}
+ /// Tests whether the target is DXIL.
+ bool isDXIL() const {
+ return getArch() == Triple::dxil;
+ }
+
/// Tests whether the target is SPIR (32- or 64-bit).
bool isSPIR() const {
return getArch() == Triple::spir || getArch() == Triple::spir64;
@@ -774,6 +823,11 @@ public:
: PointerWidth == 64;
}
+ /// Tests whether the target is LoongArch (32- and 64-bit).
+ bool isLoongArch() const {
+ return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64;
+ }
+
/// Tests whether the target is MIPS 32-bit (little and big endian).
bool isMIPS32() const {
return getArch() == Triple::mips || getArch() == Triple::mipsel;
@@ -810,6 +864,17 @@ public:
return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
}
+ /// Tests whether the target is 32-bit SPARC (little and big endian).
+ bool isSPARC32() const {
+ return getArch() == Triple::sparc || getArch() == Triple::sparcel;
+ }
+
+ /// Tests whether the target is 64-bit SPARC (big endian).
+ bool isSPARC64() const { return getArch() == Triple::sparcv9; }
+
+ /// Tests whether the target is SPARC.
+ bool isSPARC() const { return isSPARC32() || isSPARC64(); }
+
/// Tests whether the target is SystemZ.
bool isSystemZ() const {
return getArch() == Triple::systemz;
@@ -863,7 +928,7 @@ public:
}
/// Tests if the environment supports dllimport/export annotations.
- bool hasDLLImportExport() const { return isOSWindows() || isPS4CPU(); }
+ bool hasDLLImportExport() const { return isOSWindows() || isPS(); }
/// @}
/// @name Mutators
@@ -971,7 +1036,7 @@ public:
/// Get the "prefix" canonical name for the \p Kind architecture. This is the
/// prefix used by the architecture specific builtins, and is suitable for
- /// passing to \see Intrinsic::getIntrinsicForGCCBuiltin().
+ /// passing to \see Intrinsic::getIntrinsicForClangBuiltin().
///
/// \return - The architecture prefix, or 0 if none is defined.
static StringRef getArchTypePrefix(ArchType Kind);
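A hedged sketch of the new predicates, assuming the triple strings parse as shown: isPS() covers both consoles, and DriverKit joins the Darwin family.

#include "llvm/ADT/Triple.h"

void demo() {
  llvm::Triple PS4("x86_64-scei-ps4");
  bool OnPS = PS4.isPS();          // true for PS4; PS5 triples satisfy it too

  llvm::Triple DK("arm64-apple-driverkit");
  bool Darwin = DK.isOSDarwin();   // DriverKit now counts as a Darwin OS
  (void)OnPS; (void)Darwin;
}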
diff --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h
index c480c1e7cd78..6df3db6125d4 100644
--- a/llvm/include/llvm/ADT/edit_distance.h
+++ b/llvm/include/llvm/ADT/edit_distance.h
@@ -28,6 +28,9 @@ namespace llvm {
///
/// \param ToArray the second sequence to compare.
///
+/// \param Map A Functor to apply to each item of the sequences before
+/// comparison.
+///
/// \param AllowReplacements whether to allow element replacements (change one
/// element into another) as a single operation, rather than as two operations
/// (an insertion and a removal).
@@ -39,10 +42,10 @@ namespace llvm {
/// \returns the minimum number of element insertions, removals, or (if
/// \p AllowReplacements is \c true) replacements needed to transform one of
/// the given sequences into the other. If zero, the sequences are identical.
-template<typename T>
-unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
- bool AllowReplacements = true,
- unsigned MaxEditDistance = 0) {
+template <typename T, typename Functor>
+unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
+ Functor Map, bool AllowReplacements = true,
+ unsigned MaxEditDistance = 0) {
// The algorithm implemented below is the "classic"
// dynamic-programming algorithm for computing the Levenshtein
// distance, which is described here:
@@ -58,6 +61,15 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
typename ArrayRef<T>::size_type m = FromArray.size();
typename ArrayRef<T>::size_type n = ToArray.size();
+ if (MaxEditDistance) {
+ // If the difference in size between the 2 arrays is larger than the max
+ // distance allowed, we can bail out as we will always need at least
+ // MaxEditDistance insertions or removals.
+ typename ArrayRef<T>::size_type AbsDiff = m > n ? m - n : n - m;
+ if (AbsDiff > MaxEditDistance)
+ return MaxEditDistance + 1;
+ }
+
const unsigned SmallBufferSize = 64;
unsigned SmallBuffer[SmallBufferSize];
std::unique_ptr<unsigned[]> Allocated;
@@ -75,15 +87,16 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
unsigned BestThisRow = Row[0];
unsigned Previous = y - 1;
+ const auto &CurItem = Map(FromArray[y - 1]);
for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
int OldRow = Row[x];
if (AllowReplacements) {
- Row[x] = std::min(
- Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
- std::min(Row[x-1], Row[x])+1);
+ Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u),
+ std::min(Row[x - 1], Row[x]) + 1);
}
else {
- if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous;
+ if (CurItem == Map(ToArray[x - 1]))
+ Row[x] = Previous;
else Row[x] = std::min(Row[x-1], Row[x]) + 1;
}
Previous = OldRow;
@@ -98,6 +111,15 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
return Result;
}
+template <typename T>
+unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
+ bool AllowReplacements = true,
+ unsigned MaxEditDistance = 0) {
+ return ComputeMappedEditDistance(
+ FromArray, ToArray, [](const T &X) -> const T & { return X; },
+ AllowReplacements, MaxEditDistance);
+}
+
} // End llvm namespace
#endif
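Threading the Map functor through the DP kernel is what lets a case-insensitive distance reuse the same code. The sketch below mirrors what StringRef::edit_distance_insensitive is expected to do, using a tolower mapper; caseFoldedDistance is an illustrative name:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/edit_distance.h"
#include <cctype>

// Case-folded Levenshtein distance via the new mapped kernel.
unsigned caseFoldedDistance(llvm::ArrayRef<char> A, llvm::ArrayRef<char> B) {
  return llvm::ComputeMappedEditDistance(
      A, B, [](char C) { return std::tolower((unsigned char)C); });
}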
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index d4febe6c1db9..c065553db8e9 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -38,7 +38,6 @@
#define LLVM_ANALYSIS_ALIASANALYSIS_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -64,6 +63,7 @@ class LoopInfo;
class PreservedAnalyses;
class TargetLibraryInfo;
class Value;
+template <typename> class SmallPtrSetImpl;
/// The possible results of an alias query.
///
@@ -413,8 +413,12 @@ class EarliestEscapeInfo final : public CaptureInfo {
/// This is used for cache invalidation purposes.
DenseMap<Instruction *, TinyPtrVector<const Value *>> Inst2Obj;
+ const SmallPtrSetImpl<const Value *> &EphValues;
+
public:
- EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI) : DT(DT), LI(LI) {}
+ EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI,
+ const SmallPtrSetImpl<const Value *> &EphValues)
+ : DT(DT), LI(LI), EphValues(EphValues) {}
bool isNotCapturedBeforeOrAt(const Value *Object,
const Instruction *I) override;
@@ -1267,6 +1271,10 @@ bool isIdentifiedObject(const Value *V);
/// IdentifiedObjects.
bool isIdentifiedFunctionLocal(const Value *V);
+/// Returns true if the pointer is one which would have been considered an
+/// escape by isNonEscapingLocalObject.
+bool isEscapeSource(const Value *V);
+
/// Return true if Object memory is not visible after an unwind, in the sense
/// that program semantics cannot depend on Object containing any particular
/// value on unwind. If the RequiresNoCaptureBeforeUnwind out parameter is set
diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 2dd2e7ca916d..48181cc52626 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -24,12 +24,12 @@
#ifndef LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H
#define LLVM_ANALYSIS_ALIASANALYSISEVALUATOR_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
class AAResults;
+class Function;
+class FunctionPass;
class AAEvaluator : public PassInfoMixin<AAEvaluator> {
int64_t FunctionCount = 0;
diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h
index b66ff395454d..78f5545ab215 100644
--- a/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -22,13 +22,10 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Casting.h"
#include <cassert>
#include <cstddef>
-#include <cstdint>
#include <iterator>
#include <vector>
@@ -224,10 +221,6 @@ public:
// track of the list's exact size.
unsigned size() { return SetSize; }
- /// If this alias set is known to contain a single instruction and *only* a
- /// single unique instruction, return it. Otherwise, return nullptr.
- Instruction* getUniqueInstruction();
-
void print(raw_ostream &OS) const;
void dump() const;
diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index 77da19110246..785980130386 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -14,14 +14,14 @@
#ifndef LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H
#define LLVM_ANALYSIS_ASSUMEBUNDLEQUERIES_H
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/IntrinsicInst.h"
namespace llvm {
class AssumptionCache;
class DominatorTree;
+class Instruction;
+class Value;
/// Index of elements in the operand bundle.
/// If the element exist it is guaranteed to be what is specified in this enum
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index 97dda58109e9..46f14a21a9ff 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -18,8 +18,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
-#include <algorithm>
-#include <cstdint>
#include <memory>
#include <utility>
diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 858dd369dd0b..d8e524d7cb80 100644
--- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Twine.h"
@@ -31,7 +32,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ScaledNumber.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,7 +45,6 @@
#include <list>
#include <queue>
#include <string>
-#include <unordered_set>
#include <utility>
#include <vector>
@@ -1300,7 +1299,7 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
auto &HeaderNode = Loop.Nodes[H];
assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() &&
"Shouldn't have a weight metadata");
- uint64_t MinWeight = MinHeaderWeight.getValue();
+ uint64_t MinWeight = *MinHeaderWeight;
LLVM_DEBUG(dbgs() << "Giving weight " << MinWeight << " to "
<< getBlockName(HeaderNode) << "\n");
if (MinWeight)
@@ -1516,7 +1515,7 @@ void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
// Find all blocks to apply inference on, that is, reachable from the entry
// along edges with non-zero probabilities
std::queue<const BlockT *> Queue;
- std::unordered_set<const BlockT *> Reachable;
+ SmallPtrSet<const BlockT *, 8> Reachable;
const BlockT *Entry = &F->front();
Queue.push(Entry);
Reachable.insert(Entry);
@@ -1527,16 +1526,14 @@ void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
auto EP = BPI->getEdgeProbability(SrcBB, DstBB);
if (EP.isZero())
continue;
- if (Reachable.find(DstBB) == Reachable.end()) {
+ if (Reachable.insert(DstBB).second)
Queue.push(DstBB);
- Reachable.insert(DstBB);
- }
}
}
// Find all blocks to apply inference on, that is, backward reachable from
// the entry along (backward) edges with non-zero probabilities
- std::unordered_set<const BlockT *> InverseReachable;
+ SmallPtrSet<const BlockT *, 8> InverseReachable;
for (const BlockT &BB : *F) {
// An exit block is a block without any successors
bool HasSucc = GraphTraits<const BlockT *>::child_begin(&BB) !=
@@ -1553,10 +1550,8 @@ void BlockFrequencyInfoImpl<BT>::findReachableBlocks(
auto EP = BPI->getEdgeProbability(DstBB, SrcBB);
if (EP.isZero())
continue;
- if (InverseReachable.find(DstBB) == InverseReachable.end()) {
+ if (InverseReachable.insert(DstBB).second)
Queue.push(DstBB);
- InverseReachable.insert(DstBB);
- }
}
}
@@ -1581,15 +1576,14 @@ void BlockFrequencyInfoImpl<BT>::initTransitionProbabilities(
// Find unique successors and corresponding probabilities for every block
for (size_t Src = 0; Src < NumBlocks; Src++) {
const BlockT *BB = Blocks[Src];
- std::unordered_set<const BlockT *> UniqueSuccs;
+ SmallPtrSet<const BlockT *, 2> UniqueSuccs;
for (const auto SI : children<const BlockT *>(BB)) {
// Ignore cold blocks
if (BlockIndex.find(SI) == BlockIndex.end())
continue;
// Ignore parallel edges between BB and SI blocks
- if (UniqueSuccs.find(SI) != UniqueSuccs.end())
+ if (!UniqueSuccs.insert(SI).second)
continue;
- UniqueSuccs.insert(SI);
// Ignore jumps with zero probability
auto EP = BPI->getEdgeProbability(BB, SI);
if (EP.isZero())
@@ -1875,7 +1869,7 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
case GVDT_Count: {
auto Count = Graph->getBlockProfileCount(Node);
if (Count)
- OS << Count.getValue();
+ OS << *Count;
else
OS << "Unknown";
break;
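The switch from std::unordered_set to SmallPtrSet above also folds the membership test into the insertion. A minimal sketch of that idiom, with the hypothetical NodeT/Successors names standing in for the block type and GraphTraits plumbing:

#include "llvm/ADT/SmallPtrSet.h"
#include <queue>

// insert() returns an (iterator, inserted) pair; the bool is true only the
// first time a node is seen, so the old find()+insert() sequence collapses
// into a single probe.
template <typename NodeT, typename SuccRangeFn>
void visitOnce(const NodeT *Entry, SuccRangeFn Successors) {
  std::queue<const NodeT *> Queue;
  llvm::SmallPtrSet<const NodeT *, 8> Reachable;
  Queue.push(Entry);
  Reachable.insert(Entry);
  while (!Queue.empty()) {
    const NodeT *N = Queue.front();
    Queue.pop();
    for (const NodeT *Succ : Successors(N))
      if (Reachable.insert(Succ).second) // false if already present
        Queue.push(Succ);
  }
}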
diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index e2099eba0f65..28418198acea 100644
--- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -16,14 +16,12 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h
index c0cabceb4a54..768cda59c57d 100644
--- a/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -18,7 +18,6 @@
#ifndef LLVM_ANALYSIS_CFGPRINTER_H
#define LLVM_ANALYSIS_CFGPRINTER_H
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/HeatUtils.h"
@@ -27,10 +26,11 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/GraphWriter.h"
namespace llvm {
+template <class GraphType> struct GraphTraits;
class CFGViewerPass : public PassInfoMixin<CFGViewerPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h
index 2eae2824bec3..6543c53c9b28 100644
--- a/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h
+++ b/llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h
@@ -14,10 +14,12 @@
#ifndef LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H
#define LLVM_ANALYSIS_CFLALIASANALYSISUTILS_H
+#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/ValueHandle.h"
namespace llvm {
+
namespace cflaa {
template <typename AAResult> struct FunctionHandle final : public CallbackVH {
diff --git a/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 5f5e52af3d88..dfb363173187 100644
--- a/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -15,7 +15,6 @@
#define LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFLAliasAnalysisUtils.h"
#include "llvm/IR/PassManager.h"
@@ -25,6 +24,7 @@
namespace llvm {
+template <typename T> class Optional;
class Function;
class MemoryLocation;
class TargetLibraryInfo;
diff --git a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index ec05b3706ca3..865f4a54c094 100644
--- a/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -15,13 +15,11 @@
#define LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFLAliasAnalysisUtils.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
#include <forward_list>
#include <memory>
diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 7cf172dc1dd1..9d1b331346b6 100644
--- a/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -88,27 +88,21 @@
#ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H
#define LLVM_ANALYSIS_CGSCCPASSMANAGER_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PriorityWorklist.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <utility>
namespace llvm {
+class Function;
+class Value;
+template <typename T, unsigned int N> class SmallPriorityWorklist;
struct CGSCCUpdateResult;
+
class Module;
// Allow debug logging in this inline function.
@@ -278,16 +272,6 @@ struct CGSCCUpdateResult {
/// the list and removing entries from it.
SmallPtrSetImpl<LazyCallGraph::SCC *> &InvalidatedSCCs;
- /// If non-null, the updated current \c RefSCC being processed.
- ///
- /// This is set when a graph refinement takes place and the "current" point
- /// in the graph moves "down" or earlier in the post-order walk. This will
- /// often cause the "current" RefSCC to be a newly created RefSCC object and
- /// the old one to be added to the above worklist. When that happens, this
- /// pointer is non-null and can be used to continue processing the "top" of
- /// the post-order walk.
- LazyCallGraph::RefSCC *UpdatedRC;
-
/// If non-null, the updated current \c SCC being processed.
///
/// This is set when a graph refinement takes place and the "current" point
diff --git a/llvm/include/llvm/Analysis/CallGraph.h b/llvm/include/llvm/Analysis/CallGraph.h
index 4da448c9900b..88d56785de67 100644
--- a/llvm/include/llvm/Analysis/CallGraph.h
+++ b/llvm/include/llvm/Analysis/CallGraph.h
@@ -45,9 +45,6 @@
#ifndef LLVM_ANALYSIS_CALLGRAPH_H
#define LLVM_ANALYSIS_CALLGRAPH_H
-#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
@@ -61,7 +58,9 @@
namespace llvm {
+template <class GraphType> struct GraphTraits;
class CallGraphNode;
+class Function;
class Module;
class raw_ostream;
diff --git a/llvm/include/llvm/Analysis/CallPrinter.h b/llvm/include/llvm/Analysis/CallPrinter.h
index 8d4159f3ddc0..d325d0010371 100644
--- a/llvm/include/llvm/Analysis/CallPrinter.h
+++ b/llvm/include/llvm/Analysis/CallPrinter.h
@@ -14,10 +14,24 @@
#ifndef LLVM_ANALYSIS_CALLPRINTER_H
#define LLVM_ANALYSIS_CALLPRINTER_H
+#include "llvm/IR/PassManager.h"
+
namespace llvm {
class ModulePass;
+/// Pass for printing the call graph to a dot file
+class CallGraphDOTPrinterPass : public PassInfoMixin<CallGraphDOTPrinterPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+/// Pass for viewing the call graph
+class CallGraphViewerPass : public PassInfoMixin<CallGraphViewerPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
ModulePass *createCallGraphViewerPass();
ModulePass *createCallGraphDOTPrinterPass();
diff --git a/llvm/include/llvm/Analysis/CaptureTracking.h b/llvm/include/llvm/Analysis/CaptureTracking.h
index 50d12db7a1c3..a2d9277745e4 100644
--- a/llvm/include/llvm/Analysis/CaptureTracking.h
+++ b/llvm/include/llvm/Analysis/CaptureTracking.h
@@ -14,6 +14,7 @@
#define LLVM_ANALYSIS_CAPTURETRACKING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
namespace llvm {
@@ -24,6 +25,7 @@ namespace llvm {
class DominatorTree;
class LoopInfo;
class Function;
+ template <typename T> class SmallPtrSetImpl;
/// getDefaultMaxUsesToExploreForCaptureTracking - Return default value of
/// the maximal number of uses to explore before giving up. It is used by
@@ -41,7 +43,13 @@ namespace llvm {
/// one value before giving up due to "too many uses". If MaxUsesToExplore
/// is zero, a default value is assumed.
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
+ bool StoreCaptures, unsigned MaxUsesToExplore = 0);
+
+ /// Variant of the above function which accepts a set of Values that are
+ /// ephemeral and cannot cause pointers to escape.
+ bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
bool StoreCaptures,
+ const SmallPtrSetImpl<const Value *> &EphValues,
unsigned MaxUsesToExplore = 0);
/// PointerMayBeCapturedBefore - Return true if this pointer value may be
@@ -72,10 +80,11 @@ namespace llvm {
// nullptr is returned. Note that the caller of the function has to ensure
// that the instruction the result value is compared against is not in a
// cycle.
- Instruction *FindEarliestCapture(const Value *V, Function &F,
- bool ReturnCaptures, bool StoreCaptures,
- const DominatorTree &DT,
- unsigned MaxUsesToExplore = 0);
+ Instruction *
+ FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures,
+ bool StoreCaptures, const DominatorTree &DT,
+ const SmallPtrSetImpl<const Value *> &EphValues,
+ unsigned MaxUsesToExplore = 0);
/// This callback is used in conjunction with PointerMayBeCaptured. In
/// addition to the interface here, you'll need to provide your own getters
@@ -105,6 +114,24 @@ namespace llvm {
virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL);
};
+ /// Types of use capture kinds, see \p DetermineUseCaptureKind.
+ enum class UseCaptureKind {
+ NO_CAPTURE,
+ MAY_CAPTURE,
+ PASSTHROUGH,
+ };
+
+ /// Determine what kind of capture behaviour \p U may exhibit.
+ ///
+ /// A use can be no-capture, a use can potentially capture, or a use can be
+ /// passthrough such that the uses of the user or \p U should be inspected.
+ /// The \p IsDereferenceableOrNull callback is used to rule out capturing for
+ /// certain comparisons.
+ UseCaptureKind
+ DetermineUseCaptureKind(const Use &U,
+ llvm::function_ref<bool(Value *, const DataLayout &)>
+ IsDereferenceableOrNull);
+
/// PointerMayBeCaptured - Visit the value and the values derived from it and
/// find values which appear to be capturing the pointer value. This feeds
/// results into and is controlled by the CaptureTracker object.
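A sketch of how a client might drive the new three-way classification; the worklist loop and the always-false IsDereferenceableOrNull callback are illustrative assumptions, not the in-tree CaptureTracking implementation:

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include <vector>
using namespace llvm;

bool mayBeCaptured(const Value *V) {
  std::vector<const Use *> Worklist;
  for (const Use &U : V->uses())
    Worklist.push_back(&U);
  while (!Worklist.empty()) {
    const Use *U = Worklist.back();
    Worklist.pop_back();
    switch (DetermineUseCaptureKind(
        *U, [](Value *, const DataLayout &) { return false; })) {
    case UseCaptureKind::NO_CAPTURE:
      continue; // this use cannot capture V
    case UseCaptureKind::MAY_CAPTURE:
      return true; // conservatively treat as captured
    case UseCaptureKind::PASSTHROUGH:
      // e.g. a bitcast: the user's own uses must be inspected instead.
      for (const Use &UU : U->getUser()->uses())
        Worklist.push_back(&UU);
      continue;
    }
  }
  return false;
}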
diff --git a/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/llvm/include/llvm/Analysis/CmpInstAnalysis.h
index 3d34cd12aea4..332eb9b66e9c 100644
--- a/llvm/include/llvm/Analysis/CmpInstAnalysis.h
+++ b/llvm/include/llvm/Analysis/CmpInstAnalysis.h
@@ -17,7 +17,7 @@
#include "llvm/IR/InstrTypes.h"
namespace llvm {
- class ICmpInst;
+ class Type;
class Value;
/// Encode an icmp predicate into a three-bit mask. These bits are carefully
@@ -43,7 +43,7 @@ namespace llvm {
/// 110 6 A <= B
/// 111 7 Always true
///
- unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false);
+ unsigned getICmpCode(CmpInst::Predicate Pred);
/// This is the complement of getICmpCode. It turns a predicate code into
/// either a constant true or false or the predicate for a new ICmp.
@@ -58,6 +58,39 @@ namespace llvm {
/// equality comparison (which is signless).
bool predicatesFoldable(CmpInst::Predicate P1, CmpInst::Predicate P2);
+ /// Similar to getICmpCode but for FCmpInst. This encodes an fcmp predicate
+ /// into a four bit mask.
+ inline unsigned getFCmpCode(CmpInst::Predicate CC) {
+ assert(CmpInst::FCMP_FALSE <= CC && CC <= CmpInst::FCMP_TRUE &&
+ "Unexpected FCmp predicate!");
+ // Take advantage of the bit pattern of CmpInst::Predicate here.
+ // U L G E
+ static_assert(CmpInst::FCMP_FALSE == 0, ""); // 0 0 0 0
+ static_assert(CmpInst::FCMP_OEQ == 1, ""); // 0 0 0 1
+ static_assert(CmpInst::FCMP_OGT == 2, ""); // 0 0 1 0
+ static_assert(CmpInst::FCMP_OGE == 3, ""); // 0 0 1 1
+ static_assert(CmpInst::FCMP_OLT == 4, ""); // 0 1 0 0
+ static_assert(CmpInst::FCMP_OLE == 5, ""); // 0 1 0 1
+ static_assert(CmpInst::FCMP_ONE == 6, ""); // 0 1 1 0
+ static_assert(CmpInst::FCMP_ORD == 7, ""); // 0 1 1 1
+ static_assert(CmpInst::FCMP_UNO == 8, ""); // 1 0 0 0
+ static_assert(CmpInst::FCMP_UEQ == 9, ""); // 1 0 0 1
+ static_assert(CmpInst::FCMP_UGT == 10, ""); // 1 0 1 0
+ static_assert(CmpInst::FCMP_UGE == 11, ""); // 1 0 1 1
+ static_assert(CmpInst::FCMP_ULT == 12, ""); // 1 1 0 0
+ static_assert(CmpInst::FCMP_ULE == 13, ""); // 1 1 0 1
+ static_assert(CmpInst::FCMP_UNE == 14, ""); // 1 1 1 0
+ static_assert(CmpInst::FCMP_TRUE == 15, ""); // 1 1 1 1
+ return CC;
+ }
+
+ /// This is the complement of getFCmpCode. It turns a predicate code into
+ /// either a constant true or false or the predicate for a new FCmp.
+ /// Non-NULL return value will be a true or false constant.
+ /// NULL return means a new FCmp is needed. The predicate is output in Pred.
+ Constant *getPredForFCmpCode(unsigned Code, Type *OpTy,
+ CmpInst::Predicate &Pred);
+
/// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The
/// returned predicate is either == or !=. Returns false if decomposition
/// fails.
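Because the fcmp predicates form a U/L/G/E bitmask, combining two fcmp predicates reduces to bitwise logic on their codes. A hedged sketch using only the helpers declared above (the wrapper itself is hypothetical):

#include "llvm/Analysis/CmpInstAnalysis.h"
using namespace llvm;

// Predicate for "fcmp P1 a,b | fcmp P2 a,b": OR the codes, then decode.
// A non-null result is a constant true/false; null means a new fcmp with
// NewPred must be built.
Constant *foldFCmpOr(CmpInst::Predicate P1, CmpInst::Predicate P2, Type *OpTy,
                     CmpInst::Predicate &NewPred) {
  unsigned Code = getFCmpCode(P1) | getFCmpCode(P2);
  return getPredForFCmpCode(Code, OpTy, NewPred);
}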
diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h
index 615591aa83ad..a9431bca1125 100644
--- a/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_CODEMETRICS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/InstructionCost.h"
namespace llvm {
class AssumptionCache;
@@ -47,14 +48,14 @@ struct CodeMetrics {
/// True if this function calls alloca (in the C sense).
bool usesDynamicAlloca = false;
- /// Number of instructions in the analyzed blocks.
- unsigned NumInsts = false;
+ /// Code size cost of the analyzed blocks.
+ InstructionCost NumInsts = 0;
/// Number of analyzed blocks.
unsigned NumBlocks = false;
/// Keeps track of basic block code size estimates.
- DenseMap<const BasicBlock *, unsigned> NumBBInsts;
+ DenseMap<const BasicBlock *, InstructionCost> NumBBInsts;
/// Keep track of the number of calls to 'big' functions.
unsigned NumCalls = false;
diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index 37258c80e3a3..23ec7d6b70ec 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -19,16 +19,18 @@
#ifndef LLVM_ANALYSIS_CONSTANTFOLDING_H
#define LLVM_ANALYSIS_CONSTANTFOLDING_H
+#include <stdint.h>
+
namespace llvm {
class APInt;
template <typename T> class ArrayRef;
class CallBase;
class Constant;
-class ConstantExpr;
class DSOLocalEquivalent;
class DataLayout;
class Function;
class GlobalValue;
+class GlobalVariable;
class Instruction;
class TargetLibraryInfo;
class Type;
@@ -65,14 +67,13 @@ Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr);
-/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
-/// instruction (icmp/fcmp) with the specified operands. If it fails, it
-/// returns a constant expression of the specified operands.
-///
-Constant *
-ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS,
- Constant *RHS, const DataLayout &DL,
- const TargetLibraryInfo *TLI = nullptr);
+/// Attempt to constant fold a compare instruction (icmp/fcmp) with the
+/// specified operands. If it fails, it returns a constant expression of the
+/// specified operands.
+/// Denormal inputs may be flushed based on the denormal handling mode.
+Constant *ConstantFoldCompareInstOperands(
+ unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr, const Instruction *I = nullptr);
/// Attempt to constant fold a unary operation with the specified
/// operand. If it fails, it returns a constant expression of the specified
@@ -86,6 +87,21 @@ Constant *ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
Constant *RHS, const DataLayout &DL);
+/// Attempt to constant fold a floating point binary operation with the
+/// specified operands, applying the denormal handling mode to the operands. If
+/// it fails, it returns a constant expression of the specified operands.
+Constant *ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
+ Constant *RHS, const DataLayout &DL,
+ const Instruction *I);
+
+/// Attempt to flush a floating point constant according to the denormal mode
+/// set in the instruction's parent function attributes. If flushed, return a zero with the
+/// correct sign, otherwise return the original constant. Inputs and outputs to
+/// floating point instructions can have their mode set separately, so the
+/// direction is also needed.
+Constant *FlushFPConstant(Constant *Operand, const Instruction *I,
+ bool IsOutput);
+
/// Attempt to constant fold a select instruction with the specified
/// operands. The constant result is returned if successful; if not, null is
/// returned.
@@ -173,6 +189,8 @@ Constant *ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
/// Check whether the given call has no side-effects.
/// Specifically checks for math routines which sometimes set errno.
bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI);
+
+Constant *ReadByteArrayFromGlobal(const GlobalVariable *GV, uint64_t Offset);
}
#endif
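A minimal sketch of the extended compare-folding entry point; passing the instruction is what lets the folder consult the parent function's denormal-fp-math attributes (the wrapper function is hypothetical):

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

Constant *tryFoldCmp(CmpInst *Cmp, Constant *LHS, Constant *RHS,
                     const DataLayout &DL) {
  // The trailing instruction argument enables denormal-aware flushing of
  // LHS/RHS before the predicate is evaluated.
  return ConstantFoldCompareInstOperands(Cmp->getPredicate(), LHS, RHS, DL,
                                         /*TLI=*/nullptr, /*I=*/Cmp);
}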
diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index d7800f578325..2c83658b81dc 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -11,7 +11,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
@@ -37,7 +36,7 @@ class ConstraintSystem {
bool mayHaveSolutionImpl();
public:
- bool addVariableRow(const SmallVector<int64_t, 8> &R) {
+ bool addVariableRow(ArrayRef<int64_t> R) {
assert(Constraints.empty() || R.size() == Constraints.back().size());
// If all variable coefficients are 0, the constraint does not provide any
// usable information.
@@ -49,11 +48,16 @@ public:
GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD})
.getZExtValue();
}
- Constraints.push_back(R);
+ Constraints.emplace_back(R.begin(), R.end());
return true;
}
- bool addVariableRowFill(const SmallVector<int64_t, 8> &R) {
+ bool addVariableRowFill(ArrayRef<int64_t> R) {
+ // If all variable coefficients are 0, the constraint does not provide any
+ // usable information.
+ if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
+ return false;
+
for (auto &CR : Constraints) {
while (CR.size() != R.size())
CR.push_back(0);
@@ -75,7 +79,14 @@ public:
bool isConditionImplied(SmallVector<int64_t, 8> R) const;
+ ArrayRef<int64_t> getLastConstraint() { return Constraints[0]; }
void popLastConstraint() { Constraints.pop_back(); }
+ void popLastNVariables(unsigned N) {
+ for (auto &C : Constraints) {
+ for (unsigned i = 0; i < N; i++)
+ C.pop_back();
+ }
+ }
/// Returns the number of rows in the constraint system.
unsigned size() const { return Constraints.size(); }
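A short usage sketch of the widened row API; the comment about how the row encodes a constraint is an assumption about the coefficient layout, not taken from this header:

#include "llvm/Analysis/ConstraintSystem.h"
using namespace llvm;

void demo(ConstraintSystem &CS) {
  // ArrayRef binds directly to a C array now; no SmallVector copy needed.
  int64_t Row[] = {5, 1, -1}; // e.g. x - y <= 5 under the assumed layout
  CS.addVariableRow(Row);
  CS.popLastNVariables(1); // drop the trailing variable column of every row
  CS.popLastConstraint();
}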
diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h
index c5107da2a017..7649e630b23d 100644
--- a/llvm/include/llvm/Analysis/DDG.h
+++ b/llvm/include/llvm/Analysis/DDG.h
@@ -18,9 +18,11 @@
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DependenceGraphBuilder.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/IR/Instructions.h"
namespace llvm {
+class Function;
+class Loop;
+class LoopInfo;
class DDGNode;
class DDGEdge;
using DDGNodeBase = DGNode<DDGNode, DDGEdge>;
diff --git a/llvm/include/llvm/Analysis/DDGPrinter.h b/llvm/include/llvm/Analysis/DDGPrinter.h
index 4477b387fe50..d93c28280bac 100644
--- a/llvm/include/llvm/Analysis/DDGPrinter.h
+++ b/llvm/include/llvm/Analysis/DDGPrinter.h
@@ -16,10 +16,11 @@
#define LLVM_ANALYSIS_DDGPRINTER_H
#include "llvm/Analysis/DDG.h"
-#include "llvm/Pass.h"
#include "llvm/Support/DOTGraphTraits.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
//===--------------------------------------------------------------------===//
// Implementation of DDG DOT Printer for a loop.
diff --git a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
index d8021907b5b2..c35e189de6fc 100644
--- a/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -14,23 +14,156 @@
#define LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H
#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
namespace llvm {
/// Default traits class for extracting a graph from an analysis pass.
///
+/// This assumes that 'GraphT' is 'AnalysisT::Result *', and passes it through.
+template <typename Result, typename GraphT = Result *>
+struct DefaultAnalysisGraphTraits {
+ static GraphT getGraph(Result R) { return &R; }
+};
+
+template <typename GraphT>
+void viewGraphForFunction(Function &F, GraphT Graph, StringRef Name,
+ bool IsSimple) {
+ std::string GraphName = DOTGraphTraits<GraphT *>::getGraphName(&Graph);
+
+ ViewGraph(Graph, Name, IsSimple,
+ GraphName + " for '" + F.getName() + "' function");
+}
+
+template <typename AnalysisT, bool IsSimple,
+ typename GraphT = typename AnalysisT::Result *,
+ typename AnalysisGraphTraitsT =
+ DefaultAnalysisGraphTraits<typename AnalysisT::Result &, GraphT>>
+struct DOTGraphTraitsViewer
+ : PassInfoMixin<DOTGraphTraitsViewer<AnalysisT, IsSimple, GraphT,
+ AnalysisGraphTraitsT>> {
+ DOTGraphTraitsViewer(StringRef GraphName) : Name(GraphName) {}
+
+ /// Return true if this function should be processed.
+ ///
+ /// An implementation of this class may override this function to indicate that
+ /// only certain functions should be viewed.
+ ///
+ /// @param Result The current analysis result for this function.
+ virtual bool processFunction(Function &F,
+ const typename AnalysisT::Result &Result) {
+ return true;
+ }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
+ auto &Result = FAM.getResult<AnalysisT>(F);
+ if (!processFunction(F, Result))
+ return PreservedAnalyses::all();
+
+ GraphT Graph = AnalysisGraphTraitsT::getGraph(Result);
+ viewGraphForFunction(F, Graph, Name, IsSimple);
+
+ return PreservedAnalyses::all();
+ };
+
+protected:
+ /// Avoid compiler warning "has virtual functions but non-virtual destructor
+ /// [-Wnon-virtual-dtor]" in derived classes.
+ ///
+ /// DOTGraphTraitsViewer is also used as a mixin for avoiding repeated
+ /// implementation of viewer passes, i.e. there should be no
+ /// runtime-polymorphisms/downcasting involving this class and hence no
+ /// virtual destructor needed. Making this dtor protected stops accidental
+ /// invocation when the derived class destructor should have been called.
+ /// Those derived classes should be marked final to avoid the warning.
+ ~DOTGraphTraitsViewer() {}
+
+private:
+ StringRef Name;
+};
+
+template <typename GraphT>
+void printGraphForFunction(Function &F, GraphT Graph, StringRef Name,
+ bool IsSimple) {
+ std::string Filename = Name.str() + "." + F.getName().str() + ".dot";
+ std::error_code EC;
+
+ errs() << "Writing '" << Filename << "'...";
+
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF);
+ std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
+
+ if (!EC)
+ WriteGraph(File, Graph, IsSimple,
+ GraphName + " for '" + F.getName() + "' function");
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+}
+
+template <typename AnalysisT, bool IsSimple,
+ typename GraphT = typename AnalysisT::Result *,
+ typename AnalysisGraphTraitsT =
+ DefaultAnalysisGraphTraits<typename AnalysisT::Result &, GraphT>>
+struct DOTGraphTraitsPrinter
+ : PassInfoMixin<DOTGraphTraitsPrinter<AnalysisT, IsSimple, GraphT,
+ AnalysisGraphTraitsT>> {
+ DOTGraphTraitsPrinter(StringRef GraphName) : Name(GraphName) {}
+
+ /// Return true if this function should be processed.
+ ///
+ /// An implementation of this class may override this function to indicate that
+ /// only certain functions should be viewed.
+ ///
+ /// @param Result The current analysis result for this function.
+ virtual bool processFunction(Function &F,
+ const typename AnalysisT::Result &Result) {
+ return true;
+ }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
+ auto &Result = FAM.getResult<AnalysisT>(F);
+ if (!processFunction(F, Result))
+ return PreservedAnalyses::all();
+
+ GraphT Graph = AnalysisGraphTraitsT::getGraph(Result);
+
+ printGraphForFunction(F, Graph, Name, IsSimple);
+
+ return PreservedAnalyses::all();
+ };
+
+protected:
+ /// Avoid compiler warning "has virtual functions but non-virtual destructor
+ /// [-Wnon-virtual-dtor]" in derived classes.
+ ///
+ /// DOTGraphTraitsPrinter is also used as a mixin for avoiding repeated
+ /// implementation of printer passes, i.e. there should be no
+ /// runtime-polymorphisms/downcasting involving this class and hence no
+ /// virtual destructor needed. Making this dtor protected stops accidental
+ /// invocation when the derived class destructor should have been called.
+ /// Those derived classes should be marked final to avoid the warning.
+ ~DOTGraphTraitsPrinter() {}
+
+private:
+ StringRef Name;
+};
+
+/// Default traits class for extracting a graph from an analysis pass.
+///
/// This assumes that 'GraphT' is 'AnalysisT *' and so just passes it through.
template <typename AnalysisT, typename GraphT = AnalysisT *>
-struct DefaultAnalysisGraphTraits {
+struct LegacyDefaultAnalysisGraphTraits {
static GraphT getGraph(AnalysisT *A) { return A; }
};
-template <
- typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
- typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> >
-class DOTGraphTraitsViewer : public FunctionPass {
+template <typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
+ typename AnalysisGraphTraitsT =
+ LegacyDefaultAnalysisGraphTraits<AnalysisT, GraphT>>
+class DOTGraphTraitsViewerWrapperPass : public FunctionPass {
public:
- DOTGraphTraitsViewer(StringRef GraphName, char &ID)
+ DOTGraphTraitsViewerWrapperPass(StringRef GraphName, char &ID)
: FunctionPass(ID), Name(GraphName) {}
/// Return true if this function should be processed.
@@ -50,10 +183,7 @@ public:
return false;
GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis);
- std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
- std::string Title = GraphName + " for '" + F.getName().str() + "' function";
-
- ViewGraph(Graph, Name, IsSimple, Title);
+ viewGraphForFunction(F, Graph, Name, IsSimple);
return false;
}
@@ -67,12 +197,12 @@ private:
std::string Name;
};
-template <
- typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
- typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> >
-class DOTGraphTraitsPrinter : public FunctionPass {
+template <typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
+ typename AnalysisGraphTraitsT =
+ LegacyDefaultAnalysisGraphTraits<AnalysisT, GraphT>>
+class DOTGraphTraitsPrinterWrapperPass : public FunctionPass {
public:
- DOTGraphTraitsPrinter(StringRef GraphName, char &ID)
+ DOTGraphTraitsPrinterWrapperPass(StringRef GraphName, char &ID)
: FunctionPass(ID), Name(GraphName) {}
/// Return true if this function should be processed.
@@ -92,20 +222,7 @@ public:
return false;
GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis);
- std::string Filename = Name + "." + F.getName().str() + ".dot";
- std::error_code EC;
-
- errs() << "Writing '" << Filename << "'...";
-
- raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF);
- std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
- std::string Title = GraphName + " for '" + F.getName().str() + "' function";
-
- if (!EC)
- WriteGraph(File, Graph, IsSimple, Title);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ printGraphForFunction(F, Graph, Name, IsSimple);
return false;
}
@@ -119,12 +236,12 @@ private:
std::string Name;
};
-template <
- typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
- typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> >
-class DOTGraphTraitsModuleViewer : public ModulePass {
+template <typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
+ typename AnalysisGraphTraitsT =
+ LegacyDefaultAnalysisGraphTraits<AnalysisT, GraphT>>
+class DOTGraphTraitsModuleViewerWrapperPass : public ModulePass {
public:
- DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID)
+ DOTGraphTraitsModuleViewerWrapperPass(StringRef GraphName, char &ID)
: ModulePass(ID), Name(GraphName) {}
bool runOnModule(Module &M) override {
@@ -145,12 +262,12 @@ private:
std::string Name;
};
-template <
- typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
- typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> >
-class DOTGraphTraitsModulePrinter : public ModulePass {
+template <typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
+ typename AnalysisGraphTraitsT =
+ LegacyDefaultAnalysisGraphTraits<AnalysisT, GraphT>>
+class DOTGraphTraitsModulePrinterWrapperPass : public ModulePass {
public:
- DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID)
+ DOTGraphTraitsModulePrinterWrapperPass(StringRef GraphName, char &ID)
: ModulePass(ID), Name(GraphName) {}
bool runOnModule(Module &M) override {
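With the new-PM templates above, a dot printer for any analysis whose result has DOTGraphTraits is a one-liner; 'MyAnalysis' below is a placeholder, not an in-tree pass:

struct MyGraphPrinter final
    : DOTGraphTraitsPrinter<MyAnalysis, /*IsSimple=*/false> {
  // Emits mygraph.<function>.dot for every processed function; final keeps
  // the protected non-virtual destructor safe, as the comment above asks.
  MyGraphPrinter() : DOTGraphTraitsPrinter<MyAnalysis, false>("mygraph") {}
};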
diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index 6e942530f253..95a36b8b79a4 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -16,11 +16,11 @@
#ifndef LLVM_ANALYSIS_DELINEARIZATION_H
#define LLVM_ANALYSIS_DELINEARIZATION_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class raw_ostream;
+template <typename T> class SmallVectorImpl;
class GetElementPtrInst;
class ScalarEvolution;
class SCEV;
@@ -125,6 +125,17 @@ bool getIndexExpressionsFromGEP(ScalarEvolution &SE,
SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<int> &Sizes);
+/// Implementation of fixed size array delinearization. Try to delinearize
+/// access function for a fixed size multi-dimensional array, by deriving
+/// subscripts from GEP instructions. Returns true upon success and false
+/// otherwise. \p Inst is the load/store instruction whose pointer operand is
+/// the one we want to delinearize. \p AccessFn is its corresponding SCEV
+/// expression w.r.t. the surrounding loop.
+bool tryDelinearizeFixedSizeImpl(ScalarEvolution *SE, Instruction *Inst,
+ const SCEV *AccessFn,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<int> &Sizes);
+
struct DelinearizationPrinterPass
: public PassInfoMixin<DelinearizationPrinterPass> {
explicit DelinearizationPrinterPass(raw_ostream &OS);
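A hypothetical call site for the newly exposed fixed-size delinearization helper, assuming only the declaration above:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

bool delinearizeAccess(ScalarEvolution &SE, Instruction *LoadOrStore,
                       const SCEV *AccessFn,
                       SmallVectorImpl<const SCEV *> &Subscripts,
                       SmallVectorImpl<int> &Sizes) {
  // On success, Subscripts holds one SCEV per array dimension and Sizes the
  // constant extent of each inner dimension.
  return tryDelinearizeFixedSizeImpl(&SE, LoadOrStore, AccessFn, Subscripts,
                                     Sizes);
}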
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 638f4869d677..a34afe9fb38d 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -927,9 +927,9 @@ namespace llvm {
bool tryDelinearize(Instruction *Src, Instruction *Dst,
SmallVectorImpl<Subscript> &Pair);
- /// Tries to delinearize access function for a fixed size multi-dimensional
- /// array, by deriving subscripts from GEP instructions. Returns true upon
- /// success and false otherwise.
+ /// Tries to delinearize \p Src and \p Dst access functions for a fixed size
+ /// multi-dimensional array. Calls tryDelinearizeFixedSizeImpl() to
+ /// delinearize \p Src and \p Dst separately.
bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
const SCEV *SrcAccessFn,
const SCEV *DstAccessFn,
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index c52b42ae8dc2..4c2a5399ea54 100644
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -17,16 +17,16 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
+#include "llvm/IR/PassManager.h"
#include <vector>
namespace llvm {
-class Value;
+class Function;
class Instruction;
class Loop;
class raw_ostream;
class TargetTransformInfo;
+class Value;
/// \brief Generic divergence analysis for reducible CFGs.
///
@@ -41,7 +41,7 @@ public:
/// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop.
/// Otherwise the whole function is analyzed.
/// \param IsLCSSAForm whether the analysis may assume that the IR in the
- /// region in in LCSSA form.
+ /// region in LCSSA form.
DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop,
const DominatorTree &DT, const LoopInfo &LI,
SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
diff --git a/llvm/include/llvm/Analysis/DomPrinter.h b/llvm/include/llvm/Analysis/DomPrinter.h
index e6df12d88072..83fe721346ab 100644
--- a/llvm/include/llvm/Analysis/DomPrinter.h
+++ b/llvm/include/llvm/Analysis/DomPrinter.h
@@ -14,30 +14,120 @@
#ifndef LLVM_ANALYSIS_DOMPRINTER_H
#define LLVM_ANALYSIS_DOMPRINTER_H
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
-class DomTreePrinterPass : public PassInfoMixin<DomTreePrinterPass> {
-public:
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+template <>
+struct DOTGraphTraits<DomTreeNode *> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
+
+ BasicBlock *BB = Node->getBlock();
+
+ if (!BB)
+ return "Post dominance root node";
+
+ if (isSimple())
+ return DOTGraphTraits<DOTFuncInfo *>::getSimpleNodeLabel(BB, nullptr);
+
+ return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel(BB, nullptr);
+ }
+};
+
+template <>
+struct DOTGraphTraits<DominatorTree *>
+ : public DOTGraphTraits<DomTreeNode *> {
+
+ DOTGraphTraits(bool isSimple = false)
+ : DOTGraphTraits<DomTreeNode *>(isSimple) {}
+
+ static std::string getGraphName(DominatorTree *DT) {
+ return "Dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode *>::getNodeLabel(Node,
+ G->getRootNode());
+ }
+};
+
+template<>
+struct DOTGraphTraits<PostDominatorTree *>
+ : public DOTGraphTraits<DomTreeNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+ static std::string getGraphName(PostDominatorTree *DT) {
+ return "Post dominator tree";
+ }
+
+ std::string getNodeLabel(DomTreeNode *Node,
+ PostDominatorTree *G) {
+ return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+ }
+};
+
+struct DomViewer final : DOTGraphTraitsViewer<DominatorTreeAnalysis, false> {
+ DomViewer() : DOTGraphTraitsViewer<DominatorTreeAnalysis, false>("dom") {}
+};
+
+struct DomOnlyViewer final : DOTGraphTraitsViewer<DominatorTreeAnalysis, true> {
+ DomOnlyViewer()
+ : DOTGraphTraitsViewer<DominatorTreeAnalysis, true>("domonly") {}
+};
+
+struct PostDomViewer final
+ : DOTGraphTraitsViewer<PostDominatorTreeAnalysis, false> {
+ PostDomViewer()
+ : DOTGraphTraitsViewer<PostDominatorTreeAnalysis, false>("postdom") {}
+};
+
+struct PostDomOnlyViewer final
+ : DOTGraphTraitsViewer<PostDominatorTreeAnalysis, true> {
+ PostDomOnlyViewer()
+ : DOTGraphTraitsViewer<PostDominatorTreeAnalysis, true>("postdomonly") {}
+};
+
+struct DomPrinter final : DOTGraphTraitsPrinter<DominatorTreeAnalysis, false> {
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTreeAnalysis, false>("dom") {}
+};
+
+struct DomOnlyPrinter final
+ : DOTGraphTraitsPrinter<DominatorTreeAnalysis, true> {
+ DomOnlyPrinter()
+ : DOTGraphTraitsPrinter<DominatorTreeAnalysis, true>("domonly") {}
+};
+
+struct PostDomPrinter final
+ : DOTGraphTraitsPrinter<PostDominatorTreeAnalysis, false> {
+ PostDomPrinter()
+ : DOTGraphTraitsPrinter<PostDominatorTreeAnalysis, false>("postdom") {}
};
-class DomTreeOnlyPrinterPass : public PassInfoMixin<DomTreeOnlyPrinterPass> {
-public:
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+struct PostDomOnlyPrinter final
+ : DOTGraphTraitsPrinter<PostDominatorTreeAnalysis, true> {
+ PostDomOnlyPrinter()
+ : DOTGraphTraitsPrinter<PostDominatorTreeAnalysis, true>("postdomonly") {}
};
} // namespace llvm
namespace llvm {
class FunctionPass;
- FunctionPass *createDomPrinterPass();
- FunctionPass *createDomOnlyPrinterPass();
- FunctionPass *createDomViewerPass();
- FunctionPass *createDomOnlyViewerPass();
- FunctionPass *createPostDomPrinterPass();
- FunctionPass *createPostDomOnlyPrinterPass();
- FunctionPass *createPostDomViewerPass();
- FunctionPass *createPostDomOnlyViewerPass();
+ FunctionPass *createDomPrinterWrapperPassPass();
+ FunctionPass *createDomOnlyPrinterWrapperPassPass();
+ FunctionPass *createDomViewerWrapperPassPass();
+ FunctionPass *createDomOnlyViewerWrapperPassPass();
+ FunctionPass *createPostDomPrinterWrapperPassPass();
+ FunctionPass *createPostDomOnlyPrinterWrapperPassPass();
+ FunctionPass *createPostDomViewerWrapperPassPass();
+ FunctionPass *createPostDomOnlyViewerWrapperPassPass();
} // End llvm namespace
#endif
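Because the new viewers and printers are ordinary new-PM passes, wiring them into a pipeline needs none of the legacy create* glue; a sketch:

#include "llvm/Analysis/DomPrinter.h"
#include "llvm/IR/PassManager.h"

void addDomGraphPasses(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::DomViewer());          // pop up the "dom" graph
  FPM.addPass(llvm::PostDomOnlyPrinter()); // write postdomonly.<fn>.dot
}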
diff --git a/llvm/include/llvm/Analysis/DomTreeUpdater.h b/llvm/include/llvm/Analysis/DomTreeUpdater.h
index d09154d506ed..ddb958455ccd 100644
--- a/llvm/include/llvm/Analysis/DomTreeUpdater.h
+++ b/llvm/include/llvm/Analysis/DomTreeUpdater.h
@@ -150,49 +150,6 @@ public:
/// awaiting deletion immediately.
void recalculate(Function &F);
- /// \deprecated { Submit an edge insertion to all available trees. The Eager
- /// Strategy flushes this update immediately while the Lazy Strategy queues
- /// the update. An internal function checks if the edge exists in the CFG in
- /// DEBUG mode. CAUTION! This function has to be called *after* making the
- /// update on the actual CFG. It is illegal to submit any update that has
- /// already been applied. }
- LLVM_ATTRIBUTE_DEPRECATED(void insertEdge(BasicBlock *From, BasicBlock *To),
- "Use applyUpdates() instead.");
-
- /// \deprecated {Submit an edge insertion to all available trees.
- /// Under either Strategy, an invalid update will be discard silently.
- /// Invalid update means inserting an edge that does not exist in the CFG.
- /// The Eager Strategy flushes this update immediately while the Lazy Strategy
- /// queues the update. It is only recommended to use this method when you
- /// want to discard an invalid update.
- /// CAUTION! It is illegal to submit any update that has already been
- /// submitted. }
- LLVM_ATTRIBUTE_DEPRECATED(void insertEdgeRelaxed(BasicBlock *From,
- BasicBlock *To),
- "Use applyUpdatesPermissive() instead.");
-
- /// \deprecated { Submit an edge deletion to all available trees. The Eager
- /// Strategy flushes this update immediately while the Lazy Strategy queues
- /// the update. An internal function checks if the edge doesn't exist in the
- /// CFG in DEBUG mode.
- /// CAUTION! This function has to be called *after* making the update on the
- /// actual CFG. It is illegal to submit any update that has already been
- /// submitted. }
- LLVM_ATTRIBUTE_DEPRECATED(void deleteEdge(BasicBlock *From, BasicBlock *To),
- "Use applyUpdates() instead.");
-
- /// \deprecated { Submit an edge deletion to all available trees.
- /// Under either Strategy, an invalid update will be discard silently.
- /// Invalid update means deleting an edge that exists in the CFG.
- /// The Eager Strategy flushes this update immediately while the Lazy Strategy
- /// queues the update. It is only recommended to use this method when you
- /// want to discard an invalid update.
- /// CAUTION! It is illegal to submit any update that has already been
- /// submitted. }
- LLVM_ATTRIBUTE_DEPRECATED(void deleteEdgeRelaxed(BasicBlock *From,
- BasicBlock *To),
- "Use applyUpdatesPermissive() instead.");
-
/// Delete DelBB. DelBB will be removed from its Parent and
/// erased from available trees if it exists and finally get deleted.
/// Under Eager UpdateStrategy, DelBB will be processed immediately.
diff --git a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
index aa764be93b91..7a5f8f31bae3 100644
--- a/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
+++ b/llvm/include/llvm/Analysis/DominanceFrontierImpl.h
@@ -17,7 +17,6 @@
#ifndef LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H
#define LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Config/llvm-config.h"
diff --git a/llvm/include/llvm/Analysis/EHPersonalities.h b/llvm/include/llvm/Analysis/EHPersonalities.h
index eaada6627494..660d431bb063 100644
--- a/llvm/include/llvm/Analysis/EHPersonalities.h
+++ b/llvm/include/llvm/Analysis/EHPersonalities.h
@@ -11,7 +11,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class BasicBlock;
diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
index cf07c873b17c..a0f5331fdba5 100644
--- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
@@ -14,16 +14,33 @@
#ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
#define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class DominatorTree;
class Function;
+class LoopInfo;
class FunctionPropertiesInfo {
+ friend class FunctionPropertiesUpdater;
+ void updateForBB(const BasicBlock &BB, int64_t Direction);
+ void updateAggregateStats(const Function &F, const LoopInfo &LI);
+ void reIncludeBB(const BasicBlock &BB);
+
public:
- static FunctionPropertiesInfo getFunctionPropertiesInfo(const Function &F,
- const LoopInfo &LI);
+ static FunctionPropertiesInfo
+ getFunctionPropertiesInfo(const Function &F, FunctionAnalysisManager &FAM);
+
+ bool operator==(const FunctionPropertiesInfo &FPI) const {
+ return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0;
+ }
+
+ bool operator!=(const FunctionPropertiesInfo &FPI) const {
+ return !(*this == FPI);
+ }
void print(raw_ostream &OS) const;
@@ -57,6 +74,9 @@ public:
// Number of Top Level Loops in the Function
int64_t TopLevelLoopCount = 0;
+
+ // All non-debug instructions
+ int64_t TotalInstructionCount = 0;
};
// Analysis pass
@@ -66,9 +86,9 @@ class FunctionPropertiesAnalysis
public:
static AnalysisKey Key;
- using Result = FunctionPropertiesInfo;
+ using Result = const FunctionPropertiesInfo;
- Result run(Function &F, FunctionAnalysisManager &FAM);
+ FunctionPropertiesInfo run(Function &F, FunctionAnalysisManager &FAM);
};
/// Printer pass for the FunctionPropertiesAnalysis results.
@@ -82,5 +102,24 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+/// Correctly update FunctionPropertiesInfo post-inlining. A
+/// FunctionPropertiesUpdater keeps the state necessary for tracking the changes
+/// llvm::InlineFunction makes. The idea is that inlining will at most modify
+/// a few BBs of the Caller (maybe the entry BB and definitely the callsite BB)
+/// and potentially affect exception handling BBs in the case of invoke
+/// inlining.
+class FunctionPropertiesUpdater {
+public:
+ FunctionPropertiesUpdater(FunctionPropertiesInfo &FPI, const CallBase &CB);
+
+ void finish(FunctionAnalysisManager &FAM) const;
+
+private:
+ FunctionPropertiesInfo &FPI;
+ const BasicBlock &CallSiteBB;
+ const Function &Caller;
+
+ DenseSet<const BasicBlock *> Successors;
+};
} // namespace llvm
#endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
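A sketch of the update protocol the comment above implies; calling finish() only on successful inlining is an assumption of this example, not spelled out in the header:

#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

void inlineAndUpdate(FunctionPropertiesInfo &FPI, CallBase &CB,
                     FunctionAnalysisManager &FAM) {
  // Snapshot the callsite BB and its successors before InlineFunction
  // rewrites them.
  FunctionPropertiesUpdater Updater(FPI, CB);
  InlineFunctionInfo IFI;
  if (InlineFunction(CB, IFI).isSuccess())
    Updater.finish(FAM); // re-ingest the blocks inlining may have changed
}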
diff --git a/llvm/include/llvm/Analysis/GlobalsModRef.h b/llvm/include/llvm/Analysis/GlobalsModRef.h
index 7daaa7f484de..4d8ed10bb18e 100644
--- a/llvm/include/llvm/Analysis/GlobalsModRef.h
+++ b/llvm/include/llvm/Analysis/GlobalsModRef.h
@@ -14,15 +14,14 @@
#define LLVM_ANALYSIS_GLOBALSMODREF_H
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include <list>
namespace llvm {
class CallGraph;
+class Function;
/// An alias analysis result set for globals.
///
@@ -79,6 +78,8 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
const DataLayout &DL,
std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
+ friend struct RecomputeGlobalsAAPass;
+
public:
GlobalsAAResult(GlobalsAAResult &&Arg);
~GlobalsAAResult();
@@ -139,6 +140,10 @@ public:
GlobalsAAResult run(Module &M, ModuleAnalysisManager &AM);
};
+struct RecomputeGlobalsAAPass : PassInfoMixin<RecomputeGlobalsAAPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
/// Legacy wrapper pass to provide the GlobalsAAResult object.
class GlobalsAAWrapperPass : public ModulePass {
std::unique_ptr<GlobalsAAResult> Result;
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 90ab2833e428..a3f1c1335cac 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -51,12 +51,13 @@
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
namespace llvm {
+class Module;
+
namespace IRSimilarity {
struct IRInstructionDataList;
@@ -546,7 +547,7 @@ struct IRInstructionMapper {
// an outlined function. Also, assume-like intrinsics could be removed
// from the region, removing arguments, causing discrepancies in the
// number of inputs between different regions.
- if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
+ if (II.isAssumeLikeIntrinsic())
return Illegal;
return EnableIntrinsics ? Legal : Illegal;
}
@@ -559,6 +560,18 @@ struct IRInstructionMapper {
return Illegal;
if (!F && !IsIndirectCall)
return Illegal;
+ // Functions marked with the swifttailcc and tailcc calling conventions
+ // require special handling when outlining musttail functions. The
+ // calling convention must be passed down to the outlined function as
+ // well. Further, there is special handling for musttail calls as well,
+ // requiring a return call directly after. For now, the outliner does not
+ // support this, so we do not handle matching this case either.
+ if ((CI.getCallingConv() == CallingConv::SwiftTail ||
+ CI.getCallingConv() == CallingConv::Tail) &&
+ !EnableMustTailCalls)
+ return Illegal;
+ if (CI.isMustTailCall() && !EnableMustTailCalls)
+ return Illegal;
return Legal;
}
// TODO: We do not current handle similarity that changes the control flow.
@@ -580,6 +593,10 @@ struct IRInstructionMapper {
// Flag that lets the classifier know whether we should allow intrinsics to
// be checked for similarity.
bool EnableIntrinsics = false;
+
+ // Flag that lets the classifier know whether we should allow tail calls to
+ // be checked for similarity.
+ bool EnableMustTailCalls = false;
};
/// Maps an Instruction to a member of InstrType.
@@ -814,8 +831,6 @@ public:
void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
for (IRInstructionData &ID : *this) {
BasicBlock *BB = ID.Inst->getParent();
- if (BBSet.contains(BB))
- continue;
BBSet.insert(BB);
}
}
@@ -826,10 +841,8 @@ public:
SmallVector<BasicBlock *> &BBList) const {
for (IRInstructionData &ID : *this) {
BasicBlock *BB = ID.Inst->getParent();
- if (BBSet.contains(BB))
- continue;
- BBSet.insert(BB);
- BBList.push_back(BB);
+ if (BBSet.insert(BB).second)
+ BBList.push_back(BB);
}
}
@@ -967,11 +980,13 @@ public:
IRSimilarityIdentifier(bool MatchBranches = true,
bool MatchIndirectCalls = true,
bool MatchCallsWithName = false,
- bool MatchIntrinsics = true)
+ bool MatchIntrinsics = true,
+ bool MatchMustTailCalls = true)
: Mapper(&InstDataAllocator, &InstDataListAllocator),
EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
EnableMatchingCallsByName(MatchCallsWithName),
- EnableIntrinsics(MatchIntrinsics) {}
+ EnableIntrinsics(MatchIntrinsics),
+ EnableMustTailCalls(MatchMustTailCalls) {}
private:
/// Map the instructions in the module to unsigned integers, using mapping
@@ -1024,7 +1039,7 @@ public:
// If we've already analyzed a Module or set of Modules, we must clear
// the SimilarityCandidates to make sure we do not have stale values
// hanging around.
- if (SimilarityCandidates.hasValue())
+ if (SimilarityCandidates)
SimilarityCandidates->clear();
else
SimilarityCandidates = SimilarityGroupList();
@@ -1064,6 +1079,10 @@ private:
/// similarity.
bool EnableIntrinsics = true;
+ // The flag variable that marks whether we should allow tail calls
+ // to be checked for similarity.
+ bool EnableMustTailCalls = false;
+
/// The SimilarityGroups found with the most recent run of \ref
/// findSimilarity. None if there is no recent run.
Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index dec488a6f26d..231d3bbf534b 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -13,27 +13,23 @@
#ifndef LLVM_ANALYSIS_IVDESCRIPTORS_H
#define LLVM_ANALYSIS_IVDESCRIPTORS_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Casting.h"
namespace llvm {
-class DemandedBits;
class AssumptionCache;
+class DemandedBits;
+class DominatorTree;
+class Instruction;
class Loop;
class PredicatedScalarEvolution;
class ScalarEvolution;
class SCEV;
-class DominatorTree;
+class StoreInst;
/// These are the kinds of recurrences that we support.
enum class RecurKind {
@@ -74,14 +70,14 @@ class RecurrenceDescriptor {
public:
RecurrenceDescriptor() = default;
- RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K,
- FastMathFlags FMF, Instruction *ExactFP, Type *RT,
- bool Signed, bool Ordered,
+ RecurrenceDescriptor(Value *Start, Instruction *Exit, StoreInst *Store,
+ RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
+ Type *RT, bool Signed, bool Ordered,
SmallPtrSetImpl<Instruction *> &CI,
unsigned MinWidthCastToRecurTy)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
- ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed),
- IsOrdered(Ordered),
+ : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
+ Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
+ IsSigned(Signed), IsOrdered(Ordered),
MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
CastInsts.insert(CI.begin(), CI.end());
}
@@ -168,22 +164,21 @@ public:
/// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
/// non-null, the minimal bit width needed to compute the reduction will be
/// computed.
- static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
- FastMathFlags FuncFMF,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB = nullptr,
- AssumptionCache *AC = nullptr,
- DominatorTree *DT = nullptr);
+ static bool
+ AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
+ FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr);
/// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
/// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
/// non-null, the minimal bit width needed to compute the reduction will be
- /// computed.
- static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB = nullptr,
- AssumptionCache *AC = nullptr,
- DominatorTree *DT = nullptr);
+ /// computed. If \p SE is non-null, store instructions to loop invariant
+ /// addresses are processed.
+ static bool
+ isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr);
/// Returns true if Phi is a first-order recurrence. A first-order recurrence
/// is a non-reduction recurrence relation in which the value of the
@@ -275,6 +270,11 @@ public:
cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
}
+ /// Reductions may store temporary or final result to an invariant address.
+ /// If there is such a store in the loop then, after a successful run of
+ /// the AddReductionVar method, this field holds the last store encountered.
+ StoreInst *IntermediateStore = nullptr;
+
private:
// The starting value of the recurrence.
// It does not have to be zero!
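Assumed usage of the extended isReductionPHI overload; with a non-null ScalarEvolution the descriptor can also surface the invariant-address store:

#include "llvm/Analysis/IVDescriptors.h"
using namespace llvm;

bool findStoredReduction(PHINode *Phi, Loop *L, ScalarEvolution &SE,
                         RecurrenceDescriptor &RD) {
  if (!RecurrenceDescriptor::isReductionPHI(Phi, L, RD, /*DB=*/nullptr,
                                            /*AC=*/nullptr, /*DT=*/nullptr,
                                            &SE))
    return false;
  // Non-null only when the reduction stores its result to a loop-invariant
  // address.
  return RD.IntermediateStore != nullptr;
}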
diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h
index 390d09848dde..e5a496037691 100644
--- a/llvm/include/llvm/Analysis/IVUsers.h
+++ b/llvm/include/llvm/Analysis/IVUsers.h
@@ -23,8 +23,6 @@ namespace llvm {
class AssumptionCache;
class DominatorTree;
-class Instruction;
-class Value;
class ScalarEvolution;
class SCEV;
class IVUsers;
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 0103ee7f8386..31524126027b 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -9,19 +9,20 @@
#ifndef LLVM_ANALYSIS_INLINEADVISOR_H
#define LLVM_ANALYSIS_INLINEADVISOR_H
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/PassManager.h"
#include <memory>
-#include <unordered_set>
namespace llvm {
class BasicBlock;
class CallBase;
class Function;
class Module;
+class OptimizationRemark;
+class ImportedFunctionsInliningStatistics;
class OptimizationRemarkEmitter;
struct ReplayInlinerSettings;
@@ -40,6 +41,28 @@ struct ReplayInlinerSettings;
/// training.
enum class InliningAdvisorMode : int { Default, Release, Development };
+// Each entry represents an inline driver.
+enum class InlinePass : int {
+ AlwaysInliner,
+ CGSCCInliner,
+ EarlyInliner,
+ ModuleInliner,
+ MLInliner,
+ ReplayCGSCCInliner,
+ ReplaySampleProfileInliner,
+ SampleProfileInliner,
+};
+
+/// Provides context on when an inline advisor is constructed in the pipeline
+/// (e.g., link phase, inline driver).
+struct InlineContext {
+ ThinOrFullLTOPhase LTOPhase;
+
+ InlinePass Pass;
+};
+
+std::string AnnotateInlinePassName(InlineContext IC);
+
class InlineAdvisor;
/// Capture state between an inlining decision having had been made, and
/// its impact being observable. When collecting model training data, this
@@ -122,7 +145,7 @@ public:
DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
Optional<InlineCost> OIC, OptimizationRemarkEmitter &ORE,
bool EmitRemarks = true)
- : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), OriginalCB(&CB),
+ : InlineAdvice(Advisor, CB, ORE, OIC.has_value()), OriginalCB(&CB),
OIC(OIC), EmitRemarks(EmitRemarks) {}
private:
@@ -158,7 +181,7 @@ public:
/// This must be called when the Inliner pass is entered, to allow the
/// InlineAdvisor update internal state, as result of function passes run
/// between Inliner pass runs (for the same module).
- virtual void onPassEntry() {}
+ virtual void onPassEntry(LazyCallGraph::SCC *SCC = nullptr) {}
/// This must be called when the Inliner pass is exited, as function passes
/// may be run subsequently. This allows an implementation of InlineAdvisor
@@ -170,14 +193,22 @@ public:
OS << "Unimplemented InlineAdvisor print\n";
}
+ /// NOTE: The pass name is annotated only when the inline advisor constructor provides an InlineContext.
+ const char *getAnnotatedInlinePassName() const {
+ return AnnotatedInlinePassName.c_str();
+ }
+
protected:
- InlineAdvisor(Module &M, FunctionAnalysisManager &FAM);
+ InlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+ Optional<InlineContext> IC = NoneType::None);
virtual std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) = 0;
virtual std::unique_ptr<InlineAdvice> getMandatoryAdvice(CallBase &CB,
bool Advice);
Module &M;
FunctionAnalysisManager &FAM;
+ const Optional<InlineContext> IC;
+ const std::string AnnotatedInlinePassName;
std::unique_ptr<ImportedFunctionsInliningStatistics> ImportedFunctionsStats;
enum class MandatoryInliningKind { NotMandatory, Always, Never };
@@ -198,8 +229,8 @@ private:
class DefaultInlineAdvisor : public InlineAdvisor {
public:
DefaultInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
- InlineParams Params)
- : InlineAdvisor(M, FAM), Params(Params) {}
+ InlineParams Params, InlineContext IC)
+ : InlineAdvisor(M, FAM, IC), Params(Params) {}
private:
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
@@ -223,7 +254,8 @@ public:
return !PAC.preservedWhenStateless();
}
bool tryCreate(InlineParams Params, InliningAdvisorMode Mode,
- const ReplayInlinerSettings &ReplaySettings);
+ const ReplayInlinerSettings &ReplaySettings,
+ InlineContext IC);
InlineAdvisor *getAdvisor() const { return Advisor.get(); }
private:
@@ -244,6 +276,9 @@ public:
explicit InlineAdvisorAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+ PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR);
};
std::unique_ptr<InlineAdvisor>
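The InlineContext plumbing above can be exercised end-to-end with a small sketch. The helper below is hypothetical and not part of this change; it assumes only the declarations added in this hunk (InlineContext, InlinePass, AnnotateInlinePassName, and the new DefaultInlineAdvisor constructor).

#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include <memory>
using namespace llvm;

// Sketch: build a DefaultInlineAdvisor whose remarks carry a pass name
// annotated from the InlineContext (LTO phase plus inlining driver).
std::unique_ptr<InlineAdvisor> makeAdvisor(Module &M,
                                           FunctionAnalysisManager &FAM) {
  InlineContext IC{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner};
  std::string PassName = AnnotateInlinePassName(IC); // used in remarks
  (void)PassName;
  return std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams(), IC);
}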
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index f86ee5a14874..756f1fb61f95 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -13,14 +13,17 @@
#ifndef LLVM_ANALYSIS_INLINECOST_H
#define LLVM_ANALYSIS_INLINECOST_H
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/PassManager.h"
#include <cassert>
#include <climits>
namespace llvm {
+class AssumptionCache;
+class OptimizationRemarkEmitter;
class BlockFrequencyInfo;
class CallBase;
class DataLayout;
@@ -52,6 +55,9 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024;
/// Do not inline dynamic allocas that have been constant propagated to be
/// static allocas above this amount in bytes.
const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
+
+const char FunctionInlineCostMultiplierAttributeName[] =
+ "function-inline-cost-multiplier";
} // namespace InlineConstants
// The cost-benefit pair computed by cost-benefit analysis.
@@ -217,6 +223,8 @@ struct InlineParams {
Optional<bool> AllowRecursiveCall = false;
};
+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
+
/// Generate the parameters to tune the inline cost analysis based only on the
/// commandline options.
InlineParams getInlineParams();
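As a usage sketch for the new hooks (the wrapper below is hypothetical; it assumes only the declarations added above), the string-attribute helper pairs naturally with the new cost-multiplier attribute name:

#include "llvm/Analysis/InlineCost.h"
using namespace llvm;

// Sketch: scale an inline cost by the per-call-site
// "function-inline-cost-multiplier" attribute, when one is present.
int applyCostMultiplier(CallBase &CB, int Cost) {
  if (Optional<int> Mult = getStringFnAttrAsInt(
          CB, InlineConstants::FunctionInlineCostMultiplierAttributeName))
    return Cost * *Mult;
  return Cost;
}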
diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
index 1afa8a825f15..fb8236c28b25 100644
--- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
+++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
@@ -10,6 +10,8 @@
#ifndef LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H
#define LLVM_ANALYSIS_INLINEMODELFEATUREMAPS_H
+#include "llvm/Analysis/TensorSpec.h"
+
#include <array>
#include <string>
#include <vector>
@@ -127,7 +129,7 @@ inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) {
constexpr size_t NumberOfFeatures =
static_cast<size_t>(FeatureIndex::NumberOfFeatures);
-extern const std::array<std::string, NumberOfFeatures> FeatureNameMap;
+extern const std::array<TensorSpec, NumberOfFeatures> FeatureMap;
extern const char *const DecisionName;
extern const char *const DefaultDecisionName;
diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h
index 84252bcf1b06..aabd86c98780 100644
--- a/llvm/include/llvm/Analysis/InlineOrder.h
+++ b/llvm/include/llvm/Analysis/InlineOrder.h
@@ -10,10 +10,9 @@
#define LLVM_ANALYSIS_INLINEORDER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstrTypes.h"
#include <algorithm>
#include <utility>
@@ -71,34 +70,52 @@ private:
size_t FirstIndex = 0;
};
-class InlineSizePriority {
+class InlinePriority {
public:
- InlineSizePriority(int Size) : Size(Size) {}
+ virtual ~InlinePriority() = default;
+ virtual bool hasLowerPriority(const CallBase *L, const CallBase *R) const = 0;
+ virtual void update(const CallBase *CB) = 0;
+ virtual bool updateAndCheckDecreased(const CallBase *CB) = 0;
+};
- static bool isMoreDesirable(const InlineSizePriority &S1,
- const InlineSizePriority &S2) {
- return S1.Size < S2.Size;
- }
+class SizePriority : public InlinePriority {
+ using PriorityT = unsigned;
+ DenseMap<const CallBase *, PriorityT> Priorities;
- static InlineSizePriority evaluate(CallBase *CB) {
+ static PriorityT evaluate(const CallBase *CB) {
Function *Callee = CB->getCalledFunction();
- return InlineSizePriority(Callee->getInstructionCount());
+ return Callee->getInstructionCount();
+ }
+
+ static bool isMoreDesirable(const PriorityT &P1, const PriorityT &P2) {
+ return P1 < P2;
}
- int Size;
+ bool hasLowerPriority(const CallBase *L, const CallBase *R) const override {
+ const auto I1 = Priorities.find(L);
+ const auto I2 = Priorities.find(R);
+ assert(I1 != Priorities.end() && I2 != Priorities.end());
+ return isMoreDesirable(I2->second, I1->second);
+ }
+
+public:
+ // Update the priority associated with CB.
+ void update(const CallBase *CB) override { Priorities[CB] = evaluate(CB); };
+
+ bool updateAndCheckDecreased(const CallBase *CB) override {
+ auto It = Priorities.find(CB);
+ const auto OldPriority = It->second;
+ It->second = evaluate(CB);
+ const auto NewPriority = It->second;
+ return isMoreDesirable(OldPriority, NewPriority);
+ }
};
-template <typename PriorityT>
class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
using T = std::pair<CallBase *, int>;
- using HeapT = std::pair<CallBase *, PriorityT>;
using reference = T &;
using const_reference = const T &;
- static bool cmp(const HeapT &P1, const HeapT &P2) {
- return PriorityT::isMoreDesirable(P2.second, P1.second);
- }
-
// A call site could become less desirable for inlining because of the size
// growth from prior inlining into the callee. This method is used to lazily
// update the desirability of a call site if it's decreasing. It is only
@@ -107,31 +124,29 @@ class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
// pushed right back into the heap. For simplicity, those cases where
// the desirability of a call site increases are ignored here.
void adjust() {
- bool Changed = false;
- do {
- CallBase *CB = Heap.front().first;
- const PriorityT PreviousGoodness = Heap.front().second;
- const PriorityT CurrentGoodness = PriorityT::evaluate(CB);
- Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness);
- if (Changed) {
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
- Heap.pop_back();
- Heap.push_back({CB, CurrentGoodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
- }
- } while (Changed);
+ while (PriorityPtr->updateAndCheckDecreased(Heap.front())) {
+ std::pop_heap(Heap.begin(), Heap.end(), isLess);
+ std::push_heap(Heap.begin(), Heap.end(), isLess);
+ }
}
public:
+ PriorityInlineOrder(std::unique_ptr<InlinePriority> PriorityPtr)
+ : PriorityPtr(std::move(PriorityPtr)) {
+ isLess = [this](const CallBase *L, const CallBase *R) {
+ return this->PriorityPtr->hasLowerPriority(L, R);
+ };
+ }
+
size_t size() override { return Heap.size(); }
void push(const T &Elt) override {
CallBase *CB = Elt.first;
const int InlineHistoryID = Elt.second;
- const PriorityT Goodness = PriorityT::evaluate(CB);
- Heap.push_back({CB, Goodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.push_back(CB);
+ PriorityPtr->update(CB);
+ std::push_heap(Heap.begin(), Heap.end(), isLess);
InlineHistoryMap[CB] = InlineHistoryID;
}
@@ -139,10 +154,10 @@ public:
assert(size() > 0);
adjust();
- CallBase *CB = Heap.front().first;
+ CallBase *CB = Heap.front();
T Result = std::make_pair(CB, InlineHistoryMap[CB]);
InlineHistoryMap.erase(CB);
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ std::pop_heap(Heap.begin(), Heap.end(), isLess);
Heap.pop_back();
return Result;
}
@@ -151,21 +166,23 @@ public:
assert(size() > 0);
adjust();
- CallBase *CB = Heap.front().first;
+ CallBase *CB = Heap.front();
return *InlineHistoryMap.find(CB);
}
void erase_if(function_ref<bool(T)> Pred) override {
- auto PredWrapper = [=](HeapT P) -> bool {
- return Pred(std::make_pair(P.first, 0));
+ auto PredWrapper = [=](CallBase *CB) -> bool {
+ return Pred(std::make_pair(CB, 0));
};
llvm::erase_if(Heap, PredWrapper);
- std::make_heap(Heap.begin(), Heap.end(), cmp);
+ std::make_heap(Heap.begin(), Heap.end(), isLess);
}
private:
- SmallVector<HeapT, 16> Heap;
+ SmallVector<CallBase *, 16> Heap;
+ std::function<bool(const CallBase *L, const CallBase *R)> isLess;
DenseMap<CallBase *, int> InlineHistoryMap;
+ std::unique_ptr<InlinePriority> PriorityPtr;
};
} // namespace llvm
#endif // LLVM_ANALYSIS_INLINEORDER_H
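With the priority logic virtualized, a worklist is now assembled by injecting a concrete InlinePriority. A minimal sketch of the intended usage follows; the driver function is hypothetical:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/InlineOrder.h"
#include <memory>
using namespace llvm;

// Sketch: a size-prioritized queue of call sites. pop() lazily
// re-evaluates the front element via updateAndCheckDecreased().
void processCalls(ArrayRef<CallBase *> Calls) {
  PriorityInlineOrder Order(std::make_unique<SizePriority>());
  for (CallBase *CB : Calls)
    Order.push({CB, /*InlineHistoryID=*/-1}); // -1: no history yet
  while (Order.size() > 0) {
    std::pair<CallBase *, int> Next = Order.pop();
    // Next.first is the currently smallest-callee call site.
    (void)Next;
  }
}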
diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
index 54ef1ddf6085..d4ea7d73ec92 100644
--- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h
+++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
@@ -22,12 +22,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilderFolder.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
namespace llvm {
+class Constant;
/// InstSimplifyFolder - Use InstructionSimplify to fold operations to existing
/// values. Also applies target-specific constant folding when not using
@@ -47,108 +46,74 @@ public:
// Return an existing value or a constant if the operation can be simplified.
// Otherwise return nullptr.
//===--------------------------------------------------------------------===//
- Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
- return SimplifyAddInst(LHS, RHS, HasNUW, HasNSW, SQ);
+
+ Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS) const override {
+ return simplifyBinOp(Opc, LHS, RHS, SQ);
+ }
+
+ Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool IsExact) const override {
+ return simplifyBinOp(Opc, LHS, RHS, SQ);
}
- Value *FoldAnd(Value *LHS, Value *RHS) const override {
- return SimplifyAndInst(LHS, RHS, SQ);
+ Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool HasNUW, bool HasNSW) const override {
+ return simplifyBinOp(Opc, LHS, RHS, SQ);
}
- Value *FoldOr(Value *LHS, Value *RHS) const override {
- return SimplifyOrInst(LHS, RHS, SQ);
+ Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ FastMathFlags FMF) const override {
+ return simplifyBinOp(Opc, LHS, RHS, FMF, SQ);
}
Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override {
- return SimplifyICmpInst(P, LHS, RHS, SQ);
+ return simplifyICmpInst(P, LHS, RHS, SQ);
}
Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
bool IsInBounds = false) const override {
- return SimplifyGEPInst(Ty, Ptr, IdxList, IsInBounds, SQ);
+ return simplifyGEPInst(Ty, Ptr, IdxList, IsInBounds, SQ);
}
Value *FoldSelect(Value *C, Value *True, Value *False) const override {
- return SimplifySelectInst(C, True, False, SQ);
+ return simplifySelectInst(C, True, False, SQ);
}
- //===--------------------------------------------------------------------===//
- // Binary Operators
- //===--------------------------------------------------------------------===//
+ Value *FoldExtractValue(Value *Agg,
+ ArrayRef<unsigned> IdxList) const override {
+ return simplifyExtractValueInst(Agg, IdxList, SQ);
+ };
- Value *CreateFAdd(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateFAdd(LHS, RHS);
- }
- Value *CreateSub(Constant *LHS, Constant *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
- return ConstFolder.CreateSub(LHS, RHS, HasNUW, HasNSW);
- }
- Value *CreateFSub(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateFSub(LHS, RHS);
- }
- Value *CreateMul(Constant *LHS, Constant *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
- return ConstFolder.CreateMul(LHS, RHS, HasNUW, HasNSW);
- }
- Value *CreateFMul(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateFMul(LHS, RHS);
- }
- Value *CreateUDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstFolder.CreateUDiv(LHS, RHS, isExact);
- }
- Value *CreateSDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstFolder.CreateSDiv(LHS, RHS, isExact);
- }
- Value *CreateFDiv(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateFDiv(LHS, RHS);
- }
- Value *CreateURem(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateURem(LHS, RHS);
- }
- Value *CreateSRem(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateSRem(LHS, RHS);
- }
- Value *CreateFRem(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateFRem(LHS, RHS);
- }
- Value *CreateShl(Constant *LHS, Constant *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
- return ConstFolder.CreateShl(LHS, RHS, HasNUW, HasNSW);
- }
- Value *CreateLShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstFolder.CreateLShr(LHS, RHS, isExact);
+ Value *FoldInsertValue(Value *Agg, Value *Val,
+ ArrayRef<unsigned> IdxList) const override {
+ return simplifyInsertValueInst(Agg, Val, IdxList, SQ);
}
- Value *CreateAShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstFolder.CreateAShr(LHS, RHS, isExact);
+
+ Value *FoldExtractElement(Value *Vec, Value *Idx) const override {
+ return simplifyExtractElementInst(Vec, Idx, SQ);
}
- Value *CreateXor(Constant *LHS, Constant *RHS) const override {
- return ConstFolder.CreateXor(LHS, RHS);
+
+ Value *FoldInsertElement(Value *Vec, Value *NewElt,
+ Value *Idx) const override {
+ return simplifyInsertElementInst(Vec, NewElt, Idx, SQ);
}
- Value *CreateBinOp(Instruction::BinaryOps Opc, Constant *LHS,
- Constant *RHS) const override {
- return ConstFolder.CreateBinOp(Opc, LHS, RHS);
+ Value *FoldShuffleVector(Value *V1, Value *V2,
+ ArrayRef<int> Mask) const override {
+ Type *RetTy = VectorType::get(
+ cast<VectorType>(V1->getType())->getElementType(), Mask.size(),
+ isa<ScalableVectorType>(V1->getType()));
+ return simplifyShuffleVectorInst(V1, V2, Mask, RetTy, SQ);
}
//===--------------------------------------------------------------------===//
// Unary Operators
//===--------------------------------------------------------------------===//
- Value *CreateNeg(Constant *C, bool HasNUW = false,
- bool HasNSW = false) const override {
- return ConstFolder.CreateNeg(C, HasNUW, HasNSW);
- }
Value *CreateFNeg(Constant *C) const override {
return ConstFolder.CreateFNeg(C);
}
- Value *CreateNot(Constant *C) const override {
- return ConstFolder.CreateNot(C);
- }
Value *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override {
return ConstFolder.CreateUnOp(Opc, C);
@@ -220,34 +185,6 @@ public:
Constant *RHS) const override {
return ConstFolder.CreateFCmp(P, LHS, RHS);
}
-
- //===--------------------------------------------------------------------===//
- // Other Instructions
- //===--------------------------------------------------------------------===//
-
- Value *CreateExtractElement(Constant *Vec, Constant *Idx) const override {
- return ConstFolder.CreateExtractElement(Vec, Idx);
- }
-
- Value *CreateInsertElement(Constant *Vec, Constant *NewElt,
- Constant *Idx) const override {
- return ConstFolder.CreateInsertElement(Vec, NewElt, Idx);
- }
-
- Value *CreateShuffleVector(Constant *V1, Constant *V2,
- ArrayRef<int> Mask) const override {
- return ConstFolder.CreateShuffleVector(V1, V2, Mask);
- }
-
- Value *CreateExtractValue(Constant *Agg,
- ArrayRef<unsigned> IdxList) const override {
- return ConstFolder.CreateExtractValue(Agg, IdxList);
- }
-
- Value *CreateInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> IdxList) const override {
- return ConstFolder.CreateInsertValue(Agg, Val, IdxList);
- }
};
} // end namespace llvm
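On the client side the Fold* consolidation is transparent: the folder still plugs into IRBuilder, which now routes all binary operators through the single FoldBinOp hook. A minimal sketch (the helper name is hypothetical):

#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: an IRBuilder that simplifies as it builds. CreateAdd() below
// dispatches to InstSimplifyFolder::FoldBinOp(Instruction::Add, ...) and
// may return an existing value instead of emitting a new instruction.
Value *emitSum(BasicBlock *BB, Value *A, Value *B) {
  const DataLayout &DL = BB->getModule()->getDataLayout();
  IRBuilder<InstSimplifyFolder> Builder(BB, InstSimplifyFolder(DL));
  return Builder.CreateAdd(A, B);
}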
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index 8b49c115f101..52d43bf5c2a6 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -35,8 +35,6 @@
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
namespace llvm {
@@ -49,6 +47,7 @@ class CallBase;
class DataLayout;
class DominatorTree;
class Function;
+class Instruction;
struct LoopStandardAnalysisResults;
class MDNode;
class OptimizationRemarkEmitter;
@@ -145,176 +144,185 @@ struct SimplifyQuery {
// Please use the SimplifyQuery versions in new code.
/// Given operand for an FNeg, fold the result or return null.
-Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q);
+Value *simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q);
/// Given operands for an Add, fold the result or return null.
-Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
+Value *simplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
const SimplifyQuery &Q);
/// Given operands for a Sub, fold the result or return null.
-Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
+Value *simplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
const SimplifyQuery &Q);
/// Given operands for an FAdd, fold the result or return null.
Value *
-SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+simplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for an FSub, fold the result or return null.
Value *
-SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+simplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for an FMul, fold the result or return null.
Value *
-SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for the multiplication of a FMA, fold the result or return
-/// null. In contrast to SimplifyFMulInst, this function will not perform
+/// null. In contrast to simplifyFMulInst, this function will not perform
/// simplifications whose unrounded results differ when rounded to the argument
/// type.
-Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF,
+Value *simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for a Mul, fold the result or return null.
-Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an SDiv, fold the result or return null.
-Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for a UDiv, fold the result or return null.
-Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an FDiv, fold the result or return null.
Value *
-SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+simplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for an SRem, fold the result or return null.
-Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for a URem, fold the result or return null.
-Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an FRem, fold the result or return null.
Value *
-SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+simplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven);
/// Given operands for a Shl, fold the result or return null.
-Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+Value *simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q);
/// Given operands for a LShr, fold the result or return null.
-Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+Value *simplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q);
/// Given operands for an AShr, fold the result or return null.
-Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+Value *simplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q);
/// Given operands for an And, fold the result or return null.
-Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an Or, fold the result or return null.
-Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an Xor, fold the result or return null.
-Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
+Value *simplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
/// Given operands for an ICmpInst, fold the result or return null.
-Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
/// Given operands for an FCmpInst, fold the result or return null.
-Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q);
/// Given operands for a SelectInst, fold the result or return null.
-Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const SimplifyQuery &Q);
/// Given operands for a GetElementPtrInst, fold the result or return null.
-Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices,
+Value *simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices,
bool InBounds, const SimplifyQuery &Q);
/// Given operands for an InsertValueInst, fold the result or return null.
-Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+Value *simplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q);
/// Given operands for an InsertElement, fold the result or return null.
-Value *SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx,
+Value *simplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx,
const SimplifyQuery &Q);
/// Given operands for an ExtractValueInst, fold the result or return null.
-Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+Value *simplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q);
/// Given operands for an ExtractElementInst, fold the result or return null.
-Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
+Value *simplifyExtractElementInst(Value *Vec, Value *Idx,
const SimplifyQuery &Q);
/// Given operands for a CastInst, fold the result or return null.
-Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
const SimplifyQuery &Q);
/// Given operands for a ShuffleVectorInst, fold the result or return null.
/// See class ShuffleVectorInst for a description of the mask representation.
-Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef<int> Mask,
+Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef<int> Mask,
Type *RetTy, const SimplifyQuery &Q);
//=== Helper functions for higher up the class hierarchy.
/// Given operands for a CmpInst, fold the result or return null.
-Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
/// Given operand for a UnaryOperator, fold the result or return null.
-Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
+Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
/// Given operand for a UnaryOperator, fold the result or return null.
/// Try to use FastMathFlags when folding the result.
-Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+Value *simplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
const SimplifyQuery &Q);
/// Given operands for a BinaryOperator, fold the result or return null.
-Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
/// Given operands for a BinaryOperator, fold the result or return null.
/// Try to use FastMathFlags when folding the result.
-Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF,
+Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q);
/// Given a callsite, fold the result or return null.
-Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
+Value *simplifyCall(CallBase *Call, const SimplifyQuery &Q);
+
+/// Given a constrained FP intrinsic call, tries to compute its simplified
+/// version. Returns a simplified result or null.
+///
+/// This function provides an additional contract: it guarantees that if
+/// simplification succeeds, the intrinsic is side-effect free. As a result, a
+/// successful simplification can be used to delete the intrinsic, not just to
+/// replace its result.
+Value *simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q);
/// Given an operand for a Freeze, see if we can fold the result.
/// If not, this returns null.
-Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
+Value *simplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
/// See if we can compute a simplified version of this instruction. If not,
/// return null.
-Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
+Value *simplifyInstruction(Instruction *I, const SimplifyQuery &Q,
OptimizationRemarkEmitter *ORE = nullptr);
-/// Like \p SimplifyInstruction but the operands of \p I are replaced with
+/// Like \p simplifyInstruction but the operands of \p I are replaced with
/// \p NewOps. Returns a simplified value, or null if none was found.
Value *
-SimplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps,
+simplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps,
const SimplifyQuery &Q,
OptimizationRemarkEmitter *ORE = nullptr);
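Call sites are only affected by the capitalization change; a typical query still reads as below (a sketch, assuming the required analyses are already at hand):

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Sketch: fold an instruction to an existing value if possible and
// replace its uses; the dead instruction is left for later DCE.
bool trySimplify(Instruction *I, const DataLayout &DL,
                 const TargetLibraryInfo *TLI, DominatorTree *DT) {
  if (Value *V = simplifyInstruction(I, SimplifyQuery(DL, TLI, DT))) {
    I->replaceAllUsesWith(V);
    return true;
  }
  return false;
}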
diff --git a/llvm/include/llvm/Analysis/IntervalIterator.h b/llvm/include/llvm/Analysis/IntervalIterator.h
index 8e2273618a66..cbb7cac1c508 100644
--- a/llvm/include/llvm/Analysis/IntervalIterator.h
+++ b/llvm/include/llvm/Analysis/IntervalIterator.h
@@ -36,8 +36,6 @@
#include "llvm/Analysis/Interval.h"
#include "llvm/Analysis/IntervalPartition.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -48,6 +46,7 @@
namespace llvm {
class BasicBlock;
+class Function;
// getNodeHeader - Given a source graph node and the source graph, return the
// BasicBlock that is the header node. This is the opposite of
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index c0404d37d04d..4cacf8951d6a 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -38,20 +38,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
@@ -60,8 +54,11 @@
namespace llvm {
+class Constant;
+class Function;
template <class GraphType> struct GraphTraits;
class Module;
+class TargetLibraryInfo;
class Value;
/// A lazily constructed view of the call graph of a module.
@@ -331,7 +328,7 @@ public:
bool operator!=(const Node &N) const { return !operator==(N); }
/// Tests whether the node has been populated with edges.
- bool isPopulated() const { return Edges.hasValue(); }
+ bool isPopulated() const { return Edges.has_value(); }
/// Tests whether this is actually a dead node and no longer valid.
///
diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h
index 754391e10630..24c2bfcc74b9 100644
--- a/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -114,6 +114,9 @@ public:
/// Inform the analysis cache that we have erased a block.
void eraseBlock(BasicBlock *BB);
+ /// Completely flush all previously computed values.
+ void clear(const Module *M);
+
/// Print the \LazyValueInfo Analysis.
/// We pass in the DTree that is required for identifying which basic blocks
/// we can solve/print for, in the LVIPrinter.
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 09bf98d324ed..29e3efb38e19 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -75,9 +75,9 @@ bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size,
/// within the specified loop) would access only dereferenceable memory, and
/// be properly aligned on every iteration of the specified loop regardless of
/// its placement within the loop. (i.e. does not require predication beyond
-/// that required by the the header itself and could be hoisted into the header
+/// that required by the header itself and could be hoisted into the header
/// if desired.) This is more powerful than the variants above when the
-/// address loaded from is analyzeable by SCEV.
+/// address loaded from is analyzable by SCEV.
bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
ScalarEvolution &SE,
DominatorTree &DT);
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index c83a04991b04..8f71ce9e96c0 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -244,6 +244,15 @@ public:
SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
bool isWrite) const;
+ /// Return the program order indices for the access location (Ptr, IsWrite).
+ /// Returns an empty ArrayRef if there are no accesses for the location.
+ ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const {
+ auto I = Accesses.find({Ptr, IsWrite});
+ if (I != Accesses.end())
+ return I->second;
+ return {};
+ }
+
private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
/// applies dynamic knowledge to simplify SCEV expressions and convert them
@@ -327,12 +336,6 @@ struct RuntimeCheckingPtrGroup {
/// pointer, with index \p Index in RtCheck.
RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);
- RuntimeCheckingPtrGroup(unsigned Index, const SCEV *Start, const SCEV *End,
- unsigned AS)
- : High(End), Low(Start), AddressSpace(AS) {
- Members.push_back(Index);
- }
-
/// Tries to add the pointer recorded in RtCheck at index
/// \p Index to this pointer checking group. We can only add a pointer
/// to a checking group if we will still be able to get
@@ -340,7 +343,7 @@ struct RuntimeCheckingPtrGroup {
/// of success, false otherwise.
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
- unsigned AS, ScalarEvolution &SE);
+ unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
/// The SCEV expression which represents the upper bound of all the
/// pointers in this group.
@@ -352,6 +355,9 @@ struct RuntimeCheckingPtrGroup {
SmallVector<unsigned, 2> Members;
/// Address space of the involved pointers.
unsigned AddressSpace;
+ /// Whether the pointer needs to be frozen after expansion, e.g. because it
+ /// may be poison outside the loop.
+ bool NeedsFreeze = false;
};
/// A memcheck which made up of a pair of grouped pointers.
@@ -359,6 +365,18 @@ typedef std::pair<const RuntimeCheckingPtrGroup *,
const RuntimeCheckingPtrGroup *>
RuntimePointerCheck;
+struct PointerDiffInfo {
+ const SCEV *SrcStart;
+ const SCEV *SinkStart;
+ unsigned AccessSize;
+ bool NeedsFreeze;
+
+ PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
+ unsigned AccessSize, bool NeedsFreeze)
+ : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
+ NeedsFreeze(NeedsFreeze) {}
+};
+
/// Holds information about the memory runtime legality checks to verify
/// that a group of pointers do not overlap.
class RuntimePointerChecking {
@@ -383,16 +401,19 @@ public:
unsigned AliasSetId;
/// SCEV for the access.
const SCEV *Expr;
+ /// True if the pointer expression needs to be frozen after expansion.
+ bool NeedsFreeze;
PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
- const SCEV *Expr)
+ const SCEV *Expr, bool NeedsFreeze)
: PointerValue(PointerValue), Start(Start), End(End),
IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
- AliasSetId(AliasSetId), Expr(Expr) {}
+ AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
};
- RuntimePointerChecking(ScalarEvolution *SE) : SE(SE) {}
+ RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
+ : DC(DC), SE(SE) {}
/// Reset the state of the pointer runtime information.
void reset() {
@@ -406,9 +427,9 @@ public:
/// according to the assumptions that we've made during the analysis.
/// The method might also version the pointer stride according to \p Strides,
/// and add new predicates to \p PSE.
- void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides,
- PredicatedScalarEvolution &PSE);
+ void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
+ bool WritePtr, unsigned DepSetId, unsigned ASId,
+ PredicatedScalarEvolution &PSE, bool NeedsFreeze);
/// No run-time memory checking is necessary.
bool empty() const { return Pointers.empty(); }
@@ -418,11 +439,23 @@ public:
void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
bool UseDependencies);
- /// Returns the checks that generateChecks created.
+ /// Returns the checks that generateChecks created. They can be used to ensure
+ /// no read/write accesses overlap across all loop iterations.
const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
return Checks;
}
+ // Returns an optional list of (pointer-difference expressions, access size)
+ // pairs that can be used to prove that there are no vectorization-preventing
+ // dependencies at runtime. There is a vectorization-preventing dependency
+ // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
+ // None if pointer-difference checks cannot be used.
+ Optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
+ if (!CanUseDiffCheck)
+ return None;
+ return {DiffChecks};
+ }
+
/// Decide if we need to add a check between two groups of pointers,
/// according to needsChecking.
bool needsChecking(const RuntimeCheckingPtrGroup &M,
@@ -477,7 +510,15 @@ private:
bool UseDependencies);
/// Generate the checks and return them.
- SmallVector<RuntimePointerCheck, 4> generateChecks() const;
+ SmallVector<RuntimePointerCheck, 4> generateChecks();
+
+ /// Try to add a new (pointer-difference, access size) pair to
+ /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
+ /// checks cannot be used for the groups, set CanUseDiffCheck to false.
+ void tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
+ const RuntimeCheckingPtrGroup &CGJ);
+
+ MemoryDepChecker &DC;
/// Holds a pointer to the ScalarEvolution analysis.
ScalarEvolution *SE;
@@ -485,6 +526,13 @@ private:
/// Set of run-time checks required to establish independence of
/// otherwise may-aliasing pointers in the loop.
SmallVector<RuntimePointerCheck, 4> Checks;
+
+ /// Flag indicating if pointer-difference checks can be used.
+ bool CanUseDiffCheck = true;
+
+ /// A list of (pointer-difference, access size) pairs that can be used to
+ /// prove that there are no vectorization-preventing dependencies.
+ SmallVector<PointerDiffInfo> DiffChecks;
};
/// Drive the analysis of memory accesses in the loop
@@ -575,6 +623,11 @@ public:
return HasDependenceInvolvingLoopInvariantAddress;
}
+ /// Return the list of stores to invariant addresses.
+ ArrayRef<StoreInst *> getStoresToInvariantAddresses() const {
+ return StoresToInvariantAddresses;
+ }
+
/// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
/// them to a more usable form. All SCEV expressions during the analysis
/// should be re-written (and therefore simplified) according to PSE.
@@ -605,6 +658,11 @@ private:
/// invariant.
void collectStridedAccess(Value *LoadOrStoreInst);
+ // Emit an optimization remark for the first unsafe memory dependence in a
+ // loop. Emits nothing if there are no unsafe dependences or if the
+ // dependences were not recorded.
+ void emitUnsafeDependenceRemark();
+
std::unique_ptr<PredicatedScalarEvolution> PSE;
/// We need to check that all of the pointers in this list are disjoint
@@ -629,6 +687,9 @@ private:
/// Indicator that there are non vectorizable stores to a uniform address.
bool HasDependenceInvolvingLoopInvariantAddress = false;
+ /// List of stores to invariant addresses.
+ SmallVector<StoreInst *> StoresToInvariantAddresses;
+
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
std::unique_ptr<OptimizationRemarkAnalysis> Report;
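The intent of the new pointer-difference checks, stated as a predicate: a source/sink pair is safe for a given VF and interleave count IC when (SinkStart - SrcStart) >=u VF * IC * AccessSize, and entries flagged NeedsFreeze must be frozen after SCEV expansion because they may be poison outside the loop. A consumer sketch (the function name is hypothetical):

#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;

// Sketch: inspect the diff checks, if this loop's accesses admit them.
void inspectDiffChecks(const RuntimePointerChecking &RtChecking) {
  if (Optional<ArrayRef<PointerDiffInfo>> Checks = RtChecking.getDiffChecks())
    for (const PointerDiffInfo &C : *Checks) {
      (void)C.SrcStart;    // SCEV start of the source pointer
      (void)C.SinkStart;   // SCEV start of the sink pointer
      (void)C.AccessSize;  // bytes accessed per element
      (void)C.NeedsFreeze; // freeze after expansion if set
    }
}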
diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
index d07e6977fed1..d22675a308aa 100644
--- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
@@ -29,7 +29,6 @@
#ifndef LLVM_ANALYSIS_LOOPANALYSISMANAGER_H
#define LLVM_ANALYSIS_LOOPANALYSISMANAGER_H
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index 21882ebd0087..4c5083f3c980 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -15,15 +15,17 @@
#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
class AAResults;
class DependenceInfo;
+class Instruction;
class LPMUpdater;
+class raw_ostream;
+class LoopInfo;
+class Loop;
class ScalarEvolution;
class SCEV;
class TargetTransformInfo;
@@ -96,6 +98,10 @@ private:
/// Attempt to delinearize the indexed reference.
bool delinearize(const LoopInfo &LI);
+ /// Attempt to delinearize \p AccessFn for fixed-size arrays.
+ bool tryDelinearizeFixedSize(const SCEV *AccessFn,
+ SmallVectorImpl<const SCEV *> &Subscripts);
+
/// Return true if the index reference is invariant with respect to loop \p L.
bool isLoopInvariant(const Loop &L) const;
@@ -105,6 +111,13 @@ private:
/// smaller than the cache line size \p CLS.
bool isConsecutive(const Loop &L, unsigned CLS) const;
+ /// Retrieve the index of the subscript corresponding to the given loop \p
+ /// L. Return a zero-based positive index if the subscript index is
+ /// successfully located and a negative value otherwise. For example, given the
+ /// indexed reference 'A[i][2j+1][3k+2]', the call
+ /// 'getSubscriptIndex(loop-k)' would return value 2.
+ int getSubscriptIndex(const Loop &L) const;
+
/// Return the coefficient used in the rightmost dimension.
const SCEV *getLastCoefficient() const;
@@ -237,9 +250,10 @@ private:
/// Sort the LoopCosts vector by decreasing cache cost.
void sortLoopCosts() {
- sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
- return A.second > B.second;
- });
+ stable_sort(LoopCosts,
+ [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
+ return A.second > B.second;
+ });
}
private:
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index a0ffdb07a7ec..9351b83ad747 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -44,7 +44,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
@@ -55,9 +54,10 @@
namespace llvm {
class DominatorTree;
+class InductionDescriptor;
+class Instruction;
class LoopInfo;
class Loop;
-class InductionDescriptor;
class MDNode;
class MemorySSAUpdater;
class ScalarEvolution;
@@ -112,6 +112,22 @@ public:
/// parent is the innermost loop in which it is enclosed.
LoopT *getParentLoop() const { return ParentLoop; }
+ /// Get the outermost loop in which this loop is contained.
+ /// This may be the loop itself, if it already is the outermost loop.
+ const LoopT *getOutermostLoop() const {
+ const LoopT *L = static_cast<const LoopT *>(this);
+ while (L->ParentLoop)
+ L = L->ParentLoop;
+ return L;
+ }
+
+ LoopT *getOutermostLoop() {
+ LoopT *L = static_cast<LoopT *>(this);
+ while (L->ParentLoop)
+ L = L->ParentLoop;
+ return L;
+ }
+
/// This is a raw interface for bypassing addChildLoop.
void setParentLoop(LoopT *L) {
assert(!isInvalid() && "Loop not in a valid state!");
diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h
index b8b8330d0fe1..a96a698f3afb 100644
--- a/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -14,7 +14,6 @@
#ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H
#define LLVM_ANALYSIS_LOOPINFOIMPL_H
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
@@ -315,12 +314,11 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
"Loop block has no in-loop predecessors!");
SmallVector<BlockT *, 2> OutsideLoopPreds;
- std::for_each(GraphTraits<Inverse<BlockT *>>::child_begin(BB),
- GraphTraits<Inverse<BlockT *>>::child_end(BB),
- [&](BlockT *B) {
- if (!contains(B))
- OutsideLoopPreds.push_back(B);
- });
+ for (BlockT *B :
+ llvm::make_range(GraphTraits<Inverse<BlockT *>>::child_begin(BB),
+ GraphTraits<Inverse<BlockT *>>::child_end(BB)))
+ if (!contains(B))
+ OutsideLoopPreds.push_back(B);
if (BB == getHeader()) {
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
@@ -455,8 +453,7 @@ static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT *> Backedges,
InvBlockTraits::child_end(PredBB));
} else {
// This is a discovered block. Find its outermost discovered loop.
- while (LoopT *Parent = Subloop->getParentLoop())
- Subloop = Parent;
+ Subloop = Subloop->getOutermostLoop();
// If it is already discovered to be a subloop of this loop, continue.
if (Subloop == L)
diff --git a/llvm/include/llvm/Analysis/LoopPass.h b/llvm/include/llvm/Analysis/LoopPass.h
index 0fd2a39eefc0..c5f08d0ae8af 100644
--- a/llvm/include/llvm/Analysis/LoopPass.h
+++ b/llvm/include/llvm/Analysis/LoopPass.h
@@ -14,13 +14,14 @@
#ifndef LLVM_ANALYSIS_LOOPPASS_H
#define LLVM_ANALYSIS_LOOPPASS_H
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Pass.h"
#include <deque>
namespace llvm {
+class Loop;
+class LoopInfo;
class LPPassManager;
class Function;
diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
index 7cf8a081f9a2..eada6a647763 100644
--- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
+++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
@@ -15,8 +15,9 @@
#ifndef LLVM_ANALYSIS_LOOPUNROLLANALYZER_H
#define LLVM_ANALYSIS_LOOPUNROLLANALYZER_H
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/InstVisitor.h"
// This class is used to get an estimate of the optimization effects that we
@@ -36,6 +37,8 @@
// And finally:
// v = b[1]
namespace llvm {
+class Instruction;
+
class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
friend class InstVisitor<UnrolledInstAnalyzer, bool>;
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index b1a81d5e7030..00e8d7d7dd4d 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -9,6 +9,7 @@
#ifndef LLVM_ANALYSIS_MLINLINEADVISOR_H
#define LLVM_ANALYSIS_MLINLINEADVISOR_H
+#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MLModelRunner.h"
@@ -19,6 +20,7 @@
#include <memory>
namespace llvm {
+class DiagnosticInfoOptimizationBase;
class Module;
class MLInlineAdvice;
@@ -29,16 +31,19 @@ public:
virtual ~MLInlineAdvisor() = default;
- void onPassEntry() override;
+ void onPassEntry(LazyCallGraph::SCC *SCC) override;
void onPassExit(LazyCallGraph::SCC *SCC) override;
- int64_t getIRSize(const Function &F) const { return F.getInstructionCount(); }
+ int64_t getIRSize(Function &F) const {
+ return getCachedFPI(F).TotalInstructionCount;
+ }
void onSuccessfulInlining(const MLInlineAdvice &Advice,
bool CalleeWasDeleted);
bool isForcedToStop() const { return ForceStop; }
int64_t getLocalCalls(Function &F);
const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); }
+ FunctionPropertiesInfo &getCachedFPI(Function &) const;
protected:
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
@@ -60,11 +65,11 @@ protected:
private:
int64_t getModuleIRSize() const;
+ std::unique_ptr<InlineAdvice>
+ getSkipAdviceIfUnreachableCallsite(CallBase &CB);
+ void print(raw_ostream &OS) const override;
- void print(raw_ostream &OS) const override {
- OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
- << "\n";
- }
+ mutable DenseMap<const Function *, FunctionPropertiesInfo> FPICache;
LazyCallGraph &CG;
@@ -75,7 +80,7 @@ private:
std::map<const LazyCallGraph::Node *, unsigned> FunctionLevels;
const int32_t InitialIRSize = 0;
int32_t CurrentIRSize = 0;
- std::deque<const LazyCallGraph::Node *> NodesInLastSCC;
+ llvm::SmallPtrSet<const LazyCallGraph::Node *, 1> NodesInLastSCC;
DenseSet<const LazyCallGraph::Node *> AllNodes;
bool ForceStop = false;
};
@@ -85,16 +90,7 @@ private:
class MLInlineAdvice : public InlineAdvice {
public:
MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
- OptimizationRemarkEmitter &ORE, bool Recommendation)
- : InlineAdvice(Advisor, CB, ORE, Recommendation),
- CallerIRSize(Advisor->isForcedToStop() ? 0
- : Advisor->getIRSize(*Caller)),
- CalleeIRSize(Advisor->isForcedToStop() ? 0
- : Advisor->getIRSize(*Callee)),
- CallerAndCalleeEdges(Advisor->isForcedToStop()
- ? 0
- : (Advisor->getLocalCalls(*Caller) +
- Advisor->getLocalCalls(*Callee))) {}
+ OptimizationRemarkEmitter &ORE, bool Recommendation);
virtual ~MLInlineAdvice() = default;
void recordInliningImpl() override;
@@ -108,13 +104,17 @@ public:
const int64_t CallerIRSize;
const int64_t CalleeIRSize;
const int64_t CallerAndCalleeEdges;
+ void updateCachedCallerFPI(FunctionAnalysisManager &FAM) const;
private:
void reportContextForRemark(DiagnosticInfoOptimizationBase &OR);
-
MLInlineAdvisor *getAdvisor() const {
return static_cast<MLInlineAdvisor *>(Advisor);
};
+ // Make a copy of the FPI of the caller right before inlining. If inlining
+ // fails, we can just update the cache with that value.
+ const FunctionPropertiesInfo PreInlineCallerFPI;
+ Optional<FunctionPropertiesUpdater> FPU;
};
} // namespace llvm
diff --git a/llvm/include/llvm/Analysis/MLModelRunner.h b/llvm/include/llvm/Analysis/MLModelRunner.h
index 669c02af0b3b..872c0e37f00e 100644
--- a/llvm/include/llvm/Analysis/MLModelRunner.h
+++ b/llvm/include/llvm/Analysis/MLModelRunner.h
@@ -10,10 +10,11 @@
#ifndef LLVM_ANALYSIS_MLMODELRUNNER_H
#define LLVM_ANALYSIS_MLMODELRUNNER_H
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/Analysis/TensorSpec.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class LLVMContext;
/// MLModelRunner interface: abstraction of a mechanism for evaluating a
/// tensorflow "saved model".
@@ -41,7 +42,7 @@ public:
getTensorUntyped(static_cast<size_t>(FeatureID)));
}
- virtual void *getTensorUntyped(size_t Index) = 0;
+ void *getTensorUntyped(size_t Index) { return InputBuffers[Index]; }
const void *getTensorUntyped(size_t Index) const {
return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index);
}
@@ -50,13 +51,27 @@ public:
Kind getKind() const { return Type; }
protected:
- MLModelRunner(LLVMContext &Ctx, Kind Type) : Ctx(Ctx), Type(Type) {
+ MLModelRunner(LLVMContext &Ctx, Kind Type, size_t NrInputs)
+ : Ctx(Ctx), Type(Type), InputBuffers(NrInputs) {
assert(Type != Kind::Unknown);
}
virtual void *evaluateUntyped() = 0;
+ void setUpBufferForTensor(size_t Index, const TensorSpec &Spec,
+ void *Buffer) {
+ if (!Buffer) {
+ OwnedBuffers.emplace_back(Spec.getTotalTensorBufferSize());
+ Buffer = OwnedBuffers.back().data();
+ }
+ InputBuffers[Index] = Buffer;
+ }
+
LLVMContext &Ctx;
const Kind Type;
+
+private:
+ std::vector<void *> InputBuffers;
+ std::vector<std::vector<char *>> OwnedBuffers;
};
} // namespace llvm
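Buffer ownership now lives in the base class: a concrete runner registers its tensors once and only implements evaluateUntyped(). A minimal sketch follows; the subclass is hypothetical, and Kind::Development is chosen arbitrarily to satisfy the constructor's assert:

#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include <vector>
using namespace llvm;

// Sketch: a do-nothing runner. Passing a null buffer makes the base
// class allocate and own storage sized by the TensorSpec.
class NoopModelRunner : public MLModelRunner {
public:
  NoopModelRunner(LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs)
      : MLModelRunner(Ctx, Kind::Development, Inputs.size()) {
    for (size_t I = 0; I < Inputs.size(); ++I)
      setUpBufferForTensor(I, Inputs[I], /*Buffer=*/nullptr);
  }

private:
  void *evaluateUntyped() override { return nullptr; } // no model to run
};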
diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h
index d5b60ee540e0..7ad83612880f 100644
--- a/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -28,6 +28,7 @@
namespace llvm {
class AllocaInst;
+class AAResults;
class Argument;
class CallInst;
class ConstantPointerNull;
@@ -100,7 +101,10 @@ inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) {
/// insertion or speculative execution of allocation routines.
bool isAllocRemovable(const CallBase *V, const TargetLibraryInfo *TLI);
-/// Gets the alignment argument for an aligned_alloc-like function
+/// Gets the alignment argument for an aligned_alloc-like function, using either
+/// built-in knowledge based on function names/signatures or allocalign
+/// attributes. Note: the Value returned may not indicate a valid alignment, per
+/// the definition of the allocalign attribute.
Value *getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI);
/// Return the size of the requested allocation. With a trivial mapper, this is
@@ -111,12 +115,19 @@ Optional<APInt> getAllocSize(const CallBase *CB,
const TargetLibraryInfo *TLI,
std::function<const Value*(const Value*)> Mapper);
-/// If this allocation function initializes memory to a fixed value, return
-/// said value in the requested type. Otherwise, return nullptr.
-Constant *getInitialValueOfAllocation(const CallBase *Alloc,
+/// If this is a call to an allocation function that initializes memory to a
+/// fixed value, return said value in the requested type. Otherwise, return
+/// nullptr.
+Constant *getInitialValueOfAllocation(const Value *V,
const TargetLibraryInfo *TLI,
Type *Ty);
+/// If a function is part of an allocation family (e.g.
+/// malloc/realloc/calloc/free), return the identifier for its family
+/// of functions.
+Optional<StringRef> getAllocationFamily(const Value *I,
+ const TargetLibraryInfo *TLI);
+
//===----------------------------------------------------------------------===//
// Utility functions to compute size of objects.
//
@@ -143,6 +154,8 @@ struct ObjectSizeOpts {
/// though they can't be evaluated. Otherwise, null is always considered to
/// point to a 0 byte region of memory.
bool NullIsUnknownSize = false;
+ /// If set, used for a more accurate evaluation.
+ AAResults *AA = nullptr;
};
/// Compute the size of the object pointed by Ptr. Returns true and the
@@ -162,8 +175,9 @@ bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
/// argument of the call to objectsize.
Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL,
const TargetLibraryInfo *TLI, bool MustSucceed);
-
-
+Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AAResults *AA,
+ bool MustSucceed);
using SizeOffsetType = std::pair<APInt, APInt>;
@@ -210,7 +224,6 @@ public:
SizeOffsetType visitConstantPointerNull(ConstantPointerNull&);
SizeOffsetType visitExtractElementInst(ExtractElementInst &I);
SizeOffsetType visitExtractValueInst(ExtractValueInst &I);
- SizeOffsetType visitGEPOperator(GEPOperator &GEP);
SizeOffsetType visitGlobalAlias(GlobalAlias &GA);
SizeOffsetType visitGlobalVariable(GlobalVariable &GV);
SizeOffsetType visitIntToPtrInst(IntToPtrInst&);
@@ -221,6 +234,12 @@ public:
SizeOffsetType visitInstruction(Instruction &I);
private:
+ SizeOffsetType findLoadSizeOffset(
+ LoadInst &LoadFrom, BasicBlock &BB, BasicBlock::iterator From,
+ SmallDenseMap<BasicBlock *, SizeOffsetType, 8> &VisitedBlocks,
+ unsigned &ScannedInstCount);
+ SizeOffsetType combineSizeOffset(SizeOffsetType LHS, SizeOffsetType RHS);
+ SizeOffsetType computeImpl(Value *V);
bool CheckedZextOrTrunc(APInt &I);
};
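The family query makes mismatched allocator/deallocator checks straightforward; a minimal sketch (the helper name is hypothetical):

#include "llvm/Analysis/MemoryBuiltins.h"
using namespace llvm;

// Sketch: true iff both calls belong to the same allocation family
// (e.g. "malloc" covers malloc/realloc/free).
bool sameAllocFamily(const Value *Alloc, const Value *Dealloc,
                     const TargetLibraryInfo *TLI) {
  Optional<StringRef> F1 = getAllocationFamily(Alloc, TLI);
  Optional<StringRef> F2 = getAllocationFamily(Dealloc, TLI);
  return F1 && F2 && *F1 == *F2;
}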
diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index 23e50f601e04..dfac49445d75 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -36,6 +36,7 @@ class AnyMemTransferInst;
class AnyMemIntrinsic;
class TargetLibraryInfo;
class VAArgInst;
+class Value;
// Represents the size of a MemoryLocation. Logically, it's an
// Optional<uint63_t> that also carries a bit to represent whether the integer
diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index b41f5771bacd..8cadb6a4c912 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -66,6 +66,19 @@
/// MemoryDefs are not disambiguated because it would require multiple reaching
/// definitions, which would require multiple phis, and multiple memoryaccesses
/// per instruction.
+///
+/// In addition to the def/use graph described above, MemoryDefs also contain
+/// an "optimized" definition use. The "optimized" use points to some def
+/// reachable through the memory def chain. The optimized def *may* (but is
+/// not required to) alias the original MemoryDef, but no def *closer* to the
+/// source def may alias it. As the name implies, the purpose of the optimized
+/// use is to allow caching of clobber searches for memory defs. The optimized
+/// def may be nullptr, in which case clients must walk the defining access
+/// chain.
+///
+/// When iterating the uses of a MemoryDef, both defining uses and optimized
+/// uses will be encountered. If only one type is needed, the client must
+/// filter the use walk.
//
//===----------------------------------------------------------------------===//
@@ -73,30 +86,18 @@
#define LLVM_ANALYSIS_MEMORYSSA_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/simple_ilist.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedUser.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -106,11 +107,16 @@
namespace llvm {
+template <class GraphType> struct GraphTraits;
+class BasicBlock;
class Function;
class Instruction;
+class LLVMContext;
class MemoryAccess;
class MemorySSAWalker;
-class LLVMContext;
+class Module;
+class Use;
+class Value;
class raw_ostream;
namespace MSSAHelpers {
@@ -259,10 +265,11 @@ public:
return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal;
}
- // Sadly, these have to be public because they are needed in some of the
- // iterators.
+ /// Do we have an optimized use?
inline bool isOptimized() const;
+ /// Return the MemoryAccess associated with the optimized use, or nullptr.
inline MemoryAccess *getOptimized() const;
+ /// Sets the optimized use for a MemoryDef.
inline void setOptimized(MemoryAccess *);
// Retrieve AliasResult type of the optimized access. Ideally this would be
@@ -339,6 +346,9 @@ public:
setOperand(0, DMA);
}
+ /// Whether the MemoryUse is optimized. If ensureOptimizedUses() was called,
+ /// uses will usually be optimized, but this is not guaranteed (e.g. due to
+ /// invalidation and optimization limits).
bool isOptimized() const {
return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID();
}
@@ -791,6 +801,13 @@ public:
/// about the beginning or end of a block.
enum InsertionPlace { Beginning, End, BeforeTerminator };
+ /// By default, uses are *not* optimized during MemorySSA construction.
+ /// Calling this method will attempt to optimize all MemoryUses, if this has
+ /// not happened yet for this MemorySSA instance. This should be done if you
+ /// plan to query the clobbering access for most uses, or if you walk the
+ /// def-use chain of uses.
+ void ensureOptimizedUses();
+
protected:
// Used by Memory SSA dumpers and wrapper pass
friend class MemorySSAPrinterLegacyPass;
@@ -893,6 +910,7 @@ private:
std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
unsigned NextID = 0;
+ bool IsOptimized = false;
};
/// Enables verification of MemorySSA.
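
[Illustration, not part of the patch: a sketch of the calling pattern the new ensureOptimizedUses() is meant for, against the declarations above; MSSA and I are assumed to come from the enclosing pass.]

// Sketch only: batch-optimize uses once, then read the cached clobber.
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void printCachedClobber(MemorySSA &MSSA, Instruction &I) {
  // Worthwhile when most uses will be queried; a no-op after the first call.
  MSSA.ensureOptimizedUses();
  if (auto *MU = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(&I)))
    if (MU->isOptimized()) // not guaranteed, e.g. after invalidation
      MU->getOptimized()->print(errs());
  // When a use is not optimized, clients fall back to walking the
  // defining-access chain (or to a MemorySSAWalker).
}
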
diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index 3e5ebe9cb427..2bcd1a462871 100644
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -31,7 +31,6 @@
#ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H
#define LLVM_ANALYSIS_MEMORYSSAUPDATER_H
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -39,7 +38,6 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Support/CFGDiff.h"
-#include <utility>
namespace llvm {
@@ -47,6 +45,7 @@ class BasicBlock;
class DominatorTree;
class Instruction;
class LoopBlocksRPO;
+template <typename T, unsigned int N> class SmallSetVector;
using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>;
diff --git a/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h b/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h
index 071ccf96fe5b..72bd185b6c32 100644
--- a/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h
+++ b/llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h
@@ -10,6 +10,7 @@
#ifndef LLVM_ANALYSIS_MODELUNDERTRAININGRUNNER_H
#define LLVM_ANALYSIS_MODELUNDERTRAININGRUNNER_H
+#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Config/llvm-config.h"
#ifdef LLVM_HAVE_TF_API
@@ -48,6 +49,11 @@ public:
StringRef DecisionName,
const std::vector<TensorSpec> &InputSpecs,
StringRef OutputSpecsPathOverride = "");
+ static std::unique_ptr<ModelUnderTrainingRunner>
+ createAndEnsureValid(LLVMContext &Ctx, const std::string &ModelPath,
+ StringRef DecisionName,
+ const std::vector<TensorSpec> &InputSpecs,
+ const std::vector<LoggedFeatureSpec> &OutputSpecs);
private:
ModelUnderTrainingRunner(LLVMContext &Ctx, const std::string &ModelPath,
@@ -58,7 +64,6 @@ private:
const std::vector<LoggedFeatureSpec> OutputSpecs;
Optional<TFModelEvaluator::EvaluationResult> LastEvaluationResult;
void *evaluateUntyped() override;
- void *getTensorUntyped(size_t Index) override;
bool isValid() const { return !!Evaluator; }
};
diff --git a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h
index 99aa315319b8..fa91e4f653d0 100644
--- a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h
+++ b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h
@@ -11,9 +11,9 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class raw_ostream;
class ModuleDebugInfoPrinterPass
: public PassInfoMixin<ModuleDebugInfoPrinterPass> {
diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h
index 18a0bfee5730..1e4994207555 100644
--- a/llvm/include/llvm/Analysis/MustExecute.h
+++ b/llvm/include/llvm/Analysis/MustExecute.h
@@ -28,7 +28,6 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -42,6 +41,7 @@ class Instruction;
class Loop;
class LoopInfo;
class PostDominatorTree;
+class raw_ostream;
/// Captures loop safety information.
/// It keeps information on which loop blocks may throw exceptions or otherwise
diff --git a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h
index 5bcedf98865c..980b40500d7c 100644
--- a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h
+++ b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h
@@ -10,13 +10,9 @@
#ifndef LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H
#define LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H
-#include "llvm/Config/llvm-config.h"
-
-/// While not strictly necessary to conditionally compile this, it really
-/// has no usecase outside the 'development' mode.
-#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/MLModelRunner.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Config/llvm-config.h"
namespace llvm {
/// A pseudo model runner. We use it to store feature values when collecting
/// logs for the default policy, in 'development' mode, but never ask it to
@@ -34,10 +30,6 @@ private:
void *evaluateUntyped() override {
llvm_unreachable("We shouldn't call run on this model runner.");
}
- void *getTensorUntyped(size_t Index) override;
-
- std::vector<std::unique_ptr<char[]>> ValuesBuffer;
};
} // namespace llvm
-#endif // defined(LLVM_HAVE_TF_API)
#endif // LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
index 385fa5422926..56faa20c4c6e 100644
--- a/llvm/include/llvm/Analysis/ObjCARCUtil.h
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -35,7 +35,7 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) {
// functions.
return !CB->getFunctionType()->getReturnType()->isVoidTy() &&
CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall)
- .hasValue();
+ .has_value();
}
/// This function returns operand bundle clang_arc_attachedcall's argument,
@@ -59,7 +59,7 @@ inline bool isRetainOrClaimRV(ARCInstKind Kind) {
/// or UnsafeClaimRV.
inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) {
Optional<Function *> Fn = getAttachedARCFunction(CB);
- if (!Fn.hasValue())
+ if (!Fn)
return ARCInstKind::None;
auto FnClass = GetFunctionClass(*Fn);
assert(isRetainOrClaimRV(FnClass) && "unexpected ARC runtime function");
diff --git a/llvm/include/llvm/Analysis/OverflowInstAnalysis.h b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h
index 7523fb9392cd..761d20f17a8b 100644
--- a/llvm/include/llvm/Analysis/OverflowInstAnalysis.h
+++ b/llvm/include/llvm/Analysis/OverflowInstAnalysis.h
@@ -14,11 +14,9 @@
#ifndef LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H
#define LLVM_ANALYSIS_OVERFLOWINSTANALYSIS_H
-#include "llvm/IR/InstrTypes.h"
-
namespace llvm {
-class Value;
class Use;
+class Value;
/// Match one of the patterns up to the select/logic op:
/// %Op0 = icmp ne i4 %X, 0
diff --git a/llvm/include/llvm/Analysis/PhiValues.h b/llvm/include/llvm/Analysis/PhiValues.h
index c0e91c8b0bdf..ecbb8874b378 100644
--- a/llvm/include/llvm/Analysis/PhiValues.h
+++ b/llvm/include/llvm/Analysis/PhiValues.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
diff --git a/llvm/include/llvm/Analysis/PostDominators.h b/llvm/include/llvm/Analysis/PostDominators.h
index 296110d8d03b..4383113c8db1 100644
--- a/llvm/include/llvm/Analysis/PostDominators.h
+++ b/llvm/include/llvm/Analysis/PostDominators.h
@@ -102,10 +102,7 @@ template <> struct GraphTraits<PostDominatorTree*>
}
static nodes_iterator nodes_begin(PostDominatorTree *N) {
- if (getEntryNode(N))
- return df_begin(getEntryNode(N));
- else
- return df_end(getEntryNode(N));
+ return df_begin(getEntryNode(N));
}
static nodes_iterator nodes_end(PostDominatorTree *N) {
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index 886800d8a0f5..773784ac418c 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -170,11 +170,11 @@ public:
uint64_t getOrCompColdCountThreshold() const;
/// Returns HotCountThreshold if set.
uint64_t getHotCountThreshold() const {
- return HotCountThreshold.getValueOr(0);
+ return HotCountThreshold.value_or(0);
}
/// Returns ColdCountThreshold if set.
uint64_t getColdCountThreshold() const {
- return ColdCountThreshold.getValueOr(0);
+ return ColdCountThreshold.value_or(0);
}
private:
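
[Illustration, not part of the patch: the getValueOr/hasValue renames here and in ObjCARCUtil.h above are part of the tree-wide move to std::optional-style spellings on llvm::Optional; a minimal before/after sketch with a hypothetical variable.]

// Sketch only: old and new spellings side by side.
#include "llvm/ADT/Optional.h"
#include <cstdint>

void spellings(llvm::Optional<uint64_t> Threshold) {
  // Old: Threshold.hasValue()      New:
  bool Known = Threshold.has_value(); // same as static_cast<bool>(Threshold)
  // Old: Threshold.getValueOr(0)   New:
  uint64_t T = Threshold.value_or(0); // default when unset
  (void)Known; (void)T;
}
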
diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
index 78e9251da627..86206b2d5e9f 100644
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -26,22 +26,15 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/Support/Casting.h"
-#include <algorithm>
#include <cassert>
#include <type_traits>
namespace llvm {
+class DataLayout;
+class Use;
namespace detail {
diff --git a/llvm/include/llvm/Analysis/RegionInfo.h b/llvm/include/llvm/Analysis/RegionInfo.h
index f93081d6f51d..612b977f1ffa 100644
--- a/llvm/include/llvm/Analysis/RegionInfo.h
+++ b/llvm/include/llvm/Analysis/RegionInfo.h
@@ -42,11 +42,9 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <map>
@@ -58,6 +56,7 @@
namespace llvm {
+class BasicBlock;
class DominanceFrontier;
class Loop;
class LoopInfo;
@@ -67,6 +66,7 @@ template <class RegionTr> class RegionBase;
class RegionInfo;
template <class RegionTr> class RegionInfoBase;
class RegionNode;
+class raw_ostream;
// Class to be specialized for different users of RegionInfo
// (i.e. BasicBlocks or MachineBasicBlocks). This is only to avoid needing to
@@ -242,7 +242,7 @@ public:
///
/// You can obtain more examples by either calling
///
-/// <tt> "opt -regions -analyze anyprogram.ll" </tt>
+/// <tt> "opt -passes='print<regions>' anyprogram.ll" </tt>
/// or
/// <tt> "opt -view-regions-only anyprogram.ll" </tt>
///
diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index b694effb2229..561702db3790 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -15,8 +15,6 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/RegionInfo.h"
@@ -24,7 +22,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -37,6 +34,7 @@
#define DEBUG_TYPE "region"
namespace llvm {
+class raw_ostream;
//===----------------------------------------------------------------------===//
/// RegionBase Implementation
diff --git a/llvm/include/llvm/Analysis/RegionIterator.h b/llvm/include/llvm/Analysis/RegionIterator.h
index fecb28725dcc..ba28b1b902ea 100644
--- a/llvm/include/llvm/Analysis/RegionIterator.h
+++ b/llvm/include/llvm/Analysis/RegionIterator.h
@@ -15,7 +15,6 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/IR/CFG.h"
#include <cassert>
#include <iterator>
#include <type_traits>
@@ -23,6 +22,7 @@
namespace llvm {
class BasicBlock;
+class RegionInfo;
//===----------------------------------------------------------------------===//
/// Hierarchical RegionNode successor iterator.
diff --git a/llvm/include/llvm/Analysis/RegionPass.h b/llvm/include/llvm/Analysis/RegionPass.h
index 5c7fa5f56693..dd5e6a1a3b24 100644
--- a/llvm/include/llvm/Analysis/RegionPass.h
+++ b/llvm/include/llvm/Analysis/RegionPass.h
@@ -15,7 +15,6 @@
#ifndef LLVM_ANALYSIS_REGIONPASS_H
#define LLVM_ANALYSIS_REGIONPASS_H
-#include "llvm/Analysis/RegionInfo.h"
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Pass.h"
#include <deque>
@@ -23,6 +22,8 @@
namespace llvm {
class Function;
class RGPassManager;
+class Region;
+class RegionInfo;
//===----------------------------------------------------------------------===//
/// A pass that runs on each Region in a function.
diff --git a/llvm/include/llvm/Analysis/RegionPrinter.h b/llvm/include/llvm/Analysis/RegionPrinter.h
index 154ac35c486a..501a5406236e 100644
--- a/llvm/include/llvm/Analysis/RegionPrinter.h
+++ b/llvm/include/llvm/Analysis/RegionPrinter.h
@@ -14,6 +14,9 @@
#ifndef LLVM_ANALYSIS_REGIONPRINTER_H
#define LLVM_ANALYSIS_REGIONPRINTER_H
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/RegionInfo.h"
+
namespace llvm {
class FunctionPass;
class Function;
@@ -24,6 +27,13 @@ namespace llvm {
FunctionPass *createRegionPrinterPass();
FunctionPass *createRegionOnlyPrinterPass();
+ template <>
+ struct DOTGraphTraits<RegionNode *> : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph);
+ };
+
#ifndef NDEBUG
/// Open a viewer to display the GraphViz visualization of the analysis
/// result.
diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
index 1bf2e853980c..bf1aaca2adbb 100644
--- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
+++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -15,11 +15,12 @@
#define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Support/ErrorHandling.h"
#include <memory>
#include <vector>
-using namespace llvm;
namespace llvm {
/// ReleaseModeModelRunner - production mode implementation of the
@@ -30,21 +31,20 @@ public:
/// FeatureNames' type should be an indexed collection of std::string, like
/// std::array or std::vector, that has a size() method.
template <class FType>
- ReleaseModeModelRunner(LLVMContext &Ctx, const FType &FeatureNames,
+ ReleaseModeModelRunner(LLVMContext &Ctx, const FType &InputSpec,
StringRef DecisionName, StringRef FeedPrefix = "feed_",
StringRef FetchPrefix = "fetch_")
- : MLModelRunner(Ctx, MLModelRunner::Kind::Release),
+ : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size()),
CompiledModel(std::make_unique<TGen>()) {
assert(CompiledModel && "The CompiledModel should be valid");
- const size_t FeatureCount = FeatureNames.size();
- FeatureIndices.resize(FeatureCount);
-
- for (size_t I = 0; I < FeatureCount; ++I) {
+ for (size_t I = 0; I < InputSpec.size(); ++I) {
const int Index =
- CompiledModel->LookupArgIndex(FeedPrefix.str() + FeatureNames[I]);
- assert(Index >= 0 && "Cannot find Feature in inlining model");
- FeatureIndices[I] = Index;
+ CompiledModel->LookupArgIndex(FeedPrefix.str() + InputSpec[I].name());
+ void *Buffer = nullptr;
+ if (Index >= 0)
+ Buffer = CompiledModel->arg_data(Index);
+ setUpBufferForTensor(I, InputSpec[I], Buffer);
}
ResultIndex = CompiledModel->LookupResultIndex(FetchPrefix.str() +
@@ -64,15 +64,27 @@ private:
return CompiledModel->result_data(ResultIndex);
}
- void *getTensorUntyped(size_t Index) override {
- return reinterpret_cast<char *>(
- CompiledModel->arg_data(FeatureIndices[Index]));
- }
-
- std::vector<int32_t> FeatureIndices;
int32_t ResultIndex = -1;
std::unique_ptr<TGen> CompiledModel;
};
+
+/// A mock class satisfying the interface expected by ReleaseModeModelRunner for
+/// its `TGen` parameter. Useful to avoid conditional compilation complexity, as
+/// a compile-time replacement for a real AOT-ed model.
+class NoopSavedModelImpl final {
+#define NOOP_MODEL_ERRMSG \
+ "The mock AOT-ed saved model is a compile-time stub and should not be " \
+ "called."
+
+public:
+ NoopSavedModelImpl() = default;
+ int LookupArgIndex(const std::string &) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
+ int LookupResultIndex(const std::string &) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
+ void Run() { llvm_unreachable(NOOP_MODEL_ERRMSG); }
+ void *result_data(int) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
+ void *arg_data(int) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
+#undef NOOP_MODEL_ERRMSG
+};
} // namespace llvm
#endif // LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
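
[Illustration, not part of the patch: how the new NoopSavedModelImpl is meant to be used. The config macro and the real model class below are hypothetical; the point is that the stub only has to type-check — reaching any of its methods at runtime traps.]

// Sketch only: pick the stub when no AOT-ed model was compiled in.
#include "llvm/Analysis/ReleaseModeModelRunner.h"

#if defined(LLVM_HAVE_SOME_AOT_MODEL)   // hypothetical config macro
using CompiledModelTy = SomeAOTModel;   // hypothetical AOT-ed class
#else
using CompiledModelTy = llvm::NoopSavedModelImpl;
#endif

// Elsewhere, behind the same guard at runtime:
//   ReleaseModeModelRunner<CompiledModelTy> Runner(Ctx, InputSpecs, "decision");
// With NoopSavedModelImpl even the constructor would hit llvm_unreachable,
// so callers must never take this path when the stub is selected.
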
diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
index dc2efeafb568..0c5b566f60a4 100644
--- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
@@ -11,11 +11,11 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/InlineAdvisor.h"
-#include "llvm/IR/LLVMContext.h"
namespace llvm {
class CallBase;
class Function;
+class LLVMContext;
class Module;
struct CallSiteFormat {
@@ -53,10 +53,12 @@ struct ReplayInlinerSettings {
/// Get call site location as a string with the given format
std::string formatCallSiteLocation(DebugLoc DLoc, const CallSiteFormat &Format);
-std::unique_ptr<InlineAdvisor> getReplayInlineAdvisor(
- Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
- std::unique_ptr<InlineAdvisor> OriginalAdvisor,
- const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks);
+std::unique_ptr<InlineAdvisor>
+getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+ LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings,
+ bool EmitRemarks, InlineContext IC);
/// Replay inline advisor that uses optimization remarks from inlining of
/// previous build to guide current inlining. This is useful for inliner tuning.
@@ -66,7 +68,7 @@ public:
LLVMContext &Context,
std::unique_ptr<InlineAdvisor> OriginalAdvisor,
const ReplayInlinerSettings &ReplaySettings,
- bool EmitRemarks);
+ bool EmitRemarks, InlineContext IC);
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index b16aa7017719..de1cc299f062 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -31,18 +31,12 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ConstantRange.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
@@ -50,12 +44,14 @@
namespace llvm {
+class OverflowingBinaryOperator;
class AssumptionCache;
class BasicBlock;
class Constant;
class ConstantInt;
class DataLayout;
class DominatorTree;
+class Function;
class GEPOperator;
class Instruction;
class LLVMContext;
@@ -71,6 +67,8 @@ class Type;
class Value;
enum SCEVTypes : unsigned short;
+extern bool VerifySCEV;
+
/// This class represents an analyzed expression in the program. These are
/// opaque objects that the client is not allowed to do much with directly.
///
@@ -222,7 +220,7 @@ class SCEVPredicate : public FoldingSetNode {
FoldingSetNodeIDRef FastID;
public:
- enum SCEVPredicateKind { P_Union, P_Equal, P_Wrap };
+ enum SCEVPredicateKind { P_Union, P_Compare, P_Wrap };
protected:
SCEVPredicateKind Kind;
@@ -249,10 +247,6 @@ public:
/// Prints a textual representation of this predicate with an indentation of
/// \p Depth.
virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0;
-
- /// Returns the SCEV to which this predicate applies, or nullptr if this is
- /// a SCEVUnionPredicate.
- virtual const SCEV *getExpr() const = 0;
};
inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) {
@@ -279,32 +273,35 @@ struct FoldingSetTrait<SCEVPredicate> : DefaultFoldingSetTrait<SCEVPredicate> {
}
};
-/// This class represents an assumption that two SCEV expressions are equal,
-/// and this can be checked at run-time.
-class SCEVEqualPredicate final : public SCEVPredicate {
- /// We assume that LHS == RHS.
+/// This class represents an assumption that the expression LHS Pred RHS
+/// evaluates to true, and this can be checked at run-time.
+class SCEVComparePredicate final : public SCEVPredicate {
+ /// We assume that LHS Pred RHS is true.
+ const ICmpInst::Predicate Pred;
const SCEV *LHS;
const SCEV *RHS;
public:
- SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEV *LHS,
- const SCEV *RHS);
+ SCEVComparePredicate(const FoldingSetNodeIDRef ID,
+ const ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
/// Implementation of the SCEVPredicate interface
bool implies(const SCEVPredicate *N) const override;
void print(raw_ostream &OS, unsigned Depth = 0) const override;
bool isAlwaysTrue() const override;
- const SCEV *getExpr() const override;
- /// Returns the left hand side of the equality.
+ ICmpInst::Predicate getPredicate() const { return Pred; }
+
+ /// Returns the left hand side of the predicate.
const SCEV *getLHS() const { return LHS; }
- /// Returns the right hand side of the equality.
+ /// Returns the right hand side of the predicate.
const SCEV *getRHS() const { return RHS; }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEVPredicate *P) {
- return P->getKind() == P_Equal;
+ return P->getKind() == P_Compare;
}
};
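
[Illustration, not part of the patch: the equality predicate becomes the ICMP_EQ case of the more general compare predicate; a sketch using the getComparePredicate() entry point added further down in this file. SE, LHS and RHS are assumed in scope.]

// Sketch only: building compare predicates through ScalarEvolution.
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

void collectAssumptions(ScalarEvolution &SE, const SCEV *LHS, const SCEV *RHS,
                        SmallVectorImpl<const SCEVPredicate *> &Preds) {
  // What getEqualPredicate(LHS, RHS) expressed before this change:
  Preds.push_back(SE.getComparePredicate(ICmpInst::ICMP_EQ, LHS, RHS));
  // ...and an assumption equality could not express, e.g. signed less-than:
  Preds.push_back(SE.getComparePredicate(ICmpInst::ICMP_SLT, LHS, RHS));
}
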
@@ -396,7 +393,7 @@ public:
IncrementWrapFlags getFlags() const { return Flags; }
/// Implementation of the SCEVPredicate interface
- const SCEV *getExpr() const override;
+ const SCEVAddRecExpr *getExpr() const;
bool implies(const SCEVPredicate *N) const override;
void print(raw_ostream &OS, unsigned Depth = 0) const override;
bool isAlwaysTrue() const override;
@@ -421,28 +418,20 @@ private:
/// Vector with references to all predicates in this union.
SmallVector<const SCEVPredicate *, 16> Preds;
- /// Maps SCEVs to predicates for quick look-ups.
- PredicateMap SCEVToPreds;
+ /// Adds a predicate to this union.
+ void add(const SCEVPredicate *N);
public:
- SCEVUnionPredicate();
+ SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds);
const SmallVectorImpl<const SCEVPredicate *> &getPredicates() const {
return Preds;
}
- /// Adds a predicate to this union.
- void add(const SCEVPredicate *N);
-
- /// Returns a reference to a vector containing all predicates which apply to
- /// \p Expr.
- ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr);
-
/// Implementation of the SCEVPredicate interface
bool isAlwaysTrue() const override;
bool implies(const SCEVPredicate *N) const override;
void print(raw_ostream &OS, unsigned Depth) const override;
- const SCEV *getExpr() const override;
/// We estimate the complexity of a union predicate as the number of
/// predicates in the union.
@@ -556,6 +545,10 @@ public:
/// Return true if the SCEV expression contains an undef value.
bool containsUndefs(const SCEV *S) const;
+ /// Return true if the SCEV expression contains a Value that has been
+ /// optimised out and is now a nullptr.
+ bool containsErasedValue(const SCEV *S) const;
+
/// Return a SCEV expression for the full generality of the specified
/// expression.
const SCEV *getSCEV(Value *V);
@@ -885,7 +878,7 @@ public:
/// the answer to be correct. Predicates can be checked with run-time
/// checks and can be used to perform loop versioning.
const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
- SCEVUnionPredicate &Predicates);
+ SmallVector<const SCEVPredicate *, 4> &Predicates);
/// When successful, this returns a SCEVConstant that is greater than or equal
/// to (i.e. a "conservative over-approximation") of the value returned by
@@ -1166,6 +1159,8 @@ public:
}
const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS);
+ const SCEVPredicate *getComparePredicate(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
const SCEVPredicate *
getWrapPredicate(const SCEVAddRecExpr *AR,
@@ -1173,7 +1168,7 @@ public:
/// Re-writes the SCEV according to the Predicates in \p A.
const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L,
- SCEVUnionPredicate &A);
+ const SCEVPredicate &A);
/// Tries to convert the \p S expression to an AddRec expression,
/// adding additional predicates to \p Preds as required.
const SCEVAddRecExpr *convertSCEVToAddRecWithPredicates(
@@ -1256,30 +1251,11 @@ private:
HasRecMapType HasRecMap;
/// The type for ExprValueMap.
- using ValueOffsetPair = std::pair<Value *, ConstantInt *>;
- using ValueOffsetPairSetVector = SmallSetVector<ValueOffsetPair, 4>;
- using ExprValueMapType = DenseMap<const SCEV *, ValueOffsetPairSetVector>;
+ using ValueSetVector = SmallSetVector<Value *, 4>;
+ using ExprValueMapType = DenseMap<const SCEV *, ValueSetVector>;
/// ExprValueMap -- This map records the original values from which
/// the SCEV expr is generated from.
- ///
- /// We want to represent the mapping as SCEV -> ValueOffsetPair instead
- /// of SCEV -> Value:
- /// Suppose we know S1 expands to V1, and
- /// S1 = S2 + C_a
- /// S3 = S2 + C_b
- /// where C_a and C_b are different SCEVConstants. Then we'd like to
- /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally.
- /// It is helpful when S2 is a complex SCEV expr.
- ///
- /// In order to do that, we represent ExprValueMap as a mapping from
- /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and
- /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3
- /// is expanded, it will first expand S2 to V1 - C_a because of
- /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b.
- ///
- /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded
- /// to V - Offset.
ExprValueMapType ExprValueMap;
/// The type for ValueExprMap.
@@ -1310,7 +1286,7 @@ private:
DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
/// Return the Value set from which the SCEV expr is generated.
- ValueOffsetPairSetVector *getSCEVValues(const SCEV *S);
+ ArrayRef<Value *> getSCEVValues(const SCEV *S);
/// Private helper method for the GetMinTrailingZeros method
uint32_t GetMinTrailingZerosImpl(const SCEV *S);
@@ -1369,17 +1345,17 @@ private:
PoisoningVH<BasicBlock> ExitingBlock;
const SCEV *ExactNotTaken;
const SCEV *MaxNotTaken;
- std::unique_ptr<SCEVUnionPredicate> Predicate;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
explicit ExitNotTakenInfo(PoisoningVH<BasicBlock> ExitingBlock,
const SCEV *ExactNotTaken,
const SCEV *MaxNotTaken,
- std::unique_ptr<SCEVUnionPredicate> Predicate)
+ const SmallPtrSet<const SCEVPredicate *, 4> &Predicates)
: ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken),
- MaxNotTaken(ExactNotTaken), Predicate(std::move(Predicate)) {}
+ MaxNotTaken(ExactNotTaken), Predicates(Predicates) {}
bool hasAlwaysTruePredicate() const {
- return !Predicate || Predicate->isAlwaysTrue();
+ return Predicates.empty();
}
};
@@ -1452,7 +1428,7 @@ private:
/// vector, this information can contain them and therefore a
/// SCEVPredicate argument should be added to getExact.
const SCEV *getExact(const Loop *L, ScalarEvolution *SE,
- SCEVUnionPredicate *Predicates = nullptr) const;
+ SmallVector<const SCEVPredicate *, 4> *Predicates = nullptr) const;
/// Return the number of times this loop exit may fall through to the back
/// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
@@ -1599,9 +1575,17 @@ private:
ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U);
/// We know that there is no SCEV for the specified value. Analyze the
- /// expression.
+ /// expression recursively.
const SCEV *createSCEV(Value *V);
+ /// We know that there is no SCEV for the specified value. Create a new SCEV
+ /// for \p V iteratively.
+ const SCEV *createSCEVIter(Value *V);
+ /// Collect operands of \p V for which SCEV expressions should be constructed
+ /// first. Returns a SCEV directly if it can be constructed trivially for \p
+ /// V.
+ const SCEV *getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops);
+
/// Provide the special handling we need to analyze PHI SCEVs.
const SCEV *createNodeForPHI(PHINode *PN);
@@ -1619,8 +1603,22 @@ private:
/// is either a select instruction or a phi node). \p I is the instruction
/// being processed, and it is assumed equivalent to "Cond ? TrueVal :
/// FalseVal".
- const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond,
- Value *TrueVal, Value *FalseVal);
+ const SCEV *createNodeForSelectOrPHIInstWithICmpInstCond(Instruction *I,
+ ICmpInst *Cond,
+ Value *TrueVal,
+ Value *FalseVal);
+
+ /// See if we can model this select-like instruction via umin_seq expression.
+ const SCEV *createNodeForSelectOrPHIViaUMinSeq(Value *I, Value *Cond,
+ Value *TrueVal,
+ Value *FalseVal);
+
+ /// Given a value \p V, a select-like instruction (currently either a select
+ /// instruction or a phi node) assumed equivalent to
+ /// Cond ? TrueVal : FalseVal,
+ /// see if we can model it as a SCEV expression.
+ const SCEV *createNodeForSelectOrPHI(Value *V, Value *Cond, Value *TrueVal,
+ Value *FalseVal);
/// Provide the special handling we need to analyze GEP SCEVs.
const SCEV *createNodeForGEP(GEPOperator *GEP);
@@ -2097,6 +2095,11 @@ private:
/// `UniqueSCEVs`. Return if found, else nullptr.
SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
+ /// Get reachable blocks in this function, making limited use of SCEV
+ /// reasoning about conditions.
+ void getReachableBlocks(SmallPtrSetImpl<BasicBlock *> &Reachable,
+ Function &F);
+
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
@@ -2182,7 +2185,7 @@ class PredicatedScalarEvolution {
public:
PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L);
- const SCEVUnionPredicate &getUnionPredicate() const;
+ const SCEVPredicate &getPredicate() const;
/// Returns the SCEV expression of V, in the context of the current SCEV
/// predicate. The order of transformations applied on the expression of V
@@ -2251,7 +2254,7 @@ private:
/// The SCEVPredicate that forms our context. We will rewrite all
/// expressions assuming that this predicate is true.
- SCEVUnionPredicate Preds;
+ std::unique_ptr<SCEVUnionPredicate> Preds;
/// Marks the version of the SCEV predicate used. When rewriting a SCEV
/// expression we mark it with the version of the predicate. We use this to
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index ebd427354cee..15e27283021c 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -14,13 +14,14 @@
#define LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
namespace llvm {
+class Function;
+class ScalarEvolution;
+class SCEV;
+
/// A simple alias analysis implementation that uses ScalarEvolution to answer
/// queries.
class SCEVAAResult : public AAResultBase<SCEVAAResult> {
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index cd8e5fab6766..b29854cddc66 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -14,13 +14,11 @@
#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -31,9 +29,11 @@ namespace llvm {
class APInt;
class Constant;
+class ConstantInt;
class ConstantRange;
class Loop;
class Type;
+class Value;
enum SCEVTypes : unsigned short {
// These should be ordered in terms of increasing complexity to make the
@@ -699,8 +699,11 @@ public:
case scUMinExpr:
case scSequentialUMinExpr:
case scAddRecExpr:
- for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
+ for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
push(Op);
+ if (Visitor.isDone())
+ break;
+ }
continue;
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
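
[Illustration, not part of the patch: the early break above lets SCEVTraversal stop pushing operands as soon as the visitor reports it is done; a sketch of a find-first visitor that benefits from it.]

// Sketch only: a find-first visitor for visitAll()/SCEVTraversal. Once
// Found is set, isDone() returns true and, with the change above, the
// traversal stops instead of walking the whole expression tree.
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

struct FindUnknown {
  const SCEVUnknown *Found = nullptr;
  bool follow(const SCEV *S) {
    if (auto *U = dyn_cast<SCEVUnknown>(S))
      Found = U;
    return !isDone(); // keep following operands until done
  }
  bool isDone() const { return Found != nullptr; }
};

const SCEVUnknown *findFirstUnknown(const SCEV *Root) {
  FindUnknown V;
  visitAll(Root, V); // SCEVTraversal drives follow()/isDone()
  return V.Found;
}
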
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 6ab92a3a977f..da420ff1e6d2 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -35,7 +35,7 @@
#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
#define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/ScalarFuncs.def b/llvm/include/llvm/Analysis/ScalarFuncs.def
new file mode 100644
index 000000000000..2ed9be538091
--- /dev/null
+++ b/llvm/include/llvm/Analysis/ScalarFuncs.def
@@ -0,0 +1,117 @@
+//===-- ScalarFuncs.def - Library information ----------*- C++ -*----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This .def file creates a mapping from standard IEEE math functions to
+// their corresponding entries in the IBM MASS (scalar) library.
+// LLVM intrinsic math functions will be handled in PPCISelLowering to
+// allow existing optimizations like pow(x,0.5) --> sqrt(x).
+
+#if defined(TLI_DEFINE_SCALAR_MASS_FUNCS)
+#define TLI_DEFINE_SCALAR_MASS_FUNC(SCAL, MASSENTRY) {SCAL, MASSENTRY},
+#endif
+
+TLI_DEFINE_SCALAR_MASS_FUNC("acosf", "__xl_acosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acosf_finite", "__xl_acosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("acos", "__xl_acos")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acos_finite", "__xl_acos")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("acoshf", "__xl_acoshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acoshf_finite", "__xl_acoshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("acosh", "__xl_acosh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__acosh_finite", "__xl_acosh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("asinf", "__xl_asinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__asinf_finite", "__xl_asinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("asin", "__xl_asin")
+TLI_DEFINE_SCALAR_MASS_FUNC("__asin_finite", "__xl_asin")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("asinhf", "__xl_asinhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("asinh", "__xl_asinh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atanf", "__xl_atanf")
+TLI_DEFINE_SCALAR_MASS_FUNC("atan", "__xl_atan")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atan2f", "__xl_atan2f")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atan2f_finite", "__xl_atan2f")
+TLI_DEFINE_SCALAR_MASS_FUNC("atan2", "__xl_atan2")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atan2_finite", "__xl_atan2")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("atanhf", "__xl_atanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atanhf_finite", "__xl_atanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("atanh", "__xl_atanh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__atanh_finite", "__xl_atanh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("cbrtf", "__xl_cbrtf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cbrt", "__xl_cbrt")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("cosf", "__xl_cosf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cos", "__xl_cos")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("coshf", "__xl_coshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__coshf_finite", "__xl_coshf")
+TLI_DEFINE_SCALAR_MASS_FUNC("cosh", "__xl_cosh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__cosh_finite", "__xl_cosh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("erff", "__xl_erff")
+TLI_DEFINE_SCALAR_MASS_FUNC("erf", "__xl_erf")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("erfcf", "__xl_erfcf")
+TLI_DEFINE_SCALAR_MASS_FUNC("erfc", "__xl_erfc")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("expf", "__xl_expf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__expf_finite", "__xl_expf")
+TLI_DEFINE_SCALAR_MASS_FUNC("exp", "__xl_exp")
+TLI_DEFINE_SCALAR_MASS_FUNC("__exp_finite", "__xl_exp")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("expm1f", "__xl_expm1f")
+TLI_DEFINE_SCALAR_MASS_FUNC("expm1", "__xl_expm1")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("hypotf", "__xl_hypotf")
+TLI_DEFINE_SCALAR_MASS_FUNC("hypot", "__xl_hypot")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("lgammaf", "__xl_lgammaf")
+TLI_DEFINE_SCALAR_MASS_FUNC("lgamma", "__xl_lgamma")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("logf", "__xl_logf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__logf_finite", "__xl_logf")
+TLI_DEFINE_SCALAR_MASS_FUNC("log", "__xl_log")
+TLI_DEFINE_SCALAR_MASS_FUNC("__log_finite", "__xl_log")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("log10f", "__xl_log10f")
+TLI_DEFINE_SCALAR_MASS_FUNC("__log10f_finite", "__xl_log10f")
+TLI_DEFINE_SCALAR_MASS_FUNC("log10", "__xl_log10")
+TLI_DEFINE_SCALAR_MASS_FUNC("__log10_finite", "__xl_log10")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("log1pf", "__xl_log1pf")
+TLI_DEFINE_SCALAR_MASS_FUNC("log1p", "__xl_log1p")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("powf", "__xl_powf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__powf_finite", "__xl_powf")
+TLI_DEFINE_SCALAR_MASS_FUNC("pow", "__xl_pow")
+TLI_DEFINE_SCALAR_MASS_FUNC("__pow_finite", "__xl_pow")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("rsqrt", "__xl_rsqrt")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("sinf", "__xl_sinf")
+TLI_DEFINE_SCALAR_MASS_FUNC("sin", "__xl_sin")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("sinhf", "__xl_sinhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("__sinhf_finite", "__xl_sinhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("sinh", "__xl_sinh")
+TLI_DEFINE_SCALAR_MASS_FUNC("__sinh_finite", "__xl_sinh")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("sqrt", "__xl_sqrt")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("tanf", "__xl_tanf")
+TLI_DEFINE_SCALAR_MASS_FUNC("tan", "__xl_tan")
+
+TLI_DEFINE_SCALAR_MASS_FUNC("tanhf", "__xl_tanhf")
+TLI_DEFINE_SCALAR_MASS_FUNC("tanh", "__xl_tanh")
+
+#undef TLI_DEFINE_SCALAR_MASS_FUNCS
+#undef TLI_DEFINE_SCALAR_MASS_FUNC
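
[Illustration, not part of the patch: the usual include-side idiom for a .def table like the one above. The struct and array names are hypothetical; the real consumer lives on the PowerPC side.]

// Sketch only: define the guard macro, include the .def, get a table.
struct MassEntry {        // hypothetical record type
  const char *ScalarName;
  const char *MassName;
};

static const MassEntry ScalarMassFuncs[] = {
#define TLI_DEFINE_SCALAR_MASS_FUNCS
#include "llvm/Analysis/ScalarFuncs.def"
    // each entry expands to e.g. {"acosf", "__xl_acosf"},
    // and the .def #undefs its own macros at the end
};
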
diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h
index 6eb6d5518a41..428238c5fa0b 100644
--- a/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_SPARSEPROPAGATION_H
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include <set>
diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h
index 239aec4e258b..7fd88362276a 100644
--- a/llvm/include/llvm/Analysis/StackLifetime.h
+++ b/llvm/include/llvm/Analysis/StackLifetime.h
@@ -14,10 +14,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
#include <utility>
namespace llvm {
@@ -26,6 +24,7 @@ class AllocaInst;
class BasicBlock;
class Function;
class Instruction;
+class IntrinsicInst;
/// Compute live ranges of allocas.
/// Live ranges are represented as sets of "interesting" instructions, which are
diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
index cfc1e20255d1..e6e3efbe0fcb 100644
--- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -16,18 +16,18 @@
#ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
#define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/LoopInfo.h"
#include <map>
#include <memory>
#include <unordered_map>
+#include <vector>
namespace llvm {
class BasicBlock;
class DominatorTree;
+class Instruction;
+class LoopInfo;
class PostDominatorTree;
using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>;
diff --git a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
index f9bac739cee6..458b599f2937 100644
--- a/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/llvm/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -13,7 +13,7 @@
#ifndef LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
#define LLVM_ANALYSIS_SYNTHETICCOUNTSUTILS_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/ScaledNumber.h"
diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h
index 1df0530e40e6..3a7218b10b97 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -21,12 +21,14 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IRBuilderFolder.h"
+#include "llvm/IR/Operator.h"
namespace llvm {
+class Constant;
class DataLayout;
+class Type;
/// TargetFolder - Create constants with target dependent folding.
class TargetFolder final : public IRBuilderFolder {
@@ -48,31 +50,45 @@ public:
// Return an existing value or a constant if the operation can be simplified.
// Otherwise return nullptr.
//===--------------------------------------------------------------------===//
- Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
+
+ Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
if (LC && RC)
- return Fold(ConstantExpr::getAdd(LC, RC, HasNUW, HasNSW));
+ return Fold(ConstantExpr::get(Opc, LC, RC));
return nullptr;
}
- Value *FoldAnd(Value *LHS, Value *RHS) const override {
+ Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool IsExact) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
if (LC && RC)
- return Fold(ConstantExpr::getAnd(LC, RC));
+ return Fold(ConstantExpr::get(
+ Opc, LC, RC, IsExact ? PossiblyExactOperator::IsExact : 0));
return nullptr;
}
- Value *FoldOr(Value *LHS, Value *RHS) const override {
+ Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool HasNUW, bool HasNSW) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
- if (LC && RC)
- return Fold(ConstantExpr::getOr(LC, RC));
+ if (LC && RC) {
+ unsigned Flags = 0;
+ if (HasNUW)
+ Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (HasNSW)
+ Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ return Fold(ConstantExpr::get(Opc, LC, RC, Flags));
+ }
return nullptr;
}
+ Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ FastMathFlags FMF) const override {
+ return FoldBinOp(Opc, LHS, RHS);
+ }
Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
@@ -105,82 +121,56 @@ public:
return nullptr;
}
- //===--------------------------------------------------------------------===//
- // Binary Operators
- //===--------------------------------------------------------------------===//
-
- Constant *CreateFAdd(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getFAdd(LHS, RHS));
- }
- Constant *CreateSub(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const override {
- return Fold(ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW));
- }
- Constant *CreateFSub(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getFSub(LHS, RHS));
- }
- Constant *CreateMul(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const override {
- return Fold(ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW));
- }
- Constant *CreateFMul(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getFMul(LHS, RHS));
- }
- Constant *CreateUDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return Fold(ConstantExpr::getUDiv(LHS, RHS, isExact));
- }
- Constant *CreateSDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return Fold(ConstantExpr::getSDiv(LHS, RHS, isExact));
- }
- Constant *CreateFDiv(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getFDiv(LHS, RHS));
- }
- Constant *CreateURem(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getURem(LHS, RHS));
- }
- Constant *CreateSRem(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getSRem(LHS, RHS));
- }
- Constant *CreateFRem(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getFRem(LHS, RHS));
- }
- Constant *CreateShl(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const override {
- return Fold(ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW));
- }
- Constant *CreateLShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return Fold(ConstantExpr::getLShr(LHS, RHS, isExact));
+ Value *FoldExtractValue(Value *Agg,
+ ArrayRef<unsigned> IdxList) const override {
+ if (auto *CAgg = dyn_cast<Constant>(Agg))
+ return ConstantFoldExtractValueInstruction(CAgg, IdxList);
+ return nullptr;
+ }
+
+ Value *FoldInsertValue(Value *Agg, Value *Val,
+ ArrayRef<unsigned> IdxList) const override {
+ auto *CAgg = dyn_cast<Constant>(Agg);
+ auto *CVal = dyn_cast<Constant>(Val);
+ if (CAgg && CVal)
+ return ConstantFoldInsertValueInstruction(CAgg, CVal, IdxList);
+ return nullptr;
}
- Constant *CreateAShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return Fold(ConstantExpr::getAShr(LHS, RHS, isExact));
+
+ Value *FoldExtractElement(Value *Vec, Value *Idx) const override {
+ auto *CVec = dyn_cast<Constant>(Vec);
+ auto *CIdx = dyn_cast<Constant>(Idx);
+ if (CVec && CIdx)
+ return Fold(ConstantExpr::getExtractElement(CVec, CIdx));
+ return nullptr;
}
- Constant *CreateXor(Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::getXor(LHS, RHS));
+
+ Value *FoldInsertElement(Value *Vec, Value *NewElt,
+ Value *Idx) const override {
+ auto *CVec = dyn_cast<Constant>(Vec);
+ auto *CNewElt = dyn_cast<Constant>(NewElt);
+ auto *CIdx = dyn_cast<Constant>(Idx);
+ if (CVec && CNewElt && CIdx)
+ return Fold(ConstantExpr::getInsertElement(CVec, CNewElt, CIdx));
+ return nullptr;
}
- Constant *CreateBinOp(Instruction::BinaryOps Opc,
- Constant *LHS, Constant *RHS) const override {
- return Fold(ConstantExpr::get(Opc, LHS, RHS));
+ Value *FoldShuffleVector(Value *V1, Value *V2,
+ ArrayRef<int> Mask) const override {
+ auto *C1 = dyn_cast<Constant>(V1);
+ auto *C2 = dyn_cast<Constant>(V2);
+ if (C1 && C2)
+ return Fold(ConstantExpr::getShuffleVector(C1, C2, Mask));
+ return nullptr;
}
//===--------------------------------------------------------------------===//
// Unary Operators
//===--------------------------------------------------------------------===//
- Constant *CreateNeg(Constant *C,
- bool HasNUW = false, bool HasNSW = false) const override {
- return Fold(ConstantExpr::getNeg(C, HasNUW, HasNSW));
- }
Constant *CreateFNeg(Constant *C) const override {
return Fold(ConstantExpr::getFNeg(C));
}
- Constant *CreateNot(Constant *C) const override {
- return Fold(ConstantExpr::getNot(C));
- }
Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override {
return Fold(ConstantExpr::get(Opc, C));
@@ -252,34 +242,6 @@ public:
Constant *RHS) const override {
return Fold(ConstantExpr::getCompare(P, LHS, RHS));
}
-
- //===--------------------------------------------------------------------===//
- // Other Instructions
- //===--------------------------------------------------------------------===//
-
- Constant *CreateExtractElement(Constant *Vec, Constant *Idx) const override {
- return Fold(ConstantExpr::getExtractElement(Vec, Idx));
- }
-
- Constant *CreateInsertElement(Constant *Vec, Constant *NewElt,
- Constant *Idx) const override {
- return Fold(ConstantExpr::getInsertElement(Vec, NewElt, Idx));
- }
-
- Constant *CreateShuffleVector(Constant *V1, Constant *V2,
- ArrayRef<int> Mask) const override {
- return Fold(ConstantExpr::getShuffleVector(V1, V2, Mask));
- }
-
- Constant *CreateExtractValue(Constant *Agg,
- ArrayRef<unsigned> IdxList) const override {
- return Fold(ConstantExpr::getExtractValue(Agg, IdxList));
- }
-
- Constant *CreateInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> IdxList) const override {
- return Fold(ConstantExpr::getInsertValue(Agg, Val, IdxList));
- }
};
}
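
[Illustration, not part of the patch: TargetFolder is consumed through IRBuilder, which now dispatches to the Fold* hooks above instead of the removed Create* ones; a minimal sketch, with the function name assumed.]

// Sketch only: an IRBuilder that constant-folds with DataLayout knowledge.
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

Value *buildFoldedAdd(BasicBlock *BB, Value *L, Value *R) {
  const DataLayout &DL = BB->getModule()->getDataLayout();
  IRBuilder<TargetFolder> Builder(BB, TargetFolder(DL));
  // Constant operands reach FoldNoWrapBinOp() above and fold away;
  // otherwise a real `add nsw` instruction is emitted at the end of BB.
  return Builder.CreateAdd(L, R, "sum", /*HasNUW=*/false, /*HasNSW=*/true);
}
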
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 17d1e3f770c1..7bfda0124de7 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -12,14 +12,15 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
namespace llvm {
+
template <typename T> class ArrayRef;
+class Function;
+class Module;
class Triple;
/// Describes a possible vectorization of a function.
@@ -49,7 +50,7 @@ class TargetLibraryInfoImpl {
friend class TargetLibraryInfo;
unsigned char AvailableArray[(NumLibFuncs+3)/4];
- llvm::DenseMap<unsigned, std::string> CustomNames;
+ DenseMap<unsigned, std::string> CustomNames;
static StringLiteral const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
unsigned SizeOfInt;
@@ -279,6 +280,13 @@ public:
return B == OverrideAsUnavailable;
}
+ /// Return true if the function type FTy is valid for the library function
+ /// F, regardless of whether the function is available.
+ bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
+ const Module &M) const {
+ return Impl->isValidProtoForLibFunc(FTy, F, M);
+ }
+
/// Searches for a particular function name.
///
/// If it is one of the known library functions, return true and set F to the
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7412e050322e..372f17cfc7ff 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,13 +21,13 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
#include <utility>
@@ -617,8 +617,8 @@ public:
Instruction *I = nullptr) const;
/// Return true if LSR cost of C1 is lower than C2.
- bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2) const;
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) const;
/// Return true if LSR major cost is the number of registers. Targets which
/// implement their own isLSRCostLess and unset number of registers as major
@@ -659,6 +659,10 @@ public:
/// Return true if the target supports nontemporal load.
bool isLegalNTLoad(Type *DataType, Align Alignment) const;
+ /// \returns true if the target supports broadcasting a load to a vector of
+ /// type <NumElements x ElementTy>.
+ bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
+
/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked gather.
@@ -675,6 +679,16 @@ public:
/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;
+ /// Return true if this is an alternating opcode pattern that can be lowered
+ /// to a single instruction on the target. In X86 this is for the addsub
+ /// instruction which corresponds to a Shuffle + FAdd + FSub pattern in IR.
+ /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
+ /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
+ /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
+ /// \p VecTy is the vector type of the instruction to be generated.
+ bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask) const;
+
/// Return true if we should be enabling ordered reductions for the target.
bool enableOrderedReductions() const;
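
[Illustration, not part of the patch: building the per-lane mask for the new isLegalAltInstr() query; the addsub shape below — even lanes subtract, odd lanes add — is assumed as the motivating X86 case.]

// Sketch only: is <4 x float> with lanes {FSub, FAdd, FSub, FAdd} a single
// target instruction? Bit i of OpcodeMask: 0 selects Opcode0, 1 Opcode1.
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

bool canUseAddSub(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  SmallBitVector OpcodeMask(4, false); // all lanes start as Opcode0 (FAdd)
  OpcodeMask.set(0);
  OpcodeMask.set(2);                   // even lanes take Opcode1 (FSub)
  return TTI.isLegalAltInstr(VecTy, Instruction::FAdd, Instruction::FSub,
                             OpcodeMask);
}
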
@@ -727,7 +741,7 @@ public:
bool isTypeLegal(Type *Ty) const;
/// Returns the estimated number of registers required to represent \p Ty.
- InstructionCost getRegUsageForType(Type *Ty) const;
+ unsigned getRegUsageForType(Type *Ty) const;
/// Return true if switches should be turned into lookup tables for the
/// target.
@@ -762,6 +776,9 @@ public:
/// the scalarization cost of a load/store.
bool supportsEfficientVectorElementLoadStore() const;
+ /// Return true if the target supports tail calls.
+ bool supportsTailCalls() const;
+
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
@@ -934,7 +951,8 @@ public:
/// creating vectors that span multiple vector registers.
/// If false, the vectorization factor will be chosen based on the
/// size of the widest element type.
- bool shouldMaximizeVectorBandwidth() const;
+ /// \p K Register Kind for vectorization.
+ bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
/// \return The minimum vectorization factor for types of given element
/// bit width, or 0 if there is no minimum VF. The returned value only
@@ -947,6 +965,17 @@ public:
/// Currently only used by the SLP vectorizer.
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
+ /// \return The minimum vectorization factor for the store instruction. Given
+ /// the initial estimation of the minimum vector factor and store value type,
+ /// it tries to find the lowest possible VF which still might be profitable
+ /// for the vectorization.
+ /// \param VF Initial estimation of the minimum vector factor.
+ /// \param ScalarMemTy Scalar memory type of the store operation.
+ /// \param ScalarValTy Scalar type of the stored value.
+ /// Currently only used by the SLP vectorizer.
+ unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+ Type *ScalarValTy) const;
+
/// \return True if it should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
/// profitable without finding other extensions fed by the same input.
@@ -1045,11 +1074,14 @@ public:
/// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds to show the insert/extract point and the type of
- /// the subvector being inserted/extracted.
+ /// the subvector being inserted/extracted. The operands of the shuffle can be
+ /// passed through \p Args, which helps improve the cost estimation in some
+ /// cases, like in broadcast loads.
/// NOTE: For subvector extractions Tp represents the source type.
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask = None, int Index = 0,
- VectorType *SubTp = nullptr) const;
+ VectorType *SubTp = nullptr,
+ ArrayRef<const Value *> Args = None) const;
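As an illustration of the new Args parameter, a caller costing a broadcast
shuffle can forward the shuffle's operands so a target that folds
load+broadcast may report a cheaper cost. A hedged sketch; the wrapper
function is an assumption, not part of the header:

  // Query the cost of a broadcast shuffle, passing the operands through.
  InstructionCost broadcastCost(const TargetTransformInfo &TTI,
                                VectorType *VecTy,
                                ArrayRef<const Value *> ShuffleOperands) {
    return TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
                              /*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr,
                              ShuffleOperands);
  }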
/// Represents a hint about the context in which a cast is used.
///
@@ -1283,9 +1315,11 @@ public:
Type *ExpectedType) const;
/// \returns The type to use in a loop expansion of a memcpy call.
- Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
- unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const;
+ Type *
+ getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicElementSize = None) const;
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
/// \param RemainingBytes The number of bytes to copy.
@@ -1296,7 +1330,8 @@ public:
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const;
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicCpySize = None) const;
/// \returns True if the two functions have compatible attributes for inlining
/// purposes.
@@ -1536,8 +1571,8 @@ public:
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,
Instruction *I) = 0;
- virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2) = 0;
+ virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) = 0;
virtual bool isNumRegsMajorCostOfLSR() = 0;
virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;
@@ -1550,6 +1585,8 @@ public:
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalBroadcastLoad(Type *ElementTy,
+ ElementCount NumElements) const = 0;
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool forceScalarizeMaskedGather(VectorType *DataType,
@@ -1558,6 +1595,9 @@ public:
Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+ virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
+ unsigned Opcode1,
+ const SmallBitVector &OpcodeMask) const = 0;
virtual bool enableOrderedReductions() = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
@@ -1571,7 +1611,7 @@ public:
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
- virtual InstructionCost getRegUsageForType(Type *Ty) = 0;
+ virtual unsigned getRegUsageForType(Type *Ty) = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool shouldBuildRelLookupTables() = 0;
@@ -1584,6 +1624,7 @@ public:
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
+ virtual bool supportsTailCalls() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
@@ -1618,10 +1659,13 @@ public:
virtual unsigned getMinVectorRegisterBitWidth() const = 0;
virtual Optional<unsigned> getMaxVScale() const = 0;
virtual Optional<unsigned> getVScaleForTuning() const = 0;
- virtual bool shouldMaximizeVectorBandwidth() const = 0;
+ virtual bool
+ shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
virtual ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
+ virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+ Type *ScalarValTy) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() const = 0;
@@ -1660,7 +1704,8 @@ public:
ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp) = 0;
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind,
@@ -1734,15 +1779,17 @@ public:
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) = 0;
- virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
- unsigned SrcAddrSpace,
- unsigned DestAddrSpace,
- unsigned SrcAlign,
- unsigned DestAlign) const = 0;
+ virtual Type *
+ getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicElementSize) const = 0;
+
virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const = 0;
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicCpySize) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
virtual bool areTypesABICompatible(const Function *Caller,
@@ -1920,8 +1967,8 @@ public:
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace, I);
}
- bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2) override {
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);
}
bool isNumRegsMajorCostOfLSR() override {
@@ -1953,6 +2000,10 @@ public:
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
+ bool isLegalBroadcastLoad(Type *ElementTy,
+ ElementCount NumElements) const override {
+ return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
+ }
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedScatter(DataType, Alignment);
}
@@ -1973,6 +2024,10 @@ public:
bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);
}
+ bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask) const override {
+ return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
+ }
bool enableOrderedReductions() override {
return Impl.enableOrderedReductions();
}
@@ -2001,7 +2056,7 @@ public:
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
- InstructionCost getRegUsageForType(Type *Ty) override {
+ unsigned getRegUsageForType(Type *Ty) override {
return Impl.getRegUsageForType(Ty);
}
bool shouldBuildLookupTables() override {
@@ -2032,6 +2087,8 @@ public:
return Impl.supportsEfficientVectorElementLoadStore();
}
+ bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
@@ -2108,8 +2165,9 @@ public:
Optional<unsigned> getVScaleForTuning() const override {
return Impl.getVScaleForTuning();
}
- bool shouldMaximizeVectorBandwidth() const override {
- return Impl.shouldMaximizeVectorBandwidth();
+ bool shouldMaximizeVectorBandwidth(
+ TargetTransformInfo::RegisterKind K) const override {
+ return Impl.shouldMaximizeVectorBandwidth(K);
}
ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const override {
@@ -2118,6 +2176,10 @@ public:
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
return Impl.getMaximumVF(ElemWidth, Opcode);
}
+ unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+ Type *ScalarValTy) const override {
+ return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
+ }
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
return Impl.shouldConsiderAddressTypePromotion(
@@ -2180,8 +2242,9 @@ public:
}
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp) override {
- return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args) override {
+ return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
@@ -2298,20 +2361,22 @@ public:
Type *ExpectedType) override {
return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
- Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
- unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign,
- unsigned DestAlign) const override {
+ Type *getMemcpyLoopLoweringType(
+ LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
+ unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicElementSize) const override {
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
- DestAddrSpace, SrcAlign, DestAlign);
+ DestAddrSpace, SrcAlign, DestAlign,
+ AtomicElementSize);
}
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const override {
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicCpySize) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
- SrcAlign, DestAlign);
+ SrcAlign, DestAlign, AtomicCpySize);
}
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a32744f8d58b..a70c418974f5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -18,18 +18,16 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Type.h"
#include <utility>
-using namespace llvm::PatternMatch;
-
namespace llvm {
+class Function;
+
/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
@@ -212,7 +210,7 @@ public:
return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
}
- bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const {
+ bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
@@ -258,6 +256,10 @@ public:
return Alignment >= DataSize && isPowerOf2_32(DataSize);
}
+ bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
+ return false;
+ }
+
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
return false;
}
@@ -277,6 +279,11 @@ public:
bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
+ bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask) const {
+ return false;
+ }
+
bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
bool enableOrderedReductions() const { return false; }
@@ -310,7 +317,7 @@ public:
bool isTypeLegal(Type *Ty) const { return false; }
- InstructionCost getRegUsageForType(Type *Ty) const { return 1; }
+ unsigned getRegUsageForType(Type *Ty) const { return 1; }
bool shouldBuildLookupTables() const { return true; }
@@ -333,6 +340,8 @@ public:
bool supportsEfficientVectorElementLoadStore() const { return false; }
+ bool supportsTailCalls() const { return true; }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) const {
return false;
}
@@ -415,13 +424,17 @@ public:
Optional<unsigned> getMaxVScale() const { return None; }
Optional<unsigned> getVScaleForTuning() const { return None; }
- bool shouldMaximizeVectorBandwidth() const { return false; }
+ bool
+ shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
+ return false;
+ }
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
return ElementCount::get(0, IsScalable);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
+ unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
@@ -490,7 +503,8 @@ public:
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp) const {
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None) const {
return 1;
}
@@ -697,16 +711,21 @@ public:
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const {
- return Type::getInt8Ty(Context);
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicElementSize) const {
+ return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
+ : Type::getInt8Ty(Context);
}
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const {
- for (unsigned i = 0; i != RemainingBytes; ++i)
- OpsOut.push_back(Type::getInt8Ty(Context));
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicCpySize) const {
+ unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
+ Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
+ for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
+ OpsOut.push_back(OpType);
}
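Reading the two default implementations together: with no atomic element size
the loop operates on i8 and the residual emits one i8 per remaining byte; with
an atomic size N the loop type widens to an N-byte integer and the residual
loop steps N bytes at a time (so it assumes RemainingBytes is a multiple of N).
A sketch of those expectations; the demo function and values are illustrative:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/TargetTransformInfoImpl.h"
  #include <cassert>
  using namespace llvm;

  void demoMemcpyLoweringTypes(const TargetTransformInfoImplBase &Impl,
                               LLVMContext &Ctx, Value *Len) {
    // Plain memcpy: copy i8 at a time.
    Type *T0 = Impl.getMemcpyLoopLoweringType(Ctx, Len, 0, 0, 1, 1,
                                              /*AtomicElementSize=*/None);
    assert(T0->isIntegerTy(8));
    // Atomic element size 4: copy i32 at a time.
    Type *T1 = Impl.getMemcpyLoopLoweringType(Ctx, Len, 0, 0, 4, 4,
                                              /*AtomicElementSize=*/4u);
    assert(T1->isIntegerTy(32));
    // Residual of 8 bytes with a 4-byte atomic size: two i32 operations.
    SmallVector<Type *, 8> Ops;
    Impl.getMemcpyLoopResidualLoweringType(Ops, Ctx, /*RemainingBytes=*/8, 0,
                                           0, 4, 4, /*AtomicCpySize=*/4u);
    assert(Ops.size() == 2 && Ops[0]->isIntegerTy(32));
  }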
bool areInlineCompatible(const Function *Caller,
@@ -960,6 +979,8 @@ public:
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
+ using namespace llvm::PatternMatch;
+
auto *TargetTTI = static_cast<T *>(this);
// Handle non-intrinsic calls, invokes, and callbr.
// FIXME: Unlikely to be true for anything but CodeSize.
@@ -976,8 +997,6 @@ public:
}
Type *Ty = U->getType();
- Type *OpTy =
- U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
unsigned Opcode = Operator::getOpcode(U);
auto *I = dyn_cast<Instruction>(U);
switch (Opcode) {
@@ -1049,9 +1068,11 @@ public:
case Instruction::FPExt:
case Instruction::SExt:
case Instruction::ZExt:
- case Instruction::AddrSpaceCast:
+ case Instruction::AddrSpaceCast: {
+ Type *OpTy = U->getOperand(0)->getType();
return TargetTTI->getCastInstrCost(
Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
+ }
case Instruction::Store: {
auto *SI = cast<StoreInst>(U);
Type *ValTy = U->getOperand(0)->getType();
@@ -1137,13 +1158,14 @@ public:
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
Shuffle->getShuffleMask(), SubIndex,
- VecTy);
+ VecTy, Operands);
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
SubIndex,
- FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
+ Operands);
int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
@@ -1166,31 +1188,37 @@ public:
if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr);
+ Shuffle->getShuffleMask(), 0, nullptr,
+ Operands);
if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr);
+ Shuffle->getShuffleMask(), 0, nullptr,
+ Operands);
if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr);
+ Shuffle->getShuffleMask(), 0, nullptr,
+ Operands);
if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr);
+ Shuffle->getShuffleMask(), 0, nullptr,
+ Operands);
if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr);
+ Shuffle->getShuffleMask(), 0, nullptr,
+ Operands);
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex,
- FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr);
+ Shuffle->getShuffleMask(), 0, nullptr,
+ Operands);
}
case Instruction::ExtractElement: {
auto *EEI = dyn_cast<ExtractElementInst>(U);
diff --git a/llvm/include/llvm/Analysis/TensorSpec.h b/llvm/include/llvm/Analysis/TensorSpec.h
new file mode 100644
index 000000000000..382ab3f10445
--- /dev/null
+++ b/llvm/include/llvm/Analysis/TensorSpec.h
@@ -0,0 +1,132 @@
+//===- TensorSpec.h - type descriptor for a tensor --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_ANALYSIS_TENSORSPEC_H
+#define LLVM_ANALYSIS_TENSORSPEC_H
+
+#include "llvm/Config/llvm-config.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/JSON.h"
+
+#include <memory>
+#include <vector>
+
+namespace llvm {
+/// TensorSpec encapsulates the specification of a tensor: its dimensions, or
+/// "shape" (row-major), its type (see TensorSpec::getDataType specializations
+/// for supported types), its name and port (see "TensorFlow: Large-Scale
+/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2:
+/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
+///
+/// Known tensor types. The left part is the C type, the right is a name we
+/// can use to identify the type (to implement TensorSpec equality checks), and
+/// to use, if needed, when mapping to an underlying evaluator's type system.
+/// The main requirement is that the C type we use has the same size and
+/// encoding (e.g. endian-ness) as the one used by the evaluator.
+#define SUPPORTED_TENSOR_TYPES(M) \
+ M(float, Float) \
+ M(double, Double) \
+ M(int8_t, Int8) \
+ M(uint8_t, UInt8) \
+ M(int16_t, Int16) \
+ M(uint16_t, UInt16) \
+ M(int32_t, Int32) \
+ M(uint32_t, UInt32) \
+ M(int64_t, Int64) \
+ M(uint64_t, UInt64)
+
+enum class TensorType {
+ Invalid,
+#define _TENSOR_TYPE_ENUM_MEMBERS(_, Name) Name,
+ SUPPORTED_TENSOR_TYPES(_TENSOR_TYPE_ENUM_MEMBERS)
+#undef _TENSOR_TYPE_ENUM_MEMBERS
+};
+
+class TensorSpec final {
+public:
+ template <typename T>
+ static TensorSpec createSpec(const std::string &Name,
+ const std::vector<int64_t> &Shape,
+ int Port = 0) {
+ return TensorSpec(Name, Port, getDataType<T>(), sizeof(T), Shape);
+ }
+
+ const std::string &name() const { return Name; }
+ int port() const { return Port; }
+ TensorType type() const { return Type; }
+ const std::vector<int64_t> &shape() const { return Shape; }
+
+ bool operator==(const TensorSpec &Other) const {
+ return Name == Other.Name && Port == Other.Port && Type == Other.Type &&
+ Shape == Other.Shape;
+ }
+
+ bool operator!=(const TensorSpec &Other) const { return !(*this == Other); }
+
+ /// Get the number of elements in a tensor with this shape.
+ size_t getElementCount() const { return ElementCount; }
+ /// Get the size, in bytes, of one element.
+ size_t getElementByteSize() const { return ElementSize; }
+ /// Get the total size of a memory buffer needed to store the whole tensor.
+ size_t getTotalTensorBufferSize() const { return ElementCount * ElementSize; }
+
+ template <typename T> bool isElementType() const {
+ return getDataType<T>() == Type;
+ }
+
+private:
+ TensorSpec(const std::string &Name, int Port, TensorType Type,
+ size_t ElementSize, const std::vector<int64_t> &Shape);
+
+ template <typename T> static TensorType getDataType();
+
+ std::string Name;
+ int Port = 0;
+ TensorType Type = TensorType::Invalid;
+ std::vector<int64_t> Shape;
+ size_t ElementCount = 0;
+ size_t ElementSize = 0;
+};
+
+/// Construct a TensorSpec from a JSON dictionary of the form:
+/// { "name": <string>,
+/// "port": <int>,
+/// "type": <string. Use LLVM's types, e.g. float, double, int64_t>,
+/// "shape": <array of ints> }
+/// For the "type" field, see the C++ primitive types used in
+/// TFUTILS_SUPPORTED_TYPES.
+Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
+ const json::Value &Value);
+
+struct LoggedFeatureSpec {
+ TensorSpec Spec;
+ Optional<std::string> LoggingName;
+ const std::string &getLoggingName() const {
+ return LoggingName ? *LoggingName : Spec.name();
+ }
+};
+
+/// Load the output specs. If SpecFileOverride is not empty, that path is used.
+/// Otherwise, the file is assumed to be called 'output_spec.json' and to be
+/// under ModelPath (the model directory).
+/// The first output tensor name must match ExpectedDecisionName.
+/// In case of error, the return is None and the error is logged.
+Optional<std::vector<LoggedFeatureSpec>>
+loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
+ StringRef ModelPath, StringRef SpecFileOverride = StringRef());
+
+#define TFUTILS_GETDATATYPE_DEF(T, Name) \
+ template <> TensorType TensorSpec::getDataType<T>();
+SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_DEF)
+
+#undef TFUTILS_GETDATATYPE_DEF
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_TENSORSPEC_H
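To make the intended use concrete, a short sketch of building a spec and
checking the derived sizes; the JSON literal and function name are
illustrative, and the expected values follow from the definitions above:

  #include "llvm/Analysis/TensorSpec.h"
  #include "llvm/Support/JSON.h"
  #include <cassert>
  using namespace llvm;

  void tensorSpecDemo(LLVMContext &Ctx) {
    // A 2x3 float tensor: 6 elements of 4 bytes, 24-byte buffer.
    TensorSpec Spec = TensorSpec::createSpec<float>("input", {2, 3});
    assert(Spec.getElementCount() == 6);
    assert(Spec.getElementByteSize() == sizeof(float));
    assert(Spec.getTotalTensorBufferSize() == 24);
    assert(Spec.isElementType<float>() && !Spec.isElementType<double>());

    // The same spec, parsed from the JSON form documented above.
    Expected<json::Value> V = json::parse(
        R"({"name": "input", "port": 0, "type": "float", "shape": [2, 3]})");
    if (V) {
      if (Optional<TensorSpec> Parsed = getTensorSpecFromJSON(Ctx, *V))
        assert(*Parsed == Spec);
    } else
      consumeError(V.takeError());
  }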
diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
index 074c40942b06..dab67aad1ab0 100644
--- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
@@ -14,11 +14,11 @@
#ifndef LLVM_ANALYSIS_TYPEMETADATAUTILS_H
#define LLVM_ANALYSIS_TYPEMETADATAUTILS_H
-#include "llvm/ADT/SmallVector.h"
#include <cstdint>
namespace llvm {
+template <typename T> class SmallVectorImpl;
class CallBase;
class CallInst;
class Constant;
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
index 785b9fe949a5..372c35863f3f 100644
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -13,6 +13,7 @@
#ifdef LLVM_HAVE_TF_API
#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TensorSpec.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/JSON.h"
@@ -38,86 +39,6 @@ namespace llvm {
class TFModelEvaluatorImpl;
class EvaluationResultImpl;
-/// TensorSpec encapsulates the specification of a tensor: its dimensions, or
-/// "shape" (row-major), its type (see TensorSpec::getDataType specializations
-/// for supported types), its name and port (see "TensorFlow: Large-Scale
-/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2:
-/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
-///
-/// TensorSpec is used to set up a TFModelEvaluator by describing the expected
-/// inputs and outputs.
-class TensorSpec final {
-public:
- template <typename T>
- static TensorSpec createSpec(const std::string &Name,
- const std::vector<int64_t> &Shape,
- int Port = 0) {
- return TensorSpec(Name, Port, getDataType<T>(), Shape);
- }
-
- const std::string &name() const { return Name; }
- int port() const { return Port; }
- int typeIndex() const { return TypeIndex; }
- const std::vector<int64_t> &shape() const { return Shape; }
-
- bool operator==(const TensorSpec &Other) const {
- return Name == Other.Name && Port == Other.Port &&
- TypeIndex == Other.TypeIndex && Shape == Other.Shape;
- }
-
- bool operator!=(const TensorSpec &Other) const { return !(*this == Other); }
-
- /// Get the number of elements in a tensor with this shape.
- size_t getElementCount() const { return ElementCount; }
- /// Get the size, in bytes, of one element.
- size_t getElementByteSize() const;
-
- template <typename T> bool isElementType() const {
- return getDataType<T>() == TypeIndex;
- }
-
-private:
- TensorSpec(const std::string &Name, int Port, int TypeIndex,
- const std::vector<int64_t> &Shape);
-
- template <typename T> static int getDataType() {
- llvm_unreachable("Undefined tensor type");
- }
-
- std::string Name;
- int Port = 0;
- int TypeIndex = 0;
- std::vector<int64_t> Shape;
- size_t ElementCount = 0;
-};
-
-/// Construct a TensorSpec from a JSON dictionary of the form:
-/// { "name": <string>,
-/// "port": <int>,
-/// "type": <string. Use LLVM's types, e.g. float, double, int64_t>,
-/// "shape": <array of ints> }
-/// For the "type" field, see the C++ primitive types used in
-/// TFUTILS_SUPPORTED_TYPES.
-Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
- const json::Value &Value);
-
-struct LoggedFeatureSpec {
- TensorSpec Spec;
- Optional<std::string> LoggingName;
- const std::string &getLoggingName() const {
- return LoggingName ? *LoggingName : Spec.name();
- }
-};
-
-/// Load the output specs. If SpecFileOverride is not empty, that path is used.
-/// Otherwise, the file is assumed to be called 'output_spec.json' and be found
-/// under ModelPath (the model directory).
-/// The first output tensor name must match ExpectedDecisionName.
-/// In case of error, the return is None and the error is logged.
-Optional<std::vector<LoggedFeatureSpec>>
-loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
- StringRef ModelPath, StringRef SpecFileOverride = StringRef());
-
/// Logging utility - given an ordered specification of features, and assuming
/// a scalar reward, allow logging feature values and rewards, and then print
/// as tf.train.SequenceExample text protobuf.
@@ -262,27 +183,6 @@ private:
std::unique_ptr<TFModelEvaluatorImpl> Impl;
};
-/// List of supported types, as a pair:
-/// - C++ type
-/// - enum name (implementation-specific)
-#define TFUTILS_SUPPORTED_TYPES(M) \
- M(float, TF_FLOAT) \
- M(double, TF_DOUBLE) \
- M(int8_t, TF_INT8) \
- M(uint8_t, TF_UINT8) \
- M(int16_t, TF_INT16) \
- M(uint16_t, TF_UINT16) \
- M(int32_t, TF_INT32) \
- M(uint32_t, TF_UINT32) \
- M(int64_t, TF_INT64) \
- M(uint64_t, TF_UINT64)
-
-#define TFUTILS_GETDATATYPE_DEF(T, E) \
- template <> int TensorSpec::getDataType<T>();
-
-TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_DEF)
-
-#undef TFUTILS_GETDATATYPE_DEF
} // namespace llvm
#endif // LLVM_HAVE_TF_API
diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h
index 1b32fca50697..bc6b279e9ed5 100644
--- a/llvm/include/llvm/Analysis/ValueLattice.h
+++ b/llvm/include/llvm/Analysis/ValueLattice.h
@@ -9,16 +9,18 @@
#ifndef LLVM_ANALYSIS_VALUELATTICE_H
#define LLVM_ANALYSIS_VALUELATTICE_H
-#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instructions.h"
-//
+
//===----------------------------------------------------------------------===//
// ValueLatticeElement
//===----------------------------------------------------------------------===//
namespace llvm {
+class Constant;
+
/// This class represents lattice values for constants.
///
/// FIXME: This is basically just for bringup, this can be made a lot more rich
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 5b39b0244339..3b29bf1d53b4 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -21,12 +21,12 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Operator.h"
#include <cassert>
#include <cstdint>
namespace llvm {
+class Operator;
class AddOperator;
class AllocaInst;
class APInt;
@@ -463,15 +463,37 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
+ /// This returns the same result as isSafeToSpeculativelyExecute if Opcode is
+ /// the actual opcode of Inst. If the provided and actual opcode differ, the
+ /// function (virtually) overrides the opcode of Inst with the provided
+ /// Opcode. There are some constraints in this case:
+ /// * If Opcode has a fixed number of operands (e.g., as binary operators do),
+ /// then Inst has to have at least as many leading operands. The function
+ /// will ignore all trailing operands beyond that number.
+ /// * If Opcode allows for an arbitrary number of operands (e.g., as CallInsts
+ /// do), then all operands are considered.
+ /// * The virtual instruction has to satisfy all typing rules of the provided
+ /// Opcode.
+ /// * This function is pessimistic in the following sense: If one actually
+ /// materialized the virtual instruction, then isSafeToSpeculativelyExecute
+ /// may say that the materialized instruction is speculatable whereas this
+ /// function may have said that the instruction wouldn't be speculatable.
+ /// This behavior is a shortcoming in the current implementation and not
+ /// intentional.
+ bool isSafeToSpeculativelyExecuteWithOpcode(
+ unsigned Opcode, const Operator *Inst, const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
+ const TargetLibraryInfo *TLI = nullptr);
+
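A hedged usage sketch: checking whether an instruction would stay speculatable
if executed under a different opcode. The wrapper is illustrative; this is how
a vectorizer might vet one arm of an alternating opcode pattern:

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  // Would I be safe to speculate if it were executed as an FSub?
  bool safeAsFSub(const Instruction *I) {
    return isSafeToSpeculativelyExecuteWithOpcode(Instruction::FSub,
                                                  cast<Operator>(I));
  }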
/// Returns true if the result or effects of the given instruction \p I
- /// depend on or influence global memory.
- /// Memory dependence arises for example if the instruction reads from
- /// memory or may produce effects or undefined behaviour. Memory dependent
- /// instructions generally cannot be reorderd with respect to other memory
- /// dependent instructions or moved into non-dominated basic blocks.
- /// Instructions which just compute a value based on the values of their
- /// operands are not memory dependent.
- bool mayBeMemoryDependent(const Instruction &I);
+ /// depend on values not reachable through the def-use graph.
+ /// * Memory dependence arises for example if the instruction reads from
+ /// memory or may produce effects or undefined behaviour. Memory dependent
+ /// instructions generally cannot be reordered with respect to other memory
+ /// dependent instructions.
+ /// * Control dependence arises for example if the instruction may fault
+ /// if lifted above a throwing call or infinite loop.
+ bool mayHaveNonDefUseDependency(const Instruction &I);
/// Return true if it is an intrinsic that cannot be speculated but also
/// cannot trap.
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 751c88a4ecbb..0005874ba040 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -236,7 +236,7 @@ class VFDatabase {
// ensuring that the variant described in the attribute has a
// corresponding definition or declaration of the vector
// function in the Module M.
- if (Shape.hasValue() && (Shape.getValue().ScalarName == ScalarName)) {
+ if (Shape && (Shape.getValue().ScalarName == ScalarName)) {
assert(CI.getModule()->getFunction(Shape.getValue().VectorName) &&
"Vector function is missing.");
Mappings.push_back(Shape.getValue());
@@ -309,16 +309,16 @@ inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
-/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
+/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);
/// Identifies if the vector form of the intrinsic has a scalar operand.
-bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
+bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
+ unsigned ScalarOpdIdx);
-/// Identifies if the vector form of the intrinsic has a scalar operand that has
+/// Identifies if the vector form of the intrinsic has an operand that has
/// an overloaded type.
-bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
- unsigned ScalarOpdIdx);
+bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
/// Returns intrinsic ID for call.
/// For the input call instruction it finds mapping intrinsic and returns
@@ -398,6 +398,24 @@ void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask);
+/// Splits and processes a shuffle mask depending on the number of input and
+/// output registers. The function does two main things: 1) splits the
+/// source/destination vectors into real registers; 2) does the mask analysis
+/// to identify which real registers are permuted. It then processes the
+/// resulting register masks using the provided action callbacks. If no input
+/// register is defined, \p NoInputAction is used. If only one input register
+/// is used, \p SingleInputAction is used; otherwise \p ManyInputsAction is
+/// used to process two or more input registers and their masks.
+/// \param Mask Original shuffle mask.
+/// \param NumOfSrcRegs Number of source registers.
+/// \param NumOfDestRegs Number of destination registers.
+/// \param NumOfUsedRegs Number of actually used destination registers.
+void processShuffleMasks(
+ ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
+ unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
+ function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
+ function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
+
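A sketch of driving processShuffleMasks with printing callbacks. The meaning
of the two unsigned callback parameters (which source and destination register
a per-register sub-mask refers to) is my reading of the interface, not spelled
out by the declaration:

  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void dumpRegisterMasks(ArrayRef<int> Mask, unsigned NumSrc,
                         unsigned NumDest) {
    processShuffleMasks(
        Mask, NumSrc, NumDest, /*NumOfUsedRegs=*/NumDest,
        /*NoInputAction=*/[] { errs() << "undef destination register\n"; },
        /*SingleInputAction=*/
        [](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
          errs() << "permute of src reg " << SrcReg << "\n";
        },
        /*ManyInputsAction=*/
        [](ArrayRef<int> RegMask, unsigned Reg1, unsigned Reg2) {
          errs() << "blend of regs " << Reg1 << " and " << Reg2 << "\n";
        });
  }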
/// Compute a map of integer instructions to their minimum legal type
/// size.
///
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index c30165e4a97b..7bcb33f18768 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -37,7 +37,7 @@ namespace llvm {
lltok::Kind CurKind;
std::string StrVal;
unsigned UIntVal;
- Type *TyVal;
+ Type *TyVal = nullptr;
APFloat APFloatVal;
APSInt APSIntVal;
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 62af3afbc142..3389475b2c9a 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -14,18 +14,25 @@
#define LLVM_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/FMF.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ModuleSummaryIndex.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
#include <map>
namespace llvm {
class Module;
+ class ConstantRange;
+ class FunctionType;
+ class GlobalObject;
+ class SMDiagnostic;
+ class SMLoc;
+ class SourceMgr;
+ class Type;
+ struct MaybeAlign;
+ template <typename T> class Optional;
class Function;
class Value;
class BasicBlock;
@@ -88,6 +95,8 @@ namespace llvm {
typedef LLLexer::LocTy LocTy;
private:
LLVMContext &Context;
+ // Lexer to determine whether to use opaque pointers or not.
+ LLLexer OPLex;
LLLexer Lex;
// Module being parsed, null if we are only parsing summary index.
Module *M;
@@ -150,8 +159,9 @@ namespace llvm {
LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M,
ModuleSummaryIndex *Index, LLVMContext &Context,
SlotMapping *Slots = nullptr)
- : Context(Context), Lex(F, SM, Err, Context), M(M), Index(Index),
- Slots(Slots), BlockAddressPFS(nullptr) {}
+ : Context(Context), OPLex(F, SM, Err, Context),
+ Lex(F, SM, Err, Context), M(M), Index(Index), Slots(Slots),
+ BlockAddressPFS(nullptr) {}
bool Run(
bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback =
[](StringRef) { return None; });
@@ -263,6 +273,8 @@ namespace llvm {
bool parseOptionalAlignment(MaybeAlign &Alignment,
bool AllowParens = false);
bool parseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
+ bool parseOptionalUWTableKind(UWTableKind &Kind);
+ bool parseAllocKind(AllocFnKind &Kind);
bool parseScopeAndOrdering(bool IsAtomic, SyncScope::ID &SSID,
AtomicOrdering &Ordering);
bool parseScope(SyncScope::ID &SSID);
@@ -503,6 +515,7 @@ namespace llvm {
bool parseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
Optional<unsigned> *InRangeOp = nullptr);
bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
+ bool parseSanitizer(GlobalVariable *GV);
bool parseMetadataAsValue(Value *&V, PerFunctionState &PFS);
bool parseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
PerFunctionState *PFS);
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 78ebb35e0ea4..230a1662cc04 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -88,7 +88,6 @@ enum Kind {
kw_triple,
kw_source_filename,
kw_unwind,
- kw_deplibs, // FIXME: Remove in 4.0
kw_datalayout,
kw_volatile,
kw_atomic,
@@ -112,7 +111,6 @@ enum Kind {
kw_exact,
kw_inbounds,
kw_inrange,
- kw_align,
kw_addrspace,
kw_section,
kw_partition,
@@ -121,7 +119,6 @@ enum Kind {
kw_module,
kw_asm,
kw_sideeffect,
- kw_alignstack,
kw_inteldialect,
kw_gc,
kw_prefix,
@@ -177,81 +174,12 @@ enum Kind {
// Attributes:
kw_attributes,
- kw_allocsize,
- kw_alwaysinline,
- kw_argmemonly,
- kw_sanitize_address,
- kw_sanitize_hwaddress,
- kw_sanitize_memtag,
- kw_builtin,
- kw_byval,
- kw_inalloca,
- kw_cold,
- kw_convergent,
- kw_dereferenceable,
- kw_dereferenceable_or_null,
- kw_disable_sanitizer_instrumentation,
- kw_elementtype,
- kw_inaccessiblememonly,
- kw_inaccessiblemem_or_argmemonly,
- kw_inlinehint,
- kw_inreg,
- kw_jumptable,
- kw_minsize,
- kw_naked,
- kw_nest,
- kw_noalias,
- kw_noundef,
- kw_nobuiltin,
- kw_nocallback,
- kw_nocapture,
- kw_noduplicate,
- kw_nofree,
- kw_noimplicitfloat,
- kw_noinline,
- kw_norecurse,
- kw_nonlazybind,
- kw_nomerge,
- kw_nonnull,
- kw_noprofile,
- kw_noredzone,
- kw_noreturn,
- kw_nosync,
- kw_nocf_check,
- kw_nounwind,
- kw_nosanitize_coverage,
- kw_null_pointer_is_valid,
- kw_optforfuzzing,
- kw_optnone,
- kw_optsize,
- kw_preallocated,
- kw_readnone,
- kw_readonly,
- kw_returned,
- kw_returns_twice,
- kw_signext,
- kw_speculatable,
- kw_ssp,
- kw_sspreq,
- kw_sspstrong,
- kw_safestack,
- kw_shadowcallstack,
- kw_sret,
- kw_sanitize_thread,
- kw_sanitize_memory,
- kw_speculative_load_hardening,
- kw_strictfp,
- kw_swifterror,
- kw_swiftself,
- kw_swiftasync,
- kw_uwtable,
- kw_vscale_range,
- kw_willreturn,
- kw_writeonly,
- kw_zeroext,
- kw_immarg,
- kw_byref,
- kw_mustprogress,
+ kw_sync,
+ kw_async,
+#define GET_ATTR_NAMES
+#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
+ kw_##DISPLAY_NAME,
+#include "llvm/IR/Attributes.inc"
kw_type,
kw_opaque,
@@ -415,7 +343,6 @@ enum Kind {
kw_param,
kw_hotness,
kw_unknown,
- kw_hot,
kw_critical,
kw_relbf,
kw_variable,
@@ -464,6 +391,19 @@ enum Kind {
kw_bit,
kw_varFlags,
+ // GVs with __attribute__((no_sanitize("address"))), or things in
+ // -fsanitize-ignorelist when built with ASan.
+ kw_no_sanitize_address,
+ // GVs with __attribute__((no_sanitize("hwaddress"))), or things in
+ // -fsanitize-ignorelist when built with HWASan.
+ kw_no_sanitize_hwaddress,
+ // GVs with __attribute__((no_sanitize("memtag"))), or things in
+ // -fsanitize-ignorelist when built with memory tagging.
+ kw_no_sanitize_memtag,
+ // GVs where the clang++ frontend (when ASan is used) notes that this is
+ // dynamically initialized, and thus needs ODR detection.
+ kw_sanitize_address_dyninit,
+
// Unsigned Valued tokens (UIntVal).
LabelID, // 42:
GlobalID, // @42
diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h
index e1c7f746a335..6710ae6e358d 100644
--- a/llvm/include/llvm/AsmParser/Parser.h
+++ b/llvm/include/llvm/AsmParser/Parser.h
@@ -13,7 +13,9 @@
#ifndef LLVM_ASMPARSER_PARSER_H
#define LLVM_ASMPARSER_PARSER_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
#include <memory>
diff --git a/llvm/include/llvm/BinaryFormat/COFF.h b/llvm/include/llvm/BinaryFormat/COFF.h
index e7dde986784f..fb563ff198ef 100644
--- a/llvm/include/llvm/BinaryFormat/COFF.h
+++ b/llvm/include/llvm/BinaryFormat/COFF.h
@@ -24,7 +24,6 @@
#include "llvm/Support/DataTypes.h"
#include <cassert>
-#include <cstring>
namespace llvm {
namespace COFF {
@@ -731,6 +730,10 @@ inline bool isReservedSectionNumber(int32_t SectionNumber) {
return SectionNumber <= 0;
}
+/// Encode section name based on string table offset.
+/// The size of Out must be at least COFF::NameSize.
+bool encodeSectionName(char *Out, uint64_t Offset);
+
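A short usage sketch for the new helper; long COFF section names live in the
string table, and the 8-byte name field holds an encoded offset ("/1234567",
or a "//"-prefixed base64 form for larger offsets). The values here are
illustrative:

  #include "llvm/BinaryFormat/COFF.h"

  void demoEncodeSectionName(uint64_t StringTableOffset) {
    char Name[llvm::COFF::NameSize] = {};
    // Fails (returns false) if the offset is too large to encode.
    bool Ok = llvm::COFF::encodeSectionName(Name, StringTableOffset);
    (void)Ok;
  }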
} // End namespace COFF.
} // End namespace llvm.
diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h
new file mode 100644
index 000000000000..9e912c7bd4ba
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/DXContainer.h
@@ -0,0 +1,131 @@
+//===-- llvm/BinaryFormat/DXContainer.h - The DXBC file format --*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines manifest constants for the DXContainer object file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_DXCONTAINER_H
+#define LLVM_BINARYFORMAT_DXCONTAINER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/SwapByteOrder.h"
+
+#include <stdint.h>
+
+namespace llvm {
+
+// The DXContainer file format is arranged as a header and "parts". Semantically
+// parts are similar to sections in other object file formats. The file format
+// structure is roughly:
+
+// ┌────────────────────────────────┐
+// │ Header │
+// ├────────────────────────────────┤
+// │ Part │
+// ├────────────────────────────────┤
+// │ Part │
+// ├────────────────────────────────┤
+// │ ... │
+// └────────────────────────────────┘
+
+namespace dxbc {
+
+struct Hash {
+ uint8_t Digest[16];
+};
+
+enum class HashFlags : uint32_t {
+ None = 0, // No flags defined.
+ IncludesSource = 1, // This flag indicates that the shader hash was computed
+ // taking into account source information (-Zss)
+};
+
+struct ShaderHash {
+ uint32_t Flags; // DxilShaderHashFlags
+ uint8_t Digest[16];
+
+ void swapBytes() { sys::swapByteOrder(Flags); }
+};
+
+struct ContainerVersion {
+ uint16_t Major;
+ uint16_t Minor;
+
+ void swapBytes() {
+ sys::swapByteOrder(Major);
+ sys::swapByteOrder(Minor);
+ }
+};
+
+struct Header {
+ uint8_t Magic[4]; // "DXBC"
+ Hash FileHash;
+ ContainerVersion Version;
+ uint32_t FileSize;
+ uint32_t PartCount;
+
+ void swapBytes() {
+ Version.swapBytes();
+ sys::swapByteOrder(FileSize);
+ sys::swapByteOrder(PartCount);
+ }
+ // Structure is followed by part offsets: uint32_t PartOffset[PartCount];
+ // The offset is to a PartHeader, which is followed by the Part Data.
+};
+
+/// Use this type to describe the size and type of a DXIL container part.
+struct PartHeader {
+ uint8_t Name[4];
+ uint32_t Size;
+
+ void swapBytes() { sys::swapByteOrder(Size); }
+ StringRef getName() const {
+ return StringRef(reinterpret_cast<const char *>(&Name[0]), 4);
+ }
+ // Structure is followed directly by part data: uint8_t PartData[PartSize].
+};
+
+struct BitcodeHeader {
+ uint8_t Magic[4]; // ASCII "DXIL".
+ uint8_t MajorVersion; // DXIL version.
+ uint8_t MinorVersion; // DXIL version.
+ uint16_t Unused;
+ uint32_t Offset; // Offset to LLVM bitcode (from start of header).
+ uint32_t Size; // Size of LLVM bitcode (in bytes).
+ // Followed by uint8_t[BitcodeHeader.Size] at &BitcodeHeader + Header.Offset
+
+ void swapBytes() {
+ sys::swapByteOrder(MinorVersion);
+ sys::swapByteOrder(MajorVersion);
+ sys::swapByteOrder(Offset);
+ sys::swapByteOrder(Size);
+ }
+};
+
+struct ProgramHeader {
+ uint8_t MinorVersion : 4;
+ uint8_t MajorVersion : 4;
+ uint8_t Unused;
+ uint16_t ShaderKind;
+ uint32_t Size; // Size in uint32_t words including this header.
+ BitcodeHeader Bitcode;
+
+ void swapBytes() {
+ sys::swapByteOrder(ShaderKind);
+ sys::swapByteOrder(Size);
+ Bitcode.swapBytes();
+ }
+};
+
+static_assert(sizeof(ProgramHeader) == 24, "ProgramHeader Size incorrect!");
+
+} // namespace dxbc
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_DXCONTAINER_H
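To illustrate the layout sketched in the comment at the top of the file, a
hedged reader over a raw buffer (no such helper ships in this header; bounds
checks are elided):

  #include "llvm/BinaryFormat/DXContainer.h"
  #include <cstring>

  // Walk the parts of a DXContainer blob. Fields are little-endian on disk,
  // so swapBytes() is only required on big-endian hosts.
  void walkParts(const uint8_t *Data) {
    llvm::dxbc::Header H;
    std::memcpy(&H, Data, sizeof(H));
    if (std::memcmp(H.Magic, "DXBC", 4) != 0)
      return;
    const uint8_t *Offsets = Data + sizeof(H);
    for (uint32_t I = 0; I < H.PartCount; ++I) {
      uint32_t PartOffset;
      std::memcpy(&PartOffset, Offsets + I * sizeof(uint32_t),
                  sizeof(uint32_t));
      llvm::dxbc::PartHeader PH;
      std::memcpy(&PH, Data + PartOffset, sizeof(PH));
      // PH.getName() is e.g. "DXIL"; part data follows the PartHeader.
    }
  }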
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index 4473f506d371..e288c5191bdb 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -320,6 +320,10 @@ inline bool isFortran(SourceLanguage S) {
return result;
}
+inline TypeKind getArrayIndexTypeEncoding(SourceLanguage S) {
+ return isFortran(S) ? DW_ATE_signed : DW_ATE_unsigned;
+}
+
enum CaseSensitivity {
// Identifier case codes
DW_ID_case_sensitive = 0x00,
diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def
index 814d8b113ec4..ae25ec53813c 100644
--- a/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -209,6 +209,7 @@ MIPS_DYNAMIC_TAG(MIPS_RWPLT, 0x70000034) // Points to the base
// of a writable PLT.
MIPS_DYNAMIC_TAG(MIPS_RLD_MAP_REL, 0x70000035) // Relative offset of run time loader
// map, used for debugging.
+MIPS_DYNAMIC_TAG(MIPS_XHASH, 0x70000036) // GNU-style hash table with xlat.
// PPC specific dynamic table entries.
PPC_DYNAMIC_TAG(PPC_GOT, 0x70000000) // Uses Secure PLT ABI.
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 5d3b1270b538..1e0ef613788d 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -319,6 +319,7 @@ enum {
EM_BPF = 247, // Linux kernel bpf virtual machine
EM_VE = 251, // NEC SX-Aurora VE
EM_CSKY = 252, // C-SKY 32-bit processor
+ EM_LOONGARCH = 258, // LoongArch
};
// Object file classes.
@@ -563,6 +564,15 @@ enum : unsigned {
EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant
};
+// MIPS-specific section indexes
+enum {
+ SHN_MIPS_ACOMMON = 0xff00, // Common symbols which are defined and allocated
+ SHN_MIPS_TEXT = 0xff01, // Not ABI compliant
+ SHN_MIPS_DATA = 0xff02, // Not ABI compliant
+ SHN_MIPS_SCOMMON = 0xff03, // Common symbols for global data area
+ SHN_MIPS_SUNDEFINED = 0xff04 // Undefined symbols for global data area
+};
+
// ELF Relocation types for Mips
enum {
#include "ELFRelocs/Mips.def"
@@ -753,16 +763,18 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X41 = 0x041,
+ EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
+ EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X43 = 0x043,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X44 = 0x044,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X45 = 0x045,
+ EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
+ EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
+ EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
+ EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_RESERVED_0X45,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1102,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
@@ -865,12 +877,34 @@ enum {
#include "ELFRelocs/VE.def"
};
+// CSKY Specific e_flags
+enum : unsigned {
+ EF_CSKY_801 = 0xa,
+ EF_CSKY_802 = 0x10,
+ EF_CSKY_803 = 0x9,
+ EF_CSKY_805 = 0x11,
+ EF_CSKY_807 = 0x6,
+ EF_CSKY_810 = 0x8,
+ EF_CSKY_860 = 0xb,
+ EF_CSKY_800 = 0x1f,
+ EF_CSKY_FLOAT = 0x2000,
+ EF_CSKY_DSP = 0x4000,
+ EF_CSKY_ABIV2 = 0x20000000,
+ EF_CSKY_EFV1 = 0x1000000,
+ EF_CSKY_EFV2 = 0x2000000,
+ EF_CSKY_EFV3 = 0x3000000
+};
// ELF Relocation types for CSKY
enum {
#include "ELFRelocs/CSKY.def"
};
+// ELF Relocation types for LoongArch
+enum {
+#include "ELFRelocs/LoongArch.def"
+};
+
#undef ELF_RELOC
// Section header.
@@ -947,12 +981,15 @@ enum : unsigned {
SHT_LLVM_ADDRSIG = 0x6fff4c03, // List of address-significant symbols
// for safe ICF.
SHT_LLVM_DEPENDENT_LIBRARIES =
- 0x6fff4c04, // LLVM Dependent Library Specifiers.
- SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification.
- SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition.
- SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition.
- SHT_LLVM_BB_ADDR_MAP = 0x6fff4c08, // LLVM Basic Block Address Map.
+ 0x6fff4c04, // LLVM Dependent Library Specifiers.
+ SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification.
+ SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition.
+ SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition.
+ SHT_LLVM_BB_ADDR_MAP_V0 =
+ 0x6fff4c08, // LLVM Basic Block Address Map (old version kept for
+ // backward-compatibility).
SHT_LLVM_CALL_GRAPH_PROFILE = 0x6fff4c09, // LLVM Call Graph Profile.
+ SHT_LLVM_BB_ADDR_MAP = 0x6fff4c0a, // LLVM Basic Block Address Map.
// Android's experimental support for SHT_RELR sections.
// https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets.
@@ -985,6 +1022,8 @@ enum : unsigned {
SHT_RISCV_ATTRIBUTES = 0x70000003U,
+ SHT_CSKY_ATTRIBUTES = 0x70000001U,
+
SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
@@ -1036,6 +1075,9 @@ enum : unsigned {
SHF_MASKOS = 0x0ff00000,
+ // Solaris equivalent of SHF_GNU_RETAIN.
+ SHF_SUNW_NODISCARD = 0x00100000,
+
// Bits indicating processor-specific flags.
SHF_MASKPROC = 0xf0000000,
@@ -1329,6 +1371,9 @@ enum {
PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table.
PT_MIPS_OPTIONS = 0x70000002, // Options segment.
PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment.
+
+ // RISCV program header types.
+ PT_RISCV_ATTRIBUTES = 0x70000003,
};
// Segment flag bits.
@@ -1531,6 +1576,31 @@ enum {
NT_GNU_PROPERTY_TYPE_0 = 5,
};
+// Android note types.
+enum {
+ NT_ANDROID_TYPE_IDENT = 1,
+ NT_ANDROID_TYPE_KUSER = 3,
+ NT_ANDROID_TYPE_MEMTAG = 4,
+};
+
+// Memory tagging values used in NT_ANDROID_TYPE_MEMTAG notes.
+enum {
+ // Enumeration to determine the tagging mode. In Android-land, 'SYNC' means
+ // running all threads in MTE Synchronous mode, and 'ASYNC' means to use the
+ // kernel's auto-upgrade feature to allow for either MTE Asynchronous,
+ // Asymmetric, or Synchronous mode. This allows silicon vendors to specify,
+ // on a per-CPU basis, what 'ASYNC' should mean. Generally, the expectation is
+ // "pick the most precise mode that's very fast".
+ NT_MEMTAG_LEVEL_NONE = 0,
+ NT_MEMTAG_LEVEL_ASYNC = 1,
+ NT_MEMTAG_LEVEL_SYNC = 2,
+ NT_MEMTAG_LEVEL_MASK = 3,
+ // Bits indicating whether the loader should prepare for MTE to be enabled on
+ // the heap and/or stack.
+ NT_MEMTAG_HEAP = 4,
+ NT_MEMTAG_STACK = 8,
+};
+
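Decoding the descriptor word of an NT_ANDROID_TYPE_MEMTAG note with the
constants above (a sketch; Desc stands for the first 32-bit word of the note's
description data):

  // Level is one of NT_MEMTAG_LEVEL_{NONE,ASYNC,SYNC}; the remaining bits
  // say whether the loader should enable MTE for the heap and/or stack.
  void decodeMemtagDesc(uint32_t Desc) {
    unsigned Level = Desc & llvm::ELF::NT_MEMTAG_LEVEL_MASK;
    bool TagHeap = Desc & llvm::ELF::NT_MEMTAG_HEAP;
    bool TagStack = Desc & llvm::ELF::NT_MEMTAG_STACK;
    (void)Level; (void)TagHeap; (void)TagStack;
  }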
// Property types used in GNU_PROPERTY_TYPE_0 notes.
enum : unsigned {
GNU_PROPERTY_STACK_SIZE = 1,
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
new file mode 100644
index 000000000000..8cbfe2fe4235
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
@@ -0,0 +1,62 @@
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+// These types and values are from the LoongArch ELF psABI which can be found at
+// https://github.com/loongson/LoongArch-Documentation
+// and these definitions have been adopted by binutils (include/elf/loongarch.h).
+// The commit hash (main branch) we reference is:
+// 9b3bd9f4a497115913c22f1a2a47863798fbc02a
+
+ELF_RELOC(R_LARCH_NONE, 0)
+ELF_RELOC(R_LARCH_32, 1)
+ELF_RELOC(R_LARCH_64, 2)
+ELF_RELOC(R_LARCH_RELATIVE, 3)
+ELF_RELOC(R_LARCH_COPY, 4)
+ELF_RELOC(R_LARCH_JUMP_SLOT, 5)
+ELF_RELOC(R_LARCH_TLS_DTPMOD32, 6)
+ELF_RELOC(R_LARCH_TLS_DTPMOD64, 7)
+ELF_RELOC(R_LARCH_TLS_DTPREL32, 8)
+ELF_RELOC(R_LARCH_TLS_DTPREL64, 9)
+ELF_RELOC(R_LARCH_TLS_TPREL32, 10)
+ELF_RELOC(R_LARCH_TLS_TPREL64, 11)
+ELF_RELOC(R_LARCH_IRELATIVE, 12)
+ELF_RELOC(R_LARCH_MARK_LA, 20)
+ELF_RELOC(R_LARCH_MARK_PCREL, 21)
+ELF_RELOC(R_LARCH_SOP_PUSH_PCREL, 22)
+ELF_RELOC(R_LARCH_SOP_PUSH_ABSOLUTE, 23)
+ELF_RELOC(R_LARCH_SOP_PUSH_DUP, 24)
+ELF_RELOC(R_LARCH_SOP_PUSH_GPREL, 25)
+ELF_RELOC(R_LARCH_SOP_PUSH_TLS_TPREL, 26)
+ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GOT, 27)
+ELF_RELOC(R_LARCH_SOP_PUSH_TLS_GD, 28)
+ELF_RELOC(R_LARCH_SOP_PUSH_PLT_PCREL, 29)
+ELF_RELOC(R_LARCH_SOP_ASSERT, 30)
+ELF_RELOC(R_LARCH_SOP_NOT, 31)
+ELF_RELOC(R_LARCH_SOP_SUB, 32)
+ELF_RELOC(R_LARCH_SOP_SL, 33)
+ELF_RELOC(R_LARCH_SOP_SR, 34)
+ELF_RELOC(R_LARCH_SOP_ADD, 35)
+ELF_RELOC(R_LARCH_SOP_AND, 36)
+ELF_RELOC(R_LARCH_SOP_IF_ELSE, 37)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_5, 38)
+ELF_RELOC(R_LARCH_SOP_POP_32_U_10_12, 39)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_12, 40)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16, 41)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_10_16_S2, 42)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_5_20, 43)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44)
+ELF_RELOC(R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45)
+ELF_RELOC(R_LARCH_SOP_POP_32_U, 46)
+ELF_RELOC(R_LARCH_ADD8, 47)
+ELF_RELOC(R_LARCH_ADD16, 48)
+ELF_RELOC(R_LARCH_ADD24, 49)
+ELF_RELOC(R_LARCH_ADD32, 50)
+ELF_RELOC(R_LARCH_ADD64, 51)
+ELF_RELOC(R_LARCH_SUB8, 52)
+ELF_RELOC(R_LARCH_SUB16, 53)
+ELF_RELOC(R_LARCH_SUB24, 54)
+ELF_RELOC(R_LARCH_SUB32, 55)
+ELF_RELOC(R_LARCH_SUB64, 56)
+ELF_RELOC(R_LARCH_GNU_VTINHERIT, 57)
+ELF_RELOC(R_LARCH_GNU_VTENTRY, 58)
diff --git a/llvm/include/llvm/BinaryFormat/GOFF.h b/llvm/include/llvm/BinaryFormat/GOFF.h
new file mode 100644
index 000000000000..96992414c6cc
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/GOFF.h
@@ -0,0 +1,33 @@
+//===-- llvm/BinaryFormat/GOFF.h - GOFF definitions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the GOFF file format.
+//
+// GOFF specifics can be found in MVS Program Management: Advanced Facilities.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_GOFF_H
+#define LLVM_BINARYFORMAT_GOFF_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+namespace GOFF {
+
+/// \brief Subsections of the primary C_CODE section in the object file.
+enum SubsectionKind : uint8_t {
+ SK_PPA1 = 2,
+};
+
+} // end namespace GOFF
+
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_GOFF_H
diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index ce3a5c46e0d1..c05e79333d38 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -255,7 +255,8 @@ enum BindType {
enum BindSpecialDylib {
BIND_SPECIAL_DYLIB_SELF = 0,
BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
- BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2
+ BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2,
+ BIND_SPECIAL_DYLIB_WEAK_LOOKUP = -3
};
enum {
@@ -1001,6 +1002,27 @@ struct nlist_64 {
uint64_t n_value;
};
+/// Structs for dyld chained fixups.
+/// dyld_chained_fixups_header is the data pointed to by the
+/// LC_DYLD_CHAINED_FIXUPS load command.
+struct dyld_chained_fixups_header {
+ uint32_t fixups_version; ///< 0
+ uint32_t starts_offset; ///< Offset of dyld_chained_starts_in_image.
+ uint32_t imports_offset; ///< Offset of imports table in chain_data.
+ uint32_t symbols_offset; ///< Offset of symbol strings in chain_data.
+ uint32_t imports_count; ///< Number of imported symbol names.
+ uint32_t imports_format; ///< DYLD_CHAINED_IMPORT*
+ uint32_t symbols_format; ///< 0 => uncompressed, 1 => zlib compressed
+};
+
+/// dyld_chained_starts_in_image is embedded in LC_DYLD_CHAINED_FIXUPS payload.
+/// Each seg_info_offset entry is the offset into this struct for that
+/// segment, followed by a pool of dyld_chained_starts_in_segment data.
+struct dyld_chained_starts_in_image {
+ uint32_t seg_count;
+ uint32_t seg_info_offset[1];
+};
+
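A sketch of the traversal these structs imply, assuming Base points at the LC_DYLD_CHAINED_FIXUPS payload (visitSegmentStarts is a hypothetical helper; bounds checking is elided, and seg_info_offset is treated as the variable-length array it is in practice):

#include "llvm/BinaryFormat/MachO.h"
#include <cstdint>

void visitSegmentStarts(const uint8_t *Base,
                        const llvm::MachO::dyld_chained_fixups_header &H) {
  auto *Starts =
      reinterpret_cast<const llvm::MachO::dyld_chained_starts_in_image *>(
          Base + H.starts_offset);
  for (uint32_t I = 0; I < Starts->seg_count; ++I) {
    // Entries are relative to the start of dyld_chained_starts_in_image.
    uint32_t Off = Starts->seg_info_offset[I];
    if (Off == 0)
      continue; // This segment has no fixups.
    const uint8_t *SegInfo = reinterpret_cast<const uint8_t *>(Starts) + Off;
    (void)SegInfo; // Per-segment parsing would continue from here.
  }
}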
// Byte order swapping functions for MachO structs
inline void swapStruct(fat_header &mh) {
@@ -2008,6 +2030,16 @@ union alignas(4) macho_load_command {
};
LLVM_PACKED_END
+inline void swapStruct(dyld_chained_fixups_header &C) {
+ sys::swapByteOrder(C.fixups_version);
+ sys::swapByteOrder(C.starts_offset);
+ sys::swapByteOrder(C.imports_offset);
+ sys::swapByteOrder(C.symbols_offset);
+ sys::swapByteOrder(C.imports_count);
+ sys::swapByteOrder(C.imports_format);
+ sys::swapByteOrder(C.symbols_format);
+}
+
/* code signing attributes of a process */
enum CodeSignAttrs {
@@ -2205,6 +2237,17 @@ enum SecCSDigestAlgorithm {
kSecCodeSignatureHashSHA512 = 5, /* SHA-512 */
};
+enum LinkerOptimizationHintKind {
+ LOH_ARM64_ADRP_ADRP = 1,
+ LOH_ARM64_ADRP_LDR = 2,
+ LOH_ARM64_ADRP_ADD_LDR = 3,
+ LOH_ARM64_ADRP_LDR_GOT_LDR = 4,
+ LOH_ARM64_ADRP_ADD_STR = 5,
+ LOH_ARM64_ADRP_LDR_GOT_STR = 6,
+ LOH_ARM64_ADRP_ADD = 7,
+ LOH_ARM64_ADRP_LDR_GOT = 8,
+};
+
} // end namespace MachO
} // end namespace llvm
diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h
index 6988b2dde656..c8e0dad42b0b 100644
--- a/llvm/include/llvm/BinaryFormat/Magic.h
+++ b/llvm/include/llvm/BinaryFormat/Magic.h
@@ -51,6 +51,9 @@ struct file_magic {
wasm_object, ///< WebAssembly Object file
pdb, ///< Windows PDB debug info file
tapi_file, ///< Text-based Dynamic Library Stub file
+ cuda_fatbinary, ///< CUDA Fatbinary object file
+ offload_binary, ///< LLVM offload object file
+ dxcontainer_object, ///< DirectX container file
};
bool is_object() const { return V != unknown; }
diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def
index 6160e2551432..05b60e40632c 100644
--- a/llvm/include/llvm/BinaryFormat/Swift.def
+++ b/llvm/include/llvm/BinaryFormat/Swift.def
@@ -24,3 +24,10 @@ HANDLE_SWIFT_SECTION(builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn")
HANDLE_SWIFT_SECTION(capture, "__swift5_capture", "swift5_capture", ".sw5cptr")
HANDLE_SWIFT_SECTION(typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf")
HANDLE_SWIFT_SECTION(reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst")
+HANDLE_SWIFT_SECTION(conform, "__swift5_proto", "swift5_protocol_conformances",
+ ".sw5prtc$B")
+HANDLE_SWIFT_SECTION(protocs, "__swift5_protos", "swift5_protocols",
+ ".sw5prt$B")
+HANDLE_SWIFT_SECTION(acfuncs, "__swift5_acfuncs", "swift5_accessible_functions",
+ ".sw5acfn$B")
+HANDLE_SWIFT_SECTION(mpenum, "__swift5_mpenum", "swift5_mpenum", ".sw5mpen$B")
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index 0bc8c4e167d8..62a6881ef36a 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -91,7 +91,7 @@ struct WasmTable {
StringRef SymbolName; // from the "linking" section
};
-struct WasmInitExpr {
+struct WasmInitExprMVP {
uint8_t Opcode;
union {
int32_t Int32;
@@ -102,6 +102,13 @@ struct WasmInitExpr {
} Value;
};
+struct WasmInitExpr {
+ uint8_t Extended; // Set to non-zero if extended const is used (i.e. more than
+ // one instruction)
+ WasmInitExprMVP Inst;
+ ArrayRef<uint8_t> Body;
+};
+
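A minimal sketch of how a consumer might branch on the new Extended flag (evalSimpleInitExpr is hypothetical; only the two constant opcodes are handled, and the union is assumed to carry an Int64 member for I64 constants):

#include "llvm/BinaryFormat/Wasm.h"
#include <cstdint>

int64_t evalSimpleInitExpr(const llvm::wasm::WasmInitExpr &E) {
  if (!E.Extended) {
    // Single-instruction (MVP) form: the operand lives in the union.
    switch (E.Inst.Opcode) {
    case llvm::wasm::WASM_OPCODE_I32_CONST:
      return E.Inst.Value.Int32;
    case llvm::wasm::WASM_OPCODE_I64_CONST:
      return E.Inst.Value.Int64;
    default:
      break; // Other single-instruction forms elided.
    }
  }
  // Extended-const expressions carry their full instruction stream in
  // E.Body and would need a small stack-machine interpreter (not shown).
  return 0;
}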
struct WasmGlobalType {
uint8_t Type;
bool Mutable;
@@ -245,7 +252,8 @@ enum : unsigned {
WASM_SEC_CODE = 10, // Function bodies (code)
WASM_SEC_DATA = 11, // Data segments
WASM_SEC_DATACOUNT = 12, // Data segment count
- WASM_SEC_TAG = 13 // Tag declarations
+ WASM_SEC_TAG = 13, // Tag declarations
+ WASM_SEC_LAST_KNOWN = WASM_SEC_TAG,
};
// Type immediate encodings used in various contexts.
@@ -276,6 +284,7 @@ enum : unsigned {
WASM_OPCODE_CALL = 0x10,
WASM_OPCODE_LOCAL_GET = 0x20,
WASM_OPCODE_LOCAL_SET = 0x21,
+ WASM_OPCODE_LOCAL_TEE = 0x22,
WASM_OPCODE_GLOBAL_GET = 0x23,
WASM_OPCODE_GLOBAL_SET = 0x24,
WASM_OPCODE_I32_STORE = 0x36,
@@ -285,7 +294,11 @@ enum : unsigned {
WASM_OPCODE_F32_CONST = 0x43,
WASM_OPCODE_F64_CONST = 0x44,
WASM_OPCODE_I32_ADD = 0x6a,
+ WASM_OPCODE_I32_SUB = 0x6b,
+ WASM_OPCODE_I32_MUL = 0x6c,
WASM_OPCODE_I64_ADD = 0x7c,
+ WASM_OPCODE_I64_SUB = 0x7d,
+ WASM_OPCODE_I64_MUL = 0x7e,
WASM_OPCODE_REF_NULL = 0xd0,
};
@@ -458,8 +471,9 @@ inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) {
return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits;
}
-std::string toString(WasmSymbolType type);
-std::string relocTypetoString(uint32_t type);
+llvm::StringRef toString(WasmSymbolType type);
+llvm::StringRef relocTypetoString(uint32_t type);
+llvm::StringRef sectionTypeToString(uint32_t type);
bool relocTypeHasAddend(uint32_t type);
} // end namespace wasm
diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h
index cffd8618f1e3..5d23ec5cd911 100644
--- a/llvm/include/llvm/BinaryFormat/XCOFF.h
+++ b/llvm/include/llvm/BinaryFormat/XCOFF.h
@@ -54,6 +54,34 @@ enum AuxHeaderFlags64 : uint16_t {
///< future use and should be set to 0.
};
+enum XCOFFInterpret : uint16_t {
+ OLD_XCOFF_INTERPRET = 1,
+ NEW_XCOFF_INTERPRET = 2
+};
+
+enum FileFlag : uint16_t {
+ F_RELFLG = 0x0001, ///< relocation info stripped from file
+ F_EXEC = 0x0002, ///< file is executable (i.e., it
+ ///< has a loader section)
+ F_LNNO = 0x0004, ///< line numbers stripped from file
+ F_LSYMS = 0x0008, ///< local symbols stripped from file
+ F_FDPR_PROF = 0x0010, ///< file was profiled with FDPR
+ F_FDPR_OPTI = 0x0020, ///< file was reordered with FDPR
+ F_DSA = 0x0040, ///< file uses Dynamic Segment Allocation (32-bit
+ ///< only)
+ F_DEP_1 = 0x0080, ///< Data Execution Protection bit 1
+ F_VARPG = 0x0100, ///< executable requests using variable size pages
+ F_LPTEXT = 0x0400, ///< executable requires large pages for text
+ F_LPDATA = 0x0800, ///< executable requires large pages for data
+ F_DYNLOAD = 0x1000, ///< file is dynamically loadable and
+ ///< executable (equivalent to F_EXEC on AIX)
+ F_SHROBJ = 0x2000, ///< file is a shared object
+ F_LOADONLY =
+ 0x4000, ///< file can be loaded by the system loader, but it is
+ ///< ignored by the linker if it is a member of an archive.
+ F_DEP_2 = 0x8000 ///< Data Execution Protection bit 2
+};
+
// x_smclas field of x_csect from system header: /usr/include/syms.h
/// Storage Mapping Class definitions.
enum StorageMappingClass : uint8_t {
@@ -212,6 +240,8 @@ enum VisibilityType : uint16_t {
SYM_V_EXPORTED = 0x4000
};
+constexpr uint16_t VISIBILITY_MASK = 0x7000;
+
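A small sketch of applying the new mask when inspecting a symbol's flags (isExportedSymbol is a hypothetical helper):

#include "llvm/BinaryFormat/XCOFF.h"
#include <cstdint>

bool isExportedSymbol(uint16_t SymFlags) {
  // The visibility bits occupy 0x7000; mask them out and compare against
  // the VisibilityType values defined above.
  return (SymFlags & llvm::XCOFF::VISIBILITY_MASK) ==
         llvm::XCOFF::SYM_V_EXPORTED;
}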
// Relocation types, defined in `/usr/include/reloc.h`.
enum RelocationType : uint8_t {
R_POS = 0x00, ///< Positive relocation. Provides the address of the referenced
diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
index f6fc284da33f..102e2257abcc 100644
--- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
+++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
@@ -18,12 +18,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
#include <map>
#include <vector>
namespace llvm {
+class raw_ostream;
+
/// CurStreamTypeType - A type for CurStreamType
enum CurStreamTypeType {
UnknownBitstream,
diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h
index a82791c8720b..39ea48c33fc3 100644
--- a/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -15,12 +15,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Bitstream/BitCodes.h"
-#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/Bitstream/BitCodeEnums.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <cstdint>
#include <memory>
#include <string>
@@ -30,6 +29,8 @@ namespace llvm {
class LLVMContext;
class Module;
+class MemoryBuffer;
+class ModuleSummaryIndex;
typedef llvm::function_ref<Optional<std::string>(StringRef)>
DataLayoutCallbackTy;
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h
index 96f25fce8ddb..248d33f4502e 100644
--- a/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -17,7 +17,7 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <map>
#include <memory>
#include <string>
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriterPass.h b/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
index dda5b20973c1..3c2471237532 100644
--- a/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -14,7 +14,6 @@
#ifndef LLVM_BITCODE_BITCODEWRITERPASS_H
#define LLVM_BITCODE_BITCODEWRITERPASS_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 6d0f51ce9c6d..5d96204ba42a 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -17,7 +17,10 @@
#ifndef LLVM_BITCODE_LLVMBITCODES_H
#define LLVM_BITCODE_LLVMBITCODES_H
-#include "llvm/Bitstream/BitCodes.h"
+// This is the only file included, and it, in turn, is a leaf header.
+// This allows external tools to dump the AST of this file and analyze it for
+// changes without needing to fully or partially build LLVM itself.
+#include "llvm/Bitstream/BitCodeEnums.h"
namespace llvm {
namespace bitc {
@@ -582,14 +585,15 @@ enum FunctionCodes {
52, // CATCHSWITCH: [num,args...] or [num,args...,bb]
// 53 is unused.
// 54 is unused.
- FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
- FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval]
- FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs,
- // fnty, fnid, args...]
- FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval]
- FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val,
- // operation, align, vol,
- // ordering, synchscope]
+ FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
+ FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval]
+ FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs,
+ // fnty, fnid, args...]
+ FUNC_CODE_INST_FREEZE = 58, // FREEZE: [opty, opval]
+ FUNC_CODE_INST_ATOMICRMW = 59, // ATOMICRMW: [ptrty, ptr, valty, val,
+ // operation, align, vol,
+ // ordering, synchscope]
+ FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...]
};
enum UseListCodes {
@@ -677,6 +681,11 @@ enum AttributeKindCodes {
ATTR_KIND_NO_SANITIZE_COVERAGE = 76,
ATTR_KIND_ELEMENTTYPE = 77,
ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION = 78,
+ ATTR_KIND_NO_SANITIZE_BOUNDS = 79,
+ ATTR_KIND_ALLOC_ALIGN = 80,
+ ATTR_KIND_ALLOCATED_POINTER = 81,
+ ATTR_KIND_ALLOC_KIND = 82,
+ ATTR_KIND_PRESPLIT_COROUTINE = 83,
};
enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/Bitstream/BitCodeEnums.h b/llvm/include/llvm/Bitstream/BitCodeEnums.h
new file mode 100644
index 000000000000..4288bd3987ae
--- /dev/null
+++ b/llvm/include/llvm/Bitstream/BitCodeEnums.h
@@ -0,0 +1,90 @@
+//===- BitCodeEnums.h - Core enums for the bitstream format -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines "core" bitstream enum values.
+// It has been separated from the other header that defines bitstream enum
+// values, BitCodes.h, to allow tools to track changes to the various
+// bitstream and bitcode enums without needing to fully or partially build
+// LLVM itself.
+//
+// The enum values defined in this file should be considered permanent. If
+// new features are added, they should have values added at the end of the
+// respective lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITSTREAM_BITCODEENUMS_H
+#define LLVM_BITSTREAM_BITCODEENUMS_H
+
+namespace llvm {
+/// Offsets of the 32-bit fields of bitstream wrapper header.
+enum BitstreamWrapperHeader : unsigned {
+ BWH_MagicField = 0 * 4,
+ BWH_VersionField = 1 * 4,
+ BWH_OffsetField = 2 * 4,
+ BWH_SizeField = 3 * 4,
+ BWH_CPUTypeField = 4 * 4,
+ BWH_HeaderSize = 5 * 4
+};
+
+namespace bitc {
+enum StandardWidths {
+ BlockIDWidth = 8, // We use VBR-8 for block IDs.
+ CodeLenWidth = 4, // Codelen are VBR-4.
+ BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block.
+};
+
+// The standard abbrev namespace always has a way to exit a block, enter a
+// nested block, define abbrevs, and define an unabbreviated record.
+enum FixedAbbrevIDs {
+ END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode.
+ ENTER_SUBBLOCK = 1,
+
+ /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists
+ /// of a vbr5 for # operand infos. Each operand info is emitted with a
+ /// single bit to indicate if it is a literal encoding. If so, the value is
+ /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed
+ /// by the info value as a vbr5 if needed.
+ DEFINE_ABBREV = 2,
+
+ // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
+ // a vbr6 for the # operands, followed by vbr6's for each operand.
+ UNABBREV_RECORD = 3,
+
+ // This is not a code, this is a marker for the first abbrev assignment.
+ FIRST_APPLICATION_ABBREV = 4
+};
+
+/// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
+/// block, which contains metadata about other blocks in the file.
+enum StandardBlockIDs {
+ /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
+ /// standard abbrevs that should be available to all blocks of a specified
+ /// ID.
+ BLOCKINFO_BLOCK_ID = 0,
+
+ // Block IDs 1-7 are reserved for future expansion.
+ FIRST_APPLICATION_BLOCKID = 8
+};
+
+/// BlockInfoCodes - The blockinfo block contains metadata about user-defined
+/// blocks.
+enum BlockInfoCodes {
+ // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
+ // block, instead of the BlockInfo block.
+
+ BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#]
+ BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name]
+ BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME:
+ // [id, name]
+};
+
+} // namespace bitc
+} // namespace llvm
+
+#endif
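As a worked illustration of the VBR scheme described in the comments above (values are emitted in NumBits-wide chunks whose top bit says "more chunks follow"), here is a standalone encoder sketch; encodeVBR is hypothetical and mirrors the chunking rule only:

#include <cstdint>
#include <vector>

// With NumBits = 6, the value 100 (0b1100100) encodes as 0b100100 (the low
// 5 data bits with the continuation bit set), followed by 0b000011.
std::vector<uint32_t> encodeVBR(uint64_t Val, unsigned NumBits) {
  const uint64_t HiBit = 1ULL << (NumBits - 1);
  std::vector<uint32_t> Chunks;
  do {
    uint64_t Low = Val & (HiBit - 1); // Payload: NumBits-1 data bits.
    Val >>= (NumBits - 1);
    Chunks.push_back(uint32_t(Low | (Val ? HiBit : 0)));
  } while (Val);
  return Chunks;
}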
diff --git a/llvm/include/llvm/Bitstream/BitCodes.h b/llvm/include/llvm/Bitstream/BitCodes.h
index 9cd4e535a470..93888f7d3b33 100644
--- a/llvm/include/llvm/Bitstream/BitCodes.h
+++ b/llvm/include/llvm/Bitstream/BitCodes.h
@@ -19,75 +19,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Bitstream/BitCodeEnums.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
namespace llvm {
-/// Offsets of the 32-bit fields of bitstream wrapper header.
-enum BitstreamWrapperHeader : unsigned {
- BWH_MagicField = 0 * 4,
- BWH_VersionField = 1 * 4,
- BWH_OffsetField = 2 * 4,
- BWH_SizeField = 3 * 4,
- BWH_CPUTypeField = 4 * 4,
- BWH_HeaderSize = 5 * 4
-};
-
-namespace bitc {
- enum StandardWidths {
- BlockIDWidth = 8, // We use VBR-8 for block IDs.
- CodeLenWidth = 4, // Codelen are VBR-4.
- BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block.
- };
-
- // The standard abbrev namespace always has a way to exit a block, enter a
- // nested block, define abbrevs, and define an unabbreviated record.
- enum FixedAbbrevIDs {
- END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode.
- ENTER_SUBBLOCK = 1,
-
- /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists
- /// of a vbr5 for # operand infos. Each operand info is emitted with a
- /// single bit to indicate if it is a literal encoding. If so, the value is
- /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed
- /// by the info value as a vbr5 if needed.
- DEFINE_ABBREV = 2,
-
- // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
- // a vbr6 for the # operands, followed by vbr6's for each operand.
- UNABBREV_RECORD = 3,
-
- // This is not a code, this is a marker for the first abbrev assignment.
- FIRST_APPLICATION_ABBREV = 4
- };
-
- /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
- /// block, which contains metadata about other blocks in the file.
- enum StandardBlockIDs {
- /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
- /// standard abbrevs that should be available to all blocks of a specified
- /// ID.
- BLOCKINFO_BLOCK_ID = 0,
-
- // Block IDs 1-7 are reserved for future expansion.
- FIRST_APPLICATION_BLOCKID = 8
- };
-
- /// BlockInfoCodes - The blockinfo block contains metadata about user-defined
- /// blocks.
- enum BlockInfoCodes {
- // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
- // block, instead of the BlockInfo block.
-
- BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#]
- BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name]
- BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME:
- // [id, name]
- };
-
-} // End bitc namespace
-
/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
/// This is actually a union of two different things:
/// 1. It could be a literal integer value ("the operand is always 17").
@@ -106,6 +43,10 @@ public:
Blob = 5 // 32-bit aligned array of 8-bit characters.
};
+ static bool isValidEncoding(uint64_t E) {
+ return E >= 1 && E <= 5;
+ }
+
explicit BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {}
explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
: Val(Data), IsLiteral(false), Enc(E) {}
@@ -179,6 +120,6 @@ public:
OperandList.push_back(OpInfo);
}
};
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h
index 37b7c4d73cff..10a0a4e0039e 100644
--- a/llvm/include/llvm/Bitstream/BitstreamReader.h
+++ b/llvm/include/llvm/Bitstream/BitstreamReader.h
@@ -19,7 +19,6 @@
#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBufferRef.h"
#include <algorithm>
#include <cassert>
@@ -97,8 +96,6 @@ private:
unsigned BitsInCurWord = 0;
public:
- static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8;
-
SimpleBitstreamCursor() = default;
explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
: BitcodeBytes(BitcodeBytes) {}
@@ -187,7 +184,7 @@ public:
}
Expected<word_t> Read(unsigned NumBits) {
- static const unsigned BitsInWord = MaxChunkSize;
+ static const unsigned BitsInWord = sizeof(word_t) * 8;
assert(NumBits && NumBits <= BitsInWord &&
"Cannot return zero or more than BitsInWord bits!");
@@ -229,24 +226,32 @@ public:
return R;
}
- Expected<uint32_t> ReadVBR(unsigned NumBits) {
+ Expected<uint32_t> ReadVBR(const unsigned NumBits) {
Expected<unsigned> MaybeRead = Read(NumBits);
if (!MaybeRead)
return MaybeRead;
uint32_t Piece = MaybeRead.get();
- if ((Piece & (1U << (NumBits-1))) == 0)
+ assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
+ const uint32_t MaskBitOrder = (NumBits - 1);
+ const uint32_t Mask = 1UL << MaskBitOrder;
+
+ if ((Piece & Mask) == 0)
return Piece;
uint32_t Result = 0;
unsigned NextBit = 0;
while (true) {
- Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+ Result |= (Piece & (Mask - 1)) << NextBit;
- if ((Piece & (1U << (NumBits-1))) == 0)
+ if ((Piece & Mask) == 0)
return Result;
NextBit += NumBits-1;
+ if (NextBit >= 32)
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Unterminated VBR");
+
MaybeRead = Read(NumBits);
if (!MaybeRead)
return MaybeRead;
@@ -256,24 +261,31 @@ public:
// Read a VBR that may have a value up to 64-bits in size. The chunk size of
// the VBR must still be <= 32 bits though.
- Expected<uint64_t> ReadVBR64(unsigned NumBits) {
+ Expected<uint64_t> ReadVBR64(const unsigned NumBits) {
Expected<uint64_t> MaybeRead = Read(NumBits);
if (!MaybeRead)
return MaybeRead;
uint32_t Piece = MaybeRead.get();
+ assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
+ const uint32_t MaskBitOrder = (NumBits - 1);
+ const uint32_t Mask = 1UL << MaskBitOrder;
- if ((Piece & (1U << (NumBits-1))) == 0)
+ if ((Piece & Mask) == 0)
return uint64_t(Piece);
uint64_t Result = 0;
unsigned NextBit = 0;
while (true) {
- Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
+ Result |= uint64_t(Piece & (Mask - 1)) << NextBit;
- if ((Piece & (1U << (NumBits-1))) == 0)
+ if ((Piece & Mask) == 0)
return Result;
NextBit += NumBits-1;
+ if (NextBit >= 64)
+ return createStringError(std::errc::illegal_byte_sequence,
+ "Unterminated VBR");
+
MaybeRead = Read(NumBits);
if (!MaybeRead)
return MaybeRead;
@@ -299,6 +311,13 @@ public:
/// Skip to the end of the file.
void skipToEnd() { NextChar = BitcodeBytes.size(); }
+
+ /// Check whether a reservation of Size elements is plausible.
+ bool isSizePlausible(size_t Size) const {
+ // Don't allow reserving more elements than the number of bits, assuming
+ // at least one bit is needed to encode an element.
+ return Size < BitcodeBytes.size() * 8;
+ }
};
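To make the heuristic concrete: a 16-byte bitcode buffer holds 128 bits, so a record claiming, say, a million operands cannot be legitimate and is rejected before any allocation. A standalone restatement (isSizePlausibleFor is hypothetical):

#include <cstddef>

// Each encoded element needs at least one bit, so asking for more elements
// than the stream has bits is implausible.
bool isSizePlausibleFor(size_t NumStreamBytes, size_t Size) {
  return Size < NumStreamBytes * 8;
}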
/// When advancing through a bitstream cursor, each advance can discover a few
@@ -357,7 +376,7 @@ class BitstreamCursor : SimpleBitstreamCursor {
BitstreamBlockInfo *BlockInfo = nullptr;
public:
- static const size_t MaxChunkSize = sizeof(word_t) * 8;
+ static const size_t MaxChunkSize = 32;
BitstreamCursor() = default;
explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
@@ -521,10 +540,11 @@ private:
public:
/// Return the abbreviation for the specified AbbrevId.
- const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
+ Expected<const BitCodeAbbrev *> getAbbrev(unsigned AbbrevID) {
unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
if (AbbrevNo >= CurAbbrevs.size())
- report_fatal_error("Invalid abbrev number");
+ return createStringError(
+ std::errc::illegal_byte_sequence, "Invalid abbrev number");
return CurAbbrevs[AbbrevNo].get();
}
diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h
index 21b260b7b9f3..be6bab5532bd 100644
--- a/llvm/include/llvm/Bitstream/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h
@@ -74,16 +74,10 @@ class BitstreamWriter {
};
std::vector<BlockInfo> BlockInfoRecords;
- void WriteByte(unsigned char Value) {
- Out.push_back(Value);
- FlushToFile();
- }
-
void WriteWord(unsigned Value) {
Value = support::endian::byte_swap<uint32_t, support::little>(Value);
Out.append(reinterpret_cast<const char *>(&Value),
reinterpret_cast<const char *>(&Value + 1));
- FlushToFile();
}
uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; }
@@ -114,7 +108,7 @@ public:
/// null, \p O does not flush incrementally, but writes to disk at the end.
///
/// \p FlushThreshold is the threshold (in MiB) to flush \p O if \p FS is
- /// valid.
+ /// valid. Flushing only occurs at (sub)block boundaries.
BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr,
uint32_t FlushThreshold = 512)
: Out(O), FS(FS), FlushThreshold(FlushThreshold << 20), CurBit(0),
@@ -249,8 +243,8 @@ public:
// Emit the bits with VBR encoding, NumBits-1 bits at a time.
while (Val >= Threshold) {
- Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
- (1 << (NumBits-1)), NumBits);
+ Emit(((uint32_t)Val & ((1 << (NumBits - 1)) - 1)) | (1 << (NumBits - 1)),
+ NumBits);
Val >>= NumBits-1;
}
@@ -327,6 +321,7 @@ public:
CurCodeSize = B.PrevCodeSize;
CurAbbrevs = std::move(B.PrevAbbrevs);
BlockScope.pop_back();
+ FlushToFile();
}
//===--------------------------------------------------------------------===//
@@ -472,14 +467,12 @@ public:
FlushToWord();
// Emit literal bytes.
- for (const auto &B : Bytes) {
- assert(isUInt<8>(B) && "Value too large to emit as byte");
- WriteByte((unsigned char)B);
- }
+ assert(llvm::all_of(Bytes, [](UIntTy B) { return isUInt<8>(B); }));
+ Out.append(Bytes.begin(), Bytes.end());
// Align end to 32-bits.
while (GetBufferOffset() & 3)
- WriteByte(0);
+ Out.push_back(0);
}
void emitBlob(StringRef Bytes, bool ShouldEmitSize = true) {
emitBlob(makeArrayRef((const uint8_t *)Bytes.data(), Bytes.size()),
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index 1190d6061e45..c0e976317aef 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -14,19 +14,15 @@
#define LLVM_CODEGEN_ACCELTABLE_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstddef>
#include <cstdint>
#include <vector>
@@ -108,6 +104,8 @@ namespace llvm {
class AsmPrinter;
class DwarfCompileUnit;
class DwarfDebug;
+class MCSymbol;
+class raw_ostream;
/// Interface which the different types of accelerator table data have to
/// conform. It serves as a base class for different values of the template
diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index 60442326d6c7..1a09820f80ef 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -15,14 +15,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Support/CodeGen.h"
namespace llvm {
+template <typename T> class SmallVectorImpl;
class GlobalValue;
class LLT;
class MachineBasicBlock;
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index d911bfd435ae..fb4627c029b0 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -22,9 +22,7 @@
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SourceMgr.h"
#include <cstdint>
#include <memory>
#include <utility>
@@ -32,6 +30,7 @@
namespace llvm {
+class AddrLabelMap;
class BasicBlock;
class BlockAddress;
class Constant;
@@ -176,6 +175,10 @@ private:
// function. This is used to calculate the size of the BB section.
MCSymbol *CurrentSectionBeginSym = nullptr;
+  /// This map keeps track of which symbol is being used for the specified
+  /// basic block when its address is taken.
+ std::unique_ptr<AddrLabelMap> AddrLabelSymbols;
+
// The garbage collection metadata printer table.
void *GCMetadataPrinters = nullptr; // Really a DenseMap.
@@ -212,6 +215,16 @@ private:
/// CFISection type the module needs i.e. either .eh_frame or .debug_frame.
CFISection ModuleCFISection = CFISection::None;
+  /// True if the module contains split-stack functions. This is used to emit
+  /// the .note.GNU-split-stack section, as required by the linker to specially
+  /// handle calls from split-stack functions to no-split-stack functions.
+ bool HasSplitStack = false;
+
+  /// True if the module contains no-split-stack functions. This is used to
+  /// emit the .note.GNU-no-split-stack section, marking that the module
+  /// contains functions without a split-stack prologue.
+ bool HasNoSplitStack = false;
+
protected:
explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
@@ -254,6 +267,25 @@ public:
// given basic block.
MCSymbol *getMBBExceptionSym(const MachineBasicBlock &MBB);
+ /// Return the symbol to be used for the specified basic block when its
+ /// address is taken. This cannot be its normal LBB label because the block
+ /// may be accessed outside its containing function.
+ MCSymbol *getAddrLabelSymbol(const BasicBlock *BB) {
+ return getAddrLabelSymbolToEmit(BB).front();
+ }
+
+ /// Return the symbol to be used for the specified basic block when its
+ /// address is taken. If other blocks were RAUW'd to this one, we may have
+ /// to emit them as well, return the whole set.
+ ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
+
+ /// If the specified function has had any references to address-taken blocks
+ /// generated, but the block got deleted, return the symbol now so we can
+ /// emit it. This prevents emitting a reference to a symbol that has no
+ /// definition.
+ void takeDeletedSymbolsForFunction(const Function *F,
+ std::vector<MCSymbol *> &Result);
+
/// Return information about object file lowering.
const TargetLoweringObjectFile &getObjFileLowering() const;
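By way of illustration, a consumer in the code emitter might use the new hooks roughly as follows (emitBlockAddressRef is hypothetical; the surrounding emission context is elided):

#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/Constants.h"

// When lowering a blockaddress constant, ask the AsmPrinter for the symbol
// standing in for the block's address; blocks RAUW'd into this one are
// emitted together via getAddrLabelSymbolToEmit().
void emitBlockAddressRef(llvm::AsmPrinter &AP, const llvm::BlockAddress *BA) {
  llvm::MCSymbol *Sym = AP.getAddrLabelSymbol(BA->getBasicBlock());
  (void)Sym; // Would be referenced as a data relocation here.
}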
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
new file mode 100644
index 000000000000..7ae1304cced9
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -0,0 +1,109 @@
+//===-- BasicBlockSectionsProfileReader.h - BB sections profile reader pass ==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass creates the basic block cluster info by reading the basic block
+// sections profile. The cluster info will be used by the basic-block-sections
+// pass to arrange basic blocks in their sections.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H
+#define LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+// The cluster information for a machine basic block.
+struct BBClusterInfo {
+ // MachineBasicBlock ID.
+ unsigned MBBNumber;
+ // Cluster ID this basic block belongs to.
+ unsigned ClusterID;
+ // Position of basic block within the cluster.
+ unsigned PositionInCluster;
+};
+
+using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo>>;
+
+class BasicBlockSectionsProfileReader : public ImmutablePass {
+public:
+ static char ID;
+
+ BasicBlockSectionsProfileReader(const MemoryBuffer *Buf)
+ : ImmutablePass(ID), MBuf(Buf) {
+ initializeBasicBlockSectionsProfileReaderPass(
+ *PassRegistry::getPassRegistry());
+  }
+
+ BasicBlockSectionsProfileReader() : ImmutablePass(ID) {
+ initializeBasicBlockSectionsProfileReaderPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Basic Block Sections Profile Reader";
+ }
+
+  // Returns true if a basic block sections profile exists for function \p
+  // FuncName.
+ bool isFunctionHot(StringRef FuncName) const;
+
+  // Returns a pair whose first element indicates whether a basic block
+  // sections profile exists for the function \p FuncName, and whose second
+  // element is the basic block sections profile (cluster info) for this
+  // function. If the first element is true and the second element is empty,
+  // it means unique basic block sections are desired for all basic blocks of
+  // the function.
+ std::pair<bool, SmallVector<BBClusterInfo>>
+ getBBClusterInfoForFunction(StringRef FuncName) const;
+
+  /// Read the basic block sections profile, if one is available.
+ void initializePass() override;
+
+private:
+ StringRef getAliasName(StringRef FuncName) const {
+ auto R = FuncAliasMap.find(FuncName);
+ return R == FuncAliasMap.end() ? FuncName : R->second;
+ }
+
+ // This contains the basic-block-sections profile.
+ const MemoryBuffer *MBuf = nullptr;
+
+ // This encapsulates the BB cluster information for the whole program.
+ //
+ // For every function name, it contains the cluster information for (all or
+ // some of) its basic blocks. The cluster information for every basic block
+ // includes its cluster ID along with the position of the basic block in that
+ // cluster.
+ ProgramBBClusterInfoMapTy ProgramBBClusterInfo;
+
+ // Some functions have alias names. We use this map to find the main alias
+ // name for which we have mapping in ProgramBBClusterInfo.
+ StringMap<StringRef> FuncAliasMap;
+};
+
+// Creates a BasicBlockSectionsProfileReader pass to parse the basic block
+// sections profile. \p Buf is a memory buffer that contains the list of
+// functions and basic block ids to selectively enable basic block sections.
+ImmutablePass *
+createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf);
+
+} // namespace llvm
+#endif // LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H
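A sketch of querying the reader (dumpClusters is a hypothetical helper; per the comment above, an empty-but-present profile requests unique sections for every block):

#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"

void dumpClusters(const llvm::BasicBlockSectionsProfileReader &R,
                  llvm::StringRef Fn) {
  auto P = R.getBBClusterInfoForFunction(Fn);
  if (!P.first)
    return; // No profile for this function.
  for (const llvm::BBClusterInfo &CI : P.second) {
    // Block CI.MBBNumber goes to cluster CI.ClusterID at position
    // CI.PositionInCluster.
    (void)CI;
  }
}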
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0b2737628923..46be8e030406 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -195,6 +195,10 @@ private:
bool VariableMask,
bool IsGatherScatter,
TTI::TargetCostKind CostKind) {
+ // We cannot scalarize scalable vectors, so return Invalid.
+ if (isa<ScalableVectorType>(DataTy))
+ return InstructionCost::getInvalid();
+
auto *VT = cast<FixedVectorType>(DataTy);
// Assume the target does not have support for gather/scatter operations
// and provide a rough estimate.
@@ -312,6 +316,26 @@ public:
return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}
+ unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+ Type *ScalarValTy) const {
+ auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
+ auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);
+ EVT VT = getTLI()->getValueType(DL, SrcTy);
+ if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
+ getTLI()->isOperationCustom(ISD::STORE, VT))
+ return true;
+
+ EVT ValVT =
+ getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2));
+ EVT LegalizedVT =
+ getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT);
+ return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT);
+ };
+ while (VF > 2 && IsSupportedByTarget(VF))
+ VF /= 2;
+ return VF;
+ }
+
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
const DataLayout &DL) const {
EVT VT = getTLI()->getValueType(DL, Ty);
@@ -362,10 +386,9 @@ public:
return getTLI()->isTypeLegal(VT);
}
- InstructionCost getRegUsageForType(Type *Ty) {
- InstructionCost Val = getTLI()->getTypeLegalizationCost(DL, Ty).first;
- assert(Val >= 0 && "Negative cost!");
- return Val;
+ unsigned getRegUsageForType(Type *Ty) {
+ EVT ETy = getTLI()->getValueType(DL, Ty);
+ return getTLI()->getNumRegisters(Ty->getContext(), ETy);
}
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
@@ -680,6 +703,8 @@ public:
bool Insert, bool Extract) {
/// FIXME: a bitfield is not a reasonable abstraction for talking about
/// which elements are needed from a scalable vector
+ if (isa<ScalableVectorType>(InTy))
+ return InstructionCost::getInvalid();
auto *Ty = cast<FixedVectorType>(InTy);
assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
@@ -702,6 +727,8 @@ public:
/// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
bool Extract) {
+ if (isa<ScalableVectorType>(InTy))
+ return InstructionCost::getInvalid();
auto *Ty = cast<FixedVectorType>(InTy);
APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
@@ -871,7 +898,8 @@ public:
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp) {
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None) {
switch (improveShuffleKindFromMask(Kind, Mask)) {
case TTI::SK_Broadcast:
@@ -1100,6 +1128,9 @@ public:
// TODO: If one of the types get legalized by splitting, handle this
// similarly to what getCastInstrCost() does.
if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
+ if (isa<ScalableVectorType>(ValTy))
+ return InstructionCost::getInvalid();
+
unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
if (CondTy)
CondTy = CondTy->getScalarType();
@@ -1172,11 +1203,12 @@ public:
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
+ const DataLayout &DL = this->getDataLayout();
if (Src->isVectorTy() &&
// In practice it's not currently possible to have a change in lane
// length for extending loads or truncating stores so both types should
// have the same scalable property.
- TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(),
+ TypeSize::isKnownLT(DL.getTypeStoreSizeInBits(Src),
LT.second.getSizeInBits())) {
// This is a vector load that legalizes to a larger type than the vector
// itself. Unless the corresponding extending load or truncating store is
@@ -1220,6 +1252,11 @@ public:
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) {
+
+ // We cannot scalarize scalable vectors, so return Invalid.
+ if (isa<ScalableVectorType>(VecTy))
+ return InstructionCost::getInvalid();
+
auto *VT = cast<FixedVectorType>(VecTy);
unsigned NumElts = VT->getNumElements();
@@ -1274,8 +1311,7 @@ public:
// Scale the cost of the load by the fraction of legal instructions that
// will be used.
- Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(),
- NumLegalInsts);
+ Cost = divideCeil(UsedInsts.count() * *Cost.getValue(), NumLegalInsts);
}
// Then plus the cost of interleave operation.
@@ -1382,6 +1418,26 @@ public:
default:
break;
+ case Intrinsic::powi:
+ if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
+ bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize();
+ if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(),
+ ShouldOptForSize)) {
+ // The cost is modeled on the expansion performed by ExpandPowI in
+ // SelectionDAGBuilder.
+ APInt Exponent = RHSC->getValue().abs();
+ unsigned ActiveBits = Exponent.getActiveBits();
+ unsigned PopCount = Exponent.countPopulation();
+ InstructionCost Cost = (ActiveBits + PopCount - 2) *
+ thisT()->getArithmeticInstrCost(
+ Instruction::FMul, RetTy, CostKind);
+ if (RHSC->getSExtValue() < 0)
+ Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
+ CostKind);
+ return Cost;
+ }
+ }
+ break;
case Intrinsic::cttz:
// FIXME: If necessary, this should go in target-specific overrides.
if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz())
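To make the powi cost model above concrete: for an exponent of 11 (0b1011), ActiveBits is 4 and PopCount is 3, so the expansion is costed as (4 + 3 - 2) = 5 fmuls, plus one fdiv when the exponent is negative. A standalone restatement of the arithmetic (powiExpansionFMuls is hypothetical and mirrors the cost formula only, not the actual expansion):

#include <cstdint>

unsigned powiExpansionFMuls(uint64_t AbsExponent) {
  if (AbsExponent == 0)
    return 0; // Handled specially by the expansion itself.
  unsigned ActiveBits = 0, PopCount = 0;
  for (uint64_t V = AbsExponent; V; V >>= 1) {
    ++ActiveBits;
    PopCount += unsigned(V & 1);
  }
  // (ActiveBits - 1) squarings plus (PopCount - 1) multiplies into the
  // running product, e.g. exponent 11 -> 4 + 3 - 2 = 5.
  return ActiveBits + PopCount - 2;
}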
@@ -1418,7 +1474,7 @@ public:
// The cost of materialising a constant integer vector.
return TargetTransformInfo::TCC_Basic;
}
- case Intrinsic::experimental_vector_extract: {
+ case Intrinsic::vector_extract: {
// FIXME: Handle case where a scalable vector is extracted from a scalable
// vector
if (isa<ScalableVectorType>(RetTy))
@@ -1428,7 +1484,7 @@ public:
cast<VectorType>(Args[0]->getType()), None,
Index, cast<VectorType>(RetTy));
}
- case Intrinsic::experimental_vector_insert: {
+ case Intrinsic::vector_insert: {
// FIXME: Handle case where a scalable vector is inserted into a scalable
// vector
if (isa<ScalableVectorType>(Args[1]->getType()))
@@ -1471,8 +1527,6 @@ public:
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
- if (isa<ScalableVectorType>(RetTy))
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
const Value *X = Args[0];
const Value *Y = Args[1];
const Value *Z = Args[2];
@@ -1512,6 +1566,29 @@ public:
}
return Cost;
}
+ case Intrinsic::get_active_lane_mask: {
+ EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
+ EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
+
+ // If we're not expanding the intrinsic then we assume this is cheap
+ // to implement.
+ if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
+ std::pair<InstructionCost, MVT> LT =
+ getTLI()->getTypeLegalizationCost(DL, RetTy);
+ return LT.first;
+ }
+
+ // Create the expanded types that will be used to calculate the uadd_sat
+ // operation.
+ Type *ExpRetTy = VectorType::get(
+ ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
+ IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
+ InstructionCost Cost =
+ thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
+ CmpInst::ICMP_ULT, CostKind);
+ return Cost;
+ }
}
// Assume that we need to scalarize this intrinsic.
@@ -1560,7 +1637,7 @@ public:
// Library call cost - other than size, make it expensive.
unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
- SmallVector<unsigned, 2> ISDs;
+ unsigned ISD = 0;
switch (IID) {
default: {
// Scalable vectors cannot be scalarized, so return Invalid.
@@ -1605,82 +1682,82 @@ public:
// Look for intrinsics that can be lowered directly or turned into a scalar
// intrinsic call.
case Intrinsic::sqrt:
- ISDs.push_back(ISD::FSQRT);
+ ISD = ISD::FSQRT;
break;
case Intrinsic::sin:
- ISDs.push_back(ISD::FSIN);
+ ISD = ISD::FSIN;
break;
case Intrinsic::cos:
- ISDs.push_back(ISD::FCOS);
+ ISD = ISD::FCOS;
break;
case Intrinsic::exp:
- ISDs.push_back(ISD::FEXP);
+ ISD = ISD::FEXP;
break;
case Intrinsic::exp2:
- ISDs.push_back(ISD::FEXP2);
+ ISD = ISD::FEXP2;
break;
case Intrinsic::log:
- ISDs.push_back(ISD::FLOG);
+ ISD = ISD::FLOG;
break;
case Intrinsic::log10:
- ISDs.push_back(ISD::FLOG10);
+ ISD = ISD::FLOG10;
break;
case Intrinsic::log2:
- ISDs.push_back(ISD::FLOG2);
+ ISD = ISD::FLOG2;
break;
case Intrinsic::fabs:
- ISDs.push_back(ISD::FABS);
+ ISD = ISD::FABS;
break;
case Intrinsic::canonicalize:
- ISDs.push_back(ISD::FCANONICALIZE);
+ ISD = ISD::FCANONICALIZE;
break;
case Intrinsic::minnum:
- ISDs.push_back(ISD::FMINNUM);
+ ISD = ISD::FMINNUM;
break;
case Intrinsic::maxnum:
- ISDs.push_back(ISD::FMAXNUM);
+ ISD = ISD::FMAXNUM;
break;
case Intrinsic::minimum:
- ISDs.push_back(ISD::FMINIMUM);
+ ISD = ISD::FMINIMUM;
break;
case Intrinsic::maximum:
- ISDs.push_back(ISD::FMAXIMUM);
+ ISD = ISD::FMAXIMUM;
break;
case Intrinsic::copysign:
- ISDs.push_back(ISD::FCOPYSIGN);
+ ISD = ISD::FCOPYSIGN;
break;
case Intrinsic::floor:
- ISDs.push_back(ISD::FFLOOR);
+ ISD = ISD::FFLOOR;
break;
case Intrinsic::ceil:
- ISDs.push_back(ISD::FCEIL);
+ ISD = ISD::FCEIL;
break;
case Intrinsic::trunc:
- ISDs.push_back(ISD::FTRUNC);
+ ISD = ISD::FTRUNC;
break;
case Intrinsic::nearbyint:
- ISDs.push_back(ISD::FNEARBYINT);
+ ISD = ISD::FNEARBYINT;
break;
case Intrinsic::rint:
- ISDs.push_back(ISD::FRINT);
+ ISD = ISD::FRINT;
break;
case Intrinsic::round:
- ISDs.push_back(ISD::FROUND);
+ ISD = ISD::FROUND;
break;
case Intrinsic::roundeven:
- ISDs.push_back(ISD::FROUNDEVEN);
+ ISD = ISD::FROUNDEVEN;
break;
case Intrinsic::pow:
- ISDs.push_back(ISD::FPOW);
+ ISD = ISD::FPOW;
break;
case Intrinsic::fma:
- ISDs.push_back(ISD::FMA);
+ ISD = ISD::FMA;
break;
case Intrinsic::fmuladd:
- ISDs.push_back(ISD::FMA);
+ ISD = ISD::FMA;
break;
case Intrinsic::experimental_constrained_fmuladd:
- ISDs.push_back(ISD::STRICT_FMA);
+ ISD = ISD::STRICT_FMA;
break;
// FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
case Intrinsic::lifetime_start:
@@ -1897,23 +1974,49 @@ public:
BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
return Cost;
}
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat: {
+ if (Tys.empty())
+ break;
+ Type *FromTy = Tys[0];
+ bool IsSigned = IID == Intrinsic::fptosi_sat;
+
+ InstructionCost Cost = 0;
+ IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
+ {FromTy, FromTy});
+ Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
+ IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
+ {FromTy, FromTy});
+ Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
+ Cost += thisT()->getCastInstrCost(
+ IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
+ TTI::CastContextHint::None, CostKind);
+ if (IsSigned) {
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ Cost += thisT()->getCmpSelInstrCost(
+ BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(
+ BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);
+ }
+ return Cost;
+ }
case Intrinsic::ctpop:
- ISDs.push_back(ISD::CTPOP);
+ ISD = ISD::CTPOP;
// In case of legalization use TCC_Expensive. This is cheaper than a
// library call but still not a cheap instruction.
SingleCallCost = TargetTransformInfo::TCC_Expensive;
break;
case Intrinsic::ctlz:
- ISDs.push_back(ISD::CTLZ);
+ ISD = ISD::CTLZ;
break;
case Intrinsic::cttz:
- ISDs.push_back(ISD::CTTZ);
+ ISD = ISD::CTTZ;
break;
case Intrinsic::bswap:
- ISDs.push_back(ISD::BSWAP);
+ ISD = ISD::BSWAP;
break;
case Intrinsic::bitreverse:
- ISDs.push_back(ISD::BITREVERSE);
+ ISD = ISD::BITREVERSE;
break;
}
@@ -1921,38 +2024,25 @@ public:
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, RetTy);
- SmallVector<InstructionCost, 2> LegalCost;
- SmallVector<InstructionCost, 2> CustomCost;
- for (unsigned ISD : ISDs) {
- if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
- if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
- TLI->isFAbsFree(LT.second)) {
- return 0;
- }
-
- // The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that there is some
- // overhead to this.
- // TODO: Once we have extract/insert subvector cost we need to use them.
- if (LT.first > 1)
- LegalCost.push_back(LT.first * 2);
- else
- LegalCost.push_back(LT.first * 1);
- } else if (!TLI->isOperationExpand(ISD, LT.second)) {
- // If the operation is custom lowered then assume
- // that the code is twice as expensive.
- CustomCost.push_back(LT.first * 2);
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
+ TLI->isFAbsFree(LT.second)) {
+ return 0;
}
- }
- auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
- if (MinLegalCostI != LegalCost.end())
- return *MinLegalCostI;
-
- auto MinCustomCostI =
- std::min_element(CustomCost.begin(), CustomCost.end());
- if (MinCustomCostI != CustomCost.end())
- return *MinCustomCostI;
+ // The operation is legal. Assume it costs 1.
+ // If the type is split to multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return (LT.first * 2);
+ else
+ return (LT.first * 1);
+ } else if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+ // that the code is twice as expensive.
+ return (LT.first * 2);
+ }
// If we can't lower fmuladd into an FMA estimate the cost as a floating
// point mul followed by an add.
@@ -2061,6 +2151,11 @@ public:
/// vector is reduced on each iteration.
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty,
TTI::TargetCostKind CostKind) {
+ // Targets must implement a default value for the scalable case, since
+ // we don't know how many lanes the vector has.
+ if (isa<ScalableVectorType>(Ty))
+ return InstructionCost::getInvalid();
+
Type *ScalarTy = Ty->getElementType();
unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
@@ -2159,6 +2254,11 @@ public:
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsUnsigned,
TTI::TargetCostKind CostKind) {
+ // Targets must implement a default value for the scalable case, since
+ // we don't know how many lanes the vector has.
+ if (isa<ScalableVectorType>(Ty))
+ return InstructionCost::getInvalid();
+
Type *ScalarTy = Ty->getElementType();
Type *ScalarCondTy = CondTy->getElementType();
unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
diff --git a/llvm/include/llvm/CodeGen/CFIFixup.h b/llvm/include/llvm/CodeGen/CFIFixup.h
new file mode 100644
index 000000000000..40e535106751
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/CFIFixup.h
@@ -0,0 +1,38 @@
+//===-- CFIFixup.h - Insert CFI remember/restore instructions ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Contains definition of the base CFIFixup pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CFIFIXUP_H
+#define LLVM_CODEGEN_CFIFIXUP_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+
+namespace llvm {
+class CFIFixup : public MachineFunctionPass {
+public:
+ static char ID;
+
+ CFIFixup() : MachineFunctionPass(ID) {
+ initializeCFIFixupPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CFIFIXUP_H
diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h
index bfd5bab3d1c0..41b7f10cfc38 100644
--- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h
@@ -9,7 +9,6 @@
#ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H
#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SlotIndexes.h"
namespace llvm {
@@ -65,17 +64,6 @@ class VirtRegMap;
/// (re)compute li's spill weight and allocation hint.
void calculateSpillWeightAndHint(LiveInterval &LI);
- /// Compute future expected spill weight of a split artifact of LI
- /// that will span between start and end slot indexes.
- /// \param LI The live interval to be split.
- /// \param Start The expected beginning of the split artifact. Instructions
- /// before start will not affect the weight.
- /// \param End The expected end of the split artifact. Instructions
- /// after end will not affect the weight.
- /// \return The expected spill weight of the split artifact. Returns
- /// negative weight for unspillable LI.
- float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End);
-
/// Compute spill weights and allocation hints for all virtual register
/// live intervals.
void calculateSpillWeightsAndHints();
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 8dbcd6b8ab7d..90afbfc32a4e 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -15,11 +15,9 @@
#define LLVM_CODEGEN_CALLINGCONVLOWER_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Alignment.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
index 270f935b6738..ce278468dffc 100644
--- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -19,7 +19,6 @@
namespace llvm {
class BasicBlock;
-class MachineBasicBlock;
/// Encapsulates all of the information needed to generate a stack protector
/// check, and signals to isel when initialized that one needs to be generated.
///
@@ -213,6 +212,13 @@ private:
MachineBasicBlock::iterator
findSplitPointForStackProtector(MachineBasicBlock *BB,
const TargetInstrInfo &TII);
+/// Evaluates whether the specified FP class test is an inversion of a simpler
+/// test. An example is the test "inf|normal|subnormal|zero", which is an
+/// inversion of "nan".
+/// \param Test The test as specified in the 'is_fpclass' intrinsic invocation.
+/// \returns The inverted test, or zero if inversion does not produce a
+/// simpler test.
+unsigned getInvertedFPClassTest(unsigned Test);
} // namespace llvm
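For the example in the comment, a sketch with illustrative mask values (the bit assignments here are hypothetical; the real ones live in the FPClassTest definition):

// Hypothetical class masks for illustration only.
enum : unsigned {
  ExampleNan = 0x003, // snan | qnan
  ExampleAll = 0x3ff, // every FP class
};

// "inf|normal|subnormal|zero" is ExampleAll & ~ExampleNan; inverting it
// within the full mask recovers the simpler "nan" test, which is what
// getInvertedFPClassTest() would return for it.
unsigned invertedExampleTest() {
  unsigned Test = ExampleAll & ~ExampleNan;
  return ExampleAll & ~Test; // == ExampleNan
}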
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index f6563971f981..f4b1980b9ede 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -15,7 +15,6 @@
#ifndef LLVM_CODEGEN_CODEGENPASSBUILDER_H
#define LLVM_CODEGEN_CODEGENPASSBUILDER_H
-#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -26,7 +25,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/ExpandReductions.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/CodeGen/ReplaceWithVeclib.h"
@@ -35,7 +33,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
@@ -43,7 +40,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/ConstantHoisting.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
@@ -51,7 +47,6 @@
#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/LowerInvoke.h"
#include <cassert>
@@ -668,6 +663,10 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
// Expand reduction intrinsics into shuffle sequences if the target wants to.
addPass(ExpandReductionsPass());
+
+ // Convert conditional moves to conditional jumps when profitable.
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableSelectOptimize)
+ addPass(SelectOptimizePass());
}
/// Turn exception handling constructs into something the code generators can
@@ -751,7 +750,7 @@ template <typename Derived>
Error CodeGenPassBuilder<Derived>::addCoreISelPasses(
AddMachinePass &addPass) const {
// Enable FastISel with -fast-isel, but allow that to be overridden.
- TM.setO0WantsFastISel(Opt.EnableFastISelOption.getValueOr(true));
+ TM.setO0WantsFastISel(Opt.EnableFastISelOption.value_or(true));
// Determine an instruction selector.
enum class SelectorType { SelectionDAG, FastISel, GlobalISel };
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index 73d39fecc268..9281ed723854 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -16,11 +16,6 @@
#define LLVM_CODEGEN_COMMANDFLAGS_H
#include "llvm/ADT/FloatingPointMode.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
@@ -29,6 +24,9 @@
namespace llvm {
class Module;
+class AttrBuilder;
+class Function;
+class Triple;
namespace codegen {
@@ -62,6 +60,8 @@ bool getEnableNoNaNsFPMath();
bool getEnableNoSignedZerosFPMath();
+bool getEnableApproxFuncFPMath();
+
bool getEnableNoTrappingFPMath();
DenormalMode::DenormalModeKind getDenormalFPMath();
@@ -93,6 +93,8 @@ std::string getTrapFuncName();
bool getUseCtors();
+bool getLowerGlobalDtorsViaCxaAtExit();
+
bool getRelaxELFRelocations();
bool getDataSections();
@@ -140,6 +142,8 @@ bool getDebugStrictDwarf();
unsigned getAlignLoops();
+bool getJMCInstrument();
+
/// Create this object with static storage to register codegen-related command
/// line options.
struct RegisterCodeGenFlags {
diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h
index 9cdaedc9e861..aba6503a6a1f 100644
--- a/llvm/include/llvm/CodeGen/DFAPacketizer.h
+++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h
@@ -25,9 +25,7 @@
#ifndef LLVM_CODEGEN_DFAPACKETIZER_H
#define LLVM_CODEGEN_DFAPACKETIZER_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/Support/Automaton.h"
#include <cstdint>
#include <map>
@@ -38,6 +36,7 @@
namespace llvm {
class DefaultVLIWScheduler;
+class ScheduleDAGMutation;
class InstrItineraryData;
class MachineFunction;
class MachineInstr;
diff --git a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index 2ac9d938d281..465829159e42 100644
--- a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
+++ b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -12,12 +12,12 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LexicalScopes.h"
#include <utility>
namespace llvm {
class DILocation;
+class LexicalScopes;
class DINode;
class MachineFunction;
class MachineInstr;
diff --git a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
index abeba62707c1..f19d321793e9 100644
--- a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -9,7 +9,7 @@
#ifndef LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
#define LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/StringMap.h"
namespace llvm {
@@ -20,49 +20,91 @@ class MCSymbol;
struct DwarfStringPoolEntry {
static constexpr unsigned NotIndexed = -1;
- MCSymbol *Symbol;
- uint64_t Offset;
- unsigned Index;
+ MCSymbol *Symbol = nullptr;
+ uint64_t Offset = 0;
+ unsigned Index = 0;
bool isIndexed() const { return Index != NotIndexed; }
};
-/// String pool entry reference.
+/// DwarfStringPoolEntryRef: Dwarf string pool entry reference.
+///
+/// A DWARF string pool entry holds a string and its associated data.
+/// The data can be represented in two ways:
+///
+/// 1. By value - StringMapEntry<DwarfStringPoolEntry>.
+/// 2. By pointer - StringMapEntry<DwarfStringPoolEntry *>.
+///
+/// The "By pointer" variant allows for reducing memory usage for the case
+/// when string pool entry does not have data: it keeps the null pointer
+/// and so no need to waste space for the full DwarfStringPoolEntry.
+/// It is recommended to use "By pointer" variant if not all entries
+/// of dwarf string pool have corresponding DwarfStringPoolEntry.
+
class DwarfStringPoolEntryRef {
- PointerIntPair<const StringMapEntry<DwarfStringPoolEntry> *, 1, bool>
- MapEntryAndIndexed;
+ /// Pointer type for "By value" string entry.
+ using ByValStringEntryPtr = const StringMapEntry<DwarfStringPoolEntry> *;
- const StringMapEntry<DwarfStringPoolEntry> *getMapEntry() const {
- return MapEntryAndIndexed.getPointer();
- }
+ /// Pointer type for "By pointer" string entry.
+ using ByPtrStringEntryPtr = const StringMapEntry<DwarfStringPoolEntry *> *;
+
+ /// Pointer to the dwarf string pool Entry.
+ PointerUnion<ByValStringEntryPtr, ByPtrStringEntryPtr> MapEntry = nullptr;
public:
DwarfStringPoolEntryRef() = default;
- DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry> &Entry,
- bool Indexed)
- : MapEntryAndIndexed(&Entry, Indexed) {}
- explicit operator bool() const { return getMapEntry(); }
+ /// ASSUMPTION: DwarfStringPoolEntryRef keeps a pointer to \p Entry,
+ /// thus the specified entry must not be reallocated.
+ DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry> &Entry)
+ : MapEntry(&Entry) {}
+
+ /// ASSUMPTION: DwarfStringPoolEntryRef keeps a pointer to \p Entry,
+ /// thus the specified entry must not be reallocated.
+ DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry *> &Entry)
+ : MapEntry(&Entry) {
+ assert(MapEntry.get<ByPtrStringEntryPtr>()->second != nullptr);
+ }
+
+ explicit operator bool() const { return !MapEntry.isNull(); }
+
+ /// \returns symbol for the dwarf string.
MCSymbol *getSymbol() const {
- assert(getMapEntry()->second.Symbol && "No symbol available!");
- return getMapEntry()->second.Symbol;
+ assert(getEntry().Symbol && "No symbol available!");
+ return getEntry().Symbol;
}
- uint64_t getOffset() const { return getMapEntry()->second.Offset; }
- bool isIndexed() const { return MapEntryAndIndexed.getInt(); }
+
+ /// \returns offset for the dwarf string.
+ uint64_t getOffset() const { return getEntry().Offset; }
+
+ /// \returns index for the dwarf string.
unsigned getIndex() const {
- assert(isIndexed());
- assert(getMapEntry()->getValue().isIndexed());
- return getMapEntry()->second.Index;
+ assert(getEntry().isIndexed() && "Index is not set!");
+ return getEntry().Index;
+ }
+
+ /// \returns string.
+ StringRef getString() const {
+ if (MapEntry.is<ByValStringEntryPtr>())
+ return MapEntry.get<ByValStringEntryPtr>()->first();
+
+ return MapEntry.get<ByPtrStringEntryPtr>()->first();
+ }
+
+ /// \returns the entire string pool entry for convenience.
+ const DwarfStringPoolEntry &getEntry() const {
+ if (MapEntry.is<ByValStringEntryPtr>())
+ return MapEntry.get<ByValStringEntryPtr>()->second;
+
+ return *MapEntry.get<ByPtrStringEntryPtr>()->second;
}
- StringRef getString() const { return getMapEntry()->first(); }
- /// Return the entire string pool entry for convenience.
- DwarfStringPoolEntry getEntry() const { return getMapEntry()->getValue(); }
bool operator==(const DwarfStringPoolEntryRef &X) const {
- return getMapEntry() == X.getMapEntry();
+ return MapEntry.getOpaqueValue() == X.MapEntry.getOpaqueValue();
}
+
bool operator!=(const DwarfStringPoolEntryRef &X) const {
- return getMapEntry() != X.getMapEntry();
+ return MapEntry.getOpaqueValue() != X.MapEntry.getOpaqueValue();
}
};
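A hedged usage sketch of the two variants; it assumes the maps outlive every ref taken from them, per the ASSUMPTION notes above:

void poolEntryRefSketch() {
  llvm::StringMap<llvm::DwarfStringPoolEntry> ByValPool;
  auto &ByVal = *ByValPool.try_emplace("str").first;
  llvm::DwarfStringPoolEntryRef ByValRef(ByVal);   // "By value" variant

  llvm::DwarfStringPoolEntry Data;                 // owned elsewhere
  llvm::StringMap<llvm::DwarfStringPoolEntry *> ByPtrPool;
  auto &ByPtr = *ByPtrPool.try_emplace("str", &Data).first;
  llvm::DwarfStringPoolEntryRef ByPtrRef(ByPtr);   // "By pointer" variant

  (void)ByValRef.getString();
  (void)ByPtrRef.getEntry().Offset;
}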
diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index 775698a66ada..8be97d2c2095 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -24,15 +24,15 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/MachineValueType.h"
-#include <algorithm>
#include <cstdint>
#include <utility>
namespace llvm {
class AllocaInst;
+class Instruction;
+class IntrinsicInst;
class BasicBlock;
class CallInst;
class Constant;
@@ -212,6 +212,7 @@ protected:
const TargetRegisterInfo &TRI;
const TargetLibraryInfo *LibInfo;
bool SkipTargetIndependentISel;
+ bool UseInstrRefDebugInfo = false;
/// The position of the last instruction for materializing constants
/// for use in the current block. It resets to EmitStartPt when it makes sense
@@ -318,6 +319,12 @@ public:
/// Reset InsertPt to the given old insert position.
void leaveLocalValueArea(SavePoint Old);
+ /// Signal whether instruction-referencing variable locations are desired
+ /// for this function's debug-info.
+ void useInstrRefDebugInfo(bool Flag) {
+ UseInstrRefDebugInfo = Flag;
+ }
+
protected:
explicit FastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo,
diff --git a/llvm/include/llvm/CodeGen/FaultMaps.h b/llvm/include/llvm/CodeGen/FaultMaps.h
index 8a8b1d2e6008..c228bb895edd 100644
--- a/llvm/include/llvm/CodeGen/FaultMaps.h
+++ b/llvm/include/llvm/CodeGen/FaultMaps.h
@@ -10,7 +10,6 @@
#define LLVM_CODEGEN_FAULTMAPS_H
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Endian.h"
#include <map>
#include <vector>
diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 524730d53694..f8156ce73196 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -101,6 +101,10 @@ public:
// Value was lowered to tied def and gc.relocate should be replaced with
// copy from vreg.
VReg,
+ // Value was lowered to tied def and gc.relocate should be replaced with
+ // SDValue kept in the StatepointLoweringInfo structure. This is valid
+ // for local relocates only.
+ SDValueNode,
} type = NoRelocate;
// Payload contains either frame index of the stack slot in which the value
// was spilled, or virtual register which contains the re-definition.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
index 4f95335db74b..4d9694347f17 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
@@ -13,10 +13,10 @@
#define LLVM_CODEGEN_GLOBALISEL_CSEMIRBUILDER_H
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
namespace llvm {
+class GISelInstProfileBuilder;
/// Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
/// Eg usage.
///
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index f9663fadb868..9bf1c134618c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -17,25 +17,26 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetCallingConv.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MachineValueType.h"
#include <cstdint>
#include <functional>
namespace llvm {
+class AttributeList;
class CallBase;
class DataLayout;
class Function;
class FunctionLoweringInfo;
class MachineIRBuilder;
+class MachineFunction;
struct MachinePointerInfo;
class MachineRegisterInfo;
class TargetLowering;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
index 795686980842..8c295428afe8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -15,7 +15,6 @@
#define LLVM_CODEGEN_GLOBALISEL_COMBINER_H
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
class MachineRegisterInfo;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 45c27c25aea0..73edc3c37970 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -17,16 +17,20 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINERHELPER_H
#define LLVM_CODEGEN_GLOBALISEL_COMBINERHELPER_H
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/Support/Alignment.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include <functional>
namespace llvm {
class GISelChangeObserver;
+class APFloat;
+class APInt;
+class GPtrAdd;
+class GStore;
+class GZExtLoad;
class MachineIRBuilder;
class MachineInstrBuilder;
class MachineRegisterInfo;
@@ -124,10 +128,20 @@ public:
const TargetLowering &getTargetLowering() const;
+ /// \returns true if the combiner is running pre-legalization.
+ bool isPreLegalize() const;
+
+ /// \returns true if \p Query is legal on the target.
+ bool isLegal(const LegalityQuery &Query) const;
+
/// \return true if the combine is running prior to legalization, or if \p
/// Query is legal on the target.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const;
+ /// \return true if the combine is running prior to legalization, or if \p Ty
+ /// is a legal integer constant type on the target.
+ bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const;
+
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
@@ -529,6 +543,13 @@ public:
/// Combine G_UREM x, (known power of 2) to an add and bitmasking.
void applySimplifyURemByPow2(MachineInstr &MI);
+ /// Push a binary operator through a select on constants.
+ ///
+ /// binop (select cond, K0, K1), K2 ->
+ /// select cond, (binop K0, K2), (binop K1, K2)
+ bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo);
+ bool applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo);
+
bool matchCombineInsertVecElts(MachineInstr &MI,
SmallVectorImpl<Register> &MatchInfo);
@@ -645,6 +666,14 @@ public:
/// (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Match:
+ /// (G_*MULO x, 0) -> 0 + no carry out
+ bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Match:
+ /// (G_*ADDO x, 0) -> x + no carry out
+ bool matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Transform (fadd x, fneg(y)) -> (fsub x, y)
/// (fadd fneg(x), y) -> (fsub y, x)
/// (fsub x, fneg(y)) -> (fadd x, y)
@@ -702,6 +731,15 @@ public:
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI,
BuildFnTy &MatchInfo);
+ /// Fold boolean selects to logical operations.
+ bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info);
+
+ /// Transform G_ADD(x, G_SUB(y, x)) to y.
+ /// Transform G_ADD(G_SUB(y, x), x) to y.
+ bool matchAddSubSameReg(MachineInstr &MI, Register &Src);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index 7d198fada411..3ec6a1da201e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -28,7 +28,7 @@ class GISelWorkList {
SmallVector<MachineInstr *, N> Worklist;
DenseMap<MachineInstr *, unsigned> WorklistMap;
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
bool Finalized = true;
#endif
@@ -49,7 +49,7 @@ public:
// of most passes.
void deferred_insert(MachineInstr *I) {
Worklist.push_back(I);
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
Finalized = false;
#endif
}
@@ -65,21 +65,25 @@ public:
for (unsigned i = 0; i < Worklist.size(); ++i)
if (!WorklistMap.try_emplace(Worklist[i], i).second)
llvm_unreachable("Duplicate elements in the list");
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
Finalized = true;
#endif
}
/// Add the specified instruction to the worklist if it isn't already in it.
void insert(MachineInstr *I) {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
assert(Finalized && "GISelWorkList used without finalizing");
+#endif
if (WorklistMap.try_emplace(I, Worklist.size()).second)
Worklist.push_back(I);
}
/// Remove I from the worklist if it exists.
void remove(const MachineInstr *I) {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
assert((Finalized || WorklistMap.empty()) && "Neither finalized nor empty");
+#endif
auto It = WorklistMap.find(I);
if (It == WorklistMap.end())
return; // Not in worklist.
@@ -96,7 +100,9 @@ public:
}
MachineInstr *pop_back_val() {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
assert(Finalized && "GISelWorkList used without finalizing");
+#endif
MachineInstr *I;
do {
I = Worklist.pop_back_val();
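The protocol these guards enforce, as a sketch ('Pending' is a hypothetical container of MachineInstr* gathered up front):

GISelWorkList<16> WL;
for (MachineInstr *MI : Pending)
  WL.deferred_insert(MI); // cheap bulk population; dedup map not built yet
WL.finalize();            // builds the map; required before insert()/remove()
while (!WL.empty()) {
  MachineInstr *CurMI = WL.pop_back_val();
  // ... process CurMI, calling WL.insert(...) for newly created candidates.
}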
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 7103656365b1..58fe48200e73 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
#define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
+#include "llvm/IR/Instructions.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -226,6 +227,37 @@ public:
}
};
+/// Represent a G_ICMP or G_FCMP.
+class GAnyCmp : public GenericMachineInstr {
+public:
+ CmpInst::Predicate getCond() const {
+ return static_cast<CmpInst::Predicate>(getOperand(1).getPredicate());
+ }
+ Register getLHSReg() const { return getReg(2); }
+ Register getRHSReg() const { return getReg(3); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_ICMP ||
+ MI->getOpcode() == TargetOpcode::G_FCMP;
+ }
+};
+
+/// Represent a G_ICMP.
+class GICmp : public GAnyCmp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_ICMP;
+ }
+};
+
+/// Represent a G_FCMP.
+class GFCmp : public GAnyCmp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_FCMP;
+ }
+};
+
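A usage sketch of the new wrappers inside a combine (MI assumed to be a MachineInstr reference):

if (const auto *Cmp = dyn_cast<GAnyCmp>(&MI)) {
  CmpInst::Predicate Pred = Cmp->getCond();
  Register LHS = Cmp->getLHSReg(), RHS = Cmp->getRHSReg();
  bool IsFP = isa<GFCmp>(Cmp); // distinguish G_FCMP from G_ICMP if needed
  // ... fold based on Pred, LHS, RHS and IsFP.
}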
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index ebe16cd4f58c..5e7428a5edc5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -22,11 +22,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CodeGen.h"
#include <memory>
@@ -248,12 +247,6 @@ private:
bool translateInlineAsm(const CallBase &CB, MachineIRBuilder &MIRBuilder);
- /// Returns true if the value should be split into multiple LLTs.
- /// If \p Offsets is given then the split type's offsets will be stored in it.
- /// If \p Offsets is not empty it will be cleared first.
- bool valueIsSplit(const Value &V,
- SmallVectorImpl<uint64_t> *Offsets = nullptr);
-
/// Common code for translating normal calls or invokes.
bool translateCallBase(const CallBase &CB, MachineIRBuilder &MIRBuilder);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
index 4a72621ec61e..60c7694725a5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
@@ -13,8 +13,10 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECT_H
#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECT_H
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CodeGen.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 03f4f3bf0b19..8ea45e576e4d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -18,12 +18,9 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Support/CodeGenCoverage.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include <bitset>
#include <cstddef>
@@ -34,6 +31,10 @@
namespace llvm {
+class BlockFrequencyInfo;
+class CodeGenCoverage;
+class MachineBasicBlock;
+class ProfileSummaryInfo;
class APInt;
class APFloat;
class GISelKnownBits;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index bc9f952146c2..c06b33d11170 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -17,16 +17,17 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -673,7 +674,7 @@ bool InstructionSelector::executeMatchTable(
ComplexRendererFns Renderer =
(ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])(
State.MIs[InsnID]->getOperand(OpIdx));
- if (Renderer.hasValue())
+ if (Renderer)
State.Renderers[RendererID] = Renderer.getValue();
else
if (handleReject() == RejectAndGiveUp)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 38d2fe28063a..6802591b6350 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -24,10 +24,10 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "legalizer"
-using namespace llvm::MIPatternMatch;
namespace llvm {
class LegalizationArtifactCombiner {
@@ -56,6 +56,7 @@ public:
SmallVectorImpl<MachineInstr *> &DeadInsts,
SmallVectorImpl<Register> &UpdatedDefs,
GISelObserverWrapper &Observer) {
+ using namespace llvm::MIPatternMatch;
assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
Builder.setInstrAndDebugLoc(MI);
@@ -109,6 +110,7 @@ public:
SmallVectorImpl<MachineInstr *> &DeadInsts,
SmallVectorImpl<Register> &UpdatedDefs,
GISelObserverWrapper &Observer) {
+ using namespace llvm::MIPatternMatch;
assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
Builder.setInstrAndDebugLoc(MI);
@@ -170,6 +172,7 @@ public:
bool tryCombineSExt(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts,
SmallVectorImpl<Register> &UpdatedDefs) {
+ using namespace llvm::MIPatternMatch;
assert(MI.getOpcode() == TargetOpcode::G_SEXT);
Builder.setInstrAndDebugLoc(MI);
@@ -227,6 +230,7 @@ public:
SmallVectorImpl<MachineInstr *> &DeadInsts,
SmallVectorImpl<Register> &UpdatedDefs,
GISelObserverWrapper &Observer) {
+ using namespace llvm::MIPatternMatch;
assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
Builder.setInstr(MI);
@@ -1281,6 +1285,8 @@ private:
/// Looks through copy instructions and returns the actual
/// source register.
Register lookThroughCopyInstrs(Register Reg) {
+ using namespace llvm::MIPatternMatch;
+
Register TmpReg;
while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
if (MRI.getType(TmpReg).isValid())
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
index c19f1d5330ba..7884b3f2ea6e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -20,11 +20,17 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZER_H
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZER_H
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
+class LegalizerInfo;
+class MachineIRBuilder;
+class MachineInstr;
+class GISelChangeObserver;
class LostDebugLocObserver;
class Legalizer : public MachineFunctionPass {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 3b2f937375eb..c6c57ac07f0e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -21,14 +21,22 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERHELPER_H
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/LowLevelType.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
namespace llvm {
// Forward declarations.
+class APInt;
+class GAnyLoad;
+class GLoadStore;
+class GStore;
+class GenericMachineInstr;
+class MachineFunction;
+class MachineIRBuilder;
+class MachineInstr;
+class MachineInstrBuilder;
+struct MachinePointerInfo;
+template <typename T> class SmallVectorImpl;
class LegalizerInfo;
class MachineRegisterInfo;
class GISelChangeObserver;
@@ -159,10 +167,6 @@ public:
/// def by inserting a G_BITCAST from \p CastTy
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx);
- /// Widen \p OrigReg to \p WideTy by merging to a wider type, padding with
- /// G_IMPLICIT_DEF, and producing dead results.
- Register widenWithUnmerge(LLT WideTy, Register OrigReg);
-
private:
LegalizeResult
widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 17cb53dd2d5b..c0cad8ff675d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -14,26 +14,26 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>
-#include <unordered_map>
#include <utility>
namespace llvm {
extern cl::opt<bool> DisableGISelLegalityCheck;
+class MachineFunction;
+class raw_ostream;
class LegalizerHelper;
class MachineInstr;
class MachineRegisterInfo;
@@ -327,8 +327,14 @@ LegalityPredicate largerThan(unsigned TypeIdx0, unsigned TypeIdx1);
/// index.
LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1);
-/// True iff the specified MMO index has a size that is not a power of 2
+/// True iff the specified MMO index has a size (rounded to bytes) that is not a
+/// power of 2.
LegalityPredicate memSizeInBytesNotPow2(unsigned MMOIdx);
+
+/// True iff the specified MMO index has a size that is not an even byte size,
+/// or that even byte size is not a power of 2.
+LegalityPredicate memSizeNotByteSizePow2(unsigned MMOIdx);
+
/// True iff the specified type index is a vector whose element count is not a
/// power of 2.
LegalityPredicate numElementsNotPow2(unsigned TypeIdx);
@@ -351,6 +357,14 @@ LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx);
/// Keep the same scalar or element type as the given type.
LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty);
+/// Keep the same scalar or element type as \p TypeIdx, but take the number of
+/// elements from \p FromTypeIdx.
+LegalizeMutation changeElementCountTo(unsigned TypeIdx, unsigned FromTypeIdx);
+
+/// Keep the same scalar or element type as \p TypeIdx, but take the number of
+/// elements from \p Ty.
+LegalizeMutation changeElementCountTo(unsigned TypeIdx, LLT Ty);
+
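A hypothetical rule showing where the new mutation slots in; treat this as a sketch, with the builder and predicate names taken from the surrounding LegalizerInfo API:

// Make the element count of type index 1 follow type index 0, keeping
// the element type of type index 1.
getActionDefinitionsBuilder(TargetOpcode::G_SELECT)
    .moreElementsIf(LegalityPredicates::isVector(1),
                    LegalizeMutations::changeElementCountTo(1, 0));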
/// Change the scalar size or element size to have the same scalar size as type
/// index \p FromIndex. Unlike changeElementTo, this discards pointer types and
/// only changes the size.
@@ -800,11 +814,23 @@ public:
return actionIf(LegalizeAction::Unsupported,
LegalityPredicates::memSizeInBytesNotPow2(0));
}
+
+ /// Lower a memory operation if the memory size, rounded to bytes, is not a
+ /// power of 2. For example, this will not trigger for s1 or s7, but will for
+ /// s24.
LegalizeRuleSet &lowerIfMemSizeNotPow2() {
return actionIf(LegalizeAction::Lower,
LegalityPredicates::memSizeInBytesNotPow2(0));
}
+ /// Lower a memory operation if the memory access size is not a power-of-2
+ /// number of whole bytes. This is stricter than lowerIfMemSizeNotPow2, and
+ /// more likely what you want (e.g. this will lower s1, s7 and s24).
+ LegalizeRuleSet &lowerIfMemSizeNotByteSizePow2() {
+ return actionIf(LegalizeAction::Lower,
+ LegalityPredicates::memSizeNotByteSizePow2(0));
+ }
+
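A hypothetical target rule using the stricter predicate:

// Lowers any G_LOAD whose access is not a power-of-2 number of whole
// bytes: s1, s7 and s24 all lower; s8/s16/s32 are untouched by this rule.
getActionDefinitionsBuilder(TargetOpcode::G_LOAD)
    .lowerIfMemSizeNotByteSizePow2();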
LegalizeRuleSet &customIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that a custom action with a
// free-form user provided Predicate properly handles all type indices:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
index 0845c001abdb..6efe7c7c9bbd 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -17,18 +17,19 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
namespace llvm {
// Forward declarations.
+class AnalysisUsage;
+class GStore;
+class LegalizerInfo;
+class MachineBasicBlock;
+class MachineInstr;
+class TargetLowering;
+struct LegalityQuery;
class MachineRegisterInfo;
namespace GISelAddressing {
/// Helper struct to store a base, index and offset that forms an address
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
index 1d1afff7f934..9ea0d095eeb1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -22,11 +22,14 @@
#define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H
#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
// Forward declarations.
+class AnalysisUsage;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineOperand;
class MachineRegisterInfo;
class TargetTransformInfo;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index daf1ff052983..1cacf96620f0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -94,6 +94,48 @@ inline ConstantMatch<int64_t> m_ICst(int64_t &Cst) {
return ConstantMatch<int64_t>(Cst);
}
+template <typename ConstT>
+inline Optional<ConstT> matchConstantSplat(Register,
+ const MachineRegisterInfo &);
+
+template <>
+inline Optional<APInt> matchConstantSplat(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatVal(Reg, MRI);
+}
+
+template <>
+inline Optional<int64_t> matchConstantSplat(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatSExtVal(Reg, MRI);
+}
+
+template <typename ConstT> struct ICstOrSplatMatch {
+ ConstT &CR;
+ ICstOrSplatMatch(ConstT &C) : CR(C) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ if (auto MaybeCst = matchConstant<ConstT>(Reg, MRI)) {
+ CR = *MaybeCst;
+ return true;
+ }
+
+ if (auto MaybeCstSplat = matchConstantSplat<ConstT>(Reg, MRI)) {
+ CR = *MaybeCstSplat;
+ return true;
+ }
+
+ return false;
+ };
+};
+
+inline ICstOrSplatMatch<APInt> m_ICstOrSplat(APInt &Cst) {
+ return ICstOrSplatMatch<APInt>(Cst);
+}
+
+inline ICstOrSplatMatch<int64_t> m_ICstOrSplat(int64_t &Cst) {
+ return ICstOrSplatMatch<int64_t>(Cst);
+}
+
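A usage sketch inside a combine; Reg and MRI are assumed to be in scope:

APInt SplatVal;
if (mi_match(Reg, MRI, m_ICstOrSplat(SplatVal))) {
  // SplatVal holds either the scalar G_CONSTANT value or the per-lane
  // constant of a splat build-vector.
}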
struct GCstAndRegMatch {
Optional<ValueAndVReg> &ValReg;
GCstAndRegMatch(Optional<ValueAndVReg> &ValReg) : ValReg(ValReg) {}
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index c4c2fc076dd8..16ba568c1be9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -13,19 +13,26 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H
#define LLVM_CODEGEN_GLOBALISEL_MACHINEIRBUILDER_H
-#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Module.h"
namespace llvm {
// Forward declarations.
+class APInt;
+class BlockAddress;
+class Constant;
+class ConstantFP;
+class ConstantInt;
+class DataLayout;
+class GISelCSEInfo;
+class GlobalValue;
+class TargetRegisterClass;
class MachineFunction;
class MachineInstr;
class TargetInstrInfo;
@@ -942,22 +949,6 @@ public:
/// Build and insert \p Res = IMPLICIT_DEF.
MachineInstrBuilder buildUndef(const DstOp &Res);
- /// Build and insert instructions to put \p Ops together at the specified p
- /// Indices to form a larger register.
- ///
- /// If the types of the input registers are uniform and cover the entirity of
- /// \p Res then a G_MERGE_VALUES will be produced. Otherwise an IMPLICIT_DEF
- /// followed by a sequence of G_INSERT instructions.
- ///
- /// \pre setBasicBlock or setMI must have been called.
- /// \pre The final element of the sequence must not extend past the end of the
- /// destination register.
- /// \pre The bits defined by each Op (derived from index and scalar size) must
- /// not overlap.
- /// \pre \p Indices must be in ascending order of bit position.
- void buildSequence(Register Res, ArrayRef<Register> Ops,
- ArrayRef<uint64_t> Indices);
-
/// Build and insert \p Res = G_MERGE_VALUES \p Op0, ...
///
/// G_MERGE_VALUES combines the input elements contiguously into a larger
@@ -1001,6 +992,11 @@ public:
MachineInstrBuilder buildBuildVector(const DstOp &Res,
ArrayRef<Register> Ops);
+ /// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ... where each OpN is
+ /// built with G_CONSTANT.
+ MachineInstrBuilder buildBuildVectorConstant(const DstOp &Res,
+ ArrayRef<APInt> Ops);
+
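A sketch of the new convenience builder (MIB assumed to be a MachineIRBuilder with an insertion point set):

// <4 x s32> = G_BUILD_VECTOR of G_CONSTANT 0, 1, 2, 3
SmallVector<APInt, 4> Elts;
for (unsigned I = 0; I != 4; ++I)
  Elts.push_back(APInt(32, I));
auto Vec = MIB.buildBuildVectorConstant(LLT::fixed_vector(4, 32), Elts);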
/// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill
/// the number of elements
MachineInstrBuilder buildSplatVector(const DstOp &Res,
@@ -1442,8 +1438,8 @@ public:
/// Build and insert \p Res = G_SUB \p Op0, \p Op1
///
- /// G_SUB sets \p Res to the sum of integer parameters \p Op0 and \p Op1,
- /// truncated to their width.
+ /// G_SUB sets \p Res to the difference of integer parameters \p Op0 and
+ /// \p Op1, truncated to their width.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers
@@ -1459,7 +1455,7 @@ public:
/// Build and insert \p Res = G_MUL \p Op0, \p Op1
///
- /// G_MUL sets \p Res to the sum of integer parameters \p Op0 and \p Op1,
+ /// G_MUL sets \p Res to the product of integer parameters \p Op0 and \p Op1,
/// truncated to their width.
///
/// \pre setBasicBlock or setMI must have been called.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
index 45006eecfce6..d0918485249d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
@@ -66,10 +66,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include <cassert>
#include <cstdint>
#include <memory>
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index aed915d2cc4b..78f1b49da822 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -15,18 +15,20 @@
#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
#include "GISelWorkList.h"
-#include "LostDebugLocObserver.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/Alignment.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include <cstdint>
namespace llvm {
class AnalysisUsage;
+class LostDebugLocObserver;
+class MachineBasicBlock;
class BlockFrequencyInfo;
class GISelKnownBits;
class MachineFunction;
@@ -267,13 +269,10 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
const MachineRegisterInfo &MRI);
/// Tries to constant fold a vector binop with sources \p Op1 and \p Op2.
-/// If successful, returns the G_BUILD_VECTOR representing the folded vector
-/// constant. \p MIB should have an insertion point already set to create new
-/// G_CONSTANT instructions as needed.
-Register ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
- const Register Op2,
- const MachineRegisterInfo &MRI,
- MachineIRBuilder &MIB);
+/// Returns an empty vector on failure.
+SmallVector<APInt> ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI);
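A sketch of the revised contract, where an empty vector signals failure (Op1, Op2, DstTy and MIB assumed in scope):

SmallVector<APInt> Fold =
    ConstantFoldVectorBinop(TargetOpcode::G_ADD, Op1, Op2, MRI);
if (!Fold.empty())
  MIB.buildBuildVectorConstant(DstTy, Fold); // materialize folded constants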
Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI);
@@ -374,9 +373,23 @@ public:
/// If \p MI is not a splat, returns None.
Optional<int> getSplatIndex(MachineInstr &MI);
-/// Returns a scalar constant of a G_BUILD_VECTOR splat if it exists.
-Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI);
+/// \returns the scalar integral splat value of \p Reg if possible.
+Optional<APInt> getIConstantSplatVal(const Register Reg,
+ const MachineRegisterInfo &MRI);
+
+/// \returns the scalar integral splat value defined by \p MI if possible.
+Optional<APInt> getIConstantSplatVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// \returns the scalar sign extended integral splat value of \p Reg if
+/// possible.
+Optional<int64_t> getIConstantSplatSExtVal(const Register Reg,
+ const MachineRegisterInfo &MRI);
+
+/// \returns the scalar sign extended integral splat value defined by \p MI if
+/// possible.
+Optional<int64_t> getIConstantSplatSExtVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
/// Returns a floating point scalar constant of a build vector splat if it
/// exists. When \p AllowUndef == true some elements can be undef but not all.
@@ -408,6 +421,30 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
bool AllowUndef = false);
+/// Return true if the specified instruction is known to be a constant, or a
+/// vector of constants.
+///
+/// If \p AllowFP is true, this will consider G_FCONSTANT in addition to
+/// G_CONSTANT. If \p AllowOpaqueConstants is true, constant-like instructions
+/// such as G_GLOBAL_VALUE will also be considered.
+bool isConstantOrConstantVector(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowFP = true,
+ bool AllowOpaqueConstants = true);
+
+/// Return true if the value is a constant 0 integer or a splatted vector of a
+/// constant 0 integer (with no undefs if \p AllowUndefs is false). This will
+/// handle G_BUILD_VECTOR and G_BUILD_VECTOR_TRUNC as truncation is not an issue
+/// for null values.
+bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ bool AllowUndefs = false);
+
+/// Return true if the value is a constant -1 integer or a splatted vector of a
+/// constant -1 integer (with no undefs if \p AllowUndefs is false).
+bool isAllOnesOrAllOnesSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndefs = false);
+
/// \returns a value when \p MI is a vector splat. The splat can be either a
/// Register or a constant.
///
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index b07c7cd3db3a..120f89952a95 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -281,12 +281,25 @@ enum NodeType {
/// Carry-using nodes for multiple precision addition and subtraction.
/// These nodes take three operands: The first two are the normal lhs and
- /// rhs to the add or sub, and the third is a boolean indicating if there
- /// is an incoming carry. These nodes produce two results: the normal
- /// result of the add or sub, and the output carry so they can be chained
- /// together. The use of this opcode is preferable to adde/sube if the
- /// target supports it, as the carry is a regular value rather than a
- /// glue, which allows further optimisation.
+ /// rhs to the add or sub, and the third is a boolean value that is 1 if and
+ /// only if there is an incoming carry/borrow. These nodes produce two
+ /// results: the normal result of the add or sub, and a boolean value that is
+ /// 1 if and only if there is an outgoing carry/borrow.
+ ///
+ /// Care must be taken if these opcodes are lowered to hardware instructions
+ /// that use the inverse logic -- 0 if and only if there is an
+ /// incoming/outgoing carry/borrow. In such cases, you must preserve the
+ /// semantics of these opcodes by inverting the incoming carry/borrow, feeding
+ /// it to the add/sub hardware instruction, and then inverting the outgoing
+ /// carry/borrow.
+ ///
+ /// The use of these opcodes is preferable to adde/sube if the target supports
+ /// it, as the carry is a regular value rather than a glue, which allows
+ /// further optimisation.
+ ///
+ /// These opcodes are different from [US]{ADD,SUB}O in that ADDCARRY/SUBCARRY
+ /// consume and produce a carry/borrow, whereas [US]{ADD,SUB}O produce an
+ /// overflow.
ADDCARRY,
SUBCARRY,
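The chaining these nodes describe mirrors ordinary multi-word arithmetic; a plain C++ analogue of one 64-bit ADDCARRY step (a sketch of the semantics, not how the nodes are lowered):

#include <cstdint>

struct AddCarryResult { uint64_t Sum; uint64_t CarryOut; };

AddCarryResult addCarry(uint64_t A, uint64_t B, uint64_t CarryIn) {
  uint64_t Sum = A + B + CarryIn;                       // wraps mod 2^64
  uint64_t CarryOut = CarryIn ? (Sum <= A) : (Sum < A); // unsigned wrap test
  return {Sum, CarryOut};
}

// 128-bit addition as two chained steps:
//   AddCarryResult Lo = addCarry(ALo, BLo, 0);
//   AddCarryResult Hi = addCarry(AHi, BHi, Lo.CarryOut);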
@@ -294,7 +307,7 @@ enum NodeType {
/// subtraction. These nodes take three operands: The first two are normal lhs
/// and rhs to the add or sub, and the third is a boolean indicating if there
/// is an incoming carry. They produce two results: the normal result of the
- /// add or sub, and a boolean that indicates if an overflow occured (*not*
+ /// add or sub, and a boolean that indicates if an overflow occurred (*not*
/// flag, because it may be a store to memory, etc.). If the type of the
/// boolean is not i1 then the high bits conform to getBooleanContents.
SADDO_CARRY,
@@ -462,6 +475,9 @@ enum NodeType {
STRICT_FSETCC,
STRICT_FSETCCS,
+ // FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
+ FPTRUNC_ROUND,
+
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
@@ -482,6 +498,13 @@ enum NodeType {
/// Returns platform specific canonical encoding of a floating point number.
FCANONICALIZE,
+ /// Performs a check of floating point class property, defined by IEEE-754.
+ /// The first operand is the floating point value to check. The second operand
+ /// specifies the checked property and is a TargetConstant which specifies
+ /// test in the same way as intrinsic 'is_fpclass'.
+ /// Returns boolean value.
+ IS_FPCLASS,
+
/// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector
/// with the specified, possibly variable, elements. The types of the
/// operands must match the vector element type, except that integer types
@@ -614,6 +637,17 @@ enum NodeType {
MULHU,
MULHS,
+ /// AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of
+ /// type i[N+1], halving the result by shifting it one bit right.
+ /// shr(add(ext(X), ext(Y)), 1)
+ AVGFLOORS,
+ AVGFLOORU,
+ /// AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an
+ /// integer of type i[N+2], add 1 and halve the result by shifting it one bit
+ /// right. shr(add(ext(X), ext(Y), 1), 1)
+ AVGCEILS,
+ AVGCEILU,
+
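For intuition, the widening trick in plain C++ for unsigned i8 inputs, exactly as the formulas above describe:

#include <cstdint>

uint8_t avgFloorU8(uint8_t X, uint8_t Y) { // AVGFLOORU
  return uint8_t((uint16_t(X) + uint16_t(Y)) >> 1);
}
uint8_t avgCeilU8(uint8_t X, uint8_t Y) {  // AVGCEILU
  return uint8_t((uint16_t(X) + uint16_t(Y) + 1) >> 1);
}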
// ABDS/ABDU - Absolute difference - Return the absolute difference between
// two numbers interpreted as signed/unsigned.
// i.e trunc(abs(sext(Op0) - sext(Op1))) becomes abds(Op0, Op1)
@@ -864,6 +898,13 @@ enum NodeType {
STRICT_FP16_TO_FP,
STRICT_FP_TO_FP16,
+ /// BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions
+ /// and truncation for bfloat16. These nodes form a semi-softened interface
+ /// for dealing with bf16 (as an i16), which is often a storage-only type but
+ /// has native conversions.
+ BF16_TO_FP,
+ FP_TO_BF16,
+
/// Perform various unary floating-point operations inspired by libm. For
/// FPOWI, the result is undefined if the integer operand doesn't fit into
/// sizeof(int).
@@ -1324,18 +1365,18 @@ static const int LAST_INDEXED_MODE = POST_DEC + 1;
/// MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's
/// index parameter when calculating addresses.
///
-/// SIGNED_SCALED Addr = Base + ((signed)Index * sizeof(element))
-/// SIGNED_UNSCALED Addr = Base + (signed)Index
-/// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * sizeof(element))
-/// UNSIGNED_UNSCALED Addr = Base + (unsigned)Index
-enum MemIndexType {
- SIGNED_SCALED = 0,
- SIGNED_UNSCALED,
- UNSIGNED_SCALED,
- UNSIGNED_UNSCALED
-};
+/// SIGNED_SCALED Addr = Base + ((signed)Index * Scale)
+/// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * Scale)
+///
+/// NOTE: The value of Scale is typically only known to the node owning the
+/// IndexType, with a value of 1 being the equivalent of unscaled.
+enum MemIndexType { SIGNED_SCALED = 0, UNSIGNED_SCALED };
-static const int LAST_MEM_INDEX_TYPE = UNSIGNED_UNSCALED + 1;
+static const int LAST_MEM_INDEX_TYPE = UNSIGNED_SCALED + 1;
+
+inline bool isIndexTypeSigned(MemIndexType IndexType) {
+ return IndexType == SIGNED_SCALED;
+}
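A scalar sketch of the addressing the enum now describes, with Scale supplied by the owning node (Scale == 1 is the unscaled case):

#include <cstdint>

uint64_t gatherAddr(uint64_t Base, uint32_t Index, bool IsSigned,
                    uint64_t Scale /* 1 == unscaled */) {
  uint64_t Extended = IsSigned ? uint64_t(int64_t(int32_t(Index)))
                               : uint64_t(Index);
  return Base + Extended * Scale;
}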
//===--------------------------------------------------------------------===//
/// LoadExtType enum - This enum defines the three variants of LOADEXT
diff --git a/llvm/include/llvm/CodeGen/IntrinsicLowering.h b/llvm/include/llvm/CodeGen/IntrinsicLowering.h
index 06512f2dc560..0b327a34ca09 100644
--- a/llvm/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/llvm/include/llvm/CodeGen/IntrinsicLowering.h
@@ -15,8 +15,6 @@
#ifndef LLVM_CODEGEN_INTRINSICLOWERING_H
#define LLVM_CODEGEN_INTRINSICLOWERING_H
-#include "llvm/IR/Intrinsics.h"
-
namespace llvm {
class CallInst;
class DataLayout;
diff --git a/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
index c692dbc2199e..e5794966ce63 100644
--- a/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
+++ b/llvm/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
@@ -17,8 +17,8 @@
#define LLVM_CODEGEN_LAZYMACHINEBLOCKFREQUENCYINFO_H
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h
index 51ffe2807434..92e35c9a4ab9 100644
--- a/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -227,6 +227,14 @@ namespace llvm {
const_vni_iterator vni_begin() const { return valnos.begin(); }
const_vni_iterator vni_end() const { return valnos.end(); }
+ iterator_range<vni_iterator> vnis() {
+ return make_range(vni_begin(), vni_end());
+ }
+
+ iterator_range<const_vni_iterator> vnis() const {
+ return make_range(vni_begin(), vni_end());
+ }
+
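This enables range-based iteration over value numbers, e.g.:

for (const VNInfo *VNI : LR.vnis()) // LR: a LiveRange
  if (VNI->isPHIDef())
    ++NumPHIDefs;                   // NumPHIDefs assumed in scope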
/// Constructs a new LiveRange object.
LiveRange(bool UseSegmentSet = false)
: segmentSet(UseSegmentSet ? std::make_unique<SegmentSet>()
@@ -625,10 +633,8 @@ namespace llvm {
// if the Seg is lower find first segment that is above Idx using binary
// search
if (Seg->end <= *Idx) {
- Seg = std::upper_bound(
- ++Seg, EndSeg, *Idx,
- [=](std::remove_reference_t<decltype(*Idx)> V,
- const std::remove_reference_t<decltype(*Seg)> &S) {
+ Seg =
+ std::upper_bound(++Seg, EndSeg, *Idx, [=](auto V, const auto &S) {
return V < S.end;
});
if (Seg == EndSeg)
diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
index 3b6a4a379d72..81003455da42 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -43,7 +43,7 @@ class LiveIntervalUnion {
// A set of live virtual register segments that supports fast insertion,
// intersection, and removal.
// Mapping SlotIndex intervals to virtual register numbers.
- using LiveSegments = IntervalMap<SlotIndex, LiveInterval*>;
+ using LiveSegments = IntervalMap<SlotIndex, const LiveInterval *>;
public:
// SegmentIter can advance to the next segment ordered by starting position
@@ -88,10 +88,10 @@ public:
bool changedSince(unsigned tag) const { return tag != Tag; }
// Add a live virtual register to this union and merge its segments.
- void unify(LiveInterval &VirtReg, const LiveRange &Range);
+ void unify(const LiveInterval &VirtReg, const LiveRange &Range);
// Remove a live virtual register's segments from this union.
- void extract(LiveInterval &VirtReg, const LiveRange &Range);
+ void extract(const LiveInterval &VirtReg, const LiveRange &Range);
// Remove all inserted virtual registers.
void clear() { Segments.clear(); ++Tag; }
@@ -105,7 +105,7 @@ public:
#endif
// Get any virtual register that is assign to this physical unit
- LiveInterval *getOneVReg() const;
+ const LiveInterval *getOneVReg() const;
/// Query interferences between a single live virtual register and a live
/// interval union.
@@ -114,7 +114,7 @@ public:
const LiveRange *LR = nullptr;
LiveRange::const_iterator LRI; ///< current position in LR
ConstSegmentIter LiveUnionI; ///< current position in LiveUnion
- SmallVector<LiveInterval *, 4> InterferingVRegs;
+ SmallVector<const LiveInterval *, 4> InterferingVRegs;
bool CheckedFirstInterference = false;
bool SeenAllInterferences = false;
unsigned Tag = 0;
@@ -125,7 +125,7 @@ public:
unsigned collectInterferingVRegs(unsigned MaxInterferingRegs);
// Was this virtual register visited during collectInterferingVRegs?
- bool isSeenInterference(LiveInterval *VirtReg) const;
+ bool isSeenInterference(const LiveInterval *VirtReg) const;
public:
Query() = default;
@@ -159,7 +159,7 @@ public:
bool checkInterference() { return collectInterferingVRegs(1); }
// Vector generated by collectInterferingVRegs.
- const SmallVectorImpl<LiveInterval *> &interferingVRegs(
+ const SmallVectorImpl<const LiveInterval *> &interferingVRegs(
unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max()) {
if (!SeenAllInterferences || MaxInterferingRegs < InterferingVRegs.size())
collectInterferingVRegs(MaxInterferingRegs);
diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h
index fa08166791b0..b832eaa37305 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -374,7 +374,7 @@ class VirtRegMap;
///
/// Returns false if \p LI doesn't cross any register mask instructions. In
/// that case, the bit vector is not filled in.
- bool checkRegMaskInterference(LiveInterval &LI,
+ bool checkRegMaskInterference(const LiveInterval &LI,
BitVector &UsableRegs);
// Register unit functions.
diff --git a/llvm/include/llvm/CodeGen/LivePhysRegs.h b/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 99ba1a28c934..27285d63aa83 100644
--- a/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -32,6 +32,7 @@
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include <cassert>
#include <utility>
@@ -39,6 +40,7 @@
namespace llvm {
class MachineInstr;
+class MachineFunction;
class MachineOperand;
class MachineRegisterInfo;
class raw_ostream;
diff --git a/llvm/include/llvm/CodeGen/LiveRangeCalc.h b/llvm/include/llvm/CodeGen/LiveRangeCalc.h
index 31efd6e37e01..895ecff18f89 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeCalc.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeCalc.h
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/MC/LaneBitmask.h"
#include <utility>
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index d80522f5bdac..c6efa7b30d71 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -66,7 +66,7 @@ public:
};
private:
- LiveInterval *Parent;
+ const LiveInterval *const Parent;
SmallVectorImpl<Register> &NewRegs;
MachineRegisterInfo &MRI;
LiveIntervals &LIS;
@@ -129,7 +129,7 @@ public:
/// be done. This could be the case if called before Regalloc.
/// @param deadRemats The collection of all the instructions that define an
/// original reg and are dead after remat.
- LiveRangeEdit(LiveInterval *parent, SmallVectorImpl<Register> &newRegs,
+ LiveRangeEdit(const LiveInterval *parent, SmallVectorImpl<Register> &newRegs,
MachineFunction &MF, LiveIntervals &lis, VirtRegMap *vrm,
Delegate *delegate = nullptr,
SmallPtrSet<MachineInstr *, 32> *deadRemats = nullptr)
@@ -141,7 +141,7 @@ public:
~LiveRangeEdit() override { MRI.resetDelegate(this); }
- LiveInterval &getParent() const {
+ const LiveInterval &getParent() const {
assert(Parent && "No parent LiveInterval");
return *Parent;
}
@@ -193,11 +193,11 @@ public:
/// Remat - Information needed to rematerialize at a specific location.
struct Remat {
- VNInfo *ParentVNI; // parent_'s value at the remat location.
+ const VNInfo *const ParentVNI; // parent_'s value at the remat location.
MachineInstr *OrigMI = nullptr; // Instruction defining OrigVNI. It contains
// the real expr for remat.
- explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {}
+ explicit Remat(const VNInfo *ParentVNI) : ParentVNI(ParentVNI) {}
};
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
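With the parent interval and its VNInfo held by const pointer, a spiller or splitter can construct the edit from a read-only interval. A minimal sketch (editConstParent is hypothetical; the constructor signature is the one shown above):

#include "llvm/CodeGen/LiveRangeEdit.h"

// Build an edit whose parent is only readable, then take the (now const)
// parent reference back out.
void editConstParent(const LiveInterval &LI, SmallVectorImpl<Register> &NewRegs,
                     MachineFunction &MF, LiveIntervals &LIS, VirtRegMap *VRM) {
  LiveRangeEdit Edit(&LI, NewRegs, MF, LIS, VRM);
  const LiveInterval &Parent = Edit.getParent();
  (void)Parent; // read-only from here on
}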
diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index fc67bce329ab..9e28e4d243c2 100644
--- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -104,7 +104,8 @@ public:
/// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
/// When there is more than one kind of interference, the InterferenceKind
/// with the highest enum value is returned.
- InterferenceKind checkInterference(LiveInterval &VirtReg, MCRegister PhysReg);
+ InterferenceKind checkInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg);
/// Check for interference in the segment [Start, End) that may prevent
/// assignment to PhysReg. If this function returns true, there is
@@ -116,12 +117,12 @@ public:
/// Assign VirtReg to PhysReg.
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
/// update VirtRegMap. The live range is expected to be available in PhysReg.
- void assign(LiveInterval &VirtReg, MCRegister PhysReg);
+ void assign(const LiveInterval &VirtReg, MCRegister PhysReg);
/// Unassign VirtReg from its PhysReg.
/// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
/// the assignment and updates VirtRegMap accordingly.
- void unassign(LiveInterval &VirtReg);
+ void unassign(const LiveInterval &VirtReg);
/// Returns true if the given \p PhysReg has any live intervals assigned.
bool isPhysRegUsed(MCRegister PhysReg) const;
@@ -136,13 +137,14 @@ public:
/// Check for regmask interference only.
/// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
/// If PhysReg is null, check if VirtReg crosses any regmask operands.
- bool checkRegMaskInterference(LiveInterval &VirtReg,
+ bool checkRegMaskInterference(const LiveInterval &VirtReg,
MCRegister PhysReg = MCRegister::NoRegister);
/// Check for regunit interference only.
/// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's
/// register units.
- bool checkRegUnitInterference(LiveInterval &VirtReg, MCRegister PhysReg);
+ bool checkRegUnitInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg);
/// Query a line of the assigned virtual register matrix directly.
/// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
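These signatures serve the usual allocator pattern of probing before committing. A minimal sketch (tryAssign is hypothetical; IK_Free is the "no interference" kind referenced in the comment above):

// Probe PhysReg for VirtReg and commit the assignment only when it is free.
bool tryAssign(LiveRegMatrix &Matrix, const LiveInterval &VirtReg,
               MCRegister PhysReg) {
  if (Matrix.checkInterference(VirtReg, PhysReg) != LiveRegMatrix::IK_Free)
    return false;                  // vreg, regunit, or regmask interference
  Matrix.assign(VirtReg, PhysReg); // occupy the range, update VirtRegMap
  return true;
}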
diff --git a/llvm/include/llvm/CodeGen/LiveStacks.h b/llvm/include/llvm/CodeGen/LiveStacks.h
index 1cbdb8bd86bd..26f30fb4d088 100644
--- a/llvm/include/llvm/CodeGen/LiveStacks.h
+++ b/llvm/include/llvm/CodeGen/LiveStacks.h
@@ -18,13 +18,17 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include <cassert>
#include <map>
#include <unordered_map>
namespace llvm {
+class AnalysisUsage;
+class MachineFunction;
+class Module;
+class raw_ostream;
class TargetRegisterClass;
class TargetRegisterInfo;
diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h
index dee316677b25..aa198527415d 100644
--- a/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
index deb6b37a9bcf..3bbcfd63e3aa 100644
--- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
+++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
@@ -17,29 +17,16 @@
#ifndef LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
#define LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/ProfileData/InstrProf.h"
-#include "llvm/ProfileData/SampleProf.h"
-#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Discriminator.h"
#include <cassert>
+#include <cstdint>
namespace llvm {
+class MachineFunction;
using namespace sampleprof;
class MIRAddFSDiscriminators : public MachineFunctionPass {
diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
index a7c69e2d43ef..aa9891a80a32 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -17,13 +17,20 @@
#ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H
#define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H
-#include "llvm/IR/Module.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include <functional>
#include <memory>
namespace llvm {
class Function;
+class LLVMContext;
+class MemoryBuffer;
+class Module;
class MIRParserImpl;
class MachineModuleInfo;
class SMDiagnostic;
diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
index 2503524ccfdf..f54c4b5891be 100644
--- a/llvm/include/llvm/CodeGen/MIRSampleProfile.h
+++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
@@ -14,29 +14,17 @@
#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H
#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/ProfileData/InstrProf.h"
-#include "llvm/ProfileData/SampleProf.h"
-#include "llvm/ProfileData/SampleProfReader.h"
-
-#include <cassert>
+#include "llvm/Support/Discriminator.h"
+#include <memory>
+#include <string>
namespace llvm {
+class AnalysisUsage;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class Module;
using namespace sampleprof;
diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 02eb5d24271d..25247437b641 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -605,7 +605,7 @@ struct MachineFrameInfo {
bool AdjustsStack = false;
bool HasCalls = false;
StringValue StackProtector;
- // TODO: Serialize FunctionContextIdx
+ StringValue FunctionContext;
unsigned MaxCallFrameSize = ~0u; ///< ~0u means: not computed yet.
unsigned CVBytesOfCalleeSavedRegisters = 0;
bool HasOpaqueSPAdjustment = false;
@@ -626,6 +626,7 @@ struct MachineFrameInfo {
MaxAlignment == Other.MaxAlignment &&
AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls &&
StackProtector == Other.StackProtector &&
+ FunctionContext == Other.FunctionContext &&
MaxCallFrameSize == Other.MaxCallFrameSize &&
CVBytesOfCalleeSavedRegisters ==
Other.CVBytesOfCalleeSavedRegisters &&
@@ -651,6 +652,8 @@ template <> struct MappingTraits<MachineFrameInfo> {
YamlIO.mapOptional("hasCalls", MFI.HasCalls, false);
YamlIO.mapOptional("stackProtector", MFI.StackProtector,
StringValue()); // Don't print it out when it's empty.
+ YamlIO.mapOptional("functionContext", MFI.FunctionContext,
+ StringValue()); // Don't print it out when it's empty.
YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0);
YamlIO.mapOptional("cvBytesOfCalleeSavedRegisters",
MFI.CVBytesOfCalleeSavedRegisters, 0U);
@@ -694,6 +697,13 @@ struct MachineFunction {
// Register information
bool TracksRegLiveness = false;
bool HasWinCFI = false;
+
+ bool CallsEHReturn = false;
+ bool CallsUnwindInit = false;
+ bool HasEHCatchret = false;
+ bool HasEHScopes = false;
+ bool HasEHFunclets = false;
+
bool FailsVerification = false;
bool TracksDebugUserValues = false;
std::vector<VirtualRegisterDefinition> VirtualRegisters;
@@ -724,6 +734,13 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapOptional("failedISel", MF.FailedISel, false);
YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false);
YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false);
+
+ YamlIO.mapOptional("callsEHReturn", MF.CallsEHReturn, false);
+ YamlIO.mapOptional("callsUnwindInit", MF.CallsUnwindInit, false);
+ YamlIO.mapOptional("hasEHCatchret", MF.HasEHCatchret, false);
+ YamlIO.mapOptional("hasEHScopes", MF.HasEHScopes, false);
+ YamlIO.mapOptional("hasEHFunclets", MF.HasEHFunclets, false);
+
YamlIO.mapOptional("failsVerification", MF.FailsVerification, false);
YamlIO.mapOptional("tracksDebugUserValues", MF.TracksDebugUserValues,
false);
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 638b6732a543..ddfbd4018590 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -14,9 +14,9 @@
#define LLVM_CODEGEN_MACHINEBASICBLOCK_H
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/IR/DebugLoc.h"
@@ -24,7 +24,6 @@
#include "llvm/Support/BranchProbability.h"
#include <cassert>
#include <cstdint>
-#include <functional>
#include <iterator>
#include <string>
#include <vector>
@@ -110,10 +109,10 @@ public:
private:
using Instructions = ilist<MachineInstr, ilist_sentinel_tracking<true>>;
- Instructions Insts;
const BasicBlock *BB;
int Number;
MachineFunction *xParent;
+ Instructions Insts;
/// Keep track of the predecessor / successor basic blocks.
std::vector<MachineBasicBlock *> Predecessors;
@@ -205,6 +204,12 @@ public:
/// to an LLVM basic block.
const BasicBlock *getBasicBlock() const { return BB; }
+ /// Remove the reference to the underlying IR BasicBlock. This is for
+ /// reduction tools and should generally not be used.
+ void clearBasicBlock() {
+ BB = nullptr;
+ }
+
/// Return the name of the corresponding LLVM basic block, or an empty string.
StringRef getName() const;
@@ -241,6 +246,7 @@ public:
MachineInstrBundleIterator<const MachineInstr, true>;
unsigned size() const { return (unsigned)Insts.size(); }
+ bool sizeWithoutDebugLargerThan(unsigned Limit) const;
bool empty() const { return Insts.empty(); }
MachineInstr &instr_front() { return Insts.front(); }
@@ -400,7 +406,7 @@ public:
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
using livein_iterator = LiveInVector::const_iterator;
-#ifndef NDEBUG
+
/// Unlike livein_begin, this method does not check that the liveness
/// information is accurate. Still for debug purposes it may be useful
/// to have iterators that won't assert if the liveness information
@@ -409,7 +415,7 @@ public:
iterator_range<livein_iterator> liveins_dbg() const {
return make_range(livein_begin_dbg(), livein_end());
}
-#endif
+
livein_iterator livein_begin() const;
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
@@ -731,6 +737,15 @@ public:
/// other block.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const;
+ /// Return the successor of this block if it has a single successor.
+ /// Otherwise return a null pointer.
+ ///
+ const MachineBasicBlock *getSingleSuccessor() const;
+ MachineBasicBlock *getSingleSuccessor() {
+ return const_cast<MachineBasicBlock *>(
+ static_cast<const MachineBasicBlock *>(this)->getSingleSuccessor());
+ }
+
/// Return the fallthrough block if the block can implicitly
/// transfer control to the block after it by falling off the end of
/// it. This should return null if it can reach the block after
@@ -1087,6 +1102,11 @@ public:
IrrLoopHeaderWeight = Weight;
}
+ /// Return probability of the edge from this block to MBB. This method should
+ /// NOT be called directly, but by using getEdgeProbability method from
+ /// MachineBranchProbabilityInfo class.
+ BranchProbability getSuccProbability(const_succ_iterator Succ) const;
+
private:
/// Return probability iterator corresponding to the I successor iterator.
probability_iterator getProbabilityIterator(succ_iterator I);
@@ -1096,11 +1116,6 @@ private:
friend class MachineBranchProbabilityInfo;
friend class MIPrinter;
- /// Return probability of the edge from this block to MBB. This method should
- /// NOT be called directly, but by using getEdgeProbability method from
- /// MachineBranchProbabilityInfo class.
- BranchProbability getSuccProbability(const_succ_iterator Succ) const;
-
// Methods used to maintain doubly linked list of blocks...
friend struct ilist_callback_traits<MachineBasicBlock>;
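The new getSingleSuccessor accessor mirrors the IR-level BasicBlock API and makes straight-line walks const-clean. A minimal sketch (chainEnd is hypothetical and assumes the chain is acyclic):

// Follow single-successor edges to the end of a straight-line chain.
const MachineBasicBlock *chainEnd(const MachineBasicBlock *MBB) {
  while (const MachineBasicBlock *Succ = MBB->getSingleSuccessor())
    MBB = Succ; // null once the block has zero or multiple successors
  return MBB;
}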
diff --git a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 7e7e0a9c477a..bd544421bc0f 100644
--- a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -16,8 +16,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
-#include <climits>
-#include <numeric>
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 67544779f34c..68c95679d466 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -34,6 +34,10 @@ enum class MachineCombinerPattern {
REASSOC_XY_BCA,
REASSOC_XY_BAC,
+ // These are patterns used to reduce the length of the dependence chain.
+ SUBADD_OP1,
+ SUBADD_OP2,
+
// These are multiply-add patterns matched by the AArch64 machine combiner.
MULADDW_OP1,
MULADDW_OP2,
diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
index d3816bbc0780..3f89f2076d50 100644
--- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
@@ -15,8 +15,9 @@
#define LLVM_CODEGEN_MACHINECYCLEANALYSIS_H
#include "llvm/ADT/GenericCycleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
namespace llvm {
@@ -26,6 +27,29 @@ extern template class GenericCycle<MachineSSAContext>;
using MachineCycleInfo = GenericCycleInfo<MachineSSAContext>;
using MachineCycle = MachineCycleInfo::CycleT;
+/// Legacy analysis pass which computes a \ref MachineCycleInfo.
+class MachineCycleInfoWrapperPass : public MachineFunctionPass {
+ MachineFunction *F = nullptr;
+ MachineCycleInfo CI;
+
+public:
+ static char ID;
+
+ MachineCycleInfoWrapperPass();
+
+ MachineCycleInfo &getCycleInfo() { return CI; }
+ const MachineCycleInfo &getCycleInfo() const { return CI; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+};
+
+// TODO: add this function to GenericCycle template after implementing IR
+// version.
+bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I);
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H
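A legacy pass can consume the new wrapper in the usual way. A minimal sketch (MyPass is hypothetical; getAnalysis<> is the standard legacy pass-manager mechanism):

void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineCycleInfoWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool MyPass::runOnMachineFunction(MachineFunction &MF) {
  MachineCycleInfo &CI =
      getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  (void)CI; // e.g. feed CI's cycles to isCycleInvariant() declared above
  return false; // analysis consumer only
}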
diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h
index f749e9ff7e0a..30c18ef410fa 100644
--- a/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -19,12 +19,17 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
#include <cassert>
#include <memory>
namespace llvm {
+class AnalysisUsage;
+class MachineFunction;
+class Module;
+class raw_ostream;
template <>
inline void DominatorTreeBase<MachineBasicBlock, false>::addRoot(
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 864ca73180af..7ea731b46655 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Alignment.h"
-#include "llvm/Support/DataTypes.h"
#include <cassert>
#include <vector>
@@ -335,10 +334,13 @@ private:
/// Not null, if shrink-wrapping found a better place for the epilogue.
MachineBasicBlock *Restore = nullptr;
+ /// Size of the UnsafeStack Frame
+ uint64_t UnsafeStackSize = 0;
+
public:
- explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable,
+ explicit MachineFrameInfo(Align StackAlignment, bool StackRealignable,
bool ForcedRealign)
- : StackAlignment(assumeAligned(StackAlignment)),
+ : StackAlignment(StackAlignment),
StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {}
MachineFrameInfo(const MachineFrameInfo &) = delete;
@@ -360,6 +362,7 @@ public:
/// This object is used for SjLj exceptions.
int getFunctionContextIndex() const { return FunctionContextIdx; }
void setFunctionContextIndex(int I) { FunctionContextIdx = I; }
+ bool hasFunctionContextIndex() const { return FunctionContextIdx != -1; }
/// This method may be called any time after instruction
/// selection is complete to determine if there is a call to
@@ -385,6 +388,20 @@ public:
bool hasPatchPoint() const { return HasPatchPoint; }
void setHasPatchPoint(bool s = true) { HasPatchPoint = s; }
+ /// Return true if this function requires a split stack prolog, even if it
+ /// uses no stack space. This is only meaningful for functions where
+ /// MachineFunction::shouldSplitStack() returns true.
+ //
+ // For non-leaf functions we have to allow for the possibility that the call
+ // is to a non-split function, as in PR37807. This function could also take
+ // the address of a non-split function. When the linker tries to adjust its
+ // non-existent prologue, it would fail with an error. Mark the object file so
+ // that such failures are not errors. See this Go language bug-report
+ // https://go-review.googlesource.com/c/go/+/148819/
+ bool needsSplitStackProlog() const {
+ return getStackSize() != 0 || hasTailCall();
+ }
+
/// Return the minimum frame object index.
int getObjectIndexBegin() const { return -NumFixedObjects; }
@@ -488,6 +505,14 @@ public:
return Objects[ObjectIdx+NumFixedObjects].Alloca;
}
+ /// Remove the underlying Alloca of the specified stack object if it
+ /// exists. This generally should not be used and is for reduction tooling.
+ void clearObjectAllocation(int ObjectIdx) {
+ assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
+ "Invalid Object Idx!");
+ Objects[ObjectIdx + NumFixedObjects].Alloca = nullptr;
+ }
+
/// Return the assigned stack offset of the specified object
/// from the incoming stack pointer.
int64_t getObjectOffset(int ObjectIdx) const {
@@ -773,6 +798,9 @@ public:
MachineBasicBlock *getRestorePoint() const { return Restore; }
void setRestorePoint(MachineBasicBlock *NewRestore) { Restore = NewRestore; }
+ uint64_t getUnsafeStackSize() const { return UnsafeStackSize; }
+ void setUnsafeStackSize(uint64_t Size) { UnsafeStackSize = Size; }
+
/// Return a set of physical registers that are pristine.
///
/// Pristine registers hold a value that is useless to the current function,
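needsSplitStackProlog packages the "non-zero stack size or tail call" condition discussed in the comment above. A minimal sketch of the consumer side (wantsSplitStackCheck is hypothetical; shouldSplitStack is the MachineFunction query the comment refers to):

// Decide whether this function's prologue needs the split-stack check.
bool wantsSplitStackCheck(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MF.shouldSplitStack() && MFI.needsSplitStackProlog();
}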
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index c4767a51b094..fc1188186ac4 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -103,6 +103,22 @@ struct MachineFunctionInfo {
static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) {
return new (Allocator.Allocate<Ty>()) Ty(MF);
}
+
+ template <typename Ty>
+ static Ty *create(BumpPtrAllocator &Allocator, const Ty &MFI) {
+ return new (Allocator.Allocate<Ty>()) Ty(MFI);
+ }
+
+ /// Make a functionally equivalent copy of this MachineFunctionInfo in \p MF.
+ /// This requires remapping MachineBasicBlock references from the original
+ /// parent to values in the new function. Targets may assume that virtual
+ /// register and frame index values are preserved in the new function.
+ virtual MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return nullptr;
+ }
};
/// Properties which a MachineFunction may have at a given point in time.
@@ -277,12 +293,6 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
// numbered and this vector keeps track of the mapping from ID's to MBB's.
std::vector<MachineBasicBlock*> MBBNumbering;
- // Unary encoding of basic block symbols is used to reduce size of ".strtab".
- // Basic block number 'i' gets a prefix of length 'i'. The ith character also
- // denotes the type of basic block number 'i'. Return blocks are marked with
- // 'r', landing pads with 'l' and regular blocks with 'a'.
- std::vector<char> BBSectionsSymbolPrefix;
-
// Pool-allocate MachineFunction-lifetime and IR objects.
BumpPtrAllocator Allocator;
@@ -537,8 +547,13 @@ public:
/// the copied value; or for parameters, creates a DBG_PHI on entry.
/// May insert instructions into the entry block!
/// \p MI The copy-like instruction to salvage.
+ /// \p DbgPHICache A container to cache already-solved COPYs.
/// \returns An instruction/operand pair identifying the defining value.
- DebugInstrOperandPair salvageCopySSA(MachineInstr &MI);
+ DebugInstrOperandPair
+ salvageCopySSA(MachineInstr &MI,
+ DenseMap<Register, DebugInstrOperandPair> &DbgPHICache);
+
+ DebugInstrOperandPair salvageCopySSAImpl(MachineInstr &MI);
/// Finalise any partially emitted debug instructions. These are DBG_INSTR_REF
/// instructions where we only knew the vreg of the value they use, not the
@@ -747,6 +762,21 @@ public:
return const_cast<MachineFunction*>(this)->getInfo<Ty>();
}
+ template <typename Ty> Ty *cloneInfo(const Ty &Old) {
+ assert(!MFInfo);
+ MFInfo = Ty::template create<Ty>(Allocator, Old);
+ return static_cast<Ty *>(MFInfo);
+ }
+
+ MachineFunctionInfo *cloneInfoFrom(
+ const MachineFunction &OrigMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) {
+ assert(!MFInfo && "new function already has MachineFunctionInfo");
+ if (!OrigMF.MFInfo)
+ return nullptr;
+ return OrigMF.MFInfo->clone(Allocator, *this, Src2DstMBB);
+ }
+
/// Returns the denormal handling type for the default rounding mode of the
/// function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const;
@@ -1101,12 +1131,6 @@ public:
/// Add a cleanup action for a landing pad.
void addCleanup(MachineBasicBlock *LandingPad);
- void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter,
- const BlockAddress *RecoverBA);
-
- void addSEHCleanupHandler(MachineBasicBlock *LandingPad,
- const Function *Cleanup);
-
/// Return the type id for the specified typeinfo. This is function wide.
unsigned getTypeIDFor(const GlobalValue *TI);
@@ -1116,6 +1140,11 @@ public:
/// Map the landing pad's EH symbol to the call site indexes.
void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);
+ /// Return if there is any wasm exception handling.
+ bool hasAnyWasmLandingPadIndex() const {
+ return !WasmLPadToIndexMap.empty();
+ }
+
/// Map the landing pad to its index. Used for Wasm exception handling.
void setWasmLandingPadIndex(const MachineBasicBlock *LPad, unsigned Index) {
WasmLPadToIndexMap[LPad] = Index;
@@ -1132,6 +1161,10 @@ public:
return WasmLPadToIndexMap.lookup(LPad);
}
+ bool hasAnyCallSiteLandingPad() const {
+ return !LPadToCallSiteMap.empty();
+ }
+
/// Get the call site indexes for a landing pad EH symbol.
SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
assert(hasCallSiteLandingPad(Sym) &&
@@ -1144,6 +1177,10 @@ public:
return !LPadToCallSiteMap[Sym].empty();
}
+ bool hasAnyCallSiteLabel() const {
+ return !CallSiteMap.empty();
+ }
+
/// Map the begin label for a call site.
void setCallSiteBeginLabel(MCSymbol *BeginLabel, unsigned Site) {
CallSiteMap[BeginLabel] = Site;
@@ -1220,10 +1257,6 @@ public:
void copyCallSiteInfo(const MachineInstr *Old,
const MachineInstr *New);
- const std::vector<char> &getBBSectionsSymbolPrefix() const {
- return BBSectionsSymbolPrefix;
- }
-
/// Move the call site info from \p Old to \p New call site info. This function
/// is used when we are replacing one call instruction with another one to
/// the same callee.
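A target opts into the new cloning support by overriding the clone hook and delegating to cloneInfo. A minimal sketch (MyTargetFunctionInfo is hypothetical; a real target would remap any stored MachineBasicBlock pointers through Src2DstMBB):

struct MyTargetFunctionInfo : public MachineFunctionInfo {
  MachineFunctionInfo *
  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
      const override {
    // Virtual register and frame index values are preserved, so a plain
    // copy via cloneInfo<> suffices when no MBB pointers are stored.
    return DestMF.cloneInfo<MyTargetFunctionInfo>(*this);
  }
};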
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 2893e138a95c..acc4c9a24c01 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/PseudoProbe.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ArrayRecycler.h"
@@ -38,6 +37,9 @@
namespace llvm {
+class DILabel;
+class Instruction;
+class MDNode;
class AAResults;
template <typename T> class ArrayRef;
class DIExpression;
@@ -96,7 +98,7 @@ public:
FmContract = 1 << 8, // Instruction supports Fast math
// contraction operations like fma.
FmAfn = 1 << 9, // Instruction may map to Fast math
- // instrinsic approximation.
+ // intrinsic approximation.
FmReassoc = 1 << 10, // Instruction supports Fast math
// reassociation of operand order.
NoUWrap = 1 << 11, // Instruction supports binary operator
@@ -586,8 +588,7 @@ public:
/// Return true if operand \p OpIdx is a subregister index.
bool isOperandSubregIdx(unsigned OpIdx) const {
- assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&
- "Expected MO_Immediate operand type.");
+ assert(getOperand(OpIdx).isImm() && "Expected MO_Immediate operand type.");
if (isExtractSubreg() && OpIdx == 2)
return true;
if (isInsertSubreg() && OpIdx == 3)
@@ -810,6 +811,12 @@ public:
return hasProperty(MCID::Pseudo, Type);
}
+ /// Return true if this instruction doesn't produce any output in the form of
+ /// executable instructions.
+ bool isMetaInstruction(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Meta, Type);
+ }
+
bool isReturn(QueryType Type = AnyInBundle) const {
return hasProperty(MCID::Return, Type);
}
@@ -1306,30 +1313,6 @@ public:
getOperand(0).getSubReg() == getOperand(1).getSubReg();
}
- /// Return true if this instruction doesn't produce any output in the form of
- /// executable instructions.
- bool isMetaInstruction() const {
- switch (getOpcode()) {
- default:
- return false;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::DBG_VALUE:
- case TargetOpcode::DBG_VALUE_LIST:
- case TargetOpcode::DBG_INSTR_REF:
- case TargetOpcode::DBG_PHI:
- case TargetOpcode::DBG_LABEL:
- case TargetOpcode::LIFETIME_START:
- case TargetOpcode::LIFETIME_END:
- case TargetOpcode::PSEUDO_PROBE:
- case TargetOpcode::ARITH_FENCE:
- return true;
- }
- }
-
/// Return true if this is a transient instruction that is either very likely
/// to be eliminated during register allocation (such as copy-like
/// instructions), or if this instruction doesn't have an execution-time cost.
@@ -1744,7 +1727,7 @@ public:
/// Erase an operand from an instruction, leaving it with one
/// fewer operand than it started with.
- void RemoveOperand(unsigned OpNo);
+ void removeOperand(unsigned OpNo);
/// Clear this MachineInstr's memory reference descriptor list. This resets
/// the memrefs to their most conservative state. This should be used only
@@ -1863,12 +1846,12 @@ private:
/// Unlink all of the register operands in this instruction from their
/// respective use lists. This requires that the operands already be on their
/// use lists.
- void RemoveRegOperandsFromUseLists(MachineRegisterInfo&);
+ void removeRegOperandsFromUseLists(MachineRegisterInfo&);
/// Add all of the register operands in this instruction from their
/// respective use lists. This requires that the operands not be on their
/// use lists yet.
- void AddRegOperandsToUseLists(MachineRegisterInfo&);
+ void addRegOperandsToUseLists(MachineRegisterInfo&);
/// Slow path for hasProperty when we're dealing with a bundle.
bool hasPropertyInBundle(uint64_t Mask, QueryType Type) const;
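isMetaInstruction is now table-driven off MCID::Meta instead of the hand-written opcode switch deleted above, so targets can mark their own meta opcodes. A minimal sketch of a typical consumer (countRealInstrs is hypothetical):

// Count instructions that actually produce executable output.
unsigned countRealInstrs(const MachineBasicBlock &MBB) {
  unsigned N = 0;
  for (const MachineInstr &MI : MBB)
    if (!MI.isMetaInstruction()) // keyed off the MCID::Meta flag now
      ++N;
  return N;
}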
diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index c90f07096d02..daf0f18a7518 100644
--- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -33,7 +33,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h
index 00080b171974..41574d8d556a 100644
--- a/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -31,14 +31,13 @@ class MDNode;
class raw_ostream;
class MachineFunction;
class ModuleSlotTracker;
+class TargetInstrInfo;
/// This class contains a discriminated union of information about pointers in
/// memory operands, relating them back to LLVM IR or to virtual locations (such
/// as frame indices) that are exposed during codegen.
struct MachinePointerInfo {
/// This is the IR pointer value for the access, or it is null if unknown.
- /// If this is null, then the access is to a pointer in the default address
- /// space.
PointerUnion<const Value *, const PseudoSourceValue *> V;
/// Offset - This is an offset from the base Value*.
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index c07606e89374..cdd0073749d3 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -30,12 +30,10 @@
#ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H
#define LLVM_CODEGEN_MACHINEMODULEINFO_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include <memory>
#include <utility>
@@ -46,9 +44,9 @@ namespace llvm {
class BasicBlock;
class Function;
class LLVMTargetMachine;
-class MMIAddrLabelMap;
class MachineFunction;
class Module;
+class MCSymbol;
//===----------------------------------------------------------------------===//
/// This class can be derived from and used by targets to hold private
@@ -106,10 +104,6 @@ class MachineModuleInfo {
/// \}
- /// This map keeps track of which symbol is being used for the specified
- /// basic block's address of label.
- MMIAddrLabelMap *AddrLabelSymbols;
-
// TODO: Ideally, what we'd like is to have a switch that allows emitting
// synchronous (precise at call-sites only) CFA into .eh_frame. However,
// even under this switch, we'd like .debug_frame to be precise when using
@@ -123,22 +117,6 @@ class MachineModuleInfo {
/// point. This is used to emit an undefined reference to _fltused.
bool UsesMSVCFloatingPoint;
- /// True if the module calls the __morestack function indirectly, as is
- /// required under the large code model on x86. This is used to emit
- /// a definition of a symbol, __morestack_addr, containing the address. See
- /// comments in lib/Target/X86/X86FrameLowering.cpp for more details.
- bool UsesMorestackAddr;
-
- /// True if the module contains split-stack functions. This is used to
- /// emit .note.GNU-split-stack section as required by the linker for
- /// special handling split-stack function calling no-split-stack function.
- bool HasSplitStack;
-
- /// True if the module contains no-split-stack functions. This is used to
- /// emit .note.GNU-no-split-stack section when it also contains split-stack
- /// functions.
- bool HasNosplitStack;
-
/// Maps IR Functions to their corresponding MachineFunctions.
DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions;
/// Next unique number available for a MachineFunction.
@@ -184,6 +162,9 @@ public:
/// Machine Function map.
void deleteMachineFunctionFor(Function &F);
+ /// Add an externally created MachineFunction \p MF for \p F.
+ void insertFunction(const Function &F, std::unique_ptr<MachineFunction> &&MF);
+
/// Keep track of various per-module pieces of information for backends
/// that would like to do so.
template<typename Ty>
@@ -200,55 +181,11 @@ public:
/// Returns true if valid debug info is present.
bool hasDebugInfo() const { return DbgInfoAvailable; }
- void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; }
void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; }
- bool usesMorestackAddr() const {
- return UsesMorestackAddr;
- }
-
- void setUsesMorestackAddr(bool b) {
- UsesMorestackAddr = b;
- }
-
- bool hasSplitStack() const {
- return HasSplitStack;
- }
-
- void setHasSplitStack(bool b) {
- HasSplitStack = b;
- }
-
- bool hasNosplitStack() const {
- return HasNosplitStack;
- }
-
- void setHasNosplitStack(bool b) {
- HasNosplitStack = b;
- }
-
- /// Return the symbol to be used for the specified basic block when its
- /// address is taken. This cannot be its normal LBB label because the block
- /// may be accessed outside its containing function.
- MCSymbol *getAddrLabelSymbol(const BasicBlock *BB) {
- return getAddrLabelSymbolToEmit(BB).front();
- }
-
- /// Return the symbol to be used for the specified basic block when its
- /// address is taken. If other blocks were RAUW'd to this one, we may have
- /// to emit them as well, return the whole set.
- ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
-
- /// If the specified function has had any references to address-taken blocks
- /// generated, but the block got deleted, return the symbol now so we can
- /// emit it. This prevents emitting a reference to a symbol that has no
- /// definition.
- void takeDeletedSymbolsForFunction(const Function *F,
- std::vector<MCSymbol*> &Result);
-
/// \name Exception Handling
/// \{
diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h
index eded28183ea2..c88e72cdc1d9 100644
--- a/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -13,15 +13,14 @@
#ifndef LLVM_CODEGEN_MACHINEOPERAND_H
#define LLVM_CODEGEN_MACHINEOPERAND_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
namespace llvm {
+class LLT;
class BlockAddress;
class Constant;
class ConstantFP;
@@ -460,6 +459,16 @@ public:
return !isUndef() && !isInternalRead() && (isUse() || getSubReg());
}
+ /// Return true if this operand can validly be appended to an arbitrary
/// operand list, i.e. it behaves like an implicit operand.
+ bool isValidExcessOperand() const {
+ if ((isReg() && isImplicit()) || isRegMask())
+ return true;
+
+ // Debug operands
+ return isMetadata() || isMCSymbol();
+ }
+
//===--------------------------------------------------------------------===//
// Mutators for Register Operands
//===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index 285b858c96cb..cb0998984dfb 100644
--- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -15,8 +15,9 @@
#ifndef LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H
#define LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
namespace llvm {
class MachineBasicBlock;
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index 08b76295dbf2..f968089e0de0 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -15,11 +15,10 @@
#ifndef LLVM_CODEGEN_MACHINEOUTLINER_H
#define LLVM_CODEGEN_MACHINEOUTLINER_H
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include <initializer_list>
namespace llvm {
namespace outliner {
@@ -56,6 +55,55 @@ private:
/// target.
unsigned CallOverhead = 0;
+ /// Liveness information for this Candidate. Tracks from the end of the
+ /// block containing this Candidate to the beginning of its sequence.
+ ///
+ /// Optional. Can be used to fine-tune the cost model, or fine-tune legality
+ /// decisions.
+ LiveRegUnits FromEndOfBlockToStartOfSeq;
+
+ /// Liveness information restricted to this Candidate's instruction sequence.
+ ///
+ /// Optional. Can be used to fine-tune the cost model, or fine-tune legality
+ /// decisions.
+ LiveRegUnits InSeq;
+
+ /// True if FromEndOfBlockToStartOfSeq has been initialized.
+ bool FromEndOfBlockToStartOfSeqWasSet = false;
+
+ /// True if InSeq has been initialized.
+ bool InSeqWasSet = false;
+
+ /// Populate FromEndOfBlockToStartOfSeq with liveness information.
+ void initFromEndOfBlockToStartOfSeq(const TargetRegisterInfo &TRI) {
+ assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
+ "Candidate's Machine Function must track liveness");
+ // Only initialize once.
+ if (FromEndOfBlockToStartOfSeqWasSet)
+ return;
+ FromEndOfBlockToStartOfSeqWasSet = true;
+ FromEndOfBlockToStartOfSeq.init(TRI);
+ FromEndOfBlockToStartOfSeq.addLiveOuts(*MBB);
+ // Compute liveness from the end of the block up to the beginning of the
+ // outlining candidate.
+ for (auto &MI : make_range(MBB->rbegin(),
+ (MachineBasicBlock::reverse_iterator)front()))
+ FromEndOfBlockToStartOfSeq.stepBackward(MI);
+ }
+
+ /// Populate InSeq with liveness information.
+ void initInSeq(const TargetRegisterInfo &TRI) {
+ assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
+ "Candidate's Machine Function must track liveness");
+ // Only initialize once.
+ if (InSeqWasSet)
+ return;
+ InSeqWasSet = true;
+ InSeq.init(TRI);
+ for (auto &MI : make_range(front(), std::next(back())))
+ InSeq.accumulate(MI);
+ }
+
public:
/// The index of this \p Candidate's \p OutlinedFunction in the list of
/// \p OutlinedFunctions.
@@ -65,26 +113,9 @@ public:
/// from this point. Defined by the target.
unsigned CallConstructionID = 0;
- /// Contains physical register liveness information for the MBB containing
- /// this \p Candidate.
- ///
- /// This is optionally used by the target to calculate more fine-grained
- /// cost model information.
- LiveRegUnits LRU;
-
- /// Contains the accumulated register liveness information for the
- /// instructions in this \p Candidate.
- ///
- /// This is optionally used by the target to determine which registers have
- /// been used across the sequence.
- LiveRegUnits UsedInSequence;
-
/// Target-specific flags for this Candidate's MBB.
unsigned Flags = 0x0;
- /// True if initLRU has been called on this Candidate.
- bool LRUWasSet = false;
-
/// Return the number of instructions in this Candidate.
unsigned getLength() const { return Len; }
@@ -109,6 +140,50 @@ public:
MachineFunction *getMF() const { return MBB->getParent(); }
MachineBasicBlock *getMBB() const { return MBB; }
+ /// \returns True if \p Reg is available from the end of the block to the
+ /// beginning of the sequence.
+ ///
+ /// This query considers the following range:
+ ///
+ /// in_seq_1
+ /// in_seq_2
+ /// ...
+ /// in_seq_n
+ /// not_in_seq_1
+ /// ...
+ /// <end of block>
+ bool isAvailableAcrossAndOutOfSeq(Register Reg,
+ const TargetRegisterInfo &TRI) {
+ if (!FromEndOfBlockToStartOfSeqWasSet)
+ initFromEndOfBlockToStartOfSeq(TRI);
+ return FromEndOfBlockToStartOfSeq.available(Reg);
+ }
+
+ /// \returns True if `isAvailableAcrossAndOutOfSeq` fails for any register
+ /// in \p Regs.
+ bool isAnyUnavailableAcrossOrOutOfSeq(std::initializer_list<Register> Regs,
+ const TargetRegisterInfo &TRI) {
+ if (!FromEndOfBlockToStartOfSeqWasSet)
+ initFromEndOfBlockToStartOfSeq(TRI);
+ return any_of(Regs, [&](Register Reg) {
+ return !FromEndOfBlockToStartOfSeq.available(Reg);
+ });
+ }
+
+ /// \returns True if \p Reg is available within the sequence itself.
+ ///
+ /// This query considers the following range:
+ ///
+ /// in_seq_1
+ /// in_seq_2
+ /// ...
+ /// in_seq_n
+ bool isAvailableInsideSeq(Register Reg, const TargetRegisterInfo &TRI) {
+ if (!InSeqWasSet)
+ initInSeq(TRI);
+ return InSeq.available(Reg);
+ }
+
/// The number of instructions that would be saved by outlining every
/// candidate of this type.
///
@@ -132,31 +207,6 @@ public:
return getStartIdx() > RHS.getStartIdx();
}
- /// Compute the registers that are live across this Candidate.
- /// Used by targets that need this information for cost model calculation.
- /// If a target does not need this information, then this should not be
- /// called.
- void initLRU(const TargetRegisterInfo &TRI) {
- assert(MBB->getParent()->getRegInfo().tracksLiveness() &&
- "Candidate's Machine Function must track liveness");
- // Only initialize once.
- if (LRUWasSet)
- return;
- LRUWasSet = true;
- LRU.init(TRI);
- LRU.addLiveOuts(*MBB);
-
- // Compute liveness from the end of the block up to the beginning of the
- // outlining candidate.
- std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(),
- [this](MachineInstr &MI) { LRU.stepBackward(MI); });
-
- // Walk over the sequence itself and figure out which registers were used
- // in the sequence.
- UsedInSequence.init(TRI);
- std::for_each(front(), std::next(back()),
- [this](MachineInstr &MI) { UsedInSequence.accumulate(MI); });
- }
};
/// The information necessary to create an outlined function for some
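The renamed queries replace direct pokes at the old LRU / UsedInSequence members with lazily initialized, intention-revealing accessors. A minimal sketch of a target-side legality check (linkRegisterIsFree is hypothetical; LR stands in for a target's link register):

// True if LR is untouched across the candidate and within its sequence.
bool linkRegisterIsFree(outliner::Candidate &C, Register LR,
                        const TargetRegisterInfo &TRI) {
  return C.isAvailableAcrossAndOutOfSeq(LR, TRI) &&
         C.isAvailableInsideSeq(LR, TRI);
}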
diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h
index 75b8a89c812e..6089339c7f5a 100644
--- a/llvm/include/llvm/CodeGen/MachinePassManager.h
+++ b/llvm/include/llvm/CodeGen/MachinePassManager.h
@@ -25,13 +25,15 @@
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/type_traits.h"
+
+#include <map>
namespace llvm {
class Module;
+class Function;
+class MachineFunction;
extern template class AnalysisManager<MachineFunction>;
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index e6763899a083..7748055f5d35 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -47,6 +47,7 @@ FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ())
+FUNCTION_PASS("tlshoist", TLSVariableHoistPass, ())
FUNCTION_PASS("verify", VerifierPass, ())
#undef FUNCTION_PASS
@@ -119,6 +120,7 @@ DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ())
DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ())
DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ())
DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ())
+DUMMY_FUNCTION_PASS("select-optimize", SelectOptimizePass, ())
#undef DUMMY_FUNCTION_PASS
#ifndef DUMMY_MODULE_PASS
@@ -197,6 +199,5 @@ DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ())
DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ())
DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ())
-DUMMY_MACHINE_FUNCTION_PASS("machine-cycles", MachineCycleInfoWrapperPass, ())
DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass, ())
#undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 7e7fa57d80da..4559f7a9bde7 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -40,13 +40,17 @@
#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
#define LLVM_CODEGEN_MACHINEPIPELINER_H
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
+#include <deque>
+
namespace llvm {
class AAResults;
@@ -80,6 +84,8 @@ public:
SmallVector<MachineOperand, 4> BrCond;
MachineInstr *LoopInductionVar = nullptr;
MachineInstr *LoopCompare = nullptr;
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
+ nullptr;
};
LoopInfo LI;
@@ -115,6 +121,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
LiveIntervals &LIS;
const RegisterClassInfo &RegClassInfo;
unsigned II_setByPragma = 0;
+ TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
/// A topological ordering of the SUnits, which is needed for changing
/// dependences and iterating over the SUnits.
@@ -192,9 +199,11 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
public:
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
- const RegisterClassInfo &rci, unsigned II)
+ const RegisterClassInfo &rci, unsigned II,
+ TargetInstrInfo::PipelinerLoopInfo *PLI)
: ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
- RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
+ RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
+ Topo(SUnits, &ExitSU) {
P.MF->getSubtarget().getSMSMutations(Mutations);
if (SwpEnableCopyToPhi)
Mutations.push_back(std::make_unique<CopyToPhiMutation>());
@@ -585,6 +594,13 @@ public:
return ScheduledInstrs[cycle];
}
+ SmallSet<SUnit *, 8>
+ computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
+ TargetInstrInfo::PipelinerLoopInfo *PLI);
+
+ bool
+ normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
+ TargetInstrInfo::PipelinerLoopInfo *PLI);
bool isValidSchedule(SwingSchedulerDAG *SSD);
void finalizeSchedule(SwingSchedulerDAG *SSD);
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 94ae6fe02e9c..b2c5f12106af 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -15,18 +15,16 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/LaneBitmask.h"
@@ -229,6 +227,16 @@ public:
/// Returns true if the updated CSR list was initialized and false otherwise.
bool isUpdatedCSRsInitialized() const { return IsUpdatedCSRsInitialized; }
+ /// Returns true if a register can be used as an argument to a function.
+ bool isArgumentRegister(const MachineFunction &MF, MCRegister Reg) const;
+
+ /// Returns true if a register is a fixed register.
+ bool isFixedRegister(const MachineFunction &MF, MCRegister Reg) const;
+
+ /// Returns true if a register is a general purpose register.
+ bool isGeneralPurposeRegister(const MachineFunction &MF,
+ MCRegister Reg) const;
+
/// Disables the register from the list of CSRs.
/// I.e. the register will not appear as part of the CSR mask.
/// \see UpdatedCalleeSavedRegs.
@@ -825,23 +833,12 @@ public:
/// to refer to the designated register.
void updateDbgUsersToReg(MCRegister OldReg, MCRegister NewReg,
ArrayRef<MachineInstr *> Users) const {
- SmallSet<MCRegister, 4> OldRegUnits;
- for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); RUI.isValid();
- ++RUI)
- OldRegUnits.insert(*RUI);
-
// If this operand is a register, check whether it overlaps with OldReg.
// If it does, replace with NewReg.
- auto UpdateOp = [this, &NewReg, &OldReg, &OldRegUnits](MachineOperand &Op) {
- if (Op.isReg()) {
- for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo());
- RUI.isValid(); ++RUI) {
- if (OldRegUnits.contains(*RUI)) {
- Op.setReg(NewReg);
- break;
- }
- }
- }
+ auto UpdateOp = [this, &NewReg, &OldReg](MachineOperand &Op) {
+ if (Op.isReg() &&
+ getTargetRegisterInfo()->regsOverlap(Op.getReg(), OldReg))
+ Op.setReg(NewReg);
};
// Iterate through (possibly several) operands to DBG_VALUEs and update
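The new register-classification queries above dispatch to the target. A minimal sketch of a consumer (usableAsArgument is hypothetical):

// Candidate filter: argument-passing registers that are not fixed.
bool usableAsArgument(const MachineFunction &MF, MCRegister Reg) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return MRI.isArgumentRegister(MF, Reg) && !MRI.isFixedRegister(MF, Reg);
}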
diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h
index 6dbf321bdeaa..f59d7cf8a522 100644
--- a/llvm/include/llvm/CodeGen/MachineSSAContext.h
+++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h
@@ -15,21 +15,21 @@
#ifndef LLVM_CODEGEN_MACHINESSACONTEXT_H
#define LLVM_CODEGEN_MACHINESSACONTEXT_H
-#include "llvm/ADT/GenericSSAContext.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/Printable.h"
-#include <memory>
-
namespace llvm {
+class MachineRegisterInfo;
class MachineInstr;
-class MachineBasicBlock;
class MachineFunction;
class Register;
+template <typename _FunctionT> class GenericSSAContext;
template <typename, bool> class DominatorTreeBase;
inline auto successors(MachineBasicBlock *BB) { return BB->successors(); }
inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); }
+inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); }
+inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); }
template <> class GenericSSAContext<MachineFunction> {
const MachineRegisterInfo *RegInfo = nullptr;
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 267c4b595eec..0554eb1ab77e 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -287,7 +287,7 @@ protected:
const SUnit *NextClusterPred = nullptr;
const SUnit *NextClusterSucc = nullptr;
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
unsigned NumInstrsScheduled = 0;
@@ -679,7 +679,7 @@ private:
// For each PIdx, stores the resource group IDs of its subunits
SmallVector<APInt, 16> ResourceGroupSubUnitMasks;
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
// Remember the greatest possible stall as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
unsigned MaxObservedStall;
diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h
index 8423b2da1c78..43571b7b8afd 100644
--- a/llvm/include/llvm/CodeGen/MachineStableHash.h
+++ b/llvm/include/llvm/CodeGen/MachineStableHash.h
@@ -17,6 +17,8 @@
#include "llvm/CodeGen/StableHashing.h"
namespace llvm {
+class MachineBasicBlock;
+class MachineFunction;
class MachineInstr;
class MachineOperand;
@@ -24,6 +26,8 @@ stable_hash stableHashValue(const MachineOperand &MO);
stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false,
bool HashConstantPoolIndices = false,
bool HashMemOperands = false);
+stable_hash stableHashValue(const MachineBasicBlock &MBB);
+stable_hash stableHashValue(const MachineFunction &MF);
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index e8dbf49994bb..c515101e80fd 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -61,7 +61,6 @@
#define LLVM_CODEGEN_MODULOSCHEDULE_H
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -70,6 +69,8 @@
namespace llvm {
class MachineBasicBlock;
+class MachineLoop;
+class MachineRegisterInfo;
class MachineInstr;
class LiveIntervals;
@@ -190,8 +191,8 @@ private:
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
- ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
- MBBVectorTy &PrologBBs);
+ MachineBasicBlock *OrigBB, ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, InstrMapTy &InstrMap,
diff --git a/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h b/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h
index 51822d082bad..043b6b120632 100644
--- a/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h
+++ b/llvm/include/llvm/CodeGen/PBQP/ReductionRules.h
@@ -190,7 +190,7 @@ namespace PBQP {
RawVector v = G.getNodeCosts(NId);
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
// Although a conservatively allocatable node can be allocated to a register,
// spilling it may provide a lower cost solution. Assert here that spilling
// is done by choice, not because there were no register available.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 616ab1034133..6e37d42f0d29 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -51,10 +51,8 @@ namespace llvm {
FunctionPass *createUnreachableBlockEliminationPass();
/// createBasicBlockSections Pass - This pass assigns sections to machine
- /// basic blocks and is enabled with -fbasic-block-sections. Buf is a memory
- /// buffer that contains the list of functions and basic block ids to
- /// selectively enable basic block sections.
- MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf);
+ /// basic blocks and is enabled with -fbasic-block-sections.
+ MachineFunctionPass *createBasicBlockSectionsPass();
/// createMachineFunctionSplitterPass - This pass splits machine functions
/// using profile information.
@@ -331,6 +329,8 @@ namespace llvm {
/// machine instructions.
extern char &MachineCopyPropagationID;
+ MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr);
+
/// PeepholeOptimizer - This pass performs peephole optimizations -
/// like extension and comparison eliminations.
extern char &PeepholeOptimizerID;
@@ -494,6 +494,9 @@ namespace llvm {
// This pass expands indirectbr instructions.
FunctionPass *createIndirectBrExpandPass();
+ /// Creates CFI Fixup pass. \see CFIFixup.cpp
+ FunctionPass *createCFIFixup();
+
/// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
FunctionPass *createCFIInstrInserter();
@@ -554,6 +557,12 @@ namespace llvm {
/// When learning an eviction policy, extract score (reward) information;
/// otherwise this does nothing.
FunctionPass *createRegAllocScoringPass();
+
+ /// JMC (Just My Code) instrumentation pass.
+ ModulePass *createJMCInstrumenterPass();
+
+ /// This pass converts conditional moves to conditional jumps when profitable.
+ FunctionPass *createSelectOptimizePass();
} // End llvm namespace
#endif
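
A sketch of how the new creator functions might be wired into a legacy pipeline; addCodeGenPasses is hypothetical, and real pipelines add these at specific phases:

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/IR/LegacyPassManager.h"

    void addCodeGenPasses(llvm::legacy::PassManagerBase &PM) {
      PM.add(llvm::createBasicBlockSectionsPass()); // no MemoryBuffer parameter now
      PM.add(llvm::createJMCInstrumenterPass());    // Just My Code instrumentation
      PM.add(llvm::createSelectOptimizePass());     // cmov -> branch where profitable
      PM.add(llvm::createCFIFixup());               // fix unwind info after layout
      PM.add(llvm::createMachineCopyPropagationPass(/*UseCopyInstr=*/false));
    }
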
diff --git a/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index f1487017f205..07b7ba321566 100644
--- a/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -25,7 +25,7 @@ class MachineMemOperand;
class MIRFormatter;
class PseudoSourceValue;
class raw_ostream;
-class TargetInstrInfo;
+class TargetMachine;
raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
@@ -59,7 +59,7 @@ private:
virtual void printCustom(raw_ostream &O) const;
public:
- explicit PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
+ explicit PseudoSourceValue(unsigned Kind, const TargetMachine &TM);
virtual ~PseudoSourceValue();
@@ -95,8 +95,8 @@ class FixedStackPseudoSourceValue : public PseudoSourceValue {
const int FI;
public:
- explicit FixedStackPseudoSourceValue(int FI, const TargetInstrInfo &TII)
- : PseudoSourceValue(FixedStack, TII), FI(FI) {}
+ explicit FixedStackPseudoSourceValue(int FI, const TargetMachine &TM)
+ : PseudoSourceValue(FixedStack, TM), FI(FI) {}
static bool classof(const PseudoSourceValue *V) {
return V->kind() == FixedStack;
@@ -115,7 +115,7 @@ public:
class CallEntryPseudoSourceValue : public PseudoSourceValue {
protected:
- CallEntryPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII);
+ CallEntryPseudoSourceValue(unsigned Kind, const TargetMachine &TM);
public:
bool isConstant(const MachineFrameInfo *) const override;
@@ -128,8 +128,7 @@ class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
const GlobalValue *GV;
public:
- GlobalValuePseudoSourceValue(const GlobalValue *GV,
- const TargetInstrInfo &TII);
+ GlobalValuePseudoSourceValue(const GlobalValue *GV, const TargetMachine &TM);
static bool classof(const PseudoSourceValue *V) {
return V->kind() == GlobalValueCallEntry;
@@ -143,7 +142,7 @@ class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue {
const char *ES;
public:
- ExternalSymbolPseudoSourceValue(const char *ES, const TargetInstrInfo &TII);
+ ExternalSymbolPseudoSourceValue(const char *ES, const TargetMachine &TM);
static bool classof(const PseudoSourceValue *V) {
return V->kind() == ExternalSymbolCallEntry;
@@ -154,7 +153,7 @@ public:
/// Manages creation of pseudo source values.
class PseudoSourceValueManager {
- const TargetInstrInfo &TII;
+ const TargetMachine &TM;
const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV;
std::map<int, std::unique_ptr<FixedStackPseudoSourceValue>> FSValues;
StringMap<std::unique_ptr<const ExternalSymbolPseudoSourceValue>>
@@ -164,7 +163,7 @@ class PseudoSourceValueManager {
GlobalCallEntries;
public:
- PseudoSourceValueManager(const TargetInstrInfo &TII);
+ PseudoSourceValueManager(const TargetMachine &TM);
/// Return a pseudo source value referencing the area below the stack frame of
/// a function, e.g., the argument space.
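
The TargetInstrInfo-to-TargetMachine migration touches every constructor in this header. A minimal sketch of the new form; in-tree code normally obtains these objects through PseudoSourceValueManager, which owns them, so the direct construction below is for illustration only:

    #include "llvm/CodeGen/PseudoSourceValue.h"
    #include "llvm/Target/TargetMachine.h"

    const llvm::PseudoSourceValue *
    makeFixedStackPSV(int FrameIndex, const llvm::TargetMachine &TM) {
      // Previously this took a TargetInstrInfo &; a TargetMachine now suffices.
      return new llvm::FixedStackPseudoSourceValue(FrameIndex, TM);
    }
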
diff --git a/llvm/include/llvm/CodeGen/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h
index e0205d7c92c8..a323ee9dc396 100644
--- a/llvm/include/llvm/CodeGen/RDFGraph.h
+++ b/llvm/include/llvm/CodeGen/RDFGraph.h
@@ -749,7 +749,6 @@ namespace rdf {
RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
RegisterRef makeRegRef(const MachineOperand &Op) const;
- RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
NodeAddr<RefNode*> RA) const;
diff --git a/llvm/include/llvm/CodeGen/RegAllocPBQP.h b/llvm/include/llvm/CodeGen/RegAllocPBQP.h
index 1ed55082e32c..1ea8840947bc 100644
--- a/llvm/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/llvm/include/llvm/CodeGen/RegAllocPBQP.h
@@ -183,11 +183,12 @@ public:
NodeMetadata() = default;
NodeMetadata(const NodeMetadata &Other)
- : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
- OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg),
- AllowedRegs(Other.AllowedRegs)
-#ifndef NDEBUG
- , everConservativelyAllocatable(Other.everConservativelyAllocatable)
+ : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
+ OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg),
+ AllowedRegs(Other.AllowedRegs)
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ ,
+ everConservativelyAllocatable(Other.everConservativelyAllocatable)
#endif
{
if (NumOpts > 0) {
@@ -217,7 +218,7 @@ public:
assert(RS >= this->RS && "A node's reduction state can not be downgraded");
this->RS = RS;
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
// Remember this state to assert later that a non-infinite register
// option was available.
if (RS == ConservativelyAllocatable)
@@ -247,7 +248,7 @@ public:
&OptUnsafeEdges[NumOpts]);
}
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
bool wasConservativelyAllocatable() const {
return everConservativelyAllocatable;
}
@@ -261,7 +262,7 @@ private:
Register VReg;
GraphMetadata::AllowedRegVecRef AllowedRegs;
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
bool everConservativelyAllocatable = false;
#endif
};
diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h
index a683223b5a4a..9dc3e98fe837 100644
--- a/llvm/include/llvm/CodeGen/Register.h
+++ b/llvm/include/llvm/CodeGen/Register.h
@@ -69,7 +69,7 @@ public:
/// Return true if the specified register number is in
/// the virtual register namespace.
static bool isVirtualRegister(unsigned Reg) {
- return Reg & MCRegister::VirtualRegFlag && !isStackSlot(Reg);
+ return Reg & MCRegister::VirtualRegFlag;
}
/// Convert a virtual register number to a 0-based index.
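
Dropping the !isStackSlot(Reg) term tightens the contract: the flag test alone now decides virtualness, so callers must exclude stack slots themselves. A hedged illustration:

    #include "llvm/CodeGen/Register.h"

    bool isVirtOperand(llvm::Register Reg) {
      if (llvm::Register::isStackSlot(Reg)) // filter stack slots up front ...
        return false;
      return Reg.isVirtual(); // ... then the VirtualRegFlag test alone suffices
    }
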
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/llvm/include/llvm/CodeGen/RegisterBank.h
index 5440d97728b4..66885f113e8e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h
+++ b/llvm/include/llvm/CodeGen/RegisterBank.h
@@ -1,4 +1,4 @@
-//==-- llvm/CodeGen/GlobalISel/RegisterBank.h - Register Bank ----*- C++ -*-==//
+//==-- llvm/CodeGen/RegisterBank.h - Register Bank ---------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_GLOBALISEL_REGISTERBANK_H
-#define LLVM_CODEGEN_GLOBALISEL_REGISTERBANK_H
+#ifndef LLVM_CODEGEN_REGISTERBANK_H
+#define LLVM_CODEGEN_REGISTERBANK_H
#include "llvm/ADT/BitVector.h"
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
index da785406bc31..bba4f1f025a0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ---------------*- C++ -*-===//
+//===- llvm/CodeGen/RegisterBankInfo.h --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H
-#define LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H
+#ifndef LLVM_CODEGEN_REGISTERBANKINFO_H
+#define LLVM_CODEGEN_REGISTERBANKINFO_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
@@ -217,8 +217,7 @@ public:
const ValueMapping *OperandsMapping,
unsigned NumOperands)
: ID(ID), Cost(Cost), OperandsMapping(OperandsMapping),
- NumOperands(NumOperands) {
- }
+ NumOperands(NumOperands) {}
/// Default constructor.
/// Use this constructor to express that the mapping is invalid.
@@ -630,8 +629,9 @@ public:
/// similar to ::copyCost, except for cases where multiple copy-like
/// operations need to be inserted. If the register is used as a source
/// operand and already has a bank assigned, \p CurBank is non-null.
- virtual unsigned getBreakDownCost(const ValueMapping &ValMapping,
- const RegisterBank *CurBank = nullptr) const {
+ virtual unsigned
+ getBreakDownCost(const ValueMapping &ValMapping,
+ const RegisterBank *CurBank = nullptr) const {
return std::numeric_limits<unsigned>::max();
}
@@ -772,4 +772,4 @@ hash_code hash_value(const RegisterBankInfo::PartialMapping &PartMapping);
} // end namespace llvm
-#endif // LLVM_CODEGEN_GLOBALISEL_REGISTERBANKINFO_H
+#endif // LLVM_CODEGEN_REGISTERBANKINFO_H
diff --git a/llvm/include/llvm/CodeGen/RegisterClassInfo.h b/llvm/include/llvm/CodeGen/RegisterClassInfo.h
index d82f1db60d8b..39c72a42c433 100644
--- a/llvm/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/llvm/include/llvm/CodeGen/RegisterClassInfo.h
@@ -20,8 +20,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include <cassert>
+#include "llvm/MC/MCRegister.h"
#include <cstdint>
#include <memory>
@@ -61,6 +60,10 @@ class RegisterClassInfo {
// Map register alias to the callee saved Register.
SmallVector<MCPhysReg, 4> CalleeSavedAliases;
+ // Indicates whether a specified callee-saved register should appear in the
+ // allocation order exactly as written in the tablegen descriptions, or be
+ // listed later.
+ BitVector IgnoreCSRForAllocOrder;
+
// Reserved registers in the current MF.
BitVector Reserved;
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index 1deeb4d41511..c40c0eec80ec 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h
index 218e05f6eb6b..1f0cd273bf61 100644
--- a/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -70,6 +70,26 @@ class RegScavenger {
public:
RegScavenger() = default;
+ /// Record that \p Reg is in use at scavenging index \p FI. This is for
+ /// targets which need to directly manage the spilling process, and need to
+ /// update the scavenger's internal state. It is expected that this will be
+ /// called a second time with \p Restore set to a non-null value, so that the
+ /// externally inserted restore instruction resets the scavenged slot
+ /// liveness when encountered.
+ void assignRegToScavengingIndex(int FI, Register Reg,
+ MachineInstr *Restore = nullptr) {
+ for (ScavengedInfo &Slot : Scavenged) {
+ if (Slot.FrameIndex == FI) {
+ assert(!Slot.Reg || Slot.Reg == Reg);
+ Slot.Reg = Reg;
+ Slot.Restore = Restore;
+ return;
+ }
+ }
+
+ llvm_unreachable("did not find scavenging index");
+ }
+
/// Start tracking liveness from the beginning of basic block \p MBB.
void enterBasicBlock(MachineBasicBlock &MBB);
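
A usage sketch matching the two-call protocol described in the comment; recordScavenge and RestoreMI are illustrative names:

    #include "llvm/CodeGen/RegisterScavenging.h"

    void recordScavenge(llvm::RegScavenger &RS, int FI, llvm::Register Reg,
                        llvm::MachineInstr *RestoreMI) {
      RS.assignRegToScavengingIndex(FI, Reg);            // first call, at the spill
      RS.assignRegToScavengingIndex(FI, Reg, RestoreMI); // second call, at the restore
    }
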
diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
index bf347c0753e5..8b406a275025 100644
--- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -20,9 +20,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include <cstdint>
#include <vector>
diff --git a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
index 7c0ebe7191e4..c71aca0c992b 100644
--- a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
+++ b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
@@ -1,4 +1,4 @@
-//===- ReplaceWithVeclib.h - Replace vector instrinsics with veclib calls -===//
+//===- ReplaceWithVeclib.h - Replace vector intrinsics with veclib calls --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,8 +17,10 @@
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
namespace llvm {
+class Function;
struct ReplaceWithVeclib : public PassInfoMixin<ReplaceWithVeclib> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
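
A minimal sketch of scheduling the pass under the new pass manager; the analysis registration around it is elided:

    #include "llvm/CodeGen/ReplaceWithVeclib.h"
    #include "llvm/IR/PassManager.h"

    void addVeclibLowering(llvm::FunctionPassManager &FPM) {
      FPM.addPass(llvm::ReplaceWithVeclib()); // rewrite vector intrinsics to veclib calls
    }
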
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h
index af8c0cd8756e..f1c377f76d02 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -16,7 +16,6 @@
#define LLVM_CODEGEN_SCHEDULEDAG_H
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
@@ -31,6 +30,7 @@
namespace llvm {
+template <class GraphType> struct GraphTraits;
template<class Graph> class GraphWriter;
class LLVMTargetMachine;
class MachineFunction;
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 50b186de2b05..fb3900b4a9c1 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -16,10 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseMultiSet.h"
#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/identity.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAG.h"
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index e31719bcff0b..bcbd7ebcc0c9 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/ilist.h"
@@ -33,17 +32,13 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
-#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/RecyclingAllocator.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
@@ -55,6 +50,15 @@
namespace llvm {
+class DIExpression;
+class DILabel;
+class DIVariable;
+class Function;
+class Pass;
+class Type;
+template <class GraphType> struct GraphTraits;
+template <typename T, unsigned int N> class SmallSetVector;
+template <typename T, typename Enable> struct FoldingSetTrait;
class AAResults;
class BlockAddress;
class BlockFrequencyInfo;
@@ -276,8 +280,16 @@ class SelectionDAG {
DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
+ /// PersistentId counter to be used when inserting the next
+ /// SDNode into this SelectionDAG. We intentionally do not place this under
+ /// `#if LLVM_ENABLE_ABI_BREAKING_CHECKS` because it adds unneeded
+ /// complexity without noticeable benefits (see discussion with @thakis in
+ /// D120714).
uint16_t NextPersistentId = 0;
+ /// Are instruction-referencing variable locations desired for this function?
+ bool UseInstrRefDebugInfo = false;
+
public:
/// Clients of various APIs that cause global effects on
/// the DAG can optionally implement this interface. This allows the clients
@@ -440,6 +452,9 @@ public:
const DataLayout &getDataLayout() const { return MF->getDataLayout(); }
const TargetMachine &getTarget() const { return TM; }
const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); }
+ template <typename STC> const STC &getSubtarget() const {
+ return MF->getSubtarget<STC>();
+ }
const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
@@ -467,7 +482,7 @@ public:
void viewGraph(const std::string &Title);
void viewGraph();
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
std::map<const SDNode *, std::string> NodeGraphAttrs;
#endif
@@ -893,6 +908,11 @@ public:
/// Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT);
+ /// Create a vector-predicated logical NOT operation as (VP_XOR Val,
+ /// BooleanOne, Mask, EVL).
+ SDValue getVPLogicalNOT(const SDLoc &DL, SDValue Val, SDValue Mask,
+ SDValue EVL, EVT VT);
+
/// Returns sum of the base pointer and offset.
/// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL,
@@ -1032,25 +1052,26 @@ public:
const AAMDNodes &AAInfo = AAMDNodes());
SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
- SDValue Size, Align Alignment, bool isVol, bool isTailCall,
+ SDValue Size, Align Alignment, bool isVol,
+ bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo = AAMDNodes());
SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
- unsigned DstAlign, SDValue Src, unsigned SrcAlign,
- SDValue Size, Type *SizeTy, unsigned ElemSz,
- bool isTailCall, MachinePointerInfo DstPtrInfo,
+ SDValue Src, SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo);
SDValue getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
- unsigned DstAlign, SDValue Src, unsigned SrcAlign,
- SDValue Size, Type *SizeTy, unsigned ElemSz,
- bool isTailCall, MachinePointerInfo DstPtrInfo,
+ SDValue Src, SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo);
SDValue getAtomicMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
- unsigned DstAlign, SDValue Value, SDValue Size,
- Type *SizeTy, unsigned ElemSz, bool isTailCall,
+ SDValue Value, SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
MachinePointerInfo DstPtrInfo);
/// Helper function to make it easier to build SetCC's if you just have an
@@ -1070,14 +1091,24 @@ public:
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
+ /// Helper function to make it easier to build VP_SETCCs if you just have an
+ /// ISD::CondCode instead of an SDValue.
+ SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS,
+ ISD::CondCode Cond, SDValue Mask, SDValue EVL) {
+ assert(LHS.getValueType().isVector() && RHS.getValueType().isVector() &&
+ "Cannot compare scalars");
+ assert(Cond != ISD::SETCC_INVALID &&
+ "Cannot create a setCC of an invalid node.");
+ return getNode(ISD::VP_SETCC, DL, VT, LHS, RHS, getCondCode(Cond), Mask,
+ EVL);
+ }
+
/// Helper function to make it easier to build Select's if you just have
/// operands and don't want to check for vector.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS,
SDValue RHS) {
- assert(LHS.getValueType() == RHS.getValueType() &&
+ assert(LHS.getValueType() == VT && RHS.getValueType() == VT &&
"Cannot use select on differing types");
- assert(VT.isVector() == LHS.getValueType().isVector() &&
- "Cannot mix vectors and scalars");
auto Opcode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
return getNode(Opcode, DL, VT, Cond, LHS, RHS);
}
@@ -1149,7 +1180,7 @@ public:
uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()) {
// Ensure that codegen never sees alignment 0
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo,
- Alignment.getValueOr(getEVTAlign(MemVT)), Flags,
+ Alignment.value_or(getEVTAlign(MemVT)), Flags,
Size, AAInfo);
}
@@ -1230,7 +1261,7 @@ public:
const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) {
// Ensures that codegen never sees a None Alignment.
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT,
- Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags, AAInfo,
+ Alignment.value_or(getEVTAlign(MemVT)), MMOFlags, AAInfo,
Ranges);
}
/// FIXME: Remove once transition to Align is over.
@@ -1264,7 +1295,7 @@ public:
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes()) {
return getStore(Chain, dl, Val, Ptr, PtrInfo,
- Alignment.getValueOr(getEVTAlign(Val.getValueType())),
+ Alignment.value_or(getEVTAlign(Val.getValueType())),
MMOFlags, AAInfo);
}
/// FIXME: Remove once transition to Align is over.
@@ -1290,7 +1321,7 @@ public:
MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
const AAMDNodes &AAInfo = AAMDNodes()) {
return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT,
- Alignment.getValueOr(getEVTAlign(SVT)), MMOFlags,
+ Alignment.value_or(getEVTAlign(SVT)), MMOFlags,
AAInfo);
}
/// FIXME: Remove once transition to Align is over.
@@ -1323,7 +1354,7 @@ public:
const MDNode *Ranges = nullptr, bool IsExpanding = false) {
// Ensures that codegen never sees a None Alignment.
return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL,
- PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)),
+ PtrInfo, MemVT, Alignment.value_or(getEVTAlign(MemVT)),
MMOFlags, AAInfo, Ranges, IsExpanding);
}
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
@@ -1364,6 +1395,77 @@ public:
SDValue getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
+ SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+ SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+ Align Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ const MDNode *Ranges = nullptr,
+ bool IsExpanding = false);
+ inline SDValue getStridedLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment = MaybeAlign(),
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr,
+ bool IsExpanding = false) {
+ // Ensures that codegen never sees a None Alignment.
+ return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride,
+ Mask, EVL, PtrInfo, MemVT,
+ Alignment.value_or(getEVTAlign(MemVT)), MMOFlags,
+ AAInfo, Ranges, IsExpanding);
+ }
+ SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+ SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding = false);
+ SDValue getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+ SDValue Stride, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ const MDNode *Ranges = nullptr,
+ bool IsExpanding = false);
+ SDValue getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr,
+ SDValue Stride, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsExpanding = false);
+ SDValue
+ getExtStridedLoadVP(ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT,
+ SDValue Chain, SDValue Ptr, SDValue Stride, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding = false);
+ SDValue getExtStridedLoadVP(ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT,
+ SDValue Chain, SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding = false);
+ SDValue getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM);
+ SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val,
+ SDValue Ptr, SDValue Offset, SDValue Stride,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexedMode AM,
+ bool IsTruncating = false,
+ bool IsCompressing = false);
+ SDValue getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val,
+ SDValue Ptr, SDValue Stride, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ bool IsCompressing = false);
+ SDValue getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val,
+ SDValue Ptr, SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT SVT, MachineMemOperand *MMO,
+ bool IsCompressing = false);
+ SDValue getIndexedStridedStoreVP(SDValue OrigStore, const SDLoc &DL,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM);
+
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
ISD::MemIndexType IndexType);
@@ -1412,6 +1514,11 @@ public:
/// Return an AssertAlignSDNode.
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A);
+ /// Swap N1 and N2 if Opcode is a commutative binary opcode
+ /// and the canonical form expects the opposite order.
+ void canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
+ SDValue &N2) const;
+
/// Return the specified value casted to
/// the target's desired shift amount type.
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
@@ -1702,6 +1809,16 @@ public:
/// function mirrors \c llvm::salvageDebugInfo.
void salvageDebugInfo(SDNode &N);
+ /// Signal whether instruction-referencing variable locations are desired for
+ /// this function's debug-info.
+ void useInstrRefDebugInfo(bool Flag) {
+ UseInstrRefDebugInfo = Flag;
+ }
+
+ bool getUseInstrRefDebugInfo() const {
+ return UseInstrRefDebugInfo;
+ }
+
void dump() const;
/// In most cases this function returns the ABI alignment for a given type,
@@ -1745,16 +1862,6 @@ public:
/// simplify nodes with multiple uses more aggressively.)
SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits);
- /// See if the specified operand can be simplified with the knowledge that
- /// only the bits specified by DemandedBits are used in the elements specified
- /// by DemandedElts. If so, return the simpler operand, otherwise return a
- /// null SDValue.
- ///
- /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
- /// simplify nodes with multiple uses more aggressively.)
- SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits,
- const APInt &DemandedElts);
-
/// Return true if the sign bit of Op is known to be zero.
/// We use this predicate to simplify operations downstream.
bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const;
@@ -1771,6 +1878,11 @@ public:
bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
const APInt &DemandedElts, unsigned Depth = 0) const;
+ /// Return true if 'Op' is known to be zero in DemandedElts. We
+ /// use this predicate to simplify operations downstream.
+ bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const;
+
/// Return true if '(Op & Mask) == Mask'.
/// Op and Mask are known to be the same type.
bool MaskedValueIsAllOnes(SDValue Op, const APInt &Mask,
@@ -2020,11 +2132,6 @@ public:
/// Compute the default alignment value for the given type.
Align getEVTAlign(EVT MemoryVT) const;
- /// Compute the default alignment value for the given type.
- /// FIXME: Remove once transition to Align is over.
- inline unsigned getEVTAlignment(EVT MemoryVT) const {
- return getEVTAlign(MemoryVT).value();
- }
/// Test whether the given value is a constant int or similar node.
SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) const;
@@ -2039,39 +2146,34 @@ public:
isConstantFPBuildVectorOrConstantFP(N);
}
- void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
- SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
+ /// Set CallSiteInfo to be associated with Node.
+ void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo) {
+ SDCallSiteDbgInfo[Node].CSInfo = std::move(CallInfo);
}
-
- CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
- auto I = SDCallSiteDbgInfo.find(CallNode);
- if (I != SDCallSiteDbgInfo.end())
- return std::move(I->second).CSInfo;
- return CallSiteInfo();
+ /// Return CallSiteInfo associated with Node, or a default if none exists.
+ CallSiteInfo getCallSiteInfo(const SDNode *Node) {
+ auto I = SDCallSiteDbgInfo.find(Node);
+ return I != SDCallSiteDbgInfo.end() ? std::move(I->second).CSInfo
+ : CallSiteInfo();
}
-
+ /// Set HeapAllocSite to be associated with Node.
void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
SDCallSiteDbgInfo[Node].HeapAllocSite = MD;
}
-
- /// Return the HeapAllocSite type associated with the SDNode, if it exists.
- MDNode *getHeapAllocSite(const SDNode *Node) {
- auto It = SDCallSiteDbgInfo.find(Node);
- if (It == SDCallSiteDbgInfo.end())
- return nullptr;
- return It->second.HeapAllocSite;
+ /// Return HeapAllocSite associated with Node, or nullptr if none exists.
+ MDNode *getHeapAllocSite(const SDNode *Node) const {
+ auto I = SDCallSiteDbgInfo.find(Node);
+ return I != SDCallSiteDbgInfo.end() ? I->second.HeapAllocSite : nullptr;
}
-
+ /// Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) {
if (NoMerge)
SDCallSiteDbgInfo[Node].NoMerge = NoMerge;
}
-
- bool getNoMergeSiteInfo(const SDNode *Node) {
+ /// Return NoMerge info associated with Node.
+ bool getNoMergeSiteInfo(const SDNode *Node) const {
auto I = SDCallSiteDbgInfo.find(Node);
- if (I == SDCallSiteDbgInfo.end())
- return false;
- return I->second.NoMerge;
+ return I != SDCallSiteDbgInfo.end() ? I->second.NoMerge : false;
}
/// Return the current function's default denormal handling kind for the given
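
Many hunks in this header are mechanical renames of llvm::Optional accessors to their std::optional-style spellings (getValueOr to value_or, hasValue to has_value). A side-by-side sketch:

    #include "llvm/ADT/Optional.h"

    void demo(llvm::Optional<unsigned> MaybeVal) {
      bool Known = MaybeVal.has_value(); // was: MaybeVal.hasValue()
      unsigned V = MaybeVal.value_or(1); // was: MaybeVal.getValueOr(1)
      (void)Known;
      (void)V;
    }
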
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 0f3af915da64..e23eebec81db 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -49,7 +49,7 @@ public:
SDValue getBase() const { return Base; }
SDValue getIndex() { return Index; }
SDValue getIndex() const { return Index; }
- bool hasValidOffset() const { return Offset.hasValue(); }
+ bool hasValidOffset() const { return Offset.has_value(); }
int64_t getOffset() const { return *Offset; }
// Returns true if `Other` and `*this` are both some offset from the same base
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 9cea197724cc..35fb0bc80593 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -16,12 +16,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include <memory>
namespace llvm {
class AAResults;
+class TargetInstrInfo;
+class TargetMachine;
class SelectionDAGBuilder;
class SDValue;
class MachineRegisterInfo;
@@ -53,6 +54,7 @@ public:
const TargetLowering *TLI;
bool FastISelFailed;
SmallPtrSet<const Instruction *, 4> ElidedArgCopyInstrs;
+ bool UseInstrRefDebugInfo = false;
/// Current optimization remark emitter.
/// Used to report things like combines and FastISel failures.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 04c6b50197d4..5974f13a296b 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -508,7 +508,7 @@ BEGIN_TWO_BYTE_PACK()
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
- friend class VPLoadStoreSDNode;
+ friend class VPBaseLoadStoreSDNode;
friend class MaskedLoadStoreSDNode;
friend class MaskedGatherScatterSDNode;
friend class VPGatherScatterSDNode;
@@ -529,6 +529,7 @@ BEGIN_TWO_BYTE_PACK()
class LoadSDNodeBitfields {
friend class LoadSDNode;
friend class VPLoadSDNode;
+ friend class VPStridedLoadSDNode;
friend class MaskedLoadSDNode;
friend class MaskedGatherSDNode;
friend class VPGatherSDNode;
@@ -542,6 +543,7 @@ BEGIN_TWO_BYTE_PACK()
class StoreSDNodeBitfields {
friend class StoreSDNode;
friend class VPStoreSDNode;
+ friend class VPStridedStoreSDNode;
friend class MaskedStoreSDNode;
friend class MaskedScatterSDNode;
friend class VPScatterSDNode;
@@ -613,8 +615,10 @@ private:
SDNodeFlags Flags;
public:
- /// Unique and persistent id per SDNode in the DAG.
- /// Used for debug printing.
+ /// Unique and persistent id per SDNode in the DAG. Used for debug printing.
+ /// We intentionally do not place it under
+ /// `#if LLVM_ENABLE_ABI_BREAKING_CHECKS` because it adds unneeded complexity
+ /// without noticeable benefits (see discussion with @thakis in D120714).
uint16_t PersistentId;
//===--------------------------------------------------------------------===//
@@ -1191,12 +1195,13 @@ inline void SDValue::dumpr(const SelectionDAG *G) const {
inline void SDUse::set(const SDValue &V) {
if (Val.getNode()) removeFromList();
Val = V;
- if (V.getNode()) V.getNode()->addUse(*this);
+ if (V.getNode())
+ V->addUse(*this);
}
inline void SDUse::setInitial(const SDValue &V) {
Val = V;
- V.getNode()->addUse(*this);
+ V->addUse(*this);
}
inline void SDUse::setNode(SDNode *N) {
@@ -1364,6 +1369,7 @@ public:
case ISD::VP_STORE:
case ISD::MSTORE:
case ISD::VP_SCATTER:
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
return getOperand(2);
case ISD::MGATHER:
case ISD::MSCATTER:
@@ -1407,6 +1413,8 @@ public:
case ISD::VP_STORE:
case ISD::VP_GATHER:
case ISD::VP_SCATTER:
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
return true;
default:
return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
@@ -1661,6 +1669,9 @@ bool isAllOnesConstant(SDValue V);
/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);
+/// Returns true if \p V is a constant min signed integer value.
+bool isMinSignedConstant(SDValue V);
+
/// Return the non-bitcasted source operand of \p V if it exists.
/// If \p V is not a bitcasted value, it is returned as-is.
SDValue peekThroughBitcasts(SDValue V);
@@ -1677,6 +1688,11 @@ SDValue peekThroughExtractSubvectors(SDValue V);
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
+/// If \p V is a bitwise not, returns the inverted operand. Otherwise returns
+/// an empty SDValue. Only bits set in \p Mask are required to be inverted,
+/// other bits may be arbitrary.
+SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs);
+
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
bool AllowTruncation = false);
@@ -2353,34 +2369,64 @@ public:
}
};
-/// This base class is used to represent VP_LOAD and VP_STORE nodes
-class VPLoadStoreSDNode : public MemSDNode {
+/// This base class is used to represent VP_LOAD, VP_STORE,
+/// EXPERIMENTAL_VP_STRIDED_LOAD and EXPERIMENTAL_VP_STRIDED_STORE nodes
+class VPBaseLoadStoreSDNode : public MemSDNode {
public:
friend class SelectionDAG;
- VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
- SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
- MachineMemOperand *MMO)
- : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ VPBaseLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
+ const DebugLoc &DL, SDVTList VTs,
+ ISD::MemIndexedMode AM, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MemSDNode(NodeTy, Order, DL, VTs, MemVT, MMO) {
LSBaseSDNodeBits.AddressingMode = AM;
assert(getAddressingMode() == AM && "Value truncated");
}
- // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
- // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
+ // VPStridedStoreSDNode (Chain, Data, Ptr, Offset, Stride, Mask, EVL)
+ // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
+ // VPStridedLoadSDNode (Chain, Ptr, Offset, Stride, Mask, EVL)
+ // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
// Mask is a vector of i1 elements;
// the type of EVL is TLI.getVPExplicitVectorLengthTy().
const SDValue &getOffset() const {
- return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3);
+ return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
+ getOpcode() == ISD::VP_LOAD)
+ ? 2
+ : 3);
}
const SDValue &getBasePtr() const {
- return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2);
+ return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
+ getOpcode() == ISD::VP_LOAD)
+ ? 1
+ : 2);
}
const SDValue &getMask() const {
- return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4);
+ switch (getOpcode()) {
+ default:
+ llvm_unreachable("Invalid opcode");
+ case ISD::VP_LOAD:
+ return getOperand(3);
+ case ISD::VP_STORE:
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ return getOperand(4);
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ return getOperand(5);
+ }
}
const SDValue &getVectorLength() const {
- return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5);
+ switch (getOpcode()) {
+ default:
+ llvm_unreachable("Invalid opcode");
+ case ISD::VP_LOAD:
+ return getOperand(4);
+ case ISD::VP_STORE:
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ return getOperand(5);
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ return getOperand(6);
+ }
}
/// Return the addressing mode for this load or store:
@@ -2396,19 +2442,21 @@ public:
bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
static bool classof(const SDNode *N) {
- return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
+ return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
+ N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE ||
+ N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
}
};
/// This class is used to represent a VP_LOAD node
-class VPLoadSDNode : public VPLoadStoreSDNode {
+class VPLoadSDNode : public VPBaseLoadStoreSDNode {
public:
friend class SelectionDAG;
VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
EVT MemVT, MachineMemOperand *MMO)
- : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
+ : VPBaseLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
LoadSDNodeBits.ExtTy = ETy;
LoadSDNodeBits.IsExpanding = isExpanding;
}
@@ -2428,15 +2476,45 @@ public:
bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
};
+/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node.
+class VPStridedLoadSDNode : public VPBaseLoadStoreSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPStridedLoadSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
+ ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
+ bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
+ : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, Order, DL, VTs,
+ AM, MemVT, MMO) {
+ LoadSDNodeBits.ExtTy = ETy;
+ LoadSDNodeBits.IsExpanding = IsExpanding;
+ }
+
+ ISD::LoadExtType getExtensionType() const {
+ return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
+ }
+
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getOffset() const { return getOperand(2); }
+ const SDValue &getStride() const { return getOperand(3); }
+ const SDValue &getMask() const { return getOperand(4); }
+ const SDValue &getVectorLength() const { return getOperand(5); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD;
+ }
+ bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
+};
+
/// This class is used to represent a VP_STORE node
-class VPStoreSDNode : public VPLoadStoreSDNode {
+class VPStoreSDNode : public VPBaseLoadStoreSDNode {
public:
friend class SelectionDAG;
VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
EVT MemVT, MachineMemOperand *MMO)
- : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
+ : VPBaseLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
StoreSDNodeBits.IsCompressing = isCompressing;
}
@@ -2463,6 +2541,43 @@ public:
}
};
+/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node.
+class VPStridedStoreSDNode : public VPBaseLoadStoreSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPStridedStoreSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
+ ISD::MemIndexedMode AM, bool IsTrunc, bool IsCompressing,
+ EVT MemVT, MachineMemOperand *MMO)
+ : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_STORE, Order, DL,
+ VTs, AM, MemVT, MMO) {
+ StoreSDNodeBits.IsTruncating = IsTrunc;
+ StoreSDNodeBits.IsCompressing = IsCompressing;
+ }
+
+ /// Return true if this is a truncating store.
+ /// For integers this is the same as doing a TRUNCATE and storing the result.
+ /// For floats, it is the same as doing an FP_ROUND and storing the result.
+ bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
+
+ /// Returns true if the op does a compression to the vector before storing.
+ /// The node contiguously stores the active elements (integers or floats)
+ /// in src (those with their respective bit set in writemask k) to unaligned
+ /// memory at base_addr.
+ bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
+
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getOffset() const { return getOperand(3); }
+ const SDValue &getStride() const { return getOperand(4); }
+ const SDValue &getMask() const { return getOperand(5); }
+ const SDValue &getVectorLength() const { return getOperand(6); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE;
+ }
+};
+
/// This base class is used to represent MLOAD and MSTORE nodes
class MaskedLoadStoreSDNode : public MemSDNode {
public:
@@ -2588,13 +2703,9 @@ public:
return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
}
bool isIndexScaled() const {
- return (getIndexType() == ISD::SIGNED_SCALED) ||
- (getIndexType() == ISD::UNSIGNED_SCALED);
- }
- bool isIndexSigned() const {
- return (getIndexType() == ISD::SIGNED_SCALED) ||
- (getIndexType() == ISD::SIGNED_UNSCALED);
+ return !cast<ConstantSDNode>(getScale())->isOne();
}
+ bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); }
// In both nodes, the address is Op1 and the mask is Op2:
// VPGatherSDNode (Chain, base, index, scale, mask, vlen)
@@ -2675,17 +2786,10 @@ public:
ISD::MemIndexType getIndexType() const {
return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
}
- void setIndexType(ISD::MemIndexType IndexType) {
- LSBaseSDNodeBits.AddressingMode = IndexType;
- }
bool isIndexScaled() const {
- return (getIndexType() == ISD::SIGNED_SCALED) ||
- (getIndexType() == ISD::UNSIGNED_SCALED);
- }
- bool isIndexSigned() const {
- return (getIndexType() == ISD::SIGNED_SCALED) ||
- (getIndexType() == ISD::SIGNED_UNSCALED);
+ return !cast<ConstantSDNode>(getScale())->isOne();
}
+ bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); }
// In both nodes, the address is Op1 and the mask is Op2:
// MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
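
A hedged sketch querying the new strided-load node; the operand positions follow the (Chain, Ptr, Offset, Stride, Mask, EVL) layout documented above, and hasConstantStride is an illustrative helper:

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    bool hasConstantStride(const llvm::SDNode *N) {
      if (const auto *SLD = llvm::dyn_cast<llvm::VPStridedLoadSDNode>(N))
        return llvm::isa<llvm::ConstantSDNode>(SLD->getStride());
      return false;
    }
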
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
index 722c3275fd06..e7d608969124 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -76,11 +76,13 @@ public:
/// that don't fit the target's parameters for simple stores and can be more
/// efficient than using a library call. This function can return a null
/// SDValue if the target declines to use custom code and a different
- /// lowering strategy should be used.
+ /// lowering strategy should be used. Note that if AlwaysInline is true, the
+ /// function has to return a valid SDValue.
virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1,
SDValue Op2, SDValue Op3,
Align Alignment, bool isVolatile,
+ bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
return SDValue();
}
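
A sketch of a target override honoring the strengthened contract; MyTargetSelectionDAGInfo is hypothetical and the inline store sequence is elided:

    #include "llvm/CodeGen/SelectionDAGTargetInfo.h"

    class MyTargetSelectionDAGInfo : public llvm::SelectionDAGTargetInfo {
      llvm::SDValue EmitTargetCodeForMemset(
          llvm::SelectionDAG &DAG, const llvm::SDLoc &dl, llvm::SDValue Chain,
          llvm::SDValue Dst, llvm::SDValue Val, llvm::SDValue Size,
          llvm::Align Alignment, bool isVolatile, bool AlwaysInline,
          llvm::MachinePointerInfo DstPtrInfo) const override {
        if (!AlwaysInline)
          return llvm::SDValue(); // declining is fine; generic lowering takes over
        // With AlwaysInline set, the hook must emit code and return a valid
        // chain; the emitted sequence is represented by the plain chain here.
        return Chain;
      }
    };
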
diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h
index e8d618a24f9b..942a47c6cc7d 100644
--- a/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h
index 928d7cc6cc04..01cc9bc37931 100644
--- a/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/llvm/include/llvm/CodeGen/StackMaps.h
@@ -13,7 +13,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
@@ -23,6 +22,7 @@
namespace llvm {
class AsmPrinter;
+class MCSymbol;
class MCExpr;
class MCStreamer;
class raw_ostream;
diff --git a/llvm/include/llvm/CodeGen/StackProtector.h b/llvm/include/llvm/CodeGen/StackProtector.h
index 57456b3f6c16..b96c0c74fabc 100644
--- a/llvm/include/llvm/CodeGen/StackProtector.h
+++ b/llvm/include/llvm/CodeGen/StackProtector.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h b/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h
index 08ab2abbdd5b..a374736347f6 100644
--- a/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h
+++ b/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h
@@ -20,8 +20,6 @@
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
-#include <functional>
-#include <type_traits>
#include <utility>
diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h
index daaa27f72d52..94e8092319d7 100644
--- a/llvm/include/llvm/CodeGen/TailDuplicator.h
+++ b/llvm/include/llvm/CodeGen/TailDuplicator.h
@@ -16,15 +16,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <utility>
#include <vector>
namespace llvm {
+template <typename T, unsigned int N> class SmallSetVector;
+template <typename Fn> class function_ref;
+class MBFIWrapper;
class MachineBasicBlock;
class MachineBranchProbabilityInfo;
class MachineFunction;
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 62365330379d..1333f2d98973 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -46,7 +46,8 @@ namespace ISD {
unsigned IsHvaStart : 1; ///< HVA structure start
unsigned IsSecArgPass : 1; ///< Second argument
unsigned MemAlign : 4; ///< Log 2 of alignment when arg is passed in memory
- ///< (including byval/byref)
+ ///< (including byval/byref). The maximum
+ ///< alignment is checked by the IR verifier.
unsigned OrigAlign : 5; ///< Log 2 of original alignment
unsigned IsInConsecutiveRegsLast : 1;
unsigned IsInConsecutiveRegs : 1;
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index f2ca1590fc39..fbce5d7a9102 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -213,12 +213,24 @@ public:
virtual void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const = 0;
+ /// emitZeroCallUsedRegs - Zeros out call-used registers.
+ virtual void emitZeroCallUsedRegs(BitVector RegsToZero,
+ MachineBasicBlock &MBB) const {}
+
/// With basic block sections, emit callee saved frame moves for basic blocks
/// that are in a different section.
virtual void
emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {}
+ /// Returns true if we may need to fix the unwind information for the
+ /// function.
+ virtual bool enableCFIFixup(MachineFunction &MF) const;
+
+ /// Emit CFI instructions that recreate the state of the unwind information
+ /// upon function entry.
+ virtual void resetCFIToInitialState(MachineBasicBlock &MBB) const {}
+
/// Replace a StackProbe stub (if any) with the actual probe code inline
virtual void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const {}
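
A hedged sketch of implementing the new zeroing hook; MyFrameLowering is hypothetical, the actual zeroing instruction is target-specific, and the class stays abstract because the pure virtuals are omitted:

    #include "llvm/ADT/BitVector.h"
    #include "llvm/CodeGen/TargetFrameLowering.h"

    struct MyFrameLowering : llvm::TargetFrameLowering {
      using TargetFrameLowering::TargetFrameLowering;
      void emitZeroCallUsedRegs(llvm::BitVector RegsToZero,
                                llvm::MachineBasicBlock &MBB) const override {
        for (unsigned Reg : RegsToZero.set_bits()) {
          // Emit a target zeroing instruction (e.g. XOR Reg, Reg) at the
          // return point in MBB; elided in this sketch.
          (void)Reg;
        }
        (void)MBB;
      }
    };
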
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 411811d08c18..f9183e0a9c66 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -382,6 +382,17 @@ public:
/// to which instructions should be sunk.
virtual bool shouldSink(const MachineInstr &MI) const { return true; }
+ /// Return false if the instruction should not be hoisted by MachineLICM.
+ ///
+ /// MachineLICM determines on its own whether the instruction is safe to
+ /// hoist; this gives the target a hook to extend this assessment and prevent
+ /// an instruction from being hoisted out of a given loop for target-specific
+ /// reasons.
+ virtual bool shouldHoist(const MachineInstr &MI,
+ const MachineLoop *FromLoop) const {
+ return true;
+ }
+
/// Re-issue the specified 'original' instruction at the
/// specific location targeting a new destination register.
/// The register in Orig->getOperand(0).getReg() will be substituted by
@@ -723,12 +734,16 @@ public:
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0;
/// Create a condition to determine if the trip count of the loop is greater
- /// than TC.
+ /// than TC, where TC is always one more than for the previous prologue or
+ /// 0 if this is being called for the outermost prologue.
///
/// If the trip count is statically known to be greater than TC, return
/// true. If the trip count is statically known to be not greater than TC,
/// return false. Otherwise return nullopt and fill out Cond with the test
/// condition.
+ ///
+ /// Note: This hook is guaranteed to be called from the innermost to the
+ /// outermost prologue of the loop being software pipelined.
virtual Optional<bool>
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
SmallVectorImpl<MachineOperand> &Cond) = 0;
@@ -1268,13 +1283,6 @@ protected:
}
public:
- /// getAddressSpaceForPseudoSourceKind - Given the kind of memory
- /// (e.g. stack) the target returns the corresponding address space.
- virtual unsigned
- getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
- return 0;
- }
-
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instruction. If this is
/// possible, returns true as well as the new instructions by reference.
@@ -1942,7 +1950,7 @@ public:
virtual MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
- const outliner::Candidate &C) const {
+ outliner::Candidate &C) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::insertOutlinedCall!");
}
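
A sketch of a target using the new MachineLICM veto; the copy-in-innermost-loop heuristic is purely illustrative, and the other TargetInstrInfo hooks keep their defaults:

    #include "llvm/CodeGen/MachineLoopInfo.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"

    struct MyInstrInfo : llvm::TargetInstrInfo {
      bool shouldHoist(const llvm::MachineInstr &MI,
                       const llvm::MachineLoop *FromLoop) const override {
        // MachineLICM has already proven the hoist safe; this hook only adds a
        // target-specific profitability veto.
        if (FromLoop && FromLoop->isInnermost() && MI.isCopy())
          return false;
        return true;
      }
    };
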
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 3861648a5feb..98b9a416ea59 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -25,7 +25,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DAGCombine.h"
@@ -248,12 +248,21 @@ public:
/// w.r.t. what they should expand to.
enum class AtomicExpansionKind {
None, // Don't expand the instruction.
+ CastToInteger, // Cast the atomic instruction to another type, e.g. from
+ // floating-point to integer type.
LLSC, // Expand the instruction into loadlinked/storeconditional; used
// by ARM/AArch64.
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
- MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
+ MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
+ BitTestIntrinsic, // Use a target-specific intrinsic for special bit
+ // operations; used by X86.
+ Expand, // Generic expansion in terms of other atomic operations.
+
+ // Rewrite to a non-atomic form for use in a known non-preemptible
+ // environment.
+ NotAtomic
};
/// Enum that specifies when a multiplication should be expanded.
@@ -1071,6 +1080,11 @@ public:
return false;
}
+ /// How to legalize this custom operation?
+ virtual LegalizeAction getCustomOperationAction(SDNode &Op) const {
+ return Legal;
+ }
+
/// Return how this operation should be treated: either it is legal, needs to
/// be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
@@ -1210,6 +1224,10 @@ public:
uint64_t Range, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const;
+ /// Returns preferred type for switch condition.
+ virtual MVT getPreferredSwitchConditionType(LLVMContext &Context,
+ EVT ConditionVT) const;
+
/// Return true if lowering to a bit test is suitable for a set of case
/// clusters which contains \p NumDests unique destinations, \p Low and
/// \p High as its lowest and highest case values, and expects \p NumCmps
@@ -1372,7 +1390,9 @@ public:
// Returns true if VT is a legal index type for masked gathers/scatters
// on this target
- virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; }
+ virtual bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const {
+ return false;
+ }
/// Return how the condition code should be treated: either it is legal, needs
/// to be expanded to some other code sequence, or the target has a custom
@@ -1871,7 +1891,7 @@ public:
/// minimum size the object must be to be aligned and PrefAlign is set to the
/// preferred alignment.
virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
- unsigned & /*PrefAlign*/) const {
+ Align & /*PrefAlign*/) const {
return false;
}
@@ -1946,6 +1966,14 @@ public:
llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
}
+ /// Perform a bit test atomicrmw using a target-specific intrinsic. This
+ /// represents the combined bit test intrinsic which will be lowered at a late
+ /// stage by the backend.
+ virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
+ llvm_unreachable(
+ "Bit test atomicrmw expansion unimplemented on this target");
+ }
+
/// Perform a masked cmpxchg using a target-specific intrinsic. This
/// represents the core LL/SC loop which will be lowered at a late stage by
/// the backend.
@@ -2005,12 +2033,6 @@ public:
// be unnecessarily held, except if clrex, inserted by this hook, is executed.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {}
- /// Returns true if the given (atomic) store should be expanded by the
- /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
- virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
- return false;
- }
-
/// Returns true if arguments should be sign-extended in lib calls.
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
return IsSigned;
@@ -2027,6 +2049,30 @@ public:
return AtomicExpansionKind::None;
}
+ /// Returns how the given (atomic) load should be cast by the IR-level
+ /// AtomicExpand pass.
+ virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const {
+ if (LI->getType()->isFloatingPointTy())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+ }
+
+ /// Returns how the given (atomic) store should be expanded by the IR-level
+ /// AtomicExpand pass. For instance, AtomicExpansionKind::Expand will try
+ /// to use an atomicrmw xchg.
+ virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ return AtomicExpansionKind::None;
+ }
+
+ /// Returns how the given (atomic) store should be cast by the IR-level
+ /// AtomicExpand pass. For instance, AtomicExpansionKind::CastToInteger
+ /// will try to cast the operands to integer values.
+ virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const {
+ if (SI->getValueOperand()->getType()->isFloatingPointTy())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+ }
+
/// Returns how the given atomic cmpxchg should be expanded by the IR-level
/// AtomicExpand pass.
virtual AtomicExpansionKind
@@ -2041,6 +2087,18 @@ public:
AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
}
+ /// Returns how the given atomicrmw should be cast by the IR-level
+ /// AtomicExpand pass.
+ virtual AtomicExpansionKind
+ shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const {
+ if (RMWI->getOperation() == AtomicRMWInst::Xchg &&
+ (RMWI->getValOperand()->getType()->isFloatingPointTy() ||
+ RMWI->getValOperand()->getType()->isPointerTy()))
+ return AtomicExpansionKind::CastToInteger;
+
+ return AtomicExpansionKind::None;
+ }
+
/// On some platforms, an AtomicRMW that never actually modifies the value
/// (such as fetch_add of 0) can be turned into a fence followed by an
/// atomic load. This may sound useless, but it makes it possible for the
@@ -2123,8 +2181,8 @@ public:
/// about some cases, a default true can be returned to let the DAGCombiner
/// decide.
/// AddNode is (add x, c1), and ConstNode is c2.
- virtual bool isMulAddWithConstProfitable(const SDValue &AddNode,
- const SDValue &ConstNode) const {
+ virtual bool isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const {
return true;
}
@@ -2138,6 +2196,18 @@ public:
return false;
}
+ /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic.
+ /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always
+ /// considered beneficial.
+ /// If optimizing for size, expansion is only considered beneficial for up to
+ /// 5 multiplies and a divide (if the exponent is negative).
+ bool isBeneficialToExpandPowI(int Exponent, bool OptForSize) const {
+ if (Exponent < 0)
+ Exponent = -Exponent;
+ return !OptForSize ||
+ (countPopulation((unsigned int)Exponent) + Log2_32(Exponent) < 7);
+ }
+
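The size heuristic above can be sanity-checked with a standalone C++20 restatement (the function name is ours; only the arithmetic mirrors the patch):

  #include <bit>
  #include <cstdlib>

  static bool beneficialToExpandPowI(int Exponent, bool OptForSize) {
    unsigned E = (unsigned)std::abs(Exponent);
    if (E == 0)
      return true; // powi(x, 0) is trivially 1.0
    // LLVM's check: popcount(E) + floor(log2(E)) must stay below 7 when
    // optimizing for size, i.e. at most 5 multiplies in the square-and-multiply
    // chain plus the final divide for negative exponents.
    return !OptForSize || (std::popcount(E) + (std::bit_width(E) - 1) < 7);
  }

For example, powi(x, 16) scores 1 + 4 = 5, so it is expanded even at -Os, while powi(x, 31) scores 5 + 4 = 9 and stays a libcall.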
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
@@ -2232,6 +2302,16 @@ protected:
assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
OpActions[(unsigned)VT.SimpleTy][Op] = Action;
}
+ void setOperationAction(ArrayRef<unsigned> Ops, MVT VT,
+ LegalizeAction Action) {
+ for (auto Op : Ops)
+ setOperationAction(Op, VT, Action);
+ }
+ void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs,
+ LegalizeAction Action) {
+ for (auto VT : VTs)
+ setOperationAction(Ops, VT, Action);
+ }
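These ArrayRef overloads let a target constructor collapse the usual nested loops over opcodes and types into one call. A minimal sketch of the intended call style inside a hypothetical target's constructor (the opcode and type choices are illustrative only):

  // Six table entries in one statement instead of a 2x3 loop nest.
  setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FPOW}, {MVT::f32, MVT::f64},
                     Expand);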
/// Indicate that the specified load with extension does not work with the
/// specified type and indicate what to do about it.
@@ -2244,6 +2324,16 @@ protected:
LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
}
+ void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
+ LegalizeAction Action) {
+ for (auto ExtType : ExtTypes)
+ setLoadExtAction(ExtType, ValVT, MemVT, Action);
+ }
+ void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
+ ArrayRef<MVT> MemVTs, LegalizeAction Action) {
+ for (auto MemVT : MemVTs)
+ setLoadExtAction(ExtTypes, ValVT, MemVT, Action);
+ }
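The same pattern applies to extending loads; an illustrative use in a hypothetical target constructor:

  // Promote every extension kind when loading i1 memory into i32 registers.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);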
/// Indicate that the specified truncating store does not work with the
/// specified type and indicate what to do about it.
@@ -2257,8 +2347,16 @@ protected:
///
/// NOTE: All indexed mode loads are initialized to Expand in
/// TargetLowering.cpp
- void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
- setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
+ void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT,
+ LegalizeAction Action) {
+ for (auto IdxMode : IdxModes)
+ setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
+ }
+
+ void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
+ LegalizeAction Action) {
+ for (auto VT : VTs)
+ setIndexedLoadAction(IdxModes, VT, Action);
}
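And likewise for indexed loads (hypothetical target; modes and types are illustrative):

  setIndexedLoadAction({ISD::PRE_INC, ISD::POST_INC},
                       {MVT::i8, MVT::i16, MVT::i32}, Legal);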
/// Indicate that the specified indexed store does or does not work with the
@@ -2266,8 +2364,16 @@ protected:
///
/// NOTE: All indexed mode stores are initialized to Expand in
/// TargetLowering.cpp
- void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
- setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
+ void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT,
+ LegalizeAction Action) {
+ for (auto IdxMode : IdxModes)
+ setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
+ }
+
+ void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
+ LegalizeAction Action) {
+ for (auto VT : VTs)
+ setIndexedStoreAction(IdxModes, VT, Action);
}
/// Indicate that the specified indexed masked load does or does not work with
@@ -2292,17 +2398,24 @@ protected:
/// Indicate that the specified condition code is or isn't supported on the
/// target and indicate what to do about it.
- void setCondCodeAction(ISD::CondCode CC, MVT VT,
+ void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT,
LegalizeAction Action) {
- assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
- "Table isn't big enough!");
- assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
- /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit
- /// value and the upper 29 bits index into the second dimension of the array
- /// to select what 32-bit value to use.
- uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
- CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
- CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
+ for (auto CC : CCs) {
+ assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
+ "Table isn't big enough!");
+ assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
+ /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the
+ /// 32-bit value and the upper 29 bits index into the second dimension of
+ /// the array to select what 32-bit value to use.
+ uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
+ CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
+ CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
+ }
+ }
+ void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs,
+ LegalizeAction Action) {
+ for (auto VT : VTs)
+ setCondCodeAction(CCs, VT, Action);
}
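Illustrative use of the batched form in a hypothetical target constructor:

  // Expand the ordered FP comparisons for both scalar FP types at once.
  setCondCodeAction({ISD::SETOGT, ISD::SETOGE, ISD::SETOLT, ISD::SETOLE},
                    {MVT::f32, MVT::f64}, Expand);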
/// If Opc/OrigVT is specified as being promoted, the promotion code defaults
@@ -2323,9 +2436,11 @@ protected:
/// Targets should invoke this method for each target independent node that
/// they want to provide a custom DAG combiner for by implementing the
/// PerformDAGCombine virtual method.
- void setTargetDAGCombine(ISD::NodeType NT) {
- assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
- TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
+ void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) {
+ for (auto NT : NTs) {
+ assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
+ TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7);
+ }
}
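An illustrative call, registering one PerformDAGCombine hook for several node kinds at once (the node choices are arbitrary):

  setTargetDAGCombine({ISD::ADD, ISD::SUB, ISD::MUL});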
/// Set the target's minimum function alignment.
@@ -2510,6 +2625,10 @@ public:
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
return true;
default: return false;
}
@@ -2653,6 +2772,10 @@ public:
return false;
}
+ /// Return true if this constant should be sign extended when promoting to
+ /// a larger type.
+ virtual bool signExtendConstant(const ConstantInt *C) const { return false; }
+
/// Return true if sinking I's operands to the same basic block as I is
/// profitable, e.g. because the operands can be folded into a target
/// instruction during instruction selection. After calling the function
@@ -2851,6 +2974,14 @@ public:
return false;
}
+ /// Return true if pulling a binary operation into a select with an identity
+ /// constant is profitable. This is the inverse of an IR transform.
+ /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X
+ virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
+ EVT VT) const {
+ return false;
+ }
+
/// Return true if it is beneficial to convert a load of a constant to
/// just the constant itself.
/// On some targets it might be more efficient to use a combination of
@@ -2940,6 +3071,10 @@ public:
void setLibcallName(RTLIB::Libcall Call, const char *Name) {
LibcallRoutineNames[Call] = Name;
}
+ void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) {
+ for (auto Call : Calls)
+ setLibcallName(Call, Name);
+ }
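Illustrative use: a target lacking 128-bit shift libcalls can now null out the whole group in one statement rather than three:

  setLibcallName({RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128}, nullptr);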
/// Get the libcall routine name for the specified libcall.
const char *getLibcallName(RTLIB::Libcall Call) const {
@@ -3421,11 +3556,13 @@ public:
/// Determines the optimal series of memory ops to replace the memset / memcpy.
/// Return true if the number of memory ops is below the threshold (Limit).
+ /// Note that this is always the case when Limit is ~0.
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
- bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
- const MemOp &Op, unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes) const;
+ virtual bool
+ findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
+ const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const;
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
@@ -3534,9 +3671,16 @@ public:
/// Helper wrapper around SimplifyDemandedVectorElts.
/// Adds Op back to the worklist upon success.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
- APInt &KnownUndef, APInt &KnownZero,
DAGCombinerInfo &DCI) const;
+ /// Return true if the target supports simplifying demanded vector elements by
+ /// converting them to undefs.
+ virtual bool
+ shouldSimplifyDemandedVectorElts(SDValue Op,
+ const TargetLoweringOpt &TLO) const {
+ return true;
+ }
+
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
/// argument allows us to only collect the known bits that are shared by the
@@ -3653,6 +3797,12 @@ public:
APInt &UndefElts,
unsigned Depth = 0) const;
+ /// Returns true if the given node is considered a canonical constant for the
+ /// target, which should not be transformed back into a BUILD_VECTOR.
+ virtual bool isTargetCanonicalConstantNode(SDValue Op) const {
+ return Op.getOpcode() == ISD::SPLAT_VECTOR;
+ }
+
struct DAGCombinerInfo {
void *DC; // The DAG Combiner object.
CombineLevel Level;
@@ -3805,7 +3955,7 @@ public:
if (Neg && Cost == NegatibleCost::Cheaper)
return Neg;
// Remove the new created node to avoid the side effect to the DAG.
- if (Neg && Neg.getNode()->use_empty())
+ if (Neg && Neg->use_empty())
DAG.RemoveDeadNode(Neg.getNode());
return SDValue();
}
@@ -4270,6 +4420,7 @@ public:
C_Register, // Constraint represents specific register(s).
C_RegisterClass, // Constraint represents any of register(s) in class.
C_Memory, // Memory constraint.
+ C_Address, // Address constraint.
C_Immediate, // Requires an immediate.
C_Other, // Something else.
C_Unknown // Unsupported constraint.
@@ -4374,6 +4525,8 @@ public:
return InlineAsm::Constraint_o;
if (ConstraintCode == "X")
return InlineAsm::Constraint_X;
+ if (ConstraintCode == "p")
+ return InlineAsm::Constraint_p;
return InlineAsm::Constraint_Unknown;
}
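The new C_Address kind backs the GCC-style "p" inline-asm constraint, which accepts any operand that is a valid memory address. A minimal user-level sketch (x86-flavored; the mnemonic and the %a0 operand modifier are illustrative):

  void touch(const void *Ptr) {
    asm volatile("prefetcht0 %a0" : : "p"(Ptr));
  }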
@@ -4410,6 +4563,14 @@ public:
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const;
+ /// Targets may override this function to provide custom SREM lowering for
+ /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
+ /// assumes SREM is expensive and replaces it with a series of other integer
+ /// operations.
+ virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const;
+
/// Indicate whether this target prefers to combine FDIVs with the same
/// divisor. If the transform should never be done, return zero. If the
/// transform should be done, return the minimum number of divisor uses
@@ -4442,6 +4603,13 @@ public:
return SDValue();
}
+ /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is
+ /// required for correctness since InstCombine might have canonicalized a
+ /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall
+ /// through to the default expansion or softening to a libcall, we might introduce a
+ /// link-time dependency on libm into a file that originally did not have one.
+ SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const;
+
/// Return a reciprocal estimate value for the input operand.
/// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
/// 'Enabled' as set by a potential default override attribute.
@@ -4554,6 +4722,16 @@ public:
/// \returns The expansion result
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;
+ /// Expand check for floating point class.
+ /// \param ResultVT The type of intrinsic call result.
+ /// \param Op The tested value.
+ /// \param Test The test to perform.
+ /// \param Flags The optimization flags.
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, unsigned Test,
+ SDNodeFlags Flags, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
@@ -4693,28 +4871,32 @@ public:
/// method accepts vectors as its arguments.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
- /// Legalize a SETCC with given LHS and RHS and condition code CC on the
- /// current target.
+ /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
+ /// on the current target. A VP_SETCC will additionally be given a Mask
+ /// and/or EVL not equal to SDValue().
///
/// If the SETCC has been legalized using AND / OR, then the legalized node
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
- /// will be set to false.
+ /// will be set to false. This will also hold if the VP_SETCC has been
+ /// legalized using VP_AND / VP_OR.
///
- /// If the SETCC has been legalized by using getSetCCSwappedOperands(),
- /// then the values of LHS and RHS will be swapped, CC will be set to the
- /// new condition, and NeedInvert will be set to false.
+ /// If the SETCC / VP_SETCC has been legalized by using
+ /// getSetCCSwappedOperands(), then the values of LHS and RHS will be
+ /// swapped, CC will be set to the new condition, and NeedInvert will be set
+ /// to false.
///
- /// If the SETCC has been legalized using the inverse condcode, then LHS and
- /// RHS will be unchanged, CC will set to the inverted condcode, and
- /// NeedInvert will be set to true. The caller must invert the result of the
- /// SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to swap
- /// the effect of a true/false result.
+ /// If the SETCC / VP_SETCC has been legalized using the inverse condcode,
+ /// then LHS and RHS will be unchanged, CC will be set to the inverted condcode,
+ /// and NeedInvert will be set to true. The caller must invert the result of
+ /// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to
+ /// swap the effect of a true/false result.
///
- /// \returns true if the SetCC has been legalized, false if it hasn't.
+ /// \returns true if the SETCC / VP_SETCC has been legalized, false if it
+ /// hasn't.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
- SDValue &RHS, SDValue &CC, bool &NeedInvert,
- const SDLoc &dl, SDValue &Chain,
- bool IsSignaling = false) const;
+ SDValue &RHS, SDValue &CC, SDValue Mask,
+ SDValue EVL, bool &NeedInvert, const SDLoc &dl,
+ SDValue &Chain, bool IsSignaling = false) const;
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
@@ -4766,10 +4948,6 @@ public:
// combiner can fold the new nodes.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
- /// Give targets the chance to reduce the number of distinct addresing modes.
- ISD::MemIndexType getCanonicalIndexType(ISD::MemIndexType IndexType,
- EVT MemVT, SDValue Offsets) const;
-
private:
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, DAGCombinerInfo &DCI) const;
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 2c8b17807f7c..08267d70906a 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
@@ -118,6 +119,9 @@ public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ MCSection *getStaticDtorSection(unsigned Priority,
+ const MCSymbol *KeySym) const override;
+
/// Emit the module flags that specify the garbage collection information.
void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override;
@@ -282,6 +286,13 @@ public:
MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func,
const TargetMachine &TM) const override;
+
+ /// For functions, this returns the LSDA section. If -ffunction-sections is
+ /// on, this returns a unique csect whose name is .gcc_except_table with the
+ /// function name appended as a suffix.
+ MCSection *getSectionForLSDA(const Function &F, const MCSymbol &FnSym,
+ const TargetMachine &TM) const override;
};
class TargetLoweringObjectFileGOFF : public TargetLoweringObjectFile {
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 9b13b61fc9de..8d7086d02c8a 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -345,6 +345,9 @@ protected:
// Helper to verify the analysis is really immutable.
void setOpt(bool &Opt, bool Val);
+ /// Return true if register allocator is specified by -regalloc=override.
+ bool isCustomizedRegAlloc();
+
/// Methods with trivial inline returns are convenient points in the common
/// codegen pass pipeline where targets may insert passes. Methods with
/// out-of-line standard implementations are major CodeGen stages called by
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index c3b842052ef5..04369a5bfe0d 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -29,7 +29,6 @@
#include "llvm/Support/Printable.h"
#include <cassert>
#include <cstdint>
-#include <functional>
namespace llvm {
@@ -56,6 +55,8 @@ public:
const LaneBitmask LaneMask;
/// Classes with a higher priority value are assigned first by register
/// allocators using a greedy heuristic. The value is in the range [0,63].
+ /// Values >= 32 should be used with care since they may overlap with other
+ /// fields in the allocator's priority heuristics.
const uint8_t AllocationPriority;
/// Configurable target specific flags.
const uint8_t TSFlags;
@@ -415,19 +416,11 @@ public:
/// Returns true if the two registers are equal or alias each other.
/// The registers may be virtual registers.
- bool regsOverlap(Register regA, Register regB) const {
- if (regA == regB) return true;
- if (!regA.isPhysical() || !regB.isPhysical())
- return false;
-
- // Regunits are numerically ordered. Find a common unit.
- MCRegUnitIterator RUA(regA.asMCReg(), this);
- MCRegUnitIterator RUB(regB.asMCReg(), this);
- do {
- if (*RUA == *RUB) return true;
- if (*RUA < *RUB) ++RUA;
- else ++RUB;
- } while (RUA.isValid() && RUB.isValid());
+ bool regsOverlap(Register RegA, Register RegB) const {
+ if (RegA == RegB)
+ return true;
+ if (RegA.isPhysical() && RegB.isPhysical())
+ return MCRegisterInfo::regsOverlap(RegA.asMCReg(), RegB.asMCReg());
return false;
}
@@ -567,6 +560,24 @@ public:
virtual bool isCalleeSavedPhysReg(MCRegister PhysReg,
const MachineFunction &MF) const;
+ /// Returns true if PhysReg can be used as an argument to a function.
+ virtual bool isArgumentRegister(const MachineFunction &MF,
+ MCRegister PhysReg) const {
+ return false;
+ }
+
+ /// Returns true if PhysReg is a fixed register.
+ virtual bool isFixedRegister(const MachineFunction &MF,
+ MCRegister PhysReg) const {
+ return false;
+ }
+
+ /// Returns true if PhysReg is a general purpose register.
+ virtual bool isGeneralPurposeRegister(const MachineFunction &MF,
+ MCRegister PhysReg) const {
+ return false;
+ }
+
/// Prior to adding the live-out mask to a stackmap or patchpoint
/// instruction, provide the target the opportunity to adjust it (mainly to
/// remove pseudo-registers that should be ignored).
@@ -1067,6 +1078,14 @@ public:
return false;
}
+ /// When prioritizing live ranges in register allocation, if this hook returns
+ /// true then the AllocationPriority of the register class will be treated as
+ /// more important than whether the range is local to a basic block or global.
+ virtual bool
+ regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const {
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
/// Debug information queries.
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 3fac2f688dd8..dbd678b75d05 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -13,12 +13,10 @@
#ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H
#define LLVM_CODEGEN_TARGETSUBTARGETINFO_H
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/PBQPRAConstraint.h"
-#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CodeGen.h"
@@ -27,6 +25,9 @@
namespace llvm {
+class APInt;
+class MachineFunction;
+class ScheduleDAGMutation;
class CallLowering;
class InlineAsmLowering;
class InstrItineraryData;
@@ -272,11 +273,6 @@ public:
/// a finer grain to tune the register allocator.
virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const;
- /// True if the subtarget should consider the cost of local intervals
- /// created by a split candidate when choosing the best split candidate. This
- /// heuristic may be compile time intensive.
- virtual bool enableAdvancedRASplitCost() const;
-
/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
virtual bool useAA() const;
diff --git a/llvm/include/llvm/CodeGen/TileShapeInfo.h b/llvm/include/llvm/CodeGen/TileShapeInfo.h
index 4e574bd96cca..1b5f902139fb 100644
--- a/llvm/include/llvm/CodeGen/TileShapeInfo.h
+++ b/llvm/include/llvm/CodeGen/TileShapeInfo.h
@@ -38,7 +38,7 @@ public:
ShapeT()
: Row(nullptr), Col(nullptr), RowImm(InvalidImmShape),
ColImm(InvalidImmShape) {}
- bool operator==(const ShapeT &Shape) {
+ bool operator==(const ShapeT &Shape) const {
MachineOperand *R = Shape.Row;
MachineOperand *C = Shape.Col;
if (!R || !C)
@@ -52,7 +52,7 @@ public:
return false;
}
- bool operator!=(const ShapeT &Shape) { return !(*this == Shape); }
+ bool operator!=(const ShapeT &Shape) const { return !(*this == Shape); }
MachineOperand *getRow() const { return Row; }
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h
index 7b17b98d5c55..48d265476ca8 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -19,7 +19,6 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TypeSize.h"
-#include "llvm/Support/WithColor.h"
#include <cassert>
#include <cstdint>
#include <string>
@@ -365,6 +364,12 @@ namespace llvm {
return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()};
}
+ // Return the number of bytes overwritten by a store of this value type or
+ // this value type's element type in the case of a vector.
+ uint64_t getScalarStoreSize() const {
+ return getScalarType().getStoreSize().getFixedSize();
+ }
+
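An illustrative relationship between the two store-size queries (types chosen arbitrarily):

  EVT VT = MVT::v4i32;
  uint64_t ElementBytes = VT.getScalarStoreSize();         // 4
  uint64_t VectorBytes = VT.getStoreSize().getFixedSize(); // 16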
/// Return the number of bits overwritten by a store of the specified value
/// type.
///
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 7f989e08e9bf..2194800b7ba9 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -20,204 +20,211 @@ class ValueType<int size, int value> {
def OtherVT : ValueType<0, 1>; // "Other" value
def i1 : ValueType<1, 2>; // One bit boolean value
-def i8 : ValueType<8, 3>; // 8-bit integer value
-def i16 : ValueType<16, 4>; // 16-bit integer value
-def i32 : ValueType<32, 5>; // 32-bit integer value
-def i64 : ValueType<64, 6>; // 64-bit integer value
-def i128 : ValueType<128, 7>; // 128-bit integer value
-
-def bf16 : ValueType<16, 8>; // 16-bit brain floating point value
-def f16 : ValueType<16, 9>; // 16-bit floating point value
-def f32 : ValueType<32, 10>; // 32-bit floating point value
-def f64 : ValueType<64, 11>; // 64-bit floating point value
-def f80 : ValueType<80, 12>; // 80-bit floating point value
-def f128 : ValueType<128, 13>; // 128-bit floating point value
-def ppcf128 : ValueType<128, 14>; // PPC 128-bit floating point value
-
-def v1i1 : ValueType<1, 15>; // 1 x i1 vector value
-def v2i1 : ValueType<2, 16>; // 2 x i1 vector value
-def v4i1 : ValueType<4, 17>; // 4 x i1 vector value
-def v8i1 : ValueType<8, 18>; // 8 x i1 vector value
-def v16i1 : ValueType<16, 19>; // 16 x i1 vector value
-def v32i1 : ValueType<32, 20>; // 32 x i1 vector value
-def v64i1 : ValueType<64, 21>; // 64 x i1 vector value
-def v128i1 : ValueType<128, 22>; // 128 x i1 vector value
-def v256i1 : ValueType<256, 23>; // 256 x i1 vector value
-def v512i1 : ValueType<512, 24>; // 512 x i1 vector value
-def v1024i1 : ValueType<1024, 25>; // 1024 x i1 vector value
-
-def v1i8 : ValueType<8, 26>; // 1 x i8 vector value
-def v2i8 : ValueType<16, 27>; // 2 x i8 vector value
-def v4i8 : ValueType<32, 28>; // 4 x i8 vector value
-def v8i8 : ValueType<64, 29>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 30>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 31>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 32>; // 64 x i8 vector value
-def v128i8 : ValueType<1024, 33>; // 128 x i8 vector value
-def v256i8 : ValueType<2048, 34>; // 256 x i8 vector value
-def v512i8 : ValueType<4096, 35>; // 512 x i8 vector value
-def v1024i8 : ValueType<8192, 36>; // 1024 x i8 vector value
-
-def v1i16 : ValueType<16, 37>; // 1 x i16 vector value
-def v2i16 : ValueType<32, 38>; // 2 x i16 vector value
-def v3i16 : ValueType<48, 39>; // 3 x i16 vector value
-def v4i16 : ValueType<64, 40>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 41>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 42>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 43>; // 32 x i16 vector value
-def v64i16 : ValueType<1024, 44>; // 64 x i16 vector value
-def v128i16 : ValueType<2048, 45>; // 128 x i16 vector value
-def v256i16 : ValueType<4096, 46>; // 256 x i16 vector value
-def v512i16 : ValueType<8192, 47>; // 512 x i16 vector value
-
-def v1i32 : ValueType<32, 48>; // 1 x i32 vector value
-def v2i32 : ValueType<64, 49>; // 2 x i32 vector value
-def v3i32 : ValueType<96, 50>; // 3 x i32 vector value
-def v4i32 : ValueType<128, 51>; // 4 x i32 vector value
-def v5i32 : ValueType<160, 52>; // 5 x i32 vector value
-def v6i32 : ValueType<192, 53>; // 6 x f32 vector value
-def v7i32 : ValueType<224, 54>; // 7 x f32 vector value
-def v8i32 : ValueType<256, 55>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 56>; // 16 x i32 vector value
-def v32i32 : ValueType<1024, 57>; // 32 x i32 vector value
-def v64i32 : ValueType<2048, 58>; // 64 x i32 vector value
-def v128i32 : ValueType<4096, 59>; // 128 x i32 vector value
-def v256i32 : ValueType<8192, 60>; // 256 x i32 vector value
-def v512i32 : ValueType<16384, 61>; // 512 x i32 vector value
-def v1024i32 : ValueType<32768, 62>; // 1024 x i32 vector value
-def v2048i32 : ValueType<65536, 63>; // 2048 x i32 vector value
-
-def v1i64 : ValueType<64, 64>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 65>; // 2 x i64 vector value
-def v3i64 : ValueType<192, 66>; // 3 x i64 vector value
-def v4i64 : ValueType<256, 67>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 68>; // 8 x i64 vector value
-def v16i64 : ValueType<1024, 69>; // 16 x i64 vector value
-def v32i64 : ValueType<2048, 70>; // 32 x i64 vector value
-def v64i64 : ValueType<4096, 71>; // 64 x i64 vector value
-def v128i64 : ValueType<8192, 72>; // 128 x i64 vector value
-def v256i64 : ValueType<16384, 73>; // 256 x i64 vector value
-
-def v1i128 : ValueType<128, 74>; // 1 x i128 vector value
-
-def v1f16 : ValueType<16, 75>; // 1 x f16 vector value
-def v2f16 : ValueType<32, 76>; // 2 x f16 vector value
-def v3f16 : ValueType<48, 77>; // 3 x f16 vector value
-def v4f16 : ValueType<64, 78>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 79>; // 8 x f16 vector value
-def v16f16 : ValueType<256, 80>; // 16 x f16 vector value
-def v32f16 : ValueType<512, 81>; // 32 x f16 vector value
-def v64f16 : ValueType<1024, 82>; // 64 x f16 vector value
-def v128f16 : ValueType<2048, 83>; // 128 x f16 vector value
-def v256f16 : ValueType<4096, 84>; // 256 x f16 vector value
-def v512f16 : ValueType<8192, 85>; // 512 x f16 vector value
-
-def v2bf16 : ValueType<32, 86>; // 2 x bf16 vector value
-def v3bf16 : ValueType<48, 87>; // 3 x bf16 vector value
-def v4bf16 : ValueType<64, 88>; // 4 x bf16 vector value
-def v8bf16 : ValueType<128, 89>; // 8 x bf16 vector value
-def v16bf16 : ValueType<256, 90>; // 16 x bf16 vector value
-def v32bf16 : ValueType<512, 91>; // 32 x bf16 vector value
-def v64bf16 : ValueType<1024, 92>; // 64 x bf16 vector value
-def v128bf16 : ValueType<2048, 93>; // 128 x bf16 vector value
-
-def v1f32 : ValueType<32, 94>; // 1 x f32 vector value
-def v2f32 : ValueType<64, 95>; // 2 x f32 vector value
-def v3f32 : ValueType<96, 96>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 97>; // 4 x f32 vector value
-def v5f32 : ValueType<160, 98>; // 5 x f32 vector value
-def v6f32 : ValueType<192, 99>; // 6 x f32 vector value
-def v7f32 : ValueType<224, 100>; // 7 x f32 vector value
-def v8f32 : ValueType<256, 101>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 102>; // 16 x f32 vector value
-def v32f32 : ValueType<1024, 103>; // 32 x f32 vector value
-def v64f32 : ValueType<2048, 104>; // 64 x f32 vector value
-def v128f32 : ValueType<4096, 105>; // 128 x f32 vector value
-def v256f32 : ValueType<8192, 106>; // 256 x f32 vector value
-def v512f32 : ValueType<16384, 107>; // 512 x f32 vector value
-def v1024f32 : ValueType<32768, 108>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 109>; // 2048 x f32 vector value
-
-def v1f64 : ValueType<64, 110>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 111>; // 2 x f64 vector value
-def v3f64 : ValueType<192, 112>; // 3 x f64 vector value
-def v4f64 : ValueType<256, 113>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 114>; // 8 x f64 vector value
-def v16f64 : ValueType<1024, 115>; // 16 x f64 vector value
-def v32f64 : ValueType<2048, 116>; // 32 x f64 vector value
-def v64f64 : ValueType<4096, 117>; // 64 x f64 vector value
-def v128f64 : ValueType<8192, 118>; // 128 x f64 vector value
-def v256f64 : ValueType<16384, 119>; // 256 x f64 vector value
-
-def nxv1i1 : ValueType<1, 120>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 121>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 122>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 123>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 124>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 125>; // n x 32 x i1 vector value
-def nxv64i1 : ValueType<64, 126>; // n x 64 x i1 vector value
-
-def nxv1i8 : ValueType<8, 127>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 128>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 129>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 130>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 131>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 132>; // n x 32 x i8 vector value
-def nxv64i8 : ValueType<512, 133>; // n x 64 x i8 vector value
-
-def nxv1i16 : ValueType<16, 134>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 135>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 136>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 137>; // n x 8 x i16 vector value
-def nxv16i16 : ValueType<256, 138>; // n x 16 x i16 vector value
-def nxv32i16 : ValueType<512, 139>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32, 140>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 141>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 142>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 143>; // n x 8 x i32 vector value
-def nxv16i32 : ValueType<512, 144>; // n x 16 x i32 vector value
-def nxv32i32 : ValueType<1024, 145>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64, 146>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 147>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 148>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 149>; // n x 8 x i64 vector value
-def nxv16i64 : ValueType<1024, 150>; // n x 16 x i64 vector value
-def nxv32i64 : ValueType<2048, 151>; // n x 32 x i64 vector value
-
-def nxv1f16 : ValueType<16, 152>; // n x 1 x f16 vector value
-def nxv2f16 : ValueType<32, 153>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64, 154>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 155>; // n x 8 x f16 vector value
-def nxv16f16 : ValueType<256, 156>; // n x 16 x f16 vector value
-def nxv32f16 : ValueType<512, 157>; // n x 32 x f16 vector value
-
-def nxv1bf16 : ValueType<16, 158>; // n x 1 x bf16 vector value
-def nxv2bf16 : ValueType<32, 159>; // n x 2 x bf16 vector value
-def nxv4bf16 : ValueType<64, 160>; // n x 4 x bf16 vector value
-def nxv8bf16 : ValueType<128, 161>; // n x 8 x bf16 vector value
-
-def nxv1f32 : ValueType<32, 162>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64, 163>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 164>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 165>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 166>; // n x 16 x f32 vector value
-
-def nxv1f64 : ValueType<64, 167>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 168>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 169>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 170>; // n x 8 x f64 vector value
-
-def x86mmx : ValueType<64, 171>; // X86 MMX value
-def FlagVT : ValueType<0, 172>; // Pre-RA sched glue
-def isVoid : ValueType<0, 173>; // Produces no value
-def untyped : ValueType<8, 174>; // Produces an untyped value
-def funcref : ValueType<0, 175>; // WebAssembly's funcref type
-def externref : ValueType<0, 176>; // WebAssembly's externref type
-def x86amx : ValueType<8192, 177>; // X86 AMX value
-def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64)
-
+def i2 : ValueType<2, 3>; // 2-bit integer value
+def i4 : ValueType<4, 4>; // 4-bit integer value
+def i8 : ValueType<8, 5>; // 8-bit integer value
+def i16 : ValueType<16, 6>; // 16-bit integer value
+def i32 : ValueType<32, 7>; // 32-bit integer value
+def i64 : ValueType<64, 8>; // 64-bit integer value
+def i128 : ValueType<128, 9>; // 128-bit integer value
+
+def bf16 : ValueType<16, 10>; // 16-bit brain floating point value
+def f16 : ValueType<16, 11>; // 16-bit floating point value
+def f32 : ValueType<32, 12>; // 32-bit floating point value
+def f64 : ValueType<64, 13>; // 64-bit floating point value
+def f80 : ValueType<80, 14>; // 80-bit floating point value
+def f128 : ValueType<128, 15>; // 128-bit floating point value
+def ppcf128 : ValueType<128, 16>; // PPC 128-bit floating point value
+
+def v1i1 : ValueType<1, 17>; // 1 x i1 vector value
+def v2i1 : ValueType<2, 18>; // 2 x i1 vector value
+def v4i1 : ValueType<4, 19>; // 4 x i1 vector value
+def v8i1 : ValueType<8, 20>; // 8 x i1 vector value
+def v16i1 : ValueType<16, 21>; // 16 x i1 vector value
+def v32i1 : ValueType<32, 22>; // 32 x i1 vector value
+def v64i1 : ValueType<64, 23>; // 64 x i1 vector value
+def v128i1 : ValueType<128, 24>; // 128 x i1 vector value
+def v256i1 : ValueType<256, 25>; // 256 x i1 vector value
+def v512i1 : ValueType<512, 26>; // 512 x i1 vector value
+def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value
+
+def v128i2 : ValueType<256, 28>; // 128 x i2 vector value
+
+def v64i4 : ValueType<256, 29>; // 64 x i4 vector value
+
+def v1i8 : ValueType<8, 30>; // 1 x i8 vector value
+def v2i8 : ValueType<16, 31>; // 2 x i8 vector value
+def v4i8 : ValueType<32, 32>; // 4 x i8 vector value
+def v8i8 : ValueType<64, 33>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 34>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 35>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 36>; // 64 x i8 vector value
+def v128i8 : ValueType<1024, 37>; // 128 x i8 vector value
+def v256i8 : ValueType<2048, 38>; // 256 x i8 vector value
+def v512i8 : ValueType<4096, 39>; // 512 x i8 vector value
+def v1024i8 : ValueType<8192, 40>; // 1024 x i8 vector value
+
+def v1i16 : ValueType<16, 41>; // 1 x i16 vector value
+def v2i16 : ValueType<32, 42>; // 2 x i16 vector value
+def v3i16 : ValueType<48, 43>; // 3 x i16 vector value
+def v4i16 : ValueType<64, 44>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 45>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 46>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 47>; // 32 x i16 vector value
+def v64i16 : ValueType<1024, 48>; // 64 x i16 vector value
+def v128i16 : ValueType<2048, 49>; // 128 x i16 vector value
+def v256i16 : ValueType<4096, 50>; // 256 x i16 vector value
+def v512i16 : ValueType<8192, 51>; // 512 x i16 vector value
+
+def v1i32 : ValueType<32, 52>; // 1 x i32 vector value
+def v2i32 : ValueType<64, 53>; // 2 x i32 vector value
+def v3i32 : ValueType<96, 54>; // 3 x i32 vector value
+def v4i32 : ValueType<128, 55>; // 4 x i32 vector value
+def v5i32 : ValueType<160, 56>; // 5 x i32 vector value
+def v6i32 : ValueType<192, 57>; // 6 x i32 vector value
+def v7i32 : ValueType<224, 58>; // 7 x i32 vector value
+def v8i32 : ValueType<256, 59>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 60>; // 16 x i32 vector value
+def v32i32 : ValueType<1024, 61>; // 32 x i32 vector value
+def v64i32 : ValueType<2048, 62>; // 64 x i32 vector value
+def v128i32 : ValueType<4096, 63>; // 128 x i32 vector value
+def v256i32 : ValueType<8192, 64>; // 256 x i32 vector value
+def v512i32 : ValueType<16384, 65>; // 512 x i32 vector value
+def v1024i32 : ValueType<32768, 66>; // 1024 x i32 vector value
+def v2048i32 : ValueType<65536, 67>; // 2048 x i32 vector value
+
+def v1i64 : ValueType<64, 68>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 69>; // 2 x i64 vector value
+def v3i64 : ValueType<192, 70>; // 3 x i64 vector value
+def v4i64 : ValueType<256, 71>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 72>; // 8 x i64 vector value
+def v16i64 : ValueType<1024, 73>; // 16 x i64 vector value
+def v32i64 : ValueType<2048, 74>; // 32 x i64 vector value
+def v64i64 : ValueType<4096, 75>; // 64 x i64 vector value
+def v128i64 : ValueType<8192, 76>; // 128 x i64 vector value
+def v256i64 : ValueType<16384, 77>; // 256 x i64 vector value
+
+def v1i128 : ValueType<128, 78>; // 1 x i128 vector value
+
+def v1f16 : ValueType<16, 79>; // 1 x f16 vector value
+def v2f16 : ValueType<32, 80>; // 2 x f16 vector value
+def v3f16 : ValueType<48, 81>; // 3 x f16 vector value
+def v4f16 : ValueType<64, 82>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 83>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 84>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 85>; // 32 x f16 vector value
+def v64f16 : ValueType<1024, 86>; // 64 x f16 vector value
+def v128f16 : ValueType<2048, 87>; // 128 x f16 vector value
+def v256f16 : ValueType<4096, 88>; // 256 x f16 vector value
+def v512f16 : ValueType<8192, 89>; // 512 x f16 vector value
+
+def v2bf16 : ValueType<32, 90>; // 2 x bf16 vector value
+def v3bf16 : ValueType<48, 91>; // 3 x bf16 vector value
+def v4bf16 : ValueType<64, 92>; // 4 x bf16 vector value
+def v8bf16 : ValueType<128, 93>; // 8 x bf16 vector value
+def v16bf16 : ValueType<256, 94>; // 16 x bf16 vector value
+def v32bf16 : ValueType<512, 95>; // 32 x bf16 vector value
+def v64bf16 : ValueType<1024, 96>; // 64 x bf16 vector value
+def v128bf16 : ValueType<2048, 97>; // 128 x bf16 vector value
+
+def v1f32 : ValueType<32, 98>; // 1 x f32 vector value
+def v2f32 : ValueType<64, 99>; // 2 x f32 vector value
+def v3f32 : ValueType<96, 100>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 101>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 102>; // 5 x f32 vector value
+def v6f32 : ValueType<192, 103>; // 6 x f32 vector value
+def v7f32 : ValueType<224, 104>; // 7 x f32 vector value
+def v8f32 : ValueType<256, 105>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 106>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 107>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 108>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 109>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 110>; // 256 x f32 vector value
+def v512f32 : ValueType<16384, 111>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 112>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 113>; // 2048 x f32 vector value
+
+def v1f64 : ValueType<64, 114>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 115>; // 2 x f64 vector value
+def v3f64 : ValueType<192, 116>; // 3 x f64 vector value
+def v4f64 : ValueType<256, 117>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 118>; // 8 x f64 vector value
+def v16f64 : ValueType<1024, 119>; // 16 x f64 vector value
+def v32f64 : ValueType<2048, 120>; // 32 x f64 vector value
+def v64f64 : ValueType<4096, 121>; // 64 x f64 vector value
+def v128f64 : ValueType<8192, 122>; // 128 x f64 vector value
+def v256f64 : ValueType<16384, 123>; // 256 x f64 vector value
+
+def nxv1i1 : ValueType<1, 124>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 125>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 126>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 127>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 128>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 129>; // n x 32 x i1 vector value
+def nxv64i1 : ValueType<64, 130>; // n x 64 x i1 vector value
+
+def nxv1i8 : ValueType<8, 131>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 132>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 133>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 134>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 135>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 136>; // n x 32 x i8 vector value
+def nxv64i8 : ValueType<512, 137>; // n x 64 x i8 vector value
+
+def nxv1i16 : ValueType<16, 138>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 139>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 140>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 141>; // n x 8 x i16 vector value
+def nxv16i16 : ValueType<256, 142>; // n x 16 x i16 vector value
+def nxv32i16 : ValueType<512, 143>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 144>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 145>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 146>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 147>; // n x 8 x i32 vector value
+def nxv16i32 : ValueType<512, 148>; // n x 16 x i32 vector value
+def nxv32i32 : ValueType<1024, 149>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 150>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 151>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 152>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 153>; // n x 8 x i64 vector value
+def nxv16i64 : ValueType<1024, 154>; // n x 16 x i64 vector value
+def nxv32i64 : ValueType<2048, 155>; // n x 32 x i64 vector value
+
+def nxv1f16 : ValueType<16, 156>; // n x 1 x f16 vector value
+def nxv2f16 : ValueType<32, 157>; // n x 2 x f16 vector value
+def nxv4f16 : ValueType<64, 158>; // n x 4 x f16 vector value
+def nxv8f16 : ValueType<128, 159>; // n x 8 x f16 vector value
+def nxv16f16 : ValueType<256, 160>; // n x 16 x f16 vector value
+def nxv32f16 : ValueType<512, 161>; // n x 32 x f16 vector value
+
+def nxv1bf16 : ValueType<16, 162>; // n x 1 x bf16 vector value
+def nxv2bf16 : ValueType<32, 163>; // n x 2 x bf16 vector value
+def nxv4bf16 : ValueType<64, 164>; // n x 4 x bf16 vector value
+def nxv8bf16 : ValueType<128, 165>; // n x 8 x bf16 vector value
+def nxv16bf16 : ValueType<256, 166>; // n x 16 x bf16 vector value
+def nxv32bf16 : ValueType<512, 167>; // n x 32 x bf16 vector value
+
+def nxv1f32 : ValueType<32, 168>; // n x 1 x f32 vector value
+def nxv2f32 : ValueType<64, 169>; // n x 2 x f32 vector value
+def nxv4f32 : ValueType<128, 170>; // n x 4 x f32 vector value
+def nxv8f32 : ValueType<256, 171>; // n x 8 x f32 vector value
+def nxv16f32 : ValueType<512, 172>; // n x 16 x f32 vector value
+
+def nxv1f64 : ValueType<64, 173>; // n x 1 x f64 vector value
+def nxv2f64 : ValueType<128, 174>; // n x 2 x f64 vector value
+def nxv4f64 : ValueType<256, 175>; // n x 4 x f64 vector value
+def nxv8f64 : ValueType<512, 176>; // n x 8 x f64 vector value
+
+def x86mmx : ValueType<64, 177>; // X86 MMX value
+def FlagVT : ValueType<0, 178>; // Pre-RA sched glue
+def isVoid : ValueType<0, 179>; // Produces no value
+def untyped : ValueType<8, 180>; // Produces an untyped value
+def funcref : ValueType<0, 181>; // WebAssembly's funcref type
+def externref : ValueType<0, 182>; // WebAssembly's externref type
+def x86amx : ValueType<8192, 183>; // X86 AMX value
+def i64x8 : ValueType<512, 184>; // 8 Consecutive GPRs (AArch64)
def token : ValueType<0, 248>; // TokenTy
def MetadataVT : ValueType<0, 249>; // Metadata
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index 4f1c666df35f..0b2e033bd97a 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -11,18 +11,26 @@
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/NonRelocatableStringpool.h"
-#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/MC/MCDwarf.h"
+#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include <map>
namespace llvm {
+class DWARFContext;
+class DWARFExpression;
+class DWARFUnit;
+class DataExtractor;
+class DeclContextTree;
+struct MCDwarfLineTableParams;
+template <typename T> class SmallVectorImpl;
enum class DwarfLinkerClient { Dsymutil, LLD, General };
/// The kind of accelerator tables we should emit.
-enum class AccelTableKind {
+enum class DwarfLinkerAccelTableKind : uint8_t {
+ None,
Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
Dwarf, ///< DWARF v5 .debug_names.
Default, ///< Dwarf for DWARF5 or later, Apple otherwise.
@@ -56,28 +64,21 @@ class AddressesMap {
public:
virtual ~AddressesMap();
- /// Returns true if represented addresses are from linked file.
- /// Returns false if represented addresses are from not-linked
- /// object file.
- virtual bool areRelocationsResolved() const = 0;
-
/// Checks that there are valid relocations against a .debug_info
/// section.
virtual bool hasValidRelocs() = 0;
- /// Checks that the specified DIE has a DW_AT_Location attribute
- /// that references into a live code section.
- ///
+ /// Checks that the specified variable \p DIE references a live code section.
+ /// Allowed kinds of input DIE: DW_TAG_variable, DW_TAG_constant.
/// \returns true and sets Info.InDebugMap if it is the case.
- virtual bool hasLiveMemoryLocation(const DWARFDie &DIE,
- CompileUnit::DIEInfo &Info) = 0;
+ virtual bool isLiveVariable(const DWARFDie &DIE,
+ CompileUnit::DIEInfo &Info) = 0;
- /// Checks that the specified DIE has a DW_AT_Low_pc attribute
- /// that references into a live code section.
- ///
+ /// Checks that the specified subprogram \p DIE references a live code section.
+ /// Allowed kinds of input DIE: DW_TAG_subprogram, DW_TAG_label.
/// \returns true and sets Info.InDebugMap if it is the case.
- virtual bool hasLiveAddressRange(const DWARFDie &DIE,
- CompileUnit::DIEInfo &Info) = 0;
+ virtual bool isLiveSubprogram(const DWARFDie &DIE,
+ CompileUnit::DIEInfo &Info) = 0;
/// Apply the valid relocations to the buffer \p Data, taking into
/// account that Data is at \p BaseOffset in the .debug_info section.
@@ -272,6 +273,9 @@ public:
/// Print statistics to standard output.
void setStatistics(bool Statistics) { Options.Statistics = Statistics; }
+ /// Verify the input DWARF.
+ void setVerifyInputDWARF(bool Verify) { Options.VerifyInputDWARF = Verify; }
+
/// Do not emit linked dwarf info.
void setNoOutput(bool NoOut) { Options.NoOutput = NoOut; }
@@ -290,7 +294,7 @@ public:
void setNumThreads(unsigned NumThreads) { Options.Threads = NumThreads; }
/// Set kind of accelerator tables to be generated.
- void setAccelTableKind(AccelTableKind Kind) {
+ void setAccelTableKind(DwarfLinkerAccelTableKind Kind) {
Options.TheAccelTableKind = Kind;
}
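Callers select the table flavor through the renamed enum; illustrative use, assuming an already-constructed DWARFLinker named Linker:

  Linker.setAccelTableKind(DwarfLinkerAccelTableKind::Dwarf); // DWARF v5 .debug_names
  // or DwarfLinkerAccelTableKind::None to suppress accelerator tables entirely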
@@ -361,6 +365,8 @@ private:
/// Given a DIE, update its incompleteness based on whether the DIEs it
/// references are incomplete.
UpdateRefIncompleteness,
+ /// Given a DIE, mark it as ODR Canonical if applicable.
+ MarkODRCanonicalDie,
};
/// This class represents an item in the work list. The type defines what kind
@@ -389,6 +395,9 @@ private:
AncestorIdx(AncestorIdx) {}
};
+ /// Verify the given DWARF file.
+ bool verify(const DWARFFile &File);
+
/// returns true if we need to translate strings.
bool needToTranslateStrings() { return StringsTranslator != nullptr; }
@@ -457,6 +466,10 @@ private:
const DWARFFile &File,
SmallVectorImpl<WorklistItem> &Worklist);
+ /// Mark context corresponding to the specified \p Die as having canonical
+ /// die, if applicable.
+ void markODRCanonicalDie(const DWARFDie &Die, CompileUnit &CU);
+
/// \defgroup FindRootDIEs Find DIEs corresponding to Address map entries.
///
/// @{
@@ -778,6 +791,9 @@ private:
/// Print statistics.
bool Statistics = false;
+ /// Verify the input DWARF.
+ bool VerifyInputDWARF = false;
+
/// Skip emitting output
bool NoOutput = false;
@@ -795,7 +811,8 @@ private:
unsigned Threads = 1;
/// The accelerator table kind
- AccelTableKind TheAccelTableKind = AccelTableKind::Default;
+ DwarfLinkerAccelTableKind TheAccelTableKind =
+ DwarfLinkerAccelTableKind::Default;
/// Prepend path for the clang modules.
std::string PrependPath;
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
index afba19ac7d42..788275782235 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
@@ -9,10 +9,10 @@
#ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H
#define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/Support/DataExtractor.h"
namespace llvm {
@@ -74,6 +74,12 @@ public:
/// Does DIE transitively refer an incomplete decl?
bool Incomplete : 1;
+
+ /// Is DIE in the clang module scope?
+ bool InModuleScope : 1;
+
+ /// Is ODR marking done?
+ bool ODRMarkingDone : 1;
};
CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR,
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
index d2274488e85f..fb02b0fc1b4d 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h
@@ -14,14 +14,15 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/NonRelocatableStringpool.h"
-#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+#include <atomic>
namespace llvm {
+class CompileUnit;
struct DeclMapInfo;
/// Small helper that resolves and caches file paths. This helps reduce the
@@ -91,6 +92,10 @@ public:
bool setLastSeenDIE(CompileUnit &U, const DWARFDie &Die);
+ void setHasCanonicalDIE() { HasCanonicalDIE = true; }
+
+ bool hasCanonicalDIE() const { return HasCanonicalDIE; }
+
uint32_t getCanonicalDIEOffset() const { return CanonicalDIEOffset; }
void setCanonicalDIEOffset(uint32_t Offset) { CanonicalDIEOffset = Offset; }
@@ -112,7 +117,8 @@ private:
const DeclContext &Parent;
DWARFDie LastSeenDIE;
uint32_t LastSeenCompileUnitID = 0;
- uint32_t CanonicalDIEOffset = 0;
+ std::atomic<uint32_t> CanonicalDIEOffset = {0};
+ bool HasCanonicalDIE = false;
};
/// This class gives a tree-like API to the DenseMap that stores the
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index fc8c59904cfb..003fe548252a 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -10,7 +10,6 @@
#define LLVM_DWARFLINKER_DWARFSTREAMER_H
#include "llvm/BinaryFormat/Swift.h"
-#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/DWARFLinker/DWARFLinker.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -18,9 +17,11 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+template <typename DataT> class AccelTable;
enum class OutputFileType {
Object,
diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h
index 9f69851f0055..1354b46f156b 100644
--- a/llvm/include/llvm/DWP/DWPStringPool.h
+++ b/llvm/include/llvm/DWP/DWPStringPool.h
@@ -43,7 +43,7 @@ public:
auto Pair = Pool.insert(std::make_pair(Str, Offset));
if (Pair.second) {
- Out.SwitchSection(Sec);
+ Out.switchSection(Sec);
Out.emitBytes(StringRef(Str, Length));
Offset += Length;
}
diff --git a/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
index 5a91682e9bd4..d474173973b5 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
@@ -11,7 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
index 82ef8c173bee..ef44b622d955 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
@@ -10,7 +10,7 @@
#define LLVM_DEBUGINFO_CODEVIEW_CVSYMBOLVISITOR_H
#include "llvm/DebugInfo/CodeView/CVRecord.h"
-#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Error.h"
namespace llvm {
namespace codeview {
@@ -18,12 +18,20 @@ class SymbolVisitorCallbacks;
class CVSymbolVisitor {
public:
+ struct FilterOptions {
+ llvm::Optional<uint32_t> SymbolOffset;
+ llvm::Optional<uint32_t> ParentRecursiveDepth;
+ llvm::Optional<uint32_t> ChildRecursiveDepth;
+ };
+
CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks);
Error visitSymbolRecord(CVSymbol &Record);
Error visitSymbolRecord(CVSymbol &Record, uint32_t Offset);
Error visitSymbolStream(const CVSymbolArray &Symbols);
Error visitSymbolStream(const CVSymbolArray &Symbols, uint32_t InitialOffset);
+ Error visitSymbolStreamFiltered(const CVSymbolArray &Symbols,
+ const FilterOptions &Filter);
private:
SymbolVisitorCallbacks &Callbacks;
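visitSymbolStreamFiltered extends plain stream visitation with a filter: SymbolOffset selects a single record, and the two depth fields bound how many enclosing parents and nested children are also visited. A hedged sketch of a caller follows; the meaning of depth 0 is an assumption, since the header itself does not document it:

  using namespace llvm;
  using namespace llvm::codeview;

  // Sketch: visit only the symbol record at RecordOffset, no parents/children.
  Error visitOneRecord(const CVSymbolArray &Symbols, uint32_t RecordOffset,
                       SymbolVisitorCallbacks &Callbacks) {
    CVSymbolVisitor Visitor(Callbacks);
    CVSymbolVisitor::FilterOptions Filter;
    Filter.SymbolOffset = RecordOffset;
    Filter.ParentRecursiveDepth = 0; // assumed: 0 visits no parent scopes
    Filter.ChildRecursiveDepth = 0;  // assumed: 0 visits no children
    return Visitor.visitSymbolStreamFiltered(Symbols, Filter);
  }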
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
index 7538cb2c2548..7780e233cab3 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
@@ -9,14 +9,17 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H
#define LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/Support/Error.h"
namespace llvm {
namespace codeview {
+class TypeIndex;
class TypeCollection;
class TypeVisitorCallbacks;
+struct CVMemberRecord;
enum VisitorDataSource {
VDS_BytesPresent, // The record bytes are passed into the visitation
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index d851dea0a27f..4fbe7e835a8a 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -9,14 +9,11 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEWRECORDIO_H
#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEWRECORDIO_H
-#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/GUID.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Error.h"
@@ -26,7 +23,12 @@
namespace llvm {
+template <typename T> class ArrayRef;
+class APSInt;
+
namespace codeview {
+class TypeIndex;
+struct GUID;
class CodeViewRecordStreamer {
public:
@@ -246,7 +248,7 @@ private:
Optional<uint32_t> MaxLength;
Optional<uint32_t> bytesRemaining(uint32_t CurrentOffset) const {
- if (!MaxLength.hasValue())
+ if (!MaxLength)
return None;
assert(CurrentOffset >= BeginOffset);
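This hunk is one instance of a mechanical migration that recurs throughout the patch: llvm::Optional is moving toward the std::optional interface, so hasValue() becomes a contextual bool (or has_value()), getValue() becomes operator*, and getValueOr() becomes value_or(). A minimal sketch of the before/after shape, using Optional<uint32_t> as in the hunk above:

  #include "llvm/ADT/Optional.h"

  // Sketch of the Optional API migration applied throughout this patch.
  uint32_t remainingOrZero(llvm::Optional<uint32_t> MaxLength) {
    // Old spelling: if (MaxLength.hasValue()) return MaxLength.getValue();
    if (MaxLength)       // contextual conversion; has_value() also works
      return *MaxLength; // operator* replaces getValue()
    return 0;            // equivalently: return MaxLength.value_or(0);
  }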
diff --git a/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
index 0e2f5d90e243..0f83ae370a1e 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
@@ -12,22 +12,16 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Error.h"
-#include <cassert>
#include <cstdint>
-#include <memory>
#include <vector>
namespace llvm {
namespace codeview {
+class TypeIndex;
enum class ContinuationRecordKind { FieldList, MethodOverloadList };
class ContinuationRecordBuilder {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
index 01f83676afdf..615fd216e655 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
@@ -16,7 +16,6 @@
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <cstdint>
@@ -24,6 +23,9 @@
namespace llvm {
+class BinaryStreamReader;
+class BinaryStreamWriter;
+
namespace codeview {
class DebugStringTableSubsection;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
index 64a78a7cef21..e21873a3af8f 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
@@ -12,13 +12,14 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <map>
namespace llvm {
+class BinaryStreamReader;
+class BinaryStreamWriter;
namespace codeview {
class DebugCrossModuleExportsSubsectionRef final : public DebugSubsectionRef {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
index e7683cb2a9c4..198ce4a8b4e4 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
@@ -14,7 +14,6 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -22,6 +21,8 @@
#include <vector>
namespace llvm {
+class BinaryStreamReader;
+class BinaryStreamWriter;
namespace codeview {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
index d5cd640231f9..f2c5bf9d7c95 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
@@ -11,11 +11,15 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
-#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
namespace llvm {
+class BinaryStreamReader;
+class BinaryStreamWriter;
+
namespace codeview {
class DebugFrameDataSubsectionRef final : public DebugSubsectionRef {
public:
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index 9fd88a64873a..f9d1507af5f3 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -12,7 +12,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
-#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
index 1f8e56c5311f..68eb9e1af3bd 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
@@ -14,7 +14,6 @@
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -22,6 +21,8 @@
#include <vector>
namespace llvm {
+class BinaryStreamReader;
+class BinaryStreamWriter;
namespace codeview {
class DebugChecksumsSubsection;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h
index 2e1cd15a3956..39413bb73b58 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h
@@ -10,10 +10,12 @@
#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTION_H
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+
+#include <cstdint>
namespace llvm {
+class BinaryStreamWriter;
namespace codeview {
class DebugSubsectionRef {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
index 151930d6d43d..fdca2ad063a1 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONVISITOR_H
#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSUBSECTIONVISITOR_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/Support/Error.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h b/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
index 270cd4b8330c..ec874b7ca114 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -12,10 +12,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/Support/ScopedPrinter.h"
#include <cstdint>
namespace llvm {
+template <typename T> struct EnumEntry;
namespace codeview {
ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames();
diff --git a/llvm/include/llvm/DebugInfo/CodeView/Formatters.h b/llvm/include/llvm/DebugInfo/CodeView/Formatters.h
index 7d04a6a89bef..10683c289224 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/Formatters.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/Formatters.h
@@ -22,6 +22,8 @@ namespace llvm {
namespace codeview {
+struct GUID;
+
namespace detail {
class GuidAdapter final : public FormatAdapter<ArrayRef<uint8_t>> {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
index 465c26ec2ce6..d592bde18bae 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
@@ -10,9 +10,9 @@
#define LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPETABLEBUILDER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeHashing.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/Line.h b/llvm/include/llvm/DebugInfo/CodeView/Line.h
index eb2aa154df1b..6918645b94d2 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/Line.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/Line.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_LINE_H
#define LLVM_DEBUGINFO_CODEVIEW_LINE_H
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/Endian.h"
#include <cinttypes>
diff --git a/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h b/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
index 0f9d5e476075..1965aab9b5cc 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
@@ -10,18 +10,18 @@
#define LLVM_DEBUGINFO_CODEVIEW_MERGINGTYPETABLEBUILDER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeHashing.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/Allocator.h"
#include <cstdint>
namespace llvm {
namespace codeview {
+struct LocallyHashedType;
class ContinuationRecordBuilder;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
index 8e06be9e41e8..9078ed38d2f1 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h
@@ -9,11 +9,14 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H
#define LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H
-#include "llvm/DebugInfo/CodeView/TypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include <string>
namespace llvm {
namespace codeview {
+class TypeCollection;
+class TypeIndex;
std::string computeTypeName(TypeCollection &Types, TypeIndex Index);
StringRef getSymbolName(CVSymbol Sym);
} // namespace codeview
diff --git a/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
index 36c0f2fbd8fa..10248dbf646b 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_RECORDSERIALIZATION_H
#define LLVM_DEBUGINFO_CODEVIEW_RECORDSERIALIZATION_H
-#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -18,9 +17,9 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cinttypes>
-#include <tuple>
namespace llvm {
+class APSInt;
namespace codeview {
using llvm::support::little32_t;
using llvm::support::ulittle16_t;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h b/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
index 22a283e785e1..50e745e5c2ab 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
@@ -10,13 +10,15 @@
#define LLVM_DEBUGINFO_CODEVIEW_STRINGSANDCHECKSUMS_H
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include <memory>
namespace llvm {
namespace codeview {
+class DebugChecksumsSubsection;
+class DebugChecksumsSubsectionRef;
+class DebugStringTableSubsection;
+class DebugStringTableSubsectionRef;
class StringsAndChecksumsRef {
public:
diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index aaeffb2446ad..c674700fac59 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -9,11 +9,13 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLDUMPER_H
#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLDUMPER_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/Error.h"
+
+#include <memory>
+#include <utility>
namespace llvm {
class ScopedPrinter;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index c37f6b4d5fa7..9513e19a330a 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -196,7 +196,7 @@ struct BinaryAnnotationIterator
const DecodedAnnotation &operator*() {
ParseCurrentAnnotation();
- return Current.getValue();
+ return *Current;
}
private:
@@ -249,7 +249,7 @@ private:
}
bool ParseCurrentAnnotation() {
- if (Current.hasValue())
+ if (Current)
return true;
Next = Data;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
index fb806c692cfd..53986f9a6db6 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
@@ -10,15 +10,17 @@
#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include <array>
#include <cstdint>
namespace llvm {
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h
index bde5a8b3ab2f..f643bc4d7451 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h
@@ -34,7 +34,7 @@ public:
template <typename TFunc> void ForEachRecord(TFunc Func) {
Optional<TypeIndex> Next = getFirst();
- while (Next.hasValue()) {
+ while (Next) {
TypeIndex N = *Next;
Func(N, getType(N));
Next = getNext(N);
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
index 41a219ae5a7b..1fad50343e3a 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
@@ -9,16 +9,18 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEDUMPVISITOR_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEDUMPVISITOR_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
namespace llvm {
class ScopedPrinter;
namespace codeview {
+class TypeIndex;
+struct CVMemberRecord;
+struct MemberAttributes;
class TypeCollection;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
index 9f34d026b1ba..f49bc9b8e790 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
@@ -9,10 +9,11 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
-#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
@@ -21,6 +22,7 @@
#include <type_traits>
namespace llvm {
+class raw_ostream;
namespace codeview {
/// A locally hashed type represents a straightforward hash code of a serialized
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
index 226a436c0930..653eafa04e0a 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -13,7 +13,6 @@
#include "llvm/Support/Endian.h"
#include <cassert>
#include <cinttypes>
-#include <functional>
namespace llvm {
@@ -36,6 +35,7 @@ enum class SimpleTypeKind : uint32_t {
WideCharacter = 0x0071, // wide char
Character16 = 0x007a, // char16_t
Character32 = 0x007b, // char32_t
+ Character8 = 0x007c, // char8_t
SByte = 0x0068, // 8 bit signed int
Byte = 0x0069, // 8 bit unsigned int
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
index f4f5835d8b57..7ef8521604fb 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
@@ -9,13 +9,13 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/Support/Error.h"
namespace llvm {
+template <typename T> class SmallVectorImpl;
namespace codeview {
+class TypeIndex;
enum class TiRefKind { TypeRef, IndexRef };
struct TiReference {
TiRefKind Kind;
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
index c6044d5138a8..ed4fc7a75624 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
@@ -10,7 +10,8 @@
#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDMAPPING_H
#include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/Error.h"
@@ -20,6 +21,8 @@ class BinaryStreamReader;
class BinaryStreamWriter;
namespace codeview {
+class TypeIndex;
+struct CVMemberRecord;
class TypeRecordMapping : public TypeVisitorCallbacks {
public:
explicit TypeRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {}
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
index 04d7c7b0420a..04a1e44dd809 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
@@ -10,11 +10,12 @@
#define LLVM_DEBUGINFO_CODEVIEW_TYPESTREAMMERGER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/Support/Error.h"
namespace llvm {
+template <typename T> class Optional;
+template <typename T> class SmallVectorImpl;
namespace codeview {
class TypeIndex;
diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h
index d029556c9d89..9b278b696073 100644
--- a/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/llvm/include/llvm/DebugInfo/DIContext.h
@@ -90,6 +90,8 @@ class DIInliningInfo {
public:
DIInliningInfo() = default;
+ /// Returns the frame at `Index`. Frames are stored in bottom-up
+ /// (leaf-to-root) order with increasing index.
const DILineInfo &getFrame(unsigned Index) const {
assert(Index < Frames.size());
return Frames[Index];
@@ -112,6 +114,8 @@ struct DIGlobal {
std::string Name;
uint64_t Start = 0;
uint64_t Size = 0;
+ std::string DeclFile;
+ uint64_t DeclLine = 0;
DIGlobal() : Name(DILineInfo::BadString) {}
};
@@ -151,6 +155,10 @@ struct DILineInfoSpecifier {
DILineInfoSpecifier(FileLineInfoKind FLIKind = FileLineInfoKind::RawValue,
FunctionNameKind FNKind = FunctionNameKind::None)
: FLIKind(FLIKind), FNKind(FNKind) {}
+
+ inline bool operator==(const DILineInfoSpecifier &RHS) const {
+ return FLIKind == RHS.FLIKind && FNKind == RHS.FNKind;
+ }
};
/// This is just a helper to programmatically construct DIDumpType.
@@ -233,6 +241,8 @@ public:
virtual DILineInfo getLineInfoForAddress(
object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
+ virtual DILineInfo
+ getLineInfoForDataAddress(object::SectionedAddress Address) = 0;
virtual DILineInfoTable getLineInfoForAddressRange(
object::SectionedAddress Address, uint64_t Size,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
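getLineInfoForDataAddress is the data counterpart of getLineInfoForAddress: given the address of a global, it reports where the variable was declared, which is also why DIGlobal gains the DeclFile/DeclLine fields above. A sketch, assuming Obj is an already-loaded object::ObjectFile and Addr is the data address:

  // Sketch: symbolize a data (global variable) address to its declaration.
  std::unique_ptr<llvm::DWARFContext> Ctx = llvm::DWARFContext::create(Obj);
  llvm::object::SectionedAddress SA;
  SA.Address = Addr;
  SA.SectionIndex = llvm::object::SectionedAddress::UndefSection;
  llvm::DILineInfo Info = Ctx->getLineInfoForDataAddress(SA);
  // Info.FileName and Info.Line now describe the variable's declaration,
  // or stay at their defaults if no variable covers Addr.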
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index cdf3f60f88be..3887656ceef6 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -13,13 +13,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/Support/DataExtractor.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace llvm {
+class DataExtractor;
class DWARFFormValue;
class DWARFUnit;
class raw_ostream;
@@ -34,7 +34,7 @@ public:
AttributeSpec(dwarf::Attribute A, dwarf::Form F, Optional<uint8_t> ByteSize)
: Attr(A), Form(F) {
assert(!isImplicitConst());
- this->ByteSize.HasByteSize = ByteSize.hasValue();
+ this->ByteSize.HasByteSize = ByteSize.has_value();
if (this->ByteSize.HasByteSize)
this->ByteSize.ByteSize = *ByteSize;
}
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
index 537a03ec11fc..f4d6c451cbe1 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
@@ -10,6 +10,9 @@
#define LLVM_DEBUGINFO_DWARF_DWARFADDRESSRANGE_H
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Object/ObjectFile.h"
+#include <algorithm>
+#include <cassert>
#include <cstdint>
#include <tuple>
#include <vector>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index ec5a3cd85266..d449b7bed796 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -10,10 +10,15 @@
#define LLVM_DEBUGINFO_DWARF_DWARFCOMPILEUNIT_H
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
namespace llvm {
+class DWARFContext;
+class DWARFDebugAbbrev;
+class raw_ostream;
+struct DIDumpOptions;
+struct DWARFSection;
+
class DWARFCompileUnit : public DWARFUnit {
public:
DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section,
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index e82faf6eeb24..bf591ed554c6 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -9,43 +9,37 @@
#ifndef LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H
#define LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
-#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
-#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
#include "llvm/DebugInfo/DWARF/DWARFObject.h"
-#include "llvm/DebugInfo/DWARF/DWARFSection.h"
-#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Host.h"
#include <cstdint>
-#include <deque>
-#include <map>
#include <memory>
namespace llvm {
class MCRegisterInfo;
class MemoryBuffer;
-class raw_ostream;
+class AppleAcceleratorTable;
+class DWARFCompileUnit;
+class DWARFDebugAbbrev;
+class DWARFDebugAranges;
+class DWARFDebugFrame;
+class DWARFDebugLoc;
+class DWARFDebugMacro;
+class DWARFDebugNames;
+class DWARFGdbIndex;
+class DWARFTypeUnit;
+class DWARFUnitIndex;
/// DWARFContext
/// This data structure is the top level entity that deals with dwarf debug
@@ -124,7 +118,7 @@ public:
WithColor::defaultErrorHandler,
std::function<void(Error)> WarningHandler =
WithColor::defaultWarningHandler);
- ~DWARFContext();
+ ~DWARFContext() override;
DWARFContext(DWARFContext &) = delete;
DWARFContext &operator=(DWARFContext &) = delete;
@@ -339,6 +333,10 @@ public:
getLineTableForUnit(DWARFUnit *U,
function_ref<void(Error)> RecoverableErrorHandler);
+ // Clear the line table object corresponding to a compile unit for memory
+ // management purposes. When it's referred to again, it'll be re-populated.
+ void clearLineTableForUnit(DWARFUnit *U);
+
DataExtractor getStringExtractor() const {
return DataExtractor(DObj->getStrSection(), false, 0);
}
@@ -366,6 +364,8 @@ public:
DILineInfo getLineInfoForAddress(
object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+ DILineInfo
+ getLineInfoForDataAddress(object::SectionedAddress Address) override;
DILineInfoTable getLineInfoForAddressRange(
object::SectionedAddress Address, uint64_t Size,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
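clearLineTableForUnit lets long-running consumers bound memory: once a unit's line table has been consumed it can be dropped, and it will be lazily re-parsed if queried again. A sketch, assuming Ctx is a DWARFContext and processLineTable is a hypothetical consumer:

  // Sketch: stream over line tables without keeping them all in memory.
  for (const std::unique_ptr<llvm::DWARFUnit> &CU : Ctx.compile_units()) {
    if (const llvm::DWARFDebugLine::LineTable *LT =
            Ctx.getLineTableForUnit(CU.get()))
      processLineTable(*LT);             // hypothetical
    Ctx.clearLineTableForUnit(CU.get()); // re-populated on the next request
  }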
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
index e1407ddd89eb..67d9ce1476dd 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
@@ -11,17 +11,14 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <cstdint>
-#include <map>
#include <vector>
namespace llvm {
-class Error;
class raw_ostream;
+class DWARFDataExtractor;
/// A class representing an address table as specified in DWARF v5.
/// The table consists of a header followed by an array of address values from
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 65334b4a4976..760d8826771c 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -10,7 +10,7 @@
#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGARANGESET_H
#include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <vector>
@@ -18,6 +18,7 @@
namespace llvm {
class raw_ostream;
+class DWARFDataExtractor;
class DWARFDebugArangeSet {
public:
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index 216dd1e4defc..068674cfae5c 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -10,11 +10,13 @@
#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGARANGES_H
#include "llvm/ADT/DenseSet.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include <cstdint>
#include <vector>
namespace llvm {
+class DWARFDataExtractor;
+class Error;
class DWARFContext;
@@ -26,7 +28,8 @@ public:
private:
void clear();
void extract(DWARFDataExtractor DebugArangesData,
- function_ref<void(Error)> RecoverableErrorHandler);
+ function_ref<void(Error)> RecoverableErrorHandler,
+ function_ref<void(Error)> WarningHandler);
/// Call appendRange multiple times and then call construct.
void appendRange(uint64_t CUOffset, uint64_t LowPC, uint64_t HighPC);
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index 8167aaaeffb5..48df091412bf 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -13,7 +13,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Support/Error.h"
#include <map>
@@ -23,6 +22,9 @@
namespace llvm {
class raw_ostream;
+class DWARFDataExtractor;
+class MCRegisterInfo;
+struct DIDumpOptions;
namespace dwarf {
@@ -130,7 +132,7 @@ public:
uint32_t getRegister() const { return RegNum; }
int32_t getOffset() const { return Offset; }
uint32_t getAddressSpace() const {
- assert(Kind == RegPlusOffset && AddrSpace.hasValue());
+ assert(Kind == RegPlusOffset && AddrSpace);
return *AddrSpace;
}
int32_t getConstant() const { return Offset; }
@@ -259,7 +261,7 @@ public:
UnwindRow() : CFAValue(UnwindLocation::createUnspecified()) {}
/// Returns true if the address is valid in this object.
- bool hasAddress() const { return Address.hasValue(); }
+ bool hasAddress() const { return Address.has_value(); }
/// Get the address for this row.
///
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index 6bdd23900182..9befcc0c4182 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -11,12 +11,12 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include <cstdint>
namespace llvm {
class DWARFUnit;
+class DWARFDataExtractor;
/// DWARFDebugInfoEntry - A DIE with only the minimum required data.
class DWARFDebugInfoEntry {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index ee15b6d4112d..86f90135f8d4 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -11,12 +11,10 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include <cstdint>
@@ -26,7 +24,6 @@
namespace llvm {
-class DWARFUnit;
class raw_ostream;
class DWARFDebugLine {
@@ -307,6 +304,7 @@ public:
getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset,
const DWARFContext &Ctx, const DWARFUnit *U,
function_ref<void(Error)> RecoverableErrorHandler);
+ void clearLineTable(uint64_t Offset);
/// Helper to allow for parsing of an entire .debug_line section in sequence.
class SectionParser {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index 1794f6649827..90e009e514d4 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -11,10 +11,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Errc.h"
#include <cstdint>
@@ -22,6 +19,12 @@ namespace llvm {
class DWARFUnit;
class MCRegisterInfo;
class raw_ostream;
+class DWARFObject;
+struct DIDumpOptions;
+struct DWARFLocationExpression;
+namespace object {
+struct SectionedAddress;
+}
/// A single location within a location list. Entries are stored in the DWARF5
/// form even if they originally come from a DWARF<=4 location list.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
index f1768a1ddab5..d98cf9a6045a 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
@@ -12,7 +12,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <cstdint>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
index cb347615868b..6c82bbfe74f7 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -10,16 +10,17 @@
#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGPUBTABLE_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include "llvm/DebugInfo/DWARF/DWARFObject.h"
#include <cstdint>
#include <vector>
namespace llvm {
class raw_ostream;
+class DWARFDataExtractor;
+class Error;
/// Represents structure for holding and parsing .debug_pub* tables.
class DWARFDebugPubTable {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index 0d9f37c5610b..f4aeac1bb9db 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -10,14 +10,16 @@
#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include <cassert>
#include <cstdint>
#include <vector>
namespace llvm {
class raw_ostream;
+class DWARFDataExtractor;
+namespace object {
+struct SectionedAddress;
+}
class DWARFDebugRangeList {
public:
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index 2baa6493f709..13f018f53fa1 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -10,11 +10,9 @@
#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRNGLISTS_H
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFListTable.h"
#include <cstdint>
@@ -23,6 +21,11 @@ namespace llvm {
class Error;
class raw_ostream;
class DWARFUnit;
+class DWARFDataExtractor;
+struct DIDumpOptions;
+namespace object {
+struct SectionedAddress;
+}
/// A class representing a single range list entry.
struct RangeListEntry : public DWARFListEntryBase {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index f731d440a35b..149c5ef4e493 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -18,7 +18,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFAttribute.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -280,6 +280,13 @@ public:
/// \returns an iterator range for the attributes of the current DIE.
iterator_range<attribute_iterator> attributes() const;
+ /// Gets the type size (in bytes) for this DIE.
+ ///
+ /// \param PointerSize the pointer size of the containing CU.
+ /// \returns the size of the type if this is a type DIE, or if this DIE
+ /// contains a DW_AT_type.
+ Optional<uint64_t> getTypeSize(uint64_t PointerSize);
+
class iterator;
iterator begin() const;
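Per the doc comment, getTypeSize yields the byte size of a type DIE or of the DIE's DW_AT_type; the PointerSize argument presumably supplies the size for pointer-like types. A sketch, assuming Die is a DWARFDie inside unit CU:

  // Sketch: print the size of the type behind a DIE, when computable.
  if (llvm::Optional<uint64_t> Size =
          Die.getTypeSize(CU.getAddressByteSize()))
    llvm::outs() << "type occupies " << *Size << " bytes\n";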
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index b694eeacfd9d..c4d81047a4dc 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -9,16 +9,15 @@
#ifndef LLVM_DEBUGINFO_DWARF_DWARFEXPRESSION_H
#define LLVM_DEBUGINFO_DWARF_DWARFEXPRESSION_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DIContext.h"
#include "llvm/Support/DataExtractor.h"
namespace llvm {
class DWARFUnit;
+struct DIDumpOptions;
class MCRegisterInfo;
class raw_ostream;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 130cdb8800a9..c2c1df5b590b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -14,12 +14,14 @@
#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/Support/DataExtractor.h"
#include <cstdint>
namespace llvm {
class DWARFContext;
+class DWARFObject;
+class DWARFDataExtractor;
class DWARFUnit;
class raw_ostream;
@@ -234,7 +236,7 @@ inline Optional<uint64_t> toUnsigned(const Optional<DWARFFormValue> &V) {
/// value or the form value's encoding wasn't an unsigned constant form.
inline uint64_t toUnsigned(const Optional<DWARFFormValue> &V,
uint64_t Default) {
- return toUnsigned(V).getValueOr(Default);
+ return toUnsigned(V).value_or(Default);
}
/// Take an optional DWARFFormValue and try to extract an reference.
@@ -256,7 +258,7 @@ inline Optional<uint64_t> toReference(const Optional<DWARFFormValue> &V) {
/// value or the form value's encoding wasn't a reference form.
inline uint64_t toReference(const Optional<DWARFFormValue> &V,
uint64_t Default) {
- return toReference(V).getValueOr(Default);
+ return toReference(V).value_or(Default);
}
/// Take an optional DWARFFormValue and try to extract an signed constant.
@@ -277,7 +279,7 @@ inline Optional<int64_t> toSigned(const Optional<DWARFFormValue> &V) {
/// \returns the extracted signed integer value or Default if the V doesn't
/// have a value or the form value's encoding wasn't a signed integer form.
inline int64_t toSigned(const Optional<DWARFFormValue> &V, int64_t Default) {
- return toSigned(V).getValueOr(Default);
+ return toSigned(V).value_or(Default);
}
/// Take an optional DWARFFormValue and try to extract an address.
@@ -305,7 +307,7 @@ toSectionedAddress(const Optional<DWARFFormValue> &V) {
/// \returns the extracted address value or Default if the V doesn't have a
/// value or the form value's encoding wasn't an address form.
inline uint64_t toAddress(const Optional<DWARFFormValue> &V, uint64_t Default) {
- return toAddress(V).getValueOr(Default);
+ return toAddress(V).value_or(Default);
}
/// Take an optional DWARFFormValue and try to extract an section offset.
@@ -327,7 +329,7 @@ inline Optional<uint64_t> toSectionOffset(const Optional<DWARFFormValue> &V) {
/// have a value or the form value's encoding wasn't a section offset form.
inline uint64_t toSectionOffset(const Optional<DWARFFormValue> &V,
uint64_t Default) {
- return toSectionOffset(V).getValueOr(Default);
+ return toSectionOffset(V).value_or(Default);
}
/// Take an optional DWARFFormValue and try to extract block data.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
index 38cd42ddb883..6b23c4e57d95 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
@@ -11,13 +11,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataExtractor.h"
#include <cstdint>
#include <utility>
namespace llvm {
class raw_ostream;
+class DataExtractor;
class DWARFGdbIndex {
uint32_t Version;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index 515623cedc94..84c8d71b04fc 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -14,7 +14,6 @@
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <map>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
index 3add711943d0..fef59c5e95f8 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
@@ -10,6 +10,7 @@
#define LLVM_DEBUGINFO_DWARF_DWARFRELOCMAP_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/RelocationResolver.h"
#include <cstdint>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h
new file mode 100644
index 000000000000..e05271740e61
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypePrinter.h
@@ -0,0 +1,67 @@
+//===- DWARFTypePrinter.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H
+#define LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+
+#include <string>
+
+namespace llvm {
+
+class raw_ostream;
+
+// FIXME: We should have pretty printers per language. Currently we print
+// everything as if it was C++ and fall back to the TAG type name.
+struct DWARFTypePrinter {
+ raw_ostream &OS;
+ bool Word = true;
+ bool EndedWithTemplate = false;
+
+ DWARFTypePrinter(raw_ostream &OS) : OS(OS) {}
+
+ /// Dump the name encoded in the type tag.
+ void appendTypeTagName(dwarf::Tag T);
+
+ void appendArrayType(const DWARFDie &D);
+
+ DWARFDie skipQualifiers(DWARFDie D);
+
+ bool needsParens(DWARFDie D);
+
+ void appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner, StringRef Ptr);
+
+ DWARFDie appendUnqualifiedNameBefore(DWARFDie D,
+ std::string *OriginalFullName = nullptr);
+
+ void appendUnqualifiedNameAfter(DWARFDie D, DWARFDie Inner,
+ bool SkipFirstParamIfArtificial = false);
+ void appendQualifiedName(DWARFDie D);
+ DWARFDie appendQualifiedNameBefore(DWARFDie D);
+ bool appendTemplateParameters(DWARFDie D, bool *FirstParameter = nullptr);
+ void decomposeConstVolatile(DWARFDie &N, DWARFDie &T, DWARFDie &C,
+ DWARFDie &V);
+ void appendConstVolatileQualifierAfter(DWARFDie N);
+ void appendConstVolatileQualifierBefore(DWARFDie N);
+
+ /// Recursively append the DIE type name when applicable.
+ void appendUnqualifiedName(DWARFDie D,
+ std::string *OriginalFullName = nullptr);
+
+ void appendSubroutineNameAfter(DWARFDie D, DWARFDie Inner,
+ bool SkipFirstParamIfArtificial, bool Const,
+ bool Volatile);
+ void appendScopes(DWARFDie D);
+};
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARF_DWARFTYPEPRINTER_H
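DWARFTypePrinter reconstructs a human-readable (C++-flavored, per the FIXME) type name from type DIEs, with the Word/EndedWithTemplate flags tracking token spacing. A minimal sketch that renders a qualified type name into a std::string, assuming TypeDie is a type DIE:

  #include "llvm/Support/raw_ostream.h"

  // Sketch: render the qualified name of a type DIE.
  std::string Name;
  llvm::raw_string_ostream OS(Name);
  llvm::DWARFTypePrinter Printer(OS);
  Printer.appendQualifiedName(TypeDie); // scopes, then the unqualified name
  OS.flush();
  // Name now holds something like "ns::Outer::Inner".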
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index c95bdcbd8a43..85ec6fd86ade 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -11,12 +11,11 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
-#include "llvm/Support/DataExtractor.h"
#include <cstdint>
namespace llvm {
+struct DIDumpOptions;
class DWARFContext;
class DWARFDebugAbbrev;
struct DWARFSection;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index b96a4c19758f..9188865b4d77 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -9,28 +9,26 @@
#ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
#define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
-#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Support/DataExtractor.h"
-#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
+#include <set>
#include <utility>
#include <vector>
@@ -40,6 +38,12 @@ class DWARFAbbreviationDeclarationSet;
class DWARFContext;
class DWARFDebugAbbrev;
class DWARFUnit;
+class DWARFDebugRangeList;
+class DWARFLocationTable;
+class DWARFObject;
+class raw_ostream;
+struct DIDumpOptions;
+struct DWARFSection;
/// Base class describing the header of any kind of "unit." Some information
/// is specific to certain unit types. We separate this class out so we can
@@ -238,6 +242,11 @@ class DWARFUnit {
/// std::map::upper_bound for address range lookup.
std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap;
+ /// Map from the location (interpreted DW_AT_location) of a DW_TAG_variable
+ /// to the end address and the corresponding DIE.
+ std::map<uint64_t, std::pair<uint64_t, DWARFDie>> VariableDieMap;
+ DenseSet<uint64_t> RootsParsedForVariables;
+
using die_iterator_range =
iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>;
@@ -320,6 +329,9 @@ public:
/// Recursively update address to Die map.
void updateAddressDieMap(DWARFDie Die);
+ /// Recursively update address to variable Die map.
+ void updateVariableDieMap(DWARFDie Die);
+
void setRangesSection(const DWARFSection *RS, uint64_t Base) {
RangeSection = RS;
RangeSectionBase = Base;
@@ -434,6 +446,10 @@ public:
/// cleared.
DWARFDie getSubroutineForAddress(uint64_t Address);
+ /// Returns the variable DIE for the address provided. The pointer is alive as
+ /// long as parsed compile unit DIEs are not cleared.
+ DWARFDie getVariableForAddress(uint64_t Address);
+
/// getInlinedChainForAddress - fetches inlined chain for a given address.
/// Returns empty chain if there is no subprogram containing address. The
/// chain is valid as long as parsed compile unit DIEs are not cleared.
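VariableDieMap parallels the existing AddrDieMap, but is keyed by the addresses that variables' DW_AT_location expressions resolve to, so a data address can be mapped back to its DW_TAG_variable; getVariableForAddress is the lookup entry point, and presumably what backs getLineInfoForDataAddress in DWARFContext. A sketch, assuming CU is a DWARFUnit and Addr a data address:

  // Sketch: find the variable DIE covering a data address.
  if (llvm::DWARFDie VarDie = CU.getVariableForAddress(Addr)) {
    // Valid until the unit's parsed DIEs are cleared.
    VarDie.dump(llvm::outs());
  }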
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
index edea59e474cf..b5e191ba7def 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -11,13 +11,13 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataExtractor.h"
#include <cstdint>
#include <memory>
namespace llvm {
class raw_ostream;
+class DataExtractor;
/// The enum of section identifiers to be used in internal interfaces.
///
@@ -64,6 +64,25 @@ enum DWARFSectionKind {
DW_SECT_EXT_MACINFO = 10,
};
+inline const char *toString(DWARFSectionKind Kind) {
+ switch (Kind) {
+ case DW_SECT_EXT_unknown:
+ return "Unknown DW_SECT value 0";
+#define STRINGIZE(X) #X
+#define HANDLE_DW_SECT(ID, NAME) \
+ case DW_SECT_##NAME: \
+ return "DW_SECT_" STRINGIZE(NAME);
+#include "llvm/BinaryFormat/Dwarf.def"
+ case DW_SECT_EXT_TYPES:
+ return "DW_SECT_TYPES";
+ case DW_SECT_EXT_LOC:
+ return "DW_SECT_LOC";
+ case DW_SECT_EXT_MACINFO:
+ return "DW_SECT_MACINFO";
+ }
+ llvm_unreachable("unknown DWARFSectionKind");
+}
+
/// Convert the internal value for a section kind to an on-disk value.
///
/// The conversion depends on the version of the index section.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index 505686bfbf59..1f1ebe943238 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -12,9 +12,9 @@
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include <cstdint>
#include <map>
#include <set>
@@ -22,13 +22,14 @@
namespace llvm {
class raw_ostream;
struct DWARFAddressRange;
+class DWARFUnit;
+class DWARFUnitVector;
struct DWARFAttribute;
class DWARFContext;
class DWARFDataExtractor;
class DWARFDebugAbbrev;
class DataExtractor;
struct DWARFSection;
-class DWARFUnit;
/// A class that verifies DWARF debug information given a DWARF Context.
class DWARFVerifier {
@@ -151,12 +152,15 @@ private:
/// section.
///
/// \param S The DWARF Section to verify.
- /// \param SectionKind The object-file section kind that S comes from.
///
/// \returns The number of errors that occurred during verification.
unsigned verifyUnitSection(const DWARFSection &S);
unsigned verifyUnits(const DWARFUnitVector &Units);
+ unsigned verifyIndexes(const DWARFObject &DObj);
+ unsigned verifyIndex(StringRef Name, DWARFSectionKind SectionKind,
+ StringRef Index);
+
/// Verifies that a call site entry is nested within a subprogram with a
/// DW_AT_call attribute.
///
@@ -301,6 +305,24 @@ public:
/// \returns true if all sections verify successfully, false otherwise.
bool handleDebugInfo();
+ /// Verify the information in the .debug_cu_index section.
+ ///
+ /// Any errors are reported to the stream that this object was
+ /// constructed with.
+ ///
+ /// \returns true if the .debug_cu_index verifies successfully, false
+ /// otherwise.
+ bool handleDebugCUIndex();
+
+ /// Verify the information in the .debug_tu_index section.
+ ///
+ /// Any errors are reported to the stream that this object was
+ /// constructed with.
+ ///
+ /// \returns true if the .debug_tu_index verifies successfully, false
+ /// otherwise.
+ bool handleDebugTUIndex();
+
/// Verify the information in the .debug_line section.
///
/// Any errors are reported to the stream that was this object was
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 32fc54b14796..b8d7199f2d87 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -10,7 +10,7 @@
#define LLVM_DEBUGINFO_GSYM_DWARFTRANSFORMER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
#include "llvm/Support/Error.h"
namespace llvm {
diff --git a/llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h b/llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h
new file mode 100644
index 000000000000..9a6568719875
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/ExtractRanges.h
@@ -0,0 +1,81 @@
+//===- ExtractRanges.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_EXTRACTRANGES_H
+#define LLVM_DEBUGINFO_GSYM_EXTRACTRANGES_H
+
+#include "llvm/ADT/AddressRanges.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdint.h>
+#include <vector>
+
+#define HEX8(v) llvm::format_hex(v, 4)
+#define HEX16(v) llvm::format_hex(v, 6)
+#define HEX32(v) llvm::format_hex(v, 10)
+#define HEX64(v) llvm::format_hex(v, 18)
+
+namespace llvm {
+class DataExtractor;
+class raw_ostream;
+
+namespace gsym {
+
+class FileWriter;
+
+/// AddressRange objects are encoded and decoded to be relative to a base
+/// address. This will be the FunctionInfo's start address if the AddressRange
+/// is directly contained in a FunctionInfo, or a base address of the
+/// containing parent AddressRange or AddressRanges. This allows address
+/// ranges to be efficiently encoded using ULEB128 encodings as we encode the
+/// offset and size of each range instead of full addresses. This also makes
+/// encoded addresses easy to relocate as we just need to relocate one base
+/// address.
+/// @{
+AddressRange decodeRange(DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t &Offset);
+void encodeRange(const AddressRange &Range, FileWriter &O, uint64_t BaseAddr);
+/// @}
+
+/// Skip an address range object in the specified data at the specified
+/// offset.
+///
+/// \param Data The binary stream to read the data from.
+///
+/// \param Offset The byte offset within \a Data.
+void skipRange(DataExtractor &Data, uint64_t &Offset);
+
+/// Address ranges are decoded and encoded to be relative to a base address.
+/// See the AddressRange comment for the encode and decode methods for full
+/// details.
+/// @{
+void decodeRanges(AddressRanges &Ranges, DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t &Offset);
+void encodeRanges(const AddressRanges &Ranges, FileWriter &O,
+ uint64_t BaseAddr);
+/// @}
+
+/// Skip a series of address range objects in the specified data at the
+/// specified offset.
+///
+/// \param Data The binary stream to read the data from.
+///
+/// \param Offset The byte offset within \a Data.
+///
+/// \returns The number of address ranges that were skipped.
+uint64_t skipRanges(DataExtractor &Data, uint64_t &Offset);
+
+} // namespace gsym
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_EXTRACTRANGES_H
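The scheme the new header documents stores each range as (offset from base, size) in ULEB128 rather than as two absolute 64-bit addresses. A rough standalone sketch of that idea under the usual ULEB128 rules, not GSYM's actual FileWriter/DataExtractor API:

```cpp
#include <cstdint>
#include <vector>

// Append V to Out in ULEB128: 7 value bits per byte, high bit means "more".
static void emitULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V != 0)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V != 0);
}

// Encode [Start, End) relative to BaseAddr as (offset, size). Ranges close
// to their base encode in a couple of bytes, and relocating the whole set
// only requires adjusting the one base address.
static void emitRange(uint64_t Start, uint64_t End, uint64_t BaseAddr,
                      std::vector<uint8_t> &Out) {
  emitULEB128(Start - BaseAddr, Out); // offset from the base address
  emitULEB128(End - Start, Out);      // size of the range
}
```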
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 552337f54390..fb48f7f9a93c 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -10,10 +10,10 @@
#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/LookupResult.h"
-#include "llvm/DebugInfo/GSYM/Range.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include <cstdint>
#include <tuple>
@@ -102,9 +102,7 @@ struct FunctionInfo {
/// debug info, we might end up with multiple FunctionInfo objects for the
/// same range and we need to be able to tell which one is the better object
/// to use.
- bool hasRichInfo() const {
- return OptLineTable.hasValue() || Inline.hasValue();
- }
+ bool hasRichInfo() const { return OptLineTable || Inline; }
/// Query if a FunctionInfo object is valid.
///
@@ -170,12 +168,9 @@ struct FunctionInfo {
uint64_t FuncAddr,
uint64_t Addr);
- uint64_t startAddress() const { return Range.Start; }
- uint64_t endAddress() const { return Range.End; }
+ uint64_t startAddress() const { return Range.start(); }
+ uint64_t endAddress() const { return Range.end(); }
uint64_t size() const { return Range.size(); }
- void setStartAddress(uint64_t Addr) { Range.Start = Addr; }
- void setEndAddress(uint64_t Addr) { Range.End = Addr; }
- void setSize(uint64_t Size) { Range.End = Range.Start + Size; }
void clear() {
Range = {0, 0};
@@ -203,8 +198,8 @@ inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
return LHS.Range < RHS.Range;
// Then sort by inline
- if (LHS.Inline.hasValue() != RHS.Inline.hasValue())
- return RHS.Inline.hasValue();
+ if (LHS.Inline.has_value() != RHS.Inline.has_value())
+ return RHS.Inline.has_value();
return LHS.OptLineTable < RHS.OptLineTable;
}
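The hasRichInfo change above is part of the tree-wide migration off llvm::Optional's deprecated hasValue(): Optional, like std::optional, converts contextually to bool, so `OptLineTable || Inline` already asks whether either holds a value. A tiny illustration using std::optional as a stand-in:

```cpp
#include <cassert>
#include <optional>

struct Info {
  std::optional<int> OptLineTable; // stand-ins for GSYM's LineTable/InlineInfo
  std::optional<int> Inline;
  // Contextual conversion to bool replaces the deprecated hasValue().
  bool hasRichInfo() const { return OptLineTable || Inline; }
};

int main() {
  Info I;
  assert(!I.hasRichInfo());
  I.Inline = 42;
  assert(I.hasRichInfo());
}
```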
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 872ccd4a0b6a..29ad1c18e295 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -14,11 +14,11 @@
#include <mutex>
#include <thread>
+#include "llvm/ADT/AddressRanges.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
-#include "llvm/DebugInfo/GSYM/Range.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
diff --git a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
index 9bcfa5935180..80385116598a 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -10,14 +10,13 @@
#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
#include "llvm/DebugInfo/GSYM/LineEntry.h"
#include "llvm/DebugInfo/GSYM/LookupResult.h"
-#include "llvm/DebugInfo/GSYM/Range.h"
#include "llvm/Support/Error.h"
#include <stdint.h>
#include <vector>
-
namespace llvm {
class raw_ostream;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h b/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h
index b4e7587fc5ee..e68624b21929 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LineEntry.h
@@ -9,7 +9,7 @@
#ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
#define LLVM_DEBUGINFO_GSYM_LINEENTRY_H
-#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
namespace llvm {
namespace gsym {
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 3dabbce32bb2..44e58f522002 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -9,8 +9,8 @@
#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+#include "llvm/ADT/AddressRanges.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/GSYM/Range.h"
#include <inttypes.h>
#include <vector>
diff --git a/llvm/include/llvm/DebugInfo/GSYM/Range.h b/llvm/include/llvm/DebugInfo/GSYM/Range.h
deleted file mode 100644
index 36ad95602d14..000000000000
--- a/llvm/include/llvm/DebugInfo/GSYM/Range.h
+++ /dev/null
@@ -1,130 +0,0 @@
-//===- Range.h --------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_GSYM_RANGE_H
-#define LLVM_DEBUGINFO_GSYM_RANGE_H
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <stdint.h>
-#include <vector>
-
-#define HEX8(v) llvm::format_hex(v, 4)
-#define HEX16(v) llvm::format_hex(v, 6)
-#define HEX32(v) llvm::format_hex(v, 10)
-#define HEX64(v) llvm::format_hex(v, 18)
-
-namespace llvm {
-class DataExtractor;
-class raw_ostream;
-
-namespace gsym {
-
-class FileWriter;
-
-/// A class that represents an address range. The range is specified using
-/// a start and an end address.
-struct AddressRange {
- uint64_t Start;
- uint64_t End;
- AddressRange() : Start(0), End(0) {}
- AddressRange(uint64_t S, uint64_t E) : Start(S), End(E) {}
- uint64_t size() const { return End - Start; }
- bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; }
- bool intersects(const AddressRange &R) const {
- return Start < R.End && R.Start < End;
- }
-
- bool operator==(const AddressRange &R) const {
- return Start == R.Start && End == R.End;
- }
- bool operator!=(const AddressRange &R) const {
- return !(*this == R);
- }
- bool operator<(const AddressRange &R) const {
- return std::make_pair(Start, End) < std::make_pair(R.Start, R.End);
- }
- /// AddressRange objects are encoded and decoded to be relative to a base
- /// address. This will be the FunctionInfo's start address if the AddressRange
- /// is directly contained in a FunctionInfo, or a base address of the
- /// containing parent AddressRange or AddressRanges. This allows address
- /// ranges to be efficiently encoded using ULEB128 encodings as we encode the
- /// offset and size of each range instead of full addresses. This also makes
- /// encoded addresses easy to relocate as we just need to relocate one base
- /// address.
- /// @{
- void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
- void encode(FileWriter &O, uint64_t BaseAddr) const;
- /// @}
-
- /// Skip an address range object in the specified data a the specified
- /// offset.
- ///
- /// \param Data The binary stream to read the data from.
- ///
- /// \param Offset The byte offset within \a Data.
- static void skip(DataExtractor &Data, uint64_t &Offset);
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
-
-/// The AddressRanges class helps normalize address range collections.
-/// This class keeps a sorted vector of AddressRange objects and can perform
-/// insertions and searches efficiently. The address ranges are always sorted
-/// and never contain any invalid or empty address ranges. This allows us to
-/// emit address ranges into the GSYM file efficiently. Intersecting address
-/// ranges are combined during insertion so that we can emit the most compact
-/// representation for address ranges when writing to disk.
-class AddressRanges {
-protected:
- using Collection = std::vector<AddressRange>;
- Collection Ranges;
-public:
- void clear() { Ranges.clear(); }
- bool empty() const { return Ranges.empty(); }
- bool contains(uint64_t Addr) const;
- bool contains(AddressRange Range) const;
- Optional<AddressRange> getRangeThatContains(uint64_t Addr) const;
- void insert(AddressRange Range);
- size_t size() const { return Ranges.size(); }
- bool operator==(const AddressRanges &RHS) const {
- return Ranges == RHS.Ranges;
- }
- const AddressRange &operator[](size_t i) const {
- assert(i < Ranges.size());
- return Ranges[i];
- }
- Collection::const_iterator begin() const { return Ranges.begin(); }
- Collection::const_iterator end() const { return Ranges.end(); }
-
- /// Address ranges are decoded and encoded to be relative to a base address.
- /// See the AddressRange comment for the encode and decode methods for full
- /// details.
- /// @{
- void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
- void encode(FileWriter &O, uint64_t BaseAddr) const;
- /// @}
-
- /// Skip an address range object in the specified data a the specified
- /// offset.
- ///
- /// \param Data The binary stream to read the data from.
- ///
- /// \param Offset The byte offset within \a Data.
- ///
- /// \returns The number of address ranges that were skipped.
- static uint64_t skip(DataExtractor &Data, uint64_t &Offset);
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
-
-} // namespace gsym
-} // namespace llvm
-
-#endif // LLVM_DEBUGINFO_GSYM_RANGE_H
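The deleted class's normalization guarantee (kept sorted, no empty ranges, intersecting ranges coalesced on insert) carries over to the generalized llvm::AddressRanges in llvm/ADT/AddressRanges.h, which now serves GSYM and DWARF alike. A standalone sketch of that insertion behavior, merging adjacent as well as overlapping entries; the names are hypothetical, not the ADT implementation:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct Range {
  uint64_t Start, End; // half-open [Start, End)
};

// Insert R keeping Ranges sorted by Start, coalescing any entries that
// overlap or touch it, as the AddressRanges comment above describes.
static void insertRange(std::vector<Range> &Ranges, Range R) {
  if (R.Start >= R.End)
    return; // never store empty or inverted ranges
  auto It = std::lower_bound(
      Ranges.begin(), Ranges.end(), R,
      [](const Range &A, const Range &B) { return A.Start < B.Start; });
  It = Ranges.insert(It, R);
  if (It != Ranges.begin() && std::prev(It)->End >= It->Start) {
    // Fold into the predecessor, then continue merging from there.
    std::prev(It)->End = std::max(std::prev(It)->End, It->End);
    It = std::prev(Ranges.erase(It));
  }
  while (std::next(It) != Ranges.end() && It->End >= std::next(It)->Start) {
    It->End = std::max(It->End, std::next(It)->End);
    Ranges.erase(std::next(It));
  }
}

int main() {
  std::vector<Range> Ranges;
  insertRange(Ranges, {0x1000, 0x1100});
  insertRange(Ranges, {0x1100, 0x1200}); // touches: coalesced into one range
  assert(Ranges.size() == 1 && Ranges[0].End == 0x1200);
}
```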
diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
index d920335d373e..d9c9ede91be5 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -10,7 +10,7 @@
#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
#include <stdint.h>
namespace llvm {
diff --git a/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h b/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h
index 1a03d42ded92..2ac18a8efaba 100644
--- a/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h
+++ b/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h
@@ -11,7 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
#include <cstdint>
@@ -22,6 +22,8 @@ namespace llvm {
class FileBufferByteStream;
namespace msf {
+struct MSFLayout;
+
class MSFBuilder {
public:
/// Create a new `MSFBuilder`.
diff --git a/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index bfa67d39bc76..6cd5c8d1d668 100644
--- a/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -9,6 +9,7 @@
#ifndef LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
#define LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include <cassert>
#include <cstdint>
#include <memory>
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
index 70ef4d058082..1ecae5c32509 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
@@ -10,16 +10,16 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULEDESCRIPTOR_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <cstdint>
namespace llvm {
+template <typename T> struct VarStreamArrayExtractor;
namespace pdb {
-
+struct ModuleInfoHeader;
+struct SectionContrib;
class DbiModuleDescriptor {
friend class DbiStreamBuilder;
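This hunk is representative of the whole commit: the header stops including RawTypes.h and BinaryStreamArray.h and instead forward-declares the few names it mentions, which breaks transitive include chains and cuts rebuild times. A generic sketch of the rule of thumb, with hypothetical Widget/Gadget types rather than LLVM ones:

```cpp
// Widget.h: a header that mentions Gadget without needing its definition.
namespace demo {

class Gadget; // forward declaration: enough for pointers and references

class Widget {
public:
  void attach(Gadget &G);  // parameter by reference: incomplete type is fine
  Gadget *current() const { return Owner; }

private:
  Gadget *Owner = nullptr; // pointer member: incomplete type is fine
};

} // namespace demo
// Only the .cpp files that size, copy, or call into Gadget include Gadget.h.
```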
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index 8a49f46320b0..287f319e01b0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -9,13 +9,12 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULEDESCRIPTORBUILDER_H
#define LLVM_DEBUGINFO_PDB_NATIVE_DBIMODULEDESCRIPTORBUILDER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <string>
@@ -23,9 +22,8 @@
namespace llvm {
class BinaryStreamWriter;
-
namespace codeview {
-class DebugSubsectionRecordBuilder;
+class DebugSubsection;
}
namespace msf {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index 0bdb27a0a991..3f60130f5752 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -9,14 +9,10 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_DBISTREAM_H
#define LLVM_DEBUGINFO_PDB_NATIVE_DBISTREAM_H
-#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
@@ -24,13 +20,19 @@
#include "llvm/Support/Error.h"
namespace llvm {
+class BinaryStream;
namespace object {
struct FpoData;
struct coff_section;
}
-
+namespace msf {
+class MappedBlockStream;
+}
namespace pdb {
-class DbiStreamBuilder;
+struct DbiStreamHeader;
+struct SecMapEntry;
+struct SectionContrib2;
+struct SectionContrib;
class PDBFile;
class ISectionContribVisitor;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
index ef441d433040..2f99aa942a05 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
@@ -10,35 +10,33 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_DBISTREAMBUILDER_H
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/Support/BinaryStreamRef.h"
namespace llvm {
+
+class BinaryStreamWriter;
namespace codeview {
struct FrameData;
}
namespace msf {
class MSFBuilder;
-}
-namespace object {
-struct coff_section;
-struct FpoData;
+struct MSFLayout;
}
namespace pdb {
-class DbiStream;
-struct DbiStreamHeader;
class DbiModuleDescriptorBuilder;
-class PDBFile;
class DbiStreamBuilder {
public:
@@ -134,7 +132,7 @@ private:
std::vector<SecMapEntry> SectionMap;
std::array<Optional<DebugStream>, (int)DbgHeaderType::Max> DbgStreams;
};
-}
+} // namespace pdb
}
#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h b/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h
index 60cd494639c1..dcc67f1e4a8c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/EnumTables.h
@@ -10,9 +10,9 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_ENUMTABLES_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/ScopedPrinter.h"
namespace llvm {
+template <typename T> struct EnumEntry;
namespace pdb {
ArrayRef<EnumEntry<uint16_t>> getOMFSegMapDescFlagNames();
}
diff --git a/llvm/tools/llvm-pdbutil/FormatUtil.h b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h
index b99ccec215b5..ed745eaf9727 100644
--- a/llvm/tools/llvm-pdbutil/FormatUtil.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVMPDBUTIL_FORMAT_UTIL_H
-#define LLVM_TOOLS_LLVMPDBUTIL_FORMAT_UTIL_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_FORMATUTIL_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_FORMATUTIL_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
@@ -22,14 +22,6 @@
namespace llvm {
namespace pdb {
-std::string truncateStringBack(StringRef S, uint32_t MaxLen);
-std::string truncateStringMiddle(StringRef S, uint32_t MaxLen);
-std::string truncateStringFront(StringRef S, uint32_t MaxLen);
-std::string truncateQuotedNameFront(StringRef Label, StringRef Name,
- uint32_t MaxLen);
-std::string truncateQuotedNameBack(StringRef Label, StringRef Name,
- uint32_t MaxLen);
-
#define PUSH_MASKED_FLAG(Enum, Mask, TheOpt, Value, Text) \
if (Enum::TheOpt == (Value & Mask)) \
Opts.push_back(Text);
@@ -136,6 +128,6 @@ fmtle(support::detail::packed_endian_specific_integral<T, support::little,
Value) {
return detail::EndianAdapter<T>(std::move(Value));
}
-}
+} // namespace pdb
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
index 9530a15849d5..28a72c887f25 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
@@ -10,18 +10,20 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_GSISTREAMBUILDER_H
#include "llvm/ADT/DenseSet.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
namespace llvm {
+namespace codeview {
+class ConstantSym;
+class DataSym;
+class ProcRefSym;
+} // namespace codeview
+template <typename T> struct BinaryItemTraits;
template <> struct BinaryItemTraits<codeview::CVSymbol> {
static size_t length(const codeview::CVSymbol &Item) {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
index 2b74babd6ab9..2988bef4a75b 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
@@ -10,18 +10,18 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_GLOBALSSTREAM_H
#include "llvm/ADT/iterator.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
namespace llvm {
+class BinaryStreamReader;
+namespace msf {
+class MappedBlockStream;
+}
namespace pdb {
-class DbiStream;
-class PDBFile;
class SymbolStream;
/// Iterator over hash records producing symbol record offsets. Abstracts away
@@ -81,7 +81,7 @@ private:
GSIHashTable GlobalsTable;
std::unique_ptr<msf::MappedBlockStream> Stream;
};
-}
+} // namespace pdb
}
#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
index 474bd796b2b3..7924cffd640f 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -23,9 +23,6 @@
namespace llvm {
-class BinaryStreamReader;
-class BinaryStreamWriter;
-
namespace pdb {
Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h
index 67db92b64913..625bab6a4378 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStream.h
@@ -9,22 +9,18 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_INFOSTREAM_H
#define LLVM_DEBUGINFO_PDB_NATIVE_INFOSTREAM_H
-#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/Support/BinaryStream.h"
+#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
namespace llvm {
namespace pdb {
-class InfoStreamBuilder;
-class PDBFile;
-
+struct InfoStreamHeader;
class InfoStream {
friend class InfoStreamBuilder;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
index 4952173c5873..2d5088a3bd42 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
@@ -12,19 +12,17 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Support/Error.h"
-#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
class WritableBinaryStreamRef;
namespace msf {
class MSFBuilder;
+struct MSFLayout;
}
namespace pdb {
-class PDBFile;
class NamedStreamMap;
class InfoStreamBuilder {
@@ -70,7 +68,7 @@ private:
NamedStreamMap &NamedStreams;
};
-}
+} // namespace pdb
}
#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
index b2ba81a88254..259c924d9d7c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
@@ -9,15 +9,14 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_INJECTEDSOURCESTREAM_H
#define LLVM_DEBUGINFO_PDB_NATIVE_INJECTEDSOURCESTREAM_H
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/Error.h"
namespace llvm {
-namespace msf {
-class MappedBlockStream;
-}
namespace pdb {
+struct SrcHeaderBlockEntry;
+struct SrcHeaderBlockHeader;
class PDBStringTable;
class InjectedSourceStream {
@@ -38,6 +37,6 @@ private:
HashTable<SrcHeaderBlockEntry> InjectedSourceTable;
};
}
-}
+} // namespace llvm
#endif
diff --git a/llvm/tools/llvm-pdbutil/InputFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h
index 633ab34a54d4..c0d722960540 100644
--- a/llvm/tools/llvm-pdbutil/InputFile.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/InputFile.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVMPDBDUMP_INPUTFILE_H
-#define LLVM_TOOLS_LLVMPDBDUMP_INPUTFILE_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_INPUTFILE_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_INPUTFILE_H
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerUnion.h"
@@ -15,6 +15,7 @@
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
@@ -54,6 +55,9 @@ class InputFile {
getOrCreateTypeCollection(TypeCollectionKind Kind);
public:
+ InputFile(PDBFile *Pdb) { PdbOrObj = Pdb; }
+ InputFile(object::COFFObjectFile *Obj) { PdbOrObj = Obj; }
+ InputFile(MemoryBuffer *Buffer) { PdbOrObj = Buffer; }
~InputFile();
InputFile(InputFile &&Other) = default;
@@ -91,6 +95,7 @@ public:
explicit SymbolGroup(InputFile *File, uint32_t GroupIndex = 0);
Expected<StringRef> getNameFromStringTable(uint32_t Offset) const;
+ Expected<StringRef> getNameFromChecksums(uint32_t Offset) const;
void formatFromFileName(LinePrinter &Printer, StringRef File,
bool Append = false) const;
@@ -148,6 +153,78 @@ private:
SymbolGroup Value;
};
+Expected<ModuleDebugStreamRef>
+getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index);
+Expected<ModuleDebugStreamRef> getModuleDebugStream(PDBFile &File,
+ uint32_t Index);
+
+bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
+ const FilterOptions &Filters);
+
+// TODO: Change these callbacks to be function_refs (de-templatify them).
+template <typename CallbackT>
+Error iterateOneModule(InputFile &File, const PrintScope &HeaderScope,
+ const SymbolGroup &SG, uint32_t Modi,
+ CallbackT Callback) {
+ HeaderScope.P.formatLine(
+ "Mod {0:4} | `{1}`: ",
+ fmt_align(Modi, AlignStyle::Right, HeaderScope.LabelWidth), SG.name());
+
+ AutoIndent Indent(HeaderScope);
+ return Callback(Modi, SG);
+}
+
+template <typename CallbackT>
+Error iterateSymbolGroups(InputFile &Input, const PrintScope &HeaderScope,
+ CallbackT Callback) {
+ AutoIndent Indent(HeaderScope);
+
+ FilterOptions Filters = HeaderScope.P.getFilters();
+ if (Filters.DumpModi) {
+ uint32_t Modi = *Filters.DumpModi;
+ SymbolGroup SG(&Input, Modi);
+ return iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(Modi)),
+ SG, Modi, Callback);
+ }
+
+ uint32_t I = 0;
+
+ for (const auto &SG : Input.symbol_groups()) {
+ if (shouldDumpSymbolGroup(I, SG, Filters))
+ if (auto Err =
+ iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(I)),
+ SG, I, Callback))
+ return Err;
+
+ ++I;
+ }
+ return Error::success();
+}
+
+template <typename SubsectionT>
+Error iterateModuleSubsections(
+ InputFile &File, const PrintScope &HeaderScope,
+ llvm::function_ref<Error(uint32_t, const SymbolGroup &, SubsectionT &)>
+ Callback) {
+
+ return iterateSymbolGroups(
+ File, HeaderScope, [&](uint32_t Modi, const SymbolGroup &SG) -> Error {
+ for (const auto &SS : SG.getDebugSubsections()) {
+ SubsectionT Subsection;
+
+ if (SS.kind() != Subsection.kind())
+ continue;
+
+ BinaryStreamReader Reader(SS.getRecordData());
+ if (auto Err = Subsection.initialize(Reader))
+ continue;
+ if (auto Err = Callback(Modi, SG, Subsection))
+ return Err;
+ }
+ return Error::success();
+ });
+}
+
} // namespace pdb
} // namespace llvm
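With iterateModuleSubsections now exported from a public header, tools outside llvm-pdbutil can walk per-module CodeView subsections the same way. A hedged sketch, assuming the caller has already opened an InputFile and built a LinePrinter, and that PrintScope is constructed from a printer and an indent level as llvm-pdbutil does; DebugLinesSubsectionRef is the CodeView line-table subsection reader:

```cpp
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
#include "llvm/DebugInfo/PDB/Native/InputFile.h"
#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"

using namespace llvm;
using namespace llvm::pdb;

// Report which modules carry a DEBUG_S_LINES subsection.
static Error dumpLineSubsections(InputFile &File, LinePrinter &Printer) {
  PrintScope Scope(Printer, /*IndentLevel=*/2);
  return iterateModuleSubsections<codeview::DebugLinesSubsectionRef>(
      File, Scope,
      [&](uint32_t Modi, const SymbolGroup &SG,
          codeview::DebugLinesSubsectionRef &Lines) -> Error {
        Printer.formatLine("module {0}: found a line subsection", Modi);
        return Error::success();
      });
}
```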
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.h b/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h
index b6bb77280fd5..0db21309f593 100644
--- a/llvm/tools/llvm-pdbutil/LinePrinter.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/LinePrinter.h
@@ -6,12 +6,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVMPDBDUMP_LINEPRINTER_H
-#define LLVM_TOOLS_LLVMPDBDUMP_LINEPRINTER_H
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_LINEPRINTER_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_LINEPRINTER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
@@ -19,6 +20,23 @@
#include <list>
+// Container for filter options to control which elements will be printed.
+struct FilterOptions {
+ std::list<std::string> ExcludeTypes;
+ std::list<std::string> ExcludeSymbols;
+ std::list<std::string> ExcludeCompilands;
+ std::list<std::string> IncludeTypes;
+ std::list<std::string> IncludeSymbols;
+ std::list<std::string> IncludeCompilands;
+ uint32_t PaddingThreshold;
+ uint32_t SizeThreshold;
+ llvm::Optional<uint32_t> DumpModi;
+ llvm::Optional<uint32_t> ParentRecurseDepth;
+ llvm::Optional<uint32_t> ChildrenRecurseDepth;
+ llvm::Optional<uint32_t> SymbolOffset;
+ bool JustMyCode;
+};
+
namespace llvm {
namespace msf {
class MSFStreamLayout;
@@ -27,12 +45,14 @@ namespace pdb {
class ClassLayout;
class PDBFile;
+class SymbolGroup;
class LinePrinter {
friend class WithColor;
public:
- LinePrinter(int Indent, bool UseColor, raw_ostream &Stream);
+ LinePrinter(int Indent, bool UseColor, raw_ostream &Stream,
+ const FilterOptions &Filters);
void Indent(uint32_t Amount = 0);
void Unindent(uint32_t Amount = 0);
@@ -40,10 +60,10 @@ public:
void printLine(const Twine &T);
void print(const Twine &T);
- template <typename... Ts> void formatLine(const char *Fmt, Ts &&... Items) {
+ template <typename... Ts> void formatLine(const char *Fmt, Ts &&...Items) {
printLine(formatv(Fmt, std::forward<Ts>(Items)...));
}
- template <typename... Ts> void format(const char *Fmt, Ts &&... Items) {
+ template <typename... Ts> void format(const char *Fmt, Ts &&...Items) {
print(formatv(Fmt, std::forward<Ts>(Items)...));
}
@@ -69,6 +89,8 @@ public:
bool IsSymbolExcluded(llvm::StringRef SymbolName);
bool IsCompilandExcluded(llvm::StringRef CompilandName);
+ const FilterOptions &getFilters() const { return Filters; }
+
private:
template <typename Iter>
void SetFilters(std::list<Regex> &List, Iter Begin, Iter End) {
@@ -81,6 +103,7 @@ private:
int IndentSpaces;
int CurrentIndent;
bool UseColor;
+ const FilterOptions &Filters;
std::list<Regex> ExcludeCompilandFilters;
std::list<Regex> ExcludeTypeFilters;
@@ -102,11 +125,8 @@ struct PrintScope {
uint32_t LabelWidth = 0;
};
-inline Optional<PrintScope> withLabelWidth(const Optional<PrintScope> &Scope,
- uint32_t W) {
- if (!Scope)
- return None;
- return PrintScope{*Scope, W};
+inline PrintScope withLabelWidth(const PrintScope &Scope, uint32_t W) {
+ return PrintScope{Scope, W};
}
struct AutoIndent {
@@ -114,11 +134,9 @@ struct AutoIndent {
: L(&L), Amount(Amount) {
L.Indent(Amount);
}
- explicit AutoIndent(const Optional<PrintScope> &Scope) {
- if (Scope.hasValue()) {
- L = &Scope->P;
- Amount = Scope->IndentLevel;
- }
+ explicit AutoIndent(const PrintScope &Scope) {
+ L = &Scope.P;
+ Amount = Scope.IndentLevel;
}
~AutoIndent() {
if (L)
@@ -161,7 +179,7 @@ private:
raw_ostream &OS;
bool UseColor;
};
-}
-}
+} // namespace pdb
+} // namespace llvm
#endif
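Because LinePrinter now carries the filters itself, a host tool builds a FilterOptions first and must keep it alive for the printer's lifetime, since the printer stores a const reference rather than a copy. A minimal, hypothetical setup:

```cpp
#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::pdb;

int main() {
  FilterOptions Filters{}; // value-initialized: no filters, zero thresholds
  Filters.JustMyCode = false;

  // LinePrinter keeps a reference to Filters, so Filters must outlive the
  // printer; here both share main()'s scope.
  LinePrinter Printer(/*Indent=*/2, /*UseColor=*/false, outs(), Filters);
  Printer.formatLine("indent width is {0}", 2);
  return 0;
}
```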
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index cb1ffc729512..0caf9fffbad6 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -10,10 +10,8 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_MODULEDEBUGSTREAM_H
#include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
@@ -21,10 +19,15 @@
#include <memory>
namespace llvm {
+class BinaryStreamReader;
+namespace codeview {
+class DebugChecksumsSubsectionRef;
+}
+namespace msf {
+class MappedBlockStream;
+}
namespace pdb {
-class DbiModuleDescriptor;
-
class ModuleDebugStreamRef {
using DebugSubsectionIterator = codeview::DebugSubsectionArray::Iterator;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index f110e90b3f90..18fbab0dd38c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -11,7 +11,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/Support/Error.h"
#include <cstdint>
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
index 073878afd129..c10e652efa8d 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
@@ -9,7 +9,7 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMGLOBALS_H
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h
index 32a4515d557e..a936b769d688 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h
@@ -9,16 +9,13 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMLINENUMBERS_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMLINENUMBERS_H
-#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
-#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
+#include <vector>
namespace llvm {
namespace pdb {
-class IPDBLineNumber;
class NativeEnumLineNumbers : public IPDBEnumChildren<IPDBLineNumber> {
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
index 480b3fb11419..5fc91675f209 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h
@@ -9,9 +9,9 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include <vector>
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
index 25c56567384f..2ca000c1c0fe 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
@@ -9,14 +9,17 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMTYPES_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMTYPES_H
-#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include <vector>
namespace llvm {
+namespace codeview {
+class LazyRandomTypeCollection;
+}
namespace pdb {
class NativeSession;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
index 280358d02305..82fdff130c4f 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
@@ -9,12 +9,15 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEEXESYMBOL_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEEXESYMBOL_H
+#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
namespace pdb {
+class NativeSession;
+
class DbiStream;
class NativeExeSymbol : public NativeRawSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
index b219055d2153..c15e22f61077 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h
@@ -9,14 +9,17 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEFUNCTIONSYMBOL_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEFUNCTIONSYMBOL_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
+class raw_ostream;
namespace pdb {
+class NativeSession;
+
class NativeFunctionSymbol : public NativeRawSymbol {
public:
NativeFunctionSymbol(NativeSession &Session, SymIndexId Id,
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h
index 2f6aba038ae8..3467ac912162 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h
@@ -9,14 +9,16 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
namespace pdb {
+class NativeSession;
+
class NativeInlineSiteSymbol : public NativeRawSymbol {
public:
NativeInlineSiteSymbol(NativeSession &Session, SymIndexId Id,
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
index be0ddf0a063a..53f2985833fd 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
@@ -11,10 +11,12 @@
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
namespace llvm {
namespace pdb {
+
+class NativeSession;
+
class NativeLineNumber : public IPDBLineNumber {
public:
explicit NativeLineNumber(const NativeSession &Session,
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h
index 9f410e27f4cb..43de80507d02 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h
@@ -9,13 +9,14 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEPUBLICSYMBOL_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEPUBLICSYMBOL_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
namespace llvm {
+
+class raw_ostream;
namespace pdb {
+class NativeSession;
class NativePublicSymbol : public NativeRawSymbol {
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index 5f8fc587e546..95be7d09aae9 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -9,13 +9,11 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
-#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
@@ -24,6 +22,12 @@ class MemoryBuffer;
namespace pdb {
class PDBFile;
class NativeExeSymbol;
+class IPDBSourceFile;
+class ModuleDebugStreamRef;
+class PDBSymbol;
+class PDBSymbolCompiland;
+class PDBSymbolExe;
+template <typename ChildType> class IPDBEnumChildren;
class NativeSession : public IPDBSession {
struct PdbSearchOptions {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h
index eb6336f268e8..c6653368bc0c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h
@@ -11,11 +11,12 @@
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
namespace pdb {
+class PDBSymbolCompiland;
+template <typename ChildType> class IPDBEnumChildren;
class NativeSession;
class NativeSourceFile : public IPDBSourceFile {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
index d6a3125ee40b..ab4abc4d3c2c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
@@ -9,12 +9,16 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESYMBOLENUMERATOR_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
+
+class raw_ostream;
namespace pdb {
+class NativeSession;
class NativeTypeEnum;
class NativeSymbolEnumerator : public NativeRawSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
index 2068c88fc74a..429c06f29ac7 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
@@ -10,12 +10,14 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEENUM_H
#include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
+class raw_ostream;
namespace pdb {
class NativeTypeBuiltin;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
index 90b5d8068959..47ea722313c3 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -9,17 +9,15 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
namespace pdb {
-class NativeTypeUDT;
-
class NativeTypeFunctionSig : public NativeRawSymbol {
protected:
void initialize() override;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
index 7a3dfaecefeb..1f357754ac0f 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
@@ -10,10 +10,11 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEPOINTER_H
#include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
namespace pdb {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
index 292fc48e7b6d..ce4ebcd00c4a 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
@@ -9,14 +9,19 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPETYPEDEF_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
+
+class raw_ostream;
+
namespace pdb {
+class NativeSession;
+
class NativeTypeTypedef : public NativeRawSymbol {
public:
// Create a pointer record for a non-simple type.
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
index e1b31a256c12..a1dd39c0b4be 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
@@ -10,13 +10,17 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEUDT_H
#include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
+
+class raw_ostream;
namespace pdb {
+class NativeSession;
class NativeTypeUDT : public NativeRawSymbol {
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
index 21995ca665c1..92d51706c1da 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -9,13 +9,15 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
-#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
namespace llvm {
namespace pdb {
+class NativeSession;
class NativeTypeVTShape : public NativeRawSymbol {
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
index c5ee73280c46..1ea92ed4bf21 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
@@ -9,14 +9,12 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_PDBFILE_H
#define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILE_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/DebugInfo/MSF/IMSFFile.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/MathExtras.h"
#include <memory>
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 004d005280d4..c23d958f8ed0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -9,24 +9,28 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H
#define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H
-#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
-#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
namespace llvm {
+class WritableBinaryStream;
+namespace codeview {
+struct GUID;
+}
+
namespace msf {
class MSFBuilder;
+struct MSFLayout;
}
namespace pdb {
+struct SrcHeaderBlockEntry;
class DbiStreamBuilder;
class InfoStreamBuilder;
class GSIStreamBuilder;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
index 5cb749c8a747..4336cd398baf 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
@@ -9,11 +9,9 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_PDBSTRINGTABLE_H
#define LLVM_DEBUGINFO_PDB_NATIVE_PDBSTRINGTABLE_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
@@ -21,10 +19,6 @@
namespace llvm {
class BinaryStreamReader;
-namespace msf {
-class MappedBlockStream;
-}
-
namespace pdb {
struct PDBStringTableHeader;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
index bf6da3ea2920..a59a752ff911 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
@@ -9,20 +9,17 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_PUBLICSSTREAM_H
#define LLVM_DEBUGINFO_PDB_NATIVE_PUBLICSSTREAM_H
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
-#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Error.h"
namespace llvm {
+namespace msf {
+class MappedBlockStream;
+}
namespace pdb {
-class DbiStream;
-struct GSIHashHeader;
-class PDBFile;
+struct PublicsStreamHeader;
+struct SectionOffset;
class PublicsStream {
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
index 1ff6ca173b2b..7c5b6b9e1bdf 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -10,23 +10,29 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLCACHE_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IntervalMap.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include <memory>
#include <vector>
namespace llvm {
+namespace codeview {
+class InlineSiteSym;
+struct FileChecksumEntry;
+} // namespace codeview
namespace pdb {
+class IPDBSourceFile;
+class NativeSession;
+class PDBSymbol;
+class PDBSymbolCompiland;
class DbiStream;
-class PDBFile;
class SymbolCache {
NativeSession &Session;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
index 839cc8d2c503..c2f7eb04d16e 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
@@ -9,7 +9,7 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLSTREAM_H
#define LLVM_DEBUGINFO_PDB_NATIVE_SYMBOLSTREAM_H
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/Support/Error.h"
@@ -18,7 +18,6 @@ namespace msf {
class MappedBlockStream;
}
namespace pdb {
-class PDBFile;
class SymbolStream {
public:
@@ -41,7 +40,7 @@ private:
codeview::CVSymbolArray SymbolRecords;
std::unique_ptr<msf::MappedBlockStream> Stream;
};
-}
+} // namespace pdb
}
#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index e49d58af4421..4c413abb2bf0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -12,22 +12,23 @@
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Error.h"
namespace llvm {
+class BinaryStream;
namespace codeview {
+class TypeIndex;
+struct TypeIndexOffset;
class LazyRandomTypeCollection;
}
namespace msf {
class MappedBlockStream;
}
namespace pdb {
+struct TpiStreamHeader;
class PDBFile;
class TpiStream {
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
index f18d38ae0b31..9f320358144c 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
@@ -10,12 +10,10 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_TPISTREAMBUILDER_H
#include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
@@ -23,7 +21,7 @@
namespace llvm {
class BinaryByteStream;
-class WritableBinaryStreamRef;
+template <typename T> struct BinaryItemTraits;
template <> struct BinaryItemTraits<llvm::codeview::CVType> {
static size_t length(const codeview::CVType &Item) { return Item.length(); }
@@ -32,16 +30,11 @@ template <> struct BinaryItemTraits<llvm::codeview::CVType> {
}
};
-namespace codeview {
-class TypeRecord;
-}
namespace msf {
class MSFBuilder;
struct MSFLayout;
}
namespace pdb {
-class PDBFile;
-class TpiStream;
struct TpiStreamHeader;
class TpiStreamBuilder {
@@ -88,7 +81,7 @@ private:
const TpiStreamHeader *Header;
uint32_t Idx;
};
-}
+} // namespace pdb
}
#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
index 7b6793f0a639..3163c0a1dae0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -45,6 +45,8 @@ namespace pdb {
DILineInfo getLineInfoForAddress(
object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+ DILineInfo
+ getLineInfoForDataAddress(object::SectionedAddress Address) override;
DILineInfoTable getLineInfoForAddressRange(
object::SectionedAddress Address, uint64_t Size,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
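A usage sketch of the new data-address query, through the generic DIContext
interface (the helper below is hypothetical, not part of this change):

    #include <cstdint>
    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/Support/raw_ostream.h"

    // Look up file/line info for a global's address via any DIContext,
    // including the PDBContext above.
    void printDataLocation(llvm::DIContext &Ctx, uint64_t Addr) {
      llvm::object::SectionedAddress SA;
      SA.Address = Addr; // SectionIndex is left as UndefSection.
      llvm::DILineInfo Info = Ctx.getLineInfoForDataAddress(SA);
      llvm::outs() << Info.FileName << ":" << Info.Line << "\n";
    }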
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
index 24cf1e459f92..4e34b75b6117 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -9,11 +9,9 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOL_H
#define LLVM_DEBUGINFO_PDB_PDBSYMBOL_H
-#include "ConcreteSymbolEnumerator.h"
#include "IPDBRawSymbol.h"
#include "PDBExtras.h"
#include "PDBTypes.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#define FORWARD_SYMBOL_METHOD(MethodName) \
@@ -43,6 +41,9 @@ class raw_ostream;
namespace pdb {
class IPDBSession;
+class PDBSymDumper;
+class PDBSymbol;
+template <typename ChildType> class ConcreteSymbolEnumerator;
#define DECLARE_PDB_SYMBOL_CONCRETE_TYPE(TagValue) \
private: \
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index c76466a97b66..c8d3d0b7bb96 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -13,7 +13,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolAnnotation : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index cf471450d989..09142227b017 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -13,8 +13,6 @@
namespace llvm {
-class raw_ostream;
-
namespace pdb {
class PDBSymbolBlock : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
index dbd8ba5a63ff..46c159268533 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolCompilandDetails : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
index 61607a03593d..cba082f2ff19 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolCompilandEnv : public PDBSymbol {
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::CompilandEnv)
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
index 75a86411643a..c78b47ce9924 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
@@ -15,8 +15,6 @@
namespace llvm {
-class raw_ostream;
-
namespace pdb {
/// PDBSymbolCustom represents symbols that are compiler-specific and do not
/// fit anywhere else in the lexical hierarchy.
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
index 7e9b69d7cf4b..61e67d1368a8 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h
@@ -9,16 +9,16 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLDATA_H
#define LLVM_DEBUGINFO_PDB_PDBSYMBOLDATA_H
-#include "IPDBLineNumber.h"
#include "PDBSymbol.h"
#include "PDBTypes.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
namespace llvm {
-class raw_ostream;
-
namespace pdb {
+class PDBSymDumper;
+
class PDBSymbolData : public PDBSymbol {
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Data)
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
index f50057c68406..bfc7f7689718 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
@@ -9,17 +9,20 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLFUNC_H
#define LLVM_DEBUGINFO_PDB_PDBSYMBOLFUNC_H
-#include "IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+
#include "PDBSymbol.h"
-#include "PDBSymbolTypeFunctionSig.h"
#include "PDBTypes.h"
namespace llvm {
-class raw_ostream;
-
namespace pdb {
+class PDBSymDumper;
+class PDBSymbolData;
+class PDBSymbolTypeFunctionSig;
+template <typename ChildType> class IPDBEnumChildren;
+
class PDBSymbolFunc : public PDBSymbol {
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Function)
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
index 1cdc1811bb1a..09c6f4728960 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
@@ -14,8 +14,6 @@
namespace llvm {
-class raw_ostream;
-
namespace pdb {
class PDBSymbolFuncDebugEnd : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
index 021f27c7f0f7..843a8348a2f0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolFuncDebugStart : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
index 33eb36696cc2..148802a47cbc 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolLabel : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
index f8dcb2ba9d5f..a757cc02624b 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolPublicSymbol : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
index a5f795cc1303..2b81a63995e6 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolThunk : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
index d4cd6e71423e..496141e5fa68 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeArray : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
index bd2dbc914725..c74ac3fb9cce 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
@@ -12,14 +12,14 @@
#include "PDBSymbol.h"
#include "PDBTypes.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
namespace llvm {
-class raw_ostream;
namespace pdb {
+class PDBSymDumper;
+
class PDBSymbolTypeBaseClass : public PDBSymbol {
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::BaseClass)
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
index df6309b1545c..b923983095f3 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeBuiltin : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
index 7bf0317ff1ca..b15abf7bedfd 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeCustom : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
index 5d742237bac4..e7570b41dd21 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeDimension : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
index 0aab91039509..ee1f736c17a0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
@@ -9,16 +9,18 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEENUM_H
#define LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEENUM_H
-#include "IPDBLineNumber.h"
#include "PDBSymbol.h"
-#include "PDBSymbolTypeBuiltin.h"
#include "PDBTypes.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+
namespace llvm {
-class raw_ostream;
namespace pdb {
+class PDBSymDumper;
+class PDBSymbolTypeBuiltin;
+
class PDBSymbolTypeEnum : public PDBSymbol {
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Enum)
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
index d56a90662dae..9fde42116261 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeFriend : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
index 559ceec5aace..71decff722a5 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeFunctionArg : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
index 5e7b83ce8004..866bf520a3b2 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeManaged : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
index da25eab50f9b..1b43ef9a21bd 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypePointer : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
index 8dc29ca26192..3f37730cf1df 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeTypedef : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
index 3e73ad7ac85a..a3a49a4b619a 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
@@ -9,18 +9,17 @@
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEUDT_H
#define LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEUDT_H
-#include "IPDBLineNumber.h"
-#include "IPDBSession.h"
+#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+
#include "PDBSymbol.h"
-#include "PDBSymbolTypeBaseClass.h"
#include "PDBTypes.h"
namespace llvm {
-class raw_ostream;
-
namespace pdb {
+class PDBSymDumper;
+
class PDBSymbolTypeUDT : public PDBSymbol {
DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::UDT)
public:
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
index d08728dafa76..6223bee98670 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeVTable : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
index c7e2ac148503..bec0a9970a9f 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolTypeVTableShape : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
index 5b4909b800b9..a53af49bc9e0 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
@@ -13,7 +13,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolUnknown : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
index 19a8f414eb43..dde25a023d00 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
@@ -14,7 +14,6 @@
namespace llvm {
-class raw_ostream;
namespace pdb {
class PDBSymbolUsingNamespace : public PDBSymbol {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index e7c2ded1bee1..b6a794ad7e76 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -352,7 +352,8 @@ enum class PDB_BuiltinType {
BSTR = 30,
HResult = 31,
Char16 = 32,
- Char32 = 33
+ Char32 = 33,
+ Char8 = 34,
};
/// These values correspond to the flags that can be combined to control the
diff --git a/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h b/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h
index c67b093b63c0..8631c412f114 100644
--- a/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h
+++ b/llvm/include/llvm/DebugInfo/PDB/UDTLayout.h
@@ -18,7 +18,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include <cstdint>
#include <memory>
#include <string>
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h b/llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h
new file mode 100644
index 000000000000..c5340b5f0460
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h
@@ -0,0 +1,51 @@
+//===-- llvm/DebugInfo/Symbolize/DIFetcher.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares a DIFetcher abstraction for obtaining debug info from an
+/// arbitrary outside source.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_DIFETCHER_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_DIFETCHER_H
+
+#include <cstdint>
+#include <string>
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace symbolize {
+
+/// The DIFetcher interface provides arbitrary mechanisms for obtaining debug
+/// info from an outside source.
+class DIFetcher {
+public:
+ virtual ~DIFetcher() = default;
+ virtual Optional<std::string>
+ fetchBuildID(ArrayRef<uint8_t> BuildID) const = 0;
+};
+
+/// LocalDIFetcher searches local cache directories for debug info.
+class LocalDIFetcher : public DIFetcher {
+public:
+ LocalDIFetcher(ArrayRef<std::string> DebugFileDirectory)
+ : DebugFileDirectory(DebugFileDirectory) {}
+ virtual ~LocalDIFetcher() = default;
+
+ Optional<std::string> fetchBuildID(ArrayRef<uint8_t> BuildID) const override;
+
+private:
+ const ArrayRef<std::string> DebugFileDirectory;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_DIFETCHER_H
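To illustrate the interface, a minimal sketch of a custom fetcher, assuming a
single flat directory whose files are named by lowercase build-ID hex (the
class and the layout are hypothetical):

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/StringExtras.h"
    #include "llvm/DebugInfo/Symbolize/DIFetcher.h"
    #include "llvm/Support/FileSystem.h"
    #include <string>

    // Hypothetical fetcher: look for <Dir>/<build-id-hex> on local disk.
    class FixedDirFetcher : public llvm::symbolize::DIFetcher {
    public:
      explicit FixedDirFetcher(std::string Dir) : Dir(std::move(Dir)) {}

      llvm::Optional<std::string>
      fetchBuildID(llvm::ArrayRef<uint8_t> BuildID) const override {
        std::string Path = Dir + "/" + llvm::toHex(BuildID, /*LowerCase=*/true);
        if (llvm::sys::fs::exists(Path))
          return Path;
        return llvm::None; // Miss: the symbolizer tries its other fetchers.
      }

    private:
      std::string Dir;
    };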
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
new file mode 100644
index 000000000000..2628b47cf6d3
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Markup.h
@@ -0,0 +1,120 @@
+//===- Markup.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the log symbolizer markup data model and parser.
+///
+/// See https://llvm.org/docs/SymbolizerMarkupFormat.html
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
+
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Regex.h"
+
+namespace llvm {
+namespace symbolize {
+
+/// A node of symbolizer markup.
+///
+/// If only the Text field is set, this represents a region of text outside a
+/// markup element. ANSI SGR control codes are also reported this way; if
+/// detected, then the control code will be the entirety of the Text field, and
+/// any surrounding text will be reported as preceding and following nodes.
+struct MarkupNode {
+ /// The full text of this node in the input.
+ StringRef Text;
+
+ /// If this represents an element, the tag. Otherwise, empty.
+ StringRef Tag;
+
+ /// If this represents an element with fields, a list of the field contents.
+ /// Otherwise, empty.
+ SmallVector<StringRef> Fields;
+
+ bool operator==(const MarkupNode &Other) const {
+ return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields;
+ }
+ bool operator!=(const MarkupNode &Other) const { return !(*this == Other); }
+};
+
+/// Parses a log containing symbolizer markup into a sequence of nodes.
+class MarkupParser {
+public:
+ MarkupParser(StringSet<> MultilineTags = {});
+
+ /// Parses an individual \p Line of input.
+ ///
+ /// Nodes from the previous parseLine() call that haven't yet been extracted
+ /// by nextNode() are discarded. The nodes returned by nextNode() may
+ /// reference the input string, so it must be retained by the caller until the
+ /// last use.
+ ///
+ /// Note that some elements may span multiple lines. If a line ends with the
+ /// start of one of these elements, then no nodes will be produced until
+ /// either the end or something that cannot be part of an element is
+ /// encountered. This may only occur after multiple calls to parseLine(),
+ /// corresponding to the lines of the multi-line element.
+ void parseLine(StringRef Line);
+
+ /// Inform the parser that the input stream has ended.
+ ///
+ /// This allows the parser to finish any deferred processing (e.g., an
+ /// in-progress multi-line element) and may cause nextNode() to return
+ /// additional nodes.
+ void flush();
+
+ /// Returns the next node in the input sequence.
+ ///
+ /// Calling nextNode() may invalidate the contents of the node returned by the
+ /// previous call.
+ ///
+ /// \returns the next markup node or None if none remain.
+ Optional<MarkupNode> nextNode();
+
+private:
+ Optional<MarkupNode> parseElement(StringRef Line);
+ void parseTextOutsideMarkup(StringRef Text);
+ Optional<StringRef> parseMultiLineBegin(StringRef Line);
+ Optional<StringRef> parseMultiLineEnd(StringRef Line);
+
+ // Tags of elements that can span multiple lines.
+ const StringSet<> MultilineTags;
+
+ // Contents of a multi-line element that has finished being parsed. Retained
+ // to keep returned StringRefs for the contents valid.
+ std::string FinishedMultiline;
+
+ // Contents of a multi-line element that is still in the process of receiving
+ // lines.
+ std::string InProgressMultiline;
+
+ // The line currently being parsed.
+ StringRef Line;
+
+ // Buffer for nodes parsed from the current line.
+ SmallVector<MarkupNode> Buffer;
+
+ // Next buffer index to return.
+ size_t NextIdx;
+
+ // Regular expression matching supported ANSI SGR escape sequences.
+ const Regex SGRSyntax;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H
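A sketch of the parseLine()/nextNode() protocol described above (the helper is
hypothetical): drain nodes after every line, then flush() once at end of input
so a dangling multi-line element is still emitted.

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/DebugInfo/Symbolize/Markup.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpMarkup(llvm::StringRef Input) { // Input must outlive the nodes.
      llvm::symbolize::MarkupParser Parser;
      auto Drain = [&] {
        while (llvm::Optional<llvm::symbolize::MarkupNode> N = Parser.nextNode())
          llvm::outs() << "tag=" << N->Tag << " text=" << N->Text << "\n";
      };
      while (!Input.empty()) {
        std::pair<llvm::StringRef, llvm::StringRef> Split = Input.split('\n');
        Parser.parseLine(Split.first);
        Input = Split.second;
        Drain(); // Undrained nodes are discarded by the next parseLine().
      }
      Parser.flush(); // Finish any in-progress multi-line element.
      Drain();
    }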
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
new file mode 100644
index 000000000000..b7d70ccafe66
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -0,0 +1,76 @@
+//===- MarkupFilter.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares a filter that replaces symbolizer markup with
+/// human-readable expressions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
+
+#include "Markup.h"
+
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace symbolize {
+
+/// Filter to convert parsed log symbolizer markup elements into human-readable
+/// text.
+class MarkupFilter {
+public:
+ MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled = llvm::None);
+
+ /// Begins a logical \p Line of markup.
+ ///
+ /// This must be called for each line of the input stream before calls to
+ /// filter() for elements of that line. The provided \p Line must be the same
+ /// one that was passed to parseLine() to produce the elements to be later
+ /// passed to filter().
+ ///
+ /// This informs the filter that a new line is beginning and establishes a
+ /// context for error location reporting.
+ void beginLine(StringRef Line);
+
+ /// Handle a \p Node of symbolizer markup.
+ ///
+ /// If the node is a recognized, valid markup element, it is replaced with a
+ /// human-readable string. If the node isn't an element or the element isn't
+ /// recognized, it is output verbatim. If the element is recognized but isn't
+ /// valid, it is omitted from the output.
+ void filter(const MarkupNode &Node);
+
+private:
+ bool trySGR(const MarkupNode &Node);
+
+ void highlight();
+ void restoreColor();
+ void resetColor();
+
+ bool checkTag(const MarkupNode &Node) const;
+ bool checkNumFields(const MarkupNode &Node, size_t Size) const;
+
+ void reportTypeError(StringRef Str, StringRef TypeName) const;
+ void reportLocation(StringRef::iterator Loc) const;
+
+ raw_ostream &OS;
+ const bool ColorsEnabled;
+
+ StringRef Line;
+
+ Optional<raw_ostream::Colors> Color;
+ bool Bold = false;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
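How the parser and filter compose, one line at a time (hypothetical helper;
llvm-symbolizer drives the same pair for its markup-filtering mode):

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/DebugInfo/Symbolize/Markup.h"
    #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"

    void filterLine(llvm::symbolize::MarkupParser &Parser,
                    llvm::symbolize::MarkupFilter &Filter,
                    llvm::StringRef Line) {
      Filter.beginLine(Line); // Must precede filter() calls for this line.
      Parser.parseLine(Line);
      while (llvm::Optional<llvm::symbolize::MarkupNode> N = Parser.nextNode())
        Filter.filter(*N); // Valid elements become text; others pass through.
    }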
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 8fb003fff0ae..075dbe3e0e37 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -9,8 +9,8 @@
// This file declares the SymbolizableObjectFile class.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
-#define LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
@@ -100,4 +100,4 @@ private:
} // end namespace llvm
-#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 4ec333422c4b..00c4bf0a615f 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -13,10 +13,12 @@
#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
-#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/simple_ilist.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/Symbolize/DIFetcher.h"
#include "llvm/Object/Binary.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
@@ -27,13 +29,24 @@
#include <vector>
namespace llvm {
+namespace object {
+class ELFObjectFileBase;
+class MachOObjectFile;
+class ObjectFile;
+struct SectionedAddress;
+} // namespace object
+
namespace symbolize {
+class SymbolizableModule;
+
using namespace object;
using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
+class CachedBinary;
+
class LLVMSymbolizer {
public:
struct Options {
@@ -49,40 +62,63 @@ public:
std::string FallbackDebugPath;
std::string DWPName;
std::vector<std::string> DebugFileDirectory;
+ size_t MaxCacheSize =
+ sizeof(size_t) == 4
+ ? 512 * 1024 * 1024 /* 512 MiB */
+ : static_cast<size_t>(4ULL * 1024 * 1024 * 1024) /* 4 GiB */;
};
- LLVMSymbolizer() = default;
- LLVMSymbolizer(const Options &Opts) : Opts(Opts) {}
+ LLVMSymbolizer();
+ LLVMSymbolizer(const Options &Opts);
- ~LLVMSymbolizer() { flush(); }
+ ~LLVMSymbolizer();
// Overloads accepting ObjectFile do not currently support COFF
Expected<DILineInfo> symbolizeCode(const ObjectFile &Obj,
object::SectionedAddress ModuleOffset);
Expected<DILineInfo> symbolizeCode(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
+ Expected<DILineInfo> symbolizeCode(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset);
Expected<DIInliningInfo>
symbolizeInlinedCode(const ObjectFile &Obj,
object::SectionedAddress ModuleOffset);
Expected<DIInliningInfo>
symbolizeInlinedCode(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
+ Expected<DIInliningInfo>
+ symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset);
Expected<DIGlobal> symbolizeData(const ObjectFile &Obj,
object::SectionedAddress ModuleOffset);
Expected<DIGlobal> symbolizeData(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
+ Expected<DIGlobal> symbolizeData(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset);
Expected<std::vector<DILocal>>
symbolizeFrame(const ObjectFile &Obj, object::SectionedAddress ModuleOffset);
Expected<std::vector<DILocal>>
symbolizeFrame(const std::string &ModuleName,
object::SectionedAddress ModuleOffset);
+ Expected<std::vector<DILocal>>
+ symbolizeFrame(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset);
void flush();
+ // Evict entries from the binary cache until it is under the maximum size
+ // given in the options. Calling this invalidates references in the DI...
+ // objects returned by the methods above.
+ void pruneCache();
+
static std::string
DemangleName(const std::string &Name,
const SymbolizableModule *DbiModuleDescriptor);
+ void addDIFetcher(std::unique_ptr<DIFetcher> Fetcher) {
+ DIFetchers.push_back(std::move(Fetcher));
+ }
+
private:
// Bundles together object file with code/data and object file with
// corresponding debug info. These objects can be the same.
@@ -112,6 +148,12 @@ private:
getOrCreateModuleInfo(const std::string &ModuleName);
Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);
+ /// Returns a SymbolizableModule or an error if loading debug info failed.
+ /// Unlike the above, errors are reported each time, since they are more
+ /// likely to be transient.
+ Expected<SymbolizableModule *>
+ getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID);
+
Expected<SymbolizableModule *>
createModuleInfo(const ObjectFile *Obj, std::unique_ptr<DIContext> Context,
StringRef ModuleName);
@@ -126,6 +168,13 @@ private:
const ELFObjectFileBase *Obj,
const std::string &ArchName);
+ bool findDebugBinary(const std::string &OrigPath,
+ const std::string &DebuglinkName, uint32_t CRCHash,
+ std::string &Result);
+
+ bool getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
+ std::string &Result);
+
/// Returns pair of pointers to object and debug object.
Expected<ObjectPair> getOrCreateObjectPair(const std::string &Path,
const std::string &ArchName);
@@ -136,15 +185,24 @@ private:
Expected<ObjectFile *> getOrCreateObject(const std::string &Path,
const std::string &ArchName);
+ /// Update the LRU cache order when a binary is accessed.
+ void recordAccess(CachedBinary &Bin);
+
std::map<std::string, std::unique_ptr<SymbolizableModule>, std::less<>>
Modules;
+ StringMap<std::string> BuildIDPaths;
/// Contains cached results of getOrCreateObjectPair().
std::map<std::pair<std::string, std::string>, ObjectPair>
ObjectPairForPathArch;
/// Contains parsed binary for each path, or parsing error.
- std::map<std::string, OwningBinary<Binary>> BinaryForPath;
+ std::map<std::string, CachedBinary> BinaryForPath;
+
+ /// A list of cached binaries in LRU order.
+ simple_ilist<CachedBinary> LRUBinaries;
+ /// Sum of the sizes of the cached binaries.
+ size_t CacheSize = 0;
/// Parsed object file for path/architecture pair, where "path" refers
/// to Mach-O universal binary.
@@ -152,6 +210,37 @@ private:
ObjectForUBPathAndArch;
Options Opts;
+
+ SmallVector<std::unique_ptr<DIFetcher>> DIFetchers;
+};
+
+// A binary intrusively linked into an LRU cache list. If the binary is empty,
+// then the entry marks that an error occurred, and it is not part of the LRU
+// list.
+class CachedBinary : public ilist_node<CachedBinary> {
+public:
+ CachedBinary() = default;
+ CachedBinary(OwningBinary<Binary> Bin) : Bin(std::move(Bin)) {}
+
+ OwningBinary<Binary> &operator*() { return Bin; }
+ OwningBinary<Binary> *operator->() { return &Bin; }
+
+ // Add an action to be performed when the binary is evicted, before all
+ // previously registered evictors.
+ void pushEvictor(std::function<void()> Evictor);
+
+ // Run all registered evictors in the reverse of the order in which they were
+ // added.
+ void evict() {
+ if (Evictor)
+ Evictor();
+ }
+
+ size_t size() { return Bin.getBinary()->getData().size(); }
+
+private:
+ OwningBinary<Binary> Bin;
+ std::function<void()> Evictor;
};
} // end namespace symbolize
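A sketch of the new cache controls in use (the module path and address below
are placeholders): configure MaxCacheSize up front, symbolize, then prune.

    #include "llvm/DebugInfo/Symbolize/Symbolize.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    void symbolizeOnce(const std::string &ModulePath, uint64_t Addr) {
      llvm::symbolize::LLVMSymbolizer::Options Opts;
      Opts.MaxCacheSize = 256 * 1024 * 1024; // Cap cached binaries at 256 MiB.
      llvm::symbolize::LLVMSymbolizer Symbolizer(Opts);

      llvm::object::SectionedAddress SA;
      SA.Address = Addr;
      if (llvm::Expected<llvm::DILineInfo> Info =
              Symbolizer.symbolizeCode(ModulePath, SA))
        llvm::outs() << Info->FileName << ":" << Info->Line << "\n";
      else
        llvm::logAllUnhandledErrors(Info.takeError(), llvm::errs(), "error: ");

      // Evict LRU binaries until CacheSize <= MaxCacheSize; references into
      // previously returned DI* objects are invalidated.
      Symbolizer.pruneCache();
    }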
diff --git a/llvm/include/llvm/Debuginfod/DIFetcher.h b/llvm/include/llvm/Debuginfod/DIFetcher.h
new file mode 100644
index 000000000000..d398fd900051
--- /dev/null
+++ b/llvm/include/llvm/Debuginfod/DIFetcher.h
@@ -0,0 +1,34 @@
+//===- llvm/DebugInfod/DIFetcher.h - Debug info fetcher----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares a DIFetcher implementation for obtaining debug info from
+/// debuginfod.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFOD_DIFETCHER_H
+#define LLVM_DEBUGINFOD_DIFETCHER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/Symbolize/DIFetcher.h"
+
+namespace llvm {
+
+class DebuginfodDIFetcher : public symbolize::DIFetcher {
+public:
+ virtual ~DebuginfodDIFetcher() = default;
+
+ /// Fetches the given Build ID using debuginfod and returns a local path to
+ /// the resulting debug binary.
+ Optional<std::string> fetchBuildID(ArrayRef<uint8_t> BuildID) const override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFOD_DIFETCHER_H
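Registering the debuginfod fetcher with a symbolizer is then one call (sketch;
the client reads server URLs from the usual DEBUGINFOD_URLS environment):

    #include "llvm/DebugInfo/Symbolize/Symbolize.h"
    #include "llvm/Debuginfod/DIFetcher.h"
    #include <memory>

    void enableDebuginfod(llvm::symbolize::LLVMSymbolizer &Symbolizer) {
      // Consulted after local lookup of a build ID fails.
      Symbolizer.addDIFetcher(std::make_unique<llvm::DebuginfodDIFetcher>());
    }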
diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h
index ca3b76ca9f3f..6c94961032e7 100644
--- a/llvm/include/llvm/Debuginfod/HTTPClient.h
+++ b/llvm/include/llvm/Debuginfod/HTTPClient.h
@@ -7,9 +7,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file contains the declarations of the HTTPClient, HTTPMethod,
-/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as
-/// the HTTPResponseBuffer and HTTPRequest structs.
+/// This file contains the declarations of the HTTPClient library for issuing
+/// HTTP requests and handling the responses.
///
//===----------------------------------------------------------------------===//
@@ -40,43 +39,13 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B);
/// of its methods.
class HTTPResponseHandler {
public:
- /// Processes one line of HTTP response headers.
- virtual Error handleHeaderLine(StringRef HeaderLine) = 0;
-
/// Processes an additional chunk of bytes of the HTTP response body.
virtual Error handleBodyChunk(StringRef BodyChunk) = 0;
- /// Processes the HTTP response status code.
- virtual Error handleStatusCode(unsigned Code) = 0;
-
protected:
~HTTPResponseHandler();
};
-/// An HTTP response status code bundled with a buffer to store the body.
-struct HTTPResponseBuffer {
- unsigned Code = 0;
- std::unique_ptr<WritableMemoryBuffer> Body;
-};
-
-/// A simple handler which writes returned data to an HTTPResponseBuffer.
-/// Ignores all headers except the Content-Length, which it uses to
-/// allocate an appropriately-sized Body buffer.
-class BufferedHTTPResponseHandler final : public HTTPResponseHandler {
- size_t Offset = 0;
-
-public:
- /// Stores the data received from the HTTP server.
- HTTPResponseBuffer ResponseBuffer;
-
- /// These callbacks store the body and status code in an HTTPResponseBuffer
- /// allocated based on Content-Length. The Content-Length header must be
- /// handled by handleHeaderLine before any calls to handleBodyChunk.
- Error handleHeaderLine(StringRef HeaderLine) override;
- Error handleBodyChunk(StringRef BodyChunk) override;
- Error handleStatusCode(unsigned Code) override;
-};
-
/// A reusable client that can perform HTTPRequests through a network socket.
class HTTPClient {
#ifdef LLVM_ENABLE_CURL
@@ -107,13 +76,8 @@ public:
/// Handler method.
Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler);
- /// Performs the Request with the default BufferedHTTPResponseHandler, and
- /// returns its HTTPResponseBuffer or an Error.
- Expected<HTTPResponseBuffer> perform(const HTTPRequest &Request);
-
- /// Performs an HTTPRequest with the default configuration to make a GET
- /// request to the given Url. Returns an HTTPResponseBuffer or an Error.
- Expected<HTTPResponseBuffer> get(StringRef Url);
+ /// Returns the last received response code or zero if none.
+ unsigned responseCode();
};
} // end namespace llvm
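With the buffered handler gone, callers now supply their own
HTTPResponseHandler and query responseCode() after perform(). A minimal sketch
(hypothetical helper; assumes HTTPClient::initialize() was called and the
client was built with curl support):

    #include "llvm/Debuginfod/HTTPClient.h"
    #include "llvm/Support/Error.h"
    #include <string>
    #include <utility>

    // Accumulates the whole response body into a string.
    struct StringBodyHandler final : public llvm::HTTPResponseHandler {
      std::string Body;
      llvm::Error handleBodyChunk(llvm::StringRef Chunk) override {
        Body += Chunk.str();
        return llvm::Error::success();
      }
    };

    llvm::Expected<std::string> httpGet(llvm::StringRef Url) {
      StringBodyHandler Handler;
      llvm::HTTPClient Client;
      if (llvm::Error E = Client.perform(llvm::HTTPRequest(Url), Handler))
        return std::move(E);
      if (Client.responseCode() != 200) // Status is available after perform().
        return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                       "unexpected HTTP status %u",
                                       Client.responseCode());
      return std::move(Handler.Body);
    }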
diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index 3150e049320b..6133d0b95bbf 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -57,8 +57,8 @@ char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf,
size_t *n_buf, int *status,
MSDemangleFlags Flags = MSDF_None);
-// Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle.
-char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
+// Demangles a Rust v0 mangled symbol.
+char *rustDemangle(const char *MangledName);
// Demangles a D mangled symbol.
char *dlangDemangle(const char *MangledName);
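The new contract matches the other demanglers: the result is heap-allocated by
the demangler and released by the caller (sketch; returns null for symbols
that are not Rust v0):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    void printRustName(const char *MangledName) {
      if (char *Demangled = llvm::rustDemangle(MangledName)) {
        std::puts(Demangled);
        std::free(Demangled); // The demangler allocates with malloc.
      }
    }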
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 760319544a02..959632f13e1e 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -16,10 +16,6 @@
#ifndef DEMANGLE_ITANIUMDEMANGLE_H
#define DEMANGLE_ITANIUMDEMANGLE_H
-// FIXME: (possibly) incomplete list of features that clang mangles that this
-// file does not yet support:
-// - C++ modules TS
-
#include "DemangleConfig.h"
#include "StringView.h"
#include "Utility.h"
@@ -32,85 +28,6 @@
#include <limits>
#include <utility>
-#define FOR_EACH_NODE_KIND(X) \
- X(NodeArrayNode) \
- X(DotSuffix) \
- X(VendorExtQualType) \
- X(QualType) \
- X(ConversionOperatorType) \
- X(PostfixQualifiedType) \
- X(ElaboratedTypeSpefType) \
- X(NameType) \
- X(AbiTagAttr) \
- X(EnableIfAttr) \
- X(ObjCProtoName) \
- X(PointerType) \
- X(ReferenceType) \
- X(PointerToMemberType) \
- X(ArrayType) \
- X(FunctionType) \
- X(NoexceptSpec) \
- X(DynamicExceptionSpec) \
- X(FunctionEncoding) \
- X(LiteralOperator) \
- X(SpecialName) \
- X(CtorVtableSpecialName) \
- X(QualifiedName) \
- X(NestedName) \
- X(LocalName) \
- X(VectorType) \
- X(PixelVectorType) \
- X(BinaryFPType) \
- X(SyntheticTemplateParamName) \
- X(TypeTemplateParamDecl) \
- X(NonTypeTemplateParamDecl) \
- X(TemplateTemplateParamDecl) \
- X(TemplateParamPackDecl) \
- X(ParameterPack) \
- X(TemplateArgumentPack) \
- X(ParameterPackExpansion) \
- X(TemplateArgs) \
- X(ForwardTemplateReference) \
- X(NameWithTemplateArgs) \
- X(GlobalQualifiedName) \
- X(StdQualifiedName) \
- X(ExpandedSpecialSubstitution) \
- X(SpecialSubstitution) \
- X(CtorDtorName) \
- X(DtorName) \
- X(UnnamedTypeName) \
- X(ClosureTypeName) \
- X(StructuredBindingName) \
- X(BinaryExpr) \
- X(ArraySubscriptExpr) \
- X(PostfixExpr) \
- X(ConditionalExpr) \
- X(MemberExpr) \
- X(SubobjectExpr) \
- X(EnclosingExpr) \
- X(CastExpr) \
- X(SizeofParamPackExpr) \
- X(CallExpr) \
- X(NewExpr) \
- X(DeleteExpr) \
- X(PrefixExpr) \
- X(FunctionParam) \
- X(ConversionExpr) \
- X(PointerToMemberConversionExpr) \
- X(InitListExpr) \
- X(FoldExpr) \
- X(ThrowExpr) \
- X(BoolExpr) \
- X(StringLiteral) \
- X(LambdaExpr) \
- X(EnumLiteral) \
- X(IntegerLiteral) \
- X(FloatLiteral) \
- X(DoubleLiteral) \
- X(LongDoubleLiteral) \
- X(BracedExpr) \
- X(BracedRangeExpr)
-
DEMANGLE_NAMESPACE_BEGIN
template <class T, size_t N> class PODSmallVector {
@@ -238,37 +155,68 @@ public:
class Node {
public:
enum Kind : unsigned char {
-#define ENUMERATOR(NodeKind) K ## NodeKind,
- FOR_EACH_NODE_KIND(ENUMERATOR)
-#undef ENUMERATOR
+#define NODE(NodeKind) K##NodeKind,
+#include "ItaniumNodes.def"
};
/// Three-way bool to track a cached value. Unknown is possible if this node
/// has an unexpanded parameter pack below it that may affect this cache.
enum class Cache : unsigned char { Yes, No, Unknown, };
+ /// Operator precedence for expression nodes. Used to determine required
+ /// parens in expression emission.
+ enum class Prec {
+ Primary,
+ Postfix,
+ Unary,
+ Cast,
+ PtrMem,
+ Multiplicative,
+ Additive,
+ Shift,
+ Spaceship,
+ Relational,
+ Equality,
+ And,
+ Xor,
+ Ior,
+ AndIf,
+ OrIf,
+ Conditional,
+ Assign,
+ Comma,
+ Default,
+ };
+
private:
Kind K;
+ Prec Precedence : 6;
+
// FIXME: Make these protected.
public:
/// Tracks if this node has a component on its right side, in which case we
/// need to call printRight.
- Cache RHSComponentCache;
+ Cache RHSComponentCache : 2;
/// Track if this node is a (possibly qualified) array type. This can affect
/// how we format the output string.
- Cache ArrayCache;
+ Cache ArrayCache : 2;
/// Track if this node is a (possibly qualified) function type. This can
/// affect how we format the output string.
- Cache FunctionCache;
+ Cache FunctionCache : 2;
public:
- Node(Kind K_, Cache RHSComponentCache_ = Cache::No,
- Cache ArrayCache_ = Cache::No, Cache FunctionCache_ = Cache::No)
- : K(K_), RHSComponentCache(RHSComponentCache_), ArrayCache(ArrayCache_),
- FunctionCache(FunctionCache_) {}
+ Node(Kind K_, Prec Precedence_ = Prec::Primary,
+ Cache RHSComponentCache_ = Cache::No, Cache ArrayCache_ = Cache::No,
+ Cache FunctionCache_ = Cache::No)
+ : K(K_), Precedence(Precedence_), RHSComponentCache(RHSComponentCache_),
+ ArrayCache(ArrayCache_), FunctionCache(FunctionCache_) {}
+ Node(Kind K_, Cache RHSComponentCache_, Cache ArrayCache_ = Cache::No,
+ Cache FunctionCache_ = Cache::No)
+ : Node(K_, Prec::Primary, RHSComponentCache_, ArrayCache_,
+ FunctionCache_) {}
/// Visit the most-derived object corresponding to this object.
template<typename Fn> void visit(Fn F) const;
@@ -299,6 +247,8 @@ public:
Kind getKind() const { return K; }
+ Prec getPrecedence() const { return Precedence; }
+
virtual bool hasRHSComponentSlow(OutputBuffer &) const { return false; }
virtual bool hasArraySlow(OutputBuffer &) const { return false; }
virtual bool hasFunctionSlow(OutputBuffer &) const { return false; }
@@ -307,6 +257,19 @@ public:
// get at a node that actually represents some concrete syntax.
virtual const Node *getSyntaxNode(OutputBuffer &) const { return this; }
+ // Print this node as an expression operand, surrounding it in parentheses if
+ // its precedence is [Strictly] weaker than P.
+ void printAsOperand(OutputBuffer &OB, Prec P = Prec::Default,
+ bool StrictlyWorse = false) const {
+ bool Paren =
+ unsigned(getPrecedence()) >= unsigned(P) + unsigned(StrictlyWorse);
+ if (Paren)
+ OB.printOpen();
+ print(OB);
+ if (Paren)
+ OB.printClose();
+ }
+
void print(OutputBuffer &OB) const {
printLeft(OB);
if (RHSComponentCache != Cache::No)
@@ -356,7 +319,7 @@ public:
if (!FirstElement)
OB += ", ";
size_t AfterComma = OB.getCurrentPosition();
- Elements[Idx]->print(OB);
+ Elements[Idx]->printAsOperand(OB, Node::Prec::Comma);
// Elements[Idx] is an empty parameter pack expansion, we should erase the
// comma we just printed.
@@ -494,7 +457,7 @@ class PostfixQualifiedType final : public Node {
const StringView Postfix;
public:
- PostfixQualifiedType(Node *Ty_, StringView Postfix_)
+ PostfixQualifiedType(const Node *Ty_, StringView Postfix_)
: Node(KPostfixQualifiedType), Ty(Ty_), Postfix(Postfix_) {}
template<typename Fn> void match(Fn F) const { F(Ty, Postfix); }
@@ -519,6 +482,26 @@ public:
void printLeft(OutputBuffer &OB) const override { OB += Name; }
};
+class BitIntType final : public Node {
+ const Node *Size;
+ bool Signed;
+
+public:
+ BitIntType(const Node *Size_, bool Signed_)
+ : Node(KBitIntType), Size(Size_), Signed(Signed_) {}
+
+ template <typename Fn> void match(Fn F) const { F(Size, Signed); }
+
+ void printLeft(OutputBuffer &OB) const override {
+ if (!Signed)
+ OB += "unsigned ";
+ OB += "_BitInt";
+ OB.printOpen();
+ Size->printAsOperand(OB);
+ OB.printClose();
+ }
+};
+
class ElaboratedTypeSpefType : public Node {
StringView Kind;
Node *Child;
@@ -693,7 +676,7 @@ public:
void printLeft(OutputBuffer &OB) const override {
if (Printing)
return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB);
if (!Collapsed.second)
return;
@@ -708,7 +691,7 @@ public:
void printRight(OutputBuffer &OB) const override {
if (Printing)
return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB);
if (!Collapsed.second)
return;
@@ -815,9 +798,9 @@ public:
}
void printRight(OutputBuffer &OB) const override {
- OB += "(";
+ OB.printOpen();
Params.printWithComma(OB);
- OB += ")";
+ OB.printClose();
Ret->printRight(OB);
if (CVQuals & QualConst)
@@ -847,9 +830,10 @@ public:
template<typename Fn> void match(Fn F) const { F(E); }
void printLeft(OutputBuffer &OB) const override {
- OB += "noexcept(";
- E->print(OB);
- OB += ")";
+ OB += "noexcept";
+ OB.printOpen();
+ E->printAsOperand(OB);
+ OB.printClose();
}
};
@@ -862,9 +846,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Types); }
void printLeft(OutputBuffer &OB) const override {
- OB += "throw(";
+ OB += "throw";
+ OB.printOpen();
Types.printWithComma(OB);
- OB += ')';
+ OB.printClose();
}
};
@@ -910,9 +895,9 @@ public:
}
void printRight(OutputBuffer &OB) const override {
- OB += "(";
+ OB.printOpen();
Params.printWithComma(OB);
- OB += ")";
+ OB.printClose();
if (Ret)
Ret->printRight(OB);
@@ -1001,6 +986,46 @@ struct NestedName : Node {
}
};
+struct ModuleName : Node {
+ ModuleName *Parent;
+ Node *Name;
+ bool IsPartition;
+
+ ModuleName(ModuleName *Parent_, Node *Name_, bool IsPartition_ = false)
+ : Node(KModuleName), Parent(Parent_), Name(Name_),
+ IsPartition(IsPartition_) {}
+
+ template <typename Fn> void match(Fn F) const {
+ F(Parent, Name, IsPartition);
+ }
+
+ void printLeft(OutputBuffer &OB) const override {
+ if (Parent)
+ Parent->print(OB);
+ if (Parent || IsPartition)
+ OB += IsPartition ? ':' : '.';
+ Name->print(OB);
+ }
+};
+
+struct ModuleEntity : Node {
+ ModuleName *Module;
+ Node *Name;
+
+ ModuleEntity(ModuleName *Module_, Node *Name_)
+ : Node(KModuleEntity), Module(Module_), Name(Name_) {}
+
+ template <typename Fn> void match(Fn F) const { F(Module, Name); }
+
+ StringView getBaseName() const override { return Name->getBaseName(); }
+
+ void printLeft(OutputBuffer &OB) const override {
+ Name->print(OB);
+ OB += '@';
+ Module->print(OB);
+ }
+};
+
struct LocalName : Node {
Node *Encoding;
Node *Entity;
@@ -1042,9 +1067,8 @@ class VectorType final : public Node {
const Node *Dimension;
public:
- VectorType(const Node *BaseType_, Node *Dimension_)
- : Node(KVectorType), BaseType(BaseType_),
- Dimension(Dimension_) {}
+ VectorType(const Node *BaseType_, const Node *Dimension_)
+ : Node(KVectorType), BaseType(BaseType_), Dimension(Dimension_) {}
template<typename Fn> void match(Fn F) const { F(BaseType, Dimension); }
@@ -1176,6 +1200,7 @@ public:
template<typename Fn> void match(Fn F) const { F(Name, Params); }
void printLeft(OutputBuffer &OB) const override {
+ ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
OB += "template<";
Params.printWithComma(OB);
OB += "> typename ";
@@ -1311,8 +1336,8 @@ public:
void printLeft(OutputBuffer &OB) const override {
constexpr unsigned Max = std::numeric_limits<unsigned>::max();
- SwapAndRestore<unsigned> SavePackIdx(OB.CurrentPackIndex, Max);
- SwapAndRestore<unsigned> SavePackMax(OB.CurrentPackMax, Max);
+ ScopedOverride<unsigned> SavePackIdx(OB.CurrentPackIndex, Max);
+ ScopedOverride<unsigned> SavePackMax(OB.CurrentPackMax, Max);
size_t StreamPos = OB.getCurrentPosition();
// Print the first element in the pack. If Child contains a ParameterPack,
@@ -1353,10 +1378,9 @@ public:
NodeArray getParams() { return Params; }
void printLeft(OutputBuffer &OB) const override {
+ ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
OB += "<";
Params.printWithComma(OB);
- if (OB.back() == '>')
- OB += " ";
OB += ">";
}
};
@@ -1402,38 +1426,38 @@ struct ForwardTemplateReference : Node {
bool hasRHSComponentSlow(OutputBuffer &OB) const override {
if (Printing)
return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
return Ref->hasRHSComponent(OB);
}
bool hasArraySlow(OutputBuffer &OB) const override {
if (Printing)
return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
return Ref->hasArray(OB);
}
bool hasFunctionSlow(OutputBuffer &OB) const override {
if (Printing)
return false;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
return Ref->hasFunction(OB);
}
const Node *getSyntaxNode(OutputBuffer &OB) const override {
if (Printing)
return this;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
return Ref->getSyntaxNode(OB);
}
void printLeft(OutputBuffer &OB) const override {
if (Printing)
return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
Ref->printLeft(OB);
}
void printRight(OutputBuffer &OB) const override {
if (Printing)
return;
- SwapAndRestore<bool> SavePrinting(Printing, true);
+ ScopedOverride<bool> SavePrinting(Printing, true);
Ref->printRight(OB);
}
};
@@ -1473,21 +1497,6 @@ public:
}
};
-struct StdQualifiedName : Node {
- Node *Child;
-
- StdQualifiedName(Node *Child_) : Node(KStdQualifiedName), Child(Child_) {}
-
- template<typename Fn> void match(Fn F) const { F(Child); }
-
- StringView getBaseName() const override { return Child->getBaseName(); }
-
- void printLeft(OutputBuffer &OB) const override {
- OB += "std::";
- Child->print(OB);
- }
-};
-
enum class SpecialSubKind {
allocator,
basic_string,
@@ -1497,15 +1506,25 @@ enum class SpecialSubKind {
iostream,
};
-class ExpandedSpecialSubstitution final : public Node {
+class SpecialSubstitution;
+class ExpandedSpecialSubstitution : public Node {
+protected:
SpecialSubKind SSK;
+ ExpandedSpecialSubstitution(SpecialSubKind SSK_, Kind K_)
+ : Node(K_), SSK(SSK_) {}
public:
ExpandedSpecialSubstitution(SpecialSubKind SSK_)
- : Node(KExpandedSpecialSubstitution), SSK(SSK_) {}
+ : ExpandedSpecialSubstitution(SSK_, KExpandedSpecialSubstitution) {}
+ inline ExpandedSpecialSubstitution(SpecialSubstitution const *);
template<typename Fn> void match(Fn F) const { F(SSK); }
+protected:
+ bool isInstantiation() const {
+ return unsigned(SSK) >= unsigned(SpecialSubKind::string);
+ }
+
StringView getBaseName() const override {
switch (SSK) {
case SpecialSubKind::allocator:
@@ -1524,82 +1543,44 @@ public:
DEMANGLE_UNREACHABLE;
}
+private:
void printLeft(OutputBuffer &OB) const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- OB += "std::allocator";
- break;
- case SpecialSubKind::basic_string:
- OB += "std::basic_string";
- break;
- case SpecialSubKind::string:
- OB += "std::basic_string<char, std::char_traits<char>, "
- "std::allocator<char> >";
- break;
- case SpecialSubKind::istream:
- OB += "std::basic_istream<char, std::char_traits<char> >";
- break;
- case SpecialSubKind::ostream:
- OB += "std::basic_ostream<char, std::char_traits<char> >";
- break;
- case SpecialSubKind::iostream:
- OB += "std::basic_iostream<char, std::char_traits<char> >";
- break;
+ OB << "std::" << getBaseName();
+ if (isInstantiation()) {
+ OB << "<char, std::char_traits<char>";
+ if (SSK == SpecialSubKind::string)
+ OB << ", std::allocator<char>";
+ OB << ">";
}
}
};
-class SpecialSubstitution final : public Node {
+class SpecialSubstitution final : public ExpandedSpecialSubstitution {
public:
- SpecialSubKind SSK;
-
SpecialSubstitution(SpecialSubKind SSK_)
- : Node(KSpecialSubstitution), SSK(SSK_) {}
+ : ExpandedSpecialSubstitution(SSK_, KSpecialSubstitution) {}
template<typename Fn> void match(Fn F) const { F(SSK); }
StringView getBaseName() const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- return StringView("allocator");
- case SpecialSubKind::basic_string:
- return StringView("basic_string");
- case SpecialSubKind::string:
- return StringView("string");
- case SpecialSubKind::istream:
- return StringView("istream");
- case SpecialSubKind::ostream:
- return StringView("ostream");
- case SpecialSubKind::iostream:
- return StringView("iostream");
+ auto SV = ExpandedSpecialSubstitution::getBaseName();
+ if (isInstantiation()) {
+ // The instantiations are typedefs that drop the "basic_" prefix.
+ assert(SV.startsWith("basic_"));
+ SV = SV.dropFront(sizeof("basic_") - 1);
}
- DEMANGLE_UNREACHABLE;
+ return SV;
}
void printLeft(OutputBuffer &OB) const override {
- switch (SSK) {
- case SpecialSubKind::allocator:
- OB += "std::allocator";
- break;
- case SpecialSubKind::basic_string:
- OB += "std::basic_string";
- break;
- case SpecialSubKind::string:
- OB += "std::string";
- break;
- case SpecialSubKind::istream:
- OB += "std::istream";
- break;
- case SpecialSubKind::ostream:
- OB += "std::ostream";
- break;
- case SpecialSubKind::iostream:
- OB += "std::iostream";
- break;
- }
+ OB << "std::" << getBaseName();
}
};
+inline ExpandedSpecialSubstitution::ExpandedSpecialSubstitution(
+ SpecialSubstitution const *SS)
+ : ExpandedSpecialSubstitution(SS->SSK) {}
+
class CtorDtorName final : public Node {
const Node *Basename;
const bool IsDtor;
@@ -1665,13 +1646,14 @@ public:
void printDeclarator(OutputBuffer &OB) const {
if (!TemplateParams.empty()) {
+ ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
OB += "<";
TemplateParams.printWithComma(OB);
OB += ">";
}
- OB += "(";
+ OB.printOpen();
Params.printWithComma(OB);
- OB += ")";
+ OB.printClose();
}
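
ScopedOverride (the renamed SwapAndRestore) pins OB.GtIsGt to zero while the template parameter list prints; a zero counter tells expression nodes that a bare '>' would prematurely close the surrounding argument list. A minimal sketch of such a guard, with DemoScopedOverride as a hypothetical stand-in:

#include <iostream>

// Save a value, override it for the current scope, restore it on exit.
template <typename T> class DemoScopedOverride {
  T &Slot;
  T Saved;

public:
  DemoScopedOverride(T &Slot_, T NewVal) : Slot(Slot_), Saved(Slot_) {
    Slot = NewVal;
  }
  ~DemoScopedOverride() { Slot = Saved; }
};

int main() {
  unsigned GtIsGt = 1; // nonzero: '>' is an ordinary character here
  {
    DemoScopedOverride<unsigned> LT(GtIsGt, 0u); // entering "<...>"
    std::cout << GtIsGt << '\n';                 // 0: '>' needs protection
  }
  std::cout << GtIsGt << '\n'; // 1: restored on scope exit
}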
void printLeft(OutputBuffer &OB) const override {
@@ -1691,9 +1673,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Bindings); }
void printLeft(OutputBuffer &OB) const override {
- OB += '[';
+ OB.printOpen('[');
Bindings.printWithComma(OB);
- OB += ']';
+ OB.printClose(']');
}
};
@@ -1705,28 +1687,31 @@ class BinaryExpr : public Node {
const Node *RHS;
public:
- BinaryExpr(const Node *LHS_, StringView InfixOperator_, const Node *RHS_)
- : Node(KBinaryExpr), LHS(LHS_), InfixOperator(InfixOperator_), RHS(RHS_) {
- }
+ BinaryExpr(const Node *LHS_, StringView InfixOperator_, const Node *RHS_,
+ Prec Prec_)
+ : Node(KBinaryExpr, Prec_), LHS(LHS_), InfixOperator(InfixOperator_),
+ RHS(RHS_) {}
- template<typename Fn> void match(Fn F) const { F(LHS, InfixOperator, RHS); }
+ template <typename Fn> void match(Fn F) const {
+ F(LHS, InfixOperator, RHS, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
- // might be a template argument expression, then we need to disambiguate
- // with parens.
- if (InfixOperator == ">")
- OB += "(";
-
- OB += "(";
- LHS->print(OB);
- OB += ") ";
+ bool ParenAll = OB.isGtInsideTemplateArgs() &&
+ (InfixOperator == ">" || InfixOperator == ">>");
+ if (ParenAll)
+ OB.printOpen();
+ // Assignment is right associative, with special LHS precedence.
+ bool IsAssign = getPrecedence() == Prec::Assign;
+ LHS->printAsOperand(OB, IsAssign ? Prec::OrIf : getPrecedence(), !IsAssign);
+ // No space before comma operator
+ if (!(InfixOperator == ","))
+ OB += " ";
OB += InfixOperator;
- OB += " (";
- RHS->print(OB);
- OB += ")";
-
- if (InfixOperator == ">")
- OB += ")";
+ OB += " ";
+ RHS->printAsOperand(OB, getPrecedence(), IsAssign);
+ if (ParenAll)
+ OB.printClose();
}
};
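
The rewrite drops the old unconditional parentheses around both operands: printAsOperand (defined on Node elsewhere in this header) parenthesizes a child only when its precedence loses to the context, and ParenAll keeps '>'/'>>' from closing an enclosing template argument list. A sketch of the comparison, with hypothetical Demo* names and a Prec-like ordering where smaller values bind tighter:

#include <iostream>
#include <string>

enum class DemoPrec { Primary, Unary, Multiplicative, Additive, Assign };

struct DemoExpr {
  std::string Text;
  DemoPrec Prec;
};

// Parenthesize when the child binds no tighter than the context; with
// StrictlyWorse the child survives a tie (used on the associative side).
static std::string demoAsOperand(const DemoExpr &E, DemoPrec Ctx,
                                 bool StrictlyWorse) {
  bool Paren = StrictlyWorse ? E.Prec > Ctx : E.Prec >= Ctx;
  return Paren ? "(" + E.Text + ")" : E.Text;
}

int main() {
  DemoExpr Sum{"a + b", DemoPrec::Additive};
  // '+' loses to '*', so the LHS gets parens: (a + b) * c
  std::cout << demoAsOperand(Sum, DemoPrec::Multiplicative, false) << " * c\n";
}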
@@ -1735,17 +1720,18 @@ class ArraySubscriptExpr : public Node {
const Node *Op2;
public:
- ArraySubscriptExpr(const Node *Op1_, const Node *Op2_)
- : Node(KArraySubscriptExpr), Op1(Op1_), Op2(Op2_) {}
+ ArraySubscriptExpr(const Node *Op1_, const Node *Op2_, Prec Prec_)
+ : Node(KArraySubscriptExpr, Prec_), Op1(Op1_), Op2(Op2_) {}
- template<typename Fn> void match(Fn F) const { F(Op1, Op2); }
+ template <typename Fn> void match(Fn F) const {
+ F(Op1, Op2, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
- OB += "(";
- Op1->print(OB);
- OB += ")[";
- Op2->print(OB);
- OB += "]";
+ Op1->printAsOperand(OB, getPrecedence());
+ OB.printOpen('[');
+ Op2->printAsOperand(OB);
+ OB.printClose(']');
}
};
@@ -1754,15 +1740,15 @@ class PostfixExpr : public Node {
const StringView Operator;
public:
- PostfixExpr(const Node *Child_, StringView Operator_)
- : Node(KPostfixExpr), Child(Child_), Operator(Operator_) {}
+ PostfixExpr(const Node *Child_, StringView Operator_, Prec Prec_)
+ : Node(KPostfixExpr, Prec_), Child(Child_), Operator(Operator_) {}
- template<typename Fn> void match(Fn F) const { F(Child, Operator); }
+ template <typename Fn> void match(Fn F) const {
+ F(Child, Operator, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
- OB += "(";
- Child->print(OB);
- OB += ")";
+ Child->printAsOperand(OB, getPrecedence(), true);
OB += Operator;
}
};
@@ -1773,19 +1759,20 @@ class ConditionalExpr : public Node {
const Node *Else;
public:
- ConditionalExpr(const Node *Cond_, const Node *Then_, const Node *Else_)
- : Node(KConditionalExpr), Cond(Cond_), Then(Then_), Else(Else_) {}
+ ConditionalExpr(const Node *Cond_, const Node *Then_, const Node *Else_,
+ Prec Prec_)
+ : Node(KConditionalExpr, Prec_), Cond(Cond_), Then(Then_), Else(Else_) {}
- template<typename Fn> void match(Fn F) const { F(Cond, Then, Else); }
+ template <typename Fn> void match(Fn F) const {
+ F(Cond, Then, Else, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
- OB += "(";
- Cond->print(OB);
- OB += ") ? (";
- Then->print(OB);
- OB += ") : (";
- Else->print(OB);
- OB += ")";
+ Cond->printAsOperand(OB, getPrecedence());
+ OB += " ? ";
+ Then->printAsOperand(OB);
+ OB += " : ";
+ Else->printAsOperand(OB, Prec::Assign, true);
}
};
@@ -1795,15 +1782,17 @@ class MemberExpr : public Node {
const Node *RHS;
public:
- MemberExpr(const Node *LHS_, StringView Kind_, const Node *RHS_)
- : Node(KMemberExpr), LHS(LHS_), Kind(Kind_), RHS(RHS_) {}
+ MemberExpr(const Node *LHS_, StringView Kind_, const Node *RHS_, Prec Prec_)
+ : Node(KMemberExpr, Prec_), LHS(LHS_), Kind(Kind_), RHS(RHS_) {}
- template<typename Fn> void match(Fn F) const { F(LHS, Kind, RHS); }
+ template <typename Fn> void match(Fn F) const {
+ F(LHS, Kind, RHS, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
- LHS->print(OB);
+ LHS->printAsOperand(OB, getPrecedence(), true);
OB += Kind;
- RHS->print(OB);
+ RHS->printAsOperand(OB, getPrecedence(), false);
}
};
@@ -1847,15 +1836,19 @@ class EnclosingExpr : public Node {
const StringView Postfix;
public:
- EnclosingExpr(StringView Prefix_, Node *Infix_, StringView Postfix_)
- : Node(KEnclosingExpr), Prefix(Prefix_), Infix(Infix_),
- Postfix(Postfix_) {}
+ EnclosingExpr(StringView Prefix_, const Node *Infix_,
+ Prec Prec_ = Prec::Primary)
+ : Node(KEnclosingExpr, Prec_), Prefix(Prefix_), Infix(Infix_) {}
- template<typename Fn> void match(Fn F) const { F(Prefix, Infix, Postfix); }
+ template <typename Fn> void match(Fn F) const {
+ F(Prefix, Infix, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
OB += Prefix;
+ OB.printOpen();
Infix->print(OB);
+ OB.printClose();
OB += Postfix;
}
};
@@ -1867,18 +1860,24 @@ class CastExpr : public Node {
const Node *From;
public:
- CastExpr(StringView CastKind_, const Node *To_, const Node *From_)
- : Node(KCastExpr), CastKind(CastKind_), To(To_), From(From_) {}
+ CastExpr(StringView CastKind_, const Node *To_, const Node *From_, Prec Prec_)
+ : Node(KCastExpr, Prec_), CastKind(CastKind_), To(To_), From(From_) {}
- template<typename Fn> void match(Fn F) const { F(CastKind, To, From); }
+ template <typename Fn> void match(Fn F) const {
+ F(CastKind, To, From, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
OB += CastKind;
- OB += "<";
- To->printLeft(OB);
- OB += ">(";
- From->printLeft(OB);
- OB += ")";
+ {
+ ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+ OB += "<";
+ To->printLeft(OB);
+ OB += ">";
+ }
+ OB.printOpen();
+ From->printAsOperand(OB);
+ OB.printClose();
}
};
@@ -1892,10 +1891,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Pack); }
void printLeft(OutputBuffer &OB) const override {
- OB += "sizeof...(";
+ OB += "sizeof...";
+ OB.printOpen();
ParameterPackExpansion PPE(Pack);
PPE.printLeft(OB);
- OB += ")";
+ OB.printClose();
}
};
@@ -1904,16 +1904,18 @@ class CallExpr : public Node {
NodeArray Args;
public:
- CallExpr(const Node *Callee_, NodeArray Args_)
- : Node(KCallExpr), Callee(Callee_), Args(Args_) {}
+ CallExpr(const Node *Callee_, NodeArray Args_, Prec Prec_)
+ : Node(KCallExpr, Prec_), Callee(Callee_), Args(Args_) {}
- template<typename Fn> void match(Fn F) const { F(Callee, Args); }
+ template <typename Fn> void match(Fn F) const {
+ F(Callee, Args, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
Callee->print(OB);
- OB += "(";
+ OB.printOpen();
Args.printWithComma(OB);
- OB += ")";
+ OB.printClose();
}
};
@@ -1926,31 +1928,31 @@ class NewExpr : public Node {
bool IsArray; // new[] ?
public:
NewExpr(NodeArray ExprList_, Node *Type_, NodeArray InitList_, bool IsGlobal_,
- bool IsArray_)
- : Node(KNewExpr), ExprList(ExprList_), Type(Type_), InitList(InitList_),
- IsGlobal(IsGlobal_), IsArray(IsArray_) {}
+ bool IsArray_, Prec Prec_)
+ : Node(KNewExpr, Prec_), ExprList(ExprList_), Type(Type_),
+ InitList(InitList_), IsGlobal(IsGlobal_), IsArray(IsArray_) {}
template<typename Fn> void match(Fn F) const {
- F(ExprList, Type, InitList, IsGlobal, IsArray);
+ F(ExprList, Type, InitList, IsGlobal, IsArray, getPrecedence());
}
void printLeft(OutputBuffer &OB) const override {
if (IsGlobal)
- OB += "::operator ";
+ OB += "::";
OB += "new";
if (IsArray)
OB += "[]";
- OB += ' ';
if (!ExprList.empty()) {
- OB += "(";
+ OB.printOpen();
ExprList.printWithComma(OB);
- OB += ")";
+ OB.printClose();
}
+ OB += " ";
Type->print(OB);
if (!InitList.empty()) {
- OB += "(";
+ OB.printOpen();
InitList.printWithComma(OB);
- OB += ")";
+ OB.printClose();
}
}
};
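
The printing above now spells a global new as '::new' rather than '::operator new', and moves the space so placement arguments hug the keyword. A sketch of the resulting layout (demoNewExpr and Join are hypothetical):

#include <iostream>
#include <string>
#include <vector>

// ["::"] "new" ["[]"] ["(" placement-args ")"] " " type ["(" init-args ")"]
static std::string demoNewExpr(bool IsGlobal, bool IsArray,
                               const std::vector<std::string> &ExprList,
                               const std::string &Type,
                               const std::vector<std::string> &InitList) {
  auto Join = [](const std::vector<std::string> &V) {
    std::string R;
    for (size_t I = 0; I < V.size(); ++I)
      R += (I ? ", " : "") + V[I];
    return R;
  };
  std::string Out;
  if (IsGlobal) Out += "::";
  Out += "new";
  if (IsArray) Out += "[]";
  if (!ExprList.empty()) Out += "(" + Join(ExprList) + ")";
  Out += " " + Type;
  if (!InitList.empty()) Out += "(" + Join(InitList) + ")";
  return Out;
}

int main() {
  std::cout << demoNewExpr(true, false, {"buf"}, "T", {"1", "2"}) << '\n';
  // ::new(buf) T(1, 2)
}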
@@ -1961,17 +1963,21 @@ class DeleteExpr : public Node {
bool IsArray;
public:
- DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_)
- : Node(KDeleteExpr), Op(Op_), IsGlobal(IsGlobal_), IsArray(IsArray_) {}
+ DeleteExpr(Node *Op_, bool IsGlobal_, bool IsArray_, Prec Prec_)
+ : Node(KDeleteExpr, Prec_), Op(Op_), IsGlobal(IsGlobal_),
+ IsArray(IsArray_) {}
- template<typename Fn> void match(Fn F) const { F(Op, IsGlobal, IsArray); }
+ template <typename Fn> void match(Fn F) const {
+ F(Op, IsGlobal, IsArray, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
if (IsGlobal)
OB += "::";
OB += "delete";
if (IsArray)
- OB += "[] ";
+ OB += "[]";
+ OB += ' ';
Op->print(OB);
}
};
@@ -1981,16 +1987,16 @@ class PrefixExpr : public Node {
Node *Child;
public:
- PrefixExpr(StringView Prefix_, Node *Child_)
- : Node(KPrefixExpr), Prefix(Prefix_), Child(Child_) {}
+ PrefixExpr(StringView Prefix_, Node *Child_, Prec Prec_)
+ : Node(KPrefixExpr, Prec_), Prefix(Prefix_), Child(Child_) {}
- template<typename Fn> void match(Fn F) const { F(Prefix, Child); }
+ template <typename Fn> void match(Fn F) const {
+ F(Prefix, Child, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
OB += Prefix;
- OB += "(";
- Child->print(OB);
- OB += ")";
+ Child->printAsOperand(OB, getPrecedence());
}
};
@@ -2013,17 +2019,20 @@ class ConversionExpr : public Node {
NodeArray Expressions;
public:
- ConversionExpr(const Node *Type_, NodeArray Expressions_)
- : Node(KConversionExpr), Type(Type_), Expressions(Expressions_) {}
+ ConversionExpr(const Node *Type_, NodeArray Expressions_, Prec Prec_)
+ : Node(KConversionExpr, Prec_), Type(Type_), Expressions(Expressions_) {}
- template<typename Fn> void match(Fn F) const { F(Type, Expressions); }
+ template <typename Fn> void match(Fn F) const {
+ F(Type, Expressions, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
- OB += "(";
+ OB.printOpen();
Type->print(OB);
- OB += ")(";
+ OB.printClose();
+ OB.printOpen();
Expressions.printWithComma(OB);
- OB += ")";
+ OB.printClose();
}
};
@@ -2034,18 +2043,21 @@ class PointerToMemberConversionExpr : public Node {
public:
PointerToMemberConversionExpr(const Node *Type_, const Node *SubExpr_,
- StringView Offset_)
- : Node(KPointerToMemberConversionExpr), Type(Type_), SubExpr(SubExpr_),
- Offset(Offset_) {}
+ StringView Offset_, Prec Prec_)
+ : Node(KPointerToMemberConversionExpr, Prec_), Type(Type_),
+ SubExpr(SubExpr_), Offset(Offset_) {}
- template<typename Fn> void match(Fn F) const { F(Type, SubExpr, Offset); }
+ template <typename Fn> void match(Fn F) const {
+ F(Type, SubExpr, Offset, getPrecedence());
+ }
void printLeft(OutputBuffer &OB) const override {
- OB += "(";
+ OB.printOpen();
Type->print(OB);
- OB += ")(";
+ OB.printClose();
+ OB.printOpen();
SubExpr->print(OB);
- OB += ")";
+ OB.printClose();
}
};
@@ -2131,41 +2143,33 @@ public:
void printLeft(OutputBuffer &OB) const override {
auto PrintPack = [&] {
- OB += '(';
+ OB.printOpen();
ParameterPackExpansion(Pack).print(OB);
- OB += ')';
+ OB.printClose();
};
- OB += '(';
-
- if (IsLeftFold) {
- // init op ... op pack
- if (Init != nullptr) {
- Init->print(OB);
- OB += ' ';
- OB += OperatorName;
- OB += ' ';
- }
- // ... op pack
- OB += "... ";
- OB += OperatorName;
- OB += ' ';
- PrintPack();
- } else { // !IsLeftFold
- // pack op ...
- PrintPack();
- OB += ' ';
- OB += OperatorName;
- OB += " ...";
- // pack op ... op init
- if (Init != nullptr) {
- OB += ' ';
- OB += OperatorName;
- OB += ' ';
- Init->print(OB);
- }
+ OB.printOpen();
+ // Either '[init op ]... op pack' or 'pack op ...[ op init]'
+ // Refactored to '[(init|pack) op ]...[ op (pack|init)]'
+ // Fold expr operands are cast-expressions
+ if (!IsLeftFold || Init != nullptr) {
+ // '(init|pack) op '
+ if (IsLeftFold)
+ Init->printAsOperand(OB, Prec::Cast, true);
+ else
+ PrintPack();
+ OB << " " << OperatorName << " ";
+ }
+ OB << "...";
+ if (IsLeftFold || Init != nullptr) {
+ // ' op (init|pack)'
+ OB << " " << OperatorName << " ";
+ if (IsLeftFold)
+ PrintPack();
+ else
+ Init->printAsOperand(OB, Prec::Cast, true);
}
- OB += ')';
+ OB.printClose();
}
};
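
Both fold directions now share one body: print the left operand (if any), then '...', then the right operand (if any), where one side is the pack and the other the optional init. A sketch of that branch structure (demoFold is hypothetical; the real code additionally parenthesizes operands at Prec::Cast):

#include <iostream>
#include <string>

static std::string demoFold(bool IsLeftFold, const std::string &Op,
                            const std::string &Pack, const std::string *Init) {
  std::string Out = "(";
  if (!IsLeftFold || Init) // '(init|pack) op '
    Out += (IsLeftFold ? *Init : Pack) + " " + Op + " ";
  Out += "...";
  if (IsLeftFold || Init)  // ' op (pack|init)'
    Out += " " + Op + " " + (IsLeftFold ? Pack : *Init);
  return Out + ")";
}

int main() {
  std::string Init = "0";
  std::cout << demoFold(true, "+", "args", nullptr) << '\n'; // (... + args)
  std::cout << demoFold(false, "+", "args", &Init) << '\n';  // (args + ... + 0)
}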
@@ -2239,9 +2243,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty, Integer); }
void printLeft(OutputBuffer &OB) const override {
- OB << "(";
+ OB.printOpen();
Ty->print(OB);
- OB << ")";
+ OB.printClose();
if (Integer[0] == 'n')
OB << "-" << Integer.dropFront(1);
@@ -2262,13 +2266,13 @@ public:
void printLeft(OutputBuffer &OB) const override {
if (Type.size() > 3) {
- OB += "(";
+ OB.printOpen();
OB += Type;
- OB += ")";
+ OB.printClose();
}
if (Value[0] == 'n') {
- OB += "-";
+ OB += '-';
OB += Value.dropFront(1);
} else
OB += Value;
@@ -2344,24 +2348,22 @@ using LongDoubleLiteral = FloatLiteralImpl<long double>;
template<typename Fn>
void Node::visit(Fn F) const {
switch (K) {
-#define CASE(X) case K ## X: return F(static_cast<const X*>(this));
- FOR_EACH_NODE_KIND(CASE)
-#undef CASE
+#define NODE(X) \
+ case K##X: \
+ return F(static_cast<const X *>(this));
+#include "ItaniumNodes.def"
}
assert(0 && "unknown mangling node kind");
}
/// Determine the kind of a node from its type.
template<typename NodeT> struct NodeKind;
-#define SPECIALIZATION(X) \
- template<> struct NodeKind<X> { \
- static constexpr Node::Kind Kind = Node::K##X; \
- static constexpr const char *name() { return #X; } \
+#define NODE(X) \
+ template <> struct NodeKind<X> { \
+ static constexpr Node::Kind Kind = Node::K##X; \
+ static constexpr const char *name() { return #X; } \
};
-FOR_EACH_NODE_KIND(SPECIALIZATION)
-#undef SPECIALIZATION
-
-#undef FOR_EACH_NODE_KIND
+#include "ItaniumNodes.def"
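
Both the visit dispatcher and the NodeKind specializations are now stamped out by re-including ItaniumNodes.def under a different NODE definition. A self-contained sketch of the X-macro pattern, using a DEMO_NODES macro in place of the real .def file:

#include <iostream>

// Stand-in for ItaniumNodes.def: a list of NODE(X) invocations.
#define DEMO_NODES(N) N(NameType) N(BinaryExpr) N(FoldExpr)

enum class DemoKind {
#define NODE(X) K##X,
  DEMO_NODES(NODE)
#undef NODE
};

static const char *demoKindName(DemoKind K) {
  switch (K) {
#define NODE(X)                                                                \
  case DemoKind::K##X:                                                         \
    return #X;
    DEMO_NODES(NODE)
#undef NODE
  }
  return "unknown";
}

int main() { std::cout << demoKindName(DemoKind::KBinaryExpr) << '\n'; }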
template <typename Derived, typename Alloc> struct AbstractManglingParser {
const char *First;
@@ -2499,17 +2501,16 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
/// Parse the <expr> production.
Node *parseExpr();
- Node *parsePrefixExpr(StringView Kind);
- Node *parseBinaryExpr(StringView Kind);
+ Node *parsePrefixExpr(StringView Kind, Node::Prec Prec);
+ Node *parseBinaryExpr(StringView Kind, Node::Prec Prec);
Node *parseIntegerLiteral(StringView Lit);
Node *parseExprPrimary();
template <class Float> Node *parseFloatingLiteral();
Node *parseFunctionParam();
- Node *parseNewExpr();
Node *parseConversionExpr();
Node *parseBracedExpr();
Node *parseFoldExpr();
- Node *parsePointerToMemberConversionExpr();
+ Node *parsePointerToMemberConversionExpr(Node::Prec Prec);
Node *parseSubobjectExpr();
/// Parse the <type> production.
@@ -2557,17 +2558,80 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
Node *parseName(NameState *State = nullptr);
Node *parseLocalName(NameState *State);
Node *parseOperatorName(NameState *State);
- Node *parseUnqualifiedName(NameState *State);
+ bool parseModuleNameOpt(ModuleName *&Module);
+ Node *parseUnqualifiedName(NameState *State, Node *Scope, ModuleName *Module);
Node *parseUnnamedTypeName(NameState *State);
Node *parseSourceName(NameState *State);
- Node *parseUnscopedName(NameState *State);
+ Node *parseUnscopedName(NameState *State, bool *isSubstName);
Node *parseNestedName(NameState *State);
Node *parseCtorDtorName(Node *&SoFar, NameState *State);
Node *parseAbiTags(Node *N);
+ struct OperatorInfo {
+ enum OIKind : unsigned char {
+ Prefix, // Prefix unary: @ expr
+ Postfix, // Postfix unary: expr @
+ Binary, // Binary: lhs @ rhs
+ Array, // Array index: lhs [ rhs ]
+ Member, // Member access: lhs @ rhs
+ New, // New
+ Del, // Delete
+ Call, // Function call: expr (expr*)
+ CCast, // C cast: (type)expr
+ Conditional, // Conditional: expr ? expr : expr
+ NameOnly, // Overload only, not allowed in expression.
+ // Below do not have operator names
+ NamedCast, // Named cast, @<type>(expr)
+ OfIdOp, // alignof, sizeof, typeid
+
+ Unnameable = NamedCast,
+ };
+ char Enc[2]; // Encoding
+ OIKind Kind; // Kind of operator
+ bool Flag : 1; // Entry-specific flag
+ Node::Prec Prec : 7; // Precedence
+ const char *Name; // Spelling
+
+ public:
+ constexpr OperatorInfo(const char (&E)[3], OIKind K, bool F, Node::Prec P,
+ const char *N)
+ : Enc{E[0], E[1]}, Kind{K}, Flag{F}, Prec{P}, Name{N} {}
+
+ public:
+ bool operator<(const OperatorInfo &Other) const {
+ return *this < Other.Enc;
+ }
+ bool operator<(const char *Peek) const {
+ return Enc[0] < Peek[0] || (Enc[0] == Peek[0] && Enc[1] < Peek[1]);
+ }
+ bool operator==(const char *Peek) const {
+ return Enc[0] == Peek[0] && Enc[1] == Peek[1];
+ }
+ bool operator!=(const char *Peek) const { return !this->operator==(Peek); }
+
+ public:
+ StringView getSymbol() const {
+ StringView Res = Name;
+ if (Kind < Unnameable) {
+ assert(Res.startsWith("operator") &&
+ "operator name does not start with 'operator'");
+ Res = Res.dropFront(sizeof("operator") - 1);
+ Res.consumeFront(' ');
+ }
+ return Res;
+ }
+ StringView getName() const { return Name; }
+ OIKind getKind() const { return Kind; }
+ bool getFlag() const { return Flag; }
+ Node::Prec getPrecedence() const { return Prec; }
+ };
+ static const OperatorInfo Ops[];
+ static const size_t NumOps;
+ const OperatorInfo *parseOperatorEncoding();
+
/// Parse the <unresolved-name> production.
- Node *parseUnresolvedName();
+ Node *parseUnresolvedName(bool Global);
Node *parseSimpleId();
Node *parseBaseUnresolvedName();
Node *parseUnresolvedType();
@@ -2588,26 +2652,16 @@ const char* parse_discriminator(const char* first, const char* last);
// ::= <substitution>
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseName(NameState *State) {
- consumeIf('L'); // extension
-
if (look() == 'N')
return getDerived().parseNestedName(State);
if (look() == 'Z')
return getDerived().parseLocalName(State);
Node *Result = nullptr;
- bool IsSubst = look() == 'S' && look(1) != 't';
- if (IsSubst) {
- // A substitution must lead to:
- // ::= <unscoped-template-name> <template-args>
- Result = getDerived().parseSubstitution();
- } else {
- // An unscoped name can be one of:
- // ::= <unscoped-name>
- // ::= <unscoped-template-name> <template-args>
- Result = getDerived().parseUnscopedName(State);
- }
- if (Result == nullptr)
+ bool IsSubst = false;
+
+ Result = getDerived().parseUnscopedName(State, &IsSubst);
+ if (!Result)
return nullptr;
if (look() == 'I') {
@@ -2667,38 +2721,63 @@ Node *AbstractManglingParser<Derived, Alloc>::parseLocalName(NameState *State) {
// <unscoped-name> ::= <unqualified-name>
// ::= St <unqualified-name> # ::std::
-// extension ::= StL<unqualified-name>
+// [*] extension
template <typename Derived, typename Alloc>
Node *
-AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State) {
- bool IsStd = consumeIf("St");
- if (IsStd)
- consumeIf('L');
+AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State,
+ bool *IsSubst) {
- Node *Result = getDerived().parseUnqualifiedName(State);
- if (Result == nullptr)
- return nullptr;
- if (IsStd)
- Result = make<StdQualifiedName>(Result);
+ Node *Std = nullptr;
+ if (consumeIf("St")) {
+ Std = make<NameType>("std");
+ if (Std == nullptr)
+ return nullptr;
+ }
- return Result;
+ Node *Res = nullptr;
+ ModuleName *Module = nullptr;
+ if (look() == 'S') {
+ Node *S = getDerived().parseSubstitution();
+ if (!S)
+ return nullptr;
+ if (S->getKind() == Node::KModuleName)
+ Module = static_cast<ModuleName *>(S);
+ else if (IsSubst && Std == nullptr) {
+ Res = S;
+ *IsSubst = true;
+ } else {
+ return nullptr;
+ }
+ }
+
+ if (Res == nullptr || Std != nullptr) {
+ Res = getDerived().parseUnqualifiedName(State, Std, Module);
+ }
+
+ return Res;
}
-// <unqualified-name> ::= <operator-name> [abi-tags]
-// ::= <ctor-dtor-name>
-// ::= <source-name>
-// ::= <unnamed-type-name>
-// ::= DC <source-name>+ E # structured binding declaration
+// <unqualified-name> ::= [<module-name>] L? <operator-name> [<abi-tags>]
+// ::= [<module-name>] <ctor-dtor-name> [<abi-tags>]
+// ::= [<module-name>] L? <source-name> [<abi-tags>]
+// ::= [<module-name>] L? <unnamed-type-name> [<abi-tags>]
+// # structured binding declaration
+// ::= [<module-name>] L? DC <source-name>+ E
template <typename Derived, typename Alloc>
-Node *
-AbstractManglingParser<Derived, Alloc>::parseUnqualifiedName(NameState *State) {
- // <ctor-dtor-name>s are special-cased in parseNestedName().
+Node *AbstractManglingParser<Derived, Alloc>::parseUnqualifiedName(
+ NameState *State, Node *Scope, ModuleName *Module) {
+ if (getDerived().parseModuleNameOpt(Module))
+ return nullptr;
+
+ consumeIf('L');
+
Node *Result;
- if (look() == 'U')
- Result = getDerived().parseUnnamedTypeName(State);
- else if (look() >= '1' && look() <= '9')
+ if (look() >= '1' && look() <= '9') {
Result = getDerived().parseSourceName(State);
- else if (consumeIf("DC")) {
+ } else if (look() == 'U') {
+ Result = getDerived().parseUnnamedTypeName(State);
+ } else if (consumeIf("DC")) {
+ // Structured binding
size_t BindingsBegin = Names.size();
do {
Node *Binding = getDerived().parseSourceName(State);
@@ -2707,13 +2786,46 @@ AbstractManglingParser<Derived, Alloc>::parseUnqualifiedName(NameState *State) {
Names.push_back(Binding);
} while (!consumeIf('E'));
Result = make<StructuredBindingName>(popTrailingNodeArray(BindingsBegin));
- } else
+ } else if (look() == 'C' || look() == 'D') {
+ // A <ctor-dtor-name>.
+ if (Scope == nullptr || Module != nullptr)
+ return nullptr;
+ Result = getDerived().parseCtorDtorName(Scope, State);
+ } else {
Result = getDerived().parseOperatorName(State);
+ }
+
+ if (Result != nullptr && Module != nullptr)
+ Result = make<ModuleEntity>(Module, Result);
if (Result != nullptr)
Result = getDerived().parseAbiTags(Result);
+ if (Result != nullptr && Scope != nullptr)
+ Result = make<NestedName>(Scope, Result);
+
return Result;
}
+// <module-name> ::= <module-subname>
+// ::= <module-name> <module-subname>
+// ::= <substitution> # passed in by caller
+// <module-subname> ::= W <source-name>
+// ::= W P <source-name>
+template <typename Derived, typename Alloc>
+bool AbstractManglingParser<Derived, Alloc>::parseModuleNameOpt(
+ ModuleName *&Module) {
+ while (consumeIf('W')) {
+ bool IsPartition = consumeIf('P');
+ Node *Sub = getDerived().parseSourceName(nullptr);
+ if (!Sub)
+ return true;
+ Module =
+ static_cast<ModuleName *>(make<ModuleName>(Module, Sub, IsPartition));
+ Subs.push_back(Module);
+ }
+
+ return false;
+}
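
Each 'W' introduces one <source-name> component and 'WP' starts a partition, so a C++20 module attribution accumulates left to right. A hypothetical walk over the encoding (demoParseModuleName; the real parser also enters each component into the substitution table, as above, and formats output differently):

#include <cctype>
#include <iostream>
#include <string>

static bool demoParseModuleName(const char *&P, std::string &Out) {
  while (*P == 'W') {
    ++P;
    bool IsPartition = *P == 'P';
    if (IsPartition)
      ++P;
    // <source-name> ::= <positive length number> <identifier>
    if (!std::isdigit(static_cast<unsigned char>(*P)))
      return false;
    size_t Len = 0;
    while (std::isdigit(static_cast<unsigned char>(*P)))
      Len = Len * 10 + (*P++ - '0');
    if (!Out.empty())
      Out += IsPartition ? ':' : '.';
    Out.append(P, Len);
    P += Len;
  }
  return true;
}

int main() {
  const char *M = "W3FooWP4Part5inner";
  std::string Name;
  if (demoParseModuleName(M, Name))
    std::cout << Name << '\n'; // Foo:Part ("5inner" remains for the entity)
}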
+
// <unnamed-type-name> ::= Ut [<nonnegative number>] _
// ::= <closure-type-name>
//
@@ -2735,7 +2847,7 @@ AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *State) {
return make<UnnamedTypeName>(Count);
}
if (consumeIf("Ul")) {
- SwapAndRestore<size_t> SwapParams(ParsingLambdaParamsAtLevel,
+ ScopedOverride<size_t> SwapParams(ParsingLambdaParamsAtLevel,
TemplateParams.size());
ScopedTemplateParamList LambdaTemplateParams(this);
@@ -2813,97 +2925,124 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSourceName(NameState *) {
return make<NameType>(Name);
}
-// <operator-name> ::= aa # &&
-// ::= ad # & (unary)
-// ::= an # &
-// ::= aN # &=
-// ::= aS # =
-// ::= cl # ()
-// ::= cm # ,
-// ::= co # ~
-// ::= cv <type> # (cast)
-// ::= da # delete[]
-// ::= de # * (unary)
-// ::= dl # delete
-// ::= dv # /
-// ::= dV # /=
-// ::= eo # ^
-// ::= eO # ^=
-// ::= eq # ==
-// ::= ge # >=
-// ::= gt # >
-// ::= ix # []
-// ::= le # <=
+// Operator encodings
+template <typename Derived, typename Alloc>
+const typename AbstractManglingParser<
+    Derived, Alloc>::OperatorInfo AbstractManglingParser<Derived,
+                                                         Alloc>::Ops[] = {
+ // Keep ordered by encoding
+ {"aN", OperatorInfo::Binary, false, Node::Prec::Assign, "operator&="},
+ {"aS", OperatorInfo::Binary, false, Node::Prec::Assign, "operator="},
+ {"aa", OperatorInfo::Binary, false, Node::Prec::AndIf, "operator&&"},
+ {"ad", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator&"},
+ {"an", OperatorInfo::Binary, false, Node::Prec::And, "operator&"},
+ {"at", OperatorInfo::OfIdOp, /*Type*/ true, Node::Prec::Unary, "alignof "},
+ {"aw", OperatorInfo::NameOnly, false, Node::Prec::Primary,
+ "operator co_await"},
+ {"az", OperatorInfo::OfIdOp, /*Type*/ false, Node::Prec::Unary, "alignof "},
+ {"cc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, "const_cast"},
+ {"cl", OperatorInfo::Call, false, Node::Prec::Postfix, "operator()"},
+ {"cm", OperatorInfo::Binary, false, Node::Prec::Comma, "operator,"},
+ {"co", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator~"},
+ {"cv", OperatorInfo::CCast, false, Node::Prec::Cast, "operator"}, // C Cast
+ {"dV", OperatorInfo::Binary, false, Node::Prec::Assign, "operator/="},
+ {"da", OperatorInfo::Del, /*Ary*/ true, Node::Prec::Unary,
+ "operator delete[]"},
+ {"dc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, "dynamic_cast"},
+ {"de", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator*"},
+ {"dl", OperatorInfo::Del, /*Ary*/ false, Node::Prec::Unary,
+ "operator delete"},
+ {"ds", OperatorInfo::Member, /*Named*/ false, Node::Prec::PtrMem,
+ "operator.*"},
+ {"dt", OperatorInfo::Member, /*Named*/ false, Node::Prec::Postfix,
+ "operator."},
+ {"dv", OperatorInfo::Binary, false, Node::Prec::Assign, "operator/"},
+ {"eO", OperatorInfo::Binary, false, Node::Prec::Assign, "operator^="},
+ {"eo", OperatorInfo::Binary, false, Node::Prec::Xor, "operator^"},
+ {"eq", OperatorInfo::Binary, false, Node::Prec::Equality, "operator=="},
+ {"ge", OperatorInfo::Binary, false, Node::Prec::Relational, "operator>="},
+ {"gt", OperatorInfo::Binary, false, Node::Prec::Relational, "operator>"},
+ {"ix", OperatorInfo::Array, false, Node::Prec::Postfix, "operator[]"},
+ {"lS", OperatorInfo::Binary, false, Node::Prec::Assign, "operator<<="},
+ {"le", OperatorInfo::Binary, false, Node::Prec::Relational, "operator<="},
+ {"ls", OperatorInfo::Binary, false, Node::Prec::Shift, "operator<<"},
+ {"lt", OperatorInfo::Binary, false, Node::Prec::Relational, "operator<"},
+ {"mI", OperatorInfo::Binary, false, Node::Prec::Assign, "operator-="},
+ {"mL", OperatorInfo::Binary, false, Node::Prec::Assign, "operator*="},
+ {"mi", OperatorInfo::Binary, false, Node::Prec::Additive, "operator-"},
+ {"ml", OperatorInfo::Binary, false, Node::Prec::Multiplicative,
+ "operator*"},
+ {"mm", OperatorInfo::Postfix, false, Node::Prec::Postfix, "operator--"},
+ {"na", OperatorInfo::New, /*Ary*/ true, Node::Prec::Unary,
+ "operator new[]"},
+ {"ne", OperatorInfo::Binary, false, Node::Prec::Equality, "operator!="},
+ {"ng", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator-"},
+ {"nt", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator!"},
+ {"nw", OperatorInfo::New, /*Ary*/ false, Node::Prec::Unary, "operator new"},
+ {"oR", OperatorInfo::Binary, false, Node::Prec::Assign, "operator|="},
+ {"oo", OperatorInfo::Binary, false, Node::Prec::OrIf, "operator||"},
+ {"or", OperatorInfo::Binary, false, Node::Prec::Ior, "operator|"},
+ {"pL", OperatorInfo::Binary, false, Node::Prec::Assign, "operator+="},
+ {"pl", OperatorInfo::Binary, false, Node::Prec::Additive, "operator+"},
+ {"pm", OperatorInfo::Member, /*Named*/ false, Node::Prec::PtrMem,
+ "operator->*"},
+ {"pp", OperatorInfo::Postfix, false, Node::Prec::Postfix, "operator++"},
+ {"ps", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator+"},
+ {"pt", OperatorInfo::Member, /*Named*/ true, Node::Prec::Postfix,
+ "operator->"},
+ {"qu", OperatorInfo::Conditional, false, Node::Prec::Conditional,
+ "operator?"},
+ {"rM", OperatorInfo::Binary, false, Node::Prec::Assign, "operator%="},
+ {"rS", OperatorInfo::Binary, false, Node::Prec::Assign, "operator>>="},
+ {"rc", OperatorInfo::NamedCast, false, Node::Prec::Postfix,
+ "reinterpret_cast"},
+ {"rm", OperatorInfo::Binary, false, Node::Prec::Multiplicative,
+ "operator%"},
+ {"rs", OperatorInfo::Binary, false, Node::Prec::Shift, "operator>>"},
+ {"sc", OperatorInfo::NamedCast, false, Node::Prec::Postfix, "static_cast"},
+ {"ss", OperatorInfo::Binary, false, Node::Prec::Spaceship, "operator<=>"},
+ {"st", OperatorInfo::OfIdOp, /*Type*/ true, Node::Prec::Unary, "sizeof "},
+ {"sz", OperatorInfo::OfIdOp, /*Type*/ false, Node::Prec::Unary, "sizeof "},
+ {"te", OperatorInfo::OfIdOp, /*Type*/ false, Node::Prec::Postfix,
+ "typeid "},
+ {"ti", OperatorInfo::OfIdOp, /*Type*/ true, Node::Prec::Postfix, "typeid "},
+};
+template <typename Derived, typename Alloc>
+const size_t AbstractManglingParser<Derived, Alloc>::NumOps =
+    sizeof(Ops) / sizeof(Ops[0]);
+
+// If the next 2 chars are an operator encoding, consume them and return their
+// OperatorInfo. Otherwise return nullptr.
+template <typename Derived, typename Alloc>
+const typename AbstractManglingParser<Derived, Alloc>::OperatorInfo *
+AbstractManglingParser<Derived, Alloc>::parseOperatorEncoding() {
+ if (numLeft() < 2)
+ return nullptr;
+
+ auto Op = std::lower_bound(
+ &Ops[0], &Ops[NumOps], First,
+ [](const OperatorInfo &Op_, const char *Enc_) { return Op_ < Enc_; });
+ if (Op == &Ops[NumOps] || *Op != First)
+ return nullptr;
+
+ First += 2;
+ return Op;
+}
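
Because Ops[] is kept sorted by its two-character encoding, the old per-letter switches collapse into one std::lower_bound probe plus an equality check, exactly as above. A standalone sketch with a hypothetical four-entry table:

#include <algorithm>
#include <iostream>

struct DemoOp {
  char Enc[2];
  const char *Name;
  bool operator<(const char *Peek) const {
    return Enc[0] < Peek[0] || (Enc[0] == Peek[0] && Enc[1] < Peek[1]);
  }
  bool operator==(const char *Peek) const {
    return Enc[0] == Peek[0] && Enc[1] == Peek[1];
  }
};

// Kept sorted by encoding, like Ops[].
static const DemoOp DemoOps[] = {
    {{'a', 'S'}, "operator="},
    {{'m', 'l'}, "operator*"},
    {{'p', 'l'}, "operator+"},
    {{'r', 's'}, "operator>>"},
};

static const DemoOp *demoLookup(const char *Mangled) {
  const DemoOp *End = DemoOps + sizeof(DemoOps) / sizeof(DemoOps[0]);
  const DemoOp *Op = std::lower_bound(
      DemoOps, End, Mangled,
      [](const DemoOp &O, const char *Enc) { return O < Enc; });
  if (Op == End || !(*Op == Mangled))
    return nullptr;
  return Op;
}

int main() {
  if (const DemoOp *Op = demoLookup("plXY"))
    std::cout << Op->Name << '\n'; // operator+
}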
+
+// <operator-name> ::= See parseOperatorEncoding()
// ::= li <source-name> # operator ""
-// ::= ls # <<
-// ::= lS # <<=
-// ::= lt # <
-// ::= mi # -
-// ::= mI # -=
-// ::= ml # *
-// ::= mL # *=
-// ::= mm # -- (postfix in <expression> context)
-// ::= na # new[]
-// ::= ne # !=
-// ::= ng # - (unary)
-// ::= nt # !
-// ::= nw # new
-// ::= oo # ||
-// ::= or # |
-// ::= oR # |=
-// ::= pm # ->*
-// ::= pl # +
-// ::= pL # +=
-// ::= pp # ++ (postfix in <expression> context)
-// ::= ps # + (unary)
-// ::= pt # ->
-// ::= qu # ?
-// ::= rm # %
-// ::= rM # %=
-// ::= rs # >>
-// ::= rS # >>=
-// ::= ss # <=> C++2a
-// ::= v <digit> <source-name> # vendor extended operator
+// ::= v <digit> <source-name> # vendor extended operator
template <typename Derived, typename Alloc>
Node *
AbstractManglingParser<Derived, Alloc>::parseOperatorName(NameState *State) {
- switch (look()) {
- case 'a':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator&&");
- case 'd':
- case 'n':
- First += 2;
- return make<NameType>("operator&");
- case 'N':
- First += 2;
- return make<NameType>("operator&=");
- case 'S':
- First += 2;
- return make<NameType>("operator=");
- }
- return nullptr;
- case 'c':
- switch (look(1)) {
- case 'l':
- First += 2;
- return make<NameType>("operator()");
- case 'm':
- First += 2;
- return make<NameType>("operator,");
- case 'o':
- First += 2;
- return make<NameType>("operator~");
- // ::= cv <type> # (cast)
- case 'v': {
- First += 2;
- SwapAndRestore<bool> SaveTemplate(TryToParseTemplateArgs, false);
+ if (const auto *Op = parseOperatorEncoding()) {
+ if (Op->getKind() == OperatorInfo::CCast) {
+ // ::= cv <type> # (cast)
+ ScopedOverride<bool> SaveTemplate(TryToParseTemplateArgs, false);
// If we're parsing an encoding, State != nullptr and the conversion
// operators' <type> could have a <template-param> that refers to some
// <template-arg>s further ahead in the mangled name.
- SwapAndRestore<bool> SavePermit(PermitForwardTemplateReferences,
+ ScopedOverride<bool> SavePermit(PermitForwardTemplateReferences,
PermitForwardTemplateReferences ||
State != nullptr);
Node *Ty = getDerived().parseType();
@@ -2912,185 +3051,29 @@ AbstractManglingParser<Derived, Alloc>::parseOperatorName(NameState *State) {
if (State) State->CtorDtorConversion = true;
return make<ConversionOperatorType>(Ty);
}
- }
- return nullptr;
- case 'd':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator delete[]");
- case 'e':
- First += 2;
- return make<NameType>("operator*");
- case 'l':
- First += 2;
- return make<NameType>("operator delete");
- case 'v':
- First += 2;
- return make<NameType>("operator/");
- case 'V':
- First += 2;
- return make<NameType>("operator/=");
- }
- return nullptr;
- case 'e':
- switch (look(1)) {
- case 'o':
- First += 2;
- return make<NameType>("operator^");
- case 'O':
- First += 2;
- return make<NameType>("operator^=");
- case 'q':
- First += 2;
- return make<NameType>("operator==");
- }
- return nullptr;
- case 'g':
- switch (look(1)) {
- case 'e':
- First += 2;
- return make<NameType>("operator>=");
- case 't':
- First += 2;
- return make<NameType>("operator>");
- }
- return nullptr;
- case 'i':
- if (look(1) == 'x') {
- First += 2;
- return make<NameType>("operator[]");
- }
- return nullptr;
- case 'l':
- switch (look(1)) {
- case 'e':
- First += 2;
- return make<NameType>("operator<=");
+
+ if (Op->getKind() >= OperatorInfo::Unnameable)
+ /* Not a nameable operator. */
+ return nullptr;
+ if (Op->getKind() == OperatorInfo::Member && !Op->getFlag())
+ /* Not a nameable MemberExpr */
+ return nullptr;
+
+ return make<NameType>(Op->getName());
+ }
+
+ if (consumeIf("li")) {
// ::= li <source-name> # operator ""
- case 'i': {
- First += 2;
- Node *SN = getDerived().parseSourceName(State);
- if (SN == nullptr)
- return nullptr;
- return make<LiteralOperator>(SN);
- }
- case 's':
- First += 2;
- return make<NameType>("operator<<");
- case 'S':
- First += 2;
- return make<NameType>("operator<<=");
- case 't':
- First += 2;
- return make<NameType>("operator<");
- }
- return nullptr;
- case 'm':
- switch (look(1)) {
- case 'i':
- First += 2;
- return make<NameType>("operator-");
- case 'I':
- First += 2;
- return make<NameType>("operator-=");
- case 'l':
- First += 2;
- return make<NameType>("operator*");
- case 'L':
- First += 2;
- return make<NameType>("operator*=");
- case 'm':
- First += 2;
- return make<NameType>("operator--");
- }
- return nullptr;
- case 'n':
- switch (look(1)) {
- case 'a':
- First += 2;
- return make<NameType>("operator new[]");
- case 'e':
- First += 2;
- return make<NameType>("operator!=");
- case 'g':
- First += 2;
- return make<NameType>("operator-");
- case 't':
- First += 2;
- return make<NameType>("operator!");
- case 'w':
- First += 2;
- return make<NameType>("operator new");
- }
- return nullptr;
- case 'o':
- switch (look(1)) {
- case 'o':
- First += 2;
- return make<NameType>("operator||");
- case 'r':
- First += 2;
- return make<NameType>("operator|");
- case 'R':
- First += 2;
- return make<NameType>("operator|=");
- }
- return nullptr;
- case 'p':
- switch (look(1)) {
- case 'm':
- First += 2;
- return make<NameType>("operator->*");
- case 'l':
- First += 2;
- return make<NameType>("operator+");
- case 'L':
- First += 2;
- return make<NameType>("operator+=");
- case 'p':
- First += 2;
- return make<NameType>("operator++");
- case 's':
- First += 2;
- return make<NameType>("operator+");
- case 't':
- First += 2;
- return make<NameType>("operator->");
- }
- return nullptr;
- case 'q':
- if (look(1) == 'u') {
- First += 2;
- return make<NameType>("operator?");
- }
- return nullptr;
- case 'r':
- switch (look(1)) {
- case 'm':
- First += 2;
- return make<NameType>("operator%");
- case 'M':
- First += 2;
- return make<NameType>("operator%=");
- case 's':
- First += 2;
- return make<NameType>("operator>>");
- case 'S':
- First += 2;
- return make<NameType>("operator>>=");
- }
- return nullptr;
- case 's':
- if (look(1) == 's') {
- First += 2;
- return make<NameType>("operator<=>");
- }
- return nullptr;
- // ::= v <digit> <source-name> # vendor extended operator
- case 'v':
- if (std::isdigit(look(1))) {
- First += 2;
+ Node *SN = getDerived().parseSourceName(State);
+ if (SN == nullptr)
+ return nullptr;
+ return make<LiteralOperator>(SN);
+ }
+
+ if (consumeIf('v')) {
+ // ::= v <digit> <source-name> # vendor extended operator
+ if (look() >= '0' && look() <= '9') {
+ First++;
Node *SN = getDerived().parseSourceName(State);
if (SN == nullptr)
return nullptr;
@@ -3098,6 +3081,7 @@ AbstractManglingParser<Derived, Alloc>::parseOperatorName(NameState *State) {
}
return nullptr;
}
+
return nullptr;
}
@@ -3116,19 +3100,11 @@ Node *
AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
NameState *State) {
if (SoFar->getKind() == Node::KSpecialSubstitution) {
- auto SSK = static_cast<SpecialSubstitution *>(SoFar)->SSK;
- switch (SSK) {
- case SpecialSubKind::string:
- case SpecialSubKind::istream:
- case SpecialSubKind::ostream:
- case SpecialSubKind::iostream:
- SoFar = make<ExpandedSpecialSubstitution>(SSK);
- if (!SoFar)
- return nullptr;
- break;
- default:
- break;
- }
+ // Expand the special substitution.
+ SoFar = make<ExpandedSpecialSubstitution>(
+ static_cast<SpecialSubstitution *>(SoFar));
+ if (!SoFar)
+ return nullptr;
}
if (consumeIf('C')) {
@@ -3157,8 +3133,10 @@ AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
return nullptr;
}
-// <nested-name> ::= N [<CV-Qualifiers>] [<ref-qualifier>] <prefix> <unqualified-name> E
-// ::= N [<CV-Qualifiers>] [<ref-qualifier>] <template-prefix> <template-args> E
+// <nested-name> ::= N [<CV-Qualifiers>] [<ref-qualifier>] <prefix>
+// <unqualified-name> E
+// ::= N [<CV-Qualifiers>] [<ref-qualifier>] <template-prefix>
+// <template-args> E
//
// <prefix> ::= <prefix> <unqualified-name>
// ::= <template-prefix> <template-args>
@@ -3167,7 +3145,7 @@ AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
// ::= # empty
// ::= <substitution>
// ::= <prefix> <data-member-prefix>
-// extension ::= L
+// [*] extension
//
// <data-member-prefix> := <member source-name> [<template-args>] M
//
@@ -3187,90 +3165,76 @@ AbstractManglingParser<Derived, Alloc>::parseNestedName(NameState *State) {
if (State) State->ReferenceQualifier = FrefQualRValue;
} else if (consumeIf('R')) {
if (State) State->ReferenceQualifier = FrefQualLValue;
- } else
+ } else {
if (State) State->ReferenceQualifier = FrefQualNone;
-
- Node *SoFar = nullptr;
- auto PushComponent = [&](Node *Comp) {
- if (!Comp) return false;
- if (SoFar) SoFar = make<NestedName>(SoFar, Comp);
- else SoFar = Comp;
- if (State) State->EndsWithTemplateArgs = false;
- return SoFar != nullptr;
- };
-
- if (consumeIf("St")) {
- SoFar = make<NameType>("std");
- if (!SoFar)
- return nullptr;
}
+ Node *SoFar = nullptr;
while (!consumeIf('E')) {
- consumeIf('L'); // extension
-
- // <data-member-prefix> := <member source-name> [<template-args>] M
- if (consumeIf('M')) {
- if (SoFar == nullptr)
- return nullptr;
- continue;
- }
+ if (State)
+      // Reset the flag; only the <template-args> case below sets it.
+ State->EndsWithTemplateArgs = false;
- // ::= <template-param>
if (look() == 'T') {
- if (!PushComponent(getDerived().parseTemplateParam()))
- return nullptr;
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <template-prefix> <template-args>
- if (look() == 'I') {
+ // ::= <template-param>
+ if (SoFar != nullptr)
+ return nullptr; // Cannot have a prefix.
+ SoFar = getDerived().parseTemplateParam();
+ } else if (look() == 'I') {
+ // ::= <template-prefix> <template-args>
+ if (SoFar == nullptr)
+ return nullptr; // Must have a prefix.
Node *TA = getDerived().parseTemplateArgs(State != nullptr);
- if (TA == nullptr || SoFar == nullptr)
- return nullptr;
- SoFar = make<NameWithTemplateArgs>(SoFar, TA);
- if (!SoFar)
- return nullptr;
- if (State) State->EndsWithTemplateArgs = true;
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <decltype>
- if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) {
- if (!PushComponent(getDerived().parseDecltype()))
+ if (TA == nullptr)
return nullptr;
- Subs.push_back(SoFar);
- continue;
- }
-
- // ::= <substitution>
- if (look() == 'S' && look(1) != 't') {
- Node *S = getDerived().parseSubstitution();
- if (!PushComponent(S))
+ if (SoFar->getKind() == Node::KNameWithTemplateArgs)
+ // Semantically <template-args> <template-args> cannot be generated by a
+ // C++ entity. There will always be [something like] a name between
+ // them.
return nullptr;
- if (SoFar != S)
- Subs.push_back(S);
- continue;
- }
+ if (State)
+ State->EndsWithTemplateArgs = true;
+ SoFar = make<NameWithTemplateArgs>(SoFar, TA);
+ } else if (look() == 'D' && (look(1) == 't' || look(1) == 'T')) {
+ // ::= <decltype>
+ if (SoFar != nullptr)
+ return nullptr; // Cannot have a prefix.
+ SoFar = getDerived().parseDecltype();
+ } else {
+ ModuleName *Module = nullptr;
+
+ if (look() == 'S') {
+ // ::= <substitution>
+ Node *S = nullptr;
+ if (look(1) == 't') {
+ First += 2;
+ S = make<NameType>("std");
+ } else {
+ S = getDerived().parseSubstitution();
+ }
+ if (!S)
+ return nullptr;
+ if (S->getKind() == Node::KModuleName) {
+ Module = static_cast<ModuleName *>(S);
+ } else if (SoFar != nullptr) {
+ return nullptr; // Cannot have a prefix.
+ } else {
+ SoFar = S;
+ continue; // Do not push a new substitution.
+ }
+ }
- // Parse an <unqualified-name> thats actually a <ctor-dtor-name>.
- if (look() == 'C' || (look() == 'D' && look(1) != 'C')) {
- if (SoFar == nullptr)
- return nullptr;
- if (!PushComponent(getDerived().parseCtorDtorName(SoFar, State)))
- return nullptr;
- SoFar = getDerived().parseAbiTags(SoFar);
- if (SoFar == nullptr)
- return nullptr;
- Subs.push_back(SoFar);
- continue;
+ // ::= [<prefix>] <unqualified-name>
+ SoFar = getDerived().parseUnqualifiedName(State, SoFar, Module);
}
- // ::= <prefix> <unqualified-name>
- if (!PushComponent(getDerived().parseUnqualifiedName(State)))
+ if (SoFar == nullptr)
return nullptr;
Subs.push_back(SoFar);
+
+ // No longer used.
+ // <data-member-prefix> := <member source-name> [<template-args>] M
+ consumeIf('M');
}
if (SoFar == nullptr || Subs.empty())
@@ -3365,6 +3329,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseBaseUnresolvedName() {
// ::= [gs] <base-unresolved-name> # x or (with "gs") ::x
// ::= [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
// # A::x, N::y, A<T>::z; "gs" means leading "::"
+// [gs] has been parsed by caller.
// ::= sr <unresolved-type> <base-unresolved-name> # T::x / decltype(p)::x
// extension ::= sr <unresolved-type> <template-args> <base-unresolved-name>
// # T::N::x /decltype(p)::N::x
@@ -3372,7 +3337,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseBaseUnresolvedName() {
//
// <unresolved-qualifier-level> ::= <simple-id>
template <typename Derived, typename Alloc>
-Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedName() {
+Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedName(bool Global) {
Node *SoFar = nullptr;
// srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
@@ -3406,8 +3371,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedName() {
return make<QualifiedName>(SoFar, Base);
}
- bool Global = consumeIf("gs");
-
// [gs] <base-unresolved-name> # x or (with "gs") ::x
if (!consumeIf("sr")) {
SoFar = getDerived().parseBaseUnresolvedName();
@@ -3637,7 +3600,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseDecltype() {
return nullptr;
if (!consumeIf('E'))
return nullptr;
- return make<EnclosingExpr>("decltype(", E, ")");
+ return make<EnclosingExpr>("decltype", E);
}
// <array-type> ::= A <positive dimension number> _ <element type>
@@ -3723,8 +3686,8 @@ Node *AbstractManglingParser<Derived, Alloc>::parseQualifiedType() {
StringView ProtoSourceName = Qual.dropFront(std::strlen("objcproto"));
StringView Proto;
{
- SwapAndRestore<const char *> SaveFirst(First, ProtoSourceName.begin()),
- SaveLast(Last, ProtoSourceName.end());
+ ScopedOverride<const char *> SaveFirst(First, ProtoSourceName.begin()),
+ SaveLast(Last, ProtoSourceName.end());
Proto = parseBareSourceName();
}
if (Proto.empty())
@@ -3929,6 +3892,22 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
return nullptr;
return make<BinaryFPType>(DimensionNumber);
}
+ // ::= DB <number> _ # C23 signed _BitInt(N)
+ // ::= DB <instantiation-dependent expression> _ # C23 signed _BitInt(N)
+ // ::= DU <number> _ # C23 unsigned _BitInt(N)
+ // ::= DU <instantiation-dependent expression> _ # C23 unsigned _BitInt(N)
+ case 'B':
+ case 'U': {
+ bool Signed = look(1) == 'B';
+ First += 2;
+ Node *Size = std::isdigit(look()) ? make<NameType>(parseNumber())
+ : getDerived().parseExpr();
+ if (!Size)
+ return nullptr;
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<BitIntType>(Size, Signed);
+ }
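
For the common literal-size spelling, the case above reduces to reading digits between the 'DB'/'DU' prefix and the trailing '_'. A sketch of that path only (demoBitInt is hypothetical; the expression-sized form instead re-enters parseExpr):

#include <cctype>
#include <iostream>
#include <string>

// "DB32_" denotes _BitInt(32); "DU8_" denotes unsigned _BitInt(8).
static std::string demoBitInt(const char *&P) {
  if (P[0] != 'D' || (P[1] != 'B' && P[1] != 'U'))
    return "";
  bool Signed = P[1] == 'B';
  P += 2;
  std::string Size;
  while (std::isdigit(static_cast<unsigned char>(*P)))
    Size += *P++;
  if (*P++ != '_')
    return "";
  return (Signed ? "" : "unsigned ") + ("_BitInt(" + Size + ")");
}

int main() {
  const char *T = "DU8_";
  std::cout << demoBitInt(T) << '\n'; // unsigned _BitInt(8)
}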
// ::= Di # char32_t
case 'i':
First += 2;
@@ -4077,8 +4056,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
// ::= <substitution> # See Compression below
case 'S': {
if (look(1) != 't') {
- Result = getDerived().parseSubstitution();
- if (Result == nullptr)
+ bool IsSubst = false;
+ Result = getDerived().parseUnscopedName(nullptr, &IsSubst);
+ if (!Result)
return nullptr;
// Sub could be either of:
@@ -4091,12 +4071,14 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
// If this is followed by some <template-args>, and we're permitted to
// parse them, take the second production.
- if (TryToParseTemplateArgs && look() == 'I') {
+ if (look() == 'I' && (!IsSubst || TryToParseTemplateArgs)) {
+ if (!IsSubst)
+ Subs.push_back(Result);
Node *TA = getDerived().parseTemplateArgs();
if (TA == nullptr)
return nullptr;
Result = make<NameWithTemplateArgs>(Result, TA);
- } else {
+ } else if (IsSubst) {
// If all we parsed was a substitution, don't re-insert into the
// substitution table.
return Result;
@@ -4121,22 +4103,24 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
}
template <typename Derived, typename Alloc>
-Node *AbstractManglingParser<Derived, Alloc>::parsePrefixExpr(StringView Kind) {
+Node *AbstractManglingParser<Derived, Alloc>::parsePrefixExpr(StringView Kind,
+ Node::Prec Prec) {
Node *E = getDerived().parseExpr();
if (E == nullptr)
return nullptr;
- return make<PrefixExpr>(Kind, E);
+ return make<PrefixExpr>(Kind, E, Prec);
}
template <typename Derived, typename Alloc>
-Node *AbstractManglingParser<Derived, Alloc>::parseBinaryExpr(StringView Kind) {
+Node *AbstractManglingParser<Derived, Alloc>::parseBinaryExpr(StringView Kind,
+ Node::Prec Prec) {
Node *LHS = getDerived().parseExpr();
if (LHS == nullptr)
return nullptr;
Node *RHS = getDerived().parseExpr();
if (RHS == nullptr)
return nullptr;
- return make<BinaryExpr>(LHS, Kind, RHS);
+ return make<BinaryExpr>(LHS, Kind, RHS, Prec);
}
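
Mangled expressions arrive in prefix (Polish) order, which is why parseBinaryExpr can read LHS then RHS with no lookahead or precedence climbing; precedence only matters later, when printing. A toy recursive sketch (demoParseExpr is hypothetical, with single letters standing in for parsed operands; the new precedence-aware printing avoids the redundant parens shown here):

#include <iostream>
#include <string>

static std::string demoParseExpr(const char *&P) {
  if (P[0] == 'p' && P[1] == 'l') { // pl ::= operator+
    P += 2;
    std::string L = demoParseExpr(P), R = demoParseExpr(P);
    return "(" + L + ") + (" + R + ")";
  }
  if (P[0] == 'm' && P[1] == 'l') { // ml ::= operator*
    P += 2;
    std::string L = demoParseExpr(P), R = demoParseExpr(P);
    return "(" + L + ") * (" + R + ")";
  }
  return std::string(1, *P++); // one-letter "operand"
}

int main() {
  const char *E = "plmlabc"; // + (* a b) c, in prefix order
  std::cout << demoParseExpr(E) << '\n'; // ((a) * (b)) + (c)
}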
template <typename Derived, typename Alloc>
@@ -4191,43 +4175,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFunctionParam() {
return nullptr;
}
-// [gs] nw <expression>* _ <type> E # new (expr-list) type
-// [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
-// [gs] na <expression>* _ <type> E # new[] (expr-list) type
-// [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
-// <initializer> ::= pi <expression>* E # parenthesized initialization
-template <typename Derived, typename Alloc>
-Node *AbstractManglingParser<Derived, Alloc>::parseNewExpr() {
- bool Global = consumeIf("gs");
- bool IsArray = look(1) == 'a';
- if (!consumeIf("nw") && !consumeIf("na"))
- return nullptr;
- size_t Exprs = Names.size();
- while (!consumeIf('_')) {
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return nullptr;
- Names.push_back(Ex);
- }
- NodeArray ExprList = popTrailingNodeArray(Exprs);
- Node *Ty = getDerived().parseType();
- if (Ty == nullptr)
- return Ty;
- if (consumeIf("pi")) {
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Init = getDerived().parseExpr();
- if (Init == nullptr)
- return Init;
- Names.push_back(Init);
- }
- NodeArray Inits = popTrailingNodeArray(InitsBegin);
- return make<NewExpr>(ExprList, Ty, Inits, Global, IsArray);
- } else if (!consumeIf('E'))
- return nullptr;
- return make<NewExpr>(ExprList, Ty, NodeArray(), Global, IsArray);
-}
-
// cv <type> <expression> # conversion with one argument
// cv <type> _ <expression>* E # conversion with a different number of arguments
template <typename Derived, typename Alloc>
@@ -4236,7 +4183,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseConversionExpr() {
return nullptr;
Node *Ty;
{
- SwapAndRestore<bool> SaveTemp(TryToParseTemplateArgs, false);
+ ScopedOverride<bool> SaveTemp(TryToParseTemplateArgs, false);
Ty = getDerived().parseType();
}
@@ -4353,7 +4300,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExprPrimary() {
return nullptr;
}
case 'D':
- if (consumeIf("DnE"))
+ if (consumeIf("Dn") && (consumeIf('0'), consumeIf('E')))
return make<NameType>("nullptr");
return nullptr;
case 'T':
@@ -4440,55 +4387,38 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() {
if (!consumeIf('f'))
return nullptr;
- char FoldKind = look();
- bool IsLeftFold, HasInitializer;
- HasInitializer = FoldKind == 'L' || FoldKind == 'R';
- if (FoldKind == 'l' || FoldKind == 'L')
- IsLeftFold = true;
- else if (FoldKind == 'r' || FoldKind == 'R')
- IsLeftFold = false;
- else
+ bool IsLeftFold = false, HasInitializer = false;
+ switch (look()) {
+ default:
return nullptr;
+ case 'L':
+ IsLeftFold = true;
+ HasInitializer = true;
+ break;
+ case 'R':
+ HasInitializer = true;
+ break;
+ case 'l':
+ IsLeftFold = true;
+ break;
+ case 'r':
+ break;
+ }
++First;
- // FIXME: This map is duplicated in parseOperatorName and parseExpr.
- StringView OperatorName;
- if (consumeIf("aa")) OperatorName = "&&";
- else if (consumeIf("an")) OperatorName = "&";
- else if (consumeIf("aN")) OperatorName = "&=";
- else if (consumeIf("aS")) OperatorName = "=";
- else if (consumeIf("cm")) OperatorName = ",";
- else if (consumeIf("ds")) OperatorName = ".*";
- else if (consumeIf("dv")) OperatorName = "/";
- else if (consumeIf("dV")) OperatorName = "/=";
- else if (consumeIf("eo")) OperatorName = "^";
- else if (consumeIf("eO")) OperatorName = "^=";
- else if (consumeIf("eq")) OperatorName = "==";
- else if (consumeIf("ge")) OperatorName = ">=";
- else if (consumeIf("gt")) OperatorName = ">";
- else if (consumeIf("le")) OperatorName = "<=";
- else if (consumeIf("ls")) OperatorName = "<<";
- else if (consumeIf("lS")) OperatorName = "<<=";
- else if (consumeIf("lt")) OperatorName = "<";
- else if (consumeIf("mi")) OperatorName = "-";
- else if (consumeIf("mI")) OperatorName = "-=";
- else if (consumeIf("ml")) OperatorName = "*";
- else if (consumeIf("mL")) OperatorName = "*=";
- else if (consumeIf("ne")) OperatorName = "!=";
- else if (consumeIf("oo")) OperatorName = "||";
- else if (consumeIf("or")) OperatorName = "|";
- else if (consumeIf("oR")) OperatorName = "|=";
- else if (consumeIf("pl")) OperatorName = "+";
- else if (consumeIf("pL")) OperatorName = "+=";
- else if (consumeIf("rm")) OperatorName = "%";
- else if (consumeIf("rM")) OperatorName = "%=";
- else if (consumeIf("rs")) OperatorName = ">>";
- else if (consumeIf("rS")) OperatorName = ">>=";
- else return nullptr;
-
- Node *Pack = getDerived().parseExpr(), *Init = nullptr;
+ const auto *Op = parseOperatorEncoding();
+ if (!Op)
+ return nullptr;
+  if (!(Op->getKind() == OperatorInfo::Binary
+        || (Op->getKind() == OperatorInfo::Member
+            && Op->getName().back() == '*')))
+ return nullptr;
+
+ Node *Pack = getDerived().parseExpr();
if (Pack == nullptr)
return nullptr;
+
+ Node *Init = nullptr;
if (HasInitializer) {
Init = getDerived().parseExpr();
if (Init == nullptr)
@@ -4498,14 +4428,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() {
if (IsLeftFold && Init)
std::swap(Pack, Init);
- return make<FoldExpr>(IsLeftFold, OperatorName, Pack, Init);
+ return make<FoldExpr>(IsLeftFold, Op->getSymbol(), Pack, Init);
}
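
The switch above separates the two independent bits packed into the four fold markers: the case of the letter selects whether an <init> operand follows, the letter itself selects the direction. A sketch of that classification (Demo* names are hypothetical):

#include <iostream>

struct DemoFoldKind { bool IsLeftFold, HasInitializer; };

static bool demoClassifyFold(char C, DemoFoldKind &K) {
  switch (C) {
  case 'L': K = {true, true};   return true; // fL: init op ... op pack
  case 'R': K = {false, true};  return true; // fR: pack op ... op init
  case 'l': K = {true, false};  return true; // fl: ... op pack
  case 'r': K = {false, false}; return true; // fr: pack op ...
  default:  return false;
  }
}

int main() {
  DemoFoldKind K;
  if (demoClassifyFold('R', K))
    std::cout << K.IsLeftFold << ' ' << K.HasInitializer << '\n'; // 0 1
}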
// <expression> ::= mc <parameter type> <expr> [<offset number>] E
//
// Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/47
template <typename Derived, typename Alloc>
-Node *AbstractManglingParser<Derived, Alloc>::parsePointerToMemberConversionExpr() {
+Node *
+AbstractManglingParser<Derived, Alloc>::parsePointerToMemberConversionExpr(
+ Node::Prec Prec) {
Node *Ty = getDerived().parseType();
if (!Ty)
return nullptr;
@@ -4515,7 +4447,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parsePointerToMemberConversionExpr
StringView Offset = getDerived().parseNumber(true);
if (!consumeIf('E'))
return nullptr;
- return make<PointerToMemberConversionExpr>(Ty, Expr, Offset);
+ return make<PointerToMemberConversionExpr>(Ty, Expr, Offset, Prec);
}
// <expression> ::= so <referent type> <expr> [<offset number>] <union-selector>* [p] E
@@ -4592,316 +4524,127 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSubobjectExpr() {
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
bool Global = consumeIf("gs");
- if (numLeft() < 2)
- return nullptr;
- switch (*First) {
- case 'L':
- return getDerived().parseExprPrimary();
- case 'T':
- return getDerived().parseTemplateParam();
- case 'f': {
- // Disambiguate a fold expression from a <function-param>.
- if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2))))
- return getDerived().parseFunctionParam();
- return getDerived().parseFoldExpr();
- }
- case 'a':
- switch (First[1]) {
- case 'a':
- First += 2;
- return getDerived().parseBinaryExpr("&&");
- case 'd':
- First += 2;
- return getDerived().parsePrefixExpr("&");
- case 'n':
- First += 2;
- return getDerived().parseBinaryExpr("&");
- case 'N':
- First += 2;
- return getDerived().parseBinaryExpr("&=");
- case 'S':
- First += 2;
- return getDerived().parseBinaryExpr("=");
- case 't': {
- First += 2;
- Node *Ty = getDerived().parseType();
- if (Ty == nullptr)
- return nullptr;
- return make<EnclosingExpr>("alignof (", Ty, ")");
- }
- case 'z': {
- First += 2;
- Node *Ty = getDerived().parseExpr();
- if (Ty == nullptr)
- return nullptr;
- return make<EnclosingExpr>("alignof (", Ty, ")");
- }
- }
- return nullptr;
- case 'c':
- switch (First[1]) {
- // cc <type> <expression> # const_cast<type>(expression)
- case 'c': {
- First += 2;
- Node *Ty = getDerived().parseType();
- if (Ty == nullptr)
- return Ty;
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("const_cast", Ty, Ex);
- }
- // cl <expression>+ E # call
- case 'l': {
- First += 2;
- Node *Callee = getDerived().parseExpr();
- if (Callee == nullptr)
- return Callee;
- size_t ExprsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = getDerived().parseExpr();
- if (E == nullptr)
- return E;
- Names.push_back(E);
- }
- return make<CallExpr>(Callee, popTrailingNodeArray(ExprsBegin));
- }
- case 'm':
- First += 2;
- return getDerived().parseBinaryExpr(",");
- case 'o':
- First += 2;
- return getDerived().parsePrefixExpr("~");
- case 'v':
- return getDerived().parseConversionExpr();
- }
- return nullptr;
- case 'd':
- switch (First[1]) {
- case 'a': {
- First += 2;
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<DeleteExpr>(Ex, Global, /*is_array=*/true);
- }
- case 'c': {
- First += 2;
- Node *T = getDerived().parseType();
- if (T == nullptr)
- return T;
+ const auto *Op = parseOperatorEncoding();
+ if (Op) {
+ auto Sym = Op->getSymbol();
+ switch (Op->getKind()) {
+ case OperatorInfo::Binary:
+ // Binary operator: lhs @ rhs
+ return getDerived().parseBinaryExpr(Sym, Op->getPrecedence());
+ case OperatorInfo::Prefix:
+ // Prefix unary operator: @ expr
+ return getDerived().parsePrefixExpr(Sym, Op->getPrecedence());
+ case OperatorInfo::Postfix: {
+ // Postfix unary operator: expr @
+ if (consumeIf('_'))
+ return getDerived().parsePrefixExpr(Sym, Op->getPrecedence());
Node *Ex = getDerived().parseExpr();
if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("dynamic_cast", T, Ex);
- }
- case 'e':
- First += 2;
- return getDerived().parsePrefixExpr("*");
- case 'l': {
- First += 2;
- Node *E = getDerived().parseExpr();
- if (E == nullptr)
- return E;
- return make<DeleteExpr>(E, Global, /*is_array=*/false);
+ return nullptr;
+ return make<PostfixExpr>(Ex, Sym, Op->getPrecedence());
}
- case 'n':
- return getDerived().parseUnresolvedName();
- case 's': {
- First += 2;
- Node *LHS = getDerived().parseExpr();
- if (LHS == nullptr)
+ case OperatorInfo::Array: {
+ // Array Index: lhs [ rhs ]
+ Node *Base = getDerived().parseExpr();
+ if (Base == nullptr)
return nullptr;
- Node *RHS = getDerived().parseExpr();
- if (RHS == nullptr)
+ Node *Index = getDerived().parseExpr();
+ if (Index == nullptr)
return nullptr;
- return make<MemberExpr>(LHS, ".*", RHS);
+ return make<ArraySubscriptExpr>(Base, Index, Op->getPrecedence());
}
- case 't': {
- First += 2;
+ case OperatorInfo::Member: {
+ // Member access lhs @ rhs
Node *LHS = getDerived().parseExpr();
if (LHS == nullptr)
- return LHS;
+ return nullptr;
Node *RHS = getDerived().parseExpr();
if (RHS == nullptr)
return nullptr;
- return make<MemberExpr>(LHS, ".", RHS);
- }
- case 'v':
- First += 2;
- return getDerived().parseBinaryExpr("/");
- case 'V':
- First += 2;
- return getDerived().parseBinaryExpr("/=");
- }
- return nullptr;
- case 'e':
- switch (First[1]) {
- case 'o':
- First += 2;
- return getDerived().parseBinaryExpr("^");
- case 'O':
- First += 2;
- return getDerived().parseBinaryExpr("^=");
- case 'q':
- First += 2;
- return getDerived().parseBinaryExpr("==");
- }
- return nullptr;
- case 'g':
- switch (First[1]) {
- case 'e':
- First += 2;
- return getDerived().parseBinaryExpr(">=");
- case 't':
- First += 2;
- return getDerived().parseBinaryExpr(">");
- }
- return nullptr;
- case 'i':
- switch (First[1]) {
- case 'x': {
- First += 2;
- Node *Base = getDerived().parseExpr();
- if (Base == nullptr)
+ return make<MemberExpr>(LHS, Sym, RHS, Op->getPrecedence());
+ }
+ case OperatorInfo::New: {
+ // New
+ // # new (expr-list) type [(init)]
+ // [gs] nw <expression>* _ <type> [pi <expression>*] E
+ // # new[] (expr-list) type [(init)]
+ // [gs] na <expression>* _ <type> [pi <expression>*] E
+ size_t Exprs = Names.size();
+ while (!consumeIf('_')) {
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ Names.push_back(Ex);
+ }
+ NodeArray ExprList = popTrailingNodeArray(Exprs);
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
return nullptr;
- Node *Index = getDerived().parseExpr();
- if (Index == nullptr)
- return Index;
- return make<ArraySubscriptExpr>(Base, Index);
- }
- case 'l': {
- First += 2;
+ bool HaveInits = consumeIf("pi");
size_t InitsBegin = Names.size();
while (!consumeIf('E')) {
- Node *E = getDerived().parseBracedExpr();
- if (E == nullptr)
+ if (!HaveInits)
return nullptr;
- Names.push_back(E);
+ Node *Init = getDerived().parseExpr();
+ if (Init == nullptr)
+ return nullptr;
+ Names.push_back(Init);
}
- return make<InitListExpr>(nullptr, popTrailingNodeArray(InitsBegin));
+ NodeArray Inits = popTrailingNodeArray(InitsBegin);
+ return make<NewExpr>(ExprList, Ty, Inits, Global,
+ /*IsArray=*/Op->getFlag(), Op->getPrecedence());
}
- }
- return nullptr;
- case 'l':
- switch (First[1]) {
- case 'e':
- First += 2;
- return getDerived().parseBinaryExpr("<=");
- case 's':
- First += 2;
- return getDerived().parseBinaryExpr("<<");
- case 'S':
- First += 2;
- return getDerived().parseBinaryExpr("<<=");
- case 't':
- First += 2;
- return getDerived().parseBinaryExpr("<");
- }
- return nullptr;
- case 'm':
- switch (First[1]) {
- case 'c':
- First += 2;
- return parsePointerToMemberConversionExpr();
- case 'i':
- First += 2;
- return getDerived().parseBinaryExpr("-");
- case 'I':
- First += 2;
- return getDerived().parseBinaryExpr("-=");
- case 'l':
- First += 2;
- return getDerived().parseBinaryExpr("*");
- case 'L':
- First += 2;
- return getDerived().parseBinaryExpr("*=");
- case 'm':
- First += 2;
- if (consumeIf('_'))
- return getDerived().parsePrefixExpr("--");
+ case OperatorInfo::Del: {
+ // Delete
Node *Ex = getDerived().parseExpr();
if (Ex == nullptr)
return nullptr;
- return make<PostfixExpr>(Ex, "--");
- }
- return nullptr;
- case 'n':
- switch (First[1]) {
- case 'a':
- case 'w':
- return getDerived().parseNewExpr();
- case 'e':
- First += 2;
- return getDerived().parseBinaryExpr("!=");
- case 'g':
- First += 2;
- return getDerived().parsePrefixExpr("-");
- case 't':
- First += 2;
- return getDerived().parsePrefixExpr("!");
- case 'x':
- First += 2;
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("noexcept (", Ex, ")");
+ return make<DeleteExpr>(Ex, Global, /*IsArray=*/Op->getFlag(),
+ Op->getPrecedence());
}
- return nullptr;
- case 'o':
- switch (First[1]) {
- case 'n':
- return getDerived().parseUnresolvedName();
- case 'o':
- First += 2;
- return getDerived().parseBinaryExpr("||");
- case 'r':
- First += 2;
- return getDerived().parseBinaryExpr("|");
- case 'R':
- First += 2;
- return getDerived().parseBinaryExpr("|=");
- }
- return nullptr;
- case 'p':
- switch (First[1]) {
- case 'm':
- First += 2;
- return getDerived().parseBinaryExpr("->*");
- case 'l':
- First += 2;
- return getDerived().parseBinaryExpr("+");
- case 'L':
- First += 2;
- return getDerived().parseBinaryExpr("+=");
- case 'p': {
- First += 2;
- if (consumeIf('_'))
- return getDerived().parsePrefixExpr("++");
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<PostfixExpr>(Ex, "++");
+ case OperatorInfo::Call: {
+ // Function Call
+ Node *Callee = getDerived().parseExpr();
+ if (Callee == nullptr)
+ return nullptr;
+ size_t ExprsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
+ }
+ return make<CallExpr>(Callee, popTrailingNodeArray(ExprsBegin),
+ Op->getPrecedence());
}
- case 's':
- First += 2;
- return getDerived().parsePrefixExpr("+");
- case 't': {
- First += 2;
- Node *L = getDerived().parseExpr();
- if (L == nullptr)
+ case OperatorInfo::CCast: {
+ // C Cast: (type)expr
+ Node *Ty;
+ {
+ ScopedOverride<bool> SaveTemp(TryToParseTemplateArgs, false);
+ Ty = getDerived().parseType();
+ }
+ if (Ty == nullptr)
return nullptr;
- Node *R = getDerived().parseExpr();
- if (R == nullptr)
+
+ size_t ExprsBegin = Names.size();
+ bool IsMany = consumeIf('_');
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
+ if (!IsMany)
+ break;
+ }
+ NodeArray Exprs = popTrailingNodeArray(ExprsBegin);
+ if (!IsMany && Exprs.size() != 1)
return nullptr;
- return make<MemberExpr>(L, "->", R);
- }
+ return make<ConversionExpr>(Ty, Exprs, Op->getPrecedence());
}
- return nullptr;
- case 'q':
- if (First[1] == 'u') {
- First += 2;
+ case OperatorInfo::Conditional: {
+ // Conditional operator: expr ? expr : expr
Node *Cond = getDerived().parseExpr();
if (Cond == nullptr)
return nullptr;
@@ -4911,147 +4654,120 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
Node *RHS = getDerived().parseExpr();
if (RHS == nullptr)
return nullptr;
- return make<ConditionalExpr>(Cond, LHS, RHS);
- }
- return nullptr;
- case 'r':
- switch (First[1]) {
- case 'c': {
- First += 2;
- Node *T = getDerived().parseType();
- if (T == nullptr)
- return T;
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("reinterpret_cast", T, Ex);
+ return make<ConditionalExpr>(Cond, LHS, RHS, Op->getPrecedence());
}
- case 'm':
- First += 2;
- return getDerived().parseBinaryExpr("%");
- case 'M':
- First += 2;
- return getDerived().parseBinaryExpr("%=");
- case 's':
- First += 2;
- return getDerived().parseBinaryExpr(">>");
- case 'S':
- First += 2;
- return getDerived().parseBinaryExpr(">>=");
- }
- return nullptr;
- case 's':
- switch (First[1]) {
- case 'c': {
- First += 2;
- Node *T = getDerived().parseType();
- if (T == nullptr)
- return T;
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<CastExpr>("static_cast", T, Ex);
- }
- case 'o':
- First += 2;
- return parseSubobjectExpr();
- case 'p': {
- First += 2;
- Node *Child = getDerived().parseExpr();
- if (Child == nullptr)
- return nullptr;
- return make<ParameterPackExpansion>(Child);
- }
- case 'r':
- return getDerived().parseUnresolvedName();
- case 't': {
- First += 2;
+ case OperatorInfo::NamedCast: {
+ // Named cast operation, @<type>(expr)
Node *Ty = getDerived().parseType();
if (Ty == nullptr)
- return Ty;
- return make<EnclosingExpr>("sizeof (", Ty, ")");
- }
- case 'z': {
- First += 2;
+ return nullptr;
Node *Ex = getDerived().parseExpr();
if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("sizeof (", Ex, ")");
+ return nullptr;
+ return make<CastExpr>(Sym, Ty, Ex, Op->getPrecedence());
}
- case 'Z':
- First += 2;
- if (look() == 'T') {
- Node *R = getDerived().parseTemplateParam();
- if (R == nullptr)
- return nullptr;
- return make<SizeofParamPackExpr>(R);
- } else if (look() == 'f') {
- Node *FP = getDerived().parseFunctionParam();
- if (FP == nullptr)
- return nullptr;
- return make<EnclosingExpr>("sizeof... (", FP, ")");
- }
- return nullptr;
- case 'P': {
- First += 2;
- size_t ArgsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *Arg = getDerived().parseTemplateArg();
- if (Arg == nullptr)
- return nullptr;
- Names.push_back(Arg);
- }
- auto *Pack = make<NodeArrayNode>(popTrailingNodeArray(ArgsBegin));
- if (!Pack)
+ case OperatorInfo::OfIdOp: {
+ // [sizeof/alignof/typeid] ( <type>|<expr> )
+ Node *Arg =
+ Op->getFlag() ? getDerived().parseType() : getDerived().parseExpr();
+ if (!Arg)
return nullptr;
- return make<EnclosingExpr>("sizeof... (", Pack, ")");
+ return make<EnclosingExpr>(Sym, Arg, Op->getPrecedence());
}
+ case OperatorInfo::NameOnly: {
+ // Not valid as an expression operand.
+ return nullptr;
}
- return nullptr;
- case 't':
- switch (First[1]) {
- case 'e': {
- First += 2;
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
- return Ex;
- return make<EnclosingExpr>("typeid (", Ex, ")");
}
- case 'i': {
- First += 2;
- Node *Ty = getDerived().parseType();
- if (Ty == nullptr)
- return Ty;
- return make<EnclosingExpr>("typeid (", Ty, ")");
+ DEMANGLE_UNREACHABLE;
+ }
+
+ if (numLeft() < 2)
+ return nullptr;
+
+ if (look() == 'L')
+ return getDerived().parseExprPrimary();
+ if (look() == 'T')
+ return getDerived().parseTemplateParam();
+ if (look() == 'f') {
+ // Disambiguate a fold expression from a <function-param>.
+ if (look(1) == 'p' || (look(1) == 'L' && std::isdigit(look(2))))
+ return getDerived().parseFunctionParam();
+ return getDerived().parseFoldExpr();
+ }
+ if (consumeIf("il")) {
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseBracedExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
}
- case 'l': {
- First += 2;
- Node *Ty = getDerived().parseType();
- if (Ty == nullptr)
+ return make<InitListExpr>(nullptr, popTrailingNodeArray(InitsBegin));
+ }
+ if (consumeIf("mc"))
+ return parsePointerToMemberConversionExpr(Node::Prec::Unary);
+ if (consumeIf("nx")) {
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("noexcept ", Ex, Node::Prec::Unary);
+ }
+ if (consumeIf("so"))
+ return parseSubobjectExpr();
+ if (consumeIf("sp")) {
+ Node *Child = getDerived().parseExpr();
+ if (Child == nullptr)
+ return nullptr;
+ return make<ParameterPackExpansion>(Child);
+ }
+ if (consumeIf("sZ")) {
+ if (look() == 'T') {
+ Node *R = getDerived().parseTemplateParam();
+ if (R == nullptr)
return nullptr;
- size_t InitsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = getDerived().parseBracedExpr();
- if (E == nullptr)
- return nullptr;
- Names.push_back(E);
- }
- return make<InitListExpr>(Ty, popTrailingNodeArray(InitsBegin));
+ return make<SizeofParamPackExpr>(R);
}
- case 'r':
- First += 2;
- return make<NameType>("throw");
- case 'w': {
- First += 2;
- Node *Ex = getDerived().parseExpr();
- if (Ex == nullptr)
+ Node *FP = getDerived().parseFunctionParam();
+ if (FP == nullptr)
+ return nullptr;
+ return make<EnclosingExpr>("sizeof... ", FP);
+ }
+ if (consumeIf("sP")) {
+ size_t ArgsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *Arg = getDerived().parseTemplateArg();
+ if (Arg == nullptr)
return nullptr;
- return make<ThrowExpr>(Ex);
+ Names.push_back(Arg);
}
+ auto *Pack = make<NodeArrayNode>(popTrailingNodeArray(ArgsBegin));
+ if (!Pack)
+ return nullptr;
+ return make<EnclosingExpr>("sizeof... ", Pack);
+ }
+ if (consumeIf("tl")) {
+ Node *Ty = getDerived().parseType();
+ if (Ty == nullptr)
+ return nullptr;
+ size_t InitsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseBracedExpr();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
}
- return nullptr;
- case 'u': {
- ++First;
+ return make<InitListExpr>(Ty, popTrailingNodeArray(InitsBegin));
+ }
+ if (consumeIf("tr"))
+ return make<NameType>("throw");
+ if (consumeIf("tw")) {
+ Node *Ex = getDerived().parseExpr();
+ if (Ex == nullptr)
+ return nullptr;
+ return make<ThrowExpr>(Ex);
+ }
+ if (consumeIf('u')) {
Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr);
if (!Name)
return nullptr;
@@ -5060,45 +4776,36 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
// interpreted as <type> node 'short' or 'ellipsis'. However, neither
// __uuidof(short) nor __uuidof(...) can actually appear, so there is no
// actual conflict here.
+ bool IsUUID = false;
+ Node *UUID = nullptr;
if (Name->getBaseName() == "__uuidof") {
- if (numLeft() < 2)
- return nullptr;
- if (*First == 't') {
- ++First;
- Node *Ty = getDerived().parseType();
- if (!Ty)
- return nullptr;
- return make<CallExpr>(Name, makeNodeArray(&Ty, &Ty + 1));
- }
- if (*First == 'z') {
- ++First;
- Node *Ex = getDerived().parseExpr();
- if (!Ex)
- return nullptr;
- return make<CallExpr>(Name, makeNodeArray(&Ex, &Ex + 1));
+ if (consumeIf('t')) {
+ UUID = getDerived().parseType();
+ IsUUID = true;
+ } else if (consumeIf('z')) {
+ UUID = getDerived().parseExpr();
+ IsUUID = true;
}
}
size_t ExprsBegin = Names.size();
- while (!consumeIf('E')) {
- Node *E = getDerived().parseTemplateArg();
- if (E == nullptr)
- return E;
- Names.push_back(E);
+ if (IsUUID) {
+ if (UUID == nullptr)
+ return nullptr;
+ Names.push_back(UUID);
+ } else {
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseTemplateArg();
+ if (E == nullptr)
+ return nullptr;
+ Names.push_back(E);
+ }
}
- return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin));
- }
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- return getDerived().parseUnresolvedName();
+ return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin),
+ Node::Prec::Postfix);
}
- return nullptr;
+
+ // Only unresolved names remain.
+ return getDerived().parseUnresolvedName(Global);
}
// <call-offset> ::= h <nv-offset> _
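The rewritten parseExpr above dispatches single- and two-letter operator codes through a shared parseOperatorEncoding() table instead of nested character switches, with the remaining non-operator productions handled by the consumeIf chain. A quick end-to-end sanity check is the public demangle entry point; a minimal sketch, assuming the four-argument C-style itaniumDemangle that llvm/Demangle/Demangle.h exposed at this time (the mangled names are textbook Itanium examples, not taken from this patch):

```cpp
#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  int Status = 0;
  // "_Z3addIiET_S0_S0_" encodes "int add<int>(int, int)"; "_Z1fILi5EEvv"
  // encodes "void f<5>()", whose template argument goes through the
  // expression parser.
  for (const char *Mangled : {"_Z3addIiET_S0_S0_", "_Z1fILi5EEvv"}) {
    char *Demangled = llvm::itaniumDemangle(Mangled, nullptr, nullptr, &Status);
    std::printf("%s -> %s\n", Mangled, Status == 0 ? Demangled : "<error>");
    std::free(Demangled);
  }
}
```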
@@ -5131,14 +4838,17 @@ bool AbstractManglingParser<Alloc, Derived>::parseCallOffset() {
// # second call-offset is result adjustment
// ::= T <call-offset> <base encoding>
// # base is the nominal target function of thunk
-// ::= GV <object name> # Guard variable for one-time initialization
+// # Guard variable for one-time initialization
+// ::= GV <object name>
// # No <type>
// ::= TW <object name> # Thread-local wrapper
// ::= TH <object name> # Thread-local initialization
// ::= GR <object name> _ # First temporary
// ::= GR <object name> <seq-id> _ # Subsequent temporaries
-// extension ::= TC <first type> <number> _ <second type> # construction vtable for second-in-first
+// # construction vtable for second-in-first
+// extension ::= TC <first type> <number> _ <second type>
// extension ::= GR <object name> # reference temporary for object
+// extension ::= GI <module name> # module global initializer
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() {
switch (look()) {
@@ -5265,6 +4975,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() {
return nullptr;
return make<SpecialName>("reference temporary for ", Name);
}
+ // GI <module-name> v
+ case 'I': {
+ First += 2;
+ ModuleName *Module = nullptr;
+ if (getDerived().parseModuleNameOpt(Module))
+ return nullptr;
+ if (Module == nullptr)
+ return nullptr;
+ return make<SpecialName>("initializer for module ", Module);
+ }
}
}
return nullptr;
@@ -5379,7 +5099,7 @@ template <>
struct FloatData<long double>
{
#if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \
- defined(__wasm__)
+ defined(__wasm__) || defined(__riscv)
static const size_t mangled_size = 32;
#elif defined(__arm__) || defined(__mips__) || defined(__hexagon__)
static const size_t mangled_size = 16;
@@ -5444,6 +5164,7 @@ bool AbstractManglingParser<Alloc, Derived>::parseSeqId(size_t *Out) {
// <substitution> ::= Si # ::std::basic_istream<char, std::char_traits<char> >
// <substitution> ::= So # ::std::basic_ostream<char, std::char_traits<char> >
// <substitution> ::= Sd # ::std::basic_iostream<char, std::char_traits<char> >
+// The St case is handled specially in parseNestedName.
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
if (!consumeIf('S'))
diff --git a/llvm/include/llvm/Demangle/ItaniumNodes.def b/llvm/include/llvm/Demangle/ItaniumNodes.def
new file mode 100644
index 000000000000..c0e277d554cc
--- /dev/null
+++ b/llvm/include/llvm/Demangle/ItaniumNodes.def
@@ -0,0 +1,95 @@
+//===--- ItaniumNodes.def ------------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Define the demangler's node names
+
+#ifndef NODE
+#error Define NODE to handle nodes
+#endif
+
+NODE(NodeArrayNode)
+NODE(DotSuffix)
+NODE(VendorExtQualType)
+NODE(QualType)
+NODE(ConversionOperatorType)
+NODE(PostfixQualifiedType)
+NODE(ElaboratedTypeSpefType)
+NODE(NameType)
+NODE(AbiTagAttr)
+NODE(EnableIfAttr)
+NODE(ObjCProtoName)
+NODE(PointerType)
+NODE(ReferenceType)
+NODE(PointerToMemberType)
+NODE(ArrayType)
+NODE(FunctionType)
+NODE(NoexceptSpec)
+NODE(DynamicExceptionSpec)
+NODE(FunctionEncoding)
+NODE(LiteralOperator)
+NODE(SpecialName)
+NODE(CtorVtableSpecialName)
+NODE(QualifiedName)
+NODE(NestedName)
+NODE(LocalName)
+NODE(ModuleName)
+NODE(ModuleEntity)
+NODE(VectorType)
+NODE(PixelVectorType)
+NODE(BinaryFPType)
+NODE(BitIntType)
+NODE(SyntheticTemplateParamName)
+NODE(TypeTemplateParamDecl)
+NODE(NonTypeTemplateParamDecl)
+NODE(TemplateTemplateParamDecl)
+NODE(TemplateParamPackDecl)
+NODE(ParameterPack)
+NODE(TemplateArgumentPack)
+NODE(ParameterPackExpansion)
+NODE(TemplateArgs)
+NODE(ForwardTemplateReference)
+NODE(NameWithTemplateArgs)
+NODE(GlobalQualifiedName)
+NODE(ExpandedSpecialSubstitution)
+NODE(SpecialSubstitution)
+NODE(CtorDtorName)
+NODE(DtorName)
+NODE(UnnamedTypeName)
+NODE(ClosureTypeName)
+NODE(StructuredBindingName)
+NODE(BinaryExpr)
+NODE(ArraySubscriptExpr)
+NODE(PostfixExpr)
+NODE(ConditionalExpr)
+NODE(MemberExpr)
+NODE(SubobjectExpr)
+NODE(EnclosingExpr)
+NODE(CastExpr)
+NODE(SizeofParamPackExpr)
+NODE(CallExpr)
+NODE(NewExpr)
+NODE(DeleteExpr)
+NODE(PrefixExpr)
+NODE(FunctionParam)
+NODE(ConversionExpr)
+NODE(PointerToMemberConversionExpr)
+NODE(InitListExpr)
+NODE(FoldExpr)
+NODE(ThrowExpr)
+NODE(BoolExpr)
+NODE(StringLiteral)
+NODE(LambdaExpr)
+NODE(EnumLiteral)
+NODE(IntegerLiteral)
+NODE(FloatLiteral)
+NODE(DoubleLiteral)
+NODE(LongDoubleLiteral)
+NODE(BracedExpr)
+NODE(BracedRangeExpr)
+
+#undef NODE
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 1cf7e8f1df45..ca7e44b948c7 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -33,43 +33,50 @@ class OutputBuffer {
size_t CurrentPosition = 0;
size_t BufferCapacity = 0;
- // Ensure there is at least n more positions in buffer.
+ // Ensure there are at least N more positions in the buffer.
void grow(size_t N) {
- if (N + CurrentPosition >= BufferCapacity) {
+ size_t Need = N + CurrentPosition;
+ if (Need > BufferCapacity) {
+ // Reduce the number of reallocations, with a bit of hysteresis. The
+ // padding here is chosen so that the first allocation will most likely
+ // not exceed 1K.
+ Need += 1024 - 32;
BufferCapacity *= 2;
- if (BufferCapacity < N + CurrentPosition)
- BufferCapacity = N + CurrentPosition;
+ if (BufferCapacity < Need)
+ BufferCapacity = Need;
Buffer = static_cast<char *>(std::realloc(Buffer, BufferCapacity));
if (Buffer == nullptr)
std::terminate();
}
}
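The reworked grow() above pads the requirement before applying the doubling rule, trading a little slack for far fewer realloc calls on streams of small appends. A standalone sketch of just the capacity arithmetic (nextCapacity and the driver values are illustrative, not part of the patch):

```cpp
#include <cstddef>
#include <cstdio>

// Mirrors the capacity computation in grow(): pad the requirement,
// double the old capacity, and take whichever is larger.
static size_t nextCapacity(size_t Capacity, size_t Need) {
  Need += 1024 - 32; // hysteresis padding from the patch
  Capacity *= 2;
  return Capacity < Need ? Need : Capacity;
}

int main() {
  size_t Cap = 0;
  for (size_t Need : {size_t(16), size_t(600), size_t(3000)}) {
    Cap = nextCapacity(Cap, Need);
    std::printf("need %zu -> capacity %zu\n", Need, Cap);
  }
}
```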
- void writeUnsigned(uint64_t N, bool isNeg = false) {
- // Handle special case...
- if (N == 0) {
- *this << '0';
- return;
- }
-
+ OutputBuffer &writeUnsigned(uint64_t N, bool isNeg = false) {
std::array<char, 21> Temp;
char *TempPtr = Temp.data() + Temp.size();
- while (N) {
+ // Output at least one character.
+ do {
*--TempPtr = char('0' + N % 10);
N /= 10;
- }
+ } while (N);
- // Add negative sign...
+ // Add negative sign.
if (isNeg)
*--TempPtr = '-';
- this->operator<<(StringView(TempPtr, Temp.data() + Temp.size()));
+
+ return operator+=(StringView(TempPtr, Temp.data() + Temp.size()));
}
public:
OutputBuffer(char *StartBuf, size_t Size)
: Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {}
OutputBuffer() = default;
+ // Non-copyable
+ OutputBuffer(const OutputBuffer &) = delete;
+ OutputBuffer &operator=(const OutputBuffer &) = delete;
+
+ operator StringView() const { return StringView(Buffer, CurrentPosition); }
+
void reset(char *Buffer_, size_t BufferCapacity_) {
CurrentPosition = 0;
Buffer = Buffer_;
@@ -81,13 +88,27 @@ public:
unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
unsigned CurrentPackMax = std::numeric_limits<unsigned>::max();
+ /// When zero, we're printing template args and '>' needs to be parenthesized.
+ /// Use a counter so we can simply increment inside parentheses.
+ unsigned GtIsGt = 1;
+
+ bool isGtInsideTemplateArgs() const { return GtIsGt == 0; }
+
+ void printOpen(char Open = '(') {
+ GtIsGt++;
+ *this += Open;
+ }
+ void printClose(char Close = ')') {
+ GtIsGt--;
+ *this += Close;
+ }
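A hypothetical caller, to show the intended pairing of printOpen()/printClose(): while the counter is raised, an emitted '>' cannot be mistaken for the close of a template-argument list. This sketch assumes the OutputBuffer above is in scope; the helper name and operand text are illustrative:

```cpp
// While GtIsGt is nonzero (between printOpen and printClose) a '>' can be
// emitted directly; at GtIsGt == 0 the caller must parenthesize it.
void printGreaterArg(OutputBuffer &OB) {
  OB.printOpen();  // emits '(' and increments GtIsGt
  OB << "a > b";   // safe: cannot be read as closing a template-arg list
  OB.printClose(); // emits ')' and decrements GtIsGt
}
```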
+
OutputBuffer &operator+=(StringView R) {
- size_t Size = R.size();
- if (Size == 0)
- return *this;
- grow(Size);
- std::memmove(Buffer + CurrentPosition, R.begin(), Size);
- CurrentPosition += Size;
+ if (size_t Size = R.size()) {
+ grow(Size);
+ std::memcpy(Buffer + CurrentPosition, R.begin(), Size);
+ CurrentPosition += Size;
+ }
return *this;
}
@@ -97,9 +118,7 @@ public:
return *this;
}
- OutputBuffer &operator<<(StringView R) { return (*this += R); }
-
- OutputBuffer prepend(StringView R) {
+ OutputBuffer &prepend(StringView R) {
size_t Size = R.size();
grow(Size);
@@ -110,19 +129,16 @@ public:
return *this;
}
+ OutputBuffer &operator<<(StringView R) { return (*this += R); }
+
OutputBuffer &operator<<(char C) { return (*this += C); }
OutputBuffer &operator<<(long long N) {
- if (N < 0)
- writeUnsigned(static_cast<unsigned long long>(-N), true);
- else
- writeUnsigned(static_cast<unsigned long long>(N));
- return *this;
+ return writeUnsigned(static_cast<unsigned long long>(std::abs(N)), N < 0);
}
OutputBuffer &operator<<(unsigned long long N) {
- writeUnsigned(N, false);
- return *this;
+ return writeUnsigned(N, false);
}
OutputBuffer &operator<<(long N) {
@@ -155,7 +171,8 @@ public:
void setCurrentPosition(size_t NewPos) { CurrentPosition = NewPos; }
char back() const {
- return CurrentPosition ? Buffer[CurrentPosition - 1] : '\0';
+ assert(CurrentPosition);
+ return Buffer[CurrentPosition - 1];
}
bool empty() const { return CurrentPosition == 0; }
@@ -165,35 +182,20 @@ public:
size_t getBufferCapacity() const { return BufferCapacity; }
};
-template <class T> class SwapAndRestore {
- T &Restore;
- T OriginalValue;
- bool ShouldRestore = true;
+template <class T> class ScopedOverride {
+ T &Loc;
+ T Original;
public:
- SwapAndRestore(T &Restore_) : SwapAndRestore(Restore_, Restore_) {}
-
- SwapAndRestore(T &Restore_, T NewVal)
- : Restore(Restore_), OriginalValue(Restore) {
- Restore = std::move(NewVal);
- }
- ~SwapAndRestore() {
- if (ShouldRestore)
- Restore = std::move(OriginalValue);
- }
-
- void shouldRestore(bool ShouldRestore_) { ShouldRestore = ShouldRestore_; }
-
- void restoreNow(bool Force) {
- if (!Force && !ShouldRestore)
- return;
+ ScopedOverride(T &Loc_) : ScopedOverride(Loc_, Loc_) {}
- Restore = std::move(OriginalValue);
- ShouldRestore = false;
+ ScopedOverride(T &Loc_, T NewVal) : Loc(Loc_), Original(Loc_) {
+ Loc_ = std::move(NewVal);
}
+ ~ScopedOverride() { Loc = std::move(Original); }
- SwapAndRestore(const SwapAndRestore &) = delete;
- SwapAndRestore &operator=(const SwapAndRestore &) = delete;
+ ScopedOverride(const ScopedOverride &) = delete;
+ ScopedOverride &operator=(const ScopedOverride &) = delete;
};
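ScopedOverride keeps SwapAndRestore's core save/override/restore behaviour but drops the conditional-restore knobs. A self-contained copy of the pattern with a tiny driver, showing the unconditional restore on scope exit (the driver is illustrative):

```cpp
#include <cassert>
#include <utility>

// Copy of the ScopedOverride pattern from the patch, for a standalone demo.
template <class T> class ScopedOverride {
  T &Loc;
  T Original;

public:
  ScopedOverride(T &Loc_, T NewVal) : Loc(Loc_), Original(Loc_) {
    Loc_ = std::move(NewVal);
  }
  ~ScopedOverride() { Loc = std::move(Original); }
  ScopedOverride(const ScopedOverride &) = delete;
  ScopedOverride &operator=(const ScopedOverride &) = delete;
};

int main() {
  bool TryToParseTemplateArgs = true;
  {
    ScopedOverride<bool> Save(TryToParseTemplateArgs, false);
    assert(!TryToParseTemplateArgs); // overridden inside the scope
  }
  assert(TryToParseTemplateArgs); // restored on scope exit
}
```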
inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB,
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h b/llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h
new file mode 100644
index 000000000000..d748d4b0fa59
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h
@@ -0,0 +1,35 @@
+//===--------- DWARFRecordSectionSplitter.h - JITLink -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_DWARFRECORDSECTIONSPLITTER_H
+#define LLVM_EXECUTIONENGINE_JITLINK_DWARFRECORDSECTIONSPLITTER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// A LinkGraph pass that splits blocks in a section that follows the DWARF
+/// Record format into sub-blocks where each header gets its own block.
+/// When splitting EHFrames, DWARFRecordSectionSplitter should not be run
+/// without EHFrameEdgeFixer, which is responsible for adding FDE-to-CIE edges.
+class DWARFRecordSectionSplitter {
+public:
+ DWARFRecordSectionSplitter(StringRef SectionName);
+ Error operator()(LinkGraph &G);
+
+private:
+ Error processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache);
+
+ StringRef SectionName;
+};
+
+} // namespace jitlink
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_DWARFRECORDSECTIONSPLITTER_H
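A sketch of how the splitter is typically scheduled, mirroring the usual JITLink eh-frame wiring (the MachO section name and the addEHFramePasses helper are illustrative, not part of this header):

```cpp
#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

using namespace llvm::jitlink;

// Split eh-frame records into per-CIE/FDE blocks before pruning; an
// EHFrameEdgeFixer-style pass must follow to add the FDE-to-CIE edges,
// as the class comment above warns.
void addEHFramePasses(PassConfiguration &Config) {
  Config.PrePrunePasses.push_back(
      DWARFRecordSectionSplitter("__TEXT,__eh_frame"));
}
```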
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index 25f1349f15f2..897808c0ee83 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -223,6 +223,11 @@ public:
/// Returns the size of this defined addressable.
size_t getSize() const { return Size; }
+ /// Returns the address range of this defined addressable.
+ orc::ExecutorAddrRange getRange() const {
+ return orc::ExecutorAddrRange(getAddress(), getSize());
+ }
+
/// Get the content for this block. Block must not be a zero-fill block.
ArrayRef<char> getContent() const {
assert(Data && "Block does not contain content");
@@ -576,6 +581,11 @@ public:
this->Size = Size;
}
+ /// Returns the address range of this symbol.
+ orc::ExecutorAddrRange getRange() const {
+ return orc::ExecutorAddrRange(getAddress(), getSize());
+ }
+
/// Returns true if this symbol is backed by a zero-fill block.
/// This method may only be called on defined symbols.
bool isSymbolZeroFill() const { return getBlock().isZeroFill(); }
@@ -1215,8 +1225,11 @@ public:
/// Make the given symbol an absolute with the given address (must not already
/// be absolute).
///
- /// Symbol size, linkage, scope, and callability, and liveness will be left
- /// unchanged. Symbol offset will be reset to 0.
+ /// The symbol's size, linkage, callability, and liveness will be left
+ /// unchanged, and its offset will be reset to 0.
+ ///
+ /// If the symbol was external then its scope will be set to local, otherwise
+ /// it will be left unchanged.
void makeAbsolute(Symbol &Sym, orc::ExecutorAddr Address) {
assert(!Sym.isAbsolute() && "Symbol is already absolute");
if (Sym.isExternal()) {
@@ -1225,6 +1238,7 @@ public:
assert(Sym.getOffset() == 0 && "External is not at offset 0");
ExternalSymbols.erase(&Sym);
Sym.getAddressable().setAbsolute(true);
+ Sym.setScope(Scope::Local);
} else {
assert(Sym.isDefined() && "Sym is not a defined symbol");
Section &Sec = Sym.getBlock().getSection();
@@ -1733,6 +1747,9 @@ Error markAllSymbolsLive(LinkGraph &G);
Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B,
const Edge &E);
+Error makeAlignmentError(llvm::orc::ExecutorAddr Loc, uint64_t Value, int N,
+ const Edge &E);
+
/// Base case for edge-visitors where the visitor-list is empty.
inline void visitEdge(LinkGraph &G, Block *B, Edge &E) {}
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
index aee14c0d1fe5..6f2ff012697d 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
@@ -18,30 +18,6 @@
namespace llvm {
namespace jitlink {
-namespace MachO_arm64_Edges {
-
-enum MachOARM64RelocationKind : Edge::Kind {
- Branch26 = Edge::FirstRelocation,
- Pointer32,
- Pointer64,
- Pointer64Anon,
- Page21,
- PageOffset12,
- GOTPage21,
- GOTPageOffset12,
- TLVPage21,
- TLVPageOffset12,
- PointerToGOT,
- PairedAddend,
- LDRLiteral19,
- Delta32,
- Delta64,
- NegDelta32,
- NegDelta64,
-};
-
-} // namespace MachO_arm64_Edges
-
/// Create a LinkGraph from a MachO/arm64 relocatable object.
///
/// Note: The graph does not take ownership of the underlying buffer, nor copy
@@ -62,9 +38,6 @@ createLinkGraphFromMachOObject_arm64(MemoryBufferRef ObjectBuffer);
void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
std::unique_ptr<JITLinkContext> Ctx);
-/// Return the string name of the given MachO arm64 edge kind.
-const char *getMachOARM64RelocationKindName(Edge::Kind R);
-
} // end namespace jitlink
} // end namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
index e9771319ef06..a18098e5a1a9 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
@@ -152,13 +152,9 @@ public:
using iterator = typename VectorTy::iterator;
AllocGroupSmallMap() = default;
- AllocGroupSmallMap(std::initializer_list<std::pair<AllocGroup, T>> Inits) {
- Elems.reserve(Inits.size());
- for (const auto &E : Inits)
- Elems.push_back(E);
- llvm::sort(Elems, [](const ElemT &LHS, const ElemT &RHS) {
- return LHS.first < RHS.first;
- });
+ AllocGroupSmallMap(std::initializer_list<std::pair<AllocGroup, T>> Inits)
+ : Elems(Inits) {
+ llvm::sort(Elems, llvm::less_first());
}
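llvm::less_first orders pair-like elements by their first member, replacing the hand-written lambda. A standalone equivalent for illustration (LessFirst re-implements the utility so the snippet does not depend on STLExtras.h):

```cpp
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Standalone stand-in for llvm::less_first: compare only .first.
struct LessFirst {
  template <typename T, typename U>
  bool operator()(const T &L, const U &R) const {
    return L.first < R.first;
  }
};

int main() {
  std::vector<std::pair<int, const char *>> Elems{{2, "b"}, {1, "a"}};
  std::sort(Elems.begin(), Elems.end(), LessFirst());
  std::printf("%s %s\n", Elems[0].second, Elems[1].second); // prints: a b
}
```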
iterator begin() { return Elems.begin(); }
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
index 994ce783b058..53ff6c7a219e 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
@@ -13,24 +13,353 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
#define LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
+#include "TableManager.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
namespace llvm {
namespace jitlink {
namespace aarch64 {
-/// Represets aarch64 fixups
enum EdgeKind_aarch64 : Edge::Kind {
-
- /// Set a CALL immediate field to bits [27:2] of X = Target - Fixup + Addend
- R_AARCH64_CALL26 = Edge::FirstRelocation,
-
+ Branch26 = Edge::FirstRelocation,
+ Pointer32,
+ Pointer64,
+ Pointer64Anon,
+ Page21,
+ PageOffset12,
+ MoveWide16,
+ GOTPage21,
+ GOTPageOffset12,
+ TLVPage21,
+ TLVPageOffset12,
+ PointerToGOT,
+ PairedAddend,
+ LDRLiteral19,
+ Delta32,
+ Delta64,
+ NegDelta32,
+ NegDelta64,
};
/// Returns a string name for the given aarch64 edge. For debugging purposes
/// only
const char *getEdgeKindName(Edge::Kind K);
+// Returns whether Instr is an LD/ST (imm12) instruction.
+inline bool isLoadStoreImm12(uint32_t Instr) {
+ constexpr uint32_t LoadStoreImm12Mask = 0x3b000000;
+ return (Instr & LoadStoreImm12Mask) == 0x39000000;
+}
+
+// Returns the amount by which the address operand of an LD/ST (imm12)
+// instruction should be shifted right.
+//
+// The shift value varies with the data size of the LD/ST instruction.
+// For instance, an LDRH instruction needs the address to be shifted
+// right by 1.
+inline unsigned getPageOffset12Shift(uint32_t Instr) {
+ constexpr uint32_t Vec128Mask = 0x04800000;
+
+ if (isLoadStoreImm12(Instr)) {
+ uint32_t ImplicitShift = Instr >> 30;
+ if (ImplicitShift == 0)
+ if ((Instr & Vec128Mask) == Vec128Mask)
+ ImplicitShift = 4;
+
+ return ImplicitShift;
+ }
+
+ return 0;
+}
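A standalone check of the two load/store helpers above, with the helper bodies copied so the snippet compiles on its own; the instruction words were hand-assembled from the A64 encoding tables and are illustrative:

```cpp
#include <cassert>
#include <cstdint>

// Copies of the helpers from the patch, for a self-contained check.
inline bool isLoadStoreImm12(uint32_t Instr) {
  return (Instr & 0x3b000000) == 0x39000000;
}
inline unsigned getPageOffset12Shift(uint32_t Instr) {
  if (isLoadStoreImm12(Instr)) {
    uint32_t ImplicitShift = Instr >> 30; // size field
    if (ImplicitShift == 0 && (Instr & 0x04800000) == 0x04800000)
      ImplicitShift = 4; // 128-bit vector load/store
    return ImplicitShift;
  }
  return 0;
}

int main() {
  assert(getPageOffset12Shift(0x79400000) == 1); // ldrh w0, [x0]: scale 2
  assert(getPageOffset12Shift(0xf9400000) == 3); // ldr  x0, [x0]: scale 8
}
```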
+
+// Returns whether Instr is a MOVK/MOVZ (imm16) with a zero immediate field.
+inline bool isMoveWideImm16(uint32_t Instr) {
+ constexpr uint32_t MoveWideImm16Mask = 0x5f9fffe0;
+ return (Instr & MoveWideImm16Mask) == 0x52800000;
+}
+
+// Returns the amount by which the address operand of a MOVK/MOVZ (imm16)
+// instruction should be shifted right.
+//
+// The shift value is specified in the assembly as LSL #<shift>.
+inline unsigned getMoveWide16Shift(uint32_t Instr) {
+ if (isMoveWideImm16(Instr)) {
+ uint32_t ImplicitShift = (Instr >> 21) & 0b11;
+ return ImplicitShift << 4;
+ }
+
+ return 0;
+}
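The same kind of self-contained check for the move-wide helpers; the hw field (bits 22:21) selects a shift of 0, 16, 32, or 48, and the instruction words are again hand-assembled and illustrative:

```cpp
#include <cassert>
#include <cstdint>

// Copies of the helpers from the patch, for a self-contained check.
inline bool isMoveWideImm16(uint32_t Instr) {
  return (Instr & 0x5f9fffe0) == 0x52800000;
}
inline unsigned getMoveWide16Shift(uint32_t Instr) {
  if (isMoveWideImm16(Instr))
    return ((Instr >> 21) & 0b11) << 4; // hw field * 16
  return 0;
}

int main() {
  assert(getMoveWide16Shift(0xd2a00000) == 16); // movz x0, #0, lsl #16
  assert(getMoveWide16Shift(0xf2800000) == 0);  // movk x0, #0
}
```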
+
+/// Apply fixup expression for edge to block content.
+inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
+ using namespace support;
+
+ char *BlockWorkingMem = B.getAlreadyMutableContent().data();
+ char *FixupPtr = BlockWorkingMem + E.getOffset();
+ orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset();
+
+ switch (E.getKind()) {
+ case Branch26: {
+ assert((FixupAddress.getValue() & 0x3) == 0 &&
+ "Branch-inst is not 32-bit aligned");
+
+ int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+
+ if (static_cast<uint64_t>(Value) & 0x3)
+ return make_error<JITLinkError>("Branch26 target is not 32-bit "
+ "aligned");
+
+ if (Value < -(1 << 27) || Value > ((1 << 27) - 1))
+ return makeTargetOutOfRangeError(G, B, E);
+
+ uint32_t RawInstr = *(little32_t *)FixupPtr;
+ assert((RawInstr & 0x7fffffff) == 0x14000000 &&
+ "RawInstr isn't a B or BR immediate instruction");
+ uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
+ uint32_t FixedInstr = RawInstr | Imm;
+ *(little32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case Pointer32: {
+ uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
+ if (Value > std::numeric_limits<uint32_t>::max())
+ return makeTargetOutOfRangeError(G, B, E);
+ *(ulittle32_t *)FixupPtr = Value;
+ break;
+ }
+ case Pointer64:
+ case Pointer64Anon: {
+ uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
+ *(ulittle64_t *)FixupPtr = Value;
+ break;
+ }
+ case Page21: {
+ assert((E.getKind() != GOTPage21 || E.getAddend() == 0) &&
+ "GOTPAGE21 with non-zero addend");
+ uint64_t TargetPage =
+ (E.getTarget().getAddress().getValue() + E.getAddend()) &
+ ~static_cast<uint64_t>(4096 - 1);
+ uint64_t PCPage =
+ FixupAddress.getValue() & ~static_cast<uint64_t>(4096 - 1);
+
+ int64_t PageDelta = TargetPage - PCPage;
+ if (!isInt<33>(PageDelta))
+ return makeTargetOutOfRangeError(G, B, E);
+
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert((RawInstr & 0xffffffe0) == 0x90000000 &&
+ "RawInstr isn't an ADRP instruction");
+ uint32_t ImmLo = (static_cast<uint64_t>(PageDelta) >> 12) & 0x3;
+ uint32_t ImmHi = (static_cast<uint64_t>(PageDelta) >> 14) & 0x7ffff;
+ uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5);
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case PageOffset12: {
+ uint64_t TargetOffset =
+ (E.getTarget().getAddress() + E.getAddend()).getValue() & 0xfff;
+
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ unsigned ImmShift = getPageOffset12Shift(RawInstr);
+
+ if (TargetOffset & ((1 << ImmShift) - 1))
+ return make_error<JITLinkError>("PAGEOFF12 target is not aligned");
+
+ uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10;
+ uint32_t FixedInstr = RawInstr | EncodedImm;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case MoveWide16: {
+ uint64_t TargetOffset =
+ (E.getTarget().getAddress() + E.getAddend()).getValue();
+
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert(isMoveWideImm16(RawInstr) &&
+ "RawInstr isn't a MOVK/MOVZ instruction");
+
+ unsigned ImmShift = getMoveWide16Shift(RawInstr);
+ uint32_t Imm = (TargetOffset >> ImmShift) & 0xffff;
+ uint32_t FixedInstr = RawInstr | (Imm << 5);
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case LDRLiteral19: {
+ assert((FixupAddress.getValue() & 0x3) == 0 && "LDR is not 32-bit aligned");
+ assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend");
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal");
+ int64_t Delta = E.getTarget().getAddress() - FixupAddress;
+ if (Delta & 0x3)
+ return make_error<JITLinkError>("LDR literal target is not 32-bit "
+ "aligned");
+ if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1))
+ return makeTargetOutOfRangeError(G, B, E);
+
+ uint32_t EncodedImm = ((static_cast<uint32_t>(Delta) >> 2) & 0x7ffff) << 5;
+ uint32_t FixedInstr = RawInstr | EncodedImm;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case Delta32:
+ case Delta64:
+ case NegDelta32:
+ case NegDelta64: {
+ int64_t Value;
+ if (E.getKind() == Delta32 || E.getKind() == Delta64)
+ Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+ else
+ Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
+
+ if (E.getKind() == Delta32 || E.getKind() == NegDelta32) {
+ if (Value < std::numeric_limits<int32_t>::min() ||
+ Value > std::numeric_limits<int32_t>::max())
+ return makeTargetOutOfRangeError(G, B, E);
+ *(little32_t *)FixupPtr = Value;
+ } else
+ *(little64_t *)FixupPtr = Value;
+ break;
+ }
+ case TLVPage21:
+ case GOTPage21:
+ case TLVPageOffset12:
+ case GOTPageOffset12:
+ case PointerToGOT: {
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ "GOT/TLV edge kinds not lowered: " + getEdgeKindName(E.getKind()));
+ }
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ "unsupported edge kind" + getEdgeKindName(E.getKind()));
+ }
+
+ return Error::success();
+}
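Worked numbers for the Page21 (ADRP) case of applyFixup above, reduced to plain arithmetic; the two addresses are invented for illustration:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Invented addresses: target and fixup sit on different 4 KiB pages.
  uint64_t Target = 0x100005678, Fixup = 0x100001234;
  uint64_t TargetPage = Target & ~uint64_t(4096 - 1); // 0x100005000
  uint64_t PCPage     = Fixup  & ~uint64_t(4096 - 1); // 0x100001000
  int64_t PageDelta = TargetPage - PCPage;            // 0x4000 (4 pages)
  // Split the page delta exactly as the fixup does before ORing it into
  // the ADRP word's immlo/immhi fields.
  uint32_t ImmLo = (uint64_t(PageDelta) >> 12) & 0x3;
  uint32_t ImmHi = (uint64_t(PageDelta) >> 14) & 0x7ffff;
  assert(ImmLo == 0 && ImmHi == 1);
}
```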
+
+/// AArch64 null pointer content.
+extern const uint8_t NullGOTEntryContent[8];
+
+/// AArch64 PLT stub content.
+extern const uint8_t StubContent[8];
+
+/// Global Offset Table Builder.
+class GOTTableManager : public TableManager<GOTTableManager> {
+public:
+ static StringRef getSectionName() { return "$__GOT"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ Edge::Kind KindToSet = Edge::Invalid;
+ const char *BlockWorkingMem = B->getContent().data();
+ const char *FixupPtr = BlockWorkingMem + E.getOffset();
+
+ switch (E.getKind()) {
+ case aarch64::GOTPage21:
+ case aarch64::TLVPage21: {
+ KindToSet = aarch64::Page21;
+ break;
+ }
+ case aarch64::GOTPageOffset12:
+ case aarch64::TLVPageOffset12: {
+ KindToSet = aarch64::PageOffset12;
+ uint32_t RawInstr = *(const support::ulittle32_t *)FixupPtr;
+ (void)RawInstr;
+ assert(E.getAddend() == 0 &&
+ "GOTPageOffset12/TLVPageOffset12 with non-zero addend");
+ assert((RawInstr & 0xfffffc00) == 0xf9400000 &&
+ "RawInstr isn't a 64-bit LDR immediate");
+ break;
+ }
+ case aarch64::PointerToGOT: {
+ KindToSet = aarch64::Delta64;
+ break;
+ }
+ default:
+ return false;
+ }
+ assert(KindToSet != Edge::Invalid &&
+ "Fell through switch, but no new kind to set");
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setKind(KindToSet);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ auto &GOTEntryBlock = G.createContentBlock(
+ getGOTSection(G), getGOTEntryBlockContent(), orc::ExecutorAddr(), 8, 0);
+ GOTEntryBlock.addEdge(aarch64::Pointer64, 0, Target, 0);
+ return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
+ }
+
+private:
+ Section &getGOTSection(LinkGraph &G) {
+ if (!GOTSection)
+ GOTSection =
+ &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec);
+ return *GOTSection;
+ }
+
+ ArrayRef<char> getGOTEntryBlockContent() {
+ return {reinterpret_cast<const char *>(NullGOTEntryContent),
+ sizeof(NullGOTEntryContent)};
+ }
+
+ Section *GOTSection = nullptr;
+};
+
+/// Procedure Linkage Table Builder.
+class PLTTableManager : public TableManager<PLTTableManager> {
+public:
+ PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {}
+
+ static StringRef getSectionName() { return "$__STUBS"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ if (E.getKind() == aarch64::Branch26 && !E.getTarget().isDefined()) {
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+ return false;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ auto &StubContentBlock = G.createContentBlock(
+ getStubsSection(G), getStubBlockContent(), orc::ExecutorAddr(), 1, 0);
+ // Re-use GOT entries for stub targets.
+ auto &GOTEntrySymbol = GOT.getEntryForTarget(G, Target);
+ StubContentBlock.addEdge(aarch64::LDRLiteral19, 0, GOTEntrySymbol, 0);
+ return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false);
+ }
+
+public:
+ Section &getStubsSection(LinkGraph &G) {
+ if (!StubsSection)
+ StubsSection =
+ &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec);
+ return *StubsSection;
+ }
+
+ ArrayRef<char> getStubBlockContent() {
+ return {reinterpret_cast<const char *>(StubContent), sizeof(StubContent)};
+ }
+
+ GOTTableManager &GOT;
+ Section *StubsSection = nullptr;
+};
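The two managers are meant to be composed in one edge-visiting pass, as the existing x86_64 backend does; a sketch under that assumption (buildTables_aarch64 is a hypothetical name; visitExistingEdges is JITLink's edge-visitor helper):

```cpp
// Hypothetical pass wiring: lower GOT/TLV and external-branch edges by
// routing them through the table managers declared above.
inline Error buildTables_aarch64(LinkGraph &G) {
  GOTTableManager GOT;
  PLTTableManager PLT(GOT);
  visitExistingEdges(G, GOT, PLT);
  return Error::success();
}
```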
+
} // namespace aarch64
} // namespace jitlink
} // namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index 5abd4cf11dea..95f45fae91e4 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -37,13 +37,20 @@ enum EdgeKind_riscv : Edge::Kind {
///
R_RISCV_64,
- /// Low 12 bits of PC-relative branch pointer value relocation
+ /// PC-relative branch pointer value relocation
///
/// Fixup expression:
- /// Fixup <- (Target - Fixup + Addend) & 0xFFF
+ /// Fixup <- (Target - Fixup + Addend)
///
R_RISCV_BRANCH,
+ /// High 20 bits of PC-relative jump pointer value relocation
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target - Fixup + Addend
+ ///
+ R_RISCV_JAL,
+
/// High 20 bits of 32-bit pointer value relocation
///
/// Fixup expression
@@ -145,6 +152,12 @@ enum EdgeKind_riscv : Edge::Kind {
/// Fixup <- (Target - *{1}Fixup - Addend)
R_RISCV_SUB8,
+ /// 6-bit label subtraction
+ ///
+ /// Fixup expression
+ /// Fixup <- (Target - *{1}Fixup - Addend)
+ R_RISCV_SUB6,
+
/// Local label assignment
///
/// Fixup expression:
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
index 4a4e8d15be66..9a2bc9b09350 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
@@ -447,11 +447,10 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
break;
}
- default: {
- // If you hit this you should check that *constructor and other non-fixup
- // edges have been removed prior to applying fixups.
- llvm_unreachable("Graph contains edge kind with no fixup expression");
- }
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ "unsupported edge kind" + getEdgeKindName(E.getKind()));
}
return Error::success();
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index c4647148f287..df2826b50784 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -339,11 +339,7 @@ public:
/// Sort the lookup set by pointer value. This sort is fast but sensitive to
/// allocation order and so should not be used where a consistent order is
/// required.
- void sortByAddress() {
- llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) {
- return LHS.first < RHS.first;
- });
- }
+ void sortByAddress() { llvm::sort(Symbols, llvm::less_first()); }
/// Sort the lookup set lexicographically. This sort is slow but the order
/// is unaffected by allocation order.
@@ -420,12 +416,15 @@ class FailedToMaterialize : public ErrorInfo<FailedToMaterialize> {
public:
static char ID;
- FailedToMaterialize(std::shared_ptr<SymbolDependenceMap> Symbols);
+ FailedToMaterialize(std::shared_ptr<SymbolStringPool> SSP,
+ std::shared_ptr<SymbolDependenceMap> Symbols);
+ ~FailedToMaterialize();
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
const SymbolDependenceMap &getSymbols() const { return *Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
std::shared_ptr<SymbolDependenceMap> Symbols;
};
@@ -1331,7 +1330,7 @@ public:
lookupInitSymbols(ExecutionSession &ES,
const DenseMap<JITDylib *, SymbolLookupSet> &InitSyms);
- /// Performs an async lookup for the the given symbols in each of the given
+ /// Performs an async lookup for the given symbols in each of the given
/// JITDylibs, calling the given handler once all lookups have completed.
static void
lookupInitSymbolsAsync(unique_function<void(Error)> OnComplete,
@@ -1389,8 +1388,12 @@ public:
/// object.
ExecutionSession(std::unique_ptr<ExecutorProcessControl> EPC);
+ /// Destroy an ExecutionSession. Verifies that endSession was called prior to
+ /// destruction.
+ ~ExecutionSession();
+
/// End the session. Closes all JITDylibs and disconnects from the
- /// executor.
+ /// executor. Clients must call this method before destroying the session.
Error endSession();
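Given the new destructor check, client teardown now has an explicit required order; a minimal sketch (ES stands for an already-constructed ExecutionSession, and the error handling shown is one reasonable choice, not mandated by the API):

```cpp
// Hypothetical teardown: endSession() must complete before ES is destroyed.
if (auto Err = ES.endSession())
  ES.reportError(std::move(Err)); // report, rather than drop, shutdown errors
// ES may now safely go out of scope.
```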
/// Get the ExecutorProcessControl object associated with this
@@ -1523,7 +1526,7 @@ public:
/// after resolution, the function will return a success value, but the
/// error will be reported via reportErrors.
Expected<SymbolMap> lookup(const JITDylibSearchOrder &SearchOrder,
- const SymbolLookupSet &Symbols,
+ SymbolLookupSet Symbols,
LookupKind K = LookupKind::Static,
SymbolState RequiredState = SymbolState::Ready,
RegisterDependenciesFunction RegisterDependencies =
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
index 7eb98dfc741e..c4ef06f1fbc6 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h
@@ -92,6 +92,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S);
/// Render a LookupKind.
raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K);
+/// Dump a SymbolStringPool. Useful for debugging dangling-pointer crashes.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP);
+
/// A function object that can be used as an ObjectTransformLayer transform
/// to dump object files to disk at a specified path.
class DumpObjects {
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
index 6b12fe990a8a..3804b6dda91f 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
@@ -109,7 +109,8 @@ public:
/// Returns an AliasMap containing the default aliases for the ELFNixPlatform.
/// This can be modified by clients when constructing the platform to add
/// or remove aliases.
- static SymbolAliasMap standardPlatformAliases(ExecutionSession &ES);
+ static Expected<SymbolAliasMap> standardPlatformAliases(ExecutionSession &ES,
+ JITDylib &PlatformJD);
/// Returns the array of required CXX aliases.
static ArrayRef<std::pair<const char *, const char *>> requiredCXXAliases();
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
index ac7051b5b75c..241453320ad5 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
@@ -23,8 +23,6 @@
#include <memory>
#include <vector>
-using namespace llvm::orc::shared;
-
namespace llvm {
namespace orc {
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
index 92de5882bafe..354984b540a9 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
@@ -148,7 +148,7 @@ private:
std::mutex EPCUIMutex;
ExecutorProcessControl &EPC;
std::unique_ptr<ABISupport> ABI;
- JITTargetAddress ResolverBlockAddr;
+ JITTargetAddress ResolverBlockAddr = 0;
FinalizedAlloc ResolverBlock;
std::unique_ptr<TrampolinePool> TP;
std::unique_ptr<LazyCallThroughManager> LCTM;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
index 2cc8c29b2813..e6a63707653a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
@@ -125,7 +125,7 @@ public:
/// Set TargetOptions.
///
/// Note: This operation will overwrite any previously configured options,
- /// including EmulatedTLS and ExplicitEmulatedTLS which
+ /// including EmulatedTLS, ExplicitEmulatedTLS, and UseInitArray which
/// the JITTargetMachineBuilder sets by default. Clients are responsible
/// for re-enabling these overwritten options.
JITTargetMachineBuilder &setOptions(TargetOptions Options) {
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index d76e6a21a9bb..d67a7f2bfeb2 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -56,7 +56,7 @@ public:
/// Destruct this instance. If a multi-threaded instance, waits for all
/// compile threads to complete.
- ~LLJIT();
+ virtual ~LLJIT();
/// Returns the ExecutionSession for this instance.
ExecutionSession &getExecutionSession() { return *ES; }
@@ -110,30 +110,30 @@ public:
/// Look up a symbol in JITDylib JD by the symbol's linker-mangled name (to
/// look up symbols based on their IR name use the lookup function instead).
- Expected<JITEvaluatedSymbol> lookupLinkerMangled(JITDylib &JD,
- SymbolStringPtr Name);
+ Expected<ExecutorAddr> lookupLinkerMangled(JITDylib &JD,
+ SymbolStringPtr Name);
/// Look up a symbol in JITDylib JD by the symbol's linker-mangled name (to
/// look up symbols based on their IR name use the lookup function instead).
- Expected<JITEvaluatedSymbol> lookupLinkerMangled(JITDylib &JD,
- StringRef Name) {
+ Expected<ExecutorAddr> lookupLinkerMangled(JITDylib &JD,
+ StringRef Name) {
return lookupLinkerMangled(JD, ES->intern(Name));
}
/// Look up a symbol in the main JITDylib by the symbol's linker-mangled name
/// (to look up symbols based on their IR name use the lookup function
/// instead).
- Expected<JITEvaluatedSymbol> lookupLinkerMangled(StringRef Name) {
+ Expected<ExecutorAddr> lookupLinkerMangled(StringRef Name) {
return lookupLinkerMangled(*Main, Name);
}
/// Look up a symbol in JITDylib JD based on its IR symbol name.
- Expected<JITEvaluatedSymbol> lookup(JITDylib &JD, StringRef UnmangledName) {
+ Expected<ExecutorAddr> lookup(JITDylib &JD, StringRef UnmangledName) {
return lookupLinkerMangled(JD, mangle(UnmangledName));
}
/// Look up a symbol in the main JITDylib based on its IR symbol name.
- Expected<JITEvaluatedSymbol> lookup(StringRef UnmangledName) {
+ Expected<ExecutorAddr> lookup(StringRef UnmangledName) {
return lookup(*Main, UnmangledName);
}
@@ -401,7 +401,7 @@ public:
std::function<std::unique_ptr<IndirectStubsManager>()>;
Triple TT;
- JITTargetAddress LazyCompileFailureAddr = 0;
+ ExecutorAddr LazyCompileFailureAddr;
std::unique_ptr<LazyCallThroughManager> LCTMgr;
IndirectStubsManagerBuilderFunction ISMBuilder;
@@ -415,7 +415,7 @@ public:
/// Set the address in the target address to call if a lazy compile fails.
///
/// If this method is not called then the value will default to 0.
- SetterImpl &setLazyCompileFailureAddr(JITTargetAddress Addr) {
+ SetterImpl &setLazyCompileFailureAddr(ExecutorAddr Addr) {
this->impl().LazyCompileFailureAddr = Addr;
return this->impl();
}
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index 01f3f1b2ab63..141dd73548c8 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -26,30 +26,19 @@
namespace llvm {
namespace orc {
-struct MachOJITDylibInitializers {
- using SectionList = std::vector<ExecutorAddrRange>;
-
- MachOJITDylibInitializers(std::string Name, ExecutorAddr MachOHeaderAddress)
- : Name(std::move(Name)),
- MachOHeaderAddress(std::move(MachOHeaderAddress)) {}
-
- std::string Name;
- ExecutorAddr MachOHeaderAddress;
- ExecutorAddr ObjCImageInfoAddress;
-
- StringMap<SectionList> InitSections;
-};
-
-class MachOJITDylibDeinitializers {};
-
-using MachOJITDylibInitializerSequence = std::vector<MachOJITDylibInitializers>;
-
-using MachOJITDylibDeinitializerSequence =
- std::vector<MachOJITDylibDeinitializers>;
-
/// Mediates between MachO initialization and ExecutionSession state.
class MachOPlatform : public Platform {
public:
+ // Used internally by MachOPlatform, but made public to enable serialization.
+ struct MachOJITDylibDepInfo {
+ bool Sealed = false;
+ std::vector<ExecutorAddr> DepHeaders;
+ };
+
+ // Used internally by MachOPlatform, but made public to enable serialization.
+ using MachOJITDylibDepInfoMap =
+ std::vector<std::pair<ExecutorAddr, MachOJITDylibDepInfo>>;
+
/// Try to create a MachOPlatform instance, adding the ORC runtime to the
/// given JITDylib.
///
@@ -161,26 +150,28 @@ private:
Error processObjCImageInfo(jitlink::LinkGraph &G,
MaterializationResponsibility &MR);
- Error registerInitSections(jitlink::LinkGraph &G, JITDylib &JD);
-
Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD);
- Error registerEHAndTLVSections(jitlink::LinkGraph &G);
+ Error registerObjectPlatformSections(jitlink::LinkGraph &G, JITDylib &JD);
Error registerEHSectionsPhase1(jitlink::LinkGraph &G);
std::mutex PluginMutex;
MachOPlatform &MP;
+
+ // FIXME: ObjCImageInfos and HeaderAddrs need to be cleared when
+ // JITDylibs are removed.
DenseMap<JITDylib *, std::pair<uint32_t, uint32_t>> ObjCImageInfos;
+ DenseMap<JITDylib *, ExecutorAddr> HeaderAddrs;
InitSymbolDepMap InitSymbolDeps;
};
- using SendInitializerSequenceFn =
- unique_function<void(Expected<MachOJITDylibInitializerSequence>)>;
-
- using SendDeinitializerSequenceFn =
- unique_function<void(Expected<MachOJITDylibDeinitializerSequence>)>;
-
+ using GetJITDylibHeaderSendResultFn =
+ unique_function<void(Expected<ExecutorAddr>)>;
+ using GetJITDylibNameSendResultFn =
+ unique_function<void(Expected<StringRef>)>;
+ using PushInitializersSendResultFn =
+ unique_function<void(Expected<MachOJITDylibDepInfoMap>)>;
using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddr>)>;
static bool supportedTarget(const Triple &TT);
@@ -193,28 +184,24 @@ private:
// Associate MachOPlatform JIT-side runtime support functions with handlers.
Error associateRuntimeSupportFunctions(JITDylib &PlatformJD);
- void getInitializersBuildSequencePhase(SendInitializerSequenceFn SendResult,
- JITDylib &JD,
- std::vector<JITDylibSP> DFSLinkOrder);
+ // Implements rt_pushInitializers by making repeated async lookups for
+ // initializer symbols (each lookup may spawn more initializer symbols if
+ // it pulls in new materializers, e.g. from objects in a static library).
+ void pushInitializersLoop(PushInitializersSendResultFn SendResult,
+ JITDylibSP JD);
- void getInitializersLookupPhase(SendInitializerSequenceFn SendResult,
- JITDylib &JD);
-
- void rt_getInitializers(SendInitializerSequenceFn SendResult,
- StringRef JDName);
-
- void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
- ExecutorAddr Handle);
+ // Handle requests from the ORC runtime to push MachO initializer info.
+ void rt_pushInitializers(PushInitializersSendResultFn SendResult,
+ ExecutorAddr JDHeaderAddr);
+ // Handle requests for symbol addresses from the ORC runtime.
void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle,
StringRef SymbolName);
// Records the addresses of runtime symbols used by the platform.
Error bootstrapMachORuntime(JITDylib &PlatformJD);
- Error registerInitInfo(JITDylib &JD, ExecutorAddr ObjCImageInfoAddr,
- ArrayRef<jitlink::Section *> InitSections);
-
+ // Call the ORC runtime to create a pthread key.
Expected<uint64_t> createPThreadKey();
enum PlatformState { BootstrapPhase1, BootstrapPhase2, Initialized };
@@ -229,81 +216,24 @@ private:
ExecutorAddr orc_rt_macho_platform_shutdown;
ExecutorAddr orc_rt_macho_register_ehframe_section;
ExecutorAddr orc_rt_macho_deregister_ehframe_section;
- ExecutorAddr orc_rt_macho_register_thread_data_section;
- ExecutorAddr orc_rt_macho_deregister_thread_data_section;
+ ExecutorAddr orc_rt_macho_register_jitdylib;
+ ExecutorAddr orc_rt_macho_deregister_jitdylib;
+ ExecutorAddr orc_rt_macho_register_object_platform_sections;
+ ExecutorAddr orc_rt_macho_deregister_object_platform_sections;
ExecutorAddr orc_rt_macho_create_pthread_key;
DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols;
- // InitSeqs gets its own mutex to avoid locking the whole session when
- // aggregating data from the jitlink.
std::mutex PlatformMutex;
- DenseMap<JITDylib *, MachOJITDylibInitializers> InitSeqs;
-
+ DenseMap<JITDylib *, ExecutorAddr> JITDylibToHeaderAddr;
DenseMap<ExecutorAddr, JITDylib *> HeaderAddrToJITDylib;
DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey;
};
namespace shared {
-using SPSNamedExecutorAddrRangeSequenceMap =
- SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>;
-
-using SPSMachOJITDylibInitializers =
- SPSTuple<SPSString, SPSExecutorAddr, SPSExecutorAddr,
- SPSNamedExecutorAddrRangeSequenceMap>;
-
-using SPSMachOJITDylibInitializerSequence =
- SPSSequence<SPSMachOJITDylibInitializers>;
-
-/// Serialization traits for MachOJITDylibInitializers.
-template <>
-class SPSSerializationTraits<SPSMachOJITDylibInitializers,
- MachOJITDylibInitializers> {
-public:
- static size_t size(const MachOJITDylibInitializers &MOJDIs) {
- return SPSMachOJITDylibInitializers::AsArgList::size(
- MOJDIs.Name, MOJDIs.MachOHeaderAddress, MOJDIs.ObjCImageInfoAddress,
- MOJDIs.InitSections);
- }
-
- static bool serialize(SPSOutputBuffer &OB,
- const MachOJITDylibInitializers &MOJDIs) {
- return SPSMachOJITDylibInitializers::AsArgList::serialize(
- OB, MOJDIs.Name, MOJDIs.MachOHeaderAddress, MOJDIs.ObjCImageInfoAddress,
- MOJDIs.InitSections);
- }
-
- static bool deserialize(SPSInputBuffer &IB,
- MachOJITDylibInitializers &MOJDIs) {
- return SPSMachOJITDylibInitializers::AsArgList::deserialize(
- IB, MOJDIs.Name, MOJDIs.MachOHeaderAddress, MOJDIs.ObjCImageInfoAddress,
- MOJDIs.InitSections);
- }
-};
-
-using SPSMachOJITDylibDeinitializers = SPSEmpty;
-
-using SPSMachOJITDylibDeinitializerSequence =
- SPSSequence<SPSMachOJITDylibDeinitializers>;
-
-template <>
-class SPSSerializationTraits<SPSMachOJITDylibDeinitializers,
- MachOJITDylibDeinitializers> {
-public:
- static size_t size(const MachOJITDylibDeinitializers &MOJDDs) { return 0; }
-
- static bool serialize(SPSOutputBuffer &OB,
- const MachOJITDylibDeinitializers &MOJDDs) {
- return true;
- }
-
- static bool deserialize(SPSInputBuffer &IB,
- MachOJITDylibDeinitializers &MOJDDs) {
- MOJDDs = MachOJITDylibDeinitializers();
- return true;
- }
-};
+using SPSNamedExecutorAddrRangeSequence =
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRange>>;
} // end namespace shared
} // end namespace orc
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h b/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h
new file mode 100644
index 000000000000..d023bfbdb5b6
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h
@@ -0,0 +1,115 @@
+//===- MemoryMapper.h - Cross-process memory mapper -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Cross-process (and in-process) memory mapping and transfer
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H
+#define LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+/// Manages mapping, content transfer and protections for JIT memory
+class MemoryMapper {
+public:
+ /// Represents a single allocation containing multiple segments and
+ /// initialization and deinitialization actions
+ struct AllocInfo {
+ struct SegInfo {
+ ExecutorAddrDiff Offset;
+ const char *WorkingMem;
+ size_t ContentSize;
+ size_t ZeroFillSize;
+ unsigned Prot;
+ };
+
+ ExecutorAddr MappingBase;
+ std::vector<SegInfo> Segments;
+ shared::AllocActions Actions;
+ };
+
+ using OnReservedFunction = unique_function<void(Expected<ExecutorAddrRange>)>;
+
+ /// Reserves address space in executor process
+ virtual void reserve(size_t NumBytes, OnReservedFunction OnReserved) = 0;
+
+ /// Provides working memory
+ virtual char *prepare(ExecutorAddr Addr, size_t ContentSize) = 0;
+
+ using OnInitializedFunction = unique_function<void(Expected<ExecutorAddr>)>;
+
+ /// Ensures that executor memory is synchronized with the working copy
+ /// memory, sends functions to be called after initialization and before
+ /// deinitialization, and applies memory protections.
+ /// Returns a unique address identifying the allocation. This address should
+ /// be passed to deinitialize to run deallocation actions (and reset
+ /// permissions where possible).
+ virtual void initialize(AllocInfo &AI,
+ OnInitializedFunction OnInitialized) = 0;
+
+ using OnDeinitializedFunction = unique_function<void(Error)>;
+
+ /// Runs previously specified deinitialization actions.
+ /// The executor addresses returned by initialize should be passed in.
+ virtual void deinitialize(ArrayRef<ExecutorAddr> Allocations,
+ OnDeinitializedFunction OnDeInitialized) = 0;
+
+ using OnReleasedFunction = unique_function<void(Error)>;
+
+ /// Release address space acquired through reserve()
+ virtual void release(ArrayRef<ExecutorAddr> Reservations,
+ OnReleasedFunction OnRelease) = 0;
+
+ virtual ~MemoryMapper();
+};
+
+class InProcessMemoryMapper final : public MemoryMapper {
+public:
+ InProcessMemoryMapper() {}
+
+ void reserve(size_t NumBytes, OnReservedFunction OnReserved) override;
+
+ void initialize(AllocInfo &AI, OnInitializedFunction OnInitialized) override;
+
+ char *prepare(ExecutorAddr Addr, size_t ContentSize) override;
+
+ void deinitialize(ArrayRef<ExecutorAddr> Allocations,
+ OnDeinitializedFunction OnDeInitialized) override;
+
+ void release(ArrayRef<ExecutorAddr> Reservations,
+ OnReleasedFunction OnRelease) override;
+
+ ~InProcessMemoryMapper() override;
+
+private:
+ struct Allocation {
+ std::vector<shared::WrapperFunctionCall> DeinitializationActions;
+ };
+ using AllocationMap = DenseMap<ExecutorAddr, Allocation>;
+
+ struct Reservation {
+ size_t Size;
+ std::vector<ExecutorAddr> Allocations;
+ };
+ using ReservationMap = DenseMap<void *, Reservation>;
+
+ std::mutex Mutex;
+ ReservationMap Reservations;
+ AllocationMap Allocations;
+};
+
+} // namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H
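For orientation, the new interface composes as reserve, prepare, initialize, deinitialize, release. A minimal sketch of driving it in-process follows; the sizes, the Prot value, and the error handling are illustrative assumptions, not code from this patch:

#include "llvm/ExecutionEngine/Orc/MemoryMapper.h"
#include <cstring>

using namespace llvm;
using namespace llvm::orc;

// Stage one 16-byte segment through a MemoryMapper.
void stageSegment(MemoryMapper &Mapper) {
  Mapper.reserve(4096, [&](Expected<ExecutorAddrRange> R) {
    if (!R)
      return; // a real client would propagate R.takeError()
    ExecutorAddr Base = R->Start;

    // Fill working memory that corresponds to the target address Base.
    char *WorkingMem = Mapper.prepare(Base, 16);
    std::memset(WorkingMem, 0, 16);

    MemoryMapper::AllocInfo AI;
    AI.MappingBase = Base;
    AI.Segments.push_back({/*Offset=*/0, WorkingMem, /*ContentSize=*/16,
                           /*ZeroFillSize=*/0, /*Prot=*/0});

    // Transfers content, runs allocation actions, and applies protections;
    // the returned address keys a later deinitialize() call.
    Mapper.initialize(AI, [](Expected<ExecutorAddr> Key) { /* ... */ });
  });
}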
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 82dfdc270128..c5c2780bc9ee 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -330,6 +330,45 @@ public:
JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
};
+/// riscv64 support.
+///
+/// RISC-V 64 supports lazy JITing.
+class OrcRiscv64 {
+public:
+ static constexpr unsigned PointerSize = 8;
+ static constexpr unsigned TrampolineSize = 16;
+ static constexpr unsigned StubSize = 16;
+ static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31;
+ static constexpr unsigned ResolverCodeSize = 0x148;
+
+ /// Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
+ ///
+ /// ReentryFnAddr should be the address of a function whose signature matches
+ /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr
+ /// argument of writeResolverCode will be passed as the second argument to
+ /// the function at ReentryFnAddr.
+ static void writeResolverCode(char *ResolverWorkingMem,
+ JITTargetAddress ResolverTargetAddress,
+ JITTargetAddress ReentryFnAddr,
+ JITTargetAddress ReentryCtxAddr);
+
+ /// Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(char *TrampolineBlockWorkingMem,
+ JITTargetAddress TrampolineBlockTargetAddress,
+ JITTargetAddress ResolverFnAddr,
+ unsigned NumTrampolines);
+ /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
+ /// Stubs will be written as if linked at StubsBlockTargetAddress, with the
+ /// Nth stub using the Nth pointer in memory starting at
+ /// PointersBlockTargetAddress.
+ static void writeIndirectStubsBlock(
+ char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+};
+
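A hedged sketch of how a client might drive the new RISC-V support; memory allocation, W^X handling, and the concrete addresses are elided or assumed:

#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"

using namespace llvm;
using namespace llvm::orc;

// Emit the resolver into caller-provided working memory. WorkingMem must
// hold at least OrcRiscv64::ResolverCodeSize (0x148) bytes.
void emitRiscvResolver(char *WorkingMem, JITTargetAddress ResolverAddr,
                       JITTargetAddress ReentryFn,
                       JITTargetAddress ReentryCtx) {
  OrcRiscv64::writeResolverCode(WorkingMem, ResolverAddr, ReentryFn,
                                ReentryCtx);
}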
} // end namespace orc
} // end namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
index dc080cfc79d1..5d545f8abdb9 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
@@ -43,13 +43,22 @@ public:
/// Cast this ExecutorAddr to a pointer of the given type.
/// Warning: This should only be used when JITing in-process.
- template <typename T> T toPtr() const {
- static_assert(std::is_pointer<T>::value, "T must be a pointer type");
+ template <typename T>
+ std::enable_if_t<std::is_pointer<T>::value, T> toPtr() const {
uintptr_t IntPtr = static_cast<uintptr_t>(Addr);
assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t");
return reinterpret_cast<T>(IntPtr);
}
+ /// Cast this ExecutorAddr to a pointer of the given function type.
+ /// Warning: This should only be used when JITing in-process.
+ template <typename T>
+ std::enable_if_t<std::is_function<T>::value, T *> toPtr() const {
+ uintptr_t IntPtr = static_cast<uintptr_t>(Addr);
+ assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t");
+ return reinterpret_cast<T *>(IntPtr);
+ }
+
uint64_t getValue() const { return Addr; }
void setValue(uint64_t Addr) { this->Addr = Addr; }
bool isNull() const { return Addr == 0; }
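The split into two SFINAE-selected overloads lets callers name a function type directly instead of spelling out the pointer type. A small illustration (in-process only, as the warning above notes):

#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"

int add(int X, int Y) { return X + Y; }

void demo() {
  auto A = llvm::orc::ExecutorAddr::fromPtr(&add);
  auto *P1 = A.toPtr<int (*)(int, int)>(); // pointer-type overload
  auto *P2 = A.toPtr<int(int, int)>();     // new function-type overload
  (void)(P1(1, 2) + P2(3, 4));             // both call 'add'
}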
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
index 302b60b80fd0..9be58e9f0fa9 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
@@ -586,7 +586,7 @@ SPSSerializableExpected<T> toSPSSerializable(Expected<T> E) {
if (E)
return {true, std::move(*E), {}};
else
- return {false, {}, toString(E.takeError())};
+ return {false, T(), toString(E.takeError())};
}
template <typename T>
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
index a138f60a7756..b7bba7a48786 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -88,7 +88,7 @@ private:
for (auto &Callee : CandidateSet) {
auto ImplSymbol = AliaseeImplTable.getImplFor(Callee);
// try to distinguish already compiled & library symbols
- if (!ImplSymbol.hasValue())
+ if (!ImplSymbol)
continue;
const auto &ImplSymbolName = ImplSymbol.getPointer()->first;
JITDylib *ImplJD = ImplSymbol.getPointer()->second;
@@ -175,9 +175,8 @@ public:
using ResultEval = std::function<IRlikiesStrRef(Function &)>;
using TargetAndLikelies = DenseMap<SymbolStringPtr, SymbolNameSet>;
- IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer,
- Speculator &Spec, MangleAndInterner &Mangle,
- ResultEval Interpreter)
+ IRSpeculationLayer(ExecutionSession &ES, IRLayer &BaseLayer, Speculator &Spec,
+ MangleAndInterner &Mangle, ResultEval Interpreter)
: IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer),
S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {}
@@ -198,7 +197,7 @@ private:
return InternedNames;
}
- IRCompileLayer &NextLayer;
+ IRLayer &NextLayer;
Speculator &S;
MangleAndInterner &Mangle;
ResultEval QueryAnalysis;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
index 63abb196ba49..7e433965c922 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
@@ -19,6 +19,9 @@
#include <mutex>
namespace llvm {
+
+class raw_ostream;
+
namespace orc {
class SymbolStringPtr;
@@ -26,6 +29,10 @@ class SymbolStringPtr;
/// String pool for symbol names used by the JIT.
class SymbolStringPool {
friend class SymbolStringPtr;
+
+ // Implemented in DebugUtils.h.
+ friend raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP);
+
public:
/// Destroy a SymbolStringPool.
~SymbolStringPool();
diff --git a/llvm/include/llvm/FileCheck/FileCheck.h b/llvm/include/llvm/FileCheck/FileCheck.h
index 7a6c98db3029..d6d8dc531e10 100644
--- a/llvm/include/llvm/FileCheck/FileCheck.h
+++ b/llvm/include/llvm/FileCheck/FileCheck.h
@@ -14,14 +14,17 @@
#define LLVM_FILECHECK_FILECHECK_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SMLoc.h"
#include <bitset>
+#include <memory>
#include <string>
#include <vector>
namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+template <typename T> class SmallVectorImpl;
/// Contains info about various FileCheck options.
struct FileCheckRequest {
@@ -45,6 +48,7 @@ namespace Check {
enum FileCheckKind {
CheckNone = 0,
+ CheckMisspelled,
CheckPlain,
CheckNext,
CheckSame,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index c5abb16dd9e5..5f1d335ef04f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -122,13 +122,12 @@ def OMPC_ProcBind : Clause<"proc_bind"> {
];
}
-// static and auto are C++ keywords so need a capital to disambiguate.
-def OMP_SCHEDULE_Static : ClauseVal<"Static", 2, 1> {}
-def OMP_SCHEDULE_Dynamic : ClauseVal<"Dynamic", 3, 1> {}
-def OMP_SCHEDULE_Guided : ClauseVal<"Guided", 4, 1> {}
-def OMP_SCHEDULE_Auto : ClauseVal<"Auto", 5, 1> {}
-def OMP_SCHEDULE_Runtime : ClauseVal<"Runtime", 6, 1> {}
-def OMP_SCHEDULE_Default : ClauseVal<"Default", 7, 0> { let isDefault = 1; }
+def OMP_SCHEDULE_Static : ClauseVal<"static", 2, 1> {}
+def OMP_SCHEDULE_Dynamic : ClauseVal<"dynamic", 3, 1> {}
+def OMP_SCHEDULE_Guided : ClauseVal<"guided", 4, 1> {}
+def OMP_SCHEDULE_Auto : ClauseVal<"auto", 5, 1> {}
+def OMP_SCHEDULE_Runtime : ClauseVal<"runtime", 6, 1> {}
+def OMP_SCHEDULE_Default : ClauseVal<"default", 7, 0> { let isDefault = 1; }
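With the lower-case spellings, the ClauseVal names now match the schedule kinds exactly as they appear in user code; for example (n and work() are placeholders):

// "dynamic" in the clause corresponds directly to OMP_SCHEDULE_Dynamic.
#pragma omp parallel for schedule(dynamic, 4)
for (int i = 0; i < n; ++i)
  work(i);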
def OMPC_Schedule : Clause<"schedule"> {
let clangClass = "OMPScheduleClause";
@@ -164,6 +163,25 @@ def OMPC_MemoryOrder : Clause<"memory_order"> {
];
}
+def OMP_CANCELLATION_CONSTRUCT_Parallel : ClauseVal<"parallel", 1, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Loop : ClauseVal<"loop", 2, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Sections : ClauseVal<"sections", 3, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_Taskgroup : ClauseVal<"taskgroup", 4, 1> {}
+def OMP_CANCELLATION_CONSTRUCT_None : ClauseVal<"none", 5, 0> {
+ let isDefault = 1;
+}
+
+def OMPC_CancellationConstructType : Clause<"cancellation_construct_type"> {
+ let enumClauseValue = "CancellationConstructType";
+ let allowedClauseValues = [
+ OMP_CANCELLATION_CONSTRUCT_Parallel,
+ OMP_CANCELLATION_CONSTRUCT_Loop,
+ OMP_CANCELLATION_CONSTRUCT_Sections,
+ OMP_CANCELLATION_CONSTRUCT_Taskgroup,
+ OMP_CANCELLATION_CONSTRUCT_None
+ ];
+}
+
def OMPC_Ordered : Clause<"ordered"> {
let clangClass = "OMPOrderedClause";
let flangClass = "ScalarIntConstantExpr";
@@ -254,12 +272,18 @@ def OMPC_IsDevicePtr : Clause<"is_device_ptr"> {
let flangClass = "Name";
let isValueList = true;
}
+def OMPC_HasDeviceAddr : Clause<"has_device_addr"> {
+ let clangClass = "OMPHasDeviceAddrClause";
+ let flangClass = "Name";
+ let isValueList = true;
+}
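An illustrative OpenMP 5.1 use of the new clause, loosely following the spec's example (x is a placeholder):

int x[100];
#pragma omp target data map(to: x) use_device_addr(x)
{
  // x already refers to a device address, so it is passed through
  // untranslated into the target region.
  #pragma omp target has_device_addr(x)
  x[0] = 1;
}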
def OMPC_TaskReduction : Clause<"task_reduction"> {
let clangClass = "OMPTaskReductionClause";
let flangClass = "OmpReductionClause";
}
def OMPC_InReduction : Clause<"in_reduction"> {
let clangClass = "OMPInReductionClause";
+ let flangClass = "OmpInReductionClause";
}
def OMPC_UnifiedAddress : Clause<"unified_address"> {
let clangClass = "OMPUnifiedAddressClause";
@@ -557,7 +581,9 @@ def OMP_Target : Directive<"target"> {
VersionedClause<OMPC_Depend>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_InReduction, 50>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_UsesAllocators, 50>
];
@@ -590,11 +616,20 @@ def OMP_Requires : Directive<"requires"> {
let allowedClauses = [
VersionedClause<OMPC_UnifiedAddress>,
VersionedClause<OMPC_UnifiedSharedMemory>,
- VersionedClause<OMPC_ReverseOffload>,
+ // OpenMP 5.2 spec: if an implementation does not support a requirement
+ // (reverse offload in this case) it must terminate with a compile-time
+ // error.
+ // Setting the supported version for reverse_offload to a distant future
+ // version 9.9 so that its partial support can be tested in the meantime.
+ //
+ // TODO: Correct this supported version number whenever the complete
+ // implementation of reverse_offload is available.
+ VersionedClause<OMPC_ReverseOffload, 99>,
VersionedClause<OMPC_DynamicAllocators>,
VersionedClause<OMPC_AtomicDefaultMemOrder>
];
}
+def OMP_Nothing : Directive<"nothing"> {}
def OMP_TargetData : Directive<"target data"> {
let allowedClauses = [
VersionedClause<OMPC_UseDevicePtr>,
@@ -645,6 +680,7 @@ def OMP_TargetParallel : Directive<"target parallel"> {
VersionedClause<OMPC_Shared>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_UsesAllocators, 50>
];
@@ -677,6 +713,7 @@ def OMP_TargetParallelFor : Directive<"target parallel for"> {
VersionedClause<OMPC_Ordered>,
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_UsesAllocators, 50>
@@ -693,6 +730,7 @@ def OMP_TargetParallelDo : Directive<"target parallel do"> {
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Allocator>,
VersionedClause<OMPC_Order>,
VersionedClause<OMPC_UsesAllocators>,
@@ -825,6 +863,21 @@ def OMP_ParallelMaster : Directive<"parallel master"> {
VersionedClause<OMPC_Allocate>
];
}
+def OMP_ParallelMasked : Directive<"parallel masked"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_NumThreads>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_Shared>,
+ VersionedClause<OMPC_Copyin>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_ProcBind>,
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_Filter>
+ ];
+}
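An illustrative use of the new combined construct (setup() is a placeholder); only the thread selected by filter() executes the masked region:

#pragma omp parallel masked num_threads(8) filter(2)
{
  setup(); // runs on the thread with id 2 only
}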
def OMP_ParallelSections : Directive<"parallel sections"> {
let allowedClauses = [
VersionedClause<OMPC_If>,
@@ -1126,6 +1179,7 @@ def OMP_TargetParallelForSimd : Directive<"target parallel for simd"> {
VersionedClause<OMPC_SimdLen>,
VersionedClause<OMPC_Aligned>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_NonTemporal, 50>,
VersionedClause<OMPC_Order, 50>,
@@ -1156,6 +1210,7 @@ def OMP_TargetParallelDoSimd : Directive<"target parallel do simd"> {
VersionedClause<OMPC_SimdLen>,
VersionedClause<OMPC_Aligned>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_NonTemporal>,
VersionedClause<OMPC_Order>,
@@ -1169,6 +1224,7 @@ def OMP_TargetSimd : Directive<"target simd"> {
VersionedClause<OMPC_Depend>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_LastPrivate>,
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_Map>,
@@ -1342,6 +1398,7 @@ def OMP_TargetTeams : Directive<"target teams"> {
VersionedClause<OMPC_Depend>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_UsesAllocators, 50>,
@@ -1365,6 +1422,7 @@ def OMP_TargetTeamsDistribute : Directive<"target teams distribute"> {
VersionedClause<OMPC_Depend>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_UsesAllocators, 50>,
@@ -1395,6 +1453,7 @@ def OMP_TargetTeamsDistributeParallelFor :
VersionedClause<OMPC_DefaultMap>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Default>,
VersionedClause<OMPC_Shared>,
VersionedClause<OMPC_Reduction>,
@@ -1420,6 +1479,7 @@ def OMP_TargetTeamsDistributeParallelDo :
VersionedClause<OMPC_Depend>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_UsesAllocators>,
@@ -1456,6 +1516,7 @@ def OMP_TargetTeamsDistributeParallelForSimd :
VersionedClause<OMPC_DefaultMap>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Default>,
VersionedClause<OMPC_Shared>,
VersionedClause<OMPC_Reduction>,
@@ -1485,6 +1546,7 @@ def OMP_TargetTeamsDistributeParallelDoSimd :
VersionedClause<OMPC_Depend>,
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_UsesAllocators>,
@@ -1523,6 +1585,7 @@ def OMP_TargetTeamsDistributeSimd :
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_LastPrivate>,
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_Map>,
@@ -1581,6 +1644,28 @@ def OMP_MasterTaskloop : Directive<"master taskloop"> {
VersionedClause<OMPC_Allocate>
];
}
+def OMP_MaskedTaskloop : Directive<"masked taskloop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_Shared>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Final>,
+ VersionedClause<OMPC_Untied>,
+ VersionedClause<OMPC_Mergeable>,
+ VersionedClause<OMPC_Priority>,
+ VersionedClause<OMPC_GrainSize>,
+ VersionedClause<OMPC_NoGroup>,
+ VersionedClause<OMPC_NumTasks>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_InReduction>,
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_Filter>
+ ];
+}
def OMP_ParallelMasterTaskloop :
Directive<"parallel master taskloop"> {
let allowedClauses = [
@@ -1605,6 +1690,31 @@ def OMP_ParallelMasterTaskloop :
VersionedClause<OMPC_Copyin>
];
}
+def OMP_ParallelMaskedTaskloop :
+ Directive<"parallel masked taskloop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_Shared>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Final>,
+ VersionedClause<OMPC_Untied>,
+ VersionedClause<OMPC_Mergeable>,
+ VersionedClause<OMPC_Priority>,
+ VersionedClause<OMPC_GrainSize>,
+ VersionedClause<OMPC_NoGroup>,
+ VersionedClause<OMPC_NumTasks>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_NumThreads>,
+ VersionedClause<OMPC_ProcBind>,
+ VersionedClause<OMPC_Copyin>,
+ VersionedClause<OMPC_Filter>
+ ];
+}
def OMP_MasterTaskloopSimd : Directive<"master taskloop simd"> {
let allowedClauses = [
VersionedClause<OMPC_If>,
@@ -1632,6 +1742,34 @@ def OMP_MasterTaskloopSimd : Directive<"master taskloop simd"> {
VersionedClause<OMPC_Order, 50>
];
}
+def OMP_MaskedTaskloopSimd : Directive<"masked taskloop simd"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_Shared>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Final>,
+ VersionedClause<OMPC_Untied>,
+ VersionedClause<OMPC_Mergeable>,
+ VersionedClause<OMPC_Priority>,
+ VersionedClause<OMPC_Linear>,
+ VersionedClause<OMPC_Aligned>,
+ VersionedClause<OMPC_SafeLen>,
+ VersionedClause<OMPC_SimdLen>,
+ VersionedClause<OMPC_GrainSize>,
+ VersionedClause<OMPC_NoGroup>,
+ VersionedClause<OMPC_NumTasks>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_InReduction>,
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_NonTemporal, 50>,
+ VersionedClause<OMPC_Order, 50>,
+ VersionedClause<OMPC_Filter>
+ ];
+}
def OMP_ParallelMasterTaskloopSimd :
Directive<"parallel master taskloop simd"> {
let allowedClauses = [
@@ -1662,6 +1800,37 @@ def OMP_ParallelMasterTaskloopSimd :
VersionedClause<OMPC_Order, 50>
];
}
+def OMP_ParallelMaskedTaskloopSimd :
+ Directive<"parallel masked taskloop simd"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_Shared>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Final>,
+ VersionedClause<OMPC_Untied>,
+ VersionedClause<OMPC_Mergeable>,
+ VersionedClause<OMPC_Priority>,
+ VersionedClause<OMPC_GrainSize>,
+ VersionedClause<OMPC_NoGroup>,
+ VersionedClause<OMPC_NumTasks>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_NumThreads>,
+ VersionedClause<OMPC_ProcBind>,
+ VersionedClause<OMPC_Copyin>,
+ VersionedClause<OMPC_Linear>,
+ VersionedClause<OMPC_Aligned>,
+ VersionedClause<OMPC_SafeLen>,
+ VersionedClause<OMPC_SimdLen>,
+ VersionedClause<OMPC_NonTemporal, 50>,
+ VersionedClause<OMPC_Order, 50>,
+ VersionedClause<OMPC_Filter>
+ ];
+}
def OMP_Depobj : Directive<"depobj"> {
let allowedClauses = [
VersionedClause<OMPC_Depend, 50>,
@@ -1734,6 +1903,7 @@ def OMP_dispatch : Directive<"dispatch"> {
let allowedClauses = [
VersionedClause<OMPC_Device>,
VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_NoWait>,
VersionedClause<OMPC_Depend>,
VersionedClause<OMPC_Novariants>,
@@ -1757,6 +1927,99 @@ def OMP_loop : Directive<"loop"> {
VersionedClause<OMPC_Order>,
];
}
+def OMP_teams_loop : Directive<"teams loop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_Shared>,
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Bind, 50>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_NumTeams>,
+ VersionedClause<OMPC_Order>,
+ VersionedClause<OMPC_ThreadLimit>,
+ ];
+}
+def OMP_target_teams_loop : Directive<"target teams loop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_Depend>,
+ VersionedClause<OMPC_DefaultMap>,
+ VersionedClause<OMPC_Device>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Map>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_Shared>,
+ VersionedClause<OMPC_UsesAllocators, 50>
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Bind, 50>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_NumTeams>,
+ VersionedClause<OMPC_Order>,
+ VersionedClause<OMPC_ThreadLimit>,
+ ];
+}
+def OMP_parallel_loop : Directive<"parallel loop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_Copyin>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_Shared>,
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Bind, 50>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_NumThreads>,
+ VersionedClause<OMPC_Order>,
+ VersionedClause<OMPC_ProcBind>,
+ ];
+}
+def OMP_target_parallel_loop : Directive<"target parallel loop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_Allocate>,
+ VersionedClause<OMPC_Copyin>,
+ VersionedClause<OMPC_Depend>,
+ VersionedClause<OMPC_Device>,
+ VersionedClause<OMPC_FirstPrivate>,
+ VersionedClause<OMPC_IsDevicePtr>,
+ VersionedClause<OMPC_HasDeviceAddr, 51>,
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Map>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_Reduction>,
+ VersionedClause<OMPC_Shared>,
+ VersionedClause<OMPC_UsesAllocators, 50>,
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Bind, 50>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Default>,
+ VersionedClause<OMPC_DefaultMap>,
+ VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_NumThreads>,
+ VersionedClause<OMPC_Order>,
+ VersionedClause<OMPC_ProcBind>,
+ ];
+}
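An illustrative use of one of the new combined 'loop' directives (a and n are placeholders); 'loop' leaves the implementation free to pick the concrete work distribution across teams and threads:

#pragma omp target teams loop map(tofrom: a[0:n])
for (int i = 0; i < n; ++i)
  a[i] += 1.0;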
def OMP_Metadirective : Directive<"metadirective"> {
let allowedClauses = [VersionedClause<OMPC_When>];
let allowedOnceClauses = [VersionedClause<OMPC_Default>];
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index bee90281e086..76104f6bc9cf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -74,26 +74,114 @@ enum class IdentFlag {
/// \note This needs to be kept in sync with kmp.h enum sched_type.
/// Todo: Update kmp.h to include this file, and remove the enums in kmp.h
-/// To complete this, more enum values will need to be moved here.
enum class OMPScheduleType {
- StaticChunked = 33,
- Static = 34, // static unspecialized
- DistributeChunked = 91,
- Distribute = 92,
- DynamicChunked = 35,
- GuidedChunked = 36, // guided unspecialized
- Runtime = 37,
- Auto = 38, // auto
-
- StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd)
- GuidedSimd = 46, // guided with chunk adjustment
- RuntimeSimd = 47, // runtime with chunk adjustment
-
- ModifierMonotonic =
- (1 << 29), // Set if the monotonic schedule modifier was present
- ModifierNonmonotonic =
- (1 << 30), // Set if the nonmonotonic schedule modifier was present
- ModifierMask = ModifierMonotonic | ModifierNonmonotonic,
+ // For typed comparisons, not a valid schedule
+ None = 0,
+
+ // Schedule algorithms
+ BaseStaticChunked = 1,
+ BaseStatic = 2,
+ BaseDynamicChunked = 3,
+ BaseGuidedChunked = 4,
+ BaseRuntime = 5,
+ BaseAuto = 6,
+ BaseTrapezoidal = 7,
+ BaseGreedy = 8,
+ BaseBalanced = 9,
+ BaseGuidedIterativeChunked = 10,
+ BaseGuidedAnalyticalChunked = 11,
+ BaseSteal = 12,
+
+ // with chunk adjustment (e.g., simd)
+ BaseStaticBalancedChunked = 13,
+ BaseGuidedSimd = 14,
+ BaseRuntimeSimd = 15,
+
+ // Static schedule algorithms for distribute
+ BaseDistributeChunked = 27,
+ BaseDistribute = 28,
+
+ // Modifier flags to be combined with schedule algorithms
+ ModifierUnordered = (1 << 5),
+ ModifierOrdered = (1 << 6),
+ ModifierNomerge = (1 << 7),
+ ModifierMonotonic = (1 << 29),
+ ModifierNonmonotonic = (1 << 30),
+
+ // Masks combining multiple flags
+ OrderingMask = ModifierUnordered | ModifierOrdered | ModifierNomerge,
+ MonotonicityMask = ModifierMonotonic | ModifierNonmonotonic,
+ ModifierMask = OrderingMask | MonotonicityMask,
+
+ // valid schedule type values, without monotonicity flags
+ UnorderedStaticChunked = BaseStaticChunked | ModifierUnordered, // 33
+ UnorderedStatic = BaseStatic | ModifierUnordered, // 34
+ UnorderedDynamicChunked = BaseDynamicChunked | ModifierUnordered, // 35
+ UnorderedGuidedChunked = BaseGuidedChunked | ModifierUnordered, // 36
+ UnorderedRuntime = BaseRuntime | ModifierUnordered, // 37
+ UnorderedAuto = BaseAuto | ModifierUnordered, // 38
+ UnorderedTrapezoidal = BaseTrapezoidal | ModifierUnordered, // 39
+ UnorderedGreedy = BaseGreedy | ModifierUnordered, // 40
+ UnorderedBalanced = BaseBalanced | ModifierUnordered, // 41
+ UnorderedGuidedIterativeChunked =
+ BaseGuidedIterativeChunked | ModifierUnordered, // 42
+ UnorderedGuidedAnalyticalChunked =
+ BaseGuidedAnalyticalChunked | ModifierUnordered, // 43
+ UnorderedSteal = BaseSteal | ModifierUnordered, // 44
+
+ UnorderedStaticBalancedChunked =
+ BaseStaticBalancedChunked | ModifierUnordered, // 45
+ UnorderedGuidedSimd = BaseGuidedSimd | ModifierUnordered, // 46
+ UnorderedRuntimeSimd = BaseRuntimeSimd | ModifierUnordered, // 47
+
+ OrderedStaticChunked = BaseStaticChunked | ModifierOrdered, // 65
+ OrderedStatic = BaseStatic | ModifierOrdered, // 66
+ OrderedDynamicChunked = BaseDynamicChunked | ModifierOrdered, // 67
+ OrderedGuidedChunked = BaseGuidedChunked | ModifierOrdered, // 68
+ OrderedRuntime = BaseRuntime | ModifierOrdered, // 69
+ OrderedAuto = BaseAuto | ModifierOrdered, // 70
+ OrderedTrapezoidal = BaseTrapezoidal | ModifierOrdered, // 71
+
+ OrderedDistributeChunked = BaseDistributeChunked | ModifierOrdered, // 91
+ OrderedDistribute = BaseDistribute | ModifierOrdered, // 92
+
+ NomergeUnorderedStaticChunked =
+ BaseStaticChunked | ModifierUnordered | ModifierNomerge, // 161
+ NomergeUnorderedStatic =
+ BaseStatic | ModifierUnordered | ModifierNomerge, // 162
+ NomergeUnorderedDynamicChunked =
+ BaseDynamicChunked | ModifierUnordered | ModifierNomerge, // 163
+ NomergeUnorderedGuidedChunked =
+ BaseGuidedChunked | ModifierUnordered | ModifierNomerge, // 164
+ NomergeUnorderedRuntime =
+ BaseRuntime | ModifierUnordered | ModifierNomerge, // 165
+ NomergeUnorderedAuto = BaseAuto | ModifierUnordered | ModifierNomerge, // 166
+ NomergeUnorderedTrapezoidal =
+ BaseTrapezoidal | ModifierUnordered | ModifierNomerge, // 167
+ NomergeUnorderedGreedy =
+ BaseGreedy | ModifierUnordered | ModifierNomerge, // 168
+ NomergeUnorderedBalanced =
+ BaseBalanced | ModifierUnordered | ModifierNomerge, // 169
+ NomergeUnorderedGuidedIterativeChunked =
+ BaseGuidedIterativeChunked | ModifierUnordered | ModifierNomerge, // 170
+ NomergeUnorderedGuidedAnalyticalChunked =
+ BaseGuidedAnalyticalChunked | ModifierUnordered | ModifierNomerge, // 171
+ NomergeUnorderedSteal =
+ BaseSteal | ModifierUnordered | ModifierNomerge, // 172
+
+ NomergeOrderedStaticChunked =
+ BaseStaticChunked | ModifierOrdered | ModifierNomerge, // 193
+ NomergeOrderedStatic = BaseStatic | ModifierOrdered | ModifierNomerge, // 194
+ NomergeOrderedDynamicChunked =
+ BaseDynamicChunked | ModifierOrdered | ModifierNomerge, // 195
+ NomergeOrderedGuidedChunked =
+ BaseGuidedChunked | ModifierOrdered | ModifierNomerge, // 196
+ NomergeOrderedRuntime =
+ BaseRuntime | ModifierOrdered | ModifierNomerge, // 197
+ NomergeOrderedAuto = BaseAuto | ModifierOrdered | ModifierNomerge, // 198
+ NomergeOrderedTrapezoidal =
+ BaseTrapezoidal | ModifierOrdered | ModifierNomerge, // 199
+
LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask)
};
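A quick sanity check that the composed values reproduce the legacy kmp.h numbering (the comments in the enum give the same totals):

static_assert((1 | (1 << 5)) == 33,  // BaseStaticChunked | ModifierUnordered
              "UnorderedStaticChunked keeps the old StaticChunked value");
static_assert((28 | (1 << 6)) == 92, // BaseDistribute | ModifierOrdered
              "OrderedDistribute keeps the old Distribute value");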
@@ -116,6 +204,9 @@ enum class AddressSpace : unsigned {
/// \note This needs to be kept in sync with interop.h enum kmp_interop_type_t.:
enum class OMPInteropType { Unknown, Target, TargetSync };
+/// Atomic compare operations. Currently OpenMP only supports ==, >, and <.
+enum class OMPAtomicCompareOp : unsigned { EQ, MIN, MAX };
+
} // end namespace omp
} // end namespace llvm
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
index 544f698655a4..b13b74ceab86 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
@@ -15,14 +15,14 @@
#ifndef LLVM_FRONTEND_OPENMP_OMPCONTEXT_H
#define LLVM_FRONTEND_OPENMP_OMPCONTEXT_H
-#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
namespace llvm {
+class Triple;
namespace omp {
/// OpenMP Context related IDs and helpers
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index f60debe8411c..8a6b1c7d412d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -23,6 +23,52 @@
namespace llvm {
class CanonicalLoopInfo;
+/// Move the instructions after an InsertPoint to the beginning of another
+/// BasicBlock.
+///
+/// The instructions after \p IP are moved to the beginning of \p New which must
+/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
+/// \p New will be added such that there is no semantic change. Otherwise, the
+/// \p IP insert block remains degenerate and it is up to the caller to insert a
+/// terminator.
+void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
+ bool CreateBranch);
+
+/// Splice a BasicBlock at an IRBuilder's current insertion point. The
+/// builder's new insertion location stays anchored after the instruction that
+/// precedes the insertion point (instead of moving along with the instruction
+/// that the InsertPoint stores internally).
+void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
+
+/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
+/// (missing the terminator).
+///
+/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
+/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
+/// is true, a branch to the new successor will be created such that
+/// semantically there is no change; otherwise the block of the insertion point
+/// remains degenerate and it is the caller's responsibility to insert a
+/// terminator. Returns the new successor block.
+BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
+ llvm::Twine Name = {});
+
+/// Split a BasicBlock at \p Builder's insertion point, even if the block is
+/// degenerate (missing the terminator). The builder's new insertion location
+/// stays anchored after the instruction that precedes the insertion point
+/// (instead of moving along with the instruction the InsertPoint stores
+/// internally).
+BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
+ llvm::Twine Name = {});
+
+/// Split a BasicBlock at \p Builder's insertion point, even if the block is
+/// degenerate (missing the terminator). The builder's new insertion location
+/// stays anchored after the instruction that precedes the insertion point
+/// (instead of moving along with the instruction the InsertPoint stores
+/// internally).
+BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
+
+/// Like splitBB, but reuses the current block's name for the new name.
+BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
+ llvm::Twine Suffix = ".split");
+
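A hedged sketch of the intended use when emitting a region at a possibly degenerate insertion point (emitBody() is a placeholder):

llvm::BasicBlock *emitRegion(llvm::IRBuilder<> &Builder) {
  // Split at the current point; Builder stays anchored after the
  // instruction preceding the split rather than following it into ContBB.
  llvm::BasicBlock *ContBB =
      llvm::splitBB(Builder, /*CreateBranch=*/true, "omp.region.cont");
  emitBody(Builder); // emit the region body at the original position
  return ContBB;
}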
/// An interface to create LLVM-IR for OpenMP directives.
///
/// Each OpenMP directive has a corresponding public generator method.
@@ -87,27 +133,36 @@ public:
/// Callback type for body (=inner region) code generation
///
/// The callback takes code locations as arguments, each describing a
- /// location at which code might need to be generated or a location that is
- /// the target of control transfer.
+ /// location where additional instructions can be inserted.
+ ///
+ /// The CodeGenIP may be in the middle of a basic block or point to the end of
+ /// it. The basic block may have a terminator or be degenerate. The callback
+ /// function may just insert instructions at that position, but also split the
+ /// block (without the Before argument of BasicBlock::splitBasicBlock such
+ /// that the identity of the split predecessor block is preserved) and insert
+ /// additional control flow, including branches that do not lead back to what
+ /// follows the CodeGenIP. Note that since the callback is allowed to split
+ /// the block, callers must assume that InsertPoints to positions in the
+ /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
+ /// such InsertPoints need to be preserved, the caller can split the block
+ /// itself before calling the callback.
+ ///
+ /// AllocaIP and CodeGenIP must not point to the same position.
///
/// \param AllocaIP is the insertion point at which new alloca instructions
- /// should be placed.
+ /// should be placed. The BasicBlock it is pointing to must
+ /// not be split.
/// \param CodeGenIP is the insertion point at which the body code should be
/// placed.
- /// \param ContinuationBB is the basic block target to leave the body.
- ///
- /// Note that all blocks pointed to by the arguments have terminators.
using BodyGenCallbackTy =
- function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationBB)>;
+ function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
// This is created primarily for sections construct as llvm::function_ref
// (BodyGenCallbackTy) is not storable (as described in the comments of
// function_ref class - function_ref contains non-ownable reference
// to the callable.
using StorableBodyGenCallbackTy =
- std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationBB)>;
+ std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
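A hedged sketch of a callback written against the new two-argument contract (BodyFn is an assumed llvm::FunctionCallee; the lambda converts to BodyGenCallbackTy at the call site):

auto BodyGenCB = [&](llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
                     llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
  llvm::IRBuilder<> Builder(CodeGenIP.getBlock(), CodeGenIP.getPoint());
  Builder.CreateCall(BodyFn); // body code; the callback may also split here
};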
/// Callback type for loop body code generation.
///
@@ -145,8 +200,7 @@ public:
/// Description of a LLVM-IR insertion point (IP) and a debug/source location
/// (filename, line, column, ...).
struct LocationDescription {
- template <typename T, typename U>
- LocationDescription(const IRBuilder<T, U> &IRB)
+ LocationDescription(const IRBuilderBase &IRB)
: IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
LocationDescription(const InsertPointTy &IP) : IP(IP) {}
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
@@ -345,6 +399,7 @@ public:
ArrayRef<CanonicalLoopInfo *> Loops,
InsertPointTy ComputeIP);
+private:
/// Modifies the canonical loop to be a statically-scheduled workshare loop.
///
/// This takes a \p LoopInfo representing a canonical loop, such as the one
@@ -354,14 +409,6 @@ public:
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
- /// TODO: Workshare loops with static scheduling may contain up to two loops
- /// that fulfill the requirements of an OpenMP canonical loop. One for
- /// iterating over all iterations of a chunk and another one for iterating
- /// over all chunks that are executed on the same thread. Returning
- /// CanonicalLoopInfo objects representing them may eventually be useful for
- /// the apply clause planned in OpenMP 6.0, but currently whether these are
- /// canonical loops is irrelevant.
- ///
/// \param DL Debug location for instructions added for the
/// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
@@ -369,14 +416,30 @@ public:
/// preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
/// the loop.
- /// \param Chunk The size of loop chunk considered as a unit when
- /// scheduling. If \p nullptr, defaults to 1.
///
/// \returns Point where to insert code after the workshare construct.
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
InsertPointTy AllocaIP,
- bool NeedsBarrier,
- Value *Chunk = nullptr);
+ bool NeedsBarrier);
+
+ /// Modifies the canonical loop to be a statically-scheduled workshare loop
+ /// with a user-specified chunk size.
+ ///
+ /// \param DL Debug location for instructions added for the
+ /// workshare-loop construct itself.
+ /// \param CLI A descriptor of the canonical loop to workshare.
+ /// \param AllocaIP An insertion point for Alloca instructions usable in
+ /// the preheader of the loop.
+ /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
+ /// loop.
+ /// \param ChunkSize The user-specified chunk size.
+ ///
+ /// \returns Point where to insert code after the workshare construct.
+ InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
+ CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ bool NeedsBarrier,
+ Value *ChunkSize);
/// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
///
@@ -404,6 +467,7 @@ public:
bool NeedsBarrier,
Value *Chunk = nullptr);
+public:
/// Modifies the canonical loop to be a workshare loop.
///
/// This takes a \p LoopInfo representing a canonical loop, such as the one
@@ -413,6 +477,10 @@ public:
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
+ /// The concrete transformation is done by applyStaticWorkshareLoop,
+ /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
+ /// on the value of \p SchedKind and \p ChunkSize.
+ ///
/// \param DL Debug location for instructions added for the
/// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
@@ -420,10 +488,25 @@ public:
/// preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
/// the loop.
+ /// \param SchedKind Scheduling algorithm to use.
+ /// \param ChunkSize The chunk size for the inner loop.
+ /// \param HasSimdModifier Whether the simd modifier is present in the
+ /// schedule clause.
+ /// \param HasMonotonicModifier Whether the monotonic modifier is present in
+ /// the schedule clause.
+ /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
+ /// present in the schedule clause.
+ /// \param HasOrderedClause Whether the (parameterless) ordered clause is
+ /// present.
///
/// \returns Point where to insert code after the workshare construct.
- InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, bool NeedsBarrier);
+ InsertPointTy applyWorkshareLoop(
+ DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+ bool NeedsBarrier,
+ llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
+ Value *ChunkSize = nullptr, bool HasSimdModifier = false,
+ bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
+ bool HasOrderedClause = false);
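A hedged sketch of lowering schedule(dynamic, 4) through the widened entry point (DL, CLI, AllocaIP, Builder, and OMPBuilder are assumed from the surrounding lowering code):

llvm::OpenMPIRBuilder::InsertPointTy AfterIP = OMPBuilder.applyWorkshareLoop(
    DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
    llvm::omp::OMP_SCHEDULE_Dynamic, /*ChunkSize=*/Builder.getInt32(4));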
/// Tile a loop nest.
///
@@ -535,6 +618,18 @@ public:
/// \param Loc The location where the taskyield directive was encountered.
void createTaskyield(const LocationDescription &Loc);
+ /// Generator for `#omp task`
+ ///
+ /// \param Loc The location where the task construct was encountered.
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
+ /// \param BodyGenCB Callback that will generate the region code.
+ /// \param Tied True if the task is tied, false if the task is untied.
+ /// \param Final i1 value which is `true` if the task is final, `false` if the
+ /// task is not final.
+ InsertPointTy createTask(const LocationDescription &Loc,
+ InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
+ bool Tied = true, Value *Final = nullptr);
+
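A hedged sketch of emitting '#pragma omp task final(cond)' (Loc, AllocaIP, BodyGenCB, Cond, Builder, and OMPBuilder are assumed):

Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
                                        /*Tied=*/true, /*Final=*/Cond));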
/// Functions used to generate reductions. Such functions take two Values
/// representing LHS and RHS of the reduction, respectively, and a reference
/// to the value that is updated to refer to the reduction result.
@@ -696,6 +791,27 @@ public:
/// Value.
GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
+ /// Create an offloading section struct used to register this global at
+ /// runtime.
+ ///
+ /// Type struct __tgt_offload_entry{
+ /// void *addr; // Pointer to the offload entry info.
+ /// // (function or global)
+ /// char *name; // Name of the function or global.
+ /// size_t size; // Size of the entry info (0 if it is a function).
+ /// int32_t flags;
+ /// int32_t reserved;
+ /// };
+ ///
+ /// \param Addr The pointer to the global being registered.
+ /// \param Name The symbol name associated with the global.
+ /// \param Size The size in bytes of the global (0 for functions).
+ /// \param Flags Flags associated with the entry.
+ /// \param SectionName The section this entry will be placed at.
+ void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
+ int32_t Flags,
+ StringRef SectionName = "omp_offloading_entries");
+
/// Generate control flow and cleanup for cancellation.
///
/// \param CancelFlag Flag indicating if the cancellation is performed.
@@ -768,7 +884,7 @@ public:
struct OutlineInfo {
using PostOutlineCBTy = std::function<void(Function &)>;
PostOutlineCBTy PostOutlineCB;
- BasicBlock *EntryBB, *ExitBB;
+ BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
SmallVector<Value *, 2> ExcludeArgsFromAggregate;
/// Collect all blocks in between EntryBB and ExitBB in both the given
@@ -851,12 +967,14 @@ public:
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
+ /// \param IsNowait If false, a barrier is emitted.
/// \param DidIt Local variable used as a flag to indicate 'single' thread
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
+ FinalizeCallbackTy FiniCB, bool IsNowait,
+ llvm::Value *DidIt);
/// Generator for '#omp master'
///
@@ -1198,7 +1316,7 @@ private:
const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
private:
- enum AtomicKind { Read, Write, Update, Capture };
+ enum AtomicKind { Read, Write, Update, Capture, Compare };
/// Determine whether to emit flush or not
///
@@ -1214,7 +1332,8 @@ private:
/// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only Scalar data types.
///
- /// \param AllocIP Instruction to create AllocaInst before.
+ /// \param AllocaIP The insertion point to be used for alloca
+ /// instructions.
/// \param X The target atomic pointer to be updated
/// \param XElemTy The element type of the atomic pointer.
/// \param Expr The value to update X with.
@@ -1234,7 +1353,7 @@ private:
/// \returns A pair of the old value of X before the update, and the value
/// used for the update.
std::pair<Value *, Value *>
- emitAtomicUpdate(Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
+ emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
bool IsXBinopExpr);
@@ -1286,7 +1405,7 @@ public:
/// Only Scalar data types.
///
/// \param Loc The insert and source location description.
- /// \param AllocIP Instruction to create AllocaInst before.
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param X The target atomic pointer to be updated
/// \param Expr The value to update X with.
/// \param AO Atomic ordering of the generated atomic instructions.
@@ -1302,7 +1421,7 @@ public:
///
/// \return Insertion point after generated atomic update IR.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
- Instruction *AllocIP, AtomicOpValue &X,
+ InsertPointTy AllocaIP, AtomicOpValue &X,
Value *Expr, AtomicOrdering AO,
AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp,
@@ -1317,7 +1436,7 @@ public:
/// X = UpdateOp(X); V = X,
///
/// \param Loc The insert and source location description.
- /// \param AllocIP Instruction to create AllocaInst before.
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param X The target atomic pointer to be updated
/// \param V Memory address where to store captured value
/// \param Expr The value to update X with.
@@ -1338,12 +1457,63 @@ public:
///
/// \return Insertion point after generated atomic capture IR.
InsertPointTy
- createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP,
+ createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
bool IsPostfixUpdate, bool IsXBinopExpr);
+ /// Emit atomic compare for constructs: --- Only scalar data types
+ /// cond-expr-stmt:
+ /// x = x ordop expr ? expr : x;
+ /// x = expr ordop x ? expr : x;
+ /// x = x == e ? d : x;
+ /// x = e == x ? d : x; (this one is not in the spec)
+ /// cond-update-stmt:
+ /// if (x ordop expr) { x = expr; }
+ /// if (expr ordop x) { x = expr; }
+ /// if (x == e) { x = d; }
+ /// if (e == x) { x = d; } (this one is not in the spec)
+ /// conditional-update-capture-atomic:
+ /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
+ /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
+ /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
+ /// IsFailOnly=true)
+ /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
+ /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
+ /// IsFailOnly=true)
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param X The target atomic pointer to be updated.
+ /// \param V Memory address where to store captured value (for
+ /// compare capture only).
+ /// \param R Memory address where to store comparison result
+ /// (for compare capture with '==' only).
+ /// \param E The expected value ('e') for forms that use an
+ /// equality comparison or an expression ('expr') for
+ /// forms that use 'ordop' (logically an atomic maximum or
+ /// minimum).
+ /// \param D The desired value for forms that use an equality
+ /// comparison. For forms that use 'ordop', it should be
+ /// \p nullptr.
+ /// \param AO Atomic ordering of the generated atomic instructions.
+ /// \param Op Atomic compare operation. It can only be ==, <, or >.
+ /// \param IsXBinopExpr True if the conditional statement is in the form where
+ /// x is on LHS. It only matters for < or >.
+ /// \param IsPostfixUpdate True if original value of 'x' must be stored in
+ /// 'v', not an updated one (for compare capture
+ /// only).
+ /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
+ /// only when the comparison fails. This is only valid when
+ /// the comparison is '=='.
+ ///
+ /// \return Insertion point after generated atomic capture IR.
+ InsertPointTy
+ createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
+ AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
+ AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
+ bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
+
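For orientation, two of the source forms above as they appear in OpenMP 5.1 code (x, e, d, and expr are placeholders):

#pragma omp atomic compare
if (x == e) { x = d; }     // equality form, Op = OMPAtomicCompareOp::EQ

#pragma omp atomic compare
x = x > expr ? expr : x;   // atomic min, Op = OMPAtomicCompareOp::MIN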
/// Create the control flow structure of a canonical OpenMP loop.
///
/// The emitted loop will be disconnected, i.e. no edge to the loop's
@@ -1484,6 +1654,27 @@ private:
/// Re-evaluate whether this makes sense.
void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
+ /// Sets the number of loop iterations to the given value. This value must be
+ /// valid in the condition block (i.e., defined in the preheader) and is
+ /// interpreted as an unsigned integer.
+ void setTripCount(Value *TripCount);
+
+ /// Replace all uses of the canonical induction variable in the loop body with
+ /// a new one.
+ ///
+ /// The intended use case is to update the induction variable for an updated
+ /// iteration space such that it can stay normalized in the 0...tripcount-1
+ /// range.
+ ///
+ /// The \p Updater is called with the (presumably updated) current normalized
+ /// induction variable and is expected to return the value that uses of the
+ /// pre-updated induction values should use instead, typically dependent on
+ /// the new induction variable. This is a lambda (instead of e.g. just passing
+ /// the new value) to be able to distinguish the uses of the pre-updated
+ /// induction variable and uses of the induction variable to compute the
+ /// updated induction variable value.
+ void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
+
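To illustrate the Updater contract, a conceptual sketch (mapIndVar is a private helper, and 'Builder' and 'PeelCount' are assumed here):

    // Shift every pre-existing use of the normalized IV by PeelCount.
    // The add below reads the IV directly, so it is not itself rewritten.
    mapIndVar([&](Instruction *IndVar) -> Value * {
      Builder.SetInsertPoint(IndVar->getNextNode());
      return Builder.CreateAdd(IndVar, PeelCount, "iv.shifted");
    });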
public:
/// Returns whether this object currently represents the IR of a loop. If
/// returning false, it may have been consumed by a loop transformation or not
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 0c3cb3f43105..14aa53a6b08d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -86,6 +86,8 @@ __OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8)
OMP_STRUCT_TYPE(VarName, "struct." #Name, __VA_ARGS__)
__OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr)
+__OMP_STRUCT_TYPE(OffloadEntry, __tgt_offload_entry, Int8Ptr, Int8Ptr, SizeTy,
+ Int32, Int32)
__OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, Int8Ptr)
#undef __OMP_STRUCT_TYPE
@@ -475,6 +477,7 @@ __OMP_RTL(__last, false, Void, )
#define ParamAttrs(...) ArrayRef<AttributeSet>({__VA_ARGS__})
#define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind)
#define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N)
+#define AllocSizeAttr(N, M) Attribute::getWithAllocSizeArgs(Ctx, N, M)
#define AttributeSet(...) \
AttributeSet::get(Ctx, ArrayRef<Attribute>({__VA_ARGS__}))
@@ -908,8 +911,10 @@ __OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
-__OMP_RTL_ATTRS(__kmpc_alloc_shared, DeviceAllocAttrs, ReturnPtrAttrs,
- ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_alloc_shared, AttributeSet(
+ EnumAttr(NoUnwind),
+ EnumAttr(NoSync),
+ AllocSizeAttr(0, None)), ReturnPtrAttrs, ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(),
ParamAttrs(NoCaptureAttrs))
@@ -962,6 +967,7 @@ __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
#undef EnumAttr
#undef EnumAttrInt
#undef ParamAttrs
+#undef AllocSizeAttr
///}
@@ -1026,6 +1032,7 @@ __OMP_CANCEL_KIND(taskgroup, 4)
__OMP_DEFAULT_KIND(none)
__OMP_DEFAULT_KIND(shared)
+__OMP_DEFAULT_KIND(private)
__OMP_DEFAULT_KIND(firstprivate)
__OMP_DEFAULT_KIND(unknown)
@@ -1153,6 +1160,7 @@ __OMP_TRAIT_PROPERTY(implementation, extension, match_any)
__OMP_TRAIT_PROPERTY(implementation, extension, match_none)
__OMP_TRAIT_PROPERTY(implementation, extension, disable_implicit_base)
__OMP_TRAIT_PROPERTY(implementation, extension, allow_templates)
+__OMP_TRAIT_PROPERTY(implementation, extension, bind_to_declaration)
__OMP_TRAIT_SET(user)
diff --git a/llvm/include/llvm/FuzzMutate/FuzzerCLI.h b/llvm/include/llvm/FuzzMutate/FuzzerCLI.h
index 473277396a90..db0168d3e675 100644
--- a/llvm/include/llvm/FuzzMutate/FuzzerCLI.h
+++ b/llvm/include/llvm/FuzzMutate/FuzzerCLI.h
@@ -14,8 +14,8 @@
#ifndef LLVM_FUZZMUTATE_FUZZERCLI_H
#define LLVM_FUZZMUTATE_FUZZERCLI_H
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/DataTypes.h"
+#include <stddef.h>
namespace llvm {
@@ -51,29 +51,6 @@ using FuzzerInitFun = int (*)(int *argc, char ***argv);
int runFuzzerOnInputs(int ArgC, char *ArgV[], FuzzerTestFun TestOne,
FuzzerInitFun Init = [](int *, char ***) { return 0; });
-/// Fuzzer friendly interface for the llvm bitcode parser.
-///
-/// \param Data Bitcode we are going to parse
-/// \param Size Size of the 'Data' in bytes
-/// \return New module or nullptr in case of error
-std::unique_ptr<Module> parseModule(const uint8_t *Data, size_t Size,
- LLVMContext &Context);
-
-/// Fuzzer friendly interface for the llvm bitcode printer.
-///
-/// \param M Module to print
-/// \param Dest Location to store serialized module
-/// \param MaxSize Size of the destination buffer
-/// \return Number of bytes that were written. When module size exceeds MaxSize
-/// returns 0 and leaves Dest unchanged.
-size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize);
-
-/// Try to parse module and verify it. May output verification errors to the
-/// errs().
-/// \return New module or nullptr in case of error.
-std::unique_ptr<Module> parseAndVerify(const uint8_t *Data, size_t Size,
- LLVMContext &Context);
-
-} // end llvm namespace
+} // namespace llvm
#endif // LLVM_FUZZMUTATE_FUZZERCLI_H
diff --git a/llvm/include/llvm/FuzzMutate/IRMutator.h b/llvm/include/llvm/FuzzMutate/IRMutator.h
index 423582eace9b..ade76f1b5845 100644
--- a/llvm/include/llvm/FuzzMutate/IRMutator.h
+++ b/llvm/include/llvm/FuzzMutate/IRMutator.h
@@ -10,6 +10,9 @@
// configurable set of strategies. Some common strategies are also included
// here.
//
+// Fuzzer-friendly (de)serialization functions are also provided, as these
+// are usually needed when mutating IR.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_FUZZMUTATE_IRMUTATOR_H
@@ -113,6 +116,29 @@ public:
void mutate(Instruction &Inst, RandomIRBuilder &IB) override;
};
+/// Fuzzer-friendly interface for the llvm bitcode parser.
+///
+/// \param Data Bitcode we are going to parse
+/// \param Size Size of the 'Data' in bytes
+/// \return New module or nullptr in case of error
+std::unique_ptr<Module> parseModule(const uint8_t *Data, size_t Size,
+ LLVMContext &Context);
+
+/// Fuzzer-friendly interface for the llvm bitcode printer.
+///
+/// \param M Module to print
+/// \param Dest Location to store serialized module
+/// \param MaxSize Size of the destination buffer
+/// \return Number of bytes that were written. When the module size exceeds
+/// MaxSize, returns 0 and leaves Dest unchanged.
+size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize);
+
+/// Try to parse a module and verify it. May output verification errors to
+/// errs().
+/// \return New module or nullptr in case of error.
+std::unique_ptr<Module> parseAndVerify(const uint8_t *Data, size_t Size,
+ LLVMContext &Context);
+
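A round-trip sketch of how a fuzz target might combine these declarations (the entry point is the standard libFuzzer signature; the 1 MiB buffer size is an arbitrary assumption):

    extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
      llvm::LLVMContext Context;
      std::unique_ptr<llvm::Module> M =
          llvm::parseAndVerify(Data, Size, Context);
      if (!M)
        return 0; // Not valid, verifiable bitcode; skip this input.
      static uint8_t Buf[1 << 20];
      // writeModule returns 0 and leaves Buf unchanged if M does not fit.
      size_t Written = llvm::writeModule(*M, Buf, sizeof(Buf));
      (void)Written;
      return 0;
    }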
} // end llvm namespace
#endif // LLVM_FUZZMUTATE_IRMUTATOR_H
diff --git a/llvm/include/llvm/FuzzMutate/OpDescriptor.h b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
index 43c810920766..847f975571bc 100644
--- a/llvm/include/llvm/FuzzMutate/OpDescriptor.h
+++ b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
@@ -15,16 +15,15 @@
#define LLVM_FUZZMUTATE_OPDESCRIPTOR_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include <functional>
namespace llvm {
+class Instruction;
namespace fuzzerop {
/// @{
@@ -146,7 +145,8 @@ static inline SourcePred sizedPtrType() {
return false;
if (const auto *PtrT = dyn_cast<PointerType>(V->getType()))
- return PtrT->getPointerElementType()->isSized();
+ return PtrT->isOpaque() ||
+ PtrT->getNonOpaquePointerElementType()->isSized();
return false;
};
auto Make = [](ArrayRef<Value *>, ArrayRef<Type *> Ts) {
diff --git a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
index f3b609702e9d..aeb41baa8e07 100644
--- a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
+++ b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
@@ -13,12 +13,19 @@
#ifndef LLVM_FUZZMUTATE_RANDOMIRBUILDER_H
#define LLVM_FUZZMUTATE_RANDOMIRBUILDER_H
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/FuzzMutate/IRMutator.h"
-#include "llvm/FuzzMutate/Random.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include <random>
namespace llvm {
+class BasicBlock;
+class Instruction;
+class LLVMContext;
+class Type;
+class Value;
+namespace fuzzerop {
+class SourcePred;
+}
using RandomEngine = std::mt19937;
diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h
index 69048554a05c..50afe016f0d6 100644
--- a/llvm/include/llvm/IR/AbstractCallSite.h
+++ b/llvm/include/llvm/IR/AbstractCallSite.h
@@ -14,17 +14,17 @@
#ifndef LLVM_IR_ABSTRACTCALLSITE_H
#define LLVM_IR_ABSTRACTCALLSITE_H
-#include "llvm/IR/Argument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
#include <cassert>
namespace llvm {
+class Argument;
+class Use;
+
/// AbstractCallSite
///
/// An abstract call site is a wrapper that allows to treat direct,
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index 7cbfa2a7b6ce..3b74853cdafa 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -14,7 +14,6 @@
#define LLVM_IR_ARGUMENT_H
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Value.h"
diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h
index 08e6c8b6f1e0..2d2ecfbde6e6 100644
--- a/llvm/include/llvm/IR/Assumptions.h
+++ b/llvm/include/llvm/IR/Assumptions.h
@@ -34,6 +34,10 @@ extern StringSet<> KnownAssumptionStrings;
/// Helper that allows inserting a new assumption string into the known
/// assumption set by creating a (static) object.
struct KnownAssumptionString {
+ KnownAssumptionString(const char *AssumptionStr)
+ : AssumptionStr(AssumptionStr) {
+ KnownAssumptionStrings.insert(AssumptionStr);
+ }
KnownAssumptionString(StringRef AssumptionStr)
: AssumptionStr(AssumptionStr) {
KnownAssumptionStrings.insert(AssumptionStr);
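Registration is a side effect of construction, so a single file-static object is enough; a sketch with an illustrative (made-up) assumption name:

    // Runs during static initialization and inserts the string into
    // KnownAssumptionStrings; the new const char* overload lets a plain
    // string literal bind directly.
    static KnownAssumptionString MyAssumption("my_plugin_no_openmp");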
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 74b60f1e3d05..6a4e6d63a973 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -17,11 +17,13 @@
#include "llvm-c/Types.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Alignment.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <bitset>
#include <cassert>
@@ -42,6 +44,18 @@ class Function;
class LLVMContext;
class Type;
+enum class AllocFnKind : uint64_t {
+ Unknown = 0,
+ Alloc = 1 << 0, // Allocator function returns a new allocation
+ Realloc = 1 << 1, // Allocator function resizes the `allocptr` argument
+ Free = 1 << 2, // Allocator function frees the `allocptr` argument
+ Uninitialized = 1 << 3, // Allocator function returns uninitialized memory
+ Zeroed = 1 << 4, // Allocator function returns zeroed memory
+ Aligned = 1 << 5, // Allocator function aligns allocations per the
+ // `allocalign` argument
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Aligned)
+};
+
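Because the enum is marked as a bitmask, the kinds compose with the usual bitwise operators; for example:

    // A malloc-like function: returns a fresh, uninitialized allocation.
    AllocFnKind K = AllocFnKind::Alloc | AllocFnKind::Uninitialized;
    if ((K & AllocFnKind::Alloc) != AllocFnKind::Unknown) {
      // Treat the callee as an allocation routine.
    }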
//===----------------------------------------------------------------------===//
/// \class
/// Functions, function parameters, and return types can have attributes
@@ -130,6 +144,7 @@ public:
static Attribute getWithByRefType(LLVMContext &Context, Type *Ty);
static Attribute getWithPreallocatedType(LLVMContext &Context, Type *Ty);
static Attribute getWithInAllocaType(LLVMContext &Context, Type *Ty);
+ static Attribute getWithUWTableKind(LLVMContext &Context, UWTableKind Kind);
/// For a typed attribute, return the equivalent attribute with the type
/// changed to \p ReplacementTy.
@@ -223,6 +238,12 @@ public:
/// unknown.
Optional<unsigned> getVScaleRangeMax() const;
+ // Returns the unwind table kind.
+ UWTableKind getUWTableKind() const;
+
+ // Returns the allocator function kind.
+ AllocFnKind getAllocKind() const;
+
/// The Attribute is converted to a string of equivalent mnemonic. This
/// is, presumably, for writing out the mnemonics for the assembly writer.
std::string getAsString(bool InAttrGrp = false) const;
@@ -353,6 +374,8 @@ public:
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
unsigned getVScaleRangeMin() const;
Optional<unsigned> getVScaleRangeMax() const;
+ UWTableKind getUWTableKind() const;
+ AllocFnKind getAllocKind() const;
std::string getAsString(bool InAttrGrp = false) const;
/// Return true if this attribute set belongs to the LLVMContext.
@@ -841,6 +864,11 @@ public:
/// arg.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;
+ /// Get the unwind table kind requested for the function.
+ UWTableKind getUWTableKind() const;
+
+ AllocFnKind getAllocKind() const;
+
/// Return the attributes at the index as a string.
std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
@@ -1190,6 +1218,13 @@ public:
/// Attribute.getIntValue().
AttrBuilder &addVScaleRangeAttrFromRawRepr(uint64_t RawVScaleRangeRepr);
+ /// This turns the unwind table kind into the form used internally in
+ /// Attribute.
+ AttrBuilder &addUWTableAttr(UWTableKind Kind);
+
+ // This turns the allocator kind into the form used internally in Attribute.
+ AttrBuilder &addAllocKindAttr(AllocFnKind Kind);
+
ArrayRef<Attribute> attrs() const { return Attrs; }
bool operator==(const AttrBuilder &B) const;
@@ -1198,8 +1233,17 @@ public:
namespace AttributeFuncs {
-/// Which attributes cannot be applied to a type.
-AttributeMask typeIncompatible(Type *Ty);
+enum AttributeSafetyKind : uint8_t {
+ ASK_SAFE_TO_DROP = 1,
+ ASK_UNSAFE_TO_DROP = 2,
+ ASK_ALL = ASK_SAFE_TO_DROP | ASK_UNSAFE_TO_DROP,
+};
+
+/// Which attributes cannot be applied to a type. The argument \p ASK indicates
+/// whether only attributes that are known to be safely droppable are contained in
+/// the mask; only attributes that might be unsafe to drop (e.g., ABI-related
+/// attributes) are in the mask; or both.
+AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK = ASK_ALL);
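A sketch of the intended split ('Arg' and 'NewTy' are assumed, and Argument::removeAttrs is used illustratively): a transform that changes a value's type can silently drop the safe subset, while anything in the unsafe subset should make it bail out:

    AttributeMask Safe = AttributeFuncs::typeIncompatible(
        NewTy, AttributeFuncs::ASK_SAFE_TO_DROP);
    Arg.removeAttrs(Safe); // e.g. nonnull on a non-pointer is just dropped
    AttributeMask Unsafe = AttributeFuncs::typeIncompatible(
        NewTy, AttributeFuncs::ASK_UNSAFE_TO_DROP);
    // ABI-affecting attributes such as byval land in the unsafe mask; if
    // any of them is present, refuse the transform instead of miscompiling.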
/// Get param/return attributes which imply immediate undefined behavior if an
/// invalid value is passed. For example, this includes noundef (where undef
@@ -1230,6 +1274,9 @@ void mergeAttributesForInlining(Function &Caller, const Function &Callee);
/// \param [in] ToMerge - The function to merge attributes from.
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge);
+/// Update min-legal-vector-width if it is in Attribute and less than Width.
+void updateMinLegalVectorWidthAttr(Function &Fn, uint64_t Width);
+
} // end namespace AttributeFuncs
} // end namespace llvm
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 40c554c269ca..7b955b40b0a8 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -47,6 +47,16 @@ class StrBoolAttr<string S> : Attr<S, []>;
/// 0 means unaligned (different from align(1)).
def Alignment : IntAttr<"align", [ParamAttr, RetAttr]>;
+/// Parameter of a function that tells us the alignment of an allocation, as in
+/// aligned_alloc and aligned ::operator new.
+def AllocAlign: EnumAttr<"allocalign", [ParamAttr]>;
+
+/// Describes behavior of an allocator function in terms of known properties.
+def AllocKind: IntAttr<"allockind", [FnAttr]>;
+
+/// Parameter is the pointer to be manipulated by the allocator function.
+def AllocatedPointer : EnumAttr<"allocptr", [ParamAttr]>;
+
/// The result of the function is guaranteed to point to a number of bytes that
/// we can determine if we know the value of the function's arguments.
def AllocSize : IntAttr<"allocsize", [FnAttr]>;
@@ -175,6 +185,9 @@ def NoProfile : EnumAttr<"noprofile", [FnAttr]>;
/// Function doesn't unwind stack.
def NoUnwind : EnumAttr<"nounwind", [FnAttr]>;
+/// No SanitizeBounds instrumentation.
+def NoSanitizeBounds : EnumAttr<"nosanitize_bounds", [FnAttr]>;
+
/// No SanitizeCoverage instrumentation.
def NoSanitizeCoverage : EnumAttr<"nosanitize_coverage", [FnAttr]>;
@@ -273,7 +286,7 @@ def SwiftSelf : EnumAttr<"swiftself", [ParamAttr]>;
def SwiftAsync : EnumAttr<"swiftasync", [ParamAttr]>;
/// Function must be in an unwind table.
-def UWTable : EnumAttr<"uwtable", [FnAttr]>;
+def UWTable : IntAttr<"uwtable", [FnAttr]>;
/// Minimum/Maximum vscale value for function.
def VScaleRange : IntAttr<"vscale_range", [FnAttr]>;
@@ -290,10 +303,14 @@ def ZExt : EnumAttr<"zeroext", [ParamAttr, RetAttr]>;
/// Function is required to make Forward Progress.
def MustProgress : EnumAttr<"mustprogress", [FnAttr]>;
+/// Function is a presplit coroutine.
+def PresplitCoroutine : EnumAttr<"presplitcoroutine", [FnAttr]>;
+
/// Target-independent string attributes.
def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">;
def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">;
def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">;
+def ApproxFuncFPMath : StrBoolAttr<"approx-func-fp-math">;
def NoSignedZerosFPMath : StrBoolAttr<"no-signed-zeros-fp-math">;
def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">;
def NoJumpTables : StrBoolAttr<"no-jump-tables">;
@@ -333,6 +350,7 @@ class MergeRule<string F> {
def : MergeRule<"setAND<LessPreciseFPMADAttr>">;
def : MergeRule<"setAND<NoInfsFPMathAttr>">;
def : MergeRule<"setAND<NoNansFPMathAttr>">;
+def : MergeRule<"setAND<ApproxFuncFPMathAttr>">;
def : MergeRule<"setAND<NoSignedZerosFPMathAttr>">;
def : MergeRule<"setAND<UnsafeFPMathAttr>">;
def : MergeRule<"setOR<NoImplicitFloatAttr>">;
@@ -345,6 +363,3 @@ def : MergeRule<"adjustCallerStackProbeSize">;
def : MergeRule<"adjustMinLegalVectorWidth">;
def : MergeRule<"adjustNullPointerValidAttr">;
def : MergeRule<"setAND<MustProgressAttr>">;
-
-// Target dependent attributes
-include "llvm/IR/AttributesAMDGPU.td"
diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h
index f331fc3c413f..12952f25cbda 100644
--- a/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/llvm/include/llvm/IR/AutoUpgrade.h
@@ -14,19 +14,24 @@
#define LLVM_IR_AUTOUPGRADE_H
#include "llvm/ADT/StringRef.h"
+#include <vector>
namespace llvm {
class AttrBuilder;
- class CallInst;
+ class CallBase;
class Constant;
class Function;
class Instruction;
+ class GlobalVariable;
class MDNode;
class Module;
- class GlobalVariable;
+ class StringRef;
class Type;
class Value;
+ template <typename T> class OperandBundleDefT;
+ using OperandBundleDef = OperandBundleDefT<Value *>;
+
/// This is a more granular function that simply checks an intrinsic function
/// for upgrading, and returns true if it requires upgrading. It may return
/// null in NewFn if all calls to the original intrinsic function
@@ -35,7 +40,7 @@ namespace llvm {
/// This is the complement to the above, replacing a specific call to an
/// intrinsic function with a call to the specified new function.
- void UpgradeIntrinsicCall(CallInst *CI, Function *NewFn);
+ void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn);
// This upgrades the comment for objc retain release markers in inline asm
// calls
@@ -77,7 +82,7 @@ namespace llvm {
/// This is an auto-upgrade for bitcast constant expression between pointers
/// with different address spaces: the instruction is replaced by a pair
/// ptrtoint+inttoptr.
- Value *UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy);
+ Constant *UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy);
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
@@ -98,6 +103,9 @@ namespace llvm {
/// Upgrade attributes that changed format or kind.
void UpgradeAttributes(AttrBuilder &B);
+ /// Upgrade operand bundles (without knowing about their user instruction).
+ void UpgradeOperandBundles(std::vector<OperandBundleDef> &OperandBundles);
+
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 184ddfc01c29..d487223eca02 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -22,9 +22,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/CBindingWrapping.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <iterator>
@@ -119,7 +116,11 @@ public:
/// Returns the terminator instruction if the block is well formed or null
/// if the block is not well formed.
- const Instruction *getTerminator() const LLVM_READONLY;
+ const Instruction *getTerminator() const LLVM_READONLY {
+ if (InstList.empty() || !InstList.back().isTerminator())
+ return nullptr;
+ return &InstList.back();
+ }
Instruction *getTerminator() {
return const_cast<Instruction *>(
static_cast<const BasicBlock *>(this)->getTerminator());
diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h
index 0ee584f8af7e..28a8d31a4cc6 100644
--- a/llvm/include/llvm/IR/CFG.h
+++ b/llvm/include/llvm/IR/CFG.h
@@ -25,7 +25,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
#include <cassert>
#include <cstddef>
#include <iterator>
diff --git a/llvm/lib/IR/ConstantFold.h b/llvm/include/llvm/IR/ConstantFold.h
index 1aa44f4d21e5..d637a180b0ba 100644
--- a/llvm/lib/IR/ConstantFold.h
+++ b/llvm/include/llvm/IR/ConstantFold.h
@@ -1,4 +1,4 @@
-//===-- ConstantFolding.h - Internal Constant Folding Interface -*- C++ -*-===//
+//==-- ConstantFold.h - DL-independent Constant Folding Interface -*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,23 +6,26 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the (internal) constant folding interfaces for LLVM. These
-// interfaces are used by the ConstantExpr::get* methods to automatically fold
-// constants when possible.
+// This file defines the DataLayout-independent constant folding interface.
+// When possible, the DataLayout-aware constant folding interface in
+// Analysis/ConstantFolding.h should be preferred.
+//
+// These interfaces are used by the ConstantExpr::get* methods to automatically
+// fold constants when possible.
//
// These operators may return a null object if they don't know how to perform
// the specified operation on the specified constant types.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_IR_CONSTANTFOLD_H
-#define LLVM_LIB_IR_CONSTANTFOLD_H
+#ifndef LLVM_IR_CONSTANTFOLD_H
+#define LLVM_IR_CONSTANTFOLD_H
#include "llvm/ADT/Optional.h"
#include "llvm/IR/InstrTypes.h"
namespace llvm {
-template <typename T> class ArrayRef;
+ template <typename T> class ArrayRef;
class Value;
class Constant;
class Type;
diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h
index 28dc63a5886e..5e7ddb9aa673 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -19,9 +19,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/IRBuilderFolder.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Operator.h"
namespace llvm {
@@ -38,31 +39,46 @@ public:
// Return an existing value or a constant if the operation can be simplified.
// Otherwise return nullptr.
//===--------------------------------------------------------------------===//
- Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
+
+ Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
if (LC && RC)
- return ConstantExpr::getAdd(LC, RC, HasNUW, HasNSW);
+ return ConstantExpr::get(Opc, LC, RC);
return nullptr;
}
- Value *FoldAnd(Value *LHS, Value *RHS) const override {
+ Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool IsExact) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
if (LC && RC)
- return ConstantExpr::getAnd(LC, RC);
+ return ConstantExpr::get(Opc, LC, RC,
+ IsExact ? PossiblyExactOperator::IsExact : 0);
return nullptr;
}
- Value *FoldOr(Value *LHS, Value *RHS) const override {
+ Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool HasNUW, bool HasNSW) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
- if (LC && RC)
- return ConstantExpr::getOr(LC, RC);
+ if (LC && RC) {
+ unsigned Flags = 0;
+ if (HasNUW)
+ Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+ if (HasNSW)
+ Flags |= OverflowingBinaryOperator::NoSignedWrap;
+ return ConstantExpr::get(Opc, LC, RC, Flags);
+ }
return nullptr;
}
+ Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ FastMathFlags FMF) const override {
+ return FoldBinOp(Opc, LHS, RHS);
+ }
+
Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override {
auto *LC = dyn_cast<Constant>(LHS);
auto *RC = dyn_cast<Constant>(RHS);
@@ -95,103 +111,57 @@ public:
return nullptr;
}
- //===--------------------------------------------------------------------===//
- // Binary Operators
- //===--------------------------------------------------------------------===//
-
- Constant *CreateFAdd(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getFAdd(LHS, RHS);
- }
-
- Constant *CreateSub(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const override {
- return ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW);
- }
-
- Constant *CreateFSub(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getFSub(LHS, RHS);
- }
-
- Constant *CreateMul(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const override {
- return ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW);
- }
-
- Constant *CreateFMul(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getFMul(LHS, RHS);
- }
-
- Constant *CreateUDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstantExpr::getUDiv(LHS, RHS, isExact);
- }
-
- Constant *CreateSDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstantExpr::getSDiv(LHS, RHS, isExact);
- }
-
- Constant *CreateFDiv(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getFDiv(LHS, RHS);
- }
-
- Constant *CreateURem(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getURem(LHS, RHS);
- }
-
- Constant *CreateSRem(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getSRem(LHS, RHS);
- }
-
- Constant *CreateFRem(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getFRem(LHS, RHS);
- }
-
- Constant *CreateShl(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const override {
- return ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW);
- }
-
- Constant *CreateLShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstantExpr::getLShr(LHS, RHS, isExact);
- }
-
- Constant *CreateAShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- return ConstantExpr::getAShr(LHS, RHS, isExact);
+ Value *FoldExtractValue(Value *Agg,
+ ArrayRef<unsigned> IdxList) const override {
+ if (auto *CAgg = dyn_cast<Constant>(Agg))
+ return ConstantFoldExtractValueInstruction(CAgg, IdxList);
+ return nullptr;
+ }
+
+ Value *FoldInsertValue(Value *Agg, Value *Val,
+ ArrayRef<unsigned> IdxList) const override {
+ auto *CAgg = dyn_cast<Constant>(Agg);
+ auto *CVal = dyn_cast<Constant>(Val);
+ if (CAgg && CVal)
+ return ConstantFoldInsertValueInstruction(CAgg, CVal, IdxList);
+ return nullptr;
}
- Constant *CreateOr(Constant *LHS, Constant *RHS) const {
- return ConstantExpr::getOr(LHS, RHS);
+ Value *FoldExtractElement(Value *Vec, Value *Idx) const override {
+ auto *CVec = dyn_cast<Constant>(Vec);
+ auto *CIdx = dyn_cast<Constant>(Idx);
+ if (CVec && CIdx)
+ return ConstantExpr::getExtractElement(CVec, CIdx);
+ return nullptr;
}
- Constant *CreateXor(Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::getXor(LHS, RHS);
+ Value *FoldInsertElement(Value *Vec, Value *NewElt,
+ Value *Idx) const override {
+ auto *CVec = dyn_cast<Constant>(Vec);
+ auto *CNewElt = dyn_cast<Constant>(NewElt);
+ auto *CIdx = dyn_cast<Constant>(Idx);
+ if (CVec && CNewElt && CIdx)
+ return ConstantExpr::getInsertElement(CVec, CNewElt, CIdx);
+ return nullptr;
}
- Constant *CreateBinOp(Instruction::BinaryOps Opc,
- Constant *LHS, Constant *RHS) const override {
- return ConstantExpr::get(Opc, LHS, RHS);
+ Value *FoldShuffleVector(Value *V1, Value *V2,
+ ArrayRef<int> Mask) const override {
+ auto *C1 = dyn_cast<Constant>(V1);
+ auto *C2 = dyn_cast<Constant>(V2);
+ if (C1 && C2)
+ return ConstantExpr::getShuffleVector(C1, C2, Mask);
+ return nullptr;
}
//===--------------------------------------------------------------------===//
// Unary Operators
//===--------------------------------------------------------------------===//
- Constant *CreateNeg(Constant *C,
- bool HasNUW = false, bool HasNSW = false) const override {
- return ConstantExpr::getNeg(C, HasNUW, HasNSW);
- }
-
Constant *CreateFNeg(Constant *C) const override {
return ConstantExpr::getFNeg(C);
}
- Constant *CreateNot(Constant *C) const override {
- return ConstantExpr::getNot(C);
- }
-
Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const override {
return ConstantExpr::get(Opc, C);
}
@@ -255,34 +225,6 @@ public:
Constant *RHS) const override {
return ConstantExpr::getCompare(P, LHS, RHS);
}
-
- //===--------------------------------------------------------------------===//
- // Other Instructions
- //===--------------------------------------------------------------------===//
-
- Constant *CreateExtractElement(Constant *Vec, Constant *Idx) const override {
- return ConstantExpr::getExtractElement(Vec, Idx);
- }
-
- Constant *CreateInsertElement(Constant *Vec, Constant *NewElt,
- Constant *Idx) const override {
- return ConstantExpr::getInsertElement(Vec, NewElt, Idx);
- }
-
- Constant *CreateShuffleVector(Constant *V1, Constant *V2,
- ArrayRef<int> Mask) const override {
- return ConstantExpr::getShuffleVector(V1, V2, Mask);
- }
-
- Constant *CreateExtractValue(Constant *Agg,
- ArrayRef<unsigned> IdxList) const override {
- return ConstantExpr::getExtractValue(Agg, IdxList);
- }
-
- Constant *CreateInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> IdxList) const override {
- return ConstantExpr::getInsertValue(Agg, Val, IdxList);
- }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h
index fea4d0da1d0d..68abf4ef555d 100644
--- a/llvm/include/llvm/IR/ConstantRange.h
+++ b/llvm/include/llvm/IR/ConstantRange.h
@@ -553,6 +553,9 @@ public:
/// Return whether unsigned mul of the two ranges always/never overflows.
OverflowResult unsignedMulMayOverflow(const ConstantRange &Other) const;
+ /// Return known bits for values in this range.
+ KnownBits toKnownBits() const;
+
/// Print out the bounds to a stream.
void print(raw_ostream &OS) const;
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index fb884912b318..b5445ff71b74 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -289,7 +289,8 @@ public:
APInt *Payload = nullptr);
static Constant *getSNaN(Type *Ty, bool Negative = false,
APInt *Payload = nullptr);
- static Constant *getNegativeZero(Type *Ty);
+ static Constant *getZero(Type *Ty, bool Negative = false);
+ static Constant *getNegativeZero(Type *Ty) { return getZero(Ty, true); }
static Constant *getInfinity(Type *Ty, bool Negative = false);
/// Return true if Ty is big enough to represent V.
@@ -1120,9 +1121,12 @@ public:
/// commutative, callers can acquire the operand 1 identity constant by
/// setting AllowRHSConstant to true. For example, any shift has a zero
/// identity constant for operand 1: X shift 0 = X.
+ /// If this is a fadd/fsub operation and we don't care about signed zeros,
+ /// then setting NSZ to true returns the identity +0.0 instead of -0.0.
/// Return nullptr if the operator does not have an identity constant.
static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty,
- bool AllowRHSConstant = false);
+ bool AllowRHSConstant = false,
+ bool NSZ = false);
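The NSZ distinction matters because only -0.0 is a universal fadd identity (x + (+0.0) turns -0.0 into +0.0); a sketch, with 'FloatTy' assumed:

    // Strict identity for fadd: returns -0.0.
    Constant *StrictId = ConstantExpr::getBinOpIdentity(
        Instruction::FAdd, FloatTy, /*AllowRHSConstant=*/false, /*NSZ=*/false);
    // Under no-signed-zeros, the conventional +0.0 is returned instead.
    Constant *NszId = ConstantExpr::getBinOpIdentity(
        Instruction::FAdd, FloatTy, /*AllowRHSConstant=*/false, /*NSZ=*/true);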
/// Return the absorbing element for the given binary
/// operation, i.e. a constant C such that X op C = C and C op X = C for
@@ -1160,6 +1164,11 @@ public:
Type *Ty ///< The type to trunc or bitcast C to
);
+ /// Create either an sext, trunc or nothing, depending on whether Ty is
+ /// wider, narrower or the same as C->getType(). This only works with
+ /// integer or vector of integer types.
+ static Constant *getSExtOrTrunc(Constant *C, Type *Ty);
+
/// Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant
/// expression.
static Constant *
@@ -1285,8 +1294,6 @@ public:
static Constant *getShuffleVector(Constant *V1, Constant *V2,
ArrayRef<int> Mask,
Type *OnlyIfReducedTy = nullptr);
- static Constant *getExtractValue(Constant *Agg, ArrayRef<unsigned> Idxs,
- Type *OnlyIfReducedTy = nullptr);
static Constant *getInsertValue(Constant *Agg, Constant *Val,
ArrayRef<unsigned> Idxs,
Type *OnlyIfReducedTy = nullptr);
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index fc461fc3f49f..9afa715b650c 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/Support/Casting.h"
@@ -220,6 +221,23 @@ namespace llvm {
/// \param SizeInBits Size of the type.
DIStringType *createStringType(StringRef Name, uint64_t SizeInBits);
+ /// Create debugging information entry for a Fortran
+ /// assumed-length string type.
+ /// \param Name Type name.
+ /// \param StringLength String length expressed as DIVariable *.
+ /// \param StrLocationExp Optional memory location of the string.
+ DIStringType *createStringType(StringRef Name, DIVariable *StringLength,
+ DIExpression *StrLocationExp = nullptr);
+
+ /// Create debugging information entry for a Fortran
+ /// assumed-length string type.
+ /// \param Name Type name.
+ /// \param StringLengthExp String length expressed in DIExpression form.
+ /// \param StrLocationExp Optional memory location of the string.
+ DIStringType *createStringType(StringRef Name,
+ DIExpression *StringLengthExp,
+ DIExpression *StrLocationExp = nullptr);
+
/// Create debugging information entry for a qualified
/// type, e.g. 'const int'.
/// \param Tag Tag identifying type, e.g. dwarf::DW_TAG_volatile_type
@@ -734,6 +752,8 @@ namespace llvm {
/// \param TParams Function template parameters.
/// \param ThrownTypes Exception types this function may throw.
/// \param Annotations Attribute Annotations.
+ /// \param TargetFuncName The name of the target function if this is
+ /// a trampoline.
DISubprogram *
createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName,
DIFile *File, unsigned LineNo, DISubroutineType *Ty,
@@ -742,7 +762,8 @@ namespace llvm {
DITemplateParameterArray TParams = nullptr,
DISubprogram *Decl = nullptr,
DITypeArray ThrownTypes = nullptr,
- DINodeArray Annotations = nullptr);
+ DINodeArray Annotations = nullptr,
+ StringRef TargetFuncName = "");
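A sketch of describing a trampoline with the new parameter ('DIB', 'Scope', 'File', and 'SPTy' are assumed; the middle arguments follow the existing createFunction parameter order):

    // 'foo_tramp' forwards to 'foo'; TargetFuncName records the target.
    DISubprogram *SP = DIB.createFunction(
        Scope, "foo_tramp", /*LinkageName=*/"", File, /*LineNo=*/10, SPTy,
        /*ScopeLine=*/10, DINode::FlagZero, DISubprogram::SPFlagDefinition,
        /*TParams=*/nullptr, /*Decl=*/nullptr, /*ThrownTypes=*/nullptr,
        /*Annotations=*/nullptr, /*TargetFuncName=*/"foo");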
/// Identical to createFunction,
/// except that the resulting DbgNode is meant to be RAUWed.
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 36438fc4f4e0..a6621c963d85 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -26,10 +26,10 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Alignment.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/TypeSize.h"
#include <cassert>
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 96569179060f..db1d031a062d 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
@@ -61,6 +60,10 @@
namespace llvm {
+namespace dwarf {
+enum Tag : uint16_t;
+}
+
extern cl::opt<bool> EnableFSDiscriminator;
class DITypeRefArray {
@@ -156,7 +159,7 @@ protected:
void setTag(unsigned Tag) { SubclassData16 = Tag; }
public:
- dwarf::Tag getTag() const { return (dwarf::Tag)SubclassData16; }
+ dwarf::Tag getTag() const;
/// Debug info flags.
///
@@ -267,7 +270,7 @@ public:
/// Return a (temporary) clone of this.
TempGenericDINode clone() const { return cloneImpl(); }
- dwarf::Tag getTag() const { return (dwarf::Tag)SubclassData16; }
+ dwarf::Tag getTag() const;
StringRef getHeader() const { return getStringOperand(0); }
MDString *getRawHeader() const { return getOperandAs<MDString>(0); }
@@ -298,8 +301,7 @@ class DISubrange : public DINode {
friend class LLVMContextImpl;
friend class MDNode;
- DISubrange(LLVMContext &C, StorageType Storage, ArrayRef<Metadata *> Ops)
- : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, Ops) {}
+ DISubrange(LLVMContext &C, StorageType Storage, ArrayRef<Metadata *> Ops);
~DISubrange() = default;
@@ -363,9 +365,7 @@ class DIGenericSubrange : public DINode {
friend class MDNode;
DIGenericSubrange(LLVMContext &C, StorageType Storage,
- ArrayRef<Metadata *> Ops)
- : DINode(C, DIGenericSubrangeKind, Storage,
- dwarf::DW_TAG_generic_subrange, Ops) {}
+ ArrayRef<Metadata *> Ops);
~DIGenericSubrange() = default;
@@ -414,11 +414,7 @@ class DIEnumerator : public DINode {
APInt Value;
DIEnumerator(LLVMContext &C, StorageType Storage, const APInt &Value,
- bool IsUnsigned, ArrayRef<Metadata *> Ops)
- : DINode(C, DIEnumeratorKind, Storage, dwarf::DW_TAG_enumerator, Ops),
- Value(Value) {
- SubclassData32 = IsUnsigned;
- }
+ bool IsUnsigned, ArrayRef<Metadata *> Ops);
DIEnumerator(LLVMContext &C, StorageType Storage, int64_t Value,
bool IsUnsigned, ArrayRef<Metadata *> Ops)
: DIEnumerator(C, Storage, APInt(64, Value, !IsUnsigned), IsUnsigned,
@@ -568,9 +564,7 @@ private:
DIFile(LLVMContext &C, StorageType Storage,
Optional<ChecksumInfo<MDString *>> CS, Optional<MDString *> Src,
- ArrayRef<Metadata *> Ops)
- : DIScope(C, DIFileKind, Storage, dwarf::DW_TAG_file_type, Ops),
- Checksum(CS), Source(Src) {}
+ ArrayRef<Metadata *> Ops);
~DIFile() = default;
static DIFile *getImpl(LLVMContext &Context, StringRef Filename,
@@ -1021,42 +1015,19 @@ public:
/// Get casted version of extra data.
/// @{
- DIType *getClassType() const {
- assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
- return cast_or_null<DIType>(getExtraData());
- }
+ DIType *getClassType() const;
DIObjCProperty *getObjCProperty() const {
return dyn_cast_or_null<DIObjCProperty>(getExtraData());
}
- uint32_t getVBPtrOffset() const {
- assert(getTag() == dwarf::DW_TAG_inheritance);
- if (auto *CM = cast_or_null<ConstantAsMetadata>(getExtraData()))
- if (auto *CI = dyn_cast_or_null<ConstantInt>(CM->getValue()))
- return static_cast<uint32_t>(CI->getZExtValue());
- return 0;
- }
+ uint32_t getVBPtrOffset() const;
- Constant *getStorageOffsetInBits() const {
- assert(getTag() == dwarf::DW_TAG_member && isBitField());
- if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
- return C->getValue();
- return nullptr;
- }
+ Constant *getStorageOffsetInBits() const;
- Constant *getConstant() const {
- assert(getTag() == dwarf::DW_TAG_member && isStaticMember());
- if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
- return C->getValue();
- return nullptr;
- }
- Constant *getDiscriminantValue() const {
- assert(getTag() == dwarf::DW_TAG_member && !isStaticMember());
- if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
- return C->getValue();
- return nullptr;
- }
+ Constant *getConstant() const;
+
+ Constant *getDiscriminantValue() const;
/// @}
static bool classof(const Metadata *MD) {
@@ -1300,10 +1271,7 @@ class DISubroutineType : public DIType {
uint8_t CC;
DISubroutineType(LLVMContext &C, StorageType Storage, DIFlags Flags,
- uint8_t CC, ArrayRef<Metadata *> Ops)
- : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type,
- 0, 0, 0, 0, Flags, Ops),
- CC(CC) {}
+ uint8_t CC, ArrayRef<Metadata *> Ops);
~DISubroutineType() = default;
static DISubroutineType *getImpl(LLVMContext &Context, DIFlags Flags,
@@ -1330,6 +1298,12 @@ public:
(Flags, CC, TypeArray))
TempDISubroutineType clone() const { return cloneImpl(); }
+ // Returns a new temporary DISubroutineType with an updated CC.
+ TempDISubroutineType cloneWithCC(uint8_t CC) const {
+ auto NewTy = clone();
+ NewTy->CC = CC;
+ return NewTy;
+ }
uint8_t getCC() const { return CC; }
@@ -1385,15 +1359,7 @@ private:
bool IsOptimized, unsigned RuntimeVersion,
unsigned EmissionKind, uint64_t DWOId, bool SplitDebugInlining,
bool DebugInfoForProfiling, unsigned NameTableKind,
- bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
- : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
- SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
- RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind),
- DWOId(DWOId), SplitDebugInlining(SplitDebugInlining),
- DebugInfoForProfiling(DebugInfoForProfiling),
- NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) {
- assert(Storage != Uniqued);
- }
+ bool RangesBaseAddress, ArrayRef<Metadata *> Ops);
~DICompileUnit() = default;
static DICompileUnit *
@@ -1872,19 +1838,7 @@ public:
static DISPFlags toSPFlags(bool IsLocalToUnit, bool IsDefinition,
bool IsOptimized,
unsigned Virtuality = SPFlagNonvirtual,
- bool IsMainSubprogram = false) {
- // We're assuming virtuality is the low-order field.
- static_assert(int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
- int(SPFlagPureVirtual) ==
- int(dwarf::DW_VIRTUALITY_pure_virtual),
- "Virtuality constant mismatch");
- return static_cast<DISPFlags>(
- (Virtuality & SPFlagVirtuality) |
- (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
- (IsDefinition ? SPFlagDefinition : SPFlagZero) |
- (IsOptimized ? SPFlagOptimized : SPFlagZero) |
- (IsMainSubprogram ? SPFlagMainSubprogram : SPFlagZero));
- }
+ bool IsMainSubprogram = false);
private:
DIFlags Flags;
@@ -1892,13 +1846,7 @@ private:
DISubprogram(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned ScopeLine, unsigned VirtualIndex, int ThisAdjustment,
- DIFlags Flags, DISPFlags SPFlags, ArrayRef<Metadata *> Ops)
- : DILocalScope(C, DISubprogramKind, Storage, dwarf::DW_TAG_subprogram,
- Ops),
- Line(Line), ScopeLine(ScopeLine), VirtualIndex(VirtualIndex),
- ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags) {
- static_assert(dwarf::DW_VIRTUALITY_max < 4, "Virtuality out of range");
- }
+ DIFlags Flags, DISPFlags SPFlags, ArrayRef<Metadata *> Ops);
~DISubprogram() = default;
static DISubprogram *
@@ -1909,13 +1857,14 @@ private:
DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
DINodeArray RetainedNodes, DITypeArray ThrownTypes,
- DINodeArray Annotations, StorageType Storage,
- bool ShouldCreate = true) {
+ DINodeArray Annotations, StringRef TargetFuncName,
+ StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
ScopeLine, ContainingType, VirtualIndex, ThisAdjustment,
Flags, SPFlags, Unit, TemplateParams.get(), Declaration,
RetainedNodes.get(), ThrownTypes.get(), Annotations.get(),
+ getCanonicalMDString(Context, TargetFuncName),
Storage, ShouldCreate);
}
static DISubprogram *
@@ -1925,7 +1874,8 @@ private:
int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration,
Metadata *RetainedNodes, Metadata *ThrownTypes, Metadata *Annotations,
- StorageType Storage, bool ShouldCreate = true);
+ MDString *TargetFuncName, StorageType Storage,
+ bool ShouldCreate = true);
TempDISubprogram cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
@@ -1933,7 +1883,8 @@ private:
getContainingType(), getVirtualIndex(),
getThisAdjustment(), getFlags(), getSPFlags(),
getUnit(), getTemplateParams(), getDeclaration(),
- getRetainedNodes(), getThrownTypes(), getAnnotations());
+ getRetainedNodes(), getThrownTypes(), getAnnotations(),
+ getTargetFuncName());
}
public:
@@ -1945,10 +1896,11 @@ public:
DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams = nullptr,
DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr,
- DITypeArray ThrownTypes = nullptr, DINodeArray Annotations = nullptr),
+ DITypeArray ThrownTypes = nullptr, DINodeArray Annotations = nullptr,
+ StringRef TargetFuncName = ""),
(Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
- Declaration, RetainedNodes, ThrownTypes, Annotations))
+ Declaration, RetainedNodes, ThrownTypes, Annotations, TargetFuncName))
DEFINE_MDNODE_GET(
DISubprogram,
@@ -1958,10 +1910,10 @@ public:
DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr,
Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr,
- Metadata *Annotations = nullptr),
+ Metadata *Annotations = nullptr, MDString *TargetFuncName = nullptr),
(Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
- Declaration, RetainedNodes, ThrownTypes, Annotations))
+ Declaration, RetainedNodes, ThrownTypes, Annotations, TargetFuncName))
TempDISubprogram clone() const { return cloneImpl(); }
@@ -2050,6 +2002,10 @@ public:
DIType *getContainingType() const {
return cast_or_null<DIType>(getRawContainingType());
}
+ void replaceType(DISubroutineType *Ty) {
+ assert(isDistinct() && "Only distinct nodes can mutate");
+ replaceOperandWith(4, Ty);
+ }
DICompileUnit *getUnit() const {
return cast_or_null<DICompileUnit>(getRawUnit());
@@ -2070,6 +2026,9 @@ public:
DINodeArray getAnnotations() const {
return cast_or_null<MDTuple>(getRawAnnotations());
}
+ StringRef getTargetFuncName() const {
+ return getRawTargetFuncName() ? getStringOperand(12) : StringRef();
+ }
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
@@ -2090,6 +2049,9 @@ public:
Metadata *getRawAnnotations() const {
return getNumOperands() > 11 ? getOperandAs<Metadata>(11) : nullptr;
}
+ MDString *getRawTargetFuncName() const {
+ return getNumOperands() > 12 ? getOperandAs<MDString>(12) : nullptr;
+ }
void replaceRawLinkageName(MDString *LinkageName) {
replaceOperandWith(3, LinkageName);
@@ -2108,8 +2070,7 @@ public:
class DILexicalBlockBase : public DILocalScope {
protected:
DILexicalBlockBase(LLVMContext &C, unsigned ID, StorageType Storage,
- ArrayRef<Metadata *> Ops)
- : DILocalScope(C, ID, Storage, dwarf::DW_TAG_lexical_block, Ops) {}
+ ArrayRef<Metadata *> Ops);
~DILexicalBlockBase() = default;
public:
@@ -2301,10 +2262,7 @@ class DINamespace : public DIScope {
unsigned ExportSymbols : 1;
DINamespace(LLVMContext &Context, StorageType Storage, bool ExportSymbols,
- ArrayRef<Metadata *> Ops)
- : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace,
- Ops),
- ExportSymbols(ExportSymbols) {}
+ ArrayRef<Metadata *> Ops);
~DINamespace() = default;
static DINamespace *getImpl(LLVMContext &Context, DIScope *Scope,
@@ -2353,9 +2311,7 @@ class DIModule : public DIScope {
bool IsDecl;
DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo,
- bool IsDecl, ArrayRef<Metadata *> Ops)
- : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops),
- LineNo(LineNo), IsDecl(IsDecl) {}
+ bool IsDecl, ArrayRef<Metadata *> Ops);
~DIModule() = default;
static DIModule *getImpl(LLVMContext &Context, DIFile *File, DIScope *Scope,
@@ -2449,10 +2405,7 @@ class DITemplateTypeParameter : public DITemplateParameter {
friend class MDNode;
DITemplateTypeParameter(LLVMContext &Context, StorageType Storage,
- bool IsDefault, ArrayRef<Metadata *> Ops)
- : DITemplateParameter(Context, DITemplateTypeParameterKind, Storage,
- dwarf::DW_TAG_template_type_parameter, IsDefault,
- Ops) {}
+ bool IsDefault, ArrayRef<Metadata *> Ops);
~DITemplateTypeParameter() = default;
static DITemplateTypeParameter *getImpl(LLVMContext &Context, StringRef Name,
@@ -2541,10 +2494,8 @@ class DIVariable : public DINode {
uint32_t AlignInBits;
protected:
- DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Line,
- ArrayRef<Metadata *> Ops, uint32_t AlignInBits = 0)
- : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line),
- AlignInBits(AlignInBits) {}
+ DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, signed Line,
+ ArrayRef<Metadata *> Ops, uint32_t AlignInBits = 0);
~DIVariable() = default;
public:
@@ -2763,9 +2714,7 @@ public:
}
/// Return whether the first element is a DW_OP_deref.
- bool startsWithDeref() const {
- return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref;
- }
+ bool startsWithDeref() const;
/// Holds the characteristics of one fragment of a larger variable.
struct FragmentInfo {
@@ -2783,7 +2732,7 @@ public:
}
/// Return whether this is a piece of an aggregate variable.
- bool isFragment() const { return getFragmentInfo().hasValue(); }
+ bool isFragment() const { return getFragmentInfo().has_value(); }
/// Return whether this is an implicit location description.
bool isImplicit() const;
@@ -2923,10 +2872,7 @@ public:
/// Check if the expression consists of exactly one entry value operand.
/// (This is the only configuration of entry values that is supported.)
- bool isEntryValue() const {
- return getNumElements() > 0 &&
- getElement(0) == dwarf::DW_OP_LLVM_entry_value;
- }
+ bool isEntryValue() const;
/// Try to shorten an expression with an initial constant operand.
/// Returns a new expression and constant on success, or the original
@@ -3057,10 +3003,7 @@ class DICommonBlock : public DIScope {
friend class MDNode;
DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo,
- ArrayRef<Metadata *> Ops)
- : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
- Ops),
- LineNo(LineNo) {}
+ ArrayRef<Metadata *> Ops);
static DICommonBlock *getImpl(LLVMContext &Context, DIScope *Scope,
DIGlobalVariable *Decl, StringRef Name,
@@ -3209,8 +3152,7 @@ class DILabel : public DINode {
unsigned Line;
DILabel(LLVMContext &C, StorageType Storage, unsigned Line,
- ArrayRef<Metadata *> Ops)
- : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
+ ArrayRef<Metadata *> Ops);
~DILabel() = default;
static DILabel *getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
@@ -3276,10 +3218,7 @@ class DIObjCProperty : public DINode {
unsigned Attributes;
DIObjCProperty(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned Attributes, ArrayRef<Metadata *> Ops)
- : DINode(C, DIObjCPropertyKind, Storage, dwarf::DW_TAG_APPLE_property,
- Ops),
- Line(Line), Attributes(Attributes) {}
+ unsigned Attributes, ArrayRef<Metadata *> Ops);
~DIObjCProperty() = default;
static DIObjCProperty *
@@ -3705,7 +3644,7 @@ public:
const DILocation *getInlinedAt() const { return InlinedAt; }
FragmentInfo getFragmentOrDefault() const {
- return Fragment.getValueOr(DefaultFragment);
+ return Fragment.value_or(DefaultFragment);
}
static bool isDefaultFragment(const FragmentInfo F) {
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index f52ce3cde318..f505fd3f3e32 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -659,7 +659,7 @@ public:
}
/// This constructs a pointer type with the same pointee type as input
- /// PointerType (or opaque pointer is the input PointerType is opaque) and the
+ /// PointerType (or opaque pointer if the input PointerType is opaque) and the
/// given address space. This is only useful during the opaque pointer
/// transition.
/// TODO: remove after opaque pointer transition is complete.
@@ -670,13 +670,6 @@ public:
return get(PT->PointeeTy, AddressSpace);
}
- [[deprecated("Pointer element types are deprecated. You can *temporarily* "
- "use Type::getPointerElementType() instead")]]
- Type *getElementType() const {
- assert(!isOpaque() && "Attempting to get element type of opaque pointer");
- return PointeeTy;
- }
-
bool isOpaque() const { return !PointeeTy; }
/// Return true if the specified type is valid as a element type.
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 1ea1d9787d61..da37801b6d19 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -85,6 +85,7 @@ enum DiagnosticKind {
DK_Unsupported,
DK_SrcMgr,
DK_DontCall,
+ DK_MisExpect,
DK_FirstPluginKind // Must be last value to work with
// getNextAvailablePluginDiagnosticKind
};
@@ -1032,6 +1033,25 @@ public:
void print(DiagnosticPrinter &DP) const override;
};
+/// Diagnostic information for MisExpect analysis.
+class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase {
+public:
+ DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg);
+
+ /// \see DiagnosticInfo::print.
+ void print(DiagnosticPrinter &DP) const override;
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_MisExpect;
+ }
+
+ const Twine &getMsg() const { return Msg; }
+
+private:
+ /// Message to report.
+ const Twine &Msg;
+};
+
static DiagnosticSeverity getDiagnosticSeverity(SourceMgr::DiagKind DK) {
switch (DK) {
case llvm::SourceMgr::DK_Error:
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h
index d13a5856df3b..a381c075d77b 100644
--- a/llvm/include/llvm/IR/Dominators.h
+++ b/llvm/include/llvm/IR/Dominators.h
@@ -14,6 +14,7 @@
#ifndef LLVM_IR_DOMINATORS_H
#define LLVM_IR_DOMINATORS_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
@@ -22,6 +23,8 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/PassManager.h"
@@ -31,6 +34,7 @@
#include "llvm/Support/CFGUpdate.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
+#include <algorithm>
#include <utility>
#include <vector>
diff --git a/llvm/include/llvm/IR/FMF.h b/llvm/include/llvm/IR/FMF.h
new file mode 100644
index 000000000000..a49feb5a8946
--- /dev/null
+++ b/llvm/include/llvm/IR/FMF.h
@@ -0,0 +1,121 @@
+//===-- llvm/FMF.h - Fast math flags subclass -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the fast math flags.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_FMF_H
+#define LLVM_IR_FMF_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// Convenience struct for specifying and reasoning about fast-math flags.
+class FastMathFlags {
+private:
+ friend class FPMathOperator;
+
+ unsigned Flags = 0;
+
+ FastMathFlags(unsigned F) {
+ // If all 7 bits are set, turn this into -1. If the number of bits grows,
+ // this must be updated. This is intended to provide some forward binary
+ // compatibility insurance for the meaning of 'fast' in case bits are added.
+ if (F == 0x7F) Flags = ~0U;
+ else Flags = F;
+ }
+
+public:
+ // This is how the bits are used in Value::SubclassOptionalData so they
+ // should fit there too.
+ // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New
+ // functionality will require a change in how this information is stored.
+ enum {
+ AllowReassoc = (1 << 0),
+ NoNaNs = (1 << 1),
+ NoInfs = (1 << 2),
+ NoSignedZeros = (1 << 3),
+ AllowReciprocal = (1 << 4),
+ AllowContract = (1 << 5),
+ ApproxFunc = (1 << 6)
+ };
+
+ FastMathFlags() = default;
+
+ static FastMathFlags getFast() {
+ FastMathFlags FMF;
+ FMF.setFast();
+ return FMF;
+ }
+
+ bool any() const { return Flags != 0; }
+ bool none() const { return Flags == 0; }
+ bool all() const { return Flags == ~0U; }
+
+ void clear() { Flags = 0; }
+ void set() { Flags = ~0U; }
+
+ /// Flag queries
+ bool allowReassoc() const { return 0 != (Flags & AllowReassoc); }
+ bool noNaNs() const { return 0 != (Flags & NoNaNs); }
+ bool noInfs() const { return 0 != (Flags & NoInfs); }
+ bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); }
+ bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); }
+ bool allowContract() const { return 0 != (Flags & AllowContract); }
+ bool approxFunc() const { return 0 != (Flags & ApproxFunc); }
+ /// 'Fast' means all bits are set.
+ bool isFast() const { return all(); }
+
+ /// Flag setters
+ void setAllowReassoc(bool B = true) {
+ Flags = (Flags & ~AllowReassoc) | B * AllowReassoc;
+ }
+ void setNoNaNs(bool B = true) {
+ Flags = (Flags & ~NoNaNs) | B * NoNaNs;
+ }
+ void setNoInfs(bool B = true) {
+ Flags = (Flags & ~NoInfs) | B * NoInfs;
+ }
+ void setNoSignedZeros(bool B = true) {
+ Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros;
+ }
+ void setAllowReciprocal(bool B = true) {
+ Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal;
+ }
+ void setAllowContract(bool B = true) {
+ Flags = (Flags & ~AllowContract) | B * AllowContract;
+ }
+ void setApproxFunc(bool B = true) {
+ Flags = (Flags & ~ApproxFunc) | B * ApproxFunc;
+ }
+ void setFast(bool B = true) { B ? set() : clear(); }
+
+ void operator&=(const FastMathFlags &OtherFlags) {
+ Flags &= OtherFlags.Flags;
+ }
+ void operator|=(const FastMathFlags &OtherFlags) {
+ Flags |= OtherFlags.Flags;
+ }
+ bool operator!=(const FastMathFlags &OtherFlags) const {
+ return Flags != OtherFlags.Flags;
+ }
+
+ /// Print fast-math flags to \p O.
+ void print(raw_ostream &O) const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) {
+ FMF.print(O);
+ return O;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_IR_FMF_H
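As a quick illustration of the flag algebra in the new header (a sketch, not part of the file): the individual setters OR in single bits, while setFast() saturates all of them, so isFast() is equivalent to all().

    #include "llvm/IR/FMF.h"
    #include <cassert>
    using namespace llvm;

    void fmfDemo() {
      FastMathFlags FMF;             // none() is true
      FMF.setNoNaNs();
      FMF.setAllowReciprocal();
      assert(FMF.any() && !FMF.isFast());
      FMF.setFast();                 // sets every bit, i.e. Flags == ~0U
      assert(FMF.isFast());
    }
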
diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h
index bf435ec6d109..e598db224211 100644
--- a/llvm/include/llvm/IR/FPEnv.h
+++ b/llvm/include/llvm/IR/FPEnv.h
@@ -17,10 +17,17 @@
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/IR/FMF.h"
namespace llvm {
class StringRef;
+namespace Intrinsic {
+typedef unsigned ID;
+}
+
+class Instruction;
+
namespace fp {
/// Exception behavior used for floating point operations.
@@ -59,10 +66,22 @@ inline bool isDefaultFPEnvironment(fp::ExceptionBehavior EB, RoundingMode RM) {
return EB == fp::ebIgnore && RM == RoundingMode::NearestTiesToEven;
}
+/// Returns the constrained intrinsic ID used to represent the given
+/// instruction in a strictfp function. If the instruction is already a
+/// constrained intrinsic or does not have a constrained intrinsic
+/// counterpart, the function returns zero.
+Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr);
+
/// Returns true if the rounding mode RM may be QRM at compile time or
/// at run time.
inline bool canRoundingModeBe(RoundingMode RM, RoundingMode QRM) {
return RM == QRM || RM == RoundingMode::Dynamic;
}
+
+/// Returns true if the possibility of a signaling NaN can be safely
+/// ignored.
+inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) {
+ return (EB == fp::ebIgnore || FMF.noNaNs());
+}
}
#endif
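A small sketch of the two ways the new predicate can be satisfied, using only the APIs in this hunk:

    #include "llvm/IR/FPEnv.h"
    using namespace llvm;

    bool snanDemo() {
      FastMathFlags FMF;
      bool ViaEB = canIgnoreSNaN(fp::ebIgnore, FMF);   // exceptions ignored
      FMF.setNoNaNs();
      bool ViaFMF = canIgnoreSNaN(fp::ebStrict, FMF);  // nnan rules out sNaNs
      return ViaEB && ViaFMF;                          // both true
    }
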
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index 31979cd2f9db..7c32c5d13760 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -42,3 +42,5 @@ LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27)
LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28)
LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29)
LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30)
+LLVM_FIXED_MD_KIND(MD_nosanitize, "nosanitize", 31)
+LLVM_FIXED_MD_KIND(MD_func_sanitize, "func_sanitize", 32)
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index 90095cd1bc77..7945c64c8610 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -32,8 +32,6 @@
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -290,7 +288,7 @@ public:
/// profile annotations. If IncludeSynthetic is false, only return true
/// when the profile data is real.
bool hasProfileData(bool IncludeSynthetic = false) const {
- return getEntryCount(IncludeSynthetic).hasValue();
+ return getEntryCount(IncludeSynthetic).has_value();
}
/// Returns the set of GUIDs that needs to be imported to the function for
@@ -486,11 +484,12 @@ public:
return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo);
}
- /// A function will have the "coroutine.presplit" attribute if it's
- /// a coroutine and has not gone through full CoroSplit pass.
+ /// Determine if the function is a presplit coroutine.
bool isPresplitCoroutine() const {
- return hasFnAttribute("coroutine.presplit");
+ return hasFnAttribute(Attribute::PresplitCoroutine);
}
+ void setPresplitCoroutine() { addFnAttr(Attribute::PresplitCoroutine); }
+ void setSplittedCoroutine() { removeFnAttr(Attribute::PresplitCoroutine); }
/// Determine if the function does not access memory.
bool doesNotAccessMemory() const {
@@ -623,15 +622,19 @@ public:
bool willReturn() const { return hasFnAttribute(Attribute::WillReturn); }
void setWillReturn() { addFnAttr(Attribute::WillReturn); }
+ /// Get what kind of unwind table entry to generate for this function.
+ UWTableKind getUWTableKind() const {
+ return AttributeSets.getUWTableKind();
+ }
+
/// True if the ABI mandates (or the user requested) that this
/// function be in an unwind table.
bool hasUWTable() const {
- return hasFnAttribute(Attribute::UWTable);
+ return getUWTableKind() != UWTableKind::None;
}
- void setHasUWTable() {
- addFnAttr(Attribute::UWTable);
+ void setUWTableKind(UWTableKind K) {
+ addFnAttr(Attribute::getWithUWTableKind(getContext(), K));
}
-
/// True if this function needs an unwind table.
bool needsUnwindTableEntry() const {
return hasUWTable() || !doesNotThrow() || hasPersonalityFn();
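A hedged sketch of the new attribute-backed API, assuming F is an existing llvm::Function: hasUWTable() is now derived from the unwind-table kind rather than from a plain boolean attribute.

    #include "llvm/IR/Function.h"
    #include <cassert>
    using namespace llvm;

    void requestAsyncUnwindTable(Function &F) {
      F.setUWTableKind(UWTableKind::Async); // stored as an enum-valued attribute
      assert(F.getUWTableKind() == UWTableKind::Async);
      assert(F.hasUWTable());               // any kind other than None counts
    }
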
diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h
index 4fa8e3a8dcf4..41024469044f 100644
--- a/llvm/include/llvm/IR/GCStrategy.h
+++ b/llvm/include/llvm/IR/GCStrategy.h
@@ -38,9 +38,7 @@
// When used with gc.statepoint, information about safepoint and roots can be
// found in the binary StackMap section after code generation. Safepoint
// placement is currently the responsibility of the frontend, though late
-// insertion support is planned. gc.statepoint does not currently support
-// custom stack map formats; such can be generated by parsing the standard
-// stack map section if desired.
+// insertion support is planned.
//
// The read and write barrier support can be used with either implementation.
//
@@ -101,6 +99,11 @@ public:
}
///@}
+ /// If set, appropriate metadata tables must be emitted by the back-end
+ /// (assembler, JIT, or otherwise). The default stackmap information can be
+ /// found in the StackMap section as described in the documentation.
+ bool usesMetadata() const { return UsesMetadata; }
+
/** @name GCRoot Specific Properties
* These properties and overrides only apply to collector strategies using
* GCRoot.
@@ -110,12 +113,6 @@ public:
/// True if safe points need to be inferred on call sites
bool needsSafePoints() const { return NeededSafePoints; }
- /// If set, appropriate metadata tables must be emitted by the back-end
- /// (assembler, JIT, or otherwise). For statepoint, this method is
- /// currently unsupported. The stackmap information can be found in the
- /// StackMap section as described in the documentation.
- bool usesMetadata() const { return UsesMetadata; }
-
///@}
};
@@ -126,7 +123,7 @@ public:
/// static GCRegistry::Add<CustomGC> X("custom-name",
/// "my custom supper fancy gc strategy");
///
-/// Note that to use a custom GCMetadataPrinter w/gc.roots, you must also
+/// Note that to use a custom GCMetadataPrinter, you must also
/// register your GCMetadataPrinter subclass with the
/// GCMetadataPrinterRegistry as well.
using GCRegistry = Registry<GCStrategy>;
diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h
index 10088ee2fff4..976772b343fd 100644
--- a/llvm/include/llvm/IR/GlobalIFunc.h
+++ b/llvm/include/llvm/IR/GlobalIFunc.h
@@ -84,6 +84,11 @@ public:
return FunctionType::get(IFuncValTy->getPointerTo(), false);
}
+ static bool isValidLinkage(LinkageTypes L) {
+ return isExternalLinkage(L) || isLocalLinkage(L) || isWeakLinkage(L) ||
+ isLinkOnceLinkage(L);
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() == Value::GlobalIFuncVal;
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 0bb9fd730059..96a270316686 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -43,13 +43,12 @@ protected:
GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
LinkageTypes Linkage, const Twine &Name,
unsigned AddressSpace = 0)
- : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace),
- ObjComdat(nullptr) {
+ : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace) {
setGlobalValueSubClassData(0);
}
~GlobalObject();
- Comdat *ObjComdat;
+ Comdat *ObjComdat = nullptr;
enum {
LastAlignmentBit = 5,
HasSectionHashEntryBit,
diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h
index 1818f2a8f3cc..a17423dd965b 100644
--- a/llvm/include/llvm/IR/GlobalValue.h
+++ b/llvm/include/llvm/IR/GlobalValue.h
@@ -80,14 +80,14 @@ protected:
UnnamedAddrVal(unsigned(UnnamedAddr::None)),
DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal),
HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false),
- IntID((Intrinsic::ID)0U), Parent(nullptr) {
+ HasSanitizerMetadata(false) {
setLinkage(Linkage);
setName(Name);
}
Type *ValueType;
- static const unsigned GlobalValueSubClassDataBits = 16;
+ static const unsigned GlobalValueSubClassDataBits = 15;
// All bitfields use unsigned as the underlying type so that MSVC will pack
// them.
@@ -112,9 +112,14 @@ protected:
/// https://lld.llvm.org/Partitions.html).
unsigned HasPartition : 1;
+ /// True if this symbol has sanitizer metadata available. Should only happen
+ /// if sanitizers were enabled when building the translation unit which
+ /// contains this GV.
+ unsigned HasSanitizerMetadata : 1;
+
private:
// Give subclasses access to what otherwise would be wasted padding.
- // (16 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1) == 32.
+ // (15 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1 + 1) == 32.
unsigned SubClassData : GlobalValueSubClassDataBits;
friend class Constant;
@@ -153,7 +158,7 @@ protected:
/// Subclasses can use it to store their intrinsic ID, if they have one.
///
/// This is stored here to save space in Function on 64-bit hosts.
- Intrinsic::ID IntID;
+ Intrinsic::ID IntID = (Intrinsic::ID)0U;
unsigned getGlobalValueSubClassData() const {
return SubClassData;
@@ -163,7 +168,7 @@ protected:
SubClassData = V;
}
- Module *Parent; // The containing module.
+ Module *Parent = nullptr; // The containing module.
// Used by SymbolTableListTraits.
void setParent(Module *parent) {
@@ -289,6 +294,43 @@ public:
StringRef getPartition() const;
void setPartition(StringRef Part);
+ // ASan, HWASan and Memtag sanitizers have some instrumentation that applies
+ // specifically to global variables. This instrumentation is implicitly
+ // applied to all global variables when built with -fsanitize=*. What we need
+ // is a way to persist the information that a certain global variable should
+ // *not* have sanitizers applied, which occurs if:
+ // 1. The global variable is in the sanitizer ignore list, or
+ // 2. The global variable is created by the sanitizer itself for internal
+ // usage, or
+ // 3. The global variable has __attribute__((no_sanitize("..."))) or
+ // __attribute__((disable_sanitizer_instrumentation)).
+ //
+ // This is important, as some IR passes like GlobalMerge can delete global
+ // variables and replace them with new ones. If the old variables were marked
+ // to be unsanitized, then the new ones should also be.
+ struct SanitizerMetadata {
+ SanitizerMetadata()
+ : NoAddress(false), NoHWAddress(false), NoMemtag(false),
+ IsDynInit(false) {}
+ unsigned NoAddress : 1;
+ unsigned NoHWAddress : 1;
+ unsigned NoMemtag : 1;
+
+ // ASan-specific metadata. True if this global variable is dynamically
+ // initialized (from a C++ language perspective) and should therefore be
+ // checked for ODR violations.
+ unsigned IsDynInit : 1;
+ };
+
+ bool hasSanitizerMetadata() const { return HasSanitizerMetadata; }
+ const SanitizerMetadata &getSanitizerMetadata() const;
+ // Note: Not byref as it's a POD and otherwise it's too easy to call
+ // G1.setSanitizerMetadata(G2.getSanitizerMetadata()), and the argument
+ // becomes dangling when the backing storage allocates the metadata for
+ // `G1`, as the storage is shared between `G1` and `G2`.
+ void setSanitizerMetadata(SanitizerMetadata Meta);
+ void removeSanitizerMetadata();
+
static LinkageTypes getLinkOnceLinkage(bool ODR) {
return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
}
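Illustrating the pass-by-value contract called out in the comment above (a sketch; the helper name is invented): take a copy first, so the argument cannot dangle if the backing storage reallocates.

    #include "llvm/IR/GlobalVariable.h"
    using namespace llvm;

    // Hypothetical helper: propagate the no-sanitize bits from Src to Dst.
    void copySanitizerMetadata(GlobalVariable &Dst, const GlobalVariable &Src) {
      if (!Src.hasSanitizerMetadata())
        return;
      GlobalValue::SanitizerMetadata MD = Src.getSanitizerMetadata(); // copy
      Dst.setSanitizerMetadata(MD); // safe: MD is a local POD copy
    }
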
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index a1789759960d..d8f08934b3d6 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -25,7 +25,6 @@
#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FPEnv.h"
@@ -77,7 +76,7 @@ class IRBuilderCallbackInserter : public IRBuilderDefaultInserter {
std::function<void(Instruction *)> Callback;
public:
- virtual ~IRBuilderCallbackInserter();
+ ~IRBuilderCallbackInserter() override;
IRBuilderCallbackInserter(std::function<void(Instruction *)> Callback)
: Callback(std::move(Callback)) {}
@@ -125,21 +124,18 @@ protected:
MDNode *DefaultFPMathTag;
FastMathFlags FMF;
- bool IsFPConstrained;
- fp::ExceptionBehavior DefaultConstrainedExcept;
- RoundingMode DefaultConstrainedRounding;
+ bool IsFPConstrained = false;
+ fp::ExceptionBehavior DefaultConstrainedExcept = fp::ebStrict;
+ RoundingMode DefaultConstrainedRounding = RoundingMode::Dynamic;
ArrayRef<OperandBundleDef> DefaultOperandBundles;
public:
IRBuilderBase(LLVMContext &context, const IRBuilderFolder &Folder,
- const IRBuilderDefaultInserter &Inserter,
- MDNode *FPMathTag, ArrayRef<OperandBundleDef> OpBundles)
+ const IRBuilderDefaultInserter &Inserter, MDNode *FPMathTag,
+ ArrayRef<OperandBundleDef> OpBundles)
: Context(context), Folder(Folder), Inserter(Inserter),
- DefaultFPMathTag(FPMathTag), IsFPConstrained(false),
- DefaultConstrainedExcept(fp::ebStrict),
- DefaultConstrainedRounding(RoundingMode::Dynamic),
- DefaultOperandBundles(OpBundles) {
+ DefaultFPMathTag(FPMathTag), DefaultOperandBundles(OpBundles) {
ClearInsertionPoint();
}
@@ -218,23 +214,11 @@ public:
}
/// Get location information used by debugging information.
- DebugLoc getCurrentDebugLocation() const {
- for (auto &KV : MetadataToCopy)
- if (KV.first == LLVMContext::MD_dbg)
- return {cast<DILocation>(KV.second)};
-
- return {};
- }
+ DebugLoc getCurrentDebugLocation() const;
/// If this builder has a current debug location, set it on the
/// specified instruction.
- void SetInstDebugLocation(Instruction *I) const {
- for (const auto &KV : MetadataToCopy)
- if (KV.first == LLVMContext::MD_dbg) {
- I->setDebugLoc(DebugLoc(KV.second));
- return;
- }
- }
+ void SetInstDebugLocation(Instruction *I) const;
/// Add all entries in MetadataToCopy to \p I.
void AddMetadataToInst(Instruction *I) const {
@@ -316,7 +300,7 @@ public:
void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) {
#ifndef NDEBUG
Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(NewExcept);
- assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
+ assert(ExceptStr && "Garbage strict exception behavior!");
#endif
DefaultConstrainedExcept = NewExcept;
}
@@ -325,7 +309,7 @@ public:
void setDefaultConstrainedRounding(RoundingMode NewRounding) {
#ifndef NDEBUG
Optional<StringRef> RoundingStr = convertRoundingModeToStr(NewRounding);
- assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
+ assert(RoundingStr && "Garbage strict rounding mode!");
#endif
DefaultConstrainedRounding = NewRounding;
}
@@ -556,6 +540,11 @@ public:
return Type::getVoidTy(Context);
}
+ /// Fetch the type representing a pointer.
+ PointerType *getPtrTy(unsigned AddrSpace = 0) {
+ return PointerType::get(Context, AddrSpace);
+ }
+
/// Fetch the type representing a pointer to an 8-bit integer value.
PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
return Type::getInt8PtrTy(Context, AddrSpace);
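The new getPtrTy accessor reflects opaque pointers: one pointer type per address space, with no pointee type required. A minimal sketch:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    void ptrTypeDemo(LLVMContext &Ctx) {
      IRBuilder<> B(Ctx);
      PointerType *P0 = B.getPtrTy();   // prints as "ptr"
      PointerType *P1 = B.getPtrTy(1);  // prints as "ptr addrspace(1)"
      (void)P0; (void)P1;
    }
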
@@ -589,6 +578,12 @@ public:
MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
+ CallInst *CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, Value *Val,
+ Value *Size, bool IsVolatile = false,
+ MDNode *TBAATag = nullptr,
+ MDNode *ScopeTag = nullptr,
+ MDNode *NoAliasTag = nullptr);
+
/// Create and insert an element unordered-atomic memset of the region of
/// memory starting at the given pointer to the given value.
///
@@ -789,7 +784,7 @@ public:
/// Create a call to the experimental.gc.statepoint intrinsic to
/// start a new statepoint sequence.
CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualCallee,
+ FunctionCallee ActualCallee,
ArrayRef<Value *> CallArgs,
Optional<ArrayRef<Value *>> DeoptArgs,
ArrayRef<Value *> GCArgs,
@@ -798,7 +793,7 @@ public:
/// Create a call to the experimental.gc.statepoint intrinsic to
/// start a new statepoint sequence.
CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualCallee, uint32_t Flags,
+ FunctionCallee ActualCallee, uint32_t Flags,
ArrayRef<Value *> CallArgs,
Optional<ArrayRef<Use>> TransitionArgs,
Optional<ArrayRef<Use>> DeoptArgs,
@@ -809,7 +804,8 @@ public:
/// in using makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be
/// .get()'ed to get the Value pointer.
CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualCallee, ArrayRef<Use> CallArgs,
+ FunctionCallee ActualCallee,
+ ArrayRef<Use> CallArgs,
Optional<ArrayRef<Value *>> DeoptArgs,
ArrayRef<Value *> GCArgs,
const Twine &Name = "");
@@ -818,7 +814,7 @@ public:
/// start a new statepoint sequence.
InvokeInst *
CreateGCStatepointInvoke(uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualInvokee, BasicBlock *NormalDest,
+ FunctionCallee ActualInvokee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, ArrayRef<Value *> InvokeArgs,
Optional<ArrayRef<Value *>> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name = "");
@@ -826,7 +822,7 @@ public:
/// Create an invoke to the experimental.gc.statepoint intrinsic to
/// start a new statepoint sequence.
InvokeInst *CreateGCStatepointInvoke(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee,
BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags,
ArrayRef<Value *> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs,
Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs,
@@ -837,7 +833,7 @@ public:
// get the Value *.
InvokeInst *
CreateGCStatepointInvoke(uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualInvokee, BasicBlock *NormalDest,
+ FunctionCallee ActualInvokee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, ArrayRef<Use> InvokeArgs,
Optional<ArrayRef<Value *>> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name = "");
@@ -918,18 +914,18 @@ public:
Name);
}
- /// Create a call to the experimental.vector.extract intrinsic.
+ /// Create a call to the vector.extract intrinsic.
CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx,
const Twine &Name = "") {
- return CreateIntrinsic(Intrinsic::experimental_vector_extract,
+ return CreateIntrinsic(Intrinsic::vector_extract,
{DstType, SrcVec->getType()}, {SrcVec, Idx}, nullptr,
Name);
}
- /// Create a call to the experimental.vector.insert intrinsic.
+ /// Create a call to the vector.insert intrinsic.
CallInst *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
Value *Idx, const Twine &Name = "") {
- return CreateIntrinsic(Intrinsic::experimental_vector_insert,
+ return CreateIntrinsic(Intrinsic::vector_insert,
{DstType, SubVec->getType()}, {SrcVec, SubVec, Idx},
nullptr, Name);
}
@@ -1162,21 +1158,14 @@ private:
return I;
}
- Value *foldConstant(Instruction::BinaryOps Opc, Value *L,
- Value *R, const Twine &Name) const {
- auto *LC = dyn_cast<Constant>(L);
- auto *RC = dyn_cast<Constant>(R);
- return (LC && RC) ? Insert(Folder.CreateBinOp(Opc, LC, RC), Name) : nullptr;
- }
-
Value *getConstrainedFPRounding(Optional<RoundingMode> Rounding) {
RoundingMode UseRounding = DefaultConstrainedRounding;
- if (Rounding.hasValue())
+ if (Rounding)
UseRounding = Rounding.getValue();
Optional<StringRef> RoundingStr = convertRoundingModeToStr(UseRounding);
- assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
+ assert(RoundingStr && "Garbage strict rounding mode!");
auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue());
return MetadataAsValue::get(Context, RoundingMDS);
@@ -1185,11 +1174,11 @@ private:
Value *getConstrainedFPExcept(Optional<fp::ExceptionBehavior> Except) {
fp::ExceptionBehavior UseExcept = DefaultConstrainedExcept;
- if (Except.hasValue())
+ if (Except)
UseExcept = Except.getValue();
Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(UseExcept);
- assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
+ assert(ExceptStr && "Garbage strict exception behavior!");
auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue());
return MetadataAsValue::get(Context, ExceptMDS);
@@ -1210,10 +1199,11 @@ private:
public:
Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
- if (auto *V = Folder.FoldAdd(LHS, RHS, HasNUW, HasNSW))
+ if (Value *V =
+ Folder.FoldNoWrapBinOp(Instruction::Add, LHS, RHS, HasNUW, HasNSW))
return V;
- return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name,
- HasNUW, HasNSW);
+ return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name, HasNUW,
+ HasNSW);
}
Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
@@ -1226,11 +1216,11 @@ public:
Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
- if (auto *LC = dyn_cast<Constant>(LHS))
- if (auto *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateSub(LC, RC, HasNUW, HasNSW), Name);
- return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name,
- HasNUW, HasNSW);
+ if (Value *V =
+ Folder.FoldNoWrapBinOp(Instruction::Sub, LHS, RHS, HasNUW, HasNSW))
+ return V;
+ return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name, HasNUW,
+ HasNSW);
}
Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
@@ -1243,11 +1233,11 @@ public:
Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
- if (auto *LC = dyn_cast<Constant>(LHS))
- if (auto *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateMul(LC, RC, HasNUW, HasNSW), Name);
- return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name,
- HasNUW, HasNSW);
+ if (Value *V =
+ Folder.FoldNoWrapBinOp(Instruction::Mul, LHS, RHS, HasNUW, HasNSW))
+ return V;
+ return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name, HasNUW,
+ HasNSW);
}
Value *CreateNSWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
@@ -1260,9 +1250,8 @@ public:
Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
bool isExact = false) {
- if (auto *LC = dyn_cast<Constant>(LHS))
- if (auto *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
+ if (Value *V = Folder.FoldExactBinOp(Instruction::UDiv, LHS, RHS, isExact))
+ return V;
if (!isExact)
return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
return Insert(BinaryOperator::CreateExactUDiv(LHS, RHS), Name);
@@ -1274,9 +1263,8 @@ public:
Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
bool isExact = false) {
- if (auto *LC = dyn_cast<Constant>(LHS))
- if (auto *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
+ if (Value *V = Folder.FoldExactBinOp(Instruction::SDiv, LHS, RHS, isExact))
+ return V;
if (!isExact)
return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name);
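The pattern repeated throughout these hunks is fold-first: each Create* method now asks the folder before constructing an instruction, so constant operands never reach Insert(). A minimal sketch:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    Value *foldedAdd(LLVMContext &Ctx) {
      IRBuilder<> B(Ctx);
      // Both operands are constants, so FoldNoWrapBinOp succeeds and no
      // add instruction is ever created.
      return B.CreateAdd(B.getInt32(2), B.getInt32(3)); // ConstantInt 5
    }
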
@@ -1287,20 +1275,22 @@ public:
}
Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Value *V = foldConstant(Instruction::URem, LHS, RHS, Name)) return V;
+ if (Value *V = Folder.FoldBinOp(Instruction::URem, LHS, RHS))
+ return V;
return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
}
Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Value *V = foldConstant(Instruction::SRem, LHS, RHS, Name)) return V;
+ if (Value *V = Folder.FoldBinOp(Instruction::SRem, LHS, RHS))
+ return V;
return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name);
}
Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
- if (auto *LC = dyn_cast<Constant>(LHS))
- if (auto *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
+ if (Value *V =
+ Folder.FoldNoWrapBinOp(Instruction::Shl, LHS, RHS, HasNUW, HasNSW))
+ return V;
return CreateInsertNUWNSWBinOp(Instruction::Shl, LHS, RHS, Name,
HasNUW, HasNSW);
}
@@ -1319,9 +1309,8 @@ public:
Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
bool isExact = false) {
- if (auto *LC = dyn_cast<Constant>(LHS))
- if (auto *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
+ if (Value *V = Folder.FoldExactBinOp(Instruction::LShr, LHS, RHS, isExact))
+ return V;
if (!isExact)
return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
return Insert(BinaryOperator::CreateExactLShr(LHS, RHS), Name);
@@ -1339,9 +1328,8 @@ public:
Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
bool isExact = false) {
- if (auto *LC = dyn_cast<Constant>(LHS))
- if (auto *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
+ if (Value *V = Folder.FoldExactBinOp(Instruction::AShr, LHS, RHS, isExact))
+ return V;
if (!isExact)
return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
return Insert(BinaryOperator::CreateExactAShr(LHS, RHS), Name);
@@ -1358,7 +1346,7 @@ public:
}
Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (auto *V = Folder.FoldAnd(LHS, RHS))
+ if (auto *V = Folder.FoldBinOp(Instruction::And, LHS, RHS))
return V;
return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
}
@@ -1380,7 +1368,7 @@ public:
}
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (auto *V = Folder.FoldOr(LHS, RHS))
+ if (auto *V = Folder.FoldBinOp(Instruction::Or, LHS, RHS))
return V;
return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
}
@@ -1402,7 +1390,8 @@ public:
}
Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Value *V = foldConstant(Instruction::Xor, LHS, RHS, Name)) return V;
+ if (Value *V = Folder.FoldBinOp(Instruction::Xor, LHS, RHS))
+ return V;
return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
}
@@ -1420,7 +1409,8 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
L, R, nullptr, Name, FPMD);
- if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FAdd, L, R, FMF))
+ return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), FPMD, FMF);
return Insert(I, Name);
}
@@ -1433,9 +1423,10 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
L, R, FMFSource, Name);
- if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
- Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), nullptr,
- FMFSource->getFastMathFlags());
+ FastMathFlags FMF = FMFSource->getFastMathFlags();
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FAdd, L, R, FMF))
+ return V;
+ Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), nullptr, FMF);
return Insert(I, Name);
}
@@ -1445,7 +1436,8 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub,
L, R, nullptr, Name, FPMD);
- if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FSub, L, R, FMF))
+ return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), FPMD, FMF);
return Insert(I, Name);
}
@@ -1458,9 +1450,10 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub,
L, R, FMFSource, Name);
- if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
- Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), nullptr,
- FMFSource->getFastMathFlags());
+ FastMathFlags FMF = FMFSource->getFastMathFlags();
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FSub, L, R, FMF))
+ return V;
+ Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), nullptr, FMF);
return Insert(I, Name);
}
@@ -1470,7 +1463,8 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul,
L, R, nullptr, Name, FPMD);
- if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FMul, L, R, FMF))
+ return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), FPMD, FMF);
return Insert(I, Name);
}
@@ -1483,9 +1477,10 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul,
L, R, FMFSource, Name);
- if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
- Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), nullptr,
- FMFSource->getFastMathFlags());
+ FastMathFlags FMF = FMFSource->getFastMathFlags();
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FMul, L, R, FMF))
+ return V;
+ Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), nullptr, FMF);
return Insert(I, Name);
}
@@ -1495,7 +1490,8 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv,
L, R, nullptr, Name, FPMD);
- if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FDiv, L, R, FMF))
+ return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), FPMD, FMF);
return Insert(I, Name);
}
@@ -1508,9 +1504,9 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv,
L, R, FMFSource, Name);
- if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
- Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), nullptr,
- FMFSource->getFastMathFlags());
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FDiv, L, R, FMF))
+ return V;
+ Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), nullptr, FMF);
return Insert(I, Name);
}
@@ -1520,7 +1516,7 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem,
L, R, nullptr, Name, FPMD);
- if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FRem, L, R, FMF)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), FPMD, FMF);
return Insert(I, Name);
}
@@ -1533,16 +1529,16 @@ public:
return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem,
L, R, FMFSource, Name);
- if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
- Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), nullptr,
- FMFSource->getFastMathFlags());
+ FastMathFlags FMF = FMFSource->getFastMathFlags();
+ if (Value *V = Folder.FoldBinOpFMF(Instruction::FRem, L, R, FMF)) return V;
+ Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), nullptr, FMF);
return Insert(I, Name);
}
Value *CreateBinOp(Instruction::BinaryOps Opc,
Value *LHS, Value *RHS, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
- if (Value *V = foldConstant(Opc, LHS, RHS, Name)) return V;
+ if (Value *V = Folder.FoldBinOp(Opc, LHS, RHS)) return V;
Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
if (isa<FPMathOperator>(BinOp))
setFPAttrs(BinOp, FPMathTag, FMF);
@@ -1576,14 +1572,10 @@ public:
Optional<RoundingMode> Rounding = None,
Optional<fp::ExceptionBehavior> Except = None);
- Value *CreateNeg(Value *V, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- if (auto *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
- BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
- if (HasNUW) BO->setHasNoUnsignedWrap();
- if (HasNSW) BO->setHasNoSignedWrap();
- return BO;
+ Value *CreateNeg(Value *V, const Twine &Name = "", bool HasNUW = false,
+ bool HasNSW = false) {
+ return CreateSub(Constant::getNullValue(V->getType()), V, Name, HasNUW,
+ HasNSW);
}
Value *CreateNSWNeg(Value *V, const Twine &Name = "") {
@@ -1614,9 +1606,7 @@ public:
}
Value *CreateNot(Value *V, const Twine &Name = "") {
- if (auto *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateNot(VC), Name);
- return Insert(BinaryOperator::CreateNot(V), Name);
+ return CreateXor(V, Constant::getAllOnesValue(V->getType()), Name);
}
Value *CreateUnOp(Instruction::UnaryOps Opc,
@@ -1733,30 +1723,18 @@ public:
}
Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name = "") {
- if (auto *V = Folder.FoldGEP(Ty, Ptr, IdxList, /*IsInBounds=*/false))
+ const Twine &Name = "", bool IsInBounds = false) {
+ if (auto *V = Folder.FoldGEP(Ty, Ptr, IdxList, IsInBounds))
return V;
- return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name);
+ return Insert(IsInBounds
+ ? GetElementPtrInst::CreateInBounds(Ty, Ptr, IdxList)
+ : GetElementPtrInst::Create(Ty, Ptr, IdxList),
+ Name);
}
Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name = "") {
- if (auto *V = Folder.FoldGEP(Ty, Ptr, IdxList, /*IsInBounds=*/true))
- return V;
- return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, IdxList), Name);
- }
-
- Value *CreateGEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "") {
- if (auto *V = Folder.FoldGEP(Ty, Ptr, {Idx}, /*IsInBounds=*/false))
- return V;
- return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
- }
-
- Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, Value *Idx,
- const Twine &Name = "") {
- if (auto *V = Folder.FoldGEP(Ty, Ptr, {Idx}, /*IsInBounds=*/true))
- return V;
- return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
+ return CreateGEP(Ty, Ptr, IdxList, Name, /* IsInBounds */ true);
}
Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0,
@@ -2297,9 +2275,8 @@ public:
Value *CreateExtractElement(Value *Vec, Value *Idx,
const Twine &Name = "") {
- if (auto *VC = dyn_cast<Constant>(Vec))
- if (auto *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateExtractElement(VC, IC), Name);
+ if (Value *V = Folder.FoldExtractElement(Vec, Idx))
+ return V;
return Insert(ExtractElementInst::Create(Vec, Idx), Name);
}
@@ -2320,10 +2297,8 @@ public:
Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
const Twine &Name = "") {
- if (auto *VC = dyn_cast<Constant>(Vec))
- if (auto *NC = dyn_cast<Constant>(NewElt))
- if (auto *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateInsertElement(VC, NC, IC), Name);
+ if (Value *V = Folder.FoldInsertElement(Vec, NewElt, Idx))
+ return V;
return Insert(InsertElementInst::Create(Vec, NewElt, Idx), Name);
}
@@ -2339,21 +2314,11 @@ public:
return CreateShuffleVector(V1, V2, IntMask, Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(Value *CreateShuffleVector(Value *V1, Value *V2,
- ArrayRef<uint32_t> Mask,
- const Twine &Name = ""),
- "Pass indices as 'int' instead") {
- SmallVector<int, 16> IntMask;
- IntMask.assign(Mask.begin(), Mask.end());
- return CreateShuffleVector(V1, V2, IntMask, Name);
- }
-
/// See class ShuffleVectorInst for a description of the mask representation.
Value *CreateShuffleVector(Value *V1, Value *V2, ArrayRef<int> Mask,
const Twine &Name = "") {
- if (auto *V1C = dyn_cast<Constant>(V1))
- if (auto *V2C = dyn_cast<Constant>(V2))
- return Insert(Folder.CreateShuffleVector(V1C, V2C, Mask), Name);
+ if (Value *V = Folder.FoldShuffleVector(V1, V2, Mask))
+ return V;
return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
}
@@ -2364,20 +2329,17 @@ public:
return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name);
}
- Value *CreateExtractValue(Value *Agg,
- ArrayRef<unsigned> Idxs,
+ Value *CreateExtractValue(Value *Agg, ArrayRef<unsigned> Idxs,
const Twine &Name = "") {
- if (auto *AggC = dyn_cast<Constant>(Agg))
- return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
+ if (auto *V = Folder.FoldExtractValue(Agg, Idxs))
+ return V;
return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
}
- Value *CreateInsertValue(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
+ Value *CreateInsertValue(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
const Twine &Name = "") {
- if (auto *AggC = dyn_cast<Constant>(Agg))
- if (auto *ValC = dyn_cast<Constant>(Val))
- return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);
+ if (auto *V = Folder.FoldInsertValue(Agg, Val, Idxs))
+ return V;
return Insert(InsertValueInst::Create(Agg, Val, Idxs), Name);
}
@@ -2394,16 +2356,25 @@ public:
// Utility creation methods
//===--------------------------------------------------------------------===//
- /// Return an i1 value testing if \p Arg is null.
+ /// Return a boolean value testing if \p Arg == 0.
Value *CreateIsNull(Value *Arg, const Twine &Name = "") {
- return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()),
- Name);
+ return CreateICmpEQ(Arg, ConstantInt::getNullValue(Arg->getType()), Name);
}
- /// Return an i1 value testing if \p Arg is not null.
+ /// Return a boolean value testing if \p Arg != 0.
Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") {
- return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()),
- Name);
+ return CreateICmpNE(Arg, ConstantInt::getNullValue(Arg->getType()), Name);
+ }
+
+ /// Return a boolean value testing if \p Arg < 0.
+ Value *CreateIsNeg(Value *Arg, const Twine &Name = "") {
+ return CreateICmpSLT(Arg, ConstantInt::getNullValue(Arg->getType()), Name);
+ }
+
+ /// Return a boolean value testing if \p Arg > -1.
+ Value *CreateIsNotNeg(Value *Arg, const Twine &Name = "") {
+ return CreateICmpSGT(Arg, ConstantInt::getAllOnesValue(Arg->getType()),
+ Name);
}
/// Return the i64 difference between two pointer values, dividing out
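The two new sign predicates expand exactly as their doc comments state, to signed compares against 0 and -1. A sketch, assuming X has integer type:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    Value *signDemo(IRBuilder<> &B, Value *X) {
      Value *Neg = B.CreateIsNeg(X);        // icmp slt X, 0
      Value *NonNeg = B.CreateIsNotNeg(X);  // icmp sgt X, -1
      return B.CreateXor(Neg, NonNeg);      // always true: exactly one holds
    }
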
diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h
index 2827ab553adc..9505f1e3be2a 100644
--- a/llvm/include/llvm/IR/IRBuilderFolder.h
+++ b/llvm/include/llvm/IR/IRBuilderFolder.h
@@ -31,12 +31,19 @@ public:
// Return an existing value or a constant if the operation can be simplified.
// Otherwise return nullptr.
//===--------------------------------------------------------------------===//
- virtual Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false,
- bool HasNSW = false) const = 0;
- virtual Value *FoldAnd(Value *LHS, Value *RHS) const = 0;
+ virtual Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS) const = 0;
- virtual Value *FoldOr(Value *LHS, Value *RHS) const = 0;
+ virtual Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS, bool IsExact) const = 0;
+
+ virtual Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS, bool HasNUW,
+ bool HasNSW) const = 0;
+
+ virtual Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS, FastMathFlags FMF) const = 0;
virtual Value *FoldICmp(CmpInst::Predicate P, Value *LHS,
Value *RHS) const = 0;
@@ -46,43 +53,25 @@ public:
virtual Value *FoldSelect(Value *C, Value *True, Value *False) const = 0;
- //===--------------------------------------------------------------------===//
- // Binary Operators
- //===--------------------------------------------------------------------===//
+ virtual Value *FoldExtractValue(Value *Agg,
+ ArrayRef<unsigned> IdxList) const = 0;
+
+ virtual Value *FoldInsertValue(Value *Agg, Value *Val,
+ ArrayRef<unsigned> IdxList) const = 0;
+
+ virtual Value *FoldExtractElement(Value *Vec, Value *Idx) const = 0;
- virtual Value *CreateFAdd(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateSub(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const = 0;
- virtual Value *CreateFSub(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateMul(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const = 0;
- virtual Value *CreateFMul(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateUDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const = 0;
- virtual Value *CreateSDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const = 0;
- virtual Value *CreateFDiv(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateURem(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateSRem(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateFRem(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateShl(Constant *LHS, Constant *RHS,
- bool HasNUW = false, bool HasNSW = false) const = 0;
- virtual Value *CreateLShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const = 0;
- virtual Value *CreateAShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const = 0;
- virtual Value *CreateXor(Constant *LHS, Constant *RHS) const = 0;
- virtual Value *CreateBinOp(Instruction::BinaryOps Opc,
- Constant *LHS, Constant *RHS) const = 0;
+ virtual Value *FoldInsertElement(Value *Vec, Value *NewElt,
+ Value *Idx) const = 0;
+
+ virtual Value *FoldShuffleVector(Value *V1, Value *V2,
+ ArrayRef<int> Mask) const = 0;
//===--------------------------------------------------------------------===//
// Unary Operators
//===--------------------------------------------------------------------===//
- virtual Value *CreateNeg(Constant *C,
- bool HasNUW = false, bool HasNSW = false) const = 0;
virtual Value *CreateFNeg(Constant *C) const = 0;
- virtual Value *CreateNot(Constant *C) const = 0;
virtual Value *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const = 0;
//===--------------------------------------------------------------------===//
@@ -110,20 +99,6 @@ public:
virtual Value *CreateFCmp(CmpInst::Predicate P, Constant *LHS,
Constant *RHS) const = 0;
-
- //===--------------------------------------------------------------------===//
- // Other Instructions
- //===--------------------------------------------------------------------===//
-
- virtual Value *CreateExtractElement(Constant *Vec, Constant *Idx) const = 0;
- virtual Value *CreateInsertElement(Constant *Vec, Constant *NewElt,
- Constant *Idx) const = 0;
- virtual Value *CreateShuffleVector(Constant *V1, Constant *V2,
- ArrayRef<int> Mask) const = 0;
- virtual Value *CreateExtractValue(Constant *Agg,
- ArrayRef<unsigned> IdxList) const = 0;
- virtual Value *CreateInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> IdxList) const = 0;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h
index cf6b7af96980..57f2da27e04e 100644
--- a/llvm/include/llvm/IR/InlineAsm.h
+++ b/llvm/include/llvm/IR/InlineAsm.h
@@ -240,12 +240,15 @@ public:
Kind_RegDefEarlyClobber = 3, // Early-clobber output register, "=&r".
Kind_Clobber = 4, // Clobbered register, "~r".
Kind_Imm = 5, // Immediate.
- Kind_Mem = 6, // Memory operand, "m".
+ Kind_Mem = 6, // Memory operand, "m", or an address, "p".
// Memory constraint codes.
// These could be tablegenerated but there's little need to do that since
// there's plenty of space in the encoding to support the union of all
// constraint codes for all targets.
+ // Addresses are included here as they need to be treated the same by the
+ // backend; the only difference is that they are not used to actually
+ // access memory by the instruction.
Constraint_Unknown = 0,
Constraint_es,
Constraint_i,
@@ -268,7 +271,15 @@ public:
Constraint_Z,
Constraint_ZC,
Constraint_Zy,
- Constraints_Max = Constraint_Zy,
+
+ // Address constraints
+ Constraint_p,
+ Constraint_ZQ,
+ Constraint_ZR,
+ Constraint_ZS,
+ Constraint_ZT,
+
+ Constraints_Max = Constraint_ZT,
Constraints_ShiftAmount = 16,
Flag_MatchingOperand = 0x80000000
@@ -453,6 +464,16 @@ public:
return "ZC";
case InlineAsm::Constraint_Zy:
return "Zy";
+ case InlineAsm::Constraint_p:
+ return "p";
+ case InlineAsm::Constraint_ZQ:
+ return "ZQ";
+ case InlineAsm::Constraint_ZR:
+ return "ZR";
+ case InlineAsm::Constraint_ZS:
+ return "ZS";
+ case InlineAsm::Constraint_ZT:
+ return "ZT";
default:
llvm_unreachable("Unknown memory constraint");
}
diff --git a/llvm/include/llvm/IR/InstVisitor.h b/llvm/include/llvm/IR/InstVisitor.h
index 585129904dd4..7fec081d8155 100644
--- a/llvm/include/llvm/IR/InstVisitor.h
+++ b/llvm/include/llvm/IR/InstVisitor.h
@@ -15,7 +15,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -200,7 +199,7 @@ public:
RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); }
RetTy visitFreezeInst(FreezeInst &I) { DELEGATE(Instruction); }
- // Handle the special instrinsic instruction classes.
+ // Handle the special intrinsic instruction classes.
RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgVariableIntrinsic);}
RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgVariableIntrinsic);}
RetTy visitDbgVariableIntrinsic(DbgVariableIntrinsic &I)
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 589926c0faf1..eb6f89d740c6 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -21,22 +21,16 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OperandTraits.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -47,6 +41,10 @@
namespace llvm {
+class StringRef;
+class Type;
+class Value;
+
namespace Intrinsic {
typedef unsigned ID;
}
@@ -1615,12 +1613,18 @@ public:
/// Get the attribute of a given kind for the function.
Attribute getFnAttr(StringRef Kind) const {
- return getAttributes().getFnAttr(Kind);
+ Attribute Attr = getAttributes().getFnAttr(Kind);
+ if (Attr.isValid())
+ return Attr;
+ return getFnAttrOnCalledFunction(Kind);
}
/// Get the attribute of a given kind for the function.
Attribute getFnAttr(Attribute::AttrKind Kind) const {
- return getAttributes().getFnAttr(Kind);
+ Attribute A = getAttributes().getFnAttr(Kind);
+ if (A.isValid())
+ return A;
+ return getFnAttrOnCalledFunction(Kind);
}
/// Get the attribute of a given kind from a given arg
@@ -1761,7 +1765,7 @@ public:
return nullptr;
}
- /// Extract the preallocated type for a call or parameter.
+ /// Extract the inalloca type for a call or parameter.
Type *getParamInAllocaType(unsigned ArgNo) const {
if (auto *Ty = Attrs.getParamInAllocaType(ArgNo))
return Ty;
@@ -1770,6 +1774,22 @@ public:
return nullptr;
}
+ /// Extract the sret type for a call or parameter.
+ Type *getParamStructRetType(unsigned ArgNo) const {
+ if (auto *Ty = Attrs.getParamStructRetType(ArgNo))
+ return Ty;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().getParamStructRetType(ArgNo);
+ return nullptr;
+ }
+
+ /// Extract the elementtype type for a parameter.
+ /// Note that elementtype() can only be applied to call arguments, not
+ /// function declaration parameters.
+ Type *getParamElementType(unsigned ArgNo) const {
+ return Attrs.getParamElementType(ArgNo);
+ }
+
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
uint64_t getRetDereferenceableBytes() const {
@@ -1806,7 +1826,13 @@ public:
/// If one of the arguments has the 'returned' attribute, returns its
/// operand value. Otherwise, return nullptr.
- Value *getReturnedArgOperand() const;
+ Value *getReturnedArgOperand() const {
+ return getArgOperandWithAttribute(Attribute::Returned);
+ }
+
+ /// If one of the arguments has the specified attribute, returns its
+ /// operand value. Otherwise, return nullptr.
+ Value *getArgOperandWithAttribute(Attribute::AttrKind Kind) const;
/// Return true if the call should not be treated as a call to a
/// builtin.
@@ -2052,7 +2078,8 @@ public:
bool hasClobberingOperandBundles() const {
for (auto &BOI : bundle_op_infos()) {
if (BOI.Tag->second == LLVMContext::OB_deopt ||
- BOI.Tag->second == LLVMContext::OB_funclet)
+ BOI.Tag->second == LLVMContext::OB_funclet ||
+ BOI.Tag->second == LLVMContext::OB_ptrauth)
continue;
// This instruction has an operand bundle that is not known to us.
@@ -2296,6 +2323,7 @@ private:
return hasFnAttrOnCalledFunction(Kind);
}
+ template <typename AK> Attribute getFnAttrOnCalledFunction(AK Kind) const;
/// A specialized version of hasFnAttrImpl for when the caller wants to
/// know if an attribute's semantics are implied, not whether the attribute
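With the generalized lookup added above, getReturnedArgOperand() becomes a one-line special case, and the same call works for any argument attribute. A sketch:

    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    Value *returnedOperand(const CallBase &CB) {
      // Equivalent to CB.getReturnedArgOperand() after this change.
      return CB.getArgOperandWithAttribute(Attribute::Returned);
    }
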
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 1937ffd36f7b..8d0a8363cdfb 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -24,7 +24,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Casting.h"
#include <cstdint>
#include <utility>
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 5929cff3b4fb..d152e86488e1 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -21,24 +21,18 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/OperandTraits.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstddef>
@@ -47,9 +41,14 @@
namespace llvm {
+class APFloat;
class APInt;
+class BasicBlock;
class ConstantInt;
class DataLayout;
+class StringRef;
+class Type;
+class Value;
//===----------------------------------------------------------------------===//
// AllocaInst Class
@@ -127,9 +126,6 @@ public:
setSubclassData<AlignmentField>(Log2(Align));
}
- // FIXME: Remove this one transition to Align is over.
- uint64_t getAlignment() const { return getAlign().value(); }
-
/// Return true if this alloca is in the entry block of the function and is a
/// constant size. If so, the code generator will fold it into the
/// prolog/epilog code, so it is basically free.
@@ -217,11 +213,6 @@ public:
void setVolatile(bool V) { setSubclassData<VolatileField>(V); }
/// Return the alignment of the access that is being performed.
- /// FIXME: Remove this function once transition to Align is over.
- /// Use getAlign() instead.
- uint64_t getAlignment() const { return getAlign().value(); }
-
- /// Return the alignment of the access that is being performed.
Align getAlign() const {
return Align(1ULL << (getSubclassData<AlignmentField>()));
}
@@ -347,11 +338,6 @@ public:
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// Return the alignment of the access that is being performed
- /// FIXME: Remove this function once transition to Align is over.
- /// Use getAlign() instead.
- uint64_t getAlignment() const { return getAlign().value(); }
-
Align getAlign() const {
return Align(1ULL << (getSubclassData<AlignmentField>()));
}
@@ -2138,6 +2124,12 @@ public:
static bool isIdentityMask(ArrayRef<int> Mask);
static bool isIdentityMask(const Constant *Mask) {
assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
+
SmallVector<int, 16> MaskAsInts;
getShuffleMask(Mask, MaskAsInts);
return isIdentityMask(MaskAsInts);
@@ -2148,6 +2140,11 @@ public:
/// from its input vectors.
/// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef>
bool isIdentity() const {
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
return !changesLength() && isIdentityMask(ShuffleMask);
}
@@ -5311,6 +5308,10 @@ public:
}
};
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
/// A helper function that returns the pointer operand of a load or store
/// instruction. Returns nullptr if not load or store.
inline const Value *getLoadStorePointerOperand(const Value *V) {
@@ -5366,6 +5367,24 @@ inline Type *getLoadStoreType(Value *I) {
return cast<StoreInst>(I)->getValueOperand()->getType();
}
+/// A helper function that returns an atomic operation's sync scope; returns
+/// None if it is not an atomic operation.
+inline Optional<SyncScope::ID> getAtomicSyncScopeID(const Instruction *I) {
+ if (!I->isAtomic())
+ return None;
+ if (auto *AI = dyn_cast<LoadInst>(I))
+ return AI->getSyncScopeID();
+ if (auto *AI = dyn_cast<StoreInst>(I))
+ return AI->getSyncScopeID();
+ if (auto *AI = dyn_cast<FenceInst>(I))
+ return AI->getSyncScopeID();
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I))
+ return AI->getSyncScopeID();
+ if (auto *AI = dyn_cast<AtomicRMWInst>(I))
+ return AI->getSyncScopeID();
+ llvm_unreachable("unhandled atomic operation");
+}
+
//===----------------------------------------------------------------------===//
// FreezeInst Class
//===----------------------------------------------------------------------===//
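A sketch of the new getAtomicSyncScopeID helper in use; the wrapper name below is invented for illustration:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // True iff I is an atomic operation scoped to a single thread.
    bool isSingleThreadAtomic(const Instruction *I) {
      if (Optional<SyncScope::ID> SSID = getAtomicSyncScopeID(I))
        return *SSID == SyncScope::SingleThread;
      return false; // not an atomic operation
    }
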
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 01dada25a285..06d2335821d3 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -31,7 +31,6 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include <cassert>
@@ -39,6 +38,8 @@
namespace llvm {
+class Metadata;
+
/// A wrapper class for inspecting calls to intrinsic functions.
/// This allows the standard isa/dyncast/cast functionality to work with calls
/// to intrinsic functions.
@@ -472,6 +473,38 @@ public:
/// @}
};
+class VPCastIntrinsic : public VPIntrinsic {
+public:
+ static bool isVPCast(Intrinsic::ID ID);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// @{
+ static bool classof(const IntrinsicInst *I) {
+ return VPCastIntrinsic::isVPCast(I->getIntrinsicID());
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ /// @}
+};
+
+class VPCmpIntrinsic : public VPIntrinsic {
+public:
+ static bool isVPCmp(Intrinsic::ID ID);
+
+ CmpInst::Predicate getPredicate() const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// @{
+ static bool classof(const IntrinsicInst *I) {
+ return VPCmpIntrinsic::isVPCmp(I->getIntrinsicID());
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ /// @}
+};
+
/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
@@ -492,6 +525,9 @@ public:
class ConstrainedFPCmpIntrinsic : public ConstrainedFPIntrinsic {
public:
FCmpInst::Predicate getPredicate() const;
+ bool isSignaling() const {
+ return getIntrinsicID() == Intrinsic::experimental_constrained_fcmps;
+ }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
@@ -723,11 +759,6 @@ public:
setArgOperand(ARG_DEST, Ptr);
}
- /// FIXME: Remove this function once transition to Align is over.
- /// Use the version that takes MaybeAlign instead of this one.
- void setDestAlignment(unsigned Alignment) {
- setDestAlignment(MaybeAlign(Alignment));
- }
void setDestAlignment(MaybeAlign Alignment) {
removeParamAttr(ARG_DEST, Attribute::Alignment);
if (Alignment)
@@ -942,6 +973,7 @@ public:
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
+ case Intrinsic::memset_inline:
case Intrinsic::memcpy_inline:
return true;
default:
@@ -953,12 +985,33 @@ public:
}
};
-/// This class wraps the llvm.memset intrinsic.
+/// This class wraps the llvm.memset and llvm.memset.inline intrinsics.
class MemSetInst : public MemSetBase<MemIntrinsic> {
public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memset;
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::memset:
+ case Intrinsic::memset_inline:
+ return true;
+ default:
+ return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This class wraps the llvm.memset.inline intrinsic.
+class MemSetInlineInst : public MemSetInst {
+public:
+ ConstantInt *getLength() const {
+ return cast<ConstantInt>(MemSetInst::getLength());
+ }
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memset_inline;
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
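// Because MemSetInlineInst derives from MemSetInst, generic memset handling
// now also matches llvm.memset.inline; a lowering sketch (illustrative only,
// `Inst` being a hypothetical instruction reference):
//
//   if (auto *MSI = dyn_cast<MemSetInst>(&Inst)) {
//     if (auto *Inline = dyn_cast<MemSetInlineInst>(MSI)) {
//       // Must be expanded inline (no libcall may be emitted), and
//       // Inline->getLength() is known to be a ConstantInt.
//     }
//   }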
@@ -1043,6 +1096,7 @@ public:
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
+ case Intrinsic::memset_inline:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memmove_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
@@ -1064,6 +1118,7 @@ public:
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::memset:
+ case Intrinsic::memset_inline:
case Intrinsic::memset_element_unordered_atomic:
return true;
default:
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 2ff48380ac28..a3db2fa59399 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -104,8 +104,8 @@ namespace Intrinsic {
int lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
StringRef Name);
- /// Map a GCC builtin name to an intrinsic ID.
- ID getIntrinsicForGCCBuiltin(const char *Prefix, StringRef BuiltinName);
+ /// Map a Clang builtin name to an intrinsic ID.
+ ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);
/// Map a MS builtin name to an intrinsic ID.
ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
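// A usage sketch for the renamed lookup (illustrative; the prefix selects a
// target's builtin table, and the empty prefix covers target-independent
// builtins such as __builtin_trap):
//
//   Intrinsic::ID IID =
//       Intrinsic::getIntrinsicForClangBuiltin("", "__builtin_trap");
//   // IID == Intrinsic::not_intrinsic when no mapping exists.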
@@ -142,6 +142,7 @@ namespace Intrinsic {
VecOfBitcastsToInt,
AMX,
PPCQuad,
+ AnyPtrToElt,
} Kind;
union {
@@ -180,14 +181,15 @@ namespace Intrinsic {
return (ArgKind)(Argument_Info & 7);
}
- // VecOfAnyPtrsToElt uses both an overloaded argument (for address space)
- // and a reference argument (for matching vector width and element types)
+ // VecOfAnyPtrsToElt and AnyPtrToElt use both an overloaded argument (for
+ // address space) and a reference argument (for matching vector width and
+ // element types)
unsigned getOverloadArgNumber() const {
- assert(Kind == VecOfAnyPtrsToElt);
+ assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
return Argument_Info >> 16;
}
unsigned getRefArgNumber() const {
- assert(Kind == VecOfAnyPtrsToElt);
+ assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
return Argument_Info & 0xFFFF;
}
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index f5248e82ad21..0dceea13ea36 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -120,6 +120,9 @@ class ReadNone<AttrIndex idx> : IntrinsicProperty {
def IntrNoReturn : IntrinsicProperty;
+// IntrNoCallback - The intrinsic does not call back into the caller's module
+// (this maps to the `nocallback` attribute). Applied by default.
+def IntrNoCallback : IntrinsicProperty<1>;
+
// IntrNoSync - Threads executing the intrinsic will not synchronize using
// memory or other means. Applied by default.
def IntrNoSync : IntrinsicProperty<1>;
@@ -212,6 +215,7 @@ class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
class LLVMPointerTo<int num> : LLVMMatchType<num>;
class LLVMPointerToElt<int num> : LLVMMatchType<num>;
+class LLVMAnyPointerToElt<int num> : LLVMMatchType<num>;
class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
class LLVMVectorElementType<int num> : LLVMMatchType<num>;
@@ -241,6 +245,7 @@ def llvm_i8_ty : LLVMType<i8>;
def llvm_i16_ty : LLVMType<i16>;
def llvm_i32_ty : LLVMType<i32>;
def llvm_i64_ty : LLVMType<i64>;
+def llvm_i128_ty : LLVMType<i128>;
def llvm_half_ty : LLVMType<f16>;
def llvm_bfloat_ty : LLVMType<bf16>;
def llvm_float_ty : LLVMType<f32>;
@@ -380,11 +385,11 @@ class DefaultAttrsIntrinsic<list<LLVMType> ret_types,
intr_properties, name,
sd_properties, /*disable_default_attributes*/ 0> {}
-/// GCCBuiltin - If this intrinsic exactly corresponds to a GCC builtin, this
+/// ClangBuiltin - If this intrinsic exactly corresponds to a Clang builtin, this
/// specifies the name of the builtin. This provides automatic CBE and CFE
/// support.
-class GCCBuiltin<string name> {
- string GCCBuiltinName = name;
+class ClangBuiltin<string name> {
+ string ClangBuiltinName = name;
}
class MSBuiltin<string name> {
@@ -540,14 +545,14 @@ def int_seh_scope_end : Intrinsic<[], [], [IntrNoMem]>;
// Note: we treat stacksave/stackrestore as writemem because we don't otherwise
// model their dependencies on allocas.
def int_stacksave : DefaultAttrsIntrinsic<[llvm_ptr_ty]>,
- GCCBuiltin<"__builtin_stack_save">;
+ ClangBuiltin<"__builtin_stack_save">;
def int_stackrestore : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>,
- GCCBuiltin<"__builtin_stack_restore">;
+ ClangBuiltin<"__builtin_stack_restore">;
def int_get_dynamic_area_offset : DefaultAttrsIntrinsic<[llvm_anyint_ty]>;
def int_thread_pointer : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__builtin_thread_pointer">;
+ ClangBuiltin<"__builtin_thread_pointer">;
// IntrInaccessibleMemOrArgMemOnly is a little more pessimistic than strictly
// necessary for prefetch, however it does conveniently prevent the prefetch
@@ -647,6 +652,17 @@ def int_memset : Intrinsic<[],
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
ImmArg<ArgIndex<3>>]>;
+// Memset version that is guaranteed to be inlined.
+// In particular this means that the generated code is not allowed to call any
+// external function.
+// The third argument (specifying the size) must be a constant.
+def int_memset_inline
+ : Intrinsic<[],
+ [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty],
+ [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree,
+ NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
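// Emitting the new intrinsic from C++ (a sketch; it assumes the IRBuilder
// CreateMemSetInline helper that accompanies this change, with `B` and `Dst`
// being hypothetical values from surrounding code):
//
//   IRBuilder<> &B = ...;
//   // The length must be a compile-time constant; lowering may not emit a
//   // call to an external memset.
//   B.CreateMemSetInline(Dst, /*DstAlign=*/MaybeAlign(16), B.getInt8(0),
//                        /*Size=*/B.getInt64(64));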
// FIXME: Add version of these floating point intrinsics which allow non-default
// rounding modes and FP exception handling.
@@ -715,7 +731,7 @@ def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>,
ImmArg<ArgIndex<3>>]>,
- GCCBuiltin<"__builtin_object_size">;
+ ClangBuiltin<"__builtin_object_size">;
//===--------------- Access to Floating Point Environment -----------------===//
//
@@ -725,6 +741,14 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
def int_set_rounding : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
}
+//===--------------- Floating Point Properties ----------------------------===//
+//
+
+def int_is_fpclass
+ : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [llvm_anyfloat_ty, llvm_i32_ty],
+ [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
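// Illustrative call (a sketch): the i32 immediate is a bitmask of the
// floating-point classes to test; e.g. combining the signaling and quiet
// NaN bits (mask value 3, per the LangRef encoding) tests "is any NaN":
//
//   %isnan = call i1 @llvm.is.fpclass.f32(float %x, i32 3)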
//===--------------- Constrained Floating Point Intrinsics ----------------===//
//
@@ -909,6 +933,12 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
}
// FIXME: Consider maybe adding intrinsics for sitofp, uitofp.
+
+// Truncate a floating point number with a specific rounding mode
+def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyfloat_ty, llvm_metadata_ty ],
+ [ IntrNoMem, IntrWillReturn ]>;
+
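// Illustrative call (a sketch; the metadata string is assumed to use the
// same rounding-mode spellings as the constrained intrinsics):
//
//   %h = call half @llvm.fptrunc.round.f16.f32(float %x,
//                                              metadata !"round.towardzero")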
//===------------------------- Expect Intrinsics --------------------------===//
//
def int_expect : DefaultAttrsIntrinsic<[llvm_anyint_ty],
@@ -984,12 +1014,12 @@ def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrNoMem]
// callee-saved registers to be saved and restored (regardless of whether they
// are used) in the calling function. It is used by libgcc_eh.
def int_eh_unwind_init: Intrinsic<[]>,
- GCCBuiltin<"__builtin_unwind_init">;
+ ClangBuiltin<"__builtin_unwind_init">;
def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>;
def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
@@ -1025,11 +1055,11 @@ def int_init_trampoline : DefaultAttrsIntrinsic<
[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
ReadNone<ArgIndex<1>>, ReadNone<ArgIndex<2>>]>,
- GCCBuiltin<"__builtin_init_trampoline">;
+ ClangBuiltin<"__builtin_init_trampoline">;
def int_adjust_trampoline : DefaultAttrsIntrinsic<
[llvm_ptr_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>,
- GCCBuiltin<"__builtin_adjust_trampoline">;
+ ClangBuiltin<"__builtin_adjust_trampoline">;
//===------------------------ Overflow Intrinsics -------------------------===//
//
@@ -1309,9 +1339,9 @@ def int_coro_subfn_addr : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty],
///===-------------------------- Other Intrinsics --------------------------===//
//
def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>,
- GCCBuiltin<"__builtin_trap">;
+ ClangBuiltin<"__builtin_trap">;
def int_debugtrap : Intrinsic<[]>,
- GCCBuiltin<"__builtin_debugtrap">;
+ ClangBuiltin<"__builtin_debugtrap">;
def int_ubsantrap : Intrinsic<[], [llvm_i8_ty],
[IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;
@@ -1397,14 +1427,31 @@ def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
[ IntrReadMem, IntrNoSync, IntrWillReturn, IntrArgMemOnly ]>;
def int_vp_scatter: DefaultAttrsIntrinsic<[],
- [ llvm_anyvector_ty,
- LLVMVectorOfAnyPointersToElt<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty],
- [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers
-
-// Speculatable Binary operators
-let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ [ llvm_anyvector_ty,
+ LLVMVectorOfAnyPointersToElt<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers
+
+// Experimental strided memory accesses
+def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[],
+ [ llvm_anyvector_ty,
+ LLVMAnyPointerToElt<0>,
+ llvm_anyint_ty, // Stride in bytes
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [ NoCapture<ArgIndex<1>>, IntrNoSync, IntrWriteMem, IntrArgMemOnly, IntrWillReturn ]>;
+
+def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [ LLVMAnyPointerToElt<0>,
+ llvm_anyint_ty, // Stride in bytes
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+
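// Call shape implied by the strided-load definition above (illustrative;
// the concrete types are example choices):
//
//   %v = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0i32.i64(
//            i32* %ptr, i64 %stride, <8 x i1> %mask, i32 %evl)
//
// Active lane i (i < %evl with %mask[i] set) loads from the byte address
// %ptr + i * %stride; the store form writes active lanes symmetrically.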
+// Operators
+let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ // Integer arithmetic
def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -1416,30 +1463,30 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -1450,35 +1497,28 @@ let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] i
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
-}
-
-// Non-speculatable binary operators.
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-}
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
-// Floating-point arithmetic.
-let IntrProperties =
- [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ // Floating-point arithmetic
def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -1490,101 +1530,169 @@ let IntrProperties =
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_fmul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_fdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_frem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMMatchType<0>,
- LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
-}
-// Shuffles.
-def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<0>,
- LLVMMatchType<0>,
- llvm_i32_ty]>;
-
-def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
- [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<0>,
- LLVMMatchType<0>,
- llvm_i32_ty]>;
-
-// Reductions
-let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fneg : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fma : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+
+ // Casts
+ def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_zext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_sext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fpext : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fptoui : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_fptosi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_uitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_sitofp : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_ptrtoint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_inttoptr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+
+ // Shuffles
+ def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty]>;
+ def int_vp_merge : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty]>;
+
+ // Comparisons
+ def int_vp_fcmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyvector_ty,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_icmp : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyvector_ty,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+
+ // Reductions
def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
- [LLVMVectorElementType<0>,
- llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty]>;
+ [ LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
}
def int_get_active_lane_mask:
@@ -1840,28 +1948,26 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
//===------------ Intrinsics to perform common vector shuffles ------------===//
def int_experimental_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>],
- [IntrNoMem]>;
+ [LLVMMatchType<0>],
+ [IntrNoMem]>;
-//===---------- Intrinsics to query properties of scalable vectors --------===//
-def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
-
-//===---------- Intrinsics to perform subvector insertion/extraction ------===//
-def int_experimental_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty],
- [IntrNoMem, ImmArg<ArgIndex<2>>]>;
-
-def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty, llvm_i64_ty],
- [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-
-//===---------- Named shufflevector intrinsics ------===//
def int_experimental_vector_splice : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
+//===---------- Intrinsics to query properties of scalable vectors --------===//
+def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+
+//===---------- Intrinsics to perform subvector insertion/extraction ------===//
+def int_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [llvm_anyvector_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
//===----------------- Pointer Authentication Intrinsics ------------------===//
//
@@ -1936,4 +2042,6 @@ include "llvm/IR/IntrinsicsBPF.td"
include "llvm/IR/IntrinsicsSystemZ.td"
include "llvm/IR/IntrinsicsWebAssembly.td"
include "llvm/IR/IntrinsicsRISCV.td"
+include "llvm/IR/IntrinsicsSPIRV.td"
include "llvm/IR/IntrinsicsVE.td"
+include "llvm/IR/IntrinsicsDirectX.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a65ddff07a29..1256ab2c9f84 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -62,14 +62,17 @@ def int_aarch64_frint64x
def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
+def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
+ [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;
+
//===----------------------------------------------------------------------===//
// Data Barrier Instructions
-def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
+def int_aarch64_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
-def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
+def int_aarch64_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
-def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
+def int_aarch64_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
// A space-consuming intrinsic primarily for testing block and jump table
@@ -907,15 +910,15 @@ let TargetPrefix = "aarch64" in {
// Transactional Memory Extension (TME) Intrinsics
let TargetPrefix = "aarch64" in {
-def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">,
+def int_aarch64_tstart : ClangBuiltin<"__builtin_arm_tstart">,
Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>;
-def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[], [], [IntrWillReturn]>;
+def int_aarch64_tcommit : ClangBuiltin<"__builtin_arm_tcommit">, Intrinsic<[], [], [IntrWillReturn]>;
-def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">,
+def int_aarch64_tcancel : ClangBuiltin<"__builtin_arm_tcancel">,
Intrinsic<[], [llvm_i64_ty], [IntrWillReturn, ImmArg<ArgIndex<0>>]>;
-def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">,
+def int_aarch64_ttest : ClangBuiltin<"__builtin_arm_ttest">,
Intrinsic<[llvm_i64_ty], [],
[IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
@@ -1759,10 +1762,10 @@ def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
// FFR manipulation
//
-def int_aarch64_sve_rdffr : GCCBuiltin<"__builtin_sve_svrdffr">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>;
-def int_aarch64_sve_rdffr_z : GCCBuiltin<"__builtin_sve_svrdffr_z">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
-def int_aarch64_sve_setffr : GCCBuiltin<"__builtin_sve_svsetffr">, DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>;
-def int_aarch64_sve_wrffr : GCCBuiltin<"__builtin_sve_svwrffr">, DefaultAttrsIntrinsic<[], [llvm_nxv16i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+def int_aarch64_sve_rdffr : ClangBuiltin<"__builtin_sve_svrdffr">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>;
+def int_aarch64_sve_rdffr_z : ClangBuiltin<"__builtin_sve_svrdffr_z">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
+def int_aarch64_sve_setffr : ClangBuiltin<"__builtin_sve_svsetffr">, DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+def int_aarch64_sve_wrffr : ClangBuiltin<"__builtin_sve_svwrffr">, DefaultAttrsIntrinsic<[], [llvm_nxv16i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
//
// Saturating scalar arithmetic
@@ -2493,31 +2496,31 @@ def int_aarch64_sve_xar : AdvSIMD_2VectorArgIndexed_Intrinsic;
// SVE2 - Optional AES, SHA-3 and SM4
//
-def int_aarch64_sve_aesd : GCCBuiltin<"__builtin_sve_svaesd_u8">,
+def int_aarch64_sve_aesd : ClangBuiltin<"__builtin_sve_svaesd_u8">,
DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[IntrNoMem]>;
-def int_aarch64_sve_aesimc : GCCBuiltin<"__builtin_sve_svaesimc_u8">,
+def int_aarch64_sve_aesimc : ClangBuiltin<"__builtin_sve_svaesimc_u8">,
DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty],
[IntrNoMem]>;
-def int_aarch64_sve_aese : GCCBuiltin<"__builtin_sve_svaese_u8">,
+def int_aarch64_sve_aese : ClangBuiltin<"__builtin_sve_svaese_u8">,
DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
[IntrNoMem]>;
-def int_aarch64_sve_aesmc : GCCBuiltin<"__builtin_sve_svaesmc_u8">,
+def int_aarch64_sve_aesmc : ClangBuiltin<"__builtin_sve_svaesmc_u8">,
DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty],
[IntrNoMem]>;
-def int_aarch64_sve_rax1 : GCCBuiltin<"__builtin_sve_svrax1_u64">,
+def int_aarch64_sve_rax1 : ClangBuiltin<"__builtin_sve_svrax1_u64">,
DefaultAttrsIntrinsic<[llvm_nxv2i64_ty],
[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
[IntrNoMem]>;
-def int_aarch64_sve_sm4e : GCCBuiltin<"__builtin_sve_svsm4e_u32">,
+def int_aarch64_sve_sm4e : ClangBuiltin<"__builtin_sve_svsm4e_u32">,
DefaultAttrsIntrinsic<[llvm_nxv4i32_ty],
[llvm_nxv4i32_ty, llvm_nxv4i32_ty],
[IntrNoMem]>;
-def int_aarch64_sve_sm4ekey : GCCBuiltin<"__builtin_sve_svsm4ekey_u32">,
+def int_aarch64_sve_sm4ekey : ClangBuiltin<"__builtin_sve_svsm4ekey_u32">,
DefaultAttrsIntrinsic<[llvm_nxv4i32_ty],
[llvm_nxv4i32_ty, llvm_nxv4i32_ty],
[IntrNoMem]>;
@@ -2580,3 +2583,130 @@ def int_aarch64_sve_whilewr_b : SVE2_CONFLICT_DETECT_Intrinsic;
def int_aarch64_sve_whilewr_h : SVE2_CONFLICT_DETECT_Intrinsic;
def int_aarch64_sve_whilewr_s : SVE2_CONFLICT_DETECT_Intrinsic;
def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic;
+
+// Scalable Matrix Extension (SME) Intrinsics
+let TargetPrefix = "aarch64" in {
+ class SME_Load_Store_Intrinsic<LLVMType pred_ty>
+ : DefaultAttrsIntrinsic<[],
+ [pred_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>;
+
+ // Loads
+ def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1h_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1w_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1d_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1q_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+
+ // Stores
+ def int_aarch64_sme_st1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1h_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1w_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1d_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+
+ // Spill + fill
+ def int_aarch64_sme_ldr : DefaultAttrsIntrinsic<
+ [], [llvm_i32_ty, llvm_ptr_ty]>;
+ def int_aarch64_sme_str : DefaultAttrsIntrinsic<
+ [], [llvm_i32_ty, llvm_ptr_ty]>;
+
+ class SME_TileToVector_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i64_ty, llvm_i32_ty]>;
+ class SME_VectorToTile_Intrinsic
+ : DefaultAttrsIntrinsic<[],
+ [llvm_i64_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyvector_ty]>;
+
+ def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic;
+ def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic;
+ def int_aarch64_sme_write_horiz : SME_VectorToTile_Intrinsic;
+ def int_aarch64_sme_write_vert : SME_VectorToTile_Intrinsic;
+
+ def int_aarch64_sme_readq_horiz : SME_TileToVector_Intrinsic;
+ def int_aarch64_sme_readq_vert : SME_TileToVector_Intrinsic;
+ def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
+ def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
+
+ def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>;
+
+ class SME_OuterProduct_Intrinsic
+ : DefaultAttrsIntrinsic<[],
+ [llvm_i64_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ llvm_anyvector_ty]>;
+
+ def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
+
+ def int_aarch64_sme_mopa_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_mops_wide : SME_OuterProduct_Intrinsic;
+
+ def int_aarch64_sme_smopa_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_smops_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_umopa_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_umops_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_sumopa_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_sumops_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
+
+ //
+ // Counting elements
+ //
+
+ class AdvSIMD_SME_CNTSB_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+
+ def int_aarch64_sme_cntsb : AdvSIMD_SME_CNTSB_Intrinsic;
+ def int_aarch64_sme_cntsh : AdvSIMD_SME_CNTSB_Intrinsic;
+ def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic;
+ def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic;
+
+ //
+ // PSTATE Functions
+ //
+
+ def int_aarch64_sme_get_pstatesm
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [],
+ [IntrReadMem, IntrInaccessibleMemOnly]>;
+
+ def int_aarch64_sme_get_tpidr2
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [],
+ [IntrNoMem, IntrHasSideEffects]>;
+ def int_aarch64_sme_set_tpidr2
+ : DefaultAttrsIntrinsic<[], [llvm_i64_ty],
+ [IntrNoMem, IntrHasSideEffects]>;
+
+ //
+ // Clamp
+ //
+
+ def int_aarch64_sve_sclamp : AdvSIMD_3VectorArg_Intrinsic;
+ def int_aarch64_sve_uclamp : AdvSIMD_3VectorArg_Intrinsic;
+
+ //
+ // Reversal
+ //
+
+ def int_aarch64_sve_revd : AdvSIMD_Merged1VectorArg_Intrinsic;
+
+ //
+ // Predicate selection
+ //
+
+ def int_aarch64_sve_psel
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>, llvm_i32_ty]>;
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index c5d266eb57ec..c2dcfc254568 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -14,7 +14,7 @@ class AMDGPUReadPreloadRegisterIntrinsic
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>, GCCBuiltin<name>;
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>, ClangBuiltin<name>;
// Used to tag image and resource intrinsics with information used to generate
// mem operands.
@@ -47,12 +47,12 @@ defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;
-def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
+def int_r600_group_barrier : ClangBuiltin<"__builtin_r600_group_barrier">,
Intrinsic<[], [], [IntrConvergent, IntrWillReturn]>;
// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
- GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
+ ClangBuiltin<"__builtin_r600_implicitarg_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
@@ -61,7 +61,7 @@ def int_r600_rat_store_typed :
// 2nd parameter: Index
// 3rd parameter: Constant RAT ID
Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrWillReturn]>,
- GCCBuiltin<"__builtin_r600_rat_store_typed">;
+ ClangBuiltin<"__builtin_r600_rat_store_typed">;
def int_r600_recipsqrt_ieee : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
@@ -145,30 +145,30 @@ def int_amdgcn_dispatch_ptr :
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_amdgcn_queue_ptr :
- GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
+ ClangBuiltin<"__builtin_amdgcn_queue_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_amdgcn_kernarg_segment_ptr :
- GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
+ ClangBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_amdgcn_implicitarg_ptr :
- GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
+ ClangBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_amdgcn_groupstaticsize :
- GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
+ ClangBuiltin<"__builtin_amdgcn_groupstaticsize">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_amdgcn_dispatch_id :
- GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
+ ClangBuiltin<"__builtin_amdgcn_dispatch_id">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_amdgcn_implicit_buffer_ptr :
- GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
+ ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
@@ -190,7 +190,7 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
[IntrConvergent, ImmArg<ArgIndex<1>>]>;
def int_amdgcn_wavefrontsize :
- GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
+ ClangBuiltin<"__builtin_amdgcn_wavefrontsize">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
@@ -200,20 +200,44 @@ def int_amdgcn_wavefrontsize :
// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
-def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
+def int_amdgcn_s_sendmsg : ClangBuiltin<"__builtin_amdgcn_s_sendmsg">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
-def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
+def int_amdgcn_s_sendmsghalt : ClangBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
-def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
+
+// gfx11 intrinsic
+// The first parameter is s_sendmsg immediate (i16). Return type is i32 or i64.
+def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty],
+ [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
+
+def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;
-def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
+def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;
-def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
+// The 1st parameter is a mask for the types of instructions that may be allowed
+// to cross the SCHED_BARRIER during scheduling.
+// MASK = 0x0000 0000: No instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0001: ALL, non-memory, non-side-effect producing instructions may be
+// scheduled across SCHED_BARRIER, i.e. allow ALU instructions to pass.
+// MASK = 0x0000 0002: VALU instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0004: SALU instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0008: MFMA instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0010: ALL VMEM instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0020: VMEM read instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0040: VMEM write instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0080: ALL DS instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0100: ALL DS read instructions may be scheduled across SCHED_BARRIER.
+// MASK = 0x0000 0200: ALL DS write instructions may be scheduled across SCHED_BARRIER.
+def int_amdgcn_sched_barrier : ClangBuiltin<"__builtin_amdgcn_sched_barrier">,
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
+ IntrWillReturn]>;
+
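// Usage sketch via the Clang builtin named above (illustrative): the mask
// values follow the table in the preceding comment.
//
//   __builtin_amdgcn_sched_barrier(0); // nothing may be scheduled across
//   __builtin_amdgcn_sched_barrier(1); // ALU instructions may still pass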
+def int_amdgcn_s_waitcnt : ClangBuiltin<"__builtin_amdgcn_s_waitcnt">,
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_div_scale : Intrinsic<
@@ -255,7 +279,7 @@ def int_amdgcn_log_clamp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
+def int_amdgcn_fmul_legacy : ClangBuiltin<"__builtin_amdgcn_fmul_legacy">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
@@ -274,7 +298,7 @@ def int_amdgcn_rcp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
+def int_amdgcn_rcp_legacy : ClangBuiltin<"__builtin_amdgcn_rcp_legacy">,
Intrinsic<[llvm_float_ty], [llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
@@ -287,7 +311,7 @@ def int_amdgcn_rsq : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
+def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">,
Intrinsic<
[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
@@ -316,31 +340,31 @@ def int_amdgcn_fract : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
+def int_amdgcn_cvt_pkrtz : ClangBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_cvt_pknorm_i16 :
- GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_cvt_pknorm_u16 :
- GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_cvt_pk_i16 :
- GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
Intrinsic<
[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
+def int_amdgcn_cvt_pk_u16 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
@@ -350,31 +374,31 @@ def int_amdgcn_class : Intrinsic<
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
+def int_amdgcn_fmed3 : ClangBuiltin<"__builtin_amdgcn_fmed3">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
+def int_amdgcn_cubeid : ClangBuiltin<"__builtin_amdgcn_cubeid">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
+def int_amdgcn_cubema : ClangBuiltin<"__builtin_amdgcn_cubema">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
+def int_amdgcn_cubesc : ClangBuiltin<"__builtin_amdgcn_cubesc">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
-def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
+def int_amdgcn_cubetc : ClangBuiltin<"__builtin_amdgcn_cubetc">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
@@ -838,6 +862,13 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
[IntrReadMem], [SDNPMemOperand]>;
}
+ foreach dim = AMDGPUDims.Msaa in {
+ def int_amdgcn_image_msaa_load # _ # dim.Name:
+ AMDGPUImageDimIntrinsic<
+ AMDGPUDimNoSampleProfile<"MSAA_LOAD", dim, [llvm_any_ty], []>,
+ [IntrReadMem], [SDNPMemOperand]>;
+ }
+
//////////////////////////////////////////////////////////////////////////
// sample and getlod intrinsics
//////////////////////////////////////////////////////////////////////////
@@ -949,10 +980,12 @@ class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
+// Generate a buffer_load instruction that may be optimized to s_buffer_load if
+// the offset argument is uniform.
def int_amdgcn_s_buffer_load : Intrinsic <
[llvm_any_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // byte offset(SGPR/imm)
+ llvm_i32_ty, // byte offset
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
[IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<2>>]>,
AMDGPURsrcIntrinsic<0>;
@@ -1259,6 +1292,40 @@ class AMDGPUBufferAtomicFP : Intrinsic <
// Legacy form of the intrinsic. raw and struct forms should be preferred.
def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;
+
+class AMDGPURawBufferLoadLDS : Intrinsic <
+ [],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset
+ llvm_i32_ty, // Data byte size: 1/2/4
+ llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
+ ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;
+
+class AMDGPUStructBufferLoadLDS : Intrinsic <
+ [],
+ [llvm_v4i32_ty, // rsrc(SGPR)
+ LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset
+ llvm_i32_ty, // Data byte size: 1/2/4
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
+ ImmArg<ArgIndex<7>>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
+
} // defset AMDGPUBufferIntrinsics
// Uses that do not set the done bit should set IntrWriteMem on the
@@ -1278,7 +1345,21 @@ def int_amdgcn_exp : Intrinsic <[], [
IntrWillReturn]
>;
-// exp with compr bit set.
+// exp with row_en bit set. Only supported on GFX11+.
+def int_amdgcn_exp_row : Intrinsic <[], [
+ llvm_i32_ty, // tgt,
+ llvm_i32_ty, // en
+ llvm_any_ty, // src0 (f32 or i32)
+ LLVMMatchType<0>, // src1
+ LLVMMatchType<0>, // src2
+ LLVMMatchType<0>, // src3
+ llvm_i1_ty, // done
+ llvm_i32_ty], // row number
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<6>>,
+ IntrWriteMem, IntrInaccessibleMemOnly, IntrWillReturn]
+>;
+
+// exp with compr bit set. Not supported on GFX11+.
def int_amdgcn_exp_compr : Intrinsic <[], [
llvm_i32_ty, // tgt,
llvm_i32_ty, // en
@@ -1292,35 +1373,35 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
>;
def int_amdgcn_buffer_wbinvl1_sc :
- GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
+ ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_buffer_wbinvl1 :
- GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
+ ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_s_dcache_inv :
- GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
+ ClangBuiltin<"__builtin_amdgcn_s_dcache_inv">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_s_memtime :
- GCCBuiltin<"__builtin_amdgcn_s_memtime">,
+ ClangBuiltin<"__builtin_amdgcn_s_memtime">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_s_sleep :
- GCCBuiltin<"__builtin_amdgcn_s_sleep">,
+ ClangBuiltin<"__builtin_amdgcn_s_sleep">,
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects, IntrWillReturn]> {
}
def int_amdgcn_s_incperflevel :
- GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
+ ClangBuiltin<"__builtin_amdgcn_s_incperflevel">,
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects, IntrWillReturn]> {
}
def int_amdgcn_s_decperflevel :
- GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
+ ClangBuiltin<"__builtin_amdgcn_s_decperflevel">,
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects, IntrWillReturn]> {
}
@@ -1329,11 +1410,16 @@ def int_amdgcn_s_sethalt :
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects, IntrWillReturn]>;
+def int_amdgcn_s_setprio :
+ ClangBuiltin<"__builtin_amdgcn_s_setprio">,
+ Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
+ IntrHasSideEffects, IntrWillReturn]>;
+
+// This is IntrHasSideEffects so it can be used to read cycle counters.
def int_amdgcn_s_getreg :
- GCCBuiltin<"__builtin_amdgcn_s_getreg">,
+ ClangBuiltin<"__builtin_amdgcn_s_getreg">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable,
- IntrWillReturn, ImmArg<ArgIndex<0>>]
+ [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg<ArgIndex<0>>]
>;
// Note this can be used to set FP environment properties that are
@@ -1341,7 +1427,7 @@ def int_amdgcn_s_getreg :
// available (and value required to access them) may differ per
// subtarget. llvm.amdgcn.s.setreg(hwmode, value)
def int_amdgcn_s_setreg :
- GCCBuiltin<"__builtin_amdgcn_s_setreg">,
+ ClangBuiltin<"__builtin_amdgcn_s_setreg">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg<ArgIndex<0>>]
>;
@@ -1353,14 +1439,14 @@ def int_amdgcn_s_setreg :
// produce the desired results as optimizations may cause code movement,
// especially as we explicitly use IntrNoMem to allow optimizations.
def int_amdgcn_s_getpc :
- GCCBuiltin<"__builtin_amdgcn_s_getpc">,
+ ClangBuiltin<"__builtin_amdgcn_s_getpc">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable,
IntrWillReturn]>;
// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
- GCCBuiltin<"__builtin_amdgcn_interp_mov">,
+ ClangBuiltin<"__builtin_amdgcn_interp_mov">,
Intrinsic<[llvm_float_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
@@ -1370,7 +1456,7 @@ def int_amdgcn_interp_mov :
// This intrinsic reads from lds, but the memory values are constant,
// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
- GCCBuiltin<"__builtin_amdgcn_interp_p1">,
+ ClangBuiltin<"__builtin_amdgcn_interp_p1">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
@@ -1378,7 +1464,7 @@ def int_amdgcn_interp_p1 :
// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
- GCCBuiltin<"__builtin_amdgcn_interp_p2">,
+ ClangBuiltin<"__builtin_amdgcn_interp_p2">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
@@ -1388,7 +1474,7 @@ def int_amdgcn_interp_p2 :
// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
// high selects whether high or low 16-bits are loaded from LDS
def int_amdgcn_interp_p1_f16 :
- GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
+ ClangBuiltin<"__builtin_amdgcn_interp_p1_f16">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
@@ -1397,12 +1483,57 @@ def int_amdgcn_interp_p1_f16 :
// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
// high selects whether high or low 16-bits are loaded from LDS
def int_amdgcn_interp_p2_f16 :
- GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
+ ClangBuiltin<"__builtin_amdgcn_interp_p2_f16">,
Intrinsic<[llvm_half_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
+// llvm.amdgcn.lds.direct.load <m0>
+// The input argument is m0, which contains a packed combination of address
+// offset and flags describing the data type.
+def int_amdgcn_lds_direct_load :
+ Intrinsic<[llvm_any_ty], // overloaded for types u8, u16, i32/f32, i8, i16
+ [llvm_i32_ty],
+ [IntrReadMem, IntrSpeculatable, IntrWillReturn]>;
+
+// llvm.amdgcn.lds.param.load <attr_chan>, <attr>, <m0>
+// Like interp intrinsics, this reads from lds, but the memory values are constant,
+// so it behaves like IntrNoMem.
+def int_amdgcn_lds_param_load :
+ Intrinsic<[llvm_float_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn,
+ ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
+
+// llvm.amdgcn.interp.inreg.p10 <p>, <i>, <p0>
+def int_amdgcn_interp_inreg_p10 :
+ Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
+// llvm.amdgcn.interp.inreg.p2 <p>, <j>, <tmp>
+def int_amdgcn_interp_inreg_p2 :
+ Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
+// llvm.amdgcn.interp.inreg.p10.f16 <p>, <i>, <p0>, <high>
+// high selects whether high or low 16-bits are used for p and p0 operands
+def int_amdgcn_interp_inreg_p10_f16:
+ Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn,
+ ImmArg<ArgIndex<3>>]>;
+
+// llvm.amdgcn.interp.inreg.p2.f16 <p>, <j>, <tmp>, <high>
+// high selects whether high or low 16-bits are used for p operand
+def int_amdgcn_interp_inreg_p2_f16 :
+ Intrinsic<[llvm_half_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i1_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn,
+ ImmArg<ArgIndex<3>>]>;
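+//
+// A minimal usage sketch: the two inreg stages presumably chain like the
+// interp.p1/interp.p2 pair above (%p, %i, %j, %p0 are illustrative inputs):
+//   %t = call float @llvm.amdgcn.interp.inreg.p10(float %p, float %i, float %p0)
+//   %v = call float @llvm.amdgcn.interp.inreg.p2(float %p, float %j, float %t)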
+
// Deprecated: use llvm.amdgcn.live.mask instead.
def int_amdgcn_ps_live : Intrinsic <
[llvm_i1_ty],
@@ -1416,18 +1547,18 @@ def int_amdgcn_live_mask : Intrinsic <[llvm_i1_ty],
>;
def int_amdgcn_mbcnt_lo :
- GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
+ ClangBuiltin<"__builtin_amdgcn_mbcnt_lo">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrWillReturn]>;
def int_amdgcn_mbcnt_hi :
- GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
+ ClangBuiltin<"__builtin_amdgcn_mbcnt_hi">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrWillReturn]>;
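// A common usage sketch for computing a lane index: count, per lane, the set
// bits of an all-ones mask below the current lane (values illustrative):
//   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
//   %lane = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)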
// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
- GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
+ ClangBuiltin<"__builtin_amdgcn_ds_swizzle">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn,
ImmArg<ArgIndex<1>>]>;
@@ -1443,55 +1574,55 @@ def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
>;
def int_amdgcn_lerp :
- GCCBuiltin<"__builtin_amdgcn_lerp">,
+ ClangBuiltin<"__builtin_amdgcn_lerp">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_sad_u8 :
- GCCBuiltin<"__builtin_amdgcn_sad_u8">,
+ ClangBuiltin<"__builtin_amdgcn_sad_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_msad_u8 :
- GCCBuiltin<"__builtin_amdgcn_msad_u8">,
+ ClangBuiltin<"__builtin_amdgcn_msad_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_sad_hi_u8 :
- GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
+ ClangBuiltin<"__builtin_amdgcn_sad_hi_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_sad_u16 :
- GCCBuiltin<"__builtin_amdgcn_sad_u16">,
+ ClangBuiltin<"__builtin_amdgcn_sad_u16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_qsad_pk_u16_u8 :
- GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
+ ClangBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_mqsad_pk_u16_u8 :
- GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
+ ClangBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_mqsad_u32_u8 :
- GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
+ ClangBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
def int_amdgcn_cvt_pk_u8_f32 :
- GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
+ ClangBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
@@ -1511,14 +1642,14 @@ def int_amdgcn_ballot :
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
def int_amdgcn_readfirstlane :
- GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
+ ClangBuiltin<"__builtin_amdgcn_readfirstlane">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
- GCCBuiltin<"__builtin_amdgcn_readlane">,
+ ClangBuiltin<"__builtin_amdgcn_readlane">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
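// For example, a constant lane index is uniform by construction, so this is a
// sketch of typical use (value name illustrative):
//   %v = call i32 @llvm.amdgcn.readlane(i32 %val, i32 0)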
@@ -1526,7 +1657,7 @@ def int_amdgcn_readlane :
// currently active threads of the current wave. Otherwise, the result is
// undefined.
def int_amdgcn_writelane :
- GCCBuiltin<"__builtin_amdgcn_writelane">,
+ ClangBuiltin<"__builtin_amdgcn_writelane">,
Intrinsic<[llvm_i32_ty], [
llvm_i32_ty, // uniform value to write: returned by the selected lane
llvm_i32_ty, // uniform lane select
@@ -1535,7 +1666,7 @@ def int_amdgcn_writelane :
[IntrNoMem, IntrConvergent, IntrWillReturn]
>;
-def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
+def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
@@ -1565,7 +1696,7 @@ def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty],
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_init :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
+ ClangBuiltin<"__builtin_amdgcn_ds_gws_init">,
Intrinsic<[],
[llvm_i32_ty, llvm_i32_ty],
[IntrConvergent, IntrWriteMem,
@@ -1577,7 +1708,7 @@ def int_amdgcn_ds_gws_init :
// bar_val is the total number of waves that will wait on this
// barrier, minus 1.
def int_amdgcn_ds_gws_barrier :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
+ ClangBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
Intrinsic<[],
[llvm_i32_ty, llvm_i32_ty],
[IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
@@ -1586,7 +1717,7 @@ def int_amdgcn_ds_gws_barrier :
// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
def int_amdgcn_ds_gws_sema_v :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
+ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
Intrinsic<[],
[llvm_i32_ty],
[IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
@@ -1595,7 +1726,7 @@ def int_amdgcn_ds_gws_sema_v :
// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
def int_amdgcn_ds_gws_sema_br :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
+ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
Intrinsic<[],
[llvm_i32_ty, llvm_i32_ty],
[IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
@@ -1604,7 +1735,7 @@ def int_amdgcn_ds_gws_sema_br :
// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
def int_amdgcn_ds_gws_sema_p :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
+ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
Intrinsic<[],
[llvm_i32_ty],
[IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
@@ -1613,7 +1744,7 @@ def int_amdgcn_ds_gws_sema_p :
// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
def int_amdgcn_ds_gws_sema_release_all :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
+ ClangBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
Intrinsic<[],
[llvm_i32_ty],
[IntrConvergent, IntrInaccessibleMemOnly, IntrWillReturn], "",
@@ -1644,7 +1775,7 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
// FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
-def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">,
+def int_amdgcn_endpgm : ClangBuiltin<"__builtin_amdgcn_endpgm">,
Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects]
>;
@@ -1683,13 +1814,13 @@ def int_amdgcn_set_inactive :
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
// Return true if the given flat pointer points to a local memory address.
-def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">,
+def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">,
Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;
// Return true if the given flat pointer points to a private memory address.
-def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">,
+def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">,
Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>, IntrWillReturn]
>;
@@ -1699,11 +1830,11 @@ def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">,
//===----------------------------------------------------------------------===//
def int_amdgcn_s_dcache_inv_vol :
- GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
+ ClangBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_buffer_wbinvl1_vol :
- GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
+ ClangBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
//===----------------------------------------------------------------------===//
@@ -1732,48 +1863,67 @@ def int_amdgcn_update_dpp :
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
def int_amdgcn_s_dcache_wb :
- GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
+ ClangBuiltin<"__builtin_amdgcn_s_dcache_wb">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_s_dcache_wb_vol :
- GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
+ ClangBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
def int_amdgcn_s_memrealtime :
- GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
+ ClangBuiltin<"__builtin_amdgcn_s_memrealtime">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
- GCCBuiltin<"__builtin_amdgcn_ds_permute">,
+ ClangBuiltin<"__builtin_amdgcn_ds_permute">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
- GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
+ ClangBuiltin<"__builtin_amdgcn_ds_bpermute">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
// llvm.amdgcn.perm <src0> <src1> <selector>
def int_amdgcn_perm :
- GCCBuiltin<"__builtin_amdgcn_perm">,
+ ClangBuiltin<"__builtin_amdgcn_perm">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
//===----------------------------------------------------------------------===//
+// GFX9 Intrinsics
+//===----------------------------------------------------------------------===//
+
+class AMDGPUGlobalLoadLDS : Intrinsic <
+ [],
+ [LLVMQualPointerType<llvm_i8_ty, 1>, // Base global pointer to load from
+ LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base pointer to store to
+ llvm_i32_ty, // Data byte size: 1/2/4
+ llvm_i32_ty, // imm offset (applied to both global and LDS address)
+   llvm_i32_ty],                       // auxiliary data (imm, cachepolicy (bit 0 = glc/sc0,
+                                       //                                   bit 1 = slc/sc1,
+                                       //                                   bit 2 = dlc on gfx10+,
+                                       //                                   bit 4 = scc/nt on gfx90a+))
+  [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
+   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>],
+ "", [SDNPMemOperand]>;
+def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
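+//
+// A minimal usage sketch (pointer names illustrative): copy one dword from
+// global memory into LDS at a zero immediate offset with default cache policy:
+//   call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr,
+//                                          i8 addrspace(3)* %lptr,
+//                                          i32 4, i32 0, i32 0)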
+
+//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//
// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
-def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
+def int_amdgcn_permlane16 : ClangBuiltin<"__builtin_amdgcn_permlane16">,
Intrinsic<[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn,
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
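// For example (fi/bound_control must be immediates; all values illustrative):
//   %r = call i32 @llvm.amdgcn.permlane16(i32 %old, i32 %src0,
//                                         i32 %sel0, i32 %sel1, i1 false, i1 false)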
// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
-def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
+def int_amdgcn_permlanex16 : ClangBuiltin<"__builtin_amdgcn_permlanex16">,
Intrinsic<[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn,
@@ -1789,9 +1939,9 @@ def int_amdgcn_mov_dpp8 :
ImmArg<ArgIndex<1>>]>;
def int_amdgcn_s_get_waveid_in_workgroup :
- GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
+ ClangBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
Intrinsic<[llvm_i32_ty], [],
- [IntrReadMem, IntrInaccessibleMemOnly, IntrWillReturn]>;
+ [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic <
[vt],
@@ -1813,13 +1963,82 @@ def int_amdgcn_image_bvh_intersect_ray :
[IntrReadMem, IntrWillReturn]>;
//===----------------------------------------------------------------------===//
+// GFX11 Intrinsics
+//===----------------------------------------------------------------------===//
+
+// llvm.amdgcn.permlane64 <src0>
+def int_amdgcn_permlane64 :
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn]>;
+
+def int_amdgcn_ds_add_gs_reg_rtn :
+ ClangBuiltin<"__builtin_amdgcn_ds_add_gs_reg_rtn">,
+ Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn]>;
+
+def int_amdgcn_ds_sub_gs_reg_rtn :
+ ClangBuiltin<"__builtin_amdgcn_ds_sub_gs_reg_rtn">,
+ Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn]>;
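+//
+// A minimal usage sketch (the register offset is illustrative and must be an
+// immediate per ImmArg<ArgIndex<1>>):
+//   %old = call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %val, i32 0)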
+
+// WMMA (Wave Matrix Multiply-Accumulate) intrinsics
+//
+// These operations perform a matrix multiplication and accumulation of
+// the form: D = A * B + C.
+
+class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> :
+ Intrinsic<
+ [CD], // %D
+ [
+ AB, // %A
+ AB, // %B
+ LLVMMatchType<0>, // %C
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn]
+>;
+
+class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> :
+ Intrinsic<
+ [CD], // %D
+ [
+ AB, // %A
+ AB, // %B
+ LLVMMatchType<0>, // %C
+ llvm_i1_ty, // %high
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<3>>]
+>;
+
+class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> :
+ Intrinsic<
+ [CD], // %D
+ [
+ llvm_i1_ty, // %A_sign
+ AB, // %A
+ llvm_i1_ty, // %B_sign
+ AB, // %B
+ LLVMMatchType<0>, // %C
+ llvm_i1_ty, // %clamp
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]
+>;
+
+def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_v16f16_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_v16i16_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>;
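+//
+// A minimal usage sketch (the v8f32 accumulator width corresponds to one
+// possible wave configuration; the result type is the overloaded parameter):
+//   %D = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32(
+//            <16 x half> %A, <16 x half> %B, <8 x float> %C)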
+
+
+//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_fdot2 :
- GCCBuiltin<"__builtin_amdgcn_fdot2">,
+ ClangBuiltin<"__builtin_amdgcn_fdot2">,
Intrinsic<
[llvm_float_ty], // %r
[
@@ -1831,10 +2050,53 @@ def int_amdgcn_fdot2 :
[IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
>;
+// f16 %r = llvm.amdgcn.fdot2.f16.f16(v2f16 %a, v2f16 %b, f16 %c)
+// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
+def int_amdgcn_fdot2_f16_f16 :
+ ClangBuiltin<"__builtin_amdgcn_fdot2_f16_f16">,
+ Intrinsic<
+ [llvm_half_ty], // %r
+ [
+ llvm_v2f16_ty, // %a
+ llvm_v2f16_ty, // %b
+ llvm_half_ty // %c
+ ],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+ >;
+
+// bf16 %r = llvm.amdgcn.fdot2.bf16.bf16(v2bf16 %a, v2bf16 %b, bf16 %c)
+// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
+def int_amdgcn_fdot2_bf16_bf16 :
+ ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
+ Intrinsic<
+ [llvm_i16_ty], // %r
+ [
+ llvm_v2i16_ty, // %a
+ llvm_v2i16_ty, // %b
+ llvm_i16_ty // %c
+ ],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+ >;
+
+// f32 %r = llvm.amdgcn.fdot2.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp)
+// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
+def int_amdgcn_fdot2_f32_bf16 :
+ ClangBuiltin<"__builtin_amdgcn_fdot2_f32_bf16">,
+ Intrinsic<
+ [llvm_float_ty], // %r
+ [
+ llvm_v2i16_ty, // %a
+ llvm_v2i16_ty, // %b
+ llvm_float_ty, // %c
+ llvm_i1_ty // %clamp
+ ],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
+ >;
+
// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_sdot2 :
- GCCBuiltin<"__builtin_amdgcn_sdot2">,
+ ClangBuiltin<"__builtin_amdgcn_sdot2">,
Intrinsic<
[llvm_i32_ty], // %r
[
@@ -1849,7 +2111,7 @@ def int_amdgcn_sdot2 :
// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
def int_amdgcn_udot2 :
- GCCBuiltin<"__builtin_amdgcn_udot2">,
+ ClangBuiltin<"__builtin_amdgcn_udot2">,
Intrinsic<
[llvm_i32_ty], // %r
[
@@ -1864,7 +2126,7 @@ def int_amdgcn_udot2 :
// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_sdot4 :
- GCCBuiltin<"__builtin_amdgcn_sdot4">,
+ ClangBuiltin<"__builtin_amdgcn_sdot4">,
Intrinsic<
[llvm_i32_ty], // %r
[
@@ -1879,7 +2141,7 @@ def int_amdgcn_sdot4 :
// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
def int_amdgcn_udot4 :
- GCCBuiltin<"__builtin_amdgcn_udot4">,
+ ClangBuiltin<"__builtin_amdgcn_udot4">,
Intrinsic<
[llvm_i32_ty], // %r
[
@@ -1891,11 +2153,32 @@ def int_amdgcn_udot4 :
[IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
>;
+// i32 %r = llvm.amdgcn.sudot4(i1 %a_sign, v4i8 (as i32) %a, i1 %b_sign, v4i8 (as i32) %b, i32 %c, i1 %clamp)
+// Treat input as signed (_sign = 1) or unsigned (_sign = 0).
+// a[i in 0...3] = (%a_sign ? a.i8[i] : promoteToSigned(a.u8[i]));
+// b[i in 0...3] = (%b_sign ? b.i8[i] : promoteToSigned(b.u8[i]));
+// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
+def int_amdgcn_sudot4 :
+ ClangBuiltin<"__builtin_amdgcn_sudot4">,
+ Intrinsic<
+ [llvm_i32_ty], // %r
+ [
+ llvm_i1_ty, // %a_sign
+ llvm_i32_ty, // %a
+ llvm_i1_ty, // %b_sign
+ llvm_i32_ty, // %b
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
+ ],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn,
+ ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]
+ >;
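+//
+// For example, a mixed-signedness dot product where %a is treated as signed
+// and %b as unsigned (operand names illustrative) is a single call:
+//   %r = call i32 @llvm.amdgcn.sudot4(i1 true, i32 %a, i1 false, i32 %b,
+//                                     i32 %c, i1 false)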
+
// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_sdot8 :
- GCCBuiltin<"__builtin_amdgcn_sdot8">,
+ ClangBuiltin<"__builtin_amdgcn_sdot8">,
Intrinsic<
[llvm_i32_ty], // %r
[
@@ -1911,7 +2194,7 @@ def int_amdgcn_sdot8 :
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
def int_amdgcn_udot8 :
- GCCBuiltin<"__builtin_amdgcn_udot8">,
+ ClangBuiltin<"__builtin_amdgcn_udot8">,
Intrinsic<
[llvm_i32_ty], // %r
[
@@ -1923,6 +2206,28 @@ def int_amdgcn_udot8 :
[IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<3>>]
>;
+// i32 %r = llvm.amdgcn.sudot8(i1 %a_sign, v8i4 (as i32) %a, i1 %b_sign, v8i4 (as i32) %b, i32 %c, i1 %clamp)
+// Treat input as signed (_sign = 1) or unsigned (_sign = 0).
+// a[i in 0...7] = (%a_sign ? a.i4[i] : promoteToSigned(a.u4[i]));
+// b[i in 0...7] = (%b_sign ? b.i4[i] : promoteToSigned(b.u4[i]));
+// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
+// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
+def int_amdgcn_sudot8 :
+ ClangBuiltin<"__builtin_amdgcn_sudot8">,
+ Intrinsic<
+ [llvm_i32_ty], // %r
+ [
+ llvm_i1_ty, // %a_sign
+ llvm_i32_ty, // %a
+ llvm_i1_ty, // %b_sign
+ llvm_i32_ty, // %b
+ llvm_i32_ty, // %c
+ llvm_i1_ty // %clamp
+ ],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn,
+ ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]
+ >;
+
//===----------------------------------------------------------------------===//
// gfx908 intrinsics
// ===----------------------------------------------------------------------===//
@@ -1931,7 +2236,7 @@ def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
// llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
- GCCBuiltin<!subst("int", "__builtin", NAME)>,
+ ClangBuiltin<!subst("int", "__builtin", NAME)>,
Intrinsic<[DestTy],
[SrcABTy, SrcABTy, DestTy,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
@@ -1975,10 +2280,47 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, ll
def int_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
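// The !subst in AMDGPUMfmaIntrinsic derives the builtin name from the record
// name, e.g. int_amdgcn_mfma_f32_32x32x8bf16_1k above maps to
// __builtin_amdgcn_mfma_f32_32x32x8bf16_1k.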
+// Note: in gfx940 the BLGP argument is replaced by a NEG bitfield in the
+// DGEMM MFMA. Three bits correspond to the neg modifier applied to the
+// respective source operand.
def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty, llvm_double_ty>;
def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic<llvm_double_ty, llvm_double_ty>;
//===----------------------------------------------------------------------===//
+// gfx940 intrinsics
+// ===----------------------------------------------------------------------===//
+
+// bf16 atomics use a v2i16 argument since there is no bf16 type in LLVM.
+def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn<llvm_v2i16_ty>;
+def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn<llvm_v2i16_ty>;
+def int_amdgcn_ds_fadd_v2bf16 : Intrinsic<
+ [llvm_v2i16_ty],
+ [LLVMQualPointerType<llvm_v2i16_ty, 3>, llvm_v2i16_ty],
+ [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>]>,
+ ClangBuiltin<"__builtin_amdgcn_ds_atomic_fadd_v2bf16">;
+
+def int_amdgcn_mfma_i32_16x16x32_i8 : AMDGPUMfmaIntrinsic<llvm_v4i32_ty, llvm_i64_ty>;
+def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i64_ty>;
+def int_amdgcn_mfma_f32_16x16x8_xf32 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v2f32_ty>;
+def int_amdgcn_mfma_f32_32x32x4_xf32 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2f32_ty>;
+
+// llvm.amdgcn.smfmac.?32.* vdst, srcA, srcB, srcC, index, cbsz, abid
+class AMDGPUMSmfmacIntrinsic<LLVMType DestTy, LLVMType SrcA, LLVMType SrcB> :
+ ClangBuiltin<!subst("int", "__builtin", NAME)>,
+ Intrinsic<[DestTy],
+ [SrcA, SrcB, DestTy, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, IntrWillReturn,
+ ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
+
+def int_amdgcn_smfmac_f32_16x16x32_f16 : AMDGPUMSmfmacIntrinsic<llvm_v4f32_ty, llvm_v4f16_ty, llvm_v8f16_ty>;
+def int_amdgcn_smfmac_f32_32x32x16_f16 : AMDGPUMSmfmacIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty, llvm_v8f16_ty>;
+def int_amdgcn_smfmac_f32_16x16x32_bf16 : AMDGPUMSmfmacIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty, llvm_v8i16_ty>;
+def int_amdgcn_smfmac_f32_32x32x16_bf16 : AMDGPUMSmfmacIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty, llvm_v8i16_ty>;
+def int_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUMSmfmacIntrinsic<llvm_v4i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
+def int_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUMSmfmacIntrinsic<llvm_v16i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>;
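+//
+// A minimal usage sketch (cbsz/abid zero; the index operand selects the
+// sparse-source lanes; names illustrative):
+//   %d = call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(
+//            <4 x half> %a, <8 x half> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)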
+
+//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
// ===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index a42484757592..3d905dbca6b9 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -22,199 +22,199 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
// 16-bit multiplications
-def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">,
+def int_arm_smulbb : ClangBuiltin<"__builtin_arm_smulbb">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smulbt : GCCBuiltin<"__builtin_arm_smulbt">,
+def int_arm_smulbt : ClangBuiltin<"__builtin_arm_smulbt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smultb : GCCBuiltin<"__builtin_arm_smultb">,
+def int_arm_smultb : ClangBuiltin<"__builtin_arm_smultb">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smultt : GCCBuiltin<"__builtin_arm_smultt">,
+def int_arm_smultt : ClangBuiltin<"__builtin_arm_smultt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smulwb : GCCBuiltin<"__builtin_arm_smulwb">,
+def int_arm_smulwb : ClangBuiltin<"__builtin_arm_smulwb">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smulwt : GCCBuiltin<"__builtin_arm_smulwt">,
+def int_arm_smulwt : ClangBuiltin<"__builtin_arm_smulwt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// Saturating Arithmetic
-def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">,
+def int_arm_qadd : ClangBuiltin<"__builtin_arm_qadd">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[Commutative, IntrNoMem]>;
-def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">,
+def int_arm_qsub : ClangBuiltin<"__builtin_arm_qsub">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">,
+def int_arm_ssat : ClangBuiltin<"__builtin_arm_ssat">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
+def int_arm_usat : ClangBuiltin<"__builtin_arm_usat">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Accumulating multiplications
-def int_arm_smlabb : GCCBuiltin<"__builtin_arm_smlabb">,
+def int_arm_smlabb : ClangBuiltin<"__builtin_arm_smlabb">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlabt : GCCBuiltin<"__builtin_arm_smlabt">,
+def int_arm_smlabt : ClangBuiltin<"__builtin_arm_smlabt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlatb : GCCBuiltin<"__builtin_arm_smlatb">,
+def int_arm_smlatb : ClangBuiltin<"__builtin_arm_smlatb">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlatt : GCCBuiltin<"__builtin_arm_smlatt">,
+def int_arm_smlatt : ClangBuiltin<"__builtin_arm_smlatt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlawb : GCCBuiltin<"__builtin_arm_smlawb">,
+def int_arm_smlawb : ClangBuiltin<"__builtin_arm_smlawb">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlawt : GCCBuiltin<"__builtin_arm_smlawt">,
+def int_arm_smlawt : ClangBuiltin<"__builtin_arm_smlawt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Parallel 16-bit saturation
-def int_arm_ssat16 : GCCBuiltin<"__builtin_arm_ssat16">,
+def int_arm_ssat16 : ClangBuiltin<"__builtin_arm_ssat16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_usat16 : GCCBuiltin<"__builtin_arm_usat16">,
+def int_arm_usat16 : ClangBuiltin<"__builtin_arm_usat16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Packing and unpacking
-def int_arm_sxtab16 : GCCBuiltin<"__builtin_arm_sxtab16">,
+def int_arm_sxtab16 : ClangBuiltin<"__builtin_arm_sxtab16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_sxtb16 : GCCBuiltin<"__builtin_arm_sxtb16">,
+def int_arm_sxtb16 : ClangBuiltin<"__builtin_arm_sxtb16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uxtab16 : GCCBuiltin<"__builtin_arm_uxtab16">,
+def int_arm_uxtab16 : ClangBuiltin<"__builtin_arm_uxtab16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uxtb16 : GCCBuiltin<"__builtin_arm_uxtb16">,
+def int_arm_uxtb16 : ClangBuiltin<"__builtin_arm_uxtb16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
// Parallel selection, reads the GE flags.
-def int_arm_sel : GCCBuiltin<"__builtin_arm_sel">,
+def int_arm_sel : ClangBuiltin<"__builtin_arm_sel">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
// Parallel 8-bit addition and subtraction
-def int_arm_qadd8 : GCCBuiltin<"__builtin_arm_qadd8">,
+def int_arm_qadd8 : ClangBuiltin<"__builtin_arm_qadd8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_qsub8 : GCCBuiltin<"__builtin_arm_qsub8">,
+def int_arm_qsub8 : ClangBuiltin<"__builtin_arm_qsub8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
-def int_arm_sadd8 : GCCBuiltin<"__builtin_arm_sadd8">,
+def int_arm_sadd8 : ClangBuiltin<"__builtin_arm_sadd8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
-def int_arm_shadd8 : GCCBuiltin<"__builtin_arm_shadd8">,
+def int_arm_shadd8 : ClangBuiltin<"__builtin_arm_shadd8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_shsub8 : GCCBuiltin<"__builtin_arm_shsub8">,
+def int_arm_shsub8 : ClangBuiltin<"__builtin_arm_shsub8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
-def int_arm_ssub8 : GCCBuiltin<"__builtin_arm_ssub8">,
+def int_arm_ssub8 : ClangBuiltin<"__builtin_arm_ssub8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
-def int_arm_uadd8 : GCCBuiltin<"__builtin_arm_uadd8">,
+def int_arm_uadd8 : ClangBuiltin<"__builtin_arm_uadd8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
-def int_arm_uhadd8 : GCCBuiltin<"__builtin_arm_uhadd8">,
+def int_arm_uhadd8 : ClangBuiltin<"__builtin_arm_uhadd8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uhsub8 : GCCBuiltin<"__builtin_arm_uhsub8">,
+def int_arm_uhsub8 : ClangBuiltin<"__builtin_arm_uhsub8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uqadd8 : GCCBuiltin<"__builtin_arm_uqadd8">,
+def int_arm_uqadd8 : ClangBuiltin<"__builtin_arm_uqadd8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uqsub8 : GCCBuiltin<"__builtin_arm_uqsub8">,
+def int_arm_uqsub8 : ClangBuiltin<"__builtin_arm_uqsub8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
-def int_arm_usub8 : GCCBuiltin<"__builtin_arm_usub8">,
+def int_arm_usub8 : ClangBuiltin<"__builtin_arm_usub8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Sum of 8-bit absolute differences
-def int_arm_usad8 : GCCBuiltin<"__builtin_arm_usad8">,
+def int_arm_usad8 : ClangBuiltin<"__builtin_arm_usad8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_usada8 : GCCBuiltin<"__builtin_arm_usada8">,
+def int_arm_usada8 : ClangBuiltin<"__builtin_arm_usada8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Parallel 16-bit addition and subtraction
-def int_arm_qadd16 : GCCBuiltin<"__builtin_arm_qadd16">,
+def int_arm_qadd16 : ClangBuiltin<"__builtin_arm_qadd16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_qasx : GCCBuiltin<"__builtin_arm_qasx">,
+def int_arm_qasx : ClangBuiltin<"__builtin_arm_qasx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_qsax : GCCBuiltin<"__builtin_arm_qsax">,
+def int_arm_qsax : ClangBuiltin<"__builtin_arm_qsax">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_qsub16 : GCCBuiltin<"__builtin_arm_qsub16">,
+def int_arm_qsub16 : ClangBuiltin<"__builtin_arm_qsub16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
-def int_arm_sadd16 : GCCBuiltin<"__builtin_arm_sadd16">,
+def int_arm_sadd16 : ClangBuiltin<"__builtin_arm_sadd16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
-def int_arm_sasx : GCCBuiltin<"__builtin_arm_sasx">,
+def int_arm_sasx : ClangBuiltin<"__builtin_arm_sasx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
-def int_arm_shadd16 : GCCBuiltin<"__builtin_arm_shadd16">,
+def int_arm_shadd16 : ClangBuiltin<"__builtin_arm_shadd16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_shasx : GCCBuiltin<"__builtin_arm_shasx">,
+def int_arm_shasx : ClangBuiltin<"__builtin_arm_shasx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_shsax : GCCBuiltin<"__builtin_arm_shsax">,
+def int_arm_shsax : ClangBuiltin<"__builtin_arm_shsax">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_shsub16 : GCCBuiltin<"__builtin_arm_shsub16">,
+def int_arm_shsub16 : ClangBuiltin<"__builtin_arm_shsub16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
-def int_arm_ssax : GCCBuiltin<"__builtin_arm_ssax">,
+def int_arm_ssax : ClangBuiltin<"__builtin_arm_ssax">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
-def int_arm_ssub16 : GCCBuiltin<"__builtin_arm_ssub16">,
+def int_arm_ssub16 : ClangBuiltin<"__builtin_arm_ssub16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
-def int_arm_uadd16 : GCCBuiltin<"__builtin_arm_uadd16">,
+def int_arm_uadd16 : ClangBuiltin<"__builtin_arm_uadd16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
-def int_arm_uasx : GCCBuiltin<"__builtin_arm_uasx">,
+def int_arm_uasx : ClangBuiltin<"__builtin_arm_uasx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
-def int_arm_uhadd16 : GCCBuiltin<"__builtin_arm_uhadd16">,
+def int_arm_uhadd16 : ClangBuiltin<"__builtin_arm_uhadd16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uhasx : GCCBuiltin<"__builtin_arm_uhasx">,
+def int_arm_uhasx : ClangBuiltin<"__builtin_arm_uhasx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uhsax : GCCBuiltin<"__builtin_arm_uhsax">,
+def int_arm_uhsax : ClangBuiltin<"__builtin_arm_uhsax">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uhsub16 : GCCBuiltin<"__builtin_arm_uhsub16">,
+def int_arm_uhsub16 : ClangBuiltin<"__builtin_arm_uhsub16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uqadd16 : GCCBuiltin<"__builtin_arm_uqadd16">,
+def int_arm_uqadd16 : ClangBuiltin<"__builtin_arm_uqadd16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uqasx : GCCBuiltin<"__builtin_arm_uqasx">,
+def int_arm_uqasx : ClangBuiltin<"__builtin_arm_uqasx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uqsax : GCCBuiltin<"__builtin_arm_uqsax">,
+def int_arm_uqsax : ClangBuiltin<"__builtin_arm_uqsax">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_uqsub16 : GCCBuiltin<"__builtin_arm_uqsub16">,
+def int_arm_uqsub16 : ClangBuiltin<"__builtin_arm_uqsub16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// Writes to the GE bits.
-def int_arm_usax : GCCBuiltin<"__builtin_arm_usax">,
+def int_arm_usax : ClangBuiltin<"__builtin_arm_usax">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Writes to the GE bits.
-def int_arm_usub16 : GCCBuiltin<"__builtin_arm_usub16">,
+def int_arm_usub16 : ClangBuiltin<"__builtin_arm_usub16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
// Parallel 16-bit multiplication
-def int_arm_smlad : GCCBuiltin<"__builtin_arm_smlad">,
+def int_arm_smlad : ClangBuiltin<"__builtin_arm_smlad">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smladx : GCCBuiltin<"__builtin_arm_smladx">,
+def int_arm_smladx : ClangBuiltin<"__builtin_arm_smladx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlald : GCCBuiltin<"__builtin_arm_smlald">,
+def int_arm_smlald : ClangBuiltin<"__builtin_arm_smlald">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_arm_smlaldx : GCCBuiltin<"__builtin_arm_smlaldx">,
+def int_arm_smlaldx : ClangBuiltin<"__builtin_arm_smlaldx">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_arm_smlsd : GCCBuiltin<"__builtin_arm_smlsd">,
+def int_arm_smlsd : ClangBuiltin<"__builtin_arm_smlsd">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlsdx : GCCBuiltin<"__builtin_arm_smlsdx">,
+def int_arm_smlsdx : ClangBuiltin<"__builtin_arm_smlsdx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_arm_smlsld : GCCBuiltin<"__builtin_arm_smlsld">,
+def int_arm_smlsld : ClangBuiltin<"__builtin_arm_smlsld">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_arm_smlsldx : GCCBuiltin<"__builtin_arm_smlsldx">,
+def int_arm_smlsldx : ClangBuiltin<"__builtin_arm_smlsldx">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_arm_smuad : GCCBuiltin<"__builtin_arm_smuad">,
+def int_arm_smuad : ClangBuiltin<"__builtin_arm_smuad">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smuadx : GCCBuiltin<"__builtin_arm_smuadx">,
+def int_arm_smuadx : ClangBuiltin<"__builtin_arm_smuadx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smusd : GCCBuiltin<"__builtin_arm_smusd">,
+def int_arm_smusd : ClangBuiltin<"__builtin_arm_smusd">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_arm_smusdx : GCCBuiltin<"__builtin_arm_smusdx">,
+def int_arm_smusdx : ClangBuiltin<"__builtin_arm_smusdx">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -239,19 +239,19 @@ def int_arm_ldaexd : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_ptr_ty]>;
//===----------------------------------------------------------------------===//
// Data barrier instructions
-def int_arm_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
+def int_arm_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
Intrinsic<[], [llvm_i32_ty]>;
-def int_arm_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
+def int_arm_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
Intrinsic<[], [llvm_i32_ty]>;
-def int_arm_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
+def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
Intrinsic<[], [llvm_i32_ty]>;
//===----------------------------------------------------------------------===//
// VFP
-def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
+def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">,
Intrinsic<[llvm_i32_ty], [], []>;
-def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
+def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">,
Intrinsic<[], [llvm_i32_ty], []>;
def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
[IntrNoMem]>;
@@ -261,47 +261,47 @@ def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
//===----------------------------------------------------------------------===//
// Coprocessor
-def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">,
+def int_arm_ldc : ClangBuiltin<"__builtin_arm_ldc">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
-def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">,
+def int_arm_ldcl : ClangBuiltin<"__builtin_arm_ldcl">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
-def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">,
+def int_arm_ldc2 : ClangBuiltin<"__builtin_arm_ldc2">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
-def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">,
+def int_arm_ldc2l : ClangBuiltin<"__builtin_arm_ldc2l">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
-def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">,
+def int_arm_stc : ClangBuiltin<"__builtin_arm_stc">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
-def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">,
+def int_arm_stcl : ClangBuiltin<"__builtin_arm_stcl">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
-def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">,
+def int_arm_stc2 : ClangBuiltin<"__builtin_arm_stc2">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
-def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">,
+def int_arm_stc2l : ClangBuiltin<"__builtin_arm_stc2l">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>;
// Move to coprocessor
-def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
+def int_arm_mcr : ClangBuiltin<"__builtin_arm_mcr">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
+def int_arm_mcr2 : ClangBuiltin<"__builtin_arm_mcr2">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
// Move from coprocessor
-def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">,
+def int_arm_mrc : ClangBuiltin<"__builtin_arm_mrc">,
MSBuiltin<"_MoveFromCoprocessor">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">,
+def int_arm_mrc2 : ClangBuiltin<"__builtin_arm_mrc2">,
MSBuiltin<"_MoveFromCoprocessor2">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
// Coprocessor data processing
-def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">,
+def int_arm_cdp : ClangBuiltin<"__builtin_arm_cdp">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
-def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">,
+def int_arm_cdp2 : ClangBuiltin<"__builtin_arm_cdp2">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
@@ -335,13 +335,13 @@ def int_arm_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
//===----------------------------------------------------------------------===//
// CMSE
-def int_arm_cmse_tt : GCCBuiltin<"__builtin_arm_cmse_TT">,
+def int_arm_cmse_tt : ClangBuiltin<"__builtin_arm_cmse_TT">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
-def int_arm_cmse_ttt : GCCBuiltin<"__builtin_arm_cmse_TTT">,
+def int_arm_cmse_ttt : ClangBuiltin<"__builtin_arm_cmse_TTT">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
-def int_arm_cmse_tta : GCCBuiltin<"__builtin_arm_cmse_TTA">,
+def int_arm_cmse_tta : ClangBuiltin<"__builtin_arm_cmse_TTA">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
-def int_arm_cmse_ttat : GCCBuiltin<"__builtin_arm_cmse_TTAT">,
+def int_arm_cmse_ttat : ClangBuiltin<"__builtin_arm_cmse_TTAT">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
@@ -1158,7 +1158,7 @@ defm int_arm_mve_vabav: MVEPredicated<
[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty>;
-// The following 3 instrinsics are MVE vector reductions with two vector
+// The following 3 intrinsics are MVE vector reductions with two vector
// operands.
// The first 3 operands are boolean flags (must be compile-time constants):
// * unsigned - the instruction operates on vectors of unsigned values and
diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index a6bd6f841aab..8916b60d2be3 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -12,29 +12,29 @@
// Specialized loads from packet
let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf."
- def int_bpf_load_byte : GCCBuiltin<"__builtin_bpf_load_byte">,
+ def int_bpf_load_byte : ClangBuiltin<"__builtin_bpf_load_byte">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
- def int_bpf_load_half : GCCBuiltin<"__builtin_bpf_load_half">,
+ def int_bpf_load_half : ClangBuiltin<"__builtin_bpf_load_half">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
- def int_bpf_load_word : GCCBuiltin<"__builtin_bpf_load_word">,
+ def int_bpf_load_word : ClangBuiltin<"__builtin_bpf_load_word">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
- def int_bpf_pseudo : GCCBuiltin<"__builtin_bpf_pseudo">,
+ def int_bpf_pseudo : ClangBuiltin<"__builtin_bpf_pseudo">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
- def int_bpf_preserve_field_info : GCCBuiltin<"__builtin_bpf_preserve_field_info">,
+ def int_bpf_preserve_field_info : ClangBuiltin<"__builtin_bpf_preserve_field_info">,
Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">,
+ def int_bpf_btf_type_id : ClangBuiltin<"__builtin_bpf_btf_type_id">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_bpf_preserve_type_info : GCCBuiltin<"__builtin_bpf_preserve_type_info">,
+ def int_bpf_preserve_type_info : ClangBuiltin<"__builtin_bpf_preserve_type_info">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_bpf_preserve_enum_value : GCCBuiltin<"__builtin_bpf_preserve_enum_value">,
+ def int_bpf_preserve_enum_value : ClangBuiltin<"__builtin_bpf_preserve_enum_value">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">,
+ def int_bpf_passthrough : ClangBuiltin<"__builtin_bpf_passthrough">,
Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>;
- def int_bpf_compare : GCCBuiltin<"__builtin_bpf_compare">,
+ def int_bpf_compare : ClangBuiltin<"__builtin_bpf_compare">,
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_anyint_ty, llvm_anyint_ty],
[IntrNoMem]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
new file mode 100644
index 000000000000..4a21cf1eb7fc
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -0,0 +1,20 @@
+//===- IntrinsicsDirectX.td - Defines DirectX intrinsics ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the DirectX-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "dxil" in {
+
+def int_dxil_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>;
+def int_dxil_group_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>;
+def int_dxil_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>;
+def int_dxil_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrWillReturn]>;
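+
+// A minimal usage sketch (assuming the i32 operand selects the component,
+// e.g. 0 for x):
+//   %tid_x = call i32 @llvm.dxil.thread.id(i32 0)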
+
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
index 212262c28706..52c29ef31f0a 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -18,7 +18,7 @@ let TargetPrefix = "hexagon" in {
class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
- : GCCBuiltin<!strconcat("__builtin_", GCCIntSuffix)>,
+ : ClangBuiltin<!strconcat("__builtin_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
/// Hexagon_NonGCC_Intrinsic - Base class for bitcode convertible Hexagon
@@ -404,4 +404,15 @@ def int_hexagon_V6_vmaskedstorenq_128B: Hexagon_custom_vms_Intrinsic_128B;
def int_hexagon_V6_vmaskedstorentq_128B: Hexagon_custom_vms_Intrinsic_128B;
def int_hexagon_V6_vmaskedstorentnq_128B: Hexagon_custom_vms_Intrinsic_128B;
+
+// Intrinsic for instrumentation based profiling using a custom handler. The
+// name of the handler is passed as the first operand to the intrinsic. The
+// handler can take only one int32 input, which is passed as the second
+// operand to the intrinsic.
+def int_hexagon_instrprof_custom
+ : Hexagon_NonGCC_Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrInaccessibleMemOnly]>;
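+
+// A minimal usage sketch (handler and value names illustrative; the first
+// operand carries the handler's name as a pointer):
+//   call void @llvm.hexagon.instrprof.custom(i8* %handler_name, i32 %value)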
+
include "llvm/IR/IntrinsicsHexagonDep.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td
index 271142ca7788..3056f37b9d87 100644
--- a/llvm/include/llvm/IR/IntrinsicsMips.td
+++ b/llvm/include/llvm/IR/IntrinsicsMips.td
@@ -24,370 +24,370 @@ let TargetPrefix = "mips" in { // All intrinsics start with "llvm.mips.".
//===----------------------------------------------------------------------===//
// Addition/subtraction
-def int_mips_addu_qb : GCCBuiltin<"__builtin_mips_addu_qb">,
+def int_mips_addu_qb : ClangBuiltin<"__builtin_mips_addu_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_addu_s_qb : GCCBuiltin<"__builtin_mips_addu_s_qb">,
+def int_mips_addu_s_qb : ClangBuiltin<"__builtin_mips_addu_s_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_subu_qb : GCCBuiltin<"__builtin_mips_subu_qb">,
+def int_mips_subu_qb : ClangBuiltin<"__builtin_mips_subu_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_subu_s_qb : GCCBuiltin<"__builtin_mips_subu_s_qb">,
+def int_mips_subu_s_qb : ClangBuiltin<"__builtin_mips_subu_s_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_addq_ph : GCCBuiltin<"__builtin_mips_addq_ph">,
+def int_mips_addq_ph : ClangBuiltin<"__builtin_mips_addq_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty],
[Commutative, IntrNoMem]>;
-def int_mips_addq_s_ph : GCCBuiltin<"__builtin_mips_addq_s_ph">,
+def int_mips_addq_s_ph : ClangBuiltin<"__builtin_mips_addq_s_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty],
[Commutative, IntrNoMem]>;
-def int_mips_subq_ph : GCCBuiltin<"__builtin_mips_subq_ph">,
+def int_mips_subq_ph : ClangBuiltin<"__builtin_mips_subq_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_subq_s_ph : GCCBuiltin<"__builtin_mips_subq_s_ph">,
+def int_mips_subq_s_ph : ClangBuiltin<"__builtin_mips_subq_s_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_madd: GCCBuiltin<"__builtin_mips_madd">,
+def int_mips_madd: ClangBuiltin<"__builtin_mips_madd">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
-def int_mips_maddu: GCCBuiltin<"__builtin_mips_maddu">,
+def int_mips_maddu: ClangBuiltin<"__builtin_mips_maddu">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
-def int_mips_msub: GCCBuiltin<"__builtin_mips_msub">,
+def int_mips_msub: ClangBuiltin<"__builtin_mips_msub">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_mips_msubu: GCCBuiltin<"__builtin_mips_msubu">,
+def int_mips_msubu: ClangBuiltin<"__builtin_mips_msubu">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_mips_addq_s_w: GCCBuiltin<"__builtin_mips_addq_s_w">,
+def int_mips_addq_s_w: ClangBuiltin<"__builtin_mips_addq_s_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>;
-def int_mips_subq_s_w: GCCBuiltin<"__builtin_mips_subq_s_w">,
+def int_mips_subq_s_w: ClangBuiltin<"__builtin_mips_subq_s_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], []>;
-def int_mips_addsc: GCCBuiltin<"__builtin_mips_addsc">,
+def int_mips_addsc: ClangBuiltin<"__builtin_mips_addsc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [Commutative]>;
-def int_mips_addwc: GCCBuiltin<"__builtin_mips_addwc">,
+def int_mips_addwc: ClangBuiltin<"__builtin_mips_addwc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [Commutative]>;
-def int_mips_modsub: GCCBuiltin<"__builtin_mips_modsub">,
+def int_mips_modsub: ClangBuiltin<"__builtin_mips_modsub">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_raddu_w_qb: GCCBuiltin<"__builtin_mips_raddu_w_qb">,
+def int_mips_raddu_w_qb: ClangBuiltin<"__builtin_mips_raddu_w_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// Absolute value
-def int_mips_absq_s_ph: GCCBuiltin<"__builtin_mips_absq_s_ph">,
+def int_mips_absq_s_ph: ClangBuiltin<"__builtin_mips_absq_s_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty], []>;
-def int_mips_absq_s_w: GCCBuiltin<"__builtin_mips_absq_s_w">,
+def int_mips_absq_s_w: ClangBuiltin<"__builtin_mips_absq_s_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty], []>;
//===----------------------------------------------------------------------===//
// Precision reduce/expand
-def int_mips_precrq_qb_ph: GCCBuiltin<"__builtin_mips_precrq_qb_ph">,
+def int_mips_precrq_qb_ph: ClangBuiltin<"__builtin_mips_precrq_qb_ph">,
Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_precrqu_s_qb_ph: GCCBuiltin<"__builtin_mips_precrqu_s_qb_ph">,
+def int_mips_precrqu_s_qb_ph: ClangBuiltin<"__builtin_mips_precrqu_s_qb_ph">,
Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_precrq_ph_w: GCCBuiltin<"__builtin_mips_precrq_ph_w">,
+def int_mips_precrq_ph_w: ClangBuiltin<"__builtin_mips_precrq_ph_w">,
Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>;
-def int_mips_precrq_rs_ph_w: GCCBuiltin<"__builtin_mips_precrq_rs_ph_w">,
+def int_mips_precrq_rs_ph_w: ClangBuiltin<"__builtin_mips_precrq_rs_ph_w">,
Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], []>;
-def int_mips_preceq_w_phl: GCCBuiltin<"__builtin_mips_preceq_w_phl">,
+def int_mips_preceq_w_phl: ClangBuiltin<"__builtin_mips_preceq_w_phl">,
Intrinsic<[mips_q31_ty], [mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_preceq_w_phr: GCCBuiltin<"__builtin_mips_preceq_w_phr">,
+def int_mips_preceq_w_phr: ClangBuiltin<"__builtin_mips_preceq_w_phr">,
Intrinsic<[mips_q31_ty], [mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_precequ_ph_qbl: GCCBuiltin<"__builtin_mips_precequ_ph_qbl">,
+def int_mips_precequ_ph_qbl: ClangBuiltin<"__builtin_mips_precequ_ph_qbl">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_precequ_ph_qbr: GCCBuiltin<"__builtin_mips_precequ_ph_qbr">,
+def int_mips_precequ_ph_qbr: ClangBuiltin<"__builtin_mips_precequ_ph_qbr">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_precequ_ph_qbla: GCCBuiltin<"__builtin_mips_precequ_ph_qbla">,
+def int_mips_precequ_ph_qbla: ClangBuiltin<"__builtin_mips_precequ_ph_qbla">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_precequ_ph_qbra: GCCBuiltin<"__builtin_mips_precequ_ph_qbra">,
+def int_mips_precequ_ph_qbra: ClangBuiltin<"__builtin_mips_precequ_ph_qbra">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_preceu_ph_qbl: GCCBuiltin<"__builtin_mips_preceu_ph_qbl">,
+def int_mips_preceu_ph_qbl: ClangBuiltin<"__builtin_mips_preceu_ph_qbl">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_preceu_ph_qbr: GCCBuiltin<"__builtin_mips_preceu_ph_qbr">,
+def int_mips_preceu_ph_qbr: ClangBuiltin<"__builtin_mips_preceu_ph_qbr">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_preceu_ph_qbla: GCCBuiltin<"__builtin_mips_preceu_ph_qbla">,
+def int_mips_preceu_ph_qbla: ClangBuiltin<"__builtin_mips_preceu_ph_qbla">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_preceu_ph_qbra: GCCBuiltin<"__builtin_mips_preceu_ph_qbra">,
+def int_mips_preceu_ph_qbra: ClangBuiltin<"__builtin_mips_preceu_ph_qbra">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// Shift
-def int_mips_shll_qb: GCCBuiltin<"__builtin_mips_shll_qb">,
+def int_mips_shll_qb: ClangBuiltin<"__builtin_mips_shll_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], []>;
-def int_mips_shrl_qb: GCCBuiltin<"__builtin_mips_shrl_qb">,
+def int_mips_shrl_qb: ClangBuiltin<"__builtin_mips_shrl_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_shll_ph: GCCBuiltin<"__builtin_mips_shll_ph">,
+def int_mips_shll_ph: ClangBuiltin<"__builtin_mips_shll_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], []>;
-def int_mips_shll_s_ph: GCCBuiltin<"__builtin_mips_shll_s_ph">,
+def int_mips_shll_s_ph: ClangBuiltin<"__builtin_mips_shll_s_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], []>;
-def int_mips_shra_ph: GCCBuiltin<"__builtin_mips_shra_ph">,
+def int_mips_shra_ph: ClangBuiltin<"__builtin_mips_shra_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_shra_r_ph: GCCBuiltin<"__builtin_mips_shra_r_ph">,
+def int_mips_shra_r_ph: ClangBuiltin<"__builtin_mips_shra_r_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_shll_s_w: GCCBuiltin<"__builtin_mips_shll_s_w">,
+def int_mips_shll_s_w: ClangBuiltin<"__builtin_mips_shll_s_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], []>;
-def int_mips_shra_r_w: GCCBuiltin<"__builtin_mips_shra_r_w">,
+def int_mips_shra_r_w: ClangBuiltin<"__builtin_mips_shra_r_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_shilo: GCCBuiltin<"__builtin_mips_shilo">,
+def int_mips_shilo: ClangBuiltin<"__builtin_mips_shilo">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// Multiplication
-def int_mips_muleu_s_ph_qbl: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbl">,
+def int_mips_muleu_s_ph_qbl: ClangBuiltin<"__builtin_mips_muleu_s_ph_qbl">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], []>;
-def int_mips_muleu_s_ph_qbr: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbr">,
+def int_mips_muleu_s_ph_qbr: ClangBuiltin<"__builtin_mips_muleu_s_ph_qbr">,
Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], []>;
-def int_mips_mulq_rs_ph: GCCBuiltin<"__builtin_mips_mulq_rs_ph">,
+def int_mips_mulq_rs_ph: ClangBuiltin<"__builtin_mips_mulq_rs_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
-def int_mips_muleq_s_w_phl: GCCBuiltin<"__builtin_mips_muleq_s_w_phl">,
+def int_mips_muleq_s_w_phl: ClangBuiltin<"__builtin_mips_muleq_s_w_phl">,
Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
-def int_mips_muleq_s_w_phr: GCCBuiltin<"__builtin_mips_muleq_s_w_phr">,
+def int_mips_muleq_s_w_phr: ClangBuiltin<"__builtin_mips_muleq_s_w_phr">,
Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
-def int_mips_mulsaq_s_w_ph: GCCBuiltin<"__builtin_mips_mulsaq_s_w_ph">,
+def int_mips_mulsaq_s_w_ph: ClangBuiltin<"__builtin_mips_mulsaq_s_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_maq_s_w_phl: GCCBuiltin<"__builtin_mips_maq_s_w_phl">,
+def int_mips_maq_s_w_phl: ClangBuiltin<"__builtin_mips_maq_s_w_phl">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_maq_s_w_phr: GCCBuiltin<"__builtin_mips_maq_s_w_phr">,
+def int_mips_maq_s_w_phr: ClangBuiltin<"__builtin_mips_maq_s_w_phr">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_maq_sa_w_phl: GCCBuiltin<"__builtin_mips_maq_sa_w_phl">,
+def int_mips_maq_sa_w_phl: ClangBuiltin<"__builtin_mips_maq_sa_w_phl">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_maq_sa_w_phr: GCCBuiltin<"__builtin_mips_maq_sa_w_phr">,
+def int_mips_maq_sa_w_phr: ClangBuiltin<"__builtin_mips_maq_sa_w_phr">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_mult: GCCBuiltin<"__builtin_mips_mult">,
+def int_mips_mult: ClangBuiltin<"__builtin_mips_mult">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
-def int_mips_multu: GCCBuiltin<"__builtin_mips_multu">,
+def int_mips_multu: ClangBuiltin<"__builtin_mips_multu">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
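A note on the accumulator builtins above: mult/multu model the full 32x32-bit widening multiply whose 64-bit result stands in for the HI/LO accumulator pair, which is why the intrinsic returns llvm_i64_ty. A minimal C sketch of the frontend side (hypothetical usage, assuming a MIPS toolchain with the DSP ASE enabled via -mdsp; the builtin name is the ClangBuiltin string above):

    /* mult: signed 32x32 -> 64-bit widening multiply; the long long
       result models the ac0 HI/LO accumulator contents. */
    long long widening_mul(int a, int b) {
      return __builtin_mips_mult(a, b);
    }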
//===----------------------------------------------------------------------===//
// Dot product with accumulate/subtract
-def int_mips_dpau_h_qbl: GCCBuiltin<"__builtin_mips_dpau_h_qbl">,
+def int_mips_dpau_h_qbl: ClangBuiltin<"__builtin_mips_dpau_h_qbl">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty],
[IntrNoMem]>;
-def int_mips_dpau_h_qbr: GCCBuiltin<"__builtin_mips_dpau_h_qbr">,
+def int_mips_dpau_h_qbr: ClangBuiltin<"__builtin_mips_dpau_h_qbr">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty],
[IntrNoMem]>;
-def int_mips_dpsu_h_qbl: GCCBuiltin<"__builtin_mips_dpsu_h_qbl">,
+def int_mips_dpsu_h_qbl: ClangBuiltin<"__builtin_mips_dpsu_h_qbl">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty],
[IntrNoMem]>;
-def int_mips_dpsu_h_qbr: GCCBuiltin<"__builtin_mips_dpsu_h_qbr">,
+def int_mips_dpsu_h_qbr: ClangBuiltin<"__builtin_mips_dpsu_h_qbr">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty],
[IntrNoMem]>;
-def int_mips_dpaq_s_w_ph: GCCBuiltin<"__builtin_mips_dpaq_s_w_ph">,
+def int_mips_dpaq_s_w_ph: ClangBuiltin<"__builtin_mips_dpaq_s_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_dpsq_s_w_ph: GCCBuiltin<"__builtin_mips_dpsq_s_w_ph">,
+def int_mips_dpsq_s_w_ph: ClangBuiltin<"__builtin_mips_dpsq_s_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_dpaq_sa_l_w: GCCBuiltin<"__builtin_mips_dpaq_sa_l_w">,
+def int_mips_dpaq_sa_l_w: ClangBuiltin<"__builtin_mips_dpaq_sa_l_w">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], []>;
-def int_mips_dpsq_sa_l_w: GCCBuiltin<"__builtin_mips_dpsq_sa_l_w">,
+def int_mips_dpsq_sa_l_w: ClangBuiltin<"__builtin_mips_dpsq_sa_l_w">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], []>;
//===----------------------------------------------------------------------===//
// Comparison
-def int_mips_cmpu_eq_qb: GCCBuiltin<"__builtin_mips_cmpu_eq_qb">,
+def int_mips_cmpu_eq_qb: ClangBuiltin<"__builtin_mips_cmpu_eq_qb">,
Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
-def int_mips_cmpu_lt_qb: GCCBuiltin<"__builtin_mips_cmpu_lt_qb">,
+def int_mips_cmpu_lt_qb: ClangBuiltin<"__builtin_mips_cmpu_lt_qb">,
Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
-def int_mips_cmpu_le_qb: GCCBuiltin<"__builtin_mips_cmpu_le_qb">,
+def int_mips_cmpu_le_qb: ClangBuiltin<"__builtin_mips_cmpu_le_qb">,
Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
-def int_mips_cmpgu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgu_eq_qb">,
+def int_mips_cmpgu_eq_qb: ClangBuiltin<"__builtin_mips_cmpgu_eq_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
-def int_mips_cmpgu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgu_lt_qb">,
+def int_mips_cmpgu_lt_qb: ClangBuiltin<"__builtin_mips_cmpgu_lt_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
-def int_mips_cmpgu_le_qb: GCCBuiltin<"__builtin_mips_cmpgu_le_qb">,
+def int_mips_cmpgu_le_qb: ClangBuiltin<"__builtin_mips_cmpgu_le_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
-def int_mips_cmp_eq_ph: GCCBuiltin<"__builtin_mips_cmp_eq_ph">,
+def int_mips_cmp_eq_ph: ClangBuiltin<"__builtin_mips_cmp_eq_ph">,
Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
-def int_mips_cmp_lt_ph: GCCBuiltin<"__builtin_mips_cmp_lt_ph">,
+def int_mips_cmp_lt_ph: ClangBuiltin<"__builtin_mips_cmp_lt_ph">,
Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_cmp_le_ph: GCCBuiltin<"__builtin_mips_cmp_le_ph">,
+def int_mips_cmp_le_ph: ClangBuiltin<"__builtin_mips_cmp_le_ph">,
Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], []>;
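Note the two comparison flavors above: the cmpu_*/cmp_* intrinsics return nothing because the hardware writes per-lane condition bits into DSPControl, while the cmpgu_* variants materialize the lane mask in a GPR, hence the llvm_i32_ty result. A hedged sketch of the GPR-returning form (assuming -mdsp; the vector typedef follows the GCC/Clang vector-extension convention and is illustrative):

    typedef signed char v4i8 __attribute__((vector_size(4)));

    /* cmpgu.eq.qb: per-byte unsigned equality compare; the low 4 bits
       of the result are the lane mask. */
    int equal_lane_mask(v4i8 a, v4i8 b) {
      return __builtin_mips_cmpgu_eq_qb(a, b);
    }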
//===----------------------------------------------------------------------===//
// Extracting
-def int_mips_extr_s_h: GCCBuiltin<"__builtin_mips_extr_s_h">,
+def int_mips_extr_s_h: ClangBuiltin<"__builtin_mips_extr_s_h">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-def int_mips_extr_w: GCCBuiltin<"__builtin_mips_extr_w">,
+def int_mips_extr_w: ClangBuiltin<"__builtin_mips_extr_w">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-def int_mips_extr_rs_w: GCCBuiltin<"__builtin_mips_extr_rs_w">,
+def int_mips_extr_rs_w: ClangBuiltin<"__builtin_mips_extr_rs_w">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-def int_mips_extr_r_w: GCCBuiltin<"__builtin_mips_extr_r_w">,
+def int_mips_extr_r_w: ClangBuiltin<"__builtin_mips_extr_r_w">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-def int_mips_extp: GCCBuiltin<"__builtin_mips_extp">,
+def int_mips_extp: ClangBuiltin<"__builtin_mips_extp">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-def int_mips_extpdp: GCCBuiltin<"__builtin_mips_extpdp">,
+def int_mips_extpdp: ClangBuiltin<"__builtin_mips_extpdp">,
Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
//===----------------------------------------------------------------------===//
// Misc
-def int_mips_wrdsp: GCCBuiltin<"__builtin_mips_wrdsp">,
+def int_mips_wrdsp: ClangBuiltin<"__builtin_mips_wrdsp">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<1>>]>;
-def int_mips_rddsp: GCCBuiltin<"__builtin_mips_rddsp">,
+def int_mips_rddsp: ClangBuiltin<"__builtin_mips_rddsp">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg<ArgIndex<0>>]>;
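The ImmArg<ArgIndex<...>> attribute on wrdsp/rddsp means the mask operand must be an integer constant expression, since it is encoded directly into the instruction. A minimal sketch (the mask value 0x3f is illustrative only):

    /* Read selected DSPControl fields, then write them back unchanged.
       Both mask operands must be compile-time constants (ImmArg). */
    unsigned roundtrip_dspcontrol(void) {
      unsigned ctl = __builtin_mips_rddsp(0x3f);
      __builtin_mips_wrdsp(ctl, 0x3f);
      return ctl;
    }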
-def int_mips_insv: GCCBuiltin<"__builtin_mips_insv">,
+def int_mips_insv: ClangBuiltin<"__builtin_mips_insv">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
-def int_mips_bitrev: GCCBuiltin<"__builtin_mips_bitrev">,
+def int_mips_bitrev: ClangBuiltin<"__builtin_mips_bitrev">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_packrl_ph: GCCBuiltin<"__builtin_mips_packrl_ph">,
+def int_mips_packrl_ph: ClangBuiltin<"__builtin_mips_packrl_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_repl_qb: GCCBuiltin<"__builtin_mips_repl_qb">,
+def int_mips_repl_qb: ClangBuiltin<"__builtin_mips_repl_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_repl_ph: GCCBuiltin<"__builtin_mips_repl_ph">,
+def int_mips_repl_ph: ClangBuiltin<"__builtin_mips_repl_ph">,
Intrinsic<[mips_v2q15_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_pick_qb: GCCBuiltin<"__builtin_mips_pick_qb">,
+def int_mips_pick_qb: ClangBuiltin<"__builtin_mips_pick_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrReadMem]>;
-def int_mips_pick_ph: GCCBuiltin<"__builtin_mips_pick_ph">,
+def int_mips_pick_ph: ClangBuiltin<"__builtin_mips_pick_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrReadMem]>;
-def int_mips_mthlip: GCCBuiltin<"__builtin_mips_mthlip">,
+def int_mips_mthlip: ClangBuiltin<"__builtin_mips_mthlip">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], []>;
-def int_mips_bposge32: GCCBuiltin<"__builtin_mips_bposge32">,
+def int_mips_bposge32: ClangBuiltin<"__builtin_mips_bposge32">,
Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
-def int_mips_lbux: GCCBuiltin<"__builtin_mips_lbux">,
+def int_mips_lbux: ClangBuiltin<"__builtin_mips_lbux">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
-def int_mips_lhx: GCCBuiltin<"__builtin_mips_lhx">,
+def int_mips_lhx: ClangBuiltin<"__builtin_mips_lhx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
-def int_mips_lwx: GCCBuiltin<"__builtin_mips_lwx">,
+def int_mips_lwx: ClangBuiltin<"__builtin_mips_lwx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
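lbux/lhx/lwx are the DSP indexed loads; IntrReadMem together with IntrArgMemOnly tells the optimizer they only read memory reachable through the pointer operand. A hedged sketch of the byte form, with the signature inferred from the def above:

    /* lbux: load the byte at base + index, zero-extended into a GPR. */
    int load_indexed_byte(const void *base, int index) {
      return __builtin_mips_lbux((void *)base, index);
    }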
//===----------------------------------------------------------------------===//
// MIPS DSP Rev 2
-def int_mips_absq_s_qb: GCCBuiltin<"__builtin_mips_absq_s_qb">,
+def int_mips_absq_s_qb: ClangBuiltin<"__builtin_mips_absq_s_qb">,
Intrinsic<[mips_v4q7_ty], [mips_v4q7_ty], []>;
-def int_mips_addqh_ph: GCCBuiltin<"__builtin_mips_addqh_ph">,
+def int_mips_addqh_ph: ClangBuiltin<"__builtin_mips_addqh_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty],
[IntrNoMem, Commutative]>;
-def int_mips_addqh_r_ph: GCCBuiltin<"__builtin_mips_addqh_r_ph">,
+def int_mips_addqh_r_ph: ClangBuiltin<"__builtin_mips_addqh_r_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty],
[IntrNoMem, Commutative]>;
-def int_mips_addqh_w: GCCBuiltin<"__builtin_mips_addqh_w">,
+def int_mips_addqh_w: ClangBuiltin<"__builtin_mips_addqh_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty],
[IntrNoMem, Commutative]>;
-def int_mips_addqh_r_w: GCCBuiltin<"__builtin_mips_addqh_r_w">,
+def int_mips_addqh_r_w: ClangBuiltin<"__builtin_mips_addqh_r_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty],
[IntrNoMem, Commutative]>;
-def int_mips_addu_ph: GCCBuiltin<"__builtin_mips_addu_ph">,
+def int_mips_addu_ph: ClangBuiltin<"__builtin_mips_addu_ph">,
Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>;
-def int_mips_addu_s_ph: GCCBuiltin<"__builtin_mips_addu_s_ph">,
+def int_mips_addu_s_ph: ClangBuiltin<"__builtin_mips_addu_s_ph">,
Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>;
-def int_mips_adduh_qb: GCCBuiltin<"__builtin_mips_adduh_qb">,
+def int_mips_adduh_qb: ClangBuiltin<"__builtin_mips_adduh_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
[IntrNoMem, Commutative]>;
-def int_mips_adduh_r_qb: GCCBuiltin<"__builtin_mips_adduh_r_qb">,
+def int_mips_adduh_r_qb: ClangBuiltin<"__builtin_mips_adduh_r_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
[IntrNoMem, Commutative]>;
-def int_mips_append: GCCBuiltin<"__builtin_mips_append">,
+def int_mips_append: ClangBuiltin<"__builtin_mips_append">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_balign: GCCBuiltin<"__builtin_mips_balign">,
+def int_mips_balign: ClangBuiltin<"__builtin_mips_balign">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_cmpgdu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgdu_eq_qb">,
+def int_mips_cmpgdu_eq_qb: ClangBuiltin<"__builtin_mips_cmpgdu_eq_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
-def int_mips_cmpgdu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgdu_lt_qb">,
+def int_mips_cmpgdu_lt_qb: ClangBuiltin<"__builtin_mips_cmpgdu_lt_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
-def int_mips_cmpgdu_le_qb: GCCBuiltin<"__builtin_mips_cmpgdu_le_qb">,
+def int_mips_cmpgdu_le_qb: ClangBuiltin<"__builtin_mips_cmpgdu_le_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
-def int_mips_dpa_w_ph: GCCBuiltin<"__builtin_mips_dpa_w_ph">,
+def int_mips_dpa_w_ph: ClangBuiltin<"__builtin_mips_dpa_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty],
[IntrNoMem]>;
-def int_mips_dps_w_ph: GCCBuiltin<"__builtin_mips_dps_w_ph">,
+def int_mips_dps_w_ph: ClangBuiltin<"__builtin_mips_dps_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty],
[IntrNoMem]>;
-def int_mips_dpaqx_s_w_ph: GCCBuiltin<"__builtin_mips_dpaqx_s_w_ph">,
+def int_mips_dpaqx_s_w_ph: ClangBuiltin<"__builtin_mips_dpaqx_s_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_dpaqx_sa_w_ph: GCCBuiltin<"__builtin_mips_dpaqx_sa_w_ph">,
+def int_mips_dpaqx_sa_w_ph: ClangBuiltin<"__builtin_mips_dpaqx_sa_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_dpax_w_ph: GCCBuiltin<"__builtin_mips_dpax_w_ph">,
+def int_mips_dpax_w_ph: ClangBuiltin<"__builtin_mips_dpax_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty],
[IntrNoMem]>;
-def int_mips_dpsx_w_ph: GCCBuiltin<"__builtin_mips_dpsx_w_ph">,
+def int_mips_dpsx_w_ph: ClangBuiltin<"__builtin_mips_dpsx_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty],
[IntrNoMem]>;
-def int_mips_dpsqx_s_w_ph: GCCBuiltin<"__builtin_mips_dpsqx_s_w_ph">,
+def int_mips_dpsqx_s_w_ph: ClangBuiltin<"__builtin_mips_dpsqx_s_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_dpsqx_sa_w_ph: GCCBuiltin<"__builtin_mips_dpsqx_sa_w_ph">,
+def int_mips_dpsqx_sa_w_ph: ClangBuiltin<"__builtin_mips_dpsqx_sa_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
-def int_mips_mul_ph: GCCBuiltin<"__builtin_mips_mul_ph">,
+def int_mips_mul_ph: ClangBuiltin<"__builtin_mips_mul_ph">,
Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>;
-def int_mips_mul_s_ph: GCCBuiltin<"__builtin_mips_mul_s_ph">,
+def int_mips_mul_s_ph: ClangBuiltin<"__builtin_mips_mul_s_ph">,
Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>;
-def int_mips_mulq_rs_w: GCCBuiltin<"__builtin_mips_mulq_rs_w">,
+def int_mips_mulq_rs_w: ClangBuiltin<"__builtin_mips_mulq_rs_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>;
-def int_mips_mulq_s_ph: GCCBuiltin<"__builtin_mips_mulq_s_ph">,
+def int_mips_mulq_s_ph: ClangBuiltin<"__builtin_mips_mulq_s_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
-def int_mips_mulq_s_w: GCCBuiltin<"__builtin_mips_mulq_s_w">,
+def int_mips_mulq_s_w: ClangBuiltin<"__builtin_mips_mulq_s_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>;
-def int_mips_mulsa_w_ph: GCCBuiltin<"__builtin_mips_mulsa_w_ph">,
+def int_mips_mulsa_w_ph: ClangBuiltin<"__builtin_mips_mulsa_w_ph">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty],
[IntrNoMem]>;
-def int_mips_precr_qb_ph: GCCBuiltin<"__builtin_mips_precr_qb_ph">,
+def int_mips_precr_qb_ph: ClangBuiltin<"__builtin_mips_precr_qb_ph">,
Intrinsic<[llvm_v4i8_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>;
-def int_mips_precr_sra_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_ph_w">,
+def int_mips_precr_sra_ph_w: ClangBuiltin<"__builtin_mips_precr_sra_ph_w">,
Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_precr_sra_r_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_r_ph_w">,
+def int_mips_precr_sra_r_ph_w: ClangBuiltin<"__builtin_mips_precr_sra_r_ph_w">,
Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_prepend: GCCBuiltin<"__builtin_mips_prepend">,
+def int_mips_prepend: ClangBuiltin<"__builtin_mips_prepend">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_shra_qb: GCCBuiltin<"__builtin_mips_shra_qb">,
+def int_mips_shra_qb: ClangBuiltin<"__builtin_mips_shra_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_shra_r_qb: GCCBuiltin<"__builtin_mips_shra_r_qb">,
+def int_mips_shra_r_qb: ClangBuiltin<"__builtin_mips_shra_r_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_shrl_ph: GCCBuiltin<"__builtin_mips_shrl_ph">,
+def int_mips_shrl_ph: ClangBuiltin<"__builtin_mips_shrl_ph">,
Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_subqh_ph: GCCBuiltin<"__builtin_mips_subqh_ph">,
+def int_mips_subqh_ph: ClangBuiltin<"__builtin_mips_subqh_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_subqh_r_ph: GCCBuiltin<"__builtin_mips_subqh_r_ph">,
+def int_mips_subqh_r_ph: ClangBuiltin<"__builtin_mips_subqh_r_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
-def int_mips_subqh_w: GCCBuiltin<"__builtin_mips_subqh_w">,
+def int_mips_subqh_w: ClangBuiltin<"__builtin_mips_subqh_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>;
-def int_mips_subqh_r_w: GCCBuiltin<"__builtin_mips_subqh_r_w">,
+def int_mips_subqh_r_w: ClangBuiltin<"__builtin_mips_subqh_r_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>;
-def int_mips_subu_ph: GCCBuiltin<"__builtin_mips_subu_ph">,
+def int_mips_subu_ph: ClangBuiltin<"__builtin_mips_subu_ph">,
Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>;
-def int_mips_subu_s_ph: GCCBuiltin<"__builtin_mips_subu_s_ph">,
+def int_mips_subu_s_ph: ClangBuiltin<"__builtin_mips_subu_s_ph">,
Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>;
-def int_mips_subuh_qb: GCCBuiltin<"__builtin_mips_subuh_qb">,
+def int_mips_subuh_qb: ClangBuiltin<"__builtin_mips_subuh_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>;
-def int_mips_subuh_r_qb: GCCBuiltin<"__builtin_mips_subuh_r_qb">,
+def int_mips_subuh_r_qb: ClangBuiltin<"__builtin_mips_subuh_r_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
@@ -396,1389 +396,1389 @@ def int_mips_subuh_r_qb: GCCBuiltin<"__builtin_mips_subuh_r_qb">,
//===----------------------------------------------------------------------===//
// Addition/subtraction
-def int_mips_add_a_b : GCCBuiltin<"__builtin_msa_add_a_b">,
+def int_mips_add_a_b : ClangBuiltin<"__builtin_msa_add_a_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_add_a_h : GCCBuiltin<"__builtin_msa_add_a_h">,
+def int_mips_add_a_h : ClangBuiltin<"__builtin_msa_add_a_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_add_a_w : GCCBuiltin<"__builtin_msa_add_a_w">,
+def int_mips_add_a_w : ClangBuiltin<"__builtin_msa_add_a_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_add_a_d : GCCBuiltin<"__builtin_msa_add_a_d">,
+def int_mips_add_a_d : ClangBuiltin<"__builtin_msa_add_a_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_a_b : GCCBuiltin<"__builtin_msa_adds_a_b">,
+def int_mips_adds_a_b : ClangBuiltin<"__builtin_msa_adds_a_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_a_h : GCCBuiltin<"__builtin_msa_adds_a_h">,
+def int_mips_adds_a_h : ClangBuiltin<"__builtin_msa_adds_a_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_a_w : GCCBuiltin<"__builtin_msa_adds_a_w">,
+def int_mips_adds_a_w : ClangBuiltin<"__builtin_msa_adds_a_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_a_d : GCCBuiltin<"__builtin_msa_adds_a_d">,
+def int_mips_adds_a_d : ClangBuiltin<"__builtin_msa_adds_a_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_s_b : GCCBuiltin<"__builtin_msa_adds_s_b">,
+def int_mips_adds_s_b : ClangBuiltin<"__builtin_msa_adds_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_s_h : GCCBuiltin<"__builtin_msa_adds_s_h">,
+def int_mips_adds_s_h : ClangBuiltin<"__builtin_msa_adds_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_s_w : GCCBuiltin<"__builtin_msa_adds_s_w">,
+def int_mips_adds_s_w : ClangBuiltin<"__builtin_msa_adds_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_s_d : GCCBuiltin<"__builtin_msa_adds_s_d">,
+def int_mips_adds_s_d : ClangBuiltin<"__builtin_msa_adds_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_u_b : GCCBuiltin<"__builtin_msa_adds_u_b">,
+def int_mips_adds_u_b : ClangBuiltin<"__builtin_msa_adds_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_u_h : GCCBuiltin<"__builtin_msa_adds_u_h">,
+def int_mips_adds_u_h : ClangBuiltin<"__builtin_msa_adds_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_u_w : GCCBuiltin<"__builtin_msa_adds_u_w">,
+def int_mips_adds_u_w : ClangBuiltin<"__builtin_msa_adds_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_adds_u_d : GCCBuiltin<"__builtin_msa_adds_u_d">,
+def int_mips_adds_u_d : ClangBuiltin<"__builtin_msa_adds_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_addv_b : GCCBuiltin<"__builtin_msa_addv_b">,
+def int_mips_addv_b : ClangBuiltin<"__builtin_msa_addv_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_addv_h : GCCBuiltin<"__builtin_msa_addv_h">,
+def int_mips_addv_h : ClangBuiltin<"__builtin_msa_addv_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_addv_w : GCCBuiltin<"__builtin_msa_addv_w">,
+def int_mips_addv_w : ClangBuiltin<"__builtin_msa_addv_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_addv_d : GCCBuiltin<"__builtin_msa_addv_d">,
+def int_mips_addv_d : ClangBuiltin<"__builtin_msa_addv_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_addvi_b : GCCBuiltin<"__builtin_msa_addvi_b">,
+def int_mips_addvi_b : ClangBuiltin<"__builtin_msa_addvi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
[Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_addvi_h : GCCBuiltin<"__builtin_msa_addvi_h">,
+def int_mips_addvi_h : ClangBuiltin<"__builtin_msa_addvi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
[Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_addvi_w : GCCBuiltin<"__builtin_msa_addvi_w">,
+def int_mips_addvi_w : ClangBuiltin<"__builtin_msa_addvi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
[Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_addvi_d : GCCBuiltin<"__builtin_msa_addvi_d">,
+def int_mips_addvi_d : ClangBuiltin<"__builtin_msa_addvi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
[Commutative, IntrNoMem, ImmArg<ArgIndex<1>>]>;
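The addvi_* family takes its second operand as an immediate (ImmArg<ArgIndex<1>>), mirroring the unsigned 5-bit field of the addvi.df encoding. A hedged example (assuming an MSA toolchain with -mmsa; the vector typedef is illustrative):

    typedef signed char v16i8 __attribute__((vector_size(16)));

    /* addvi.b: add the constant 7 to each of the 16 byte lanes;
       the immediate must be a compile-time constant in [0, 31]. */
    v16i8 add7_per_byte(v16i8 v) {
      return __builtin_msa_addvi_b(v, 7);
    }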
-def int_mips_and_v : GCCBuiltin<"__builtin_msa_and_v">,
+def int_mips_and_v : ClangBuiltin<"__builtin_msa_and_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_andi_b : GCCBuiltin<"__builtin_msa_andi_b">,
+def int_mips_andi_b : ClangBuiltin<"__builtin_msa_andi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_asub_s_b : GCCBuiltin<"__builtin_msa_asub_s_b">,
+def int_mips_asub_s_b : ClangBuiltin<"__builtin_msa_asub_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_asub_s_h : GCCBuiltin<"__builtin_msa_asub_s_h">,
+def int_mips_asub_s_h : ClangBuiltin<"__builtin_msa_asub_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_asub_s_w : GCCBuiltin<"__builtin_msa_asub_s_w">,
+def int_mips_asub_s_w : ClangBuiltin<"__builtin_msa_asub_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_asub_s_d : GCCBuiltin<"__builtin_msa_asub_s_d">,
+def int_mips_asub_s_d : ClangBuiltin<"__builtin_msa_asub_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_asub_u_b : GCCBuiltin<"__builtin_msa_asub_u_b">,
+def int_mips_asub_u_b : ClangBuiltin<"__builtin_msa_asub_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_asub_u_h : GCCBuiltin<"__builtin_msa_asub_u_h">,
+def int_mips_asub_u_h : ClangBuiltin<"__builtin_msa_asub_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_asub_u_w : GCCBuiltin<"__builtin_msa_asub_u_w">,
+def int_mips_asub_u_w : ClangBuiltin<"__builtin_msa_asub_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_asub_u_d : GCCBuiltin<"__builtin_msa_asub_u_d">,
+def int_mips_asub_u_d : ClangBuiltin<"__builtin_msa_asub_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ave_s_b : GCCBuiltin<"__builtin_msa_ave_s_b">,
+def int_mips_ave_s_b : ClangBuiltin<"__builtin_msa_ave_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_ave_s_h : GCCBuiltin<"__builtin_msa_ave_s_h">,
+def int_mips_ave_s_h : ClangBuiltin<"__builtin_msa_ave_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_ave_s_w : GCCBuiltin<"__builtin_msa_ave_s_w">,
+def int_mips_ave_s_w : ClangBuiltin<"__builtin_msa_ave_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_ave_s_d : GCCBuiltin<"__builtin_msa_ave_s_d">,
+def int_mips_ave_s_d : ClangBuiltin<"__builtin_msa_ave_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_ave_u_b : GCCBuiltin<"__builtin_msa_ave_u_b">,
+def int_mips_ave_u_b : ClangBuiltin<"__builtin_msa_ave_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_ave_u_h : GCCBuiltin<"__builtin_msa_ave_u_h">,
+def int_mips_ave_u_h : ClangBuiltin<"__builtin_msa_ave_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_ave_u_w : GCCBuiltin<"__builtin_msa_ave_u_w">,
+def int_mips_ave_u_w : ClangBuiltin<"__builtin_msa_ave_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_ave_u_d : GCCBuiltin<"__builtin_msa_ave_u_d">,
+def int_mips_ave_u_d : ClangBuiltin<"__builtin_msa_ave_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_s_b : GCCBuiltin<"__builtin_msa_aver_s_b">,
+def int_mips_aver_s_b : ClangBuiltin<"__builtin_msa_aver_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_s_h : GCCBuiltin<"__builtin_msa_aver_s_h">,
+def int_mips_aver_s_h : ClangBuiltin<"__builtin_msa_aver_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_s_w : GCCBuiltin<"__builtin_msa_aver_s_w">,
+def int_mips_aver_s_w : ClangBuiltin<"__builtin_msa_aver_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_s_d : GCCBuiltin<"__builtin_msa_aver_s_d">,
+def int_mips_aver_s_d : ClangBuiltin<"__builtin_msa_aver_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_u_b : GCCBuiltin<"__builtin_msa_aver_u_b">,
+def int_mips_aver_u_b : ClangBuiltin<"__builtin_msa_aver_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_u_h : GCCBuiltin<"__builtin_msa_aver_u_h">,
+def int_mips_aver_u_h : ClangBuiltin<"__builtin_msa_aver_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_u_w : GCCBuiltin<"__builtin_msa_aver_u_w">,
+def int_mips_aver_u_w : ClangBuiltin<"__builtin_msa_aver_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[Commutative, IntrNoMem]>;
-def int_mips_aver_u_d : GCCBuiltin<"__builtin_msa_aver_u_d">,
+def int_mips_aver_u_d : ClangBuiltin<"__builtin_msa_aver_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[Commutative, IntrNoMem]>;
-def int_mips_bclr_b : GCCBuiltin<"__builtin_msa_bclr_b">,
+def int_mips_bclr_b : ClangBuiltin<"__builtin_msa_bclr_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bclr_h : GCCBuiltin<"__builtin_msa_bclr_h">,
+def int_mips_bclr_h : ClangBuiltin<"__builtin_msa_bclr_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bclr_w : GCCBuiltin<"__builtin_msa_bclr_w">,
+def int_mips_bclr_w : ClangBuiltin<"__builtin_msa_bclr_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bclr_d : GCCBuiltin<"__builtin_msa_bclr_d">,
+def int_mips_bclr_d : ClangBuiltin<"__builtin_msa_bclr_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bclri_b : GCCBuiltin<"__builtin_msa_bclri_b">,
+def int_mips_bclri_b : ClangBuiltin<"__builtin_msa_bclri_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bclri_h : GCCBuiltin<"__builtin_msa_bclri_h">,
+def int_mips_bclri_h : ClangBuiltin<"__builtin_msa_bclri_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bclri_w : GCCBuiltin<"__builtin_msa_bclri_w">,
+def int_mips_bclri_w : ClangBuiltin<"__builtin_msa_bclri_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bclri_d : GCCBuiltin<"__builtin_msa_bclri_d">,
+def int_mips_bclri_d : ClangBuiltin<"__builtin_msa_bclri_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_binsl_b : GCCBuiltin<"__builtin_msa_binsl_b">,
+def int_mips_binsl_b : ClangBuiltin<"__builtin_msa_binsl_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_binsl_h : GCCBuiltin<"__builtin_msa_binsl_h">,
+def int_mips_binsl_h : ClangBuiltin<"__builtin_msa_binsl_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_binsl_w : GCCBuiltin<"__builtin_msa_binsl_w">,
+def int_mips_binsl_w : ClangBuiltin<"__builtin_msa_binsl_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_binsl_d : GCCBuiltin<"__builtin_msa_binsl_d">,
+def int_mips_binsl_d : ClangBuiltin<"__builtin_msa_binsl_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
-def int_mips_binsli_b : GCCBuiltin<"__builtin_msa_binsli_b">,
+def int_mips_binsli_b : ClangBuiltin<"__builtin_msa_binsli_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsli_h : GCCBuiltin<"__builtin_msa_binsli_h">,
+def int_mips_binsli_h : ClangBuiltin<"__builtin_msa_binsli_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsli_w : GCCBuiltin<"__builtin_msa_binsli_w">,
+def int_mips_binsli_w : ClangBuiltin<"__builtin_msa_binsli_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsli_d : GCCBuiltin<"__builtin_msa_binsli_d">,
+def int_mips_binsli_d : ClangBuiltin<"__builtin_msa_binsli_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsr_b : GCCBuiltin<"__builtin_msa_binsr_b">,
+def int_mips_binsr_b : ClangBuiltin<"__builtin_msa_binsr_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_binsr_h : GCCBuiltin<"__builtin_msa_binsr_h">,
+def int_mips_binsr_h : ClangBuiltin<"__builtin_msa_binsr_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_binsr_w : GCCBuiltin<"__builtin_msa_binsr_w">,
+def int_mips_binsr_w : ClangBuiltin<"__builtin_msa_binsr_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_binsr_d : GCCBuiltin<"__builtin_msa_binsr_d">,
+def int_mips_binsr_d : ClangBuiltin<"__builtin_msa_binsr_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
-def int_mips_binsri_b : GCCBuiltin<"__builtin_msa_binsri_b">,
+def int_mips_binsri_b : ClangBuiltin<"__builtin_msa_binsri_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsri_h : GCCBuiltin<"__builtin_msa_binsri_h">,
+def int_mips_binsri_h : ClangBuiltin<"__builtin_msa_binsri_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsri_w : GCCBuiltin<"__builtin_msa_binsri_w">,
+def int_mips_binsri_w : ClangBuiltin<"__builtin_msa_binsri_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_binsri_d : GCCBuiltin<"__builtin_msa_binsri_d">,
+def int_mips_binsri_d : ClangBuiltin<"__builtin_msa_binsri_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">,
+def int_mips_bmnz_v : ClangBuiltin<"__builtin_msa_bmnz_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_bmnzi_b : GCCBuiltin<"__builtin_msa_bmnzi_b">,
+def int_mips_bmnzi_b : ClangBuiltin<"__builtin_msa_bmnzi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">,
+def int_mips_bmz_v : ClangBuiltin<"__builtin_msa_bmz_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_bmzi_b : GCCBuiltin<"__builtin_msa_bmzi_b">,
+def int_mips_bmzi_b : ClangBuiltin<"__builtin_msa_bmzi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bneg_b : GCCBuiltin<"__builtin_msa_bneg_b">,
+def int_mips_bneg_b : ClangBuiltin<"__builtin_msa_bneg_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bneg_h : GCCBuiltin<"__builtin_msa_bneg_h">,
+def int_mips_bneg_h : ClangBuiltin<"__builtin_msa_bneg_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bneg_w : GCCBuiltin<"__builtin_msa_bneg_w">,
+def int_mips_bneg_w : ClangBuiltin<"__builtin_msa_bneg_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bneg_d : GCCBuiltin<"__builtin_msa_bneg_d">,
+def int_mips_bneg_d : ClangBuiltin<"__builtin_msa_bneg_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bnegi_b : GCCBuiltin<"__builtin_msa_bnegi_b">,
+def int_mips_bnegi_b : ClangBuiltin<"__builtin_msa_bnegi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnegi_h : GCCBuiltin<"__builtin_msa_bnegi_h">,
+def int_mips_bnegi_h : ClangBuiltin<"__builtin_msa_bnegi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnegi_w : GCCBuiltin<"__builtin_msa_bnegi_w">,
+def int_mips_bnegi_w : ClangBuiltin<"__builtin_msa_bnegi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnegi_d : GCCBuiltin<"__builtin_msa_bnegi_d">,
+def int_mips_bnegi_d : ClangBuiltin<"__builtin_msa_bnegi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bnz_b : GCCBuiltin<"__builtin_msa_bnz_b">,
+def int_mips_bnz_b : ClangBuiltin<"__builtin_msa_bnz_b">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bnz_h : GCCBuiltin<"__builtin_msa_bnz_h">,
+def int_mips_bnz_h : ClangBuiltin<"__builtin_msa_bnz_h">,
Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bnz_w : GCCBuiltin<"__builtin_msa_bnz_w">,
+def int_mips_bnz_w : ClangBuiltin<"__builtin_msa_bnz_w">,
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bnz_d : GCCBuiltin<"__builtin_msa_bnz_d">,
+def int_mips_bnz_d : ClangBuiltin<"__builtin_msa_bnz_d">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bnz_v : GCCBuiltin<"__builtin_msa_bnz_v">,
+def int_mips_bnz_v : ClangBuiltin<"__builtin_msa_bnz_v">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bsel_v : GCCBuiltin<"__builtin_msa_bsel_v">,
+def int_mips_bsel_v : ClangBuiltin<"__builtin_msa_bsel_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_bseli_b : GCCBuiltin<"__builtin_msa_bseli_b">,
+def int_mips_bseli_b : ClangBuiltin<"__builtin_msa_bseli_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_bset_b : GCCBuiltin<"__builtin_msa_bset_b">,
+def int_mips_bset_b : ClangBuiltin<"__builtin_msa_bset_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bset_h : GCCBuiltin<"__builtin_msa_bset_h">,
+def int_mips_bset_h : ClangBuiltin<"__builtin_msa_bset_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bset_w : GCCBuiltin<"__builtin_msa_bset_w">,
+def int_mips_bset_w : ClangBuiltin<"__builtin_msa_bset_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bset_d : GCCBuiltin<"__builtin_msa_bset_d">,
+def int_mips_bset_d : ClangBuiltin<"__builtin_msa_bset_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bseti_b : GCCBuiltin<"__builtin_msa_bseti_b">,
+def int_mips_bseti_b : ClangBuiltin<"__builtin_msa_bseti_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bseti_h : GCCBuiltin<"__builtin_msa_bseti_h">,
+def int_mips_bseti_h : ClangBuiltin<"__builtin_msa_bseti_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bseti_w : GCCBuiltin<"__builtin_msa_bseti_w">,
+def int_mips_bseti_w : ClangBuiltin<"__builtin_msa_bseti_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bseti_d : GCCBuiltin<"__builtin_msa_bseti_d">,
+def int_mips_bseti_d : ClangBuiltin<"__builtin_msa_bseti_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_bz_b : GCCBuiltin<"__builtin_msa_bz_b">,
+def int_mips_bz_b : ClangBuiltin<"__builtin_msa_bz_b">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_bz_h : GCCBuiltin<"__builtin_msa_bz_h">,
+def int_mips_bz_h : ClangBuiltin<"__builtin_msa_bz_h">,
Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_bz_w : GCCBuiltin<"__builtin_msa_bz_w">,
+def int_mips_bz_w : ClangBuiltin<"__builtin_msa_bz_w">,
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_bz_d : GCCBuiltin<"__builtin_msa_bz_d">,
+def int_mips_bz_d : ClangBuiltin<"__builtin_msa_bz_d">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_bz_v : GCCBuiltin<"__builtin_msa_bz_v">,
+def int_mips_bz_v : ClangBuiltin<"__builtin_msa_bz_v">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
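The bnz_*/bz_* intrinsics return an i32 truth value that the backend folds into the MSA branch instructions: the per-element forms test whether all lanes are non-zero (or zero), while bnz_v/bz_v test the vector as a whole. A minimal sketch (assuming -mmsa; the typedef is illustrative):

    typedef unsigned char v16u8 __attribute__((vector_size(16)));

    /* bnz.b: returns 1 if every byte lane of v is non-zero, else 0. */
    int all_bytes_nonzero(v16u8 v) {
      return __builtin_msa_bnz_b(v);
    }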
-def int_mips_ceq_b : GCCBuiltin<"__builtin_msa_ceq_b">,
+def int_mips_ceq_b : ClangBuiltin<"__builtin_msa_ceq_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ceq_h : GCCBuiltin<"__builtin_msa_ceq_h">,
+def int_mips_ceq_h : ClangBuiltin<"__builtin_msa_ceq_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ceq_w : GCCBuiltin<"__builtin_msa_ceq_w">,
+def int_mips_ceq_w : ClangBuiltin<"__builtin_msa_ceq_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ceq_d : GCCBuiltin<"__builtin_msa_ceq_d">,
+def int_mips_ceq_d : ClangBuiltin<"__builtin_msa_ceq_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ceqi_b : GCCBuiltin<"__builtin_msa_ceqi_b">,
+def int_mips_ceqi_b : ClangBuiltin<"__builtin_msa_ceqi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_ceqi_h : GCCBuiltin<"__builtin_msa_ceqi_h">,
+def int_mips_ceqi_h : ClangBuiltin<"__builtin_msa_ceqi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_ceqi_w : GCCBuiltin<"__builtin_msa_ceqi_w">,
+def int_mips_ceqi_w : ClangBuiltin<"__builtin_msa_ceqi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_ceqi_d : GCCBuiltin<"__builtin_msa_ceqi_d">,
+def int_mips_ceqi_d : ClangBuiltin<"__builtin_msa_ceqi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_cfcmsa : GCCBuiltin<"__builtin_msa_cfcmsa">,
+def int_mips_cfcmsa : ClangBuiltin<"__builtin_msa_cfcmsa">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
-def int_mips_cle_s_b : GCCBuiltin<"__builtin_msa_cle_s_b">,
+def int_mips_cle_s_b : ClangBuiltin<"__builtin_msa_cle_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_cle_s_h : GCCBuiltin<"__builtin_msa_cle_s_h">,
+def int_mips_cle_s_h : ClangBuiltin<"__builtin_msa_cle_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_cle_s_w : GCCBuiltin<"__builtin_msa_cle_s_w">,
+def int_mips_cle_s_w : ClangBuiltin<"__builtin_msa_cle_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_cle_s_d : GCCBuiltin<"__builtin_msa_cle_s_d">,
+def int_mips_cle_s_d : ClangBuiltin<"__builtin_msa_cle_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_cle_u_b : GCCBuiltin<"__builtin_msa_cle_u_b">,
+def int_mips_cle_u_b : ClangBuiltin<"__builtin_msa_cle_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_cle_u_h : GCCBuiltin<"__builtin_msa_cle_u_h">,
+def int_mips_cle_u_h : ClangBuiltin<"__builtin_msa_cle_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_cle_u_w : GCCBuiltin<"__builtin_msa_cle_u_w">,
+def int_mips_cle_u_w : ClangBuiltin<"__builtin_msa_cle_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_cle_u_d : GCCBuiltin<"__builtin_msa_cle_u_d">,
+def int_mips_cle_u_d : ClangBuiltin<"__builtin_msa_cle_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_clei_s_b : GCCBuiltin<"__builtin_msa_clei_s_b">,
+def int_mips_clei_s_b : ClangBuiltin<"__builtin_msa_clei_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_s_h : GCCBuiltin<"__builtin_msa_clei_s_h">,
+def int_mips_clei_s_h : ClangBuiltin<"__builtin_msa_clei_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_s_w : GCCBuiltin<"__builtin_msa_clei_s_w">,
+def int_mips_clei_s_w : ClangBuiltin<"__builtin_msa_clei_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_s_d : GCCBuiltin<"__builtin_msa_clei_s_d">,
+def int_mips_clei_s_d : ClangBuiltin<"__builtin_msa_clei_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_b : GCCBuiltin<"__builtin_msa_clei_u_b">,
+def int_mips_clei_u_b : ClangBuiltin<"__builtin_msa_clei_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_h : GCCBuiltin<"__builtin_msa_clei_u_h">,
+def int_mips_clei_u_h : ClangBuiltin<"__builtin_msa_clei_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_w : GCCBuiltin<"__builtin_msa_clei_u_w">,
+def int_mips_clei_u_w : ClangBuiltin<"__builtin_msa_clei_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clei_u_d : GCCBuiltin<"__builtin_msa_clei_u_d">,
+def int_mips_clei_u_d : ClangBuiltin<"__builtin_msa_clei_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clt_s_b : GCCBuiltin<"__builtin_msa_clt_s_b">,
+def int_mips_clt_s_b : ClangBuiltin<"__builtin_msa_clt_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_clt_s_h : GCCBuiltin<"__builtin_msa_clt_s_h">,
+def int_mips_clt_s_h : ClangBuiltin<"__builtin_msa_clt_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_clt_s_w : GCCBuiltin<"__builtin_msa_clt_s_w">,
+def int_mips_clt_s_w : ClangBuiltin<"__builtin_msa_clt_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_clt_s_d : GCCBuiltin<"__builtin_msa_clt_s_d">,
+def int_mips_clt_s_d : ClangBuiltin<"__builtin_msa_clt_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_clt_u_b : GCCBuiltin<"__builtin_msa_clt_u_b">,
+def int_mips_clt_u_b : ClangBuiltin<"__builtin_msa_clt_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_clt_u_h : GCCBuiltin<"__builtin_msa_clt_u_h">,
+def int_mips_clt_u_h : ClangBuiltin<"__builtin_msa_clt_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_clt_u_w : GCCBuiltin<"__builtin_msa_clt_u_w">,
+def int_mips_clt_u_w : ClangBuiltin<"__builtin_msa_clt_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_clt_u_d : GCCBuiltin<"__builtin_msa_clt_u_d">,
+def int_mips_clt_u_d : ClangBuiltin<"__builtin_msa_clt_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_clti_s_b : GCCBuiltin<"__builtin_msa_clti_s_b">,
+def int_mips_clti_s_b : ClangBuiltin<"__builtin_msa_clti_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_s_h : GCCBuiltin<"__builtin_msa_clti_s_h">,
+def int_mips_clti_s_h : ClangBuiltin<"__builtin_msa_clti_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_s_w : GCCBuiltin<"__builtin_msa_clti_s_w">,
+def int_mips_clti_s_w : ClangBuiltin<"__builtin_msa_clti_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_s_d : GCCBuiltin<"__builtin_msa_clti_s_d">,
+def int_mips_clti_s_d : ClangBuiltin<"__builtin_msa_clti_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_b : GCCBuiltin<"__builtin_msa_clti_u_b">,
+def int_mips_clti_u_b : ClangBuiltin<"__builtin_msa_clti_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_h : GCCBuiltin<"__builtin_msa_clti_u_h">,
+def int_mips_clti_u_h : ClangBuiltin<"__builtin_msa_clti_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_w : GCCBuiltin<"__builtin_msa_clti_u_w">,
+def int_mips_clti_u_w : ClangBuiltin<"__builtin_msa_clti_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">,
+def int_mips_clti_u_d : ClangBuiltin<"__builtin_msa_clti_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">,
+def int_mips_copy_s_b : ClangBuiltin<"__builtin_msa_copy_s_b">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_s_h : GCCBuiltin<"__builtin_msa_copy_s_h">,
+def int_mips_copy_s_h : ClangBuiltin<"__builtin_msa_copy_s_h">,
Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_s_w : GCCBuiltin<"__builtin_msa_copy_s_w">,
+def int_mips_copy_s_w : ClangBuiltin<"__builtin_msa_copy_s_w">,
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_s_d : GCCBuiltin<"__builtin_msa_copy_s_d">,
+def int_mips_copy_s_d : ClangBuiltin<"__builtin_msa_copy_s_d">,
Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_b : GCCBuiltin<"__builtin_msa_copy_u_b">,
+def int_mips_copy_u_b : ClangBuiltin<"__builtin_msa_copy_u_b">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_h : GCCBuiltin<"__builtin_msa_copy_u_h">,
+def int_mips_copy_u_h : ClangBuiltin<"__builtin_msa_copy_u_h">,
Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_w : GCCBuiltin<"__builtin_msa_copy_u_w">,
+def int_mips_copy_u_w : ClangBuiltin<"__builtin_msa_copy_u_w">,
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_copy_u_d : GCCBuiltin<"__builtin_msa_copy_u_d">,
+def int_mips_copy_u_d : ClangBuiltin<"__builtin_msa_copy_u_d">,
Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
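copy_s_*/copy_u_* move one vector element into a GPR with sign or zero extension; the second operand selects the lane. A hedged sketch (assuming -mmsa; in practice the lane index is a small constant even though the def types it as plain llvm_i32_ty):

    typedef int v4i32 __attribute__((vector_size(16)));

    /* copy_s.w: sign-extending extract of word lane 2 into a GPR. */
    int extract_word2(v4i32 v) {
      return __builtin_msa_copy_s_w(v, 2);
    }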
-def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">,
+def int_mips_ctcmsa : ClangBuiltin<"__builtin_msa_ctcmsa">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
-def int_mips_div_s_b : GCCBuiltin<"__builtin_msa_div_s_b">,
+def int_mips_div_s_b : ClangBuiltin<"__builtin_msa_div_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_div_s_h : GCCBuiltin<"__builtin_msa_div_s_h">,
+def int_mips_div_s_h : ClangBuiltin<"__builtin_msa_div_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_div_s_w : GCCBuiltin<"__builtin_msa_div_s_w">,
+def int_mips_div_s_w : ClangBuiltin<"__builtin_msa_div_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_div_s_d : GCCBuiltin<"__builtin_msa_div_s_d">,
+def int_mips_div_s_d : ClangBuiltin<"__builtin_msa_div_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_div_u_b : GCCBuiltin<"__builtin_msa_div_u_b">,
+def int_mips_div_u_b : ClangBuiltin<"__builtin_msa_div_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_div_u_h : GCCBuiltin<"__builtin_msa_div_u_h">,
+def int_mips_div_u_h : ClangBuiltin<"__builtin_msa_div_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_div_u_w : GCCBuiltin<"__builtin_msa_div_u_w">,
+def int_mips_div_u_w : ClangBuiltin<"__builtin_msa_div_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_div_u_d : GCCBuiltin<"__builtin_msa_div_u_d">,
+def int_mips_div_u_d : ClangBuiltin<"__builtin_msa_div_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
// This instruction is part of the MSA spec but it does not share the
// __builtin_msa prefix because it operates on GP registers.
-def int_mips_dlsa : GCCBuiltin<"__builtin_mips_dlsa">,
+def int_mips_dlsa : ClangBuiltin<"__builtin_mips_dlsa">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
[IntrNoMem]>;
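As context for the comment above: because dlsa operates on general-purpose registers rather than MSA vector registers, its builtin carries the __builtin_mips prefix instead of __builtin_msa. A minimal C sketch of how such a builtin is typically invoked, assuming the operand order (rs, rt, sa) implied by the llvm.mips.dlsa signature above and that, per the MIPS64 R6 DLSA definition, the result is (rs << (sa + 1)) + rt — this example is not part of the diff:
/* Sketch only: operand order and the shift-amount semantics are
   assumptions drawn from the intrinsic signature above and the
   MIPS64 R6 DLSA spec, not stated by this diff. */
long long scaled_addr(long long rs, long long rt) {
  /* sa = 2 shifts by 3, i.e. computes rs * 8 + rt in one instruction. */
  return __builtin_mips_dlsa(rs, rt, 2);
}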
-def int_mips_dotp_s_h : GCCBuiltin<"__builtin_msa_dotp_s_h">,
+def int_mips_dotp_s_h : ClangBuiltin<"__builtin_msa_dotp_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dotp_s_w : GCCBuiltin<"__builtin_msa_dotp_s_w">,
+def int_mips_dotp_s_w : ClangBuiltin<"__builtin_msa_dotp_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dotp_s_d : GCCBuiltin<"__builtin_msa_dotp_s_d">,
+def int_mips_dotp_s_d : ClangBuiltin<"__builtin_msa_dotp_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_dotp_u_h : GCCBuiltin<"__builtin_msa_dotp_u_h">,
+def int_mips_dotp_u_h : ClangBuiltin<"__builtin_msa_dotp_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_dotp_u_w : GCCBuiltin<"__builtin_msa_dotp_u_w">,
+def int_mips_dotp_u_w : ClangBuiltin<"__builtin_msa_dotp_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_dotp_u_d : GCCBuiltin<"__builtin_msa_dotp_u_d">,
+def int_mips_dotp_u_d : ClangBuiltin<"__builtin_msa_dotp_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_dpadd_s_h : GCCBuiltin<"__builtin_msa_dpadd_s_h">,
+def int_mips_dpadd_s_h : ClangBuiltin<"__builtin_msa_dpadd_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_dpadd_s_w : GCCBuiltin<"__builtin_msa_dpadd_s_w">,
+def int_mips_dpadd_s_w : ClangBuiltin<"__builtin_msa_dpadd_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_dpadd_s_d : GCCBuiltin<"__builtin_msa_dpadd_s_d">,
+def int_mips_dpadd_s_d : ClangBuiltin<"__builtin_msa_dpadd_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_dpadd_u_h : GCCBuiltin<"__builtin_msa_dpadd_u_h">,
+def int_mips_dpadd_u_h : ClangBuiltin<"__builtin_msa_dpadd_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_dpadd_u_w : GCCBuiltin<"__builtin_msa_dpadd_u_w">,
+def int_mips_dpadd_u_w : ClangBuiltin<"__builtin_msa_dpadd_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_dpadd_u_d : GCCBuiltin<"__builtin_msa_dpadd_u_d">,
+def int_mips_dpadd_u_d : ClangBuiltin<"__builtin_msa_dpadd_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_dpsub_s_h : GCCBuiltin<"__builtin_msa_dpsub_s_h">,
+def int_mips_dpsub_s_h : ClangBuiltin<"__builtin_msa_dpsub_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_dpsub_s_w : GCCBuiltin<"__builtin_msa_dpsub_s_w">,
+def int_mips_dpsub_s_w : ClangBuiltin<"__builtin_msa_dpsub_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_dpsub_s_d : GCCBuiltin<"__builtin_msa_dpsub_s_d">,
+def int_mips_dpsub_s_d : ClangBuiltin<"__builtin_msa_dpsub_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_dpsub_u_h : GCCBuiltin<"__builtin_msa_dpsub_u_h">,
+def int_mips_dpsub_u_h : ClangBuiltin<"__builtin_msa_dpsub_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_dpsub_u_w : GCCBuiltin<"__builtin_msa_dpsub_u_w">,
+def int_mips_dpsub_u_w : ClangBuiltin<"__builtin_msa_dpsub_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_dpsub_u_d : GCCBuiltin<"__builtin_msa_dpsub_u_d">,
+def int_mips_dpsub_u_d : ClangBuiltin<"__builtin_msa_dpsub_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_fadd_w : GCCBuiltin<"__builtin_msa_fadd_w">,
+def int_mips_fadd_w : ClangBuiltin<"__builtin_msa_fadd_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fadd_d : GCCBuiltin<"__builtin_msa_fadd_d">,
+def int_mips_fadd_d : ClangBuiltin<"__builtin_msa_fadd_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcaf_w : GCCBuiltin<"__builtin_msa_fcaf_w">,
+def int_mips_fcaf_w : ClangBuiltin<"__builtin_msa_fcaf_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcaf_d : GCCBuiltin<"__builtin_msa_fcaf_d">,
+def int_mips_fcaf_d : ClangBuiltin<"__builtin_msa_fcaf_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fceq_w : GCCBuiltin<"__builtin_msa_fceq_w">,
+def int_mips_fceq_w : ClangBuiltin<"__builtin_msa_fceq_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fceq_d : GCCBuiltin<"__builtin_msa_fceq_d">,
+def int_mips_fceq_d : ClangBuiltin<"__builtin_msa_fceq_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcle_w : GCCBuiltin<"__builtin_msa_fcle_w">,
+def int_mips_fcle_w : ClangBuiltin<"__builtin_msa_fcle_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcle_d : GCCBuiltin<"__builtin_msa_fcle_d">,
+def int_mips_fcle_d : ClangBuiltin<"__builtin_msa_fcle_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fclt_w : GCCBuiltin<"__builtin_msa_fclt_w">,
+def int_mips_fclt_w : ClangBuiltin<"__builtin_msa_fclt_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fclt_d : GCCBuiltin<"__builtin_msa_fclt_d">,
+def int_mips_fclt_d : ClangBuiltin<"__builtin_msa_fclt_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fclass_w : GCCBuiltin<"__builtin_msa_fclass_w">,
+def int_mips_fclass_w : ClangBuiltin<"__builtin_msa_fclass_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fclass_d : GCCBuiltin<"__builtin_msa_fclass_d">,
+def int_mips_fclass_d : ClangBuiltin<"__builtin_msa_fclass_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcne_w : GCCBuiltin<"__builtin_msa_fcne_w">,
+def int_mips_fcne_w : ClangBuiltin<"__builtin_msa_fcne_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcne_d : GCCBuiltin<"__builtin_msa_fcne_d">,
+def int_mips_fcne_d : ClangBuiltin<"__builtin_msa_fcne_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcor_w : GCCBuiltin<"__builtin_msa_fcor_w">,
+def int_mips_fcor_w : ClangBuiltin<"__builtin_msa_fcor_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcor_d : GCCBuiltin<"__builtin_msa_fcor_d">,
+def int_mips_fcor_d : ClangBuiltin<"__builtin_msa_fcor_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcueq_w : GCCBuiltin<"__builtin_msa_fcueq_w">,
+def int_mips_fcueq_w : ClangBuiltin<"__builtin_msa_fcueq_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcueq_d : GCCBuiltin<"__builtin_msa_fcueq_d">,
+def int_mips_fcueq_d : ClangBuiltin<"__builtin_msa_fcueq_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcule_w : GCCBuiltin<"__builtin_msa_fcule_w">,
+def int_mips_fcule_w : ClangBuiltin<"__builtin_msa_fcule_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcule_d : GCCBuiltin<"__builtin_msa_fcule_d">,
+def int_mips_fcule_d : ClangBuiltin<"__builtin_msa_fcule_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcult_w : GCCBuiltin<"__builtin_msa_fcult_w">,
+def int_mips_fcult_w : ClangBuiltin<"__builtin_msa_fcult_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcult_d : GCCBuiltin<"__builtin_msa_fcult_d">,
+def int_mips_fcult_d : ClangBuiltin<"__builtin_msa_fcult_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcun_w : GCCBuiltin<"__builtin_msa_fcun_w">,
+def int_mips_fcun_w : ClangBuiltin<"__builtin_msa_fcun_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcun_d : GCCBuiltin<"__builtin_msa_fcun_d">,
+def int_mips_fcun_d : ClangBuiltin<"__builtin_msa_fcun_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fcune_w : GCCBuiltin<"__builtin_msa_fcune_w">,
+def int_mips_fcune_w : ClangBuiltin<"__builtin_msa_fcune_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fcune_d : GCCBuiltin<"__builtin_msa_fcune_d">,
+def int_mips_fcune_d : ClangBuiltin<"__builtin_msa_fcune_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fdiv_w : GCCBuiltin<"__builtin_msa_fdiv_w">,
+def int_mips_fdiv_w : ClangBuiltin<"__builtin_msa_fdiv_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fdiv_d : GCCBuiltin<"__builtin_msa_fdiv_d">,
+def int_mips_fdiv_d : ClangBuiltin<"__builtin_msa_fdiv_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fexdo_h : GCCBuiltin<"__builtin_msa_fexdo_h">,
+def int_mips_fexdo_h : ClangBuiltin<"__builtin_msa_fexdo_h">,
Intrinsic<[llvm_v8f16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fexdo_w : GCCBuiltin<"__builtin_msa_fexdo_w">,
+def int_mips_fexdo_w : ClangBuiltin<"__builtin_msa_fexdo_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fexp2_w : GCCBuiltin<"__builtin_msa_fexp2_w">,
+def int_mips_fexp2_w : ClangBuiltin<"__builtin_msa_fexp2_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_fexp2_d : GCCBuiltin<"__builtin_msa_fexp2_d">,
+def int_mips_fexp2_d : ClangBuiltin<"__builtin_msa_fexp2_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_fexupl_w : GCCBuiltin<"__builtin_msa_fexupl_w">,
+def int_mips_fexupl_w : ClangBuiltin<"__builtin_msa_fexupl_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8f16_ty], [IntrNoMem]>;
-def int_mips_fexupl_d : GCCBuiltin<"__builtin_msa_fexupl_d">,
+def int_mips_fexupl_d : ClangBuiltin<"__builtin_msa_fexupl_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fexupr_w : GCCBuiltin<"__builtin_msa_fexupr_w">,
+def int_mips_fexupr_w : ClangBuiltin<"__builtin_msa_fexupr_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8f16_ty], [IntrNoMem]>;
-def int_mips_fexupr_d : GCCBuiltin<"__builtin_msa_fexupr_d">,
+def int_mips_fexupr_d : ClangBuiltin<"__builtin_msa_fexupr_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_ffint_s_w : GCCBuiltin<"__builtin_msa_ffint_s_w">,
+def int_mips_ffint_s_w : ClangBuiltin<"__builtin_msa_ffint_s_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ffint_s_d : GCCBuiltin<"__builtin_msa_ffint_s_d">,
+def int_mips_ffint_s_d : ClangBuiltin<"__builtin_msa_ffint_s_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ffint_u_w : GCCBuiltin<"__builtin_msa_ffint_u_w">,
+def int_mips_ffint_u_w : ClangBuiltin<"__builtin_msa_ffint_u_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ffint_u_d : GCCBuiltin<"__builtin_msa_ffint_u_d">,
+def int_mips_ffint_u_d : ClangBuiltin<"__builtin_msa_ffint_u_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ffql_w : GCCBuiltin<"__builtin_msa_ffql_w">,
+def int_mips_ffql_w : ClangBuiltin<"__builtin_msa_ffql_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ffql_d : GCCBuiltin<"__builtin_msa_ffql_d">,
+def int_mips_ffql_d : ClangBuiltin<"__builtin_msa_ffql_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ffqr_w : GCCBuiltin<"__builtin_msa_ffqr_w">,
+def int_mips_ffqr_w : ClangBuiltin<"__builtin_msa_ffqr_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ffqr_d : GCCBuiltin<"__builtin_msa_ffqr_d">,
+def int_mips_ffqr_d : ClangBuiltin<"__builtin_msa_ffqr_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_fill_b : GCCBuiltin<"__builtin_msa_fill_b">,
+def int_mips_fill_b : ClangBuiltin<"__builtin_msa_fill_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_fill_h : GCCBuiltin<"__builtin_msa_fill_h">,
+def int_mips_fill_h : ClangBuiltin<"__builtin_msa_fill_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_fill_w : GCCBuiltin<"__builtin_msa_fill_w">,
+def int_mips_fill_w : ClangBuiltin<"__builtin_msa_fill_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_mips_fill_d : GCCBuiltin<"__builtin_msa_fill_d">,
+def int_mips_fill_d : ClangBuiltin<"__builtin_msa_fill_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>;
-def int_mips_flog2_w : GCCBuiltin<"__builtin_msa_flog2_w">,
+def int_mips_flog2_w : ClangBuiltin<"__builtin_msa_flog2_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_flog2_d : GCCBuiltin<"__builtin_msa_flog2_d">,
+def int_mips_flog2_d : ClangBuiltin<"__builtin_msa_flog2_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmadd_w : GCCBuiltin<"__builtin_msa_fmadd_w">,
+def int_mips_fmadd_w : ClangBuiltin<"__builtin_msa_fmadd_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
-def int_mips_fmadd_d : GCCBuiltin<"__builtin_msa_fmadd_d">,
+def int_mips_fmadd_d : ClangBuiltin<"__builtin_msa_fmadd_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
-def int_mips_fmax_w : GCCBuiltin<"__builtin_msa_fmax_w">,
+def int_mips_fmax_w : ClangBuiltin<"__builtin_msa_fmax_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmax_d : GCCBuiltin<"__builtin_msa_fmax_d">,
+def int_mips_fmax_d : ClangBuiltin<"__builtin_msa_fmax_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmax_a_w : GCCBuiltin<"__builtin_msa_fmax_a_w">,
+def int_mips_fmax_a_w : ClangBuiltin<"__builtin_msa_fmax_a_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmax_a_d : GCCBuiltin<"__builtin_msa_fmax_a_d">,
+def int_mips_fmax_a_d : ClangBuiltin<"__builtin_msa_fmax_a_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmin_w : GCCBuiltin<"__builtin_msa_fmin_w">,
+def int_mips_fmin_w : ClangBuiltin<"__builtin_msa_fmin_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmin_d : GCCBuiltin<"__builtin_msa_fmin_d">,
+def int_mips_fmin_d : ClangBuiltin<"__builtin_msa_fmin_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmin_a_w : GCCBuiltin<"__builtin_msa_fmin_a_w">,
+def int_mips_fmin_a_w : ClangBuiltin<"__builtin_msa_fmin_a_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmin_a_d : GCCBuiltin<"__builtin_msa_fmin_a_d">,
+def int_mips_fmin_a_d : ClangBuiltin<"__builtin_msa_fmin_a_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fmsub_w : GCCBuiltin<"__builtin_msa_fmsub_w">,
+def int_mips_fmsub_w : ClangBuiltin<"__builtin_msa_fmsub_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
-def int_mips_fmsub_d : GCCBuiltin<"__builtin_msa_fmsub_d">,
+def int_mips_fmsub_d : ClangBuiltin<"__builtin_msa_fmsub_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
-def int_mips_fmul_w : GCCBuiltin<"__builtin_msa_fmul_w">,
+def int_mips_fmul_w : ClangBuiltin<"__builtin_msa_fmul_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fmul_d : GCCBuiltin<"__builtin_msa_fmul_d">,
+def int_mips_fmul_d : ClangBuiltin<"__builtin_msa_fmul_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_frint_w : GCCBuiltin<"__builtin_msa_frint_w">,
+def int_mips_frint_w : ClangBuiltin<"__builtin_msa_frint_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_frint_d : GCCBuiltin<"__builtin_msa_frint_d">,
+def int_mips_frint_d : ClangBuiltin<"__builtin_msa_frint_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_frcp_w : GCCBuiltin<"__builtin_msa_frcp_w">,
+def int_mips_frcp_w : ClangBuiltin<"__builtin_msa_frcp_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_frcp_d : GCCBuiltin<"__builtin_msa_frcp_d">,
+def int_mips_frcp_d : ClangBuiltin<"__builtin_msa_frcp_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_frsqrt_w : GCCBuiltin<"__builtin_msa_frsqrt_w">,
+def int_mips_frsqrt_w : ClangBuiltin<"__builtin_msa_frsqrt_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_frsqrt_d : GCCBuiltin<"__builtin_msa_frsqrt_d">,
+def int_mips_frsqrt_d : ClangBuiltin<"__builtin_msa_frsqrt_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsaf_w : GCCBuiltin<"__builtin_msa_fsaf_w">,
+def int_mips_fsaf_w : ClangBuiltin<"__builtin_msa_fsaf_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsaf_d : GCCBuiltin<"__builtin_msa_fsaf_d">,
+def int_mips_fsaf_d : ClangBuiltin<"__builtin_msa_fsaf_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fseq_w : GCCBuiltin<"__builtin_msa_fseq_w">,
+def int_mips_fseq_w : ClangBuiltin<"__builtin_msa_fseq_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fseq_d : GCCBuiltin<"__builtin_msa_fseq_d">,
+def int_mips_fseq_d : ClangBuiltin<"__builtin_msa_fseq_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsle_w : GCCBuiltin<"__builtin_msa_fsle_w">,
+def int_mips_fsle_w : ClangBuiltin<"__builtin_msa_fsle_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsle_d : GCCBuiltin<"__builtin_msa_fsle_d">,
+def int_mips_fsle_d : ClangBuiltin<"__builtin_msa_fsle_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fslt_w : GCCBuiltin<"__builtin_msa_fslt_w">,
+def int_mips_fslt_w : ClangBuiltin<"__builtin_msa_fslt_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fslt_d : GCCBuiltin<"__builtin_msa_fslt_d">,
+def int_mips_fslt_d : ClangBuiltin<"__builtin_msa_fslt_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsne_w : GCCBuiltin<"__builtin_msa_fsne_w">,
+def int_mips_fsne_w : ClangBuiltin<"__builtin_msa_fsne_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsne_d : GCCBuiltin<"__builtin_msa_fsne_d">,
+def int_mips_fsne_d : ClangBuiltin<"__builtin_msa_fsne_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsor_w : GCCBuiltin<"__builtin_msa_fsor_w">,
+def int_mips_fsor_w : ClangBuiltin<"__builtin_msa_fsor_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsor_d : GCCBuiltin<"__builtin_msa_fsor_d">,
+def int_mips_fsor_d : ClangBuiltin<"__builtin_msa_fsor_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsqrt_w : GCCBuiltin<"__builtin_msa_fsqrt_w">,
+def int_mips_fsqrt_w : ClangBuiltin<"__builtin_msa_fsqrt_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsqrt_d : GCCBuiltin<"__builtin_msa_fsqrt_d">,
+def int_mips_fsqrt_d : ClangBuiltin<"__builtin_msa_fsqrt_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsub_w : GCCBuiltin<"__builtin_msa_fsub_w">,
+def int_mips_fsub_w : ClangBuiltin<"__builtin_msa_fsub_w">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsub_d : GCCBuiltin<"__builtin_msa_fsub_d">,
+def int_mips_fsub_d : ClangBuiltin<"__builtin_msa_fsub_d">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsueq_w : GCCBuiltin<"__builtin_msa_fsueq_w">,
+def int_mips_fsueq_w : ClangBuiltin<"__builtin_msa_fsueq_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsueq_d : GCCBuiltin<"__builtin_msa_fsueq_d">,
+def int_mips_fsueq_d : ClangBuiltin<"__builtin_msa_fsueq_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsule_w : GCCBuiltin<"__builtin_msa_fsule_w">,
+def int_mips_fsule_w : ClangBuiltin<"__builtin_msa_fsule_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsule_d : GCCBuiltin<"__builtin_msa_fsule_d">,
+def int_mips_fsule_d : ClangBuiltin<"__builtin_msa_fsule_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsult_w : GCCBuiltin<"__builtin_msa_fsult_w">,
+def int_mips_fsult_w : ClangBuiltin<"__builtin_msa_fsult_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsult_d : GCCBuiltin<"__builtin_msa_fsult_d">,
+def int_mips_fsult_d : ClangBuiltin<"__builtin_msa_fsult_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsun_w : GCCBuiltin<"__builtin_msa_fsun_w">,
+def int_mips_fsun_w : ClangBuiltin<"__builtin_msa_fsun_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsun_d : GCCBuiltin<"__builtin_msa_fsun_d">,
+def int_mips_fsun_d : ClangBuiltin<"__builtin_msa_fsun_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_fsune_w : GCCBuiltin<"__builtin_msa_fsune_w">,
+def int_mips_fsune_w : ClangBuiltin<"__builtin_msa_fsune_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_fsune_d : GCCBuiltin<"__builtin_msa_fsune_d">,
+def int_mips_fsune_d : ClangBuiltin<"__builtin_msa_fsune_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_ftint_s_w : GCCBuiltin<"__builtin_msa_ftint_s_w">,
+def int_mips_ftint_s_w : ClangBuiltin<"__builtin_msa_ftint_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_ftint_s_d : GCCBuiltin<"__builtin_msa_ftint_s_d">,
+def int_mips_ftint_s_d : ClangBuiltin<"__builtin_msa_ftint_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_ftint_u_w : GCCBuiltin<"__builtin_msa_ftint_u_w">,
+def int_mips_ftint_u_w : ClangBuiltin<"__builtin_msa_ftint_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_ftint_u_d : GCCBuiltin<"__builtin_msa_ftint_u_d">,
+def int_mips_ftint_u_d : ClangBuiltin<"__builtin_msa_ftint_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_ftq_h : GCCBuiltin<"__builtin_msa_ftq_h">,
+def int_mips_ftq_h : ClangBuiltin<"__builtin_msa_ftq_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_ftq_w : GCCBuiltin<"__builtin_msa_ftq_w">,
+def int_mips_ftq_w : ClangBuiltin<"__builtin_msa_ftq_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_ftrunc_s_w : GCCBuiltin<"__builtin_msa_ftrunc_s_w">,
+def int_mips_ftrunc_s_w : ClangBuiltin<"__builtin_msa_ftrunc_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_ftrunc_s_d : GCCBuiltin<"__builtin_msa_ftrunc_s_d">,
+def int_mips_ftrunc_s_d : ClangBuiltin<"__builtin_msa_ftrunc_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_ftrunc_u_w : GCCBuiltin<"__builtin_msa_ftrunc_u_w">,
+def int_mips_ftrunc_u_w : ClangBuiltin<"__builtin_msa_ftrunc_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_mips_ftrunc_u_d : GCCBuiltin<"__builtin_msa_ftrunc_u_d">,
+def int_mips_ftrunc_u_d : ClangBuiltin<"__builtin_msa_ftrunc_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
-def int_mips_hadd_s_h : GCCBuiltin<"__builtin_msa_hadd_s_h">,
+def int_mips_hadd_s_h : ClangBuiltin<"__builtin_msa_hadd_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_hadd_s_w : GCCBuiltin<"__builtin_msa_hadd_s_w">,
+def int_mips_hadd_s_w : ClangBuiltin<"__builtin_msa_hadd_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_hadd_s_d : GCCBuiltin<"__builtin_msa_hadd_s_d">,
+def int_mips_hadd_s_d : ClangBuiltin<"__builtin_msa_hadd_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_hadd_u_h : GCCBuiltin<"__builtin_msa_hadd_u_h">,
+def int_mips_hadd_u_h : ClangBuiltin<"__builtin_msa_hadd_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_hadd_u_w : GCCBuiltin<"__builtin_msa_hadd_u_w">,
+def int_mips_hadd_u_w : ClangBuiltin<"__builtin_msa_hadd_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_hadd_u_d : GCCBuiltin<"__builtin_msa_hadd_u_d">,
+def int_mips_hadd_u_d : ClangBuiltin<"__builtin_msa_hadd_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_hsub_s_h : GCCBuiltin<"__builtin_msa_hsub_s_h">,
+def int_mips_hsub_s_h : ClangBuiltin<"__builtin_msa_hsub_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_hsub_s_w : GCCBuiltin<"__builtin_msa_hsub_s_w">,
+def int_mips_hsub_s_w : ClangBuiltin<"__builtin_msa_hsub_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_hsub_s_d : GCCBuiltin<"__builtin_msa_hsub_s_d">,
+def int_mips_hsub_s_d : ClangBuiltin<"__builtin_msa_hsub_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_hsub_u_h : GCCBuiltin<"__builtin_msa_hsub_u_h">,
+def int_mips_hsub_u_h : ClangBuiltin<"__builtin_msa_hsub_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_hsub_u_w : GCCBuiltin<"__builtin_msa_hsub_u_w">,
+def int_mips_hsub_u_w : ClangBuiltin<"__builtin_msa_hsub_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_hsub_u_d : GCCBuiltin<"__builtin_msa_hsub_u_d">,
+def int_mips_hsub_u_d : ClangBuiltin<"__builtin_msa_hsub_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ilvev_b : GCCBuiltin<"__builtin_msa_ilvev_b">,
+def int_mips_ilvev_b : ClangBuiltin<"__builtin_msa_ilvev_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ilvev_h : GCCBuiltin<"__builtin_msa_ilvev_h">,
+def int_mips_ilvev_h : ClangBuiltin<"__builtin_msa_ilvev_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ilvev_w : GCCBuiltin<"__builtin_msa_ilvev_w">,
+def int_mips_ilvev_w : ClangBuiltin<"__builtin_msa_ilvev_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ilvev_d : GCCBuiltin<"__builtin_msa_ilvev_d">,
+def int_mips_ilvev_d : ClangBuiltin<"__builtin_msa_ilvev_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ilvl_b : GCCBuiltin<"__builtin_msa_ilvl_b">,
+def int_mips_ilvl_b : ClangBuiltin<"__builtin_msa_ilvl_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ilvl_h : GCCBuiltin<"__builtin_msa_ilvl_h">,
+def int_mips_ilvl_h : ClangBuiltin<"__builtin_msa_ilvl_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ilvl_w : GCCBuiltin<"__builtin_msa_ilvl_w">,
+def int_mips_ilvl_w : ClangBuiltin<"__builtin_msa_ilvl_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ilvl_d : GCCBuiltin<"__builtin_msa_ilvl_d">,
+def int_mips_ilvl_d : ClangBuiltin<"__builtin_msa_ilvl_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ilvod_b : GCCBuiltin<"__builtin_msa_ilvod_b">,
+def int_mips_ilvod_b : ClangBuiltin<"__builtin_msa_ilvod_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ilvod_h : GCCBuiltin<"__builtin_msa_ilvod_h">,
+def int_mips_ilvod_h : ClangBuiltin<"__builtin_msa_ilvod_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ilvod_w : GCCBuiltin<"__builtin_msa_ilvod_w">,
+def int_mips_ilvod_w : ClangBuiltin<"__builtin_msa_ilvod_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ilvod_d : GCCBuiltin<"__builtin_msa_ilvod_d">,
+def int_mips_ilvod_d : ClangBuiltin<"__builtin_msa_ilvod_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_ilvr_b : GCCBuiltin<"__builtin_msa_ilvr_b">,
+def int_mips_ilvr_b : ClangBuiltin<"__builtin_msa_ilvr_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ilvr_h : GCCBuiltin<"__builtin_msa_ilvr_h">,
+def int_mips_ilvr_h : ClangBuiltin<"__builtin_msa_ilvr_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_ilvr_w : GCCBuiltin<"__builtin_msa_ilvr_w">,
+def int_mips_ilvr_w : ClangBuiltin<"__builtin_msa_ilvr_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_ilvr_d : GCCBuiltin<"__builtin_msa_ilvr_d">,
+def int_mips_ilvr_d : ClangBuiltin<"__builtin_msa_ilvr_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_insert_b : GCCBuiltin<"__builtin_msa_insert_b">,
+def int_mips_insert_b : ClangBuiltin<"__builtin_msa_insert_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_mips_insert_h : GCCBuiltin<"__builtin_msa_insert_h">,
+def int_mips_insert_h : ClangBuiltin<"__builtin_msa_insert_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_mips_insert_w : GCCBuiltin<"__builtin_msa_insert_w">,
+def int_mips_insert_w : ClangBuiltin<"__builtin_msa_insert_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_mips_insert_d : GCCBuiltin<"__builtin_msa_insert_d">,
+def int_mips_insert_d : ClangBuiltin<"__builtin_msa_insert_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
-def int_mips_insve_b : GCCBuiltin<"__builtin_msa_insve_b">,
+def int_mips_insve_b : ClangBuiltin<"__builtin_msa_insve_b">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_insve_h : GCCBuiltin<"__builtin_msa_insve_h">,
+def int_mips_insve_h : ClangBuiltin<"__builtin_msa_insve_h">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_insve_w : GCCBuiltin<"__builtin_msa_insve_w">,
+def int_mips_insve_w : ClangBuiltin<"__builtin_msa_insve_w">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">,
+def int_mips_insve_d : ClangBuiltin<"__builtin_msa_insve_d">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">,
+def int_mips_ld_b : ClangBuiltin<"__builtin_msa_ld_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
-def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">,
+def int_mips_ld_h : ClangBuiltin<"__builtin_msa_ld_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
-def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">,
+def int_mips_ld_w : ClangBuiltin<"__builtin_msa_ld_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
-def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">,
+def int_mips_ld_d : ClangBuiltin<"__builtin_msa_ld_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
-def int_mips_ldr_d : GCCBuiltin<"__builtin_msa_ldr_d">,
+def int_mips_ldr_d : ClangBuiltin<"__builtin_msa_ldr_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
-def int_mips_ldr_w : GCCBuiltin<"__builtin_msa_ldr_w">,
+def int_mips_ldr_w : ClangBuiltin<"__builtin_msa_ldr_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
-def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">,
+def int_mips_ldi_b : ClangBuiltin<"__builtin_msa_ldi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
-def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">,
+def int_mips_ldi_h : ClangBuiltin<"__builtin_msa_ldi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
-def int_mips_ldi_w : GCCBuiltin<"__builtin_msa_ldi_w">,
+def int_mips_ldi_w : ClangBuiltin<"__builtin_msa_ldi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
-def int_mips_ldi_d : GCCBuiltin<"__builtin_msa_ldi_d">,
+def int_mips_ldi_d : ClangBuiltin<"__builtin_msa_ldi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>;
// This instruction is part of the MSA spec but it does not share the
// __builtin_msa prefix because it operates on general-purpose registers.
-def int_mips_lsa : GCCBuiltin<"__builtin_mips_lsa">,
+def int_mips_lsa : ClangBuiltin<"__builtin_mips_lsa">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
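And the 32-bit counterpart, mirroring the dlsa sketch earlier under the same assumptions (operand order (rs, rt, sa) from the llvm.mips.lsa signature above; MIPS32 R6 LSA computes (rs << (sa + 1)) + rt) — again illustrative only, not part of the diff:
/* Sketch only: semantics assumed from the MIPS32 R6 LSA definition;
   see the dlsa example earlier for the 64-bit form. */
int word_offset(int rs, int rt) {
  /* sa = 1 shifts by 2, i.e. computes rs * 4 + rt. */
  return __builtin_mips_lsa(rs, rt, 1);
}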
-def int_mips_madd_q_h : GCCBuiltin<"__builtin_msa_madd_q_h">,
+def int_mips_madd_q_h : ClangBuiltin<"__builtin_msa_madd_q_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_madd_q_w : GCCBuiltin<"__builtin_msa_madd_q_w">,
+def int_mips_madd_q_w : ClangBuiltin<"__builtin_msa_madd_q_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_maddr_q_h : GCCBuiltin<"__builtin_msa_maddr_q_h">,
+def int_mips_maddr_q_h : ClangBuiltin<"__builtin_msa_maddr_q_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_maddr_q_w : GCCBuiltin<"__builtin_msa_maddr_q_w">,
+def int_mips_maddr_q_w : ClangBuiltin<"__builtin_msa_maddr_q_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_maddv_b : GCCBuiltin<"__builtin_msa_maddv_b">,
+def int_mips_maddv_b : ClangBuiltin<"__builtin_msa_maddv_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_maddv_h : GCCBuiltin<"__builtin_msa_maddv_h">,
+def int_mips_maddv_h : ClangBuiltin<"__builtin_msa_maddv_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_maddv_w : GCCBuiltin<"__builtin_msa_maddv_w">,
+def int_mips_maddv_w : ClangBuiltin<"__builtin_msa_maddv_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_maddv_d : GCCBuiltin<"__builtin_msa_maddv_d">,
+def int_mips_maddv_d : ClangBuiltin<"__builtin_msa_maddv_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
-def int_mips_max_a_b : GCCBuiltin<"__builtin_msa_max_a_b">,
+def int_mips_max_a_b : ClangBuiltin<"__builtin_msa_max_a_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_max_a_h : GCCBuiltin<"__builtin_msa_max_a_h">,
+def int_mips_max_a_h : ClangBuiltin<"__builtin_msa_max_a_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_max_a_w : GCCBuiltin<"__builtin_msa_max_a_w">,
+def int_mips_max_a_w : ClangBuiltin<"__builtin_msa_max_a_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_max_a_d : GCCBuiltin<"__builtin_msa_max_a_d">,
+def int_mips_max_a_d : ClangBuiltin<"__builtin_msa_max_a_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_max_s_b : GCCBuiltin<"__builtin_msa_max_s_b">,
+def int_mips_max_s_b : ClangBuiltin<"__builtin_msa_max_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_max_s_h : GCCBuiltin<"__builtin_msa_max_s_h">,
+def int_mips_max_s_h : ClangBuiltin<"__builtin_msa_max_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_max_s_w : GCCBuiltin<"__builtin_msa_max_s_w">,
+def int_mips_max_s_w : ClangBuiltin<"__builtin_msa_max_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_max_s_d : GCCBuiltin<"__builtin_msa_max_s_d">,
+def int_mips_max_s_d : ClangBuiltin<"__builtin_msa_max_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_max_u_b : GCCBuiltin<"__builtin_msa_max_u_b">,
+def int_mips_max_u_b : ClangBuiltin<"__builtin_msa_max_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_max_u_h : GCCBuiltin<"__builtin_msa_max_u_h">,
+def int_mips_max_u_h : ClangBuiltin<"__builtin_msa_max_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_max_u_w : GCCBuiltin<"__builtin_msa_max_u_w">,
+def int_mips_max_u_w : ClangBuiltin<"__builtin_msa_max_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_max_u_d : GCCBuiltin<"__builtin_msa_max_u_d">,
+def int_mips_max_u_d : ClangBuiltin<"__builtin_msa_max_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_maxi_s_b : GCCBuiltin<"__builtin_msa_maxi_s_b">,
+def int_mips_maxi_s_b : ClangBuiltin<"__builtin_msa_maxi_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_maxi_s_h : GCCBuiltin<"__builtin_msa_maxi_s_h">,
+def int_mips_maxi_s_h : ClangBuiltin<"__builtin_msa_maxi_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_maxi_s_w : GCCBuiltin<"__builtin_msa_maxi_s_w">,
+def int_mips_maxi_s_w : ClangBuiltin<"__builtin_msa_maxi_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_maxi_s_d : GCCBuiltin<"__builtin_msa_maxi_s_d">,
+def int_mips_maxi_s_d : ClangBuiltin<"__builtin_msa_maxi_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_maxi_u_b : GCCBuiltin<"__builtin_msa_maxi_u_b">,
+def int_mips_maxi_u_b : ClangBuiltin<"__builtin_msa_maxi_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_maxi_u_h : GCCBuiltin<"__builtin_msa_maxi_u_h">,
+def int_mips_maxi_u_h : ClangBuiltin<"__builtin_msa_maxi_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_maxi_u_w : GCCBuiltin<"__builtin_msa_maxi_u_w">,
+def int_mips_maxi_u_w : ClangBuiltin<"__builtin_msa_maxi_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_maxi_u_d : GCCBuiltin<"__builtin_msa_maxi_u_d">,
+def int_mips_maxi_u_d : ClangBuiltin<"__builtin_msa_maxi_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_min_a_b : GCCBuiltin<"__builtin_msa_min_a_b">,
+def int_mips_min_a_b : ClangBuiltin<"__builtin_msa_min_a_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_min_a_h : GCCBuiltin<"__builtin_msa_min_a_h">,
+def int_mips_min_a_h : ClangBuiltin<"__builtin_msa_min_a_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_min_a_w : GCCBuiltin<"__builtin_msa_min_a_w">,
+def int_mips_min_a_w : ClangBuiltin<"__builtin_msa_min_a_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_min_a_d : GCCBuiltin<"__builtin_msa_min_a_d">,
+def int_mips_min_a_d : ClangBuiltin<"__builtin_msa_min_a_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_min_s_b : GCCBuiltin<"__builtin_msa_min_s_b">,
+def int_mips_min_s_b : ClangBuiltin<"__builtin_msa_min_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_min_s_h : GCCBuiltin<"__builtin_msa_min_s_h">,
+def int_mips_min_s_h : ClangBuiltin<"__builtin_msa_min_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_min_s_w : GCCBuiltin<"__builtin_msa_min_s_w">,
+def int_mips_min_s_w : ClangBuiltin<"__builtin_msa_min_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_min_s_d : GCCBuiltin<"__builtin_msa_min_s_d">,
+def int_mips_min_s_d : ClangBuiltin<"__builtin_msa_min_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_min_u_b : GCCBuiltin<"__builtin_msa_min_u_b">,
+def int_mips_min_u_b : ClangBuiltin<"__builtin_msa_min_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_min_u_h : GCCBuiltin<"__builtin_msa_min_u_h">,
+def int_mips_min_u_h : ClangBuiltin<"__builtin_msa_min_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_min_u_w : GCCBuiltin<"__builtin_msa_min_u_w">,
+def int_mips_min_u_w : ClangBuiltin<"__builtin_msa_min_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_min_u_d : GCCBuiltin<"__builtin_msa_min_u_d">,
+def int_mips_min_u_d : ClangBuiltin<"__builtin_msa_min_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_mini_s_b : GCCBuiltin<"__builtin_msa_mini_s_b">,
+def int_mips_mini_s_b : ClangBuiltin<"__builtin_msa_mini_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mini_s_h : GCCBuiltin<"__builtin_msa_mini_s_h">,
+def int_mips_mini_s_h : ClangBuiltin<"__builtin_msa_mini_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mini_s_w : GCCBuiltin<"__builtin_msa_mini_s_w">,
+def int_mips_mini_s_w : ClangBuiltin<"__builtin_msa_mini_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mini_s_d : GCCBuiltin<"__builtin_msa_mini_s_d">,
+def int_mips_mini_s_d : ClangBuiltin<"__builtin_msa_mini_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mini_u_b : GCCBuiltin<"__builtin_msa_mini_u_b">,
+def int_mips_mini_u_b : ClangBuiltin<"__builtin_msa_mini_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mini_u_h : GCCBuiltin<"__builtin_msa_mini_u_h">,
+def int_mips_mini_u_h : ClangBuiltin<"__builtin_msa_mini_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mini_u_w : GCCBuiltin<"__builtin_msa_mini_u_w">,
+def int_mips_mini_u_w : ClangBuiltin<"__builtin_msa_mini_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mini_u_d : GCCBuiltin<"__builtin_msa_mini_u_d">,
+def int_mips_mini_u_d : ClangBuiltin<"__builtin_msa_mini_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_mod_s_b : GCCBuiltin<"__builtin_msa_mod_s_b">,
+def int_mips_mod_s_b : ClangBuiltin<"__builtin_msa_mod_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_mod_s_h : GCCBuiltin<"__builtin_msa_mod_s_h">,
+def int_mips_mod_s_h : ClangBuiltin<"__builtin_msa_mod_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mod_s_w : GCCBuiltin<"__builtin_msa_mod_s_w">,
+def int_mips_mod_s_w : ClangBuiltin<"__builtin_msa_mod_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mod_s_d : GCCBuiltin<"__builtin_msa_mod_s_d">,
+def int_mips_mod_s_d : ClangBuiltin<"__builtin_msa_mod_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_mod_u_b : GCCBuiltin<"__builtin_msa_mod_u_b">,
+def int_mips_mod_u_b : ClangBuiltin<"__builtin_msa_mod_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_mod_u_h : GCCBuiltin<"__builtin_msa_mod_u_h">,
+def int_mips_mod_u_h : ClangBuiltin<"__builtin_msa_mod_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mod_u_w : GCCBuiltin<"__builtin_msa_mod_u_w">,
+def int_mips_mod_u_w : ClangBuiltin<"__builtin_msa_mod_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mod_u_d : GCCBuiltin<"__builtin_msa_mod_u_d">,
+def int_mips_mod_u_d : ClangBuiltin<"__builtin_msa_mod_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_move_v : GCCBuiltin<"__builtin_msa_move_v">,
+def int_mips_move_v : ClangBuiltin<"__builtin_msa_move_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_msub_q_h : GCCBuiltin<"__builtin_msa_msub_q_h">,
+def int_mips_msub_q_h : ClangBuiltin<"__builtin_msa_msub_q_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_msub_q_w : GCCBuiltin<"__builtin_msa_msub_q_w">,
+def int_mips_msub_q_w : ClangBuiltin<"__builtin_msa_msub_q_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_msubr_q_h : GCCBuiltin<"__builtin_msa_msubr_q_h">,
+def int_mips_msubr_q_h : ClangBuiltin<"__builtin_msa_msubr_q_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_msubr_q_w : GCCBuiltin<"__builtin_msa_msubr_q_w">,
+def int_mips_msubr_q_w : ClangBuiltin<"__builtin_msa_msubr_q_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_msubv_b : GCCBuiltin<"__builtin_msa_msubv_b">,
+def int_mips_msubv_b : ClangBuiltin<"__builtin_msa_msubv_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_msubv_h : GCCBuiltin<"__builtin_msa_msubv_h">,
+def int_mips_msubv_h : ClangBuiltin<"__builtin_msa_msubv_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_msubv_w : GCCBuiltin<"__builtin_msa_msubv_w">,
+def int_mips_msubv_w : ClangBuiltin<"__builtin_msa_msubv_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_msubv_d : GCCBuiltin<"__builtin_msa_msubv_d">,
+def int_mips_msubv_d : ClangBuiltin<"__builtin_msa_msubv_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
-def int_mips_mul_q_h : GCCBuiltin<"__builtin_msa_mul_q_h">,
+def int_mips_mul_q_h : ClangBuiltin<"__builtin_msa_mul_q_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mul_q_w : GCCBuiltin<"__builtin_msa_mul_q_w">,
+def int_mips_mul_q_w : ClangBuiltin<"__builtin_msa_mul_q_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mulr_q_h : GCCBuiltin<"__builtin_msa_mulr_q_h">,
+def int_mips_mulr_q_h : ClangBuiltin<"__builtin_msa_mulr_q_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mulr_q_w : GCCBuiltin<"__builtin_msa_mulr_q_w">,
+def int_mips_mulr_q_w : ClangBuiltin<"__builtin_msa_mulr_q_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mulv_b : GCCBuiltin<"__builtin_msa_mulv_b">,
+def int_mips_mulv_b : ClangBuiltin<"__builtin_msa_mulv_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_mulv_h : GCCBuiltin<"__builtin_msa_mulv_h">,
+def int_mips_mulv_h : ClangBuiltin<"__builtin_msa_mulv_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_mulv_w : GCCBuiltin<"__builtin_msa_mulv_w">,
+def int_mips_mulv_w : ClangBuiltin<"__builtin_msa_mulv_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_mulv_d : GCCBuiltin<"__builtin_msa_mulv_d">,
+def int_mips_mulv_d : ClangBuiltin<"__builtin_msa_mulv_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_nloc_b : GCCBuiltin<"__builtin_msa_nloc_b">,
+def int_mips_nloc_b : ClangBuiltin<"__builtin_msa_nloc_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_nloc_h : GCCBuiltin<"__builtin_msa_nloc_h">,
+def int_mips_nloc_h : ClangBuiltin<"__builtin_msa_nloc_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_nloc_w : GCCBuiltin<"__builtin_msa_nloc_w">,
+def int_mips_nloc_w : ClangBuiltin<"__builtin_msa_nloc_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_nloc_d : GCCBuiltin<"__builtin_msa_nloc_d">,
+def int_mips_nloc_d : ClangBuiltin<"__builtin_msa_nloc_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_nlzc_b : GCCBuiltin<"__builtin_msa_nlzc_b">,
+def int_mips_nlzc_b : ClangBuiltin<"__builtin_msa_nlzc_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_nlzc_h : GCCBuiltin<"__builtin_msa_nlzc_h">,
+def int_mips_nlzc_h : ClangBuiltin<"__builtin_msa_nlzc_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_nlzc_w : GCCBuiltin<"__builtin_msa_nlzc_w">,
+def int_mips_nlzc_w : ClangBuiltin<"__builtin_msa_nlzc_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_nlzc_d : GCCBuiltin<"__builtin_msa_nlzc_d">,
+def int_mips_nlzc_d : ClangBuiltin<"__builtin_msa_nlzc_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_nor_v : GCCBuiltin<"__builtin_msa_nor_v">,
+def int_mips_nor_v : ClangBuiltin<"__builtin_msa_nor_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_nori_b : GCCBuiltin<"__builtin_msa_nori_b">,
+def int_mips_nori_b : ClangBuiltin<"__builtin_msa_nori_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_or_v : GCCBuiltin<"__builtin_msa_or_v">,
+def int_mips_or_v : ClangBuiltin<"__builtin_msa_or_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_ori_b : GCCBuiltin<"__builtin_msa_ori_b">,
+def int_mips_ori_b : ClangBuiltin<"__builtin_msa_ori_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_pckev_b : GCCBuiltin<"__builtin_msa_pckev_b">,
+def int_mips_pckev_b : ClangBuiltin<"__builtin_msa_pckev_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_pckev_h : GCCBuiltin<"__builtin_msa_pckev_h">,
+def int_mips_pckev_h : ClangBuiltin<"__builtin_msa_pckev_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_pckev_w : GCCBuiltin<"__builtin_msa_pckev_w">,
+def int_mips_pckev_w : ClangBuiltin<"__builtin_msa_pckev_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_pckev_d : GCCBuiltin<"__builtin_msa_pckev_d">,
+def int_mips_pckev_d : ClangBuiltin<"__builtin_msa_pckev_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_pckod_b : GCCBuiltin<"__builtin_msa_pckod_b">,
+def int_mips_pckod_b : ClangBuiltin<"__builtin_msa_pckod_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_pckod_h : GCCBuiltin<"__builtin_msa_pckod_h">,
+def int_mips_pckod_h : ClangBuiltin<"__builtin_msa_pckod_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_pckod_w : GCCBuiltin<"__builtin_msa_pckod_w">,
+def int_mips_pckod_w : ClangBuiltin<"__builtin_msa_pckod_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_pckod_d : GCCBuiltin<"__builtin_msa_pckod_d">,
+def int_mips_pckod_d : ClangBuiltin<"__builtin_msa_pckod_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_pcnt_b : GCCBuiltin<"__builtin_msa_pcnt_b">,
+def int_mips_pcnt_b : ClangBuiltin<"__builtin_msa_pcnt_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_pcnt_h : GCCBuiltin<"__builtin_msa_pcnt_h">,
+def int_mips_pcnt_h : ClangBuiltin<"__builtin_msa_pcnt_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_pcnt_w : GCCBuiltin<"__builtin_msa_pcnt_w">,
+def int_mips_pcnt_w : ClangBuiltin<"__builtin_msa_pcnt_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_pcnt_d : GCCBuiltin<"__builtin_msa_pcnt_d">,
+def int_mips_pcnt_d : ClangBuiltin<"__builtin_msa_pcnt_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_sat_s_b : GCCBuiltin<"__builtin_msa_sat_s_b">,
+def int_mips_sat_s_b : ClangBuiltin<"__builtin_msa_sat_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sat_s_h : GCCBuiltin<"__builtin_msa_sat_s_h">,
+def int_mips_sat_s_h : ClangBuiltin<"__builtin_msa_sat_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sat_s_w : GCCBuiltin<"__builtin_msa_sat_s_w">,
+def int_mips_sat_s_w : ClangBuiltin<"__builtin_msa_sat_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sat_s_d : GCCBuiltin<"__builtin_msa_sat_s_d">,
+def int_mips_sat_s_d : ClangBuiltin<"__builtin_msa_sat_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sat_u_b : GCCBuiltin<"__builtin_msa_sat_u_b">,
+def int_mips_sat_u_b : ClangBuiltin<"__builtin_msa_sat_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sat_u_h : GCCBuiltin<"__builtin_msa_sat_u_h">,
+def int_mips_sat_u_h : ClangBuiltin<"__builtin_msa_sat_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sat_u_w : GCCBuiltin<"__builtin_msa_sat_u_w">,
+def int_mips_sat_u_w : ClangBuiltin<"__builtin_msa_sat_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sat_u_d : GCCBuiltin<"__builtin_msa_sat_u_d">,
+def int_mips_sat_u_d : ClangBuiltin<"__builtin_msa_sat_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_shf_b : GCCBuiltin<"__builtin_msa_shf_b">,
+def int_mips_shf_b : ClangBuiltin<"__builtin_msa_shf_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_shf_h : GCCBuiltin<"__builtin_msa_shf_h">,
+def int_mips_shf_h : ClangBuiltin<"__builtin_msa_shf_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_shf_w : GCCBuiltin<"__builtin_msa_shf_w">,
+def int_mips_shf_w : ClangBuiltin<"__builtin_msa_shf_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sld_b : GCCBuiltin<"__builtin_msa_sld_b">,
+def int_mips_sld_b : ClangBuiltin<"__builtin_msa_sld_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sld_h : GCCBuiltin<"__builtin_msa_sld_h">,
+def int_mips_sld_h : ClangBuiltin<"__builtin_msa_sld_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sld_w : GCCBuiltin<"__builtin_msa_sld_w">,
+def int_mips_sld_w : ClangBuiltin<"__builtin_msa_sld_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sld_d : GCCBuiltin<"__builtin_msa_sld_d">,
+def int_mips_sld_d : ClangBuiltin<"__builtin_msa_sld_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_sldi_b : GCCBuiltin<"__builtin_msa_sldi_b">,
+def int_mips_sldi_b : ClangBuiltin<"__builtin_msa_sldi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_sldi_h : GCCBuiltin<"__builtin_msa_sldi_h">,
+def int_mips_sldi_h : ClangBuiltin<"__builtin_msa_sldi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_sldi_w : GCCBuiltin<"__builtin_msa_sldi_w">,
+def int_mips_sldi_w : ClangBuiltin<"__builtin_msa_sldi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_sldi_d : GCCBuiltin<"__builtin_msa_sldi_d">,
+def int_mips_sldi_d : ClangBuiltin<"__builtin_msa_sldi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_mips_sll_b : GCCBuiltin<"__builtin_msa_sll_b">,
+def int_mips_sll_b : ClangBuiltin<"__builtin_msa_sll_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_sll_h : GCCBuiltin<"__builtin_msa_sll_h">,
+def int_mips_sll_h : ClangBuiltin<"__builtin_msa_sll_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_sll_w : GCCBuiltin<"__builtin_msa_sll_w">,
+def int_mips_sll_w : ClangBuiltin<"__builtin_msa_sll_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_sll_d : GCCBuiltin<"__builtin_msa_sll_d">,
+def int_mips_sll_d : ClangBuiltin<"__builtin_msa_sll_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_slli_b : GCCBuiltin<"__builtin_msa_slli_b">,
+def int_mips_slli_b : ClangBuiltin<"__builtin_msa_slli_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_slli_h : GCCBuiltin<"__builtin_msa_slli_h">,
+def int_mips_slli_h : ClangBuiltin<"__builtin_msa_slli_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_slli_w : GCCBuiltin<"__builtin_msa_slli_w">,
+def int_mips_slli_w : ClangBuiltin<"__builtin_msa_slli_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_slli_d : GCCBuiltin<"__builtin_msa_slli_d">,
+def int_mips_slli_d : ClangBuiltin<"__builtin_msa_slli_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_splat_b : GCCBuiltin<"__builtin_msa_splat_b">,
+def int_mips_splat_b : ClangBuiltin<"__builtin_msa_splat_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splat_h : GCCBuiltin<"__builtin_msa_splat_h">,
+def int_mips_splat_h : ClangBuiltin<"__builtin_msa_splat_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splat_w : GCCBuiltin<"__builtin_msa_splat_w">,
+def int_mips_splat_w : ClangBuiltin<"__builtin_msa_splat_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splat_d : GCCBuiltin<"__builtin_msa_splat_d">,
+def int_mips_splat_d : ClangBuiltin<"__builtin_msa_splat_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_mips_splati_b : GCCBuiltin<"__builtin_msa_splati_b">,
+def int_mips_splati_b : ClangBuiltin<"__builtin_msa_splati_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_splati_h : GCCBuiltin<"__builtin_msa_splati_h">,
+def int_mips_splati_h : ClangBuiltin<"__builtin_msa_splati_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_splati_w : GCCBuiltin<"__builtin_msa_splati_w">,
+def int_mips_splati_w : ClangBuiltin<"__builtin_msa_splati_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_splati_d : GCCBuiltin<"__builtin_msa_splati_d">,
+def int_mips_splati_d : ClangBuiltin<"__builtin_msa_splati_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_sra_b : GCCBuiltin<"__builtin_msa_sra_b">,
+def int_mips_sra_b : ClangBuiltin<"__builtin_msa_sra_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_sra_h : GCCBuiltin<"__builtin_msa_sra_h">,
+def int_mips_sra_h : ClangBuiltin<"__builtin_msa_sra_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_sra_w : GCCBuiltin<"__builtin_msa_sra_w">,
+def int_mips_sra_w : ClangBuiltin<"__builtin_msa_sra_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_sra_d : GCCBuiltin<"__builtin_msa_sra_d">,
+def int_mips_sra_d : ClangBuiltin<"__builtin_msa_sra_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srai_b : GCCBuiltin<"__builtin_msa_srai_b">,
+def int_mips_srai_b : ClangBuiltin<"__builtin_msa_srai_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srai_h : GCCBuiltin<"__builtin_msa_srai_h">,
+def int_mips_srai_h : ClangBuiltin<"__builtin_msa_srai_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srai_w : GCCBuiltin<"__builtin_msa_srai_w">,
+def int_mips_srai_w : ClangBuiltin<"__builtin_msa_srai_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srai_d : GCCBuiltin<"__builtin_msa_srai_d">,
+def int_mips_srai_d : ClangBuiltin<"__builtin_msa_srai_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srar_b : GCCBuiltin<"__builtin_msa_srar_b">,
+def int_mips_srar_b : ClangBuiltin<"__builtin_msa_srar_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_srar_h : GCCBuiltin<"__builtin_msa_srar_h">,
+def int_mips_srar_h : ClangBuiltin<"__builtin_msa_srar_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_srar_w : GCCBuiltin<"__builtin_msa_srar_w">,
+def int_mips_srar_w : ClangBuiltin<"__builtin_msa_srar_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_srar_d : GCCBuiltin<"__builtin_msa_srar_d">,
+def int_mips_srar_d : ClangBuiltin<"__builtin_msa_srar_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srari_b : GCCBuiltin<"__builtin_msa_srari_b">,
+def int_mips_srari_b : ClangBuiltin<"__builtin_msa_srari_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srari_h : GCCBuiltin<"__builtin_msa_srari_h">,
+def int_mips_srari_h : ClangBuiltin<"__builtin_msa_srari_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srari_w : GCCBuiltin<"__builtin_msa_srari_w">,
+def int_mips_srari_w : ClangBuiltin<"__builtin_msa_srari_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srari_d : GCCBuiltin<"__builtin_msa_srari_d">,
+def int_mips_srari_d : ClangBuiltin<"__builtin_msa_srari_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srl_b : GCCBuiltin<"__builtin_msa_srl_b">,
+def int_mips_srl_b : ClangBuiltin<"__builtin_msa_srl_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_srl_h : GCCBuiltin<"__builtin_msa_srl_h">,
+def int_mips_srl_h : ClangBuiltin<"__builtin_msa_srl_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_srl_w : GCCBuiltin<"__builtin_msa_srl_w">,
+def int_mips_srl_w : ClangBuiltin<"__builtin_msa_srl_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_srl_d : GCCBuiltin<"__builtin_msa_srl_d">,
+def int_mips_srl_d : ClangBuiltin<"__builtin_msa_srl_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srli_b : GCCBuiltin<"__builtin_msa_srli_b">,
+def int_mips_srli_b : ClangBuiltin<"__builtin_msa_srli_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srli_h : GCCBuiltin<"__builtin_msa_srli_h">,
+def int_mips_srli_h : ClangBuiltin<"__builtin_msa_srli_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srli_w : GCCBuiltin<"__builtin_msa_srli_w">,
+def int_mips_srli_w : ClangBuiltin<"__builtin_msa_srli_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srli_d : GCCBuiltin<"__builtin_msa_srli_d">,
+def int_mips_srli_d : ClangBuiltin<"__builtin_msa_srli_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srlr_b : GCCBuiltin<"__builtin_msa_srlr_b">,
+def int_mips_srlr_b : ClangBuiltin<"__builtin_msa_srlr_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_srlr_h : GCCBuiltin<"__builtin_msa_srlr_h">,
+def int_mips_srlr_h : ClangBuiltin<"__builtin_msa_srlr_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_srlr_w : GCCBuiltin<"__builtin_msa_srlr_w">,
+def int_mips_srlr_w : ClangBuiltin<"__builtin_msa_srlr_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_srlr_d : GCCBuiltin<"__builtin_msa_srlr_d">,
+def int_mips_srlr_d : ClangBuiltin<"__builtin_msa_srlr_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_srlri_b : GCCBuiltin<"__builtin_msa_srlri_b">,
+def int_mips_srlri_b : ClangBuiltin<"__builtin_msa_srlri_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srlri_h : GCCBuiltin<"__builtin_msa_srlri_h">,
+def int_mips_srlri_h : ClangBuiltin<"__builtin_msa_srlri_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srlri_w : GCCBuiltin<"__builtin_msa_srlri_w">,
+def int_mips_srlri_w : ClangBuiltin<"__builtin_msa_srlri_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">,
+def int_mips_srlri_d : ClangBuiltin<"__builtin_msa_srlri_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">,
+def int_mips_st_b : ClangBuiltin<"__builtin_msa_st_b">,
Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
-def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">,
+def int_mips_st_h : ClangBuiltin<"__builtin_msa_st_h">,
Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
-def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">,
+def int_mips_st_w : ClangBuiltin<"__builtin_msa_st_w">,
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
-def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">,
+def int_mips_st_d : ClangBuiltin<"__builtin_msa_st_d">,
Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
-def int_mips_str_d : GCCBuiltin<"__builtin_msa_str_d">,
+def int_mips_str_d : ClangBuiltin<"__builtin_msa_str_d">,
Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
-def int_mips_str_w : GCCBuiltin<"__builtin_msa_str_w">,
+def int_mips_str_w : ClangBuiltin<"__builtin_msa_str_w">,
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
-def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">,
+def int_mips_subs_s_b : ClangBuiltin<"__builtin_msa_subs_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subs_s_h : GCCBuiltin<"__builtin_msa_subs_s_h">,
+def int_mips_subs_s_h : ClangBuiltin<"__builtin_msa_subs_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subs_s_w : GCCBuiltin<"__builtin_msa_subs_s_w">,
+def int_mips_subs_s_w : ClangBuiltin<"__builtin_msa_subs_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subs_s_d : GCCBuiltin<"__builtin_msa_subs_s_d">,
+def int_mips_subs_s_d : ClangBuiltin<"__builtin_msa_subs_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subs_u_b : GCCBuiltin<"__builtin_msa_subs_u_b">,
+def int_mips_subs_u_b : ClangBuiltin<"__builtin_msa_subs_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subs_u_h : GCCBuiltin<"__builtin_msa_subs_u_h">,
+def int_mips_subs_u_h : ClangBuiltin<"__builtin_msa_subs_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subs_u_w : GCCBuiltin<"__builtin_msa_subs_u_w">,
+def int_mips_subs_u_w : ClangBuiltin<"__builtin_msa_subs_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subs_u_d : GCCBuiltin<"__builtin_msa_subs_u_d">,
+def int_mips_subs_u_d : ClangBuiltin<"__builtin_msa_subs_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subsus_u_b : GCCBuiltin<"__builtin_msa_subsus_u_b">,
+def int_mips_subsus_u_b : ClangBuiltin<"__builtin_msa_subsus_u_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subsus_u_h : GCCBuiltin<"__builtin_msa_subsus_u_h">,
+def int_mips_subsus_u_h : ClangBuiltin<"__builtin_msa_subsus_u_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subsus_u_w : GCCBuiltin<"__builtin_msa_subsus_u_w">,
+def int_mips_subsus_u_w : ClangBuiltin<"__builtin_msa_subsus_u_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subsus_u_d : GCCBuiltin<"__builtin_msa_subsus_u_d">,
+def int_mips_subsus_u_d : ClangBuiltin<"__builtin_msa_subsus_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_b : GCCBuiltin<"__builtin_msa_subsuu_s_b">,
+def int_mips_subsuu_s_b : ClangBuiltin<"__builtin_msa_subsuu_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_h : GCCBuiltin<"__builtin_msa_subsuu_s_h">,
+def int_mips_subsuu_s_h : ClangBuiltin<"__builtin_msa_subsuu_s_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_w : GCCBuiltin<"__builtin_msa_subsuu_s_w">,
+def int_mips_subsuu_s_w : ClangBuiltin<"__builtin_msa_subsuu_s_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subsuu_s_d : GCCBuiltin<"__builtin_msa_subsuu_s_d">,
+def int_mips_subsuu_s_d : ClangBuiltin<"__builtin_msa_subsuu_s_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subv_b : GCCBuiltin<"__builtin_msa_subv_b">,
+def int_mips_subv_b : ClangBuiltin<"__builtin_msa_subv_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_subv_h : GCCBuiltin<"__builtin_msa_subv_h">,
+def int_mips_subv_h : ClangBuiltin<"__builtin_msa_subv_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-def int_mips_subv_w : GCCBuiltin<"__builtin_msa_subv_w">,
+def int_mips_subv_w : ClangBuiltin<"__builtin_msa_subv_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-def int_mips_subv_d : GCCBuiltin<"__builtin_msa_subv_d">,
+def int_mips_subv_d : ClangBuiltin<"__builtin_msa_subv_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-def int_mips_subvi_b : GCCBuiltin<"__builtin_msa_subvi_b">,
+def int_mips_subvi_b : ClangBuiltin<"__builtin_msa_subvi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_subvi_h : GCCBuiltin<"__builtin_msa_subvi_h">,
+def int_mips_subvi_h : ClangBuiltin<"__builtin_msa_subvi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_subvi_w : GCCBuiltin<"__builtin_msa_subvi_w">,
+def int_mips_subvi_w : ClangBuiltin<"__builtin_msa_subvi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_subvi_d : GCCBuiltin<"__builtin_msa_subvi_d">,
+def int_mips_subvi_d : ClangBuiltin<"__builtin_msa_subvi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
-def int_mips_vshf_b : GCCBuiltin<"__builtin_msa_vshf_b">,
+def int_mips_vshf_b : ClangBuiltin<"__builtin_msa_vshf_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_mips_vshf_h : GCCBuiltin<"__builtin_msa_vshf_h">,
+def int_mips_vshf_h : ClangBuiltin<"__builtin_msa_vshf_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
-def int_mips_vshf_w : GCCBuiltin<"__builtin_msa_vshf_w">,
+def int_mips_vshf_w : ClangBuiltin<"__builtin_msa_vshf_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_mips_vshf_d : GCCBuiltin<"__builtin_msa_vshf_d">,
+def int_mips_vshf_d : ClangBuiltin<"__builtin_msa_vshf_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
-def int_mips_xor_v : GCCBuiltin<"__builtin_msa_xor_v">,
+def int_mips_xor_v : ClangBuiltin<"__builtin_msa_xor_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
-def int_mips_xori_b : GCCBuiltin<"__builtin_msa_xori_b">,
+def int_mips_xori_b : ClangBuiltin<"__builtin_msa_xori_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
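
The block above closes out a long, purely mechanical rename in llvm/include/llvm/IR/IntrinsicsMips.td: every MSA intrinsic swaps its GCCBuiltin wrapper for ClangBuiltin while the builtin string, the type lists, and the attribute lists stay byte-for-byte identical. Reconstructed from the last +/- pair above (as an illustration, not an extra hunk), a representative record reads as follows after the patch:

    def int_mips_xori_b : ClangBuiltin<"__builtin_msa_xori_b">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
                [IntrNoMem, ImmArg<ArgIndex<1>>]>;

ImmArg<ArgIndex<1>> marks the second operand as a compile-time immediate, which is why the corresponding __builtin_msa_xori_b call requires a constant mask argument.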
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 41b28db56c75..9c3813128364 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -556,95 +556,124 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
}
let TargetPrefix = "nvvm" in {
- def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
+ def int_nvvm_prmt : ClangBuiltin<"__nvvm_prmt">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
//
// Min Max
//
- def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ foreach operation = ["min", "max"] in {
+ def int_nvvm_f # operation # _d :
+ ClangBuiltin<!strconcat("__nvvm_f", operation, "_d")>,
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
- , [IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]>;
+ foreach variant = ["_f", "_ftz_f", "_nan_f", "_ftz_nan_f",
+ "_xorsign_abs_f", "_ftz_xorsign_abs_f", "_nan_xorsign_abs_f",
+ "_ftz_nan_xorsign_abs_f"] in {
+ def int_nvvm_f # operation # variant :
+ ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+ }
- def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
- DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
- DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]>;
+ foreach variant = ["_f16", "_ftz_f16", "_nan_f16", "_ftz_nan_f16",
+ "_xorsign_abs_f16", "_ftz_xorsign_abs_f16", "_nan_xorsign_abs_f16",
+ "_ftz_nan_xorsign_abs_f16"] in {
+ def int_nvvm_f # operation # variant :
+ ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
+ DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+ }
+
+ foreach variant = ["_f16x2", "_ftz_f16x2", "_nan_f16x2",
+ "_ftz_nan_f16x2", "_xorsign_abs_f16x2", "_ftz_xorsign_abs_f16x2",
+ "_nan_xorsign_abs_f16x2", "_ftz_nan_xorsign_abs_f16x2"] in {
+ def int_nvvm_f # operation # variant :
+ ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
+ DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+ }
+
+ foreach variant = ["_bf16", "_nan_bf16", "_xorsign_abs_bf16",
+ "_nan_xorsign_abs_bf16"] in {
+ def int_nvvm_f # operation # variant :
+ ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+ }
+
+ foreach variant = ["_bf16x2", "_nan_bf16x2", "_xorsign_abs_bf16x2",
+ "_nan_xorsign_abs_bf16x2"] in {
+ def int_nvvm_f # operation # variant :
+ ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
+ }
+ }
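
With this hunk the hand-written fmin/fmax records give way to nested TableGen foreach loops: the outer loop ranges over the operation name, the inner loops over suffix variants, and the # paste operator together with !strconcat glues the pieces into record and builtin names. Written out by hand from the loop bounds (illustration only), the first trip through the double-precision body, with operation = "min", produces exactly the record the old text spelled out:

    def int_nvvm_fmin_d : ClangBuiltin<"__nvvm_fmin_d">,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable, Commutative]>;

The same expansion with variant = "_ftz_nan_xorsign_abs_f" yields int_nvvm_fmin_ftz_nan_xorsign_abs_f, so the loops both regenerate the old f/ftz_f records and introduce the new nan/xorsign_abs and f16/bf16 families from one body.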
//
// Multiplication
//
- def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
+ def int_nvvm_mulhi_i : ClangBuiltin<"__nvvm_mulhi_i">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
+ def int_nvvm_mulhi_ui : ClangBuiltin<"__nvvm_mulhi_ui">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
+ def int_nvvm_mulhi_ll : ClangBuiltin<"__nvvm_mulhi_ll">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
+ def int_nvvm_mulhi_ull : ClangBuiltin<"__nvvm_mulhi_ull">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
+ def int_nvvm_mul_rn_ftz_f : ClangBuiltin<"__nvvm_mul_rn_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
+ def int_nvvm_mul_rn_f : ClangBuiltin<"__nvvm_mul_rn_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
+ def int_nvvm_mul_rz_ftz_f : ClangBuiltin<"__nvvm_mul_rz_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
+ def int_nvvm_mul_rz_f : ClangBuiltin<"__nvvm_mul_rz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
+ def int_nvvm_mul_rm_ftz_f : ClangBuiltin<"__nvvm_mul_rm_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
+ def int_nvvm_mul_rm_f : ClangBuiltin<"__nvvm_mul_rm_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
+ def int_nvvm_mul_rp_ftz_f : ClangBuiltin<"__nvvm_mul_rp_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
+ def int_nvvm_mul_rp_f : ClangBuiltin<"__nvvm_mul_rp_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
+ def int_nvvm_mul_rn_d : ClangBuiltin<"__nvvm_mul_rn_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
+ def int_nvvm_mul_rz_d : ClangBuiltin<"__nvvm_mul_rz_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
+ def int_nvvm_mul_rm_d : ClangBuiltin<"__nvvm_mul_rm_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
+ def int_nvvm_mul_rp_d : ClangBuiltin<"__nvvm_mul_rp_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
+ def int_nvvm_mul24_i : ClangBuiltin<"__nvvm_mul24_i">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
+ def int_nvvm_mul24_ui : ClangBuiltin<"__nvvm_mul24_ui">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
@@ -652,51 +681,51 @@ let TargetPrefix = "nvvm" in {
// Div
//
- def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
+ def int_nvvm_div_approx_ftz_f : ClangBuiltin<"__nvvm_div_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
+ def int_nvvm_div_approx_f : ClangBuiltin<"__nvvm_div_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
+ def int_nvvm_div_rn_ftz_f : ClangBuiltin<"__nvvm_div_rn_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
+ def int_nvvm_div_rn_f : ClangBuiltin<"__nvvm_div_rn_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
+ def int_nvvm_div_rz_ftz_f : ClangBuiltin<"__nvvm_div_rz_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
+ def int_nvvm_div_rz_f : ClangBuiltin<"__nvvm_div_rz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
+ def int_nvvm_div_rm_ftz_f : ClangBuiltin<"__nvvm_div_rm_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
+ def int_nvvm_div_rm_f : ClangBuiltin<"__nvvm_div_rm_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
+ def int_nvvm_div_rp_ftz_f : ClangBuiltin<"__nvvm_div_rp_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
+ def int_nvvm_div_rp_f : ClangBuiltin<"__nvvm_div_rp_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
- def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
+ def int_nvvm_div_rn_d : ClangBuiltin<"__nvvm_div_rn_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
- def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
+ def int_nvvm_div_rz_d : ClangBuiltin<"__nvvm_div_rz_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
- def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
+ def int_nvvm_div_rm_d : ClangBuiltin<"__nvvm_div_rm_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
- def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
+ def int_nvvm_div_rp_d : ClangBuiltin<"__nvvm_div_rp_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
@@ -704,10 +733,10 @@ let TargetPrefix = "nvvm" in {
// Sad
//
- def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
+ def int_nvvm_sad_i : ClangBuiltin<"__nvvm_sad_i">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
- def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
+ def int_nvvm_sad_ui : ClangBuiltin<"__nvvm_sad_ui">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
@@ -715,264 +744,286 @@ let TargetPrefix = "nvvm" in {
// Floor Ceil
//
- def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
+ def int_nvvm_floor_ftz_f : ClangBuiltin<"__nvvm_floor_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
+ def int_nvvm_floor_f : ClangBuiltin<"__nvvm_floor_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
+ def int_nvvm_floor_d : ClangBuiltin<"__nvvm_floor_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
+ def int_nvvm_ceil_ftz_f : ClangBuiltin<"__nvvm_ceil_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
+ def int_nvvm_ceil_f : ClangBuiltin<"__nvvm_ceil_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
+ def int_nvvm_ceil_d : ClangBuiltin<"__nvvm_ceil_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Abs
//
- def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
+ def int_nvvm_fabs_ftz_f : ClangBuiltin<"__nvvm_fabs_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
+ def int_nvvm_fabs_f : ClangBuiltin<"__nvvm_fabs_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
+ def int_nvvm_fabs_d : ClangBuiltin<"__nvvm_fabs_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
+// Abs, Neg bf16, bf16x2
+//
+
+ foreach unary = ["abs", "neg"] in {
+ def int_nvvm_ # unary # _bf16 :
+ ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16")>,
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>;
+ def int_nvvm_ # unary # _bf16x2 :
+ ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16x2")>,
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ }
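
The new abs/neg block reuses the same foreach idiom for bf16 data. The type lists show that a bf16 value is carried as its raw 16-bit pattern in an i16, and a bf16x2 pair in an i32, rather than as an IR floating-point type. Expanded by hand for illustration, the unary = "abs" iteration yields:

    def int_nvvm_abs_bf16 : ClangBuiltin<"__nvvm_abs_bf16">,
      DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>;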
+
+//
// Round
//
- def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
+ def int_nvvm_round_ftz_f : ClangBuiltin<"__nvvm_round_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
+ def int_nvvm_round_f : ClangBuiltin<"__nvvm_round_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
+ def int_nvvm_round_d : ClangBuiltin<"__nvvm_round_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Trunc
//
- def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
+ def int_nvvm_trunc_ftz_f : ClangBuiltin<"__nvvm_trunc_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
+ def int_nvvm_trunc_f : ClangBuiltin<"__nvvm_trunc_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
+ def int_nvvm_trunc_d : ClangBuiltin<"__nvvm_trunc_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Saturate
//
- def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
+ def int_nvvm_saturate_ftz_f : ClangBuiltin<"__nvvm_saturate_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
+ def int_nvvm_saturate_f : ClangBuiltin<"__nvvm_saturate_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
+ def int_nvvm_saturate_d : ClangBuiltin<"__nvvm_saturate_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Exp2 Log2
//
- def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
+ def int_nvvm_ex2_approx_ftz_f : ClangBuiltin<"__nvvm_ex2_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
+ def int_nvvm_ex2_approx_f : ClangBuiltin<"__nvvm_ex2_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
+ def int_nvvm_ex2_approx_d : ClangBuiltin<"__nvvm_ex2_approx_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_f16 : ClangBuiltin<"__nvvm_ex2_approx_f16">,
+ DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_f16x2 : ClangBuiltin<"__nvvm_ex2_approx_f16x2">,
+ DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
- def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
+ def int_nvvm_lg2_approx_ftz_f : ClangBuiltin<"__nvvm_lg2_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
+ def int_nvvm_lg2_approx_f : ClangBuiltin<"__nvvm_lg2_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
+ def int_nvvm_lg2_approx_d : ClangBuiltin<"__nvvm_lg2_approx_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sin Cos
//
- def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
+ def int_nvvm_sin_approx_ftz_f : ClangBuiltin<"__nvvm_sin_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
+ def int_nvvm_sin_approx_f : ClangBuiltin<"__nvvm_sin_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
+ def int_nvvm_cos_approx_ftz_f : ClangBuiltin<"__nvvm_cos_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
+ def int_nvvm_cos_approx_f : ClangBuiltin<"__nvvm_cos_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
//
// Fma
//
- def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ foreach variant = ["_rn_f16", "_rn_ftz_f16", "_rn_sat_f16",
+ "_rn_ftz_sat_f16", "_rn_relu_f16", "_rn_ftz_relu_f16"] in {
+ def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
+ DefaultAttrsIntrinsic<[llvm_half_ty],
+ [llvm_half_ty, llvm_half_ty, llvm_half_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+ }
+
+ foreach variant = ["_rn_f16x2", "_rn_ftz_f16x2", "_rn_sat_f16x2",
+ "_rn_ftz_sat_f16x2", "_rn_relu_f16x2", "_rn_ftz_relu_f16x2"] in {
+ def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
+ DefaultAttrsIntrinsic<[llvm_v2f16_ty],
+ [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
[IntrNoMem, IntrSpeculatable]>;
+ }
- def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
- DefaultAttrsIntrinsic<[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ foreach variant = ["_rn_bf16", "_rn_relu_bf16"] in {
+ def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
+ DefaultAttrsIntrinsic<[llvm_i16_ty],
+ [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
[IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
- DefaultAttrsIntrinsic<[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ }
+
+ foreach variant = ["_rn_bf16x2", "_rn_relu_bf16x2"] in {
+ def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
- DefaultAttrsIntrinsic<[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ }
+
+ foreach variant = ["_rn_ftz_f", "_rn_f", "_rz_ftz_f", "_rz_f", "_rm_ftz_f",
+ "_rm_f", "_rp_ftz_f", "_rp_f"] in {
+ def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
+ DefaultAttrsIntrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
+ }
+
+ foreach variant = ["_rn_d", "_rz_d", "_rm_d", "_rp_d"] in {
+ def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable]>;
+ }
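
The fma records are folded into the same pattern, one foreach per result type, so every rounding-mode/ftz/sat/relu combination shares a single body. For instance, the variant = "_rn_f16" case expands (hand expansion, not part of the hunk) to:

    def int_nvvm_fma_rn_f16 : ClangBuiltin<"__nvvm_fma_rn_f16">,
      DefaultAttrsIntrinsic<[llvm_half_ty],
                            [llvm_half_ty, llvm_half_ty, llvm_half_ty],
                            [IntrNoMem, IntrSpeculatable]>;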
//
// Rcp
//
- def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
+ def int_nvvm_rcp_rn_ftz_f : ClangBuiltin<"__nvvm_rcp_rn_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
+ def int_nvvm_rcp_rn_f : ClangBuiltin<"__nvvm_rcp_rn_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
+ def int_nvvm_rcp_rz_ftz_f : ClangBuiltin<"__nvvm_rcp_rz_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
+ def int_nvvm_rcp_rz_f : ClangBuiltin<"__nvvm_rcp_rz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
+ def int_nvvm_rcp_rm_ftz_f : ClangBuiltin<"__nvvm_rcp_rm_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
+ def int_nvvm_rcp_rm_f : ClangBuiltin<"__nvvm_rcp_rm_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
+ def int_nvvm_rcp_rp_ftz_f : ClangBuiltin<"__nvvm_rcp_rp_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
+ def int_nvvm_rcp_rp_f : ClangBuiltin<"__nvvm_rcp_rp_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
+ def int_nvvm_rcp_rn_d : ClangBuiltin<"__nvvm_rcp_rn_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
+ def int_nvvm_rcp_rz_d : ClangBuiltin<"__nvvm_rcp_rz_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
+ def int_nvvm_rcp_rm_d : ClangBuiltin<"__nvvm_rcp_rm_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
+ def int_nvvm_rcp_rp_d : ClangBuiltin<"__nvvm_rcp_rp_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
+ def int_nvvm_rcp_approx_ftz_f : ClangBuiltin<"__nvvm_rcp_approx_ftz_f">,
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_approx_ftz_d : ClangBuiltin<"__nvvm_rcp_approx_ftz_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sqrt
//
- def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">,
+ def int_nvvm_sqrt_f : ClangBuiltin<"__nvvm_sqrt_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
+ def int_nvvm_sqrt_rn_ftz_f : ClangBuiltin<"__nvvm_sqrt_rn_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
+ def int_nvvm_sqrt_rn_f : ClangBuiltin<"__nvvm_sqrt_rn_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
+ def int_nvvm_sqrt_rz_ftz_f : ClangBuiltin<"__nvvm_sqrt_rz_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
+ def int_nvvm_sqrt_rz_f : ClangBuiltin<"__nvvm_sqrt_rz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
+ def int_nvvm_sqrt_rm_ftz_f : ClangBuiltin<"__nvvm_sqrt_rm_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
+ def int_nvvm_sqrt_rm_f : ClangBuiltin<"__nvvm_sqrt_rm_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
+ def int_nvvm_sqrt_rp_ftz_f : ClangBuiltin<"__nvvm_sqrt_rp_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
+ def int_nvvm_sqrt_rp_f : ClangBuiltin<"__nvvm_sqrt_rp_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
+ def int_nvvm_sqrt_approx_ftz_f : ClangBuiltin<"__nvvm_sqrt_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
+ def int_nvvm_sqrt_approx_f : ClangBuiltin<"__nvvm_sqrt_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
+ def int_nvvm_sqrt_rn_d : ClangBuiltin<"__nvvm_sqrt_rn_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
+ def int_nvvm_sqrt_rz_d : ClangBuiltin<"__nvvm_sqrt_rz_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
+ def int_nvvm_sqrt_rm_d : ClangBuiltin<"__nvvm_sqrt_rm_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
+ def int_nvvm_sqrt_rp_d : ClangBuiltin<"__nvvm_sqrt_rp_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Rsqrt
//
- def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
+ def int_nvvm_rsqrt_approx_ftz_f : ClangBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
+ def int_nvvm_rsqrt_approx_f : ClangBuiltin<"__nvvm_rsqrt_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
+ def int_nvvm_rsqrt_approx_d : ClangBuiltin<"__nvvm_rsqrt_approx_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Add
//
- def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
+ def int_nvvm_add_rn_ftz_f : ClangBuiltin<"__nvvm_add_rn_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
+ def int_nvvm_add_rn_f : ClangBuiltin<"__nvvm_add_rn_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
+ def int_nvvm_add_rz_ftz_f : ClangBuiltin<"__nvvm_add_rz_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
+ def int_nvvm_add_rz_f : ClangBuiltin<"__nvvm_add_rz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
+ def int_nvvm_add_rm_ftz_f : ClangBuiltin<"__nvvm_add_rm_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
+ def int_nvvm_add_rm_f : ClangBuiltin<"__nvvm_add_rm_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
+ def int_nvvm_add_rp_ftz_f : ClangBuiltin<"__nvvm_add_rp_ftz_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
+ def int_nvvm_add_rp_f : ClangBuiltin<"__nvvm_add_rp_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
+ def int_nvvm_add_rn_d : ClangBuiltin<"__nvvm_add_rn_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
+ def int_nvvm_add_rz_d : ClangBuiltin<"__nvvm_add_rz_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
+ def int_nvvm_add_rm_d : ClangBuiltin<"__nvvm_add_rm_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
+ def int_nvvm_add_rp_d : ClangBuiltin<"__nvvm_add_rp_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
@@ -980,278 +1031,278 @@ let TargetPrefix = "nvvm" in {
// Convert
//
- def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
+ def int_nvvm_d2f_rn_ftz : ClangBuiltin<"__nvvm_d2f_rn_ftz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
+ def int_nvvm_d2f_rn : ClangBuiltin<"__nvvm_d2f_rn">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
+ def int_nvvm_d2f_rz_ftz : ClangBuiltin<"__nvvm_d2f_rz_ftz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
+ def int_nvvm_d2f_rz : ClangBuiltin<"__nvvm_d2f_rz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
+ def int_nvvm_d2f_rm_ftz : ClangBuiltin<"__nvvm_d2f_rm_ftz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
+ def int_nvvm_d2f_rm : ClangBuiltin<"__nvvm_d2f_rm">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
+ def int_nvvm_d2f_rp_ftz : ClangBuiltin<"__nvvm_d2f_rp_ftz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
+ def int_nvvm_d2f_rp : ClangBuiltin<"__nvvm_d2f_rp">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
+ def int_nvvm_d2i_rn : ClangBuiltin<"__nvvm_d2i_rn">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
+ def int_nvvm_d2i_rz : ClangBuiltin<"__nvvm_d2i_rz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
+ def int_nvvm_d2i_rm : ClangBuiltin<"__nvvm_d2i_rm">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
+ def int_nvvm_d2i_rp : ClangBuiltin<"__nvvm_d2i_rp">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
+ def int_nvvm_d2ui_rn : ClangBuiltin<"__nvvm_d2ui_rn">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
+ def int_nvvm_d2ui_rz : ClangBuiltin<"__nvvm_d2ui_rz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
+ def int_nvvm_d2ui_rm : ClangBuiltin<"__nvvm_d2ui_rm">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
+ def int_nvvm_d2ui_rp : ClangBuiltin<"__nvvm_d2ui_rp">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
+ def int_nvvm_i2d_rn : ClangBuiltin<"__nvvm_i2d_rn">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
+ def int_nvvm_i2d_rz : ClangBuiltin<"__nvvm_i2d_rz">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
+ def int_nvvm_i2d_rm : ClangBuiltin<"__nvvm_i2d_rm">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
+ def int_nvvm_i2d_rp : ClangBuiltin<"__nvvm_i2d_rp">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
+ def int_nvvm_ui2d_rn : ClangBuiltin<"__nvvm_ui2d_rn">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
+ def int_nvvm_ui2d_rz : ClangBuiltin<"__nvvm_ui2d_rz">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
+ def int_nvvm_ui2d_rm : ClangBuiltin<"__nvvm_ui2d_rm">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
+ def int_nvvm_ui2d_rp : ClangBuiltin<"__nvvm_ui2d_rp">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
+ def int_nvvm_f2i_rn_ftz : ClangBuiltin<"__nvvm_f2i_rn_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
+ def int_nvvm_f2i_rn : ClangBuiltin<"__nvvm_f2i_rn">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
+ def int_nvvm_f2i_rz_ftz : ClangBuiltin<"__nvvm_f2i_rz_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
+ def int_nvvm_f2i_rz : ClangBuiltin<"__nvvm_f2i_rz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
+ def int_nvvm_f2i_rm_ftz : ClangBuiltin<"__nvvm_f2i_rm_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
+ def int_nvvm_f2i_rm : ClangBuiltin<"__nvvm_f2i_rm">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
+ def int_nvvm_f2i_rp_ftz : ClangBuiltin<"__nvvm_f2i_rp_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
+ def int_nvvm_f2i_rp : ClangBuiltin<"__nvvm_f2i_rp">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
+ def int_nvvm_f2ui_rn_ftz : ClangBuiltin<"__nvvm_f2ui_rn_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
+ def int_nvvm_f2ui_rn : ClangBuiltin<"__nvvm_f2ui_rn">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
+ def int_nvvm_f2ui_rz_ftz : ClangBuiltin<"__nvvm_f2ui_rz_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
+ def int_nvvm_f2ui_rz : ClangBuiltin<"__nvvm_f2ui_rz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
+ def int_nvvm_f2ui_rm_ftz : ClangBuiltin<"__nvvm_f2ui_rm_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
+ def int_nvvm_f2ui_rm : ClangBuiltin<"__nvvm_f2ui_rm">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
+ def int_nvvm_f2ui_rp_ftz : ClangBuiltin<"__nvvm_f2ui_rp_ftz">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
+ def int_nvvm_f2ui_rp : ClangBuiltin<"__nvvm_f2ui_rp">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
+ def int_nvvm_i2f_rn : ClangBuiltin<"__nvvm_i2f_rn">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
+ def int_nvvm_i2f_rz : ClangBuiltin<"__nvvm_i2f_rz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
+ def int_nvvm_i2f_rm : ClangBuiltin<"__nvvm_i2f_rm">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
+ def int_nvvm_i2f_rp : ClangBuiltin<"__nvvm_i2f_rp">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
+ def int_nvvm_ui2f_rn : ClangBuiltin<"__nvvm_ui2f_rn">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
+ def int_nvvm_ui2f_rz : ClangBuiltin<"__nvvm_ui2f_rz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
+ def int_nvvm_ui2f_rm : ClangBuiltin<"__nvvm_ui2f_rm">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
+ def int_nvvm_ui2f_rp : ClangBuiltin<"__nvvm_ui2f_rp">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
+ def int_nvvm_lohi_i2d : ClangBuiltin<"__nvvm_lohi_i2d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
- def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
+ def int_nvvm_d2i_lo : ClangBuiltin<"__nvvm_d2i_lo">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
+ def int_nvvm_d2i_hi : ClangBuiltin<"__nvvm_d2i_hi">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
+ def int_nvvm_f2ll_rn_ftz : ClangBuiltin<"__nvvm_f2ll_rn_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
+ def int_nvvm_f2ll_rn : ClangBuiltin<"__nvvm_f2ll_rn">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
+ def int_nvvm_f2ll_rz_ftz : ClangBuiltin<"__nvvm_f2ll_rz_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
+ def int_nvvm_f2ll_rz : ClangBuiltin<"__nvvm_f2ll_rz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
+ def int_nvvm_f2ll_rm_ftz : ClangBuiltin<"__nvvm_f2ll_rm_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
+ def int_nvvm_f2ll_rm : ClangBuiltin<"__nvvm_f2ll_rm">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
+ def int_nvvm_f2ll_rp_ftz : ClangBuiltin<"__nvvm_f2ll_rp_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
+ def int_nvvm_f2ll_rp : ClangBuiltin<"__nvvm_f2ll_rp">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
+ def int_nvvm_f2ull_rn_ftz : ClangBuiltin<"__nvvm_f2ull_rn_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
+ def int_nvvm_f2ull_rn : ClangBuiltin<"__nvvm_f2ull_rn">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
+ def int_nvvm_f2ull_rz_ftz : ClangBuiltin<"__nvvm_f2ull_rz_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
+ def int_nvvm_f2ull_rz : ClangBuiltin<"__nvvm_f2ull_rz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
+ def int_nvvm_f2ull_rm_ftz : ClangBuiltin<"__nvvm_f2ull_rm_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
+ def int_nvvm_f2ull_rm : ClangBuiltin<"__nvvm_f2ull_rm">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
+ def int_nvvm_f2ull_rp_ftz : ClangBuiltin<"__nvvm_f2ull_rp_ftz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
+ def int_nvvm_f2ull_rp : ClangBuiltin<"__nvvm_f2ull_rp">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
+ def int_nvvm_d2ll_rn : ClangBuiltin<"__nvvm_d2ll_rn">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
+ def int_nvvm_d2ll_rz : ClangBuiltin<"__nvvm_d2ll_rz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
+ def int_nvvm_d2ll_rm : ClangBuiltin<"__nvvm_d2ll_rm">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
+ def int_nvvm_d2ll_rp : ClangBuiltin<"__nvvm_d2ll_rp">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
+ def int_nvvm_d2ull_rn : ClangBuiltin<"__nvvm_d2ull_rn">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
+ def int_nvvm_d2ull_rz : ClangBuiltin<"__nvvm_d2ull_rz">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
+ def int_nvvm_d2ull_rm : ClangBuiltin<"__nvvm_d2ull_rm">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
+ def int_nvvm_d2ull_rp : ClangBuiltin<"__nvvm_d2ull_rp">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
+ def int_nvvm_ll2f_rn : ClangBuiltin<"__nvvm_ll2f_rn">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
+ def int_nvvm_ll2f_rz : ClangBuiltin<"__nvvm_ll2f_rz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
+ def int_nvvm_ll2f_rm : ClangBuiltin<"__nvvm_ll2f_rm">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
+ def int_nvvm_ll2f_rp : ClangBuiltin<"__nvvm_ll2f_rp">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
+ def int_nvvm_ull2f_rn : ClangBuiltin<"__nvvm_ull2f_rn">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
+ def int_nvvm_ull2f_rz : ClangBuiltin<"__nvvm_ull2f_rz">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
+ def int_nvvm_ull2f_rm : ClangBuiltin<"__nvvm_ull2f_rm">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
+ def int_nvvm_ull2f_rp : ClangBuiltin<"__nvvm_ull2f_rp">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
+ def int_nvvm_ll2d_rn : ClangBuiltin<"__nvvm_ll2d_rn">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
+ def int_nvvm_ll2d_rz : ClangBuiltin<"__nvvm_ll2d_rz">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
+ def int_nvvm_ll2d_rm : ClangBuiltin<"__nvvm_ll2d_rm">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
+ def int_nvvm_ll2d_rp : ClangBuiltin<"__nvvm_ll2d_rp">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
+ def int_nvvm_ull2d_rn : ClangBuiltin<"__nvvm_ull2d_rn">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
+ def int_nvvm_ull2d_rz : ClangBuiltin<"__nvvm_ull2d_rz">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
+ def int_nvvm_ull2d_rm : ClangBuiltin<"__nvvm_ull2d_rm">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
+ def int_nvvm_ull2d_rp : ClangBuiltin<"__nvvm_ull2d_rp">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
+ def int_nvvm_f2h_rn_ftz : ClangBuiltin<"__nvvm_f2h_rn_ftz">,
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
+ def int_nvvm_f2h_rn : ClangBuiltin<"__nvvm_f2h_rn">,
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_ff2bf16x2_rn : GCCBuiltin<"__nvvm_ff2bf16x2_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ff2bf16x2_rn_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rn_relu">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ff2bf16x2_rz : GCCBuiltin<"__nvvm_ff2bf16x2_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ff2bf16x2_rz_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rz_relu">,
+ def int_nvvm_ff2bf16x2_rn : ClangBuiltin<"__nvvm_ff2bf16x2_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_ff2bf16x2_rn_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rn_relu">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_ff2bf16x2_rz : ClangBuiltin<"__nvvm_ff2bf16x2_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_ff2bf16x2_rz_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rz_relu">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ff2f16x2_rn : GCCBuiltin<"__nvvm_ff2f16x2_rn">,
- Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ff2f16x2_rn_relu : GCCBuiltin<"__nvvm_ff2f16x2_rn_relu">,
- Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ff2f16x2_rz : GCCBuiltin<"__nvvm_ff2f16x2_rz">,
- Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ff2f16x2_rz_relu : GCCBuiltin<"__nvvm_ff2f16x2_rz_relu">,
- Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_f2bf16_rn : GCCBuiltin<"__nvvm_f2bf16_rn">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2bf16_rn_relu : GCCBuiltin<"__nvvm_f2bf16_rn_relu">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2bf16_rz : GCCBuiltin<"__nvvm_f2bf16_rz">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2bf16_rz_relu : GCCBuiltin<"__nvvm_f2bf16_rz_relu">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_f2tf32_rna : GCCBuiltin<"__nvvm_f2tf32_rna">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ff2f16x2_rn : ClangBuiltin<"__nvvm_ff2f16x2_rn">,
+ Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_ff2f16x2_rn_relu : ClangBuiltin<"__nvvm_ff2f16x2_rn_relu">,
+ Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_ff2f16x2_rz : ClangBuiltin<"__nvvm_ff2f16x2_rz">,
+ Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_ff2f16x2_rz_relu : ClangBuiltin<"__nvvm_ff2f16x2_rz_relu">,
+ Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+
+ def int_nvvm_f2bf16_rn : ClangBuiltin<"__nvvm_f2bf16_rn">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_f2bf16_rn_relu : ClangBuiltin<"__nvvm_f2bf16_rn_relu">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_f2bf16_rz : ClangBuiltin<"__nvvm_f2bf16_rz">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ def int_nvvm_f2bf16_rz_relu : ClangBuiltin<"__nvvm_f2bf16_rz_relu">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+
+ def int_nvvm_f2tf32_rna : ClangBuiltin<"__nvvm_f2tf32_rna">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
//
// Bitcast
//
- def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
+ def int_nvvm_bitcast_f2i : ClangBuiltin<"__nvvm_bitcast_f2i">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
+ def int_nvvm_bitcast_i2f : ClangBuiltin<"__nvvm_bitcast_i2f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
+ def int_nvvm_bitcast_ll2d : ClangBuiltin<"__nvvm_bitcast_ll2d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
+ def int_nvvm_bitcast_d2ll : ClangBuiltin<"__nvvm_bitcast_d2ll">,
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
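// The bitcast builtins reinterpret a value's bits without any numeric
// conversion. A minimal CUDA sketch, assuming the builtin is exposed to
// device code under the name declared above:
//
//   __device__ unsigned sign_bit(float x) {
//     int bits = __nvvm_bitcast_f2i(x);   // same 32 bits, integer type
//     return ((unsigned)bits) >> 31;      // 1 for negatives (and -0.0f)
//   }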
// FNS
- def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">,
+ def int_nvvm_fns : ClangBuiltin<"__nvvm_fns">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Atomics not available as llvm intrinsics.
def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
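// For illustration: __nvvm_atomic_load_inc_32 backs CUDA's atomicInc(),
// which wraps rather than adds: old >= limit ? 0 : old + 1. A sketch
// (semantics per the CUDA programming guide; 'head' and 'n' are
// illustrative names):
//
//   __global__ void claim_slots(unsigned *head, unsigned *slot, unsigned n) {
//     // Each thread takes the next ring-buffer index, wrapping at n.
//     slot[threadIdx.x] = atomicInc(head, n - 1);
//   }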
class SCOPED_ATOMIC2_impl<LLVMType elty>
: Intrinsic<[elty],
[LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>],
- [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
class SCOPED_ATOMIC3_impl<LLVMType elty>
: Intrinsic<[elty],
[LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>,
LLVMMatchType<0>],
- [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
multiclass PTXAtomicWithScope2<LLVMType elty> {
def _cta : SCOPED_ATOMIC2_impl<elty>;
@@ -1280,177 +1331,179 @@ let TargetPrefix = "nvvm" in {
// The builtin for "bar.sync 0" is called __syncthreads. Unlike most of the
// intrinsics in this file, this one is a user-facing API.
- def int_nvvm_barrier0 : GCCBuiltin<"__syncthreads">,
- Intrinsic<[], [], [IntrConvergent]>;
+ def int_nvvm_barrier0 : ClangBuiltin<"__syncthreads">,
+ Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
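  // The canonical use of __syncthreads is separating the phases of a
  // shared-memory reduction; a sketch, assuming 256 threads per block:
  //
  //   __global__ void block_sum(const float *in, float *out) {
  //     __shared__ float buf[256];
  //     buf[threadIdx.x] = in[blockIdx.x * 256 + threadIdx.x];
  //     __syncthreads();                   // lowers to llvm.nvvm.barrier0
  //     for (int s = 128; s > 0; s >>= 1) {
  //       if (threadIdx.x < s) buf[threadIdx.x] += buf[threadIdx.x + s];
  //       __syncthreads();
  //     }
  //     if (threadIdx.x == 0) out[blockIdx.x] = buf[0];
  //   }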
// Synchronize all threads in the CTA at barrier 'n'.
- def int_nvvm_barrier_n : GCCBuiltin<"__nvvm_bar_n">,
- Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>;
+ def int_nvvm_barrier_n : ClangBuiltin<"__nvvm_bar_n">,
+ Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
  // Synchronize 'm' threads (arg 2), where 'm' is a multiple of the warp
  // size, in the CTA at barrier 'n' (arg 1).
- def int_nvvm_barrier : GCCBuiltin<"__nvvm_bar">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>;
- def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
- def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
- def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
+ def int_nvvm_barrier : ClangBuiltin<"__nvvm_bar">,
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_barrier0_popc : ClangBuiltin<"__nvvm_bar0_popc">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_barrier0_and : ClangBuiltin<"__nvvm_bar0_and">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_barrier0_or : ClangBuiltin<"__nvvm_bar0_or">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
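  // The popc/and/or forms back CUDA's __syncthreads_count(), _and() and
  // _or(), which barrier the block and combine a per-thread predicate.
  // A sketch:
  //
  //   __global__ void converged(const int *flags, int *done) {
  //     // After the barrier, every thread sees 1 iff all flags were nonzero.
  //     int all = __syncthreads_and(flags[threadIdx.x] != 0);
  //     if (all && threadIdx.x == 0) *done = 1;
  //   }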
def int_nvvm_bar_sync :
- Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
- GCCBuiltin<"__nvvm_bar_sync">;
+ Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+ ClangBuiltin<"__nvvm_bar_sync">;
def int_nvvm_bar_warp_sync :
- Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
- GCCBuiltin<"__nvvm_bar_warp_sync">;
+ Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+ ClangBuiltin<"__nvvm_bar_warp_sync">;
// barrier.sync id[, cnt]
def int_nvvm_barrier_sync :
- Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
- GCCBuiltin<"__nvvm_barrier_sync">;
+ Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+ ClangBuiltin<"__nvvm_barrier_sync">;
def int_nvvm_barrier_sync_cnt :
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>,
- GCCBuiltin<"__nvvm_barrier_sync_cnt">;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
+ ClangBuiltin<"__nvvm_barrier_sync_cnt">;
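  // A hedged sketch of a named-barrier handoff using the builtin
  // spellings above (assumes the first 128 threads of the block
  // participate; cnt must be a multiple of the warp size and cover every
  // arriving thread):
  //
  //   __device__ void handoff(bool producer) {
  //     if (producer) { /* fill a shared staging buffer */ }
  //     __nvvm_barrier_sync_cnt(1, 128);   // barrier.sync 1, 128
  //     if (!producer) { /* consume the staging buffer */ }
  //   }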
// Membar
- def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
- Intrinsic<[], [], []>;
- def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
- Intrinsic<[], [], []>;
- def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
- Intrinsic<[], [], []>;
+ def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
+ Intrinsic<[], [], [IntrNoCallback]>;
+ def int_nvvm_membar_gl : ClangBuiltin<"__nvvm_membar_gl">,
+ Intrinsic<[], [], [IntrNoCallback]>;
+ def int_nvvm_membar_sys : ClangBuiltin<"__nvvm_membar_sys">,
+ Intrinsic<[], [], [IntrNoCallback]>;
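  // These back CUDA's fences: __threadfence_block() -> membar.cta,
  // __threadfence() -> membar.gl, __threadfence_system() -> membar.sys.
  // E.g. a publish pattern:
  //
  //   __device__ void publish(int *data, volatile int *flag, int v) {
  //     *data = v;
  //     __threadfence();   // order the data write before the flag write,
  //                        // device-wide (membar.gl)
  //     *flag = 1;
  //   }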
// Async Copy
def int_nvvm_cp_async_mbarrier_arrive :
- GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive">,
- Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive">,
+ Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_shared :
- GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">,
- Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">,
+ Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_noinc :
- GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">,
- Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">,
+ Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_noinc_shared :
- GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">,
- Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">,
+ Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_ca_shared_global_4 :
- GCCBuiltin<"__nvvm_cp_async_ca_shared_global_4">,
+ ClangBuiltin<"__nvvm_cp_async_ca_shared_global_4">,
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
+ [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
"llvm.nvvm.cp.async.ca.shared.global.4">;
def int_nvvm_cp_async_ca_shared_global_8 :
- GCCBuiltin<"__nvvm_cp_async_ca_shared_global_8">,
+ ClangBuiltin<"__nvvm_cp_async_ca_shared_global_8">,
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
+ [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
"llvm.nvvm.cp.async.ca.shared.global.8">;
def int_nvvm_cp_async_ca_shared_global_16 :
- GCCBuiltin<"__nvvm_cp_async_ca_shared_global_16">,
+ ClangBuiltin<"__nvvm_cp_async_ca_shared_global_16">,
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
+ [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
"llvm.nvvm.cp.async.ca.shared.global.16">;
def int_nvvm_cp_async_cg_shared_global_16 :
- GCCBuiltin<"__nvvm_cp_async_cg_shared_global_16">,
+ ClangBuiltin<"__nvvm_cp_async_cg_shared_global_16">,
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
+ [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
"llvm.nvvm.cp.async.cg.shared.global.16">;
def int_nvvm_cp_async_commit_group :
- GCCBuiltin<"__nvvm_cp_async_commit_group">,
+ ClangBuiltin<"__nvvm_cp_async_commit_group">,
Intrinsic<[],[],[]>;
def int_nvvm_cp_async_wait_group :
- GCCBuiltin<"__nvvm_cp_async_wait_group">,
+ ClangBuiltin<"__nvvm_cp_async_wait_group">,
Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
def int_nvvm_cp_async_wait_all :
- GCCBuiltin<"__nvvm_cp_async_wait_all">,
+ ClangBuiltin<"__nvvm_cp_async_wait_all">,
Intrinsic<[],[],[]>;
// mbarrier
-def int_nvvm_mbarrier_init : GCCBuiltin<"__nvvm_mbarrier_init">,
- Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">,
+ Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_init_shared :
- GCCBuiltin<"__nvvm_mbarrier_init_shared">,
- Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_init_shared">,
+ Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
-def int_nvvm_mbarrier_inval : GCCBuiltin<"__nvvm_mbarrier_inval">,
+def int_nvvm_mbarrier_inval : ClangBuiltin<"__nvvm_mbarrier_inval">,
Intrinsic<[],[llvm_i64ptr_ty],
- [IntrConvergent, IntrWriteMem, IntrArgMemOnly,
+ [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_mbarrier_inval_shared :
- GCCBuiltin<"__nvvm_mbarrier_inval_shared">,
+ ClangBuiltin<"__nvvm_mbarrier_inval_shared">,
Intrinsic<[],[llvm_shared_i64ptr_ty],
- [IntrConvergent, IntrWriteMem, IntrArgMemOnly,
+ [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
-def int_nvvm_mbarrier_arrive : GCCBuiltin<"__nvvm_mbarrier_arrive">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>;
+def int_nvvm_mbarrier_arrive : ClangBuiltin<"__nvvm_mbarrier_arrive">,
+ Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_shared :
- GCCBuiltin<"__nvvm_mbarrier_arrive_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_arrive_shared">,
+ Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_noComplete :
- GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
+ Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_noComplete_shared :
- GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
+ Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
+ llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop :
- GCCBuiltin<"__nvvm_mbarrier_arrive_drop">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_arrive_drop">,
+ Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_shared :
- GCCBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
+ Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_noComplete :
- GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
+ Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_noComplete_shared :
- GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
+ Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
+ llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_test_wait :
- GCCBuiltin<"__nvvm_mbarrier_test_wait">,
- Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_test_wait">,
+ Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_test_wait_shared :
- GCCBuiltin<"__nvvm_mbarrier_test_wait_shared">,
- Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_test_wait_shared">,
+ Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_pending_count :
- GCCBuiltin<"__nvvm_mbarrier_pending_count">,
- Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent]>;
+ ClangBuiltin<"__nvvm_mbarrier_pending_count">,
+ Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent, IntrNoCallback]>;
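// A sketch of the mbarrier arrive/wait protocol using the builtins above
// (assumes sm_80 and a 64-bit barrier object in shared memory; names are
// illustrative):
//
//   __device__ void phase_sync(long long *bar, int nthreads) {
//     if (threadIdx.x == 0)
//       __nvvm_mbarrier_init(bar, nthreads);  // expect nthreads arrivals
//     __syncthreads();
//     long long state = __nvvm_mbarrier_arrive(bar);
//     while (!__nvvm_mbarrier_test_wait(bar, state))
//       ;                                     // poll until the phase flips
//   }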
// Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.i">;
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.f">;
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.p">;
// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.i">;
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.f">;
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.p">;
// Use for generic pointers
@@ -1491,7 +1544,7 @@ def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
// This is for kernel function params that are passed by value via a pointer (byval).
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty],
- [IntrNoMem, IntrSpeculatable],
+ [IntrNoMem, IntrSpeculatable, IntrNoCallback],
"llvm.nvvm.ptr.gen.to.param">;
// Move intrinsics, used in nvvm internally
@@ -1531,149 +1584,149 @@ def int_nvvm_reflect :
def int_nvvm_isspacep_const
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.const">,
- GCCBuiltin<"__nvvm_isspacep_const">;
+ ClangBuiltin<"__nvvm_isspacep_const">;
def int_nvvm_isspacep_global
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.global">,
- GCCBuiltin<"__nvvm_isspacep_global">;
+ ClangBuiltin<"__nvvm_isspacep_global">;
def int_nvvm_isspacep_local
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.local">,
- GCCBuiltin<"__nvvm_isspacep_local">;
+ ClangBuiltin<"__nvvm_isspacep_local">;
def int_nvvm_isspacep_shared
: DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.shared">,
- GCCBuiltin<"__nvvm_isspacep_shared">;
+ ClangBuiltin<"__nvvm_isspacep_shared">;
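// These back the CUDA address-space predicates __isGlobal(), __isShared(),
// __isConstant() and __isLocal(). A sketch:
//
//   __device__ void bump(int *p) {
//     if (__isShared(p)) {
//       // p is known to point into .shared; a cheaper scheme could be used.
//     }
//     ++*p;
//   }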
// Environment register read
def int_nvvm_read_ptx_sreg_envreg0
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg0">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
def int_nvvm_read_ptx_sreg_envreg1
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg1">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
def int_nvvm_read_ptx_sreg_envreg2
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg2">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
def int_nvvm_read_ptx_sreg_envreg3
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg3">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg3">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg3">;
def int_nvvm_read_ptx_sreg_envreg4
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg4">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg4">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg4">;
def int_nvvm_read_ptx_sreg_envreg5
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg5">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg5">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg5">;
def int_nvvm_read_ptx_sreg_envreg6
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg6">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg6">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg6">;
def int_nvvm_read_ptx_sreg_envreg7
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg7">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg7">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg7">;
def int_nvvm_read_ptx_sreg_envreg8
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg8">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg8">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg8">;
def int_nvvm_read_ptx_sreg_envreg9
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg9">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg9">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg9">;
def int_nvvm_read_ptx_sreg_envreg10
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg10">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg10">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg10">;
def int_nvvm_read_ptx_sreg_envreg11
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg11">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg11">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg11">;
def int_nvvm_read_ptx_sreg_envreg12
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg12">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg12">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg12">;
def int_nvvm_read_ptx_sreg_envreg13
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg13">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg13">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg13">;
def int_nvvm_read_ptx_sreg_envreg14
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg14">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg14">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg14">;
def int_nvvm_read_ptx_sreg_envreg15
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg15">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg15">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg15">;
def int_nvvm_read_ptx_sreg_envreg16
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg16">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg16">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg16">;
def int_nvvm_read_ptx_sreg_envreg17
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg17">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg17">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg17">;
def int_nvvm_read_ptx_sreg_envreg18
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg18">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg18">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg18">;
def int_nvvm_read_ptx_sreg_envreg19
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg19">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg19">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg19">;
def int_nvvm_read_ptx_sreg_envreg20
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg20">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg20">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg20">;
def int_nvvm_read_ptx_sreg_envreg21
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg21">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg21">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg21">;
def int_nvvm_read_ptx_sreg_envreg22
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg22">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg22">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg22">;
def int_nvvm_read_ptx_sreg_envreg23
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg23">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg23">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg23">;
def int_nvvm_read_ptx_sreg_envreg24
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg24">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg24">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg24">;
def int_nvvm_read_ptx_sreg_envreg25
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg25">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg25">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg25">;
def int_nvvm_read_ptx_sreg_envreg26
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg26">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg26">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg26">;
def int_nvvm_read_ptx_sreg_envreg27
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg27">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg27">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg27">;
def int_nvvm_read_ptx_sreg_envreg28
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg28">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg28">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg28">;
def int_nvvm_read_ptx_sreg_envreg29
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg29">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg29">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg29">;
def int_nvvm_read_ptx_sreg_envreg30
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg30">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg30">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg30">;
def int_nvvm_read_ptx_sreg_envreg31
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg31">,
- GCCBuiltin<"__nvvm_read_ptx_sreg_envreg31">;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_envreg31">;
// Texture Fetch
@@ -3161,62 +3214,62 @@ def int_nvvm_suld_3d_v4i32_zero
def int_nvvm_txq_channel_order
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.channel.order">,
- GCCBuiltin<"__nvvm_txq_channel_order">;
+ ClangBuiltin<"__nvvm_txq_channel_order">;
def int_nvvm_txq_channel_data_type
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.channel.data.type">,
- GCCBuiltin<"__nvvm_txq_channel_data_type">;
+ ClangBuiltin<"__nvvm_txq_channel_data_type">;
def int_nvvm_txq_width
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.width">,
- GCCBuiltin<"__nvvm_txq_width">;
+ ClangBuiltin<"__nvvm_txq_width">;
def int_nvvm_txq_height
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.height">,
- GCCBuiltin<"__nvvm_txq_height">;
+ ClangBuiltin<"__nvvm_txq_height">;
def int_nvvm_txq_depth
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.depth">,
- GCCBuiltin<"__nvvm_txq_depth">;
+ ClangBuiltin<"__nvvm_txq_depth">;
def int_nvvm_txq_array_size
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.array.size">,
- GCCBuiltin<"__nvvm_txq_array_size">;
+ ClangBuiltin<"__nvvm_txq_array_size">;
def int_nvvm_txq_num_samples
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.num.samples">,
- GCCBuiltin<"__nvvm_txq_num_samples">;
+ ClangBuiltin<"__nvvm_txq_num_samples">;
def int_nvvm_txq_num_mipmap_levels
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.txq.num.mipmap.levels">,
- GCCBuiltin<"__nvvm_txq_num_mipmap_levels">;
+ ClangBuiltin<"__nvvm_txq_num_mipmap_levels">;
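// The txq builtins query properties of a texture handle, mirroring PTX's
// txq instruction. A hedged sketch ('tex' is a cudaTextureObject_t, i.e.
// a 64-bit handle):
//
//   __device__ int2 tex_extent(cudaTextureObject_t tex) {
//     return make_int2(__nvvm_txq_width(tex), __nvvm_txq_height(tex));
//   }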
//===- Surface Query ------------------------------------------------------===//
def int_nvvm_suq_channel_order
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.suq.channel.order">,
- GCCBuiltin<"__nvvm_suq_channel_order">;
+ ClangBuiltin<"__nvvm_suq_channel_order">;
def int_nvvm_suq_channel_data_type
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.suq.channel.data.type">,
- GCCBuiltin<"__nvvm_suq_channel_data_type">;
+ ClangBuiltin<"__nvvm_suq_channel_data_type">;
def int_nvvm_suq_width
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.suq.width">,
- GCCBuiltin<"__nvvm_suq_width">;
+ ClangBuiltin<"__nvvm_suq_width">;
def int_nvvm_suq_height
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.suq.height">,
- GCCBuiltin<"__nvvm_suq_height">;
+ ClangBuiltin<"__nvvm_suq_height">;
def int_nvvm_suq_depth
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.suq.depth">,
- GCCBuiltin<"__nvvm_suq_depth">;
+ ClangBuiltin<"__nvvm_suq_depth">;
def int_nvvm_suq_array_size
: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.suq.array.size">,
- GCCBuiltin<"__nvvm_suq_array_size">;
+ ClangBuiltin<"__nvvm_suq_array_size">;
//===- Handle Query -------------------------------------------------------===//
@@ -3224,15 +3277,15 @@ def int_nvvm_suq_array_size
def int_nvvm_istypep_sampler
: Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.istypep.sampler">,
- GCCBuiltin<"__nvvm_istypep_sampler">;
+ ClangBuiltin<"__nvvm_istypep_sampler">;
def int_nvvm_istypep_surface
: Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.istypep.surface">,
- GCCBuiltin<"__nvvm_istypep_surface">;
+ ClangBuiltin<"__nvvm_istypep_surface">;
def int_nvvm_istypep_texture
: Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
"llvm.nvvm.istypep.texture">,
- GCCBuiltin<"__nvvm_istypep_texture">;
+ ClangBuiltin<"__nvvm_istypep_texture">;
@@ -3243,810 +3296,810 @@ def int_nvvm_istypep_texture
def int_nvvm_sust_b_1d_i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i8_clamp">;
def int_nvvm_sust_b_1d_i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i16_clamp">;
def int_nvvm_sust_b_1d_i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i32_clamp">;
def int_nvvm_sust_b_1d_i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i64_clamp">;
def int_nvvm_sust_b_1d_v2i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">;
def int_nvvm_sust_b_1d_v2i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">;
def int_nvvm_sust_b_1d_v2i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v2i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">;
def int_nvvm_sust_b_1d_v2i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.v2i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">;
def int_nvvm_sust_b_1d_v4i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">;
def int_nvvm_sust_b_1d_v4i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">;
def int_nvvm_sust_b_1d_v4i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v4i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">;
def int_nvvm_sust_b_1d_array_i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">;
def int_nvvm_sust_b_1d_array_i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">;
def int_nvvm_sust_b_1d_array_i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">;
def int_nvvm_sust_b_1d_array_i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.array.i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">;
def int_nvvm_sust_b_1d_array_v2i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">;
def int_nvvm_sust_b_1d_array_v2i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">;
def int_nvvm_sust_b_1d_array_v2i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">;
def int_nvvm_sust_b_1d_array_v2i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">;
def int_nvvm_sust_b_1d_array_v4i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">;
def int_nvvm_sust_b_1d_array_v4i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">;
def int_nvvm_sust_b_1d_array_v4i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">;
def int_nvvm_sust_b_2d_i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i8_clamp">;
def int_nvvm_sust_b_2d_i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i16_clamp">;
def int_nvvm_sust_b_2d_i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i32_clamp">;
def int_nvvm_sust_b_2d_i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i64_clamp">;
def int_nvvm_sust_b_2d_v2i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">;
def int_nvvm_sust_b_2d_v2i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">;
def int_nvvm_sust_b_2d_v2i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v2i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">;
def int_nvvm_sust_b_2d_v2i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.v2i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">;
def int_nvvm_sust_b_2d_v4i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">;
def int_nvvm_sust_b_2d_v4i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">;
def int_nvvm_sust_b_2d_v4i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v4i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">;
def int_nvvm_sust_b_2d_array_i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">;
def int_nvvm_sust_b_2d_array_i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">;
def int_nvvm_sust_b_2d_array_i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">;
def int_nvvm_sust_b_2d_array_i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.array.i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">;
def int_nvvm_sust_b_2d_array_v2i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">;
def int_nvvm_sust_b_2d_array_v2i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">;
def int_nvvm_sust_b_2d_array_v2i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">;
def int_nvvm_sust_b_2d_array_v2i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">;
def int_nvvm_sust_b_2d_array_v4i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">;
def int_nvvm_sust_b_2d_array_v4i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">;
def int_nvvm_sust_b_2d_array_v4i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">;
def int_nvvm_sust_b_3d_i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i8_clamp">;
def int_nvvm_sust_b_3d_i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i16_clamp">;
def int_nvvm_sust_b_3d_i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i32_clamp">;
def int_nvvm_sust_b_3d_i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.3d.i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i64_clamp">;
def int_nvvm_sust_b_3d_v2i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">;
def int_nvvm_sust_b_3d_v2i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">;
def int_nvvm_sust_b_3d_v2i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v2i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">;
def int_nvvm_sust_b_3d_v2i64_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.3d.v2i64.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">;
def int_nvvm_sust_b_3d_v4i8_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i8.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">;
def int_nvvm_sust_b_3d_v4i16_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i16.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">;
def int_nvvm_sust_b_3d_v4i32_clamp
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v4i32.clamp">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">;
// .trap variant
def int_nvvm_sust_b_1d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i8_trap">;
def int_nvvm_sust_b_1d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i16_trap">;
def int_nvvm_sust_b_1d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i32_trap">;
def int_nvvm_sust_b_1d_i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i64_trap">;
def int_nvvm_sust_b_1d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i8_trap">;
def int_nvvm_sust_b_1d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i16_trap">;
def int_nvvm_sust_b_1d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i32_trap">;
def int_nvvm_sust_b_1d_v2i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.v2i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i64_trap">;
def int_nvvm_sust_b_1d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i8_trap">;
def int_nvvm_sust_b_1d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i16_trap">;
def int_nvvm_sust_b_1d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i32_trap">;
def int_nvvm_sust_b_1d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i8_trap">;
def int_nvvm_sust_b_1d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i16_trap">;
def int_nvvm_sust_b_1d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i32_trap">;
def int_nvvm_sust_b_1d_array_i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.array.i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i64_trap">;
def int_nvvm_sust_b_1d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">;
def int_nvvm_sust_b_1d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">;
def int_nvvm_sust_b_1d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">;
def int_nvvm_sust_b_1d_array_v2i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">;
def int_nvvm_sust_b_1d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">;
def int_nvvm_sust_b_1d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">;
def int_nvvm_sust_b_1d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">;
def int_nvvm_sust_b_2d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i8_trap">;
def int_nvvm_sust_b_2d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i16_trap">;
def int_nvvm_sust_b_2d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i32_trap">;
def int_nvvm_sust_b_2d_i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i64_trap">;
def int_nvvm_sust_b_2d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i8_trap">;
def int_nvvm_sust_b_2d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i16_trap">;
def int_nvvm_sust_b_2d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i32_trap">;
def int_nvvm_sust_b_2d_v2i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.v2i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i64_trap">;
def int_nvvm_sust_b_2d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i8_trap">;
def int_nvvm_sust_b_2d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i16_trap">;
def int_nvvm_sust_b_2d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i32_trap">;
def int_nvvm_sust_b_2d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i8_trap">;
def int_nvvm_sust_b_2d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i16_trap">;
def int_nvvm_sust_b_2d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i32_trap">;
def int_nvvm_sust_b_2d_array_i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.array.i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i64_trap">;
def int_nvvm_sust_b_2d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">;
def int_nvvm_sust_b_2d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">;
def int_nvvm_sust_b_2d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">;
def int_nvvm_sust_b_2d_array_v2i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">;
def int_nvvm_sust_b_2d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">;
def int_nvvm_sust_b_2d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">;
def int_nvvm_sust_b_2d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">;
def int_nvvm_sust_b_3d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i8_trap">;
def int_nvvm_sust_b_3d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i16_trap">;
def int_nvvm_sust_b_3d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i32_trap">;
def int_nvvm_sust_b_3d_i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.3d.i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i64_trap">;
def int_nvvm_sust_b_3d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i8_trap">;
def int_nvvm_sust_b_3d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i16_trap">;
def int_nvvm_sust_b_3d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i32_trap">;
def int_nvvm_sust_b_3d_v2i64_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.3d.v2i64.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i64_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i64_trap">;
def int_nvvm_sust_b_3d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i8_trap">;
def int_nvvm_sust_b_3d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i16_trap">;
def int_nvvm_sust_b_3d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i32_trap">;
// .zero variant
def int_nvvm_sust_b_1d_i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i8_zero">;
def int_nvvm_sust_b_1d_i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i16_zero">;
def int_nvvm_sust_b_1d_i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i32_zero">;
def int_nvvm_sust_b_1d_i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_i64_zero">;
def int_nvvm_sust_b_1d_v2i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i8_zero">;
def int_nvvm_sust_b_1d_v2i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v2i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i16_zero">;
def int_nvvm_sust_b_1d_v2i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v2i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i32_zero">;
def int_nvvm_sust_b_1d_v2i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.v2i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_v2i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v2i64_zero">;
def int_nvvm_sust_b_1d_v4i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i8_zero">;
def int_nvvm_sust_b_1d_v4i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.v4i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i16_zero">;
def int_nvvm_sust_b_1d_v4i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.v4i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_v4i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_v4i32_zero">;
def int_nvvm_sust_b_1d_array_i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i8_zero">;
def int_nvvm_sust_b_1d_array_i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i16_zero">;
def int_nvvm_sust_b_1d_array_i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i32_zero">;
def int_nvvm_sust_b_1d_array_i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.array.i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_i64_zero">;
def int_nvvm_sust_b_1d_array_v2i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">;
def int_nvvm_sust_b_1d_array_v2i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">;
def int_nvvm_sust_b_1d_array_v2i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">;
def int_nvvm_sust_b_1d_array_v2i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.1d.array.v2i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">;
def int_nvvm_sust_b_1d_array_v4i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">;
def int_nvvm_sust_b_1d_array_v4i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">;
def int_nvvm_sust_b_1d_array_v4i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.1d.array.v4i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">;
def int_nvvm_sust_b_2d_i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i8_zero">;
def int_nvvm_sust_b_2d_i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i16_zero">;
def int_nvvm_sust_b_2d_i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i32_zero">;
def int_nvvm_sust_b_2d_i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_i64_zero">;
def int_nvvm_sust_b_2d_v2i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i8_zero">;
def int_nvvm_sust_b_2d_v2i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v2i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i16_zero">;
def int_nvvm_sust_b_2d_v2i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v2i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i32_zero">;
def int_nvvm_sust_b_2d_v2i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.v2i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_v2i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v2i64_zero">;
def int_nvvm_sust_b_2d_v4i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i8_zero">;
def int_nvvm_sust_b_2d_v4i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.v4i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i16_zero">;
def int_nvvm_sust_b_2d_v4i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.v4i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_v4i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_v4i32_zero">;
def int_nvvm_sust_b_2d_array_i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i8_zero">;
def int_nvvm_sust_b_2d_array_i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i16_zero">;
def int_nvvm_sust_b_2d_array_i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i32_zero">;
def int_nvvm_sust_b_2d_array_i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.array.i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_i64_zero">;
def int_nvvm_sust_b_2d_array_v2i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">;
def int_nvvm_sust_b_2d_array_v2i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">;
def int_nvvm_sust_b_2d_array_v2i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">;
def int_nvvm_sust_b_2d_array_v2i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.2d.array.v2i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">;
def int_nvvm_sust_b_2d_array_v4i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">;
def int_nvvm_sust_b_2d_array_v4i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">;
def int_nvvm_sust_b_2d_array_v4i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.2d.array.v4i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">;
def int_nvvm_sust_b_3d_i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i8_zero">;
def int_nvvm_sust_b_3d_i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i16_zero">;
def int_nvvm_sust_b_3d_i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i32_zero">;
def int_nvvm_sust_b_3d_i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.3d.i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_i64_zero">;
def int_nvvm_sust_b_3d_v2i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i8_zero">;
def int_nvvm_sust_b_3d_v2i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v2i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i16_zero">;
def int_nvvm_sust_b_3d_v2i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v2i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i32_zero">;
def int_nvvm_sust_b_3d_v2i64_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i64_ty, llvm_i64_ty], [],
"llvm.nvvm.sust.b.3d.v2i64.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_v2i64_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v2i64_zero">;
def int_nvvm_sust_b_3d_v4i8_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i8.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i8_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i8_zero">;
def int_nvvm_sust_b_3d_v4i16_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.b.3d.v4i16.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i16_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i16_zero">;
def int_nvvm_sust_b_3d_v4i32_zero
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.b.3d.v4i32.zero">,
- GCCBuiltin<"__nvvm_sust_b_3d_v4i32_zero">;
+ ClangBuiltin<"__nvvm_sust_b_3d_v4i32_zero">;
@@ -4055,245 +4108,245 @@ def int_nvvm_sust_b_3d_v4i32_zero
def int_nvvm_sust_p_1d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_i8_trap">;
def int_nvvm_sust_p_1d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_i16_trap">;
def int_nvvm_sust_p_1d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_i32_trap">;
def int_nvvm_sust_p_1d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_v2i8_trap">;
def int_nvvm_sust_p_1d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_v2i16_trap">;
def int_nvvm_sust_p_1d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_v2i32_trap">;
def int_nvvm_sust_p_1d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_v4i8_trap">;
def int_nvvm_sust_p_1d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_v4i16_trap">;
def int_nvvm_sust_p_1d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_v4i32_trap">;
def int_nvvm_sust_p_1d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_i8_trap">;
def int_nvvm_sust_p_1d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_i16_trap">;
def int_nvvm_sust_p_1d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.array.i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_i32_trap">;
def int_nvvm_sust_p_1d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">;
def int_nvvm_sust_p_1d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">;
def int_nvvm_sust_p_1d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.array.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">;
def int_nvvm_sust_p_1d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">;
def int_nvvm_sust_p_1d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.1d.array.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">;
def int_nvvm_sust_p_1d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.1d.array.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">;
def int_nvvm_sust_p_2d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_i8_trap">;
def int_nvvm_sust_p_2d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_i16_trap">;
def int_nvvm_sust_p_2d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_i32_trap">;
def int_nvvm_sust_p_2d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_v2i8_trap">;
def int_nvvm_sust_p_2d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_v2i16_trap">;
def int_nvvm_sust_p_2d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_v2i32_trap">;
def int_nvvm_sust_p_2d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_v4i8_trap">;
def int_nvvm_sust_p_2d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_v4i16_trap">;
def int_nvvm_sust_p_2d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_v4i32_trap">;
def int_nvvm_sust_p_2d_array_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_i8_trap">;
def int_nvvm_sust_p_2d_array_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_i16_trap">;
def int_nvvm_sust_p_2d_array_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.array.i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_i32_trap">;
def int_nvvm_sust_p_2d_array_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">;
def int_nvvm_sust_p_2d_array_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">;
def int_nvvm_sust_p_2d_array_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.array.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">;
def int_nvvm_sust_p_2d_array_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">;
def int_nvvm_sust_p_2d_array_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.2d.array.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">;
def int_nvvm_sust_p_2d_array_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.2d.array.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">;
def int_nvvm_sust_p_3d_i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_i8_trap">;
def int_nvvm_sust_p_3d_i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_i16_trap">;
def int_nvvm_sust_p_3d_i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.3d.i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_i32_trap">;
def int_nvvm_sust_p_3d_v2i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v2i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_v2i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_v2i8_trap">;
def int_nvvm_sust_p_3d_v2i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v2i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_v2i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_v2i16_trap">;
def int_nvvm_sust_p_3d_v2i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.3d.v2i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_v2i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_v2i32_trap">;
def int_nvvm_sust_p_3d_v4i8_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v4i8.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_v4i8_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_v4i8_trap">;
def int_nvvm_sust_p_3d_v4i16_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
"llvm.nvvm.sust.p.3d.v4i16.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_v4i16_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_v4i16_trap">;
def int_nvvm_sust_p_3d_v4i32_trap
: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
"llvm.nvvm.sust.p.3d.v4i32.trap">,
- GCCBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
+ ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
def int_nvvm_rotate_b32
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">,
- GCCBuiltin<"__nvvm_rotate_b32">;
+ ClangBuiltin<"__nvvm_rotate_b32">;
def int_nvvm_rotate_b64
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">,
- GCCBuiltin<"__nvvm_rotate_b64">;
+ ClangBuiltin<"__nvvm_rotate_b64">;
def int_nvvm_rotate_right_b64
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">,
- GCCBuiltin<"__nvvm_rotate_right_b64">;
+ ClangBuiltin<"__nvvm_rotate_right_b64">;
def int_nvvm_swap_lo_hi_b64
: DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
[IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
- GCCBuiltin<"__nvvm_swap_lo_hi_b64">;
+ ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
// Accessing special registers.
@@ -4304,31 +4357,31 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Enable this once v4i32 support is enabled in back-end.
// def _v4i32 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
- def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
- def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
- def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
- def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
+ def _x : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
+ def _y : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
+ def _z : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
+ def _w : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
}
class PTXReadSRegIntrinsic_r32<string name>
: DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadSRegIntrinsic_r64<string name>
: DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// Intrinsics to read registers with non-constant values, i.e. values that may
// change over the kernel's lifetime. Such reads should not be CSE'd.
class PTXReadNCSRegIntrinsic_r32<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+ : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>,
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadNCSRegIntrinsic_r64<string name>
- : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+ : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>,
+ ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
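// A hedged illustration of the distinction (the defs named below exist
// elsewhere in this file; these commented lines are only a sketch):
//   def int_nvvm_read_ptx_sreg_clock   : PTXReadNCSRegIntrinsic_r32<"clock">;
//   def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
// Two reads of %clock may legitimately return different values, so
// IntrInaccessibleMemOnly (rather than IntrNoMem) keeps them from being
// CSE'd; %tid above, by contrast, is constant for the kernel's lifetime and
// is safely modeled as IntrNoMem plus IntrSpeculatable.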
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
@@ -4375,14 +4428,16 @@ foreach sync = [false, true] in {
foreach return_pred = [false, true] in {
foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
if i.withGccBuiltin then {
- def i.Name : GCCBuiltin<i.Builtin>,
+ def i.Name : ClangBuiltin<i.Builtin>,
Intrinsic<i.RetTy, i.ArgsTy,
- [IntrInaccessibleMemOnly, IntrConvergent],
+ [IntrInaccessibleMemOnly, IntrConvergent,
+ IntrNoCallback],
i.IntrName>;
}
if i.withoutGccBuiltin then {
def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
- [IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>;
+ [IntrInaccessibleMemOnly, IntrConvergent,
+ IntrNoCallback], i.IntrName>;
}
}
}
@@ -4397,23 +4452,23 @@ foreach sync = [false, true] in {
// vote.all pred
def int_nvvm_vote_all :
Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all">,
- GCCBuiltin<"__nvvm_vote_all">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all">,
+ ClangBuiltin<"__nvvm_vote_all">;
// vote.any pred
def int_nvvm_vote_any :
Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any">,
- GCCBuiltin<"__nvvm_vote_any">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any">,
+ ClangBuiltin<"__nvvm_vote_any">;
// vote.uni pred
def int_nvvm_vote_uni :
Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni">,
- GCCBuiltin<"__nvvm_vote_uni">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni">,
+ ClangBuiltin<"__nvvm_vote_uni">;
// vote.ballot pred
def int_nvvm_vote_ballot :
Intrinsic<[llvm_i32_ty], [llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot">,
- GCCBuiltin<"__nvvm_vote_ballot">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot">,
+ ClangBuiltin<"__nvvm_vote_ballot">;
//
// VOTE.SYNC
@@ -4422,23 +4477,23 @@ def int_nvvm_vote_ballot :
// vote.sync.all mask, pred
def int_nvvm_vote_all_sync :
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all.sync">,
- GCCBuiltin<"__nvvm_vote_all_sync">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all.sync">,
+ ClangBuiltin<"__nvvm_vote_all_sync">;
// vote.sync.any mask, pred
def int_nvvm_vote_any_sync :
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any.sync">,
- GCCBuiltin<"__nvvm_vote_any_sync">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any.sync">,
+ ClangBuiltin<"__nvvm_vote_any_sync">;
// vote.sync.uni mask, pred
def int_nvvm_vote_uni_sync :
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni.sync">,
- GCCBuiltin<"__nvvm_vote_uni_sync">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni.sync">,
+ ClangBuiltin<"__nvvm_vote_uni_sync">;
// vote.sync.ballot mask, pred
def int_nvvm_vote_ballot_sync :
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot.sync">,
- GCCBuiltin<"__nvvm_vote_ballot_sync">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot.sync">,
+ ClangBuiltin<"__nvvm_vote_ballot_sync">;
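// A hedged usage sketch (CUDA device code; __ballot_sync is the CUDA-header
// wrapper, not defined in this file):
//   unsigned ballot = __ballot_sync(0xffffffff, pred);
// The explicit membermask names the participating lanes; the call lowers to
// llvm.nvvm.vote.ballot.sync and yields one result bit per lane.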
//
// MATCH.SYNC
@@ -4446,13 +4501,13 @@ def int_nvvm_vote_ballot_sync :
// match.any.sync.b32 mask, value
def int_nvvm_match_any_sync_i32 :
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i32">,
- GCCBuiltin<"__nvvm_match_any_sync_i32">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i32">,
+ ClangBuiltin<"__nvvm_match_any_sync_i32">;
// match.any.sync.b64 mask, value
def int_nvvm_match_any_sync_i64 :
- Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i64">,
- GCCBuiltin<"__nvvm_match_any_sync_i64">;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i64">,
+ ClangBuiltin<"__nvvm_match_any_sync_i64">;
// The match.all instruction has two variants -- one returns a single value, the
// other returns a pair {value, predicate}. We currently only implement the latter as
@@ -4461,54 +4516,54 @@ def int_nvvm_match_any_sync_i64 :
// match.all.sync.b32p mask, value
def int_nvvm_match_all_sync_i32p :
Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i32p">;
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i32p">;
// match.all.sync.b64p mask, value
def int_nvvm_match_all_sync_i64p :
- Intrinsic<[llvm_i64_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
- [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">;
+ Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i64p">;
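// A hedged sketch of recovering the single-value variant from the
// pair-returning intrinsic (CUDA device code; __match_all_sync is the
// CUDA-header wrapper, not defined in this file):
//   int pred;
//   unsigned mask = __match_all_sync(0xffffffff, value, &pred);
// Using 'mask' alone gives the single-value form; 'pred' additionally
// reports whether every participating lane supplied the same value.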
//
// REDUX.SYNC
//
// redux.sync.min.u32 dst, src, membermask;
-def int_nvvm_redux_sync_umin : GCCBuiltin<"__nvvm_redux_sync_umin">,
+def int_nvvm_redux_sync_umin : ClangBuiltin<"__nvvm_redux_sync_umin">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
// redux.sync.max.u32 dst, src, membermask;
-def int_nvvm_redux_sync_umax : GCCBuiltin<"__nvvm_redux_sync_umax">,
+def int_nvvm_redux_sync_umax : ClangBuiltin<"__nvvm_redux_sync_umax">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
// redux.sync.add.s32 dst, src, membermask;
-def int_nvvm_redux_sync_add : GCCBuiltin<"__nvvm_redux_sync_add">,
+def int_nvvm_redux_sync_add : ClangBuiltin<"__nvvm_redux_sync_add">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
// redux.sync.min.s32 dst, src, membermask;
-def int_nvvm_redux_sync_min : GCCBuiltin<"__nvvm_redux_sync_min">,
+def int_nvvm_redux_sync_min : ClangBuiltin<"__nvvm_redux_sync_min">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
// redux.sync.max.s32 dst, src, membermask;
-def int_nvvm_redux_sync_max : GCCBuiltin<"__nvvm_redux_sync_max">,
+def int_nvvm_redux_sync_max : ClangBuiltin<"__nvvm_redux_sync_max">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
// redux.sync.and.b32 dst, src, membermask;
-def int_nvvm_redux_sync_and : GCCBuiltin<"__nvvm_redux_sync_and">,
+def int_nvvm_redux_sync_and : ClangBuiltin<"__nvvm_redux_sync_and">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
// redux.sync.xor.b32 dst, src, membermask;
-def int_nvvm_redux_sync_xor : GCCBuiltin<"__nvvm_redux_sync_xor">,
+def int_nvvm_redux_sync_xor : ClangBuiltin<"__nvvm_redux_sync_xor">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
// redux.sync.or.b32 dst, src, membermask;
-def int_nvvm_redux_sync_or : GCCBuiltin<"__nvvm_redux_sync_or">,
+def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly]>;
+ [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
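// A hedged usage sketch (CUDA device code, sm_80 and later; the
// __reduce_*_sync wrappers come from the CUDA headers, not this file):
//   unsigned m = __reduce_min_sync(0xffffffff, val); // llvm.nvvm.redux.sync.umin
// Every lane named in the membermask receives the fully reduced value.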
//
// WMMA instructions
@@ -4517,7 +4572,7 @@ def int_nvvm_redux_sync_or : GCCBuiltin<"__nvvm_redux_sync_or">,
class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
: Intrinsic<Frag.regs,
!if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
- [IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
// WMMA.STORE.D
@@ -4527,7 +4582,7 @@ class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
[llvm_anyptr_ty],
Frag.regs,
!if(WithStride, [llvm_i32_ty], [])),
- [IntrWriteMem, IntrArgMemOnly, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
+ [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
// Create all load/store variants
@@ -4550,7 +4605,7 @@ class NVVM_WMMA_MMA<string ALayout, string BLayout, int Satfinite, string rnd, s
WMMA_REGS C, WMMA_REGS D>
: Intrinsic<D.regs,
!listconcat(A.regs, B.regs, C.regs),
- [IntrNoMem],
+ [IntrNoMem, IntrNoCallback],
WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, A, B, C, D>.llvm>;
foreach layout_a = ["row", "col"] in {
@@ -4577,7 +4632,7 @@ class NVVM_MMA<string ALayout, string BLayout, int Satfinite, string b1op,
WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D>
: Intrinsic<D.regs,
!listconcat(A.regs, B.regs, C.regs),
- [IntrNoMem],
+ [IntrNoMem, IntrNoCallback],
MMA_NAME<ALayout, BLayout, Satfinite, b1op, A, B, C, D>.llvm>;
foreach layout_a = ["row", "col"] in {
@@ -4598,7 +4653,7 @@ foreach layout_a = ["row", "col"] in {
// LDMATRIX
class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed>
: Intrinsic<Frag.regs, [llvm_anyptr_ty],
- [IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
+ [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>,
NoCapture<ArgIndex<0>>],
LDMATRIX_NAME<Frag, Transposed>.intr>;
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index b01fa10763b8..577122328dd2 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -18,7 +18,7 @@
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions.
def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">,
+ def int_ppc_dcbf : ClangBuiltin<"__builtin_dcbf">,
Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbfps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_dcbstps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
@@ -30,136 +30,170 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
// Get content from current FPSCR register
- def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">,
+ def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">,
Intrinsic<[llvm_double_ty], [],
[IntrNoMerge, IntrHasSideEffects]>;
// Set FPSCR register, and return previous content
- def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">,
+ def int_ppc_setflm : ClangBuiltin<"__builtin_setflm">,
Intrinsic<[llvm_double_ty], [llvm_double_ty],
[IntrHasSideEffects]>;
// Intrinsics for [double]word extended forms of divide instructions
- def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
+ def int_ppc_divwe : ClangBuiltin<"__builtin_divwe">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_divweu : GCCBuiltin<"__builtin_divweu">,
+ def int_ppc_divweu : ClangBuiltin<"__builtin_divweu">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_divde : GCCBuiltin<"__builtin_divde">,
+ def int_ppc_divde : ClangBuiltin<"__builtin_divde">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_ppc_divdeu : GCCBuiltin<"__builtin_divdeu">,
+ def int_ppc_divdeu : ClangBuiltin<"__builtin_divdeu">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_ppc_unpack_longdouble : GCCBuiltin<"__builtin_unpack_longdouble">,
+ def int_ppc_unpack_longdouble : ClangBuiltin<"__builtin_unpack_longdouble">,
Intrinsic<[llvm_double_ty],
[llvm_ppcf128_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_pack_longdouble : GCCBuiltin<"__builtin_pack_longdouble">,
+ def int_ppc_pack_longdouble : ClangBuiltin<"__builtin_pack_longdouble">,
Intrinsic<[llvm_ppcf128_ty],
[llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
// Generate a random number
- def int_ppc_darn : GCCBuiltin<"__builtin_darn">,
- Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
- def int_ppc_darnraw : GCCBuiltin<"__builtin_darn_raw">,
- Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
- def int_ppc_darn32 : GCCBuiltin<"__builtin_darn_32">,
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ def int_ppc_darn : ClangBuiltin<"__builtin_darn">,
+ Intrinsic<[llvm_i64_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
+ def int_ppc_darnraw : ClangBuiltin<"__builtin_darn_raw">,
+ Intrinsic<[llvm_i64_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
+ def int_ppc_darn32 : ClangBuiltin<"__builtin_darn_32">,
+ Intrinsic<[llvm_i32_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
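// A hedged usage sketch (C, POWER9 and later): darn delivers a fresh
// hardware random number on every execution, so the intrinsics are modeled
// with IntrNoMerge/IntrHasSideEffects rather than IntrNoMem:
//   long long a = __builtin_darn();
//   long long b = __builtin_darn(); // must not be merged with 'a'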
// Bit permute doubleword
- def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">,
+ def int_ppc_bpermd : ClangBuiltin<"__builtin_bpermd">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
// Parallel Bits Deposit/Extract Doubleword Builtins.
def int_ppc_pdepd
- : GCCBuiltin<"__builtin_pdepd">,
+ : ClangBuiltin<"__builtin_pdepd">,
Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_pextd
- : GCCBuiltin<"__builtin_pextd">,
+ : ClangBuiltin<"__builtin_pextd">,
Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
// Centrifuge Doubleword Builtin.
def int_ppc_cfuged
- : GCCBuiltin<"__builtin_cfuged">,
+ : ClangBuiltin<"__builtin_cfuged">,
Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
// Count Leading / Trailing Zeroes under bit Mask Builtins.
def int_ppc_cntlzdm
- : GCCBuiltin<"__builtin_cntlzdm">,
+ : ClangBuiltin<"__builtin_cntlzdm">,
Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_cnttzdm
- : GCCBuiltin<"__builtin_cnttzdm">,
+ : ClangBuiltin<"__builtin_cnttzdm">,
Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_truncf128_round_to_odd
- : GCCBuiltin<"__builtin_truncf128_round_to_odd">,
+ : ClangBuiltin<"__builtin_truncf128_round_to_odd">,
Intrinsic <[llvm_double_ty], [llvm_f128_ty], [IntrNoMem]>;
def int_ppc_sqrtf128_round_to_odd
- : GCCBuiltin<"__builtin_sqrtf128_round_to_odd">,
+ : ClangBuiltin<"__builtin_sqrtf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty], [IntrNoMem]>;
def int_ppc_addf128_round_to_odd
- : GCCBuiltin<"__builtin_addf128_round_to_odd">,
+ : ClangBuiltin<"__builtin_addf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
def int_ppc_subf128_round_to_odd
- : GCCBuiltin<"__builtin_subf128_round_to_odd">,
+ : ClangBuiltin<"__builtin_subf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
def int_ppc_mulf128_round_to_odd
- : GCCBuiltin<"__builtin_mulf128_round_to_odd">,
+ : ClangBuiltin<"__builtin_mulf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
def int_ppc_divf128_round_to_odd
- : GCCBuiltin<"__builtin_divf128_round_to_odd">,
+ : ClangBuiltin<"__builtin_divf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
def int_ppc_fmaf128_round_to_odd
- : GCCBuiltin<"__builtin_fmaf128_round_to_odd">,
+ : ClangBuiltin<"__builtin_fmaf128_round_to_odd">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty,llvm_f128_ty,llvm_f128_ty], [IntrNoMem]>;
def int_ppc_scalar_extract_expq
- : GCCBuiltin<"__builtin_vsx_scalar_extract_expq">,
+ : ClangBuiltin<"__builtin_vsx_scalar_extract_expq">,
Intrinsic <[llvm_i64_ty], [llvm_f128_ty], [IntrNoMem]>;
def int_ppc_scalar_insert_exp_qp
- : GCCBuiltin<"__builtin_vsx_scalar_insert_exp_qp">,
+ : ClangBuiltin<"__builtin_vsx_scalar_insert_exp_qp">,
Intrinsic <[llvm_f128_ty], [llvm_f128_ty, llvm_i64_ty], [IntrNoMem]>;
// Intrinsics defined to maintain XL compatibility
def int_ppc_tdw
- : GCCBuiltin<"__builtin_ppc_tdw">,
+ : ClangBuiltin<"__builtin_ppc_tdw">,
Intrinsic <[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
def int_ppc_tw
- : GCCBuiltin<"__builtin_ppc_tw">,
+ : ClangBuiltin<"__builtin_ppc_tw">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
def int_ppc_trapd
- : GCCBuiltin<"__builtin_ppc_trapd">,
+ : ClangBuiltin<"__builtin_ppc_trapd">,
Intrinsic <[], [llvm_i64_ty], []>;
def int_ppc_trap
- : GCCBuiltin<"__builtin_ppc_trap">,
+ : ClangBuiltin<"__builtin_ppc_trap">,
Intrinsic <[], [llvm_i32_ty], []>;
def int_ppc_fcfid
- : GCCBuiltin<"__builtin_ppc_fcfid">,
+ : ClangBuiltin<"__builtin_ppc_fcfid">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fcfud
- : GCCBuiltin<"__builtin_ppc_fcfud">,
+ : ClangBuiltin<"__builtin_ppc_fcfud">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fctid
- : GCCBuiltin<"__builtin_ppc_fctid">,
+ : ClangBuiltin<"__builtin_ppc_fctid">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fctidz
- : GCCBuiltin<"__builtin_ppc_fctidz">,
+ : ClangBuiltin<"__builtin_ppc_fctidz">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fctiw
- : GCCBuiltin<"__builtin_ppc_fctiw">,
+ : ClangBuiltin<"__builtin_ppc_fctiw">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fctiwz
- : GCCBuiltin<"__builtin_ppc_fctiwz">,
+ : ClangBuiltin<"__builtin_ppc_fctiwz">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fctudz
- : GCCBuiltin<"__builtin_ppc_fctudz">,
+ : ClangBuiltin<"__builtin_ppc_fctudz">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fctuwz
- : GCCBuiltin<"__builtin_ppc_fctuwz">,
+ : ClangBuiltin<"__builtin_ppc_fctuwz">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ // XL compatible select functions
+ // TODO: Add llvm_f128_ty support.
+ def int_ppc_maxfe
+ : Intrinsic<
+ [llvm_ppcf128_ty],
+ [llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
+ def int_ppc_maxfl
+ : Intrinsic<
+ [llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
+ def int_ppc_maxfs
+ : Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
+ def int_ppc_minfe
+ : Intrinsic<
+ [llvm_ppcf128_ty],
+ [llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
+ def int_ppc_minfl
+ : Intrinsic<
+ [llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
+ def int_ppc_minfs
+ : Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_vararg_ty],
+ [IntrNoMem]>;
}
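  // The six XL-compatible min/max records above carry no ClangBuiltin
  // binding yet, so they are reachable only via their derived IR names
  // (record int_ppc_maxfl becomes intrinsic llvm.ppc.maxfl, and so on).
  // Assumed shape of the resulting declaration (not shown in this diff);
  // the trailing llvm_vararg_ty is what yields the "...":
  //   declare double @llvm.ppc.maxfl(double, double, double, ...)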
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
@@ -167,14 +201,14 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
class PowerPC_Vec_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
- : GCCBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
+ : ClangBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
/// PowerPC_VSX_Intrinsic - Base class for all VSX intrinsics.
class PowerPC_VSX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
- : GCCBuiltin<!strconcat("__builtin_vsx_", GCCIntSuffix)>,
+ : ClangBuiltin<!strconcat("__builtin_vsx_", GCCIntSuffix)>,
Intrinsic<ret_types, param_types, properties>;
}
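For orientation, this is how the renamed helper classes expand for a concrete record; the sketch reuses int_ppc_vsx_xvcmpeqdp, which appears further down in this diff, and is illustrative rather than part of the patch:

  def int_ppc_vsx_xvcmpeqdp :
      PowerPC_VSX_Intrinsic<"xvcmpeqdp", [llvm_v2i64_ty],
                            [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  // ...is equivalent, after !strconcat folds the "__builtin_vsx_" prefix, to:
  //   ClangBuiltin<"__builtin_vsx_xvcmpeqdp">,
  //   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;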
@@ -289,31 +323,31 @@ class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Data Stream Control.
- def int_ppc_altivec_dss : GCCBuiltin<"__builtin_altivec_dss">,
+ def int_ppc_altivec_dss : ClangBuiltin<"__builtin_altivec_dss">,
Intrinsic<[], [llvm_i32_ty], []>;
- def int_ppc_altivec_dssall : GCCBuiltin<"__builtin_altivec_dssall">,
+ def int_ppc_altivec_dssall : ClangBuiltin<"__builtin_altivec_dssall">,
Intrinsic<[], [], []>;
- def int_ppc_altivec_dst : GCCBuiltin<"__builtin_altivec_dst">,
+ def int_ppc_altivec_dst : ClangBuiltin<"__builtin_altivec_dst">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
- def int_ppc_altivec_dstt : GCCBuiltin<"__builtin_altivec_dstt">,
+ def int_ppc_altivec_dstt : ClangBuiltin<"__builtin_altivec_dstt">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
- def int_ppc_altivec_dstst : GCCBuiltin<"__builtin_altivec_dstst">,
+ def int_ppc_altivec_dstst : ClangBuiltin<"__builtin_altivec_dstst">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
- def int_ppc_altivec_dststt : GCCBuiltin<"__builtin_altivec_dststt">,
+ def int_ppc_altivec_dststt : ClangBuiltin<"__builtin_altivec_dststt">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
[]>;
// VSCR access.
- def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">,
+ def int_ppc_altivec_mfvscr : ClangBuiltin<"__builtin_altivec_mfvscr">,
Intrinsic<[llvm_v8i16_ty], [], [IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">,
+ def int_ppc_altivec_mtvscr : ClangBuiltin<"__builtin_altivec_mtvscr">,
Intrinsic<[], [llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
@@ -349,354 +383,354 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
[IntrWriteMem, IntrArgMemOnly]>;
// Comparisons setting a vector.
- def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">,
+ def int_ppc_altivec_vcmpbfp : ClangBuiltin<"__builtin_altivec_vcmpbfp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpeqfp : GCCBuiltin<"__builtin_altivec_vcmpeqfp">,
+ def int_ppc_altivec_vcmpeqfp : ClangBuiltin<"__builtin_altivec_vcmpeqfp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgefp : GCCBuiltin<"__builtin_altivec_vcmpgefp">,
+ def int_ppc_altivec_vcmpgefp : ClangBuiltin<"__builtin_altivec_vcmpgefp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">,
+ def int_ppc_altivec_vcmpgtfp : ClangBuiltin<"__builtin_altivec_vcmpgtfp">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequd : GCCBuiltin<"__builtin_altivec_vcmpequd">,
+ def int_ppc_altivec_vcmpequd : ClangBuiltin<"__builtin_altivec_vcmpequd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsd : GCCBuiltin<"__builtin_altivec_vcmpgtsd">,
+ def int_ppc_altivec_vcmpgtsd : ClangBuiltin<"__builtin_altivec_vcmpgtsd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtud : GCCBuiltin<"__builtin_altivec_vcmpgtud">,
+ def int_ppc_altivec_vcmpgtud : ClangBuiltin<"__builtin_altivec_vcmpgtud">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">,
+ def int_ppc_altivec_vcmpequw : ClangBuiltin<"__builtin_altivec_vcmpequw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsw : GCCBuiltin<"__builtin_altivec_vcmpgtsw">,
+ def int_ppc_altivec_vcmpgtsw : ClangBuiltin<"__builtin_altivec_vcmpgtsw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">,
+ def int_ppc_altivec_vcmpgtuw : ClangBuiltin<"__builtin_altivec_vcmpgtuw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnew : GCCBuiltin<"__builtin_altivec_vcmpnew">,
+ def int_ppc_altivec_vcmpnew : ClangBuiltin<"__builtin_altivec_vcmpnew">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnezw : GCCBuiltin<"__builtin_altivec_vcmpnezw">,
+ def int_ppc_altivec_vcmpnezw : ClangBuiltin<"__builtin_altivec_vcmpnezw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">,
+ def int_ppc_altivec_vcmpequh : ClangBuiltin<"__builtin_altivec_vcmpequh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsh : GCCBuiltin<"__builtin_altivec_vcmpgtsh">,
+ def int_ppc_altivec_vcmpgtsh : ClangBuiltin<"__builtin_altivec_vcmpgtsh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">,
+ def int_ppc_altivec_vcmpgtuh : ClangBuiltin<"__builtin_altivec_vcmpgtuh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpneh : GCCBuiltin<"__builtin_altivec_vcmpneh">,
+ def int_ppc_altivec_vcmpneh : ClangBuiltin<"__builtin_altivec_vcmpneh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnezh : GCCBuiltin<"__builtin_altivec_vcmpnezh">,
+ def int_ppc_altivec_vcmpnezh : ClangBuiltin<"__builtin_altivec_vcmpnezh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">,
+ def int_ppc_altivec_vcmpequb : ClangBuiltin<"__builtin_altivec_vcmpequb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsb : GCCBuiltin<"__builtin_altivec_vcmpgtsb">,
+ def int_ppc_altivec_vcmpgtsb : ClangBuiltin<"__builtin_altivec_vcmpgtsb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">,
+ def int_ppc_altivec_vcmpgtub : ClangBuiltin<"__builtin_altivec_vcmpgtub">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpneb : GCCBuiltin<"__builtin_altivec_vcmpneb">,
+ def int_ppc_altivec_vcmpneb : ClangBuiltin<"__builtin_altivec_vcmpneb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnezb : GCCBuiltin<"__builtin_altivec_vcmpnezb">,
+ def int_ppc_altivec_vcmpnezb : ClangBuiltin<"__builtin_altivec_vcmpnezb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequq : GCCBuiltin<"__builtin_altivec_vcmpequq">,
+ def int_ppc_altivec_vcmpequq : ClangBuiltin<"__builtin_altivec_vcmpequq">,
Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsq : GCCBuiltin<"__builtin_altivec_vcmpgtsq">,
+ def int_ppc_altivec_vcmpgtsq : ClangBuiltin<"__builtin_altivec_vcmpgtsq">,
Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuq : GCCBuiltin<"__builtin_altivec_vcmpgtuq">,
+ def int_ppc_altivec_vcmpgtuq : ClangBuiltin<"__builtin_altivec_vcmpgtuq">,
Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequq_p : GCCBuiltin<"__builtin_altivec_vcmpequq_p">,
+ def int_ppc_altivec_vcmpequq_p : ClangBuiltin<"__builtin_altivec_vcmpequq_p">,
Intrinsic<[llvm_i32_ty],
[llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsq_p : GCCBuiltin<"__builtin_altivec_vcmpgtsq_p">,
+ def int_ppc_altivec_vcmpgtsq_p : ClangBuiltin<"__builtin_altivec_vcmpgtsq_p">,
Intrinsic<[llvm_i32_ty],
[llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuq_p : GCCBuiltin<"__builtin_altivec_vcmpgtuq_p">,
+ def int_ppc_altivec_vcmpgtuq_p : ClangBuiltin<"__builtin_altivec_vcmpgtuq_p">,
Intrinsic<[llvm_i32_ty],
[llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
[IntrNoMem]>;
// Predicate Comparisons. The first operand specifies interpretation of CR6.
- def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">,
+ def int_ppc_altivec_vcmpbfp_p : ClangBuiltin<"__builtin_altivec_vcmpbfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpeqfp_p : GCCBuiltin<"__builtin_altivec_vcmpeqfp_p">,
+ def int_ppc_altivec_vcmpeqfp_p : ClangBuiltin<"__builtin_altivec_vcmpeqfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgefp_p : GCCBuiltin<"__builtin_altivec_vcmpgefp_p">,
+ def int_ppc_altivec_vcmpgefp_p : ClangBuiltin<"__builtin_altivec_vcmpgefp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">,
+ def int_ppc_altivec_vcmpgtfp_p : ClangBuiltin<"__builtin_altivec_vcmpgtfp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequd_p : GCCBuiltin<"__builtin_altivec_vcmpequd_p">,
+ def int_ppc_altivec_vcmpequd_p : ClangBuiltin<"__builtin_altivec_vcmpequd_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsd_p : GCCBuiltin<"__builtin_altivec_vcmpgtsd_p">,
+ def int_ppc_altivec_vcmpgtsd_p : ClangBuiltin<"__builtin_altivec_vcmpgtsd_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtud_p : GCCBuiltin<"__builtin_altivec_vcmpgtud_p">,
+ def int_ppc_altivec_vcmpgtud_p : ClangBuiltin<"__builtin_altivec_vcmpgtud_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2i64_ty,llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">,
+ def int_ppc_altivec_vcmpequw_p : ClangBuiltin<"__builtin_altivec_vcmpequw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsw_p : GCCBuiltin<"__builtin_altivec_vcmpgtsw_p">,
+ def int_ppc_altivec_vcmpgtsw_p : ClangBuiltin<"__builtin_altivec_vcmpgtsw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">,
+ def int_ppc_altivec_vcmpgtuw_p : ClangBuiltin<"__builtin_altivec_vcmpgtuw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnew_p : GCCBuiltin<"__builtin_altivec_vcmpnew_p">,
+ def int_ppc_altivec_vcmpnew_p : ClangBuiltin<"__builtin_altivec_vcmpnew_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnezw_p : GCCBuiltin<"__builtin_altivec_vcmpnezw_p">,
+ def int_ppc_altivec_vcmpnezw_p : ClangBuiltin<"__builtin_altivec_vcmpnezw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">,
+ def int_ppc_altivec_vcmpequh_p : ClangBuiltin<"__builtin_altivec_vcmpequh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsh_p : GCCBuiltin<"__builtin_altivec_vcmpgtsh_p">,
+ def int_ppc_altivec_vcmpgtsh_p : ClangBuiltin<"__builtin_altivec_vcmpgtsh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">,
+ def int_ppc_altivec_vcmpgtuh_p : ClangBuiltin<"__builtin_altivec_vcmpgtuh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpneh_p : GCCBuiltin<"__builtin_altivec_vcmpneh_p">,
+ def int_ppc_altivec_vcmpneh_p : ClangBuiltin<"__builtin_altivec_vcmpneh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnezh_p : GCCBuiltin<"__builtin_altivec_vcmpnezh_p">,
+ def int_ppc_altivec_vcmpnezh_p : ClangBuiltin<"__builtin_altivec_vcmpnezh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">,
+ def int_ppc_altivec_vcmpequb_p : ClangBuiltin<"__builtin_altivec_vcmpequb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsb_p : GCCBuiltin<"__builtin_altivec_vcmpgtsb_p">,
+ def int_ppc_altivec_vcmpgtsb_p : ClangBuiltin<"__builtin_altivec_vcmpgtsb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">,
+ def int_ppc_altivec_vcmpgtub_p : ClangBuiltin<"__builtin_altivec_vcmpgtub_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpneb_p : GCCBuiltin<"__builtin_altivec_vcmpneb_p">,
+ def int_ppc_altivec_vcmpneb_p : ClangBuiltin<"__builtin_altivec_vcmpneb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vcmpnezb_p : GCCBuiltin<"__builtin_altivec_vcmpnezb_p">,
+ def int_ppc_altivec_vcmpnezb_p : ClangBuiltin<"__builtin_altivec_vcmpnezb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vclzlsbb : GCCBuiltin<"__builtin_altivec_vclzlsbb">,
+ def int_ppc_altivec_vclzlsbb : ClangBuiltin<"__builtin_altivec_vclzlsbb">,
Intrinsic<[llvm_i32_ty],[llvm_v16i8_ty],[IntrNoMem]>;
- def int_ppc_altivec_vctzlsbb : GCCBuiltin<"__builtin_altivec_vctzlsbb">,
+ def int_ppc_altivec_vctzlsbb : ClangBuiltin<"__builtin_altivec_vctzlsbb">,
Intrinsic<[llvm_i32_ty],[llvm_v16i8_ty],[IntrNoMem]>;
- def int_ppc_altivec_vprtybw : GCCBuiltin<"__builtin_altivec_vprtybw">,
+ def int_ppc_altivec_vprtybw : ClangBuiltin<"__builtin_altivec_vprtybw">,
Intrinsic<[llvm_v4i32_ty],[llvm_v4i32_ty],[IntrNoMem]>;
- def int_ppc_altivec_vprtybd : GCCBuiltin<"__builtin_altivec_vprtybd">,
+ def int_ppc_altivec_vprtybd : ClangBuiltin<"__builtin_altivec_vprtybd">,
Intrinsic<[llvm_v2i64_ty],[llvm_v2i64_ty],[IntrNoMem]>;
- def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
+ def int_ppc_altivec_vprtybq : ClangBuiltin<"__builtin_altivec_vprtybq">,
Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
// BCD intrinsics.
- def int_ppc_bcdadd : GCCBuiltin<"__builtin_ppc_bcdadd">, Intrinsic<
+ def int_ppc_bcdadd : ClangBuiltin<"__builtin_ppc_bcdadd">, Intrinsic<
[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_ppc_bcdadd_p : GCCBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic<
+ def int_ppc_bcdadd_p : ClangBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic<
[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, ImmArg<ArgIndex<0>>]>;
- def int_ppc_bcdsub : GCCBuiltin<"__builtin_ppc_bcdsub">, Intrinsic<
+ def int_ppc_bcdsub : ClangBuiltin<"__builtin_ppc_bcdsub">, Intrinsic<
[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_ppc_bcdsub_p : GCCBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic<
+ def int_ppc_bcdsub_p : ClangBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic<
[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, ImmArg<ArgIndex<0>>]>;
// P10 Vector Extract with Mask
- def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">,
+ def int_ppc_altivec_vextractbm : ClangBuiltin<"__builtin_altivec_vextractbm">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextracthm : GCCBuiltin<"__builtin_altivec_vextracthm">,
+ def int_ppc_altivec_vextracthm : ClangBuiltin<"__builtin_altivec_vextracthm">,
Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextractwm : GCCBuiltin<"__builtin_altivec_vextractwm">,
+ def int_ppc_altivec_vextractwm : ClangBuiltin<"__builtin_altivec_vextractwm">,
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextractdm : GCCBuiltin<"__builtin_altivec_vextractdm">,
+ def int_ppc_altivec_vextractdm : ClangBuiltin<"__builtin_altivec_vextractdm">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
+ def int_ppc_altivec_vextractqm : ClangBuiltin<"__builtin_altivec_vextractqm">,
Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
// P10 Vector Expand with Mask
- def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">,
+ def int_ppc_altivec_vexpandbm : ClangBuiltin<"__builtin_altivec_vexpandbm">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">,
+ def int_ppc_altivec_vexpandhm : ClangBuiltin<"__builtin_altivec_vexpandhm">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">,
+ def int_ppc_altivec_vexpandwm : ClangBuiltin<"__builtin_altivec_vexpandwm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">,
+ def int_ppc_altivec_vexpanddm : ClangBuiltin<"__builtin_altivec_vexpanddm">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
- def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">,
+ def int_ppc_altivec_vexpandqm : ClangBuiltin<"__builtin_altivec_vexpandqm">,
Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>;
// P10 Vector Count with Mask intrinsics.
- def int_ppc_altivec_vcntmbb : GCCBuiltin<"__builtin_altivec_vcntmbb">,
+ def int_ppc_altivec_vcntmbb : ClangBuiltin<"__builtin_altivec_vcntmbb">,
Intrinsic<[llvm_i64_ty], [llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_altivec_vcntmbh : GCCBuiltin<"__builtin_altivec_vcntmbh">,
+ def int_ppc_altivec_vcntmbh : ClangBuiltin<"__builtin_altivec_vcntmbh">,
Intrinsic<[llvm_i64_ty], [llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_altivec_vcntmbw : GCCBuiltin<"__builtin_altivec_vcntmbw">,
+ def int_ppc_altivec_vcntmbw : ClangBuiltin<"__builtin_altivec_vcntmbw">,
Intrinsic<[llvm_i64_ty], [llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_altivec_vcntmbd : GCCBuiltin<"__builtin_altivec_vcntmbd">,
+ def int_ppc_altivec_vcntmbd : ClangBuiltin<"__builtin_altivec_vcntmbd">,
Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
// P10 Move to VSR with Mask Intrinsics.
- def int_ppc_altivec_mtvsrbm : GCCBuiltin<"__builtin_altivec_mtvsrbm">,
+ def int_ppc_altivec_mtvsrbm : ClangBuiltin<"__builtin_altivec_mtvsrbm">,
Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_ppc_altivec_mtvsrhm : GCCBuiltin<"__builtin_altivec_mtvsrhm">,
+ def int_ppc_altivec_mtvsrhm : ClangBuiltin<"__builtin_altivec_mtvsrhm">,
Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_ppc_altivec_mtvsrwm : GCCBuiltin<"__builtin_altivec_mtvsrwm">,
+ def int_ppc_altivec_mtvsrwm : ClangBuiltin<"__builtin_altivec_mtvsrwm">,
Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_ppc_altivec_mtvsrdm : GCCBuiltin<"__builtin_altivec_mtvsrdm">,
+ def int_ppc_altivec_mtvsrdm : ClangBuiltin<"__builtin_altivec_mtvsrdm">,
Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_ppc_altivec_mtvsrqm : GCCBuiltin<"__builtin_altivec_mtvsrqm">,
+ def int_ppc_altivec_mtvsrqm : ClangBuiltin<"__builtin_altivec_mtvsrqm">,
Intrinsic<[llvm_v1i128_ty], [llvm_i64_ty], [IntrNoMem]>;
// P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
- def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
+ def int_ppc_altivec_vpdepd : ClangBuiltin<"__builtin_altivec_vpdepd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vpextd : GCCBuiltin<"__builtin_altivec_vpextd">,
+ def int_ppc_altivec_vpextd : ClangBuiltin<"__builtin_altivec_vpextd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
// P10 Vector String Isolate Intrinsics.
- def int_ppc_altivec_vstribr : GCCBuiltin<"__builtin_altivec_vstribr">,
+ def int_ppc_altivec_vstribr : ClangBuiltin<"__builtin_altivec_vstribr">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vstribl : GCCBuiltin<"__builtin_altivec_vstribl">,
+ def int_ppc_altivec_vstribl : ClangBuiltin<"__builtin_altivec_vstribl">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vstrihr : GCCBuiltin<"__builtin_altivec_vstrihr">,
+ def int_ppc_altivec_vstrihr : ClangBuiltin<"__builtin_altivec_vstrihr">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vstrihl : GCCBuiltin<"__builtin_altivec_vstrihl">,
+ def int_ppc_altivec_vstrihl : ClangBuiltin<"__builtin_altivec_vstrihl">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
// Predicate Intrinsics: The first operand specifies interpretation of CR6.
- def int_ppc_altivec_vstribr_p : GCCBuiltin<"__builtin_altivec_vstribr_p">,
+ def int_ppc_altivec_vstribr_p : ClangBuiltin<"__builtin_altivec_vstribr_p">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vstribl_p : GCCBuiltin<"__builtin_altivec_vstribl_p">,
+ def int_ppc_altivec_vstribl_p : ClangBuiltin<"__builtin_altivec_vstribl_p">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vstrihr_p : GCCBuiltin<"__builtin_altivec_vstrihr_p">,
+ def int_ppc_altivec_vstrihr_p : ClangBuiltin<"__builtin_altivec_vstrihr_p">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vstrihl_p : GCCBuiltin<"__builtin_altivec_vstrihl_p">,
+ def int_ppc_altivec_vstrihl_p : ClangBuiltin<"__builtin_altivec_vstrihl_p">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>;
// P10 Vector Centrifuge Builtin.
- def int_ppc_altivec_vcfuged : GCCBuiltin<"__builtin_altivec_vcfuged">,
+ def int_ppc_altivec_vcfuged : ClangBuiltin<"__builtin_altivec_vcfuged">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
// P10 Vector Gather Every Nth Bit Builtin.
- def int_ppc_altivec_vgnb : GCCBuiltin<"__builtin_altivec_vgnb">,
+ def int_ppc_altivec_vgnb : ClangBuiltin<"__builtin_altivec_vgnb">,
Intrinsic<[llvm_i64_ty], [llvm_v1i128_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
// P10 Vector Clear Bytes
- def int_ppc_altivec_vclrlb : GCCBuiltin<"__builtin_altivec_vclrlb">,
+ def int_ppc_altivec_vclrlb : ClangBuiltin<"__builtin_altivec_vclrlb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vclrrb : GCCBuiltin<"__builtin_altivec_vclrrb">,
+ def int_ppc_altivec_vclrrb : ClangBuiltin<"__builtin_altivec_vclrrb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem]>;
// P10 Vector Shift Double Bit Immediate.
- def int_ppc_altivec_vsldbi : GCCBuiltin<"__builtin_altivec_vsldbi">,
+ def int_ppc_altivec_vsldbi : ClangBuiltin<"__builtin_altivec_vsldbi">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_ppc_altivec_vsrdbi : GCCBuiltin<"__builtin_altivec_vsrdbi">,
+ def int_ppc_altivec_vsrdbi : ClangBuiltin<"__builtin_altivec_vsrdbi">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
// P10 Vector Insert.
- def int_ppc_altivec_vinsblx : GCCBuiltin<"__builtin_altivec_vinsblx">,
+ def int_ppc_altivec_vinsblx : ClangBuiltin<"__builtin_altivec_vinsblx">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinsbrx : GCCBuiltin<"__builtin_altivec_vinsbrx">,
+ def int_ppc_altivec_vinsbrx : ClangBuiltin<"__builtin_altivec_vinsbrx">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinshlx : GCCBuiltin<"__builtin_altivec_vinshlx">,
+ def int_ppc_altivec_vinshlx : ClangBuiltin<"__builtin_altivec_vinshlx">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinshrx : GCCBuiltin<"__builtin_altivec_vinshrx">,
+ def int_ppc_altivec_vinshrx : ClangBuiltin<"__builtin_altivec_vinshrx">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinswlx : GCCBuiltin<"__builtin_altivec_vinswlx">,
+ def int_ppc_altivec_vinswlx : ClangBuiltin<"__builtin_altivec_vinswlx">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinswrx : GCCBuiltin<"__builtin_altivec_vinswrx">,
+ def int_ppc_altivec_vinswrx : ClangBuiltin<"__builtin_altivec_vinswrx">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinsdlx : GCCBuiltin<"__builtin_altivec_vinsdlx">,
+ def int_ppc_altivec_vinsdlx : ClangBuiltin<"__builtin_altivec_vinsdlx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinsdrx : GCCBuiltin<"__builtin_altivec_vinsdrx">,
+ def int_ppc_altivec_vinsdrx : ClangBuiltin<"__builtin_altivec_vinsdrx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinsbvlx : GCCBuiltin<"__builtin_altivec_vinsbvlx">,
+ def int_ppc_altivec_vinsbvlx : ClangBuiltin<"__builtin_altivec_vinsbvlx">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinsbvrx : GCCBuiltin<"__builtin_altivec_vinsbvrx">,
+ def int_ppc_altivec_vinsbvrx : ClangBuiltin<"__builtin_altivec_vinsbvrx">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinshvlx : GCCBuiltin<"__builtin_altivec_vinshvlx">,
+ def int_ppc_altivec_vinshvlx : ClangBuiltin<"__builtin_altivec_vinshvlx">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinshvrx : GCCBuiltin<"__builtin_altivec_vinshvrx">,
+ def int_ppc_altivec_vinshvrx : ClangBuiltin<"__builtin_altivec_vinshvrx">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinswvlx : GCCBuiltin<"__builtin_altivec_vinswvlx">,
+ def int_ppc_altivec_vinswvlx : ClangBuiltin<"__builtin_altivec_vinswvlx">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vinswvrx : GCCBuiltin<"__builtin_altivec_vinswvrx">,
+ def int_ppc_altivec_vinswvrx : ClangBuiltin<"__builtin_altivec_vinswvrx">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
@@ -710,35 +744,35 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
[llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
// P10 Vector Extract.
- def int_ppc_altivec_vextdubvlx : GCCBuiltin<"__builtin_altivec_vextdubvlx">,
+ def int_ppc_altivec_vextdubvlx : ClangBuiltin<"__builtin_altivec_vextdubvlx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vextdubvrx : GCCBuiltin<"__builtin_altivec_vextdubvrx">,
+ def int_ppc_altivec_vextdubvrx : ClangBuiltin<"__builtin_altivec_vextdubvrx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vextduhvlx : GCCBuiltin<"__builtin_altivec_vextduhvlx">,
+ def int_ppc_altivec_vextduhvlx : ClangBuiltin<"__builtin_altivec_vextduhvlx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vextduhvrx : GCCBuiltin<"__builtin_altivec_vextduhvrx">,
+ def int_ppc_altivec_vextduhvrx : ClangBuiltin<"__builtin_altivec_vextduhvrx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vextduwvlx : GCCBuiltin<"__builtin_altivec_vextduwvlx">,
+ def int_ppc_altivec_vextduwvlx : ClangBuiltin<"__builtin_altivec_vextduwvlx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vextduwvrx : GCCBuiltin<"__builtin_altivec_vextduwvrx">,
+ def int_ppc_altivec_vextduwvrx : ClangBuiltin<"__builtin_altivec_vextduwvrx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vextddvlx : GCCBuiltin<"__builtin_altivec_vextddvlx">,
+ def int_ppc_altivec_vextddvlx : ClangBuiltin<"__builtin_altivec_vextddvlx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vextddvrx : GCCBuiltin<"__builtin_altivec_vextddvrx">,
+ def int_ppc_altivec_vextddvrx : ClangBuiltin<"__builtin_altivec_vextddvrx">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem]>;
@@ -796,229 +830,229 @@ def int_ppc_altivec_vsubcuq : PowerPC_Vec_QQQ_Intrinsic<"vsubcuq">;
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// Saturating multiply-adds.
- def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">,
+ def int_ppc_altivec_vmhaddshs : ClangBuiltin<"__builtin_altivec_vmhaddshs">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">,
+ def int_ppc_altivec_vmhraddshs : ClangBuiltin<"__builtin_altivec_vmhraddshs">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">,
+ def int_ppc_altivec_vmaddfp : ClangBuiltin<"__builtin_altivec_vmaddfp">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vnmsubfp : GCCBuiltin<"__builtin_altivec_vnmsubfp">,
+ def int_ppc_altivec_vnmsubfp : ClangBuiltin<"__builtin_altivec_vnmsubfp">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
// Vector Multiply Sum Instructions.
- def int_ppc_altivec_vmsummbm : GCCBuiltin<"__builtin_altivec_vmsummbm">,
+ def int_ppc_altivec_vmsummbm : ClangBuiltin<"__builtin_altivec_vmsummbm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumshm : GCCBuiltin<"__builtin_altivec_vmsumshm">,
+ def int_ppc_altivec_vmsumshm : ClangBuiltin<"__builtin_altivec_vmsumshm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
+ def int_ppc_altivec_vmsumshs : ClangBuiltin<"__builtin_altivec_vmsumshs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
+ def int_ppc_altivec_vmsumubm : ClangBuiltin<"__builtin_altivec_vmsumubm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
+ def int_ppc_altivec_vmsumuhm : ClangBuiltin<"__builtin_altivec_vmsumuhm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">,
+ def int_ppc_altivec_vmsumudm : ClangBuiltin<"__builtin_altivec_vmsumudm">,
Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v1i128_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
+ def int_ppc_altivec_vmsumuhs : ClangBuiltin<"__builtin_altivec_vmsumuhs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">,
+ def int_ppc_altivec_vmsumcud : ClangBuiltin<"__builtin_altivec_vmsumcud">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>;
// Vector Multiply Instructions.
- def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
+ def int_ppc_altivec_vmulesb : ClangBuiltin<"__builtin_altivec_vmulesb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">,
+ def int_ppc_altivec_vmulesh : ClangBuiltin<"__builtin_altivec_vmulesh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">,
+ def int_ppc_altivec_vmulesw : ClangBuiltin<"__builtin_altivec_vmulesw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">;
- def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
+ def int_ppc_altivec_vmuleub : ClangBuiltin<"__builtin_altivec_vmuleub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">,
+ def int_ppc_altivec_vmuleuh : ClangBuiltin<"__builtin_altivec_vmuleuh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">,
+ def int_ppc_altivec_vmuleuw : ClangBuiltin<"__builtin_altivec_vmuleuw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">;
- def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
+ def int_ppc_altivec_vmulosb : ClangBuiltin<"__builtin_altivec_vmulosb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">,
+ def int_ppc_altivec_vmulosh : ClangBuiltin<"__builtin_altivec_vmulosh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">,
+ def int_ppc_altivec_vmulosw : ClangBuiltin<"__builtin_altivec_vmulosw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">;
- def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
+ def int_ppc_altivec_vmuloub : ClangBuiltin<"__builtin_altivec_vmuloub">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">,
+ def int_ppc_altivec_vmulouh : ClangBuiltin<"__builtin_altivec_vmulouh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">,
+ def int_ppc_altivec_vmulouw : ClangBuiltin<"__builtin_altivec_vmulouw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">;
// Vector Sum Instructions.
- def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
+ def int_ppc_altivec_vsumsws : ClangBuiltin<"__builtin_altivec_vsumsws">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">,
+ def int_ppc_altivec_vsum2sws : ClangBuiltin<"__builtin_altivec_vsum2sws">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">,
+ def int_ppc_altivec_vsum4sbs : ClangBuiltin<"__builtin_altivec_vsum4sbs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">,
+ def int_ppc_altivec_vsum4shs : ClangBuiltin<"__builtin_altivec_vsum4shs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">,
+ def int_ppc_altivec_vsum4ubs : ClangBuiltin<"__builtin_altivec_vsum4ubs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
// Vector Sign Extension Instructions
- def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">,
+ def int_ppc_altivec_vextsb2w : ClangBuiltin<"__builtin_altivec_vextsb2w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextsb2d : GCCBuiltin<"__builtin_altivec_vextsb2d">,
+ def int_ppc_altivec_vextsb2d : ClangBuiltin<"__builtin_altivec_vextsb2d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextsh2w : GCCBuiltin<"__builtin_altivec_vextsh2w">,
+ def int_ppc_altivec_vextsh2w : ClangBuiltin<"__builtin_altivec_vextsh2w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextsh2d : GCCBuiltin<"__builtin_altivec_vextsh2d">,
+ def int_ppc_altivec_vextsh2d : ClangBuiltin<"__builtin_altivec_vextsh2d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextsw2d : GCCBuiltin<"__builtin_altivec_vextsw2d">,
+ def int_ppc_altivec_vextsw2d : ClangBuiltin<"__builtin_altivec_vextsw2d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vextsd2q : GCCBuiltin<"__builtin_altivec_vextsd2q">,
+ def int_ppc_altivec_vextsd2q : ClangBuiltin<"__builtin_altivec_vextsd2q">,
Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty], [IntrNoMem]>;
// Other multiplies.
- def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
+ def int_ppc_altivec_vmladduhm : ClangBuiltin<"__builtin_altivec_vmladduhm">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
// Packs.
- def int_ppc_altivec_vpkpx : GCCBuiltin<"__builtin_altivec_vpkpx">,
+ def int_ppc_altivec_vpkpx : ClangBuiltin<"__builtin_altivec_vpkpx">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">,
+ def int_ppc_altivec_vpkshss : ClangBuiltin<"__builtin_altivec_vpkshss">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">,
+ def int_ppc_altivec_vpkshus : ClangBuiltin<"__builtin_altivec_vpkshus">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
+ def int_ppc_altivec_vpkswss : ClangBuiltin<"__builtin_altivec_vpkswss">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
+ def int_ppc_altivec_vpkswus : ClangBuiltin<"__builtin_altivec_vpkswus">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">,
+ def int_ppc_altivec_vpksdss : ClangBuiltin<"__builtin_altivec_vpksdss">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem, IntrHasSideEffects]>;
- def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">,
+ def int_ppc_altivec_vpksdus : ClangBuiltin<"__builtin_altivec_vpksdus">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem, IntrHasSideEffects]>;
// vpkuhum is lowered to a shuffle.
- def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">,
+ def int_ppc_altivec_vpkuhus : ClangBuiltin<"__builtin_altivec_vpkuhus">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrHasSideEffects]>;
// vpkuwum is lowered to a shuffle.
- def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">,
+ def int_ppc_altivec_vpkuwus : ClangBuiltin<"__builtin_altivec_vpkuwus">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, IntrHasSideEffects]>;
// vpkudum is lowered to a shuffle.
- def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">,
+ def int_ppc_altivec_vpkudus : ClangBuiltin<"__builtin_altivec_vpkudus">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem, IntrHasSideEffects]>;
// Unpacks.
- def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">,
+ def int_ppc_altivec_vupkhpx : ClangBuiltin<"__builtin_altivec_vupkhpx">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupkhsb : GCCBuiltin<"__builtin_altivec_vupkhsb">,
+ def int_ppc_altivec_vupkhsb : ClangBuiltin<"__builtin_altivec_vupkhsb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupkhsh : GCCBuiltin<"__builtin_altivec_vupkhsh">,
+ def int_ppc_altivec_vupkhsh : ClangBuiltin<"__builtin_altivec_vupkhsh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupkhsw : GCCBuiltin<"__builtin_altivec_vupkhsw">,
+ def int_ppc_altivec_vupkhsw : ClangBuiltin<"__builtin_altivec_vupkhsw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupklpx : GCCBuiltin<"__builtin_altivec_vupklpx">,
+ def int_ppc_altivec_vupklpx : ClangBuiltin<"__builtin_altivec_vupklpx">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupklsb : GCCBuiltin<"__builtin_altivec_vupklsb">,
+ def int_ppc_altivec_vupklsb : ClangBuiltin<"__builtin_altivec_vupklsb">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupklsh : GCCBuiltin<"__builtin_altivec_vupklsh">,
+ def int_ppc_altivec_vupklsh : ClangBuiltin<"__builtin_altivec_vupklsh">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupklsw : GCCBuiltin<"__builtin_altivec_vupklsw">,
+ def int_ppc_altivec_vupklsw : ClangBuiltin<"__builtin_altivec_vupklsw">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
// FP <-> integer conversion.
- def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">,
+ def int_ppc_altivec_vcfsx : ClangBuiltin<"__builtin_altivec_vcfsx">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">,
+ def int_ppc_altivec_vcfux : ClangBuiltin<"__builtin_altivec_vcfux">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">,
+ def int_ppc_altivec_vctsxs : ClangBuiltin<"__builtin_altivec_vctsxs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">,
+ def int_ppc_altivec_vctuxs : ClangBuiltin<"__builtin_altivec_vctuxs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">,
+ def int_ppc_altivec_vrfim : ClangBuiltin<"__builtin_altivec_vrfim">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vrfin : GCCBuiltin<"__builtin_altivec_vrfin">,
+ def int_ppc_altivec_vrfin : ClangBuiltin<"__builtin_altivec_vrfin">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vrfip : GCCBuiltin<"__builtin_altivec_vrfip">,
+ def int_ppc_altivec_vrfip : ClangBuiltin<"__builtin_altivec_vrfip">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">,
+ def int_ppc_altivec_vrfiz : ClangBuiltin<"__builtin_altivec_vrfiz">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
// Add Extended Quadword
- def int_ppc_altivec_vaddeuqm : GCCBuiltin<"__builtin_altivec_vaddeuqm">,
+ def int_ppc_altivec_vaddeuqm : ClangBuiltin<"__builtin_altivec_vaddeuqm">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vaddecuq : GCCBuiltin<"__builtin_altivec_vaddecuq">,
+ def int_ppc_altivec_vaddecuq : ClangBuiltin<"__builtin_altivec_vaddecuq">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
// Sub Extended Quadword
- def int_ppc_altivec_vsubeuqm : GCCBuiltin<"__builtin_altivec_vsubeuqm">,
+ def int_ppc_altivec_vsubeuqm : ClangBuiltin<"__builtin_altivec_vsubeuqm">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vsubecuq : GCCBuiltin<"__builtin_altivec_vsubecuq">,
+ def int_ppc_altivec_vsubecuq : ClangBuiltin<"__builtin_altivec_vsubecuq">,
Intrinsic<[llvm_v1i128_ty],
[llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
[IntrNoMem]>;
// P10 Vector Count Leading / Trailing Zeroes under bit Mask Builtins.
- def int_ppc_altivec_vclzdm : GCCBuiltin<"__builtin_altivec_vclzdm">,
+ def int_ppc_altivec_vclzdm : ClangBuiltin<"__builtin_altivec_vclzdm">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vctzdm : GCCBuiltin<"__builtin_altivec_vctzdm">,
+ def int_ppc_altivec_vctzdm : ClangBuiltin<"__builtin_altivec_vctzdm">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
}
@@ -1056,18 +1090,18 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_lvsr :
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
- def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">,
+ def int_ppc_altivec_vperm : ClangBuiltin<"__builtin_altivec_vperm_4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
+ def int_ppc_altivec_vsel : ClangBuiltin<"__builtin_altivec_vsel_4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vgbbd : GCCBuiltin<"__builtin_altivec_vgbbd">,
+ def int_ppc_altivec_vgbbd : ClangBuiltin<"__builtin_altivec_vgbbd">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
+ def int_ppc_altivec_vbpermq : ClangBuiltin<"__builtin_altivec_vbpermq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
- def int_ppc_altivec_vbpermd : GCCBuiltin<"__builtin_altivec_vbpermd">,
+ def int_ppc_altivec_vbpermd : ClangBuiltin<"__builtin_altivec_vbpermd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty],
[IntrNoMem]>;
}
@@ -1081,23 +1115,23 @@ def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
// Crypto
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_crypto_vsbox :
- GCCBuiltin<"__builtin_altivec_crypto_vsbox">,
+ ClangBuiltin<"__builtin_altivec_crypto_vsbox">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vpermxor :
- GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
+ ClangBuiltin<"__builtin_altivec_crypto_vpermxor">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vpermxor_be :
- GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">,
+ ClangBuiltin<"__builtin_altivec_crypto_vpermxor_be">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmad :
- GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
+ ClangBuiltin<"__builtin_altivec_crypto_vshasigmad">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
def int_ppc_altivec_crypto_vshasigmaw :
- GCCBuiltin<"__builtin_altivec_crypto_vshasigmaw">,
+ ClangBuiltin<"__builtin_altivec_crypto_vshasigmaw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
}
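A reminder of what the ImmArg annotations on the vshasigma records enforce (general intrinsic-property semantics, not something this patch introduces):

  // Operands at 0-based indices 1 and 2 must be integer constants at every
  // call site; the IR verifier rejects anything else. A hypothetical call:
  //   call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %v,
  //                                                      i32 1, i32 15)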
@@ -1224,52 +1258,52 @@ def int_ppc_vsx_xvrdpip :
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Vector reciprocal estimate
-def int_ppc_vsx_xvresp : GCCBuiltin<"__builtin_vsx_xvresp">,
+def int_ppc_vsx_xvresp : ClangBuiltin<"__builtin_vsx_xvresp">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvredp : GCCBuiltin<"__builtin_vsx_xvredp">,
+def int_ppc_vsx_xvredp : ClangBuiltin<"__builtin_vsx_xvredp">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Vector rsqrte
-def int_ppc_vsx_xvrsqrtesp : GCCBuiltin<"__builtin_vsx_xvrsqrtesp">,
+def int_ppc_vsx_xvrsqrtesp : ClangBuiltin<"__builtin_vsx_xvrsqrtesp">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvrsqrtedp : GCCBuiltin<"__builtin_vsx_xvrsqrtedp">,
+def int_ppc_vsx_xvrsqrtedp : ClangBuiltin<"__builtin_vsx_xvrsqrtedp">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Vector compare
def int_ppc_vsx_xvcmpeqdp :
PowerPC_VSX_Intrinsic<"xvcmpeqdp", [llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvcmpeqdp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqdp_p">,
+def int_ppc_vsx_xvcmpeqdp_p : ClangBuiltin<"__builtin_vsx_xvcmpeqdp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty],
[IntrNoMem]>;
def int_ppc_vsx_xvcmpeqsp :
PowerPC_VSX_Intrinsic<"xvcmpeqsp", [llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvcmpeqsp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqsp_p">,
+def int_ppc_vsx_xvcmpeqsp_p : ClangBuiltin<"__builtin_vsx_xvcmpeqsp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_vsx_xvcmpgedp :
PowerPC_VSX_Intrinsic<"xvcmpgedp", [llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvcmpgedp_p : GCCBuiltin<"__builtin_vsx_xvcmpgedp_p">,
+def int_ppc_vsx_xvcmpgedp_p : ClangBuiltin<"__builtin_vsx_xvcmpgedp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty],
[IntrNoMem]>;
def int_ppc_vsx_xvcmpgesp :
PowerPC_VSX_Intrinsic<"xvcmpgesp", [llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvcmpgesp_p : GCCBuiltin<"__builtin_vsx_xvcmpgesp_p">,
+def int_ppc_vsx_xvcmpgesp_p : ClangBuiltin<"__builtin_vsx_xvcmpgesp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_vsx_xvcmpgtdp :
PowerPC_VSX_Intrinsic<"xvcmpgtdp", [llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvcmpgtdp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtdp_p">,
+def int_ppc_vsx_xvcmpgtdp_p : ClangBuiltin<"__builtin_vsx_xvcmpgtdp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty],
[IntrNoMem]>;
def int_ppc_vsx_xvcmpgtsp :
PowerPC_VSX_Intrinsic<"xvcmpgtsp", [llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-def int_ppc_vsx_xvcmpgtsp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtsp_p">,
+def int_ppc_vsx_xvcmpgtsp_p : ClangBuiltin<"__builtin_vsx_xvcmpgtsp_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
def int_ppc_vsx_xxleqv :
@@ -1381,21 +1415,21 @@ def int_ppc_vsx_xxgenpcvdm :
// P10 VSX Vector permute extended.
def int_ppc_vsx_xxpermx :
- GCCBuiltin<"__builtin_vsx_xxpermx">,
+ ClangBuiltin<"__builtin_vsx_xxpermx">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty,llvm_v16i8_ty,llvm_v16i8_ty,llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
// P10 VSX Vector Blend Variable.
-def int_ppc_vsx_xxblendvb: GCCBuiltin<"__builtin_vsx_xxblendvb">,
+def int_ppc_vsx_xxblendvb: ClangBuiltin<"__builtin_vsx_xxblendvb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
-def int_ppc_vsx_xxblendvh: GCCBuiltin<"__builtin_vsx_xxblendvh">,
+def int_ppc_vsx_xxblendvh: ClangBuiltin<"__builtin_vsx_xxblendvh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
-def int_ppc_vsx_xxblendvw: GCCBuiltin<"__builtin_vsx_xxblendvw">,
+def int_ppc_vsx_xxblendvw: ClangBuiltin<"__builtin_vsx_xxblendvw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
-def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">,
+def int_ppc_vsx_xxblendvd: ClangBuiltin<"__builtin_vsx_xxblendvd">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
}
@@ -1405,64 +1439,68 @@ def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">,
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
-def int_ppc_tbegin : GCCBuiltin<"__builtin_tbegin">,
+def int_ppc_tbegin : ClangBuiltin<"__builtin_tbegin">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
-def int_ppc_tend : GCCBuiltin<"__builtin_tend">,
+def int_ppc_tend : ClangBuiltin<"__builtin_tend">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
-def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">,
+def int_ppc_tabort : ClangBuiltin<"__builtin_tabort">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
-def int_ppc_tabortwc : GCCBuiltin<"__builtin_tabortwc">,
+def int_ppc_tabortwc : ClangBuiltin<"__builtin_tabortwc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-def int_ppc_tabortwci : GCCBuiltin<"__builtin_tabortwci">,
+def int_ppc_tabortwci : ClangBuiltin<"__builtin_tabortwci">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-def int_ppc_tabortdc : GCCBuiltin<"__builtin_tabortdc">,
+def int_ppc_tabortdc : ClangBuiltin<"__builtin_tabortdc">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-def int_ppc_tabortdci : GCCBuiltin<"__builtin_tabortdci">,
+def int_ppc_tabortdci : ClangBuiltin<"__builtin_tabortdci">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
-def int_ppc_tcheck : GCCBuiltin<"__builtin_tcheck">,
+def int_ppc_tcheck : ClangBuiltin<"__builtin_tcheck">,
Intrinsic<[llvm_i32_ty], [], []>;
-def int_ppc_treclaim : GCCBuiltin<"__builtin_treclaim">,
+def int_ppc_treclaim : ClangBuiltin<"__builtin_treclaim">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
-def int_ppc_trechkpt : GCCBuiltin<"__builtin_trechkpt">,
+def int_ppc_trechkpt : ClangBuiltin<"__builtin_trechkpt">,
Intrinsic<[llvm_i32_ty], [], []>;
-def int_ppc_tsr : GCCBuiltin<"__builtin_tsr">,
+def int_ppc_tsr : ClangBuiltin<"__builtin_tsr">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
-def int_ppc_get_texasr : GCCBuiltin<"__builtin_get_texasr">,
+def int_ppc_get_texasr : ClangBuiltin<"__builtin_get_texasr">,
Intrinsic<[llvm_i64_ty], [], []>;
-def int_ppc_get_texasru : GCCBuiltin<"__builtin_get_texasru">,
+def int_ppc_get_texasru : ClangBuiltin<"__builtin_get_texasru">,
Intrinsic<[llvm_i64_ty], [], []>;
-def int_ppc_get_tfhar : GCCBuiltin<"__builtin_get_tfhar">,
+def int_ppc_get_tfhar : ClangBuiltin<"__builtin_get_tfhar">,
Intrinsic<[llvm_i64_ty], [], []>;
-def int_ppc_get_tfiar : GCCBuiltin<"__builtin_get_tfiar">,
+def int_ppc_get_tfiar : ClangBuiltin<"__builtin_get_tfiar">,
Intrinsic<[llvm_i64_ty], [], []>;
-def int_ppc_set_texasr : GCCBuiltin<"__builtin_set_texasr">,
+def int_ppc_set_texasr : ClangBuiltin<"__builtin_set_texasr">,
Intrinsic<[], [llvm_i64_ty], []>;
-def int_ppc_set_texasru : GCCBuiltin<"__builtin_set_texasru">,
+def int_ppc_set_texasru : ClangBuiltin<"__builtin_set_texasru">,
Intrinsic<[], [llvm_i64_ty], []>;
-def int_ppc_set_tfhar : GCCBuiltin<"__builtin_set_tfhar">,
+def int_ppc_set_tfhar : ClangBuiltin<"__builtin_set_tfhar">,
Intrinsic<[], [llvm_i64_ty], []>;
-def int_ppc_set_tfiar : GCCBuiltin<"__builtin_set_tfiar">,
+def int_ppc_set_tfiar : ClangBuiltin<"__builtin_set_tfiar">,
Intrinsic<[], [llvm_i64_ty], []>;
// Extended mnemonics
-def int_ppc_tendall : GCCBuiltin<"__builtin_tendall">,
+def int_ppc_tendall : ClangBuiltin<"__builtin_tendall">,
Intrinsic<[llvm_i32_ty], [], []>;
-def int_ppc_tresume : GCCBuiltin<"__builtin_tresume">,
+def int_ppc_tresume : ClangBuiltin<"__builtin_tresume">,
Intrinsic<[llvm_i32_ty], [], []>;
-def int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">,
+def int_ppc_tsuspend : ClangBuiltin<"__builtin_tsuspend">,
Intrinsic<[llvm_i32_ty], [], []>;
-def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">,
+def int_ppc_ttest : ClangBuiltin<"__builtin_ttest">,
Intrinsic<[llvm_i64_ty], [], []>;
-def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
+// We currently use llvm.ppc.cfence in the context of atomic loads, which
+// in LLVM IR require their type to be an integer, pointer, or
+// floating-point type. So llvm_any_ty here refers to the types mentioned
+// above. The backend is expected to lower these types to appropriate MVTs.
+def int_ppc_cfence : Intrinsic<[], [llvm_any_ty], []>;
// PowerPC set FPSCR Intrinsic Definitions.
-def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
+def int_ppc_setrnd : ClangBuiltin<"__builtin_setrnd">,
Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
}
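
Since the comment on llvm.ppc.cfence above describes its use after atomic
loads, a minimal IR sketch may help; the i64 instantiation, the typed
pointer, and the wrapper function are assumptions, not part of this patch:

; the overloaded cfence consumes the atomically loaded value; the backend
; is expected to lower the i64 overload to the matching MVT
define i64 @acquire_load(i64* %p) {
  %v = load atomic i64, i64* %p acquire, align 8
  call void @llvm.ppc.cfence.i64(i64 %v)
  ret i64 %v
}
declare void @llvm.ppc.cfence.i64(i64)
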
@@ -1552,218 +1590,212 @@ let TargetPrefix = "ppc" in {
// XL Compat intrinsics.
let TargetPrefix = "ppc" in {
- def int_ppc_dcbfl : GCCBuiltin<"__builtin_ppc_dcbfl">,
+ def int_ppc_dcbfl : ClangBuiltin<"__builtin_ppc_dcbfl">,
Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_ppc_dcbflp : GCCBuiltin<"__builtin_ppc_dcbflp">,
+ def int_ppc_dcbflp : ClangBuiltin<"__builtin_ppc_dcbflp">,
Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>;
- def int_ppc_dcbst : GCCBuiltin<"__builtin_ppc_dcbst">,
+ def int_ppc_dcbst : ClangBuiltin<"__builtin_ppc_dcbst">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbt : GCCBuiltin<"__builtin_ppc_dcbt">,
+ def int_ppc_dcbt : ClangBuiltin<"__builtin_ppc_dcbt">,
Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
- def int_ppc_dcbtst : GCCBuiltin<"__builtin_ppc_dcbtst">,
+ def int_ppc_dcbtst : ClangBuiltin<"__builtin_ppc_dcbtst">,
Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
- def int_ppc_dcbz : GCCBuiltin<"__builtin_ppc_dcbz">,
+ def int_ppc_dcbz : ClangBuiltin<"__builtin_ppc_dcbz">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_icbt : GCCBuiltin<"__builtin_ppc_icbt">,
+ def int_ppc_icbt : ClangBuiltin<"__builtin_ppc_icbt">,
Intrinsic<[], [llvm_ptr_ty], []>;
// Population Count in each Byte.
def int_ppc_popcntb : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
  // sync instruction (i.e. sync 0, a.k.a. hwsync)
- def int_ppc_sync : GCCBuiltin<"__builtin_ppc_sync">,
+ def int_ppc_sync : ClangBuiltin<"__builtin_ppc_sync">,
Intrinsic<[], [], []>;
- def int_ppc_iospace_sync : GCCBuiltin<"__builtin_ppc_iospace_sync">,
+ def int_ppc_iospace_sync : ClangBuiltin<"__builtin_ppc_iospace_sync">,
Intrinsic<[], [], []>;
// isync instruction
- def int_ppc_isync : GCCBuiltin<"__builtin_ppc_isync">,
+ def int_ppc_isync : ClangBuiltin<"__builtin_ppc_isync">,
Intrinsic<[], [], []>;
// lwsync is sync 1
- def int_ppc_lwsync : GCCBuiltin<"__builtin_ppc_lwsync">,
+ def int_ppc_lwsync : ClangBuiltin<"__builtin_ppc_lwsync">,
Intrinsic<[], [], []>;
- def int_ppc_iospace_lwsync : GCCBuiltin<"__builtin_ppc_iospace_lwsync">,
+ def int_ppc_iospace_lwsync : ClangBuiltin<"__builtin_ppc_iospace_lwsync">,
Intrinsic<[], [], []>;
// eieio instruction
- def int_ppc_eieio : GCCBuiltin<"__builtin_ppc_eieio">,
+ def int_ppc_eieio : ClangBuiltin<"__builtin_ppc_eieio">,
Intrinsic<[],[],[]>;
- def int_ppc_iospace_eieio : GCCBuiltin<"__builtin_ppc_iospace_eieio">,
+ def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">,
Intrinsic<[],[],[]>;
- def int_ppc_stdcx : GCCBuiltin<"__builtin_ppc_stdcx">,
+ def int_ppc_stdcx : ClangBuiltin<"__builtin_ppc_stdcx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem]>;
- def int_ppc_stwcx : GCCBuiltin<"__builtin_ppc_stwcx">,
+ def int_ppc_stwcx : ClangBuiltin<"__builtin_ppc_stwcx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem]>;
def int_ppc_sthcx
: Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ], [IntrWriteMem]>;
- def int_ppc_stbcx : GCCBuiltin<"__builtin_ppc_stbcx">,
+ def int_ppc_stbcx : ClangBuiltin<"__builtin_ppc_stbcx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem]>;
- def int_ppc_dcbtstt : GCCBuiltin<"__builtin_ppc_dcbtstt">,
+ def int_ppc_dcbtstt : ClangBuiltin<"__builtin_ppc_dcbtstt">,
Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
- def int_ppc_dcbtt : GCCBuiltin<"__builtin_ppc_dcbtt">,
+ def int_ppc_dcbtt : ClangBuiltin<"__builtin_ppc_dcbtt">,
Intrinsic<[], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
- def int_ppc_mftbu : GCCBuiltin<"__builtin_ppc_mftbu">,
+ def int_ppc_mftbu : ClangBuiltin<"__builtin_ppc_mftbu">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
- def int_ppc_mfmsr : GCCBuiltin<"__builtin_ppc_mfmsr">,
+ def int_ppc_mfmsr : ClangBuiltin<"__builtin_ppc_mfmsr">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
def int_ppc_mfspr
: Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
def int_ppc_mtmsr
- : GCCBuiltin<"__builtin_ppc_mtmsr">, Intrinsic<[], [llvm_i32_ty], []>;
+ : ClangBuiltin<"__builtin_ppc_mtmsr">, Intrinsic<[], [llvm_i32_ty], []>;
def int_ppc_mtspr
: Intrinsic<[], [llvm_i32_ty, llvm_anyint_ty], [ImmArg<ArgIndex<0>>]>;
- def int_ppc_stfiw : GCCBuiltin<"__builtin_ppc_stfiw">,
+ def int_ppc_stfiw : ClangBuiltin<"__builtin_ppc_stfiw">,
Intrinsic<[], [llvm_ptr_ty, llvm_double_ty],
[IntrWriteMem]>;
// compare
def int_ppc_cmpeqb
- : GCCBuiltin<"__builtin_ppc_cmpeqb">,
+ : ClangBuiltin<"__builtin_ppc_cmpeqb">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_cmprb
- : GCCBuiltin<"__builtin_ppc_cmprb">,
+ : ClangBuiltin<"__builtin_ppc_cmprb">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<0>>]>;
def int_ppc_setb
- : GCCBuiltin<"__builtin_ppc_setb">,
+ : ClangBuiltin<"__builtin_ppc_setb">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_cmpb
: Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, llvm_anyint_ty],
[IntrNoMem]>;
// multiply
def int_ppc_mulhd
- : GCCBuiltin<"__builtin_ppc_mulhd">,
+ : ClangBuiltin<"__builtin_ppc_mulhd">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_mulhdu
- : GCCBuiltin<"__builtin_ppc_mulhdu">,
+ : ClangBuiltin<"__builtin_ppc_mulhdu">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_mulhw
- : GCCBuiltin<"__builtin_ppc_mulhw">,
+ : ClangBuiltin<"__builtin_ppc_mulhw">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_ppc_mulhwu
- : GCCBuiltin<"__builtin_ppc_mulhwu">,
+ : ClangBuiltin<"__builtin_ppc_mulhwu">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_ppc_maddhd
- : GCCBuiltin<"__builtin_ppc_maddhd">,
+ : ClangBuiltin<"__builtin_ppc_maddhd">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_maddhdu
- : GCCBuiltin<"__builtin_ppc_maddhdu">,
+ : ClangBuiltin<"__builtin_ppc_maddhdu">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
def int_ppc_maddld
- : GCCBuiltin<"__builtin_ppc_maddld">,
+ : ClangBuiltin<"__builtin_ppc_maddld">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
// load
def int_ppc_load2r
: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_ppc_load4r
- : GCCBuiltin<"__builtin_ppc_load4r">,
+ : ClangBuiltin<"__builtin_ppc_load4r">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_ppc_load8r
- : GCCBuiltin<"__builtin_ppc_load8r">,
+ : ClangBuiltin<"__builtin_ppc_load8r">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
// store
def int_ppc_store2r
- : GCCBuiltin<"__builtin_ppc_store2r">,
+ : ClangBuiltin<"__builtin_ppc_store2r">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [IntrWriteMem]>;
def int_ppc_store4r
- : GCCBuiltin<"__builtin_ppc_store4r">,
+ : ClangBuiltin<"__builtin_ppc_store4r">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [IntrWriteMem]>;
def int_ppc_store8r
- : GCCBuiltin<"__builtin_ppc_store8r">,
+ : ClangBuiltin<"__builtin_ppc_store8r">,
Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], [IntrWriteMem]>;
def int_ppc_insert_exp
- : GCCBuiltin<"__builtin_ppc_insert_exp">,
+ : ClangBuiltin<"__builtin_ppc_insert_exp">,
Intrinsic <[llvm_double_ty], [llvm_double_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_ppc_extract_exp
- : GCCBuiltin<"__builtin_ppc_extract_exp">,
+ : ClangBuiltin<"__builtin_ppc_extract_exp">,
Intrinsic <[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_extract_sig
- : GCCBuiltin<"__builtin_ppc_extract_sig">,
+ : ClangBuiltin<"__builtin_ppc_extract_sig">,
Intrinsic <[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_mtfsb0
- : GCCBuiltin<"__builtin_ppc_mtfsb0">,
+ : ClangBuiltin<"__builtin_ppc_mtfsb0">,
Intrinsic <[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
def int_ppc_mtfsb1
- : GCCBuiltin<"__builtin_ppc_mtfsb1">,
+ : ClangBuiltin<"__builtin_ppc_mtfsb1">,
Intrinsic <[], [llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
def int_ppc_mtfsf :
Intrinsic <[], [llvm_i32_ty, llvm_double_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
def int_ppc_mtfsfi
- : GCCBuiltin<"__builtin_ppc_mtfsfi">,
+ : ClangBuiltin<"__builtin_ppc_mtfsfi">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects,
ImmArg<ArgIndex<0>>,ImmArg<ArgIndex<1>>]>;
def int_ppc_fmsub
- : GCCBuiltin<"__builtin_ppc_fmsub">,
+ : ClangBuiltin<"__builtin_ppc_fmsub">,
Intrinsic <[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
def int_ppc_fmsubs
- : GCCBuiltin<"__builtin_ppc_fmsubs">,
+ : ClangBuiltin<"__builtin_ppc_fmsubs">,
Intrinsic <[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_ppc_fnmadd
- : GCCBuiltin<"__builtin_ppc_fnmadd">,
+ : ClangBuiltin<"__builtin_ppc_fnmadd">,
Intrinsic <[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
def int_ppc_fnmadds
- : GCCBuiltin<"__builtin_ppc_fnmadds">,
+ : ClangBuiltin<"__builtin_ppc_fnmadds">,
Intrinsic <[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty],
[IntrNoMem]>;
def int_ppc_fnmsub
- : GCCBuiltin<"__builtin_ppc_fnmsub">,
- Intrinsic <[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem]>;
- def int_ppc_fnmsubs
- : GCCBuiltin<"__builtin_ppc_fnmsubs">,
- Intrinsic <[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem]>;
+ : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
def int_ppc_fre
- : GCCBuiltin<"__builtin_ppc_fre">,
+ : ClangBuiltin<"__builtin_ppc_fre">,
Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_fres
- : GCCBuiltin<"__builtin_ppc_fres">,
+ : ClangBuiltin<"__builtin_ppc_fres">,
Intrinsic <[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_ppc_addex
- : GCCBuiltin<"__builtin_ppc_addex">,
+ : ClangBuiltin<"__builtin_ppc_addex">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<2>>]>;
- def int_ppc_fsel : GCCBuiltin<"__builtin_ppc_fsel">,
+ def int_ppc_fsel : ClangBuiltin<"__builtin_ppc_fsel">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty,
llvm_double_ty], [IntrNoMem]>;
- def int_ppc_fsels : GCCBuiltin<"__builtin_ppc_fsels">,
+ def int_ppc_fsels : ClangBuiltin<"__builtin_ppc_fsels">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty,
llvm_float_ty], [IntrNoMem]>;
- def int_ppc_frsqrte : GCCBuiltin<"__builtin_ppc_frsqrte">,
+ def int_ppc_frsqrte : ClangBuiltin<"__builtin_ppc_frsqrte">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_ppc_frsqrtes : GCCBuiltin<"__builtin_ppc_frsqrtes">,
+ def int_ppc_frsqrtes : ClangBuiltin<"__builtin_ppc_frsqrtes">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_ppc_compare_exp_uo : GCCBuiltin<"__builtin_ppc_compare_exp_uo">,
+ def int_ppc_compare_exp_uo : ClangBuiltin<"__builtin_ppc_compare_exp_uo">,
Intrinsic<[llvm_i32_ty],
[llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
- def int_ppc_compare_exp_lt : GCCBuiltin<"__builtin_ppc_compare_exp_lt">,
+ def int_ppc_compare_exp_lt : ClangBuiltin<"__builtin_ppc_compare_exp_lt">,
Intrinsic<[llvm_i32_ty],
[llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
- def int_ppc_compare_exp_gt : GCCBuiltin<"__builtin_ppc_compare_exp_gt">,
+ def int_ppc_compare_exp_gt : ClangBuiltin<"__builtin_ppc_compare_exp_gt">,
Intrinsic<[llvm_i32_ty],
[llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
- def int_ppc_compare_exp_eq : GCCBuiltin<"__builtin_ppc_compare_exp_eq">,
+ def int_ppc_compare_exp_eq : ClangBuiltin<"__builtin_ppc_compare_exp_eq">,
Intrinsic<[llvm_i32_ty],
[llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
@@ -1773,6 +1805,12 @@ let TargetPrefix = "ppc" in {
def int_ppc_test_data_class_f : Intrinsic<[llvm_i32_ty],
[llvm_float_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_fnabs
+ : ClangBuiltin<"__builtin_ppc_fnabs">,
+ Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_ppc_fnabss
+ : ClangBuiltin<"__builtin_ppc_fnabss">,
+ Intrinsic <[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_ppc_convert_f128_to_ppcf128
: Intrinsic<[llvm_ppcf128_ty], [llvm_f128_ty], [IntrNoMem]>;
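
One consequence of the fnmsub hunk above is that int_ppc_fnmsub and
int_ppc_fnmsubs collapse into a single definition overloaded on
llvm_anyfloat_ty. A hedged sketch of the resulting IR; the .f64/.f32
manglings follow the usual overload-suffix rules and are assumptions, not
taken from this patch:

; one overloaded intrinsic now covers both widths
declare double @llvm.ppc.fnmsub.f64(double, double, double)
declare float @llvm.ppc.fnmsub.f32(float, float, float)

define double @fnmsub64(double %a, double %b, double %c) {
  %r = call double @llvm.ppc.fnmsub.f64(double %a, double %b, double %c)
  ret double %r
}
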
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 6780436bd701..098ca1bc6cfb 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -140,7 +140,7 @@ let TargetPrefix = "riscv" in {
// Vectors
// The intrinsic does not have any operand that must be extended.
-defvar NoSplatOperand = 0xF;
+defvar NoScalarOperand = 0xF;
// The intrinsic does not have a VL operand.
// (e.g., riscv_vmv_x_s and riscv_vfmv_f_s)
@@ -150,7 +150,7 @@ class RISCVVIntrinsic {
// These intrinsics may accept illegal integer values in their llvm_any_ty
// operand, so they have to be extended.
Intrinsic IntrinsicID = !cast<Intrinsic>(NAME);
- bits<4> SplatOperand = NoSplatOperand;
+ bits<4> ScalarOperand = NoScalarOperand;
bits<5> VLOperand = NoVLOperand;
}
@@ -219,8 +219,8 @@ let TargetPrefix = "riscv" in {
let VLOperand = 2;
}
// For unit stride load with mask
- // Input: (maskedoff, pointer, mask, vl, ta)
- class RISCVUSLoadMask
+ // Input: (maskedoff, pointer, mask, vl, policy)
+ class RISCVUSLoadMasked
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>,
@@ -231,11 +231,11 @@ let TargetPrefix = "riscv" in {
let VLOperand = 3;
}
// For unit stride fault-only-first load with mask
- // Input: (maskedoff, pointer, mask, vl, ta)
+ // Input: (maskedoff, pointer, mask, vl, policy)
// Output: (data, vl)
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
- class RISCVUSLoadFFMask
+ class RISCVUSLoadFFMasked
: Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>,
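
As the NOTE above explains, the fault-only-first forms return the possibly
reduced vl alongside the data, which is why they keep default memory
properties. A hedged IR sketch of the masked form; the nxv2i32 element
type, the typed pointer, and the name mangling are assumptions:

; aggregate return carries {data, new vl}; the trailing policy operand
; is an immediate
declare { <vscale x 2 x i32>, i64 }
  @llvm.riscv.vleff.mask.nxv2i32.i64(
    <vscale x 2 x i32>,   ; maskedoff
    <vscale x 2 x i32>*,  ; pointer
    <vscale x 2 x i1>,    ; mask
    i64,                  ; vl
    i64)                  ; policy
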
@@ -255,8 +255,8 @@ let TargetPrefix = "riscv" in {
let VLOperand = 3;
}
// For strided load with mask
- // Input: (maskedoff, pointer, stride, mask, vl, ta)
- class RISCVSLoadMask
+ // Input: (maskedoff, pointer, stride, mask, vl, policy)
+ class RISCVSLoadMasked
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
@@ -277,8 +277,8 @@ let TargetPrefix = "riscv" in {
let VLOperand = 3;
}
// For indexed load with mask
- // Input: (maskedoff, pointer, index, mask, vl, ta)
- class RISCVILoadMask
+ // Input: (maskedoff, pointer, index, mask, vl, policy)
+ class RISCVILoadMasked
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
@@ -300,7 +300,7 @@ let TargetPrefix = "riscv" in {
}
// For unit stride store with mask
// Input: (vector_in, pointer, mask, vl)
- class RISCVUSStoreMask
+ class RISCVUSStoreMasked
: Intrinsic<[],
[llvm_anyvector_ty,
LLVMPointerType<LLVMMatchType<0>>,
@@ -321,7 +321,7 @@ let TargetPrefix = "riscv" in {
}
// For stride store with mask
  // Input: (vector_in, pointer, stride, mask, vl)
- class RISCVSStoreMask
+ class RISCVSStoreMasked
: Intrinsic<[],
[llvm_anyvector_ty,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
@@ -341,7 +341,7 @@ let TargetPrefix = "riscv" in {
}
// For indexed store with mask
// Input: (vector_in, pointer, index, mask, vl)
- class RISCVIStoreMask
+ class RISCVIStoreMasked
: Intrinsic<[],
[llvm_anyvector_ty,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
@@ -350,16 +350,16 @@ let TargetPrefix = "riscv" in {
let VLOperand = 4;
}
// For destination vector type is the same as source vector.
- // Input: (vector_in, vl)
- class RISCVUnaryAANoMask
+ // Input: (passthru, vector_in, vl)
+ class RISCVUnaryAAUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ let VLOperand = 2;
}
// For destination vector type is the same as first source vector (with mask).
- // Input: (vector_in, mask, vl, ta)
- class RISCVUnaryAAMask
+ // Input: (vector_in, vector_in, mask, vl, policy)
+ class RISCVUnaryAAMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
@@ -367,7 +367,8 @@ let TargetPrefix = "riscv" in {
[ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 3;
}
- class RISCVUnaryAAMaskNoTA
+  // Input: (passthru, vector_in, mask, vl)
+ class RISCVCompress
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
@@ -376,23 +377,24 @@ let TargetPrefix = "riscv" in {
}
// For destination vector type is the same as first and second source vector.
// Input: (vector_in, vector_in, vl)
- class RISCVBinaryAAANoMask
+ class RISCVBinaryAAAUnMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 2;
}
// For destination vector type is the same as first and second source vector.
- // Input: (vector_in, int_vector_in, vl)
- class RISCVRGatherVVNoMask
+ // Input: (passthru, vector_in, int_vector_in, vl)
+ class RISCVRGatherVVUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ let VLOperand = 3;
}
// For destination vector type is the same as first and second source vector.
- // Input: (vector_in, vector_in, int_vector_in, vl, ta)
- class RISCVRGatherVVMask
+ // Input: (vector_in, vector_in, int_vector_in, vl, policy)
+ class RISCVRGatherVVMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
@@ -400,17 +402,18 @@ let TargetPrefix = "riscv" in {
[ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 4;
}
- // Input: (vector_in, int16_vector_in, vl)
- class RISCVRGatherEI16VVNoMask
+ // Input: (passthru, vector_in, int16_vector_in, vl)
+ class RISCVRGatherEI16VVUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>,
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>,
llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ let VLOperand = 3;
}
// For destination vector type is the same as first and second source vector.
- // Input: (vector_in, vector_in, int16_vector_in, vl, ta)
- class RISCVRGatherEI16VVMask
+ // Input: (vector_in, vector_in, int16_vector_in, vl, policy)
+ class RISCVRGatherEI16VVMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>,
@@ -421,17 +424,18 @@ let TargetPrefix = "riscv" in {
}
// For destination vector type is the same as first source vector, and the
// second operand is XLen.
- // Input: (vector_in, xlen_in, vl)
- class RISCVGatherVXNoMask
+ // Input: (passthru, vector_in, xlen_in, vl)
+ class RISCVGatherVXUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_anyint_ty, LLVMMatchType<1>],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
+ LLVMMatchType<1>],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ let VLOperand = 3;
}
// For destination vector type is the same as first source vector (with mask).
// Second operand is XLen.
- // Input: (maskedoff, vector_in, xlen_in, mask, vl, ta)
- class RISCVGatherVXMask
+ // Input: (maskedoff, vector_in, xlen_in, mask, vl, policy)
+ class RISCVGatherVXMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>,
@@ -440,38 +444,40 @@ let TargetPrefix = "riscv" in {
let VLOperand = 4;
}
// For destination vector type is the same as first source vector.
- // Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVBinaryAAXNoMask
+ // Input: (passthru, vector_in, vector_in/scalar_in, vl)
+ class RISCVBinaryAAXUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
- let VLOperand = 2;
+ let ScalarOperand = 2;
+ let VLOperand = 3;
}
// For destination vector type is the same as first source vector (with mask).
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
- class RISCVBinaryAAXMask
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RISCVBinaryAAXMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
LLVMMatchType<2>],
[ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 2;
+ let ScalarOperand = 2;
let VLOperand = 4;
}
// For destination vector type is the same as first source vector. The
// second source operand must match the destination type or be an XLen scalar.
- // Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVBinaryAAShiftNoMask
+ // Input: (passthru, vector_in, vector_in/scalar_in, vl)
+ class RISCVBinaryAAShiftUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ let VLOperand = 3;
}
// For destination vector type is the same as first source vector (with mask).
// The second source operand must match the destination type or be an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
- class RISCVBinaryAAShiftMask
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RISCVBinaryAAShiftMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
@@ -480,38 +486,40 @@ let TargetPrefix = "riscv" in {
let VLOperand = 4;
}
// For destination vector type is NOT the same as first source vector.
- // Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVBinaryABXNoMask
+ // Input: (passthru, vector_in, vector_in/scalar_in, vl)
+ class RISCVBinaryABXUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
- let VLOperand = 2;
+ let ScalarOperand = 2;
+ let VLOperand = 3;
}
// For destination vector type is NOT the same as first source vector (with mask).
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
- class RISCVBinaryABXMask
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RISCVBinaryABXMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
LLVMMatchType<3>],
[ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 2;
+ let ScalarOperand = 2;
let VLOperand = 4;
}
// For destination vector type is NOT the same as first source vector. The
// second source operand must match the destination type or be an XLen scalar.
- // Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVBinaryABShiftNoMask
+ // Input: (passthru, vector_in, vector_in/scalar_in, vl)
+ class RISCVBinaryABShiftUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ let VLOperand = 3;
}
// For destination vector type is NOT the same as first source vector (with mask).
// The second source operand must match the destination type or be an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
- class RISCVBinaryABShiftMask
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RISCVBinaryABShiftMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
@@ -520,15 +528,15 @@ let TargetPrefix = "riscv" in {
let VLOperand = 4;
}
// For binary operations with V0 as input.
- // Input: (vector_in, vector_in/scalar_in, V0, vl)
+ // Input: (passthru, vector_in, vector_in/scalar_in, V0, vl)
class RISCVBinaryWithV0
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_any_ty,
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
- let VLOperand = 3;
+ let ScalarOperand = 2;
+ let VLOperand = 4;
}
// For binary operations with mask type output and V0 as input.
// Output: (mask type output)
@@ -539,7 +547,7 @@ let TargetPrefix = "riscv" in {
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
+ let ScalarOperand = 1;
let VLOperand = 3;
}
// For binary operations with mask type output.
@@ -549,87 +557,91 @@ let TargetPrefix = "riscv" in {
: Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
+ let ScalarOperand = 1;
let VLOperand = 2;
}
// For binary operations with mask type output without mask.
// Output: (mask type output)
// Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVCompareNoMask
+ class RISCVCompareUnMasked
: Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
+ let ScalarOperand = 1;
let VLOperand = 2;
}
// For binary operations with mask type output with mask.
// Output: (mask type output)
// Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
- class RISCVCompareMask
+ class RISCVCompareMasked
: Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 2;
+ let ScalarOperand = 2;
let VLOperand = 4;
}
// For FP classify operations.
// Output: (bit mask type output)
- // Input: (vector_in, vl)
- class RISCVClassifyNoMask
+ // Input: (passthru, vector_in, vl)
+ class RISCVClassifyUnMasked
: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
- [llvm_anyvector_ty, llvm_anyint_ty],
+ [LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 1;
}
// For FP classify operations with mask.
// Output: (bit mask type output)
- // Input: (maskedoff, vector_in, mask, vl)
- class RISCVClassifyMask
+ // Input: (maskedoff, vector_in, mask, vl, policy)
+ class RISCVClassifyMasked
: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
[LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty, LLVMMatchType<1>],
+ [IntrNoMem, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic {
let VLOperand = 3;
}
// For Saturating binary operations.
// The destination vector type is the same as first source vector.
- // Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVSaturatingBinaryAAXNoMask
+ // Input: (passthru, vector_in, vector_in/scalar_in, vl)
+ class RISCVSaturatingBinaryAAXUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ llvm_anyint_ty],
[IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
- let SplatOperand = 1;
- let VLOperand = 2;
+ let ScalarOperand = 2;
+ let VLOperand = 3;
}
// For Saturating binary operations with mask.
// The destination vector type is the same as first source vector.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
- class RISCVSaturatingBinaryAAXMask
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RISCVSaturatingBinaryAAXMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
LLVMMatchType<2>],
[ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
- let SplatOperand = 2;
+ let ScalarOperand = 2;
let VLOperand = 4;
}
// For Saturating binary operations.
// The destination vector type is the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVSaturatingBinaryAAShiftNoMask
+ // Input: (passthru, vector_in, vector_in/scalar_in, vl)
+ class RISCVSaturatingBinaryAAShiftUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ llvm_anyint_ty],
[IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ let VLOperand = 3;
}
// For Saturating binary operations with mask.
// The destination vector type is the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
- class RISCVSaturatingBinaryAAShiftMask
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RISCVSaturatingBinaryAAShiftMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
@@ -640,18 +652,19 @@ let TargetPrefix = "riscv" in {
// For Saturating binary operations.
// The destination vector type is NOT the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (vector_in, vector_in/scalar_in, vl)
- class RISCVSaturatingBinaryABShiftNoMask
+ // Input: (passthru, vector_in, vector_in/scalar_in, vl)
+ class RISCVSaturatingBinaryABShiftUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ llvm_anyint_ty],
[IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ let VLOperand = 3;
}
// For Saturating binary operations with mask.
// The destination vector type is NOT the same as first source vector (with mask).
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
- class RISCVSaturatingBinaryABShiftMask
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RISCVSaturatingBinaryABShiftMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
@@ -659,56 +672,69 @@ let TargetPrefix = "riscv" in {
[ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
let VLOperand = 4;
}
- class RISCVTernaryAAAXNoMask
+ // Input: (vector_in, vector_in, scalar_in, vl, policy)
+ class RVVSlideUnMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
- LLVMMatchType<1>],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMMatchType<1>, LLVMMatchType<1>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 3;
}
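
The two slide classes above append a policy operand after vl and mark it
ImmArg. A hedged sketch of the unmasked form; the vslideup instantiation,
vector type, and mangling are assumptions:

; operand 4 is the new policy immediate
declare <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32.i64(
    <vscale x 2 x i32>,  ; dest, also supplies the elements not written
    <vscale x 2 x i32>,  ; source
    i64,                 ; offset
    i64,                 ; vl
    i64)                 ; policy
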
- class RISCVTernaryAAAXMask
+ // Input: (vector_in, vector_in, vector_in/scalar_in, mask, vl, policy)
+ class RVVSlideMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<1>, LLVMMatchType<1>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 4;
}
- class RISCVTernaryAAXANoMask
+  // UnMasked Vector Multiply-Add operations; their first operand cannot be undef.
+ // Input: (vector_in, vector_in/scalar, vector_in, vl, policy)
+ class RISCVTernaryAAXAUnMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>,
- llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 1;
let VLOperand = 3;
}
- class RISCVTernaryAAXAMask
+  // Masked Vector Multiply-Add operations; their first operand cannot be undef.
+  // Input: (vector_in, vector_in/scalar, vector_in, mask, vl, policy)
+ class RISCVTernaryAAXAMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 1;
let VLOperand = 4;
}
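
Because the multiply-add destination is tied to operand 0 and therefore
cannot be undef, the unmasked form reads as vd = vd + vs1 * vs2 with the
policy immediate trailing. A hedged sketch; the vmacc instantiation and
mangling are assumptions:

; operand 0 is both addend and destination; operand 4 is the policy
; immediate (ImmArg)
declare <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(
    <vscale x 2 x i32>,  ; vd, tied destination
    <vscale x 2 x i32>,  ; vs1 (the llvm_any_ty vector/scalar operand)
    <vscale x 2 x i32>,  ; vs2
    i64,                 ; vl
    i64)                 ; policy
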
- class RISCVTernaryWideNoMask
+  // UnMasked Widening Vector Multiply-Add operations; their first operand cannot be undef.
+ // Input: (vector_in, vector_in/scalar, vector_in, vl, policy)
+ class RISCVTernaryWideUnMasked
: Intrinsic< [llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty,
- llvm_anyint_ty],
- [IntrNoMem] >, RISCVVIntrinsic {
- let SplatOperand = 1;
+ llvm_anyint_ty, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem] >, RISCVVIntrinsic {
+ let ScalarOperand = 1;
let VLOperand = 3;
}
- class RISCVTernaryWideMask
+  // Masked Widening Vector Multiply-Add operations; their first operand cannot be undef.
+  // Input: (vector_in, vector_in/scalar, vector_in, mask, vl, policy)
+ class RISCVTernaryWideMasked
: Intrinsic< [llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
- let SplatOperand = 1;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 1;
let VLOperand = 4;
}
// For Reduction ternary operations.
// For destination vector type is the same as first and third source vector.
// Input: (vector_in, vector_in, vector_in, vl)
- class RISCVReductionNoMask
+ class RISCVReductionUnMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>,
llvm_anyint_ty],
@@ -719,7 +745,7 @@ let TargetPrefix = "riscv" in {
// For destination vector type is the same as first and third source vector.
  // The mask type comes from the second source vector.
// Input: (maskedoff, vector_in, vector_in, vector_in, mask, vl)
- class RISCVReductionMask
+ class RISCVReductionMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, llvm_anyint_ty],
@@ -729,7 +755,7 @@ let TargetPrefix = "riscv" in {
// For unary operations with scalar type output without mask
// Output: (scalar type)
// Input: (vector_in, vl)
- class RISCVMaskUnarySOutNoMask
+ class RISCVMaskedUnarySOutUnMasked
: Intrinsic<[LLVMMatchType<1>],
[llvm_anyvector_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
@@ -738,23 +764,23 @@ let TargetPrefix = "riscv" in {
// For unary operations with scalar type output with mask
// Output: (scalar type)
// Input: (vector_in, mask, vl)
- class RISCVMaskUnarySOutMask
+ class RISCVMaskedUnarySOutMasked
: Intrinsic<[LLVMMatchType<1>],
[llvm_anyvector_ty, LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 2;
}
// For destination vector type is NOT the same as source vector.
- // Input: (vector_in, vl)
- class RISCVUnaryABNoMask
+ // Input: (passthru, vector_in, vl)
+ class RISCVUnaryABUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ let VLOperand = 2;
}
// For destination vector type is NOT the same as source vector (with mask).
- // Input: (maskedoff, vector_in, mask, vl, ta)
- class RISCVUnaryABMask
+ // Input: (maskedoff, vector_in, mask, vl, policy)
+ class RISCVUnaryABMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
@@ -765,7 +791,7 @@ let TargetPrefix = "riscv" in {
// For unary operations with the same vector type in/out without mask
// Output: (vector)
// Input: (vector_in, vl)
- class RISCVUnaryNoMask
+ class RISCVUnaryUnMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
@@ -774,7 +800,7 @@ let TargetPrefix = "riscv" in {
// For mask unary operations with mask type in/out with mask
// Output: (mask type output)
// Input: (mask type maskedoff, mask type vector_in, mask, vl)
- class RISCVMaskUnaryMOutMask
+ class RISCVMaskedUnaryMOutMasked
: Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_anyint_ty],
@@ -785,21 +811,28 @@ let TargetPrefix = "riscv" in {
// Input: (vl)
class RISCVNullaryIntrinsic
: Intrinsic<[llvm_anyvector_ty],
- [llvm_anyint_ty],
+ [llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic {
+ let VLOperand = 1;
+ }
+ // Output: (vector)
+ // Input: (passthru, vl)
+ class RISCVID
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 0;
+ let VLOperand = 1;
}
// For Conversion unary operations.
- // Input: (vector_in, vl)
- class RISCVConversionNoMask
+ // Input: (passthru, vector_in, vl)
+ class RISCVConversionUnMasked
: Intrinsic<[llvm_anyvector_ty],
- [llvm_anyvector_ty, llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ let VLOperand = 2;
}
// For Conversion unary operations with mask.
- // Input: (maskedoff, vector_in, mask, vl, ta)
- class RISCVConversionMask
+ // Input: (maskedoff, vector_in, mask, vl, policy)
+ class RISCVConversionMasked
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
@@ -809,17 +842,18 @@ let TargetPrefix = "riscv" in {
}
// For unit stride segment load
- // Input: (pointer, vl)
+ // Input: (passthru, pointer, vl)
class RISCVUSSegLoad<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
- [LLVMPointerToElt<0>, llvm_anyint_ty],
- [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>, llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic {
+ let VLOperand = !add(nf, 1);
}
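
The !listsplat/!listconcat arithmetic above is easiest to check on a
concrete nf. For nf = 2 the class would expand to roughly the following;
the element type and mangling are assumptions:

; two passthru operands precede the pointer, so NoCapture lands on
; index nf = 2 and VLOperand = nf + 1 = 3
declare { <vscale x 1 x i32>, <vscale x 1 x i32> }
  @llvm.riscv.vlseg2.nxv1i32.i64(
    <vscale x 1 x i32>,  ; passthru, field 0
    <vscale x 1 x i32>,  ; passthru, field 1
    i32*,                ; pointer to the element type
    i64)                 ; vl
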
// For unit stride segment load with mask
- // Input: (maskedoff, pointer, mask, vl, ta)
- class RISCVUSSegLoadMask<int nf>
+ // Input: (maskedoff, pointer, mask, vl, policy)
+ class RISCVUSSegLoadMasked<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
@@ -832,23 +866,24 @@ let TargetPrefix = "riscv" in {
}
// For unit stride fault-only-first segment load
- // Input: (pointer, vl)
+ // Input: (passthru, pointer, vl)
// Output: (data, vl)
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
class RISCVUSSegLoadFF<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1)), [llvm_anyint_ty]),
- [LLVMPointerToElt<0>, LLVMMatchType<1>],
- [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>, LLVMMatchType<1>]),
+ [NoCapture<ArgIndex<nf>>]>, RISCVVIntrinsic {
+ let VLOperand = !add(nf, 1);
}
// For unit stride fault-only-first segment load with mask
- // Input: (maskedoff, pointer, mask, vl, ta)
+ // Input: (maskedoff, pointer, mask, vl, policy)
// Output: (data, vl)
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
- class RISCVUSSegLoadFFMask<int nf>
+ class RISCVUSSegLoadFFMasked<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1)), [llvm_anyint_ty]),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
@@ -861,17 +896,18 @@ let TargetPrefix = "riscv" in {
}
// For stride segment load
- // Input: (pointer, offset, vl)
+ // Input: (passthru, pointer, offset, vl)
class RISCVSSegLoad<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
- [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>],
- [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>]),
+ [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic {
+ let VLOperand = !add(nf, 2);
}
// For stride segment load with mask
- // Input: (maskedoff, pointer, offset, mask, vl, ta)
- class RISCVSSegLoadMask<int nf>
+ // Input: (maskedoff, pointer, offset, mask, vl, policy)
+ class RISCVSSegLoadMasked<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
@@ -885,17 +921,18 @@ let TargetPrefix = "riscv" in {
}
// For indexed segment load
- // Input: (pointer, index, vl)
+ // Input: (passthru, pointer, index, vl)
class RISCVISegLoad<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
- [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty],
- [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic {
- let VLOperand = 2;
+ !listconcat(!listsplat(LLVMMatchType<0>, nf),
+ [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic {
+ let VLOperand = !add(nf, 2);
}
// For indexed segment load with mask
- // Input: (maskedoff, pointer, index, mask, vl, ta)
- class RISCVISegLoadMask<int nf>
+ // Input: (maskedoff, pointer, index, mask, vl, policy)
+ class RISCVISegLoadMasked<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
@@ -920,7 +957,7 @@ let TargetPrefix = "riscv" in {
}
// For unit stride segment store with mask
// Input: (value, pointer, mask, vl)
- class RISCVUSSegStoreMask<int nf>
+ class RISCVUSSegStoreMasked<int nf>
: Intrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
@@ -944,7 +981,7 @@ let TargetPrefix = "riscv" in {
}
// For stride segment store with mask
// Input: (value, pointer, offset, mask, vl)
- class RISCVSSegStoreMask<int nf>
+ class RISCVSSegStoreMasked<int nf>
: Intrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
@@ -968,7 +1005,7 @@ let TargetPrefix = "riscv" in {
}
// For indexed segment store with mask
// Input: (value, pointer, offset, mask, vl)
- class RISCVISegStoreMask<int nf>
+ class RISCVISegStoreMasked<int nf>
: Intrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
@@ -981,76 +1018,76 @@ let TargetPrefix = "riscv" in {
multiclass RISCVUSLoad {
def "int_riscv_" # NAME : RISCVUSLoad;
- def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMasked;
}
multiclass RISCVUSLoadFF {
def "int_riscv_" # NAME : RISCVUSLoadFF;
- def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMasked;
}
multiclass RISCVSLoad {
def "int_riscv_" # NAME : RISCVSLoad;
- def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVSLoadMasked;
}
multiclass RISCVILoad {
def "int_riscv_" # NAME : RISCVILoad;
- def "int_riscv_" # NAME # "_mask" : RISCVILoadMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVILoadMasked;
}
multiclass RISCVUSStore {
def "int_riscv_" # NAME : RISCVUSStore;
- def "int_riscv_" # NAME # "_mask" : RISCVUSStoreMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSStoreMasked;
}
multiclass RISCVSStore {
def "int_riscv_" # NAME : RISCVSStore;
- def "int_riscv_" # NAME # "_mask" : RISCVSStoreMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVSStoreMasked;
}
multiclass RISCVIStore {
def "int_riscv_" # NAME : RISCVIStore;
- def "int_riscv_" # NAME # "_mask" : RISCVIStoreMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVIStoreMasked;
}
multiclass RISCVUnaryAA {
- def "int_riscv_" # NAME : RISCVUnaryAANoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMask;
+ def "int_riscv_" # NAME : RISCVUnaryAAUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMasked;
}
multiclass RISCVUnaryAB {
- def "int_riscv_" # NAME : RISCVUnaryABNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMask;
+ def "int_riscv_" # NAME : RISCVUnaryABUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMasked;
}
// AAX means the destination type(A) is the same as the first source
// type(A). X means any type for the second source operand.
multiclass RISCVBinaryAAX {
- def "int_riscv_" # NAME : RISCVBinaryAAXNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMask;
+ def "int_riscv_" # NAME : RISCVBinaryAAXUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMasked;
}
  // Like RISCVBinaryAAX, but the second operand is used as a shift amount so it
// must be a vector or an XLen scalar.
multiclass RISCVBinaryAAShift {
- def "int_riscv_" # NAME : RISCVBinaryAAShiftNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAShiftMask;
+ def "int_riscv_" # NAME : RISCVBinaryAAShiftUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAShiftMasked;
}
multiclass RISCVRGatherVV {
- def "int_riscv_" # NAME : RISCVRGatherVVNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVRGatherVVMask;
+ def "int_riscv_" # NAME : RISCVRGatherVVUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVRGatherVVMasked;
}
multiclass RISCVRGatherVX {
- def "int_riscv_" # NAME : RISCVGatherVXNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVGatherVXMask;
+ def "int_riscv_" # NAME : RISCVGatherVXUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVGatherVXMasked;
}
multiclass RISCVRGatherEI16VV {
- def "int_riscv_" # NAME : RISCVRGatherEI16VVNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVRGatherEI16VVMask;
+ def "int_riscv_" # NAME : RISCVRGatherEI16VVUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVRGatherEI16VVMasked;
}
// ABX means the destination type(A) is different from the first source
// type(B). X means any type for the second source operand.
multiclass RISCVBinaryABX {
- def "int_riscv_" # NAME : RISCVBinaryABXNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMask;
+ def "int_riscv_" # NAME : RISCVBinaryABXUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMasked;
}
  // Like RISCVBinaryABX, but the second operand is used as a shift amount so it
// must be a vector or an XLen scalar.
multiclass RISCVBinaryABShift {
- def "int_riscv_" # NAME : RISCVBinaryABShiftNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVBinaryABShiftMask;
+ def "int_riscv_" # NAME : RISCVBinaryABShiftUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryABShiftMasked;
}
multiclass RISCVBinaryWithV0 {
def "int_riscv_" # NAME : RISCVBinaryWithV0;
@@ -1062,80 +1099,80 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVBinaryMOut;
}
multiclass RISCVSaturatingBinaryAAX {
- def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMask;
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMasked;
}
multiclass RISCVSaturatingBinaryAAShift {
- def "int_riscv_" # NAME : RISCVSaturatingBinaryAAShiftNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAShiftMask;
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryAAShiftUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAShiftMasked;
}
multiclass RISCVSaturatingBinaryABShift {
- def "int_riscv_" # NAME : RISCVSaturatingBinaryABShiftNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABShiftMask;
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryABShiftUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABShiftMasked;
}
- multiclass RISCVTernaryAAAX {
- def "int_riscv_" # NAME : RISCVTernaryAAAXNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAAXMask;
+ multiclass RVVSlide {
+ def "int_riscv_" # NAME : RVVSlideUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RVVSlideMasked;
}
multiclass RISCVTernaryAAXA {
- def "int_riscv_" # NAME : RISCVTernaryAAXANoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMask;
+ def "int_riscv_" # NAME : RISCVTernaryAAXAUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMasked;
}
multiclass RISCVCompare {
- def "int_riscv_" # NAME : RISCVCompareNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVCompareMask;
+ def "int_riscv_" # NAME : RISCVCompareUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVCompareMasked;
}
multiclass RISCVClassify {
- def "int_riscv_" # NAME : RISCVClassifyNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVClassifyMask;
+ def "int_riscv_" # NAME : RISCVClassifyUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVClassifyMasked;
}
multiclass RISCVTernaryWide {
- def "int_riscv_" # NAME : RISCVTernaryWideNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMask;
+ def "int_riscv_" # NAME : RISCVTernaryWideUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMasked;
}
multiclass RISCVReduction {
- def "int_riscv_" # NAME : RISCVReductionNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVReductionMask;
+ def "int_riscv_" # NAME : RISCVReductionUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVReductionMasked;
}
- multiclass RISCVMaskUnarySOut {
- def "int_riscv_" # NAME : RISCVMaskUnarySOutNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVMaskUnarySOutMask;
+ multiclass RISCVMaskedUnarySOut {
+ def "int_riscv_" # NAME : RISCVMaskedUnarySOutUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVMaskedUnarySOutMasked;
}
- multiclass RISCVMaskUnaryMOut {
- def "int_riscv_" # NAME : RISCVUnaryNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVMaskUnaryMOutMask;
+ multiclass RISCVMaskedUnaryMOut {
+ def "int_riscv_" # NAME : RISCVUnaryUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVMaskedUnaryMOutMasked;
}
multiclass RISCVConversion {
- def "int_riscv_" #NAME :RISCVConversionNoMask;
- def "int_riscv_" # NAME # "_mask" : RISCVConversionMask;
+ def "int_riscv_" #NAME :RISCVConversionUnMasked;
+ def "int_riscv_" # NAME # "_mask" : RISCVConversionMasked;
}
multiclass RISCVUSSegLoad<int nf> {
def "int_riscv_" # NAME : RISCVUSSegLoad<nf>;
- def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadMask<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadMasked<nf>;
}
multiclass RISCVUSSegLoadFF<int nf> {
def "int_riscv_" # NAME : RISCVUSSegLoadFF<nf>;
- def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadFFMask<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadFFMasked<nf>;
}
multiclass RISCVSSegLoad<int nf> {
def "int_riscv_" # NAME : RISCVSSegLoad<nf>;
- def "int_riscv_" # NAME # "_mask" : RISCVSSegLoadMask<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVSSegLoadMasked<nf>;
}
multiclass RISCVISegLoad<int nf> {
def "int_riscv_" # NAME : RISCVISegLoad<nf>;
- def "int_riscv_" # NAME # "_mask" : RISCVISegLoadMask<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVISegLoadMasked<nf>;
}
multiclass RISCVUSSegStore<int nf> {
def "int_riscv_" # NAME : RISCVUSSegStore<nf>;
- def "int_riscv_" # NAME # "_mask" : RISCVUSSegStoreMask<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVUSSegStoreMasked<nf>;
}
multiclass RISCVSSegStore<int nf> {
def "int_riscv_" # NAME : RISCVSSegStore<nf>;
- def "int_riscv_" # NAME # "_mask" : RISCVSSegStoreMask<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVSSegStoreMasked<nf>;
}
multiclass RISCVISegStore<int nf> {
def "int_riscv_" # NAME : RISCVISegStore<nf>;
- def "int_riscv_" # NAME # "_mask" : RISCVISegStoreMask<nf>;
+ def "int_riscv_" # NAME # "_mask" : RISCVISegStoreMasked<nf>;
}
defm vle : RISCVUSLoad;
@@ -1242,20 +1279,29 @@ let TargetPrefix = "riscv" in {
defm vmerge : RISCVBinaryWithV0;
+ // Output: (vector)
+ // Input: (passthru, vector_in, vl)
def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ let VLOperand = 2;
}
+ // Output: (vector)
+ // Input: (passthru, scalar, vl)
def int_riscv_vmv_v_x : Intrinsic<[llvm_anyint_ty],
- [LLVMVectorElementType<0>, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMVectorElementType<0>,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ let VLOperand = 2;
}
+ // Output: (vector)
+ // Input: (passthru, scalar, vl)
def int_riscv_vfmv_v_f : Intrinsic<[llvm_anyfloat_ty],
- [LLVMVectorElementType<0>, llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMVectorElementType<0>,
+ llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ let VLOperand = 2;
}
def int_riscv_vmv_x_s : Intrinsic<[LLVMVectorElementType<0>],
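A quick way to read the three vmv/vfmv changes above: the new leading passthru operand (which presumably supplies the tail elements, in line with how the other passthru-taking RVV intrinsics in this file behave) shifts every remaining operand right by one, which is why each `VLOperand` moves from 1 to 2. Sketched for int_riscv_vmv_v_x:

    // int_riscv_vmv_v_x operand layout, before and after this change:
    //   before: (scalar, vl)            -> vl at index 1, VLOperand = 1
    //   after:  (passthru, scalar, vl)  -> vl at index 2, VLOperand = 2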
@@ -1313,8 +1359,8 @@ let TargetPrefix = "riscv" in {
defm vfmerge : RISCVBinaryWithV0;
- defm vslideup : RISCVTernaryAAAX;
- defm vslidedown : RISCVTernaryAAAX;
+ defm vslideup : RVVSlide;
+ defm vslidedown : RVVSlide;
defm vslide1up : RISCVBinaryAAX;
defm vslide1down : RISCVBinaryAAX;
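Putting the renamed multiclass together with its two users: given the `RVVSlide` body earlier in this diff, the two `defm` lines above are shorthand for four records. TableGen performs the `NAME` splice automatically, so the expansion is:

    // `defm vslideup : RVVSlide;` and `defm vslidedown : RVVSlide;` expand to:
    def int_riscv_vslideup        : RVVSlideUnMasked;
    def int_riscv_vslideup_mask   : RVVSlideMasked;
    def int_riscv_vslidedown      : RVVSlideUnMasked;
    def int_riscv_vslidedown_mask : RVVSlideMasked;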
@@ -1325,7 +1371,7 @@ let TargetPrefix = "riscv" in {
defm vrgather_vx : RISCVRGatherVX;
defm vrgatherei16_vv : RISCVRGatherEI16VV;
- def "int_riscv_vcompress" : RISCVUnaryAAMaskNoTA;
+ def "int_riscv_vcompress" : RISCVCompress;
defm vaaddu : RISCVSaturatingBinaryAAX;
defm vaadd : RISCVSaturatingBinaryAAX;
@@ -1367,22 +1413,22 @@ let TargetPrefix = "riscv" in {
defm vfwredusum : RISCVReduction;
defm vfwredosum : RISCVReduction;
- def int_riscv_vmand: RISCVBinaryAAANoMask;
- def int_riscv_vmnand: RISCVBinaryAAANoMask;
- def int_riscv_vmandn: RISCVBinaryAAANoMask;
- def int_riscv_vmxor: RISCVBinaryAAANoMask;
- def int_riscv_vmor: RISCVBinaryAAANoMask;
- def int_riscv_vmnor: RISCVBinaryAAANoMask;
- def int_riscv_vmorn: RISCVBinaryAAANoMask;
- def int_riscv_vmxnor: RISCVBinaryAAANoMask;
+ def int_riscv_vmand: RISCVBinaryAAAUnMasked;
+ def int_riscv_vmnand: RISCVBinaryAAAUnMasked;
+ def int_riscv_vmandn: RISCVBinaryAAAUnMasked;
+ def int_riscv_vmxor: RISCVBinaryAAAUnMasked;
+ def int_riscv_vmor: RISCVBinaryAAAUnMasked;
+ def int_riscv_vmnor: RISCVBinaryAAAUnMasked;
+ def int_riscv_vmorn: RISCVBinaryAAAUnMasked;
+ def int_riscv_vmxnor: RISCVBinaryAAAUnMasked;
def int_riscv_vmclr : RISCVNullaryIntrinsic;
def int_riscv_vmset : RISCVNullaryIntrinsic;
- defm vcpop : RISCVMaskUnarySOut;
- defm vfirst : RISCVMaskUnarySOut;
- defm vmsbf : RISCVMaskUnaryMOut;
- defm vmsof : RISCVMaskUnaryMOut;
- defm vmsif : RISCVMaskUnaryMOut;
+ defm vcpop : RISCVMaskedUnarySOut;
+ defm vfirst : RISCVMaskedUnarySOut;
+ defm vmsbf : RISCVMaskedUnaryMOut;
+ defm vmsof : RISCVMaskedUnaryMOut;
+ defm vmsif : RISCVMaskedUnaryMOut;
defm vfcvt_xu_f_v : RISCVConversion;
defm vfcvt_x_f_v : RISCVConversion;
@@ -1409,34 +1455,35 @@ let TargetPrefix = "riscv" in {
defm vfncvt_rod_f_f_w : RISCVConversion;
// Output: (vector)
- // Input: (mask type input, vl)
+ // Input: (passthru, mask type input, vl)
def int_riscv_viota : Intrinsic<[llvm_anyvector_ty],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ [LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic {
- let VLOperand = 1;
+ let VLOperand = 2;
}
// Output: (vector)
- // Input: (maskedoff, mask type vector_in, mask, vl)
+ // Input: (maskedoff, mask type vector_in, mask, vl, policy)
def int_riscv_viota_mask : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
+ llvm_anyint_ty, LLVMMatchType<1>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 3;
}
// Output: (vector)
- // Input: (vl)
- def int_riscv_vid : RISCVNullaryIntrinsic;
+ // Input: (passthru, vl)
+ def int_riscv_vid : RISCVID;
// Output: (vector)
- // Input: (maskedoff, mask, vl)
+ // Input: (maskedoff, mask, vl, policy)
def int_riscv_vid_mask : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
+ llvm_anyint_ty, LLVMMatchType<1>],
+ [ImmArg<ArgIndex<3>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 2;
}
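For orientation, the net effect of this hunk on int_riscv_vid_mask is one extra trailing immediate. The operand indices work out as follows (the meaning of the policy bits, tail/mask agnosticism, is the usual RVV convention and is assumed here rather than spelled out in this file):

    // int_riscv_vid_mask operands after this change:
    //   0: maskedoff   1: mask   2: vl (VLOperand = 2)
    //   3: policy      (ImmArg<ArgIndex<3>> -> must be a compile-time constant)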
@@ -1463,6 +1510,16 @@ let TargetPrefix = "riscv" in {
[llvm_anyvector_ty, llvm_anyptr_ty,
llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>;
+
+ // Segment loads for fixed vectors.
+ foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
+ def int_riscv_seg # nf # _load
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ [llvm_anyptr_ty, llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>;
+ }
+
} // TargetPrefix = "riscv"
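The `foreach` in the new fixed-vector segment-load block is easiest to read by expanding one iteration by hand. For nf = 2, `!listsplat(LLVMMatchType<0>, !add(nf, -1))` is a single-element list, so the iteration is equivalent to:

    // nf = 2 iteration of the foreach above, written out:
    def int_riscv_seg2_load
        : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],  // two same-typed results
                    [llvm_anyptr_ty, llvm_anyint_ty],        // pointer plus an integer length (names assumed)
                    [NoCapture<ArgIndex<0>>, IntrReadMem]>;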
//===----------------------------------------------------------------------===//
@@ -1503,7 +1560,7 @@ class ScalarCryptoByteSelectAny
: Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn,
- ImmArg<ArgIndex<2>>, Returned<ArgIndex<0>>]>;
+ ImmArg<ArgIndex<2>>]>;
// Zknd
def int_riscv_aes32dsi : ScalarCryptoByteSelect32;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
new file mode 100644
index 000000000000..14c628595d30
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -0,0 +1,31 @@
+//===- IntrinsicsSPIRV.td - Defines SPIRV intrinsics -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the SPIRV-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "spv" in {
+ def int_spv_assign_type : Intrinsic<[], [llvm_any_ty, llvm_metadata_ty]>;
+ def int_spv_assign_name : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>;
+
+ def int_spv_track_constant : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_metadata_ty]>;
+ def int_spv_init_global : Intrinsic<[], [llvm_any_ty, llvm_any_ty]>;
+ def int_spv_unref_global : Intrinsic<[], [llvm_any_ty]>;
+
+ def int_spv_gep : Intrinsic<[llvm_anyptr_ty], [llvm_i1_ty, llvm_any_ty, llvm_vararg_ty], [ImmArg<ArgIndex<0>>]>;
+ def int_spv_load : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
+ def int_spv_store : Intrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+ def int_spv_extractv : Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_vararg_ty]>;
+ def int_spv_insertv : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_any_ty, llvm_vararg_ty]>;
+ def int_spv_extractelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_anyint_ty]>;
+ def int_spv_insertelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_any_ty, llvm_anyint_ty]>;
+ def int_spv_const_composite : Intrinsic<[llvm_i32_ty], [llvm_vararg_ty]>;
+ def int_spv_bitcast : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
+ def int_spv_switch : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>;
+}
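One orientation note for the new file: TableGen derives the IR-level intrinsic name from the record name by replacing `int_` with `llvm.` and the remaining underscores with dots (intrinsics overloaded on `any` types additionally get a mangled type suffix at use sites). A couple of representative mappings:

    // Record name            IR name (before type mangling)
    // int_spv_assign_type -> llvm.spv.assign.type
    // int_spv_bitcast     -> llvm.spv.bitcast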
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index a149b571072c..d881a1126bf2 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
class SystemZUnaryConv<string name, LLVMType result, LLVMType arg>
- : GCCBuiltin<"__builtin_s390_" # name>,
+ : ClangBuiltin<"__builtin_s390_" # name>,
Intrinsic<[result], [arg], [IntrNoMem]>;
class SystemZUnary<string name, LLVMType type>
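The GCCBuiltin -> ClangBuiltin substitution that fills the rest of this diff is purely a rename of the helper record that ties an intrinsic to a Clang builtin; no intrinsic signatures change. A minimal sketch of the class involved (its definition lives in llvm/IR/Intrinsics.td, not in this diff, so the field shown is an assumption):

    // Assumed shape of the renamed helper in llvm/IR/Intrinsics.td:
    class ClangBuiltin<string name> {
      string ClangBuiltinName = name;  // formerly GCCBuiltinName in class GCCBuiltin
    }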
@@ -24,14 +24,14 @@ class SystemZUnaryCC<LLVMType type>
: SystemZUnaryConvCC<type, type>;
class SystemZBinaryConv<string name, LLVMType result, LLVMType arg>
- : GCCBuiltin<"__builtin_s390_" # name>,
+ : ClangBuiltin<"__builtin_s390_" # name>,
Intrinsic<[result], [arg, arg], [IntrNoMem]>;
class SystemZBinary<string name, LLVMType type>
: SystemZBinaryConv<name, type, type>;
class SystemZBinaryInt<string name, LLVMType type>
- : GCCBuiltin<"__builtin_s390_" # name>,
+ : ClangBuiltin<"__builtin_s390_" # name>,
Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>;
class SystemZBinaryConvCC<LLVMType result, LLVMType arg>
@@ -45,7 +45,7 @@ class SystemZBinaryCC<LLVMType type>
: SystemZBinaryConvCC<type, type>;
class SystemZTernaryConv<string name, LLVMType result, LLVMType arg>
- : GCCBuiltin<"__builtin_s390_" # name>,
+ : ClangBuiltin<"__builtin_s390_" # name>,
Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;
class SystemZTernaryConvCC<LLVMType result, LLVMType arg>
@@ -55,7 +55,7 @@ class SystemZTernary<string name, LLVMType type>
: SystemZTernaryConv<name, type, type>;
class SystemZTernaryInt<string name, LLVMType type>
- : GCCBuiltin<"__builtin_s390_" # name>,
+ : ClangBuiltin<"__builtin_s390_" # name>,
Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
class SystemZTernaryIntCC<LLVMType type>
@@ -63,7 +63,7 @@ class SystemZTernaryIntCC<LLVMType type>
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
class SystemZQuaternaryInt<string name, LLVMType type>
- : GCCBuiltin<"__builtin_s390_" # name>,
+ : ClangBuiltin<"__builtin_s390_" # name>,
Intrinsic<[type], [type, type, type, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
@@ -216,16 +216,16 @@ let TargetPrefix = "s390" in {
def int_s390_tabort : Intrinsic<[], [llvm_i64_ty],
[IntrNoReturn, Throws, IntrWriteMem]>;
- def int_s390_tend : GCCBuiltin<"__builtin_tend">,
+ def int_s390_tend : ClangBuiltin<"__builtin_tend">,
Intrinsic<[llvm_i32_ty], []>;
- def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">,
+ def int_s390_etnd : ClangBuiltin<"__builtin_tx_nesting_depth">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty],
[IntrArgMemOnly, IntrWriteMem]>;
- def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">,
+ def int_s390_ppa_txassist : ClangBuiltin<"__builtin_tx_assist">,
Intrinsic<[], [llvm_i32_ty]>;
}
@@ -236,24 +236,24 @@ let TargetPrefix = "s390" in {
//===----------------------------------------------------------------------===//
let TargetPrefix = "s390" in {
- def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">,
+ def int_s390_lcbb : ClangBuiltin<"__builtin_s390_lcbb">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">,
+ def int_s390_vlbb : ClangBuiltin<"__builtin_s390_vlbb">,
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
- def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">,
+ def int_s390_vll : ClangBuiltin<"__builtin_s390_vll">,
Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">,
+ def int_s390_vpdi : ClangBuiltin<"__builtin_s390_vpdi">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">,
+ def int_s390_vperm : ClangBuiltin<"__builtin_s390_vperm">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
@@ -264,7 +264,7 @@ let TargetPrefix = "s390" in {
defm int_s390_vpkls : SystemZBinaryTruncHFG<"vpkls">;
defm int_s390_vpkls : SystemZBinaryTruncCCHFG;
- def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">,
+ def int_s390_vstl : ClangBuiltin<"__builtin_s390_vstl">,
Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrWriteMem]>;
@@ -314,7 +314,7 @@ let TargetPrefix = "s390" in {
def int_s390_vsrl : SystemZBinary<"vsrl", llvm_v16i8_ty>;
def int_s390_vsrlb : SystemZBinary<"vsrlb", llvm_v16i8_ty>;
- def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">,
+ def int_s390_vsldb : ClangBuiltin<"__builtin_s390_vsldb">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
@@ -382,7 +382,7 @@ let TargetPrefix = "s390" in {
def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty,
llvm_v16i8_ty>;
- def int_s390_vmslg : GCCBuiltin<"__builtin_s390_vmslg">,
+ def int_s390_vmslg : ClangBuiltin<"__builtin_s390_vmslg">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
@@ -411,21 +411,21 @@ let TargetPrefix = "s390" in {
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
// Instructions from the Vector Packed Decimal Facility
- def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">,
+ def int_s390_vlrl : ClangBuiltin<"__builtin_s390_vlrl">,
Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">,
+ def int_s390_vstrl : ClangBuiltin<"__builtin_s390_vstrl">,
Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrWriteMem]>;
// Instructions from the Vector Enhancements Facility 2
- def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">,
+ def int_s390_vsld : ClangBuiltin<"__builtin_s390_vsld">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">,
+ def int_s390_vsrd : ClangBuiltin<"__builtin_s390_vsrd">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
@@ -438,23 +438,23 @@ let TargetPrefix = "s390" in {
def int_s390_vstrszf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
// Instructions from the NNP-assist Facility
- def int_s390_vclfnhs : GCCBuiltin<"__builtin_s390_vclfnhs">,
+ def int_s390_vclfnhs : ClangBuiltin<"__builtin_s390_vclfnhs">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_s390_vclfnls : GCCBuiltin<"__builtin_s390_vclfnls">,
+ def int_s390_vclfnls : ClangBuiltin<"__builtin_s390_vclfnls">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_s390_vcrnfs : GCCBuiltin<"__builtin_s390_vcrnfs">,
+ def int_s390_vcrnfs : ClangBuiltin<"__builtin_s390_vcrnfs">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_s390_vcfn : GCCBuiltin<"__builtin_s390_vcfn">,
+ def int_s390_vcfn : ClangBuiltin<"__builtin_s390_vcfn">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_s390_vcnf : GCCBuiltin<"__builtin_s390_vcnf">,
+ def int_s390_vcnf : ClangBuiltin<"__builtin_s390_vcnf">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
@@ -467,9 +467,9 @@ let TargetPrefix = "s390" in {
//===----------------------------------------------------------------------===//
let TargetPrefix = "s390" in {
- def int_s390_sfpc : GCCBuiltin<"__builtin_s390_sfpc">,
+ def int_s390_sfpc : ClangBuiltin<"__builtin_s390_sfpc">,
Intrinsic<[], [llvm_i32_ty], []>;
- def int_s390_efpc : GCCBuiltin<"__builtin_s390_efpc">,
+ def int_s390_efpc : ClangBuiltin<"__builtin_s390_efpc">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty],
diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td
index be4bccef0cc1..15b828b320ea 100644
--- a/llvm/include/llvm/IR/IntrinsicsVE.td
+++ b/llvm/include/llvm/IR/IntrinsicsVE.td
@@ -2,31 +2,28 @@
// VEL Intrinsic instructions.
let TargetPrefix = "ve" in {
- def int_ve_vl_svob : GCCBuiltin<"__builtin_ve_vl_svob">,
- Intrinsic<[], [], [IntrHasSideEffects]>;
-
- def int_ve_vl_pack_f32p : GCCBuiltin<"__builtin_ve_vl_pack_f32p">,
+ def int_ve_vl_pack_f32p : ClangBuiltin<"__builtin_ve_vl_pack_f32p">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
[IntrReadMem]>;
- def int_ve_vl_pack_f32a : GCCBuiltin<"__builtin_ve_vl_pack_f32a">,
+ def int_ve_vl_pack_f32a : ClangBuiltin<"__builtin_ve_vl_pack_f32a">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_ve_vl_extract_vm512u :
- GCCBuiltin<"__builtin_ve_vl_extract_vm512u">,
+ ClangBuiltin<"__builtin_ve_vl_extract_vm512u">,
Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>;
def int_ve_vl_extract_vm512l :
- GCCBuiltin<"__builtin_ve_vl_extract_vm512l">,
+ ClangBuiltin<"__builtin_ve_vl_extract_vm512l">,
Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>;
def int_ve_vl_insert_vm512u :
- GCCBuiltin<"__builtin_ve_vl_insert_vm512u">,
+ ClangBuiltin<"__builtin_ve_vl_insert_vm512u">,
Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>],
[IntrNoMem]>;
def int_ve_vl_insert_vm512l :
- GCCBuiltin<"__builtin_ve_vl_insert_vm512l">,
+ ClangBuiltin<"__builtin_ve_vl_insert_vm512l">,
Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>],
[IntrNoMem]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
index 67cbd307903d..554dd8557200 100644
--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -1,1213 +1,1257 @@
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : GCCBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : GCCBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : GCCBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : GCCBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : GCCBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstot_vssl : GCCBuiltin<"__builtin_ve_vl_vstot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstot_vssml : GCCBuiltin<"__builtin_ve_vl_vstot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu_vssl : GCCBuiltin<"__builtin_ve_vl_vstu_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu_vssml : GCCBuiltin<"__builtin_ve_vl_vstu_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssl : GCCBuiltin<"__builtin_ve_vl_vstunc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssml : GCCBuiltin<"__builtin_ve_vl_vstunc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl_vssl : GCCBuiltin<"__builtin_ve_vl_vstl_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl_vssml : GCCBuiltin<"__builtin_ve_vl_vstl_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssl : GCCBuiltin<"__builtin_ve_vl_vst2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssml : GCCBuiltin<"__builtin_ve_vl_vst2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pfchv_ssl : GCCBuiltin<"__builtin_ve_vl_pfchv_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>;
-let TargetPrefix = "ve" in def int_ve_vl_pfchvnc_ssl : GCCBuiltin<"__builtin_ve_vl_pfchvnc_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>;
-let TargetPrefix = "ve" in def int_ve_vl_lsv_vvss : GCCBuiltin<"__builtin_ve_vl_lsv_vvss">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i64>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvsl_svs : GCCBuiltin<"__builtin_ve_vl_lvsl_svs">, Intrinsic<[LLVMType<i64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvsd_svs : GCCBuiltin<"__builtin_ve_vl_lvsd_svs">, Intrinsic<[LLVMType<f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvss_svs : GCCBuiltin<"__builtin_ve_vl_lvss_svs">, Intrinsic<[LLVMType<f32>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvm_mmss : GCCBuiltin<"__builtin_ve_vl_lvm_mmss">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lvm_MMss : GCCBuiltin<"__builtin_ve_vl_lvm_MMss">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_svm_sms : GCCBuiltin<"__builtin_ve_vl_svm_sms">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i64>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_svm_sMs : GCCBuiltin<"__builtin_ve_vl_svm_sMs">, Intrinsic<[LLVMType<i64>], [LLVMType<v512i1>, LLVMType<i64>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsMvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vvvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vsvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vsvvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vand_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vsvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vseq_vl : GCCBuiltin<"__builtin_ve_vl_vseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vseq_vvl : GCCBuiltin<"__builtin_ve_vl_vseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vvl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vvl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseq_vl : GCCBuiltin<"__builtin_ve_vl_pvseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvseq_vvl : GCCBuiltin<"__builtin_ve_vl_pvseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsvl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vcp_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcp_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vex_vvmvl : GCCBuiltin<"__builtin_ve_vl_vex_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklat_ml : GCCBuiltin<"__builtin_ve_vl_vfmklat_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklaf_ml : GCCBuiltin<"__builtin_ve_vl_vfmklaf_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkat_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkat_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkaf_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkaf_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : GCCBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : GCCBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : GCCBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : GCCBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrand_vvl : GCCBuiltin<"__builtin_ve_vl_vrand_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrand_vvml : GCCBuiltin<"__builtin_ve_vl_vrand_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : GCCBuiltin<"__builtin_ve_vl_vror_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : GCCBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : GCCBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : GCCBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : GCCBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssl : GCCBuiltin<"__builtin_ve_vl_vscu_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssml : GCCBuiltin<"__builtin_ve_vl_vscu_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssl : GCCBuiltin<"__builtin_ve_vl_vscl_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssml : GCCBuiltin<"__builtin_ve_vl_vscl_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : GCCBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_andm_MMM : GCCBuiltin<"__builtin_ve_vl_andm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_orm_mmm : GCCBuiltin<"__builtin_ve_vl_orm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_orm_MMM : GCCBuiltin<"__builtin_ve_vl_orm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_xorm_mmm : GCCBuiltin<"__builtin_ve_vl_xorm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_xorm_MMM : GCCBuiltin<"__builtin_ve_vl_xorm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_eqvm_mmm : GCCBuiltin<"__builtin_ve_vl_eqvm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_eqvm_MMM : GCCBuiltin<"__builtin_ve_vl_eqvm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_nndm_mmm : GCCBuiltin<"__builtin_ve_vl_nndm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_nndm_MMM : GCCBuiltin<"__builtin_ve_vl_nndm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_negm_mm : GCCBuiltin<"__builtin_ve_vl_negm_mm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_negm_MM : GCCBuiltin<"__builtin_ve_vl_negm_MM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_pcvm_sml : GCCBuiltin<"__builtin_ve_vl_pcvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_lzvm_sml : GCCBuiltin<"__builtin_ve_vl_lzvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
-let TargetPrefix = "ve" in def int_ve_vl_tovm_sml : GCCBuiltin<"__builtin_ve_vl_tovm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
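The definitions removed above all attach their C-level builtin through the GCCBuiltin<...> TableGen class; the definitions re-added below use ClangBuiltin<...>, the upstream rename of that class. A minimal sketch of the rename for one mask intrinsic taken from the removed lines above; the "+" form is an assumption following the pattern of the added lines below, since the matching "+" line for this intrinsic lies outside this excerpt:

// before (as removed above): builtin named via the old GCCBuiltin class
-let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : GCCBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
// after (assumed, same shape as the added lines below): only the class name changes
+let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : ClangBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;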
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : ClangBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : ClangBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : ClangBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : ClangBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : ClangBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : ClangBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstot_vssl : ClangBuiltin<"__builtin_ve_vl_vstot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstot_vssml : ClangBuiltin<"__builtin_ve_vl_vstot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu_vssl : ClangBuiltin<"__builtin_ve_vl_vstu_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu_vssml : ClangBuiltin<"__builtin_ve_vl_vstu_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssl : ClangBuiltin<"__builtin_ve_vl_vstunc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssml : ClangBuiltin<"__builtin_ve_vl_vstunc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssl : ClangBuiltin<"__builtin_ve_vl_vstuot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssml : ClangBuiltin<"__builtin_ve_vl_vstuot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstuncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstuncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl_vssl : ClangBuiltin<"__builtin_ve_vl_vstl_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl_vssml : ClangBuiltin<"__builtin_ve_vl_vstl_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstlnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstlnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssl : ClangBuiltin<"__builtin_ve_vl_vstlot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssml : ClangBuiltin<"__builtin_ve_vl_vstlot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstlncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstlncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssl : ClangBuiltin<"__builtin_ve_vl_vst2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssml : ClangBuiltin<"__builtin_ve_vl_vst2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vst2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssml : ClangBuiltin<"__builtin_ve_vl_vst2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssl : ClangBuiltin<"__builtin_ve_vl_vst2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssml : ClangBuiltin<"__builtin_ve_vl_vst2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssl : ClangBuiltin<"__builtin_ve_vl_vst2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssml : ClangBuiltin<"__builtin_ve_vl_vst2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstu2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstu2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssl : ClangBuiltin<"__builtin_ve_vl_vstl2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssml : ClangBuiltin<"__builtin_ve_vl_vstl2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pfchv_ssl : ClangBuiltin<"__builtin_ve_vl_pfchv_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>;
+let TargetPrefix = "ve" in def int_ve_vl_pfchvnc_ssl : ClangBuiltin<"__builtin_ve_vl_pfchvnc_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>;
+let TargetPrefix = "ve" in def int_ve_vl_lsv_vvss : ClangBuiltin<"__builtin_ve_vl_lsv_vvss">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvsl_svs : ClangBuiltin<"__builtin_ve_vl_lvsl_svs">, Intrinsic<[LLVMType<i64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvsd_svs : ClangBuiltin<"__builtin_ve_vl_lvsd_svs">, Intrinsic<[LLVMType<f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvss_svs : ClangBuiltin<"__builtin_ve_vl_lvss_svs">, Intrinsic<[LLVMType<f32>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvm_mmss : ClangBuiltin<"__builtin_ve_vl_lvm_mmss">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvm_MMss : ClangBuiltin<"__builtin_ve_vl_lvm_MMss">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_svm_sms : ClangBuiltin<"__builtin_ve_vl_svm_sms">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_svm_sMs : ClangBuiltin<"__builtin_ve_vl_svm_sMs">, Intrinsic<[LLVMType<i64>], [LLVMType<v512i1>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsl : ClangBuiltin<"__builtin_ve_vl_vbrdd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrdd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrdd_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsl : ClangBuiltin<"__builtin_ve_vl_vbrdl_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrdl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrdl_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsl : ClangBuiltin<"__builtin_ve_vl_vbrds_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrds_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsl : ClangBuiltin<"__builtin_ve_vl_vbrdw_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsvl : ClangBuiltin<"__builtin_ve_vl_vbrdw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsmvl : ClangBuiltin<"__builtin_ve_vl_vbrdw_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsl : ClangBuiltin<"__builtin_ve_vl_pvbrd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsvl : ClangBuiltin<"__builtin_ve_vl_pvbrd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsMvl : ClangBuiltin<"__builtin_ve_vl_pvbrd_vsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvl : ClangBuiltin<"__builtin_ve_vl_vmv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vadduw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvaddu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvadds_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vaddsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvsubs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vsubsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmuluw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmulsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmulslw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivul_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivuw_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivswsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivswzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vdivsl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmpu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvcmps_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vcmpsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvmaxs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vminswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vminswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvmins_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmaxsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vminsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvl : ClangBuiltin<"__builtin_ve_vl_vand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvvl : ClangBuiltin<"__builtin_ve_vl_vand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvl : ClangBuiltin<"__builtin_ve_vl_vand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvvl : ClangBuiltin<"__builtin_ve_vl_vand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vand_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vand_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vand_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvl : ClangBuiltin<"__builtin_ve_vl_pvand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvl : ClangBuiltin<"__builtin_ve_vl_pvand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvand_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvand_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvl : ClangBuiltin<"__builtin_ve_vl_vor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvvl : ClangBuiltin<"__builtin_ve_vl_vor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvl : ClangBuiltin<"__builtin_ve_vl_vor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvvl : ClangBuiltin<"__builtin_ve_vl_vor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vor_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvl : ClangBuiltin<"__builtin_ve_vl_pvor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvl : ClangBuiltin<"__builtin_ve_vl_pvor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvl : ClangBuiltin<"__builtin_ve_vl_vxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvvl : ClangBuiltin<"__builtin_ve_vl_vxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvl : ClangBuiltin<"__builtin_ve_vl_vxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvvl : ClangBuiltin<"__builtin_ve_vl_vxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vxor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vxor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvxor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvl : ClangBuiltin<"__builtin_ve_vl_veqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvvl : ClangBuiltin<"__builtin_ve_vl_veqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvl : ClangBuiltin<"__builtin_ve_vl_veqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvvl : ClangBuiltin<"__builtin_ve_vl_veqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvmvl : ClangBuiltin<"__builtin_ve_vl_veqv_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvmvl : ClangBuiltin<"__builtin_ve_vl_veqv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pveqv_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldz_vvl : ClangBuiltin<"__builtin_ve_vl_vldz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldz_vvvl : ClangBuiltin<"__builtin_ve_vl_vldz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vldz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vldz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvldzlo_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvvl : ClangBuiltin<"__builtin_ve_vl_pvldzlo_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvldzlo_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvl : ClangBuiltin<"__builtin_ve_vl_pvldzup_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvvl : ClangBuiltin<"__builtin_ve_vl_pvldzup_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvldzup_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvl : ClangBuiltin<"__builtin_ve_vl_pvldz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvvl : ClangBuiltin<"__builtin_ve_vl_pvldz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvldz_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvl : ClangBuiltin<"__builtin_ve_vl_vpcnt_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvvl : ClangBuiltin<"__builtin_ve_vl_vpcnt_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvmvl : ClangBuiltin<"__builtin_ve_vl_vpcnt_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvpcntlo_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvvl : ClangBuiltin<"__builtin_ve_vl_pvpcntlo_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvpcntlo_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvl : ClangBuiltin<"__builtin_ve_vl_pvpcntup_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvvl : ClangBuiltin<"__builtin_ve_vl_pvpcntup_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvpcntup_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvl : ClangBuiltin<"__builtin_ve_vl_pvpcnt_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvvl : ClangBuiltin<"__builtin_ve_vl_pvpcnt_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvpcnt_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvl : ClangBuiltin<"__builtin_ve_vl_vbrv_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvvl : ClangBuiltin<"__builtin_ve_vl_vbrv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvmvl : ClangBuiltin<"__builtin_ve_vl_vbrv_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvbrvlo_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvvl : ClangBuiltin<"__builtin_ve_vl_pvbrvlo_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvbrvlo_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvl : ClangBuiltin<"__builtin_ve_vl_pvbrvup_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvvl : ClangBuiltin<"__builtin_ve_vl_pvbrvup_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvmvl : ClangBuiltin<"__builtin_ve_vl_pvbrvup_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvl : ClangBuiltin<"__builtin_ve_vl_pvbrv_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvvl : ClangBuiltin<"__builtin_ve_vl_pvbrv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvbrv_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vseq_vl : ClangBuiltin<"__builtin_ve_vl_vseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vseq_vvl : ClangBuiltin<"__builtin_ve_vl_vseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vl : ClangBuiltin<"__builtin_ve_vl_pvseqlo_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vvl : ClangBuiltin<"__builtin_ve_vl_pvseqlo_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vl : ClangBuiltin<"__builtin_ve_vl_pvsequp_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vvl : ClangBuiltin<"__builtin_ve_vl_pvsequp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseq_vl : ClangBuiltin<"__builtin_ve_vl_pvseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvseq_vvl : ClangBuiltin<"__builtin_ve_vl_pvseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsl : ClangBuiltin<"__builtin_ve_vl_vsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsll_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsll_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsrl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsrl_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vslawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vslawzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsla_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsl : ClangBuiltin<"__builtin_ve_vl_vslal_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vslal_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsrawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsrawzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsMvl : ClangBuiltin<"__builtin_ve_vl_pvsra_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsl : ClangBuiltin<"__builtin_ve_vl_vsral_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsmvl : ClangBuiltin<"__builtin_ve_vl_vsral_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssl : ClangBuiltin<"__builtin_ve_vl_vsfa_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssvl : ClangBuiltin<"__builtin_ve_vl_vsfa_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vsfa_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfaddd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfadds_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfadd_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfsubs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfsub_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuld_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmuls_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmul_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvl : ClangBuiltin<"__builtin_ve_vl_vfsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvl : ClangBuiltin<"__builtin_ve_vl_vfsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvvl : ClangBuiltin<"__builtin_ve_vl_vfsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmpd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfcmps_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfcmp_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmaxs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmax_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmind_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmins_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmin_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvmvl : ClangBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvMvl : ClangBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvl : ClangBuiltin<"__builtin_ve_vl_vrcpd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvvl : ClangBuiltin<"__builtin_ve_vl_vrcpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvl : ClangBuiltin<"__builtin_ve_vl_vrcps_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvvl : ClangBuiltin<"__builtin_ve_vl_vrcps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvl : ClangBuiltin<"__builtin_ve_vl_pvrcp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvvl : ClangBuiltin<"__builtin_ve_vl_pvrcp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrt_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrt_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvvl : ClangBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvvl : ClangBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvl : ClangBuiltin<"__builtin_ve_vl_pvcvtws_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcvtws_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvcvtws_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvl : ClangBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvMvl : ClangBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtld_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtld_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtldrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtldrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcvtldrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtdw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtdw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvl : ClangBuiltin<"__builtin_ve_vl_pvcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvvl : ClangBuiltin<"__builtin_ve_vl_pvcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtdl_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtdl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtds_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvl : ClangBuiltin<"__builtin_ve_vl_vcvtsd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvvl : ClangBuiltin<"__builtin_ve_vl_vcvtsd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvml : ClangBuiltin<"__builtin_ve_vl_vmrg_vvvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvmvl : ClangBuiltin<"__builtin_ve_vl_vmrg_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvml : ClangBuiltin<"__builtin_ve_vl_vmrg_vsvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvmvl : ClangBuiltin<"__builtin_ve_vl_vmrg_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vvvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMvl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vsvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMvl : ClangBuiltin<"__builtin_ve_vl_vmrgw_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsl : ClangBuiltin<"__builtin_ve_vl_vshf_vvvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsvl : ClangBuiltin<"__builtin_ve_vl_vshf_vvvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcp_vvmvl : ClangBuiltin<"__builtin_ve_vl_vcp_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vex_vvmvl : ClangBuiltin<"__builtin_ve_vl_vex_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
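+// The vfmk ("vector form mask") block below is generated from a fixed grid:
+// element kind (l = i64, w = i32, d = double, s = float), condition code
+// (gt, lt, ne, eq, ge, le, num, nan and the *nan combinations, plus the
+// at/af always-true/always-false forms), and an optional v256i1 input mask
+// (_mvml vs _mvl). Because this file is auto-generated, every pair is
+// spelled out; a hand-written equivalent of one pair might use a multiclass
+// like the hypothetical sketch below (illustrative only, not part of this
+// change):
+//
+//   multiclass vel_vfmk_pair<string b> {
+//     def _mvl  : ClangBuiltin<"__builtin_ve_vl_" # b # "_mvl">,
+//                 Intrinsic<[LLVMType<v256i1>],
+//                           [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+//     def _mvml : ClangBuiltin<"__builtin_ve_vl_" # b # "_mvml">,
+//                 Intrinsic<[LLVMType<v256i1>],
+//                           [LLVMType<v256f64>, LLVMType<v256i1>,
+//                            LLVMType<i32>], [IntrNoMem]>;
+//   }
+//   let TargetPrefix = "ve" in
+//     defm int_ve_vl_vfmklgt : vel_vfmk_pair<"vfmklgt">;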
+let TargetPrefix = "ve" in def int_ve_vl_vfmklat_ml : ClangBuiltin<"__builtin_ve_vl_vfmklat_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklaf_ml : ClangBuiltin<"__builtin_ve_vl_vfmklaf_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkat_Ml : ClangBuiltin<"__builtin_ve_vl_pvfmkat_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkaf_Ml : ClangBuiltin<"__builtin_ve_vl_pvfmkaf_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkllt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkllt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkleq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkleq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkleqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkleqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmklgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmklgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkllenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkllenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkweq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkweq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkweqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkweqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkwlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkwlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
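+// Packed (pvfmkw*) lo/up forms: same signatures as the unpacked forms above,
+// but the comparison applies to the lower (lo) or upper (up) 32-bit half of
+// each packed 64-bit element, still producing a v256i1 mask.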
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
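+// Packed forms producing a full v512i1 mask (_Mvl) cover both halves in one
+// operation; the masked variants (_MvMl) likewise take a v512i1 input mask.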
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkweq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkweq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkweqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkweqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkwlenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
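+// vfmkd*: the same condition grid applied to double-precision elements.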
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkdlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkdlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
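+// vfmks*: single-precision comparisons; signatures match the vfmkd forms.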
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkslt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkslt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkseq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkseq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkseqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkseqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmksgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmksgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvl : ClangBuiltin<"__builtin_ve_vl_vfmkslenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvml : ClangBuiltin<"__builtin_ve_vl_vfmkslenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
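+// pvfmks lo/up: packed single-precision comparisons on the lower/upper
+// 32-bit halves, producing v256i1 results.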
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvml : ClangBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
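+// pvfmks _Mvl/_MvMl: packed single-precision comparisons producing (and,
+// when masked, consuming) full v512i1 masks.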
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkslt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkseq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkseq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkseqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkseqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmksgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : ClangBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : ClangBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
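+// Horizontal sum reductions: vsumwsx/vsumwzx (i32 elements, sign-/zero-
+// extended), vsuml (i64), and vfsumd/vfsums (f64/f32). The _vvml forms take
+// a v256i1 mask selecting which elements participate.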
+let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : ClangBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : ClangBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : ClangBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : ClangBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : ClangBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : ClangBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : ClangBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : ClangBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : ClangBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : ClangBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvl : ClangBuiltin<"__builtin_ve_vl_vrmaxsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrmaxsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvl : ClangBuiltin<"__builtin_ve_vl_vrminslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvl : ClangBuiltin<"__builtin_ve_vl_vrminsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvvl : ClangBuiltin<"__builtin_ve_vl_vrminsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmaxslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmindfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmindfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrmindlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrmindlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrminsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrminsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvl : ClangBuiltin<"__builtin_ve_vl_vfrminslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvvl : ClangBuiltin<"__builtin_ve_vl_vfrminslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrand_vvl : ClangBuiltin<"__builtin_ve_vl_vrand_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrand_vvml : ClangBuiltin<"__builtin_ve_vl_vrand_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : ClangBuiltin<"__builtin_ve_vl_vror_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : ClangBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : ClangBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : ClangBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : ClangBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : ClangBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : ClangBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : ClangBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vscnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssl : ClangBuiltin<"__builtin_ve_vl_vscu_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssml : ClangBuiltin<"__builtin_ve_vl_vscu_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssl : ClangBuiltin<"__builtin_ve_vl_vscunc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssml : ClangBuiltin<"__builtin_ve_vl_vscunc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscuot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscuot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssl : ClangBuiltin<"__builtin_ve_vl_vscuncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssml : ClangBuiltin<"__builtin_ve_vl_vscuncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssl : ClangBuiltin<"__builtin_ve_vl_vscl_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssml : ClangBuiltin<"__builtin_ve_vl_vscl_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssl : ClangBuiltin<"__builtin_ve_vl_vsclnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssml : ClangBuiltin<"__builtin_ve_vl_vsclnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssl : ClangBuiltin<"__builtin_ve_vl_vsclot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssml : ClangBuiltin<"__builtin_ve_vl_vsclot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssl : ClangBuiltin<"__builtin_ve_vl_vsclncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssml : ClangBuiltin<"__builtin_ve_vl_vsclncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : ClangBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_andm_MMM : ClangBuiltin<"__builtin_ve_vl_andm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_orm_mmm : ClangBuiltin<"__builtin_ve_vl_orm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_orm_MMM : ClangBuiltin<"__builtin_ve_vl_orm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_xorm_mmm : ClangBuiltin<"__builtin_ve_vl_xorm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_xorm_MMM : ClangBuiltin<"__builtin_ve_vl_xorm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_eqvm_mmm : ClangBuiltin<"__builtin_ve_vl_eqvm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_eqvm_MMM : ClangBuiltin<"__builtin_ve_vl_eqvm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_nndm_mmm : ClangBuiltin<"__builtin_ve_vl_nndm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_nndm_MMM : ClangBuiltin<"__builtin_ve_vl_nndm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_negm_mm : ClangBuiltin<"__builtin_ve_vl_negm_mm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_negm_MM : ClangBuiltin<"__builtin_ve_vl_negm_MM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pcvm_sml : ClangBuiltin<"__builtin_ve_vl_pcvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lzvm_sml : ClangBuiltin<"__builtin_ve_vl_lzvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_tovm_sml : ClangBuiltin<"__builtin_ve_vl_tovm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lcr_sss : ClangBuiltin<"__builtin_ve_vl_lcr_sss">, Intrinsic<[LLVMType<i64>], [LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_scr_sss : ClangBuiltin<"__builtin_ve_vl_scr_sss">, Intrinsic<[], [LLVMType<i64>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem, IntrHasSideEffects]>;
+let TargetPrefix = "ve" in def int_ve_vl_tscr_ssss : ClangBuiltin<"__builtin_ve_vl_tscr_ssss">, Intrinsic<[LLVMType<i64>], [LLVMType<i64>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem, IntrHasSideEffects]>;
+let TargetPrefix = "ve" in def int_ve_vl_fidcr_sss : ClangBuiltin<"__builtin_ve_vl_fidcr_sss">, Intrinsic<[LLVMType<i64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem, IntrHasSideEffects]>;
+let TargetPrefix = "ve" in def int_ve_vl_fencei : ClangBuiltin<"__builtin_ve_vl_fencei">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
+let TargetPrefix = "ve" in def int_ve_vl_fencem_s : ClangBuiltin<"__builtin_ve_vl_fencem_s">, Intrinsic<[], [LLVMType<i32>], [IntrNoMem, IntrHasSideEffects]>;
+let TargetPrefix = "ve" in def int_ve_vl_fencec_s : ClangBuiltin<"__builtin_ve_vl_fencec_s">, Intrinsic<[], [LLVMType<i32>], [IntrNoMem, IntrHasSideEffects]>;
+let TargetPrefix = "ve" in def int_ve_vl_svob : ClangBuiltin<"__builtin_ve_vl_svob">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index aecc3d91fae7..f313be1b2235 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -31,6 +31,10 @@ def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty],
//===----------------------------------------------------------------------===//
def int_wasm_ref_null_extern : Intrinsic<[llvm_externref_ty], [], [IntrNoMem]>;
def int_wasm_ref_null_func : Intrinsic<[llvm_funcref_ty], [], [IntrNoMem]>;
+def int_wasm_ref_is_null_extern : Intrinsic<[llvm_i32_ty], [llvm_externref_ty],
+ [IntrNoMem], "llvm.wasm.ref.is_null.extern">;
+def int_wasm_ref_is_null_func : Intrinsic<[llvm_i32_ty], [llvm_funcref_ty],
+ [IntrNoMem], "llvm.wasm.ref.is_null.func">;
//===----------------------------------------------------------------------===//
// Table intrinsics
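
The two ref.is_null intrinsics added above are defined only at the IR level in this hunk (note the explicit overloaded-name strings and the absence of a ClangBuiltin mapping). A hedged sketch of what a frontend wrapper could look like, using Clang's __externref_t reference type; the builtin name below is hypothetical, not something this diff introduces:

    // Sketch only: returns 1 when the external reference is null.
    // __builtin_wasm_ref_is_null_extern is assumed here, not shown in the diff.
    int slot_is_empty(__externref_t ref) {
      return __builtin_wasm_ref_is_null_extern(ref);
    }
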
@@ -256,16 +260,30 @@ def int_wasm_relaxed_trunc_unsigned:
[llvm_v4f32_ty],
[IntrNoMem, IntrSpeculatable]>;
-def int_wasm_relaxed_trunc_zero_signed:
+def int_wasm_relaxed_trunc_signed_zero:
Intrinsic<[llvm_v4i32_ty],
[llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
-def int_wasm_relaxed_trunc_zero_unsigned:
+def int_wasm_relaxed_trunc_unsigned_zero:
Intrinsic<[llvm_v4i32_ty],
[llvm_v2f64_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_relaxed_q15mulr_signed:
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_dot_i8x16_i7x16_signed:
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_dot_i8x16_i7x16_add_signed:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics
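
The renames above flip the suffix order (trunc_zero_signed -> trunc_signed_zero) to match the final relaxed-SIMD instruction spellings, and the hunk adds relaxed q15mulr and i8x16/i7x16 dot-product intrinsics. As a reference for the new dot product's semantics (not the intrinsic itself), plain C computing what i16x8.dot_i8x16_i7x16_s produces when the second operand's lanes fit in 7 bits, the only case the relaxed spec pins down:

    #include <stdint.h>
    // Pairwise products of signed bytes, adjacent pairs summed into i16 lanes.
    void dot_i8x16_i7x16_s(const int8_t a[16], const int8_t b[16], int16_t out[8]) {
      for (int i = 0; i < 8; i++)               /* b lanes assumed in 0..127 */
        out[i] = (int16_t)(a[2 * i] * b[2 * i] + a[2 * i + 1] * b[2 * i + 1]);
    }
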
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 8de737a1c7a5..0930abcc0993 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -31,20 +31,20 @@ let TargetPrefix = "x86" in {
//===----------------------------------------------------------------------===//
// FLAGS.
let TargetPrefix = "x86" in {
- def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">,
+ def int_x86_flags_read_u32 : ClangBuiltin<"__builtin_ia32_readeflags_u32">,
Intrinsic<[llvm_i32_ty], [], []>;
- def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">,
+ def int_x86_flags_read_u64 : ClangBuiltin<"__builtin_ia32_readeflags_u64">,
Intrinsic<[llvm_i64_ty], [], []>;
- def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">,
+ def int_x86_flags_write_u32 : ClangBuiltin<"__builtin_ia32_writeeflags_u32">,
Intrinsic<[], [llvm_i32_ty], []>;
- def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">,
+ def int_x86_flags_write_u64 : ClangBuiltin<"__builtin_ia32_writeeflags_u64">,
Intrinsic<[], [llvm_i64_ty], []>;
}
//===----------------------------------------------------------------------===//
// Read Time Stamp Counter.
let TargetPrefix = "x86" in {
- def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
+ def int_x86_rdtsc : ClangBuiltin<"__builtin_ia32_rdtsc">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_x86_rdtscp :
Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
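
The bulk of this file's diff is the mechanical GCCBuiltin -> ClangBuiltin rename seen above: the TableGen class tying an intrinsic to a frontend builtin changes name, while the builtin strings and intrinsic signatures are untouched, so user code compiles unchanged. For example, this still lowers to llvm.x86.flags.read.u64 exactly as before:

    #include <stdint.h>
    // Unchanged by the rename: the builtin string is identical on both sides of the diff.
    uint64_t read_flags(void) {
      return __builtin_ia32_readeflags_u64();
    }
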
@@ -52,42 +52,52 @@ let TargetPrefix = "x86" in {
// Read Performance-Monitoring Counter.
let TargetPrefix = "x86" in {
- def int_x86_rdpmc : GCCBuiltin<"__builtin_ia32_rdpmc">,
+ def int_x86_rdpmc : ClangBuiltin<"__builtin_ia32_rdpmc">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>;
}
// Read processor ID.
let TargetPrefix = "x86" in {
- def int_x86_rdpid : GCCBuiltin<"__builtin_ia32_rdpid">,
+ def int_x86_rdpid : ClangBuiltin<"__builtin_ia32_rdpid">,
Intrinsic<[llvm_i32_ty], [], []>;
}
+// Lock bit test.
+let TargetPrefix = "x86" in {
+ def int_x86_atomic_bts : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<1>>]>;
+ def int_x86_atomic_btc : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<1>>]>;
+ def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<1>>]>;
+}
+
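+// The three atomic bit-test intrinsics above take a pointer plus an i8 bit
+// index constrained to an immediate (ImmArg<ArgIndex<1>>) and return an
+// integer result, apparently backing lock bts/btc/btr lowering; no
+// ClangBuiltin mapping is attached in this hunk. For orientation, the
+// equivalent operation in portable C11 atomics, which x86 backends may
+// already turn into lock bts when the mask is a single constant bit:
+//
+//   #include <stdatomic.h>
+//   #include <stdint.h>
+//   /* Reference semantics of bit-test-and-set: set bit n atomically,
+//      return the bit's previous value. */
+//   int bit_test_and_set(_Atomic uint64_t *p, unsigned n) {
+//     uint64_t mask = 1ull << n;
+//     return (atomic_fetch_or(p, mask) & mask) != 0;
+//   }
+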
//===----------------------------------------------------------------------===//
// CET SS
let TargetPrefix = "x86" in {
- def int_x86_incsspd : GCCBuiltin<"__builtin_ia32_incsspd">,
+ def int_x86_incsspd : ClangBuiltin<"__builtin_ia32_incsspd">,
Intrinsic<[], [llvm_i32_ty], []>;
- def int_x86_incsspq : GCCBuiltin<"__builtin_ia32_incsspq">,
+ def int_x86_incsspq : ClangBuiltin<"__builtin_ia32_incsspq">,
Intrinsic<[], [llvm_i64_ty], []>;
- def int_x86_rdsspd : GCCBuiltin<"__builtin_ia32_rdsspd">,
+ def int_x86_rdsspd : ClangBuiltin<"__builtin_ia32_rdsspd">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
- def int_x86_rdsspq : GCCBuiltin<"__builtin_ia32_rdsspq">,
+ def int_x86_rdsspq : ClangBuiltin<"__builtin_ia32_rdsspq">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
- def int_x86_saveprevssp : GCCBuiltin<"__builtin_ia32_saveprevssp">,
+ def int_x86_saveprevssp : ClangBuiltin<"__builtin_ia32_saveprevssp">,
Intrinsic<[], [], []>;
- def int_x86_rstorssp : GCCBuiltin<"__builtin_ia32_rstorssp">,
+ def int_x86_rstorssp : ClangBuiltin<"__builtin_ia32_rstorssp">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_wrssd : GCCBuiltin<"__builtin_ia32_wrssd">,
+ def int_x86_wrssd : ClangBuiltin<"__builtin_ia32_wrssd">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
- def int_x86_wrssq : GCCBuiltin<"__builtin_ia32_wrssq">,
+ def int_x86_wrssq : ClangBuiltin<"__builtin_ia32_wrssq">,
Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>;
- def int_x86_wrussd : GCCBuiltin<"__builtin_ia32_wrussd">,
+ def int_x86_wrussd : ClangBuiltin<"__builtin_ia32_wrussd">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
- def int_x86_wrussq : GCCBuiltin<"__builtin_ia32_wrussq">,
+ def int_x86_wrussq : ClangBuiltin<"__builtin_ia32_wrussq">,
Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>;
- def int_x86_setssbsy : GCCBuiltin<"__builtin_ia32_setssbsy">,
+ def int_x86_setssbsy : ClangBuiltin<"__builtin_ia32_setssbsy">,
Intrinsic<[], [], []>;
- def int_x86_clrssbsy : GCCBuiltin<"__builtin_ia32_clrssbsy">,
+ def int_x86_clrssbsy : ClangBuiltin<"__builtin_ia32_clrssbsy">,
Intrinsic<[], [llvm_ptr_ty], []>;
}
@@ -95,57 +105,57 @@ let TargetPrefix = "x86" in {
// 3DNow!
let TargetPrefix = "x86" in {
- def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
+ def int_x86_3dnow_pavgusb : ClangBuiltin<"__builtin_ia32_pavgusb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
+ def int_x86_3dnow_pf2id : ClangBuiltin<"__builtin_ia32_pf2id">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
+ def int_x86_3dnow_pfacc : ClangBuiltin<"__builtin_ia32_pfacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
+ def int_x86_3dnow_pfadd : ClangBuiltin<"__builtin_ia32_pfadd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
+ def int_x86_3dnow_pfcmpeq : ClangBuiltin<"__builtin_ia32_pfcmpeq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
+ def int_x86_3dnow_pfcmpge : ClangBuiltin<"__builtin_ia32_pfcmpge">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
+ def int_x86_3dnow_pfcmpgt : ClangBuiltin<"__builtin_ia32_pfcmpgt">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
+ def int_x86_3dnow_pfmax : ClangBuiltin<"__builtin_ia32_pfmax">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
+ def int_x86_3dnow_pfmin : ClangBuiltin<"__builtin_ia32_pfmin">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
+ def int_x86_3dnow_pfmul : ClangBuiltin<"__builtin_ia32_pfmul">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
+ def int_x86_3dnow_pfrcp : ClangBuiltin<"__builtin_ia32_pfrcp">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
+ def int_x86_3dnow_pfrcpit1 : ClangBuiltin<"__builtin_ia32_pfrcpit1">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
+ def int_x86_3dnow_pfrcpit2 : ClangBuiltin<"__builtin_ia32_pfrcpit2">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
+ def int_x86_3dnow_pfrsqrt : ClangBuiltin<"__builtin_ia32_pfrsqrt">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
+ def int_x86_3dnow_pfrsqit1 : ClangBuiltin<"__builtin_ia32_pfrsqit1">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
+ def int_x86_3dnow_pfsub : ClangBuiltin<"__builtin_ia32_pfsub">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
+ def int_x86_3dnow_pfsubr : ClangBuiltin<"__builtin_ia32_pfsubr">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
+ def int_x86_3dnow_pi2fd : ClangBuiltin<"__builtin_ia32_pi2fd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
+ def int_x86_3dnow_pmulhrw : ClangBuiltin<"__builtin_ia32_pmulhrw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
@@ -154,15 +164,15 @@ let TargetPrefix = "x86" in {
// 3DNow! extensions
let TargetPrefix = "x86" in {
- def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
+ def int_x86_3dnowa_pf2iw : ClangBuiltin<"__builtin_ia32_pf2iw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
+ def int_x86_3dnowa_pfnacc : ClangBuiltin<"__builtin_ia32_pfnacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
+ def int_x86_3dnowa_pfpnacc : ClangBuiltin<"__builtin_ia32_pfpnacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
+ def int_x86_3dnowa_pi2fw : ClangBuiltin<"__builtin_ia32_pi2fw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnowa_pswapd :
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
@@ -173,35 +183,35 @@ let TargetPrefix = "x86" in {
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
+ def int_x86_sse_rcp_ss : ClangBuiltin<"__builtin_ia32_rcpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
- def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">,
+ def int_x86_sse_rcp_ps : ClangBuiltin<"__builtin_ia32_rcpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
- def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">,
+ def int_x86_sse_rsqrt_ss : ClangBuiltin<"__builtin_ia32_rsqrtss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
- def int_x86_sse_rsqrt_ps : GCCBuiltin<"__builtin_ia32_rsqrtps">,
+ def int_x86_sse_rsqrt_ps : ClangBuiltin<"__builtin_ia32_rsqrtps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
- def int_x86_sse_min_ss : GCCBuiltin<"__builtin_ia32_minss">,
+ def int_x86_sse_min_ss : ClangBuiltin<"__builtin_ia32_minss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_min_ps : GCCBuiltin<"__builtin_ia32_minps">,
+ def int_x86_sse_min_ps : ClangBuiltin<"__builtin_ia32_minps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_max_ss : GCCBuiltin<"__builtin_ia32_maxss">,
+ def int_x86_sse_max_ss : ClangBuiltin<"__builtin_ia32_maxss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_max_ps : GCCBuiltin<"__builtin_ia32_maxps">,
+ def int_x86_sse_max_ps : ClangBuiltin<"__builtin_ia32_maxps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
}
// Comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
+ def int_x86_sse_cmp_ss : ClangBuiltin<"__builtin_ia32_cmpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
// NOTE: This comparison intrinsic is not used by clang as long as the
@@ -209,40 +219,40 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_cmp_ps :
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
+ def int_x86_sse_comieq_ss : ClangBuiltin<"__builtin_ia32_comieq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comilt_ss : GCCBuiltin<"__builtin_ia32_comilt">,
+ def int_x86_sse_comilt_ss : ClangBuiltin<"__builtin_ia32_comilt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comile_ss : GCCBuiltin<"__builtin_ia32_comile">,
+ def int_x86_sse_comile_ss : ClangBuiltin<"__builtin_ia32_comile">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comigt_ss : GCCBuiltin<"__builtin_ia32_comigt">,
+ def int_x86_sse_comigt_ss : ClangBuiltin<"__builtin_ia32_comigt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comige_ss : GCCBuiltin<"__builtin_ia32_comige">,
+ def int_x86_sse_comige_ss : ClangBuiltin<"__builtin_ia32_comige">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comineq_ss : GCCBuiltin<"__builtin_ia32_comineq">,
+ def int_x86_sse_comineq_ss : ClangBuiltin<"__builtin_ia32_comineq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomieq_ss : GCCBuiltin<"__builtin_ia32_ucomieq">,
+ def int_x86_sse_ucomieq_ss : ClangBuiltin<"__builtin_ia32_ucomieq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomilt_ss : GCCBuiltin<"__builtin_ia32_ucomilt">,
+ def int_x86_sse_ucomilt_ss : ClangBuiltin<"__builtin_ia32_ucomilt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomile_ss : GCCBuiltin<"__builtin_ia32_ucomile">,
+ def int_x86_sse_ucomile_ss : ClangBuiltin<"__builtin_ia32_ucomile">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomigt_ss : GCCBuiltin<"__builtin_ia32_ucomigt">,
+ def int_x86_sse_ucomigt_ss : ClangBuiltin<"__builtin_ia32_ucomigt">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomige_ss : GCCBuiltin<"__builtin_ia32_ucomige">,
+ def int_x86_sse_ucomige_ss : ClangBuiltin<"__builtin_ia32_ucomige">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomineq_ss : GCCBuiltin<"__builtin_ia32_ucomineq">,
+ def int_x86_sse_ucomineq_ss : ClangBuiltin<"__builtin_ia32_ucomineq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
}
@@ -250,27 +260,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">,
+ def int_x86_sse_cvtss2si : ClangBuiltin<"__builtin_ia32_cvtss2si">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtss2si64 : GCCBuiltin<"__builtin_ia32_cvtss2si64">,
+ def int_x86_sse_cvtss2si64 : ClangBuiltin<"__builtin_ia32_cvtss2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">,
+ def int_x86_sse_cvttss2si : ClangBuiltin<"__builtin_ia32_cvttss2si">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
+ def int_x86_sse_cvttss2si64 : ClangBuiltin<"__builtin_ia32_cvttss2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
+ def int_x86_sse_cvtps2pi : ClangBuiltin<"__builtin_ia32_cvtps2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
+ def int_x86_sse_cvttps2pi: ClangBuiltin<"__builtin_ia32_cvttps2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
+ def int_x86_sse_cvtpi2ps : ClangBuiltin<"__builtin_ia32_cvtpi2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
}
// Cacheability support ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">,
+ def int_x86_sse_sfence : ClangBuiltin<"__builtin_ia32_sfence">,
Intrinsic<[], [], []>;
}
@@ -291,7 +301,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_movmsk_ps : GCCBuiltin<"__builtin_ia32_movmskps">,
+ def int_x86_sse_movmsk_ps : ClangBuiltin<"__builtin_ia32_movmskps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
}
@@ -300,23 +310,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FP arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
+ def int_x86_sse2_min_sd : ClangBuiltin<"__builtin_ia32_minsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,
+ def int_x86_sse2_min_pd : ClangBuiltin<"__builtin_ia32_minpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,
+ def int_x86_sse2_max_sd : ClangBuiltin<"__builtin_ia32_maxsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_max_pd : GCCBuiltin<"__builtin_ia32_maxpd">,
+ def int_x86_sse2_max_pd : ClangBuiltin<"__builtin_ia32_maxpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// FP comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
+ def int_x86_sse2_cmp_sd : ClangBuiltin<"__builtin_ia32_cmpsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
// NOTE: This comparison intrinsic is not used by clang as long as the
@@ -324,176 +334,176 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_cmp_pd :
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
+ def int_x86_sse2_comieq_sd : ClangBuiltin<"__builtin_ia32_comisdeq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comilt_sd : GCCBuiltin<"__builtin_ia32_comisdlt">,
+ def int_x86_sse2_comilt_sd : ClangBuiltin<"__builtin_ia32_comisdlt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comile_sd : GCCBuiltin<"__builtin_ia32_comisdle">,
+ def int_x86_sse2_comile_sd : ClangBuiltin<"__builtin_ia32_comisdle">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comigt_sd : GCCBuiltin<"__builtin_ia32_comisdgt">,
+ def int_x86_sse2_comigt_sd : ClangBuiltin<"__builtin_ia32_comisdgt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comige_sd : GCCBuiltin<"__builtin_ia32_comisdge">,
+ def int_x86_sse2_comige_sd : ClangBuiltin<"__builtin_ia32_comisdge">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comineq_sd : GCCBuiltin<"__builtin_ia32_comisdneq">,
+ def int_x86_sse2_comineq_sd : ClangBuiltin<"__builtin_ia32_comisdneq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomieq_sd : GCCBuiltin<"__builtin_ia32_ucomisdeq">,
+ def int_x86_sse2_ucomieq_sd : ClangBuiltin<"__builtin_ia32_ucomisdeq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomilt_sd : GCCBuiltin<"__builtin_ia32_ucomisdlt">,
+ def int_x86_sse2_ucomilt_sd : ClangBuiltin<"__builtin_ia32_ucomisdlt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomile_sd : GCCBuiltin<"__builtin_ia32_ucomisdle">,
+ def int_x86_sse2_ucomile_sd : ClangBuiltin<"__builtin_ia32_ucomisdle">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomigt_sd : GCCBuiltin<"__builtin_ia32_ucomisdgt">,
+ def int_x86_sse2_ucomigt_sd : ClangBuiltin<"__builtin_ia32_ucomisdgt">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomige_sd : GCCBuiltin<"__builtin_ia32_ucomisdge">,
+ def int_x86_sse2_ucomige_sd : ClangBuiltin<"__builtin_ia32_ucomisdge">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomineq_sd : GCCBuiltin<"__builtin_ia32_ucomisdneq">,
+ def int_x86_sse2_ucomineq_sd : ClangBuiltin<"__builtin_ia32_ucomisdneq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
+ def int_x86_sse2_pmulhu_w : ClangBuiltin<"__builtin_ia32_pmulhuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
+ def int_x86_sse2_pmulh_w : ClangBuiltin<"__builtin_ia32_pmulhw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
+ def int_x86_sse2_pmadd_wd : ClangBuiltin<"__builtin_ia32_pmaddwd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
+ def int_x86_sse2_pavg_b : ClangBuiltin<"__builtin_ia32_pavgb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
+ def int_x86_sse2_pavg_w : ClangBuiltin<"__builtin_ia32_pavgw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
+ def int_x86_sse2_psad_bw : ClangBuiltin<"__builtin_ia32_psadbw128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
}
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,
+ def int_x86_sse2_psll_w : ClangBuiltin<"__builtin_ia32_psllw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,
+ def int_x86_sse2_psll_d : ClangBuiltin<"__builtin_ia32_pslld128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
+ def int_x86_sse2_psll_q : ClangBuiltin<"__builtin_ia32_psllq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
+ def int_x86_sse2_psrl_w : ClangBuiltin<"__builtin_ia32_psrlw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld128">,
+ def int_x86_sse2_psrl_d : ClangBuiltin<"__builtin_ia32_psrld128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
+ def int_x86_sse2_psrl_q : ClangBuiltin<"__builtin_ia32_psrlq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
+ def int_x86_sse2_psra_w : ClangBuiltin<"__builtin_ia32_psraw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
+ def int_x86_sse2_psra_d : ClangBuiltin<"__builtin_ia32_psrad128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
// Oddly these don't require an immediate due to a gcc compatibility issue.
- def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
+ def int_x86_sse2_pslli_w : ClangBuiltin<"__builtin_ia32_psllwi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
+ def int_x86_sse2_pslli_d : ClangBuiltin<"__builtin_ia32_pslldi128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
+ def int_x86_sse2_pslli_q : ClangBuiltin<"__builtin_ia32_psllqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
+ def int_x86_sse2_psrli_w : ClangBuiltin<"__builtin_ia32_psrlwi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
+ def int_x86_sse2_psrli_d : ClangBuiltin<"__builtin_ia32_psrldi128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
+ def int_x86_sse2_psrli_q : ClangBuiltin<"__builtin_ia32_psrlqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
+ def int_x86_sse2_psrai_w : ClangBuiltin<"__builtin_ia32_psrawi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
+ def int_x86_sse2_psrai_d : ClangBuiltin<"__builtin_ia32_psradi128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
}
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
+ def int_x86_sse2_cvtpd2dq : ClangBuiltin<"__builtin_ia32_cvtpd2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
+ def int_x86_sse2_cvttpd2dq : ClangBuiltin<"__builtin_ia32_cvttpd2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">,
+ def int_x86_sse2_cvtpd2ps : ClangBuiltin<"__builtin_ia32_cvtpd2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
+ def int_x86_sse2_cvtps2dq : ClangBuiltin<"__builtin_ia32_cvtps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
+ def int_x86_sse2_cvttps2dq : ClangBuiltin<"__builtin_ia32_cvttps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
+ def int_x86_sse2_cvtsd2si : ClangBuiltin<"__builtin_ia32_cvtsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
+ def int_x86_sse2_cvtsd2si64 : ClangBuiltin<"__builtin_ia32_cvtsd2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">,
+ def int_x86_sse2_cvttsd2si : ClangBuiltin<"__builtin_ia32_cvttsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
+ def int_x86_sse2_cvttsd2si64 : ClangBuiltin<"__builtin_ia32_cvttsd2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
+ def int_x86_sse2_cvtsd2ss : ClangBuiltin<"__builtin_ia32_cvtsd2ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
+ def int_x86_sse_cvtpd2pi : ClangBuiltin<"__builtin_ia32_cvtpd2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
+ def int_x86_sse_cvttpd2pi: ClangBuiltin<"__builtin_ia32_cvttpd2pi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">,
+ def int_x86_sse_cvtpi2pd : ClangBuiltin<"__builtin_ia32_cvtpi2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">,
+ def int_x86_sse2_packsswb_128 : ClangBuiltin<"__builtin_ia32_packsswb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">,
+ def int_x86_sse2_packssdw_128 : ClangBuiltin<"__builtin_ia32_packssdw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
+ def int_x86_sse2_packuswb_128 : ClangBuiltin<"__builtin_ia32_packuswb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
+ def int_x86_sse2_movmsk_pd : ClangBuiltin<"__builtin_ia32_movmskpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">,
+ def int_x86_sse2_pmovmskb_128 : ClangBuiltin<"__builtin_ia32_pmovmskb128">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
+ def int_x86_sse2_maskmov_dqu : ClangBuiltin<"__builtin_ia32_maskmovdqu">,
Intrinsic<[], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_ptr_ty], []>;
- def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
+ def int_x86_sse2_clflush : ClangBuiltin<"__builtin_ia32_clflush">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
+ def int_x86_sse2_lfence : ClangBuiltin<"__builtin_ia32_lfence">,
Intrinsic<[], [], []>;
- def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
+ def int_x86_sse2_mfence : ClangBuiltin<"__builtin_ia32_mfence">,
Intrinsic<[], [], []>;
- def int_x86_sse2_pause : GCCBuiltin<"__builtin_ia32_pause">,
+ def int_x86_sse2_pause : ClangBuiltin<"__builtin_ia32_pause">,
Intrinsic<[], [], []>;
}
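These last definitions have empty attribute lists (no IntrNoMem), so they are treated as having side effects. A minimal C sketch of the corresponding wrappers (assuming an SSE2 target; flush_line and spin_hint are illustrative names):

  #include <emmintrin.h>

  /* llvm.x86.sse2.clflush and llvm.x86.sse2.mfence */
  static void flush_line(const void *p) {
      _mm_clflush(p);
      _mm_mfence();
  }

  /* llvm.x86.sse2.pause, typically used in spin-wait loops */
  static void spin_hint(void) {
      _mm_pause();
  }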
@@ -502,42 +512,42 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Addition / subtraction ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">,
+ def int_x86_sse3_addsub_ps : ClangBuiltin<"__builtin_ia32_addsubps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">,
+ def int_x86_sse3_addsub_pd : ClangBuiltin<"__builtin_ia32_addsubpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// Horizontal ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">,
+ def int_x86_sse3_hadd_ps : ClangBuiltin<"__builtin_ia32_haddps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse3_hadd_pd : GCCBuiltin<"__builtin_ia32_haddpd">,
+ def int_x86_sse3_hadd_pd : ClangBuiltin<"__builtin_ia32_haddpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse3_hsub_ps : GCCBuiltin<"__builtin_ia32_hsubps">,
+ def int_x86_sse3_hsub_ps : ClangBuiltin<"__builtin_ia32_hsubps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse3_hsub_pd : GCCBuiltin<"__builtin_ia32_hsubpd">,
+ def int_x86_sse3_hsub_pd : ClangBuiltin<"__builtin_ia32_hsubpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
}
// Specialized unaligned load.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">,
+ def int_x86_sse3_ldu_dq : ClangBuiltin<"__builtin_ia32_lddqu">,
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
// Thread synchronization ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
+ def int_x86_sse3_monitor : ClangBuiltin<"__builtin_ia32_monitor">,
Intrinsic<[], [llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], []>;
- def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
+ def int_x86_sse3_mwait : ClangBuiltin<"__builtin_ia32_mwait">,
Intrinsic<[], [llvm_i32_ty,
llvm_i32_ty], []>;
}
@@ -547,112 +557,112 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Horizontal arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">,
+ def int_x86_ssse3_phadd_w : ClangBuiltin<"__builtin_ia32_phaddw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">,
+ def int_x86_ssse3_phadd_w_128 : ClangBuiltin<"__builtin_ia32_phaddw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">,
+ def int_x86_ssse3_phadd_d : ClangBuiltin<"__builtin_ia32_phaddd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">,
+ def int_x86_ssse3_phadd_d_128 : ClangBuiltin<"__builtin_ia32_phaddd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">,
+ def int_x86_ssse3_phadd_sw : ClangBuiltin<"__builtin_ia32_phaddsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">,
+ def int_x86_ssse3_phadd_sw_128 : ClangBuiltin<"__builtin_ia32_phaddsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">,
+ def int_x86_ssse3_phsub_w : ClangBuiltin<"__builtin_ia32_phsubw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">,
+ def int_x86_ssse3_phsub_w_128 : ClangBuiltin<"__builtin_ia32_phsubw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">,
+ def int_x86_ssse3_phsub_d : ClangBuiltin<"__builtin_ia32_phsubd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">,
+ def int_x86_ssse3_phsub_d_128 : ClangBuiltin<"__builtin_ia32_phsubd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">,
+ def int_x86_ssse3_phsub_sw : ClangBuiltin<"__builtin_ia32_phsubsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">,
+ def int_x86_ssse3_phsub_sw_128 : ClangBuiltin<"__builtin_ia32_phsubsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
+ def int_x86_ssse3_pmadd_ub_sw : ClangBuiltin<"__builtin_ia32_pmaddubsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
+ def int_x86_ssse3_pmadd_ub_sw_128 : ClangBuiltin<"__builtin_ia32_pmaddubsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
}
// Packed multiply high with round and scale
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
+ def int_x86_ssse3_pmul_hr_sw : ClangBuiltin<"__builtin_ia32_pmulhrsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
+ def int_x86_ssse3_pmul_hr_sw_128 : ClangBuiltin<"__builtin_ia32_pmulhrsw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
}
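A minimal C sketch of the 128-bit variant (assuming SSSE3; q15_mul is an illustrative name): the intrinsic computes a rounded Q15 fixed-point multiply-high, and the Commutative flag above reflects that the operands may be swapped.

  #include <tmmintrin.h>

  /* llvm.x86.ssse3.pmul.hr.sw.128: ((a*b) + 0x4000) >> 15 per lane */
  static __m128i q15_mul(__m128i a, __m128i b) {
      return _mm_mulhrs_epi16(a, b);
  }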
// Shuffle ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">,
+ def int_x86_ssse3_pshuf_b : ClangBuiltin<"__builtin_ia32_pshufb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
+ def int_x86_ssse3_pshuf_b_128 : ClangBuiltin<"__builtin_ia32_pshufb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
+ def int_x86_sse_pshuf_w : ClangBuiltin<"__builtin_ia32_pshufw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
// Sign ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">,
+ def int_x86_ssse3_psign_b : ClangBuiltin<"__builtin_ia32_psignb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">,
+ def int_x86_ssse3_psign_b_128 : ClangBuiltin<"__builtin_ia32_psignb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">,
+ def int_x86_ssse3_psign_w : ClangBuiltin<"__builtin_ia32_psignw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">,
+ def int_x86_ssse3_psign_w_128 : ClangBuiltin<"__builtin_ia32_psignw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">,
+ def int_x86_ssse3_psign_d : ClangBuiltin<"__builtin_ia32_psignd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">,
+ def int_x86_ssse3_psign_d_128 : ClangBuiltin<"__builtin_ia32_psignd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
}
// Absolute value ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">,
+ def int_x86_ssse3_pabs_b : ClangBuiltin<"__builtin_ia32_pabsb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">,
+ def int_x86_ssse3_pabs_w : ClangBuiltin<"__builtin_ia32_pabsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">,
+ def int_x86_ssse3_pabs_d : ClangBuiltin<"__builtin_ia32_pabsd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}
@@ -661,149 +671,149 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FP rounding ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
+ def int_x86_sse41_round_ss : ClangBuiltin<"__builtin_ia32_roundss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
+ def int_x86_sse41_round_ps : ClangBuiltin<"__builtin_ia32_roundps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
+ def int_x86_sse41_round_sd : ClangBuiltin<"__builtin_ia32_roundsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
+ def int_x86_sse41_round_pd : ClangBuiltin<"__builtin_ia32_roundpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
// Vector min element
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">,
+ def int_x86_sse41_phminposuw : ClangBuiltin<"__builtin_ia32_phminposuw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty],
[IntrNoMem]>;
}
// Advanced Encryption Standard (AES) Instructions
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,
+ def int_x86_aesni_aesimc : ClangBuiltin<"__builtin_ia32_aesimc128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">,
+ def int_x86_aesni_aesenc : ClangBuiltin<"__builtin_ia32_aesenc128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesenc_256 : GCCBuiltin<"__builtin_ia32_aesenc256">,
+ def int_x86_aesni_aesenc_256 : ClangBuiltin<"__builtin_ia32_aesenc256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesenc_512 : GCCBuiltin<"__builtin_ia32_aesenc512">,
+ def int_x86_aesni_aesenc_512 : ClangBuiltin<"__builtin_ia32_aesenc512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">,
+ def int_x86_aesni_aesenclast : ClangBuiltin<"__builtin_ia32_aesenclast128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesenclast_256 :
- GCCBuiltin<"__builtin_ia32_aesenclast256">,
+ ClangBuiltin<"__builtin_ia32_aesenclast256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesenclast_512 :
- GCCBuiltin<"__builtin_ia32_aesenclast512">,
+ ClangBuiltin<"__builtin_ia32_aesenclast512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">,
+ def int_x86_aesni_aesdec : ClangBuiltin<"__builtin_ia32_aesdec128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesdec_256 : GCCBuiltin<"__builtin_ia32_aesdec256">,
+ def int_x86_aesni_aesdec_256 : ClangBuiltin<"__builtin_ia32_aesdec256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesdec_512 : GCCBuiltin<"__builtin_ia32_aesdec512">,
+ def int_x86_aesni_aesdec_512 : ClangBuiltin<"__builtin_ia32_aesdec512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">,
+ def int_x86_aesni_aesdeclast : ClangBuiltin<"__builtin_ia32_aesdeclast128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesdeclast_256 :
- GCCBuiltin<"__builtin_ia32_aesdeclast256">,
+ ClangBuiltin<"__builtin_ia32_aesdeclast256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aesdeclast_512 :
- GCCBuiltin<"__builtin_ia32_aesdeclast512">,
+ ClangBuiltin<"__builtin_ia32_aesdeclast512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
def int_x86_aesni_aeskeygenassist :
- GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
+ ClangBuiltin<"__builtin_ia32_aeskeygenassist128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
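A minimal C sketch of the 128-bit round primitives (assuming a target with the AES feature enabled; aes_two_rounds and the rk* names are illustrative):

  #include <wmmintrin.h>

  /* llvm.x86.aesni.aesenc followed by llvm.x86.aesni.aesenclast:
     one middle round and the final round of AES encryption. */
  static __m128i aes_two_rounds(__m128i state, __m128i rk0, __m128i rk1) {
      state = _mm_aesenc_si128(state, rk0);
      return _mm_aesenclast_si128(state, rk1);
  }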
// PCLMUL instructions
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
+ def int_x86_pclmulqdq : ClangBuiltin<"__builtin_ia32_pclmulqdq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_pclmulqdq_256 : GCCBuiltin<"__builtin_ia32_pclmulqdq256">,
+ def int_x86_pclmulqdq_256 : ClangBuiltin<"__builtin_ia32_pclmulqdq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_pclmulqdq_512 : GCCBuiltin<"__builtin_ia32_pclmulqdq512">,
+ def int_x86_pclmulqdq_512 : ClangBuiltin<"__builtin_ia32_pclmulqdq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
// Vector pack
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">,
+ def int_x86_sse41_packusdw : ClangBuiltin<"__builtin_ia32_packusdw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
}
// Vector insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
+ def int_x86_sse41_insertps : ClangBuiltin<"__builtin_ia32_insertps128">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">,
+ def int_x86_sse41_pblendvb : ClangBuiltin<"__builtin_ia32_pblendvb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
- def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
+ def int_x86_sse41_blendvpd : ClangBuiltin<"__builtin_ia32_blendvpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty],
[IntrNoMem]>;
- def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">,
+ def int_x86_sse41_blendvps : ClangBuiltin<"__builtin_ia32_blendvps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_v4f32_ty],
[IntrNoMem]>;
}
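A minimal C sketch of the variable blend (assuming SSE4.1; select_ps is an illustrative name): the third operand acts as a per-lane mask, selecting by sign bit.

  #include <smmintrin.h>

  /* llvm.x86.sse41.blendvps: lane i is b[i] when the sign bit of
     mask[i] is set, otherwise a[i]. */
  static __m128 select_ps(__m128 a, __m128 b, __m128 mask) {
      return _mm_blendv_ps(a, b, mask);
  }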
// Vector dot product
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
+ def int_x86_sse41_dppd : ClangBuiltin<"__builtin_ia32_dppd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
+ def int_x86_sse41_dpps : ClangBuiltin<"__builtin_ia32_dpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
}
// Vector sum of absolute differences
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
+ def int_x86_sse41_mpsadbw : ClangBuiltin<"__builtin_ia32_mpsadbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
// Test instruction with bitwise comparison.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
+ def int_x86_sse41_ptestz : ClangBuiltin<"__builtin_ia32_ptestz128">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
+ def int_x86_sse41_ptestc : ClangBuiltin<"__builtin_ia32_ptestc128">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
+ def int_x86_sse41_ptestnzc : ClangBuiltin<"__builtin_ia32_ptestnzc128">,
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
}
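A minimal C sketch (assuming SSE4.1; vectors_disjoint is an illustrative name): the three intrinsics expose the ZF, CF, and "neither" results of a single PTEST.

  #include <smmintrin.h>

  /* llvm.x86.sse41.ptestz: returns 1 iff (a & b) is all zeros */
  static int vectors_disjoint(__m128i a, __m128i b) {
      return _mm_testz_si128(a, b);
  }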
@@ -814,81 +824,81 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Miscellaneous
// CRC Instruction
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse42_crc32_32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">,
+ def int_x86_sse42_crc32_32_8 : ClangBuiltin<"__builtin_ia32_crc32qi">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_sse42_crc32_32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">,
+ def int_x86_sse42_crc32_32_16 : ClangBuiltin<"__builtin_ia32_crc32hi">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty],
[IntrNoMem]>;
- def int_x86_sse42_crc32_32_32 : GCCBuiltin<"__builtin_ia32_crc32si">,
+ def int_x86_sse42_crc32_32_32 : ClangBuiltin<"__builtin_ia32_crc32si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_sse42_crc32_64_64 : GCCBuiltin<"__builtin_ia32_crc32di">,
+ def int_x86_sse42_crc32_64_64 : ClangBuiltin<"__builtin_ia32_crc32di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
}
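A minimal C sketch of the byte-wise variant (assuming SSE4.2; crc32_bytes is an illustrative name): each step accumulates through llvm.x86.sse42.crc32.32.8.

  #include <nmmintrin.h>
  #include <stddef.h>

  /* CRC32-C accumulation, one byte at a time */
  static unsigned crc32_bytes(unsigned crc, const unsigned char *p, size_t n) {
      for (size_t i = 0; i < n; ++i)
          crc = _mm_crc32_u8(crc, p[i]);
      return crc;
  }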
// String/text processing ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
+ def int_x86_sse42_pcmpistrm128 : ClangBuiltin<"__builtin_ia32_pcmpistrm128">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
+ def int_x86_sse42_pcmpistri128 : ClangBuiltin<"__builtin_ia32_pcmpistri128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
+ def int_x86_sse42_pcmpistria128 : ClangBuiltin<"__builtin_ia32_pcmpistria128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
+ def int_x86_sse42_pcmpistric128 : ClangBuiltin<"__builtin_ia32_pcmpistric128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
+ def int_x86_sse42_pcmpistrio128 : ClangBuiltin<"__builtin_ia32_pcmpistrio128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
+ def int_x86_sse42_pcmpistris128 : ClangBuiltin<"__builtin_ia32_pcmpistris128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
+ def int_x86_sse42_pcmpistriz128 : ClangBuiltin<"__builtin_ia32_pcmpistriz128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
+ def int_x86_sse42_pcmpestrm128 : ClangBuiltin<"__builtin_ia32_pcmpestrm128">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
+ def int_x86_sse42_pcmpestri128 : ClangBuiltin<"__builtin_ia32_pcmpestri128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
+ def int_x86_sse42_pcmpestria128 : ClangBuiltin<"__builtin_ia32_pcmpestria128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
+ def int_x86_sse42_pcmpestric128 : ClangBuiltin<"__builtin_ia32_pcmpestric128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
+ def int_x86_sse42_pcmpestrio128 : ClangBuiltin<"__builtin_ia32_pcmpestrio128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
+ def int_x86_sse42_pcmpestris128 : ClangBuiltin<"__builtin_ia32_pcmpestris128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
+ def int_x86_sse42_pcmpestriz128 : ClangBuiltin<"__builtin_ia32_pcmpestriz128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
@@ -899,17 +909,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// SSE4A
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
+ def int_x86_sse4a_extrqi : ClangBuiltin<"__builtin_ia32_extrqi">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
- def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
+ def int_x86_sse4a_extrq : ClangBuiltin<"__builtin_ia32_extrq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
+ def int_x86_sse4a_insertqi : ClangBuiltin<"__builtin_ia32_insertqi">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
- def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
+ def int_x86_sse4a_insertq : ClangBuiltin<"__builtin_ia32_insertq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
}
@@ -918,177 +928,177 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">,
+ def int_x86_avx_addsub_pd_256 : ClangBuiltin<"__builtin_ia32_addsubpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">,
+ def int_x86_avx_addsub_ps_256 : ClangBuiltin<"__builtin_ia32_addsubps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">,
+ def int_x86_avx_max_pd_256 : ClangBuiltin<"__builtin_ia32_maxpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
+ def int_x86_avx_max_ps_256 : ClangBuiltin<"__builtin_ia32_maxps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
+ def int_x86_avx_min_pd_256 : ClangBuiltin<"__builtin_ia32_minpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
+ def int_x86_avx_min_ps_256 : ClangBuiltin<"__builtin_ia32_minps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
+ def int_x86_avx_rsqrt_ps_256 : ClangBuiltin<"__builtin_ia32_rsqrtps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
+ def int_x86_avx_rcp_ps_256 : ClangBuiltin<"__builtin_ia32_rcpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
+ def int_x86_avx_round_pd_256 : ClangBuiltin<"__builtin_ia32_roundpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
+ def int_x86_avx_round_ps_256 : ClangBuiltin<"__builtin_ia32_roundps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
// Horizontal ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">,
+ def int_x86_avx_hadd_pd_256 : ClangBuiltin<"__builtin_ia32_haddpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">,
+ def int_x86_avx_hsub_ps_256 : ClangBuiltin<"__builtin_ia32_hsubps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">,
+ def int_x86_avx_hsub_pd_256 : ClangBuiltin<"__builtin_ia32_hsubpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">,
+ def int_x86_avx_hadd_ps_256 : ClangBuiltin<"__builtin_ia32_haddps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector permutation
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">,
+ def int_x86_avx_vpermilvar_pd : ClangBuiltin<"__builtin_ia32_vpermilvarpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">,
+ def int_x86_avx_vpermilvar_ps : ClangBuiltin<"__builtin_ia32_vpermilvarps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx_vpermilvar_pd_256 :
- GCCBuiltin<"__builtin_ia32_vpermilvarpd256">,
+ ClangBuiltin<"__builtin_ia32_vpermilvarpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx_vpermilvar_ps_256 :
- GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
+ ClangBuiltin<"__builtin_ia32_vpermilvarps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_d_128 :
- GCCBuiltin<"__builtin_ia32_vpermi2vard128">,
+ ClangBuiltin<"__builtin_ia32_vpermi2vard128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_d_256 :
- GCCBuiltin<"__builtin_ia32_vpermi2vard256">,
+ ClangBuiltin<"__builtin_ia32_vpermi2vard256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_d_512 :
- GCCBuiltin<"__builtin_ia32_vpermi2vard512">,
+ ClangBuiltin<"__builtin_ia32_vpermi2vard512">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
def int_x86_avx512_vpermi2var_hi_128 :
- GCCBuiltin<"__builtin_ia32_vpermi2varhi128">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varhi128">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_hi_256 :
- GCCBuiltin<"__builtin_ia32_vpermi2varhi256">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varhi256">,
Intrinsic<[llvm_v16i16_ty],
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
[IntrNoMem]>;
def int_x86_avx512_vpermi2var_hi_512 :
- GCCBuiltin<"__builtin_ia32_vpermi2varhi512">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varhi512">,
Intrinsic<[llvm_v32i16_ty],
[llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
def int_x86_avx512_vpermi2var_pd_128 :
- GCCBuiltin<"__builtin_ia32_vpermi2varpd128">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varpd128">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_pd_256 :
- GCCBuiltin<"__builtin_ia32_vpermi2varpd256">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_pd_512 :
- GCCBuiltin<"__builtin_ia32_vpermi2varpd512">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varpd512">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_ps_128 :
- GCCBuiltin<"__builtin_ia32_vpermi2varps128">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varps128">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_ps_256 :
- GCCBuiltin<"__builtin_ia32_vpermi2varps256">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_ps_512 :
- GCCBuiltin<"__builtin_ia32_vpermi2varps512">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varps512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
def int_x86_avx512_vpermi2var_q_128 :
- GCCBuiltin<"__builtin_ia32_vpermi2varq128">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varq128">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_q_256 :
- GCCBuiltin<"__builtin_ia32_vpermi2varq256">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varq256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_q_512 :
- GCCBuiltin<"__builtin_ia32_vpermi2varq512">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varq512">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_qi_128 :
- GCCBuiltin<"__builtin_ia32_vpermi2varqi128">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varqi128">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_qi_256 :
- GCCBuiltin<"__builtin_ia32_vpermi2varqi256">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varqi256">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx512_vpermi2var_qi_512 :
- GCCBuiltin<"__builtin_ia32_vpermi2varqi512">,
+ ClangBuiltin<"__builtin_ia32_vpermi2varqi512">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
def int_x86_avx512_vpermilvar_pd_512 :
- GCCBuiltin<"__builtin_ia32_vpermilvarpd512">,
+ ClangBuiltin<"__builtin_ia32_vpermilvarpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
def int_x86_avx512_vpermilvar_ps_512 :
- GCCBuiltin<"__builtin_ia32_vpermilvarps512">,
+ ClangBuiltin<"__builtin_ia32_vpermilvarps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
def int_x86_avx512_pshuf_b_512 :
- GCCBuiltin<"__builtin_ia32_pshufb512">,
+ ClangBuiltin<"__builtin_ia32_pshufb512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;
@@ -1097,49 +1107,49 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// GFNI Instructions
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_vgf2p8affineinvqb_128 :
- GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_vgf2p8affineinvqb_256 :
- GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_vgf2p8affineinvqb_512 :
- GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_vgf2p8affineqb_128 :
- GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_vgf2p8affineqb_256 :
- GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_vgf2p8affineqb_512 :
- GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_vgf2p8mulb_128 :
- GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_vgf2p8mulb_256 :
- GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v32qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8mulb_v32qi">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty],
[IntrNoMem]>;
def int_x86_vgf2p8mulb_512 :
- GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v64qi">,
+ ClangBuiltin<"__builtin_ia32_vgf2p8mulb_v64qi">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;
@@ -1147,17 +1157,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
+ def int_x86_avx_blendv_pd_256 : ClangBuiltin<"__builtin_ia32_blendvpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">,
+ def int_x86_avx_blendv_ps_256 : ClangBuiltin<"__builtin_ia32_blendvps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector dot product
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
+ def int_x86_avx_dp_ps_256 : ClangBuiltin<"__builtin_ia32_dpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
@@ -1175,63 +1185,63 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
+ def int_x86_avx_cvt_pd2_ps_256 : ClangBuiltin<"__builtin_ia32_cvtpd2ps256">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
+ def int_x86_avx_cvt_ps2dq_256 : ClangBuiltin<"__builtin_ia32_cvtps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
+ def int_x86_avx_cvtt_pd2dq_256 : ClangBuiltin<"__builtin_ia32_cvttpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
+ def int_x86_avx_cvt_pd2dq_256 : ClangBuiltin<"__builtin_ia32_cvtpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
+ def int_x86_avx_cvtt_ps2dq_256 : ClangBuiltin<"__builtin_ia32_cvttps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector bit test
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">,
+ def int_x86_avx_vtestz_pd : ClangBuiltin<"__builtin_ia32_vtestzpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">,
+ def int_x86_avx_vtestc_pd : ClangBuiltin<"__builtin_ia32_vtestcpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">,
+ def int_x86_avx_vtestnzc_pd : ClangBuiltin<"__builtin_ia32_vtestnzcpd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">,
+ def int_x86_avx_vtestz_ps : ClangBuiltin<"__builtin_ia32_vtestzps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">,
+ def int_x86_avx_vtestc_ps : ClangBuiltin<"__builtin_ia32_vtestcps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">,
+ def int_x86_avx_vtestnzc_ps : ClangBuiltin<"__builtin_ia32_vtestnzcps">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">,
+ def int_x86_avx_vtestz_pd_256 : ClangBuiltin<"__builtin_ia32_vtestzpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">,
+ def int_x86_avx_vtestc_pd_256 : ClangBuiltin<"__builtin_ia32_vtestcpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">,
+ def int_x86_avx_vtestnzc_pd_256 : ClangBuiltin<"__builtin_ia32_vtestnzcpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">,
+ def int_x86_avx_vtestz_ps_256 : ClangBuiltin<"__builtin_ia32_vtestzps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">,
+ def int_x86_avx_vtestc_ps_256 : ClangBuiltin<"__builtin_ia32_vtestcps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">,
+ def int_x86_avx_vtestnzc_ps_256 : ClangBuiltin<"__builtin_ia32_vtestnzcps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">,
+ def int_x86_avx_ptestz_256 : ClangBuiltin<"__builtin_ia32_ptestz256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">,
+ def int_x86_avx_ptestc_256 : ClangBuiltin<"__builtin_ia32_ptestc256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
+ def int_x86_avx_ptestnzc_256 : ClangBuiltin<"__builtin_ia32_ptestnzc256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
@@ -1254,67 +1264,67 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_fpclass_sd :
- GCCBuiltin<"__builtin_ia32_fpclasssd_mask">,
+ ClangBuiltin<"__builtin_ia32_fpclasssd_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_fpclass_ss :
- GCCBuiltin<"__builtin_ia32_fpclassss_mask">,
+ ClangBuiltin<"__builtin_ia32_fpclassss_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
// Vector extract sign mask
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">,
+ def int_x86_avx_movmsk_pd_256 : ClangBuiltin<"__builtin_ia32_movmskpd256">,
Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">,
+ def int_x86_avx_movmsk_ps_256 : ClangBuiltin<"__builtin_ia32_movmskps256">,
Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
}
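A minimal C sketch (assuming AVX; all_negative is an illustrative name): the intrinsic packs the four double-precision sign bits into the low bits of an int.

  #include <immintrin.h>

  /* llvm.x86.avx.movmsk.pd.256: bit i holds the sign bit of lane i */
  static int all_negative(__m256d v) {
      return _mm256_movemask_pd(v) == 0xF;
  }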
// Vector zero
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">,
+ def int_x86_avx_vzeroall : ClangBuiltin<"__builtin_ia32_vzeroall">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
- def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">,
+ def int_x86_avx_vzeroupper : ClangBuiltin<"__builtin_ia32_vzeroupper">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
}
// SIMD load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">,
+ def int_x86_avx_ldu_dq_256 : ClangBuiltin<"__builtin_ia32_lddqu256">,
Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
+ def int_x86_avx_maskload_pd : ClangBuiltin<"__builtin_ia32_maskloadpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2i64_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">,
+ def int_x86_avx_maskload_ps : ClangBuiltin<"__builtin_ia32_maskloadps">,
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">,
+ def int_x86_avx_maskload_pd_256 : ClangBuiltin<"__builtin_ia32_maskloadpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
+ def int_x86_avx_maskload_ps_256 : ClangBuiltin<"__builtin_ia32_maskloadps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
}
// Conditional store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
+ def int_x86_avx_maskstore_pd : ClangBuiltin<"__builtin_ia32_maskstorepd">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v2i64_ty, llvm_v2f64_ty], [IntrArgMemOnly]>;
- def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">,
+ def int_x86_avx_maskstore_ps : ClangBuiltin<"__builtin_ia32_maskstoreps">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v4i32_ty, llvm_v4f32_ty], [IntrArgMemOnly]>;
def int_x86_avx_maskstore_pd_256 :
- GCCBuiltin<"__builtin_ia32_maskstorepd256">,
+ ClangBuiltin<"__builtin_ia32_maskstorepd256">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v4i64_ty, llvm_v4f64_ty], [IntrArgMemOnly]>;
def int_x86_avx_maskstore_ps_256 :
- GCCBuiltin<"__builtin_ia32_maskstoreps256">,
+ ClangBuiltin<"__builtin_ia32_maskstoreps256">,
Intrinsic<[], [llvm_ptr_ty,
llvm_v8i32_ty, llvm_v8f32_ty], [IntrArgMemOnly]>;
}
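A minimal C sketch pairing the 128-bit load and store forms (assuming AVX; copy_masked is an illustrative name): lane i participates only when the top bit of mask element i is set; unselected lanes load as zero and their stores are suppressed, consistent with the IntrArgMemOnly attribute above.

  #include <immintrin.h>

  /* llvm.x86.avx.maskload.pd then llvm.x86.avx.maskstore.pd */
  static void copy_masked(double *dst, const double *src, __m128i mask) {
      __m128d v = _mm_maskload_pd(src, mask);
      _mm_maskstore_pd(dst, mask, v);
  }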
@@ -1334,229 +1344,229 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
+ def int_x86_avx2_pmulhu_w : ClangBuiltin<"__builtin_ia32_pmulhuw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
+ def int_x86_avx2_pmulh_w : ClangBuiltin<"__builtin_ia32_pmulhw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
+ def int_x86_avx2_pmadd_wd : ClangBuiltin<"__builtin_ia32_pmaddwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
+ def int_x86_avx2_pavg_b : ClangBuiltin<"__builtin_ia32_pavgb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
+ def int_x86_avx2_pavg_w : ClangBuiltin<"__builtin_ia32_pavgw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
+ def int_x86_avx2_psad_bw : ClangBuiltin<"__builtin_ia32_psadbw256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
}
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
+ def int_x86_avx2_psll_w : ClangBuiltin<"__builtin_ia32_psllw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,
+ def int_x86_avx2_psll_d : ClangBuiltin<"__builtin_ia32_pslld256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">,
+ def int_x86_avx2_psll_q : ClangBuiltin<"__builtin_ia32_psllq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">,
+ def int_x86_avx2_psrl_w : ClangBuiltin<"__builtin_ia32_psrlw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">,
+ def int_x86_avx2_psrl_d : ClangBuiltin<"__builtin_ia32_psrld256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">,
+ def int_x86_avx2_psrl_q : ClangBuiltin<"__builtin_ia32_psrlq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">,
+ def int_x86_avx2_psra_w : ClangBuiltin<"__builtin_ia32_psraw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">,
+ def int_x86_avx2_psra_d : ClangBuiltin<"__builtin_ia32_psrad256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
// Oddly these don't require an immediate due to a gcc compatibility issue.
- def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
+ def int_x86_avx2_pslli_w : ClangBuiltin<"__builtin_ia32_psllwi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">,
+ def int_x86_avx2_pslli_d : ClangBuiltin<"__builtin_ia32_pslldi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">,
+ def int_x86_avx2_pslli_q : ClangBuiltin<"__builtin_ia32_psllqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">,
+ def int_x86_avx2_psrli_w : ClangBuiltin<"__builtin_ia32_psrlwi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">,
+ def int_x86_avx2_psrli_d : ClangBuiltin<"__builtin_ia32_psrldi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">,
+ def int_x86_avx2_psrli_q : ClangBuiltin<"__builtin_ia32_psrlqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">,
+ def int_x86_avx2_psrai_w : ClangBuiltin<"__builtin_ia32_psrawi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">,
+ def int_x86_avx2_psrai_d : ClangBuiltin<"__builtin_ia32_psradi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128">,
+ def int_x86_avx512_psra_q_128 : ClangBuiltin<"__builtin_ia32_psraq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256">,
+ def int_x86_avx512_psra_q_256 : ClangBuiltin<"__builtin_ia32_psraq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
// Oddly these don't require an immediate due to a gcc compatibility issue.
- def int_x86_avx512_psrai_q_128 : GCCBuiltin<"__builtin_ia32_psraqi128">,
+ def int_x86_avx512_psrai_q_128 : ClangBuiltin<"__builtin_ia32_psraqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrai_q_256 : GCCBuiltin<"__builtin_ia32_psraqi256">,
+ def int_x86_avx512_psrai_q_256 : ClangBuiltin<"__builtin_ia32_psraqi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512">,
+ def int_x86_avx512_psll_w_512 : ClangBuiltin<"__builtin_ia32_psllw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx512_psll_d_512 : GCCBuiltin<"__builtin_ia32_pslld512">,
+ def int_x86_avx512_psll_d_512 : ClangBuiltin<"__builtin_ia32_pslld512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psll_q_512 : GCCBuiltin<"__builtin_ia32_psllq512">,
+ def int_x86_avx512_psll_q_512 : ClangBuiltin<"__builtin_ia32_psllq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512">,
+ def int_x86_avx512_psrl_w_512 : ClangBuiltin<"__builtin_ia32_psrlw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx512_psrl_d_512 : GCCBuiltin<"__builtin_ia32_psrld512">,
+ def int_x86_avx512_psrl_d_512 : ClangBuiltin<"__builtin_ia32_psrld512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrl_q_512 : GCCBuiltin<"__builtin_ia32_psrlq512">,
+ def int_x86_avx512_psrl_q_512 : ClangBuiltin<"__builtin_ia32_psrlq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512">,
+ def int_x86_avx512_psra_w_512 : ClangBuiltin<"__builtin_ia32_psraw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx512_psra_d_512 : GCCBuiltin<"__builtin_ia32_psrad512">,
+ def int_x86_avx512_psra_d_512 : ClangBuiltin<"__builtin_ia32_psrad512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psra_q_512 : GCCBuiltin<"__builtin_ia32_psraq512">,
+ def int_x86_avx512_psra_q_512 : ClangBuiltin<"__builtin_ia32_psraq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
// Oddly these don't require an immediate due to a gcc compatibility issue.
- def int_x86_avx512_pslli_w_512 : GCCBuiltin<"__builtin_ia32_psllwi512">,
+ def int_x86_avx512_pslli_w_512 : ClangBuiltin<"__builtin_ia32_psllwi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pslli_d_512 : GCCBuiltin<"__builtin_ia32_pslldi512">,
+ def int_x86_avx512_pslli_d_512 : ClangBuiltin<"__builtin_ia32_pslldi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pslli_q_512 : GCCBuiltin<"__builtin_ia32_psllqi512">,
+ def int_x86_avx512_pslli_q_512 : ClangBuiltin<"__builtin_ia32_psllqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrli_w_512 : GCCBuiltin<"__builtin_ia32_psrlwi512">,
+ def int_x86_avx512_psrli_w_512 : ClangBuiltin<"__builtin_ia32_psrlwi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrli_d_512 : GCCBuiltin<"__builtin_ia32_psrldi512">,
+ def int_x86_avx512_psrli_d_512 : ClangBuiltin<"__builtin_ia32_psrldi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrli_q_512 : GCCBuiltin<"__builtin_ia32_psrlqi512">,
+ def int_x86_avx512_psrli_q_512 : ClangBuiltin<"__builtin_ia32_psrlqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrai_w_512 : GCCBuiltin<"__builtin_ia32_psrawi512">,
+ def int_x86_avx512_psrai_w_512 : ClangBuiltin<"__builtin_ia32_psrawi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrai_d_512 : GCCBuiltin<"__builtin_ia32_psradi512">,
+ def int_x86_avx512_psrai_d_512 : ClangBuiltin<"__builtin_ia32_psradi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_psrai_q_512 : GCCBuiltin<"__builtin_ia32_psraqi512">,
+ def int_x86_avx512_psrai_q_512 : ClangBuiltin<"__builtin_ia32_psraqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_pmultishift_qb_128:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb128">,
+ ClangBuiltin<"__builtin_ia32_vpmultishiftqb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_avx512_pmultishift_qb_256:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb256">,
+ ClangBuiltin<"__builtin_ia32_vpmultishiftqb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx512_pmultishift_qb_512:
- GCCBuiltin<"__builtin_ia32_vpmultishiftqb512">,
+ ClangBuiltin<"__builtin_ia32_vpmultishiftqb512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
}
// Pack ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">,
+ def int_x86_avx2_packsswb : ClangBuiltin<"__builtin_ia32_packsswb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">,
+ def int_x86_avx2_packssdw : ClangBuiltin<"__builtin_ia32_packssdw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">,
+ def int_x86_avx2_packuswb : ClangBuiltin<"__builtin_ia32_packuswb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">,
+ def int_x86_avx2_packusdw : ClangBuiltin<"__builtin_ia32_packusdw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
}
// Horizontal arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
+ def int_x86_avx2_phadd_w : ClangBuiltin<"__builtin_ia32_phaddw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
+ def int_x86_avx2_phadd_d : ClangBuiltin<"__builtin_ia32_phaddd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
+ def int_x86_avx2_phadd_sw : ClangBuiltin<"__builtin_ia32_phaddsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
+ def int_x86_avx2_phsub_w : ClangBuiltin<"__builtin_ia32_phsubw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
+ def int_x86_avx2_phsub_d : ClangBuiltin<"__builtin_ia32_phsubd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
+ def int_x86_avx2_phsub_sw : ClangBuiltin<"__builtin_ia32_phsubsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
+ def int_x86_avx2_pmadd_ub_sw : ClangBuiltin<"__builtin_ia32_pmaddubsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
}
// Sign ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
+ def int_x86_avx2_psign_b : ClangBuiltin<"__builtin_ia32_psignb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
+ def int_x86_avx2_psign_w : ClangBuiltin<"__builtin_ia32_psignw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
+ def int_x86_avx2_psign_d : ClangBuiltin<"__builtin_ia32_psignd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
}
// Packed multiply high with round and scale
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
+ def int_x86_avx2_pmul_hr_sw : ClangBuiltin<"__builtin_ia32_pmulhrsw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx512_pmul_hr_sw_512 : GCCBuiltin<"__builtin_ia32_pmulhrsw512">,
+ def int_x86_avx512_pmul_hr_sw_512 : ClangBuiltin<"__builtin_ia32_pmulhrsw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
}
// Vector blend
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,
+ def int_x86_avx2_pblendvb : ClangBuiltin<"__builtin_ia32_pblendvb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
}
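
For orientation: a ClangBuiltin<> annotation like the pblendvb256 entry above is the hook that lets Clang lower a C-level builtin call straight to the named LLVM intrinsic. A minimal sketch of the C side, assuming the standard immintrin.h wrapper _mm256_blendv_epi8 (which expands to __builtin_ia32_pblendvb256):

    #include <immintrin.h>

    // Per-byte select: takes b where the top bit of the mask byte is set,
    // a elsewhere. Lowers to llvm.x86.avx2.pblendvb. Build with -mavx2.
    __m256i blend_bytes(__m256i a, __m256i b, __m256i mask) {
      return _mm256_blendv_epi8(a, b, mask);
    }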
@@ -1564,137 +1574,137 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector permutation
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">,
+ def int_x86_avx2_permd : ClangBuiltin<"__builtin_ia32_permvarsi256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
- def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
+ def int_x86_avx2_permps : ClangBuiltin<"__builtin_ia32_permvarsf256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
}
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
+ def int_x86_avx2_maskload_d : ClangBuiltin<"__builtin_ia32_maskloadd">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">,
+ def int_x86_avx2_maskload_q : ClangBuiltin<"__builtin_ia32_maskloadq">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">,
+ def int_x86_avx2_maskload_d_256 : ClangBuiltin<"__builtin_ia32_maskloadd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
+ def int_x86_avx2_maskload_q_256 : ClangBuiltin<"__builtin_ia32_maskloadq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
[IntrReadMem, IntrArgMemOnly]>;
}
// Conditional store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">,
+ def int_x86_avx2_maskstore_d : ClangBuiltin<"__builtin_ia32_maskstored">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrArgMemOnly]>;
- def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">,
+ def int_x86_avx2_maskstore_q : ClangBuiltin<"__builtin_ia32_maskstoreq">,
Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty],
[IntrArgMemOnly]>;
def int_x86_avx2_maskstore_d_256 :
- GCCBuiltin<"__builtin_ia32_maskstored256">,
+ ClangBuiltin<"__builtin_ia32_maskstored256">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrArgMemOnly]>;
def int_x86_avx2_maskstore_q_256 :
- GCCBuiltin<"__builtin_ia32_maskstoreq256">,
+ ClangBuiltin<"__builtin_ia32_maskstoreq256">,
Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty],
[IntrArgMemOnly]>;
}
// Variable bit shift ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">,
+ def int_x86_avx2_psllv_d : ClangBuiltin<"__builtin_ia32_psllv4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">,
+ def int_x86_avx2_psllv_d_256 : ClangBuiltin<"__builtin_ia32_psllv8si">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
- def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">,
+ def int_x86_avx2_psllv_q : ClangBuiltin<"__builtin_ia32_psllv2di">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">,
+ def int_x86_avx2_psllv_q_256 : ClangBuiltin<"__builtin_ia32_psllv4di">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_psllv_d_512 : GCCBuiltin<"__builtin_ia32_psllv16si">,
+ def int_x86_avx512_psllv_d_512 : ClangBuiltin<"__builtin_ia32_psllv16si">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_psllv_q_512 : GCCBuiltin<"__builtin_ia32_psllv8di">,
+ def int_x86_avx512_psllv_q_512 : ClangBuiltin<"__builtin_ia32_psllv8di">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
+ def int_x86_avx2_psrlv_d : ClangBuiltin<"__builtin_ia32_psrlv4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">,
+ def int_x86_avx2_psrlv_d_256 : ClangBuiltin<"__builtin_ia32_psrlv8si">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
- def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">,
+ def int_x86_avx2_psrlv_q : ClangBuiltin<"__builtin_ia32_psrlv2di">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">,
+ def int_x86_avx2_psrlv_q_256 : ClangBuiltin<"__builtin_ia32_psrlv4di">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrlv_d_512 : GCCBuiltin<"__builtin_ia32_psrlv16si">,
+ def int_x86_avx512_psrlv_d_512 : ClangBuiltin<"__builtin_ia32_psrlv16si">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrlv_q_512 : GCCBuiltin<"__builtin_ia32_psrlv8di">,
+ def int_x86_avx512_psrlv_q_512 : ClangBuiltin<"__builtin_ia32_psrlv8di">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
+ def int_x86_avx2_psrav_d : ClangBuiltin<"__builtin_ia32_psrav4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">,
+ def int_x86_avx2_psrav_d_256 : ClangBuiltin<"__builtin_ia32_psrav8si">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrav_d_512 : GCCBuiltin<"__builtin_ia32_psrav16si">,
+ def int_x86_avx512_psrav_d_512 : ClangBuiltin<"__builtin_ia32_psrav16si">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128">,
+ def int_x86_avx512_psrav_q_128 : ClangBuiltin<"__builtin_ia32_psravq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256">,
+ def int_x86_avx512_psrav_q_256 : ClangBuiltin<"__builtin_ia32_psravq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrav_q_512 : GCCBuiltin<"__builtin_ia32_psrav8di">,
+ def int_x86_avx512_psrav_q_512 : ClangBuiltin<"__builtin_ia32_psrav8di">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_psllv_w_128 : GCCBuiltin<"__builtin_ia32_psllv8hi">,
+ def int_x86_avx512_psllv_w_128 : ClangBuiltin<"__builtin_ia32_psllv8hi">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psllv_w_256 : GCCBuiltin<"__builtin_ia32_psllv16hi">,
+ def int_x86_avx512_psllv_w_256 : ClangBuiltin<"__builtin_ia32_psllv16hi">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psllv_w_512 : GCCBuiltin<"__builtin_ia32_psllv32hi">,
+ def int_x86_avx512_psllv_w_512 : ClangBuiltin<"__builtin_ia32_psllv32hi">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrlv_w_128 : GCCBuiltin<"__builtin_ia32_psrlv8hi">,
+ def int_x86_avx512_psrlv_w_128 : ClangBuiltin<"__builtin_ia32_psrlv8hi">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrlv_w_256 : GCCBuiltin<"__builtin_ia32_psrlv16hi">,
+ def int_x86_avx512_psrlv_w_256 : ClangBuiltin<"__builtin_ia32_psrlv16hi">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrlv_w_512 : GCCBuiltin<"__builtin_ia32_psrlv32hi">,
+ def int_x86_avx512_psrlv_w_512 : ClangBuiltin<"__builtin_ia32_psrlv32hi">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrav_w_128 : GCCBuiltin<"__builtin_ia32_psrav8hi">,
+ def int_x86_avx512_psrav_w_128 : ClangBuiltin<"__builtin_ia32_psrav8hi">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrav_w_256 : GCCBuiltin<"__builtin_ia32_psrav16hi">,
+ def int_x86_avx512_psrav_w_256 : ClangBuiltin<"__builtin_ia32_psrav16hi">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">,
+ def int_x86_avx512_psrav_w_512 : ClangBuiltin<"__builtin_ia32_psrav32hi">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
}
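
The variable-shift entries above follow one pattern: each lane is shifted by the count held in the corresponding lane of the second operand. A small sketch, assuming the usual AVX2 wrapper _mm256_sllv_epi32 for __builtin_ia32_psllv8si:

    #include <immintrin.h>

    // Per-lane logical left shift; lowers to llvm.x86.avx2.psllv.d.256
    // (vpsllvd). Lanes with counts >= 32 come back as zero.
    __m256i shift_each_lane(__m256i v, __m256i counts) {
      return _mm256_sllv_epi32(v, counts);
    }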
@@ -1703,68 +1713,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// NOTE: These can't be ArgMemOnly because you can put the address completely
// in the index register.
- def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
+ def int_x86_avx2_gather_d_pd : ClangBuiltin<"__builtin_ia32_gatherd_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
+ def int_x86_avx2_gather_d_pd_256 : ClangBuiltin<"__builtin_ia32_gatherd_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
+ def int_x86_avx2_gather_q_pd : ClangBuiltin<"__builtin_ia32_gatherq_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
+ def int_x86_avx2_gather_q_pd_256 : ClangBuiltin<"__builtin_ia32_gatherq_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
+ def int_x86_avx2_gather_d_ps : ClangBuiltin<"__builtin_ia32_gatherd_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
+ def int_x86_avx2_gather_d_ps_256 : ClangBuiltin<"__builtin_ia32_gatherd_ps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
+ def int_x86_avx2_gather_q_ps : ClangBuiltin<"__builtin_ia32_gatherq_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
+ def int_x86_avx2_gather_q_ps_256 : ClangBuiltin<"__builtin_ia32_gatherq_ps256">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
+ def int_x86_avx2_gather_d_q : ClangBuiltin<"__builtin_ia32_gatherd_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
+ def int_x86_avx2_gather_d_q_256 : ClangBuiltin<"__builtin_ia32_gatherd_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
+ def int_x86_avx2_gather_q_q : ClangBuiltin<"__builtin_ia32_gatherq_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
+ def int_x86_avx2_gather_q_q_256 : ClangBuiltin<"__builtin_ia32_gatherq_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
+ def int_x86_avx2_gather_d_d : ClangBuiltin<"__builtin_ia32_gatherd_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
+ def int_x86_avx2_gather_d_d_256 : ClangBuiltin<"__builtin_ia32_gatherd_d256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
+ def int_x86_avx2_gather_q_d : ClangBuiltin<"__builtin_ia32_gatherq_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
+ def int_x86_avx2_gather_q_d_256 : ClangBuiltin<"__builtin_ia32_gatherq_d256">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrReadMem, ImmArg<ArgIndex<4>>]>;
@@ -1772,12 +1782,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
+ def int_x86_avx2_pmovmskb : ClangBuiltin<"__builtin_ia32_pmovmskb256">,
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
+ def int_x86_avx2_pshuf_b : ClangBuiltin<"__builtin_ia32_pshufb256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
+ def int_x86_avx2_mpsadbw : ClangBuiltin<"__builtin_ia32_mpsadbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
@@ -1786,21 +1796,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FMA3 and FMA4
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
+ def int_x86_fma_vfmaddsub_ps : ClangBuiltin<"__builtin_ia32_vfmaddsubps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
- def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
+ def int_x86_fma_vfmaddsub_pd : ClangBuiltin<"__builtin_ia32_vfmaddsubpd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_ps_256 :
- GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
+ ClangBuiltin<"__builtin_ia32_vfmaddsubps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_fma_vfmaddsub_pd_256 :
- GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
+ ClangBuiltin<"__builtin_ia32_vfmaddsubpd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
[IntrNoMem]>;
@@ -1835,27 +1845,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_vpmadd52h_uq_128 :
- GCCBuiltin<"__builtin_ia32_vpmadd52huq128">,
+ ClangBuiltin<"__builtin_ia32_vpmadd52huq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpmadd52l_uq_128 :
- GCCBuiltin<"__builtin_ia32_vpmadd52luq128">,
+ ClangBuiltin<"__builtin_ia32_vpmadd52luq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpmadd52h_uq_256 :
- GCCBuiltin<"__builtin_ia32_vpmadd52huq256">,
+ ClangBuiltin<"__builtin_ia32_vpmadd52huq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpmadd52l_uq_256 :
- GCCBuiltin<"__builtin_ia32_vpmadd52luq256">,
+ ClangBuiltin<"__builtin_ia32_vpmadd52luq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpmadd52h_uq_512 :
- GCCBuiltin<"__builtin_ia32_vpmadd52huq512">,
+ ClangBuiltin<"__builtin_ia32_vpmadd52huq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
def int_x86_avx512_vpmadd52l_uq_512 :
- GCCBuiltin<"__builtin_ia32_vpmadd52luq512">,
+ ClangBuiltin<"__builtin_ia32_vpmadd52luq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
}
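
The vpmadd52 definitions take three vector operands because the instruction accumulates into the first. A hedged sketch, assuming the AVX512-IFMA wrapper _mm512_madd52hi_epu64 for __builtin_ia32_vpmadd52huq512:

    #include <immintrin.h>

    // acc[i] += high 52 bits of the 104-bit product b[i] * c[i],
    // with each 64-bit lane treated as an unsigned 52-bit value.
    // Lowers to llvm.x86.avx512.vpmadd52h.uq.512; needs -mavx512ifma.
    __m512i madd52_hi(__m512i acc, __m512i b, __m512i c) {
      return _mm512_madd52hi_epu64(acc, b, c);
    }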
@@ -1863,54 +1873,54 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// VNNI
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_vpdpbusd_128 :
- GCCBuiltin<"__builtin_ia32_vpdpbusd128">,
+ ClangBuiltin<"__builtin_ia32_vpdpbusd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpbusd_256 :
- GCCBuiltin<"__builtin_ia32_vpdpbusd256">,
+ ClangBuiltin<"__builtin_ia32_vpdpbusd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpbusd_512 :
- GCCBuiltin<"__builtin_ia32_vpdpbusd512">,
+ ClangBuiltin<"__builtin_ia32_vpdpbusd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpbusds_128 :
- GCCBuiltin<"__builtin_ia32_vpdpbusds128">,
+ ClangBuiltin<"__builtin_ia32_vpdpbusds128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpbusds_256 :
- GCCBuiltin<"__builtin_ia32_vpdpbusds256">,
+ ClangBuiltin<"__builtin_ia32_vpdpbusds256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpbusds_512 :
- GCCBuiltin<"__builtin_ia32_vpdpbusds512">,
+ ClangBuiltin<"__builtin_ia32_vpdpbusds512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpwssd_128 :
- GCCBuiltin<"__builtin_ia32_vpdpwssd128">,
+ ClangBuiltin<"__builtin_ia32_vpdpwssd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpwssd_256 :
- GCCBuiltin<"__builtin_ia32_vpdpwssd256">,
+ ClangBuiltin<"__builtin_ia32_vpdpwssd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpwssd_512 :
- GCCBuiltin<"__builtin_ia32_vpdpwssd512">,
+ ClangBuiltin<"__builtin_ia32_vpdpwssd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpwssds_128 :
- GCCBuiltin<"__builtin_ia32_vpdpwssds128">,
+ ClangBuiltin<"__builtin_ia32_vpdpwssds128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpwssds_256 :
- GCCBuiltin<"__builtin_ia32_vpdpwssds256">,
+ ClangBuiltin<"__builtin_ia32_vpdpwssds256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512_vpdpwssds_512 :
- GCCBuiltin<"__builtin_ia32_vpdpwssds512">,
+ ClangBuiltin<"__builtin_ia32_vpdpwssds512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
}
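
The VNNI intrinsics likewise carry the accumulator as the first operand. A minimal sketch, assuming the AVX512-VNNI wrapper _mm512_dpbusd_epi32 for __builtin_ia32_vpdpbusd512:

    #include <immintrin.h>

    // Multiplies unsigned bytes of a with signed bytes of b, sums each
    // group of four products into a 32-bit lane, and adds it to acc.
    // Lowers to llvm.x86.avx512.vpdpbusd.512; needs -mavx512vnni.
    __m512i dot_accumulate(__m512i acc, __m512i a, __m512i b) {
      return _mm512_dpbusd_epi32(acc, a, b);
    }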
@@ -1919,180 +1929,180 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// XOP
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
+ def int_x86_xop_vpermil2pd : ClangBuiltin<"__builtin_ia32_vpermil2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_xop_vpermil2pd_256 :
- GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
+ ClangBuiltin<"__builtin_ia32_vpermil2pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
+ def int_x86_xop_vpermil2ps : ClangBuiltin<"__builtin_ia32_vpermil2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_xop_vpermil2ps_256 :
- GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
+ ClangBuiltin<"__builtin_ia32_vpermil2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
+ def int_x86_xop_vfrcz_pd : ClangBuiltin<"__builtin_ia32_vfrczpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">,
+ def int_x86_xop_vfrcz_ps : ClangBuiltin<"__builtin_ia32_vfrczps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">,
+ def int_x86_xop_vfrcz_sd : ClangBuiltin<"__builtin_ia32_vfrczsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">,
+ def int_x86_xop_vfrcz_ss : ClangBuiltin<"__builtin_ia32_vfrczss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">,
+ def int_x86_xop_vfrcz_pd_256 : ClangBuiltin<"__builtin_ia32_vfrczpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
+ def int_x86_xop_vfrcz_ps_256 : ClangBuiltin<"__builtin_ia32_vfrczps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbd :
- GCCBuiltin<"__builtin_ia32_vphaddbd">,
+ ClangBuiltin<"__builtin_ia32_vphaddbd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbq :
- GCCBuiltin<"__builtin_ia32_vphaddbq">,
+ ClangBuiltin<"__builtin_ia32_vphaddbq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddbw :
- GCCBuiltin<"__builtin_ia32_vphaddbw">,
+ ClangBuiltin<"__builtin_ia32_vphaddbw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphadddq :
- GCCBuiltin<"__builtin_ia32_vphadddq">,
+ ClangBuiltin<"__builtin_ia32_vphadddq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_xop_vphaddubd :
- GCCBuiltin<"__builtin_ia32_vphaddubd">,
+ ClangBuiltin<"__builtin_ia32_vphaddubd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddubq :
- GCCBuiltin<"__builtin_ia32_vphaddubq">,
+ ClangBuiltin<"__builtin_ia32_vphaddubq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddubw :
- GCCBuiltin<"__builtin_ia32_vphaddubw">,
+ ClangBuiltin<"__builtin_ia32_vphaddubw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphaddudq :
- GCCBuiltin<"__builtin_ia32_vphaddudq">,
+ ClangBuiltin<"__builtin_ia32_vphaddudq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_xop_vphadduwd :
- GCCBuiltin<"__builtin_ia32_vphadduwd">,
+ ClangBuiltin<"__builtin_ia32_vphadduwd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphadduwq :
- GCCBuiltin<"__builtin_ia32_vphadduwq">,
+ ClangBuiltin<"__builtin_ia32_vphadduwq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphaddwd :
- GCCBuiltin<"__builtin_ia32_vphaddwd">,
+ ClangBuiltin<"__builtin_ia32_vphaddwd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphaddwq :
- GCCBuiltin<"__builtin_ia32_vphaddwq">,
+ ClangBuiltin<"__builtin_ia32_vphaddwq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vphsubbw :
- GCCBuiltin<"__builtin_ia32_vphsubbw">,
+ ClangBuiltin<"__builtin_ia32_vphsubbw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_xop_vphsubdq :
- GCCBuiltin<"__builtin_ia32_vphsubdq">,
+ ClangBuiltin<"__builtin_ia32_vphsubdq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_xop_vphsubwd :
- GCCBuiltin<"__builtin_ia32_vphsubwd">,
+ ClangBuiltin<"__builtin_ia32_vphsubwd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_xop_vpmacsdd :
- GCCBuiltin<"__builtin_ia32_vpmacsdd">,
+ ClangBuiltin<"__builtin_ia32_vpmacsdd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacsdqh :
- GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
+ ClangBuiltin<"__builtin_ia32_vpmacsdqh">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacsdql :
- GCCBuiltin<"__builtin_ia32_vpmacsdql">,
+ ClangBuiltin<"__builtin_ia32_vpmacsdql">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacssdd :
- GCCBuiltin<"__builtin_ia32_vpmacssdd">,
+ ClangBuiltin<"__builtin_ia32_vpmacssdd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacssdqh :
- GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
+ ClangBuiltin<"__builtin_ia32_vpmacssdqh">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacssdql :
- GCCBuiltin<"__builtin_ia32_vpmacssdql">,
+ ClangBuiltin<"__builtin_ia32_vpmacssdql">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacsswd :
- GCCBuiltin<"__builtin_ia32_vpmacsswd">,
+ ClangBuiltin<"__builtin_ia32_vpmacsswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacssww :
- GCCBuiltin<"__builtin_ia32_vpmacssww">,
+ ClangBuiltin<"__builtin_ia32_vpmacssww">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacswd :
- GCCBuiltin<"__builtin_ia32_vpmacswd">,
+ ClangBuiltin<"__builtin_ia32_vpmacswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmacsww :
- GCCBuiltin<"__builtin_ia32_vpmacsww">,
+ ClangBuiltin<"__builtin_ia32_vpmacsww">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmadcsswd :
- GCCBuiltin<"__builtin_ia32_vpmadcsswd">,
+ ClangBuiltin<"__builtin_ia32_vpmadcsswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpmadcswd :
- GCCBuiltin<"__builtin_ia32_vpmadcswd">,
+ ClangBuiltin<"__builtin_ia32_vpmadcswd">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_xop_vpperm :
- GCCBuiltin<"__builtin_ia32_vpperm">,
+ ClangBuiltin<"__builtin_ia32_vpperm">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpshab :
- GCCBuiltin<"__builtin_ia32_vpshab">,
+ ClangBuiltin<"__builtin_ia32_vpshab">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpshad :
- GCCBuiltin<"__builtin_ia32_vpshad">,
+ ClangBuiltin<"__builtin_ia32_vpshad">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpshaq :
- GCCBuiltin<"__builtin_ia32_vpshaq">,
+ ClangBuiltin<"__builtin_ia32_vpshaq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpshaw :
- GCCBuiltin<"__builtin_ia32_vpshaw">,
+ ClangBuiltin<"__builtin_ia32_vpshaw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_x86_xop_vpshlb :
- GCCBuiltin<"__builtin_ia32_vpshlb">,
+ ClangBuiltin<"__builtin_ia32_vpshlb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
def int_x86_xop_vpshld :
- GCCBuiltin<"__builtin_ia32_vpshld">,
+ ClangBuiltin<"__builtin_ia32_vpshld">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_x86_xop_vpshlq :
- GCCBuiltin<"__builtin_ia32_vpshlq">,
+ ClangBuiltin<"__builtin_ia32_vpshlq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_xop_vpshlw :
- GCCBuiltin<"__builtin_ia32_vpshlw">,
+ ClangBuiltin<"__builtin_ia32_vpshlw">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
}
@@ -2101,25 +2111,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// LWP
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_llwpcb :
- GCCBuiltin<"__builtin_ia32_llwpcb">,
+ ClangBuiltin<"__builtin_ia32_llwpcb">,
Intrinsic<[], [llvm_ptr_ty], []>;
def int_x86_slwpcb :
- GCCBuiltin<"__builtin_ia32_slwpcb">,
+ ClangBuiltin<"__builtin_ia32_slwpcb">,
Intrinsic<[llvm_ptr_ty], [], []>;
def int_x86_lwpins32 :
- GCCBuiltin<"__builtin_ia32_lwpins32">,
+ ClangBuiltin<"__builtin_ia32_lwpins32">,
Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
def int_x86_lwpins64 :
- GCCBuiltin<"__builtin_ia32_lwpins64">,
+ ClangBuiltin<"__builtin_ia32_lwpins64">,
Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
def int_x86_lwpval32 :
- GCCBuiltin<"__builtin_ia32_lwpval32">,
+ ClangBuiltin<"__builtin_ia32_lwpval32">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
def int_x86_lwpval64 :
- GCCBuiltin<"__builtin_ia32_lwpval64">,
+ ClangBuiltin<"__builtin_ia32_lwpval64">,
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<2>>]>;
}
@@ -2129,127 +2139,127 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Empty MMX state op.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">,
+ def int_x86_mmx_emms : ClangBuiltin<"__builtin_ia32_emms">,
Intrinsic<[], [], []>;
- def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">,
+ def int_x86_mmx_femms : ClangBuiltin<"__builtin_ia32_femms">,
Intrinsic<[], [], []>;
}
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Addition
- def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
+ def int_x86_mmx_padd_b : ClangBuiltin<"__builtin_ia32_paddb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
+ def int_x86_mmx_padd_w : ClangBuiltin<"__builtin_ia32_paddw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
+ def int_x86_mmx_padd_d : ClangBuiltin<"__builtin_ia32_paddd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
+ def int_x86_mmx_padd_q : ClangBuiltin<"__builtin_ia32_paddq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
+ def int_x86_mmx_padds_b : ClangBuiltin<"__builtin_ia32_paddsb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">,
+ def int_x86_mmx_padds_w : ClangBuiltin<"__builtin_ia32_paddsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">,
+ def int_x86_mmx_paddus_b : ClangBuiltin<"__builtin_ia32_paddusb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">,
+ def int_x86_mmx_paddus_w : ClangBuiltin<"__builtin_ia32_paddusw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Subtraction
- def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">,
+ def int_x86_mmx_psub_b : ClangBuiltin<"__builtin_ia32_psubb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">,
+ def int_x86_mmx_psub_w : ClangBuiltin<"__builtin_ia32_psubw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">,
+ def int_x86_mmx_psub_d : ClangBuiltin<"__builtin_ia32_psubd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">,
+ def int_x86_mmx_psub_q : ClangBuiltin<"__builtin_ia32_psubq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
+ def int_x86_mmx_psubs_b : ClangBuiltin<"__builtin_ia32_psubsb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">,
+ def int_x86_mmx_psubs_w : ClangBuiltin<"__builtin_ia32_psubsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">,
+ def int_x86_mmx_psubus_b : ClangBuiltin<"__builtin_ia32_psubusb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">,
+ def int_x86_mmx_psubus_w : ClangBuiltin<"__builtin_ia32_psubusw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
// Multiplication
- def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
+ def int_x86_mmx_pmulh_w : ClangBuiltin<"__builtin_ia32_pmulhw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">,
+ def int_x86_mmx_pmull_w : ClangBuiltin<"__builtin_ia32_pmullw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
+ def int_x86_mmx_pmulhu_w : ClangBuiltin<"__builtin_ia32_pmulhuw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">,
+ def int_x86_mmx_pmulu_dq : ClangBuiltin<"__builtin_ia32_pmuludq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">,
+ def int_x86_mmx_pmadd_wd : ClangBuiltin<"__builtin_ia32_pmaddwd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Bitwise operations
- def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
+ def int_x86_mmx_pand : ClangBuiltin<"__builtin_ia32_pand">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
+ def int_x86_mmx_pandn : ClangBuiltin<"__builtin_ia32_pandn">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
+ def int_x86_mmx_por : ClangBuiltin<"__builtin_ia32_por">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
- def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
+ def int_x86_mmx_pxor : ClangBuiltin<"__builtin_ia32_pxor">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem, Commutative]>;
// Averages
- def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
+ def int_x86_mmx_pavg_b : ClangBuiltin<"__builtin_ia32_pavgb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">,
+ def int_x86_mmx_pavg_w : ClangBuiltin<"__builtin_ia32_pavgw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Maximum
- def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">,
+ def int_x86_mmx_pmaxu_b : ClangBuiltin<"__builtin_ia32_pmaxub">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">,
+ def int_x86_mmx_pmaxs_w : ClangBuiltin<"__builtin_ia32_pmaxsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Minimum
- def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">,
+ def int_x86_mmx_pminu_b : ClangBuiltin<"__builtin_ia32_pminub">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">,
+ def int_x86_mmx_pmins_w : ClangBuiltin<"__builtin_ia32_pminsw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
// Packed sum of absolute differences
- def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">,
+ def int_x86_mmx_psad_bw : ClangBuiltin<"__builtin_ia32_psadbw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
}
@@ -2257,178 +2267,178 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Shift left logical
- def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
+ def int_x86_mmx_psll_w : ClangBuiltin<"__builtin_ia32_psllw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
+ def int_x86_mmx_psll_d : ClangBuiltin<"__builtin_ia32_pslld">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
+ def int_x86_mmx_psll_q : ClangBuiltin<"__builtin_ia32_psllq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
+ def int_x86_mmx_psrl_w : ClangBuiltin<"__builtin_ia32_psrlw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
+ def int_x86_mmx_psrl_d : ClangBuiltin<"__builtin_ia32_psrld">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
+ def int_x86_mmx_psrl_q : ClangBuiltin<"__builtin_ia32_psrlq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
+ def int_x86_mmx_psra_w : ClangBuiltin<"__builtin_ia32_psraw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
+ def int_x86_mmx_psra_d : ClangBuiltin<"__builtin_ia32_psrad">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
// Oddly these don't require an immediate due to a gcc compatibility issue.
- def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
+ def int_x86_mmx_pslli_w : ClangBuiltin<"__builtin_ia32_psllwi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
+ def int_x86_mmx_pslli_d : ClangBuiltin<"__builtin_ia32_pslldi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
+ def int_x86_mmx_pslli_q : ClangBuiltin<"__builtin_ia32_psllqi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
+ def int_x86_mmx_psrli_w : ClangBuiltin<"__builtin_ia32_psrlwi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
+ def int_x86_mmx_psrli_d : ClangBuiltin<"__builtin_ia32_psrldi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
+ def int_x86_mmx_psrli_q : ClangBuiltin<"__builtin_ia32_psrlqi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
+ def int_x86_mmx_psrai_w : ClangBuiltin<"__builtin_ia32_psrawi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
+ def int_x86_mmx_psrai_d : ClangBuiltin<"__builtin_ia32_psradi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
}
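
The non-immediate count noted in the comment above is visible at the C level too: the *i shift builtins take a plain int, not a compile-time constant. A sketch, assuming the classic mmintrin.h wrapper _mm_slli_pi16 for __builtin_ia32_psllwi:

    #include <mmintrin.h>

    // Shifts each 16-bit lane left by count; lowers to
    // llvm.x86.mmx.pslli.w. Call _mm_empty() before returning to x87 code.
    __m64 shift_words(__m64 v, int count) {
      return _mm_slli_pi16(v, count);
    }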
// Permute
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256">,
+ def int_x86_avx512_permvar_df_256 : ClangBuiltin<"__builtin_ia32_permvardf256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512">,
+ def int_x86_avx512_permvar_df_512 : ClangBuiltin<"__builtin_ia32_permvardf512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256">,
+ def int_x86_avx512_permvar_di_256 : ClangBuiltin<"__builtin_ia32_permvardi256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512">,
+ def int_x86_avx512_permvar_di_512 : ClangBuiltin<"__builtin_ia32_permvardi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128">,
+ def int_x86_avx512_permvar_hi_128 : ClangBuiltin<"__builtin_ia32_permvarhi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256">,
+ def int_x86_avx512_permvar_hi_256 : ClangBuiltin<"__builtin_ia32_permvarhi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512">,
+ def int_x86_avx512_permvar_hi_512 : ClangBuiltin<"__builtin_ia32_permvarhi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128">,
+ def int_x86_avx512_permvar_qi_128 : ClangBuiltin<"__builtin_ia32_permvarqi128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256">,
+ def int_x86_avx512_permvar_qi_256 : ClangBuiltin<"__builtin_ia32_permvarqi256">,
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512">,
+ def int_x86_avx512_permvar_qi_512 : ClangBuiltin<"__builtin_ia32_permvarqi512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty,
llvm_v64i8_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512">,
+ def int_x86_avx512_permvar_sf_512 : ClangBuiltin<"__builtin_ia32_permvarsf512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
- def int_x86_avx512_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512">,
+ def int_x86_avx512_permvar_si_512 : ClangBuiltin<"__builtin_ia32_permvarsi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i32_ty], [IntrNoMem]>;
}
// Pack ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">,
+ def int_x86_mmx_packsswb : ClangBuiltin<"__builtin_ia32_packsswb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">,
+ def int_x86_mmx_packssdw : ClangBuiltin<"__builtin_ia32_packssdw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">,
+ def int_x86_mmx_packuswb : ClangBuiltin<"__builtin_ia32_packuswb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
}
// Unpacking ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">,
+ def int_x86_mmx_punpckhbw : ClangBuiltin<"__builtin_ia32_punpckhbw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">,
+ def int_x86_mmx_punpckhwd : ClangBuiltin<"__builtin_ia32_punpckhwd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
+ def int_x86_mmx_punpckhdq : ClangBuiltin<"__builtin_ia32_punpckhdq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">,
+ def int_x86_mmx_punpcklbw : ClangBuiltin<"__builtin_ia32_punpcklbw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">,
+ def int_x86_mmx_punpcklwd : ClangBuiltin<"__builtin_ia32_punpcklwd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
- def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">,
+ def int_x86_mmx_punpckldq : ClangBuiltin<"__builtin_ia32_punpckldq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
// Integer comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
+ def int_x86_mmx_pcmpeq_b : ClangBuiltin<"__builtin_ia32_pcmpeqb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">,
+ def int_x86_mmx_pcmpeq_w : ClangBuiltin<"__builtin_ia32_pcmpeqw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">,
+ def int_x86_mmx_pcmpeq_d : ClangBuiltin<"__builtin_ia32_pcmpeqd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">,
+ def int_x86_mmx_pcmpgt_b : ClangBuiltin<"__builtin_ia32_pcmpgtb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">,
+ def int_x86_mmx_pcmpgt_w : ClangBuiltin<"__builtin_ia32_pcmpgtw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">,
+ def int_x86_mmx_pcmpgt_d : ClangBuiltin<"__builtin_ia32_pcmpgtd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
}
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">,
+ def int_x86_mmx_maskmovq : ClangBuiltin<"__builtin_ia32_maskmovq">,
Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>;
- def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">,
+ def int_x86_mmx_pmovmskb : ClangBuiltin<"__builtin_ia32_pmovmskb">,
Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">,
+ def int_x86_mmx_movnt_dq : ClangBuiltin<"__builtin_ia32_movntq">,
Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
- def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
+ def int_x86_mmx_palignr_b : ClangBuiltin<"__builtin_ia32_palignr">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
+ def int_x86_mmx_pextr_w : ClangBuiltin<"__builtin_ia32_vec_ext_v4hi">,
Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
+ def int_x86_mmx_pinsr_w : ClangBuiltin<"__builtin_ia32_vec_set_v4hi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
@@ -2437,21 +2447,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// BMI
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">,
+ def int_x86_bmi_bextr_32 : ClangBuiltin<"__builtin_ia32_bextr_u32">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">,
+ def int_x86_bmi_bextr_64 : ClangBuiltin<"__builtin_ia32_bextr_u64">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">,
+ def int_x86_bmi_bzhi_32 : ClangBuiltin<"__builtin_ia32_bzhi_si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">,
+ def int_x86_bmi_bzhi_64 : ClangBuiltin<"__builtin_ia32_bzhi_di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">,
+ def int_x86_bmi_pdep_32 : ClangBuiltin<"__builtin_ia32_pdep_si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">,
+ def int_x86_bmi_pdep_64 : ClangBuiltin<"__builtin_ia32_pdep_di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">,
+ def int_x86_bmi_pext_32 : ClangBuiltin<"__builtin_ia32_pext_si">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">,
+ def int_x86_bmi_pext_64 : ClangBuiltin<"__builtin_ia32_pext_di">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
}
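// [IntrNoMem], used throughout the BMI defs above, marks an intrinsic as
// accessing no memory (it is lowered to the readnone attribute), so the
// optimizer is free to CSE, hoist, or delete calls to it; that is the
// right contract for pure bit-manipulation ops such as bextr/bzhi/pdep/pext.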
@@ -2459,34 +2469,34 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FS/GS Base
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">,
+ def int_x86_rdfsbase_32 : ClangBuiltin<"__builtin_ia32_rdfsbase32">,
Intrinsic<[llvm_i32_ty], []>;
- def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">,
+ def int_x86_rdgsbase_32 : ClangBuiltin<"__builtin_ia32_rdgsbase32">,
Intrinsic<[llvm_i32_ty], []>;
- def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">,
+ def int_x86_rdfsbase_64 : ClangBuiltin<"__builtin_ia32_rdfsbase64">,
Intrinsic<[llvm_i64_ty], []>;
- def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">,
+ def int_x86_rdgsbase_64 : ClangBuiltin<"__builtin_ia32_rdgsbase64">,
Intrinsic<[llvm_i64_ty], []>;
- def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">,
+ def int_x86_wrfsbase_32 : ClangBuiltin<"__builtin_ia32_wrfsbase32">,
Intrinsic<[], [llvm_i32_ty]>;
- def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">,
+ def int_x86_wrgsbase_32 : ClangBuiltin<"__builtin_ia32_wrgsbase32">,
Intrinsic<[], [llvm_i32_ty]>;
- def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">,
+ def int_x86_wrfsbase_64 : ClangBuiltin<"__builtin_ia32_wrfsbase64">,
Intrinsic<[], [llvm_i64_ty]>;
- def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">,
+ def int_x86_wrgsbase_64 : ClangBuiltin<"__builtin_ia32_wrgsbase64">,
Intrinsic<[], [llvm_i64_ty]>;
}
//===----------------------------------------------------------------------===//
// FXSR
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_fxrstor : GCCBuiltin<"__builtin_ia32_fxrstor">,
+ def int_x86_fxrstor : ClangBuiltin<"__builtin_ia32_fxrstor">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_fxrstor64 : GCCBuiltin<"__builtin_ia32_fxrstor64">,
+ def int_x86_fxrstor64 : ClangBuiltin<"__builtin_ia32_fxrstor64">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_fxsave : GCCBuiltin<"__builtin_ia32_fxsave">,
+ def int_x86_fxsave : ClangBuiltin<"__builtin_ia32_fxsave">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_fxsave64 : GCCBuiltin<"__builtin_ia32_fxsave64">,
+ def int_x86_fxsave64 : ClangBuiltin<"__builtin_ia32_fxsave64">,
Intrinsic<[], [llvm_ptr_ty], []>;
}
@@ -2526,44 +2536,44 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
//===----------------------------------------------------------------------===//
// CLFLUSHOPT and CLWB
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_clflushopt : GCCBuiltin<"__builtin_ia32_clflushopt">,
+ def int_x86_clflushopt : ClangBuiltin<"__builtin_ia32_clflushopt">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_clwb : GCCBuiltin<"__builtin_ia32_clwb">,
+ def int_x86_clwb : ClangBuiltin<"__builtin_ia32_clwb">,
Intrinsic<[], [llvm_ptr_ty], []>;
}
//===----------------------------------------------------------------------===//
// Support protection key
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">,
+ def int_x86_rdpkru : ClangBuiltin <"__builtin_ia32_rdpkru">,
Intrinsic<[llvm_i32_ty], [], []>;
- def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">,
+ def int_x86_wrpkru : ClangBuiltin<"__builtin_ia32_wrpkru">,
Intrinsic<[], [llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
// Half float conversion
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
+ def int_x86_vcvtps2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtps2ph">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
+ def int_x86_vcvtps2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtps2ph256">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_vcvtph2ps_512 :
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
+ def int_x86_avx512_mask_vcvtps2ph_512 : ClangBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v16i16_ty, llvm_i16_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">,
+ def int_x86_avx512_mask_vcvtps2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtps2ph256_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">,
+ def int_x86_avx512_mask_vcvtps2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtps2ph_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty,
llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
@@ -2573,10 +2583,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// TBM
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">,
+ def int_x86_tbm_bextri_u32 : ClangBuiltin<"__builtin_ia32_bextri_u32">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">,
+ def int_x86_tbm_bextri_u64 : ClangBuiltin<"__builtin_ia32_bextri_u64">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
}
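// ImmArg<ArgIndex<N>> constrains operand N (0-based) to an integer
// immediate: the IR verifier rejects any call whose Nth argument is not a
// constant. For the TBM defs above, argument 1 is the start/length control
// field that is encoded directly into the bextri instruction, hence
// ImmArg<ArgIndex<1>>.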
@@ -2619,13 +2629,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// RTM intrinsics. Transactional Memory support.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_xbegin : GCCBuiltin<"__builtin_ia32_xbegin">,
+ def int_x86_xbegin : ClangBuiltin<"__builtin_ia32_xbegin">,
Intrinsic<[llvm_i32_ty], [], []>;
- def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
+ def int_x86_xend : ClangBuiltin<"__builtin_ia32_xend">,
Intrinsic<[], [], []>;
- def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
+ def int_x86_xabort : ClangBuiltin<"__builtin_ia32_xabort">,
Intrinsic<[], [llvm_i8_ty], [ImmArg<ArgIndex<0>>]>;
- def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">,
+ def int_x86_xtest : ClangBuiltin<"__builtin_ia32_xtest">,
Intrinsic<[llvm_i32_ty], [], []>;
}
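// The RTM defs above deliberately carry an empty attribute list ([]): a
// transactional region can touch arbitrary memory and an abort has side
// effects, so xbegin/xend/xabort/xtest must stay opaque to the optimizer.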
@@ -2664,86 +2674,86 @@ let TargetPrefix = "x86" in {
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
+ def int_x86_avx512_cvttss2si : ClangBuiltin<"__builtin_ia32_vcvttss2si32">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
+ def int_x86_avx512_cvttss2si64 : ClangBuiltin<"__builtin_ia32_vcvttss2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">,
+ def int_x86_avx512_cvttss2usi : ClangBuiltin<"__builtin_ia32_vcvttss2usi32">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">,
+ def int_x86_avx512_cvttss2usi64 : ClangBuiltin<"__builtin_ia32_vcvttss2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
+ def int_x86_avx512_cvtusi2ss : ClangBuiltin<"__builtin_ia32_cvtusi2ss32">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">,
+ def int_x86_avx512_cvtusi642ss : ClangBuiltin<"__builtin_ia32_cvtusi2ss64">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
+ def int_x86_avx512_cvttsd2si : ClangBuiltin<"__builtin_ia32_vcvttsd2si32">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
+ def int_x86_avx512_cvttsd2si64 : ClangBuiltin<"__builtin_ia32_vcvttsd2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">,
+ def int_x86_avx512_cvttsd2usi : ClangBuiltin<"__builtin_ia32_vcvttsd2usi32">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">,
+ def int_x86_avx512_cvttsd2usi64 : ClangBuiltin<"__builtin_ia32_vcvttsd2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">,
+ def int_x86_avx512_cvtusi642sd : ClangBuiltin<"__builtin_ia32_cvtusi2sd64">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">,
+ def int_x86_avx512_vcvtss2usi32 : ClangBuiltin<"__builtin_ia32_vcvtss2usi32">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">,
+ def int_x86_avx512_vcvtss2usi64 : ClangBuiltin<"__builtin_ia32_vcvtss2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">,
+ def int_x86_avx512_vcvtss2si32 : ClangBuiltin<"__builtin_ia32_vcvtss2si32">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">,
+ def int_x86_avx512_vcvtss2si64 : ClangBuiltin<"__builtin_ia32_vcvtss2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">,
+ def int_x86_avx512_vcvtsd2usi32 : ClangBuiltin<"__builtin_ia32_vcvtsd2usi32">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">,
+ def int_x86_avx512_vcvtsd2usi64 : ClangBuiltin<"__builtin_ia32_vcvtsd2usi64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">,
+ def int_x86_avx512_vcvtsd2si32 : ClangBuiltin<"__builtin_ia32_vcvtsd2si32">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">,
+ def int_x86_avx512_vcvtsd2si64 : ClangBuiltin<"__builtin_ia32_vcvtsd2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">,
+ def int_x86_avx512_cvtsi2ss32 : ClangBuiltin<"__builtin_ia32_cvtsi2ss32">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">,
+ def int_x86_avx512_cvtsi2ss64 : ClangBuiltin<"__builtin_ia32_cvtsi2ss64">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
+ def int_x86_avx512_cvtsi2sd64 : ClangBuiltin<"__builtin_ia32_cvtsi2sd64">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
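// In the conversion ops above, the trailing llvm_i32_ty operand is the
// rounding-mode / SAE (suppress-all-exceptions) control of the EVEX
// encoding; it is listed as an ImmArg because the value must be known at
// compile time to encode the instruction. Illustrative IR call, mirroring
// int_x86_avx512_cvttss2si (8 is _MM_FROUND_NO_EXC):
//
//   %r = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %v, i32 8)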
// Pack ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_packsswb_512 : GCCBuiltin<"__builtin_ia32_packsswb512">,
+ def int_x86_avx512_packsswb_512 : ClangBuiltin<"__builtin_ia32_packsswb512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_packssdw_512 : GCCBuiltin<"__builtin_ia32_packssdw512">,
+ def int_x86_avx512_packssdw_512 : ClangBuiltin<"__builtin_ia32_packssdw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_packuswb_512 : GCCBuiltin<"__builtin_ia32_packuswb512">,
+ def int_x86_avx512_packuswb_512 : ClangBuiltin<"__builtin_ia32_packuswb512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_packusdw_512 : GCCBuiltin<"__builtin_ia32_packusdw512">,
+ def int_x86_avx512_packusdw_512 : ClangBuiltin<"__builtin_ia32_packusdw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
}
@@ -2759,380 +2769,380 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_cvtpd2dq_128 :
- GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2dq128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2dq_512 :
- GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtpd2ps_512 :
- GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtsd2ss_round :
- GCCBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_mask_cvtss2sd_round :
- GCCBuiltin<"__builtin_ia32_cvtss2sd_round_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtss2sd_round_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_mask_cvtpd2ps :
- GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2qq_128 :
- GCCBuiltin<"__builtin_ia32_cvtpd2qq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2qq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2qq_256 :
- GCCBuiltin<"__builtin_ia32_cvtpd2qq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2qq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2qq_512 :
- GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtpd2udq_128 :
- GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2udq128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2udq_256 :
- GCCBuiltin<"__builtin_ia32_cvtpd2udq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2udq256_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2udq_512 :
- GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2udq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtpd2uqq_128 :
- GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2uqq_256 :
- GCCBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtpd2uqq_512 :
- GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtps2dq_128 :
- GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2dq128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2dq_256 :
- GCCBuiltin<"__builtin_ia32_cvtps2dq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2dq256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2dq_512 :
- GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2dq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtps2pd_512 :
- GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2pd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtps2qq_128 :
- GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2qq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2qq_256 :
- GCCBuiltin<"__builtin_ia32_cvtps2qq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2qq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2qq_512 :
- GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtps2udq_128 :
- GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2udq128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2udq_256 :
- GCCBuiltin<"__builtin_ia32_cvtps2udq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2udq256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2udq_512 :
- GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2udq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtps2uqq_128 :
- GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2uqq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2uqq_256 :
- GCCBuiltin<"__builtin_ia32_cvtps2uqq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2uqq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvtps2uqq_512 :
- GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtps2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtqq2ps_128 :
- GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtqq2ps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2dq_128 :
- GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2dq128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2dq_512 :
- GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvttpd2qq_128 :
- GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2qq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2qq_256 :
- GCCBuiltin<"__builtin_ia32_cvttpd2qq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2qq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2qq_512 :
- GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvttpd2udq_128 :
- GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2udq128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2udq_256 :
- GCCBuiltin<"__builtin_ia32_cvttpd2udq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2udq256_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2udq_512 :
- GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2udq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvttpd2uqq_128 :
- GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2uqq_256 :
- GCCBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttpd2uqq_512 :
- GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvttps2dq_512 :
- GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvttps2qq_128 :
- GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2qq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttps2qq_256 :
- GCCBuiltin<"__builtin_ia32_cvttps2qq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2qq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttps2qq_512 :
- GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvttps2udq_128 :
- GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2udq128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttps2udq_256 :
- GCCBuiltin<"__builtin_ia32_cvttps2udq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2udq256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttps2udq_512 :
- GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2udq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvttps2uqq_128 :
- GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2uqq128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttps2uqq_256 :
- GCCBuiltin<"__builtin_ia32_cvttps2uqq256_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2uqq256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_cvttps2uqq_512 :
- GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">,
+ ClangBuiltin<"__builtin_ia32_cvttps2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_cvtuqq2ps_128 :
- GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">,
+ ClangBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
+ def int_x86_avx512_mask_rndscale_pd_128 : ClangBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">,
+ def int_x86_avx512_mask_rndscale_pd_256 : ClangBuiltin<"__builtin_ia32_rndscalepd_256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
+ def int_x86_avx512_mask_rndscale_pd_512 : ClangBuiltin<"__builtin_ia32_rndscalepd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">,
+ def int_x86_avx512_mask_rndscale_ps_128 : ClangBuiltin<"__builtin_ia32_rndscaleps_128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">,
+ def int_x86_avx512_mask_rndscale_ps_256 : ClangBuiltin<"__builtin_ia32_rndscaleps_256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
+ def int_x86_avx512_mask_rndscale_ps_512 : ClangBuiltin<"__builtin_ia32_rndscaleps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">,
+ def int_x86_avx512_mask_reduce_pd_128 : ClangBuiltin<"__builtin_ia32_reducepd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">,
+ def int_x86_avx512_mask_reduce_pd_256 : ClangBuiltin<"__builtin_ia32_reducepd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">,
+ def int_x86_avx512_mask_reduce_pd_512 : ClangBuiltin<"__builtin_ia32_reducepd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">,
+ def int_x86_avx512_mask_reduce_ps_128 : ClangBuiltin<"__builtin_ia32_reduceps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">,
+ def int_x86_avx512_mask_reduce_ps_256 : ClangBuiltin<"__builtin_ia32_reduceps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
- def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">,
+ def int_x86_avx512_mask_reduce_ps_512 : ClangBuiltin<"__builtin_ia32_reduceps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
-def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">,
+def int_x86_avx512_mask_range_pd_128 : ClangBuiltin<"__builtin_ia32_rangepd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">,
+def int_x86_avx512_mask_range_pd_256 : ClangBuiltin<"__builtin_ia32_rangepd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">,
+def int_x86_avx512_mask_range_pd_512 : ClangBuiltin<"__builtin_ia32_rangepd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
-def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">,
+def int_x86_avx512_mask_range_ps_128 : ClangBuiltin<"__builtin_ia32_rangeps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">,
+def int_x86_avx512_mask_range_ps_256 : ClangBuiltin<"__builtin_ia32_rangeps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
-def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">,
+def int_x86_avx512_mask_range_ps_512 : ClangBuiltin<"__builtin_ia32_rangeps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
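// Naming convention for the AVX-512 defs in this stretch: a "mask" infix
// is the merge-masking form (the extra vector operand is the pass-through
// value) and "maskz" is the zero-masking form; the llvm_i8_ty/llvm_i16_ty
// operand is the k-register write mask, one bit per vector element (i16
// for the 16-element 512-bit float ops above).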
@@ -3141,152 +3151,152 @@ def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mas
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_broadcastmw_512 :
- GCCBuiltin<"__builtin_ia32_broadcastmw512">,
+ ClangBuiltin<"__builtin_ia32_broadcastmw512">,
Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_broadcastmw_256 :
- GCCBuiltin<"__builtin_ia32_broadcastmw256">,
+ ClangBuiltin<"__builtin_ia32_broadcastmw256">,
Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_broadcastmw_128 :
- GCCBuiltin<"__builtin_ia32_broadcastmw128">,
+ ClangBuiltin<"__builtin_ia32_broadcastmw128">,
Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_broadcastmb_512 :
- GCCBuiltin<"__builtin_ia32_broadcastmb512">,
+ ClangBuiltin<"__builtin_ia32_broadcastmb512">,
Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_broadcastmb_256 :
- GCCBuiltin<"__builtin_ia32_broadcastmb256">,
+ ClangBuiltin<"__builtin_ia32_broadcastmb256">,
Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_broadcastmb_128 :
- GCCBuiltin<"__builtin_ia32_broadcastmb128">,
+ ClangBuiltin<"__builtin_ia32_broadcastmb128">,
Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>;
}
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512">,
+ def int_x86_avx512_add_ps_512 : ClangBuiltin<"__builtin_ia32_addps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512">,
+ def int_x86_avx512_add_pd_512 : ClangBuiltin<"__builtin_ia32_addpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512">,
+ def int_x86_avx512_sub_ps_512 : ClangBuiltin<"__builtin_ia32_subps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512">,
+ def int_x86_avx512_sub_pd_512 : ClangBuiltin<"__builtin_ia32_subpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512">,
+ def int_x86_avx512_mul_ps_512 : ClangBuiltin<"__builtin_ia32_mulps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512">,
+ def int_x86_avx512_mul_pd_512 : ClangBuiltin<"__builtin_ia32_mulpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512">,
+ def int_x86_avx512_div_ps_512 : ClangBuiltin<"__builtin_ia32_divps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512">,
+ def int_x86_avx512_div_pd_512 : ClangBuiltin<"__builtin_ia32_divpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">,
+ def int_x86_avx512_max_ps_512 : ClangBuiltin<"__builtin_ia32_maxps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
+ def int_x86_avx512_max_pd_512 : ClangBuiltin<"__builtin_ia32_maxpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">,
+ def int_x86_avx512_min_ps_512 : ClangBuiltin<"__builtin_ia32_minps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">,
+ def int_x86_avx512_min_pd_512 : ClangBuiltin<"__builtin_ia32_minpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">,
+ def int_x86_avx512_mask_add_ss_round : ClangBuiltin<"__builtin_ia32_addss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round_mask">,
+ def int_x86_avx512_mask_div_ss_round : ClangBuiltin<"__builtin_ia32_divss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_round_mask">,
+ def int_x86_avx512_mask_mul_ss_round : ClangBuiltin<"__builtin_ia32_mulss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round_mask">,
+ def int_x86_avx512_mask_sub_ss_round : ClangBuiltin<"__builtin_ia32_subss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round_mask">,
+ def int_x86_avx512_mask_max_ss_round : ClangBuiltin<"__builtin_ia32_maxss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round_mask">,
+ def int_x86_avx512_mask_min_ss_round : ClangBuiltin<"__builtin_ia32_minss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round_mask">,
+ def int_x86_avx512_mask_add_sd_round : ClangBuiltin<"__builtin_ia32_addsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round_mask">,
+ def int_x86_avx512_mask_div_sd_round : ClangBuiltin<"__builtin_ia32_divsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round_mask">,
+ def int_x86_avx512_mask_mul_sd_round : ClangBuiltin<"__builtin_ia32_mulsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round_mask">,
+ def int_x86_avx512_mask_sub_sd_round : ClangBuiltin<"__builtin_ia32_subsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round_mask">,
+ def int_x86_avx512_mask_max_sd_round : ClangBuiltin<"__builtin_ia32_maxsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round_mask">,
+ def int_x86_avx512_mask_min_sd_round : ClangBuiltin<"__builtin_ia32_minsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round_mask">,
+ def int_x86_avx512_mask_rndscale_ss : ClangBuiltin<"__builtin_ia32_rndscaless_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round_mask">,
+ def int_x86_avx512_mask_rndscale_sd : ClangBuiltin<"__builtin_ia32_rndscalesd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round_mask">,
+ def int_x86_avx512_mask_range_ss : ClangBuiltin<"__builtin_ia32_rangess128_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round_mask">,
+ def int_x86_avx512_mask_range_sd : ClangBuiltin<"__builtin_ia32_rangesd128_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">,
+ def int_x86_avx512_mask_reduce_ss : ClangBuiltin<"__builtin_ia32_reducess_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">,
+ def int_x86_avx512_mask_reduce_sd : ClangBuiltin<"__builtin_ia32_reducesd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round_mask">,
+ def int_x86_avx512_mask_scalef_sd : ClangBuiltin<"__builtin_ia32_scalefsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round_mask">,
+ def int_x86_avx512_mask_scalef_ss : ClangBuiltin<"__builtin_ia32_scalefss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">,
+ def int_x86_avx512_mask_scalef_pd_128 : ClangBuiltin<"__builtin_ia32_scalefpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_scalef_pd_256 : GCCBuiltin<"__builtin_ia32_scalefpd256_mask">,
+ def int_x86_avx512_mask_scalef_pd_256 : ClangBuiltin<"__builtin_ia32_scalefpd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4f64_ty, llvm_i8_ty],[IntrNoMem]>;
- def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">,
+ def int_x86_avx512_mask_scalef_pd_512 : ClangBuiltin<"__builtin_ia32_scalefpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">,
+ def int_x86_avx512_mask_scalef_ps_128 : ClangBuiltin<"__builtin_ia32_scalefps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_scalef_ps_256 : GCCBuiltin<"__builtin_ia32_scalefps256_mask">,
+ def int_x86_avx512_mask_scalef_ps_256 : ClangBuiltin<"__builtin_ia32_scalefps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">,
+ def int_x86_avx512_mask_scalef_ps_512 : ClangBuiltin<"__builtin_ia32_scalefps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
@@ -3307,290 +3317,290 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_fixupimm_pd_128 :
- GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_maskz_fixupimm_pd_128 :
- GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmpd128_maskz">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_fixupimm_pd_256 :
- GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_maskz_fixupimm_pd_256 :
- GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmpd256_maskz">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_fixupimm_pd_512 :
- GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_maskz_fixupimm_pd_512 :
- GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmpd512_maskz">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_mask_fixupimm_ps_128 :
- GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_maskz_fixupimm_ps_128 :
- GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmps128_maskz">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_fixupimm_ps_256 :
- GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_maskz_fixupimm_ps_256 :
- GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmps256_maskz">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_mask_fixupimm_ps_512 :
- GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmps512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_maskz_fixupimm_ps_512 :
- GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmps512_maskz">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_mask_fixupimm_sd :
- GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmsd_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_maskz_fixupimm_sd :
- GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmsd_maskz">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_mask_fixupimm_ss :
- GCCBuiltin<"__builtin_ia32_fixupimmss_mask">,
+ ClangBuiltin<"__builtin_ia32_fixupimmss_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_maskz_fixupimm_ss :
- GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">,
+ ClangBuiltin<"__builtin_ia32_fixupimmss_maskz">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">,
+ def int_x86_avx512_mask_getexp_pd_128 : ClangBuiltin<"__builtin_ia32_getexppd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_getexp_pd_256 : GCCBuiltin<"__builtin_ia32_getexppd256_mask">,
+ def int_x86_avx512_mask_getexp_pd_256 : ClangBuiltin<"__builtin_ia32_getexppd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">,
+ def int_x86_avx512_mask_getexp_pd_512 : ClangBuiltin<"__builtin_ia32_getexppd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">,
+ def int_x86_avx512_mask_getexp_ps_128 : ClangBuiltin<"__builtin_ia32_getexpps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_getexp_ps_256 : GCCBuiltin<"__builtin_ia32_getexpps256_mask">,
+ def int_x86_avx512_mask_getexp_ps_256 : ClangBuiltin<"__builtin_ia32_getexpps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">,
+ def int_x86_avx512_mask_getexp_ps_512 : ClangBuiltin<"__builtin_ia32_getexpps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss128_round_mask">,
+ def int_x86_avx512_mask_getexp_ss : ClangBuiltin<"__builtin_ia32_getexpss128_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd128_round_mask">,
+ def int_x86_avx512_mask_getexp_sd : ClangBuiltin<"__builtin_ia32_getexpsd128_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_mask_getmant_pd_128 :
- GCCBuiltin<"__builtin_ia32_getmantpd128_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_getmant_pd_256 :
- GCCBuiltin<"__builtin_ia32_getmantpd256_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_getmant_pd_512 :
- GCCBuiltin<"__builtin_ia32_getmantpd512_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty,llvm_i32_ty ],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_mask_getmant_ps_128 :
- GCCBuiltin<"__builtin_ia32_getmantps128_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_getmant_ps_256 :
- GCCBuiltin<"__builtin_ia32_getmantps256_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_x86_avx512_mask_getmant_ps_512 :
- GCCBuiltin<"__builtin_ia32_getmantps512_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantps512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_mask_getmant_ss :
- GCCBuiltin<"__builtin_ia32_getmantss_round_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantss_round_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
def int_x86_avx512_mask_getmant_sd :
- GCCBuiltin<"__builtin_ia32_getmantsd_round_mask">,
+ ClangBuiltin<"__builtin_ia32_getmantsd_round_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>]>;
- def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
+ def int_x86_avx512_rsqrt14_ss : ClangBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">,
+ def int_x86_avx512_rsqrt14_sd : ClangBuiltin<"__builtin_ia32_rsqrt14sd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_pd_128 : GCCBuiltin<"__builtin_ia32_rsqrt14pd128_mask">,
+ def int_x86_avx512_rsqrt14_pd_128 : ClangBuiltin<"__builtin_ia32_rsqrt14pd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_pd_256 : GCCBuiltin<"__builtin_ia32_rsqrt14pd256_mask">,
+ def int_x86_avx512_rsqrt14_pd_256 : ClangBuiltin<"__builtin_ia32_rsqrt14pd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">,
+ def int_x86_avx512_rsqrt14_pd_512 : ClangBuiltin<"__builtin_ia32_rsqrt14pd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_ps_128 : GCCBuiltin<"__builtin_ia32_rsqrt14ps128_mask">,
+ def int_x86_avx512_rsqrt14_ps_128 : ClangBuiltin<"__builtin_ia32_rsqrt14ps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrt14ps256_mask">,
+ def int_x86_avx512_rsqrt14_ps_256 : ClangBuiltin<"__builtin_ia32_rsqrt14ps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">,
+ def int_x86_avx512_rsqrt14_ps_512 : ClangBuiltin<"__builtin_ia32_rsqrt14ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">,
+ def int_x86_avx512_rcp14_ss : ClangBuiltin<"__builtin_ia32_rcp14ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">,
+ def int_x86_avx512_rcp14_sd : ClangBuiltin<"__builtin_ia32_rcp14sd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_pd_128 : GCCBuiltin<"__builtin_ia32_rcp14pd128_mask">,
+ def int_x86_avx512_rcp14_pd_128 : ClangBuiltin<"__builtin_ia32_rcp14pd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_pd_256 : GCCBuiltin<"__builtin_ia32_rcp14pd256_mask">,
+ def int_x86_avx512_rcp14_pd_256 : ClangBuiltin<"__builtin_ia32_rcp14pd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">,
+ def int_x86_avx512_rcp14_pd_512 : ClangBuiltin<"__builtin_ia32_rcp14pd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_ps_128 : GCCBuiltin<"__builtin_ia32_rcp14ps128_mask">,
+ def int_x86_avx512_rcp14_ps_128 : ClangBuiltin<"__builtin_ia32_rcp14ps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_ps_256 : GCCBuiltin<"__builtin_ia32_rcp14ps256_mask">,
+ def int_x86_avx512_rcp14_ps_256 : ClangBuiltin<"__builtin_ia32_rcp14ps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">,
+ def int_x86_avx512_rcp14_ps_512 : ClangBuiltin<"__builtin_ia32_rcp14ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">,
+ def int_x86_avx512_rcp28_ps : ClangBuiltin<"__builtin_ia32_rcp28ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">,
+ def int_x86_avx512_rcp28_pd : ClangBuiltin<"__builtin_ia32_rcp28pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_exp2_ps : GCCBuiltin<"__builtin_ia32_exp2ps_mask">,
+ def int_x86_avx512_exp2_ps : ClangBuiltin<"__builtin_ia32_exp2ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_exp2_pd : GCCBuiltin<"__builtin_ia32_exp2pd_mask">,
+ def int_x86_avx512_exp2_pd : ClangBuiltin<"__builtin_ia32_exp2pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round_mask">,
+ def int_x86_avx512_rcp28_ss : ClangBuiltin<"__builtin_ia32_rcp28ss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round_mask">,
+ def int_x86_avx512_rcp28_sd : ClangBuiltin<"__builtin_ia32_rcp28sd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
+ def int_x86_avx512_rsqrt28_ps : ClangBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
+ def int_x86_avx512_rsqrt28_pd : ClangBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
+ def int_x86_avx512_rsqrt28_ss : ClangBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
+ def int_x86_avx512_rsqrt28_sd : ClangBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
+ def int_x86_avx512_psad_bw_512 : ClangBuiltin<"__builtin_ia32_psadbw512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem, Commutative]>;
}
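
For reference, the rcp14/rsqrt14 builtins above are the 2^-14 relative-error approximations exposed through Clang's <immintrin.h> (-mavx512f); a minimal sketch, with illustrative helper names:

#include <immintrin.h>

// Approximate 1/sqrt(x) and 1/x for 16 packed floats.
// Both approximations have a maximum relative error of 2^-14.
__m512 approx_inv_sqrt(__m512 x) {
  return _mm512_rsqrt14_ps(x);  // wraps __builtin_ia32_rsqrt14ps512_mask
}

__m512 approx_recip(__m512 x) {
  return _mm512_rcp14_ps(x);    // wraps __builtin_ia32_rcp14ps512_mask
}
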
// Integer arithmetic ops
let TargetPrefix = "x86" in {
- def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,
+ def int_x86_avx512_pmulhu_w_512 : ClangBuiltin<"__builtin_ia32_pmulhuw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx512_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512">,
+ def int_x86_avx512_pmulh_w_512 : ClangBuiltin<"__builtin_ia32_pmulhw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx512_pavg_b_512 : GCCBuiltin<"__builtin_ia32_pavgb512">,
+ def int_x86_avx512_pavg_b_512 : ClangBuiltin<"__builtin_ia32_pavgb512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_pavg_w_512 : GCCBuiltin<"__builtin_ia32_pavgw512">,
+ def int_x86_avx512_pavg_w_512 : ClangBuiltin<"__builtin_ia32_pavgw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_pmaddw_d_512 : GCCBuiltin<"__builtin_ia32_pmaddwd512">,
+ def int_x86_avx512_pmaddw_d_512 : ClangBuiltin<"__builtin_ia32_pmaddwd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx512_pmaddubs_w_512 : GCCBuiltin<"__builtin_ia32_pmaddubsw512">,
+ def int_x86_avx512_pmaddubs_w_512 : ClangBuiltin<"__builtin_ia32_pmaddubsw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty,
llvm_v64i8_ty], [IntrNoMem]>;
def int_x86_avx512_dbpsadbw_128 :
- GCCBuiltin<"__builtin_ia32_dbpsadbw128">,
+ ClangBuiltin<"__builtin_ia32_dbpsadbw128">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_avx512_dbpsadbw_256 :
- GCCBuiltin<"__builtin_ia32_dbpsadbw256">,
+ ClangBuiltin<"__builtin_ia32_dbpsadbw256">,
Intrinsic<[llvm_v16i16_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_avx512_dbpsadbw_512 :
- GCCBuiltin<"__builtin_ia32_dbpsadbw512">,
+ ClangBuiltin<"__builtin_ia32_dbpsadbw512">,
Intrinsic<[llvm_v32i16_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
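
A usage sketch for two of the 512-bit integer builtins above, via the AVX-512BW wrappers in <immintrin.h> (-mavx512bw); helper names are illustrative:

#include <immintrin.h>

// Sum of absolute differences: for each 8-byte lane, psadbw sums
// |a[i] - b[i]| over 8 unsigned bytes into one 64-bit result.
__m512i byte_sad(__m512i a, __m512i b) {
  return _mm512_sad_epu8(a, b);        // wraps __builtin_ia32_psadbw512
}

// Multiply unsigned bytes by signed bytes, then horizontally add each
// pair into a saturated signed 16-bit lane.
__m512i dot_u8_i8(__m512i a, __m512i b) {
  return _mm512_maddubs_epi16(a, b);   // wraps __builtin_ia32_pmaddubsw512
}
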
@@ -3838,32 +3848,32 @@ let TargetPrefix = "x86" in {
// gather prefetch
// NOTE: These can't be ArgMemOnly because the address being prefetched can
// be computed entirely from the index register.
- def int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">,
+ def int_x86_avx512_gatherpf_dpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_gatherpf_dps_512 : GCCBuiltin<"__builtin_ia32_gatherpfdps">,
+ def int_x86_avx512_gatherpf_dps_512 : ClangBuiltin<"__builtin_ia32_gatherpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_gatherpf_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfqpd">,
+ def int_x86_avx512_gatherpf_qpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_gatherpf_qps_512 : GCCBuiltin<"__builtin_ia32_gatherpfqps">,
+ def int_x86_avx512_gatherpf_qps_512 : ClangBuiltin<"__builtin_ia32_gatherpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
// scatter prefetch
// NOTE: These can't be ArgMemOnly because the address being prefetched can
// be computed entirely from the index register.
- def int_x86_avx512_scatterpf_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfdpd">,
+ def int_x86_avx512_scatterpf_dpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_scatterpf_dps_512 : GCCBuiltin<"__builtin_ia32_scatterpfdps">,
+ def int_x86_avx512_scatterpf_dps_512 : ClangBuiltin<"__builtin_ia32_scatterpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_scatterpf_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfqpd">,
+ def int_x86_avx512_scatterpf_qpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_scatterpf_qps_512 : GCCBuiltin<"__builtin_ia32_scatterpfqps">,
+ def int_x86_avx512_scatterpf_qps_512 : ClangBuiltin<"__builtin_ia32_scatterpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
}
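
The NOTE above is the modeling consequence of the addressing: each prefetched address is base + scale * index[i], so it can come entirely from the vector index. Sketched via the AVX-512PF wrapper in <immintrin.h> (-mavx512pf, Knights Landing era); the helper name is illustrative:

#include <immintrin.h>

// Prefetch base[idx[i]] for all 16 lanes into L1 (_MM_HINT_T0).
// Element address = base + 4 * idx[i]; scale must be 1, 2, 4, or 8.
void prefetch_gather(const float *base, __m512i idx) {
  _mm512_prefetch_i32gather_ps(idx, base, 4, _MM_HINT_T0);
}
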
@@ -4109,34 +4119,34 @@ let TargetPrefix = "x86" in {
// Conflict detection instructions
let TargetPrefix = "x86" in {
def int_x86_avx512_conflict_d_128 :
- GCCBuiltin<"__builtin_ia32_vpconflictsi_128">,
+ ClangBuiltin<"__builtin_ia32_vpconflictsi_128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512_conflict_d_256 :
- GCCBuiltin<"__builtin_ia32_vpconflictsi_256">,
+ ClangBuiltin<"__builtin_ia32_vpconflictsi_256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512_conflict_d_512 :
- GCCBuiltin<"__builtin_ia32_vpconflictsi_512">,
+ ClangBuiltin<"__builtin_ia32_vpconflictsi_512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_conflict_q_128 :
- GCCBuiltin<"__builtin_ia32_vpconflictdi_128">,
+ ClangBuiltin<"__builtin_ia32_vpconflictdi_128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_conflict_q_256 :
- GCCBuiltin<"__builtin_ia32_vpconflictdi_256">,
+ ClangBuiltin<"__builtin_ia32_vpconflictdi_256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
def int_x86_avx512_conflict_q_512 :
- GCCBuiltin<"__builtin_ia32_vpconflictdi_512">,
+ ClangBuiltin<"__builtin_ia32_vpconflictdi_512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>;
}
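
What the conflict builtins compute, sketched via the AVX-512CD wrapper in <immintrin.h> (-mavx512cd); the helper name is illustrative:

#include <immintrin.h>

// out[i] gets bit j set (j < i) iff in[j] == in[i]; lane 0 is always 0.
// A lane value of 0 therefore means "no earlier duplicate", which is the
// usual test before scattering with possibly repeated indices.
__m512i find_duplicates(__m512i in) {
  return _mm512_conflict_epi32(in);  // wraps __builtin_ia32_vpconflictsi_512
}
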
// Compares
let TargetPrefix = "x86" in {
// 512-bit
- def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">,
+ def int_x86_avx512_vcomi_sd : ClangBuiltin<"__builtin_ia32_vcomisd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">,
+ def int_x86_avx512_vcomi_ss : ClangBuiltin<"__builtin_ia32_vcomiss">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
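
Each vcomi builtin carries both a comparison predicate and an SAE immediate; the <immintrin.h> wrapper passes them straight through (-mavx512f). A sketch with an illustrative helper:

#include <immintrin.h>

// Compare the low doubles with an explicit predicate and exception control:
// _CMP_GT_OQ is ordered greater-than, _MM_FROUND_NO_EXC suppresses
// floating-point exceptions (SAE).
int scalar_gt(__m128d a, __m128d b) {
  return _mm_comi_round_sd(a, b, _CMP_GT_OQ, _MM_FROUND_NO_EXC);
}
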
@@ -4159,152 +4169,152 @@ let TargetPrefix = "x86" in {
// truncate
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_pmov_qb_128 :
- GCCBuiltin<"__builtin_ia32_pmovqb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_qb_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qb_128 :
- GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qb_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qb_128 :
- GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qb_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_qb_256 :
- GCCBuiltin<"__builtin_ia32_pmovqb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_qb_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qb_256 :
- GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qb_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qb_256 :
- GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qb_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_qb_512 :
- GCCBuiltin<"__builtin_ia32_pmovqb512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqb512_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_qb_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qb_512 :
- GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqb512_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qb_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qb_512 :
- GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqb512_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qb_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_qw_128 :
- GCCBuiltin<"__builtin_ia32_pmovqw128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqw128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_qw_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqw128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qw_128 :
- GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqw128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qw_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqw128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qw_128 :
- GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqw128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qw_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqw128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_qw_256 :
- GCCBuiltin<"__builtin_ia32_pmovqw256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqw256_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_qw_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqw256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qw_256 :
- GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqw256_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qw_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqw256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qw_256 :
- GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqw256_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qw_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqw256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
@@ -4313,167 +4323,167 @@ let TargetPrefix = "x86" in {
[llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_qw_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqw512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qw_512 :
- GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqw512_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qw_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqw512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qw_512 :
- GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqw512_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qw_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqw512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_qd_128 :
- GCCBuiltin<"__builtin_ia32_pmovqd128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqd128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_qd_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqd128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qd_128 :
- GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqd128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qd_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqd128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qd_128 :
- GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqd128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qd_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqd128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_qd_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qd_256 :
- GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqd256_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qd_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqd256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qd_256 :
- GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqd256_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qd_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqd256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_qd_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_qd_512 :
- GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqd512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_qd_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsqd512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_qd_512 :
- GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqd512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_qd_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusqd512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_db_128 :
- GCCBuiltin<"__builtin_ia32_pmovdb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_db_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_db_128 :
- GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_db_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_db_128 :
- GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_db_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_db_256 :
- GCCBuiltin<"__builtin_ia32_pmovdb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_db_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_db_256 :
- GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_db_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_db_256 :
- GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_db_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
@@ -4482,87 +4492,87 @@ let TargetPrefix = "x86" in {
[llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_db_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_db_512 :
- GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdb512_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_db_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_db_512 :
- GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdb512_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_db_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_dw_128 :
- GCCBuiltin<"__builtin_ia32_pmovdw128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdw128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_dw_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdw128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_dw_128 :
- GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdw128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_dw_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdw128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_dw_128 :
- GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdw128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_dw_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdw128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_dw_256 :
- GCCBuiltin<"__builtin_ia32_pmovdw256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdw256_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_dw_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdw256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_dw_256 :
- GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdw256_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_dw_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdw256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_dw_256 :
- GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdw256_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_dw_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdw256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
@@ -4571,107 +4581,107 @@ let TargetPrefix = "x86" in {
[llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_dw_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovdw512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_dw_512 :
- GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdw512_mask">,
Intrinsic<[llvm_v16i16_ty],
[llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_dw_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovsdw512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_dw_512 :
- GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdw512_mask">,
Intrinsic<[llvm_v16i16_ty],
[llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_dw_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovusdw512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_wb_128 :
- GCCBuiltin<"__builtin_ia32_pmovwb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovwb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmov_wb_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovwb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_wb_128 :
- GCCBuiltin<"__builtin_ia32_pmovswb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovswb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_wb_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovswb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_wb_128 :
- GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovuswb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_wb_mem_128 :
- GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovuswb128mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_wb_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_wb_256 :
- GCCBuiltin<"__builtin_ia32_pmovswb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovswb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_wb_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovswb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_wb_256 :
- GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovuswb256_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_wb_mem_256 :
- GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovuswb256mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmov_wb_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovs_wb_512 :
- GCCBuiltin<"__builtin_ia32_pmovswb512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovswb512_mask">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovs_wb_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovswb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
def int_x86_avx512_mask_pmovus_wb_512 :
- GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovuswb512_mask">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pmovus_wb_mem_512 :
- GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">,
+ ClangBuiltin<"__builtin_ia32_pmovuswb512mem_mask">,
Intrinsic<[],
[llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
[IntrArgMemOnly]>;
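
The pmov/pmovs/pmovus triples above are plain truncation, signed saturation, and unsigned saturation respectively, and the *_mem_* forms store the narrowed result under a mask. For the d-to-b case, the <immintrin.h> wrappers look roughly like this (-mavx512f; helper names illustrative):

#include <immintrin.h>

// Narrow 16 x i32 to 16 x i8 three ways.
__m128i narrow_trunc(__m512i v) { return _mm512_cvtepi32_epi8(v); }   // truncate
__m128i narrow_sat_s(__m512i v) { return _mm512_cvtsepi32_epi8(v); }  // signed saturate
__m128i narrow_sat_u(__m512i v) { return _mm512_cvtusepi32_epi8(v); } // unsigned saturate

// Masked store form: write only the bytes whose mask bit is set.
void narrow_store(void *dst, __mmask16 k, __m512i v) {
  _mm512_mask_cvtepi32_storeu_epi8(dst, k, v);  // pmovdb512mem_mask
}
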
@@ -4680,37 +4690,37 @@ let TargetPrefix = "x86" in {
// Bitwise ternary logic
let TargetPrefix = "x86" in {
def int_x86_avx512_pternlog_d_128 :
- GCCBuiltin<"__builtin_ia32_pternlogd128">,
+ ClangBuiltin<"__builtin_ia32_pternlogd128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_pternlog_d_256 :
- GCCBuiltin<"__builtin_ia32_pternlogd256">,
+ ClangBuiltin<"__builtin_ia32_pternlogd256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_pternlog_d_512 :
- GCCBuiltin<"__builtin_ia32_pternlogd512">,
+ ClangBuiltin<"__builtin_ia32_pternlogd512">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_pternlog_q_128 :
- GCCBuiltin<"__builtin_ia32_pternlogq128">,
+ ClangBuiltin<"__builtin_ia32_pternlogq128">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_pternlog_q_256 :
- GCCBuiltin<"__builtin_ia32_pternlogq256">,
+ ClangBuiltin<"__builtin_ia32_pternlogq256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
def int_x86_avx512_pternlog_q_512 :
- GCCBuiltin<"__builtin_ia32_pternlogq512">,
+ ClangBuiltin<"__builtin_ia32_pternlogq512">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
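
The trailing i32 of each pternlog builtin is an 8-bit truth table: bit (a<<2 | b<<1 | c) of the immediate is the output for that combination of input bits. Sketched through <immintrin.h> (-mavx512f); helper names are illustrative:

#include <immintrin.h>

// 0x96 encodes a ^ b ^ c (truth-table bits 1, 2, 4, and 7 are set).
__m512i xor3(__m512i a, __m512i b, __m512i c) {
  return _mm512_ternarylogic_epi32(a, b, c, 0x96);
}

// 0xE8 encodes bitwise majority: set where at least two inputs are set.
__m512i majority(__m512i a, __m512i b, __m512i c) {
  return _mm512_ternarylogic_epi32(a, b, c, 0xE8);
}
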
@@ -4770,12 +4780,12 @@ let TargetPrefix = "x86" in {
llvm_i32_ty, llvm_v2i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
def int_x86_avx512_mask_cmp_ss :
- GCCBuiltin<"__builtin_ia32_cmpss_mask">,
+ ClangBuiltin<"__builtin_ia32_cmpss_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_mask_cmp_sd :
- GCCBuiltin<"__builtin_ia32_cmpsd_mask">,
+ ClangBuiltin<"__builtin_ia32_cmpsd_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>;
@@ -4784,21 +4794,21 @@ let TargetPrefix = "x86" in {
//===----------------------------------------------------------------------===//
// SHA intrinsics
let TargetPrefix = "x86" in {
- def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">,
+ def int_x86_sha1rnds4 : ClangBuiltin<"__builtin_ia32_sha1rnds4">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
- def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">,
+ def int_x86_sha1nexte : ClangBuiltin<"__builtin_ia32_sha1nexte">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">,
+ def int_x86_sha1msg1 : ClangBuiltin<"__builtin_ia32_sha1msg1">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sha1msg2 : GCCBuiltin<"__builtin_ia32_sha1msg2">,
+ def int_x86_sha1msg2 : ClangBuiltin<"__builtin_ia32_sha1msg2">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sha256rnds2 : GCCBuiltin<"__builtin_ia32_sha256rnds2">,
+ def int_x86_sha256rnds2 : ClangBuiltin<"__builtin_ia32_sha256rnds2">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
- def int_x86_sha256msg1 : GCCBuiltin<"__builtin_ia32_sha256msg1">,
+ def int_x86_sha256msg1 : ClangBuiltin<"__builtin_ia32_sha256msg1">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sha256msg2 : GCCBuiltin<"__builtin_ia32_sha256msg2">,
+ def int_x86_sha256msg2 : ClangBuiltin<"__builtin_ia32_sha256msg2">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
}
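
These SHA builtins reach C through <immintrin.h> (-msha). A minimal sketch of the instruction-level shape only, not a complete compression function; helper names are illustrative:

#include <immintrin.h>

// One double-round of SHA-256: state0/state1 hold the eight working
// variables, wk holds two message words already added to round constants.
__m128i sha256_two_rounds(__m128i state0, __m128i state1, __m128i wk) {
  return _mm_sha256rnds2_epu32(state0, state1, wk);
}

// Message-schedule helper steps; a complete schedule also adds in
// further message words between these two operations.
__m128i sha256_sched(__m128i w0, __m128i w1, __m128i w2) {
  return _mm_sha256msg2_epu32(_mm_sha256msg1_epu32(w0, w1), w2);
}
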
@@ -4806,17 +4816,17 @@ let TargetPrefix = "x86" in {
// Thread synchronization ops with timer.
let TargetPrefix = "x86" in {
def int_x86_monitorx
- : GCCBuiltin<"__builtin_ia32_monitorx">,
+ : ClangBuiltin<"__builtin_ia32_monitorx">,
Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty ], []>;
def int_x86_mwaitx
- : GCCBuiltin<"__builtin_ia32_mwaitx">,
+ : ClangBuiltin<"__builtin_ia32_mwaitx">,
Intrinsic<[], [ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], []>;
}
//===----------------------------------------------------------------------===//
// Cache-line zero
let TargetPrefix = "x86" in {
- def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">,
+ def int_x86_clzero : ClangBuiltin<"__builtin_ia32_clzero">,
Intrinsic<[], [llvm_ptr_ty], []>;
}
@@ -4825,11 +4835,11 @@ let TargetPrefix = "x86" in {
let TargetPrefix = "x86" in {
// Write back and invalidate
- def int_x86_wbinvd : GCCBuiltin<"__builtin_ia32_wbinvd">,
+ def int_x86_wbinvd : ClangBuiltin<"__builtin_ia32_wbinvd">,
Intrinsic<[], [], []>;
// Write back no-invalidate
- def int_x86_wbnoinvd : GCCBuiltin<"__builtin_ia32_wbnoinvd">,
+ def int_x86_wbnoinvd : ClangBuiltin<"__builtin_ia32_wbnoinvd">,
Intrinsic<[], [], []>;
}
@@ -4837,18 +4847,18 @@ let TargetPrefix = "x86" in {
// Cache-line demote
let TargetPrefix = "x86" in {
- def int_x86_cldemote : GCCBuiltin<"__builtin_ia32_cldemote">,
+ def int_x86_cldemote : ClangBuiltin<"__builtin_ia32_cldemote">,
Intrinsic<[], [llvm_ptr_ty], []>;
}
//===----------------------------------------------------------------------===//
// Wait and pause enhancements
let TargetPrefix = "x86" in {
- def int_x86_umonitor : GCCBuiltin<"__builtin_ia32_umonitor">,
+ def int_x86_umonitor : ClangBuiltin<"__builtin_ia32_umonitor">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_umwait : GCCBuiltin<"__builtin_ia32_umwait">,
+ def int_x86_umwait : ClangBuiltin<"__builtin_ia32_umwait">,
Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
- def int_x86_tpause : GCCBuiltin<"__builtin_ia32_tpause">,
+ def int_x86_tpause : ClangBuiltin<"__builtin_ia32_tpause">,
Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
}
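
Clang's <immintrin.h> wrappers for these builtins (-mwaitpkg) take a 64-bit TSC deadline and split it into the two trailing i32 operands seen above. A sketch with an illustrative helper:

#include <immintrin.h>

// Monitor a cache line, then wait until it is written or the TSC
// deadline passes. control 0 selects the deeper C0.2 wait, 1 the
// lighter C0.1. The return value is the carry flag from umwait (set
// when the wait was cut short by the OS-imposed time limit).
unsigned char wait_on(void *addr, unsigned long long deadline) {
  _umonitor(addr);              // __builtin_ia32_umonitor
  return _umwait(0, deadline);  // deadline halves feed the i32 operands
}
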
@@ -4856,11 +4866,11 @@ let TargetPrefix = "x86" in {
// Direct Move Instructions
let TargetPrefix = "x86" in {
- def int_x86_directstore32 : GCCBuiltin<"__builtin_ia32_directstore_u32">,
+ def int_x86_directstore32 : ClangBuiltin<"__builtin_ia32_directstore_u32">,
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>;
- def int_x86_directstore64 : GCCBuiltin<"__builtin_ia32_directstore_u64">,
+ def int_x86_directstore64 : ClangBuiltin<"__builtin_ia32_directstore_u64">,
Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>;
- def int_x86_movdir64b : GCCBuiltin<"__builtin_ia32_movdir64b">,
+ def int_x86_movdir64b : ClangBuiltin<"__builtin_ia32_movdir64b">,
Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>;
}
@@ -4868,9 +4878,9 @@ let TargetPrefix = "x86" in {
// PTWrite - Write data to processor trace packet
let TargetPrefix = "x86" in {
- def int_x86_ptwrite32 : GCCBuiltin<"__builtin_ia32_ptwrite32">,
+ def int_x86_ptwrite32 : ClangBuiltin<"__builtin_ia32_ptwrite32">,
Intrinsic<[], [llvm_i32_ty], []>;
- def int_x86_ptwrite64 : GCCBuiltin<"__builtin_ia32_ptwrite64">,
+ def int_x86_ptwrite64 : ClangBuiltin<"__builtin_ia32_ptwrite64">,
Intrinsic<[], [llvm_i64_ty], []>;
}
@@ -4878,21 +4888,21 @@ let TargetPrefix = "x86" in {
// INVPCID - Invalidate Process-Context Identifier
let TargetPrefix = "x86" in {
- def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
+ def int_x86_invpcid : ClangBuiltin<"__builtin_ia32_invpcid">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
}
let TargetPrefix = "x86" in {
def int_x86_avx512bf16_cvtne2ps2bf16_128:
- GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+ ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
def int_x86_avx512bf16_cvtne2ps2bf16_256:
- GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+ ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
[IntrNoMem]>;
def int_x86_avx512bf16_cvtne2ps2bf16_512:
- GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+ ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
[IntrNoMem]>;
// Intrinsic must be masked because it produces fewer than 128 bits of results.
@@ -4901,21 +4911,21 @@ let TargetPrefix = "x86" in {
[llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
[IntrNoMem]>;
def int_x86_avx512bf16_cvtneps2bf16_256:
- GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+ ClangBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx512bf16_cvtneps2bf16_512:
- GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+ ClangBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
def int_x86_avx512bf16_dpbf16ps_128:
- GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+ ClangBuiltin<"__builtin_ia32_dpbf16ps_128">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_avx512bf16_dpbf16ps_256:
- GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+ ClangBuiltin<"__builtin_ia32_dpbf16ps_256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
def int_x86_avx512bf16_dpbf16ps_512:
- GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+ ClangBuiltin<"__builtin_ia32_dpbf16ps_512">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
}
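
A sketch of how these bf16 builtins compose through <immintrin.h> (-mavx512bf16): narrow two fp32 vectors per operand with round-to-nearest-even, then accumulate pairwise dot products in fp32. Helper name is illustrative:

#include <immintrin.h>

// acc[i] += dot products of adjacent bf16 pairs, computed in fp32.
__m512 bf16_dot_acc(__m512 acc, __m512 a, __m512 b, __m512 c, __m512 d) {
  __m512bh ab = _mm512_cvtne2ps_pbh(a, b);  // cvtne2ps2bf16_512
  __m512bh cd = _mm512_cvtne2ps_pbh(c, d);  // round-to-nearest-even narrowing
  return _mm512_dpbf16_ps(acc, ab, cd);     // dpbf16ps_512
}
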
@@ -4924,9 +4934,9 @@ let TargetPrefix = "x86" in {
// ENQCMD - Enqueue Stores Instructions
let TargetPrefix = "x86" in {
- def int_x86_enqcmd : GCCBuiltin<"__builtin_ia32_enqcmd">,
+ def int_x86_enqcmd : ClangBuiltin<"__builtin_ia32_enqcmd">,
Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
- def int_x86_enqcmds : GCCBuiltin<"__builtin_ia32_enqcmds">,
+ def int_x86_enqcmds : ClangBuiltin<"__builtin_ia32_enqcmds">,
Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
}
@@ -4934,7 +4944,7 @@ let TargetPrefix = "x86" in {
// SERIALIZE - Serialize instruction fetch and execution
let TargetPrefix = "x86" in {
- def int_x86_serialize : GCCBuiltin<"__builtin_ia32_serialize">,
+ def int_x86_serialize : ClangBuiltin<"__builtin_ia32_serialize">,
Intrinsic<[], [], []>;
}
@@ -4942,16 +4952,16 @@ let TargetPrefix = "x86" in {
// TSXLDTRK - TSX Suspend Load Address Tracking
let TargetPrefix = "x86" in {
- def int_x86_xsusldtrk : GCCBuiltin<"__builtin_ia32_xsusldtrk">,
+ def int_x86_xsusldtrk : ClangBuiltin<"__builtin_ia32_xsusldtrk">,
Intrinsic<[], [], []>;
- def int_x86_xresldtrk : GCCBuiltin<"__builtin_ia32_xresldtrk">,
+ def int_x86_xresldtrk : ClangBuiltin<"__builtin_ia32_xresldtrk">,
Intrinsic<[], [], []>;
}
//===----------------------------------------------------------------------===//
// Key Locker
let TargetPrefix = "x86" in {
- def int_x86_loadiwkey : GCCBuiltin<"__builtin_ia32_loadiwkey">,
+ def int_x86_loadiwkey : ClangBuiltin<"__builtin_ia32_loadiwkey">,
Intrinsic<[], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[]>;
def int_x86_encodekey128 :
@@ -5004,91 +5014,91 @@ let TargetPrefix = "x86" in {
// AMX - Intel AMX extensions
let TargetPrefix = "x86" in {
- def int_x86_ldtilecfg : GCCBuiltin<"__builtin_ia32_tile_loadconfig">,
+ def int_x86_ldtilecfg : ClangBuiltin<"__builtin_ia32_tile_loadconfig">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_sttilecfg : GCCBuiltin<"__builtin_ia32_tile_storeconfig">,
+ def int_x86_sttilecfg : ClangBuiltin<"__builtin_ia32_tile_storeconfig">,
Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_tilerelease : GCCBuiltin<"__builtin_ia32_tilerelease">,
+ def int_x86_tilerelease : ClangBuiltin<"__builtin_ia32_tilerelease">,
Intrinsic<[], [], []>;
- def int_x86_tilezero : GCCBuiltin<"__builtin_ia32_tilezero">,
+ def int_x86_tilezero : ClangBuiltin<"__builtin_ia32_tilezero">,
Intrinsic<[], [llvm_i8_ty], [ImmArg<ArgIndex<0>>]>;
- def int_x86_tileloadd64 : GCCBuiltin<"__builtin_ia32_tileloadd64">,
+ def int_x86_tileloadd64 : ClangBuiltin<"__builtin_ia32_tileloadd64">,
Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
[ImmArg<ArgIndex<0>>]>;
- def int_x86_tileloaddt164 : GCCBuiltin<"__builtin_ia32_tileloaddt164">,
+ def int_x86_tileloaddt164 : ClangBuiltin<"__builtin_ia32_tileloaddt164">,
Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
[ImmArg<ArgIndex<0>>]>;
- def int_x86_tilestored64 : GCCBuiltin<"__builtin_ia32_tilestored64">,
+ def int_x86_tilestored64 : ClangBuiltin<"__builtin_ia32_tilestored64">,
Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty],
[ImmArg<ArgIndex<0>>]>;
- def int_x86_tdpbssd : GCCBuiltin<"__builtin_ia32_tdpbssd">,
+ def int_x86_tdpbssd : ClangBuiltin<"__builtin_ia32_tdpbssd">,
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
- def int_x86_tdpbsud : GCCBuiltin<"__builtin_ia32_tdpbsud">,
+ def int_x86_tdpbsud : ClangBuiltin<"__builtin_ia32_tdpbsud">,
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
- def int_x86_tdpbusd : GCCBuiltin<"__builtin_ia32_tdpbusd">,
+ def int_x86_tdpbusd : ClangBuiltin<"__builtin_ia32_tdpbusd">,
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
- def int_x86_tdpbuud : GCCBuiltin<"__builtin_ia32_tdpbuud">,
+ def int_x86_tdpbuud : ClangBuiltin<"__builtin_ia32_tdpbuud">,
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
- def int_x86_tdpbf16ps : GCCBuiltin<"__builtin_ia32_tdpbf16ps">,
+ def int_x86_tdpbf16ps : ClangBuiltin<"__builtin_ia32_tdpbf16ps">,
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
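
A sketch of the user-facing AMX flow through Clang's <immintrin.h> (-mamx-tile -mamx-int8). Tile numbers must be immediates, and cfg is assumed to be a caller-prepared 64-byte tile configuration blob:

#include <immintrin.h>

// C(tile0) += signed-i8 A(tile1) x signed-i8 B(tile2), accumulated as i32.
void amx_i8_matmul(const void *cfg, const void *a, const void *b,
                   void *c, long stride) {
  _tile_loadconfig(cfg);       // ldtilecfg
  _tile_zero(0);               // tilezero the accumulator
  _tile_loadd(1, a, stride);   // tileloadd64
  _tile_loadd(2, b, stride);
  _tile_dpbssd(0, 1, 2);       // tdpbssd
  _tile_stored(0, c, stride);  // tilestored64
  _tile_release();             // tilerelease
}
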
// AMX - internal intrinsics
def int_x86_ldtilecfg_internal :
- GCCBuiltin<"__builtin_ia32_tile_loadconfig_internal">,
+ ClangBuiltin<"__builtin_ia32_tile_loadconfig_internal">,
Intrinsic<[], [llvm_ptr_ty], []>;
def int_x86_tileloadd64_internal :
- GCCBuiltin<"__builtin_ia32_tileloadd64_internal">,
+ ClangBuiltin<"__builtin_ia32_tileloadd64_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
[]>;
def int_x86_tileloaddt164_internal :
- GCCBuiltin<"__builtin_ia32_tileloaddt164_internal">,
+ ClangBuiltin<"__builtin_ia32_tileloaddt164_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty],
[]>;
def int_x86_tdpbssd_internal :
- GCCBuiltin<"__builtin_ia32_tdpbssd_internal">,
+ ClangBuiltin<"__builtin_ia32_tdpbssd_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
def int_x86_tdpbsud_internal :
- GCCBuiltin<"__builtin_ia32_tdpbsud_internal">,
+ ClangBuiltin<"__builtin_ia32_tdpbsud_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
def int_x86_tdpbusd_internal :
- GCCBuiltin<"__builtin_ia32_tdpbusd_internal">,
+ ClangBuiltin<"__builtin_ia32_tdpbusd_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
def int_x86_tdpbuud_internal :
- GCCBuiltin<"__builtin_ia32_tdpbuud_internal">,
+ ClangBuiltin<"__builtin_ia32_tdpbuud_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
def int_x86_tilestored64_internal :
- GCCBuiltin<"__builtin_ia32_tilestored64_internal">,
+ ClangBuiltin<"__builtin_ia32_tilestored64_internal">,
Intrinsic<[], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty,
llvm_i64_ty, llvm_x86amx_ty], []>;
def int_x86_tilezero_internal :
- GCCBuiltin<"__builtin_ia32_tilezero_internal">,
+ ClangBuiltin<"__builtin_ia32_tilezero_internal">,
Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty],
[]>;
def int_x86_tdpbf16ps_internal :
- GCCBuiltin<"__builtin_ia32_tdpbf16ps_internal">,
+ ClangBuiltin<"__builtin_ia32_tdpbf16ps_internal">,
Intrinsic<[llvm_x86amx_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
@@ -5103,13 +5113,13 @@ let TargetPrefix = "x86" in {
// UINTR - User Level Interrupt
let TargetPrefix = "x86" in {
- def int_x86_clui : GCCBuiltin<"__builtin_ia32_clui">,
+ def int_x86_clui : ClangBuiltin<"__builtin_ia32_clui">,
Intrinsic<[], [], []>;
- def int_x86_stui : GCCBuiltin<"__builtin_ia32_stui">,
+ def int_x86_stui : ClangBuiltin<"__builtin_ia32_stui">,
Intrinsic<[], [], []>;
- def int_x86_testui : GCCBuiltin<"__builtin_ia32_testui">,
+ def int_x86_testui : ClangBuiltin<"__builtin_ia32_testui">,
Intrinsic<[llvm_i8_ty], [], []>;
- def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">,
+ def int_x86_senduipi : ClangBuiltin<"__builtin_ia32_senduipi">,
Intrinsic<[], [llvm_i64_ty], []>;
}
@@ -5117,48 +5127,48 @@ let TargetPrefix = "x86" in {
// avx512_fp16: vaddph
let TargetPrefix = "x86" in {
def int_x86_avx512fp16_add_ph_512
- : GCCBuiltin<"__builtin_ia32_addph512">,
+ : ClangBuiltin<"__builtin_ia32_addph512">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_sub_ph_512
- : GCCBuiltin<"__builtin_ia32_subph512">,
+ : ClangBuiltin<"__builtin_ia32_subph512">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_mul_ph_512
- : GCCBuiltin<"__builtin_ia32_mulph512">,
+ : ClangBuiltin<"__builtin_ia32_mulph512">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_div_ph_512
- : GCCBuiltin<"__builtin_ia32_divph512">,
+ : ClangBuiltin<"__builtin_ia32_divph512">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_max_ph_128
- : GCCBuiltin<"__builtin_ia32_maxph128">,
+ : ClangBuiltin<"__builtin_ia32_maxph128">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_max_ph_256
- : GCCBuiltin<"__builtin_ia32_maxph256">,
+ : ClangBuiltin<"__builtin_ia32_maxph256">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_max_ph_512
- : GCCBuiltin<"__builtin_ia32_maxph512">,
+ : ClangBuiltin<"__builtin_ia32_maxph512">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_min_ph_128
- : GCCBuiltin<"__builtin_ia32_minph128">,
+ : ClangBuiltin<"__builtin_ia32_minph128">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_min_ph_256
- : GCCBuiltin<"__builtin_ia32_minph256">,
+ : ClangBuiltin<"__builtin_ia32_minph256">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_min_ph_512
- : GCCBuiltin<"__builtin_ia32_minph512">,
+ : ClangBuiltin<"__builtin_ia32_minph512">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
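
The trailing llvm_i32_ty on the 512-bit fp16 builtins is the embedded rounding/SAE control; through <immintrin.h> (-mavx512fp16) it surfaces roughly as below, with illustrative helper names:

#include <immintrin.h>

// Plain add uses the current rounding mode; the _round form passes the
// immediate straight through to the addph512 builtin's last operand.
__m512h add_ph(__m512h a, __m512h b) {
  return _mm512_add_ph(a, b);
}

__m512h add_ph_rne(__m512h a, __m512h b) {
  return _mm512_add_round_ph(a, b,
                             _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
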
@@ -5178,367 +5188,367 @@ let TargetPrefix = "x86" in {
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_mask_add_sh_round
- : GCCBuiltin<"__builtin_ia32_addsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_addsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_sub_sh_round
- : GCCBuiltin<"__builtin_ia32_subsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_subsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_mul_sh_round
- : GCCBuiltin<"__builtin_ia32_mulsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_mulsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_div_sh_round
- : GCCBuiltin<"__builtin_ia32_divsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_divsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_min_sh_round
- : GCCBuiltin<"__builtin_ia32_minsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_minsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_max_sh_round
- : GCCBuiltin<"__builtin_ia32_maxsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_maxsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_cmp_sh
- : GCCBuiltin<"__builtin_ia32_cmpsh_mask">,
+ : ClangBuiltin<"__builtin_ia32_cmpsh_mask">,
Intrinsic<[ llvm_i8_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_vcomi_sh
- : GCCBuiltin<"__builtin_ia32_vcomish">,
+ : ClangBuiltin<"__builtin_ia32_vcomish">,
Intrinsic<[ llvm_i32_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtph2psx_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2psx128_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2psx_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2psx256_mask">,
Intrinsic<[ llvm_v8f32_ty ],
[ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2psx_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2psx512_mask">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtps2phx_128
- : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtps2phx128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtps2phx_256
- : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtps2phx256_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtps2phx_512
- : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtps2phx512_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtpd2ph_128
- : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtpd2ph_256
- : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtpd2ph_512
- : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtph2pd_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2pd128_mask">,
Intrinsic<[ llvm_v2f64_ty ],
[ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2pd_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2pd256_mask">,
Intrinsic<[ llvm_v4f64_ty ],
[ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2pd_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2pd512_mask">,
Intrinsic<[ llvm_v8f64_ty ],
[ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtsh2ss_round
- : GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vcvtss2sh_round
- : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vcvtsd2sh_round
- : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vcvtsh2sd_round
- : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">,
Intrinsic<[ llvm_v2f64_ty ],
[ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vcvtph2w_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2w128_mask">,
Intrinsic<[ llvm_v8i16_ty ],
[ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2w_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2w256_mask">,
Intrinsic<[ llvm_v16i16_ty ],
[ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2w_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2w512_mask">,
Intrinsic<[ llvm_v32i16_ty ],
[ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvttph2w_128
- : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2w128_mask">,
Intrinsic<[ llvm_v8i16_ty ],
[ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2w_256
- : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2w256_mask">,
Intrinsic<[ llvm_v16i16_ty ],
[ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2w_512
- : GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2w512_mask">,
Intrinsic<[ llvm_v32i16_ty ],
[ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtph2uw_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2uw128_mask">,
Intrinsic<[ llvm_v8i16_ty ],
[ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2uw_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2uw256_mask">,
Intrinsic<[ llvm_v16i16_ty ],
[ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2uw_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2uw512_mask">,
Intrinsic<[ llvm_v32i16_ty ],
[ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvttph2uw_128
- : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2uw128_mask">,
Intrinsic<[ llvm_v8i16_ty ],
[ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2uw_256
- : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2uw256_mask">,
Intrinsic<[ llvm_v16i16_ty ],
[ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2uw_512
- : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2uw512_mask">,
Intrinsic<[ llvm_v32i16_ty ],
[ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtph2dq_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2dq128_mask">,
Intrinsic<[ llvm_v4i32_ty ],
[ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2dq_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2dq256_mask">,
Intrinsic<[ llvm_v8i32_ty ],
[ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2dq_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2dq512_mask">,
Intrinsic<[ llvm_v16i32_ty ],
[ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtph2udq_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2udq128_mask">,
Intrinsic<[ llvm_v4i32_ty ],
[ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2udq_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2udq256_mask">,
Intrinsic<[ llvm_v8i32_ty ],
[ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2udq_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2udq512_mask">,
Intrinsic<[ llvm_v16i32_ty ],
[ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtdq2ph_128
- : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtudq2ph_128
- : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2dq_128
- : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2dq128_mask">,
Intrinsic<[ llvm_v4i32_ty ],
[ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2dq_256
- : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2dq256_mask">,
Intrinsic<[ llvm_v8i32_ty ],
[ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2dq_512
- : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2dq512_mask">,
Intrinsic<[ llvm_v16i32_ty ],
[ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvttph2udq_128
- : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2udq128_mask">,
Intrinsic<[ llvm_v4i32_ty ],
[ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2udq_256
- : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2udq256_mask">,
Intrinsic<[ llvm_v8i32_ty ],
[ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2udq_512
- : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2udq512_mask">,
Intrinsic<[ llvm_v16i32_ty ],
[ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtqq2ph_128
- : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtqq2ph_256
- : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2qq_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2qq128_mask">,
Intrinsic<[ llvm_v2i64_ty ],
[ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2qq_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2qq256_mask">,
Intrinsic<[ llvm_v4i64_ty ],
[ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2qq_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2qq512_mask">,
Intrinsic<[ llvm_v8i64_ty ],
[ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvtuqq2ph_128
- : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtuqq2ph_256
- : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2uqq_128
- : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">,
Intrinsic<[ llvm_v2i64_ty ],
[ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2uqq_256
- : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">,
Intrinsic<[ llvm_v4i64_ty ],
[ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvtph2uqq_512
- : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">,
Intrinsic<[ llvm_v8i64_ty ],
[ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvttph2qq_128
- : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2qq128_mask">,
Intrinsic<[ llvm_v2i64_ty ],
[ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2qq_256
- : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2qq256_mask">,
Intrinsic<[ llvm_v4i64_ty ],
[ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2qq_512
- : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2qq512_mask">,
Intrinsic<[ llvm_v8i64_ty ],
[ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vcvttph2uqq_128
- : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">,
Intrinsic<[ llvm_v2i64_ty ],
[ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2uqq_256
- : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">,
Intrinsic<[ llvm_v4i64_ty ],
[ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vcvttph2uqq_512
- : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">,
Intrinsic<[ llvm_v8i64_ty ],
[ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_vcvtsh2si32
- : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsh2si32">,
Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_vcvtsh2usi32
- : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsh2usi32">,
Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_vcvtsh2si64
- : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsh2si64">,
Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_vcvtsh2usi64
- : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsh2usi64">,
Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_vcvtusi2sh
- : GCCBuiltin<"__builtin_ia32_vcvtusi2sh">,
+ : ClangBuiltin<"__builtin_ia32_vcvtusi2sh">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_vcvtusi642sh
- : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">,
+ : ClangBuiltin<"__builtin_ia32_vcvtusi642sh">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_vcvtsi2sh
- : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsi2sh">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_vcvtsi642sh
- : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">,
+ : ClangBuiltin<"__builtin_ia32_vcvtsi642sh">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
def int_x86_avx512fp16_vcvttsh2si32
- : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">,
+ : ClangBuiltin<"__builtin_ia32_vcvttsh2si32">,
Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_vcvttsh2si64
- : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">,
+ : ClangBuiltin<"__builtin_ia32_vcvttsh2si64">,
Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_vcvttsh2usi32
- : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">,
+ : ClangBuiltin<"__builtin_ia32_vcvttsh2usi32">,
Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_vcvttsh2usi64
- : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">,
+ : ClangBuiltin<"__builtin_ia32_vcvttsh2usi64">,
Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
@@ -5551,61 +5561,61 @@ let TargetPrefix = "x86" in {
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_rsqrt_ph_128
- : GCCBuiltin<"__builtin_ia32_rsqrtph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_rsqrtph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_rsqrt_ph_256
- : GCCBuiltin<"__builtin_ia32_rsqrtph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_rsqrtph256_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_rsqrt_ph_512
- : GCCBuiltin<"__builtin_ia32_rsqrtph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_rsqrtph512_mask">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_rsqrt_sh
- : GCCBuiltin<"__builtin_ia32_rsqrtsh_mask">,
+ : ClangBuiltin<"__builtin_ia32_rsqrtsh_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_rcp_ph_128
- : GCCBuiltin<"__builtin_ia32_rcpph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_rcpph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_rcp_ph_256
- : GCCBuiltin<"__builtin_ia32_rcpph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_rcpph256_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_rcp_ph_512
- : GCCBuiltin<"__builtin_ia32_rcpph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_rcpph512_mask">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_rcp_sh
- : GCCBuiltin<"__builtin_ia32_rcpsh_mask">,
+ : ClangBuiltin<"__builtin_ia32_rcpsh_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_reduce_ph_128
- : GCCBuiltin<"__builtin_ia32_reduceph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_reduceph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_reduce_ph_256
- : GCCBuiltin<"__builtin_ia32_reduceph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_reduceph256_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_reduce_ph_512
- : GCCBuiltin<"__builtin_ia32_reduceph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_reduceph512_mask">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_reduce_sh
- : GCCBuiltin<"__builtin_ia32_reducesh_mask">,
+ : ClangBuiltin<"__builtin_ia32_reducesh_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty, llvm_i32_ty ],
@@ -5620,91 +5630,91 @@ let TargetPrefix = "x86" in {
: Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_fpclass_sh
- : GCCBuiltin<"__builtin_ia32_fpclasssh_mask">,
+ : ClangBuiltin<"__builtin_ia32_fpclasssh_mask">,
Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_getexp_ph_128
- : GCCBuiltin<"__builtin_ia32_getexpph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_getexpph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
def int_x86_avx512fp16_mask_getexp_ph_256
- : GCCBuiltin<"__builtin_ia32_getexpph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_getexpph256_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_getexp_ph_512
- : GCCBuiltin<"__builtin_ia32_getexpph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_getexpph512_mask">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_getexp_sh
- : GCCBuiltin<"__builtin_ia32_getexpsh128_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_getexpsh128_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_getmant_ph_128
- : GCCBuiltin<"__builtin_ia32_getmantph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_getmantph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_getmant_ph_256
- : GCCBuiltin<"__builtin_ia32_getmantph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_getmantph256_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_getmant_ph_512
- : GCCBuiltin<"__builtin_ia32_getmantph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_getmantph512_mask">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_getmant_sh
- : GCCBuiltin<"__builtin_ia32_getmantsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_getmantsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty,
llvm_i8_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>> ]>;
def int_x86_avx512fp16_mask_rndscale_ph_128
- : GCCBuiltin<"__builtin_ia32_rndscaleph_128_mask">,
+ : ClangBuiltin<"__builtin_ia32_rndscaleph_128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_rndscale_ph_256
- : GCCBuiltin<"__builtin_ia32_rndscaleph_256_mask">,
+ : ClangBuiltin<"__builtin_ia32_rndscaleph_256_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
def int_x86_avx512fp16_mask_rndscale_ph_512
- : GCCBuiltin<"__builtin_ia32_rndscaleph_mask">,
+ : ClangBuiltin<"__builtin_ia32_rndscaleph_mask">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_rndscale_sh
- : GCCBuiltin<"__builtin_ia32_rndscalesh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_rndscalesh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>> ]>;
def int_x86_avx512fp16_mask_scalef_ph_128
- : GCCBuiltin<"__builtin_ia32_scalefph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_scalefph128_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_scalef_ph_256
- : GCCBuiltin<"__builtin_ia32_scalefph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_scalefph256_mask">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_scalef_ph_512
- : GCCBuiltin<"__builtin_ia32_scalefph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_scalefph512_mask">,
Intrinsic<[ llvm_v32f16_ty ],
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_scalef_sh
- : GCCBuiltin<"__builtin_ia32_scalefsh_round_mask">,
+ : ClangBuiltin<"__builtin_ia32_scalefsh_round_mask">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
llvm_i32_ty ],
@@ -5715,12 +5725,12 @@ let TargetPrefix = "x86" in {
[ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_vfmaddsub_ph_128
- : GCCBuiltin<"__builtin_ia32_vfmaddsubph">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddsubph">,
Intrinsic<[ llvm_v8f16_ty ],
[ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_vfmaddsub_ph_256
- : GCCBuiltin<"__builtin_ia32_vfmaddsubph256">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddsubph256">,
Intrinsic<[ llvm_v16f16_ty ],
[ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ],
[ IntrNoMem ]>;
@@ -5734,133 +5744,133 @@ let TargetPrefix = "x86" in {
[ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
def int_x86_avx512fp16_mask_vfcmadd_cph_128
- : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_maskz_vfcmadd_cph_128
- : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfcmadd_cph_256
- : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_mask">,
Intrinsic<[ llvm_v8f32_ty ],
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_maskz_vfcmadd_cph_256
- : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">,
Intrinsic<[ llvm_v8f32_ty ],
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfcmadd_cph_512
- : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_maskz_vfcmadd_cph_512
- : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vfmadd_cph_128
- : GCCBuiltin<"__builtin_ia32_vfmaddcph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcph128_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_maskz_vfmadd_cph_128
- : GCCBuiltin<"__builtin_ia32_vfmaddcph128_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcph128_maskz">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfmadd_cph_256
- : GCCBuiltin<"__builtin_ia32_vfmaddcph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcph256_mask">,
Intrinsic<[ llvm_v8f32_ty ],
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_maskz_vfmadd_cph_256
- : GCCBuiltin<"__builtin_ia32_vfmaddcph256_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcph256_maskz">,
Intrinsic<[ llvm_v8f32_ty ],
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfmadd_cph_512
- : GCCBuiltin<"__builtin_ia32_vfmaddcph512_mask3">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcph512_mask3">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_maskz_vfmadd_cph_512
- : GCCBuiltin<"__builtin_ia32_vfmaddcph512_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcph512_maskz">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vfmadd_csh
- : GCCBuiltin<"__builtin_ia32_vfmaddcsh_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcsh_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_maskz_vfmadd_csh
- : GCCBuiltin<"__builtin_ia32_vfmaddcsh_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfmaddcsh_maskz">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vfcmadd_csh
- : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_maskz_vfcmadd_csh
- : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">,
+ : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vfmul_cph_128
- : GCCBuiltin<"__builtin_ia32_vfmulcph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfmulcph128_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfcmul_cph_128
- : GCCBuiltin<"__builtin_ia32_vfcmulcph128_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfcmulcph128_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfmul_cph_256
- : GCCBuiltin<"__builtin_ia32_vfmulcph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfmulcph256_mask">,
Intrinsic<[ llvm_v8f32_ty ],
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfcmul_cph_256
- : GCCBuiltin<"__builtin_ia32_vfcmulcph256_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfcmulcph256_mask">,
Intrinsic<[ llvm_v8f32_ty ],
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfmul_cph_512
- : GCCBuiltin<"__builtin_ia32_vfmulcph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfmulcph512_mask">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vfcmul_cph_512
- : GCCBuiltin<"__builtin_ia32_vfcmulcph512_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfcmulcph512_mask">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vfmul_csh
- : GCCBuiltin<"__builtin_ia32_vfmulcsh_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfmulcsh_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty ],
[ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
def int_x86_avx512fp16_mask_vfcmul_csh
- : GCCBuiltin<"__builtin_ia32_vfcmulcsh_mask">,
+ : ClangBuiltin<"__builtin_ia32_vfcmulcsh_mask">,
Intrinsic<[ llvm_v4f32_ty ],
[ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty ],
diff --git a/llvm/include/llvm/IR/IntrinsicsXCore.td b/llvm/include/llvm/IR/IntrinsicsXCore.td
index 89dbc65fea44..d2afc3497833 100644
--- a/llvm/include/llvm/IR/IntrinsicsXCore.td
+++ b/llvm/include/llvm/IR/IntrinsicsXCore.td
@@ -13,7 +13,7 @@
let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.".
// Miscellaneous instructions.
def int_xcore_bitrev : Intrinsic<[llvm_i32_ty],[llvm_i32_ty],[IntrNoMem]>,
- GCCBuiltin<"__builtin_bitrev">;
+ ClangBuiltin<"__builtin_bitrev">;
def int_xcore_crc8 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
[llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
[IntrNoMem]>;
@@ -25,11 +25,11 @@ let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.".
def int_xcore_zext : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_xcore_getid : Intrinsic<[llvm_i32_ty],[],[IntrNoMem]>,
- GCCBuiltin<"__builtin_getid">;
+ ClangBuiltin<"__builtin_getid">;
def int_xcore_getps : Intrinsic<[llvm_i32_ty],[llvm_i32_ty]>,
- GCCBuiltin<"__builtin_getps">;
+ ClangBuiltin<"__builtin_getps">;
def int_xcore_setps : Intrinsic<[],[llvm_i32_ty, llvm_i32_ty]>,
- GCCBuiltin<"__builtin_setps">;
+ ClangBuiltin<"__builtin_setps">;
def int_xcore_geted : Intrinsic<[llvm_i32_ty],[]>;
def int_xcore_getet : Intrinsic<[llvm_i32_ty],[]>;
def int_xcore_setsr : Intrinsic<[],[llvm_i32_ty]>;
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index 446bcecf1c64..91712df153a0 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -24,6 +24,7 @@
namespace llvm {
+class Any;
class DiagnosticInfo;
enum DiagnosticSeverity : char;
class Function;
@@ -93,6 +94,7 @@ public:
OB_preallocated = 4, // "preallocated"
OB_gc_live = 5, // "gc-live"
OB_clang_arc_attachedcall = 6, // "clang.arc.attachedcall"
+ OB_ptrauth = 7, // "ptrauth"
};
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
@@ -201,6 +203,11 @@ public:
/// diagnostics.
void setDiagnosticsHotnessRequested(bool Requested);
+ bool getMisExpectWarningRequested() const;
+ void setMisExpectWarningRequested(bool Requested);
+ void setDiagnosticsMisExpectTolerance(Optional<uint64_t> Tolerance);
+ uint64_t getDiagnosticsMisExpectTolerance() const;
+
/// Return the minimum hotness value a diagnostic would need in order
/// to be included in optimization diagnostics.
///
@@ -304,13 +311,22 @@ public:
/// LLVMContext is used by compilation.
void setOptPassGate(OptPassGate&);
- /// Enable opaque pointers. Can only be called before creating the first
- /// pointer type.
- void enableOpaquePointers() const;
+ /// Whether the choice between opaque and typed pointers has been made yet.
+ bool hasSetOpaquePointersValue() const;
+
+ /// Set whether opaque pointers are enabled. The method may be called multiple
+ /// times, but only with the same value. Note that creating a pointer type or
+ /// otherwise querying the opaque pointer mode performs an implicit set to
+ /// the default value.
+ void setOpaquePointers(bool Enable) const;
/// Whether typed pointers are supported. If false, all pointers are opaque.
bool supportsTypedPointers() const;
+ /// Optionally, target-specific data can be attached to the context for
+ /// lifetime management and to bypass layering restrictions.
+ llvm::Any &getTargetData() const;
+
private:
// Module needs access to the add/removeModule methods.
friend class Module;
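
The new setter replaces the one-way enableOpaquePointers(): the mode can now be set explicitly as long as every call agrees, and any pointer-type query locks in the default. A minimal sketch of the intended call pattern, assuming an LLVM build from this revision (the helper name is ours, not part of the patch):

#include "llvm/IR/LLVMContext.h"

static void configureContext(llvm::LLVMContext &Ctx) {
  // Decide the mode before the first pointer type is created; repeated
  // calls are fine as long as they all pass the same value.
  if (!Ctx.hasSetOpaquePointersValue())
    Ctx.setOpaquePointers(/*Enable=*/true);
  // From here on, Ctx.supportsTypedPointers() returns false.
}
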
diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h
index 311a407f1a19..41c11d26aa45 100644
--- a/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -294,9 +294,7 @@ private:
/// used by pass managers.
class PMDataManager {
public:
- explicit PMDataManager() : TPM(nullptr), Depth(0) {
- initializeAnalysisInfo();
- }
+ explicit PMDataManager() { initializeAnalysisInfo(); }
virtual ~PMDataManager();
@@ -418,7 +416,7 @@ public:
protected:
// Top level manager.
- PMTopLevelManager *TPM;
+ PMTopLevelManager *TPM = nullptr;
// Collection of pass that are managed by this manager
SmallVector<Pass *, 16> PassVector;
@@ -446,7 +444,7 @@ private:
// this manager.
SmallVector<Pass *, 16> HigherLevelAnalysis;
- unsigned Depth;
+ unsigned Depth = 0;
};
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index 42829388b79a..21d7b8b6da71 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -108,6 +108,10 @@ public:
/// Merge the new callback encoding \p NewCB into \p ExistingCallbacks.
MDNode *mergeCallbackEncodings(MDNode *ExistingCallbacks, MDNode *NewCB);
+ /// Return metadata that tells CodeGen how to generate a function
+ /// prologue for the "function" sanitizer.
+ MDNode *createRTTIPointerPrologue(Constant *PrologueSig, Constant *RTTI);
+
//===------------------------------------------------------------------===//
// AA metadata.
//===------------------------------------------------------------------===//
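
A hedged sketch of how a frontend might consume the new hook; the metadata kind name "func_sanitize" and the helper are our assumptions, not part of this patch:

#include "llvm/IR/Function.h"
#include "llvm/IR/MDBuilder.h"

// Attach the prologue signature/RTTI pair to a function so CodeGen can
// emit the -fsanitize=function prologue data.
static void attachPrologue(llvm::Function &F, llvm::Constant *Sig,
                           llvm::Constant *RTTI) {
  llvm::MDBuilder MDB(F.getContext());
  F.setMetadata("func_sanitize", // assumed kind name
                MDB.createRTTIPointerPrologue(Sig, RTTI));
}
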
diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h
index 4c8286692ebf..dbf2cfb7c5e9 100644
--- a/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/llvm/include/llvm/IR/MatrixBuilder.h
@@ -30,8 +30,8 @@ class Function;
class Twine;
class Module;
-template <class IRBuilderTy> class MatrixBuilder {
- IRBuilderTy &B;
+class MatrixBuilder {
+ IRBuilderBase &B;
Module *getModule() { return B.GetInsertBlock()->getParent()->getParent(); }
std::pair<Value *, Value *> splatScalarOperandIfNeeded(Value *LHS,
@@ -55,21 +55,17 @@ template <class IRBuilderTy> class MatrixBuilder {
}
public:
- MatrixBuilder(IRBuilderTy &Builder) : B(Builder) {}
+ MatrixBuilder(IRBuilderBase &Builder) : B(Builder) {}
/// Create a column major, strided matrix load.
+ /// \p EltTy - Matrix element type
/// \p DataPtr - Start address of the matrix read
/// \p Rows - Number of rows in matrix (must be a constant)
/// \p Columns - Number of columns in matrix (must be a constant)
/// \p Stride - Space between columns
- CallInst *CreateColumnMajorLoad(Value *DataPtr, Align Alignment,
+ CallInst *CreateColumnMajorLoad(Type *EltTy, Value *DataPtr, Align Alignment,
Value *Stride, bool IsVolatile, unsigned Rows,
unsigned Columns, const Twine &Name = "") {
-
- // Deal with the pointer
- PointerType *PtrTy = cast<PointerType>(DataPtr->getType());
- Type *EltTy = PtrTy->getPointerElementType();
-
auto *RetType = FixedVectorType::get(EltTy, Rows * Columns);
Value *Ops[] = {DataPtr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows),
@@ -234,12 +230,11 @@ public:
/// Create an assumption that \p Idx is less than \p NumElements.
void CreateIndexAssumption(Value *Idx, unsigned NumElements,
Twine const &Name = "") {
-
Value *NumElts =
B.getIntN(Idx->getType()->getScalarSizeInBits(), NumElements);
auto *Cmp = B.CreateICmpULT(Idx, NumElts);
- if (auto *ConstCond = dyn_cast<ConstantInt>(Cmp))
- assert(ConstCond->isOne() && "Index must be valid!");
+ if (isa<ConstantInt>(Cmp))
+ assert(cast<ConstantInt>(Cmp)->isOne() && "Index must be valid!");
else
B.CreateAssumption(Cmp);
}
@@ -248,7 +243,6 @@ public:
/// a matrix with \p NumRows embedded in a vector.
Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows,
Twine const &Name = "") {
-
unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
ColumnIdx->getType()->getScalarSizeInBits());
Type *IntTy = IntegerType::get(RowIdx->getType()->getContext(), MaxWidth);
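
With pointee types going away under opaque pointers, the element type must now be passed in rather than read off the pointer. A minimal sketch under that assumption (the shapes and values are illustrative):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MatrixBuilder.h"

// Load a 4x4 column-major float matrix; note the explicit element type
// and that MatrixBuilder is no longer a class template.
static llvm::Value *loadMat4(llvm::IRBuilderBase &B, llvm::Value *Ptr) {
  llvm::MatrixBuilder MB(B);
  return MB.CreateColumnMajorLoad(B.getFloatTy(), Ptr, llvm::Align(16),
                                  /*Stride=*/B.getInt64(4),
                                  /*IsVolatile=*/false,
                                  /*Rows=*/4, /*Columns=*/4, "mat");
}
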
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 7965884990e5..be359d94f812 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -169,7 +169,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Metadata &MD) {
/// Metadata wrapper in the Value hierarchy.
///
/// A member of the \a Value hierarchy to represent a reference to metadata.
-/// This allows, e.g., instrinsics to have metadata as operands.
+/// This allows, e.g., intrinsics to have metadata as operands.
///
/// Notably, this is the only thing in either hierarchy that is allowed to
/// reference \a LocalAsMetadata.
@@ -302,7 +302,8 @@ public:
///
/// Replace all uses of this with \c MD, which is allowed to be null.
void replaceAllUsesWith(Metadata *MD);
-
+ /// Replace all uses of the constant with Undef in debug info metadata
+ static void SalvageDebugInfo(const Constant &C);
/// Returns the list of all DIArgList users of this.
SmallVector<Metadata *> getAllArgListUsers();
@@ -774,10 +775,21 @@ class MDOperand {
public:
MDOperand() = default;
- MDOperand(MDOperand &&) = delete;
MDOperand(const MDOperand &) = delete;
- MDOperand &operator=(MDOperand &&) = delete;
+ MDOperand(MDOperand &&Op) {
+ MD = Op.MD;
+ if (MD)
+ (void)MetadataTracking::retrack(Op.MD, MD);
+ Op.MD = nullptr;
+ }
MDOperand &operator=(const MDOperand &) = delete;
+ MDOperand &operator=(MDOperand &&Op) {
+ MD = Op.MD;
+ if (MD)
+ (void)MetadataTracking::retrack(Op.MD, MD);
+ Op.MD = nullptr;
+ return *this;
+ }
~MDOperand() { untrack(); }
Metadata *get() const { return MD; }
@@ -922,13 +934,109 @@ struct TempMDNodeDeleter {
/// If an unresolved node is part of a cycle, \a resolveCycles() needs
/// to be called on some member of the cycle once all temporary nodes have been
/// replaced.
+///
+/// MDNodes can be large or small, as well as resizable or non-resizable.
+/// Large MDNodes' operands are allocated in a separate storage vector,
+/// whereas small MDNodes' operands are co-allocated. Distinct and temporary
+/// MDNodes are resizable, but only MDTuples support this capability.
+///
+/// Clients can add operands to resizable MDNodes using push_back().
class MDNode : public Metadata {
friend class ReplaceableMetadataImpl;
friend class LLVMContextImpl;
friend class DIArgList;
- unsigned NumOperands;
- unsigned NumUnresolved;
+ /// The header that is co-allocated with an MDNode along with its "small"
+ /// operands. It is located immediately before the main body of the node.
+ /// The operands are in turn located immediately before the header.
+ /// For resizable MDNodes, the space for the storage vector is also allocated
+ /// immediately before the header, overlapping with the operands.
+ struct Header {
+ bool IsResizable : 1;
+ bool IsLarge : 1;
+ size_t SmallSize : 4;
+ size_t SmallNumOps : 4;
+ size_t : sizeof(size_t) * CHAR_BIT - 10;
+
+ unsigned NumUnresolved = 0;
+ using LargeStorageVector = SmallVector<MDOperand, 0>;
+
+ static constexpr size_t NumOpsFitInVector =
+ sizeof(LargeStorageVector) / sizeof(MDOperand);
+ static_assert(
+ NumOpsFitInVector * sizeof(MDOperand) == sizeof(LargeStorageVector),
+ "sizeof(LargeStorageVector) must be a multiple of sizeof(MDOperand)");
+
+ static constexpr size_t MaxSmallSize = 15;
+
+ static constexpr size_t getOpSize(unsigned NumOps) {
+ return sizeof(MDOperand) * NumOps;
+ }
+ /// Returns the number of operands the node has space for based on its
+ /// allocation characteristics.
+ static size_t getSmallSize(size_t NumOps, bool IsResizable, bool IsLarge) {
+ return IsLarge ? NumOpsFitInVector
+ : std::max(NumOps, NumOpsFitInVector * IsResizable);
+ }
+ /// Returns the number of bytes allocated for operands and header.
+ static size_t getAllocSize(StorageType Storage, size_t NumOps) {
+ return getOpSize(
+ getSmallSize(NumOps, isResizable(Storage), isLarge(NumOps))) +
+ sizeof(Header);
+ }
+
+ /// Only temporary and distinct nodes are resizable.
+ static bool isResizable(StorageType Storage) { return Storage != Uniqued; }
+ static bool isLarge(size_t NumOps) { return NumOps > MaxSmallSize; }
+
+ size_t getAllocSize() const {
+ return getOpSize(SmallSize) + sizeof(Header);
+ }
+ void *getAllocation() {
+ return reinterpret_cast<char *>(this + 1) -
+ alignTo(getAllocSize(), alignof(uint64_t));
+ }
+
+ void *getLargePtr() const;
+ void *getSmallPtr();
+
+ LargeStorageVector &getLarge() {
+ assert(IsLarge);
+ return *reinterpret_cast<LargeStorageVector *>(getLargePtr());
+ }
+
+ const LargeStorageVector &getLarge() const {
+ assert(IsLarge);
+ return *reinterpret_cast<const LargeStorageVector *>(getLargePtr());
+ }
+
+ void resizeSmall(size_t NumOps);
+ void resizeSmallToLarge(size_t NumOps);
+ void resize(size_t NumOps);
+
+ explicit Header(size_t NumOps, StorageType Storage);
+ ~Header();
+
+ MutableArrayRef<MDOperand> operands() {
+ if (IsLarge)
+ return getLarge();
+ return makeMutableArrayRef(
+ reinterpret_cast<MDOperand *>(this) - SmallSize, SmallNumOps);
+ }
+
+ ArrayRef<MDOperand> operands() const {
+ if (IsLarge)
+ return getLarge();
+ return makeArrayRef(reinterpret_cast<const MDOperand *>(this) - SmallSize,
+ SmallNumOps);
+ }
+ };
+
+ Header &getHeader() { return *(reinterpret_cast<Header *>(this) - 1); }
+
+ const Header &getHeader() const {
+ return *(reinterpret_cast<const Header *>(this) - 1);
+ }
ContextAndReplaceableUses Context;
@@ -937,7 +1045,7 @@ protected:
ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2 = None);
~MDNode() = default;
- void *operator new(size_t Size, unsigned NumOps);
+ void *operator new(size_t Size, size_t NumOps, StorageType Storage);
void operator delete(void *Mem);
/// Required by std, but never called.
@@ -952,8 +1060,8 @@ protected:
void dropAllReferences();
- MDOperand *mutable_begin() { return mutable_end() - NumOperands; }
- MDOperand *mutable_end() { return reinterpret_cast<MDOperand *>(this); }
+ MDOperand *mutable_begin() { return getHeader().operands().begin(); }
+ MDOperand *mutable_end() { return getHeader().operands().end(); }
using mutable_op_range = iterator_range<MDOperand *>;
@@ -999,7 +1107,7 @@ public:
/// As forward declarations are resolved, their containers should get
/// resolved automatically. However, if this (or one of its operands) is
/// involved in a cycle, \a resolveCycles() needs to be called explicitly.
- bool isResolved() const { return !isTemporary() && !NumUnresolved; }
+ bool isResolved() const { return !isTemporary() && !getNumUnresolved(); }
bool isUniqued() const { return Storage == Uniqued; }
bool isDistinct() const { return Storage == Distinct; }
@@ -1093,11 +1201,25 @@ protected:
/// Sets the operand directly, without worrying about uniquing.
void setOperand(unsigned I, Metadata *New);
+ unsigned getNumUnresolved() const { return getHeader().NumUnresolved; }
+
+ void setNumUnresolved(unsigned N) { getHeader().NumUnresolved = N; }
void storeDistinctInContext();
template <class T, class StoreT>
static T *storeImpl(T *N, StorageType Storage, StoreT &Store);
template <class T> static T *storeImpl(T *N, StorageType Storage);
+ /// Resize the node to hold \a NumOps operands.
+ ///
+ /// \pre \a isTemporary() or \a isDistinct()
+ /// \pre MetadataID == MDTupleKind
+ void resize(size_t NumOps) {
+ assert(!isUniqued() && "Resizing is not supported for uniqued nodes");
+ assert(getMetadataID() == MDTupleKind &&
+ "Resizing is not supported for this node kind");
+ getHeader().resize(NumOps);
+ }
+
private:
void handleChangedOperand(void *Ref, Metadata *New);
@@ -1154,12 +1276,12 @@ public:
op_range operands() const { return op_range(op_begin(), op_end()); }
const MDOperand &getOperand(unsigned I) const {
- assert(I < NumOperands && "Out of range");
- return op_begin()[I];
+ assert(I < getNumOperands() && "Out of range");
+ return getHeader().operands()[I];
}
/// Return number of MDNode operands.
- unsigned getNumOperands() const { return NumOperands; }
+ unsigned getNumOperands() const { return getHeader().operands().size(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Metadata *MD) {
@@ -1244,6 +1366,16 @@ public:
/// Return a (temporary) clone of this.
TempMDTuple clone() const { return cloneImpl(); }
+ /// Append an element to the tuple. This will resize the node.
+ void push_back(Metadata *MD) {
+ size_t NumOps = getNumOperands();
+ resize(NumOps + 1);
+ setOperand(NumOps, MD);
+ }
+
+ /// Remove the last operand, shrinking the tuple by one.
+ void pop_back() { resize(getNumOperands() - 1); }
+
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == MDTupleKind;
}
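
Since only temporary and distinct MDTuples are resizable, a uniqued node would assert here. A small usage sketch of the new append API, under that constraint:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

// Build a distinct tuple and grow it in place; once it outgrows the
// small co-allocated slots, operands move to the large storage vector.
static llvm::MDNode *appendOperand(llvm::LLVMContext &Ctx,
                                   llvm::Metadata *Extra) {
  auto *Tuple = llvm::MDTuple::getDistinct(Ctx, {});
  Tuple->push_back(Extra); // resizes, then sets the new operand
  return Tuple;
}
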
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index 7b834fbeeebf..fc2d60947118 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -58,9 +58,9 @@ class VersionTuple;
/// other modules) this module depends on, a symbol table, and various data
/// about the target's characteristics.
///
-/// A module maintains a GlobalValRefMap object that is used to hold all
+/// A module maintains a GlobalList object that is used to hold all
/// constant references to global variables in the module. When a global
-/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
+/// variable is destroyed, it should have no entries in the GlobalList.
/// The main container class for the LLVM Intermediate Representation.
class LLVM_EXTERNAL_VISIBILITY Module {
/// @name Types And Enumerations
@@ -146,9 +146,12 @@ public:
/// Takes the max of the two values, which are required to be integers.
Max = 7,
+ /// Takes the min of the two values, which are required to be integers.
+ Min = 8,
+
// Markers:
ModFlagBehaviorFirstVal = Error,
- ModFlagBehaviorLastVal = Max
+ ModFlagBehaviorLastVal = Min
};
/// Checks if Metadata represents a valid ModFlagBehavior, and stores the
@@ -360,6 +363,8 @@ public:
/// In all cases, the returned value is a FunctionCallee wrapper around the
/// 'FunctionType *T' passed in, as well as a 'Value*' either of the Function or
/// the bitcast to the function.
+ ///
+ /// Note: For library calls getOrInsertLibFunc() should be used instead.
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T,
AttributeList AttributeList);
@@ -888,8 +893,8 @@ public:
void setRtLibUseGOT();
/// Get/set whether synthesized functions should get the uwtable attribute.
- bool getUwtable() const;
- void setUwtable();
+ UWTableKind getUwtable() const;
+ void setUwtable(UWTableKind Kind);
/// Get/set whether synthesized functions should get the "frame-pointer"
/// attribute.
@@ -939,10 +944,17 @@ public:
/// @returns a string containing the target variant triple.
StringRef getDarwinTargetVariantTriple() const;
+ /// Set the target variant triple which is a string describing a variant of
+ /// the target host platform.
+ void setDarwinTargetVariantTriple(StringRef T);
+
/// Get the target variant version build SDK version metadata.
///
/// An empty version is returned if no such metadata is attached.
VersionTuple getDarwinTargetVariantSDKVersion() const;
+
+ /// Set the target variant version build SDK version metadata.
+ void setDarwinTargetVariantSDKVersion(VersionTuple Version);
};
/// Given "llvm.used" or "llvm.compiler.used" as a global name, collect the
diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h
index ec149747e3f4..4e9f772dfdb6 100644
--- a/llvm/include/llvm/IR/NoFolder.h
+++ b/llvm/include/llvm/IR/NoFolder.h
@@ -23,10 +23,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/FMF.h"
+#include "llvm/IR/IRBuilderFolder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IRBuilderFolder.h"
namespace llvm {
@@ -43,144 +44,72 @@ public:
// Return an existing value or a constant if the operation can be simplified.
// Otherwise return nullptr.
//===--------------------------------------------------------------------===//
- Value *FoldAdd(Value *LHS, Value *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
- return nullptr;
- }
- Value *FoldAnd(Value *LHS, Value *RHS) const override { return nullptr; }
-
- Value *FoldOr(Value *LHS, Value *RHS) const override { return nullptr; }
-
- Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override {
+ Value *FoldBinOp(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS) const override {
return nullptr;
}
- Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
- bool IsInBounds = false) const override {
+ Value *FoldExactBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool IsExact) const override {
return nullptr;
}
- Value *FoldSelect(Value *C, Value *True, Value *False) const override {
+ Value *FoldNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool HasNUW, bool HasNSW) const override {
return nullptr;
}
- //===--------------------------------------------------------------------===//
- // Binary Operators
- //===--------------------------------------------------------------------===//
-
- Instruction *CreateFAdd(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateFAdd(LHS, RHS);
- }
-
- Instruction *CreateSub(Constant *LHS, Constant *RHS,
- bool HasNUW = false,
- bool HasNSW = false) const override {
- BinaryOperator *BO = BinaryOperator::CreateSub(LHS, RHS);
- if (HasNUW) BO->setHasNoUnsignedWrap();
- if (HasNSW) BO->setHasNoSignedWrap();
- return BO;
- }
-
- Instruction *CreateFSub(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateFSub(LHS, RHS);
- }
-
- Instruction *CreateMul(Constant *LHS, Constant *RHS,
- bool HasNUW = false,
- bool HasNSW = false) const override {
- BinaryOperator *BO = BinaryOperator::CreateMul(LHS, RHS);
- if (HasNUW) BO->setHasNoUnsignedWrap();
- if (HasNSW) BO->setHasNoSignedWrap();
- return BO;
- }
-
- Instruction *CreateFMul(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateFMul(LHS, RHS);
- }
-
- Instruction *CreateUDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- if (!isExact)
- return BinaryOperator::CreateUDiv(LHS, RHS);
- return BinaryOperator::CreateExactUDiv(LHS, RHS);
- }
-
- Instruction *CreateSDiv(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- if (!isExact)
- return BinaryOperator::CreateSDiv(LHS, RHS);
- return BinaryOperator::CreateExactSDiv(LHS, RHS);
- }
-
- Instruction *CreateFDiv(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateFDiv(LHS, RHS);
+ Value *FoldBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ FastMathFlags FMF) const override {
+ return nullptr;
}
- Instruction *CreateURem(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateURem(LHS, RHS);
+ Value *FoldICmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override {
+ return nullptr;
}
- Instruction *CreateSRem(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateSRem(LHS, RHS);
+ Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
+ bool IsInBounds = false) const override {
+ return nullptr;
}
- Instruction *CreateFRem(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateFRem(LHS, RHS);
+ Value *FoldSelect(Value *C, Value *True, Value *False) const override {
+ return nullptr;
}
- Instruction *CreateShl(Constant *LHS, Constant *RHS, bool HasNUW = false,
- bool HasNSW = false) const override {
- BinaryOperator *BO = BinaryOperator::CreateShl(LHS, RHS);
- if (HasNUW) BO->setHasNoUnsignedWrap();
- if (HasNSW) BO->setHasNoSignedWrap();
- return BO;
+ Value *FoldExtractValue(Value *Agg,
+ ArrayRef<unsigned> IdxList) const override {
+ return nullptr;
}
- Instruction *CreateLShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- if (!isExact)
- return BinaryOperator::CreateLShr(LHS, RHS);
- return BinaryOperator::CreateExactLShr(LHS, RHS);
+ Value *FoldInsertValue(Value *Agg, Value *Val,
+ ArrayRef<unsigned> IdxList) const override {
+ return nullptr;
}
- Instruction *CreateAShr(Constant *LHS, Constant *RHS,
- bool isExact = false) const override {
- if (!isExact)
- return BinaryOperator::CreateAShr(LHS, RHS);
- return BinaryOperator::CreateExactAShr(LHS, RHS);
+ Value *FoldExtractElement(Value *Vec, Value *Idx) const override {
+ return nullptr;
}
- Instruction *CreateXor(Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::CreateXor(LHS, RHS);
+ Value *FoldInsertElement(Value *Vec, Value *NewElt,
+ Value *Idx) const override {
+ return nullptr;
}
- Instruction *CreateBinOp(Instruction::BinaryOps Opc,
- Constant *LHS, Constant *RHS) const override {
- return BinaryOperator::Create(Opc, LHS, RHS);
+ Value *FoldShuffleVector(Value *V1, Value *V2,
+ ArrayRef<int> Mask) const override {
+ return nullptr;
}
//===--------------------------------------------------------------------===//
// Unary Operators
//===--------------------------------------------------------------------===//
- Instruction *CreateNeg(Constant *C,
- bool HasNUW = false,
- bool HasNSW = false) const override {
- BinaryOperator *BO = BinaryOperator::CreateNeg(C);
- if (HasNUW) BO->setHasNoUnsignedWrap();
- if (HasNSW) BO->setHasNoSignedWrap();
- return BO;
- }
-
Instruction *CreateFNeg(Constant *C) const override {
return UnaryOperator::CreateFNeg(C);
}
- Instruction *CreateNot(Constant *C) const override {
- return BinaryOperator::CreateNot(C);
- }
-
Instruction *CreateUnOp(Instruction::UnaryOps Opc,
Constant *C) const override {
return UnaryOperator::Create(Opc, C);
@@ -245,35 +174,6 @@ public:
Constant *LHS, Constant *RHS) const override {
return new FCmpInst(P, LHS, RHS);
}
-
- //===--------------------------------------------------------------------===//
- // Other Instructions
- //===--------------------------------------------------------------------===//
-
- Instruction *CreateExtractElement(Constant *Vec,
- Constant *Idx) const override {
- return ExtractElementInst::Create(Vec, Idx);
- }
-
- Instruction *CreateInsertElement(Constant *Vec, Constant *NewElt,
- Constant *Idx) const override {
- return InsertElementInst::Create(Vec, NewElt, Idx);
- }
-
- Instruction *CreateShuffleVector(Constant *V1, Constant *V2,
- ArrayRef<int> Mask) const override {
- return new ShuffleVectorInst(V1, V2, Mask);
- }
-
- Instruction *CreateExtractValue(Constant *Agg,
- ArrayRef<unsigned> IdxList) const override {
- return ExtractValueInst::Create(Agg, IdxList);
- }
-
- Instruction *CreateInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> IdxList) const override {
- return InsertValueInst::Create(Agg, Val, IdxList);
- }
};
} // end namespace llvm
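For reference, the shape of the new folder interface: every Fold* hook takes plain Value operands and returns nullptr to mean "no constant fold available", at which point the caller materializes a real instruction. A minimal sketch of the consuming pattern, assuming an IRBuilderFolder-style object (the helper function itself is hypothetical):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IRBuilderFolder.h"
using namespace llvm;

// Hypothetical helper: ask the folder first, then fall back to emitting code.
static Value *emitSelect(IRBuilderFolder &Folder, IRBuilderBase &B,
                         Value *Cond, Value *TrueV, Value *FalseV) {
  if (Value *Folded = Folder.FoldSelect(Cond, TrueV, FalseV))
    return Folded;                            // folded away, nothing to emit
  return B.CreateSelect(Cond, TrueV, FalseV); // nullptr: emit a real select
}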
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 7d232bba0864..1a234e273eff 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/FMF.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -161,105 +162,6 @@ public:
}
};
-/// Convenience struct for specifying and reasoning about fast-math flags.
-class FastMathFlags {
-private:
- friend class FPMathOperator;
-
- unsigned Flags = 0;
-
- FastMathFlags(unsigned F) {
- // If all 7 bits are set, turn this into -1. If the number of bits grows,
- // this must be updated. This is intended to provide some forward binary
- // compatibility insurance for the meaning of 'fast' in case bits are added.
- if (F == 0x7F) Flags = ~0U;
- else Flags = F;
- }
-
-public:
- // This is how the bits are used in Value::SubclassOptionalData so they
- // should fit there too.
- // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New
- // functionality will require a change in how this information is stored.
- enum {
- AllowReassoc = (1 << 0),
- NoNaNs = (1 << 1),
- NoInfs = (1 << 2),
- NoSignedZeros = (1 << 3),
- AllowReciprocal = (1 << 4),
- AllowContract = (1 << 5),
- ApproxFunc = (1 << 6)
- };
-
- FastMathFlags() = default;
-
- static FastMathFlags getFast() {
- FastMathFlags FMF;
- FMF.setFast();
- return FMF;
- }
-
- bool any() const { return Flags != 0; }
- bool none() const { return Flags == 0; }
- bool all() const { return Flags == ~0U; }
-
- void clear() { Flags = 0; }
- void set() { Flags = ~0U; }
-
- /// Flag queries
- bool allowReassoc() const { return 0 != (Flags & AllowReassoc); }
- bool noNaNs() const { return 0 != (Flags & NoNaNs); }
- bool noInfs() const { return 0 != (Flags & NoInfs); }
- bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); }
- bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); }
- bool allowContract() const { return 0 != (Flags & AllowContract); }
- bool approxFunc() const { return 0 != (Flags & ApproxFunc); }
- /// 'Fast' means all bits are set.
- bool isFast() const { return all(); }
-
- /// Flag setters
- void setAllowReassoc(bool B = true) {
- Flags = (Flags & ~AllowReassoc) | B * AllowReassoc;
- }
- void setNoNaNs(bool B = true) {
- Flags = (Flags & ~NoNaNs) | B * NoNaNs;
- }
- void setNoInfs(bool B = true) {
- Flags = (Flags & ~NoInfs) | B * NoInfs;
- }
- void setNoSignedZeros(bool B = true) {
- Flags = (Flags & ~NoSignedZeros) | B * NoSignedZeros;
- }
- void setAllowReciprocal(bool B = true) {
- Flags = (Flags & ~AllowReciprocal) | B * AllowReciprocal;
- }
- void setAllowContract(bool B = true) {
- Flags = (Flags & ~AllowContract) | B * AllowContract;
- }
- void setApproxFunc(bool B = true) {
- Flags = (Flags & ~ApproxFunc) | B * ApproxFunc;
- }
- void setFast(bool B = true) { B ? set() : clear(); }
-
- void operator&=(const FastMathFlags &OtherFlags) {
- Flags &= OtherFlags.Flags;
- }
- void operator|=(const FastMathFlags &OtherFlags) {
- Flags |= OtherFlags.Flags;
- }
- bool operator!=(const FastMathFlags &OtherFlags) const {
- return Flags != OtherFlags.Flags;
- }
-
- /// Print fast-math flags to \p O.
- void print(raw_ostream &O) const;
-};
-
-inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) {
- FMF.print(O);
- return O;
-}
-
/// Utility class for floating point operations which can have
/// information about relaxed accuracy requirements attached to them.
class FPMathOperator : public Operator {
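The FastMathFlags class removed above is not gone; it moves into the new llvm/IR/FMF.h header (now included at the top of this file) so that Operator.h stops being its home. A small usage sketch against the API shown in the removed block, assuming nothing beyond those setters:

#include "llvm/IR/FMF.h"
#include <cassert>
using namespace llvm;

static FastMathFlags makeContractFMF() {
  FastMathFlags FMF;
  FMF.setAllowContract(); // permit fusing mul+add into fma
  FMF.setNoSignedZeros(); // treat +0.0 and -0.0 as interchangeable
  // 'fast' means all seven flag bits are set, so this is deliberately weaker.
  assert(FMF.any() && !FMF.isFast());
  return FMF;
}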
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index f9f4f1603861..7f0695b552e1 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -136,7 +136,9 @@ struct undef_match {
inline auto m_Undef() { return undef_match(); }
/// Match an arbitrary poison constant.
-inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); }
+inline class_match<PoisonValue> m_Poison() {
+ return class_match<PoisonValue>();
+}
/// Match an arbitrary Constant and ignore it.
inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
@@ -222,7 +224,7 @@ struct apint_match {
bool AllowUndef;
apint_match(const APInt *&Res, bool AllowUndef)
- : Res(Res), AllowUndef(AllowUndef) {}
+ : Res(Res), AllowUndef(AllowUndef) {}
template <typename ITy> bool match(ITy *V) {
if (auto *CI = dyn_cast<ConstantInt>(V)) {
@@ -231,8 +233,8 @@ struct apint_match {
}
if (V->getType()->isVectorTy())
if (const auto *C = dyn_cast<Constant>(V))
- if (auto *CI = dyn_cast_or_null<ConstantInt>(
- C->getSplatValue(AllowUndef))) {
+ if (auto *CI =
+ dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndef))) {
Res = &CI->getValue();
return true;
}
@@ -256,8 +258,8 @@ struct apfloat_match {
}
if (V->getType()->isVectorTy())
if (const auto *C = dyn_cast<Constant>(V))
- if (auto *CI = dyn_cast_or_null<ConstantFP>(
- C->getSplatValue(AllowUndef))) {
+ if (auto *CI =
+ dyn_cast_or_null<ConstantFP>(C->getSplatValue(AllowUndef))) {
Res = &CI->getValueAPF();
return true;
}
@@ -467,9 +469,7 @@ struct is_negative {
inline cst_pred_ty<is_negative> m_Negative() {
return cst_pred_ty<is_negative>();
}
-inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
- return V;
-}
+inline api_pred_ty<is_negative> m_Negative(const APInt *&V) { return V; }
struct is_nonnegative {
bool isValue(const APInt &C) { return C.isNonNegative(); }
@@ -479,9 +479,7 @@ struct is_nonnegative {
inline cst_pred_ty<is_nonnegative> m_NonNegative() {
return cst_pred_ty<is_nonnegative>();
}
-inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
- return V;
-}
+inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) { return V; }
struct is_strictlypositive {
bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
@@ -510,9 +508,7 @@ struct is_one {
};
/// Match an integer 1 or a vector with all elements equal to 1.
/// For vectors, this includes constants with undefined elements.
-inline cst_pred_ty<is_one> m_One() {
- return cst_pred_ty<is_one>();
-}
+inline cst_pred_ty<is_one> m_One() { return cst_pred_ty<is_one>(); }
struct is_zero_int {
bool isValue(const APInt &C) { return C.isZero(); }
@@ -532,21 +528,15 @@ struct is_zero {
};
/// Match any null constant or a vector with all elements equal to 0.
/// For vectors, this includes constants with undefined elements.
-inline is_zero m_Zero() {
- return is_zero();
-}
+inline is_zero m_Zero() { return is_zero(); }
struct is_power2 {
bool isValue(const APInt &C) { return C.isPowerOf2(); }
};
/// Match an integer or vector power-of-2.
/// For vectors, this includes constants with undefined elements.
-inline cst_pred_ty<is_power2> m_Power2() {
- return cst_pred_ty<is_power2>();
-}
-inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
- return V;
-}
+inline cst_pred_ty<is_power2> m_Power2() { return cst_pred_ty<is_power2>(); }
+inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { return V; }
struct is_negated_power2 {
bool isValue(const APInt &C) { return C.isNegatedPowerOf2(); }
@@ -589,9 +579,7 @@ struct is_lowbit_mask {
inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
return cst_pred_ty<is_lowbit_mask>();
}
-inline api_pred_ty<is_lowbit_mask> m_LowBitMask(const APInt *&V) {
- return V;
-}
+inline api_pred_ty<is_lowbit_mask> m_LowBitMask(const APInt *&V) { return V; }
struct icmp_pred_with_threshold {
ICmpInst::Predicate Pred;
@@ -613,9 +601,7 @@ struct is_nan {
};
/// Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
/// For vectors, this includes constants with undefined elements.
-inline cstfp_pred_ty<is_nan> m_NaN() {
- return cstfp_pred_ty<is_nan>();
-}
+inline cstfp_pred_ty<is_nan> m_NaN() { return cstfp_pred_ty<is_nan>(); }
struct is_nonnan {
bool isValue(const APFloat &C) { return !C.isNaN(); }
@@ -631,9 +617,7 @@ struct is_inf {
};
/// Match a positive or negative infinity FP constant.
/// For vectors, this includes constants with undefined elements.
-inline cstfp_pred_ty<is_inf> m_Inf() {
- return cstfp_pred_ty<is_inf>();
-}
+inline cstfp_pred_ty<is_inf> m_Inf() { return cstfp_pred_ty<is_inf>(); }
struct is_noninf {
bool isValue(const APFloat &C) { return !C.isInfinity(); }
@@ -729,7 +713,9 @@ inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
/// Match a binary operator, capturing it if we match.
inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
/// Match a with overflow intrinsic, capturing it if we match.
-inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
+inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) {
+ return I;
+}
inline bind_ty<const WithOverflowInst>
m_WithOverflowInst(const WithOverflowInst *&I) {
return I;
@@ -842,8 +828,7 @@ struct bind_const_intval_ty {
/// Match a specified integer value or vector of all elements of that
/// value.
-template <bool AllowUndefs>
-struct specific_intval {
+template <bool AllowUndefs> struct specific_intval {
APInt Val;
specific_intval(APInt V) : Val(std::move(V)) {}
@@ -1014,7 +999,8 @@ template <typename Op_t> struct FNeg_match {
FNeg_match(const Op_t &Op) : X(Op) {}
template <typename OpTy> bool match(OpTy *V) {
auto *FPMO = dyn_cast<FPMathOperator>(V);
- if (!FPMO) return false;
+ if (!FPMO)
+ return false;
if (FPMO->getOpcode() == Instruction::FNeg)
return X.match(FPMO->getOperand(0));
@@ -1038,9 +1024,7 @@ template <typename Op_t> struct FNeg_match {
};
/// Match 'fneg X' as 'fsub -0.0, X'.
-template <typename OpTy>
-inline FNeg_match<OpTy>
-m_FNeg(const OpTy &X) {
+template <typename OpTy> inline FNeg_match<OpTy> m_FNeg(const OpTy &X) {
return FNeg_match<OpTy>(X);
}
@@ -1165,32 +1149,32 @@ inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
OverflowingBinaryOperator::NoSignedWrap>
m_NSWAdd(const LHS &L, const RHS &R) {
return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
- OverflowingBinaryOperator::NoSignedWrap>(
- L, R);
+ OverflowingBinaryOperator::NoSignedWrap>(L,
+ R);
}
template <typename LHS, typename RHS>
inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
OverflowingBinaryOperator::NoSignedWrap>
m_NSWSub(const LHS &L, const RHS &R) {
return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
- OverflowingBinaryOperator::NoSignedWrap>(
- L, R);
+ OverflowingBinaryOperator::NoSignedWrap>(L,
+ R);
}
template <typename LHS, typename RHS>
inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
OverflowingBinaryOperator::NoSignedWrap>
m_NSWMul(const LHS &L, const RHS &R) {
return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
- OverflowingBinaryOperator::NoSignedWrap>(
- L, R);
+ OverflowingBinaryOperator::NoSignedWrap>(L,
+ R);
}
template <typename LHS, typename RHS>
inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
OverflowingBinaryOperator::NoSignedWrap>
m_NSWShl(const LHS &L, const RHS &R) {
return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
- OverflowingBinaryOperator::NoSignedWrap>(
- L, R);
+ OverflowingBinaryOperator::NoSignedWrap>(L,
+ R);
}
template <typename LHS, typename RHS>
@@ -1384,7 +1368,7 @@ struct CmpClass_match {
Predicate = I->getPredicate();
return true;
} else if (Commutable && L.match(I->getOperand(1)) &&
- R.match(I->getOperand(0))) {
+ R.match(I->getOperand(0))) {
Predicate = I->getSwappedPredicate();
return true;
}
@@ -2080,15 +2064,13 @@ template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
};
template <typename T0, typename T1, typename T2>
struct m_Intrinsic_Ty<T0, T1, T2> {
- using Ty =
- match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
- Argument_match<T2>>;
+ using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
+ Argument_match<T2>>;
};
template <typename T0, typename T1, typename T2, typename T3>
struct m_Intrinsic_Ty<T0, T1, T2, T3> {
- using Ty =
- match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
- Argument_match<T3>>;
+ using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
+ Argument_match<T3>>;
};
template <typename T0, typename T1, typename T2, typename T3, typename T4>
@@ -2097,7 +2079,8 @@ struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> {
Argument_match<T4>>;
};
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
+template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename T5>
struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> {
using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty,
Argument_match<T5>>;
@@ -2117,6 +2100,14 @@ m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2,
return m_Intrinsic<Intrinsic::masked_load>(Op0, Op1, Op2, Op3);
}
+/// Matches MaskedGather Intrinsic.
+template <typename Opnd0, typename Opnd1, typename Opnd2, typename Opnd3>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2, Opnd3>::Ty
+m_MaskedGather(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2,
+ const Opnd3 &Op3) {
+ return m_Intrinsic<Intrinsic::masked_gather>(Op0, Op1, Op2, Op3);
+}
+
template <Intrinsic::ID IntrID, typename T0>
inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
@@ -2204,6 +2195,11 @@ m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2);
}
+template <typename Opnd0>
+inline typename m_Intrinsic_Ty<Opnd0>::Ty m_Sqrt(const Opnd0 &Op0) {
+ return m_Intrinsic<Intrinsic::sqrt>(Op0);
+}
+
//===----------------------------------------------------------------------===//
// Matchers for two-operands operators with the operators in either order
//
@@ -2532,8 +2528,8 @@ struct LogicalOp_match {
/// Matches L && R either in the form of L & R or L ? R : false.
/// Note that the latter form is poison-blocking.
template <typename LHS, typename RHS>
-inline LogicalOp_match<LHS, RHS, Instruction::And>
-m_LogicalAnd(const LHS &L, const RHS &R) {
+inline LogicalOp_match<LHS, RHS, Instruction::And> m_LogicalAnd(const LHS &L,
+ const RHS &R) {
return LogicalOp_match<LHS, RHS, Instruction::And>(L, R);
}
@@ -2550,8 +2546,8 @@ m_c_LogicalAnd(const LHS &L, const RHS &R) {
/// Matches L || R either in the form of L | R or L ? true : R.
/// Note that the latter form is poison-blocking.
template <typename LHS, typename RHS>
-inline LogicalOp_match<LHS, RHS, Instruction::Or>
-m_LogicalOr(const LHS &L, const RHS &R) {
+inline LogicalOp_match<LHS, RHS, Instruction::Or> m_LogicalOr(const LHS &L,
+ const RHS &R) {
return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R);
}
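Besides the clang-format churn, this file gains two matchers: m_Sqrt for llvm.sqrt and m_MaskedGather for llvm.masked.gather. A short sketch of how they compose with the existing capture matchers (both helper functions are hypothetical):

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognize sqrt(X) and capture X.
static bool matchSqrtOperand(Value *V, Value *&X) {
  return match(V, m_Sqrt(m_Value(X)));
}

// Capture the pointer vector and mask of a masked gather; the alignment and
// passthru operands are matched but ignored here.
static bool matchGather(Value *V, Value *&Ptrs, Value *&Mask) {
  return match(V, m_MaskedGather(m_Value(Ptrs), m_Value(), m_Value(Mask),
                                 m_Value()));
}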
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 62d67308114f..39c11771ff41 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -47,6 +47,8 @@ HANDLE_LIBCALL(MUL_I16, "__mulhi3")
HANDLE_LIBCALL(MUL_I32, "__mulsi3")
HANDLE_LIBCALL(MUL_I64, "__muldi3")
HANDLE_LIBCALL(MUL_I128, "__multi3")
+HANDLE_LIBCALL(MUL_IEXT, nullptr)
+
HANDLE_LIBCALL(MULO_I32, "__mulosi4")
HANDLE_LIBCALL(MULO_I64, "__mulodi4")
HANDLE_LIBCALL(MULO_I128, "__muloti4")
@@ -55,31 +57,43 @@ HANDLE_LIBCALL(SDIV_I16, "__divhi3")
HANDLE_LIBCALL(SDIV_I32, "__divsi3")
HANDLE_LIBCALL(SDIV_I64, "__divdi3")
HANDLE_LIBCALL(SDIV_I128, "__divti3")
+HANDLE_LIBCALL(SDIV_IEXT, "__divei4")
+
HANDLE_LIBCALL(UDIV_I8, "__udivqi3")
HANDLE_LIBCALL(UDIV_I16, "__udivhi3")
HANDLE_LIBCALL(UDIV_I32, "__udivsi3")
HANDLE_LIBCALL(UDIV_I64, "__udivdi3")
HANDLE_LIBCALL(UDIV_I128, "__udivti3")
+HANDLE_LIBCALL(UDIV_IEXT, "__udivei4")
+
HANDLE_LIBCALL(SREM_I8, "__modqi3")
HANDLE_LIBCALL(SREM_I16, "__modhi3")
HANDLE_LIBCALL(SREM_I32, "__modsi3")
HANDLE_LIBCALL(SREM_I64, "__moddi3")
HANDLE_LIBCALL(SREM_I128, "__modti3")
+HANDLE_LIBCALL(SREM_IEXT, "__modei4")
+
HANDLE_LIBCALL(UREM_I8, "__umodqi3")
HANDLE_LIBCALL(UREM_I16, "__umodhi3")
HANDLE_LIBCALL(UREM_I32, "__umodsi3")
HANDLE_LIBCALL(UREM_I64, "__umoddi3")
HANDLE_LIBCALL(UREM_I128, "__umodti3")
+HANDLE_LIBCALL(UREM_IEXT, "__umodei4")
+
HANDLE_LIBCALL(SDIVREM_I8, nullptr)
HANDLE_LIBCALL(SDIVREM_I16, nullptr)
HANDLE_LIBCALL(SDIVREM_I32, nullptr)
HANDLE_LIBCALL(SDIVREM_I64, nullptr)
HANDLE_LIBCALL(SDIVREM_I128, nullptr)
+HANDLE_LIBCALL(SDIVREM_IEXT, nullptr)
+
HANDLE_LIBCALL(UDIVREM_I8, nullptr)
HANDLE_LIBCALL(UDIVREM_I16, nullptr)
HANDLE_LIBCALL(UDIVREM_I32, nullptr)
HANDLE_LIBCALL(UDIVREM_I64, nullptr)
HANDLE_LIBCALL(UDIVREM_I128, nullptr)
+HANDLE_LIBCALL(UDIVREM_IEXT, nullptr)
+
HANDLE_LIBCALL(NEG_I32, "__negsi2")
HANDLE_LIBCALL(NEG_I64, "__negdi2")
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
@@ -296,6 +310,8 @@ HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2")
HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2")
HANDLE_LIBCALL(FPROUND_F128_F16, "__trunctfhf2")
HANDLE_LIBCALL(FPROUND_PPCF128_F16, "__trunctfhf2")
+HANDLE_LIBCALL(FPROUND_F32_BF16, "__truncsfbf2")
+HANDLE_LIBCALL(FPROUND_F64_BF16, "__truncdfbf2")
HANDLE_LIBCALL(FPROUND_F64_F32, "__truncdfsf2")
HANDLE_LIBCALL(FPROUND_F80_F32, "__truncxfsf2")
HANDLE_LIBCALL(FPROUND_F128_F32, "__trunctfsf2")
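The new *_IEXT entries back arbitrary-precision integer division and remainder (the __divei4 family from compiler-rt), and the two BF16 entries add float/double-to-bfloat truncation. As with any .def table, consumers expand HANDLE_LIBCALL themselves; a sketch of the usual X-macro pattern (the table name is hypothetical):

// Entries whose name is nullptr (e.g. MUL_IEXT, the *DIVREM_* family) have no
// runtime symbol and must be expanded inline by the target instead.
struct LibcallEntry {
  const char *Enum;
  const char *Symbol; // may be nullptr
};
static const LibcallEntry LibcallTable[] = {
#define HANDLE_LIBCALL(code, name) {#code, name},
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
};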
diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h
index da9c732ad818..ba8ffbbaf397 100644
--- a/llvm/include/llvm/IR/Statepoint.h
+++ b/llvm/include/llvm/IR/Statepoint.h
@@ -121,9 +121,8 @@ public:
/// Return the type of the value returned by the call underlying the
/// statepoint.
Type *getActualReturnType() const {
- auto *CalleeTy =
- getActualCalledOperand()->getType()->getPointerElementType();
- return cast<FunctionType>(CalleeTy)->getReturnType();
+ auto *FT = cast<FunctionType>(getParamElementType(CalledFunctionPos));
+ return FT->getReturnType();
}
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index e4e8a5529c87..51263c6b8fcc 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -68,13 +68,14 @@ public:
TokenTyID, ///< Tokens
// Derived types... see DerivedTypes.h file.
- IntegerTyID, ///< Arbitrary bit width integers
- FunctionTyID, ///< Functions
- PointerTyID, ///< Pointers
- StructTyID, ///< Structures
- ArrayTyID, ///< Arrays
- FixedVectorTyID, ///< Fixed width SIMD vector type
- ScalableVectorTyID ///< Scalable SIMD vector type
+ IntegerTyID, ///< Arbitrary bit width integers
+ FunctionTyID, ///< Functions
+ PointerTyID, ///< Pointers
+ StructTyID, ///< Structures
+ ArrayTyID, ///< Arrays
+ FixedVectorTyID, ///< Fixed width SIMD vector type
+ ScalableVectorTyID, ///< Scalable SIMD vector type
+ DXILPointerTyID, ///< DXIL typed pointer used by DirectX target
};
private:
@@ -368,6 +369,9 @@ public:
/// This method is deprecated without replacement. Pointer element types are
/// not available with opaque pointers.
+ [[deprecated("Deprecated without replacement, see "
+ "https://llvm.org/docs/OpaquePointers.html for context and "
+ "migration instructions")]]
Type *getPointerElementType() const {
return getNonOpaquePointerElementType();
}
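The deprecation attribute gives opaque-pointer migration a compiler-enforced nudge: under opaque pointers a pointer value simply has no pointee type, so that type must travel with the pointer. A minimal migration sketch (the struct and helper are hypothetical):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Before: Type *EltTy = Ptr->getType()->getPointerElementType();
// After: the element type is tracked explicitly wherever the pointer flows.
struct TypedPointer {
  Value *Ptr;  // the (possibly opaque) pointer value
  Type *EltTy; // what it points at, carried by the caller
};

static LoadInst *emitLoad(IRBuilderBase &B, const TypedPointer &P) {
  return B.CreateLoad(P.EltTy, P.Ptr); // load/GEP APIs take the type directly
}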
diff --git a/llvm/include/llvm/IR/User.h b/llvm/include/llvm/IR/User.h
index 221bb5b2cb1c..a9cf60151e5d 100644
--- a/llvm/include/llvm/IR/User.h
+++ b/llvm/include/llvm/IR/User.h
@@ -304,8 +304,8 @@ public:
/// Replace uses of one Value with another.
///
/// Replaces all references to the "From" definition with references to the
- /// "To" definition.
- void replaceUsesOfWith(Value *From, Value *To);
+ /// "To" definition. Returns whether any uses were replaced.
+ bool replaceUsesOfWith(Value *From, Value *To);
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
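The new bool result lets transforms report "changed" precisely instead of pessimistically assuming modification. A sketch of the typical call-site pattern (the helper is hypothetical):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Rewrite every operand in BB that refers to OldV; return true iff at least
// one operand was actually replaced.
static bool rewriteUses(BasicBlock &BB, Value *OldV, Value *NewV) {
  bool Changed = false;
  for (Instruction &I : BB)
    Changed |= I.replaceUsesOfWith(OldV, NewV);
  return Changed;
}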
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 1abcbb874a8d..1d639e8aeb01 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -54,6 +54,12 @@
#define END_REGISTER_VP_SDNODE(VPSD)
#endif
+// Helper macro to set up the mapping from VP intrinsic to ISD opcode.
+// Note: More than one VP intrinsic may map to one ISD opcode.
+#ifndef HELPER_MAP_VPID_TO_VPSD
+#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)
+#endif
+
// Helper macros for the common "1:1 - Intrinsic : SDNode" case.
//
// There is one VP intrinsic that maps directly to one SDNode that goes by the
@@ -70,7 +76,8 @@
// the SDNode is used.
#define BEGIN_REGISTER_VP(VPID, MASKPOS, EVLPOS, VPSD, LEGALPOS) \
BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) \
- BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS)
+ BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS) \
+ HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)
#define END_REGISTER_VP(VPID, VPSD) \
END_REGISTER_VP_INTRINSIC(VPID) \
@@ -121,6 +128,18 @@
#define VP_PROPERTY_BINARYOP
#endif
+// A property to infer VP type casts automatically.
+#ifndef VP_PROPERTY_CASTOP
+#define VP_PROPERTY_CASTOP
+#endif
+
+// This VP intrinsic is a comparison operation.
+// The condition code arg is at CCPOS and accepts floating-point condition
+// codes if ISFP is set, else it accepts integer condition codes.
+#ifndef VP_PROPERTY_CMP
+#define VP_PROPERTY_CMP(CCPOS, ISFP)
+#endif
+
/// } Property Macros
///// Integer Arithmetic {
@@ -211,22 +230,130 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem)
#undef HELPER_REGISTER_BINARY_FP_VP
+// llvm.vp.fneg(x,mask,vlen)
+BEGIN_REGISTER_VP(vp_fneg, 1, 2, VP_FNEG, -1)
+VP_PROPERTY_FUNCTIONAL_OPC(FNeg)
+END_REGISTER_VP(vp_fneg, VP_FNEG)
+
+// llvm.vp.fma(x,y,z,mask,vlen)
+BEGIN_REGISTER_VP(vp_fma, 3, 4, VP_FMA, -1)
+VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_fma)
+END_REGISTER_VP(vp_fma, VP_FMA)
+
///// } Floating-Point Arithmetic
+///// Type Casts {
+// Specialized helper macro for type conversions.
+// <operation>(%x, %mask, %evl).
+#ifdef HELPER_REGISTER_FP_CAST_VP
+#error \
+ "The internal helper macro HELPER_REGISTER_FP_CAST_VP is already defined!"
+#endif
+#define HELPER_REGISTER_FP_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND) \
+ BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \
+ VP_PROPERTY_CASTOP \
+ END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
+
+// llvm.vp.fptoui(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(fptoui, VP_FPTOUI, FPToUI, 0)
+
+// llvm.vp.fptosi(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(fptosi, VP_FPTOSI, FPToSI, 0)
+
+// llvm.vp.uitofp(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(uitofp, VP_UITOFP, UIToFP, 1)
+
+// llvm.vp.sitofp(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(sitofp, VP_SITOFP, SIToFP, 1)
+
+// llvm.vp.fptrunc(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(fptrunc, VP_FP_ROUND, FPTrunc, 1)
+
+// llvm.vp.fpext(x,mask,vlen)
+HELPER_REGISTER_FP_CAST_VP(fpext, VP_FP_EXTEND, FPExt, 0)
+
+#undef HELPER_REGISTER_FP_CAST_VP
+
+// Specialized helper macro for integer type conversions.
+// <operation>(%x, %mask, %evl).
+#ifdef HELPER_REGISTER_INT_CAST_VP
+#error \
+ "The internal helper macro HELPER_REGISTER_INT_CAST_VP is already defined!"
+#endif
+#define HELPER_REGISTER_INT_CAST_VP(OPSUFFIX, VPSD, IROPC) \
+ BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_CASTOP \
+ END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
+
+// llvm.vp.trunc(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(trunc, VP_TRUNCATE, Trunc)
+
+// llvm.vp.zext(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(zext, VP_ZERO_EXTEND, ZExt)
+
+// llvm.vp.sext(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(sext, VP_SIGN_EXTEND, SExt)
+
+// llvm.vp.ptrtoint(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(ptrtoint, VP_PTRTOINT, PtrToInt)
+
+// llvm.vp.inttoptr(x,mask,vlen)
+HELPER_REGISTER_INT_CAST_VP(inttoptr, VP_INTTOPTR, IntToPtr)
+
+#undef HELPER_REGISTER_INT_CAST_VP
+
+///// } Type Casts
+
+///// Comparisons {
+
+// VP_SETCC (ISel only)
+BEGIN_REGISTER_VP_SDNODE(VP_SETCC, 0, vp_setcc, 3, 4)
+END_REGISTER_VP_SDNODE(VP_SETCC)
+
+// llvm.vp.fcmp(x,y,cc,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_fcmp, 3, 4)
+HELPER_MAP_VPID_TO_VPSD(vp_fcmp, VP_SETCC)
+VP_PROPERTY_FUNCTIONAL_OPC(FCmp)
+VP_PROPERTY_CMP(2, true)
+VP_PROPERTY_CONSTRAINEDFP(0, 1, experimental_constrained_fcmp)
+END_REGISTER_VP_INTRINSIC(vp_fcmp)
+
+// llvm.vp.icmp(x,y,cc,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_icmp, 3, 4)
+HELPER_MAP_VPID_TO_VPSD(vp_icmp, VP_SETCC)
+VP_PROPERTY_FUNCTIONAL_OPC(ICmp)
+VP_PROPERTY_CMP(2, false)
+END_REGISTER_VP_INTRINSIC(vp_icmp)
+
+///// } Comparisons
+
///// Memory Operations {
// llvm.vp.store(val,ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3)
// chain = VP_STORE chain,val,base,offset,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5)
+HELPER_MAP_VPID_TO_VPSD(vp_store, VP_STORE)
VP_PROPERTY_FUNCTIONAL_OPC(Store)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_store)
VP_PROPERTY_MEMOP(1, 0)
END_REGISTER_VP(vp_store, VP_STORE)
+// llvm.experimental.vp.strided.store(val,ptr,stride,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_store, 3, 4)
+// chain = EXPERIMENTAL_VP_STRIDED_STORE chain,val,base,offset,stride,mask,evl
+BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_STRIDED_STORE, 0, experimental_vp_strided_store, 5, 6)
+HELPER_MAP_VPID_TO_VPSD(experimental_vp_strided_store, EXPERIMENTAL_VP_STRIDED_STORE)
+VP_PROPERTY_MEMOP(1, 0)
+END_REGISTER_VP(experimental_vp_strided_store, EXPERIMENTAL_VP_STRIDED_STORE)
+
// llvm.vp.scatter(ptr,val,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
// chain = VP_SCATTER chain,val,base,indices,scale,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6)
+HELPER_MAP_VPID_TO_VPSD(vp_scatter, VP_SCATTER)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_scatter)
VP_PROPERTY_MEMOP(1, 0)
END_REGISTER_VP(vp_scatter, VP_SCATTER)
@@ -235,15 +362,25 @@ END_REGISTER_VP(vp_scatter, VP_SCATTER)
BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2)
// val,chain = VP_LOAD chain,base,offset,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4)
+HELPER_MAP_VPID_TO_VPSD(vp_load, VP_LOAD)
VP_PROPERTY_FUNCTIONAL_OPC(Load)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
VP_PROPERTY_MEMOP(0, None)
END_REGISTER_VP(vp_load, VP_LOAD)
+// llvm.experimental.vp.strided.load(ptr,stride,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3)
+// chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl
+BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_STRIDED_LOAD, -1, experimental_vp_strided_load, 4, 5)
+HELPER_MAP_VPID_TO_VPSD(experimental_vp_strided_load, EXPERIMENTAL_VP_STRIDED_LOAD)
+VP_PROPERTY_MEMOP(0, None)
+END_REGISTER_VP(experimental_vp_strided_load, EXPERIMENTAL_VP_STRIDED_LOAD)
+
// llvm.vp.gather(ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2)
// val,chain = VP_GATHER chain,base,indices,scale,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5)
+HELPER_MAP_VPID_TO_VPSD(vp_gather, VP_GATHER)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_gather)
VP_PROPERTY_MEMOP(0, None)
END_REGISTER_VP(vp_gather, VP_GATHER)
@@ -313,6 +450,8 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
// sequential and reassociative. These manifest as the presence of 'reassoc'
// fast-math flags in the IR and as two distinct ISD opcodes in the
// SelectionDAG.
+// Note that, by default, we map from the VP intrinsic to the SEQ ISD opcode,
+// which can then be relaxed to the non-SEQ ISD opcode if the 'reassoc' flag
+// is set.
#ifdef HELPER_REGISTER_REDUCTION_SEQ_VP
#error \
"The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
@@ -323,6 +462,7 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
VP_PROPERTY_REDUCTION(0, 1) \
END_REGISTER_VP_SDNODE(VPSD) \
BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, -1, VPID, 2, 3) \
+ HELPER_MAP_VPID_TO_VPSD(VPID, SEQ_VPSD) \
VP_PROPERTY_REDUCTION(0, 1) \
END_REGISTER_VP_SDNODE(SEQ_VPSD) \
VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \
@@ -344,13 +484,18 @@ HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL,
///// Shuffles {
-// llvm.vp.select(mask,on_true,on_false,vlen)
-BEGIN_REGISTER_VP(vp_select, 0, 3, VP_SELECT, -1)
+// The mask 'cond' operands of llvm.vp.select and llvm.vp.merge are not reported
+// as masks with the BEGIN_REGISTER_VP_* macros. This is because, unlike other
+// VP intrinsics, these two have a defined result on lanes where the mask is
+// false.
+//
+// llvm.vp.select(cond,on_true,on_false,vlen)
+BEGIN_REGISTER_VP(vp_select, None, 3, VP_SELECT, -1)
VP_PROPERTY_FUNCTIONAL_OPC(Select)
END_REGISTER_VP(vp_select, VP_SELECT)
-// llvm.vp.merge(mask,on_true,on_false,pivot)
-BEGIN_REGISTER_VP(vp_merge, 0, 3, VP_MERGE, -1)
+// llvm.vp.merge(cond,on_true,on_false,pivot)
+BEGIN_REGISTER_VP(vp_merge, None, 3, VP_MERGE, -1)
END_REGISTER_VP(vp_merge, VP_MERGE)
BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1)
@@ -364,7 +509,10 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
#undef END_REGISTER_VP
#undef END_REGISTER_VP_INTRINSIC
#undef END_REGISTER_VP_SDNODE
+#undef HELPER_MAP_VPID_TO_VPSD
#undef VP_PROPERTY_BINARYOP
+#undef VP_PROPERTY_CASTOP
+#undef VP_PROPERTY_CMP
#undef VP_PROPERTY_CONSTRAINEDFP
#undef VP_PROPERTY_FUNCTIONAL_INTRINSIC
#undef VP_PROPERTY_FUNCTIONAL_OPC
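Since every macro in this file defaults to a no-op when the includer leaves it undefined, a consumer of the new HELPER_MAP_VPID_TO_VPSD hook only needs to define that one macro. A sketch of how ISel-side code can map a VP intrinsic to its SDNode opcode (the function is hypothetical; LLVM's SelectionDAG lowering uses essentially this shape):

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

static Optional<unsigned> getSDOpcodeForVPIntrinsic(Intrinsic::ID ID) {
  switch (ID) {
  default:
    return None; // not a VP intrinsic, or no ISD mapping registered
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
  case Intrinsic::VPID:                                                        \
    return ISD::VPSD;
#include "llvm/IR/VPIntrinsics.def"
  }
}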
diff --git a/llvm/include/llvm/IR/ValueMap.h b/llvm/include/llvm/IR/ValueMap.h
index 67f275cc06d9..a4b6091cf115 100644
--- a/llvm/include/llvm/IR/ValueMap.h
+++ b/llvm/include/llvm/IR/ValueMap.h
@@ -104,8 +104,8 @@ public:
: Map(NumInitBuckets), Data() {}
explicit ValueMap(const ExtraData &Data, unsigned NumInitBuckets = 64)
: Map(NumInitBuckets), Data(Data) {}
- // ValueMap can't be copied nor moved, beucase the callbacks store pointer
- // to it.
+ // ValueMap can't be copied nor moved, because the callbacks store pointer to
+ // it.
ValueMap(const ValueMap &) = delete;
ValueMap(ValueMap &&) = delete;
ValueMap &operator=(const ValueMap &) = delete;
@@ -141,7 +141,7 @@ public:
size_type size() const { return Map.size(); }
/// Grow the map so that it has at least Size buckets. Does not shrink
- void resize(size_t Size) { Map.resize(Size); }
+ void reserve(size_t Size) { Map.reserve(Size); }
void clear() {
Map.clear();
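The rename from resize to reserve matches the underlying DenseMap vocabulary: the call pre-allocates buckets and never shrinks or changes size(). Callers only need a mechanical rename, as in this hypothetical helper:

#include "llvm/IR/Function.h"
#include "llvm/IR/ValueMap.h"
using namespace llvm;

static void numberArgs(Function &Fn) {
  ValueMap<Value *, unsigned> Slots;
  Slots.reserve(Fn.arg_size()); // formerly Slots.resize(...); same semantics
  for (Argument &A : Fn.args())
    Slots[&A] = A.getArgNo();
}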
diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h
new file mode 100644
index 000000000000..301edaed70fe
--- /dev/null
+++ b/llvm/include/llvm/IR/VectorBuilder.h
@@ -0,0 +1,99 @@
+//===- llvm/VectorBuilder.h - Builder for VP Intrinsics ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the VectorBuilder class, which is used as a convenient way
+// to create VP intrinsics as if they were LLVM instructions with a consistent
+// and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_VECTORBUILDER_H
+#define LLVM_IR_VECTORBUILDER_H
+
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/InstrTypes.h>
+#include <llvm/IR/Instruction.h>
+#include <llvm/IR/Value.h>
+
+namespace llvm {
+
+class VectorBuilder {
+public:
+ enum class Behavior {
+ // Abort if the requested VP intrinsic could not be created.
+ // This is useful for strict consistency.
+ ReportAndAbort = 0,
+
+ // Return a default-initialized value if the requested VP intrinsic could
+ // not be created.
+ // This is useful for a defensive fallback to non-VP code.
+ SilentlyReturnNone = 1,
+ };
+
+private:
+ IRBuilderBase &Builder;
+ Behavior ErrorHandling;
+
+ // Explicit mask parameter.
+ Value *Mask;
+ // Explicit vector length parameter.
+ Value *ExplicitVectorLength;
+ // Compile-time vector length.
+ ElementCount StaticVectorLength;
+
+ // Get mask/evl value handles for the current configuration.
+ Value &requestMask();
+ Value &requestEVL();
+
+ void handleError(const char *ErrorMsg) const;
+ template <typename RetType>
+ RetType returnWithError(const char *ErrorMsg) const {
+ handleError(ErrorMsg);
+ return RetType();
+ }
+
+public:
+ VectorBuilder(IRBuilderBase &Builder,
+ Behavior ErrorHandling = Behavior::ReportAndAbort)
+ : Builder(Builder), ErrorHandling(ErrorHandling), Mask(nullptr),
+ ExplicitVectorLength(nullptr),
+ StaticVectorLength(ElementCount::getFixed(0)) {}
+
+ Module &getModule() const;
+ LLVMContext &getContext() const { return Builder.getContext(); }
+
+ // All-true mask for the currently configured explicit vector length.
+ Value *getAllTrueMask();
+
+ VectorBuilder &setMask(Value *NewMask) {
+ Mask = NewMask;
+ return *this;
+ }
+ VectorBuilder &setEVL(Value *NewExplicitVectorLength) {
+ ExplicitVectorLength = NewExplicitVectorLength;
+ return *this;
+ }
+ VectorBuilder &setStaticVL(unsigned NewFixedVL) {
+ StaticVectorLength = ElementCount::getFixed(NewFixedVL);
+ return *this;
+ }
+ // TODO: setStaticVL(ElementCount) for scalable types.
+
+ // Emit a VP intrinsic call that mimics a regular instruction.
+ // This operation behaves according to the configured error-handling Behavior.
+ // \p Opcode The functional instruction opcode of the emitted intrinsic.
+ // \p ReturnTy The return type of the operation.
+ // \p VecOpArray The operand list.
+ Value *createVectorInstruction(unsigned Opcode, Type *ReturnTy,
+ ArrayRef<Value *> VecOpArray,
+ const Twine &Name = Twine());
+};
+
+} // namespace llvm
+
+#endif // LLVM_IR_VECTORBUILDER_H
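A sketch of how the new builder is meant to be driven, based only on the interface above (operand values are assumed to be vector-typed; the helper is hypothetical):

#include "llvm/IR/VectorBuilder.h"
using namespace llvm;

// Emit llvm.vp.fadd(%a, %b, %mask, %evl) as if it were a plain fadd.
static Value *emitVPFAdd(IRBuilderBase &B, Value *A, Value *B2, Value *Mask,
                         Value *EVL) {
  VectorBuilder VB(B, VectorBuilder::Behavior::SilentlyReturnNone);
  VB.setMask(Mask).setEVL(EVL); // configure the predication state once
  // With SilentlyReturnNone a failure comes back as a null Value*.
  return VB.createVectorInstruction(Instruction::FAdd, A->getType(), {A, B2});
}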
diff --git a/llvm/include/llvm/IRReader/IRReader.h b/llvm/include/llvm/IRReader/IRReader.h
index a14e46e2edc8..3f2a01fdc54a 100644
--- a/llvm/include/llvm/IRReader/IRReader.h
+++ b/llvm/include/llvm/IRReader/IRReader.h
@@ -14,7 +14,9 @@
#ifndef LLVM_IRREADER_IRREADER_H
#define LLVM_IRREADER_IRREADER_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
#include <memory>
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 489ef045796f..77f2c6330788 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -48,9 +48,6 @@ void initializeInstrumentation(PassRegistry&);
/// Initialize all passes linked into the Analysis library.
void initializeAnalysis(PassRegistry&);
-/// Initialize all passes linked into the Coroutines library.
-void initializeCoroutines(PassRegistry&);
-
/// Initialize all passes linked into the CodeGen library.
void initializeCodeGen(PassRegistry&);
@@ -65,9 +62,6 @@ void initializeAAResultsWrapperPassPass(PassRegistry&);
void initializeADCELegacyPassPass(PassRegistry&);
void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
void initializeAddFSDiscriminatorsPass(PassRegistry &);
-void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
-void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
-void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlignmentFromAssumptionsPass(PassRegistry&);
@@ -77,11 +71,11 @@ void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &);
void initializeAnnotation2MetadataLegacyPass(PassRegistry &);
void initializeAnnotationRemarksLegacyPass(PassRegistry &);
void initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry &);
-void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);
void initializeAttributorLegacyPassPass(PassRegistry&);
void initializeAttributorCGSCCLegacyPassPass(PassRegistry &);
+void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &);
void initializeBasicBlockSectionsPass(PassRegistry &);
void initializeBDCELegacyPassPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
@@ -103,6 +97,7 @@ void initializeCFGSimplifyPassPass(PassRegistry&);
void initializeCFGuardPass(PassRegistry&);
void initializeCFGuardLongjmpPass(PassRegistry&);
void initializeCFGViewerLegacyPassPass(PassRegistry&);
+void initializeCFIFixupPass(PassRegistry&);
void initializeCFIInstrInserterPass(PassRegistry&);
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
@@ -137,10 +132,10 @@ void initializeDependenceAnalysisPass(PassRegistry&);
void initializeDependenceAnalysisWrapperPassPass(PassRegistry&);
void initializeDetectDeadLanesPass(PassRegistry&);
void initializeDivRemPairsLegacyPassPass(PassRegistry&);
-void initializeDomOnlyPrinterPass(PassRegistry&);
-void initializeDomOnlyViewerPass(PassRegistry&);
-void initializeDomPrinterPass(PassRegistry&);
-void initializeDomViewerPass(PassRegistry&);
+void initializeDomOnlyPrinterWrapperPassPass(PassRegistry &);
+void initializeDomOnlyViewerWrapperPassPass(PassRegistry &);
+void initializeDomPrinterWrapperPassPass(PassRegistry &);
+void initializeDomViewerWrapperPassPass(PassRegistry &);
void initializeDominanceFrontierWrapperPassPass(PassRegistry&);
void initializeDominatorTreeWrapperPassPass(PassRegistry&);
void initializeDwarfEHPrepareLegacyPassPass(PassRegistry &);
@@ -174,7 +169,6 @@ void initializeFunctionImportLegacyPassPass(PassRegistry&);
void initializeFunctionSpecializationLegacyPassPass(PassRegistry &);
void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
-void initializeGCOVProfilerLegacyPassPass(PassRegistry&);
void initializeGVNHoistLegacyPassPass(PassRegistry&);
void initializeGVNLegacyPassPass(PassRegistry&);
void initializeGVNSinkLegacyPassPass(PassRegistry&);
@@ -188,7 +182,6 @@ void initializeHardwareLoopsPass(PassRegistry&);
void initializeMIRProfileLoaderPassPass(PassRegistry &);
void initializeMemProfilerLegacyPassPass(PassRegistry &);
void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
-void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRCELegacyPassPass(PassRegistry&);
void initializeIROutlinerLegacyPassPass(PassRegistry&);
@@ -215,6 +208,7 @@ void initializeInterleavedAccessPass(PassRegistry&);
void initializeInterleavedLoadCombinePass(PassRegistry &);
void initializeInternalizeLegacyPassPass(PassRegistry&);
void initializeIntervalPartitionPass(PassRegistry&);
+void initializeJMCInstrumenterPass(PassRegistry&);
void initializeJumpThreadingPass(PassRegistry&);
void initializeLCSSAVerificationPassPass(PassRegistry&);
void initializeLCSSAWrapperPassPass(PassRegistry&);
@@ -273,6 +267,7 @@ void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerConstantIntrinsicsPass(PassRegistry&);
void initializeLowerEmuTLSPass(PassRegistry&);
void initializeLowerExpectIntrinsicPass(PassRegistry&);
+void initializeLowerGlobalDtorsLegacyPassPass(PassRegistry &);
void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&);
void initializeLowerIntrinsicsPass(PassRegistry&);
@@ -316,7 +311,6 @@ void initializeMemDerefPrinterPass(PassRegistry&);
void initializeMemoryDependenceWrapperPassPass(PassRegistry&);
void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
void initializeMemorySSAWrapperPassPass(PassRegistry&);
-void initializeMemorySanitizerLegacyPassPass(PassRegistry&);
void initializeMergeFunctionsLegacyPassPass(PassRegistry&);
void initializeMergeICmpsLegacyPassPass(PassRegistry &);
void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
@@ -339,11 +333,6 @@ void initializeOptimizationRemarkEmitterWrapperPassPass(PassRegistry&);
void initializeOptimizePHIsPass(PassRegistry&);
void initializePAEvalPass(PassRegistry&);
void initializePEIPass(PassRegistry&);
-void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&);
-void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&);
-void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&);
-void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&);
-void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&);
void initializePHIEliminationPass(PassRegistry&);
void initializePartialInlinerLegacyPassPass(PassRegistry&);
void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry&);
@@ -353,10 +342,10 @@ void initializePhiValuesWrapperPassPass(PassRegistry&);
void initializePhysicalRegisterUsageInfoPass(PassRegistry&);
void initializePlaceBackedgeSafepointsImplPass(PassRegistry&);
void initializePlaceSafepointsPass(PassRegistry&);
-void initializePostDomOnlyPrinterPass(PassRegistry&);
-void initializePostDomOnlyViewerPass(PassRegistry&);
-void initializePostDomPrinterPass(PassRegistry&);
-void initializePostDomViewerPass(PassRegistry&);
+void initializePostDomOnlyPrinterWrapperPassPass(PassRegistry &);
+void initializePostDomOnlyViewerWrapperPassPass(PassRegistry &);
+void initializePostDomPrinterWrapperPassPass(PassRegistry &);
+void initializePostDomViewerWrapperPassPass(PassRegistry &);
void initializePostDominatorTreeWrapperPassPass(PassRegistry&);
void initializePostInlineEntryExitInstrumenterPass(PassRegistry&);
void initializePostMachineSchedulerPass(PassRegistry&);
@@ -405,6 +394,7 @@ void initializeSROALegacyPassPass(PassRegistry&);
void initializeSafeStackLegacyPassPass(PassRegistry&);
void initializeSafepointIRVerifierPass(PassRegistry&);
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
+void initializeSelectOptimizePass(PassRegistry &);
void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &);
void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
@@ -443,7 +433,7 @@ void initializeTailDuplicatePass(PassRegistry&);
void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&);
void initializeTargetPassConfigPass(PassRegistry&);
void initializeTargetTransformInfoWrapperPassPass(PassRegistry&);
-void initializeThreadSanitizerLegacyPassPass(PassRegistry&);
+void initializeTLSVariableHoistLegacyPassPass(PassRegistry &);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
void initializeTypePromotionPass(PassRegistry&);
diff --git a/llvm/include/llvm/InterfaceStub/ELFObjHandler.h b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h
index 20a02c6d5445..c15838c4ae0a 100644
--- a/llvm/include/llvm/InterfaceStub/ELFObjHandler.h
+++ b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h
@@ -13,16 +13,15 @@
#ifndef LLVM_INTERFACESTUB_ELFOBJHANDLER_H
#define LLVM_INTERFACESTUB_ELFOBJHANDLER_H
-#include "llvm/InterfaceStub/IFSStub.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/ELFTypes.h"
-#include "llvm/Support/FileSystem.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include <memory>
namespace llvm {
-class MemoryBuffer;
-
namespace ifs {
+struct IFSStub;
/// Attempt to read a binary ELF file from a MemoryBuffer.
Expected<std::unique_ptr<IFSStub>> readELFFile(MemoryBufferRef Buf);
diff --git a/llvm/include/llvm/InterfaceStub/IFSHandler.h b/llvm/include/llvm/InterfaceStub/IFSHandler.h
index 6ae6a421318e..bfa5692811d7 100644
--- a/llvm/include/llvm/InterfaceStub/IFSHandler.h
+++ b/llvm/include/llvm/InterfaceStub/IFSHandler.h
@@ -19,6 +19,8 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/VersionTuple.h"
#include <memory>
+#include <string>
+#include <vector>
namespace llvm {
@@ -51,8 +53,8 @@ Error validateIFSTarget(IFSStub &Stub, bool ParseTriple);
void stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch,
bool StripEndianness, bool StripBitWidth);
-/// Strips symbols from IFS symbol table that are undefined.
-void stripIFSUndefinedSymbols(IFSStub &Stub);
+Error filterIFSSyms(IFSStub &Stub, bool StripUndefined,
+ const std::vector<std::string> &Exclude = {});
/// Parse llvm triple string into a IFSTarget struct.
IFSTarget parseTriple(StringRef TripleStr);
diff --git a/llvm/include/llvm/InterfaceStub/IFSStub.h b/llvm/include/llvm/InterfaceStub/IFSStub.h
index 8c3cd171b1a2..0f935cd478d5 100644
--- a/llvm/include/llvm/InterfaceStub/IFSStub.h
+++ b/llvm/include/llvm/InterfaceStub/IFSStub.h
@@ -14,9 +14,8 @@
#ifndef LLVM_INTERFACESTUB_IFSSTUB_H
#define LLVM_INTERFACESTUB_IFSSTUB_H
-#include "llvm/Support/Error.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Support/VersionTuple.h"
-#include <set>
#include <vector>
namespace llvm {
@@ -54,7 +53,7 @@ struct IFSSymbol {
IFSSymbol() = default;
explicit IFSSymbol(std::string SymbolName) : Name(std::move(SymbolName)) {}
std::string Name;
- uint64_t Size;
+ Optional<uint64_t> Size;
IFSSymbolType Type;
bool Undefined;
bool Weak;
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index eb793d62907e..54bb82d84d96 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -57,8 +57,8 @@ struct Config {
unsigned OptLevel = 2;
bool DisableVerify = false;
- /// Use the new pass manager
- bool UseNewPM = LLVM_ENABLE_NEW_PASS_MANAGER;
+ /// Use the standard optimization pipeline.
+ bool UseDefaultPipeline = false;
/// Flag to indicate that the optimizer should not assume builtins are present
/// on the target.
@@ -177,6 +177,10 @@ struct Config {
/// Add FSAFDO discriminators.
bool AddFSDiscriminator = false;
+ /// Use opaque pointer types. Used to call LLVMContext::setOpaquePointers
+ /// unless already set by the `-opaque-pointers` command-line option.
+ bool OpaquePointers = true;
+
/// If this field is set, LTO will write input file paths and symbol
/// resolutions here in llvm-lto2 command line flag format. This can be
/// used for testing and for running the LTO pipeline outside of the linker
@@ -288,6 +292,8 @@ struct LTOLLVMContext : LLVMContext {
enableDebugTypeODRUniquing();
setDiagnosticHandler(
std::make_unique<LTOLLVMDiagnosticHandler>(&DiagHandler), true);
+ if (!hasSetOpaquePointersValue())
+ setOpaquePointers(C.OpaquePointers);
}
DiagnosticHandlerFunction DiagHandler;
};
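For orientation, here are the knobs touched by this hunk as an LTO client might set them (field names from the struct above; the values are illustrative):

#include "llvm/LTO/Config.h"
using namespace llvm;

static lto::Config makeConfig() {
  lto::Config Conf;
  Conf.OptLevel = 2;
  Conf.UseDefaultPipeline = false; // replaces the removed UseNewPM flag
  Conf.OpaquePointers = true; // forwarded to LLVMContext::setOpaquePointers
  return Conf;
}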
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 0d085a88a193..ea52226dca16 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -197,7 +197,17 @@ using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
/// This ThinBackend runs the individual backend jobs in-process.
/// The default value means to use one job per hardware core (not hyper-thread).
-ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism);
+/// OnWrite is a callback which receives the module identifier and notifies
+/// the LTO user that the index file for the module (and optionally the
+/// imports file) was created.
+/// If ShouldEmitIndexFiles is true, sharded ThinLTO index files are written
+/// to the same path as the input module, with the suffix ".thinlto.bc".
+/// If ShouldEmitImportsFiles is true, a list of imported files is also
+/// written to a similar path with ".imports" appended instead.
+using IndexWriteCallback = std::function<void(const std::string &)>;
+ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
+ IndexWriteCallback OnWrite = nullptr,
+ bool ShouldEmitIndexFiles = false,
+ bool ShouldEmitImportsFiles = false);
/// This ThinBackend writes individual module indexes to files, instead of
/// running the individual backend jobs. This backend is for distributed builds
@@ -212,7 +222,6 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism);
/// the final ThinLTO linking. Can be nullptr.
/// OnWrite is a callback which receives the module identifier and notifies
/// the LTO user that the index file for the module (and optionally the
/// imports file) was created.
-using IndexWriteCallback = std::function<void(const std::string &)>;
ThinBackend createWriteIndexesThinBackend(std::string OldPrefix,
std::string NewPrefix,
bool ShouldEmitImportsFiles,
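Putting the new parameters together, a hypothetical client wanting in-process ThinLTO plus on-disk index and imports files might write:

#include "llvm/LTO/LTO.h"
#include "llvm/Support/Threading.h"
using namespace llvm;

static lto::ThinBackend makeBackend() {
  auto OnWrite = [](const std::string &ModuleID) {
    // Called once the ".thinlto.bc" (and ".imports") file for ModuleID exists.
  };
  return lto::createInProcessThinBackend(
      heavyweight_hardware_concurrency(), OnWrite,
      /*ShouldEmitIndexFiles=*/true, /*ShouldEmitImportsFiles=*/true);
}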
diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index 333f483f29c5..96f82a9276e0 100644
--- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -184,7 +184,7 @@ struct LTOCodeGenerator {
void setDisableVerify(bool Value) { Config.DisableVerify = Value; }
- void setUseNewPM(bool Value) { Config.UseNewPM = Value; }
+ void setDebugPassManager(bool Enabled) { Config.DebugPassManager = Enabled; }
void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
diff --git a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index be1f3154029c..ab40d88af8c1 100644
--- a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -225,9 +225,6 @@ public:
OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel;
}
- /// Enable or disable the new pass manager.
- void setUseNewPM(unsigned Enabled) { UseNewPM = Enabled; }
-
/// Enable or disable debug output for the new pass manager.
void setDebugPassManager(unsigned Enabled) { DebugPassManager = Enabled; }
@@ -347,10 +344,6 @@ private:
/// IR Optimization Level [0-3].
unsigned OptLevel = 3;
- /// Flag to indicate whether the new pass manager should be used for IR
- /// optimizations.
- bool UseNewPM = LLVM_ENABLE_NEW_PASS_MANAGER;
-
/// Flag to indicate whether debug output should be enabled for the new pass
/// manager.
bool DebugPassManager = false;
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index c8b9aaeed76a..af5926dcb38b 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -75,7 +75,6 @@ namespace {
(void) llvm::createAggressiveInstCombinerPass();
(void) llvm::createBitTrackingDCEPass();
(void)llvm::createOpenMPOptCGSCCLegacyPass();
- (void) llvm::createArgumentPromotionPass();
(void) llvm::createAlignmentFromAssumptionsPass();
(void) llvm::createBasicAAWrapperPass();
(void) llvm::createSCEVAAWrapperPass();
@@ -98,16 +97,10 @@ namespace {
(void) llvm::createDeadCodeEliminationPass();
(void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDependenceAnalysisWrapperPass();
- (void) llvm::createDomOnlyPrinterPass();
- (void) llvm::createDomPrinterPass();
- (void) llvm::createDomOnlyViewerPass();
- (void) llvm::createDomViewerPass();
- (void) llvm::createGCOVProfilerPass();
- (void) llvm::createPGOInstrumentationGenLegacyPass();
- (void) llvm::createPGOInstrumentationUseLegacyPass();
- (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass();
- (void) llvm::createPGOIndirectCallPromotionLegacyPass();
- (void) llvm::createPGOMemOPSizeOptLegacyPass();
+ (void) llvm::createDomOnlyPrinterWrapperPassPass();
+ (void) llvm::createDomPrinterWrapperPassPass();
+ (void) llvm::createDomOnlyViewerWrapperPassPass();
+ (void) llvm::createDomViewerWrapperPassPass();
(void) llvm::createInstrProfilingLegacyPass();
(void) llvm::createFunctionImportPass();
(void) llvm::createFunctionInliningPass();
@@ -123,6 +116,7 @@ namespace {
(void) llvm::createInstSimplifyLegacyPass();
(void) llvm::createInstructionCombiningPass();
(void) llvm::createInternalizePass();
+ (void) llvm::createJMCInstrumenterPass();
(void) llvm::createLCSSAPass();
(void) llvm::createLegacyDivergenceAnalysisPass();
(void) llvm::createLICMPass();
@@ -138,12 +132,12 @@ namespace {
(void) llvm::createLoopRerollPass();
(void) llvm::createLoopUnrollPass();
(void) llvm::createLoopUnrollAndJamPass();
- (void) llvm::createLoopUnswitchPass();
(void) llvm::createLoopVersioningLICMPass();
(void) llvm::createLoopIdiomPass();
(void) llvm::createLoopRotatePass();
(void) llvm::createLowerConstantIntrinsicsPass();
(void) llvm::createLowerExpectIntrinsicPass();
+ (void) llvm::createLowerGlobalDtorsLegacyPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSwitchPass();
(void) llvm::createNaryReassociatePass();
@@ -156,10 +150,10 @@ namespace {
(void) llvm::createPromoteMemoryToRegisterPass();
(void) llvm::createDemoteRegisterToMemoryPass();
(void) llvm::createPruneEHPass();
- (void) llvm::createPostDomOnlyPrinterPass();
- (void) llvm::createPostDomPrinterPass();
- (void) llvm::createPostDomOnlyViewerPass();
- (void) llvm::createPostDomViewerPass();
+ (void)llvm::createPostDomOnlyPrinterWrapperPassPass();
+ (void)llvm::createPostDomPrinterWrapperPassPass();
+ (void)llvm::createPostDomOnlyViewerWrapperPassPass();
+ (void)llvm::createPostDomViewerWrapperPassPass();
(void) llvm::createReassociatePass();
(void) llvm::createRedundantDbgInstEliminationPass();
(void) llvm::createRegionInfoPass();
@@ -176,6 +170,7 @@ namespace {
(void) llvm::createStripDeadDebugInfoPass();
(void) llvm::createStripDeadPrototypesPass();
(void) llvm::createTailCallEliminationPass();
+ (void)llvm::createTLSVariableHoistPass();
(void) llvm::createJumpThreadingPass();
(void) llvm::createDFAJumpThreadingPass();
(void) llvm::createUnifyFunctionExitNodesPass();
@@ -236,6 +231,7 @@ namespace {
(void) llvm::createUnifyLoopExitsPass();
(void) llvm::createFixIrreduciblePass();
(void)llvm::createFunctionSpecializationPass();
+ (void)llvm::createSelectOptimizePass();
(void)new llvm::IntervalPartition();
(void)new llvm::ScalarEvolutionWrapperPass();
diff --git a/llvm/include/llvm/Linker/IRMover.h b/llvm/include/llvm/Linker/IRMover.h
index e5df83f01fe3..1e3c5394ffa2 100644
--- a/llvm/include/llvm/Linker/IRMover.h
+++ b/llvm/include/llvm/Linker/IRMover.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FunctionExtras.h"
#include <functional>
namespace llvm {
@@ -62,6 +63,8 @@ public:
IRMover(Module &M);
typedef std::function<void(GlobalValue &)> ValueAdder;
+ using LazyCallback =
+ llvm::unique_function<void(GlobalValue &GV, ValueAdder Add)>;
/// Move in the provide values in \p ValuesToLink from \p Src.
///
@@ -70,11 +73,11 @@ public:
/// not present in ValuesToLink. The GlobalValue and a ValueAdder callback
/// are passed as an argument, and the callback is expected to be called
/// if the GlobalValue needs to be added to the \p ValuesToLink and linked.
+ /// Pass nullptr if there's no work to be done in such cases.
/// - \p IsPerformingImport is true when this IR link is to perform ThinLTO
/// function importing from Src.
Error move(std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
- std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
- bool IsPerformingImport);
+ LazyCallback AddLazyFor, bool IsPerformingImport);
Module &getModule() { return Composite; }
private:
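Because LazyCallback is now a move-only llvm::unique_function, the callback may own state, and per the new comment a plain nullptr opts out of lazy linking entirely. A hypothetical call site:

#include "llvm/IR/Module.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Support/Error.h"
using namespace llvm;

static Error linkModule(IRMover &Mover, std::unique_ptr<Module> Src,
                        ArrayRef<GlobalValue *> ValuesToLink) {
  IRMover::LazyCallback AddLazyFor = [](GlobalValue &GV,
                                        IRMover::ValueAdder Add) {
    // Decide here whether GV must be pulled in; call Add(GV) if so.
  };
  return Mover.move(std::move(Src), ValuesToLink, std::move(AddLazyFor),
                    /*IsPerformingImport=*/false);
}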
diff --git a/llvm/include/llvm/MC/ConstantPools.h b/llvm/include/llvm/MC/ConstantPools.h
index 9fe0cce8d68c..7eac75362eff 100644
--- a/llvm/include/llvm/MC/ConstantPools.h
+++ b/llvm/include/llvm/MC/ConstantPools.h
@@ -43,7 +43,8 @@ struct ConstantPoolEntry {
class ConstantPool {
using EntryVecTy = SmallVector<ConstantPoolEntry, 4>;
EntryVecTy Entries;
- std::map<int64_t, const MCSymbolRefExpr *> CachedEntries;
+ std::map<int64_t, const MCSymbolRefExpr *> CachedConstantEntries;
+ DenseMap<const MCSymbol *, const MCSymbolRefExpr *> CachedSymbolEntries;
public:
// Initialize a new empty constant pool
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index bb57c3453d10..a5e7b3f504f5 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -13,12 +13,17 @@
#include "llvm/ADT/Optional.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCFragment.h"
#include "llvm/Support/Endian.h"
#include <cstdint>
namespace llvm {
+class MCAlignFragment;
+class MCDwarfCallFrameFragment;
+class MCDwarfLineAddrFragment;
+class MCFragment;
+class MCRelaxableFragment;
+class MCSymbol;
class MCAsmLayout;
class MCAssembler;
class MCCFIInstruction;
@@ -31,6 +36,7 @@ class MCSubtargetInfo;
class MCValue;
class raw_pwrite_stream;
class StringRef;
+class raw_ostream;
/// Generic interface to target specific assembler backends.
class MCAsmBackend {
diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index 355f569861d8..ec17131e17e8 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -430,6 +430,10 @@ protected:
/// hidden visibility. Defaults to MCSA_Hidden.
MCSymbolAttr HiddenVisibilityAttr = MCSA_Hidden;
+ /// This attribute, if not MCSA_Invalid, is used to declare a symbol as having
+ /// exported visibility. Defaults to MCSA_Exported.
+ MCSymbolAttr ExportedVisibilityAttr = MCSA_Exported;
+
/// This attribute, if not MCSA_Invalid, is used to declare an undefined
/// symbol as having hidden visibility. Defaults to MCSA_Hidden.
MCSymbolAttr HiddenDeclarationVisibilityAttr = MCSA_Hidden;
@@ -466,6 +470,10 @@ protected:
/// the .loc/.file directives. Defaults to true.
bool UsesDwarfFileAndLocDirectives = true;
+ /// True if DWARF `.file directory' directive syntax is used by
+ /// default.
+ bool EnableDwarfFileDirectoryDefault = true;
+
/// True if the target needs the DWARF section length in the header (if any)
/// of the DWARF section in the assembly file. Defaults to true.
bool DwarfSectionSizeRequired = true;
@@ -478,6 +486,10 @@ protected:
/// For example, foo(plt) instead of foo@plt. Defaults to false.
bool UseParensForSymbolVariant = false;
+ /// True if the target uses parens for symbol names starting with the
+ /// '$' character to distinguish them from absolute names.
+ bool UseParensForDollarSignNames = true;
+
/// True if the target supports flags in ".loc" directive, false if only
/// location is allowed.
bool SupportsExtendedDwarfLocDirective = true;
@@ -671,6 +683,7 @@ public:
const char *getCode64Directive() const { return Code64Directive; }
unsigned getAssemblerDialect() const { return AssemblerDialect; }
bool doesAllowAtInName() const { return AllowAtInName; }
+ void setAllowAtInName(bool V) { AllowAtInName = V; }
bool doesAllowQuestionAtStartOfIdentifier() const {
return AllowQuestionAtStartOfIdentifier;
}
@@ -749,6 +762,8 @@ public:
MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr; }
+ MCSymbolAttr getExportedVisibilityAttr() const { return ExportedVisibilityAttr; }
+
MCSymbolAttr getHiddenDeclarationVisibilityAttr() const {
return HiddenDeclarationVisibilityAttr;
}
@@ -788,6 +803,9 @@ public:
bool doDwarfFDESymbolsUseAbsDiff() const { return DwarfFDESymbolsUseAbsDiff; }
bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; }
bool useParensForSymbolVariant() const { return UseParensForSymbolVariant; }
+ bool useParensForDollarSignNames() const {
+ return UseParensForDollarSignNames;
+ }
bool supportsExtendedDwarfLocDirective() const {
return SupportsExtendedDwarfLocDirective;
}
@@ -800,6 +818,10 @@ public:
return DwarfSectionSizeRequired;
}
+ bool enableDwarfFileDirectoryDefault() const {
+ return EnableDwarfFileDirectoryDefault;
+ }
+
void addInitialFrameState(const MCCFIInstruction &Inst);
const std::vector<MCCFIInstruction> &getInitialFrameState() const {
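For illustration, a hedged sketch of how a target's MCAsmInfo subclass could
override the new defaults in its constructor; the target is invented, but the
protected members are the ones added above and MCSA_Invalid comes from
MCDirectives.h.

    #include "llvm/MC/MCAsmInfo.h"
    using namespace llvm;

    class ExampleTargetMCAsmInfo : public MCAsmInfo {
    public:
      ExampleTargetMCAsmInfo() {
        ExportedVisibilityAttr = MCSA_Invalid;   // target has no "exported" attribute
        UseParensForDollarSignNames = false;     // '$' names need no parens here
        EnableDwarfFileDirectoryDefault = false; // keep the old .file syntax
      }
    };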
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index 9d5cb620c9de..80aa97c315da 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -10,7 +10,6 @@
#define LLVM_MC_MCASSEMBLER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
@@ -18,20 +17,34 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VersionTuple.h"
+#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <memory>
#include <string>
+#include <tuple>
#include <utility>
#include <vector>
namespace llvm {
+class MCBoundaryAlignFragment;
+class MCCVDefRangeFragment;
+class MCCVInlineLineTableFragment;
+class MCDwarfCallFrameFragment;
+class MCDwarfLineAddrFragment;
+class MCEncodedFragment;
+class MCFixup;
+class MCLEBFragment;
+class MCPseudoProbeAddrFragment;
+class MCRelaxableFragment;
+class MCSymbolRefExpr;
+class raw_ostream;
class MCAsmBackend;
class MCAsmLayout;
class MCContext;
diff --git a/llvm/include/llvm/MC/MCCodeView.h b/llvm/include/llvm/MC/MCCodeView.h
index 5770f370341d..3d15c4009e43 100644
--- a/llvm/include/llvm/MC/MCCodeView.h
+++ b/llvm/include/llvm/MC/MCCodeView.h
@@ -13,18 +13,25 @@
#ifndef LLVM_MC_MCCODEVIEW_H
#define LLVM_MC_MCCODEVIEW_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCFragment.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include <map>
#include <vector>
namespace llvm {
+class MCAsmLayout;
+class MCCVDefRangeFragment;
+class MCCVInlineLineTableFragment;
+class MCDataFragment;
+class MCFragment;
+class MCSection;
+class MCSymbol;
class MCContext;
class MCObjectStreamer;
class MCStreamer;
-class CodeViewContext;
/// Instances of this class represent the information from a
/// .cv_loc directive.
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index d2307d692278..a0e18891ed90 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -13,18 +13,15 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmMacro.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCPseudoProbe.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
@@ -44,798 +41,825 @@
namespace llvm {
- class CodeViewContext;
- class MCAsmInfo;
- class MCLabel;
- class MCObjectFileInfo;
- class MCRegisterInfo;
- class MCSection;
- class MCSectionCOFF;
- class MCSectionELF;
- class MCSectionGOFF;
- class MCSectionMachO;
- class MCSectionWasm;
- class MCSectionXCOFF;
- class MCStreamer;
- class MCSymbol;
- class MCSymbolELF;
- class MCSymbolWasm;
- class MCSymbolXCOFF;
- class MDNode;
- class SMDiagnostic;
- class SMLoc;
- class SourceMgr;
-
- /// Context object for machine code objects. This class owns all of the
- /// sections that it creates.
- ///
- class MCContext {
- public:
- using SymbolTable = StringMap<MCSymbol *, BumpPtrAllocator &>;
- using DiagHandlerTy =
- std::function<void(const SMDiagnostic &, bool, const SourceMgr &,
- std::vector<const MDNode *> &)>;
- enum Environment { IsMachO, IsELF, IsGOFF, IsCOFF, IsWasm, IsXCOFF };
-
- private:
- Environment Env;
-
- /// The name of the Segment where Swift5 Reflection Section data will be
- /// output.
- StringRef Swift5ReflectionSegmentName;
-
- /// The triple for this object.
- Triple TT;
-
- /// The SourceMgr for this object, if any.
- const SourceMgr *SrcMgr;
-
- /// The SourceMgr for inline assembly, if any.
- std::unique_ptr<SourceMgr> InlineSrcMgr;
- std::vector<const MDNode *> LocInfos;
-
- DiagHandlerTy DiagHandler;
-
- /// The MCAsmInfo for this target.
- const MCAsmInfo *MAI;
-
- /// The MCRegisterInfo for this target.
- const MCRegisterInfo *MRI;
-
- /// The MCObjectFileInfo for this target.
- const MCObjectFileInfo *MOFI;
-
- /// The MCSubtargetInfo for this target.
- const MCSubtargetInfo *MSTI;
-
- std::unique_ptr<CodeViewContext> CVContext;
-
- /// Allocator object used for creating machine code objects.
- ///
- /// We use a bump pointer allocator to avoid the need to track all allocated
- /// objects.
- BumpPtrAllocator Allocator;
-
- SpecificBumpPtrAllocator<MCSectionCOFF> COFFAllocator;
- SpecificBumpPtrAllocator<MCSectionELF> ELFAllocator;
- SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator;
- SpecificBumpPtrAllocator<MCSectionGOFF> GOFFAllocator;
- SpecificBumpPtrAllocator<MCSectionWasm> WasmAllocator;
- SpecificBumpPtrAllocator<MCSectionXCOFF> XCOFFAllocator;
- SpecificBumpPtrAllocator<MCInst> MCInstAllocator;
-
- /// Bindings of names to symbols.
- SymbolTable Symbols;
-
- /// A mapping from a local label number and an instance count to a symbol.
- /// For example, in the assembly
- /// 1:
- /// 2:
- /// 1:
- /// We have three labels represented by the pairs (1, 0), (2, 0) and (1, 1)
- DenseMap<std::pair<unsigned, unsigned>, MCSymbol *> LocalSymbols;
-
- /// Keeps track of names that were used for both user-declared and
- /// artificial symbols. The value is "true" if the name has been used for a
- /// non-section symbol (there can be at most one of those, plus an unlimited
- /// number of section symbols with the same name).
- StringMap<bool, BumpPtrAllocator &> UsedNames;
-
- /// Keeps track of labels that are used in inline assembly.
- SymbolTable InlineAsmUsedLabelNames;
-
- /// The next ID to dole out to an unnamed assembler temporary symbol with
- /// a given prefix.
- StringMap<unsigned> NextID;
-
- /// Instances of directional local labels.
- DenseMap<unsigned, MCLabel *> Instances;
- /// NextInstance() creates the next instance of the directional local label
- /// for the LocalLabelVal and adds it to the map if needed.
- unsigned NextInstance(unsigned LocalLabelVal);
- /// GetInstance() gets the current instance of the directional local label
- /// for the LocalLabelVal and adds it to the map if needed.
- unsigned GetInstance(unsigned LocalLabelVal);
-
- /// The file name of the log file from the environment variable
- /// AS_SECURE_LOG_FILE, which must be set before the .secure_log_unique
- /// directive is used, or it is an error.
- char *SecureLogFile;
- /// The stream that gets written to for the .secure_log_unique directive.
- std::unique_ptr<raw_fd_ostream> SecureLog;
- /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to
- /// catch errors if .secure_log_unique appears twice without
- /// .secure_log_reset appearing between them.
- bool SecureLogUsed = false;
-
- /// The compilation directory to use for DW_AT_comp_dir.
- SmallString<128> CompilationDir;
-
- /// Prefix replacement map for source file information.
- std::map<const std::string, const std::string> DebugPrefixMap;
-
- /// The main file name if passed in explicitly.
- std::string MainFileName;
-
- /// The dwarf file and directory tables from the dwarf .file directive.
- /// We now emit a line table for each compile unit. To reduce the prologue
- /// size of each line table, the files and directories used by each compile
- /// unit are separated.
- std::map<unsigned, MCDwarfLineTable> MCDwarfLineTablesCUMap;
-
- /// The current dwarf line information from the last dwarf .loc directive.
- MCDwarfLoc CurrentDwarfLoc;
- bool DwarfLocSeen = false;
-
- /// Generate dwarf debugging info for assembly source files.
- bool GenDwarfForAssembly = false;
-
- /// The current dwarf file number when generating dwarf debugging info for
- /// assembly source files.
- unsigned GenDwarfFileNumber = 0;
-
- /// Sections for generating the .debug_ranges and .debug_aranges sections.
- SetVector<MCSection *> SectionsForRanges;
-
- /// The information gathered from labels that will have dwarf label
- /// entries when generating dwarf assembly source files.
- std::vector<MCGenDwarfLabelEntry> MCGenDwarfLabelEntries;
-
- /// The string to embed in the debug information for the compile unit, if
- /// non-empty.
- StringRef DwarfDebugFlags;
-
- /// The string to embed in as the dwarf AT_producer for the compile unit, if
- /// non-empty.
- StringRef DwarfDebugProducer;
-
- /// The maximum version of dwarf that we should emit.
- uint16_t DwarfVersion = 4;
-
- /// The format of dwarf that we emit.
- dwarf::DwarfFormat DwarfFormat = dwarf::DWARF32;
-
- /// Honor temporary labels; this is useful for debugging semantic
- /// differences between temporary and non-temporary labels (primarily on
- /// Darwin).
- bool AllowTemporaryLabels = true;
- bool UseNamesOnTempLabels = false;
-
- /// The Compile Unit ID that we are currently processing.
- unsigned DwarfCompileUnitID = 0;
-
- /// A collection of MCPseudoProbe in the current module
- MCPseudoProbeTable PseudoProbeTable;
-
- // Sections are differentiated by the quadruple (section_name, group_name,
- // unique_id, link_to_symbol_name). Sections sharing the same quadruple are
- // combined into one section.
- struct ELFSectionKey {
- std::string SectionName;
- StringRef GroupName;
- StringRef LinkedToName;
- unsigned UniqueID;
-
- ELFSectionKey(StringRef SectionName, StringRef GroupName,
- StringRef LinkedToName, unsigned UniqueID)
- : SectionName(SectionName), GroupName(GroupName),
- LinkedToName(LinkedToName), UniqueID(UniqueID) {}
-
- bool operator<(const ELFSectionKey &Other) const {
- if (SectionName != Other.SectionName)
- return SectionName < Other.SectionName;
- if (GroupName != Other.GroupName)
- return GroupName < Other.GroupName;
- if (int O = LinkedToName.compare(Other.LinkedToName))
- return O < 0;
- return UniqueID < Other.UniqueID;
- }
- };
-
- struct COFFSectionKey {
- std::string SectionName;
- StringRef GroupName;
- int SelectionKey;
- unsigned UniqueID;
-
- COFFSectionKey(StringRef SectionName, StringRef GroupName,
- int SelectionKey, unsigned UniqueID)
- : SectionName(SectionName), GroupName(GroupName),
- SelectionKey(SelectionKey), UniqueID(UniqueID) {}
-
- bool operator<(const COFFSectionKey &Other) const {
- if (SectionName != Other.SectionName)
- return SectionName < Other.SectionName;
- if (GroupName != Other.GroupName)
- return GroupName < Other.GroupName;
- if (SelectionKey != Other.SelectionKey)
- return SelectionKey < Other.SelectionKey;
- return UniqueID < Other.UniqueID;
- }
- };
-
- struct WasmSectionKey {
- std::string SectionName;
- StringRef GroupName;
- unsigned UniqueID;
-
- WasmSectionKey(StringRef SectionName, StringRef GroupName,
- unsigned UniqueID)
- : SectionName(SectionName), GroupName(GroupName), UniqueID(UniqueID) {
- }
-
- bool operator<(const WasmSectionKey &Other) const {
- if (SectionName != Other.SectionName)
- return SectionName < Other.SectionName;
- if (GroupName != Other.GroupName)
- return GroupName < Other.GroupName;
- return UniqueID < Other.UniqueID;
- }
- };
-
- struct XCOFFSectionKey {
- // Section name.
- std::string SectionName;
- // Section property.
- // For csect section, it is storage mapping class.
- // For debug section, it is section type flags.
- union {
- XCOFF::StorageMappingClass MappingClass;
- XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags;
- };
- bool IsCsect;
-
- XCOFFSectionKey(StringRef SectionName,
- XCOFF::StorageMappingClass MappingClass)
- : SectionName(SectionName), MappingClass(MappingClass),
- IsCsect(true) {}
-
- XCOFFSectionKey(StringRef SectionName,
- XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags)
- : SectionName(SectionName), DwarfSubtypeFlags(DwarfSubtypeFlags),
- IsCsect(false) {}
-
- bool operator<(const XCOFFSectionKey &Other) const {
- if (IsCsect && Other.IsCsect)
- return std::tie(SectionName, MappingClass) <
- std::tie(Other.SectionName, Other.MappingClass);
- if (IsCsect != Other.IsCsect)
- return IsCsect;
- return std::tie(SectionName, DwarfSubtypeFlags) <
- std::tie(Other.SectionName, Other.DwarfSubtypeFlags);
- }
- };
-
- StringMap<MCSectionMachO *> MachOUniquingMap;
- std::map<ELFSectionKey, MCSectionELF *> ELFUniquingMap;
- std::map<COFFSectionKey, MCSectionCOFF *> COFFUniquingMap;
- std::map<std::string, MCSectionGOFF *> GOFFUniquingMap;
- std::map<WasmSectionKey, MCSectionWasm *> WasmUniquingMap;
- std::map<XCOFFSectionKey, MCSectionXCOFF *> XCOFFUniquingMap;
- StringMap<bool> RelSecNames;
-
- SpecificBumpPtrAllocator<MCSubtargetInfo> MCSubtargetAllocator;
+class CodeViewContext;
+class MCAsmInfo;
+class MCInst;
+class MCLabel;
+class MCObjectFileInfo;
+class MCRegisterInfo;
+class MCSection;
+class MCSectionCOFF;
+class MCSectionDXContainer;
+class MCSectionELF;
+class MCSectionGOFF;
+class MCSectionMachO;
+class MCSectionSPIRV;
+class MCSectionWasm;
+class MCSectionXCOFF;
+class MCStreamer;
+class MCSubtargetInfo;
+class MCSymbol;
+class MCSymbolELF;
+class MCSymbolWasm;
+class MCSymbolXCOFF;
+class MCTargetOptions;
+class MDNode;
+template <typename T> class SmallVectorImpl;
+class SMDiagnostic;
+class SMLoc;
+class SourceMgr;
+enum class EmitDwarfUnwindType;
+
+/// Context object for machine code objects. This class owns all of the
+/// sections that it creates.
+///
+class MCContext {
+public:
+ using SymbolTable = StringMap<MCSymbol *, BumpPtrAllocator &>;
+ using DiagHandlerTy =
+ std::function<void(const SMDiagnostic &, bool, const SourceMgr &,
+ std::vector<const MDNode *> &)>;
+ enum Environment {
+ IsMachO,
+ IsELF,
+ IsGOFF,
+ IsCOFF,
+ IsSPIRV,
+ IsWasm,
+ IsXCOFF,
+ IsDXContainer
+ };
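For illustration, code that dispatches on the object-file environment now has
two extra cases (SPIR-V and DXContainer) to cover; a minimal sketch:

    #include "llvm/MC/MCContext.h"
    #include "llvm/Support/ErrorHandling.h"
    using namespace llvm;

    static const char *formatName(MCContext::Environment Env) {
      switch (Env) {
      case MCContext::IsMachO:       return "Mach-O";
      case MCContext::IsELF:         return "ELF";
      case MCContext::IsGOFF:        return "GOFF";
      case MCContext::IsCOFF:        return "COFF";
      case MCContext::IsSPIRV:       return "SPIR-V";
      case MCContext::IsWasm:        return "Wasm";
      case MCContext::IsXCOFF:       return "XCOFF";
      case MCContext::IsDXContainer: return "DXContainer";
      }
      llvm_unreachable("unknown MCContext::Environment");
    }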
- /// Do automatic reset in destructor
- bool AutoReset;
+private:
+ Environment Env;
- MCTargetOptions const *TargetOptions;
+ /// The name of the Segment where Swift5 Reflection Section data will be
+ /// output.
+ StringRef Swift5ReflectionSegmentName;
- bool HadError = false;
+ /// The triple for this object.
+ Triple TT;
- void reportCommon(SMLoc Loc,
- std::function<void(SMDiagnostic &, const SourceMgr *)>);
+ /// The SourceMgr for this object, if any.
+ const SourceMgr *SrcMgr;
- MCSymbol *createSymbolImpl(const StringMapEntry<bool> *Name,
- bool CanBeUnnamed);
- MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix,
- bool IsTemporary);
+ /// The SourceMgr for inline assembly, if any.
+ std::unique_ptr<SourceMgr> InlineSrcMgr;
+ std::vector<const MDNode *> LocInfos;
- MCSymbol *getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal,
- unsigned Instance);
+ DiagHandlerTy DiagHandler;
- MCSectionELF *createELFSectionImpl(StringRef Section, unsigned Type,
- unsigned Flags, SectionKind K,
- unsigned EntrySize,
- const MCSymbolELF *Group, bool IsComdat,
- unsigned UniqueID,
- const MCSymbolELF *LinkedToSym);
+ /// The MCAsmInfo for this target.
+ const MCAsmInfo *MAI;
- MCSymbolXCOFF *createXCOFFSymbolImpl(const StringMapEntry<bool> *Name,
- bool IsTemporary);
+ /// The MCRegisterInfo for this target.
+ const MCRegisterInfo *MRI;
- /// Map of currently defined macros.
- StringMap<MCAsmMacro> MacroMap;
+ /// The MCObjectFileInfo for this target.
+ const MCObjectFileInfo *MOFI;
- struct ELFEntrySizeKey {
- std::string SectionName;
- unsigned Flags;
- unsigned EntrySize;
+ /// The MCSubtargetInfo for this target.
+ const MCSubtargetInfo *MSTI;
- ELFEntrySizeKey(StringRef SectionName, unsigned Flags, unsigned EntrySize)
- : SectionName(SectionName), Flags(Flags), EntrySize(EntrySize) {}
+ std::unique_ptr<CodeViewContext> CVContext;
- bool operator<(const ELFEntrySizeKey &Other) const {
- if (SectionName != Other.SectionName)
- return SectionName < Other.SectionName;
- if (Flags != Other.Flags)
- return Flags < Other.Flags;
- return EntrySize < Other.EntrySize;
- }
- };
-
- // Symbols must be assigned to a section with a compatible entry size and
- // flags. This map is used to assign unique IDs to sections to distinguish
- // between sections with identical names but incompatible entry sizes and/or
- // flags. This can occur when a symbol is explicitly assigned to a section,
- // e.g. via __attribute__((section("myname"))).
- std::map<ELFEntrySizeKey, unsigned> ELFEntrySizeMap;
-
- // This set is used to record the generic mergeable section names seen.
- // These are sections that are created as mergeable e.g. .debug_str. We need
- // to avoid assigning non-mergeable symbols to these sections. It is used
- // to prevent non-mergeable symbols being explicitly assigned to mergeable
- // sections (e.g. via __attribute__((section("myname")))).
- DenseSet<StringRef> ELFSeenGenericMergeableSections;
-
- public:
- explicit MCContext(const Triple &TheTriple, const MCAsmInfo *MAI,
- const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
- const SourceMgr *Mgr = nullptr,
- MCTargetOptions const *TargetOpts = nullptr,
- bool DoAutoReset = true,
- StringRef Swift5ReflSegmentName = {});
- MCContext(const MCContext &) = delete;
- MCContext &operator=(const MCContext &) = delete;
- ~MCContext();
-
- Environment getObjectFileType() const { return Env; }
-
- const StringRef &getSwift5ReflectionSegmentName() const {
- return Swift5ReflectionSegmentName;
+ /// Allocator object used for creating machine code objects.
+ ///
+ /// We use a bump pointer allocator to avoid the need to track all allocated
+ /// objects.
+ BumpPtrAllocator Allocator;
+
+ SpecificBumpPtrAllocator<MCSectionCOFF> COFFAllocator;
+ SpecificBumpPtrAllocator<MCSectionDXContainer> DXCAllocator;
+ SpecificBumpPtrAllocator<MCSectionELF> ELFAllocator;
+ SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator;
+ SpecificBumpPtrAllocator<MCSectionGOFF> GOFFAllocator;
+ SpecificBumpPtrAllocator<MCSectionSPIRV> SPIRVAllocator;
+ SpecificBumpPtrAllocator<MCSectionWasm> WasmAllocator;
+ SpecificBumpPtrAllocator<MCSectionXCOFF> XCOFFAllocator;
+ SpecificBumpPtrAllocator<MCInst> MCInstAllocator;
+
+ /// Bindings of names to symbols.
+ SymbolTable Symbols;
+
+ /// A mapping from a local label number and an instance count to a symbol.
+ /// For example, in the assembly
+ /// 1:
+ /// 2:
+ /// 1:
+ /// We have three labels represented by the pairs (1, 0), (2, 0) and (1, 1)
+ DenseMap<std::pair<unsigned, unsigned>, MCSymbol *> LocalSymbols;
+
+ /// Keeps track of names that were used for both user-declared and
+ /// artificial symbols. The value is "true" if the name has been used for a
+ /// non-section symbol (there can be at most one of those, plus an unlimited
+ /// number of section symbols with the same name).
+ StringMap<bool, BumpPtrAllocator &> UsedNames;
+
+ /// Keeps track of labels that are used in inline assembly.
+ SymbolTable InlineAsmUsedLabelNames;
+
+ /// The next ID to dole out to an unnamed assembler temporary symbol with
+ /// a given prefix.
+ StringMap<unsigned> NextID;
+
+ /// Instances of directional local labels.
+ DenseMap<unsigned, MCLabel *> Instances;
+ /// NextInstance() creates the next instance of the directional local label
+ /// for the LocalLabelVal and adds it to the map if needed.
+ unsigned NextInstance(unsigned LocalLabelVal);
+ /// GetInstance() gets the current instance of the directional local label
+ /// for the LocalLabelVal and adds it to the map if needed.
+ unsigned GetInstance(unsigned LocalLabelVal);
+
+ /// LLVM_BB_ADDR_MAP version to emit.
+ uint8_t BBAddrMapVersion = 1;
+
+ /// The file name of the log file from the environment variable
+ /// AS_SECURE_LOG_FILE, which must be set before the .secure_log_unique
+ /// directive is used, or it is an error.
+ char *SecureLogFile;
+ /// The stream that gets written to for the .secure_log_unique directive.
+ std::unique_ptr<raw_fd_ostream> SecureLog;
+ /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to
+ /// catch errors if .secure_log_unique appears twice without
+ /// .secure_log_reset appearing between them.
+ bool SecureLogUsed = false;
+
+ /// The compilation directory to use for DW_AT_comp_dir.
+ SmallString<128> CompilationDir;
+
+ /// Prefix replacement map for source file information.
+ std::map<const std::string, const std::string> DebugPrefixMap;
+
+ /// The main file name if passed in explicitly.
+ std::string MainFileName;
+
+ /// The dwarf file and directory tables from the dwarf .file directive.
+ /// We now emit a line table for each compile unit. To reduce the prologue
+ /// size of each line table, the files and directories used by each compile
+ /// unit are separated.
+ std::map<unsigned, MCDwarfLineTable> MCDwarfLineTablesCUMap;
+
+ /// The current dwarf line information from the last dwarf .loc directive.
+ MCDwarfLoc CurrentDwarfLoc;
+ bool DwarfLocSeen = false;
+
+ /// Generate dwarf debugging info for assembly source files.
+ bool GenDwarfForAssembly = false;
+
+ /// The current dwarf file number when generating dwarf debugging info for
+ /// assembly source files.
+ unsigned GenDwarfFileNumber = 0;
+
+ /// Sections for generating the .debug_ranges and .debug_aranges sections.
+ SetVector<MCSection *> SectionsForRanges;
+
+ /// The information gathered from labels that will have dwarf label
+ /// entries when generating dwarf assembly source files.
+ std::vector<MCGenDwarfLabelEntry> MCGenDwarfLabelEntries;
+
+ /// The string to embed in the debug information for the compile unit, if
+ /// non-empty.
+ StringRef DwarfDebugFlags;
+
+ /// The string to embed in as the dwarf AT_producer for the compile unit, if
+ /// non-empty.
+ StringRef DwarfDebugProducer;
+
+ /// The maximum version of dwarf that we should emit.
+ uint16_t DwarfVersion = 4;
+
+ /// The format of dwarf that we emit.
+ dwarf::DwarfFormat DwarfFormat = dwarf::DWARF32;
+
+ /// Honor temporary labels; this is useful for debugging semantic
+ /// differences between temporary and non-temporary labels (primarily on
+ /// Darwin).
+ bool AllowTemporaryLabels = true;
+ bool UseNamesOnTempLabels = false;
+
+ /// The Compile Unit ID that we are currently processing.
+ unsigned DwarfCompileUnitID = 0;
+
+ /// A collection of MCPseudoProbe in the current module
+ MCPseudoProbeTable PseudoProbeTable;
+
+ // Sections are differentiated by the quadruple (section_name, group_name,
+ // unique_id, link_to_symbol_name). Sections sharing the same quadruple are
+ // combined into one section.
+ struct ELFSectionKey {
+ std::string SectionName;
+ StringRef GroupName;
+ StringRef LinkedToName;
+ unsigned UniqueID;
+
+ ELFSectionKey(StringRef SectionName, StringRef GroupName,
+ StringRef LinkedToName, unsigned UniqueID)
+ : SectionName(SectionName), GroupName(GroupName),
+ LinkedToName(LinkedToName), UniqueID(UniqueID) {}
+
+ bool operator<(const ELFSectionKey &Other) const {
+ if (SectionName != Other.SectionName)
+ return SectionName < Other.SectionName;
+ if (GroupName != Other.GroupName)
+ return GroupName < Other.GroupName;
+ if (int O = LinkedToName.compare(Other.LinkedToName))
+ return O < 0;
+ return UniqueID < Other.UniqueID;
}
- const Triple &getTargetTriple() const { return TT; }
- const SourceMgr *getSourceManager() const { return SrcMgr; }
+ };
- void initInlineSourceManager();
- SourceMgr *getInlineSourceManager() {
- return InlineSrcMgr.get();
- }
- std::vector<const MDNode *> &getLocInfos() { return LocInfos; }
- void setDiagnosticHandler(DiagHandlerTy DiagHandler) {
- this->DiagHandler = DiagHandler;
+ struct COFFSectionKey {
+ std::string SectionName;
+ StringRef GroupName;
+ int SelectionKey;
+ unsigned UniqueID;
+
+ COFFSectionKey(StringRef SectionName, StringRef GroupName, int SelectionKey,
+ unsigned UniqueID)
+ : SectionName(SectionName), GroupName(GroupName),
+ SelectionKey(SelectionKey), UniqueID(UniqueID) {}
+
+ bool operator<(const COFFSectionKey &Other) const {
+ if (SectionName != Other.SectionName)
+ return SectionName < Other.SectionName;
+ if (GroupName != Other.GroupName)
+ return GroupName < Other.GroupName;
+ if (SelectionKey != Other.SelectionKey)
+ return SelectionKey < Other.SelectionKey;
+ return UniqueID < Other.UniqueID;
}
+ };
- void setObjectFileInfo(const MCObjectFileInfo *Mofi) { MOFI = Mofi; }
-
- const MCAsmInfo *getAsmInfo() const { return MAI; }
-
- const MCRegisterInfo *getRegisterInfo() const { return MRI; }
-
- const MCObjectFileInfo *getObjectFileInfo() const { return MOFI; }
-
- const MCSubtargetInfo *getSubtargetInfo() const { return MSTI; }
-
- CodeViewContext &getCVContext();
-
- void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
- void setUseNamesOnTempLabels(bool Value) { UseNamesOnTempLabels = Value; }
-
- /// \name Module Lifetime Management
- /// @{
-
- /// reset - Return the object to its right-after-construction state to
- /// prepare to process a new module.
- void reset();
-
- /// @}
-
- /// \name MCInst Management
-
- /// Create and return a new MC instruction.
- MCInst *createMCInst();
-
- /// \name Symbol Management
- /// @{
-
- /// Create and return a new linker temporary symbol with a unique but
- /// unspecified name.
- MCSymbol *createLinkerPrivateTempSymbol();
-
- /// Create a temporary symbol with a unique name. The name will be omitted
- /// in the symbol table if UseNamesOnTempLabels is false (default except
- /// MCAsmStreamer). The overload without Name uses an unspecified name.
- MCSymbol *createTempSymbol();
- MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix = true);
-
- /// Create a temporary symbol with a unique name whose name cannot be
- /// omitted in the symbol table. This is rarely used.
- MCSymbol *createNamedTempSymbol();
- MCSymbol *createNamedTempSymbol(const Twine &Name);
-
- /// Create the definition of a directional local symbol for numbered label
- /// (used for "1:" definitions).
- MCSymbol *createDirectionalLocalSymbol(unsigned LocalLabelVal);
-
- /// Create and return a directional local symbol for numbered label (used
- /// for "1b" or 1f" references).
- MCSymbol *getDirectionalLocalSymbol(unsigned LocalLabelVal, bool Before);
-
- /// Look up the symbol with the specified \p Name. If it exists,
- /// return it. If not, create a forward reference and return it.
- ///
- /// \param Name - The symbol name, which must be unique across all symbols.
- MCSymbol *getOrCreateSymbol(const Twine &Name);
+ struct WasmSectionKey {
+ std::string SectionName;
+ StringRef GroupName;
+ unsigned UniqueID;
+
+ WasmSectionKey(StringRef SectionName, StringRef GroupName,
+ unsigned UniqueID)
+ : SectionName(SectionName), GroupName(GroupName), UniqueID(UniqueID) {}
+
+ bool operator<(const WasmSectionKey &Other) const {
+ if (SectionName != Other.SectionName)
+ return SectionName < Other.SectionName;
+ if (GroupName != Other.GroupName)
+ return GroupName < Other.GroupName;
+ return UniqueID < Other.UniqueID;
+ }
+ };
- /// Gets a symbol that will be defined to the final stack offset of a local
- /// variable after codegen.
- ///
- /// \param Idx - The index of a local variable passed to \@llvm.localescape.
- MCSymbol *getOrCreateFrameAllocSymbol(StringRef FuncName, unsigned Idx);
+ struct XCOFFSectionKey {
+ // Section name.
+ std::string SectionName;
+ // Section property.
+ // For csect section, it is storage mapping class.
+ // For debug section, it is section type flags.
+ union {
+ XCOFF::StorageMappingClass MappingClass;
+ XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags;
+ };
+ bool IsCsect;
+
+ XCOFFSectionKey(StringRef SectionName,
+ XCOFF::StorageMappingClass MappingClass)
+ : SectionName(SectionName), MappingClass(MappingClass), IsCsect(true) {}
+
+ XCOFFSectionKey(StringRef SectionName,
+ XCOFF::DwarfSectionSubtypeFlags DwarfSubtypeFlags)
+ : SectionName(SectionName), DwarfSubtypeFlags(DwarfSubtypeFlags),
+ IsCsect(false) {}
+
+ bool operator<(const XCOFFSectionKey &Other) const {
+ if (IsCsect && Other.IsCsect)
+ return std::tie(SectionName, MappingClass) <
+ std::tie(Other.SectionName, Other.MappingClass);
+ if (IsCsect != Other.IsCsect)
+ return IsCsect;
+ return std::tie(SectionName, DwarfSubtypeFlags) <
+ std::tie(Other.SectionName, Other.DwarfSubtypeFlags);
+ }
+ };
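The std::tie comparisons in the section keys above are the standard idiom for
lexicographic ordering; a tiny self-contained illustration:

    #include <string>
    #include <tuple>

    struct Key {
      std::string Name;
      unsigned Flags;
      // std::tie builds tuples of references; tuple's operator< compares
      // field by field, giving the strict weak ordering std::map requires.
      bool operator<(const Key &Other) const {
        return std::tie(Name, Flags) < std::tie(Other.Name, Other.Flags);
      }
    };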
- MCSymbol *getOrCreateParentFrameOffsetSymbol(StringRef FuncName);
+ StringMap<MCSectionMachO *> MachOUniquingMap;
+ std::map<ELFSectionKey, MCSectionELF *> ELFUniquingMap;
+ std::map<COFFSectionKey, MCSectionCOFF *> COFFUniquingMap;
+ std::map<std::string, MCSectionGOFF *> GOFFUniquingMap;
+ std::map<WasmSectionKey, MCSectionWasm *> WasmUniquingMap;
+ std::map<XCOFFSectionKey, MCSectionXCOFF *> XCOFFUniquingMap;
+ StringMap<MCSectionDXContainer *> DXCUniquingMap;
+ StringMap<bool> RelSecNames;
- MCSymbol *getOrCreateLSDASymbol(StringRef FuncName);
+ SpecificBumpPtrAllocator<MCSubtargetInfo> MCSubtargetAllocator;
- /// Get the symbol for \p Name, or null.
- MCSymbol *lookupSymbol(const Twine &Name) const;
+ /// Do automatic reset in destructor
+ bool AutoReset;
- /// Set value for a symbol.
- void setSymbolValue(MCStreamer &Streamer, StringRef Sym, uint64_t Val);
+ MCTargetOptions const *TargetOptions;
- /// getSymbols - Get a reference to the symbol table for clients that
- /// want to, for example, iterate over all symbols. 'const' because we
- /// still want any modifications to the table itself to use the MCContext
- /// APIs.
- const SymbolTable &getSymbols() const { return Symbols; }
+ bool HadError = false;
- /// getInlineAsmLabel - Return the symbol for \p Name if it is a label
- /// referenced in inline assembly, or null otherwise.
- MCSymbol *getInlineAsmLabel(StringRef Name) const {
- return InlineAsmUsedLabelNames.lookup(Name);
- }
+ void reportCommon(SMLoc Loc,
+ std::function<void(SMDiagnostic &, const SourceMgr *)>);
- /// registerInlineAsmLabel - Records that the name is a label referenced in
- /// inline assembly.
- void registerInlineAsmLabel(MCSymbol *Sym);
+ MCSymbol *createSymbolImpl(const StringMapEntry<bool> *Name,
+ bool CanBeUnnamed);
+ MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix,
+ bool IsTemporary);
- /// @}
+ MCSymbol *getOrCreateDirectionalLocalSymbol(unsigned LocalLabelVal,
+ unsigned Instance);
- /// \name Section Management
- /// @{
+ MCSectionELF *createELFSectionImpl(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind K,
+ unsigned EntrySize,
+ const MCSymbolELF *Group, bool IsComdat,
+ unsigned UniqueID,
+ const MCSymbolELF *LinkedToSym);
- enum : unsigned {
- /// Pass this value as the UniqueID during section creation to get the
- /// generic section with the given name and characteristics. The usual
- /// sections such as .text use this ID.
- GenericSectionID = ~0U
- };
+ MCSymbolXCOFF *createXCOFFSymbolImpl(const StringMapEntry<bool> *Name,
+ bool IsTemporary);
- /// Return the MCSection for the specified mach-o section. This requires
- /// the operands to be valid.
- MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2, SectionKind K,
- const char *BeginSymName = nullptr);
-
- MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section,
- unsigned TypeAndAttributes, SectionKind K,
- const char *BeginSymName = nullptr) {
- return getMachOSection(Segment, Section, TypeAndAttributes, 0, K,
- BeginSymName);
- }
+ /// Map of currently defined macros.
+ StringMap<MCAsmMacro> MacroMap;
- MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
- unsigned Flags) {
- return getELFSection(Section, Type, Flags, 0, "", false);
- }
+ struct ELFEntrySizeKey {
+ std::string SectionName;
+ unsigned Flags;
+ unsigned EntrySize;
- MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
- unsigned Flags, unsigned EntrySize) {
- return getELFSection(Section, Type, Flags, EntrySize, "", false,
- MCSection::NonUniqueID, nullptr);
- }
+ ELFEntrySizeKey(StringRef SectionName, unsigned Flags, unsigned EntrySize)
+ : SectionName(SectionName), Flags(Flags), EntrySize(EntrySize) {}
- MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
- unsigned Flags, unsigned EntrySize,
- const Twine &Group, bool IsComdat) {
- return getELFSection(Section, Type, Flags, EntrySize, Group, IsComdat,
- MCSection::NonUniqueID, nullptr);
+ bool operator<(const ELFEntrySizeKey &Other) const {
+ if (SectionName != Other.SectionName)
+ return SectionName < Other.SectionName;
+ if (Flags != Other.Flags)
+ return Flags < Other.Flags;
+ return EntrySize < Other.EntrySize;
}
+ };
- MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
- unsigned Flags, unsigned EntrySize,
- const Twine &Group, bool IsComdat,
- unsigned UniqueID,
- const MCSymbolELF *LinkedToSym);
-
- MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
- unsigned Flags, unsigned EntrySize,
- const MCSymbolELF *Group, bool IsComdat,
- unsigned UniqueID,
- const MCSymbolELF *LinkedToSym);
-
- /// Get a section with the provided group identifier. This section is
- /// named by concatenating \p Prefix with '.' then \p Suffix. The \p Type
- /// describes the type of the section and \p Flags are used to further
- /// configure this named section.
- MCSectionELF *getELFNamedSection(const Twine &Prefix, const Twine &Suffix,
- unsigned Type, unsigned Flags,
- unsigned EntrySize = 0);
-
- MCSectionELF *createELFRelSection(const Twine &Name, unsigned Type,
- unsigned Flags, unsigned EntrySize,
- const MCSymbolELF *Group,
- const MCSectionELF *RelInfoSection);
-
- void renameELFSection(MCSectionELF *Section, StringRef Name);
+ // Symbols must be assigned to a section with a compatible entry size and
+ // flags. This map is used to assign unique IDs to sections to distinguish
+ // between sections with identical names but incompatible entry sizes and/or
+ // flags. This can occur when a symbol is explicitly assigned to a section,
+ // e.g. via __attribute__((section("myname"))).
+ std::map<ELFEntrySizeKey, unsigned> ELFEntrySizeMap;
- MCSectionELF *createELFGroupSection(const MCSymbolELF *Group,
- bool IsComdat);
+ // This set is used to record the generic mergeable section names seen.
+ // These are sections that are created as mergeable e.g. .debug_str. We need
+ // to avoid assigning non-mergeable symbols to these sections. It is used
+ // to prevent non-mergeable symbols being explicitly assigned to mergeable
+ // sections (e.g. via __attribute__((section("myname")))).
+ DenseSet<StringRef> ELFSeenGenericMergeableSections;
- void recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags,
- unsigned UniqueID, unsigned EntrySize);
+public:
+ explicit MCContext(const Triple &TheTriple, const MCAsmInfo *MAI,
+ const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
+ const SourceMgr *Mgr = nullptr,
+ MCTargetOptions const *TargetOpts = nullptr,
+ bool DoAutoReset = true,
+ StringRef Swift5ReflSegmentName = {});
+ MCContext(const MCContext &) = delete;
+ MCContext &operator=(const MCContext &) = delete;
+ ~MCContext();
- bool isELFImplicitMergeableSectionNamePrefix(StringRef Name);
+ Environment getObjectFileType() const { return Env; }
- bool isELFGenericMergeableSection(StringRef Name);
+ const StringRef &getSwift5ReflectionSegmentName() const {
+ return Swift5ReflectionSegmentName;
+ }
+ const Triple &getTargetTriple() const { return TT; }
+ const SourceMgr *getSourceManager() const { return SrcMgr; }
- /// Return the unique ID of the section with the given name, flags and entry
- /// size, if it exists.
- Optional<unsigned> getELFUniqueIDForEntsize(StringRef SectionName,
- unsigned Flags,
- unsigned EntrySize);
+ void initInlineSourceManager();
+ SourceMgr *getInlineSourceManager() { return InlineSrcMgr.get(); }
+ std::vector<const MDNode *> &getLocInfos() { return LocInfos; }
+ void setDiagnosticHandler(DiagHandlerTy DiagHandler) {
+ this->DiagHandler = DiagHandler;
+ }
- MCSectionGOFF *getGOFFSection(StringRef Section, SectionKind Kind);
+ void setObjectFileInfo(const MCObjectFileInfo *Mofi) { MOFI = Mofi; }
- MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics,
- SectionKind Kind, StringRef COMDATSymName,
- int Selection,
- unsigned UniqueID = GenericSectionID,
- const char *BeginSymName = nullptr);
+ const MCAsmInfo *getAsmInfo() const { return MAI; }
- MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics,
- SectionKind Kind,
- const char *BeginSymName = nullptr);
-
- /// Gets or creates a section equivalent to Sec that is associated with the
- /// section containing KeySym. For example, to create a debug info section
- /// associated with an inline function, pass the normal debug info section
- /// as Sec and the function symbol as KeySym.
- MCSectionCOFF *
- getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym,
- unsigned UniqueID = GenericSectionID);
-
- MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
- unsigned Flags = 0) {
- return getWasmSection(Section, K, Flags, nullptr);
- }
+ const MCRegisterInfo *getRegisterInfo() const { return MRI; }
- MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
- unsigned Flags, const char *BeginSymName) {
- return getWasmSection(Section, K, Flags, "", ~0, BeginSymName);
- }
+ const MCObjectFileInfo *getObjectFileInfo() const { return MOFI; }
- MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
- unsigned Flags, const Twine &Group,
- unsigned UniqueID) {
- return getWasmSection(Section, K, Flags, Group, UniqueID, nullptr);
- }
+ const MCSubtargetInfo *getSubtargetInfo() const { return MSTI; }
- MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
- unsigned Flags, const Twine &Group,
- unsigned UniqueID, const char *BeginSymName);
+ CodeViewContext &getCVContext();
- MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
- unsigned Flags, const MCSymbolWasm *Group,
- unsigned UniqueID, const char *BeginSymName);
+ void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
+ void setUseNamesOnTempLabels(bool Value) { UseNamesOnTempLabels = Value; }
- MCSectionXCOFF *getXCOFFSection(
- StringRef Section, SectionKind K,
- Optional<XCOFF::CsectProperties> CsectProp = None,
- bool MultiSymbolsAllowed = false, const char *BeginSymName = nullptr,
- Optional<XCOFF::DwarfSectionSubtypeFlags> DwarfSubtypeFlags = None);
+ /// \name Module Lifetime Management
+ /// @{
- // Create and save a copy of STI and return a reference to the copy.
- MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI);
+ /// reset - Return the object to its right-after-construction state to
+ /// prepare to process a new module.
+ void reset();
- /// @}
+ /// @}
- /// \name Dwarf Management
- /// @{
+ /// \name MCInst Management
- /// Get the compilation directory for DW_AT_comp_dir
- /// The compilation directory should be set with \c setCompilationDir before
- /// calling this function. If it is unset, an empty string will be returned.
- StringRef getCompilationDir() const { return CompilationDir; }
+ /// Create and return a new MC instruction.
+ MCInst *createMCInst();
- /// Set the compilation directory for DW_AT_comp_dir
- void setCompilationDir(StringRef S) { CompilationDir = S.str(); }
+ /// \name Symbol Management
+ /// @{
- /// Add an entry to the debug prefix map.
- void addDebugPrefixMapEntry(const std::string &From, const std::string &To);
+ /// Create and return a new linker temporary symbol with a unique but
+ /// unspecified name.
+ MCSymbol *createLinkerPrivateTempSymbol();
- // Remaps all debug directory paths in-place as per the debug prefix map.
- void RemapDebugPaths();
+ /// Create a temporary symbol with a unique name. The name will be omitted
+ /// in the symbol table if UseNamesOnTempLabels is false (default except
+ /// MCAsmStreamer). The overload without Name uses an unspecified name.
+ MCSymbol *createTempSymbol();
+ MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix = true);
- /// Get the main file name for use in error messages and debug
- /// info. This can be set to ensure we've got the correct file name
- /// after preprocessing or for -save-temps.
- const std::string &getMainFileName() const { return MainFileName; }
+ /// Create a temporary symbol with a unique name whose name cannot be
+ /// omitted in the symbol table. This is rarely used.
+ MCSymbol *createNamedTempSymbol();
+ MCSymbol *createNamedTempSymbol(const Twine &Name);
- /// Set the main file name and override the default.
- void setMainFileName(StringRef S) { MainFileName = std::string(S); }
+ /// Create the definition of a directional local symbol for numbered label
+ /// (used for "1:" definitions).
+ MCSymbol *createDirectionalLocalSymbol(unsigned LocalLabelVal);
- /// Creates an entry in the dwarf file and directory tables.
- Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
- unsigned FileNumber,
- Optional<MD5::MD5Result> Checksum,
- Optional<StringRef> Source, unsigned CUID);
+ /// Create and return a directional local symbol for numbered label (used
+ /// for "1b" or 1f" references).
+ MCSymbol *getDirectionalLocalSymbol(unsigned LocalLabelVal, bool Before);
- bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0);
+ /// Look up the symbol with the specified \p Name. If it exists,
+ /// return it. If not, create a forward reference and return it.
+ ///
+ /// \param Name - The symbol name, which must be unique across all symbols.
+ MCSymbol *getOrCreateSymbol(const Twine &Name);
- const std::map<unsigned, MCDwarfLineTable> &getMCDwarfLineTables() const {
- return MCDwarfLineTablesCUMap;
- }
+ /// Gets a symbol that will be defined to the final stack offset of a local
+ /// variable after codegen.
+ ///
+ /// \param Idx - The index of a local variable passed to \@llvm.localescape.
+ MCSymbol *getOrCreateFrameAllocSymbol(StringRef FuncName, unsigned Idx);
- MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) {
- return MCDwarfLineTablesCUMap[CUID];
- }
+ MCSymbol *getOrCreateParentFrameOffsetSymbol(StringRef FuncName);
- const MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) const {
- auto I = MCDwarfLineTablesCUMap.find(CUID);
- assert(I != MCDwarfLineTablesCUMap.end());
- return I->second;
- }
+ MCSymbol *getOrCreateLSDASymbol(StringRef FuncName);
- const SmallVectorImpl<MCDwarfFile> &getMCDwarfFiles(unsigned CUID = 0) {
- return getMCDwarfLineTable(CUID).getMCDwarfFiles();
- }
+ /// Get the symbol for \p Name, or null.
+ MCSymbol *lookupSymbol(const Twine &Name) const;
- const SmallVectorImpl<std::string> &getMCDwarfDirs(unsigned CUID = 0) {
- return getMCDwarfLineTable(CUID).getMCDwarfDirs();
- }
+ /// Set value for a symbol.
+ void setSymbolValue(MCStreamer &Streamer, StringRef Sym, uint64_t Val);
- unsigned getDwarfCompileUnitID() { return DwarfCompileUnitID; }
+ /// getSymbols - Get a reference to the symbol table for clients that
+ /// want to, for example, iterate over all symbols. 'const' because we
+ /// still want any modifications to the table itself to use the MCContext
+ /// APIs.
+ const SymbolTable &getSymbols() const { return Symbols; }
- void setDwarfCompileUnitID(unsigned CUIndex) {
- DwarfCompileUnitID = CUIndex;
- }
+ /// getInlineAsmLabel - Return the symbol for \p Name if it is a label
+ /// referenced in inline assembly, or null otherwise.
+ MCSymbol *getInlineAsmLabel(StringRef Name) const {
+ return InlineAsmUsedLabelNames.lookup(Name);
+ }
- /// Specifies the "root" file and directory of the compilation unit.
- /// These are "file 0" and "directory 0" in DWARF v5.
- void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir,
- StringRef Filename,
- Optional<MD5::MD5Result> Checksum,
- Optional<StringRef> Source) {
- getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum,
- Source);
- }
+ /// registerInlineAsmLabel - Records that the name is a label referenced in
+ /// inline assembly.
+ void registerInlineAsmLabel(MCSymbol *Sym);
- /// Reports whether MD5 checksum usage is consistent (all-or-none).
- bool isDwarfMD5UsageConsistent(unsigned CUID) const {
- return getMCDwarfLineTable(CUID).isMD5UsageConsistent();
- }
+ /// @}
- /// Saves the information from the currently parsed dwarf .loc directive
- /// and sets DwarfLocSeen. When the next instruction is assembled, an entry
- /// in the line number table with this information and the address of the
- /// instruction will be created.
- void setCurrentDwarfLoc(unsigned FileNum, unsigned Line, unsigned Column,
- unsigned Flags, unsigned Isa,
- unsigned Discriminator) {
- CurrentDwarfLoc.setFileNum(FileNum);
- CurrentDwarfLoc.setLine(Line);
- CurrentDwarfLoc.setColumn(Column);
- CurrentDwarfLoc.setFlags(Flags);
- CurrentDwarfLoc.setIsa(Isa);
- CurrentDwarfLoc.setDiscriminator(Discriminator);
- DwarfLocSeen = true;
- }
+ /// \name Section Management
+ /// @{
- void clearDwarfLocSeen() { DwarfLocSeen = false; }
+ enum : unsigned {
+ /// Pass this value as the UniqueID during section creation to get the
+ /// generic section with the given name and characteristics. The usual
+ /// sections such as .text use this ID.
+ GenericSectionID = ~0U
+ };
- bool getDwarfLocSeen() { return DwarfLocSeen; }
- const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; }
+ /// Return the MCSection for the specified mach-o section. This requires
+ /// the operands to be valid.
+ MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes,
+ unsigned Reserved2, SectionKind K,
+ const char *BeginSymName = nullptr);
- bool getGenDwarfForAssembly() { return GenDwarfForAssembly; }
- void setGenDwarfForAssembly(bool Value) { GenDwarfForAssembly = Value; }
- unsigned getGenDwarfFileNumber() { return GenDwarfFileNumber; }
+ MCSectionMachO *getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes, SectionKind K,
+ const char *BeginSymName = nullptr) {
+ return getMachOSection(Segment, Section, TypeAndAttributes, 0, K,
+ BeginSymName);
+ }
+
+ MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
+ unsigned Flags) {
+ return getELFSection(Section, Type, Flags, 0, "", false);
+ }
+
+ MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize) {
+ return getELFSection(Section, Type, Flags, EntrySize, "", false,
+ MCSection::NonUniqueID, nullptr);
+ }
+
+ MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const Twine &Group, bool IsComdat) {
+ return getELFSection(Section, Type, Flags, EntrySize, Group, IsComdat,
+ MCSection::NonUniqueID, nullptr);
+ }
+
+ MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const Twine &Group, bool IsComdat,
+ unsigned UniqueID,
+ const MCSymbolELF *LinkedToSym);
+
+ MCSectionELF *getELFSection(const Twine &Section, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const MCSymbolELF *Group, bool IsComdat,
+ unsigned UniqueID,
+ const MCSymbolELF *LinkedToSym);
+
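For illustration, a hedged usage sketch of the overloads above; Ctx is an
assumed MCContext and the section name and flags are invented. Because
sections are uniqued by their key, repeated calls yield the same object.

    #include "llvm/BinaryFormat/ELF.h"
    #include <cassert>

    MCSectionELF *S1 = Ctx.getELFSection(".mydata", ELF::SHT_PROGBITS,
                                         ELF::SHF_ALLOC | ELF::SHF_WRITE);
    MCSectionELF *S2 = Ctx.getELFSection(".mydata", ELF::SHT_PROGBITS,
                                         ELF::SHF_ALLOC | ELF::SHF_WRITE);
    assert(S1 == S2 && "identical keys are uniqued to one MCSection");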
+ /// Get a section with the provided group identifier. This section is
+ /// named by concatenating \p Prefix with '.' then \p Suffix. The \p Type
+ /// describes the type of the section and \p Flags are used to further
+ /// configure this named section.
+ MCSectionELF *getELFNamedSection(const Twine &Prefix, const Twine &Suffix,
+ unsigned Type, unsigned Flags,
+ unsigned EntrySize = 0);
+
+ MCSectionELF *createELFRelSection(const Twine &Name, unsigned Type,
+ unsigned Flags, unsigned EntrySize,
+ const MCSymbolELF *Group,
+ const MCSectionELF *RelInfoSection);
+
+ void renameELFSection(MCSectionELF *Section, StringRef Name);
+
+ MCSectionELF *createELFGroupSection(const MCSymbolELF *Group, bool IsComdat);
+
+ void recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags,
+ unsigned UniqueID, unsigned EntrySize);
+
+ bool isELFImplicitMergeableSectionNamePrefix(StringRef Name);
+
+ bool isELFGenericMergeableSection(StringRef Name);
+
+ /// Return the unique ID of the section with the given name, flags and entry
+ /// size, if it exists.
+ Optional<unsigned> getELFUniqueIDForEntsize(StringRef SectionName,
+ unsigned Flags,
+ unsigned EntrySize);
+
+ MCSectionGOFF *getGOFFSection(StringRef Section, SectionKind Kind,
+ MCSection *Parent, const MCExpr *SubsectionId);
+
+ MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics,
+ SectionKind Kind, StringRef COMDATSymName,
+ int Selection,
+ unsigned UniqueID = GenericSectionID,
+ const char *BeginSymName = nullptr);
+
+ MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics,
+ SectionKind Kind,
+ const char *BeginSymName = nullptr);
+
+ /// Gets or creates a section equivalent to Sec that is associated with the
+ /// section containing KeySym. For example, to create a debug info section
+ /// associated with an inline function, pass the normal debug info section
+ /// as Sec and the function symbol as KeySym.
+ MCSectionCOFF *
+ getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym,
+ unsigned UniqueID = GenericSectionID);
+
+ MCSectionSPIRV *getSPIRVSection();
+
+ MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
+ unsigned Flags = 0) {
+ return getWasmSection(Section, K, Flags, nullptr);
+ }
+
+ MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
+ unsigned Flags, const char *BeginSymName) {
+ return getWasmSection(Section, K, Flags, "", ~0, BeginSymName);
+ }
+
+ MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
+ unsigned Flags, const Twine &Group,
+ unsigned UniqueID) {
+ return getWasmSection(Section, K, Flags, Group, UniqueID, nullptr);
+ }
+
+ MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
+ unsigned Flags, const Twine &Group,
+ unsigned UniqueID, const char *BeginSymName);
+
+ MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
+ unsigned Flags, const MCSymbolWasm *Group,
+ unsigned UniqueID, const char *BeginSymName);
+
+ /// Get the section for the provided Section name.
+ MCSectionDXContainer *getDXContainerSection(StringRef Section, SectionKind K);
+
+ bool hasXCOFFSection(StringRef Section,
+ XCOFF::CsectProperties CsectProp) const;
+
+ MCSectionXCOFF *getXCOFFSection(
+ StringRef Section, SectionKind K,
+ Optional<XCOFF::CsectProperties> CsectProp = None,
+ bool MultiSymbolsAllowed = false, const char *BeginSymName = nullptr,
+ Optional<XCOFF::DwarfSectionSubtypeFlags> DwarfSubtypeFlags = None);
+
+ // Create and save a copy of STI and return a reference to the copy.
+ MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI);
+
+ uint8_t getBBAddrMapVersion() const { return BBAddrMapVersion; }
+
+ /// @}
+
+ /// \name Dwarf Management
+ /// @{
+
+ /// Get the compilation directory for DW_AT_comp_dir
+ /// The compilation directory should be set with \c setCompilationDir before
+ /// calling this function. If it is unset, an empty string will be returned.
+ StringRef getCompilationDir() const { return CompilationDir; }
+
+ /// Set the compilation directory for DW_AT_comp_dir
+ void setCompilationDir(StringRef S) { CompilationDir = S.str(); }
+
+ /// Add an entry to the debug prefix map.
+ void addDebugPrefixMapEntry(const std::string &From, const std::string &To);
+
+ // Remaps all debug directory paths in-place as per the debug prefix map.
+ void RemapDebugPaths();
+
+ /// Get the main file name for use in error messages and debug
+ /// info. This can be set to ensure we've got the correct file name
+ /// after preprocessing or for -save-temps.
+ const std::string &getMainFileName() const { return MainFileName; }
+
+ /// Set the main file name and override the default.
+ void setMainFileName(StringRef S) { MainFileName = std::string(S); }
+
+ /// Creates an entry in the dwarf file and directory tables.
+ Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
+ unsigned FileNumber,
+ Optional<MD5::MD5Result> Checksum,
+ Optional<StringRef> Source, unsigned CUID);
+
+ bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0);
+
+ const std::map<unsigned, MCDwarfLineTable> &getMCDwarfLineTables() const {
+ return MCDwarfLineTablesCUMap;
+ }
+
+ MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) {
+ return MCDwarfLineTablesCUMap[CUID];
+ }
+
+ const MCDwarfLineTable &getMCDwarfLineTable(unsigned CUID) const {
+ auto I = MCDwarfLineTablesCUMap.find(CUID);
+ assert(I != MCDwarfLineTablesCUMap.end());
+ return I->second;
+ }
+
+ const SmallVectorImpl<MCDwarfFile> &getMCDwarfFiles(unsigned CUID = 0) {
+ return getMCDwarfLineTable(CUID).getMCDwarfFiles();
+ }
+
+ const SmallVectorImpl<std::string> &getMCDwarfDirs(unsigned CUID = 0) {
+ return getMCDwarfLineTable(CUID).getMCDwarfDirs();
+ }
- void setGenDwarfFileNumber(unsigned FileNumber) {
- GenDwarfFileNumber = FileNumber;
- }
+ unsigned getDwarfCompileUnitID() { return DwarfCompileUnitID; }
- /// Specifies information about the "root file" for assembler clients
- /// (e.g., llvm-mc). Assumes compilation dir etc. have been set up.
- void setGenDwarfRootFile(StringRef FileName, StringRef Buffer);
+ void setDwarfCompileUnitID(unsigned CUIndex) { DwarfCompileUnitID = CUIndex; }
+
+ /// Specifies the "root" file and directory of the compilation unit.
+ /// These are "file 0" and "directory 0" in DWARF v5.
+ void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir,
+ StringRef Filename,
+ Optional<MD5::MD5Result> Checksum,
+ Optional<StringRef> Source) {
+ getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum,
+ Source);
+ }
+
+ /// Reports whether MD5 checksum usage is consistent (all-or-none).
+ bool isDwarfMD5UsageConsistent(unsigned CUID) const {
+ return getMCDwarfLineTable(CUID).isMD5UsageConsistent();
+ }
- const SetVector<MCSection *> &getGenDwarfSectionSyms() {
- return SectionsForRanges;
- }
+ /// Saves the information from the currently parsed dwarf .loc directive
+ /// and sets DwarfLocSeen. When the next instruction is assembled, an entry
+ /// in the line number table with this information and the address of the
+ /// instruction will be created.
+ void setCurrentDwarfLoc(unsigned FileNum, unsigned Line, unsigned Column,
+ unsigned Flags, unsigned Isa,
+ unsigned Discriminator) {
+ CurrentDwarfLoc.setFileNum(FileNum);
+ CurrentDwarfLoc.setLine(Line);
+ CurrentDwarfLoc.setColumn(Column);
+ CurrentDwarfLoc.setFlags(Flags);
+ CurrentDwarfLoc.setIsa(Isa);
+ CurrentDwarfLoc.setDiscriminator(Discriminator);
+ DwarfLocSeen = true;
+ }
- bool addGenDwarfSection(MCSection *Sec) {
- return SectionsForRanges.insert(Sec);
- }
+ void clearDwarfLocSeen() { DwarfLocSeen = false; }
- void finalizeDwarfSections(MCStreamer &MCOS);
+ bool getDwarfLocSeen() { return DwarfLocSeen; }
+ const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; }
- const std::vector<MCGenDwarfLabelEntry> &getMCGenDwarfLabelEntries() const {
- return MCGenDwarfLabelEntries;
- }
+ bool getGenDwarfForAssembly() { return GenDwarfForAssembly; }
+ void setGenDwarfForAssembly(bool Value) { GenDwarfForAssembly = Value; }
+ unsigned getGenDwarfFileNumber() { return GenDwarfFileNumber; }
+ EmitDwarfUnwindType emitDwarfUnwindInfo() const;
- void addMCGenDwarfLabelEntry(const MCGenDwarfLabelEntry &E) {
- MCGenDwarfLabelEntries.push_back(E);
- }
+ void setGenDwarfFileNumber(unsigned FileNumber) {
+ GenDwarfFileNumber = FileNumber;
+ }
- void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; }
- StringRef getDwarfDebugFlags() { return DwarfDebugFlags; }
+ /// Specifies information about the "root file" for assembler clients
+ /// (e.g., llvm-mc). Assumes compilation dir etc. have been set up.
+ void setGenDwarfRootFile(StringRef FileName, StringRef Buffer);
- void setDwarfDebugProducer(StringRef S) { DwarfDebugProducer = S; }
- StringRef getDwarfDebugProducer() { return DwarfDebugProducer; }
+ const SetVector<MCSection *> &getGenDwarfSectionSyms() {
+ return SectionsForRanges;
+ }
- void setDwarfFormat(dwarf::DwarfFormat f) { DwarfFormat = f; }
- dwarf::DwarfFormat getDwarfFormat() const { return DwarfFormat; }
+ bool addGenDwarfSection(MCSection *Sec) {
+ return SectionsForRanges.insert(Sec);
+ }
- void setDwarfVersion(uint16_t v) { DwarfVersion = v; }
- uint16_t getDwarfVersion() const { return DwarfVersion; }
+ void finalizeDwarfSections(MCStreamer &MCOS);
+
+ const std::vector<MCGenDwarfLabelEntry> &getMCGenDwarfLabelEntries() const {
+ return MCGenDwarfLabelEntries;
+ }
- /// @}
+ void addMCGenDwarfLabelEntry(const MCGenDwarfLabelEntry &E) {
+ MCGenDwarfLabelEntries.push_back(E);
+ }
+
+ void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; }
+ StringRef getDwarfDebugFlags() { return DwarfDebugFlags; }
- char *getSecureLogFile() { return SecureLogFile; }
- raw_fd_ostream *getSecureLog() { return SecureLog.get(); }
+ void setDwarfDebugProducer(StringRef S) { DwarfDebugProducer = S; }
+ StringRef getDwarfDebugProducer() { return DwarfDebugProducer; }
- void setSecureLog(std::unique_ptr<raw_fd_ostream> Value) {
- SecureLog = std::move(Value);
- }
+ void setDwarfFormat(dwarf::DwarfFormat f) { DwarfFormat = f; }
+ dwarf::DwarfFormat getDwarfFormat() const { return DwarfFormat; }
+
+ void setDwarfVersion(uint16_t v) { DwarfVersion = v; }
+ uint16_t getDwarfVersion() const { return DwarfVersion; }
+
+ /// @}
- bool getSecureLogUsed() { return SecureLogUsed; }
- void setSecureLogUsed(bool Value) { SecureLogUsed = Value; }
+ char *getSecureLogFile() { return SecureLogFile; }
+ raw_fd_ostream *getSecureLog() { return SecureLog.get(); }
+
+ void setSecureLog(std::unique_ptr<raw_fd_ostream> Value) {
+ SecureLog = std::move(Value);
+ }
- void *allocate(unsigned Size, unsigned Align = 8) {
- return Allocator.Allocate(Size, Align);
- }
+ bool getSecureLogUsed() { return SecureLogUsed; }
+ void setSecureLogUsed(bool Value) { SecureLogUsed = Value; }
- void deallocate(void *Ptr) {}
+ void *allocate(unsigned Size, unsigned Align = 8) {
+ return Allocator.Allocate(Size, Align);
+ }
- bool hadError() { return HadError; }
- void diagnose(const SMDiagnostic &SMD);
- void reportError(SMLoc L, const Twine &Msg);
- void reportWarning(SMLoc L, const Twine &Msg);
+ void deallocate(void *Ptr) {}
- const MCAsmMacro *lookupMacro(StringRef Name) {
- StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
- return (I == MacroMap.end()) ? nullptr : &I->getValue();
- }
+ bool hadError() { return HadError; }
+ void diagnose(const SMDiagnostic &SMD);
+ void reportError(SMLoc L, const Twine &Msg);
+ void reportWarning(SMLoc L, const Twine &Msg);
+
+ const MCAsmMacro *lookupMacro(StringRef Name) {
+ StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
+ return (I == MacroMap.end()) ? nullptr : &I->getValue();
+ }
- void defineMacro(StringRef Name, MCAsmMacro Macro) {
- MacroMap.insert(std::make_pair(Name, std::move(Macro)));
- }
+ void defineMacro(StringRef Name, MCAsmMacro Macro) {
+ MacroMap.insert(std::make_pair(Name, std::move(Macro)));
+ }
- void undefineMacro(StringRef Name) { MacroMap.erase(Name); }
+ void undefineMacro(StringRef Name) { MacroMap.erase(Name); }
- MCPseudoProbeTable &getMCPseudoProbeTable() { return PseudoProbeTable; }
- };
+ MCPseudoProbeTable &getMCPseudoProbeTable() { return PseudoProbeTable; }
+};
} // end namespace llvm
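
As a quick orientation, here is a minimal, non-authoritative sketch of driving the DWARF parts of this MCContext API; it assumes an already-initialized MCContext `Ctx`, and the directory, file name, and numbers are purely illustrative:

  Ctx.setCompilationDir("/tmp/build");   // consumed as DW_AT_comp_dir
  Ctx.setMainFileName("foo.s");
  Ctx.setDwarfVersion(5);

  Expected<unsigned> FileNo =
      Ctx.getDwarfFile("/tmp/build", "foo.s", /*FileNumber=*/1,
                       /*Checksum=*/None, /*Source=*/None, /*CUID=*/0);
  if (!FileNo)
    Ctx.reportError(SMLoc(), toString(FileNo.takeError()));
  else
    Ctx.setCurrentDwarfLoc(*FileNo, /*Line=*/42, /*Column=*/1, /*Flags=*/0,
                           /*Isa=*/0, /*Discriminator=*/0);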
diff --git a/llvm/include/llvm/MC/MCDXContainerStreamer.h b/llvm/include/llvm/MC/MCDXContainerStreamer.h
new file mode 100644
index 000000000000..ef1a95f71778
--- /dev/null
+++ b/llvm/include/llvm/MC/MCDXContainerStreamer.h
@@ -0,0 +1,49 @@
+//===- MCDXContainerStreamer.h - MCDXContainerStreamer Interface -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Overrides MCObjectStreamer to disable all unnecessary features with stubs.
+// The DXContainer format isn't a fully featured object format. It doesn't
+// support symbols, and initially it will not support instruction data since it
+// is used as a bitcode container for DXIL.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCDXCONTAINERSTREAMER_H
+#define LLVM_MC_MCDXCONTAINERSTREAMER_H
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+class MCAssembler;
+class MCExpr;
+class MCInst;
+class raw_ostream;
+
+class MCDXContainerStreamer : public MCObjectStreamer {
+public:
+ MCDXContainerStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCObjectStreamer(Context, std::move(TAB), std::move(OW),
+ std::move(Emitter)) {}
+
+ bool emitSymbolAttribute(MCSymbol *, MCSymbolAttr) override { return false; }
+ void emitCommonSymbol(MCSymbol *, uint64_t, unsigned) override {}
+ void emitZerofill(MCSection *, MCSymbol *Symbol = nullptr, uint64_t Size = 0,
+ unsigned ByteAlignment = 0, SMLoc Loc = SMLoc()) override {}
+
+private:
+ void emitInstToData(const MCInst &, const MCSubtargetInfo &) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCDXCONTAINERSTREAMER_H
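
A hedged construction sketch: `TAB`, `OW`, and `CE` stand in for the target-supplied MCAsmBackend, MCObjectWriter, and MCCodeEmitter, which in practice come from the registered DirectX target:

  auto Streamer = std::make_unique<MCDXContainerStreamer>(
      Ctx, std::move(TAB), std::move(OW), std::move(CE));
  // Symbol handling is stubbed out, so attribute emission reports failure:
  bool Handled = Streamer->emitSymbolAttribute(Sym, MCSA_Global); // false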
diff --git a/llvm/include/llvm/MC/MCDXContainerWriter.h b/llvm/include/llvm/MC/MCDXContainerWriter.h
new file mode 100644
index 000000000000..8ecb86c8a16f
--- /dev/null
+++ b/llvm/include/llvm/MC/MCDXContainerWriter.h
@@ -0,0 +1,45 @@
+//===- llvm/MC/MCDXContainerWriter.h - DXContainer Writer ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCDXCONTAINERWRITER_H
+#define LLVM_MC_MCDXCONTAINERWRITER_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+
+class raw_pwrite_stream;
+
+class MCDXContainerTargetWriter : public MCObjectTargetWriter {
+protected:
+ MCDXContainerTargetWriter() {}
+
+public:
+ virtual ~MCDXContainerTargetWriter();
+
+ Triple::ObjectFormatType getFormat() const override {
+ return Triple::DXContainer;
+ }
+ static bool classof(const MCObjectTargetWriter *W) {
+ return W->getFormat() == Triple::DXContainer;
+ }
+};
+
+/// Construct a new DXContainer writer instance.
+///
+/// \param MOTW - The target specific DXContainer writer subclass.
+/// \param OS - The stream to write to.
+/// \returns The constructed object writer.
+std::unique_ptr<MCObjectWriter>
+createDXContainerObjectWriter(std::unique_ptr<MCDXContainerTargetWriter> MOTW,
+ raw_pwrite_stream &OS);
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCDXCONTAINERWRITER_H
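
Usage follows the usual object-writer factory pattern; a sketch, assuming `MOTW` is an instance of a concrete target subclass and `OS` is a raw_pwrite_stream for the output file:

  std::unique_ptr<MCObjectWriter> OW =
      createDXContainerObjectWriter(std::move(MOTW), OS);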
diff --git a/llvm/include/llvm/MC/MCFixedLenDisassembler.h b/llvm/include/llvm/MC/MCDecoderOps.h
index 1edf3899c130..c1956993fca2 100644
--- a/llvm/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/llvm/include/llvm/MC/MCDecoderOps.h
@@ -1,14 +1,14 @@
-//===-- llvm/MC/MCFixedLenDisassembler.h - Decoder driver -------*- C++ -*-===//
+//===------------ llvm/MC/MCDecoderOps.h - Decoder driver -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// Fixed length disassembler decoder state machine driver.
+// Disassembler decoder state machine driver.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_MCFIXEDLENDISASSEMBLER_H
-#define LLVM_MC_MCFIXEDLENDISASSEMBLER_H
+#ifndef LLVM_MC_MCDECODEROPS_H
+#define LLVM_MC_MCDECODEROPS_H
namespace llvm {
diff --git a/llvm/include/llvm/MC/MCDirectives.h b/llvm/include/llvm/MC/MCDirectives.h
index 51e57ad37021..d6ab29febeeb 100644
--- a/llvm/include/llvm/MC/MCDirectives.h
+++ b/llvm/include/llvm/MC/MCDirectives.h
@@ -31,6 +31,7 @@ enum MCSymbolAttr {
MCSA_LGlobal, ///< .lglobl (XCOFF)
MCSA_Extern, ///< .extern (XCOFF)
MCSA_Hidden, ///< .hidden (ELF)
+ MCSA_Exported, ///< .globl _foo, exported (XCOFF)
MCSA_IndirectSymbol, ///< .indirect_symbol (MachO)
MCSA_Internal, ///< .internal (ELF)
MCSA_LazyReference, ///< .lazy_reference (MachO)
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 10037cd66ef1..de069ff95c2f 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -40,26 +40,35 @@ struct SymbolInfoTy {
private:
bool IsXCOFF;
+ bool HasType;
public:
SymbolInfoTy(uint64_t Addr, StringRef Name,
Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx,
bool Label)
- : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {}
- SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type)
- : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {}
+ : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true),
+ HasType(false) {}
+ SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
+ bool IsXCOFF = false)
+ : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {}
bool isXCOFF() const { return IsXCOFF; }
private:
friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
- assert(P1.IsXCOFF == P2.IsXCOFF &&
- "P1.IsXCOFF should be equal to P2.IsXCOFF.");
+    assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
+           "P1 and P2 must agree on both IsXCOFF and HasType.");
+
+ if (P1.IsXCOFF && P1.HasType)
+ return std::tie(P1.Addr, P1.Type, P1.Name) <
+ std::tie(P2.Addr, P2.Type, P2.Name);
+
if (P1.IsXCOFF)
return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
return std::tie(P1.Addr, P1.Name, P1.Type) <
- std::tie(P2.Addr, P2.Name, P2.Type);
+ std::tie(P2.Addr, P2.Name, P2.Type);
}
};
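
Illustrative only: non-XCOFF symbols constructed with an explicit type now take the (Addr, Name, Type) comparison path above, so sorting a batch of them looks like:

  std::vector<SymbolInfoTy> Syms;
  Syms.emplace_back(/*Addr=*/0x20, "bar", /*Type=*/2);
  Syms.emplace_back(/*Addr=*/0x10, "foo", /*Type=*/1);
  llvm::sort(Syms); // dispatches to the friend operator< above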
@@ -172,10 +181,9 @@ protected:
public:
// Helpers around MCSymbolizer
- bool tryAddingSymbolicOperand(MCInst &Inst,
- int64_t Value,
- uint64_t Address, bool IsBranch,
- uint64_t Offset, uint64_t InstSize) const;
+ bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
+ bool IsBranch, uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) const;
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
index ffac5ee5cb1f..8af3bb2296ec 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
@@ -15,7 +15,7 @@
#ifndef LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H
#define LLVM_MC_MCDISASSEMBLER_MCEXTERNALSYMBOLIZER_H
-#include "llvm-c/Disassembler.h"
+#include "llvm-c/DisassemblerTypes.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include <memory>
@@ -46,7 +46,8 @@ public:
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
int64_t Value, uint64_t Address, bool IsBranch,
- uint64_t Offset, uint64_t InstSize) override;
+ uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) override;
void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream,
int64_t Value,
uint64_t Address) override;
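
Out-of-tree symbolizers have to pick up the extra OpSize parameter as well; a hypothetical subclass sketch (MySymbolizer is not an in-tree class):

  class MySymbolizer : public MCSymbolizer {
  public:
    using MCSymbolizer::MCSymbolizer; // (MCContext &, unique_ptr<MCRelocationInfo>)
    bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CStream,
                                  int64_t Value, uint64_t Address,
                                  bool IsBranch, uint64_t Offset,
                                  uint64_t OpSize, uint64_t InstSize) override {
      // OpSize is the operand width in bytes, distinct from InstSize.
      return false; // stub: no symbolic operand added
    }
    void tryAddingPcLoadReferenceComment(raw_ostream &, int64_t,
                                         uint64_t) override {}
  };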
diff --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
index b966106007db..1efb63f1a142 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
@@ -17,9 +17,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
-#include <algorithm>
#include <cstdint>
#include <memory>
+#include <utility>
namespace llvm {
@@ -63,12 +63,13 @@ public:
/// \param Address - Load address of the instruction.
/// \param IsBranch - Is the instruction a branch?
/// \param Offset - Byte offset of the operand inside the inst.
+ /// \param OpSize - Size of the operand in bytes.
/// \param InstSize - Size of the instruction in bytes.
/// \return Whether a symbolic operand was added.
virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
int64_t Value, uint64_t Address,
bool IsBranch, uint64_t Offset,
- uint64_t InstSize) = 0;
+ uint64_t OpSize, uint64_t InstSize) = 0;
/// Try to add a comment on the PC-relative load.
/// For instance, in Mach-O, this is used to add annotations to instructions
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 7e72d56f3097..ce65b173b3d2 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -19,14 +19,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MD5.h"
#include <cassert>
#include <cstdint>
#include <string>
-#include <tuple>
#include <utility>
#include <vector>
@@ -36,6 +34,7 @@ template <typename T> class ArrayRef;
class MCAsmBackend;
class MCContext;
class MCObjectStreamer;
+class MCSection;
class MCStreamer;
class MCSymbol;
class raw_ostream;
@@ -63,6 +62,9 @@ public:
/// Emit the .debug_line_str section if appropriate.
void emitSection(MCStreamer *MCOS);
+
+  /// Returns the finalized section contents.
+ SmallString<0> getFinalizedData();
};
/// Instances of this class represent the name of the dwarf .file directive and
@@ -294,8 +296,8 @@ public:
RootFile.DirIndex = 0;
RootFile.Checksum = Checksum;
RootFile.Source = Source;
- trackMD5Usage(Checksum.hasValue());
- HasSource = Source.hasValue();
+ trackMD5Usage(Checksum.has_value());
+ HasSource = Source.has_value();
}
void resetFileTable() {
@@ -686,6 +688,7 @@ struct MCDwarfFrameInfo {
bool IsSimple = false;
unsigned RAReg = static_cast<unsigned>(INT_MAX);
bool IsBKeyFrame = false;
+ bool IsMTETaggedFrame = false;
};
class MCDwarfFrameEmitter {
diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h
index 8f2b176862c8..eac807aad908 100644
--- a/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/llvm/include/llvm/MC/MCELFStreamer.h
@@ -10,12 +10,19 @@
#define LLVM_MC_MCELFSTREAMER_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCObjectStreamer.h"
namespace llvm {
+class MCContext;
+class MCDataFragment;
+class MCFragment;
+class MCObjectWriter;
+class MCSection;
+class MCSubtargetInfo;
+class MCSymbol;
+class MCSymbolRefExpr;
class MCAsmBackend;
class MCCodeEmitter;
class MCExpr;
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index 736fdd992063..b6329b131624 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -17,7 +17,6 @@
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Alignment.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/SMLoc.h"
#include <cstdint>
#include <utility>
@@ -294,7 +293,7 @@ public:
class MCAlignFragment : public MCFragment {
/// The alignment to ensure, in bytes.
- unsigned Alignment;
+ Align Alignment;
/// Flag to indicate that (optimal) NOPs should be emitted instead
/// of using the provided value. The exact interpretation of this flag is
@@ -315,12 +314,12 @@ class MCAlignFragment : public MCFragment {
const MCSubtargetInfo *STI;
public:
- MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize,
+ MCAlignFragment(Align Alignment, int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit, MCSection *Sec = nullptr)
: MCFragment(FT_Align, false, Sec), Alignment(Alignment), EmitNops(false),
Value(Value), ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {}
- unsigned getAlignment() const { return Alignment; }
+ Align getAlignment() const { return Alignment; }
int64_t getValue() const { return Value; }
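
Callers now pass a typed llvm::Align rather than a raw unsigned; a sketch:

  MCAlignFragment AF(Align(16), /*Value=*/0, /*ValueSize=*/1,
                     /*MaxBytesToEmit=*/16);
  assert(AF.getAlignment() == Align(16)); // no raw-unsigned round trip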
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 632a7d8f820e..a937f8203a0d 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -14,10 +14,13 @@
#ifndef LLVM_MC_MCINSTRANALYSIS_H
#define LLVM_MC_MCINSTRANALYSIS_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include <cstdint>
+#include <vector>
namespace llvm {
diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h
index e8ffd29170e6..120c3482ce70 100644
--- a/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/llvm/include/llvm/MC/MCInstrDesc.h
@@ -14,10 +14,11 @@
#ifndef LLVM_MC_MCINSTRDESC_H
#define LLVM_MC_MCINSTRDESC_H
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/MC/MCRegister.h"
namespace llvm {
+class MCRegisterInfo;
class MCInst;
@@ -148,6 +149,7 @@ enum Flag {
Variadic,
HasOptionalDef,
Pseudo,
+ Meta,
Return,
EHScopeReturn,
Call,
@@ -263,6 +265,10 @@ public:
/// correspond to a real machine instruction.
bool isPseudo() const { return Flags & (1ULL << MCID::Pseudo); }
+ /// Return true if this is a meta instruction that doesn't
+ /// produce any output in the form of executable instructions.
+ bool isMetaInstruction() const { return Flags & (1ULL << MCID::Meta); }
+
/// Return true if the instruction is a return.
bool isReturn() const { return Flags & (1ULL << MCID::Return); }
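
A small sketch of the intended use, assuming `MCID` is the MCInstrDesc of an instruction under inspection:

  // Skip instructions that contribute no bytes to the object file.
  bool EmitsCode = !MCID.isPseudo() && !MCID.isMetaInstruction();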
diff --git a/llvm/include/llvm/MC/MCInstrInfo.h b/llvm/include/llvm/MC/MCInstrInfo.h
index 598e24257e5d..84995b1e93fe 100644
--- a/llvm/include/llvm/MC/MCInstrInfo.h
+++ b/llvm/include/llvm/MC/MCInstrInfo.h
@@ -13,6 +13,7 @@
#ifndef LLVM_MC_MCINSTRINFO_H
#define LLVM_MC_MCINSTRINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstrDesc.h"
#include <cassert>
diff --git a/llvm/include/llvm/MC/MCLinkerOptimizationHint.h b/llvm/include/llvm/MC/MCLinkerOptimizationHint.h
index 003491f32f75..b91fbc62aa75 100644
--- a/llvm/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/llvm/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -28,6 +27,7 @@ namespace llvm {
class MachObjectWriter;
class MCAsmLayout;
class MCSymbol;
+class raw_ostream;
/// Linker Optimization Hint Type.
enum MCLOHType {
diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h
index f4f9c474cdcd..149373dd2b54 100644
--- a/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -264,6 +264,8 @@ public:
bool IsPCRel) const override;
uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+
+ void writeAddrsigSection(MCAssembler &Asm);
};
/// Construct a new Mach-O writer instance.
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 3c1d10c4e62f..ebc9b95d6d4e 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -13,13 +13,13 @@
#ifndef LLVM_MC_MCOBJECTFILEINFO_H
#define LLVM_MC_MCOBJECTFILEINFO_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Swift.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CodeGen.h"
#include "llvm/Support/VersionTuple.h"
+#include <array>
+
namespace llvm {
class MCContext;
class MCSection;
@@ -213,6 +213,7 @@ protected:
MCSection *LazySymbolPointerSection = nullptr;
MCSection *NonLazySymbolPointerSection = nullptr;
MCSection *ThreadLocalPointerSection = nullptr;
+ MCSection *AddrSigSection = nullptr;
/// COFF specific sections.
MCSection *DrectveSection = nullptr;
@@ -224,6 +225,9 @@ protected:
MCSection *GIATsSection = nullptr;
MCSection *GLJMPSection = nullptr;
+ // GOFF specific sections.
+ MCSection *PPA1Section = nullptr;
+
// XCOFF specific sections
MCSection *TOCBaseSection = nullptr;
MCSection *ReadOnly8Section = nullptr;
@@ -410,6 +414,7 @@ public:
MCSection *getThreadLocalPointerSection() const {
return ThreadLocalPointerSection;
}
+ MCSection *getAddrSigSection() const { return AddrSigSection; }
// COFF specific sections.
MCSection *getDrectveSection() const { return DrectveSection; }
@@ -421,6 +426,9 @@ public:
MCSection *getGIATsSection() const { return GIATsSection; }
MCSection *getGLJMPSection() const { return GLJMPSection; }
+ // GOFF specific sections.
+ MCSection *getPPA1Section() const { return PPA1Section; }
+
// XCOFF specific sections
MCSection *getTOCBaseSection() const { return TOCBaseSection; }
@@ -448,8 +456,10 @@ private:
void initELFMCObjectFileInfo(const Triple &T, bool Large);
void initGOFFMCObjectFileInfo(const Triple &T);
void initCOFFMCObjectFileInfo(const Triple &T);
+ void initSPIRVMCObjectFileInfo(const Triple &T);
void initWasmMCObjectFileInfo(const Triple &T);
void initXCOFFMCObjectFileInfo(const Triple &T);
+ void initDXContainerObjectFileInfo(const Triple &T);
MCSection *getDwarfComdatSection(const char *Name, uint64_t Hash) const;
public:
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 183fd79fb9fc..6536c81d4aac 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -11,11 +11,17 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
namespace llvm {
+class MCContext;
+class MCInst;
+class MCObjectWriter;
+class MCSymbol;
+struct MCDwarfFrameInfo;
class MCAssembler;
class MCCodeEmitter;
class MCSubtargetInfo;
diff --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h
index d2a2f1a13ff5..a8e24a0c56ba 100644
--- a/llvm/include/llvm/MC/MCObjectWriter.h
+++ b/llvm/include/llvm/MC/MCObjectWriter.h
@@ -10,6 +10,7 @@
#define LLVM_MC_MCOBJECTWRITER_H
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSymbol.h"
#include <cstdint>
namespace llvm {
@@ -32,6 +33,9 @@ class MCValue;
/// should be emitted as part of writeObject().
class MCObjectWriter {
protected:
+ std::vector<const MCSymbol *> AddrsigSyms;
+ bool EmitAddrsigSection = false;
+
MCObjectWriter() = default;
public:
@@ -91,11 +95,15 @@ public:
/// Tell the object writer to emit an address-significance table during
/// writeObject(). If this function is not called, all symbols are treated as
/// address-significant.
- virtual void emitAddrsigSection() {}
+ void emitAddrsigSection() { EmitAddrsigSection = true; }
+
+ bool getEmitAddrsigSection() { return EmitAddrsigSection; }
/// Record the given symbol in the address-significance table to be written
/// during writeObject().
- virtual void addAddrsigSymbol(const MCSymbol *Sym) {}
+ void addAddrsigSymbol(const MCSymbol *Sym) { AddrsigSyms.push_back(Sym); }
+
+ std::vector<const MCSymbol *> &getAddrsigSyms() { return AddrsigSyms; }
/// Write the object file and return the number of bytes written.
///
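
With the hooks now non-virtual, every object writer shares one collection path; a usage sketch, where `OW` is any MCObjectWriter and `Sym` a const MCSymbol *:

  OW.emitAddrsigSection();  // opt in; otherwise all symbols stay significant
  OW.addAddrsigSymbol(Sym);
  if (OW.getEmitAddrsigSection())
    for (const MCSymbol *S : OW.getAddrsigSyms())
      (void)S; // format-specific writers serialize these in writeObject()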
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 06796979b4fc..850a9cffe73a 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -12,10 +12,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCAsmMacro.h"
-#include <algorithm>
#include <cassert>
#include <cstddef>
-#include <cstdint>
#include <string>
namespace llvm {
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 29386ffc45ac..4a1291856a20 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -10,20 +10,20 @@
#define LLVM_MC_MCPARSER_MCASMPARSER_H
#include "llvm/ADT/None.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCAsmMacro.h"
#include "llvm/Support/SMLoc.h"
#include <cstdint>
-#include <ctime>
#include <string>
#include <utility>
namespace llvm {
+class MCAsmLexer;
class MCAsmInfo;
class MCAsmParserExtension;
class MCContext;
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h b/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
index fc10e33bcf6b..cbabc2c9d69d 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -9,9 +9,8 @@
#ifndef LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H
#define LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Support/SMLoc.h"
diff --git a/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index faf0a4474c8a..22f66a011ece 100644
--- a/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -10,7 +10,6 @@
#define LLVM_MC_MCPARSER_MCPARSEDASMOPERAND_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/SMLoc.h"
#include <string>
@@ -63,6 +62,13 @@ public:
/// isMem - Is this a memory operand?
virtual bool isMem() const = 0;
+  /// isMemUseUpRegs - Does this memory operand consume the available base and
+  /// index registers? For example, Intel MS-style inline asm may reference
+  /// ARR[baseReg + IndexReg + ...], where the [...] expression uses up those
+  /// registers, so no extra register is left for ARR itself (e.g. to compute
+  /// ARR's address into a register, or to use another base register in the
+  /// PIC model).
+ virtual bool isMemUseUpRegs() const { return false; }
+
/// getStartLoc - Get the location of the first token of this operand.
virtual SMLoc getStartLoc() const = 0;
/// getEndLoc - Get the location of the last token of this operand.
@@ -77,10 +83,6 @@ public:
/// assembly.
virtual bool isOffsetOfLocal() const { return false; }
- /// isMemPlaceholder - Do we need to ignore the constraint, rather than emit
- /// code? Only valid when parsing MS-style inline assembly.
- virtual bool isMemPlaceholder(const MCInstrDesc &Desc) const { return false; }
-
/// getOffsetOfLoc - Get the location of the offset operator.
virtual SMLoc getOffsetOfLoc() const { return SMLoc(); }
diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 908ee30e4060..1d380c6a00b7 100644
--- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -11,10 +11,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/SMLoc.h"
@@ -23,9 +21,12 @@
namespace llvm {
+class MCContext;
class MCInst;
+class MCInstrInfo;
class MCStreamer;
class MCSubtargetInfo;
+class MCSymbol;
template <typename T> class SmallVectorImpl;
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
@@ -100,10 +101,14 @@ struct AsmRewrite {
int64_t Val;
StringRef Label;
IntelExpr IntelExp;
+ bool IntelExpRestricted;
public:
- AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
- : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {}
+ AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0,
+ bool Restricted = false)
+ : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {
+ IntelExpRestricted = Restricted;
+ }
AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
: AsmRewrite(kind, loc, len) { Label = label; }
AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 9ff68f4236ca..d10d6015cd3c 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -55,6 +55,7 @@
#include <tuple>
#include <type_traits>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
namespace llvm {
@@ -82,10 +83,9 @@ struct MCPseudoProbeFuncDesc {
void print(raw_ostream &OS);
};
-class MCPseudoProbe;
class MCDecodedPseudoProbe;
-// An inline frame has the form <Guid, ProbeID>
+// An inline frame has the form <CalleeGuid, ProbeID>
using InlineSite = std::tuple<uint64_t, uint32_t>;
using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
// GUID to PseudoProbeFuncDesc map
@@ -95,7 +95,6 @@ using GUIDProbeFunctionMap =
using AddressProbesMap =
std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
-class MCPseudoProbeInlineTree;
class MCDecodedPseudoProbeInlineTree;
class MCPseudoProbeBase {
@@ -272,7 +271,7 @@ public:
MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site) {}
// Return false if it's a dummy inline site
- bool hasInlineSite() const { return std::get<0>(ISite) != 0; }
+ bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
};
/// Instances of this class represent the pseudo probes inserted into a compile
@@ -355,6 +354,15 @@ public:
// Decode pseudo_probe section to build address to probes map.
bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size);
+  // Decode pseudo_probe section to build address to probes map for specified
+ // functions only.
+ bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
+ std::unordered_set<uint64_t> &GuildFilter);
+
+ bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
+ uint64_t &LastAddr,
+ std::unordered_set<uint64_t> &GuildFilter);
+
// Print pseudo_probe_desc section info
void printGUID2FuncDescMap(raw_ostream &OS);
diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index 65436dc74c3e..7165a2982d1b 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -580,6 +580,9 @@ public:
bool isSuperOrSubRegisterEq(MCRegister RegA, MCRegister RegB) const {
return isSubRegisterEq(RegA, RegB) || isSuperRegister(RegA, RegB);
}
+
+ /// Returns true if the two registers are equal or alias each other.
+ bool regsOverlap(MCRegister RegA, MCRegister RegB) const;
};
//===----------------------------------------------------------------------===//
@@ -698,6 +701,11 @@ public:
// unit, we can allow a 0 differential here.
advance();
}
+
+ MCRegUnitIterator &operator++() {
+ MCRegisterInfo::DiffListIterator::operator++();
+ return *this;
+ }
};
/// MCRegUnitMaskIterator enumerates a list of register units and their
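
A sketch of the new overlap query, where `MRI` is an MCRegisterInfo; it subsumes the explicit equality, sub-register, and super-register checks above:

  if (MRI.regsOverlap(RegA, RegB)) {
    // RegA and RegB are equal or share at least one register unit.
  }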
diff --git a/llvm/include/llvm/MC/MCSPIRVObjectWriter.h b/llvm/include/llvm/MC/MCSPIRVObjectWriter.h
new file mode 100644
index 000000000000..a8baf96b8384
--- /dev/null
+++ b/llvm/include/llvm/MC/MCSPIRVObjectWriter.h
@@ -0,0 +1,40 @@
+//===-- llvm/MC/MCSPIRVObjectWriter.h - SPIR-V Object Writer ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSPIRVOBJECTWRITER_H
+#define LLVM_MC_MCSPIRVOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+
+namespace llvm {
+
+class MCSPIRVObjectTargetWriter : public MCObjectTargetWriter {
+protected:
+ explicit MCSPIRVObjectTargetWriter() {}
+
+public:
+ Triple::ObjectFormatType getFormat() const override { return Triple::SPIRV; }
+ static bool classof(const MCObjectTargetWriter *W) {
+ return W->getFormat() == Triple::SPIRV;
+ }
+};
+
+/// Construct a new SPIR-V writer instance.
+///
+/// \param MOTW - The target specific SPIR-V writer subclass.
+/// \param OS - The stream to write to.
+/// \returns The constructed object writer.
+std::unique_ptr<MCObjectWriter>
+createSPIRVObjectWriter(std::unique_ptr<MCSPIRVObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS);
+
+} // namespace llvm
+
+#endif
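
Construction mirrors the other object-writer factories; a sketch with a target-registered `MOTW` and an output raw_pwrite_stream `OS`:

  std::unique_ptr<MCObjectWriter> OW =
      createSPIRVObjectWriter(std::move(MOTW), OS);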
diff --git a/llvm/include/llvm/MC/MCSPIRVStreamer.h b/llvm/include/llvm/MC/MCSPIRVStreamer.h
new file mode 100644
index 000000000000..7366e0a9d82c
--- /dev/null
+++ b/llvm/include/llvm/MC/MCSPIRVStreamer.h
@@ -0,0 +1,50 @@
+//===- MCSPIRVStreamer.h - MCStreamer SPIR-V Object File Interface -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Overrides MCObjectStreamer to disable all unnecessary features with stubs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSPIRVSTREAMER_H
+#define LLVM_MC_MCSPIRVSTREAMER_H
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+class MCAssembler;
+class MCExpr;
+class MCInst;
+class raw_ostream;
+
+class MCSPIRVStreamer : public MCObjectStreamer {
+public:
+ MCSPIRVStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCObjectStreamer(Context, std::move(TAB), std::move(OW),
+ std::move(Emitter)) {}
+
+ bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
+ return false;
+ }
+ void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) override {}
+ void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
+ uint64_t Size = 0, unsigned ByteAlignment = 0,
+ SMLoc Loc = SMLoc()) override {}
+
+private:
+ void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 4335092f0920..2f7e17123c19 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -46,7 +46,9 @@ public:
SV_GOFF,
SV_MachO,
SV_Wasm,
- SV_XCOFF
+ SV_XCOFF,
+ SV_SPIRV,
+ SV_DXContainer,
};
/// Express the state of bundle locked groups while emitting code.
@@ -184,13 +186,13 @@ public:
void dump() const;
- virtual void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ virtual void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const = 0;
/// Return true if a .align directive should use "optimized nops" to fill
/// instead of 0s.
- virtual bool UseCodeAlign() const = 0;
+ virtual bool useCodeAlign() const = 0;
/// Check whether this section is "virtual", that is has no actual object
/// file contents.
diff --git a/llvm/include/llvm/MC/MCSectionCOFF.h b/llvm/include/llvm/MC/MCSectionCOFF.h
index 3ece6eb904bc..373863e21ff0 100644
--- a/llvm/include/llvm/MC/MCSectionCOFF.h
+++ b/llvm/include/llvm/MC/MCSectionCOFF.h
@@ -61,7 +61,7 @@ private:
public:
/// Decides whether a '.section' directive should be printed before the
/// section name
- bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+ bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
unsigned getCharacteristics() const { return Characteristics; }
MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; }
@@ -69,10 +69,10 @@ public:
void setSelection(int Selection) const;
- void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override;
- bool UseCodeAlign() const override;
+ bool useCodeAlign() const override;
bool isVirtualSection() const override;
StringRef getVirtualSectionKind() const override;
diff --git a/llvm/include/llvm/MC/MCSectionDXContainer.h b/llvm/include/llvm/MC/MCSectionDXContainer.h
new file mode 100644
index 000000000000..014684a93529
--- /dev/null
+++ b/llvm/include/llvm/MC/MCSectionDXContainer.h
@@ -0,0 +1,38 @@
+//===- MCSectionDXContainer.h - DXContainer MC Sections ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionDXContainer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONDXCONTAINER_H
+#define LLVM_MC_MCSECTIONDXCONTAINER_H
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/SectionKind.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class MCSectionDXContainer final : public MCSection {
+ friend class MCContext;
+
+ MCSectionDXContainer(StringRef Name, SectionKind K, MCSymbol *Begin)
+ : MCSection(SV_DXContainer, Name, K, Begin) {}
+
+public:
+ void printSwitchToSection(const MCAsmInfo &, const Triple &, raw_ostream &,
+ const MCExpr *) const override;
+ bool useCodeAlign() const override { return false; }
+ bool isVirtualSection() const override { return false; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCSECTIONDXCONTAINER_H
diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h
index 8b17df25a158..3b5239394493 100644
--- a/llvm/include/llvm/MC/MCSectionELF.h
+++ b/llvm/include/llvm/MC/MCSectionELF.h
@@ -21,8 +21,6 @@
namespace llvm {
-class MCSymbol;
-
/// This represents a section on linux, lots of unix variants and some bare
/// metal systems.
class MCSectionELF final : public MCSection {
@@ -69,7 +67,7 @@ private:
public:
/// Decides whether a '.section' directive should be printed before the
/// section name
- bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+ bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
unsigned getType() const { return Type; }
unsigned getFlags() const { return Flags; }
@@ -78,10 +76,10 @@ public:
const MCSymbolELF *getGroup() const { return Group.getPointer(); }
bool isComdat() const { return Group.getInt(); }
- void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override;
- bool UseCodeAlign() const override;
+ bool useCodeAlign() const override;
bool isVirtualSection() const override;
StringRef getVirtualSectionKind() const override;
diff --git a/llvm/include/llvm/MC/MCSectionGOFF.h b/llvm/include/llvm/MC/MCSectionGOFF.h
index 4ba7f79f9696..d866329461ce 100644
--- a/llvm/include/llvm/MC/MCSectionGOFF.h
+++ b/llvm/include/llvm/MC/MCSectionGOFF.h
@@ -15,6 +15,7 @@
#ifndef LLVM_MC_MCSECTIONGOFF_H
#define LLVM_MC_MCSECTIONGOFF_H
+#include "llvm/BinaryFormat/GOFF.h"
#include "llvm/MC/MCSection.h"
#include "llvm/Support/raw_ostream.h"
@@ -24,21 +25,27 @@ class MCExpr;
class MCSectionGOFF final : public MCSection {
private:
+ MCSection *Parent;
+ const MCExpr *SubsectionId;
+
friend class MCContext;
- MCSectionGOFF(StringRef Name, SectionKind K)
- : MCSection(SV_GOFF, Name, K, nullptr) {}
+ MCSectionGOFF(StringRef Name, SectionKind K, MCSection *P, const MCExpr *Sub)
+ : MCSection(SV_GOFF, Name, K, nullptr), Parent(P), SubsectionId(Sub) {}
public:
- void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override {
OS << "\t.section\t\"" << getName() << "\"\n";
}
- bool UseCodeAlign() const override { return false; }
+ bool useCodeAlign() const override { return false; }
bool isVirtualSection() const override { return false; }
+ MCSection *getParent() const { return Parent; }
+ const MCExpr *getSubsectionId() const { return SubsectionId; }
+
static bool classof(const MCSection *S) { return S->getVariant() == SV_GOFF; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCSectionMachO.h b/llvm/include/llvm/MC/MCSectionMachO.h
index bf8940524e5a..fdf1773d4002 100644
--- a/llvm/include/llvm/MC/MCSectionMachO.h
+++ b/llvm/include/llvm/MC/MCSectionMachO.h
@@ -68,10 +68,10 @@ public:
bool &TAAParsed, // Out.
unsigned &StubSize); // Out.
- void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override;
- bool UseCodeAlign() const override;
+ bool useCodeAlign() const override;
bool isVirtualSection() const override;
static bool classof(const MCSection *S) {
diff --git a/llvm/include/llvm/MC/MCSectionSPIRV.h b/llvm/include/llvm/MC/MCSectionSPIRV.h
new file mode 100644
index 000000000000..6534599d2091
--- /dev/null
+++ b/llvm/include/llvm/MC/MCSectionSPIRV.h
@@ -0,0 +1,41 @@
+//===- MCSectionSPIRV.h - SPIR-V Machine Code Sections ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionSPIRV class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONSPIRV_H
+#define LLVM_MC_MCSECTIONSPIRV_H
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/SectionKind.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class MCSectionSPIRV final : public MCSection {
+ friend class MCContext;
+
+ MCSectionSPIRV(SectionKind K, MCSymbol *Begin)
+ : MCSection(SV_SPIRV, "", K, Begin) {}
+ // TODO: Add StringRef Name to MCSectionSPIRV.
+
+public:
+ ~MCSectionSPIRV() = default;
+ void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ raw_ostream &OS,
+ const MCExpr *Subsection) const override {}
+ bool useCodeAlign() const override { return false; }
+ bool isVirtualSection() const override { return false; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCSECTIONSPIRV_H
diff --git a/llvm/include/llvm/MC/MCSectionWasm.h b/llvm/include/llvm/MC/MCSectionWasm.h
index f34dd6b3507c..579f92a75056 100644
--- a/llvm/include/llvm/MC/MCSectionWasm.h
+++ b/llvm/include/llvm/MC/MCSectionWasm.h
@@ -58,10 +58,10 @@ public:
const MCSymbolWasm *getGroup() const { return Group; }
unsigned getSegmentFlags() const { return SegmentFlags; }
- void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override;
- bool UseCodeAlign() const override;
+ bool useCodeAlign() const override;
bool isVirtualSection() const override;
bool isWasmData() const {
diff --git a/llvm/include/llvm/MC/MCSectionXCOFF.h b/llvm/include/llvm/MC/MCSectionXCOFF.h
index 1dafdd3ac500..95332647c9be 100644
--- a/llvm/include/llvm/MC/MCSectionXCOFF.h
+++ b/llvm/include/llvm/MC/MCSectionXCOFF.h
@@ -38,6 +38,7 @@ class MCSectionXCOFF final : public MCSection {
Optional<XCOFF::DwarfSectionSubtypeFlags> DwarfSubtypeFlags;
bool MultiSymbolsAllowed;
static constexpr unsigned DefaultAlignVal = 4;
+ static constexpr unsigned DefaultTextAlignVal = 32;
MCSectionXCOFF(StringRef Name, XCOFF::StorageMappingClass SMC,
XCOFF::SymbolType ST, SectionKind K, MCSymbolXCOFF *QualName,
@@ -57,9 +58,14 @@ class MCSectionXCOFF final : public MCSection {
QualName->setRepresentedCsect(this);
QualName->setStorageClass(XCOFF::C_HIDEXT);
- // A csect is 4 byte aligned by default, except for undefined symbol csects.
- if (ST != XCOFF::XTY_ER)
- setAlignment(Align(DefaultAlignVal));
+ if (ST != XCOFF::XTY_ER) {
+ // For a csect for program code, set the alignment to 32 bytes by default.
+ // For other csects, set the alignment to 4 bytes by default.
+ if (SMC == XCOFF::XMC_PR)
+ setAlignment(Align(DefaultTextAlignVal));
+ else
+ setAlignment(Align(DefaultAlignVal));
+ }
}
MCSectionXCOFF(StringRef Name, SectionKind K, MCSymbolXCOFF *QualName,
@@ -74,9 +80,8 @@ class MCSectionXCOFF final : public MCSection {
// FIXME: use a more meaningful name for non csect sections.
QualName->setRepresentedCsect(this);
- // Set default alignment 4 for all non csect sections for now.
- // FIXME: set different alignments according to section types.
- setAlignment(Align(DefaultAlignVal));
+ // Use default text alignment as the alignment for DWARF sections.
+ setAlignment(Align(DefaultTextAlignVal));
}
void printCsectDirective(raw_ostream &OS) const;
@@ -95,24 +100,28 @@ public:
XCOFF::StorageClass getStorageClass() const {
return QualName->getStorageClass();
}
+ XCOFF::VisibilityType getVisibilityType() const {
+ return QualName->getVisibilityType();
+ }
XCOFF::SymbolType getCSectType() const {
assert(isCsect() && "Only csect section has symbol type property!");
return CsectProp->Type;
}
MCSymbolXCOFF *getQualNameSymbol() const { return QualName; }
- void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ void printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override;
- bool UseCodeAlign() const override;
+ bool useCodeAlign() const override;
bool isVirtualSection() const override;
StringRef getSymbolTableName() const { return SymbolTableName; }
bool isMultiSymbolsAllowed() const { return MultiSymbolsAllowed; }
- bool isCsect() const { return CsectProp.hasValue(); }
- bool isDwarfSect() const { return DwarfSubtypeFlags.hasValue(); }
+ bool isCsect() const { return CsectProp.has_value(); }
+ bool isDwarfSect() const { return DwarfSubtypeFlags.has_value(); }
Optional<XCOFF::DwarfSectionSubtypeFlags> getDwarfSubtypeFlags() const {
return DwarfSubtypeFlags;
}
+ Optional<XCOFF::CsectProperties> getCsectProp() const { return CsectProp; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 3d6c512bfe73..e71014b8cccf 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -13,22 +13,20 @@
#ifndef LLVM_MC_MCSTREAMER_H
#define LLVM_MC_MCSTREAMER_H
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCPseudoProbe.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWinEH.h"
+#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/ARMTargetParser.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <cstdint>
@@ -39,20 +37,24 @@
namespace llvm {
+class APInt;
class AssemblerConstantPools;
class MCAsmBackend;
+class MCAssembler;
class MCContext;
-struct MCDwarfFrameInfo;
class MCExpr;
+class MCFragment;
class MCInst;
class MCInstPrinter;
class MCRegister;
class MCSection;
class MCStreamer;
-class MCSymbolRefExpr;
class MCSubtargetInfo;
-class raw_ostream;
+class MCSymbol;
+class MCSymbolRefExpr;
+class Triple;
class Twine;
+class raw_ostream;
namespace codeview {
struct DefRangeRegisterRelHeader;
@@ -111,7 +113,7 @@ public:
/// Update streamer for a new active section.
///
- /// This is called by PopSection and SwitchSection, if the current
+ /// This is called by popSection and switchSection, if the current
/// section changes.
virtual void changeSection(const MCSection *CurSection, MCSection *Section,
const MCExpr *SubSection, raw_ostream &OS);
@@ -163,12 +165,23 @@ public:
virtual void finishAttributeSection();
virtual void emitInst(uint32_t Inst, char Suffix = '\0');
- virtual void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE);
+ virtual void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE);
virtual void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value);
void emitConstantPools() override;
+ virtual void emitARMWinCFIAllocStack(unsigned Size, bool Wide);
+ virtual void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide);
+ virtual void emitARMWinCFISaveSP(unsigned Reg);
+ virtual void emitARMWinCFISaveFRegs(unsigned First, unsigned Last);
+ virtual void emitARMWinCFISaveLR(unsigned Offset);
+ virtual void emitARMWinCFIPrologEnd(bool Fragment);
+ virtual void emitARMWinCFINop(bool Wide);
+ virtual void emitARMWinCFIEpilogStart(unsigned Condition);
+ virtual void emitARMWinCFIEpilogEnd();
+ virtual void emitARMWinCFICustom(unsigned Opcode);
+
/// Reset any state between object emissions, i.e. the equivalent of
/// MCStreamer's reset method.
virtual void reset();
@@ -215,7 +228,7 @@ class MCStreamer {
DenseMap<const MCSymbol *, unsigned> SymbolOrdering;
/// This is stack of current and previous section values saved by
- /// PushSection.
+ /// pushSection.
SmallVector<std::pair<MCSectionSubPair, MCSectionSubPair>, 4> SectionStack;
/// Pointer to the parser's SMLoc if available. This is used to provide
@@ -247,9 +260,9 @@ protected:
return CurrentWinFrameInfo;
}
- virtual void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame);
+ virtual void emitWindowsUnwindTables(WinEH::FrameInfo *Frame);
- virtual void EmitWindowsUnwindTables();
+ virtual void emitWindowsUnwindTables();
virtual void emitRawTextImpl(StringRef String);
@@ -344,7 +357,7 @@ public:
/// Return a raw_ostream that comments can be written to. Unlike
/// AddComment, you are required to terminate comments with \n if you use this
/// method.
- virtual raw_ostream &GetCommentOS();
+ virtual raw_ostream &getCommentOS();
/// Print T and prefix it with the comment string (normally #) and
/// optionally a tab. This prints the comment immediately, not at the end of
@@ -359,8 +372,8 @@ public:
/// Emit added explicit comments.
virtual void emitExplicitComments();
- /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
- virtual void AddBlankLine() {}
+ /// Emit a blank line to a .s file to pretty it up.
+ virtual void addBlankLine() {}
/// @}
@@ -384,18 +397,18 @@ public:
/// Returns an index to represent the order a symbol was emitted in.
/// (zero if we did not emit that symbol)
- unsigned GetSymbolOrder(const MCSymbol *Sym) const {
+ unsigned getSymbolOrder(const MCSymbol *Sym) const {
return SymbolOrdering.lookup(Sym);
}
/// Update streamer for a new active section.
///
- /// This is called by PopSection and SwitchSection, if the current
+ /// This is called by popSection and switchSection, if the current
/// section changes.
virtual void changeSection(MCSection *, const MCExpr *);
/// Save the current and previous section on the section stack.
- void PushSection() {
+ void pushSection() {
SectionStack.push_back(
std::make_pair(getCurrentSection(), getPreviousSection()));
}
@@ -404,7 +417,7 @@ public:
/// Calls changeSection as needed.
///
/// Returns false if the stack was empty.
- bool PopSection() {
+ bool popSection() {
if (SectionStack.size() <= 1)
return false;
auto I = SectionStack.end();
@@ -419,11 +432,11 @@ public:
return true;
}
- bool SubSection(const MCExpr *Subsection) {
+ bool subSection(const MCExpr *Subsection) {
if (SectionStack.empty())
return false;
- SwitchSection(SectionStack.back().first.first, Subsection);
+ switchSection(SectionStack.back().first.first, Subsection);
return true;
}
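
The renamed section-stack API composes as before; a sketch on an MCStreamer `S` with some MCSection *DataSection:

  S.pushSection();                // save the (current, previous) pair
  S.switchSection(DataSection);   // calls changeSection if needed
  // ... emit into DataSection ...
  bool Restored = S.popSection(); // false only if the stack was empty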
@@ -431,13 +444,13 @@ public:
/// is required to update CurSection.
///
/// This corresponds to assembler directives like .section, .text, etc.
- virtual void SwitchSection(MCSection *Section,
+ virtual void switchSection(MCSection *Section,
const MCExpr *Subsection = nullptr);
/// Set the current section where code is being emitted to \p Section.
/// This is required to update CurSection. This version does not call
/// changeSection.
- void SwitchSectionNoChange(MCSection *Section,
+ void switchSectionNoChange(MCSection *Section,
const MCExpr *Subsection = nullptr) {
assert(Section && "Cannot switch to a null section!");
MCSectionSubPair curSection = SectionStack.back().first;
@@ -455,7 +468,7 @@ public:
///
/// Each emitted symbol will be tracked in the ordering table,
/// so we can sort on them later.
- void AssignFragment(MCSymbol *Symbol, MCFragment *Fragment);
+ void assignFragment(MCSymbol *Symbol, MCFragment *Fragment);
/// Returns the mnemonic for \p MI, if the streamer has access to an
/// instruction printer, and returns an empty string otherwise.
@@ -550,40 +563,40 @@ public:
/// Start emitting COFF symbol definition
///
/// \param Symbol - The symbol to have its External & Type fields set.
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void beginCOFFSymbolDef(const MCSymbol *Symbol);
/// Emit the storage class of the symbol.
///
/// \param StorageClass - The storage class the symbol should have.
- virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void emitCOFFSymbolStorageClass(int StorageClass);
/// Emit the type of the symbol.
///
/// \param Type - A COFF type identifier (see COFF::SymbolType in X86COFF.h)
- virtual void EmitCOFFSymbolType(int Type);
+ virtual void emitCOFFSymbolType(int Type);
/// Marks the end of the symbol definition.
- virtual void EndCOFFSymbolDef();
+ virtual void endCOFFSymbolDef();
- virtual void EmitCOFFSafeSEH(MCSymbol const *Symbol);
+ virtual void emitCOFFSafeSEH(MCSymbol const *Symbol);
/// Emits the symbol table index of a Symbol into the current section.
- virtual void EmitCOFFSymbolIndex(MCSymbol const *Symbol);
+ virtual void emitCOFFSymbolIndex(MCSymbol const *Symbol);
/// Emits a COFF section index.
///
/// \param Symbol - Symbol the section number relocation should point to.
- virtual void EmitCOFFSectionIndex(MCSymbol const *Symbol);
+ virtual void emitCOFFSectionIndex(MCSymbol const *Symbol);
/// Emits a COFF section relative relocation.
///
/// \param Symbol - Symbol the section relative relocation should point to.
- virtual void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset);
+ virtual void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset);
/// Emits a COFF image relative relocation.
///
/// \param Symbol - Symbol the image relative relocation should point to.
- virtual void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset);
+ virtual void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset);
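
Taken together, the renamed COFF hooks form a begin/emit/end bracket around a symbol definition. A sketch using the storage-class and type constants from llvm/BinaryFormat/COFF.h; the streamer is assumed to target a COFF object:

    #include "llvm/BinaryFormat/COFF.h"
    #include "llvm/MC/MCStreamer.h"

    void declareCOFFFunction(llvm::MCStreamer &S, const llvm::MCSymbol *Sym) {
      S.beginCOFFSymbolDef(Sym);
      S.emitCOFFSymbolStorageClass(llvm::COFF::IMAGE_SYM_CLASS_EXTERNAL);
      S.emitCOFFSymbolType(llvm::COFF::IMAGE_SYM_DTYPE_FUNCTION
                           << llvm::COFF::SCT_COMPLEX_TYPE_SHIFT);
      S.endCOFFSymbolDef();
      // The relocation helpers follow the same casing, e.g.:
      //   S.emitCOFFSectionIndex(Sym);
      //   S.emitCOFFSecRel32(Sym, /*Offset=*/0);
    }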
/// Emits an lcomm directive with XCOFF csect information.
///
@@ -615,6 +628,12 @@ public:
/// changed at the end of assembly.
virtual void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename);
+  /// Emit an XCOFF .ref directive which creates an R_REF type entry in the
+  /// relocation table for one or more symbols.
+ ///
+ /// \param Sym - The symbol on the .ref directive.
+ virtual void emitXCOFFRefDirective(StringRef Sym);
+
/// Emit an ELF .size directive.
///
/// This corresponds to an assembler statement such as:
@@ -907,6 +926,7 @@ public:
unsigned CUID = 0);
virtual void emitCFIBKeyFrame();
+ virtual void emitCFIMTETaggedFrame();
/// This implements the DWARF2 '.loc fileno lineno ...' assembler
/// directive.
@@ -918,16 +938,16 @@ public:
/// Associate a filename with a specified logical file number, and also
/// specify that file's checksum information. This implements the '.cv_file 4
/// "foo.c"' assembler directive. Returns true on success.
- virtual bool EmitCVFileDirective(unsigned FileNo, StringRef Filename,
+ virtual bool emitCVFileDirective(unsigned FileNo, StringRef Filename,
ArrayRef<uint8_t> Checksum,
unsigned ChecksumKind);
/// Introduces a function id for use with .cv_loc.
- virtual bool EmitCVFuncIdDirective(unsigned FunctionId);
+ virtual bool emitCVFuncIdDirective(unsigned FunctionId);
/// Introduces an inline call site id for use with .cv_loc. Includes
/// extra information for inline line table generation.
- virtual bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
+ virtual bool emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
unsigned IAFile, unsigned IALine,
unsigned IACol, SMLoc Loc);
@@ -983,7 +1003,7 @@ public:
virtual void emitCVFileChecksumOffsetDirective(unsigned FileNo) {}
/// This implements the CodeView '.cv_fpo_data' assembler directive.
- virtual void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc = {}) {}
+ virtual void emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc = {}) {}
/// Emit the absolute difference between two symbols.
///
@@ -1022,28 +1042,28 @@ public:
virtual void emitCFIWindowSave();
virtual void emitCFINegateRAState();
- virtual void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
- virtual void EmitWinCFIEndProc(SMLoc Loc = SMLoc());
+ virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
+ virtual void emitWinCFIEndProc(SMLoc Loc = SMLoc());
/// This is used on platforms, such as Windows on ARM64, that require function
/// or funclet sizes to be emitted in .xdata before the End marker is emitted
  /// for the frame. We cannot use the End marker, as it is deliberately left
  /// unset while .xdata is being emitted in order to indicate that the frame
  /// is still active.
- virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc());
- virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc());
- virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc());
- virtual void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc());
- virtual void EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
+ virtual void emitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc());
+ virtual void emitWinCFIStartChained(SMLoc Loc = SMLoc());
+ virtual void emitWinCFIEndChained(SMLoc Loc = SMLoc());
+ virtual void emitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc());
+ virtual void emitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc = SMLoc());
- virtual void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc());
- virtual void EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
+ virtual void emitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc());
+ virtual void emitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc = SMLoc());
- virtual void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
+ virtual void emitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc = SMLoc());
- virtual void EmitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc());
- virtual void EmitWinCFIEndProlog(SMLoc Loc = SMLoc());
- virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except,
+ virtual void emitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc());
+ virtual void emitWinCFIEndProlog(SMLoc Loc = SMLoc());
+ virtual void emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except,
SMLoc Loc = SMLoc());
- virtual void EmitWinEHHandlerData(SMLoc Loc = SMLoc());
+ virtual void emitWinEHHandlerData(SMLoc Loc = SMLoc());
virtual void emitCGProfileEntry(const MCSymbolRefExpr *From,
const MCSymbolRefExpr *To, uint64_t Count);
@@ -1099,7 +1119,7 @@ public:
/// Streamer specific finalization.
virtual void finishImpl();
/// Finish emission of machine code.
- void Finish(SMLoc EndLoc = SMLoc());
+ void finish(SMLoc EndLoc = SMLoc());
virtual bool mayHaveInstructions(MCSection &Sec) const { return true; }
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index 839a3bd85829..e1f0a86141e3 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -14,12 +14,13 @@
#define LLVM_MC_MCSUBTARGETINFO_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/SubtargetFeature.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>
diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h
index d8fc4505d446..91ef6ee31d8d 100644
--- a/llvm/include/llvm/MC/MCSymbol.h
+++ b/llvm/include/llvm/MC/MCSymbol.h
@@ -14,7 +14,7 @@
#define LLVM_MC_MCSYMBOL_H
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringMapEntry.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFragment.h"
diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h
index 5a4852e0e895..5eab32cb5c12 100644
--- a/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -86,9 +86,9 @@ public:
bool omitFromLinkingSection() const { return OmitFromLinkingSection; }
void setOmitFromLinkingSection() { OmitFromLinkingSection = true; }
- bool hasImportModule() const { return ImportModule.hasValue(); }
+ bool hasImportModule() const { return ImportModule.has_value(); }
StringRef getImportModule() const {
- if (ImportModule.hasValue())
+ if (ImportModule)
return ImportModule.getValue();
// Use a default module name of "env" for now, for compatibility with
// existing tools.
@@ -98,15 +98,15 @@ public:
}
void setImportModule(StringRef Name) { ImportModule = Name; }
- bool hasImportName() const { return ImportName.hasValue(); }
+ bool hasImportName() const { return ImportName.has_value(); }
StringRef getImportName() const {
- if (ImportName.hasValue())
+ if (ImportName)
return ImportName.getValue();
return getName();
}
void setImportName(StringRef Name) { ImportName = Name; }
- bool hasExportName() const { return ExportName.hasValue(); }
+ bool hasExportName() const { return ExportName.has_value(); }
StringRef getExportName() const { return ExportName.getValue(); }
void setExportName(StringRef Name) { ExportName = Name; }
@@ -129,12 +129,12 @@ public:
void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; }
const wasm::WasmGlobalType &getGlobalType() const {
- assert(GlobalType.hasValue());
+ assert(GlobalType);
return GlobalType.getValue();
}
void setGlobalType(wasm::WasmGlobalType GT) { GlobalType = GT; }
- bool hasTableType() const { return TableType.hasValue(); }
+ bool hasTableType() const { return TableType.has_value(); }
const wasm::WasmTableType &getTableType() const {
assert(hasTableType());
return TableType.getValue();
diff --git a/llvm/include/llvm/MC/MCSymbolXCOFF.h b/llvm/include/llvm/MC/MCSymbolXCOFF.h
index 752e1e7bba0f..2ec265e66300 100644
--- a/llvm/include/llvm/MC/MCSymbolXCOFF.h
+++ b/llvm/include/llvm/MC/MCSymbolXCOFF.h
@@ -39,8 +39,7 @@ public:
};
XCOFF::StorageClass getStorageClass() const {
- assert(StorageClass.hasValue() &&
- "StorageClass not set on XCOFF MCSymbol.");
+ assert(StorageClass && "StorageClass not set on XCOFF MCSymbol.");
return StorageClass.getValue();
}
diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h
index db50dc6749e2..9c906cdc90d0 100644
--- a/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/llvm/include/llvm/MC/MCTargetOptions.h
@@ -31,6 +31,12 @@ enum class DebugCompressionType {
  Z, ///< zlib style compression
};
+enum class EmitDwarfUnwindType {
+ Always, // Always emit dwarf unwind
+ NoCompactUnwind, // Only emit if compact unwind isn't available
+ Default, // Default behavior is based on the target
+};
+
class StringRef;
class MCTargetOptions {
@@ -47,7 +53,6 @@ public:
bool MCNoDeprecatedWarn : 1;
bool MCNoTypeCheck : 1;
bool MCSaveTempLabels : 1;
- bool MCUseDwarfDirectory : 1;
bool MCIncrementalLinkerCompatible : 1;
bool ShowMCEncoding : 1;
bool ShowMCInst : 1;
@@ -57,8 +62,22 @@ public:
bool PreserveAsmComments : 1;
bool Dwarf64 : 1;
+
+ EmitDwarfUnwindType EmitDwarfUnwind;
+
int DwarfVersion = 0;
+ enum DwarfDirectory {
+ // Force disable
+ DisableDwarfDirectory,
+ // Force enable, for assemblers that support
+ // `.file fileno directory filename' syntax
+ EnableDwarfDirectory,
+ // Default is based on the target
+ DefaultDwarfDirectory
+ };
+ DwarfDirectory MCUseDwarfDirectory;
+
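
Both new knobs are tri-state. A sketch of setting them together; the helper name is illustrative and the remaining fields keep their defaults:

    #include "llvm/MC/MCTargetOptions.h"

    llvm::MCTargetOptions makeMCOptions() {
      llvm::MCTargetOptions Opts;
      // Emit DWARF unwind info only when compact unwind is unavailable.
      Opts.EmitDwarfUnwind = llvm::EmitDwarfUnwindType::NoCompactUnwind;
      // Let the target decide whether the `.file fileno directory filename'
      // syntax may be used.
      Opts.MCUseDwarfDirectory = llvm::MCTargetOptions::DefaultDwarfDirectory;
      return Opts;
    }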
std::string ABIName;
std::string AssemblyLanguage;
std::string SplitDwarfFile;
diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
index 189484198916..d51e740177f7 100644
--- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
+++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
@@ -20,6 +20,7 @@
namespace llvm {
class MCTargetOptions;
+enum class EmitDwarfUnwindType;
namespace mc {
@@ -32,6 +33,8 @@ int getDwarfVersion();
bool getDwarf64();
+EmitDwarfUnwindType getEmitDwarfUnwind();
+
bool getShowMCInst();
bool getFatalWarnings();
diff --git a/llvm/include/llvm/MC/MCValue.h b/llvm/include/llvm/MC/MCValue.h
index 37feee4c9ea8..37265d72c9df 100644
--- a/llvm/include/llvm/MC/MCValue.h
+++ b/llvm/include/llvm/MC/MCValue.h
@@ -15,7 +15,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DataTypes.h"
-#include <cassert>
namespace llvm {
class raw_ostream;
diff --git a/llvm/include/llvm/MC/MCWin64EH.h b/llvm/include/llvm/MC/MCWin64EH.h
index 065161d1759e..622a666b78dd 100644
--- a/llvm/include/llvm/MC/MCWin64EH.h
+++ b/llvm/include/llvm/MC/MCWin64EH.h
@@ -57,13 +57,19 @@ public:
bool HandlerData) const override;
};
-class ARM64UnwindEmitter : public WinEH::UnwindEmitter {
+class ARMUnwindEmitter : public WinEH::UnwindEmitter {
public:
void Emit(MCStreamer &Streamer) const override;
void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI,
bool HandlerData) const override;
};
+class ARM64UnwindEmitter : public WinEH::UnwindEmitter {
+public:
+ void Emit(MCStreamer &Streamer) const override;
+ void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI,
+ bool HandlerData) const override;
+};
}
} // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index af1ed6faf753..0778c4d52c5e 100644
--- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -45,15 +45,15 @@ public:
void emitThumbFunc(MCSymbol *Func) override;
bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
- void BeginCOFFSymbolDef(MCSymbol const *Symbol) override;
- void EmitCOFFSymbolStorageClass(int StorageClass) override;
- void EmitCOFFSymbolType(int Type) override;
- void EndCOFFSymbolDef() override;
- void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
- void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override;
- void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
- void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
- void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+ void beginCOFFSymbolDef(MCSymbol const *Symbol) override;
+ void emitCOFFSymbolStorageClass(int StorageClass) override;
+ void emitCOFFSymbolType(int Type) override;
+ void endCOFFSymbolDef() override;
+ void emitCOFFSafeSEH(MCSymbol const *Symbol) override;
+ void emitCOFFSymbolIndex(MCSymbol const *Symbol) override;
+ void emitCOFFSectionIndex(MCSymbol const *Symbol) override;
+ void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
+ void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -64,7 +64,7 @@ public:
void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void emitIdent(StringRef IdentString) override;
- void EmitWinEHHandlerData(SMLoc Loc) override;
+ void emitWinEHHandlerData(SMLoc Loc) override;
void emitCGProfileEntry(const MCSymbolRefExpr *From,
const MCSymbolRefExpr *To, uint64_t Count) override;
void finishImpl() override;
diff --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h
index 5688255810d0..c16396ea5e71 100644
--- a/llvm/include/llvm/MC/MCWinEH.h
+++ b/llvm/include/llvm/MC/MCWinEH.h
@@ -50,11 +50,17 @@ struct FrameInfo {
bool HandlesUnwind = false;
bool HandlesExceptions = false;
bool EmitAttempted = false;
+ bool Fragment = false;
int LastFrameInst = -1;
const FrameInfo *ChainedParent = nullptr;
std::vector<Instruction> Instructions;
- MapVector<MCSymbol*, std::vector<Instruction>> EpilogMap;
+ struct Epilog {
+ std::vector<Instruction> Instructions;
+ unsigned Condition;
+ MCSymbol *End;
+ };
+ MapVector<MCSymbol *, Epilog> EpilogMap;
FrameInfo() = default;
FrameInfo(const MCSymbol *Function, const MCSymbol *BeginFuncEHLabel)
@@ -68,7 +74,7 @@ struct FrameInfo {
if (!Instructions.empty())
return false;
for (const auto &E : EpilogMap)
- if (!E.second.empty())
+ if (!E.second.Instructions.empty())
return false;
return true;
}
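
Because EpilogMap's mapped type changed from a bare instruction vector to the new Epilog struct, iterating code needs one extra member access. A before/after sketch, assuming FI is a WinEH::FrameInfo pointer and processUnwindOpcode() is a hypothetical consumer:

    // Before this patch, E.second itself was the instruction vector; the
    // vector now lives in Epilog::Instructions, next to the new per-epilog
    // Condition and End fields.
    for (auto &E : FI->EpilogMap) {
      for (const llvm::WinEH::Instruction &Inst : E.second.Instructions)
        processUnwindOpcode(Inst);
      // E.second.Condition and E.second.End are also available here.
    }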
diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h
index 5fc2efbe5284..3faa03fa69e9 100644
--- a/llvm/include/llvm/MC/MCXCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h
@@ -32,6 +32,10 @@ public:
void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
MCSymbolAttr Linkage,
MCSymbolAttr Visibility) override;
+ void emitXCOFFRefDirective(StringRef Name) override {
+    report_fatal_error("emitXCOFFRefDirective is not implemented yet on "
+                       "object generation path");
+ }
void emitXCOFFRenameDirective(const MCSymbol *Name,
StringRef Rename) override {
report_fatal_error("emitXCOFFRenameDirective is not implemented yet on "
diff --git a/llvm/include/llvm/MC/SectionKind.h b/llvm/include/llvm/MC/SectionKind.h
index 0fd86cc457de..61e400fe9ede 100644
--- a/llvm/include/llvm/MC/SectionKind.h
+++ b/llvm/include/llvm/MC/SectionKind.h
@@ -24,6 +24,10 @@ class SectionKind {
/// Metadata - Debug info sections or other metadata.
Metadata,
+ /// Exclude - This section will be excluded from the final executable or
+ /// shared library. Only valid for ELF / COFF targets.
+ Exclude,
+
/// Text - Text section, used for functions and other executable code.
Text,
@@ -118,6 +122,8 @@ public:
bool isMetadata() const { return K == Metadata; }
+ bool isExclude() const { return K == Exclude; }
+
bool isText() const { return K == Text || K == ExecuteOnly; }
bool isExecuteOnly() const { return K == ExecuteOnly; }
@@ -180,6 +186,7 @@ private:
public:
static SectionKind getMetadata() { return get(Metadata); }
+ static SectionKind getExclude() { return get(Exclude); }
static SectionKind getText() { return get(Text); }
static SectionKind getExecuteOnly() { return get(ExecuteOnly); }
static SectionKind getReadOnly() { return get(ReadOnly); }
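
A tiny sketch of the new kind in use; the helper is hypothetical:

    #include "llvm/MC/SectionKind.h"

    // Decide whether a section should be dropped from the final image.
    // Exclude is only meaningful for ELF / COFF targets.
    bool omitFromImage(llvm::SectionKind K) { return K.isExclude(); }

    // Producers tag such sections with llvm::SectionKind::getExclude().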
diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h
index 3f9c91be05d3..42133f3f7726 100644
--- a/llvm/include/llvm/MC/StringTableBuilder.h
+++ b/llvm/include/llvm/MC/StringTableBuilder.h
@@ -85,7 +85,6 @@ public:
void write(raw_ostream &OS) const;
void write(uint8_t *Buf) const;
-private:
bool isFinalized() const { return Finalized; }
};
diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/MC/SubtargetFeature.h
index 032e2a7df1f2..799912d4bacb 100644
--- a/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/llvm/include/llvm/MC/SubtargetFeature.h
@@ -17,11 +17,10 @@
#ifndef LLVM_MC_SUBTARGETFEATURE_H
#define LLVM_MC_SUBTARGETFEATURE_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MathExtras.h"
#include <array>
-#include <bitset>
#include <initializer_list>
#include <string>
#include <vector>
diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index da9a9269edbf..eeac559f81b1 100644
--- a/llvm/include/llvm/MC/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -27,7 +27,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
@@ -56,13 +55,12 @@ class MCTargetAsmParser;
class MCTargetOptions;
class MCTargetStreamer;
class raw_ostream;
-class raw_pwrite_stream;
class TargetMachine;
class TargetOptions;
namespace mca {
class CustomBehaviour;
class InstrPostProcess;
-class SourceMgr;
+struct SourceMgr;
} // namespace mca
MCStreamer *createNullStreamer(MCContext &Ctx);
@@ -111,6 +109,16 @@ MCStreamer *createXCOFFStreamer(MCContext &Ctx,
std::unique_ptr<MCObjectWriter> &&OW,
std::unique_ptr<MCCodeEmitter> &&CE,
bool RelaxAll);
+MCStreamer *createSPIRVStreamer(MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&TAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll);
+MCStreamer *createDXContainerStreamer(MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&TAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll);
MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx);
@@ -177,7 +185,6 @@ public:
const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
using MCCodeEmitterCtorTy = MCCodeEmitter *(*)(const MCInstrInfo &II,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
using ELFStreamerCtorTy =
MCStreamer *(*)(const Triple &T, MCContext &Ctx,
@@ -204,6 +211,17 @@ public:
std::unique_ptr<MCAsmBackend> &&TAB,
std::unique_ptr<MCObjectWriter> &&OW,
std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll);
+ using SPIRVStreamerCtorTy =
+ MCStreamer *(*)(const Triple &T, MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&TAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll);
+
+ using DXContainerStreamerCtorTy =
+ MCStreamer *(*)(const Triple &T, MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&TAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll);
using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S);
using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)(
@@ -305,6 +323,8 @@ private:
ELFStreamerCtorTy ELFStreamerCtorFn = nullptr;
WasmStreamerCtorTy WasmStreamerCtorFn = nullptr;
XCOFFStreamerCtorTy XCOFFStreamerCtorFn = nullptr;
+ SPIRVStreamerCtorTy SPIRVStreamerCtorFn = nullptr;
+ DXContainerStreamerCtorTy DXContainerStreamerCtorFn = nullptr;
/// Construction function for this target's null TargetStreamer, if
/// registered (default = nullptr).
@@ -508,11 +528,10 @@ public:
/// createMCCodeEmitter - Create a target specific code emitter.
MCCodeEmitter *createMCCodeEmitter(const MCInstrInfo &II,
- const MCRegisterInfo &MRI,
MCContext &Ctx) const {
if (!MCCodeEmitterCtorFn)
return nullptr;
- return MCCodeEmitterCtorFn(II, MRI, Ctx);
+ return MCCodeEmitterCtorFn(II, Ctx);
}
/// Create a target specific MCStreamer.
@@ -576,6 +595,22 @@ public:
S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
std::move(Emitter), RelaxAll);
break;
+ case Triple::SPIRV:
+ if (SPIRVStreamerCtorFn)
+ S = SPIRVStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
+ std::move(Emitter), RelaxAll);
+ else
+ S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW),
+ std::move(Emitter), RelaxAll);
+ break;
+ case Triple::DXContainer:
+ if (DXContainerStreamerCtorFn)
+ S = DXContainerStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW),
+ std::move(Emitter), RelaxAll);
+ else
+ S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW),
+ std::move(Emitter), RelaxAll);
+ break;
}
if (ObjectTargetStreamerCtorFn)
ObjectTargetStreamerCtorFn(*S, STI);
@@ -956,6 +991,14 @@ struct TargetRegistry {
T.ELFStreamerCtorFn = Fn;
}
+ static void RegisterSPIRVStreamer(Target &T, Target::SPIRVStreamerCtorTy Fn) {
+ T.SPIRVStreamerCtorFn = Fn;
+ }
+
+ static void RegisterDXContainerStreamer(Target &T, Target::DXContainerStreamerCtorTy Fn) {
+ T.DXContainerStreamerCtorFn = Fn;
+ }
+
static void RegisterWasmStreamer(Target &T, Target::WasmStreamerCtorTy Fn) {
T.WasmStreamerCtorFn = Fn;
}
@@ -1362,7 +1405,6 @@ template <class MCCodeEmitterImpl> struct RegisterMCCodeEmitter {
private:
static MCCodeEmitter *Allocator(const MCInstrInfo & /*II*/,
- const MCRegisterInfo & /*MRI*/,
MCContext & /*Ctx*/) {
return new MCCodeEmitterImpl();
}
diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
index c4be5312ea19..527dc766b739 100644
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -49,6 +49,11 @@ public:
/// scheduling model.
virtual void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
const MCInst &MCI) {}
+
+ // The resetState() method gets invoked at the beginning of each code region
+ // so that targets that override this function can clear any state that they
+ // have left from the previous code region.
+ virtual void resetState() {}
};
/// Class which can be overriden by targets to enforce instruction
diff --git a/llvm/include/llvm/MCA/IncrementalSourceMgr.h b/llvm/include/llvm/MCA/IncrementalSourceMgr.h
new file mode 100644
index 000000000000..d91cc5f23311
--- /dev/null
+++ b/llvm/include/llvm/MCA/IncrementalSourceMgr.h
@@ -0,0 +1,92 @@
+//===---------------- IncrementalSourceMgr.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains IncrementalSourceMgr, an implementation of SourceMgr
+/// that allows users to add new instructions incrementally / dynamically.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_INCREMENTALSOURCEMGR_H
+#define LLVM_MCA_INCREMENTALSOURCEMGR_H
+
+#include "llvm/MCA/SourceMgr.h"
+#include <deque>
+
+namespace llvm {
+namespace mca {
+
+/// An implementation of \a SourceMgr that allows users to add new instructions
+/// incrementally / dynamically.
+/// Note that this SourceMgr takes ownership of all \a mca::Instruction.
+class IncrementalSourceMgr : public SourceMgr {
+  /// Owner of all mca::Instruction instances. Note that we use std::deque
+  /// here for better throughput than std::vector or llvm::SmallVector, which
+  /// usually pay a higher re-allocation cost when there is a large number of
+  /// instructions.
+ std::deque<UniqueInst> InstStorage;
+
+  /// Instructions that are ready to be used. Each of them is a pointer to an
+  /// \a UniqueInst inside InstStorage.
+ std::deque<Instruction *> Staging;
+
+ /// Current instruction index.
+ unsigned TotalCounter;
+
+ /// End-of-stream flag.
+ bool EOS;
+
+ /// Called when an instruction is no longer needed.
+ using InstFreedCallback = llvm::function_ref<void(Instruction *)>;
+ InstFreedCallback InstFreedCB;
+
+public:
+ IncrementalSourceMgr() : TotalCounter(0U), EOS(false) {}
+
+ void clear();
+
+ /// Set a callback that is invoked when a mca::Instruction is
+ /// no longer needed. This is usually used for recycling the
+ /// instruction.
+ void setOnInstFreedCallback(InstFreedCallback CB) { InstFreedCB = CB; }
+
+ ArrayRef<UniqueInst> getInstructions() const override {
+ llvm_unreachable("Not applicable");
+ }
+
+ bool hasNext() const override { return !Staging.empty(); }
+ bool isEnd() const override { return EOS; }
+
+ SourceRef peekNext() const override {
+ assert(hasNext());
+ return SourceRef(TotalCounter, *Staging.front());
+ }
+
+ /// Add a new instruction.
+ void addInst(UniqueInst &&Inst) {
+ InstStorage.emplace_back(std::move(Inst));
+ Staging.push_back(InstStorage.back().get());
+ }
+
+ /// Add a recycled instruction.
+ void addRecycledInst(Instruction *Inst) { Staging.push_back(Inst); }
+
+ void updateNext() override;
+
+ /// Mark the end of instruction stream.
+ void endOfStream() { EOS = true; }
+
+#ifndef NDEBUG
+  /// Print statistics about instruction recycling.
+ void printStatistic(raw_ostream &OS);
+#endif
+};
+
+} // end namespace mca
+} // end namespace llvm
+
+#endif // LLVM_MCA_INCREMENTALSOURCEMGR_H
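
A sketch of the producer side, assuming the instructions are created elsewhere (for example by an mca::InstrBuilder); the feed() helper is illustrative:

    #include "llvm/MCA/IncrementalSourceMgr.h"
    #include <memory>

    // Push one instruction into the manager; call endOfStream() after the
    // final one so isEnd() can become true.
    void feed(llvm::mca::IncrementalSourceMgr &ISM,
              std::unique_ptr<llvm::mca::Instruction> Inst, bool IsLast) {
      ISM.addInst(std::move(Inst)); // ISM takes ownership
      if (IsLast)
        ISM.endOfStream();
    }

    // Consumers drain it through the usual SourceMgr protocol:
    //   while (ISM.hasNext()) { auto SR = ISM.peekNext(); ...; ISM.updateNext(); }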
diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h
index 04b5cf590d70..92b92a515db9 100644
--- a/llvm/include/llvm/MCA/InstrBuilder.h
+++ b/llvm/include/llvm/MCA/InstrBuilder.h
@@ -14,6 +14,7 @@
#ifndef LLVM_MCA_INSTRBUILDER_H
#define LLVM_MCA_INSTRBUILDER_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -25,6 +26,27 @@
namespace llvm {
namespace mca {
+class RecycledInstErr : public ErrorInfo<RecycledInstErr> {
+ Instruction *RecycledInst;
+
+public:
+ static char ID;
+
+ explicit RecycledInstErr(Instruction *Inst) : RecycledInst(Inst) {}
+ // Always need to carry an Instruction
+ RecycledInstErr() = delete;
+
+ Instruction *getInst() const { return RecycledInst; }
+
+ void log(raw_ostream &OS) const override {
+ OS << "Instruction is recycled\n";
+ }
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+};
+
/// A builder class that knows how to construct Instruction objects.
///
/// Every llvm-mca Instruction is described by an object of class InstrDesc.
@@ -48,6 +70,10 @@ class InstrBuilder {
bool FirstCallInst;
bool FirstReturnInst;
+ using InstRecycleCallback =
+ llvm::function_ref<Instruction *(const InstrDesc &)>;
+ InstRecycleCallback InstRecycleCB;
+
Expected<const InstrDesc &> createInstrDescImpl(const MCInst &MCI);
Expected<const InstrDesc &> getOrCreateInstrDesc(const MCInst &MCI);
@@ -69,6 +95,10 @@ public:
FirstReturnInst = true;
}
+ /// Set a callback which is invoked to retrieve a recycled mca::Instruction
+ /// or null if there isn't any.
+ void setInstRecycleCallback(InstRecycleCallback CB) { InstRecycleCB = CB; }
+
Expected<std::unique_ptr<Instruction>> createInstruction(const MCInst &MCI);
};
} // namespace mca
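
A sketch of the recycling handshake: the callback hands back a retired instruction, and createInstruction reports the reuse through RecycledInstErr instead of allocating. IB and MCI are an initialized InstrBuilder and the MCInst being lowered; FreeList and reuse() are assumptions:

    // function_ref does not own its callee, so the callback must outlive the
    // builder's use of it; keep it in a named variable.
    llvm::SmallVector<llvm::mca::Instruction *, 8> FreeList;
    auto Recycler = [&](const llvm::mca::InstrDesc &) -> llvm::mca::Instruction * {
      return FreeList.empty() ? nullptr : FreeList.pop_back_val();
    };
    IB.setInstRecycleCallback(Recycler);

    auto InstOrErr = IB.createInstruction(MCI);
    if (!InstOrErr)
      llvm::handleAllErrors(
          InstOrErr.takeError(),
          [&](llvm::mca::RecycledInstErr &RC) { reuse(RC.getInst()); },
          [](const llvm::ErrorInfoBase &) { /* a genuine failure */ });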
diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 33e3c8a2e630..86f2d7ade161 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -472,17 +472,15 @@ struct InstrDesc {
// subtarget when computing the reciprocal throughput.
unsigned SchedClassID;
- unsigned MayLoad : 1;
- unsigned MayStore : 1;
- unsigned HasSideEffects : 1;
- unsigned BeginGroup : 1;
- unsigned EndGroup : 1;
- unsigned RetireOOO : 1;
-
// True if all buffered resources are in-order, and there is at least one
// buffer which is a dispatch hazard (BufferSize = 0).
unsigned MustIssueImmediately : 1;
+  // True if the corresponding mca::Instruction can be recycled. Currently
+  // only instructions that are not variadic and have no variants can be
+  // recycled.
+ unsigned IsRecyclable : 1;
+
// A zero latency instruction doesn't consume any scheduler resources.
bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
@@ -518,8 +516,16 @@ class InstructionBase {
unsigned Opcode;
// Flags used by the LSUnit.
- bool IsALoadBarrier;
- bool IsAStoreBarrier;
+ bool IsALoadBarrier : 1;
+ bool IsAStoreBarrier : 1;
+ // Flags copied from the InstrDesc and potentially modified by
+ // CustomBehaviour or (more likely) InstrPostProcess.
+ bool MayLoad : 1;
+ bool MayStore : 1;
+ bool HasSideEffects : 1;
+ bool BeginGroup : 1;
+ bool EndGroup : 1;
+ bool RetireOOO : 1;
public:
InstructionBase(const InstrDesc &D, const unsigned Opcode)
@@ -568,7 +574,23 @@ public:
// Returns true if this instruction is a candidate for move elimination.
bool isOptimizableMove() const { return IsOptimizableMove; }
void setOptimizableMove() { IsOptimizableMove = true; }
- bool isMemOp() const { return Desc.MayLoad || Desc.MayStore; }
+ void clearOptimizableMove() { IsOptimizableMove = false; }
+ bool isMemOp() const { return MayLoad || MayStore; }
+
+ // Getters and setters for general instruction flags.
+ void setMayLoad(bool newVal) { MayLoad = newVal; }
+ void setMayStore(bool newVal) { MayStore = newVal; }
+ void setHasSideEffects(bool newVal) { HasSideEffects = newVal; }
+ void setBeginGroup(bool newVal) { BeginGroup = newVal; }
+ void setEndGroup(bool newVal) { EndGroup = newVal; }
+ void setRetireOOO(bool newVal) { RetireOOO = newVal; }
+
+ bool getMayLoad() const { return MayLoad; }
+ bool getMayStore() const { return MayStore; }
+ bool getHasSideEffects() const { return HasSideEffects; }
+ bool getBeginGroup() const { return BeginGroup; }
+ bool getEndGroup() const { return EndGroup; }
+ bool getRetireOOO() const { return RetireOOO; }
};
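
Because these flags now live on the mutable InstructionBase rather than the shared InstrDesc, a target's InstrPostProcess (see the CustomBehaviour.h hunk above) may legally adjust them per instruction. A hypothetical override; MyPostProcess and MyTarget::PREFETCH are assumptions:

    // Pretend a target prefetch has no modeled side effects so the simulated
    // scheduler may move it freely.
    void MyPostProcess::postProcessInstruction(
        std::unique_ptr<llvm::mca::Instruction> &Inst, const llvm::MCInst &MCI) {
      if (MCI.getOpcode() == MyTarget::PREFETCH) { // assumed opcode
        Inst->setHasSideEffects(false);
        Inst->setMayLoad(false);
      }
    }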
/// An instruction propagated through the simulated instruction pipeline.
@@ -628,6 +650,8 @@ public:
UsedBuffers(D.UsedBuffers), CriticalRegDep(), CriticalMemDep(),
CriticalResourceMask(0), IsEliminated(false) {}
+ void reset();
+
unsigned getRCUTokenID() const { return RCUTokenID; }
unsigned getLSUTokenID() const { return LSUTokenID; }
void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
@@ -657,6 +681,7 @@ public:
bool updateDispatched();
bool updatePending();
+ bool isInvalid() const { return Stage == IS_INVALID; }
bool isDispatched() const { return Stage == IS_DISPATCHED; }
bool isPending() const { return Stage == IS_PENDING; }
bool isReady() const { return Stage == IS_READY; }
diff --git a/llvm/include/llvm/MCA/Pipeline.h b/llvm/include/llvm/MCA/Pipeline.h
index 0ac988c52dc1..92c3836124ad 100644
--- a/llvm/include/llvm/MCA/Pipeline.h
+++ b/llvm/include/llvm/MCA/Pipeline.h
@@ -51,6 +51,13 @@ class Pipeline {
Pipeline(const Pipeline &P) = delete;
Pipeline &operator=(const Pipeline &P) = delete;
+ enum class State {
+ Created, // Pipeline was just created. The default state.
+ Started, // Pipeline has started running.
+ Paused // Pipeline is paused.
+ };
+ State CurrentState;
+
/// An ordered list of stages that define this instruction pipeline.
SmallVector<std::unique_ptr<Stage>, 8> Stages;
std::set<HWEventListener *> Listeners;
@@ -62,13 +69,16 @@ class Pipeline {
void notifyCycleEnd();
public:
- Pipeline() : Cycles(0) {}
+ Pipeline() : CurrentState(State::Created), Cycles(0) {}
void appendStage(std::unique_ptr<Stage> S);
/// Returns the total number of simulated cycles.
Expected<unsigned> run();
void addEventListener(HWEventListener *Listener);
+
+ /// Returns whether the pipeline is currently paused.
+ bool isPaused() const { return CurrentState == State::Paused; }
};
} // namespace mca
} // namespace llvm
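
With the new state, an incremental driver can re-enter run() after topping up the source. A sketch, assuming P is an mca::Pipeline fed by an incremental source and refill() is a hypothetical producer:

    llvm::Expected<unsigned> Cycles = P.run();
    while (!Cycles && P.isPaused()) {
      llvm::consumeError(Cycles.takeError()); // the pause marker, not a failure
      refill();                               // add instructions or end the stream
      Cycles = P.run();                       // resumes from the paused state
    }
    // Any error still pending here is a genuine failure.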
diff --git a/llvm/include/llvm/MCA/SourceMgr.h b/llvm/include/llvm/MCA/SourceMgr.h
index e844171bdcab..16a60d1116ad 100644
--- a/llvm/include/llvm/MCA/SourceMgr.h
+++ b/llvm/include/llvm/MCA/SourceMgr.h
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This file implements class SourceMgr. Class SourceMgr abstracts the input
-/// code sequence (a sequence of MCInst), and assings unique identifiers to
-/// every instruction in the sequence.
+/// This file contains abstract class SourceMgr and the default implementation,
+/// CircularSourceMgr.
///
//===----------------------------------------------------------------------===//
@@ -25,30 +24,62 @@ namespace mca {
// prevent compiler error C2139 about intrinsic type trait '__is_assignable'.
typedef std::pair<unsigned, const Instruction &> SourceRef;
-class SourceMgr {
+/// Abstracts the input code sequence (a sequence of MCInst) and assigns
+/// unique identifiers to every instruction in the sequence.
+struct SourceMgr {
using UniqueInst = std::unique_ptr<Instruction>;
+
+ /// Provides a fixed range of \a UniqueInst to iterate.
+ virtual ArrayRef<UniqueInst> getInstructions() const = 0;
+
+ /// (Fixed) Number of \a UniqueInst. Returns the size of
+ /// \a getInstructions by default.
+ virtual size_t size() const { return getInstructions().size(); }
+
+ /// Whether there is any \a SourceRef to inspect / peek next.
+ /// Note that returning false from this doesn't mean the instruction
+ /// stream has ended.
+ virtual bool hasNext() const = 0;
+
+  /// Whether the instruction stream has ended.
+ virtual bool isEnd() const = 0;
+
+ /// The next \a SourceRef.
+ virtual SourceRef peekNext() const = 0;
+
+ /// Advance to the next \a SourceRef.
+ virtual void updateNext() = 0;
+
+ virtual ~SourceMgr() {}
+};
+
+/// The default implementation of \a SourceMgr. It always takes a fixed
+/// number of instructions and provides an option to loop the given sequence
+/// for a certain number of iterations.
+class CircularSourceMgr : public SourceMgr {
ArrayRef<UniqueInst> Sequence;
unsigned Current;
const unsigned Iterations;
static const unsigned DefaultIterations = 100;
public:
- SourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
- : Sequence(S), Current(0), Iterations(Iter ? Iter : DefaultIterations) {}
+ CircularSourceMgr(ArrayRef<UniqueInst> S, unsigned Iter)
+ : Sequence(S), Current(0U), Iterations(Iter ? Iter : DefaultIterations) {}
+
+ ArrayRef<UniqueInst> getInstructions() const override { return Sequence; }
unsigned getNumIterations() const { return Iterations; }
- unsigned size() const { return Sequence.size(); }
- bool hasNext() const { return Current < (Iterations * Sequence.size()); }
- void updateNext() { ++Current; }
+ bool hasNext() const override {
+ return Current < (Iterations * Sequence.size());
+ }
+ bool isEnd() const override { return !hasNext(); }
- SourceRef peekNext() const {
+ SourceRef peekNext() const override {
assert(hasNext() && "Already at end of sequence!");
return SourceRef(Current, *Sequence[Current % Sequence.size()]);
}
- using const_iterator = ArrayRef<UniqueInst>::const_iterator;
- const_iterator begin() const { return Sequence.begin(); }
- const_iterator end() const { return Sequence.end(); }
+ void updateNext() override { ++Current; }
};
} // namespace mca
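
The virtual protocol in one place, as a driver might use it against any SourceMgr implementation; SM is assumed initialized and consume() is a hypothetical consumer:

    while (!SM.isEnd()) {
      if (!SM.hasNext())
        break; // an incremental source may simply have nothing ready yet
      llvm::mca::SourceRef SR = SM.peekNext();
      unsigned Id = SR.first;                      // unique instruction id
      const llvm::mca::Instruction &I = SR.second; // the instruction itself
      consume(Id, I);
      SM.updateNext();
    }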
diff --git a/llvm/include/llvm/MCA/Stages/EntryStage.h b/llvm/include/llvm/MCA/Stages/EntryStage.h
index 4c50838bef4b..fb1244aa1933 100644
--- a/llvm/include/llvm/MCA/Stages/EntryStage.h
+++ b/llvm/include/llvm/MCA/Stages/EntryStage.h
@@ -30,7 +30,7 @@ class EntryStage final : public Stage {
unsigned NumRetired;
// Updates the program counter, and sets 'CurrentInstruction'.
- void getNextInstruction();
+ Error getNextInstruction();
EntryStage(const EntryStage &Other) = delete;
EntryStage &operator=(const EntryStage &Other) = delete;
@@ -42,6 +42,7 @@ public:
bool hasWorkToComplete() const override;
Error execute(InstRef &IR) override;
Error cycleStart() override;
+ Error cycleResume() override;
Error cycleEnd() override;
};
diff --git a/llvm/include/llvm/MCA/Stages/Stage.h b/llvm/include/llvm/MCA/Stages/Stage.h
index 84868e89ac29..2477b9b3d69c 100644
--- a/llvm/include/llvm/MCA/Stages/Stage.h
+++ b/llvm/include/llvm/MCA/Stages/Stage.h
@@ -48,6 +48,9 @@ public:
/// phase to prepare for the executions during the cycle.
virtual Error cycleStart() { return ErrorSuccess(); }
+ /// Called after the pipeline is resumed from pausing state.
+ virtual Error cycleResume() { return ErrorSuccess(); }
+
/// Called once at the end of each cycle.
virtual Error cycleEnd() { return ErrorSuccess(); }
@@ -82,6 +85,16 @@ public:
}
};
+/// This is actually not an error but a marker to indicate that
+/// the instruction stream is paused.
+struct InstStreamPause : public ErrorInfo<InstStreamPause> {
+ static char ID;
+
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+ void log(raw_ostream &OS) const override { OS << "Stream is paused"; }
+};
} // namespace mca
} // namespace llvm
#endif // LLVM_MCA_STAGES_STAGE_H
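
A stage raises the marker when it runs dry while the stream is still open. A minimal sketch of such a hook; MyStage and its Src member are assumptions, not the literal EntryStage implementation:

    llvm::Error MyStage::cycleStart() {
      // Nothing ready right now, but more instructions may still arrive.
      if (!Src.hasNext() && !Src.isEnd())
        return llvm::make_error<llvm::mca::InstStreamPause>();
      return llvm::ErrorSuccess();
    }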
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFConfig.h b/llvm/include/llvm/ObjCopy/COFF/COFFConfig.h
index 7bf673fa4af9..29d56d75698b 100644
--- a/llvm/tools/llvm-objcopy/COFF/COFFConfig.h
+++ b/llvm/include/llvm/ObjCopy/COFF/COFFConfig.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_COFF_COFFCONFIG_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_COFF_COFFCONFIG_H
+#ifndef LLVM_OBJCOPY_COFF_COFFCONFIG_H
+#define LLVM_OBJCOPY_COFF_COFFCONFIG_H
#include "llvm/ADT/Optional.h"
@@ -24,4 +24,4 @@ struct COFFConfig {
} // namespace objcopy
} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_COFF_COFFCONFIG_H
+#endif // LLVM_OBJCOPY_COFF_COFFCONFIG_H
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h b/llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h
index 2c7ccd34653d..d9043d6c5d01 100644
--- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.h
+++ b/llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
-#define LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
+#ifndef LLVM_OBJCOPY_COFF_COFFOBJCOPY_H
+#define LLVM_OBJCOPY_COFF_COFFOBJCOPY_H
namespace llvm {
class Error;
@@ -23,6 +23,9 @@ struct COFFConfig;
namespace coff {
+/// Apply the transformations described by \p Config and \p COFFConfig
+/// to \p In and write the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
Error executeObjcopyOnBinary(const CommonConfig &Config, const COFFConfig &,
object::COFFObjectFile &In, raw_ostream &Out);
@@ -30,4 +33,4 @@ Error executeObjcopyOnBinary(const CommonConfig &Config, const COFFConfig &,
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
+#endif // LLVM_OBJCOPY_COFF_COFFOBJCOPY_H
diff --git a/llvm/tools/llvm-objcopy/CommonConfig.h b/llvm/include/llvm/ObjCopy/CommonConfig.h
index ea39a6da2ba5..24503caed342 100644
--- a/llvm/tools/llvm-objcopy/CommonConfig.h
+++ b/llvm/include/llvm/ObjCopy/CommonConfig.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_COMMONCONFIG_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_COMMONCONFIG_H
+#ifndef LLVM_OBJCOPY_COMMONCONFIG_H
+#define LLVM_OBJCOPY_COMMONCONFIG_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/CachedHashString.h"
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Support/GlobPattern.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
// Necessary for llvm::DebugCompressionType::None
#include "llvm/Target/TargetOptions.h"
@@ -186,6 +187,16 @@ struct NewSymbolInfo {
std::vector<StringRef> BeforeSyms;
};
+// Specify section name and section body for newly added or updated section.
+// Specifies the section name and body for a newly added or updated section.
+ NewSectionInfo() = default;
+ NewSectionInfo(StringRef Name, std::unique_ptr<MemoryBuffer> &&Buffer)
+ : SectionName(Name), SectionData(std::move(Buffer)) {}
+
+ StringRef SectionName;
+ std::shared_ptr<MemoryBuffer> SectionData;
+};
+
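
Populating the retyped lists is short. A sketch with an illustrative section name and payload:

    #include "llvm/ObjCopy/CommonConfig.h"
    #include "llvm/Support/MemoryBuffer.h"

    void addNote(llvm::objcopy::CommonConfig &Config) {
      std::unique_ptr<llvm::MemoryBuffer> Buf =
          llvm::MemoryBuffer::getMemBufferCopy("payload", ".note.example");
      Config.AddSection.emplace_back(".note.example", std::move(Buf));
    }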
// Configuration for copying/stripping a single file.
struct CommonConfig {
// Main input/output options
@@ -208,9 +219,9 @@ struct CommonConfig {
DiscardType DiscardMode = DiscardType::None;
// Repeated options
- std::vector<StringRef> AddSection;
+ std::vector<NewSectionInfo> AddSection;
std::vector<StringRef> DumpSection;
- std::vector<StringRef> UpdateSection;
+ std::vector<NewSectionInfo> UpdateSection;
// Section matchers
NameMatcher KeepSection;
@@ -257,4 +268,4 @@ struct CommonConfig {
} // namespace objcopy
} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_COMMONCONFIG_H
+#endif // LLVM_OBJCOPY_COMMONCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/ConfigManager.h b/llvm/include/llvm/ObjCopy/ConfigManager.h
new file mode 100644
index 000000000000..2962cf99b270
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/ConfigManager.h
@@ -0,0 +1,50 @@
+//===- ConfigManager.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_CONFIGMANAGER_H
+#define LLVM_OBJCOPY_CONFIGMANAGER_H
+
+#include "llvm/ObjCopy/COFF/COFFConfig.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ELF/ELFConfig.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/wasm/WasmConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h"
+
+namespace llvm {
+namespace objcopy {
+
+struct ConfigManager : public MultiFormatConfig {
+ virtual ~ConfigManager() {}
+
+ const CommonConfig &getCommonConfig() const override { return Common; }
+
+ Expected<const ELFConfig &> getELFConfig() const override { return ELF; }
+
+ Expected<const COFFConfig &> getCOFFConfig() const override;
+
+ Expected<const MachOConfig &> getMachOConfig() const override;
+
+ Expected<const WasmConfig &> getWasmConfig() const override;
+
+ Expected<const XCOFFConfig &> getXCOFFConfig() const override;
+
+ // All configs.
+ CommonConfig Common;
+ ELFConfig ELF;
+ COFFConfig COFF;
+ MachOConfig MachO;
+ WasmConfig Wasm;
+ XCOFFConfig XCOFF;
+};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_CONFIGMANAGER_H
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
index 229a8d61fb83..52bc728e36ff 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
+++ b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_ELF_ELFCONFIG_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_ELF_ELFCONFIG_H
+#ifndef LLVM_OBJCOPY_ELF_ELFCONFIG_H
+#define LLVM_OBJCOPY_ELF_ELFCONFIG_H
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
@@ -35,4 +35,4 @@ struct ELFConfig {
} // namespace objcopy
} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_ELF_ELFCONFIG_H
+#endif // LLVM_OBJCOPY_ELF_ELFCONFIG_H
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h b/llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h
index 852661e68f37..552b6fb655f1 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.h
+++ b/llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
-#define LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
+#ifndef LLVM_OBJCOPY_ELF_ELFOBJCOPY_H
+#define LLVM_OBJCOPY_ELF_ELFOBJCOPY_H
namespace llvm {
class Error;
@@ -23,12 +23,25 @@ struct CommonConfig;
struct ELFConfig;
namespace elf {
+/// Apply the transformations described by \p Config and \p ELFConfig to
+/// \p In, which must represent an IHex file, and write the result
+/// into \p Out.
+/// \returns any Error encountered whilst performing the operation.
Error executeObjcopyOnIHex(const CommonConfig &Config,
const ELFConfig &ELFConfig, MemoryBuffer &In,
raw_ostream &Out);
+
+/// Apply the transformations described by \p Config and \p ELFConfig to
+/// \p In, which is treated as a raw binary input, and write the result
+/// into \p Out.
+/// \returns any Error encountered whilst performing the operation.
Error executeObjcopyOnRawBinary(const CommonConfig &Config,
const ELFConfig &ELFConfig, MemoryBuffer &In,
raw_ostream &Out);
+
+/// Apply the transformations described by \p Config and \p ELFConfig to
+/// \p In and write the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
Error executeObjcopyOnBinary(const CommonConfig &Config,
const ELFConfig &ELFConfig,
object::ELFObjectFileBase &In, raw_ostream &Out);
@@ -37,4 +50,4 @@ Error executeObjcopyOnBinary(const CommonConfig &Config,
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
+#endif // LLVM_OBJCOPY_ELF_ELFOBJCOPY_H
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h b/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h
index 93f9facfcf0b..c5f861363297 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h
+++ b/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
+#ifndef LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
+#define LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -29,6 +29,9 @@ struct MachOConfig {
// install-name-tool's id option
Optional<StringRef> SharedLibId;
+ // Segments to remove if they are empty
+ DenseSet<StringRef> EmptySegmentsToRemove;
+
// Boolean options
bool StripSwiftSymbols = false;
bool KeepUndefined = false;
@@ -40,4 +43,4 @@ struct MachOConfig {
} // namespace objcopy
} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
+#endif // LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h
index d03eee9d5fdb..73690d7ace8a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
+++ b/llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
-#define LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+#ifndef LLVM_OBJCOPY_MACHO_MACHOOBJCOPY_H
+#define LLVM_OBJCOPY_MACHO_MACHOOBJCOPY_H
namespace llvm {
class Error;
@@ -24,10 +24,16 @@ struct MachOConfig;
class MultiFormatConfig;
namespace macho {
+/// Apply the transformations described by \p Config and \p MachOConfig to
+/// \p In and write the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
Error executeObjcopyOnBinary(const CommonConfig &Config,
const MachOConfig &MachOConfig,
object::MachOObjectFile &In, raw_ostream &Out);
+/// Apply the transformations described by \p Config and \p MachOConfig to
+/// \p In and write the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
Error executeObjcopyOnMachOUniversalBinary(
const MultiFormatConfig &Config, const object::MachOUniversalBinary &In,
raw_ostream &Out);
@@ -36,4 +42,4 @@ Error executeObjcopyOnMachOUniversalBinary(
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+#endif // LLVM_OBJCOPY_MACHO_MACHOOBJCOPY_H
diff --git a/llvm/tools/llvm-objcopy/MultiFormatConfig.h b/llvm/include/llvm/ObjCopy/MultiFormatConfig.h
index 31d9883d6d3a..180f2f82a908 100644
--- a/llvm/tools/llvm-objcopy/MultiFormatConfig.h
+++ b/llvm/include/llvm/ObjCopy/MultiFormatConfig.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_MULTIFORMATCONFIG_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_MULTIFORMATCONFIG_H
+#ifndef LLVM_OBJCOPY_MULTIFORMATCONFIG_H
+#define LLVM_OBJCOPY_MULTIFORMATCONFIG_H
#include "llvm/Support/Error.h"
@@ -19,6 +19,7 @@ struct ELFConfig;
struct COFFConfig;
struct MachOConfig;
struct WasmConfig;
+struct XCOFFConfig;
class MultiFormatConfig {
public:
@@ -29,9 +30,10 @@ public:
virtual Expected<const COFFConfig &> getCOFFConfig() const = 0;
virtual Expected<const MachOConfig &> getMachOConfig() const = 0;
virtual Expected<const WasmConfig &> getWasmConfig() const = 0;
+ virtual Expected<const XCOFFConfig &> getXCOFFConfig() const = 0;
};
} // namespace objcopy
} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_MULTIFORMATCONFIG_H
+#endif // LLVM_OBJCOPY_MULTIFORMATCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/ObjCopy.h b/llvm/include/llvm/ObjCopy/ObjCopy.h
new file mode 100644
index 000000000000..023814002c72
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/ObjCopy.h
@@ -0,0 +1,42 @@
+//===- ObjCopy.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_OBJCOPY_H
+#define LLVM_OBJCOPY_OBJCOPY_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+class raw_ostream;
+
+namespace object {
+class Archive;
+class Binary;
+} // end namespace object
+
+namespace objcopy {
+class MultiFormatConfig;
+
+/// Applies the transformations described by \p Config to
+/// each member in archive \p Ar.
+/// Writes the result to the file specified by \p Config.OutputFilename.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnArchive(const MultiFormatConfig &Config,
+ const object::Archive &Ar);
+
+/// Applies the transformations described by \p Config to \p In and writes
+/// the result into \p Out. This function does the dispatch based on the
+/// format of the input binary (COFF, ELF, MachO or wasm).
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const MultiFormatConfig &Config,
+ object::Binary &In, raw_ostream &Out);
+
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_OBJCOPY_H
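
End to end, the relocated headers let objcopy be driven in-process. A sketch combining the ConfigManager added above with this dispatching entry point; treating StripDebug as one of CommonConfig's boolean options is an assumption here:

    #include "llvm/ObjCopy/ConfigManager.h"
    #include "llvm/ObjCopy/ObjCopy.h"
    #include "llvm/Object/Binary.h"
    #include "llvm/Support/raw_ostream.h"

    llvm::Error stripDebug(llvm::object::Binary &In, llvm::raw_ostream &Out) {
      llvm::objcopy::ConfigManager CM;
      CM.Common.StripDebug = true; // assumed CommonConfig flag
      // Dispatches on the input format (COFF, ELF, MachO or wasm).
      return llvm::objcopy::executeObjcopyOnBinary(CM, In, Out);
    }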
diff --git a/llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h
new file mode 100644
index 000000000000..adaeedc82b73
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h
@@ -0,0 +1,21 @@
+//===- XCOFFConfig.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_XCOFF_XCOFFCONFIG_H
+#define LLVM_OBJCOPY_XCOFF_XCOFFCONFIG_H
+
+namespace llvm {
+namespace objcopy {
+
+// XCOFF specific configuration for copying/stripping a single file.
+struct XCOFFConfig {};
+
+} // namespace objcopy
+} // namespace llvm
+
+#endif // LLVM_OBJCOPY_XCOFF_XCOFFCONFIG_H
diff --git a/llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h
new file mode 100644
index 000000000000..9fc85cb39fa5
--- /dev/null
+++ b/llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h
@@ -0,0 +1,35 @@
+//===- XCOFFObjcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_XCOFF_XCOFFOBJCOPY_H
+#define LLVM_OBJCOPY_XCOFF_XCOFFOBJCOPY_H
+
+namespace llvm {
+class Error;
+class raw_ostream;
+
+namespace object {
+class XCOFFObjectFile;
+} // end namespace object
+
+namespace objcopy {
+struct CommonConfig;
+struct XCOFFConfig;
+
+namespace xcoff {
+/// Apply the transformations described by \p Config and \p XCOFFConfig
+/// to \p In and write the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
+Error executeObjcopyOnBinary(const CommonConfig &Config, const XCOFFConfig &,
+ object::XCOFFObjectFile &In, raw_ostream &Out);
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_XCOFF_XCOFFOBJCOPY_H
diff --git a/llvm/tools/llvm-objcopy/wasm/WasmConfig.h b/llvm/include/llvm/ObjCopy/wasm/WasmConfig.h
index 4e40926ae453..56a7055da9a7 100644
--- a/llvm/tools/llvm-objcopy/wasm/WasmConfig.h
+++ b/llvm/include/llvm/ObjCopy/wasm/WasmConfig.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMCONFIG_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMCONFIG_H
+#ifndef LLVM_OBJCOPY_WASM_WASMCONFIG_H
+#define LLVM_OBJCOPY_WASM_WASMCONFIG_H
namespace llvm {
namespace objcopy {
@@ -18,4 +18,4 @@ struct WasmConfig {};
} // namespace objcopy
} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMCONFIG_H
+#endif // LLVM_OBJCOPY_WASM_WASMCONFIG_H
diff --git a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h b/llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h
index 28268e38c584..5b4181c22b97 100644
--- a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.h
+++ b/llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
+#ifndef LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
+#define LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
namespace llvm {
class Error;
@@ -22,6 +22,9 @@ struct CommonConfig;
struct WasmConfig;
namespace wasm {
+/// Apply the transformations described by \p Config and \p WasmConfig
+/// to \p In and write the result into \p Out.
+/// \returns any Error encountered whilst performing the operation.
Error executeObjcopyOnBinary(const CommonConfig &Config, const WasmConfig &,
object::WasmObjectFile &In, raw_ostream &Out);
@@ -29,4 +32,4 @@ Error executeObjcopyOnBinary(const CommonConfig &Config, const WasmConfig &,
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
+#endif // LLVM_OBJCOPY_WASM_WASMOBJCOPY_H
diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h
index b792cbc3d9ac..a36c9bd6163b 100644
--- a/llvm/include/llvm/Object/Archive.h
+++ b/llvm/include/llvm/Object/Archive.h
@@ -13,7 +13,6 @@
#ifndef LLVM_OBJECT_ARCHIVE_H
#define LLVM_OBJECT_ARCHIVE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/fallible_iterator.h"
#include "llvm/ADT/iterator_range.h"
@@ -22,7 +21,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
@@ -30,6 +28,9 @@
#include <vector>
namespace llvm {
+
+template <typename T> class Optional;
+
namespace object {
const char ArchiveMagic[] = "!<arch>\n";
@@ -339,6 +340,7 @@ public:
Kind kind() const { return (Kind)Format; }
bool isThin() const { return IsThin; }
+ static object::Archive::Kind getDefaultKindForHost();
child_iterator child_begin(Error &Err, bool SkipInternal = true) const;
child_iterator child_end() const;
@@ -358,7 +360,7 @@ public:
// check if a symbol is in the archive
Expected<Optional<Child>> findSym(StringRef name) const;
- bool isEmpty() const;
+ virtual bool isEmpty() const;
bool hasSymbolTable() const;
StringRef getSymbolTable() const { return SymbolTable; }
StringRef getStringTable() const { return StringTable; }
@@ -390,6 +392,7 @@ private:
};
class BigArchive : public Archive {
+public:
/// Fixed-Length Header.
struct FixLenHdr {
char Magic[sizeof(BigArchiveMagic) - 1]; ///< Big archive magic string.
@@ -410,6 +413,9 @@ public:
BigArchive(MemoryBufferRef Source, Error &Err);
uint64_t getFirstChildOffset() const override { return FirstChildOffset; }
uint64_t getLastChildOffset() const { return LastChildOffset; }
+ bool isEmpty() const override {
+ return Data.getBufferSize() == sizeof(FixLenHdr);
+  }
};
} // end namespace object
diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index 7eaf13e8fb22..6acab45215da 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -26,6 +26,11 @@ struct NewArchiveMember {
NewArchiveMember() = default;
NewArchiveMember(MemoryBufferRef BufRef);
+ // Detect the archive format from the object or bitcode file. This is used
+ // to infer the archive format when creating or editing an archive and no
+ // format has been explicitly set.
+ object::Archive::Kind detectKindFromObject() const;
+
static Expected<NewArchiveMember>
getOldMember(const object::Archive::Child &OldMember, bool Deterministic);
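A sketch of how the new hook might combine with Archive::getDefaultKindForHost() when no format is requested (chooseKind is hypothetical):

    static object::Archive::Kind chooseKind(ArrayRef<NewArchiveMember> Members) {
      // Infer the format from the first member; otherwise fall back to the
      // host's default archive kind.
      if (!Members.empty())
        return Members.front().detectKindFromObject();
      return object::Archive::getDefaultKindForHost();
    }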
diff --git a/llvm/include/llvm/Object/Binary.h b/llvm/include/llvm/Object/Binary.h
index a8f4437d5dbb..53b299ae8612 100644
--- a/llvm/include/llvm/Object/Binary.h
+++ b/llvm/include/llvm/Object/Binary.h
@@ -16,9 +16,9 @@
#include "llvm-c/Types.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Object/Error.h"
+#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
#include <memory>
#include <utility>
@@ -50,6 +50,8 @@ protected:
ID_WinRes, // Windows resource (.res) file.
+ ID_Offload, // Offloading binary file.
+
// Object and children.
ID_StartObjects,
ID_COFF,
@@ -133,6 +135,8 @@ public:
bool isWasm() const { return TypeID == ID_Wasm; }
+ bool isOffloadFile() const { return TypeID == ID_Offload; }
+
bool isCOFFImportFile() const {
return TypeID == ID_COFFImportFile;
}
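The new type ID participates in the usual Binary dispatch; a sketch, with handleOffload standing in for caller code:

    if (Bin->isOffloadFile())
      handleOffload(cast<object::OffloadBinary>(Bin)); // classof checks ID_Offload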
diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h
index 3add3811069b..0b6975b9590f 100644
--- a/llvm/include/llvm/Object/COFF.h
+++ b/llvm/include/llvm/Object/COFF.h
@@ -1079,13 +1079,15 @@ public:
uint64_t getImageBase() const;
Error getVaPtr(uint64_t VA, uintptr_t &Res) const;
- Error getRvaPtr(uint32_t Rva, uintptr_t &Res) const;
+ Error getRvaPtr(uint32_t Rva, uintptr_t &Res,
+ const char *ErrorContext = nullptr) const;
/// Given an RVA base and size, returns a valid array of bytes or an error
/// code if the RVA and size is not contained completely within a valid
/// section.
Error getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size,
- ArrayRef<uint8_t> &Contents) const;
+ ArrayRef<uint8_t> &Contents,
+ const char *ErrorContext = nullptr) const;
Error getHintName(uint32_t Rva, uint16_t &Hint,
StringRef &Name) const;
@@ -1296,6 +1298,12 @@ struct FpoData {
frame_type getFP() const { return static_cast<frame_type>(Attributes >> 14); }
};
+class SectionStrippedError
+ : public ErrorInfo<SectionStrippedError, BinaryError> {
+public:
+ SectionStrippedError() { setErrorCode(object_error::section_stripped); }
+};
+
} // end namespace object
} // end namespace llvm
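A sketch of the extended signatures; Obj and Rva are assumed inputs, and the context string is folded into any resulting error message:

    uintptr_t IntPtr = 0;
    if (Error E = Obj.getRvaPtr(Rva, IntPtr, /*ErrorContext=*/"import table"))
      return E; // the failure now names what was being resolved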
diff --git a/llvm/include/llvm/Object/COFFImportFile.h b/llvm/include/llvm/Object/COFFImportFile.h
index 0da0d8fa70c9..f8f0e0343b22 100644
--- a/llvm/include/llvm/Object/COFFImportFile.h
+++ b/llvm/include/llvm/Object/COFFImportFile.h
@@ -18,10 +18,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/COFF.h"
-#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
diff --git a/llvm/include/llvm/Object/COFFModuleDefinition.h b/llvm/include/llvm/Object/COFFModuleDefinition.h
index fb3d0952e3a3..8e14dd61472d 100644
--- a/llvm/include/llvm/Object/COFFModuleDefinition.h
+++ b/llvm/include/llvm/Object/COFFModuleDefinition.h
@@ -18,7 +18,7 @@
#ifndef LLVM_OBJECT_COFFMODULEDEFINITION_H
#define LLVM_OBJECT_COFFMODULEDEFINITION_H
-#include "llvm/Object/COFF.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFFImportFile.h"
namespace llvm {
diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h
new file mode 100644
index 000000000000..7aa7d8ecf4c7
--- /dev/null
+++ b/llvm/include/llvm/Object/DXContainer.h
@@ -0,0 +1,124 @@
+//===- DXContainer.h - DXContainer file implementation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the DXContainer class, which implements the ObjectFile
+// interface for DXContainer files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_DXCONTAINER_H
+#define LLVM_OBJECT_DXCONTAINER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+namespace llvm {
+namespace object {
+class DXContainer {
+public:
+ using DXILData = std::pair<dxbc::ProgramHeader, const char *>;
+
+private:
+ DXContainer(MemoryBufferRef O);
+
+ MemoryBufferRef Data;
+ dxbc::Header Header;
+ SmallVector<uint32_t, 4> PartOffsets;
+ Optional<DXILData> DXIL;
+
+ Error parseHeader();
+ Error parsePartOffsets();
+ Error parseDXILHeader(uint32_t Offset);
+ friend class PartIterator;
+
+public:
+ // The PartIterator is a wrapper around the iterator for the PartOffsets
+ // member of the DXContainer. It contains a reference to the container and
+ // the current iterator value, as well as storage for a parsed part header.
+ class PartIterator {
+ const DXContainer &Container;
+ SmallVectorImpl<uint32_t>::const_iterator OffsetIt;
+ struct PartData {
+ dxbc::PartHeader Part;
+ uint32_t Offset;
+ StringRef Data;
+ } IteratorState;
+
+ friend class DXContainer;
+
+ PartIterator(const DXContainer &C,
+ SmallVectorImpl<uint32_t>::const_iterator It)
+ : Container(C), OffsetIt(It) {
+ if (OffsetIt == Container.PartOffsets.end())
+ updateIteratorImpl(Container.PartOffsets.back());
+ else
+ updateIterator();
+ }
+
+ // Updates the iterator's state data. This results in copying the part
+ // header into the iterator and handling any required byte swapping. This is
+ // called when incrementing or decrementing the iterator.
+ void updateIterator() {
+ if (OffsetIt != Container.PartOffsets.end())
+ updateIteratorImpl(*OffsetIt);
+ }
+
+ // Implementation for updating the iterator state based on a specified
+ // offset.
+ void updateIteratorImpl(const uint32_t Offset);
+
+ public:
+ PartIterator &operator++() {
+ if (OffsetIt == Container.PartOffsets.end())
+ return *this;
+ ++OffsetIt;
+ updateIterator();
+ return *this;
+ }
+
+ PartIterator operator++(int) {
+ PartIterator Tmp = *this;
+ ++(*this);
+ return Tmp;
+ }
+
+ bool operator==(const PartIterator &RHS) const {
+ return OffsetIt == RHS.OffsetIt;
+ }
+
+ bool operator!=(const PartIterator &RHS) const {
+ return OffsetIt != RHS.OffsetIt;
+ }
+
+ const PartData &operator*() { return IteratorState; }
+ const PartData *operator->() { return &IteratorState; }
+ };
+
+ PartIterator begin() const {
+ return PartIterator(*this, PartOffsets.begin());
+ }
+
+ PartIterator end() const { return PartIterator(*this, PartOffsets.end()); }
+
+ StringRef getData() const { return Data.getBuffer(); }
+ static Expected<DXContainer> create(MemoryBufferRef Object);
+
+ const dxbc::Header &getHeader() const { return Header; }
+
+ Optional<DXILData> getDXIL() const { return DXIL; }
+};
+
+} // namespace object
+} // namespace llvm
+
+#endif // LLVM_OBJECT_DXCONTAINER_H
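A sketch of iterating the parts of a parsed container, assuming Buf is a MemoryBufferRef over DXContainer data:

    Expected<object::DXContainer> C = object::DXContainer::create(Buf);
    if (!C)
      return C.takeError();
    for (const auto &P : *C) // PartIterator yields the PartData struct above
      outs() << "part at offset " << P.Offset << " with " << P.Data.size()
             << " bytes of contents\n";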
diff --git a/llvm/include/llvm/Object/Decompressor.h b/llvm/include/llvm/Object/Decompressor.h
index cc918481b308..e04ee3c3e4c0 100644
--- a/llvm/include/llvm/Object/Decompressor.h
+++ b/llvm/include/llvm/Object/Decompressor.h
@@ -9,13 +9,15 @@
#ifndef LLVM_OBJECT_DECOMPRESSOR_H
#define LLVM_OBJECT_DECOMPRESSOR_H
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
namespace llvm {
namespace object {
+class SectionRef;
+
/// Decompressor helps to handle decompression of compressed sections.
class Decompressor {
public:
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 37f23c435ae1..1a59ba94098f 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -855,7 +855,7 @@ Expected<StringRef> ELFFile<ELFT>::getSymbolVersionByIndex(
const VersionEntry &Entry = *VersionMap[VersionIndex];
// A default version (@@) is only available for defined symbols.
- if (!Entry.IsVerDef || IsSymHidden.getValueOr(false))
+ if (!Entry.IsVerDef || IsSymHidden.value_or(false))
IsDefault = false;
else
IsDefault = !(SymbolVersionIndex & llvm::ELF::VERSYM_HIDDEN);
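This hunk is part of the tree-wide migration from Optional::getValueOr to the std::optional-style spelling; the two calls are equivalent:

    Optional<bool> IsSymHidden;                 // empty unless set by the caller
    bool Hidden = IsSymHidden.value_or(false);  // formerly getValueOr(false)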
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index e2d2784d4f23..c449a3dafc0c 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -15,7 +15,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
@@ -27,19 +26,21 @@
#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
-#include "llvm/Support/ARMAttributeParser.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/ELFAttributeParser.h"
#include "llvm/Support/ELFAttributes.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/ScopedPrinter.h"
#include <cassert>
#include <cstdint>
-#include <system_error>
namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+
namespace object {
constexpr int NumElfSymbolTypes = 16;
@@ -101,6 +102,12 @@ public:
/// Returns a vector containing a symbol version for each dynamic symbol.
/// Returns an empty vector if version sections do not exist.
Expected<std::vector<VersionEntry>> readDynsymVersions() const;
+
+ /// Returns a vector of all BB address maps in the object file. When
+ /// `TextSectionIndex` is specified, only returns the BB address maps
+ /// corresponding to the section with that index.
+ Expected<std::vector<BBAddrMap>>
+ readBBAddrMap(Optional<unsigned> TextSectionIndex = None) const;
};
class ELFSectionRef : public SectionRef {
@@ -1167,7 +1174,7 @@ uint8_t ELFObjectFile<ELFT>::getBytesInAddress() const {
template <class ELFT>
StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
- bool IsLittleEndian = ELFT::TargetEndianness == support::little;
+ constexpr bool IsLittleEndian = ELFT::TargetEndianness == support::little;
switch (EF.getHeader().e_ident[ELF::EI_CLASS]) {
case ELF::ELFCLASS32:
switch (EF.getHeader().e_machine) {
@@ -1202,6 +1209,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "elf32-sparc";
case ELF::EM_AMDGPU:
return "elf32-amdgpu";
+ case ELF::EM_LOONGARCH:
+ return "elf32-loongarch";
default:
return "elf32-unknown";
}
@@ -1229,6 +1238,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "elf64-bpf";
case ELF::EM_VE:
return "elf64-ve";
+ case ELF::EM_LOONGARCH:
+ return "elf64-loongarch";
default:
return "elf64-unknown";
}
@@ -1313,6 +1324,17 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const {
return Triple::ve;
case ELF::EM_CSKY:
return Triple::csky;
+
+ case ELF::EM_LOONGARCH:
+ switch (EF.getHeader().e_ident[ELF::EI_CLASS]) {
+ case ELF::ELFCLASS32:
+ return Triple::loongarch32;
+ case ELF::ELFCLASS64:
+ return Triple::loongarch64;
+ default:
+ report_fatal_error("Invalid ELFCLASS!");
+ }
+
default:
return Triple::UnknownArch;
}
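A sketch of the new BB-address-map query, assuming Obj is an ELFObjectFileBase and index 1 names a text section:

    Expected<std::vector<object::BBAddrMap>> MapsOrErr =
        Obj.readBBAddrMap(/*TextSectionIndex=*/1);
    if (!MapsOrErr)
      return MapsOrErr.takeError();
    for (const object::BBAddrMap &Map : *MapsOrErr)
      outs() << "function at 0x" << Twine::utohexstr(Map.Addr) << " has "
             << Map.BBEntries.size() << " basic block entries\n";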
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index c674b80c814d..5942b6f1d0a1 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -812,8 +812,20 @@ struct BBAddrMap {
: Offset(Offset), Size(Size), HasReturn(Metadata & 1),
HasTailCall(Metadata & (1 << 1)), IsEHPad(Metadata & (1 << 2)),
CanFallThrough(Metadata & (1 << 3)){};
+
+ bool operator==(const BBEntry &Other) const {
+ return Offset == Other.Offset && Size == Other.Size &&
+ HasReturn == Other.HasReturn && HasTailCall == Other.HasTailCall &&
+ IsEHPad == Other.IsEHPad && CanFallThrough == Other.CanFallThrough;
+ }
};
std::vector<BBEntry> BBEntries; // Basic block entries for this function.
+
+ // Equality operator for unit testing.
+ bool operator==(const BBAddrMap &Other) const {
+ return Addr == Other.Addr && std::equal(BBEntries.begin(), BBEntries.end(),
+ Other.BBEntries.begin());
+ }
};
} // end namespace object.
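As the comment says, the operators exist for unit testing; a sketch:

    object::BBAddrMap::BBEntry A(/*Offset=*/0, /*Size=*/16, /*Metadata=*/0b0011);
    object::BBAddrMap::BBEntry B(/*Offset=*/0, /*Size=*/16, /*Metadata=*/0b0011);
    assert(A == B && "entries decoded from identical fields compare equal");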
diff --git a/llvm/include/llvm/Object/Error.h b/llvm/include/llvm/Object/Error.h
index af334fc42658..8875fb6e1a20 100644
--- a/llvm/include/llvm/Object/Error.h
+++ b/llvm/include/llvm/Object/Error.h
@@ -34,6 +34,7 @@ enum class object_error {
invalid_section_index,
bitcode_section_not_found,
invalid_symbol_index,
+ section_stripped,
};
inline std::error_code make_error_code(object_error e) {
diff --git a/llvm/include/llvm/Object/IRObjectFile.h b/llvm/include/llvm/Object/IRObjectFile.h
index db47960237a0..6b3f2cd5671c 100644
--- a/llvm/include/llvm/Object/IRObjectFile.h
+++ b/llvm/include/llvm/Object/IRObjectFile.h
@@ -13,7 +13,6 @@
#ifndef LLVM_OBJECT_IROBJECTFILE_H
#define LLVM_OBJECT_IROBJECTFILE_H
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/Object/IRSymtab.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index 49a0706b84be..4ec366055db6 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -260,6 +260,124 @@ private:
};
using bind_iterator = content_iterator<MachOBindEntry>;
+/// ChainedFixupTarget holds all the information about an external symbol
+/// necessary to bind this binary to that symbol. These values are referenced
+/// indirectly by chained fixup binds. This structure captures values from all
+/// import and symbol formats.
+///
+/// Be aware there are two notions of weak here:
+/// WeakImport == true
+/// The associated bind may be set to 0 if this symbol is missing from its
+/// parent library. This is called a "weak import."
+/// LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP
+/// This symbol may be coalesced with other libraries vending the same
+/// symbol. E.g., C++'s "operator new". This is called a "weak bind."
+struct ChainedFixupTarget {
+public:
+ ChainedFixupTarget(int LibOrdinal, StringRef Symbol, uint64_t Addend,
+ bool WeakImport)
+ : LibOrdinal(LibOrdinal), SymbolName(Symbol), Addend(Addend),
+ WeakImport(WeakImport) {}
+
+ int libOrdinal() { return LibOrdinal; }
+ StringRef symbolName() { return SymbolName; }
+ uint64_t addend() { return Addend; }
+ bool weakImport() { return WeakImport; }
+ bool weakBind() {
+ return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
+ }
+
+private:
+ int LibOrdinal;
+ StringRef SymbolName;
+ uint64_t Addend;
+ bool WeakImport;
+};
+
+/// MachOAbstractFixupEntry is an abstract class representing a fixup in a
+/// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also
+/// subdivide into additional subtypes (weak, lazy, reexport).
+///
+/// The two concrete subclasses of MachOAbstractFixupEntry are:
+///
+/// MachORebaseBindEntry - for dyld opcode-based tables, including threaded-
+/// rebase, where rebases are mixed in with other
+/// bind opcodes.
+/// MachOChainedFixupEntry - for pointer chains embedded in data pages.
+class MachOAbstractFixupEntry {
+public:
+ MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O);
+
+ int32_t segmentIndex() const;
+ uint64_t segmentOffset() const;
+ uint64_t segmentAddress() const;
+ StringRef segmentName() const;
+ StringRef sectionName() const;
+ StringRef typeName() const;
+ StringRef symbolName() const;
+ uint32_t flags() const;
+ int64_t addend() const;
+ int ordinal() const;
+
+ /// \return the location of this fixup as a VM Address. For the VM
+ /// Address this fixup is pointing to, use pointerValue().
+ uint64_t address() const;
+
+ /// \return the VM Address pointed to by this fixup. Use
+ /// pointerValue() to compare against other VM Addresses, such as
+ /// section addresses or segment vmaddrs.
+ uint64_t pointerValue() const { return PointerValue; }
+
+ /// \return the raw "on-disk" representation of the fixup. For
+ /// threaded rebases and chained pointers these values are generally
+ /// encoded into various pointer formats. This value is
+ /// exposed in the API for tools that want to display and annotate the
+ /// raw bits.
+ uint64_t rawValue() const { return RawValue; }
+
+ void moveNext();
+
+protected:
+ Error *E;
+ const MachOObjectFile *O;
+ uint64_t SegmentOffset = 0;
+ int32_t SegmentIndex = -1;
+ StringRef SymbolName;
+ int32_t Ordinal = 0;
+ uint32_t Flags = 0;
+ int64_t Addend = 0;
+ uint64_t PointerValue = 0;
+ uint64_t RawValue = 0;
+ bool Done = false;
+
+ void moveToFirst();
+ void moveToEnd();
+
+ /// \return the vm address of the start of __TEXT segment.
+ uint64_t textAddress() const { return TextAddress; }
+
+private:
+ uint64_t TextAddress;
+};
+
+class MachOChainedFixupEntry : public MachOAbstractFixupEntry {
+public:
+ enum class FixupKind { All, Bind, WeakBind, Rebase };
+
+ MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, bool Parse);
+
+ bool operator==(const MachOChainedFixupEntry &) const;
+
+ void moveNext();
+ void moveToFirst();
+ void moveToEnd();
+
+private:
+ std::vector<ChainedFixupTarget> FixupTargets;
+ uint32_t FixupIndex = 0;
+};
+using fixup_iterator = content_iterator<MachOChainedFixupEntry>;
+
class MachOObjectFile : public ObjectFile {
public:
struct LoadCommandInfo {
@@ -273,6 +391,8 @@ public:
create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0);
+ static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch);
+
void moveSymbolNext(DataRefImpl &Symb) const override;
uint64_t getNValue(DataRefImpl Sym) const;
@@ -402,6 +522,9 @@ public:
/// For use iterating over all bind table entries.
iterator_range<bind_iterator> bindTable(Error &Err);
+ /// For iterating over all chained fixups.
+ iterator_range<fixup_iterator> fixupTable(Error &Err);
+
/// For use iterating over all lazy bind table entries.
iterator_range<bind_iterator> lazyBindTable(Error &Err);
@@ -562,7 +685,12 @@ public:
ArrayRef<uint8_t> getDyldInfoBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
+ /// If the optional is None, no header was found, but the object was well-formed.
+ Expected<Optional<MachO::dyld_chained_fixups_header>>
+ getChainedFixupsHeader() const;
+ Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const;
ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
+ SmallVector<uint64_t> getFunctionStarts() const;
ArrayRef<uint8_t> getUuid() const;
StringRef getStringTableData() const;
@@ -689,6 +817,8 @@ private:
const char *DataInCodeLoadCmd = nullptr;
const char *LinkOptHintsLoadCmd = nullptr;
const char *DyldInfoLoadCmd = nullptr;
+ const char *FuncStartsLoadCmd = nullptr;
+ const char *DyldChainedFixupsLoadCmd = nullptr;
const char *UuidLoadCmd = nullptr;
bool HasPageZeroSegment = false;
};
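A sketch of consuming the new fixup iterator, assuming Obj is a MachOObjectFile that uses LC_DYLD_CHAINED_FIXUPS:

    Error Err = Error::success();
    for (const object::MachOChainedFixupEntry &Fixup : Obj.fixupTable(Err))
      outs() << Fixup.segmentName() << " + " << Fixup.segmentOffset() << "\n";
    if (Err)
      return Err; // iteration failures surface through the Error out-parameter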
diff --git a/llvm/include/llvm/Object/MachOUniversal.h b/llvm/include/llvm/Object/MachOUniversal.h
index e87eb31aad4e..4fe7a68d9680 100644
--- a/llvm/include/llvm/Object/MachOUniversal.h
+++ b/llvm/include/llvm/Object/MachOUniversal.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/MachO.h"
@@ -25,6 +24,7 @@ class StringRef;
class LLVMContext;
namespace object {
+class Archive;
class IRObjectFile;
class MachOUniversalBinary : public Binary {
diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h
index 8d095766cf48..4004f25f3fb7 100644
--- a/llvm/include/llvm/Object/MachOUniversalWriter.h
+++ b/llvm/include/llvm/Object/MachOUniversalWriter.h
@@ -14,15 +14,22 @@
#ifndef LLVM_OBJECT_MACHOUNIVERSALWRITER_H
#define LLVM_OBJECT_MACHOUNIVERSALWRITER_H
-#include "llvm/Object/Archive.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/MachO.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <string>
namespace llvm {
class LLVMContext;
namespace object {
+class Archive;
+class Binary;
class IRObjectFile;
+class MachOObjectFile;
class Slice {
const Binary *B;
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 950c38a599d5..8754c229bd4b 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -13,7 +13,8 @@
#ifndef LLVM_OBJECT_OBJECTFILE_H
#define LLVM_OBJECT_OBJECTFILE_H
-#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
@@ -24,11 +25,10 @@
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <cassert>
#include <cstdint>
#include <memory>
-#include <system_error>
namespace llvm {
@@ -170,11 +170,11 @@ class SymbolRef : public BasicSymbolRef {
public:
enum Type {
ST_Unknown, // Type not specified
+ ST_Other,
ST_Data,
ST_Debug,
ST_File,
ST_Function,
- ST_Other
};
SymbolRef() = default;
@@ -350,6 +350,11 @@ public:
/// True if this is a relocatable object (.o/.obj).
virtual bool isRelocatableObject() const = 0;
+ /// True if the reflection section can be stripped by the linker.
+ bool isReflectionSectionStrippable(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflectionSectionKind)
+ const;
+
/// @returns Pointer to ObjectFile subclass to handle this type of object.
/// @param ObjectPath The path to the object file. ObjectPath.isObject must
/// return true.
diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h
new file mode 100644
index 000000000000..5afc3ed295ae
--- /dev/null
+++ b/llvm/include/llvm/Object/OffloadBinary.h
@@ -0,0 +1,169 @@
+//===-- OffloadBinary.h - Utilities for handling offloading code -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the binary format used for bundling device metadata with
+// an associated device image. The data can then be stored inside a host object
+// file to create a fat binary and read by the linker. This is intended to be a
+// thin wrapper around the image itself. If this format becomes sufficiently
+// complex it should be moved to a standard binary format like msgpack or ELF.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_OFFLOADBINARY_H
+#define LLVM_OBJECT_OFFLOADBINARY_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
+
+namespace llvm {
+
+namespace object {
+
+/// The producer of the associated offloading image.
+enum OffloadKind : uint16_t {
+ OFK_None = 0,
+ OFK_OpenMP,
+ OFK_Cuda,
+ OFK_HIP,
+ OFK_LAST,
+};
+
+/// The type of contents the offloading image contains.
+enum ImageKind : uint16_t {
+ IMG_None = 0,
+ IMG_Object,
+ IMG_Bitcode,
+ IMG_Cubin,
+ IMG_Fatbinary,
+ IMG_PTX,
+ IMG_LAST,
+};
+
+/// A simple binary serialization of an offloading file. We use this format to
+/// embed the offloading image into the host executable so it can be extracted
+/// and used by the linker.
+///
+/// Many of these could be stored in the same section by the time the linker
+/// sees it, so we mark this information with a header. The version is used to
+/// detect ABI stability and the size is used to find other offloading entries
+/// that may exist in the same section. All offsets are given as absolute byte
+/// offsets from the beginning of the file.
+class OffloadBinary : public Binary {
+public:
+ using string_iterator = StringMap<StringRef>::const_iterator;
+ using string_iterator_range = iterator_range<string_iterator>;
+
+ /// The current version of the binary used for backwards compatibility.
+ static const uint32_t Version = 1;
+
+ /// The offloading metadata that will be serialized to a memory buffer.
+ struct OffloadingImage {
+ ImageKind TheImageKind;
+ OffloadKind TheOffloadKind;
+ uint32_t Flags;
+ StringMap<StringRef> StringData;
+ std::unique_ptr<MemoryBuffer> Image;
+ };
+
+ /// Attempt to parse the offloading binary stored in \p Data.
+ static Expected<std::unique_ptr<OffloadBinary>> create(MemoryBufferRef);
+
+ /// Serialize the contents of \p File to a binary buffer to be read later.
+ static std::unique_ptr<MemoryBuffer> write(const OffloadingImage &);
+
+ static uint64_t getAlignment() { return alignof(Header); }
+
+ ImageKind getImageKind() const { return TheEntry->TheImageKind; }
+ OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; }
+ uint32_t getVersion() const { return TheHeader->Version; }
+ uint32_t getFlags() const { return TheEntry->Flags; }
+ uint64_t getSize() const { return TheHeader->Size; }
+
+ StringRef getTriple() const { return getString("triple"); }
+ StringRef getArch() const { return getString("arch"); }
+ StringRef getImage() const {
+ return StringRef(&Buffer[TheEntry->ImageOffset], TheEntry->ImageSize);
+ }
+
+ // Iterator over all the key and value pairs in the binary.
+ string_iterator_range strings() const {
+ return string_iterator_range(StringData.begin(), StringData.end());
+ }
+
+ StringRef getString(StringRef Key) const { return StringData.lookup(Key); }
+
+ static bool classof(const Binary *V) { return V->isOffloadFile(); }
+
+ struct Header {
+ uint8_t Magic[4] = {0x10, 0xFF, 0x10, 0xAD}; // 0x10FF10AD magic bytes.
+ uint32_t Version = OffloadBinary::Version; // Version identifier.
+ uint64_t Size; // Size in bytes of this entire binary.
+ uint64_t EntryOffset; // Offset of the metadata entry in bytes.
+ uint64_t EntrySize; // Size of the metadata entry in bytes.
+ };
+
+ struct Entry {
+ ImageKind TheImageKind; // The kind of the image stored.
+ OffloadKind TheOffloadKind; // The producer of this image.
+ uint32_t Flags; // Additional flags associated with the image.
+ uint64_t StringOffset; // Offset in bytes to the string map.
+ uint64_t NumStrings; // Number of entries in the string map.
+ uint64_t ImageOffset; // Offset in bytes of the actual binary image.
+ uint64_t ImageSize; // Size in bytes of the binary image.
+ };
+
+ struct StringEntry {
+ uint64_t KeyOffset;
+ uint64_t ValueOffset;
+ };
+
+private:
+ OffloadBinary(MemoryBufferRef Source, const Header *TheHeader,
+ const Entry *TheEntry)
+ : Binary(Binary::ID_Offload, Source), Buffer(Source.getBufferStart()),
+ TheHeader(TheHeader), TheEntry(TheEntry) {
+ const StringEntry *StringMapBegin =
+ reinterpret_cast<const StringEntry *>(&Buffer[TheEntry->StringOffset]);
+ for (uint64_t I = 0, E = TheEntry->NumStrings; I != E; ++I) {
+ StringRef Key = &Buffer[StringMapBegin[I].KeyOffset];
+ StringData[Key] = &Buffer[StringMapBegin[I].ValueOffset];
+ }
+ }
+
+ OffloadBinary(const OffloadBinary &Other) = delete;
+
+ /// Map from string keys to their values in the binary.
+ StringMap<StringRef> StringData;
+ /// Raw pointer to the MemoryBufferRef for convenience.
+ const char *Buffer;
+ /// Location of the header within the binary.
+ const Header *TheHeader;
+ /// Location of the metadata entries within the binary.
+ const Entry *TheEntry;
+};
+
+/// Convert a string \p Name to an image kind.
+ImageKind getImageKind(StringRef Name);
+
+/// Convert an image kind to its string representation.
+StringRef getImageKindName(ImageKind Name);
+
+/// Convert a string \p Name to an offload kind.
+OffloadKind getOffloadKind(StringRef Name);
+
+/// Convert an offload kind to its string representation.
+StringRef getOffloadKindName(OffloadKind Name);
+
+} // namespace object
+
+} // namespace llvm
+#endif // LLVM_OBJECT_OFFLOADBINARY_H
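A round-trip sketch of the format above; DeviceCode is an assumed StringRef holding the image, and "triple"/"arch" are the keys the accessors look up:

    object::OffloadBinary::OffloadingImage Img;
    Img.TheImageKind = object::IMG_Bitcode;
    Img.TheOffloadKind = object::OFK_OpenMP;
    Img.Flags = 0;
    Img.StringData["triple"] = "nvptx64-nvidia-cuda";
    Img.StringData["arch"] = "sm_70";
    Img.Image = MemoryBuffer::getMemBufferCopy(DeviceCode);

    std::unique_ptr<MemoryBuffer> Blob = object::OffloadBinary::write(Img);
    auto BinOrErr = object::OffloadBinary::create(Blob->getMemBufferRef());
    if (!BinOrErr)
      return BinOrErr.takeError();
    // The parsed binary exposes the serialized fields again.
    outs() << (*BinOrErr)->getTriple() << " " << (*BinOrErr)->getArch() << "\n";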
diff --git a/llvm/include/llvm/Object/RelocationResolver.h b/llvm/include/llvm/Object/RelocationResolver.h
index d3b604018e89..2acdf5ed2fe1 100644
--- a/llvm/include/llvm/Object/RelocationResolver.h
+++ b/llvm/include/llvm/Object/RelocationResolver.h
@@ -15,22 +15,15 @@
#ifndef LLVM_OBJECT_RELOCATIONRESOLVER_H
#define LLVM_OBJECT_RELOCATIONRESOLVER_H
-#include "llvm/ADT/Triple.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Object/COFF.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/Wasm.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
#include <cstdint>
-#include <system_error>
+#include <utility>
namespace llvm {
namespace object {
+class ObjectFile;
+class RelocationRef;
+
using SupportsRelocation = bool (*)(uint64_t);
using RelocationResolver = uint64_t (*)(uint64_t Type, uint64_t Offset,
uint64_t S, uint64_t LocData,
diff --git a/llvm/include/llvm/Object/SymbolicFile.h b/llvm/include/llvm/Object/SymbolicFile.h
index 284302c5e042..ea51afce5d2a 100644
--- a/llvm/include/llvm/Object/SymbolicFile.h
+++ b/llvm/include/llvm/Object/SymbolicFile.h
@@ -13,21 +13,23 @@
#ifndef LLVM_OBJECT_SYMBOLICFILE_H
#define LLVM_OBJECT_SYMBOLICFILE_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
-#include <system_error>
namespace llvm {
+
+class LLVMContext;
+class raw_ostream;
+
namespace object {
union DataRefImpl {
diff --git a/llvm/include/llvm/Object/TapiFile.h b/llvm/include/llvm/Object/TapiFile.h
index ffa27fdf9654..410e58dceaf4 100644
--- a/llvm/include/llvm/Object/TapiFile.h
+++ b/llvm/include/llvm/Object/TapiFile.h
@@ -14,13 +14,22 @@
#define LLVM_OBJECT_TAPIFILE_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/Object/Binary.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/TextAPI/InterfaceFile.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/TextAPI/Architecture.h"
namespace llvm {
+
+class raw_ostream;
+
+namespace MachO {
+
+class InterfaceFile;
+
+}
+
namespace object {
class TapiFile : public SymbolicFile {
diff --git a/llvm/include/llvm/Object/TapiUniversal.h b/llvm/include/llvm/Object/TapiUniversal.h
index ab548aa5bb2a..fff66c28c1a4 100644
--- a/llvm/include/llvm/Object/TapiUniversal.h
+++ b/llvm/include/llvm/Object/TapiUniversal.h
@@ -13,16 +13,18 @@
#ifndef LLVM_OBJECT_TAPIUNIVERSAL_H
#define LLVM_OBJECT_TAPIUNIVERSAL_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Object/Binary.h"
-#include "llvm/Object/TapiFile.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/InterfaceFile.h"
namespace llvm {
namespace object {
+class TapiFile;
+
class TapiUniversal : public Binary {
public:
class ObjectForArch {
diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h
index e4802c087b8b..abe0f6f528cc 100644
--- a/llvm/include/llvm/Object/Wasm.h
+++ b/llvm/include/llvm/Object/Wasm.h
@@ -287,7 +287,6 @@ private:
uint32_t StartFunction = -1;
bool HasLinkingSection = false;
bool HasDylinkSection = false;
- bool SeenCodeSection = false;
bool HasMemory64 = false;
wasm::WasmLinkingData LinkingData;
uint32_t NumImportedGlobals = 0;
diff --git a/llvm/include/llvm/Object/WindowsResource.h b/llvm/include/llvm/Object/WindowsResource.h
index b8fad299c693..acda9e2659b1 100644
--- a/llvm/include/llvm/Object/WindowsResource.h
+++ b/llvm/include/llvm/Object/WindowsResource.h
@@ -31,7 +31,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/Binary.h"
-#include "llvm/Object/COFF.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -50,6 +49,7 @@ namespace object {
class WindowsResource;
class ResourceSectionRef;
+struct coff_resource_dir_table;
const size_t WIN_RES_MAGIC_SIZE = 16;
const size_t WIN_RES_NULL_ENTRY_SIZE = 16;
diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h
index ac911e534f34..68d9afff887c 100644
--- a/llvm/include/llvm/Object/XCOFFObjectFile.h
+++ b/llvm/include/llvm/Object/XCOFFObjectFile.h
@@ -60,10 +60,13 @@ public:
return static_cast<const T *>(this)->FlagAndTDataAlignment &
AuxiHeaderFlagMask;
}
+
uint8_t getTDataAlignment() const {
return static_cast<const T *>(this)->FlagAndTDataAlignment &
AuxiHeaderTDataAlignmentMask;
}
+
+ uint16_t getVersion() const { return static_cast<const T *>(this)->Version; }
};
struct XCOFFAuxiliaryHeader32 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> {
@@ -113,7 +116,7 @@ struct XCOFFAuxiliaryHeader32 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> {
support::ubig16_t SecNumOfTBSS;
};
-struct XCOFFAuxiliaryHeader64 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> {
+struct XCOFFAuxiliaryHeader64 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader64> {
support::ubig16_t AuxMagic;
support::ubig16_t Version;
support::ubig32_t ReservedForDebugger;
@@ -448,9 +451,6 @@ private:
const void *SymbolTblPtr = nullptr;
XCOFFStringTable StringTable = {0, nullptr};
- const XCOFFFileHeader32 *fileHeader32() const;
- const XCOFFFileHeader64 *fileHeader64() const;
-
const XCOFFSectionHeader32 *sectionHeaderTable32() const;
const XCOFFSectionHeader64 *sectionHeaderTable64() const;
template <typename T> const T *sectionHeaderTable() const;
@@ -548,6 +548,8 @@ public:
// Below here is the non-inherited interface.
bool is64Bit() const;
+ Expected<StringRef> getRawData(const char *Start, uint64_t Size,
+ StringRef Name) const;
const XCOFFAuxiliaryHeader32 *auxiliaryHeader32() const;
const XCOFFAuxiliaryHeader64 *auxiliaryHeader64() const;
@@ -559,6 +561,8 @@ public:
XCOFFSymbolRef toSymbolRef(DataRefImpl Ref) const;
// File header related interfaces.
+ const XCOFFFileHeader32 *fileHeader32() const;
+ const XCOFFFileHeader64 *fileHeader64() const;
uint16_t getMagic() const;
uint16_t getNumberOfSections() const;
int32_t getTimeStamp() const;
@@ -687,6 +691,9 @@ public:
Entry32 = reinterpret_cast<const XCOFFSymbolEntry32 *>(SymEntDataRef.p);
}
+ const XCOFFSymbolEntry32 *getSymbol32() { return Entry32; }
+ const XCOFFSymbolEntry64 *getSymbol64() { return Entry64; }
+
uint64_t getValue() const { return Entry32 ? getValue32() : getValue64(); }
uint32_t getValue32() const { return Entry32->Value; }
diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
new file mode 100644
index 000000000000..d1c0cd912d97
--- /dev/null
+++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
@@ -0,0 +1,101 @@
+//===- DXContainerYAML.h - DXContainer YAMLIO implementation ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares classes for handling the YAML representation
+/// of DXContainer.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECTYAML_DXCONTAINERYAML_H
+#define LLVM_OBJECTYAML_DXCONTAINERYAML_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace DXContainerYAML {
+
+struct VersionTuple {
+ uint16_t Major;
+ uint16_t Minor;
+};
+
+// The optional header fields are required in the binary and will be populated
+// when reading from binary, but can be omitted in the YAML text because the
+// emitter can calculate them.
+struct FileHeader {
+ std::vector<llvm::yaml::Hex8> Hash;
+ VersionTuple Version;
+ Optional<uint32_t> FileSize;
+ uint32_t PartCount;
+ Optional<std::vector<uint32_t>> PartOffsets;
+};
+
+struct DXILProgram {
+ uint8_t MajorVersion;
+ uint8_t MinorVersion;
+ uint16_t ShaderKind;
+ Optional<uint32_t> Size;
+ uint16_t DXILMajorVersion;
+ uint16_t DXILMinorVersion;
+ Optional<uint32_t> DXILOffset;
+ Optional<uint32_t> DXILSize;
+ Optional<std::vector<llvm::yaml::Hex8>> DXIL;
+};
+
+struct Part {
+ std::string Name;
+ uint32_t Size;
+ Optional<DXILProgram> Program;
+};
+
+struct Object {
+ FileHeader Header;
+ std::vector<Part> Parts;
+};
+
+} // namespace DXContainerYAML
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DXContainerYAML::Part)
+namespace llvm {
+
+class raw_ostream;
+
+namespace yaml {
+
+template <> struct MappingTraits<DXContainerYAML::VersionTuple> {
+ static void mapping(IO &IO, DXContainerYAML::VersionTuple &Version);
+};
+
+template <> struct MappingTraits<DXContainerYAML::FileHeader> {
+ static void mapping(IO &IO, DXContainerYAML::FileHeader &Header);
+};
+
+template <> struct MappingTraits<DXContainerYAML::DXILProgram> {
+ static void mapping(IO &IO, DXContainerYAML::DXILProgram &Program);
+};
+
+template <> struct MappingTraits<DXContainerYAML::Part> {
+ static void mapping(IO &IO, DXContainerYAML::Part &Version);
+};
+
+template <> struct MappingTraits<DXContainerYAML::Object> {
+ static void mapping(IO &IO, DXContainerYAML::Object &Obj);
+};
+
+} // namespace yaml
+
+} // namespace llvm
+
+#endif // LLVM_OBJECTYAML_DXCONTAINERYAML_H
diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h
index 92a9f78ce7bf..ddd5dd9cf3c9 100644
--- a/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -161,6 +161,8 @@ struct BBAddrMapEntry {
llvm::yaml::Hex64 Size;
llvm::yaml::Hex64 Metadata;
};
+ uint8_t Version;
+ llvm::yaml::Hex8 Feature;
llvm::yaml::Hex64 Address;
Optional<uint64_t> NumBlocks;
Optional<std::vector<BBEntry>> BBEntries;
@@ -317,7 +319,7 @@ struct BBAddrMapSection : Section {
BBAddrMapSection() : Section(ChunkKind::BBAddrMap) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -331,7 +333,7 @@ struct StackSizesSection : Section {
StackSizesSection() : Section(ChunkKind::StackSizes) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -349,7 +351,7 @@ struct DynamicSection : Section {
DynamicSection() : Section(ChunkKind::Dynamic) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Dynamic; }
@@ -380,7 +382,7 @@ struct NoteSection : Section {
NoteSection() : Section(ChunkKind::Note) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Notes", Notes.hasValue()}};
+ return {{"Notes", Notes.has_value()}};
};
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Note; }
@@ -391,7 +393,7 @@ struct HashSection : Section {
Optional<std::vector<uint32_t>> Chain;
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Bucket", Bucket.hasValue()}, {"Chain", Chain.hasValue()}};
+ return {{"Bucket", Bucket.has_value()}, {"Chain", Chain.has_value()}};
};
// The following members are used to override section fields.
@@ -433,10 +435,10 @@ struct GnuHashSection : Section {
GnuHashSection() : Section(ChunkKind::GnuHash) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Header", Header.hasValue()},
- {"BloomFilter", BloomFilter.hasValue()},
- {"HashBuckets", HashBuckets.hasValue()},
- {"HashValues", HashValues.hasValue()}};
+ return {{"Header", Header.has_value()},
+ {"BloomFilter", BloomFilter.has_value()},
+ {"HashBuckets", HashBuckets.has_value()},
+ {"HashValues", HashValues.has_value()}};
};
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::GnuHash; }
@@ -462,7 +464,7 @@ struct VerneedSection : Section {
VerneedSection() : Section(ChunkKind::Verneed) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Dependencies", VerneedV.hasValue()}};
+ return {{"Dependencies", VerneedV.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -476,7 +478,7 @@ struct AddrsigSection : Section {
AddrsigSection() : Section(ChunkKind::Addrsig) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Symbols", Symbols.hasValue()}};
+ return {{"Symbols", Symbols.has_value()}};
};
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Addrsig; }
@@ -493,7 +495,7 @@ struct LinkerOptionsSection : Section {
LinkerOptionsSection() : Section(ChunkKind::LinkerOptions) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Options", Options.hasValue()}};
+ return {{"Options", Options.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -507,7 +509,7 @@ struct DependentLibrariesSection : Section {
DependentLibrariesSection() : Section(ChunkKind::DependentLibraries) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Libraries", Libs.hasValue()}};
+ return {{"Libraries", Libs.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -527,7 +529,7 @@ struct CallGraphProfileSection : Section {
CallGraphProfileSection() : Section(ChunkKind::CallGraphProfile) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -541,7 +543,7 @@ struct SymverSection : Section {
SymverSection() : Section(ChunkKind::Symver) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Symver; }
@@ -562,7 +564,7 @@ struct VerdefSection : Section {
VerdefSection() : Section(ChunkKind::Verdef) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Verdef; }
@@ -577,7 +579,7 @@ struct GroupSection : Section {
GroupSection() : Section(ChunkKind::Group) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Members", Members.hasValue()}};
+ return {{"Members", Members.has_value()}};
};
static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Group; }
@@ -597,7 +599,7 @@ struct RelocationSection : Section {
RelocationSection() : Section(ChunkKind::Relocation) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Relocations", Relocations.hasValue()}};
+ return {{"Relocations", Relocations.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -611,7 +613,7 @@ struct RelrSection : Section {
RelrSection() : Section(ChunkKind::Relr) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -625,7 +627,7 @@ struct SymtabShndxSection : Section {
SymtabShndxSection() : Section(ChunkKind::SymtabShndxSection) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -644,7 +646,7 @@ struct ARMIndexTableSection : Section {
ARMIndexTableSection() : Section(ChunkKind::ARMIndexTable) {}
std::vector<std::pair<StringRef, bool>> getEntries() const override {
- return {{"Entries", Entries.hasValue()}};
+ return {{"Entries", Entries.has_value()}};
};
static bool classof(const Chunk *S) {
@@ -720,6 +722,7 @@ struct Object {
llvm_unreachable("the section header table chunk must always be present");
}
+ ELF_ELFOSABI getOSAbi() const;
unsigned getMachine() const;
};
diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h
index 38a7de3d6131..095377c1b824 100644
--- a/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -122,6 +122,7 @@ struct LinkEditData {
std::vector<NListEntry> NameList;
std::vector<StringRef> StringTable;
std::vector<yaml::Hex32> IndirectSymbols;
+ std::vector<yaml::Hex64> FunctionStarts;
bool isEmpty() const;
};
diff --git a/llvm/include/llvm/ObjectYAML/ObjectYAML.h b/llvm/include/llvm/ObjectYAML/ObjectYAML.h
index 312777aadd4c..b63607e6796b 100644
--- a/llvm/include/llvm/ObjectYAML/ObjectYAML.h
+++ b/llvm/include/llvm/ObjectYAML/ObjectYAML.h
@@ -11,9 +11,11 @@
#include "llvm/ObjectYAML/ArchiveYAML.h"
#include "llvm/ObjectYAML/COFFYAML.h"
+#include "llvm/ObjectYAML/DXContainerYAML.h"
#include "llvm/ObjectYAML/ELFYAML.h"
#include "llvm/ObjectYAML/MachOYAML.h"
#include "llvm/ObjectYAML/MinidumpYAML.h"
+#include "llvm/ObjectYAML/OffloadYAML.h"
#include "llvm/ObjectYAML/WasmYAML.h"
#include "llvm/ObjectYAML/XCOFFYAML.h"
#include "llvm/Support/YAMLTraits.h"
@@ -31,8 +33,10 @@ struct YamlObjectFile {
std::unique_ptr<MachOYAML::Object> MachO;
std::unique_ptr<MachOYAML::UniversalBinary> FatMachO;
std::unique_ptr<MinidumpYAML::Object> Minidump;
+ std::unique_ptr<OffloadYAML::Binary> Offload;
std::unique_ptr<WasmYAML::Object> Wasm;
std::unique_ptr<XCOFFYAML::Object> Xcoff;
+ std::unique_ptr<DXContainerYAML::Object> DXContainer;
};
template <> struct MappingTraits<YamlObjectFile> {
diff --git a/llvm/include/llvm/ObjectYAML/OffloadYAML.h b/llvm/include/llvm/ObjectYAML/OffloadYAML.h
new file mode 100644
index 000000000000..a4fdbce0b320
--- /dev/null
+++ b/llvm/include/llvm/ObjectYAML/OffloadYAML.h
@@ -0,0 +1,79 @@
+//===- OffloadYAML.h - Offload Binary YAMLIO implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares classes for handling the YAML representation of
+/// offloading binaries.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECTYAML_OFFLOADYAML_H
+#define LLVM_OBJECTYAML_OFFLOADYAML_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Object/OffloadBinary.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace OffloadYAML {
+
+struct Binary {
+ struct StringEntry {
+ StringRef Key;
+ StringRef Value;
+ };
+
+ struct Member {
+ Optional<object::ImageKind> ImageKind;
+ Optional<object::OffloadKind> OffloadKind;
+ Optional<uint32_t> Flags;
+ Optional<std::vector<StringEntry>> StringEntries;
+ Optional<yaml::BinaryRef> Content;
+ };
+
+ Optional<uint32_t> Version;
+ Optional<uint64_t> Size;
+ Optional<uint64_t> EntryOffset;
+ Optional<uint64_t> EntrySize;
+ std::vector<Member> Members;
+};
+
+} // end namespace OffloadYAML
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::OffloadYAML::Binary::Member)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::OffloadYAML::Binary::StringEntry)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarEnumerationTraits<object::ImageKind> {
+ static void enumeration(IO &IO, object::ImageKind &Value);
+};
+
+template <> struct ScalarEnumerationTraits<object::OffloadKind> {
+ static void enumeration(IO &IO, object::OffloadKind &Value);
+};
+
+template <> struct MappingTraits<OffloadYAML::Binary> {
+ static void mapping(IO &IO, OffloadYAML::Binary &O);
+};
+
+template <> struct MappingTraits<OffloadYAML::Binary::StringEntry> {
+ static void mapping(IO &IO, OffloadYAML::Binary::StringEntry &M);
+};
+
+template <> struct MappingTraits<OffloadYAML::Binary::Member> {
+ static void mapping(IO &IO, OffloadYAML::Binary::Member &M);
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+#endif // LLVM_OBJECTYAML_OFFLOADYAML_H
diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h
index e3a1ba0d58a6..0f6c4f06665f 100644
--- a/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -62,11 +62,20 @@ struct Export {
uint32_t Index;
};
+struct InitExpr {
+ InitExpr() {}
+ bool Extended;
+ union {
+ wasm::WasmInitExprMVP Inst;
+ yaml::BinaryRef Body;
+ };
+};
+
struct ElemSegment {
uint32_t Flags;
uint32_t TableNumber;
ValueType ElemKind;
- wasm::WasmInitExpr Offset;
+ InitExpr Offset;
std::vector<uint32_t> Functions;
};
@@ -74,19 +83,20 @@ struct Global {
uint32_t Index;
ValueType Type;
bool Mutable;
- wasm::WasmInitExpr InitExpr;
+ InitExpr Init;
};
struct Import {
+ Import() {}
StringRef Module;
StringRef Field;
ExportKind Kind;
union {
uint32_t SigIndex;
- Global GlobalImport;
Table TableImport;
Limits Memory;
uint32_t TagIndex;
+ Global GlobalImport;
};
};
@@ -114,7 +124,7 @@ struct DataSegment {
uint32_t SectionOffset;
uint32_t InitFlags;
uint32_t MemoryIndex;
- wasm::WasmInitExpr Offset;
+ InitExpr Offset;
yaml::BinaryRef Content;
};
@@ -526,8 +536,8 @@ template <> struct MappingTraits<WasmYAML::LocalDecl> {
static void mapping(IO &IO, WasmYAML::LocalDecl &LocalDecl);
};
-template <> struct MappingTraits<wasm::WasmInitExpr> {
- static void mapping(IO &IO, wasm::WasmInitExpr &Expr);
+template <> struct MappingTraits<WasmYAML::InitExpr> {
+ static void mapping(IO &IO, WasmYAML::InitExpr &Expr);
};
template <> struct MappingTraits<WasmYAML::DataSegment> {
diff --git a/llvm/include/llvm/ObjectYAML/yaml2obj.h b/llvm/include/llvm/ObjectYAML/yaml2obj.h
index 468f673fd451..000da077bb18 100644
--- a/llvm/include/llvm/ObjectYAML/yaml2obj.h
+++ b/llvm/include/llvm/ObjectYAML/yaml2obj.h
@@ -36,6 +36,10 @@ namespace MinidumpYAML {
struct Object;
}
+namespace OffloadYAML {
+struct Binary;
+}
+
namespace WasmYAML {
struct Object;
}
@@ -48,6 +52,10 @@ namespace ArchYAML {
struct Archive;
}
+namespace DXContainerYAML {
+struct Object;
+} // namespace DXContainerYAML
+
namespace yaml {
class Input;
struct YamlObjectFile;
@@ -61,8 +69,11 @@ bool yaml2elf(ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH,
bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler EH);
bool yaml2minidump(MinidumpYAML::Object &Doc, raw_ostream &Out,
ErrorHandler EH);
+bool yaml2offload(OffloadYAML::Binary &Doc, raw_ostream &Out, ErrorHandler EH);
bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH);
bool yaml2xcoff(XCOFFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH);
+bool yaml2dxcontainer(DXContainerYAML::Object &Doc, raw_ostream &Out,
+ ErrorHandler EH);
bool convertYAML(Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler,
unsigned DocNum = 1, uint64_t MaxSize = UINT64_MAX);
diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h
index 74897de52a93..6a07e1c657dc 100644
--- a/llvm/include/llvm/Option/ArgList.h
+++ b/llvm/include/llvm/Option/ArgList.h
@@ -298,14 +298,24 @@ public:
/// true if the option is present, false if the negation is present, and
/// \p Default if neither option is given. If both the option and its
/// negation are present, the last one wins.
- bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default=true) const;
+ bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const;
/// hasFlag - Given an option \p Pos, an alias \p PosAlias and its negative
/// form \p Neg, return true if the option or its alias is present, false if
/// the negation is present, and \p Default if none of the options are
/// given. If multiple options are present, the last one wins.
bool hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg,
- bool Default = true) const;
+ bool Default) const;
+
+ /// Given an option Pos and its negative form Neg, render the option if Pos is
+ /// present.
+ void addOptInFlag(ArgStringList &Output, OptSpecifier Pos,
+ OptSpecifier Neg) const;
+ /// Render the option if Neg is present.
+ void addOptOutFlag(ArgStringList &Output, OptSpecifier Pos,
+ OptSpecifier Neg) const {
+ addOptInFlag(Output, Neg, Pos);
+ }
/// Render only the last argument match \p Id0, if present.
template<typename ...OptSpecifiers>
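A sketch of the new helpers from a driver's point of view; the OPT_* names are assumed tablegen'd option IDs:

    ArgStringList CmdArgs;
    // Renders -ffast-math only when the positive form wins on the command line.
    Args.addOptInFlag(CmdArgs, options::OPT_ffast_math, options::OPT_fno_fast_math);
    // Renders -fno-exceptions only when the negative form wins.
    Args.addOptOutFlag(CmdArgs, options::OPT_fexceptions, options::OPT_fno_exceptions);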
diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h
index 8aa9ba90a9ca..6445e16ab68f 100644
--- a/llvm/include/llvm/Pass.h
+++ b/llvm/include/llvm/Pass.h
@@ -228,6 +228,16 @@ public:
template <typename AnalysisType>
AnalysisType &getAnalysisID(AnalysisID PI, Function &F,
bool *Changed = nullptr);
+
+#ifdef EXPENSIVE_CHECKS
+ /// Hash a module in order to detect when a module (or more specific) pass has
+ /// modified it.
+ uint64_t structuralHash(Module &M) const;
+
+ /// Hash a function in order to detect when a function (or more specific) pass
+ /// has modified it.
+ virtual uint64_t structuralHash(Function &F) const;
+#endif
};
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 66b0b149fa25..0cbbdf7f3ce8 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -215,8 +215,9 @@ public:
/// only intended for use when attempting to optimize code. If frontends
/// require some transformations for semantic reasons, they should explicitly
/// build them.
- ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool LTOPreLink = false);
+ ModulePassManager
+ buildModuleOptimizationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase LTOPhase);
/// Build a per-module default optimization pipeline.
///
@@ -470,6 +471,15 @@ public:
/// Register a callback for a default optimizer pipeline extension point
///
+ /// This extension point allows adding optimizations before the function
+ /// optimization pipeline.
+ void registerOptimizerEarlyEPCallback(
+ const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ OptimizerEarlyEPCallbacks.push_back(C);
+ }
+
+ /// Register a callback for a default optimizer pipeline extension point
+ ///
/// This extension point allows adding optimizations at the very end of the
/// function optimization pipeline.
void registerOptimizerLastEPCallback(
@@ -477,6 +487,24 @@ public:
OptimizerLastEPCallbacks.push_back(C);
}
+ /// Register a callback for a default optimizer pipeline extension point
+ ///
+ /// This extension point allows adding optimizations at the start of the full
+ /// LTO pipeline.
+ void registerFullLinkTimeOptimizationEarlyEPCallback(
+ const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ FullLinkTimeOptimizationEarlyEPCallbacks.push_back(C);
+ }
+
+ /// Register a callback for a default optimizer pipeline extension point
+ ///
+ /// This extension point allows adding optimizations at the end of the full
+ /// LTO pipeline.
+ void registerFullLinkTimeOptimizationLastEPCallback(
+ const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ FullLinkTimeOptimizationLastEPCallbacks.push_back(C);
+ }
+
/// Register a callback for parsing an AliasAnalysis Name to populate
/// the given AAManager \p AA
void registerParseAACallback(
@@ -582,7 +610,8 @@ private:
void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level,
bool RunProfileGen, bool IsCS, std::string ProfileFile,
- std::string ProfileRemappingFile);
+ std::string ProfileRemappingFile,
+ ThinOrFullLTOPhase LTOPhase);
void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
// Extension Point callbacks
@@ -598,9 +627,15 @@ private:
CGSCCOptimizerLateEPCallbacks;
SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
VectorizerStartEPCallbacks;
+ // Module callbacks
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ OptimizerEarlyEPCallbacks;
SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
OptimizerLastEPCallbacks;
- // Module callbacks
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ FullLinkTimeOptimizationEarlyEPCallbacks;
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ FullLinkTimeOptimizationLastEPCallbacks;
SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
PipelineStartEPCallbacks;
SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 561cd54fa998..32ecc9ec5fb0 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -187,17 +187,6 @@ protected:
// Register required callbacks.
void registerRequiredCallbacks(PassInstrumentationCallbacks &PIC);
- // Return true when this is a defined function for which printing
- // of changes is desired.
- bool isInterestingFunction(const Function &F);
-
- // Return true when this is a pass for which printing of changes is desired.
- bool isInterestingPass(StringRef PassID);
-
- // Return true when this is a pass on IR for which printing
- // of changes is desired.
- bool isInteresting(Any IR, StringRef PassID);
-
// Called on the first IR processed.
virtual void handleInitialIR(Any IR) = 0;
// Called before and after a pass to get the representation of the IR.
@@ -491,6 +480,25 @@ protected:
std::unique_ptr<raw_fd_ostream> HTML;
};
+// Print IR on crash.
+class PrintCrashIRInstrumentation {
+public:
+ PrintCrashIRInstrumentation()
+ : SavedIR("*** Dump of IR Before Last Pass Unknown ***") {}
+ ~PrintCrashIRInstrumentation();
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+ void reportCrashIR();
+
+protected:
+ std::string SavedIR;
+
+private:
+ // The crash reporter that will report on a crash.
+ static PrintCrashIRInstrumentation *CrashReporter;
+ // Crash handler registered when print-on-crash is specified.
+ static void SignalHandler(void *);
+};
+
/// This class provides an interface to register all the standard pass
/// instrumentations and manages their state (if any).
class StandardInstrumentations {
@@ -504,6 +512,7 @@ class StandardInstrumentations {
PseudoProbeVerifier PseudoProbeVerification;
InLineChangePrinter PrintChangedDiff;
DotCfgChangeReporter WebsiteChangeReporter;
+ PrintCrashIRInstrumentation PrintCrashIR;
VerifyInstrumentation Verify;
bool VerifyEach;
diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index e1f45019b1a9..e35751512245 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -195,11 +195,11 @@ public:
ArrayRef<CounterExpression> getExpressions() const { return Expressions; }
/// Return a counter that represents the expression that adds LHS and RHS.
- Counter add(Counter LHS, Counter RHS);
+ Counter add(Counter LHS, Counter RHS, bool Simplify = true);
/// Return a counter that represents the expression that subtracts RHS from
/// LHS.
- Counter subtract(Counter LHS, Counter RHS);
+ Counter subtract(Counter LHS, Counter RHS, bool Simplify = true);
};
using LineColPair = std::pair<unsigned, unsigned>;
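With the added Simplify parameter a caller can defer counter-expression simplification while building intermediate expressions; a sketch, assuming these methods live on CounterExpressionBuilder as upstream and that LHS and RHS are existing Counters:

  coverage::CounterExpressionBuilder Builder;
  // Keep the intermediate sum unsimplified; subtract simplifies by default.
  coverage::Counter Sum = Builder.add(LHS, RHS, /*Simplify=*/false);
  coverage::Counter Diff = Builder.subtract(Sum, RHS);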
diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h
index ef6515d39144..fe56f84f28b6 100644
--- a/llvm/include/llvm/ProfileData/GCOV.h
+++ b/llvm/include/llvm/ProfileData/GCOV.h
@@ -14,9 +14,7 @@
#ifndef LLVM_PROFILEDATA_GCOV_H
#define LLVM_PROFILEDATA_GCOV_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -26,10 +24,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <limits>
#include <map>
#include <memory>
#include <string>
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index a416eb28906e..401d278cbd06 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -281,13 +281,21 @@ bool needsComdatForCounter(const Function &F, const Module &M);
/// An enum describing the attributes of an instrumented profile.
enum class InstrProfKind {
Unknown = 0x0,
- FE = 0x1, // A frontend clang profile, incompatible with other attrs.
- IR = 0x2, // An IR-level profile (default when -fprofile-generate is used).
- BB = 0x4, // A profile with entry basic block instrumentation.
- CS = 0x8, // A context sensitive IR-level profile.
- SingleByteCoverage = 0x10, // Use single byte probes for coverage.
- FunctionEntryOnly = 0x20, // Only instrument the function entry basic block.
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly)
+ // A frontend clang profile, incompatible with other attrs.
+ FrontendInstrumentation = 0x1,
+ // An IR-level profile (default when -fprofile-generate is used).
+ IRInstrumentation = 0x2,
+ // A profile with entry basic block instrumentation.
+ FunctionEntryInstrumentation = 0x4,
+ // A context sensitive IR-level profile.
+ ContextSensitive = 0x8,
+ // Use single byte probes for coverage.
+ SingleByteCoverage = 0x10,
+ // Only instrument the function entry basic block.
+ FunctionEntryOnly = 0x20,
+ // A memory profile collected using -fprofile=memory.
+ MemProf = 0x40,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf)
};
const std::error_category &instrprof_category();
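Since LLVM_MARK_AS_BITMASK_ENUM enables the bitwise operators, the renamed kinds compose and test as flags; a small sketch:

  InstrProfKind Kind = InstrProfKind::IRInstrumentation |
                       InstrProfKind::ContextSensitive;
  // False here: the MemProf bit was never set.
  bool HasMemProf = static_cast<bool>(Kind & InstrProfKind::MemProf);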
@@ -1011,7 +1019,9 @@ enum ProfVersion {
Version6 = 6,
// An additional counter is added around logical operators.
Version7 = 7,
- // The current version is 7.
+ // An additional (optional) memory profile type is added.
+ Version8 = 8,
+ // The current version is 8.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};
const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1028,6 +1038,21 @@ struct Header {
uint64_t Unused; // Becomes unused since version 4
uint64_t HashType;
uint64_t HashOffset;
+ uint64_t MemProfOffset;
+ // New fields should only be added at the end to ensure that the size
+ // computation is correct. The methods below need to be updated to ensure that
+ // the new field is read correctly.
+
+ // Reads a header struct from the buffer.
+ static Expected<Header> readFromBuffer(const unsigned char *Buffer);
+
+ // Returns the size of the header in bytes for all valid fields based on the
+ // version. I.e. an older version header will return a smaller size.
+ size_t size() const;
+
+ // Returns the format version in little endian. The header retains the version
+ // in the native endianness of the compiler runtime.
+ uint64_t formatVersion() const;
};
// Profile summary data recorded in the profile data file in indexed
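A sketch of the intended reading pattern, assuming Buffer points at the start of an in-memory indexed profile:

  Expected<Header> H = Header::readFromBuffer(Buffer);
  if (!H)
    return H.takeError();
  // size() counts only the fields valid for the stored version, so an older
  // header (without MemProfOffset) still skips to the right offset.
  const unsigned char *Past = Buffer + H->size();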
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
index 3d0076fd9035..79995c813266 100644
--- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
+++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -13,16 +13,17 @@
#define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H
#include "llvm/ADT/DenseSet.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <vector>
namespace llvm {
+class DWARFContext;
+class DWARFDie;
+namespace object {
+class ObjectFile;
+}
/// InstrProfCorrelator - A base class used to correlate raw instrumentation
/// data with the functions it was collected for.
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 62054a6a3df5..282620d8b5dc 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 8
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 7
+#define INSTR_PROF_INDEX_VERSION 8
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 5
@@ -662,6 +662,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* The 59th bit indicates whether to use debug info to correlate profiles.
* The 60th bit indicates single byte coverage instrumentation.
* The 61st bit indicates function entry instrumentation only.
+ * The 62nd bit indicates whether memory profile information is present.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@@ -671,6 +672,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
+#define VARIANT_MASK_MEMPROF (0x1ULL << 62)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index e9dd19a69792..3a25de05bbf1 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -19,6 +19,7 @@
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
+#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
@@ -39,25 +40,36 @@ namespace llvm {
class InstrProfReader;
/// A file format agnostic iterator over profiling data.
+template <class record_type = NamedInstrProfRecord,
+ class reader_type = InstrProfReader>
class InstrProfIterator {
public:
using iterator_category = std::input_iterator_tag;
- using value_type = NamedInstrProfRecord;
+ using value_type = record_type;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
private:
- InstrProfReader *Reader = nullptr;
+ reader_type *Reader = nullptr;
value_type Record;
- void Increment();
+ void increment() {
+ if (Error E = Reader->readNextRecord(Record)) {
+ // Handle errors in the reader.
+ InstrProfError::take(std::move(E));
+ *this = InstrProfIterator();
+ }
+ }
public:
InstrProfIterator() = default;
- InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
+ InstrProfIterator(reader_type *Reader) : Reader(Reader) { increment(); }
- InstrProfIterator &operator++() { Increment(); return *this; }
+ InstrProfIterator &operator++() {
+ increment();
+ return *this;
+ }
bool operator==(const InstrProfIterator &RHS) const {
return Reader == RHS.Reader;
}
@@ -88,8 +100,8 @@ public:
virtual Error printBinaryIds(raw_ostream &OS) { return Error::success(); };
/// Iterator over profile data.
- InstrProfIterator begin() { return InstrProfIterator(this); }
- InstrProfIterator end() { return InstrProfIterator(); }
+ InstrProfIterator<> begin() { return InstrProfIterator<>(this); }
+ InstrProfIterator<> end() { return InstrProfIterator<>(); }
virtual bool isIRLevelProfile() const = 0;
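Because begin()/end() now return the defaulted InstrProfIterator<>, existing iteration code keeps working; a sketch, with Reader assumed to be a std::unique_ptr<InstrProfReader> obtained from a factory:

  uint64_t NumNamed = 0;
  for (const NamedInstrProfRecord &Record : *Reader)
    if (!Record.Name.empty())
      ++NumNamed;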
@@ -201,15 +213,16 @@ public:
static bool hasFormat(const MemoryBuffer &Buffer);
bool isIRLevelProfile() const override {
- return static_cast<bool>(ProfileKind & InstrProfKind::IR);
+ return static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation);
}
bool hasCSIRLevelProfile() const override {
- return static_cast<bool>(ProfileKind & InstrProfKind::CS);
+ return static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive);
}
bool instrEntryBBEnabled() const override {
- return static_cast<bool>(ProfileKind & InstrProfKind::BB);
+ return static_cast<bool>(ProfileKind &
+ InstrProfKind::FunctionEntryInstrumentation);
}
bool hasSingleByteCoverage() const override {
@@ -460,6 +473,11 @@ struct InstrProfReaderIndexBase {
using OnDiskHashTableImplV3 =
OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
+using MemProfRecordHashTable =
+ OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
+using MemProfFrameHashTable =
+ OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
+
template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
@@ -545,6 +563,13 @@ private:
std::unique_ptr<ProfileSummary> Summary;
/// Context sensitive profile summary data.
std::unique_ptr<ProfileSummary> CS_Summary;
+ /// MemProf profile schema (if available).
+ memprof::MemProfSchema Schema;
+ /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
+ std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
+ /// MemProf frame profile data on-disk indexed via frame id.
+ std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+
// Index to the current record in the record array.
unsigned RecordIndex;
@@ -598,6 +623,10 @@ public:
Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash);
+ /// Return the memprof record for the function identified by
+ /// llvm::md5(Name).
+ Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash);
+
/// Fill Counts with the profile data for the given function name.
Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
std::vector<uint64_t> &Counts);
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index af1e46cf4fc2..29e07961a2f4 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -15,11 +15,13 @@
#define LLVM_PROFILEDATA_INSTRPROFWRITER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
#include <cstdint>
#include <memory>
@@ -28,6 +30,7 @@ namespace llvm {
/// Writer for instrumentation based profile data.
class InstrProfRecordWriterTrait;
class ProfOStream;
+class MemoryBuffer;
class raw_fd_ostream;
class InstrProfWriter {
@@ -37,6 +40,16 @@ public:
private:
bool Sparse;
StringMap<ProfilingData> FunctionData;
+
+ // A map to hold memprof data per function. The lower 64 bits obtained from
+ // the md5 hash of the function name are used to index into the map.
+ llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
+ MemProfRecordData;
+ // A map to hold frame id to frame mappings. The mappings are used to
+ // convert IndexedMemProfRecord to MemProfRecords with frame information
+ // inline.
+ llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData;
+
// An enum describing the attributes of the profile.
InstrProfKind ProfileKind = InstrProfKind::Unknown;
// Use raw pointer here for the incomplete type object.
@@ -57,6 +70,15 @@ public:
addRecord(std::move(I), 1, Warn);
}
+ /// Add a memprof record for a function identified by its \p Id.
+ void addMemProfRecord(const GlobalValue::GUID Id,
+ const memprof::IndexedMemProfRecord &Record);
+
+ /// Add a memprof frame identified by the hash of the contents of the frame in
+ /// \p FrameId.
+ bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F,
+ function_ref<void(Error)> Warn);
+
/// Merge existing function counts from the given writer.
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn);
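A minimal sketch of feeding memprof data to the writer; the ids and the warning handler are illustrative only:

  InstrProfWriter Writer;
  memprof::Frame F(/*Hash=*/0x1234, /*Off=*/1, /*Col=*/2, /*Inline=*/false);
  Writer.addMemProfFrame(F.hash(), F,
                         [](Error E) { consumeError(std::move(E)); });
  memprof::IndexedMemProfRecord Record; // Call stacks would reference F.hash().
  Writer.addMemProfRecord(/*Id=*/0x5678, Record);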
@@ -97,11 +119,13 @@ public:
// Check if the profiles are incompatible. Clang frontend profiles can't be
// merged with other profile types.
- if (static_cast<bool>((ProfileKind & InstrProfKind::FE) ^
- (Other & InstrProfKind::FE))) {
+ if (static_cast<bool>(
+ (ProfileKind & InstrProfKind::FrontendInstrumentation) ^
+ (Other & InstrProfKind::FrontendInstrumentation))) {
return make_error<InstrProfError>(instrprof_error::unsupported_version);
}
- if (testIncompatible(InstrProfKind::FunctionEntryOnly, InstrProfKind::BB)) {
+ if (testIncompatible(InstrProfKind::FunctionEntryOnly,
+ InstrProfKind::FunctionEntryInstrumentation)) {
return make_error<InstrProfError>(
instrprof_error::unsupported_version,
"cannot merge FunctionEntryOnly profiles and BB profiles together");
@@ -112,6 +136,8 @@ public:
return Error::success();
}
+ InstrProfKind getProfileKind() const { return ProfileKind; }
+
// Internal interface for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness);
void setOutputSparse(bool Sparse);
diff --git a/llvm/include/llvm/ProfileData/MIBEntryDef.inc b/llvm/include/llvm/ProfileData/MIBEntryDef.inc
new file mode 100644
index 000000000000..f5c6f0e4924b
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/MIBEntryDef.inc
@@ -0,0 +1,47 @@
+/*===-- MIBEntryDef.inc - MemProf profiling runtime macros -*- C++ -*-======== *\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+/*
+ * This file defines the macros for memprof profiling data structures.
+ * E.g. usage to define the memprof MemInfoBlock struct:
+ *
+ * struct MemInfoBlock {
+ * #define MIBEntryDef(NameTag, Name, Type) Type Name;
+ * #include "MIBEntryDef.inc"
+ * #undef MIBEntryDef
+ * };
+ *
+ * This file has two identical copies. The primary copy lives in LLVM and
+ * the other one sits in compiler-rt/include/profile directory. To make changes
+ * in this file, first modify the primary copy and copy it over to compiler-rt.
+ * Testing of any change in this file can start only after the two copies are
+ * synced up.
+ *
+\*===----------------------------------------------------------------------===*/
+#ifndef MIBEntryDef
+#define MIBEntryDef(NameTag, Name, Type)
+#endif
+
+MIBEntryDef(AllocCount = 1, AllocCount, uint32_t)
+MIBEntryDef(TotalAccessCount = 2, TotalAccessCount, uint64_t)
+MIBEntryDef(MinAccessCount = 3, MinAccessCount, uint64_t)
+MIBEntryDef(MaxAccessCount = 4, MaxAccessCount, uint64_t)
+MIBEntryDef(TotalSize = 5, TotalSize, uint64_t)
+MIBEntryDef(MinSize = 6, MinSize, uint32_t)
+MIBEntryDef(MaxSize = 7, MaxSize, uint32_t)
+MIBEntryDef(AllocTimestamp = 8, AllocTimestamp, uint32_t)
+MIBEntryDef(DeallocTimestamp = 9, DeallocTimestamp, uint32_t)
+MIBEntryDef(TotalLifetime = 10, TotalLifetime, uint64_t)
+MIBEntryDef(MinLifetime = 11, MinLifetime, uint32_t)
+MIBEntryDef(MaxLifetime = 12, MaxLifetime, uint32_t)
+MIBEntryDef(AllocCpuId = 13, AllocCpuId, uint32_t)
+MIBEntryDef(DeallocCpuId = 14, DeallocCpuId, uint32_t)
+MIBEntryDef(NumMigratedCpu = 15, NumMigratedCpu, uint32_t)
+MIBEntryDef(NumLifetimeOverlaps = 16, NumLifetimeOverlaps, uint32_t)
+MIBEntryDef(NumSameAllocCpu = 17, NumSameAllocCpu, uint32_t)
+MIBEntryDef(NumSameDeallocCpu = 18, NumSameDeallocCpu, uint32_t)
+MIBEntryDef(DataTypeId = 19, DataTypeId, uint64_t)
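The same X-macro can stamp out code other than field declarations; for instance, a sketch that builds a name table for every MIB field:

  const char *MIBFieldNames[] = {
  #define MIBEntryDef(NameTag, Name, Type) #Name,
  #include "llvm/ProfileData/MIBEntryDef.inc"
  #undef MIBEntryDef
  };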
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
new file mode 100644
index 000000000000..bcee3b25bf87
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -0,0 +1,613 @@
+#ifndef LLVM_PROFILEDATA_MEMPROF_H_
+#define LLVM_PROFILEDATA_MEMPROF_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/ProfileData/MemProfData.inc"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdint>
+
+namespace llvm {
+namespace memprof {
+
+enum class Meta : uint64_t {
+ Start = 0,
+#define MIBEntryDef(NameTag, Name, Type) NameTag,
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ Size
+};
+
+using MemProfSchema = llvm::SmallVector<Meta, static_cast<int>(Meta::Size)>;
+
+// Holds the actual MemInfoBlock data with all fields. Contents may be read or
+// written partially by providing an appropriate schema to the serialize and
+// deserialize methods.
+struct PortableMemInfoBlock {
+ PortableMemInfoBlock() = default;
+ explicit PortableMemInfoBlock(const MemInfoBlock &Block) {
+#define MIBEntryDef(NameTag, Name, Type) Name = Block.Name;
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ }
+
+ PortableMemInfoBlock(const MemProfSchema &Schema, const unsigned char *Ptr) {
+ deserialize(Schema, Ptr);
+ }
+
+ // Read the contents of \p Ptr based on the \p Schema to populate the
+ // MemInfoBlock member.
+ void deserialize(const MemProfSchema &Schema, const unsigned char *Ptr) {
+ using namespace support;
+
+ for (const Meta Id : Schema) {
+ switch (Id) {
+#define MIBEntryDef(NameTag, Name, Type) \
+ case Meta::Name: { \
+ Name = endian::readNext<Type, little, unaligned>(Ptr); \
+ } break;
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ default:
+ llvm_unreachable("Unknown meta type id, is the profile collected from "
+ "a newer version of the runtime?");
+ }
+ }
+ }
+
+ // Write the contents of the MemInfoBlock based on the \p Schema provided to
+ // the raw_ostream \p OS.
+ void serialize(const MemProfSchema &Schema, raw_ostream &OS) const {
+ using namespace support;
+
+ endian::Writer LE(OS, little);
+ for (const Meta Id : Schema) {
+ switch (Id) {
+#define MIBEntryDef(NameTag, Name, Type) \
+ case Meta::Name: { \
+ LE.write<Type>(Name); \
+ } break;
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ default:
+ llvm_unreachable("Unknown meta type id, invalid input?");
+ }
+ }
+ }
+
+ // Print out the contents of the MemInfoBlock in YAML format.
+ void printYAML(raw_ostream &OS) const {
+ OS << " MemInfoBlock:\n";
+#define MIBEntryDef(NameTag, Name, Type) \
+ OS << " " << #Name << ": " << Name << "\n";
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ }
+
+ // Define getters for each type which can be called by analyses.
+#define MIBEntryDef(NameTag, Name, Type) \
+ Type get##Name() const { return Name; }
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+
+ void clear() { *this = PortableMemInfoBlock(); }
+
+ // Returns the full schema currently in use.
+ static MemProfSchema getSchema() {
+ MemProfSchema List;
+#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ return List;
+ }
+
+ bool operator==(const PortableMemInfoBlock &Other) const {
+#define MIBEntryDef(NameTag, Name, Type) \
+ if (Other.get##Name() != get##Name()) \
+ return false;
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ return true;
+ }
+
+ bool operator!=(const PortableMemInfoBlock &Other) const {
+ return !operator==(Other);
+ }
+
+ static constexpr size_t serializedSize() {
+ size_t Result = 0;
+#define MIBEntryDef(NameTag, Name, Type) Result += sizeof(Type);
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+ return Result;
+ }
+
+private:
+#define MIBEntryDef(NameTag, Name, Type) Type Name = Type();
+#include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+};
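A sketch of a partial round trip: only fields named in the schema are written, and a reader using the same schema recovers exactly those fields (Block is an existing PortableMemInfoBlock; using-directives for llvm and llvm::memprof are assumed):

  MemProfSchema Partial = {Meta::AllocCount, Meta::TotalSize};
  std::string Bytes;
  raw_string_ostream OS(Bytes);
  Block.serialize(Partial, OS);
  PortableMemInfoBlock Copy(
      Partial, reinterpret_cast<const unsigned char *>(OS.str().data()));
  // Copy.getAllocCount() == Block.getAllocCount(); unlisted fields stay
  // value-initialized.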
+
+// A type representing the id generated by hashing the contents of the Frame.
+using FrameId = uint64_t;
+// Describes a call frame for a dynamic allocation context. The contents of
+// the frame are populated by symbolizing the stack depot call frame from the
+// compiler runtime.
+struct Frame {
+ // A uuid (uint64_t) identifying the function. It is obtained by taking the
+ // lower 64 bits of llvm::md5(FunctionName).
+ GlobalValue::GUID Function;
+ // The symbol name for the function. Only populated in the Frame by the reader
+ // if requested during initialization. This field should not be serialized.
+ llvm::Optional<std::string> SymbolName;
+ // The source line offset of the call from the beginning of the parent
+ // function.
+ uint32_t LineOffset;
+ // The source column number of the call to help distinguish multiple calls
+ // on the same line.
+ uint32_t Column;
+ // Whether the current frame is inlined.
+ bool IsInlineFrame;
+
+ Frame(const Frame &Other) {
+ Function = Other.Function;
+ SymbolName = Other.SymbolName;
+ LineOffset = Other.LineOffset;
+ Column = Other.Column;
+ IsInlineFrame = Other.IsInlineFrame;
+ }
+
+ Frame(uint64_t Hash, uint32_t Off, uint32_t Col, bool Inline)
+ : Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
+
+ bool operator==(const Frame &Other) const {
+ // Ignore the SymbolName field to avoid a string compare. Comparing the
+ // function hash serves the same purpose.
+ return Other.Function == Function && Other.LineOffset == LineOffset &&
+ Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
+ }
+
+ Frame &operator=(const Frame &Other) {
+ Function = Other.Function;
+ SymbolName = Other.SymbolName;
+ LineOffset = Other.LineOffset;
+ Column = Other.Column;
+ IsInlineFrame = Other.IsInlineFrame;
+ return *this;
+ }
+
+ bool operator!=(const Frame &Other) const { return !operator==(Other); }
+
+ // Write the contents of the frame to the ostream \p OS.
+ void serialize(raw_ostream &OS) const {
+ using namespace support;
+
+ endian::Writer LE(OS, little);
+
+ // If the type of the GlobalValue::GUID changes, then we need to update
+ // the reader and the writer.
+ static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
+ "Expect GUID to be uint64_t.");
+ LE.write<uint64_t>(Function);
+
+ LE.write<uint32_t>(LineOffset);
+ LE.write<uint32_t>(Column);
+ LE.write<bool>(IsInlineFrame);
+ }
+
+ // Read a frame from char data which has been serialized as little endian.
+ static Frame deserialize(const unsigned char *Ptr) {
+ using namespace support;
+
+ const uint64_t F = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint32_t L = endian::readNext<uint32_t, little, unaligned>(Ptr);
+ const uint32_t C = endian::readNext<uint32_t, little, unaligned>(Ptr);
+ const bool I = endian::readNext<bool, little, unaligned>(Ptr);
+ return Frame(/*Function=*/F, /*LineOffset=*/L, /*Column=*/C,
+ /*IsInlineFrame=*/I);
+ }
+
+ // Returns the size of the frame information.
+ static constexpr size_t serializedSize() {
+ return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
+ sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
+ }
+
+ // Print the frame information in YAML format.
+ void printYAML(raw_ostream &OS) const {
+ OS << " -\n"
+ << " Function: " << Function << "\n"
+ << " SymbolName: " << SymbolName.value_or("<None>") << "\n"
+ << " LineOffset: " << LineOffset << "\n"
+ << " Column: " << Column << "\n"
+ << " Inline: " << IsInlineFrame << "\n";
+ }
+
+ // Return a hash value based on the contents of the frame. Here we don't use
+ // hashing from llvm ADT since we are going to persist the hash id; the hash
+ // combine algorithm in ADT uses a new randomized seed each time.
+ inline FrameId hash() const {
+ auto HashCombine = [](auto Value, size_t Seed) {
+ std::hash<decltype(Value)> Hasher;
+ // The constant used below is the 64 bit representation of the fractional
+ // part of the golden ratio. Used here for the randomness of its bit
+ // pattern.
+ return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
+ };
+
+ size_t Result = 0;
+ Result ^= HashCombine(Function, Result);
+ Result ^= HashCombine(LineOffset, Result);
+ Result ^= HashCombine(Column, Result);
+ Result ^= HashCombine(IsInlineFrame, Result);
+ return static_cast<FrameId>(Result);
+ }
+};
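Because hash() avoids ADT's per-process seeding, a frame's FrameId is stable across runs, which is what lets the indexed format store call stacks as id lists; a sketch:

  memprof::Frame F(/*Hash=*/0xDEADBEEF, /*Off=*/3, /*Col=*/7,
                   /*Inline=*/false);
  memprof::FrameId Id = F.hash(); // Identical fields always yield the same id.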
+
+// Holds allocation information in a space efficient format where frames are
+// represented using unique identifiers.
+struct IndexedAllocationInfo {
+ // The dynamic calling context for the allocation in bottom-up (leaf-to-root)
+ // order. Frame contents are stored out-of-line.
+ llvm::SmallVector<FrameId> CallStack;
+ // The statistics obtained from the runtime for the allocation.
+ PortableMemInfoBlock Info;
+
+ IndexedAllocationInfo() = default;
+ IndexedAllocationInfo(ArrayRef<FrameId> CS, const MemInfoBlock &MB)
+ : CallStack(CS.begin(), CS.end()), Info(MB) {}
+
+ // Returns the size in bytes when this allocation info struct is serialized.
+ size_t serializedSize() const {
+ return sizeof(uint64_t) + // The number of frames to serialize.
+ sizeof(FrameId) * CallStack.size() + // The callstack frame ids.
+ PortableMemInfoBlock::serializedSize(); // The size of the payload.
+ }
+
+ bool operator==(const IndexedAllocationInfo &Other) const {
+ if (Other.Info != Info)
+ return false;
+
+ if (Other.CallStack.size() != CallStack.size())
+ return false;
+
+ for (size_t J = 0; J < Other.CallStack.size(); J++) {
+ if (Other.CallStack[J] != CallStack[J])
+ return false;
+ }
+ return true;
+ }
+
+ bool operator!=(const IndexedAllocationInfo &Other) const {
+ return !operator==(Other);
+ }
+};
+
+// Holds allocation information with frame contents inline. The type should
+// be used for temporary in-memory instances.
+struct AllocationInfo {
+ // Same as IndexedAllocationInfo::CallStack with the frame contents inline.
+ llvm::SmallVector<Frame> CallStack;
+ // Same as IndexedAllocationInfo::Info.
+ PortableMemInfoBlock Info;
+
+ AllocationInfo() = default;
+ AllocationInfo(
+ const IndexedAllocationInfo &IndexedAI,
+ llvm::function_ref<const Frame(const FrameId)> IdToFrameCallback) {
+ for (const FrameId &Id : IndexedAI.CallStack) {
+ CallStack.push_back(IdToFrameCallback(Id));
+ }
+ Info = IndexedAI.Info;
+ }
+
+ void printYAML(raw_ostream &OS) const {
+ OS << " -\n";
+ OS << " Callstack:\n";
+ // TODO: Print out the frame on one line to make deep callstacks easier to
+ // read, once we have a test to check valid YAML is generated.
+ for (const Frame &F : CallStack) {
+ F.printYAML(OS);
+ }
+ Info.printYAML(OS);
+ }
+};
+
+// Holds the memprof profile information for a function. The internal
+// representation stores frame ids for efficiency. This representation should
+// be used in the profile conversion and manipulation tools.
+struct IndexedMemProfRecord {
+ // Memory allocation sites in this function for which we have memory
+ // profiling data.
+ llvm::SmallVector<IndexedAllocationInfo> AllocSites;
+ // Holds call sites in this function which are part of some memory
+ // allocation context. We store this as a list of locations, each with its
+ // list of inline locations in bottom-up order i.e. from leaf to root. The
+ // inline location list may include additional entries, users should pick
+ // the last entry in the list with the same function GUID.
+ llvm::SmallVector<llvm::SmallVector<FrameId>> CallSites;
+
+ void clear() {
+ AllocSites.clear();
+ CallSites.clear();
+ }
+
+ void merge(const IndexedMemProfRecord &Other) {
+ // TODO: Filter out duplicates which may occur if multiple memprof
+ // profiles are merged together using llvm-profdata.
+ AllocSites.append(Other.AllocSites);
+ CallSites.append(Other.CallSites);
+ }
+
+ size_t serializedSize() const {
+ size_t Result = sizeof(GlobalValue::GUID);
+ for (const IndexedAllocationInfo &N : AllocSites)
+ Result += N.serializedSize();
+
+ // The number of callsites we have information for.
+ Result += sizeof(uint64_t);
+ for (const auto &Frames : CallSites) {
+ // The number of frame ids to serialize.
+ Result += sizeof(uint64_t);
+ Result += Frames.size() * sizeof(FrameId);
+ }
+ return Result;
+ }
+
+ bool operator==(const IndexedMemProfRecord &Other) const {
+ if (Other.AllocSites.size() != AllocSites.size())
+ return false;
+
+ if (Other.CallSites.size() != CallSites.size())
+ return false;
+
+ for (size_t I = 0; I < AllocSites.size(); I++) {
+ if (AllocSites[I] != Other.AllocSites[I])
+ return false;
+ }
+
+ for (size_t I = 0; I < CallSites.size(); I++) {
+ if (CallSites[I] != Other.CallSites[I])
+ return false;
+ }
+ return true;
+ }
+
+ // Serializes the memprof records in \p Records to the ostream \p OS based
+ // on the schema provided in \p Schema.
+ void serialize(const MemProfSchema &Schema, raw_ostream &OS);
+
+ // Deserializes memprof records from the Buffer.
+ static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
+ const unsigned char *Buffer);
+
+ // Returns the GUID for the function name after canonicalization. For
+ // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are
+ // mapped to functions using this GUID.
+ static GlobalValue::GUID getGUID(const StringRef FunctionName);
+};
+
+// Holds the memprof profile information for a function. The internal
+// representation stores frame contents inline. This representation should
+// be used for a small number of temporary, in-memory instances.
+struct MemProfRecord {
+ // Same as IndexedMemProfRecord::AllocSites with frame contents inline.
+ llvm::SmallVector<AllocationInfo> AllocSites;
+ // Same as IndexedMemProfRecord::CallSites with frame contents inline.
+ llvm::SmallVector<llvm::SmallVector<Frame>> CallSites;
+
+ MemProfRecord() = default;
+ MemProfRecord(
+ const IndexedMemProfRecord &Record,
+ llvm::function_ref<const Frame(const FrameId Id)> IdToFrameCallback) {
+ for (const IndexedAllocationInfo &IndexedAI : Record.AllocSites) {
+ AllocSites.emplace_back(IndexedAI, IdToFrameCallback);
+ }
+ for (const ArrayRef<FrameId> Site : Record.CallSites) {
+ llvm::SmallVector<Frame> Frames;
+ for (const FrameId Id : Site) {
+ Frames.push_back(IdToFrameCallback(Id));
+ }
+ CallSites.push_back(Frames);
+ }
+ }
+
+ // Prints out the contents of the memprof record in YAML.
+ void print(llvm::raw_ostream &OS) const {
+ if (!AllocSites.empty()) {
+ OS << " AllocSites:\n";
+ for (const AllocationInfo &N : AllocSites)
+ N.printYAML(OS);
+ }
+
+ if (!CallSites.empty()) {
+ OS << " CallSites:\n";
+ for (const llvm::SmallVector<Frame> &Frames : CallSites) {
+ for (const Frame &F : Frames) {
+ OS << " -\n";
+ F.printYAML(OS);
+ }
+ }
+ }
+ }
+};
+
+// Reads a memprof schema from a buffer. All entries in the buffer are
+// interpreted as uint64_t. The first entry in the buffer denotes the number of
+// ids in the schema. Subsequent entries are integers which map to memprof::Meta
+// enum class entries. After successfully reading the schema, \p Buffer points
+// just past the schema contents.
+Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
+
+// Trait for reading IndexedMemProfRecord data from the on-disk hash table.
+class RecordLookupTrait {
+public:
+ using data_type = const IndexedMemProfRecord &;
+ using internal_key_type = uint64_t;
+ using external_key_type = uint64_t;
+ using hash_value_type = uint64_t;
+ using offset_type = uint64_t;
+
+ RecordLookupTrait() = delete;
+ RecordLookupTrait(const MemProfSchema &S) : Schema(S) {}
+
+ static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
+ static uint64_t GetInternalKey(uint64_t K) { return K; }
+ static uint64_t GetExternalKey(uint64_t K) { return K; }
+
+ hash_value_type ComputeHash(uint64_t K) { return K; }
+
+ static std::pair<offset_type, offset_type>
+ ReadKeyDataLength(const unsigned char *&D) {
+ using namespace support;
+
+ offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
+ offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
+ return std::make_pair(KeyLen, DataLen);
+ }
+
+ uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
+ using namespace support;
+ return endian::readNext<external_key_type, little, unaligned>(D);
+ }
+
+ data_type ReadData(uint64_t K, const unsigned char *D,
+ offset_type /*Unused*/) {
+ Record = IndexedMemProfRecord::deserialize(Schema, D);
+ return Record;
+ }
+
+private:
+ // Holds the memprof schema used to deserialize records.
+ MemProfSchema Schema;
+ // Holds the records from one function deserialized from the indexed format.
+ IndexedMemProfRecord Record;
+};
+
+// Trait for writing IndexedMemProfRecord data to the on-disk hash table.
+class RecordWriterTrait {
+public:
+ using key_type = uint64_t;
+ using key_type_ref = uint64_t;
+
+ using data_type = IndexedMemProfRecord;
+ using data_type_ref = IndexedMemProfRecord &;
+
+ using hash_value_type = uint64_t;
+ using offset_type = uint64_t;
+
+ // Pointer to the memprof schema to use for the generator. Unlike the reader
+ // trait, the writer trait must be default-constructible, so the schema is a
+ // public member which must be initialized by the user.
+ MemProfSchema *Schema = nullptr;
+
+ RecordWriterTrait() = default;
+
+ static hash_value_type ComputeHash(key_type_ref K) { return K; }
+
+ static std::pair<offset_type, offset_type>
+ EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+ using namespace support;
+
+ endian::Writer LE(Out, little);
+ offset_type N = sizeof(K);
+ LE.write<offset_type>(N);
+ offset_type M = V.serializedSize();
+ LE.write<offset_type>(M);
+ return std::make_pair(N, M);
+ }
+
+ void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
+ using namespace support;
+ endian::Writer LE(Out, little);
+ LE.write<uint64_t>(K);
+ }
+
+ void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
+ offset_type /*Unused*/) {
+ assert(Schema != nullptr && "MemProf schema is not initialized!");
+ V.serialize(*Schema, Out);
+ }
+};
+
+// Trait for writing frame mappings to the on-disk hash table.
+class FrameWriterTrait {
+public:
+ using key_type = FrameId;
+ using key_type_ref = FrameId;
+
+ using data_type = Frame;
+ using data_type_ref = Frame &;
+
+ using hash_value_type = FrameId;
+ using offset_type = uint64_t;
+
+ static hash_value_type ComputeHash(key_type_ref K) { return K; }
+
+ static std::pair<offset_type, offset_type>
+ EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+ using namespace support;
+ endian::Writer LE(Out, little);
+ offset_type N = sizeof(K);
+ LE.write<offset_type>(N);
+ offset_type M = V.serializedSize();
+ LE.write<offset_type>(M);
+ return std::make_pair(N, M);
+ }
+
+ void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
+ using namespace support;
+ endian::Writer LE(Out, little);
+ LE.write<key_type>(K);
+ }
+
+ void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
+ offset_type /*Unused*/) {
+ V.serialize(Out);
+ }
+};
+
+// Trait for reading frame mappings from the on-disk hash table.
+class FrameLookupTrait {
+public:
+ using data_type = const Frame;
+ using internal_key_type = FrameId;
+ using external_key_type = FrameId;
+ using hash_value_type = FrameId;
+ using offset_type = uint64_t;
+
+ static bool EqualKey(internal_key_type A, internal_key_type B) {
+ return A == B;
+ }
+ static uint64_t GetInternalKey(internal_key_type K) { return K; }
+ static uint64_t GetExternalKey(external_key_type K) { return K; }
+
+ hash_value_type ComputeHash(internal_key_type K) { return K; }
+
+ static std::pair<offset_type, offset_type>
+ ReadKeyDataLength(const unsigned char *&D) {
+ using namespace support;
+
+ offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
+ offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
+ return std::make_pair(KeyLen, DataLen);
+ }
+
+ uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
+ using namespace support;
+ return endian::readNext<external_key_type, little, unaligned>(D);
+ }
+
+ data_type ReadData(uint64_t K, const unsigned char *D,
+ offset_type /*Unused*/) {
+ return Frame::deserialize(D);
+ }
+};
+} // namespace memprof
+} // namespace llvm
+
+#endif // LLVM_PROFILEDATA_MEMPROF_H_
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
index ff22a697965c..6433cef84865 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -1,5 +1,5 @@
-#ifndef LLVM_PROFILEDATA_MEMPROFDATA_INC
-#define LLVM_PROFILEDATA_MEMPROFDATA_INC
+#ifndef MEMPROF_DATA_INC
+#define MEMPROF_DATA_INC
/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -80,71 +80,90 @@ PACKED(struct SegmentEntry {
}
});
+// Packed struct definition for MSVC. We can't use the PACKED macro defined in
+// MemProfData.inc since it would mean we are embedding a directive (the
+// #include for MIBEntryDef) into the macro, which is undefined behaviour.
+#ifdef _MSC_VER
+__pragma(pack(push,1))
+#endif
+
// A struct representing the heap allocation characteristics of a particular
// runtime context. This struct is shared between the compiler-rt runtime and
// the raw profile reader. The indexed format uses a separate, self-describing
// backwards compatible format.
-PACKED(struct MemInfoBlock {
- uint32_t alloc_count;
- uint64_t total_access_count, min_access_count, max_access_count;
- uint64_t total_size;
- uint32_t min_size, max_size;
- uint32_t alloc_timestamp, dealloc_timestamp;
- uint64_t total_lifetime;
- uint32_t min_lifetime, max_lifetime;
- uint32_t alloc_cpu_id, dealloc_cpu_id;
- uint32_t num_migrated_cpu;
-
- // Only compared to prior deallocated object currently.
- uint32_t num_lifetime_overlaps;
- uint32_t num_same_alloc_cpu;
- uint32_t num_same_dealloc_cpu;
-
- uint64_t data_type_id; // TODO: hash of type name
-
- MemInfoBlock() : alloc_count(0) {}
-
- MemInfoBlock(uint32_t size, uint64_t access_count, uint32_t alloc_timestamp,
- uint32_t dealloc_timestamp, uint32_t alloc_cpu, uint32_t dealloc_cpu)
- : alloc_count(1), total_access_count(access_count),
- min_access_count(access_count), max_access_count(access_count),
- total_size(size), min_size(size), max_size(size),
- alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
- total_lifetime(dealloc_timestamp - alloc_timestamp),
- min_lifetime(total_lifetime), max_lifetime(total_lifetime),
- alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
- num_lifetime_overlaps(0), num_same_alloc_cpu(0),
- num_same_dealloc_cpu(0) {
- num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
- }
-
- void Merge(const MemInfoBlock &newMIB) {
- alloc_count += newMIB.alloc_count;
-
- total_access_count += newMIB.total_access_count;
- min_access_count = newMIB.min_access_count < min_access_count ? newMIB.min_access_count : min_access_count;
- max_access_count = newMIB.max_access_count < max_access_count ? newMIB.max_access_count : max_access_count;
-
- total_size += newMIB.total_size;
- min_size = newMIB.min_size < min_size ? newMIB.min_size : min_size;
- max_size = newMIB.max_size < max_size ? newMIB.max_size : max_size;
+struct MemInfoBlock {
+
+#define MIBEntryDef(NameTag, Name, Type) Type Name;
+#include "MIBEntryDef.inc"
+#undef MIBEntryDef
+
+bool operator==(const MemInfoBlock& Other) const {
+ bool IsEqual = true;
+#define MIBEntryDef(NameTag, Name, Type) \
+ IsEqual = (IsEqual && Name == Other.Name);
+#include "MIBEntryDef.inc"
+#undef MIBEntryDef
+ return IsEqual;
+}
+
+MemInfoBlock() {
+#define MIBEntryDef(NameTag, Name, Type) Name = Type();
+#include "MIBEntryDef.inc"
+#undef MIBEntryDef
+}
+
+MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
+ uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
+ : MemInfoBlock() {
+ AllocCount = 1U;
+ TotalAccessCount = AccessCount;
+ MinAccessCount = AccessCount;
+ MaxAccessCount = AccessCount;
+ TotalSize = Size;
+ MinSize = Size;
+ MaxSize = Size;
+ AllocTimestamp = AllocTs;
+ DeallocTimestamp = DeallocTs;
+ TotalLifetime = DeallocTimestamp - AllocTimestamp;
+ MinLifetime = TotalLifetime;
+ MaxLifetime = TotalLifetime;
+ AllocCpuId = AllocCpu;
+ DeallocCpuId = DeallocCpu;
+ NumMigratedCpu = AllocCpuId != DeallocCpuId;
+}
+
+void Merge(const MemInfoBlock &newMIB) {
+ AllocCount += newMIB.AllocCount;
+
+ TotalAccessCount += newMIB.TotalAccessCount;
+ MinAccessCount = newMIB.MinAccessCount < MinAccessCount ? newMIB.MinAccessCount : MinAccessCount;
+ MaxAccessCount = newMIB.MaxAccessCount < MaxAccessCount ? newMIB.MaxAccessCount : MaxAccessCount;
+
+ TotalSize += newMIB.TotalSize;
+ MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize;
+ MaxSize = newMIB.MaxSize < MaxSize ? newMIB.MaxSize : MaxSize;
+
+ TotalLifetime += newMIB.TotalLifetime;
+ MinLifetime = newMIB.MinLifetime < MinLifetime ? newMIB.MinLifetime : MinLifetime;
+ MaxLifetime = newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
+
+ // We know newMIB was deallocated later, so just need to check if it was
+ // allocated before last one deallocated.
+ NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
+ AllocTimestamp = newMIB.AllocTimestamp;
+ DeallocTimestamp = newMIB.DeallocTimestamp;
+
+ NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
+ NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
+ AllocCpuId = newMIB.AllocCpuId;
+ DeallocCpuId = newMIB.DeallocCpuId;
+}
- total_lifetime += newMIB.total_lifetime;
- min_lifetime = newMIB.min_lifetime < min_lifetime ? newMIB.min_lifetime : min_lifetime;
- max_lifetime = newMIB.max_lifetime > max_lifetime ? newMIB.max_lifetime : max_lifetime;
-
- // We know newMIB was deallocated later, so just need to check if it was
- // allocated before last one deallocated.
- num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
- alloc_timestamp = newMIB.alloc_timestamp;
- dealloc_timestamp = newMIB.dealloc_timestamp;
-
- num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
- num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
- alloc_cpu_id = newMIB.alloc_cpu_id;
- dealloc_cpu_id = newMIB.dealloc_cpu_id;
- }
-});
+#ifdef _MSC_VER
+} __pragma(pack(pop));
+#else
+} __attribute__((__packed__));
+#endif
} // namespace memprof
} // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 45544927a86f..34f78063aa42 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -12,31 +12,142 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <cstddef>
+
namespace llvm {
namespace memprof {
+// Map from id (recorded from sanitizer stack depot) to virtual addresses for
+// each program counter address in the callstack.
+using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
+
class RawMemProfReader {
public:
- RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
- : DataBuffer(std::move(DataBuffer)) {}
- // Prints aggregate counts for each raw profile parsed from the DataBuffer.
- void printSummaries(raw_ostream &OS) const;
+ RawMemProfReader(const RawMemProfReader &) = delete;
+ RawMemProfReader &operator=(const RawMemProfReader &) = delete;
+
+ // Prints the contents of the profile in YAML format.
+ void printYAML(raw_ostream &OS);
// Return true if the \p DataBuffer starts with magic bytes indicating it is
// a raw binary memprof profile.
static bool hasFormat(const MemoryBuffer &DataBuffer);
+ // Return true if the file at \p Path starts with magic bytes indicating it is
+ // a raw binary memprof profile.
+ static bool hasFormat(const StringRef Path);
// Create a RawMemProfReader after sanity checking the contents of the file at
- // \p Path.
- static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path);
+ // \p Path. The binary from which the profile has been collected is specified
+ // via a path in \p ProfiledBinary.
+ static Expected<std::unique_ptr<RawMemProfReader>>
+ create(const Twine &Path, const StringRef ProfiledBinary,
+ bool KeepName = false);
+
+ using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
+ using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
+ Iterator end() { return Iterator(); }
+ Iterator begin() {
+ Iter = FunctionProfileData.begin();
+ return Iterator(this);
+ }
+
+ Error readNextRecord(GuidMemProfRecordPair &GuidRecord);
+
+ // The RawMemProfReader only holds memory profile information.
+ InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
+
+ // Constructor for unittests only.
+ RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
+ llvm::SmallVectorImpl<SegmentEntry> &Seg,
+ llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
+ CallStackMap &SM, bool KeepName = false)
+ : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
+ CallstackProfileData(Prof), StackMap(SM), KeepSymbolName(KeepName) {
+ // We don't call initialize here since there is no raw profile to read. The
+ // test should pass in the raw profile as structured data.
+
+ // If there is an error here then the mock symbolizer has not been
+ // initialized properly.
+ if (Error E = symbolizeAndFilterStackFrames())
+ report_fatal_error(std::move(E));
+ if (Error E = mapRawProfileToRecords())
+ report_fatal_error(std::move(E));
+ }
+
+ // Return a const reference to the internal Id to Frame mappings.
+ const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
+ return IdToFrame;
+ }
+
+ // Return a const reference to the internal function profile data.
+ const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
+ getProfileData() const {
+ return FunctionProfileData;
+ }
private:
- std::unique_ptr<MemoryBuffer> DataBuffer;
-};
+ RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
+ : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
+ // Initializes the RawMemProfReader with the contents in `DataBuffer`.
+ Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
+ // Read and parse the contents of the `DataBuffer` as a binary format profile.
+ Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
+ // Symbolize and cache all the virtual addresses we encounter in the
+ // callstacks from the raw profile. Also prune callstack frames which we can't
+ // symbolize or those that belong to the runtime. For profile entries where
+ // the entire callstack is pruned, we drop the entry from the profile.
+ Error symbolizeAndFilterStackFrames();
+ // Construct memprof records for each function and store them in the
+ // `FunctionProfileData` map. A function may have allocation profile data or
+ // callsite data or both.
+ Error mapRawProfileToRecords();
+
+ // A helper method to extract the frame from the IdToFrame map.
+ const Frame &idToFrame(const FrameId Id) const {
+ auto It = IdToFrame.find(Id);
+ assert(It != IdToFrame.end() && "Id not found in map.");
+ return It->getSecond();
+ }
+
+ object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
+
+ object::OwningBinary<object::Binary> Binary;
+ std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer;
+ // The contents of the raw profile.
+ llvm::SmallVector<SegmentEntry, 16> SegmentInfo;
+ // A map from callstack id (same as key in CallStackMap below) to the heap
+ // information recorded for that allocation context.
+ llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
+ CallStackMap StackMap;
+
+ // Cached symbolization from PC to Frame.
+ llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
+ llvm::DenseMap<FrameId, Frame> IdToFrame;
+
+ llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
+ llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
+
+ // Whether to keep the symbol name for each frame after hashing.
+ bool KeepSymbolName = false;
+ // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
+ llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
+};
} // namespace memprof
} // namespace llvm
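A sketch of end-to-end use, with illustrative paths; per the comments above, create() sanity-checks and symbolizes the profile before records can be iterated:

  auto ReaderOr = memprof::RawMemProfReader::create(
      "memprof.profraw", /*ProfiledBinary=*/"a.out");
  if (!ReaderOr)
    report_fatal_error(ReaderOr.takeError());
  std::unique_ptr<memprof::RawMemProfReader> Reader = std::move(*ReaderOr);
  for (const auto &[GUID, Record] : *Reader)
    Record.print(llvm::outs());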
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index bad2139fe8f0..f11392c05318 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -18,15 +18,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
#include <list>
@@ -40,6 +37,9 @@
namespace llvm {
+class DILocation;
+class raw_ostream;
+
const std::error_category &sampleprof_category();
enum class sampleprof_error {
@@ -55,7 +55,6 @@ enum class sampleprof_error {
not_implemented,
counter_overflow,
ostream_seek_unsupported,
- compress_failed,
uncompress_failed,
zlib_unavailable,
hash_mismatch
@@ -201,9 +200,9 @@ enum class SecProfSummaryFlags : uint32_t {
/// SecFlagFSDiscriminator means this profile uses flow-sensitive
/// discriminators.
SecFlagFSDiscriminator = (1 << 2),
- /// SecFlagIsCSNested means this is context-sensitive nested profile for
- /// CSSPGO
- SecFlagIsCSNested = (1 << 4),
+ /// SecFlagIsPreInlined means this profile contains ShouldBeInlined
+ /// contexts, and thus was computed by the CS preinliner.
+ SecFlagIsPreInlined = (1 << 4),
};
enum class SecFuncMetadataFlags : uint32_t {
@@ -343,6 +342,15 @@ public:
: sampleprof_error::success;
}
+ /// Decrease the number of samples for this record by \p S. Return the number
+ /// of samples actually decreased.
+ uint64_t removeSamples(uint64_t S) {
+ if (S > NumSamples)
+ S = NumSamples;
+ NumSamples -= S;
+ return S;
+ }
+
/// Add called function \p F with samples \p S.
/// Optionally scale sample count \p S by \p Weight.
///
@@ -358,6 +366,18 @@ public:
: sampleprof_error::success;
}
+ /// Remove called function from the call target map. Return the target sample
+ /// count of the called function.
+ uint64_t removeCalledTarget(StringRef F) {
+ uint64_t Count = 0;
+ auto I = CallTargets.find(F);
+ if (I != CallTargets.end()) {
+ Count = I->second;
+ CallTargets.erase(I);
+ }
+ return Count;
+ }
+
/// Return true if this sample record contains function calls.
bool hasCalls() const { return !CallTargets.empty(); }
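A sketch combining the two new primitives, mirroring what removeCalledTargetAndBodySample (added below) does at the FunctionSamples level; Rec and the callee name are illustrative:

  uint64_t Removed = Rec.removeCalledTarget("callee");
  Removed = Rec.removeSamples(Removed); // Capped at the record's own total.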
@@ -367,6 +387,13 @@ public:
return SortCallTargets(CallTargets);
}
+ uint64_t getCallTargetSum() const {
+ uint64_t Sum = 0;
+ for (const auto &I : CallTargets)
+ Sum += I.second;
+ return Sum;
+ }
+
/// Sort call targets in descending order of call frequency.
static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
SortedCallTargetSet SortedTargets;
@@ -413,6 +440,8 @@ enum ContextAttributeMask {
ContextNone = 0x0,
ContextWasInlined = 0x1, // Leaf of context was inlined in previous build
ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
+ ContextDuplicatedIntoBase =
+ 0x4, // Leaf of context is duplicated into the base profile
};
// Represents a context frame with function name and line location
@@ -524,16 +553,6 @@ public:
}
}
- // Promote context by removing top frames with the length of
- // `ContextFramesToRemove`. Note that with array representation of context,
- // the promotion is effectively a slice operation with first
- // `ContextFramesToRemove` elements removed from left.
- void promoteOnPath(uint32_t ContextFramesToRemove) {
- assert(ContextFramesToRemove <= FullContext.size() &&
- "Cannot remove more than the whole context");
- FullContext = FullContext.drop_front(ContextFramesToRemove);
- }
-
// Decode context string for a frame to get function name and location.
// `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
static void decodeContextString(StringRef ContextStr, StringRef &FName,
@@ -703,6 +722,13 @@ public:
: sampleprof_error::success;
}
+ void removeTotalSamples(uint64_t Num) {
+ if (TotalSamples < Num)
+ TotalSamples = 0;
+ else
+ TotalSamples -= Num;
+ }
+
void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
@@ -727,6 +753,22 @@ public:
FName, Num, Weight);
}
+ // Remove a call target and decrease the body sample correspondingly. Return
+ // the number of body samples actually decreased.
+ uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset,
+ uint32_t Discriminator,
+ StringRef FName) {
+ uint64_t Count = 0;
+ auto I = BodySamples.find(LineLocation(LineOffset, Discriminator));
+ if (I != BodySamples.end()) {
+ Count = I->second.removeCalledTarget(FName);
+ Count = I->second.removeSamples(Count);
+ if (!I->second.getSamples())
+ BodySamples.erase(I);
+ }
+ return Count;
+ }
+
sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
uint64_t Weight = 1) {
SampleRecord S;
@@ -734,6 +776,19 @@ public:
return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
}
+ // Accumulate all call target samples to update the body samples.
+ void updateCallsiteSamples() {
+ for (auto &I : BodySamples) {
+ uint64_t TargetSamples = I.second.getCallTargetSum();
+ // It's possible that the body sample count can be greater than the call
+ // target sum. E.g., if some call targets are external targets, they won't
+ // be considered valid call targets, but the body sample count, which is
+ // derived from LBR ranges, can actually include them.
+ if (TargetSamples > I.second.getSamples())
+ I.second.addSamples(TargetSamples - I.second.getSamples());
+ }
+ }
+
// Accumulate all body samples to set total samples.
void updateTotalSamples() {
setTotalSamples(0);
@@ -829,7 +884,7 @@ public:
/// Return the sample count of the first instruction of the function.
/// The function can be either a standalone symbol or an inlined function.
uint64_t getEntrySamples() const {
- if (FunctionSamples::ProfileIsCSFlat && getHeadSamples()) {
+ if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
// For CS profile, if we already have more accurate head samples
// counted by branch sample from caller, use them as entry samples.
return getHeadSamples();
@@ -1046,16 +1101,14 @@ public:
static bool ProfileIsProbeBased;
- static bool ProfileIsCSFlat;
+ static bool ProfileIsCS;
- static bool ProfileIsCSNested;
+ static bool ProfileIsPreInlined;
SampleContext &getContext() const { return Context; }
void setContext(const SampleContext &FContext) { Context = FContext; }
- static SampleProfileFormat Format;
-
/// Whether the profile uses MD5 to represent string.
static bool UseMD5;
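
Taken together, the new removal helpers keep a SampleRecord self-consistent when a call target is dropped. A minimal sketch of their interplay, using only the methods introduced above (the function name and the "foo" target are illustrative):

    #include "llvm/ProfileData/SampleProf.h"
    using namespace llvm::sampleprof;

    // Illustrative only: exercise the new removal helpers on a SampleRecord.
    void shrinkRecord(SampleRecord &Rec) {
      // Take the "foo" target's samples out of the call target map.
      uint64_t TargetCount = Rec.removeCalledTarget("foo");
      // Drop the same amount from the body count; removeSamples() clamps at
      // zero and returns how much was actually removed.
      uint64_t Removed = Rec.removeSamples(TargetCount);
      (void)Removed;
      // getCallTargetSum() now excludes the erased target.
      uint64_t Remaining = Rec.getCallTargetSum();
      (void)Remaining;
    }
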
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index a2caca246d93..7da336b9f61b 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -227,10 +227,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/GCOV.h"
@@ -240,7 +238,6 @@
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SymbolRemappingReader.h"
-#include <algorithm>
#include <cstdint>
#include <list>
#include <memory>
@@ -473,11 +470,11 @@ public:
/// Whether input profile is based on pseudo probes.
bool profileIsProbeBased() const { return ProfileIsProbeBased; }
- /// Whether input profile is fully context-sensitive and flat.
- bool profileIsCSFlat() const { return ProfileIsCSFlat; }
+ /// Whether input profile is fully context-sensitive.
+ bool profileIsCS() const { return ProfileIsCS; }
- /// Whether input profile is fully context-sensitive and nested.
- bool profileIsCSNested() const { return ProfileIsCSNested; }
+ /// Whether input profile contains ShouldBeInlined contexts.
+ bool profileIsPreInlined() const { return ProfileIsPreInlined; }
virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
return nullptr;
@@ -537,10 +534,10 @@ protected:
bool ProfileIsProbeBased = false;
/// Whether function profiles are context-sensitive flat profiles.
- bool ProfileIsCSFlat = false;
+ bool ProfileIsCS = false;
- /// Whether function profiles are context-sensitive nested profiles.
- bool ProfileIsCSNested = false;
+ /// Whether function profile contains ShouldBeInlined contexts.
+ bool ProfileIsPreInlined = false;
/// Number of context-sensitive profiles.
uint32_t CSProfileCount = 0;
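
The renamed query methods are checked after a profile is loaded. A minimal sketch, assuming SampleProfileReader::create(Filename, LLVMContext) remains callable with those two arguments:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/ProfileData/SampleProfReader.h"
    #include <string>

    // Hypothetical driver: load a profile and inspect the renamed flags.
    bool isPreInlinedCSProfile(const std::string &Path) {
      llvm::LLVMContext Ctx;
      auto ReaderOrErr = llvm::sampleprof::SampleProfileReader::create(Path, Ctx);
      if (!ReaderOrErr)
        return false;
      auto &Reader = **ReaderOrErr;
      if (Reader.read()) // non-zero error_code on failure
        return false;
      // profileIsCS() replaces profileIsCSFlat(); profileIsPreInlined()
      // replaces profileIsCSNested().
      return Reader.profileIsCS() && Reader.profileIsPreInlined();
    }
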
diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 42decd255203..aa7f1cbdd7e8 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -13,19 +13,15 @@
#define LLVM_PROFILEDATA_SAMPLEPROFWRITER_H
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cstdint>
#include <memory>
#include <set>
#include <system_error>
-#include <unordered_set>
namespace llvm {
namespace sampleprof {
diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h
index 6217bd98d1a5..b971173ad2c6 100644
--- a/llvm/include/llvm/Remarks/RemarkSerializer.h
+++ b/llvm/include/llvm/Remarks/RemarkSerializer.h
@@ -13,7 +13,6 @@
#ifndef LLVM_REMARKS_REMARKSERIALIZER_H
#define LLVM_REMARKS_REMARKSERIALIZER_H
-#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStringTable.h"
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index a953e9439db4..e2f949856d9f 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -168,10 +168,10 @@ AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_RCPC | AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cortex-a65ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_RCPC | AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
@@ -190,10 +190,11 @@ AARCH64_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
- AArch64::AEK_SSBS))
+ AArch64::AEK_SSBS | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
- AArch64::AEK_SSBS))
+ AArch64::AEK_SSBS | AArch64::AEK_PROFILE | AArch64::AEK_FLAGM |
+ AArch64::AEK_PAUTH | AArch64::AEK_FP16FML))
AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
(AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM |
AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML |
@@ -203,35 +204,37 @@ AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_LSE))
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
- AArch64::AEK_SSBS))
+ AArch64::AEK_SSBS | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
- AArch64::AEK_SSBS | AArch64::AEK_PAUTH))
+ AArch64::AEK_SSBS | AArch64::AEK_PAUTH | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
(AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
AArch64::AEK_FP16FML))
AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_RCPC | AArch64::AEK_SSBS))
AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
- AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC |
+ AArch64::AEK_PROFILE | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
AARCH64_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_BF16 | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
- AArch64::AEK_I8MM | AArch64::AEK_MTE | AArch64::AEK_RAS |
- AArch64::AEK_RCPC | AArch64::AEK_SB | AArch64::AEK_SSBS |
+ AArch64::AEK_I8MM | AArch64::AEK_MTE |
+ AArch64::AEK_SB | AArch64::AEK_SSBS |
AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM))
AARCH64_CPU_NAME("neoverse-512tvb", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
- AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 |
- AArch64::AEK_DOTPROD ))
+ (AArch64::AEK_SVE | AArch64::AEK_SSBS |
+ AArch64::AEK_FP16 | AArch64::AEK_BF16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_PROFILE |
+ AArch64::AEK_RAND | AArch64::AEK_FP16FML | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
- AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 |
- AArch64::AEK_DOTPROD ))
+ (AArch64::AEK_SVE | AArch64::AEK_SSBS |
+ AArch64::AEK_FP16 | AArch64::AEK_BF16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_PROFILE |
+ AArch64::AEK_RAND | AArch64::AEK_FP16FML | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
AARCH64_CPU_NAME("apple-a7", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -247,11 +250,11 @@ AARCH64_CPU_NAME("apple-a11", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("apple-a12", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16))
AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_FP16 | AArch64::AEK_FP16FML))
+ (AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-a14", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_FP16 | AArch64::AEK_FP16FML))
+ (AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-m1", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_FP16 | AArch64::AEK_FP16FML))
+ (AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16))
AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -271,17 +274,15 @@ AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_NONE))
AARCH64_CPU_NAME("thunderx3t110", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AEK_CRYPTO | AEK_FP | AEK_SIMD |
- AEK_LSE | AEK_RAND | AArch64::AEK_PROFILE |
- AArch64::AEK_RAS))
+ (AArch64::AEK_NONE))
AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("tsv110", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_DOTPROD |
AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
@@ -290,6 +291,8 @@ AARCH64_CPU_NAME("a64fx", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_SVE))
AARCH64_CPU_NAME("carmel", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AArch64::AEK_FP16)
+AARCH64_CPU_NAME("ampere1", ARMV8_6A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_SB | AArch64::AEK_SSBS))
// Invalid CPU
AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
#undef AARCH64_CPU_NAME
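
Like the other target parser .def files, this one is consumed through the X-macro pattern: the includer defines the entry macro it cares about, and the file supplies empty defaults for the rest. A minimal sketch (the printing function is hypothetical; the five-parameter arity matches the entries above):

    #include "llvm/Support/raw_ostream.h"

    // Expand each CPU entry into a print statement. Macros we leave
    // undefined (AARCH64_ARCH, AARCH64_ARCH_EXT_NAME, ...) default to
    // empty expansions, and the .def file #undefs everything itself.
    void printAArch64CPUNames() {
    #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT)   \
      llvm::outs() << NAME << "\n";
    #include "llvm/Support/AArch64TargetParser.def"
    }
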
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index aec80291f01f..41d144cfd5c4 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -136,13 +136,17 @@ enum : int32_t {
// Compute program resource register 3 for GFX10+. Must match hardware
// definition.
-#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
- AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
+#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
enum : int32_t {
- COMPUTE_PGM_RSRC3_GFX10(SHARED_VGPR_COUNT, 0, 4), // GFX10+
- COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 28),
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6), // GFX11+
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1), // GFX11+
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1), // GFX11+
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
+ COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1), // GFX11+
};
-#undef COMPUTE_PGM_RSRC3_GFX10
+#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
// Kernel code properties. Must be kept backwards compatible.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h
index b4405e7d4908..35f8992ca932 100644
--- a/llvm/include/llvm/Support/ARMBuildAttributes.h
+++ b/llvm/include/llvm/Support/ARMBuildAttributes.h
@@ -90,25 +90,26 @@ enum AttrType : unsigned {
// Legal Values for CPU_arch, (=6), uleb128
enum CPUArch {
- Pre_v4 = 0,
- v4 = 1, // e.g. SA110
- v4T = 2, // e.g. ARM7TDMI
- v5T = 3, // e.g. ARM9TDMI
- v5TE = 4, // e.g. ARM946E_S
- v5TEJ = 5, // e.g. ARM926EJ_S
- v6 = 6, // e.g. ARM1136J_S
- v6KZ = 7, // e.g. ARM1176JZ_S
- v6T2 = 8, // e.g. ARM1156T2_S
- v6K = 9, // e.g. ARM1176JZ_S
- v7 = 10, // e.g. Cortex A8, Cortex M3
- v6_M = 11, // e.g. Cortex M1
- v6S_M = 12, // v6_M with the System extensions
- v7E_M = 13, // v7_M with DSP extensions
- v8_A = 14, // v8_A AArch32
- v8_R = 15, // e.g. Cortex R52
- v8_M_Base= 16, // v8_M_Base AArch32
- v8_M_Main= 17, // v8_M_Main AArch32
- v8_1_M_Main=21, // v8_1_M_Main AArch32
+ Pre_v4 = 0,
+ v4 = 1, // e.g. SA110
+ v4T = 2, // e.g. ARM7TDMI
+ v5T = 3, // e.g. ARM9TDMI
+ v5TE = 4, // e.g. ARM946E_S
+ v5TEJ = 5, // e.g. ARM926EJ_S
+ v6 = 6, // e.g. ARM1136J_S
+ v6KZ = 7, // e.g. ARM1176JZ_S
+ v6T2 = 8, // e.g. ARM1156T2_S
+ v6K = 9, // e.g. ARM1176JZ_S
+ v7 = 10, // e.g. Cortex A8, Cortex M3
+ v6_M = 11, // e.g. Cortex M1
+ v6S_M = 12, // v6_M with the System extensions
+ v7E_M = 13, // v7_M with DSP extensions
+ v8_A = 14, // v8_A AArch32
+ v8_R = 15, // e.g. Cortex R52
+ v8_M_Base = 16, // v8_M_Base AArch32
+ v8_M_Main = 17, // v8_M_Main AArch32
+ v8_1_M_Main = 21, // v8_1_M_Main AArch32
+ v9_A = 22, // v9_A AArch32
};
enum CPUArchProfile { // (=7), uleb128
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 80deeb2a6e9d..6a1ac7213dad 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -129,22 +129,22 @@ ARM_ARCH("armv8.8-a", ARMV8_8A, "8.8-A", "v8.8a",
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
ARM::AEK_I8MM))
ARM_ARCH("armv9-a", ARMV9A, "9-A", "v9a",
- ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ ARMBuildAttrs::CPUArch::v9_A, FK_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD))
ARM_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a",
- ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ ARMBuildAttrs::CPUArch::v9_A, FK_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
ARM_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a",
- ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ ARMBuildAttrs::CPUArch::v9_A, FK_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
ARM_ARCH("armv9.3-a", ARMV9_3A, "9.3-A", "v9.3a",
- ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
+ ARMBuildAttrs::CPUArch::v9_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
diff --git a/llvm/include/llvm/Support/ARMWinEH.h b/llvm/include/llvm/Support/ARMWinEH.h
index 327aa9804849..dee2f31fb127 100644
--- a/llvm/include/llvm/Support/ARMWinEH.h
+++ b/llvm/include/llvm/Support/ARMWinEH.h
@@ -199,13 +199,14 @@ inline bool EpilogueFolding(const RuntimeFunction &RF) {
inline uint16_t StackAdjustment(const RuntimeFunction &RF) {
uint16_t Adjustment = RF.StackAdjust();
if (Adjustment >= 0x3f4)
- return (Adjustment & 0x3) ? ((Adjustment & 0x3) << 2) - 1 : 0;
+ return (Adjustment & 0x3) + 1;
return Adjustment;
}
/// SavedRegisterMask - Utility function to calculate the set of saved general
/// purpose (r0-r15) and VFP (d0-d31) registers.
-std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF);
+std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF,
+ bool Prologue = true);
/// RuntimeFunctionARM64 - An entry in the table of procedure data (.pdata)
///
diff --git a/llvm/include/llvm/Support/Alignment.h b/llvm/include/llvm/Support/Alignment.h
index 1176c026ba99..1543a5713d73 100644
--- a/llvm/include/llvm/Support/Alignment.h
+++ b/llvm/include/llvm/Support/Alignment.h
@@ -84,6 +84,14 @@ public:
/// Needed to interact with C for instance.
uint64_t value() const { return uint64_t(1) << ShiftValue; }
+ /// Returns the previous alignment.
+ Align previous() const {
+ assert(ShiftValue != 0 && "Undefined operation");
+ Align Out;
+ Out.ShiftValue = ShiftValue - 1;
+ return Out;
+ }
+
/// Allow constructions of constexpr Align.
template <size_t kValue> constexpr static LogValue Constant() {
return LogValue{static_cast<uint8_t>(CTLog2<kValue>())};
@@ -131,7 +139,7 @@ public:
}
/// For convenience, returns a valid alignment or 1 if undefined.
- Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
+ Align valueOrOne() const { return value_or(Align()); }
};
/// Checks that SizeInBytes is a multiple of the alignment.
@@ -173,13 +181,7 @@ inline uint64_t alignTo(uint64_t Size, Align A) {
inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) {
const uint64_t Value = A.value();
Skew %= Value;
- return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew;
-}
-
-/// Returns a multiple of A needed to store `Size` bytes.
-/// Returns `Size` if current alignment is undefined.
-inline uint64_t alignTo(uint64_t Size, MaybeAlign A) {
- return A ? alignTo(Size, A.getValue()) : Size;
+ return alignTo(Size - Skew, A) + Skew;
}
/// Aligns `Addr` to `Alignment` bytes, rounding up.
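
The skewed overload still returns the smallest value at or above Size that is congruent to Skew modulo the alignment. A small worked check of the rewritten formula, together with the new previous():

    #include "llvm/Support/Alignment.h"
    #include <cassert>

    void alignmentChecks() {
      // Skew %= 8 leaves 3; alignTo(10 - 3, Align(8)) + 3 == 8 + 3 == 11,
      // the smallest value >= 10 that is congruent to 3 (mod 8).
      assert(llvm::alignTo(10, llvm::Align(8), 3) == 11);
      // previous() steps down one power of two (and asserts on Align(1)).
      assert(llvm::Align(8).previous() == llvm::Align(4));
    }
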
@@ -208,26 +210,10 @@ inline unsigned Log2(Align A) { return A.ShiftValue; }
/// Returns the alignment that satisfies both alignments.
/// Same semantic as MinAlign.
-inline Align commonAlignment(Align A, Align B) { return std::min(A, B); }
-
-/// Returns the alignment that satisfies both alignments.
-/// Same semantic as MinAlign.
inline Align commonAlignment(Align A, uint64_t Offset) {
return Align(MinAlign(A.value(), Offset));
}
-/// Returns the alignment that satisfies both alignments.
-/// Same semantic as MinAlign.
-inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) {
- return A && B ? commonAlignment(*A, *B) : A ? A : B;
-}
-
-/// Returns the alignment that satisfies both alignments.
-/// Same semantic as MinAlign.
-inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
- return MaybeAlign(MinAlign((*A).value(), Offset));
-}
-
/// Returns a representation of the alignment that encodes undefined as 0.
inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; }
@@ -270,14 +256,6 @@ inline bool operator>(Align Lhs, uint64_t Rhs) {
return Lhs.value() > Rhs;
}
-/// Comparisons between MaybeAlign and scalars.
-inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) {
- return Lhs ? (*Lhs).value() == Rhs : Rhs == 0;
-}
-inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) {
- return Lhs ? (*Lhs).value() != Rhs : Rhs != 0;
-}
-
/// Comparisons operators between Align.
inline bool operator==(Align Lhs, Align Rhs) {
return Lhs.ShiftValue == Rhs.ShiftValue;
@@ -314,37 +292,6 @@ bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
-inline Align operator*(Align Lhs, uint64_t Rhs) {
- assert(Rhs > 0 && "Rhs must be positive");
- return Align(Lhs.value() * Rhs);
-}
-
-inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) {
- assert(Rhs > 0 && "Rhs must be positive");
- return Lhs ? Lhs.getValue() * Rhs : MaybeAlign();
-}
-
-inline Align operator/(Align Lhs, uint64_t Divisor) {
- assert(llvm::isPowerOf2_64(Divisor) &&
- "Divisor must be positive and a power of 2");
- assert(Lhs != 1 && "Can't halve byte alignment");
- return Align(Lhs.value() / Divisor);
-}
-
-inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) {
- assert(llvm::isPowerOf2_64(Divisor) &&
- "Divisor must be positive and a power of 2");
- return Lhs ? Lhs.getValue() / Divisor : MaybeAlign();
-}
-
-inline Align max(MaybeAlign Lhs, Align Rhs) {
- return Lhs && *Lhs > Rhs ? *Lhs : Rhs;
-}
-
-inline Align max(Align Lhs, MaybeAlign Rhs) {
- return Rhs && *Rhs > Lhs ? *Rhs : Lhs;
-}
-
#ifndef NDEBUG
// For usage in LLVM_DEBUG macros.
inline std::string DebugStr(const Align &A) {
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index ec5ed06b7fa4..5ca0c9decac3 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -140,6 +140,9 @@ public:
// This method is *not* marked noalias, because
// SpecificBumpPtrAllocator::DestroyAll() loops over all allocations, and
// that loop is not based on the Allocate() return value.
+ //
+ // Allocate(0, N) is valid; it returns a non-null pointer (which should not
+ // be dereferenced).
LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) {
// Keep track of how many bytes we've allocated.
BytesAllocated += Size;
@@ -154,7 +157,9 @@ public:
#endif
// Check if we have enough space.
- if (Adjustment + SizeToAllocate <= size_t(End - CurPtr)) {
+ if (Adjustment + SizeToAllocate <= size_t(End - CurPtr)
+ // We can't return nullptr even for a zero-sized allocation!
+ && CurPtr != nullptr) {
char *AlignedPtr = CurPtr + Adjustment;
CurPtr = AlignedPtr + SizeToAllocate;
// Update the allocation point of this memory block in MemorySanitizer.
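
That guarantee is observable from user code; a minimal sketch with the stock BumpPtrAllocator:

    #include "llvm/Support/Allocator.h"
    #include <cassert>

    void zeroSizedAllocation() {
      llvm::BumpPtrAllocator Alloc;
      // Allocate(0, N) must yield a non-null pointer, which is why the fast
      // path above now also requires CurPtr != nullptr.
      void *P = Alloc.Allocate(0, llvm::Align(1));
      assert(P != nullptr); // valid to compare, not to dereference
    }
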
diff --git a/llvm/include/llvm/Support/BLAKE3.h b/llvm/include/llvm/Support/BLAKE3.h
new file mode 100644
index 000000000000..7b30dbccd173
--- /dev/null
+++ b/llvm/include/llvm/Support/BLAKE3.h
@@ -0,0 +1,124 @@
+//==- BLAKE3.h - BLAKE3 C++ wrapper for LLVM ---------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a C++ wrapper of the BLAKE3 C interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_BLAKE3_H
+#define LLVM_SUPPORT_BLAKE3_H
+
+#include "llvm-c/blake3.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+/// The constant \p LLVM_BLAKE3_OUT_LEN provides the default output length,
+/// 32 bytes, which is recommended for most callers.
+///
+/// Outputs shorter than the default length of 32 bytes (256 bits) provide
+/// less security. An N-bit BLAKE3 output is intended to provide N bits of
+/// first and second preimage resistance and N/2 bits of collision
+/// resistance, for any N up to 256. Longer outputs don't provide any
+/// additional security.
+///
+/// Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly
+/// requesting a short output is equivalent to truncating the default-length
+/// output.
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+using BLAKE3Result = std::array<uint8_t, NumBytes>;
+
+/// A class that wraps the BLAKE3 algorithm.
+class BLAKE3 {
+public:
+ BLAKE3() { init(); }
+
+ /// Reinitialize the internal state.
+ void init() { llvm_blake3_hasher_init(&Hasher); }
+
+ /// Digest more data.
+ void update(ArrayRef<uint8_t> Data) {
+ llvm_blake3_hasher_update(&Hasher, Data.data(), Data.size());
+ }
+
+ /// Digest more data.
+ void update(StringRef Str) {
+ llvm_blake3_hasher_update(&Hasher, Str.data(), Str.size());
+ }
+
+ /// Finalize the hasher and put the result in \p Result.
+ /// This doesn't modify the hasher itself, and it's possible to finalize again
+ /// after adding more input.
+ template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+ void final(BLAKE3Result<NumBytes> &Result) {
+ llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
+ }
+
+ /// Finalize the hasher and return an output of \p NumBytes bytes.
+ /// This doesn't modify the hasher itself, and it's possible to finalize again
+ /// after adding more input.
+ template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+ BLAKE3Result<NumBytes> final() {
+ BLAKE3Result<NumBytes> Result;
+ llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
+ return Result;
+ }
+
+ /// Return the current output for the digested data since the last call to
+ /// init().
+ ///
+ /// Other hash functions distinguish between \p result() and \p final(), with
+ /// \p result() allowing more calls into \p update(), but there's no
+ /// difference for the BLAKE3 hash function.
+ template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+ BLAKE3Result<NumBytes> result() {
+ return final<NumBytes>();
+ }
+
+ /// Returns a BLAKE3 hash for the given data.
+ template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+ static BLAKE3Result<NumBytes> hash(ArrayRef<uint8_t> Data) {
+ BLAKE3 Hasher;
+ Hasher.update(Data);
+ return Hasher.final<NumBytes>();
+ }
+
+private:
+ llvm_blake3_hasher Hasher;
+};
+
+/// Like \p BLAKE3 but using a class-level template parameter for specifying the
+/// hash size of the \p final() and \p result() functions.
+///
+/// This is useful for using BLAKE3 as the hasher type for \p HashBuilder with
+/// non-default hash sizes.
+template <size_t NumBytes> class TruncatedBLAKE3 : public BLAKE3 {
+public:
+ /// Finalize the hasher and put the result in \p Result.
+ /// This doesn't modify the hasher itself, and it's possible to finalize again
+ /// after adding more input.
+ void final(BLAKE3Result<NumBytes> &Result) { return BLAKE3::final(Result); }
+
+ /// Finalize the hasher and return an output of \p NumBytes bytes.
+ /// This doesn't modify the hasher itself, and it's possible to finalize again
+ /// after adding more input.
+ BLAKE3Result<NumBytes> final() { return BLAKE3::final<NumBytes>(); }
+
+ /// Return the current output for the digested data since the last call to
+ /// init().
+ ///
+ /// Other hash functions distinguish between \p result() and \p final(), with
+ /// \p result() allowing more calls into \p update(), but there's no
+ /// difference for the BLAKE3 hash function.
+ BLAKE3Result<NumBytes> result() { return BLAKE3::result<NumBytes>(); }
+};
+
+} // namespace llvm
+
+#endif
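
A short usage sketch of the wrapper as declared above:

    #include "llvm/Support/BLAKE3.h"

    void blake3Demo(llvm::StringRef Input, llvm::ArrayRef<uint8_t> Bytes) {
      llvm::BLAKE3 Hasher;
      Hasher.update(Input);
      llvm::BLAKE3Result<> Digest = Hasher.final(); // 32-byte default
      // result() is an alias for final(); the hasher stays usable, so more
      // input can be digested and finalized again.
      Hasher.update(Input);
      llvm::BLAKE3Result<16> Short = Hasher.result<16>(); // truncated output
      // One-shot convenience for raw bytes:
      llvm::BLAKE3Result<> OneShot = llvm::BLAKE3::hash(Bytes);
      (void)Digest; (void)Short; (void)OneShot;
    }
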
diff --git a/llvm/include/llvm/Support/Base64.h b/llvm/include/llvm/Support/Base64.h
index 62064a35aa34..da4ae1688574 100644
--- a/llvm/include/llvm/Support/Base64.h
+++ b/llvm/include/llvm/Support/Base64.h
@@ -13,6 +13,7 @@
#ifndef LLVM_SUPPORT_BASE64_H
#define LLVM_SUPPORT_BASE64_H
+#include <cstdint>
#include <string>
namespace llvm {
diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h
index c3e0db4dcff0..ef2233c53ec2 100644
--- a/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -111,6 +111,8 @@ public:
bool valid() const { return Stream.valid(); }
+ bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); }
+
uint32_t skew() const { return Skew; }
Iterator end() const { return Iterator(E); }
diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h
index bc8c6a496ecf..46fc9fb293df 100644
--- a/llvm/include/llvm/Support/BinaryStreamRef.h
+++ b/llvm/include/llvm/Support/BinaryStreamRef.h
@@ -48,7 +48,7 @@ public:
}
uint64_t getLength() const {
- if (Length.hasValue())
+ if (Length)
return *Length;
return BorrowedImpl ? (BorrowedImpl->getLength() - ViewOffset) : 0;
@@ -67,7 +67,7 @@ public:
return Result;
Result.ViewOffset += N;
- if (Result.Length.hasValue())
+ if (Result.Length)
*Result.Length -= N;
return Result;
}
@@ -87,7 +87,7 @@ public:
// Since we're dropping non-zero bytes from the end, stop length-tracking
// by setting the length of the resulting StreamRef to an explicit value.
- if (!Result.Length.hasValue())
+ if (!Result.Length)
Result.Length = getLength();
*Result.Length -= N;
diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h
index 6f071c15421f..79d70cf611d4 100644
--- a/llvm/include/llvm/Support/BranchProbability.h
+++ b/llvm/include/llvm/Support/BranchProbability.h
@@ -16,6 +16,7 @@
#include "llvm/Support/DataTypes.h"
#include <algorithm>
#include <cassert>
+#include <iterator>
#include <numeric>
namespace llvm {
diff --git a/llvm/include/llvm/Support/CSKYAttributeParser.h b/llvm/include/llvm/Support/CSKYAttributeParser.h
new file mode 100644
index 000000000000..e926ebe5e306
--- /dev/null
+++ b/llvm/include/llvm/Support/CSKYAttributeParser.h
@@ -0,0 +1,43 @@
+//===---- CSKYAttributeParser.h - CSKY Attribute Parser ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CSKYATTRIBUTEPARSER_H
+#define LLVM_SUPPORT_CSKYATTRIBUTEPARSER_H
+
+#include "llvm/Support/CSKYAttributes.h"
+#include "llvm/Support/ELFAttributeParser.h"
+
+namespace llvm {
+class CSKYAttributeParser : public ELFAttributeParser {
+ struct DisplayHandler {
+ CSKYAttrs::AttrType attribute;
+ Error (CSKYAttributeParser::*routine)(unsigned);
+ };
+ static const DisplayHandler displayRoutines[];
+
+ Error dspVersion(unsigned tag);
+ Error vdspVersion(unsigned tag);
+ Error fpuVersion(unsigned tag);
+ Error fpuABI(unsigned tag);
+ Error fpuRounding(unsigned tag);
+ Error fpuDenormal(unsigned tag);
+ Error fpuException(unsigned tag);
+ Error fpuHardFP(unsigned tag);
+
+ Error handler(uint64_t tag, bool &handled) override;
+
+public:
+ CSKYAttributeParser(ScopedPrinter *sw)
+ : ELFAttributeParser(sw, CSKYAttrs::getCSKYAttributeTags(), "csky") {}
+ CSKYAttributeParser()
+ : ELFAttributeParser(CSKYAttrs::getCSKYAttributeTags(), "csky") {}
+};
+
+} // namespace llvm
+
+#endif
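
A minimal sketch of driving the parser over a raw attribute section, assuming the ELFAttributeParser::parse(ArrayRef<uint8_t>, endianness) entry point from the base class; the section bytes are a placeholder:

    #include "llvm/Support/CSKYAttributeParser.h"
    #include "llvm/Support/Endian.h"
    #include "llvm/Support/Error.h"

    // Hypothetical driver: SectionBytes would come from an ELF
    // .csky.attributes section.
    llvm::Error parseCSKYAttributes(llvm::ArrayRef<uint8_t> SectionBytes) {
      llvm::CSKYAttributeParser Parser; // no ScopedPrinter: collect only
      return Parser.parse(SectionBytes, llvm::support::little);
    }
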
diff --git a/llvm/include/llvm/Support/CSKYAttributes.h b/llvm/include/llvm/Support/CSKYAttributes.h
new file mode 100644
index 000000000000..723f2ceee8fb
--- /dev/null
+++ b/llvm/include/llvm/Support/CSKYAttributes.h
@@ -0,0 +1,95 @@
+//===---- CSKYAttributes.h - CSKY Attributes --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations for CSKY attributes.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_CSKYATTRIBUTES_H
+#define LLVM_SUPPORT_CSKYATTRIBUTES_H
+
+#include "llvm/Support/ELFAttributes.h"
+
+namespace llvm {
+namespace CSKYAttrs {
+
+const TagNameMap &getCSKYAttributeTags();
+
+enum AttrType {
+ CSKY_ARCH_NAME = 4,
+ CSKY_CPU_NAME = 5,
+ CSKY_ISA_FLAGS = 6,
+ CSKY_ISA_EXT_FLAGS = 7,
+ CSKY_DSP_VERSION = 8,
+ CSKY_VDSP_VERSION = 9,
+ CSKY_FPU_VERSION = 16,
+ CSKY_FPU_ABI = 17,
+ CSKY_FPU_ROUNDING = 18,
+ CSKY_FPU_DENORMAL = 19,
+ CSKY_FPU_EXCEPTION = 20,
+ CSKY_FPU_NUMBER_MODULE = 21,
+ CSKY_FPU_HARDFP = 22
+};
+
+enum ISA_FLAGS {
+ V2_ISA_E1 = 1 << 1,
+ V2_ISA_1E2 = 1 << 2,
+ V2_ISA_2E3 = 1 << 3,
+ V2_ISA_3E7 = 1 << 4,
+ V2_ISA_7E10 = 1 << 5,
+ V2_ISA_3E3R1 = 1 << 6,
+ V2_ISA_3E3R2 = 1 << 7,
+ V2_ISA_10E60 = 1 << 8,
+ V2_ISA_3E3R3 = 1 << 9,
+ ISA_TRUST = 1 << 11,
+ ISA_CACHE = 1 << 12,
+ ISA_NVIC = 1 << 13,
+ ISA_CP = 1 << 14,
+ ISA_MP = 1 << 15,
+ ISA_MP_1E2 = 1 << 16,
+ ISA_JAVA = 1 << 17,
+ ISA_MAC = 1 << 18,
+ ISA_MAC_DSP = 1 << 19,
+ ISA_DSP = 1 << 20,
+ ISA_DSP_1E2 = 1 << 21,
+ ISA_DSP_ENHANCE = 1 << 22,
+ ISA_DSP_SILAN = 1 << 23,
+ ISA_VDSP = 1 << 24,
+ ISA_VDSP_2 = 1 << 25,
+ ISA_VDSP_2E3 = 1 << 26,
+ V2_ISA_DSPE60 = 1 << 27,
+ ISA_VDSP_2E60F = 1 << 28
+};
+
+enum ISA_EXT_FLAGS {
+ ISA_FLOAT_E1 = 1 << 0,
+ ISA_FLOAT_1E2 = 1 << 1,
+ ISA_FLOAT_1E3 = 1 << 2,
+ ISA_FLOAT_3E4 = 1 << 3,
+ ISA_FLOAT_7E60 = 1 << 4
+};
+
+enum { NONE = 0, NEEDED = 1 };
+
+enum DSP_VERSION { DSP_VERSION_EXTENSION = 1, DSP_VERSION_2 = 2 };
+
+enum VDSP_VERSION { VDSP_VERSION_1 = 1, VDSP_VERSION_2 = 2 };
+
+enum FPU_VERSION { FPU_VERSION_1 = 1, FPU_VERSION_2 = 2, FPU_VERSION_3 = 3 };
+
+enum FPU_ABI { FPU_ABI_SOFT = 1, FPU_ABI_SOFTFP = 2, FPU_ABI_HARD = 3 };
+
+enum FPU_HARDFP {
+ FPU_HARDFP_HALF = 1,
+ FPU_HARDFP_SINGLE = 2,
+ FPU_HARDFP_DOUBLE = 4
+};
+
+} // namespace CSKYAttrs
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Support/CSKYTargetParser.def b/llvm/include/llvm/Support/CSKYTargetParser.def
new file mode 100644
index 000000000000..c93d6fdf8cce
--- /dev/null
+++ b/llvm/include/llvm/Support/CSKYTargetParser.def
@@ -0,0 +1,524 @@
+//===- CSKYTargetParser.def - CSKY target parsing defines -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides defines to build up the CSKY target parser's logic.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef CSKY_FPU
+#define CSKY_FPU(NAME, KIND, VERSION)
+#endif
+CSKY_FPU("invalid", FK_INVALID, FPUVersion::NONE)
+CSKY_FPU("auto", FK_AUTO, FPUVersion::FPV2)
+CSKY_FPU("fpv2", FK_FPV2, FPUVersion::FPV2)
+CSKY_FPU("fpv2_divd", FK_FPV2_DIVD, FPUVersion::FPV2)
+CSKY_FPU("fpv2_sf", FK_FPV2_SF, FPUVersion::FPV2)
+CSKY_FPU("fpv3", FK_FPV3, FPUVersion::FPV3)
+CSKY_FPU("fpv3_hf", FK_FPV3_HF, FPUVersion::FPV3)
+CSKY_FPU("fpv3_hsf", FK_FPV3_HSF, FPUVersion::FPV3)
+CSKY_FPU("fpv3_sdf", FK_FPV3_SDF, FPUVersion::FPV3)
+
+#undef CSKY_FPU
+
+#ifndef CSKY_ARCH
+#define CSKY_ARCH(NAME, ID, ARCH_BASE_EXT)
+#endif
+CSKY_ARCH("invalid", INVALID, CSKY::AEK_INVALID)
+CSKY_ARCH("ck801", CK801, CSKY::MAEK_E1 | CSKY::AEK_TRUST)
+CSKY_ARCH("ck802", CK802, CSKY::MAEK_E2 | CSKY::AEK_TRUST | CSKY::AEK_NVIC)
+CSKY_ARCH("ck803", CK803,
+ CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC |
+ CSKY::AEK_HWDIV)
+CSKY_ARCH("ck803s", CK803S,
+ CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC |
+ CSKY::AEK_HWDIV)
+CSKY_ARCH("ck804", CK804,
+ CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC |
+ CSKY::AEK_HWDIV | CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3)
+CSKY_ARCH("ck805", CK805,
+ CSKY::MAEK_2E3 | CSKY::AEK_MP | CSKY::AEK_TRUST | CSKY::AEK_NVIC |
+ CSKY::AEK_HWDIV | CSKY::AEK_HIGHREG | CSKY::MAEK_3E3R2 |
+ CSKY::AEK_3E3R3 | CSKY::AEK_VDSPV2 | CSKY::AEK_VDSP2E3)
+CSKY_ARCH("ck807", CK807,
+ CSKY::MAEK_3E7 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | CSKY::AEK_TRUST |
+ CSKY::AEK_HWDIV | CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 |
+ CSKY::AEK_DSPE60 | CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP |
+ CSKY::AEK_NVIC | CSKY::AEK_CACHE)
+CSKY_ARCH("ck810", CK810,
+ CSKY::MAEK_7E10 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | CSKY::AEK_TRUST |
+ CSKY::AEK_HWDIV | CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 |
+ CSKY::AEK_DSPE60 | CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP |
+ CSKY::AEK_NVIC | CSKY::AEK_CACHE)
+CSKY_ARCH("ck810v", CK810V,
+ CSKY::MAEK_7E10 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 | CSKY::AEK_TRUST |
+ CSKY::AEK_HWDIV | CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 |
+ CSKY::AEK_DSPE60 | CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP |
+ CSKY::AEK_NVIC | CSKY::AEK_CACHE | CSKY::AEK_VDSPV1)
+CSKY_ARCH("ck860", CK860,
+ CSKY::MAEK_10E60 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 |
+ CSKY::AEK_TRUST | CSKY::AEK_HWDIV | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP | CSKY::AEK_NVIC |
+ CSKY::AEK_CACHE | CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3)
+CSKY_ARCH("ck860v", CK860V,
+ CSKY::MAEK_10E60 | CSKY::MAEK_MP | CSKY::MAEK_MP1E2 |
+ CSKY::AEK_TRUST | CSKY::AEK_HWDIV | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG | CSKY::AEK_HARDTP | CSKY::AEK_NVIC |
+ CSKY::AEK_CACHE | CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_VDSPV2 | CSKY::AEK_VDSP2E60F)
+#undef CSKY_ARCH
+
+#ifndef CSKY_ARCH_EXT_NAME
+#define CSKY_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
+#endif
+CSKY_ARCH_EXT_NAME("invalid", CSKY::AEK_INVALID, nullptr, nullptr)
+CSKY_ARCH_EXT_NAME("none", CSKY::AEK_NONE, nullptr, nullptr)
+CSKY_ARCH_EXT_NAME("fpuv2_sf", CSKY::AEK_FPUV2SF, "+fpuv2_sf", "-fpuv2_sf")
+CSKY_ARCH_EXT_NAME("fpuv2_df", CSKY::AEK_FPUV2DF, "+fpuv2_df", "-fpuv2_df")
+CSKY_ARCH_EXT_NAME("fdivdu", CSKY::AEK_FDIVDU, "+fdivdu", "-fdivdu")
+CSKY_ARCH_EXT_NAME("fpuv3_hi", CSKY::AEK_FPUV3HI, "+fpuv3_hi", "-fpuv3_hi")
+CSKY_ARCH_EXT_NAME("fpuv3_hf", CSKY::AEK_FPUV3HF, "+fpuv3_hf", "-fpuv3_hf")
+CSKY_ARCH_EXT_NAME("fpuv3_sf", CSKY::AEK_FPUV3SF, "+fpuv3_sf", "-fpuv3_sf")
+CSKY_ARCH_EXT_NAME("fpuv3_df", CSKY::AEK_FPUV3DF, "+fpuv3_df", "-fpuv3_df")
+CSKY_ARCH_EXT_NAME("floate1", CSKY::AEK_FLOATE1, "+floate1", "-floate1")
+CSKY_ARCH_EXT_NAME("float1e2", CSKY::AEK_FLOAT1E2, "+float1e2", "-float1e2")
+CSKY_ARCH_EXT_NAME("float1e3", CSKY::AEK_FLOAT1E3, "+float1e3", "-float1e3")
+CSKY_ARCH_EXT_NAME("float3e4", CSKY::AEK_FLOAT3E4, "+float3e4", "-float3e4")
+CSKY_ARCH_EXT_NAME("float7e60", CSKY::AEK_FLOAT7E60, "+float7e60", "-float7e60")
+CSKY_ARCH_EXT_NAME("hwdiv", CSKY::AEK_HWDIV, "+hwdiv", "-hwdiv")
+CSKY_ARCH_EXT_NAME("multiple_stld", CSKY::AEK_STLD, "+multiple_stld",
+ "-multiple_stld")
+CSKY_ARCH_EXT_NAME("pushpop", CSKY::AEK_PUSHPOP, "+pushpop", "-pushpop")
+CSKY_ARCH_EXT_NAME("edsp", CSKY::AEK_EDSP, "+edsp", "-edsp")
+CSKY_ARCH_EXT_NAME("dsp1e2", CSKY::AEK_DSP1E2, "+dsp1e2", "-dsp1e2")
+CSKY_ARCH_EXT_NAME("dspe60", CSKY::AEK_DSPE60, "+dspe60", "-dspe60")
+CSKY_ARCH_EXT_NAME("dspv2", CSKY::AEK_DSPV2, "+dspv2", "-dspv2")
+CSKY_ARCH_EXT_NAME("dsp_silan", CSKY::AEK_DSPSILAN, "+dsp_silan", "-dsp_silan")
+CSKY_ARCH_EXT_NAME("elrw", CSKY::AEK_ELRW, "+elrw", "-elrw")
+CSKY_ARCH_EXT_NAME("trust", CSKY::AEK_TRUST, "+trust", "-trust")
+CSKY_ARCH_EXT_NAME("java", CSKY::AEK_JAVA, "+java", "-java")
+CSKY_ARCH_EXT_NAME("cache", CSKY::AEK_CACHE, "+cache", "-cache")
+CSKY_ARCH_EXT_NAME("nvic", CSKY::AEK_NVIC, "+nvic", "-nvic")
+CSKY_ARCH_EXT_NAME("doloop", CSKY::AEK_DOLOOP, "+doloop", "-doloop")
+CSKY_ARCH_EXT_NAME("high-registers", CSKY::AEK_HIGHREG, "+high-registers",
+ "-high-registers")
+CSKY_ARCH_EXT_NAME("smart", CSKY::AEK_SMART, "+smart", "-smart")
+CSKY_ARCH_EXT_NAME("vdsp2e3", CSKY::AEK_VDSP2E3, "+vdsp2e3", "-vdsp2e3")
+CSKY_ARCH_EXT_NAME("vdsp2e60f", CSKY::AEK_VDSP2E60F, "+vdsp2e60f", "-vdsp2e60f")
+CSKY_ARCH_EXT_NAME("vdspv2", CSKY::AEK_VDSPV2, "+vdspv2", "-vdspv2")
+CSKY_ARCH_EXT_NAME("hard-tp", CSKY::AEK_HARDTP, "+hard-tp", "-hard-tp")
+CSKY_ARCH_EXT_NAME("soft-tp", CSKY::AEK_SOFTTP, "+soft-tp", "-soft-tp")
+CSKY_ARCH_EXT_NAME("istack", CSKY::AEK_ISTACK, "+istack", "-istack")
+CSKY_ARCH_EXT_NAME("constpool", CSKY::AEK_CONSTPOOL, "+constpool", "-constpool")
+CSKY_ARCH_EXT_NAME("stack-size", CSKY::AEK_STACKSIZE, "+stack-size",
+ "-stack-size")
+CSKY_ARCH_EXT_NAME("ccrt", CSKY::AEK_CCRT, "+ccrt", "-ccrt")
+CSKY_ARCH_EXT_NAME("vdspv1", CSKY::AEK_VDSPV1, "+vdspv1", "-vdspv1")
+
+CSKY_ARCH_EXT_NAME("e1", CSKY::AEK_E1, "+e1", "-e1")
+CSKY_ARCH_EXT_NAME("e2", CSKY::AEK_E2, "+e2", "-e2")
+CSKY_ARCH_EXT_NAME("2e3", CSKY::AEK_2E3, "+2e3", "-2e3")
+CSKY_ARCH_EXT_NAME("mp", CSKY::AEK_MP, "+mp", "-mp")
+CSKY_ARCH_EXT_NAME("3e3r1", CSKY::AEK_3E3R1, "+3e3r1", "-3e3r1")
+CSKY_ARCH_EXT_NAME("3e3r2", CSKY::AEK_3E3R2, "+3e3r2", "-3e3r2")
+CSKY_ARCH_EXT_NAME("3e3r3", CSKY::AEK_3E3R3, "+3e3r3", "-3e3r3")
+CSKY_ARCH_EXT_NAME("3e7", CSKY::AEK_3E7, "+3e7", "-3e7")
+CSKY_ARCH_EXT_NAME("mp1e2", CSKY::AEK_MP1E2, "+mp1e2", "-mp1e2")
+CSKY_ARCH_EXT_NAME("7e10", CSKY::AEK_7E10, "+7e10", "-7e10")
+CSKY_ARCH_EXT_NAME("10e60", CSKY::AEK_10E60, "+10e60", "-10e60")
+
+#undef CSKY_ARCH_EXT_NAME
+
+#ifndef CSKY_CPU_NAME
+#define CSKY_CPU_NAME(NAME, ARCH_ID, DEFAULT_EXT)
+#endif
+
+CSKY_CPU_NAME("ck801", CK801, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck801t", CK801, CSKY::AEK_NONE)
+CSKY_CPU_NAME("e801", CK801, CSKY::AEK_NONE)
+
+CSKY_CPU_NAME("ck802", CK802, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck802t", CK802, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck802j", CK802, CSKY::AEK_JAVA)
+CSKY_CPU_NAME("e802", CK802, CSKY::AEK_NONE)
+CSKY_CPU_NAME("e802t", CK802, CSKY::AEK_NONE)
+CSKY_CPU_NAME("s802", CK802, CSKY::AEK_NONE)
+CSKY_CPU_NAME("s802t", CK802, CSKY::AEK_NONE)
+
+CSKY_CPU_NAME("ck803", CK803, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck803h", CK803, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck803t", CK803, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck803ht", CK803, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck803f", CK803,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803fh", CK803,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803e", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60)
+CSKY_CPU_NAME("ck803eh", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60)
+CSKY_CPU_NAME("ck803et", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60)
+CSKY_CPU_NAME("ck803eht", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60)
+CSKY_CPU_NAME("ck803ef", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803efh", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803ft", CK803,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803eft", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803efht", CK803,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803r1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803r2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803r3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803hr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803hr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803hr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803tr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803tr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803tr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803htr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803htr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803htr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2)
+CSKY_CPU_NAME("ck803fr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803fr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803fr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803fhr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803fhr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803fhr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803er1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803er2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803er3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803ehr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803ehr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803ehr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803etr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803etr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803etr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803ehtr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803ehtr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803ehtr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efhr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efhr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efhr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803ftr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803ftr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803ftr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803eftr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803eftr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803eftr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efhtr1", CK803,
+ CSKY::MAEK_3E3R1 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efhtr2", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck803efhtr3", CK803,
+ CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3 | CSKY::AEK_DSPV2 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("s803", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3)
+CSKY_CPU_NAME("s803t", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3)
+CSKY_CPU_NAME("e803", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3)
+CSKY_CPU_NAME("e803t", CK803, CSKY::MAEK_3E3R2 | CSKY::AEK_3E3R3)
+
+CSKY_CPU_NAME("ck803s", CK803S, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck803st", CK803S, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck803se", CK803S,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60)
+CSKY_CPU_NAME("ck803sf", CK803S,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803sef", CK803S,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck803seft", CK803S,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+
+CSKY_CPU_NAME("ck804", CK804, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck804h", CK804, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck804t", CK804, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck804ht", CK804, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck804f", CK804,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck804fh", CK804,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck804e", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck804eh", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck804et", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck804eht", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck804ef", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck804efh", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck804ft", CK804,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck804eft", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("ck804efht", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("e804d", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("e804dt", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("e804f", CK804,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("e804ft", CK804,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("e804df", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_HIGHREG)
+CSKY_CPU_NAME("e804dft", CK804,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_HIGHREG)
+
+CSKY_CPU_NAME("ck805", CK805, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck805e", CK805,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3)
+CSKY_CPU_NAME("ck805f", CK805,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck805t", CK805, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck805ef", CK805,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck805et", CK805,
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3)
+CSKY_CPU_NAME("ck805ft", CK805,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+CSKY_CPU_NAME("ck805eft", CK805,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_DSPV2 | CSKY::AEK_3E3R1 | CSKY::AEK_3E3R3)
+CSKY_CPU_NAME("i805", CK805, CSKY::AEK_NONE)
+CSKY_CPU_NAME("i805f", CK805,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E3)
+
+CSKY_CPU_NAME("ck807", CK807, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck807e", CK807,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60)
+CSKY_CPU_NAME("ck807f", CK807,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_FLOAT3E4)
+CSKY_CPU_NAME("ck807ef", CK807,
+ CSKY::AEK_EDSP | CSKY::AEK_DSP1E2 | CSKY::AEK_DSPE60 |
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_FLOAT3E4)
+CSKY_CPU_NAME("c807", CK807, CSKY::AEK_NONE)
+CSKY_CPU_NAME("c807f", CK807,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_FLOAT3E4)
+CSKY_CPU_NAME("r807", CK807, CSKY::AEK_NONE)
+CSKY_CPU_NAME("r807f", CK807,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2 | CSKY::AEK_FLOAT1E3 |
+ CSKY::AEK_FLOAT3E4)
+
+CSKY_CPU_NAME("ck810e", CK810, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck810et", CK810, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck810ef", CK810,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("ck810eft", CK810,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("ck810", CK810, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck810f", CK810,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("ck810t", CK810, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck810ft", CK810,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("c810", CK810,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("c810t", CK810,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+
+CSKY_CPU_NAME("ck810v", CK810V, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck810ev", CK810V, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck810tv", CK810V, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck810etv", CK810V, CSKY::AEK_NONE)
+CSKY_CPU_NAME("c810v", CK810V,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("ck810fv", CK810V,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("ck810efv", CK810V,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("ck810ftv", CK810V,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("c810tv", CK810V,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+CSKY_CPU_NAME("c810eftv", CK810V,
+ CSKY::AEK_FPUV2SF | CSKY::AEK_FPUV2DF | CSKY::AEK_FDIVDU |
+ CSKY::AEK_FLOATE1 | CSKY::AEK_FLOAT1E2)
+
+CSKY_CPU_NAME("ck860", CK860, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck860f", CK860,
+ CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF |
+ CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60)
+CSKY_CPU_NAME("c860", CK860,
+ CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF |
+ CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60)
+
+CSKY_CPU_NAME("ck860v", CK860V, CSKY::AEK_NONE)
+CSKY_CPU_NAME("ck860fv", CK860V,
+ CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF |
+ CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60)
+CSKY_CPU_NAME("c860v", CK860V,
+ CSKY::AEK_FPUV3HI | CSKY::AEK_FPUV3HF | CSKY::AEK_FPUV3SF |
+ CSKY::AEK_FPUV3DF | CSKY::AEK_FLOAT7E60)
+// Invalid CPU
+CSKY_CPU_NAME("invalid", INVALID, CSKY::AEK_INVALID)
+#undef CSKY_CPU_NAME
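The table above is an X-macro: a consumer defines CSKY_CPU_NAME before including the .def file and receives one expansion per CPU entry. A minimal sketch of that pattern, with two rows copied by hand in place of the #include and the default-extension arguments reduced to literal placeholders:

#include <cstdint>
#include <cstdio>

struct Entry { const char *Name; uint64_t DefaultExt; };

#define CSKY_CPU_NAME(NAME, ARCH_ID, DEFAULT_EXT) {NAME, DEFAULT_EXT},
static const Entry Table[] = {
    // Real consumers write `#include "llvm/Support/CSKYTargetParser.def"`;
    // two hand-copied rows stand in for it here.
    CSKY_CPU_NAME("ck860", CK860, 0)   // 0 standing in for CSKY::AEK_NONE
    CSKY_CPU_NAME("ck860v", CK860V, 0) // ARCH_ID is unused by this expansion
};
#undef CSKY_CPU_NAME

int main() {
  for (const Entry &E : Table)
    std::printf("%s\n", E.Name);
}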
diff --git a/llvm/include/llvm/Support/CSKYTargetParser.h b/llvm/include/llvm/Support/CSKYTargetParser.h
new file mode 100644
index 000000000000..ca33a7ee406c
--- /dev/null
+++ b/llvm/include/llvm/Support/CSKYTargetParser.h
@@ -0,0 +1,203 @@
+//===-- CSKYTargetParser - Parser for CSKY target features -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise CSKY hardware features
+// such as FPU/CPU/ARCH/extensions and specific support such as HWDIV.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CSKYTARGETPARSER_H
+#define LLVM_SUPPORT_CSKYTARGETPARSER_H
+
+#include "llvm/ADT/Triple.h"
+#include <vector>
+
+namespace llvm {
+class StringRef;
+
+namespace CSKY {
+
+// Arch extension modifiers for CPUs.
+enum ArchExtKind : uint64_t {
+ AEK_INVALID = 0,
+ AEK_NONE = 1,
+ AEK_FPUV2SF = 1 << 1,
+ AEK_FPUV2DF = 1 << 2,
+ AEK_FDIVDU = 1 << 3,
+ AEK_FPUV3HI = 1 << 4,
+ AEK_FPUV3HF = 1 << 5,
+ AEK_FPUV3SF = 1 << 6,
+ AEK_FPUV3DF = 1 << 7,
+ AEK_FLOATE1 = 1 << 8,
+ AEK_FLOAT1E2 = 1 << 9,
+ AEK_FLOAT1E3 = 1 << 10,
+ AEK_FLOAT3E4 = 1 << 11,
+ AEK_FLOAT7E60 = 1 << 12,
+ AEK_HWDIV = 1 << 13,
+ AEK_STLD = 1 << 14,
+ AEK_PUSHPOP = 1 << 15,
+ AEK_EDSP = 1 << 16,
+ AEK_DSP1E2 = 1 << 17,
+ AEK_DSPE60 = 1 << 18,
+ AEK_DSPV2 = 1 << 19,
+ AEK_DSPSILAN = 1 << 20,
+ AEK_ELRW = 1 << 21,
+ AEK_TRUST = 1 << 22,
+ AEK_JAVA = 1 << 23,
+ AEK_CACHE = 1 << 24,
+ AEK_NVIC = 1 << 25,
+ AEK_DOLOOP = 1 << 26,
+ AEK_HIGHREG = 1 << 27,
+ AEK_SMART = 1 << 28,
+ AEK_VDSP2E3 = 1 << 29,
+ AEK_VDSP2E60F = 1 << 30,
+ AEK_VDSPV2 = 1ULL << 31,
+ AEK_HARDTP = 1ULL << 32,
+ AEK_SOFTTP = 1ULL << 33,
+ AEK_ISTACK = 1ULL << 34,
+ AEK_CONSTPOOL = 1ULL << 35,
+ AEK_STACKSIZE = 1ULL << 36,
+ AEK_CCRT = 1ULL << 37,
+ AEK_VDSPV1 = 1ULL << 38,
+ AEK_E1 = 1ULL << 39,
+ AEK_E2 = 1ULL << 40,
+ AEK_2E3 = 1ULL << 41,
+ AEK_MP = 1ULL << 42,
+ AEK_3E3R1 = 1ULL << 43,
+ AEK_3E3R2 = 1ULL << 44,
+ AEK_3E3R3 = 1ULL << 45,
+ AEK_3E7 = 1ULL << 46,
+ AEK_MP1E2 = 1ULL << 47,
+ AEK_7E10 = 1ULL << 48,
+  AEK_10E60 = 1ULL << 49
+};
+
+// Multi-extension aliases: each entry bundles an extension with those it implies.
+enum MultiArchExtKind : uint64_t {
+ MAEK_E1 = CSKY::AEK_E1 | CSKY::AEK_ELRW,
+ MAEK_E2 = CSKY::AEK_E2 | CSKY::MAEK_E1,
+ MAEK_2E3 = CSKY::AEK_2E3 | CSKY::MAEK_E2,
+ MAEK_MP = CSKY::AEK_MP | CSKY::MAEK_2E3,
+ MAEK_3E3R1 = CSKY::AEK_3E3R1,
+ MAEK_3E3R2 = CSKY::AEK_3E3R1 | CSKY::AEK_3E3R2 | CSKY::AEK_DOLOOP,
+ MAEK_3E7 = CSKY::AEK_3E7 | CSKY::MAEK_2E3,
+ MAEK_MP1E2 = CSKY::AEK_MP1E2 | CSKY::MAEK_3E7,
+ MAEK_7E10 = CSKY::AEK_7E10 | CSKY::MAEK_3E7,
+ MAEK_10E60 = CSKY::AEK_10E60 | CSKY::MAEK_7E10,
+};
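Each MAEK_* alias folds in the aliases beneath it, so a mask built from a derived alias already carries every implied bit. A small illustration, assuming this header is on the include path:

#include "llvm/Support/CSKYTargetParser.h"
#include <cassert>
#include <cstdint>

int main() {
  // MAEK_2E3 = AEK_2E3 | MAEK_E2 = AEK_2E3 | AEK_E2 | AEK_E1 | AEK_ELRW.
  uint64_t Ext = llvm::CSKY::MAEK_2E3 | llvm::CSKY::AEK_HWDIV;
  assert(Ext & llvm::CSKY::AEK_ELRW);  // implied transitively via MAEK_E1
  assert(!(Ext & llvm::CSKY::AEK_MP)); // MP sits one level above 2E3
}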
+// FPU names.
+enum CSKYFPUKind {
+#define CSKY_FPU(NAME, KIND, VERSION) KIND,
+#include "CSKYTargetParser.def"
+ FK_LAST
+};
+
+// FPU Version
+enum class FPUVersion {
+ NONE,
+ FPV2,
+ FPV3,
+};
+
+// Arch names.
+enum class ArchKind {
+#define CSKY_ARCH(NAME, ID, ARCH_BASE_EXT) ID,
+#include "CSKYTargetParser.def"
+};
+
+// List of Arch Extension names.
+// FIXME: TableGen this.
+struct ExtName {
+ const char *NameCStr;
+ size_t NameLength;
+ uint64_t ID;
+ const char *Feature;
+ const char *NegFeature;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+const CSKY::ExtName CSKYARCHExtNames[] = {
+#define CSKY_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+ {NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE},
+#include "CSKYTargetParser.def"
+};
+
+// List of CPU names and their arches.
+template <typename T> struct CpuNames {
+ const char *NameCStr;
+ size_t NameLength;
+ T ArchID;
+ uint64_t defaultExt;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+const CpuNames<CSKY::ArchKind> CPUNames[] = {
+#define CSKY_CPU_NAME(NAME, ARCH_ID, DEFAULT_EXT) \
+ {NAME, sizeof(NAME) - 1, CSKY::ArchKind::ARCH_ID, DEFAULT_EXT},
+#include "llvm/Support/CSKYTargetParser.def"
+};
+
+// FIXME: TableGen this.
+// The entries must appear in the order listed in CSKY::CSKYFPUKind for correct
+// indexing.
+struct FPUName {
+ const char *NameCStr;
+ size_t NameLength;
+ CSKYFPUKind ID;
+ FPUVersion FPUVer;
+
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+
+static const FPUName FPUNames[] = {
+#define CSKY_FPU(NAME, KIND, VERSION) {NAME, sizeof(NAME) - 1, KIND, VERSION},
+#include "llvm/Support/CSKYTargetParser.def"
+};
+
+// List of canonical arch names.
+template <typename T> struct ArchNames {
+ const char *NameCStr;
+ size_t NameLength;
+ T ID;
+ uint64_t archBaseExt;
+ StringRef getName() const { return StringRef(NameCStr, NameLength); }
+};
+const ArchNames<CSKY::ArchKind> ARCHNames[] = {
+#define CSKY_ARCH(NAME, ID, ARCH_BASE_EXT) \
+ {NAME, sizeof(NAME) - 1, CSKY::ArchKind::ID, ARCH_BASE_EXT},
+#include "llvm/Support/CSKYTargetParser.def"
+};
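The NameCStr/NameLength pairs let getName() build a StringRef with the length baked in at compile time (sizeof(NAME) - 1) rather than paying for a runtime strlen. The lookups declared below plausibly scan these tables linearly; a hedged sketch of how getDefaultExtensions might work (the shipped definition lives in CSKYTargetParser.cpp, not in this header):

// Sketch only; the real implementation may differ.
uint64_t getDefaultExtensionsSketch(llvm::StringRef CPU) {
  for (const auto &C : llvm::CSKY::CPUNames)
    if (CPU == C.getName())
      return C.defaultExt;
  return llvm::CSKY::AEK_INVALID; // 0: unknown CPU
}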
+
+StringRef getArchName(ArchKind AK);
+StringRef getDefaultCPU(StringRef Arch);
+StringRef getArchExtName(uint64_t ArchExtKind);
+StringRef getArchExtFeature(StringRef ArchExt);
+uint64_t getDefaultExtensions(StringRef CPU);
+bool getExtensionFeatures(uint64_t Extensions,
+ std::vector<StringRef> &Features);
+
+// Information by ID
+StringRef getFPUName(unsigned FPUKind);
+FPUVersion getFPUVersion(unsigned FPUKind);
+
+bool getFPUFeatures(CSKYFPUKind Kind, std::vector<StringRef> &Features);
+
+// Parser
+ArchKind parseArch(StringRef Arch);
+ArchKind parseCPUArch(StringRef CPU);
+uint64_t parseArchExt(StringRef ArchExt);
+void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
+
+} // namespace CSKY
+
+} // namespace llvm
+
+#endif
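A short usage sketch of the parser entry points declared above (the entry-point names are as declared in this header; the surrounding driver code is invented):

#include "llvm/Support/CSKYTargetParser.h"
#include <cstdint>
#include <vector>

void configureForCPU(llvm::StringRef CPU) {
  llvm::CSKY::ArchKind AK = llvm::CSKY::parseCPUArch(CPU);
  if (AK == llvm::CSKY::ArchKind::INVALID)
    return; // not a recognised CPU name
  uint64_t Exts = llvm::CSKY::getDefaultExtensions(CPU);
  std::vector<llvm::StringRef> Features;
  llvm::CSKY::getExtensionFeatures(Exts, Features);
  // Features now holds the per-extension subtarget feature strings.
}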
diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h
index d6f7793d5df0..894c1f439b64 100644
--- a/llvm/include/llvm/Support/Casting.h
+++ b/llvm/include/llvm/Support/Casting.h
@@ -6,14 +6,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(),
-// and dyn_cast_or_null<X>() templates.
+// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(),
+// cast_if_present<X>(), and dyn_cast_if_present<X>() templates.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_CASTING_H
#define LLVM_SUPPORT_CASTING_H
+#include "llvm/ADT/Optional.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/type_traits.h"
#include <cassert>
@@ -23,43 +24,47 @@
namespace llvm {
//===----------------------------------------------------------------------===//
-// isa<x> Support Templates
+// simplify_type
//===----------------------------------------------------------------------===//
-// Define a template that can be specialized by smart pointers to reflect the
-// fact that they are automatically dereferenced, and are not involved with the
-// template selection process... the default implementation is a noop.
-//
-template<typename From> struct simplify_type {
+/// Define a template that can be specialized by smart pointers to reflect the
+/// fact that they are automatically dereferenced, and are not involved with the
+/// template selection process... the default implementation is a noop.
+// TODO: rename this and/or replace it with other cast traits.
+template <typename From> struct simplify_type {
using SimpleType = From; // The real type this represents...
// An accessor to get the real value...
static SimpleType &getSimplifiedValue(From &Val) { return Val; }
};
-template<typename From> struct simplify_type<const From> {
+template <typename From> struct simplify_type<const From> {
using NonConstSimpleType = typename simplify_type<From>::SimpleType;
- using SimpleType =
- typename add_const_past_pointer<NonConstSimpleType>::type;
+ using SimpleType = typename add_const_past_pointer<NonConstSimpleType>::type;
using RetType =
typename add_lvalue_reference_if_not_pointer<SimpleType>::type;
- static RetType getSimplifiedValue(const From& Val) {
- return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
+ static RetType getSimplifiedValue(const From &Val) {
+ return simplify_type<From>::getSimplifiedValue(const_cast<From &>(Val));
}
};
+// TODO: add this namespace once everyone is switched to using the new
+// interface.
+// namespace detail {
+
+//===----------------------------------------------------------------------===//
+// isa_impl
+//===----------------------------------------------------------------------===//
+
// The core of the implementation of isa<X> is here; To and From should be
// the names of classes. This template can be specialized to customize the
// implementation of isa<> without rewriting it from scratch.
-template <typename To, typename From, typename Enabler = void>
-struct isa_impl {
- static inline bool doit(const From &Val) {
- return To::classof(&Val);
- }
+template <typename To, typename From, typename Enabler = void> struct isa_impl {
+ static inline bool doit(const From &Val) { return To::classof(&Val); }
};
-/// Always allow upcasts, and perform no dynamic check for them.
+// Always allow upcasts, and perform no dynamic check for them.
template <typename To, typename From>
struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> {
static inline bool doit(const From &) { return true; }
@@ -85,103 +90,78 @@ struct isa_impl_cl<To, const std::unique_ptr<From>> {
}
};
-template <typename To, typename From> struct isa_impl_cl<To, From*> {
+template <typename To, typename From> struct isa_impl_cl<To, From *> {
static inline bool doit(const From *Val) {
assert(Val && "isa<> used on a null pointer");
return isa_impl<To, From>::doit(*Val);
}
};
-template <typename To, typename From> struct isa_impl_cl<To, From*const> {
+template <typename To, typename From> struct isa_impl_cl<To, From *const> {
static inline bool doit(const From *Val) {
assert(Val && "isa<> used on a null pointer");
return isa_impl<To, From>::doit(*Val);
}
};
-template <typename To, typename From> struct isa_impl_cl<To, const From*> {
+template <typename To, typename From> struct isa_impl_cl<To, const From *> {
static inline bool doit(const From *Val) {
assert(Val && "isa<> used on a null pointer");
return isa_impl<To, From>::doit(*Val);
}
};
-template <typename To, typename From> struct isa_impl_cl<To, const From*const> {
+template <typename To, typename From>
+struct isa_impl_cl<To, const From *const> {
static inline bool doit(const From *Val) {
assert(Val && "isa<> used on a null pointer");
return isa_impl<To, From>::doit(*Val);
}
};
-template<typename To, typename From, typename SimpleFrom>
+template <typename To, typename From, typename SimpleFrom>
struct isa_impl_wrap {
// When From != SimplifiedType, we can simplify the type some more by using
// the simplify_type template.
static bool doit(const From &Val) {
return isa_impl_wrap<To, SimpleFrom,
- typename simplify_type<SimpleFrom>::SimpleType>::doit(
- simplify_type<const From>::getSimplifiedValue(Val));
+ typename simplify_type<SimpleFrom>::SimpleType>::
+ doit(simplify_type<const From>::getSimplifiedValue(Val));
}
};
-template<typename To, typename FromTy>
+template <typename To, typename FromTy>
struct isa_impl_wrap<To, FromTy, FromTy> {
// When From == SimpleType, we are as simple as we are going to get.
static bool doit(const FromTy &Val) {
- return isa_impl_cl<To,FromTy>::doit(Val);
+ return isa_impl_cl<To, FromTy>::doit(Val);
}
};
-// isa<X> - Return true if the parameter to the template is an instance of one
-// of the template type arguments. Used like this:
-//
-// if (isa<Type>(myVal)) { ... }
-// if (isa<Type0, Type1, Type2>(myVal)) { ... }
-//
-template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
- return isa_impl_wrap<X, const Y,
- typename simplify_type<const Y>::SimpleType>::doit(Val);
-}
-
-template <typename First, typename Second, typename... Rest, typename Y>
-LLVM_NODISCARD inline bool isa(const Y &Val) {
- return isa<First>(Val) || isa<Second, Rest...>(Val);
-}
-
-// isa_and_nonnull<X> - Functionally identical to isa, except that a null value
-// is accepted.
-//
-template <typename... X, class Y>
-LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) {
- if (!Val)
- return false;
- return isa<X...>(Val);
-}
-
//===----------------------------------------------------------------------===//
-// cast<x> Support Templates
+// cast_retty + cast_retty_impl
//===----------------------------------------------------------------------===//
-template<class To, class From> struct cast_retty;
+template <class To, class From> struct cast_retty;
// Calculate what type the 'cast' function should return, based on a requested
// type of To and a source type of From.
-template<class To, class From> struct cast_retty_impl {
- using ret_type = To &; // Normal case, return Ty&
+template <class To, class From> struct cast_retty_impl {
+ using ret_type = To &; // Normal case, return Ty&
};
-template<class To, class From> struct cast_retty_impl<To, const From> {
+template <class To, class From> struct cast_retty_impl<To, const From> {
using ret_type = const To &; // Normal case, return Ty&
};
-template<class To, class From> struct cast_retty_impl<To, From*> {
- using ret_type = To *; // Pointer arg case, return Ty*
+template <class To, class From> struct cast_retty_impl<To, From *> {
+ using ret_type = To *; // Pointer arg case, return Ty*
};
-template<class To, class From> struct cast_retty_impl<To, const From*> {
+template <class To, class From> struct cast_retty_impl<To, const From *> {
using ret_type = const To *; // Constant pointer arg case, return const Ty*
};
-template<class To, class From> struct cast_retty_impl<To, const From*const> {
+template <class To, class From> struct cast_retty_impl<To, const From *const> {
using ret_type = const To *; // Constant pointer arg case, return const Ty*
};
@@ -195,187 +175,604 @@ public:
using ret_type = std::unique_ptr<ResultType>;
};
-template<class To, class From, class SimpleFrom>
-struct cast_retty_wrap {
+template <class To, class From, class SimpleFrom> struct cast_retty_wrap {
// When the simplified type and the from type are not the same, use the type
// simplifier to reduce the type, then reuse cast_retty_impl to get the
// resultant type.
using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
};
-template<class To, class FromTy>
-struct cast_retty_wrap<To, FromTy, FromTy> {
+template <class To, class FromTy> struct cast_retty_wrap<To, FromTy, FromTy> {
// When the simplified type is equal to the from type, use it directly.
- using ret_type = typename cast_retty_impl<To,FromTy>::ret_type;
+ using ret_type = typename cast_retty_impl<To, FromTy>::ret_type;
};
-template<class To, class From>
-struct cast_retty {
+template <class To, class From> struct cast_retty {
using ret_type = typename cast_retty_wrap<
To, From, typename simplify_type<From>::SimpleType>::ret_type;
};
+//===----------------------------------------------------------------------===//
+// cast_convert_val
+//===----------------------------------------------------------------------===//
+
// Ensure the non-simple values are converted using the simplify_type template
// that may be specialized by smart pointers...
//
-template<class To, class From, class SimpleFrom> struct cast_convert_val {
+template <class To, class From, class SimpleFrom> struct cast_convert_val {
// This is not a simple type, use the template to simplify it...
- static typename cast_retty<To, From>::ret_type doit(From &Val) {
+ static typename cast_retty<To, From>::ret_type doit(const From &Val) {
return cast_convert_val<To, SimpleFrom,
- typename simplify_type<SimpleFrom>::SimpleType>::doit(
- simplify_type<From>::getSimplifiedValue(Val));
+ typename simplify_type<SimpleFrom>::SimpleType>::
+ doit(simplify_type<From>::getSimplifiedValue(const_cast<From &>(Val)));
}
};
-template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
- // This _is_ a simple type, just cast it.
+template <class To, class FromTy> struct cast_convert_val<To, FromTy, FromTy> {
+ // If it's a reference, switch to a pointer to do the cast and then deref it.
static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) {
- typename cast_retty<To, FromTy>::ret_type Res2
- = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val);
- return Res2;
+ return *(std::remove_reference_t<typename cast_retty<To, FromTy>::ret_type>
+ *)&const_cast<FromTy &>(Val);
+ }
+};
+
+template <class To, class FromTy>
+struct cast_convert_val<To, FromTy *, FromTy *> {
+ // If it's a pointer, we can use c-style casting directly.
+ static typename cast_retty<To, FromTy *>::ret_type doit(const FromTy *Val) {
+ return (typename cast_retty<To, FromTy *>::ret_type) const_cast<FromTy *>(
+ Val);
}
};
+//===----------------------------------------------------------------------===//
+// is_simple_type
+//===----------------------------------------------------------------------===//
+
template <class X> struct is_simple_type {
static const bool value =
std::is_same<X, typename simplify_type<X>::SimpleType>::value;
};
-// cast<X> - Return the argument parameter cast to the specified type. This
-// casting operator asserts that the type is correct, so it does not return null
-// on failure. It does not allow a null argument (use cast_or_null for that).
-// It is typically used like this:
-//
-// cast<Instruction>(myVal)->getParent()
-//
-template <class X, class Y>
-inline std::enable_if_t<!is_simple_type<Y>::value,
- typename cast_retty<X, const Y>::ret_type>
-cast(const Y &Val) {
- assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
- return cast_convert_val<
- X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
+// } // namespace detail
+
+//===----------------------------------------------------------------------===//
+// CastIsPossible
+//===----------------------------------------------------------------------===//
+
+/// This struct provides a way to check if a given cast is possible. It provides
+/// a static function called isPossible that is used to check if a cast can be
+/// performed. It should be overridden like this:
+///
+/// template<> struct CastIsPossible<foo, bar> {
+/// static inline bool isPossible(const bar &b) {
+///     return b.isFoo();
+/// }
+/// };
+template <typename To, typename From, typename Enable = void>
+struct CastIsPossible {
+ static inline bool isPossible(const From &f) {
+ return isa_impl_wrap<
+ To, const From,
+ typename simplify_type<const From>::SimpleType>::doit(f);
+ }
+};
+
+// Needed for optional unwrapping. This could be implemented with isa_impl, but
+// we want to implement things in the new method and move old implementations
+// over. In fact, some of the isa_impl templates should be moved over to
+// CastIsPossible.
+template <typename To, typename From>
+struct CastIsPossible<To, Optional<From>> {
+ static inline bool isPossible(const Optional<From> &f) {
+ assert(f.hasValue() && "CastIsPossible::isPossible called on a nullopt!");
+ return isa_impl_wrap<
+ To, const From,
+ typename simplify_type<const From>::SimpleType>::doit(*f);
+ }
+};
+
+/// Upcasting (from derived to base) and casting from a type to itself should
+/// always be possible.
+template <typename To, typename From>
+struct CastIsPossible<To, From,
+ std::enable_if_t<std::is_base_of<To, From>::value>> {
+ static inline bool isPossible(const From &f) { return true; }
+};
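Following the pattern in the comment above, a concrete specialization for a hypothetical pair of value types would look like this:

// Hypothetical types, for illustration only.
struct Bar { bool IsFoo; };
struct Foo {};

namespace llvm {
template <> struct CastIsPossible<Foo, Bar> {
  static inline bool isPossible(const Bar &B) { return B.IsFoo; }
};
} // namespace llvm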
+
+//===----------------------------------------------------------------------===//
+// Cast traits
+//===----------------------------------------------------------------------===//
+
+/// All of these cast traits are meant to be implementations for useful casts
+/// that users may want to use that are outside the standard behavior. An
+/// example of how to use a special cast called `CastTrait` is:
+///
+/// template<> struct CastInfo<foo, bar> : public CastTrait<foo, bar> {};
+///
+/// Essentially, if your use case falls directly into one of the use cases
+/// supported by a given cast trait, simply inherit your special CastInfo
+/// directly from one of these to avoid having to reimplement the boilerplate
+/// `isPossible/castFailed/doCast/doCastIfPossible`. A cast trait can also
+/// provide a subset of those functions.
+
+/// This cast trait just provides castFailed for the specified `To` type to make
+/// CastInfo specializations more declarative. In order to use this, the target
+/// result type must be `To` and `To` must be constructible from `nullptr`.
+template <typename To> struct NullableValueCastFailed {
+ static To castFailed() { return To(nullptr); }
+};
+
+/// This cast trait just provides the default implementation of doCastIfPossible
+/// to make CastInfo specializations more declarative. The `Derived` template
+/// parameter *must* be provided for forwarding castFailed and doCast.
+template <typename To, typename From, typename Derived>
+struct DefaultDoCastIfPossible {
+ static To doCastIfPossible(From f) {
+ if (!Derived::isPossible(f))
+ return Derived::castFailed();
+ return Derived::doCast(f);
+ }
+};
+
+namespace detail {
+/// A helper to derive the type to use with `Self` for cast traits, when the
+/// provided CRTP derived type is allowed to be void.
+template <typename OptionalDerived, typename Default>
+using SelfType = std::conditional_t<std::is_same<OptionalDerived, void>::value,
+ Default, OptionalDerived>;
+} // namespace detail
+
+/// This cast trait provides casting for the specific case of casting to a
+/// value-typed object from a pointer-typed object. Note that `To` must be
+/// nullable/constructible from a pointer to `From` to use this cast.
+template <typename To, typename From, typename Derived = void>
+struct ValueFromPointerCast
+ : public CastIsPossible<To, From *>,
+ public NullableValueCastFailed<To>,
+ public DefaultDoCastIfPossible<
+ To, From *,
+ detail::SelfType<Derived, ValueFromPointerCast<To, From>>> {
+ static inline To doCast(From *f) { return To(f); }
+};
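As an illustration of composing these traits, a CastInfo for a hypothetical value type that wraps a pointer could be declared as follows (all names invented):

struct Foo {};
struct FooHandle {
  explicit FooHandle(Foo *P) : Ptr(P) {} // also accepts nullptr
  // classof drives the default CastIsPossible<FooHandle, Foo *>.
  static bool classof(const Foo *) { return true; }
  Foo *Ptr;
};

namespace llvm {
template <>
struct CastInfo<FooHandle, Foo *>
    : public ValueFromPointerCast<FooHandle, Foo> {};
} // namespace llvm

With that in place, a successful cast yields FooHandle(ptr) via doCast, and a failed dyn_cast yields FooHandle(nullptr) via NullableValueCastFailed.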
+
+/// This cast trait provides std::unique_ptr casting. It has the semantics of
+/// moving the contents of the input unique_ptr into the output unique_ptr
+/// during the cast. It's also a good example of how to implement a move-only
+/// cast.
+template <typename To, typename From, typename Derived = void>
+struct UniquePtrCast : public CastIsPossible<To, From *> {
+ using Self = detail::SelfType<Derived, UniquePtrCast<To, From>>;
+ using CastResultType = std::unique_ptr<
+ std::remove_reference_t<typename cast_retty<To, From>::ret_type>>;
+
+ static inline CastResultType doCast(std::unique_ptr<From> &&f) {
+ return CastResultType((typename CastResultType::element_type *)f.release());
+ }
+
+ static inline CastResultType castFailed() { return CastResultType(nullptr); }
+
+ static inline CastResultType doCastIfPossible(std::unique_ptr<From> &&f) {
+ if (!Self::isPossible(f))
+ return castFailed();
+ return doCast(f);
+ }
+};
+
+/// This cast trait provides Optional<T> casting. This means that if you have a
+/// value type, you can cast it to another value type and have dyn_cast return
+/// an Optional<T>.
+template <typename To, typename From, typename Derived = void>
+struct OptionalValueCast
+ : public CastIsPossible<To, From>,
+ public DefaultDoCastIfPossible<
+ Optional<To>, From,
+ detail::SelfType<Derived, OptionalValueCast<To, From>>> {
+ static inline Optional<To> castFailed() { return Optional<To>{}; }
+
+ static inline Optional<To> doCast(const From &f) { return To(f); }
+};
+
+/// Provides a cast trait that strips `const` from types to make it easier to
+/// implement a const-version of a non-const cast. It just removes boilerplate
+/// and reduces the amount of code you as the user need to implement. You can
+/// use it like this:
+///
+/// template<> struct CastInfo<foo, bar> {
+/// ...verbose implementation...
+/// };
+///
+/// template<> struct CastInfo<foo, const bar> : public
+/// ConstStrippingForwardingCast<foo, const bar, CastInfo<foo, bar>> {};
+///
+template <typename To, typename From, typename ForwardTo>
+struct ConstStrippingForwardingCast {
+ // Remove the pointer if it exists, then we can get rid of consts/volatiles.
+ using DecayedFrom = std::remove_cv_t<std::remove_pointer_t<From>>;
+ // Now if it's a pointer, add it back. Otherwise, we want a ref.
+ using NonConstFrom = std::conditional_t<std::is_pointer<From>::value,
+ DecayedFrom *, DecayedFrom &>;
+
+ static inline bool isPossible(const From &f) {
+ return ForwardTo::isPossible(const_cast<NonConstFrom>(f));
+ }
+
+ static inline decltype(auto) castFailed() { return ForwardTo::castFailed(); }
+
+ static inline decltype(auto) doCast(const From &f) {
+ return ForwardTo::doCast(const_cast<NonConstFrom>(f));
+ }
+
+ static inline decltype(auto) doCastIfPossible(const From &f) {
+ return ForwardTo::doCastIfPossible(const_cast<NonConstFrom>(f));
+ }
+};
+
+/// Provides a cast trait that uses a defined pointer to pointer cast as a base
+/// for reference-to-reference casts. Note that it does not provide castFailed
+/// and doCastIfPossible because a pointer-to-pointer cast would likely just
+/// return `nullptr` which could cause nullptr dereference. You can use it like
+/// this:
+///
+/// template <> struct CastInfo<foo, bar *> { ... verbose implementation... };
+///
+/// template <>
+/// struct CastInfo<foo, bar>
+/// : public ForwardToPointerCast<foo, bar, CastInfo<foo, bar *>> {};
+///
+template <typename To, typename From, typename ForwardTo>
+struct ForwardToPointerCast {
+ static inline bool isPossible(const From &f) {
+ return ForwardTo::isPossible(&f);
+ }
+
+ static inline decltype(auto) doCast(const From &f) {
+ return *ForwardTo::doCast(&f);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// CastInfo
+//===----------------------------------------------------------------------===//
+
+/// This struct provides a method for customizing the way a cast is performed.
+/// It inherits from CastIsPossible, to support the case of declaring many
+/// CastIsPossible specializations without having to specialize the full
+/// CastInfo.
+///
+/// In order to specialize different behaviors, specify different functions in
+/// your CastInfo specialization.
+/// For isa<> customization, provide:
+///
+/// `static bool isPossible(const From &f)`
+///
+/// For cast<> customization, provide:
+///
+/// `static To doCast(const From &f)`
+///
+/// For dyn_cast<> and the *_if_present<> variants' customization, provide:
+///
+/// `static To castFailed()` and `static To doCastIfPossible(const From &f)`
+///
+/// Your specialization might look something like this:
+///
+/// template<> struct CastInfo<foo, bar> : public CastIsPossible<foo, bar> {
+/// static inline foo doCast(const bar &b) {
+/// return foo(const_cast<bar &>(b));
+/// }
+/// static inline foo castFailed() { return foo(); }
+/// static inline foo doCastIfPossible(const bar &b) {
+/// if (!CastInfo<foo, bar>::isPossible(b))
+/// return castFailed();
+/// return doCast(b);
+/// }
+/// };
+
+// The default implementations of CastInfo don't use cast traits for now because
+// we need to specify types all over the place due to the current expected
+// casting behavior and the way cast_retty works. New use cases can and should
+// take advantage of the cast traits whenever possible!
+
+template <typename To, typename From, typename Enable = void>
+struct CastInfo : public CastIsPossible<To, From> {
+ using Self = CastInfo<To, From, Enable>;
+
+ using CastReturnType = typename cast_retty<To, From>::ret_type;
+
+ static inline CastReturnType doCast(const From &f) {
+ return cast_convert_val<
+ To, From,
+ typename simplify_type<From>::SimpleType>::doit(const_cast<From &>(f));
+ }
+
+ // This assumes that you can construct the cast return type from `nullptr`.
+ // This is largely to support legacy use cases - if you don't want this
+ // behavior you should specialize CastInfo for your use case.
+ static inline CastReturnType castFailed() { return CastReturnType(nullptr); }
+
+ static inline CastReturnType doCastIfPossible(const From &f) {
+ if (!Self::isPossible(f))
+ return castFailed();
+ return doCast(f);
+ }
+};
+
+/// This struct provides an overload for CastInfo where From has simplify_type
+/// defined. This simply forwards to the appropriate CastInfo with the
+/// simplified type/value, so you don't have to implement both.
+template <typename To, typename From>
+struct CastInfo<To, From, std::enable_if_t<!is_simple_type<From>::value>> {
+ using Self = CastInfo<To, From>;
+ using SimpleFrom = typename simplify_type<From>::SimpleType;
+ using SimplifiedSelf = CastInfo<To, SimpleFrom>;
+
+ static inline bool isPossible(From &f) {
+ return SimplifiedSelf::isPossible(
+ simplify_type<From>::getSimplifiedValue(f));
+ }
+
+ static inline decltype(auto) doCast(From &f) {
+ return SimplifiedSelf::doCast(simplify_type<From>::getSimplifiedValue(f));
+ }
+
+ static inline decltype(auto) castFailed() {
+ return SimplifiedSelf::castFailed();
+ }
+
+ static inline decltype(auto) doCastIfPossible(From &f) {
+ return SimplifiedSelf::doCastIfPossible(
+ simplify_type<From>::getSimplifiedValue(f));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Pre-specialized CastInfo
+//===----------------------------------------------------------------------===//
+
+/// Provide a CastInfo specialized for std::unique_ptr.
+template <typename To, typename From>
+struct CastInfo<To, std::unique_ptr<From>> : public UniquePtrCast<To, From> {};
+
+/// Provide a CastInfo specialized for Optional<From>. It's assumed that if the
+/// input is Optional<From>, the output can be Optional<To>. If that's not
+/// the case, specialize CastInfo for your use case.
+template <typename To, typename From>
+struct CastInfo<To, Optional<From>> : public OptionalValueCast<To, From> {};
+
+/// isa<X> - Return true if the parameter to the template is an instance of one
+/// of the template type arguments. Used like this:
+///
+/// if (isa<Type>(myVal)) { ... }
+/// if (isa<Type0, Type1, Type2>(myVal)) { ... }
+template <typename To, typename From>
+LLVM_NODISCARD inline bool isa(const From &Val) {
+ return CastInfo<To, const From>::isPossible(Val);
}
-template <class X, class Y>
-inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
- assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
- return cast_convert_val<X, Y,
- typename simplify_type<Y>::SimpleType>::doit(Val);
+template <typename First, typename Second, typename... Rest, typename From>
+LLVM_NODISCARD inline bool isa(const From &Val) {
+ return isa<First>(Val) || isa<Second, Rest...>(Val);
}
-template <class X, class Y>
-inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
- assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
- return cast_convert_val<X, Y*,
- typename simplify_type<Y*>::SimpleType>::doit(Val);
+/// cast<X> - Return the argument parameter cast to the specified type. This
+/// casting operator asserts that the type is correct, so it does not return
+/// null on failure. It does not allow a null argument (use cast_if_present for
+/// that). It is typically used like this:
+///
+/// cast<Instruction>(myVal)->getParent()
+
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) cast(const From &Val) {
+ assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
+ return CastInfo<To, const From>::doCast(Val);
}
-template <class X, class Y>
-inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
-cast(std::unique_ptr<Y> &&Val) {
- assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!");
- using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type;
- return ret_type(
- cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit(
- Val.release()));
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) cast(From &Val) {
+ assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
+ return CastInfo<To, From>::doCast(Val);
}
-// cast_or_null<X> - Functionally identical to cast, except that a null value is
-// accepted.
-//
-template <class X, class Y>
-LLVM_NODISCARD inline std::enable_if_t<
- !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>
-cast_or_null(const Y &Val) {
- if (!Val)
- return nullptr;
- assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
- return cast<X>(Val);
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) cast(From *Val) {
+ assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
+ return CastInfo<To, From *>::doCast(Val);
}
-template <class X, class Y>
-LLVM_NODISCARD inline std::enable_if_t<!is_simple_type<Y>::value,
- typename cast_retty<X, Y>::ret_type>
-cast_or_null(Y &Val) {
- if (!Val)
- return nullptr;
- assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
- return cast<X>(Val);
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) cast(std::unique_ptr<From> &&Val) {
+ assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
+ return CastInfo<To, std::unique_ptr<From>>::doCast(std::move(Val));
}
-template <class X, class Y>
-LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
-cast_or_null(Y *Val) {
- if (!Val) return nullptr;
- assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
- return cast<X>(Val);
+/// dyn_cast<X> - Return the argument parameter cast to the specified type. This
+/// casting operator returns null if the argument is of the wrong type, so it
+/// can be used to test for a type as well as cast if successful. The value
+/// passed in must be present; if not, use dyn_cast_if_present. This should be
+/// used in the context of an if statement like this:
+///
+/// if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... }
+
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) dyn_cast(const From &Val) {
+ return CastInfo<To, const From>::doCastIfPossible(Val);
}
-template <class X, class Y>
-inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
-cast_or_null(std::unique_ptr<Y> &&Val) {
- if (!Val)
- return nullptr;
- return cast<X>(std::move(Val));
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) dyn_cast(From &Val) {
+ return CastInfo<To, From>::doCastIfPossible(Val);
}
-// dyn_cast<X> - Return the argument parameter cast to the specified type. This
-// casting operator returns null if the argument is of the wrong type, so it can
-// be used to test for a type as well as cast if successful. This should be
-// used in the context of an if statement like this:
-//
-// if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... }
-//
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) dyn_cast(From *Val) {
+ return CastInfo<To, From *>::doCastIfPossible(Val);
+}
-template <class X, class Y>
-LLVM_NODISCARD inline std::enable_if_t<
- !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>
-dyn_cast(const Y &Val) {
- return isa<X>(Val) ? cast<X>(Val) : nullptr;
+template <typename To, typename From>
+LLVM_NODISCARD inline decltype(auto) dyn_cast(std::unique_ptr<From> &&Val) {
+ return CastInfo<To, std::unique_ptr<From>>::doCastIfPossible(std::move(Val));
}
-template <class X, class Y>
-LLVM_NODISCARD inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) {
- return isa<X>(Val) ? cast<X>(Val) : nullptr;
+//===----------------------------------------------------------------------===//
+// ValueIsPresent
+//===----------------------------------------------------------------------===//
+
+template <typename T>
+constexpr bool IsNullable = std::is_pointer<T>::value ||
+ std::is_constructible<T, std::nullptr_t>::value;
+
+/// ValueIsPresent provides a way to check if a value is, well, present. For
+/// pointers, this is the equivalent of checking against nullptr, for
+/// Optionals this is the equivalent of checking hasValue(). It also
+/// provides a method for unwrapping a value (think dereferencing a
+/// pointer).
+
+// Generic values can't *not* be present.
+template <typename T, typename Enable = void> struct ValueIsPresent {
+ using UnwrappedType = T;
+ static inline bool isPresent(const T &t) { return true; }
+ static inline decltype(auto) unwrapValue(T &t) { return t; }
+};
+
+// Optional provides its own way to check if something is present.
+template <typename T> struct ValueIsPresent<Optional<T>> {
+ using UnwrappedType = T;
+ static inline bool isPresent(const Optional<T> &t) { return t.has_value(); }
+ static inline decltype(auto) unwrapValue(Optional<T> &t) {
+ return t.getValue();
+ }
+};
+
+// If something is "nullable" then we just compare it to nullptr to see if it
+// exists.
+template <typename T>
+struct ValueIsPresent<T, std::enable_if_t<IsNullable<T>>> {
+ using UnwrappedType = T;
+ static inline bool isPresent(const T &t) { return t != nullptr; }
+ static inline decltype(auto) unwrapValue(T &t) { return t; }
+};
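A quick illustration of how the specializations above dispatch (pointers go through the nullable case, Optionals defer to has_value()):

#include "llvm/ADT/Optional.h"
#include "llvm/Support/Casting.h"

void presenceDemo() {
  int X = 0;
  int *P = &X, *Null = nullptr;
  llvm::Optional<int> Some(5), None;

  bool A = llvm::ValueIsPresent<int *>::isPresent(P);    // true
  bool B = llvm::ValueIsPresent<int *>::isPresent(Null); // false
  bool C = llvm::ValueIsPresent<llvm::Optional<int>>::isPresent(Some); // true
  bool D = llvm::ValueIsPresent<llvm::Optional<int>>::isPresent(None); // false
  (void)A; (void)B; (void)C; (void)D;
}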
+
+namespace detail {
+// Convenience function we can use to check if a value is present. Because of
+// simplify_type, we have to call it on the simplified type for now.
+template <typename T> inline bool isPresent(const T &t) {
+ return ValueIsPresent<typename simplify_type<T>::SimpleType>::isPresent(
+ simplify_type<T>::getSimplifiedValue(const_cast<T &>(t)));
}
-template <class X, class Y>
-LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) {
- return isa<X>(Val) ? cast<X>(Val) : nullptr;
+// Convenience function we can use to unwrap a value.
+template <typename T> inline decltype(auto) unwrapValue(T &t) {
+ return ValueIsPresent<T>::unwrapValue(t);
}
+} // namespace detail
-// dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null
-// value is accepted.
-//
-template <class X, class Y>
-LLVM_NODISCARD inline std::enable_if_t<
- !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>
-dyn_cast_or_null(const Y &Val) {
- return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
+/// isa_and_present<X> - Functionally identical to isa, except that a null value
+/// is accepted.
+template <typename... X, class Y>
+LLVM_NODISCARD inline bool isa_and_present(const Y &Val) {
+ if (!detail::isPresent(Val))
+ return false;
+ return isa<X...>(Val);
}
+template <typename... X, class Y>
+LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) {
+ return isa_and_present<X...>(Val);
+}
+
+/// cast_if_present<X> - Functionally identical to cast, except that a null
+/// value is accepted.
template <class X, class Y>
-LLVM_NODISCARD inline std::enable_if_t<!is_simple_type<Y>::value,
- typename cast_retty<X, Y>::ret_type>
-dyn_cast_or_null(Y &Val) {
- return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
+LLVM_NODISCARD inline auto cast_if_present(const Y &Val) {
+ if (!detail::isPresent(Val))
+ return CastInfo<X, const Y>::castFailed();
+ assert(isa<X>(Val) && "cast_if_present<Ty>() argument of incompatible type!");
+ return cast<X>(detail::unwrapValue(Val));
+}
+
+template <class X, class Y> LLVM_NODISCARD inline auto cast_if_present(Y &Val) {
+ if (!detail::isPresent(Val))
+ return CastInfo<X, Y>::castFailed();
+ assert(isa<X>(Val) && "cast_if_present<Ty>() argument of incompatible type!");
+ return cast<X>(detail::unwrapValue(Val));
+}
+
+template <class X, class Y> LLVM_NODISCARD inline auto cast_if_present(Y *Val) {
+ if (!detail::isPresent(Val))
+ return CastInfo<X, Y *>::castFailed();
+ assert(isa<X>(Val) && "cast_if_present<Ty>() argument of incompatible type!");
+ return cast<X>(detail::unwrapValue(Val));
}
template <class X, class Y>
-LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
-dyn_cast_or_null(Y *Val) {
- return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
+LLVM_NODISCARD inline auto cast_if_present(std::unique_ptr<Y> &&Val) {
+ if (!detail::isPresent(Val))
+ return UniquePtrCast<X, Y>::castFailed();
+ return UniquePtrCast<X, Y>::doCast(std::move(Val));
+}
+
+// Provide a forwarding from cast_or_null to cast_if_present for current
+// users. This is deprecated and will be removed in a future patch, use
+// cast_if_present instead.
+template <class X, class Y> auto cast_or_null(const Y &Val) {
+ return cast_if_present<X>(Val);
+}
+
+template <class X, class Y> auto cast_or_null(Y &Val) {
+ return cast_if_present<X>(Val);
+}
+
+template <class X, class Y> auto cast_or_null(Y *Val) {
+ return cast_if_present<X>(Val);
+}
+
+template <class X, class Y> auto cast_or_null(std::unique_ptr<Y> &&Val) {
+ return cast_if_present<X>(std::move(Val));
+}
+
+/// dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a
+/// null (or none in the case of optionals) value is accepted.
+template <class X, class Y> auto dyn_cast_if_present(const Y &Val) {
+ if (!detail::isPresent(Val))
+ return CastInfo<X, const Y>::castFailed();
+ return CastInfo<X, const Y>::doCastIfPossible(detail::unwrapValue(Val));
+}
+
+template <class X, class Y> auto dyn_cast_if_present(Y &Val) {
+ if (!detail::isPresent(Val))
+ return CastInfo<X, Y>::castFailed();
+ return CastInfo<X, Y>::doCastIfPossible(detail::unwrapValue(Val));
+}
+
+template <class X, class Y> auto dyn_cast_if_present(Y *Val) {
+ if (!detail::isPresent(Val))
+ return CastInfo<X, Y *>::castFailed();
+ return CastInfo<X, Y *>::doCastIfPossible(detail::unwrapValue(Val));
+}
+
+// Forwards to dyn_cast_if_present to avoid breaking current users. This is
+// deprecated and will be removed in a future patch, use
+// dyn_cast_if_present instead.
+template <class X, class Y> auto dyn_cast_or_null(const Y &Val) {
+ return dyn_cast_if_present<X>(Val);
+}
+
+template <class X, class Y> auto dyn_cast_or_null(Y &Val) {
+ return dyn_cast_if_present<X>(Val);
+}
+
+template <class X, class Y> auto dyn_cast_or_null(Y *Val) {
+ return dyn_cast_if_present<X>(Val);
}
-// unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>,
-// taking ownership of the input pointer iff isa<X>(Val) is true. If the
-// cast is successful, From refers to nullptr on exit and the casted value
-// is returned. If the cast is unsuccessful, the function returns nullptr
-// and From is unchanged.
+/// unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>,
+/// taking ownership of the input pointer iff isa<X>(Val) is true. If the
+/// cast is successful, From refers to nullptr on exit and the casted value
+/// is returned. If the cast is unsuccessful, the function returns nullptr
+/// and From is unchanged.
template <class X, class Y>
-LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &Val)
- -> decltype(cast<X>(Val)) {
+LLVM_NODISCARD inline typename CastInfo<X, std::unique_ptr<Y>>::CastResultType
+unique_dyn_cast(std::unique_ptr<Y> &Val) {
if (!isa<X>(Val))
return nullptr;
return cast<X>(std::move(Val));
@@ -386,11 +783,11 @@ LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val) {
return unique_dyn_cast<X, Y>(Val);
}
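The ownership-transfer behaviour described above, in a self-contained illustrative snippet (the Base/Derived hierarchy is invented):

#include "llvm/Support/Casting.h"
#include <memory>

struct Base {
  enum Kind { K_Base, K_Derived } TheKind;
  Base(Kind K = K_Base) : TheKind(K) {}
  virtual ~Base() = default;
};
struct Derived : Base {
  Derived() : Base(K_Derived) {}
  static bool classof(const Base *B) { return B->TheKind == K_Derived; }
};

void demo() {
  std::unique_ptr<Base> B = std::make_unique<Derived>();
  // Ownership moves into D iff isa<Derived>(B); otherwise B is untouched.
  std::unique_ptr<Derived> D = llvm::unique_dyn_cast<Derived>(B);
  // Here the cast succeeds: B is now null and D owns the object.
}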
-// dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast, except that
-// a null value is accepted.
+// unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast,
+// except that a null value is accepted.
template <class X, class Y>
-LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val)
- -> decltype(cast<X>(Val)) {
+LLVM_NODISCARD inline typename CastInfo<X, std::unique_ptr<Y>>::CastResultType
+unique_dyn_cast_or_null(std::unique_ptr<Y> &Val) {
if (!Val)
return nullptr;
return unique_dyn_cast<X, Y>(Val);
diff --git a/llvm/include/llvm/Support/CodeGen.h b/llvm/include/llvm/Support/CodeGen.h
index 9e66d84e185d..71d0ddbfe05e 100644
--- a/llvm/include/llvm/Support/CodeGen.h
+++ b/llvm/include/llvm/Support/CodeGen.h
@@ -69,6 +69,40 @@ namespace llvm {
// Specify what functions should keep the frame pointer.
enum class FramePointerKind { None, NonLeaf, All };
-} // end llvm namespace
+  // Specify which call-used registers should be zeroed.
+ namespace ZeroCallUsedRegs {
+ const unsigned ONLY_USED = 1U << 1;
+ const unsigned ONLY_GPR = 1U << 2;
+ const unsigned ONLY_ARG = 1U << 3;
+
+ enum class ZeroCallUsedRegsKind : unsigned int {
+ // Don't zero any call-used regs.
+ Skip = 1U << 0,
+ // Only zeros call-used GPRs used in the fn and pass args.
+ UsedGPRArg = ONLY_USED | ONLY_GPR | ONLY_ARG,
+ // Only zeros call-used GPRs used in the fn.
+ UsedGPR = ONLY_USED | ONLY_GPR,
+ // Only zeros call-used regs used in the fn and pass args.
+ UsedArg = ONLY_USED | ONLY_ARG,
+ // Only zeros call-used regs used in the fn.
+ Used = ONLY_USED,
+ // Zeros all call-used GPRs that pass args.
+ AllGPRArg = ONLY_GPR | ONLY_ARG,
+ // Zeros all call-used GPRs.
+ AllGPR = ONLY_GPR,
+ // Zeros all call-used regs that pass args.
+ AllArg = ONLY_ARG,
+ // Zeros all call-used regs.
+ All = 0,
+ };
+ } // namespace ZeroCallUsedRegs
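Since every kind is a bit-or of the ONLY_* selectors, a backend can decompose a request with plain mask tests; Skip is the one value that must be special-cased, since it uses its own bit. A small sketch, assuming llvm/Support/CodeGen.h is included:

#include "llvm/Support/CodeGen.h"

namespace ZCUR = llvm::ZeroCallUsedRegs;

bool zeroesOnlyUsedRegs(ZCUR::ZeroCallUsedRegsKind K) {
  return static_cast<unsigned>(K) & ZCUR::ONLY_USED;
}
// zeroesOnlyUsedRegs(ZCUR::ZeroCallUsedRegsKind::UsedGPRArg) -> true
// zeroesOnlyUsedRegs(ZCUR::ZeroCallUsedRegsKind::AllGPR)     -> false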
+
+ enum class UWTableKind {
+ None = 0, ///< No unwind table requested
+ Sync = 1, ///< "Synchronous" unwind tables
+ Async = 2, ///< "Asynchronous" unwind tables (instr precise)
+ Default = 2,
+ };
+ } // namespace llvm
#endif
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index c8e29ac42559..6461164fceff 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -49,13 +49,12 @@ class FileSystem;
class StringSaver;
-/// cl Namespace - This namespace contains all of the command line option
-/// processing machinery. It is intentionally a short name to make qualified
-/// usage concise.
+/// This namespace contains all of the command line option processing machinery.
+/// It is intentionally a short name to make qualified usage concise.
namespace cl {
//===----------------------------------------------------------------------===//
-// ParseCommandLineOptions - Command line option processing entry point.
+// Command line option processing entry point.
//
// Returns true on success. Otherwise, this will print the error message to
// stderr and exit if \p Errs is not set (nullptr by default), or print the
@@ -78,22 +77,19 @@ bool ParseCommandLineOptions(int argc, const char *const *argv,
using VersionPrinterTy = std::function<void(raw_ostream &)>;
///===---------------------------------------------------------------------===//
-/// SetVersionPrinter - Override the default (LLVM specific) version printer
-/// used to print out the version when --version is given
-/// on the command line. This allows other systems using the
-/// CommandLine utilities to print their own version string.
+/// Override the default (LLVM specific) version printer used to print out the
+/// version when --version is given on the command line. This allows other
+/// systems using the CommandLine utilities to print their own version string.
void SetVersionPrinter(VersionPrinterTy func);
///===---------------------------------------------------------------------===//
-/// AddExtraVersionPrinter - Add an extra printer to use in addition to the
-/// default one. This can be called multiple times,
-/// and each time it adds a new function to the list
-/// which will be called after the basic LLVM version
-/// printing is complete. Each can then add additional
-/// information specific to the tool.
+/// Add an extra printer to use in addition to the default one. This can be
+/// called multiple times, and each time it adds a new function to the list
+/// which will be called after the basic LLVM version printing is complete.
+/// Each can then add additional information specific to the tool.
void AddExtraVersionPrinter(VersionPrinterTy func);
-// PrintOptionValues - Print option values.
+// Print option values.
// With -print-options print the difference between option values and defaults.
// With -print-all-options print all option values.
// (Currently not perfect, but best-effort.)
@@ -121,9 +117,9 @@ enum NumOccurrencesFlag { // Flags for the number of occurrences allowed
Required = 0x02, // One occurrence required
OneOrMore = 0x03, // One or more occurrences required
- // ConsumeAfter - Indicates that this option is fed anything that follows the
- // last positional argument required by the application (it is an error if
- // there are zero positional arguments, and a ConsumeAfter option is used).
+ // Indicates that this option is fed anything that follows the last positional
+ // argument required by the application (it is an error if there are zero
+ // positional arguments, and a ConsumeAfter option is used).
// Thus, for example, all arguments to LLI are processed until a filename is
// found. Once a filename is found, all of the succeeding arguments are
// passed, unprocessed, to the ConsumeAfter option.
@@ -144,8 +140,8 @@ enum OptionHidden { // Control whether -help shows this option
ReallyHidden = 0x02 // Neither -help nor -help-hidden show this arg
};
-// Formatting flags - This controls special features that the option might have
-// that cause it to be parsed differently...
+// This controls special features that the option might have that cause it to be
+// parsed differently...
//
// Prefix - This option allows arguments that are otherwise unrecognized to be
// matched by options that are a prefix of the actual value. This is useful for
@@ -170,7 +166,7 @@ enum MiscFlags { // Miscellaneous flags to adjust argument
PositionalEatsArgs = 0x02, // Should this positional cl::list eat -args?
Sink = 0x04, // Should this cl::list eat all unknown options?
- // Grouping - Can this option group with other options?
+ // Can this option group with other options?
// If this is enabled, multiple letter options are allowed to bunch together
// with only a single hyphen for the whole group. This allows emulation
// of the behavior that ls uses for example: ls -la === ls -l -a
@@ -181,7 +177,6 @@ enum MiscFlags { // Miscellaneous flags to adjust argument
};
//===----------------------------------------------------------------------===//
-// Option Category class
//
class OptionCategory {
private:
@@ -205,7 +200,6 @@ public:
OptionCategory &getGeneralCategory();
//===----------------------------------------------------------------------===//
-// SubCommand class
//
class SubCommand {
private:
@@ -244,14 +238,13 @@ extern ManagedStatic<SubCommand> TopLevelSubCommand;
extern ManagedStatic<SubCommand> AllSubCommands;
//===----------------------------------------------------------------------===//
-// Option Base class
//
class Option {
friend class alias;
- // handleOccurrences - Overriden by subclasses to handle the value passed into
- // an argument. Should return true if there was an error processing the
- // argument and the program should exit.
+  // Overridden by subclasses to handle the value passed into an argument. Should
+ // return true if there was an error processing the argument and the program
+ // should exit.
//
virtual bool handleOccurrence(unsigned pos, StringRef ArgName,
StringRef Arg) = 0;
@@ -305,7 +298,7 @@ public:
inline unsigned getPosition() const { return Position; }
inline unsigned getNumAdditionalVals() const { return AdditionalVals; }
- // hasArgStr - Return true if the argstr != ""
+ // Return true if the argstr != ""
bool hasArgStr() const { return !ArgStr.empty(); }
bool isPositional() const { return getFormattingFlag() == cl::Positional; }
bool isSink() const { return getMiscFlags() & cl::Sink; }
@@ -348,7 +341,7 @@ protected:
public:
virtual ~Option() = default;
- // addArgument - Register this argument with the commandline system.
+ // Register this argument with the commandline system.
//
void addArgument();
@@ -361,8 +354,8 @@ public:
// Return the width of the option tag for printing...
virtual size_t getOptionWidth() const = 0;
- // printOptionInfo - Print out information about this option. The
- // to-be-maintained width is specified.
+ // Print out information about this option. The to-be-maintained width is
+ // specified.
//
virtual void printOptionInfo(size_t GlobalWidth) const = 0;
@@ -388,7 +381,7 @@ public:
virtual void getExtraOptionNames(SmallVectorImpl<StringRef> &) {}
- // addOccurrence - Wrapper around handleOccurrence that enforces Flags.
+ // Wrapper around handleOccurrence that enforces Flags.
//
virtual bool addOccurrence(unsigned pos, StringRef ArgName, StringRef Value,
bool MultiArg = false);
@@ -408,7 +401,7 @@ public:
// command line option parsers...
//
-// desc - Modifier to set the description shown in the -help output...
+// Modifier to set the description shown in the -help output...
struct desc {
StringRef Desc;
@@ -417,8 +410,7 @@ struct desc {
void apply(Option &O) const { O.setDescription(Desc); }
};
-// value_desc - Modifier to set the value description shown in the -help
-// output...
+// Modifier to set the value description shown in the -help output...
struct value_desc {
StringRef Desc;
@@ -427,10 +419,9 @@ struct value_desc {
void apply(Option &O) const { O.setValueStr(Desc); }
};
-// init - Specify a default (initial) value for the command line argument, if
-// the default constructor for the argument type does not give you what you
-// want. This is only valid on "opt" arguments, not on "list" arguments.
-//
+// Specify a default (initial) value for the command line argument, if the
+// default constructor for the argument type does not give you what you want.
+// This is only valid on "opt" arguments, not on "list" arguments.
template <class Ty> struct initializer {
const Ty &Init;
initializer(const Ty &Val) : Init(Val) {}
@@ -442,10 +433,9 @@ template <class Ty> initializer<Ty> init(const Ty &Val) {
return initializer<Ty>(Val);
}
-// location - Allow the user to specify which external variable they want to
-// store the results of the command line argument processing into, if they don't
-// want to store it in the option itself.
-//
+// Allow the user to specify which external variable they want to store the
+// results of the command line argument processing into, if they don't want to
+// store it in the option itself.
template <class Ty> struct LocationClass {
Ty &Loc;
@@ -458,8 +448,7 @@ template <class Ty> LocationClass<Ty> location(Ty &L) {
return LocationClass<Ty>(L);
}
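Taken together, these modifiers compose at the declaration site; a typical option using desc, value_desc, init, and location might look like this (the tool-specific names are invented):

#include "llvm/Support/CommandLine.h"
#include <string>

static std::string OutputFilename; // external storage selected via cl::location
static llvm::cl::opt<std::string, true /*ExternalStorage*/> OutputFile(
    "o", llvm::cl::desc("Specify output filename"),
    llvm::cl::value_desc("filename"), llvm::cl::init("-"),
    llvm::cl::location(OutputFilename));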
-// cat - Specifiy the Option category for the command line argument to belong
-// to.
+// Specify the Option category for the command line argument to belong to.
struct cat {
OptionCategory &Category;
@@ -468,7 +457,7 @@ struct cat {
template <class Opt> void apply(Opt &O) const { O.addCategory(Category); }
};
-// sub - Specify the subcommand that this option belongs to.
+// Specify the subcommand that this option belongs to.
struct sub {
SubCommand &Sub;
@@ -514,7 +503,6 @@ callback(F CB) {
}
//===----------------------------------------------------------------------===//
-// OptionValue class
// Support value comparison outside the template.
struct GenericOptionValue {
@@ -672,8 +660,8 @@ struct OptionEnumValue {
#define clEnumValN(ENUMVAL, FLAGNAME, DESC) \
llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
-// values - For custom data types, allow specifying a group of values together
-// as the values that go into the mapping that the option handler uses.
+// For custom data types, allow specifying a group of values together as the
+// values that go into the mapping that the option handler uses.
//
class ValuesClass {
// Use a vector instead of a map, because the lists should be short,
@@ -699,16 +687,16 @@ template <typename... OptsTy> ValuesClass values(OptsTy... Options) {
}
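
A sketch of the values/clEnumValN mapping for an enum-valued option (enum and
flag names hypothetical):

  enum OptLevel { O0, O1, O2 };
  static llvm::cl::opt<OptLevel> Level(
      "level", llvm::cl::desc("Optimization level (hypothetical):"),
      llvm::cl::values(clEnumValN(O0, "O0", "No optimization"),
                       clEnumValN(O1, "O1", "Some optimization"),
                       clEnumValN(O2, "O2", "Full optimization")),
      llvm::cl::init(O0));
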
//===----------------------------------------------------------------------===//
-// parser class - Parameterizable parser for different data types. By default,
-// known data types (string, int, bool) have specialized parsers, that do what
-// you would expect. The default parser, used for data types that are not
-// built-in, uses a mapping table to map specific options to values, which is
-// used, among other things, to handle enum types.
+// Parameterizable parser for different data types. By default, known data types
+// (string, int, bool) have specialized parsers that do what you would expect.
+// The default parser, used for data types that are not built-in, uses a mapping
+// table to map specific options to values, which is used, among other things,
+// to handle enum types.
//--------------------------------------------------
-// generic_parser_base - This class holds all the non-generic code that we do
-// not need replicated for every instance of the generic parser. This also
-// allows us to put stuff into CommandLine.cpp
+// This class holds all the non-generic code that we do not need replicated for
+// every instance of the generic parser. This also allows us to put stuff into
+// CommandLine.cpp
//
class generic_parser_base {
protected:
@@ -726,15 +714,15 @@ public:
virtual ~generic_parser_base() = default;
// Base class should have virtual-destructor
- // getNumOptions - Virtual function implemented by generic subclass to
- // indicate how many entries are in Values.
+ // Virtual function implemented by generic subclass to indicate how many
+ // entries are in Values.
//
virtual unsigned getNumOptions() const = 0;
- // getOption - Return option name N.
+ // Return option name N.
virtual StringRef getOption(unsigned N) const = 0;
- // getDescription - Return description N
+ // Return description N
virtual StringRef getDescription(unsigned N) const = 0;
// Return the width of the option tag for printing...
@@ -742,8 +730,8 @@ public:
virtual const GenericOptionValue &getOptionValue(unsigned N) const = 0;
- // printOptionInfo - Print out information about this option. The
- // to-be-maintained width is specified.
+ // Print out information about this option. The to-be-maintained width is
+ // specified.
//
virtual void printOptionInfo(const Option &O, size_t GlobalWidth) const;
@@ -751,7 +739,7 @@ public:
const GenericOptionValue &Default,
size_t GlobalWidth) const;
- // printOptionDiff - print the value of an option and it's default.
+ // Print the value of an option and its default.
//
// Template definition ensures that the option and default have the same
// DataType (via the same AnyOptionValue).
@@ -791,7 +779,7 @@ public:
return ValueDisallowed;
}
- // findOption - Return the option number corresponding to the specified
+ // Return the option number corresponding to the specified
// argument string. If the option is not found, getNumOptions() is returned.
//
unsigned findOption(StringRef Name);
@@ -829,12 +817,12 @@ public:
return Values[N].HelpStr;
}
- // getOptionValue - Return the value of option name N.
+ // Return the value of option name N.
const GenericOptionValue &getOptionValue(unsigned N) const override {
return Values[N].V;
}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, DataType &V) {
StringRef ArgVal;
if (Owner.hasArgStr())
@@ -851,7 +839,7 @@ public:
return O.error("Cannot find option named '" + ArgVal + "'!");
}
- /// addLiteralOption - Add an entry to the mapping table.
+ /// Add an entry to the mapping table.
///
template <class DT>
void addLiteralOption(StringRef Name, const DT &V, StringRef HelpStr) {
@@ -861,7 +849,7 @@ public:
AddLiteralOption(Owner, Name);
}
- /// removeLiteralOption - Remove the specified option.
+ /// Remove the specified option.
///
void removeLiteralOption(StringRef Name) {
unsigned N = findOption(Name);
@@ -871,7 +859,7 @@ public:
};
//--------------------------------------------------
-// basic_parser - Super class of parsers to provide boilerplate code
+// Super class of parsers to provide boilerplate code
//
class basic_parser_impl { // non-template implementation of basic_parser<t>
public:
@@ -890,16 +878,15 @@ public:
// Return the width of the option tag for printing...
size_t getOptionWidth(const Option &O) const;
- // printOptionInfo - Print out information about this option. The
- // to-be-maintained width is specified.
+ // Print out information about this option. The to-be-maintained width is
+ // specified.
//
void printOptionInfo(const Option &O, size_t GlobalWidth) const;
- // printOptionNoValue - Print a placeholder for options that don't yet support
- // printOptionDiff().
+ // Print a placeholder for options that don't yet support printOptionDiff().
void printOptionNoValue(const Option &O, size_t GlobalWidth) const;
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
virtual StringRef getValueName() const { return "value"; }
// An out-of-line virtual method to provide a 'home' for this class.
@@ -910,8 +897,8 @@ protected:
void printOptionName(const Option &O, size_t GlobalWidth) const;
};
-// basic_parser - The real basic parser is just a template wrapper that provides
-// a typedef for the provided data type.
+// The real basic parser is just a template wrapper that provides a typedef for
+// the provided data type.
//
template <class DataType> class basic_parser : public basic_parser_impl {
public:
@@ -922,8 +909,6 @@ public:
};
//--------------------------------------------------
-// parser<bool>
-//
extern template class basic_parser<bool>;
@@ -931,7 +916,7 @@ template <> class parser<bool> : public basic_parser<bool> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, bool &Val);
void initialize() {}
@@ -940,7 +925,7 @@ public:
return ValueOptional;
}
- // getValueName - Do not print =<value> at all.
+ // Do not print =<value> at all.
StringRef getValueName() const override { return StringRef(); }
void printOptionDiff(const Option &O, bool V, OptVal Default,
@@ -951,7 +936,6 @@ public:
};
//--------------------------------------------------
-// parser<boolOrDefault>
extern template class basic_parser<boolOrDefault>;
@@ -959,14 +943,14 @@ template <> class parser<boolOrDefault> : public basic_parser<boolOrDefault> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, boolOrDefault &Val);
enum ValueExpected getValueExpectedFlagDefault() const {
return ValueOptional;
}
- // getValueName - Do not print =<value> at all.
+ // Do not print =<value> at all.
StringRef getValueName() const override { return StringRef(); }
void printOptionDiff(const Option &O, boolOrDefault V, OptVal Default,
@@ -977,8 +961,6 @@ public:
};
//--------------------------------------------------
-// parser<int>
-//
extern template class basic_parser<int>;
@@ -986,10 +968,10 @@ template <> class parser<int> : public basic_parser<int> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, int &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "int"; }
void printOptionDiff(const Option &O, int V, OptVal Default,
@@ -1000,8 +982,6 @@ public:
};
//--------------------------------------------------
-// parser<long>
-//
extern template class basic_parser<long>;
@@ -1009,10 +989,10 @@ template <> class parser<long> final : public basic_parser<long> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, long &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "long"; }
void printOptionDiff(const Option &O, long V, OptVal Default,
@@ -1023,8 +1003,6 @@ public:
};
//--------------------------------------------------
-// parser<long long>
-//
extern template class basic_parser<long long>;
@@ -1032,10 +1010,10 @@ template <> class parser<long long> : public basic_parser<long long> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, long long &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "long"; }
void printOptionDiff(const Option &O, long long V, OptVal Default,
@@ -1046,8 +1024,6 @@ public:
};
//--------------------------------------------------
-// parser<unsigned>
-//
extern template class basic_parser<unsigned>;
@@ -1055,10 +1031,10 @@ template <> class parser<unsigned> : public basic_parser<unsigned> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "uint"; }
void printOptionDiff(const Option &O, unsigned V, OptVal Default,
@@ -1069,8 +1045,6 @@ public:
};
//--------------------------------------------------
-// parser<unsigned long>
-//
extern template class basic_parser<unsigned long>;
@@ -1079,10 +1053,10 @@ class parser<unsigned long> final : public basic_parser<unsigned long> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned long &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "ulong"; }
void printOptionDiff(const Option &O, unsigned long V, OptVal Default,
@@ -1093,8 +1067,6 @@ public:
};
//--------------------------------------------------
-// parser<unsigned long long>
-//
extern template class basic_parser<unsigned long long>;
@@ -1103,11 +1075,11 @@ class parser<unsigned long long> : public basic_parser<unsigned long long> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg,
unsigned long long &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "ulong"; }
void printOptionDiff(const Option &O, unsigned long long V, OptVal Default,
@@ -1118,8 +1090,6 @@ public:
};
//--------------------------------------------------
-// parser<double>
-//
extern template class basic_parser<double>;
@@ -1127,10 +1097,10 @@ template <> class parser<double> : public basic_parser<double> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, double &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "number"; }
void printOptionDiff(const Option &O, double V, OptVal Default,
@@ -1141,8 +1111,6 @@ public:
};
//--------------------------------------------------
-// parser<float>
-//
extern template class basic_parser<float>;
@@ -1150,10 +1118,10 @@ template <> class parser<float> : public basic_parser<float> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &O, StringRef ArgName, StringRef Arg, float &Val);
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "number"; }
void printOptionDiff(const Option &O, float V, OptVal Default,
@@ -1164,8 +1132,6 @@ public:
};
//--------------------------------------------------
-// parser<std::string>
-//
extern template class basic_parser<std::string>;
@@ -1173,13 +1139,13 @@ template <> class parser<std::string> : public basic_parser<std::string> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &, StringRef, StringRef Arg, std::string &Value) {
Value = Arg.str();
return false;
}
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "string"; }
void printOptionDiff(const Option &O, StringRef V, const OptVal &Default,
@@ -1190,8 +1156,6 @@ public:
};
//--------------------------------------------------
-// parser<char>
-//
extern template class basic_parser<char>;
@@ -1199,13 +1163,13 @@ template <> class parser<char> : public basic_parser<char> {
public:
parser(Option &O) : basic_parser(O) {}
- // parse - Return true on error.
+ // Return true on error.
bool parse(Option &, StringRef, StringRef Arg, char &Value) {
Value = Arg[0];
return false;
}
- // getValueName - Overload in subclass to provide a better default value.
+ // Overload in subclass to provide a better default value.
StringRef getValueName() const override { return "char"; }
void printOptionDiff(const Option &O, char V, OptVal Default,
@@ -1216,8 +1180,6 @@ public:
};
//--------------------------------------------------
-// PrintOptionDiff
-//
// This collection of wrappers is the intermediary between class opt and class
// parser to handle all the template nastiness.
@@ -1261,10 +1223,10 @@ void printOptionDiff(
}
//===----------------------------------------------------------------------===//
-// applicator class - This class is used because we must use partial
-// specialization to handle literal string arguments specially (const char* does
-// not correctly respond to the apply method). Because the syntax to use this
-// is a pain, we have the 'apply' method below to handle the nastiness...
+// This class is used because we must use partial specialization to handle
+// literal string arguments specially (const char* does not correctly respond to
+// the apply method). Because the syntax to use this is a pain, we have the
+// 'apply' method below to handle the nastiness...
//
template <class Mod> struct applicator {
template <class Opt> static void opt(const Mod &M, Opt &O) { M.apply(O); }
@@ -1313,7 +1275,7 @@ template <> struct applicator<MiscFlags> {
}
};
-// apply method - Apply modifiers to an option in a type safe way.
+// Apply modifiers to an option in a type safe way.
template <class Opt, class Mod, class... Mods>
void apply(Opt *O, const Mod &M, const Mods &... Ms) {
applicator<Mod>::opt(M, *O);
@@ -1325,8 +1287,6 @@ template <class Opt, class Mod> void apply(Opt *O, const Mod &M) {
}
//===----------------------------------------------------------------------===//
-// opt_storage class
-
// Default storage class definition: external storage. This implementation
// assumes the user will specify a variable to store the data into with the
// cl::location(x) modifier.
@@ -1406,7 +1366,7 @@ public:
// Make sure we initialize the value with the default constructor for the
// type.
- opt_storage() : Value(DataType()), Default(DataType()) {}
+ opt_storage() : Value(DataType()), Default() {}
template <class T> void setValue(const T &V, bool initial = false) {
Value = V;
@@ -1425,7 +1385,7 @@ public:
};
//===----------------------------------------------------------------------===//
-// opt - A scalar command line option.
+// A scalar command line option.
//
template <class DataType, bool ExternalStorage = false,
class ParserClass = parser<DataType>>
@@ -1476,6 +1436,8 @@ class opt : public Option,
const OptionValue<DataType> &V = this->getDefault();
if (V.hasValue())
this->setValue(V.getValue());
+ else
+ this->setValue(T());
}
template <class T,
@@ -1528,8 +1490,6 @@ extern template class opt<char>;
extern template class opt<bool>;
//===----------------------------------------------------------------------===//
-// list_storage class
-
// Default storage class definition: external storage. This implementation
// assumes the user will specify a variable to store the data into with the
// cl::location(x) modifier.
@@ -1634,7 +1594,7 @@ public:
};
//===----------------------------------------------------------------------===//
-// list - A list of command line options.
+// A list of command line options.
//
template <class DataType, class StorageClass = bool,
class ParserClass = parser<DataType>>
@@ -1716,7 +1676,7 @@ public:
[](const typename ParserClass::parser_data_type &) {};
};
-// multi_val - Modifier to set the number of additional values.
+// Modifier to set the number of additional values.
struct multi_val {
unsigned AdditionalVals;
explicit multi_val(unsigned N) : AdditionalVals(N) {}
@@ -1728,8 +1688,6 @@ struct multi_val {
};
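
A sketch of multi_val on a list option (option name hypothetical); each
occurrence consumes a fixed group of values:

  // "-coord 1 2 3 -coord 4 5 6" appends six ints, three per occurrence.
  static llvm::cl::list<int> Coords(
      "coord", llvm::cl::desc("Coordinate triples (hypothetical)"),
      llvm::cl::multi_val(3));
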
//===----------------------------------------------------------------------===//
-// bits_storage class
-
// Default storage class definition: external storage. This implementation
// assumes the user will specify a variable to store the data into with the
// cl::location(x) modifier.
@@ -1738,7 +1696,7 @@ template <class DataType, class StorageClass> class bits_storage {
unsigned *Location = nullptr; // Where to store the bits...
template <class T> static unsigned Bit(const T &V) {
- unsigned BitPos = reinterpret_cast<unsigned>(V);
+ unsigned BitPos = static_cast<unsigned>(V);
assert(BitPos < sizeof(unsigned) * CHAR_BIT &&
"enum exceeds width of bit vector!");
return 1 << BitPos;
@@ -1763,6 +1721,11 @@ public:
unsigned getBits() { return *Location; }
+ void clear() {
+ if (Location)
+ *Location = 0;
+ }
+
template <class T> bool isSet(const T &V) {
return (*Location & Bit(V)) != 0;
}
@@ -1772,10 +1735,10 @@ public:
// This makes us exactly compatible with the bits in all cases that it is used.
//
template <class DataType> class bits_storage<DataType, bool> {
- unsigned Bits; // Where to store the bits...
+ unsigned Bits{0}; // Where to store the bits...
template <class T> static unsigned Bit(const T &V) {
- unsigned BitPos = (unsigned)V;
+ unsigned BitPos = static_cast<unsigned>(V);
assert(BitPos < sizeof(unsigned) * CHAR_BIT &&
"enum exceeds width of bit vector!");
return 1 << BitPos;
@@ -1786,11 +1749,13 @@ public:
unsigned getBits() { return Bits; }
+ void clear() { Bits = 0; }
+
template <class T> bool isSet(const T &V) { return (Bits & Bit(V)) != 0; }
};
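
A sketch of a cl::bits option over a small enum (all names hypothetical):

  enum DebugBits { Quiet, Info, Verbose };
  static llvm::cl::bits<DebugBits> DbgLevel(
      llvm::cl::desc("Debugging bits (hypothetical):"),
      llvm::cl::values(clEnumValN(Quiet, "dq", "quiet"),
                       clEnumValN(Info, "di", "info"),
                       clEnumValN(Verbose, "dv", "verbose")));
  // Later: if (DbgLevel.isSet(Verbose)) { ... }
  // or:    unsigned Mask = DbgLevel.getBits();
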
//===----------------------------------------------------------------------===//
-// bits - A bit vector of command options.
+// A bit vector of command options.
//
template <class DataType, class Storage = bool,
class ParserClass = parser<DataType>>
@@ -1832,7 +1797,7 @@ class bits : public Option, public bits_storage<DataType, Storage> {
void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const override {
}
- void setDefault() override {}
+ void setDefault() override { bits_storage<DataType, Storage>::clear(); }
void done() {
addArgument();
@@ -1929,7 +1894,7 @@ public:
}
};
-// aliasfor - Modifier to set the option an alias aliases.
+// Modifier to set the option an alias aliases.
struct aliasopt {
Option &Opt;
@@ -1938,10 +1903,9 @@ struct aliasopt {
void apply(alias &A) const { A.setAliasFor(Opt); }
};
-// extrahelp - provide additional help at the end of the normal help
-// output. All occurrences of cl::extrahelp will be accumulated and
-// printed to stderr at the end of the regular help, just before
-// exit is called.
+// Provide additional help at the end of the normal help output. All occurrences
+// of cl::extrahelp will be accumulated and printed to stderr at the end of the
+// regular help, just before exit is called.
struct extrahelp {
StringRef morehelp;
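
A sketch of extrahelp in use (tool name and text hypothetical):

  static llvm::cl::extrahelp MoreHelp(
      "\nExample:\n  mytool -threshold=16 input.ll\n");
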
@@ -2032,12 +1996,15 @@ void TokenizeGNUCommandLine(StringRef Source, StringSaver &Saver,
SmallVectorImpl<const char *> &NewArgv,
bool MarkEOLs = false);
-/// Tokenizes a Windows command line which may contain quotes and escaped
-/// quotes.
+/// Tokenizes a string of Windows command line arguments, which may contain
+/// quotes and escaped quotes.
///
/// See MSDN docs for CommandLineToArgvW for information on the quoting rules.
/// http://msdn.microsoft.com/en-us/library/windows/desktop/17w5ykft(v=vs.85).aspx
///
+/// For handling a full Windows command line including the executable name at
+/// the start, see TokenizeWindowsCommandLineFull below.
+///
/// \param [in] Source The string to be split on whitespace with quotes.
/// \param [in] Saver Delegates back to the caller for saving parsed strings.
/// \param [in] MarkEOLs true if tokenizing a response file and you want end of
@@ -2054,6 +2021,23 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver,
void TokenizeWindowsCommandLineNoCopy(StringRef Source, StringSaver &Saver,
SmallVectorImpl<StringRef> &NewArgv);
+/// Tokenizes a full Windows command line, including the command name at the start.
+///
+/// This uses the same syntax rules as TokenizeWindowsCommandLine for all but
+/// the first token. But the first token is expected to be parsed as the
+/// executable file name in the way CreateProcess would do it, rather than the
+/// way the C library startup code would do it: CreateProcess does not consider
+/// that \ is ever an escape character (because " is not a valid filename char,
+/// hence there's never a need to escape it to be used literally).
+///
+/// Parameters are the same as for TokenizeWindowsCommandLine. In particular,
+/// if you set MarkEOLs = true, then the first word of every line will be
+/// parsed using the special rules for command names, making this function
+/// suitable for parsing a file full of commands to execute.
+void TokenizeWindowsCommandLineFull(StringRef Source, StringSaver &Saver,
+ SmallVectorImpl<const char *> &NewArgv,
+ bool MarkEOLs = false);
+
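
A usage sketch for the Windows tokenizers (the input string is hypothetical):

  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  llvm::SmallVector<const char *, 8> Argv;
  llvm::cl::TokenizeWindowsCommandLine("a\\b \"c d\"", Saver, Argv);
  // Argv holds {"a\b", "c d"}; TokenizeWindowsCommandLineFull would instead
  // parse the first token with the CreateProcess-style rules described above.
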
/// String tokenization function type. Should be compatible with either
/// Windows or Unix command line tokenizers.
using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver,
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index f3317049524f..6708b7cc95cc 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -39,6 +39,10 @@
# define __has_builtin(x) 0
#endif
+#ifndef __has_include
+# define __has_include(x) 0
+#endif
+
// Only use __has_cpp_attribute in C++ mode. GCC defines __has_cpp_attribute in
// C mode, but the :: in __has_cpp_attribute(scoped::attribute) is invalid.
#ifndef LLVM_HAS_CPP_ATTRIBUTE
@@ -90,30 +94,14 @@
#define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version))
// We require at least VS 2019.
+#if !defined(LLVM_FORCE_USE_OLD_TOOLCHAIN)
#if !LLVM_MSC_PREREQ(1920)
#error LLVM requires at least VS 2019.
#endif
-
-#else
-#define LLVM_MSC_PREREQ(version) 0
#endif
-/// Does the compiler support ref-qualifiers for *this?
-///
-/// Sadly, this is separate from just rvalue reference support because GCC
-/// and MSVC implemented this later than everything else. This appears to be
-/// corrected in MSVC 2019 but not MSVC 2017.
-/// FIXME: Remove LLVM_HAS_RVALUE_REFERENCE_THIS macro
-#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
-
-/// Expands to '&' if ref-qualifiers for *this are supported.
-///
-/// This can be used to provide lvalue/rvalue overrides of member functions.
-/// The rvalue override should be guarded by LLVM_HAS_RVALUE_REFERENCE_THIS
-#if LLVM_HAS_RVALUE_REFERENCE_THIS
-#define LLVM_LVALUE_FUNCTION &
#else
-#define LLVM_LVALUE_FUNCTION
+#define LLVM_MSC_PREREQ(version) 0
#endif
/// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked
@@ -325,20 +313,17 @@
#define LLVM_EXTENSION
#endif
-// LLVM_ATTRIBUTE_DEPRECATED(decl, "message")
-// This macro will be removed.
-// Use C++14's attribute instead: [[deprecated("message")]]
-#define LLVM_ATTRIBUTE_DEPRECATED(decl, message) [[deprecated(message)]] decl
-
/// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
/// to an expression which states that it is undefined behavior for the
/// compiler to reach this point. Otherwise is not defined.
+///
+/// '#else' is intentionally left out so that other macro logic (e.g.,
+/// LLVM_ASSUME_ALIGNED and llvm_unreachable()) can detect whether
+/// LLVM_BUILTIN_UNREACHABLE has a definition.
#if __has_builtin(__builtin_unreachable) || defined(__GNUC__)
# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
#elif defined(_MSC_VER)
# define LLVM_BUILTIN_UNREACHABLE __assume(false)
-#else
-# define LLVM_BUILTIN_UNREACHABLE
#endif
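
A sketch of the detection pattern that the intentionally missing '#else'
enables (the MY_UNREACHABLE macro is hypothetical):

  #if defined(LLVM_BUILTIN_UNREACHABLE)
  #define MY_UNREACHABLE() LLVM_BUILTIN_UNREACHABLE
  #else
  #define MY_UNREACHABLE() abort() // no builtin hint on this compiler
  #endif
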
/// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression
@@ -411,22 +396,6 @@
# define LLVM_PACKED_END _Pragma("pack(pop)")
#endif
-/// \macro LLVM_PTR_SIZE
-/// A constant integer equivalent to the value of sizeof(void*).
-/// Generally used in combination with alignas or when doing computation in the
-/// preprocessor.
-#ifdef __SIZEOF_POINTER__
-# define LLVM_PTR_SIZE __SIZEOF_POINTER__
-#elif defined(_WIN64)
-# define LLVM_PTR_SIZE 8
-#elif defined(_WIN32)
-# define LLVM_PTR_SIZE 4
-#elif defined(_MSC_VER)
-# error "could not determine LLVM_PTR_SIZE as a constant int for MSVC"
-#else
-# define LLVM_PTR_SIZE sizeof(void *)
-#endif
-
/// \macro LLVM_MEMORY_SANITIZER_BUILD
/// Whether LLVM itself is built with MemorySanitizer instrumentation.
#if __has_feature(memory_sanitizer)
@@ -444,8 +413,21 @@
/// Whether LLVM itself is built with AddressSanitizer instrumentation.
#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
# define LLVM_ADDRESS_SANITIZER_BUILD 1
+#if __has_include(<sanitizer/asan_interface.h>)
# include <sanitizer/asan_interface.h>
#else
+// These declarations exist to support ASan with MSVC. If MSVC eventually ships
+// asan_interface.h in their headers, then we can remove this.
+#ifdef __cplusplus
+extern "C" {
+#endif
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif
+#else
# define LLVM_ADDRESS_SANITIZER_BUILD 0
# define __asan_poison_memory_region(p, size)
# define __asan_unpoison_memory_region(p, size)
diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 5bc0e56913fe..e6f898229412 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -29,8 +29,8 @@ static constexpr int BestSizeCompression = 9;
bool isAvailable();
-Error compress(StringRef InputBuffer, SmallVectorImpl<char> &CompressedBuffer,
- int Level = DefaultCompression);
+void compress(StringRef InputBuffer, SmallVectorImpl<char> &CompressedBuffer,
+ int Level = DefaultCompression);
Error uncompress(StringRef InputBuffer, char *UncompressedBuffer,
size_t &UncompressedSize);
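
A usage sketch for the new void-returning compress (the Input variable is
hypothetical; with no Error to consume, failures are presumably reported
internally):

  llvm::SmallVector<char, 0> Compressed;
  if (llvm::zlib::isAvailable())
    llvm::zlib::compress(Input, Compressed, llvm::zlib::BestSizeCompression);
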
diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h
index 374cdb907fdc..662f3aca5b54 100644
--- a/llvm/include/llvm/Support/ConvertUTF.h
+++ b/llvm/include/llvm/Support/ConvertUTF.h
@@ -126,6 +126,9 @@ typedef unsigned char Boolean; /* 0 or 1 */
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
+#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF
+#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000
+
typedef enum {
conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
@@ -282,6 +285,24 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out);
/**
+ * Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
+ *
+ * \param [in] SrcBytes A buffer of what is assumed to be UTF-32 encoded text.
+ * \param [out] Out Converted UTF-8 is stored here on success.
+ * \returns true on success
+ */
+bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
+
+/**
+ * Converts a UTF32 string into a UTF8 std::string.
+ *
+ * \param [in] Src A buffer of UTF-32 encoded text.
+ * \param [out] Out Converted UTF-8 is stored here on success.
+ * \returns true on success
+ */
+bool convertUTF32ToUTF8String(ArrayRef<UTF32> Src, std::string &Out);
+
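
A usage sketch for the new UTF-32 overload (the code point is chosen
arbitrarily):

  const llvm::UTF32 Src[] = {0x1F600}; // U+1F600
  std::string Out;
  if (llvm::convertUTF32ToUTF8String(llvm::makeArrayRef(Src), Out)) {
    // Out now holds the 4-byte UTF-8 encoding of U+1F600.
  }
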
+/**
* Converts a UTF-8 string into a UTF-16 string with native endianness.
*
* \returns true on success
diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h
index f60e7335e197..26ddf97b3ef0 100644
--- a/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -101,6 +101,9 @@ public:
/// return failure from RunSafely(). This function does not return.
[[noreturn]] void HandleExit(int RetCode);
+ /// Return true if RetCode indicates that a signal or an exception occurred.
+ static bool isCrash(int RetCode);
+
/// Rethrow a signal or an exception, after it was caught once by a
/// CrashRecoveryContext.
static bool throwIfCrash(int RetCode);
diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h
index 2ff978476c79..5788ab3b2138 100644
--- a/llvm/include/llvm/Support/Debug.h
+++ b/llvm/include/llvm/Support/Debug.h
@@ -67,8 +67,8 @@ void setCurrentDebugTypes(const char **Types, unsigned Count);
#else
#define isCurrentDebugType(X) (false)
-#define setCurrentDebugType(X)
-#define setCurrentDebugTypes(X, N)
+#define setCurrentDebugType(X) do { (void)(X); } while (false)
+#define setCurrentDebugTypes(X, N) do { (void)(X); (void)(N); } while (false)
#define DEBUG_WITH_TYPE(TYPE, X) do { } while (false)
#endif
diff --git a/llvm/include/llvm/Support/Errno.h b/llvm/include/llvm/Support/Errno.h
index 07df6765d9db..e095c66b9086 100644
--- a/llvm/include/llvm/Support/Errno.h
+++ b/llvm/include/llvm/Support/Errno.h
@@ -15,7 +15,6 @@
#include <cerrno>
#include <string>
-#include <type_traits>
namespace llvm {
namespace sys {
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index 881049b15b0d..1a801b6f2c7a 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -1269,7 +1269,7 @@ public:
void log(raw_ostream &OS) const override {
assert(Err && "Trying to log after takeError().");
OS << "'" << FileName << "': ";
- if (Line.hasValue())
+ if (Line)
OS << "line " << Line.getValue() << ": ";
Err->log(OS);
}
@@ -1281,7 +1281,7 @@ public:
return OS.str();
}
- StringRef getFileName() { return FileName; }
+ StringRef getFileName() const { return FileName; }
Error takeError() { return Error(std::move(Err)); }
diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h
index f980510d37f0..004b3b7868fb 100644
--- a/llvm/include/llvm/Support/ErrorHandling.h
+++ b/llvm/include/llvm/Support/ErrorHandling.h
@@ -124,19 +124,30 @@ llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr,
/// Marks that the current location is not supposed to be reachable.
/// In !NDEBUG builds, prints the message and location info to stderr.
-/// In NDEBUG builds, becomes an optimizer hint that the current location
-/// is not supposed to be reachable. On compilers that don't support
-/// such hints, prints a reduced message instead and aborts the program.
+/// In NDEBUG builds, if the platform does not support a builtin unreachable
+/// then we call an internal LLVM runtime function. Otherwise the behavior is
+/// controlled by the CMake flag
+/// -DLLVM_UNREACHABLE_OPTIMIZE
+/// * When "ON" (default) llvm_unreachable() becomes an optimizer hint
+/// that the current location is not supposed to be reachable: the hint
+/// turns such code path into undefined behavior. On compilers that don't
+/// support such hints, prints a reduced message instead and aborts the
+/// program.
+/// * When "OFF", a builtin_trap is emitted instead of an
+///   optimizer hint or printing a reduced message.
///
-/// Use this instead of assert(0). It conveys intent more clearly and
-/// allows compilers to omit some unnecessary code.
+/// Use this instead of assert(0). It conveys intent more clearly, suppresses
+/// diagnostics for unreachable code paths, and allows compilers to omit
+/// unnecessary code.
#ifndef NDEBUG
#define llvm_unreachable(msg) \
::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
-#elif defined(LLVM_BUILTIN_UNREACHABLE)
+#elif !defined(LLVM_BUILTIN_UNREACHABLE)
+#define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
+#elif LLVM_UNREACHABLE_OPTIMIZE
#define llvm_unreachable(msg) LLVM_BUILTIN_UNREACHABLE
#else
-#define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
+#define llvm_unreachable(msg) LLVM_BUILTIN_TRAP, LLVM_BUILTIN_UNREACHABLE
#endif
#endif
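
A minimal sketch of the intended use (the type and handlers are hypothetical):

  switch (Kind) {
  case MyKind::A: return handleA();
  case MyKind::B: return handleB();
  }
  llvm_unreachable("covered switch over MyKind");
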
diff --git a/llvm/include/llvm/Support/FileUtilities.h b/llvm/include/llvm/Support/FileUtilities.h
index f8a37fe1177d..0033638c6804 100644
--- a/llvm/include/llvm/Support/FileUtilities.h
+++ b/llvm/include/llvm/Support/FileUtilities.h
@@ -110,6 +110,27 @@ namespace llvm {
llvm::Error
writeFileAtomically(StringRef TempPathModel, StringRef FinalPath,
std::function<llvm::Error(llvm::raw_ostream &)> Writer);
+
+ /// FilePermissionsApplier helps to copy permissions from an input file to
+ /// an output one. It records the status of the input file and can apply
+ /// permissions and dates to the output file.
+ class FilePermissionsApplier {
+ public:
+ static Expected<FilePermissionsApplier> create(StringRef InputFilename);
+
+ /// Apply stored permissions to the \p OutputFilename.
+ /// Copy LastAccess and ModificationTime if \p CopyDates is true.
+ /// Overwrite stored permissions if \p OverwritePermissions is specified.
+ Error apply(StringRef OutputFilename, bool CopyDates = false,
+ Optional<sys::fs::perms> OverwritePermissions = None);
+
+ private:
+ FilePermissionsApplier(StringRef InputFilename, sys::fs::file_status Status)
+ : InputFilename(InputFilename), InputStatus(Status) {}
+
+ StringRef InputFilename;
+ sys::fs::file_status InputStatus;
+ };
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/Support/FormatProviders.h b/llvm/include/llvm/Support/FormatProviders.h
index 3edd8844bc7a..8101ed7968ad 100644
--- a/llvm/include/llvm/Support/FormatProviders.h
+++ b/llvm/include/llvm/Support/FormatProviders.h
@@ -313,7 +313,7 @@ struct format_provider<T,
S = FloatStyle::Fixed;
Optional<size_t> Precision = parseNumericPrecision(Style);
- if (!Precision.hasValue())
+ if (!Precision)
Precision = getDefaultPrecision(S);
write_double(Stream, static_cast<double>(V), S, Precision);
diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h
index a872afb5e45e..c1707b4fe9cb 100644
--- a/llvm/include/llvm/Support/FormatVariadic.h
+++ b/llvm/include/llvm/Support/FormatVariadic.h
@@ -172,7 +172,7 @@ public:
// Formats textual output. `Fmt` is a string consisting of one or more
// replacement sequences with the following grammar:
//
-// rep_field ::= "{" [index] ["," layout] [":" format] "}"
+// rep_field ::= "{" index ["," layout] [":" format] "}"
// index ::= <non-negative integer>
// layout ::= [[[char]loc]width]
// format ::= <any string not containing "{" or "}">
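
A sketch of the grammar in use (exact output depends on the format providers):

  // "{0,-8}" left-justifies argument 0 in an 8-column field;
  // "{1:x}" prints argument 1 as lowercase hex.
  std::string S = llvm::formatv("{0,-8}|{1:x}", "name", 255).str();
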
diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h
index bf93a0d22da7..9d7680d2b667 100644
--- a/llvm/include/llvm/Support/HashBuilder.h
+++ b/llvm/include/llvm/Support/HashBuilder.h
@@ -39,6 +39,9 @@ struct IsHashableData
/// Declares the hasher member, and functions forwarding directly to the hasher.
template <typename HasherT> class HashBuilderBase {
public:
+ template <typename HasherT_ = HasherT>
+ using HashResultTy = decltype(std::declval<HasherT_ &>().final());
+
HasherT &getHasher() { return Hasher; }
/// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
@@ -59,12 +62,12 @@ public:
}
/// Forward to `HasherT::final()` if available.
- template <typename HasherT_ = HasherT> StringRef final() {
+ template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
return this->getHasher().final();
}
/// Forward to `HasherT::result()` if available.
- template <typename HasherT_ = HasherT> StringRef result() {
+ template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
return this->getHasher().result();
}
diff --git a/llvm/include/llvm/Support/Host.h b/llvm/include/llvm/Support/Host.h
index b3c15f0683b9..f683371ad1d3 100644
--- a/llvm/include/llvm/Support/Host.h
+++ b/llvm/include/llvm/Support/Host.h
@@ -64,6 +64,7 @@ namespace sys {
StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent);
StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent);
StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent);
+ StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent);
StringRef getHostCPUNameForBPF();
/// Helper functions to extract CPU details from CPUID on x86.
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 96b7753e9b20..84e095e2bbab 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -324,7 +324,7 @@ public:
/// Compute known bits resulting from multiplying LHS and RHS.
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS,
- bool SelfMultiply = false);
+ bool NoUndefSelfMultiply = false);
/// Compute known bits from sign-extended multiply-hi.
static KnownBits mulhs(const KnownBits &LHS, const KnownBits &RHS);
@@ -415,6 +415,12 @@ public:
return KnownBits(Zero.reverseBits(), One.reverseBits());
}
+ bool operator==(const KnownBits &Other) const {
+ return Zero == Other.Zero && One == Other.One;
+ }
+
+ bool operator!=(const KnownBits &Other) const { return !(*this == Other); }
+
void print(raw_ostream &OS) const;
void dump() const;
};
diff --git a/llvm/include/llvm/Support/LowLevelTypeImpl.h b/llvm/include/llvm/Support/LowLevelTypeImpl.h
index dd286f5228fe..186a7e5930ec 100644
--- a/llvm/include/llvm/Support/LowLevelTypeImpl.h
+++ b/llvm/include/llvm/Support/LowLevelTypeImpl.h
@@ -207,6 +207,18 @@ public:
return scalar(getScalarSizeInBits() / Factor);
}
+ /// Produce a vector type that is \p Factor times bigger, preserving the
+ /// element type. For a scalar or pointer, this will produce a new vector with
+ /// \p Factor elements.
+ LLT multiplyElements(int Factor) const {
+ if (isVector()) {
+ return scalarOrVector(getElementCount().multiplyCoefficientBy(Factor),
+ getElementType());
+ }
+
+ return fixed_vector(Factor, *this);
+ }
+
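
A sketch of multiplyElements applied to a scalar and to a vector:

  LLT S32   = LLT::scalar(32);
  LLT V4S32 = S32.multiplyElements(4);   // <4 x s32> from a scalar
  LLT V8S32 = V4S32.multiplyElements(2); // <8 x s32> from a vector
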
bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); }
unsigned getScalarSizeInBits() const {
diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h
index 70d046601346..fa2f477261dd 100644
--- a/llvm/include/llvm/Support/MD5.h
+++ b/llvm/include/llvm/Support/MD5.h
@@ -40,26 +40,19 @@ template <typename T> class ArrayRef;
class MD5 {
public:
- struct MD5Result {
- std::array<uint8_t, 16> Bytes;
-
- operator std::array<uint8_t, 16>() const { return Bytes; }
-
- const uint8_t &operator[](size_t I) const { return Bytes[I]; }
- uint8_t &operator[](size_t I) { return Bytes[I]; }
-
+ struct MD5Result : public std::array<uint8_t, 16> {
SmallString<32> digest() const;
uint64_t low() const {
// Our MD5 implementation returns the result in little endian, so the low
// word is first.
using namespace support;
- return endian::read<uint64_t, little, unaligned>(Bytes.data());
+ return endian::read<uint64_t, little, unaligned>(data());
}
uint64_t high() const {
using namespace support;
- return endian::read<uint64_t, little, unaligned>(Bytes.data() + 8);
+ return endian::read<uint64_t, little, unaligned>(data() + 8);
}
std::pair<uint64_t, uint64_t> words() const {
using namespace support;
@@ -78,20 +71,20 @@ public:
/// Finishes off the hash and puts the result in result.
void final(MD5Result &Result);
- /// Finishes off the hash, and returns a reference to the 16-byte hash data.
- StringRef final();
+ /// Finishes off the hash, and returns the 16-byte hash data.
+ MD5Result final();
- /// Finishes off the hash, and returns a reference to the 16-byte hash data.
+ /// Finishes off the hash, and returns the 16-byte hash data.
/// This is suitable for getting the MD5 at any time without invalidating the
/// internal state, so that more calls can be made into `update`.
- StringRef result();
+ MD5Result result();
/// Translates the bytes in \p Res to a hex string that is
/// deposited into \p Str. The result will be of length 32.
static void stringifyResult(MD5Result &Result, SmallVectorImpl<char> &Str);
/// Computes the hash for a given bytes.
- static std::array<uint8_t, 16> hash(ArrayRef<uint8_t> Data);
+ static MD5Result hash(ArrayRef<uint8_t> Data);
private:
// Any 32-bit or wider unsigned integer data type will do.
@@ -109,15 +102,9 @@ private:
MD5_u32plus block[16];
} InternalState;
- MD5Result Result;
-
const uint8_t *body(ArrayRef<uint8_t> Data);
};
-inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) {
- return LHS.Bytes == RHS.Bytes;
-}
-
/// Helper to compute and return lower 64 bits of the given string's MD5 hash.
inline uint64_t MD5Hash(StringRef Str) {
using namespace support;
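
A usage sketch reflecting the by-value MD5Result API (input hypothetical):

  llvm::MD5 Hash;
  Hash.update("some data");
  llvm::MD5::MD5Result Result = Hash.final(); // returned by value now
  llvm::SmallString<32> Hex = Result.digest();
  uint64_t Low = Result.low(); // little-endian low word
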
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 643c2d8ce981..5355c50bb762 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -41,143 +41,149 @@ namespace llvm {
// ValueTypes.td as well!
Other = 1, // This is a non-standard value
i1 = 2, // This is a 1 bit integer value
- i8 = 3, // This is an 8 bit integer value
- i16 = 4, // This is a 16 bit integer value
- i32 = 5, // This is a 32 bit integer value
- i64 = 6, // This is a 64 bit integer value
- i128 = 7, // This is a 128 bit integer value
+ i2 = 3, // This is a 2 bit integer value
+ i4 = 4, // This is a 4 bit integer value
+ i8 = 5, // This is an 8 bit integer value
+ i16 = 6, // This is a 16 bit integer value
+ i32 = 7, // This is a 32 bit integer value
+ i64 = 8, // This is a 64 bit integer value
+ i128 = 9, // This is a 128 bit integer value
FIRST_INTEGER_VALUETYPE = i1,
LAST_INTEGER_VALUETYPE = i128,
- bf16 = 8, // This is a 16 bit brain floating point value
- f16 = 9, // This is a 16 bit floating point value
- f32 = 10, // This is a 32 bit floating point value
- f64 = 11, // This is a 64 bit floating point value
- f80 = 12, // This is a 80 bit floating point value
- f128 = 13, // This is a 128 bit floating point value
- ppcf128 = 14, // This is a PPC 128-bit floating point value
+ bf16 = 10, // This is a 16 bit brain floating point value
+ f16 = 11, // This is a 16 bit floating point value
+ f32 = 12, // This is a 32 bit floating point value
+ f64 = 13, // This is a 64 bit floating point value
+ f80 = 14, // This is a 80 bit floating point value
+ f128 = 15, // This is a 128 bit floating point value
+ ppcf128 = 16, // This is a PPC 128-bit floating point value
FIRST_FP_VALUETYPE = bf16,
LAST_FP_VALUETYPE = ppcf128,
- v1i1 = 15, // 1 x i1
- v2i1 = 16, // 2 x i1
- v4i1 = 17, // 4 x i1
- v8i1 = 18, // 8 x i1
- v16i1 = 19, // 16 x i1
- v32i1 = 20, // 32 x i1
- v64i1 = 21, // 64 x i1
- v128i1 = 22, // 128 x i1
- v256i1 = 23, // 256 x i1
- v512i1 = 24, // 512 x i1
- v1024i1 = 25, // 1024 x i1
-
- v1i8 = 26, // 1 x i8
- v2i8 = 27, // 2 x i8
- v4i8 = 28, // 4 x i8
- v8i8 = 29, // 8 x i8
- v16i8 = 30, // 16 x i8
- v32i8 = 31, // 32 x i8
- v64i8 = 32, // 64 x i8
- v128i8 = 33, // 128 x i8
- v256i8 = 34, // 256 x i8
- v512i8 = 35, // 512 x i8
- v1024i8 = 36, // 1024 x i8
-
- v1i16 = 37, // 1 x i16
- v2i16 = 38, // 2 x i16
- v3i16 = 39, // 3 x i16
- v4i16 = 40, // 4 x i16
- v8i16 = 41, // 8 x i16
- v16i16 = 42, // 16 x i16
- v32i16 = 43, // 32 x i16
- v64i16 = 44, // 64 x i16
- v128i16 = 45, // 128 x i16
- v256i16 = 46, // 256 x i16
- v512i16 = 47, // 512 x i16
-
- v1i32 = 48, // 1 x i32
- v2i32 = 49, // 2 x i32
- v3i32 = 50, // 3 x i32
- v4i32 = 51, // 4 x i32
- v5i32 = 52, // 5 x i32
- v6i32 = 53, // 6 x i32
- v7i32 = 54, // 7 x i32
- v8i32 = 55, // 8 x i32
- v16i32 = 56, // 16 x i32
- v32i32 = 57, // 32 x i32
- v64i32 = 58, // 64 x i32
- v128i32 = 59, // 128 x i32
- v256i32 = 60, // 256 x i32
- v512i32 = 61, // 512 x i32
- v1024i32 = 62, // 1024 x i32
- v2048i32 = 63, // 2048 x i32
-
- v1i64 = 64, // 1 x i64
- v2i64 = 65, // 2 x i64
- v3i64 = 66, // 3 x i64
- v4i64 = 67, // 4 x i64
- v8i64 = 68, // 8 x i64
- v16i64 = 69, // 16 x i64
- v32i64 = 70, // 32 x i64
- v64i64 = 71, // 64 x i64
- v128i64 = 72, // 128 x i64
- v256i64 = 73, // 256 x i64
-
- v1i128 = 74, // 1 x i128
+ v1i1 = 17, // 1 x i1
+ v2i1 = 18, // 2 x i1
+ v4i1 = 19, // 4 x i1
+ v8i1 = 20, // 8 x i1
+ v16i1 = 21, // 16 x i1
+ v32i1 = 22, // 32 x i1
+ v64i1 = 23, // 64 x i1
+ v128i1 = 24, // 128 x i1
+ v256i1 = 25, // 256 x i1
+ v512i1 = 26, // 512 x i1
+ v1024i1 = 27, // 1024 x i1
+
+ v128i2 = 28, // 128 x i2
+
+ v64i4 = 29, // 64 x i4
+
+ v1i8 = 30, // 1 x i8
+ v2i8 = 31, // 2 x i8
+ v4i8 = 32, // 4 x i8
+ v8i8 = 33, // 8 x i8
+ v16i8 = 34, // 16 x i8
+ v32i8 = 35, // 32 x i8
+ v64i8 = 36, // 64 x i8
+ v128i8 = 37, // 128 x i8
+ v256i8 = 38, // 256 x i8
+ v512i8 = 39, // 512 x i8
+ v1024i8 = 40, // 1024 x i8
+
+ v1i16 = 41, // 1 x i16
+ v2i16 = 42, // 2 x i16
+ v3i16 = 43, // 3 x i16
+ v4i16 = 44, // 4 x i16
+ v8i16 = 45, // 8 x i16
+ v16i16 = 46, // 16 x i16
+ v32i16 = 47, // 32 x i16
+ v64i16 = 48, // 64 x i16
+ v128i16 = 49, // 128 x i16
+ v256i16 = 50, // 256 x i16
+ v512i16 = 51, // 512 x i16
+
+ v1i32 = 52, // 1 x i32
+ v2i32 = 53, // 2 x i32
+ v3i32 = 54, // 3 x i32
+ v4i32 = 55, // 4 x i32
+ v5i32 = 56, // 5 x i32
+ v6i32 = 57, // 6 x i32
+ v7i32 = 58, // 7 x i32
+ v8i32 = 59, // 8 x i32
+ v16i32 = 60, // 16 x i32
+ v32i32 = 61, // 32 x i32
+ v64i32 = 62, // 64 x i32
+ v128i32 = 63, // 128 x i32
+ v256i32 = 64, // 256 x i32
+ v512i32 = 65, // 512 x i32
+ v1024i32 = 66, // 1024 x i32
+ v2048i32 = 67, // 2048 x i32
+
+ v1i64 = 68, // 1 x i64
+ v2i64 = 69, // 2 x i64
+ v3i64 = 70, // 3 x i64
+ v4i64 = 71, // 4 x i64
+ v8i64 = 72, // 8 x i64
+ v16i64 = 73, // 16 x i64
+ v32i64 = 74, // 32 x i64
+ v64i64 = 75, // 64 x i64
+ v128i64 = 76, // 128 x i64
+ v256i64 = 77, // 256 x i64
+
+ v1i128 = 78, // 1 x i128
FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
- v1f16 = 75, // 1 x f16
- v2f16 = 76, // 2 x f16
- v3f16 = 77, // 3 x f16
- v4f16 = 78, // 4 x f16
- v8f16 = 79, // 8 x f16
- v16f16 = 80, // 16 x f16
- v32f16 = 81, // 32 x f16
- v64f16 = 82, // 64 x f16
- v128f16 = 83, // 128 x f16
- v256f16 = 84, // 256 x f16
- v512f16 = 85, // 256 x f16
-
- v2bf16 = 86, // 2 x bf16
- v3bf16 = 87, // 3 x bf16
- v4bf16 = 88, // 4 x bf16
- v8bf16 = 89, // 8 x bf16
- v16bf16 = 90, // 16 x bf16
- v32bf16 = 91, // 32 x bf16
- v64bf16 = 92, // 64 x bf16
- v128bf16 = 93, // 128 x bf16
-
- v1f32 = 94, // 1 x f32
- v2f32 = 95, // 2 x f32
- v3f32 = 96, // 3 x f32
- v4f32 = 97, // 4 x f32
- v5f32 = 98, // 5 x f32
- v6f32 = 99, // 6 x f32
- v7f32 = 100, // 7 x f32
- v8f32 = 101, // 8 x f32
- v16f32 = 102, // 16 x f32
- v32f32 = 103, // 32 x f32
- v64f32 = 104, // 64 x f32
- v128f32 = 105, // 128 x f32
- v256f32 = 106, // 256 x f32
- v512f32 = 107, // 512 x f32
- v1024f32 = 108, // 1024 x f32
- v2048f32 = 109, // 2048 x f32
-
- v1f64 = 110, // 1 x f64
- v2f64 = 111, // 2 x f64
- v3f64 = 112, // 3 x f64
- v4f64 = 113, // 4 x f64
- v8f64 = 114, // 8 x f64
- v16f64 = 115, // 16 x f64
- v32f64 = 116, // 32 x f64
- v64f64 = 117, // 64 x f64
- v128f64 = 118, // 128 x f64
- v256f64 = 119, // 256 x f64
+ v1f16 = 79, // 1 x f16
+ v2f16 = 80, // 2 x f16
+ v3f16 = 81, // 3 x f16
+ v4f16 = 82, // 4 x f16
+ v8f16 = 83, // 8 x f16
+ v16f16 = 84, // 16 x f16
+ v32f16 = 85, // 32 x f16
+ v64f16 = 86, // 64 x f16
+ v128f16 = 87, // 128 x f16
+ v256f16 = 88, // 256 x f16
+ v512f16 = 89, // 512 x f16
+
+ v2bf16 = 90, // 2 x bf16
+ v3bf16 = 91, // 3 x bf16
+ v4bf16 = 92, // 4 x bf16
+ v8bf16 = 93, // 8 x bf16
+ v16bf16 = 94, // 16 x bf16
+ v32bf16 = 95, // 32 x bf16
+ v64bf16 = 96, // 64 x bf16
+ v128bf16 = 97, // 128 x bf16
+
+ v1f32 = 98, // 1 x f32
+ v2f32 = 99, // 2 x f32
+ v3f32 = 100, // 3 x f32
+ v4f32 = 101, // 4 x f32
+ v5f32 = 102, // 5 x f32
+ v6f32 = 103, // 6 x f32
+ v7f32 = 104, // 7 x f32
+ v8f32 = 105, // 8 x f32
+ v16f32 = 106, // 16 x f32
+ v32f32 = 107, // 32 x f32
+ v64f32 = 108, // 64 x f32
+ v128f32 = 109, // 128 x f32
+ v256f32 = 110, // 256 x f32
+ v512f32 = 111, // 512 x f32
+ v1024f32 = 112, // 1024 x f32
+ v2048f32 = 113, // 2048 x f32
+
+ v1f64 = 114, // 1 x f64
+ v2f64 = 115, // 2 x f64
+ v3f64 = 116, // 3 x f64
+ v4f64 = 117, // 4 x f64
+ v8f64 = 118, // 8 x f64
+ v16f64 = 119, // 16 x f64
+ v32f64 = 120, // 32 x f64
+ v64f64 = 121, // 64 x f64
+ v128f64 = 122, // 128 x f64
+ v256f64 = 123, // 256 x f64
FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16,
LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
@@ -185,68 +191,70 @@ namespace llvm {
FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
- nxv1i1 = 120, // n x 1 x i1
- nxv2i1 = 121, // n x 2 x i1
- nxv4i1 = 122, // n x 4 x i1
- nxv8i1 = 123, // n x 8 x i1
- nxv16i1 = 124, // n x 16 x i1
- nxv32i1 = 125, // n x 32 x i1
- nxv64i1 = 126, // n x 64 x i1
-
- nxv1i8 = 127, // n x 1 x i8
- nxv2i8 = 128, // n x 2 x i8
- nxv4i8 = 129, // n x 4 x i8
- nxv8i8 = 130, // n x 8 x i8
- nxv16i8 = 131, // n x 16 x i8
- nxv32i8 = 132, // n x 32 x i8
- nxv64i8 = 133, // n x 64 x i8
-
- nxv1i16 = 134, // n x 1 x i16
- nxv2i16 = 135, // n x 2 x i16
- nxv4i16 = 136, // n x 4 x i16
- nxv8i16 = 137, // n x 8 x i16
- nxv16i16 = 138, // n x 16 x i16
- nxv32i16 = 139, // n x 32 x i16
-
- nxv1i32 = 140, // n x 1 x i32
- nxv2i32 = 141, // n x 2 x i32
- nxv4i32 = 142, // n x 4 x i32
- nxv8i32 = 143, // n x 8 x i32
- nxv16i32 = 144, // n x 16 x i32
- nxv32i32 = 145, // n x 32 x i32
-
- nxv1i64 = 146, // n x 1 x i64
- nxv2i64 = 147, // n x 2 x i64
- nxv4i64 = 148, // n x 4 x i64
- nxv8i64 = 149, // n x 8 x i64
- nxv16i64 = 150, // n x 16 x i64
- nxv32i64 = 151, // n x 32 x i64
+ nxv1i1 = 124, // n x 1 x i1
+ nxv2i1 = 125, // n x 2 x i1
+ nxv4i1 = 126, // n x 4 x i1
+ nxv8i1 = 127, // n x 8 x i1
+ nxv16i1 = 128, // n x 16 x i1
+ nxv32i1 = 129, // n x 32 x i1
+ nxv64i1 = 130, // n x 64 x i1
+
+ nxv1i8 = 131, // n x 1 x i8
+ nxv2i8 = 132, // n x 2 x i8
+ nxv4i8 = 133, // n x 4 x i8
+ nxv8i8 = 134, // n x 8 x i8
+ nxv16i8 = 135, // n x 16 x i8
+ nxv32i8 = 136, // n x 32 x i8
+ nxv64i8 = 137, // n x 64 x i8
+
+ nxv1i16 = 138, // n x 1 x i16
+ nxv2i16 = 139, // n x 2 x i16
+ nxv4i16 = 140, // n x 4 x i16
+ nxv8i16 = 141, // n x 8 x i16
+ nxv16i16 = 142, // n x 16 x i16
+ nxv32i16 = 143, // n x 32 x i16
+
+ nxv1i32 = 144, // n x 1 x i32
+ nxv2i32 = 145, // n x 2 x i32
+ nxv4i32 = 146, // n x 4 x i32
+ nxv8i32 = 147, // n x 8 x i32
+ nxv16i32 = 148, // n x 16 x i32
+ nxv32i32 = 149, // n x 32 x i32
+
+ nxv1i64 = 150, // n x 1 x i64
+ nxv2i64 = 151, // n x 2 x i64
+ nxv4i64 = 152, // n x 4 x i64
+ nxv8i64 = 153, // n x 8 x i64
+ nxv16i64 = 154, // n x 16 x i64
+ nxv32i64 = 155, // n x 32 x i64
FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
- nxv1f16 = 152, // n x 1 x f16
- nxv2f16 = 153, // n x 2 x f16
- nxv4f16 = 154, // n x 4 x f16
- nxv8f16 = 155, // n x 8 x f16
- nxv16f16 = 156, // n x 16 x f16
- nxv32f16 = 157, // n x 32 x f16
-
- nxv1bf16 = 158, // n x 1 x bf16
- nxv2bf16 = 159, // n x 2 x bf16
- nxv4bf16 = 160, // n x 4 x bf16
- nxv8bf16 = 161, // n x 8 x bf16
-
- nxv1f32 = 162, // n x 1 x f32
- nxv2f32 = 163, // n x 2 x f32
- nxv4f32 = 164, // n x 4 x f32
- nxv8f32 = 165, // n x 8 x f32
- nxv16f32 = 166, // n x 16 x f32
-
- nxv1f64 = 167, // n x 1 x f64
- nxv2f64 = 168, // n x 2 x f64
- nxv4f64 = 169, // n x 4 x f64
- nxv8f64 = 170, // n x 8 x f64
+ nxv1f16 = 156, // n x 1 x f16
+ nxv2f16 = 157, // n x 2 x f16
+ nxv4f16 = 158, // n x 4 x f16
+ nxv8f16 = 159, // n x 8 x f16
+ nxv16f16 = 160, // n x 16 x f16
+ nxv32f16 = 161, // n x 32 x f16
+
+ nxv1bf16 = 162, // n x 1 x bf16
+ nxv2bf16 = 163, // n x 2 x bf16
+ nxv4bf16 = 164, // n x 4 x bf16
+ nxv8bf16 = 165, // n x 8 x bf16
+ nxv16bf16 = 166, // n x 16 x bf16
+ nxv32bf16 = 167, // n x 32 x bf16
+
+ nxv1f32 = 168, // n x 1 x f32
+ nxv2f32 = 169, // n x 2 x f32
+ nxv4f32 = 170, // n x 4 x f32
+ nxv8f32 = 171, // n x 8 x f32
+ nxv16f32 = 172, // n x 16 x f32
+
+ nxv1f64 = 173, // n x 1 x f64
+ nxv2f64 = 174, // n x 2 x f64
+ nxv4f64 = 175, // n x 4 x f64
+ nxv8f64 = 176, // n x 8 x f64
FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16,
LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
@@ -257,20 +265,20 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
- x86mmx = 171, // This is an X86 MMX value
+ x86mmx = 177, // This is an X86 MMX value
- Glue = 172, // This glues nodes together during pre-RA sched
+ Glue = 178, // This glues nodes together during pre-RA sched
- isVoid = 173, // This has no value
+ isVoid = 179, // This has no value
- Untyped = 174, // This value takes a register, but has
+ Untyped = 180, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- funcref = 175, // WebAssembly's funcref type
- externref = 176, // WebAssembly's externref type
- x86amx = 177, // This is an X86 AMX value
- i64x8 = 178, // 8 Consecutive GPRs (AArch64)
+ funcref = 181, // WebAssembly's funcref type
+ externref = 182, // WebAssembly's externref type
+ x86amx = 183, // This is an X86 AMX value
+ i64x8 = 184, // 8 Consecutive GPRs (AArch64)
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
LAST_VALUETYPE = i64x8, // This always remains at the end of the list.
@@ -415,10 +423,11 @@ namespace llvm {
/// Return true if this is a 256-bit vector type.
bool is256BitVector() const {
return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v16bf16 ||
- SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
- SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
- SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 ||
- SimpleTy == MVT::v256i1);
+ SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
+ SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
+ SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 ||
+ SimpleTy == MVT::v256i1 || SimpleTy == MVT::v128i2 ||
+ SimpleTy == MVT::v64i4);
}
/// Return true if this is a 512-bit vector type.
@@ -517,6 +526,7 @@ namespace llvm {
}
MVT getVectorElementType() const {
+ // clang-format off
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
@@ -538,6 +548,8 @@ namespace llvm {
case nxv16i1:
case nxv32i1:
case nxv64i1: return i1;
+ case v128i2: return i2;
+ case v64i4: return i4;
case v1i8:
case v2i8:
case v4i8:
@@ -640,7 +652,9 @@ namespace llvm {
case nxv1bf16:
case nxv2bf16:
case nxv4bf16:
- case nxv8bf16: return bf16;
+ case nxv8bf16:
+ case nxv16bf16:
+ case nxv32bf16: return bf16;
case v1f32:
case v2f32:
case v3f32:
@@ -677,6 +691,7 @@ namespace llvm {
case nxv4f64:
case nxv8f64: return f64;
}
+ // clang-format on
}
/// Given a vector type, return the minimum number of elements it contains.
@@ -705,6 +720,7 @@ namespace llvm {
case v256f32:
case v256f64: return 256;
case v128i1:
+ case v128i2:
case v128i8:
case v128i16:
case v128i32:
@@ -714,6 +730,7 @@ namespace llvm {
case v128f32:
case v128f64: return 128;
case v64i1:
+ case v64i4:
case v64i8:
case v64i16:
case v64i32:
@@ -738,7 +755,8 @@ namespace llvm {
case nxv32i16:
case nxv32i32:
case nxv32i64:
- case nxv32f16: return 32;
+ case nxv32f16:
+ case nxv32bf16: return 32;
case v16i1:
case v16i8:
case v16i16:
@@ -754,6 +772,7 @@ namespace llvm {
case nxv16i32:
case nxv16i64:
case nxv16f16:
+ case nxv16bf16:
case nxv16f32: return 16;
case v8i1:
case v8i8:
@@ -883,8 +902,10 @@ namespace llvm {
case i1:
case v1i1: return TypeSize::Fixed(1);
case nxv1i1: return TypeSize::Scalable(1);
+ case i2:
case v2i1: return TypeSize::Fixed(2);
case nxv2i1: return TypeSize::Scalable(2);
+ case i4:
case v4i1: return TypeSize::Fixed(4);
case nxv4i1: return TypeSize::Scalable(4);
case i8 :
@@ -977,6 +998,8 @@ namespace llvm {
case v7i32:
case v7f32: return TypeSize::Fixed(224);
case v256i1:
+ case v128i2:
+ case v64i4:
case v32i8:
case v16i16:
case v8i32:
@@ -990,6 +1013,7 @@ namespace llvm {
case nxv8i32:
case nxv4i64:
case nxv16f16:
+ case nxv16bf16:
case nxv8f32:
case nxv4f64: return TypeSize::Scalable(256);
case i64x8:
@@ -1007,6 +1031,7 @@ namespace llvm {
case nxv16i32:
case nxv8i64:
case nxv32f16:
+ case nxv32bf16:
case nxv16f32:
case nxv8f64: return TypeSize::Scalable(512);
case v1024i1:
@@ -1078,6 +1103,12 @@ namespace llvm {
return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()};
}
+ /// Return the number of bytes overwritten by a store of this value type or
+ /// this value type's element type in the case of a vector.
+ uint64_t getScalarStoreSize() const {
+ return getScalarType().getStoreSize().getFixedSize();
+ }
+
/// Return the number of bits overwritten by a store of the specified value
/// type.
///
@@ -1165,6 +1196,10 @@ namespace llvm {
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
case 1:
return MVT::i1;
+ case 2:
+ return MVT::i2;
+ case 4:
+ return MVT::i4;
case 8:
return MVT::i8;
case 16:
@@ -1179,6 +1214,7 @@ namespace llvm {
}
static MVT getVectorVT(MVT VT, unsigned NumElements) {
+ // clang-format off
switch (VT.SimpleTy) {
default:
break;
@@ -1195,6 +1231,12 @@ namespace llvm {
if (NumElements == 512) return MVT::v512i1;
if (NumElements == 1024) return MVT::v1024i1;
break;
+ case MVT::i2:
+ if (NumElements == 128) return MVT::v128i2;
+ break;
+ case MVT::i4:
+ if (NumElements == 64) return MVT::v64i4;
+ break;
case MVT::i8:
if (NumElements == 1) return MVT::v1i8;
if (NumElements == 2) return MVT::v2i8;
@@ -1309,6 +1351,7 @@ namespace llvm {
break;
}
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ // clang-format on
}
static MVT getScalableVectorVT(MVT VT, unsigned NumElements) {
@@ -1370,6 +1413,8 @@ namespace llvm {
if (NumElements == 2) return MVT::nxv2bf16;
if (NumElements == 4) return MVT::nxv4bf16;
if (NumElements == 8) return MVT::nxv8bf16;
+ if (NumElements == 16) return MVT::nxv16bf16;
+ if (NumElements == 32) return MVT::nxv32bf16;
break;
case MVT::f32:
if (NumElements == 1) return MVT::nxv1f32;
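
The hunks above extend MVT's switch tables so the new i2/i4 element types and the wider scalable bf16 vectors round-trip through the usual helpers. A minimal sketch (not part of the patch; it assumes the LLVM 15-era header location llvm/Support/MachineValueType.h):

#include "llvm/Support/MachineValueType.h"
#include <cassert>
using namespace llvm;

void mvtI2I4Example() {
  MVT V = MVT::getVectorVT(MVT::i2, 128);          // yields MVT::v128i2
  assert(V == MVT::v128i2);
  assert(V.getVectorElementType() == MVT::i2);
  assert(V.getSizeInBits().getFixedSize() == 256); // 128 elements x 2 bits
  assert(V.getScalarStoreSize() == 1);             // an i2 store rounds up to one byte
  assert(MVT::getScalableVectorVT(MVT::bf16, 32) == MVT::nxv32bf16);
}
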
diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 753b1998c40c..8079aa436933 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -571,6 +571,33 @@ inline unsigned countPopulation(T Value) {
return detail::PopulationCounter<T, sizeof(T)>::count(Value);
}
+/// Return true if the argument contains a non-empty sequence of ones with the
+/// remainder zero (32-bit version). Ex. isShiftedMask_32(0x0000FF00U) == true.
+/// If true, \p MaskIdx is set to the index of the lowest set bit and
+/// \p MaskLen to the length of the mask; otherwise neither is updated.
+inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
+ unsigned &MaskLen) {
+ if (!isShiftedMask_32(Value))
+ return false;
+ MaskIdx = countTrailingZeros(Value);
+ MaskLen = countPopulation(Value);
+ return true;
+}
+
+/// Return true if the argument contains a non-empty sequence of ones with the
+/// remainder zero (64-bit version). If true, \p MaskIdx is set to the index
+/// of the lowest set bit and \p MaskLen to the length of the mask; otherwise
+/// neither is updated.
+inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
+ unsigned &MaskLen) {
+ if (!isShiftedMask_64(Value))
+ return false;
+ MaskIdx = countTrailingZeros(Value);
+ MaskLen = countPopulation(Value);
+ return true;
+}
+
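
A minimal usage sketch of the new overloads (not part of the patch):

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

void shiftedMaskExample() {
  unsigned Idx = 0, Len = 0;
  assert(isShiftedMask_32(0x0000FF00U, Idx, Len));
  assert(Idx == 8 && Len == 8);            // lowest set bit at 8, run of 8 ones
  assert(!isShiftedMask_32(0U, Idx, Len)); // empty mask: Idx and Len untouched
  assert(Idx == 8 && Len == 8);
}
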
/// Compile time Log2.
/// Valid only for positive powers of two.
template <size_t kValue> constexpr inline size_t CTLog2() {
@@ -680,7 +707,7 @@ constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
-inline uint64_t NextPowerOf2(uint64_t A) {
+constexpr inline uint64_t NextPowerOf2(uint64_t A) {
A |= (A >> 1);
A |= (A >> 2);
A |= (A >> 4);
@@ -708,27 +735,34 @@ inline uint64_t PowerOf2Ceil(uint64_t A) {
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
-/// If non-zero \p Skew is specified, the return value will be a minimal
-/// integer that is greater than or equal to \p Value and equal to
-/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
-/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
-///
/// Examples:
/// \code
/// alignTo(5, 8) = 8
/// alignTo(17, 8) = 24
/// alignTo(~0LL, 8) = 0
/// alignTo(321, 255) = 510
+/// \endcode
+inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
+ assert(Align != 0u && "Align can't be 0.");
+ return (Value + Align - 1) / Align * Align;
+}
+
+/// If non-zero \p Skew is specified, the return value will be a minimal
+/// integer that is greater than or equal to \p Value and equal to
+/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
+/// \p Align, its value is adjusted to '\p Skew mod \p Align'. \p Align must
+/// be non-zero.
///
+/// Examples:
+/// \code
/// alignTo(5, 8, 7) = 7
/// alignTo(17, 8, 1) = 17
/// alignTo(~0LL, 8, 3) = 3
/// alignTo(321, 255, 42) = 552
/// \endcode
-inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
+inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) {
assert(Align != 0u && "Align can't be 0.");
Skew %= Align;
- return (Value + Align - 1 - Skew) / Align * Align + Skew;
+ return alignTo(Value - Skew, Align) + Skew;
}
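
The split leaves behavior unchanged; the documented examples still hold. A sketch (not part of the patch):

#include "llvm/Support/MathExtras.h"
#include <cassert>

void alignToExample() {
  assert(llvm::alignTo(17, 8) == 24);    // plain overload
  assert(llvm::alignTo(17, 8, 1) == 17); // 8 * 2 + 1
  assert(llvm::alignTo(5, 8, 7) == 7);   // 8 * 0 + 7 (Skew already < Align)
}
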
/// Returns the next integer (mod 2**64) that is greater than or equal to
@@ -879,7 +913,7 @@ extern const float huge_valf;
/// Add two signed integers, computing the two's complement truncated result,
-/// returning true if overflow occured.
+/// returning true if overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_add_overflow)
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 04caf5eac961..ff113f9b44c4 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -193,11 +193,11 @@ void parallelSort(RandomAccessIterator Start, RandomAccessIterator End,
llvm::sort(Start, End, Comp);
}
-void parallelForEachN(size_t Begin, size_t End, function_ref<void(size_t)> Fn);
+void parallelFor(size_t Begin, size_t End, function_ref<void(size_t)> Fn);
template <class IterTy, class FuncTy>
void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
- parallelForEachN(0, End - Begin, [&](size_t I) { Fn(Begin[I]); });
+ parallelFor(0, End - Begin, [&](size_t I) { Fn(Begin[I]); });
}
template <class IterTy, class ResultTy, class ReduceFuncTy,
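
Callers only need the rename; semantics are unchanged. A sketch (not part of the patch):

#include "llvm/Support/Parallel.h"
#include <vector>

void squareAll(std::vector<int> &Out) {
  // Runs the body in parallel over the half-open index range [0, Out.size()).
  llvm::parallelFor(0, Out.size(), [&](size_t I) { Out[I] = int(I) * int(I); });
}
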
diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h
index da5095714f48..ce69f32b6cc8 100644
--- a/llvm/include/llvm/Support/Path.h
+++ b/llvm/include/llvm/Support/Path.h
@@ -19,7 +19,6 @@
#include "llvm/ADT/iterator.h"
#include "llvm/Support/DataTypes.h"
#include <iterator>
-#include <system_error>
namespace llvm {
namespace sys {
diff --git a/llvm/include/llvm/Support/PluginLoader.h b/llvm/include/llvm/Support/PluginLoader.h
index 95c087f03d9b..bdd36366d1cf 100644
--- a/llvm/include/llvm/Support/PluginLoader.h
+++ b/llvm/include/llvm/Support/PluginLoader.h
@@ -31,9 +31,9 @@ namespace llvm {
#ifndef DONT_GET_PLUGIN_LOADER_OPTION
// This causes operator= above to be invoked for every -load option.
- static cl::opt<PluginLoader, false, cl::parser<std::string> >
- LoadOpt("load", cl::ZeroOrMore, cl::value_desc("pluginfilename"),
- cl::desc("Load the specified plugin"));
+ static cl::opt<PluginLoader, false, cl::parser<std::string>>
+ LoadOpt("load", cl::value_desc("pluginfilename"),
+ cl::desc("Load the specified plugin"));
#endif
}
diff --git a/llvm/include/llvm/Support/Printable.h b/llvm/include/llvm/Support/Printable.h
index 6403c32aad67..8e76f01f6ba2 100644
--- a/llvm/include/llvm/Support/Printable.h
+++ b/llvm/include/llvm/Support/Printable.h
@@ -24,12 +24,12 @@ class raw_ostream;
/// This class is useful to construct print helpers for raw_ostream.
///
/// Example:
-/// Printable PrintRegister(unsigned Register) {
+/// Printable printRegister(unsigned Register) {
/// return Printable([Register](raw_ostream &OS) {
/// OS << getRegisterName(Register);
-/// }
+/// });
/// }
-/// ... OS << PrintRegister(Register); ...
+/// ... OS << printRegister(Register); ...
///
/// Implementation note: Ideally this would just be a typedef, but doing so
/// leads to operator << being ambiguous as function has matching constructors
@@ -47,6 +47,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) {
return OS;
}
-}
+} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h
index ee03efeed9b2..9f56bd9b6e61 100644
--- a/llvm/include/llvm/Support/Process.h
+++ b/llvm/include/llvm/Support/Process.h
@@ -25,7 +25,6 @@
#define LLVM_SUPPORT_PROCESS_H
#include "llvm/ADT/Optional.h"
-#include "llvm/Support/AllocatorBase.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Error.h"
diff --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h
index f91fca1c4464..4cb55c42c377 100644
--- a/llvm/include/llvm/Support/Program.h
+++ b/llvm/include/llvm/Support/Program.h
@@ -14,7 +14,6 @@
#define LLVM_SUPPORT_PROGRAM_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
@@ -24,6 +23,7 @@
#include <system_error>
namespace llvm {
+class BitVector;
namespace sys {
/// This is the OS-specific separator for PATH like environment variables:
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
index 7fa0e6ee3acf..eac6cc0925fb 100644
--- a/llvm/include/llvm/Support/RISCVISAInfo.h
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -66,6 +66,7 @@ public:
bool hasExtension(StringRef Ext) const;
std::string toString() const;
std::vector<std::string> toFeatureVector() const;
+ StringRef computeDefaultABI() const;
static bool isSupportedExtensionFeature(StringRef Ext);
static bool isSupportedExtension(StringRef Ext);
@@ -89,6 +90,7 @@ private:
Error checkDependency();
void updateImplication();
+ void updateCombination();
void updateFLen();
void updateMinVLen();
void updateMaxELen();
diff --git a/llvm/include/llvm/Support/RWMutex.h b/llvm/include/llvm/Support/RWMutex.h
index 33a5d3efffee..3dd962586c36 100644
--- a/llvm/include/llvm/Support/RWMutex.h
+++ b/llvm/include/llvm/Support/RWMutex.h
@@ -93,8 +93,8 @@ private:
/// running in multithreaded mode.
template <bool mt_only> class SmartRWMutex {
// shared_mutex (C++17) is more efficient than shared_timed_mutex (C++14)
- // on Windows and always available on MSVC.
-#if defined(_MSC_VER) || __cplusplus > 201402L
+ // on Windows and always available on MSVC except with libc++.
+#if (defined(_MSC_VER) && !defined(_LIBCPP_VERSION)) || __cplusplus > 201402L
std::shared_mutex impl;
#else
#if !defined(LLVM_USE_RW_MUTEX_IMPL)
diff --git a/llvm/include/llvm/Support/SHA1.h b/llvm/include/llvm/Support/SHA1.h
index efd8513cc201..ae6d62aed723 100644
--- a/llvm/include/llvm/Support/SHA1.h
+++ b/llvm/include/llvm/Support/SHA1.h
@@ -36,17 +36,17 @@ public:
/// Digest more data.
void update(StringRef Str);
- /// Return a reference to the current raw 160-bits SHA1 for the digested data
+ /// Return the current raw 160-bit SHA1 for the digested data
/// since the last call to init(). This call will add data to the internal
/// state and as such is not suited for getting an intermediate result
/// (see result()).
- StringRef final();
+ std::array<uint8_t, 20> final();
- /// Return a reference to the current raw 160-bits SHA1 for the digested data
+ /// Return the current raw 160-bit SHA1 for the digested data
/// since the last call to init(). This is suitable for getting the SHA1 at
/// any time without invalidating the internal state so that more calls can be
/// made into update.
- StringRef result();
+ std::array<uint8_t, 20> result();
/// Returns a raw 160-bit SHA1 hash for the given data.
static std::array<uint8_t, 20> hash(ArrayRef<uint8_t> Data);
@@ -68,14 +68,13 @@ private:
uint8_t BufferOffset;
} InternalState;
- // Internal copy of the hash, populated and accessed on calls to result()
- uint32_t HashResult[HASH_LENGTH / 4];
-
// Helper
void writebyte(uint8_t data);
void hashBlock();
void addUncounted(uint8_t data);
void pad();
+
+ void final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult);
};
} // end llvm namespace
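
With the digest now returned by value, callers no longer hold a StringRef into the hasher's internal state; SHA256.h below gets the same treatment with a 32-byte array. A sketch (not part of the patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SHA1.h"
#include <array>
#include <cstdint>

std::array<uint8_t, 20> digestOf(llvm::StringRef Data) {
  llvm::SHA1 Hasher;
  Hasher.update(Data);
  return Hasher.result(); // non-destructive: update() may still be called
}
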
diff --git a/llvm/include/llvm/Support/SHA256.h b/llvm/include/llvm/Support/SHA256.h
index 9e295b0b9fae..68b32c7b4834 100644
--- a/llvm/include/llvm/Support/SHA256.h
+++ b/llvm/include/llvm/Support/SHA256.h
@@ -43,17 +43,17 @@ public:
/// Digest more data.
void update(StringRef Str);
- /// Return a reference to the current raw 256-bits SHA256 for the digested
+ /// Return the current raw 256-bit SHA256 for the digested
/// data since the last call to init(). This call will add data to the
/// internal state and as such is not suited for getting an intermediate
/// result (see result()).
- StringRef final();
+ std::array<uint8_t, 32> final();
- /// Return a reference to the current raw 256-bits SHA256 for the digested
+ /// Return the current raw 256-bit SHA256 for the digested
/// data since the last call to init(). This is suitable for getting the
/// SHA256 at any time without invalidating the internal state so that more
/// calls can be made into update.
- StringRef result();
+ std::array<uint8_t, 32> result();
/// Returns a raw 256-bit SHA256 hash for the given data.
static std::array<uint8_t, 32> hash(ArrayRef<uint8_t> Data);
@@ -75,14 +75,13 @@ private:
uint8_t BufferOffset;
} InternalState;
- // Internal copy of the hash, populated and accessed on calls to result()
- uint32_t HashResult[HASH_LENGTH / 4];
-
// Helper
void writebyte(uint8_t data);
void hashBlock();
void addUncounted(uint8_t data);
void pad();
+
+ void final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult);
};
} // namespace llvm
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index 6b5daf710c9f..c9eabfb3788c 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -81,7 +81,6 @@ struct FlagEntry {
};
raw_ostream &operator<<(raw_ostream &OS, const HexNumber &Value);
-std::string to_hexString(uint64_t Value, bool UpperCase = true);
template <class T> std::string to_string(const T &Value) {
std::string number;
@@ -95,7 +94,7 @@ std::string enumToString(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues) {
for (const EnumEntry<TEnum> &EnumItem : EnumValues)
if (EnumItem.Value == Value)
return std::string(EnumItem.AltName);
- return to_hexString(Value, false);
+ return utohexstr(Value, true);
}
class ScopedPrinter {
@@ -107,7 +106,7 @@ public:
ScopedPrinter(raw_ostream &OS,
ScopedPrinterKind Kind = ScopedPrinterKind::Base)
- : OS(OS), IndentLevel(0), Kind(Kind) {}
+ : OS(OS), Kind(Kind) {}
ScopedPrinterKind getKind() const { return Kind; }
@@ -498,7 +497,7 @@ private:
}
raw_ostream &OS;
- int IndentLevel;
+ int IndentLevel = 0;
StringRef Prefix;
ScopedPrinterKind Kind;
};
diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h
index 44f5a750ff5c..937e0572d4a7 100644
--- a/llvm/include/llvm/Support/Signals.h
+++ b/llvm/include/llvm/Support/Signals.h
@@ -14,6 +14,7 @@
#ifndef LLVM_SUPPORT_SIGNALS_H
#define LLVM_SUPPORT_SIGNALS_H
+#include <cstdint>
#include <string>
namespace llvm {
diff --git a/llvm/include/llvm/Support/Signposts.h b/llvm/include/llvm/Support/Signposts.h
index dabbba6f89d1..37089bd1c17d 100644
--- a/llvm/include/llvm/Support/Signposts.h
+++ b/llvm/include/llvm/Support/Signposts.h
@@ -16,11 +16,11 @@
#ifndef LLVM_SUPPORT_SIGNPOSTS_H
#define LLVM_SUPPORT_SIGNPOSTS_H
-#include "llvm/ADT/StringRef.h"
#include <memory>
namespace llvm {
class SignpostEmitterImpl;
+class StringRef;
/// Manages the emission of signposts into the recording method supported by
/// the OS.
diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h
index 28716b42f4ab..eced4574c82e 100644
--- a/llvm/include/llvm/Support/SourceMgr.h
+++ b/llvm/include/llvm/Support/SourceMgr.h
@@ -100,6 +100,9 @@ public:
SourceMgr &operator=(SourceMgr &&) = default;
~SourceMgr() = default;
+ /// Return the include directories of this source manager.
+ ArrayRef<std::string> getIncludeDirs() const { return IncludeDirectories; }
+
void setIncludeDirs(const std::vector<std::string> &Dirs) {
IncludeDirectories = Dirs;
}
@@ -147,6 +150,22 @@ public:
return Buffers.size();
}
+ /// Takes the source buffers from the given source manager and appends them to
+ /// the current manager. `MainBufferIncludeLoc` is an optional include
+ /// location to attach to the main buffer of `SrcMgr` after it gets moved to
+ /// the current manager.
+ void takeSourceBuffersFrom(SourceMgr &SrcMgr,
+ SMLoc MainBufferIncludeLoc = SMLoc()) {
+ if (SrcMgr.Buffers.empty())
+ return;
+
+ size_t OldNumBuffers = getNumBuffers();
+ std::move(SrcMgr.Buffers.begin(), SrcMgr.Buffers.end(),
+ std::back_inserter(Buffers));
+ SrcMgr.Buffers.clear();
+ Buffers[OldNumBuffers].IncludeLoc = MainBufferIncludeLoc;
+ }
+
/// Search for a file with the specified name in the current directory or in
/// one of the IncludeDirs.
///
@@ -156,6 +175,17 @@ public:
unsigned AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc,
std::string &IncludedFile);
+ /// Search for a file with the specified name in the current directory or in
+ /// one of the IncludeDirs, and try to open it **without** adding to the
+ /// SourceMgr. If the opened file is intended to be added to the source
+ /// manager, prefer `AddIncludeFile` instead.
+ ///
+ /// If no file is found, this returns an Error, otherwise it returns the
+ /// buffer of the stacked file. The full path to the included file can be
+ /// found in \p IncludedFile.
+ ErrorOr<std::unique_ptr<MemoryBuffer>>
+ OpenIncludeFile(const std::string &Filename, std::string &IncludedFile);
+
/// Return the ID of the buffer containing the specified location.
///
/// 0 is returned if the buffer is not found.
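
A sketch of the two new entry points (not part of the patch; the file name is hypothetical):

#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include <memory>
#include <string>
using namespace llvm;

// Move every buffer of Inner into Dest; Inner's main buffer is marked as
// included from IncludeLoc and Inner is left empty.
void mergeManagers(SourceMgr &Dest, SourceMgr &Inner, SMLoc IncludeLoc) {
  Dest.takeSourceBuffersFrom(Inner, IncludeLoc);
}

// Probe for a file via Dest's include directories without registering the
// buffer with Dest ("inner.td" is a hypothetical name).
ErrorOr<std::unique_ptr<MemoryBuffer>> probeInclude(SourceMgr &Dest) {
  std::string FullPath;
  return Dest.OpenIncludeFile("inner.td", FullPath);
}
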
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 428cbb44705d..8df7ced0029d 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -322,6 +322,9 @@ HANDLE_TARGET_OPCODE(G_BITCAST)
/// Generic freeze.
HANDLE_TARGET_OPCODE(G_FREEZE)
+// INTRINSIC fptrunc_round intrinsic.
+HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND)
+
/// INTRINSIC trunc intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
@@ -617,6 +620,9 @@ HANDLE_TARGET_OPCODE(G_FABS)
/// f64) is allowed.
HANDLE_TARGET_OPCODE(G_FCOPYSIGN)
+/// Generic test for floating-point class.
+HANDLE_TARGET_OPCODE(G_IS_FPCLASS)
+
/// Generic FP canonicalize value.
HANDLE_TARGET_OPCODE(G_FCANONICALIZE)
diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index 02a8d72483db..c3a6cceaee6b 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -14,14 +14,14 @@
#ifndef LLVM_SUPPORT_TARGETPARSER_H
#define LLVM_SUPPORT_TARGETPARSER_H
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
// FIXME: vector is used because that's what clang uses for subtarget feature
// lists, but SmallVector would probably be better
-#include "llvm/Support/RISCVISAInfo.h"
#include <vector>
namespace llvm {
-class StringRef;
template <typename T> class SmallVectorImpl;
class Triple;
@@ -86,6 +86,7 @@ enum GPUKind : uint32_t {
GK_GFX909 = 65,
GK_GFX90A = 66,
GK_GFX90C = 67,
+ GK_GFX940 = 68,
GK_GFX1010 = 71,
GK_GFX1011 = 72,
@@ -97,9 +98,15 @@ enum GPUKind : uint32_t {
GK_GFX1033 = 78,
GK_GFX1034 = 79,
GK_GFX1035 = 80,
+ GK_GFX1036 = 81,
+
+ GK_GFX1100 = 90,
+ GK_GFX1101 = 91,
+ GK_GFX1102 = 92,
+ GK_GFX1103 = 93,
GK_AMDGCN_FIRST = GK_GFX600,
- GK_AMDGCN_LAST = GK_GFX1035,
+ GK_AMDGCN_LAST = GK_GFX1103,
};
/// Instruction set architecture version.
@@ -170,7 +177,6 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64);
void fillValidTuneCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64);
bool getCPUFeaturesExceptStdExt(CPUKind Kind, std::vector<StringRef> &Features);
StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64);
-StringRef computeDefaultABIFromArch(const llvm::RISCVISAInfo &ISAInfo);
} // namespace RISCV
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index 868dd2819f83..5e67a312d5c7 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -13,26 +13,42 @@
#ifndef LLVM_SUPPORT_THREADPOOL_H
#define LLVM_SUPPORT_THREADPOOL_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/RWMutex.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/thread.h"
#include <future>
#include <condition_variable>
+#include <deque>
#include <functional>
#include <memory>
#include <mutex>
-#include <queue>
#include <utility>
namespace llvm {
+class ThreadPoolTaskGroup;
+
/// A ThreadPool for asynchronous parallel execution on a defined number of
/// threads.
///
/// The pool keeps a vector of threads alive, waiting on a condition variable
/// for some work to become available.
+///
+/// It is possible to reuse one thread pool for different groups of tasks
+/// by grouping tasks using ThreadPoolTaskGroup. All tasks are processed using
+/// the same queue, but it is possible to wait only for a specific group of
+/// tasks to finish.
+///
+/// It is also possible for worker threads to submit new tasks and wait for
+/// them. Note that this may result in a deadlock in cases such as when a task
+/// (directly or indirectly) tries to wait for its own completion, or when all
+/// available threads are used up by tasks waiting for a task that has no thread
+/// left to run on (this includes waiting on the returned future). It should be
+/// generally safe to wait() for a group as long as groups do not form a cycle.
class ThreadPool {
public:
/// Construct a pool using the hardware strategy \p S for mapping hardware
@@ -47,23 +63,47 @@ public:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename Function, typename... Args>
- inline auto async(Function &&F, Args &&...ArgList) {
+ auto async(Function &&F, Args &&...ArgList) {
auto Task =
std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
return async(std::move(Task));
}
+ /// Overload, task will be in the given task group.
+ template <typename Function, typename... Args>
+ auto async(ThreadPoolTaskGroup &Group, Function &&F, Args &&...ArgList) {
+ auto Task =
+ std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
+ return async(Group, std::move(Task));
+ }
+
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename Func>
auto async(Func &&F) -> std::shared_future<decltype(F())> {
- return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F)));
+ return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F)),
+ nullptr);
+ }
+
+ template <typename Func>
+ auto async(ThreadPoolTaskGroup &Group, Func &&F)
+ -> std::shared_future<decltype(F())> {
+ return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F)),
+ &Group);
}
/// Blocking wait for all the threads to complete and the queue to be empty.
/// It is an error to try to add new tasks while blocking on this call.
+ /// Calling wait() from a task would deadlock waiting for itself.
void wait();
+ /// Blocking wait for all the tasks in the given group to complete.
+ /// It is possible to wait even inside a task, but waiting (directly or
+ /// indirectly) on itself will deadlock. If called from a task running on a
+ /// worker thread, the call may process pending tasks while waiting in order
+ /// not to waste the thread.
+ void wait(ThreadPoolTaskGroup &Group);
+
// TODO: misleading legacy name warning!
// Returns the maximum number of worker threads in the pool, not the current
// number of threads!
@@ -98,12 +138,15 @@ private:
std::move(F)};
}
- bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); }
+ /// Returns true if all tasks in the given group have finished (nullptr means
+ /// all tasks regardless of their group). QueueLock must be locked.
+ bool workCompletedUnlocked(ThreadPoolTaskGroup *Group) const;
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename ResTy>
- std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task) {
+ std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task,
+ ThreadPoolTaskGroup *Group) {
#if LLVM_ENABLE_THREADS
/// Wrap the Task in a std::function<void()> that sets the result of the
@@ -117,7 +160,7 @@ private:
// Don't allow enqueueing after disabling the pool
assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
- Tasks.push(std::move(R.first));
+ Tasks.emplace_back(std::make_pair(std::move(R.first), Group));
requestedThreads = ActiveThreads + Tasks.size();
}
QueueCondition.notify_one();
@@ -130,7 +173,7 @@ private:
auto Future = std::async(std::launch::deferred, std::move(Task)).share();
// Wrap the future so that both ThreadPool::wait() can operate and the
// returned future can be sync'ed on.
- Tasks.push([Future]() { Future.get(); });
+ Tasks.emplace_back(std::make_pair([Future]() { Future.get(); }, Group));
return Future;
#endif
}
@@ -139,25 +182,29 @@ private:
// Grow to ensure that we have at least `requested` Threads, but do not go
// over MaxThreadCount.
void grow(int requested);
+
+ void processTasks(ThreadPoolTaskGroup *WaitingForGroup);
#endif
/// Threads in flight
std::vector<llvm::thread> Threads;
/// Lock protecting access to the Threads vector.
- mutable std::mutex ThreadsLock;
+ mutable llvm::sys::RWMutex ThreadsLock;
/// Tasks waiting for execution in the pool.
- std::queue<std::function<void()>> Tasks;
+ std::deque<std::pair<std::function<void()>, ThreadPoolTaskGroup *>> Tasks;
/// Locking and signaling for accessing the Tasks queue.
std::mutex QueueLock;
std::condition_variable QueueCondition;
- /// Signaling for job completion
+ /// Signaling for job completion (all tasks or all tasks in a group).
std::condition_variable CompletionCondition;
/// Keep track of the number of thread actually busy
unsigned ActiveThreads = 0;
+ /// Number of threads active for tasks in the given group; only groups with
+ /// a non-zero count are present in the map.
+ DenseMap<ThreadPoolTaskGroup *, unsigned> ActiveGroups;
#if LLVM_ENABLE_THREADS // avoids warning for unused variable
/// Signal for the destruction of the pool, asking thread to exit.
@@ -169,6 +216,34 @@ private:
/// Maximum number of threads to potentially grow this pool to.
const unsigned MaxThreadCount;
};
-}
+
+/// A group of tasks to be run on a thread pool. Thread pool tasks in different
+/// groups can run on the same thread pool but can be waited for separately.
+/// It is even possible for tasks of one group to submit and wait for tasks
+/// of another group, as long as this does not form a loop.
+class ThreadPoolTaskGroup {
+public:
+ /// The ThreadPool argument is the thread pool to forward calls to.
+ ThreadPoolTaskGroup(ThreadPool &Pool) : Pool(Pool) {}
+
+ /// Blocking destructor: will wait for all the tasks in the group to complete
+ /// by calling ThreadPool::wait().
+ ~ThreadPoolTaskGroup() { wait(); }
+
+ /// Calls ThreadPool::async() for this group.
+ template <typename Function, typename... Args>
+ inline auto async(Function &&F, Args &&...ArgList) {
+ return Pool.async(*this, std::forward<Function>(F),
+ std::forward<Args>(ArgList)...);
+ }
+
+ /// Calls ThreadPool::wait() for this group.
+ void wait() { Pool.wait(*this); }
+
+private:
+ ThreadPool &Pool;
+};
+
+} // namespace llvm
#endif // LLVM_SUPPORT_THREADPOOL_H
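
A sketch of the task-group API (not part of the patch): two groups share one pool but are waited on independently.

#include "llvm/Support/ThreadPool.h"
using namespace llvm;

void groupedWork() {
  ThreadPool Pool;
  ThreadPoolTaskGroup Indexing(Pool);
  ThreadPoolTaskGroup Parsing(Pool);
  Indexing.async([] { /* index something */ });
  Parsing.async([] { /* parse something */ });
  Indexing.wait(); // Parsing's tasks may still be in flight here.
}                  // The destructors wait for any remaining tasks.
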
diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h
index 94de950d4470..1e7e5f7b8f50 100644
--- a/llvm/include/llvm/Support/Threading.h
+++ b/llvm/include/llvm/Support/Threading.h
@@ -15,13 +15,10 @@
#define LLVM_SUPPORT_THREADING_H
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/FunctionExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/Support/Compiler.h"
#include <ciso646> // So we can check the C++ standard lib macros.
-#include <functional>
#if defined(_MSC_VER)
// MSVC's call_once implementation worked since VS 2015, which is the minimum
@@ -236,15 +233,20 @@ bool llvm_is_multithreaded();
unsigned get_cpus();
enum class ThreadPriority {
+ /// Lower the current thread's priority as much as possible. Can be used
+ /// for long-running tasks that are not time critical; more energy-
+ /// efficient than Low.
Background = 0,
- Default = 1,
+
+ /// Lower the current thread's priority such that it does not affect
+ /// foreground tasks significantly. This is a good default for long-
+ /// running, latency-insensitive tasks to make sure the CPU is not hogged
+ /// by this task.
+ Low = 1,
+
+ /// Restore the current thread's priority to default scheduling priority.
+ Default = 2,
};
- /// If priority is Background tries to lower current threads priority such
- /// that it does not affect foreground tasks significantly. Can be used for
- /// long-running, latency-insensitive tasks to make sure cpu is not hogged by
- /// this task.
- /// If the priority is default tries to restore current threads priority to
- /// default scheduling priority.
enum class SetThreadPriorityResult { FAILURE, SUCCESS };
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
}
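
A sketch of the new Low level (not part of the patch); priority changes are best-effort and may return FAILURE on some platforms.

#include "llvm/Support/Threading.h"
using namespace llvm;

void runLatencyInsensitiveWork() {
  (void)set_thread_priority(ThreadPriority::Low);
  // ... long-running, latency-insensitive work ...
  (void)set_thread_priority(ThreadPriority::Default); // restore afterwards
}
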
diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h
index f772deca0301..0bfac498393f 100644
--- a/llvm/include/llvm/Support/TrigramIndex.h
+++ b/llvm/include/llvm/Support/TrigramIndex.h
@@ -27,12 +27,12 @@
#define LLVM_SUPPORT_TRIGRAMINDEX_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include <string>
#include <unordered_map>
#include <vector>
namespace llvm {
+class StringRef;
class TrigramIndex {
public:
diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index 6bddb602e8c1..0b40e970e8c9 100644
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -362,12 +362,31 @@ public:
LinearPolySize::get(getKnownMinValue() / RHS, isScalable()));
}
+ LeafTy multiplyCoefficientBy(ScalarTy RHS) const {
+ return static_cast<LeafTy>(
+ LinearPolySize::get(getKnownMinValue() * RHS, isScalable()));
+ }
+
LeafTy coefficientNextPowerOf2() const {
return static_cast<LeafTy>(LinearPolySize::get(
static_cast<ScalarTy>(llvm::NextPowerOf2(getKnownMinValue())),
isScalable()));
}
+ /// Returns true if there exists a value X where RHS.multiplyCoefficientBy(X)
+ /// will result in a value whose size matches our own.
+ bool hasKnownScalarFactor(const LinearPolySize &RHS) const {
+ return isScalable() == RHS.isScalable() &&
+ getKnownMinValue() % RHS.getKnownMinValue() == 0;
+ }
+
+ /// Returns a value X where RHS.multiplyCoefficientBy(X) will result in a
+ /// value whose size matches our own.
+ ScalarTy getKnownScalarFactor(const LinearPolySize &RHS) const {
+ assert(hasKnownScalarFactor(RHS) && "Expected RHS to be a known factor!");
+ return getKnownMinValue() / RHS.getKnownMinValue();
+ }
+
/// Printing function.
void print(raw_ostream &OS) const {
if (isScalable())
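
A sketch using ElementCount, one of the LinearPolySize instantiations (not part of the patch):

#include "llvm/Support/TypeSize.h"
#include <cassert>
using namespace llvm;

void scalarFactorExample() {
  ElementCount EC8 = ElementCount::getScalable(8); // vscale x 8 elements
  ElementCount EC2 = ElementCount::getScalable(2); // vscale x 2 elements
  assert(EC8.hasKnownScalarFactor(EC2)); // same scalability and 8 % 2 == 0
  assert(EC8.getKnownScalarFactor(EC2) == 4);
  assert(EC2.multiplyCoefficientBy(4) == EC8);
}
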
diff --git a/llvm/include/llvm/Support/Unicode.h b/llvm/include/llvm/Support/Unicode.h
index ca17bba2fbb4..729775431e16 100644
--- a/llvm/include/llvm/Support/Unicode.h
+++ b/llvm/include/llvm/Support/Unicode.h
@@ -14,6 +14,10 @@
#ifndef LLVM_SUPPORT_UNICODE_H
#define LLVM_SUPPORT_UNICODE_H
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include <string>
+
namespace llvm {
class StringRef;
@@ -30,19 +34,13 @@ enum ColumnWidthErrors {
/// terminal, so we define the semantic that should be suitable for generic case
/// of a terminal capable to output Unicode characters.
///
-/// All characters from the Unicode code point range are considered printable
-/// except for:
-/// * C0 and C1 control character ranges;
-/// * default ignorable code points as per 5.21 of
-/// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
-/// except for U+00AD SOFT HYPHEN, as it's actually displayed on most
-/// terminals;
-/// * format characters (category = Cf);
-/// * surrogates (category = Cs);
-/// * unassigned characters (category = Cn).
+/// Printable codepoints are those in the categories L, M, N, P, S and Zs
/// \return true if the character is considered printable.
bool isPrintable(int UCS);
+/// Formatting codepoints are codepoints in the Cf category.
+bool isFormatting(int UCS);
+
/// Gets the number of positions the UTF8-encoded \p Text is likely to occupy
/// when output on a terminal ("character width"). This depends on the
/// implementation of the terminal, and there's no standard definition of
@@ -63,6 +61,30 @@ int columnWidthUTF8(StringRef Text);
/// rules.
int foldCharSimple(int C);
+/// Maps the name or the alias of a Unicode character to its associated
+/// codepoint.
+/// The names and aliases are derived from UnicodeData.txt and NameAliases.txt.
+/// For compatibility with the semantics of named character escape sequences in
+/// C++, this mapping does an exact match sensitive to casing and spacing.
+/// \return The codepoint of the corresponding character, if any.
+Optional<char32_t> nameToCodepointStrict(StringRef Name);
+
+struct LooseMatchingResult {
+ char32_t CodePoint;
+ SmallString<64> Name;
+};
+
+Optional<LooseMatchingResult> nameToCodepointLooseMatching(StringRef Name);
+
+struct MatchForCodepointName {
+ std::string Name;
+ uint32_t Distance = 0;
+ char32_t Value = 0;
+};
+
+SmallVector<MatchForCodepointName>
+nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount);
+
} // namespace unicode
} // namespace sys
} // namespace llvm
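
A sketch of the strict lookup (not part of the patch); the name below is the standard Unicode name of U+03B1.

#include "llvm/Support/Unicode.h"

bool isAlphaByName() {
  using namespace llvm::sys::unicode;
  // Exact match, sensitive to casing and spacing, per C++ named escapes.
  llvm::Optional<char32_t> CP =
      nameToCodepointStrict("GREEK SMALL LETTER ALPHA");
  return CP && *CP == U'\u03B1';
}
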
diff --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h
index 1a1072d228f1..2020a5c06f56 100644
--- a/llvm/include/llvm/Support/VersionTuple.h
+++ b/llvm/include/llvm/Support/VersionTuple.h
@@ -17,11 +17,13 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/Endian.h"
#include <string>
#include <tuple>
namespace llvm {
+template <typename HasherT, support::endianness Endianness>
+class HashBuilderImpl;
class raw_ostream;
class StringRef;
@@ -97,6 +99,12 @@ public:
return *this;
}
+ /// Return a version tuple with the major version replaced by \p NewMajor and
+ /// all other components unchanged.
+ VersionTuple withMajorReplaced(unsigned NewMajor) const {
+ return VersionTuple(NewMajor, Minor, Subminor, Build);
+ }
+
/// Return a version tuple that contains only components that are non-zero.
VersionTuple normalize() const {
VersionTuple Result = *this;
@@ -161,8 +169,8 @@ public:
return !(X < Y);
}
- friend llvm::hash_code hash_value(const VersionTuple &VT) {
- return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build);
+ friend hash_code hash_value(const VersionTuple &VT) {
+ return hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build);
}
template <typename HasherT, llvm::support::endianness Endianness>
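
A sketch of the new helper (not part of the patch):

#include "llvm/Support/VersionTuple.h"
using namespace llvm;

VersionTuple nextMajor(const VersionTuple &V) {
  // e.g. 13.0.1 -> 14.0.1; minor, subminor and build are preserved.
  return V.withMajorReplaced(V.getMajor() + 1);
}
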
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index f5dde334b0a7..3c99b0d8efdb 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
@@ -58,6 +59,17 @@ public:
// FIXME: remove when files support multiple names
bool IsVFSMapped = false;
+ /// Whether this entity has an external path different from the virtual path,
+ /// and the external path is exposed by leaking it through the abstraction.
+ /// For example, a RedirectingFileSystem will set this for paths where
+ /// UseExternalName is true.
+ ///
+ /// FIXME: Currently the external path is exposed by replacing the virtual
+ /// path in this Status object. Instead, we should leave the path in the
+ /// Status intact (matching the requested virtual path) - see
+ /// FileManager::getFileRef for how we plan to fix this.
+ bool ExposesExternalVFSPath = false;
+
Status() = default;
Status(const llvm::sys::fs::file_status &Status);
Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
@@ -306,6 +318,28 @@ public:
/// \returns success if \a path has been made absolute, otherwise a
/// platform-specific error_code.
virtual std::error_code makeAbsolute(SmallVectorImpl<char> &Path) const;
+
+ enum class PrintType { Summary, Contents, RecursiveContents };
+ void print(raw_ostream &OS, PrintType Type = PrintType::Contents,
+ unsigned IndentLevel = 0) const {
+ printImpl(OS, Type, IndentLevel);
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+
+protected:
+ virtual void printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const {
+ printIndent(OS, IndentLevel);
+ OS << "FileSystem\n";
+ }
+
+ void printIndent(raw_ostream &OS, unsigned IndentLevel) const {
+ for (unsigned i = 0; i < IndentLevel; ++i)
+ OS << " ";
+ }
};
/// Gets an \p vfs::FileSystem for the 'real' file system, as seen by
@@ -357,6 +391,8 @@ public:
using const_iterator = FileSystemList::const_reverse_iterator;
using reverse_iterator = FileSystemList::iterator;
using const_reverse_iterator = FileSystemList::const_iterator;
+ using range = iterator_range<iterator>;
+ using const_range = iterator_range<const_iterator>;
/// Get an iterator pointing to the most recently added file system.
iterator overlays_begin() { return FSList.rbegin(); }
@@ -373,6 +409,13 @@ public:
/// Get an iterator pointing one-past the most recently added file system.
reverse_iterator overlays_rend() { return FSList.end(); }
const_reverse_iterator overlays_rend() const { return FSList.end(); }
+
+ range overlays_range() { return llvm::reverse(FSList); }
+ const_range overlays_range() const { return llvm::reverse(FSList); }
+
+protected:
+ void printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const override;
};
/// By default, this delegates all calls to the underlying file system. This
@@ -436,6 +479,24 @@ struct NewInMemoryNodeInfo {
Status makeStatus() const;
};
+class NamedNodeOrError {
+ ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>>
+ Value;
+
+public:
+ NamedNodeOrError(llvm::SmallString<128> Name,
+ const detail::InMemoryNode *Node)
+ : Value(std::make_pair(Name, Node)) {}
+ NamedNodeOrError(std::error_code EC) : Value(EC) {}
+ NamedNodeOrError(llvm::errc EC) : Value(EC) {}
+
+ StringRef getName() const { return (*Value).first; }
+ explicit operator bool() const { return static_cast<bool>(Value); }
+ operator std::error_code() const { return Value.getError(); }
+ std::error_code getError() const { return Value.getError(); }
+ const detail::InMemoryNode *operator*() const { return (*Value).second; }
+};
+
} // namespace detail
/// An in-memory file system.
@@ -454,6 +515,14 @@ class InMemoryFileSystem : public FileSystem {
Optional<llvm::sys::fs::file_type> Type,
Optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode);
+ /// Looks up the in-memory node for the path \param P.
+ /// If \param FollowFinalSymlink is true, the returned node is guaranteed
+ /// not to be a symlink and its path may differ from \param P.
+ detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink,
+ size_t SymlinkDepth = 0) const;
+
+ class DirIterator;
+
public:
explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
~InMemoryFileSystem() override;
@@ -471,18 +540,32 @@ public:
Optional<llvm::sys::fs::perms> Perms = None);
/// Add a hard link to a file.
+ ///
/// Here hard links are not intended to be fully equivalent to the classical
/// filesystem. Both the hard link and the file share the same buffer and
/// status (and thus have the same UniqueID). Because of this there is no way
/// to distinguish between the link and the file after the link has been
/// added.
///
- /// The To path must be an existing file or a hardlink. The From file must not
- /// have been added before. The To Path must not be a directory. The From Node
- /// is added as a hard link which points to the resolved file of To Node.
+ /// The \param Target path must be an existing file or a hardlink. The
+ /// \param NewLink file must not have been added before. The \param Target
+ /// path must not be a directory. The \param NewLink node is added as a hard
+ /// link which points to the resolved file of \param Target node.
/// \return true if the above condition is satisfied and hardlink was
/// successfully created, false otherwise.
- bool addHardLink(const Twine &From, const Twine &To);
+ bool addHardLink(const Twine &NewLink, const Twine &Target);
+
+ /// Arbitrary max depth to search through symlinks. We can get into trouble
+ /// if the links form a cycle, for example.
+ static constexpr size_t MaxSymlinkDepth = 16;
+
+ /// Add a symbolic link. Unlike a hard link, \param Target doesn't need to
+ /// refer to a file (or to refer to anything at all). Also, an in-memory
+ /// directory for \param Target isn't automatically created.
+ bool addSymbolicLink(const Twine &NewLink, const Twine &Target,
+ time_t ModificationTime, Optional<uint32_t> User = None,
+ Optional<uint32_t> Group = None,
+ Optional<llvm::sys::fs::perms> Perms = None);
/// Add a buffer to the VFS with a path. The VFS does not own the buffer.
/// If present, User, Group, Type and Perms apply to the newly-created file
@@ -520,6 +603,10 @@ public:
SmallVectorImpl<char> &Output) const override;
std::error_code isLocal(const Twine &Path, bool &Result) override;
std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+
+protected:
+ void printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const override;
};
/// Get a globally unique ID for a virtual file or directory.
@@ -571,7 +658,10 @@ class RedirectingFileSystemParser;
/// 'case-sensitive': <boolean, default=(true for Posix, false for Windows)>
/// 'use-external-names': <boolean, default=true>
/// 'overlay-relative': <boolean, default=false>
-/// 'fallthrough': <boolean, default=true>
+/// 'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with'
+/// instead>
+/// 'redirecting-with': <string, one of 'fallthrough', 'fallback', or
+/// 'redirect-only', default='fallthrough'>
///
/// Virtual directories that list their contents are represented as
/// \verbatim
@@ -642,6 +732,20 @@ public:
enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
enum NameKind { NK_NotSet, NK_External, NK_Virtual };
+ /// The type of redirection to perform.
+ enum class RedirectKind {
+ /// Look up the redirected path first (i.e. the one specified in
+ /// 'external-contents') and if that fails, "fallthrough" to a lookup of
+ /// the originally provided path.
+ Fallthrough,
+ /// Look up the provided path first and if that fails, "fallback" to a
+ /// lookup of the redirected path.
+ Fallback,
+ /// Only look up the redirected path; do not look up the originally
+ /// provided path.
+ RedirectOnly
+ };
+
/// A single file or directory in the VFS.
class Entry {
EntryKind Kind;
@@ -776,17 +880,11 @@ private:
friend class RedirectingFSDirIterImpl;
friend class RedirectingFileSystemParser;
- bool shouldUseExternalFS() const { return IsFallthrough; }
-
/// Canonicalize path by removing ".", "..", "./", components. This is
/// a VFS request, do not bother about symlinks in the path components
/// but canonicalize in order to perform the correct entry search.
std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;
- /// Whether to fall back to the external file system when an operation fails
- /// with the given error code on a path associated with the provided Entry.
- bool shouldFallBackToExternalFS(std::error_code EC, Entry *E = nullptr) const;
-
/// Get the File status, or error, from the underlying external file system.
/// This returns the status with the originally requested name, while looking
/// up the entry using the canonical path.
@@ -834,9 +932,9 @@ private:
/// names of files. This global value is overridable on a per-file basis.
bool UseExternalNames = true;
- /// Whether to attempt a file lookup in external file system after it wasn't
- /// found in VFS.
- bool IsFallthrough = true;
+ /// Determines the lookups to perform, as well as their order. See
+ /// \c RedirectKind for details.
+ RedirectKind Redirection = RedirectKind::Fallthrough;
/// @}
RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS);
@@ -891,15 +989,19 @@ public:
StringRef getExternalContentsPrefixDir() const;
+ /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly
+ /// otherwise. Will be removed in the future; use \c setRedirection instead.
void setFallthrough(bool Fallthrough);
+ void setRedirection(RedirectingFileSystem::RedirectKind Kind);
+
std::vector<llvm::StringRef> getRoots() const;
- void dump(raw_ostream &OS) const;
- void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const;
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const;
-#endif
+ void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const;
+
+protected:
+ void printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const override;
};
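
A sketch of selecting the new redirect-only behavior programmatically (not part of the patch); the YAML equivalent is 'redirecting-with': 'redirect-only'.

#include "llvm/Support/VirtualFileSystem.h"

void useRedirectOnly(llvm::vfs::RedirectingFileSystem &FS) {
  // Never consult the originally requested path, only the redirection target.
  FS.setRedirection(
      llvm::vfs::RedirectingFileSystem::RedirectKind::RedirectOnly);
}
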
/// Collect all pairs of <virtual path, real path> entries from the
diff --git a/llvm/include/llvm/Support/Win64EH.h b/llvm/include/llvm/Support/Win64EH.h
index 9359fcb4286a..31345beaa66a 100644
--- a/llvm/include/llvm/Support/Win64EH.h
+++ b/llvm/include/llvm/Support/Win64EH.h
@@ -24,6 +24,9 @@ namespace Win64EH {
/// UnwindOpcodes - Enumeration whose values specify a single operation in
/// the prolog of a function.
enum UnwindOpcodes {
+ // The following set of unwind opcodes is for x86_64. They are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64.
+ // Some generic values from this set are used for other architectures too.
UOP_PushNonVol = 0,
UOP_AllocLarge,
UOP_AllocSmall,
@@ -57,7 +60,38 @@ enum UnwindOpcodes {
UOP_SaveNext,
UOP_TrapFrame,
UOP_Context,
- UOP_ClearUnwoundToCall
+ UOP_ClearUnwoundToCall,
+ // The following set of unwind opcodes is for ARM. They are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling
+
+ // Stack allocations use UOP_AllocSmall, UOP_AllocLarge from above, plus
+ // the following. AllocSmall, AllocLarge and AllocHuge represent a 16 bit
+ // instruction, while the WideAlloc* opcodes represent a 32 bit instruction.
+ // Small can represent a stack offset of 0x7f*4 (252) bytes, Medium can
+ // represent up to 0x3ff*4 (4092) bytes, Large up to 0xffff*4 (262140) bytes,
+ // and Huge up to 0xffffff*4 (67108860) bytes.
+ UOP_AllocHuge,
+ UOP_WideAllocMedium,
+ UOP_WideAllocLarge,
+ UOP_WideAllocHuge,
+
+ UOP_WideSaveRegMask,
+ UOP_SaveSP,
+ UOP_SaveRegsR4R7LR,
+ UOP_WideSaveRegsR4R11LR,
+ UOP_SaveFRegD8D15,
+ UOP_SaveRegMask,
+ UOP_SaveLR,
+ UOP_SaveFRegD0D15,
+ UOP_SaveFRegD16D31,
+ // Using UOP_Nop from above
+ UOP_WideNop,
+ // Using UOP_End from above
+ UOP_EndNop,
+ UOP_WideEndNop,
+ // A custom unspecified opcode, consisting of one or more bytes. This
+ // allows producing opcodes in the implementation-defined/reserved range.
+ UOP_Custom,
};
/// UnwindCode - This union describes a single operation in a function prolog,
diff --git a/llvm/include/llvm/Support/WithColor.h b/llvm/include/llvm/Support/WithColor.h
index e772ea667f4f..b249f34da1fa 100644
--- a/llvm/include/llvm/Support/WithColor.h
+++ b/llvm/include/llvm/Support/WithColor.h
@@ -51,10 +51,9 @@ enum class ColorMode {
/// An RAII object that temporarily switches an output stream to a specific
/// color.
class WithColor {
- raw_ostream &OS;
- ColorMode Mode;
-
public:
+ using AutoDetectFunctionType = bool (*)(const raw_ostream &OS);
+
/// To be used like this: WithColor(OS, HighlightColor::String) << "text";
/// @param OS The output stream
/// @param S Symbolic name for syntax element to color
@@ -132,6 +131,19 @@ public:
/// Implement default handling for Warning.
/// Print "warning: " to stderr.
static void defaultWarningHandler(Error Warning);
+
+ /// Retrieve the default color auto detection function.
+ static AutoDetectFunctionType defaultAutoDetectFunction();
+
+ /// Change the global auto detection function.
+ static void
+ setAutoDetectFunction(AutoDetectFunctionType NewAutoDetectFunction);
+
+private:
+ raw_ostream &OS;
+ ColorMode Mode;
+
+ static AutoDetectFunctionType AutoDetectFunction;
};
} // end namespace llvm
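
A sketch of overriding the global auto-detection (not part of the patch), e.g. to keep test output deterministic:

#include "llvm/Support/WithColor.h"

void neverColorize() {
  // A captureless lambda converts to the AutoDetectFunctionType pointer.
  llvm::WithColor::setAutoDetectFunction(
      [](const llvm::raw_ostream &) { return false; });
}
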
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index aca717a9f6cb..169b8e97986e 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -120,8 +120,6 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
- ENUM_ENTRY(IC_64BIT_VEX_OPSIZE, 4, "requires 64-bit mode and VEX") \
- ENUM_ENTRY(IC_64BIT_VEX_OPSIZE_ADSIZE, 5, "requires 64-bit mode, VEX, and AdSize")\
ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
index 4443d822d3e8..58fa3b3842e7 100644
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ b/llvm/include/llvm/Support/X86TargetParser.def
@@ -211,47 +211,47 @@ X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening")
#undef X86_FEATURE
#ifndef CPU_SPECIFIC
-#define CPU_SPECIFIC(NAME, MANGLING, FEATURES)
+#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES)
#endif
#ifndef CPU_SPECIFIC_ALIAS
-#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME)
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME)
#endif
-CPU_SPECIFIC("generic", 'A', "")
-CPU_SPECIFIC("pentium", 'B', "")
-CPU_SPECIFIC("pentium_pro", 'C', "+cmov")
-CPU_SPECIFIC("pentium_mmx", 'D', "+mmx")
-CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx")
-CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse")
-CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii")
-CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2")
-CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2")
-CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
-CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
-CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
-CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
-CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
-CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
-CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx")
-CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge")
-CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx")
-CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge")
-CPU_SPECIFIC("haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
-CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell")
-CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
-CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
-CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell")
-CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
-CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd")
-CPU_SPECIFIC_ALIAS("mic_avx512", "knl")
-CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx")
-CPU_SPECIFIC( "skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb")
-CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi")
-CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq")
+CPU_SPECIFIC("generic", "generic", 'A', "")
+CPU_SPECIFIC("pentium", "pentium", 'B', "")
+CPU_SPECIFIC("pentium_pro", "pentiumpro", 'C', "+cmov")
+CPU_SPECIFIC("pentium_mmx", "pentium-mmx", 'D', "+mmx")
+CPU_SPECIFIC("pentium_ii", "pentium2", 'E', "+cmov,+mmx")
+CPU_SPECIFIC("pentium_iii", "pentium3", 'H', "+cmov,+mmx,+sse")
+CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium3", "pentium_iii")
+CPU_SPECIFIC("pentium_4", "pentium4", 'J', "+cmov,+mmx,+sse,+sse2")
+CPU_SPECIFIC("pentium_m", "pentium-m", 'K', "+cmov,+mmx,+sse,+sse2")
+CPU_SPECIFIC("pentium_4_sse3", "prescott", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
+CPU_SPECIFIC("core_2_duo_ssse3", "core2", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
+CPU_SPECIFIC("core_2_duo_sse4_1", "penryn", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
+CPU_SPECIFIC("atom", "atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
+CPU_SPECIFIC("atom_sse4_2", "silvermont", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("core_i7_sse4_2", "nehalem", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("core_aes_pclmulqdq", "westmere", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("atom_sse4_2_movbe", "silvermont", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
+CPU_SPECIFIC("goldmont", "goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
+CPU_SPECIFIC("sandybridge", "sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx")
+CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge", "sandybridge")
+CPU_SPECIFIC("ivybridge", "ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx")
+CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge", "ivybridge")
+CPU_SPECIFIC("haswell", "haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
+CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell", "haswell")
+CPU_SPECIFIC("core_4th_gen_avx_tsx", "haswell", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
+CPU_SPECIFIC("broadwell", "broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
+CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell", "broadwell")
+CPU_SPECIFIC("core_5th_gen_avx_tsx", "broadwell", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
+CPU_SPECIFIC("knl", "knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd")
+CPU_SPECIFIC_ALIAS("mic_avx512", "knl", "knl")
+CPU_SPECIFIC("skylake", "skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx")
+CPU_SPECIFIC( "skylake_avx512", "skylake-avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb")
+CPU_SPECIFIC("cannonlake", "cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi")
+CPU_SPECIFIC("knm", "knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq")
#undef CPU_SPECIFIC_ALIAS
#undef CPU_SPECIFIC
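
The CPU_SPECIFIC rows above gain a second string argument (the tune CPU handed to the backend) next to the one-character mangling code, so every includer of the .def must now supply a four-parameter CPU_SPECIFIC macro and a three-parameter CPU_SPECIFIC_ALIAS. A minimal, self-contained sketch of that X-macro consumption pattern; the row struct and the inline sample row are invented for illustration, not taken from the in-tree consumers:

    #include <cstdio>

    struct CpuRow {
      const char *Name;     // cpu_specific spelling, e.g. "pentium_4"
      const char *TuneName; // backend CPU used for tuning, e.g. "pentium4"
      char Mangling;        // one-character mangling suffix
      const char *Features; // '+'-separated feature list
    };

    // New shape: CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES)
    #define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) \
      {NAME, TUNE_NAME, MANGLING, FEATURES},
    // Aliases carry the tune name too; ignored in this sketch.
    #define CPU_SPECIFIC_ALIAS(ALIAS, TUNE_NAME, NAME)

    static const CpuRow Rows[] = {
        // In-tree this would be: #include "llvm/Support/X86TargetParser.def"
        CPU_SPECIFIC("pentium_4", "pentium4", 'J', "+cmov,+mmx,+sse,+sse2")
    };
    #undef CPU_SPECIFIC_ALIAS
    #undef CPU_SPECIFIC

    int main() {
      for (const CpuRow &R : Rows)
        std::printf("%s -> tune %s ('%c'): %s\n", R.Name, R.TuneName,
                    R.Mangling, R.Features);
    }
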
diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h
index a4b2ab5e49ec..231cc1d28c9a 100644
--- a/llvm/include/llvm/Support/YAMLParser.h
+++ b/llvm/include/llvm/Support/YAMLParser.h
@@ -11,7 +11,6 @@
// See http://www.yaml.org/spec/1.2/spec.html for the full standard.
//
// This currently does not implement the following:
-// * Multi-line literal folding.
// * Tag resolution.
// * UTF-16.
// * BOMs anywhere other than the first Unicode scalar value in the file.
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 7ad73543fc6e..8ade9b15642b 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -24,7 +24,6 @@
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <cctype>
#include <map>
#include <memory>
#include <new>
@@ -63,6 +62,7 @@ struct MappingTraits {
// static void mapping(IO &io, T &fields);
// Optionally may provide:
// static std::string validate(IO &io, T &fields);
+ // static void enumInput(IO &io, T &value);
//
// The optional flow flag will cause generated YAML to use a flow mapping
// (e.g. { a: 0, b: 1 }):
@@ -446,6 +446,31 @@ template <class T> struct has_MappingValidateTraits<T, EmptyContext> {
static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
};
+// Test if MappingContextTraits<T>::enumInput() is defined on type T.
+template <class T, class Context> struct has_MappingEnumInputTraits {
+ using Signature_validate = void (*)(class IO &, T &);
+
+ template <typename U>
+ static char test(SameType<Signature_validate, &U::enumInput> *);
+
+ template <typename U> static double test(...);
+
+ static bool const value =
+ (sizeof(test<MappingContextTraits<T, Context>>(nullptr)) == 1);
+};
+
+// Test if MappingTraits<T>::enumInput() is defined on type T.
+template <class T> struct has_MappingEnumInputTraits<T, EmptyContext> {
+ using Signature_validate = void (*)(class IO &, T &);
+
+ template <typename U>
+ static char test(SameType<Signature_validate, &U::enumInput> *);
+
+ template <typename U> static double test(...);
+
+ static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
+};
+
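
The two traits just added use the classic sizeof-based detection idiom: overload resolution picks the char-returning test() only when &U::enumInput matches the expected signature. A standalone sketch of the same pattern, with invented placeholder types in place of IO and the real traits:

    #include <cstdio>

    // Matches only when the second argument has exactly the first's type.
    template <typename Sig, Sig> struct SameType;

    struct WithHook {
      static void enumInput(int &IO, float &Val);
    };
    struct WithoutHook {};

    template <class T> struct HasEnumInput {
      using Signature = void (*)(int &, float &);

      template <typename U>
      static char test(SameType<Signature, &U::enumInput> *);

      template <typename U> static double test(...);

      // sizeof(char) == 1 exactly when the first overload was viable.
      static constexpr bool value = sizeof(test<T>(nullptr)) == 1;
    };

    int main() {
      std::printf("%d %d\n", (int)HasEnumInput<WithHook>::value,
                  (int)HasEnumInput<WithoutHook>::value); // prints "1 0"
    }
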
// Test if SequenceTraits<T> is defined on type T.
template <class T>
struct has_SequenceMethodTraits
@@ -537,9 +562,8 @@ template <class T> struct has_PolymorphicTraits {
};
inline bool isNumeric(StringRef S) {
- const static auto skipDigits = [](StringRef Input) {
- return Input.drop_front(
- std::min(Input.find_first_not_of("0123456789"), Input.size()));
+ const auto skipDigits = [](StringRef Input) {
+ return Input.ltrim("0123456789");
};
// Make S.front() and S.drop_front().front() (if S.front() is [+-]) calls
@@ -666,8 +690,7 @@ inline QuotingType needsQuotes(StringRef S) {
// 7.3.3 Plain Style
// Plain scalars must not begin with most indicators, as this would cause
// ambiguity with other YAML constructs.
- static constexpr char Indicators[] = R"(-?:\,[]{}#&*!|>'"%@`)";
- if (S.find_first_of(Indicators) == 0)
+ if (std::strchr(R"(-?:\,[]{}#&*!|>'"%@`)", S[0]) != nullptr)
MaxQuotingNeeded = QuotingType::Single;
for (unsigned char C : S) {
@@ -1062,8 +1085,29 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) {
}
template <typename T, typename Context>
+std::enable_if_t<!has_MappingEnumInputTraits<T, Context>::value, bool>
+yamlizeMappingEnumInput(IO &io, T &Val) {
+ return false;
+}
+
+template <typename T, typename Context>
+std::enable_if_t<has_MappingEnumInputTraits<T, Context>::value, bool>
+yamlizeMappingEnumInput(IO &io, T &Val) {
+ if (io.outputting())
+ return false;
+
+ io.beginEnumScalar();
+ MappingTraits<T>::enumInput(io, Val);
+ bool Matched = !io.matchEnumFallback();
+ io.endEnumScalar();
+ return Matched;
+}
+
+template <typename T, typename Context>
std::enable_if_t<unvalidatedMappingTraits<T, Context>::value, void>
yamlize(IO &io, T &Val, bool, Context &Ctx) {
+ if (yamlizeMappingEnumInput<T, Context>(io, Val))
+ return;
if (has_FlowTraits<MappingTraits<T>>::value) {
io.beginFlowMapping();
detail::doMapping(io, Val, Ctx);
@@ -1624,14 +1668,13 @@ template <typename T, typename Context>
void IO::processKeyWithDefault(const char *Key, Optional<T> &Val,
const Optional<T> &DefaultValue, bool Required,
Context &Ctx) {
- assert(DefaultValue.hasValue() == false &&
- "Optional<T> shouldn't have a value!");
+ assert(!DefaultValue && "Optional<T> shouldn't have a value!");
void *SaveInfo;
bool UseDefault = true;
- const bool sameAsDefault = outputting() && !Val.hasValue();
- if (!outputting() && !Val.hasValue())
+ const bool sameAsDefault = outputting() && !Val;
+ if (!outputting() && !Val)
Val = T();
- if (Val.hasValue() &&
+ if (Val &&
this->preflightKey(Key, Required, sameAsDefault, UseDefault, SaveInfo)) {
// When reading an Optional<X> key from a YAML description, we allow the
@@ -1648,7 +1691,7 @@ void IO::processKeyWithDefault(const char *Key, Optional<T> &Val,
if (IsNone)
Val = DefaultValue;
else
- yamlize(*this, Val.getValue(), Required, Ctx);
+ yamlize(*this, *Val, Required, Ctx);
this->postflightKey(SaveInfo);
} else {
if (UseDefault)
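
With the machinery above, a MappingTraits specialization can opt into scalar input for a mapping-typed value; the dispatch only consults enumInput when reading, never when writing. A hedged sketch, assuming a made-up Shape type (enumCase is the existing IO primitive also used by ScalarEnumerationTraits):

    #include "llvm/Support/YAMLTraits.h"

    // "Shape" and its scalar spellings are invented for this example.
    struct Shape {
      int Width = 0;
      int Height = 0;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<Shape> {
      static void mapping(IO &Io, Shape &S) {
        Io.mapOptional("width", S.Width);
        Io.mapOptional("height", S.Height);
      }
      // Only consulted on input; lets `shape: square` stand in for the
      // full mapping form `shape: { width: 1, height: 1 }`.
      static void enumInput(IO &Io, Shape &S) {
        Io.enumCase(S, "square", Shape{1, 1});
        Io.enumCase(S, "wide", Shape{2, 1});
      }
    };
    } // namespace yaml
    } // namespace llvm
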
diff --git a/llvm/include/llvm/Support/circular_raw_ostream.h b/llvm/include/llvm/Support/circular_raw_ostream.h
index d2f01ea6a7f2..17fb8fa0e476 100644
--- a/llvm/include/llvm/Support/circular_raw_ostream.h
+++ b/llvm/include/llvm/Support/circular_raw_ostream.h
@@ -38,7 +38,7 @@ namespace llvm {
/// TheStream - The real stream we output to. We set it to be
/// unbuffered, since we're already doing our own buffering.
///
- raw_ostream *TheStream;
+ raw_ostream *TheStream = nullptr;
/// OwnsStream - Are we responsible for managing the underlying
/// stream?
@@ -51,7 +51,7 @@ namespace llvm {
/// BufferArray - The actual buffer storage.
///
- char *BufferArray;
+ char *BufferArray = nullptr;
/// Cur - Pointer to the current output point in BufferArray.
///
@@ -60,7 +60,7 @@ namespace llvm {
/// Filled - Indicate whether the buffer has been completely
/// filled. This helps avoid garbage output.
///
- bool Filled;
+ bool Filled = false;
/// Banner - A pointer to a banner to print before dumping the
/// log.
@@ -106,9 +106,8 @@ namespace llvm {
///
circular_raw_ostream(raw_ostream &Stream, const char *Header,
size_t BuffSize = 0, bool Owns = REFERENCE_ONLY)
- : raw_ostream(/*unbuffered*/ true), TheStream(nullptr),
- OwnsStream(Owns), BufferSize(BuffSize), BufferArray(nullptr),
- Filled(false), Banner(Header) {
+ : raw_ostream(/*unbuffered*/ true), OwnsStream(Owns),
+ BufferSize(BuffSize), Banner(Header) {
if (BufferSize != 0)
BufferArray = new char[BufferSize];
Cur = BufferArray;
diff --git a/llvm/include/llvm/Support/raw_sha1_ostream.h b/llvm/include/llvm/Support/raw_sha1_ostream.h
index 3991691796b5..299f6e6b5e88 100644
--- a/llvm/include/llvm/Support/raw_sha1_ostream.h
+++ b/llvm/include/llvm/Support/raw_sha1_ostream.h
@@ -30,7 +30,7 @@ class raw_sha1_ostream : public raw_ostream {
public:
/// Return the current SHA1 hash for the content of the stream
- StringRef sha1() {
+ std::array<uint8_t, 20> sha1() {
flush();
return State.result();
}
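
Returning a fixed-size std::array instead of a StringRef hands the caller an owned copy of the 20-byte digest, so nothing can dangle once the stream goes away. A small usage sketch; sha1Hex is an illustrative helper, and toHex (from llvm/ADT/StringExtras.h) accepts an ArrayRef<uint8_t>, which converts from std::array:

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/Support/raw_sha1_ostream.h"
    #include <array>
    #include <string>

    std::string sha1Hex(llvm::StringRef Data) {
      llvm::raw_sha1_ostream OS;
      OS << Data;
      std::array<uint8_t, 20> Digest = OS.sha1(); // owned copy; nothing dangles
      return llvm::toHex(Digest);
    }
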
diff --git a/llvm/include/llvm/TableGen/Parser.h b/llvm/include/llvm/TableGen/Parser.h
new file mode 100644
index 000000000000..411259e4033c
--- /dev/null
+++ b/llvm/include/llvm/TableGen/Parser.h
@@ -0,0 +1,34 @@
+//===- llvm/TableGen/Parser.h - tblgen parser entry point -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares an entry point into the tablegen parser for use by tools.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TABLEGEN_PARSER_H
+#define LLVM_TABLEGEN_PARSER_H
+
+#include "llvm/ADT/STLExtras.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+class RecordKeeper;
+class SourceMgr;
+
+/// Parse the TableGen file defined within the main buffer of the given
+/// SourceMgr. On success, populates the provided RecordKeeper with the parsed
+/// records and returns false. On failure, returns true.
+///
+/// NOTE: TableGen currently relies on global state within a given parser
+/// invocation, so this function is not thread-safe.
+bool TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records);
+
+} // end namespace llvm
+
+#endif // LLVM_TABLEGEN_PARSER_H
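
A short sketch of driving this entry point from an in-memory string; parseTableGenString is an invented wrapper name. As the header warns, the parser still relies on global state, so calls must not overlap across threads:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/TableGen/Parser.h"
    #include "llvm/TableGen/Record.h"

    // Returns true on failure, matching the convention documented above.
    bool parseTableGenString(llvm::StringRef Source,
                             llvm::RecordKeeper &Records) {
      llvm::SourceMgr SrcMgr;
      SrcMgr.AddNewSourceBuffer(
          llvm::MemoryBuffer::getMemBuffer(Source, "<tblgen-string>"),
          llvm::SMLoc());
      return llvm::TableGenParseFile(SrcMgr, Records);
    }
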
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 1157487eced3..44daad976c12 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -28,7 +28,6 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -40,7 +39,7 @@
namespace llvm {
namespace detail {
-struct RecordContext;
+struct RecordKeeperImpl;
} // namespace detail
class ListRecTy;
@@ -70,15 +69,20 @@ public:
private:
RecTyKind Kind;
+ /// The RecordKeeper that uniqued this Type.
+ RecordKeeper &RK;
/// ListRecTy of the list that has elements of this type.
ListRecTy *ListTy = nullptr;
public:
- RecTy(RecTyKind K) : Kind(K) {}
+ RecTy(RecTyKind K, RecordKeeper &RK) : Kind(K), RK(RK) {}
virtual ~RecTy() = default;
RecTyKind getRecTyKind() const { return Kind; }
+ /// Return the RecordKeeper that uniqued this Type.
+ RecordKeeper &getRecordKeeper() const { return RK; }
+
virtual std::string getAsString() const = 0;
void print(raw_ostream &OS) const { OS << getAsString(); }
void dump() const;
@@ -102,16 +106,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) {
/// 'bit' - Represent a single bit
class BitRecTy : public RecTy {
- friend detail::RecordContext;
+ friend detail::RecordKeeperImpl;
- BitRecTy() : RecTy(BitRecTyKind) {}
+ BitRecTy(RecordKeeper &RK) : RecTy(BitRecTyKind, RK) {}
public:
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == BitRecTyKind;
}
- static BitRecTy *get();
+ static BitRecTy *get(RecordKeeper &RK);
std::string getAsString() const override { return "bit"; }
@@ -122,14 +126,15 @@ public:
class BitsRecTy : public RecTy {
unsigned Size;
- explicit BitsRecTy(unsigned Sz) : RecTy(BitsRecTyKind), Size(Sz) {}
+ explicit BitsRecTy(RecordKeeper &RK, unsigned Sz)
+ : RecTy(BitsRecTyKind, RK), Size(Sz) {}
public:
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == BitsRecTyKind;
}
- static BitsRecTy *get(unsigned Sz);
+ static BitsRecTy *get(RecordKeeper &RK, unsigned Sz);
unsigned getNumBits() const { return Size; }
@@ -142,16 +147,16 @@ public:
/// 'int' - Represent an integer value of no particular size
class IntRecTy : public RecTy {
- friend detail::RecordContext;
+ friend detail::RecordKeeperImpl;
- IntRecTy() : RecTy(IntRecTyKind) {}
+ IntRecTy(RecordKeeper &RK) : RecTy(IntRecTyKind, RK) {}
public:
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == IntRecTyKind;
}
- static IntRecTy *get();
+ static IntRecTy *get(RecordKeeper &RK);
std::string getAsString() const override { return "int"; }
@@ -160,16 +165,16 @@ public:
/// 'string' - Represent an string value
class StringRecTy : public RecTy {
- friend detail::RecordContext;
+ friend detail::RecordKeeperImpl;
- StringRecTy() : RecTy(StringRecTyKind) {}
+ StringRecTy(RecordKeeper &RK) : RecTy(StringRecTyKind, RK) {}
public:
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == StringRecTyKind;
}
- static StringRecTy *get();
+ static StringRecTy *get(RecordKeeper &RK);
std::string getAsString() const override;
@@ -183,7 +188,8 @@ class ListRecTy : public RecTy {
RecTy *ElementTy;
- explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), ElementTy(T) {}
+ explicit ListRecTy(RecTy *T)
+ : RecTy(ListRecTyKind, T->getRecordKeeper()), ElementTy(T) {}
public:
static bool classof(const RecTy *RT) {
@@ -202,16 +208,16 @@ public:
/// 'dag' - Represent a dag fragment
class DagRecTy : public RecTy {
- friend detail::RecordContext;
+ friend detail::RecordKeeperImpl;
- DagRecTy() : RecTy(DagRecTyKind) {}
+ DagRecTy(RecordKeeper &RK) : RecTy(DagRecTyKind, RK) {}
public:
static bool classof(const RecTy *RT) {
return RT->getRecTyKind() == DagRecTyKind;
}
- static DagRecTy *get();
+ static DagRecTy *get(RecordKeeper &RK);
std::string getAsString() const override;
};
@@ -223,12 +229,12 @@ public:
class RecordRecTy final : public RecTy, public FoldingSetNode,
public TrailingObjects<RecordRecTy, Record *> {
friend class Record;
- friend detail::RecordContext;
+ friend detail::RecordKeeperImpl;
unsigned NumClasses;
- explicit RecordRecTy(unsigned Num)
- : RecTy(RecordRecTyKind), NumClasses(Num) {}
+ explicit RecordRecTy(RecordKeeper &RK, unsigned Num)
+ : RecTy(RecordRecTyKind, RK), NumClasses(Num) {}
public:
RecordRecTy(const RecordRecTy &) = delete;
@@ -242,7 +248,8 @@ public:
}
/// Get the record type with the given non-redundant list of superclasses.
- static RecordRecTy *get(ArrayRef<Record *> Classes);
+ static RecordRecTy *get(RecordKeeper &RK, ArrayRef<Record *> Classes);
+ static RecordRecTy *get(Record *Class);
void Profile(FoldingSetNodeID &ID) const;
@@ -304,6 +311,7 @@ protected:
IK_CondOpInit,
IK_FoldOpInit,
IK_IsAOpInit,
+ IK_ExistsOpInit,
IK_AnonymousNameInit,
IK_StringInit,
IK_VarInit,
@@ -327,6 +335,9 @@ public:
/// Get the kind (type) of the value.
InitKind getKind() const { return Kind; }
+ /// Get the record keeper that initialized this Init.
+ RecordKeeper &getRecordKeeper() const;
+
protected:
explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {}
@@ -426,6 +437,9 @@ public:
/// Get the type of the Init as a RecTy.
RecTy *getType() const { return ValueTy; }
+ /// Get the record keeper that initialized this Init.
+ RecordKeeper &getRecordKeeper() const { return ValueTy->getRecordKeeper(); }
+
Init *getCastTo(RecTy *Ty) const override;
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -440,9 +454,12 @@ public:
/// '?' - Represents an uninitialized value.
class UnsetInit : public Init {
- friend detail::RecordContext;
+ friend detail::RecordKeeperImpl;
- UnsetInit() : Init(IK_UnsetInit) {}
+ /// The record keeper that initialized this Init.
+ RecordKeeper &RK;
+
+ UnsetInit(RecordKeeper &RK) : Init(IK_UnsetInit), RK(RK) {}
public:
UnsetInit(const UnsetInit &) = delete;
@@ -453,7 +470,10 @@ public:
}
/// Get the singleton unset Init.
- static UnsetInit *get();
+ static UnsetInit *get(RecordKeeper &RK);
+
+ /// Get the record keeper that initialized this Init.
+ RecordKeeper &getRecordKeeper() const { return RK; }
Init *getCastTo(RecTy *Ty) const override;
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -473,7 +493,7 @@ public:
/// 'true'/'false' - Represent a concrete initializer for a bit.
class BitInit final : public TypedInit {
- friend detail::RecordContext;
+ friend detail::RecordKeeperImpl;
bool Value;
@@ -487,7 +507,7 @@ public:
return I->getKind() == IK_BitInit;
}
- static BitInit *get(bool V);
+ static BitInit *get(RecordKeeper &RK, bool V);
bool getValue() const { return Value; }
@@ -508,8 +528,8 @@ class BitsInit final : public TypedInit, public FoldingSetNode,
public TrailingObjects<BitsInit, Init *> {
unsigned NumBits;
- BitsInit(unsigned N)
- : TypedInit(IK_BitsInit, BitsRecTy::get(N)), NumBits(N) {}
+ BitsInit(RecordKeeper &RK, unsigned N)
+ : TypedInit(IK_BitsInit, BitsRecTy::get(RK, N)), NumBits(N) {}
public:
BitsInit(const BitsInit &) = delete;
@@ -522,7 +542,7 @@ public:
return I->getKind() == IK_BitsInit;
}
- static BitsInit *get(ArrayRef<Init *> Range);
+ static BitsInit *get(RecordKeeper &RK, ArrayRef<Init *> Range);
void Profile(FoldingSetNodeID &ID) const;
@@ -558,8 +578,8 @@ public:
class IntInit : public TypedInit {
int64_t Value;
- explicit IntInit(int64_t V)
- : TypedInit(IK_IntInit, IntRecTy::get()), Value(V) {}
+ explicit IntInit(RecordKeeper &RK, int64_t V)
+ : TypedInit(IK_IntInit, IntRecTy::get(RK)), Value(V) {}
public:
IntInit(const IntInit &) = delete;
@@ -569,7 +589,7 @@ public:
return I->getKind() == IK_IntInit;
}
- static IntInit *get(int64_t V);
+ static IntInit *get(RecordKeeper &RK, int64_t V);
int64_t getValue() const { return Value; }
@@ -580,7 +600,7 @@ public:
std::string getAsString() const override;
Init *getBit(unsigned Bit) const override {
- return BitInit::get((Value & (1ULL << Bit)) != 0);
+ return BitInit::get(getRecordKeeper(), (Value & (1ULL << Bit)) != 0);
}
};
@@ -588,8 +608,8 @@ public:
class AnonymousNameInit : public TypedInit {
unsigned Value;
- explicit AnonymousNameInit(unsigned V)
- : TypedInit(IK_AnonymousNameInit, StringRecTy::get()), Value(V) {}
+ explicit AnonymousNameInit(RecordKeeper &RK, unsigned V)
+ : TypedInit(IK_AnonymousNameInit, StringRecTy::get(RK)), Value(V) {}
public:
AnonymousNameInit(const AnonymousNameInit &) = delete;
@@ -599,7 +619,7 @@ public:
return I->getKind() == IK_AnonymousNameInit;
}
- static AnonymousNameInit *get(unsigned);
+ static AnonymousNameInit *get(RecordKeeper &RK, unsigned);
unsigned getValue() const { return Value; }
@@ -626,8 +646,8 @@ private:
StringRef Value;
StringFormat Format;
- explicit StringInit(StringRef V, StringFormat Fmt)
- : TypedInit(IK_StringInit, StringRecTy::get()), Value(V), Format(Fmt) {}
+ explicit StringInit(RecordKeeper &RK, StringRef V, StringFormat Fmt)
+ : TypedInit(IK_StringInit, StringRecTy::get(RK)), Value(V), Format(Fmt) {}
public:
StringInit(const StringInit &) = delete;
@@ -637,7 +657,8 @@ public:
return I->getKind() == IK_StringInit;
}
- static StringInit *get(StringRef, StringFormat Fmt = SF_String);
+ static StringInit *get(RecordKeeper &RK, StringRef,
+ StringFormat Fmt = SF_String);
static StringFormat determineFormat(StringFormat Fmt1, StringFormat Fmt2) {
return (Fmt1 == SF_Code || Fmt2 == SF_Code) ? SF_Code : SF_String;
@@ -678,7 +699,7 @@ public:
private:
explicit ListInit(unsigned N, RecTy *EltTy)
- : TypedInit(IK_ListInit, ListRecTy::get(EltTy)), NumValues(N) {}
+ : TypedInit(IK_ListInit, ListRecTy::get(EltTy)), NumValues(N) {}
public:
ListInit(const ListInit &) = delete;
@@ -1049,8 +1070,8 @@ private:
Init *Expr;
IsAOpInit(RecTy *CheckType, Init *Expr)
- : TypedInit(IK_IsAOpInit, IntRecTy::get()), CheckType(CheckType),
- Expr(Expr) {}
+ : TypedInit(IK_IsAOpInit, IntRecTy::get(CheckType->getRecordKeeper())),
+ CheckType(CheckType), Expr(Expr) {}
public:
IsAOpInit(const IsAOpInit &) = delete;
@@ -1075,6 +1096,40 @@ public:
std::string getAsString() const override;
};
+/// !exists<type>(expr) - Dynamically determine if a record of `type` named
+/// `expr` exists.
+class ExistsOpInit : public TypedInit, public FoldingSetNode {
+private:
+ RecTy *CheckType;
+ Init *Expr;
+
+ ExistsOpInit(RecTy *CheckType, Init *Expr)
+ : TypedInit(IK_ExistsOpInit, IntRecTy::get(CheckType->getRecordKeeper())),
+ CheckType(CheckType), Expr(Expr) {}
+
+public:
+ ExistsOpInit(const ExistsOpInit &) = delete;
+ ExistsOpInit &operator=(const ExistsOpInit &) = delete;
+
+ static bool classof(const Init *I) { return I->getKind() == IK_ExistsOpInit; }
+
+ static ExistsOpInit *get(RecTy *CheckType, Init *Expr);
+
+ void Profile(FoldingSetNodeID &ID) const;
+
+ // Fold - If possible, fold this to a simpler init. Return this if not
+ // possible to fold.
+ Init *Fold(Record *CurRec, bool IsFinal = false) const;
+
+ bool isComplete() const override { return false; }
+
+ Init *resolveReferences(Resolver &R) const override;
+
+ Init *getBit(unsigned Bit) const override;
+
+ std::string getAsString() const override;
+};
+
/// 'Opcode' - Represent a reference to an entire variable object.
class VarInit : public TypedInit {
Init *VarName;
@@ -1118,7 +1173,8 @@ class VarBitInit final : public TypedInit {
unsigned Bit;
VarBitInit(TypedInit *T, unsigned B)
- : TypedInit(IK_VarBitInit, BitRecTy::get()), TI(T), Bit(B) {
+ : TypedInit(IK_VarBitInit, BitRecTy::get(T->getRecordKeeper())), TI(T),
+ Bit(B) {
assert(T->getType() &&
(isa<IntRecTy>(T->getType()) ||
(isa<BitsRecTy>(T->getType()) &&
@@ -1223,8 +1279,7 @@ class VarDefInit final : public TypedInit, public FoldingSetNode,
DefInit *Def = nullptr; // after instantiation
unsigned NumArgs;
- explicit VarDefInit(Record *Class, unsigned N)
- : TypedInit(IK_VarDefInit, RecordRecTy::get(Class)), Class(Class), NumArgs(N) {}
+ explicit VarDefInit(Record *Class, unsigned N);
DefInit *instantiate();
@@ -1321,8 +1376,8 @@ class DagInit final : public TypedInit, public FoldingSetNode,
unsigned NumArgNames;
DagInit(Init *V, StringInit *VN, unsigned NumArgs, unsigned NumArgNames)
- : TypedInit(IK_DagInit, DagRecTy::get()), Val(V), ValName(VN),
- NumArgs(NumArgs), NumArgNames(NumArgNames) {}
+ : TypedInit(IK_DagInit, DagRecTy::get(V->getRecordKeeper())), Val(V),
+ ValName(VN), NumArgs(NumArgs), NumArgNames(NumArgNames) {}
size_t numTrailingObjects(OverloadToken<Init *>) const { return NumArgs; }
@@ -1427,6 +1482,9 @@ public:
RecordVal(Init *N, RecTy *T, FieldKind K);
RecordVal(Init *N, SMLoc Loc, RecTy *T, FieldKind K);
+ /// Get the record keeper used to unique this value.
+ RecordKeeper &getRecordKeeper() const { return Name->getRecordKeeper(); }
+
/// Get the name of the field as a StringRef.
StringRef getName() const;
@@ -1527,13 +1585,14 @@ public:
explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records,
bool Anonymous = false, bool Class = false)
: Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records),
- ID(getNewUID()), IsAnonymous(Anonymous), IsClass(Class) {
+ ID(getNewUID(N->getRecordKeeper())), IsAnonymous(Anonymous),
+ IsClass(Class) {
checkName();
}
explicit Record(StringRef N, ArrayRef<SMLoc> locs, RecordKeeper &records,
bool Class = false)
- : Record(StringInit::get(N), locs, records, false, Class) {}
+ : Record(StringInit::get(records, N), locs, records, false, Class) {}
// When copy-constructing a Record, we must still guarantee a globally unique
// ID number. Don't copy CorrespondingDefInit either, since it's owned by the
@@ -1542,9 +1601,10 @@ public:
: Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
Values(O.Values), Assertions(O.Assertions),
SuperClasses(O.SuperClasses), TrackedRecords(O.TrackedRecords),
- ID(getNewUID()), IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) {}
+ ID(getNewUID(O.getRecords())), IsAnonymous(O.IsAnonymous),
+ IsClass(O.IsClass) {}
- static unsigned getNewUID();
+ static unsigned getNewUID(RecordKeeper &RK);
unsigned getID() const { return ID; }
@@ -1600,7 +1660,7 @@ public:
}
const RecordVal *getValue(StringRef Name) const {
- return getValue(StringInit::get(Name));
+ return getValue(StringInit::get(getRecords(), Name));
}
RecordVal *getValue(const Init *Name) {
@@ -1631,7 +1691,7 @@ public:
}
void removeValue(StringRef Name) {
- removeValue(StringInit::get(Name));
+ removeValue(StringInit::get(getRecords(), Name));
}
void addAssertion(SMLoc Loc, Init *Condition, Init *Message) {
@@ -1671,11 +1731,11 @@ public:
SuperClasses.push_back(std::make_pair(R, Range));
}
- /// If there are any field references that refer to fields
- /// that have been filled in, we can propagate the values now.
+ /// If there are any field references that refer to fields that have been
+ /// filled in, we can propagate the values now.
///
- /// This is a final resolve: any error messages, e.g. due to undefined
- /// !cast references, are generated now.
+ /// This is a final resolve: any error messages, e.g. due to undefined !cast
+ /// references, are generated now.
void resolveReferences(Init *NewName = nullptr);
/// Apply the resolver to the name of the record as well as to the
@@ -1699,11 +1759,11 @@ public:
// High-level methods useful to tablegen back-ends
//
- ///Return the source location for the named field.
+ /// Return the source location for the named field.
SMLoc getFieldLoc(StringRef FieldName) const;
- /// Return the initializer for a value with the specified name,
- /// or throw an exception if the field does not exist.
+ /// Return the initializer for a value with the specified name, or throw an
+ /// exception if the field does not exist.
Init *getValueInit(StringRef FieldName) const;
/// Return true if the named field is unset.
@@ -1711,96 +1771,85 @@ public:
return isa<UnsetInit>(getValueInit(FieldName));
}
- /// This method looks up the specified field and returns
- /// its value as a string, throwing an exception if the field does not exist
- /// or if the value is not a string.
+ /// This method looks up the specified field and returns its value as a
+ /// string, throwing an exception if the field does not exist or if the value
+ /// is not a string.
StringRef getValueAsString(StringRef FieldName) const;
- /// This method looks up the specified field and returns
- /// its value as a string, throwing an exception if the field if the value is
- /// not a string and llvm::Optional() if the field does not exist.
+  /// This method looks up the specified field and returns its value as a
+  /// string, throwing an exception if the value is not a string, and
+  /// returning llvm::None if the field does not exist.
llvm::Optional<StringRef> getValueAsOptionalString(StringRef FieldName) const;
- /// This method looks up the specified field and returns
- /// its value as a BitsInit, throwing an exception if the field does not exist
- /// or if the value is not the right type.
+ /// This method looks up the specified field and returns its value as a
+ /// BitsInit, throwing an exception if the field does not exist or if the
+ /// value is not the right type.
BitsInit *getValueAsBitsInit(StringRef FieldName) const;
- /// This method looks up the specified field and returns
- /// its value as a ListInit, throwing an exception if the field does not exist
- /// or if the value is not the right type.
+ /// This method looks up the specified field and returns its value as a
+ /// ListInit, throwing an exception if the field does not exist or if the
+ /// value is not the right type.
ListInit *getValueAsListInit(StringRef FieldName) const;
- /// This method looks up the specified field and
- /// returns its value as a vector of records, throwing an exception if the
- /// field does not exist or if the value is not the right type.
+ /// This method looks up the specified field and returns its value as a
+ /// vector of records, throwing an exception if the field does not exist or
+ /// if the value is not the right type.
std::vector<Record*> getValueAsListOfDefs(StringRef FieldName) const;
- /// This method looks up the specified field and
- /// returns its value as a vector of integers, throwing an exception if the
- /// field does not exist or if the value is not the right type.
+ /// This method looks up the specified field and returns its value as a
+ /// vector of integers, throwing an exception if the field does not exist or
+ /// if the value is not the right type.
std::vector<int64_t> getValueAsListOfInts(StringRef FieldName) const;
- /// This method looks up the specified field and
- /// returns its value as a vector of strings, throwing an exception if the
- /// field does not exist or if the value is not the right type.
+ /// This method looks up the specified field and returns its value as a
+ /// vector of strings, throwing an exception if the field does not exist or
+ /// if the value is not the right type.
std::vector<StringRef> getValueAsListOfStrings(StringRef FieldName) const;
- /// This method looks up the specified field and returns its
- /// value as a Record, throwing an exception if the field does not exist or if
- /// the value is not the right type.
+ /// This method looks up the specified field and returns its value as a
+ /// Record, throwing an exception if the field does not exist or if the value
+ /// is not the right type.
Record *getValueAsDef(StringRef FieldName) const;
/// This method looks up the specified field and returns its value as a
- /// Record, returning null if the field exists but is "uninitialized"
- /// (i.e. set to `?`), and throwing an exception if the field does not
- /// exist or if its value is not the right type.
+ /// Record, returning null if the field exists but is "uninitialized" (i.e.
+ /// set to `?`), and throwing an exception if the field does not exist or if
+ /// its value is not the right type.
Record *getValueAsOptionalDef(StringRef FieldName) const;
- /// This method looks up the specified field and returns its
- /// value as a bit, throwing an exception if the field does not exist or if
- /// the value is not the right type.
+ /// This method looks up the specified field and returns its value as a bit,
+ /// throwing an exception if the field does not exist or if the value is not
+ /// the right type.
bool getValueAsBit(StringRef FieldName) const;
- /// This method looks up the specified field and
- /// returns its value as a bit. If the field is unset, sets Unset to true and
- /// returns false.
+ /// This method looks up the specified field and returns its value as a bit.
+ /// If the field is unset, sets Unset to true and returns false.
bool getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const;
- /// This method looks up the specified field and returns its
- /// value as an int64_t, throwing an exception if the field does not exist or
- /// if the value is not the right type.
+ /// This method looks up the specified field and returns its value as an
+ /// int64_t, throwing an exception if the field does not exist or if the
+ /// value is not the right type.
int64_t getValueAsInt(StringRef FieldName) const;
- /// This method looks up the specified field and returns its
- /// value as an Dag, throwing an exception if the field does not exist or if
- /// the value is not the right type.
+  /// This method looks up the specified field and returns its value as a Dag,
+ /// throwing an exception if the field does not exist or if the value is not
+ /// the right type.
DagInit *getValueAsDag(StringRef FieldName) const;
};
raw_ostream &operator<<(raw_ostream &OS, const Record &R);
class RecordKeeper {
- friend class RecordRecTy;
-
using RecordMap = std::map<std::string, std::unique_ptr<Record>, std::less<>>;
using GlobalMap = std::map<std::string, Init *, std::less<>>;
- std::string InputFilename;
- RecordMap Classes, Defs;
- mutable StringMap<std::vector<Record *>> ClassRecordsMap;
- FoldingSet<RecordRecTy> RecordTypePool;
- std::map<std::string, Init *, std::less<>> ExtraGlobals;
- unsigned AnonCounter = 0;
+public:
+ RecordKeeper();
+ ~RecordKeeper();
- // These members are for the phase timing feature. We need a timer group,
- // the last timer started, and a flag to say whether the last timer
- // is the special "backend overall timer."
- TimerGroup *TimingGroup = nullptr;
- Timer *LastTimer = nullptr;
- bool BackendTimer = false;
+ /// Return the internal implementation of the RecordKeeper.
+ detail::RecordKeeperImpl &getImpl() { return *Impl; }
-public:
/// Get the main TableGen input file's name.
const std::string getInputFilename() const { return InputFilename; }
@@ -1896,7 +1945,33 @@ public:
std::vector<Record *> getAllDerivedDefinitions(
ArrayRef<StringRef> ClassNames) const;
+  /// Get all the concrete records that inherit from the specified class, if
+  /// the class is defined. Returns an empty vector if the class is not defined.
+ std::vector<Record *>
+ getAllDerivedDefinitionsIfDefined(StringRef ClassName) const;
+
void dump() const;
+
+private:
+ RecordKeeper(RecordKeeper &&) = delete;
+ RecordKeeper(const RecordKeeper &) = delete;
+ RecordKeeper &operator=(RecordKeeper &&) = delete;
+ RecordKeeper &operator=(const RecordKeeper &) = delete;
+
+ std::string InputFilename;
+ RecordMap Classes, Defs;
+ mutable StringMap<std::vector<Record *>> ClassRecordsMap;
+ GlobalMap ExtraGlobals;
+
+ // These members are for the phase timing feature. We need a timer group,
+ // the last timer started, and a flag to say whether the last timer
+ // is the special "backend overall timer."
+ TimerGroup *TimingGroup = nullptr;
+ Timer *LastTimer = nullptr;
+ bool BackendTimer = false;
+
+ /// The internal uniquer implementation of the RecordKeeper.
+ std::unique_ptr<detail::RecordKeeperImpl> Impl;
};
/// Sorting predicate to sort record pointers by name.
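
The net effect of this file's changes is that RecTy and Init uniquing moves from a process-global context into the owning RecordKeeper, which is why every static get() now takes a RecordKeeper&. A minimal sketch of the updated call shape, with an invented helper function:

    #include "llvm/TableGen/Record.h"
    #include <cassert>

    void buildSomeInits(llvm::RecordKeeper &RK) {
      llvm::IntRecTy *IntTy = llvm::IntRecTy::get(RK);      // was IntRecTy::get()
      llvm::IntInit *FortyTwo = llvm::IntInit::get(RK, 42); // was IntInit::get(42)
      llvm::StringInit *Name = llvm::StringInit::get(RK, "answer");
      // Both types and inits can now report their owning keeper.
      assert(&IntTy->getRecordKeeper() == &RK);
      assert(&FortyTwo->getRecordKeeper() == &RK);
      assert(&Name->getRecordKeeper() == &RK);
    }
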
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index f84889392d13..7a6d91061701 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -42,6 +42,7 @@ struct CGPassBuilderOption {
bool DisableMergeICmps = false;
bool DisablePartialLibcallInlining = false;
bool DisableConstantHoisting = false;
+ bool DisableSelectOptimize = true;
bool PrintISelInput = false;
bool PrintGCInfo = false;
bool RequiresCodeGenSCCOrder = false;
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 2af20ab6a53f..3e2f18b57d1e 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -745,6 +745,13 @@ def G_FCANONICALIZE : GenericInstruction {
let hasSideEffects = false;
}
+// Generic opcode equivalent to the llvm.is_fpclass intrinsic.
+def G_IS_FPCLASS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src, unknown:$test, unknown:$fpsem);
+ let hasSideEffects = false;
+}
+
// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
// values.
//
@@ -965,6 +972,12 @@ def G_FNEARBYINT : GenericInstruction {
//------------------------------------------------------------------------------
// Opcodes for LLVM Intrinsics
//------------------------------------------------------------------------------
+def G_INTRINSIC_FPTRUNC_ROUND : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src1, i32imm:$round_mode);
+ let hasSideEffects = false;
+}
+
def G_INTRINSIC_TRUNC : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 4859cf6b57b7..89f08d200021 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -118,6 +118,7 @@ def int64_matchinfo: GIDefMatchData<"int64_t">;
def apint_matchinfo : GIDefMatchData<"APInt">;
def build_fn_matchinfo :
GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
+def unsigned_matchinfo: GIDefMatchData<"unsigned">;
def copy_prop : GICombineRule<
(defs root:$d),
@@ -234,6 +235,12 @@ def binop_left_undef_to_zero: GICombineRule<
[{ return Helper.matchOperandIsUndef(*${root}, 1); }]),
(apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>;
+def binop_right_undef_to_undef: GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchOperandIsUndef(*${root}, 2); }]),
+ (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
+
// Instructions where if any source operand is undef, the instruction can be
// replaced with undef.
def propagate_undef_any_op: GICombineRule<
@@ -283,6 +290,13 @@ def select_constant_cmp: GICombineRule<
(apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }])
>;
+def select_to_logical : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SELECT):$root,
+ [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
+>;
+
// Fold x op 0 -> x
def right_identity_zero: GICombineRule<
(defs root:$root),
@@ -323,6 +337,26 @@ def urem_pow2_to_mask : GICombineRule<
(apply [{ Helper.applySimplifyURemByPow2(*${root}); }])
>;
+// Push a binary operator through a select on constants.
+//
+// binop (select cond, K0, K1), K2 ->
+// select cond, (binop K0, K2), (binop K1, K2)
+
+// Every binary operator that has constant folding. We currently do
+// not have constant folding for G_FPOW, G_FMAXNUM_IEEE or
+// G_FMINNUM_IEEE.
+def fold_binop_into_select : GICombineRule<
+ (defs root:$root, unsigned_matchinfo:$select_op_no),
+ (match (wip_match_opcode
+ G_ADD, G_SUB, G_PTR_ADD, G_AND, G_OR, G_XOR,
+ G_SDIV, G_SREM, G_UDIV, G_UREM, G_LSHR, G_ASHR, G_SHL,
+ G_SMIN, G_SMAX, G_UMIN, G_UMAX,
+ G_FMUL, G_FADD, G_FSUB, G_FDIV, G_FREM,
+ G_FMINNUM, G_FMAXNUM, G_FMINIMUM, G_FMAXIMUM):$root,
+ [{ return Helper.matchFoldBinOpIntoSelect(*${root}, ${select_op_no}); }]),
+ (apply [{ return Helper.applyFoldBinOpIntoSelect(*${root}, ${select_op_no}); }])
+>;
+
// Transform d = [su]div(x, y) and r = [su]rem(x, y) - > d, r = [su]divrem(x, y)
def div_rem_to_divrem_matchdata : GIDefMatchData<"MachineInstr *">;
def div_rem_to_divrem : GICombineRule<
@@ -753,6 +787,18 @@ def mulo_by_2: GICombineRule<
[{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+def mulo_by_0: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_UMULO, G_SMULO):$root,
+ [{ return Helper.matchMulOBy0(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+def addo_by_0: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_UADDO, G_SADDO):$root,
+ [{ return Helper.matchAddOBy0(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
def mulh_to_lshr : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_UMULH):$root,
@@ -845,10 +891,26 @@ def combine_fsub_fpext_fneg_fmul_to_fmad_or_fma: GICombineRule<
*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def combine_minmax_nan: GICombineRule<
+ (defs root:$root, unsigned_matchinfo:$info),
+ (match (wip_match_opcode G_FMINNUM, G_FMAXNUM, G_FMINIMUM, G_FMAXIMUM):$root,
+ [{ return Helper.matchCombineFMinMaxNaN(*${root}, ${info}); }]),
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${info}); }])>;
+
+// Transform (add x, (sub y, x)) -> y
+// Transform (add (sub y, x), x) -> y
+def add_sub_reg: GICombineRule <
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_ADD):$root,
+ [{ return Helper.matchAddSubSameReg(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithReg(*${root},
+ ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
binop_left_undef_to_zero,
+ binop_right_undef_to_undef,
propagate_undef_any_op,
propagate_undef_all_ops,
propagate_undef_shuffle_mask,
@@ -859,10 +921,12 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
binop_same_val, binop_left_to_zero,
binop_right_to_zero, p2i_to_i2p,
i2p_to_p2i, anyext_trunc_fold,
- fneg_fneg_fold, right_identity_one]>;
+ fneg_fneg_fold, right_identity_one,
+ add_sub_reg]>;
def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
- overlapping_and, mulo_by_2]>;
+ overlapping_and, mulo_by_2, mulo_by_0,
+ addo_by_0, combine_minmax_nan]>;
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
@@ -873,7 +937,8 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
def phi_combines : GICombineGroup<[extend_through_phis]>;
-def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
+def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
+ select_to_logical]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one]>;
@@ -900,7 +965,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
form_bitfield_extract, constant_fold, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
- and_or_disjoint_mask, fma_combines]>;
+ and_or_disjoint_mask, fma_combines, fold_binop_into_select]>;
// A combine group used for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index d8faa63ee877..c5b2462dc868 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -279,6 +279,8 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// heuristic. Classes with higher priority values are assigned first. This is
// useful as it is sometimes beneficial to assign registers to highly
// constrained classes first. The value has to be in the range [0,63].
+ // Values >= 32 should be used with care since they may overlap with other
+ // fields in the allocator's priority heuristics.
int AllocationPriority = 0;
// Generate register pressure set for this register class and any class
@@ -389,6 +391,14 @@ class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs,
list<string> RegAsmNames = RegNames;
}
+// RegisterCategory - This class is a list of RegisterClasses that belong to a
+// general category, e.g. "general purpose" or "fixed" registers. This is
+// useful for identifying registers in a generic way instead of having
+// information about a specific target's registers.
+class RegisterCategory<list<RegisterClass> classes> {
+ // Classes - A list of register classes that fall within the category.
+ list<RegisterClass> Classes = classes;
+}
//===----------------------------------------------------------------------===//
// DwarfRegNum - This class provides a mapping of the llvm register enumeration
@@ -560,6 +570,9 @@ class Instruction : InstructionEncoding {
bit isPseudo = false; // Is this instruction a pseudo-instruction?
// If so, won't have encoding information for
// the [MC]CodeEmitter stuff.
+ bit isMeta = false; // Is this instruction a meta-instruction?
+ // If so, won't produce any output in the form of
                      // executable instructions.
bit isExtractSubreg = false; // Is this instruction a kind of extract subreg?
// If so, make sure to override
// TargetInstrInfo::getExtractSubregLikeInputs.
@@ -748,6 +761,33 @@ def ins;
/// of operands.
def variable_ops;
+/// Variable-length instruction encoding utilities.
+/// The `ascend` operator should be used like this:
+///   (ascend 0b0010, 0b1101)
+/// which represents a sequence of encoding fragments placed from LSB to MSB.
+/// Thus, in this case the final encoding will be 0b1101_0010.
+/// The arguments for `ascend` can either be `bits` or another DAG.
+def ascend;
+/// In addition, we can use `descend` to describe an encoding that places
+/// its arguments (i.e. encoding fragments) from MSB to LSB. For instance:
+/// (descend 0b0010, 0b1101)
+/// This results in an encoding of 0b0010_1101.
+def descend;
+/// The `operand` operator should be used like this:
+/// (operand "$src", 4)
+/// which represents a 4-bit encoding for an instruction operand named `$src`.
+def operand;
+/// Similar to `operand`, we can reference only part of the operand's encoding:
+/// (slice "$src", 6, 8)
+/// (slice "$src", 8, 6)
+/// Both DAGs represent bits 6 to 8 (3 bits in total) in the encoding of operand
+/// `$src`.
+def slice;
+/// You can use `encoder` to specify a custom encoder function for a specific
+/// `operand` or `slice` directive. For example:
+/// (operand "$src", 4, (encoder "encodeMyImm"))
+/// (slice "$src", 8, 6, (encoder "encodeMyReg"))
+def encoder;
/// PointerLikeRegClass - Values that are designed to have pointer width are
/// derived from this. TableGen treats the register class as having a symbolic
@@ -1064,6 +1104,7 @@ def CFI_INSTRUCTION : StandardPseudoInstruction {
let hasCtrlDep = true;
let hasSideEffects = false;
let isNotDuplicable = true;
+ let isMeta = true;
}
def EH_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
@@ -1072,6 +1113,7 @@ def EH_LABEL : StandardPseudoInstruction {
let hasCtrlDep = true;
let hasSideEffects = false;
let isNotDuplicable = true;
+ let isMeta = true;
}
def GC_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
@@ -1080,6 +1122,7 @@ def GC_LABEL : StandardPseudoInstruction {
let hasCtrlDep = true;
let hasSideEffects = false;
let isNotDuplicable = true;
+ let isMeta = true;
}
def ANNOTATION_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
@@ -1094,6 +1137,7 @@ def KILL : StandardPseudoInstruction {
let InOperandList = (ins variable_ops);
let AsmString = "";
let hasSideEffects = false;
+ let isMeta = true;
}
def EXTRACT_SUBREG : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
@@ -1115,6 +1159,7 @@ def IMPLICIT_DEF : StandardPseudoInstruction {
let hasSideEffects = false;
let isReMaterializable = true;
let isAsCheapAsAMove = true;
+ let isMeta = true;
}
def SUBREG_TO_REG : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
@@ -1134,30 +1179,35 @@ def DBG_VALUE : StandardPseudoInstruction {
let InOperandList = (ins variable_ops);
let AsmString = "DBG_VALUE";
let hasSideEffects = false;
+ let isMeta = true;
}
def DBG_VALUE_LIST : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "DBG_VALUE_LIST";
let hasSideEffects = 0;
+ let isMeta = true;
}
def DBG_INSTR_REF : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "DBG_INSTR_REF";
let hasSideEffects = false;
+ let isMeta = true;
}
def DBG_PHI : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "DBG_PHI";
let hasSideEffects = 0;
+ let isMeta = true;
}
def DBG_LABEL : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$label);
let AsmString = "DBG_LABEL";
let hasSideEffects = false;
+ let isMeta = true;
}
def REG_SEQUENCE : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
@@ -1185,18 +1235,21 @@ def LIFETIME_START : StandardPseudoInstruction {
let InOperandList = (ins i32imm:$id);
let AsmString = "LIFETIME_START";
let hasSideEffects = false;
+ let isMeta = true;
}
def LIFETIME_END : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "LIFETIME_END";
let hasSideEffects = false;
+ let isMeta = true;
}
def PSEUDO_PROBE : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i64imm:$guid, i64imm:$index, i8imm:$type, i32imm:$attr);
let AsmString = "PSEUDO_PROBE";
let hasSideEffects = 1;
+ let isMeta = true;
}
def ARITH_FENCE : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
@@ -1204,6 +1257,7 @@ def ARITH_FENCE : StandardPseudoInstruction {
let AsmString = "";
let hasSideEffects = false;
let Constraints = "$src = $dst";
+ let isMeta = true;
}
def STACKMAP : StandardPseudoInstruction {
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 392ee4334cb5..0c09cfe68478 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -20,6 +20,7 @@
namespace llvm {
+struct Align;
class Constant;
class DataLayout;
class Function;
@@ -276,7 +277,7 @@ public:
}
/// If supported, return the function entry point symbol.
- /// Otherwise, returns nulltpr.
+ /// Otherwise, returns nullptr.
/// Func must be a function or an alias which has a function as base object.
virtual MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func,
const TargetMachine &TM) const {
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index acfb265a9ff9..bf37ad7010ec 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/PGOOptions.h"
@@ -30,8 +29,6 @@
namespace llvm {
class AAManager;
-template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
-class PassManager;
using ModulePassManager = PassManager<Module>;
class Function;
@@ -225,7 +222,10 @@ public:
/// Returns the code model. The choices are small, kernel, medium, large, and
/// target default.
- CodeModel::Model getCodeModel() const;
+ CodeModel::Model getCodeModel() const { return CMModel; }
+
+ /// Set the code model.
+ void setCodeModel(CodeModel::Model CM) { CMModel = CM; }
bool isPositionIndependent() const;
@@ -260,6 +260,8 @@ public:
Options.SupportsDebugEntryValues = Enable;
}
+ void setCFIFixup(bool Enable) { Options.EnableCFIFixup = Enable; }
+
bool getAIXExtendedAltivecABI() const {
return Options.EnableAIXExtendedAltivecABI;
}
@@ -337,13 +339,13 @@ public:
/// This is used to construct the new pass manager's target IR analysis pass,
/// set up appropriately for this target machine. Even the old pass manager
/// uses this to answer queries about the IR.
- TargetIRAnalysis getTargetIRAnalysis();
+ TargetIRAnalysis getTargetIRAnalysis() const;
/// Return a TargetTransformInfo for a given function.
///
/// The returned TargetTransformInfo is specialized to the subtarget
/// corresponding to \p F.
- virtual TargetTransformInfo getTargetTransformInfo(const Function &F);
+ virtual TargetTransformInfo getTargetTransformInfo(const Function &F) const;
/// Allow the target to modify the pass manager, e.g. by calling
/// PassManagerBuilder::addExtension.
@@ -398,6 +400,12 @@ public:
virtual unsigned getSjLjDataSize() const { return DefaultSjLjDataSize; }
static std::pair<int, int> parseBinutilsVersion(StringRef Version);
+
+ /// getAddressSpaceForPseudoSourceKind - Given the kind of memory
+ /// (e.g. stack) the target returns the corresponding address space.
+ virtual unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
+ return 0;
+ }
};
/// This class describes a target machine that is implemented with the LLVM
@@ -417,7 +425,7 @@ public:
///
/// The TTI returned uses the common code generator to answer queries about
/// the IR.
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
/// Create a pass configuration object to be used by addPassToEmitX methods
/// for generating a pipeline of CodeGen passes.
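
Making getCodeModel() a cheap inline accessor and pairing it with the new setCodeModel() lets clients adjust an already-constructed machine. A small sketch, assuming TM came from the usual Target::createTargetMachine() path:

    #include "llvm/Support/CodeGen.h"
    #include "llvm/Target/TargetMachine.h"

    void forceLargeCodeModel(llvm::TargetMachine &TM) {
      if (TM.getCodeModel() != llvm::CodeModel::Large)
        TM.setCodeModel(llvm::CodeModel::Large); // setter introduced above
    }
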
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index a636c4822832..6083d18d96f7 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -130,19 +130,21 @@ namespace llvm {
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
- DisableIntegratedAS(false), RelaxELFRelocations(false),
- FunctionSections(false), DataSections(false),
- IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true),
- UniqueSectionNames(true), UniqueBasicBlockSectionNames(false),
- TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
- EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
+ LowerGlobalDtorsViaCxaAtExit(false), DisableIntegratedAS(false),
+ RelaxELFRelocations(false), FunctionSections(false),
+ DataSections(false), IgnoreXCOFFVisibility(false),
+ XCOFFTracebackTable(true), UniqueSectionNames(true),
+ UniqueBasicBlockSectionNames(false), TrapUnreachable(false),
+ NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false),
+ ExplicitEmulatedTLS(false), EnableIPRA(false),
EmitStackSizeSection(false), EnableMachineOutliner(false),
EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
EmitAddrsig(false), EmitCallSiteInfo(false),
SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false),
XRayOmitFunctionIndex(false), DebugStrictDwarf(false),
- Hotpatch(false),
+ Hotpatch(false), PPCGenScalarMASSEntries(false), JMCInstrument(false),
+ EnableCFIFixup(false), MisExpect(false),
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
/// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -245,6 +247,10 @@ namespace llvm {
/// constructors.
unsigned UseInitArray : 1;
+ /// Use __cxa_atexit to register global destructors; determines how
+ /// llvm.global_dtors is lowered.
+ unsigned LowerGlobalDtorsViaCxaAtExit : 1;
+
/// Disable the integrated assembler.
unsigned DisableIntegratedAS : 1;
@@ -345,6 +351,19 @@ namespace llvm {
/// Emit the hotpatch flag in CodeView debug.
unsigned Hotpatch : 1;
    /// Enables scalar MASS conversions.
+ unsigned PPCGenScalarMASSEntries : 1;
+
+ /// Enable JustMyCode instrumentation.
+ unsigned JMCInstrument : 1;
+
+ /// Enable the CFIFixup pass.
+ unsigned EnableCFIFixup : 1;
+
    /// When set to true, enables MisExpect diagnostics.
    /// By default, it is set to false.
+ unsigned MisExpect : 1;
+
/// Name of the stack usage file (i.e., .su file) if user passes
/// -fstack-usage. If empty, it can be implied that -fstack-usage is not
/// passed on the command line.
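
The new options are plain one-bit fields, configured like the existing ones before a TargetMachine is created. A brief sketch with an invented factory helper:

    #include "llvm/Target/TargetOptions.h"

    llvm::TargetOptions makeOptions() {
      llvm::TargetOptions Opts;
      Opts.LowerGlobalDtorsViaCxaAtExit = 1; // lower llvm.global_dtors via __cxa_atexit
      Opts.EnableCFIFixup = 1;               // run the CFIFixup pass
      Opts.MisExpect = 0;                    // MisExpect diagnostics off by default
      return Opts;
    }
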
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index d8ef7c49a5f9..47b686aca7b5 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -238,6 +238,16 @@ def SDTMaskedLoad: SDTypeProfile<1, 4, [ // masked load
SDTCisSameNumEltsAs<0, 3>
]>;
+def SDTMaskedGather : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisPtrTy<3>, SDTCisVec<4>,
+ SDTCisSameNumEltsAs<0, 2>, SDTCisSameNumEltsAs<0, 4>
+]>;
+
+def SDTMaskedScatter : SDTypeProfile<0, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>,
+ SDTCisSameNumEltsAs<0, 1>, SDTCisSameNumEltsAs<0, 3>
+]>;
+
def SDTVecShuffle : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
@@ -365,6 +375,10 @@ def mul : SDNode<"ISD::MUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def mulhs : SDNode<"ISD::MULHS" , SDTIntBinOp, [SDNPCommutative]>;
def mulhu : SDNode<"ISD::MULHU" , SDTIntBinOp, [SDNPCommutative]>;
+def avgfloors : SDNode<"ISD::AVGFLOORS" , SDTIntBinOp, [SDNPCommutative]>;
+def avgflooru : SDNode<"ISD::AVGFLOORU" , SDTIntBinOp, [SDNPCommutative]>;
+def avgceils : SDNode<"ISD::AVGCEILS" , SDTIntBinOp, [SDNPCommutative]>;
+def avgceilu : SDNode<"ISD::AVGCEILU" , SDTIntBinOp, [SDNPCommutative]>;
def abds : SDNode<"ISD::ABDS" , SDTIntBinOp, [SDNPCommutative]>;
def abdu : SDNode<"ISD::ABDU" , SDTIntBinOp, [SDNPCommutative]>;
def smullohi : SDNode<"ISD::SMUL_LOHI" , SDTIntBinHiLoOp, [SDNPCommutative]>;
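For reference, the four new averaging nodes follow the documented ISD semantics: the add is performed on a one-bit-wider type so it cannot overflow, FLOOR shifts right (rounds down) while CEIL adds one before the shift (rounds up), and the S/U suffix selects sign versus zero extension. A scalar sketch of the unsigned i8 forms (helper names hypothetical):

  #include <cstdint>
  uint8_t avgflooru8(uint8_t A, uint8_t B) {
    return uint8_t((uint16_t(A) + uint16_t(B)) >> 1);     // shr(add(ext,ext),1)
  }
  uint8_t avgceilu8(uint8_t A, uint8_t B) {
    return uint8_t((uint16_t(A) + uint16_t(B) + 1) >> 1); // shr(add(ext,ext,1),1)
  }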
@@ -648,6 +662,12 @@ def masked_st : SDNode<"ISD::MSTORE", SDTMaskedStore,
def masked_ld : SDNode<"ISD::MLOAD", SDTMaskedLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def masked_gather : SDNode<"ISD::MGATHER", SDTMaskedGather,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
// and truncst (see below).
def ld : SDNode<"ISD::LOAD" , SDTLoad,
@@ -1624,6 +1644,124 @@ def atomic_load_64 :
let MemoryVT = i64;
}
+def nonext_masked_gather :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ return cast<MaskedGatherSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
+// Any extending masked gather fragments.
+def ext_masked_gather_i8 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::EXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def ext_masked_gather_i16 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::EXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def ext_masked_gather_i32 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::EXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+// Sign extending masked gather fragments.
+def sext_masked_gather_i8 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::SEXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def sext_masked_gather_i16 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::SEXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def sext_masked_gather_i32 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::SEXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+// Zero extending masked gather fragments.
+def zext_masked_gather_i8 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::ZEXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def zext_masked_gather_i16 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::ZEXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def zext_masked_gather_i32 :
+ PatFrag<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ (masked_gather node:$def, node:$pred, node:$ptr, node:$idx), [{
+ auto MGN = cast<MaskedGatherSDNode>(N);
+ return MGN->getExtensionType() == ISD::ZEXTLOAD &&
+ MGN->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+// Any/Zero extending masked gather fragments.
+def azext_masked_gather_i8 :
+ PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ [(ext_masked_gather_i8 node:$def, node:$pred, node:$ptr, node:$idx),
+ (zext_masked_gather_i8 node:$def, node:$pred, node:$ptr, node:$idx)]>;
+def azext_masked_gather_i16 :
+ PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ [(ext_masked_gather_i16 node:$def, node:$pred, node:$ptr, node:$idx),
+ (zext_masked_gather_i16 node:$def, node:$pred, node:$ptr, node:$idx)]>;
+def azext_masked_gather_i32 :
+ PatFrags<(ops node:$def, node:$pred, node:$ptr, node:$idx),
+ [(ext_masked_gather_i32 node:$def, node:$pred, node:$ptr, node:$idx),
+ (zext_masked_gather_i32 node:$def, node:$pred, node:$ptr, node:$idx)]>;
+
+def nontrunc_masked_scatter :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{
+ return !cast<MaskedScatterSDNode>(N)->isTruncatingStore();
+}]>;
+
+// Truncating masked scatter fragments.
+def trunc_masked_scatter_i8 :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{
+ auto MSN = cast<MaskedScatterSDNode>(N);
+ return MSN->isTruncatingStore() &&
+ MSN->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def trunc_masked_scatter_i16 :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{
+ auto MSN = cast<MaskedScatterSDNode>(N);
+ return MSN->isTruncatingStore() &&
+ MSN->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def trunc_masked_scatter_i32 :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (masked_scatter node:$val, node:$pred, node:$ptr, node:$idx), [{
+ auto MSN = cast<MaskedScatterSDNode>(N);
+ return MSN->isTruncatingStore() &&
+ MSN->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
//===----------------------------------------------------------------------===//
// Selection DAG Pattern Support.
//
diff --git a/llvm/include/llvm/Testing/Support/SupportHelpers.h b/llvm/include/llvm/Testing/Support/SupportHelpers.h
index 2419fc95d817..b1c59cf97f7f 100644
--- a/llvm/include/llvm/Testing/Support/SupportHelpers.h
+++ b/llvm/include/llvm/Testing/Support/SupportHelpers.h
@@ -77,7 +77,7 @@ public:
bool MatchAndExplain(const llvm::Optional<T> &Input,
testing::MatchResultListener *L) const override {
- return Input && ValueMatcher.MatchAndExplain(Input.getValue(), L);
+ return Input && ValueMatcher.MatchAndExplain(*Input, L);
}
void DescribeTo(std::ostream *OS) const override {
@@ -238,6 +238,12 @@ public:
}
}
+ TempFile(const TempFile &) = delete;
+ TempFile &operator=(const TempFile &) = delete;
+
+ TempFile(TempFile &&) = default;
+ TempFile &operator=(TempFile &&) = default;
+
/// The path to the file.
StringRef path() const { return Path; }
};
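Deleting the copy operations while defaulting the moves makes the ownership model explicit: exactly one TempFile object is responsible for unlinking the path, and it can be handed around by move. A hedged sketch (constructor arguments assumed from the class' usual shape):

  llvm::unittest::TempFile A("example", "txt", "contents");
  // llvm::unittest::TempFile B = A;          // ill-formed now: copying is deleted
  llvm::unittest::TempFile B = std::move(A);  // fine: ownership transfers once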
diff --git a/llvm/include/llvm/TextAPI/Symbol.h b/llvm/include/llvm/TextAPI/Symbol.h
index dfc84908bba2..1c25295b299d 100644
--- a/llvm/include/llvm/TextAPI/Symbol.h
+++ b/llvm/include/llvm/TextAPI/Symbol.h
@@ -11,7 +11,6 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TextAPI/ArchitectureSet.h"
#include "llvm/TextAPI/Target.h"
diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 072ccf7320e8..3931c9c55c07 100644
--- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -18,10 +18,12 @@
#define LLVM_TRANSFORMS_AGGRESSIVEINSTCOMBINE_AGGRESSIVEINSTCOMBINE_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Function;
+class FunctionPass;
+
class AggressiveInstCombinePass
: public PassInfoMixin<AggressiveInstCombinePass> {
diff --git a/llvm/include/llvm/Transforms/Coroutines.h b/llvm/include/llvm/Transforms/Coroutines.h
deleted file mode 100644
index f68ef705fdef..000000000000
--- a/llvm/include/llvm/Transforms/Coroutines.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- Coroutines.h - Coroutine Transformations ----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Declare accessor functions for coroutine lowering passes.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_COROUTINES_H
-#define LLVM_TRANSFORMS_COROUTINES_H
-
-namespace llvm {
-
-class Pass;
-class PassManagerBuilder;
-
-/// Add all coroutine passes to appropriate extension points.
-void addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder);
-
-/// Lower coroutine intrinsics that are not needed by later passes.
-Pass *createCoroEarlyLegacyPass();
-
-/// Split up coroutines into multiple functions driving their state machines.
-Pass *createCoroSplitLegacyPass(bool IsOptimizing = false);
-
-/// Analyze coroutines use sites, devirtualize resume/destroy calls and elide
-/// heap allocation for coroutine frame where possible.
-Pass *createCoroElideLegacyPass();
-
-/// Lower all remaining coroutine intrinsics.
-Pass *createCoroCleanupLegacyPass();
-
-}
-
-#endif
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h b/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
index 7ecdc050335d..3000a38258f4 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h
@@ -18,10 +18,10 @@
namespace llvm {
-class Function;
+class Module;
struct CoroCleanupPass : PassInfoMixin<CoroCleanupPass> {
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h b/llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h
new file mode 100644
index 000000000000..ea19ec533c4d
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroConditionalWrapper.h
@@ -0,0 +1,30 @@
+//===---- CoroConditionalWrapper.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_COROCONDITIONALWRAPPER_H
+#define LLVM_TRANSFORMS_COROUTINES_COROCONDITIONALWRAPPER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+// Only runs passes in the contained pass manager if the module contains any
+// coroutine intrinsic declarations.
+struct CoroConditionalWrapper : PassInfoMixin<CoroConditionalWrapper> {
+ CoroConditionalWrapper(ModulePassManager &&);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() { return true; }
+
+private:
+ ModulePassManager PM;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_COROUTINES_COROCONDITIONALWRAPPER_H
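A plausible way a pipeline would use the wrapper, assuming the module-level coroutine passes from this patch and an enclosing ModulePassManager named MPM (a sketch, not the actual pipeline wiring):

  llvm::ModulePassManager CoroPM;
  CoroPM.addPass(llvm::CoroEarlyPass());
  CoroPM.addPass(llvm::CoroCleanupPass());
  // Runs the nested passes only if coroutine intrinsics are declared.
  MPM.addPass(llvm::CoroConditionalWrapper(std::move(CoroPM)));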
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h b/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
index 3f5ec2abd172..d55dcc6dfa6d 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h
@@ -21,10 +21,10 @@
namespace llvm {
-class Function;
+class Module;
struct CoroEarlyPass : PassInfoMixin<CoroEarlyPass> {
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 67b9a93c47b2..6b7d4f4821f0 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -152,13 +152,6 @@ ModulePass *createDeadArgEliminationPass();
ModulePass *createDeadArgHackingPass();
//===----------------------------------------------------------------------===//
-/// createArgumentPromotionPass - This pass promotes "by reference" arguments to
-/// be passed by value if the number of elements passed is smaller or
-/// equal to maxElements (maxElements == 0 means always promote).
-///
-Pass *createArgumentPromotionPass(unsigned maxElements = 3);
-
-//===----------------------------------------------------------------------===//
/// createOpenMPOptLegacyPass - OpenMP specific optimizations.
Pass *createOpenMPOptCGSCCLegacyPass();
diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
index 78b2f909f1c9..252cfd4dc5f3 100644
--- a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
+++ b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -15,10 +15,12 @@
#define LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Module;
+class Pass;
+
/// Inlines functions marked as "always_inline".
///
/// Note that this does not inline call sites marked as always_inline and does
diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 225def99678a..3865f098b8de 100644
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -14,7 +14,6 @@
#include "llvm/IR/PassManager.h"
namespace llvm {
-class TargetTransformInfo;
/// Argument promotion pass.
///
@@ -25,10 +24,7 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
unsigned MaxElements;
public:
- ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
-
- /// Checks if a type could have padding bytes.
- static bool isDenselyPacked(Type *type, const DataLayout &DL);
+ ArgumentPromotionPass(unsigned MaxElements = 2u) : MaxElements(MaxElements) {}
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 7eee16f71d64..17e29695ab73 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -116,15 +116,24 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/AbstractCallSite.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include <map>
+
namespace llvm {
+class DataLayout;
+class LLVMContext;
+class Pass;
+template <typename Fn> class function_ref;
struct AADepGraphNode;
struct AADepGraph;
struct Attributor;
@@ -140,6 +149,24 @@ class Function;
/// Abstract Attribute helper functions.
namespace AA {
+/// Flags to distinguish intra-procedural queries from *potentially*
+/// inter-procedural queries. Note that information can be valid for both and
+/// therefore both bits might be set.
+enum ValueScope : uint8_t {
+ Intraprocedural = 1,
+ Interprocedural = 2,
+};
+
+struct ValueAndContext : public std::pair<Value *, const Instruction *> {
+ using Base = std::pair<Value *, const Instruction *>;
+ ValueAndContext(const Base &B) : Base(B) {}
+ ValueAndContext(Value &V, const Instruction *CtxI) : Base(&V, CtxI) {}
+ ValueAndContext(Value &V, const Instruction &CtxI) : Base(&V, &CtxI) {}
+
+ Value *getValue() const { return this->first; }
+ const Instruction *getCtxI() const { return this->second; }
+};
+
/// Return true if \p I is a `nosync` instruction. Use generic reasoning and
/// potentially the corresponding AANoSync.
bool isNoSyncInst(Attributor &A, const Instruction &I,
@@ -147,18 +174,20 @@ bool isNoSyncInst(Attributor &A, const Instruction &I,
/// Return true if \p V is dynamically unique, that is, there are no two
/// "instances" of \p V at runtime with different values.
+/// Note: If \p ForAnalysisOnly is set we only check that the Attributor will
+/// never use \p V to represent two "instances", not that \p V could not
+/// technically represent them.
bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
- const Value &V);
+ const Value &V, bool ForAnalysisOnly = true);
/// Return true if \p V is a valid value in \p Scope, that is a constant or an
/// instruction/argument of \p Scope.
bool isValidInScope(const Value &V, const Function *Scope);
-/// Return true if \p V is a valid value at position \p CtxI, that is a
-/// constant, an argument of the same function as \p CtxI, or an instruction in
-/// that function that dominates \p CtxI.
-bool isValidAtPosition(const Value &V, const Instruction &CtxI,
- InformationCache &InfoCache);
+/// Return true if the value of \p VAC is valid at the position of \p VAC,
+/// that is a constant, an argument of the same function, or an instruction in
+/// that function that dominates the position.
+bool isValidAtPosition(const ValueAndContext &VAC, InformationCache &InfoCache);
/// Try to convert \p V to type \p Ty without introducing new instructions. If
/// this is not possible return `nullptr`. Note: this function basically knows
@@ -192,11 +221,29 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
const Instruction *CtxI,
- bool Intraprocedural = false);
+ bool &UsedAssumedInformation,
+ AA::ValueScope VS = Interprocedural);
+
+/// Collect all potential values \p LI could read into \p PotentialValues. That
+/// is, the only values read by \p LI are assumed to be known and all are in
+/// \p PotentialValues. \p PotentialValueOrigins will contain all the
+/// instructions that might have put a potential value into \p PotentialValues.
+/// Dependences onto \p QueryingAA are properly tracked, \p
+/// UsedAssumedInformation will inform the caller if assumed information was
+/// used.
+///
+/// \returns True if the assumed potential copies are all in \p PotentialValues,
+/// false if something went wrong and the copies could not be
+/// determined.
+bool getPotentiallyLoadedValues(
+ Attributor &A, LoadInst &LI, SmallSetVector<Value *, 4> &PotentialValues,
+ SmallSetVector<Instruction *, 4> &PotentialValueOrigins,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact = false);
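A sketch of a caller, assuming it runs inside an abstract attribute (so *this is the querying AA) with a LoadInst LI at hand:

  SmallSetVector<Value *, 4> PotentialValues;
  SmallSetVector<Instruction *, 4> PotentialValueOrigins;
  bool UsedAssumedInformation = false;
  bool Complete = AA::getPotentiallyLoadedValues(
      A, LI, PotentialValues, PotentialValueOrigins, *this,
      UsedAssumedInformation);
  // Complete => every value LI might read is now in PotentialValues.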
/// Collect all potential values of the one stored by \p SI into
/// \p PotentialCopies. That is, the only copies that were made via the
-/// store are assumed to be known and all in \p PotentialCopies. Dependences
+/// store are assumed to be known and all are in \p PotentialCopies. Dependences
/// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
/// inform the caller if assumed information was used.
///
@@ -205,7 +252,8 @@ bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
/// determined.
bool getPotentialCopiesOfStoredValue(
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
- const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact = false);
/// Return true if \p IRP is readonly. This will query respective AAs that
/// deduce the information and introduce dependences for \p QueryingAA.
@@ -237,6 +285,26 @@ bool isPotentiallyReachable(
} // namespace AA
+template <>
+struct DenseMapInfo<AA::ValueAndContext>
+ : public DenseMapInfo<AA::ValueAndContext::Base> {
+ using Base = DenseMapInfo<AA::ValueAndContext::Base>;
+ static inline AA::ValueAndContext getEmptyKey() {
+ return Base::getEmptyKey();
+ }
+ static inline AA::ValueAndContext getTombstoneKey() {
+ return Base::getTombstoneKey();
+ }
+ static unsigned getHashValue(const AA::ValueAndContext &VAC) {
+ return Base::getHashValue(VAC);
+ }
+
+ static bool isEqual(const AA::ValueAndContext &LHS,
+ const AA::ValueAndContext &RHS) {
+ return Base::isEqual(LHS, RHS);
+ }
+};
+
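Since the specialization just forwards to the underlying pair, ValueAndContext can directly key a DenseMap; a minimal sketch, with V and CtxI assumed in scope:

  llvm::DenseMap<llvm::AA::ValueAndContext, unsigned> Seen;
  llvm::AA::ValueAndContext VAC(V, CtxI); // V : Value &, CtxI : const Instruction *
  ++Seen[VAC];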
/// The value passed to the command line option that defines the maximal
/// initialization
/// chain length.
extern unsigned MaxInitializationChainLength;
@@ -1033,6 +1101,10 @@ struct InformationCache {
return FI.CalledViaMustTail || FI.ContainsMustTailCall;
}
+ bool isOnlyUsedByAssume(const Instruction &I) const {
+ return AssumeOnlyValues.contains(&I);
+ }
+
/// Return the analysis result from a pass \p AP for function \p F.
template <typename AP>
typename AP::Result *getAnalysisResultForFunction(const Function &F) {
@@ -1125,6 +1197,9 @@ private:
/// A map with knowledge retained in `llvm.assume` instructions.
RetainedKnowledgeMap KnowledgeMap;
+ /// A container for all instructions that are only used by `llvm.assume`.
+ SetVector<const Instruction *> AssumeOnlyValues;
+
/// Getters for analysis.
AnalysisGetter &AG;
@@ -1143,6 +1218,53 @@ private:
friend struct Attributor;
};
+/// Configuration for the Attributor.
+struct AttributorConfig {
+
+ AttributorConfig(CallGraphUpdater &CGUpdater) : CGUpdater(CGUpdater) {}
+
+ /// Is the user of the Attributor a module pass or not. This determines what
+ /// IR we can look at and modify. If it is a module pass we might deduce facts
+ /// outside the initial function set and modify functions outside that set,
+ /// but only as part of the optimization of the functions in the initial
+ /// function set. For CGSCC passes we can look at the IR of the module slice
+ /// but never run any deduction, or perform any modification, outside the
+ /// initial function set (which we assume is the SCC).
+ bool IsModulePass = true;
+
+ /// Flag to determine if we can delete functions or keep dead ones around.
+ bool DeleteFns = true;
+
+ /// Flag to determine if we rewrite function signatures.
+ bool RewriteSignatures = true;
+
+ /// Flag to determine if we want to initialize all default AAs for an internal
+ /// function marked live.
+ /// TODO: This should probably be a callback, or maybe
+ /// identifyDefaultAbstractAttributes should be virtual, something to allow
+ /// customizable lazy initialization for internal functions.
+ bool DefaultInitializeLiveInternals = true;
+
+ /// Helper to update an underlying call graph and to delete functions.
+ CallGraphUpdater &CGUpdater;
+
+ /// If not null, a set limiting the attribute opportunities.
+ DenseSet<const char *> *Allowed = nullptr;
+
+ /// Maximum number of iterations to run until fixpoint.
+ Optional<unsigned> MaxFixpointIterations = None;
+
+ /// A callback function that returns an ORE object from a Function pointer.
+ ///{
+ using OptimizationRemarkGetter =
+ function_ref<OptimizationRemarkEmitter &(Function *)>;
+ OptimizationRemarkGetter OREGetter = nullptr;
+ ///}
+
+ /// The name of the pass running the attributor, used to emit remarks.
+ const char *PassName = nullptr;
+};
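Call sites migrate from the long constructor overloads below to this config object; a sketch of the new pattern, with Functions and InfoCache assumed to be set up as before:

  CallGraphUpdater CGUpdater;
  AttributorConfig AC(CGUpdater);
  AC.IsModulePass = true;
  AC.DeleteFns = false;          // keep dead functions around
  AC.MaxFixpointIterations = 32; // cap the fixpoint loop
  Attributor A(Functions, InfoCache, AC);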
+
/// The fixpoint analysis framework that orchestrates the attribute deduction.
///
/// The Attributor provides a general abstract analysis framework (guided
@@ -1172,52 +1294,17 @@ private:
/// described in the file comment.
struct Attributor {
- using OptimizationRemarkGetter =
- function_ref<OptimizationRemarkEmitter &(Function *)>;
-
/// Constructor
///
/// \param Functions The set of functions we are deriving attributes for.
/// \param InfoCache Cache to hold various information accessible for
/// the abstract attributes.
- /// \param CGUpdater Helper to update an underlying call graph.
- /// \param Allowed If not null, a set limiting the attribute opportunities.
- /// \param DeleteFns Whether to delete functions.
- /// \param RewriteSignatures Whether to rewrite function signatures.
+ /// \param Configuration The Attributor configuration which determines what
+ /// generic features to use.
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
- CallGraphUpdater &CGUpdater,
- DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
- bool RewriteSignatures = true)
+ AttributorConfig Configuration)
: Allocator(InfoCache.Allocator), Functions(Functions),
- InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
- DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
- MaxFixpointIterations(None), OREGetter(None), PassName("") {}
-
- /// Constructor
- ///
- /// \param Functions The set of functions we are deriving attributes for.
- /// \param InfoCache Cache to hold various information accessible for
- /// the abstract attributes.
- /// \param CGUpdater Helper to update an underlying call graph.
- /// \param Allowed If not null, a set limiting the attribute opportunities.
- /// \param DeleteFns Whether to delete functions
- /// \param RewriteSignatures Whether to rewrite function signatures.
- /// \param MaxFixpointIterations Maximum number of iterations to run until
- /// fixpoint.
- /// \param OREGetter A callback function that returns an ORE object from a
- /// Function pointer.
- /// \param PassName The name of the pass emitting remarks.
- Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
- CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
- bool DeleteFns, bool RewriteSignatures,
- Optional<unsigned> MaxFixpointIterations,
- OptimizationRemarkGetter OREGetter, const char *PassName)
- : Allocator(InfoCache.Allocator), Functions(Functions),
- InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
- DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
- MaxFixpointIterations(MaxFixpointIterations),
- OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
- PassName(PassName) {}
+ InfoCache(InfoCache), Configuration(Configuration) {}
~Attributor();
@@ -1301,11 +1388,15 @@ struct Attributor {
registerAA(AA);
// For now we ignore naked and optnone functions.
- bool Invalidate = Allowed && !Allowed->count(&AAType::ID);
- const Function *FnScope = IRP.getAnchorScope();
- if (FnScope)
- Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
- FnScope->hasFnAttribute(Attribute::OptimizeNone);
+ bool Invalidate =
+ Configuration.Allowed && !Configuration.Allowed->count(&AAType::ID);
+ const Function *AnchorFn = IRP.getAnchorScope();
+ if (AnchorFn) {
+ Invalidate |=
+ AnchorFn->hasFnAttribute(Attribute::Naked) ||
+ AnchorFn->hasFnAttribute(Attribute::OptimizeNone) ||
+ (!isModulePass() && !getInfoCache().isInModuleSlice(*AnchorFn));
+ }
// Avoid too many nested initializations to prevent a stack overflow.
Invalidate |= InitializationChainLength > MaxInitializationChainLength;
@@ -1325,15 +1416,12 @@ struct Attributor {
--InitializationChainLength;
}
- // Initialize and update is allowed for code outside of the current function
- // set, but only if it is part of module slice we are allowed to look at.
- // Only exception is AAIsDeadFunction whose initialization is prevented
- // directly, since we don't to compute it twice.
- if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
- if (!getInfoCache().isInModuleSlice(*FnScope)) {
- AA.getState().indicatePessimisticFixpoint();
- return AA;
- }
+ // We update only AAs associated with functions in the Functions set or
+ // call sites of them.
+ if ((AnchorFn && !Functions.count(const_cast<Function *>(AnchorFn))) &&
+ !Functions.count(IRP.getAssociatedFunction())) {
+ AA.getState().indicatePessimisticFixpoint();
+ return AA;
}
// If this is queried in the manifest stage, we force the AA to indicate
@@ -1443,10 +1531,7 @@ struct Attributor {
InformationCache &getInfoCache() { return InfoCache; }
/// Return true if this is a module pass, false otherwise.
- bool isModulePass() const {
- return !Functions.empty() &&
- Functions.size() == Functions.front()->getParent()->size();
- }
+ bool isModulePass() const { return Configuration.IsModulePass; }
/// Return true if we derive attributes for \p Fn
bool isRunOn(Function &Fn) const {
@@ -1481,7 +1566,8 @@ struct Attributor {
assert(F.hasLocalLinkage() &&
"Only local linkage is assumed dead initially.");
- identifyDefaultAbstractAttributes(const_cast<Function &>(F));
+ if (Configuration.DefaultInitializeLiveInternals)
+ identifyDefaultAbstractAttributes(const_cast<Function &>(F));
}
/// Helper function to remove callsite.
@@ -1489,7 +1575,7 @@ struct Attributor {
if (!CI)
return;
- CGUpdater.removeCallSite(*CI);
+ Configuration.CGUpdater.removeCallSite(*CI);
}
/// Record that \p U is to be replaces with \p NV after information was
@@ -1505,11 +1591,17 @@ struct Attributor {
return true;
}
- /// Helper function to replace all uses of \p V with \p NV. Return true if
- /// there is any change. The flag \p ChangeDroppable indicates if dropppable
- /// uses should be changed too.
- bool changeValueAfterManifest(Value &V, Value &NV,
- bool ChangeDroppable = true) {
+ /// Helper function to replace all uses associated with \p IRP with \p NV.
+ /// Return true if there is any change. The flag \p ChangeDroppable indicates
+ /// if droppable uses should be changed too.
+ bool changeAfterManifest(const IRPosition IRP, Value &NV,
+ bool ChangeDroppable = true) {
+ if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_ARGUMENT) {
+ auto *CB = cast<CallBase>(IRP.getCtxI());
+ return changeUseAfterManifest(
+ CB->getArgOperandUse(IRP.getCallSiteArgNo()), NV);
+ }
+ Value &V = IRP.getAssociatedValue();
auto &Entry = ToBeChangedValues[&V];
Value *&CurNV = Entry.first;
if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() ||
@@ -1532,7 +1624,7 @@ struct Attributor {
/// is used, e.g., to replace \p II with a call, after information was
/// manifested.
void registerInvokeWithDeadSuccessor(InvokeInst &II) {
- InvokeWithDeadSuccessor.push_back(&II);
+ InvokeWithDeadSuccessor.insert(&II);
}
/// Record that \p I is deleted after information was manifested. This also
@@ -1551,7 +1643,9 @@ struct Attributor {
/// Record that \p F is deleted after information was manifested.
void deleteAfterManifest(Function &F) {
- if (DeleteFns)
+ errs() << "Delete " << F.getName() << " : " << (Configuration.DeleteFns)
+ << "\n";
+ if (Configuration.DeleteFns)
ToBeDeletedFunctions.insert(&F);
}
@@ -1668,6 +1762,7 @@ public:
const AbstractAttribute &QueryingAA, const Value &V,
bool CheckBBLivenessOnly = false,
DepClassTy LivenessDepClass = DepClassTy::OPTIONAL,
+ bool IgnoreDroppableUses = true,
function_ref<bool(const Use &OldU, const Use &NewU)>
EquivalentUseCB = nullptr);
@@ -1685,37 +1780,41 @@ public:
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Instruction *I, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
- if (!OREGetter)
+ if (!Configuration.OREGetter)
return;
Function *F = I->getFunction();
- auto &ORE = OREGetter.getValue()(F);
+ auto &ORE = Configuration.OREGetter(F);
if (RemarkName.startswith("OMP"))
ORE.emit([&]() {
- return RemarkCB(RemarkKind(PassName, RemarkName, I))
+ return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, I))
<< " [" << RemarkName << "]";
});
else
- ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
+ ORE.emit([&]() {
+ return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, I));
+ });
}
/// Emit a remark on a function.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Function *F, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
- if (!OREGetter)
+ if (!Configuration.OREGetter)
return;
- auto &ORE = OREGetter.getValue()(F);
+ auto &ORE = Configuration.OREGetter(F);
if (RemarkName.startswith("OMP"))
ORE.emit([&]() {
- return RemarkCB(RemarkKind(PassName, RemarkName, F))
+ return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, F))
<< " [" << RemarkName << "]";
});
else
- ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
+ ORE.emit([&]() {
+ return RemarkCB(RemarkKind(Configuration.PassName, RemarkName, F));
+ });
}
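Callers are unaffected by the getter moving into the configuration; a hedged sketch of emitting a function remark (remark name hypothetical, and nothing is emitted unless Configuration.OREGetter was set):

  A.emitRemark<OptimizationRemark>(F, "DeadFunction", [](OptimizationRemark OR) {
    return OR << "function removed as dead";
  });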
/// Helper struct used in the communication between an abstract attribute (AA)
@@ -1824,23 +1923,24 @@ public:
/// This method will evaluate \p Pred on call sites and return
/// true if \p Pred holds in every call site. However, this is only possible
/// if all call sites are known, hence the function has internal linkage.
- /// If true is returned, \p AllCallSitesKnown is set if all possible call
- /// sites of the function have been visited.
+ /// If true is returned, \p UsedAssumedInformation is set if assumed
+ /// information was used to skip or simplify potential call sites.
bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const AbstractAttribute &QueryingAA,
- bool RequireAllCallSites, bool &AllCallSitesKnown);
+ bool RequireAllCallSites,
+ bool &UsedAssumedInformation);
/// Check \p Pred on all call sites of \p Fn.
///
/// This method will evaluate \p Pred on call sites and return
/// true if \p Pred holds in every call site. However, this is only possible
/// if all call sites are known, hence the function has internal linkage.
- /// If true is returned, \p AllCallSitesKnown is set if all possible call
- /// sites of the function have been visited.
+ /// If true is returned, \p UsedAssumedInformation is set if assumed
+ /// information was used to skip or simplify potential call sites.
bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Function &Fn, bool RequireAllCallSites,
const AbstractAttribute *QueryingAA,
- bool &AllCallSitesKnown);
+ bool &UsedAssumedInformation);
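A sketch of a caller under the new out-parameter, with the predicate purely illustrative:

  bool UsedAssumedInformation = false;
  auto Pred = [](AbstractCallSite ACS) {
    return ACS.getNumArgOperands() > 0; // hypothetical per-call-site check
  };
  bool AllOK = A.checkForAllCallSites(Pred, *this,
                                      /* RequireAllCallSites */ true,
                                      UsedAssumedInformation);
  // AllOK => Pred held on every call site that had to be inspected.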
/// Check \p Pred on all values potentially returned by \p F.
///
@@ -1859,6 +1959,19 @@ public:
bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
const AbstractAttribute &QueryingAA);
+ /// Check \p Pred on all instructions in \p Fn with an opcode present in
+ /// \p Opcodes.
+ ///
+ /// This method will evaluate \p Pred on all instructions with an opcode
+ /// present in \p Opcodes and return true if \p Pred holds on all of them.
+ bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
+ const Function *Fn,
+ const AbstractAttribute &QueryingAA,
+ const ArrayRef<unsigned> &Opcodes,
+ bool &UsedAssumedInformation,
+ bool CheckBBLivenessOnly = false,
+ bool CheckPotentiallyDead = false);
+
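A sketch of the new overload, scanning a single function's stores (the opcode filter guarantees the callback only ever sees StoreInsts; predicate hypothetical):

  bool UsedAssumedInformation = false;
  auto StorePred = [](Instruction &I) {
    return !cast<StoreInst>(I).isVolatile(); // reject volatile stores
  };
  bool OnlyNonVolatile = A.checkForAllInstructions(
      StorePred, &Fn, *this, {Instruction::Store}, UsedAssumedInformation);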
/// Check \p Pred on all instructions with an opcode present in \p Opcodes.
///
/// This method will evaluate \p Pred on all instructions with an opcode
@@ -1987,7 +2100,7 @@ private:
/// (\see registerFunctionSignatureRewrite) and return Changed if the module
/// was altered.
ChangeStatus
- rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns);
+ rewriteFunctionSignatures(SmallSetVector<Function *, 8> &ModifiedFns);
/// Check if the Attribute \p AA should be seeded.
/// See getOrCreateAAFor.
@@ -2011,15 +2124,12 @@ private:
/// The information cache that holds pre-processed (LLVM-IR) information.
InformationCache &InfoCache;
- /// Helper to update an underlying call graph.
- CallGraphUpdater &CGUpdater;
-
/// Abstract Attribute dependency graph
AADepGraph DG;
/// Set of functions for which we modified the content such that it might
/// impact the call graph.
- SmallPtrSet<Function *, 8> CGModifiedFunctions;
+ SmallSetVector<Function *, 8> CGModifiedFunctions;
/// Information about a dependence. If FromAA is changed ToAA needs to be
/// updated as well.
@@ -2039,34 +2149,22 @@ private:
using DependenceVector = SmallVector<DepInfo, 8>;
SmallVector<DependenceVector *, 16> DependenceStack;
- /// If not null, a set limiting the attribute opportunities.
- const DenseSet<const char *> *Allowed;
-
- /// Whether to delete functions.
- const bool DeleteFns;
-
- /// Whether to rewrite signatures.
- const bool RewriteSignatures;
-
- /// Maximum number of fixedpoint iterations.
- Optional<unsigned> MaxFixpointIterations;
-
/// A set to remember the functions we already assume to be live and visited.
DenseSet<const Function *> VisitedFunctions;
/// Uses we replace with a new value after manifest is done. We will remove
/// then trivially dead instructions as well.
- DenseMap<Use *, Value *> ToBeChangedUses;
+ SmallMapVector<Use *, Value *, 32> ToBeChangedUses;
/// Values we replace with a new value after manifest is done. We will remove
/// then trivially dead instructions as well.
- DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues;
+ SmallMapVector<Value *, std::pair<Value *, bool>, 32> ToBeChangedValues;
/// Instructions we replace with `unreachable` insts after manifest is done.
- SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;
+ SmallSetVector<WeakVH, 16> ToBeChangedToUnreachableInsts;
/// Invoke instructions with at least a single dead successor block.
- SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;
+ SmallSetVector<WeakVH, 16> InvokeWithDeadSuccessor;
/// A flag that indicates which stage of the process we are in. Initially, the
/// phase is SEEDING. Phase is changed in `Attributor::run()`
@@ -2083,21 +2181,18 @@ private:
/// Functions, blocks, and instructions we delete after manifest is done.
///
///{
- SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
- SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks;
- SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
+ SmallSetVector<Function *, 8> ToBeDeletedFunctions;
+ SmallSetVector<BasicBlock *, 8> ToBeDeletedBlocks;
+ SmallSetVector<WeakVH, 8> ToBeDeletedInsts;
///}
- /// Callback to get an OptimizationRemarkEmitter from a Function *.
- Optional<OptimizationRemarkGetter> OREGetter;
-
/// Container with all the query AAs that requested an update via
/// registerForUpdate.
SmallSetVector<AbstractAttribute *, 16> QueryAAsAwaitingUpdate;
- /// The name of the pass to emit remarks for.
- const char *PassName = "";
+ /// User provided configuration for this Attributor instance.
+ const AttributorConfig Configuration;
friend AADepGraph;
friend AttributorCallGraph;
@@ -2515,16 +2610,6 @@ struct IntegerRangeState : public AbstractState {
unionAssumed(R.getAssumed());
}
- /// Unite known range with the passed state.
- void unionKnown(const ConstantRange &R) {
- // Don't loose a known range.
- Known = Known.unionWith(R);
- Assumed = Assumed.unionWith(Known);
- }
-
- /// See IntegerRangeState::unionKnown(..).
- void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); }
-
/// Intersect known range with the passed state.
void intersectKnown(const ConstantRange &R) {
Assumed = Assumed.intersectWith(R);
@@ -2554,8 +2639,8 @@ struct IntegerRangeState : public AbstractState {
IntegerRangeState operator&=(const IntegerRangeState &R) {
// NOTE: `&=` operator seems like `intersect` but in this case, we need to
// take `union`.
- unionKnown(R);
- unionAssumed(R);
+ Known = Known.unionWith(R.getKnown());
+ Assumed = Assumed.unionWith(R.getAssumed());
return *this;
}
};
@@ -3363,6 +3448,12 @@ protected:
/// Returns true if \p I is known dead.
virtual bool isKnownDead(const Instruction *I) const = 0;
+ /// Return true if the underlying value is a store that is known to be
+ /// removable. This is different from dead stores as the removable store
+ /// can have an effect on live values, especially loads, but that effect
+ /// is propagated which allows us to remove the store in turn.
+ virtual bool isRemovableStore() const { return false; }
+
/// This method is used to check if at least one instruction in a collection
/// of instructions is live.
template <typename T> bool isLiveInstSet(T begin, T end) const {
@@ -3618,10 +3709,10 @@ struct AAAlign : public IRAttribute<
AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return assumed alignment.
- uint64_t getAssumedAlign() const { return getAssumed(); }
+ Align getAssumedAlign() const { return Align(getAssumed()); }
/// Return known alignment.
- uint64_t getKnownAlign() const { return getKnown(); }
+ Align getKnownAlign() const { return Align(getKnown()); }
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAlign"; }
@@ -3641,6 +3732,46 @@ struct AAAlign : public IRAttribute<
static const char ID;
};
+/// An abstract interface to track if a value leaves its defining function
+/// instance.
+/// TODO: We should make it a ternary AA tracking uniqueness, and uniqueness
+/// wrt. the Attributor analysis separately.
+struct AAInstanceInfo : public StateWrapper<BooleanState, AbstractAttribute> {
+ AAInstanceInfo(const IRPosition &IRP, Attributor &A)
+ : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
+
+ /// Return true if we know that the underlying value is unique in its scope
+ /// wrt. the Attributor analysis. That means it might not be unique but we can
+ /// still use pointer equality without the risk of representing two instances
+ /// with one `llvm::Value`.
+ bool isKnownUniqueForAnalysis() const { return isKnown(); }
+
+ /// Return true if we assume that the underlying value is unique in its scope
+ /// wrt. the Attributor analysis. That means it might not be unique but we can
+ /// still use pointer equality without the risk of representing two instances
+ /// with one `llvm::Value`.
+ bool isAssumedUniqueForAnalysis() const { return isAssumed(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAInstanceInfo &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAInstanceInfo"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAInstanceInfo
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
/// An abstract interface for all nocapture attributes.
struct AANoCapture
: public IRAttribute<
@@ -4150,13 +4281,14 @@ struct AAValueConstantRange
/// Return an assumed constant for the associated value a program point \p
/// CtxI.
- Optional<ConstantInt *>
- getAssumedConstantInt(Attributor &A,
- const Instruction *CtxI = nullptr) const {
+ Optional<Constant *>
+ getAssumedConstant(Attributor &A, const Instruction *CtxI = nullptr) const {
ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
- if (auto *C = RangeV.getSingleElement())
- return cast<ConstantInt>(
- ConstantInt::get(getAssociatedValue().getType(), *C));
+ if (auto *C = RangeV.getSingleElement()) {
+ Type *Ty = getAssociatedValue().getType();
+ return cast_or_null<Constant>(
+ AA::getWithType(*ConstantInt::get(Ty->getContext(), *C), *Ty));
+ }
if (RangeV.isEmptySet())
return llvm::None;
return nullptr;
@@ -4185,10 +4317,9 @@ struct AAValueConstantRange
/// contains every possible value (i.e. we cannot in any way limit the value
/// that the target position can take). That never happens naturally, we only
/// force it. As for the conditions under which we force it, see
-/// AAPotentialValues.
-template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
-struct PotentialValuesState : AbstractState {
- using SetTy = DenseSet<MemberTy, KeyInfo>;
+/// AAPotentialConstantValues.
+template <typename MemberTy> struct PotentialValuesState : AbstractState {
+ using SetTy = SmallSetVector<MemberTy, 8>;
PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}
@@ -4247,7 +4378,7 @@ struct PotentialValuesState : AbstractState {
return PotentialValuesState(true);
}
- static PotentialValuesState getBestState(PotentialValuesState &PVS) {
+ static PotentialValuesState getBestState(const PotentialValuesState &PVS) {
return getBestState();
}
@@ -4278,6 +4409,12 @@ struct PotentialValuesState : AbstractState {
return *this;
}
+protected:
+ SetTy &getAssumedSet() {
+ assert(isValidState() && "This set should not be used when it is invalid!");
+ return Set;
+ }
+
private:
/// Check the size of this set, and invalidate when the size is no
/// less than \p MaxPotentialValues threshold.
@@ -4372,10 +4509,10 @@ raw_ostream &operator<<(raw_ostream &OS,
/// operator we do not currently handle).
///
/// TODO: Support values other than constant integers.
-struct AAPotentialValues
+struct AAPotentialConstantValues
: public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
- AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ AAPotentialConstantValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// See AbstractAttribute::getState(...).
PotentialConstantIntValuesState &getState() override { return *this; }
@@ -4384,22 +4521,23 @@ struct AAPotentialValues
}
/// Create an abstract attribute view for the position \p IRP.
- static AAPotentialValues &createForPosition(const IRPosition &IRP,
- Attributor &A);
+ static AAPotentialConstantValues &createForPosition(const IRPosition &IRP,
+ Attributor &A);
/// Return assumed constant for the associated value
- Optional<ConstantInt *>
- getAssumedConstantInt(Attributor &A,
- const Instruction *CtxI = nullptr) const {
+ Optional<Constant *>
+ getAssumedConstant(Attributor &A, const Instruction *CtxI = nullptr) const {
if (!isValidState())
return nullptr;
- if (getAssumedSet().size() == 1)
- return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
- *(getAssumedSet().begin())));
+ if (getAssumedSet().size() == 1) {
+ Type *Ty = getAssociatedValue().getType();
+ return cast_or_null<Constant>(AA::getWithType(
+ *ConstantInt::get(Ty->getContext(), *(getAssumedSet().begin())),
+ *Ty));
+ }
if (getAssumedSet().size() == 0) {
if (undefIsContained())
- return cast<ConstantInt>(
- ConstantInt::get(getAssociatedValue().getType(), 0));
+ return UndefValue::get(getAssociatedValue().getType());
return llvm::None;
}
@@ -4407,13 +4545,15 @@ struct AAPotentialValues
}
/// See AbstractAttribute::getName()
- const std::string getName() const override { return "AAPotentialValues"; }
+ const std::string getName() const override {
+ return "AAPotentialConstantValues";
+ }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
- /// AAPotentialValues
+ /// AAPotentialConstantValues
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
@@ -4744,12 +4884,10 @@ struct AAPointerInfo : public AbstractAttribute {
Instruction *getRemoteInst() const { return RemoteI; }
/// Return true if the value written is not known yet.
- bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); }
+ bool isWrittenValueYetUndetermined() const { return !Content; }
/// Return true if the value written cannot be determined at all.
- bool isWrittenValueUnknown() const {
- return Content.hasValue() && !*Content;
- }
+ bool isWrittenValueUnknown() const { return Content && !*Content; }
/// Return the type associated with the access, if known.
Type *getType() const { return Ty; }
@@ -4792,21 +4930,55 @@ struct AAPointerInfo : public AbstractAttribute {
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
- /// Call \p CB on all accesses that might interfere with \p LI and return true
- /// if all such accesses were known and the callback returned true for all of
- /// them, false otherwise.
- virtual bool forallInterferingAccesses(
- LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0;
+ /// Helper to represent an access offset and size, with logic to deal with
+ /// uncertainty and check for overlapping accesses.
+ struct OffsetAndSize : public std::pair<int64_t, int64_t> {
+ using BaseTy = std::pair<int64_t, int64_t>;
+ OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
+ OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
+ int64_t getOffset() const { return first; }
+ int64_t getSize() const { return second; }
+ static OffsetAndSize getUnknown() {
+ return OffsetAndSize(Unknown, Unknown);
+ }
+
+ /// Return true if offset or size are unknown.
+ bool offsetOrSizeAreUnknown() const {
+ return getOffset() == OffsetAndSize::Unknown ||
+ getSize() == OffsetAndSize::Unknown;
+ }
+
+ /// Return true if this offset and size pair might describe an address that
+ /// overlaps with \p OAS.
+ bool mayOverlap(const OffsetAndSize &OAS) const {
+ // If any offset or size is unknown we have to give up -> assume overlap.
+ if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
+ return true;
+
+ // Check whether one interval's start point lies inside the other
+ // interval [offset, offset + size).
+ return OAS.getOffset() + OAS.getSize() > getOffset() &&
+ OAS.getOffset() < getOffset() + getSize();
+ }
+
+ /// Constant used to represent unknown offset or sizes.
+ static constexpr int64_t Unknown = 1 << 31;
+ };
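The overlap test is a half-open interval intersection with a conservative answer once anything is unknown; a small worked sketch against the code above:

  AAPointerInfo::OffsetAndSize A(0, 8), B(4, 4), C(8, 4);
  // A covers [0, 8), B covers [4, 8), C covers [8, 12).
  bool AB = A.mayOverlap(B); // true:  4 + 4 > 0 and 4 < 0 + 8
  bool AC = A.mayOverlap(C); // false: the ranges only touch at 8
  bool AU = A.mayOverlap(AAPointerInfo::OffsetAndSize::getUnknown()); // true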
+
+ /// Call \p CB on all accesses that might interfere with \p OAS and return
+ /// true if all such accesses were known and the callback returned true for
+ /// all of them, false otherwise. An access interferes with an offset-size
+ /// pair if it might read or write that memory region.
virtual bool forallInterferingAccesses(
- StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
+ OffsetAndSize OAS, function_ref<bool(const Access &, bool)> CB) const = 0;
- /// Call \p CB on all write accesses that might interfere with \p LI and
+ /// Call \p CB on all accesses that might interfere with \p I and
/// return true if all such accesses were known and the callback returned true
/// for all of them, false otherwise. In contrast to forallInterferingAccesses
/// this function will perform reasoning to exclude write accesses that cannot
/// affect the load even if they on the surface look as if they would.
- virtual bool forallInterferingWrites(
- Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ virtual bool forallInterferingAccesses(
+ Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
function_ref<bool(const Access &, bool)> CB) const = 0;
/// This function should return true if the type of the \p AA is AAPointerInfo
diff --git a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
index 496ceea12bc9..a71fa3bf404d 100644
--- a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
+++ b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
@@ -66,25 +66,24 @@ public:
}
};
- /// Liveness enum - During our initial pass over the program, we determine
- /// that things are either alive or maybe alive. We don't mark anything
- /// explicitly dead (even if we know they are), since anything not alive
- /// with no registered uses (in Uses) will never be marked alive and will
- /// thus become dead in the end.
+ /// During our initial pass over the program, we determine that things are
+ /// either alive or maybe alive. We don't mark anything explicitly dead (even
+ /// if we know they are), since anything not alive with no registered uses
+ /// (in Uses) will never be marked alive and will thus become dead in the end.
enum Liveness { Live, MaybeLive };
- DeadArgumentEliminationPass(bool ShouldHackArguments_ = false)
- : ShouldHackArguments(ShouldHackArguments_) {}
+ DeadArgumentEliminationPass(bool ShouldHackArguments = false)
+ : ShouldHackArguments(ShouldHackArguments) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
/// Convenience wrapper
- RetOrArg CreateRet(const Function *F, unsigned Idx) {
+ RetOrArg createRet(const Function *F, unsigned Idx) {
return RetOrArg(F, Idx, false);
}
/// Convenience wrapper
- RetOrArg CreateArg(const Function *F, unsigned Idx) {
+ RetOrArg createArg(const Function *F, unsigned Idx) {
return RetOrArg(F, Idx, true);
}
@@ -122,21 +121,21 @@ public:
bool ShouldHackArguments = false;
private:
- Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
- Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+ Liveness markIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
+ Liveness surveyUse(const Use *U, UseVector &MaybeLiveUses,
unsigned RetValNum = -1U);
- Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
+ Liveness surveyUses(const Value *V, UseVector &MaybeLiveUses);
- void SurveyFunction(const Function &F);
- bool IsLive(const RetOrArg &RA);
- void MarkValue(const RetOrArg &RA, Liveness L,
+ void surveyFunction(const Function &F);
+ bool isLive(const RetOrArg &RA);
+ void markValue(const RetOrArg &RA, Liveness L,
const UseVector &MaybeLiveUses);
- void MarkLive(const RetOrArg &RA);
- void MarkLive(const Function &F);
- void PropagateLiveness(const RetOrArg &RA);
- bool RemoveDeadStuffFromFunction(Function *F);
- bool DeleteDeadVarargs(Function &Fn);
- bool RemoveDeadArgumentsFromCallers(Function &Fn);
+ void markLive(const RetOrArg &RA);
+ void markLive(const Function &F);
+ void propagateLiveness(const RetOrArg &RA);
+ bool removeDeadStuffFromFunction(Function *F);
+ bool deleteDeadVarargs(Function &F);
+ bool removeDeadArgumentsFromCallers(Function &F);
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index a2b93f8aa30d..07c7cac77354 100644
--- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -14,9 +14,10 @@
#define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Module;
+class Pass;
/// Pass which forces specific function attributes into the IR, primarily as
/// a debugging tool.
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index 0b6734a3929d..bcb75025f8e5 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -15,29 +15,22 @@
#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
#define LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
-class AAResults;
+class GlobalValueSummary;
+class ModuleSummaryIndex;
class Function;
class Module;
class Pass;
-/// The three kinds of memory access relevant to 'readonly' and
-/// 'readnone' attributes.
-enum MemoryAccessKind {
- MAK_ReadNone = 0,
- MAK_ReadOnly = 1,
- MAK_MayWrite = 2,
- MAK_WriteOnly = 3
-};
-
/// Returns the memory access properties of this copy of the function.
-MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR);
+FunctionModRefBehavior computeFunctionBodyMemoryAccess(Function &F,
+ AAResults &AAR);
/// Propagate function attributes for function summaries along the index's
/// callgraph during thinlink
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
index 0a6851849e7e..a24196efb83b 100644
--- a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
+++ b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
@@ -19,11 +19,18 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/PassManager.h"
#include <unordered_map>
namespace llvm {
+class Comdat;
+class Constant;
+class Function;
+class GlobalVariable;
+class Metadata;
+class Module;
+class Value;
/// Pass to remove unused function declarations.
class GlobalDCEPass : public PassInfoMixin<GlobalDCEPass> {
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index e4807a1c9c65..315587e0f922 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -43,14 +43,13 @@
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/ValueMap.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include <set>
struct OutlinableGroup;
namespace llvm {
+using namespace CallingConv;
using namespace IRSimilarity;
class Module;
@@ -86,6 +85,13 @@ struct OutlinableRegion {
DenseMap<unsigned, unsigned> ExtractedArgToAgg;
DenseMap<unsigned, unsigned> AggArgToExtracted;
+ /// Values in the outlined functions will often be replaced by arguments. When
+ /// finding corresponding values from one region to another, the found value
+ /// will be the value the argument previously replaced. This structure maps
+ /// any replaced values for the region to the aggregate argument
+ /// in the overall function.
+ DenseMap<Value *, Value *> RemappedArguments;
+
/// Marks whether we need to change the order of the arguments when mapping
/// the old extracted function call to the new aggregate outlined function
/// call.
@@ -168,6 +174,15 @@ struct OutlinableRegion {
/// \return The corresponding Value to \p V if it exists, otherwise nullptr.
Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
+ /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other.
+ ///
+ /// \param Other [in] - The OutlinableRegion to find the corresponding
+ /// BasicBlock in.
+ /// \param BB [in] - The BasicBlock to look for in the other region.
+ /// \return The corresponding BasicBlock to \p BB if it exists, otherwise nullptr.
+ BasicBlock *findCorrespondingBlockIn(const OutlinableRegion &Other,
+ BasicBlock *BB);
+
/// Get the size of the code removed from the region.
///
/// \param [in] TTI - The TargetTransformInfo for the parent function.
@@ -372,6 +387,25 @@ private:
// the call in outlined functions.
if (CI.canReturnTwice())
return false;
+ // TODO: Update the outliner to capture whether the outlined function
+ // needs these extra attributes.
+
+ // Functions marked with the swifttailcc and tailcc calling conventions
+ // require special handling when outlining musttail functions: the
+ // calling convention must be passed down to the outlined function.
+ // Further, musttail calls need special handling as well, since they
+ // require a return instruction directly after the call. For now, the
+ // outliner does not support either.
+ bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail ||
+ CI.getCallingConv() == CallingConv::Tail;
+ if (IsTailCC && !EnableMustTailCalls)
+ return false;
+ if (CI.isMustTailCall() && !EnableMustTailCalls)
+ return false;
+ // The outliner can only handle musttail items if it is also accompanied
+ // by the tailcc or swifttailcc calling convention.
+ if (CI.isMustTailCall() && !IsTailCC)
+ return false;
return true;
}
// TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
@@ -397,6 +431,9 @@ private:
// The flag variable that marks whether we should allow intrinsics
// instructions to be outlined.
bool EnableIntrinsics = false;
+
+ // The flag variable that marks whether we should allow musttail calls.
+ bool EnableMustTailCalls = false;
};
/// A InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index 302695d96355..880af2b46d7f 100644
--- a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -15,11 +15,11 @@
#ifndef LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H
#define LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Module;
+class Pass;
/// A pass which infers function attributes from the names and signatures of
/// function declarations in a module.
diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h
index a7060943c4c0..1e154eb8f5da 100644
--- a/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -16,7 +16,6 @@
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/PassManager.h"
-#include <utility>
namespace llvm {
@@ -96,7 +95,9 @@ protected:
/// passes be composed to achieve the same end result.
class InlinerPass : public PassInfoMixin<InlinerPass> {
public:
- InlinerPass(bool OnlyMandatory = false) : OnlyMandatory(OnlyMandatory) {}
+ InlinerPass(bool OnlyMandatory = false,
+ ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
+ : OnlyMandatory(OnlyMandatory), LTOPhase(LTOPhase) {}
InlinerPass(InlinerPass &&Arg) = default;
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
@@ -110,6 +111,7 @@ private:
FunctionAnalysisManager &FAM, Module &M);
std::unique_ptr<InlineAdvisor> OwnedAdvisor;
const bool OnlyMandatory;
+ const ThinOrFullLTOPhase LTOPhase;
};
/// Module pass, wrapping the inliner pass. This works in conjunction with the
@@ -122,6 +124,7 @@ class ModuleInlinerWrapperPass
public:
ModuleInlinerWrapperPass(
InlineParams Params = getInlineParams(), bool MandatoryFirst = true,
+ InlineContext IC = {},
InliningAdvisorMode Mode = InliningAdvisorMode::Default,
unsigned MaxDevirtIterations = 0);
ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default;
@@ -147,6 +150,7 @@ public:
private:
const InlineParams Params;
+ const InlineContext IC;
const InliningAdvisorMode Mode;
const unsigned MaxDevirtIterations;
// TODO: Clean this up so we only have one ModulePassManager.
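A construction sketch for the extended constructors above; the phase value is illustrative, assuming the ThinOrFullLTOPhase enumerators from llvm/Pass.h:

  // Run only mandatory inlining, tagged as the ThinLTO post-link phase.
  CGSCCPassManager CGPM;
  CGPM.addPass(InlinerPass(/*OnlyMandatory=*/true,
                           ThinOrFullLTOPhase::ThinLTOPostLink));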
diff --git a/llvm/include/llvm/Transforms/IPO/Internalize.h b/llvm/include/llvm/Transforms/IPO/Internalize.h
index 41816df93360..adcf5a932be0 100644
--- a/llvm/include/llvm/Transforms/IPO/Internalize.h
+++ b/llvm/include/llvm/Transforms/IPO/Internalize.h
@@ -23,7 +23,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/PassManager.h"
#include <functional>
diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
index 7474e48aafaf..24cfff6083ff 100644
--- a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
+++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
@@ -11,10 +11,7 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ReplayInlineAdvisor.h"
-#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/PassManager.h"
-#include <utility>
namespace llvm {
@@ -30,8 +27,9 @@ namespace llvm {
class ModuleInlinerPass : public PassInfoMixin<ModuleInlinerPass> {
public:
ModuleInlinerPass(InlineParams Params = getInlineParams(),
- InliningAdvisorMode Mode = InliningAdvisorMode::Default)
- : Params(Params), Mode(Mode){};
+ InliningAdvisorMode Mode = InliningAdvisorMode::Default,
+ ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
+ : Params(Params), Mode(Mode), LTOPhase(LTOPhase){};
ModuleInlinerPass(ModuleInlinerPass &&Arg) = default;
PreservedAnalyses run(Module &, ModuleAnalysisManager &);
@@ -42,6 +40,7 @@ private:
std::unique_ptr<InlineAdvisor> OwnedAdvisor;
const InlineParams Params;
const InliningAdvisorMode Mode;
+ const ThinOrFullLTOPhase LTOPhase;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 3b944878a810..2676f2705424 100644
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -16,7 +16,6 @@
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include <functional>
-#include <memory>
#include <string>
#include <vector>
@@ -214,7 +213,6 @@ private:
void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const;
void addLTOOptimizationPasses(legacy::PassManagerBase &PM);
void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM);
- void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS);
void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM);
void addVectorPasses(legacy::PassManagerBase &PM, bool IsFullLTO);
@@ -226,8 +224,6 @@ public:
/// populateModulePassManager - This sets up the primary pass manager.
void populateModulePassManager(legacy::PassManagerBase &MPM);
- void populateLTOPassManager(legacy::PassManagerBase &PM);
- void populateThinLTOPassManager(legacy::PassManagerBase &PM);
};
/// Registers a function for adding a standard set of passes. This should be
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 893654650caa..fff06da22cf3 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -18,9 +18,6 @@
#include <queue>
#include <set>
-using namespace llvm;
-using namespace sampleprof;
-
namespace llvm {
namespace sampleprof {
@@ -51,10 +48,10 @@ struct ProfiledCallGraphNode {
}
};
- using iterator = std::set<ProfiledCallGraphEdge>::iterator;
- using const_iterator = std::set<ProfiledCallGraphEdge>::const_iterator;
using edge = ProfiledCallGraphEdge;
- using edges = std::set<ProfiledCallGraphEdge, ProfiledCallGraphEdgeComparer>;
+ using edges = std::set<edge, ProfiledCallGraphEdgeComparer>;
+ using iterator = edges::iterator;
+ using const_iterator = edges::const_iterator;
ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {}
@@ -64,11 +61,11 @@ struct ProfiledCallGraphNode {
class ProfiledCallGraph {
public:
- using iterator = std::set<ProfiledCallGraphEdge>::iterator;
+ using iterator = ProfiledCallGraphNode::iterator;
// Constructor for non-CS profile.
ProfiledCallGraph(SampleProfileMap &ProfileMap) {
- assert(!FunctionSamples::ProfileIsCSFlat &&
+ assert(!FunctionSamples::ProfileIsCS &&
"CS flat profile is not handled here");
for (const auto &Samples : ProfileMap) {
addProfiledCalls(Samples.second);
diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index cf87d028600f..a97d5ee3d710 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -15,20 +15,18 @@
#ifndef LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
#define LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/ProfileData/SampleProf.h"
-#include <list>
#include <map>
+#include <queue>
#include <vector>
-using namespace llvm;
-using namespace sampleprof;
-
namespace llvm {
+class CallBase;
+class DILocation;
+class Function;
+class Instruction;
// Internal trie representation used for tracking the context tree and sample
// profiles. The path from root node to a given node represents the context of
@@ -47,11 +45,6 @@ public:
ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
StringRef ChildName,
bool AllowCreate = true);
-
- ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
- ContextTrieNode &&NodeToMove,
- uint32_t ContextFramesToRemove,
- bool DeleteNode = true);
void removeChildContext(const LineLocation &CallSite, StringRef ChildName);
std::map<uint64_t, ContextTrieNode> &getAllChildContext();
StringRef getFuncName() const;
@@ -62,6 +55,7 @@ public:
LineLocation getCallSiteLoc() const;
ContextTrieNode *getParentContext() const;
void setParentContext(ContextTrieNode *Parent);
+ void setCallSiteLoc(const LineLocation &Loc);
void dumpNode();
void dumpTree();
@@ -94,22 +88,13 @@ private:
// calling context and the context is identified by path from root to the node.
class SampleContextTracker {
public:
- struct ProfileComparer {
- bool operator()(FunctionSamples *A, FunctionSamples *B) const {
- // Sort function profiles by the number of total samples and their
- // contexts.
- if (A->getTotalSamples() == B->getTotalSamples())
- return A->getContext() < B->getContext();
- return A->getTotalSamples() > B->getTotalSamples();
- }
- };
-
- // Keep profiles of a function sorted so that they will be processed/promoted
- // deterministically.
- using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>;
+ using ContextSamplesTy = std::vector<FunctionSamples *>;
+ SampleContextTracker() = default;
SampleContextTracker(SampleProfileMap &Profiles,
const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap);
+ // Populate the FuncToCtxtProfiles map after the trie is built.
+ void populateFuncToCtxtMap();
// Query context profile for a specific callee with given name at a given
// call-site. The full context is identified by location of call instruction.
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
@@ -125,6 +110,8 @@ public:
// Get all context profile for given function.
ContextSamplesTy &getAllContextSamplesFor(const Function &Func);
ContextSamplesTy &getAllContextSamplesFor(StringRef Name);
+ ContextTrieNode *getOrCreateContextPath(const SampleContext &Context,
+ bool AllowCreate);
// Query base profile for a given function. A base profile is a merged view
// of all context profiles for contexts that are not inlined.
FunctionSamples *getBaseSamplesFor(const Function &Func,
@@ -142,6 +129,64 @@ public:
ContextTrieNode &getRootContext();
void promoteMergeContextSamplesTree(const Instruction &Inst,
StringRef CalleeName);
+
+ // Create a merged context-less profile map.
+ void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles);
+ ContextTrieNode *
+ getContextNodeForProfile(const FunctionSamples *FSamples) const {
+ auto I = ProfileToNodeMap.find(FSamples);
+ if (I == ProfileToNodeMap.end())
+ return nullptr;
+ return I->second;
+ }
+ StringMap<ContextSamplesTy> &getFuncToCtxtProfiles() {
+ return FuncToCtxtProfiles;
+ }
+
+ class Iterator : public std::iterator<std::forward_iterator_tag,
+ const ContextTrieNode *> {
+ std::queue<ContextTrieNode *> NodeQueue;
+
+ public:
+ explicit Iterator() = default;
+ explicit Iterator(ContextTrieNode *Node) { NodeQueue.push(Node); }
+ Iterator &operator++() {
+ assert(!NodeQueue.empty() && "Iterator already at the end");
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ for (auto &It : Node->getAllChildContext())
+ NodeQueue.push(&It.second);
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ assert(!NodeQueue.empty() && "Iterator already at the end");
+ Iterator Ret = *this;
+ ++(*this);
+ return Ret;
+ }
+ bool operator==(const Iterator &Other) const {
+ if (NodeQueue.empty() && Other.NodeQueue.empty())
+ return true;
+ if (NodeQueue.empty() || Other.NodeQueue.empty())
+ return false;
+ return NodeQueue.front() == Other.NodeQueue.front();
+ }
+ bool operator!=(const Iterator &Other) const { return !(*this == Other); }
+ ContextTrieNode *operator*() const {
+ assert(!NodeQueue.empty() && "Invalid access to end iterator");
+ return NodeQueue.front();
+ }
+ };
+
+ Iterator begin() { return Iterator(&RootContext); }
+ Iterator end() { return Iterator(); }
+
+#ifndef NDEBUG
+ // Get a context string from root to current node.
+ std::string getContextString(const FunctionSamples &FSamples) const;
+ std::string getContextString(ContextTrieNode *Node) const;
+#endif
// Dump the internal context profile trie.
void dump();
@@ -149,21 +194,26 @@ private:
ContextTrieNode *getContextFor(const DILocation *DIL);
ContextTrieNode *getCalleeContextFor(const DILocation *DIL,
StringRef CalleeName);
- ContextTrieNode *getOrCreateContextPath(const SampleContext &Context,
- bool AllowCreate);
ContextTrieNode *getTopLevelContextNode(StringRef FName);
ContextTrieNode &addTopLevelContextNode(StringRef FName);
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
- void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
- uint32_t ContextFramesToRemove);
+ void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode);
ContextTrieNode &
promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
- ContextTrieNode &ToNodeParent,
- uint32_t ContextFramesToRemove);
-
+ ContextTrieNode &ToNodeParent);
+ ContextTrieNode &moveContextSamples(ContextTrieNode &ToNodeParent,
+ const LineLocation &CallSite,
+ ContextTrieNode &&NodeToMove);
+ void setContextNode(const FunctionSamples *FSample, ContextTrieNode *Node) {
+ ProfileToNodeMap[FSample] = Node;
+ }
// Map from function name to context profiles (excluding base profile)
StringMap<ContextSamplesTy> FuncToCtxtProfiles;
+ // Map from a FunctionSamples object to the context trie node it belongs to.
+ std::unordered_map<const FunctionSamples *, ContextTrieNode *>
+ ProfileToNodeMap;
+
// Map from function guid to real function names. Only used in md5 mode.
const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap;
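Because begin()/end() above hand out the breadth-first Iterator, the whole context trie can be walked with a range-for; a small sketch, where Tracker is an assumed, already-populated SampleContextTracker:

  // Visit every node of the context trie, root first, level by level.
  for (ContextTrieNode *Node : Tracker)
    Node->dumpNode();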
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
index 704b793ab3ea..d838c8b8a83e 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
@@ -36,7 +36,7 @@ public:
private:
std::string ProfileFileName;
std::string ProfileRemappingFileName;
- ThinOrFullLTOPhase LTOPhase;
+ const ThinOrFullLTOPhase LTOPhase;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index e73c36043cb2..ed296d2dd080 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -16,17 +16,19 @@
#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/PseudoProbe.h"
#include "llvm/ProfileData/SampleProf.h"
-#include "llvm/Target/TargetMachine.h"
#include <unordered_map>
namespace llvm {
+class Any;
+class BasicBlock;
+class Function;
+class Instruction;
+class Loop;
+class PassInstrumentationCallbacks;
+class TargetMachine;
class Module;
diff --git a/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h b/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h
index f4a15c36afc9..4a2eaad63113 100644
--- a/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h
+++ b/llvm/include/llvm/Transforms/IPO/StripDeadPrototypes.h
@@ -16,11 +16,12 @@
#ifndef LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H
#define LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Module;
+
/// Pass to remove unused function declarations.
struct StripDeadPrototypesPass : PassInfoMixin<StripDeadPrototypesPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h b/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
index 7acb922b37e1..469cf2bc5011 100644
--- a/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
+++ b/llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
@@ -17,9 +17,10 @@
#define LLVM_TRANSFORMS_IPO_THINLTOBITCODEWRITER_H
#include <llvm/IR/PassManager.h>
-#include <llvm/Support/raw_ostream.h>
namespace llvm {
+class Module;
+class raw_ostream;
class ThinLTOBitcodeWriterPass
: public PassInfoMixin<ThinLTOBitcodeWriterPass> {
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 2e9744cfd524..47c137e70a7f 100644
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -14,16 +14,17 @@
#ifndef LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H
#define LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H
-#include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/IPO/FunctionImport.h"
#include <cassert>
#include <cstdint>
+#include <map>
#include <set>
#include <utility>
#include <vector>
namespace llvm {
+class Module;
template <typename T> class ArrayRef;
template <typename T> class MutableArrayRef;
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index 6dee38c83b36..35a3a8c3218b 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -18,6 +18,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h
index a288a3972c3d..9ff45fc29b06 100644
--- a/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation.h
@@ -15,6 +15,10 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include <cassert>
#include <cstdint>
#include <limits>
@@ -75,21 +79,6 @@ struct GCOVOptions {
std::string Exclude;
};
-ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
- GCOVOptions::getDefault());
-
-// PGO Instrumention. Parameter IsCS indicates if this is the context sensitive
-// instrumentation.
-ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false);
-ModulePass *
-createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""),
- bool IsCS = false);
-ModulePass *createPGOInstrumentationGenCreateVarLegacyPass(
- StringRef CSInstrName = StringRef(""));
-ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false,
- bool SamplePGO = false);
-FunctionPass *createPGOMemOPSizeOptLegacyPass();
-
ModulePass *createCGProfileLegacyPass();
// The pgo-specific indirect call promotion function declared below is used by
@@ -194,6 +183,26 @@ static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) {
assert(Scaled <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");
return Scaled;
}
+
+// Used to ensure that inserted instrumentation has a DebugLocation; if none
+// is attached to the source instruction, try to use a DILocation at line 0,
+// scoped to the surrounding function (if it has a DebugLocation).
+//
+// Some non-call instructions may be missing debug info, but when inserting
+// instrumentation calls, some builds (e.g. LTO) want the calls to have debug
+// info if the enclosing function does.
+struct InstrumentationIRBuilder : IRBuilder<> {
+ static void ensureDebugInfo(IRBuilder<> &IRB, const Function &F) {
+ if (IRB.getCurrentDebugLocation())
+ return;
+ if (DISubprogram *SP = F.getSubprogram())
+ IRB.SetCurrentDebugLocation(DILocation::get(SP->getContext(), 0, 0, SP));
+ }
+
+ explicit InstrumentationIRBuilder(Instruction *IP) : IRBuilder<>(IP) {
+ ensureDebugInfo(*this, *IP->getFunction());
+ }
+};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_H
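A usage sketch for InstrumentationIRBuilder; Inst (an Instruction *) and Callee (a FunctionCallee) are placeholders:

  // The constructor copies Inst's DebugLoc, or synthesizes a line-0
  // location from the enclosing function's DISubprogram, so the inserted
  // call never lacks debug info when the function has it.
  InstrumentationIRBuilder IRB(Inst);
  IRB.CreateCall(Callee, {IRB.getInt64(1)});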
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index a0d8118c23f7..d12b2cf45825 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -13,82 +13,17 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZER_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZER_H
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
namespace llvm {
-
-/// Frontend-provided metadata for source location.
-struct LocationMetadata {
- StringRef Filename;
- int LineNo = 0;
- int ColumnNo = 0;
-
- LocationMetadata() = default;
-
- bool empty() const { return Filename.empty(); }
- void parse(MDNode *MDN);
-};
-
-/// Frontend-provided metadata for global variables.
-class GlobalsMetadata {
-public:
- struct Entry {
- LocationMetadata SourceLoc;
- StringRef Name;
- bool IsDynInit = false;
- bool IsExcluded = false;
-
- Entry() = default;
- };
-
- /// Create a default uninitialized GlobalsMetadata instance.
- GlobalsMetadata() = default;
-
- /// Create an initialized GlobalsMetadata instance.
- GlobalsMetadata(Module &M);
-
- /// Returns metadata entry for a given global.
- Entry get(GlobalVariable *G) const {
- auto Pos = Entries.find(G);
- return (Pos != Entries.end()) ? Pos->second : Entry();
- }
-
- /// Handle invalidation from the pass manager.
- /// These results are never invalidated.
- bool invalidate(Module &, const PreservedAnalyses &,
- ModuleAnalysisManager::Invalidator &) {
- return false;
- }
- bool invalidate(Function &, const PreservedAnalyses &,
- FunctionAnalysisManager::Invalidator &) {
- return false;
- }
-
-private:
- DenseMap<GlobalVariable *, Entry> Entries;
-};
-
-/// The ASanGlobalsMetadataAnalysis initializes and returns a GlobalsMetadata
-/// object. More specifically, ASan requires looking at all globals registered
-/// in 'llvm.asan.globals' before running, which only depends on reading module
-/// level metadata. This analysis is required to run before running the
-/// AddressSanitizerPass since it collects that metadata.
-/// The legacy pass manager equivalent of this is ASanGlobalsMetadataLegacyPass.
-class ASanGlobalsMetadataAnalysis
- : public AnalysisInfoMixin<ASanGlobalsMetadataAnalysis> {
-public:
- using Result = GlobalsMetadata;
-
- Result run(Module &, ModuleAnalysisManager &);
-
-private:
- friend AnalysisInfoMixin<ASanGlobalsMetadataAnalysis>;
- static AnalysisKey Key;
-};
+class Function;
+class FunctionPass;
+class GlobalVariable;
+class MDNode;
+class Module;
+class ModulePass;
+class raw_ostream;
struct AddressSanitizerOptions {
bool CompileKernel = false;
@@ -98,26 +33,6 @@ struct AddressSanitizerOptions {
AsanDetectStackUseAfterReturnMode::Runtime;
};
-/// Public interface to the address sanitizer pass for instrumenting code to
-/// check for various memory errors at runtime.
-///
-/// The sanitizer itself is a function pass that works by inserting various
-/// calls to the ASan runtime library functions. The runtime library essentially
-/// replaces malloc() and free() with custom implementations that allow regions
-/// surrounding requested memory to be checked for invalid accesses.
-class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> {
-public:
- AddressSanitizerPass(const AddressSanitizerOptions &Options)
- : Options(Options){};
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
- void printPipeline(raw_ostream &OS,
- function_ref<StringRef(StringRef)> MapClassName2PassName);
- static bool isRequired() { return true; }
-
-private:
- AddressSanitizerOptions Options;
-};
-
/// Public interface to the address sanitizer module pass for instrumenting code
/// to check for various memory errors.
///
@@ -142,17 +57,6 @@ private:
AsanDtorKind DestructorKind;
};
-// Insert AddressSanitizer (address basic correctness checking) instrumentation
-FunctionPass *createAddressSanitizerFunctionPass(
- bool CompileKernel = false, bool Recover = false,
- bool UseAfterScope = false,
- AsanDetectStackUseAfterReturnMode UseAfterReturn =
- AsanDetectStackUseAfterReturnMode::Runtime);
-ModulePass *createModuleAddressSanitizerLegacyPassPass(
- bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true,
- bool UseOdrIndicator = true,
- AsanDtorKind DestructorKind = AsanDtorKind::Global);
-
struct ASanAccessInfo {
const int32_t Packed;
const uint8_t AccessSizeIndex;
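With the function pass and the globals-metadata analysis deleted, only the module pass remains; a registration sketch, assuming the surviving pass is still named ModuleAddressSanitizerPass at this point and that MPM is a ModulePassManager:

  AddressSanitizerOptions Opts;
  Opts.Recover = true;   // illustrative: keep running after the first report
  MPM.addPass(ModuleAddressSanitizerPass(Opts));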
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 0a5456c5956f..7858a1c4b2fd 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -47,51 +47,6 @@ public:
Value *getPtr() { return PtrUse->get(); }
};
-// For an alloca valid between lifetime markers Start and Ends, call the
-// Callback for all possible exits out of the lifetime in the containing
-// function, which can return from the instructions in RetVec.
-//
-// Returns whether Ends covered all possible exits. If they did not,
-// the caller should remove Ends to ensure that work done at the other
-// exits does not happen outside of the lifetime.
-template <typename F>
-bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
- const Instruction *Start,
- const SmallVectorImpl<IntrinsicInst *> &Ends,
- const SmallVectorImpl<Instruction *> &RetVec,
- F Callback) {
- if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) {
- Callback(Ends[0]);
- return true;
- }
- SmallVector<Instruction *, 8> ReachableRetVec;
- unsigned NumCoveredExits = 0;
- for (auto *RI : RetVec) {
- if (!isPotentiallyReachable(Start, RI, nullptr, &DT))
- continue;
- ReachableRetVec.push_back(RI);
- // TODO(fmayer): We don't support diamond shapes, where multiple lifetime
- // ends together dominate the RI, but none of them does by itself.
- // Check how often this happens and decide whether to support this here.
- if (std::any_of(Ends.begin(), Ends.end(),
- [&](Instruction *End) { return DT.dominates(End, RI); }))
- ++NumCoveredExits;
- }
- // If there's a mix of covered and non-covered exits, just put the untag
- // on exits, so we avoid the redundancy of untagging twice.
- if (NumCoveredExits == ReachableRetVec.size()) {
- for (auto *End : Ends)
- Callback(End);
- } else {
- for (auto *RI : ReachableRetVec)
- Callback(RI);
- // We may have inserted untag outside of the lifetime interval.
- // Signal the caller to remove the lifetime end call for this alloca.
- return false;
- }
- return true;
-}
-
// Get AddressSanitizer parameters.
void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
bool IsKasan, uint64_t *ShadowBase,
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
index f019d1c00a35..187aaedb6000 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
@@ -17,14 +17,13 @@ enum class AsanDtorKind {
None, ///< Do not emit any destructors for ASan
Global, ///< Append to llvm.global_dtors
Invalid, ///< Not a valid destructor Kind.
- // TODO(dliew): Add more more kinds.
};
/// Mode of ASan detect stack use after return
enum class AsanDetectStackUseAfterReturnMode {
Never, ///< Never detect stack use after return.
- Runtime, ///< Detect stack use after return if runtime flag is enabled
- ///< (ASAN_OPTIONS=detect_stack_use_after_return=1)
+ Runtime, ///< Detect stack use after return unless it is disabled at
+ ///< runtime (ASAN_OPTIONS=detect_stack_use_after_return=0).
Always, ///< Always detect stack use after return.
Invalid, ///< Not a valid detect mode.
};
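A sketch of mapping a user-facing flag string onto the mode enum; parseUARMode and the accepted spellings are hypothetical, not part of this header:

  AsanDetectStackUseAfterReturnMode parseUARMode(llvm::StringRef V) {
    if (V == "never")  return AsanDetectStackUseAfterReturnMode::Never;
    if (V == "always") return AsanDetectStackUseAfterReturnMode::Always;
    return AsanDetectStackUseAfterReturnMode::Runtime; // the default mode
  }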
diff --git a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
index 76d586252743..5e68141e3399 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
@@ -10,9 +10,10 @@
#define LLVM_TRANSFORMS_INSTRUMENTATION_BOUNDSCHECKING_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Function;
+class FunctionPass;
/// A pass to instrument code and perform run-time bounds checking on loads,
/// stores, and other memory intrinsics.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
index c56e4c78cad5..9f9ce42277a0 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
@@ -12,10 +12,10 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CGPROFILE_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_CGPROFILE_H
-#include "llvm/ADT/MapVector.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Module;
class CGProfilePass : public PassInfoMixin<CGProfilePass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h b/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
index 18b428582046..0bace514c361 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
@@ -14,7 +14,6 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h
index 9b57b1f9a9ea..41ba05cd67f0 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h
@@ -8,12 +8,12 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include <string>
#include <vector>
namespace llvm {
+class Module;
class DataFlowSanitizerPass : public PassInfoMixin<DataFlowSanitizerPass> {
private:
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
index 70949026a892..d3b5b5ca5c25 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -13,11 +13,14 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZER_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZER_H
-#include "llvm/IR/Function.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class FunctionPass;
+class Module;
+class StringRef;
+class raw_ostream;
struct HWAddressSanitizerOptions {
HWAddressSanitizerOptions()
@@ -47,11 +50,6 @@ private:
HWAddressSanitizerOptions Options;
};
-FunctionPass *
-createHWAddressSanitizerLegacyPassPass(bool CompileKernel = false,
- bool Recover = false,
- bool DisableOptimization = false);
-
namespace HWASanAccessInfo {
// Bit field positions for the accessinfo parameter to
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 5873db22a5d1..90fc0670448b 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -19,7 +19,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Transforms/Instrumentation.h"
-#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>
@@ -57,6 +56,9 @@ private:
}
};
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
+ /// If runtime relocation is enabled, this maps functions to the load
+ /// instruction that produces the profile relocation bias.
+ DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
std::vector<GlobalValue *> CompilerUsedVars;
std::vector<GlobalValue *> UsedVars;
std::vector<GlobalVariable *> ReferencedNames;
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index b9ad56ba7509..b584b9984492 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -12,12 +12,13 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Function;
+class FunctionPass;
+class Module;
+class ModulePass;
/// Public interface to the memory profiler pass for instrumenting code to
/// profile memory accesses.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
index e5779dc775ba..e4654a0fc7ef 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -13,10 +13,15 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMORYSANITIZER_H
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Function;
+class FunctionPass;
+class Module;
+class StringRef;
+class raw_ostream;
struct MemorySanitizerOptions {
MemorySanitizerOptions() : MemorySanitizerOptions(0, false, false, false){};
@@ -30,10 +35,6 @@ struct MemorySanitizerOptions {
bool EagerChecks;
};
-// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
-FunctionPass *
-createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options = {});
-
/// A function pass for msan instrumentation.
///
/// Instruments functions to detect uninitialized reads. This function pass
diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
index e3d268cb0781..9bacb7eb38a5 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
@@ -16,13 +16,14 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation.h"
namespace llvm {
+class Module;
+class ModulePass;
/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The
/// pass instruments functions for coverage, adds initialization calls to the
diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
index e795043630d5..b3a067ba59c2 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -14,11 +14,11 @@
#define LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
-// Insert ThreadSanitizer (race detection) instrumentation
-FunctionPass *createThreadSanitizerLegacyPassPass();
+class Function;
+class FunctionPass;
+class Module;
/// A function pass for tsan instrumentation.
///
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index d6228700aa9a..edd492b0343d 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -133,7 +133,8 @@ Pass *createIndVarSimplifyPass();
//
Pass *createLICMPass();
Pass *createLICMPass(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap);
+ unsigned LicmMssaNoAccForPromotionCap,
+ bool AllowSpeculation);
//===----------------------------------------------------------------------===//
//
@@ -170,13 +171,6 @@ Pass *createLoopStrengthReducePass();
//===----------------------------------------------------------------------===//
//
-// LoopUnswitch - This pass is a simple loop unswitching pass.
-//
-Pass *createLoopUnswitchPass(bool OptimizeForSize = false,
- bool hasBranchDivergence = false);
-
-//===----------------------------------------------------------------------===//
-//
// LoopInstSimplify - This pass simplifies instructions in a loop's body.
//
Pass *createLoopInstSimplifyPass();
@@ -246,12 +240,10 @@ FunctionPass *createReassociatePass();
//===----------------------------------------------------------------------===//
//
// JumpThreading - Thread control through multi-pred/multi-succ blocks where some
-// preds always go to some succ. If FreezeSelectCond is true, unfold the
-// condition of a select that unfolds to branch. Thresholds other than minus one
+// preds always go to some succ. Thresholds other than minus one
// override the internal BB duplication default threshold.
//
-FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false,
- int Threshold = -1);
+FunctionPass *createJumpThreadingPass(int Threshold = -1);
//===----------------------------------------------------------------------===//
//
@@ -428,6 +420,12 @@ FunctionPass *createLowerExpectIntrinsicPass();
//===----------------------------------------------------------------------===//
//
+// TLSVariableHoist - This pass reduces duplicated TLS address calls.
+//
+FunctionPass *createTLSVariableHoistPass();
+
+//===----------------------------------------------------------------------===//
+//
// LowerConstantIntrinsics - Expand any remaining llvm.objectsize and
// llvm.is.constant intrinsic calls, even for the unknown cases.
//
diff --git a/llvm/include/llvm/Transforms/Scalar/BDCE.h b/llvm/include/llvm/Transforms/Scalar/BDCE.h
index 996622bccdba..0763f31dfad4 100644
--- a/llvm/include/llvm/Transforms/Scalar/BDCE.h
+++ b/llvm/include/llvm/Transforms/Scalar/BDCE.h
@@ -16,11 +16,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_BDCE_H
#define LLVM_TRANSFORMS_SCALAR_BDCE_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
// The Bit-Tracking Dead Code Elimination pass.
struct BDCEPass : PassInfoMixin<BDCEPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h b/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h
index ee2b6f264086..661340f4598f 100644
--- a/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h
+++ b/llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h
@@ -9,11 +9,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING_H
#define LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
struct CallSiteSplittingPass : PassInfoMixin<CallSiteSplittingPass> {
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
index 11379e59467f..e59734b92244 100644
--- a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -40,7 +40,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include <algorithm>
diff --git a/llvm/include/llvm/Transforms/Scalar/DCE.h b/llvm/include/llvm/Transforms/Scalar/DCE.h
index 4d83296b1d86..8d1616a7b75d 100644
--- a/llvm/include/llvm/Transforms/Scalar/DCE.h
+++ b/llvm/include/llvm/Transforms/Scalar/DCE.h
@@ -13,11 +13,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_DCE_H
#define LLVM_TRANSFORMS_SCALAR_DCE_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
/// Basic Dead Code Elimination pass.
class DCEPass : public PassInfoMixin<DCEPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h b/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h
index afebd9bbc122..4e9fbf65e163 100644
--- a/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h
@@ -13,11 +13,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_DFAJUMPTHREADING_H
#define LLVM_TRANSFORMS_SCALAR_DFAJUMPTHREADING_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
struct DFAJumpThreadingPass : PassInfoMixin<DFAJumpThreadingPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
index 5fb47af6f795..f4bec228ea96 100644
--- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h
@@ -18,11 +18,17 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/ConstantRange.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class DominatorTree;
+class Function;
+class Instruction;
+class LLVMContext;
+template <typename T> class Optional;
+class Type;
+class Value;
+
class Float2IntPass : public PassInfoMixin<Float2IntPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -36,6 +42,7 @@ private:
ConstantRange badRange();
ConstantRange unknownRange();
ConstantRange validateRange(ConstantRange R);
+ Optional<ConstantRange> calcRange(Instruction *I);
void walkBackwards();
void walkForwards();
bool validateAndTransform();
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index 9e660c92124e..16ab1a490162 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -17,10 +17,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
@@ -42,6 +40,8 @@ class CallInst;
class ExtractValueInst;
class Function;
class FunctionPass;
+class GetElementPtrInst;
+class ImplicitControlFlowTracking;
class LoadInst;
class LoopInfo;
class MemDepResult;
@@ -178,6 +178,7 @@ public:
Expression createCmpExpr(unsigned Opcode, CmpInst::Predicate Predicate,
Value *LHS, Value *RHS);
Expression createExtractvalueExpr(ExtractValueInst *EI);
+ Expression createGEPExpr(GetElementPtrInst *GEP);
uint32_t lookupOrAddCall(CallInst *C);
uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
uint32_t Num, GVNPass &Gvn);
diff --git a/llvm/include/llvm/Transforms/Scalar/GuardWidening.h b/llvm/include/llvm/Transforms/Scalar/GuardWidening.h
index d08d042ab055..fa03d5f678fd 100644
--- a/llvm/include/llvm/Transforms/Scalar/GuardWidening.h
+++ b/llvm/include/llvm/Transforms/Scalar/GuardWidening.h
@@ -15,12 +15,13 @@
#ifndef LLVM_TRANSFORMS_SCALAR_GUARDWIDENING_H
#define LLVM_TRANSFORMS_SCALAR_GUARDWIDENING_H
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
class Function;
struct GuardWideningPass : public PassInfoMixin<GuardWideningPass> {
diff --git a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h
index a1f20d9ca983..4136c45e1905 100644
--- a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h
+++ b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h
@@ -9,11 +9,13 @@
#ifndef LLVM_TRANSFORMS_SCALAR_IVUSERSPRINTER_H
#define LLVM_TRANSFORMS_SCALAR_IVUSERSPRINTER_H
-#include "llvm/Analysis/IVUsers.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
+class raw_ostream;
/// Printer pass for the \c IVUsers for a loop.
class IVUsersPrinterPass : public PassInfoMixin<IVUsersPrinterPass> {
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 0ac7d7c62b7a..09d08bf423a6 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -16,14 +16,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/ValueHandle.h"
-#include <memory>
#include <utility>
namespace llvm {
@@ -95,10 +92,9 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
unsigned BBDupThreshold;
unsigned DefaultBBDupThreshold;
- bool InsertFreezeWhenUnfoldingSelect;
public:
- JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
+ JumpThreadingPass(int T = -1);
// Glue for old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h
index 751f75c0ccb2..f7dd40be47e5 100644
--- a/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/llvm/include/llvm/Transforms/Scalar/LICM.h
@@ -32,46 +32,70 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LICM_H
#define LLVM_TRANSFORMS_SCALAR_LICM_H
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
+class LoopNest;
+
extern cl::opt<unsigned> SetLicmMssaOptCap;
extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
+struct LICMOptions {
+ unsigned MssaOptCap;
+ unsigned MssaNoAccForPromotionCap;
+ bool AllowSpeculation;
+
+ LICMOptions()
+ : MssaOptCap(SetLicmMssaOptCap),
+ MssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+ AllowSpeculation(true) {}
+
+ LICMOptions(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap,
+ bool AllowSpeculation)
+ : MssaOptCap(MssaOptCap),
+ MssaNoAccForPromotionCap(MssaNoAccForPromotionCap),
+ AllowSpeculation(AllowSpeculation) {}
+};
+
/// Performs Loop Invariant Code Motion Pass.
class LICMPass : public PassInfoMixin<LICMPass> {
- unsigned LicmMssaOptCap;
- unsigned LicmMssaNoAccForPromotionCap;
+ LICMOptions Opts;
public:
- LICMPass()
- : LicmMssaOptCap(SetLicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
- LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
- : LicmMssaOptCap(LicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+ LICMPass(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap,
+ bool AllowSpeculation)
+ : LICMPass(LICMOptions(MssaOptCap, MssaNoAccForPromotionCap,
+ AllowSpeculation)) {}
+ LICMPass(LICMOptions Opts) : Opts(Opts) {}
+
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
/// Performs LoopNest Invariant Code Motion Pass.
class LNICMPass : public PassInfoMixin<LNICMPass> {
- unsigned LicmMssaOptCap;
- unsigned LicmMssaNoAccForPromotionCap;
+ LICMOptions Opts;
public:
- LNICMPass()
- : LicmMssaOptCap(SetLicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
- LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
- : LicmMssaOptCap(LicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+ LNICMPass(unsigned MssaOptCap, unsigned MssaNoAccForPromotionCap,
+ bool AllowSpeculation)
+ : LNICMPass(LICMOptions(MssaOptCap, MssaNoAccForPromotionCap,
+ AllowSpeculation)) {}
+ LNICMPass(LICMOptions Opts) : Opts(Opts) {}
+
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
} // end namespace llvm
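A construction sketch for the consolidated LICMOptions; the cap values are illustrative rather than the SetLicmMssa* command-line defaults, and LPM is an assumed LoopPassManager:

  LPM.addPass(LICMPass(LICMOptions(/*MssaOptCap=*/100,
                                   /*MssaNoAccForPromotionCap=*/250,
                                   /*AllowSpeculation=*/true)));
  // The default LICMOptions() constructor instead reads the
  // SetLicmMssaOptCap / SetLicmMssaNoAccForPromotionCap cl::opts.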
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
index 3f250fc1ce8c..50a837acf4e3 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
@@ -8,12 +8,14 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPACCESSANALYSISPRINTER_H
#define LLVM_TRANSFORMS_SCALAR_LOOPACCESSANALYSISPRINTER_H
-
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
+class raw_ostream;
/// Printer pass for the \c LoopAccessInfo results.
class LoopAccessInfoPrinterPass
: public PassInfoMixin<LoopAccessInfoPrinterPass> {
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h b/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h
index 306b6fa046df..0c597bf295b2 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h
@@ -10,11 +10,11 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPBOUNDSPLIT_H
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
/// This pass transforms loops that contain a conditional branch with an
/// induction variable. For example, it transforms the code on the left into
/// the code on the right:
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h b/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
index 9ebd5984cea9..d5e15ffff075 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
@@ -13,11 +13,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H
#define LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
/// An optimization pass inserting data prefetches in loops.
class LoopDataPrefetchPass : public PassInfoMixin<LoopDataPrefetchPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h b/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h
index 557616e2e6ba..459a5cd3ece4 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h
@@ -14,13 +14,13 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPDELETION_H
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class Loop;
+class LPMUpdater;
+
class LoopDeletionPass : public PassInfoMixin<LoopDeletionPass> {
public:
LoopDeletionPass() = default;
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h
index 3d259bdbe986..311b843e83b5 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h
@@ -14,11 +14,11 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class LoopNest;
class LoopFlattenPass : public PassInfoMixin<LoopFlattenPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h
index c67a30293d2f..8fa14d747f5c 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h
@@ -9,11 +9,14 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H
#define LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class LoopNest;
+
struct LoopInterchangePass : public PassInfoMixin<LoopInterchangePass> {
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index e83cc2b9bef0..1df510474ca7 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -40,8 +40,6 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
@@ -52,6 +50,7 @@ namespace llvm {
// Forward declarations of an update tracking API used in the pass manager.
class LPMUpdater;
+class PassInstrumentation;
namespace {
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPredication.h b/llvm/include/llvm/Transforms/Scalar/LoopPredication.h
index 252daafab7a3..83f533603419 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopPredication.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopPredication.h
@@ -14,12 +14,13 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
#define LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
/// Performs Loop Predication Pass.
class LoopPredicationPass : public PassInfoMixin<LoopPredicationPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
index f68ac70da324..c0e6f105a412 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
@@ -13,11 +13,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPROTATION_H
#define LLVM_TRANSFORMS_SCALAR_LOOPROTATION_H
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
/// A simple loop rotation transformation.
class LoopRotatePass : public PassInfoMixin<LoopRotatePass> {
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h b/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
index 2d718592aef5..82c8a4406d00 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
@@ -16,12 +16,14 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPSIMPLIFYCFG_H
#define LLVM_TRANSFORMS_SCALAR_LOOPSIMPLIFYCFG_H
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
+
/// Performs basic CFG simplifications to assist other loop passes.
class LoopSimplifyCFGPass : public PassInfoMixin<LoopSimplifyCFGPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopSink.h b/llvm/include/llvm/Transforms/Scalar/LoopSink.h
index 234c48cbebc5..26e50590a625 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopSink.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopSink.h
@@ -13,12 +13,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPSINK_H
#define LLVM_TRANSFORMS_SCALAR_LOOPSINK_H
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class Function;
+
/// A pass that does profile-guided sinking of instructions into loops.
///
/// This is a function pass as it shouldn't be composed into any kind of
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
index 72663d3d62a8..54f70d7ed4b3 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
@@ -9,10 +9,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLANDJAMPASS_H
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class LoopNest;
/// A simple loop rotation transformation.
class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h b/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h
index 87d6d6759db2..04e0012330da 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h
@@ -9,10 +9,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H
#define LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H
+#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
class LoopVersioningLICMPass : public PassInfoMixin<LoopVersioningLICMPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h b/llvm/include/llvm/Transforms/Scalar/LowerAtomicPass.h
index 87d945d06901..60bbf916fced 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerAtomicPass.h
@@ -1,4 +1,4 @@
-//===- LowerAtomic.cpp - Lower atomic intrinsics ----------------*- C++ -*-===//
+//===- LowerAtomicPass.h - Lower atomic intrinsics --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,8 +11,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
-#define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMICPASS_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERATOMICPASS_H
#include "llvm/IR/PassManager.h"
@@ -25,11 +25,6 @@ public:
static bool isRequired() { return true; }
};
-class AtomicRMWInst;
-/// Convert the given RMWI into primitive load and stores,
-/// assuming that doing so is legal. Return true if the lowering
-/// succeeds.
-bool lowerAtomicRMWInst(AtomicRMWInst *RMWI);
}
-#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMICPASS_H
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
index 61c7bf0454e1..e8e404bb93d6 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
@@ -15,11 +15,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H
#define LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
struct LowerConstantIntrinsicsPass :
PassInfoMixin<LowerConstantIntrinsicsPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
index 4e47ff70d557..95ef0f73e8af 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -15,11 +15,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_LOWEREXPECTINTRINSIC_H
#define LLVM_TRANSFORMS_SCALAR_LOWEREXPECTINTRINSIC_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
struct LowerExpectIntrinsicPass : PassInfoMixin<LowerExpectIntrinsicPass> {
/// Run the pass over the function.
///
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 3a4db13d670a..8103b0a92489 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -16,8 +16,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/PassManager.h"
-#include <cstdint>
-#include <functional>
namespace llvm {
@@ -63,7 +61,7 @@ private:
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
- Align cpyAlign, CallInst *C);
+ Align cpyAlign, std::function<CallInst *()> GetC);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet);
bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet);
diff --git a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
index 256d03675a07..71e11e59a471 100644
--- a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
+++ b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
@@ -23,10 +23,11 @@
#ifndef LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H
#define LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H
-#include "llvm/IR/Module.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
struct MergedLoadStoreMotionOptions {
bool SplitFooterBB;
MergedLoadStoreMotionOptions(bool SplitFooterBB = false)
diff --git a/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h b/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
index fd5a06c5051d..b8a8fcc71e57 100644
--- a/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
+++ b/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
@@ -15,10 +15,10 @@
#ifndef LLVM_TRANSFORMS_SCALAR_PARTIALLYINLINELIBCALLS_H
#define LLVM_TRANSFORMS_SCALAR_PARTIALLYINLINELIBCALLS_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
class PartiallyInlineLibCallsPass
: public PassInfoMixin<PartiallyInlineLibCallsPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/SCCP.h b/llvm/include/llvm/Transforms/Scalar/SCCP.h
index cd4100447880..032a9b15fc46 100644
--- a/llvm/include/llvm/Transforms/Scalar/SCCP.h
+++ b/llvm/include/llvm/Transforms/Scalar/SCCP.h
@@ -20,17 +20,19 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SCCP_H
#define LLVM_TRANSFORMS_SCALAR_SCCP_H
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Utils/PredicateInfo.h"
-#include "llvm/Transforms/Utils/SCCPSolver.h"
+
+#include <functional>
namespace llvm {
+class AssumptionCache;
+class DataLayout;
+class Function;
+class Module;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+struct AnalysisResultsForFn;
/// This pass performs function-level constant propagation and merging.
class SCCPPass : public PassInfoMixin<SCCPPass> {
diff --git a/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h
index e4002159edbd..5e876fc82ac1 100644
--- a/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h
+++ b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h
@@ -1,5 +1,5 @@
//===- ScalarizeMaskedMemIntrin.h - Scalarize unsupported masked mem ----===//
-// instrinsics
+// intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index f4472e699295..5cc67f78e5a2 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -17,14 +17,33 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
+#include "llvm/ADT/Optional.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Function;
+class FunctionPass;
+
+struct ScalarizerPassOptions {
+  // These optional booleans correspond 1:1 to cl::opt<bool> options defined in
+  // Scalarizer.cpp. When the cl::opts are specified, they take precedence.
+  // When they are not specified, these optional booleans allow overriding the
+  // cl::opts' default values.
+ llvm::Optional<bool> ScalarizeVariableInsertExtract;
+ llvm::Optional<bool> ScalarizeLoadStore;
+};
+
class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
+ ScalarizerPassOptions Options;
+
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ void setScalarizeVariableInsertExtract(bool Value) {
+ Options.ScalarizeVariableInsertExtract = Value;
+ }
+ void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; }
};
/// Create a legacy pass manager instance of the Scalarizer pass
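Editor's note: for illustration, a new-PM pipeline could configure the pass through the added setters; a minimal sketch, assuming an llvm::FunctionPassManager FPM is being populated:

  // Override the cl::opt defaults programmatically before scheduling.
  ScalarizerPass SP;
  SP.setScalarizeVariableInsertExtract(false);
  SP.setScalarizeLoadStore(true);
  FPM.addPass(std::move(SP));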
diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index dfb1619c7f2a..68c121560b13 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -9,13 +9,18 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H
#define LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
+class LPMUpdater;
+class Loop;
+class Pass;
+class StringRef;
+class raw_ostream;
+
/// This pass transforms loops that contain branches or switches on loop-
/// invariant conditions to have multiple loops. For example, it turns the left
/// into the right code:
diff --git a/llvm/include/llvm/Transforms/Scalar/Sink.h b/llvm/include/llvm/Transforms/Scalar/Sink.h
index 6cbe964d1580..759153f22853 100644
--- a/llvm/include/llvm/Transforms/Scalar/Sink.h
+++ b/llvm/include/llvm/Transforms/Scalar/Sink.h
@@ -14,11 +14,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SINK_H
#define LLVM_TRANSFORMS_SCALAR_SINK_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
/// Move instructions into successor blocks when possible.
class SinkingPass : public PassInfoMixin<SinkingPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h b/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h
index 41de544e7c9c..0ec2a395f875 100644
--- a/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h
+++ b/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h
@@ -62,10 +62,10 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H
#define LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class TargetTransformInfo;
class SpeculativeExecutionPass
: public PassInfoMixin<SpeculativeExecutionPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h b/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h
new file mode 100644
index 000000000000..2a1b02b40eeb
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/TLSVariableHoist.h
@@ -0,0 +1,131 @@
+//==- TLSVariableHoist.h ------ Remove Redundant TLS Loads -------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies and eliminates redundant TLS loads if the related
+// option is set.
+// For example:
+// static __thread int x;
+// int g();
+// int f(int c) {
+// int *px = &x;
+// while (c--)
+// *px += g();
+// return *px;
+// }
+//
+// will generate redundant TLS loads when compiled with
+// clang++ -fPIC -ftls-model=global-dynamic -O2 -S
+//
+// .LBB0_2: # %while.body
+// # =>This Inner Loop Header: Depth=1
+// callq _Z1gv@PLT
+// movl %eax, %ebp
+// leaq _ZL1x@TLSLD(%rip), %rdi
+// callq __tls_get_addr@PLT
+// addl _ZL1x@DTPOFF(%rax), %ebp
+// movl %ebp, _ZL1x@DTPOFF(%rax)
+// addl $-1, %ebx
+// jne .LBB0_2
+// jmp .LBB0_3
+// .LBB0_4: # %entry.while.end_crit_edge
+// leaq _ZL1x@TLSLD(%rip), %rdi
+// callq __tls_get_addr@PLT
+// movl _ZL1x@DTPOFF(%rax), %ebp
+//
+// Redundant TLS loads hurt performance, especially in loops.
+// So we try to eliminate or hoist them when requested, yielding:
+//
+// # %bb.0: # %entry
+// ...
+// movl %edi, %ebx
+// leaq _ZL1x@TLSLD(%rip), %rdi
+// callq __tls_get_addr@PLT
+// leaq _ZL1x@DTPOFF(%rax), %r14
+// testl %ebx, %ebx
+// je .LBB0_1
+// .LBB0_2: # %while.body
+// # =>This Inner Loop Header: Depth=1
+// callq _Z1gv@PLT
+// addl (%r14), %eax
+// movl %eax, (%r14)
+// addl $-1, %ebx
+// jne .LBB0_2
+// jmp .LBB0_3
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
+#define LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class BasicBlock;
+class DominatorTree;
+class Function;
+class GlobalVariable;
+class Instruction;
+
+/// A private "module" namespace for types and utilities used by
+/// TLSVariableHoist. These are implementation details and should
+/// not be used by clients.
+namespace tlshoist {
+
+/// Keeps track of the user of a TLS variable and the operand index
+/// where the variable is used.
+struct TLSUser {
+ Instruction *Inst;
+ unsigned OpndIdx;
+
+ TLSUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) {}
+};
+
+/// Keeps track of a TLS variable candidate and its users.
+struct TLSCandidate {
+ SmallVector<TLSUser, 8> Users;
+
+ /// Add the user to the use list and update the cost.
+ void addUser(Instruction *Inst, unsigned Idx) {
+ Users.push_back(TLSUser(Inst, Idx));
+ }
+};
+
+} // end namespace tlshoist
+
+class TLSVariableHoistPass : public PassInfoMixin<TLSVariableHoistPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ // Glue for old PM.
+ bool runImpl(Function &F, DominatorTree &DT, LoopInfo &LI);
+
+private:
+ DominatorTree *DT;
+ LoopInfo *LI;
+
+ /// Keeps track of TLS variable candidates found in the function.
+ using TLSCandMapType = MapVector<GlobalVariable *, tlshoist::TLSCandidate>;
+ TLSCandMapType TLSCandMap;
+
+ void collectTLSCandidates(Function &Fn);
+ void collectTLSCandidate(Instruction *Inst);
+ Instruction *getNearestLoopDomInst(BasicBlock *BB, Loop *L);
+ Instruction *getDomInst(Instruction *I1, Instruction *I2);
+ BasicBlock::iterator findInsertPos(Function &Fn, GlobalVariable *GV,
+ BasicBlock *&PosBB);
+ Instruction *genBitCastInst(Function &Fn, GlobalVariable *GV);
+ bool tryReplaceTLSCandidates(Function &Fn);
+ bool tryReplaceTLSCandidate(Function &Fn, GlobalVariable *GV);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_TLSVARIABLEHOIST_H
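Editor's note: like the other function passes in this batch, the new pass is scheduled through the new pass manager. A minimal sketch, assuming an llvm::FunctionPassManager FPM is in scope (the option gating the rewrite lives in the .cpp and is not shown here):

  FPM.addPass(TLSVariableHoistPass());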
diff --git a/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h b/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h
index 906867644504..57b1ed9bf4fe 100644
--- a/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h
+++ b/llvm/include/llvm/Transforms/Scalar/TailRecursionElimination.h
@@ -52,11 +52,12 @@
#ifndef LLVM_TRANSFORMS_SCALAR_TAILRECURSIONELIMINATION_H
#define LLVM_TRANSFORMS_SCALAR_TAILRECURSIONELIMINATION_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Function;
+
struct TailCallElimPass : PassInfoMixin<TailCallElimPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
index 64691d68b1c4..80d098a1ea52 100644
--- a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
+++ b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -14,10 +14,11 @@
#define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
class Function;
+class Pass;
+class PassRegistry;
// New pass manager boilerplate.
class WarnMissedTransformationsPass
diff --git a/llvm/include/llvm/Transforms/Utils.h b/llvm/include/llvm/Transforms/Utils.h
index 1e9c0a040ad2..ebd4bd318573 100644
--- a/llvm/include/llvm/Transforms/Utils.h
+++ b/llvm/include/llvm/Transforms/Utils.h
@@ -155,6 +155,12 @@ FunctionPass *createAssumeSimplifyPass();
// don't block SCEV.
//
Pass *createCanonicalizeFreezeInLoopsPass();
+
+//===----------------------------------------------------------------------===//
+// LowerGlobalDtorsLegacy - Lower @llvm.global_dtors by creating wrapper
+// functions that are registered in @llvm.global_ctors and which contain a call
+// to `__cxa_atexit` to register their destructor functions.
+ModulePass *createLowerGlobalDtorsLegacyPass();
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
index d679bca69510..991ecb8efbd0 100644
--- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
+++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
@@ -17,12 +17,13 @@
#define LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEBUILDER_H
#include "llvm/Analysis/AssumeBundleQueries.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class AssumeInst;
+class Function;
+class FunctionPass;
+class Instruction;
class AssumptionCache;
class DominatorTree;
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index d99b2a56559d..fcdd2aa0e060 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -18,21 +18,20 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Dominators.h"
#include <cassert>
namespace llvm {
-
+class BranchInst;
+class LandingPadInst;
+class Loop;
+class PHINode;
+template <typename PtrType> class SmallPtrSetImpl;
class BlockFrequencyInfo;
class BranchProbabilityInfo;
-class DominatorTree;
class DomTreeUpdater;
class Function;
-class Instruction;
class LoopInfo;
class MDNode;
class MemoryDependenceResults;
@@ -500,7 +499,9 @@ BranchInst *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// create the following structure:
// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
// If BPI and BFI aren't non-null, BPI/BFI will be updated accordingly.
-bool SplitIndirectBrCriticalEdges(Function &F,
+// When `IgnoreBlocksWithoutPHI` is set to `true`, critical edges leading to a
+// block without phi-instructions will not be split.
+bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI,
BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr);
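Editor's note: call sites must now state the splitting policy explicitly; a hedged sketch of an updated caller (F, BPI, and BFI assumed in scope, the latter two possibly null):

  // Skip splitting edges into successors without PHI nodes; there is
  // nothing to untangle in those blocks.
  bool Changed = SplitIndirectBrCriticalEdges(
      F, /*IgnoreBlocksWithoutPHI=*/true, BPI, BFI);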
diff --git a/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h b/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
index 3644f1ed7a13..6de080ce3128 100644
--- a/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
+++ b/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
@@ -17,10 +17,11 @@
#ifndef LLVM_TRANSFORMS_UTILS_BREAKCRITICALEDGES_H
#define LLVM_TRANSFORMS_UTILS_BREAKCRITICALEDGES_H
-#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+
+class Function;
struct BreakCriticalEdgesPass : public PassInfoMixin<BreakCriticalEdgesPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 87d33b9b11b7..6ea195ce31ac 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -22,23 +22,63 @@ namespace llvm {
class IRBuilderBase;
/// Analyze the name and prototype of the given function and set any
- /// applicable attributes.
+ /// applicable attributes. Note that this merely helps optimizations on an
+ /// already existing function but does not consider mandatory attributes.
+ ///
/// If the library function is unavailable, this doesn't modify it.
///
/// Returns true if any attributes were set and false otherwise.
- bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI);
- bool inferLibFuncAttributes(Module *M, StringRef Name, const TargetLibraryInfo &TLI);
+ bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name,
+ const TargetLibraryInfo &TLI);
+ bool inferNonMandatoryLibFuncAttrs(Function &F, const TargetLibraryInfo &TLI);
+
+ /// Calls getOrInsertFunction() and then makes sure to add mandatory
+ /// argument attributes.
+ FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, FunctionType *T,
+ AttributeList AttributeList);
+ FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, FunctionType *T);
+ template <typename... ArgsTy>
+ FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, AttributeList AttributeList,
+ Type *RetTy, ArgsTy... Args) {
+ SmallVector<Type*, sizeof...(ArgsTy)> ArgTys{Args...};
+ return getOrInsertLibFunc(M, TLI, TheLibFunc,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttributeList);
+ }
+ /// Same as above, but without the attributes.
+ template <typename... ArgsTy>
+ FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, Type *RetTy, ArgsTy... Args) {
+ return getOrInsertLibFunc(M, TLI, TheLibFunc, AttributeList{}, RetTy,
+ Args...);
+ }
+  // Deleted overload: rejects a mistaken argument ordering that would
+  // otherwise compile but misbehave.
+  template <typename... ArgsTy>
+ FunctionCallee
+ getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, AttributeList AttributeList,
+ FunctionType *Invalid, ArgsTy... Args) = delete;
+
+  /// Check whether the library function is available on the target and also
+  /// that it is declared in the current Module as a Function with the right
+  /// type.
+ bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+ LibFunc TheLibFunc);
+ bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+ StringRef Name);
/// Check whether the overloaded floating point function
/// corresponding to \a Ty is available.
- bool hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ bool hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn);
/// Get the name of the overloaded floating point function
- /// corresponding to \a Ty.
- StringRef getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn);
+ /// corresponding to \a Ty. Return the LibFunc in \a TheLibFunc.
+ StringRef getFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn,
+ LibFunc &TheLibFunc);
/// Return V if it is an i8*, otherwise cast it to i8*.
Value *castToCStr(Value *V, IRBuilderBase &B);
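Editor's note: a sketch of the variadic getOrInsertLibFunc overload in use, assuming Module *M, a TargetLibraryInfo TLI, and an IRBuilderBase B are in scope (the i64 size type is an assumption for a 64-bit target):

  // Declares i64 @strlen(i8*) and, unlike a bare getOrInsertFunction()
  // call, attaches the mandatory argument attributes.
  if (isLibFuncEmittable(M, &TLI, LibFunc_strlen)) {
    FunctionCallee StrLen = getOrInsertLibFunc(
        M, TLI, LibFunc_strlen, B.getInt64Ty(), B.getInt8PtrTy());
    // ... B.CreateCall(StrLen, Ptr) ...
  }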
@@ -99,6 +139,10 @@ namespace llvm {
Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI);
+ /// Emit a call to the memrchr function, analogously to emitMemChr.
+ Value *emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI);
+
/// Emit a call to the memcmp function.
Value *emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI);
@@ -148,7 +192,8 @@ namespace llvm {
/// function is known to take a single argument of type matching 'Op' and to
/// return one value with the same type. If 'Op' is a long double, an 'l' is
/// added as the suffix of the name; if 'Op' is a float, we add an 'f' suffix.
- Value *emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilderBase &B,
+ Value *emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ StringRef Name, IRBuilderBase &B,
const AttributeList &Attrs);
/// Emit a call to the unary function DoubleFn, FloatFn or LongDoubleFn,
@@ -162,8 +207,10 @@ namespace llvm {
/// function is known to take types matching 'Op1' and 'Op2' and to return one
/// value with the same type. If 'Op1/Op2' are long double, an 'l' is added as
/// the suffix of the name; if 'Op1/Op2' are float, we add an 'f' suffix.
- Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilderBase &B, const AttributeList &Attrs);
+ Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+ const TargetLibraryInfo *TLI,
+ StringRef Name, IRBuilderBase &B,
+ const AttributeList &Attrs);
/// Emit a call to the binary function DoubleFn, FloatFn or LongDoubleFn,
/// depending on the type of Op1.
diff --git a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
index e12d7e09aad6..7e6683fd0c8a 100644
--- a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
@@ -16,12 +16,13 @@
#define LLVM_TRANSFORMS_UTILS_CALLGRAPHUPDATER_H
#include "llvm/Analysis/CGSCCPassManager.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
namespace llvm {
+class CallGraph;
+class CallGraphSCC;
+
/// Wrapper to unify "old style" CallGraph and "new style" LazyCallGraph. This
/// simplifies the interface and the call sites, e.g., new and old pass manager
/// passes can share the same code.
diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
index daa88981d3bf..fcb384ec3613 100644
--- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -19,6 +19,7 @@ class CallBase;
class CastInst;
class Function;
class MDNode;
+class Value;
/// Return true if the given indirect call site can be made to call \p Callee.
///
@@ -73,6 +74,15 @@ CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
///
bool tryPromoteCall(CallBase &CB);
+/// Predicate and clone the given call site.
+///
+/// This function creates an if-then-else structure at the location of the call
+/// site. The "if" condition compares the call site's called value to the given
+/// callee. The original call site is moved into the "else" block, and a clone
+/// of the call site is placed in the "then" block. The cloned instruction is
+/// returned.
+CallBase &versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
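Editor's note: a hedged sketch of how the newly exported helper composes with the existing promotion API (CB and Callee assumed in scope):

  // Split the call site on "called value == Callee"; the clone in the
  // "then" block can then be promoted to a direct call, while the original
  // remains as the indirect fallback in the "else" block.
  CallBase &Clone = versionCallSite(CB, Callee, /*BranchWeights=*/nullptr);
  promoteCall(Clone, Callee);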
diff --git a/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h b/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h
index fdb390db3aff..0bdc1a12d1fb 100644
--- a/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h
+++ b/llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h
@@ -13,11 +13,12 @@
#ifndef LLVM_TRANSFORMS_UTILS_CANONICALIZEALIASES_H
#define LLVM_TRANSFORMS_UTILS_CANONICALIZEALIASES_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Module;
+
/// Simple pass that canonicalizes aliases.
class CanonicalizeAliasesPass : public PassInfoMixin<CanonicalizeAliasesPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h b/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h
index 9de032935f88..924b6cdf7ca0 100644
--- a/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h
+++ b/llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h
@@ -14,10 +14,10 @@
#define LLVM_TRANSFORMS_UTILS_CANONICALIZEFREEZEINLOOPS_H
#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Loop;
class LPMUpdater;
/// A pass that canonicalizes freeze instructions in a loop.
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 8aed3d0e40d9..bb23cf4a9a3c 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -17,11 +17,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include <limits>
namespace llvm {
+template <typename PtrType> class SmallPtrSetImpl;
class AllocaInst;
class BasicBlock;
class BlockFrequency;
@@ -92,6 +92,11 @@ public:
BranchProbabilityInfo *BPI;
AssumptionCache *AC;
+  // A block outside of the extraction set into which any intermediate
+  // allocations will be placed. If this is null, allocations
+  // will be placed in the entry block of the function.
+ BasicBlock *AllocationBlock;
+
// If true, varargs functions can be extracted.
bool AllowVarArgs;
@@ -120,11 +125,15 @@ public:
/// code is extracted, including vastart. If AllowAlloca is true, then
/// extraction of blocks containing alloca instructions would be possible,
/// however code extractor won't validate whether extraction is legal.
+ /// Any new allocations will be placed in the AllocationBlock, unless
+  /// it is null, in which case they will be placed in the entry block of
+ /// the function from which the code is being extracted.
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr,
- AssumptionCache *AC = nullptr,
- bool AllowVarArgs = false, bool AllowAlloca = false,
+ AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
+ bool AllowAlloca = false,
+ BasicBlock *AllocationBlock = nullptr,
std::string Suffix = "");
/// Create a code extractor for a loop body.
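Editor's note: a sketch of the extended constructor, assuming BBs, DT, a pre-created block AllocaBB outside the extraction set, and a CodeExtractorAnalysisCache CEAC are in scope:

  // Direct intermediate allocas into AllocaBB rather than the entry block.
  CodeExtractor CE(BBs, &DT, /*AggregateArgs=*/false, /*BFI=*/nullptr,
                   /*BPI=*/nullptr, /*AC=*/nullptr, /*AllowVarArgs=*/false,
                   /*AllowAlloca=*/false, /*AllocationBlock=*/AllocaBB);
  Function *Outlined = CE.extractCodeRegion(CEAC);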
diff --git a/llvm/include/llvm/Transforms/Utils/CtorUtils.h b/llvm/include/llvm/Transforms/Utils/CtorUtils.h
index 3ef3ba244b43..40b290a5a6f4 100644
--- a/llvm/include/llvm/Transforms/Utils/CtorUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CtorUtils.h
@@ -13,7 +13,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_CTORUTILS_H
#define LLVM_TRANSFORMS_UTILS_CTORUTILS_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
namespace llvm {
@@ -22,9 +22,9 @@ class Module;
/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
/// entries for which it returns true. Return true if anything changed.
-bool optimizeGlobalCtorsList(Module &M,
- function_ref<bool(Function *)> ShouldRemove);
+bool optimizeGlobalCtorsList(
+ Module &M, function_ref<bool(uint32_t, Function *)> ShouldRemove);
-} // End llvm namespace
+} // namespace llvm
#endif
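Editor's note: with the added priority parameter a caller can restrict itself to default-priority entries; a sketch using a hypothetical isRemovableCtor predicate:

  // Only touch default-priority (65535) ctors; lower priorities may encode
  // explicit ordering requirements we must not disturb.
  bool Changed =
      optimizeGlobalCtorsList(M, [](uint32_t Priority, Function *F) {
        return Priority == 65535 && isRemovableCtor(F); // hypothetical
      });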
diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h
index 892e354cd9ed..405bbb8e0be8 100644
--- a/llvm/include/llvm/Transforms/Utils/Debugify.h
+++ b/llvm/include/llvm/Transforms/Utils/Debugify.h
@@ -23,7 +23,8 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-using DebugFnMap = llvm::MapVector<llvm::StringRef, const llvm::DISubprogram *>;
+using DebugFnMap =
+ llvm::MapVector<const llvm::Function *, const llvm::DISubprogram *>;
using DebugInstMap = llvm::MapVector<const llvm::Instruction *, bool>;
using DebugVarMap = llvm::MapVector<const llvm::DILocalVariable *, unsigned>;
using WeakInstValueMap =
@@ -42,9 +43,6 @@ struct DebugInfoPerPass {
DebugVarMap DIVariables;
};
-/// Map pass names to a per-pass DebugInfoPerPass instance.
-using DebugInfoPerPassMap = llvm::MapVector<llvm::StringRef, DebugInfoPerPass>;
-
namespace llvm {
class DIBuilder;
@@ -69,24 +67,24 @@ bool stripDebugifyMetadata(Module &M);
///
/// \param M The module to collect debug information from.
/// \param Functions A range of functions to collect debug information from.
-/// \param DIPreservationMap A map to collect the DI metadata.
+/// \param DebugInfoBeforePass DI metadata before a pass.
/// \param Banner A prefix string to add to debug/error messages.
/// \param NameOfWrappedPass A name of a pass to add to debug/error messages.
bool collectDebugInfoMetadata(Module &M,
iterator_range<Module::iterator> Functions,
- DebugInfoPerPassMap &DIPreservationMap,
+ DebugInfoPerPass &DebugInfoBeforePass,
StringRef Banner, StringRef NameOfWrappedPass);
/// Check original debug information after a pass.
///
/// \param M The module to collect debug information from.
/// \param Functions A range of functions to collect debug information from.
-/// \param DIPreservationMap A map used to check collected the DI metadata.
+/// \param DebugInfoBeforePass DI metadata before a pass.
/// \param Banner A prefix string to add to debug/error messages.
/// \param NameOfWrappedPass A name of a pass to add to debug/error messages.
bool checkDebugInfoMetadata(Module &M,
iterator_range<Module::iterator> Functions,
- DebugInfoPerPassMap &DIPreservationMap,
+ DebugInfoPerPass &DebugInfoBeforePass,
StringRef Banner, StringRef NameOfWrappedPass,
StringRef OrigDIVerifyBugsReportFilePath);
} // namespace llvm
@@ -97,11 +95,11 @@ enum class DebugifyMode { NoDebugify, SyntheticDebugInfo, OriginalDebugInfo };
llvm::ModulePass *createDebugifyModulePass(
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
llvm::StringRef NameOfWrappedPass = "",
- DebugInfoPerPassMap *DIPreservationMap = nullptr);
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr);
llvm::FunctionPass *createDebugifyFunctionPass(
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
llvm::StringRef NameOfWrappedPass = "",
- DebugInfoPerPassMap *DIPreservationMap = nullptr);
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr);
struct NewPMDebugifyPass : public llvm::PassInfoMixin<NewPMDebugifyPass> {
llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &AM);
@@ -140,14 +138,14 @@ llvm::ModulePass *createCheckDebugifyModulePass(
bool Strip = false, llvm::StringRef NameOfWrappedPass = "",
DebugifyStatsMap *StatsMap = nullptr,
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
- DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
llvm::StringRef OrigDIVerifyBugsReportFilePath = "");
llvm::FunctionPass *createCheckDebugifyFunctionPass(
bool Strip = false, llvm::StringRef NameOfWrappedPass = "",
DebugifyStatsMap *StatsMap = nullptr,
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
- DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
llvm::StringRef OrigDIVerifyBugsReportFilePath = "");
struct NewPMCheckDebugifyPass
@@ -171,7 +169,7 @@ struct DebugifyEachInstrumentation {
class DebugifyCustomPassManager : public legacy::PassManager {
StringRef OrigDIVerifyBugsReportFilePath;
DebugifyStatsMap *DIStatsMap = nullptr;
- DebugInfoPerPassMap *DIPreservationMap = nullptr;
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr;
enum DebugifyMode Mode = DebugifyMode::NoDebugify;
public:
@@ -197,17 +195,17 @@ public:
// TODO: Implement Debugify for LoopPass.
switch (Kind) {
case PT_Function:
- super::add(createDebugifyFunctionPass(Mode, Name, DIPreservationMap));
+ super::add(createDebugifyFunctionPass(Mode, Name, DebugInfoBeforePass));
super::add(P);
super::add(createCheckDebugifyFunctionPass(
- isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DIPreservationMap,
+ isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DebugInfoBeforePass,
OrigDIVerifyBugsReportFilePath));
break;
case PT_Module:
- super::add(createDebugifyModulePass(Mode, Name, DIPreservationMap));
+ super::add(createDebugifyModulePass(Mode, Name, DebugInfoBeforePass));
super::add(P);
super::add(createCheckDebugifyModulePass(
- isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DIPreservationMap,
+ isSyntheticDebugInfo(), Name, DIStatsMap, Mode, DebugInfoBeforePass,
OrigDIVerifyBugsReportFilePath));
break;
default:
@@ -219,8 +217,8 @@ public:
// Used within DebugifyMode::SyntheticDebugInfo mode.
void setDIStatsMap(DebugifyStatsMap &StatMap) { DIStatsMap = &StatMap; }
// Used within DebugifyMode::OriginalDebugInfo mode.
- void setDIPreservationMap(DebugInfoPerPassMap &PerPassMap) {
- DIPreservationMap = &PerPassMap;
+ void setDebugInfoBeforePass(DebugInfoPerPass &PerPassDI) {
+ DebugInfoBeforePass = &PerPassDI;
}
void setOrigDIVerifyBugsReportFilePath(StringRef BugsReportFilePath) {
OrigDIVerifyBugsReportFilePath = BugsReportFilePath;
@@ -239,7 +237,7 @@ public:
}
const DebugifyStatsMap &getDebugifyStatsMap() const { return *DIStatsMap; }
- DebugInfoPerPassMap &getDebugInfoPerPassMap() { return *DIPreservationMap; }
+ DebugInfoPerPass &getDebugInfoPerPass() { return *DebugInfoBeforePass; }
};
} // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h b/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h
index bb5c6f04dd0c..3d8447e9bf23 100644
--- a/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h
+++ b/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h
@@ -32,7 +32,7 @@ class EscapeEnumerator {
Function::iterator StateBB, StateE;
IRBuilder<> Builder;
- bool Done;
+ bool Done = false;
bool HandleExceptions;
DomTreeUpdater *DTU;
@@ -41,8 +41,7 @@ public:
EscapeEnumerator(Function &F, const char *N = "cleanup",
bool HandleExceptions = true, DomTreeUpdater *DTU = nullptr)
: F(F), CleanupBBName(N), StateBB(F.begin()), StateE(F.end()),
- Builder(F.getContext()), Done(false),
- HandleExceptions(HandleExceptions), DTU(DTU) {}
+ Builder(F.getContext()), HandleExceptions(HandleExceptions), DTU(DTU) {}
IRBuilder<> *Next();
};
diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h
index 99e826bf855f..2b8384897c6b 100644
--- a/llvm/include/llvm/Transforms/Utils/Evaluator.h
+++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h
@@ -18,8 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <deque>
@@ -27,6 +25,7 @@
namespace llvm {
+class CallBase;
class DataLayout;
class Function;
class TargetLibraryInfo;
@@ -139,6 +138,8 @@ private:
SmallVectorImpl<Constant *> &Formals);
Constant *ComputeLoadResult(Constant *P, Type *Ty);
+ Constant *ComputeLoadResult(GlobalVariable *GV, Type *Ty,
+ const APInt &Offset);
/// As we compute SSA register values, we store their contents here. The back
/// of the deque contains the current function and the stack contains the
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
index 964fdce45744..b6b53d0f10cb 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueMap.h"
@@ -28,6 +27,7 @@
namespace llvm {
class APFloat;
+class AttributeList;
class APInt;
class BasicBlock;
class Constant;
diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
index 775dd23d8f23..60c91fc30174 100644
--- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -35,6 +35,9 @@ struct GlobalStatus {
/// can be deleted.
bool IsLoaded = false;
+ /// Number of stores to the global.
+ unsigned NumStores = 0;
+
/// Keep track of what stores to the global look like.
enum StoredType {
/// There is no store to this global. It can thus be marked constant.
diff --git a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
index af9cdb9fd619..d2ce0c5d3988 100644
--- a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
+++ b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
@@ -18,6 +18,7 @@
#include "llvm/Pass.h"
namespace llvm {
+class Function;
class InjectTLIMappings : public PassInfoMixin<InjectTLIMappings> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 873127554b47..946fc84b9a2c 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -15,26 +15,18 @@
#define LLVM_TRANSFORMS_UTILS_LOCAL_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/Utils/Local.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <cstdint>
-#include <limits>
namespace llvm {
+class DataLayout;
+class Value;
+class WeakTrackingVH;
+class WeakVH;
+template <typename T> class SmallVectorImpl;
class AAResults;
class AllocaInst;
class AssumptionCache;
@@ -343,7 +335,7 @@ bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint,
/// Remove all instructions from a basic block other than its terminator
/// and any present EH pad instructions. Returns a pair where the first element
-/// is the number of instructions (excluding debug info instrinsics) that have
+/// is the number of instructions (excluding debug info intrinsics) that have
/// been removed, and the second element is the number of debug info intrinsics
/// that have been removed.
std::pair<unsigned, unsigned>
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 3a712d78df67..676c0c1487db 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -13,18 +13,18 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
namespace llvm {
template <typename T> class DomTreeNodeBase;
using DomTreeNode = DomTreeNodeBase<BasicBlock>;
+class StringRef;
+class AnalysisUsage;
+class TargetTransformInfo;
class AAResults;
-class AliasSet;
-class AliasSetTracker;
class BasicBlock;
class BlockFrequencyInfo;
class ICFLoopSafetyInfo;
@@ -49,8 +49,6 @@ typedef std::pair<const RuntimeCheckingPtrGroup *,
template <typename T> class Optional;
template <typename T, unsigned N> class SmallSetVector;
-template <typename T, unsigned N> class SmallVector;
-template <typename T> class SmallVectorImpl;
template <typename T, unsigned N> class SmallPriorityWorklist;
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
@@ -150,7 +148,7 @@ protected:
/// this function is called by \p sinkRegionForLoopNest.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *,
- TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *,
+ TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater &,
ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr);
@@ -159,7 +157,7 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
DominatorTree *, BlockFrequencyInfo *,
TargetLibraryInfo *, TargetTransformInfo *, Loop *,
- MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ MemorySSAUpdater &, ICFLoopSafetyInfo *,
SinkAndHoistLICMFlags &,
OptimizationRemarkEmitter *);
@@ -171,10 +169,13 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns the changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
- MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
- SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
+ MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *,
+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
+ bool AllowSpeculation);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
@@ -204,12 +205,14 @@ void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
/// of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns the changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
- Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *);
+ Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
+ OptimizationRemarkEmitter *, bool AllowSpeculation);
/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
@@ -342,9 +345,9 @@ void getLoopAnalysisUsage(AnalysisUsage &AU);
/// true when moving out of loop and not true when moving into loops.
/// If \p ORE is set use it to emit optimization remarks.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
- SinkAndHoistLICMFlags *LICMFlags = nullptr,
+ Loop *CurLoop, MemorySSAUpdater &MSSAU,
+ bool TargetExecutesOncePerLoop,
+ SinkAndHoistLICMFlags &LICMFlags,
OptimizationRemarkEmitter *ORE = nullptr);
/// Returns the comparison predicate used when expanding a min/max reduction.
@@ -410,8 +413,10 @@ Value *createOrderedReduction(IRBuilderBase &B,
/// of each scalar operation (VL) that will be converted into a vector (I).
/// If OpValue is non-null, we only consider operations similar to OpValue
/// when intersecting.
-/// Flag set: NSW, NUW, exact, and all of fast-math.
-void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
+/// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of
+/// fast-math.
+void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr,
+ bool IncludeWrapFlags = true);
/// Returns true if we can prove that \p S is defined and always negative in
/// loop \p L.
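Editor's note: a hedged sketch of the new IncludeWrapFlags parameter from a vectorizer's point of view (VecOp and ScalarOps assumed in scope):

  // Intersect flags across all scalar ops, but drop nsw/nuw, e.g. when the
  // vector form may not preserve the wrap-freedom of each scalar op.
  propagateIRFlags(VecOp, ScalarOps, /*OpValue=*/nullptr,
                   /*IncludeWrapFlags=*/false);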
@@ -497,6 +502,12 @@ addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
SCEVExpander &Expander);
+Value *
+addDiffRuntimeChecks(Instruction *Loc, Loop *TheLoop,
+ ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
+ function_ref<Value *(IRBuilderBase &, unsigned)> GetVF,
+ unsigned IC);
+
/// Struct to hold information about a partially invariant condition.
struct IVConditionInfo {
/// Instructions that need to be duplicated and checked for the unswitching
diff --git a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
index 4a8831ed45b2..eeab98c56b66 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -15,7 +15,6 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H
#define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -23,6 +22,8 @@
namespace llvm {
class Loop;
+class SCEVPredicate;
+class ScalarEvolution;
class LoopAccessInfo;
class LoopInfo;
struct RuntimeCheckingPtrGroup;
@@ -113,7 +114,7 @@ private:
Loop *VersionedLoop;
/// The fall-back loop, i.e. control flows here if the pointers in the
/// loop may alias (memchecks failed).
- Loop *NonVersionedLoop;
+ Loop *NonVersionedLoop = nullptr;
/// This maps the instructions from VersionedLoop to their counterpart
/// in NonVersionedLoop.
@@ -123,7 +124,7 @@ private:
SmallVector<RuntimePointerCheck, 4> AliasChecks;
/// The set of SCEV checks that we are versioning for.
- const SCEVUnionPredicate &Preds;
+ const SCEVPredicate &Preds;
/// Maps a pointer to the pointer checking group that the pointer
/// belongs to.
diff --git a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
new file mode 100644
index 000000000000..c85f8e3a5646
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
@@ -0,0 +1,37 @@
+//===- LowerAtomic.h - Lower atomic intrinsics ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass lowers atomic intrinsics to non-atomic form for use in a known
+/// non-preemptible environment.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
+
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+class IRBuilderBase;
+
+/// Convert the given Cmpxchg into primitive load and compare.
+bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI);
+
+/// Convert the given RMWI into primitive load and stores,
+/// assuming that doing so is legal. Return true if the lowering
+/// succeeds.
+bool lowerAtomicRMWInst(AtomicRMWInst *RMWI);
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder,
+ Value *Loaded, Value *Inc);
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
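Editor's note: a sketch of applying the moved helpers over a function, assuming a single-threaded (non-preemptible) environment; lowerAtomicRMWInst erases the instruction, hence the early-increment range:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/Transforms/Utils/LowerAtomic.h"

  // Rewrite every atomicrmw into a plain load/op/store sequence.
  static bool lowerAllAtomicRMW(llvm::Function &F) {
    bool Changed = false;
    for (llvm::Instruction &I :
         llvm::make_early_inc_range(llvm::instructions(F)))
      if (auto *RMW = llvm::dyn_cast<llvm::AtomicRMWInst>(&I))
        Changed |= llvm::lowerAtomicRMWInst(RMW);
    return Changed;
  }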
diff --git a/llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h b/llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h
new file mode 100644
index 000000000000..993a6f57361c
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LowerGlobalDtors.h
@@ -0,0 +1,28 @@
+//===- LowerGlobalDtors.h - Lower @llvm.global_dtors ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers @llvm.global_dtors by creating wrapper functions that are
+// registered in @llvm.global_ctors and which contain a call to `__cxa_atexit`
+// to register their destructor functions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_LOWERGLOBALDTORS_H
+#define LLVM_TRANSFORMS_UTILS_LOWERGLOBALDTORS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LowerGlobalDtorsPass : public PassInfoMixin<LowerGlobalDtorsPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LOWERGLOBALDTORS_H
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index 8d0956033d9f..acf59ff580a4 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -14,13 +14,17 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H
#define LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H
+#include "llvm/ADT/Optional.h"
+
namespace llvm {
+class AtomicMemCpyInst;
class ConstantInt;
class Instruction;
class MemCpyInst;
class MemMoveInst;
class MemSetInst;
+class ScalarEvolution;
class TargetTransformInfo;
class Value;
struct Align;
@@ -28,10 +32,11 @@ struct Align;
/// Emit a loop implementing the semantics of llvm.memcpy where the size is not
/// a compile-time constant. The loop will be inserted at \p InsertBefore.
void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
- Value *DstAddr, Value *CopyLen,
- Align SrcAlign, Align DestAlign,
- bool SrcIsVolatile, bool DstIsVolatile,
- const TargetTransformInfo &TTI);
+ Value *DstAddr, Value *CopyLen, Align SrcAlign,
+ Align DestAlign, bool SrcIsVolatile,
+ bool DstIsVolatile, bool CanOverlap,
+ const TargetTransformInfo &TTI,
+ Optional<unsigned> AtomicSize = None);
/// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
/// compile time constant. Loop is inserted at \p InsertBefore.
@@ -39,10 +44,12 @@ void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, ConstantInt *CopyLen,
Align SrcAlign, Align DestAlign,
bool SrcIsVolatile, bool DstIsVolatile,
- const TargetTransformInfo &TTI);
+ bool CanOverlap, const TargetTransformInfo &TTI,
+ Optional<uint32_t> AtomicCpySize = None);
/// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
-void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI);
+void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
+ ScalarEvolution *SE = nullptr);
/// Expand \p MemMove as a loop. \p MemMove is not deleted.
void expandMemMoveAsLoop(MemMoveInst *MemMove);
@@ -50,6 +57,11 @@ void expandMemMoveAsLoop(MemMoveInst *MemMove);
/// Expand \p MemSet as a loop. \p MemSet is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet);
+/// Expand \p AtomicMemCpy as a loop. \p AtomicMemCpy is not deleted.
+void expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemCpy,
+ const TargetTransformInfo &TTI,
+ ScalarEvolution *SE);
+
} // End llvm namespace
#endif
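
To illustrate the new ScalarEvolution parameter, a caller could expand plain memcpys roughly as below (a sketch; obtaining TTI and SE from the analysis managers is assumed). When SE is provided, the lowering can try to prove that source and destination do not overlap and annotate the emitted loop accordingly:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/InstIterator.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

  void expandMemCpys(llvm::Function &F, const llvm::TargetTransformInfo &TTI,
                     llvm::ScalarEvolution *SE) {
    using namespace llvm;
    for (Instruction &I : make_early_inc_range(instructions(F))) {
      if (auto *MC = dyn_cast<MemCpyInst>(&I)) {
        expandMemCpyAsLoop(MC, TTI, SE); // SE may be null
        MC->eraseFromParent(); // the helper does not delete the intrinsic
      }
    }
  }
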
diff --git a/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
new file mode 100644
index 000000000000..a2b85e03897b
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h
@@ -0,0 +1,82 @@
+//===- MemoryTaggingSupport.h - helpers for memory tagging implementations ===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common infrastructure for HWAddressSanitizer and
+// AArch64StackTagging.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_MEMORYTAGGINGSUPPORT_H
+#define LLVM_TRANSFORMS_UTILS_MEMORYTAGGINGSUPPORT_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/Alignment.h"
+
+namespace llvm {
+class DominatorTree;
+class DbgVariableIntrinsic;
+class IntrinsicInst;
+class PostDominatorTree;
+class AllocaInst;
+class Instruction;
+namespace memtag {
+// For an alloca that is valid between the lifetime markers Start and Ends,
+// call Callback for all possible exits out of the lifetime in the containing
+// function; such exits include the return-like instructions in RetVec.
+//
+// Returns whether Ends covered all possible exits. If they did not,
+// the caller should remove Ends to ensure that work done at the other
+// exits does not happen outside of the lifetime.
+bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
+ const LoopInfo &LI, const Instruction *Start,
+ const SmallVectorImpl<IntrinsicInst *> &Ends,
+ const SmallVectorImpl<Instruction *> &RetVec,
+ llvm::function_ref<void(Instruction *)> Callback);
+
+bool isStandardLifetime(const SmallVectorImpl<IntrinsicInst *> &LifetimeStart,
+ const SmallVectorImpl<IntrinsicInst *> &LifetimeEnd,
+ const DominatorTree *DT, const LoopInfo *LI,
+ size_t MaxLifetimes);
+
+Instruction *getUntagLocationIfFunctionExit(Instruction &Inst);
+
+struct AllocaInfo {
+ AllocaInst *AI;
+ SmallVector<IntrinsicInst *, 2> LifetimeStart;
+ SmallVector<IntrinsicInst *, 2> LifetimeEnd;
+ SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
+};
+
+struct StackInfo {
+ MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
+ SmallVector<Instruction *, 4> UnrecognizedLifetimes;
+ SmallVector<Instruction *, 8> RetVec;
+ bool CallsReturnTwice = false;
+};
+
+class StackInfoBuilder {
+public:
+ StackInfoBuilder(std::function<bool(const AllocaInst &)> IsInterestingAlloca)
+ : IsInterestingAlloca(IsInterestingAlloca) {}
+
+ void visit(Instruction &Inst);
+ StackInfo &get() { return Info; }
+
+private:
+ StackInfo Info;
+ std::function<bool(const AllocaInst &)> IsInterestingAlloca;
+};
+
+uint64_t getAllocaSizeInBytes(const AllocaInst &AI);
+void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Align);
+
+} // namespace memtag
+} // namespace llvm
+
+#endif
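
A sketch of how a sanitizer might use StackInfoBuilder; the predicate shown is a placeholder, since each user supplies its own notion of an interesting alloca:

  #include "llvm/IR/InstIterator.h"
  #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"

  llvm::memtag::StackInfo collectStackInfo(llvm::Function &F) {
    using namespace llvm;
    // Placeholder predicate; real users apply their own heuristics here.
    memtag::StackInfoBuilder SIB(
        [](const AllocaInst &AI) { return AI.isStaticAlloca(); });
    for (Instruction &I : instructions(F))
      SIB.visit(I); // records allocas, lifetime markers, returns, dbg users
    return SIB.get();
  }
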
diff --git a/llvm/include/llvm/Transforms/Utils/MisExpect.h b/llvm/include/llvm/Transforms/Utils/MisExpect.h
new file mode 100644
index 000000000000..064eeac4c669
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/MisExpect.h
@@ -0,0 +1,77 @@
+//===--- MisExpect.h - Check the use of llvm.expect with PGO data ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to emit diagnostic messages for potentially incorrect
+// usage of the llvm.expect intrinsic. This utility extracts the threshold
+// values from metadata associated with the instrumented Branch or Switch
+// instruction. The threshold values are then used to determine if a diagnostic
+// should be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+namespace misexpect {
+
+/// checkBackendInstrumentation - compares PGO counters to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range. It extracts the expected weights from the MD_prof weights attached
+/// to the instruction, which are assumed to come from lowered llvm.expect
+/// intrinsics. The RealWeights parameter and the extracted expected weights
+/// are then passed to verifyMisExpect() for verification.
+///
+/// \param I The Instruction being checked
+/// \param RealWeights A vector of profile weights for each target block
+void checkBackendInstrumentation(Instruction &I,
+ const llvm::ArrayRef<uint32_t> RealWeights);
+
+/// checkFrontendInstrumentation - compares PGO counters to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range. It extracts the expected weights from the MD_prof weights attached
+/// to the instruction, which are assumed to come from profiling data
+/// attached by the frontend prior to llvm.expect intrinsic lowering. The
+/// ExpectedWeights parameter and the extracted real weights are then passed
+/// to verifyMisExpect() for verification.
+///
+/// \param I The Instruction being checked
+/// \param ExpectedWeights A vector of the expected weights for each target
+/// block; this determines the threshold values used when emitting diagnostics
+void checkFrontendInstrumentation(Instruction &I,
+ const ArrayRef<uint32_t> ExpectedWeights);
+
+/// verifyMisExpect - compares RealWeights to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range.
+///
+/// \param I The Instruction being checked
+/// \param RealWeights A vector of profile weights from the profile data
+/// \param ExpectedWeights A vector of the weights attached by llvm.expect
+void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
+ const ArrayRef<uint32_t> ExpectedWeights);
+
+/// checkExpectAnnotations - compares PGO counters to the thresholds used
+/// for llvm.expect and warns if the PGO counters are outside of the expected
+/// range. It extracts the expected weights from the MD_prof weights attached
+/// to the instruction, which are assumed to come from lowered llvm.expect
+/// intrinsics. The ExistingWeights parameter and the extracted expected
+/// weights are then passed to verifyMisExpect() for verification. It is a
+/// thin wrapper around the checkFrontendInstrumentation and
+/// checkBackendInstrumentation APIs.
+///
+/// \param I The Instruction being checked
+/// \param ExistingWeights A vector of profile weights for each target block
+/// \param IsFrontend True if the weights come from frontend instrumentation
+void checkExpectAnnotations(Instruction &I,
+ const ArrayRef<uint32_t> ExistingWeights,
+ bool IsFrontend);
+
+} // namespace misexpect
+} // namespace llvm
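
An assumed call site, for illustration only: a profile loader that has just computed real branch weights could verify them against a lowered llvm.expect like so (the counts are made up):

  #include "llvm/Transforms/Utils/MisExpect.h"

  void verifyWeights(llvm::Instruction &Term) {
    // Hypothetical PGO counts for the two successors of Term.
    llvm::SmallVector<uint32_t, 2> RealWeights = {7, 123456};
    llvm::misexpect::checkBackendInstrumentation(Term, RealWeights);
  }
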
diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 8d459972336b..85263fc00bc3 100644
--- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -13,12 +13,13 @@
#ifndef LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
#define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <utility> // for std::pair
namespace llvm {
+template <typename T> class SmallVectorImpl;
template <typename T> class ArrayRef;
class Module;
@@ -109,14 +110,14 @@ std::string getUniqueModuleId(Module *M);
/// Embed the memory buffer \p Buf into the module \p M as a global using the
/// specified section name.
-void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName);
+void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName,
+ Align Alignment = Align(1));
class CallInst;
namespace VFABI {
/// Overwrite the Vector Function ABI variants attribute with the names provided
/// in \p VariantMappings.
-void setVectorVariantNames(CallInst *CI,
- const SmallVector<std::string, 8> &VariantMappings);
+void setVectorVariantNames(CallInst *CI, ArrayRef<std::string> VariantMappings);
} // End VFABI namespace
} // End llvm namespace
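
A sketch of the revised embedBufferInModule; the section name and alignment below are illustrative values, not something this commit prescribes:

  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Transforms/Utils/ModuleUtils.h"

  void embedPayload(llvm::Module &M, llvm::StringRef Payload) {
    using namespace llvm;
    std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(
        Payload, "payload", /*RequiresNullTerminator=*/false);
    // New in this change: callers can specify the global's alignment.
    embedBufferInModule(M, Buf->getMemBufferRef(), ".llvm.payload", Align(8));
  }
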
diff --git a/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h b/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
index 03d8840a22d2..a59f9bc3ebfb 100644
--- a/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
+++ b/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
@@ -14,7 +14,6 @@
#ifndef LLVM_TRANSFORMS_UTILS_NAMEANONGLOBALS_H
#define LLVM_TRANSFORMS_UTILS_NAMEANONGLOBALS_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index c922476ac79d..e57e598b6918 100644
--- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -56,7 +56,6 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -65,6 +64,7 @@ namespace llvm {
class AssumptionCache;
class DominatorTree;
class Function;
+class Value;
class IntrinsicInst;
class raw_ostream;
diff --git a/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h b/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h
index 54c257383fb5..0992a4456c9d 100644
--- a/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h
+++ b/llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h
@@ -51,11 +51,12 @@
#ifndef LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLECONVERTER_H
#define LLVM_TRANSFORMS_UTILS_RELLOOKUPTABLECONVERTER_H
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class Module;
+
// Pass that converts lookup tables to relative lookup tables.
class RelLookupTableConverterPass
: public PassInfoMixin<RelLookupTableConverterPass> {
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
index bf418e659a04..17bd072598ee 100644
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -16,16 +16,25 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueLattice.h"
-#include "llvm/Analysis/ValueLatticeUtils.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
-#include <cassert>
-#include <utility>
#include <vector>
namespace llvm {
+class Argument;
+class BasicBlock;
+class CallInst;
+class Constant;
+class DataLayout;
+class DominatorTree;
+class Function;
+class GlobalVariable;
+class Instruction;
+class LLVMContext;
+class PostDominatorTree;
+class StructType;
+class TargetLibraryInfo;
+class Value;
+class ValueLatticeElement;
/// Helper struct for bundling up the analysis results per function for IPSCCP.
struct AnalysisResultsForFn {
@@ -34,6 +43,14 @@ struct AnalysisResultsForFn {
PostDominatorTree *PDT;
};
+/// Helper struct shared between Function Specialization and SCCP Solver.
+struct ArgInfo {
+ Argument *Formal; // The Formal argument being analysed.
+ Constant *Actual; // A corresponding actual constant argument.
+
+ ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A) {}
+};
+
class SCCPInstVisitor;
//===----------------------------------------------------------------------===//
@@ -134,11 +151,14 @@ public:
/// Return a reference to the set of argument tracked functions.
SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions();
- /// Mark argument \p A constant with value \p C in a new function
- /// specialization. The argument's parent function is a specialization of the
- /// original function \p F. All other arguments of the specialization inherit
- /// the lattice state of their corresponding values in the original function.
- void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C);
+ /// Mark the constant arguments of a new function specialization. \p F points
+ /// to the cloned function and \p Args contains a list of constant arguments
+ /// represented as pairs of {formal,actual} values (the formal argument is
+ /// associated with the original function definition). All other arguments of
+ /// the specialization inherit the lattice state of their corresponding values
+ /// in the original function.
+ void markArgInFuncSpecialization(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args);
/// Mark all of the blocks in function \p F non-executable. Clients can use
/// this method to erase a function from the module (e.g., if it has been
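
For illustration, the new signature would be driven roughly like this from the function specializer (Solver, Clone, FormalArg, and ActualConst are assumed context, not names from this commit):

  llvm::SmallVector<llvm::ArgInfo, 4> SpecArgs;
  // FormalArg: an Argument of the original function definition.
  // ActualConst: the Constant that argument is being specialized on.
  SpecArgs.push_back(llvm::ArgInfo(FormalArg, ActualConst));
  Solver.markArgInFuncSpecialization(Clone, SpecArgs);
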
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index ee06893ca660..a3e5ac3ac19d 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -323,6 +323,28 @@ public:
} while (Changed);
}
+ /// Check all predecessors and, if all of them have the same AvailableVal,
+ /// use it as the value for the block represented by Info. Return true if a
+ /// singular value is found.
+ bool FindSingularVal(BBInfo *Info) {
+ if (!Info->NumPreds)
+ return false;
+ ValT Singular = Info->Preds[0]->DefBB->AvailableVal;
+ if (!Singular)
+ return false;
+ for (unsigned Idx = 1; Idx < Info->NumPreds; ++Idx) {
+ ValT PredVal = Info->Preds[Idx]->DefBB->AvailableVal;
+ if (!PredVal || Singular != PredVal)
+ return false;
+ }
+ // Record the singular value.
+ (*AvailableVals)[Info->BB] = Singular;
+ assert(BBMap[Info->BB] == Info && "Info missed in BBMap?");
+ Info->AvailableVal = Singular;
+ Info->DefBB = Info->Preds[0]->DefBB;
+ return true;
+ }
+
/// FindAvailableVal - If this block requires a PHI, first check if an
/// existing PHI matches the PHI placement and reaching definitions computed
/// earlier, and if not, create a new PHI. Visit all the block's
@@ -339,6 +361,10 @@ public:
if (Info->DefBB != Info)
continue;
+ // Look for singular value.
+ if (FindSingularVal(Info))
+ continue;
+
// Look for an existing PHI.
FindExistingPHI(Info->BB, BlockList);
if (Info->AvailableVal)
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index e1f681bbd367..5a4c28063a1d 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -24,7 +24,6 @@
namespace llvm {
-class BasicBlock;
class Function;
class MachineBasicBlock;
class MachineFunction;
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 175bdde7fd05..2250e928d1e6 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -76,6 +76,7 @@ template <> struct IRTraits<BasicBlock> {
} // end namespace afdo_detail
extern cl::opt<bool> SampleProfileUseProfi;
+extern cl::opt<bool> SampleProfileInferEntryCount;
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
@@ -920,7 +921,9 @@ void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation(
// Samples->getHeadSamples() + 1 to avoid functions with zero count.
if (SampleProfileUseProfi) {
const BasicBlockT *EntryBB = getEntryBB(&F);
- if (BlockWeights[EntryBB] > 0) {
+ ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
+ if (BlockWeights[EntryBB] > 0 &&
+ (SampleProfileInferEntryCount || !EntryWeight)) {
getFunction(F).setEntryCount(
ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
&InlinedGUIDs);
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h
index a621cb3078c5..bd7175aa96ff 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h
@@ -16,20 +16,14 @@
#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEUTIL_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Function.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
namespace llvm {
using namespace sampleprof;
class ProfileSummaryInfo;
+class Module;
extern cl::opt<unsigned> SampleProfileMaxPropagateIterations;
extern cl::opt<unsigned> SampleProfileRecordCoverage;
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 277eb7acf238..260ed1a97831 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -15,13 +15,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
-#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/ValueHandle.h"
@@ -293,8 +290,9 @@ public:
Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc);
/// A specialized variant of expandCodeForPredicate, handling the case when
- /// we are expanding code for a SCEVEqualPredicate.
- Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, Instruction *Loc);
+ /// we are expanding code for a SCEVComparePredicate.
+ Value *expandComparePredicate(const SCEVComparePredicate *Pred,
+ Instruction *Loc);
/// Generates code that evaluates if the \p AR expression will overflow.
Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc,
@@ -384,8 +382,8 @@ public:
/// Note that this function does not perform an exhaustive search, i.e., if it
/// didn't find any value, that does not mean there is no such value.
///
- Optional<ScalarEvolution::ValueOffsetPair>
- getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
+ Value *getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L);
/// Returns a suitable insert point after \p I, that dominates \p
/// MustDominate. Skips instructions inserted by the expander.
@@ -443,21 +441,15 @@ private:
Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V);
/// Find a previous Value in ExprValueMap for expand.
- ScalarEvolution::ValueOffsetPair
- FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
+ Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
Value *expand(const SCEV *S);
/// Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);
- Value *expandSMaxExpr(const SCEVNAryExpr *S);
-
- Value *expandUMaxExpr(const SCEVNAryExpr *S);
-
- Value *expandSMinExpr(const SCEVNAryExpr *S);
-
- Value *expandUMinExpr(const SCEVNAryExpr *S);
+ Value *expandMinMaxExpr(const SCEVNAryExpr *S, Intrinsic::ID IntrinID,
+ Twine Name, bool IsSequential = false);
Value *visitConstant(const SCEVConstant *S) { return S->getValue(); }
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index fb3a7490346f..7af879638a4d 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -23,6 +23,7 @@ class AssumptionCache;
struct SimplifyCFGOptions {
int BonusInstThreshold = 1;
bool ForwardSwitchCondToPhi = false;
+ bool ConvertSwitchRangeToICmp = false;
bool ConvertSwitchToLookupTable = false;
bool NeedCanonicalLoop = true;
bool HoistCommonInsts = false;
@@ -41,6 +42,10 @@ struct SimplifyCFGOptions {
ForwardSwitchCondToPhi = B;
return *this;
}
+ SimplifyCFGOptions &convertSwitchRangeToICmp(bool B) {
+ ConvertSwitchRangeToICmp = B;
+ return *this;
+ }
SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {
ConvertSwitchToLookupTable = B;
return *this;
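
Since the struct keeps its chained-setter style, the new knob composes with the existing ones; a sketch:

  llvm::SimplifyCFGOptions Opts = llvm::SimplifyCFGOptions()
                                      .bonusInstThreshold(1)
                                      .forwardSwitchCondToPhi(false)
                                      .convertSwitchRangeToICmp(true) // new
                                      .convertSwitchToLookupTable(false);
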
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
index 4ba56fb45afa..ff60811b6168 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -15,12 +15,11 @@
#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/ConstantRange.h"
-#include "llvm/IR/ValueHandle.h"
-
namespace llvm {
+class Type;
+class WeakTrackingVH;
+template <typename T> class SmallVectorImpl;
class CastInst;
class DominatorTree;
class Loop;
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index a88e72fc9ba8..79a44b667445 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -14,7 +14,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
namespace llvm {
@@ -105,7 +105,7 @@ private:
OptimizationRemarkEmitter &ORE;
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
- bool UnsafeFPShrink;
+ bool UnsafeFPShrink = false;
function_ref<void(Instruction *, Value *)> Replacer;
function_ref<void(Instruction *)> Eraser;
@@ -163,6 +163,7 @@ private:
Value *optimizeStpCpy(CallInst *CI, IRBuilderBase &B);
Value *optimizeStrNCpy(CallInst *CI, IRBuilderBase &B);
Value *optimizeStrLen(CallInst *CI, IRBuilderBase &B);
+ Value *optimizeStrNLen(CallInst *CI, IRBuilderBase &B);
Value *optimizeStrPBrk(CallInst *CI, IRBuilderBase &B);
Value *optimizeStrTo(CallInst *CI, IRBuilderBase &B);
Value *optimizeStrSpn(CallInst *CI, IRBuilderBase &B);
@@ -234,10 +235,11 @@ private:
/// hasFloatVersion - Checks if there is a float version of the specified
/// function by checking for an existing function with name FuncName + f
- bool hasFloatVersion(StringRef FuncName);
+ bool hasFloatVersion(const Module *M, StringRef FuncName);
- /// Shared code to optimize strlen+wcslen.
- Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B, unsigned CharSize);
+ /// Shared code to optimize strlen+wcslen and strnlen+wcsnlen.
+ Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B, unsigned CharSize,
+ Value *Bound = nullptr);
};
} // End llvm namespace
diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
index 11bf5501598f..aa9e9bd6c69b 100644
--- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h
+++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
@@ -13,7 +13,6 @@
#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
#define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/include/llvm/Transforms/Utils/SplitModule.h b/llvm/include/llvm/Transforms/Utils/SplitModule.h
index 42b3784db417..a5450738060a 100644
--- a/llvm/include/llvm/Transforms/Utils/SplitModule.h
+++ b/llvm/include/llvm/Transforms/Utils/SplitModule.h
@@ -15,7 +15,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULE_H
#define LLVM_TRANSFORMS_UTILS_SPLITMODULE_H
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include <memory>
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 320c36b36924..65fe8eff6442 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -123,11 +124,9 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
-unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- bool &NotDuplicatable, bool &Convergent,
- const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value *> &EphValues,
- unsigned BEInsns);
+InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
+ bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI,
+ const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns);
} // end namespace llvm
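
Because ApproximateLoopSize now returns an InstructionCost, callers must check validity before using the value; a sketch (the surrounding unroll-cost logic and variable names are assumed):

  unsigned NumCalls;
  bool NotDuplicatable, Convergent;
  llvm::InstructionCost SizeCost = llvm::ApproximateLoopSize(
      L, NumCalls, NotDuplicatable, Convergent, TTI, EphValues, BEInsns);
  if (!SizeCost.isValid())
    return false; // e.g. the loop contains instructions the target cannot cost
  uint64_t LoopSize = *SizeCost.getValue();
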
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
index 3636285e38f5..15a46baa190d 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
@@ -10,9 +10,10 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
namespace llvm {
+class Pass;
+class Function;
class LoadStoreVectorizerPass : public PassInfoMixin<LoadStoreVectorizerPass> {
public:
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 32d295a2dd16..b01bd222b252 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -28,12 +28,26 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
namespace llvm {
+class AAResults;
+class AssumptionCache;
+class BasicBlock;
+class BlockFrequencyInfo;
+class DemandedBits;
+class DominatorTree;
+class Function;
+class Loop;
+class LoopInfo;
+class Metadata;
+class OptimizationRemarkEmitter;
+class PredicatedScalarEvolution;
+class ProfileSummaryInfo;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+class Type;
/// Utility class for getting and setting loop vectorizer hints in the form
/// of loop metadata.
@@ -207,7 +221,6 @@ public:
void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
-
Instruction *getExactFPInst() { return ExactFPMathInst; }
unsigned getNumRuntimePointerChecks() const {
@@ -294,6 +307,14 @@ public:
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
+ /// Returns true if the given store is a final invariant store of one of the
+ /// reductions found in the loop.
+ bool isInvariantStoreOfReduction(StoreInst *SI);
+
+ /// Returns true if the given address is invariant and is used to store a
+ /// recurrent expression.
+ bool isInvariantAddressOfReduction(Value *V);
+
/// Returns True if V is a Phi node of an induction variable in this loop.
bool isInductionPhi(const Value *V) const;
@@ -301,6 +322,10 @@ public:
/// floating point induction.
const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const;
+ /// Returns a pointer to the induction descriptor, if \p Phi is a pointer
+ /// induction.
+ const InductionDescriptor *getPointerInductionDescriptor(PHINode *Phi) const;
+
/// Returns True if V is a cast that is part of an induction def-use chain,
/// and had been proven to be redundant under a runtime guard (in other
/// words, the cast has the same SCEV expression as the induction phi).
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index cd605aacb52d..b41f3efc5b55 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
@@ -30,7 +29,6 @@ class AAResults;
class AssumptionCache;
class BasicBlock;
class CmpInst;
-class DataLayout;
class DemandedBits;
class DominatorTree;
class Function;
@@ -135,7 +133,7 @@ private:
bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,
- unsigned Idx);
+ unsigned Idx, unsigned MinVF);
bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);
diff --git a/llvm/include/llvm/WindowsDriver/MSVCPaths.h b/llvm/include/llvm/WindowsDriver/MSVCPaths.h
new file mode 100644
index 000000000000..7256a4f66eaa
--- /dev/null
+++ b/llvm/include/llvm/WindowsDriver/MSVCPaths.h
@@ -0,0 +1,107 @@
+//===-- MSVCPaths.h - MSVC path-parsing helpers -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_WINDOWSDRIVER_MSVCPATHS_H
+#define LLVM_WINDOWSDRIVER_MSVCPATHS_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include <string>
+
+namespace llvm {
+
+namespace vfs {
+class FileSystem;
+}
+
+enum class SubDirectoryType {
+ Bin,
+ Include,
+ Lib,
+};
+
+enum class ToolsetLayout {
+ OlderVS,
+ VS2017OrNewer,
+ DevDivInternal,
+};
+
+// Windows SDKs and VC Toolchains group their contents into subdirectories based
+// on the target architecture. This function converts an llvm::Triple::ArchType
+// to the corresponding subdirectory name.
+const char *archToWindowsSDKArch(llvm::Triple::ArchType Arch);
+
+// Similar to the above function, but for Visual Studio versions before VS2017.
+const char *archToLegacyVCArch(llvm::Triple::ArchType Arch);
+
+// Similar to the above function, but for DevDiv internal builds.
+const char *archToDevDivInternalArch(llvm::Triple::ArchType Arch);
+
+bool appendArchToWindowsSDKLibPath(int SDKMajor, llvm::SmallString<128> LibPath,
+ llvm::Triple::ArchType Arch,
+ std::string &path);
+
+// Get the path to a specific subdirectory in the current toolchain for
+// a given target architecture.
+// VS2017 changed the VC toolchain layout, so this should be used instead
+// of hardcoding paths.
+std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout,
+ const std::string &VCToolChainPath,
+ llvm::Triple::ArchType TargetArch,
+ llvm::StringRef SubdirParent = "");
+
+// Check if the Include path of a specified version of Visual Studio contains
+// specific header files. If not, they are probably shipped with the Universal CRT.
+bool useUniversalCRT(ToolsetLayout VSLayout, const std::string &VCToolChainPath,
+ llvm::Triple::ArchType TargetArch,
+ llvm::vfs::FileSystem &VFS);
+
+/// Get Windows SDK installation directory.
+bool getWindowsSDKDir(vfs::FileSystem &VFS,
+ llvm::Optional<llvm::StringRef> WinSdkDir,
+ llvm::Optional<llvm::StringRef> WinSdkVersion,
+ llvm::Optional<llvm::StringRef> WinSysRoot,
+ std::string &Path, int &Major,
+ std::string &WindowsSDKIncludeVersion,
+ std::string &WindowsSDKLibVersion);
+
+bool getUniversalCRTSdkDir(vfs::FileSystem &VFS,
+ llvm::Optional<llvm::StringRef> WinSdkDir,
+ llvm::Optional<llvm::StringRef> WinSdkVersion,
+ llvm::Optional<llvm::StringRef> WinSysRoot,
+ std::string &Path,
+ std::string &UCRTVersion);
+
+// Check command-line arguments to try to find a toolchain.
+bool findVCToolChainViaCommandLine(
+ vfs::FileSystem &VFS, llvm::Optional<llvm::StringRef> VCToolsDir,
+ llvm::Optional<llvm::StringRef> VCToolsVersion,
+ llvm::Optional<llvm::StringRef> WinSysRoot, std::string &Path,
+ ToolsetLayout &VSLayout);
+
+// Check various environment variables to try to find a toolchain.
+bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
+ ToolsetLayout &VSLayout);
+
+// Query the Setup Config server for installs, then pick the newest version
+// and find its default VC toolchain.
+// This is the preferred way to discover new Visual Studio installs, as they
+// are no longer listed in the registry.
+bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, std::string &Path,
+ ToolsetLayout &VSLayout);
+
+// Look in the registry for Visual Studio installs, and use that to get
+// a toolchain path. VS2017 and newer don't get added to the registry.
+// So if we find something here, we know that it's an older version.
+bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout);
+
+} // namespace llvm
+
+#endif // LLVM_WINDOWSDRIVER_MSVCPATHS_H
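
Putting the discovery helpers together, a driver might locate the VC library directory like this (a sketch with error handling trimmed; the command-line variant is skipped for brevity):

  #include "llvm/Support/VirtualFileSystem.h"
  #include "llvm/WindowsDriver/MSVCPaths.h"

  std::string findVCLibDir(llvm::Triple::ArchType Arch) {
    using namespace llvm;
    std::string VCPath;
    ToolsetLayout Layout;
    auto VFS = vfs::getRealFileSystem();
    if (!findVCToolChainViaSetupConfig(*VFS, VCPath, Layout) &&
        !findVCToolChainViaEnvironment(*VFS, VCPath, Layout) &&
        !findVCToolChainViaRegistry(VCPath, Layout))
      return "";
    return getSubDirectoryPath(SubDirectoryType::Lib, Layout, VCPath, Arch);
  }
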
diff --git a/llvm/include/llvm/WindowsDriver/MSVCSetupApi.h b/llvm/include/llvm/WindowsDriver/MSVCSetupApi.h
new file mode 100644
index 000000000000..28e6e3e08e37
--- /dev/null
+++ b/llvm/include/llvm/WindowsDriver/MSVCSetupApi.h
@@ -0,0 +1,523 @@
+// <copyright file="Program.cpp" company="Microsoft Corporation">
+// Copyright (C) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license.
+// </copyright>
+// <license>
+// The MIT License (MIT)
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+// </license>
+
+#pragma once
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#endif
+
+// Constants
+//
+#ifndef E_NOTFOUND
+#define E_NOTFOUND HRESULT_FROM_WIN32(ERROR_NOT_FOUND)
+#endif
+
+#ifndef E_FILENOTFOUND
+#define E_FILENOTFOUND HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND)
+#endif
+
+// Enumerations
+//
+/// <summary>
+/// The state of an instance.
+/// </summary>
+enum InstanceState : unsigned {
+ /// <summary>
+ /// The instance state has not been determined.
+ /// </summary>
+ eNone = 0,
+
+ /// <summary>
+ /// The instance installation path exists.
+ /// </summary>
+ eLocal = 1,
+
+ /// <summary>
+ /// A product is registered to the instance.
+ /// </summary>
+ eRegistered = 2,
+
+ /// <summary>
+ /// No reboot is required for the instance.
+ /// </summary>
+ eNoRebootRequired = 4,
+
+ /// <summary>
+ /// The instance represents a complete install.
+ /// </summary>
+ eComplete = MAXUINT,
+};
+
+// Forward interface declarations
+//
+#ifndef __ISetupInstance_FWD_DEFINED__
+#define __ISetupInstance_FWD_DEFINED__
+typedef struct ISetupInstance ISetupInstance;
+#endif
+
+#ifndef __ISetupInstance2_FWD_DEFINED__
+#define __ISetupInstance2_FWD_DEFINED__
+typedef struct ISetupInstance2 ISetupInstance2;
+#endif
+
+#ifndef __IEnumSetupInstances_FWD_DEFINED__
+#define __IEnumSetupInstances_FWD_DEFINED__
+typedef struct IEnumSetupInstances IEnumSetupInstances;
+#endif
+
+#ifndef __ISetupConfiguration_FWD_DEFINED__
+#define __ISetupConfiguration_FWD_DEFINED__
+typedef struct ISetupConfiguration ISetupConfiguration;
+#endif
+
+#ifndef __ISetupConfiguration2_FWD_DEFINED__
+#define __ISetupConfiguration2_FWD_DEFINED__
+typedef struct ISetupConfiguration2 ISetupConfiguration2;
+#endif
+
+#ifndef __ISetupPackageReference_FWD_DEFINED__
+#define __ISetupPackageReference_FWD_DEFINED__
+typedef struct ISetupPackageReference ISetupPackageReference;
+#endif
+
+#ifndef __ISetupHelper_FWD_DEFINED__
+#define __ISetupHelper_FWD_DEFINED__
+typedef struct ISetupHelper ISetupHelper;
+#endif
+
+// Forward class declarations
+//
+#ifndef __SetupConfiguration_FWD_DEFINED__
+#define __SetupConfiguration_FWD_DEFINED__
+
+#ifdef __cplusplus
+typedef class SetupConfiguration SetupConfiguration;
+#endif
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Interface definitions
+//
+EXTERN_C const IID IID_ISetupInstance;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+/// <summary>
+/// Information about an instance of a product.
+/// </summary>
+struct DECLSPEC_UUID("B41463C3-8866-43B5-BC33-2B0676F7F42E")
+ DECLSPEC_NOVTABLE ISetupInstance : public IUnknown {
+ /// <summary>
+ /// Gets the instance identifier (should match the name of the parent instance
+ /// directory).
+ /// </summary>
+ /// <param name="pbstrInstanceId">The instance identifier.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist.</returns>
+ STDMETHOD(GetInstanceId)(_Out_ BSTR *pbstrInstanceId) = 0;
+
+ /// <summary>
+ /// Gets the local date and time when the installation was originally
+ /// installed.
+ /// </summary>
+ /// <param name="pInstallDate">The local date and time when the installation
+ /// was originally installed.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// property is not defined.</returns>
+ STDMETHOD(GetInstallDate)(_Out_ LPFILETIME pInstallDate) = 0;
+
+ /// <summary>
+ /// Gets the unique name of the installation, often indicating the branch and
+ /// other information used for telemetry.
+ /// </summary>
+ /// <param name="pbstrInstallationName">The unique name of the installation,
+ /// often indicating the branch and other information used for
+ /// telemetry.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// property is not defined.</returns>
+ STDMETHOD(GetInstallationName)(_Out_ BSTR *pbstrInstallationName) = 0;
+
+ /// <summary>
+ /// Gets the path to the installation root of the product.
+ /// </summary>
+ /// <param name="pbstrInstallationPath">The path to the installation root of
+ /// the product.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// property is not defined.</returns>
+ STDMETHOD(GetInstallationPath)(_Out_ BSTR *pbstrInstallationPath) = 0;
+
+ /// <summary>
+ /// Gets the version of the product installed in this instance.
+ /// </summary>
+ /// <param name="pbstrInstallationVersion">The version of the product
+ /// installed in this instance.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// property is not defined.</returns>
+ STDMETHOD(GetInstallationVersion)(_Out_ BSTR *pbstrInstallationVersion) = 0;
+
+ /// <summary>
+ /// Gets the display name (title) of the product installed in this instance.
+ /// </summary>
+ /// <param name="lcid">The LCID for the display name.</param>
+ /// <param name="pbstrDisplayName">The display name (title) of the product
+ /// installed in this instance.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// property is not defined.</returns>
+ STDMETHOD(GetDisplayName)(_In_ LCID lcid, _Out_ BSTR *pbstrDisplayName) = 0;
+
+ /// <summary>
+ /// Gets the description of the product installed in this instance.
+ /// </summary>
+ /// <param name="lcid">The LCID for the description.</param>
+ /// <param name="pbstrDescription">The description of the product installed in
+ /// this instance.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// property is not defined.</returns>
+ STDMETHOD(GetDescription)(_In_ LCID lcid, _Out_ BSTR *pbstrDescription) = 0;
+
+ /// <summary>
+ /// Resolves the optional relative path to the root path of the instance.
+ /// </summary>
+ /// <param name="pwszRelativePath">A relative path within the instance to
+ /// resolve, or NULL to get the root path.</param>
+ /// <param name="pbstrAbsolutePath">The full path to the optional relative
+ /// path within the instance. If the relative path is NULL, the root path will
+ /// always terminate in a backslash.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// property is not defined.</returns>
+ STDMETHOD(ResolvePath)
+ (_In_opt_z_ LPCOLESTR pwszRelativePath, _Out_ BSTR *pbstrAbsolutePath) = 0;
+};
+#endif
+
+EXTERN_C const IID IID_ISetupInstance2;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+/// <summary>
+/// Information about an instance of a product.
+/// </summary>
+struct DECLSPEC_UUID("89143C9A-05AF-49B0-B717-72E218A2185C")
+ DECLSPEC_NOVTABLE ISetupInstance2 : public ISetupInstance {
+ /// <summary>
+ /// Gets the state of the instance.
+ /// </summary>
+ /// <param name="pState">The state of the instance.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist.</returns>
+ STDMETHOD(GetState)(_Out_ InstanceState *pState) = 0;
+
+ /// <summary>
+ /// Gets an array of package references registered to the instance.
+ /// </summary>
+ /// <param name="ppsaPackages">Pointer to an array of <see
+ /// cref="ISetupPackageReference"/>.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// packages property is not defined.</returns>
+ STDMETHOD(GetPackages)(_Out_ LPSAFEARRAY *ppsaPackages) = 0;
+
+ /// <summary>
+ /// Gets a pointer to the <see cref="ISetupPackageReference"/> that represents
+ /// the registered product.
+ /// </summary>
+ /// <param name="ppPackage">Pointer to an instance of <see
+ /// cref="ISetupPackageReference"/>. This may be NULL if <see
+ /// cref="GetState"/> does not return <see cref="eComplete"/>.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist and E_NOTFOUND if the
+ /// packages property is not defined.</returns>
+ STDMETHOD(GetProduct)
+ (_Outptr_result_maybenull_ ISetupPackageReference **ppPackage) = 0;
+
+ /// <summary>
+ /// Gets the relative path to the product application, if available.
+ /// </summary>
+ /// <param name="pbstrProductPath">The relative path to the product
+ /// application, if available.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_FILENOTFOUND if the instance state does not exist.</returns>
+ STDMETHOD(GetProductPath)
+ (_Outptr_result_maybenull_ BSTR *pbstrProductPath) = 0;
+};
+#endif
+
+EXTERN_C const IID IID_IEnumSetupInstances;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+/// <summary>
+/// An enumerator of installed <see cref="ISetupInstance"/> objects.
+/// </summary>
+struct DECLSPEC_UUID("6380BCFF-41D3-4B2E-8B2E-BF8A6810C848")
+ DECLSPEC_NOVTABLE IEnumSetupInstances : public IUnknown {
+ /// <summary>
+ /// Retrieves the next set of product instances in the enumeration sequence.
+ /// </summary>
+ /// <param name="celt">The number of product instances to retrieve.</param>
+ /// <param name="rgelt">A pointer to an array of <see
+ /// cref="ISetupInstance"/>.</param>
+ /// <param name="pceltFetched">A pointer to the number of product instances
+ /// retrieved. If celt is 1 this parameter may be NULL.</param>
+ /// <returns>S_OK if the number of elements was fetched, S_FALSE if nothing
+ /// was fetched (at end of enumeration), E_INVALIDARG if celt is greater than
+ /// 1 and pceltFetched is NULL, or E_OUTOFMEMORY if an <see
+ /// cref="ISetupInstance"/> could not be allocated.</returns>
+ STDMETHOD(Next)
+ (_In_ ULONG celt, _Out_writes_to_(celt, *pceltFetched) ISetupInstance **rgelt,
+ _Out_opt_ _Deref_out_range_(0, celt) ULONG *pceltFetched) = 0;
+
+ /// <summary>
+ /// Skips the next set of product instances in the enumeration sequence.
+ /// </summary>
+ /// <param name="celt">The number of product instances to skip.</param>
+ /// <returns>S_OK if the number of elements could be skipped; otherwise,
+ /// S_FALSE.</returns>
+ STDMETHOD(Skip)(_In_ ULONG celt) = 0;
+
+ /// <summary>
+ /// Resets the enumeration sequence to the beginning.
+ /// </summary>
+ /// <returns>Always returns S_OK.</returns>
+ STDMETHOD(Reset)(void) = 0;
+
+ /// <summary>
+ /// Creates a new enumeration object in the same state as the current
+ /// enumeration object: the new object points to the same place in the
+ /// enumeration sequence.
+ /// </summary>
+ /// <param name="ppenum">A pointer to a pointer to a new <see
+ /// cref="IEnumSetupInstances"/> interface. If the method fails, this
+ /// parameter is undefined.</param>
+ /// <returns>S_OK if a clone was returned; otherwise, E_OUTOFMEMORY.</returns>
+ STDMETHOD(Clone)(_Deref_out_opt_ IEnumSetupInstances **ppenum) = 0;
+};
+#endif
+
+EXTERN_C const IID IID_ISetupConfiguration;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+/// <summary>
+/// Gets information about product instances set up on the machine.
+/// </summary>
+struct DECLSPEC_UUID("42843719-DB4C-46C2-8E7C-64F1816EFD5B")
+ DECLSPEC_NOVTABLE ISetupConfiguration : public IUnknown {
+ /// <summary>
+ /// Enumerates all completed product instances installed.
+ /// </summary>
+ /// <param name="ppEnumInstances">An enumeration of completed, installed
+ /// product instances.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(EnumInstances)(_Out_ IEnumSetupInstances **ppEnumInstances) = 0;
+
+ /// <summary>
+ /// Gets the instance for the current process path.
+ /// </summary>
+ /// <param name="ppInstance">The instance for the current process
+ /// path.</param>
+ /// <returns>The instance for the current process path, or E_NOTFOUND if not
+ /// found.</returns>
+ STDMETHOD(GetInstanceForCurrentProcess)
+ (_Out_ ISetupInstance **ppInstance) = 0;
+
+ /// <summary>
+ /// Gets the instance for the given path.
+ /// </summary>
+ /// <param name="ppInstance">The instance for the given path.</param>
+ /// <returns>The instance for the given path, or E_NOTFOUND if not
+ /// found.</returns>
+ STDMETHOD(GetInstanceForPath)
+ (_In_z_ LPCWSTR wzPath, _Out_ ISetupInstance **ppInstance) = 0;
+};
+#endif
+
+EXTERN_C const IID IID_ISetupConfiguration2;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+/// <summary>
+/// Gets information about product instances.
+/// </summary>
+struct DECLSPEC_UUID("26AAB78C-4A60-49D6-AF3B-3C35BC93365D")
+ DECLSPEC_NOVTABLE ISetupConfiguration2 : public ISetupConfiguration {
+ /// <summary>
+ /// Enumerates all product instances.
+ /// </summary>
+ /// <param name="ppEnumInstances">An enumeration of all product
+ /// instances.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(EnumAllInstances)(_Out_ IEnumSetupInstances **ppEnumInstances) = 0;
+};
+#endif
+
+EXTERN_C const IID IID_ISetupPackageReference;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+/// <summary>
+/// A reference to a package.
+/// </summary>
+struct DECLSPEC_UUID("da8d8a16-b2b6-4487-a2f1-594ccccd6bf5")
+ DECLSPEC_NOVTABLE ISetupPackageReference : public IUnknown {
+ /// <summary>
+ /// Gets the general package identifier.
+ /// </summary>
+ /// <param name="pbstrId">The general package identifier.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(GetId)(_Out_ BSTR *pbstrId) = 0;
+
+ /// <summary>
+ /// Gets the version of the package.
+ /// </summary>
+ /// <param name="pbstrVersion">The version of the package.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(GetVersion)(_Out_ BSTR *pbstrVersion) = 0;
+
+ /// <summary>
+ /// Gets the target process architecture of the package.
+ /// </summary>
+ /// <param name="pbstrChip">The target process architecture of the
+ /// package.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(GetChip)(_Out_ BSTR *pbstrChip) = 0;
+
+ /// <summary>
+ /// Gets the language and optional region identifier.
+ /// </summary>
+ /// <param name="pbstrLanguage">The language and optional region
+ /// identifier.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(GetLanguage)(_Out_ BSTR *pbstrLanguage) = 0;
+
+ /// <summary>
+ /// Gets the build branch of the package.
+ /// </summary>
+ /// <param name="pbstrBranch">The build branch of the package.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(GetBranch)(_Out_ BSTR *pbstrBranch) = 0;
+
+ /// <summary>
+ /// Gets the type of the package.
+ /// </summary>
+ /// <param name="pbstrType">The type of the package.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(GetType)(_Out_ BSTR *pbstrType) = 0;
+
+ /// <summary>
+ /// Gets the unique identifier consisting of all defined tokens.
+ /// </summary>
+ /// <param name="pbstrUniqueId">The unique identifier consisting of all
+ /// defined tokens.</param>
+ /// <returns>Standard HRESULT indicating success or failure, including
+ /// E_UNEXPECTED if no Id was defined (required).</returns>
+ STDMETHOD(GetUniqueId)(_Out_ BSTR *pbstrUniqueId) = 0;
+};
+#endif
+
+EXTERN_C const IID IID_ISetupHelper;
+
+#if defined(__cplusplus) && !defined(CINTERFACE)
+/// <summary>
+/// Helper functions.
+/// </summary>
+/// <remarks>
+/// You can query for this interface from the <see cref="SetupConfiguration"/>
+/// class.
+/// </remarks>
+struct DECLSPEC_UUID("42b21b78-6192-463e-87bf-d577838f1d5c")
+ DECLSPEC_NOVTABLE ISetupHelper : public IUnknown {
+ /// <summary>
+ /// Parses a dotted quad version string into a 64-bit unsigned integer.
+ /// </summary>
+ /// <param name="pwszVersion">The dotted quad version string to parse, e.g.
+ /// 1.2.3.4.</param>
+ /// <param name="pullVersion">A 64-bit unsigned integer representing the
+ /// version. You can compare this to other versions.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(ParseVersion)
+ (_In_ LPCOLESTR pwszVersion, _Out_ PULONGLONG pullVersion) = 0;
+
+ /// <summary>
+ /// Parses a dotted quad version string into a 64-bit unsigned integer.
+ /// </summary>
+ /// <param name="pwszVersionRange">The string containing 1 or 2 dotted quad
+ /// version strings to parse, e.g. [1.0,) that means 1.0.0.0 or newer.</param>
+ /// <param name="pullMinVersion">A 64-bit unsigned integer representing the
+ /// minimum version, which may be 0. You can compare this to other
+ /// versions.</param>
+ /// <param name="pullMaxVersion">A 64-bit unsigned integer representing the
+ /// maximum version, which may be MAXULONGLONG. You can compare this to other
+ /// versions.</param>
+ /// <returns>Standard HRESULT indicating success or failure.</returns>
+ STDMETHOD(ParseVersionRange)
+ (_In_ LPCOLESTR pwszVersionRange, _Out_ PULONGLONG pullMinVersion,
+ _Out_ PULONGLONG pullMaxVersion) = 0;
+};
+#endif
+
+// Class declarations
+//
+EXTERN_C const CLSID CLSID_SetupConfiguration;
+
+#ifdef __cplusplus
+/// <summary>
+/// This class implements <see cref="ISetupConfiguration"/>, <see
+/// cref="ISetupConfiguration2"/>, and <see cref="ISetupHelper"/>.
+/// </summary>
+class DECLSPEC_UUID("177F0C4A-1CD3-4DE7-A32C-71DBBB9FA36D") SetupConfiguration;
+#endif
+
+// Function declarations
+//
+/// <summary>
+/// Gets an <see cref="ISetupConfiguration"/> that provides information about
+/// product instances installed on the machine.
+/// </summary>
+/// <param name="ppConfiguration">The <see cref="ISetupConfiguration"/> that
+/// provides information about product instances installed on the
+/// machine.</param>
+/// <param name="pReserved">Reserved for future use.</param>
+/// <returns>Standard HRESULT indicating success or failure.</returns>
+STDMETHODIMP GetSetupConfiguration(_Out_ ISetupConfiguration **ppConfiguration,
+ _Reserved_ LPVOID pReserved);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
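
For reference, a typical (assumed) consumer enumerates instances with raw COM like this; COM must already be initialized, and everything besides the interfaces above is standard Win32:

  ISetupConfiguration *Config = nullptr;
  if (FAILED(CoCreateInstance(CLSID_SetupConfiguration, nullptr, CLSCTX_ALL,
                              IID_ISetupConfiguration, (void **)&Config)))
    return;
  IEnumSetupInstances *Enum = nullptr;
  if (SUCCEEDED(Config->EnumInstances(&Enum))) {
    ISetupInstance *Inst = nullptr;
    ULONG Fetched = 0;
    while (SUCCEEDED(Enum->Next(1, &Inst, &Fetched)) && Fetched) {
      BSTR InstallPath = nullptr;
      if (SUCCEEDED(Inst->GetInstallationPath(&InstallPath))) {
        // ... use InstallPath ...
        SysFreeString(InstallPath);
      }
      Inst->Release();
    }
    Enum->Release();
  }
  Config->Release();
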
diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
index d0693ccfd8f6..76b10621541c 100644
--- a/llvm/include/llvm/module.modulemap
+++ b/llvm/include/llvm/module.modulemap
@@ -4,6 +4,7 @@ module LLVM_Analysis {
module * { export * }
// This is intended for (repeated) textual inclusion.
+ textual header "Analysis/ScalarFuncs.def"
textual header "Analysis/TargetLibraryInfo.def"
textual header "Analysis/VecFuncs.def"
}
@@ -71,6 +72,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/ELFRelocs/Hexagon.def"
textual header "BinaryFormat/ELFRelocs/i386.def"
textual header "BinaryFormat/ELFRelocs/Lanai.def"
+ textual header "BinaryFormat/ELFRelocs/LoongArch.def"
textual header "BinaryFormat/ELFRelocs/M68k.def"
textual header "BinaryFormat/ELFRelocs/Mips.def"
textual header "BinaryFormat/ELFRelocs/MSP430.def"
@@ -242,6 +244,7 @@ module LLVM_intrinsic_gen {
export *
}
module IR_AbstractCallSite { header "IR/AbstractCallSite.h" export * }
+ module IR_ConstantFold { header "IR/ConstantFold.h" export * }
module IR_ConstantFolder { header "IR/ConstantFolder.h" export * }
module IR_GlobalVariable { header "IR/GlobalVariable.h" export * }
module IR_NoFolder { header "IR/NoFolder.h" export * }
@@ -253,6 +256,7 @@ module LLVM_intrinsic_gen {
module IR_InstrTypes { header "IR/InstrTypes.h" export * }
module IR_Instructions { header "IR/Instructions.h" export * }
module IR_TypeFinder { header "IR/TypeFinder.h" export * }
+ module IR_VectorBuilder { header "IR/VectorBuilder.h" export * }
// Intrinsics.h
@@ -331,7 +335,6 @@ module LLVM_MC {
module LLVM_MC_TableGen {
requires cplusplus
module MC_LaneBitmask { header "MC/LaneBitmask.h" export * }
- module MC_FixedLenDisassembler { header "MC/MCFixedLenDisassembler.h" export * }
module MC_InstrItineraries { header "MC/MCInstrItineraries.h" export * }
module MC_Schedule { header "MC/MCSchedule.h" export * }
module MC_SubtargetFeature { header "MC/SubtargetFeature.h" export * }
@@ -357,6 +360,7 @@ module LLVM_ProfileData {
textual header "ProfileData/InstrProfData.inc"
textual header "ProfileData/MemProfData.inc"
+ textual header "ProfileData/MIBEntryDef.inc"
}
// FIXME: Mislayered?
@@ -410,6 +414,7 @@ module LLVM_Utils {
// These are intended for textual inclusion.
textual header "Support/AArch64TargetParser.def"
textual header "Support/ARMTargetParser.def"
+ textual header "Support/CSKYTargetParser.def"
textual header "Support/RISCVTargetParser.def"
textual header "Support/TargetOpcodes.def"
textual header "Support/X86TargetParser.def"
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index a8132e5abf54..e249c38ecd34 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -42,7 +42,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -680,7 +679,7 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I,
}
}
- const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+ const MemoryLocation &Loc = OptLoc.value_or(MemoryLocation());
switch (I->getOpcode()) {
case Instruction::VAArg:
@@ -988,6 +987,28 @@ bool llvm::isIdentifiedFunctionLocal(const Value *V) {
return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V);
}
+bool llvm::isEscapeSource(const Value *V) {
+ if (auto *CB = dyn_cast<CallBase>(V))
+ return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CB,
+ true);
+
+ // The load case works because isNonEscapingLocalObject considers all
+ // stores to be escapes (it passes true for the StoreCaptures argument
+ // to PointerMayBeCaptured).
+ if (isa<LoadInst>(V))
+ return true;
+
+ // The inttoptr case works because isNonEscapingLocalObject considers all
+ // means of converting or equating a pointer to an int (ptrtoint, ptr store
+ // which could be followed by an integer load, ptr<->int compare) as
+ // escaping, and objects located at well-known addresses via platform-specific
+ // means cannot be considered non-escaping local objects.
+ if (isa<IntToPtrInst>(V))
+ return true;
+
+ return false;
+}
+
bool llvm::isNotVisibleOnUnwind(const Value *Object,
bool &RequiresNoCaptureBeforeUnwind) {
RequiresNoCaptureBeforeUnwind = false;
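
The new llvm::isEscapeSource entry point above (hoisted out of BasicAA further down in this patch) is the predicate that pairs with the notion of a non-escaping local object: a pointer produced by an escape source cannot point into an object whose address was never let out. A minimal sketch of that pairing — isNonEscapingLocalObject stands in for BasicAA's internal helper, and the call shape is illustrative, not the exact call site:

  // Sketch only: if Obj is a local object whose address never escapes and V
  // comes from an escape source (call result, load, inttoptr), V cannot
  // point into Obj, so the two locations do not alias.
  if (isEscapeSource(V) && isNonEscapingLocalObject(Obj))
    return AliasResult::NoAlias;
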
diff --git a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 1577f1eb70b1..e3446a1f3130 100644
--- a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -9,9 +9,7 @@
#include "llvm/Analysis/AliasAnalysisEvaluator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -19,7 +17,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -41,30 +38,48 @@ static cl::opt<bool> PrintMustModRef("print-mustmodref", cl::ReallyHidden);
static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
-static void PrintResults(AliasResult AR, bool P, const Value *V1,
- const Value *V2, const Module *M) {
+static void PrintResults(AliasResult AR, bool P,
+ std::pair<const Value *, Type *> Loc1,
+ std::pair<const Value *, Type *> Loc2,
+ const Module *M) {
if (PrintAll || P) {
+ Type *Ty1 = Loc1.second, *Ty2 = Loc2.second;
+ unsigned AS1 = Loc1.first->getType()->getPointerAddressSpace();
+ unsigned AS2 = Loc2.first->getType()->getPointerAddressSpace();
std::string o1, o2;
{
raw_string_ostream os1(o1), os2(o2);
- V1->printAsOperand(os1, true, M);
- V2->printAsOperand(os2, true, M);
+ Loc1.first->printAsOperand(os1, false, M);
+ Loc2.first->printAsOperand(os2, false, M);
}
if (o2 < o1) {
std::swap(o1, o2);
+ std::swap(Ty1, Ty2);
+ std::swap(AS1, AS2);
// Change offset sign for the local AR, for printing only.
AR.swap();
}
- errs() << " " << AR << ":\t" << o1 << ", " << o2 << "\n";
+ errs() << " " << AR << ":\t";
+ Ty1->print(errs(), false, /* NoDetails */ true);
+ if (AS1 != 0)
+ errs() << " addrspace(" << AS1 << ")";
+ errs() << "* " << o1 << ", ";
+ Ty2->print(errs(), false, /* NoDetails */ true);
+ if (AS2 != 0)
+ errs() << " addrspace(" << AS2 << ")";
+ errs() << "* " << o2 << "\n";
}
}
-static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I,
- Value *Ptr, Module *M) {
+static inline void PrintModRefResults(
+ const char *Msg, bool P, Instruction *I,
+ std::pair<const Value *, Type *> Loc, Module *M) {
if (PrintAll || P) {
errs() << " " << Msg << ": Ptr: ";
- Ptr->printAsOperand(errs(), true, M);
+ Loc.second->print(errs(), false, /* NoDetails */ true);
+ errs() << "* ";
+ Loc.first->printAsOperand(errs(), false, M);
errs() << "\t<->" << *I << '\n';
}
}
@@ -84,11 +99,6 @@ static inline void PrintLoadStoreResults(AliasResult AR, bool P,
}
}
-static inline bool isInterestingPointer(Value *V) {
- return V->getType()->isPointerTy()
- && !isa<ConstantPointerNull>(V);
-}
-
PreservedAnalyses AAEvaluator::run(Function &F, FunctionAnalysisManager &AM) {
runInternal(F, AM.getResult<AAManager>(F));
return PreservedAnalyses::all();
@@ -99,38 +109,21 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
++FunctionCount;
- SetVector<Value *> Pointers;
+ SetVector<std::pair<const Value *, Type *>> Pointers;
SmallSetVector<CallBase *, 16> Calls;
SetVector<Value *> Loads;
SetVector<Value *> Stores;
- for (auto &I : F.args())
- if (I.getType()->isPointerTy()) // Add all pointer arguments.
- Pointers.insert(&I);
-
for (Instruction &Inst : instructions(F)) {
- if (Inst.getType()->isPointerTy()) // Add all pointer instructions.
- Pointers.insert(&Inst);
- if (EvalAAMD && isa<LoadInst>(&Inst))
- Loads.insert(&Inst);
- if (EvalAAMD && isa<StoreInst>(&Inst))
- Stores.insert(&Inst);
- if (auto *Call = dyn_cast<CallBase>(&Inst)) {
- Value *Callee = Call->getCalledOperand();
- // Skip actual functions for direct function calls.
- if (!isa<Function>(Callee) && isInterestingPointer(Callee))
- Pointers.insert(Callee);
- // Consider formals.
- for (Use &DataOp : Call->data_ops())
- if (isInterestingPointer(DataOp))
- Pointers.insert(DataOp);
- Calls.insert(Call);
- } else {
- // Consider all operands.
- for (Use &Op : Inst.operands())
- if (isInterestingPointer(Op))
- Pointers.insert(Op);
- }
+ if (auto *LI = dyn_cast<LoadInst>(&Inst)) {
+ Pointers.insert({LI->getPointerOperand(), LI->getType()});
+ Loads.insert(LI);
+ } else if (auto *SI = dyn_cast<StoreInst>(&Inst)) {
+ Pointers.insert({SI->getPointerOperand(),
+ SI->getValueOperand()->getType()});
+ Stores.insert(SI);
+ } else if (auto *CB = dyn_cast<CallBase>(&Inst))
+ Calls.insert(CB);
}
if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias ||
@@ -139,20 +132,12 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
<< " pointers, " << Calls.size() << " call sites\n";
// iterate over the worklist, and run the full (n^2)/2 disambiguations
- for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
- I1 != E; ++I1) {
- auto I1Size = LocationSize::afterPointer();
- Type *I1ElTy = (*I1)->getType()->getPointerElementType();
- if (I1ElTy->isSized())
- I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
-
- for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- auto I2Size = LocationSize::afterPointer();
- Type *I2ElTy = (*I2)->getType()->getPointerElementType();
- if (I2ElTy->isSized())
- I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
-
- AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
+ for (auto I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) {
+ LocationSize Size1 = LocationSize::precise(DL.getTypeStoreSize(I1->second));
+ for (auto I2 = Pointers.begin(); I2 != I1; ++I2) {
+ LocationSize Size2 =
+ LocationSize::precise(DL.getTypeStoreSize(I2->second));
+ AliasResult AR = AA.alias(I1->first, Size1, I2->first, Size2);
switch (AR) {
case AliasResult::NoAlias:
PrintResults(AR, PrintNoAlias, *I1, *I2, F.getParent());
@@ -231,13 +216,10 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
// Mod/ref alias analysis: compare all pairs of calls and values
for (CallBase *Call : Calls) {
- for (auto Pointer : Pointers) {
- auto Size = LocationSize::afterPointer();
- Type *ElTy = Pointer->getType()->getPointerElementType();
- if (ElTy->isSized())
- Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
-
- switch (AA.getModRefInfo(Call, Pointer, Size)) {
+ for (const auto &Pointer : Pointers) {
+ LocationSize Size =
+ LocationSize::precise(DL.getTypeStoreSize(Pointer.second));
+ switch (AA.getModRefInfo(Call, Pointer.first, Size)) {
case ModRefInfo::NoModRef:
PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer,
F.getParent());
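
With the rewrite above, the evaluator keys every pointer to the type it is actually accessed with, and the printers emit that type (plus any non-zero address space) before each operand. Assuming a function that loads an i32 through %p, stores a float through %q, and loads through an addrspace(1) pointer %r, the per-pair lines would take a shape like this (illustrative output, derived from the errs() calls in the hunk above):

  NoAlias:	i32* %p, float* %q
  MayAlias:	i32* %p, i32 addrspace(1)* %r
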
diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp
index 5dc6c7780a0c..234a73bff6a8 100644
--- a/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -13,16 +13,12 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
@@ -237,8 +233,8 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
if (AliasAny)
return true;
- assert(Inst->mayReadOrWriteMemory() &&
- "Instruction must either read or write memory.");
+ if (!Inst->mayReadOrWriteMemory())
+ return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (auto *UnknownInst = getUnknownInst(i)) {
@@ -258,31 +254,6 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
return false;
}
-Instruction* AliasSet::getUniqueInstruction() {
- if (AliasAny)
- // May have collapses alias set
- return nullptr;
- if (begin() != end()) {
- if (!UnknownInsts.empty())
- // Another instruction found
- return nullptr;
- if (std::next(begin()) != end())
- // Another instruction found
- return nullptr;
- Value *Addr = begin()->getValue();
- assert(!Addr->user_empty() &&
- "where's the instruction which added this pointer?");
- if (std::next(Addr->user_begin()) != Addr->user_end())
- // Another instruction found -- this is really restrictive
- // TODO: generalize!
- return nullptr;
- return cast<Instruction>(*(Addr->user_begin()));
- }
- if (1 != UnknownInsts.size())
- return nullptr;
- return cast<Instruction>(UnknownInsts[0]);
-}
-
void AliasSetTracker::clear() {
// Delete all the PointerRec entries.
for (auto &I : PointerMap)
diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp
index 177f38af13d8..460dddceaf17 100644
--- a/llvm/lib/Analysis/Analysis.cpp
+++ b/llvm/lib/Analysis/Analysis.cpp
@@ -40,14 +40,14 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeDelinearizationPass(Registry);
initializeDemandedBitsWrapperPassPass(Registry);
initializeDominanceFrontierWrapperPassPass(Registry);
- initializeDomViewerPass(Registry);
- initializeDomPrinterPass(Registry);
- initializeDomOnlyViewerPass(Registry);
- initializePostDomViewerPass(Registry);
- initializeDomOnlyPrinterPass(Registry);
- initializePostDomPrinterPass(Registry);
- initializePostDomOnlyViewerPass(Registry);
- initializePostDomOnlyPrinterPass(Registry);
+ initializeDomViewerWrapperPassPass(Registry);
+ initializeDomPrinterWrapperPassPass(Registry);
+ initializeDomOnlyViewerWrapperPassPass(Registry);
+ initializePostDomViewerWrapperPassPass(Registry);
+ initializeDomOnlyPrinterWrapperPassPass(Registry);
+ initializePostDomPrinterWrapperPassPass(Registry);
+ initializePostDomOnlyViewerWrapperPassPass(Registry);
+ initializePostDomOnlyPrinterWrapperPassPass(Registry);
initializeAAResultsWrapperPassPass(Registry);
initializeGlobalsAAWrapperPassPass(Registry);
initializeIVUsersWrapperPassPass(Registry);
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 9d4fe1225b33..7440dbd29ccf 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -10,8 +10,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/DebugCounter.h"
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
index 3e0214e21ecd..e7e476dfb572 100644
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -11,18 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
@@ -31,7 +30,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <utility>
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 0a0b53796add..c78f822b8bcf 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/PhiValues.h"
@@ -45,7 +44,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -105,29 +103,6 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
// Useful predicates
//===----------------------------------------------------------------------===//
-/// Returns true if the pointer is one which would have been considered an
-/// escape by isNonEscapingLocalObject.
-static bool isEscapeSource(const Value *V) {
- if (isa<CallBase>(V))
- return true;
-
- // The load case works because isNonEscapingLocalObject considers all
- // stores to be escapes (it passes true for the StoreCaptures argument
- // to PointerMayBeCaptured).
- if (isa<LoadInst>(V))
- return true;
-
- // The inttoptr case works because isNonEscapingLocalObject considers all
- // means of converting or equating a pointer to an int (ptrtoint, ptr store
- // which could be followed by an integer load, ptr<->int compare) as
- // escaping, and objects located at well-known addresses via platform-specific
- // means cannot be considered non-escaping local objects.
- if (isa<IntToPtrInst>(V))
- return true;
-
- return false;
-}
-
/// Returns the size of the object specified by V or UnknownSize if unknown.
static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
@@ -234,7 +209,7 @@ bool EarliestEscapeInfo::isNotCapturedBeforeOrAt(const Value *Object,
if (Iter.second) {
Instruction *EarliestCapture = FindEarliestCapture(
Object, *const_cast<Function *>(I->getFunction()),
- /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT);
+ /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT, EphValues);
if (EarliestCapture) {
auto Ins = Inst2Obj.insert({EarliestCapture, {}});
Ins.first->second.push_back(Object);
@@ -661,8 +636,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
unsigned TypeSize =
DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
- Decomposed.Offset += LE.Offset.sextOrSelf(MaxIndexSize);
- APInt Scale = LE.Scale.sextOrSelf(MaxIndexSize);
+ Decomposed.Offset += LE.Offset.sext(MaxIndexSize);
+ APInt Scale = LE.Scale.sext(MaxIndexSize);
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
@@ -1299,8 +1274,31 @@ AliasResult BasicAAResult::aliasGEP(
const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
if (Var.Val.TruncBits == 0 &&
isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
- // If V != 0 then abs(VarIndex) >= abs(Scale).
- MinAbsVarIndex = Var.Scale.abs();
+ // If V != 0, then abs(VarIndex) > 0.
+ MinAbsVarIndex = APInt(Var.Scale.getBitWidth(), 1);
+
+ // Check if abs(V*Scale) >= abs(Scale) holds in the presence of
+ // potentially wrapping math.
+ auto MultiplyByScaleNoWrap = [](const VariableGEPIndex &Var) {
+ if (Var.IsNSW)
+ return true;
+
+ int ValOrigBW = Var.Val.V->getType()->getPrimitiveSizeInBits();
+ // Check whether Scale is small enough that abs(V*Scale) >= abs(Scale)
+ // holds: the max value of abs(V) is 2^ValOrigBW - 1, and multiplying by
+ // a constant smaller than 2^(bitwidth(Val) - ValOrigBW) cannot wrap.
+ int MaxScaleValueBW = Var.Val.getBitWidth() - ValOrigBW;
+ if (MaxScaleValueBW <= 0)
+ return false;
+ return Var.Scale.ule(
+ APInt::getMaxValue(MaxScaleValueBW).zext(Var.Scale.getBitWidth()));
+ };
+ // Refine MinAbsVarIndex, if abs(Scale*V) >= abs(Scale) holds in the
+ // presence of potentially wrapping math.
+ if (MultiplyByScaleNoWrap(Var)) {
+ // If V != 0 then abs(VarIndex) >= abs(Scale).
+ MinAbsVarIndex = Var.Scale.abs();
+ }
}
} else if (DecompGEP1.VarIndices.size() == 2) {
// VarIndex = Scale*V0 + (-Scale)*V1.
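
The bound that MultiplyByScaleNoWrap checks can be verified exhaustively at small widths. A standalone sketch with plain integers rather than APInt (the 8-bit value / 16-bit index widths are picked purely for the demonstration):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t ValOrigBW = 8, IndexBW = 16;
    const uint32_t MaxV = (1u << ValOrigBW) - 1;                 // 255
    const uint32_t MaxScale = (1u << (IndexBW - ValOrigBW)) - 1; // 255
    for (uint32_t V = 1; V <= MaxV; ++V)
      for (uint32_t Scale = 1; Scale <= MaxScale; ++Scale) {
        uint32_t Prod = V * Scale;      // at most 255 * 255 = 65025
        assert(Prod < (1u << IndexBW)); // never wraps the 16-bit index
        assert(Prod >= Scale);          // abs(V*Scale) >= abs(Scale)
      }
    return 0;
  }
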
@@ -1370,15 +1368,15 @@ BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize,
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
- AliasResult Alias = getBestAAResults().alias(
- MemoryLocation(V2, V2Size),
- MemoryLocation(SI->getTrueValue(), SISize), AAQI);
+ AliasResult Alias =
+ getBestAAResults().alias(MemoryLocation(SI->getTrueValue(), SISize),
+ MemoryLocation(V2, V2Size), AAQI);
if (Alias == AliasResult::MayAlias)
return AliasResult::MayAlias;
- AliasResult ThisAlias = getBestAAResults().alias(
- MemoryLocation(V2, V2Size),
- MemoryLocation(SI->getFalseValue(), SISize), AAQI);
+ AliasResult ThisAlias =
+ getBestAAResults().alias(MemoryLocation(SI->getFalseValue(), SISize),
+ MemoryLocation(V2, V2Size), AAQI);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1500,8 +1498,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
AAQueryInfo *UseAAQI = BlockInserted ? &NewAAQI : &AAQI;
AliasResult Alias = getBestAAResults().alias(
- MemoryLocation(V2, V2Size),
- MemoryLocation(V1Srcs[0], PNSize), *UseAAQI);
+ MemoryLocation(V1Srcs[0], PNSize), MemoryLocation(V2, V2Size), *UseAAQI);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
@@ -1518,7 +1515,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
Value *V = V1Srcs[i];
AliasResult ThisAlias = getBestAAResults().alias(
- MemoryLocation(V2, V2Size), MemoryLocation(V, PNSize), *UseAAQI);
+ MemoryLocation(V, PNSize), MemoryLocation(V2, V2Size), *UseAAQI);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == AliasResult::MayAlias)
break;
diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index b464071a33e6..436b01764033 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <string>
diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 2a5e1f65d731..ec8d318b675b 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -13,7 +13,6 @@
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Config/llvm-config.h"
@@ -22,8 +21,8 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ScaledNumber.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScaledNumber.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -48,7 +47,7 @@ cl::opt<bool> CheckBFIUnknownBlockQueries(
"for debugging missed BFI updates"));
cl::opt<bool> UseIterativeBFIInference(
- "use-iterative-bfi-inference", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ "use-iterative-bfi-inference", cl::Hidden,
cl::desc("Apply an iterative post-processing to infer correct BFI counts"));
cl::opt<unsigned> IterativeBFIMaxIterationsPerBlock(
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index ffb80134749a..1d880424e55c 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -414,8 +414,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I - 1));
auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB});
if (EstimatedWeight &&
- EstimatedWeight.getValue() <=
- static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
+ *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
UnreachableIdxs.push_back(I - 1);
else
ReachableIdxs.push_back(I - 1);
@@ -688,7 +687,7 @@ Optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight(
if (!Weight)
return None;
- if (!MaxWeight || MaxWeight.getValue() < Weight.getValue())
+ if (!MaxWeight || *MaxWeight < *Weight)
MaxWeight = Weight;
}
@@ -852,8 +851,7 @@ void BranchProbabilityInfo::computeEestimateBlockWeight(
if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE))
LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO);
- EstimatedLoopWeight.insert(
- {LoopBB.getLoopData(), LoopWeight.getValue()});
+ EstimatedLoopWeight.insert({LoopBB.getLoopData(), *LoopWeight});
// Add all blocks entering the loop into working list.
getLoopEnterBlocks(LoopBB, BlockWorkList);
}
@@ -875,7 +873,7 @@ void BranchProbabilityInfo::computeEestimateBlockWeight(
auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB));
if (MaxWeight)
- propagateEstimatedBlockWeight(LoopBB, DT, PDT, MaxWeight.getValue(),
+ propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight,
BlockWorkList, LoopWorkList);
}
} while (!BlockWorkList.empty() || !LoopWorkList.empty());
@@ -913,7 +911,7 @@ bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
// Scale down loop exiting weight by trip count.
Weight = std::max(
static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
- Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
+ Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
TC);
}
bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB);
@@ -923,15 +921,14 @@ bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) {
// 'Unlikely' blocks have twice lower weight.
Weight = std::max(
static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO),
- Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) /
- 2);
+ Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2);
}
if (Weight)
FoundEstimatedWeight = true;
auto WeightVal =
- Weight.getValueOr(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
+ Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT));
TotalWeight += WeightVal;
SuccWeights.push_back(WeightVal);
}
diff --git a/llvm/lib/Analysis/CFG.cpp b/llvm/lib/Analysis/CFG.cpp
index ec25ee161e2c..1902d72f2f89 100644
--- a/llvm/lib/Analysis/CFG.cpp
+++ b/llvm/lib/Analysis/CFG.cpp
@@ -127,11 +127,7 @@ bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest,
// the outermost loop in the loop nest that contains BB.
static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
const Loop *L = LI->getLoopFor(BB);
- if (L) {
- while (const Loop *Parent = L->getParentLoop())
- L = Parent;
- }
- return L;
+ return L ? L->getOutermostLoop() : nullptr;
}
bool llvm::isPotentiallyReachableFromMany(
diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp
index 04ccdc590845..f8eba1a00f28 100644
--- a/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/llvm/lib/Analysis/CFGPrinter.cpp
@@ -23,7 +23,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
-#include <algorithm>
+#include "llvm/Support/GraphWriter.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 1216d03e448b..602a01867f3b 100644
--- a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -831,14 +831,14 @@ CFLAndersAAResult::ensureCached(const Function &Fn) {
scan(Fn);
Iter = Cache.find(&Fn);
assert(Iter != Cache.end());
- assert(Iter->second.hasValue());
+ assert(Iter->second);
}
return Iter->second;
}
const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) {
auto &FunInfo = ensureCached(Fn);
- if (FunInfo.hasValue())
+ if (FunInfo)
return &FunInfo->getAliasSummary();
else
return nullptr;
diff --git a/llvm/lib/Analysis/CFLGraph.h b/llvm/lib/Analysis/CFLGraph.h
index 02a13d673f40..60fc8d18678c 100644
--- a/llvm/lib/Analysis/CFLGraph.h
+++ b/llvm/lib/Analysis/CFLGraph.h
@@ -403,7 +403,7 @@ template <typename CFLAA> class CFLGraphBuilder {
auto &RetParamRelations = Summary->RetParamRelations;
for (auto &Relation : RetParamRelations) {
auto IRelation = instantiateExternalRelation(Relation, Call);
- if (IRelation.hasValue()) {
+ if (IRelation) {
Graph.addNode(IRelation->From);
Graph.addNode(IRelation->To);
Graph.addEdge(IRelation->From, IRelation->To);
@@ -413,7 +413,7 @@ template <typename CFLAA> class CFLGraphBuilder {
auto &RetParamAttributes = Summary->RetParamAttributes;
for (auto &Attribute : RetParamAttributes) {
auto IAttr = instantiateExternalAttribute(Attribute, Call);
- if (IAttr.hasValue())
+ if (IAttr)
Graph.addNode(IAttr->IValue, IAttr->Attr);
}
}
diff --git a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 090dccc53b6e..f92869c2ec63 100644
--- a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -165,7 +165,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
assert(RetVal != nullptr);
assert(RetVal->getType()->isPointerTy());
auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
- if (RetInfo.hasValue())
+ if (RetInfo)
AddToRetParamRelations(0, RetInfo->Index);
}
@@ -174,7 +174,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
for (auto &Param : Fn.args()) {
if (Param.getType()->isPointerTy()) {
auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
- if (ParamInfo.hasValue())
+ if (ParamInfo)
AddToRetParamRelations(I + 1, ParamInfo->Index);
}
++I;
@@ -250,14 +250,14 @@ CFLSteensAAResult::ensureCached(Function *Fn) {
scan(Fn);
Iter = Cache.find(Fn);
assert(Iter != Cache.end());
- assert(Iter->second.hasValue());
+ assert(Iter->second);
}
return Iter->second;
}
const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
auto &FunInfo = ensureCached(&Fn);
- if (FunInfo.hasValue())
+ if (FunInfo)
return &FunInfo->getAliasSummary();
else
return nullptr;
@@ -293,15 +293,15 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
assert(Fn != nullptr);
auto &MaybeInfo = ensureCached(Fn);
- assert(MaybeInfo.hasValue());
+ assert(MaybeInfo);
auto &Sets = MaybeInfo->getStratifiedSets();
auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
- if (!MaybeA.hasValue())
+ if (!MaybeA)
return AliasResult::MayAlias;
auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
- if (!MaybeB.hasValue())
+ if (!MaybeB)
return AliasResult::MayAlias;
auto SetA = *MaybeA;
diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
index c60b70ae5b69..b2e7422bbf8b 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -9,6 +9,7 @@
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -27,7 +28,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <iterator>
@@ -164,9 +164,9 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
InlinedInternalEdges;
CGSCCUpdateResult UR = {
- RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet,
- nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges,
- {}};
+ RCWorklist, CWorklist, InvalidRefSCCSet,
+ InvalidSCCSet, nullptr, PreservedAnalyses::all(),
+ InlinedInternalEdges, {}};
// Request PassInstrumentation from analysis manager, will use it to run
// instrumenting callbacks for the passes later.
@@ -174,9 +174,8 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
PreservedAnalyses PA = PreservedAnalyses::all();
CG.buildRefSCCs();
- for (auto RCI = CG.postorder_ref_scc_begin(),
- RCE = CG.postorder_ref_scc_end();
- RCI != RCE;) {
+ for (LazyCallGraph::RefSCC &RC :
+ llvm::make_early_inc_range(CG.postorder_ref_sccs())) {
assert(RCWorklist.empty() &&
"Should always start with an empty RefSCC worklist");
// The postorder_ref_sccs range we are walking is lazily constructed, so
@@ -190,7 +189,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
//
// We also eagerly increment the iterator to the next position because
// the CGSCC passes below may delete the current RefSCC.
- RCWorklist.insert(&*RCI++);
+ RCWorklist.insert(&RC);
do {
LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
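
The loop above leans on llvm::make_early_inc_range (llvm/ADT/STLExtras.h), which advances the underlying iterator before yielding each element, so the body is free to delete the element it was just handed. A minimal sketch with a std::list standing in for the lazily built RefSCC postorder:

  #include "llvm/ADT/STLExtras.h"
  #include <list>

  void demo() {
    std::list<int> Work = {1, 2, 3};
    for (int &X : llvm::make_early_inc_range(Work))
      if (X == 2)
        Work.remove(2); // safe: the range already stepped past this node
  }
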
@@ -230,11 +229,15 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n");
continue;
}
- if (&C->getOuterRefSCC() != RC) {
- LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other "
- "RefSCC...\n");
- continue;
- }
+ // We used to also check if the current SCC is part of the current
+ // RefSCC and bail if it wasn't, since it should be in RCWorklist.
+ // However, this can cause compile time explosions in some cases on
+ // modules with a huge RefSCC. If a non-trivial number of SCCs in the
+ // huge RefSCC can become their own child RefSCC, we create one child
+ // RefSCC, bail on the current RefSCC, visit the child RefSCC, revisit
+ // the huge RefSCC, and repeat. By visiting all SCCs in the original
+ // RefSCC we create all the child RefSCCs in one pass of the RefSCC,
+ // rather than one pass of the RefSCC creating one child RefSCC at a time.
// Ensure we can proxy analysis updates from the CGSCC analysis manager
// into the Function analysis manager by getting a proxy here.
@@ -264,11 +267,8 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
// Check that we didn't miss any update scenario.
assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
assert(C->begin() != C->end() && "Cannot have an empty SCC!");
- assert(&C->getOuterRefSCC() == RC &&
- "Processing an SCC in a different RefSCC!");
LastUpdatedC = UR.UpdatedC;
- UR.UpdatedRC = nullptr;
UR.UpdatedC = nullptr;
// Check the PassInstrumentation's BeforePass callbacks before
@@ -290,7 +290,6 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
// Update the SCC and RefSCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
- RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
if (UR.UpdatedC) {
// If we're updating the SCC, also update the FAM inside the proxy's
@@ -1213,10 +1212,8 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!");
assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");
- // Record the current RefSCC and SCC for higher layers of the CGSCC pass
- // manager now that all the updates have been applied.
- if (RC != &InitialRC)
- UR.UpdatedRC = RC;
+ // Record the current SCC for higher layers of the CGSCC pass manager now that
+ // all the updates have been applied.
if (C != &InitialC)
UR.UpdatedC = C;
diff --git a/llvm/lib/Analysis/CallGraph.cpp b/llvm/lib/Analysis/CallGraph.cpp
index dfbd29b7d636..f85527122b2a 100644
--- a/llvm/lib/Analysis/CallGraph.cpp
+++ b/llvm/lib/Analysis/CallGraph.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
using namespace llvm;
@@ -70,8 +69,7 @@ bool CallGraph::invalidate(Module &, const PreservedAnalyses &PA,
// Check whether the analysis, all analyses on functions, or the function's
// CFG have been preserved.
auto PAC = PA.getChecker<CallGraphAnalysis>();
- return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>() ||
- PAC.preservedSet<CFGAnalyses>());
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>());
}
void CallGraph::addToCallGraph(Function *F) {
diff --git a/llvm/lib/Analysis/CallGraphSCCPass.cpp b/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 930cb13c0cb3..8438f33f4712 100644
--- a/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -28,7 +28,6 @@
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -271,7 +270,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
Calls.count(Call) ||
// If the call edge is not from a call or invoke, or it is a
- // instrinsic call, then the function pass RAUW'd a call with
+ // intrinsic call, then the function pass RAUW'd a call with
// another value. This can happen when constant folding happens
// of well known functions etc.
(Call->getCalledFunction() &&
@@ -470,7 +469,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
initializeAnalysisImpl(P);
#ifdef EXPENSIVE_CHECKS
- uint64_t RefHash = StructuralHash(CG.getModule());
+ uint64_t RefHash = P->structuralHash(CG.getModule());
#endif
// Actually run this pass on the current SCC.
@@ -480,7 +479,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
Changed |= LocalChanged;
#ifdef EXPENSIVE_CHECKS
- if (!LocalChanged && (RefHash != StructuralHash(CG.getModule()))) {
+ if (!LocalChanged && (RefHash != P->structuralHash(CG.getModule()))) {
llvm::errs() << "Pass modifies its input and doesn't report it: "
<< P->getPassName() << "\n";
llvm_unreachable("Pass modifies its input and doesn't report it");
diff --git a/llvm/lib/Analysis/CallPrinter.cpp b/llvm/lib/Analysis/CallPrinter.cpp
index 829532a0fa10..65e3184fad91 100644
--- a/llvm/lib/Analysis/CallPrinter.cpp
+++ b/llvm/lib/Analysis/CallPrinter.cpp
@@ -14,18 +14,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallPrinter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/DOTGraphTraitsPass.h"
#include "llvm/Analysis/HeatUtils.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/GraphWriter.h"
using namespace llvm;
+namespace llvm {
+template <class GraphType> struct GraphTraits;
+}
+
// This option shows static (relative) call counts.
// FIXME:
// Need to show real counts when profile data is available
@@ -213,6 +218,71 @@ struct DOTGraphTraits<CallGraphDOTInfo *> : public DefaultDOTGraphTraits {
} // end llvm namespace
namespace {
+void doCallGraphDOTPrinting(
+ Module &M, function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
+ std::string Filename;
+ if (!CallGraphDotFilenamePrefix.empty())
+ Filename = (CallGraphDotFilenamePrefix + ".callgraph.dot");
+ else
+ Filename = (std::string(M.getModuleIdentifier()) + ".callgraph.dot");
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
+
+ CallGraph CG(M);
+ CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
+
+ if (!EC)
+ WriteGraph(File, &CFGInfo);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+}
+
+void viewCallGraph(Module &M,
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
+ CallGraph CG(M);
+ CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
+
+ std::string Title =
+ DOTGraphTraits<CallGraphDOTInfo *>::getGraphName(&CFGInfo);
+ ViewGraph(&CFGInfo, "callgraph", true, Title);
+}
+} // namespace
+
+namespace llvm {
+PreservedAnalyses CallGraphDOTPrinterPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto LookupBFI = [&FAM](Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ doCallGraphDOTPrinting(M, LookupBFI);
+
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses CallGraphViewerPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto LookupBFI = [&FAM](Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ viewCallGraph(M, LookupBFI);
+
+ return PreservedAnalyses::all();
+}
+} // namespace llvm
+
+namespace {
// Viewer
class CallGraphViewer : public ModulePass {
public:
@@ -234,12 +304,7 @@ bool CallGraphViewer::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- CallGraph CG(M);
- CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
-
- std::string Title =
- DOTGraphTraits<CallGraphDOTInfo *>::getGraphName(&CFGInfo);
- ViewGraph(&CFGInfo, "callgraph", true, Title);
+ viewCallGraph(M, LookupBFI);
return false;
}
@@ -266,24 +331,7 @@ bool CallGraphDOTPrinter::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- std::string Filename;
- if (!CallGraphDotFilenamePrefix.empty())
- Filename = (CallGraphDotFilenamePrefix + ".callgraph.dot");
- else
- Filename = (std::string(M.getModuleIdentifier()) + ".callgraph.dot");
- errs() << "Writing '" << Filename << "'...";
-
- std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
-
- CallGraph CG(M);
- CallGraphDOTInfo CFGInfo(&M, &CG, LookupBFI);
-
- if (!EC)
- WriteGraph(File, &CFGInfo);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ doCallGraphDOTPrinting(M, LookupBFI);
return false;
}
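
After this refactoring, the legacy passes and the new pass-manager entry points share one implementation each for printing and viewing. Under the new pass manager the printer can be scheduled like any module pass; a hypothetical driver fragment (the Module, analysis managers, and PassBuilder wiring are assumed to exist already):

  ModulePassManager MPM;
  MPM.addPass(CallGraphDOTPrinterPass()); // writes <module>.callgraph.dot
  MPM.run(M, MAM); // M and MAM set up via the usual PassBuilder boilerplate
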
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp
index ba8462e659d5..f4fd660ac7e0 100644
--- a/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/llvm/lib/Analysis/CaptureTracking.cpp
@@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -44,15 +45,15 @@ STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before");
/// use it where possible. The caching version can use much higher limit or
/// don't have this cap at all.
static cl::opt<unsigned>
-DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden,
- cl::desc("Maximal number of uses to explore."),
- cl::init(20));
+ DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden,
+ cl::desc("Maximal number of uses to explore."),
+ cl::init(100));
unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() {
return DefaultMaxUsesToExplore;
}
-CaptureTracker::~CaptureTracker() {}
+CaptureTracker::~CaptureTracker() = default;
bool CaptureTracker::shouldExplore(const Use *U) { return true; }
@@ -74,8 +75,10 @@ bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
namespace {
struct SimpleCaptureTracker : public CaptureTracker {
- explicit SimpleCaptureTracker(bool ReturnCaptures)
- : ReturnCaptures(ReturnCaptures) {}
+ explicit SimpleCaptureTracker(
+ const SmallPtrSetImpl<const Value *> &EphValues, bool ReturnCaptures)
+ : EphValues(EphValues), ReturnCaptures(ReturnCaptures) {}
void tooManyUses() override { Captured = true; }
@@ -83,10 +86,15 @@ namespace {
if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
return false;
+ if (EphValues.contains(U->getUser()))
+ return false;
+
Captured = true;
return true;
}
+ const SmallPtrSetImpl<const Value *> &EphValues;
+
bool ReturnCaptures;
bool Captured = false;
@@ -154,8 +162,9 @@ namespace {
// escape are not in a cycle.
struct EarliestCaptures : public CaptureTracker {
- EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
- : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
+ EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT,
+ const SmallPtrSetImpl<const Value *> &EphValues)
+ : EphValues(EphValues), DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
void tooManyUses() override {
Captured = true;
@@ -167,6 +176,9 @@ namespace {
if (isa<ReturnInst>(I) && !ReturnCaptures)
return false;
+ if (EphValues.contains(I))
+ return false;
+
if (!EarliestCapture) {
EarliestCapture = I;
} else if (EarliestCapture->getParent() == I->getParent()) {
@@ -193,6 +205,8 @@ namespace {
return false;
}
+ const SmallPtrSetImpl<const Value *> &EphValues;
+
Instruction *EarliestCapture = nullptr;
const DominatorTree &DT;
@@ -212,8 +226,18 @@ namespace {
/// counts as capturing it or not. The boolean StoreCaptures specifies whether
/// storing the value (or part of it) into memory anywhere automatically
/// counts as capturing it or not.
-bool llvm::PointerMayBeCaptured(const Value *V,
- bool ReturnCaptures, bool StoreCaptures,
+bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
+ bool StoreCaptures, unsigned MaxUsesToExplore) {
+ SmallPtrSet<const Value *, 1> Empty;
+ return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures, Empty,
+ MaxUsesToExplore);
+}
+
+/// Variant of the above function which accepts a set of Values that are
+/// ephemeral and cannot cause pointers to escape.
+bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
+ bool StoreCaptures,
+ const SmallPtrSetImpl<const Value *> &EphValues,
unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -224,7 +248,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
// take advantage of this.
(void)StoreCaptures;
- SimpleCaptureTracker SCT(ReturnCaptures);
+ SimpleCaptureTracker SCT(EphValues, ReturnCaptures);
PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
if (SCT.Captured)
++NumCaptured;
@@ -266,14 +290,16 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
return CB.Captured;
}
-Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
- bool ReturnCaptures, bool StoreCaptures,
- const DominatorTree &DT,
- unsigned MaxUsesToExplore) {
+Instruction *
+llvm::FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures,
+ bool StoreCaptures, const DominatorTree &DT,
+ const SmallPtrSetImpl<const Value *> &EphValues,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
- EarliestCaptures CB(ReturnCaptures, F, DT);
+ EarliestCaptures CB(ReturnCaptures, F, DT, EphValues);
PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
if (CB.Captured)
++NumCapturedBefore;
@@ -282,6 +308,132 @@ Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
return CB.EarliestCapture;
}
+UseCaptureKind llvm::DetermineUseCaptureKind(
+ const Use &U,
+ function_ref<bool(Value *, const DataLayout &)> IsDereferenceableOrNull) {
+ Instruction *I = cast<Instruction>(U.getUser());
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ auto *Call = cast<CallBase>(I);
+ // Not captured if the callee is readonly, doesn't return a copy through
+ // its return value and doesn't unwind (a readonly function can leak bits
+ // by throwing an exception or not depending on the input value).
+ if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
+ Call->getType()->isVoidTy())
+ return UseCaptureKind::NO_CAPTURE;
+
+ // The pointer is not captured if returned pointer is not captured.
+ // NOTE: CaptureTracking users should not assume that only functions
+ // marked with nocapture do not capture. This means that places like
+ // getUnderlyingObject in ValueTracking or DecomposeGEPExpression
+ // in BasicAA also need to know about this property.
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true))
+ return UseCaptureKind::PASSTHROUGH;
+
+ // Volatile operations effectively capture the memory location that they
+ // load and store to.
+ if (auto *MI = dyn_cast<MemIntrinsic>(Call))
+ if (MI->isVolatile())
+ return UseCaptureKind::MAY_CAPTURE;
+
+ // Calling a function pointer does not in itself cause the pointer to
+ // be captured. This is a subtle point considering that (for example)
+ // the callee might return its own address. It is analogous to saying
+ // that loading a value from a pointer does not cause the pointer to be
+ // captured, even though the loaded value might be the pointer itself
+ // (think of self-referential objects).
+ if (Call->isCallee(&U))
+ return UseCaptureKind::NO_CAPTURE;
+
+ // Not captured if only passed via 'nocapture' arguments.
+ if (Call->isDataOperand(&U) &&
+ !Call->doesNotCapture(Call->getDataOperandNo(&U))) {
+ // The parameter is not marked 'nocapture' - captured.
+ return UseCaptureKind::MAY_CAPTURE;
+ }
+ return UseCaptureKind::NO_CAPTURE;
+ }
+ case Instruction::Load:
+ // Volatile loads make the address observable.
+ if (cast<LoadInst>(I)->isVolatile())
+ return UseCaptureKind::MAY_CAPTURE;
+ return UseCaptureKind::NO_CAPTURE;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer does not cause it to be captured.
+ return UseCaptureKind::NO_CAPTURE;
+ case Instruction::Store:
+ // Stored the pointer - conservatively assume it may be captured.
+ // Volatile stores make the address observable.
+ if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
+ return UseCaptureKind::MAY_CAPTURE;
+ return UseCaptureKind::NO_CAPTURE;
+ case Instruction::AtomicRMW: {
+ // atomicrmw conceptually includes both a load and store from
+ // the same location.
+ // As with a store, the location being accessed is not captured,
+ // but the value being stored is.
+ // Volatile stores make the address observable.
+ auto *ARMWI = cast<AtomicRMWInst>(I);
+ if (U.getOperandNo() == 1 || ARMWI->isVolatile())
+ return UseCaptureKind::MAY_CAPTURE;
+ return UseCaptureKind::NO_CAPTURE;
+ }
+ case Instruction::AtomicCmpXchg: {
+ // cmpxchg conceptually includes both a load and store from
+ // the same location.
+ // As with a store, the location being accessed is not captured,
+ // but the value being stored is.
+ // Volatile stores make the address observable.
+ auto *ACXI = cast<AtomicCmpXchgInst>(I);
+ if (U.getOperandNo() == 1 || U.getOperandNo() == 2 || ACXI->isVolatile())
+ return UseCaptureKind::MAY_CAPTURE;
+ return UseCaptureKind::NO_CAPTURE;
+ }
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ // The original value is not captured via this if the new value isn't.
+ return UseCaptureKind::PASSTHROUGH;
+ case Instruction::ICmp: {
+ unsigned Idx = U.getOperandNo();
+ unsigned OtherIdx = 1 - Idx;
+ if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
+ if (CPN->getType()->getAddressSpace() == 0)
+ if (isNoAliasCall(U.get()->stripPointerCasts()))
+ return UseCaptureKind::NO_CAPTURE;
+ if (!I->getFunction()->nullPointerIsDefined()) {
+ auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
+ // Comparing a dereferenceable_or_null pointer against null cannot
+ // lead to pointer escapes, because if it is not null it must be a
+ // valid (in-bounds) pointer.
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (IsDereferenceableOrNull && IsDereferenceableOrNull(O, DL))
+ return UseCaptureKind::NO_CAPTURE;
+ }
+ }
+ // Comparison against a value loaded from a global variable. Given that
+ // the pointer does not escape, its value cannot have been guessed and
+ // stored separately in a global variable.
+ auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
+ if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
+ return UseCaptureKind::NO_CAPTURE;
+ // Otherwise, be conservative. There are crazy ways to capture pointers
+ // using comparisons.
+ return UseCaptureKind::MAY_CAPTURE;
+ }
+ default:
+ // Something else - be conservative and say it is captured.
+ return UseCaptureKind::MAY_CAPTURE;
+ }
+}
+
void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
unsigned MaxUsesToExplore) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
@@ -293,11 +445,10 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
SmallSet<const Use *, 20> Visited;
auto AddUses = [&](const Value *V) {
- unsigned Count = 0;
for (const Use &U : V->uses()) {
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
- if (Count++ >= MaxUsesToExplore) {
+ if (Visited.size() >= MaxUsesToExplore) {
Tracker->tooManyUses();
return false;
}
@@ -312,144 +463,22 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
if (!AddUses(V))
return;
+ auto IsDereferenceableOrNull = [Tracker](Value *V, const DataLayout &DL) {
+ return Tracker->isDereferenceableOrNull(V, DL);
+ };
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
- Instruction *I = cast<Instruction>(U->getUser());
-
- switch (I->getOpcode()) {
- case Instruction::Call:
- case Instruction::Invoke: {
- auto *Call = cast<CallBase>(I);
- // Not captured if the callee is readonly, doesn't return a copy through
- // its return value and doesn't unwind (a readonly function can leak bits
- // by throwing an exception or not depending on the input value).
- if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
- Call->getType()->isVoidTy())
- break;
-
- // The pointer is not captured if returned pointer is not captured.
- // NOTE: CaptureTracking users should not assume that only functions
- // marked with nocapture do not capture. This means that places like
- // getUnderlyingObject in ValueTracking or DecomposeGEPExpression
- // in BasicAA also need to know about this property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call,
- true)) {
- if (!AddUses(Call))
- return;
- break;
- }
-
- // Volatile operations effectively capture the memory location that they
- // load and store to.
- if (auto *MI = dyn_cast<MemIntrinsic>(Call))
- if (MI->isVolatile())
- if (Tracker->captured(U))
- return;
-
- // Calling a function pointer does not in itself cause the pointer to
- // be captured. This is a subtle point considering that (for example)
- // the callee might return its own address. It is analogous to saying
- // that loading a value from a pointer does not cause the pointer to be
- // captured, even though the loaded value might be the pointer itself
- // (think of self-referential objects).
- if (Call->isCallee(U))
- break;
-
- // Not captured if only passed via 'nocapture' arguments.
- if (Call->isDataOperand(U) &&
- !Call->doesNotCapture(Call->getDataOperandNo(U))) {
- // The parameter is not marked 'nocapture' - captured.
- if (Tracker->captured(U))
- return;
- }
- break;
- }
- case Instruction::Load:
- // Volatile loads make the address observable.
- if (cast<LoadInst>(I)->isVolatile())
- if (Tracker->captured(U))
- return;
- break;
- case Instruction::VAArg:
- // "va-arg" from a pointer does not cause it to be captured.
- break;
- case Instruction::Store:
- // Stored the pointer - conservatively assume it may be captured.
- // Volatile stores make the address observable.
- if (U->getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
- if (Tracker->captured(U))
- return;
- break;
- case Instruction::AtomicRMW: {
- // atomicrmw conceptually includes both a load and store from
- // the same location.
- // As with a store, the location being accessed is not captured,
- // but the value being stored is.
- // Volatile stores make the address observable.
- auto *ARMWI = cast<AtomicRMWInst>(I);
- if (U->getOperandNo() == 1 || ARMWI->isVolatile())
- if (Tracker->captured(U))
- return;
- break;
- }
- case Instruction::AtomicCmpXchg: {
- // cmpxchg conceptually includes both a load and store from
- // the same location.
- // As with a store, the location being accessed is not captured,
- // but the value being stored is.
- // Volatile stores make the address observable.
- auto *ACXI = cast<AtomicCmpXchgInst>(I);
- if (U->getOperandNo() == 1 || U->getOperandNo() == 2 ||
- ACXI->isVolatile())
- if (Tracker->captured(U))
- return;
- break;
- }
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::PHI:
- case Instruction::Select:
- case Instruction::AddrSpaceCast:
- // The original value is not captured via this if the new value isn't.
- if (!AddUses(I))
- return;
- break;
- case Instruction::ICmp: {
- unsigned Idx = U->getOperandNo();
- unsigned OtherIdx = 1 - Idx;
- if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
- // Don't count comparisons of a no-alias return value against null as
- // captures. This allows us to ignore comparisons of malloc results
- // with null, for example.
- if (CPN->getType()->getAddressSpace() == 0)
- if (isNoAliasCall(U->get()->stripPointerCasts()))
- break;
- if (!I->getFunction()->nullPointerIsDefined()) {
- auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
- // Comparing a dereferenceable_or_null pointer against null cannot
- // lead to pointer escapes, because if it is not null it must be a
- // valid (in-bounds) pointer.
- if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout()))
- break;
- }
- }
- // Comparison against value stored in global variable. Given the pointer
- // does not escape, its value cannot be guessed and stored separately in a
- // global variable.
- auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
- if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
- break;
- // Otherwise, be conservative. There are crazy ways to capture pointers
- // using comparisons.
+ switch (DetermineUseCaptureKind(*U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::NO_CAPTURE:
+ continue;
+ case UseCaptureKind::MAY_CAPTURE:
if (Tracker->captured(U))
return;
- break;
- }
- default:
- // Something else - be conservative and say it is captured.
- if (Tracker->captured(U))
+ continue;
+ case UseCaptureKind::PASSTHROUGH:
+ if (!AddUses(U->getUser()))
return;
- break;
+ continue;
}
}
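
The EphValues plumbing added throughout this file lets callers declare certain users — typically llvm.assume bookkeeping — as ephemeral so they never count as captures. A hedged sketch of a call site, assuming F, an AssumptionCache AC, and Ptr are already in hand, and that CodeMetrics::collectEphemeralValues is used to gather the set:

  SmallPtrSet<const Value *, 8> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
  bool MayCapture = PointerMayBeCaptured(Ptr, /*ReturnCaptures=*/false,
                                         /*StoreCaptures=*/true, EphValues);
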
diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp
index 5b951980a0aa..20b1df6e1495 100644
--- a/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -18,9 +18,7 @@
using namespace llvm;
-unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
- ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
- : ICI->getPredicate();
+unsigned llvm::getICmpCode(CmpInst::Predicate Pred) {
switch (Pred) {
// False -> 0
case ICmpInst::ICMP_UGT: return 1; // 001
@@ -63,6 +61,18 @@ bool llvm::predicatesFoldable(ICmpInst::Predicate P1, ICmpInst::Predicate P2) {
(CmpInst::isSigned(P2) && ICmpInst::isEquality(P1));
}
+Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy,
+ CmpInst::Predicate &Pred) {
+ Pred = static_cast<FCmpInst::Predicate>(Code);
+ assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
+ "Unexpected FCmp predicate!");
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 1);
+ return nullptr;
+}
+
bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
CmpInst::Predicate &Pred,
Value *&X, APInt &Mask, bool LookThruTrunc) {
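// getICmpCode now takes the predicate directly, and the new
// getPredForFCmpCode decodes a 4-bit fcmp code, folding only the two
// degenerate codes (FCMP_FALSE = 0, FCMP_TRUE = 15) to constants. A
// standalone sketch of that contract, with std::optional<bool> standing
// in for the Constant* result:

#include <cassert>
#include <optional>

std::optional<bool> predForFCmpCode(unsigned Code, unsigned &PredOut) {
  assert(Code <= 15 && "Unexpected FCmp predicate!");
  PredOut = Code;              // caller receives the decoded predicate
  if (Code == 0)
    return false;              // FCMP_FALSE folds to i1 0
  if (Code == 15)
    return true;               // FCMP_TRUE folds to i1 1
  return std::nullopt;         // a real comparison remains
}

// Callers that fold (cmp A) | (cmp B) can typically OR the two codes
// and decode the combined code once, which is what makes the constant
// endpoints worth special-casing.
int main() {
  unsigned Pred;
  return predForFCmpCode(15, Pred).value() ? 0 : 1;
}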
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 27c52506352f..6d9084215dee 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
@@ -118,13 +117,6 @@ void CodeMetrics::analyzeBasicBlock(
const BasicBlock *BB, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
++NumBlocks;
- // Use a proxy variable for NumInsts of type InstructionCost, so that it can
- // use InstructionCost's arithmetic properties such as saturation when this
- // feature is added to InstructionCost.
- // When storing the value back to NumInsts, we can assume all costs are Valid
- // because the IR should not contain any nodes that cannot be costed. If that
- // happens the cost-model is broken.
- InstructionCost NumInstsProxy = NumInsts;
InstructionCost NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
// Skip ephemeral values.
@@ -184,8 +176,7 @@ void CodeMetrics::analyzeBasicBlock(
if (InvI->cannotDuplicate())
notDuplicatable = true;
- NumInstsProxy += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
- NumInsts = *NumInstsProxy.getValue();
+ NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
}
if (isa<ReturnInst>(BB->getTerminator()))
@@ -205,6 +196,6 @@ void CodeMetrics::analyzeBasicBlock(
notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
// Remember NumInsts for this BB.
- InstructionCost NumInstsThisBB = NumInstsProxy - NumInstsBeforeThisBB;
- NumBBInsts[BB] = *NumInstsThisBB.getValue();
+ InstructionCost NumInstsThisBB = NumInsts - NumInstsBeforeThisBB;
+ NumBBInsts[BB] = NumInstsThisBB;
}
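// With NumInsts itself now an InstructionCost, the proxy variable and
// the getValue() round-trips disappear. A minimal sketch of the
// accumulation pattern, assuming a toy cost type (this Cost struct is
// illustrative, not llvm::InstructionCost):

#include <cstdio>

struct Cost {
  long Value = 0;
  bool Valid = true;           // an invalid cost poisons the sum
  Cost &operator+=(Cost R) { Valid &= R.Valid; Value += R.Value; return *this; }
  Cost operator-(Cost R) const { return {Value - R.Value, Valid && R.Valid}; }
};

int main() {
  Cost NumInsts;                        // running function total
  Cost Before = NumInsts;               // snapshot at block entry
  for (long PerInst : {1L, 3L})
    NumInsts += Cost{PerInst};          // accumulate directly, no proxy
  Cost ThisBB = NumInsts - Before;      // per-block cost, as in the patch
  std::printf("%ld\n", ThisBB.Value);   // prints 4
}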
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 7cf69f613c66..a81041845052 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -57,7 +57,6 @@
#include <cerrno>
#include <cfenv>
#include <cmath>
-#include <cstddef>
#include <cstdint>
using namespace llvm;
@@ -92,7 +91,7 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
return ConstantExpr::getBitCast(C, DestTy);
Result <<= BitShift;
- Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
+ Result |= ElementCI->getValue().zext(Result.getBitWidth());
}
return nullptr;
@@ -589,14 +588,17 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
if (BytesLoaded > 32 || BytesLoaded == 0)
return nullptr;
- int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize();
-
// If we're not accessing anything in this constant, the result is undefined.
if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
return UndefValue::get(IntType);
+ // TODO: We should be able to support scalable types.
+ TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
+ if (InitializerSize.isScalable())
+ return nullptr;
+
// If we're not accessing anything in this constant, the result is undefined.
- if (Offset >= InitializerSize)
+ if (Offset >= (int64_t)InitializerSize.getFixedValue())
return UndefValue::get(IntType);
unsigned char RawBytes[32] = {0};
@@ -631,6 +633,39 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
return ConstantInt::get(IntType->getContext(), ResultVal);
}
+} // anonymous namespace
+
+// If GV is a constant with an initializer, read its representation starting
+// at Offset and return it as a constant array of unsigned char. Otherwise
+// return null.
+Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
+ uint64_t Offset) {
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return nullptr;
+
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ Constant *Init = const_cast<Constant *>(GV->getInitializer());
+ TypeSize InitSize = DL.getTypeAllocSize(Init->getType());
+ if (InitSize < Offset)
+ return nullptr;
+
+ uint64_t NBytes = InitSize - Offset;
+ if (NBytes > UINT16_MAX)
+ // Bail for large initializers in excess of 64K to avoid allocating
+ // too much memory.
+    // Offset is assumed to be less than or equal to InitSize (this
+ // is enforced in ReadDataFromGlobal).
+ return nullptr;
+
+ SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
+ unsigned char *CurPtr = RawBytes.data();
+
+ if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))
+ return nullptr;
+
+ return ConstantDataArray::get(GV->getContext(), RawBytes);
+}
+
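// A standalone sketch of the bounds policy above: reject offsets past
// the initializer, cap the remaining tail at 64K (UINT16_MAX), and
// otherwise hand back the raw bytes. std::vector stands in for both the
// initializer and the resulting ConstantDataArray:

#include <cstdint>
#include <optional>
#include <vector>

std::optional<std::vector<unsigned char>>
readByteArray(const std::vector<unsigned char> &Init, uint64_t Offset) {
  if (Init.size() < Offset)
    return std::nullopt;                 // nothing to read at this offset
  uint64_t NBytes = Init.size() - Offset;
  if (NBytes > UINT16_MAX)
    return std::nullopt;                 // avoid huge allocations
  return std::vector<unsigned char>(Init.begin() + Offset, Init.end());
}

int main() {
  std::vector<unsigned char> Init(100, 0xAB);
  return readByteArray(Init, 10).has_value() ? 0 : 1;
}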
/// If this Offset points exactly to the start of an aggregate element, return
/// that element, otherwise return nullptr.
Constant *getConstantAtOffset(Constant *Base, APInt Offset,
@@ -659,8 +694,6 @@ Constant *getConstantAtOffset(Constant *Base, APInt Offset,
return C;
}
-} // end anonymous namespace
-
Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
const APInt &Offset,
const DataLayout &DL) {
@@ -864,21 +897,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Type *IntIdxTy = DL.getIndexType(Ptr->getType());
- // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
- // "inttoptr (sub (ptrtoint Ptr), V)"
- if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
- auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
- assert((!CE || CE->getType() == IntIdxTy) &&
- "CastGEPIndices didn't canonicalize index types!");
- if (CE && CE->getOpcode() == Instruction::Sub &&
- CE->getOperand(0)->isNullValue()) {
- Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
- Res = ConstantExpr::getSub(Res, CE->getOperand(1));
- Res = ConstantExpr::getIntToPtr(Res, ResTy);
- return ConstantFoldConstant(Res, DL, TLI);
- }
- }
-
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
if (!isa<ConstantInt>(Ops[i]))
return nullptr;
@@ -1012,8 +1030,24 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
if (Instruction::isUnaryOp(Opcode))
return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
- if (Instruction::isBinaryOp(Opcode))
+ if (Instruction::isBinaryOp(Opcode)) {
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ // Handle floating point instructions separately to account for denormals
+ // TODO: If a constant expression is being folded rather than an
+ // instruction, denormals will not be flushed/treated as zero
+ if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
+ return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I);
+ }
+ }
return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
+ }
if (Instruction::isCast(Opcode))
return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
@@ -1027,13 +1061,21 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
GEP->getInRangeIndex());
}
- if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
+ if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE)) {
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ DL, TLI);
return CE->getWithOperands(Ops);
+ }
switch (Opcode) {
default: return nullptr;
case Instruction::ICmp:
- case Instruction::FCmp: llvm_unreachable("Invalid for compares");
+ case Instruction::FCmp: {
+ auto *C = cast<CmpInst>(InstOrCE);
+ return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],
+ DL, TLI, C);
+ }
case Instruction::Freeze:
return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
case Instruction::Call:
@@ -1048,13 +1090,22 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
case Instruction::ExtractValue:
- return ConstantExpr::getExtractValue(
+ return ConstantFoldExtractValueInstruction(
Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertValue:
+ return ConstantFoldInsertValueInstruction(
+ Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(
Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
+ case Instruction::Load: {
+ const auto *LI = dyn_cast<LoadInst>(InstOrCE);
+ if (LI->isVolatile())
+ return nullptr;
+ return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
+ }
}
}
@@ -1091,13 +1142,8 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
Ops.push_back(NewC);
}
- if (auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (CE->isCompare())
- return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- DL, TLI);
-
+ if (auto *CE = dyn_cast<ConstantExpr>(C))
return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
- }
assert(isa<ConstantVector>(C));
return ConstantVector::get(Ops);
@@ -1150,22 +1196,6 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
Ops.push_back(Op);
}
- if (const auto *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- DL, TLI);
-
- if (const auto *LI = dyn_cast<LoadInst>(I)) {
- if (LI->isVolatile())
- return nullptr;
- return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
- }
-
- if (auto *IVI = dyn_cast<InsertValueInst>(I))
- return ConstantExpr::getInsertValue(Ops[0], Ops[1], IVI->getIndices());
-
- if (auto *EVI = dyn_cast<ExtractValueInst>(I))
- return ConstantExpr::getExtractValue(Ops[0], EVI->getIndices());
-
return ConstantFoldInstOperands(I, Ops, DL, TLI);
}
@@ -1182,10 +1212,9 @@ Constant *llvm::ConstantFoldInstOperands(Instruction *I,
return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
}
-Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate,
- Constant *Ops0, Constant *Ops1,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+Constant *llvm::ConstantFoldCompareInstOperands(
+ unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, const Instruction *I) {
CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp null, (inttoptr x) -> icmp 0, x
@@ -1287,6 +1316,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate,
return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
}
+ // Flush any denormal constant float input according to denormal handling
+ // mode.
+ Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);
+ Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);
+
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
@@ -1308,6 +1342,63 @@ Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
return ConstantExpr::get(Opcode, LHS, RHS);
}
+Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
+ bool IsOutput) {
+ if (!I || !I->getParent() || !I->getFunction())
+ return Operand;
+
+ ConstantFP *CFP = dyn_cast<ConstantFP>(Operand);
+ if (!CFP)
+ return Operand;
+
+ const APFloat &APF = CFP->getValueAPF();
+ Type *Ty = CFP->getType();
+ DenormalMode DenormMode =
+ I->getFunction()->getDenormalMode(Ty->getFltSemantics());
+ DenormalMode::DenormalModeKind Mode =
+ IsOutput ? DenormMode.Output : DenormMode.Input;
+ switch (Mode) {
+ default:
+ llvm_unreachable("unknown denormal mode");
+ return Operand;
+ case DenormalMode::IEEE:
+ return Operand;
+ case DenormalMode::PreserveSign:
+ if (APF.isDenormal()) {
+ return ConstantFP::get(
+ Ty->getContext(),
+ APFloat::getZero(Ty->getFltSemantics(), APF.isNegative()));
+ }
+ return Operand;
+ case DenormalMode::PositiveZero:
+ if (APF.isDenormal()) {
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(Ty->getFltSemantics(), false));
+ }
+ return Operand;
+ }
+ return Operand;
+}
+
+Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
+ Constant *RHS, const DataLayout &DL,
+ const Instruction *I) {
+ if (Instruction::isBinaryOp(Opcode)) {
+ // Flush denormal inputs if needed.
+ Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
+ Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
+
+ // Calculate constant result.
+ Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
+
+ // Flush denormal output if needed.
+ return FlushFPConstant(C, I, /* IsOutput */ true);
+ }
+  // If the instruction lacks a parent/function, the denormal mode cannot be
+  // determined, so fall back to the default (IEEE) folding.
+ return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+}
+
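// The effect of FlushFPConstant is easiest to see on a concrete
// subnormal. A standalone sketch with plain floats; Kind mirrors the
// DenormalMode kinds used above, everything else is illustrative:

#include <cmath>
#include <cstdio>

enum class Kind { IEEE, PreserveSign, PositiveZero };

float flushDenormal(float V, Kind Mode) {
  if (Mode == Kind::IEEE || std::fpclassify(V) != FP_SUBNORMAL)
    return V;                              // only denormal values change
  if (Mode == Kind::PreserveSign)
    return std::signbit(V) ? -0.0f : 0.0f; // flush to zero, keep the sign
  return 0.0f;                             // PositiveZero drops the sign
}

int main() {
  float Denormal = -1e-40f;                // subnormal in 32-bit float
  std::printf("%g %g %g\n", flushDenormal(Denormal, Kind::IEEE),
              flushDenormal(Denormal, Kind::PreserveSign),
              flushDenormal(Denormal, Kind::PositiveZero));
  // prints: -1e-40 -0 0
}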
Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
Type *DestTy, const DataLayout &DL) {
assert(Instruction::isCast(Opcode));
@@ -1334,6 +1425,19 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
DL, BaseOffset, /*AllowNonInbounds=*/true));
if (Base->isNullValue()) {
FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
+ } else {
+ // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V
+ if (GEP->getNumIndices() == 1 &&
+ GEP->getSourceElementType()->isIntegerTy(8)) {
+ auto *Ptr = cast<Constant>(GEP->getPointerOperand());
+ auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));
+ Type *IntIdxTy = DL.getIndexType(Ptr->getType());
+ if (Sub && Sub->getType() == IntIdxTy &&
+ Sub->getOpcode() == Instruction::Sub &&
+ Sub->getOperand(0)->isNullValue())
+ FoldedValue = ConstantExpr::getSub(
+ ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));
+ }
}
}
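// The new fold above is plain byte arithmetic: a gep of i8 by (0 - V)
// moves the pointer down by V bytes, so its ptrtoint equals
// ptrtoint(Ptr) - V. A quick standalone check of the identity:

#include <cstdint>
#include <cstdio>

int main() {
  char Buf[64];
  char *P = Buf + 32;
  intptr_t V = 16;
  char *Q = P + (0 - V);                   // gep i8, P, (sub 0, V)
  std::printf("%d\n", (intptr_t)Q == (intptr_t)P - V); // prints 1
}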
if (FoldedValue) {
@@ -1386,6 +1490,8 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
if (Call->isNoBuiltin())
return false;
+ if (Call->getFunctionType() != F->getFunctionType())
+ return false;
switch (F->getIntrinsicID()) {
  // Operations that do not operate on floating-point numbers and do not depend
// FP environment can be folded even in strictfp functions.
@@ -1527,6 +1633,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::experimental_constrained_trunc:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_rint:
+ case Intrinsic::experimental_constrained_fcmp:
+ case Intrinsic::experimental_constrained_fcmps:
return true;
default:
return false;
@@ -1798,12 +1906,12 @@ static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
// If evaluation raised FP exception, the result can depend on rounding
// mode. If the latter is unknown, folding is not possible.
- if (!ORM || *ORM == RoundingMode::Dynamic)
+ if (ORM && *ORM == RoundingMode::Dynamic)
return false;
// If FP exceptions are ignored, fold the call, even if such exception is
// raised.
- if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
+ if (EB && *EB != fp::ExceptionBehavior::ebStrict)
return true;
// Leave the calculation for runtime so that exception flags be correctly set
@@ -1979,7 +2087,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::experimental_constrained_rint: {
auto CI = cast<ConstrainedFPIntrinsic>(Call);
RM = CI->getRoundingMode();
- if (!RM || RM.getValue() == RoundingMode::Dynamic)
+ if (!RM || *RM == RoundingMode::Dynamic)
return nullptr;
break;
}
@@ -2301,6 +2409,24 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return nullptr;
}
+static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
+ const ConstrainedFPIntrinsic *Call) {
+ APFloat::opStatus St = APFloat::opOK;
+ auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);
+ FCmpInst::Predicate Cond = FCmp->getPredicate();
+ if (FCmp->isSignaling()) {
+ if (Op1.isNaN() || Op2.isNaN())
+ St = APFloat::opInvalidOp;
+ } else {
+ if (Op1.isSignaling() || Op2.isSignaling())
+ St = APFloat::opInvalidOp;
+ }
+ bool Result = FCmpInst::compare(Op1, Op2, Cond);
+ if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
+ return ConstantInt::get(Call->getType()->getScalarType(), Result);
+ return nullptr;
+}
+
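// The NaN rules in evaluateCompare, restated: the signaling variant
// (fcmps) raises the invalid-operation flag for any NaN operand, while
// the quiet variant (fcmp) raises it only for signaling NaNs. A small
// standalone sketch of just that flag decision (Val is illustrative):

#include <cstdio>

struct Val { bool IsNaN; bool IsSignaling; };

bool raisesInvalid(Val A, Val B, bool SignalingCompare) {
  if (SignalingCompare)
    return A.IsNaN || B.IsNaN;             // fcmps: any NaN raises it
  return A.IsSignaling || B.IsSignaling;   // fcmp: only sNaN raises it
}

int main() {
  Val QNaN{true, false}, One{false, false};
  std::printf("%d %d\n",
              raisesInvalid(QNaN, One, false),  // 0: quiet compare folds freely
              raisesInvalid(QNaN, One, true));  // 1: folding depends on EB
}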
static Constant *ConstantFoldScalarCall2(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
@@ -2329,8 +2455,6 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
}
if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
- if (!Ty->isFloatingPointTy())
- return nullptr;
const APFloat &Op1V = Op1->getValueAPF();
if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
@@ -2360,6 +2484,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
case Intrinsic::experimental_constrained_frem:
St = Res.mod(Op2V);
break;
+ case Intrinsic::experimental_constrained_fcmp:
+ case Intrinsic::experimental_constrained_fcmps:
+ return evaluateCompare(Op1V, Op2V, ConstrIntr);
}
if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
St))
@@ -2484,6 +2611,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
+ // This is the same as for binary ops - poison propagates.
+ // TODO: Poison handling should be consolidated.
+ if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+ return PoisonValue::get(Ty);
+
if (!C0 && !C1)
return UndefValue::get(Ty);
if (!C0 || !C1)
@@ -2550,6 +2682,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
}
case Intrinsic::uadd_sat:
case Intrinsic::sadd_sat:
+ // This is the same as for binary ops - poison propagates.
+ // TODO: Poison handling should be consolidated.
+ if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+ return PoisonValue::get(Ty);
+
if (!C0 && !C1)
return UndefValue::get(Ty);
if (!C0 || !C1)
@@ -2560,6 +2697,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return ConstantInt::get(Ty, C0->sadd_sat(*C1));
case Intrinsic::usub_sat:
case Intrinsic::ssub_sat:
+ // This is the same as for binary ops - poison propagates.
+ // TODO: Poison handling should be consolidated.
+ if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+ return PoisonValue::get(Ty);
+
if (!C0 && !C1)
return UndefValue::get(Ty);
if (!C0 || !C1)
@@ -2840,11 +2982,11 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
unsigned Width = C0->getBitWidth();
assert(Scale < Width && "Illegal scale.");
unsigned ExtendedWidth = Width * 2;
- APInt Product = (C0->sextOrSelf(ExtendedWidth) *
- C1->sextOrSelf(ExtendedWidth)).ashr(Scale);
+ APInt Product =
+ (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);
if (IntrinsicID == Intrinsic::smul_fix_sat) {
- APInt Max = APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
- APInt Min = APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+ APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);
+ APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);
Product = APIntOps::smin(Product, Max);
Product = APIntOps::smax(Product, Min);
}
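// The widened multiply above in a worked case: Q4 fixed point with
// Width = 8 and Scale = 4. This sketch mirrors the sext/ashr sequence
// with native integers (no saturation, i.e. the plain smul_fix path;
// an arithmetic right shift is assumed for the signed value):

#include <cstdint>
#include <cstdio>

int8_t smulFix(int8_t A, int8_t B, unsigned Scale) {
  int16_t Product = (int16_t)A * (int16_t)B; // sext to double width
  return (int8_t)(Product >> Scale);         // shift out the scale
}

int main() {
  // 1.5 * 2.0 in Q4: 0x18 * 0x20 = 768, >> 4 = 48 = 0x30, i.e. 3.0.
  std::printf("0x%x\n", (unsigned)(uint8_t)smulFix(0x18, 0x20, 4));
}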
@@ -2998,7 +3140,7 @@ static Constant *ConstantFoldFixedVectorCall(
// Gather a column of constants.
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
// Some intrinsics use a scalar type for certain arguments.
- if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
+ if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) {
Lane[J] = Operands[J];
continue;
}
diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp
index 773f71ada0ee..dc774728ab3d 100644
--- a/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -12,7 +12,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
-#include <algorithm>
#include <string>
using namespace llvm;
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 326bacad01fe..52e424ae324b 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CostModel.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
@@ -25,7 +24,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -119,7 +117,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
PreservedAnalyses CostModelPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- OS << "Cost Model for function '" << F.getName() << "'\n";
+ OS << "Printing analysis 'Cost Model Analysis' for function '" << F.getName() << "':\n";
for (BasicBlock &B : F) {
for (Instruction &Inst : B) {
// TODO: Use a pass parameter instead of cl::opt CostKind to determine
diff --git a/llvm/lib/Analysis/CycleAnalysis.cpp b/llvm/lib/Analysis/CycleAnalysis.cpp
index 09c7ee67e05c..17998123fce7 100644
--- a/llvm/lib/Analysis/CycleAnalysis.cpp
+++ b/llvm/lib/Analysis/CycleAnalysis.cpp
@@ -8,11 +8,15 @@
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/ADT/GenericCycleImpl.h"
-#include "llvm/IR/CFG.h"
+#include "llvm/IR/CFG.h" // for successors found by ADL in GenericCycleImpl.h
#include "llvm/InitializePasses.h"
using namespace llvm;
+namespace llvm {
+class Module;
+}
+
template class llvm::GenericCycleInfo<SSAContext>;
template class llvm::GenericCycle<SSAContext>;
diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp
index 7e1357959a3f..998c888dd2d9 100644
--- a/llvm/lib/Analysis/DDG.cpp
+++ b/llvm/lib/Analysis/DDG.cpp
@@ -17,13 +17,12 @@
using namespace llvm;
static cl::opt<bool> SimplifyDDG(
- "ddg-simplify", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ "ddg-simplify", cl::init(true), cl::Hidden,
cl::desc(
"Simplify DDG by merging nodes that have less interesting edges."));
-static cl::opt<bool>
- CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Create pi-block nodes."));
+static cl::opt<bool> CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden,
+ cl::desc("Create pi-block nodes."));
#define DEBUG_TYPE "ddg"
@@ -34,7 +33,7 @@ template class llvm::DirectedGraph<DDGNode, DDGEdge>;
//===--------------------------------------------------------------------===//
// DDGNode implementation
//===--------------------------------------------------------------------===//
-DDGNode::~DDGNode() {}
+DDGNode::~DDGNode() = default;
bool DDGNode::collectInstructions(
llvm::function_ref<bool(Instruction *)> const &Pred,
diff --git a/llvm/lib/Analysis/DDGPrinter.cpp b/llvm/lib/Analysis/DDGPrinter.cpp
index 0d5a936723ce..6b5acd204ec7 100644
--- a/llvm/lib/Analysis/DDGPrinter.cpp
+++ b/llvm/lib/Analysis/DDGPrinter.cpp
@@ -18,8 +18,8 @@
using namespace llvm;
-static cl::opt<bool> DotOnly("dot-ddg-only", cl::init(false), cl::Hidden,
- cl::ZeroOrMore, cl::desc("simple ddg dot graph"));
+static cl::opt<bool> DotOnly("dot-ddg-only", cl::Hidden,
+ cl::desc("simple ddg dot graph"));
static cl::opt<std::string> DDGDotFilenamePrefix(
"dot-ddg-filename-prefix", cl::init("ddg"), cl::Hidden,
cl::desc("The prefix used for the DDG dot file names."));
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 670532c6d9a8..c36e1d922915 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -24,9 +24,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -523,6 +521,44 @@ bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
return !Subscripts.empty();
}
+bool llvm::tryDelinearizeFixedSizeImpl(
+ ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn,
+ SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<int> &Sizes) {
+ Value *SrcPtr = getLoadStorePointerOperand(Inst);
+
+ // Check the simple case where the array dimensions are fixed size.
+ auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
+ if (!SrcGEP)
+ return false;
+
+ getIndexExpressionsFromGEP(*SE, SrcGEP, Subscripts, Sizes);
+
+ // Check that the two size arrays are non-empty and equal in length and
+ // value.
+  // TODO: it would be better to let the caller clear Subscripts, similar
+ // to how we handle Sizes.
+ if (Sizes.empty() || Subscripts.size() <= 1) {
+ Subscripts.clear();
+ return false;
+ }
+
+ // Check that for identical base pointers we do not miss index offsets
+ // that have been added before this GEP is applied.
+ Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts();
+ const SCEVUnknown *SrcBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
+ if (!SrcBase || SrcBasePtr != SrcBase->getValue()) {
+ Subscripts.clear();
+ return false;
+ }
+
+ assert(Subscripts.size() == Sizes.size() + 1 &&
+ "Expected equal number of entries in the list of size and "
+ "subscript.");
+
+ return true;
+}
+
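// What tryDelinearizeFixedSizeImpl recovers, in a worked case: an
// access A[i][j] into a fixed A[ROWS][COLS] linearizes to i*COLS + j,
// and delinearization splits that back into subscripts {i, j} with
// sizes {COLS}, one fewer size than subscripts, matching the assert
// above. A standalone sketch of the arithmetic:

#include <cstdio>

int main() {
  const long COLS = 16;
  long i = 3, j = 5;
  long Linear = i * COLS + j;              // what the GEP computes
  long SubOuter = Linear / COLS;           // recovered i
  long SubInner = Linear % COLS;           // recovered j
  std::printf("{%ld, %ld} sizes {%ld}\n", SubOuter, SubInner, COLS);
}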
namespace {
class Delinearization : public FunctionPass {
diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 117b12fc0701..e01ed48be376 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -21,19 +21,13 @@
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index f827f74d5367..3d2d84ecadb4 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -50,7 +50,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Delinearization.h"
@@ -58,10 +57,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -109,11 +106,10 @@ STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
static cl::opt<bool>
- Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ Delinearize("da-delinearize", cl::init(true), cl::Hidden,
cl::desc("Try to delinearize array references."));
static cl::opt<bool> DisableDelinearizationChecks(
- "da-disable-delinearization-checks", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
+ "da-disable-delinearization-checks", cl::Hidden,
cl::desc(
"Disable checks that try to statically verify validity of "
"delinearized subscripts. Enabling this option may result in incorrect "
@@ -121,7 +117,7 @@ static cl::opt<bool> DisableDelinearizationChecks(
"dimension to underflow or overflow into another dimension."));
static cl::opt<unsigned> MIVMaxLevelThreshold(
- "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore,
+ "da-miv-max-level-threshold", cl::init(7), cl::Hidden,
cl::desc("Maximum depth allowed for the recursive algorithm used to "
"explore MIV direction vectors."));
@@ -787,6 +783,8 @@ unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
unsigned D = DstLoop->getLoopDepth();
if (D > CommonLevels)
+ // This tries to make sure that we assign unique numbers to src and dst when
+ // the memory accesses reside in different loops that have the same depth.
return D - CommonLevels + SrcLevels;
else
return D;
@@ -796,10 +794,16 @@ unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {
// Returns true if Expression is loop invariant in LoopNest.
bool DependenceInfo::isLoopInvariant(const SCEV *Expression,
const Loop *LoopNest) const {
+ // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of
+  // any loop as invariant, because we only consider expression evaluation at a
+ // specific position (where the array access takes place), and not across the
+ // entire function.
if (!LoopNest)
return true;
- return SE->isLoopInvariant(Expression, LoopNest) &&
- isLoopInvariant(Expression, LoopNest->getParentLoop());
+
+ // If the expression is invariant in the outermost loop of the loop nest, it
+ // is invariant anywhere in the loop nest.
+ return SE->isLoopInvariant(Expression, LoopNest->getOutermostLoop());
}
@@ -890,13 +894,25 @@ void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {
}
}
-// Examine the scev and return true iff it's linear.
+// Examine the scev and return true iff it's affine.
// Collect any loops mentioned in the set of "Loops".
bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
SmallBitVector &Loops, bool IsSrc) {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
return isLoopInvariant(Expr, LoopNest);
+
+ // The AddRec must depend on one of the containing loops. Otherwise,
+ // mapSrcLoop and mapDstLoop return indices outside the intended range. This
+ // can happen when a subscript in one loop references an IV from a sibling
+ // loop that could not be replaced with a concrete exit value by
+ // getSCEVAtScope.
+ const Loop *L = LoopNest;
+ while (L && AddRec->getLoop() != L)
+ L = L->getParentLoop();
+ if (!L)
+ return false;
+
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
@@ -3318,59 +3334,45 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
return true;
}
+/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying
+/// arrays accessed are fixed-size arrays. Return true if delinearization was
+/// successful.
bool DependenceInfo::tryDelinearizeFixedSize(
Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
SmallVectorImpl<const SCEV *> &DstSubscripts) {
-
- Value *SrcPtr = getLoadStorePointerOperand(Src);
- Value *DstPtr = getLoadStorePointerOperand(Dst);
- const SCEVUnknown *SrcBase =
- dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
- const SCEVUnknown *DstBase =
- dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
- assert(SrcBase && DstBase && SrcBase == DstBase &&
- "expected src and dst scev unknowns to be equal");
-
- // Check the simple case where the array dimensions are fixed size.
- auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
- auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
- if (!SrcGEP || !DstGEP)
+ LLVM_DEBUG({
+ const SCEVUnknown *SrcBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+ const SCEVUnknown *DstBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+ assert(SrcBase && DstBase && SrcBase == DstBase &&
+ "expected src and dst scev unknowns to be equal");
+ });
+
+ SmallVector<int, 4> SrcSizes;
+ SmallVector<int, 4> DstSizes;
+ if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts,
+ SrcSizes) ||
+ !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts,
+ DstSizes))
return false;
- SmallVector<int, 4> SrcSizes, DstSizes;
- getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes);
- getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes);
-
// Check that the two size arrays are non-empty and equal in length and
// value.
- if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
- SrcSizes.size() != DstSizes.size() ||
+ if (SrcSizes.size() != DstSizes.size() ||
!std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
SrcSubscripts.clear();
DstSubscripts.clear();
return false;
}
- Value *SrcBasePtr = SrcGEP->getOperand(0);
- Value *DstBasePtr = DstGEP->getOperand(0);
- while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr))
- SrcBasePtr = PCast->getOperand(0);
- while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr))
- DstBasePtr = PCast->getOperand(0);
-
- // Check that for identical base pointers we do not miss index offsets
- // that have been added before this GEP is applied.
- if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) {
- SrcSubscripts.clear();
- DstSubscripts.clear();
- return false;
- }
-
assert(SrcSubscripts.size() == DstSubscripts.size() &&
- SrcSubscripts.size() == SrcSizes.size() + 1 &&
- "Expected equal number of entries in the list of sizes and "
- "subscripts.");
+ "Expected equal number of entries in the list of SrcSubscripts and "
+ "DstSubscripts.");
+
+ Value *SrcPtr = getLoadStorePointerOperand(Src);
+ Value *DstPtr = getLoadStorePointerOperand(Dst);
// In general we cannot safely assume that the subscripts recovered from GEPs
// are in the range of values defined for their corresponding array
@@ -3406,8 +3408,8 @@ bool DependenceInfo::tryDelinearizeFixedSize(
}
LLVM_DEBUG({
dbgs() << "Delinearized subscripts of fixed-size array\n"
- << "SrcGEP:" << *SrcGEP << "\n"
- << "DstGEP:" << *DstGEP << "\n";
+ << "SrcGEP:" << *SrcPtr << "\n"
+ << "DstGEP:" << *DstPtr << "\n";
});
return true;
}
diff --git a/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/llvm/lib/Analysis/DependenceGraphBuilder.cpp
index 6b90db4bafe1..7ee2adf49ebb 100644
--- a/llvm/lib/Analysis/DependenceGraphBuilder.cpp
+++ b/llvm/lib/Analysis/DependenceGraphBuilder.cpp
@@ -12,6 +12,7 @@
#include "llvm/Analysis/DependenceGraphBuilder.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EnumeratedArray.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DDG.h"
diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index 4a792fce51d1..79ea160afc22 100644
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "llvm/Support/Casting.h"
#if defined(LLVM_HAVE_TF_API)
#include "llvm/ADT/BitVector.h"
@@ -273,8 +272,8 @@ static const std::vector<TensorSpec> TrainingOnlyFeatures{
static const std::vector<TensorSpec> getInputFeatures() {
std::vector<TensorSpec> InputSpecs;
for (size_t I = 0; I < NumberOfFeatures; ++I)
- InputSpecs.push_back(
- TensorSpec::createSpec<int64_t>(TFFeedPrefix + FeatureNameMap[I], {1}));
+ InputSpecs.push_back(TensorSpec::createSpec<int64_t>(
+ TFFeedPrefix + FeatureMap[I].name(), FeatureMap[I].shape()));
append_range(InputSpecs, TrainingOnlyFeatures);
return InputSpecs;
}
@@ -290,8 +289,7 @@ TrainingLogger::TrainingLogger(StringRef LogFileName,
std::vector<LoggedFeatureSpec> FT;
for (size_t I = 0; I < NumberOfFeatures; ++I)
- FT.push_back(
- {TensorSpec::createSpec<int64_t>(FeatureNameMap.at(I), {1}), None});
+ FT.push_back({FeatureMap.at(I), None});
if (MUTR && MUTR->outputLoggedFeatureSpecs().size() > 1)
append_range(FT, drop_begin(MUTR->outputLoggedFeatureSpecs()));
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 39e80c2ad51c..1a4b09e0cac2 100644
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -73,15 +73,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Analysis/DomPrinter.cpp b/llvm/lib/Analysis/DomPrinter.cpp
index 6088de53028d..e9f5103e1276 100644
--- a/llvm/lib/Analysis/DomPrinter.cpp
+++ b/llvm/lib/Analysis/DomPrinter.cpp
@@ -24,74 +24,6 @@
using namespace llvm;
-namespace llvm {
-template<>
-struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
-
- DOTGraphTraits (bool isSimple=false)
- : DefaultDOTGraphTraits(isSimple) {}
-
- std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
-
- BasicBlock *BB = Node->getBlock();
-
- if (!BB)
- return "Post dominance root node";
-
-
- if (isSimple())
- return DOTGraphTraits<DOTFuncInfo *>
- ::getSimpleNodeLabel(BB, nullptr);
- else
- return DOTGraphTraits<DOTFuncInfo *>
- ::getCompleteNodeLabel(BB, nullptr);
- }
-};
-
-template<>
-struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
-
- DOTGraphTraits (bool isSimple=false)
- : DOTGraphTraits<DomTreeNode*>(isSimple) {}
-
- static std::string getGraphName(DominatorTree *DT) {
- return "Dominator tree";
- }
-
- std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
- return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
- }
-};
-
-template<>
-struct DOTGraphTraits<PostDominatorTree*>
- : public DOTGraphTraits<DomTreeNode*> {
-
- DOTGraphTraits (bool isSimple=false)
- : DOTGraphTraits<DomTreeNode*>(isSimple) {}
-
- static std::string getGraphName(PostDominatorTree *DT) {
- return "Post dominator tree";
- }
-
- std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) {
- return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
- }
-};
-}
-
-PreservedAnalyses DomTreePrinterPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- WriteDOTGraphToFile(F, &AM.getResult<DominatorTreeAnalysis>(F), "dom", false);
- return PreservedAnalyses::all();
-}
-
-PreservedAnalyses DomTreeOnlyPrinterPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- WriteDOTGraphToFile(F, &AM.getResult<DominatorTreeAnalysis>(F), "domonly",
- true);
- return PreservedAnalyses::all();
-}
void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
@@ -110,166 +42,167 @@ void DominatorTree::viewGraph() {
}
namespace {
-struct DominatorTreeWrapperPassAnalysisGraphTraits {
+struct LegacyDominatorTreeWrapperPassAnalysisGraphTraits {
static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) {
return &DTWP->getDomTree();
}
};
-struct DomViewer : public DOTGraphTraitsViewer<
- DominatorTreeWrapperPass, false, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomViewerWrapperPass
+ : public DOTGraphTraitsViewerWrapperPass<
+ DominatorTreeWrapperPass, false, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- DomViewer()
- : DOTGraphTraitsViewer<DominatorTreeWrapperPass, false, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits>(
- "dom", ID) {
- initializeDomViewerPass(*PassRegistry::getPassRegistry());
+ DomViewerWrapperPass()
+ : DOTGraphTraitsViewerWrapperPass<
+ DominatorTreeWrapperPass, false, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("dom", ID) {
+ initializeDomViewerWrapperPassPass(*PassRegistry::getPassRegistry());
}
};
-struct DomOnlyViewer : public DOTGraphTraitsViewer<
- DominatorTreeWrapperPass, true, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomOnlyViewerWrapperPass
+ : public DOTGraphTraitsViewerWrapperPass<
+ DominatorTreeWrapperPass, true, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- DomOnlyViewer()
- : DOTGraphTraitsViewer<DominatorTreeWrapperPass, true, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits>(
- "domonly", ID) {
- initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ DomOnlyViewerWrapperPass()
+ : DOTGraphTraitsViewerWrapperPass<
+ DominatorTreeWrapperPass, true, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("domonly", ID) {
+ initializeDomOnlyViewerWrapperPassPass(*PassRegistry::getPassRegistry());
}
};
-struct PostDominatorTreeWrapperPassAnalysisGraphTraits {
+struct LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits {
static PostDominatorTree *getGraph(PostDominatorTreeWrapperPass *PDTWP) {
return &PDTWP->getPostDomTree();
}
};
-struct PostDomViewer : public DOTGraphTraitsViewer<
- PostDominatorTreeWrapperPass, false,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomViewerWrapperPass
+ : public DOTGraphTraitsViewerWrapperPass<
+ PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- PostDomViewer() :
- DOTGraphTraitsViewer<PostDominatorTreeWrapperPass, false,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits>(
- "postdom", ID){
- initializePostDomViewerPass(*PassRegistry::getPassRegistry());
- }
+ PostDomViewerWrapperPass()
+ : DOTGraphTraitsViewerWrapperPass<
+ PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>("postdom",
+ ID) {
+ initializePostDomViewerWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
};
-struct PostDomOnlyViewer : public DOTGraphTraitsViewer<
- PostDominatorTreeWrapperPass, true,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomOnlyViewerWrapperPass
+ : public DOTGraphTraitsViewerWrapperPass<
+ PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- PostDomOnlyViewer() :
- DOTGraphTraitsViewer<PostDominatorTreeWrapperPass, true,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits>(
- "postdomonly", ID){
- initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
- }
+ PostDomOnlyViewerWrapperPass()
+ : DOTGraphTraitsViewerWrapperPass<
+ PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>(
+ "postdomonly", ID) {
+ initializePostDomOnlyViewerWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
};
} // end anonymous namespace
-char DomViewer::ID = 0;
-INITIALIZE_PASS(DomViewer, "view-dom",
+char DomViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(DomViewerWrapperPass, "view-dom",
"View dominance tree of function", false, false)
-char DomOnlyViewer::ID = 0;
-INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+char DomOnlyViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(DomOnlyViewerWrapperPass, "view-dom-only",
"View dominance tree of function (with no function bodies)",
false, false)
-char PostDomViewer::ID = 0;
-INITIALIZE_PASS(PostDomViewer, "view-postdom",
+char PostDomViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomViewerWrapperPass, "view-postdom",
"View postdominance tree of function", false, false)
-char PostDomOnlyViewer::ID = 0;
-INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+char PostDomOnlyViewerWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewerWrapperPass, "view-postdom-only",
"View postdominance tree of function "
"(with no function bodies)",
false, false)
namespace {
-struct DomPrinter : public DOTGraphTraitsPrinter<
- DominatorTreeWrapperPass, false, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomPrinterWrapperPass
+ : public DOTGraphTraitsPrinterWrapperPass<
+ DominatorTreeWrapperPass, false, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- DomPrinter()
- : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, false, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits>(
- "dom", ID) {
- initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+ DomPrinterWrapperPass()
+ : DOTGraphTraitsPrinterWrapperPass<
+ DominatorTreeWrapperPass, false, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("dom", ID) {
+ initializeDomPrinterWrapperPassPass(*PassRegistry::getPassRegistry());
}
};
-struct DomOnlyPrinter : public DOTGraphTraitsPrinter<
- DominatorTreeWrapperPass, true, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits> {
+struct DomOnlyPrinterWrapperPass
+ : public DOTGraphTraitsPrinterWrapperPass<
+ DominatorTreeWrapperPass, true, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- DomOnlyPrinter()
- : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, true, DominatorTree *,
- DominatorTreeWrapperPassAnalysisGraphTraits>(
- "domonly", ID) {
- initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ DomOnlyPrinterWrapperPass()
+ : DOTGraphTraitsPrinterWrapperPass<
+ DominatorTreeWrapperPass, true, DominatorTree *,
+ LegacyDominatorTreeWrapperPassAnalysisGraphTraits>("domonly", ID) {
+ initializeDomOnlyPrinterWrapperPassPass(*PassRegistry::getPassRegistry());
}
};
-struct PostDomPrinter
- : public DOTGraphTraitsPrinter<
- PostDominatorTreeWrapperPass, false,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomPrinterWrapperPass
+ : public DOTGraphTraitsPrinterWrapperPass<
+ PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- PostDomPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTreeWrapperPass, false,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits>(
- "postdom", ID) {
- initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
- }
+ PostDomPrinterWrapperPass()
+ : DOTGraphTraitsPrinterWrapperPass<
+ PostDominatorTreeWrapperPass, false, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>("postdom",
+ ID) {
+ initializePostDomPrinterWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
};
-struct PostDomOnlyPrinter
- : public DOTGraphTraitsPrinter<
- PostDominatorTreeWrapperPass, true,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits> {
+struct PostDomOnlyPrinterWrapperPass
+ : public DOTGraphTraitsPrinterWrapperPass<
+ PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits> {
static char ID;
- PostDomOnlyPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTreeWrapperPass, true,
- PostDominatorTree *,
- PostDominatorTreeWrapperPassAnalysisGraphTraits>(
- "postdomonly", ID) {
- initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
- }
+ PostDomOnlyPrinterWrapperPass()
+ : DOTGraphTraitsPrinterWrapperPass<
+ PostDominatorTreeWrapperPass, true, PostDominatorTree *,
+ LegacyPostDominatorTreeWrapperPassAnalysisGraphTraits>(
+ "postdomonly", ID) {
+ initializePostDomOnlyPrinterWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
};
} // end anonymous namespace
+char DomPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(DomPrinterWrapperPass, "dot-dom",
+ "Print dominance tree of function to 'dot' file", false, false)
-
-char DomPrinter::ID = 0;
-INITIALIZE_PASS(DomPrinter, "dot-dom",
- "Print dominance tree of function to 'dot' file",
- false, false)
-
-char DomOnlyPrinter::ID = 0;
-INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+char DomOnlyPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinterWrapperPass, "dot-dom-only",
"Print dominance tree of function to 'dot' file "
"(with no function bodies)",
false, false)
-char PostDomPrinter::ID = 0;
-INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
- "Print postdominance tree of function to 'dot' file",
- false, false)
+char PostDomPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomPrinterWrapperPass, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file", false,
+ false)
-char PostDomOnlyPrinter::ID = 0;
-INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+char PostDomOnlyPrinterWrapperPass::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinterWrapperPass, "dot-postdom-only",
"Print postdominance tree of function to 'dot' file "
"(with no function bodies)",
false, false)
@@ -278,34 +211,34 @@ INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
// the link time optimization.
-FunctionPass *llvm::createDomPrinterPass() {
- return new DomPrinter();
+FunctionPass *llvm::createDomPrinterWrapperPassPass() {
+ return new DomPrinterWrapperPass();
}
-FunctionPass *llvm::createDomOnlyPrinterPass() {
- return new DomOnlyPrinter();
+FunctionPass *llvm::createDomOnlyPrinterWrapperPassPass() {
+ return new DomOnlyPrinterWrapperPass();
}
-FunctionPass *llvm::createDomViewerPass() {
- return new DomViewer();
+FunctionPass *llvm::createDomViewerWrapperPassPass() {
+ return new DomViewerWrapperPass();
}
-FunctionPass *llvm::createDomOnlyViewerPass() {
- return new DomOnlyViewer();
+FunctionPass *llvm::createDomOnlyViewerWrapperPassPass() {
+ return new DomOnlyViewerWrapperPass();
}
-FunctionPass *llvm::createPostDomPrinterPass() {
- return new PostDomPrinter();
+FunctionPass *llvm::createPostDomPrinterWrapperPassPass() {
+ return new PostDomPrinterWrapperPass();
}
-FunctionPass *llvm::createPostDomOnlyPrinterPass() {
- return new PostDomOnlyPrinter();
+FunctionPass *llvm::createPostDomOnlyPrinterWrapperPassPass() {
+ return new PostDomOnlyPrinterWrapperPass();
}
-FunctionPass *llvm::createPostDomViewerPass() {
- return new PostDomViewer();
+FunctionPass *llvm::createPostDomViewerWrapperPassPass() {
+ return new PostDomViewerWrapperPass();
}
-FunctionPass *llvm::createPostDomOnlyViewerPass() {
- return new PostDomOnlyViewer();
+FunctionPass *llvm::createPostDomOnlyViewerWrapperPassPass() {
+ return new PostDomOnlyViewerWrapperPass();
}
diff --git a/llvm/lib/Analysis/DomTreeUpdater.cpp b/llvm/lib/Analysis/DomTreeUpdater.cpp
index 6e299263e66d..888c16723208 100644
--- a/llvm/lib/Analysis/DomTreeUpdater.cpp
+++ b/llvm/lib/Analysis/DomTreeUpdater.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/GenericDomTree.h"
#include <algorithm>
@@ -314,98 +315,6 @@ PostDominatorTree &DomTreeUpdater::getPostDomTree() {
return *PDT;
}
-void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) {
-
-#ifndef NDEBUG
- assert(isUpdateValid({DominatorTree::Insert, From, To}) &&
- "Inserted edge does not appear in the CFG");
-#endif
-
- if (!DT && !PDT)
- return;
-
- // Won't affect DomTree and PostDomTree; discard update.
- if (From == To)
- return;
-
- if (Strategy == UpdateStrategy::Eager) {
- if (DT)
- DT->insertEdge(From, To);
- if (PDT)
- PDT->insertEdge(From, To);
- return;
- }
-
- PendUpdates.push_back({DominatorTree::Insert, From, To});
-}
-
-void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
- if (From == To)
- return;
-
- if (!DT && !PDT)
- return;
-
- if (!isUpdateValid({DominatorTree::Insert, From, To}))
- return;
-
- if (Strategy == UpdateStrategy::Eager) {
- if (DT)
- DT->insertEdge(From, To);
- if (PDT)
- PDT->insertEdge(From, To);
- return;
- }
-
- PendUpdates.push_back({DominatorTree::Insert, From, To});
-}
-
-void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) {
-
-#ifndef NDEBUG
- assert(isUpdateValid({DominatorTree::Delete, From, To}) &&
- "Deleted edge still exists in the CFG!");
-#endif
-
- if (!DT && !PDT)
- return;
-
- // Won't affect DomTree and PostDomTree; discard update.
- if (From == To)
- return;
-
- if (Strategy == UpdateStrategy::Eager) {
- if (DT)
- DT->deleteEdge(From, To);
- if (PDT)
- PDT->deleteEdge(From, To);
- return;
- }
-
- PendUpdates.push_back({DominatorTree::Delete, From, To});
-}
-
-void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
- if (From == To)
- return;
-
- if (!DT && !PDT)
- return;
-
- if (!isUpdateValid({DominatorTree::Delete, From, To}))
- return;
-
- if (Strategy == UpdateStrategy::Eager) {
- if (DT)
- DT->deleteEdge(From, To);
- if (PDT)
- PDT->deleteEdge(From, To);
- return;
- }
-
- PendUpdates.push_back({DominatorTree::Delete, From, To});
-}
-
void DomTreeUpdater::dropOutOfDateUpdates() {
if (Strategy == DomTreeUpdater::UpdateStrategy::Eager)
return;
diff --git a/llvm/lib/Analysis/DominanceFrontier.cpp b/llvm/lib/Analysis/DominanceFrontier.cpp
index a8806fe5a480..ccba913ccfe5 100644
--- a/llvm/lib/Analysis/DominanceFrontier.cpp
+++ b/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -15,7 +15,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/EHPersonalities.cpp b/llvm/lib/Analysis/EHPersonalities.cpp
index df8b7e12e8d7..277ff6ba735f 100644
--- a/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/llvm/lib/Analysis/EHPersonalities.cpp
@@ -8,6 +8,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -67,7 +68,10 @@ StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
}
EHPersonality llvm::getDefaultEHPersonality(const Triple &T) {
- return EHPersonality::GNU_C;
+ if (T.isPS5())
+ return EHPersonality::GNU_CXX;
+ else
+ return EHPersonality::GNU_C;
}
bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 33519038e225..782c11937507 100644
--- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -12,48 +12,87 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include <deque>
using namespace llvm;
-FunctionPropertiesInfo
-FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
- const LoopInfo &LI) {
-
- FunctionPropertiesInfo FPI;
+namespace {
+int64_t getNrBlocksFromCond(const BasicBlock &BB) {
+ int64_t Ret = 0;
+ if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
+ if (BI->isConditional())
+ Ret += BI->getNumSuccessors();
+ } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
+ Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
+ }
+ return Ret;
+}
- FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
+int64_t getUses(const Function &F) {
+ return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
+}
+} // namespace
- for (const auto &BB : F) {
- ++FPI.BasicBlockCount;
+void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) {
+ updateForBB(BB, +1);
+}
- if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
- if (BI->isConditional())
- FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors();
- } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
- FPI.BlocksReachedFromConditionalInstruction +=
- (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
+void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB,
+ int64_t Direction) {
+ assert(Direction == 1 || Direction == -1);
+ BasicBlockCount += Direction;
+ BlocksReachedFromConditionalInstruction +=
+ (Direction * getNrBlocksFromCond(BB));
+ for (const auto &I : BB) {
+ if (auto *CS = dyn_cast<CallBase>(&I)) {
+ const auto *Callee = CS->getCalledFunction();
+ if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
+ DirectCallsToDefinedFunctions += Direction;
}
-
- for (const auto &I : BB) {
- if (auto *CS = dyn_cast<CallBase>(&I)) {
- const auto *Callee = CS->getCalledFunction();
- if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
- ++FPI.DirectCallsToDefinedFunctions;
- }
- if (I.getOpcode() == Instruction::Load) {
- ++FPI.LoadInstCount;
- } else if (I.getOpcode() == Instruction::Store) {
- ++FPI.StoreInstCount;
- }
+ if (I.getOpcode() == Instruction::Load) {
+ LoadInstCount += Direction;
+ } else if (I.getOpcode() == Instruction::Store) {
+ StoreInstCount += Direction;
}
- // Loop Depth of the Basic Block
- int64_t LoopDepth;
- LoopDepth = LI.getLoopDepth(&BB);
- if (FPI.MaxLoopDepth < LoopDepth)
- FPI.MaxLoopDepth = LoopDepth;
}
- FPI.TopLevelLoopCount += llvm::size(LI);
+ TotalInstructionCount += Direction * BB.sizeWithoutDebug();
+}
+
+void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
+ const LoopInfo &LI) {
+
+ Uses = getUses(F);
+ TopLevelLoopCount = llvm::size(LI);
+ MaxLoopDepth = 0;
+ std::deque<const Loop *> Worklist;
+ llvm::append_range(Worklist, LI);
+ while (!Worklist.empty()) {
+ const auto *L = Worklist.front();
+ MaxLoopDepth =
+ std::max(MaxLoopDepth, static_cast<int64_t>(L->getLoopDepth()));
+ Worklist.pop_front();
+ llvm::append_range(Worklist, L->getSubLoops());
+ }
+}
+
+FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo(
+ const Function &F, FunctionAnalysisManager &FAM) {
+
+ FunctionPropertiesInfo FPI;
+ // The const casts are due to the getResult API - there's no mutation of F.
+ const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(F));
+ const auto &DT =
+ FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
+ for (const auto &BB : F)
+ if (DT.isReachableFromEntry(&BB))
+ FPI.reIncludeBB(BB);
+ FPI.updateAggregateStats(F, LI);
return FPI;
}
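The refactoring turns the old one-shot computation into signed delta accounting: updateForBB(BB, +1) adds a block's contribution and updateForBB(BB, -1) subtracts it again, so individual blocks can be re-accounted after they change. A minimal standalone sketch of the same pattern, with illustrative names only:

    // Signed-delta accounting, as in updateForBB() above: one routine both
    // adds (+1) and removes (-1) a block's contribution to the totals.
    struct BlockCounters {
      long Blocks = 0, Loads = 0, Stores = 0;
      void update(long NumLoads, long NumStores, long Direction) {
        // Direction is +1 to include a block, -1 to discount it.
        Blocks += Direction;
        Loads += Direction * NumLoads;
        Stores += Direction * NumStores;
      }
    };
    // Usage: discount before mutating a block, re-include afterwards:
    //   C.update(NL, NS, -1); mutate(BB); C.update(NL2, NS2, +1);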
@@ -67,15 +106,15 @@ void FunctionPropertiesInfo::print(raw_ostream &OS) const {
<< "LoadInstCount: " << LoadInstCount << "\n"
<< "StoreInstCount: " << StoreInstCount << "\n"
<< "MaxLoopDepth: " << MaxLoopDepth << "\n"
- << "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n";
+ << "TopLevelLoopCount: " << TopLevelLoopCount << "\n"
+ << "TotalInstructionCount: " << TotalInstructionCount << "\n\n";
}
AnalysisKey FunctionPropertiesAnalysis::Key;
FunctionPropertiesInfo
FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
- return FunctionPropertiesInfo::getFunctionPropertiesInfo(
- F, FAM.getResult<LoopAnalysis>(F));
+ return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM);
}
PreservedAnalyses
@@ -86,3 +125,127 @@ FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<FunctionPropertiesAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
+
+FunctionPropertiesUpdater::FunctionPropertiesUpdater(
+ FunctionPropertiesInfo &FPI, const CallBase &CB)
+ : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) {
+ assert(isa<CallInst>(CB) || isa<InvokeInst>(CB));
+ // For BBs that are likely to change, we subtract from feature totals their
+ // contribution. Some features, like max loop counts or depths, are left
+ // invalid, as they will be updated post-inlining.
+ SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs;
+ // The CB BB will change - it'll either be split or the callee's body (single
+ // BB) will be pasted in.
+ LikelyToChangeBBs.insert(&CallSiteBB);
+
+ // The caller's entry BB may change due to new alloca instructions.
+ LikelyToChangeBBs.insert(&*Caller.begin());
+
+ // The successors may become unreachable in the case of `invoke` inlining.
+ // We track successors separately, too, because they form a boundary, together
+ // with the CB BB ('Entry') between which the inlined callee will be pasted.
+ Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB));
+
+ // Inlining only handles invokes and calls. If this is an invoke, and inlining
+ // it pulls another invoke, the original landing pad may get split, so as to
+ // share its content with other potential users. So the edge up to which we
+ // need to invalidate and then re-account BB data is the successors of the
+ // current landing pad. We can leave the current lp, too - if it doesn't get
+ // split, then it will be the place traversal stops. Either way, the
+ // discounted BBs will be checked if reachable and re-added.
+ if (const auto *II = dyn_cast<InvokeInst>(&CB)) {
+ const auto *UnwindDest = II->getUnwindDest();
+ Successors.insert(succ_begin(UnwindDest), succ_end(UnwindDest));
+ }
+
+ // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop).
+ // We are only interested in the BBs the CFG reaches past the callsite BB;
+ // they define the frontier past which we don't want to re-process BBs.
+ // Including the callsite BB in this case would prematurely stop the
+ // traversal in finish().
+ Successors.erase(&CallSiteBB);
+
+ for (const auto *BB : Successors)
+ LikelyToChangeBBs.insert(BB);
+
+ // Commit the change. While some of the BBs accounted for above may play a
+ // dual role - e.g. the caller's entry BB may be the same as the callsite BB -
+ // set insertion semantics make sure we account for them only once. The same
+ // discipline needs to be followed in `finish`, too.
+ for (const auto *BB : LikelyToChangeBBs)
+ FPI.updateForBB(*BB, -1);
+}
+
+void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const {
+ // Update feature values from the BBs that were copied from the callee, or
+ // might have been modified because of inlining. The latter have been
+ // subtracted in the FunctionPropertiesUpdater ctor.
+ // There could be successors that were reached before but now are only
+ // reachable from elsewhere in the CFG.
+ // One example is the following diamond CFG (lines are arrows pointing down):
+ // A
+ // / \
+ // B C
+ // | |
+ // | D
+ // | |
+ // | E
+ // \ /
+ // F
+ // There's a call site in C that is inlined. Upon doing that, it turns out
+ // it expands to
+ // call void @llvm.trap()
+ // unreachable
+ // F isn't reachable from C anymore, but we did discount it when we set up
+ // FunctionPropertiesUpdater, so we need to re-include it here.
+ // At the same time, D and E were reachable before, but now are not anymore,
+ // so we need to leave D out (we discounted it at setup), and explicitly
+ // remove E.
+ SetVector<const BasicBlock *> Reinclude;
+ SetVector<const BasicBlock *> Unreachable;
+ const auto &DT =
+ FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(Caller));
+
+ if (&CallSiteBB != &*Caller.begin())
+ Reinclude.insert(&*Caller.begin());
+
+ // Distribute the successors to the 2 buckets.
+ for (const auto *Succ : Successors)
+ if (DT.isReachableFromEntry(Succ))
+ Reinclude.insert(Succ);
+ else
+ Unreachable.insert(Succ);
+
+ // For reinclusion, we want to stop at the reachable successors, which are at
+ // the beginning of the worklist; but, starting from the callsite bb and
+ // ending at those successors, we also want to perform a traversal.
+ // IncludeSuccessorsMark is the index after which we include successors.
+ const auto IncludeSuccessorsMark = Reinclude.size();
+ bool CSInsertion = Reinclude.insert(&CallSiteBB);
+ (void)CSInsertion;
+ assert(CSInsertion);
+ for (size_t I = 0; I < Reinclude.size(); ++I) {
+ const auto *BB = Reinclude[I];
+ FPI.reIncludeBB(*BB);
+ if (I >= IncludeSuccessorsMark)
+ Reinclude.insert(succ_begin(BB), succ_end(BB));
+ }
+
+ // For exclusion, we don't need to exclude the set of BBs that were successors
+ // before and are now unreachable, because we already did that at setup. For
+ // the rest, as long as a successor is unreachable, we want to explicitly
+ // exclude it.
+ const auto AlreadyExcludedMark = Unreachable.size();
+ for (size_t I = 0; I < Unreachable.size(); ++I) {
+ const auto *U = Unreachable[I];
+ if (I >= AlreadyExcludedMark)
+ FPI.updateForBB(*U, -1);
+ for (const auto *Succ : successors(U))
+ if (!DT.isReachableFromEntry(Succ))
+ Unreachable.insert(Succ);
+ }
+
+ const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(Caller));
+ FPI.updateAggregateStats(Caller, LI);
+ assert(FPI == FunctionPropertiesInfo::getFunctionPropertiesInfo(Caller, FAM));
+}
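Taken together, the ctor and finish() bracket a CFG mutation. A hedged sketch of the intended call pattern around inlining a single call site; InlineFunction and its setup are assumed context, not shown in this diff:

    // Sketch: bracketing inlining with the updater. The ctor discounts
    // likely-to-change blocks; finish() re-accounts what is still reachable.
    FunctionPropertiesUpdater FPU(FPI, CB);
    InlineFunctionInfo IFI;
    if (InlineFunction(CB, IFI).isSuccess())
      FPU.finish(FAM);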
diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp
index 6869530148c5..e82d2fae9356 100644
--- a/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -21,11 +21,11 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -68,8 +68,8 @@ class GlobalsAAResult::FunctionInfo {
/// should provide this much alignment at least, but this makes it clear we
/// specifically rely on this amount of alignment.
struct alignas(8) AlignedMap {
- AlignedMap() {}
- AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {}
+ AlignedMap() = default;
+ AlignedMap(const AlignedMap &Arg) = default;
GlobalInfoMapType Map;
};
@@ -102,7 +102,7 @@ class GlobalsAAResult::FunctionInfo {
"Insufficient low bits to store our flag and ModRef info.");
public:
- FunctionInfo() {}
+ FunctionInfo() = default;
~FunctionInfo() {
delete Info.getPointer();
}
@@ -511,6 +511,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
Handles.front().I = Handles.begin();
bool KnowNothing = false;
+ // Intrinsics, like any other synchronizing function, can make the effects
+ // of other threads visible. Without nosync we really know nothing.
+ // Similarly, if `nocallback` is missing, the function, or intrinsic,
+ // can call into the module arbitrarily. If both are set, the function
+ // has an effect but will not interact with accesses of internal
+ // globals inside the module. We are conservative here for optnone
+ // functions; this might not be necessary.
+ auto MaySyncOrCallIntoModule = [](const Function &F) {
+ return !F.isDeclaration() || !F.hasNoSync() ||
+ !F.hasFnAttribute(Attribute::NoCallback);
+ };
+
// Collect the mod/ref properties due to called functions. We only compute
// one mod-ref set.
for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
@@ -525,7 +537,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Can't do better than that!
} else if (F->onlyReadsMemory()) {
FI.addModRefInfo(ModRefInfo::Ref);
- if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
+ if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
// This function might call back into the module and read a global -
// consider every global as possibly being read by this function.
FI.setMayReadAnyGlobal();
@@ -533,7 +545,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
FI.addModRefInfo(ModRefInfo::ModRef);
if (!F->onlyAccessesArgMemory())
FI.setMayReadAnyGlobal();
- if (!F->isIntrinsic()) {
+ if (MaySyncOrCallIntoModule(*F)) {
KnowNothing = true;
break;
}
@@ -585,12 +597,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We handle calls specially because the graph-relevant aspects are
// handled above.
if (auto *Call = dyn_cast<CallBase>(&I)) {
- auto &TLI = GetTLI(*Node->getFunction());
- if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) {
- // FIXME: It is completely unclear why this is necessary and not
- // handled by the above graph code.
- FI.addModRefInfo(ModRefInfo::ModRef);
- } else if (Function *Callee = Call->getCalledFunction()) {
+ if (Function *Callee = Call->getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
if (isa<DbgInfoIntrinsic>(Call))
@@ -979,7 +986,7 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
}
}
-GlobalsAAResult::~GlobalsAAResult() {}
+GlobalsAAResult::~GlobalsAAResult() = default;
/*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule(
Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
@@ -1010,6 +1017,24 @@ GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) {
AM.getResult<CallGraphAnalysis>(M));
}
+PreservedAnalyses RecomputeGlobalsAAPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (auto *G = AM.getCachedResult<GlobalsAA>(M)) {
+ auto &CG = AM.getResult<CallGraphAnalysis>(M);
+ G->NonAddressTakenGlobals.clear();
+ G->UnknownFunctionsWithLocalLinkage = false;
+ G->IndirectGlobals.clear();
+ G->AllocsForIndirectGlobals.clear();
+ G->FunctionInfos.clear();
+ G->FunctionToSCCMap.clear();
+ G->Handles.clear();
+ G->CollectSCCMembership(CG);
+ G->AnalyzeGlobals(M);
+ G->AnalyzeCallGraph(CG, M);
+ }
+ return PreservedAnalyses::all();
+}
+
char GlobalsAAWrapperPass::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
"Globals Alias Analysis", false, true)
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 01681c47418a..3d51042f4da8 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -64,7 +64,7 @@ void IRInstructionData::initializeInstruction() {
// Here we collect the operands and their types for determining whether
// the structure of the operand use matches between two different candidates.
for (Use &OI : Inst->operands()) {
- if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
+ if (isa<CmpInst>(Inst) && RevisedPredicate) {
// If we have a CmpInst where the predicate is reversed, it means the
// operands must be reversed as well.
OperVals.insert(OperVals.begin(), OI.get());
@@ -183,7 +183,7 @@ CmpInst::Predicate IRInstructionData::getPredicate() const {
assert(isa<CmpInst>(Inst) &&
"Can only get a predicate from a compare instruction");
- if (RevisedPredicate.hasValue())
+ if (RevisedPredicate)
return RevisedPredicate.getValue();
return cast<CmpInst>(Inst)->getPredicate();
@@ -193,7 +193,7 @@ StringRef IRInstructionData::getCalleeName() const {
assert(isa<CallInst>(Inst) &&
"Can only get a name from a call instruction");
- assert(CalleeName.hasValue() && "CalleeName has not been set");
+ assert(CalleeName && "CalleeName has not been set");
return *CalleeName;
}
@@ -289,14 +289,12 @@ void IRInstructionMapper::convertToUnsignedVec(
}
}
- if (HaveLegalRange) {
- if (AddedIllegalLastTime)
- mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
- for (IRInstructionData *ID : InstrListForBB)
- this->IDL->push_back(*ID);
- llvm::append_range(InstrList, InstrListForBB);
- llvm::append_range(IntegerMapping, IntegerMappingForBB);
- }
+ if (AddedIllegalLastTime)
+ mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
+ for (IRInstructionData *ID : InstrListForBB)
+ this->IDL->push_back(*ID);
+ llvm::append_range(InstrList, InstrListForBB);
+ llvm::append_range(IntegerMapping, IntegerMappingForBB);
}
// TODO: This is the same as the MachineOutliner, and should be consolidated
@@ -461,6 +459,18 @@ IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
// that both of these instructions are not nullptrs.
FirstInst = FirstInstIt;
LastInst = LastInstIt;
+
+ // Add the basic blocks contained in the set into the global value numbering.
+ DenseSet<BasicBlock *> BBSet;
+ getBasicBlocks(BBSet);
+ for (BasicBlock *BB : BBSet) {
+ if (ValueToNumber.find(BB) != ValueToNumber.end())
+ continue;
+
+ ValueToNumber.try_emplace(BB, LocalValNumber);
+ NumberToValue.try_emplace(LocalValNumber, BB);
+ LocalValNumber++;
+ }
}
bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
@@ -516,19 +526,13 @@ static bool checkNumberingAndReplaceCommutative(
for (Value *V : SourceOperands) {
ArgVal = SourceValueToNumberMapping.find(V)->second;
+ // Instead of finding a current mapping, we attempt to insert a set.
std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
std::make_pair(ArgVal, TargetValueNumbers));
- // Instead of finding a current mapping, we inserted a set. This means a
- // mapping did not exist for the source Instruction operand, it has no
- // current constraints we need to check.
- if (WasInserted)
- continue;
-
- // If a mapping already exists for the source operand to the values in the
- // other IRSimilarityCandidate we need to iterate over the items in other
- // IRSimilarityCandidate's Instruction to determine whether there is a valid
- // mapping of Value to Value.
+ // We need to iterate over the items in the other IRSimilarityCandidate's
+ // Instruction to determine whether there is a valid mapping of
+ // Value to Value.
DenseSet<unsigned> NewSet;
for (unsigned &Curr : ValueMappingIt->second)
// If we can find the value in the mapping, we add it to the new set.
@@ -548,7 +552,6 @@ static bool checkNumberingAndReplaceCommutative(
if (ValueMappingIt->second.size() != 1)
continue;
-
unsigned ValToRemove = *ValueMappingIt->second.begin();
// When there is only one item left in the mapping for an operand, remove
// the value from the other operands. If it results in there being no
@@ -791,7 +794,8 @@ bool IRSimilarityCandidate::compareStructure(
// We have different paths for commutative instructions and non-commutative
// instructions since commutative instructions could allow multiple mappings
// to certain values.
- if (IA->isCommutative() && !isa<FPMathOperator>(IA)) {
+ if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
+ !isa<IntrinsicInst>(IA)) {
if (!compareCommutativeOperandMapping(
{A, OperValsA, ValueNumberMappingA},
{B, OperValsB, ValueNumberMappingB}))
@@ -1008,6 +1012,40 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
}
+
+ DenseSet<BasicBlock *> BBSet;
+ getBasicBlocks(BBSet);
+ // Find canonical numbers for the BasicBlocks in the current candidate.
+ // This is done by finding the corresponding value for the first instruction
+ // in the block in the current candidate, then finding the matching value in
+ // the source candidate. Then, via the parent of that value, the canonical
+ // number of the block in the source candidate is used as the canonical
+ // number for the block in the current candidate.
+ for (BasicBlock *BB : BBSet) {
+ unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
+
+ // We can skip the BasicBlock if the canonical numbering has already been
+ // found in a separate instruction.
+ if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
+ continue;
+
+ // If the basic block is the starting block, then the shared instruction may
+ // not be the first instruction in the block; instead, it will be the first
+ // instruction in the similarity region.
+ Value *FirstOutlineInst = BB == getStartBB()
+ ? frontInstruction()
+ : &*BB->instructionsWithoutDebug().begin();
+
+ unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
+ unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
+ unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
+ Value *SourceV = *SourceCand.fromGVN(SourceGVN);
+ BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
+ unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
+ unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
+ CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
+ NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
+ }
}
void IRSimilarityCandidate::createCanonicalMappingFor(
@@ -1162,11 +1200,12 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
+ Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
populateMapper(Modules, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
- return SimilarityCandidates.getValue();
+ return *SimilarityCandidates;
}
SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
@@ -1175,6 +1214,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
+ Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
@@ -1182,7 +1222,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
populateMapper(M, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
- return SimilarityCandidates.getValue();
+ return *SimilarityCandidates;
}
INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
@@ -1196,7 +1236,8 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName, !DisableIntrinsics));
+ MatchCallsByName, !DisableIntrinsics,
+ false));
return false;
}
@@ -1214,7 +1255,8 @@ AnalysisKey IRSimilarityAnalysis::Key;
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
ModuleAnalysisManager &) {
auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName, !DisableIntrinsics);
+ MatchCallsByName, !DisableIntrinsics,
+ false);
IRSI.findSimilarity(M);
return IRSI;
}
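Several hunks above replace llvm::Optional's hasValue()/getValue() with the contextual bool conversion and operator*. In miniature:

    // The Optional migration pattern used throughout this file.
    llvm::Optional<unsigned> V = compute(); // compute() is a placeholder
    if (V.hasValue())        // old spelling
      use(V.getValue());
    if (V)                   // new spelling, identical semantics
      use(*V);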
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 44b1d94ebdc8..e4d706ab045c 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -11,26 +11,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DemandedBits.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -237,12 +227,10 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
return true;
}
-bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
- Loop *TheLoop, FastMathFlags FuncFMF,
- RecurrenceDescriptor &RedDes,
- DemandedBits *DB,
- AssumptionCache *AC,
- DominatorTree *DT) {
+bool RecurrenceDescriptor::AddReductionVar(
+ PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
+ RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC,
+ DominatorTree *DT, ScalarEvolution *SE) {
if (Phi->getNumIncomingValues() != 2)
return false;
@@ -259,6 +247,12 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// This includes users of the reduction, variables (which form a cycle
// which ends in the phi node).
Instruction *ExitInstruction = nullptr;
+
+ // Variable to keep the last visited store instruction. By the end of the
+ // algorithm this variable will either be empty or hold the intermediate
+ // reduction value stored to an invariant address.
+ StoreInst *IntermediateStore = nullptr;
+
// Indicates that we found a reduction operation in our scan.
bool FoundReduxOp = false;
@@ -324,6 +318,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// - By instructions outside of the loop (safe).
// * One value may have several outside users, but all outside
// uses must be of the same value.
+ // - By store instructions with a loop invariant address (safe with
+ // the following restrictions):
+ // * If there are several stores, all must have the same address.
+ // * Final value should be stored in that loop invariant address.
// - By an instruction that is not part of the reduction (not safe).
// This is either:
// * An instruction type other than PHI or the reduction operation.
@@ -331,6 +329,43 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
while (!Worklist.empty()) {
Instruction *Cur = Worklist.pop_back_val();
+ // Store instructions are allowed only if they store the reduction
+ // value to the same loop-invariant memory location.
+ if (auto *SI = dyn_cast<StoreInst>(Cur)) {
+ if (!SE) {
+ LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
+ << "Scalar Evolution Analysis\n");
+ return false;
+ }
+
+ const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
+ // Check that this is the same address as previous stores.
+ if (IntermediateStore) {
+ const SCEV *OtherScev =
+ SE->getSCEV(IntermediateStore->getPointerOperand());
+
+ if (OtherScev != PtrScev) {
+ LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
+ << "inside the loop: " << *SI->getPointerOperand()
+ << " and "
+ << *IntermediateStore->getPointerOperand() << '\n');
+ return false;
+ }
+ }
+
+ // Check that the pointer is loop-invariant.
+ if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
+ LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
+ << "inside the loop: " << *SI->getPointerOperand()
+ << '\n');
+ return false;
+ }
+
+ // IntermediateStore is always the last store in the loop.
+ IntermediateStore = SI;
+ continue;
+ }
+
// No Users.
// If the instruction has no users then this is a broken chain and can't be
// a reduction variable.
@@ -453,10 +488,17 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// reductions which are represented as a cmp followed by a select.
InstDesc IgnoredVal(false, nullptr);
if (VisitedInsts.insert(UI).second) {
- if (isa<PHINode>(UI))
+ if (isa<PHINode>(UI)) {
PHIs.push_back(UI);
- else
+ } else {
+ StoreInst *SI = dyn_cast<StoreInst>(UI);
+ if (SI && SI->getPointerOperand() == Cur) {
+ // The reduction variable chain can only be stored somewhere; it
+ // can't be used as an address.
+ return false;
+ }
NonPHIs.push_back(UI);
+ }
} else if (!isa<PHINode>(UI) &&
((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
!isa<SelectInst>(UI)) ||
@@ -476,7 +518,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// This means we have seen one but not the other instruction of the
// pattern or more than just a select and cmp. Zero implies that we saw a
- // llvm.min/max instrinsic, which is always OK.
+ // llvm.min/max intrinsic, which is always OK.
if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
NumCmpSelectPatternInst != 0)
return false;
@@ -484,6 +526,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
return false;
+ if (IntermediateStore) {
+ // Check that stored value goes to the phi node again. This way we make sure
+ // that the value stored in IntermediateStore is indeed the final reduction
+ // value.
+ if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
+ LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
+ << *IntermediateStore << '\n');
+ return false;
+ }
+
+ // If there is an exit instruction, its value should be stored in
+ // IntermediateStore.
+ if (ExitInstruction &&
+ IntermediateStore->getValueOperand() != ExitInstruction) {
+ LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
+ "store last calculated value of the reduction: "
+ << *IntermediateStore << '\n');
+ return false;
+ }
+
+ // If all uses are inside the loop (intermediate stores), then the
+ // reduction value after the loop will be the one used in the last store.
+ if (!ExitInstruction)
+ ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
+ }
+
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
return false;
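In source terms, the newly supported shape is a reduction whose running value is also stored to a single loop-invariant address on every iteration. An illustrative C-style example, not taken from the patch:

    // 'sum' is an ordinary add reduction; each intermediate value is also
    // stored to the same loop-invariant address, and the last store holds
    // the final reduction value - exactly what the checks above enforce.
    void f(const int *a, int *dst, int n) {
      int sum = 0;
      for (int i = 0; i < n; ++i) {
        sum += a[i];
        *dst = sum; // IntermediateStore: invariant address
      }
    }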
@@ -545,9 +613,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// is saved as part of the RecurrenceDescriptor.
// Save the description of this reduction variable.
- RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
- RecurrenceType, IsSigned, IsOrdered, CastInsts,
- MinWidthCastToRecurrenceType);
+ RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
+ FMF, ExactFPMathInst, RecurrenceType, IsSigned,
+ IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
RedDes = RD;
return true;
@@ -771,7 +839,8 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
RecurrenceDescriptor &RedDes,
DemandedBits *DB, AssumptionCache *AC,
- DominatorTree *DT) {
+ DominatorTree *DT,
+ ScalarEvolution *SE) {
BasicBlock *Header = TheLoop->getHeader();
Function &F = *Header->getParent();
FastMathFlags FMF;
@@ -780,72 +849,85 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
FMF.setNoSignedZeros(
F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
- if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
- DT)) {
+ DT, SE)) {
LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
<< *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
+ if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
- DT)) {
+ DT, SE)) {
LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
<< " PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
- DT)) {
+ if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
return true;
}
@@ -917,12 +999,37 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
return false;
- // Do not try to sink an instruction multiple times (if multiple operands
- // are first order recurrences).
- // TODO: We can support this case, by sinking the instruction after the
- // 'deepest' previous instruction.
- if (SinkAfter.find(SinkCandidate) != SinkAfter.end())
- return false;
+ // Avoid sinking an instruction multiple times (if multiple operands are
+ // first order recurrences) by sinking once - after the latest 'previous'
+ // instruction.
+ auto It = SinkAfter.find(SinkCandidate);
+ if (It != SinkAfter.end()) {
+ auto *OtherPrev = It->second;
+ // Find the earliest entry in the 'sink-after' chain. The last entry in
+ // the chain is the original 'Previous' for a recurrence handled earlier.
+ auto EarlierIt = SinkAfter.find(OtherPrev);
+ while (EarlierIt != SinkAfter.end()) {
+ Instruction *EarlierInst = EarlierIt->second;
+ EarlierIt = SinkAfter.find(EarlierInst);
+ // Bail out if order has not been preserved.
+ if (EarlierIt != SinkAfter.end() &&
+ !DT->dominates(EarlierInst, OtherPrev))
+ return false;
+ OtherPrev = EarlierInst;
+ }
+ // Bail out if order has not been preserved.
+ if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
+ return false;
+
+ // SinkCandidate is already being sunk after an instruction after
+ // Previous. Nothing left to do.
+ if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
+ return true;
+ // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
+ // re-sunk to Previous, instead of sinking to OtherPrev. Remove
+ // SinkCandidate from SinkAfter to ensure its insertion position is updated.
+ SinkAfter.erase(SinkCandidate);
+ }
// If we reach a PHI node that is not dominated by Previous, we reached a
// header PHI. No need for sinking.
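The chain walk above boils down to following SinkAfter links to the earliest entry while verifying dominance at each hop. A condensed sketch, assuming the map's lookup() returns null when no entry exists:

    // Condensed form of the walk above: find the earliest instruction in
    // the sink-after chain, bailing out if dominance order was not kept.
    Instruction *Prev = It->second;
    while (Instruction *Earlier = SinkAfter.lookup(Prev)) {
      if (!DT->dominates(Earlier, Prev))
        return false;
      Prev = Earlier;
    }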
@@ -1052,7 +1159,7 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
// to check for a pair of icmp/select, for which we use getNextInstruction and
// isCorrectOpcode functions to step the right number of instruction, and
// check the icmp/select pair.
- // FIXME: We also do not attempt to look through Phi/Select's yet, which might
+ // FIXME: We also do not attempt to look through Select's yet, which might
// be part of the reduction chain, or attempt to look through And's to find a
// smaller bitwidth. Subs are also currently not allowed (which are usually
// treated as part of an add reduction) as they are expected to generally be
@@ -1062,16 +1169,21 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
ExpectedUses = 2;
- auto getNextInstruction = [&](Instruction *Cur) {
- if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
- // We are expecting a icmp/select pair, which we go to the next select
- // instruction if we can. We already know that Cur has 2 uses.
- if (isa<SelectInst>(*Cur->user_begin()))
- return cast<Instruction>(*Cur->user_begin());
- else
- return cast<Instruction>(*std::next(Cur->user_begin()));
+ auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
+ for (auto *User : Cur->users()) {
+ Instruction *UI = cast<Instruction>(User);
+ if (isa<PHINode>(UI))
+ continue;
+ if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
+ // We are expecting an icmp/select pair, so we go to the next select
+ // instruction if we can. We already know that Cur has 2 uses.
+ if (isa<SelectInst>(UI))
+ return UI;
+ continue;
+ }
+ return UI;
}
- return cast<Instruction>(*Cur->user_begin());
+ return nullptr;
};
auto isCorrectOpcode = [&](Instruction *Cur) {
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
@@ -1086,22 +1198,46 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
return Cur->getOpcode() == RedOp;
};
+ // Attempt to look through Phis which are part of the reduction chain
+ unsigned ExtraPhiUses = 0;
+ Instruction *RdxInstr = LoopExitInstr;
+ if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
+ if (ExitPhi->getNumIncomingValues() != 2)
+ return {};
+
+ Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
+ Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
+
+ Instruction *Chain = nullptr;
+ if (Inc0 == Phi)
+ Chain = Inc1;
+ else if (Inc1 == Phi)
+ Chain = Inc0;
+ else
+ return {};
+
+ RdxInstr = Chain;
+ ExtraPhiUses = 1;
+ }
+
// The loop exit instruction we check first (as a quick test) but add last. We
// check that the opcode is correct (and don't allow it to be a Sub) and that
// it has the expected number of uses. It will have one use from the phi and
// one from an LCSSA value, no matter the type.
- if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
+ if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
return {};
- // Check that the Phi has one (or two for min/max) uses.
- if (!Phi->hasNUses(ExpectedUses))
+ // Check that the Phi has one (or two for min/max) uses, plus an extra use
+ // for conditional reductions.
+ if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
return {};
+
Instruction *Cur = getNextInstruction(Phi);
// Each other instruction in the chain should have the expected number of uses
// and be the correct opcode.
- while (Cur != LoopExitInstr) {
- if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
+ while (Cur != RdxInstr) {
+ if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
return {};
ReductionOperations.push_back(Cur);
@@ -1428,10 +1564,14 @@ bool InductionDescriptor::isInductionPHI(
ConstantInt *CV = ConstStep->getValue();
const DataLayout &DL = Phi->getModule()->getDataLayout();
- int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(ElementType));
- if (!Size)
+ TypeSize TySize = DL.getTypeAllocSize(ElementType);
+ // TODO: We could potentially support this for scalable vectors if we can
+ // prove at compile time that the constant step is always a multiple of
+ // the scalable type.
+ if (TySize.isZero() || TySize.isScalable())
return false;
+ int64_t Size = static_cast<int64_t>(TySize.getFixedSize());
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
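The switch to TypeSize matters because scalable vector types have an allocation size of the form MinSize x vscale, unknown at compile time. Schematically, with an assumed DataLayout DL and LLVMContext Ctx:

    // Why the scalable case bails out above: only fixed-size types have a
    // compile-time byte size.
    auto *Fixed = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
    auto *Scalable = ScalableVectorType::get(Type::getInt32Ty(Ctx), 4);
    // DL.getTypeAllocSize(Fixed).isScalable()    -> false (16 bytes)
    // DL.getTypeAllocSize(Scalable).isScalable() -> true  (16 x vscale bytes)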
diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp
index 0f3929f45506..5bde947bd851 100644
--- a/llvm/lib/Analysis/IVUsers.cpp
+++ b/llvm/lib/Analysis/IVUsers.cpp
@@ -12,25 +12,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/IVUsers.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "iv-users"
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index b112ed2e4439..ebfa1c8fc08e 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -13,12 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/IndirectCallVisitor.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -31,7 +26,7 @@ using namespace llvm;
// The percent threshold for the direct-call target (this call site vs the
// remaining call count) for it to be considered as the promotion target.
static cl::opt<unsigned> ICPRemainingPercentThreshold(
- "icp-remaining-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore,
+ "icp-remaining-percent-threshold", cl::init(30), cl::Hidden,
cl::desc("The percentage threshold against remaining unpromoted indirect "
"call count for the promotion"));
@@ -39,14 +34,14 @@ static cl::opt<unsigned> ICPRemainingPercentThreshold(
// total call count) for it to be considered as the promotion target.
static cl::opt<unsigned>
ICPTotalPercentThreshold("icp-total-percent-threshold", cl::init(5),
- cl::Hidden, cl::ZeroOrMore,
+ cl::Hidden,
cl::desc("The percentage threshold against total "
"count for the promotion"));
// Set the maximum number of targets to promote for a single indirect-call
// callsite.
static cl::opt<unsigned>
- MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore,
+ MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden,
cl::desc("Max number of promotions for a single indirect "
"call callsite"));
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index f6e3dd354ff8..cf8592c41eda 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -13,14 +13,15 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -55,6 +56,11 @@ static cl::opt<int>
cl::desc("Scale to limit the cost of inline deferral"),
cl::init(2), cl::Hidden);
+static cl::opt<bool> AnnotateInlinePhase(
+ "annotate-inline-phase", cl::Hidden, cl::init(false),
+ cl::desc("If true, annotate inline advisor remarks "
+ "with LTO and pass information."));
+
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
namespace {
@@ -80,7 +86,8 @@ private:
void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
if (IsInliningRecommended)
ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ return OptimizationRemarkMissed(Advisor->getAnnotatedInlinePassName(),
+ "NotInlined", DLoc, Block)
<< "'" << NV("Callee", Callee) << "' is not AlwaysInline into '"
<< NV("Caller", Caller)
<< "': " << NV("Reason", Result.getFailureReason());
@@ -99,7 +106,8 @@ void DefaultInlineAdvice::recordUnsuccessfulInliningImpl(
llvm::setInlineRemark(*OriginalCB, std::string(Result.getFailureReason()) +
"; " + inlineCostStr(*OIC));
ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ return OptimizationRemarkMissed(Advisor->getAnnotatedInlinePassName(),
+ "NotInlined", DLoc, Block)
<< "'" << NV("Callee", Callee) << "' is not inlined into '"
<< NV("Caller", Caller)
<< "': " << NV("Reason", Result.getFailureReason());
@@ -108,12 +116,16 @@ void DefaultInlineAdvice::recordUnsuccessfulInliningImpl(
void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() {
if (EmitRemarks)
- emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC,
+ /* ForProfileContext= */ false,
+ Advisor->getAnnotatedInlinePassName());
}
void DefaultInlineAdvice::recordInliningImpl() {
if (EmitRemarks)
- emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC,
+ /* ForProfileContext= */ false,
+ Advisor->getAnnotatedInlinePassName());
}
llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice(
@@ -146,7 +158,7 @@ llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice(
};
return llvm::shouldInline(
CB, GetInlineCost, ORE,
- Params.EnableDeferral.getValueOr(EnableInlineDeferral));
+ Params.EnableDeferral.value_or(EnableInlineDeferral));
}
std::unique_ptr<InlineAdvice>
@@ -185,18 +197,18 @@ AnalysisKey InlineAdvisorAnalysis::Key;
bool InlineAdvisorAnalysis::Result::tryCreate(
InlineParams Params, InliningAdvisorMode Mode,
- const ReplayInlinerSettings &ReplaySettings) {
+ const ReplayInlinerSettings &ReplaySettings, InlineContext IC) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
switch (Mode) {
case InliningAdvisorMode::Default:
LLVM_DEBUG(dbgs() << "Using default inliner heuristic.\n");
- Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params));
+ Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params, IC));
// Restrict replay to default advisor, ML advisors are stateful so
// replay will need augmentations to interleave with them correctly.
if (!ReplaySettings.ReplayFile.empty()) {
Advisor = llvm::getReplayInlineAdvisor(M, FAM, M.getContext(),
std::move(Advisor), ReplaySettings,
- /* EmitRemarks =*/true);
+ /* EmitRemarks =*/true, IC);
}
break;
case InliningAdvisorMode::Development:
@@ -442,7 +454,7 @@ std::string llvm::formatCallSiteLocation(DebugLoc DLoc,
}
void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
- if (!DLoc.get()) {
+ if (!DLoc) {
return;
}
@@ -499,8 +511,11 @@ void llvm::emitInlinedIntoBasedOnCost(
PassName);
}
-InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM)
- : M(M), FAM(FAM) {
+InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+ Optional<InlineContext> IC)
+ : M(M), FAM(FAM), IC(IC),
+ AnnotatedInlinePassName((IC && AnnotateInlinePhase) ? llvm::AnnotateInlinePassName(*IC)
+ : DEBUG_TYPE) {
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
ImportedFunctionsStats =
std::make_unique<ImportedFunctionsInliningStatistics>();
@@ -522,6 +537,48 @@ std::unique_ptr<InlineAdvice> InlineAdvisor::getMandatoryAdvice(CallBase &CB,
Advice);
}
+static inline const char *getLTOPhase(ThinOrFullLTOPhase LTOPhase) {
+ switch (LTOPhase) {
+ case (ThinOrFullLTOPhase::None):
+ return "main";
+ case (ThinOrFullLTOPhase::ThinLTOPreLink):
+ case (ThinOrFullLTOPhase::FullLTOPreLink):
+ return "prelink";
+ case (ThinOrFullLTOPhase::ThinLTOPostLink):
+ case (ThinOrFullLTOPhase::FullLTOPostLink):
+ return "postlink";
+ }
+ llvm_unreachable("unreachable");
+}
+
+static inline const char *getInlineAdvisorContext(InlinePass IP) {
+ switch (IP) {
+ case (InlinePass::AlwaysInliner):
+ return "always-inline";
+ case (InlinePass::CGSCCInliner):
+ return "cgscc-inline";
+ case (InlinePass::EarlyInliner):
+ return "early-inline";
+ case (InlinePass::MLInliner):
+ return "ml-inline";
+ case (InlinePass::ModuleInliner):
+ return "module-inline";
+ case (InlinePass::ReplayCGSCCInliner):
+ return "replay-cgscc-inline";
+ case (InlinePass::ReplaySampleProfileInliner):
+ return "replay-sample-profile-inline";
+ case (InlinePass::SampleProfileInliner):
+ return "sample-profile-inline";
+ }
+
+ llvm_unreachable("unreachable");
+}
+
+std::string llvm::AnnotateInlinePassName(InlineContext IC) {
+ return std::string(getLTOPhase(IC.LTOPhase)) + "-" +
+ std::string(getInlineAdvisorContext(IC.Pass));
+}
+
InlineAdvisor::MandatoryInliningKind
InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM,
OptimizationRemarkEmitter &ORE) {
@@ -536,7 +593,7 @@ InlineAdvisor::getMandatoryKind(CallBase &CB, FunctionAnalysisManager &FAM,
auto TrivialDecision =
llvm::getAttributeBasedInliningDecision(CB, &Callee, TIR, GetTLI);
- if (TrivialDecision.hasValue()) {
+ if (TrivialDecision) {
if (TrivialDecision->isSuccess())
return MandatoryInliningKind::Always;
else
@@ -568,3 +625,22 @@ InlineAdvisorAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) {
IA->getAdvisor()->print(OS);
return PreservedAnalyses::all();
}
+
+PreservedAnalyses InlineAdvisorAnalysisPrinterPass::run(
+ LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ const auto &MAMProxy =
+ AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG);
+
+ if (InitialC.size() == 0) {
+ OS << "SCC is empty!\n";
+ return PreservedAnalyses::all();
+ }
+ Module &M = *InitialC.begin()->getFunction().getParent();
+ const auto *IA = MAMProxy.getCachedResult<InlineAdvisorAnalysis>(M);
+ if (!IA)
+ OS << "No Inline Advisor\n";
+ else
+ IA->getAdvisor()->print(OS);
+ return PreservedAnalyses::all();
+}
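With -annotate-inline-phase enabled, remark pass names combine the LTO phase and the advisor context from the tables above. An illustrative sketch; the InlineContext field order is assumed from its use above:

    // Example: a CGSCC inline remark from a ThinLTO pre-link pipeline.
    InlineContext IC{ThinOrFullLTOPhase::ThinLTOPreLink,
                     InlinePass::CGSCCInliner};
    std::string Name = llvm::AnnotateInlinePassName(IC);
    // Name == "prelink-cgscc-inline"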
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d5411d916c77..e63497260e6e 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -18,11 +18,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -42,6 +42,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+#include <limits>
using namespace llvm;
@@ -51,24 +52,33 @@ STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
static cl::opt<int>
DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
- cl::ZeroOrMore,
cl::desc("Default amount of inlining to perform"));
+// We introduce this option since there is a minor compile-time win by avoiding
+// addition of TTI attributes (target-features in particular) to inline
+// candidates when they are guaranteed to be the same as top level methods in
+// some use cases. If we avoid adding the attribute, we need an option to avoid
+// checking these attributes.
+static cl::opt<bool> IgnoreTTIInlineCompatible(
+ "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
+ cl::desc("Ignore TTI attributes compatibility check between callee/caller "
+ "during inline cost calculation"));
+
static cl::opt<bool> PrintInstructionComments(
"print-instruction-comments", cl::Hidden, cl::init(false),
cl::desc("Prints comments for instruction based on inline cost analysis"));
static cl::opt<int> InlineThreshold(
- "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
+ "inline-threshold", cl::Hidden, cl::init(225),
cl::desc("Control the amount of inlining to perform (default = 225)"));
static cl::opt<int> HintThreshold(
- "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
+ "inlinehint-threshold", cl::Hidden, cl::init(325),
cl::desc("Threshold for inlining functions with inline hint"));
static cl::opt<int>
ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
- cl::init(45), cl::ZeroOrMore,
+ cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
static cl::opt<bool> InlineEnableCostBenefitAnalysis(
@@ -76,12 +86,11 @@ static cl::opt<bool> InlineEnableCostBenefitAnalysis(
cl::desc("Enable the cost-benefit analysis for the inliner"));
static cl::opt<int> InlineSavingsMultiplier(
- "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
+ "inline-savings-multiplier", cl::Hidden, cl::init(8),
cl::desc("Multiplier to multiply cycle savings by during inlining"));
static cl::opt<int>
InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
- cl::ZeroOrMore,
cl::desc("The maximum size of a callee that get's "
"inlined without sufficient cycle savings"));
@@ -89,26 +98,25 @@ static cl::opt<int>
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
- "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
+ "inlinecold-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining functions with cold attribute"));
static cl::opt<int>
HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
- cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));
static cl::opt<int> LocallyHotCallSiteThreshold(
- "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
+ "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
cl::desc("Threshold for locally hot callsites "));
static cl::opt<int> ColdCallSiteRelFreq(
- "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
cl::desc("Maximum block frequency, expressed as a percentage of caller's "
"entry frequency, for a callsite to be cold in the absence of "
"profile information."));
static cl::opt<int> HotCallSiteRelFreq(
- "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
+ "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
cl::desc("Minimum block frequency, expressed as a multiple of caller's "
"entry frequency, for a callsite to be hot in the absence of "
"profile information."));
@@ -117,14 +125,19 @@ static cl::opt<int> CallPenalty(
"inline-call-penalty", cl::Hidden, cl::init(25),
cl::desc("Call penalty that is applied per callsite when inlining"));
+static cl::opt<size_t>
+ StackSizeThreshold("inline-max-stacksize", cl::Hidden,
+ cl::init(std::numeric_limits<size_t>::max()),
+ cl::desc("Do not inline functions with a stack size "
+ "that exceeds the specified limit"));
+
static cl::opt<bool> OptComputeFullInlineCost(
- "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+ "inline-cost-full", cl::Hidden,
cl::desc("Compute the full inline cost of a call site even when the cost "
"exceeds the threshold."));
static cl::opt<bool> InlineCallerSupersetNoBuiltin(
"inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
- cl::ZeroOrMore,
cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
"attributes."));
@@ -132,33 +145,18 @@ static cl::opt<bool> DisableGEPConstOperand(
"disable-gep-const-evaluation", cl::Hidden, cl::init(false),
cl::desc("Disables evaluation of GetElementPtr with constant operands"));
-namespace {
-class InlineCostCallAnalyzer;
-
-/// This function behaves more like CallBase::hasFnAttr: when it looks for the
-/// requested attribute, it check both the call instruction and the called
-/// function (if it's available and operand bundles don't prohibit that).
-Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
- Attribute CallAttr = CB.getFnAttr(AttrKind);
- if (CallAttr.isValid())
- return CallAttr;
-
- // Operand bundles override attributes on the called function, but don't
- // override attributes directly present on the call instruction.
- if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
- if (const Function *F = CB.getCalledFunction())
- return F->getFnAttribute(AttrKind);
-
- return {};
-}
-
+namespace llvm {
Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
- Attribute Attr = getFnAttr(CB, AttrKind);
+ Attribute Attr = CB.getFnAttr(AttrKind);
int AttrValue;
if (Attr.getValueAsString().getAsInteger(10, AttrValue))
return None;
return AttrValue;
}
+} // namespace llvm
+
+namespace {
+class InlineCostCallAnalyzer;
// This struct is used to store information about the inline cost of a
// particular instruction.
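// Illustrative sketch (assumed usage, not part of this diff): with the
// helper now exported from namespace llvm and backed directly by
// CallBase::getFnAttr, a string attribute on a call site round-trips as:
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
static void tagCallSite(llvm::CallBase &CB, llvm::LLVMContext &Ctx) {
  CB.addFnAttr(llvm::Attribute::get(Ctx, "function-inline-threshold", "500"));
  // getStringFnAttrAsInt(CB, "function-inline-threshold") then yields 500,
  // and None when the attribute is absent or non-numeric.
}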
@@ -198,7 +196,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
friend class InstVisitor<CallAnalyzer, bool>;
protected:
- virtual ~CallAnalyzer() {}
+ virtual ~CallAnalyzer() = default;
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
@@ -352,7 +350,7 @@ protected:
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
/// Keep track of dead blocks due to the constant arguments.
- SetVector<BasicBlock *> DeadBlocks;
+ SmallPtrSet<BasicBlock *, 16> DeadBlocks;
/// The mapping of the blocks to their known unique successors due to the
/// constant arguments.
@@ -385,8 +383,7 @@ protected:
bool canFoldInboundsGEP(GetElementPtrInst &I);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallBase &Call);
- template <typename Callable>
- bool simplifyInstruction(Instruction &I, Callable Evaluate);
+ bool simplifyInstruction(Instruction &I);
bool simplifyIntrinsicCallIsConstant(CallBase &CB);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
@@ -704,7 +701,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
BlockFrequencyInfo *BFI = &(GetBFI(F));
assert(BFI && "BFI must be available");
auto ProfileCount = BFI->getBlockProfileCount(BB);
- assert(ProfileCount.hasValue());
+ assert(ProfileCount);
if (ProfileCount.getValue() == 0)
ColdSize += Cost - CostAtBBStart;
}
@@ -829,14 +826,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
}
auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
- assert(ProfileCount.hasValue());
+ assert(ProfileCount);
CurrentSavings *= ProfileCount.getValue();
CycleSavings += CurrentSavings;
}
// Compute the cycle savings per call.
auto EntryProfileCount = F.getEntryCount();
- assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
+ assert(EntryProfileCount && EntryProfileCount->getCount());
auto EntryCount = EntryProfileCount->getCount();
CycleSavings += EntryCount / 2;
CycleSavings = CycleSavings.udiv(EntryCount);
@@ -845,7 +842,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
auto *CallerBB = CandidateCall.getParent();
BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
CycleSavings += getCallsiteCost(this->CandidateCall, DL);
- CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();
+ CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
// Remove the cost of the cold basic blocks.
int Size = Cost - ColdSize;
@@ -904,13 +901,18 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
Cost = *AttrCost;
+ if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
+ CandidateCall,
+ InlineConstants::FunctionInlineCostMultiplierAttributeName))
+ Cost *= *AttrCostMult;
+
if (Optional<int> AttrThreshold =
getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
Threshold = *AttrThreshold;
if (auto Result = costBenefitAnalysis()) {
DecidedByCostBenefit = true;
- if (Result.getValue())
+ if (*Result)
return InlineResult::success();
else
return InlineResult::failure("Cost over threshold.");
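// Illustrative sketch (assumed usage, not part of this diff): a pass that
// wants to discourage inlining at a single call site can scale the computed
// cost by attaching the multiplier attribute via its named constant:
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"
static void doubleInlineCost(llvm::CallBase &CB, llvm::LLVMContext &Ctx) {
  CB.addFnAttr(llvm::Attribute::get(
      Ctx, llvm::InlineConstants::FunctionInlineCostMultiplierAttributeName,
      "2")); // the analyzer above then applies Cost *= 2
}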
@@ -978,6 +980,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
if (F.getCallingConv() == CallingConv::Cold)
Cost += InlineConstants::ColdccPenalty;
+ LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n");
+
// Check if we're done. This can happen due to bonuses and penalties.
if (Cost >= Threshold && !ComputeFullInlineCost)
return InlineResult::failure("high cost");
@@ -1002,7 +1006,7 @@ public:
BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
Writer(this) {
- AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
+ AllowRecursiveCall = *Params.AllowRecursiveCall;
}
/// Annotation Writer for instruction details
@@ -1020,7 +1024,7 @@ public:
return None;
}
- virtual ~InlineCostCallAnalyzer() {}
+ virtual ~InlineCostCallAnalyzer() = default;
int getThreshold() const { return Threshold; }
int getCost() const { return Cost; }
Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
@@ -1203,6 +1207,10 @@ private:
set(InlineCostFeatureIndex::ColdCcPenalty,
(F.getCallingConv() == CallingConv::Cold));
+ set(InlineCostFeatureIndex::LastCallToStaticBonus,
+ (F.hasLocalLinkage() && F.hasOneLiveUse() &&
+ &F == CandidateCall.getCalledFunction()));
+
// FIXME: we shouldn't repeat this logic in both the Features and Cost
// analyzer - instead, we should abstract it to a common method in the
// CallAnalyzer
@@ -1262,7 +1270,7 @@ void InlineCostAnnotationWriter::emitInstructionAnnot(
auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
if (C) {
OS << ", simplified to ";
- C.getValue()->print(OS, true);
+ (*C)->print(OS, true);
}
OS << "\n";
}
@@ -1501,13 +1509,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
};
if (!DisableGEPConstOperand)
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- SmallVector<Constant *, 2> Indices;
- for (unsigned int Index = 1; Index < COps.size(); ++Index)
- Indices.push_back(COps[Index]);
- return ConstantExpr::getGetElementPtr(
- I.getSourceElementType(), COps[0], Indices, I.isInBounds());
- }))
+ if (simplifyInstruction(I))
return true;
if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
@@ -1525,11 +1527,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
}
/// Simplify \p I if its operands are constants and update SimplifiedValues.
-/// \p Evaluate is a callable specific to instruction type that evaluates the
-/// instruction when all the operands are constants.
-template <typename Callable>
-bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
- SmallVector<Constant *, 2> COps;
+bool CallAnalyzer::simplifyInstruction(Instruction &I) {
+ SmallVector<Constant *> COps;
for (Value *Op : I.operands()) {
Constant *COp = dyn_cast<Constant>(Op);
if (!COp)
@@ -1538,7 +1537,7 @@ bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
return false;
COps.push_back(COp);
}
- auto *C = Evaluate(COps);
+ auto *C = ConstantFoldInstOperands(&I, COps, DL);
if (!C)
return false;
SimplifiedValues[&I] = C;
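// Condensed, standalone restatement of the function above: gather constant
// operands, then let the generic folder do the opcode-specific work that the
// per-visitor lambdas used to spell out (the real code also consults
// SimplifiedValues for operands that are not literal constants).
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
static llvm::Constant *tryFoldAllConstant(llvm::Instruction &I,
                                          const llvm::DataLayout &DL) {
  llvm::SmallVector<llvm::Constant *> Ops;
  for (llvm::Value *Op : I.operands()) {
    auto *C = llvm::dyn_cast<llvm::Constant>(Op);
    if (!C)
      return nullptr;
    Ops.push_back(C);
  }
  return llvm::ConstantFoldInstOperands(&I, Ops, DL);
}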
@@ -1568,9 +1567,7 @@ bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
// Propagate constants through bitcasts.
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- return ConstantExpr::getBitCast(COps[0], I.getType());
- }))
+ if (simplifyInstruction(I))
return true;
// Track base/offsets through casts
@@ -1590,9 +1587,7 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Propagate constants through ptrtoint.
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- return ConstantExpr::getPtrToInt(COps[0], I.getType());
- }))
+ if (simplifyInstruction(I))
return true;
// Track base/offset pairs when converted to a plain integer provided the
@@ -1622,9 +1617,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// Propagate constants through inttoptr.
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- return ConstantExpr::getIntToPtr(COps[0], I.getType());
- }))
+ if (simplifyInstruction(I))
return true;
// Track base/offset pairs when round-tripped through a pointer without
@@ -1647,9 +1640,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
bool CallAnalyzer::visitCastInst(CastInst &I) {
// Propagate constants through casts.
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
- }))
+ if (simplifyInstruction(I))
return true;
// Disable SROA in the face of arbitrary casts we don't explicitly list
@@ -1855,7 +1846,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// current threshold, but AutoFDO + ThinLTO currently relies on this
// behavior to prevent inlining of hot callsites during ThinLTO
// compile phase.
- Threshold = HotCallSiteThreshold.getValue();
+ Threshold = *HotCallSiteThreshold;
} else if (isColdCallSite(Call, CallerBFI)) {
LLVM_DEBUG(dbgs() << "Cold callsite.\n");
// Do not apply bonuses for a cold callsite including the
@@ -1906,9 +1897,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// First try to handle simplified comparisons.
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
- }))
+ if (simplifyInstruction(I))
return true;
if (I.getOpcode() == Instruction::FCmp)
@@ -1984,11 +1973,11 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *SimpleV = nullptr;
if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
+ SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
FI->getFastMathFlags(), DL);
else
SimpleV =
- SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
+ simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
SimplifiedValues[&I] = C;
@@ -2018,7 +2007,7 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
if (!COp)
COp = SimplifiedValues.lookup(Op);
- Value *SimpleV = SimplifyFNegInst(
+ Value *SimpleV = simplifyFNegInst(
COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
@@ -2067,9 +2056,7 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
// Constant folding for extract value is trivial.
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- return ConstantExpr::getExtractValue(COps[0], I.getIndices());
- }))
+ if (simplifyInstruction(I))
return true;
// SROA can't look through these, but they may be free.
@@ -2078,11 +2065,7 @@ bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
// Constant folding for insert value is trivial.
- if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
- return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
- /*InsertedValueOperand*/ COps[1],
- I.getIndices());
- }))
+ if (simplifyInstruction(I))
return true;
// SROA can't look through these, but they may be free.
@@ -2136,14 +2119,14 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
ContainsNoDuplicateCall = true;
- Value *Callee = Call.getCalledOperand();
- Function *F = dyn_cast_or_null<Function>(Callee);
+ Function *F = Call.getCalledFunction();
bool IsIndirectCall = !F;
if (IsIndirectCall) {
// Check if this happens to be an indirect function call to a known function
// in this inline context. If not, we've done all we can.
+ Value *Callee = Call.getCalledOperand();
F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
- if (!F) {
+ if (!F || F->getFunctionType() != Call.getFunctionType()) {
onCallArgumentSetup(Call);
if (!Call.onlyReadsMemory())
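// Illustrative sketch of the added guard (the intent, as an assumption:
// inlining through a resolved callee whose prototype disagrees with the
// call would be unsound, so such calls stay classified as indirect):
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
static bool analyzableAsDirectCall(llvm::CallBase &Call, llvm::Function *F) {
  return F && F->getFunctionType() == Call.getFunctionType();
}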
@@ -2552,7 +2535,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
NewDead.push_back(Succ);
while (!NewDead.empty()) {
BasicBlock *Dead = NewDead.pop_back_val();
- if (DeadBlocks.insert(Dead))
+ if (DeadBlocks.insert(Dead).second)
// Continue growing the dead block lists.
for (BasicBlock *S : successors(Dead))
if (IsNewlyDead(S))
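// Illustrative sketch of the container change: SetVector::insert returns a
// plain bool, while SmallPtrSet::insert returns std::pair<iterator, bool>,
// hence the new ".second" to test first-time insertion.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
static bool markDead(llvm::SmallPtrSetImpl<llvm::BasicBlock *> &Dead,
                     llvm::BasicBlock *BB) {
  return Dead.insert(BB).second; // true only on the first insertion
}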
@@ -2707,6 +2690,11 @@ InlineResult CallAnalyzer::analyze() {
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
return InlineResult::failure("noduplicate");
+ // If the callee's stack size exceeds the user-specified threshold,
+ // do not let it be inlined.
+ if (AllocatedSize > StackSizeThreshold)
+ return InlineResult::failure("stacksize");
+
return finalizeAnalysis();
}
@@ -2745,7 +2733,8 @@ static bool functionsHaveCompatibleAttributes(
// object, and always returns the same object (which is overwritten on each
// GetTLI call). Therefore we copy the first result.
auto CalleeTLI = GetTLI(*Callee);
- return TTI.areInlineCompatible(Caller, Callee) &&
+ return (IgnoreTTIInlineCompatible ||
+ TTI.areInlineCompatible(Caller, Callee)) &&
GetTLI(*Caller).areInlineCompatible(CalleeTLI,
InlineCallerSupersetNoBuiltin) &&
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
@@ -2864,6 +2853,9 @@ Optional<InlineResult> llvm::getAttributeBasedInliningDecision(
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
if (Call.hasFnAttr(Attribute::AlwaysInline)) {
+ if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
+ return InlineResult::failure("noinline call site attribute");
+
auto IsViable = isInlineViable(*Callee);
if (IsViable.isSuccess())
return InlineResult::success();
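// Illustrative sketch (assumed usage, not part of this diff): the new check
// lets an explicit call-site noinline win over the callee's alwaysinline,
// so a pass can veto one site without touching the function itself:
#include "llvm/IR/InstrTypes.h"
static void vetoInliningHere(llvm::CallBase &CB) {
  CB.addFnAttr(llvm::Attribute::NoInline);
  // getAttributeBasedInliningDecision then fails this site with
  // "noinline call site attribute" even for an always_inline callee.
}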
@@ -2911,7 +2903,7 @@ InlineCost llvm::getInlineCost(
auto UserDecision =
llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
- if (UserDecision.hasValue()) {
+ if (UserDecision) {
if (UserDecision->isSuccess())
return llvm::InlineCost::getAlways("always inline attribute");
return llvm::InlineCost::getNever(UserDecision->getFailureReason());
diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
index a2e231e2d0f4..2371ecbba615 100644
--- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
+++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@@ -15,33 +15,32 @@
#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+AnalysisKey InlineSizeEstimatorAnalysis::Key;
+
+#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-
#include <algorithm>
#include <deque>
-using namespace llvm;
-
-AnalysisKey InlineSizeEstimatorAnalysis::Key;
-
-#define DEBUG_TYPE "inline-size-estimator"
-
-#ifdef LLVM_HAVE_TF_API
cl::opt<std::string> TFIR2NativeModelPath(
"ml-inliner-ir2native-model", cl::Hidden,
cl::desc("Path to saved model evaluating native size from IR."));
+#define DEBUG_TYPE "inline-size-estimator"
namespace {
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
@@ -261,10 +260,10 @@ InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
-InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
InlineSizeEstimatorAnalysis &&) {}
-InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
FunctionAnalysisManager &FAM) {
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 4775340b3438..013e4d6489fa 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -20,7 +20,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -36,13 +35,10 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/KnownBits.h"
#include <algorithm>
using namespace llvm;
@@ -52,28 +48,30 @@ using namespace llvm::PatternMatch;
enum { RecursionLimit = 3 };
-STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
-static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyAndInst(Value *, Value *, const SimplifyQuery &,
+ unsigned);
static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned);
static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
const SimplifyQuery &, unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
+static Value *simplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+static Value *simplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
const SimplifyQuery &, unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
+static Value *simplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
-static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse);
-static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
-static Value *SimplifyCastInst(unsigned, Value *, Type *,
- const SimplifyQuery &, unsigned);
-static Value *SimplifyGEPInst(Type *, Value *, ArrayRef<Value *>, bool,
+static Value *simplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyXorInst(Value *, Value *, const SimplifyQuery &,
+ unsigned);
+static Value *simplifyCastInst(unsigned, Value *, Type *, const SimplifyQuery &,
+ unsigned);
+static Value *simplifyGEPInst(Type *, Value *, ArrayRef<Value *>, bool,
const SimplifyQuery &, unsigned);
-static Value *SimplifySelectInst(Value *, Value *, Value *,
+static Value *simplifySelectInst(Value *, Value *, Value *,
const SimplifyQuery &, unsigned);
static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
@@ -120,15 +118,11 @@ static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
/// For a boolean type or a vector of boolean type, return false or a vector
/// with every element false.
-static Constant *getFalse(Type *Ty) {
- return ConstantInt::getFalse(Ty);
-}
+static Constant *getFalse(Type *Ty) { return ConstantInt::getFalse(Ty); }
/// For a boolean type or a vector of boolean type, return true or a vector
/// with every element true.
-static Constant *getTrue(Type *Ty) {
- return ConstantInt::getTrue(Ty);
-}
+static Constant *getTrue(Type *Ty) { return ConstantInt::getTrue(Ty); }
/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
@@ -141,7 +135,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
if (CPred == Pred && CLHS == LHS && CRHS == RHS)
return true;
return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS &&
- CRHS == LHS;
+ CRHS == LHS;
}
/// Simplify comparison with true or false branch of select:
@@ -153,7 +147,7 @@ static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, Value *Cond,
const SimplifyQuery &Q, unsigned MaxRecurse,
Constant *TrueOrFalse) {
- Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+ Value *SimplifiedCmp = simplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse);
if (SimplifiedCmp == Cond) {
// %cmp simplified to the select condition (%cond).
return TrueOrFalse;
@@ -196,17 +190,17 @@ static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp,
// checks whether folding it does not convert a well-defined value into
// poison.
if (match(FCmp, m_Zero()) && impliesPoison(TCmp, Cond))
- if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+ if (Value *V = simplifyAndInst(Cond, TCmp, Q, MaxRecurse))
return V;
// If the true value simplified to true, then the result of the compare
// is equal to "Cond || FCmp".
if (match(TCmp, m_One()) && impliesPoison(FCmp, Cond))
- if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+ if (Value *V = simplifyOrInst(Cond, FCmp, Q, MaxRecurse))
return V;
// Finally, if the false value simplified to true and the true value to
// false, then the result of the compare is equal to "!Cond".
if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
- if (Value *V = SimplifyXorInst(
+ if (Value *V = simplifyXorInst(
Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse))
return V;
return nullptr;
@@ -248,12 +242,12 @@ static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V,
if (!B || B->getOpcode() != OpcodeToExpand)
return nullptr;
Value *B0 = B->getOperand(0), *B1 = B->getOperand(1);
- Value *L = SimplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(),
- MaxRecurse);
+ Value *L =
+ simplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(), MaxRecurse);
if (!L)
return nullptr;
- Value *R = SimplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(),
- MaxRecurse);
+ Value *R =
+ simplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(), MaxRecurse);
if (!R)
return nullptr;
@@ -265,7 +259,7 @@ static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V,
}
// Otherwise, return "L op' R" if it simplifies.
- Value *S = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse);
+ Value *S = simplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse);
if (!S)
return nullptr;
@@ -275,8 +269,8 @@ static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V,
/// Try to simplify binops of form "A op (B op' C)" or the commuted variant by
/// distributing op over op'.
-static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode,
- Value *L, Value *R,
+static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode, Value *L,
+ Value *R,
Instruction::BinaryOps OpcodeToExpand,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
@@ -293,7 +287,7 @@ static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode,
/// Generic simplifications for associative binary operations.
/// Returns the simpler value, or null if none was found.
-static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
+static Value *simplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
Value *LHS, Value *RHS,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
@@ -313,12 +307,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
Value *C = RHS;
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
+ if (Value *V = simplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// It does! Return "A op V" if it simplifies or is already available.
// If V equals B then "A op V" is just the LHS.
- if (V == B) return LHS;
+ if (V == B)
+ return LHS;
// Otherwise return "A op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
+ if (Value *W = simplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -332,12 +327,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
+ if (Value *V = simplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
// It does! Return "V op C" if it simplifies or is already available.
// If V equals B then "V op C" is just the RHS.
- if (V == B) return RHS;
+ if (V == B)
+ return RHS;
// Otherwise return "V op C" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
+ if (Value *W = simplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -355,12 +351,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
Value *C = RHS;
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+ if (Value *V = simplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "V op B" if it simplifies or is already available.
// If V equals A then "V op B" is just the LHS.
- if (V == A) return LHS;
+ if (V == A)
+ return LHS;
// Otherwise return "V op B" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
+ if (Value *W = simplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -374,12 +371,13 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
+ if (Value *V = simplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "B op V" if it simplifies or is already available.
// If V equals C then "B op V" is just the RHS.
- if (V == C) return RHS;
+ if (V == C)
+ return RHS;
// Otherwise return "B op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
+ if (Value *W = simplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -393,7 +391,7 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
/// try to simplify the binop by seeing whether evaluating it on both branches
/// of the select results in the same value. Returns the common value if so,
/// otherwise returns null.
-static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
+static Value *threadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -412,11 +410,11 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
Value *TV;
Value *FV;
if (SI == LHS) {
- TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
- FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
+ TV = simplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+ FV = simplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
} else {
- TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
- FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
+ TV = simplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+ FV = simplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
}
// If they simplified to the same value, then return the common value.
@@ -471,7 +469,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
/// We can simplify %cmp1 to true, because both branches of select are
/// less than 3. We compose new comparison by substituting %tmp with both
/// branches of select and see if it can be simplified.
-static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+static Value *threadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -517,7 +515,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// try to simplify the binop by seeing whether evaluating it on the incoming
/// phi values yields the same result for every value. If so returns the common
/// value, otherwise returns null.
-static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
+static Value *threadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -542,10 +540,10 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
Value *CommonValue = nullptr;
for (Value *Incoming : PI->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
- if (Incoming == PI) continue;
- Value *V = PI == LHS ?
- SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
- SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
+ if (Incoming == PI)
+ continue;
+ Value *V = PI == LHS ? simplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse)
+ : simplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -560,7 +558,7 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
/// comparison by seeing whether comparing with all of the incoming phi values
/// yields the same result every time. If so returns the common result,
/// otherwise returns null.
-static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+static Value *threadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -584,11 +582,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
Value *Incoming = PI->getIncomingValue(u);
Instruction *InTI = PI->getIncomingBlock(u)->getTerminator();
// If the incoming value is the phi node itself, it can safely be skipped.
- if (Incoming == PI) continue;
+ if (Incoming == PI)
+ continue;
// Change the context instruction to the "edge" that flows into the phi.
// This is important because that is where incoming is actually "evaluated"
// even though it is used later somewhere else.
- Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI),
+ Value *V = simplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI),
MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
@@ -604,8 +603,20 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
Value *&Op0, Value *&Op1,
const SimplifyQuery &Q) {
if (auto *CLHS = dyn_cast<Constant>(Op0)) {
- if (auto *CRHS = dyn_cast<Constant>(Op1))
+ if (auto *CRHS = dyn_cast<Constant>(Op1)) {
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ if (Q.CxtI != nullptr)
+ return ConstantFoldFPInstOperands(Opcode, CLHS, CRHS, Q.DL, Q.CxtI);
+ }
return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
+ }
// Canonicalize the constant to the RHS if this is a commutative operation.
if (Instruction::isCommutative(Opcode))
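// Illustrative sketch mirroring the new switch above: floating-point binops
// are routed through the FP-aware folder together with the context
// instruction, presumably so folding can respect per-function FP state
// (an assumption; the hunk itself only shows the plumbing):
#include "llvm/Analysis/ConstantFolding.h"
static llvm::Constant *foldFAddAt(llvm::Constant *L, llvm::Constant *R,
                                  const llvm::DataLayout &DL,
                                  const llvm::Instruction *CxtI) {
  return llvm::ConstantFoldFPInstOperands(llvm::Instruction::FAdd, L, R, DL,
                                          CxtI);
}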
@@ -616,7 +627,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
/// Given operands for an Add, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
+static Value *simplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q))
return C;
@@ -647,8 +658,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
// X + ~X -> -1 since ~X = -X-1
Type *Ty = Op0->getType();
- if (match(Op0, m_Not(m_Specific(Op1))) ||
- match(Op1, m_Not(m_Specific(Op0))))
+ if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Ty);
// add nsw/nuw (xor Y, signmask), signmask --> Y
@@ -664,12 +674,12 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+ if (Value *V = simplifyXorInst(Op0, Op1, Q, MaxRecurse - 1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ simplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, MaxRecurse))
return V;
// Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -684,45 +694,37 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
return nullptr;
}
-Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
+Value *llvm::simplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
const SimplifyQuery &Query) {
- return ::SimplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit);
+ return ::simplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit);
}
/// Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
-/// accumulates the total constant offset applied in the returned constant. It
-/// returns 0 if V is not a pointer, and returns the constant '0' if there are
-/// no constant offsets applied.
+/// accumulates the total constant offset applied in the returned constant.
+/// It returns zero if there are no constant offsets applied.
///
-/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
-/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
-/// folding.
-static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
- bool AllowNonInbounds = false) {
+/// This is very similar to stripAndAccumulateConstantOffsets(), except it
+/// normalizes the offset bitwidth to the stripped pointer type, not the
+/// original pointer type.
+static APInt stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
+ bool AllowNonInbounds = false) {
assert(V->getType()->isPtrOrPtrVectorTy());
APInt Offset = APInt::getZero(DL.getIndexTypeSizeInBits(V->getType()));
-
V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
// As that strip may trace through `addrspacecast`, need to sext or trunc
// the offset calculated.
- Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
- Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth());
-
- Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset);
- if (VectorType *VecTy = dyn_cast<VectorType>(V->getType()))
- return ConstantVector::getSplat(VecTy->getElementCount(), OffsetIntPtr);
- return OffsetIntPtr;
+ return Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(V->getType()));
}
/// Compute the constant difference between two pointer values.
/// If the difference is not a constant, returns zero.
static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
Value *RHS) {
- Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
- Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
+ APInt LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
+ APInt RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
// If LHS and RHS are not related via constant offsets to the same base
// value, there is nothing we can do here.
@@ -733,12 +735,15 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
// LHS - RHS
// = (LHSOffset + Base) - (RHSOffset + Base)
// = LHSOffset - RHSOffset
- return ConstantExpr::getSub(LHSOffset, RHSOffset);
+ Constant *Res = ConstantInt::get(LHS->getContext(), LHSOffset - RHSOffset);
+ if (auto *VecTy = dyn_cast<VectorType>(LHS->getType()))
+ Res = ConstantVector::getSplat(VecTy->getElementCount(), Res);
+ return Res;
}
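// Illustrative sketch of the arithmetic now done on APInt directly: both
// offsets already share the index-type bit width, so the difference is a
// plain subtraction materialized as a ConstantInt (and re-splatted above
// for vector-of-pointer operands):
#include "llvm/IR/Constants.h"
static llvm::Constant *offsetDelta(llvm::LLVMContext &Ctx,
                                   const llvm::APInt &L, const llvm::APInt &R) {
  return llvm::ConstantInt::get(Ctx, L - R); // e.g. 24 - 8 == 16
}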
/// Given operands for a Sub, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+static Value *simplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q))
return C;
@@ -784,17 +789,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *X = nullptr, *Y = nullptr, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
+ if (Value *V = simplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse - 1))
// It does! Now see if "X + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
+ if (Value *W = simplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse - 1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+ if (Value *V = simplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse - 1))
// It does! Now see if "Y + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
+ if (Value *W = simplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse - 1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -806,17 +811,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
X = Op0;
if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
// See if "V === X - Y" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ if (Value *V = simplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse - 1))
// It does! Now see if "V - Z" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
+ if (Value *W = simplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse - 1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
+ if (Value *V = simplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse - 1))
// It does! Now see if "V - Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
+ if (Value *W = simplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse - 1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -828,9 +833,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Z = Op0;
if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
// See if "V === Z - X" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
+ if (Value *V = simplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse - 1))
// It does! Now see if "V + Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
+ if (Value *W = simplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse - 1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -841,22 +846,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
match(Op1, m_Trunc(m_Value(Y))))
if (X->getType() == Y->getType())
// See if "V === X - Y" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ if (Value *V = simplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse - 1))
// It does! Now see if "trunc V" simplifies.
- if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(),
+ if (Value *W = simplifyCastInst(Instruction::Trunc, V, Op0->getType(),
Q, MaxRecurse - 1))
// It does, return the simplified "trunc V".
return W;
// Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
- if (match(Op0, m_PtrToInt(m_Value(X))) &&
- match(Op1, m_PtrToInt(m_Value(Y))))
+ if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y))))
if (Constant *Result = computePointerDifference(Q.DL, X, Y))
return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
+ if (Value *V = simplifyXorInst(Op0, Op1, Q, MaxRecurse - 1))
return V;
// Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -871,14 +875,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return nullptr;
}
-Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+Value *llvm::simplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
+ return ::simplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
}
/// Given operands for a Mul, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q))
return C;
@@ -906,12 +910,12 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// i1 mul -> and.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
- if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
+ if (Value *V = simplifyAndInst(Op0, Op1, Q, MaxRecurse - 1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ simplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
@@ -922,22 +926,22 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ threadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ threadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, MaxRecurse))
return V;
return nullptr;
}
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifyMulInst(Op0, Op1, Q, RecursionLimit);
}
/// Check for common or similar folds of integer division or integer remainder.
@@ -1026,7 +1030,7 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
/// when we can prove a relationship between the operands.
static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
- Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+ Value *V = simplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
Constant *C = dyn_cast_or_null<Constant>(V);
return (C && C->isAllOnesValue());
}
@@ -1122,13 +1126,13 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned))
@@ -1164,13 +1168,13 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If X / Y == 0, then X % Y == X.
@@ -1182,7 +1186,7 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// Given operands for an SDiv, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// If two operands are negated and no signed overflow, return -1.
if (isKnownNegation(Op0, Op1, /*NeedNSW=*/true))
@@ -1191,24 +1195,24 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse);
}
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifySDivInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a UDiv, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse);
}
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifyUDivInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for an SRem, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// If the divisor is 0, the result is undefined, so assume the divisor is -1.
// srem Op0, (sext i1 X) --> srem Op0, -1 --> 0
@@ -1223,19 +1227,19 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse);
}
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifySRemInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a URem, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse);
}
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifyURemInst(Op0, Op1, Q, RecursionLimit);
}
/// Returns true if a shift by \c Amount always yields poison.
@@ -1268,7 +1272,7 @@ static bool isPoisonShift(Value *Amount, const SimplifyQuery &Q) {
/// Given operands for an Shl, LShr or AShr, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
+static Value *simplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
Value *Op1, bool IsNSW, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
@@ -1297,13 +1301,13 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If any bits in the shift amount make that value greater than or equal to
@@ -1338,11 +1342,11 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
/// Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
- Value *Op1, bool isExact, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
+static Value *simplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
+ Value *Op1, bool isExact,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V =
- SimplifyShift(Opcode, Op0, Op1, /*IsNSW*/ false, Q, MaxRecurse))
+ simplifyShift(Opcode, Op0, Op1, /*IsNSW*/ false, Q, MaxRecurse))
return V;
// X >> X -> 0
@@ -1356,7 +1360,8 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
// The low bit cannot be shifted out of an exact shift if it is set.
if (isExact) {
- KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ KnownBits Op0Known =
+ computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
if (Op0Known.One[0])
return Op0;
}
@@ -1366,10 +1371,10 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
/// Given operands for an Shl, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+static Value *simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V =
- SimplifyShift(Instruction::Shl, Op0, Op1, isNSW, Q, MaxRecurse))
+ simplifyShift(Instruction::Shl, Op0, Op1, isNSW, Q, MaxRecurse))
return V;
// undef << X -> 0
@@ -1392,18 +1397,18 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return nullptr;
}
-Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+Value *llvm::simplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
+ return ::simplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
}
/// Given operands for an LShr, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+static Value *simplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
+ if (Value *V = simplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
MaxRecurse))
- return V;
+ return V;
// (X << A) >> A -> X
Value *X;
@@ -1429,16 +1434,16 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
return nullptr;
}
-Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+Value *llvm::simplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
+ return ::simplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
}
/// Given operands for an AShr, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+static Value *simplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
+ if (Value *V = simplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
MaxRecurse))
return V;
@@ -1462,9 +1467,9 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
return nullptr;
}
-Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+Value *llvm::simplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
+ return ::simplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
}
/// Commuted variants are assumed to be handled by calling this function again
@@ -1581,7 +1586,7 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
/// with the parameters swapped.
static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- Value *A ,*B;
+ Value *A, *B;
if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
!match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
return nullptr;
@@ -1606,7 +1611,7 @@ static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
/// with the parameters swapped.
static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- Value *A ,*B;
+ Value *A, *B;
if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
!match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
return nullptr;
@@ -1812,6 +1817,27 @@ static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
}
+/// Try to simplify and/or of icmp with ctpop intrinsic.
+static Value *simplifyAndOrOfICmpsWithCtpop(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool IsAnd) {
+ ICmpInst::Predicate Pred0, Pred1;
+ Value *X;
+ const APInt *C;
+ if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
+ m_APInt(C))) ||
+ !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt())) || C->isZero())
+ return nullptr;
+
+ // (ctpop(X) == C) || (X != 0) --> X != 0 where C > 0
+ if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_NE)
+ return Cmp1;
+ // (ctpop(X) != C) && (X == 0) --> X == 0 where C > 0
+ if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_EQ)
+ return Cmp1;
+
+ return nullptr;
+}
+
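// Sanity check of the two folds, restated in plain C++20 with std::popcount
// standing in for llvm.ctpop (an analogy, not LLVM API):
#include <bit>
#include <cassert>
#include <cstdint>
static void checkCtpopFolds(uint32_t X, int C) {
  assert(C > 0 && "the folds only fire for C > 0");
  // (ctpop(X) == C) || (X != 0)  -->  X != 0
  assert(((std::popcount(X) == C) || (X != 0)) == (X != 0));
  // (ctpop(X) != C) && (X == 0)  -->  X == 0
  assert(((std::popcount(X) != C) && (X == 0)) == (X == 0));
}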
static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
const SimplifyQuery &Q) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q))
@@ -1833,6 +1859,11 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
return X;
+ if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, true))
+ return X;
+ if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, true))
+ return X;
+
if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ))
@@ -1909,6 +1940,11 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
return X;
+ if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op0, Op1, false))
+ return X;
+ if (Value *X = simplifyAndOrOfICmpsWithCtpop(Op1, Op0, false))
+ return X;
+
if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ))
@@ -1917,8 +1953,8 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
return nullptr;
}
-static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI,
- FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
+static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, FCmpInst *LHS,
+ FCmpInst *RHS, bool IsAnd) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if (LHS0->getType() != RHS0->getType())
@@ -1955,8 +1991,8 @@ static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI,
return nullptr;
}
-static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
- Value *Op0, Value *Op1, bool IsAnd) {
+static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0,
+ Value *Op1, bool IsAnd) {
// Look through casts of the 'and' operands to find compares.
auto *Cast0 = dyn_cast<CastInst>(Op0);
auto *Cast1 = dyn_cast<CastInst>(Op1);
@@ -2017,7 +2053,7 @@ static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1,
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q))
return C;
@@ -2043,8 +2079,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op0;
// A & ~A = ~A & A = 0
- if (match(Op0, m_Not(m_Specific(Op1))) ||
- match(Op1, m_Not(m_Specific(Op0))))
+ if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0))))
return Constant::getNullValue(Op0->getType());
// (A | ?) & A = A
@@ -2117,8 +2152,8 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ simplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
@@ -2142,16 +2177,16 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// If the operation is with the result of a select instruction, check
// whether operating on either branch of the select always yields the same
// value.
- if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ threadBinOpOverSelect(Instruction::And, Op0, Op1, Q, MaxRecurse))
return V;
}
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ threadBinOpOverPHI(Instruction::And, Op0, Op1, Q, MaxRecurse))
return V;
// Assuming the effective width of Y is not larger than A, i.e. all bits
@@ -2174,8 +2209,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (EffWidthY <= ShftCnt) {
- const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
- Q.DT);
+ const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
const unsigned EffWidthX = XKnown.countMaxActiveBits();
const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
@@ -2197,11 +2231,20 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y))))
return Constant::getNullValue(Op0->getType());
+ if (Op0->getType()->isIntOrIntVectorTy(1)) {
+ // Op0&Op1 -> Op0 where Op0 implies Op1
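+    // e.g. (icmp ult X, 8) & (icmp ult X, 16) --> icmp ult X, 8, since the
+    // first condition implies the second (illustrative sketch).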
+ if (isImpliedCondition(Op0, Op1, Q.DL).value_or(false))
+ return Op0;
+ // Op0&Op1 -> Op1 where Op1 implies Op0
+ if (isImpliedCondition(Op1, Op0, Q.DL).value_or(false))
+ return Op1;
+ }
+
return nullptr;
}
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifyAndInst(Op0, Op1, Q, RecursionLimit);
}
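// A minimal usage sketch of the renamed entry point (assuming a SimplifyQuery
// Q built from the enclosing module's DataLayout):
//   if (Value *V = simplifyAndInst(X, Y, Q))
//     I.replaceAllUsesWith(V);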
static Value *simplifyOrLogic(Value *X, Value *Y) {
@@ -2289,7 +2332,7 @@ static Value *simplifyOrLogic(Value *X, Value *Y) {
/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q))
return C;
@@ -2334,6 +2377,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
}
}
+ // A funnel shift (rotate) can be decomposed into simpler shifts. See if we
+ // are mixing in another shift that is redundant with the funnel shift.
+
+ // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+ // (shl X, Y) | (fshl X, ?, Y) --> fshl X, ?, Y
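+  // Informally, fshl X, ?, Y computes (X << (Y % BW)) | (? >> (BW - Y % BW)),
+  // so whenever shl X, Y is well-defined its bits are a subset of the funnel
+  // shift's first term, making the outer 'or' redundant (sketch).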
+ if (match(Op0,
+ m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(), m_Value(Y))) &&
+ match(Op1, m_Shl(m_Specific(X), m_Specific(Y))))
+ return Op0;
+ if (match(Op1,
+ m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(), m_Value(Y))) &&
+ match(Op0, m_Shl(m_Specific(X), m_Specific(Y))))
+ return Op1;
+
+ // (fshr ?, X, Y) | (lshr X, Y) --> fshr ?, X, Y
+ // (lshr X, Y) | (fshr ?, X, Y) --> fshr ?, X, Y
+ if (match(Op0,
+ m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X), m_Value(Y))) &&
+ match(Op1, m_LShr(m_Specific(X), m_Specific(Y))))
+ return Op0;
+ if (match(Op1,
+ m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X), m_Value(Y))) &&
+ match(Op0, m_LShr(m_Specific(X), m_Specific(Y))))
+ return Op1;
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
@@ -2346,8 +2414,8 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op0;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ simplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
@@ -2366,8 +2434,8 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// If the operation is with the result of a select instruction, check
// whether operating on either branch of the select always yields the same
// value.
- if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ threadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
}
@@ -2389,8 +2457,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return A;
}
// Or commutes, try both ways.
- if (C1->isMask() &&
- match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
+ if (C1->isMask() && match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
// Add commutes, try both ways.
if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
@@ -2401,19 +2468,28 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = threadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
+ if (Op0->getType()->isIntOrIntVectorTy(1)) {
+ // Op0|Op1 -> Op1 where Op0 implies Op1
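+    // e.g. (icmp ult X, 8) | (icmp ult X, 16) --> icmp ult X, 16, since the
+    // stronger condition is absorbed by the weaker one (illustrative sketch).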
+ if (isImpliedCondition(Op0, Op1, Q.DL).value_or(false))
+ return Op1;
+ // Op0|Op1 -> Op0 where Op1 implies Op0
+ if (isImpliedCondition(Op1, Op0, Q.DL).value_or(false))
+ return Op0;
+ }
+
return nullptr;
}
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifyOrInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a Xor, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
+static Value *simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
return C;
@@ -2435,8 +2511,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Constant::getNullValue(Op0->getType());
// A ^ ~A = ~A ^ A = -1
- if (match(Op0, m_Not(m_Specific(Op1))) ||
- match(Op1, m_Not(m_Specific(Op0))))
+ if (match(Op0, m_Not(m_Specific(Op1))) || match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Op0->getType());
auto foldAndOrNot = [](Value *X, Value *Y) -> Value * {
@@ -2467,8 +2542,8 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
- MaxRecurse))
+ if (Value *V =
+ simplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse))
return V;
// Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -2483,19 +2558,18 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return nullptr;
}
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
- return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit);
+Value *llvm::simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::simplifyXorInst(Op0, Op1, Q, RecursionLimit);
}
-
-static Type *GetCompareTy(Value *Op) {
+static Type *getCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
/// Rummage around inside V looking for something equivalent to the comparison
/// "LHS Pred RHS". Return such a value if found, otherwise return null.
/// Helper function for analyzing max/min idioms.
-static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
+static Value *extractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
Value *LHS, Value *RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
if (!SI)
@@ -2512,6 +2586,70 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
return nullptr;
}
+/// Return true if the underlying object (storage) must be disjoint from
+/// storage returned by any noalias return call.
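+/// For instance, the address of a static alloca or of an internal global can
+/// never equal storage returned by malloc(), so such equality comparisons can
+/// be folded (an illustrative sketch; see the equality folds further below).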
+static bool isAllocDisjoint(const Value *V) {
+  // For allocas, we consider only static ones (dynamic
+  // allocas might be transformed into calls to malloc that are not
+  // simultaneously live with the compared-to allocation). For globals, we
+  // exclude symbols that might be resolved lazily to symbols in another
+  // dynamically-loaded library (and, thus, could be malloc'ed by the
+  // implementation).
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
+ GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) &&
+ !GV->isThreadLocal();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+}
+
+/// Return true if V1 and V2 are each the base of some distinct storage region
+/// [V, object_size(V)] such that the two regions do not overlap. Note that
+/// zero sized regions *are* possible, and that a zero sized region does not
+/// overlap with any other region.
+static bool haveNonOverlappingStorage(const Value *V1, const Value *V2) {
+ // Global variables always exist, so they always exist during the lifetime
+ // of each other and all allocas. Global variables themselves usually have
+ // non-overlapping storage, but since their addresses are constants, the
+ // case involving two globals does not reach here and is instead handled in
+ // constant folding.
+ //
+ // Two different allocas usually have different addresses...
+ //
+ // However, if there's an @llvm.stackrestore dynamically in between two
+ // allocas, they may have the same address. It's tempting to reduce the
+ // scope of the problem by only looking at *static* allocas here. That would
+ // cover the majority of allocas while significantly reducing the likelihood
+ // of having an @llvm.stackrestore pop up in the middle. However, it's not
+ // actually impossible for an @llvm.stackrestore to pop up in the middle of
+ // an entry block. Also, if we have a block that's not attached to a
+ // function, we can't tell if it's "static" under the current definition.
+  // Theoretically, this problem could be fixed by creating a new instruction
+  // kind specifically for static allocas. Such a new instruction
+ // could be required to be at the top of the entry block, thus preventing it
+ // from being subject to a @llvm.stackrestore. Instcombine could even
+ // convert regular allocas into these special allocas. It'd be nifty.
+ // However, until then, this problem remains open.
+ //
+ // So, we'll assume that two non-empty allocas have different addresses
+ // for now.
+ auto isByValArg = [](const Value *V) {
+ const Argument *A = dyn_cast<Argument>(V);
+ return A && A->hasByValAttr();
+ };
+
+  // Byval args are backed by storage that does not overlap with other byval
+  // args, allocas, or globals.
+ if (isByValArg(V1))
+ return isa<AllocaInst>(V2) || isa<GlobalVariable>(V2) || isByValArg(V2);
+ if (isByValArg(V2))
+ return isa<AllocaInst>(V1) || isa<GlobalVariable>(V1) || isByValArg(V1);
+
+ return isa<AllocaInst>(V1) &&
+ (isa<AllocaInst>(V2) || isa<GlobalVariable>(V2));
+}
+
// A significant optimization not implemented here is assuming that alloca
// addresses are not equal to incoming argument values. They don't *alias*,
// as we say, but that doesn't mean they aren't equal, so we take a
@@ -2540,9 +2678,8 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
// If the C and C++ standards are ever made sufficiently restrictive in this
// area, it may be possible to update LLVM's semantics accordingly and reinstate
// this optimization.
-static Constant *
-computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const SimplifyQuery &Q) {
+static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const SimplifyQuery &Q) {
const DataLayout &DL = Q.DL;
const TargetLibraryInfo *TLI = Q.TLI;
const DominatorTree *DT = Q.DT;
@@ -2557,8 +2694,7 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
if (isa<ConstantPointerNull>(RHS) && ICmpInst::isEquality(Pred) &&
llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
IIQ.UseInstrInfo))
- return ConstantInt::get(GetCompareTy(LHS),
- !CmpInst::isTrueWhenEqual(Pred));
+ return ConstantInt::get(getCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred));
// We can only fold certain predicates on pointer comparisons.
switch (Pred) {
@@ -2588,88 +2724,47 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
// numerous hazards. AliasAnalysis and its utilities rely on special rules
// governing loads and stores which don't apply to icmps. Also, AliasAnalysis
// doesn't need to guarantee pointer inequality when it says NoAlias.
- Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
- Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
+
+  // Even if a non-inbounds GEP occurs along the path, we can still optimize
+ // equality comparisons concerning the result.
+ bool AllowNonInbounds = ICmpInst::isEquality(Pred);
+ APInt LHSOffset = stripAndComputeConstantOffsets(DL, LHS, AllowNonInbounds);
+ APInt RHSOffset = stripAndComputeConstantOffsets(DL, RHS, AllowNonInbounds);
// If LHS and RHS are related via constant offsets to the same base
// value, we can replace it with an icmp which just compares the offsets.
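// (e.g. offsets 4 and 8 off the same base: 'icmp eq' folds to false and
// 'icmp ult' folds to true.)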
if (LHS == RHS)
- return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+ return ConstantInt::get(getCompareTy(LHS),
+ ICmpInst::compare(LHSOffset, RHSOffset, Pred));
// Various optimizations for (in)equality comparisons.
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
// Different non-empty allocations that exist at the same time have
- // different addresses (if the program can tell). Global variables always
- // exist, so they always exist during the lifetime of each other and all
- // allocas. Two different allocas usually have different addresses...
- //
- // However, if there's an @llvm.stackrestore dynamically in between two
- // allocas, they may have the same address. It's tempting to reduce the
- // scope of the problem by only looking at *static* allocas here. That would
- // cover the majority of allocas while significantly reducing the likelihood
- // of having an @llvm.stackrestore pop up in the middle. However, it's not
- // actually impossible for an @llvm.stackrestore to pop up in the middle of
- // an entry block. Also, if we have a block that's not attached to a
- // function, we can't tell if it's "static" under the current definition.
- // Theoretically, this problem could be fixed by creating a new kind of
- // instruction kind specifically for static allocas. Such a new instruction
- // could be required to be at the top of the entry block, thus preventing it
- // from being subject to a @llvm.stackrestore. Instcombine could even
- // convert regular allocas into these special allocas. It'd be nifty.
- // However, until then, this problem remains open.
- //
- // So, we'll assume that two non-empty allocas have different addresses
- // for now.
- //
- // With all that, if the offsets are within the bounds of their allocations
- // (and not one-past-the-end! so we can't use inbounds!), and their
- // allocations aren't the same, the pointers are not equal.
- //
- // Note that it's not necessary to check for LHS being a global variable
- // address, due to canonicalization and constant folding.
- if (isa<AllocaInst>(LHS) &&
- (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) {
- ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
- ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
+ // different addresses (if the program can tell). If the offsets are
+ // within the bounds of their allocations (and not one-past-the-end!
+ // so we can't use inbounds!), and their allocations aren't the same,
+ // the pointers are not equal.
+ if (haveNonOverlappingStorage(LHS, RHS)) {
uint64_t LHSSize, RHSSize;
ObjectSizeOpts Opts;
- Opts.NullIsUnknownSize =
- NullPointerIsDefined(cast<AllocaInst>(LHS)->getFunction());
- if (LHSOffsetCI && RHSOffsetCI &&
- getObjectSize(LHS, LHSSize, DL, TLI, Opts) &&
- getObjectSize(RHS, RHSSize, DL, TLI, Opts)) {
- const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
- const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
- if (!LHSOffsetValue.isNegative() &&
- !RHSOffsetValue.isNegative() &&
- LHSOffsetValue.ult(LHSSize) &&
- RHSOffsetValue.ult(RHSSize)) {
- return ConstantInt::get(GetCompareTy(LHS),
- !CmpInst::isTrueWhenEqual(Pred));
- }
- }
-
- // Repeat the above check but this time without depending on DataLayout
- // or being able to compute a precise size.
- if (!cast<PointerType>(LHS->getType())->isEmptyTy() &&
- !cast<PointerType>(RHS->getType())->isEmptyTy() &&
- LHSOffset->isNullValue() &&
- RHSOffset->isNullValue())
- return ConstantInt::get(GetCompareTy(LHS),
+ Opts.EvalMode = ObjectSizeOpts::Mode::Min;
+ auto *F = [](Value *V) -> Function * {
+ if (auto *I = dyn_cast<Instruction>(V))
+ return I->getFunction();
+ if (auto *A = dyn_cast<Argument>(V))
+ return A->getParent();
+ return nullptr;
+ }(LHS);
+ Opts.NullIsUnknownSize = F ? NullPointerIsDefined(F) : true;
+ if (getObjectSize(LHS, LHSSize, DL, TLI, Opts) &&
+ getObjectSize(RHS, RHSSize, DL, TLI, Opts) &&
+ !LHSOffset.isNegative() && !RHSOffset.isNegative() &&
+ LHSOffset.ult(LHSSize) && RHSOffset.ult(RHSSize)) {
+ return ConstantInt::get(getCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
+ }
}
- // Even if an non-inbounds GEP occurs along the path we can still optimize
- // equality comparisons concerning the result. We avoid walking the whole
- // chain again by starting where the last calls to
- // stripAndComputeConstantOffsets left off and accumulate the offsets.
- Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true);
- Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true);
- if (LHS == RHS)
- return ConstantExpr::getICmp(Pred,
- ConstantExpr::getAdd(LHSOffset, LHSNoBound),
- ConstantExpr::getAdd(RHSOffset, RHSNoBound));
-
// If one side of the equality comparison must come from a noalias call
// (meaning a system memory allocation function), and the other side must
// come from a pointer that cannot overlap with dynamically-allocated
@@ -2685,29 +2780,16 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
};
// Is the set of underlying objects all things which must be disjoint from
- // noalias calls. For allocas, we consider only static ones (dynamic
- // allocas might be transformed into calls to malloc not simultaneously
- // live with the compared-to allocation). For globals, we exclude symbols
- // that might be resolve lazily to symbols in another dynamically-loaded
- // library (and, thus, could be malloc'ed by the implementation).
+ // noalias calls. We assume that indexing from such disjoint storage
+ // into the heap is undefined, and thus offsets can be safely ignored.
auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) {
- return all_of(Objects, [](const Value *V) {
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
- GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) &&
- !GV->isThreadLocal();
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
- return false;
- });
+ return all_of(Objects, ::isAllocDisjoint);
};
if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
(IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
- return ConstantInt::get(GetCompareTy(LHS),
- !CmpInst::isTrueWhenEqual(Pred));
+ return ConstantInt::get(getCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
// Fold comparisons for non-escaping pointer even if the allocation call
// cannot be elided. We cannot fold malloc comparison to null. Also, the
@@ -2724,7 +2806,7 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
// FIXME: We should also fold the compare when the pointer escapes, but the
// compare dominates the pointer escape
if (MI && !PointerMayBeCaptured(MI, true, true))
- return ConstantInt::get(GetCompareTy(LHS),
+ return ConstantInt::get(getCompareTy(LHS),
CmpInst::isFalseWhenEqual(Pred));
}
@@ -2735,7 +2817,7 @@ computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// Fold an icmp when its operands have i1 scalar type.
static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q) {
- Type *ITy = GetCompareTy(LHS); // The return type.
+ Type *ITy = getCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
if (!OpTy->isIntOrIntVectorTy(1))
return nullptr;
@@ -2773,7 +2855,8 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
case CmpInst::ICMP_SLE: // X <=s 0 -> true
return getTrue(ITy);
- default: break;
+ default:
+ break;
}
} else if (match(RHS, m_One())) {
switch (Pred) {
@@ -2797,7 +2880,8 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
case CmpInst::ICMP_SGE: // X >=s -1 -> true
return getTrue(ITy);
- default: break;
+ default:
+ break;
}
}
@@ -2805,7 +2889,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
default:
break;
case ICmpInst::ICMP_UGE:
- if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
+ if (isImpliedCondition(RHS, LHS, Q.DL).value_or(false))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGE:
@@ -2816,11 +2900,11 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
/// 0 | 1 | 1 (0 >= -1) | 1
/// 1 | 0 | 0 (-1 >= 0) | 0
/// 1 | 1 | 1 (-1 >= -1) | 1
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ if (isImpliedCondition(LHS, RHS, Q.DL).value_or(false))
return getTrue(ITy);
break;
case ICmpInst::ICMP_ULE:
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ if (isImpliedCondition(LHS, RHS, Q.DL).value_or(false))
return getTrue(ITy);
break;
}
@@ -2834,7 +2918,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
if (!match(RHS, m_Zero()))
return nullptr;
- Type *ITy = GetCompareTy(LHS); // The return type.
+ Type *ITy = getCompareTy(LHS); // The return type.
switch (Pred) {
default:
llvm_unreachable("Unknown ICmp predicate!");
@@ -2893,7 +2977,7 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const InstrInfoQuery &IIQ) {
- Type *ITy = GetCompareTy(RHS); // The return type.
+ Type *ITy = getCompareTy(RHS); // The return type.
Value *X;
// Sign-bit checks can be optimized to true/false after unsigned
@@ -2940,10 +3024,11 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-static Value *simplifyICmpWithBinOpOnLHS(
- CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
- Type *ITy = GetCompareTy(RHS); // The return type.
+static Value *simplifyICmpWithBinOpOnLHS(CmpInst::Predicate Pred,
+ BinaryOperator *LBO, Value *RHS,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ Type *ITy = getCompareTy(RHS); // The return type.
Value *Y = nullptr;
// icmp pred (or X, Y), X
@@ -3078,7 +3163,6 @@ static Value *simplifyICmpWithBinOpOnLHS(
return nullptr;
}
-
// If only one of the icmp's operands has NSW flags, try to prove that:
//
// icmp slt (x + C1), (x +nsw C2)
@@ -3113,7 +3197,6 @@ static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS,
(C2->slt(*C1) && C1->isNonPositive());
}
-
/// TODO: A large part of this logic is duplicated in InstCombine's
/// foldICmpBinOp(). We should be able to share that and avoid the code
/// duplication.
@@ -3150,7 +3233,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
- if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+ if (Value *V = simplifyICmpInst(Pred, A == RHS ? B : A,
Constant::getNullValue(RHS->getType()), Q,
MaxRecurse - 1))
return V;
@@ -3158,7 +3241,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
if (Value *V =
- SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
+ simplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
C == LHS ? D : C, Q, MaxRecurse - 1))
return V;
@@ -3186,7 +3269,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
Y = A;
Z = C;
}
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
+ if (Value *V = simplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
return V;
}
}
@@ -3206,15 +3289,15 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
if (match(RHS, m_APInt(C))) {
if (C->isStrictlyPositive()) {
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue(GetCompareTy(RHS));
+ return ConstantInt::getTrue(getCompareTy(RHS));
if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse(GetCompareTy(RHS));
+ return ConstantInt::getFalse(getCompareTy(RHS));
}
if (C->isNonNegative()) {
if (Pred == ICmpInst::ICMP_SLE)
- return ConstantInt::getTrue(GetCompareTy(RHS));
+ return ConstantInt::getTrue(getCompareTy(RHS));
if (Pred == ICmpInst::ICMP_SGT)
- return ConstantInt::getFalse(GetCompareTy(RHS));
+ return ConstantInt::getFalse(getCompareTy(RHS));
}
}
}
@@ -3237,9 +3320,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
match(LHS, m_Shl(m_One(), m_Value())) || !C->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse(GetCompareTy(RHS));
+ return ConstantInt::getFalse(getCompareTy(RHS));
if (Pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue(GetCompareTy(RHS));
+ return ConstantInt::getTrue(getCompareTy(RHS));
}
}
@@ -3248,9 +3331,9 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// (1 << X) <=u 0x8000 --> true
if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) {
if (Pred == ICmpInst::ICMP_UGT)
- return ConstantInt::getFalse(GetCompareTy(RHS));
+ return ConstantInt::getFalse(getCompareTy(RHS));
if (Pred == ICmpInst::ICMP_ULE)
- return ConstantInt::getTrue(GetCompareTy(RHS));
+ return ConstantInt::getTrue(getCompareTy(RHS));
}
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
@@ -3263,22 +3346,22 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
if (ICmpInst::isSigned(Pred) || !Q.IIQ.isExact(LBO) ||
!Q.IIQ.isExact(RBO))
break;
- if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
- return V;
+ return V;
break;
case Instruction::SDiv:
if (!ICmpInst::isEquality(Pred) || !Q.IIQ.isExact(LBO) ||
!Q.IIQ.isExact(RBO))
break;
- if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::AShr:
if (!Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO))
break;
- if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
@@ -3289,7 +3372,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
break;
if (!NSW && ICmpInst::isSigned(Pred))
break;
- if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+ if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
@@ -3299,12 +3382,12 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-/// Simplify integer comparisons where at least one operand of the compare
+/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
- Type *ITy = GetCompareTy(LHS); // The return type.
+ Type *ITy = getCompareTy(LHS); // The return type.
Value *A, *B;
CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
@@ -3349,13 +3432,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
case CmpInst::ICMP_SLE:
// Equivalent to "A EqP B". This may be the same as the condition tested
// in the max/min; if so, we can just return that.
- if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ if (Value *V = extractEquivalentCondition(LHS, EqP, A, B))
return V;
- if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ if (Value *V = extractEquivalentCondition(RHS, EqP, A, B))
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
+ if (Value *V = simplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -3363,13 +3446,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
// Equivalent to "A InvEqP B". This may be the same as the condition
// tested in the max/min; if so, we can just return that.
- if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ if (Value *V = extractEquivalentCondition(LHS, InvEqP, A, B))
return V;
- if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ if (Value *V = extractEquivalentCondition(RHS, InvEqP, A, B))
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
+ if (Value *V = simplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -3423,13 +3506,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
case CmpInst::ICMP_ULE:
// Equivalent to "A EqP B". This may be the same as the condition tested
// in the max/min; if so, we can just return that.
- if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+ if (Value *V = extractEquivalentCondition(LHS, EqP, A, B))
return V;
- if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+ if (Value *V = extractEquivalentCondition(RHS, EqP, A, B))
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
+ if (Value *V = simplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -3437,13 +3520,13 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
// Equivalent to "A InvEqP B". This may be the same as the condition
// tested in the max/min; if so, we can just return that.
- if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+ if (Value *V = extractEquivalentCondition(LHS, InvEqP, A, B))
return V;
- if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+ if (Value *V = extractEquivalentCondition(RHS, InvEqP, A, B))
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
+ if (Value *V = simplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -3499,11 +3582,10 @@ static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate,
continue;
CallInst *Assume = cast<CallInst>(AssumeVH);
- if (Optional<bool> Imp =
- isImpliedCondition(Assume->getArgOperand(0), Predicate, LHS, RHS,
- Q.DL))
+ if (Optional<bool> Imp = isImpliedCondition(Assume->getArgOperand(0),
+ Predicate, LHS, RHS, Q.DL))
if (isValidAssumeForContext(Assume, Q.CxtI, Q.DT))
- return ConstantInt::get(GetCompareTy(LHS), *Imp);
+ return ConstantInt::get(getCompareTy(LHS), *Imp);
}
}
@@ -3512,7 +3594,7 @@ static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate,
/// Given operands for an ICmpInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
@@ -3527,7 +3609,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X");
- Type *ITy = GetCompareTy(LHS); // The return type.
+ Type *ITy = getCompareTy(LHS); // The return type.
// icmp poison, X -> poison
if (isa<PoisonValue>(RHS))
@@ -3589,15 +3671,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ if (Value *V = simplifyICmpInst(Pred, SrcOp,
ConstantExpr::getIntToPtr(RHSC, SrcTy),
- Q, MaxRecurse-1))
+ Q, MaxRecurse - 1))
return V;
} else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
if (RI->getOperand(0)->getType() == SrcTy)
// Compare without the cast.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
+ if (Value *V = simplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q,
+ MaxRecurse - 1))
return V;
}
}
@@ -3608,9 +3690,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that signed predicates become unsigned.
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), Q,
- MaxRecurse-1))
+ if (Value *V =
+ simplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), SrcOp,
+ RI->getOperand(0), Q, MaxRecurse - 1))
return V;
}
// Fold (zext X) ule (sext X), (zext X) sge (sext X) to true.
@@ -3633,15 +3715,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two zero-extended values.
if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, Q, MaxRecurse-1))
+ if (Value *V = simplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, Q, MaxRecurse - 1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
// there. Use this to work out the result of the comparison.
if (RExt != CI) {
switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
+ default:
+ llvm_unreachable("Unknown ICmp predicate!");
// LHS <u RHS.
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
@@ -3657,15 +3740,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// is non-negative then LHS <s RHS.
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
+ return CI->getValue().isNegative()
+ ? ConstantInt::getTrue(CI->getContext())
+ : ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
+ return CI->getValue().isNegative()
+ ? ConstantInt::getFalse(CI->getContext())
+ : ConstantInt::getTrue(CI->getContext());
}
}
}
@@ -3677,8 +3760,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that the predicate does not change.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
+ if (Value *V = simplifyICmpInst(Pred, SrcOp, RI->getOperand(0), Q,
+ MaxRecurse - 1))
return V;
}
// Fold (sext X) uge (zext X), (sext X) sle (zext X) to true.
@@ -3701,14 +3784,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
+ if (Value *V =
+ simplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse - 1))
return V;
// Otherwise the upper bits of LHS are all equal, while RHS has varying
// bits there. Use this to work out the result of the comparison.
if (RExt != CI) {
switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
+ default:
+ llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_EQ:
return ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_NE:
@@ -3718,14 +3803,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// LHS >s RHS.
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
+ return CI->getValue().isNegative()
+ ? ConstantInt::getTrue(CI->getContext())
+ : ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
+ return CI->getValue().isNegative()
+ ? ConstantInt::getFalse(CI->getContext())
+ : ConstantInt::getTrue(CI->getContext());
// If LHS is non-negative then LHS <u RHS. If LHS is negative then
// LHS >u RHS.
@@ -3733,18 +3818,18 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_UGE:
// Comparison is true iff the LHS <s 0.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
+ if (Value *V = simplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy), Q,
+ MaxRecurse - 1))
return V;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
// Comparison is true iff the LHS >=s 0.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
+ if (Value *V = simplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy), Q,
+ MaxRecurse - 1))
return V;
break;
}
@@ -3788,26 +3873,26 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+ if (Value *V = threadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+ if (Value *V = threadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return nullptr;
}
-Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *llvm::simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
+ return ::simplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
}
/// Given operands for an FCmpInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
@@ -3815,7 +3900,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI,
+ Q.CxtI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -3823,7 +3909,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Fold trivial predicates.
- Type *RetTy = GetCompareTy(LHS);
+ Type *RetTy = getCompareTy(LHS);
if (Pred == FCmpInst::FCMP_FALSE)
return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_TRUE)
@@ -3943,23 +4029,29 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// The ordered relationship and minnum/maxnum guarantee that we do not
// have NaN constants, so ordered/unordered preds are handled the same.
switch (Pred) {
- case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
// minnum(X, LesserC) == C --> false
// maxnum(X, GreaterC) == C --> false
return getFalse(RetTy);
- case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
// minnum(X, LesserC) != C --> true
// maxnum(X, GreaterC) != C --> true
return getTrue(RetTy);
- case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE:
- case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGE:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_UGT:
// minnum(X, LesserC) >= C --> false
// minnum(X, LesserC) > C --> false
// maxnum(X, GreaterC) >= C --> true
// maxnum(X, GreaterC) > C --> true
return ConstantInt::get(RetTy, IsMaxNum);
- case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE:
- case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_ULT:
// minnum(X, LesserC) <= C --> true
// minnum(X, LesserC) < C --> true
// maxnum(X, GreaterC) <= C --> false
@@ -3997,21 +4089,21 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
+ if (Value *V = threadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
+ if (Value *V = threadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return nullptr;
}
-Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *llvm::simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
+ return ::simplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
}
static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
@@ -4078,22 +4170,21 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
};
if (auto *B = dyn_cast<BinaryOperator>(I))
- return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), NewOps[0],
+ return PreventSelfSimplify(simplifyBinOp(B->getOpcode(), NewOps[0],
NewOps[1], Q, MaxRecurse - 1));
if (CmpInst *C = dyn_cast<CmpInst>(I))
- return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), NewOps[0],
+ return PreventSelfSimplify(simplifyCmpInst(C->getPredicate(), NewOps[0],
NewOps[1], Q, MaxRecurse - 1));
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
- return PreventSelfSimplify(SimplifyGEPInst(
+ return PreventSelfSimplify(simplifyGEPInst(
GEP->getSourceElementType(), NewOps[0], makeArrayRef(NewOps).slice(1),
GEP->isInBounds(), Q, MaxRecurse - 1));
if (isa<SelectInst>(I))
- return PreventSelfSimplify(
- SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q,
- MaxRecurse - 1));
+ return PreventSelfSimplify(simplifySelectInst(
+ NewOps[0], NewOps[1], NewOps[2], Q, MaxRecurse - 1));
// TODO: We could hand off more cases to instsimplify here.
}
@@ -4119,14 +4210,6 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (!AllowRefinement && canCreatePoison(cast<Operator>(I)))
return nullptr;
- if (CmpInst *C = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
- ConstOps[1], Q.DL, Q.TLI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(ConstOps[0], LI->getType(), Q.DL);
-
return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI);
}
@@ -4189,7 +4272,8 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS,
/// Try to simplify a select instruction when its condition operand is an
/// integer comparison.
static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
- Value *FalseVal, const SimplifyQuery &Q,
+ Value *FalseVal,
+ const SimplifyQuery &Q,
unsigned MaxRecurse) {
ICmpInst::Predicate Pred;
Value *CmpLHS, *CmpRHS;
@@ -4209,7 +4293,8 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
Value *X, *Y;
SelectPatternFlavor SPF =
matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal,
- X, Y).Flavor;
+ X, Y)
+ .Flavor;
if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) {
APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF),
X->getType()->getScalarSizeInBits());
@@ -4261,8 +4346,8 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
}
// Check for other compares that behave like bit test.
- if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred,
- TrueVal, FalseVal))
+ if (Value *V =
+ simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred, TrueVal, FalseVal))
return V;
// If we have a scalar equality comparison, then we know the value in one of
@@ -4272,18 +4357,18 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
// because each element of a vector select is chosen independently.
if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) {
if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
- /* AllowRefinement */ false, MaxRecurse) ==
- TrueVal ||
+ /* AllowRefinement */ false,
+ MaxRecurse) == TrueVal ||
simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
- /* AllowRefinement */ false, MaxRecurse) ==
- TrueVal)
+ /* AllowRefinement */ false,
+ MaxRecurse) == TrueVal)
return FalseVal;
if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
- /* AllowRefinement */ true, MaxRecurse) ==
- FalseVal ||
+ /* AllowRefinement */ true,
+ MaxRecurse) == FalseVal ||
simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q,
- /* AllowRefinement */ true, MaxRecurse) ==
- FalseVal)
+ /* AllowRefinement */ true,
+ MaxRecurse) == FalseVal)
return FalseVal;
}
@@ -4302,11 +4387,11 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
// This transform is safe if we do not have (do not care about) -0.0 or if
// at least one operand is known to not be -0.0. Otherwise, the select can
// change the sign of a zero operand.
- bool HasNoSignedZeros = Q.CxtI && isa<FPMathOperator>(Q.CxtI) &&
- Q.CxtI->hasNoSignedZeros();
+ bool HasNoSignedZeros =
+ Q.CxtI && isa<FPMathOperator>(Q.CxtI) && Q.CxtI->hasNoSignedZeros();
const APFloat *C;
if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) ||
- (match(F, m_APFloat(C)) && C->isNonZero())) {
+ (match(F, m_APFloat(C)) && C->isNonZero())) {
// (T == F) ? T : F --> F
// (F == T) ? T : F --> F
if (Pred == FCmpInst::FCMP_OEQ)
@@ -4323,7 +4408,7 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (auto *CondC = dyn_cast<Constant>(Cond)) {
if (auto *TrueC = dyn_cast<Constant>(TrueVal))
@@ -4439,14 +4524,14 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
return nullptr;
}
-Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+Value *llvm::simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const SimplifyQuery &Q) {
- return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
+ return ::simplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
}
/// Given operands for an GetElementPtrInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr,
+static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
ArrayRef<Value *> Indices, bool InBounds,
const SimplifyQuery &Q, unsigned) {
// The type of the GEP pointer operand.
@@ -4473,6 +4558,13 @@ static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr,
}
}
+ // For opaque pointers an all-zero GEP is a no-op. For typed pointers,
+ // it may be equivalent to a bitcast.
+ if (Ptr->getType()->getScalarType()->isOpaquePointerTy() &&
+ Ptr->getType() == GEPTy &&
+ all_of(Indices, [](const auto *V) { return match(V, m_Zero()); }))
+ return Ptr;
+
// getelementptr poison, idx -> poison
// getelementptr baseptr, poison -> poison
if (isa<PoisonValue>(Ptr) ||
@@ -4577,16 +4669,16 @@ static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr,
return ConstantFoldConstant(CE, Q.DL);
}
-Value *llvm::SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices,
+Value *llvm::simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices,
bool InBounds, const SimplifyQuery &Q) {
- return ::SimplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit);
+ return ::simplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit);
}
/// Given operands for an InsertValueInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs, const SimplifyQuery &Q,
- unsigned) {
+static Value *simplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const SimplifyQuery &Q, unsigned) {
if (Constant *CAgg = dyn_cast<Constant>(Agg))
if (Constant *CVal = dyn_cast<Constant>(Val))
return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
@@ -4611,13 +4703,13 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
return nullptr;
}
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+Value *llvm::simplifyInsertValueInst(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q) {
- return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
+ return ::simplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
}
-Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
+Value *llvm::simplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
const SimplifyQuery &Q) {
// Try to constant fold.
auto *VecC = dyn_cast<Constant>(Vec);
@@ -4654,7 +4746,7 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
/// Given operands for an ExtractValueInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+static Value *simplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const SimplifyQuery &, unsigned) {
if (auto *CAgg = dyn_cast<Constant>(Agg))
return ConstantFoldExtractValueInstruction(CAgg, Idxs);
@@ -4677,14 +4769,14 @@ static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
return nullptr;
}
-Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+Value *llvm::simplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q) {
- return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
+ return ::simplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
}
/// Given operands for an ExtractElementInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
+static Value *simplifyExtractElementInst(Value *Vec, Value *Idx,
const SimplifyQuery &Q, unsigned) {
auto *VecVTy = cast<VectorType>(Vec->getType());
if (auto *CVec = dyn_cast<Constant>(Vec)) {
@@ -4721,13 +4813,13 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
return nullptr;
}
-Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx,
+Value *llvm::simplifyExtractElementInst(Value *Vec, Value *Idx,
const SimplifyQuery &Q) {
- return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
+ return ::simplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
}
/// See if we can fold the given phi. If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
+static Value *simplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
const SimplifyQuery &Q) {
// WARNING: no matter how worthwhile it may seem, we can not perform PHI CSE
// here, because the PHI we may succeed simplifying to was not
@@ -4739,14 +4831,15 @@ static Value *SimplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
bool HasUndefInput = false;
for (Value *Incoming : IncomingValues) {
// If the incoming value is the phi node itself, it can safely be skipped.
- if (Incoming == PN) continue;
+ if (Incoming == PN)
+ continue;
if (Q.isUndefValue(Incoming)) {
// Remember that we saw an undef value, but otherwise ignore them.
HasUndefInput = true;
continue;
}
if (CommonValue && Incoming != CommonValue)
- return nullptr; // Not the same, bail out.
+ return nullptr; // Not the same, bail out.
CommonValue = Incoming;
}
@@ -4755,17 +4848,24 @@ static Value *SimplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
if (!CommonValue)
return UndefValue::get(PN->getType());
- // If we have a PHI node like phi(X, undef, X), where X is defined by some
- // instruction, we cannot return X as the result of the PHI node unless it
- // dominates the PHI block.
- if (HasUndefInput)
+ if (HasUndefInput) {
+ // We cannot start executing a trapping constant expression on more control
+ // flow paths.
+ auto *C = dyn_cast<Constant>(CommonValue);
+ if (C && C->canTrap())
+ return nullptr;
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot return X as the result of the PHI node unless it
+ // dominates the PHI block.
return valueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
+ }
return CommonValue;
}
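
[Editor's note] The rewritten block above tightens the phi(X, undef, X) ==> X fold: when any input is undef, a common value that is a trapping constant expression is no longer propagated, since that would execute the trap on control-flow paths where the phi previously yielded undef. A plain-C++ model of the decision, with hypothetical names (CanTrap stands in for Constant::canTrap()):

    #include <cassert>
    #include <optional>
    #include <vector>

    struct Val {
      bool IsUndef = false;
      bool CanTrap = false; // e.g. a constant sdiv by a maybe-zero divisor
      int Id = 0;
    };

    std::optional<Val> foldPhi(const std::vector<Val> &In) {
      const Val *Common = nullptr;
      bool HasUndef = false;
      for (const Val &V : In) {
        if (V.IsUndef) { HasUndef = true; continue; }
        if (Common && V.Id != Common->Id)
          return std::nullopt;          // two distinct inputs: no fold
        Common = &V;
      }
      if (!Common)
        return Val{true, false, 0};     // all-undef phi folds to undef
      if (HasUndef && Common->CanTrap)
        return std::nullopt;            // the new guard: don't widen a trap
      return *Common;                   // (the dominance check is elided)
    }

    int main() {
      Val X{false, false, 1}, T{false, true, 2}, U{true, false, 0};
      assert(foldPhi({X, U, X}).has_value());  // phi(X, undef, X) ==> X
      assert(!foldPhi({T, U, T}).has_value()); // trapping X: fold refused
      return 0;
    }
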
-static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
- Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) {
+static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (auto *C = dyn_cast<Constant>(Op))
return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
@@ -4798,9 +4898,9 @@ static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
return nullptr;
}
-Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+Value *llvm::simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
const SimplifyQuery &Q) {
- return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
+ return ::simplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
}
/// For the given destination element of a shuffle, peek through shuffles to
@@ -4854,7 +4954,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
return RootVec;
}
-static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
+static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
ArrayRef<int> Mask, Type *RetTy,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
@@ -4970,14 +5070,14 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
}
/// Given operands for a ShuffleVectorInst, fold the result or return null.
-Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
+Value *llvm::simplifyShuffleVectorInst(Value *Op0, Value *Op1,
ArrayRef<int> Mask, Type *RetTy,
const SimplifyQuery &Q) {
- return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
+ return ::simplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
-static Constant *foldConstant(Instruction::UnaryOps Opcode,
- Value *&Op, const SimplifyQuery &Q) {
+static Constant *foldConstant(Instruction::UnaryOps Opcode, Value *&Op,
+ const SimplifyQuery &Q) {
if (auto *C = dyn_cast<Constant>(Op))
return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL);
return nullptr;
@@ -4998,7 +5098,7 @@ static Value *simplifyFNegInst(Value *Op, FastMathFlags FMF,
return nullptr;
}
-Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+Value *llvm::simplifyFNegInst(Value *Op, FastMathFlags FMF,
const SimplifyQuery &Q) {
return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit);
}
@@ -5049,15 +5149,10 @@ static Constant *simplifyFPOp(ArrayRef<Value *> Ops, FastMathFlags FMF,
return nullptr;
}
-// TODO: Move this out to a header file:
-static inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) {
- return (EB == fp::ebIgnore || FMF.noNaNs());
-}
-
/// Given operands for an FAdd, see if we can fold the result. If not, this
/// returns null.
static Value *
-SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5119,7 +5214,7 @@ SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
/// Given operands for an FSub, see if we can fold the result. If not, this
/// returns null.
static Value *
-SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5130,24 +5225,28 @@ SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
- if (!isDefaultFPEnvironment(ExBehavior, Rounding))
- return nullptr;
-
// fsub X, +0 ==> X
- if (match(Op1, m_PosZeroFP()))
- return Op0;
+ if (canIgnoreSNaN(ExBehavior, FMF) &&
+ (!canRoundingModeBe(Rounding, RoundingMode::TowardNegative) ||
+ FMF.noSignedZeros()))
+ if (match(Op1, m_PosZeroFP()))
+ return Op0;
// fsub X, -0 ==> X, when we know X is not -0
- if (match(Op1, m_NegZeroFP()) &&
- (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
- return Op0;
+ if (canIgnoreSNaN(ExBehavior, FMF))
+ if (match(Op1, m_NegZeroFP()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+ return Op0;
// fsub -0.0, (fsub -0.0, X) ==> X
// fsub -0.0, (fneg X) ==> X
Value *X;
- if (match(Op0, m_NegZeroFP()) &&
- match(Op1, m_FNeg(m_Value(X))))
- return X;
+ if (canIgnoreSNaN(ExBehavior, FMF))
+ if (match(Op0, m_NegZeroFP()) && match(Op1, m_FNeg(m_Value(X))))
+ return X;
+
+ if (!isDefaultFPEnvironment(ExBehavior, Rounding))
+ return nullptr;
// fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
// fsub 0.0, (fneg X) ==> X if signed zeros are ignored.
@@ -5170,7 +5269,7 @@ SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return nullptr;
}
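
[Editor's note] Instead of bailing out of every fsub identity whenever the FP environment is non-default, each fold above now carries its own guard; the first fold is the most subtle one, because rounding toward negative infinity turns an exact +0 result into -0. A plain-C++ model of those guards (names mirror the patch; per the deleted TODO earlier in this diff, canIgnoreSNaN has moved into a shared header):

    #include <cassert>

    enum class Except { Ignore, Strict };
    enum class Round { Nearest, TowardNegative, Dynamic };

    static bool canIgnoreSNaN(Except EB, bool NoNaNs) {
      return EB == Except::Ignore || NoNaNs;
    }
    static bool canRoundingModeBe(Round RM, Round Query) {
      return RM == Query || RM == Round::Dynamic; // dynamic could be anything
    }

    // "fsub X, +0 ==> X" needs both conditions: an SNaN operand must not
    // raise a visible exception, and rounding toward -infinity must not be
    // able to turn a +0 result into -0 (the X == +0 case), unless signed
    // zeros do not matter anyway.
    static bool canFoldFSubPosZero(Except EB, Round RM, bool NoNaNs, bool NSZ) {
      return canIgnoreSNaN(EB, NoNaNs) &&
             (!canRoundingModeBe(RM, Round::TowardNegative) || NSZ);
    }

    int main() {
      assert(canFoldFSubPosZero(Except::Ignore, Round::Nearest, false, false));
      assert(!canFoldFSubPosZero(Except::Ignore, Round::Dynamic, false, false));
      return 0;
    }
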
-static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
@@ -5201,8 +5300,8 @@ static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
// 2. Ignore non-zero negative numbers because sqrt would produce NAN.
// 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0.
Value *X;
- if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) &&
- FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros())
+ if (Op0 == Op1 && match(Op0, m_Sqrt(m_Value(X))) && FMF.allowReassoc() &&
+ FMF.noNaNs() && FMF.noSignedZeros())
return X;
return nullptr;
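
[Editor's note] The rewritten match uses the m_Sqrt helper, but the fold itself, sqrt(X) * sqrt(X) ==> X, still needs all three flags listed in the comments above. A small standalone demonstration of the two failure modes that nnan and nsz rule out:

    #include <cmath>
    #include <cstdio>

    int main() {
      // Without nnan: for X < 0, sqrt(X) is NaN, so the product is NaN, not X.
      double A = -1.0;
      std::printf("%f vs %f\n", std::sqrt(A) * std::sqrt(A), A); // nan vs -1
      // Without nsz: sqrt(-0.0) == -0.0 and (-0.0)*(-0.0) == +0.0, not -0.0.
      double B = -0.0;
      std::printf("%f vs %f\n", std::sqrt(B) * std::sqrt(B), B); // 0 vs -0
      // reassoc then licenses treating the rounded product as exactly X.
      return 0;
    }
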
@@ -5210,7 +5309,7 @@ static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
/// Given the operands for an FMul, see if we can fold the result
static Value *
-SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5219,43 +5318,43 @@ SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return C;
// Now apply simplifications that do not require rounding.
- return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse, ExBehavior, Rounding);
+ return simplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse, ExBehavior, Rounding);
}
-Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
- return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+ return ::simplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
-Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
- return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+ return ::simplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
-Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
- return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+ return ::simplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
-Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
- return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+ return ::simplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
static Value *
-SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5301,16 +5400,16 @@ SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return nullptr;
}
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
- return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+ return ::simplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
static Value *
-SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+simplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
@@ -5339,11 +5438,11 @@ SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return nullptr;
}
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+Value *llvm::simplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
- return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
+ return ::simplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
@@ -5365,8 +5464,8 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
/// If not, this returns null.
/// Try to use FastMathFlags when folding the result.
static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
- const FastMathFlags &FMF,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
+ const FastMathFlags &FMF, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FNeg:
return simplifyFNegInst(Op, FMF, Q, MaxRecurse);
@@ -5375,56 +5474,56 @@ static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
}
}
-Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
+Value *llvm::simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
}
-Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+Value *llvm::simplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
const SimplifyQuery &Q) {
return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
}
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+static Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
- return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
+ return simplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::Sub:
- return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
+ return simplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::Mul:
- return SimplifyMulInst(LHS, RHS, Q, MaxRecurse);
+ return simplifyMulInst(LHS, RHS, Q, MaxRecurse);
case Instruction::SDiv:
- return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ return simplifySDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::UDiv:
- return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ return simplifyUDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::SRem:
- return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ return simplifySRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::URem:
- return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ return simplifyURemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Shl:
- return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
+ return simplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse);
+ return simplifyLShrInst(LHS, RHS, false, Q, MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse);
+ return simplifyAShrInst(LHS, RHS, false, Q, MaxRecurse);
case Instruction::And:
- return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ return simplifyAndInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Or:
- return SimplifyOrInst(LHS, RHS, Q, MaxRecurse);
+ return simplifyOrInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Xor:
- return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
+ return simplifyXorInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FAdd:
- return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return simplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FSub:
- return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return simplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FMul:
- return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return simplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FDiv:
- return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return simplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FRem:
- return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return simplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
default:
llvm_unreachable("Unexpected opcode");
}
@@ -5433,49 +5532,50 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
/// Try to use FastMathFlags when folding the result.
-static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+static Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const FastMathFlags &FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FAdd:
- return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
+ return simplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FSub:
- return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
+ return simplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FMul:
- return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ return simplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FDiv:
- return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
+ return simplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
default:
- return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
+ return simplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
}
}
-Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+Value *llvm::simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
+ return ::simplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
}
-Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+Value *llvm::simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
+ return ::simplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
}
/// Given operands for a CmpInst, see if we can fold the result.
-static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+static Value *simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
- return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return simplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return simplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
}
-Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+Value *llvm::simplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
+ return ::simplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
}
-static bool IsIdempotent(Intrinsic::ID ID) {
+static bool isIdempotent(Intrinsic::ID ID) {
switch (ID) {
- default: return false;
+ default:
+ return false;
// Unary idempotent: f(f(x)) = f(x)
case Intrinsic::fabs:
@@ -5491,7 +5591,7 @@ static bool IsIdempotent(Intrinsic::ID ID) {
}
}
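
[Editor's note] isIdempotent (now lower-camel-cased) drives the fold f(f(x)) ==> f(x) a few hunks below. Idempotence here means applying the intrinsic twice gives the same result as applying it once, which holds for fabs and the rounding family. A quick standalone check of the property:

    #include <cassert>
    #include <cmath>

    int main() {
      for (double X : {-2.5, -0.0, 3.75}) {
        assert(std::fabs(std::fabs(X)) == std::fabs(X));    // fabs
        assert(std::floor(std::floor(X)) == std::floor(X)); // rounding family
      }
      return 0;
    }
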
-static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
+static Value *simplifyRelativeLoad(Constant *Ptr, Constant *Offset,
const DataLayout &DL) {
GlobalValue *PtrSym;
APInt PtrOffset;
@@ -5551,7 +5651,7 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
const SimplifyQuery &Q) {
// Idempotent functions return the same result when called repeatedly.
Intrinsic::ID IID = F->getIntrinsicID();
- if (IsIdempotent(IID))
+ if (isIdempotent(IID))
if (auto *II = dyn_cast<IntrinsicInst>(Op0))
if (II->getIntrinsicID() == IID)
return II;
@@ -5559,15 +5659,18 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
Value *X;
switch (IID) {
case Intrinsic::fabs:
- if (SignBitMustBeZero(Op0, Q.TLI)) return Op0;
+ if (SignBitMustBeZero(Op0, Q.TLI))
+ return Op0;
break;
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
- if (match(Op0, m_BSwap(m_Value(X)))) return X;
+ if (match(Op0, m_BSwap(m_Value(X))))
+ return X;
break;
case Intrinsic::bitreverse:
// bitreverse(bitreverse(x)) -> x
- if (match(Op0, m_BitReverse(m_Value(X)))) return X;
+ if (match(Op0, m_BitReverse(m_Value(X))))
+ return X;
break;
case Intrinsic::ctpop: {
// If everything but the lowest bit is zero, that bit is the pop-count. Ex:
@@ -5581,30 +5684,34 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
case Intrinsic::exp:
// exp(log(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
- match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X;
+ match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X))))
+ return X;
break;
case Intrinsic::exp2:
// exp2(log2(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
- match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X;
+ match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
+ return X;
break;
case Intrinsic::log:
// log(exp(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
- match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X;
+ match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
+ return X;
break;
case Intrinsic::log2:
// log2(exp2(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
(match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) ||
- match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0),
- m_Value(X))))) return X;
+ match(Op0,
+ m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0), m_Value(X)))))
+ return X;
break;
case Intrinsic::log10:
// log10(pow(10.0, x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
- match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0),
- m_Value(X)))) return X;
+ match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0), m_Value(X))))
+ return X;
break;
case Intrinsic::floor:
case Intrinsic::trunc:
@@ -5826,7 +5933,7 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
case Intrinsic::load_relative:
if (auto *C0 = dyn_cast<Constant>(Op0))
if (auto *C1 = dyn_cast<Constant>(Op1))
- return SimplifyRelativeLoad(C0, C1, Q.DL);
+ return simplifyRelativeLoad(C0, C1, Q.DL);
break;
case Intrinsic::powi:
if (auto *Power = dyn_cast<ConstantInt>(Op1)) {
@@ -5853,7 +5960,8 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
case Intrinsic::maximum:
case Intrinsic::minimum: {
// If the arguments are the same, this is a no-op.
- if (Op0 == Op1) return Op0;
+ if (Op0 == Op1)
+ return Op0;
// Canonicalize constant operand as Op1.
if (isa<Constant>(Op0))
@@ -5906,14 +6014,14 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
break;
}
- case Intrinsic::experimental_vector_extract: {
+ case Intrinsic::vector_extract: {
Type *ReturnType = F->getReturnType();
// (extract_vector (insert_vector _, X, 0), 0) -> X
unsigned IdxN = cast<ConstantInt>(Op1)->getZExtValue();
Value *X = nullptr;
- if (match(Op0, m_Intrinsic<Intrinsic::experimental_vector_insert>(
- m_Value(), m_Value(X), m_Zero())) &&
+ if (match(Op0, m_Intrinsic<Intrinsic::vector_insert>(m_Value(), m_Value(X),
+ m_Zero())) &&
IdxN == 0 && X->getType() == ReturnType)
return X;
@@ -6054,7 +6162,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
return nullptr;
}
- case Intrinsic::experimental_vector_insert: {
+ case Intrinsic::vector_insert: {
Value *Vec = Call->getArgOperand(0);
Value *SubVec = Call->getArgOperand(1);
Value *Idx = Call->getArgOperand(2);
@@ -6064,8 +6172,8 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
// where: Y is X, or Y is undef
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
Value *X = nullptr;
- if (match(SubVec, m_Intrinsic<Intrinsic::experimental_vector_extract>(
- m_Value(X), m_Zero())) &&
+ if (match(SubVec,
+ m_Intrinsic<Intrinsic::vector_extract>(m_Value(X), m_Zero())) &&
(Q.isUndefValue(Vec) || Vec == X) && IdxN == 0 &&
X->getType() == ReturnType)
return X;
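
[Editor's note] These two hunks also migrate the intrinsics from their experimental_vector_* names to vector_insert/vector_extract. The paired folds eliminate a round trip through a subvector: extracting a prefix at index 0 and re-inserting it at index 0 of the same (or an undef) vector is the identity. A plain-C++ model over fixed-size arrays (hypothetical helpers; scalable-vector details elided):

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Take the first M lanes of an N-lane "vector".
    template <std::size_t M, std::size_t N>
    std::array<int, M> extractPrefix(const std::array<int, N> &V) {
      static_assert(M <= N, "prefix must fit");
      std::array<int, M> Out{};
      for (std::size_t I = 0; I < M; ++I)
        Out[I] = V[I];
      return Out;
    }

    // Overwrite the first M lanes of an N-lane "vector".
    template <std::size_t M, std::size_t N>
    std::array<int, N> insertPrefix(std::array<int, N> V,
                                    const std::array<int, M> &Sub) {
      static_assert(M <= N, "prefix must fit");
      for (std::size_t I = 0; I < M; ++I)
        V[I] = Sub[I];
      return V;
    }

    int main() {
      std::array<int, 8> X{1, 2, 3, 4, 5, 6, 7, 8};
      // insert(X, extract(X, 0), 0) == X: the fold performed at the IR level.
      assert(insertPrefix(X, extractPrefix<4>(X)) == X);
      return 0;
    }
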
@@ -6074,43 +6182,38 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
case Intrinsic::experimental_constrained_fadd: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return SimplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
+ return simplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
- break;
}
case Intrinsic::experimental_constrained_fsub: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return SimplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
+ return simplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
- break;
}
case Intrinsic::experimental_constrained_fmul: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return SimplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
+ return simplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
- break;
}
case Intrinsic::experimental_constrained_fdiv: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return SimplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
+ return simplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
- break;
}
case Intrinsic::experimental_constrained_frem: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return SimplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
+ return simplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
- break;
}
default:
return nullptr;
@@ -6138,7 +6241,7 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) {
return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
}
-Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) {
// musttail calls can only be simplified if they are also DCEd.
// As we can't guarantee this here, don't simplify them.
if (Call->isMustTailCall())
@@ -6161,8 +6264,17 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
return nullptr;
}
+Value *llvm::simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q) {
+ assert(isa<ConstrainedFPIntrinsic>(Call));
+ if (Value *V = tryConstantFoldCall(Call, Q))
+ return V;
+ if (Value *Ret = simplifyIntrinsic(Call, Q))
+ return Ret;
+ return nullptr;
+}
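
[Editor's note] simplifyConstrainedFPCall is new API surfaced by this patch: it runs constant folding and then the intrinsic simplifier on a call already known to be a constrained FP intrinsic (the asserted precondition). Note also that the break statements after the return statements in the constrained-FP cases above were unreachable and are dropped. A usage sketch, assuming LLVM headers and an already-built SimplifyQuery (not a complete pass):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // Returns a replacement value, or null if no simplification applies.
    static Value *trySimplifyConstrained(Instruction &I,
                                         const SimplifyQuery &SQ) {
      if (auto *CFP = dyn_cast<ConstrainedFPIntrinsic>(&I))
        return simplifyConstrainedFPCall(CFP, SQ);
      return nullptr;
    }
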
+
/// Given operands for a Freeze, see if we can fold the result.
-static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
+static Value *simplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
// Use a utility function defined in ValueTracking.
if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT))
return Op0;
@@ -6170,11 +6282,11 @@ static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
return nullptr;
}
-Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
- return ::SimplifyFreezeInst(Op0, Q);
+Value *llvm::simplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
+ return ::simplifyFreezeInst(Op0, Q);
}
-static Value *SimplifyLoadInst(LoadInst *LI, Value *PtrOp,
+static Value *simplifyLoadInst(LoadInst *LI, Value *PtrOp,
const SimplifyQuery &Q) {
if (LI->isVolatile())
return nullptr;
@@ -6218,134 +6330,134 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
}
break;
case Instruction::FNeg:
- Result = SimplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q);
+ Result = simplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q);
break;
case Instruction::FAdd:
- Result = SimplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ Result = simplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Add:
- Result = SimplifyAddInst(
+ Result = simplifyAddInst(
NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FSub:
- Result = SimplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ Result = simplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Sub:
- Result = SimplifySubInst(
+ Result = simplifySubInst(
NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FMul:
- Result = SimplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ Result = simplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(NewOps[0], NewOps[1], Q);
+ Result = simplifyMulInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(NewOps[0], NewOps[1], Q);
+ Result = simplifySDivInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(NewOps[0], NewOps[1], Q);
+ Result = simplifyUDivInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ Result = simplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(NewOps[0], NewOps[1], Q);
+ Result = simplifySRemInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::URem:
- Result = SimplifyURemInst(NewOps[0], NewOps[1], Q);
+ Result = simplifyURemInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ Result = simplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Shl:
- Result = SimplifyShlInst(
+ Result = simplifyShlInst(
NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::LShr:
- Result = SimplifyLShrInst(NewOps[0], NewOps[1],
+ Result = simplifyLShrInst(NewOps[0], NewOps[1],
Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::AShr:
- Result = SimplifyAShrInst(NewOps[0], NewOps[1],
+ Result = simplifyAShrInst(NewOps[0], NewOps[1],
Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::And:
- Result = SimplifyAndInst(NewOps[0], NewOps[1], Q);
+ Result = simplifyAndInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::Or:
- Result = SimplifyOrInst(NewOps[0], NewOps[1], Q);
+ Result = simplifyOrInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(NewOps[0], NewOps[1], Q);
+ Result = simplifyXorInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::ICmp:
- Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
+ Result = simplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
NewOps[1], Q);
break;
case Instruction::FCmp:
- Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
+ Result = simplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Select:
- Result = SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
+ Result = simplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
break;
case Instruction::GetElementPtr: {
auto *GEPI = cast<GetElementPtrInst>(I);
Result =
- SimplifyGEPInst(GEPI->getSourceElementType(), NewOps[0],
+ simplifyGEPInst(GEPI->getSourceElementType(), NewOps[0],
makeArrayRef(NewOps).slice(1), GEPI->isInBounds(), Q);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
- Result = SimplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q);
+ Result = simplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q);
break;
}
case Instruction::InsertElement: {
- Result = SimplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q);
+ Result = simplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q);
break;
}
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(I);
- Result = SimplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q);
+ Result = simplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q);
break;
}
case Instruction::ExtractElement: {
- Result = SimplifyExtractElementInst(NewOps[0], NewOps[1], Q);
+ Result = simplifyExtractElementInst(NewOps[0], NewOps[1], Q);
break;
}
case Instruction::ShuffleVector: {
auto *SVI = cast<ShuffleVectorInst>(I);
- Result = SimplifyShuffleVectorInst(
+ Result = simplifyShuffleVectorInst(
NewOps[0], NewOps[1], SVI->getShuffleMask(), SVI->getType(), Q);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), NewOps, Q);
+ Result = simplifyPHINode(cast<PHINode>(I), NewOps, Q);
break;
case Instruction::Call: {
// TODO: Use NewOps
- Result = SimplifyCall(cast<CallInst>(I), Q);
+ Result = simplifyCall(cast<CallInst>(I), Q);
break;
}
case Instruction::Freeze:
- Result = llvm::SimplifyFreezeInst(NewOps[0], Q);
+ Result = llvm::simplifyFreezeInst(NewOps[0], Q);
break;
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
#include "llvm/IR/Instruction.def"
#undef HANDLE_CAST_INST
- Result = SimplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q);
+ Result = simplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q);
break;
case Instruction::Alloca:
// No simplifications for Alloca and it can't be constant folded.
Result = nullptr;
break;
case Instruction::Load:
- Result = SimplifyLoadInst(cast<LoadInst>(I), NewOps[0], Q);
+ Result = simplifyLoadInst(cast<LoadInst>(I), NewOps[0], Q);
break;
}
@@ -6355,7 +6467,7 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
return Result == I ? UndefValue::get(I->getType()) : Result;
}
-Value *llvm::SimplifyInstructionWithOperands(Instruction *I,
+Value *llvm::simplifyInstructionWithOperands(Instruction *I,
ArrayRef<Value *> NewOps,
const SimplifyQuery &SQ,
OptimizationRemarkEmitter *ORE) {
@@ -6364,7 +6476,7 @@ Value *llvm::SimplifyInstructionWithOperands(Instruction *I,
return ::simplifyInstructionWithOperands(I, NewOps, SQ, ORE);
}
-Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
+Value *llvm::simplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
OptimizationRemarkEmitter *ORE) {
SmallVector<Value *, 8> Ops(I->operands());
return ::simplifyInstructionWithOperands(I, Ops, SQ, ORE);
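
[Editor's note] simplifyInstruction is the general entry point: it snapshots the instruction's operands and defers to simplifyInstructionWithOperands. A sketch of the classic driver loop over the renamed API (assumes LLVM headers; query construction and iterative revisiting of users are elided):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/IR/InstIterator.h"
    using namespace llvm;

    // One pass over F: replace every instruction that simplifies, erase it.
    static bool simplifyAll(Function &F, const SimplifyQuery &SQ) {
      bool Changed = false;
      for (Instruction &I : make_early_inc_range(instructions(F)))
        if (Value *V = simplifyInstruction(&I, SQ)) {
          I.replaceAllUsesWith(V);
          I.eraseFromParent();
          Changed = true;
        }
      return Changed;
    }
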
@@ -6415,7 +6527,7 @@ static bool replaceAndRecursivelySimplifyImpl(
I = Worklist[Idx];
// See if this instruction simplifies.
- SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC});
+ SimpleV = simplifyInstruction(I, {DL, TLI, DT, AC});
if (!SimpleV) {
if (UnsimplifiedUsers)
UnsimplifiedUsers->insert(I);
@@ -6478,6 +6590,6 @@ const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &AM,
}
template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &,
Function &);
-}
+} // namespace llvm
void InstSimplifyFolder::anchor() {}
diff --git a/llvm/lib/Analysis/Interval.cpp b/llvm/lib/Analysis/Interval.cpp
index e228ec4f2126..f7fffcb3d5e6 100644
--- a/llvm/lib/Analysis/Interval.cpp
+++ b/llvm/lib/Analysis/Interval.cpp
@@ -13,7 +13,6 @@
#include "llvm/Analysis/Interval.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp
index e8e9593d7030..20a905e04a9d 100644
--- a/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -9,14 +9,13 @@
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
@@ -30,12 +29,15 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/ADT/ScopeExit.h"
+#endif
+
using namespace llvm;
#define DEBUG_TYPE "lcg"
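
[Editor's note] The ScopeExit include moves under EXPENSIVE_CHECKS because only the expensive verification paths use it. A standalone sketch of that include-hygiene pattern (hypothetical function; compiles with or without the macro defined):

    #ifdef EXPENSIVE_CHECKS
    #include "llvm/ADT/ScopeExit.h"
    #endif

    void updateGraph() { // hypothetical mutator
    #ifdef EXPENSIVE_CHECKS
      // Verify invariants on every exit path, but only in checked builds.
      auto VerifyOnExit = llvm::make_scope_exit([] { /* verify(); */ });
    #endif
      // ... mutation logic runs identically either way ...
    }
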
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index e311b40ab25c..8a8e9e923b7c 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -38,7 +38,6 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
using namespace llvm;
using namespace PatternMatch;
@@ -919,7 +918,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueCast(
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
Optional<ConstantRange> LHSRes = getRangeFor(CI->getOperand(0), CI, BB);
- if (!LHSRes.hasValue())
+ if (!LHSRes)
// More work to do before applying this transfer rule.
return None;
const ConstantRange &LHSRange = LHSRes.getValue();
@@ -943,7 +942,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
// @foo()), 32"
Optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB);
Optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB);
- if (!LHSRes.hasValue() || !RHSRes.hasValue())
+ if (!LHSRes || !RHSRes)
// More work to do before applying this transfer rule.
return None;
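
[Editor's note] The Optional changes in this file are a mechanical migration toward the std::optional-style interface: contextual bool instead of hasValue(), dereference instead of getValue(), and value_or instead of getValueOr (seen a few hunks below). Sketched here against std::optional, whose interface llvm::Optional mirrors:

    #include <optional>

    int width(std::optional<int> R) {
      if (!R)                // was: !R.hasValue()
        return 0;
      return *R;             // was: R.getValue()
    }

    int widthOr(std::optional<int> R) {
      return R.value_or(-1); // was: R.getValueOr(-1)
    }
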
@@ -956,13 +955,6 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueBinaryOp(
BinaryOperator *BO, BasicBlock *BB) {
assert(BO->getOperand(0)->getType()->isSized() &&
"all operands to binary operators are sized");
- if (BO->getOpcode() == Instruction::Xor) {
- // Xor is the only operation not supported by ConstantRange::binaryOp().
- LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined (unknown binary operator).\n");
- return ValueLatticeElement::getOverdefined();
- }
-
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(BO)) {
unsigned NoWrapKind = 0;
if (OBO->hasNoUnsignedWrap())
@@ -1020,7 +1012,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueExtractValue(
// Handle extractvalue of insertvalue to allow further simplification
// based on replaced with.overflow intrinsics.
- if (Value *V = SimplifyExtractValueInst(
+ if (Value *V = simplifyExtractValueInst(
EVI->getAggregateOperand(), EVI->getIndices(),
EVI->getModule()->getDataLayout()))
return getBlockValue(V, BB, EVI);
@@ -1141,7 +1133,7 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
ConstantRange CR = ConstantRange::makeExactICmpRegion(EdgePred, *C);
if (!CR.isEmptySet())
return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
- CR.getUnsignedMin().zextOrSelf(BitWidth), APInt(BitWidth, 0)));
+ CR.getUnsignedMin().zext(BitWidth), APInt(BitWidth, 0)));
}
return ValueLatticeElement::getOverdefined();
@@ -1278,7 +1270,7 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op,
if (auto *CI = dyn_cast<CastInst>(Usr)) {
assert(CI->getOperand(0) == Op && "Operand 0 isn't Op");
if (auto *C = dyn_cast_or_null<ConstantInt>(
- SimplifyCastInst(CI->getOpcode(), OpConst,
+ simplifyCastInst(CI->getOpcode(), OpConst,
CI->getDestTy(), DL))) {
return ValueLatticeElement::getRange(ConstantRange(C->getValue()));
}
@@ -1290,7 +1282,7 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op,
Value *LHS = Op0Match ? OpConst : BO->getOperand(0);
Value *RHS = Op1Match ? OpConst : BO->getOperand(1);
if (auto *C = dyn_cast_or_null<ConstantInt>(
- SimplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) {
+ simplifyBinOp(BO->getOpcode(), LHS, RHS, DL))) {
return ValueLatticeElement::getRange(ConstantRange(C->getValue()));
}
} else if (isa<FreezeInst>(Usr)) {
@@ -1361,7 +1353,7 @@ static Optional<ValueLatticeElement> getEdgeValueLocal(Value *Val,
ValueLatticeElement OpLatticeVal =
getValueFromCondition(Op, Condition, isTrueDest);
if (Optional<APInt> OpConst = OpLatticeVal.asConstantInteger()) {
- Result = constantFoldUser(Usr, Op, OpConst.getValue(), DL);
+ Result = constantFoldUser(Usr, Op, *OpConst, DL);
break;
}
}
@@ -1432,8 +1424,9 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::getEdgeValue(
if (Constant *VC = dyn_cast<Constant>(Val))
return ValueLatticeElement::get(VC);
- ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo)
- .getValueOr(ValueLatticeElement::getOverdefined());
+ ValueLatticeElement LocalResult =
+ getEdgeValueLocal(Val, BBFrom, BBTo)
+ .value_or(ValueLatticeElement::getOverdefined());
if (hasSingleValue(LocalResult))
// Can't get any more precise here
return LocalResult;
@@ -1886,6 +1879,11 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) {
}
}
+void LazyValueInfo::clear(const Module *M) {
+ if (PImpl) {
+ getImpl(PImpl, AC, M).clear();
+ }
+}
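
[Editor's note] LazyValueInfo::clear(const Module *) is new: it discards every cached lattice value at once, rather than per-value or per-block. A minimal usage sketch (assumes LLVM headers; when to call it is up to the client pass):

    #include "llvm/Analysis/LazyValueInfo.h"
    #include "llvm/IR/Module.h"

    // Drop all cached lattice values for M in one call.
    void resetLVI(llvm::LazyValueInfo &LVI, const llvm::Module &M) {
      LVI.clear(&M);
    }
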
void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) {
if (PImpl) {
diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 031bf3bae51d..491d44335f22 100644
--- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -68,6 +68,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index f9a7a5bdf434..9cfb91a22b7d 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -44,7 +44,6 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -69,9 +68,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -169,8 +166,8 @@ public:
};
} // end anonymous namespace
-// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, ...) \
+// Check - We know that cond should be true; if not, print an error message.
+#define Check(C, ...) \
do { \
if (!(C)) { \
CheckFailed(__VA_ARGS__); \
@@ -181,8 +178,8 @@ public:
void Lint::visitFunction(Function &F) {
// This isn't undefined behavior, it's just a little unusual, and it's a
// fairly common mistake to neglect to name a function.
- Assert(F.hasName() || F.hasLocalLinkage(),
- "Unusual: Unnamed function with non-local linkage", &F);
+ Check(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
// TODO: Check for irreducible control flow.
}
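
[Editor's note] The rest of this file is the mechanical Assert -> Check rename; the macro's behavior is unchanged: on failure it reports via CheckFailed and returns from the current visitor, rather than aborting like assert(). A standalone model of that report-and-bail shape (simplified to a single message argument; the real macro is variadic):

    #include <iostream>
    #include <sstream>

    static std::ostringstream Messages;
    static void CheckFailed(const char *Msg) { Messages << Msg << '\n'; }

    // Report-and-return, not abort: later visitors still run.
    #define Check(C, Msg)                                                      \
      do {                                                                     \
        if (!(C)) {                                                            \
          CheckFailed(Msg);                                                    \
          return;                                                              \
        }                                                                      \
      } while (false)

    static void visitDiv(int Divisor) {
      Check(Divisor != 0, "Undefined behavior: Division by zero");
      // ... checks below only run if the one above passed ...
    }

    int main() {
      visitDiv(0);
      std::cout << Messages.str(); // one lint message; program keeps going
      return 0;
    }
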
@@ -195,23 +192,23 @@ void Lint::visitCallBase(CallBase &I) {
if (Function *F = dyn_cast<Function>(findValue(Callee,
/*OffsetOk=*/false))) {
- Assert(I.getCallingConv() == F->getCallingConv(),
- "Undefined behavior: Caller and callee calling convention differ",
- &I);
+ Check(I.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
FunctionType *FT = F->getFunctionType();
unsigned NumActualArgs = I.arg_size();
- Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
- : FT->getNumParams() == NumActualArgs,
- "Undefined behavior: Call argument count mismatches callee "
- "argument count",
- &I);
+ Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
+ : FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count",
+ &I);
- Assert(FT->getReturnType() == I.getType(),
- "Undefined behavior: Call return type mismatches "
- "callee return type",
- &I);
+ Check(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type",
+ &I);
// Check argument types (in case the callee was casted) and attributes.
// TODO: Verify that caller and callee attributes are compatible.
@@ -221,10 +218,10 @@ void Lint::visitCallBase(CallBase &I) {
Value *Actual = *AI;
if (PI != PE) {
Argument *Formal = &*PI++;
- Assert(Formal->getType() == Actual->getType(),
- "Undefined behavior: Call argument type mismatches "
- "callee parameter type",
- &I);
+ Check(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type",
+ &I);
// Check that noalias arguments don't alias other arguments. This is
// not fully precise because we don't know the sizes of the dereferenced
@@ -242,9 +239,9 @@ void Lint::visitCallBase(CallBase &I) {
continue;
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasResult Result = AA->alias(*AI, *BI);
- Assert(Result != AliasResult::MustAlias &&
- Result != AliasResult::PartialAlias,
- "Unusual: noalias argument aliases another argument", &I);
+ Check(Result != AliasResult::MustAlias &&
+ Result != AliasResult::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
}
}
}
@@ -271,10 +268,10 @@ void Lint::visitCallBase(CallBase &I) {
if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
continue;
Value *Obj = findValue(Arg, /*OffsetOk=*/true);
- Assert(!isa<AllocaInst>(Obj),
- "Undefined behavior: Call with \"tail\" keyword references "
- "alloca",
- &I);
+ Check(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca",
+ &I);
}
}
}
@@ -302,9 +299,9 @@ void Lint::visitCallBase(CallBase &I) {
/*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
Size = LocationSize::precise(Len->getValue().getZExtValue());
- Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
- AliasResult::MustAlias,
- "Undefined behavior: memcpy source and destination overlap", &I);
+ Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasResult::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
break;
}
case Intrinsic::memcpy_inline: {
@@ -319,9 +316,9 @@ void Lint::visitCallBase(CallBase &I) {
// isn't expressive enough for what we really want to do. Known partial
// overlap is not distinguished from the case where nothing is known.
const LocationSize LS = LocationSize::precise(Size);
- Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) !=
- AliasResult::MustAlias,
- "Undefined behavior: memcpy source and destination overlap", &I);
+ Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) !=
+ AliasResult::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
break;
}
case Intrinsic::memmove: {
@@ -338,11 +335,17 @@ void Lint::visitCallBase(CallBase &I) {
MSI->getDestAlign(), nullptr, MemRef::Write);
break;
}
+ case Intrinsic::memset_inline: {
+ MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
+ visitMemoryReference(I, MemoryLocation::getForDest(MSII),
+ MSII->getDestAlign(), nullptr, MemRef::Write);
+ break;
+ }
case Intrinsic::vastart:
- Assert(I.getParent()->getParent()->isVarArg(),
- "Undefined behavior: va_start called in a non-varargs function",
- &I);
+ Check(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), None,
nullptr, MemRef::Read | MemRef::Write);
@@ -367,20 +370,22 @@ void Lint::visitCallBase(CallBase &I) {
break;
case Intrinsic::get_active_lane_mask:
if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
- Assert(!TripCount->isZero(), "get_active_lane_mask: operand #2 "
- "must be greater than 0", &I);
+ Check(!TripCount->isZero(),
+ "get_active_lane_mask: operand #2 "
+ "must be greater than 0",
+ &I);
break;
}
}
void Lint::visitReturnInst(ReturnInst &I) {
Function *F = I.getParent()->getParent();
- Assert(!F->doesNotReturn(),
- "Unusual: Return statement in function with noreturn attribute", &I);
+ Check(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute", &I);
if (Value *V = I.getReturnValue()) {
Value *Obj = findValue(V, /*OffsetOk=*/true);
- Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
+ Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
}
}
@@ -395,39 +400,39 @@ void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
Value *Ptr = const_cast<Value *>(Loc.Ptr);
Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
- Assert(!isa<ConstantPointerNull>(UnderlyingObject),
- "Undefined behavior: Null pointer dereference", &I);
- Assert(!isa<UndefValue>(UnderlyingObject),
- "Undefined behavior: Undef pointer dereference", &I);
- Assert(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
- "Unusual: All-ones pointer dereference", &I);
- Assert(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isOne(),
- "Unusual: Address one pointer dereference", &I);
+ Check(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Check(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Check(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
+ "Unusual: All-ones pointer dereference", &I);
+ Check(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
if (Flags & MemRef::Write) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
- Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
- &I);
- Assert(!isa<Function>(UnderlyingObject) &&
- !isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Write to text section", &I);
+ Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
+ &I);
+ Check(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
}
if (Flags & MemRef::Read) {
- Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
- &I);
- Assert(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Load from block address", &I);
+ Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
+ &I);
+ Check(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
}
if (Flags & MemRef::Callee) {
- Assert(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Call to block address", &I);
+ Check(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
}
if (Flags & MemRef::Branchee) {
- Assert(!isa<Constant>(UnderlyingObject) ||
- isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Branch to non-blockaddress", &I);
+ Check(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
}
// Check for buffer overflows and misalignment.
@@ -461,17 +466,17 @@ void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
// Accesses from before the start or after the end of the object are not
// defined.
- Assert(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
- (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
- "Undefined behavior: Buffer overflow", &I);
+ Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
+ (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
// Accesses that say that the memory is more aligned than it is are not
// defined.
if (!Align && Ty && Ty->isSized())
Align = DL->getABITypeAlign(Ty);
if (BaseAlign && Align)
- Assert(*Align <= commonAlignment(*BaseAlign, Offset),
- "Undefined behavior: Memory reference address is misaligned", &I);
+ Check(*Align <= commonAlignment(*BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
}
}
@@ -486,34 +491,34 @@ void Lint::visitStoreInst(StoreInst &I) {
}
void Lint::visitXor(BinaryOperator &I) {
- Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: xor(undef, undef)", &I);
+ Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
}
void Lint::visitSub(BinaryOperator &I) {
- Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: sub(undef, undef)", &I);
+ Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
}
void Lint::visitLShr(BinaryOperator &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
/*OffsetOk=*/false)))
- Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitAShr(BinaryOperator &I) {
if (ConstantInt *CI =
dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitShl(BinaryOperator &I) {
if (ConstantInt *CI =
dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
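
[Editor's note] The three shift visitors flag the same hazard: shifting an N-bit value by N or more is undefined (in LLVM IR the result is poison), and Lint can only prove it when the count is a visible constant. A small standalone illustration, with masking shown as one defined alternative:

    #include <cstdint>
    #include <cstdio>

    int main() {
      std::uint32_t X = 1;
      unsigned Count = 32; // out of range for a 32-bit shift
      // X << Count would be undefined; masking to the width is one defined
      // alternative (here 32 & 31 == 0, so X comes back unchanged).
      std::printf("%u\n", X << (Count & 31u));
      return 0;
    }
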
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
@@ -554,30 +559,30 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
}
void Lint::visitSDiv(BinaryOperator &I) {
- Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitUDiv(BinaryOperator &I) {
- Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitSRem(BinaryOperator &I) {
- Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitURem(BinaryOperator &I) {
- Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitAllocaInst(AllocaInst &I) {
if (isa<ConstantInt>(I.getArraySize()))
// This isn't undefined behavior, it's just an obvious pessimization.
- Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
- "Pessimization: Static alloca outside of entry block", &I);
+ Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
// TODO: Check for an unusual size (MSB set?)
}
@@ -591,14 +596,14 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), None,
nullptr, MemRef::Branchee);
- Assert(I.getNumDestinations() != 0,
- "Undefined behavior: indirectbr with no destinations", &I);
+ Check(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
/*OffsetOk=*/false)))
- Assert(
+ Check(
CI->getValue().ult(
cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()),
"Undefined result: extractelement index out of range", &I);
@@ -607,18 +612,18 @@ void Lint::visitExtractElementInst(ExtractElementInst &I) {
void Lint::visitInsertElementInst(InsertElementInst &I) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
/*OffsetOk=*/false)))
- Assert(CI->getValue().ult(
- cast<FixedVectorType>(I.getType())->getNumElements()),
- "Undefined result: insertelement index out of range", &I);
+ Check(CI->getValue().ult(
+ cast<FixedVectorType>(I.getType())->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
}
void Lint::visitUnreachableInst(UnreachableInst &I) {
// This isn't undefined behavior, it's merely suspicious.
- Assert(&I == &I.getParent()->front() ||
- std::prev(I.getIterator())->mayHaveSideEffects(),
- "Unusual: unreachable immediately preceded by instruction without "
- "side effects",
- &I);
+ Check(&I == &I.getParent()->front() ||
+ std::prev(I.getIterator())->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects",
+ &I);
}
/// findValue - Look through bitcasts and simple memory reference patterns
@@ -681,17 +686,12 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
CE->getOperand(0)->getType(), CE->getType(),
*DL))
return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
- } else if (CE->getOpcode() == Instruction::ExtractValue) {
- ArrayRef<unsigned> Indices = CE->getIndices();
- if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
- if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
}
}
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC}))
+ if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC}))
return findValueImpl(W, OffsetOk, Visited);
} else if (auto *C = dyn_cast<Constant>(V)) {
Value *W = ConstantFoldConstant(C, *DL, TLI);
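As an aside on what these lint checks catch: a minimal, hypothetical C++ example (not part of this change) whose generated IR would trip the division-by-zero check above:

    // Hypothetical input; lowers to 'sdiv i32 %A, 0', which Lint reports
    // as "Undefined behavior: Division by zero".
    int divByZero(int A) {
      return A / 0;
    }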
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index cd0d4d6b9ca8..bc1d82cf1480 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -13,19 +13,14 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -509,8 +504,8 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL))
return Val;
- TypeSize StoreSize = DL.getTypeStoreSize(Val->getType());
- TypeSize LoadSize = DL.getTypeStoreSize(AccessTy);
+ TypeSize StoreSize = DL.getTypeSizeInBits(Val->getType());
+ TypeSize LoadSize = DL.getTypeSizeInBits(AccessTy);
if (TypeSize::isKnownLE(LoadSize, StoreSize))
if (auto *C = dyn_cast<Constant>(Val))
return ConstantFoldLoadFromConst(C, AccessTy, DL);
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2ab78d2b7ee2..79161db9b5e4 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -47,6 +47,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
@@ -60,12 +61,12 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <cstdlib>
#include <iterator>
#include <utility>
#include <vector>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-accesses"
@@ -172,7 +173,8 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
: High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
AddressSpace(RtCheck.Pointers[Index]
.PointerValue->getType()
- ->getPointerAddressSpace()) {
+ ->getPointerAddressSpace()),
+ NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
Members.push_back(Index);
}
@@ -189,21 +191,20 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
+ Type *AccessTy, bool WritePtr,
unsigned DepSetId, unsigned ASId,
- const ValueToValueMap &Strides,
- PredicatedScalarEvolution &PSE) {
- // Get the stride replaced scev.
- const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ PredicatedScalarEvolution &PSE,
+ bool NeedsFreeze) {
ScalarEvolution *SE = PSE.getSE();
const SCEV *ScStart;
const SCEV *ScEnd;
- if (SE->isLoopInvariant(Sc, Lp)) {
- ScStart = ScEnd = Sc;
+ if (SE->isLoopInvariant(PtrExpr, Lp)) {
+ ScStart = ScEnd = PtrExpr;
} else {
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = PSE.getBackedgeTakenCount();
@@ -227,15 +228,100 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
// Add the size of the pointed element to ScEnd.
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
Type *IdxTy = DL.getIndexType(Ptr->getType());
- const SCEV *EltSizeSCEV =
- SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
- Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
+ Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
+ NeedsFreeze);
}
-SmallVector<RuntimePointerCheck, 4>
-RuntimePointerChecking::generateChecks() const {
+void RuntimePointerChecking::tryToCreateDiffCheck(
+ const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
+ if (!CanUseDiffCheck)
+ return;
+
+ // If either group contains multiple different pointers, bail out.
+ // TODO: Support multiple pointers by using the minimum or maximum pointer,
+ // depending on src & sink.
+ if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) {
+ CanUseDiffCheck = false;
+ return;
+ }
+
+ PointerInfo *Src = &Pointers[CGI.Members[0]];
+ PointerInfo *Sink = &Pointers[CGJ.Members[0]];
+
+ // If either pointer is read and written, multiple checks may be needed. Bail
+ // out.
+ if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
+ !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) {
+ CanUseDiffCheck = false;
+ return;
+ }
+
+ ArrayRef<unsigned> AccSrc =
+ DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
+ ArrayRef<unsigned> AccSink =
+ DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
+ // If either pointer is accessed multiple times, there may not be a clear
+ // src/sink relation. Bail out for now.
+ if (AccSrc.size() != 1 || AccSink.size() != 1) {
+ CanUseDiffCheck = false;
+ return;
+ }
+ // If the sink is accessed before src, swap src/sink.
+ if (AccSink[0] < AccSrc[0])
+ std::swap(Src, Sink);
+
+ auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
+ auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
+ if (!SrcAR || !SinkAR) {
+ CanUseDiffCheck = false;
+ return;
+ }
+
+ const DataLayout &DL =
+ SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
+ SmallVector<Instruction *, 4> SrcInsts =
+ DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
+ SmallVector<Instruction *, 4> SinkInsts =
+ DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
+ Type *SrcTy = getLoadStoreType(SrcInsts[0]);
+ Type *DstTy = getLoadStoreType(SinkInsts[0]);
+ if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
+ return;
+ unsigned AllocSize =
+ std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
+ IntegerType *IntTy =
+ IntegerType::get(Src->PointerValue->getContext(),
+ DL.getPointerSizeInBits(CGI.AddressSpace));
+
+ // Only constant steps matching the AllocSize are supported at the
+ // moment. This simplifies the difference computation. Can be extended in
+ // the future.
+ auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
+ if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
+ Step->getAPInt().abs() != AllocSize) {
+ CanUseDiffCheck = false;
+ return;
+ }
+
+ // When counting down, the dependence distance needs to be swapped.
+ if (Step->getValue()->isNegative())
+ std::swap(SinkAR, SrcAR);
+
+ const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
+ const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
+ if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
+ isa<SCEVCouldNotCompute>(SrcStartInt)) {
+ CanUseDiffCheck = false;
+ return;
+ }
+ DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
+ Src->NeedsFreeze || Sink->NeedsFreeze);
+}
+
+SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
SmallVector<RuntimePointerCheck, 4> Checks;
for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
@@ -243,8 +329,10 @@ RuntimePointerChecking::generateChecks() const {
const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];
- if (needsChecking(CGI, CGJ))
+ if (needsChecking(CGI, CGJ)) {
+ tryToCreateDiffCheck(CGI, CGJ);
Checks.push_back(std::make_pair(&CGI, &CGJ));
+ }
}
}
return Checks;
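To make the diff-check idea concrete, a minimal C++ sketch of the single-comparison predicate it enables; the names and the VF parameter are assumptions, and the real expansion also folds in the unroll/interleave count and may freeze the start values first:

    #include <cstdint>

    // Sketch: no conflict if the sink starts at least VF * AllocSize bytes
    // past the source. A sink starting below the source (negative distance)
    // wraps to a large unsigned value and also passes, matching the
    // swap-on-negative-step handling above.
    bool noConflictForVF(std::uintptr_t SrcStart, std::uintptr_t SinkStart,
                         std::uint64_t AllocSize, std::uint64_t VF) {
      return SinkStart - SrcStart >= VF * AllocSize;
    }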
@@ -285,11 +373,12 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
return addPointer(
Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
- *RtCheck.SE);
+ RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
}
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
const SCEV *End, unsigned AS,
+ bool NeedsFreeze,
ScalarEvolution &SE) {
assert(AddressSpace == AS &&
"all pointers in a checking group must be in the same address space");
@@ -314,6 +403,7 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
High = End;
Members.push_back(Index);
+ this->NeedsFreeze |= NeedsFreeze;
return true;
}
@@ -371,9 +461,11 @@ void RuntimePointerChecking::groupChecks(
unsigned TotalComparisons = 0;
- DenseMap<Value *, unsigned> PositionMap;
- for (unsigned Index = 0; Index < Pointers.size(); ++Index)
- PositionMap[Pointers[Index].PointerValue] = Index;
+ DenseMap<Value *, SmallVector<unsigned>> PositionMap;
+ for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
+ auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
+ Iter.first->second.push_back(Index);
+ }
// We need to keep track of what pointers we've already seen so we
// don't process them twice.
@@ -404,34 +496,35 @@ void RuntimePointerChecking::groupChecks(
auto PointerI = PositionMap.find(MI->getPointer());
assert(PointerI != PositionMap.end() &&
"pointer in equivalence class not found in PositionMap");
- unsigned Pointer = PointerI->second;
- bool Merged = false;
- // Mark this pointer as seen.
- Seen.insert(Pointer);
-
- // Go through all the existing sets and see if we can find one
- // which can include this pointer.
- for (RuntimeCheckingPtrGroup &Group : Groups) {
- // Don't perform more than a certain amount of comparisons.
- // This should limit the cost of grouping the pointers to something
- // reasonable. If we do end up hitting this threshold, the algorithm
- // will create separate groups for all remaining pointers.
- if (TotalComparisons > MemoryCheckMergeThreshold)
- break;
-
- TotalComparisons++;
-
- if (Group.addPointer(Pointer, *this)) {
- Merged = true;
- break;
+ for (unsigned Pointer : PointerI->second) {
+ bool Merged = false;
+ // Mark this pointer as seen.
+ Seen.insert(Pointer);
+
+ // Go through all the existing sets and see if we can find one
+ // which can include this pointer.
+ for (RuntimeCheckingPtrGroup &Group : Groups) {
+ // Don't perform more than a certain amount of comparisons.
+ // This should limit the cost of grouping the pointers to something
+ // reasonable. If we do end up hitting this threshold, the algorithm
+ // will create separate groups for all remaining pointers.
+ if (TotalComparisons > MemoryCheckMergeThreshold)
+ break;
+
+ TotalComparisons++;
+
+ if (Group.addPointer(Pointer, *this)) {
+ Merged = true;
+ break;
+ }
}
- }
- if (!Merged)
- // We couldn't add this pointer to any existing set or the threshold
- // for the number of comparisons has been reached. Create a new group
- // to hold the current pointer.
- Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+ if (!Merged)
+ // We couldn't add this pointer to any existing set or the threshold
+ // for the number of comparisons has been reached. Create a new group
+ // to hold the current pointer.
+ Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+ }
}
// We've computed the grouped checks for this partition.
@@ -522,19 +615,19 @@ public:
: TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
/// Register a load and whether it is only read from.
- void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
+ void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, false));
+ Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// Register a store.
- void addStore(MemoryLocation &Loc) {
+ void addStore(MemoryLocation &Loc, Type *AccessTy) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
- Accesses.insert(MemAccessInfo(Ptr, true));
+ Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
/// Check if we can emit a run-time no-alias check for \p Access.
@@ -545,12 +638,11 @@ public:
/// we will attempt to use additional run-time checks in order to get
/// the bounds of the pointer.
bool createCheckForAccess(RuntimePointerChecking &RtCheck,
- MemAccessInfo Access,
+ MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &Strides,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
- unsigned ASId, bool ShouldCheckStride,
- bool Assume);
+ unsigned ASId, bool ShouldCheckStride, bool Assume);
/// Check whether we can check the pointers at runtime for
/// non-intersection.
@@ -559,7 +651,7 @@ public:
/// (i.e. the pointers have computable bounds).
bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
Loop *TheLoop, const ValueToValueMap &Strides,
- bool ShouldCheckWrap = false);
+ Value *&UncomputablePtr, bool ShouldCheckWrap = false);
/// Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
@@ -583,14 +675,15 @@ public:
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
- typedef SetVector<MemAccessInfo> PtrAccessSet;
+ typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
/// Go over all memory access and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
- /// Set of all accesses.
- PtrAccessSet Accesses;
+ /// Map of all accesses. Values are the types used to access memory pointed to
+ /// by the pointer.
+ PtrAccessMap Accesses;
/// The loop being checked.
const Loop *TheLoop;
@@ -630,11 +723,8 @@ private:
/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &Strides, Value *Ptr,
- Loop *L, bool Assume) {
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
+ const SCEV *PtrScev, Loop *L, bool Assume) {
// The bounds for loop-invariant pointer is trivial.
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
@@ -652,12 +742,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
+ const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
+ Loop *L) {
const SCEV *PtrScev = PSE.getSCEV(Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- Type *AccessTy = Ptr->getType()->getPointerElementType();
int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@@ -689,7 +779,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
}
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
- MemAccessInfo Access,
+ MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &StridesMap,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
@@ -697,42 +787,75 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
bool Assume) {
Value *Ptr = Access.getPointer();
- if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
- return false;
+ ScalarEvolution &SE = *PSE.getSE();
+ SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs;
+ auto *SI = dyn_cast<SelectInst>(Ptr);
+ // Look through selects in the current loop.
+ if (SI && !TheLoop->isLoopInvariant(SI)) {
+ TranslatedPtrs = {
+ std::make_pair(SE.getSCEV(SI->getOperand(1)),
+ !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))),
+ std::make_pair(SE.getSCEV(SI->getOperand(2)),
+ !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))};
+ } else
+ TranslatedPtrs = {
+ std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
- // When we run after a failing dependency check we have to make sure
- // we don't have wrapping pointers.
- if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
- auto *Expr = PSE.getSCEV(Ptr);
- if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+ for (auto &P : TranslatedPtrs) {
+ const SCEV *PtrExpr = P.first;
+ if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
return false;
- PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
+ if (ShouldCheckWrap) {
+ // Skip wrap checking when translating pointers.
+ if (TranslatedPtrs.size() > 1)
+ return false;
+
+ if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
+ auto *Expr = PSE.getSCEV(Ptr);
+ if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+ return false;
+ PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ }
+ }
+ // If there's only one option for Ptr, look it up after bounds and wrap
+ // checking, because assumptions might have been added to PSE.
+ if (TranslatedPtrs.size() == 1)
+ TranslatedPtrs[0] = std::make_pair(
+ replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false);
}
- // The id of the dependence set.
- unsigned DepId;
+ for (auto &P : TranslatedPtrs) {
+ const SCEV *PtrExpr = P.first;
- if (isDependencyCheckNeeded()) {
- Value *Leader = DepCands.getLeaderValue(Access).getPointer();
- unsigned &LeaderId = DepSetId[Leader];
- if (!LeaderId)
- LeaderId = RunningDepId++;
- DepId = LeaderId;
- } else
- // Each access has its own dependence set.
- DepId = RunningDepId++;
+ // The id of the dependence set.
+ unsigned DepId;
- bool IsWrite = Access.getInt();
- RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
- LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ if (isDependencyCheckNeeded()) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ bool IsWrite = Access.getInt();
+ RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
+ P.second);
+ LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ }
return true;
- }
+}
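A hypothetical loop shape this select handling is aimed at, sketched in C++ (names assumed):

    // The pointer operand is a loop-varying select, so runtime bounds are
    // now computed for both arms, with a freeze when an arm may be poison.
    void storeThroughSelect(int *A, int *B, const bool *Cond, int N) {
      for (int I = 0; I < N; ++I) {
        int *P = Cond[I] ? &A[I] : &B[I];
        *P = 0;
      }
    }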
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
ScalarEvolution *SE, Loop *TheLoop,
const ValueToValueMap &StridesMap,
- bool ShouldCheckWrap) {
+ Value *&UncomputablePtr, bool ShouldCheckWrap) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
@@ -788,12 +911,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
}
for (auto &Access : AccessInfos) {
- if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
- RunningDepId, ASId, ShouldCheckWrap, false)) {
- LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
- << *Access.getPointer() << '\n');
- Retries.push_back(Access);
- CanDoAliasSetRT = false;
+ for (auto &AccessTy : Accesses[Access]) {
+ if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+ DepSetId, TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, false)) {
+ LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
+ << *Access.getPointer() << '\n');
+ Retries.push_back(Access);
+ CanDoAliasSetRT = false;
+ }
}
}
@@ -815,13 +941,17 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// We know that we need these checks, so we can now be more aggressive
// and add further checks if required (overflow checks).
CanDoAliasSetRT = true;
- for (auto Access : Retries)
- if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
- TheLoop, RunningDepId, ASId,
- ShouldCheckWrap, /*Assume=*/true)) {
- CanDoAliasSetRT = false;
- break;
+ for (auto Access : Retries) {
+ for (auto &AccessTy : Accesses[Access]) {
+ if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+ DepSetId, TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, /*Assume=*/true)) {
+ CanDoAliasSetRT = false;
+ UncomputablePtr = Access.getPointer();
+ break;
+ }
}
+ }
}
CanDoRT &= CanDoAliasSetRT;
@@ -886,9 +1016,12 @@ void AccessAnalysis::processMemAccesses() {
LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
LLVM_DEBUG({
for (auto A : Accesses)
- dbgs() << "\t" << *A.getPointer() << " (" <<
- (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
- "read-only" : "read")) << ")\n";
+ dbgs() << "\t" << *A.first.getPointer() << " ("
+ << (A.first.getInt()
+ ? "write"
+ : (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
+ : "read"))
+ << ")\n";
});
// The AliasSetTracker has nicely partitioned our pointers by metadata
@@ -907,13 +1040,13 @@ void AccessAnalysis::processMemAccesses() {
UnderlyingObjToAccessMap ObjToLastAccess;
// Set of access to check after all writes have been processed.
- PtrAccessSet DeferredAccesses;
+ PtrAccessMap DeferredAccesses;
// Iterate over each alias set twice, once to process read/write pointers,
// and then to process read-only pointers.
for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
bool UseDeferred = SetIteration > 0;
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+ PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
for (const auto &AV : AS) {
Value *Ptr = AV.getValue();
@@ -921,10 +1054,10 @@ void AccessAnalysis::processMemAccesses() {
// For a single memory access in AliasSetTracker, Accesses may contain
// both read and write, and they both need to be handled for CheckDeps.
for (const auto &AC : S) {
- if (AC.getPointer() != Ptr)
+ if (AC.first.getPointer() != Ptr)
continue;
- bool IsWrite = AC.getInt();
+ bool IsWrite = AC.first.getInt();
// If we're using the deferred access set, then it contains only
// reads.
@@ -946,7 +1079,9 @@ void AccessAnalysis::processMemAccesses() {
// consecutive as "read-only" pointers (so that we check
// "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
if (!UseDeferred && IsReadOnlyPtr) {
- DeferredAccesses.insert(Access);
+ // We only use the pointer keys; the vector-of-types values don't
+ // matter.
+ DeferredAccesses.insert({Access, {}});
continue;
}
@@ -1445,13 +1580,13 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
const SCEV *CastedDist = &Dist;
const SCEV *CastedProduct = Product;
- uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
- uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+ uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
+ uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
// The dependence distance can be positive/negative, so we sign extend Dist;
// The multiplication of the absolute stride in bytes and the
// backedgeTakenCount is non-negative, so we zero extend Product.
- if (DistTypeSize > ProductTypeSize)
+ if (DistTypeSizeBits > ProductTypeSizeBits)
CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
else
CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
@@ -1518,8 +1653,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
+ Type *ATy = getLoadStoreType(InstMap[AIdx]);
+ Type *BTy = getLoadStoreType(InstMap[BIdx]);
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@@ -1842,8 +1977,6 @@ bool LoopAccessInfo::canAnalyzeLoop() {
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
const TargetLibraryInfo *TLI,
DominatorTree *DT) {
- typedef SmallPtrSet<Value*, 16> ValueSet;
-
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
@@ -1975,22 +2108,26 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// for read and once for write, it will only appear once (on the write
// list). This is okay, since we are going to check for conflicts between
// writes and between reads and writes, but not between reads and reads.
- ValueSet Seen;
+ SmallSet<std::pair<Value *, Type *>, 16> Seen;
// Record uniform store addresses to identify if we have multiple stores
// to the same address.
- ValueSet UniformStores;
+ SmallPtrSet<Value *, 16> UniformStores;
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
- if (isUniform(Ptr))
+ if (isUniform(Ptr)) {
+ // Record store instructions to loop-invariant addresses.
+ StoresToInvariantAddresses.push_back(ST);
HasDependenceInvolvingLoopInvariantAddress |=
!UniformStores.insert(Ptr).second;
+ }
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr).second) {
+ Type *AccessTy = getLoadStoreType(ST);
+ if (Seen.insert({Ptr, AccessTy}).second) {
++NumReadWrites;
MemoryLocation Loc = MemoryLocation::get(ST);
@@ -2001,9 +2138,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
- [&Accesses, Loc](Value *Ptr) {
+ [&Accesses, AccessTy, Loc](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
- Accesses.addStore(NewLoc);
+ Accesses.addStore(NewLoc, AccessTy);
});
}
}
@@ -2027,7 +2164,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second ||
+ Type *AccessTy = getLoadStoreType(LD);
+ if (Seen.insert({Ptr, AccessTy}).second ||
!getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
@@ -2049,9 +2187,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
- [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
+ [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
- Accesses.addLoad(NewLoc, IsReadOnlyPtr);
+ Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
});
}
@@ -2069,10 +2207,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
- TheLoop, SymbolicStrides);
+ Value *UncomputablePtr = nullptr;
+ bool CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
+ SymbolicStrides, UncomputablePtr, false);
if (!CanDoRTIfNeeded) {
- recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
+ auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
+ recordAnalysis("CantIdentifyArrayBounds", I)
+ << "cannot identify array bounds";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
CanVecMem = false;
@@ -2099,12 +2241,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
PtrRtChecking->Need = true;
auto *SE = PSE->getSE();
- CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
- SymbolicStrides, true);
+ UncomputablePtr = nullptr;
+ CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
+ *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
// Check that we found the bounds for the pointer.
if (!CanDoRTIfNeeded) {
- recordAnalysis("CantCheckMemDepsAtRunTime")
+ auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
+ recordAnalysis("CantCheckMemDepsAtRunTime", I)
<< "cannot check memory dependencies at runtime";
LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
CanVecMem = false;
@@ -2129,13 +2273,61 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
<< (PtrRtChecking->Need ? "" : " don't")
<< " need runtime memory checks.\n");
- else {
- recordAnalysis("UnsafeMemDep")
- << "unsafe dependent memory operations in loop. Use "
- "#pragma loop distribute(enable) to allow loop distribution "
- "to attempt to isolate the offending operations into a separate "
- "loop";
- LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+ else
+ emitUnsafeDependenceRemark();
+}
+
+void LoopAccessInfo::emitUnsafeDependenceRemark() {
+ auto Deps = getDepChecker().getDependences();
+ if (!Deps)
+ return;
+ auto Found = std::find_if(
+ Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
+ return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
+ MemoryDepChecker::VectorizationSafetyStatus::Safe;
+ });
+ if (Found == Deps->end())
+ return;
+ MemoryDepChecker::Dependence Dep = *Found;
+
+ LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+
+ // Emit remark for first unsafe dependence
+ OptimizationRemarkAnalysis &R =
+ recordAnalysis("UnsafeDep", Dep.getDestination(*this))
+ << "unsafe dependent memory operations in loop. Use "
+ "#pragma loop distribute(enable) to allow loop distribution "
+ "to attempt to isolate the offending operations into a separate "
+ "loop";
+
+ switch (Dep.Type) {
+ case MemoryDepChecker::Dependence::NoDep:
+ case MemoryDepChecker::Dependence::Forward:
+ case MemoryDepChecker::Dependence::BackwardVectorizable:
+ llvm_unreachable("Unexpected dependence");
+ case MemoryDepChecker::Dependence::Backward:
+ R << "\nBackward loop carried data dependence.";
+ break;
+ case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
+ R << "\nForward loop carried data dependence that prevents "
+ "store-to-load forwarding.";
+ break;
+ case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
+ R << "\nBackward loop carried data dependence that prevents "
+ "store-to-load forwarding.";
+ break;
+ case MemoryDepChecker::Dependence::Unknown:
+ R << "\nUnknown data dependence.";
+ break;
+ }
+
+ if (Instruction *I = Dep.getSource(*this)) {
+ DebugLoc SourceLoc = I->getDebugLoc();
+ if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
+ SourceLoc = DD->getDebugLoc();
+ if (SourceLoc)
+ R << " Memory location is the same as accessed at "
+ << ore::NV("Location", SourceLoc);
}
}
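For illustration, a hypothetical loop that would now receive this remark with the Backward note attached:

    // Each iteration reads the value stored by the previous one, a backward
    // loop-carried data dependence that blocks vectorization.
    void scalePrefix(int *A, int N) {
      for (int I = 0; I + 1 < N; ++I)
        A[I + 1] = A[I] * 2;
    }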
@@ -2212,12 +2404,12 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
// The Stride can be positive/negative, so we sign extend Stride;
// The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
- uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
- uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
+ uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
+ uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
const SCEV *CastedStride = StrideExpr;
const SCEV *CastedBECount = BETakenCount;
ScalarEvolution *SE = PSE->getSE();
- if (BETypeSize >= StrideTypeSize)
+ if (BETypeSizeBits >= StrideTypeSizeBits)
CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
else
CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
@@ -2232,7 +2424,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
"at most once.\n");
return;
}
- LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
+ LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
SymbolicStrides[Ptr] = Stride;
StrideSet.insert(Stride);
@@ -2242,10 +2434,12 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AAResults *AA,
DominatorTree *DT, LoopInfo *LI)
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
- PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
+ PtrRtChecking(nullptr),
DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
- if (canAnalyzeLoop())
+ PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
+ if (canAnalyzeLoop()) {
analyzeLoop(AA, LI, TLI, DT);
+ }
}
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
@@ -2283,7 +2477,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
<< "found in loop.\n";
OS.indent(Depth) << "SCEV assumptions:\n";
- PSE->getUnionPredicate().print(OS, Depth);
+ PSE->getPredicate().print(OS, Depth);
OS << "\n";
@@ -2301,7 +2495,7 @@ const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
if (!LAI)
LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
- return *LAI.get();
+ return *LAI;
}
void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
diff --git a/llvm/lib/Analysis/LoopAnalysisManager.cpp b/llvm/lib/Analysis/LoopAnalysisManager.cpp
index 4d6f8a64329a..8d71b31ca393 100644
--- a/llvm/lib/Analysis/LoopAnalysisManager.cpp
+++ b/llvm/lib/Analysis/LoopAnalysisManager.cpp
@@ -8,12 +8,9 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManagerImpl.h"
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index ba014bd08c98..2cbf1f7f2d28 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -103,14 +103,24 @@ static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
return StepRec == &ElemSize;
}
-/// Compute the trip count for the given loop \p L. Return the SCEV expression
-/// for the trip count or nullptr if it cannot be computed.
-static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
+/// Compute the trip count for the given loop \p L or assume a default value if
+/// it is not a compile time constant. Return the SCEV expression for the trip
+/// count.
+static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize,
+ ScalarEvolution &SE) {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
- !isa<SCEVConstant>(BackedgeTakenCount))
- return nullptr;
- return SE.getTripCountFromExitCount(BackedgeTakenCount);
+ const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ isa<SCEVConstant>(BackedgeTakenCount))
+ ? SE.getTripCountFromExitCount(BackedgeTakenCount)
+ : nullptr;
+
+ if (!TripCount) {
+ LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
+ << " could not be computed, using DefaultTripCount\n");
+ TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount);
+ }
+
+ return TripCount;
}
//===----------------------------------------------------------------------===//
@@ -274,22 +284,18 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
return 1;
}
- const SCEV *TripCount = computeTripCount(L, SE);
- if (!TripCount) {
- LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
- << " could not be computed, using DefaultTripCount\n");
- const SCEV *ElemSize = Sizes.back();
- TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
- }
+ const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE);
+ assert(TripCount && "Expecting valid TripCount");
LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
- // If the indexed reference is 'consecutive' the cost is
- // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
- const SCEV *RefCost = TripCount;
-
+ const SCEV *RefCost = nullptr;
if (isConsecutive(L, CLS)) {
+ // If the indexed reference is 'consecutive' the cost is
+ // (TripCount*Stride)/CLS.
const SCEV *Coeff = getLastCoefficient();
const SCEV *ElemSize = Sizes.back();
+ assert(Coeff->getType() == ElemSize->getType() &&
+ "Expecting the same type");
const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
@@ -303,10 +309,33 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
LLVM_DEBUG(dbgs().indent(4)
<< "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
<< *RefCost << "\n");
- } else
+ } else {
+ // If the indexed reference is not 'consecutive' the cost is proportional to
+ // the trip count and the depth of the dimension which the subject loop
+ // subscript is accessing. We try to estimate this by multiplying the cost
+ // by the trip counts of loops corresponding to the inner dimensions. For
+ // example, given the indexed reference 'A[i][j][k]', and assuming the
+ // i-loop is in the innermost position, the cost would be equal to the
+ // iterations of the i-loop multiplied by iterations of the j-loop.
+ RefCost = TripCount;
+
+ int Index = getSubscriptIndex(L);
+ assert(Index >= 0 && "Could not locate a valid Index");
+
+ for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(I));
+ assert(AR && AR->getLoop() && "Expecting valid loop");
+ const SCEV *TripCount =
+ computeTripCount(*AR->getLoop(), *Sizes.back(), SE);
+ Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType());
+ RefCost = SE.getMulExpr(SE.getNoopOrAnyExtend(RefCost, WiderType),
+ SE.getNoopOrAnyExtend(TripCount, WiderType));
+ }
+
LLVM_DEBUG(dbgs().indent(4)
- << "Access is not consecutive: RefCost=TripCount=" << *RefCost
- << "\n");
+ << "Access is not consecutive: RefCost=" << *RefCost << "\n");
+ }
+ assert(RefCost && "Expecting a valid RefCost");
// Attempt to fold RefCost into a constant.
if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
@@ -319,6 +348,26 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
return CacheCost::InvalidCost;
}
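A short worked example of the two cost formulas, using assumed numbers:

    // Assume CLS = 64 bytes, 4-byte elements, and all trip counts = 100.
    //   Consecutive:      RefCost = (TripCount * Stride) / CLS
    //                             = (100 * 4) / 64 -> 6 cache lines (udiv).
    //   Non-consecutive:  per the A[i][j][k] example above (i-loop
    //                     innermost),
    //                     RefCost = iters(i) * iters(j) = 100 * 100 = 10000.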
+bool IndexedReference::tryDelinearizeFixedSize(
+ const SCEV *AccessFn, SmallVectorImpl<const SCEV *> &Subscripts) {
+ SmallVector<int, 4> ArraySizes;
+ if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts,
+ ArraySizes))
+ return false;
+
+ // Populate Sizes with scev expressions to be used in calculations later.
+ for (auto Idx : seq<unsigned>(1, Subscripts.size()))
+ Sizes.push_back(
+ SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));
+
+ LLVM_DEBUG({
+ dbgs() << "Delinearized subscripts of fixed-size array\n"
+ << "GEP:" << *getLoadStorePointerOperand(&StoreOrLoadInst)
+ << "\n";
+ });
+ return true;
+}
+
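A hypothetical fixed-size access the new path can recover, sketched in C++:

    // Compiles to a GEP over a [50 x i32] array type.
    void touch(int A[][50], int I, int J) {
      A[I][J] = 0;
    }
    // Subscripts become {I, J}; ArraySizes yields {50}, so Sizes holds the
    // SCEV constant 50, and delinearize() appends the element size last.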
bool IndexedReference::delinearize(const LoopInfo &LI) {
assert(Subscripts.empty() && "Subscripts should be empty");
assert(Sizes.empty() && "Sizes should be empty");
@@ -340,13 +389,25 @@ bool IndexedReference::delinearize(const LoopInfo &LI) {
return false;
}
- AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
+ bool IsFixedSize = false;
+ // Try to delinearize fixed-size arrays.
+ if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
+ IsFixedSize = true;
+ // The last element of Sizes is the element size.
+ Sizes.push_back(ElemSize);
+ LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
+ << "', AccessFn: " << *AccessFn << "\n");
+ }
- LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
- << "', AccessFn: " << *AccessFn << "\n");
+ AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
- llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
- SE.getElementSize(&StoreOrLoadInst));
+ // Try to delinearize parametric-size arrays.
+ if (!IsFixedSize) {
+ LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
+ << "', AccessFn: " << *AccessFn << "\n");
+ llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
+ SE.getElementSize(&StoreOrLoadInst));
+ }
if (Subscripts.empty() || Sizes.empty() ||
Subscripts.size() != Sizes.size()) {
@@ -424,6 +485,16 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
}
+int IndexedReference::getSubscriptIndex(const Loop &L) const {
+ for (auto Idx : seq<int>(0, getNumSubscripts())) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx));
+ if (AR && AR->getLoop() == &L) {
+ return Idx;
+ }
+ }
+ return -1;
+}
+
const SCEV *IndexedReference::getLastCoefficient() const {
const SCEV *LastSubscript = getLastSubscript();
auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
@@ -550,7 +621,7 @@ bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
bool Added = false;
for (ReferenceGroupTy &RefGroup : RefGroups) {
- const IndexedReference &Representative = *RefGroup.front().get();
+ const IndexedReference &Representative = *RefGroup.front();
LLVM_DEBUG({
dbgs() << "References:\n";
dbgs().indent(2) << *R << "\n";
@@ -574,8 +645,8 @@ bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
Optional<bool> HasSpacialReuse =
R->hasSpacialReuse(Representative, CLS, AA);
- if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
- (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
+ if ((HasTemporalReuse && *HasTemporalReuse) ||
+ (HasSpacialReuse && *HasSpacialReuse)) {
RefGroup.push_back(std::move(R));
Added = true;
break;
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index b161c490a6bc..29c2437ff5ea 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/IVDescriptors.h"
@@ -30,7 +29,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -38,9 +36,7 @@
#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
using namespace llvm;
// Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops.
@@ -740,6 +736,7 @@ void UnloopUpdater::updateBlockParents() {
bool Changed = FoundIB;
for (unsigned NIters = 0; Changed; ++NIters) {
assert(NIters < Unloop.getNumBlocks() && "runaway iterative algorithm");
+ (void) NIters;
// Iterate over the postorder list of blocks, propagating the nearest loop
// from successors to predecessors as before.
@@ -1085,13 +1082,13 @@ Optional<bool> llvm::getOptionalBoolLoopAttribute(const Loop *TheLoop,
}
bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
- return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
+ return getOptionalBoolLoopAttribute(TheLoop, Name).value_or(false);
}
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
StringRef Name) {
const MDOperand *AttrMD =
- findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
+ findStringMetadataForLoop(TheLoop, Name).value_or(nullptr);
if (!AttrMD)
return None;
@@ -1104,7 +1101,7 @@ llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
int Default) {
- return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default);
+ return getOptionalIntLoopAttribute(TheLoop, Name).value_or(Default);
}
bool llvm::isFinite(const Loop *L) {
diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp
index 675bb7a7749c..bff796f339ab 100644
--- a/llvm/lib/Analysis/LoopNestAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp
@@ -13,8 +13,7 @@
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/ADT/BreadthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/PostDominators.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/LoopPass.cpp b/llvm/lib/Analysis/LoopPass.cpp
index b720bab454e9..5d824aece488 100644
--- a/llvm/lib/Analysis/LoopPass.cpp
+++ b/llvm/lib/Analysis/LoopPass.cpp
@@ -13,14 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
-#include "llvm/IR/PassManager.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TimeProfiler.h"
@@ -192,12 +190,12 @@ bool LPPassManager::runOnFunction(Function &F) {
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
#ifdef EXPENSIVE_CHECKS
- uint64_t RefHash = StructuralHash(F);
+ uint64_t RefHash = P->structuralHash(F);
#endif
LocalChanged = P->runOnLoop(CurrentLoop, *this);
#ifdef EXPENSIVE_CHECKS
- if (!LocalChanged && (RefHash != StructuralHash(F))) {
+ if (!LocalChanged && (RefHash != P->structuralHash(F))) {
llvm::errs() << "Pass modifies its input and doesn't report it: "
<< P->getPassName() << "\n";
llvm_unreachable("Pass modifies its input and doesn't report it");
diff --git a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
index 15095d67d385..84f1eff9a732 100644
--- a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -13,7 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Operator.h"
using namespace llvm;
@@ -84,9 +87,9 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) {
const DataLayout &DL = I.getModule()->getDataLayout();
if (auto FI = dyn_cast<FPMathOperator>(&I))
SimpleV =
- SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ simplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
else
- SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+ SimpleV = simplifyBinOp(I.getOpcode(), LHS, RHS, DL);
if (SimpleV) {
SimplifiedValues[&I] = SimpleV;
@@ -155,7 +158,7 @@ bool UnrolledInstAnalyzer::visitCastInst(CastInst &I) {
// i32 0).
if (CastInst::castIsValid(I.getOpcode(), Op, I.getType())) {
const DataLayout &DL = I.getModule()->getDataLayout();
- if (Value *V = SimplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) {
+ if (Value *V = simplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) {
SimplifiedValues[&I] = V;
return true;
}
@@ -192,7 +195,7 @@ bool UnrolledInstAnalyzer::visitCmpInst(CmpInst &I) {
}
const DataLayout &DL = I.getModule()->getDataLayout();
- if (Value *V = SimplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) {
+ if (Value *V = simplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) {
SimplifiedValues[&I] = V;
return true;
}
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 0480c1cd2842..f55de71ea98a 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -13,30 +13,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ReleaseModeModelRunner.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Config/config.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Path.h"
-
-#include <limits>
-#include <unordered_map>
-#include <unordered_set>
using namespace llvm;
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
@@ -44,7 +39,7 @@ std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
auto AOTRunner =
std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
- M.getContext(), FeatureNameMap, DecisionName);
+ M.getContext(), FeatureMap, DecisionName);
return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
}
#endif
@@ -57,15 +52,21 @@ static cl::opt<float> SizeIncreaseThreshold(
"blocking any further inlining."),
cl::init(2.0));
+static cl::opt<bool> KeepFPICache(
+ "ml-advisor-keep-fpi-cache", cl::Hidden,
+ cl::desc(
+ "For test - keep the ML Inline advisor's FunctionPropertiesInfo cache"),
+ cl::init(false));
+
// clang-format off
-const std::array<std::string, NumberOfFeatures> llvm::FeatureNameMap{
+const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
+#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
// InlineCost features - these must come first
-#define POPULATE_NAMES(INDEX_NAME, NAME) NAME,
INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
// Non-cost features
-#define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME,
+#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
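For illustration, one expanded entry of the new table, with an assumed feature name (the {1} shape marks a scalar feature):

    // Hypothetical name; the macro above stamps out one such spec per
    // feature.
    const TensorSpec CalleeBBCount =
        TensorSpec::createSpec<int64_t>("callee_basic_block_count", {1});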
@@ -138,7 +139,10 @@ unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}
-void MLInlineAdvisor::onPassEntry() {
+void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
+ if (!LastSCC || ForceStop)
+ return;
+ FPICache.clear();
// Function passes executed between InlinerPass runs may have changed the
// module-wide features.
// The cgscc pass manager rules are such that:
@@ -154,8 +158,8 @@ void MLInlineAdvisor::onPassEntry() {
// care about the nature of the Edge (call or ref).
NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
while (!NodesInLastSCC.empty()) {
- const auto *N = NodesInLastSCC.front();
- NodesInLastSCC.pop_front();
+ const auto *N = *NodesInLastSCC.begin();
+ NodesInLastSCC.erase(N);
// The Function wrapped by N could have been deleted since we last saw it.
if (N->isDead()) {
assert(!N->getFunction().isDeclaration());
@@ -168,34 +172,52 @@ void MLInlineAdvisor::onPassEntry() {
assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
auto I = AllNodes.insert(AdjNode);
if (I.second)
- NodesInLastSCC.push_back(AdjNode);
+ NodesInLastSCC.insert(AdjNode);
}
}
EdgeCount -= EdgesOfLastSeenNodes;
EdgesOfLastSeenNodes = 0;
+
+ // (Re)use NodesInLastSCC to remember the nodes in the SCC right now,
+ // in case the SCC is split before onPassExit and some nodes are split out.
+ assert(NodesInLastSCC.empty());
+ for (const auto &N : *LastSCC)
+ NodesInLastSCC.insert(&N);
}
void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
- if (!LastSCC)
+ // No need to keep this around - function passes will invalidate it.
+ if (!KeepFPICache)
+ FPICache.clear();
+ if (!LastSCC || ForceStop)
return;
// Keep track of the nodes and edges we last saw. Then, in onPassEntry,
// we update the node count and edge count from the subset of these nodes that
// survived.
- assert(NodesInLastSCC.empty());
- assert(NodeCount >= LastSCC->size());
EdgesOfLastSeenNodes = 0;
+
+ // Check on nodes that were in the SCC at onPassEntry
+ for (auto I = NodesInLastSCC.begin(); I != NodesInLastSCC.end();) {
+ if ((*I)->isDead())
+ NodesInLastSCC.erase(*I++);
+ else
+ EdgesOfLastSeenNodes += getLocalCalls((*I++)->getFunction());
+ }
+
+ // Check on nodes that may have been added to the SCC
for (const auto &N : *LastSCC) {
assert(!N.isDead());
- EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
- NodesInLastSCC.push_back(&N);
+ auto I = NodesInLastSCC.insert(&N);
+ if (I.second)
+ EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
}
+ assert(NodeCount >= NodesInLastSCC.size());
assert(EdgeCount >= EdgesOfLastSeenNodes);
}
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
- return FAM.getResult<FunctionPropertiesAnalysis>(F)
- .DirectCallsToDefinedFunctions;
+ return getCachedFPI(F).DirectCallsToDefinedFunctions;
}
// Update the internal state of the advisor, and force invalidate feature
@@ -208,13 +230,15 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
assert(!ForceStop);
Function *Caller = Advice.getCaller();
Function *Callee = Advice.getCallee();
-
// The caller features aren't valid anymore.
{
PreservedAnalyses PA = PreservedAnalyses::all();
PA.abandon<FunctionPropertiesAnalysis>();
+ PA.abandon<DominatorTreeAnalysis>();
+ PA.abandon<LoopAnalysis>();
FAM.invalidate(*Caller, PA);
}
+ Advice.updateCachedCallerFPI(FAM);
int64_t IRSizeAfter =
getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
@@ -227,15 +251,13 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
// For edges, we 'forget' the edges that the caller and callee used to have
// before inlining, and add back what they currently have together.
int64_t NewCallerAndCalleeEdges =
- FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
- .DirectCallsToDefinedFunctions;
+ getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
if (CalleeWasDeleted)
--NodeCount;
else
NewCallerAndCalleeEdges +=
- FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
- .DirectCallsToDefinedFunctions;
+ getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
@@ -248,7 +270,19 @@ int64_t MLInlineAdvisor::getModuleIRSize() const {
return Ret;
}
+FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
+ auto InsertPair =
+ FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
+ if (!InsertPair.second)
+ return InsertPair.first->second;
+ InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
+ return InsertPair.first->second;
+}
+
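The caching in getCachedFPI above is an insert-then-compute pattern; a minimal generic C++ sketch, with std::unordered_map standing in for the analysis cache (all names assumed):

    #include <unordered_map>

    template <typename K, typename V, typename ComputeFn>
    V &getOrCompute(std::unordered_map<K, V> &Cache, const K &Key,
                    ComputeFn Compute) {
      auto [It, Inserted] = Cache.try_emplace(Key);
      if (Inserted)
        It->second = Compute(Key); // computed only on a cache miss
      return It->second;
    }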
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
+ if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
+ return Skip;
+
auto &Caller = *CB.getCaller();
auto &Callee = *CB.getCalledFunction();
@@ -307,8 +341,8 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
NrCtantParams += (isa<Constant>(*I));
}
- auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
- auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);
+ auto &CallerBefore = getCachedFPI(Caller);
+ auto &CalleeBefore = getCachedFPI(Callee);
*ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
CalleeBefore.BasicBlockCount;
@@ -348,9 +382,19 @@ MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}
+std::unique_ptr<InlineAdvice>
+MLInlineAdvisor::getSkipAdviceIfUnreachableCallsite(CallBase &CB) {
+ if (!FAM.getResult<DominatorTreeAnalysis>(*CB.getCaller())
+ .isReachableFromEntry(CB.getParent()))
+ return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), false);
+ return nullptr;
+}
+
std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
bool Advice) {
// Make sure we track inlinings in all cases - mandatory or not.
+ if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
+ return Skip;
if (Advice && !ForceStop)
return getMandatoryAdviceImpl(CB);
@@ -366,16 +410,47 @@ MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}
+void MLInlineAdvisor::print(raw_ostream &OS) const {
+ OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
+ << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n";
+ OS << "[MLInlineAdvisor] FPI:\n";
+  for (const auto &I : FPICache) {
+ OS << I.getFirst()->getName() << ":\n";
+ I.getSecond().print(OS);
+ OS << "\n";
+ }
+ OS << "\n";
+}
+
+MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
+ OptimizationRemarkEmitter &ORE,
+ bool Recommendation)
+ : InlineAdvice(Advisor, CB, ORE, Recommendation),
+ CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
+ CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
+ CallerAndCalleeEdges(Advisor->isForcedToStop()
+ ? 0
+ : (Advisor->getLocalCalls(*Caller) +
+ Advisor->getLocalCalls(*Callee))),
+ PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
+ if (Recommendation)
+ FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
+}
+
void MLInlineAdvice::reportContextForRemark(
DiagnosticInfoOptimizationBase &OR) {
using namespace ore;
OR << NV("Callee", Callee->getName());
for (size_t I = 0; I < NumberOfFeatures; ++I)
- OR << NV(FeatureNameMap[I],
+ OR << NV(FeatureMap[I].name(),
*getAdvisor()->getModelRunner().getTensor<int64_t>(I));
OR << NV("ShouldInline", isInliningRecommended());
}
+void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const {
+ FPU->finish(FAM);
+}
+
void MLInlineAdvice::recordInliningImpl() {
ORE.emit([&]() {
OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
@@ -397,6 +472,7 @@ void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
void MLInlineAdvice::recordUnsuccessfulInliningImpl(
const InlineResult &Result) {
+ getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
ORE.emit([&]() {
OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
DLoc, Block);
@@ -405,6 +481,7 @@ void MLInlineAdvice::recordUnsuccessfulInliningImpl(
});
}
void MLInlineAdvice::recordUnattemptedInliningImpl() {
+ assert(!FPU);
ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningNotAttempted", DLoc, Block);
reportContextForRemark(R);
diff --git a/llvm/lib/Analysis/MemDepPrinter.cpp b/llvm/lib/Analysis/MemDepPrinter.cpp
index 00642347102a..305ae3e2a992 100644
--- a/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp
index 82617c7256a5..4dd5c76cc604 100644
--- a/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -9,14 +9,11 @@
#include "llvm/Analysis/MemDerefPrinter.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 208f93aa1ac6..91501b04448e 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -17,7 +17,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/Utils/Local.h"
@@ -43,6 +43,8 @@
#include <cassert>
#include <cstdint>
#include <iterator>
+#include <numeric>
+#include <type_traits>
#include <utility>
using namespace llvm;
@@ -62,6 +64,42 @@ enum AllocType : uint8_t {
AnyAlloc = AllocLike | ReallocLike
};
+enum class MallocFamily {
+ Malloc,
+ CPPNew, // new(unsigned int)
+ CPPNewAligned, // new(unsigned int, align_val_t)
+ CPPNewArray, // new[](unsigned int)
+  CPPNewArrayAligned, // new[](unsigned int, align_val_t)
+ MSVCNew, // new(unsigned int)
+ MSVCArrayNew, // new[](unsigned int)
+ VecMalloc,
+ KmpcAllocShared,
+};
+
+StringRef mangledNameForMallocFamily(const MallocFamily &Family) {
+ switch (Family) {
+ case MallocFamily::Malloc:
+ return "malloc";
+ case MallocFamily::CPPNew:
+ return "_Znwm";
+ case MallocFamily::CPPNewAligned:
+ return "_ZnwmSt11align_val_t";
+ case MallocFamily::CPPNewArray:
+ return "_Znam";
+ case MallocFamily::CPPNewArrayAligned:
+ return "_ZnamSt11align_val_t";
+ case MallocFamily::MSVCNew:
+ return "??2@YAPAXI@Z";
+ case MallocFamily::MSVCArrayNew:
+ return "??_U@YAPAXI@Z";
+ case MallocFamily::VecMalloc:
+ return "vec_malloc";
+ case MallocFamily::KmpcAllocShared:
+ return "__kmpc_alloc_shared";
+ }
+ llvm_unreachable("missing an alloc family");
+}
+
struct AllocFnsTy {
AllocType AllocTy;
unsigned NumParams;
@@ -69,50 +107,55 @@ struct AllocFnsTy {
int FstParam, SndParam;
// Alignment parameter for aligned_alloc and aligned new
int AlignParam;
+ // Name of default allocator function to group malloc/free calls by family
+ MallocFamily Family;
};
+// clang-format off
// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
// know which functions are nounwind or noalias, which parameters are nocapture, etc.
static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
- {LibFunc_malloc, {MallocLike, 1, 0, -1, -1}},
- {LibFunc_vec_malloc, {MallocLike, 1, 0, -1, -1}},
- {LibFunc_valloc, {MallocLike, 1, 0, -1, -1}},
- {LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int)
- {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow)
- {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned int, align_val_t)
- {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned int, align_val_t, nothrow)
- {LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long)
- {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new(unsigned long, nothrow)
- {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned long, align_val_t)
- {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned long, align_val_t, nothrow)
- {LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int)
- {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow)
- {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned int, align_val_t)
- {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned int, align_val_t, nothrow)
- {LibFunc_Znam, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long)
- {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned long, nothrow)
- {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned long, align_val_t)
- {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned long, align_val_t, nothrow)
- {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int)
- {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow)
- {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long long)
- {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned long long, nothrow)
- {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int)
- {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow)
- {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long long)
- {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned long long, nothrow)
- {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1, 0}},
- {LibFunc_memalign, {AlignedAllocLike, 2, 1, -1, 0}},
- {LibFunc_calloc, {CallocLike, 2, 0, 1, -1}},
- {LibFunc_vec_calloc, {CallocLike, 2, 0, 1, -1}},
- {LibFunc_realloc, {ReallocLike, 2, 1, -1, -1}},
- {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1, -1}},
- {LibFunc_reallocf, {ReallocLike, 2, 1, -1, -1}},
- {LibFunc_strdup, {StrDupLike, 1, -1, -1, -1}},
- {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1}},
- {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1, -1}},
- // TODO: Handle "int posix_memalign(void **, size_t, size_t)"
+ {LibFunc_malloc, {MallocLike, 1, 0, -1, -1, MallocFamily::Malloc}},
+ {LibFunc_vec_malloc, {MallocLike, 1, 0, -1, -1, MallocFamily::VecMalloc}},
+ {LibFunc_valloc, {MallocLike, 1, 0, -1, -1, MallocFamily::Malloc}},
+ {LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int)
+ {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int, nothrow)
+ {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t)
+ {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t, nothrow)
+ {LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long)
+ {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow)
+ {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t)
+ {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow)
+ {LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int)
+ {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int, nothrow)
+ {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t)
+ {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t, nothrow)
+ {LibFunc_Znam, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long)
+ {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long, nothrow)
+ {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t)
+ {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t, nothrow)
+ {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int)
+ {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int, nothrow)
+ {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long)
+ {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long, nothrow)
+ {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int)
+ {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int, nothrow)
+ {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long)
+ {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long, nothrow)
+ {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1, 0, MallocFamily::Malloc}},
+ {LibFunc_memalign, {AlignedAllocLike, 2, 1, -1, 0, MallocFamily::Malloc}},
+ {LibFunc_calloc, {CallocLike, 2, 0, 1, -1, MallocFamily::Malloc}},
+ {LibFunc_vec_calloc, {CallocLike, 2, 0, 1, -1, MallocFamily::VecMalloc}},
+ {LibFunc_realloc, {ReallocLike, 2, 1, -1, -1, MallocFamily::Malloc}},
+ {LibFunc_vec_realloc, {ReallocLike, 2, 1, -1, -1, MallocFamily::VecMalloc}},
+ {LibFunc_reallocf, {ReallocLike, 2, 1, -1, -1, MallocFamily::Malloc}},
+ {LibFunc_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}},
+ {LibFunc_dunder_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}},
+ {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}},
+ {LibFunc_dunder_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}},
+ {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1, -1, MallocFamily::KmpcAllocShared}},
};
+// clang-format on
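As a reading aid for the table above (not part of the patch), one row decoded against the AllocFnsTy fields:

// {LibFunc_calloc, {CallocLike, 2, 0, 1, -1, MallocFamily::Malloc}}
//   AllocTy    = CallocLike           -> returns zero-initialized memory
//   NumParams  = 2
//   FstParam = 0, SndParam = 1        -> allocation size is arg0 * arg1
//   AlignParam = -1                   -> no alignment argument
//   Family     = MallocFamily::Malloc -> grouped with malloc/free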
static const Function *getCalledFunction(const Value *V,
bool &IsNoBuiltin) {
@@ -217,7 +260,7 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V,
Result.AllocTy = MallocLike;
Result.NumParams = Callee->getNumOperands();
Result.FstParam = Args.first;
- Result.SndParam = Args.second.getValueOr(-1);
+ Result.SndParam = Args.second.value_or(-1);
// Allocsize has no way to specify an alignment argument
Result.AlignParam = -1;
return Result;
@@ -227,54 +270,53 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V,
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, AnyAlloc, TLI).hasValue();
+ return getAllocationData(V, AnyAlloc, TLI).has_value();
}
bool llvm::isAllocationFn(
const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
- return getAllocationData(V, AnyAlloc, GetTLI).hasValue();
+ return getAllocationData(V, AnyAlloc, GetTLI).has_value();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates uninitialized memory (such as malloc).
static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, MallocOrOpNewLike, TLI).hasValue();
+ return getAllocationData(V, MallocOrOpNewLike, TLI).has_value();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates uninitialized memory with alignment (such as aligned_alloc).
static bool isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, AlignedAllocLike, TLI)
- .hasValue();
+ return getAllocationData(V, AlignedAllocLike, TLI).has_value();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
static bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, CallocLike, TLI).hasValue();
+ return getAllocationData(V, CallocLike, TLI).has_value();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates memory similar to malloc or calloc.
bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, MallocOrCallocLike, TLI).hasValue();
+ return getAllocationData(V, MallocOrCallocLike, TLI).has_value();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, AllocLike, TLI).hasValue();
+ return getAllocationData(V, AllocLike, TLI).has_value();
}
/// Tests if a value is a call or invoke to a library function that
/// reallocates memory (e.g., realloc).
bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, ReallocLike, TLI).hasValue();
+ return getAllocationData(V, ReallocLike, TLI).has_value();
}
/// Tests if a function is a call or invoke to a library function that
/// reallocates memory (e.g., realloc).
bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
- return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
+ return getAllocationDataForFunction(F, ReallocLike, TLI).has_value();
}
bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) {
@@ -291,13 +333,11 @@ bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) {
Value *llvm::getAllocAlignment(const CallBase *V,
const TargetLibraryInfo *TLI) {
- assert(isAllocationFn(V, TLI));
-
const Optional<AllocFnsTy> FnData = getAllocationData(V, AnyAlloc, TLI);
- if (!FnData.hasValue() || FnData->AlignParam < 0) {
- return nullptr;
+ if (FnData && FnData->AlignParam >= 0) {
+ return V->getOperand(FnData->AlignParam);
}
- return V->getOperand(FnData->AlignParam);
+ return V->getArgOperandWithAttribute(Attribute::AllocAlign);
}
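A hedged usage sketch of the new fallback path; printRequestedAlignment is a hypothetical helper, not part of this patch:

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/raw_ostream.h"

// Report the requested alignment of an allocation call when it is a
// compile-time constant; after the change above this also covers callees
// known only through the allocalign attribute.
void printRequestedAlignment(const llvm::CallBase *Call,
                             const llvm::TargetLibraryInfo *TLI) {
  if (llvm::Value *AlignV = llvm::getAllocAlignment(Call, TLI))
    if (auto *CI = llvm::dyn_cast<llvm::ConstantInt>(AlignV))
      llvm::errs() << "requested alignment: " << CI->getZExtValue() << "\n";
}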
/// When we're compiling N-bit code, and the user uses parameters that are
@@ -344,7 +384,7 @@ llvm::getAllocSize(const CallBase *CB,
if (!Arg)
return None;
- APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
+ APInt MaxSize = Arg->getValue().zext(IntTyBits);
if (Size.ugt(MaxSize))
Size = MaxSize + 1;
}
@@ -379,10 +419,12 @@ llvm::getAllocSize(const CallBase *CB,
return Size;
}
-Constant *llvm::getInitialValueOfAllocation(const CallBase *Alloc,
+Constant *llvm::getInitialValueOfAllocation(const Value *V,
const TargetLibraryInfo *TLI,
Type *Ty) {
- assert(isAllocationFn(Alloc, TLI));
+ auto *Alloc = dyn_cast<CallBase>(V);
+ if (!Alloc)
+ return nullptr;
// malloc and aligned_alloc are uninitialized (undef)
if (isMallocLikeFn(Alloc, TLI) || isAlignedAllocLikeFn(Alloc, TLI))
@@ -395,43 +437,81 @@ Constant *llvm::getInitialValueOfAllocation(const CallBase *Alloc,
return nullptr;
}
+struct FreeFnsTy {
+ unsigned NumParams;
+ // Name of default allocator function to group malloc/free calls by family
+ MallocFamily Family;
+};
+
+// clang-format off
+static const std::pair<LibFunc, FreeFnsTy> FreeFnData[] = {
+ {LibFunc_free, {1, MallocFamily::Malloc}},
+ {LibFunc_vec_free, {1, MallocFamily::VecMalloc}},
+ {LibFunc_ZdlPv, {1, MallocFamily::CPPNew}}, // operator delete(void*)
+ {LibFunc_ZdaPv, {1, MallocFamily::CPPNewArray}}, // operator delete[](void*)
+ {LibFunc_msvc_delete_ptr32, {1, MallocFamily::MSVCNew}}, // operator delete(void*)
+ {LibFunc_msvc_delete_ptr64, {1, MallocFamily::MSVCNew}}, // operator delete(void*)
+ {LibFunc_msvc_delete_array_ptr32, {1, MallocFamily::MSVCArrayNew}}, // operator delete[](void*)
+ {LibFunc_msvc_delete_array_ptr64, {1, MallocFamily::MSVCArrayNew}}, // operator delete[](void*)
+ {LibFunc_ZdlPvj, {2, MallocFamily::CPPNew}}, // delete(void*, uint)
+ {LibFunc_ZdlPvm, {2, MallocFamily::CPPNew}}, // delete(void*, ulong)
+ {LibFunc_ZdlPvRKSt9nothrow_t, {2, MallocFamily::CPPNew}}, // delete(void*, nothrow)
+ {LibFunc_ZdlPvSt11align_val_t, {2, MallocFamily::CPPNewAligned}}, // delete(void*, align_val_t)
+ {LibFunc_ZdaPvj, {2, MallocFamily::CPPNewArray}}, // delete[](void*, uint)
+ {LibFunc_ZdaPvm, {2, MallocFamily::CPPNewArray}}, // delete[](void*, ulong)
+ {LibFunc_ZdaPvRKSt9nothrow_t, {2, MallocFamily::CPPNewArray}}, // delete[](void*, nothrow)
+ {LibFunc_ZdaPvSt11align_val_t, {2, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, align_val_t)
+ {LibFunc_msvc_delete_ptr32_int, {2, MallocFamily::MSVCNew}}, // delete(void*, uint)
+ {LibFunc_msvc_delete_ptr64_longlong, {2, MallocFamily::MSVCNew}}, // delete(void*, ulonglong)
+ {LibFunc_msvc_delete_ptr32_nothrow, {2, MallocFamily::MSVCNew}}, // delete(void*, nothrow)
+ {LibFunc_msvc_delete_ptr64_nothrow, {2, MallocFamily::MSVCNew}}, // delete(void*, nothrow)
+ {LibFunc_msvc_delete_array_ptr32_int, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, uint)
+ {LibFunc_msvc_delete_array_ptr64_longlong, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, ulonglong)
+ {LibFunc_msvc_delete_array_ptr32_nothrow, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, nothrow)
+ {LibFunc_msvc_delete_array_ptr64_nothrow, {2, MallocFamily::MSVCArrayNew}}, // delete[](void*, nothrow)
+ {LibFunc___kmpc_free_shared, {2, MallocFamily::KmpcAllocShared}}, // OpenMP Offloading RTL free
+ {LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, align_val_t, nothrow)
+ {LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, align_val_t, nothrow)
+ {LibFunc_ZdlPvjSt11align_val_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, unsigned int, align_val_t)
+ {LibFunc_ZdlPvmSt11align_val_t, {3, MallocFamily::CPPNewAligned}}, // delete(void*, unsigned long, align_val_t)
+ {LibFunc_ZdaPvjSt11align_val_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, unsigned int, align_val_t)
+ {LibFunc_ZdaPvmSt11align_val_t, {3, MallocFamily::CPPNewArrayAligned}}, // delete[](void*, unsigned long, align_val_t)
+};
+// clang-format on
+
+Optional<FreeFnsTy> getFreeFunctionDataForFunction(const Function *Callee,
+ const LibFunc TLIFn) {
+ const auto *Iter =
+ find_if(FreeFnData, [TLIFn](const std::pair<LibFunc, FreeFnsTy> &P) {
+ return P.first == TLIFn;
+ });
+ if (Iter == std::end(FreeFnData))
+ return None;
+ return Iter->second;
+}
+
+Optional<StringRef> llvm::getAllocationFamily(const Value *I,
+ const TargetLibraryInfo *TLI) {
+ bool IsNoBuiltin;
+ const Function *Callee = getCalledFunction(I, IsNoBuiltin);
+ if (Callee == nullptr || IsNoBuiltin)
+ return None;
+ LibFunc TLIFn;
+ if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
+ return None;
+ const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI);
+ if (AllocData)
+ return mangledNameForMallocFamily(AllocData.getValue().Family);
+ const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn);
+ if (FreeData)
+ return mangledNameForMallocFamily(FreeData.getValue().Family);
+ return None;
+}
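One intended use of getAllocationFamily, sketched as an assumption since no caller appears in this hunk: checking that an allocation and a deallocation belong to the same family.

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"

// Hypothetical check: true only when both calls map to the same family,
// e.g. it would reject malloc() memory released via operator delete.
bool familiesMatch(const llvm::Value *AllocCall, const llvm::Value *FreeCall,
                   const llvm::TargetLibraryInfo *TLI) {
  auto A = llvm::getAllocationFamily(AllocCall, TLI);
  auto F = llvm::getAllocationFamily(FreeCall, TLI);
  return A && F && *A == *F;
}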
+
/// isLibFreeFunction - Returns true if the function is a builtin free()
bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
- unsigned ExpectedNumParams;
- if (TLIFn == LibFunc_free ||
- TLIFn == LibFunc_ZdlPv || // operator delete(void*)
- TLIFn == LibFunc_ZdaPv || // operator delete[](void*)
- TLIFn == LibFunc_msvc_delete_ptr32 || // operator delete(void*)
- TLIFn == LibFunc_msvc_delete_ptr64 || // operator delete(void*)
- TLIFn == LibFunc_msvc_delete_array_ptr32 || // operator delete[](void*)
- TLIFn == LibFunc_msvc_delete_array_ptr64) // operator delete[](void*)
- ExpectedNumParams = 1;
- else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint)
- TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong)
- TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
- TLIFn == LibFunc_ZdlPvSt11align_val_t || // delete(void*, align_val_t)
- TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint)
- TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong)
- TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
- TLIFn == LibFunc_ZdaPvSt11align_val_t || // delete[](void*, align_val_t)
- TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint)
- TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
- TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
- TLIFn == LibFunc_msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
- TLIFn == LibFunc_msvc_delete_array_ptr32_int || // delete[](void*, uint)
- TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
- TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
- TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow || // delete[](void*, nothrow)
- TLIFn == LibFunc___kmpc_free_shared) // OpenMP Offloading RTL free
- ExpectedNumParams = 2;
- else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete(void*, align_val_t, nothrow)
- TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t || // delete[](void*, align_val_t, nothrow)
- TLIFn == LibFunc_ZdlPvjSt11align_val_t || // delete(void*, unsigned long, align_val_t)
- TLIFn == LibFunc_ZdlPvmSt11align_val_t || // delete(void*, unsigned long, align_val_t)
- TLIFn == LibFunc_ZdaPvjSt11align_val_t || // delete[](void*, unsigned int, align_val_t)
- TLIFn == LibFunc_ZdaPvmSt11align_val_t) // delete[](void*, unsigned long, align_val_t)
- ExpectedNumParams = 3;
- else
+ Optional<FreeFnsTy> FnData = getFreeFunctionDataForFunction(F, TLIFn);
+ if (!FnData)
return false;
// Check free prototype.
@@ -440,7 +520,7 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
FunctionType *FTy = F->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
return false;
- if (FTy->getNumParams() != ExpectedNumParams)
+ if (FTy->getNumParams() != FnData->NumParams)
return false;
if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext()))
return false;
@@ -491,11 +571,21 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool MustSucceed) {
+ return lowerObjectSizeCall(ObjectSize, DL, TLI, /*AAResults=*/nullptr,
+ MustSucceed);
+}
+
+Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AAResults *AA,
+ bool MustSucceed) {
assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
"ObjectSize must be a call to llvm.objectsize!");
bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
ObjectSizeOpts EvalOptions;
+ EvalOptions.AA = AA;
+
// Unless we have to fold this to something, try to be as accurate as
// possible.
if (MustSucceed)
@@ -559,7 +649,7 @@ STATISTIC(ObjectVisitorLoad,
APInt ObjectSizeOffsetVisitor::align(APInt Size, MaybeAlign Alignment) {
if (Options.RoundToAlign && Alignment)
- return APInt(IntTyBits, alignTo(Size.getZExtValue(), Alignment));
+ return APInt(IntTyBits, alignTo(Size.getZExtValue(), *Alignment));
return Size;
}
@@ -573,18 +663,48 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
+ unsigned InitialIntTyBits = DL.getIndexTypeSizeInBits(V->getType());
+
+  // Stripping pointer casts can strip address space casts, which can change
+  // the index type size. The invariant is that we use the value type to
+  // determine the index type size; if we stripped address space casts, we
+  // have to readjust the APInt as we pass it upwards so that it matches the
+  // type the caller passed in.
+ APInt Offset(InitialIntTyBits, 0);
+ V = V->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true, /* AllowInvariantGroup */ true);
+
+  // The index type size and zero computed below are used later; they match
+  // the type of the value that is passed to computeImpl.
IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
Zero = APInt::getZero(IntTyBits);
- V = V->stripPointerCasts();
+ bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits;
+ if (!IndexTypeSizeChanged && Offset.isZero())
+ return computeImpl(V);
+
+ // We stripped an address space cast that changed the index type size or we
+ // accumulated some constant offset (or both). Readjust the bit width to match
+ // the argument index type size and apply the offset, as required.
+ SizeOffsetType SOT = computeImpl(V);
+ if (IndexTypeSizeChanged) {
+ if (knownSize(SOT) && !::CheckedZextOrTrunc(SOT.first, InitialIntTyBits))
+ SOT.first = APInt();
+ if (knownOffset(SOT) && !::CheckedZextOrTrunc(SOT.second, InitialIntTyBits))
+ SOT.second = APInt();
+ }
+ // If the computed offset is "unknown" we cannot add the stripped offset.
+ return {SOT.first,
+ SOT.second.getBitWidth() > 1 ? SOT.second + Offset : SOT.second};
+}
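The ::CheckedZextOrTrunc helper used above is defined earlier in this file; a sketch of its contract, inferred from these call sites rather than copied from the patch:

#include "llvm/ADT/APInt.h"

// Resize I to IntTyBits, succeeding only if no significant bits are lost.
static bool checkedZextOrTruncSketch(llvm::APInt &I, unsigned IntTyBits) {
  if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
    return false; // Truncation would drop set bits.
  if (I.getBitWidth() != IntTyBits)
    I = I.zextOrTrunc(IntTyBits);
  return true;
}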
+
+SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If we have already seen this instruction, bail out. Cycles can happen in
// unreachable code after constant propagation.
if (!SeenInsts.insert(I).second)
return unknown();
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- return visitGEPOperator(*GEP);
return visit(*I);
}
if (Argument *A = dyn_cast<Argument>(V))
@@ -597,12 +717,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
return visitGlobalVariable(*GV);
if (UndefValue *UV = dyn_cast<UndefValue>(V))
return visitUndefValue(*UV);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() == Instruction::IntToPtr)
- return unknown(); // clueless
- if (CE->getOpcode() == Instruction::GetElementPtr)
- return visitGEPOperator(cast<GEPOperator>(*CE));
- }
LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: "
<< *V << '\n');
@@ -617,10 +731,10 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- if (isa<ScalableVectorType>(I.getAllocatedType()))
+ TypeSize ElemSize = DL.getTypeAllocSize(I.getAllocatedType());
+ if (ElemSize.isScalable() && Options.EvalMode != ObjectSizeOpts::Mode::Min)
return unknown();
-
- APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, ElemSize.getKnownMinSize());
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlign()), Zero);
@@ -682,15 +796,6 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
return unknown();
}
-SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
- SizeOffsetType PtrData = compute(GEP.getPointerOperand());
- APInt Offset(DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()), 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
- return unknown();
-
- return std::make_pair(PtrData.first, PtrData.second + Offset);
-}
-
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) {
if (GA.isInterposable())
return unknown();
@@ -710,42 +815,161 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) {
return unknown();
}
-SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
- ++ObjectVisitorLoad;
- return unknown();
-}
+SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset(
+ LoadInst &Load, BasicBlock &BB, BasicBlock::iterator From,
+ SmallDenseMap<BasicBlock *, SizeOffsetType, 8> &VisitedBlocks,
+ unsigned &ScannedInstCount) {
+ constexpr unsigned MaxInstsToScan = 128;
+
+ auto Where = VisitedBlocks.find(&BB);
+ if (Where != VisitedBlocks.end())
+ return Where->second;
+
+ auto Unknown = [this, &BB, &VisitedBlocks]() {
+ return VisitedBlocks[&BB] = unknown();
+ };
+ auto Known = [&BB, &VisitedBlocks](SizeOffsetType SO) {
+ return VisitedBlocks[&BB] = SO;
+ };
+
+ do {
+ Instruction &I = *From;
+
+ if (I.isDebugOrPseudoInst())
+ continue;
+
+ if (++ScannedInstCount > MaxInstsToScan)
+ return Unknown();
+
+ if (!I.mayWriteToMemory())
+ continue;
+
+ if (auto *SI = dyn_cast<StoreInst>(&I)) {
+ AliasResult AR =
+ Options.AA->alias(SI->getPointerOperand(), Load.getPointerOperand());
+ switch ((AliasResult::Kind)AR) {
+ case AliasResult::NoAlias:
+ continue;
+ case AliasResult::MustAlias:
+ if (SI->getValueOperand()->getType()->isPointerTy())
+ return Known(compute(SI->getValueOperand()));
+ else
+ return Unknown(); // No handling of non-pointer values by `compute`.
+ default:
+ return Unknown();
+ }
+ }
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
- // too complex to analyze statically.
- return unknown();
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ Function *Callee = CB->getCalledFunction();
+ // Bail out on indirect call.
+ if (!Callee)
+ return Unknown();
+
+ LibFunc TLIFn;
+ if (!TLI || !TLI->getLibFunc(*CB->getCalledFunction(), TLIFn) ||
+ !TLI->has(TLIFn))
+ return Unknown();
+
+      // TODO: There are probably more interesting cases to support here.
+ if (TLIFn != LibFunc_posix_memalign)
+ return Unknown();
+
+ AliasResult AR =
+ Options.AA->alias(CB->getOperand(0), Load.getPointerOperand());
+ switch ((AliasResult::Kind)AR) {
+ case AliasResult::NoAlias:
+ continue;
+ case AliasResult::MustAlias:
+ break;
+ default:
+ return Unknown();
+ }
+
+      // Is the error status of posix_memalign correctly checked? If not, it
+      // would be incorrect to assume that it succeeds and that the load
+      // doesn't see the previous value.
+ Optional<bool> Checked = isImpliedByDomCondition(
+ ICmpInst::ICMP_EQ, CB, ConstantInt::get(CB->getType(), 0), &Load, DL);
+ if (!Checked || !*Checked)
+ return Unknown();
+
+ Value *Size = CB->getOperand(2);
+ auto *C = dyn_cast<ConstantInt>(Size);
+ if (!C)
+ return Unknown();
+
+ return Known({C->getValue(), APInt(C->getValue().getBitWidth(), 0)});
+ }
+
+ return Unknown();
+ } while (From-- != BB.begin());
+
+ SmallVector<SizeOffsetType> PredecessorSizeOffsets;
+ for (auto *PredBB : predecessors(&BB)) {
+ PredecessorSizeOffsets.push_back(findLoadSizeOffset(
+ Load, *PredBB, BasicBlock::iterator(PredBB->getTerminator()),
+ VisitedBlocks, ScannedInstCount));
+ if (!bothKnown(PredecessorSizeOffsets.back()))
+ return Unknown();
+ }
+
+ if (PredecessorSizeOffsets.empty())
+ return Unknown();
+
+ return Known(std::accumulate(PredecessorSizeOffsets.begin() + 1,
+ PredecessorSizeOffsets.end(),
+ PredecessorSizeOffsets.front(),
+ [this](SizeOffsetType LHS, SizeOffsetType RHS) {
+ return combineSizeOffset(LHS, RHS);
+ }));
}
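The posix_memalign handling above corresponds to this source-level pattern (illustrative; consume is a placeholder declaration):

#include <cstdlib>

void consume(void *);

void pattern() {
  void *P = nullptr;
  // The scan accepts the load of P only because the error status is checked
  // first; on success the load must observe the fresh allocation.
  if (posix_memalign(&P, 64, 128) == 0)
    consume(P); // findLoadSizeOffset: size = 128, offset = 0
}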
-SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
- SizeOffsetType TrueSide = compute(I.getTrueValue());
- SizeOffsetType FalseSide = compute(I.getFalseValue());
- if (bothKnown(TrueSide) && bothKnown(FalseSide)) {
- if (TrueSide == FalseSide) {
- return TrueSide;
- }
+SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst &LI) {
+ if (!Options.AA) {
+ ++ObjectVisitorLoad;
+ return unknown();
+ }
- APInt TrueResult = getSizeWithOverflow(TrueSide);
- APInt FalseResult = getSizeWithOverflow(FalseSide);
+ SmallDenseMap<BasicBlock *, SizeOffsetType, 8> VisitedBlocks;
+ unsigned ScannedInstCount = 0;
+ SizeOffsetType SO =
+ findLoadSizeOffset(LI, *LI.getParent(), BasicBlock::iterator(LI),
+ VisitedBlocks, ScannedInstCount);
+ if (!bothKnown(SO))
+ ++ObjectVisitorLoad;
+ return SO;
+}
- if (TrueResult == FalseResult) {
- return TrueSide;
- }
- if (Options.EvalMode == ObjectSizeOpts::Mode::Min) {
- if (TrueResult.slt(FalseResult))
- return TrueSide;
- return FalseSide;
- }
- if (Options.EvalMode == ObjectSizeOpts::Mode::Max) {
- if (TrueResult.sgt(FalseResult))
- return TrueSide;
- return FalseSide;
- }
+SizeOffsetType ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetType LHS,
+ SizeOffsetType RHS) {
+ if (!bothKnown(LHS) || !bothKnown(RHS))
+ return unknown();
+
+ switch (Options.EvalMode) {
+ case ObjectSizeOpts::Mode::Min:
+ return (getSizeWithOverflow(LHS).slt(getSizeWithOverflow(RHS))) ? LHS : RHS;
+ case ObjectSizeOpts::Mode::Max:
+ return (getSizeWithOverflow(LHS).sgt(getSizeWithOverflow(RHS))) ? LHS : RHS;
+ case ObjectSizeOpts::Mode::Exact:
+ return (getSizeWithOverflow(LHS).eq(getSizeWithOverflow(RHS))) ? LHS
+ : unknown();
}
- return unknown();
+ llvm_unreachable("missing an eval mode");
+}
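A worked example of the three modes (values assumed; the comparison key is getSizeWithOverflow, i.e. the size remaining past the offset):

#include "llvm/ADT/APInt.h"
#include <cassert>

void combineExample() {
  // Two candidates reaching a merge point: {Size=8, Off=0} and {Size=16, Off=0}.
  llvm::APInt SmallRem(64, 8), BigRem(64, 16);
  assert(SmallRem.slt(BigRem)); // Mode::Min keeps the 8-byte candidate.
  assert(BigRem.sgt(SmallRem)); // Mode::Max keeps the 16-byte candidate.
  // Mode::Exact returns unknown() because the remaining sizes differ.
}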
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) {
+ auto IncomingValues = PN.incoming_values();
+ return std::accumulate(IncomingValues.begin() + 1, IncomingValues.end(),
+ compute(*IncomingValues.begin()),
+ [this](SizeOffsetType LHS, Value *VRHS) {
+ return combineSizeOffset(LHS, compute(VRHS));
+ });
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
+ return combineSizeOffset(compute(I.getTrueValue()),
+ compute(I.getFalseValue()));
}
SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) {
@@ -790,7 +1014,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
// Erase any instructions we inserted as part of the traversal.
for (Instruction *I : InsertedInstructions) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
}
@@ -919,7 +1143,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) {
return unknown();
}
-SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) {
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst &LI) {
return unknown();
}
@@ -937,10 +1161,10 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
if (!bothKnown(EdgeData)) {
- OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
+ OffsetPHI->replaceAllUsesWith(PoisonValue::get(IntTy));
OffsetPHI->eraseFromParent();
InsertedInstructions.erase(OffsetPHI);
- SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
+ SizePHI->replaceAllUsesWith(PoisonValue::get(IntTy));
SizePHI->eraseFromParent();
InsertedInstructions.erase(SizePHI);
return unknown();
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 36df462c7a66..690d575ef979 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -27,11 +27,7 @@
#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -44,7 +40,6 @@
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -53,10 +48,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
-#include <cstdint>
#include <iterator>
#include <utility>
@@ -414,20 +407,17 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
isInvariantLoad = true;
}
- // Return "true" if and only if the instruction I is either a non-simple
- // load or a non-simple store.
- auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
+  // Returns true for a volatile instruction. For a load or store, returns
+  // true if its atomic ordering is stronger than AO; for any other
+  // instruction, returns true if it may read or write memory.
+  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
+ if (I->isVolatile())
+ return true;
if (auto *LI = dyn_cast<LoadInst>(I))
- return !LI->isSimple();
+ return isStrongerThan(LI->getOrdering(), AO);
if (auto *SI = dyn_cast<StoreInst>(I))
- return !SI->isSimple();
- return false;
- };
-
- // Return "true" if I is not a load and not a store, but it does access
- // memory.
- auto isOtherMemAccess = [](Instruction *I) -> bool {
- return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
+ return isStrongerThan(SI->getOrdering(), AO);
+ return I->mayReadOrWriteMemory();
};
// Walk backwards through the basic block, looking for dependencies.
@@ -500,8 +490,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// atomic.
// FIXME: This is overly conservative.
if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
- if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
- isOtherMemAccess(QueryInst))
+ if (!QueryInst ||
+ isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
return MemDepResult::getClobber(LI);
if (LI->getOrdering() != AtomicOrdering::Monotonic)
return MemDepResult::getClobber(LI);
@@ -512,10 +502,10 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// If we found a pointer, check if it could be the same as our pointer.
AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
- if (isLoad) {
- if (R == AliasResult::NoAlias)
- continue;
+ if (R == AliasResult::NoAlias)
+ continue;
+ if (isLoad) {
      // Must-aliased loads are defs of each other.
if (R == AliasResult::MustAlias)
return MemDepResult::getDef(Inst);
@@ -532,10 +522,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
continue;
}
- // Stores don't depend on other no-aliased accesses.
- if (R == AliasResult::NoAlias)
- continue;
-
// Stores don't alias loads from read-only memory.
if (BatchAA.pointsToConstantMemory(LoadLoc))
continue;
@@ -549,20 +535,25 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// A Monotonic store is OK if the query inst is itself not atomic.
// FIXME: This is overly conservative.
if (!SI->isUnordered() && SI->isAtomic()) {
- if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
- isOtherMemAccess(QueryInst))
- return MemDepResult::getClobber(SI);
- if (SI->getOrdering() != AtomicOrdering::Monotonic)
+ if (!QueryInst ||
+ isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
return MemDepResult::getClobber(SI);
+      // If we get here, the guard above guarantees that QueryInst is a
+      // non-atomic or unordered load/store. SI is atomic with monotonic or
+      // release semantics (seq_cst for a store is effectively release
+      // semantics plus a total order over other seq_cst instructions; since
+      // QueryInst is not seq_cst, we can treat SI as having plain release
+      // semantics). Monotonic and release semantics allow reordering before
+      // the store, so it is safe to go further and check aliasing, which
+      // will prohibit reordering when the locations may- or must-alias.
}
- // FIXME: this is overly conservative.
// While volatile access cannot be eliminated, they do not have to clobber
// non-aliasing locations, as normal accesses can for example be reordered
// with volatile accesses.
if (SI->isVolatile())
- if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
- isOtherMemAccess(QueryInst))
+ if (!QueryInst || QueryInst->isVolatile())
return MemDepResult::getClobber(SI);
// If alias analysis can tell that this store is guaranteed to not modify
@@ -743,8 +734,6 @@ MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
llvm::sort(Cache);
++NumCacheDirtyNonLocal;
- // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
- // << Cache.size() << " cached: " << *QueryInst;
} else {
// Seed DirtyBlocks with each of the preds of QueryInst's block.
BasicBlock *QueryBB = QueryCall->getParent();
@@ -1204,7 +1193,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we do process a large number of blocks it becomes very expensive and
// likely it isn't worth worrying about
if (Result.size() > NumResultsLimit) {
- Worklist.clear();
// Sort it now (if needed) so that recursive invocations of
// getNonLocalPointerDepFromBB and other routines that could reuse the
// cache value will only see properly sorted cache arrays.
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index a877b19df866..2ed32227bd9e 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -8,12 +8,10 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsARM.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index 57f431ec21f5..76371b88812e 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -36,8 +36,8 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/InitializePasses.h"
@@ -49,10 +49,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <cstdlib>
#include <iterator>
#include <memory>
#include <utility>
@@ -130,6 +130,12 @@ public:
MemorySSAWalkerAnnotatedWriter(MemorySSA *M)
: MSSA(M), Walker(M->getWalker()) {}
+ void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) override {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
+ OS << "; " << *MA << "\n";
+ }
+
void emitInstructionAnnot(const Instruction *I,
formatted_raw_ostream &OS) override {
if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
@@ -732,7 +738,7 @@ template <class AliasAnalysisType> class ClobberWalker {
struct generic_def_path_iterator
: public iterator_facade_base<generic_def_path_iterator<T, Walker>,
std::forward_iterator_tag, T *> {
- generic_def_path_iterator() {}
+ generic_def_path_iterator() = default;
generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
T &operator*() const { return curNode(); }
@@ -743,9 +749,9 @@ template <class AliasAnalysisType> class ClobberWalker {
}
bool operator==(const generic_def_path_iterator &O) const {
- if (N.hasValue() != O.N.hasValue())
+ if (N.has_value() != O.N.has_value())
return false;
- return !N.hasValue() || *N == *O.N;
+ return !N || *N == *O.N;
}
private:
@@ -1397,6 +1403,9 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
continue;
}
+ if (MU->isOptimized())
+ continue;
+
if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
continue;
@@ -1585,10 +1594,6 @@ void MemorySSA::buildMemorySSA(BatchAAResults &BAA) {
SmallPtrSet<BasicBlock *, 16> Visited;
renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
- ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT);
- CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
- OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses();
-
// Mark the uses in unreachable blocks as live on entry, so that they go
// somewhere.
for (auto &BB : F)
@@ -2178,6 +2183,17 @@ bool MemorySSA::dominates(const MemoryAccess *Dominator,
return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}
+void MemorySSA::ensureOptimizedUses() {
+ if (IsOptimized)
+ return;
+
+ BatchAAResults BatchAA(*AA);
+ ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BatchAA, DT);
+ CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
+ OptimizeUses(this, &WalkerLocal, &BatchAA, DT).optimizeUses();
+ IsOptimized = true;
+}
+
void MemoryAccess::print(raw_ostream &OS) const {
switch (getValueID()) {
case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
@@ -2350,6 +2366,7 @@ struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits {
bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSA.ensureOptimizedUses();
if (DotCFGMSSA != "") {
DOTFuncMSSAInfo CFGInfo(F, MSSA);
WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
@@ -2382,6 +2399,7 @@ bool MemorySSAAnalysis::Result::invalidate(
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ MSSA.ensureOptimizedUses();
if (DotCFGMSSA != "") {
DOTFuncMSSAInfo CFGInfo(F, MSSA);
WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 9c841883de6d..eb75118210b9 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -10,22 +10,15 @@
//
//===----------------------------------------------------------------===//
#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/LoopIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
#include <algorithm>
#define DEBUG_TYPE "memoryssa"
@@ -243,6 +236,7 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
}
void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) {
+ VisitedBlocks.clear();
InsertedPHIs.clear();
MU->setDefiningAccess(getPreviousDef(MU));
@@ -311,6 +305,13 @@ static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB,
// point to the correct new defs, to ensure we only have one variable, and no
// disconnected stores.
void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
+ // Don't bother updating dead code.
+ if (!MSSA->DT->isReachableFromEntry(MD->getBlock())) {
+ MD->setDefiningAccess(MSSA->getLiveOnEntryDef());
+ return;
+ }
+
+ VisitedBlocks.clear();
InsertedPHIs.clear();
// See if we had a local def, and if not, go hunting.
@@ -427,10 +428,10 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
if (NewPhiSize)
tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize));
- // Now that all fixups are done, rename all uses if we are asked. Skip
- // renaming for defs in unreachable blocks.
+ // Now that all fixups are done, rename all uses if we are asked. The defs are
+ // guaranteed to be in reachable code due to the check at the method entry.
BasicBlock *StartBlock = MD->getBlock();
- if (RenameUses && MSSA->getDomTree().getNode(StartBlock)) {
+ if (RenameUses) {
SmallPtrSet<BasicBlock *, 16> Visited;
// We are guaranteed there is a def in the block, because we just got it
// handed to us in this function.
diff --git a/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
index fab51d6a7aaf..dc149f326271 100644
--- a/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
+++ b/llvm/lib/Analysis/ModelUnderTrainingRunner.cpp
@@ -22,7 +22,7 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(
LLVMContext &Ctx, const std::string &ModelPath,
const std::vector<TensorSpec> &InputSpecs,
const std::vector<LoggedFeatureSpec> &OutputSpecs)
- : MLModelRunner(Ctx, MLModelRunner::Kind::Development),
+ : MLModelRunner(Ctx, MLModelRunner::Kind::Development, InputSpecs.size()),
OutputSpecs(OutputSpecs) {
Evaluator = std::make_unique<TFModelEvaluator>(
ModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I].Spec; },
@@ -32,6 +32,10 @@ ModelUnderTrainingRunner::ModelUnderTrainingRunner(
Evaluator.reset();
return;
}
+
+ for (size_t I = 0, E = InputSpecs.size(); I < E; ++I) {
+ setUpBufferForTensor(I, InputSpecs[I], Evaluator->getUntypedInput(I));
+ }
}
void *ModelUnderTrainingRunner::evaluateUntyped() {
@@ -43,24 +47,31 @@ void *ModelUnderTrainingRunner::evaluateUntyped() {
return LastEvaluationResult->getUntypedTensorValue(0);
}
-void *ModelUnderTrainingRunner::getTensorUntyped(size_t Index) {
- return Evaluator->getUntypedInput(Index);
-}
-
std::unique_ptr<ModelUnderTrainingRunner>
ModelUnderTrainingRunner::createAndEnsureValid(
LLVMContext &Ctx, const std::string &ModelPath, StringRef DecisionName,
const std::vector<TensorSpec> &InputSpecs,
StringRef OutputSpecsPathOverride) {
- std::unique_ptr<ModelUnderTrainingRunner> MUTR;
if (auto MaybeOutputSpecs = loadOutputSpecs(Ctx, DecisionName, ModelPath,
OutputSpecsPathOverride))
- MUTR.reset(new ModelUnderTrainingRunner(Ctx, ModelPath, InputSpecs,
- *MaybeOutputSpecs));
+ return createAndEnsureValid(Ctx, ModelPath, DecisionName, InputSpecs,
+ *MaybeOutputSpecs);
+ Ctx.emitError("Could not load the policy model from the provided path");
+ return nullptr;
+}
+
+std::unique_ptr<ModelUnderTrainingRunner>
+ModelUnderTrainingRunner::createAndEnsureValid(
+ LLVMContext &Ctx, const std::string &ModelPath, StringRef DecisionName,
+ const std::vector<TensorSpec> &InputSpecs,
+ const std::vector<LoggedFeatureSpec> &OutputSpecs) {
+ std::unique_ptr<ModelUnderTrainingRunner> MUTR;
+ MUTR.reset(
+ new ModelUnderTrainingRunner(Ctx, ModelPath, InputSpecs, OutputSpecs));
if (MUTR && MUTR->isValid())
return MUTR;
- Ctx.emitError("Could not load the policy model from the provided path");
+ Ctx.emitError("Could not load or create model evaluator.");
return nullptr;
}
diff --git a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 64fd5eb1acd4..373aaa48b1d1 100644
--- a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -15,8 +15,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ModuleDebugInfoPrinter.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 2880ca62a7f8..2b98634ef7bf 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -38,7 +38,6 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
@@ -368,7 +367,7 @@ static void computeFunctionSummary(
// We should have named any anonymous globals
assert(CalledFunction->hasName());
auto ScaledCount = PSI->getProfileCount(*CB, BFI);
- auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
+ auto Hotness = ScaledCount ? getHotness(*ScaledCount, PSI)
: CalleeInfo::HotnessType::Unknown;
if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
Hotness = CalleeInfo::HotnessType::Cold;
@@ -490,8 +489,7 @@ static void computeFunctionSummary(
HasIndirBranchToBlockAddress;
GlobalValueSummary::GVFlags Flags(
F.getLinkage(), F.getVisibility(), NotEligibleForImport,
- /* Live = */ false, F.isDSOLocal(),
- F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
+ /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable());
FunctionSummary::FFlags FunFlags{
F.hasFnAttribute(Attribute::ReadNone),
F.hasFnAttribute(Attribute::ReadOnly),
@@ -612,8 +610,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(
V.getLinkage(), V.getVisibility(), NonRenamableLocal,
- /* Live = */ false, V.isDSOLocal(),
- V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr());
+ /* Live = */ false, V.isDSOLocal(), V.canBeOmittedFromSymbolTable());
VTableFuncList VTableFuncs;
// If splitting is not enabled, then we compute the summary information
@@ -655,8 +652,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(
A.getLinkage(), A.getVisibility(), NonRenamableLocal,
- /* Live = */ false, A.isDSOLocal(),
- A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
+ /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable());
auto AS = std::make_unique<AliasSummary>(Flags);
auto *Aliasee = A.getAliaseeObject();
auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
@@ -733,8 +729,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility,
/* NotEligibleToImport = */ true,
/* Live = */ true,
- /* Local */ GV->isDSOLocal(),
- GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
+ /* Local */ GV->isDSOLocal(), GV->canBeOmittedFromSymbolTable());
CantBePromoted.insert(GV->getGUID());
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp
index 5ca72f5f3623..5cff986245b9 100644
--- a/llvm/lib/Analysis/MustExecute.cpp
+++ b/llvm/lib/Analysis/MustExecute.cpp
@@ -16,14 +16,11 @@
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -143,7 +140,7 @@ static bool CanProveNotTakenFirstIteration(const BasicBlock *ExitBlock,
return false;
auto DL = ExitBlock->getModule()->getDataLayout();
auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
- auto *SimpleValOrNull = SimplifyCmpInst(Cond->getPredicate(),
+ auto *SimpleValOrNull = simplifyCmpInst(Cond->getPredicate(),
IVStart, RHS,
{DL, /*TLI*/ nullptr,
DT, /*AC*/ nullptr, BI});
@@ -494,7 +491,7 @@ template <typename K, typename V, typename FnTy, typename... ArgsTy>
static V getOrCreateCachedOptional(K Key, DenseMap<K, Optional<V>> &Map,
FnTy &&Fn, ArgsTy&&... args) {
Optional<V> &OptVal = Map[Key];
- if (!OptVal.hasValue())
+ if (!OptVal)
OptVal = Fn(std::forward<ArgsTy>(args)...);
return OptVal.getValue();
}
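
A minimal standalone sketch of the memoization helper in the hunk above, using std::map and std::optional in place of DenseMap and llvm::Optional so it compiles on its own (C++17):

#include <map>
#include <optional>
#include <utility>

// Compute Fn(Args...) at most once per Key; later lookups return the cache.
template <typename K, typename V, typename FnTy, typename... ArgsTy>
static V getOrCreateCachedOptional(K Key, std::map<K, std::optional<V>> &Map,
                                   FnTy &&Fn, ArgsTy &&...Args) {
  std::optional<V> &OptVal = Map[Key];
  if (!OptVal) // the contextual-bool test the patch switches to
    OptVal = Fn(std::forward<ArgsTy>(Args)...);
  return *OptVal;
}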
diff --git a/llvm/lib/Analysis/NoInferenceModelRunner.cpp b/llvm/lib/Analysis/NoInferenceModelRunner.cpp
index 7178120ebe4f..1914b22f5d71 100644
--- a/llvm/lib/Analysis/NoInferenceModelRunner.cpp
+++ b/llvm/lib/Analysis/NoInferenceModelRunner.cpp
@@ -10,24 +10,14 @@
// logs for the default policy, in 'development' mode, but never ask it to
// 'run'.
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
-#if defined(LLVM_HAVE_TF_API)
-
#include "llvm/Analysis/NoInferenceModelRunner.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
using namespace llvm;
NoInferenceModelRunner::NoInferenceModelRunner(
LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs)
- : MLModelRunner(Ctx, MLModelRunner::Kind::NoOp) {
- ValuesBuffer.reserve(Inputs.size());
+ : MLModelRunner(Ctx, MLModelRunner::Kind::NoOp, Inputs.size()) {
+ size_t Index = 0;
for (const auto &TS : Inputs)
- ValuesBuffer.push_back(std::make_unique<char[]>(TS.getElementCount() *
- TS.getElementByteSize()));
-}
-
-void *NoInferenceModelRunner::getTensorUntyped(size_t Index) {
- return ValuesBuffer[Index].get();
+ setUpBufferForTensor(Index++, TS, nullptr);
}
-#endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 0826b3078672..6fe056d36668 100644
--- a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -26,8 +26,6 @@
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
index 6f3d4d536c40..17b40f03a5a5 100644
--- a/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -47,7 +47,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
bool OptimizationRemarkEmitter::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
- if (OwnedBFI.get()) {
+ if (OwnedBFI) {
OwnedBFI.reset();
BFI = nullptr;
}
@@ -80,7 +80,7 @@ void OptimizationRemarkEmitter::emit(
computeHotness(OptDiag);
// Only emit it if its hotness meets the threshold.
- if (OptDiag.getHotness().getValueOr(0) <
+ if (OptDiag.getHotness().value_or(0) <
F->getContext().getDiagnosticsHotnessThreshold()) {
return;
}
diff --git a/llvm/lib/Analysis/OverflowInstAnalysis.cpp b/llvm/lib/Analysis/OverflowInstAnalysis.cpp
index 87a85e6a7364..8bfd6642f760 100644
--- a/llvm/lib/Analysis/OverflowInstAnalysis.cpp
+++ b/llvm/lib/Analysis/OverflowInstAnalysis.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/OverflowInstAnalysis.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp
index 02d084937ccb..7571bd0059cc 100644
--- a/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/llvm/lib/Analysis/PHITransAddr.cpp
@@ -17,7 +17,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -35,9 +34,6 @@ static bool CanPHITrans(Instruction *Inst) {
isa<ConstantInt>(Inst->getOperand(1)))
return true;
- // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
- // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
- // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
return false;
}
@@ -226,7 +222,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), GEPOps[0],
+ if (Value *V = simplifyGEPInst(GEP->getSourceElementType(), GEPOps[0],
ArrayRef<Value *>(GEPOps).slice(1),
GEP->isInBounds(), {DL, TLI, DT, AC})) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
@@ -240,6 +236,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
for (User *U : APHIOp->users()) {
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
if (GEPI->getType() == GEP->getType() &&
+ GEPI->getSourceElementType() == GEP->getSourceElementType() &&
GEPI->getNumOperands() == GEPOps.size() &&
GEPI->getParent()->getParent() == CurBB->getParent() &&
(!DT || DT->dominates(GEPI->getParent(), PredBB))) {
@@ -277,7 +274,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) {
+ if (Value *Res = simplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 268ed9d04741..9d5fa6d0a41b 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/InitializePasses.h"
@@ -125,7 +124,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(
for (const auto &I : BB)
if (isa<CallInst>(I) || isa<InvokeInst>(I))
if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
- TotalCallCount += CallCount.getValue();
+ TotalCallCount += *CallCount;
if (isHotCount(TotalCallCount))
return true;
}
@@ -154,7 +153,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(
for (const auto &I : BB)
if (isa<CallInst>(I) || isa<InvokeInst>(I))
if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
- TotalCallCount += CallCount.getValue();
+ TotalCallCount += *CallCount;
if (!isColdCount(TotalCallCount))
return false;
}
@@ -166,7 +165,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(
bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const {
assert(hasPartialSampleProfile() && "Expect partial sample profile");
- return !F.getEntryCount().hasValue();
+ return !F.getEntryCount();
}
template <bool isHot>
@@ -188,7 +187,7 @@ bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile(
for (const auto &I : BB)
if (isa<CallInst>(I) || isa<InvokeInst>(I))
if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
- TotalCallCount += CallCount.getValue();
+ TotalCallCount += *CallCount;
if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount))
return true;
if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount))
@@ -316,11 +315,11 @@ bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff,
}
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const {
- return HotCountThreshold.getValueOr(UINT64_MAX);
+ return HotCountThreshold.value_or(UINT64_MAX);
}
uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const {
- return ColdCountThreshold.getValueOr(0);
+ return ColdCountThreshold.value_or(0);
}
bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB,
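
The getValueOr -> value_or renames in this file are behavior-preserving: value_or returns the contained value when present and the fallback otherwise. Standalone illustration with std::optional (hypothetical helper name):

#include <cstdint>
#include <optional>

static uint64_t hotThreshold(std::optional<uint64_t> Threshold) {
  // Empty optional => UINT64_MAX, mirroring getOrCompHotCountThreshold above.
  return Threshold.value_or(UINT64_MAX);
}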
diff --git a/llvm/lib/Analysis/PtrUseVisitor.cpp b/llvm/lib/Analysis/PtrUseVisitor.cpp
index 9a834ba4866a..49304818d7ef 100644
--- a/llvm/lib/Analysis/PtrUseVisitor.cpp
+++ b/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -14,7 +14,6 @@
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include <algorithm>
using namespace llvm;
diff --git a/llvm/lib/Analysis/RegionInfo.cpp b/llvm/lib/Analysis/RegionInfo.cpp
index 3ba0bb9eaf2c..9be23a374eca 100644
--- a/llvm/lib/Analysis/RegionInfo.cpp
+++ b/llvm/lib/Analysis/RegionInfo.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/InitializePasses.h"
#ifndef NDEBUG
#include "llvm/Analysis/RegionPrinter.h"
diff --git a/llvm/lib/Analysis/RegionPass.cpp b/llvm/lib/Analysis/RegionPass.cpp
index 10c8569096c6..ddef3be8df37 100644
--- a/llvm/lib/Analysis/RegionPass.cpp
+++ b/llvm/lib/Analysis/RegionPass.cpp
@@ -12,14 +12,16 @@
// Most of this code has been COPIED from LoopPass.cpp
//
//===----------------------------------------------------------------------===//
+
#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionInfo.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
#define DEBUG_TYPE "regionpassmgr"
@@ -93,12 +95,12 @@ bool RGPassManager::runOnFunction(Function &F) {
TimeRegion PassTimer(getPassTimer(P));
#ifdef EXPENSIVE_CHECKS
- uint64_t RefHash = StructuralHash(F);
+ uint64_t RefHash = P->structuralHash(F);
#endif
LocalChanged = P->runOnRegion(CurrentRegion, *this);
#ifdef EXPENSIVE_CHECKS
- if (!LocalChanged && (RefHash != StructuralHash(F))) {
+ if (!LocalChanged && (RefHash != P->structuralHash(F))) {
llvm::errs() << "Pass modifies its input and doesn't report it: "
<< P->getPassName() << "\n";
llvm_unreachable("Pass modifies its input and doesn't report it");
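
A sketch of the EXPENSIVE_CHECKS discipline in the hunk above: hash the IR before running the pass, and if the pass claims it made no change, the hash must be unchanged afterwards. Hash and Run are hypothetical stand-ins for structuralHash and runOnRegion:

#include <cassert>
#include <cstdint>

template <typename IRUnit, typename HashFn, typename RunFn>
static bool runAndCheck(IRUnit &U, HashFn Hash, RunFn Run) {
  uint64_t RefHash = Hash(U);
  bool Changed = Run(U);
  assert((Changed || Hash(U) == RefHash) &&
         "Pass modifies its input and doesn't report it");
  (void)RefHash; // silence unused warning in NDEBUG builds
  return Changed;
}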
diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp
index 1fb5faaa6a71..fbd3d17febff 100644
--- a/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/llvm/lib/Analysis/RegionPrinter.cpp
@@ -10,15 +10,11 @@
#include "llvm/Analysis/RegionPrinter.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#ifndef NDEBUG
#include "llvm/IR/LegacyPassManager.h"
@@ -35,28 +31,20 @@ onlySimpleRegions("only-simple-regions",
cl::init(false));
namespace llvm {
-template<>
-struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
- DOTGraphTraits (bool isSimple=false)
- : DefaultDOTGraphTraits(isSimple) {}
+std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node,
+ RegionNode *Graph) {
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
- std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
-
- if (!Node->isSubRegion()) {
- BasicBlock *BB = Node->getNodeAs<BasicBlock>();
-
- if (isSimple())
- return DOTGraphTraits<DOTFuncInfo *>
- ::getSimpleNodeLabel(BB, nullptr);
- else
- return DOTGraphTraits<DOTFuncInfo *>
- ::getCompleteNodeLabel(BB, nullptr);
- }
-
- return "Not implemented";
+ if (isSimple())
+ return DOTGraphTraits<DOTFuncInfo *>::getSimpleNodeLabel(BB, nullptr);
+ else
+ return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel(BB, nullptr);
}
-};
+
+ return "Not implemented";
+}
template <>
struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
@@ -138,7 +126,7 @@ struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
printRegionCluster(*G->getTopLevelRegion(), GW, 4);
}
};
-} //end namespace llvm
+} // end namespace llvm
namespace {
@@ -149,48 +137,49 @@ struct RegionInfoPassGraphTraits {
};
struct RegionPrinter
- : public DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
- RegionInfoPassGraphTraits> {
+ : public DOTGraphTraitsPrinterWrapperPass<
+ RegionInfoPass, false, RegionInfo *, RegionInfoPassGraphTraits> {
static char ID;
RegionPrinter()
- : DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *,
- RegionInfoPassGraphTraits>("reg", ID) {
+ : DOTGraphTraitsPrinterWrapperPass<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
}
};
char RegionPrinter::ID = 0;
struct RegionOnlyPrinter
- : public DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
- RegionInfoPassGraphTraits> {
+ : public DOTGraphTraitsPrinterWrapperPass<
+ RegionInfoPass, true, RegionInfo *, RegionInfoPassGraphTraits> {
static char ID;
RegionOnlyPrinter()
- : DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *,
- RegionInfoPassGraphTraits>("reg", ID) {
+ : DOTGraphTraitsPrinterWrapperPass<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
}
};
char RegionOnlyPrinter::ID = 0;
struct RegionViewer
- : public DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
- RegionInfoPassGraphTraits> {
+ : public DOTGraphTraitsViewerWrapperPass<
+ RegionInfoPass, false, RegionInfo *, RegionInfoPassGraphTraits> {
static char ID;
RegionViewer()
- : DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *,
- RegionInfoPassGraphTraits>("reg", ID) {
+ : DOTGraphTraitsViewerWrapperPass<RegionInfoPass, false, RegionInfo *,
+ RegionInfoPassGraphTraits>("reg", ID) {
initializeRegionViewerPass(*PassRegistry::getPassRegistry());
}
};
char RegionViewer::ID = 0;
struct RegionOnlyViewer
- : public DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
- RegionInfoPassGraphTraits> {
+ : public DOTGraphTraitsViewerWrapperPass<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits> {
static char ID;
RegionOnlyViewer()
- : DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *,
- RegionInfoPassGraphTraits>("regonly", ID) {
+ : DOTGraphTraitsViewerWrapperPass<RegionInfoPass, true, RegionInfo *,
+ RegionInfoPassGraphTraits>("regonly",
+ ID) {
initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
}
};
diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
index 294bc38c17ad..afc3d7fc4c35 100644
--- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -14,9 +14,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ReplayInlineAdvisor.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include <memory>
using namespace llvm;
@@ -26,8 +26,9 @@ using namespace llvm;
ReplayInlineAdvisor::ReplayInlineAdvisor(
Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
std::unique_ptr<InlineAdvisor> OriginalAdvisor,
- const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
- : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
+ const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks,
+ InlineContext IC)
+ : InlineAdvisor(M, FAM, IC), OriginalAdvisor(std::move(OriginalAdvisor)),
ReplaySettings(ReplaySettings), EmitRemarks(EmitRemarks) {
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile);
@@ -75,12 +76,15 @@ ReplayInlineAdvisor::ReplayInlineAdvisor(
HasReplayRemarks = true;
}
-std::unique_ptr<InlineAdvisor> llvm::getReplayInlineAdvisor(
- Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
- std::unique_ptr<InlineAdvisor> OriginalAdvisor,
- const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks) {
+std::unique_ptr<InlineAdvisor>
+llvm::getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
+ LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings,
+ bool EmitRemarks, InlineContext IC) {
auto Advisor = std::make_unique<ReplayInlineAdvisor>(
- M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks);
+ M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks,
+ IC);
if (!Advisor->areReplayRemarksLoaded())
Advisor.reset();
return Advisor;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 977fc0911355..207f4df79e45 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -79,7 +79,6 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionDivision.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -96,7 +95,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -104,7 +102,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -125,7 +122,6 @@
#include <algorithm>
#include <cassert>
#include <climits>
-#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>
@@ -146,17 +142,21 @@ STATISTIC(NumTripCountsNotComputed,
STATISTIC(NumBruteForceTripCountsComputed,
"Number of loops with trip counts computed by force");
+#ifdef EXPENSIVE_CHECKS
+bool llvm::VerifySCEV = true;
+#else
+bool llvm::VerifySCEV = false;
+#endif
+
static cl::opt<unsigned>
-MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
- cl::ZeroOrMore,
- cl::desc("Maximum number of iterations SCEV will "
- "symbolically execute a constant "
- "derived loop"),
- cl::init(100));
-
-// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
-static cl::opt<bool> VerifySCEV(
- "verify-scev", cl::Hidden,
+ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::desc("Maximum number of iterations SCEV will "
+ "symbolically execute a constant "
+ "derived loop"),
+ cl::init(100));
+
+static cl::opt<bool, true> VerifySCEVOpt(
+ "verify-scev", cl::Hidden, cl::location(VerifySCEV),
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
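
A minimal sketch of the cl::location pattern adopted above: the second template argument of cl::opt selects external storage, so the flag writes through to a build-dependent global instead of owning the value itself. Names here are hypothetical; only cl::opt and cl::location are real LLVM API:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

bool VerifyFoo = false; // stands in for llvm::VerifySCEV above
static cl::opt<bool, true>
    VerifyFooOpt("verify-foo", cl::Hidden, cl::location(VerifyFoo),
                 cl::desc("Enable extra (slow) verification"));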
static cl::opt<bool> VerifySCEVStrict(
"verify-scev-strict", cl::Hidden,
@@ -231,6 +231,17 @@ static cl::opt<bool> UseExpensiveRangeSharpening(
cl::desc("Use more powerful methods of sharpening expression ranges. May "
"be costly in terms of compile time"));
+static cl::opt<unsigned> MaxPhiSCCAnalysisSize(
+ "scalar-evolution-max-scc-analysis-depth", cl::Hidden,
+    cl::desc("Maximum number of nodes to process while searching SCEVUnknown "
+ "Phi strongly connected components"),
+ cl::init(8));
+
+static cl::opt<bool>
+ EnableFiniteLoopControl("scalar-evolution-finite-loop", cl::Hidden,
+ cl::desc("Handle <= and >= in finite loops"),
+ cl::init(true));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -519,12 +530,13 @@ void SCEVUnknown::deleted() {
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
+
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
- // Update this SCEVUnknown to point to the new value. This is needed
- // because there may still be outstanding SCEVs which still point to
- // this SCEVUnknown.
+ // Replace the value pointer in case someone is still using this SCEVUnknown.
setValPtr(New);
}
@@ -1643,10 +1655,12 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
- if (AR->hasNoUnsignedWrap())
- return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
- getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+ if (AR->hasNoUnsignedWrap()) {
+ Start =
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1);
+ Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
+ }
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1688,11 +1702,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
- Depth + 1),
- getZeroExtendExpr(Step, Ty, Depth + 1), L,
- AR->getNoWrapFlags());
+ Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1);
+ Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
@@ -1707,11 +1720,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Negative step causes unsigned wrap, but it still can't self-wrap.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
- Depth + 1),
- getSignExtendExpr(Step, Ty, Depth + 1), L,
- AR->getNoWrapFlags());
+ Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1);
+ Step = getSignExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
}
}
@@ -1733,11 +1745,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// issue. It's not clear that the order of checks does matter, but
// it's one of two issue possible causes for a change which was
// reverted. Be conservative for the moment.
- return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
- Depth + 1),
- getZeroExtendExpr(Step, Ty, Depth + 1), L,
- AR->getNoWrapFlags());
+ Start =
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1);
+ Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
// For a negative step, we can extend the operands iff doing so only
@@ -1752,11 +1763,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// still can't self-wrap.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
- Depth + 1),
- getSignExtendExpr(Step, Ty, Depth + 1), L,
- AR->getNoWrapFlags());
+ Start = getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
+ Depth + 1);
+ Step = getSignExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
}
}
@@ -1780,9 +1790,10 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
- return getAddRecExpr(
- getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
- getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+ Start =
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1);
+ Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
}
@@ -1984,10 +1995,12 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
- if (AR->hasNoSignedWrap())
- return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
- getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
+ if (AR->hasNoSignedWrap()) {
+ Start =
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1);
+ Step = getSignExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, SCEV::FlagNSW);
+ }
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -2030,11 +2043,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
- Depth + 1),
- getSignExtendExpr(Step, Ty, Depth + 1), L,
- AR->getNoWrapFlags());
+ Start = getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1);
+ Step = getSignExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
@@ -2056,11 +2068,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
- Depth + 1),
- getZeroExtendExpr(Step, Ty, Depth + 1), L,
- AR->getNoWrapFlags());
+ Start = getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
+ Depth + 1);
+ Step = getZeroExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
}
}
@@ -2072,9 +2083,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// issue. It's not clear that the order of checks does matter, but
// it's one of two possible causes for a change which was
// reverted. Be conservative for the moment.
- return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
- getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+ Start =
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1);
+ Step = getSignExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
// sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
@@ -2096,9 +2108,10 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
- return getAddRecExpr(
- getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
- getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
+ Start =
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1);
+ Step = getSignExtendExpr(Step, Ty, Depth + 1);
+ return getAddRecExpr(Start, Step, L, AR->getNoWrapFlags());
}
}
@@ -2300,9 +2313,9 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
const SCEV *A = (this->*Extension)(
(this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0);
- const SCEV *B = (this->*Operation)((this->*Extension)(LHS, WideTy, 0),
- (this->*Extension)(RHS, WideTy, 0),
- SCEV::FlagAnyWrap, 0);
+ const SCEV *LHSB = (this->*Extension)(LHS, WideTy, 0);
+ const SCEV *RHSB = (this->*Extension)(RHS, WideTy, 0);
+ const SCEV *B = (this->*Operation)(LHSB, RHSB, SCEV::FlagAnyWrap, 0);
return A == B;
}
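
Concrete instance of the A == B widening test above, for unsigned i8 addition checked in i16: extend-after-op equals op-after-extend exactly when the narrow operation did not wrap.

#include <cstdint>

static bool addNoUnsignedWrapU8(uint8_t X, uint8_t Y) {
  uint16_t A = static_cast<uint8_t>(X + Y); // zext(op(x, y))
  uint16_t B = uint16_t(X) + uint16_t(Y);   // op(zext(x), zext(y))
  return A == B;                            // true iff X + Y fits in 8 bits
}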
@@ -3106,12 +3119,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// TODO: There are some cases where this transformation is not
// profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
// this transformation should be narrowed down.
- if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add))
- return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
- SCEV::FlagAnyWrap, Depth + 1),
- getMulExpr(LHSC, Add->getOperand(1),
- SCEV::FlagAnyWrap, Depth + 1),
- SCEV::FlagAnyWrap, Depth + 1);
+ if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) {
+ const SCEV *LHS = getMulExpr(LHSC, Add->getOperand(0),
+ SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *RHS = getMulExpr(LHSC, Add->getOperand(1),
+ SCEV::FlagAnyWrap, Depth + 1);
+ return getAddExpr(LHS, RHS, SCEV::FlagAnyWrap, Depth + 1);
+ }
if (Ops[0]->isAllOnesValue()) {
// If we have a mul by -1 of an add, try distributing the -1 among the
@@ -3466,12 +3480,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
// Fold if both operands are constant.
- if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
- Constant *LHSCV = LHSC->getValue();
- Constant *RHSCV = RHSC->getValue();
- return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
- RHSCV)));
- }
+ if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS))
+ return getConstant(LHSC->getAPInt().udiv(RHSC->getAPInt()));
}
}
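
The fold above now divides the APInts directly instead of round-tripping through ConstantExpr::getUDiv. Equivalent standalone computation (assumes a non-zero divisor, which the surrounding code has already ruled out):

#include "llvm/ADT/APInt.h"
using namespace llvm;

static APInt udivConstants(const APInt &L, const APInt &R) {
  return L.udiv(R); // unsigned division on arbitrary-precision integers
}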
@@ -4002,6 +4012,59 @@ public:
} // namespace
+/// Return true if V is poison given that AssumedPoison is already poison.
+static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
+ // The only way poison may be introduced in a SCEV expression is from a
+ // poison SCEVUnknown (ConstantExprs are also represented as SCEVUnknown,
+ // not SCEVConstant). Notably, nowrap flags in SCEV nodes can *not*
+ // introduce poison -- they encode guaranteed, non-speculated knowledge.
+ //
+ // Additionally, all SCEV nodes propagate poison from inputs to outputs,
+ // with the notable exception of umin_seq, where only poison from the first
+ // operand is (unconditionally) propagated.
+ struct SCEVPoisonCollector {
+ bool LookThroughSeq;
+ SmallPtrSet<const SCEV *, 4> MaybePoison;
+ SCEVPoisonCollector(bool LookThroughSeq) : LookThroughSeq(LookThroughSeq) {}
+
+ bool follow(const SCEV *S) {
+ // TODO: We can always follow the first operand, but the SCEVTraversal
+ // API doesn't support this.
+ if (!LookThroughSeq && isa<SCEVSequentialMinMaxExpr>(S))
+ return false;
+
+ if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
+ if (!isGuaranteedNotToBePoison(SU->getValue()))
+ MaybePoison.insert(S);
+ }
+ return true;
+ }
+ bool isDone() const { return false; }
+ };
+
+ // First collect all SCEVs that might result in AssumedPoison to be poison.
+ // We need to look through umin_seq here, because we want to find all SCEVs
+ // that *might* result in poison, not only those that are *required* to.
+ SCEVPoisonCollector PC1(/* LookThroughSeq */ true);
+ visitAll(AssumedPoison, PC1);
+
+ // AssumedPoison is never poison. As the assumption is false, the implication
+ // is true. Don't bother walking the other SCEV in this case.
+ if (PC1.MaybePoison.empty())
+ return true;
+
+ // Collect all SCEVs in S that, if poison, *will* result in S being poison
+ // as well. We cannot look through umin_seq here, as its argument only *may*
+ // make the result poison.
+ SCEVPoisonCollector PC2(/* LookThroughSeq */ false);
+ visitAll(S, PC2);
+
+ // Make sure that no matter which SCEV in PC1.MaybePoison is actually poison,
+ // it will also make S poison by being part of PC2.MaybePoison.
+ return all_of(PC1.MaybePoison,
+ [&](const SCEV *S) { return PC2.MaybePoison.contains(S); });
+}
+
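
The subset check at the end of impliesPoison reduces to plain set logic: every source that might poison AssumedPoison must be a source that will poison S. Standalone sketch over integer ids:

#include <set>

static bool impliesPoisonSketch(const std::set<int> &MaybePoisonAssumed,
                                const std::set<int> &WillPoisonS) {
  for (int Src : MaybePoisonAssumed)
    if (!WillPoisonS.count(Src))
      return false;
  return true; // vacuously true when MaybePoisonAssumed is empty
}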
const SCEV *
ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
SmallVectorImpl<const SCEV *> &Ops) {
@@ -4010,11 +4073,6 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
if (Ops.size() == 1)
return Ops[0];
- if (Ops.size() == 2 &&
- any_of(Ops, [](const SCEV *Op) { return isa<SCEVConstant>(Op); }))
- return getMinMaxExpr(
- SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind),
- Ops);
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
@@ -4063,6 +4121,39 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
return getSequentialMinMaxExpr(Kind, Ops);
}
+ const SCEV *SaturationPoint;
+ ICmpInst::Predicate Pred;
+ switch (Kind) {
+ case scSequentialUMinExpr:
+ SaturationPoint = getZero(Ops[0]->getType());
+ Pred = ICmpInst::ICMP_ULE;
+ break;
+ default:
+ llvm_unreachable("Not a sequential min/max type.");
+ }
+
+ for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+ // We can replace %x umin_seq %y with %x umin %y if either:
+ // * %y being poison implies %x is also poison.
+ // * %x cannot be the saturating value (e.g. zero for umin).
+ if (::impliesPoison(Ops[i], Ops[i - 1]) ||
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, Ops[i - 1],
+ SaturationPoint)) {
+ SmallVector<const SCEV *> SeqOps = {Ops[i - 1], Ops[i]};
+ Ops[i - 1] = getMinMaxExpr(
+ SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(Kind),
+ SeqOps);
+ Ops.erase(Ops.begin() + i);
+ return getSequentialMinMaxExpr(Kind, Ops);
+ }
+ // Fold %x umin_seq %y to %x if %x ule %y.
+ // TODO: We might be able to prove the predicate for a later operand.
+ if (isKnownViaNonRecursiveReasoning(Pred, Ops[i - 1], Ops[i])) {
+ Ops.erase(Ops.begin() + i);
+ return getSequentialMinMaxExpr(Kind, Ops);
+ }
+ }
+
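
Value-level model of the saturation argument used in the loop above: for umin, the saturation point is zero, and umin_seq(x, y) never inspects y once x hits it. When x is provably non-zero, umin_seq therefore collapses to plain umin:

#include <algorithm>
#include <cstdint>

static uint64_t uminSeq(uint64_t X, uint64_t Y) {
  // Sequential semantics: Y (and any poison in it) is ignored when X == 0.
  return X == 0 ? 0 : std::min(X, Y);
}
// For X != 0, uminSeq(X, Y) == std::min(X, Y), e.g. uminSeq(3, 7) == 3.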
// Okay, it looks like we really DO need an expr. Check to see if we
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
@@ -4265,39 +4356,20 @@ bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
return FoundAddRec;
}
-/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
-/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
-/// offset I, then return {S', I}, else return {\p S, nullptr}.
-static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
- const auto *Add = dyn_cast<SCEVAddExpr>(S);
- if (!Add)
- return {S, nullptr};
-
- if (Add->getNumOperands() != 2)
- return {S, nullptr};
-
- auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
- if (!ConstOp)
- return {S, nullptr};
-
- return {Add->getOperand(1), ConstOp->getValue()};
-}
-
/// Return the set of Values from which the SCEV expression \p S was
/// generated; each Value in the set maps back to \p S.
-ScalarEvolution::ValueOffsetPairSetVector *
-ScalarEvolution::getSCEVValues(const SCEV *S) {
+ArrayRef<Value *> ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
- return nullptr;
+ return None;
#ifndef NDEBUG
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
- for (const auto &VE : SI->second)
- assert(ValueExprMap.count(VE.first));
+ for (Value *V : SI->second)
+ assert(ValueExprMap.count(V));
}
#endif
- return &SI->second;
+ return SI->second.getArrayRef();
}
/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
@@ -4306,20 +4378,11 @@ ScalarEvolution::getSCEVValues(const SCEV *S) {
void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
- const SCEV *S = I->second;
- // Remove {V, 0} from the set of ExprValueMap[S]
- if (auto *SV = getSCEVValues(S))
- SV->remove({V, nullptr});
-
- // Remove {V, Offset} from the set of ExprValueMap[Stripped]
- const SCEV *Stripped;
- ConstantInt *Offset;
- std::tie(Stripped, Offset) = splitAddExpr(S);
- if (Offset != nullptr) {
- if (auto *SV = getSCEVValues(Stripped))
- SV->remove({V, Offset});
- }
- ValueExprMap.erase(V);
+ auto EVIt = ExprValueMap.find(I->second);
+ bool Removed = EVIt->second.remove(V);
+ (void) Removed;
+ assert(Removed && "Value not in ExprValueMap?");
+ ValueExprMap.erase(I);
}
}
@@ -4330,7 +4393,7 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
auto It = ValueExprMap.find_as(V);
if (It == ValueExprMap.end()) {
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- ExprValueMap[S].insert({V, nullptr});
+ ExprValueMap[S].insert(V);
}
}
@@ -4339,33 +4402,9 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
- const SCEV *S = getExistingSCEV(V);
- if (S == nullptr) {
- S = createSCEV(V);
- // During PHI resolution, it is possible to create two SCEVs for the same
- // V, so it is needed to double check whether V->S is inserted into
- // ValueExprMap before insert S->{V, 0} into ExprValueMap.
- std::pair<ValueExprMapType::iterator, bool> Pair =
- ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second) {
- ExprValueMap[S].insert({V, nullptr});
-
- // If S == Stripped + Offset, add Stripped -> {V, Offset} into
- // ExprValueMap.
- const SCEV *Stripped = S;
- ConstantInt *Offset = nullptr;
- std::tie(Stripped, Offset) = splitAddExpr(S);
- // If stripped is SCEVUnknown, don't bother to save
- // Stripped -> {V, offset}. It doesn't simplify and sometimes even
- // increase the complexity of the expansion code.
- // If V is GetElementPtrInst, don't save Stripped -> {V, offset}
- // because it may generate add/sub instead of GEP in SCEV expansion.
- if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
- !isa<GetElementPtrInst>(V))
- ExprValueMap[Stripped].insert({V, Offset});
- }
- }
- return S;
+ if (const SCEV *S = getExistingSCEV(V))
+ return S;
+ return createSCEVIter(V);
}
const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
@@ -4795,7 +4834,7 @@ public:
SelectInst *SI = cast<SelectInst>(I);
Optional<const SCEV *> Res =
compareWithBackedgeCondition(SI->getCondition());
- if (Res.hasValue()) {
+ if (Res) {
bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
}
@@ -4803,7 +4842,7 @@ public:
}
default: {
Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
- if (Res.hasValue())
+ if (Res)
Result = Res.getValue();
break;
}
@@ -5067,6 +5106,9 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
// Instcombine turns add of signmask into xor as a strength reduction step.
if (RHSC->getValue().isSignMask())
return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
+ // Binary `xor` is a bit-wise `add`.
+ if (V->getType()->isIntegerTy(1))
+ return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
return BinaryOp(Op);
case Instruction::LShr:
@@ -5489,8 +5531,8 @@ bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
return true;
auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
- if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
- !Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
+ if (Expr1 != Expr2 && !Preds->implies(SE.getEqualPredicate(Expr1, Expr2)) &&
+ !Preds->implies(SE.getEqualPredicate(Expr2, Expr1)))
return false;
return true;
};
@@ -5872,31 +5914,53 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (const SCEV *S = createNodeFromSelectLikePHI(PN))
return S;
- // If the PHI has a single incoming value, follow that value, unless the
- // PHI's incoming blocks are in a different loop, in which case doing so
- // risks breaking LCSSA form. Instcombine would normally zap these, but
- // it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
- if (LI.replacementPreservesLCSSAForm(PN, V))
- return getSCEV(V);
+ if (Value *V = simplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
+ return getSCEV(V);
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
-const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
- Value *Cond,
- Value *TrueVal,
- Value *FalseVal) {
- // Handle "constant" branch or select. This can occur for instance when a
- // loop pass transforms an inner loop and moves on to process the outer loop.
- if (auto *CI = dyn_cast<ConstantInt>(Cond))
- return getSCEV(CI->isOne() ? TrueVal : FalseVal);
+bool SCEVMinMaxExprContains(const SCEV *Root, const SCEV *OperandToFind,
+ SCEVTypes RootKind) {
+ struct FindClosure {
+ const SCEV *OperandToFind;
+ const SCEVTypes RootKind; // Must be a sequential min/max expression.
+ const SCEVTypes NonSequentialRootKind; // Non-seq variant of RootKind.
+
+ bool Found = false;
+
+ bool canRecurseInto(SCEVTypes Kind) const {
+      // We can only recurse into SCEV expressions of the same effective type
+      // as our root SCEV expression, and into zero-extensions.
+ return RootKind == Kind || NonSequentialRootKind == Kind ||
+ scZeroExtend == Kind;
+    }
+
+ FindClosure(const SCEV *OperandToFind, SCEVTypes RootKind)
+ : OperandToFind(OperandToFind), RootKind(RootKind),
+ NonSequentialRootKind(
+ SCEVSequentialMinMaxExpr::getEquivalentNonSequentialSCEVType(
+ RootKind)) {}
+ bool follow(const SCEV *S) {
+ Found = S == OperandToFind;
+
+ return !isDone() && canRecurseInto(S->getSCEVType());
+ }
+
+ bool isDone() const { return Found; }
+ };
+
+ FindClosure FC(OperandToFind, RootKind);
+ visitAll(Root, FC);
+ return FC.Found;
+}
+
+const SCEV *ScalarEvolution::createNodeForSelectOrPHIInstWithICmpInstCond(
+ Instruction *I, ICmpInst *Cond, Value *TrueVal, Value *FalseVal) {
// Try to match some simple smax or umax patterns.
- auto *ICI = dyn_cast<ICmpInst>(Cond);
- if (!ICI)
- return getUnknown(I);
+ auto *ICI = Cond;
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
@@ -5958,31 +6022,36 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
}
break;
case ICmpInst::ICMP_NE:
- // n != 0 ? n+x : 1+x -> umax(n, 1)+x
- if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
- isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getOne(I->getType());
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
- const SCEV *LA = getSCEV(TrueVal);
- const SCEV *RA = getSCEV(FalseVal);
- const SCEV *LDiff = getMinusSCEV(LA, LS);
- const SCEV *RDiff = getMinusSCEV(RA, One);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(One, LS), LDiff);
- }
- break;
+ // x != 0 ? x+y : C+y -> x == 0 ? C+y : x+y
+ std::swap(TrueVal, FalseVal);
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_EQ:
- // n == 0 ? 1+x : n+x -> umax(n, 1)+x
+ // x == 0 ? C+y : x+y -> umax(x, C)+y iff C u<= 1
if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getOne(I->getType());
- const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
- const SCEV *LA = getSCEV(TrueVal);
- const SCEV *RA = getSCEV(FalseVal);
- const SCEV *LDiff = getMinusSCEV(LA, One);
- const SCEV *RDiff = getMinusSCEV(RA, LS);
- if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(One, LS), LDiff);
+ const SCEV *X = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
+ const SCEV *TrueValExpr = getSCEV(TrueVal); // C+y
+ const SCEV *FalseValExpr = getSCEV(FalseVal); // x+y
+ const SCEV *Y = getMinusSCEV(FalseValExpr, X); // y = (x+y)-x
+ const SCEV *C = getMinusSCEV(TrueValExpr, Y); // C = (C+y)-y
+ if (isa<SCEVConstant>(C) && cast<SCEVConstant>(C)->getAPInt().ule(1))
+ return getAddExpr(getUMaxExpr(X, C), Y);
+ }
+ // x == 0 ? 0 : umin (..., x, ...) -> umin_seq(x, umin (...))
+ // x == 0 ? 0 : umin_seq(..., x, ...) -> umin_seq(x, umin_seq(...))
+ // x == 0 ? 0 : umin (..., umin_seq(..., x, ...), ...)
+ // -> umin_seq(x, umin (..., umin_seq(...), ...))
+ if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero() &&
+ isa<ConstantInt>(TrueVal) && cast<ConstantInt>(TrueVal)->isZero()) {
+ const SCEV *X = getSCEV(LHS);
+ while (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(X))
+ X = ZExt->getOperand();
+ if (getTypeSizeInBits(X->getType()) <= getTypeSizeInBits(I->getType())) {
+ const SCEV *FalseValExpr = getSCEV(FalseVal);
+ if (SCEVMinMaxExprContains(FalseValExpr, X, scSequentialUMinExpr))
+ return getUMinExpr(getNoopOrZeroExtend(X, I->getType()), FalseValExpr,
+ /*Sequential=*/true);
+ }
}
break;
default:
@@ -5992,12 +6061,95 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
return getUnknown(I);
}
+static Optional<const SCEV *>
+createNodeForSelectViaUMinSeq(ScalarEvolution *SE, const SCEV *CondExpr,
+ const SCEV *TrueExpr, const SCEV *FalseExpr) {
+ assert(CondExpr->getType()->isIntegerTy(1) &&
+ TrueExpr->getType() == FalseExpr->getType() &&
+ TrueExpr->getType()->isIntegerTy(1) &&
+ "Unexpected operands of a select.");
+
+ // i1 cond ? i1 x : i1 C --> C + (i1 cond ? (i1 x - i1 C) : i1 0)
+ // --> C + (umin_seq cond, x - C)
+ //
+ // i1 cond ? i1 C : i1 x --> C + (i1 cond ? i1 0 : (i1 x - i1 C))
+ // --> C + (i1 ~cond ? (i1 x - i1 C) : i1 0)
+ // --> C + (umin_seq ~cond, x - C)
+
+ // FIXME: while we can't legally model the case where both of the hands
+ // are fully variable, we only require that the *difference* is constant.
+ if (!isa<SCEVConstant>(TrueExpr) && !isa<SCEVConstant>(FalseExpr))
+ return None;
+
+ const SCEV *X, *C;
+ if (isa<SCEVConstant>(TrueExpr)) {
+ CondExpr = SE->getNotSCEV(CondExpr);
+ X = FalseExpr;
+ C = TrueExpr;
+ } else {
+ X = TrueExpr;
+ C = FalseExpr;
+ }
+ return SE->getAddExpr(C, SE->getUMinExpr(CondExpr, SE->getMinusSCEV(X, C),
+ /*Sequential=*/true));
+}
+
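
Spot-check of the i1 rewrite derived above: over i1, add and sub are xor and umin is and, so `cond ? x : C` equals `C + umin_seq(cond, x - C)` at the value level (the sequential form additionally blocks poison from x when cond is false). All eight input combinations agree:

static bool selectI1(bool Cond, bool X, bool C) { return Cond ? X : C; }
static bool uminSeqFormI1(bool Cond, bool X, bool C) {
  return C ^ (Cond & (X ^ C)); // C + umin_seq(cond, x - C), arithmetic mod 2
}
// e.g. (Cond,X,C) = (1,0,1): selectI1 gives 0; 1 ^ (1 & (0 ^ 1)) == 0.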
+static Optional<const SCEV *> createNodeForSelectViaUMinSeq(ScalarEvolution *SE,
+ Value *Cond,
+ Value *TrueVal,
+ Value *FalseVal) {
+ if (!isa<ConstantInt>(TrueVal) && !isa<ConstantInt>(FalseVal))
+ return None;
+
+ const auto *SECond = SE->getSCEV(Cond);
+ const auto *SETrue = SE->getSCEV(TrueVal);
+ const auto *SEFalse = SE->getSCEV(FalseVal);
+ return createNodeForSelectViaUMinSeq(SE, SECond, SETrue, SEFalse);
+}
+
+const SCEV *ScalarEvolution::createNodeForSelectOrPHIViaUMinSeq(
+ Value *V, Value *Cond, Value *TrueVal, Value *FalseVal) {
+ assert(Cond->getType()->isIntegerTy(1) && "Select condition is not an i1?");
+ assert(TrueVal->getType() == FalseVal->getType() &&
+ V->getType() == TrueVal->getType() &&
+ "Types of select hands and of the result must match.");
+
+ // For now, only deal with i1-typed `select`s.
+ if (!V->getType()->isIntegerTy(1))
+ return getUnknown(V);
+
+ if (Optional<const SCEV *> S =
+ createNodeForSelectViaUMinSeq(this, Cond, TrueVal, FalseVal))
+ return *S;
+
+ return getUnknown(V);
+}
+
+const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Value *V, Value *Cond,
+ Value *TrueVal,
+ Value *FalseVal) {
+ // Handle "constant" branch or select. This can occur for instance when a
+ // loop pass transforms an inner loop and moves on to process the outer loop.
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ return getSCEV(CI->isOne() ? TrueVal : FalseVal);
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
+ const SCEV *S = createNodeForSelectOrPHIInstWithICmpInstCond(
+ I, ICI, TrueVal, FalseVal);
+ if (!isa<SCEVUnknown>(S))
+ return S;
+ }
+ }
+
+ return createNodeForSelectOrPHIViaUMinSeq(V, Cond, TrueVal, FalseVal);
+}
+
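
Numeric spot-check of the ICMP_EQ fold above, `x == 0 ? C+y : x+y -> umax(x, C)+y iff C u<= 1`: at x == 0 both sides give C + y, and at x != 0 we have C <= 1 <= x, so umax(x, C) == x and both sides give x + y.

#include <algorithm>
#include <cstdint>

static uint64_t selectForm(uint64_t X, uint64_t C, uint64_t Y) {
  return X == 0 ? C + Y : X + Y;
}
static uint64_t umaxForm(uint64_t X, uint64_t C, uint64_t Y) {
  return std::max(X, C) + Y; // equals selectForm whenever C <= 1
}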
/// Expand GEP instructions into add and multiply operations. This allows them
/// to be analyzed by regular SCEV code.
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
- // Don't attempt to analyze GEPs over unsized objects.
- if (!GEP->getSourceElementType()->isSized())
- return getUnknown(GEP);
+ assert(GEP->getSourceElementType()->isSized() &&
+ "GEP source element type must be sized");
SmallVector<const SCEV *, 4> IndexExprs;
for (Value *Index : GEP->indices())
@@ -6430,7 +6582,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// Check if the IR explicitly contains !range metadata.
Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
- if (MDRange.hasValue())
+ if (MDRange)
ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
RangeType);
@@ -6719,7 +6871,7 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
FalseValue = *FalseVal;
// Re-apply the cast we peeled off earlier
- if (CastOp.hasValue())
+ if (CastOp)
switch (*CastOp) {
default:
llvm_unreachable("Unknown SCEV cast type!");
@@ -7020,6 +7172,211 @@ bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
}
+const SCEV *ScalarEvolution::createSCEVIter(Value *V) {
+ // Worklist item with a Value and a bool indicating whether all operands have
+ // been visited already.
+ using PointerTy = PointerIntPair<Value *, 1, bool>;
+ SmallVector<PointerTy> Stack;
+
+ Stack.emplace_back(V, true);
+ Stack.emplace_back(V, false);
+ while (!Stack.empty()) {
+ auto E = Stack.pop_back_val();
+ Value *CurV = E.getPointer();
+
+ if (getExistingSCEV(CurV))
+ continue;
+
+ SmallVector<Value *> Ops;
+ const SCEV *CreatedSCEV = nullptr;
+ // If all operands have been visited already, create the SCEV.
+ if (E.getInt()) {
+ CreatedSCEV = createSCEV(CurV);
+ } else {
+      // Otherwise get the operands we need to create SCEVs for before creating
+ // the SCEV for CurV. If the SCEV for CurV can be constructed trivially,
+ // just use it.
+ CreatedSCEV = getOperandsToCreate(CurV, Ops);
+ }
+
+ if (CreatedSCEV) {
+ insertValueToMap(CurV, CreatedSCEV);
+ } else {
+      // Queue CurV for SCEV creation, followed by its operands which need to
+ // be constructed first.
+ Stack.emplace_back(CurV, true);
+ for (Value *Op : Ops)
+ Stack.emplace_back(Op, false);
+ }
+ }
+
+ return getExistingSCEV(V);
+}
+
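
Shape of the worklist in createSCEVIter above: every value is pushed twice, first as "expand operands", then as "build"; a memo check (the role getExistingSCEV plays in the patch) makes the duplicate root entry harmless. Generic sketch with hypothetical Expand/Build callbacks:

#include <set>
#include <utility>
#include <vector>

template <typename Node, typename ExpandFn, typename BuildFn>
static void buildBottomUp(Node Root, ExpandFn Expand, BuildFn Build) {
  std::set<Node> Done; // stands in for the Value -> SCEV map
  std::vector<std::pair<Node, bool>> Stack{{Root, true}, {Root, false}};
  while (!Stack.empty()) {
    auto [N, OperandsDone] = Stack.back();
    Stack.pop_back();
    if (Done.count(N))
      continue;
    if (OperandsDone) {
      Build(N); // all operands were handled by earlier iterations
      Done.insert(N);
    } else {
      Stack.emplace_back(N, true); // revisit once operands are done
      for (const Node &Op : Expand(N))
        Stack.emplace_back(Op, false);
    }
  }
}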
+const SCEV *
+ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
+ if (!isSCEVable(V->getType()))
+ return getUnknown(V);
+
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't attempt to analyze instructions in blocks that aren't
+ // reachable. Such instructions don't matter, and they aren't required
+ // to obey basic rules for definitions dominating uses which this
+ // analysis depends on.
+ if (!DT.isReachableFromEntry(I->getParent()))
+ return getUnknown(PoisonValue::get(V->getType()));
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return getConstant(CI);
+ else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->isInterposable()) {
+ Ops.push_back(GA->getAliasee());
+ return nullptr;
+ }
+ return getUnknown(V);
+ } else if (!isa<ConstantExpr>(V))
+ return getUnknown(V);
+
+ Operator *U = cast<Operator>(V);
+ if (auto BO = MatchBinaryOp(U, DT)) {
+ bool IsConstArg = isa<ConstantInt>(BO->RHS);
+ switch (U->getOpcode()) {
+ case Instruction::Add: {
+ // For additions and multiplications, traverse add/mul chains for which we
+ // can potentially create a single SCEV, to reduce the number of
+ // get{Add,Mul}Expr calls.
+ do {
+ if (BO->Op) {
+ if (BO->Op != V && getExistingSCEV(BO->Op)) {
+ Ops.push_back(BO->Op);
+ break;
+ }
+ }
+ Ops.push_back(BO->RHS);
+ auto NewBO = MatchBinaryOp(BO->LHS, DT);
+ if (!NewBO || (NewBO->Opcode != Instruction::Add &&
+ NewBO->Opcode != Instruction::Sub)) {
+ Ops.push_back(BO->LHS);
+ break;
+ }
+ BO = NewBO;
+ } while (true);
+ return nullptr;
+ }
+
+ case Instruction::Mul: {
+ do {
+ if (BO->Op) {
+ if (BO->Op != V && getExistingSCEV(BO->Op)) {
+ Ops.push_back(BO->Op);
+ break;
+ }
+ }
+ Ops.push_back(BO->RHS);
+ auto NewBO = MatchBinaryOp(BO->LHS, DT);
+ if (!NewBO || NewBO->Opcode != Instruction::Mul) {
+ Ops.push_back(BO->LHS);
+ break;
+ }
+ BO = NewBO;
+ } while (true);
+ return nullptr;
+ }
+
+ case Instruction::AShr:
+ case Instruction::Shl:
+ case Instruction::Xor:
+ if (!IsConstArg)
+ return nullptr;
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1))
+ return nullptr;
+ break;
+ default:
+ break;
+ }
+
+ Ops.push_back(BO->LHS);
+ Ops.push_back(BO->RHS);
+ return nullptr;
+ }
+
+ switch (U->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::PtrToInt:
+ Ops.push_back(U->getOperand(0));
+ return nullptr;
+
+ case Instruction::BitCast:
+ if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) {
+ Ops.push_back(U->getOperand(0));
+ return nullptr;
+ }
+ return getUnknown(V);
+
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ Ops.push_back(U->getOperand(0));
+ Ops.push_back(U->getOperand(1));
+ return nullptr;
+
+ case Instruction::GetElementPtr:
+ assert(cast<GEPOperator>(U)->getSourceElementType()->isSized() &&
+ "GEP source element type must be sized");
+ for (Value *Index : U->operands())
+ Ops.push_back(Index);
+ return nullptr;
+
+ case Instruction::IntToPtr:
+ return getUnknown(V);
+
+ case Instruction::PHI:
+    // Keep constructing SCEVs for phis recursively for now.
+ return nullptr;
+
+ case Instruction::Select:
+ for (Value *Inc : U->operands())
+ Ops.push_back(Inc);
+ return nullptr;
+
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) {
+ Ops.push_back(RV);
+ return nullptr;
+ }
+
+ if (auto *II = dyn_cast<IntrinsicInst>(U)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::abs:
+ Ops.push_back(II->getArgOperand(0));
+ return nullptr;
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::usub_sat:
+ case Intrinsic::uadd_sat:
+ Ops.push_back(II->getArgOperand(0));
+ Ops.push_back(II->getArgOperand(1));
+ return nullptr;
+ case Intrinsic::start_loop_iterations:
+ Ops.push_back(II->getArgOperand(0));
+ return nullptr;
+ default:
+ break;
+ }
+ }
+ break;
+ }
+
+ return nullptr;
+}
+
const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
return getUnknown(V);
@@ -7030,7 +7387,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
if (!DT.isReachableFromEntry(I->getParent()))
- return getUnknown(UndefValue::get(V->getType()));
+ return getUnknown(PoisonValue::get(V->getType()));
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
@@ -7038,6 +7395,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
else if (!isa<ConstantExpr>(V))
return getUnknown(V);
+ const SCEV *LHS;
+ const SCEV *RHS;
+
Operator *U = cast<Operator>(V);
if (auto BO = MatchBinaryOp(U, DT)) {
switch (BO->Opcode) {
@@ -7103,8 +7463,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
if (Flags != SCEV::FlagAnyWrap) {
- MulOps.push_back(
- getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags));
+ LHS = getSCEV(BO->LHS);
+ RHS = getSCEV(BO->RHS);
+ MulOps.push_back(getMulExpr(LHS, RHS, Flags));
break;
}
}
@@ -7121,14 +7482,20 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return getMulExpr(MulOps);
}
case Instruction::UDiv:
- return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
+ LHS = getSCEV(BO->LHS);
+ RHS = getSCEV(BO->RHS);
+ return getUDivExpr(LHS, RHS);
case Instruction::URem:
- return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
+ LHS = getSCEV(BO->LHS);
+ RHS = getSCEV(BO->RHS);
+ return getURemExpr(LHS, RHS);
case Instruction::Sub: {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BO->Op)
Flags = getNoWrapFlagsFromUB(BO->Op);
- return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
+ LHS = getSCEV(BO->LHS);
+ RHS = getSCEV(BO->RHS);
+ return getMinusSCEV(LHS, RHS, Flags);
}
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
@@ -7180,6 +7547,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
MulCount);
}
}
+ // Binary `and` is a bit-wise `umin`.
+ if (BO->LHS->getType()->isIntegerTy(1)) {
+ LHS = getSCEV(BO->LHS);
+ RHS = getSCEV(BO->RHS);
+ return getUMinExpr(LHS, RHS);
+ }
break;
case Instruction::Or:
@@ -7199,6 +7572,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
(SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
}
}
+ // Binary `or` is a bit-wise `umax`.
+ if (BO->LHS->getType()->isIntegerTy(1)) {
+ LHS = getSCEV(BO->LHS);
+ RHS = getSCEV(BO->RHS);
+ return getUMaxExpr(LHS, RHS);
+ }
break;
case Instruction::Xor:
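
Truth-table check of the i1 mappings added in the two hunks above (`and` is umin, `or` is umax) together with the earlier xor-as-add case from MatchBinaryOp:

#include <algorithm>

static bool checkI1Identities() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      if ((A & B) != std::min(A, B)) return false; // and == umin
      if ((A | B) != std::max(A, B)) return false; // or  == umax
      if ((A ^ B) != ((A + B) & 1)) return false;  // xor == add (mod 2)
    }
  return true; // holds for all four combinations
}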
@@ -7266,9 +7645,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW);
}
- Constant *X = ConstantInt::get(
+ ConstantInt *X = ConstantInt::get(
getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
- return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
+ return getMulExpr(getSCEV(BO->LHS), getConstant(X), Flags);
}
break;
@@ -7394,14 +7773,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return createNodeForPHI(cast<PHINode>(U));
case Instruction::Select:
- // U can also be a select constant expr, which let fall through. Since
- // createNodeForSelect only works for a condition that is an `ICmpInst`, and
- // constant expressions cannot have instructions as operands, we'd have
- // returned getUnknown for a select constant expressions anyway.
- if (isa<Instruction>(U))
- return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
- U->getOperand(1), U->getOperand(2));
- break;
+ return createNodeForSelectOrPHI(U, U->getOperand(0), U->getOperand(1),
+ U->getOperand(2));
case Instruction::Call:
case Instruction::Invoke:
@@ -7415,17 +7788,21 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
getSCEV(II->getArgOperand(0)),
/*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne());
case Intrinsic::umax:
- return getUMaxExpr(getSCEV(II->getArgOperand(0)),
- getSCEV(II->getArgOperand(1)));
+ LHS = getSCEV(II->getArgOperand(0));
+ RHS = getSCEV(II->getArgOperand(1));
+ return getUMaxExpr(LHS, RHS);
case Intrinsic::umin:
- return getUMinExpr(getSCEV(II->getArgOperand(0)),
- getSCEV(II->getArgOperand(1)));
+ LHS = getSCEV(II->getArgOperand(0));
+ RHS = getSCEV(II->getArgOperand(1));
+ return getUMinExpr(LHS, RHS);
case Intrinsic::smax:
- return getSMaxExpr(getSCEV(II->getArgOperand(0)),
- getSCEV(II->getArgOperand(1)));
+ LHS = getSCEV(II->getArgOperand(0));
+ RHS = getSCEV(II->getArgOperand(1));
+ return getSMaxExpr(LHS, RHS);
case Intrinsic::smin:
- return getSMinExpr(getSCEV(II->getArgOperand(0)),
- getSCEV(II->getArgOperand(1)));
+ LHS = getSCEV(II->getArgOperand(0));
+ RHS = getSCEV(II->getArgOperand(1));
+ return getSMinExpr(LHS, RHS);
case Intrinsic::usub_sat: {
const SCEV *X = getSCEV(II->getArgOperand(0));
const SCEV *Y = getSCEV(II->getArgOperand(1));
@@ -7640,7 +8017,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
Res = Multiple;
Res = (unsigned)GreatestCommonDivisor64(*Res, Multiple);
}
- return Res.getValueOr(1);
+ return Res.value_or(1);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
@@ -7708,7 +8085,7 @@ const SCEV *ScalarEvolution::getExitCount(const Loop *L,
const SCEV *
ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
- SCEVUnionPredicate &Preds) {
+ SmallVector<const SCEVPredicate *, 4> &Preds) {
return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds);
}
@@ -7870,7 +8247,6 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
if (LoopUsersItr != LoopUsers.end()) {
ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(),
LoopUsersItr->second.end());
- LoopUsers.erase(LoopUsersItr);
}
// Drop information about expressions based on loop-header PHIs.
@@ -7900,9 +8276,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
}
void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
- while (Loop *Parent = L->getParentLoop())
- L = Parent;
- forgetLoop(L);
+ forgetLoop(L->getOutermostLoop());
}
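
getOutermostLoop() packages the parent walk that the old loop body spelled out by hand. The shape of that walk, over a stand-in node type (not LLVM's Loop):

    // Minimal sketch of the parent walk folded into getOutermostLoop();
    // `Node` is a stand-in type, not LLVM's Loop.
    struct Node {
      Node *Parent = nullptr;
    };

    static Node *outermost(Node *N) {
      while (N->Parent)
        N = N->Parent;
      return N;
    }
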
void ScalarEvolution::forgetValue(Value *V) {
@@ -7944,7 +8318,7 @@ void ScalarEvolution::forgetLoopDispositions(const Loop *L) {
/// the relevant loop exiting block using getExact(ExitingBlock, SE).
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
- SCEVUnionPredicate *Preds) const {
+ SmallVector<const SCEVPredicate *, 4> *Preds) const {
// If any exits were not computable, the loop is not computable.
if (!isComplete() || ExitNotTaken.empty())
return SE->getCouldNotCompute();
@@ -7966,14 +8340,18 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
Ops.push_back(BECount);
- if (Preds && !ENT.hasAlwaysTruePredicate())
- Preds->add(ENT.Predicate.get());
+ if (Preds)
+ for (auto *P : ENT.Predicates)
+ Preds->push_back(P);
assert((Preds || ENT.hasAlwaysTruePredicate()) &&
"Predicate should be always true!");
}
- return SE->getUMinFromMismatchedTypes(Ops);
+ // If an earlier exit exits on the first iteration (exit count zero), then
+  // a later poison exit count should not propagate into the result. These are
+ // exactly the semantics provided by umin_seq.
+ return SE->getUMinFromMismatchedTypes(Ops, /* Sequential */ true);
}
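
The sequential umin is the load-bearing change here: a plain umin would let a poison count from a later exit poison the combined backedge-taken count even when an earlier exit already fires on iteration zero. A standalone sketch of the two semantics, with std::optional standing in for poison (an illustrative encoding, not SCEV's real representation):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <optional>
    #include <string>
    #include <vector>

    // Toy model: nullopt stands in for a poison exit count.
    using Count = std::optional<uint64_t>;

    // Plain umin: poison in any operand poisons the result.
    static Count uminPlain(const std::vector<Count> &Ops) {
      uint64_t R = UINT64_MAX;
      for (Count C : Ops) {
        if (!C)
          return std::nullopt;
        R = std::min(R, *C);
      }
      return R;
    }

    // Sequential umin: scanning left to right, an operand of 0 decides the
    // result before later (possibly poison) operands are even considered.
    static Count uminSeq(const std::vector<Count> &Ops) {
      uint64_t R = UINT64_MAX;
      for (Count C : Ops) {
        if (!C)
          return std::nullopt;
        if (*C == 0)
          return Count(0);
        R = std::min(R, *C);
      }
      return R;
    }

    int main() {
      // Earlier exit fires on the first iteration; later exit count is poison.
      std::vector<Count> Ops = {Count(0), std::nullopt};
      auto Show = [](Count C) {
        return C ? std::to_string(*C) : std::string("poison");
      };
      std::printf("plain umin: %s\n", Show(uminPlain(Ops)).c_str());
      std::printf("umin_seq:   %s\n", Show(uminSeq(Ops)).c_str());
    }
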
/// Get the exact not taken count for this loop exit.
@@ -8082,16 +8460,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
[&](const EdgeExitInfo &EEI) {
BasicBlock *ExitBB = EEI.first;
const ExitLimit &EL = EEI.second;
- if (EL.Predicates.empty())
- return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
- nullptr);
-
- std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
- for (auto *Pred : EL.Predicates)
- Predicate->add(Pred);
-
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
- std::move(Predicate));
+ EL.Predicates);
});
assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
isa<SCEVConstant>(ConstantMax)) &&
@@ -8385,11 +8755,6 @@ ScalarEvolution::computeExitLimitFromCondFromBinOp(
BECount = getUMinFromMismatchedTypes(
EL0.ExactNotTaken, EL1.ExactNotTaken,
/*Sequential=*/!isa<BinaryOperator>(ExitCond));
-
- // If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form,
- // it should have been simplified to zero (see the condition (3) above)
- assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() ||
- BECount->isZero());
}
if (EL0.MaxNotTaken == getCouldNotCompute())
MaxBECount = EL1.MaxNotTaken;
@@ -8470,7 +8835,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L);
// Simplify the operands before analyzing them.
(void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
- ControllingFiniteLoop);
+                             EnableFiniteLoopControl && ControllingFiniteLoop);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
@@ -8683,7 +9049,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
  // and the kind of shift should match the kind of shift we peeled
// off, if any.
- (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
+ (!PostShiftOpCode || *PostShiftOpCode == OpCodeOut);
};
PHINode *PN;
@@ -8871,13 +9237,6 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
Operands[i] = C;
}
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], DL, TLI);
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
- }
return ConstantFoldInstOperands(I, Operands, DL, TLI);
}
@@ -9121,58 +9480,42 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
}
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
- if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
- if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
- unsigned AS = PTy->getAddressSpace();
- Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
- C = ConstantExpr::getBitCast(C, DestPtrTy);
+ Constant *C = nullptr;
+ for (const SCEV *Op : SA->operands()) {
+ Constant *OpC = BuildConstantFromSCEV(Op);
+ if (!OpC)
+ return nullptr;
+ if (!C) {
+ C = OpC;
+ continue;
}
- for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
- Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
- if (!C2)
- return nullptr;
-
- // First pointer!
- if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
- unsigned AS = C2->getType()->getPointerAddressSpace();
- std::swap(C, C2);
- Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
- // The offsets have been converted to bytes. We can add bytes to an
- // i8* by GEP with the byte count in the first index.
- C = ConstantExpr::getBitCast(C, DestPtrTy);
- }
-
- // Don't bother trying to sum two pointers. We probably can't
- // statically compute a load that results from it anyway.
- if (C2->getType()->isPointerTy())
- return nullptr;
-
- if (C->getType()->isPointerTy()) {
- C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
- C, C2);
- } else {
- C = ConstantExpr::getAdd(C, C2);
- }
+ assert(!C->getType()->isPointerTy() &&
+ "Can only have one pointer, and it must be last");
+ if (auto *PT = dyn_cast<PointerType>(OpC->getType())) {
+ // The offsets have been converted to bytes. We can add bytes to an
+ // i8* by GEP with the byte count in the first index.
+ Type *DestPtrTy =
+ Type::getInt8PtrTy(PT->getContext(), PT->getAddressSpace());
+ OpC = ConstantExpr::getBitCast(OpC, DestPtrTy);
+ C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
+ OpC, C);
+ } else {
+ C = ConstantExpr::getAdd(C, OpC);
}
- return C;
}
- return nullptr;
+ return C;
}
case scMulExpr: {
const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
- if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
- // Don't bother with pointers at all.
- if (C->getType()->isPointerTy())
+ Constant *C = nullptr;
+ for (const SCEV *Op : SM->operands()) {
+ assert(!Op->getType()->isPointerTy() && "Can't multiply pointers");
+ Constant *OpC = BuildConstantFromSCEV(Op);
+ if (!OpC)
return nullptr;
- for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
- Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
- if (!C2 || C2->getType()->isPointerTy())
- return nullptr;
- C = ConstantExpr::getMul(C, C2);
- }
- return C;
+ C = C ? ConstantExpr::getMul(C, OpC) : OpC;
}
- return nullptr;
+ return C;
}
case scUDivExpr: {
const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
@@ -9297,15 +9640,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (MadeImprovement) {
Constant *C = nullptr;
const DataLayout &DL = getDataLayout();
- if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], DL, &TLI);
- else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) {
- if (!Load->isVolatile())
- C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(),
- DL);
- } else
- C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
+ C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -9535,15 +9870,15 @@ GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
/// (b) if neither X nor Y exist, return None,
/// (c) if exactly one of X and Y exists, return that value.
static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
- if (X.hasValue() && Y.hasValue()) {
+ if (X && Y) {
unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
- APInt XW = X->sextOrSelf(W);
- APInt YW = Y->sextOrSelf(W);
+ APInt XW = X->sext(W);
+ APInt YW = Y->sext(W);
return XW.slt(YW) ? *X : *Y;
}
- if (!X.hasValue() && !Y.hasValue())
+ if (!X && !Y)
return None;
- return X.hasValue() ? *X : *Y;
+ return X ? *X : *Y;
}
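
MinOptional's contract is the standard optional-min spelled out in the doc comment: both present yields the smaller value (after sign-extending to a common width), both absent yields None, exactly one present yields that one. The same shape over std::optional, with int64_t standing in for APInt so the bit-width adjustment drops out of the sketch:

    #include <algorithm>
    #include <cstdint>
    #include <optional>

    // std::optional analogue of MinOptional; int64_t stands in for APInt.
    static std::optional<int64_t> minOptional(std::optional<int64_t> X,
                                              std::optional<int64_t> Y) {
      if (X && Y)
        return std::min(*X, *Y); // (a) both exist
      if (!X && !Y)
        return std::nullopt;     // (b) neither exists
      return X ? *X : *Y;        // (c) exactly one exists
    }
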
/// Helper function to truncate an optional APInt to a given BitWidth.
@@ -9558,7 +9893,7 @@ static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
/// equation are BW+1 bits wide (to avoid truncation when converting from
/// the addrec to the equation).
static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
- if (!X.hasValue())
+ if (!X)
return None;
unsigned W = X->getBitWidth();
if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
@@ -9585,13 +9920,13 @@ SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
APInt A, B, C, M;
unsigned BitWidth;
auto T = GetQuadraticEquation(AddRec);
- if (!T.hasValue())
+ if (!T)
return None;
std::tie(A, B, C, M, BitWidth) = *T;
LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
- if (!X.hasValue())
+ if (!X)
return None;
ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
@@ -9627,7 +9962,7 @@ SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
APInt A, B, C, M;
unsigned BitWidth;
auto T = GetQuadraticEquation(AddRec);
- if (!T.hasValue())
+ if (!T)
return None;
// Be careful about the return value: there can be two reasons for not
@@ -9672,7 +10007,7 @@ SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
// If SolveQuadraticEquationWrap returns None, it means that there can
// be a solution, but the function failed to find it. We cannot treat it
// as "no solution".
- if (!SO.hasValue() || !UO.hasValue())
+ if (!SO || !UO)
return { None, false };
// Check the smaller value first to see if it leaves the range.
@@ -9690,8 +10025,8 @@ SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
std::tie(A, B, C, M, BitWidth) = *T;
// Lower bound is inclusive, subtract 1 to represent the exiting value.
- APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
- APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
+ APInt Lower = Range.getLower().sext(A.getBitWidth()) - 1;
+ APInt Upper = Range.getUpper().sext(A.getBitWidth());
auto SL = SolveForBoundary(Lower);
auto SU = SolveForBoundary(Upper);
  // If any of the solutions was unknown, no meaningful conclusions can
@@ -9776,7 +10111,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// value at this index. When solving for "X*X != 5", for example, we
// should not accept a root of 2.
if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
- const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
+ const auto *R = cast<SCEVConstant>(getConstant(*S));
return ExitLimit(R, R, false, Predicates);
}
return getCouldNotCompute();
@@ -10296,7 +10631,7 @@ ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
auto ResultSwapped =
getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
- assert(ResultSwapped.hasValue() && "should be able to analyze both!");
+ assert(ResultSwapped && "should be able to analyze both!");
assert(ResultSwapped.getValue() != Result.getValue() &&
"monotonicity should flip as we flip the predicate");
}
@@ -10479,17 +10814,27 @@ bool ScalarEvolution::isKnownPredicateViaConstantRanges(
return false;
if (Pred == CmpInst::ICMP_NE) {
- if (CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
- CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)))
+ auto SL = getSignedRange(LHS);
+ auto SR = getSignedRange(RHS);
+ if (CheckRanges(SL, SR))
+ return true;
+ auto UL = getUnsignedRange(LHS);
+ auto UR = getUnsignedRange(RHS);
+ if (CheckRanges(UL, UR))
return true;
auto *Diff = getMinusSCEV(LHS, RHS);
return !isa<SCEVCouldNotCompute>(Diff) && isKnownNonZero(Diff);
}
- if (CmpInst::isSigned(Pred))
- return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));
+ if (CmpInst::isSigned(Pred)) {
+ auto SL = getSignedRange(LHS);
+ auto SR = getSignedRange(RHS);
+ return CheckRanges(SL, SR);
+ }
- return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
+ auto UL = getUnsignedRange(LHS);
+ auto UR = getUnsignedRange(RHS);
+ return CheckRanges(UL, UR);
}
bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
@@ -12596,7 +12941,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
if (isQuadratic()) {
if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
- return SE.getConstant(S.getValue());
+ return SE.getConstant(*S);
}
return SE.getCouldNotCompute();
@@ -12636,6 +12981,15 @@ bool ScalarEvolution::containsUndefs(const SCEV *S) const {
});
}
+// Return true when S contains a value that is a nullptr.
+bool ScalarEvolution::containsErasedValue(const SCEV *S) const {
+ return SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *SU = dyn_cast<SCEVUnknown>(S))
+ return SU->getValue() == nullptr;
+ return false;
+ });
+}
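
containsErasedValue is a thin wrapper over SCEVExprContains, which is just a depth-first search of the expression tree with a caller-supplied predicate. A generic sketch over a stand-in expression type:

    #include <vector>

    // Toy expression node; a null Value models an erased SCEVUnknown.
    struct Expr {
      const void *Value = nullptr;
      std::vector<const Expr *> Operands;
    };

    // Generic sketch of the SCEVExprContains pattern: walk the tree and stop
    // at the first node satisfying Pred.
    template <typename PredT>
    static bool exprContains(const Expr *E, PredT Pred) {
      if (Pred(E))
        return true;
      for (const Expr *Op : E->Operands)
        if (exprContains(Op, Pred))
          return true;
      return false;
    }

    // containsErasedValue analogue:
    static bool containsErased(const Expr *E) {
      return exprContains(E, [](const Expr *N) { return N->Value == nullptr; });
    }
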
+
/// Return the size of an element read or written by Inst.
const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
Type *Ty;
@@ -12820,12 +13174,13 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
- SCEVUnionPredicate Pred;
- auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
+ SmallVector<const SCEVPredicate *, 4> Preds;
+ auto PBT = SE->getPredicatedBackedgeTakenCount(L, Preds);
if (!isa<SCEVCouldNotCompute>(PBT)) {
OS << "Predicated backedge-taken count is " << *PBT << "\n";
OS << " Predicates:\n";
- Pred.print(OS, 4);
+ for (auto *P : Preds)
+ P->print(OS, 4);
} else {
OS << "Unpredictable predicated backedge-taken count. ";
}
@@ -13202,12 +13557,10 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
auto ExprIt = ExprValueMap.find(S);
if (ExprIt != ExprValueMap.end()) {
- for (auto &ValueAndOffset : ExprIt->second) {
- if (ValueAndOffset.second == nullptr) {
- auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
- if (ValueIt != ValueExprMap.end())
- ValueExprMap.erase(ValueIt);
- }
+ for (Value *V : ExprIt->second) {
+ auto ValueIt = ValueExprMap.find_as(V);
+ if (ValueIt != ValueExprMap.end())
+ ValueExprMap.erase(ValueIt);
}
ExprValueMap.erase(ExprIt);
}
@@ -13258,6 +13611,43 @@ ScalarEvolution::getUsedLoops(const SCEV *S,
SCEVTraversal<FindUsedLoops>(F).visitAll(S);
}
+void ScalarEvolution::getReachableBlocks(
+ SmallPtrSetImpl<BasicBlock *> &Reachable, Function &F) {
+ SmallVector<BasicBlock *> Worklist;
+ Worklist.push_back(&F.getEntryBlock());
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (!Reachable.insert(BB).second)
+ continue;
+
+ Value *Cond;
+ BasicBlock *TrueBB, *FalseBB;
+ if (match(BB->getTerminator(), m_Br(m_Value(Cond), m_BasicBlock(TrueBB),
+ m_BasicBlock(FalseBB)))) {
+ if (auto *C = dyn_cast<ConstantInt>(Cond)) {
+ Worklist.push_back(C->isOne() ? TrueBB : FalseBB);
+ continue;
+ }
+
+ if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
+ const SCEV *L = getSCEV(Cmp->getOperand(0));
+ const SCEV *R = getSCEV(Cmp->getOperand(1));
+ if (isKnownPredicateViaConstantRanges(Cmp->getPredicate(), L, R)) {
+ Worklist.push_back(TrueBB);
+ continue;
+ }
+ if (isKnownPredicateViaConstantRanges(Cmp->getInversePredicate(), L,
+ R)) {
+ Worklist.push_back(FalseBB);
+ continue;
+ }
+ }
+ }
+
+ append_range(Worklist, successors(BB));
+ }
+}
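
getReachableBlocks is a standard worklist walk that additionally prunes the not-taken edge of any two-way branch whose condition is a constant or decidable from constant ranges. The pruning shape over a toy CFG; Decide stands in for the constant and constant-range queries above:

    #include <functional>
    #include <optional>
    #include <set>
    #include <vector>

    // Toy CFG node. For a two-way branch, Decide yields the index of the
    // taken successor when the condition is statically decidable, and
    // nullopt otherwise.
    struct Block {
      std::vector<Block *> Succs;
      std::function<std::optional<unsigned>()> Decide;
    };

    static void reachable(Block *Entry, std::set<Block *> &Out) {
      std::vector<Block *> Worklist{Entry};
      while (!Worklist.empty()) {
        Block *B = Worklist.back();
        Worklist.pop_back();
        if (!Out.insert(B).second)
          continue;
        if (B->Succs.size() == 2 && B->Decide) {
          if (std::optional<unsigned> Taken = B->Decide()) {
            Worklist.push_back(B->Succs[*Taken]);
            continue; // the not-taken edge is pruned
          }
        }
        for (Block *S : B->Succs)
          Worklist.push_back(S);
      }
    }
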
+
void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
ScalarEvolution SE2(F, TLI, AC, DT, LI);
@@ -13282,13 +13672,44 @@ void ScalarEvolution::verify() const {
};
SCEVMapper SCM(SE2);
+ SmallPtrSet<BasicBlock *, 16> ReachableBlocks;
+ SE2.getReachableBlocks(ReachableBlocks, F);
+
+ auto GetDelta = [&](const SCEV *Old, const SCEV *New) -> const SCEV * {
+ if (containsUndefs(Old) || containsUndefs(New)) {
+ // SCEV treats "undef" as an unknown but consistent value (i.e. it does
+ // not propagate undef aggressively). This means we can (and do) fail
+ // verification in cases where a transform makes a value go from "undef"
+ // to "undef+1" (say). The transform is fine, since in both cases the
+ // result is "undef", but SCEV thinks the value increased by 1.
+ return nullptr;
+ }
+
+ // Unless VerifySCEVStrict is set, we only compare constant deltas.
+ const SCEV *Delta = SE2.getMinusSCEV(Old, New);
+ if (!VerifySCEVStrict && !isa<SCEVConstant>(Delta))
+ return nullptr;
+
+ return Delta;
+ };
while (!LoopStack.empty()) {
auto *L = LoopStack.pop_back_val();
llvm::append_range(LoopStack, *L);
- auto *CurBECount = SCM.visit(
- const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
+ // Only verify BECounts in reachable loops. For an unreachable loop,
+ // any BECount is legal.
+ if (!ReachableBlocks.contains(L->getHeader()))
+ continue;
+
+ // Only verify cached BECounts. Computing new BECounts may change the
+ // results of subsequent SCEV uses.
+ auto It = BackedgeTakenCounts.find(L);
+ if (It == BackedgeTakenCounts.end())
+ continue;
+
+ auto *CurBECount =
+ SCM.visit(It->second.getExact(L, const_cast<ScalarEvolution *>(this)));
auto *NewBECount = SE2.getBackedgeTakenCount(L);
if (CurBECount == SE2.getCouldNotCompute() ||
@@ -13301,16 +13722,6 @@ void ScalarEvolution::verify() const {
continue;
}
- if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
- // SCEV treats "undef" as an unknown but consistent value (i.e. it does
- // not propagate undef aggressively). This means we can (and do) fail
- // verification in cases where a transform makes the trip count of a loop
- // go from "undef" to "undef+1" (say). The transform is fine, since in
- // both cases the loop iterates "undef" times, but SCEV thinks we
- // increased the trip count of the loop by 1 incorrectly.
- continue;
- }
-
if (SE.getTypeSizeInBits(CurBECount->getType()) >
SE.getTypeSizeInBits(NewBECount->getType()))
NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
@@ -13318,10 +13729,8 @@ void ScalarEvolution::verify() const {
SE.getTypeSizeInBits(NewBECount->getType()))
CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
- const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);
-
- // Unless VerifySCEVStrict is set, we only compare constant deltas.
- if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
+ const SCEV *Delta = GetDelta(CurBECount, NewBECount);
+ if (Delta && !Delta->isZero()) {
dbgs() << "Trip Count for " << *L << " Changed!\n";
dbgs() << "Old: " << *CurBECount << "\n";
dbgs() << "New: " << *NewBECount << "\n";
@@ -13335,10 +13744,8 @@ void ScalarEvolution::verify() const {
SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end());
while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val();
- if (ValidLoops.contains(L))
- continue;
- ValidLoops.insert(L);
- Worklist.append(L->begin(), L->end());
+ if (ValidLoops.insert(L).second)
+ Worklist.append(L->begin(), L->end());
}
for (auto &KV : ValueExprMap) {
#ifndef NDEBUG
@@ -13351,27 +13758,38 @@ void ScalarEvolution::verify() const {
// Check that the value is also part of the reverse map.
auto It = ExprValueMap.find(KV.second);
- if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+ if (It == ExprValueMap.end() || !It->second.contains(KV.first)) {
dbgs() << "Value " << *KV.first
<< " is in ValueExprMap but not in ExprValueMap\n";
std::abort();
}
- }
- for (const auto &KV : ExprValueMap) {
- for (const auto &ValueAndOffset : KV.second) {
- if (ValueAndOffset.second != nullptr)
+ if (auto *I = dyn_cast<Instruction>(&*KV.first)) {
+ if (!ReachableBlocks.contains(I->getParent()))
continue;
+ const SCEV *OldSCEV = SCM.visit(KV.second);
+ const SCEV *NewSCEV = SE2.getSCEV(I);
+ const SCEV *Delta = GetDelta(OldSCEV, NewSCEV);
+ if (Delta && !Delta->isZero()) {
+ dbgs() << "SCEV for value " << *I << " changed!\n"
+ << "Old: " << *OldSCEV << "\n"
+ << "New: " << *NewSCEV << "\n"
+ << "Delta: " << *Delta << "\n";
+ std::abort();
+ }
+ }
+ }
- auto It = ValueExprMap.find_as(ValueAndOffset.first);
+ for (const auto &KV : ExprValueMap) {
+ for (Value *V : KV.second) {
+ auto It = ValueExprMap.find_as(V);
if (It == ValueExprMap.end()) {
- dbgs() << "Value " << *ValueAndOffset.first
+ dbgs() << "Value " << *V
<< " is in ExprValueMap but not in ValueExprMap\n";
std::abort();
}
if (It->second != KV.first) {
- dbgs() << "Value " << *ValueAndOffset.first
- << " mapped to " << *It->second
+ dbgs() << "Value " << *V << " mapped to " << *It->second
<< " rather than " << *KV.first << "\n";
std::abort();
}
@@ -13537,18 +13955,25 @@ void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
const SCEV *RHS) {
+ return getComparePredicate(ICmpInst::ICMP_EQ, LHS, RHS);
+}
+
+const SCEVPredicate *
+ScalarEvolution::getComparePredicate(const ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
FoldingSetNodeID ID;
assert(LHS->getType() == RHS->getType() &&
"Type mismatch between LHS and RHS");
// Unique this node based on the arguments
- ID.AddInteger(SCEVPredicate::P_Equal);
+ ID.AddInteger(SCEVPredicate::P_Compare);
+ ID.AddInteger(Pred);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
void *IP = nullptr;
if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
return S;
- SCEVEqualPredicate *Eq = new (SCEVAllocator)
- SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
+ SCEVComparePredicate *Eq = new (SCEVAllocator)
+ SCEVComparePredicate(ID.Intern(SCEVAllocator), Pred, LHS, RHS);
UniquePreds.InsertNode(Eq, IP);
return Eq;
}
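
The uniquing above is the usual FoldingSet shape: profile the identity (kind, predicate, operands), probe for an existing node, and allocate only on a miss. A plain-map sketch of the same lookup-or-create step, with opaque stand-in types (std::map's stable node addresses play the allocator's role here):

    #include <map>
    #include <tuple>

    // The key mirrors the fields folded into the FoldingSetNodeID.
    struct ComparePred {
      int Pred;
      const void *LHS;
      const void *RHS;
    };

    static const ComparePred *getUnique(int Pred, const void *LHS,
                                        const void *RHS) {
      using Key = std::tuple<int, const void *, const void *>;
      static std::map<Key, ComparePred> Unique;
      auto It =
          Unique.try_emplace(Key{Pred, LHS, RHS}, ComparePred{Pred, LHS, RHS})
              .first;
      return &It->second;
    }
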
@@ -13585,18 +14010,24 @@ public:
/// \p NewPreds such that the result will be an AddRecExpr.
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
- SCEVUnionPredicate *Pred) {
+ const SCEVPredicate *Pred) {
SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
return Rewriter.visit(S);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (Pred) {
- auto ExprPreds = Pred->getPredicatesForExpr(Expr);
- for (auto *Pred : ExprPreds)
- if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
- if (IPred->getLHS() == Expr)
- return IPred->getRHS();
+ if (auto *U = dyn_cast<SCEVUnionPredicate>(Pred)) {
+ for (auto *Pred : U->getPredicates())
+ if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred))
+ if (IPred->getLHS() == Expr &&
+ IPred->getPredicate() == ICmpInst::ICMP_EQ)
+ return IPred->getRHS();
+ } else if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred)) {
+ if (IPred->getLHS() == Expr &&
+ IPred->getPredicate() == ICmpInst::ICMP_EQ)
+ return IPred->getRHS();
+ }
}
return convertToAddRecWithPreds(Expr);
}
@@ -13636,7 +14067,7 @@ public:
private:
explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
- SCEVUnionPredicate *Pred)
+ const SCEVPredicate *Pred)
: SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
bool addOverflowAssumption(const SCEVPredicate *P) {
@@ -13670,8 +14101,7 @@ private:
for (auto *P : PredicatedRewrite->second){
// Wrap predicates from outer loops are not supported.
if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
- auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr());
- if (L != AR->getLoop())
+ if (L != WP->getExpr()->getLoop())
return Expr;
}
if (!addOverflowAssumption(P))
@@ -13681,14 +14111,15 @@ private:
}
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
- SCEVUnionPredicate *Pred;
+ const SCEVPredicate *Pred;
const Loop *L;
};
} // end anonymous namespace
-const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
- SCEVUnionPredicate &Preds) {
+const SCEV *
+ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
+ const SCEVPredicate &Preds) {
return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}
@@ -13715,28 +14146,36 @@ SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
SCEVPredicateKind Kind)
: FastID(ID), Kind(Kind) {}
-SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
- const SCEV *LHS, const SCEV *RHS)
- : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
+SCEVComparePredicate::SCEVComparePredicate(const FoldingSetNodeIDRef ID,
+ const ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS)
+ : SCEVPredicate(ID, P_Compare), Pred(Pred), LHS(LHS), RHS(RHS) {
assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
assert(LHS != RHS && "LHS and RHS are the same SCEV");
}
-bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
- const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
+bool SCEVComparePredicate::implies(const SCEVPredicate *N) const {
+ const auto *Op = dyn_cast<SCEVComparePredicate>(N);
if (!Op)
return false;
+ if (Pred != ICmpInst::ICMP_EQ)
+ return false;
+
return Op->LHS == LHS && Op->RHS == RHS;
}
-bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
+bool SCEVComparePredicate::isAlwaysTrue() const { return false; }
-const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
+void SCEVComparePredicate::print(raw_ostream &OS, unsigned Depth) const {
+ if (Pred == ICmpInst::ICMP_EQ)
+ OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
+ else
+ OS.indent(Depth) << "Compare predicate: " << *LHS
+ << " " << CmpInst::getPredicateName(Pred) << ") "
+ << *RHS << "\n";
-void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
- OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
}
SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
@@ -13744,7 +14183,7 @@ SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
IncrementWrapFlags Flags)
: SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}
-const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }
+const SCEVAddRecExpr *SCEVWrapPredicate::getExpr() const { return AR; }
bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
const auto *Op = dyn_cast<SCEVWrapPredicate>(N);
@@ -13793,38 +14232,26 @@ SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
}
/// Union predicates don't get cached so create a dummy set ID for it.
-SCEVUnionPredicate::SCEVUnionPredicate()
- : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
+SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds)
+ : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {
+ for (auto *P : Preds)
+ add(P);
+}
bool SCEVUnionPredicate::isAlwaysTrue() const {
return all_of(Preds,
[](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
}
-ArrayRef<const SCEVPredicate *>
-SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
- auto I = SCEVToPreds.find(Expr);
- if (I == SCEVToPreds.end())
- return ArrayRef<const SCEVPredicate *>();
- return I->second;
-}
-
bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
return all_of(Set->Preds,
[this](const SCEVPredicate *I) { return this->implies(I); });
- auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
- if (ScevPredsIt == SCEVToPreds.end())
- return false;
- auto &SCEVPreds = ScevPredsIt->second;
-
- return any_of(SCEVPreds,
+ return any_of(Preds,
[N](const SCEVPredicate *I) { return I->implies(N); });
}
-const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
-
void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
for (auto Pred : Preds)
Pred->print(OS, Depth);
@@ -13837,20 +14264,15 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) {
return;
}
- if (implies(N))
- return;
-
- const SCEV *Key = N->getExpr();
- assert(Key && "Only SCEVUnionPredicate doesn't have an "
- " associated expression!");
-
- SCEVToPreds[Key].push_back(N);
Preds.push_back(N);
}
PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Loop &L)
- : SE(SE), L(L) {}
+ : SE(SE), L(L) {
+  SmallVector<const SCEVPredicate *, 4> Empty;
+ Preds = std::make_unique<SCEVUnionPredicate>(Empty);
+}
void ScalarEvolution::registerUser(const SCEV *User,
ArrayRef<const SCEV *> Ops) {
@@ -13875,7 +14297,7 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
if (Entry.second)
Expr = Entry.second;
- const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds);
+ const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, *Preds);
Entry = {Generation, NewSCEV};
return NewSCEV;
@@ -13883,22 +14305,27 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
if (!BackedgeCount) {
- SCEVUnionPredicate BackedgePred;
- BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
- addPredicate(BackedgePred);
+ SmallVector<const SCEVPredicate *, 4> Preds;
+ BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds);
+ for (auto *P : Preds)
+ addPredicate(*P);
}
return BackedgeCount;
}
void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
- if (Preds.implies(&Pred))
+ if (Preds->implies(&Pred))
return;
- Preds.add(&Pred);
+
+ auto &OldPreds = Preds->getPredicates();
+  SmallVector<const SCEVPredicate *, 4> NewPreds(OldPreds.begin(),
+                                                 OldPreds.end());
+ NewPreds.push_back(&Pred);
+ Preds = std::make_unique<SCEVUnionPredicate>(NewPreds);
updateGeneration();
}
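
With SCEVUnionPredicate now constructed from a complete predicate list, addPredicate rebuilds the owned union rather than mutating it in place. The rebuild-and-swap shape, over toy types:

    #include <memory>
    #include <vector>

    // The owned union is treated as immutable and replaced wholesale on
    // every addition.
    struct Pred {};

    struct Union {
      std::vector<const Pred *> Preds;
      explicit Union(std::vector<const Pred *> P) : Preds(std::move(P)) {}
    };

    static void addPred(std::unique_ptr<Union> &U, const Pred *P) {
      std::vector<const Pred *> NewPreds =
          U ? U->Preds : std::vector<const Pred *>();
      NewPreds.push_back(P);
      U = std::make_unique<Union>(std::move(NewPreds));
    }
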
-const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
- return Preds;
+const SCEVPredicate &PredicatedScalarEvolution::getPredicate() const {
+ return *Preds;
}
void PredicatedScalarEvolution::updateGeneration() {
@@ -13906,7 +14333,7 @@ void PredicatedScalarEvolution::updateGeneration() {
if (++Generation == 0) {
for (auto &II : RewriteMap) {
const SCEV *Rewritten = II.second.second;
- II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)};
+ II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, *Preds)};
}
}
}
@@ -13952,17 +14379,17 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
return nullptr;
for (auto *P : NewPreds)
- Preds.add(P);
+ addPredicate(*P);
- updateGeneration();
RewriteMap[SE.getSCEV(V)] = {Generation, New};
return New;
}
PredicatedScalarEvolution::PredicatedScalarEvolution(
const PredicatedScalarEvolution &Init)
- : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
- Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
+ : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L),
+ Preds(std::make_unique<SCEVUnionPredicate>(Init.Preds->getPredicates())),
+ Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
for (auto I : Init.FlagsMap)
FlagsMap.insert(I);
}
@@ -14243,12 +14670,23 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
ExprsToRewrite.push_back(LHS);
}
};
- // First, collect conditions from dominating branches. Starting at the loop
+
+ SmallVector<std::pair<Value *, bool>> Terms;
+ // First, collect information from assumptions dominating the loop.
+ for (auto &AssumeVH : AC.assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *AssumeI = cast<CallInst>(AssumeVH);
+ if (!DT.dominates(AssumeI, L->getHeader()))
+ continue;
+ Terms.emplace_back(AssumeI->getOperand(0), true);
+ }
+
+ // Second, collect conditions from dominating branches. Starting at the loop
  // predecessor, climb up the predecessor chain as long as we can find
  // predecessors that have unique successors leading to the original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
- SmallVector<std::pair<Value *, bool>> Terms;
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), L->getHeader());
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
@@ -14280,8 +14718,9 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
auto Predicate =
EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
- CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)),
- getSCEV(Cmp->getOperand(1)), RewriteMap);
+ const auto *LHS = getSCEV(Cmp->getOperand(0));
+ const auto *RHS = getSCEV(Cmp->getOperand(1));
+ CollectCondition(Predicate, LHS, RHS, RewriteMap);
continue;
}
@@ -14294,18 +14733,6 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
}
}
- // Also collect information from assumptions dominating the loop.
- for (auto &AssumeVH : AC.assumptions()) {
- if (!AssumeVH)
- continue;
- auto *AssumeI = cast<CallInst>(AssumeVH);
- auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0));
- if (!Cmp || !DT.dominates(AssumeI, L->getHeader()))
- continue;
- CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)),
- getSCEV(Cmp->getOperand(1)), RewriteMap);
- }
-
if (RewriteMap.empty())
return Expr;
diff --git a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index f4fa159d1ec7..3d47dc6b30df 100644
--- a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
index 64e908bdf342..0619569bf816 100644
--- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
@@ -15,9 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/IR/Constants.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
index 209ae66ca53e..22dff5efec5c 100644
--- a/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index e847bf8f0f6b..f510991b4463 100644
--- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -36,7 +36,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index 9056cc01484d..52e8566aca3c 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -19,17 +19,12 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include <algorithm>
-#include <memory>
#include <tuple>
using namespace llvm;
@@ -75,7 +70,7 @@ static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II,
auto AllocaSizeInBits = AI->getAllocationSizeInBits(DL);
if (!AllocaSizeInBits)
return nullptr;
- int64_t AllocaSize = AllocaSizeInBits.getValue() / 8;
+ int64_t AllocaSize = *AllocaSizeInBits / 8;
auto *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
if (!Size)
diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 54f3605ee033..94b646ab7c06 100644
--- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
@@ -384,9 +383,9 @@ bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
const SCEV *Max = SE.getMinusSCEV(ToDiffTy(SE.getConstant(Size.getUpper())),
ToDiffTy(AccessSize));
return SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SGE, Diff, Min, I)
- .getValueOr(false) &&
+ .value_or(false) &&
SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SLE, Diff, Max, I)
- .getValueOr(false);
+ .value_or(false);
}
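
value_or(false) collapses "the query returned an answer" and "the query could not be answered" into one conservative bool: an access is only called safe when both bounds checks positively succeed. The idiom in std::optional terms:

    #include <optional>

    // An unanswerable predicate query (nullopt) is conservatively treated
    // as "not proven safe".
    static bool provedSafe(std::optional<bool> LowerOk,
                           std::optional<bool> UpperOk) {
      return LowerOk.value_or(false) && UpperOk.value_or(false);
    }
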
/// The function analyzes all local uses of Ptr (alloca or argument) and
diff --git a/llvm/lib/Analysis/StratifiedSets.h b/llvm/lib/Analysis/StratifiedSets.h
index 60ea2451b0ef..883ebd24efdc 100644
--- a/llvm/lib/Analysis/StratifiedSets.h
+++ b/llvm/lib/Analysis/StratifiedSets.h
@@ -340,10 +340,10 @@ public:
return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
}
- bool has(const T &Elem) const { return get(Elem).hasValue(); }
+ bool has(const T &Elem) const { return get(Elem).has_value(); }
bool add(const T &Main) {
- if (get(Main).hasValue())
+ if (get(Main))
return false;
auto NewIndex = getNewUnlinkedIndex();
@@ -560,7 +560,7 @@ private:
Optional<StratifiedIndex> indexOf(const T &Val) {
auto MaybeVal = get(Val);
- if (!MaybeVal.hasValue())
+ if (!MaybeVal)
return None;
auto *Info = *MaybeVal;
auto &Link = linksAt(Info->Index);
diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
index ff833b55bbce..3446e50a4344 100644
--- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -116,18 +116,16 @@
// around from the latch.
//
//===----------------------------------------------------------------------===//
+
#include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include <functional>
-#include <stack>
-#include <unordered_set>
#define DEBUG_TYPE "sync-dependence"
@@ -257,7 +255,7 @@ SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
[&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
}
-SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
+SyncDependenceAnalysis::~SyncDependenceAnalysis() = default;
// divergence propagator for reducible CFGs
struct DivergencePropagator {
diff --git a/llvm/lib/Analysis/SyntheticCountsUtils.cpp b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
index a3edce76cd88..29c41fda5e28 100644
--- a/llvm/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/llvm/lib/Analysis/SyntheticCountsUtils.cpp
@@ -14,9 +14,6 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;
@@ -57,7 +54,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
if (!OptProfCount)
continue;
auto Callee = CGT::edge_dest(E.second);
- AdditionalCounts[Callee] += OptProfCount.getValue();
+ AdditionalCounts[Callee] += *OptProfCount;
}
// Update the counts for the nodes in the SCC.
@@ -70,7 +67,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
if (!OptProfCount)
continue;
auto Callee = CGT::edge_dest(E.second);
- AddCount(Callee, OptProfCount.getValue());
+ AddCount(Callee, *OptProfCount);
}
}
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
index 26bc63983b4e..203858c1cf06 100644
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -82,6 +82,33 @@ void serialize(const Message &SE, std::string *OutStr) {
*OutStr = SE.SerializeAsString();
}
}
+
+int getTFTypeIndex(TensorType TType) {
+ switch (TType) {
+ case TensorType::Double:
+ return TF_DOUBLE;
+ case TensorType::Float:
+ return TF_FLOAT;
+ case TensorType::Int8:
+ return TF_INT8;
+ case TensorType::UInt8:
+ return TF_UINT8;
+ case TensorType::Int16:
+ return TF_INT16;
+ case TensorType::UInt16:
+ return TF_UINT16;
+ case TensorType::Int32:
+ return TF_INT32;
+ case TensorType::UInt32:
+ return TF_UINT32;
+ case TensorType::Int64:
+ return TF_INT64;
+ case TensorType::UInt64:
+ return TF_UINT64;
+ case TensorType::Invalid:
+ llvm_unreachable("Unknown tensor type");
+ }
+}
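
getTFTypeIndex covers every TensorType enumerator and ends in llvm_unreachable with no default case, so adding an enumerator produces a -Wswitch warning at this switch instead of a silent runtime fallthrough. The pattern in miniature, with a made-up enum and assert standing in for llvm_unreachable:

    #include <cassert>

    enum class Kind { A, B, Invalid };

    // Exhaustive switch with no `default`: a new Kind enumerator triggers a
    // -Wswitch warning here rather than misbehaving at run time.
    static int toWireTag(Kind K) {
      switch (K) {
      case Kind::A:
        return 1;
      case Kind::B:
        return 2;
      case Kind::Invalid:
        assert(false && "invalid kind");
        return -1;
      }
      return -1; // not reached; placates -Wreturn-type on some compilers
    }
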
} // namespace
namespace llvm {
@@ -105,116 +132,6 @@ private:
std::vector<TF_Tensor *> Output;
};
-size_t TensorSpec::getElementByteSize() const {
- return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
-}
-
-TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
- const std::vector<int64_t> &Shape)
- : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
- ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
- std::multiplies<int64_t>())) {}
-
-Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
- const json::Value &Value) {
- auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
- std::string S;
- llvm::raw_string_ostream OS(S);
- OS << Value;
- Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
- return None;
- };
- // FIXME: accept a Path as a parameter, and use it for error reporting.
- json::Path::Root Root("tensor_spec");
- json::ObjectMapper Mapper(Value, Root);
- if (!Mapper)
- return EmitError("Value is not a dict");
-
- std::string TensorName;
- int TensorPort = -1;
- std::string TensorType;
- std::vector<int64_t> TensorShape;
-
- if (!Mapper.map<std::string>("name", TensorName))
- return EmitError("'name' property not present or not a string");
- if (!Mapper.map<std::string>("type", TensorType))
- return EmitError("'type' property not present or not a string");
- if (!Mapper.map<int>("port", TensorPort))
- return EmitError("'port' property not present or not an int");
- if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
- return EmitError("'shape' property not present or not an int array");
-
-#define PARSE_TYPE(T, E) \
- if (TensorType == #T) \
- return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
- TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
-#undef PARSE_TYPE
- return None;
-}
-
-Optional<std::vector<LoggedFeatureSpec>>
-loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
- StringRef ModelPath, StringRef SpecFileOverride) {
- SmallVector<char, 128> OutputSpecsPath;
- StringRef FileName = SpecFileOverride;
- if (FileName.empty()) {
- llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
- FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
- }
-
- auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
- if (!BufferOrError) {
- Ctx.emitError("Error opening output specs file: " + FileName + " : " +
- BufferOrError.getError().message());
- return None;
- }
- auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
- if (!ParsedJSONValues) {
- Ctx.emitError("Could not parse specs file: " + FileName);
- return None;
- }
- auto ValuesArray = ParsedJSONValues->getAsArray();
- if (!ValuesArray) {
- Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
- "logging_name:<name>} dictionaries");
- return None;
- }
- std::vector<LoggedFeatureSpec> Ret;
- for (const auto &Value : *ValuesArray)
- if (const auto *Obj = Value.getAsObject())
- if (const auto *SpecPart = Obj->get("tensor_spec"))
- if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
- if (auto LoggingName = Obj->getString("logging_name")) {
- if (!TensorSpec->isElementType<int64_t>() &&
- !TensorSpec->isElementType<int32_t>() &&
- !TensorSpec->isElementType<float>()) {
- Ctx.emitError(
- "Only int64, int32, and float tensors are supported. "
- "Found unsupported type for tensor named " +
- TensorSpec->name());
- return None;
- }
- Ret.push_back({*TensorSpec, LoggingName->str()});
- }
-
- if (ValuesArray->size() != Ret.size()) {
- Ctx.emitError(
- "Unable to parse output spec. It should be a json file containing an "
- "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
- "with a json object describing a TensorSpec; and a 'logging_name' key, "
- "which is a string to use as name when logging this tensor in the "
- "training log.");
- return None;
- }
- if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
- Ctx.emitError("The first output spec must describe the decision tensor, "
- "and must have the logging_name " +
- StringRef(ExpectedDecisionName));
- return None;
- }
- return Ret;
-}
-
class TFModelEvaluatorImpl {
public:
TFModelEvaluatorImpl(StringRef SavedModelPath,
@@ -383,16 +300,29 @@ TFModelEvaluatorImpl::TFModelEvaluatorImpl(
errs() << TF_Message(Status.get());
invalidate();
}
+ size_t NrSupported = 0;
for (size_t I = 0; I < InputSpecs.size(); ++I) {
auto &InputSpec = InputSpecs[I];
InputFeed[I] = {
TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
InputSpec.port()};
+ if (!InputFeed[I].oper) {
+ continue;
+ }
+ if (NrSupported++ != I) {
+ errs()
+ << "Unsupported features must be placed at the end of the InputSpecs";
+ invalidate();
+ return;
+ }
if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
return;
- initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
+ initInput(I, static_cast<TF_DataType>(getTFTypeIndex(InputSpec.type())),
InputSpec.shape());
}
+ InputFeed.resize(NrSupported);
+ Input.resize(NrSupported);
+
for (size_t I = 0; I < OutputSpecsSize; ++I) {
auto OutputSpec = GetOutputSpecs(I);
OutputFeed[I] = {
@@ -470,7 +400,9 @@ void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type,
}
void *TFModelEvaluator::getUntypedInput(size_t Index) {
- return TF_TensorData(Impl->getInput()[Index]);
+ if (Index < Impl->getInput().size())
+ return TF_TensorData(Impl->getInput()[Index]);
+ return nullptr;
}
TFModelEvaluator::EvaluationResult::EvaluationResult(
@@ -495,13 +427,6 @@ TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
return TF_TensorData(Impl->getOutput()[Index]);
}
-#define TFUTILS_GETDATATYPE_IMPL(T, E) \
- template <> int TensorSpec::getDataType<T>() { return E; }
-
-TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)
-
-#undef TFUTILS_GETDATATYPE_IMPL
-
TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 02923c2c7eb1..8ebdb65e88dc 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -659,12 +659,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_stpncpy);
}
- if (T.isPS4()) {
- // PS4 does have memalign.
+ if (T.isPS()) {
+ // PS4/PS5 do have memalign.
TLI.setAvailable(LibFunc_memalign);
- // PS4 does not have new/delete with "unsigned int" size parameter;
- // it only has the "unsigned long" versions.
+ // PS4/PS5 do not have new/delete with "unsigned int" size parameter;
+ // they only have the "unsigned long" versions.
TLI.setUnavailable(LibFunc_ZdaPvj);
TLI.setUnavailable(LibFunc_ZdaPvjSt11align_val_t);
TLI.setUnavailable(LibFunc_ZdlPvj);
@@ -1110,9 +1110,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_system:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
case LibFunc___kmpc_alloc_shared:
+ return NumParams == 1 && FTy.getReturnType()->isPointerTy();
case LibFunc_malloc:
case LibFunc_vec_malloc:
- return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
+ return NumParams == 1 && FTy.getParamType(0)->isIntegerTy(SizeTBits) &&
+ FTy.getReturnType()->isPointerTy();
case LibFunc_memcmp:
return NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 25e9dee98e13..66f61961d01b 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -11,7 +11,6 @@
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -21,7 +20,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
#include <utility>
using namespace llvm;
@@ -33,6 +31,11 @@ static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
cl::Hidden,
cl::desc("Recognize reduction patterns."));
+static cl::opt<unsigned> CacheLineSize(
+ "cache-line-size", cl::init(0), cl::Hidden,
+ cl::desc("Use this to override the target cache line size when "
+ "specified by the user."));
+
namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
@@ -179,7 +182,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
: TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
-TargetTransformInfo::~TargetTransformInfo() {}
+TargetTransformInfo::~TargetTransformInfo() = default;
TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
: TTIImpl(std::move(Arg.TTIImpl)) {}
@@ -350,7 +353,8 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
Scale, AddrSpace, I);
}
-bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
+bool TargetTransformInfo::isLSRCostLess(const LSRCost &C1,
+ const LSRCost &C2) const {
return TTIImpl->isLSRCostLess(C1, C2);
}
@@ -398,11 +402,22 @@ bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
return TTIImpl->isLegalNTLoad(DataType, Alignment);
}
+bool TargetTransformInfo::isLegalBroadcastLoad(Type *ElementTy,
+ ElementCount NumElements) const {
+ return TTIImpl->isLegalBroadcastLoad(ElementTy, NumElements);
+}
+
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedGather(DataType, Alignment);
}
+bool TargetTransformInfo::isLegalAltInstr(
+ VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask) const {
+ return TTIImpl->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
+}
+
bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
@@ -470,7 +485,7 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
return TTIImpl->isTypeLegal(Ty);
}
-InstructionCost TargetTransformInfo::getRegUsageForType(Type *Ty) const {
+unsigned TargetTransformInfo::getRegUsageForType(Type *Ty) const {
return TTIImpl->getRegUsageForType(Ty);
}
@@ -507,6 +522,10 @@ bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
return TTIImpl->supportsEfficientVectorElementLoadStore();
}
+bool TargetTransformInfo::supportsTailCalls() const {
+ return TTIImpl->supportsTailCalls();
+}
+
bool TargetTransformInfo::enableAggressiveInterleaving(
bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
@@ -623,8 +642,9 @@ Optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
return TTIImpl->getVScaleForTuning();
}
-bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
- return TTIImpl->shouldMaximizeVectorBandwidth();
+bool TargetTransformInfo::shouldMaximizeVectorBandwidth(
+ TargetTransformInfo::RegisterKind K) const {
+ return TTIImpl->shouldMaximizeVectorBandwidth(K);
}
ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth,
@@ -637,6 +657,11 @@ unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth,
return TTIImpl->getMaximumVF(ElemWidth, Opcode);
}
+unsigned TargetTransformInfo::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+ Type *ScalarValTy) const {
+ return TTIImpl->getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
+}
+
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
return TTIImpl->shouldConsiderAddressTypePromotion(
@@ -644,7 +669,8 @@ bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
}
unsigned TargetTransformInfo::getCacheLineSize() const {
- return TTIImpl->getCacheLineSize();
+ return CacheLineSize.getNumOccurrences() > 0 ? CacheLineSize
+ : TTIImpl->getCacheLineSize();
}
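
getNumOccurrences() is what lets an explicit -cache-line-size=0 win over the target's value while an untouched flag defers to it; a plain read of the option could not tell "user passed 0" from "left at the default of 0". A minimal standalone tool using the real llvm::cl API; the flag and function names here are made up for the sketch:

    #include "llvm/Support/CommandLine.h"

    using namespace llvm;

    // Hypothetical flag demonstrating the override pattern: only an
    // occurrence on the command line beats the built-in value, so an
    // explicit -example-size=0 is honored instead of being taken as unset.
    static cl::opt<unsigned>
        ExampleSize("example-size", cl::init(0), cl::Hidden,
                    cl::desc("Override the built-in size."));

    static unsigned getSize(unsigned BuiltIn) {
      return ExampleSize.getNumOccurrences() > 0 ? ExampleSize : BuiltIn;
    }

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return static_cast<int>(getSize(64));
    }
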
llvm::Optional<unsigned>
@@ -742,12 +768,11 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
return Cost;
}
-InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind,
- VectorType *Ty,
- ArrayRef<int> Mask,
- int Index,
- VectorType *SubTp) const {
- InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp);
+InstructionCost TargetTransformInfo::getShuffleCost(
+ ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, int Index,
+ VectorType *SubTp, ArrayRef<const Value *> Args) const {
+ InstructionCost Cost =
+ TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -973,18 +998,21 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
Type *TargetTransformInfo::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const {
+ unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicElementSize) const {
return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
- DestAddrSpace, SrcAlign, DestAlign);
+ DestAddrSpace, SrcAlign, DestAlign,
+ AtomicElementSize);
}
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const {
- TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
- SrcAddrSpace, DestAddrSpace,
- SrcAlign, DestAlign);
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicCpySize) const {
+ TTIImpl->getMemcpyLoopResidualLoweringType(
+ OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
+ DestAlign, AtomicCpySize);
}
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
@@ -1155,7 +1183,7 @@ TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
}
}
-TargetTransformInfo::Concept::~Concept() {}
+TargetTransformInfo::Concept::~Concept() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/TensorSpec.cpp b/llvm/lib/Analysis/TensorSpec.cpp
new file mode 100644
index 000000000000..f6a5882371a7
--- /dev/null
+++ b/llvm/lib/Analysis/TensorSpec.cpp
@@ -0,0 +1,144 @@
+//===- TensorSpec.cpp - tensor type abstraction ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation file for the abstraction of a tensor type, and JSON loading
+// utils.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <numeric>
+
+using namespace llvm;
+
+namespace llvm {
+
+#define TFUTILS_GETDATATYPE_IMPL(T, E) \
+ template <> TensorType TensorSpec::getDataType<T>() { return TensorType::E; }
+
+SUPPORTED_TENSOR_TYPES(TFUTILS_GETDATATYPE_IMPL)
+
+#undef TFUTILS_GETDATATYPE_IMPL
+
+TensorSpec::TensorSpec(const std::string &Name, int Port, TensorType Type,
+ size_t ElementSize, const std::vector<int64_t> &Shape)
+ : Name(Name), Port(Port), Type(Type), Shape(Shape),
+ ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
+ std::multiplies<int64_t>())),
+ ElementSize(ElementSize) {}
+
+Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
+ const json::Value &Value) {
+ auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
+ std::string S;
+ llvm::raw_string_ostream OS(S);
+ OS << Value;
+ Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
+ return None;
+ };
+ // FIXME: accept a Path as a parameter, and use it for error reporting.
+ json::Path::Root Root("tensor_spec");
+ json::ObjectMapper Mapper(Value, Root);
+ if (!Mapper)
+ return EmitError("Value is not a dict");
+
+ std::string TensorName;
+ int TensorPort = -1;
+ std::string TensorType;
+ std::vector<int64_t> TensorShape;
+
+ if (!Mapper.map<std::string>("name", TensorName))
+ return EmitError("'name' property not present or not a string");
+ if (!Mapper.map<std::string>("type", TensorType))
+ return EmitError("'type' property not present or not a string");
+ if (!Mapper.map<int>("port", TensorPort))
+ return EmitError("'port' property not present or not an int");
+ if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
+ return EmitError("'shape' property not present or not an int array");
+
+#define PARSE_TYPE(T, E) \
+ if (TensorType == #T) \
+ return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
+ SUPPORTED_TENSOR_TYPES(PARSE_TYPE)
+#undef PARSE_TYPE
+ return None;
+}
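
As a usage sketch, a value accepted by getTensorSpecFromJSON needs exactly the four properties mapped above; the concrete names below are illustrative inputs, not values shipped with LLVM:

    #include "llvm/Analysis/TensorSpec.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/JSON.h"

    using namespace llvm;

    Optional<TensorSpec> parseExampleSpec(LLVMContext &Ctx) {
      // "type" must spell a supported element type, e.g. int64_t or float,
      // matching the #T strings produced by SUPPORTED_TENSOR_TYPES above.
      Expected<json::Value> Val = json::parse(
          R"({"name": "example_tensor", "port": 0,
              "type": "int64_t", "shape": [1, 4]})");
      if (!Val) {
        consumeError(Val.takeError());
        return None;
      }
      return getTensorSpecFromJSON(Ctx, *Val);
    }
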
+
+Optional<std::vector<LoggedFeatureSpec>>
+loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
+ StringRef ModelPath, StringRef SpecFileOverride) {
+ SmallVector<char, 128> OutputSpecsPath;
+ StringRef FileName = SpecFileOverride;
+ if (FileName.empty()) {
+ llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
+ FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
+ }
+
+ auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
+ if (!BufferOrError) {
+ Ctx.emitError("Error opening output specs file: " + FileName + " : " +
+ BufferOrError.getError().message());
+ return None;
+ }
+ auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
+ if (!ParsedJSONValues) {
+ Ctx.emitError("Could not parse specs file: " + FileName);
+ return None;
+ }
+ auto ValuesArray = ParsedJSONValues->getAsArray();
+ if (!ValuesArray) {
+ Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
+ "logging_name:<name>} dictionaries");
+ return None;
+ }
+ std::vector<LoggedFeatureSpec> Ret;
+ for (const auto &Value : *ValuesArray)
+ if (const auto *Obj = Value.getAsObject())
+ if (const auto *SpecPart = Obj->get("tensor_spec"))
+ if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
+ if (auto LoggingName = Obj->getString("logging_name")) {
+ if (!TensorSpec->isElementType<int64_t>() &&
+ !TensorSpec->isElementType<int32_t>() &&
+ !TensorSpec->isElementType<float>()) {
+ Ctx.emitError(
+ "Only int64, int32, and float tensors are supported. "
+ "Found unsupported type for tensor named " +
+ TensorSpec->name());
+ return None;
+ }
+ Ret.push_back({*TensorSpec, LoggingName->str()});
+ }
+
+ if (ValuesArray->size() != Ret.size()) {
+ Ctx.emitError(
+ "Unable to parse output spec. It should be a json file containing an "
+ "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
+ "with a json object describing a TensorSpec; and a 'logging_name' key, "
+ "which is a string to use as name when logging this tensor in the "
+ "training log.");
+ return None;
+ }
+ if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
+ Ctx.emitError("The first output spec must describe the decision tensor, "
+ "and must have the logging_name " +
+ StringRef(ExpectedDecisionName));
+ return None;
+ }
+ return Ret;
+}
+} // namespace llvm
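
For orientation, the file loadOutputSpecs reads is an array of such dictionaries; a sketch of an input it would accept, with all names hypothetical:

    // Illustrative output_spec.json, assuming ExpectedDecisionName is
    // "some_decision":
    //   [
    //     { "logging_name": "some_decision",
    //       "tensor_spec": { "name": "output_0", "port": 0,
    //                        "type": "int64_t", "shape": [1] } },
    //     { "logging_name": "extra_output",
    //       "tensor_spec": { "name": "output_1", "port": 0,
    //                        "type": "float", "shape": [1] } }
    //   ]
    // The first entry must carry the expected decision name, and every
    // tensor must be int64, int32, or float, per the checks above.
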
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 627a78a2a2fd..9bcbe4a4cc1e 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -112,7 +112,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 80051fd5f7c1..201e64770766 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -16,7 +16,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp
index 7573975a3dd3..e6d297877b62 100644
--- a/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;
diff --git a/llvm/lib/Analysis/ValueLatticeUtils.cpp b/llvm/lib/Analysis/ValueLatticeUtils.cpp
index 53638c351f72..2bcb4d5b0e6b 100644
--- a/llvm/lib/Analysis/ValueLatticeUtils.cpp
+++ b/llvm/lib/Analysis/ValueLatticeUtils.cpp
@@ -29,12 +29,13 @@ bool llvm::canTrackGlobalVariableInterprocedurally(GlobalVariable *GV) {
!GV->hasDefinitiveInitializer())
return false;
return all_of(GV->users(), [&](User *U) {
- // Currently all users of a global variable have to be none-volatile loads
- // or stores and the global cannot be stored itself.
+    // Currently all users of a global variable have to be non-volatile loads
+    // or stores of the global type, and the global itself cannot be stored.
if (auto *Store = dyn_cast<StoreInst>(U))
- return Store->getValueOperand() != GV && !Store->isVolatile();
+ return Store->getValueOperand() != GV && !Store->isVolatile() &&
+ Store->getValueOperand()->getType() == GV->getValueType();
if (auto *Load = dyn_cast<LoadInst>(U))
- return !Load->isVolatile();
+ return !Load->isVolatile() && Load->getType() == GV->getValueType();
return false;
});
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index c14bdb8bc262..05d5e47bb8d7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -70,10 +71,8 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
-#include <array>
#include <cassert>
#include <cstdint>
-#include <iterator>
#include <utility>
using namespace llvm;
@@ -86,13 +85,12 @@ static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
// According to the LangRef, branching on a poison condition is absolutely
// immediate full UB. However, historically we haven't implemented that
-// consistently as we have an important transformation (non-trivial unswitch)
-// which introduces instances of branch on poison/undef to otherwise well
-// defined programs. This flag exists to let us test optimization benefit
-// of exploiting the specified behavior (in combination with enabling the
-// unswitch fix.)
+// consistently as we had an important transformation (non-trivial unswitch)
+// which introduced instances of branch on poison/undef to otherwise well
+// defined programs. This issue has since been fixed, but the flag is
+// temporarily retained to easily diagnose potential regressions.
static cl::opt<bool> BranchOnPoisonAsUB("branch-on-poison-as-ub",
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
@@ -275,13 +273,39 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
assert(LHS->getType()->isIntOrIntVectorTy() &&
"LHS and RHS should be integers");
// Look for an inverted mask: (X & ~M) op (Y & M).
- Value *M;
- if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
- match(RHS, m_c_And(m_Specific(M), m_Value())))
+ {
+ Value *M;
+ if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
+ match(RHS, m_c_And(m_Specific(M), m_Value())))
+ return true;
+ if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
+ match(LHS, m_c_And(m_Specific(M), m_Value())))
+ return true;
+ }
+
+ // X op (Y & ~X)
+ if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) ||
+ match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value())))
return true;
- if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
- match(LHS, m_c_And(m_Specific(M), m_Value())))
+
+ // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
+ // for constant Y.
+ Value *Y;
+ if (match(RHS,
+ m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) ||
+ match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y))))
return true;
+
+ // Look for: (A & B) op ~(A | B)
+ {
+ Value *A, *B;
+ if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
+ match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return true;
+ if (match(RHS, m_And(m_Value(A), m_Value(B))) &&
+ match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return true;
+ }
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
KnownBits LHSKnown(IT->getBitWidth());
KnownBits RHSKnown(IT->getBitWidth());
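
A brute-force check of the new patterns on concrete 8-bit values; this is standalone C++ illustrating why the matched operands can never share a set bit, not LLVM API code:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B) {
          // (A & B) keeps bits set in both; ~(A | B) keeps bits set in
          // neither, so the two are always disjoint.
          uint8_t X = uint8_t(A & B);
          uint8_t Y = uint8_t(~(A | B));
          assert((X & Y) == 0);
          // (B & ~A) is disjoint from A by construction, which is the
          // "X op (Y & ~X)" pattern above.
          uint8_t Z = uint8_t(B & ~A);
          assert((uint8_t(A) & Z) == 0);
        }
      return 0;
    }
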
@@ -451,7 +475,12 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
}
}
- Known = KnownBits::mul(Known, Known2);
+ bool SelfMultiply = Op0 == Op1;
+ // TODO: SelfMultiply can be poison, but not undef.
+ if (SelfMultiply)
+ SelfMultiply &=
+ isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
// Only make use of no-wrap flags if we failed to compute the sign bit
// directly. This matters if the multiplication always overflows, in
@@ -656,7 +685,8 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (V->getType()->isPointerTy()) {
if (RetainedKnowledge RK = getKnowledgeValidInContext(
V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
- Known.Zero.setLowBits(Log2_64(RK.ArgValue));
+ if (isPowerOf2_64(RK.ArgValue))
+ Known.Zero.setLowBits(Log2_64(RK.ArgValue));
}
}
@@ -1041,7 +1071,7 @@ static void computeKnownBitsFromShiftOperator(
// bits. This check is sunk down as far as possible to avoid the expensive
// call to isKnownNonZero if the cheaper checks above fail.
if (ShiftAmt == 0) {
- if (!ShifterOperandIsNonZero.hasValue())
+ if (!ShifterOperandIsNonZero)
ShifterOperandIsNonZero =
isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
if (*ShifterOperandIsNonZero)
@@ -1726,8 +1756,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
- unsigned FirstZeroHighBit =
- 32 - countLeadingZeros(VScaleMax.getValue());
+ unsigned FirstZeroHighBit = 32 - countLeadingZeros(*VScaleMax);
if (FirstZeroHighBit < BitWidth)
Known.Zero.setBitsFrom(FirstZeroHighBit);
@@ -2007,6 +2036,63 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
+/// Try to detect a recurrence in which the value of the induction variable
+/// is always a power of two (or zero).
+static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
+ unsigned Depth, Query &Q) {
+ BinaryOperator *BO = nullptr;
+ Value *Start = nullptr, *Step = nullptr;
+ if (!matchSimpleRecurrence(PN, BO, Start, Step))
+ return false;
+
+ // Initial value must be a power of two.
+ for (const Use &U : PN->operands()) {
+ if (U.get() == Start) {
+ // Initial value comes from a different BB, need to adjust context
+ // instruction for analysis.
+ Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
+ if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
+ return false;
+ }
+ }
+
+ // Except for Mul, the induction variable must be on the left side of the
+ // increment expression, otherwise its value can be arbitrary.
+ if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
+ return false;
+
+ Q.CxtI = BO->getParent()->getTerminator();
+ switch (BO->getOpcode()) {
+ case Instruction::Mul:
+ // Power of two is closed under multiplication.
+ return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
+ Q.IIQ.hasNoSignedWrap(BO)) &&
+ isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
+ case Instruction::SDiv:
+ // Start value must not be signmask for signed division, so simply being a
+ // power of two is not sufficient, and it has to be a constant.
+ if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
+ return false;
+ LLVM_FALLTHROUGH;
+ case Instruction::UDiv:
+ // Divisor must be a power of two.
+ // If OrZero is false, we cannot guarantee the induction variable is
+ // non-zero after division (same for Shr) unless the division is exact.
+ return (OrZero || Q.IIQ.isExact(BO)) &&
+ isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
+ case Instruction::Shl:
+ return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
+ case Instruction::AShr:
+ if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
+ return false;
+ LLVM_FALLTHROUGH;
+ case Instruction::LShr:
+ return OrZero || Q.IIQ.isExact(BO);
+ default:
+ return false;
+ }
+}
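
The accepted recurrences correspond to loop patterns like the following plain-C++ analogs; the analysis itself inspects PHI nodes and flags such as nuw/exact rather than running code:

    #include <cassert>

    void powerOfTwoRecurrences() {
      // Shl step: a power-of-two start stays a power of two on each
      // iteration (until it would wrap, which the nuw/nsw check rules out).
      unsigned X = 8;
      for (int I = 0; I < 4; ++I) {
        assert((X & (X - 1)) == 0 && X != 0);
        X <<= 1;
      }
      // Exact-udiv step: dividing by a power of two with no remainder
      // keeps the value a nonzero power of two.
      unsigned Y = 64;
      for (int I = 0; I < 3; ++I) {
        assert((Y & (Y - 1)) == 0 && Y != 0);
        Y /= 2; // 64 -> 32 -> 16 -> 8
      }
    }
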
+
/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
@@ -2098,6 +2184,30 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
}
}
+ // A PHI node is power of two if all incoming values are power of two, or if
+ // it is an induction variable where in each step its value is a power of two.
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ Query RecQ = Q;
+
+ // Check if it is an induction variable and always power of two.
+ if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
+ return true;
+
+ // Recursively check all incoming values. Limit recursion to 2 levels, so
+ // that search complexity is limited to number of operands^2.
+ unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
+ return llvm::all_of(PN->operands(), [&](const Use &U) {
+ // Value is power of 2 if it is coming from PHI node itself by induction.
+ if (U.get() == PN)
+ return true;
+
+ // Change the context instruction to the incoming block where it is
+ // evaluated.
+ RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
+ return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
+ });
+ }
+
// An exact divide or right shift can only shift off zero bits, so the result
// is a power of two only if the first operand is a power of two and not
// copying a sign bit (sdiv int_min, 2).
@@ -2588,6 +2698,9 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
if (isKnownNonZero(Op, Depth, Q) &&
isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth))
return true;
+ } else if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
+ if (II->getIntrinsicID() == Intrinsic::vscale)
+ return true;
}
KnownBits Known(BitWidth);
@@ -2885,6 +2998,24 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
return CLow->sle(*CHigh);
}
+static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
+ const APInt *&CLow,
+ const APInt *&CHigh) {
+ assert((II->getIntrinsicID() == Intrinsic::smin ||
+ II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
+
+ Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+ auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+ if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
+ !match(II->getArgOperand(1), m_APInt(CLow)) ||
+ !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
+ return false;
+
+ if (II->getIntrinsicID() == Intrinsic::smin)
+ std::swap(CLow, CHigh);
+ return CLow->sle(*CHigh);
+}
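
What this recognizes is the usual clamp idiom; a C++ analog showing why the clamp bounds bound the sign-bit count (standalone, not LLVM API code):

    #include <algorithm>
    #include <cstdint>

    int8_t clampExample(int8_t X) {
      // smax(smin(X, 3), -4) clamps X into [-4, 3]. Every value in that
      // range fits in 3 bits of two's complement, so the 6 high bits of the
      // result all match the sign bit -- exactly the bound that
      // ComputeNumSignBits derives below from CLow/CHigh.
      return std::max<int8_t>(int8_t(-4), std::min<int8_t>(X, int8_t(3)));
    }
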
+
/// For vector constants, loop over the elements and find the constant with the
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
@@ -3225,6 +3356,12 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Absolute value reduces number of sign bits by at most 1.
return Tmp - 1;
+ case Intrinsic::smin:
+ case Intrinsic::smax: {
+ const APInt *CLow, *CHigh;
+ if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
+ return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+ }
}
}
}
@@ -3358,9 +3495,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
/// NOTE: Do not check 'nsz' here because that fast-math-flag does not guarantee
/// that a value is not -0.0. It only guarantees that -0.0 may be treated
/// the same as +0.0 in floating-point ops.
-///
-/// NOTE: this function will need to be revisited when we support non-default
-/// rounding modes!
bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
unsigned Depth) {
if (auto *CFP = dyn_cast<ConstantFP>(V))
@@ -3390,9 +3524,21 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
case Intrinsic::sqrt:
case Intrinsic::canonicalize:
return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
+ case Intrinsic::experimental_constrained_sqrt: {
+ // NOTE: This rounding mode restriction may be too strict.
+ const auto *CI = cast<ConstrainedFPIntrinsic>(Call);
+ if (CI->getRoundingMode() == RoundingMode::NearestTiesToEven)
+ return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
+ else
+ return false;
+ }
// fabs(x) != -0.0
case Intrinsic::fabs:
return true;
+ // sitofp and uitofp turn into +0.0 for zero.
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp:
+ return true;
}
}
@@ -4032,69 +4178,83 @@ bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
return true;
}
+// If V refers to an initialized global constant, set Slice either to
+// its initializer if the size of its elements equals ElementSize, or,
+// for ElementSize == 8, to its representation as an array of unsigned
+// char. Return true on success.
bool llvm::getConstantDataArrayInfo(const Value *V,
ConstantDataArraySlice &Slice,
unsigned ElementSize, uint64_t Offset) {
assert(V);
- // Look through bitcast instructions and geps.
- V = V->stripPointerCasts();
+ // Drill down into the pointer expression V, ignoring any intervening
+ // casts, and determine the identity of the object it references along
+ // with the cumulative byte offset into it.
+ const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(getUnderlyingObject(V));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    // Fail if V is not based on a constant global object.
+ return false;
- // If the value is a GEP instruction or constant expression, treat it as an
- // offset.
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- // The GEP operator should be based on a pointer to string constant, and is
- // indexing into the string constant.
- if (!isGEPBasedOnPointerToString(GEP, ElementSize))
- return false;
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
- // If the second index isn't a ConstantInt, then this is a variable index
- // into the array. If this occurs, we can't say anything meaningful about
- // the string.
- uint64_t StartIdx = 0;
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
- StartIdx = CI->getZExtValue();
- else
- return false;
- return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
- StartIdx + Offset);
- }
+ if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
+ /*AllowNonInbounds*/ true))
+ // Fail if a constant offset could not be determined.
+ return false;
- // The GEP instruction, constant or instruction, must reference a global
- // variable that is a constant and is initialized. The referenced constant
- // initializer is the array that we'll use for optimization.
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ uint64_t StartIdx = Off.getLimitedValue();
+ if (StartIdx == UINT64_MAX)
+ // Fail if the constant offset is excessive.
return false;
- const ConstantDataArray *Array;
- ArrayType *ArrayTy;
+ Offset += StartIdx;
+
+ ConstantDataArray *Array = nullptr;
+ ArrayType *ArrayTy = nullptr;
+
if (GV->getInitializer()->isNullValue()) {
Type *GVTy = GV->getValueType();
- if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
- // A zeroinitializer for the array; there is no ConstantDataArray.
- Array = nullptr;
- } else {
- const DataLayout &DL = GV->getParent()->getDataLayout();
- uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize();
- uint64_t Length = SizeInBytes / (ElementSize / 8);
- if (Length <= Offset)
- return false;
+ uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize();
+ uint64_t Length = SizeInBytes / (ElementSize / 8);
+
+ Slice.Array = nullptr;
+ Slice.Offset = 0;
+ // Return an empty Slice for undersized constants to let callers
+ // transform even undefined library calls into simpler, well-defined
+  // expressions. This is preferable to making the calls, although it
+  // prevents sanitizers from detecting such calls.
+ Slice.Length = Length < Offset ? 0 : Length - Offset;
+ return true;
+ }
- Slice.Array = nullptr;
- Slice.Offset = 0;
- Slice.Length = Length - Offset;
- return true;
+ auto *Init = const_cast<Constant *>(GV->getInitializer());
+ if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
+ Type *InitElTy = ArrayInit->getElementType();
+ if (InitElTy->isIntegerTy(ElementSize)) {
+ // If Init is an initializer for an array of the expected type
+ // and size, use it as is.
+ Array = ArrayInit;
+ ArrayTy = ArrayInit->getType();
}
- } else {
- // This must be a ConstantDataArray.
- Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
- if (!Array)
+ }
+
+ if (!Array) {
+ if (ElementSize != 8)
+ // TODO: Handle conversions to larger integral types.
return false;
- ArrayTy = Array->getType();
+
+ // Otherwise extract the portion of the initializer starting
+ // at Offset as an array of bytes, and reset Offset.
+ Init = ReadByteArrayFromGlobal(GV, Offset);
+ if (!Init)
+ return false;
+
+ Offset = 0;
+ Array = dyn_cast<ConstantDataArray>(Init);
+ ArrayTy = dyn_cast<ArrayType>(Init->getType());
}
- if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
- return false;
uint64_t NumElts = ArrayTy->getArrayNumElements();
if (Offset > NumElts)
@@ -4117,6 +4277,12 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
if (Slice.Array == nullptr) {
if (TrimAtNul) {
+ // Return a nul-terminated string even for an empty Slice. This is
+ // safe because all existing SimplifyLibcalls callers require string
+ // arguments and the behavior of the functions they fold is undefined
+ // otherwise. Folding the calls this way is preferable to making
+ // the undefined library calls, even though it prevents sanitizers
+ // from reporting such calls.
Str = StringRef();
return true;
}
@@ -4196,9 +4362,13 @@ static uint64_t GetStringLengthH(const Value *V,
return 0;
if (Slice.Array == nullptr)
+ // Zeroinitializer (including an empty one).
return 1;
- // Search for nul characters
+ // Search for the first nul character. Return a conservative result even
+ // when there is no nul. This is safe since otherwise the string function
+  // being folded, such as strlen, is undefined, and folding it is
+  // preferable to making the undefined library call.
unsigned NullIndex = 0;
for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
@@ -4517,13 +4687,40 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
const Operator *Inst = dyn_cast<Operator>(V);
if (!Inst)
return false;
+ return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI);
+}
+
+bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode,
+ const Operator *Inst,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+#ifndef NDEBUG
+ if (Inst->getOpcode() != Opcode) {
+ // Check that the operands are actually compatible with the Opcode override.
+ auto hasEqualReturnAndLeadingOperandTypes =
+ [](const Operator *Inst, unsigned NumLeadingOperands) {
+ if (Inst->getNumOperands() < NumLeadingOperands)
+ return false;
+ const Type *ExpectedType = Inst->getType();
+ for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
+ if (Inst->getOperand(ItOp)->getType() != ExpectedType)
+ return false;
+ return true;
+ };
+ assert(!Instruction::isBinaryOp(Opcode) ||
+ hasEqualReturnAndLeadingOperandTypes(Inst, 2));
+ assert(!Instruction::isUnaryOp(Opcode) ||
+ hasEqualReturnAndLeadingOperandTypes(Inst, 1));
+ }
+#endif
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
if (C->canTrap())
return false;
- switch (Inst->getOpcode()) {
+ switch (Opcode) {
default:
return true;
case Instruction::UDiv:
@@ -4554,7 +4751,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false;
}
case Instruction::Load: {
- const LoadInst *LI = cast<LoadInst>(Inst);
+ const LoadInst *LI = dyn_cast<LoadInst>(Inst);
+ if (!LI)
+ return false;
if (mustSuppressSpeculation(*LI))
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
@@ -4563,7 +4762,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
TLI);
}
case Instruction::Call: {
- auto *CI = cast<const CallInst>(Inst);
+ auto *CI = dyn_cast<const CallInst>(Inst);
+ if (!CI)
+ return false;
const Function *Callee = CI->getCalledFunction();
// The called function could have undefined behavior or side-effects, even
@@ -4595,8 +4796,20 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
}
}
-bool llvm::mayBeMemoryDependent(const Instruction &I) {
- return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
+bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
+ if (I.mayReadOrWriteMemory())
+ // Memory dependency possible
+ return true;
+ if (!isSafeToSpeculativelyExecute(&I))
+ // Can't move above a maythrow call or infinite loop. Or if an
+ // inalloca alloca, above a stacksave call.
+ return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ // 1) Can't reorder two inf-loop calls, even if readonly
+ // 2) Also can't reorder an inf-loop call below an instruction which isn't
+ // safe to speculatively execute. (Inverse of above)
+ return true;
+ return false;
}
/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
@@ -4766,6 +4979,22 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
+ // X - (X % ?)
+ // The remainder of a value can't have greater magnitude than itself,
+ // so the subtraction can't overflow.
+
+ // X - (X -nuw ?)
+ // In the minimal case, this would simplify to "?", so there's no subtract
+ // at all. But if this analysis is used to peek through casts, for example,
+ // then determining no-overflow may allow other transforms.
+
+ // TODO: There are other patterns like this.
+ // See simplifyICmpWithBinOpOnLHS() for candidates.
+ if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
+ match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
+ if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+ return OverflowResult::NeverOverflows;
+
// Checking for conditions implied by dominating conditions may be expensive.
// Limit it to usub_with_overflow calls for now.
if (match(CxtI,
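
A C-level analog of the first new pattern: the remainder can never exceed the dividend, so the subtraction cannot wrap (the function and names here are illustrative):

    #include <cassert>

    unsigned roundDownToMultiple(unsigned X, unsigned D) {
      assert(D != 0 && "keep the demo well-defined");
      unsigned Rem = X % D; // always <= X
      return X - Rem;       // never wraps: the NeverOverflows case above
    }
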
@@ -4789,6 +5018,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
+ // X - (X % ?)
+ // The remainder of a value can't have greater magnitude than itself,
+ // so the subtraction can't overflow.
+
+ // X - (X -nsw ?)
+ // In the minimal case, this would simplify to "?", so there's no subtract
+ // at all. But if this analysis is used to peek through casts, for example,
+ // then determining no-overflow may allow other transforms.
+ if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
+ match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
+ if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT))
+ return OverflowResult::NeverOverflows;
+
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
@@ -5100,7 +5342,9 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
}
if (auto *I = dyn_cast<LoadInst>(V))
- if (I->getMetadata(LLVMContext::MD_noundef))
+ if (I->hasMetadata(LLVMContext::MD_noundef) ||
+ I->hasMetadata(LLVMContext::MD_dereferenceable) ||
+ I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
return true;
if (programUndefinedIfUndefOrPoison(V, PoisonOnly))
@@ -5125,10 +5369,10 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
auto *TI = Dominator->getBlock()->getTerminator();
Value *Cond = nullptr;
- if (auto BI = dyn_cast<BranchInst>(TI)) {
+ if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
if (BI->isConditional())
Cond = BI->getCondition();
- } else if (auto SI = dyn_cast<SwitchInst>(TI)) {
+ } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) {
Cond = SI->getCondition();
}
@@ -5763,20 +6007,6 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
- // Z = X -nsw Y
- // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
- // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
- if (match(TrueVal, m_Zero()) &&
- match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
- return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
-
- // Z = X -nsw Y
- // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
- // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
- if (match(FalseVal, m_Zero()) &&
- match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
- return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
-
const APInt *C1;
if (!match(CmpRHS, m_APInt(C1)))
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -6576,11 +6806,38 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
if (LHS == RHS)
return LHSIsTrue;
- const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS);
- if (RHSCmp)
+ if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS))
return isImpliedCondition(LHS, RHSCmp->getPredicate(),
RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL,
LHSIsTrue, Depth);
+
+ if (Depth == MaxAnalysisRecursionDepth)
+ return None;
+
+ // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
+ // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
+ const Value *RHS1, *RHS2;
+ if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
+ if (Optional<bool> Imp =
+ isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
+ if (*Imp == true)
+ return true;
+ if (Optional<bool> Imp =
+ isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
+ if (*Imp == true)
+ return true;
+ }
+ if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
+ if (Optional<bool> Imp =
+ isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
+ if (*Imp == false)
+ return false;
+ if (Optional<bool> Imp =
+ isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
+ if (*Imp == false)
+ return false;
+ }
+
return None;
}
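
An exhaustive boolean check of the two new implication rules (standalone C++; `!L || P` encodes `L ==> P`):

    #include <cassert>

    int main() {
      for (int L = 0; L < 2; ++L)
        for (int R1 = 0; R1 < 2; ++R1)
          for (int R2 = 0; R2 < 2; ++R2) {
            // If L ==> R1, then L ==> (R1 || R2).
            if (!L || R1)
              assert(!L || (R1 || R2));
            // If L ==> !R1, then L ==> !(R1 && R2).
            if (!L || !R1)
              assert(!L || !(R1 && R2));
          }
      return 0;
    }
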
@@ -7072,66 +7329,25 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2,
const DataLayout &DL) {
- Ptr1 = Ptr1->stripPointerCasts();
- Ptr2 = Ptr2->stripPointerCasts();
+ APInt Offset1(DL.getIndexTypeSizeInBits(Ptr1->getType()), 0);
+ APInt Offset2(DL.getIndexTypeSizeInBits(Ptr2->getType()), 0);
+ Ptr1 = Ptr1->stripAndAccumulateConstantOffsets(DL, Offset1, true);
+ Ptr2 = Ptr2->stripAndAccumulateConstantOffsets(DL, Offset2, true);
// Handle the trivial case first.
- if (Ptr1 == Ptr2) {
- return 0;
- }
+ if (Ptr1 == Ptr2)
+ return Offset2.getSExtValue() - Offset1.getSExtValue();
const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
- // If one pointer is a GEP see if the GEP is a constant offset from the base,
- // as in "P" and "gep P, 1".
-  // Also do this iteratively to handle the following case:
- // Ptr_t1 = GEP Ptr1, c1
- // Ptr_t2 = GEP Ptr_t1, c2
- // Ptr2 = GEP Ptr_t2, c3
- // where we will return c1+c2+c3.
- // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base
- // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases
- // are the same, and return the difference between offsets.
- auto getOffsetFromBase = [&DL](const GEPOperator *GEP,
- const Value *Ptr) -> Optional<int64_t> {
- const GEPOperator *GEP_T = GEP;
- int64_t OffsetVal = 0;
- bool HasSameBase = false;
- while (GEP_T) {
- auto Offset = getOffsetFromIndex(GEP_T, 1, DL);
- if (!Offset)
- return None;
- OffsetVal += *Offset;
- auto Op0 = GEP_T->getOperand(0)->stripPointerCasts();
- if (Op0 == Ptr) {
- HasSameBase = true;
- break;
- }
- GEP_T = dyn_cast<GEPOperator>(Op0);
- }
- if (!HasSameBase)
- return None;
- return OffsetVal;
- };
-
- if (GEP1) {
- auto Offset = getOffsetFromBase(GEP1, Ptr2);
- if (Offset)
- return -*Offset;
- }
- if (GEP2) {
- auto Offset = getOffsetFromBase(GEP2, Ptr1);
- if (Offset)
- return Offset;
- }
-
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
// offset, which determines their offset from each other. At this point, we
// handle no other case.
- if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0) ||
+ GEP1->getSourceElementType() != GEP2->getSourceElementType())
return None;
// Skip any common indices and track the GEP types.
@@ -7140,9 +7356,10 @@ Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2,
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
- auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL);
- auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL);
- if (!Offset1 || !Offset2)
+ auto IOffset1 = getOffsetFromIndex(GEP1, Idx, DL);
+ auto IOffset2 = getOffsetFromIndex(GEP2, Idx, DL);
+ if (!IOffset1 || !IOffset2)
return None;
- return *Offset2 - *Offset1;
+ return *IOffset2 - *IOffset1 + Offset2.getSExtValue() -
+ Offset1.getSExtValue();
}
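
A C-level analog of the strengthened trivial case: once constant offsets are accumulated, two views of the same base compare by offset difference alone (names illustrative):

    #include <cstdint>

    int64_t pointerOffsetAnalog() {
      char Buf[16];
      char *P1 = Buf + 3;  // accumulated Offset1 = 3
      char *P2 = Buf + 11; // accumulated Offset2 = 11
      return P2 - P1;      // 8 == Offset2 - Offset1, the value returned above
    }
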
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 655c248907f6..f863a1ffad3a 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -40,7 +40,7 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
/// Return true if all of the intrinsic's arguments and return type are scalars
/// for the scalar form of the intrinsic, and vectors for the vector form of the
/// intrinsic (except operands that are marked as always being scalar by
-/// hasVectorInstrinsicScalarOpd).
+/// isVectorIntrinsicWithScalarOpAtArg).
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::abs: // Begin integer bit-manipulation.
@@ -89,6 +89,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fmuladd:
case Intrinsic::powi:
case Intrinsic::canonicalize:
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat:
return true;
default:
return false;
@@ -96,8 +98,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
}
/// Identifies if the vector form of the intrinsic has a scalar operand.
-bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
- unsigned ScalarOpdIdx) {
+bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
+ unsigned ScalarOpdIdx) {
switch (ID) {
case Intrinsic::abs:
case Intrinsic::ctlz:
@@ -114,11 +116,14 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
}
}
-bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
- unsigned ScalarOpdIdx) {
+bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
+ unsigned OpdIdx) {
switch (ID) {
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat:
+ return OpdIdx == 0;
case Intrinsic::powi:
- return (ScalarOpdIdx == 1);
+ return OpdIdx == 1;
default:
return false;
}
@@ -496,6 +501,116 @@ bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
return true;
}
+void llvm::processShuffleMasks(
+ ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
+ unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
+ function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
+ function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
+ SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
+ // Try to perform better estimation of the permutation.
+ // 1. Split the source/destination vectors into real registers.
+ // 2. Do the mask analysis to identify which real registers are
+ // permuted.
+ int Sz = Mask.size();
+ unsigned SzDest = Sz / NumOfDestRegs;
+ unsigned SzSrc = Sz / NumOfSrcRegs;
+ for (unsigned I = 0; I < NumOfDestRegs; ++I) {
+ auto &RegMasks = Res[I];
+ RegMasks.assign(NumOfSrcRegs, {});
+ // Check that the values in dest registers are in the one src
+ // register.
+ for (unsigned K = 0; K < SzDest; ++K) {
+ int Idx = I * SzDest + K;
+ if (Idx == Sz)
+ break;
+ if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
+ continue;
+ int SrcRegIdx = Mask[Idx] / SzSrc;
+ // Add a cost of PermuteTwoSrc for each new source register permute,
+      // if we have more than one source register.
+ if (RegMasks[SrcRegIdx].empty())
+ RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
+ RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
+ }
+ }
+ // Process split mask.
+ for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
+ auto &Dest = Res[I];
+ int NumSrcRegs =
+ count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
+ switch (NumSrcRegs) {
+ case 0:
+ // No input vectors were used!
+ NoInputAction();
+ break;
+ case 1: {
+      // Find the only non-empty mask (the single source register used).
+ auto *It =
+ find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
+ unsigned SrcReg = std::distance(Dest.begin(), It);
+ SingleInputAction(*It, SrcReg, I);
+ break;
+ }
+ default: {
+      // The first mask is a permutation of a single register. Since we have
+      // >= 2 input registers to shuffle, we merge the masks for the first 2
+      // registers and generate a shuffle of 2 registers rather than
+      // reordering the first register and then shuffling it with the second
+      // register. Next, generate the shuffles of the resulting register +
+      // the remaining registers from the list.
+ auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
+ ArrayRef<int> SecondMask) {
+ for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
+ if (SecondMask[Idx] != UndefMaskElem) {
+ assert(FirstMask[Idx] == UndefMaskElem &&
+ "Expected undefined mask element.");
+ FirstMask[Idx] = SecondMask[Idx] + VF;
+ }
+ }
+ };
+ auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
+ for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
+ if (Mask[Idx] != UndefMaskElem)
+ Mask[Idx] = Idx;
+ }
+ };
+ int SecondIdx;
+ do {
+ int FirstIdx = -1;
+ SecondIdx = -1;
+ MutableArrayRef<int> FirstMask, SecondMask;
+ for (unsigned I = 0; I < NumOfDestRegs; ++I) {
+ SmallVectorImpl<int> &RegMask = Dest[I];
+ if (RegMask.empty())
+ continue;
+
+ if (FirstIdx == SecondIdx) {
+ FirstIdx = I;
+ FirstMask = RegMask;
+ continue;
+ }
+ SecondIdx = I;
+ SecondMask = RegMask;
+ CombineMasks(FirstMask, SecondMask);
+ ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
+ NormalizeMask(FirstMask);
+ RegMask.clear();
+ SecondMask = FirstMask;
+ SecondIdx = FirstIdx;
+ }
+ if (FirstIdx != SecondIdx && SecondIdx >= 0) {
+ CombineMasks(SecondMask, FirstMask);
+ ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
+ Dest[FirstIdx].clear();
+ NormalizeMask(SecondMask);
+ }
+ } while (SecondIdx >= 0);
+ break;
+ }
+ }
+ }
+}
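
A sketch of driving the new helper with a mask spanning two registers; the callbacks just print which case fired (the mask and messages are illustrative, while the signature matches the declaration above):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/VectorUtils.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    void describeShuffle() {
      // 8 elements, 2 source and 2 destination registers of 4 elements
      // each; an identity mask keeps each destination in a single source.
      int Mask[] = {0, 1, 2, 3, 4, 5, 6, 7};
      processShuffleMasks(
          Mask, /*NumOfSrcRegs=*/2, /*NumOfDestRegs=*/2, /*NumOfUsedRegs=*/2,
          []() { outs() << "destination register unused\n"; },
          [](ArrayRef<int> M, unsigned Src, unsigned Dest) {
            outs() << "single-input permute " << Src << " -> " << Dest << "\n";
          },
          [](ArrayRef<int> M, unsigned I1, unsigned I2) {
            outs() << "two-input shuffle of " << I1 << " and " << I2 << "\n";
          });
    }
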
+
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {
@@ -543,9 +658,8 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
Value *Val = Worklist.pop_back_val();
Value *Leader = ECs.getOrInsertLeaderValue(Val);
- if (Visited.count(Val))
+ if (!Visited.insert(Val).second)
continue;
- Visited.insert(Val);
// Non-instructions terminate a chain successfully.
if (!isa<Instruction>(Val))
@@ -1387,7 +1501,7 @@ void VFABI::getVectorVariantNames(
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
- assert(Info.hasValue() && "Invalid name for a VFABI variant.");
+ assert(Info && "Invalid name for a VFABI variant.");
assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
"Vector function is missing.");
#endif
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index e3bf41c9721b..30e6f8599208 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -567,7 +567,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(exact);
KEYWORD(inbounds);
KEYWORD(inrange);
- KEYWORD(align);
KEYWORD(addrspace);
KEYWORD(section);
KEYWORD(partition);
@@ -576,12 +575,16 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(module);
KEYWORD(asm);
KEYWORD(sideeffect);
- KEYWORD(alignstack);
KEYWORD(inteldialect);
KEYWORD(gc);
KEYWORD(prefix);
KEYWORD(prologue);
+ KEYWORD(no_sanitize_address);
+ KEYWORD(no_sanitize_hwaddress);
+ KEYWORD(no_sanitize_memtag);
+ KEYWORD(sanitize_address_dyninit);
+
KEYWORD(ccc);
KEYWORD(fastcc);
KEYWORD(coldcc);
@@ -632,82 +635,13 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(c);
KEYWORD(attributes);
+ KEYWORD(sync);
+ KEYWORD(async);
- KEYWORD(alwaysinline);
- KEYWORD(allocsize);
- KEYWORD(argmemonly);
- KEYWORD(builtin);
- KEYWORD(byval);
- KEYWORD(inalloca);
- KEYWORD(cold);
- KEYWORD(convergent);
- KEYWORD(dereferenceable);
- KEYWORD(dereferenceable_or_null);
- KEYWORD(disable_sanitizer_instrumentation);
- KEYWORD(elementtype);
- KEYWORD(inaccessiblememonly);
- KEYWORD(inaccessiblemem_or_argmemonly);
- KEYWORD(inlinehint);
- KEYWORD(inreg);
- KEYWORD(jumptable);
- KEYWORD(minsize);
- KEYWORD(naked);
- KEYWORD(nest);
- KEYWORD(noalias);
- KEYWORD(nobuiltin);
- KEYWORD(nocallback);
- KEYWORD(nocapture);
- KEYWORD(noduplicate);
- KEYWORD(nofree);
- KEYWORD(noimplicitfloat);
- KEYWORD(noinline);
- KEYWORD(norecurse);
- KEYWORD(nonlazybind);
- KEYWORD(nomerge);
- KEYWORD(nonnull);
- KEYWORD(noprofile);
- KEYWORD(noredzone);
- KEYWORD(noreturn);
- KEYWORD(nosync);
- KEYWORD(nocf_check);
- KEYWORD(noundef);
- KEYWORD(nounwind);
- KEYWORD(nosanitize_coverage);
- KEYWORD(null_pointer_is_valid);
- KEYWORD(optforfuzzing);
- KEYWORD(optnone);
- KEYWORD(optsize);
- KEYWORD(preallocated);
- KEYWORD(readnone);
- KEYWORD(readonly);
- KEYWORD(returned);
- KEYWORD(returns_twice);
- KEYWORD(signext);
- KEYWORD(speculatable);
- KEYWORD(sret);
- KEYWORD(ssp);
- KEYWORD(sspreq);
- KEYWORD(sspstrong);
- KEYWORD(strictfp);
- KEYWORD(safestack);
- KEYWORD(shadowcallstack);
- KEYWORD(sanitize_address);
- KEYWORD(sanitize_hwaddress);
- KEYWORD(sanitize_memtag);
- KEYWORD(sanitize_thread);
- KEYWORD(sanitize_memory);
- KEYWORD(speculative_load_hardening);
- KEYWORD(swifterror);
- KEYWORD(swiftself);
- KEYWORD(swiftasync);
- KEYWORD(uwtable);
- KEYWORD(vscale_range);
- KEYWORD(willreturn);
- KEYWORD(writeonly);
- KEYWORD(zeroext);
- KEYWORD(immarg);
- KEYWORD(byref);
- KEYWORD(mustprogress);
+#define GET_ATTR_NAMES
+#define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
+ KEYWORD(DISPLAY_NAME);
+#include "llvm/IR/Attributes.inc"
KEYWORD(type);
KEYWORD(opaque);
@@ -781,7 +715,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(param);
KEYWORD(hotness);
KEYWORD(unknown);
- KEYWORD(hot);
KEYWORD(critical);
KEYWORD(relbf);
KEYWORD(variable);
@@ -856,7 +789,10 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("token", Type::getTokenTy(Context));
if (Keyword == "ptr") {
- if (Context.supportsTypedPointers()) {
+ // setOpaquePointers() must be called before creating any pointer types.
+ if (!Context.hasSetOpaquePointersValue()) {
+ Context.setOpaquePointers(true);
+ } else if (Context.supportsTypedPointers()) {
Warning("ptr type is only supported in -opaque-pointers mode");
return lltok::Error;
}
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 432ec151cf8a..a1cdeac2b47f 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Casting.h"
@@ -47,7 +48,6 @@
#include <algorithm>
#include <cassert>
#include <cstring>
-#include <iterator>
#include <vector>
using namespace llvm;
@@ -59,9 +59,31 @@ static std::string getTypeString(Type *T) {
return Tmp.str();
}
+static void setContextOpaquePointers(LLLexer &L, LLVMContext &C) {
+ while (true) {
+ lltok::Kind K = L.Lex();
+ // LLLexer will set the opaque pointers option in LLVMContext if it sees an
+ // explicit "ptr".
+ if (K == lltok::star || K == lltok::Error || K == lltok::Eof ||
+ isa_and_nonnull<PointerType>(L.getTyVal())) {
+ if (K == lltok::star)
+ C.setOpaquePointers(false);
+ return;
+ }
+ }
+}
+
/// Run: module ::= toplevelentity*
bool LLParser::Run(bool UpgradeDebugInfo,
DataLayoutCallbackTy DataLayoutCallback) {
+ // If we haven't decided on whether or not we're using opaque pointers, do a
+ // quick lex over the tokens to see if we explicitly construct any typed or
+ // opaque pointer types.
+ // Don't bail out on an error so we do the same work in the parsing below
+  // regardless of whether --opaque-pointers is set.
+ if (!Context.hasSetOpaquePointersValue())
+ setContextOpaquePointers(OPLex, Context);
+
// Prime the lexer.
Lex.Lex();
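
A sketch of the observable effect on a context that has not committed to a pointer mode yet (inputs illustrative):

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"

    using namespace llvm;

    bool usesOpaquePointers(StringRef IR) {
      LLVMContext Ctx; // no opaque-pointer decision made yet
      SMDiagnostic Err;
      if (!parseAssemblyString(IR, Err, Ctx))
        return false;
      // usesOpaquePointers("@g = global ptr null")  -> true
      // usesOpaquePointers("@g = global i32* null") -> false
      return !Ctx.supportsTypedPointers();
    }
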
@@ -248,7 +270,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
// remangle intrinsics names as well.
for (Function &F : llvm::make_early_inc_range(*M)) {
if (auto Remangled = Intrinsic::remangleIntrinsicFunction(&F)) {
- F.replaceAllUsesWith(Remangled.getValue());
+ F.replaceAllUsesWith(*Remangled);
F.eraseFromParent();
}
}
@@ -1081,6 +1103,45 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
return false;
}
+static bool isSanitizer(lltok::Kind Kind) {
+ switch (Kind) {
+ case lltok::kw_no_sanitize_address:
+ case lltok::kw_no_sanitize_hwaddress:
+ case lltok::kw_no_sanitize_memtag:
+ case lltok::kw_sanitize_address_dyninit:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool LLParser::parseSanitizer(GlobalVariable *GV) {
+ using SanitizerMetadata = GlobalValue::SanitizerMetadata;
+ SanitizerMetadata Meta;
+ if (GV->hasSanitizerMetadata())
+ Meta = GV->getSanitizerMetadata();
+
+ switch (Lex.getKind()) {
+ case lltok::kw_no_sanitize_address:
+ Meta.NoAddress = true;
+ break;
+ case lltok::kw_no_sanitize_hwaddress:
+ Meta.NoHWAddress = true;
+ break;
+ case lltok::kw_no_sanitize_memtag:
+ Meta.NoMemtag = true;
+ break;
+ case lltok::kw_sanitize_address_dyninit:
+ Meta.IsDynInit = true;
+ break;
+ default:
+ return tokError("non-sanitizer token passed to LLParser::parseSanitizer()");
+ }
+ GV->setSanitizerMetadata(Meta);
+ Lex.Lex();
+ return false;
+}
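
Textual syntax enabled by these parsers, sketched as a parse call; the global is illustrative, while the four accepted keywords are exactly those in isSanitizer above:

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"

    using namespace llvm;

    std::unique_ptr<Module> parseSanitizedGlobal(LLVMContext &Ctx) {
      SMDiagnostic Err;
      // The sanitizer words appear comma-separated after the initializer.
      return parseAssemblyString(
          "@g = global i32 0, no_sanitize_address, sanitize_address_dyninit\n",
          Err, Ctx);
    }
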
+
/// parseGlobal
/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
@@ -1168,7 +1229,7 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
GV->setUnnamedAddr(UnnamedAddr);
if (GVal) {
- if (!GVal->getType()->isOpaque() && GVal->getValueType() != Ty)
+ if (GVal->getType() != Ty->getPointerTo(AddrSpace))
return error(
TyLoc,
"forward reference and definition of global have different types");
@@ -1199,6 +1260,9 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
} else if (Lex.getKind() == lltok::MetadataVar) {
if (parseGlobalObjectMetadataAttachment(*GV))
return true;
+ } else if (isSanitizer(Lex.getKind())) {
+ if (parseSanitizer(GV))
+ return true;
} else {
Comdat *C;
if (parseOptionalComdat(Name, C))
@@ -1333,6 +1397,20 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B,
B.addDereferenceableOrNullAttr(Bytes);
return false;
}
+ case Attribute::UWTable: {
+ UWTableKind Kind;
+ if (parseOptionalUWTableKind(Kind))
+ return true;
+ B.addUWTableAttr(Kind);
+ return false;
+ }
+ case Attribute::AllocKind: {
+ AllocFnKind Kind = AllocFnKind::Unknown;
+ if (parseAllocKind(Kind))
+ return true;
+ B.addAllocKindAttr(Kind);
+ return false;
+ }
default:
B.addAttribute(Attr);
Lex.Lex();
@@ -1996,6 +2074,56 @@ bool LLParser::parseOptionalDerefAttrBytes(lltok::Kind AttrKind,
return false;
}
+bool LLParser::parseOptionalUWTableKind(UWTableKind &Kind) {
+ Lex.Lex();
+ Kind = UWTableKind::Default;
+ if (!EatIfPresent(lltok::lparen))
+ return false;
+ LocTy KindLoc = Lex.getLoc();
+ if (Lex.getKind() == lltok::kw_sync)
+ Kind = UWTableKind::Sync;
+ else if (Lex.getKind() == lltok::kw_async)
+ Kind = UWTableKind::Async;
+ else
+ return error(KindLoc, "expected unwind table kind");
+ Lex.Lex();
+ return parseToken(lltok::rparen, "expected ')'");
+}
+
+bool LLParser::parseAllocKind(AllocFnKind &Kind) {
+ Lex.Lex();
+ LocTy ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::lparen))
+ return error(ParenLoc, "expected '('");
+ LocTy KindLoc = Lex.getLoc();
+ std::string Arg;
+ if (parseStringConstant(Arg))
+ return error(KindLoc, "expected allockind value");
+ for (StringRef A : llvm::split(Arg, ",")) {
+ if (A == "alloc") {
+ Kind |= AllocFnKind::Alloc;
+ } else if (A == "realloc") {
+ Kind |= AllocFnKind::Realloc;
+ } else if (A == "free") {
+ Kind |= AllocFnKind::Free;
+ } else if (A == "uninitialized") {
+ Kind |= AllocFnKind::Uninitialized;
+ } else if (A == "zeroed") {
+ Kind |= AllocFnKind::Zeroed;
+ } else if (A == "aligned") {
+ Kind |= AllocFnKind::Aligned;
+ } else {
+ return error(KindLoc, Twine("unknown allockind ") + A);
+ }
+ }
+ ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::rparen))
+ return error(ParenLoc, "expected ')'");
+ if (Kind == AllocFnKind::Unknown)
+ return error(KindLoc, "expected allockind value");
+ return false;
+}
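
The allockind argument is a comma-separated list drawn from the six names checked above, e.g. allockind("alloc,uninitialized"). A small sketch of the same llvm::split iteration (function name hypothetical):

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"

    unsigned countAllocKindWords(llvm::StringRef Arg) {
      unsigned N = 0;
      // Each comma-separated piece ORs one AllocFnKind bit in the parser.
      for (llvm::StringRef A : llvm::split(Arg, ",")) {
        (void)A;
        ++N;
      }
      return N; // countAllocKindWords("alloc,uninitialized") == 2
    }
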
+
/// parseOptionalCommaAlign
/// ::=
/// ::= ',' align 4
@@ -3344,24 +3472,8 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
ID.Kind = ValID::t_Constant;
return false;
}
- case lltok::kw_extractvalue: {
- Lex.Lex();
- Constant *Val;
- SmallVector<unsigned, 4> Indices;
- if (parseToken(lltok::lparen,
- "expected '(' in extractvalue constantexpr") ||
- parseGlobalTypeAndValue(Val) || parseIndexList(Indices) ||
- parseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
- return true;
-
- if (!Val->getType()->isAggregateType())
- return error(ID.Loc, "extractvalue operand must be aggregate type");
- if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
- return error(ID.Loc, "invalid indices for extractvalue");
- ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices);
- ID.Kind = ValID::t_Constant;
- return false;
- }
+ case lltok::kw_extractvalue:
+ return error(ID.Loc, "extractvalue constexprs are no longer supported");
case lltok::kw_insertvalue: {
Lex.Lex();
Constant *Val0, *Val1;
@@ -3881,11 +3993,11 @@ struct MDAPSIntField : public MDFieldImpl<APSInt> {
};
struct MDSignedField : public MDFieldImpl<int64_t> {
- int64_t Min;
- int64_t Max;
+ int64_t Min = INT64_MIN;
+ int64_t Max = INT64_MAX;
MDSignedField(int64_t Default = 0)
- : ImplTy(Default), Min(INT64_MIN), Max(INT64_MAX) {}
+ : ImplTy(Default) {}
MDSignedField(int64_t Default, int64_t Min, int64_t Max)
: ImplTy(Default), Min(Min), Max(Max) {}
};
@@ -4144,8 +4256,8 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
Val = DINode::getFlag(Lex.getStrVal());
if (!Val)
- return tokError(Twine("invalid debug info flag flag '") +
- Lex.getStrVal() + "'");
+ return tokError(Twine("invalid debug info flag '") + Lex.getStrVal() +
+ "'");
Lex.Lex();
return false;
};
@@ -4779,7 +4891,8 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
OPTIONAL(declaration, MDField, ); \
OPTIONAL(retainedNodes, MDField, ); \
OPTIONAL(thrownTypes, MDField, ); \
- OPTIONAL(annotations, MDField, );
+ OPTIONAL(annotations, MDField, ); \
+ OPTIONAL(targetFuncName, MDStringField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4798,7 +4911,8 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
(Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val,
type.Val, scopeLine.Val, containingType.Val, virtualIndex.Val,
thisAdjustment.Val, flags.Val, SPFlags, unit.Val, templateParams.Val,
- declaration.Val, retainedNodes.Val, thrownTypes.Val, annotations.Val));
+ declaration.Val, retainedNodes.Val, thrownTypes.Val, annotations.Val,
+ targetFuncName.Val));
return false;
}
@@ -4965,7 +5079,7 @@ bool LLParser::parseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
/// declaration: !4, align: 8)
bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \
+ OPTIONAL(name, MDStringField, (/* AllowEmpty */ false)); \
OPTIONAL(scope, MDField, ); \
OPTIONAL(linkageName, MDStringField, ); \
OPTIONAL(file, MDField, ); \
@@ -5603,20 +5717,19 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
auto FRVI = ForwardRefVals.find(FunctionName);
if (FRVI != ForwardRefVals.end()) {
FwdFn = FRVI->second.first;
- if (!FwdFn->getType()->isOpaque()) {
- if (!FwdFn->getType()->getNonOpaquePointerElementType()->isFunctionTy())
- return error(FRVI->second.second, "invalid forward reference to "
- "function as global value!");
- if (FwdFn->getType() != PFT)
- return error(FRVI->second.second,
- "invalid forward reference to "
- "function '" +
- FunctionName +
- "' with wrong type: "
- "expected '" +
- getTypeString(PFT) + "' but was '" +
- getTypeString(FwdFn->getType()) + "'");
- }
+ if (!FwdFn->getType()->isOpaque() &&
+ !FwdFn->getType()->getNonOpaquePointerElementType()->isFunctionTy())
+ return error(FRVI->second.second, "invalid forward reference to "
+ "function as global value!");
+ if (FwdFn->getType() != PFT)
+ return error(FRVI->second.second,
+ "invalid forward reference to "
+ "function '" +
+ FunctionName +
+ "' with wrong type: "
+ "expected '" +
+ getTypeString(PFT) + "' but was '" +
+ getTypeString(FwdFn->getType()) + "'");
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
// Reject redefinitions.
@@ -5631,8 +5744,8 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
// types agree.
auto I = ForwardRefValIDs.find(NumberedVals.size());
if (I != ForwardRefValIDs.end()) {
- FwdFn = cast<Function>(I->second.first);
- if (!FwdFn->getType()->isOpaque() && FwdFn->getType() != PFT)
+ FwdFn = I->second.first;
+ if (FwdFn->getType() != PFT)
return error(NameLoc, "type of definition and forward reference of '@" +
Twine(NumberedVals.size()) +
"' disagree: "
@@ -7322,9 +7435,9 @@ int LLParser::parseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
PFS.getFunction().getParent()->getDataLayout().getTypeStoreSize(
Cmp->getType()));
- AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst(
- Ptr, Cmp, New, Alignment.getValueOr(DefaultAlignment), SuccessOrdering,
- FailureOrdering, SSID);
+ AtomicCmpXchgInst *CXI =
+ new AtomicCmpXchgInst(Ptr, Cmp, New, Alignment.value_or(DefaultAlignment),
+ SuccessOrdering, FailureOrdering, SSID);
CXI->setVolatile(isVolatile);
CXI->setWeak(isWeak);
@@ -7390,10 +7503,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
if (Operation == AtomicRMWInst::Xchg) {
if (!Val->getType()->isIntegerTy() &&
- !Val->getType()->isFloatingPointTy()) {
- return error(ValLoc,
- "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
- " operand must be an integer or floating point type");
+ !Val->getType()->isFloatingPointTy() &&
+ !Val->getType()->isPointerTy()) {
+ return error(
+ ValLoc,
+ "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
+ " operand must be an integer, floating point, or pointer type");
}
} else if (IsFP) {
if (!Val->getType()->isFloatingPointTy()) {
@@ -7409,7 +7524,9 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
}
}
- unsigned Size = Val->getType()->getPrimitiveSizeInBits();
+ unsigned Size =
+ PFS.getFunction().getParent()->getDataLayout().getTypeStoreSizeInBits(
+ Val->getType());
if (Size < 8 || (Size & (Size - 1)))
return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
" integer");
@@ -7418,7 +7535,7 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
Val->getType()));
AtomicRMWInst *RMWI =
new AtomicRMWInst(Operation, Ptr, Val,
- Alignment.getValueOr(DefaultAlignment), Ordering, SSID);
+ Alignment.value_or(DefaultAlignment), Ordering, SSID);
RMWI->setVolatile(isVolatile);
Inst = RMWI;
return AteExtraComma ? InstExtraComma : InstNormal;
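The getValueOr() -> value_or() renames above follow llvm::Optional's convergence on the std::optional spelling. A minimal sketch of the call-site pattern (hypothetical helper, not from this patch):

#include "llvm/ADT/Optional.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Mirrors the cmpxchg/atomicrmw call sites: fall back to the
// data-layout-derived default when no explicit alignment was parsed.
static Align chooseAlignment(Optional<Align> Parsed, Align Default) {
  return Parsed.value_or(Default);
}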
diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp
index 156fbbe71adb..95b9079f0f9c 100644
--- a/llvm/lib/AsmParser/Parser.cpp
+++ b/llvm/lib/AsmParser/Parser.cpp
@@ -11,13 +11,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/AsmParser/Parser.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/AsmParser/LLParser.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include <cstring>
#include <system_error>
using namespace llvm;
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 0d28d93c93c0..1613e7e42a0a 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -106,8 +106,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
return false;
if (!verifyIntegerEntry(ArgsMap, ".offset", true))
return false;
- if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
- msgpack::Type::String,
+ if (!verifyScalarEntry(ArgsMap, ".value_kind", true, msgpack::Type::String,
[](msgpack::DocNode &SNode) {
return StringSwitch<bool>(SNode.getString())
.Case("by_value", true)
@@ -133,6 +132,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
.Case("hidden_none", true)
.Case("hidden_printf_buffer", true)
.Case("hidden_hostcall_buffer", true)
+ .Case("hidden_heap_v1", true)
.Case("hidden_default_queue", true)
.Case("hidden_completion_action", true)
.Case("hidden_multigrid_sync_arg", true)
diff --git a/llvm/lib/BinaryFormat/COFF.cpp b/llvm/lib/BinaryFormat/COFF.cpp
new file mode 100644
index 000000000000..8fbee0218b79
--- /dev/null
+++ b/llvm/lib/BinaryFormat/COFF.cpp
@@ -0,0 +1,59 @@
+//===- llvm/BinaryFormat/COFF.cpp - The COFF format -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+
+// Maximum offsets for different string table entry encodings.
+enum : unsigned { Max7DecimalOffset = 9999999U };
+enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6 - 1 (six base64 digits, offsets counted from 0)
+
+// Encode a string table entry offset in base 64, padded to 6 chars, and
+// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ...
+// Buffer must be at least 8 bytes large. No terminating null appended.
+static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
+ assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset &&
+ "Illegal section name encoding for value");
+
+ static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
+
+ Buffer[0] = '/';
+ Buffer[1] = '/';
+
+ char *Ptr = Buffer + 7;
+ for (unsigned i = 0; i < 6; ++i) {
+ unsigned Rem = Value % 64;
+ Value /= 64;
+ *(Ptr--) = Alphabet[Rem];
+ }
+}
+
+bool llvm::COFF::encodeSectionName(char *Out, uint64_t Offset) {
+ if (Offset <= Max7DecimalOffset) {
+ // Offsets of 7 digits or less are encoded in ASCII.
+ SmallVector<char, COFF::NameSize> Buffer;
+ Twine('/').concat(Twine(Offset)).toVector(Buffer);
+ assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
+ std::memcpy(Out, Buffer.data(), Buffer.size());
+ return true;
+ }
+
+ if (Offset <= MaxBase64Offset) {
+ // Starting with 10,000,000, offsets are encoded as base64.
+ encodeBase64StringEntry(Out, Offset);
+ return true;
+ }
+
+ // The offset is too large to be encoded.
+ return false;
+}
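A usage sketch for the new helper (hypothetical caller; assumes the matching encodeSectionName declaration added to llvm/include/llvm/BinaryFormat/COFF.h):

#include "llvm/BinaryFormat/COFF.h"
#include <cstdio>

int main() {
  char Name[llvm::COFF::NameSize] = {};              // 8-byte section name field
  if (llvm::COFF::encodeSectionName(Name, 1234567))  // decimal: "/1234567"
    std::printf("%.8s\n", Name);
  if (llvm::COFF::encodeSectionName(Name, 10000000)) // base64: "//AAmJaA"
    std::printf("%.8s\n", Name);
}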
diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
index 044e4840cb3b..d45195fb95c5 100644
--- a/llvm/lib/BinaryFormat/Magic.cpp
+++ b/llvm/lib/BinaryFormat/Magic.cpp
@@ -74,6 +74,11 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::goff_object;
break;
+ case 0x10:
+ if (startswith(Magic, "\x10\xFF\x10\xAD"))
+ return file_magic::offload_binary;
+ break;
+
  case 0xDE: // 0x0B17C0DE = BC wrapper
if (startswith(Magic, "\xDE\xC0\x17\x0B"))
return file_magic::bitcode;
@@ -185,6 +190,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
case 0x84: // Alpha 64-bit
case 0x66: // MPS R4000 Windows
case 0x50: // mc68K
+ if (startswith(Magic, "\x50\xed\x55\xba"))
+ return file_magic::cuda_fatbinary;
+ LLVM_FALLTHROUGH;
+
case 0x4c: // 80386 Windows
case 0xc4: // ARMNT Windows
if (Magic[1] == 0x01)
@@ -221,6 +230,11 @@ file_magic llvm::identify_magic(StringRef Magic) {
if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
return file_magic::tapi_file;
break;
+
+ case 'D': // DirectX container file - DXBC
+ if (startswith(Magic, "DXBC"))
+ return file_magic::dxcontainer_object;
+ break;
default:
break;
diff --git a/llvm/lib/BinaryFormat/Wasm.cpp b/llvm/lib/BinaryFormat/Wasm.cpp
index 55efe31f2669..babeb12e49ef 100644
--- a/llvm/lib/BinaryFormat/Wasm.cpp
+++ b/llvm/lib/BinaryFormat/Wasm.cpp
@@ -8,7 +8,7 @@
#include "llvm/BinaryFormat/Wasm.h"
-std::string llvm::wasm::toString(wasm::WasmSymbolType Type) {
+llvm::StringRef llvm::wasm::toString(wasm::WasmSymbolType Type) {
switch (Type) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
return "WASM_SYMBOL_TYPE_FUNCTION";
@@ -26,7 +26,7 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType Type) {
llvm_unreachable("unknown symbol type");
}
-std::string llvm::wasm::relocTypetoString(uint32_t Type) {
+llvm::StringRef llvm::wasm::relocTypetoString(uint32_t Type) {
switch (Type) {
#define WASM_RELOC(NAME, VALUE) \
case VALUE: \
@@ -38,6 +38,31 @@ std::string llvm::wasm::relocTypetoString(uint32_t Type) {
}
}
+llvm::StringRef llvm::wasm::sectionTypeToString(uint32_t Type) {
+#define ECase(X) \
+ case wasm::WASM_SEC_##X: \
+ return #X;
+ switch (Type) {
+ ECase(CUSTOM);
+ ECase(TYPE);
+ ECase(IMPORT);
+ ECase(FUNCTION);
+ ECase(TABLE);
+ ECase(MEMORY);
+ ECase(GLOBAL);
+ ECase(EXPORT);
+ ECase(START);
+ ECase(ELEM);
+ ECase(CODE);
+ ECase(DATA);
+ ECase(DATACOUNT);
+ ECase(TAG);
+ default:
+ llvm_unreachable("unknown section type");
+ }
+#undef ECase
+}
+
bool llvm::wasm::relocTypeHasAddend(uint32_t Type) {
switch (Type) {
case R_WASM_MEMORY_ADDR_LEB:
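These helpers now return StringRef because every switch arm yields a string literal with static storage, so callers no longer pay for a std::string copy. A minimal sketch (hypothetical caller):

#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

static void printSectionType(uint32_t Type) {
  llvm::outs() << llvm::wasm::sectionTypeToString(Type) << '\n';
}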
diff --git a/llvm/lib/Bitcode/Reader/BitReader.cpp b/llvm/lib/Bitcode/Reader/BitReader.cpp
index 5ac893aef14e..da2cf0770ec5 100644
--- a/llvm/lib/Bitcode/Reader/BitReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -12,7 +12,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <string>
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index ffef35299981..1d16211c65bf 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -267,6 +267,7 @@ static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
+ STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS)
}
case bitc::VALUE_SYMTAB_BLOCK_ID:
switch (CodeID) {
@@ -735,7 +736,7 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
BlockStats.NumInstances++;
// BLOCKINFO is a special part of the stream.
- bool DumpRecords = O.hasValue();
+ bool DumpRecords = O.has_value();
if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
if (O && !O->DumpBlockinfo)
O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
@@ -864,7 +865,10 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
O->OS << " codeid=" << Code;
const BitCodeAbbrev *Abbv = nullptr;
if (Entry.ID != bitc::UNABBREV_RECORD) {
- Abbv = Stream.getAbbrev(Entry.ID);
+ Expected<const BitCodeAbbrev *> MaybeAbbv = Stream.getAbbrev(Entry.ID);
+ if (!MaybeAbbv)
+ return MaybeAbbv.takeError();
+ Abbv = MaybeAbbv.get();
O->OS << " abbrevid=" << Entry.ID;
}
@@ -894,13 +898,13 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
// If we found a module hash, let's verify that it matches!
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
- CheckHash.hasValue()) {
+ CheckHash) {
if (Record.size() != 5)
O->OS << " (invalid)";
else {
// Recompute the hash and compare it to the one in the bitcode
SHA1 Hasher;
- StringRef Hash;
+ std::array<uint8_t, 20> Hash;
Hasher.update(*CheckHash);
{
int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
@@ -908,14 +912,14 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
Hash = Hasher.result();
}
- std::array<char, 20> RecordedHash;
+ std::array<uint8_t, 20> RecordedHash;
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
support::endian::write32be(&RecordedHash[Pos], Val);
Pos += 4;
}
- if (Hash == StringRef(RecordedHash.data(), RecordedHash.size()))
+ if (Hash == RecordedHash)
O->OS << " (match)";
else
O->OS << " (!mismatch!)";
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 720ab560f988..93b07fc0db30 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GVMaterializer.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalObject.h"
@@ -50,6 +51,8 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -91,6 +94,11 @@ static cl::opt<bool> PrintSummaryGUIDs(
cl::desc(
"Print the global id for each value when reading the module summary"));
+static cl::opt<bool> ExpandConstantExprs(
+ "expand-constant-exprs", cl::Hidden,
+ cl::desc(
+ "Expand constant expressions to instructions for testing purposes"));
+
namespace {
enum {
@@ -282,7 +290,7 @@ static Expected<bool> hasObjCCategoryInModule(BitstreamCursor &Stream) {
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
std::string S;
if (convertToString(Record, 0, S))
- return error("Invalid record");
+ return error("Invalid section name record");
// Check for the i386 and other (x86_64, ARM) conventions
if (S.find("__DATA,__objc_catlist") != std::string::npos ||
S.find("__OBJC,__category") != std::string::npos)
@@ -361,7 +369,7 @@ static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (convertToString(Record, 0, S))
- return error("Invalid record");
+ return error("Invalid triple record");
Triple = S;
break;
}
@@ -429,7 +437,7 @@ protected:
std::pair<StringRef, ArrayRef<uint64_t>>
readNameFromStrtab(ArrayRef<uint64_t> Record);
- bool readBlockInfo();
+ Error readBlockInfo();
// Contains an arbitrary and optional string identifying the bitcode producer
std::string ProducerIdentification;
@@ -450,7 +458,7 @@ Error BitcodeReaderBase::error(const Twine &Message) {
Expected<unsigned>
BitcodeReaderBase::parseVersionRecord(ArrayRef<uint64_t> Record) {
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid version record");
unsigned ModuleVersion = Record[0];
if (ModuleVersion > 2)
return error("Invalid value");
@@ -470,6 +478,90 @@ BitcodeReaderBase::readNameFromStrtab(ArrayRef<uint64_t> Record) {
namespace {
+/// This represents a constant expression or constant aggregate using a custom
+/// structure internal to the bitcode reader. Later, this structure will be
+/// expanded by materializeValue() either into a constant expression/aggregate,
+/// or into an instruction sequence at the point of use. This allows us to
+/// upgrade bitcode using constant expressions even if this kind of constant
+/// expression is no longer supported.
+class BitcodeConstant final : public Value,
+ TrailingObjects<BitcodeConstant, unsigned> {
+ friend TrailingObjects;
+
+ // Value subclass ID: Pick largest possible value to avoid any clashes.
+ static constexpr uint8_t SubclassID = 255;
+
+public:
+ // Opcodes used for non-expressions. This includes constant aggregates
+ // (struct, array, vector) that might need expansion, as well as non-leaf
+ // constants that don't need expansion (no_cfi, dso_local, blockaddress),
+ // but still go through BitcodeConstant to avoid different uselist orders
+ // between the two cases.
+ static constexpr uint8_t ConstantStructOpcode = 255;
+ static constexpr uint8_t ConstantArrayOpcode = 254;
+ static constexpr uint8_t ConstantVectorOpcode = 253;
+ static constexpr uint8_t NoCFIOpcode = 252;
+ static constexpr uint8_t DSOLocalEquivalentOpcode = 251;
+ static constexpr uint8_t BlockAddressOpcode = 250;
+ static constexpr uint8_t FirstSpecialOpcode = BlockAddressOpcode;
+
+ // Separate struct to make passing different number of parameters to
+ // BitcodeConstant::create() more convenient.
+ struct ExtraInfo {
+ uint8_t Opcode;
+ uint8_t Flags;
+ unsigned Extra;
+ Type *SrcElemTy;
+
+ ExtraInfo(uint8_t Opcode, uint8_t Flags = 0, unsigned Extra = 0,
+ Type *SrcElemTy = nullptr)
+ : Opcode(Opcode), Flags(Flags), Extra(Extra), SrcElemTy(SrcElemTy) {}
+ };
+
+ uint8_t Opcode;
+ uint8_t Flags;
+ unsigned NumOperands;
+ unsigned Extra; // GEP inrange index or blockaddress BB id.
+ Type *SrcElemTy; // GEP source element type.
+
+private:
+ BitcodeConstant(Type *Ty, const ExtraInfo &Info, ArrayRef<unsigned> OpIDs)
+ : Value(Ty, SubclassID), Opcode(Info.Opcode), Flags(Info.Flags),
+ NumOperands(OpIDs.size()), Extra(Info.Extra),
+ SrcElemTy(Info.SrcElemTy) {
+ std::uninitialized_copy(OpIDs.begin(), OpIDs.end(),
+ getTrailingObjects<unsigned>());
+ }
+
+ BitcodeConstant &operator=(const BitcodeConstant &) = delete;
+
+public:
+ static BitcodeConstant *create(BumpPtrAllocator &A, Type *Ty,
+ const ExtraInfo &Info,
+ ArrayRef<unsigned> OpIDs) {
+ void *Mem = A.Allocate(totalSizeToAlloc<unsigned>(OpIDs.size()),
+ alignof(BitcodeConstant));
+ return new (Mem) BitcodeConstant(Ty, Info, OpIDs);
+ }
+
+ static bool classof(const Value *V) { return V->getValueID() == SubclassID; }
+
+ ArrayRef<unsigned> getOperandIDs() const {
+ return makeArrayRef(getTrailingObjects<unsigned>(), NumOperands);
+ }
+
+ Optional<unsigned> getInRangeIndex() const {
+ assert(Opcode == Instruction::GetElementPtr);
+ if (Extra == (unsigned)-1)
+ return None;
+ return Extra;
+ }
+
+ const char *getOpcodeName() const {
+ return Instruction::getOpcodeName(Opcode);
+ }
+};
+
class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
LLVMContext &Context;
Module *TheModule = nullptr;
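BitcodeConstant co-allocates its operand IDs inline via TrailingObjects, so a single bump-allocator allocation holds both the node and its variable-length tail. The same layout pattern, reduced to a standalone sketch (hypothetical IDNode type, not from this patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/TrailingObjects.h"
#include <memory>
#include <new>

// Hypothetical node storing N unsigned IDs immediately after the object.
class IDNode final : private llvm::TrailingObjects<IDNode, unsigned> {
  friend TrailingObjects;
  unsigned NumIDs;

  IDNode(llvm::ArrayRef<unsigned> IDs) : NumIDs(IDs.size()) {
    std::uninitialized_copy(IDs.begin(), IDs.end(),
                            getTrailingObjects<unsigned>());
  }

public:
  static IDNode *create(llvm::BumpPtrAllocator &A,
                        llvm::ArrayRef<unsigned> IDs) {
    void *Mem =
        A.Allocate(totalSizeToAlloc<unsigned>(IDs.size()), alignof(IDNode));
    return new (Mem) IDNode(IDs);
  }

  llvm::ArrayRef<unsigned> ids() const {
    return llvm::makeArrayRef(getTrailingObjects<unsigned>(), NumIDs);
  }
};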
@@ -483,8 +575,23 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
std::vector<std::string> SectionTable;
std::vector<std::string> GCTable;
- std::vector<Type*> TypeList;
- DenseMap<Function *, FunctionType *> FunctionTypes;
+ std::vector<Type *> TypeList;
+ /// Track type IDs of contained types. Order is the same as the contained
+ /// types of a Type*. This is used during upgrades of typed pointer IR in
+ /// opaque pointer mode.
+ DenseMap<unsigned, SmallVector<unsigned, 1>> ContainedTypeIDs;
+  /// In some cases, we need to create a type ID for a type that was not
+  /// explicitly encoded in the bitcode, or that we don't know about at the
+  /// current point. For example, a global may explicitly encode the value
+  /// type ID, but not have a type ID for the pointer to the value type, for
+  /// which we create a virtual type ID instead. This map stores the new type
+  /// ID that was created for the given pair of Type and contained type ID.
+ DenseMap<std::pair<Type *, unsigned>, unsigned> VirtualTypeIDs;
+ DenseMap<Function *, unsigned> FunctionTypeIDs;
+ /// Allocator for BitcodeConstants. This should come before ValueList,
+ /// because the ValueList might hold ValueHandles to these constants, so
+ /// ValueList must be destroyed before Alloc.
+ BumpPtrAllocator Alloc;
BitcodeReaderValueList ValueList;
Optional<MetadataLoader> MDLoader;
std::vector<Comdat *> ComdatList;
@@ -544,6 +651,13 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
std::deque<Function *> BasicBlockFwdRefQueue;
+  /// These are Functions that contain BlockAddresses which refer to a
+  /// different Function. When parsing that other Function, queue the
+  /// Functions that refer to it. Those Functions must be materialized in
+  /// order to resolve their BlockAddress constants before the referenced
+  /// Function gets moved into another Module.
+ std::vector<Function *> BackwardRefFunctions;
+
/// Indicates that we are using a new encoding for instruction operands where
/// most operands in the current FUNCTION_BLOCK are encoded relative to the
/// instruction number, for a more compact encoding. Some instruction
@@ -575,8 +689,8 @@ public:
/// Main interface to parsing a bitcode buffer.
/// \returns true if an error occurred.
Error parseBitcodeInto(
- Module *M, bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
- DataLayoutCallbackTy DataLayoutCallback = [](StringRef) { return None; });
+ Module *M, bool ShouldLazyLoadMetadata, bool IsImporting,
+ DataLayoutCallbackTy DataLayoutCallback);
static uint64_t decodeSignRotatedValue(uint64_t V);
@@ -590,12 +704,21 @@ private:
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
StructType *createIdentifiedStructType(LLVMContext &Context);
+ static constexpr unsigned InvalidTypeID = ~0u;
+
Type *getTypeByID(unsigned ID);
+ Type *getPtrElementTypeByID(unsigned ID);
+ unsigned getContainedTypeID(unsigned ID, unsigned Idx = 0);
+ unsigned getVirtualTypeID(Type *Ty, ArrayRef<unsigned> ContainedTypeIDs = {});
+
+ Expected<Value *> materializeValue(unsigned ValID, BasicBlock *InsertBB);
+ Expected<Constant *> getValueForInitializer(unsigned ID);
- Value *getFnValueByID(unsigned ID, Type *Ty) {
+ Value *getFnValueByID(unsigned ID, Type *Ty, unsigned TyID,
+ BasicBlock *ConstExprInsertBB) {
if (Ty && Ty->isMetadataTy())
return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
- return ValueList.getValueFwdRef(ID, Ty);
+ return ValueList.getValueFwdRef(ID, Ty, TyID, ConstExprInsertBB);
}
Metadata *getFnMetadataByID(unsigned ID) {
@@ -617,7 +740,8 @@ private:
/// Increment Slot past the number of slots used in the record. Return true on
/// failure.
bool getValueTypePair(const SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
- unsigned InstNum, Value *&ResVal) {
+ unsigned InstNum, Value *&ResVal, unsigned &TypeID,
+ BasicBlock *ConstExprInsertBB) {
if (Slot == Record.size()) return true;
unsigned ValNo = (unsigned)Record[Slot++];
// Adjust the ValNo, if it was encoded relative to the InstNum.
@@ -626,14 +750,18 @@ private:
if (ValNo < InstNum) {
// If this is not a forward reference, just return the value we already
// have.
- ResVal = getFnValueByID(ValNo, nullptr);
+ TypeID = ValueList.getTypeID(ValNo);
+ ResVal = getFnValueByID(ValNo, nullptr, TypeID, ConstExprInsertBB);
+ assert((!ResVal || ResVal->getType() == getTypeByID(TypeID)) &&
+ "Incorrect type ID stored for value");
return ResVal == nullptr;
}
if (Slot == Record.size())
return true;
- unsigned TypeNo = (unsigned)Record[Slot++];
- ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ TypeID = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, getTypeByID(TypeID), TypeID,
+ ConstExprInsertBB);
return ResVal == nullptr;
}
@@ -641,8 +769,9 @@ private:
/// past the number of slots used by the value in the record. Return true if
/// there is an error.
bool popValue(const SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
- unsigned InstNum, Type *Ty, Value *&ResVal) {
- if (getValue(Record, Slot, InstNum, Ty, ResVal))
+ unsigned InstNum, Type *Ty, unsigned TyID, Value *&ResVal,
+ BasicBlock *ConstExprInsertBB) {
+ if (getValue(Record, Slot, InstNum, Ty, TyID, ResVal, ConstExprInsertBB))
return true;
// All values currently take a single record slot.
++Slot;
@@ -651,38 +780,41 @@ private:
/// Like popValue, but does not increment the Slot number.
bool getValue(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty, Value *&ResVal) {
- ResVal = getValue(Record, Slot, InstNum, Ty);
+ unsigned InstNum, Type *Ty, unsigned TyID, Value *&ResVal,
+ BasicBlock *ConstExprInsertBB) {
+ ResVal = getValue(Record, Slot, InstNum, Ty, TyID, ConstExprInsertBB);
return ResVal == nullptr;
}
/// Version of getValue that returns ResVal directly, or 0 if there is an
/// error.
Value *getValue(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty) {
+ unsigned InstNum, Type *Ty, unsigned TyID,
+ BasicBlock *ConstExprInsertBB) {
if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)Record[Slot];
// Adjust the ValNo, if it was encoded relative to the InstNum.
if (UseRelativeIDs)
ValNo = InstNum - ValNo;
- return getFnValueByID(ValNo, Ty);
+ return getFnValueByID(ValNo, Ty, TyID, ConstExprInsertBB);
}
/// Like getValue, but decodes signed VBRs.
Value *getValueSigned(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty) {
+ unsigned InstNum, Type *Ty, unsigned TyID,
+ BasicBlock *ConstExprInsertBB) {
if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
// Adjust the ValNo, if it was encoded relative to the InstNum.
if (UseRelativeIDs)
ValNo = InstNum - ValNo;
- return getFnValueByID(ValNo, Ty);
+ return getFnValueByID(ValNo, Ty, TyID, ConstExprInsertBB);
}
/// Upgrades old-style typeless byval/sret/inalloca attributes by adding the
/// corresponding argument's pointee type. Also upgrades intrinsics that now
/// require an elementtype attribute.
- void propagateAttributeTypes(CallBase *CB, ArrayRef<Type *> ArgsTys);
+ Error propagateAttributeTypes(CallBase *CB, ArrayRef<unsigned> ArgsTys);
/// Converts alignment exponent (i.e. power of two (or zero)) to the
/// corresponding alignment to use. If alignment is too large, returns
@@ -827,7 +959,10 @@ BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
StringRef ProducerIdentification,
LLVMContext &Context)
: BitcodeReaderBase(std::move(Stream), Strtab), Context(Context),
- ValueList(Context, Stream.SizeInBytes()) {
+ ValueList(this->Stream.SizeInBytes(),
+ [this](unsigned ValID, BasicBlock *InsertBB) {
+ return materializeValue(ValID, InsertBB);
+ }) {
this->ProducerIdentification = std::string(ProducerIdentification);
}
@@ -859,6 +994,11 @@ Error BitcodeReader::materializeForwardReferencedFunctions() {
}
assert(BasicBlockFwdRefs.empty() && "Function missing from queue");
+ for (Function *F : BackwardRefFunctions)
+ if (Error Err = materialize(F))
+ return Err;
+ BackwardRefFunctions.clear();
+
// Reset state.
WillMaterializeAllForwardRefs = false;
return Error::success();
@@ -1176,6 +1316,324 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
return TypeList[ID] = createIdentifiedStructType(Context);
}
+unsigned BitcodeReader::getContainedTypeID(unsigned ID, unsigned Idx) {
+ auto It = ContainedTypeIDs.find(ID);
+ if (It == ContainedTypeIDs.end())
+ return InvalidTypeID;
+
+ if (Idx >= It->second.size())
+ return InvalidTypeID;
+
+ return It->second[Idx];
+}
+
+Type *BitcodeReader::getPtrElementTypeByID(unsigned ID) {
+ if (ID >= TypeList.size())
+ return nullptr;
+
+ Type *Ty = TypeList[ID];
+ if (!Ty->isPointerTy())
+ return nullptr;
+
+ Type *ElemTy = getTypeByID(getContainedTypeID(ID, 0));
+ if (!ElemTy)
+ return nullptr;
+
+ assert(cast<PointerType>(Ty)->isOpaqueOrPointeeTypeMatches(ElemTy) &&
+ "Incorrect element type");
+ return ElemTy;
+}
+
+unsigned BitcodeReader::getVirtualTypeID(Type *Ty,
+ ArrayRef<unsigned> ChildTypeIDs) {
+ unsigned ChildTypeID = ChildTypeIDs.empty() ? InvalidTypeID : ChildTypeIDs[0];
+ auto CacheKey = std::make_pair(Ty, ChildTypeID);
+ auto It = VirtualTypeIDs.find(CacheKey);
+ if (It != VirtualTypeIDs.end()) {
+ // The cmpxchg return value is the only place we need more than one
+    // contained type ID; however, the second one is always the same (i1),
+ // so we don't need to include it in the cache key. This asserts that the
+ // contained types are indeed as expected and there are no collisions.
+ assert((ChildTypeIDs.empty() ||
+ ContainedTypeIDs[It->second] == ChildTypeIDs) &&
+ "Incorrect cached contained type IDs");
+ return It->second;
+ }
+
+#ifndef NDEBUG
+ if (!Ty->isOpaquePointerTy()) {
+ assert(Ty->getNumContainedTypes() == ChildTypeIDs.size() &&
+ "Wrong number of contained types");
+ for (auto Pair : zip(Ty->subtypes(), ChildTypeIDs)) {
+ assert(std::get<0>(Pair) == getTypeByID(std::get<1>(Pair)) &&
+ "Incorrect contained type ID");
+ }
+ }
+#endif
+
+ unsigned TypeID = TypeList.size();
+ TypeList.push_back(Ty);
+ if (!ChildTypeIDs.empty())
+ append_range(ContainedTypeIDs[TypeID], ChildTypeIDs);
+ VirtualTypeIDs.insert({CacheKey, TypeID});
+ return TypeID;
+}
+
+static bool isConstExprSupported(uint8_t Opcode) {
+ // These are not real constant expressions, always consider them supported.
+ if (Opcode >= BitcodeConstant::FirstSpecialOpcode)
+ return true;
+
+ return !ExpandConstantExprs;
+}
+
+Expected<Value *> BitcodeReader::materializeValue(unsigned StartValID,
+ BasicBlock *InsertBB) {
+ // Quickly handle the case where there is no BitcodeConstant to resolve.
+ if (StartValID < ValueList.size() && ValueList[StartValID] &&
+ !isa<BitcodeConstant>(ValueList[StartValID]))
+ return ValueList[StartValID];
+
+ SmallDenseMap<unsigned, Value *> MaterializedValues;
+ SmallVector<unsigned> Worklist;
+ Worklist.push_back(StartValID);
+ while (!Worklist.empty()) {
+ unsigned ValID = Worklist.back();
+ if (MaterializedValues.count(ValID)) {
+ // Duplicate expression that was already handled.
+ Worklist.pop_back();
+ continue;
+ }
+
+ if (ValID >= ValueList.size() || !ValueList[ValID])
+ return error("Invalid value ID");
+
+ Value *V = ValueList[ValID];
+ auto *BC = dyn_cast<BitcodeConstant>(V);
+ if (!BC) {
+ MaterializedValues.insert({ValID, V});
+ Worklist.pop_back();
+ continue;
+ }
+
+    // Iterate in reverse, so values will get popped from the worklist in
+    // the expected order.
+ SmallVector<Value *> Ops;
+ for (unsigned OpID : reverse(BC->getOperandIDs())) {
+ auto It = MaterializedValues.find(OpID);
+ if (It != MaterializedValues.end())
+ Ops.push_back(It->second);
+ else
+ Worklist.push_back(OpID);
+ }
+
+    // Some expressions have not been resolved yet; handle them first and
+    // then revisit this one.
+ if (Ops.size() != BC->getOperandIDs().size())
+ continue;
+ std::reverse(Ops.begin(), Ops.end());
+
+ SmallVector<Constant *> ConstOps;
+ for (Value *Op : Ops)
+ if (auto *C = dyn_cast<Constant>(Op))
+ ConstOps.push_back(C);
+
+ // Materialize as constant expression if possible.
+ if (isConstExprSupported(BC->Opcode) && ConstOps.size() == Ops.size()) {
+ Constant *C;
+ if (Instruction::isCast(BC->Opcode)) {
+ C = UpgradeBitCastExpr(BC->Opcode, ConstOps[0], BC->getType());
+ if (!C)
+ C = ConstantExpr::getCast(BC->Opcode, ConstOps[0], BC->getType());
+ } else if (Instruction::isUnaryOp(BC->Opcode)) {
+ C = ConstantExpr::get(BC->Opcode, ConstOps[0], BC->Flags);
+ } else if (Instruction::isBinaryOp(BC->Opcode)) {
+ C = ConstantExpr::get(BC->Opcode, ConstOps[0], ConstOps[1], BC->Flags);
+ } else {
+ switch (BC->Opcode) {
+ case BitcodeConstant::NoCFIOpcode: {
+ auto *GV = dyn_cast<GlobalValue>(ConstOps[0]);
+ if (!GV)
+ return error("no_cfi operand must be GlobalValue");
+ C = NoCFIValue::get(GV);
+ break;
+ }
+ case BitcodeConstant::DSOLocalEquivalentOpcode: {
+ auto *GV = dyn_cast<GlobalValue>(ConstOps[0]);
+ if (!GV)
+ return error("dso_local operand must be GlobalValue");
+ C = DSOLocalEquivalent::get(GV);
+ break;
+ }
+ case BitcodeConstant::BlockAddressOpcode: {
+ Function *Fn = dyn_cast<Function>(ConstOps[0]);
+ if (!Fn)
+ return error("blockaddress operand must be a function");
+
+ // If the function is already parsed we can insert the block address
+ // right away.
+ BasicBlock *BB;
+ unsigned BBID = BC->Extra;
+ if (!BBID)
+ // Invalid reference to entry block.
+ return error("Invalid ID");
+ if (!Fn->empty()) {
+ Function::iterator BBI = Fn->begin(), BBE = Fn->end();
+ for (size_t I = 0, E = BBID; I != E; ++I) {
+ if (BBI == BBE)
+ return error("Invalid ID");
+ ++BBI;
+ }
+ BB = &*BBI;
+ } else {
+ // Otherwise insert a placeholder and remember it so it can be
+ // inserted when the function is parsed.
+ auto &FwdBBs = BasicBlockFwdRefs[Fn];
+ if (FwdBBs.empty())
+ BasicBlockFwdRefQueue.push_back(Fn);
+ if (FwdBBs.size() < BBID + 1)
+ FwdBBs.resize(BBID + 1);
+ if (!FwdBBs[BBID])
+ FwdBBs[BBID] = BasicBlock::Create(Context);
+ BB = FwdBBs[BBID];
+ }
+ C = BlockAddress::get(Fn, BB);
+ break;
+ }
+ case BitcodeConstant::ConstantStructOpcode:
+ C = ConstantStruct::get(cast<StructType>(BC->getType()), ConstOps);
+ break;
+ case BitcodeConstant::ConstantArrayOpcode:
+ C = ConstantArray::get(cast<ArrayType>(BC->getType()), ConstOps);
+ break;
+ case BitcodeConstant::ConstantVectorOpcode:
+ C = ConstantVector::get(ConstOps);
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ C = ConstantExpr::getCompare(BC->Flags, ConstOps[0], ConstOps[1]);
+ break;
+ case Instruction::GetElementPtr:
+ C = ConstantExpr::getGetElementPtr(
+ BC->SrcElemTy, ConstOps[0], makeArrayRef(ConstOps).drop_front(),
+ BC->Flags, BC->getInRangeIndex());
+ break;
+ case Instruction::Select:
+ C = ConstantExpr::getSelect(ConstOps[0], ConstOps[1], ConstOps[2]);
+ break;
+ case Instruction::ExtractElement:
+ C = ConstantExpr::getExtractElement(ConstOps[0], ConstOps[1]);
+ break;
+ case Instruction::InsertElement:
+ C = ConstantExpr::getInsertElement(ConstOps[0], ConstOps[1],
+ ConstOps[2]);
+ break;
+ case Instruction::ShuffleVector: {
+ SmallVector<int, 16> Mask;
+ ShuffleVectorInst::getShuffleMask(ConstOps[2], Mask);
+ C = ConstantExpr::getShuffleVector(ConstOps[0], ConstOps[1], Mask);
+ break;
+ }
+ default:
+ llvm_unreachable("Unhandled bitcode constant");
+ }
+ }
+
+ // Cache resolved constant.
+ ValueList.replaceValueWithoutRAUW(ValID, C);
+ MaterializedValues.insert({ValID, C});
+ Worklist.pop_back();
+ continue;
+ }
+
+ if (!InsertBB)
+ return error(Twine("Value referenced by initializer is an unsupported "
+ "constant expression of type ") +
+ BC->getOpcodeName());
+
+ // Materialize as instructions if necessary.
+ Instruction *I;
+ if (Instruction::isCast(BC->Opcode)) {
+ I = CastInst::Create((Instruction::CastOps)BC->Opcode, Ops[0],
+ BC->getType(), "constexpr", InsertBB);
+ } else if (Instruction::isUnaryOp(BC->Opcode)) {
+ I = UnaryOperator::Create((Instruction::UnaryOps)BC->Opcode, Ops[0],
+ "constexpr", InsertBB);
+ } else if (Instruction::isBinaryOp(BC->Opcode)) {
+ I = BinaryOperator::Create((Instruction::BinaryOps)BC->Opcode, Ops[0],
+ Ops[1], "constexpr", InsertBB);
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (BC->Flags & OverflowingBinaryOperator::NoSignedWrap)
+ I->setHasNoSignedWrap();
+ if (BC->Flags & OverflowingBinaryOperator::NoUnsignedWrap)
+ I->setHasNoUnsignedWrap();
+ }
+ if (isa<PossiblyExactOperator>(I) &&
+ (BC->Flags & PossiblyExactOperator::IsExact))
+ I->setIsExact();
+ } else {
+ switch (BC->Opcode) {
+ case BitcodeConstant::ConstantStructOpcode:
+ case BitcodeConstant::ConstantArrayOpcode:
+ case BitcodeConstant::ConstantVectorOpcode: {
+ Type *IdxTy = Type::getInt32Ty(BC->getContext());
+ Value *V = PoisonValue::get(BC->getType());
+ for (auto Pair : enumerate(Ops)) {
+ Value *Idx = ConstantInt::get(IdxTy, Pair.index());
+ V = InsertElementInst::Create(V, Pair.value(), Idx, "constexpr.ins",
+ InsertBB);
+ }
+ I = cast<Instruction>(V);
+ break;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ I = CmpInst::Create((Instruction::OtherOps)BC->Opcode,
+ (CmpInst::Predicate)BC->Flags, Ops[0], Ops[1],
+ "constexpr", InsertBB);
+ break;
+ case Instruction::GetElementPtr:
+ I = GetElementPtrInst::Create(BC->SrcElemTy, Ops[0],
+ makeArrayRef(Ops).drop_front(),
+ "constexpr", InsertBB);
+ if (BC->Flags)
+ cast<GetElementPtrInst>(I)->setIsInBounds();
+ break;
+ case Instruction::Select:
+ I = SelectInst::Create(Ops[0], Ops[1], Ops[2], "constexpr", InsertBB);
+ break;
+ case Instruction::ExtractElement:
+ I = ExtractElementInst::Create(Ops[0], Ops[1], "constexpr", InsertBB);
+ break;
+ case Instruction::InsertElement:
+ I = InsertElementInst::Create(Ops[0], Ops[1], Ops[2], "constexpr",
+ InsertBB);
+ break;
+ case Instruction::ShuffleVector:
+ I = new ShuffleVectorInst(Ops[0], Ops[1], Ops[2], "constexpr",
+ InsertBB);
+ break;
+ default:
+ llvm_unreachable("Unhandled bitcode constant");
+ }
+ }
+
+ MaterializedValues.insert({ValID, I});
+ Worklist.pop_back();
+ }
+
+ return MaterializedValues[StartValID];
+}
+
+Expected<Constant *> BitcodeReader::getValueForInitializer(unsigned ID) {
+ Expected<Value *> MaybeV = materializeValue(ID, /* InsertBB */ nullptr);
+ if (!MaybeV)
+ return MaybeV.takeError();
+
+ // Result must be Constant if InsertBB is nullptr.
+ return cast<Constant>(MaybeV.get());
+}
+
StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context,
StringRef Name) {
auto *Ret = StructType::create(Context, Name);
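materializeValue above resolves arbitrarily nested BitcodeConstants with an explicit worklist rather than recursion: a node is revisited once all of its operands have been materialized. The control pattern, reduced to a self-contained skeleton (hypothetical Node type; assumes an acyclic graph, as bitcode constants are):

#include <unordered_map>
#include <vector>

struct Node {
  std::vector<int> Ops; // operand node IDs
};

static int evaluate(const std::vector<Node> &Nodes, int Root) {
  std::unordered_map<int, int> Done; // node ID -> materialized result
  std::vector<int> Worklist{Root};
  while (!Worklist.empty()) {
    int ID = Worklist.back();
    if (Done.count(ID)) { // duplicate that was already handled
      Worklist.pop_back();
      continue;
    }
    const Node &N = Nodes[ID];
    bool Ready = true;
    for (int Op : N.Ops)
      if (!Done.count(Op)) { // operand not materialized yet
        Worklist.push_back(Op);
        Ready = false;
      }
    if (!Ready)
      continue; // revisit this node after its operands
    int Result = 1; // stand-in for "build the constant/instruction"
    for (int Op : N.Ops)
      Result += Done[Op];
    Done[ID] = Result;
    Worklist.pop_back();
  }
  return Done[Root];
}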
@@ -1346,7 +1804,7 @@ Error BitcodeReader::parseAttributeBlock() {
case bitc::PARAMATTR_CODE_ENTRY_OLD: // ENTRY: [paramidx0, attr0, ...]
// Deprecated, but still needed to read old bitcode files.
if (Record.size() & 1)
- return error("Invalid record");
+ return error("Invalid parameter attribute record");
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
AttrBuilder B(Context);
@@ -1437,8 +1895,14 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Dereferenceable;
case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL:
return Attribute::DereferenceableOrNull;
+ case bitc::ATTR_KIND_ALLOC_ALIGN:
+ return Attribute::AllocAlign;
+ case bitc::ATTR_KIND_ALLOC_KIND:
+ return Attribute::AllocKind;
case bitc::ATTR_KIND_ALLOC_SIZE:
return Attribute::AllocSize;
+ case bitc::ATTR_KIND_ALLOCATED_POINTER:
+ return Attribute::AllocatedPointer;
case bitc::ATTR_KIND_NO_RED_ZONE:
return Attribute::NoRedZone;
case bitc::ATTR_KIND_NO_RETURN:
@@ -1451,6 +1915,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::NoProfile;
case bitc::ATTR_KIND_NO_UNWIND:
return Attribute::NoUnwind;
+ case bitc::ATTR_KIND_NO_SANITIZE_BOUNDS:
+ return Attribute::NoSanitizeBounds;
case bitc::ATTR_KIND_NO_SANITIZE_COVERAGE:
return Attribute::NoSanitizeCoverage;
case bitc::ATTR_KIND_NULL_POINTER_IS_VALID:
@@ -1529,6 +1995,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::MustProgress;
case bitc::ATTR_KIND_HOT:
return Attribute::Hot;
+ case bitc::ATTR_KIND_PRESPLIT_COROUTINE:
+ return Attribute::PresplitCoroutine;
}
}
@@ -1586,7 +2054,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
break;
case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
if (Record.size() < 3)
- return error("Invalid record");
+ return error("Invalid grp record");
uint64_t GrpID = Record[0];
uint64_t Idx = Record[1]; // Index of the object this attribute refers to.
@@ -1607,6 +2075,8 @@ Error BitcodeReader::parseAttributeGroupBlock() {
B.addStructRetAttr(nullptr);
else if (Kind == Attribute::InAlloca)
B.addInAllocaAttr(nullptr);
+ else if (Kind == Attribute::UWTable)
+ B.addUWTableAttr(UWTableKind::Default);
else if (Attribute::isEnumAttrKind(Kind))
B.addAttribute(Kind);
else
@@ -1629,6 +2099,10 @@ Error BitcodeReader::parseAttributeGroupBlock() {
B.addAllocSizeAttrFromRawRepr(Record[++i]);
else if (Kind == Attribute::VScaleRange)
B.addVScaleRangeAttrFromRawRepr(Record[++i]);
+ else if (Kind == Attribute::UWTable)
+ B.addUWTableAttr(UWTableKind(Record[++i]));
+ else if (Kind == Attribute::AllocKind)
+ B.addAllocKindAttr(static_cast<AllocFnKind>(Record[++i]));
} else if (Record[i] == 3 || Record[i] == 4) { // String attribute
bool HasValue = (Record[i++] == 4);
SmallString<64> KindStr;
@@ -1647,9 +2121,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
}
B.addAttribute(KindStr.str(), ValStr.str());
- } else {
- assert((Record[i] == 5 || Record[i] == 6) &&
- "Invalid attribute group entry");
+ } else if (Record[i] == 5 || Record[i] == 6) {
bool HasType = Record[i] == 6;
Attribute::AttrKind Kind;
if (Error Err = parseAttrKind(Record[++i], &Kind))
@@ -1658,6 +2130,8 @@ Error BitcodeReader::parseAttributeGroupBlock() {
return error("Not a type attribute");
B.addTypeAttr(Kind, HasType ? getTypeByID(Record[++i]) : nullptr);
+ } else {
+ return error("Invalid attribute group entry");
}
}
@@ -1708,6 +2182,7 @@ Error BitcodeReader::parseTypeTableBody() {
// Read a record.
Record.clear();
Type *ResultTy = nullptr;
+ SmallVector<unsigned> ContainedIDs;
Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
if (!MaybeRecord)
return MaybeRecord.takeError();
@@ -1718,7 +2193,7 @@ Error BitcodeReader::parseTypeTableBody() {
// TYPE_CODE_NUMENTRY contains a count of the number of types in the
// type list. This allows us to reserve space.
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid numentry record");
TypeList.resize(Record[0]);
continue;
case bitc::TYPE_CODE_VOID: // VOID
@@ -1762,7 +2237,7 @@ Error BitcodeReader::parseTypeTableBody() {
break;
case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid integer record");
uint64_t NumBits = Record[0];
if (NumBits < IntegerType::MIN_INT_BITS ||
@@ -1774,7 +2249,7 @@ Error BitcodeReader::parseTypeTableBody() {
case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
// [pointee type, address space]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid pointer record");
unsigned AddressSpace = 0;
if (Record.size() == 2)
AddressSpace = Record[1];
@@ -1782,13 +2257,18 @@ Error BitcodeReader::parseTypeTableBody() {
if (!ResultTy ||
!PointerType::isValidElementType(ResultTy))
return error("Invalid type");
+ if (LLVM_UNLIKELY(!Context.hasSetOpaquePointersValue()))
+ Context.setOpaquePointers(false);
+ ContainedIDs.push_back(Record[0]);
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
}
case bitc::TYPE_CODE_OPAQUE_POINTER: { // OPAQUE_POINTER: [addrspace]
if (Record.size() != 1)
- return error("Invalid record");
- if (Context.supportsTypedPointers())
+ return error("Invalid opaque pointer record");
+ if (LLVM_UNLIKELY(!Context.hasSetOpaquePointersValue())) {
+ Context.setOpaquePointers(true);
+ } else if (Context.supportsTypedPointers())
return error(
"Opaque pointers are only supported in -opaque-pointers mode");
unsigned AddressSpace = Record[0];
@@ -1799,7 +2279,7 @@ Error BitcodeReader::parseTypeTableBody() {
// Deprecated, but still needed to read old bitcode files.
// FUNCTION: [vararg, attrid, retty, paramty x N]
if (Record.size() < 3)
- return error("Invalid record");
+ return error("Invalid function record");
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 3, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -1812,13 +2292,14 @@ Error BitcodeReader::parseTypeTableBody() {
if (!ResultTy || ArgTys.size() < Record.size()-3)
return error("Invalid type");
+ ContainedIDs.append(Record.begin() + 2, Record.end());
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
case bitc::TYPE_CODE_FUNCTION: {
// FUNCTION: [vararg, retty, paramty x N]
if (Record.size() < 2)
- return error("Invalid record");
+ return error("Invalid function record");
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i])) {
@@ -1834,12 +2315,13 @@ Error BitcodeReader::parseTypeTableBody() {
if (!ResultTy || ArgTys.size() < Record.size()-2)
return error("Invalid type");
+ ContainedIDs.append(Record.begin() + 1, Record.end());
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid anon struct record");
SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -1849,17 +2331,18 @@ Error BitcodeReader::parseTypeTableBody() {
}
if (EltTys.size() != Record.size()-1)
return error("Invalid type");
+ ContainedIDs.append(Record.begin() + 1, Record.end());
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
if (convertToString(Record, 0, TypeName))
- return error("Invalid record");
+ return error("Invalid struct name record");
continue;
case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid named struct record");
if (NumRecords >= TypeList.size())
return error("Invalid TYPE table");
@@ -1881,14 +2364,15 @@ Error BitcodeReader::parseTypeTableBody() {
break;
}
if (EltTys.size() != Record.size()-1)
- return error("Invalid record");
+ return error("Invalid named struct record");
Res->setBody(EltTys, Record[0]);
+ ContainedIDs.append(Record.begin() + 1, Record.end());
ResultTy = Res;
break;
}
case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
if (Record.size() != 1)
- return error("Invalid record");
+ return error("Invalid opaque type record");
if (NumRecords >= TypeList.size())
return error("Invalid TYPE table");
@@ -1906,22 +2390,24 @@ Error BitcodeReader::parseTypeTableBody() {
}
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
- return error("Invalid record");
+ return error("Invalid array type record");
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || !ArrayType::isValidElementType(ResultTy))
return error("Invalid type");
+ ContainedIDs.push_back(Record[1]);
ResultTy = ArrayType::get(ResultTy, Record[0]);
break;
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] or
// [numelts, eltty, scalable]
if (Record.size() < 2)
- return error("Invalid record");
+ return error("Invalid vector type record");
if (Record[0] == 0)
return error("Invalid vector length");
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || !VectorType::isValidElementType(ResultTy))
return error("Invalid type");
bool Scalable = Record.size() > 2 ? Record[2] : false;
+ ContainedIDs.push_back(Record[1]);
ResultTy = VectorType::get(ResultTy, Record[0], Scalable);
break;
}
@@ -1932,7 +2418,10 @@ Error BitcodeReader::parseTypeTableBody() {
return error(
"Invalid TYPE table: Only named structs can be forward referenced");
assert(ResultTy && "Didn't read a type?");
- TypeList[NumRecords++] = ResultTy;
+ TypeList[NumRecords] = ResultTy;
+ if (!ContainedIDs.empty())
+ ContainedTypeIDs[NumRecords] = std::move(ContainedIDs);
+ ++NumRecords;
}
}
@@ -1968,12 +2457,12 @@ Error BitcodeReader::parseOperandBundleTags() {
if (!MaybeRecord)
return MaybeRecord.takeError();
if (MaybeRecord.get() != bitc::OPERAND_BUNDLE_TAG)
- return error("Invalid record");
+ return error("Invalid operand bundle record");
// OPERAND_BUNDLE_TAG: [strchr x N]
BundleTags.emplace_back();
if (convertToString(Record, 0, BundleTags.back()))
- return error("Invalid record");
+ return error("Invalid operand bundle record");
Record.clear();
}
}
@@ -2012,11 +2501,11 @@ Error BitcodeReader::parseSyncScopeNames() {
if (!MaybeRecord)
return MaybeRecord.takeError();
if (MaybeRecord.get() != bitc::SYNC_SCOPE_NAME)
- return error("Invalid record");
+ return error("Invalid sync scope record");
SmallString<16> SSN;
if (convertToString(Record, 0, SSN))
- return error("Invalid record");
+ return error("Invalid sync scope record");
SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN));
Record.clear();
@@ -2056,8 +2545,9 @@ static Expected<uint64_t> jumpToValueSymbolTable(uint64_t Offset,
Expected<BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry)
return MaybeEntry.takeError();
- assert(MaybeEntry.get().Kind == BitstreamEntry::SubBlock);
- assert(MaybeEntry.get().ID == bitc::VALUE_SYMTAB_BLOCK_ID);
+ if (MaybeEntry.get().Kind != BitstreamEntry::SubBlock ||
+ MaybeEntry.get().ID != bitc::VALUE_SYMTAB_BLOCK_ID)
+ return error("Expected value symbol table subblock");
return CurrentBit;
}
@@ -2107,11 +2597,15 @@ Error BitcodeReader::parseGlobalValueSymbolTable() {
if (!MaybeRecord)
return MaybeRecord.takeError();
switch (MaybeRecord.get()) {
- case bitc::VST_CODE_FNENTRY: // [valueid, offset]
+ case bitc::VST_CODE_FNENTRY: { // [valueid, offset]
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size() || !ValueList[ValueID])
+ return error("Invalid value reference in symbol table");
setDeferredFunctionInfo(FuncBitcodeOffsetDelta,
- cast<Function>(ValueList[Record[0]]), Record);
+ cast<Function>(ValueList[ValueID]), Record);
break;
}
+ }
}
}
@@ -2213,10 +2707,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
}
case bitc::VST_CODE_BBENTRY: {
if (convertToString(Record, 1, ValueName))
- return error("Invalid record");
+ return error("Invalid bbentry record");
BasicBlock *BB = getBasicBlock(Record[0]);
if (!BB)
- return error("Invalid record");
+ return error("Invalid bbentry record");
BB->setName(StringRef(ValueName.data(), ValueName.size()));
ValueName.clear();
@@ -2253,10 +2747,10 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
// Not ready to resolve this yet, it requires something later in the file.
GlobalInits.push_back(GlobalInitWorklist.back());
} else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- GlobalInitWorklist.back().first->setInitializer(C);
- else
- return error("Expected a constant");
+ Expected<Constant *> MaybeC = getValueForInitializer(ValID);
+ if (!MaybeC)
+ return MaybeC.takeError();
+ GlobalInitWorklist.back().first->setInitializer(MaybeC.get());
}
GlobalInitWorklist.pop_back();
}
@@ -2266,9 +2760,10 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
if (ValID >= ValueList.size()) {
IndirectSymbolInits.push_back(IndirectSymbolInitWorklist.back());
} else {
- Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]);
- if (!C)
- return error("Expected a constant");
+ Expected<Constant *> MaybeC = getValueForInitializer(ValID);
+ if (!MaybeC)
+ return MaybeC.takeError();
+ Constant *C = MaybeC.get();
GlobalValue *GV = IndirectSymbolInitWorklist.back().first;
if (auto *GA = dyn_cast<GlobalAlias>(GV)) {
if (C->getType() != GV->getType())
@@ -2292,30 +2787,30 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
if (Info.PersonalityFn) {
unsigned ValID = Info.PersonalityFn - 1;
if (ValID < ValueList.size()) {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- Info.F->setPersonalityFn(C);
- else
- return error("Expected a constant");
+ Expected<Constant *> MaybeC = getValueForInitializer(ValID);
+ if (!MaybeC)
+ return MaybeC.takeError();
+ Info.F->setPersonalityFn(MaybeC.get());
Info.PersonalityFn = 0;
}
}
if (Info.Prefix) {
unsigned ValID = Info.Prefix - 1;
if (ValID < ValueList.size()) {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- Info.F->setPrefixData(C);
- else
- return error("Expected a constant");
+ Expected<Constant *> MaybeC = getValueForInitializer(ValID);
+ if (!MaybeC)
+ return MaybeC.takeError();
+ Info.F->setPrefixData(MaybeC.get());
Info.Prefix = 0;
}
}
if (Info.Prologue) {
unsigned ValID = Info.Prologue - 1;
if (ValID < ValueList.size()) {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- Info.F->setPrologueData(C);
- else
- return error("Expected a constant");
+ Expected<Constant *> MaybeC = getValueForInitializer(ValID);
+ if (!MaybeC)
+ return MaybeC.takeError();
+ Info.F->setPrologueData(MaybeC.get());
Info.Prologue = 0;
}
}
@@ -2343,26 +2838,11 @@ Error BitcodeReader::parseConstants() {
// Read all the records for this value table.
Type *CurTy = Type::getInt32Ty(Context);
+ unsigned Int32TyID = getVirtualTypeID(CurTy);
+ unsigned CurTyID = Int32TyID;
+ Type *CurElemTy = nullptr;
unsigned NextCstNo = ValueList.size();
- struct DelayedShufTy {
- VectorType *OpTy;
- VectorType *RTy;
- uint64_t Op0Idx;
- uint64_t Op1Idx;
- uint64_t Op2Idx;
- unsigned CstNo;
- };
- std::vector<DelayedShufTy> DelayedShuffles;
- struct DelayedSelTy {
- Type *OpTy;
- uint64_t Op0Idx;
- uint64_t Op1Idx;
- uint64_t Op2Idx;
- unsigned CstNo;
- };
- std::vector<DelayedSelTy> DelayedSelectors;
-
while (true) {
Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
if (!MaybeEntry)
@@ -2374,57 +2854,8 @@ Error BitcodeReader::parseConstants() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- // Once all the constants have been read, go through and resolve forward
- // references.
- //
- // We have to treat shuffles specially because they don't have three
- // operands anymore. We need to convert the shuffle mask into an array,
- // and we can't convert a forward reference.
- for (auto &DelayedShuffle : DelayedShuffles) {
- VectorType *OpTy = DelayedShuffle.OpTy;
- VectorType *RTy = DelayedShuffle.RTy;
- uint64_t Op0Idx = DelayedShuffle.Op0Idx;
- uint64_t Op1Idx = DelayedShuffle.Op1Idx;
- uint64_t Op2Idx = DelayedShuffle.Op2Idx;
- uint64_t CstNo = DelayedShuffle.CstNo;
- Constant *Op0 = ValueList.getConstantFwdRef(Op0Idx, OpTy);
- Constant *Op1 = ValueList.getConstantFwdRef(Op1Idx, OpTy);
- Type *ShufTy =
- VectorType::get(Type::getInt32Ty(Context), RTy->getElementCount());
- Constant *Op2 = ValueList.getConstantFwdRef(Op2Idx, ShufTy);
- if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
- return error("Invalid shufflevector operands");
- SmallVector<int, 16> Mask;
- ShuffleVectorInst::getShuffleMask(Op2, Mask);
- Value *V = ConstantExpr::getShuffleVector(Op0, Op1, Mask);
- ValueList.assignValue(V, CstNo);
- }
- for (auto &DelayedSelector : DelayedSelectors) {
- Type *OpTy = DelayedSelector.OpTy;
- Type *SelectorTy = Type::getInt1Ty(Context);
- uint64_t Op0Idx = DelayedSelector.Op0Idx;
- uint64_t Op1Idx = DelayedSelector.Op1Idx;
- uint64_t Op2Idx = DelayedSelector.Op2Idx;
- uint64_t CstNo = DelayedSelector.CstNo;
- Constant *Op1 = ValueList.getConstantFwdRef(Op1Idx, OpTy);
- Constant *Op2 = ValueList.getConstantFwdRef(Op2Idx, OpTy);
- // The selector might be an i1 or an <n x i1>
- // Get the type from the ValueList before getting a forward ref.
- if (VectorType *VTy = dyn_cast<VectorType>(OpTy)) {
- Value *V = ValueList[Op0Idx];
- assert(V);
- if (SelectorTy != V->getType())
- SelectorTy = VectorType::get(SelectorTy, VTy->getElementCount());
- }
- Constant *Op0 = ValueList.getConstantFwdRef(Op0Idx, SelectorTy);
- Value *V = ConstantExpr::getSelect(Op0, Op1, Op2);
- ValueList.assignValue(V, CstNo);
- }
-
if (NextCstNo != ValueList.size())
return error("Invalid constant reference");
-
- ValueList.resolveConstantForwardRefs();
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -2448,12 +2879,14 @@ Error BitcodeReader::parseConstants() {
break;
case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid settype record");
if (Record[0] >= TypeList.size() || !TypeList[Record[0]])
- return error("Invalid record");
+ return error("Invalid settype record");
if (TypeList[Record[0]] == VoidType)
return error("Invalid constant type");
- CurTy = TypeList[Record[0]];
+ CurTyID = Record[0];
+ CurTy = TypeList[CurTyID];
+ CurElemTy = getPtrElementTypeByID(CurTyID);
continue; // Skip the ValueList manipulation.
case bitc::CST_CODE_NULL: // NULL
if (CurTy->isVoidTy() || CurTy->isFunctionTy() || CurTy->isLabelTy())
@@ -2462,12 +2895,12 @@ Error BitcodeReader::parseConstants() {
break;
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return error("Invalid record");
+ return error("Invalid integer const record");
V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return error("Invalid record");
+ return error("Invalid wide integer const record");
APInt VInt =
readWideAPInt(Record, cast<IntegerType>(CurTy)->getBitWidth());
@@ -2477,7 +2910,7 @@ Error BitcodeReader::parseConstants() {
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid float const record");
if (CurTy->isHalfTy())
V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf(),
APInt(16, (uint16_t)Record[0])));
@@ -2510,26 +2943,22 @@ Error BitcodeReader::parseConstants() {
case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid aggregate record");
unsigned Size = Record.size();
- SmallVector<Constant*, 16> Elts;
-
- if (StructType *STy = dyn_cast<StructType>(CurTy)) {
- for (unsigned i = 0; i != Size; ++i)
- Elts.push_back(ValueList.getConstantFwdRef(Record[i],
- STy->getElementType(i)));
- V = ConstantStruct::get(STy, Elts);
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
- Type *EltTy = ATy->getElementType();
- for (unsigned i = 0; i != Size; ++i)
- Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
- V = ConstantArray::get(ATy, Elts);
- } else if (VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
- Type *EltTy = VTy->getElementType();
- for (unsigned i = 0; i != Size; ++i)
- Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
- V = ConstantVector::get(Elts);
+ SmallVector<unsigned, 16> Elts;
+ for (unsigned i = 0; i != Size; ++i)
+ Elts.push_back(Record[i]);
+
+ if (isa<StructType>(CurTy)) {
+ V = BitcodeConstant::create(
+ Alloc, CurTy, BitcodeConstant::ConstantStructOpcode, Elts);
+ } else if (isa<ArrayType>(CurTy)) {
+ V = BitcodeConstant::create(Alloc, CurTy,
+ BitcodeConstant::ConstantArrayOpcode, Elts);
+ } else if (isa<VectorType>(CurTy)) {
+ V = BitcodeConstant::create(
+ Alloc, CurTy, BitcodeConstant::ConstantVectorOpcode, Elts);
} else {
V = UndefValue::get(CurTy);
}
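// A rough sketch of the deferred scheme used throughout this function:
// instead of materializing operands eagerly via getConstantFwdRef, the
// reader now records only operand value IDs in a BitcodeConstant
// placeholder, e.g.
//   SmallVector<unsigned, 16> Elts = {3, 7, 9}; // value IDs, not Constants
//   V = BitcodeConstant::create(Alloc, CurTy,
//                               BitcodeConstant::ConstantStructOpcode, Elts);
// The real ConstantStruct is built later, once the referenced IDs can be
// resolved without forward references.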
@@ -2538,7 +2967,7 @@ Error BitcodeReader::parseConstants() {
case bitc::CST_CODE_STRING: // STRING: [values]
case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid string record");
SmallString<16> Elts(Record.begin(), Record.end());
V = ConstantDataArray::getString(Context, Elts,
@@ -2547,7 +2976,7 @@ Error BitcodeReader::parseConstants() {
}
case bitc::CST_CODE_DATA: {// DATA: [n x value]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid data record");
Type *EltTy;
if (auto *Array = dyn_cast<ArrayType>(CurTy))
@@ -2609,27 +3038,23 @@ Error BitcodeReader::parseConstants() {
}
case bitc::CST_CODE_CE_UNOP: { // CE_UNOP: [opcode, opval]
if (Record.size() < 2)
- return error("Invalid record");
+ return error("Invalid unary op constexpr record");
int Opc = getDecodedUnaryOpcode(Record[0], CurTy);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown unop.
} else {
- Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
- unsigned Flags = 0;
- V = ConstantExpr::get(Opc, LHS, Flags);
+ V = BitcodeConstant::create(Alloc, CurTy, Opc, (unsigned)Record[1]);
}
break;
}
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
if (Record.size() < 3)
- return error("Invalid record");
+ return error("Invalid binary op constexpr record");
int Opc = getDecodedBinaryOpcode(Record[0], CurTy);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown binop.
} else {
- Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
- Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
- unsigned Flags = 0;
+ uint8_t Flags = 0;
if (Record.size() >= 4) {
if (Opc == Instruction::Add ||
Opc == Instruction::Sub ||
@@ -2647,23 +3072,23 @@ Error BitcodeReader::parseConstants() {
Flags |= SDivOperator::IsExact;
}
}
- V = ConstantExpr::get(Opc, LHS, RHS, Flags);
+ V = BitcodeConstant::create(Alloc, CurTy, {(uint8_t)Opc, Flags},
+ {(unsigned)Record[1], (unsigned)Record[2]});
}
break;
}
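// For illustration, an `add nuw nsw` constant expression ends up encoded as
//   BitcodeConstant::create(Alloc, CurTy,
//                           {(uint8_t)Instruction::Add, Flags},
//                           {(unsigned)Record[1], (unsigned)Record[2]});
// where the two extra-info bytes pack the opcode and the overflow/exact
// flags read above.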
case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
if (Record.size() < 3)
- return error("Invalid record");
+ return error("Invalid cast constexpr record");
int Opc = getDecodedCastOpcode(Record[0]);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown cast.
} else {
- Type *OpTy = getTypeByID(Record[1]);
+ unsigned OpTyID = Record[1];
+ Type *OpTy = getTypeByID(OpTyID);
if (!OpTy)
- return error("Invalid record");
- Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
- V = UpgradeBitCastExpr(Opc, Op, CurTy);
- if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy);
+ return error("Invalid cast constexpr record");
+ V = BitcodeConstant::create(Alloc, CurTy, Opc, (unsigned)Record[2]);
}
break;
}
@@ -2671,6 +3096,8 @@ Error BitcodeReader::parseConstants() {
case bitc::CST_CODE_CE_GEP: // [ty, n x operands]
case bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX: { // [ty, flags, n x
// operands]
+ if (Record.size() < 2)
+ return error("Constant GEP record must have at least two elements");
unsigned OpNum = 0;
Type *PointeeType = nullptr;
if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX ||
@@ -2686,180 +3113,190 @@ Error BitcodeReader::parseConstants() {
} else if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP)
InBounds = true;
- SmallVector<Constant*, 16> Elts;
- Type *Elt0FullTy = nullptr;
+ SmallVector<unsigned, 16> Elts;
+ unsigned BaseTypeID = Record[OpNum];
while (OpNum != Record.size()) {
- if (!Elt0FullTy)
- Elt0FullTy = getTypeByID(Record[OpNum]);
- Type *ElTy = getTypeByID(Record[OpNum++]);
+ unsigned ElTyID = Record[OpNum++];
+ Type *ElTy = getTypeByID(ElTyID);
if (!ElTy)
- return error("Invalid record");
- Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy));
+ return error("Invalid getelementptr constexpr record");
+ Elts.push_back(Record[OpNum++]);
}
if (Elts.size() < 1)
return error("Invalid gep with no operands");
- PointerType *OrigPtrTy = cast<PointerType>(Elt0FullTy->getScalarType());
- if (!PointeeType)
- PointeeType = OrigPtrTy->getPointerElementType();
- else if (!OrigPtrTy->isOpaqueOrPointeeTypeMatches(PointeeType))
+ Type *BaseType = getTypeByID(BaseTypeID);
+ if (isa<VectorType>(BaseType)) {
+ BaseTypeID = getContainedTypeID(BaseTypeID, 0);
+ BaseType = getTypeByID(BaseTypeID);
+ }
+
+ PointerType *OrigPtrTy = dyn_cast_or_null<PointerType>(BaseType);
+ if (!OrigPtrTy)
+ return error("GEP base operand must be pointer or vector of pointer");
+
+ if (!PointeeType) {
+ PointeeType = getPtrElementTypeByID(BaseTypeID);
+ if (!PointeeType)
+ return error("Missing element type for old-style constant GEP");
+ } else if (!OrigPtrTy->isOpaqueOrPointeeTypeMatches(PointeeType))
return error("Explicit gep operator type does not match pointee type "
"of pointer operand");
- ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
- V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices,
- InBounds, InRangeIndex);
+ V = BitcodeConstant::create(Alloc, CurTy,
+ {Instruction::GetElementPtr, InBounds,
+ InRangeIndex.value_or(-1), PointeeType},
+ Elts);
break;
}
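// The extra-info tuple above is, in order: the opcode, the inbounds flag,
// the in-range index (-1 when absent, via InRangeIndex.value_or(-1)), and
// the explicit source element type; the operand list is the base pointer's
// value ID followed by one value ID per GEP index.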
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
if (Record.size() < 3)
- return error("Invalid record");
+ return error("Invalid select constexpr record");
- DelayedSelectors.push_back(
- {CurTy, Record[0], Record[1], Record[2], NextCstNo});
- (void)ValueList.getConstantFwdRef(NextCstNo, CurTy);
- ++NextCstNo;
- continue;
+ V = BitcodeConstant::create(
+ Alloc, CurTy, Instruction::Select,
+ {(unsigned)Record[0], (unsigned)Record[1], (unsigned)Record[2]});
+ break;
}
case bitc::CST_CODE_CE_EXTRACTELT
: { // CE_EXTRACTELT: [opty, opval, opty, opval]
if (Record.size() < 3)
- return error("Invalid record");
+ return error("Invalid extractelement constexpr record");
+ unsigned OpTyID = Record[0];
VectorType *OpTy =
- dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+ dyn_cast_or_null<VectorType>(getTypeByID(OpTyID));
if (!OpTy)
- return error("Invalid record");
- Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
- Constant *Op1 = nullptr;
+ return error("Invalid extractelement constexpr record");
+ unsigned IdxRecord;
if (Record.size() == 4) {
- Type *IdxTy = getTypeByID(Record[2]);
+ unsigned IdxTyID = Record[2];
+ Type *IdxTy = getTypeByID(IdxTyID);
if (!IdxTy)
- return error("Invalid record");
- Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy);
+ return error("Invalid extractelement constexpr record");
+ IdxRecord = Record[3];
} else {
// Deprecated, but still needed to read old bitcode files.
- Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ IdxRecord = Record[2];
}
- if (!Op1)
- return error("Invalid record");
- V = ConstantExpr::getExtractElement(Op0, Op1);
+ V = BitcodeConstant::create(Alloc, CurTy, Instruction::ExtractElement,
+ {(unsigned)Record[1], IdxRecord});
break;
}
case bitc::CST_CODE_CE_INSERTELT
: { // CE_INSERTELT: [opval, opval, opty, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || !OpTy)
- return error("Invalid record");
- Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
- Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
- OpTy->getElementType());
- Constant *Op2 = nullptr;
+ return error("Invalid insertelement constexpr record");
+ unsigned IdxRecord;
if (Record.size() == 4) {
- Type *IdxTy = getTypeByID(Record[2]);
+ unsigned IdxTyID = Record[2];
+ Type *IdxTy = getTypeByID(IdxTyID);
if (!IdxTy)
- return error("Invalid record");
- Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy);
+ return error("Invalid insertelement constexpr record");
+ IdxRecord = Record[3];
} else {
// Deprecated, but still needed to read old bitcode files.
- Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ IdxRecord = Record[2];
}
- if (!Op2)
- return error("Invalid record");
- V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
+ V = BitcodeConstant::create(
+ Alloc, CurTy, Instruction::InsertElement,
+ {(unsigned)Record[0], (unsigned)Record[1], IdxRecord});
break;
}
case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || !OpTy)
- return error("Invalid record");
- DelayedShuffles.push_back(
- {OpTy, OpTy, Record[0], Record[1], Record[2], NextCstNo});
- ++NextCstNo;
- continue;
+ return error("Invalid shufflevector constexpr record");
+ V = BitcodeConstant::create(
+ Alloc, CurTy, Instruction::ShuffleVector,
+ {(unsigned)Record[0], (unsigned)Record[1], (unsigned)Record[2]});
+ break;
}
case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
VectorType *RTy = dyn_cast<VectorType>(CurTy);
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (Record.size() < 4 || !RTy || !OpTy)
- return error("Invalid record");
- DelayedShuffles.push_back(
- {OpTy, RTy, Record[1], Record[2], Record[3], NextCstNo});
- ++NextCstNo;
- continue;
+ return error("Invalid shufflevector constexpr record");
+ V = BitcodeConstant::create(
+ Alloc, CurTy, Instruction::ShuffleVector,
+ {(unsigned)Record[1], (unsigned)Record[2], (unsigned)Record[3]});
+ break;
}
case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
if (Record.size() < 4)
- return error("Invalid record");
- Type *OpTy = getTypeByID(Record[0]);
+ return error("Invalid cmp constexpt record");
+ unsigned OpTyID = Record[0];
+ Type *OpTy = getTypeByID(OpTyID);
if (!OpTy)
- return error("Invalid record");
- Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
- Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
-
- if (OpTy->isFPOrFPVectorTy())
- V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
- else
- V = ConstantExpr::getICmp(Record[3], Op0, Op1);
+ return error("Invalid cmp constexpr record");
+ V = BitcodeConstant::create(
+ Alloc, CurTy,
+ {(uint8_t)(OpTy->isFPOrFPVectorTy() ? Instruction::FCmp
+ : Instruction::ICmp),
+ (uint8_t)Record[3]},
+ {(unsigned)Record[1], (unsigned)Record[2]});
break;
}
// This maintains backward compatibility with pre-asm-dialect keywords.
// Deprecated, but still needed to read old bitcode files.
case bitc::CST_CODE_INLINEASM_OLD: {
if (Record.size() < 2)
- return error("Invalid record");
+ return error("Invalid inlineasm record");
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = Record[0] >> 1;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
for (unsigned i = 0; i != ConstStrSize; ++i)
ConstrStr += (char)Record[3+AsmStrSize+i];
UpgradeInlineAsmString(&AsmStr);
- // FIXME: support upgrading in opaque pointers mode.
- V = InlineAsm::get(cast<FunctionType>(CurTy->getPointerElementType()),
- AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+ if (!CurElemTy)
+ return error("Missing element type for old-style inlineasm");
+ V = InlineAsm::get(cast<FunctionType>(CurElemTy), AsmStr, ConstrStr,
+ HasSideEffects, IsAlignStack);
break;
}
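// Record layout decoded above (sketch; each entry is one 64-bit field):
//   [flags, asmstrsize, asmstr x asmstrsize, constrsize, constr x constrsize]
// with flags bit 0 = sideeffect and bit 1 = alignstack.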
// This version adds support for the asm dialect keywords (e.g.,
// inteldialect).
case bitc::CST_CODE_INLINEASM_OLD2: {
if (Record.size() < 2)
- return error("Invalid record");
+ return error("Invalid inlineasm record");
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = (Record[0] >> 1) & 1;
unsigned AsmDialect = Record[0] >> 2;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
for (unsigned i = 0; i != ConstStrSize; ++i)
ConstrStr += (char)Record[3+AsmStrSize+i];
UpgradeInlineAsmString(&AsmStr);
- // FIXME: support upgrading in opaque pointers mode.
- V = InlineAsm::get(cast<FunctionType>(CurTy->getPointerElementType()),
- AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
+ if (!CurElemTy)
+ return error("Missing element type for old-style inlineasm");
+ V = InlineAsm::get(cast<FunctionType>(CurElemTy), AsmStr, ConstrStr,
+ HasSideEffects, IsAlignStack,
InlineAsm::AsmDialect(AsmDialect));
break;
}
// This version adds support for the unwind keyword.
case bitc::CST_CODE_INLINEASM_OLD3: {
if (Record.size() < 2)
- return error("Invalid record");
+ return error("Invalid inlineasm record");
unsigned OpNum = 0;
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[OpNum] & 1;
@@ -2870,10 +3307,10 @@ Error BitcodeReader::parseConstants() {
unsigned AsmStrSize = Record[OpNum];
++OpNum;
if (OpNum + AsmStrSize >= Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
unsigned ConstStrSize = Record[OpNum + AsmStrSize];
if (OpNum + 1 + AsmStrSize + ConstStrSize > Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[OpNum + i];
@@ -2881,21 +3318,22 @@ Error BitcodeReader::parseConstants() {
for (unsigned i = 0; i != ConstStrSize; ++i)
ConstrStr += (char)Record[OpNum + AsmStrSize + i];
UpgradeInlineAsmString(&AsmStr);
- // FIXME: support upgrading in opaque pointers mode.
- V = InlineAsm::get(cast<FunctionType>(CurTy->getPointerElementType()),
- AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
+ if (!CurElemTy)
+ return error("Missing element type for old-style inlineasm");
+ V = InlineAsm::get(cast<FunctionType>(CurElemTy), AsmStr, ConstrStr,
+ HasSideEffects, IsAlignStack,
InlineAsm::AsmDialect(AsmDialect), CanThrow);
break;
}
// This version adds explicit function type.
case bitc::CST_CODE_INLINEASM: {
if (Record.size() < 3)
- return error("Invalid record");
+ return error("Invalid inlineasm record");
unsigned OpNum = 0;
auto *FnTy = dyn_cast_or_null<FunctionType>(getTypeByID(Record[OpNum]));
++OpNum;
if (!FnTy)
- return error("Invalid record");
+ return error("Invalid inlineasm record");
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[OpNum] & 1;
bool IsAlignStack = (Record[OpNum] >> 1) & 1;
@@ -2905,10 +3343,10 @@ Error BitcodeReader::parseConstants() {
unsigned AsmStrSize = Record[OpNum];
++OpNum;
if (OpNum + AsmStrSize >= Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
unsigned ConstStrSize = Record[OpNum + AsmStrSize];
if (OpNum + 1 + AsmStrSize + ConstStrSize > Record.size())
- return error("Invalid record");
+ return error("Invalid inlineasm record");
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[OpNum + i];
@@ -2922,75 +3360,44 @@ Error BitcodeReader::parseConstants() {
}
case bitc::CST_CODE_BLOCKADDRESS:{
if (Record.size() < 3)
- return error("Invalid record");
- Type *FnTy = getTypeByID(Record[0]);
+ return error("Invalid blockaddress record");
+ unsigned FnTyID = Record[0];
+ Type *FnTy = getTypeByID(FnTyID);
if (!FnTy)
- return error("Invalid record");
- Function *Fn =
- dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
- if (!Fn)
- return error("Invalid record");
-
- // If the function is already parsed we can insert the block address right
- // away.
- BasicBlock *BB;
- unsigned BBID = Record[2];
- if (!BBID)
- // Invalid reference to entry block.
- return error("Invalid ID");
- if (!Fn->empty()) {
- Function::iterator BBI = Fn->begin(), BBE = Fn->end();
- for (size_t I = 0, E = BBID; I != E; ++I) {
- if (BBI == BBE)
- return error("Invalid ID");
- ++BBI;
- }
- BB = &*BBI;
- } else {
- // Otherwise insert a placeholder and remember it so it can be inserted
- // when the function is parsed.
- auto &FwdBBs = BasicBlockFwdRefs[Fn];
- if (FwdBBs.empty())
- BasicBlockFwdRefQueue.push_back(Fn);
- if (FwdBBs.size() < BBID + 1)
- FwdBBs.resize(BBID + 1);
- if (!FwdBBs[BBID])
- FwdBBs[BBID] = BasicBlock::Create(Context);
- BB = FwdBBs[BBID];
- }
- V = BlockAddress::get(Fn, BB);
+ return error("Invalid blockaddress record");
+ V = BitcodeConstant::create(
+ Alloc, CurTy,
+ {BitcodeConstant::BlockAddressOpcode, 0, (unsigned)Record[2]},
+ Record[1]);
break;
}
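// Sketch: the target BasicBlock cannot be resolved until the function body
// is parsed, so the block index (Record[2]) rides along in the extra-info
// bytes while the function's value ID (Record[1]) is the sole operand,
// replacing the eager placeholder-BasicBlock machinery deleted above.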
case bitc::CST_CODE_DSO_LOCAL_EQUIVALENT: {
if (Record.size() < 2)
- return error("Invalid record");
- Type *GVTy = getTypeByID(Record[0]);
+ return error("Invalid dso_local record");
+ unsigned GVTyID = Record[0];
+ Type *GVTy = getTypeByID(GVTyID);
if (!GVTy)
- return error("Invalid record");
- GlobalValue *GV = dyn_cast_or_null<GlobalValue>(
- ValueList.getConstantFwdRef(Record[1], GVTy));
- if (!GV)
- return error("Invalid record");
-
- V = DSOLocalEquivalent::get(GV);
+ return error("Invalid dso_local record");
+ V = BitcodeConstant::create(
+ Alloc, CurTy, BitcodeConstant::DSOLocalEquivalentOpcode, Record[1]);
break;
}
case bitc::CST_CODE_NO_CFI_VALUE: {
if (Record.size() < 2)
- return error("Invalid record");
- Type *GVTy = getTypeByID(Record[0]);
+ return error("Invalid no_cfi record");
+ unsigned GVTyID = Record[0];
+ Type *GVTy = getTypeByID(GVTyID);
if (!GVTy)
- return error("Invalid record");
- GlobalValue *GV = dyn_cast_or_null<GlobalValue>(
- ValueList.getConstantFwdRef(Record[1], GVTy));
- if (!GV)
- return error("Invalid record");
- V = NoCFIValue::get(GV);
+ return error("Invalid no_cfi record");
+ V = BitcodeConstant::create(Alloc, CurTy, BitcodeConstant::NoCFIOpcode,
+ Record[1]);
break;
}
}
- ValueList.assignValue(V, NextCstNo);
+ assert(V->getType() == getTypeByID(CurTyID) && "Incorrect result type ID");
+ if (Error Err = ValueList.assignValue(NextCstNo, V, CurTyID))
+ return Err;
++NextCstNo;
}
}
@@ -3146,7 +3553,7 @@ Error BitcodeReader::globalCleanup() {
// Some types could be renamed during loading if several modules are
// loaded in the same LLVMContext (LTO scenario). In this case we should
// remangle intrinsics names as well.
- RemangledIntrinsics[&F] = Remangled.getValue();
+ RemangledIntrinsics[&F] = *Remangled;
// Look for functions that rely on old function attribute behavior.
UpgradeFunctionAttributes(F);
}
@@ -3211,17 +3618,17 @@ Error BitcodeReader::rememberAndSkipFunctionBodies() {
}
}
-bool BitcodeReaderBase::readBlockInfo() {
+Error BitcodeReaderBase::readBlockInfo() {
Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
Stream.ReadBlockInfoBlock();
if (!MaybeNewBlockInfo)
- return true; // FIXME Handle the error.
+ return MaybeNewBlockInfo.takeError();
Optional<BitstreamBlockInfo> NewBlockInfo =
std::move(MaybeNewBlockInfo.get());
if (!NewBlockInfo)
- return true;
+ return error("Malformed block");
BlockInfo = std::move(*NewBlockInfo);
- return false;
+ return Error::success();
}
Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) {
@@ -3238,6 +3645,8 @@ Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) {
if (Record.size() < 2)
return error("Invalid record");
unsigned ComdatNameSize = Record[1];
+ if (ComdatNameSize > Record.size() - 2)
+ return error("Comdat name size too large");
OldFormatName.reserve(ComdatNameSize);
for (unsigned i = 0; i != ComdatNameSize; ++i)
OldFormatName += (char)Record[2 + i];
@@ -3256,6 +3665,19 @@ static void inferDSOLocal(GlobalValue *GV) {
GV->setDSOLocal(true);
}
+GlobalValue::SanitizerMetadata deserializeSanitizerMetadata(unsigned V) {
+ GlobalValue::SanitizerMetadata Meta;
+ if (V & (1 << 0))
+ Meta.NoAddress = true;
+ if (V & (1 << 1))
+ Meta.NoHWAddress = true;
+ if (V & (1 << 2))
+ Meta.NoMemtag = true;
+ if (V & (1 << 3))
+ Meta.IsDynInit = true;
+ return Meta;
+}
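+// Worked example: a serialized value of 0b0101 (5) sets Meta.NoAddress and
+// Meta.NoMemtag, leaving NoHWAddress and IsDynInit at their defaults.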
+
Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
// v1: [pointer type, isconst, initid, linkage, alignment, section,
// visibility, threadlocal, unnamed_addr, externally_initialized,
@@ -3267,7 +3689,8 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
if (Record.size() < 6)
return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
+ unsigned TyID = Record[0];
+ Type *Ty = getTypeByID(TyID);
if (!Ty)
return error("Invalid record");
bool isConstant = Record[1] & 1;
@@ -3279,7 +3702,10 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
if (!Ty->isPointerTy())
return error("Invalid type for value");
AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
- Ty = Ty->getPointerElementType();
+ TyID = getContainedTypeID(TyID);
+ Ty = getTypeByID(TyID);
+ if (!Ty)
+ return error("Missing element type for old-style global");
}
uint64_t RawLinkage = Record[3];
@@ -3325,7 +3751,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
else
upgradeDLLImportExportLinkage(NewGV, RawLinkage);
- ValueList.push_back(NewGV);
+ ValueList.push_back(NewGV, getVirtualTypeID(NewGV->getType(), TyID));
// Remember which value to use for the global initializer.
if (unsigned InitID = Record[2])
@@ -3355,6 +3781,12 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
if (Record.size() > 15)
NewGV->setPartition(StringRef(Strtab.data() + Record[14], Record[15]));
+ if (Record.size() > 16 && Record[16]) {
+ llvm::GlobalValue::SanitizerMetadata Meta =
+ deserializeSanitizerMetadata(Record[16]);
+ NewGV->setSanitizerMetadata(Meta);
+ }
+
return Error::success();
}
@@ -3368,11 +3800,16 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
if (Record.size() < 8)
return error("Invalid record");
- Type *FTy = getTypeByID(Record[0]);
+ unsigned FTyID = Record[0];
+ Type *FTy = getTypeByID(FTyID);
if (!FTy)
return error("Invalid record");
- if (auto *PTy = dyn_cast<PointerType>(FTy))
- FTy = PTy->getPointerElementType();
+ if (isa<PointerType>(FTy)) {
+ FTyID = getContainedTypeID(FTyID, 0);
+ FTy = getTypeByID(FTyID);
+ if (!FTy)
+ return error("Missing element type for old-style function");
+ }
if (!isa<FunctionType>(FTy))
return error("Invalid type for value");
@@ -3390,7 +3827,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
assert(Func->getFunctionType() == FTy &&
"Incorrect fully specified type provided for function");
- FunctionTypes[Func] = cast<FunctionType>(FTy);
+ FunctionTypeIDs[Func] = FTyID;
Func->setCallingConv(CC);
bool isProto = Record[2];
@@ -3412,8 +3849,11 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Func->removeParamAttr(i, Kind);
- Type *PTy = cast<FunctionType>(FTy)->getParamType(i);
- Type *PtrEltTy = PTy->getPointerElementType();
+ unsigned ParamTypeID = getContainedTypeID(FTyID, i + 1);
+ Type *PtrEltTy = getPtrElementTypeByID(ParamTypeID);
+ if (!PtrEltTy)
+ return error("Missing param element type for attribute upgrade");
+
Attribute NewAttr;
switch (Kind) {
case Attribute::ByVal:
@@ -3433,6 +3873,16 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
}
}
+ if (Func->getCallingConv() == CallingConv::X86_INTR &&
+ !Func->arg_empty() && !Func->hasParamAttribute(0, Attribute::ByVal)) {
+ unsigned ParamTypeID = getContainedTypeID(FTyID, 1);
+ Type *ByValTy = getPtrElementTypeByID(ParamTypeID);
+ if (!ByValTy)
+ return error("Missing param element type for x86_intrcc upgrade");
+ Attribute NewAttr = Attribute::getWithByValType(Context, ByValTy);
+ Func->addParamAttr(0, NewAttr);
+ }
+
MaybeAlign Alignment;
if (Error Err = parseAlignmentValue(Record[5], Alignment))
return Err;
@@ -3495,7 +3945,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Func->setPartition(StringRef(Strtab.data() + Record[17], Record[18]));
}
- ValueList.push_back(Func);
+ ValueList.push_back(Func, getVirtualTypeID(Func->getType(), FTyID));
if (OperandInfo.PersonalityFn || OperandInfo.Prefix || OperandInfo.Prologue)
FunctionOperands.push_back(OperandInfo);
@@ -3527,7 +3977,8 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
if (Record.size() < (3 + (unsigned)NewRecord))
return error("Invalid record");
unsigned OpNum = 0;
- Type *Ty = getTypeByID(Record[OpNum++]);
+ unsigned TypeID = Record[OpNum++];
+ Type *Ty = getTypeByID(TypeID);
if (!Ty)
return error("Invalid record");
@@ -3536,8 +3987,11 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
return error("Invalid type for value");
- Ty = PTy->getPointerElementType();
AddrSpace = PTy->getAddressSpace();
+ TypeID = getContainedTypeID(TypeID);
+ Ty = getTypeByID(TypeID);
+ if (!Ty)
+ return error("Missing element type for old-style indirect symbol");
} else {
AddrSpace = Record[OpNum++];
}
@@ -3582,7 +4036,7 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
OpNum += 2;
}
- ValueList.push_back(NewGA);
+ ValueList.push_back(NewGA, getVirtualTypeID(NewGA->getType(), TypeID));
IndirectSymbolInits.push_back(std::make_pair(NewGA, Val));
return Error::success();
}
@@ -3639,8 +4093,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
return Err;
break;
case bitc::BLOCKINFO_BLOCK_ID:
- if (readBlockInfo())
- return error("Malformed block");
+ if (Error Err = readBlockInfo())
+ return Err;
break;
case bitc::PARAMATTR_BLOCK_ID:
if (Error Err = parseAttributeBlock())
@@ -3796,7 +4250,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
std::string S;
if (convertToString(Record, 0, S))
return error("Invalid record");
- TheModule->setDataLayout(S);
+ Expected<DataLayout> MaybeDL = DataLayout::parse(S);
+ if (!MaybeDL)
+ return MaybeDL.takeError();
+ TheModule->setDataLayout(MaybeDL.get());
break;
}
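// DataLayout::parse returns Expected<DataLayout>, so a malformed layout
// string (e.g. "bogus-layout") now propagates a recoverable Error to the
// caller instead of failing hard inside Module::setDataLayout.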
case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
@@ -3894,18 +4351,20 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
return Error::success();
}
-void BitcodeReader::propagateAttributeTypes(CallBase *CB,
- ArrayRef<Type *> ArgsTys) {
+Error BitcodeReader::propagateAttributeTypes(CallBase *CB,
+ ArrayRef<unsigned> ArgTyIDs) {
+ AttributeList Attrs = CB->getAttributes();
for (unsigned i = 0; i != CB->arg_size(); ++i) {
for (Attribute::AttrKind Kind : {Attribute::ByVal, Attribute::StructRet,
Attribute::InAlloca}) {
- if (!CB->paramHasAttr(i, Kind) ||
- CB->getParamAttr(i, Kind).getValueAsType())
+ if (!Attrs.hasParamAttr(i, Kind) ||
+ Attrs.getParamAttr(i, Kind).getValueAsType())
continue;
- CB->removeParamAttr(i, Kind);
+ Type *PtrEltTy = getPtrElementTypeByID(ArgTyIDs[i]);
+ if (!PtrEltTy)
+ return error("Missing element type for typed attribute upgrade");
- Type *PtrEltTy = ArgsTys[i]->getPointerElementType();
Attribute NewAttr;
switch (Kind) {
case Attribute::ByVal:
@@ -3921,7 +4380,7 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB,
llvm_unreachable("not an upgraded type attribute");
}
- CB->addParamAttr(i, NewAttr);
+ Attrs = Attrs.addParamAttribute(Context, i, NewAttr);
}
}
@@ -3932,10 +4391,13 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB,
if (!CI.hasArg())
continue;
- if (CI.isIndirect && !CB->getAttributes().getParamElementType(ArgNo)) {
- Type *ElemTy = ArgsTys[ArgNo]->getPointerElementType();
- CB->addParamAttr(
- ArgNo, Attribute::get(Context, Attribute::ElementType, ElemTy));
+ if (CI.isIndirect && !Attrs.getParamElementType(ArgNo)) {
+ Type *ElemTy = getPtrElementTypeByID(ArgTyIDs[ArgNo]);
+ if (!ElemTy)
+ return error("Missing element type for inline asm upgrade");
+ Attrs = Attrs.addParamAttribute(
+ Context, ArgNo,
+ Attribute::get(Context, Attribute::ElementType, ElemTy));
}
ArgNo++;
@@ -3945,15 +4407,41 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB,
switch (CB->getIntrinsicID()) {
case Intrinsic::preserve_array_access_index:
case Intrinsic::preserve_struct_access_index:
- if (!CB->getAttributes().getParamElementType(0)) {
- Type *ElTy = ArgsTys[0]->getPointerElementType();
+ case Intrinsic::aarch64_ldaxr:
+ case Intrinsic::aarch64_ldxr:
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr:
+ case Intrinsic::arm_ldaex:
+ case Intrinsic::arm_ldrex:
+ case Intrinsic::arm_stlex:
+ case Intrinsic::arm_strex: {
+ unsigned ArgNo;
+ switch (CB->getIntrinsicID()) {
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr:
+ case Intrinsic::arm_stlex:
+ case Intrinsic::arm_strex:
+ ArgNo = 1;
+ break;
+ default:
+ ArgNo = 0;
+ break;
+ }
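+ // For illustration: the store exclusives take (value, ptr), e.g.
+ //   i32 @llvm.aarch64.stxr(i64 %val, ptr %addr)
+ // so the pointer is operand 1; the load exclusives take only the
+ // pointer, so operand 0 is annotated instead.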
+ if (!Attrs.getParamElementType(ArgNo)) {
+ Type *ElTy = getPtrElementTypeByID(ArgTyIDs[ArgNo]);
+ if (!ElTy)
+ return error("Missing element type for elementtype upgrade");
Attribute NewAttr = Attribute::get(Context, Attribute::ElementType, ElTy);
- CB->addParamAttr(0, NewAttr);
+ Attrs = Attrs.addParamAttribute(Context, ArgNo, NewAttr);
}
break;
+ }
default:
break;
}
+
+ CB->setAttributes(Attrs);
+ return Error::success();
}
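// AttributeList is immutable, so the code above accumulates changes into a
// local copy and commits once. A minimal sketch of the pattern:
//   AttributeList Attrs = CB->getAttributes();
//   Attrs = Attrs.addParamAttribute(Context, ArgNo, NewAttr);
//   CB->setAttributes(Attrs);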
/// Lazily parse the specified function body block.
@@ -3970,18 +4458,24 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned ModuleMDLoaderSize = MDLoader->size();
// Add all the function arguments to the value table.
-#ifndef NDEBUG
unsigned ArgNo = 0;
- FunctionType *FTy = FunctionTypes[F];
-#endif
+ unsigned FTyID = FunctionTypeIDs[F];
for (Argument &I : F->args()) {
- assert(I.getType() == FTy->getParamType(ArgNo++) &&
+ unsigned ArgTyID = getContainedTypeID(FTyID, ArgNo + 1);
+ assert(I.getType() == getTypeByID(ArgTyID) &&
"Incorrect fully specified type for Function Argument");
- ValueList.push_back(&I);
+ ValueList.push_back(&I, ArgTyID);
+ ++ArgNo;
}
unsigned NextValueNo = ValueList.size();
BasicBlock *CurBB = nullptr;
unsigned CurBBNo = 0;
+ // Block into which constant expressions from phi nodes are materialized.
+ BasicBlock *PhiConstExprBB = nullptr;
+ // Edge blocks for phi nodes into which constant expressions have been
+ // expanded.
+ SmallMapVector<std::pair<BasicBlock *, BasicBlock *>, BasicBlock *, 4>
+ ConstExprEdgeBBs;
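+ // Sketch of the mechanics: a constant expression feeding a phi must be
+ // expanded into instructions on the incoming edge, not in the block that
+ // contains the phi. PhiConstExprBB serves as a lazily created scratch
+ // block; once an expansion lands in it, it is promoted into
+ // ConstExprEdgeBBs (keyed by predecessor/current-block pair) so later
+ // phis reuse the same edge block.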
DebugLoc LastLoc;
auto getLastInstruction = [&]() -> Instruction * {
@@ -4050,6 +4544,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// Read a record.
Record.clear();
Instruction *I = nullptr;
+ unsigned ResTypeID = InvalidTypeID;
Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
if (!MaybeBitCode)
return MaybeBitCode.takeError();
@@ -4091,6 +4586,31 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
continue;
}
+ case bitc::FUNC_CODE_BLOCKADDR_USERS: // BLOCKADDR_USERS: [vals...]
+ // The record should not be emitted if it's an empty list.
+ if (Record.empty())
+ return error("Invalid record");
+ // When we have the RARE case of a BlockAddress Constant that is not
+ // scoped to the Function it refers to, we need to conservatively
+ // materialize the referred to Function, regardless of whether or not
+ // that Function will ultimately be linked, otherwise users of
+ // BitcodeReader might start splicing out Function bodies such that we
+ // might no longer be able to materialize the BlockAddress since the
+ // BasicBlock (and entire body of the Function) the BlockAddress refers
+ // to may have been moved. In the case that the user of BitcodeReader
+ // decides ultimately not to link the Function body, materializing here
+ // could be considered wasteful, but it's better than a deserialization
+ // failure as described. This keeps BitcodeReader unaware of complex
+ // linkage policy decisions such as those used by LTO, leaving those
+ // decisions "one layer up."
+ for (uint64_t ValID : Record)
+ if (auto *F = dyn_cast<Function>(ValueList[ValID]))
+ BackwardRefFunctions.push_back(F);
+ else
+ return error("Invalid record");
+
+ continue;
+
case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
// This record indicates that the last instruction is at the same
// location as the previous instruction with a location.
@@ -4133,7 +4653,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_UNOP: { // UNOP: [opval, ty, opcode]
unsigned OpNum = 0;
Value *LHS;
- if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
+ unsigned TypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS, TypeID, CurBB) ||
OpNum+1 > Record.size())
return error("Invalid record");
@@ -4141,6 +4662,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Opc == -1)
return error("Invalid record");
I = UnaryOperator::Create((Instruction::UnaryOps)Opc, LHS);
+ ResTypeID = TypeID;
InstructionList.push_back(I);
if (OpNum < Record.size()) {
if (isa<FPMathOperator>(I)) {
@@ -4154,8 +4676,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode]
unsigned OpNum = 0;
Value *LHS, *RHS;
- if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
+ unsigned TypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS, TypeID, CurBB) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), TypeID, RHS,
+ CurBB) ||
OpNum+1 > Record.size())
return error("Invalid record");
@@ -4163,6 +4687,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Opc == -1)
return error("Invalid record");
I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+ ResTypeID = TypeID;
InstructionList.push_back(I);
if (OpNum < Record.size()) {
if (Opc == Instruction::Add ||
@@ -4191,11 +4716,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc]
unsigned OpNum = 0;
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB) ||
OpNum+2 != Record.size())
return error("Invalid record");
- Type *ResTy = getTypeByID(Record[OpNum]);
+ ResTypeID = Record[OpNum];
+ Type *ResTy = getTypeByID(ResTypeID);
int Opc = getDecodedCastOpcode(Record[OpNum + 1]);
if (Opc == -1 || !ResTy)
return error("Invalid record");
@@ -4220,23 +4747,31 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_GEP: { // GEP: type, [n x operands]
unsigned OpNum = 0;
+ unsigned TyID;
Type *Ty;
bool InBounds;
if (BitCode == bitc::FUNC_CODE_INST_GEP) {
InBounds = Record[OpNum++];
- Ty = getTypeByID(Record[OpNum++]);
+ TyID = Record[OpNum++];
+ Ty = getTypeByID(TyID);
} else {
InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD;
+ TyID = InvalidTypeID;
Ty = nullptr;
}
Value *BasePtr;
- if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
+ unsigned BasePtrTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr, BasePtrTypeID,
+ CurBB))
return error("Invalid record");
if (!Ty) {
- Ty = BasePtr->getType()->getScalarType()->getPointerElementType();
+ TyID = getContainedTypeID(BasePtrTypeID);
+ if (BasePtr->getType()->isVectorTy())
+ TyID = getContainedTypeID(TyID);
+ Ty = getTypeByID(TyID);
} else if (!cast<PointerType>(BasePtr->getType()->getScalarType())
->isOpaqueOrPointeeTypeMatches(Ty)) {
return error(
@@ -4246,13 +4781,37 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
SmallVector<Value*, 16> GEPIdx;
while (OpNum != Record.size()) {
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB))
return error("Invalid record");
GEPIdx.push_back(Op);
}
I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx);
+ ResTypeID = TyID;
+ if (cast<GEPOperator>(I)->getNumIndices() != 0) {
+ auto GTI = std::next(gep_type_begin(I));
+ for (Value *Idx : drop_begin(cast<GEPOperator>(I)->indices())) {
+ unsigned SubType = 0;
+ if (GTI.isStruct()) {
+ ConstantInt *IdxC =
+ Idx->getType()->isVectorTy()
+ ? cast<ConstantInt>(cast<Constant>(Idx)->getSplatValue())
+ : cast<ConstantInt>(Idx);
+ SubType = IdxC->getZExtValue();
+ }
+ ResTypeID = getContainedTypeID(ResTypeID, SubType);
+ ++GTI;
+ }
+ }
+
+ // At this point ResTypeID is the result element type. We need a pointer
+ // or vector of pointer to it.
+ ResTypeID = getVirtualTypeID(I->getType()->getScalarType(), ResTypeID);
+ if (I->getType()->isVectorTy())
+ ResTypeID = getVirtualTypeID(I->getType(), ResTypeID);
+
InstructionList.push_back(I);
if (InBounds)
cast<GetElementPtrInst>(I)->setIsInBounds(true);
@@ -4263,7 +4822,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// EXTRACTVAL: [opty, opval, n x indices]
unsigned OpNum = 0;
Value *Agg;
- if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ unsigned AggTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg, AggTypeID, CurBB))
return error("Invalid record");
Type *Ty = Agg->getType();
@@ -4272,6 +4832,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("EXTRACTVAL: Invalid instruction with 0 indices");
SmallVector<unsigned, 4> EXTRACTVALIdx;
+ ResTypeID = AggTypeID;
for (; OpNum != RecSize; ++OpNum) {
bool IsArray = Ty->isArrayTy();
bool IsStruct = Ty->isStructTy();
@@ -4287,10 +4848,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("EXTRACTVAL: Invalid array index");
EXTRACTVALIdx.push_back((unsigned)Index);
- if (IsStruct)
+ if (IsStruct) {
Ty = Ty->getStructElementType(Index);
- else
+ ResTypeID = getContainedTypeID(ResTypeID, Index);
+ } else {
Ty = Ty->getArrayElementType();
+ ResTypeID = getContainedTypeID(ResTypeID);
+ }
}
I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
@@ -4302,10 +4866,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// INSERTVAL: [opty, opval, opty, opval, n x indices]
unsigned OpNum = 0;
Value *Agg;
- if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ unsigned AggTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg, AggTypeID, CurBB))
return error("Invalid record");
Value *Val;
- if (getValueTypePair(Record, OpNum, NextValueNo, Val))
+ unsigned ValTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB))
return error("Invalid record");
unsigned RecSize = Record.size();
@@ -4339,6 +4905,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Inserted value type doesn't match aggregate type");
I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
+ ResTypeID = AggTypeID;
InstructionList.push_back(I);
break;
}
@@ -4348,12 +4915,18 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// handles select i1 ... in old bitcode
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
- if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
- popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
- popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
+ unsigned TypeID;
+ Type *CondType = Type::getInt1Ty(Context);
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, TypeID,
+ CurBB) ||
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), TypeID,
+ FalseVal, CurBB) ||
+ popValue(Record, OpNum, NextValueNo, CondType,
+ getVirtualTypeID(CondType), Cond, CurBB))
return error("Invalid record");
I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ ResTypeID = TypeID;
InstructionList.push_back(I);
break;
}
@@ -4363,9 +4936,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// handles select i1 or select [N x i1]
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
- if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
- popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
- getValueTypePair(Record, OpNum, NextValueNo, Cond))
+ unsigned ValTypeID, CondTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, ValTypeID,
+ CurBB) ||
+ popValue(Record, OpNum, NextValueNo, TrueVal->getType(), ValTypeID,
+ FalseVal, CurBB) ||
+ getValueTypePair(Record, OpNum, NextValueNo, Cond, CondTypeID, CurBB))
return error("Invalid record");
// select condition can be either i1 or [N x i1]
@@ -4381,6 +4957,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
I = SelectInst::Create(Cond, TrueVal, FalseVal);
+ ResTypeID = ValTypeID;
InstructionList.push_back(I);
if (OpNum < Record.size() && isa<FPMathOperator>(I)) {
FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
@@ -4393,12 +4970,14 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
unsigned OpNum = 0;
Value *Vec, *Idx;
- if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- getValueTypePair(Record, OpNum, NextValueNo, Idx))
+ unsigned VecTypeID, IdxTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec, VecTypeID, CurBB) ||
+ getValueTypePair(Record, OpNum, NextValueNo, Idx, IdxTypeID, CurBB))
return error("Invalid record");
if (!Vec->getType()->isVectorTy())
return error("Invalid type for value");
I = ExtractElementInst::Create(Vec, Idx);
+ ResTypeID = getContainedTypeID(VecTypeID);
InstructionList.push_back(I);
break;
}
@@ -4406,15 +4985,18 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
unsigned OpNum = 0;
Value *Vec, *Elt, *Idx;
- if (getValueTypePair(Record, OpNum, NextValueNo, Vec))
+ unsigned VecTypeID, IdxTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec, VecTypeID, CurBB))
return error("Invalid record");
if (!Vec->getType()->isVectorTy())
return error("Invalid type for value");
if (popValue(Record, OpNum, NextValueNo,
- cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
- getValueTypePair(Record, OpNum, NextValueNo, Idx))
+ cast<VectorType>(Vec->getType())->getElementType(),
+ getContainedTypeID(VecTypeID), Elt, CurBB) ||
+ getValueTypePair(Record, OpNum, NextValueNo, Idx, IdxTypeID, CurBB))
return error("Invalid record");
I = InsertElementInst::Create(Vec, Elt, Idx);
+ ResTypeID = VecTypeID;
InstructionList.push_back(I);
break;
}
@@ -4422,16 +5004,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
unsigned OpNum = 0;
Value *Vec1, *Vec2, *Mask;
- if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
- popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
+ unsigned Vec1TypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec1, Vec1TypeID,
+ CurBB) ||
+ popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec1TypeID,
+ Vec2, CurBB))
return error("Invalid record");
- if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
+ unsigned MaskTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Mask, MaskTypeID, CurBB))
return error("Invalid record");
if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy())
return error("Invalid type for value");
I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+ ResTypeID =
+ getVirtualTypeID(I->getType(), getContainedTypeID(Vec1TypeID));
InstructionList.push_back(I);
break;
}
@@ -4445,8 +5033,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *LHS, *RHS;
- if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
- popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS))
+ unsigned LHSTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, LHS, LHSTypeID, CurBB) ||
+ popValue(Record, OpNum, NextValueNo, LHS->getType(), LHSTypeID, RHS,
+ CurBB))
return error("Invalid record");
if (OpNum >= Record.size())
@@ -4467,6 +5057,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
else
I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS);
+ ResTypeID = getVirtualTypeID(I->getType()->getScalarType());
+ if (LHS->getType()->isVectorTy())
+ ResTypeID = getVirtualTypeID(I->getType(), ResTypeID);
+
if (FMF.any())
I->setFastMathFlags(FMF);
InstructionList.push_back(I);
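// Note: i1 (or <N x i1>) need not appear in the module's type table at all,
// so getVirtualTypeID synthesizes a fresh type ID for the comparison result,
// wrapping it in a vector type ID when the operands are vectors.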
@@ -4484,7 +5078,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Op = nullptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB))
return error("Invalid record");
if (OpNum != Record.size())
return error("Invalid record");
@@ -4506,8 +5101,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
else {
BasicBlock *FalseDest = getBasicBlock(Record[1]);
- Value *Cond = getValue(Record, 2, NextValueNo,
- Type::getInt1Ty(Context));
+ Type *CondType = Type::getInt1Ty(Context);
+ Value *Cond = getValue(Record, 2, NextValueNo, CondType,
+ getVirtualTypeID(CondType), CurBB);
if (!FalseDest || !Cond)
return error("Invalid record");
I = BranchInst::Create(TrueDest, FalseDest, Cond);
@@ -4519,8 +5115,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Record.size() != 1 && Record.size() != 2)
return error("Invalid record");
unsigned Idx = 0;
- Value *CleanupPad =
- getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+ Type *TokenTy = Type::getTokenTy(Context);
+ Value *CleanupPad = getValue(Record, Idx++, NextValueNo, TokenTy,
+ getVirtualTypeID(TokenTy), CurBB);
if (!CleanupPad)
return error("Invalid record");
BasicBlock *UnwindDest = nullptr;
@@ -4538,8 +5135,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Record.size() != 2)
return error("Invalid record");
unsigned Idx = 0;
- Value *CatchPad =
- getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+ Type *TokenTy = Type::getTokenTy(Context);
+ Value *CatchPad = getValue(Record, Idx++, NextValueNo, TokenTy,
+ getVirtualTypeID(TokenTy), CurBB);
if (!CatchPad)
return error("Invalid record");
BasicBlock *BB = getBasicBlock(Record[Idx++]);
@@ -4557,8 +5155,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned Idx = 0;
- Value *ParentPad =
- getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+ Type *TokenTy = Type::getTokenTy(Context);
+ Value *ParentPad = getValue(Record, Idx++, NextValueNo, TokenTy,
+ getVirtualTypeID(TokenTy), CurBB);
unsigned NumHandlers = Record[Idx++];
@@ -4585,6 +5184,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
for (BasicBlock *Handler : Handlers)
CatchSwitch->addHandler(Handler);
I = CatchSwitch;
+ ResTypeID = getVirtualTypeID(I->getType());
InstructionList.push_back(I);
break;
}
@@ -4596,15 +5196,17 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned Idx = 0;
- Value *ParentPad =
- getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context));
+ Type *TokenTy = Type::getTokenTy(Context);
+ Value *ParentPad = getValue(Record, Idx++, NextValueNo, TokenTy,
+ getVirtualTypeID(TokenTy), CurBB);
unsigned NumArgOperands = Record[Idx++];
SmallVector<Value *, 2> Args;
for (unsigned Op = 0; Op != NumArgOperands; ++Op) {
Value *Val;
- if (getValueTypePair(Record, Idx, NextValueNo, Val))
+ unsigned ValTypeID;
+ if (getValueTypePair(Record, Idx, NextValueNo, Val, ValTypeID, nullptr))
return error("Invalid record");
Args.push_back(Val);
}
@@ -4616,6 +5218,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
I = CleanupPadInst::Create(ParentPad, Args);
else
I = CatchPadInst::Create(ParentPad, Args);
+ ResTypeID = getVirtualTypeID(I->getType());
InstructionList.push_back(I);
break;
}
@@ -4627,10 +5230,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// Hopefully someday we will have support for case ranges and can use
// this format again.
- Type *OpTy = getTypeByID(Record[1]);
+ unsigned OpTyID = Record[1];
+ Type *OpTy = getTypeByID(OpTyID);
unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth();
- Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
+ Value *Cond = getValue(Record, 2, NextValueNo, OpTy, OpTyID, CurBB);
BasicBlock *Default = getBasicBlock(Record[3]);
if (!OpTy || !Cond || !Default)
return error("Invalid record");
@@ -4684,8 +5288,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Record.size() < 3 || (Record.size() & 1) == 0)
return error("Invalid record");
- Type *OpTy = getTypeByID(Record[0]);
- Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
+ unsigned OpTyID = Record[0];
+ Type *OpTy = getTypeByID(OpTyID);
+ Value *Cond = getValue(Record, 1, NextValueNo, OpTy, OpTyID, CurBB);
BasicBlock *Default = getBasicBlock(Record[2]);
if (!OpTy || !Cond || !Default)
return error("Invalid record");
@@ -4693,8 +5298,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
InstructionList.push_back(SI);
for (unsigned i = 0, e = NumCases; i != e; ++i) {
- ConstantInt *CaseVal =
- dyn_cast_or_null<ConstantInt>(getFnValueByID(Record[3+i*2], OpTy));
+ ConstantInt *CaseVal = dyn_cast_or_null<ConstantInt>(
+ getFnValueByID(Record[3+i*2], OpTy, OpTyID, nullptr));
BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
if (!CaseVal || !DestBB) {
delete SI;
@@ -4708,8 +5313,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
if (Record.size() < 2)
return error("Invalid record");
- Type *OpTy = getTypeByID(Record[0]);
- Value *Address = getValue(Record, 1, NextValueNo, OpTy);
+ unsigned OpTyID = Record[0];
+ Type *OpTy = getTypeByID(OpTyID);
+ Value *Address = getValue(Record, 1, NextValueNo, OpTy, OpTyID, CurBB);
if (!OpTy || !Address)
return error("Invalid record");
unsigned NumDests = Record.size()-2;
@@ -4737,23 +5343,27 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
BasicBlock *NormalBB = getBasicBlock(Record[OpNum++]);
BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]);
+ unsigned FTyID = InvalidTypeID;
FunctionType *FTy = nullptr;
if ((CCInfo >> 13) & 1) {
- FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]));
+ FTyID = Record[OpNum++];
+ FTy = dyn_cast<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Explicit invoke type is not a function type");
}
Value *Callee;
- if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ unsigned CalleeTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee, CalleeTypeID,
+ CurBB))
return error("Invalid record");
PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
if (!CalleeTy)
return error("Callee is not a pointer");
if (!FTy) {
- FTy =
- dyn_cast<FunctionType>(Callee->getType()->getPointerElementType());
+ FTyID = getContainedTypeID(CalleeTypeID);
+ FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Callee is not of pointer to function type");
} else if (!CalleeTy->isOpaqueOrPointeeTypeMatches(FTy))
@@ -4763,11 +5373,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Insufficient operands to call");
SmallVector<Value*, 16> Ops;
- SmallVector<Type *, 16> ArgsTys;
+ SmallVector<unsigned, 16> ArgTyIDs;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
- Ops.push_back(getValue(Record, OpNum, NextValueNo,
- FTy->getParamType(i)));
- ArgsTys.push_back(FTy->getParamType(i));
+ unsigned ArgTyID = getContainedTypeID(FTyID, i + 1);
+ Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i),
+ ArgTyID, CurBB));
+ ArgTyIDs.push_back(ArgTyID);
if (!Ops.back())
return error("Invalid record");
}
@@ -4779,28 +5390,38 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// Read type/value pairs for varargs params.
while (OpNum != Record.size()) {
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB))
return error("Invalid record");
Ops.push_back(Op);
- ArgsTys.push_back(Op->getType());
+ ArgTyIDs.push_back(OpTypeID);
}
}
+ // Upgrade the bundles if needed.
+ if (!OperandBundles.empty())
+ UpgradeOperandBundles(OperandBundles);
+
I = InvokeInst::Create(FTy, Callee, NormalBB, UnwindBB, Ops,
OperandBundles);
+ ResTypeID = getContainedTypeID(FTyID);
OperandBundles.clear();
InstructionList.push_back(I);
cast<InvokeInst>(I)->setCallingConv(
static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
cast<InvokeInst>(I)->setAttributes(PAL);
- propagateAttributeTypes(cast<CallBase>(I), ArgsTys);
+ if (Error Err = propagateAttributeTypes(cast<CallBase>(I), ArgTyIDs)) {
+ I->deleteValue();
+ return Err;
+ }
break;
}
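// Note the deleteValue() on the error path: the invoke was created but not
// yet inserted into a basic block, so it must be destroyed manually when
// attribute upgrading fails.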
case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
unsigned Idx = 0;
Value *Val = nullptr;
- if (getValueTypePair(Record, Idx, NextValueNo, Val))
+ unsigned ValTypeID;
+ if (getValueTypePair(Record, Idx, NextValueNo, Val, ValTypeID, CurBB))
return error("Invalid record");
I = ResumeInst::Create(Val);
InstructionList.push_back(I);
@@ -4818,23 +5439,27 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
for (unsigned i = 0, e = NumIndirectDests; i != e; ++i)
IndirectDests.push_back(getBasicBlock(Record[OpNum++]));
+ unsigned FTyID = InvalidTypeID;
FunctionType *FTy = nullptr;
if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) {
- FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]));
+ FTyID = Record[OpNum++];
+ FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Explicit call type is not a function type");
}
Value *Callee;
- if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ unsigned CalleeTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee, CalleeTypeID,
+ CurBB))
return error("Invalid record");
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
if (!OpTy)
return error("Callee is not a pointer type");
if (!FTy) {
- FTy =
- dyn_cast<FunctionType>(Callee->getType()->getPointerElementType());
+ FTyID = getContainedTypeID(CalleeTypeID);
+ FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Callee is not of pointer to function type");
} else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
@@ -4844,18 +5469,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Insufficient operands to call");
SmallVector<Value*, 16> Args;
- SmallVector<Type *, 16> ArgsTys;
+ SmallVector<unsigned, 16> ArgTyIDs;
// Read the fixed params.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
Value *Arg;
+ unsigned ArgTyID = getContainedTypeID(FTyID, i + 1);
if (FTy->getParamType(i)->isLabelTy())
Arg = getBasicBlock(Record[OpNum]);
else
- Arg = getValue(Record, OpNum, NextValueNo, FTy->getParamType(i));
+ Arg = getValue(Record, OpNum, NextValueNo, FTy->getParamType(i),
+ ArgTyID, CurBB);
if (!Arg)
return error("Invalid record");
Args.push_back(Arg);
- ArgsTys.push_back(Arg->getType());
+ ArgTyIDs.push_back(ArgTyID);
}
// Read type/value pairs for varargs params.
@@ -4865,21 +5492,30 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
} else {
while (OpNum != Record.size()) {
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB))
return error("Invalid record");
Args.push_back(Op);
- ArgsTys.push_back(Op->getType());
+ ArgTyIDs.push_back(OpTypeID);
}
}
+ // Upgrade the bundles if needed.
+ if (!OperandBundles.empty())
+ UpgradeOperandBundles(OperandBundles);
+
I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args,
OperandBundles);
+ ResTypeID = getContainedTypeID(FTyID);
OperandBundles.clear();
InstructionList.push_back(I);
cast<CallBrInst>(I)->setCallingConv(
static_cast<CallingConv::ID>((0x7ff & CCInfo) >> bitc::CALL_CCONV));
cast<CallBrInst>(I)->setAttributes(PAL);
- propagateAttributeTypes(cast<CallBase>(I), ArgsTys);
+ if (Error Err = propagateAttributeTypes(cast<CallBase>(I), ArgTyIDs)) {
+ I->deleteValue();
+ return Err;
+ }
break;
}
case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
@@ -4888,36 +5524,76 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
break;
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
if (Record.empty())
- return error("Invalid record");
+ return error("Invalid phi record");
// The first record specifies the type.
- Type *Ty = getTypeByID(Record[0]);
+ unsigned TyID = Record[0];
+ Type *Ty = getTypeByID(TyID);
if (!Ty)
- return error("Invalid record");
+ return error("Invalid phi record");
// Phi arguments are pairs of records of [value, basic block].
// There is an optional final record for fast-math-flags if this phi has a
// floating-point type.
size_t NumArgs = (Record.size() - 1) / 2;
PHINode *PN = PHINode::Create(Ty, NumArgs);
- if ((Record.size() - 1) % 2 == 1 && !isa<FPMathOperator>(PN))
- return error("Invalid record");
+ if ((Record.size() - 1) % 2 == 1 && !isa<FPMathOperator>(PN)) {
+ PN->deleteValue();
+ return error("Invalid phi record");
+ }
InstructionList.push_back(PN);
+ SmallDenseMap<BasicBlock *, Value *> Args;
for (unsigned i = 0; i != NumArgs; i++) {
- Value *V;
+ BasicBlock *BB = getBasicBlock(Record[i * 2 + 2]);
+ if (!BB) {
+ PN->deleteValue();
+ return error("Invalid phi BB");
+ }
+
+ // Phi nodes may contain the same predecessor multiple times, in which
+ // case the incoming value must be identical. Directly reuse the already
+ // seen value here, to avoid expanding a constant expression multiple
+ // times.
+ auto It = Args.find(BB);
+ if (It != Args.end()) {
+ PN->addIncoming(It->second, BB);
+ continue;
+ }
+
+ // If there already is a block for this edge (from a different phi),
+ // use it.
+ BasicBlock *EdgeBB = ConstExprEdgeBBs.lookup({BB, CurBB});
+ if (!EdgeBB) {
+ // Otherwise, use a temporary block (that we will discard if it
+ // turns out to be unnecessary).
+ if (!PhiConstExprBB)
+ PhiConstExprBB = BasicBlock::Create(Context, "phi.constexpr", F);
+ EdgeBB = PhiConstExprBB;
+ }
+
// With the new function encoding, it is possible that operands have
// negative IDs (for forward references). Use a signed VBR
// representation to keep the encoding small.
+ Value *V;
if (UseRelativeIDs)
- V = getValueSigned(Record, i * 2 + 1, NextValueNo, Ty);
+ V = getValueSigned(Record, i * 2 + 1, NextValueNo, Ty, TyID, EdgeBB);
else
- V = getValue(Record, i * 2 + 1, NextValueNo, Ty);
- BasicBlock *BB = getBasicBlock(Record[i * 2 + 2]);
- if (!V || !BB)
- return error("Invalid record");
+ V = getValue(Record, i * 2 + 1, NextValueNo, Ty, TyID, EdgeBB);
+ if (!V) {
+ PN->deleteValue();
+ PhiConstExprBB->eraseFromParent();
+ return error("Invalid phi record");
+ }
+
+ if (EdgeBB == PhiConstExprBB && !EdgeBB->empty()) {
+ ConstExprEdgeBBs.insert({{BB, CurBB}, EdgeBB});
+ PhiConstExprBB = nullptr;
+ }
PN->addIncoming(V, BB);
+ Args.insert({BB, V});
}
I = PN;
+ ResTypeID = TyID;
// If there are an even number of records, the final record must be FMF.
if (Record.size() % 2 == 0) {
@@ -4942,12 +5618,15 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Record.size() < 4)
return error("Invalid record");
}
- Type *Ty = getTypeByID(Record[Idx++]);
+ ResTypeID = Record[Idx++];
+ Type *Ty = getTypeByID(ResTypeID);
if (!Ty)
return error("Invalid record");
if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) {
Value *PersFn = nullptr;
- if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
+ unsigned PersFnTypeID;
+ if (getValueTypePair(Record, Idx, NextValueNo, PersFn, PersFnTypeID,
+ nullptr))
return error("Invalid record");
if (!F->hasPersonalityFn())
@@ -4964,8 +5643,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
LandingPadInst::ClauseType CT =
LandingPadInst::ClauseType(Record[Idx++]); (void)CT;
Value *Val;
-      if (getValueTypePair(Record, Idx, NextValueNo, Val)) {
+      unsigned ValTypeID;
+ if (getValueTypePair(Record, Idx, NextValueNo, Val, ValTypeID,
+ nullptr)) {
delete LP;
return error("Invalid record");
}
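
Nearly every hunk in this function-body parser follows the same recipe: wherever the reader previously recovered an element type via Type::getPointerElementType(), it now threads the operand's bitcode type-table ID through the helpers and asks the type table for the contained type, which keeps working once pointers become opaque. A minimal sketch of the pattern, using the helper names that appear throughout this diff (the exact signatures are inferred from the call sites, not quoted from a header):

    // Read a (value, type) pair; the helper also reports the operand's
    // type-table ID and takes a block into which constant expressions may
    // be expanded (nullptr where expansion is disallowed, as in the
    // landingpad clauses above).
    unsigned PtrTypeID;
    Value *Ptr;
    if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB))
      return error("Invalid record");
    // Recover the pointee from the type table, not from the pointer Type*.
    unsigned ElemTypeID = getContainedTypeID(PtrTypeID);
    Type *ElemTy = getTypeByID(ElemTypeID);
    if (!ElemTy)
      return error("Missing element type");
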
@@ -4985,21 +5666,23 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
- if (Record.size() != 4)
+ if (Record.size() != 4 && Record.size() != 5)
return error("Invalid record");
using APV = AllocaPackedValues;
const uint64_t Rec = Record[3];
const bool InAlloca = Bitfield::get<APV::UsedWithInAlloca>(Rec);
const bool SwiftError = Bitfield::get<APV::SwiftError>(Rec);
- Type *Ty = getTypeByID(Record[0]);
+ unsigned TyID = Record[0];
+ Type *Ty = getTypeByID(TyID);
if (!Bitfield::get<APV::ExplicitType>(Rec)) {
- auto *PTy = dyn_cast_or_null<PointerType>(Ty);
- if (!PTy)
- return error("Old-style alloca with a non-pointer type");
- Ty = PTy->getPointerElementType();
+ TyID = getContainedTypeID(TyID);
+ Ty = getTypeByID(TyID);
+ if (!Ty)
+ return error("Missing element type for old-style alloca");
}
- Type *OpTy = getTypeByID(Record[1]);
- Value *Size = getFnValueByID(Record[2], OpTy);
+ unsigned OpTyID = Record[1];
+ Type *OpTy = getTypeByID(OpTyID);
+ Value *Size = getFnValueByID(Record[2], OpTy, OpTyID, CurBB);
MaybeAlign Align;
uint64_t AlignExp =
Bitfield::get<APV::AlignLower>(Rec) |
@@ -5010,9 +5693,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (!Ty || !Size)
return error("Invalid record");
- // FIXME: Make this an optional field.
const DataLayout &DL = TheModule->getDataLayout();
- unsigned AS = DL.getAllocaAddrSpace();
+ unsigned AS = Record.size() == 5 ? Record[4] : DL.getAllocaAddrSpace();
SmallPtrSet<Type *, 4> Visited;
if (!Align && !Ty->isSized(&Visited))
@@ -5024,13 +5706,15 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
AI->setUsedWithInAlloca(InAlloca);
AI->setSwiftError(SwiftError);
I = AI;
+ ResTypeID = getVirtualTypeID(AI->getType(), TyID);
InstructionList.push_back(I);
break;
}
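
The alloca record gains an optional fifth operand carrying the address space, resolving the FIXME that used to hardcode the datalayout default. The compatibility scheme is the usual bitcode one, sketched here together with its writer-side counterpart from the BitcodeWriter.cpp hunk later in this patch:

    // Reader: accept both the old 4-field and the new 5-field form.
    unsigned AS = Record.size() == 5 ? Record[4] : DL.getAllocaAddrSpace();

    // Writer: append the field only when it carries information, so the
    // common case still round-trips as a compact 4-field record.
    unsigned AllocaAS = AI.getAddressSpace();
    if (AllocaAS != M.getDataLayout().getAllocaAddrSpace())
      Vals.push_back(AllocaAS);
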
case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
unsigned OpNum = 0;
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB) ||
(OpNum + 2 != Record.size() && OpNum + 3 != Record.size()))
return error("Invalid record");
@@ -5039,9 +5723,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Type *Ty = nullptr;
if (OpNum + 3 == Record.size()) {
- Ty = getTypeByID(Record[OpNum++]);
+ ResTypeID = Record[OpNum++];
+ Ty = getTypeByID(ResTypeID);
} else {
- Ty = Op->getType()->getPointerElementType();
+ ResTypeID = getContainedTypeID(OpTypeID);
+ Ty = getTypeByID(ResTypeID);
+ if (!Ty)
+ return error("Missing element type for old-style load");
}
if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
@@ -5063,7 +5751,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// LOADATOMIC: [opty, op, align, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB) ||
(OpNum + 4 != Record.size() && OpNum + 5 != Record.size()))
return error("Invalid record");
@@ -5072,9 +5761,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Type *Ty = nullptr;
if (OpNum + 5 == Record.size()) {
- Ty = getTypeByID(Record[OpNum++]);
+ ResTypeID = Record[OpNum++];
+ Ty = getTypeByID(ResTypeID);
} else {
- Ty = Op->getType()->getPointerElementType();
+ ResTypeID = getContainedTypeID(OpTypeID);
+ Ty = getTypeByID(ResTypeID);
+ if (!Ty)
+ return error("Missing element type for old style atomic load");
}
if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
@@ -5102,12 +5795,21 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol]
unsigned OpNum = 0;
Value *Val, *Ptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- (BitCode == bitc::FUNC_CODE_INST_STORE
- ? getValueTypePair(Record, OpNum, NextValueNo, Val)
- : popValue(Record, OpNum, NextValueNo,
- Ptr->getType()->getPointerElementType(), Val)) ||
- OpNum + 2 != Record.size())
+ unsigned PtrTypeID, ValTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB))
+ return error("Invalid record");
+
+ if (BitCode == bitc::FUNC_CODE_INST_STORE) {
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB))
+ return error("Invalid record");
+ } else {
+ ValTypeID = getContainedTypeID(PtrTypeID);
+ if (popValue(Record, OpNum, NextValueNo, getTypeByID(ValTypeID),
+ ValTypeID, Val, CurBB))
+ return error("Invalid record");
+ }
+
+ if (OpNum + 2 != Record.size())
return error("Invalid record");
if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
@@ -5129,13 +5831,21 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Val, *Ptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- !isa<PointerType>(Ptr->getType()) ||
- (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC
- ? getValueTypePair(Record, OpNum, NextValueNo, Val)
- : popValue(Record, OpNum, NextValueNo,
- Ptr->getType()->getPointerElementType(), Val)) ||
- OpNum + 4 != Record.size())
+ unsigned PtrTypeID, ValTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB) ||
+ !isa<PointerType>(Ptr->getType()))
+ return error("Invalid record");
+ if (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC) {
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB))
+ return error("Invalid record");
+ } else {
+ ValTypeID = getContainedTypeID(PtrTypeID);
+ if (popValue(Record, OpNum, NextValueNo, getTypeByID(ValTypeID),
+ ValTypeID, Val, CurBB))
+ return error("Invalid record");
+ }
+
+ if (OpNum + 4 != Record.size())
return error("Invalid record");
if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
@@ -5164,20 +5874,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
const size_t NumRecords = Record.size();
unsigned OpNum = 0;
Value *Ptr = nullptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr))
+ unsigned PtrTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB))
return error("Invalid record");
if (!isa<PointerType>(Ptr->getType()))
return error("Cmpxchg operand is not a pointer type");
Value *Cmp = nullptr;
- if (popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getPointerElementType(),
- Cmp))
+ unsigned CmpTypeID = getContainedTypeID(PtrTypeID);
+ if (popValue(Record, OpNum, NextValueNo, getTypeByID(CmpTypeID),
+ CmpTypeID, Cmp, CurBB))
return error("Invalid record");
Value *New = nullptr;
- if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
+ if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), CmpTypeID,
+ New, CurBB) ||
NumRecords < OpNum + 3 || NumRecords > OpNum + 5)
return error("Invalid record");
@@ -5214,8 +5926,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// expecting the first component of a modern cmpxchg.
CurBB->getInstList().push_back(I);
I = ExtractValueInst::Create(I, 0);
+ ResTypeID = CmpTypeID;
} else {
cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum + 4]);
+ unsigned I1TypeID = getVirtualTypeID(Type::getInt1Ty(Context));
+ ResTypeID = getVirtualTypeID(I->getType(), {CmpTypeID, I1TypeID});
}
InstructionList.push_back(I);
@@ -5227,18 +5942,21 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
const size_t NumRecords = Record.size();
unsigned OpNum = 0;
Value *Ptr = nullptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr))
+ unsigned PtrTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB))
return error("Invalid record");
if (!isa<PointerType>(Ptr->getType()))
return error("Cmpxchg operand is not a pointer type");
Value *Cmp = nullptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Cmp))
+ unsigned CmpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, CmpTypeID, CurBB))
return error("Invalid record");
Value *Val = nullptr;
- if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), Val))
+ if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), CmpTypeID, Val,
+ CurBB))
return error("Invalid record");
if (NumRecords < OpNum + 3 || NumRecords > OpNum + 6)
@@ -5278,6 +5996,9 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
cast<AtomicCmpXchgInst>(I)->setVolatile(IsVol);
cast<AtomicCmpXchgInst>(I)->setWeak(IsWeak);
+ unsigned I1TypeID = getVirtualTypeID(Type::getInt1Ty(Context));
+ ResTypeID = getVirtualTypeID(I->getType(), {CmpTypeID, I1TypeID});
+
InstructionList.push_back(I);
break;
}
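
cmpxchg is the one instruction in this switch whose result type matches no single operand: it yields a { T, i1 } pair of the loaded value and a success bit. The result still needs a type-table entry, so the reader mints one with getVirtualTypeID, which (judging from its uses in this diff) registers a type together with the IDs of its contained types so that getContainedTypeID keeps working on it:

    // Sketch: synthesize an ID for { T, i1 }, the cmpxchg result type.
    unsigned I1TypeID = getVirtualTypeID(Type::getInt1Ty(Context));
    ResTypeID = getVirtualTypeID(I->getType(), {CmpTypeID, I1TypeID});

The legacy CMPXCHG_OLD path that returned only the value keeps ResTypeID = CmpTypeID instead, matching the ExtractValueInst it synthesizes.
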
@@ -5289,20 +6010,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Ptr = nullptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr))
+ unsigned PtrTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, PtrTypeID, CurBB))
return error("Invalid record");
if (!isa<PointerType>(Ptr->getType()))
return error("Invalid record");
Value *Val = nullptr;
+ unsigned ValTypeID = InvalidTypeID;
if (BitCode == bitc::FUNC_CODE_INST_ATOMICRMW_OLD) {
+ ValTypeID = getContainedTypeID(PtrTypeID);
if (popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getPointerElementType(),
- Val))
+ getTypeByID(ValTypeID), ValTypeID, Val, CurBB))
return error("Invalid record");
} else {
- if (getValueTypePair(Record, OpNum, NextValueNo, Val))
+ if (getValueTypePair(Record, OpNum, NextValueNo, Val, ValTypeID, CurBB))
return error("Invalid record");
}
@@ -5336,6 +6059,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Align(TheModule->getDataLayout().getTypeStoreSize(Val->getType()));
I = new AtomicRMWInst(Operation, Ptr, Val, *Alignment, Ordering, SSID);
+ ResTypeID = ValTypeID;
cast<AtomicRMWInst>(I)->setVolatile(IsVol);
InstructionList.push_back(I);
@@ -5370,23 +6094,27 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Fast math flags indicator set for call with no FMF");
}
+ unsigned FTyID = InvalidTypeID;
FunctionType *FTy = nullptr;
if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) {
- FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]));
+ FTyID = Record[OpNum++];
+ FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Explicit call type is not a function type");
}
Value *Callee;
- if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ unsigned CalleeTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee, CalleeTypeID,
+ CurBB))
return error("Invalid record");
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
if (!OpTy)
return error("Callee is not a pointer type");
if (!FTy) {
- FTy =
- dyn_cast<FunctionType>(Callee->getType()->getPointerElementType());
+ FTyID = getContainedTypeID(CalleeTypeID);
+ FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Callee is not of pointer to function type");
} else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
@@ -5396,15 +6124,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Insufficient operands to call");
SmallVector<Value*, 16> Args;
- SmallVector<Type *, 16> ArgsTys;
+ SmallVector<unsigned, 16> ArgTyIDs;
// Read the fixed params.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ unsigned ArgTyID = getContainedTypeID(FTyID, i + 1);
if (FTy->getParamType(i)->isLabelTy())
Args.push_back(getBasicBlock(Record[OpNum]));
else
Args.push_back(getValue(Record, OpNum, NextValueNo,
- FTy->getParamType(i)));
- ArgsTys.push_back(FTy->getParamType(i));
+ FTy->getParamType(i), ArgTyID, CurBB));
+ ArgTyIDs.push_back(ArgTyID);
if (!Args.back())
return error("Invalid record");
}
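
For the fixed call parameters, the per-argument type IDs are derived from the callee's function type ID rather than from the parameter Type*. The indexing convention implied by these call sites is that contained type 0 of a function type is the return type and contained type i + 1 is parameter i:

    // Assumed layout of a function type's contained-type IDs:
    //   getContainedTypeID(FTyID)        -> return type (index 0)
    //   getContainedTypeID(FTyID, i + 1) -> i-th parameter type
    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
      ArgTyIDs.push_back(getContainedTypeID(FTyID, i + 1));
    ResTypeID = getContainedTypeID(FTyID);
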
@@ -5416,14 +6145,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
} else {
while (OpNum != Record.size()) {
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB))
return error("Invalid record");
Args.push_back(Op);
- ArgsTys.push_back(Op->getType());
+ ArgTyIDs.push_back(OpTypeID);
}
}
+ // Upgrade the bundles if needed.
+ if (!OperandBundles.empty())
+ UpgradeOperandBundles(OperandBundles);
+
I = CallInst::Create(FTy, Callee, Args, OperandBundles);
+ ResTypeID = getContainedTypeID(FTyID);
OperandBundles.clear();
InstructionList.push_back(I);
cast<CallInst>(I)->setCallingConv(
@@ -5437,7 +6172,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
TCK = CallInst::TCK_NoTail;
cast<CallInst>(I)->setTailCallKind(TCK);
cast<CallInst>(I)->setAttributes(PAL);
- propagateAttributeTypes(cast<CallBase>(I), ArgsTys);
+ if (Error Err = propagateAttributeTypes(cast<CallBase>(I), ArgTyIDs)) {
+ I->deleteValue();
+ return Err;
+ }
if (FMF.any()) {
if (!isa<FPMathOperator>(I))
return error("Fast-math-flags specified for call without "
@@ -5449,9 +6187,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
if (Record.size() < 3)
return error("Invalid record");
- Type *OpTy = getTypeByID(Record[0]);
- Value *Op = getValue(Record, 1, NextValueNo, OpTy);
- Type *ResTy = getTypeByID(Record[2]);
+ unsigned OpTyID = Record[0];
+ Type *OpTy = getTypeByID(OpTyID);
+ Value *Op = getValue(Record, 1, NextValueNo, OpTy, OpTyID, CurBB);
+ ResTypeID = Record[2];
+ Type *ResTy = getTypeByID(ResTypeID);
if (!OpTy || !Op || !ResTy)
return error("Invalid record");
I = new VAArgInst(Op, ResTy);
@@ -5472,7 +6212,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned OpNum = 1;
while (OpNum != Record.size()) {
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB))
return error("Invalid record");
Inputs.push_back(Op);
}
@@ -5484,12 +6225,14 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_FREEZE: { // FREEZE: [opty,opval]
unsigned OpNum = 0;
Value *Op = nullptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ unsigned OpTypeID;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, OpTypeID, CurBB))
return error("Invalid record");
if (OpNum != Record.size())
return error("Invalid record");
I = new FreezeInst(Op);
+ ResTypeID = OpTypeID;
InstructionList.push_back(I);
break;
}
@@ -5514,8 +6257,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
// Non-void values get registered in the value table for future use.
- if (!I->getType()->isVoidTy())
- ValueList.assignValue(I, NextValueNo++);
+ if (!I->getType()->isVoidTy()) {
+ assert(I->getType() == getTypeByID(ResTypeID) &&
+ "Incorrect result type ID");
+ if (Error Err = ValueList.assignValue(NextValueNo++, I, ResTypeID))
+ return Err;
+ }
}
OutOfRecordLoop:
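
The registration code at the bottom of the loop is what obliges every case above to compute ResTypeID: non-void instructions are recorded together with their type ID, the assert cross-checks that ID against the materialized type in asserts-enabled builds, and assignValue can now fail (for instance when a forward reference was created with a mismatched type), so the error is propagated rather than silently corrupting the table. The contract each case must satisfy, in sketch form:

    // Any case that produces a value must leave ResTypeID such that
    // getTypeByID(ResTypeID) == I->getType() when control reaches here.
    if (!I->getType()->isVoidTy()) {
      assert(I->getType() == getTypeByID(ResTypeID) && "Incorrect result type ID");
      if (Error Err = ValueList.assignValue(NextValueNo++, I, ResTypeID))
        return Err;
    }
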
@@ -5541,6 +6288,19 @@ OutOfRecordLoop:
if (MDLoader->hasFwdRefs())
return error("Invalid function metadata: outgoing forward refs");
+ if (PhiConstExprBB)
+ PhiConstExprBB->eraseFromParent();
+
+ for (const auto &Pair : ConstExprEdgeBBs) {
+ BasicBlock *From = Pair.first.first;
+ BasicBlock *To = Pair.first.second;
+ BasicBlock *EdgeBB = Pair.second;
+ BranchInst::Create(To, EdgeBB);
+ From->getTerminator()->replaceSuccessorWith(To, EdgeBB);
+ To->replacePhiUsesWith(From, EdgeBB);
+ EdgeBB->moveBefore(To);
+ }
+
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
MDLoader->shrinkTo(ModuleMDLoaderSize);
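
The epilogue a few lines up handles constant expressions that were expanded for phi operands: the expanded instructions must execute only on the edge they belong to, so they cannot live in CurBB. Each recorded (From, To) pair therefore gets its own edge block, an unused scratch block (PhiConstExprBB) is discarded, and the CFG is rewired per pair roughly as follows:

    //   From: ...; br %To                From: ...; br %EdgeBB
    //   To:   %p = phi [%v, %From]  =>   EdgeBB: <expanded insts>; br %To
    //                                    To:   %p = phi [%v, %EdgeBB]
    BranchInst::Create(To, EdgeBB);
    From->getTerminator()->replaceSuccessorWith(To, EdgeBB);
    To->replacePhiUsesWith(From, EdgeBB);
    EdgeBB->moveBefore(To); // keep a readable block order
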
@@ -5913,8 +6673,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
break;
case bitc::BLOCKINFO_BLOCK_ID:
// Need to parse these to get abbrev ids (e.g. for VST)
- if (readBlockInfo())
- return error("Malformed block");
+ if (Error Err = readBlockInfo())
+ return Err;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
// Should have been parsed earlier via VSTOffset, unless there
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 0f4111514057..0d57ae4ef9df 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -9,74 +9,60 @@
#include "MetadataLoader.h"
#include "ValueList.h"
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitstream/BitstreamReader.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/Comdat.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GVMaterializer.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/ModuleSummaryIndex.h"
-#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/type_traits.h"
+
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <deque>
+#include <iterator>
#include <limits>
-#include <map>
#include <string>
-#include <system_error>
#include <tuple>
+#include <type_traits>
#include <utility>
#include <vector>
+namespace llvm {
+class Argument;
+}
using namespace llvm;
@@ -678,8 +664,8 @@ public:
bool hasSeenOldLoopTags() const { return HasSeenOldLoopTags; }
- Error parseMetadataAttachment(
- Function &F, const SmallVectorImpl<Instruction *> &InstructionList);
+ Error parseMetadataAttachment(Function &F,
+ ArrayRef<Instruction *> InstructionList);
Error parseMetadataKinds();
@@ -1233,14 +1219,16 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
- Type *Ty = getTypeByID(Record[0]);
+ unsigned TyID = Record[0];
+ Type *Ty = getTypeByID(TyID);
if (Ty->isMetadataTy() || Ty->isVoidTy()) {
dropRecord();
break;
}
MetadataList.assignValue(
- LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
+ LocalAsMetadata::get(ValueList.getValueFwdRef(
+ Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1253,14 +1241,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
unsigned Size = Record.size();
SmallVector<Metadata *, 8> Elts;
for (unsigned i = 0; i != Size; i += 2) {
- Type *Ty = getTypeByID(Record[i]);
+ unsigned TyID = Record[i];
+ Type *Ty = getTypeByID(TyID);
if (!Ty)
return error("Invalid record");
if (Ty->isMetadataTy())
Elts.push_back(getMD(Record[i + 1]));
else if (!Ty->isVoidTy()) {
- auto *MD =
- ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty));
+ auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef(
+ Record[i + 1], Ty, TyID, /*ConstExprInsertBB*/ nullptr));
assert(isa<ConstantAsMetadata>(MD) &&
"Expected non-function-local metadata");
Elts.push_back(MD);
@@ -1275,12 +1264,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Record.size() != 2)
return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
+ unsigned TyID = Record[0];
+ Type *Ty = getTypeByID(TyID);
if (Ty->isMetadataTy() || Ty->isVoidTy())
return error("Invalid record");
MetadataList.assignValue(
- ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
+ ValueAsMetadata::get(ValueList.getValueFwdRef(
+ Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1514,6 +1505,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type)) {
Flags = Flags | DINode::FlagFwdDecl;
+ if (Name) {
+ // This is a hack around preserving template parameters for simplified
+ // template names - it should probably be replaced with a
+ // DICompositeType flag specifying whether template parameters are
+ // required on declarations of this type.
+ StringRef NameStr = Name->getString();
+ if (!NameStr.contains('<') || NameStr.startswith("_STN|"))
+ TemplateParams = getMDOrNull(Record[14]);
+ }
} else {
BaseType = getDITypeRefOrNull(Record[6]);
OffsetInBits = Record[9];
@@ -1700,6 +1700,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
bool HasThisAdj = true;
bool HasThrownTypes = true;
bool HasAnnotations = false;
+ bool HasTargetFuncName = false;
unsigned OffsetA = 0;
unsigned OffsetB = 0;
if (!HasSPFlags) {
@@ -1713,6 +1714,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
HasThrownTypes = Record.size() >= 21;
} else {
HasAnnotations = Record.size() >= 19;
+ HasTargetFuncName = Record.size() >= 20;
}
Metadata *CUorFn = getMDOrNull(Record[12 + OffsetB]);
DISubprogram *SP = GET_OR_DISTINCT(
@@ -1737,7 +1739,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
HasThrownTypes ? getMDOrNull(Record[17 + OffsetB])
: nullptr, // thrownTypes
HasAnnotations ? getMDOrNull(Record[18 + OffsetB])
- : nullptr // annotations
+ : nullptr, // annotations
+ HasTargetFuncName ? getMDString(Record[19 + OffsetB])
+ : nullptr // targetFuncName
));
MetadataList.assignValue(SP, NextMetadataNo);
NextMetadataNo++;
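
This is the standard way metadata records grow: a field exists iff the record is long enough, so bitcode written before the field was introduced keeps parsing, while the writer (see the writeDISubprogram hunk later in this patch) appends the new operand unconditionally. The gating for the new targetFuncName, in isolation:

    // Size-gated optional field: absent in old records, trailing in new.
    bool HasTargetFuncName = Record.size() >= 20;
    Metadata *TargetFuncName =
        HasTargetFuncName ? getMDString(Record[19 + OffsetB]) : nullptr;
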
@@ -2047,8 +2051,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_IMPORTED_ENTITY: {
- if (Record.size() < 6 && Record.size() > 8)
- return error("Invalid record");
+ if (Record.size() < 6 || Record.size() > 8)
+ return error("Invalid DIImportedEntity record");
IsDistinct = Record[0];
bool HasFile = (Record.size() >= 7);
@@ -2181,7 +2185,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment(
/// Parse metadata attachments.
Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
- Function &F, const SmallVectorImpl<Instruction *> &InstructionList) {
+ Function &F, ArrayRef<Instruction *> InstructionList) {
if (Error Err = Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
return Err;
@@ -2357,7 +2361,7 @@ DISubprogram *MetadataLoader::lookupSubprogramForFunction(Function *F) {
}
Error MetadataLoader::parseMetadataAttachment(
- Function &F, const SmallVectorImpl<Instruction *> &InstructionList) {
+ Function &F, ArrayRef<Instruction *> InstructionList) {
return Pimpl->parseMetadataAttachment(F, InstructionList);
}
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.h b/llvm/lib/Bitcode/Reader/MetadataLoader.h
index 709800850f0d..653f1402bead 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.h
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_BITCODE_READER_METADATALOADER_H
#define LLVM_LIB_BITCODE_READER_METADATALOADER_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Error.h"
#include <functional>
@@ -28,6 +27,7 @@ class Instruction;
class Metadata;
class Module;
class Type;
+template <typename T> class ArrayRef;
/// Helper class that handles loading Metadatas and keeping them available.
class MetadataLoader {
@@ -66,8 +66,8 @@ public:
DISubprogram *lookupSubprogramForFunction(Function *F);
/// Parse a `METADATA_ATTACHMENT` block for a function.
- Error parseMetadataAttachment(
- Function &F, const SmallVectorImpl<Instruction *> &InstructionList);
+ Error parseMetadataAttachment(Function &F,
+ ArrayRef<Instruction *> InstructionList);
/// Parse a `METADATA_KIND` block for the current module.
Error parseMetadataKinds();
diff --git a/llvm/lib/Bitcode/Reader/ValueList.cpp b/llvm/lib/Bitcode/Reader/ValueList.cpp
index 86ed664070f6..b9dbf904c89e 100644
--- a/llvm/lib/Bitcode/Reader/ValueList.cpp
+++ b/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -17,80 +17,44 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include <algorithm>
#include <cstddef>
-#include <limits>
using namespace llvm;
-namespace llvm {
-
-namespace {
-
-/// A class for maintaining the slot number definition
-/// as a placeholder for the actual definition for forward constants defs.
-class ConstantPlaceHolder : public ConstantExpr {
-public:
- explicit ConstantPlaceHolder(Type *Ty, LLVMContext &Context)
- : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
- Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
- }
-
- ConstantPlaceHolder &operator=(const ConstantPlaceHolder &) = delete;
-
- // allocate space for exactly one operand
- void *operator new(size_t s) { return User::operator new(s, 1); }
-
- /// Methods to support type inquiry through isa, cast, and dyn_cast.
- static bool classof(const Value *V) {
- return isa<ConstantExpr>(V) &&
- cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
- }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-} // end anonymous namespace
-
-// FIXME: can we inherit this from ConstantExpr?
-template <>
-struct OperandTraits<ConstantPlaceHolder>
- : public FixedNumOperandTraits<ConstantPlaceHolder, 1> {};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
-
-} // end namespace llvm
-
-void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
+Error BitcodeReaderValueList::assignValue(unsigned Idx, Value *V,
+ unsigned TypeID) {
if (Idx == size()) {
- push_back(V);
- return;
+ push_back(V, TypeID);
+ return Error::success();
}
if (Idx >= size())
resize(Idx + 1);
- WeakTrackingVH &OldV = ValuePtrs[Idx];
- if (!OldV) {
- OldV = V;
- return;
+ auto &Old = ValuePtrs[Idx];
+ if (!Old.first) {
+ Old.first = V;
+ Old.second = TypeID;
+ return Error::success();
}
- // Handle constants and non-constants (e.g. instrs) differently for
- // efficiency.
- if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
- ResolveConstants.push_back(std::make_pair(PHC, Idx));
- OldV = V;
- } else {
- // If there was a forward reference to this value, replace it.
- Value *PrevVal = OldV;
- OldV->replaceAllUsesWith(V);
- PrevVal->deleteValue();
- }
+ assert(!isa<Constant>(&*Old.first) && "Shouldn't update constant");
+ // If there was a forward reference to this value, replace it.
+ Value *PrevVal = Old.first;
+ if (PrevVal->getType() != V->getType())
+ return createStringError(
+ std::errc::illegal_byte_sequence,
+ "Assigned value does not match type of forward declaration");
+ Old.first->replaceAllUsesWith(V);
+ PrevVal->deleteValue();
+ return Error::success();
}
-Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty,
+ unsigned TyID,
+ BasicBlock *ConstExprInsertBB) {
// Bail out for a clearly invalid value.
if (Idx >= RefsUpperBound)
return nullptr;
@@ -98,31 +62,18 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
if (Idx >= size())
resize(Idx + 1);
- if (Value *V = ValuePtrs[Idx]) {
- if (Ty != V->getType())
- report_fatal_error("Type mismatch in constant table!");
- return cast<Constant>(V);
- }
-
- // Create and return a placeholder, which will later be RAUW'd.
- Constant *C = new ConstantPlaceHolder(Ty, Context);
- ValuePtrs[Idx] = C;
- return C;
-}
-
-Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
- // Bail out for a clearly invalid value.
- if (Idx >= RefsUpperBound)
- return nullptr;
-
- if (Idx >= size())
- resize(Idx + 1);
-
- if (Value *V = ValuePtrs[Idx]) {
+ if (Value *V = ValuePtrs[Idx].first) {
// If the types don't match, it's invalid.
if (Ty && Ty != V->getType())
return nullptr;
- return V;
+
+ Expected<Value *> MaybeV = MaterializeValueFn(Idx, ConstExprInsertBB);
+ if (!MaybeV) {
+ // TODO: We might want to propagate the precise error message here.
+ consumeError(MaybeV.takeError());
+ return nullptr;
+ }
+ return MaybeV.get();
}
// No type specified, must be invalid reference.
@@ -131,86 +82,6 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
// Create and return a placeholder, which will later be RAUW'd.
Value *V = new Argument(Ty);
- ValuePtrs[Idx] = V;
+ ValuePtrs[Idx] = {V, TyID};
return V;
}
-
-/// Once all constants are read, this method bulk resolves any forward
-/// references. The idea behind this is that we sometimes get constants (such
-/// as large arrays) which reference *many* forward ref constants. Replacing
-/// each of these causes a lot of thrashing when building/reuniquing the
-/// constant. Instead of doing this, we look at all the uses and rewrite all
-/// the place holders at once for any constant that uses a placeholder.
-void BitcodeReaderValueList::resolveConstantForwardRefs() {
- // Sort the values by-pointer so that they are efficient to look up with a
- // binary search.
- llvm::sort(ResolveConstants);
-
- SmallVector<Constant *, 64> NewOps;
-
- while (!ResolveConstants.empty()) {
- Value *RealVal = operator[](ResolveConstants.back().second);
- Constant *Placeholder = ResolveConstants.back().first;
- ResolveConstants.pop_back();
-
- // Loop over all users of the placeholder, updating them to reference the
- // new value. If they reference more than one placeholder, update them all
- // at once.
- while (!Placeholder->use_empty()) {
- auto UI = Placeholder->user_begin();
- User *U = *UI;
-
- // If the using object isn't uniqued, just update the operands. This
- // handles instructions and initializers for global variables.
- if (!isa<Constant>(U) || isa<GlobalValue>(U)) {
- UI.getUse().set(RealVal);
- continue;
- }
-
- // Otherwise, we have a constant that uses the placeholder. Replace that
- // constant with a new constant that has *all* placeholder uses updated.
- Constant *UserC = cast<Constant>(U);
- for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end(); I != E;
- ++I) {
- Value *NewOp;
- if (!isa<ConstantPlaceHolder>(*I)) {
- // Not a placeholder reference.
- NewOp = *I;
- } else if (*I == Placeholder) {
- // Common case is that it just references this one placeholder.
- NewOp = RealVal;
- } else {
- // Otherwise, look up the placeholder in ResolveConstants.
- ResolveConstantsTy::iterator It = llvm::lower_bound(
- ResolveConstants,
- std::pair<Constant *, unsigned>(cast<Constant>(*I), 0));
- assert(It != ResolveConstants.end() && It->first == *I);
- NewOp = operator[](It->second);
- }
-
- NewOps.push_back(cast<Constant>(NewOp));
- }
-
- // Make the new constant.
- Constant *NewC;
- if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
- NewC = ConstantArray::get(UserCA->getType(), NewOps);
- } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
- NewC = ConstantStruct::get(UserCS->getType(), NewOps);
- } else if (isa<ConstantVector>(UserC)) {
- NewC = ConstantVector::get(NewOps);
- } else {
- assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
- NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
- }
-
- UserC->replaceAllUsesWith(NewC);
- UserC->destroyConstant();
- NewOps.clear();
- }
-
- // Update all ValueHandles, they should be the only users at this point.
- Placeholder->replaceAllUsesWith(RealVal);
- delete cast<ConstantPlaceHolder>(Placeholder);
- }
-}
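
What disappears here is the old two-phase scheme for forward-referenced constants: a ConstantPlaceHolder (a dummy ConstantExpr with opcode UserOp1) was parked in the slot, and all placeholders were bulk-rewritten once parsing finished. The reader now materializes constants on demand through a callback instead, which is what makes it possible to expand a constant expression into instructions at a specific insertion point. The new lookup path, condensed from getValueFwdRef above:

    // On a hit, defer to the reader: it may need to turn a stored
    // constant-expression node into instructions in ConstExprInsertBB.
    Expected<Value *> MaybeV = MaterializeValueFn(Idx, ConstExprInsertBB);
    if (!MaybeV) {
      consumeError(MaybeV.takeError()); // precise message dropped (upstream TODO)
      return nullptr;
    }
    return MaybeV.get();
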
diff --git a/llvm/lib/Bitcode/Reader/ValueList.h b/llvm/lib/Bitcode/Reader/ValueList.h
index a39617018f42..995d46f01f75 100644
--- a/llvm/lib/Bitcode/Reader/ValueList.h
+++ b/llvm/lib/Bitcode/Reader/ValueList.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_BITCODE_READER_VALUELIST_H
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Error.h"
#include <cassert>
#include <utility>
#include <vector>
@@ -21,56 +22,53 @@
namespace llvm {
class Constant;
-class LLVMContext;
+class Error;
class Type;
class Value;
class BitcodeReaderValueList {
- std::vector<WeakTrackingVH> ValuePtrs;
-
- /// As we resolve forward-referenced constants, we add information about them
- /// to this vector. This allows us to resolve them in bulk instead of
- /// resolving each reference at a time. See the code in
- /// ResolveConstantForwardRefs for more information about this.
- ///
- /// The key of this vector is the placeholder constant, the value is the slot
- /// number that holds the resolved value.
- using ResolveConstantsTy = std::vector<std::pair<Constant *, unsigned>>;
- ResolveConstantsTy ResolveConstants;
- LLVMContext &Context;
+ /// Maps Value ID to pair of Value* and Type ID.
+ std::vector<std::pair<WeakTrackingVH, unsigned>> ValuePtrs;
/// Maximum number of valid references. Forward references exceeding the
/// maximum must be invalid.
unsigned RefsUpperBound;
-public:
- BitcodeReaderValueList(LLVMContext &C, size_t RefsUpperBound)
- : Context(C),
- RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
- RefsUpperBound)) {}
+ using MaterializeValueFnTy =
+ std::function<Expected<Value *>(unsigned, BasicBlock *)>;
+ MaterializeValueFnTy MaterializeValueFn;
- ~BitcodeReaderValueList() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
- }
+public:
+ BitcodeReaderValueList(size_t RefsUpperBound,
+ MaterializeValueFnTy MaterializeValueFn)
+ : RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+ RefsUpperBound)),
+ MaterializeValueFn(MaterializeValueFn) {}
// vector compatibility methods
unsigned size() const { return ValuePtrs.size(); }
void resize(unsigned N) {
ValuePtrs.resize(N);
}
- void push_back(Value *V) { ValuePtrs.emplace_back(V); }
+ void push_back(Value *V, unsigned TypeID) {
+ ValuePtrs.emplace_back(V, TypeID);
+ }
void clear() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
ValuePtrs.clear();
}
Value *operator[](unsigned i) const {
assert(i < ValuePtrs.size());
- return ValuePtrs[i];
+ return ValuePtrs[i].first;
}
- Value *back() const { return ValuePtrs.back(); }
+ unsigned getTypeID(unsigned ValNo) const {
+ assert(ValNo < ValuePtrs.size());
+ return ValuePtrs[ValNo].second;
+ }
+
+ Value *back() const { return ValuePtrs.back().first; }
void pop_back() {
ValuePtrs.pop_back();
}
@@ -81,14 +79,15 @@ public:
ValuePtrs.resize(N);
}
- Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
- Value *getValueFwdRef(unsigned Idx, Type *Ty);
+ void replaceValueWithoutRAUW(unsigned ValNo, Value *NewV) {
+ assert(ValNo < ValuePtrs.size());
+ ValuePtrs[ValNo].first = NewV;
+ }
- void assignValue(Value *V, unsigned Idx);
+ Value *getValueFwdRef(unsigned Idx, Type *Ty, unsigned TyID,
+ BasicBlock *ConstExprInsertBB);
- /// Once all constants are read, this method bulk resolves any forward
- /// references.
- void resolveConstantForwardRefs();
+ Error assignValue(unsigned Idx, Value *V, unsigned TypeID);
};
} // end namespace llvm
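
A hypothetical construction site for the reworked interface (the real wiring lives in BitcodeReader.cpp, outside this excerpt, and the materializeValue hook name is an assumption based on the callback type):

    // Inside the reader: the list stores (Value*, TypeID) slots and
    // delegates on-demand materialization back to the reader.
    BitcodeReaderValueList ValueList(
        /*RefsUpperBound=*/MaxValueIndex, // hypothetical bound
        [this](unsigned ValID, BasicBlock *InsertBB) -> Expected<Value *> {
          return materializeValue(ValID, InsertBB); // assumed reader hook
        });
    ValueList.push_back(GV, /*TypeID=*/GVTypeID); // value now paired with type
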
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 4bba0b356675..941ed808bab1 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -19,6 +19,8 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -610,6 +612,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
switch (Kind) {
case Attribute::Alignment:
return bitc::ATTR_KIND_ALIGNMENT;
+ case Attribute::AllocAlign:
+ return bitc::ATTR_KIND_ALLOC_ALIGN;
case Attribute::AllocSize:
return bitc::ATTR_KIND_ALLOC_SIZE;
case Attribute::AlwaysInline:
@@ -644,6 +648,10 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_JUMP_TABLE;
case Attribute::MinSize:
return bitc::ATTR_KIND_MIN_SIZE;
+ case Attribute::AllocatedPointer:
+ return bitc::ATTR_KIND_ALLOCATED_POINTER;
+ case Attribute::AllocKind:
+ return bitc::ATTR_KIND_ALLOC_KIND;
case Attribute::Naked:
return bitc::ATTR_KIND_NAKED;
case Attribute::Nest:
@@ -688,6 +696,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_PROFILE;
case Attribute::NoUnwind:
return bitc::ATTR_KIND_NO_UNWIND;
+ case Attribute::NoSanitizeBounds:
+ return bitc::ATTR_KIND_NO_SANITIZE_BOUNDS;
case Attribute::NoSanitizeCoverage:
return bitc::ATTR_KIND_NO_SANITIZE_COVERAGE;
case Attribute::NullPointerIsValid:
@@ -764,6 +774,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_BYREF;
case Attribute::MustProgress:
return bitc::ATTR_KIND_MUSTPROGRESS;
+ case Attribute::PresplitCoroutine:
+ return bitc::ATTR_KIND_PRESPLIT_COROUTINE;
case Attribute::EndAttrKinds:
llvm_unreachable("Can not encode end-attribute kinds marker.");
case Attribute::None:
@@ -1013,6 +1025,8 @@ void ModuleBitcodeWriter::writeTypeTable() {
TypeVals.push_back(true);
break;
}
+ case Type::DXILPointerTyID:
+ llvm_unreachable("DXIL pointers cannot be added to IR modules");
}
// Emit the finished record.
@@ -1211,6 +1225,14 @@ static StringEncoding getStringEncoding(StringRef Str) {
return SE_Fixed7;
}
+static_assert(sizeof(GlobalValue::SanitizerMetadata) <= sizeof(unsigned),
+ "Sanitizer Metadata is too large for naive serialization.");
+static unsigned
+serializeSanitizerMetadata(const GlobalValue::SanitizerMetadata &Meta) {
+ return Meta.NoAddress | (Meta.NoHWAddress << 1) |
+ (Meta.NoMemtag << 2) | (Meta.IsDynInit << 3);
+}
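
The static_assert pins SanitizerMetadata into a single unsigned so this naive bit-packing cannot silently truncate. The reader-side inverse is not part of this excerpt; assuming the field layout fixed by the packer above, it would look like:

    // Hypothetical deserializer matching serializeSanitizerMetadata().
    static GlobalValue::SanitizerMetadata
    deserializeSanitizerMetadata(unsigned V) {
      GlobalValue::SanitizerMetadata Meta;
      Meta.NoAddress   = (V >> 0) & 1;
      Meta.NoHWAddress = (V >> 1) & 1;
      Meta.NoMemtag    = (V >> 2) & 1;
      Meta.IsDynInit   = (V >> 3) & 1;
      return Meta;
    }
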
+
/// Emit top-level description of module, including target triple, inline asm,
/// descriptors for global variables, and function prototype info.
/// Returns the bit offset to backpatch with the location of the real VST.
@@ -1334,7 +1356,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, externally_initialized, dllstorageclass,
- // comdat, attributes, DSO_Local]
+ // comdat, attributes, DSO_Local, GlobalSanitizer]
Vals.push_back(addToStrtab(GV.getName()));
Vals.push_back(GV.getName().size());
Vals.push_back(VE.getTypeID(GV.getValueType()));
@@ -1350,10 +1372,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None ||
GV.isExternallyInitialized() ||
GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
- GV.hasComdat() ||
- GV.hasAttributes() ||
- GV.isDSOLocal() ||
- GV.hasPartition()) {
+ GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() ||
+ GV.hasPartition() || GV.hasSanitizerMetadata()) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(getEncodedThreadLocalMode(GV));
Vals.push_back(getEncodedUnnamedAddr(GV));
@@ -1367,6 +1387,10 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(GV.isDSOLocal());
Vals.push_back(addToStrtab(GV.getPartition()));
Vals.push_back(GV.getPartition().size());
+
+ Vals.push_back((GV.hasSanitizerMetadata() ? serializeSanitizerMetadata(
+ GV.getSanitizerMetadata())
+ : 0));
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
@@ -1817,6 +1841,7 @@ void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N,
Record.push_back(N->getThisAdjustment());
Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get()));
Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawTargetFuncName()));
Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
Record.clear();
@@ -2649,6 +2674,9 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(VE.getValueID(C->getOperand(1)));
Record.push_back(CE->getPredicate());
break;
+ case Instruction::InsertValue:
+ report_fatal_error("insertvalue constexprs not supported");
+ break;
}
} else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
Code = bitc::CST_CODE_BLOCKADDRESS;
@@ -3068,6 +3096,10 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Bitfield::set<APV::ExplicitType>(Record, true);
Bitfield::set<APV::SwiftError>(Record, AI.isSwiftError());
Vals.push_back(Record);
+
+ unsigned AS = AI.getAddressSpace();
+ if (AS != M.getDataLayout().getAllocaAddrSpace())
+ Vals.push_back(AS);
break;
}
@@ -3347,8 +3379,10 @@ void ModuleBitcodeWriter::writeFunction(
bool NeedsMetadataAttachment = F.hasMetadata();
DILocation *LastDL = nullptr;
+ SmallSetVector<Function *, 4> BlockAddressUsers;
+
// Finally, emit all the instructions, in order.
- for (const BasicBlock &BB : F)
+ for (const BasicBlock &BB : F) {
for (const Instruction &I : BB) {
writeInstruction(I, InstID, Vals);
@@ -3380,6 +3414,32 @@ void ModuleBitcodeWriter::writeFunction(
LastDL = DL;
}
+ if (BlockAddress *BA = BlockAddress::lookup(&BB)) {
+ SmallVector<Value *> Worklist{BA};
+ SmallPtrSet<Value *, 8> Visited{BA};
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ for (User *U : V->users()) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ Function *P = I->getFunction();
+ if (P != &F)
+ BlockAddressUsers.insert(P);
+ } else if (isa<Constant>(U) && !isa<GlobalValue>(U) &&
+ Visited.insert(U).second)
+ Worklist.push_back(U);
+ }
+ }
+ }
+ }
+
+ if (!BlockAddressUsers.empty()) {
+ Vals.resize(BlockAddressUsers.size());
+ for (auto I : llvm::enumerate(BlockAddressUsers))
+ Vals[I.index()] = VE.getValueID(I.value());
+ Stream.EmitRecord(bitc::FUNC_CODE_BLOCKADDR_USERS, Vals);
+ Vals.clear();
+ }
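
This record exists because a blockaddress taken in F can be consumed from a different function, in which case decoding that consumer requires F's basic blocks to already be known; the worklist above chases such uses through non-global constants. A minimal way to set up the situation with the C++ API (module, types, and includes elided; an illustrative sketch only):

    // @g returns blockaddress(@f, %bb): reading @g forces @f's blocks.
    BasicBlock *BB = &*std::next(F->begin()); // some non-entry block of F
    Constant *Addr = BlockAddress::get(F, BB);
    IRBuilder<> Builder(&G->getEntryBlock());
    Builder.CreateRet(Addr); // G's return type assumed to be ptr
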
+
// Emit names for all the instructions etc.
if (auto *Symtab = F.getValueSymbolTable())
writeFunctionLevelValueSymbolTable(*Symtab);
@@ -4375,7 +4435,7 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
uint32_t Vals[5];
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos],
Buffer.size() - BlockStartPos));
- StringRef Hash = Hasher.result();
+ std::array<uint8_t, 20> Hash = Hasher.result();
for (int Pos = 0; Pos < 20; Pos += 4) {
Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos);
}
@@ -4855,9 +4915,15 @@ static const char *getSectionNameForBitcode(const Triple &T) {
case Triple::GOFF:
llvm_unreachable("GOFF is not yet implemented");
break;
+ case Triple::SPIRV:
+ llvm_unreachable("SPIRV is not yet implemented");
+ break;
case Triple::XCOFF:
llvm_unreachable("XCOFF is not yet implemented");
break;
+ case Triple::DXContainer:
+ llvm_unreachable("DXContainer is not yet implemented");
+ break;
}
llvm_unreachable("Unimplemented ObjectFormatType");
}
@@ -4874,9 +4940,15 @@ static const char *getSectionNameForCommandline(const Triple &T) {
case Triple::GOFF:
llvm_unreachable("GOFF is not yet implemented");
break;
+ case Triple::SPIRV:
+ llvm_unreachable("SPIRV is not yet implemented");
+ break;
case Triple::XCOFF:
llvm_unreachable("XCOFF is not yet implemented");
break;
+ case Triple::DXContainer:
+ llvm_unreachable("DXC is not yet implemented");
+ break;
}
llvm_unreachable("Unimplemented ObjectFormatType");
}
@@ -4931,7 +5003,7 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
if (llvm::GlobalVariable *Old =
M.getGlobalVariable("llvm.embedded.module", true)) {
- assert(Old->hasOneUse() &&
+ assert(Old->hasZeroLiveUses() &&
"llvm.embedded.module can only be used once in llvm.compiler.used");
GV->takeName(Old);
Old->eraseFromParent();
@@ -4954,7 +5026,7 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
UsedArray.push_back(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) {
- assert(Old->hasOneUse() &&
+ assert(Old->hasZeroLiveUses() &&
"llvm.cmdline can only be used once in llvm.compiler.used");
GV->takeName(Old);
Old->eraseFromParent();
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index d884415aafd5..536d04f2fe26 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -13,7 +13,6 @@
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 01f7e85bd60e..727ec2e02cc2 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -50,17 +50,12 @@ namespace {
struct OrderMap {
DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
- unsigned LastGlobalConstantID = 0;
unsigned LastGlobalValueID = 0;
OrderMap() = default;
- bool isGlobalConstant(unsigned ID) const {
- return ID <= LastGlobalConstantID;
- }
-
bool isGlobalValue(unsigned ID) const {
- return ID <= LastGlobalValueID && !isGlobalConstant(ID);
+ return ID <= LastGlobalValueID;
}
unsigned size() const { return IDs.size(); }
@@ -84,7 +79,7 @@ static void orderValue(const Value *V, OrderMap &OM) {
return;
if (const Constant *C = dyn_cast<Constant>(V)) {
- if (C->getNumOperands() && !isa<GlobalValue>(C)) {
+ if (C->getNumOperands()) {
for (const Value *Op : C->operands())
if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
orderValue(Op, OM);
@@ -104,39 +99,40 @@ static OrderMap orderModule(const Module &M) {
// and ValueEnumerator::incorporateFunction().
OrderMap OM;
- // In the reader, initializers of GlobalValues are set *after* all the
- // globals have been read. Rather than awkwardly modeling this behaviour
- // directly in predictValueUseListOrderImpl(), just assign IDs to
- // initializers of GlobalValues before GlobalValues themselves to model this
- // implicitly.
- for (const GlobalVariable &G : M.globals())
- if (G.hasInitializer())
- if (!isa<GlobalValue>(G.getInitializer()))
- orderValue(G.getInitializer(), OM);
- for (const GlobalAlias &A : M.aliases())
- if (!isa<GlobalValue>(A.getAliasee()))
- orderValue(A.getAliasee(), OM);
- for (const GlobalIFunc &I : M.ifuncs())
- if (!isa<GlobalValue>(I.getResolver()))
- orderValue(I.getResolver(), OM);
- for (const Function &F : M) {
- for (const Use &U : F.operands())
- if (!isa<GlobalValue>(U.get()))
- orderValue(U.get(), OM);
- }
+ // Initializers of GlobalValues are processed in
+ // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
+ // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
+ // by giving IDs in reverse order.
+ //
+ // Since GlobalValues never reference each other directly (just through
+ // initializers), their relative IDs only matter for determining order of
+ // uses in their initializers.
+ for (const GlobalVariable &G : reverse(M.globals()))
+ orderValue(&G, OM);
+ for (const GlobalAlias &A : reverse(M.aliases()))
+ orderValue(&A, OM);
+ for (const GlobalIFunc &I : reverse(M.ifuncs()))
+ orderValue(&I, OM);
+ for (const Function &F : reverse(M))
+ orderValue(&F, OM);
+ OM.LastGlobalValueID = OM.size();
- // As constants used in metadata operands are emitted as module-level
- // constants, we must order them before other operands. Also, we must order
- // these before global values, as these will be read before setting the
- // global values' initializers. The latter matters for constants which have
- // uses towards other constants that are used as initializers.
auto orderConstantValue = [&OM](const Value *V) {
- if ((isa<Constant>(V) && !isa<GlobalValue>(V)) || isa<InlineAsm>(V))
+ if (isa<Constant>(V) || isa<InlineAsm>(V))
orderValue(V, OM);
};
+
for (const Function &F : M) {
if (F.isDeclaration())
continue;
+ // Here we need to match the union of ValueEnumerator::incorporateFunction()
+ // and WriteFunction(). Basic blocks are implicitly declared before
+ // anything else (by declaring their size).
+ for (const BasicBlock &BB : F)
+ orderValue(&BB, OM);
+
+ // Metadata used by instructions is decoded before the actual instructions,
+ // so visit any constants used by it beforehand.
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
for (const Value *V : I.operands()) {
@@ -151,49 +147,17 @@ static OrderMap orderModule(const Module &M) {
}
}
}
- }
- OM.LastGlobalConstantID = OM.size();
-
- // Initializers of GlobalValues are processed in
- // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
- // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
- // by giving IDs in reverse order.
- //
- // Since GlobalValues never reference each other directly (just through
- // initializers), their relative IDs only matter for determining order of
- // uses in their initializers.
- for (const Function &F : M)
- orderValue(&F, OM);
- for (const GlobalAlias &A : M.aliases())
- orderValue(&A, OM);
- for (const GlobalIFunc &I : M.ifuncs())
- orderValue(&I, OM);
- for (const GlobalVariable &G : M.globals())
- orderValue(&G, OM);
- OM.LastGlobalValueID = OM.size();
- for (const Function &F : M) {
- if (F.isDeclaration())
- continue;
- // Here we need to match the union of ValueEnumerator::incorporateFunction()
- // and WriteFunction(). Basic blocks are implicitly declared before
- // anything else (by declaring their size).
- for (const BasicBlock &BB : F)
- orderValue(&BB, OM);
for (const Argument &A : F.args())
orderValue(&A, OM);
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
for (const Value *Op : I.operands())
- if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
- isa<InlineAsm>(*Op))
- orderValue(Op, OM);
+ orderConstantValue(Op);
if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
orderValue(SVI->getShuffleMaskForBitcode(), OM);
- }
- for (const BasicBlock &BB : F)
- for (const Instruction &I : BB)
orderValue(&I, OM);
+ }
}
return OM;
}
@@ -223,18 +187,6 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
auto LID = OM.lookup(LU->getUser()).first;
auto RID = OM.lookup(RU->getUser()).first;
- // Global values are processed in reverse order.
- //
- // Moreover, initializers of GlobalValues are set *after* all the globals
- // have been read (despite having earlier IDs). Rather than awkwardly
- // modeling this behaviour here, orderModule() has assigned IDs to
- // initializers of GlobalValues before GlobalValues themselves.
- if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) {
- if (LID == RID)
- return LU->getOperandNo() > RU->getOperandNo();
- return LID < RID;
- }
-
// If ID is 4, then expect: 7 6 5 1 2 3.
if (LID < RID) {
if (RID <= ID)
@@ -257,9 +209,7 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
return LU->getOperandNo() > RU->getOperandNo();
});
- if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) {
- return L.second < R.second;
- }))
+ if (llvm::is_sorted(List, llvm::less_second()))
// Order is already correct.
return;
@@ -319,16 +269,25 @@ static UseListOrderStack predictUseListOrder(const Module &M) {
predictValueUseListOrder(&A, &F, OM, Stack);
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
- for (const Value *Op : I.operands())
+ for (const Value *Op : I.operands()) {
if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
predictValueUseListOrder(Op, &F, OM, Stack);
+ if (const auto *MAV = dyn_cast<MetadataAsValue>(Op)) {
+ if (const auto *VAM =
+ dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
+ predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
+ } else if (const auto *AL =
+ dyn_cast<DIArgList>(MAV->getMetadata())) {
+ for (const auto *VAM : AL->getArgs())
+ predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
+ }
+ }
+ }
if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM,
Stack);
- }
- for (const BasicBlock &BB : F)
- for (const Instruction &I : BB)
predictValueUseListOrder(&I, &F, OM, Stack);
+ }
}
// Visit globals last, since the module-level use-list block will be seen
@@ -939,9 +898,12 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
I != E; ++I)
if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
EnumerateValue(*I);
- if (auto *CE = dyn_cast<ConstantExpr>(C))
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::ShuffleVector)
EnumerateValue(CE->getShuffleMaskForBitcode());
+ if (auto *GEP = dyn_cast<GEPOperator>(CE))
+ EnumerateType(GEP->getSourceElementType());
+ }
// Finally, add the value. Doing this could make the ValueID reference be
// dangling, don't reuse it.
diff --git a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
index 28adfe6268f9..c297e16bdfdf 100644
--- a/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
+++ b/llvm/lib/Bitstream/Reader/BitstreamReader.cpp
@@ -16,6 +16,10 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// BitstreamCursor implementation
//===----------------------------------------------------------------------===//
+//
+static Error error(const char *Message) {
+ return createStringError(std::errc::illegal_byte_sequence, Message);
+}
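createStringError (llvm/Support/Error.h) packages a message and an error code into an llvm::Error, which callers propagate through Expected<T> instead of aborting. A hedged sketch of the pattern this file migrates to (readValue and useValue are hypothetical helpers, not part of the reader):

#include "llvm/Support/Error.h"

llvm::Expected<unsigned> readValue(bool Malformed) {
  if (Malformed)
    return llvm::createStringError(std::errc::illegal_byte_sequence,
                                   "input is malformed");
  return 42;
}

llvm::Error useValue() {
  llvm::Expected<unsigned> V = readValue(false);
  if (!V)
    return V.takeError(); // Propagate instead of report_fatal_error.
  // ... use *V ...
  return llvm::Error::success();
}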
/// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
@@ -97,7 +101,7 @@ Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
unsigned Code = MaybeCode.get();
Expected<uint32_t> MaybeVBR = ReadVBR(6);
if (!MaybeVBR)
- return MaybeVBR.get();
+ return MaybeVBR.takeError();
unsigned NumElts = MaybeVBR.get();
for (unsigned i = 0; i != NumElts; ++i)
if (Expected<uint64_t> Res = ReadVBR64(6))
@@ -107,7 +111,11 @@ Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
return Code;
}
- const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+ Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
+ if (!MaybeAbbv)
+ return MaybeAbbv.takeError();
+
+ const BitCodeAbbrev *Abbv = MaybeAbbv.get();
const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
unsigned Code;
if (CodeOp.isLiteral())
@@ -152,7 +160,7 @@ Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Decode the value as we are commanded.
switch (EltEnc.getEncoding()) {
default:
- report_fatal_error("Array element type can't be an Array or a Blob");
+ return error("Array element type can't be an Array or a Blob");
case BitCodeAbbrevOp::Fixed:
assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
if (Error Err =
@@ -212,8 +220,12 @@ Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
uint32_t Code = MaybeCode.get();
Expected<uint32_t> MaybeNumElts = ReadVBR(6);
if (!MaybeNumElts)
- return MaybeNumElts.takeError();
+ return error(
+ ("Failed to read size: " + toString(MaybeNumElts.takeError()))
+ .c_str());
uint32_t NumElts = MaybeNumElts.get();
+ if (!isSizePlausible(NumElts))
+ return error("Size is not plausible");
Vals.reserve(Vals.size() + NumElts);
for (unsigned i = 0; i != NumElts; ++i)
@@ -224,7 +236,10 @@ Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
return Code;
}
- const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+ Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
+ if (!MaybeAbbv)
+ return MaybeAbbv.takeError();
+ const BitCodeAbbrev *Abbv = MaybeAbbv.get();
// Read the record code first.
assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
@@ -235,7 +250,7 @@ Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
else {
if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
- report_fatal_error("Abbreviation starts with an Array or a Blob");
+ return error("Abbreviation starts with an Array or a Blob");
if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
Code = MaybeCode.get();
else
@@ -262,22 +277,26 @@ Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
// Array case. Read the number of elements as a vbr6.
Expected<uint32_t> MaybeNumElts = ReadVBR(6);
if (!MaybeNumElts)
- return MaybeNumElts.takeError();
+ return error(
+ ("Failed to read size: " + toString(MaybeNumElts.takeError()))
+ .c_str());
uint32_t NumElts = MaybeNumElts.get();
+ if (!isSizePlausible(NumElts))
+ return error("Size is not plausible");
Vals.reserve(Vals.size() + NumElts);
// Get the element encoding.
if (i + 2 != e)
- report_fatal_error("Array op not second to last");
+ return error("Array op not second to last");
const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
if (!EltEnc.isEncoding())
- report_fatal_error(
+ return error(
"Array element type has to be an encoding of a type");
// Read all the elements.
switch (EltEnc.getEncoding()) {
default:
- report_fatal_error("Array element type can't be an Array or a Blob");
+ return error("Array element type can't be an Array or a Blob");
case BitCodeAbbrevOp::Fixed:
for (; NumElts; --NumElts)
if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
@@ -316,13 +335,9 @@ Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
size_t CurBitPos = GetCurrentBitNo();
const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
- // If this would read off the end of the bitcode file, just set the
- // record to empty and return.
- if (!canSkipToPos(NewEnd/8)) {
- Vals.append(NumElts, 0);
- skipToEnd();
- break;
- }
+ // Make sure the bitstream is large enough to contain the blob.
+ if (!canSkipToPos(NewEnd/8))
+ return error("Blob ends too soon");
// Otherwise, inform the streamer that we need these bytes in memory. Skip
// over tail padding first, in case jumping to NewEnd invalidates the Blob
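The bound checked above pads the blob byte count to a 4-byte boundary before converting to bits; llvm::alignTo (llvm/Support/MathExtras.h) does the rounding. A small illustration:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// alignTo rounds up to the next multiple: alignTo(5, 4) == 8, alignTo(8, 4) == 8.
uint64_t blobEndBit(uint64_t CurBitPos, uint64_t NumBlobBytes) {
  return CurBitPos + llvm::alignTo(NumBlobBytes, 4) * 8; // blob payloads are word-padded
}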
@@ -366,6 +381,9 @@ Error BitstreamCursor::ReadAbbrevRecord() {
Expected<word_t> MaybeEncoding = Read(3);
if (!MaybeEncoding)
return MaybeEncoding.takeError();
+ if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
+ return error("Invalid encoding");
+
BitCodeAbbrevOp::Encoding E =
(BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
if (BitCodeAbbrevOp::hasEncodingData(E)) {
@@ -385,8 +403,7 @@ Error BitstreamCursor::ReadAbbrevRecord() {
if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
Data > MaxChunkSize)
- report_fatal_error(
- "Fixed or VBR abbrev record with size > MaxChunkData");
+ return error("Fixed or VBR abbrev record with size > MaxChunkData");
Abbv->Add(BitCodeAbbrevOp(E, Data));
} else
@@ -394,7 +411,7 @@ Error BitstreamCursor::ReadAbbrevRecord() {
}
if (Abbv->getNumOperandInfos() == 0)
- report_fatal_error("Abbrev record with no operands");
+ return error("Abbrev record with no operands");
CurAbbrevs.push_back(std::move(Abbv));
return Error::success();
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index cdf5586766da..f5dbaccfcad5 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -21,12 +21,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 03e63321e3c4..1940f46232d3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -38,8 +38,19 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
// unsigned long personality; /* Pointer to the personality routine */
// }
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getCompactUnwindSection());
+ auto *EHInfo =
+ cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection());
+ if (Asm->TM.getFunctionSections()) {
+ // If option -ffunction-sections is on, append the function name to the
+ // name of EH Info Table csect so that each function has its own EH Info
+ // Table csect. This helps the linker to garbage-collect EH info of unused
+ // functions.
+ SmallString<128> NameStr = EHInfo->getName();
+ raw_svector_ostream(NameStr) << '.' << Asm->MF->getFunction().getName();
+ EHInfo = Asm->OutContext.getXCOFFSection(NameStr, EHInfo->getKind(),
+ EHInfo->getCsectProp());
+ }
+ Asm->OutStreamer->switchSection(EHInfo);
MCSymbol *EHInfoLabel =
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF);
Asm->OutStreamer->emitLabel(EHInfoLabel);
@@ -74,8 +85,8 @@ void AIXException::endFunction(const MachineFunction *MF) {
const Function &F = MF->getFunction();
assert(F.hasPersonalityFn() &&
"Landingpads are presented, but no personality routine is found.");
- const GlobalValue *Per =
- dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
+ const auto *Per =
+ cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
const MCSymbol *PerSym = Asm->TM.getSymbol(Per);
emitExceptionInfoTable(LSDALabel, PerSym);
diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 223840c21d8b..e04a29fbb42b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -14,21 +14,14 @@
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
-ARMException::~ARMException() {}
+ARMException::~ARMException() = default;
ARMTargetStreamer &ARMException::getTargetStreamer() {
MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer();
@@ -101,7 +94,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding,
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = TypeInfos.size();
}
@@ -116,7 +109,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding,
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = 0;
}
for (std::vector<unsigned>::const_iterator
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 65c45f73e965..b10d79f4b5a6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -18,7 +18,6 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
@@ -563,7 +562,7 @@ void llvm::emitDWARF5AccelTable(
if (CompUnits.empty())
return;
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
Asm->getObjFileLowering().getDwarfDebugNamesSection());
Contents.finalize(Asm, "names");
diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 21da9d50efba..32d8dc793510 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
- HasBeenUsed = true;
+ resetUsedFlag(true);
auto IterBool =
Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS)));
return IterBool.first->second.Number;
@@ -44,7 +44,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
return;
// Start the dwarf addr section.
- Asm.OutStreamer->SwitchSection(AddrSection);
+ Asm.OutStreamer->switchSection(AddrSection);
MCSymbol *EndLabel = nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3e8e190eecc3..4a31bf85446b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -27,6 +27,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -48,7 +49,6 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -82,33 +82,26 @@
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
-#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
-#include "llvm/Remarks/Remark.h"
-#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStreamer.h"
-#include "llvm/Remarks/RemarkStringTable.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -125,7 +118,6 @@
#include <cinttypes>
#include <cstdint>
#include <iterator>
-#include <limits>
#include <memory>
#include <string>
#include <utility>
@@ -135,11 +127,6 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-// FIXME: this option currently only applies to DWARF, and not CodeView, tables
-static cl::opt<bool>
- DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
- cl::desc("Disable debug info printing"));
-
const char DWARFGroupName[] = "dwarf";
const char DWARFGroupDescription[] = "DWARF Emission";
const char DbgTimerName[] = "emit";
@@ -167,6 +154,178 @@ static gcp_map_type &getGCMap(void *&P) {
return *(gcp_map_type*)P;
}
+namespace {
+class AddrLabelMapCallbackPtr final : CallbackVH {
+ AddrLabelMap *Map = nullptr;
+
+public:
+ AddrLabelMapCallbackPtr() = default;
+ AddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(AddrLabelMap *map) { Map = map; }
+
+ void deleted() override;
+ void allUsesReplacedWith(Value *V2) override;
+};
+} // namespace
+
+class llvm::AddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// The symbols for the label.
+ TinyPtrVector<MCSymbol *> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+ /// Callbacks for the BasicBlocks that we have entries for. We use this so
+ /// we get notified if a block is deleted or RAUWd.
+ std::vector<AddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// This is a per-function list of symbols whose corresponding BasicBlock got
+ /// deleted. These symbols need to be emitted at some point in the file, so
+ /// AsmPrinter emits them after the function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>>
+ DeletedAddrLabelsNeedingEmission;
+
+public:
+ AddrLabelMap(MCContext &context) : Context(context) {}
+
+ ~AddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+ }
+
+ ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol *> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+
+ArrayRef<MCSymbol *> AddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.empty()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ return Entry.Symbols;
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.emplace_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size() - 1;
+ Entry.Fn = BB->getParent();
+ MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol()
+ : Context.createTempSymbol();
+ Entry.Symbols.push_back(Sym);
+ return Entry.Symbols;
+}
+
+/// If we have any deleted symbols for F, return them.
+void AddrLabelMap::takeDeletedSymbolsForFunction(
+ Function *F, std::vector<MCSymbol *> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>>::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end())
+ return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+ArrayRef<MCSymbol *>
+AsmPrinter::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (!AddrLabelSymbols)
+ AddrLabelSymbols = std::make_unique<AddrLabelMap>(OutContext);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(
+ const_cast<BasicBlock *>(BB));
+}
+
+void AsmPrinter::takeDeletedSymbolsForFunction(
+ const Function *F, std::vector<MCSymbol *> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (!AddrLabelSymbols)
+ return;
+ return AddrLabelSymbols->takeDeletedSymbolsForFunction(
+ const_cast<Function *>(F), Result);
+}
+
+void AddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
+ AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]);
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
+
+#if !LLVM_MEMORY_SANITIZER_BUILD
+ // BasicBlock is destroyed already, so this access is UB detectable by msan.
+ assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+#endif
+
+ for (MCSymbol *Sym : Entry.Symbols) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, so we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+}
+
+void AddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]);
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.empty()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = std::move(OldEntry); // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
+
+ // Otherwise, we need to add the old symbols to the new block's set.
+ llvm::append_range(NewEntry.Symbols, OldEntry.Symbols);
+}
+
+void AddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void AddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
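CallbackVH (llvm/IR/ValueHandle.h) is what drives these updates: a value handle whose deleted() and allUsesReplacedWith() virtuals fire when the tracked Value is destroyed or RAUW'd, exactly as AddrLabelMapCallbackPtr uses them above. A minimal standalone sketch of such a handle:

#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: log lifetime events for one tracked Value.
class LoggingVH final : public llvm::CallbackVH {
public:
  LoggingVH(llvm::Value *V) : llvm::CallbackVH(V) {}

  void deleted() override {
    llvm::errs() << "tracked value deleted\n";
    setValPtr(nullptr); // Drop the handle so it does not dangle.
  }
  void allUsesReplacedWith(llvm::Value *New) override {
    llvm::errs() << "tracked value RAUW'd\n";
    setValPtr(New); // Follow the replacement value.
  }
};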
/// getGVAlignment - Return the alignment to use for the specified global
/// value. This rounds up to the preferred alignment if possible and legal.
Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
@@ -271,6 +430,10 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
bool AsmPrinter::doInitialization(Module &M) {
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
+ HasSplitStack = false;
+ HasNoSplitStack = false;
+
+ AddrLabelSymbols = nullptr;
// Initialize TargetLoweringObjectFile.
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
@@ -281,9 +444,6 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
- if (DisableDebugInfoPrinting)
- MMI->setDebugInfoAvailability(false);
-
// Emit the version-min deployment target directive if needed.
//
// FIXME: If we end up with a collection of these sorts of Darwin-specific
@@ -335,11 +495,11 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
OutStreamer->AddComment("Start of file scope inline assembly");
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
if (MAI->doesSupportDebugInformation()) {
@@ -351,7 +511,7 @@ bool AsmPrinter::doInitialization(Module &M) {
CodeViewLineTablesGroupDescription);
}
if (!EmitCodeView || M.getDwarfVersion()) {
- if (!DisableDebugInfoPrinting) {
+ if (MMI->hasDebugInfo()) {
DD = new DwarfDebug(this);
Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
DbgTimerDescription, DWARFGroupName,
@@ -536,9 +696,9 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (isVerbose()) {
// When printing the control variable __emutls_v.*,
// we don't need to print the original TLS variable name.
- GV->printAsOperand(OutStreamer->GetCommentOS(),
- /*PrintType=*/false, GV->getParent());
- OutStreamer->GetCommentOS() << '\n';
+ GV->printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer->getCommentOS() << '\n';
}
}
@@ -652,7 +812,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
TheSection = getObjFileLowering().getTLSBSSSection();
OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
} else if (GVKind.isThreadData()) {
- OutStreamer->SwitchSection(TheSection);
+ OutStreamer->switchSection(TheSection);
emitAlignment(Alignment, GV);
OutStreamer->emitLabel(MangSym);
@@ -661,12 +821,12 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
GV->getInitializer());
}
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
// Emit the variable struct for the runtime.
MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection();
- OutStreamer->SwitchSection(TLVSect);
+ OutStreamer->switchSection(TLVSect);
// Emit the linkage here.
emitLinkage(GV, GVSym);
OutStreamer->emitLabel(GVSym);
@@ -681,13 +841,13 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->emitIntValue(0, PtrSize);
OutStreamer->emitSymbolValue(MangSym, PtrSize);
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
return;
}
MCSymbol *EmittedInitSym = GVSym;
- OutStreamer->SwitchSection(TheSection);
+ OutStreamer->switchSection(TheSection);
emitLinkage(GV, EmittedInitSym);
emitAlignment(Alignment, GV);
@@ -704,7 +864,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->emitELFSize(EmittedInitSym,
MCConstantExpr::create(Size, OutContext));
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
/// Emit the directive and value for debug thread local expression
@@ -723,7 +883,7 @@ void AsmPrinter::emitFunctionHeader() {
const Function &F = MF->getFunction();
if (isVerbose())
- OutStreamer->GetCommentOS()
+ OutStreamer->getCommentOS()
<< "-- Begin function "
<< GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
@@ -737,7 +897,7 @@ void AsmPrinter::emitFunctionHeader() {
MF->setSection(getObjFileLowering().getUniqueSectionForFunction(F, TM));
else
MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM));
- OutStreamer->SwitchSection(MF->getSection());
+ OutStreamer->switchSection(MF->getSection());
if (!MAI->hasVisibilityOnlyWithLinkage())
emitVisibility(CurrentFnSym, F.getVisibility());
@@ -756,10 +916,10 @@ void AsmPrinter::emitFunctionHeader() {
OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
if (isVerbose()) {
- F.printAsOperand(OutStreamer->GetCommentOS(),
- /*PrintType=*/false, F.getParent());
+ F.printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, F.getParent());
emitFunctionHeaderComment();
- OutStreamer->GetCommentOS() << '\n';
+ OutStreamer->getCommentOS() << '\n';
}
// Emit the prefix data.
@@ -817,7 +977,7 @@ void AsmPrinter::emitFunctionHeader() {
// references to the dangling symbols. Emit them at the start of the function
// so that we don't get references to undefined symbols.
std::vector<MCSymbol*> DeadBlockSyms;
- MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
+ takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
for (MCSymbol *DeadBlockSym : DeadBlockSyms) {
OutStreamer->AddComment("Address taken block that was later removed");
OutStreamer->emitLabel(DeadBlockSym);
@@ -844,6 +1004,24 @@ void AsmPrinter::emitFunctionHeader() {
// Emit the prologue data.
if (F.hasPrologueData())
emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+
+ // Emit the function prologue data for the indirect call sanitizer.
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+ assert(TM.getTargetTriple().getArch() == Triple::x86 ||
+ TM.getTargetTriple().getArch() == Triple::x86_64);
+ assert(MD->getNumOperands() == 2);
+
+ auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
+ auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1));
+ assert(PrologueSig && FTRTTIProxy);
+ emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+ const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+ const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+ const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+ // Use 32 bit since only small code model is supported.
+ OutStreamer->emitValue(PCRel, 4u);
+ }
}
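The 32-bit value emitted above is the difference of two MC symbol expressions; the assembler later folds it to a constant or emits a relocation. A hedged sketch of building such an expression (the symbol names here are hypothetical):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

// Sketch: build (Target - Here) for a PC-relative-style 32-bit field.
const llvm::MCExpr *makeSymbolDiff(llvm::MCSymbol *Target, llvm::MCSymbol *Here,
                                   llvm::MCContext &Ctx) {
  const llvm::MCExpr *T = llvm::MCSymbolRefExpr::create(Target, Ctx);
  const llvm::MCExpr *H = llvm::MCSymbolRefExpr::create(Here, Ctx);
  return llvm::MCBinaryExpr::createSub(T, H, Ctx);
}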
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -912,7 +1090,7 @@ void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
<< printReg(RegNo, MF->getSubtarget().getRegisterInfo());
OutStreamer->AddComment(OS.str());
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
@@ -925,7 +1103,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
<< printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
}
AP.OutStreamer->AddComment(OS.str());
- AP.OutStreamer->AddBlankLine();
+ AP.OutStreamer->addBlankLine();
}
/// emitDebugValueComment - This method handles the target-independent form
@@ -1147,32 +1325,42 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
const MCSymbol *FunctionSymbol = getFunctionBegin();
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(BBAddrMapSection);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(BBAddrMapSection);
+ OutStreamer->AddComment("version");
+ OutStreamer->emitInt8(OutStreamer->getContext().getBBAddrMapVersion());
+ OutStreamer->AddComment("feature");
+ OutStreamer->emitInt8(0);
+ OutStreamer->AddComment("function address");
OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
- // Emit the total number of basic blocks in this function.
+ OutStreamer->AddComment("number of basic blocks");
OutStreamer->emitULEB128IntValue(MF.size());
+ const MCSymbol *PrevMBBEndSymbol = FunctionSymbol;
// Emit BB information for each basic block in the function.
for (const MachineBasicBlock &MBB : MF) {
const MCSymbol *MBBSymbol =
MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
- // Emit the basic block offset.
- emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol);
+ // Emit the basic block offset relative to the end of the previous block.
+ // This is zero unless the block is padded due to alignment.
+ emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol);
// Emit the basic block size. When BBs have alignments, their size cannot
// always be computed from their offsets.
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
+ PrevMBBEndSymbol = MBB.getEndSymbol();
}
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
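The table entries are ULEB128-encoded deltas: each block's start offset is measured from the previous block's end, so the delta is zero unless alignment padding intervenes. For reference, llvm::encodeULEB128 (llvm/Support/LEB128.h) performs the encoding; the offsets below are made-up sample data:

#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

void writeBlockDeltas(llvm::raw_ostream &OS) {
  uint64_t Starts[] = {0, 16, 48};  // block start offsets
  uint64_t Ends[]   = {16, 44, 80}; // block end offsets
  uint64_t PrevEnd = 0;
  for (int I = 0; I < 3; ++I) {
    llvm::encodeULEB128(Starts[I] - PrevEnd, OS); // 0 unless padding was inserted
    llvm::encodeULEB128(Ends[I] - Starts[I], OS); // block size
    PrevEnd = Ends[I];
  }
}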
void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
- auto GUID = MI.getOperand(0).getImm();
- auto Index = MI.getOperand(1).getImm();
- auto Type = MI.getOperand(2).getImm();
- auto Attr = MI.getOperand(3).getImm();
- DILocation *DebugLoc = MI.getDebugLoc();
- PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc);
+ if (PP) {
+ auto GUID = MI.getOperand(0).getImm();
+ auto Index = MI.getOperand(1).getImm();
+ auto Type = MI.getOperand(2).getImm();
+ auto Attr = MI.getOperand(3).getImm();
+ DILocation *DebugLoc = MI.getDebugLoc();
+ PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc);
+ }
}
void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
@@ -1189,15 +1377,16 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
if (FrameInfo.hasVarSizedObjects())
return;
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(StackSizeSection);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(StackSizeSection);
const MCSymbol *FunctionSymbol = getFunctionBegin();
- uint64_t StackSize = FrameInfo.getStackSize();
+ uint64_t StackSize =
+ FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize();
OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
OutStreamer->emitULEB128IntValue(StackSize);
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
@@ -1208,7 +1397,8 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
return;
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- uint64_t StackSize = FrameInfo.getStackSize();
+ uint64_t StackSize =
+ FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize();
if (StackUsageStream == nullptr) {
std::error_code EC;
@@ -1298,7 +1488,7 @@ void AsmPrinter::emitFunctionBody() {
}
if (isVerbose())
- emitComments(MI, OutStreamer->GetCommentOS());
+ emitComments(MI, OutStreamer->getCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -1460,7 +1650,7 @@ void AsmPrinter::emitFunctionBody() {
}
// Switch to the original section in case basic block sections was used.
- OutStreamer->SwitchSection(MF->getSection());
+ OutStreamer->switchSection(MF->getSection());
const Function &F = MF->getFunction();
for (const auto &BB : F) {
@@ -1527,9 +1717,9 @@ void AsmPrinter::emitFunctionBody() {
emitPatchableFunctionEntries();
if (isVerbose())
- OutStreamer->GetCommentOS() << "-- End function\n";
+ OutStreamer->getCommentOS() << "-- End function\n";
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
/// Compute the number of Global Variables that uses a Constant.
@@ -1617,10 +1807,7 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
// Treat bitcasts of functions as functions also. This is important at least
// on WebAssembly where object and function addresses can't alias each other.
if (!IsFunction)
- if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee()))
- if (CE->getOpcode() == Instruction::BitCast)
- IsFunction =
- CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
+ IsFunction = isa<Function>(GA.getAliasee()->stripPointerCasts());
// AIX's assembly directive `.set` is not usable for aliasing purpose,
// so AIX has to use the extra-label-at-definition strategy. At this
@@ -1650,13 +1837,13 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
if (IsFunction) {
OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
if (TM.getTargetTriple().isOSBinFormatCOFF()) {
- OutStreamer->BeginCOFFSymbolDef(Name);
- OutStreamer->EmitCOFFSymbolStorageClass(
+ OutStreamer->beginCOFFSymbolDef(Name);
+ OutStreamer->emitCOFFSymbolStorageClass(
GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->endCOFFSymbolDef();
}
}
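The shifted constant passed to emitCOFFSymbolType is the COFF complex-type byte for "function": IMAGE_SYM_DTYPE_FUNCTION (2) placed in bits 4-7, i.e. 0x20. Expressed directly:

#include "llvm/BinaryFormat/COFF.h"

// 2 << 4 == 0x20, the COFF "DTYPE_FUNCTION" complex type.
unsigned coffFunctionType() {
  return llvm::COFF::IMAGE_SYM_DTYPE_FUNCTION
         << llvm::COFF::SCT_COMPLEX_TYPE_SHIFT;
}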
@@ -1734,7 +1921,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
// Switch to the remarks section.
MCSection *RemarksSection =
OutContext.getObjectFileInfo()->getRemarksSection();
- OutStreamer->SwitchSection(RemarksSection);
+ OutStreamer->switchSection(RemarksSection);
OutStreamer->emitBinaryData(OS.str());
}
@@ -1805,7 +1992,7 @@ bool AsmPrinter::doFinalization(Module &M) {
// Output stubs for external and common global variables.
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
- OutStreamer->SwitchSection(TLOF.getDataSection());
+ OutStreamer->switchSection(TLOF.getDataSection());
const DataLayout &DL = M.getDataLayout();
emitAlignment(Align(DL.getPointerSize()));
@@ -1829,7 +2016,7 @@ bool AsmPrinter::doFinalization(Module &M) {
for (const auto &Stub : Stubs) {
SmallString<256> SectionName = StringRef(".rdata$");
SectionName += Stub.first->getName();
- OutStreamer->SwitchSection(OutContext.getCOFFSection(
+ OutStreamer->switchSection(OutContext.getCOFFSection(
SectionName,
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_LNK_COMDAT,
@@ -1920,31 +2107,14 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit bytes for llvm.commandline metadata.
emitModuleCommandLines(M);
- // Emit __morestack address if needed for indirect calls.
- if (MMI->usesMorestackAddr()) {
- Align Alignment(1);
- MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
- getDataLayout(), SectionKind::getReadOnly(),
- /*C=*/nullptr, Alignment);
- OutStreamer->SwitchSection(ReadOnlySection);
-
- MCSymbol *AddrSymbol =
- OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
- OutStreamer->emitLabel(AddrSymbol);
-
- unsigned PtrSize = MAI->getCodePointerSize();
- OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"),
- PtrSize);
- }
-
// Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if
// split-stack is used.
- if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) {
- OutStreamer->SwitchSection(
- OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0));
- if (MMI->hasNosplitStack())
- OutStreamer->SwitchSection(
- OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0));
+ if (TM.getTargetTriple().isOSBinFormatELF() && HasSplitStack) {
+ OutStreamer->switchSection(OutContext.getELFSection(".note.GNU-split-stack",
+ ELF::SHT_PROGBITS, 0));
+ if (HasNoSplitStack)
+ OutStreamer->switchSection(OutContext.getELFSection(
+ ".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0));
}
// If we don't have any trampolines, then we don't require stack memory
@@ -1952,7 +2122,7 @@ bool AsmPrinter::doFinalization(Module &M) {
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
- OutStreamer->SwitchSection(S);
+ OutStreamer->switchSection(S);
if (TM.Options.EmitAddrsig) {
// Emit address-significance attributes for all globals.
@@ -1973,7 +2143,7 @@ bool AsmPrinter::doFinalization(Module &M) {
GV.getVisibility() != GlobalValue::DefaultVisibility)
continue;
- OutStreamer->SwitchSection(
+ OutStreamer->switchSection(
OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0,
"", false, ++UniqueID, nullptr));
OutStreamer->emitBytes(GV.getPartition());
@@ -1989,8 +2159,9 @@ bool AsmPrinter::doFinalization(Module &M) {
emitEndOfAsmFile(M);
MMI = nullptr;
+ AddrLabelSymbols = nullptr;
- OutStreamer->Finish();
+ OutStreamer->finish();
OutStreamer->reset();
OwnedMLI.reset();
OwnedMDT.reset();
@@ -2009,6 +2180,16 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
const Function &F = MF.getFunction();
+ // Record that there are split-stack functions, so we will emit a special
+ // section to tell the linker.
+ if (MF.shouldSplitStack()) {
+ HasSplitStack = true;
+
+ if (!MF.getFrameInfo().needsSplitStackProlog())
+ HasNoSplitStack = true;
+ } else
+ HasNoSplitStack = true;
+
// Get the function symbol.
if (!MAI->needsFunctionDescriptors()) {
CurrentFnSym = getSymbol(&MF.getFunction());
@@ -2113,7 +2294,7 @@ void AsmPrinter::emitConstantPool() {
continue;
if (CurSection != CPSections[i].S) {
- OutStreamer->SwitchSection(CPSections[i].S);
+ OutStreamer->switchSection(CPSections[i].S);
emitAlignment(Align(CPSections[i].Alignment));
CurSection = CPSections[i].S;
Offset = 0;
@@ -2156,7 +2337,7 @@ void AsmPrinter::emitJumpTableInfo() {
if (JTInDiffSection) {
// Drop it in the readonly section.
MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM);
- OutStreamer->SwitchSection(ReadOnlySection);
+ OutStreamer->switchSection(ReadOnlySection);
}
emitAlignment(Align(MJTI->getEntryAlignment(DL)));
@@ -2392,7 +2573,7 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
MCSection *OutputSection =
(IsCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
: Obj.getStaticDtorSection(S.Priority, KeySym));
- OutStreamer->SwitchSection(OutputSection);
+ OutStreamer->switchSection(OutputSection);
if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
emitAlignment(Align);
emitXXStructor(DL, S.Func);
@@ -2423,8 +2604,8 @@ void AsmPrinter::emitModuleCommandLines(Module &M) {
if (!NMD || !NMD->getNumOperands())
return;
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(CommandLine);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(CommandLine);
OutStreamer->emitZeros(1);
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
@@ -2434,7 +2615,7 @@ void AsmPrinter::emitModuleCommandLines(Module &M) {
OutStreamer->emitBytes(S->getString());
OutStreamer->emitZeros(1);
}
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
//===--------------------------------------------------------------------===//
@@ -2471,7 +2652,7 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size,
bool IsSectionRelative) const {
if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
- OutStreamer->EmitCOFFSecRel32(Label, Offset);
+ OutStreamer->emitCOFFSecRel32(Label, Offset);
if (Size > 4)
OutStreamer->emitZeros(Size - 4);
return;
@@ -2541,6 +2722,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
llvm_unreachable("Unknown constant value to lower!");
}
+ // The constant expression opcodes are limited to those that are necessary
+ // to represent relocations on supported targets. Expressions involving only
+ // constant addresses are constant folded instead.
switch (CE->getOpcode()) {
case Instruction::AddrSpaceCast: {
const Constant *Op = CE->getOperand(0);
@@ -2658,34 +2842,17 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return RelocExpr;
}
}
+
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0));
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1));
+ return MCBinaryExpr::createSub(LHS, RHS, Ctx);
+ break;
}
- // else fallthrough
- LLVM_FALLTHROUGH;
-
- // The MC library also has a right-shift operator, but it isn't consistently
- // signed or unsigned between different targets.
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::Shl:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
+
+ case Instruction::Add: {
const MCExpr *LHS = lowerConstant(CE->getOperand(0));
const MCExpr *RHS = lowerConstant(CE->getOperand(1));
- switch (CE->getOpcode()) {
- default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
- case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx);
- case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx);
- case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx);
- case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx);
- case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx);
- case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx);
- case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx);
- case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx);
- }
+ return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
}
}
}
@@ -2719,7 +2886,7 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
assert(Size % 8 == 0);
// Extend the element to take zero padding into account.
- APInt Value = CI->getValue().zextOrSelf(Size);
+ APInt Value = CI->getValue().zext(Size);
if (!Value.isSplat(8))
return -1;
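APInt::zext now accepts an equal bit width (which is why the zextOrSelf call above can become plain zext), and isSplat(8) asks whether the value is a single byte repeated across the whole width. A small sketch of the splat test:

#include "llvm/ADT/APInt.h"
#include <cassert>

void splatExamples() {
  llvm::APInt A(16, 0xABAB);
  assert(A.isSplat(8));           // 0xAB repeated twice
  assert(!A.zext(32).isSplat(8)); // zero-extension breaks the repetition
  assert(A.zext(16) == A);        // same-width zext is now a no-op
}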
@@ -2768,8 +2935,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
if (isa<IntegerType>(CDS->getElementType())) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
- CDS->getElementAsInteger(i));
+ AP.OutStreamer->getCommentOS()
+ << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i));
AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i),
ElementByteSize);
}
@@ -2855,8 +3022,8 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
if (AP.isVerbose()) {
SmallString<8> StrVal;
APF.toString(StrVal);
- ET->print(AP.OutStreamer->GetCommentOS());
- AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
+ ET->print(AP.OutStreamer->getCommentOS());
+ AP.OutStreamer->getCommentOS() << ' ' << StrVal << '\n';
}
// Now iterate through the APInt chunks, emitting them in endian-correct
@@ -3061,8 +3228,8 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
if (StoreSize <= 8) {
if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
- CI->getZExtValue());
+ AP.OutStreamer->getCommentOS()
+ << format("0x%" PRIx64 "\n", CI->getZExtValue());
AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize);
} else {
emitGlobalConstantLargeInt(CI, AP);
@@ -3163,11 +3330,12 @@ MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
}
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
- return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+ return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(
+ BA->getBasicBlock());
}
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
- return MMI->getAddrLabelSymbol(BB);
+ return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(BB);
}
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
@@ -3272,7 +3440,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
// Otherwise, it is a loop header. Print out information about child and
// parent loops.
- raw_ostream &OS = AP.OutStreamer->GetCommentOS();
+ raw_ostream &OS = AP.OutStreamer->getCommentOS();
PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
@@ -3308,7 +3476,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// entry block is always placed in the function section and is handled
// separately.
if (MBB.isBeginSection() && !MBB.isEntryBlock()) {
- OutStreamer->SwitchSection(
+ OutStreamer->switchSection(
getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
MBB, TM));
CurrentSectionBeginSym = MBB.getSymbol();
@@ -3326,7 +3494,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// MBBs can have their address taken as part of CodeGen without having
// their corresponding BB's address taken in IR
if (BB && BB->hasAddressTaken())
- for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
+ for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB))
OutStreamer->emitLabel(Sym);
}
@@ -3334,9 +3502,9 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
if (isVerbose()) {
if (BB) {
if (BB->hasName()) {
- BB->printAsOperand(OutStreamer->GetCommentOS(),
+ BB->printAsOperand(OutStreamer->getCommentOS(),
/*PrintType=*/false, BB->getModule());
- OutStreamer->GetCommentOS() << '\n';
+ OutStreamer->getCommentOS() << '\n';
}
}
@@ -3563,7 +3731,7 @@ void AsmPrinter::emitXRayTable() {
// range of sleds associated with a function.
auto &Ctx = OutContext;
MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
- OutStreamer->SwitchSection(InstMap);
+ OutStreamer->switchSection(InstMap);
OutStreamer->emitLabel(SledsStart);
for (const auto &Sled : Sleds) {
MCSymbol *Dot = Ctx.createTempSymbol();
@@ -3590,11 +3758,11 @@ void AsmPrinter::emitXRayTable() {
// Each entry here will be 2 * word size aligned, as we're writing down two
// pointers. This should work for both 32-bit and 64-bit platforms.
if (FnSledIndex) {
- OutStreamer->SwitchSection(FnSledIndex);
+ OutStreamer->switchSection(FnSledIndex);
OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo());
OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
- OutStreamer->SwitchSection(PrevSection);
+ OutStreamer->switchSection(PrevSection);
}
Sleds.clear();
}
@@ -3639,7 +3807,7 @@ void AsmPrinter::emitPatchableFunctionEntries() {
}
LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
}
- OutStreamer->SwitchSection(OutContext.getELFSection(
+ OutStreamer->switchSection(OutContext.getELFSection(
"__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName,
F.hasComdat(), MCSection::NonUniqueID, LinkedToSym));
emitAlignment(Align(PointerSize));
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index fc127f4cf9da..719fec06aa33 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "ByteStreamer.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -19,14 +18,11 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include <cstdint>
using namespace llvm;
@@ -162,7 +158,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
if (MAI->needsDwarfSectionOffsetDirective()) {
assert(!isDwarf64() &&
"emitting DWARF64 is not implemented for COFF targets");
- OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0);
+ OutStreamer->emitCOFFSecRel32(Label, /*Offset=*/0);
return;
}
@@ -277,6 +273,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpUndefined:
OutStreamer->emitCFIUndefined(Inst.getRegister());
break;
+ case MCCFIInstruction::OpRememberState:
+ OutStreamer->emitCFIRememberState();
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ OutStreamer->emitCFIRestoreState();
+ break;
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 5d0cadefdbf7..88c82cbc958b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -17,8 +17,8 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -26,9 +26,10 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
@@ -115,7 +116,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
- Parser->setTargetParser(*TAP.get());
+ Parser->setTargetParser(*TAP);
// Enable lexing Masm binary and hex integer literals in intel inline
// assembly.
if (Dialect == InlineAsm::AD_Intel)
@@ -398,9 +399,9 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
if (!RestrRegs.empty()) {
std::string Msg = "inline asm clobber list contains reserved registers: ";
ListSeparator LS;
- for (const Register &RR : RestrRegs) {
+ for (const Register RR : RestrRegs) {
Msg += LS;
- Msg += TRI->getName(RR);
+ Msg += TRI->getRegAsmName(RR);
}
const char *Note =
"Reserved registers on the clobber list may not be "
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 52c74713551c..701c0affdfa6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "CodeViewDebug.h"
-#include "DwarfExpression.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@@ -29,7 +28,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -41,7 +39,6 @@
#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
@@ -58,11 +55,8 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -230,7 +224,7 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
break;
}
}
- bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
+ bool Success = OS.emitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
static_cast<unsigned>(CSKind));
(void)Success;
assert(Success && ".cv_file directive failed");
@@ -251,7 +245,7 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
.SiteFuncId;
Site->SiteFuncId = NextFuncId++;
- OS.EmitCVInlineSiteIdDirective(
+ OS.emitCVInlineSiteIdDirective(
Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()),
InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc());
Site->Inlinee = Inlinee;
@@ -515,7 +509,7 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
if (!DL || DL == PrevInstLoc)
return;
- const DIScope *Scope = DL.get()->getScope();
+ const DIScope *Scope = DL->getScope();
if (!Scope)
return;
@@ -614,18 +608,16 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
void CodeViewDebug::beginModule(Module *M) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
- NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu");
- if (!CUs || !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ if (!MMI->hasDebugInfo() ||
+ !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
return;
}
- // Tell MMI that we have and need debug info.
- MMI->setDebugInfoAvailability(true);
TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
// Get the current source language.
- const MDNode *Node = *CUs->operands().begin();
+ const MDNode *Node = *M->debug_compile_units_begin();
const auto *CU = cast<DICompileUnit>(Node);
CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
@@ -727,7 +719,7 @@ void CodeViewDebug::emitTypeInformation() {
return;
// Start the .debug$T or .debug$P section with 0x4.
- OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
+ OS.switchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
emitCodeViewMagicVersion();
TypeTableCollection Table(TypeTable.records());
@@ -760,7 +752,7 @@ void CodeViewDebug::emitTypeGlobalHashes() {
// Start the .debug$H section with the version and hash algorithm, currently
// hardcoded to version 0, SHA1.
- OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
+ OS.switchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
OS.emitValueToAlignment(4);
OS.AddComment("Magic");
@@ -826,6 +818,8 @@ static Version parseVersion(StringRef Name) {
if (isdigit(C)) {
V.Part[N] *= 10;
V.Part[N] += C - '0';
+ V.Part[N] =
+ std::min<int>(V.Part[N], std::numeric_limits<uint16_t>::max());
} else if (C == '.') {
++N;
if (N >= 4)
@@ -867,7 +861,6 @@ void CodeViewDebug::emitCompilerInformation() {
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
for (int N : FrontVer.Part) {
- N = std::min<int>(N, std::numeric_limits<uint16_t>::max());
OS.emitInt16(N);
}
@@ -985,11 +978,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
assert(TypeIndices.count({SP, nullptr}));
TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}];
- OS.AddBlankLine();
+ OS.addBlankLine();
unsigned FileId = maybeRecordFile(SP->getFile());
OS.AddComment("Inlined function " + SP->getName() + " starts at " +
SP->getFilename() + Twine(':') + Twine(SP->getLine()));
- OS.AddBlankLine();
+ OS.addBlankLine();
OS.AddComment("Type index of inlined function");
OS.emitInt32(InlineeIdx.getIndex());
OS.AddComment("Offset into filechecksum table");
@@ -1051,7 +1044,7 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
- OS.SwitchSection(DebugSec);
+ OS.switchSection(DebugSec);
// Emit the magic version number if this is the first time we've switched to
// this section.
@@ -1080,9 +1073,9 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
OS.AddComment("PtrNext");
OS.emitInt32(0);
OS.AddComment("Thunk section relative address");
- OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
+ OS.emitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Thunk section index");
- OS.EmitCOFFSectionIndex(Fn);
+ OS.emitCOFFSectionIndex(Fn);
OS.AddComment("Code size");
OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2);
OS.AddComment("Ordinal");
@@ -1132,7 +1125,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// Emit FPO data, but only on 32-bit x86. No other platforms use it.
if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86)
- OS.EmitCVFPOData(Fn);
+ OS.emitCVFPOData(Fn);
// Emit a symbol subsection, required by VS2012+ to find function boundaries.
OS.AddComment("Symbol subsection for " + Twine(FuncName));
@@ -1160,9 +1153,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function type index");
OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex());
OS.AddComment("Function section relative address");
- OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
+ OS.emitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Function section index");
- OS.EmitCOFFSectionIndex(Fn);
+ OS.emitCOFFSectionIndex(Fn);
OS.AddComment("Flags");
OS.emitInt8(0);
// Emit the function display name as a null-terminated string.
@@ -1207,9 +1200,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
MCSymbol *Label = Annot.first;
MDTuple *Strs = cast<MDTuple>(Annot.second);
MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION);
- OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
+ OS.emitCOFFSecRel32(Label, /*Offset=*/0);
// FIXME: Make sure we don't overflow the max record size.
- OS.EmitCOFFSectionIndex(Label);
+ OS.emitCOFFSectionIndex(Label);
OS.emitInt16(Strs->getNumOperands());
for (Metadata *MD : Strs->operands()) {
// MDStrings are null terminated, so we can do EmitBytes and get the
@@ -1227,9 +1220,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
const DIType *DITy = std::get<2>(HeapAllocSite);
MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
OS.AddComment("Call site offset");
- OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
+ OS.emitCOFFSecRel32(BeginLabel, /*Offset=*/0);
OS.AddComment("Call site section index");
- OS.EmitCOFFSectionIndex(BeginLabel);
+ OS.emitCOFFSectionIndex(BeginLabel);
OS.AddComment("Call instruction length");
OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
OS.AddComment("Type index");
@@ -1249,9 +1242,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End);
}
-CodeViewDebug::LocalVarDefRange
+CodeViewDebug::LocalVarDef
CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
- LocalVarDefRange DR;
+ LocalVarDef DR;
DR.InMemory = -1;
DR.DataOffset = Offset;
assert(DR.DataOffset == Offset && "truncation");
@@ -1303,19 +1296,19 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
"Frame offsets with a scalable component are not supported");
// Calculate the label ranges.
- LocalVarDefRange DefRange =
+ LocalVarDef DefRange =
createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset);
+ LocalVariable Var;
+ Var.DIVar = VI.Var;
+
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
End = End ? End : Asm->getFunctionEnd();
- DefRange.Ranges.emplace_back(Begin, End);
+ Var.DefRanges[DefRange].emplace_back(Begin, End);
}
- LocalVariable Var;
- Var.DIVar = VI.Var;
- Var.DefRanges.emplace_back(std::move(DefRange));
if (Deref)
Var.UseReferenceType = true;
@@ -1374,24 +1367,18 @@ void CodeViewDebug::calculateRanges(
// We can only handle a register or an offseted load of a register.
if (Location->Register == 0 || Location->LoadChain.size() > 1)
continue;
- {
- LocalVarDefRange DR;
- DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
- DR.InMemory = !Location->LoadChain.empty();
- DR.DataOffset =
- !Location->LoadChain.empty() ? Location->LoadChain.back() : 0;
- if (Location->FragmentInfo) {
- DR.IsSubfield = true;
- DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8;
- } else {
- DR.IsSubfield = false;
- DR.StructOffset = 0;
- }
- if (Var.DefRanges.empty() ||
- Var.DefRanges.back().isDifferentLocation(DR)) {
- Var.DefRanges.emplace_back(std::move(DR));
- }
+ LocalVarDef DR;
+ DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
+ DR.InMemory = !Location->LoadChain.empty();
+ DR.DataOffset =
+ !Location->LoadChain.empty() ? Location->LoadChain.back() : 0;
+ if (Location->FragmentInfo) {
+ DR.IsSubfield = true;
+ DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8;
+ } else {
+ DR.IsSubfield = false;
+ DR.StructOffset = 0;
}
// Compute the label range.
@@ -1408,7 +1395,7 @@ void CodeViewDebug::calculateRanges(
// If the last range end is our begin, just extend the last range.
// Otherwise make a new range.
SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R =
- Var.DefRanges.back().Ranges;
+ Var.DefRanges[DR];
if (!R.empty() && R.back().second == Begin)
R.back().second = End;
else
@@ -1525,7 +1512,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
// FIXME: Set GuardCfg when it is implemented.
CurFn->FrameProcOpts = FPO;
- OS.EmitCVFuncIdDirective(CurFn->FuncId);
+ OS.emitCVFuncIdDirective(CurFn->FuncId);
// Find the end of the function prolog. First known non-DBG_VALUE and
// non-frame setup location marks the beginning of the function body.
@@ -1825,6 +1812,7 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
break;
case dwarf::DW_ATE_UTF:
switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Character8; break;
case 2: STK = SimpleTypeKind::Character16; break;
case 4: STK = SimpleTypeKind::Character32; break;
}
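
The added case 1 completes the DW_ATE_UTF mapping so one-byte UTF types (char8_t) lower to Character8 instead of falling through with no kind. The mapping in isolation, as a hedged sketch with illustrative enumerators:

#include <cstdint>
#include <iostream>

enum class SimpleTypeKind { None, Character8, Character16, Character32 };

// Map a DW_ATE_UTF byte size to a CodeView character kind; sizes the
// switch does not recognize keep the default kind.
static SimpleTypeKind utfKindForSize(uint64_t ByteSize) {
  switch (ByteSize) {
  case 1: return SimpleTypeKind::Character8;  // char8_t
  case 2: return SimpleTypeKind::Character16; // char16_t
  case 4: return SimpleTypeKind::Character32; // char32_t
  default: return SimpleTypeKind::None;
  }
}

int main() {
  std::cout << (utfKindForSize(1) == SimpleTypeKind::Character8) << '\n'; // 1
}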
@@ -2820,7 +2808,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
// records and on disk formats are described in SymbolRecords.h. BytePrefix
// should be big enough to hold all forms without memory allocation.
SmallString<20> BytePrefix;
- for (const LocalVarDefRange &DefRange : Var.DefRanges) {
+ for (const auto &Pair : Var.DefRanges) {
+ LocalVarDef DefRange = Pair.first;
+ const auto &Ranges = Pair.second;
BytePrefix.clear();
if (DefRange.InMemory) {
int Offset = DefRange.DataOffset;
@@ -2844,7 +2834,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
: (EncFP == FI.EncodedLocalFramePtrReg))) {
DefRangeFramePointerRelHeader DRHdr;
DRHdr.Offset = Offset;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
} else {
uint16_t RegRelFlags = 0;
if (DefRange.IsSubfield) {
@@ -2856,7 +2846,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
DRHdr.Register = Reg;
DRHdr.Flags = RegRelFlags;
DRHdr.BasePointerOffset = Offset;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
}
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
@@ -2865,12 +2855,12 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
DRHdr.OffsetInParent = DefRange.StructOffset;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
} else {
DefRangeRegisterHeader DRHdr;
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
}
}
}
@@ -2894,9 +2884,9 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
OS.AddComment("Code size");
OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size
OS.AddComment("Function section relative address");
- OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset
+ OS.emitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset
OS.AddComment("Function section index");
- OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol
+ OS.emitCOFFSectionIndex(FI.Begin); // Func Symbol
OS.AddComment("Lexical block name");
emitNullTerminatedSymbolName(OS, Block.Name); // Name
endSymbolRecord(RecordEnd);
@@ -3181,6 +3171,11 @@ void CodeViewDebug::collectGlobalVariableInfo() {
for (const auto *GVE : CU->getGlobalVariables()) {
const DIGlobalVariable *DIGV = GVE->getVariable();
const DIExpression *DIE = GVE->getExpression();
+ // Don't emit string literals in CodeView, as the only useful parts are
+ // generally the filename and line number, which aren't possible to output
+ // in CodeView. String literals should be the only unnamed GlobalVariable
+ // with debug info.
+ if (DIGV->getName().empty()) continue;
if ((DIE->getNumElements() == 2) &&
(DIE->getElement(0) == dwarf::DW_OP_plus_uconst))
@@ -3380,10 +3375,10 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
// Use the offset seen while collecting info on globals.
Offset = CVGlobalVariableOffsets[DIGV];
- OS.EmitCOFFSecRel32(GVSym, Offset);
+ OS.emitCOFFSecRel32(GVSym, Offset);
OS.AddComment("Segment");
- OS.EmitCOFFSectionIndex(GVSym);
+ OS.emitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
const unsigned LengthOfDataRecord = 12;
emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord);
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index d1fc3cdccb20..16f0082723ed 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -50,18 +50,8 @@ class MachineFunction;
/// Collects and handles line tables information in a CodeView format.
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
- MCStreamer &OS;
- BumpPtrAllocator Allocator;
- codeview::GlobalTypeTableBuilder TypeTable;
-
- /// Whether to emit type record hashes into .debug$H.
- bool EmitDebugGlobalHashes = false;
-
- /// The codeview CPU type used by the translation unit.
- codeview::CPUType TheCPU;
-
- /// Represents the most general definition range.
- struct LocalVarDefRange {
+public:
+ struct LocalVarDef {
/// Indicates that variable data is stored in memory relative to the
/// specified register.
int InMemory : 1;
@@ -79,23 +69,40 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// location containing the data.
uint16_t CVRegister;
- /// Compares all location fields. This includes all fields except the label
- /// ranges.
- bool isDifferentLocation(LocalVarDefRange &O) {
- return InMemory != O.InMemory || DataOffset != O.DataOffset ||
- IsSubfield != O.IsSubfield || StructOffset != O.StructOffset ||
- CVRegister != O.CVRegister;
+ static uint64_t toOpaqueValue(const LocalVarDef DR) {
+ uint64_t Val = 0;
+ std::memcpy(&Val, &DR, sizeof(Val));
+ return Val;
}
- SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges;
+ static LocalVarDef createFromOpaqueValue(uint64_t Val) {
+ LocalVarDef DR;
+ std::memcpy(&DR, &Val, sizeof(Val));
+ return DR;
+ }
};
- static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
+ static_assert(sizeof(uint64_t) == sizeof(LocalVarDef), "");
+
+private:
+ MCStreamer &OS;
+ BumpPtrAllocator Allocator;
+ codeview::GlobalTypeTableBuilder TypeTable;
+
+ /// Whether to emit type record hashes into .debug$H.
+ bool EmitDebugGlobalHashes = false;
+
+ /// The codeview CPU type used by the translation unit.
+ codeview::CPUType TheCPU;
+
+ static LocalVarDef createDefRangeMem(uint16_t CVRegister, int Offset);
/// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable {
const DILocalVariable *DIVar = nullptr;
- SmallVector<LocalVarDefRange, 1> DefRanges;
+ MapVector<LocalVarDef,
+ SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>>
+ DefRanges;
bool UseReferenceType = false;
};
@@ -493,6 +500,27 @@ public:
void beginInstruction(const MachineInstr *MI) override;
};
+template <> struct DenseMapInfo<CodeViewDebug::LocalVarDef> {
+
+ static inline CodeViewDebug::LocalVarDef getEmptyKey() {
+ return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL);
+ }
+
+ static inline CodeViewDebug::LocalVarDef getTombstoneKey() {
+ return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL - 1ULL);
+ }
+
+ static unsigned getHashValue(const CodeViewDebug::LocalVarDef &DR) {
+ return CodeViewDebug::LocalVarDef::toOpaqueValue(DR) * 37ULL;
+ }
+
+ static bool isEqual(const CodeViewDebug::LocalVarDef &LHS,
+ const CodeViewDebug::LocalVarDef &RHS) {
+ return CodeViewDebug::LocalVarDef::toOpaqueValue(LHS) ==
+ CodeViewDebug::LocalVarDef::toOpaqueValue(RHS);
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
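
The header change packs every LocalVarDef field into 64 bits so the struct can serve as a MapVector/DenseMap key: hashing and equality both go through a memcpy round-trip to uint64_t, and two reserved bit patterns act as the empty and tombstone keys. A self-contained sketch of that opaque-value key pattern outside LLVM (the field layout and the *37 hash are illustrative):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <unordered_map>

// A POD key whose whole state fits in 8 bytes, so it can be round-tripped
// through a uint64_t for hashing and equality (the bitfields fill the word
// exactly, so no padding bits leak into the comparison).
struct DefKey {
  int InMemory : 1;
  int DataOffset : 31;
  uint16_t IsSubfield : 1;
  uint16_t StructOffset : 15;
  uint16_t CVRegister;
};
static_assert(sizeof(DefKey) == sizeof(uint64_t), "key must be 8 bytes");

static uint64_t toOpaque(DefKey K) {
  uint64_t V = 0;
  std::memcpy(&V, &K, sizeof(V));
  return V;
}

struct DefKeyHash {
  size_t operator()(const DefKey &K) const {
    return static_cast<size_t>(toOpaque(K) * 37ULL); // spread nearby values
  }
};
struct DefKeyEq {
  bool operator()(const DefKey &A, const DefKey &B) const {
    return toOpaque(A) == toOpaque(B);
  }
};

int main() {
  std::unordered_map<DefKey, int, DefKeyHash, DefKeyEq> Ranges;
  DefKey K{};         // zero-initialize every field before use
  K.CVRegister = 22;
  Ranges[K] += 1;     // same location -> same slot, like Var.DefRanges[DR]
  DefKey K2{};
  K2.CVRegister = 22;
  std::cout << Ranges.count(K2) << '\n'; // 1
}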
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 396322c4979d..617ddbd66e4e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -13,21 +13,15 @@
#include "llvm/CodeGen/DIE.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
-#include "DwarfUnit.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -170,7 +164,7 @@ DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) {
void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const {
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
- AP->OutStreamer->SwitchSection(Section);
+ AP->OutStreamer->switchSection(Section);
AP->emitDwarfAbbrevs(Abbreviations);
}
}
@@ -204,6 +198,7 @@ const DIE *DIE::getUnitDie() const {
const DIE *p = this;
while (p) {
if (p->getTag() == dwarf::DW_TAG_compile_unit ||
+ p->getTag() == dwarf::DW_TAG_skeleton_unit ||
p->getTag() == dwarf::DW_TAG_type_unit)
return p;
p = p->getParent();
@@ -378,7 +373,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
return;
case dwarf::DW_FORM_flag:
case dwarf::DW_FORM_ref1:
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index e175854f7b93..5da50d7aab9f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -19,7 +19,6 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index dd795079ac1a..1358f4d25990 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -204,7 +203,7 @@ void DbgValueHistoryMap::trimLocationRanges(
if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) {
// Adjust ScopeRanges to exclude ranges which subsequent location ranges
// cannot possibly intersect.
- ScopeRanges = ArrayRef<InsnRange>(R.getValue(), ScopeRanges.end());
+ ScopeRanges = ArrayRef<InsnRange>(*R, ScopeRanges.end());
} else {
// If the location range does not intersect any scope range then the
// DBG_VALUE which opened this location range is useless, mark it for
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 18fc46c74eb4..660a064687d3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 63343d2519f9..5f187acf13dc 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -11,23 +11,13 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -53,7 +43,7 @@ void DwarfCFIExceptionBase::endFragment() {
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
: DwarfCFIExceptionBase(A) {}
-DwarfCFIException::~DwarfCFIException() {}
+DwarfCFIException::~DwarfCFIException() = default;
/// endModule - Emit all exception information that should come after the
/// content.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 5913c687db48..b3f99d346faa 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -67,13 +66,13 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
+ if ((Skeleton || !DD->useSplitDwarf()) && Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
// Don't use the address pool in non-fission or in the skeleton unit itself.
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
- if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
bool UseAddrOffsetFormOrExpressions =
DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
@@ -109,9 +108,6 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
dwarf::Attribute Attribute,
const MCSymbol *Label) {
if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
- if (Label)
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
else
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
@@ -169,7 +165,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
} else {
DeclContext = GV->getScope();
// Add name and type.
- addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
+ StringRef DisplayName = GV->getDisplayName();
+ if (!DisplayName.empty())
+ addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
if (GTy)
addType(*VariableDIE, GTy);
@@ -303,8 +301,11 @@ void DwarfCompileUnit::addLocationAttribute(
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
- } else if (Asm->TM.getRelocationModel() == Reloc::RWPI ||
- Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) {
+ } else if ((Asm->TM.getRelocationModel() == Reloc::RWPI ||
+ Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) &&
+ !Asm->getObjFileLowering()
+ .getKindForGlobal(Global, Asm->TM)
+ .isReadOnly()) {
auto FormAndOp = GetPointerSizedFormAndOp();
// Constant
addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op);
@@ -505,7 +506,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// FIXME: when writing dwo, we need to avoid relocations. Probably
// the "right" solution is to treat globals the way func and data
// symbols are (with entries in .debug_addr).
- // For now, since we only ever use index 0, this should work as-is.
+ // For now, since we only ever use index 0, this should work as-is.
addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index);
}
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index f2e1f6346803..61412cde34c8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 609b568f28be..866338a949f3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -31,8 +31,8 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -45,14 +45,11 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/SectionKind.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -360,7 +357,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
else if (IsDarwin)
DebuggerTuning = DebuggerKind::LLDB;
- else if (TT.isPS4CPU())
+ else if (TT.isPS())
DebuggerTuning = DebuggerKind::SCE;
else if (TT.isOSAIX())
DebuggerTuning = DebuggerKind::DBX;
@@ -2315,7 +2312,7 @@ void DwarfDebug::emitStringOffsetsTableHeader() {
template <typename AccelTableT>
void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section,
StringRef TableName) {
- Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->switchSection(Section);
// Emit the full data.
emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol());
@@ -2434,12 +2431,12 @@ void DwarfDebug::emitDebugPubSections() {
bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
DICompileUnit::DebugNameTableKind::GNU;
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
: Asm->getObjFileLowering().getDwarfPubNamesSection());
emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames());
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
: Asm->getObjFileLowering().getDwarfPubTypesSection());
emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes());
@@ -2849,7 +2846,7 @@ void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
if (DebugLocs.getLists().empty())
return;
- Asm->OutStreamer->SwitchSection(Sec);
+ Asm->OutStreamer->switchSection(Sec);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
@@ -2880,7 +2877,7 @@ void DwarfDebug::emitDebugLocDWO() {
}
for (const auto &List : DebugLocs.getLists()) {
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->emitLabel(List.Label);
@@ -2953,8 +2950,8 @@ void DwarfDebug::emitDebugARanges() {
// Sort the symbols by offset within the section.
llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
- unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
- unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
+ unsigned IA = A.Sym ? Asm->OutStreamer->getSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->getSymbolOrder(B.Sym) : 0;
// Symbols with no order assigned should be placed at the end.
// (e.g. section end labels)
@@ -2987,7 +2984,7 @@ void DwarfDebug::emitDebugARanges() {
}
// Start the dwarf aranges section.
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
unsigned PtrSize = Asm->MAI->getCodePointerSize();
@@ -3045,15 +3042,22 @@ void DwarfDebug::emitDebugARanges() {
for (const ArangeSpan &Span : List) {
Asm->emitLabelReference(Span.Start, PtrSize);
- // Calculate the size as being from the span start to it's end.
- if (Span.End) {
+ // Calculate the size as being from the span start to its end.
+ //
+ // If the size is zero, then round it up to one byte. The DWARF
+ // specification requires that entries in this table have nonzero
+ // lengths.
+ auto SizeRef = SymSize.find(Span.Start);
+ if ((SizeRef == SymSize.end() || SizeRef->second != 0) && Span.End) {
Asm->emitLabelDifference(Span.End, Span.Start, PtrSize);
} else {
// For symbols without an end marker (e.g. common), we
// write a single arange entry containing just that one symbol.
- uint64_t Size = SymSize[Span.Start];
- if (Size == 0)
+ uint64_t Size;
+ if (SizeRef == SymSize.end() || SizeRef->second == 0)
Size = 1;
+ else
+ Size = SizeRef->second;
Asm->OutStreamer->emitIntValue(Size, PtrSize);
}
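
The rewritten span loop above enforces the DWARF requirement that .debug_aranges entries have nonzero length: a span whose start symbol is known to be zero-sized no longer takes the label-difference path, and the single-symbol fallback rounds a zero size up to one byte. The size selection in isolation, sketched with illustrative types:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Decide what length to emit for one arange span. Spans with a known
// zero-sized start symbol, or with no end label, become single entries
// whose length is rounded up to one byte (DWARF forbids zero lengths).
static uint64_t arangeLength(const std::map<std::string, uint64_t> &SymSize,
                             const std::string &Start, bool HasEnd,
                             uint64_t LabelDiff) {
  auto It = SymSize.find(Start);
  bool KnownZero = It != SymSize.end() && It->second == 0;
  if (HasEnd && !KnownZero)
    return LabelDiff;                       // normal case: End - Start
  if (It == SymSize.end() || It->second == 0)
    return 1;                               // round zero up to one byte
  return It->second;                        // symbol's own recorded size
}

int main() {
  std::map<std::string, uint64_t> Sizes{{"empty_label", 0}, {"common_blob", 8}};
  std::cout << arangeLength(Sizes, "empty_label", true, 0) << '\n';  // 1
  std::cout << arangeLength(Sizes, "common_blob", false, 0) << '\n'; // 8
}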
@@ -3087,7 +3091,7 @@ void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section
return !Pair.second->getCUNode()->isDebugDirectivesOnly();
}));
- Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->switchSection(Section);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
@@ -3239,7 +3243,7 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
DIMacroNodeArray Macros = CUNode->getMacros();
if (Macros.empty())
continue;
- Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->switchSection(Section);
Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
if (UseDebugMacroSection)
emitMacroHeader(Asm, *this, U, getDwarfVersion());
@@ -3447,22 +3451,6 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
CU.addDIETypeSignature(RefDie, Signature);
}
-DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
- : DD(DD),
- TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
- DD->TypeUnitsUnderConstruction.clear();
- DD->AddrPool.resetUsedFlag();
-}
-
-DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
- DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
- DD->AddrPool.resetUsedFlag(AddrPoolUsed);
-}
-
-DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
- return NonTypeUnitContext(this);
-}
-
// Add the Name along with its companion DIE to the appropriate accelerator
// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
// AccelTableKind::Apple, we use the table we got as an argument). If
@@ -3555,6 +3543,6 @@ Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
// An MD5 checksum is 16 bytes.
std::string ChecksumString = fromHex(Checksum->Value);
MD5::MD5Result CKMem;
- std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+ std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data());
return CKMem;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 4e1a1b1e068d..31e4081b7141 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,14 +14,13 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "AddressPool.h"
-#include "DebugLocStream.h"
#include "DebugLocEntry.h"
+#include "DebugLocStream.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -31,7 +30,6 @@
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
@@ -80,7 +78,7 @@ private:
public:
DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID)
: Entity(N), InlinedAt(IA), SubclassID(ID) {}
- virtual ~DbgEntity() {}
+ virtual ~DbgEntity() = default;
/// Accessors.
/// @{
@@ -667,19 +665,6 @@ public:
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
- class NonTypeUnitContext {
- DwarfDebug *DD;
- decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
- bool AddrPoolUsed;
- friend class DwarfDebug;
- NonTypeUnitContext(DwarfDebug *DD);
- public:
- NonTypeUnitContext(NonTypeUnitContext&&) = default;
- ~NonTypeUnitContext();
- };
-
- NonTypeUnitContext enterNonTypeUnitContext();
-
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index fe438102ee98..1c21d5ee8bb1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -329,7 +329,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return false;
}
- assert(DwarfRegs.size() == 1);
+ // TODO: We should not give up here, but the following code needs to be changed
+ // to deal with multiple (sub)registers first.
+ if (DwarfRegs.size() > 1) {
+ LLVM_DEBUG(dbgs() << "TODO: giving up on debug information due to "
+ "multi-register usage.\n");
+ DwarfRegs.clear();
+ LocationKind = Unknown;
+ return false;
+ }
+
auto Reg = DwarfRegs[0];
bool FBReg = isFrameRegister(TRI, MachineReg);
int SignedOffset = 0;
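
Rather than asserting that exactly one DWARF register describes the location, the hunk above bails out and drops the variable's location when the value is split across several (sub)registers. The shape of that graceful-degradation guard, sketched standalone:

#include <iostream>
#include <vector>

// Give up on describing a multi-register location instead of asserting:
// clear any partial state and tell the caller no location was produced.
static bool describeRegisterLocation(std::vector<unsigned> &DwarfRegs) {
  if (DwarfRegs.size() > 1) {
    DwarfRegs.clear(); // discard partial results before bailing out
    return false;      // caller simply emits no location
  }
  if (DwarfRegs.empty())
    return false;
  // ... the single-register encoding path would continue here ...
  return true;
}

int main() {
  std::vector<unsigned> Split{3, 4}; // a value living in two subregisters
  std::cout << describeRegisterLocation(Split) << '\n'; // 0: dropped
}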
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index a67d0f032cf6..a497aa07284e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -12,9 +12,7 @@
#include "DwarfUnit.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCStreamer.h"
-#include <algorithm>
#include <cstdint>
using namespace llvm;
@@ -47,7 +45,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
if (llvm::empty(TheU->getUnitDie().values()))
return;
- Asm->OutStreamer->SwitchSection(S);
+ Asm->OutStreamer->switchSection(S);
TheU->emitHeader(UseOffsets);
Asm->emitDwarfDIE(TheU->getUnitDie());
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index a876f8ccace9..67b72f0b455d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -39,7 +39,7 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
StringRef Str) {
auto &MapEntry = getEntryImpl(Asm, Str);
- return EntryRef(MapEntry, false);
+ return EntryRef(MapEntry);
}
DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
@@ -47,7 +47,7 @@ DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
auto &MapEntry = getEntryImpl(Asm, Str);
if (!MapEntry.getValue().isIndexed())
MapEntry.getValue().Index = NumIndexedStrings++;
- return EntryRef(MapEntry, true);
+ return EntryRef(MapEntry);
}
void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
@@ -55,7 +55,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
MCSymbol *StartSym) {
if (getNumIndexedStrings() == 0)
return;
- Asm.OutStreamer->SwitchSection(Section);
+ Asm.OutStreamer->switchSection(Section);
unsigned EntrySize = Asm.getDwarfOffsetByteSize();
// We are emitting the header for a contribution to the string offsets
// table. The header consists of an entry with the contribution's
@@ -78,7 +78,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
return;
// Start the dwarf str section.
- Asm.OutStreamer->SwitchSection(StrSection);
+ Asm.OutStreamer->switchSection(StrSection);
// Get all of the string pool entries and sort them by their offset.
SmallVector<const StringMapEntry<EntryTy> *, 64> Entries;
@@ -117,7 +117,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
Entries[Entry.getValue().Index] = &Entry;
}
- Asm.OutStreamer->SwitchSection(OffsetSection);
+ Asm.OutStreamer->switchSection(OffsetSection);
unsigned size = Asm.getDwarfOffsetByteSize();
for (const auto &Entry : Entries)
if (UseRelativeOffsets)
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 5a2bd479f277..81238b0fe0d2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -17,12 +17,8 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
@@ -32,9 +28,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cassert>
#include <cstdint>
@@ -380,6 +374,8 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
CU = getUnitDie().getUnit();
if (!EntryCU)
EntryCU = getUnitDie().getUnit();
+ assert(EntryCU == CU || !DD->useSplitDwarf() || DD->shareAcrossDWOCUs() ||
+ !static_cast<const DwarfUnit*>(CU)->isDwoUnit());
addAttribute(Die, Attribute,
EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
Entry);
@@ -596,10 +592,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
// Skip updating the accelerator tables since this is not the full type.
if (MDString *TypeId = CTy->getRawIdentifier())
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- else {
- auto X = DD->enterNonTypeUnitContext();
+ else
finishNonUnitTypeDIE(TyDIE, CTy);
- }
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
@@ -805,7 +799,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// or reference types.
if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
- DTy->getDWARFAddressSpace().getValue());
+ *DTy->getDWARFAddressSpace());
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
@@ -1350,6 +1344,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isRecursive())
addFlag(SPDie, dwarf::DW_AT_recursive);
+ if (!SP->getTargetFuncName().empty())
+ addString(SPDie, dwarf::DW_AT_trampoline, SP->getTargetFuncName());
+
if (DD->getDwarfVersion() >= 5 && SP->isDeleted())
addFlag(SPDie, dwarf::DW_AT_deleted);
}
@@ -1442,7 +1439,8 @@ DIE *DwarfUnit::getIndexTyDie() {
addString(*IndexTyDie, dwarf::DW_AT_name, Name);
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- dwarf::DW_ATE_unsigned);
+ dwarf::getArrayIndexTypeEncoding(
+ (dwarf::SourceLanguage)getLanguage()));
DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
@@ -1847,11 +1845,5 @@ void DwarfUnit::addRnglistsBase() {
}
void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
- addFlag(D, dwarf::DW_AT_declaration);
- StringRef Name = CTy->getName();
- if (!Name.empty())
- addString(D, dwarf::DW_AT_name, Name);
- if (Name.startswith("_STN") || !Name.contains('<'))
- addTemplateParams(D, CTy->getTemplateParams());
- getCU().createTypeDIE(CTy);
+ DD->getAddressPool().resetUsedFlag(true);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 39f40b172c1b..31644959bdca 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -458,7 +457,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// Sometimes we want not to emit the data into separate section (e.g. ARM
// EHABI). In this case LSDASection will be NULL.
if (LSDASection)
- Asm->OutStreamer->SwitchSection(LSDASection);
+ Asm->OutStreamer->switchSection(LSDASection);
Asm->emitAlignment(Align(4));
// Emit the LSDA.
@@ -806,7 +805,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = TypeInfos.size();
}
@@ -821,7 +820,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = 0;
}
for (std::vector<unsigned>::const_iterator
diff --git a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 70777f07fc6c..62fd15d89512 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -23,7 +23,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -46,9 +45,8 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
unsigned IntPtrSize = M.getDataLayout().getPointerSize();
// Put this in a custom .note section.
- OS.SwitchSection(
- AP.getObjFileLowering().getContext().getELFSection(".note.gc",
- ELF::SHT_PROGBITS, 0));
+ OS.switchSection(AP.getObjFileLowering().getContext().getELFSection(
+ ".note.gc", ELF::SHT_PROGBITS, 0));
// For each function...
for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(),
diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3ade262d9af2..74fa30ab321b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -72,10 +72,10 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_begin");
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "data_begin");
}
@@ -99,16 +99,16 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
unsigned IntPtrSize = M.getDataLayout().getPointerSize();
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_end");
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "data_end");
// FIXME: Why does ocaml emit this??
AP.OutStreamer->emitIntValue(0, IntPtrSize);
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "frametable");
int NumDescriptors = 0;
@@ -147,7 +147,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.OutStreamer->AddComment("live roots for " +
Twine(FI->getFunction().getName()));
- AP.OutStreamer->AddBlankLine();
+ AP.OutStreamer->addBlankLine();
for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE;
++J) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index bab187f46535..135eabc34838 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -13,7 +13,7 @@
#include "PseudoProbePrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index a17a2ca2790e..a514ff161cee 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "WasmException.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/llvm/lib/CodeGen/AsmPrinter/WasmException.h
index f06de786bd76..2abbe37cb6d9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -15,9 +15,12 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
#include "EHStreamer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
namespace llvm {
+class AsmPrinter;
+class MachineFunction;
+struct LandingPadInfo;
+template <typename T> class SmallVectorImpl;
class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer {
public:
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index ad8432343a60..5d813b72c0b7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -15,11 +15,8 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -29,7 +26,7 @@ using namespace llvm;
WinCFGuard::WinCFGuard(AsmPrinter *A) : Asm(A) {}
-WinCFGuard::~WinCFGuard() {}
+WinCFGuard::~WinCFGuard() = default;
void WinCFGuard::endFunction(const MachineFunction *MF) {
@@ -110,19 +107,19 @@ void WinCFGuard::endModule() {
// Emit the symbol index of each GFIDs entry to form the .gfids section.
auto &OS = *Asm->OutStreamer;
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
for (const MCSymbol *S : GFIDsEntries)
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
// Emit the symbol index of each GIATs entry to form the .giats section.
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection());
for (const MCSymbol *S : GIATsEntries) {
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
}
// Emit the symbol index of each longjmp target to form the .gljmp section.
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
for (const MCSymbol *S : LongjmpTargets) {
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index ef57031c7294..c3ca9c92bf71 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -23,19 +23,13 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
@@ -46,7 +40,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
isThumb = Asm->TM.getTargetTriple().isThumb();
}
-WinException::~WinException() {}
+WinException::~WinException() = default;
/// endModule - Emit all exception information that should come after the
/// content.
@@ -55,13 +49,13 @@ void WinException::endModule() {
const Module *M = MMI->getModule();
for (const Function &F : *M)
if (F.hasFnAttribute("safeseh"))
- OS.EmitCOFFSafeSEH(Asm->getSymbol(&F));
+ OS.emitCOFFSafeSEH(Asm->getSymbol(&F));
if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) {
// Emit the symbol index of each ehcont target.
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection());
for (const MCSymbol *S : EHContTargets) {
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
}
}
}
@@ -122,7 +116,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
void WinException::markFunctionEnd() {
if (isAArch64 && CurrentFuncletEntry &&
(shouldEmitMoves || shouldEmitPersonality))
- Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd();
}
/// endFunction - Gather and emit post-function exception information.
@@ -151,12 +145,12 @@ void WinException::endFunction(const MachineFunction *MF) {
return;
if (shouldEmitPersonality || shouldEmitLSDA) {
- Asm->OutStreamer->PushSection();
+ Asm->OutStreamer->pushSection();
// Just switch sections to the right xdata section.
MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
Asm->OutStreamer->getCurrentSectionOnly());
- Asm->OutStreamer->SwitchSection(XData);
+ Asm->OutStreamer->switchSection(XData);
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
@@ -171,7 +165,7 @@ void WinException::endFunction(const MachineFunction *MF) {
else
emitExceptionTable();
- Asm->OutStreamer->PopSection();
+ Asm->OutStreamer->popSection();
}
if (!MF->getCatchretTargets().empty()) {
@@ -211,11 +205,11 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
Sym = getMCSymbolForMBB(Asm, &MBB);
// Describe our funclet symbol as a function with internal linkage.
- Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
- Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ Asm->OutStreamer->beginCOFFSymbolDef(Sym);
+ Asm->OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ Asm->OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
- Asm->OutStreamer->EndCOFFSymbolDef();
+ Asm->OutStreamer->endCOFFSymbolDef();
// We want our funclet's entry point to be aligned such that no nops will be
// present after the label.
@@ -229,7 +223,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
// Mark 'Sym' as starting our funclet.
if (shouldEmitMoves || shouldEmitPersonality) {
CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly();
- Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+ Asm->OutStreamer->emitWinCFIStartProc(Sym);
}
if (shouldEmitPersonality) {
@@ -248,15 +242,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
// inliner doesn't allow inlining them, this isn't a major problem in
// practice.
if (!CurrentFuncletEntry->isCleanupFuncletEntry())
- Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+ Asm->OutStreamer->emitWinEHHandler(PersHandlerSym, true, true);
}
}
void WinException::endFunclet() {
if (isAArch64 && CurrentFuncletEntry &&
(shouldEmitMoves || shouldEmitPersonality)) {
- Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
- Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ Asm->OutStreamer->switchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd();
}
endFuncletImpl();
}
@@ -276,7 +270,7 @@ void WinException::endFuncletImpl() {
if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
!CurrentFuncletEntry->isCleanupFuncletEntry()) {
// Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
+ Asm->OutStreamer->emitWinEHHandlerData();
// If this is a C++ catch funclet (or the parent function),
// emit a reference to the LSDA for the parent function.
@@ -287,14 +281,14 @@ void WinException::endFuncletImpl() {
} else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() &&
!CurrentFuncletEntry->isEHFuncletEntry()) {
// Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
+ Asm->OutStreamer->emitWinEHHandlerData();
// If this is the parent function in Win64 SEH, emit the LSDA immediately
// following .seh_handlerdata.
emitCSpecificHandlerTable(MF);
} else if (shouldEmitPersonality || shouldEmitLSDA) {
// Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
+ Asm->OutStreamer->emitWinEHHandlerData();
// In these cases, no further info is written to the .xdata section
// right here, but is written by e.g. emitExceptionTable in endFunction()
// above.
@@ -307,8 +301,8 @@ void WinException::endFuncletImpl() {
// Switch back to the funclet start .text section now that we are done
// writing to .xdata, and emit an .seh_endproc directive to mark the end of
// the function.
- Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
- Asm->OutStreamer->EmitWinCFIEndProc();
+ Asm->OutStreamer->switchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->emitWinCFIEndProc();
}
// Let's make sure we don't try to end the same funclet twice.
@@ -699,7 +693,12 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
}
int UnwindHelpOffset = 0;
- if (Asm->MAI->usesWindowsCFI())
+ // TODO: The check for UnwindHelpFrameIdx against max() below (and the
+ // second check further below) can be removed if MS C++ unwinding is
+ // implemented for ARM, when test/CodeGen/ARM/Windows/wineh-basic.ll
+ // passes without the check.
+ if (Asm->MAI->usesWindowsCFI() &&
+ FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max())
UnwindHelpOffset =
getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo);
@@ -761,7 +760,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
AddComment("IPToStateXData");
OS.emitValue(create32bitRef(IPToStateXData), 4);
- if (Asm->MAI->usesWindowsCFI()) {
+ if (Asm->MAI->usesWindowsCFI() &&
+ FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max()) {
AddComment("UnwindHelp");
OS.emitInt32(UnwindHelpOffset);
}
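
Both hunks above guard on the same sentinel: UnwindHelpFrameIdx stays at INT_MAX when no UnwindHelp slot was ever allocated (as on ARM, per the TODO), so both the frame-index query and the UnwindHelp field are skipped in that case. A sketch of the sentinel guard, with a placeholder standing in for getFrameIndexOffset():

#include <iostream>
#include <limits>

// INT_MAX doubles as "no UnwindHelp slot allocated"; only consult the
// frame layout when the index is real.
static int unwindHelpOffset(bool UsesWindowsCFI, int UnwindHelpFrameIdx) {
  constexpr int NoSlot = std::numeric_limits<int>::max();
  if (!UsesWindowsCFI || UnwindHelpFrameIdx == NoSlot)
    return 0;
  return UnwindHelpFrameIdx * 16; // placeholder for getFrameIndexOffset()
}

int main() {
  std::cout << unwindHelpOffset(true, std::numeric_limits<int>::max()) << '\n'; // 0
  std::cout << unwindHelpOffset(true, 2) << '\n';                               // 32
}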
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4838f6da750d..5ce6fbb5f647 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
@@ -47,6 +47,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -57,71 +58,72 @@ using namespace llvm;
namespace {
- class AtomicExpand: public FunctionPass {
- const TargetLowering *TLI = nullptr;
+class AtomicExpand : public FunctionPass {
+ const TargetLowering *TLI = nullptr;
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- AtomicExpand() : FunctionPass(ID) {
- initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
- }
+ AtomicExpand() : FunctionPass(ID) {
+ initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override;
-
- private:
- bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
- IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
- LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
- bool tryExpandAtomicLoad(LoadInst *LI);
- bool expandAtomicLoadToLL(LoadInst *LI);
- bool expandAtomicLoadToCmpXchg(LoadInst *LI);
- StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
- bool expandAtomicStore(StoreInst *SI);
- bool tryExpandAtomicRMW(AtomicRMWInst *AI);
- AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
- Value *
- insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
- Align AddrAlign, AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
- void expandAtomicOpToLLSC(
- Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
- AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
- void expandPartwordAtomicRMW(
- AtomicRMWInst *I,
- TargetLoweringBase::AtomicExpansionKind ExpansionKind);
- AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
- bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
- void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
- void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
-
- AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
- static Value *insertRMWCmpXchgLoop(
- IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
- AtomicOrdering MemOpOrder, SyncScope::ID SSID,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
- CreateCmpXchgInstFun CreateCmpXchg);
- bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
- bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
- bool isIdempotentRMW(AtomicRMWInst *RMWI);
- bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
-
- bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
- Value *PointerOperand, Value *ValueOperand,
- Value *CASExpected, AtomicOrdering Ordering,
- AtomicOrdering Ordering2,
- ArrayRef<RTLIB::Libcall> Libcalls);
- void expandAtomicLoadToLibcall(LoadInst *LI);
- void expandAtomicStoreToLibcall(StoreInst *LI);
- void expandAtomicRMWToLibcall(AtomicRMWInst *I);
- void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
-
- friend bool
- llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
- CreateCmpXchgInstFun CreateCmpXchg);
- };
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
+ IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+ LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
+ bool tryExpandAtomicStore(StoreInst *SI);
+ void expandAtomicStore(StoreInst *SI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
+ AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
+ Value *
+ insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void
+ expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void expandPartwordAtomicRMW(
+ AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
+ bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
+ void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
+
+ AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
+ static Value *
+ insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
+ SyncScope::ID SSID,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg);
+ bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *RMWI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
+
+ bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
+ Value *PointerOperand, Value *ValueOperand,
+ Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2,
+ ArrayRef<RTLIB::Libcall> Libcalls);
+ void expandAtomicLoadToLibcall(LoadInst *LI);
+ void expandAtomicStoreToLibcall(StoreInst *LI);
+ void expandAtomicRMWToLibcall(AtomicRMWInst *I);
+ void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
+
+ friend bool
+ llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg);
+};
} // end anonymous namespace
@@ -129,8 +131,8 @@ char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
-INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
- false, false)
+INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
+ false)
FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
@@ -252,7 +254,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
}
if (LI) {
- if (LI->getType()->isFloatingPointTy()) {
+ if (TLI->shouldCastAtomicLoadInIR(LI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
LI = convertAtomicLoadToIntegerType(LI);
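The FP-type special case above is now a target policy. A minimal sketch of a backend override that keeps the old behavior (`MyTargetLowering` is a hypothetical subclass, shown for illustration only):

TargetLoweringBase::AtomicExpansionKind
MyTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
  // Route floating-point atomic loads through an equivalent integer load,
  // as the pass previously did unconditionally.
  return LI->getType()->isFloatingPointTy()
             ? AtomicExpansionKind::CastToInteger
             : AtomicExpansionKind::None;
}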
@@ -262,7 +265,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange |= tryExpandAtomicLoad(LI);
} else if (SI) {
- if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+ if (TLI->shouldCastAtomicStoreInIR(SI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
SI = convertAtomicStoreToIntegerType(SI);
@@ -271,8 +275,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
}
- if (TLI->shouldExpandAtomicStoreInIR(SI))
- MadeChange |= expandAtomicStore(SI);
+ if (tryExpandAtomicStore(SI))
+ MadeChange = true;
} else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
@@ -283,8 +287,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
} else {
AtomicRMWInst::BinOp Op = RMWI->getOperation();
- if (Op == AtomicRMWInst::Xchg &&
- RMWI->getValOperand()->getType()->isFloatingPointTy()) {
+ if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
RMWI = convertAtomicXchgToIntegerType(RMWI);
@@ -308,7 +312,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
// extend convertCmpXchgToInteger for floating point too.
assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
"unimplemented - floating point not legal at IR level");
- if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
+ if (CASI->getCompareOperand()->getType()->isPointerTy()) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
CASI = convertCmpXchgToIntegerType(CASI);
@@ -351,14 +355,12 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *M = LI->getModule();
- Type *NewTy = getCorrespondingIntegerType(LI->getType(),
- M->getDataLayout());
+ Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
IRBuilder<> Builder(LI);
Value *Addr = LI->getPointerOperand();
- Type *PT = PointerType::get(NewTy,
- Addr->getType()->getPointerAddressSpace());
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
@@ -385,7 +387,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
Value *Val = RMWI->getValOperand();
Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
- Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+ Value *NewVal = Val->getType()->isPointerTy()
+ ? Builder.CreatePtrToInt(Val, NewTy)
+ : Builder.CreateBitCast(Val, NewTy);
auto *NewRMWI =
Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
@@ -393,7 +397,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
NewRMWI->setVolatile(RMWI->isVolatile());
LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
- Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+ Value *NewRVal = RMWI->getType()->isPointerTy()
+ ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
+ : Builder.CreateBitCast(NewRMWI, RMWI->getType());
RMWI->replaceAllUsesWith(NewRVal);
RMWI->eraseFromParent();
return NewRMWI;
@@ -413,11 +419,29 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
return expandAtomicLoadToLL(LI);
case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ LI->setAtomic(AtomicOrdering::NotAtomic);
+ return true;
default:
llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
}
+bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
+ switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::Expand:
+ expandAtomicStore(SI);
+ return true;
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ SI->setAtomic(AtomicOrdering::NotAtomic);
+ return true;
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicStore");
+ }
+}
+
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
IRBuilder<> Builder(LI);
@@ -471,8 +495,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
Value *Addr = SI->getPointerOperand();
- Type *PT = PointerType::get(NewTy,
- Addr->getType()->getPointerAddressSpace());
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
@@ -484,7 +507,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
return NewSI;
}
-bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
+void AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them with an
  // atomic swap, which can be implemented, for example, as a ldrex/strex on ARM
@@ -498,7 +521,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
- return tryExpandAtomicRMW(AI);
+ tryExpandAtomicRMW(AI);
}
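For reference, a minimal sketch of this store-to-swap rewrite in isolation (assumes the standard IRBuilder includes; the real pass also fixes up orderings and then lowers the resulting swap):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Replace an oversized atomic store with an atomic swap whose old value is
// simply ignored; the swap can then be lowered like any other atomicrmw.
static AtomicRMWInst *storeToSwapSketch(StoreInst *SI) {
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), SI->getOrdering());
  SI->eraseFromParent();
  return AI;
}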
static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
@@ -508,6 +531,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Type *OrigTy = NewVal->getType();
// This code can go away when cmpxchg supports FP types.
+ assert(!OrigTy->isPointerTy());
bool NeedBitcast = OrigTy->isFloatingPointTy();
if (NeedBitcast) {
IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
@@ -527,47 +551,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
-/// Emit IR to implement the given atomicrmw operation on values in registers,
-/// returning the new value.
-static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
- Value *Loaded, Value *Inc) {
- Value *NewVal;
- switch (Op) {
- case AtomicRMWInst::Xchg:
- return Inc;
- case AtomicRMWInst::Add:
- return Builder.CreateAdd(Loaded, Inc, "new");
- case AtomicRMWInst::Sub:
- return Builder.CreateSub(Loaded, Inc, "new");
- case AtomicRMWInst::And:
- return Builder.CreateAnd(Loaded, Inc, "new");
- case AtomicRMWInst::Nand:
- return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
- case AtomicRMWInst::Or:
- return Builder.CreateOr(Loaded, Inc, "new");
- case AtomicRMWInst::Xor:
- return Builder.CreateXor(Loaded, Inc, "new");
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::FAdd:
- return Builder.CreateFAdd(Loaded, Inc, "new");
- case AtomicRMWInst::FSub:
- return Builder.CreateFSub(Loaded, Inc, "new");
- default:
- llvm_unreachable("Unknown atomic op");
- }
-}
-
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
LLVMContext &Ctx = AI->getModule()->getContext();
TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
@@ -582,8 +565,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
TargetLoweringBase::AtomicExpansionKind::LLSC);
} else {
auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
- return performAtomicOp(AI->getOperation(), Builder, Loaded,
- AI->getValOperand());
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
};
expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
AI->getAlign(), AI->getOrdering(), PerformOp);
@@ -621,6 +604,12 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandAtomicRMWToMaskedIntrinsic(AI);
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
+ TLI->emitBitTestAtomicRMWIntrinsic(AI);
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ return lowerAtomicRMWInst(AI);
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
@@ -703,7 +692,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
PMV.AlignedAddr = Addr;
PMV.AlignedAddrAlignment = AddrAlign;
PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
- PMV.Mask = ConstantInt::get(PMV.ValueType, ~0);
+ PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
return PMV;
}
@@ -787,7 +776,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
case AtomicRMWInst::Sub:
case AtomicRMWInst::Nand: {
// The other arithmetic ops need to be masked into place.
- Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
@@ -801,7 +790,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
// truncate down to the original size, and expand out again after
// doing the operation.
Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
- Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
return FinalVal;
}
@@ -840,9 +829,8 @@ void AtomicExpand::expandPartwordAtomicRMW(
Value *OldResult;
if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
- PMV.AlignedAddrAlignment, MemOpOrder,
- SSID, PerformPartwordOp,
- createCmpXchgInstFun);
+ PMV.AlignedAddrAlignment, MemOpOrder, SSID,
+ PerformPartwordOp, createCmpXchgInstFun);
} else {
assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
@@ -1106,7 +1094,7 @@ Value *AtomicExpand::insertRMWLLSCLoop(
// [...]
BasicBlock *ExitBB =
BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place).
@@ -1135,7 +1123,8 @@ Value *AtomicExpand::insertRMWLLSCLoop(
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
-AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
+AtomicCmpXchgInst *
+AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
auto *M = CI->getModule();
Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
M->getDataLayout());
@@ -1143,8 +1132,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
IRBuilder<> Builder(CI);
Value *Addr = CI->getPointerOperand();
- Type *PT = PointerType::get(NewTy,
- Addr->getType()->getPointerAddressSpace());
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
@@ -1305,9 +1293,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
Value *NewValueInsert =
insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
- Value *StoreSuccess =
- TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr,
- MemOpOrder);
+ Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
+ PMV.AlignedAddr, MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
@@ -1418,27 +1405,27 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
return true;
}
-bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
- if(!C)
+ if (!C)
return false;
AtomicRMWInst::BinOp Op = RMWI->getOperation();
- switch(Op) {
- case AtomicRMWInst::Add:
- case AtomicRMWInst::Sub:
- case AtomicRMWInst::Or:
- case AtomicRMWInst::Xor:
- return C->isZero();
- case AtomicRMWInst::And:
- return C->isMinusOne();
- // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
- default:
- return false;
+ switch (Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
}
}
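For instance, each of the following leaves memory unchanged and is a candidate for lowering into a fenced atomic load (illustrative IR):

//   atomicrmw add ptr %p, i32  0 seq_cst   ; x + 0  == x
//   atomicrmw or  ptr %p, i32  0 seq_cst   ; x | 0  == x
//   atomicrmw and ptr %p, i32 -1 seq_cst   ; x & -1 == x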
-bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
tryExpandAtomicLoad(ResultingLoad);
return true;
@@ -1524,6 +1511,8 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
expandAtomicCmpXchgToMaskedIntrinsic(CI);
return true;
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ return lowerAtomicCmpXchgInst(CI);
}
}
@@ -1535,8 +1524,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
AI->getOrdering(), AI->getSyncScopeID(),
[&](IRBuilder<> &Builder, Value *Loaded) {
- return performAtomicOp(AI->getOperation(), Builder, Loaded,
- AI->getValOperand());
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
},
CreateCmpXchg);
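For reference, the loop that insertRMWCmpXchgLoop builds has roughly this shape (illustrative IR; block and value names assumed):

//   atomicrmw.start:
//     %loaded = phi [ %init, %entry ], [ %new_loaded, %atomicrmw.start ]
//     %new    = <op> %loaded, %val                 ; produced by PerformOp
//     %pair   = cmpxchg ptr %p, %loaded, %new ...  ; produced by CreateCmpXchg
//     %new_loaded = extractvalue { iN, i1 } %pair, 0
//     %success    = extractvalue { iN, i1 } %pair, 1
//     br i1 %success, label %atomicrmw.end, label %atomicrmw.start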
@@ -1738,11 +1727,21 @@ bool AtomicExpand::expandAtomicOpToLibcall(
RTLIB::Libcall RTLibType;
if (UseSizedLibcall) {
switch (Size) {
- case 1: RTLibType = Libcalls[1]; break;
- case 2: RTLibType = Libcalls[2]; break;
- case 4: RTLibType = Libcalls[3]; break;
- case 8: RTLibType = Libcalls[4]; break;
- case 16: RTLibType = Libcalls[5]; break;
+ case 1:
+ RTLibType = Libcalls[1];
+ break;
+ case 2:
+ RTLibType = Libcalls[2];
+ break;
+ case 4:
+ RTLibType = Libcalls[3];
+ break;
+ case 8:
+ RTLibType = Libcalls[4];
+ break;
+ case 16:
+ RTLibType = Libcalls[5];
+ break;
}
} else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
RTLibType = Libcalls[0];
@@ -1806,8 +1805,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// that property, we'd need to extend this mechanism to support AS-specific
// families of atomic intrinsics.
auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
- Value *PtrVal = Builder.CreateBitCast(PointerOperand,
- Type::getInt8PtrTy(Ctx, PtrTypeAS));
+ Value *PtrVal =
+ Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
Args.push_back(PtrVal);
@@ -1815,11 +1814,10 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
AllocaCASExpected->setAlignment(AllocaAlignment);
- unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
+ unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
- AllocaCASExpected_i8 =
- Builder.CreateBitCast(AllocaCASExpected,
- Type::getInt8PtrTy(Ctx, AllocaAS));
+ AllocaCASExpected_i8 = Builder.CreateBitCast(
+ AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
Args.push_back(AllocaCASExpected_i8);
@@ -1846,9 +1844,9 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
AllocaResult->setAlignment(AllocaAlignment);
- unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
+ unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
- Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
+ Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
Args.push_back(AllocaResult_i8);
}
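The Libcalls array consumed by the sized-libcall switch above is laid out with the generic variant at index 0 and the sized variants following it, which is why the switch indexes slots 1 through 5. For atomic load the table would look like this (shown for illustration):

//   { RTLIB::ATOMIC_LOAD,     // index 0: generic __atomic_load
//     RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, RTLIB::ATOMIC_LOAD_4,
//     RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16 }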
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index c1901bc46d72..f05f5b9f9947 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -60,7 +60,7 @@
// Basic Block Labels
// ==================
//
-// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of
+// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of
// every function into the .llvm_bb_addr_map section. Along with the function
// symbols, this allows for mapping of virtual addresses in PMU profiles back to
// the corresponding basic blocks. This logic is implemented in AsmPrinter. This
@@ -69,26 +69,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/LineIterator.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetMachine.h"
-using llvm::SmallSet;
-using llvm::SmallVector;
-using llvm::StringMap;
-using llvm::StringRef;
using namespace llvm;
// Placing the cold clusters in a separate section mitigates against poor
@@ -108,41 +99,11 @@ cl::opt<bool> BBSectionsDetectSourceDrift(
namespace {
-// This struct represents the cluster information for a machine basic block.
-struct BBClusterInfo {
- // MachineBasicBlock ID.
- unsigned MBBNumber;
- // Cluster ID this basic block belongs to.
- unsigned ClusterID;
- // Position of basic block within the cluster.
- unsigned PositionInCluster;
-};
-
-using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>;
-
class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
- // This contains the basic-block-sections profile.
- const MemoryBuffer *MBuf = nullptr;
-
- // This encapsulates the BB cluster information for the whole program.
- //
- // For every function name, it contains the cluster information for (all or
- // some of) its basic blocks. The cluster information for every basic block
- // includes its cluster ID along with the position of the basic block in that
- // cluster.
- ProgramBBClusterInfoMapTy ProgramBBClusterInfo;
-
- // Some functions have alias names. We use this map to find the main alias
- // name for which we have mapping in ProgramBBClusterInfo.
- StringMap<StringRef> FuncAliasMap;
-
- BasicBlockSections(const MemoryBuffer *Buf)
- : MachineFunctionPass(ID), MBuf(Buf) {
- initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
- };
+ BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
BasicBlockSections() : MachineFunctionPass(ID) {
initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
@@ -154,9 +115,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- /// Read profiles of basic blocks if available here.
- bool doInitialization(Module &M) override;
-
/// Identify basic blocks that need separate sections and prepare to emit them
/// accordingly.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -206,21 +164,18 @@ static void updateBranches(
// This function provides the BBCluster information associated with a function.
// Returns true if a valid association exists and false otherwise.
-static bool getBBClusterInfoForFunction(
- const MachineFunction &MF, const StringMap<StringRef> FuncAliasMap,
- const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
+bool getBBClusterInfoForFunction(
+ const MachineFunction &MF,
+ BasicBlockSectionsProfileReader *BBSectionsProfileReader,
std::vector<Optional<BBClusterInfo>> &V) {
- // Get the main alias name for the function.
- auto FuncName = MF.getName();
- auto R = FuncAliasMap.find(FuncName);
- StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second;
  // Find the associated cluster information.
- auto P = ProgramBBClusterInfo.find(AliasName);
- if (P == ProgramBBClusterInfo.end())
+ std::pair<bool, SmallVector<BBClusterInfo, 4>> P =
+ BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName());
+ if (!P.first)
return false;
- if (P->second.empty()) {
+ if (P.second.empty()) {
// This indicates that sections are desired for all basic blocks of this
// function. We clear the BBClusterInfo vector to denote this.
V.clear();
@@ -228,7 +183,7 @@ static bool getBBClusterInfoForFunction(
}
V.resize(MF.getNumBlockIDs());
- for (auto bbClusterInfo : P->second) {
+ for (auto bbClusterInfo : P.second) {
// Bail out if the cluster information contains invalid MBB numbers.
if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs())
return false;
@@ -266,7 +221,7 @@ assignSections(MachineFunction &MF,
// set every basic block's section ID equal to its number (basic block
// id). This further ensures that basic blocks are ordered canonically.
MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())});
- } else if (FuncBBClusterInfo[MBB.getNumber()].hasValue())
+ } else if (FuncBBClusterInfo[MBB.getNumber()])
MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID);
else {
// BB goes into the special cold section if it is not specified in the
@@ -279,9 +234,8 @@ assignSections(MachineFunction &MF,
// If we already have one cluster containing eh_pads, this must be updated
// to ExceptionSectionID. Otherwise, we set it equal to the current
// section ID.
- EHPadsSectionID = EHPadsSectionID.hasValue()
- ? MBBSectionID::ExceptionSectionID
- : MBB.getSectionID();
+ EHPadsSectionID = EHPadsSectionID ? MBBSectionID::ExceptionSectionID
+ : MBB.getSectionID();
}
}
@@ -290,7 +244,7 @@ assignSections(MachineFunction &MF,
if (EHPadsSectionID == MBBSectionID::ExceptionSectionID)
for (auto &MBB : MF)
if (MBB.isEHPad())
- MBB.setSectionID(EHPadsSectionID.getValue());
+ MBB.setSectionID(*EHPadsSectionID);
}
void llvm::sortBasicBlocksAndUpdateBranches(
@@ -377,9 +331,11 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+ BBSectionsProfileReader = &getAnalysis<BasicBlockSectionsProfileReader>();
+
std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
if (BBSectionsType == BasicBlockSection::List &&
- !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
+ !getBBClusterInfoForFunction(MF, BBSectionsProfileReader,
FuncBBClusterInfo))
return true;
MF.setBBSectionsType(BBSectionsType);
@@ -427,107 +383,12 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-// Basic Block Sections can be enabled for a subset of machine basic blocks.
-// This is done by passing a file containing names of functions for which basic
-// block sections are desired. Additionally, machine basic block ids of the
-// functions can also be specified for a finer granularity. Moreover, a cluster
-// of basic blocks could be assigned to the same section.
-// A file with basic block sections for all of function main and three blocks
-// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
-// ----------------------------
-// list.txt:
-// !main
-// !foo
-// !!1 2
-// !!4
-static Error getBBClusterInfo(const MemoryBuffer *MBuf,
- ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
- StringMap<StringRef> &FuncAliasMap) {
- assert(MBuf);
- line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
-
- auto invalidProfileError = [&](auto Message) {
- return make_error<StringError>(
- Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
- Twine(LineIt.line_number()) + ": " + Message),
- inconvertibleErrorCode());
- };
-
- auto FI = ProgramBBClusterInfo.end();
-
- // Current cluster ID corresponding to this function.
- unsigned CurrentCluster = 0;
- // Current position in the current cluster.
- unsigned CurrentPosition = 0;
-
- // Temporary set to ensure every basic block ID appears once in the clusters
- // of a function.
- SmallSet<unsigned, 4> FuncBBIDs;
-
- for (; !LineIt.is_at_eof(); ++LineIt) {
- StringRef S(*LineIt);
- if (S[0] == '@')
- continue;
- // Check for the leading "!"
- if (!S.consume_front("!") || S.empty())
- break;
- // Check for second "!" which indicates a cluster of basic blocks.
- if (S.consume_front("!")) {
- if (FI == ProgramBBClusterInfo.end())
- return invalidProfileError(
- "Cluster list does not follow a function name specifier.");
- SmallVector<StringRef, 4> BBIndexes;
- S.split(BBIndexes, ' ');
- // Reset current cluster position.
- CurrentPosition = 0;
- for (auto BBIndexStr : BBIndexes) {
- unsigned long long BBIndex;
- if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex))
- return invalidProfileError(Twine("Unsigned integer expected: '") +
- BBIndexStr + "'.");
- if (!FuncBBIDs.insert(BBIndex).second)
- return invalidProfileError(Twine("Duplicate basic block id found '") +
- BBIndexStr + "'.");
- if (!BBIndex && CurrentPosition)
- return invalidProfileError("Entry BB (0) does not begin a cluster.");
-
- FI->second.emplace_back(BBClusterInfo{
- ((unsigned)BBIndex), CurrentCluster, CurrentPosition++});
- }
- CurrentCluster++;
- } else { // This is a function name specifier.
- // Function aliases are separated using '/'. We use the first function
- // name for the cluster info mapping and delegate all other aliases to
- // this one.
- SmallVector<StringRef, 4> Aliases;
- S.split(Aliases, '/');
- for (size_t i = 1; i < Aliases.size(); ++i)
- FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
-
- // Prepare for parsing clusters of this function name.
- // Start a new cluster map for this function name.
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
- CurrentCluster = 0;
- FuncBBIDs.clear();
- }
- }
- return Error::success();
-}
-
-bool BasicBlockSections::doInitialization(Module &M) {
- if (!MBuf)
- return false;
- if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
- report_fatal_error(std::move(Err));
- return false;
-}
-
void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReader>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachineFunctionPass *
-llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) {
- return new BasicBlockSections(Buf);
+MachineFunctionPass *llvm::createBasicBlockSectionsPass() {
+ return new BasicBlockSections();
}
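With profile reading split into its own immutable pass, a pass pipeline now wires the two together, roughly as follows (a sketch in a TargetPassConfig-like context; the surrounding names are assumptions, not part of this hunk):

// Add the reader first so BasicBlockSections can query it as an analysis.
if (TM->getBBSectionsType() == BasicBlockSection::List)
  addPass(createBasicBlockSectionsProfileReaderPass(
      TM->getBBSectionsFuncListBuf()));
addPass(createBasicBlockSectionsPass());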
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
new file mode 100644
index 000000000000..c2acf115998b
--- /dev/null
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -0,0 +1,144 @@
+//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the basic block sections profile reader pass. It parses
+// and stores the basic block sections profile file (which is specified via the
+// `-basic-block-sections` flag).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+char BasicBlockSectionsProfileReader::ID = 0;
+INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
+ "Reads and parses a basic block sections profile.", false,
+ false)
+
+bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
+ return getBBClusterInfoForFunction(FuncName).first;
+}
+
+std::pair<bool, SmallVector<BBClusterInfo>>
+BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
+ StringRef FuncName) const {
+ std::pair<bool, SmallVector<BBClusterInfo>> cluster_info(false, {});
+ auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
+ if (R != ProgramBBClusterInfo.end()) {
+ cluster_info.second = R->second;
+ cluster_info.first = true;
+ }
+ return cluster_info;
+}
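A short usage sketch of this query API (client code hypothetical; assignToSection is a stand-in for whatever the caller does with the result):

std::pair<bool, SmallVector<BBClusterInfo>> R =
    Reader->getBBClusterInfoForFunction(MF.getName());
if (R.first) // a profile entry exists for this function
  for (const BBClusterInfo &CI : R.second)
    assignToSection(CI.MBBNumber, CI.ClusterID, CI.PositionInCluster);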
+
+// Basic Block Sections can be enabled for a subset of machine basic blocks.
+// This is done by passing a file containing names of functions for which basic
+// block sections are desired. Additionally, machine basic block ids of the
+// functions can also be specified for a finer granularity. Moreover, a cluster
+// of basic blocks could be assigned to the same section.
+// A file with basic block sections for all of function main and three blocks
+// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
+// ----------------------------
+// list.txt:
+// !main
+// !foo
+// !!1 2
+// !!4
+static Error getBBClusterInfo(const MemoryBuffer *MBuf,
+ ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
+ StringMap<StringRef> &FuncAliasMap) {
+ assert(MBuf);
+ line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
+
+ auto invalidProfileError = [&](auto Message) {
+ return make_error<StringError>(
+ Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
+ Twine(LineIt.line_number()) + ": " + Message),
+ inconvertibleErrorCode());
+ };
+
+ auto FI = ProgramBBClusterInfo.end();
+
+ // Current cluster ID corresponding to this function.
+ unsigned CurrentCluster = 0;
+ // Current position in the current cluster.
+ unsigned CurrentPosition = 0;
+
+ // Temporary set to ensure every basic block ID appears once in the clusters
+ // of a function.
+ SmallSet<unsigned, 4> FuncBBIDs;
+
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef S(*LineIt);
+ if (S[0] == '@')
+ continue;
+ // Check for the leading "!"
+ if (!S.consume_front("!") || S.empty())
+ break;
+ // Check for second "!" which indicates a cluster of basic blocks.
+ if (S.consume_front("!")) {
+ if (FI == ProgramBBClusterInfo.end())
+ return invalidProfileError(
+ "Cluster list does not follow a function name specifier.");
+ SmallVector<StringRef, 4> BBIndexes;
+ S.split(BBIndexes, ' ');
+ // Reset current cluster position.
+ CurrentPosition = 0;
+ for (auto BBIndexStr : BBIndexes) {
+ unsigned long long BBIndex;
+ if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex))
+ return invalidProfileError(Twine("Unsigned integer expected: '") +
+ BBIndexStr + "'.");
+ if (!FuncBBIDs.insert(BBIndex).second)
+ return invalidProfileError(Twine("Duplicate basic block id found '") +
+ BBIndexStr + "'.");
+ if (!BBIndex && CurrentPosition)
+ return invalidProfileError("Entry BB (0) does not begin a cluster.");
+
+ FI->second.emplace_back(BBClusterInfo{
+ ((unsigned)BBIndex), CurrentCluster, CurrentPosition++});
+ }
+ CurrentCluster++;
+ } else { // This is a function name specifier.
+ // Function aliases are separated using '/'. We use the first function
+ // name for the cluster info mapping and delegate all other aliases to
+ // this one.
+ SmallVector<StringRef, 4> Aliases;
+ S.split(Aliases, '/');
+ for (size_t i = 1; i < Aliases.size(); ++i)
+ FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
+
+ // Prepare for parsing clusters of this function name.
+ // Start a new cluster map for this function name.
+ FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
+ CurrentCluster = 0;
+ FuncBBIDs.clear();
+ }
+ }
+ return Error::success();
+}
+
+void BasicBlockSectionsProfileReader::initializePass() {
+ if (!MBuf)
+ return;
+ if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
+ report_fatal_error(std::move(Err));
+}
+
+ImmutablePass *
+llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
+ return new BasicBlockSectionsProfileReader(Buf);
+}
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 0ff67f7ca00a..07be03d2dab9 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,11 +33,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
-#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -105,6 +104,11 @@ namespace {
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
};
} // end anonymous namespace
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h
index 95d5dcfbbd0f..d0b6ed5ebe05 100644
--- a/llvm/lib/CodeGen/BranchFolding.h
+++ b/llvm/lib/CodeGen/BranchFolding.h
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/Compiler.h"
-#include <cstdint>
#include <vector>
namespace llvm {
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index eda0f37fdeb7..29508f8f35a6 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 558700bd9b3b..57170c58db14 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -19,11 +19,13 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/CFIFixup.cpp b/llvm/lib/CodeGen/CFIFixup.cpp
new file mode 100644
index 000000000000..837dbd77d073
--- /dev/null
+++ b/llvm/lib/CodeGen/CFIFixup.cpp
@@ -0,0 +1,225 @@
+//===------ CFIFixup.cpp - Insert CFI remember/restore instructions -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+// This pass inserts the necessary instructions to adjust for the inconsistency
+// of the call-frame information caused by final machine basic block layout.
+// The pass relies on constraints LLVM imposes on the placement of
+// save/restore points (cf. ShrinkWrap):
+// * there is a single basic block, containing the function prologue
+// * possibly multiple epilogue blocks, where each epilogue block is
+// complete and self-contained, i.e. CSR restore instructions (and the
+// corresponding CFI instructions) are not split across two or more blocks.
+// * prologue and epilogue blocks are outside of any loops
+// Thus, during execution, at the beginning and at the end of each basic block
+// the function can be in one of two states:
+// - "has a call frame", if the function has executed the prologue, and
+// has not executed any epilogue
+// - "does not have a call frame", if the function has not executed the
+// prologue, or has executed an epilogue
+// which can be computed by a single RPO traversal.
+
+// In order to accommodate backends which do not generate unwind info in
+// epilogues we compute an additional property "strong no call frame on entry",
+// which is set for the entry point of the function and for every block
+// reachable from the entry along a path that does not execute the prologue. If
+// this property holds, it takes precedence over the "has a call frame"
+// property.
+
+// From the point of view of the unwind tables, the "has/does not have call
+// frame" state at beginning of each block is determined by the state at the end
+// of the previous block, in layout order. Where these states differ, we insert
+// compensating CFI instructions, which come in two flavours:
+
+// - CFI instructions, which reset the unwind table state to the initial one.
+// This is done by a target specific hook and is expected to be trivial
+// to implement, for example it could be:
+// .cfi_def_cfa <sp>, 0
+// .cfi_same_value <rN>
+// .cfi_same_value <rN-1>
+// ...
+// where <rN> are the callee-saved registers.
+// - CFI instructions, which reset the unwind table state to the one
+// created by the function prologue. These are
+// .cfi_restore_state
+// .cfi_remember_state
+// In this case we also insert a `.cfi_remember_state` after the last CFI
+// instruction in the function prologue.
+//
+// Known limitations:
+// * the pass cannot handle an epilogue preceding the prologue in the basic
+// block layout
+// * the pass does not handle functions where SP is used as a frame pointer and
+// SP adjustments up and down are done in different basic blocks (TODO)
+//===----------------------------------------------------------------------===//
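A concrete (assumed) final layout showing the remember/restore flavour; the initial-state reset flavour is emitted instead where a no-frame block follows a frame block:

//   entry:   ...prologue CFI...
//            .cfi_remember_state  ; inserted after the last prologue CFI
//   exit:    ...epilogue CFI..., ret
//   cold_bb: .cfi_restore_state   ; inserted: cold_bb executes with the
//            ...                  ; frame but follows 'exit' in layout order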
+
+#include "llvm/CodeGen/CFIFixup.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cfi-fixup"
+
+char CFIFixup::ID = 0;
+
+INITIALIZE_PASS(CFIFixup, "cfi-fixup",
+ "Insert CFI remember/restore state instructions", false, false)
+FunctionPass *llvm::createCFIFixup() { return new CFIFixup(); }
+
+static bool isPrologueCFIInstruction(const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
+ MI.getFlag(MachineInstr::FrameSetup);
+}
+
+static bool containsPrologue(const MachineBasicBlock &MBB) {
+ return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction);
+}
+
+static bool containsEpilogue(const MachineBasicBlock &MBB) {
+ return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) {
+ return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
+ MI.getFlag(MachineInstr::FrameDestroy);
+ });
+}
+
+bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
+ const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering();
+ if (!TFL.enableCFIFixup(MF))
+ return false;
+
+ const unsigned NumBlocks = MF.getNumBlockIDs();
+ if (NumBlocks < 2)
+ return false;
+
+ struct BlockFlags {
+ bool Reachable : 1;
+ bool StrongNoFrameOnEntry : 1;
+ bool HasFrameOnEntry : 1;
+ bool HasFrameOnExit : 1;
+ };
+ SmallVector<BlockFlags, 32> BlockInfo(NumBlocks, {false, false, false, false});
+ BlockInfo[0].Reachable = true;
+ BlockInfo[0].StrongNoFrameOnEntry = true;
+
+ // Compute the presence/absence of frame at each basic block.
+ MachineBasicBlock *PrologueBlock = nullptr;
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ for (MachineBasicBlock *MBB : RPOT) {
+ BlockFlags &Info = BlockInfo[MBB->getNumber()];
+
+ // Set to true if the current block contains the prologue or the epilogue,
+ // respectively.
+ bool HasPrologue = false;
+ bool HasEpilogue = false;
+
+ if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) {
+ PrologueBlock = MBB;
+ HasPrologue = true;
+ }
+
+ if (Info.HasFrameOnEntry || HasPrologue)
+ HasEpilogue = containsEpilogue(*MBB);
+
+ // If the function has a call frame at the entry of the current block or the
+ // current block contains the prologue, then the function has a call frame
+ // at the exit of the block, unless the block contains the epilogue.
+ Info.HasFrameOnExit = (Info.HasFrameOnEntry || HasPrologue) && !HasEpilogue;
+
+ // Set the successors' state on entry.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ BlockFlags &SuccInfo = BlockInfo[Succ->getNumber()];
+ SuccInfo.Reachable = true;
+ SuccInfo.StrongNoFrameOnEntry |=
+ Info.StrongNoFrameOnEntry && !HasPrologue;
+ SuccInfo.HasFrameOnEntry = Info.HasFrameOnExit;
+ }
+ }
+
+ if (!PrologueBlock)
+ return false;
+
+ // Walk the blocks of the function in "physical" order.
+ // Every block inherits the frame state (as recorded in the unwind tables)
+ // of the previous block. If the intended frame state is different, insert
+ // compensating CFI instructions.
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ bool Change = false;
+ // `InsertPt` always points to the point in a preceding block where we have to
+ // insert a `.cfi_remember_state`, in the case that the current block needs a
+ // `.cfi_restore_state`.
+ MachineBasicBlock *InsertMBB = PrologueBlock;
+ MachineBasicBlock::iterator InsertPt = PrologueBlock->begin();
+ for (MachineInstr &MI : *PrologueBlock)
+ if (isPrologueCFIInstruction(MI))
+ InsertPt = std::next(MI.getIterator());
+
+ assert(InsertPt != PrologueBlock->begin() &&
+ "Inconsistent notion of \"prologue block\"");
+
+ // No point starting before the prologue block.
+ // TODO: the unwind tables will still be incorrect if an epilogue physically
+  // precedes the prologue.
+ MachineFunction::iterator CurrBB = std::next(PrologueBlock->getIterator());
+ bool HasFrame = BlockInfo[PrologueBlock->getNumber()].HasFrameOnExit;
+ while (CurrBB != MF.end()) {
+ const BlockFlags &Info = BlockInfo[CurrBB->getNumber()];
+ if (!Info.Reachable) {
+ ++CurrBB;
+ continue;
+ }
+
+#ifndef NDEBUG
+ if (!Info.StrongNoFrameOnEntry) {
+ for (auto *Pred : CurrBB->predecessors()) {
+ BlockFlags &PredInfo = BlockInfo[Pred->getNumber()];
+ assert((!PredInfo.Reachable ||
+ Info.HasFrameOnEntry == PredInfo.HasFrameOnExit) &&
+ "Inconsistent call frame state");
+ }
+ }
+#endif
+ if (!Info.StrongNoFrameOnEntry && Info.HasFrameOnEntry && !HasFrame) {
+ // Reset to the "after prologue" state.
+
+ // Insert a `.cfi_remember_state` into the last block known to have a
+ // stack frame.
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+ BuildMI(*InsertMBB, InsertPt, DebugLoc(),
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ // Insert a `.cfi_restore_state` at the beginning of the current block.
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+ InsertPt = BuildMI(*CurrBB, CurrBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ ++InsertPt;
+ InsertMBB = &*CurrBB;
+ Change = true;
+ } else if ((Info.StrongNoFrameOnEntry || !Info.HasFrameOnEntry) &&
+ HasFrame) {
+ // Reset to the state upon function entry.
+ TFL.resetCFIToInitialState(*CurrBB);
+ Change = true;
+ }
+
+ HasFrame = Info.HasFrameOnExit;
+ ++CurrBB;
+ }
+
+ return Change;
+}
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index de173a9dfd62..42523c47a671 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -19,16 +19,14 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SetOperations.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCDwarf.h"
using namespace llvm;
static cl::opt<bool> VerifyCFI("verify-cfiinstrs",
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 84a0e4142bb6..689e49978d43 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -145,11 +145,6 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
LI.setWeight(Weight);
}
-float VirtRegAuxInfo::futureWeight(LiveInterval &LI, SlotIndex Start,
- SlotIndex End) {
- return weightCalcHelper(LI, &Start, &End);
-}
-
float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
SlotIndex *End) {
MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index c9246f6e8754..f74ff30ab2e1 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -14,16 +14,14 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
using namespace llvm;
@@ -72,15 +70,9 @@ bool CCState::IsShadowAllocatedReg(MCRegister Reg) const {
if (!isAllocated(Reg))
return false;
- for (auto const &ValAssign : Locs) {
- if (ValAssign.isRegLoc()) {
- for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true);
- AI.isValid(); ++AI) {
- if (*AI == Reg)
- return false;
- }
- }
- }
+ for (auto const &ValAssign : Locs)
+ if (ValAssign.isRegLoc() && TRI.regsOverlap(ValAssign.getLocReg(), Reg))
+ return false;
return true;
}
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 7c236a9785d8..5050395fbc0f 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -24,6 +24,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCFGuardLongjmpPass(Registry);
+ initializeCFIFixupPass(Registry);
initializeCFIInstrInserterPass(Registry);
initializeCheckDebugMachineModulePass(Registry);
initializeCodeGenPreparePass(Registry);
@@ -50,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIndirectBrExpandPassPass(Registry);
initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
+ initializeJMCInstrumenterPass(Registry);
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
@@ -57,6 +59,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
+ initializeLowerGlobalDtorsLegacyPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
initializeMIRAddFSDiscriminatorsPass(Registry);
initializeMIRCanonicalizerPass(Registry);
@@ -104,6 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRemoveRedundantDebugValuesPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
+ initializeSelectOptimizePass(Registry);
initializeShadowStackGCLoweringPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSjLjEHPreparePass(Registry);
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index 877aa69c3e58..8f185a161bd0 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -129,7 +129,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
MachineBasicBlock::iterator Start = BB->begin();
MachineBasicBlock::iterator Previous = SplitPoint;
- --Previous;
+ do {
+ --Previous;
+ } while (Previous != Start && Previous->isDebugInstr());
if (TII.isTailCall(*SplitPoint) &&
Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
@@ -142,7 +144,7 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
// ADJCALLSTACKUP ...
// TAILJMP somewhere
// On the other hand, it could be an unrelated call in which case this tail
- // call has to register moves of its own and should be the split point. For
+ // call has no register moves of its own and should be the split point. For
// example:
// ADJCALLSTACKDOWN
// CALL something_else
@@ -167,3 +169,31 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}
+
+unsigned llvm::getInvertedFPClassTest(unsigned Test) {
+ unsigned InvertedTest = ~Test & fcAllFlags;
+ switch (InvertedTest) {
+ default:
+ break;
+ case fcNan:
+ case fcSNan:
+ case fcQNan:
+ case fcInf:
+ case fcPosInf:
+ case fcNegInf:
+ case fcNormal:
+ case fcPosNormal:
+ case fcNegNormal:
+ case fcSubnormal:
+ case fcPosSubnormal:
+ case fcNegSubnormal:
+ case fcZero:
+ case fcPosZero:
+ case fcNegZero:
+ case fcFinite:
+ case fcPosFinite:
+ case fcNegFinite:
+ return InvertedTest;
+ }
+ return 0;
+}
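Illustrative results, using the mask names from FloatingPointMode.h:

//   getInvertedFPClassTest(fcAllFlags & ~fcNan) == fcNan
//   getInvertedFPClassTest(fcNan | fcZero)      == 0  // inverse is unnamed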
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c888adeafca5..6778af22f532 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -23,16 +23,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -174,12 +173,11 @@ static cl::opt<bool> DisablePreheaderProtect(
cl::desc("Disable protection against removing loop preheaders"));
static cl::opt<bool> ProfileGuidedSectionPrefix(
- "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true),
cl::desc("Use profile info to add section prefix for hot/cold functions"));
static cl::opt<bool> ProfileUnknownInSpecialSection(
- "profile-unknown-in-special-section", cl::Hidden, cl::init(false),
- cl::ZeroOrMore,
+ "profile-unknown-in-special-section", cl::Hidden,
cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
"profile, we cannot tell the function is cold for sure because "
"it may be a function newly added without ever being sampled. "
@@ -188,6 +186,15 @@ static cl::opt<bool> ProfileUnknownInSpecialSection(
"to handle it in a different way than .text section, to save "
"RAM for example. "));
+static cl::opt<bool> BBSectionsGuidedSectionPrefix(
+ "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
+ cl::desc("Use the basic-block-sections profile to determine the text "
+ "section prefix for hot functions. Functions with "
+ "basic-block-sections profile will be placed in `.text.hot` "
+ "regardless of their FDO profile info. Other functions won't be "
+ "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
+ "profiles."));
+
static cl::opt<unsigned> FreqRatioToSkipMerge(
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
cl::desc("Skip merging empty blocks if (frequency of empty block) / "
@@ -274,6 +281,7 @@ class TypePromotionTransaction;
const TargetLowering *TLI = nullptr;
const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI = nullptr;
+ const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
@@ -349,6 +357,7 @@ class TypePromotionTransaction;
AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();
}
private:
@@ -401,6 +410,8 @@ class TypePromotionTransaction;
bool optimizeFunnelShift(IntrinsicInst *Fsh);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
+ bool optimizeSwitchType(SwitchInst *SI);
+ bool optimizeSwitchPhiConstants(SwitchInst *SI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
@@ -442,6 +453,7 @@ char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
@@ -473,8 +485,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BBSectionsProfileReader =
+ getAnalysisIfAvailable<BasicBlockSectionsProfileReader>();
OptSize = F.hasOptSize();
- if (ProfileGuidedSectionPrefix) {
+ // Use the basic-block-sections profile to promote hot functions to
+ // .text.hot, if requested.
+ if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
+ BBSectionsProfileReader->isFunctionHot(F.getName())) {
+ F.setSectionPrefix("hot");
+ } else if (ProfileGuidedSectionPrefix) {
// The hot attribute overrides profile-count-based hotness, while
// profile-count-based hotness overrides the cold attribute.
// This is conservative behavior.
@@ -524,7 +542,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
- EverMadeChange |= SplitIndirectBrCriticalEdges(F);
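+ // Splitting only helps blocks that contain PHIs, so skip target blocks
+ // without them.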
+ EverMadeChange |=
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
bool MadeChange = true;
while (MadeChange) {
@@ -2037,7 +2056,8 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
return false;
// Bail if the value is never zero.
- if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL))
+ Use &Op = CountZeros->getOperandUse(0);
+ if (isKnownNonZero(Op, *DL))
return false;
// The intrinsic will be sunk behind a compare against zero and branch.
@@ -2058,7 +2078,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Replace the unconditional branch that was created by the first split with
// a compare against zero and a conditional branch.
Value *Zero = Constant::getNullValue(Ty);
- Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
+ // Avoid introducing branch on poison. This also replaces the ctz operand.
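+ // (freeze pins a poison/undef input to some fixed value, so the compare
+ // and branch below are well-defined even for poison inputs.)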
+ if (!isGuaranteedNotToBeUndefOrPoison(Op))
+ Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
+ Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
StartBlock->getTerminator()->eraseFromParent();
@@ -2101,7 +2124,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Align the pointer arguments to this call if the target thinks it's a good
// idea
- unsigned MinSize, PrefAlign;
+ unsigned MinSize;
+ Align PrefAlign;
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
@@ -2115,12 +2139,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
0);
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
uint64_t Offset2 = Offset.getLimitedValue();
- if ((Offset2 & (PrefAlign-1)) != 0)
+ if (!isAligned(PrefAlign, Offset2))
continue;
AllocaInst *AI;
- if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
+ if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
- AI->setAlignment(Align(PrefAlign));
+ AI->setAlignment(PrefAlign);
// Global variables can only be aligned if they are defined in this
// object (i.e. they are uniquely initialized in this object), and
// over-aligning global variables that have an explicit section is
@@ -2130,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
GV->getPointerAlignment(*DL) < PrefAlign &&
DL->getTypeAllocSize(GV->getValueType()) >=
MinSize + Offset2)
- GV->setAlignment(MaybeAlign(PrefAlign));
+ GV->setAlignment(PrefAlign);
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
@@ -3371,7 +3395,7 @@ public:
if (!Visited.insert(P).second)
continue;
if (auto *PI = dyn_cast<Instruction>(P))
- if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
+ if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
for (auto *U : PI->users())
WorkList.push_back(cast<Value>(U));
Put(PI, V);
@@ -3416,7 +3440,7 @@ public:
void destroyNewNodes(Type *CommonType) {
// For safe erasing, replace the uses with dummy value first.
- auto *Dummy = UndefValue::get(CommonType);
+ auto *Dummy = PoisonValue::get(CommonType);
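+ // Poison is the preferred placeholder for values that must never be
+ // observed again.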
for (auto *I : AllPhiNodes) {
I->replaceAllUsesWith(Dummy);
I->eraseFromParent();
@@ -3785,7 +3809,7 @@ private:
SmallVector<Value *, 32> Worklist;
assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
"Address must be a Phi or Select node");
- auto *Dummy = UndefValue::get(CommonType);
+ auto *Dummy = PoisonValue::get(CommonType);
Worklist.push_back(Original);
while (!Worklist.empty()) {
Value *Current = Worklist.pop_back_val();
@@ -4550,9 +4574,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
if (!RHS || RHS->getBitWidth() > 64)
return false;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1LL << Scale;
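+ // getLimitedValue clamps the shift amount, so the 1LL << below cannot
+ // shift by 64 or more bits (RHS is at most 64 bits wide, checked above).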
+ int64_t Scale = Opcode == Instruction::Shl
+ ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
+ : RHS->getSExtValue();
return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
@@ -4783,7 +4807,6 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
}
// It isn't profitable to do this, roll back.
- //cerr << "NOT FOLDING: " << *I;
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
TPT.rollback(LastKnownGood);
@@ -4836,7 +4859,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
- // operand, we can't fold it!
+ // operand, we can't fold it! TODO: Also handle C_Address?
if (OpInfo.CallOperandVal == OpVal &&
(OpInfo.ConstraintType != TargetLowering::C_Memory ||
!OpInfo.isIndirect))
@@ -5158,8 +5181,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// GEP, collect the GEP. Skip the GEPs that are the new bases of
// previously split data structures.
LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
- if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
- LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
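+ // insert() leaves existing entries untouched, so each GEP gets an ID the
+ // first time it is seen, without a separate lookup.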
+ LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
}
NewAddrMode.OriginalValue = V;
@@ -5323,11 +5345,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- ResultPtr =
- AddrMode.InBounds
- ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
- "sunkaddr")
- : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr", AddrMode.InBounds);
}
ResultIndex = V;
@@ -5338,11 +5357,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- SunkAddr =
- AddrMode.InBounds
- ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
- "sunkaddr")
- : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr",
+ AddrMode.InBounds);
}
if (SunkAddr->getType() != Addr->getType())
@@ -5619,6 +5635,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ // TODO: Also handle C_Address?
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
@@ -6002,31 +6019,25 @@ bool CodeGenPrepare::optimizePhiType(
for (Value *V : Phi->incoming_values()) {
if (auto *OpPhi = dyn_cast<PHINode>(V)) {
if (!PhiNodes.count(OpPhi)) {
- if (Visited.count(OpPhi))
+ if (!Visited.insert(OpPhi).second)
return false;
PhiNodes.insert(OpPhi);
- Visited.insert(OpPhi);
Worklist.push_back(OpPhi);
}
} else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
if (!OpLoad->isSimple())
return false;
- if (!Defs.count(OpLoad)) {
- Defs.insert(OpLoad);
+ if (Defs.insert(OpLoad).second)
Worklist.push_back(OpLoad);
- }
} else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
- if (!Defs.count(OpEx)) {
- Defs.insert(OpEx);
+ if (Defs.insert(OpEx).second)
Worklist.push_back(OpEx);
- }
} else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
if (!ConvertTy)
ConvertTy = OpBC->getOperand(0)->getType();
if (OpBC->getOperand(0)->getType() != ConvertTy)
return false;
- if (!Defs.count(OpBC)) {
- Defs.insert(OpBC);
+ if (Defs.insert(OpBC).second) {
Worklist.push_back(OpBC);
AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
!isa<ExtractElementInst>(OpBC->getOperand(0));
@@ -6127,7 +6138,7 @@ bool CodeGenPrepare::optimizePhiTypes(Function &F) {
// Remove any old phi's that have been converted.
for (auto *I : DeletedInstrs) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
@@ -6979,12 +6990,12 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
return Changed;
}
-bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
Value *Cond = SI->getCondition();
Type *OldType = Cond->getType();
LLVMContext &Context = Cond->getContext();
EVT OldVT = TLI->getValueType(*DL, OldType);
- MVT RegType = TLI->getRegisterType(Context, OldVT);
+ MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
unsigned RegWidth = RegType.getSizeInBits();
if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
@@ -7019,7 +7030,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
ExtInst->setDebugLoc(SI->getDebugLoc());
SI->setCondition(ExtInst);
for (auto Case : SI->cases()) {
- APInt NarrowConst = Case.getCaseValue()->getValue();
+ const APInt &NarrowConst = Case.getCaseValue()->getValue();
APInt WideConst = (ExtType == Instruction::ZExt) ?
NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
Case.setValue(ConstantInt::get(Context, WideConst));
@@ -7028,6 +7039,89 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
return true;
}
+bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
+ // The SCCP optimization tends to produce code like this:
+ // switch(x) { case 42: phi(42, ...) }
+ // Materializing the constant for the phi argument needs instructions, so we
+ // change the code to:
+ // switch(x) { case 42: phi(x, ...) }
+
+ Value *Condition = SI->getCondition();
+ // Avoid endless loop in degenerate case.
+ if (isa<ConstantInt>(*Condition))
+ return false;
+
+ bool Changed = false;
+ BasicBlock *SwitchBB = SI->getParent();
+ Type *ConditionType = Condition->getType();
+
+ for (const SwitchInst::CaseHandle &Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseBB = Case.getCaseSuccessor();
+ // Set to true if we previously checked that `CaseBB` is only reached by
+ // a single case from this switch.
+ bool CheckedForSinglePred = false;
+ for (PHINode &PHI : CaseBB->phis()) {
+ Type *PHIType = PHI.getType();
+ // If ZExt is free then we can also catch patterns like this:
+ // switch((i32)x) { case 42: phi((i64)42, ...); }
+ // and replace `(i64)42` with `zext i32 %x to i64`.
+ bool TryZExt =
+ PHIType->isIntegerTy() &&
+ PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
+ TLI->isZExtFree(ConditionType, PHIType);
+ if (PHIType == ConditionType || TryZExt) {
+ // Set to true to skip this case because of multiple preds.
+ bool SkipCase = false;
+ Value *Replacement = nullptr;
+ for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
+ Value *PHIValue = PHI.getIncomingValue(I);
+ if (PHIValue != CaseValue) {
+ if (!TryZExt)
+ continue;
+ ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
+ if (!PHIValueInt ||
+ PHIValueInt->getValue() !=
+ CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
+ continue;
+ }
+ if (PHI.getIncomingBlock(I) != SwitchBB)
+ continue;
+ // We cannot optimize if there are multiple case labels jumping to
+ // this block. This check may get expensive when there are many
+ // case labels, so we test for it last.
+ if (!CheckedForSinglePred) {
+ CheckedForSinglePred = true;
+ if (SI->findCaseDest(CaseBB) == nullptr) {
+ SkipCase = true;
+ break;
+ }
+ }
+
+ if (Replacement == nullptr) {
+ if (PHIValue == CaseValue) {
+ Replacement = Condition;
+ } else {
+ IRBuilder<> Builder(SI);
+ Replacement = Builder.CreateZExt(Condition, PHIType);
+ }
+ }
+ PHI.setIncomingValue(I, Replacement);
+ Changed = true;
+ }
+ if (SkipCase)
+ break;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+ bool Changed = optimizeSwitchType(SI);
+ Changed |= optimizeSwitchPhiConstants(SI);
+ return Changed;
+}
namespace {
@@ -7777,7 +7871,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+ if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
LargeOffsetGEPMap.erase(P);
P->replaceAllUsesWith(V);
P->eraseFromParent();
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 1d50e1d22b95..fd52191882cb 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -13,7 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
@@ -58,6 +63,7 @@ CGOPT(bool, EnableUnsafeFPMath)
CGOPT(bool, EnableNoInfsFPMath)
CGOPT(bool, EnableNoNaNsFPMath)
CGOPT(bool, EnableNoSignedZerosFPMath)
+CGOPT(bool, EnableApproxFuncFPMath)
CGOPT(bool, EnableNoTrappingFPMath)
CGOPT(bool, EnableAIXExtendedAltivecABI)
CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
@@ -73,6 +79,7 @@ CGOPT(bool, StackSymbolOrdering)
CGOPT(bool, StackRealign)
CGOPT(std::string, TrapFuncName)
CGOPT(bool, UseCtors)
+CGOPT(bool, LowerGlobalDtorsViaCxaAtExit)
CGOPT(bool, RelaxELFRelocations)
CGOPT_EXP(bool, DataSections)
CGOPT_EXP(bool, FunctionSections)
@@ -94,6 +101,7 @@ CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
CGOPT(unsigned, AlignLoops)
+CGOPT(bool, JMCInstrument)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -218,6 +226,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableNoSignedZerosFPMath);
+ static cl::opt<bool> EnableApproxFuncFPMath(
+ "enable-approx-func-fp-math",
+ cl::desc("Enable FP math optimizations that assume approx func"),
+ cl::init(false));
+ CGBINDOPT(EnableApproxFuncFPMath);
+
static cl::opt<bool> EnableNoTrappingFPMath(
"enable-no-trapping-fp-math",
cl::desc("Enable setting the FP exceptions build "
@@ -333,6 +347,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(UseCtors);
+ static cl::opt<bool> LowerGlobalDtorsViaCxaAtExit(
+ "lower-global-dtors-via-cxa-atexit",
+ cl::desc("Lower llvm.global_dtors (global destructors) via __cxa_atexit"),
+ cl::init(true));
+ CGBINDOPT(LowerGlobalDtorsViaCxaAtExit);
+
static cl::opt<bool> RelaxELFRelocations(
"relax-elf-relocations",
cl::desc(
@@ -457,6 +477,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::desc("Default alignment for loops"));
CGBINDOPT(AlignLoops);
+ static cl::opt<bool> JMCInstrument(
+ "enable-jmc-instrument",
+ cl::desc("Instrument functions with a call to __CheckForDebuggerJustMyCode"),
+ cl::init(false));
+ CGBINDOPT(JMCInstrument);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -493,6 +519,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.NoInfsFPMath = getEnableNoInfsFPMath();
Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
+ Options.ApproxFuncFPMath = getEnableApproxFuncFPMath();
Options.NoTrappingFPMath = getEnableNoTrappingFPMath();
DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
@@ -509,9 +536,10 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
+ Options.LowerGlobalDtorsViaCxaAtExit = getLowerGlobalDtorsViaCxaAtExit();
Options.RelaxELFRelocations = getRelaxELFRelocations();
Options.DataSections =
- getExplicitDataSections().getValueOr(TheTriple.hasDefaultDataSections());
+ getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
Options.FunctionSections = getFunctionSections();
Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
Options.XCOFFTracebackTable = getXCOFFTracebackTable();
@@ -531,6 +559,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
+ Options.JMCInstrument = getJMCInstrument();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
@@ -643,6 +672,7 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
+ HANDLE_BOOL_ATTR(EnableApproxFuncFPMathView, "approx-func-fp-math");
if (DenormalFPMathView->getNumOccurrences() > 0 &&
!F.hasFnAttribute("denormal-fp-math")) {
@@ -684,4 +714,3 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
for (Function &F : M)
setFunctionAttributes(CPU, Features, F);
}
-
diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index d38bacdb1aa7..42192f41dbda 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -30,10 +30,10 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 5579152f1ce0..ce00be634e9a 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 1337e57f360b..565c8b405f82 100644
--- a/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -28,12 +28,9 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <deque>
@@ -93,7 +90,7 @@ private:
LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
const MachineOperand &MO) const;
- bool runOnce(MachineFunction &MF);
+ std::pair<bool, bool> runOnce(MachineFunction &MF);
LaneBitmask determineInitialDefinedLanes(unsigned Reg);
LaneBitmask determineInitialUsedLanes(unsigned Reg);
@@ -487,7 +484,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
return true;
}
-bool DetectDeadLanes::runOnce(MachineFunction &MF) {
+std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
// First pass: Populate defs/uses of vregs with initial values
unsigned NumVirtRegs = MRI->getNumVirtRegs();
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
@@ -528,6 +525,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
dbgs() << "\n";
});
+ bool Changed = false;
bool Again = false;
// Mark operands as dead/unused.
for (MachineBasicBlock &MBB : MF) {
@@ -544,6 +542,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as dead in " << MI);
MO.setIsDead();
+ Changed = true;
}
if (MO.readsReg()) {
bool CrossCopy = false;
@@ -551,10 +550,12 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
+ Changed = true;
} else if (isUndefInput(MO, &CrossCopy)) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
+ Changed = true;
if (CrossCopy)
Again = true;
}
@@ -563,7 +564,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
}
}
- return Again;
+ return std::make_pair(Changed, Again);
}
bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
@@ -585,13 +586,16 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
WorklistMembers.resize(NumVirtRegs);
DefinedByCopy.resize(NumVirtRegs);
+ bool Changed = false;
bool Again;
do {
- Again = runOnce(MF);
+ bool LocalChanged;
+ std::tie(LocalChanged, Again) = runOnce(MF);
+ Changed |= LocalChanged;
} while(Again);
DefinedByCopy.clear();
WorklistMembers.clear();
delete[] VRegInfos;
- return true;
+ return Changed;
}
diff --git a/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
index c18532946bf9..b26aa792bb93 100644
--- a/llvm/lib/CodeGen/EHContGuardCatchret.cpp
+++ b/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -17,9 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 6a0da4dad3c1..32858d043383 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,10 +17,10 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +30,6 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -664,8 +663,8 @@ void SSAIfConv::rewritePHIOperands() {
PI.PHI->getOperand(i-1).setMBB(Head);
PI.PHI->getOperand(i-2).setReg(DstReg);
} else if (MBB == getFPred()) {
- PI.PHI->RemoveOperand(i-1);
- PI.PHI->RemoveOperand(i-2);
+ PI.PHI->removeOperand(i-1);
+ PI.PHI->removeOperand(i-2);
}
}
LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 60ee1812ee2c..b2639636dda7 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Dominators.h"
@@ -32,6 +31,10 @@
using namespace llvm;
+namespace llvm {
+class TargetLowering;
+}
+
#define DEBUG_TYPE "expandmemcmp"
STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
@@ -737,7 +740,7 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- DomTreeUpdater *DTU) {
+ DomTreeUpdater *DTU, const bool IsBCmp) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -757,7 +760,8 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
- const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+ const bool IsUsedForZeroCmp =
+ IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
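+ // bcmp only reports equality vs. inequality, so its expansion can always
+ // use the cheaper zero-equality form.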
bool OptForSize = CI->getFunction()->hasOptSize() ||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
auto Options = TTI->enableMemCmpExpansion(OptForSize,
@@ -861,7 +865,7 @@ bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
+ expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
return true;
}
}
@@ -881,7 +885,7 @@ ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
- DTU.hasValue() ? DTU.getPointer() : nullptr)) {
+ DTU ? DTU.getPointer() : nullptr)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index d9caa8ad42d0..086b4a4dcc47 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -13,8 +13,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -104,8 +102,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
if (MI->allDefsAreDead()) {
MI->setDesc(TII->get(TargetOpcode::KILL));
- MI->RemoveOperand(3); // SubIdx
- MI->RemoveOperand(1); // Imm
+ MI->removeOperand(3); // SubIdx
+ MI->removeOperand(1); // Imm
LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
return true;
}
@@ -117,8 +115,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
// We must leave %rax live.
if (DstReg != InsReg) {
MI->setDesc(TII->get(TargetOpcode::KILL));
- MI->RemoveOperand(3); // SubIdx
- MI->RemoveOperand(1); // Imm
+ MI->removeOperand(3); // SubIdx
+ MI->removeOperand(1); // Imm
LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI);
return true;
}
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 2bcaf750911b..f08c47d220ea 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -14,12 +14,10 @@
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index bb8d2b3e9a78..7883a48d121c 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -23,13 +23,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -115,6 +113,17 @@ static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
OldOp.eraseFromParent();
}
+static bool maySpeculateLanes(VPIntrinsic &VPI) {
+ // The result of VP reductions depends on the mask and evl.
+ if (isa<VPReductionIntrinsic>(VPI))
+ return false;
+ // Fall back to whether the intrinsic is speculatable.
+ Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
+ unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
+ return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
+ cast<Operator>(&VPI));
+}
+
//// } Helpers
namespace {
@@ -218,8 +227,7 @@ Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &VPI) {
- assert((isSafeToSpeculativelyExecute(&VPI) ||
- VPI.canIgnoreVectorLengthParam()) &&
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
"Implicitly dropping %evl in non-speculatable operator!");
auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
@@ -298,8 +306,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &VPI) {
- assert((isSafeToSpeculativelyExecute(&VPI) ||
- VPI.canIgnoreVectorLengthParam()) &&
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
"Implicitly dropping %evl in non-speculatable operator!");
Value *Mask = VPI.getMaskParam();
@@ -473,9 +480,9 @@ struct TransformJob {
bool isDone() const { return Strategy.shouldDoNothing(); }
};
-void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
- // Speculatable instructions do not strictly need predication.
- if (isSafeToSpeculativelyExecute(&I)) {
+void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
+ // Operations with speculatable lanes do not strictly need predication.
+ if (maySpeculateLanes(VPI)) {
// Converting a speculatable VP intrinsic means dropping %mask and %evl.
// No need to expand %evl into the %mask only to ignore that code.
if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
@@ -520,7 +527,7 @@ bool CachingVPExpander::expandVectorPredication() {
if (!VPI)
continue;
auto VPStrat = getVPLegalizationStrategy(*VPI);
- sanitizeStrategy(I, VPStrat);
+ sanitizeStrategy(*VPI, VPStrat);
if (!VPStrat.shouldDoNothing())
Worklist.emplace_back(VPI, VPStrat);
}
diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp b/llvm/lib/CodeGen/FEntryInserter.cpp
index c2194929e2e7..68304dd41db0 100644
--- a/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -13,12 +13,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp
index 1d35b194f218..3ec666227651 100644
--- a/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/llvm/lib/CodeGen/FaultMaps.cpp
@@ -52,7 +52,7 @@ void FaultMaps::serializeToFaultMapSection() {
// Create the section.
MCSection *FaultMapSection =
OutContext.getObjectFileInfo()->getFaultMapSection();
- OS.SwitchSection(FaultMapSection);
+ OS.switchSection(FaultMapSection);
// Emit a dummy symbol to force section inclusion.
OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
diff --git a/llvm/lib/CodeGen/FinalizeISel.cpp b/llvm/lib/CodeGen/FinalizeISel.cpp
index 00040e92a829..329c9587e321 100644
--- a/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -16,11 +16,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "finalize-isel"
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index ec6bf18b2769..252910fd9462 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -24,10 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/InitializePasses.h"
@@ -156,12 +153,17 @@ static Register performCopyPropagation(Register Reg,
RI = ++MachineBasicBlock::iterator(Def);
IsKill = DestSrc->Source->isKill();
- // There are no uses of original register between COPY and STATEPOINT.
- // There can't be any after STATEPOINT, so we can eliminate Def.
if (!Use) {
+ // There are no uses of original register between COPY and STATEPOINT.
+ // There can't be any after STATEPOINT, so we can eliminate Def.
LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def);
Def->eraseFromParent();
+ } else if (IsKill) {
+ // COPY will remain in place; the spill will be inserted *after* it, so it
+ // no longer kills the source.
+ const_cast<MachineOperand *>(DestSrc->Source)->setIsKill(false);
}
+
return SrcReg;
}
diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp
index af5515cc6bfd..4d27143c5298 100644
--- a/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/llvm/lib/CodeGen/GCMetadata.cpp
@@ -11,16 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index 637a877810a1..80feb0045406 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -24,9 +23,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCContext.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index f9bfe8518083..ac140e745600 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -67,7 +67,8 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
}
bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
- return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_IMPLICIT_DEF;
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT ||
+ Opc == TargetOpcode::G_IMPLICIT_DEF;
}
std::unique_ptr<CSEConfigBase>
@@ -88,7 +89,7 @@ void GISelCSEInfo::setMF(MachineFunction &MF) {
this->MRI = &MF.getRegInfo();
}
-GISelCSEInfo::~GISelCSEInfo() {}
+GISelCSEInfo::~GISelCSEInfo() = default;
bool GISelCSEInfo::isUniqueMachineInstValid(
const UniqueMachineInstr &UMI) const {
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 1a642e233a6a..a432e4ed7fb7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -12,6 +12,7 @@
//
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -174,6 +175,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
default:
break;
case TargetOpcode::G_ADD:
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
@@ -185,23 +187,54 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UREM:
- case TargetOpcode::G_SREM: {
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
// Try to constant fold these.
assert(SrcOps.size() == 2 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
- if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
+ LLT SrcTy = SrcOps[0].getLLTTy(*getMRI());
+
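+ // Pointer arithmetic in a non-integral address space has no well-defined
+ // integer representation, so G_PTR_ADD there cannot be constant folded.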
+ if (Opc == TargetOpcode::G_PTR_ADD &&
+ getDataLayout().isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+ break;
+
+ if (SrcTy.isVector()) {
// Try to constant fold vector constants.
- Register VecCst = ConstantFoldVectorBinop(
- Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
- if (VecCst)
- return buildCopy(DstOps[0], VecCst);
+ SmallVector<APInt> VecCst = ConstantFoldVectorBinop(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI());
+ if (!VecCst.empty())
+ return buildBuildVectorConstant(DstOps[0], VecCst);
break;
}
+
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
SrcOps[1].getReg(), *getMRI()))
return buildConstant(DstOps[0], *Cst);
break;
}
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FCOPYSIGN: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 2 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (Optional<APFloat> Cst = ConstantFoldFPBinOp(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
+ return buildFConstant(DstOps[0], *Cst);
+ break;
+ }
case TargetOpcode::G_SEXT_INREG: {
assert(DstOps.size() == 1 && "Invalid dst ops");
assert(SrcOps.size() == 2 && "Invalid src ops");
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1ec7868f2234..081c8b125f17 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -11,16 +11,16 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
@@ -698,10 +698,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
+ bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
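+ // On targets with big-endian part ordering, ArgLocs are laid out in the
+ // opposite order from Regs, so mirror the part index when looking them up.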
for (unsigned Part = 0; Part < NumParts; ++Part) {
Register ArgReg = Args[i].Regs[Part];
// There should be Regs.size() ArgLocs per argument.
- VA = ArgLocs[j + Part];
+ unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
+ CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
if (VA.isMemLoc() && !Flags.isByVal()) {
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 30f8838805b5..1a5fe3e84c17 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,14 +13,13 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
-#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "gi-combiner"
@@ -57,8 +56,7 @@ class WorkListMaintainer : public GISelChangeObserver {
public:
WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
- virtual ~WorkListMaintainer() {
- }
+ virtual ~WorkListMaintainer() = default;
void erasingInstr(MachineInstr &MI) override {
LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
@@ -115,7 +113,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
bool MFChanged = false;
bool Changed;
- MachineIRBuilder &B = *Builder.get();
+ MachineIRBuilder &B = *Builder;
do {
// Collect all instructions. Do a post order traversal for basic blocks and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d6a009744161..2c94f87804ac 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8,7 +8,6 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -16,23 +15,22 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -131,9 +129,27 @@ isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
return BigEndian;
}
+bool CombinerHelper::isPreLegalize() const { return !LI; }
+
+bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
+ assert(LI && "Must have LegalizerInfo to query isLegal!");
+ return LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
bool CombinerHelper::isLegalOrBeforeLegalizer(
const LegalityQuery &Query) const {
- return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+ return isPreLegalize() || isLegal(Query);
+}
+
+bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
+ if (!Ty.isVector())
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
+ // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
+ if (isPreLegalize())
+ return true;
+ LLT EltTy = Ty.getElementType();
+ return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
+ isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
@@ -1275,12 +1291,12 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
- return Cst.hasValue();
+ return Cst.has_value();
}
void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
Optional<APFloat> &Cst) {
- assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+ assert(Cst && "Optional is unexpectedly empty!");
Builder.setInstrAndDebugLoc(MI);
MachineFunction &MF = Builder.getMF();
auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
@@ -2350,6 +2366,19 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
return false;
+ // If both instructions are loads or stores, they are equal only if both
+ // are dereferenceable invariant loads with the same number of bits.
+ if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
+ GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
+ GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
+ if (!LS1 || !LS2)
+ return false;
+
+ if (!I2->isDereferenceableInvariantLoad(nullptr) ||
+ (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
+ return false;
+ }
+
// Check for physical registers on the instructions first to avoid cases
// like this:
//
@@ -2397,7 +2426,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
return false;
auto *MI = MRI.getVRegDef(MOP.getReg());
auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
- return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+ return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
MaybeCst->getSExtValue() == C;
}
@@ -2916,7 +2945,7 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI,
int64_t Cst;
if (Ty.isVector()) {
MachineInstr *CstDef = MRI.getVRegDef(CstReg);
- auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+ auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
if (!MaybeCst)
return false;
if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
@@ -3049,6 +3078,102 @@ void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
MI.eraseFromParent();
}
+bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
+ unsigned &SelectOpNo) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ Register OtherOperandReg = RHS;
+ SelectOpNo = 1;
+ MachineInstr *Select = MRI.getVRegDef(LHS);
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+ !MRI.hasOneNonDBGUse(LHS)) {
+ OtherOperandReg = LHS;
+ SelectOpNo = 2;
+ Select = MRI.getVRegDef(RHS);
+ if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+ !MRI.hasOneNonDBGUse(RHS))
+ return false;
+ }
+
+ MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
+ MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
+
+ if (!isConstantOrConstantVector(*SelectLHS, MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false))
+ return false;
+ if (!isConstantOrConstantVector(*SelectRHS, MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false))
+ return false;
+
+ unsigned BinOpcode = MI.getOpcode();
+
+ // We now know one of the operands is a select of constants. Now verify that
+ // the other binary operator operand is either a constant, or we can handle a
+ // variable.
+ bool CanFoldNonConst =
+ (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
+ (isNullOrNullSplat(*SelectLHS, MRI) ||
+ isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
+ (isNullOrNullSplat(*SelectRHS, MRI) ||
+ isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
+ if (CanFoldNonConst)
+ return true;
+
+ return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false);
+}
+
+/// \p SelectOperand is the operand in binary operator \p MI that is the select
+/// to fold.
+bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+ const unsigned &SelectOperand) {
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
+
+ Register SelectCond = Select->getOperand(1).getReg();
+ Register SelectTrue = Select->getOperand(2).getReg();
+ Register SelectFalse = Select->getOperand(3).getReg();
+
+ LLT Ty = MRI.getType(Dst);
+ unsigned BinOpcode = MI.getOpcode();
+
+ Register FoldTrue, FoldFalse;
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+ if (SelectOperand == 1) {
+ // TODO: SelectionDAG verifies this actually constant folds before
+ // committing to the combine.
+
+ FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
+ FoldFalse =
+ Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
+ } else {
+ FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
+ FoldFalse =
+ Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
+ }
+
+ Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
+ Observer.erasingInstr(*Select);
+ Select->eraseFromParent();
+ MI.eraseFromParent();
+
+ return true;
+}
+
Optional<SmallVector<Register, 8>>
CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
@@ -3340,7 +3465,7 @@ bool CombinerHelper::matchLoadOrCombine(
// BSWAP.
bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
- if (!IsBigEndian.hasValue())
+ if (!IsBigEndian)
return false;
bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
@@ -3848,7 +3973,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
- unsigned Idx = Cst.getValue().getZExtValue();
+ unsigned Idx = Cst->getZExtValue();
if (Idx >= NumElts)
return false; // Out of range.
ExtractedElts.set(Idx);
@@ -3904,10 +4029,9 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
// Given constants C0 and C1 such that C0 + C1 is bit-width:
// (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
- // TODO: Match constant splat.
int64_t CstShlAmt, CstLShrAmt;
- if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
- mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
+ if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
+ mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
CstShlAmt + CstLShrAmt == BitWidth) {
FshOpc = TargetOpcode::G_FSHR;
Amt = LShrAmt;
@@ -3958,7 +4082,7 @@ void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
Observer.changingInstr(MI);
MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
: TargetOpcode::G_ROTR));
- MI.RemoveOperand(2);
+ MI.removeOperand(2);
Observer.changedInstr(MI);
}
@@ -4100,18 +4224,23 @@ bool CombinerHelper::matchAndOrDisjointMask(
return false;
Register Src;
- int64_t MaskAnd;
- int64_t MaskOr;
+ Register AndMaskReg;
+ int64_t AndMaskBits;
+ int64_t OrMaskBits;
if (!mi_match(MI, MRI,
- m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+ m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
+ m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
return false;
- // Check if MaskOr could turn on any bits in Src.
- if (MaskAnd & MaskOr)
+ // If the Or could set any bit that the And keeps, the Or is observable
+ // and we cannot drop it.
+ if (AndMaskBits & OrMaskBits)
return false;
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Observer.changingInstr(MI);
+ // Canonicalize the result to have the constant on the RHS.
+ if (MI.getOperand(1).getReg() == AndMaskReg)
+ MI.getOperand(2).setReg(AndMaskReg);
MI.getOperand(1).setReg(Src);
Observer.changedInstr(MI);
};
@@ -4259,6 +4388,14 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
if (ShrAmt < 0 || ShrAmt >= Size)
return false;
+ // If the shift subsumes the mask, emit the 0 directly.
+ if (0 == (SMask >> ShrAmt)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildConstant(Dst, 0);
+ };
+ return true;
+ }
+
// Check that ubfx can do the extraction, with no holes in the mask.
uint64_t UMask = SMask;
UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
@@ -4585,6 +4722,42 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
+bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*MULO x, 0) -> 0 + no carry out
+ assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
+ MI.getOpcode() == TargetOpcode::G_SMULO);
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Carry = MI.getOperand(1).getReg();
+ if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
+ !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildConstant(Dst, 0);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*ADDO x, 0) -> x + no carry out
+ assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
+ MI.getOpcode() == TargetOpcode::G_SADDO);
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ Register Carry = MI.getOperand(1).getReg();
+ if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildCopy(Dst, LHS);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+}
+
MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UDIV);
auto &UDiv = cast<GenericMachineInstr>(MI);
@@ -5376,6 +5549,106 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
return false;
}
+bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ GSelect &Sel = cast<GSelect>(MI);
+ Register DstReg = Sel.getReg(0);
+ Register Cond = Sel.getCondReg();
+ Register TrueReg = Sel.getTrueReg();
+ Register FalseReg = Sel.getFalseReg();
+
+ auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
+ auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
+
+ const LLT CondTy = MRI.getType(Cond);
+ const LLT OpTy = MRI.getType(TrueReg);
+ if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
+ return false;
+
+ // We have a boolean select.
+
+ // select Cond, Cond, F --> or Cond, F
+ // select Cond, 1, F --> or Cond, F
+ auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
+ if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildOr(DstReg, Cond, FalseReg);
+ };
+ return true;
+ }
+
+ // select Cond, T, Cond --> and Cond, T
+ // select Cond, T, 0 --> and Cond, T
+ auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
+ if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildAnd(DstReg, Cond, TrueReg);
+ };
+ return true;
+ }
+
+ // select Cond, T, 1 --> or (not Cond), T
+ if (MaybeCstFalse && MaybeCstFalse->isOne()) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
+ };
+ return true;
+ }
+
+ // select Cond, 0, F --> and (not Cond), F
+ if (MaybeCstTrue && MaybeCstTrue->isZero()) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
+ };
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
+ unsigned &IdxToPropagate) {
+ bool PropagateNaN;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ PropagateNaN = false;
+ break;
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ PropagateNaN = true;
+ break;
+ }
+
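+ // G_FMINNUM/G_FMAXNUM return the non-NaN operand when exactly one input
+ // is NaN, so a constant NaN operand selects the other operand;
+ // G_FMINIMUM/G_FMAXIMUM propagate the NaN itself.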
+ auto MatchNaN = [&](unsigned Idx) {
+ Register MaybeNaNReg = MI.getOperand(Idx).getReg();
+ const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
+ if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
+ return false;
+ IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
+ return true;
+ };
+
+ return MatchNaN(1) || MatchNaN(2);
+}
+
+bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ // Helper lambda to check for opportunities for
+ // A + (B - A) -> B
+ // (B - A) + A -> B
+ auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
+ Register Reg;
+ return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
+ Reg == MaybeSameReg;
+ };
+ return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 64c2f0d5f8e4..4f03af0fce82 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -567,6 +567,26 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
break;
}
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO: {
+ if (MI.getOperand(1).getReg() == R) {
+ // If we know the boolean result of the overflow op has its top bits
+ // zero, use this info.
+ if (TL.getBooleanContents(DstTy.isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ }
+ break;
+ }
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -673,6 +693,27 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
MI.getOperand(3).getReg(), DemandedElts,
Depth + 1);
}
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO: {
+ // If the boolean result is 0/-1, all bits are sign bits.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integers.
+ if (MI.getOperand(1).getReg() == R) {
+ if (TL.getBooleanContents(DstTy.isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return TyBits;
+ }
+
+ break;
+ }
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
default: {
diff --git a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 252b931602c6..efcc40641ea8 100644
--- a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 6d415c9c7f90..a2af66d28f4a 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -16,10 +16,11 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
@@ -47,7 +48,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -78,7 +78,6 @@
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
@@ -1818,7 +1817,7 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
bool IRTranslator::translateConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
- fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+ fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
if (!Opcode)
@@ -2252,6 +2251,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
return CLI->lowerCall(MIRBuilder, Info);
}
+ case Intrinsic::fptrunc_round: {
+ unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+ // Convert the metadata argument to a constant integer
+ Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ // Add the rounding mode as an immediate integer operand
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
+ {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
+ .addImm((int)*RoundMode);
+
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2409,7 +2425,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- Align Alignment = Info.align.getValueOr(
+ Align Alignment = Info.align.value_or(
DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
LLT MemTy = Info.memVT.isSimple()
? getLLTForMVT(Info.memVT.getSimpleVT())
@@ -2934,15 +2950,6 @@ void IRTranslator::finishPendingPhis() {
}
}
-bool IRTranslator::valueIsSplit(const Value &V,
- SmallVectorImpl<uint64_t> *Offsets) {
- SmallVector<LLT, 4> SplitTys;
- if (Offsets && !Offsets->empty())
- Offsets->clear();
- computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
- return SplitTys.size() > 1;
-}
-
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder->setDebugLoc(Inst.getDebugLoc());
@@ -2984,7 +2991,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
// Return the scalar if it is a <1 x Ty> vector.
unsigned NumElts = CAZ->getElementCount().getFixedValue();
if (NumElts == 1)
- return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
+ return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder);
SmallVector<Register, 4> Ops;
for (unsigned I = 0; I < NumElts; ++I) {
Constant &Elt = *CAZ->getElementValue(I);
@@ -2994,8 +3001,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
} else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
- return translateCopy(C, *CV->getElementAsConstant(0),
- *EntryBuilder.get());
+ return translateCopy(C, *CV->getElementAsConstant(0), *EntryBuilder);
SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
@@ -3013,7 +3019,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
} else if (auto CV = dyn_cast<ConstantVector>(&C)) {
if (CV->getNumOperands() == 1)
- return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
+ return translateCopy(C, *CV->getOperand(0), *EntryBuilder);
SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
@@ -3255,14 +3261,13 @@ bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
return false;
}
- // On PS4, the "return address" must still be within the calling function,
- // even if it's at the very end, so emit an explicit TRAP here.
- // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ // On PS4/PS5, the "return address" must still be within the calling
+ // function, even if it's at the very end, so emit an explicit TRAP here.
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
// return type (void).
const TargetMachine &TM = MF->getTarget();
- if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) {
+ if (TM.getTargetTriple().isPS() || TM.getTargetTriple().isWasm()) {
LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
return false;
}
@@ -3413,7 +3418,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder, F, VRegArgs, FuncInfo)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
@@ -3469,8 +3474,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}
- if (!finalizeBasicBlock(*BB, MBB))
+ if (!finalizeBasicBlock(*BB, MBB)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ BB->getTerminator()->getDebugLoc(), BB);
+ R << "unable to translate basic block";
+ reportTranslationError(*MF, *TPC, *ORE, R);
return false;
+ }
}
#ifndef NDEBUG
WrapperObserver.removeObserver(&Verifier);
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e5f95ca5aa73..95ae8383b6fa 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -12,15 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#define DEBUG_TYPE "inline-asm-lowering"
@@ -150,6 +145,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_RegisterClass:
return 2;
case TargetLowering::C_Memory:
+ case TargetLowering::C_Address:
return 3;
}
llvm_unreachable("Invalid constraint type");
@@ -310,7 +306,7 @@ bool InlineAsmLowering::lowerInlineAsm(
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (OpInfo.isIndirect) {
- OpTy = Call.getAttributes().getParamElementType(ArgNo);
+ OpTy = Call.getParamElementType(ArgNo);
assert(OpTy && "Indirect operand must have elementtype attribute");
}
@@ -649,6 +645,8 @@ bool InlineAsmLowering::lowerInlineAsm(
return false;
case TargetLowering::C_Memory:
break; // Already handled.
+ case TargetLowering::C_Address:
+ break; // Silence warning.
case TargetLowering::C_Unknown:
LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n");
return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 2bb5addefe48..28f3b425c67d 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,8 +12,6 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -23,14 +21,13 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/config.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 1d0c106fd5db..8959d215ecd1 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -13,16 +13,9 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
#define DEBUG_TYPE "instructionselector"
@@ -66,6 +59,10 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
std::next(MI.getIterator()) == IntoMI.getIterator())
return true;
+ // Convergent instructions cannot be moved in the CFG.
+ if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
+ return false;
+
return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
!MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 1f0738a8d9d2..54a82cac95d5 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -188,6 +188,13 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
};
}
+LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
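+ // True if the access is not a whole number of bytes, or its size in
+ // bytes is not a power of 2 (e.g. an s1 or s24 access).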
+ return [=](const LegalityQuery &Query) {
+ const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
+ return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes());
+ };
+}
+
LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index 75b7fcb5663a..25c1db91b05d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -43,6 +43,27 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
};
}
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
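+ // Use the element count of FromTypeIdx (treating a scalar as a
+ // one-element vector) as the new element count for TypeIdx.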
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ ElementCount NewEltCount =
+ NewTy.isVector() ? NewTy.getElementCount() : ElementCount::getFixed(1);
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+ LLT NewEltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
+ : ElementCount::getFixed(1);
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ };
+}
+
LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
unsigned FromTypeIdx) {
return [=](const LegalityQuery &Query) {
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 0ab4a7f64840..f09e5b7ce783 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -14,7 +14,7 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -24,15 +24,11 @@
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <iterator>
#define DEBUG_TYPE "legalizer"
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 37bc8a65dc7c..fb046d519ac8 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,10 +15,13 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -1611,40 +1614,6 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
-Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
- Register WideReg = MRI.createGenericVirtualRegister(WideTy);
- LLT OrigTy = MRI.getType(OrigReg);
- LLT LCMTy = getLCMType(WideTy, OrigTy);
-
- const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
- const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
-
- Register UnmergeSrc = WideReg;
-
- // Create a merge to the LCM type, padding with undef
- // %0:_(<3 x s32>) = G_FOO => <4 x s32>
- // =>
- // %1:_(<4 x s32>) = G_FOO
- // %2:_(<4 x s32>) = G_IMPLICIT_DEF
- // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
- // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
- if (NumMergeParts > 1) {
- Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
- SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
- MergeParts[0] = WideReg;
- UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
- }
-
- // Unmerge to the original register and pad with dead defs.
- SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
- UnmergeResults[0] = OrigReg;
- for (int I = 1; I != NumUnmergeParts; ++I)
- UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
-
- MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
- return WideReg;
-}
-
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
@@ -1867,9 +1836,6 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- if (TypeIdx == 1)
- return UnableToLegalize; // TODO
-
unsigned Opcode;
unsigned ExtOpcode;
Optional<Register> CarryIn = None;
@@ -1914,6 +1880,18 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
break;
}
+ if (TypeIdx == 1) {
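+ // TypeIdx 1 is the carry/overflow boolean. Widen its def directly; the
+ // carry-in, if present (operand 4), is widened with the target's boolean
+ // extension so its value is preserved.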
+ unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
+
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 1);
+ if (CarryIn)
+ widenScalarSrc(MI, WideTy, 4, BoolExtOp);
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
// Do the arithmetic in the larger type.
@@ -1985,8 +1963,12 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- if (TypeIdx == 1)
- return UnableToLegalize;
+ if (TypeIdx == 1) {
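+ // TypeIdx 1 is the overflow boolean (operand 1); widening its def is
+ // sufficient.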
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
Register Result = MI.getOperand(0).getReg();
@@ -2992,7 +2974,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (isa<GSExtLoad>(LoadMI)) {
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
- } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
+ } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
// The extra bits are guaranteed to be zero, since we stored them that
// way. A zext load from Wide thus automatically gives zext from MemVT.
@@ -3314,7 +3296,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
Observer.changingInstr(MI);
const auto &TII = MIRBuilder.getTII();
MI.setDesc(TII.get(TargetOpcode::G_MUL));
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
Observer.changedInstr(MI);
auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
@@ -4096,13 +4078,14 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
// is a load, return the new registers in ValRegs. For a store, each element
// of ValRegs should be PartTy. Returns the next offset that needs to be
// handled.
+ bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
auto MMO = LdStMI.getMMO();
auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
- unsigned Offset) -> unsigned {
+ unsigned NumParts, unsigned Offset) -> unsigned {
MachineFunction &MF = MIRBuilder.getMF();
unsigned PartSize = PartTy.getSizeInBits();
for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
- Offset += PartSize, ++Idx) {
+ ++Idx) {
unsigned ByteOffset = Offset / 8;
Register NewAddrReg;
@@ -4118,16 +4101,19 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
} else {
MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
}
+ Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
}
return Offset;
};
- unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
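+ // On big-endian targets the first piece lives at the highest offset, so
+ // start there and let splitTypePieces step the offset downward.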
+ unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
+ unsigned HandledOffset =
+ splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
// Handle the rest of the register if this isn't an even type breakdown.
if (LeftoverTy.isValid())
- splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
+ splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
if (IsLoad) {
insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
@@ -4236,6 +4222,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
+ case G_UADDO:
+ case G_USUBO:
+ case G_UADDE:
+ case G_USUBE:
+ case G_SADDO:
+ case G_SSUBO:
+ case G_SADDE:
+ case G_SSUBE:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
@@ -4882,10 +4876,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_SELECT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ case TargetOpcode::G_SELECT: {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register CondReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT CondTy = MRI.getType(CondReg);
+ if (TypeIdx == 1) {
+ if (!CondTy.isScalar() ||
+ DstTy.getElementCount() != MoreTy.getElementCount())
+ return UnableToLegalize;
+
+ // This is turning a scalar select of vectors into a vector
+ // select. Broadcast the select condition.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ShufSplat.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (CondTy.isVector())
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -4894,6 +4904,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
+ }
case TargetOpcode::G_UNMERGE_VALUES:
return UnableToLegalize;
case TargetOpcode::G_PHI:
@@ -7229,25 +7240,32 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
Register Op2Reg = MI.getOperand(3).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT MaskTy = MRI.getType(MaskReg);
- LLT Op1Ty = MRI.getType(Op1Reg);
if (!DstTy.isVector())
return UnableToLegalize;
- // Vector selects can have a scalar predicate. If so, splat into a vector and
- // finish for later legalization attempts to try again.
if (MaskTy.isScalar()) {
+ // Turn the scalar condition into a vector condition mask.
+
Register MaskElt = MaskReg;
- if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
- MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
- // Generate a vector splat idiom to be pattern matched later.
+
+ // The condition was potentially zero extended before, but we want a sign
+ // extended boolean.
+ if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() &&
+ MaskTy != LLT::scalar(1)) {
+ MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
+ }
+
+ // Continue the sign extension (or truncate) to match the data type.
+ MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
+ MaskElt).getReg(0);
+
+ // Generate a vector splat idiom.
auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(ShufSplat.getReg(0));
- Observer.changedInstr(MI);
- return Legalized;
+ MaskReg = ShufSplat.getReg(0);
+ MaskTy = DstTy;
}
- if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
+ if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
return UnableToLegalize;
}
@@ -7414,7 +7432,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
unsigned NumBits = Ty.getScalarSizeInBits();
auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
+ APInt Scalar = ValVRegAndVal->Value.trunc(8);
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
@@ -7569,7 +7587,7 @@ LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
// See if this is a constant length copy
auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
// FIXME: support dynamically sized G_MEMCPY_INLINE
- assert(LenVRegAndVal.hasValue() &&
+ assert(LenVRegAndVal &&
"inline memcpy with dynamic size is not yet supported");
uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
@@ -7609,7 +7627,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -7644,7 +7662,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
+ NewAlign = NewAlign.previous();
if (NewAlign > Alignment) {
Alignment = NewAlign;
@@ -7717,7 +7735,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -7752,7 +7770,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
+ NewAlign = NewAlign.previous();
if (NewAlign > Alignment) {
Alignment = NewAlign;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 30697913a6a4..6adb7ddb5b66 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -23,9 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MathExtras.h"
#include <algorithm>
-#include <map>
using namespace llvm;
using namespace LegalizeActions;
@@ -132,15 +129,16 @@ static bool mutationIsSane(const LegalizeRule &Rule,
LLVM_FALLTHROUGH;
case MoreElements: {
// MoreElements can go from scalar to vector.
- const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1;
+ const ElementCount OldElts = OldTy.isVector() ?
+ OldTy.getElementCount() : ElementCount::getFixed(1);
if (NewTy.isVector()) {
if (Rule.getAction() == FewerElements) {
// Make sure the element count really decreased.
- if (NewTy.getNumElements() >= OldElts)
+ if (ElementCount::isKnownGE(NewTy.getElementCount(), OldElts))
return false;
} else {
// Make sure the element count really increased.
- if (NewTy.getNumElements() <= OldElts)
+ if (ElementCount::isKnownLE(NewTy.getElementCount(), OldElts))
return false;
}
} else if (Rule.getAction() == MoreElements)
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index de8dbd456901..d4fbf7d15089 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -73,6 +73,7 @@ void LoadStoreOpt::init(MachineFunction &MF) {
void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
+ AU.setPreservesAll();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -508,6 +509,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
return false;
+ // Avoid adding volatile or ordered stores to the candidate. We already have
+ // a check for this in instMayAlias(), but that is only called later, when
+ // checking candidates against potential aliasing hazards.
+ if (!StoreMI.isSimple())
+ return false;
+
Register StoreAddr = StoreMI.getPointerReg();
auto BIO = getPointerInfo(StoreAddr, *MRI);
Register StoreBase = BIO.BaseReg;
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index 328a278f3d68..c1287693e74d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index c6720568b362..19ebf46191a9 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -9,8 +9,6 @@
/// This file implements the MachineIRBuilder class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -19,7 +17,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -568,47 +566,6 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
return Extract;
}
-void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops,
- ArrayRef<uint64_t> Indices) {
-#ifndef NDEBUG
- assert(Ops.size() == Indices.size() && "incompatible args");
- assert(!Ops.empty() && "invalid trivial sequence");
- assert(llvm::is_sorted(Indices) &&
- "sequence offsets must be in ascending order");
-
- assert(getMRI()->getType(Res).isValid() && "invalid operand type");
- for (auto Op : Ops)
- assert(getMRI()->getType(Op).isValid() && "invalid operand type");
-#endif
-
- LLT ResTy = getMRI()->getType(Res);
- LLT OpTy = getMRI()->getType(Ops[0]);
- unsigned OpSize = OpTy.getSizeInBits();
- bool MaybeMerge = true;
- for (unsigned i = 0; i < Ops.size(); ++i) {
- if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {
- MaybeMerge = false;
- break;
- }
- }
-
- if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) {
- buildMerge(Res, Ops);
- return;
- }
-
- Register ResIn = getMRI()->createGenericVirtualRegister(ResTy);
- buildUndef(ResIn);
-
- for (unsigned i = 0; i < Ops.size(); ++i) {
- Register ResOut = i + 1 == Ops.size()
- ? Res
- : getMRI()->createGenericVirtualRegister(ResTy);
- buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
- ResIn = ResOut;
- }
-}
-
MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {});
}
@@ -666,6 +623,17 @@ MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
}
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
+ ArrayRef<APInt> Ops) {
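+ // Materialize a G_CONSTANT of the element type for each APInt, then
+ // assemble the pieces with G_BUILD_VECTOR.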
+ SmallVector<SrcOp> TmpVec;
+ TmpVec.reserve(Ops.size());
+ LLT EltTy = Res.getLLTTy(*getMRI()).getElementType();
+ for (auto &Op : Ops)
+ TmpVec.push_back(buildConstant(EltTy, Op));
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
const SrcOp &Src) {
SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 01af6bb51bb7..bce850ee212c 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -14,8 +14,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -25,12 +23,13 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -631,7 +630,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
"Unexpected hint opcode!");
// The only correct mapping for these is to always use the source register
// bank.
- const RegisterBank *RB = MRI->getRegBankOrNull(MI.getOperand(1).getReg());
+ const RegisterBank *RB =
+ RBI->getRegBank(MI.getOperand(1).getReg(), *MRI, *TRI);
// We can assume every instruction above this one has a selected register
// bank.
assert(RB && "Expected source register to have a register bank?");
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 544af9a2954f..7781761bc131 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -16,14 +16,14 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#define DEBUG_TYPE "globalisel-utils"
@@ -56,6 +57,11 @@ Register llvm::constrainOperandRegClass(
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
+ // Save the old register class to check whether the change notifications
+ // will be required.
+ // TODO: A better approach would be to pass the observers to
+ // constrainRegToClass().
+ auto *OldRegClass = MRI.getRegClassOrNull(Reg);
Register ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
// If we created a new virtual register because the class is not compatible
// then create a copy between the new and the old register.
@@ -81,7 +87,7 @@ Register llvm::constrainOperandRegClass(
if (GISelChangeObserver *Observer = MF.getObserver()) {
Observer->changedInstr(*RegMO.getParent());
}
- } else {
+ } else if (OldRegClass != MRI.getRegClassOrNull(Reg)) {
if (GISelChangeObserver *Observer = MF.getObserver()) {
if (!RegMO.isDef()) {
MachineInstr *RegDef = MRI.getVRegDef(Reg);
@@ -500,6 +506,7 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
default:
break;
case TargetOpcode::G_ADD:
+ case TargetOpcode::G_PTR_ADD:
return C1 + C2;
case TargetOpcode::G_AND:
return C1 & C2;
@@ -533,6 +540,14 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
if (!C2.getBoolValue())
break;
return C1.srem(C2);
+ case TargetOpcode::G_SMIN:
+ return APIntOps::smin(C1, C2);
+ case TargetOpcode::G_SMAX:
+ return APIntOps::smax(C1, C2);
+ case TargetOpcode::G_UMIN:
+ return APIntOps::umin(C1, C2);
+ case TargetOpcode::G_UMAX:
+ return APIntOps::umax(C1, C2);
}
return None;
@@ -592,33 +607,27 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}
-Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
- const Register Op2,
- const MachineRegisterInfo &MRI,
- MachineIRBuilder &MIB) {
+SmallVector<APInt>
+llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI) {
auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
if (!SrcVec2)
- return Register();
+ return SmallVector<APInt>();
auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
if (!SrcVec1)
- return Register();
+ return SmallVector<APInt>();
- const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));
-
- SmallVector<Register, 16> FoldedElements;
+ SmallVector<APInt> FoldedElements;
for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) {
auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
SrcVec2->getSourceReg(Idx), MRI);
if (!MaybeCst)
- return Register();
- auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
- FoldedElements.emplace_back(FoldedCstReg);
+ return SmallVector<APInt>();
+ FoldedElements.push_back(*MaybeCst);
}
- // Create the new vector constant.
- auto CstVec =
- MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
- return CstVec.getReg(0);
+ return FoldedElements;
}
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
@@ -1061,15 +1070,38 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
AllowUndef);
}
+Optional<APInt> llvm::getIConstantSplatVal(const Register Reg,
+ const MachineRegisterInfo &MRI) {
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) {
+ Optional<ValueAndVReg> ValAndVReg =
+ getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
+ return ValAndVReg->Value;
+ }
+
+ return None;
+}
+
+Optional<APInt> getIConstantSplatVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI);
+}
+
Optional<int64_t>
-llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+llvm::getIConstantSplatSExtVal(const Register Reg,
+ const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
- getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false))
+ getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false))
return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI);
return None;
}
+Optional<int64_t>
+llvm::getIConstantSplatSExtVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatSExtVal(MI.getOperand(0).getReg(), MRI);
+}
+
Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg,
const MachineRegisterInfo &MRI,
bool AllowUndef) {
@@ -1095,7 +1127,7 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
unsigned Opc = MI.getOpcode();
if (!isBuildVectorOp(Opc))
return None;
- if (auto Splat = getBuildVectorConstantSplat(MI, MRI))
+ if (auto Splat = getIConstantSplatSExtVal(MI, MRI))
return RegOrConstant(*Splat);
auto Reg = MI.getOperand(1).getReg();
if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
@@ -1104,6 +1136,26 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
return RegOrConstant(Reg);
}
+static bool isConstantScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowFP = true,
+ bool AllowOpaqueConstants = true) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return true;
+ case TargetOpcode::G_FCONSTANT:
+ return AllowFP;
+ case TargetOpcode::G_GLOBAL_VALUE:
+ case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_BLOCK_ADDR:
+ case TargetOpcode::G_JUMP_TABLE:
+ return AllowOpaqueConstants;
+ default:
+ return false;
+ }
+}
+
bool llvm::isConstantOrConstantVector(MachineInstr &MI,
const MachineRegisterInfo &MRI) {
Register Def = MI.getOperand(0).getReg();
@@ -1121,19 +1173,71 @@ bool llvm::isConstantOrConstantVector(MachineInstr &MI,
return true;
}
+bool llvm::isConstantOrConstantVector(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowFP, bool AllowOpaqueConstants) {
+ if (isConstantScalar(MI, MRI, AllowFP, AllowOpaqueConstants))
+ return true;
+
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return false;
+
+ const unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I != NumOps; ++I) {
+ const MachineInstr *ElementDef = MRI.getVRegDef(MI.getOperand(I).getReg());
+ if (!isConstantScalar(*ElementDef, MRI, AllowFP, AllowOpaqueConstants))
+ return false;
+ }
+
+ return true;
+}
+
Optional<APInt>
llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
const MachineRegisterInfo &MRI) {
Register Def = MI.getOperand(0).getReg();
if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
return C->Value;
- auto MaybeCst = getBuildVectorConstantSplat(MI, MRI);
+ auto MaybeCst = getIConstantSplatSExtVal(MI, MRI);
if (!MaybeCst)
return None;
const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits();
return APInt(ScalarSize, *MaybeCst, true);
}
+bool llvm::isNullOrNullSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI, bool AllowUndefs) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return AllowUndefs;
+ case TargetOpcode::G_CONSTANT:
+ return MI.getOperand(1).getCImm()->isNullValue();
+ case TargetOpcode::G_FCONSTANT: {
+ const ConstantFP *FPImm = MI.getOperand(1).getFPImm();
+ return FPImm->isZero() && !FPImm->isNegative();
+ }
+ default:
+ if (!AllowUndefs) // TODO: isBuildVectorAllZeros assumes undef is OK already
+ return false;
+ return isBuildVectorAllZeros(MI, MRI);
+ }
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndefs) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return AllowUndefs;
+ case TargetOpcode::G_CONSTANT:
+ return MI.getOperand(1).getCImm()->isAllOnesValue();
+ default:
+ if (!AllowUndefs) // TODO: isBuildVectorAllOnes assumes undef is OK already
+ return false;
+ return isBuildVectorAllOnes(MI, MRI);
+ }
+}
+
bool llvm::matchUnaryPredicate(
const MachineRegisterInfo &MRI, Register Reg,
std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index bbd9006a5d8c..f5833d3b9086 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -592,6 +592,13 @@ void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
if (const GlobalVariable *GV =
dyn_cast<GlobalVariable>(U->stripPointerCasts()))
MustKeepGlobalVariables.insert(GV);
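+ // The stripped use may itself be a ConstantArray aggregating globals
+ // (e.g. a landingpad filter clause); keep each referenced global too.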
+ else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
+ for (const Use &Elt : CA->operands()) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
}
}
}
@@ -609,6 +616,13 @@ bool GlobalMerge::doInitialization(Module &M) {
bool Changed = false;
setMustKeepGlobalVariables(M);
+ LLVM_DEBUG({
+ dbgs() << "Number of GVs that must be kept: "
+        << MustKeepGlobalVariables.size() << "\n";
+ for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
+   dbgs() << "Kept: " << *KeptGV << "\n";
+ });
// Grab all non-const globals.
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 83b8c2d0eacb..67d6a3df7807 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -23,10 +23,8 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -37,7 +35,6 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index 1b20d1da20ad..105ab908d3fa 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -28,16 +29,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index 2d38a44d5a33..5be98e114673 100644
--- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -32,17 +32,13 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index c975013db8c8..06c660807c5c 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStacks.h"
@@ -686,7 +685,7 @@ void InlineSpiller::reMaterializeAll() {
// Remove any values that were completely rematted.
for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
- for (VNInfo *VNI : llvm::make_range(LI.vni_begin(), LI.vni_end())) {
+ for (VNInfo *VNI : LI.vnis()) {
if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
@@ -839,6 +838,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
unsigned Idx = OpPair.second;
assert(MI == OpPair.first && "Instruction conflict during operand folding");
MachineOperand &MO = MI->getOperand(Idx);
+
+ // No point restoring an undef read, and we'll produce an invalid live
+ // interval.
+ // TODO: Is this really the correct way to handle undef tied uses?
+ if (MO.isUse() && !MO.readsReg() && !MO.isTied())
+ continue;
+
if (MO.isImplicit()) {
ImpReg = MO.getReg();
continue;
@@ -964,7 +970,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (!MO.isReg() || !MO.isImplicit())
break;
if (MO.getReg() == ImpReg)
- FoldMI->RemoveOperand(i - 1);
+ FoldMI->removeOperand(i - 1);
}
LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
@@ -1608,7 +1614,7 @@ void HoistSpillHelper::hoistAllSpills() {
for (unsigned i = RMEnt->getNumOperands(); i; --i) {
MachineOperand &MO = RMEnt->getOperand(i - 1);
if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
- RMEnt->RemoveOperand(i - 1);
+ RMEnt->removeOperand(i - 1);
}
}
Edit.eliminateDeadDefs(SpillsToRm, None, AA);
diff --git a/llvm/lib/CodeGen/InterferenceCache.h b/llvm/lib/CodeGen/InterferenceCache.h
index ace1691c1363..97464da9f17b 100644
--- a/llvm/lib/CodeGen/InterferenceCache.h
+++ b/llvm/lib/CodeGen/InterferenceCache.h
@@ -37,7 +37,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
SlotIndex First;
SlotIndex Last;
- BlockInterference() {}
+ BlockInterference() = default;
};
/// Entry - A cache entry containing interference information for all aliases
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 5a20580e5479..b3f38a3b53f3 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -46,6 +46,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -57,7 +58,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 230c6846dde2..43858071025a 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -19,7 +19,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -31,9 +30,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -173,10 +171,10 @@ class Polynomial {
};
/// Number of Error Bits e
- unsigned ErrorMSBs;
+ unsigned ErrorMSBs = (unsigned)-1;
/// Value
- Value *V;
+ Value *V = nullptr;
/// Coefficient B
SmallVector<std::pair<BOps, APInt>, 4> B;
@@ -185,7 +183,7 @@ class Polynomial {
APInt A;
public:
- Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) {
+ Polynomial(Value *V) : V(V) {
IntegerType *Ty = dyn_cast<IntegerType>(V->getType());
if (Ty) {
ErrorMSBs = 0;
@@ -195,12 +193,12 @@ public:
}
Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
- : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {}
+ : ErrorMSBs(ErrorMSBs), A(A) {}
Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
- : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {}
+ : ErrorMSBs(ErrorMSBs), A(BitWidth, A) {}
- Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {}
+ Polynomial() = default;
/// Increment and clamp the number of undefined bits.
void incErrorMSBs(unsigned amt) {
@@ -1206,9 +1204,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
->getNumElements();
FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI);
- SmallVector<unsigned, 4> Indices;
- for (unsigned i = 0; i < Factor; i++)
- Indices.push_back(i);
+ auto Indices = llvm::to_vector<4>(llvm::seq<unsigned>(0, Factor));
InterleavedCost = TTI.getInterleavedMemoryOpCost(
Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
InsertionPoint->getPointerAddressSpace(), CostKind);
@@ -1228,7 +1224,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
auto MSSAU = MemorySSAUpdater(&MSSA);
MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
- MSSAU.insertUse(MSSALoad);
+ MSSAU.insertUse(MSSALoad, /*RenameUses=*/true);
// Create the final SVIs and replace all uses.
int i = 0;
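
The Indices change in the hunk above leans on two ADT helpers; a minimal standalone use, assuming an LLVM build environment:

#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"

// llvm::seq produces the half-open range [0, Factor); to_vector materialises
// it into a SmallVector with inline capacity 4, matching the old push_back
// loop exactly.
llvm::SmallVector<unsigned, 4> makeIndices(unsigned Factor) {
  return llvm::to_vector<4>(llvm::seq<unsigned>(0, Factor));
}
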
diff --git a/llvm/lib/CodeGen/JMCInstrumenter.cpp b/llvm/lib/CodeGen/JMCInstrumenter.cpp
new file mode 100644
index 000000000000..23220872b532
--- /dev/null
+++ b/llvm/lib/CodeGen/JMCInstrumenter.cpp
@@ -0,0 +1,233 @@
+//===- JMCInstrumenter.cpp - JMC Instrumentation --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// JMCInstrumenter pass:
+// - instrument each function with a call to __CheckForDebuggerJustMyCode. The
+// sole argument should be defined in .msvcjmc. Each flag is 1 byte initialized
+// to 1.
+// - create the dummy COMDAT function __JustMyCode_Default to prevent link
+// errors if __CheckForDebuggerJustMyCode is not available.
+// - For MSVC:
+// add "/alternatename:__CheckForDebuggerJustMyCode=__JustMyCode_Default" to
+// "llvm.linker.options"
+// For ELF:
+// Rename __JustMyCode_Default to __CheckForDebuggerJustMyCode and mark it as
+// a weak symbol.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "jmc-instrument"
+
+namespace {
+struct JMCInstrumenter : public ModulePass {
+ static char ID;
+ JMCInstrumenter() : ModulePass(ID) {
+ initializeJMCInstrumenterPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override;
+};
+char JMCInstrumenter::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(
+ JMCInstrumenter, DEBUG_TYPE,
+ "Instrument function entry with call to __CheckForDebuggerJustMyCode",
+ false, false)
+
+ModulePass *llvm::createJMCInstrumenterPass() { return new JMCInstrumenter(); }
+
+namespace {
+const char CheckFunctionName[] = "__CheckForDebuggerJustMyCode";
+
+std::string getFlagName(DISubprogram &SP, bool UseX86FastCall) {
+ // absolute windows path: windows_backslash
+ // relative windows backslash path: windows_backslash
+ // relative windows slash path: posix
+ // absolute posix path: posix
+ // relative posix path: posix
+ sys::path::Style PathStyle =
+ has_root_name(SP.getDirectory(), sys::path::Style::windows_backslash) ||
+ SP.getDirectory().contains("\\") ||
+ SP.getFilename().contains("\\")
+ ? sys::path::Style::windows_backslash
+ : sys::path::Style::posix;
+ // Best-effort path normalization. This is to guarantee a unique flag symbol
+ // is produced for the same directory. Some builds may want to use relative
+ // paths, or paths with a specific prefix (see the -fdebug-compilation-dir
+ // flag), so only hash paths in debuginfo. Don't expand them to absolute
+ // paths.
+ SmallString<256> FilePath(SP.getDirectory());
+ sys::path::append(FilePath, PathStyle, SP.getFilename());
+ sys::path::native(FilePath, PathStyle);
+ sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, PathStyle);
+
+ // The naming convention for the flag name is __<hash>_<file name> with '.' in
+ // <file name> replaced with '@'. For example C:\file.any.c would have a flag
+ // __D032E919_file@any@c. The naming convention matches MSVC's format, but
+ // matching it is not required for JMC to work. The hashing function used
+ // here is different from MSVC's.
+
+ std::string Suffix;
+ for (auto C : sys::path::filename(FilePath, PathStyle))
+ Suffix.push_back(C == '.' ? '@' : C);
+
+ sys::path::remove_filename(FilePath, PathStyle);
+ return (UseX86FastCall ? "_" : "__") +
+ utohexstr(djbHash(FilePath), /*LowerCase=*/false,
+ /*Width=*/8) +
+ "_" + Suffix;
+}
+
+void attachDebugInfo(GlobalVariable &GV, DISubprogram &SP) {
+ Module &M = *GV.getParent();
+ DICompileUnit *CU = SP.getUnit();
+ assert(CU);
+ DIBuilder DB(M, false, CU);
+
+ auto *DType =
+ DB.createBasicType("unsigned char", 8, dwarf::DW_ATE_unsigned_char,
+ llvm::DINode::FlagArtificial);
+
+ auto *DGVE = DB.createGlobalVariableExpression(
+ CU, GV.getName(), /*LinkageName=*/StringRef(), SP.getFile(),
+ /*LineNo=*/0, DType, /*IsLocalToUnit=*/true, /*IsDefined=*/true);
+ GV.addMetadata(LLVMContext::MD_dbg, *DGVE);
+ DB.finalize();
+}
+
+FunctionType *getCheckFunctionType(LLVMContext &Ctx) {
+ Type *VoidTy = Type::getVoidTy(Ctx);
+ PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx);
+ return FunctionType::get(VoidTy, VoidPtrTy, false);
+}
+
+Function *createDefaultCheckFunction(Module &M, bool UseX86FastCall) {
+ LLVMContext &Ctx = M.getContext();
+ const char *DefaultCheckFunctionName =
+ UseX86FastCall ? "_JustMyCode_Default" : "__JustMyCode_Default";
+ // Create the function.
+ Function *DefaultCheckFunc =
+ Function::Create(getCheckFunctionType(Ctx), GlobalValue::ExternalLinkage,
+ DefaultCheckFunctionName, &M);
+ DefaultCheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ DefaultCheckFunc->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall)
+ DefaultCheckFunc->addParamAttr(0, Attribute::InReg);
+
+ BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", DefaultCheckFunc);
+ ReturnInst::Create(Ctx, EntryBB);
+ return DefaultCheckFunc;
+}
+} // namespace
+
+bool JMCInstrumenter::runOnModule(Module &M) {
+ bool Changed = false;
+ LLVMContext &Ctx = M.getContext();
+ Triple ModuleTriple(M.getTargetTriple());
+ bool IsMSVC = ModuleTriple.isKnownWindowsMSVCEnvironment();
+ bool IsELF = ModuleTriple.isOSBinFormatELF();
+ assert((IsELF || IsMSVC) && "Unsupported triple for JMC");
+ bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86;
+ const char *const FlagSymbolSection = IsELF ? ".just.my.code" : ".msvcjmc";
+
+ GlobalValue *CheckFunction = nullptr;
+ DenseMap<DISubprogram *, Constant *> SavedFlags(8);
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ auto *SP = F.getSubprogram();
+ if (!SP)
+ continue;
+
+ Constant *&Flag = SavedFlags[SP];
+ if (!Flag) {
+ std::string FlagName = getFlagName(*SP, UseX86FastCall);
+ IntegerType *FlagTy = Type::getInt8Ty(Ctx);
+ Flag = M.getOrInsertGlobal(FlagName, FlagTy, [&] {
+ // FIXME: Put the GV in comdat and have linkonce_odr linkage to save
+ // .msvcjmc section space? maybe not worth it.
+ GlobalVariable *GV = new GlobalVariable(
+ M, FlagTy, /*isConstant=*/false, GlobalValue::InternalLinkage,
+ ConstantInt::get(FlagTy, 1), FlagName);
+ GV->setSection(FlagSymbolSection);
+ GV->setAlignment(Align(1));
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ attachDebugInfo(*GV, *SP);
+ return GV;
+ });
+ }
+
+ if (!CheckFunction) {
+ Function *DefaultCheckFunc =
+ createDefaultCheckFunction(M, UseX86FastCall);
+ if (IsELF) {
+ DefaultCheckFunc->setName(CheckFunctionName);
+ DefaultCheckFunc->setLinkage(GlobalValue::WeakAnyLinkage);
+ CheckFunction = DefaultCheckFunc;
+ } else {
+ assert(!M.getFunction(CheckFunctionName) &&
+ "JMC instrument more than once?");
+ auto *CheckFunc = cast<Function>(
+ M.getOrInsertFunction(CheckFunctionName, getCheckFunctionType(Ctx))
+ .getCallee());
+ CheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ CheckFunc->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall) {
+ CheckFunc->setCallingConv(CallingConv::X86_FastCall);
+ CheckFunc->addParamAttr(0, Attribute::InReg);
+ }
+ CheckFunction = CheckFunc;
+
+ StringRef DefaultCheckFunctionName = DefaultCheckFunc->getName();
+ appendToUsed(M, {DefaultCheckFunc});
+ Comdat *C = M.getOrInsertComdat(DefaultCheckFunctionName);
+ C->setSelectionKind(Comdat::Any);
+ DefaultCheckFunc->setComdat(C);
+ // Add a linker option /alternatename to set the default implementation
+ // for the check function.
+ // https://devblogs.microsoft.com/oldnewthing/20200731-00/?p=104024
+ std::string AltOption = std::string("/alternatename:") +
+ CheckFunctionName + "=" +
+ DefaultCheckFunctionName.str();
+ llvm::Metadata *Ops[] = {llvm::MDString::get(Ctx, AltOption)};
+ MDTuple *N = MDNode::get(Ctx, Ops);
+ M.getOrInsertNamedMetadata("llvm.linker.options")->addOperand(N);
+ }
+ }
+ // FIXME: it would be nice to make CI a scheduling boundary, although in
+ // practice it does not matter much.
+ auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction,
+ {Flag}, "", &*F.begin()->getFirstInsertionPt());
+ CI->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall) {
+ CI->setCallingConv(CallingConv::X86_FastCall);
+ CI->addParamAttr(0, Attribute::InReg);
+ }
+
+ Changed = true;
+ }
+ return Changed;
+}
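
Taken together, the new pass transforms every function that carries debug info roughly as follows. This is an illustrative C++ sketch, not code from the change: the flag's real symbol name is the hashed "__<hash>_<file@name>" form described above, which is not a valid identifier, so plain names stand in.

extern "C" void __CheckForDebuggerJustMyCode(unsigned char *);

// One flag per source file, placed in .msvcjmc (COFF) or .just.my.code (ELF),
// 1 byte, initialised to 1.
static unsigned char JustMyCodeFlag = 1;

// Every function with a DISubprogram gets this call prepended to its entry
// block (32-bit x86 MSVC: fastcall convention, argument passed in a register).
void instrumentedFunction() {
  __CheckForDebuggerJustMyCode(&JustMyCodeFlag);
  // ... original body ...
}

// Weak (ELF) or COMDAT-plus-/alternatename (MSVC) fallback, so programs still
// link when no debugger runtime provides the real check function.
extern "C" void __JustMyCode_Default(unsigned char *) {}
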
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 0d3685d4141c..3192dcadb5f5 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -23,20 +23,19 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable generating trap for unreachable"));
+static cl::opt<bool>
+ EnableTrapUnreachable("trap-unreachable", cl::Hidden,
+ cl::desc("Enable generating trap for unreachable"));
void LLVMTargetMachine::initAsmInfo() {
MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
@@ -99,7 +98,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
}
TargetTransformInfo
-LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
+LLVMTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(BasicTTIImpl(this, F));
}
@@ -164,22 +163,35 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
// Create a code emitter if asked to show the encoding.
std::unique_ptr<MCCodeEmitter> MCE;
if (Options.MCOptions.ShowMCEncoding)
- MCE.reset(getTarget().createMCCodeEmitter(MII, MRI, Context));
+ MCE.reset(getTarget().createMCCodeEmitter(MII, Context));
+
+ bool UseDwarfDirectory = false;
+ switch (Options.MCOptions.MCUseDwarfDirectory) {
+ case MCTargetOptions::DisableDwarfDirectory:
+ UseDwarfDirectory = false;
+ break;
+ case MCTargetOptions::EnableDwarfDirectory:
+ UseDwarfDirectory = true;
+ break;
+ case MCTargetOptions::DefaultDwarfDirectory:
+ UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault();
+ break;
+ }
std::unique_ptr<MCAsmBackend> MAB(
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
auto FOut = std::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
- Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE),
- std::move(MAB), Options.MCOptions.ShowMCInst);
+ UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB),
+ Options.MCOptions.ShowMCInst);
AsmStreamer.reset(S);
break;
}
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, Context);
if (!MCE)
return make_error<StringError>("createMCCodeEmitter failed",
inconvertibleErrorCode());
@@ -252,6 +264,9 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
"Cannot emit MC with limited codegen pipeline");
Ctx = &MMIWP->getMMI().getContext();
+ // libunwind is unable to load compact unwind dynamically, so we must generate
+ // DWARF unwind info for the JIT.
+ Options.MCOptions.EmitDwarfUnwind = EmitDwarfUnwindType::Always;
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
@@ -259,8 +274,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
// emission fails.
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCRegisterInfo &MRI = *getMCRegisterInfo();
- MCCodeEmitter *MCE =
- getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx);
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
if (!MCE || !MAB)
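
The hunk above replaces the old boolean MCUseDwarfDirectory plumbing with a tri-state. Modelled standalone — the enumerator names mirror the MCTargetOptions values used in the diff, but the helper itself is our own sketch:

enum DwarfDirectorySetting {
  DisableDwarfDirectory,
  EnableDwarfDirectory,
  DefaultDwarfDirectory
};

// Disable/Enable force the behaviour either way; Default defers to the
// target's MCAsmInfo (MAI.enableDwarfFileDirectoryDefault() in the hunk).
static bool useDwarfDirectory(DwarfDirectorySetting S, bool TargetDefault) {
  switch (S) {
  case DisableDwarfDirectory: return false;
  case EnableDwarfDirectory:  return true;
  case DefaultDwarfDirectory: return TargetDefault;
  }
  return false; // unreachable; silences -Wreturn-type
}
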
diff --git a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 63a0d0c1c43e..39b44b917d9e 100644
--- a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -14,6 +14,7 @@
///===---------------------------------------------------------------------===//
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -87,7 +88,7 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
OwnedMBFI = std::make_unique<MachineBlockFrequencyInfo>();
OwnedMBFI->calculate(*MF, MBPI, *MLI);
- return *OwnedMBFI.get();
+ return *OwnedMBFI;
}
bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction(
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 0eb6100230bd..30ca8bd871e8 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -84,21 +84,18 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -106,27 +103,23 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
#include <algorithm>
#include <cassert>
+#include <climits>
#include <cstdint>
#include <functional>
-#include <limits.h>
-#include <limits>
#include <queue>
#include <tuple>
#include <utility>
@@ -148,6 +141,20 @@ static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
cl::desc("Act like old LiveDebugValues did"),
cl::init(false));
+// Limit on the number of stack slots we should track, past which we
+// will ignore any spills. InstrRefBasedLDV gathers detailed information on all
+// stack slots which leads to high memory consumption, and in some scenarios
+// (such as asan with very many locals) the working set of the function can be
+// very large, causing many spills. In these scenarios, it is very unlikely that
+// the developer has hundreds of variables live at the same time that they're
+// carefully thinking about -- instead, they probably autogenerated the code.
+// When this happens, gracefully stop tracking excess spill slots, rather than
+// consuming all the developer's memory.
+static cl::opt<unsigned>
+ StackWorkingSetLimit("livedebugvalues-max-stack-slots", cl::Hidden,
+ cl::desc("livedebugvalues-stack-ws-limit"),
+ cl::init(250));
+
/// Tracker for converting machine value locations and variable values into
/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
/// specifying block live-in locations and transfers within blocks.
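
The working-set limit introduced above turns an unbounded tracker into a capped one. The pattern in miniature — std::optional stands in for llvm::Optional, and all names here are ours:

#include <optional>
#include <set>

constexpr unsigned WorkingSetLimit = 250; // mirrors the cl::opt default

// Track a slot only while under budget; past the cap, report "untracked"
// (empty) instead of letting memory grow without bound.
std::optional<unsigned> getOrTrackSlot(std::set<unsigned> &Tracked,
                                       unsigned Slot) {
  if (!Tracked.count(Slot) && Tracked.size() >= WorkingSetLimit)
    return std::nullopt;
  Tracked.insert(Slot);
  return Slot;
}
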
@@ -252,7 +259,7 @@ public:
/// object fields to track variable locations as we step through the block.
/// FIXME: could just examine mloctracker instead of passing in \p mlocs?
void
- loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs,
+ loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs,
const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
unsigned NumLocs) {
ActiveMLocs.clear();
@@ -715,6 +722,20 @@ MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
StackSlotIdxes.insert({{Size, Offs}, Idx});
}
+ // There may also be strange register class sizes (think x86 fp80s).
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ unsigned Size = TRI.getRegSizeInBits(*RC);
+
+ // We might see special reserved values as sizes, and classes for other
+ // stuff the machine tries to model. If it's more than 512 bits, then it
+ // is very unlikely to be a register that can be spilt.
+ if (Size > 512)
+ continue;
+
+ unsigned Idx = StackSlotIdxes.size();
+ StackSlotIdxes.insert({{Size, 0}, Idx});
+ }
+
for (auto &Idx : StackSlotIdxes)
StackIdxesToPos[Idx.second] = Idx.first;
@@ -757,9 +778,15 @@ void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
Masks.push_back(std::make_pair(MO, InstID));
}
-SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
+Optional<SpillLocationNo> MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
SpillLocationNo SpillID(SpillLocs.idFor(L));
+
if (SpillID.id() == 0) {
+ // If there is no location, and we have reached the limit of how many stack
+ // slots to track, then don't track this one.
+ if (SpillLocs.size() >= StackWorkingSetLimit)
+ return None;
+
// Spill location is untracked: create record for this one, and all
// subregister slots too.
SpillID = SpillLocationNo(SpillLocs.insert(L));
@@ -843,19 +870,72 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
// the variable is.
if (Offset == 0) {
const SpillLoc &Spill = SpillLocs[SpillID.id()];
- Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
- Spill.SpillOffset);
unsigned Base = Spill.SpillBase;
MIB.addReg(Base);
- MIB.addImm(0);
- // Being on the stack makes this location indirect; if it was _already_
- // indirect though, we need to add extra indirection. See this test for
- // a scenario where this happens:
- // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll
+ // There are several ways we can dereference things, and several inputs
+ // to consider:
+ // * NRVO variables will appear with IsIndirect set, but should have
+ // nothing else in their DIExpressions,
+ // * Variables with DW_OP_stack_value in their expr already need an
+ // explicit dereference of the stack location,
+ // * Values that don't match the variable size need DW_OP_deref_size,
+ // * Everything else can just become a simple location expression.
+
+ // We need to use deref_size whenever there's a mismatch between the
+ // size of value and the size of variable portion being read.
+ // Additionally, we should use it whenever dealing with stack_value
+ // fragments, to avoid the consumer having to determine the deref size
+ // from DW_OP_piece.
+ bool UseDerefSize = false;
+ unsigned ValueSizeInBits = getLocSizeInBits(*MLoc);
+ unsigned DerefSizeInBytes = ValueSizeInBits / 8;
+ if (auto Fragment = Var.getFragment()) {
+ unsigned VariableSizeInBits = Fragment->SizeInBits;
+ if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex())
+ UseDerefSize = true;
+ } else if (auto Size = Var.getVariable()->getSizeInBits()) {
+ if (*Size != ValueSizeInBits) {
+ UseDerefSize = true;
+ }
+ }
+
if (Properties.Indirect) {
- std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
- Expr = DIExpression::append(Expr, Elts);
+ // This is something like an NRVO variable, where the pointer has been
+ // spilt to the stack, or a dbg.addr pointing at a coroutine frame
+ // field. It should end up being a memory location, with the pointer
+ // to the variable loaded off the stack with a deref. It can't be a
+ // DW_OP_stack_value expression.
+ assert(!Expr->isImplicit());
+ Expr = TRI.prependOffsetExpression(
+ Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
+ Spill.SpillOffset);
+ MIB.addImm(0);
+ } else if (UseDerefSize) {
+ // We're loading a value off the stack that's not the same size as the
+ // variable. Add / subtract stack offset, explicitly deref with a size,
+ // and add DW_OP_stack_value if not already present.
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size,
+ DerefSizeInBytes};
+ Expr = DIExpression::prependOpcodes(Expr, Ops, true);
+ unsigned Flags = DIExpression::StackValue | DIExpression::ApplyOffset;
+ Expr = TRI.prependOffsetExpression(Expr, Flags, Spill.SpillOffset);
+ MIB.addReg(0);
+ } else if (Expr->isComplex()) {
+ // A variable with no size ambiguity, but with extra elements in its
+ // expression. Manually dereference the stack location.
+ assert(Expr->isComplex());
+ Expr = TRI.prependOffsetExpression(
+ Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
+ Spill.SpillOffset);
+ MIB.addReg(0);
+ } else {
+ // A plain value that has been spilt to the stack, with no further
+ // context. Request a location expression, marking the DBG_VALUE as
+ // IsIndirect.
+ Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+ Spill.SpillOffset);
+ MIB.addImm(0);
}
} else {
// This is a stack location with a weird subregister offset: emit an undef
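
The four-way branch added above reduces to a small decision table. Sketched with our own enum; the DIExpression flags named in the comments are the real ones used in the hunk:

// Which expression rewrite a spilt value's DBG_VALUE needs.
enum class SpillDeref {
  PointerOnStack, // Indirect set: ApplyOffset | DerefAfter, stays indirect
  SizedDeref,     // size mismatch / stack_value fragment: DW_OP_deref_size
  ManualDeref,    // complex expr, sizes match: ApplyOffset | DerefAfter
  PlainIndirect   // simple spilt value: ApplyOffset, DBG_VALUE is indirect
};

SpillDeref classify(bool Indirect, bool SizeMismatch, bool ComplexExpr) {
  if (Indirect)
    return SpillDeref::PointerOnStack;
  if (SizeMismatch)
    return SpillDeref::SizedDeref;
  if (ComplexExpr)
    return SpillDeref::ManualDeref;
  return SpillDeref::PlainIndirect;
}
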
@@ -879,7 +959,7 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
}
/// Default construct and initialize the pass.
-InstrRefBasedLDV::InstrRefBasedLDV() {}
+InstrRefBasedLDV::InstrRefBasedLDV() = default;
bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
unsigned Reg = MTracker->LocIdxToLocID[L];
@@ -898,7 +978,7 @@ bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
// void InstrRefBasedLDV::printVarLocInMBB(..)
#endif
-SpillLocationNo
+Optional<SpillLocationNo>
InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
@@ -913,8 +993,11 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
return MTracker->getOrTrackSpillLoc({Reg, Offset});
}
-Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
- SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI);
+Optional<LocIdx>
+InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
+ Optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI);
+ if (!SpillLoc)
+ return None;
// Where in the stack slot is this value defined -- i.e., what size of value
// is this? An important question, because it could be loaded into a register
@@ -930,7 +1013,7 @@ Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr
// occur, but the safe action is to indicate the variable is optimised out.
return None;
- unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second);
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillLoc, IdxIt->second);
return MTracker->getSpillMLoc(SpillID);
}
@@ -999,14 +1082,14 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
}
bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns) {
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns) {
if (!MI.isDebugRef())
return false;
// Only handle this instruction when we are building the variable value
// transfer function.
- if (!VTracker)
+ if (!VTracker && !TTracker)
return false;
unsigned InstNo = MI.getOperand(0).getImm();
@@ -1068,15 +1151,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
if (L)
NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
} else if (OpNo != MachineFunction::DebugOperandMemNumber) {
- assert(OpNo < TargetInstr.getNumOperands());
- const MachineOperand &MO = TargetInstr.getOperand(OpNo);
-
- // Today, this can only be a register.
- assert(MO.isReg() && MO.isDef());
+ // Permit the debug-info to be completely wrong: identifying a nonexistent
+ // operand, or one that is not a register definition, means something
+ // unexpected happened during optimisation. Broken debug-info, however,
+ // shouldn't crash the compiler -- instead leave the variable value as
+ // None, which will make it appear "optimised out".
+ if (OpNo < TargetInstr.getNumOperands()) {
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ unsigned LocID = MTracker->getLocID(MO.getReg());
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+ }
- unsigned LocID = MTracker->getLocID(MO.getReg());
- LocIdx L = MTracker->LocIDToLocIdx[LocID];
- NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ if (!NewID) {
+ LLVM_DEBUG(
+ { dbgs() << "Seen instruction reference to illegal operand\n"; });
+ }
}
// else: NewID is left as None.
} else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
@@ -1162,7 +1255,8 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
// for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that
// aren't immediately available).
DbgValueProperties Properties(Expr, false);
- VTracker->defVar(MI, Properties, NewID);
+ if (VTracker)
+ VTracker->defVar(MI, Properties, NewID);
// If we're on the final pass through the function, decompose this INSTR_REF
// into a plain DBG_VALUE.
@@ -1225,7 +1319,16 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(0);
unsigned InstrNum = MI.getOperand(1).getImm();
- if (MO.isReg()) {
+ auto EmitBadPHI = [this, &MI, InstrNum](void) -> bool {
+ // Helper lambda to do any accounting when we fail to find a location for
+ // a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a
+ // dead stack slot, for example.
+ // Record a DebugPHIRecord with an empty value + location.
+ DebugPHINumToValue.push_back({InstrNum, MI.getParent(), None, None});
+ return true;
+ };
+
+ if (MO.isReg() && MO.getReg()) {
// The value is whatever's currently in the register. Read and record it,
// to be analysed later.
Register Reg = MO.getReg();
@@ -1237,57 +1340,45 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
// Ensure this register is tracked.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
MTracker->lookupOrTrackRegister(*RAI);
- } else {
+ } else if (MO.isFI()) {
// The value is whatever's in this stack slot.
- assert(MO.isFI());
unsigned FI = MO.getIndex();
// If the stack slot is dead, then this was optimized away.
// FIXME: stack slot colouring should account for slots that get merged.
if (MFI->isDeadObjectIndex(FI))
- return true;
+ return EmitBadPHI();
// Identify this spill slot, ensure it's tracked.
Register Base;
StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
SpillLoc SL = {Base, Offs};
- SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL);
-
- // Problem: what value should we extract from the stack? LLVM does not
- // record what size the last store to the slot was, and it would become
- // sketchy after stack slot colouring anyway. Take a look at what values
- // are stored on the stack, and pick the largest one that wasn't def'd
- // by a spill (i.e., the value most likely to have been def'd in a register
- // and then spilt.
- std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
- Optional<ValueIDNum> Result = None;
- Optional<LocIdx> SpillLoc = None;
- for (unsigned CS : CandidateSizes) {
- unsigned SpillID = MTracker->getLocID(SpillNo, {CS, 0});
- SpillLoc = MTracker->getSpillMLoc(SpillID);
- ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
- // If this value was defined in it's own position, then it was probably
- // an aliasing index of a small value that was spilt.
- if (Val.getLoc() != SpillLoc->asU64()) {
- Result = Val;
- break;
- }
- }
+ Optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL);
- // If we didn't find anything, we're probably looking at a PHI, or a memory
- // store folded into an instruction. FIXME: Take a guess that's it's 64
- // bits. This isn't ideal, but tracking the size that the spill is
- // "supposed" to be is more complex, and benefits a small number of
- // locations.
- if (!Result) {
- unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0});
- SpillLoc = MTracker->getSpillMLoc(SpillID);
- Result = MTracker->readMLoc(*SpillLoc);
- }
+ // We might be able to find a value, but have chosen not to, to avoid
+ // tracking too much stack information.
+ if (!SpillNo)
+ return EmitBadPHI();
+
+ // Any stack location DBG_PHI should have an associated bit-size.
+ assert(MI.getNumOperands() == 3 && "Stack DBG_PHI with no size?");
+ unsigned slotBitSize = MI.getOperand(2).getImm();
+
+ unsigned SpillID = MTracker->getLocID(*SpillNo, {slotBitSize, 0});
+ LocIdx SpillLoc = MTracker->getSpillMLoc(SpillID);
+ ValueIDNum Result = MTracker->readMLoc(SpillLoc);
// Record this DBG_PHI for later analysis.
- auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
+ auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), Result, SpillLoc});
DebugPHINumToValue.push_back(DbgPHI);
+ } else {
+ // Else: if the operand is neither a legal register nor a stack slot, then
+ // we're being fed illegal debug-info. Record an empty PHI, so that any
+ // debug users trying to read this number will be put off trying to
+ // interpret the value.
+ LLVM_DEBUG(
+ { dbgs() << "Seen DBG_PHI with unrecognised operand format\n"; });
+ return EmitBadPHI();
}
return true;
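
EmitBadPHI above enforces a "record something on every exit path" discipline. In miniature, with illustrative stand-ins for DebugPHIRecord:

#include <optional>
#include <vector>

struct PHIRecord {
  unsigned InstrNum;
  std::optional<int> Value; // empty => variable reads as "optimised out"
};

// On malformed or untrackable input, record an empty value rather than
// asserting: later lookups by instruction number still find an entry, and
// broken debug-info degrades gracefully instead of crashing the compiler.
bool emitBadPHI(std::vector<PHIRecord> &Records, unsigned InstrNum) {
  Records.push_back({InstrNum, std::nullopt});
  return true;
}
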
@@ -1357,11 +1448,12 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// If this instruction writes to a spill slot, def that slot.
if (hasFoldedStackStore(MI)) {
- SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
- for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
- unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
- LocIdx L = MTracker->getSpillMLoc(SpillID);
- MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L));
+ if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) {
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L));
+ }
}
}
@@ -1398,11 +1490,12 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Tell TTracker about any folded stack store.
if (hasFoldedStackStore(MI)) {
- SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
- for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
- unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
- LocIdx L = MTracker->getSpillMLoc(SpillID);
- TTracker->clobberMloc(L, MI.getIterator(), true);
+ if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) {
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ TTracker->clobberMloc(L, MI.getIterator(), true);
+ }
}
}
}
@@ -1438,23 +1531,24 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
}
}
-bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
- MachineFunction *MF) {
+Optional<SpillLocationNo>
+InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF) {
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
- return false;
+ return None;
// Reject any memory operand that's aliased -- we can't guarantee its value.
auto MMOI = MI.memoperands_begin();
const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
if (PVal->isAliased(MFI))
- return false;
+ return None;
if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
- return false; // This is not a spill instruction, since no valid size was
- // returned from either function.
+ return None; // This is not a spill instruction, since no valid size was
+ // returned from either function.
- return true;
+ return extractSpillBaseRegAndOffset(MI);
}
bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
@@ -1511,13 +1605,11 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
// First, if there are any DBG_VALUEs pointing at a spill slot that is
// written to, terminate that variable location. The value in memory
// will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
- if (isSpillInstruction(MI, MF)) {
- SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
-
+ if (Optional<SpillLocationNo> Loc = isSpillInstruction(MI, MF)) {
// Un-set this location and clobber, so that earlier locations don't
// continue past this store.
for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
- unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx);
+ unsigned SpillID = MTracker->getSpillIDWithIdx(*Loc, SlotIdx);
Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
if (!MLoc)
continue;
@@ -1535,7 +1627,9 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
// Try to recognise spill and restore instructions that may transfer a value.
if (isLocationSpill(MI, MF, Reg)) {
- SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
+ // isLocationSpill returning true should guarantee we can extract a
+ // location.
+ SpillLocationNo Loc = *extractSpillBaseRegAndOffset(MI);
auto DoTransfer = [&](Register SrcReg, unsigned SpillID) {
auto ReadValue = MTracker->readReg(SrcReg);
@@ -1562,10 +1656,9 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
DoTransfer(Reg, SpillID);
} else {
- Optional<SpillLocationNo> OptLoc = isRestoreInstruction(MI, MF, Reg);
- if (!OptLoc)
+ Optional<SpillLocationNo> Loc = isRestoreInstruction(MI, MF, Reg);
+ if (!Loc)
return false;
- SpillLocationNo Loc = *OptLoc;
// Assumption: we're reading from the base of the stack slot, not some
// offset into it. It seems very unlikely LLVM would ever generate
@@ -1583,22 +1676,17 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
auto ReadValue = MTracker->readMLoc(SrcIdx);
MTracker->setReg(DestReg, ReadValue);
-
- if (TTracker) {
- LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
- TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
- }
};
for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
- unsigned SpillID = MTracker->getLocID(Loc, Subreg);
+ unsigned SpillID = MTracker->getLocID(*Loc, Subreg);
DoTransfer(*SRI, SpillID);
}
// Directly look up this registers slot idx by size, and transfer.
unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
- unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ unsigned SpillID = MTracker->getLocID(*Loc, {Size, 0});
DoTransfer(Reg, SpillID);
}
return true;
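
isSpillInstruction's new signature fuses the predicate with the extraction it implied. The shape of that refactor, with hypothetical stand-in types and stub helpers:

#include <optional>

struct Insn {};                          // stand-in for MachineInstr
struct SpillLoc { unsigned Base, Offset; };

bool looksLikeSpill(const Insn &) { return false; }  // stub predicate
SpillLoc extractLoc(const Insn &) { return {0, 0}; } // stub extraction

// Before: callers asked "is this a spill?" and then separately extracted the
// location. After: one query answers both, so the two can never disagree.
std::optional<SpillLoc> isSpill(const Insn &I) {
  if (!looksLikeSpill(I))
    return std::nullopt;
  return extractLoc(I);
}
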
@@ -1724,8 +1812,8 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
AllSeenFragments.insert(ThisFragment);
}
-void InstrRefBasedLDV::process(MachineInstr &MI, ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns) {
+void InstrRefBasedLDV::process(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns) {
// Try to interpret an MI as a debug or transfer instruction. Only if it's
// none of these should we interpret it's register defs as new value
// definitions.
@@ -1775,7 +1863,10 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
// Step through each instruction in this block.
for (auto &MI : MBB) {
- process(MI);
+ // Pass null value tables when accumulating the machine transfer function;
+ // they have not been computed yet at this stage.
+ process(MI, nullptr, nullptr);
+
// Also accumulate fragment map.
if (MI.isDebugValue() || MI.isDebugRef())
accumulateFragmentMap(MI);
@@ -1864,7 +1955,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
bool InstrRefBasedLDV::mlocJoin(
MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+ FuncValueTable &OutLocs, ValueTable &InLocs) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
@@ -1965,7 +2056,7 @@ void InstrRefBasedLDV::findStackIndexInterference(
void InstrRefBasedLDV::placeMLocPHIs(
MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
- ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ FuncValueTable &MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
SmallVector<unsigned, 4> StackUnits;
findStackIndexInterference(StackUnits);
@@ -2094,7 +2185,7 @@ void InstrRefBasedLDV::placeMLocPHIs(
}
void InstrRefBasedLDV::buildMLocValueMap(
- MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+ MachineFunction &MF, FuncValueTable &MInLocs, FuncValueTable &MOutLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
std::priority_queue<unsigned int, std::vector<unsigned int>,
std::greater<unsigned int>>
@@ -2236,7 +2327,7 @@ void InstrRefBasedLDV::BlockPHIPlacement(
Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
const MachineBasicBlock &MBB, const DebugVariable &Var,
- const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
// Collect a set of locations from predecessor where its live-out value can
// be found.
@@ -2504,7 +2595,7 @@ void InstrRefBasedLDV::getBlocksForScope(
void InstrRefBasedLDV::buildVLocValueMap(
const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
// This method is much like buildMLocValueMap: but focuses on a single
// LexicalScope at a time. Pick out a set of blocks and variables that are
@@ -2765,6 +2856,11 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
auto ValueIt = VLocs.Vars.find(Var);
const DbgValue &Value = ValueIt->second;
+ // If it's an explicit assignment of "undef", that means there is no location
+ // anyway, anywhere.
+ if (Value.Kind == DbgValue::Undef)
+ return;
+
// Assign the variable value to entry to each dominated block that's in scope.
// Skip the definition block -- it's assigned the variable value in the middle
// of the block somewhere.
@@ -2790,35 +2886,6 @@ void InstrRefBasedLDV::dump_mloc_transfer(
}
#endif
-void InstrRefBasedLDV::emitLocations(
- MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs, DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
- const TargetPassConfig &TPC) {
- TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC);
- unsigned NumLocs = MTracker->getNumLocs();
-
- // For each block, load in the machine value locations and variable value
- // live-ins, then step through each instruction in the block. New DBG_VALUEs
- // to be inserted will be created along the way.
- for (MachineBasicBlock &MBB : MF) {
- unsigned bbnum = MBB.getNumber();
- MTracker->reset();
- MTracker->loadFromArray(MInLocs[bbnum], bbnum);
- TTracker->loadInlocs(MBB, MInLocs[bbnum], SavedLiveIns[MBB.getNumber()],
- NumLocs);
-
- CurBB = bbnum;
- CurInst = 1;
- for (auto &MI : MBB) {
- process(MI, MOutLocs, MInLocs);
- TTracker->checkInstForNewValues(CurInst, MI.getIterator());
- ++CurInst;
- }
- }
-
- emitTransfers(AllVarsNumbering);
-}
-
void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
// Build some useful data structures.
@@ -2861,8 +2928,172 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
#endif
}
+// Produce an "ejection map" for blocks, i.e., for each block, the
+// highest-numbered lexical scope that uses it. When a depth-first exploration
+// passes that scope, the block can be processed and its tracking information
+// freed.
+void InstrRefBasedLDV::makeDepthFirstEjectionMap(
+ SmallVectorImpl<unsigned> &EjectionMap,
+ const ScopeToDILocT &ScopeToDILocation,
+ ScopeToAssignBlocksT &ScopeToAssignBlocks) {
+ SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
+ SmallVector<std::pair<LexicalScope *, ssize_t>, 4> WorkStack;
+ auto *TopScope = LS.getCurrentFunctionScope();
+
+ // Unlike lexical scope explorers, we explore in reverse order, to find the
+ // "last" lexical scope used for each block early.
+ WorkStack.push_back({TopScope, TopScope->getChildren().size() - 1});
+
+ while (!WorkStack.empty()) {
+ auto &ScopePosition = WorkStack.back();
+ LexicalScope *WS = ScopePosition.first;
+ ssize_t ChildNum = ScopePosition.second--;
+
+ const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
+ if (ChildNum >= 0) {
+ // If ChildNum is non-negative, there are remaining children to explore.
+ // Push the child and its children-count onto the stack.
+ auto &ChildScope = Children[ChildNum];
+ WorkStack.push_back(
+ std::make_pair(ChildScope, ChildScope->getChildren().size() - 1));
+ } else {
+ WorkStack.pop_back();
+
+ // We've explored all children and any later blocks: examine all blocks
+ // in our scope. If they haven't yet had an ejection number set, then
+ // this scope will be the last to use that block.
+ auto DILocationIt = ScopeToDILocation.find(WS);
+ if (DILocationIt != ScopeToDILocation.end()) {
+ getBlocksForScope(DILocationIt->second, BlocksToExplore,
+ ScopeToAssignBlocks.find(WS)->second);
+ for (auto *MBB : BlocksToExplore) {
+ unsigned BBNum = MBB->getNumber();
+ if (EjectionMap[BBNum] == 0)
+ EjectionMap[BBNum] = WS->getDFSOut();
+ }
+
+ BlocksToExplore.clear();
+ }
+ }
+ }
+}
+
+bool InstrRefBasedLDV::depthFirstVLocAndEmit(
+ unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
+ const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC) {
+ TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC);
+ unsigned NumLocs = MTracker->getNumLocs();
+ VTracker = nullptr;
+
+ // No scopes? No variable locations.
+ if (!LS.getCurrentFunctionScope())
+ return false;
+
+ // Build map from block number to the last scope that uses the block.
+ SmallVector<unsigned, 16> EjectionMap;
+ EjectionMap.resize(MaxNumBlocks, 0);
+ makeDepthFirstEjectionMap(EjectionMap, ScopeToDILocation,
+ ScopeToAssignBlocks);
+
+ // Helper lambda for ejecting a block -- if nothing is going to use the block,
+ // we can translate the variable location information into DBG_VALUEs and then
+ // free all of InstrRefBasedLDV's data structures.
+ auto EjectBlock = [&](MachineBasicBlock &MBB) -> void {
+ unsigned BBNum = MBB.getNumber();
+ AllTheVLocs[BBNum].clear();
+
+ // Prime the transfer-tracker, and then step through all the block
+ // instructions, installing transfers.
+ MTracker->reset();
+ MTracker->loadFromArray(MInLocs[BBNum], BBNum);
+ TTracker->loadInlocs(MBB, MInLocs[BBNum], Output[BBNum], NumLocs);
+
+ CurBB = BBNum;
+ CurInst = 1;
+ for (auto &MI : MBB) {
+ process(MI, MOutLocs.get(), MInLocs.get());
+ TTracker->checkInstForNewValues(CurInst, MI.getIterator());
+ ++CurInst;
+ }
+
+ // Free machine-location tables for this block.
+ MInLocs[BBNum].reset();
+ MOutLocs[BBNum].reset();
+ // We don't need live-in variable values for this block either.
+ Output[BBNum].clear();
+ AllTheVLocs[BBNum].clear();
+ };
+
+ SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
+ SmallVector<std::pair<LexicalScope *, ssize_t>, 4> WorkStack;
+ WorkStack.push_back({LS.getCurrentFunctionScope(), 0});
+ unsigned HighestDFSIn = 0;
+
+ // Proceed to explore in depth first order.
+ while (!WorkStack.empty()) {
+ auto &ScopePosition = WorkStack.back();
+ LexicalScope *WS = ScopePosition.first;
+ ssize_t ChildNum = ScopePosition.second++;
+
+ // We observe scopes with children twice here, once descending in, once
+ // ascending out of the scope nest. Use HighestDFSIn as a ratchet to ensure
+ // we don't process a scope twice. Additionally, ignore scopes that don't
+ // have a DILocation -- by proxy, this means we never tracked any variable
+ // assignments in that scope.
+ auto DILocIt = ScopeToDILocation.find(WS);
+ if (HighestDFSIn <= WS->getDFSIn() && DILocIt != ScopeToDILocation.end()) {
+ const DILocation *DILoc = DILocIt->second;
+ auto &VarsWeCareAbout = ScopeToVars.find(WS)->second;
+ auto &BlocksInScope = ScopeToAssignBlocks.find(WS)->second;
+
+ buildVLocValueMap(DILoc, VarsWeCareAbout, BlocksInScope, Output, MOutLocs,
+ MInLocs, AllTheVLocs);
+ }
+
+ HighestDFSIn = std::max(HighestDFSIn, WS->getDFSIn());
+
+ // Descend into any scope nests.
+ const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
+ if (ChildNum < (ssize_t)Children.size()) {
+ // There are children to explore -- push onto stack and continue.
+ auto &ChildScope = Children[ChildNum];
+ WorkStack.push_back(std::make_pair(ChildScope, 0));
+ } else {
+ WorkStack.pop_back();
+
+ // We've explored a leaf, or have explored all the children of a scope.
+ // Try to eject any blocks where this is the last scope it's relevant to.
+ auto DILocationIt = ScopeToDILocation.find(WS);
+ if (DILocationIt == ScopeToDILocation.end())
+ continue;
+
+ getBlocksForScope(DILocationIt->second, BlocksToExplore,
+ ScopeToAssignBlocks.find(WS)->second);
+ for (auto *MBB : BlocksToExplore)
+ if (WS->getDFSOut() == EjectionMap[MBB->getNumber()])
+ EjectBlock(const_cast<MachineBasicBlock &>(*MBB));
+
+ BlocksToExplore.clear();
+ }
+ }
+
+ // Some artificial blocks may not have been ejected, meaning they're not
+ // connected to an actual legitimate scope. This can technically happen
+ // with things like the entry block. In theory, we shouldn't need to do
+ // anything for such out-of-scope blocks, but for the sake of being similar
+ // to VarLocBasedLDV, eject these too.
+ for (auto *MBB : ArtificialBlocks)
+ if (MOutLocs[MBB->getNumber()])
+ EjectBlock(*MBB);
+
+ return emitTransfers(AllVarsNumbering);
+}
+
bool InstrRefBasedLDV::emitTransfers(
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
// Go through all the transfers recorded in the TransferTracker -- this is
// both the live-ins to a block, and any movements of values that happen
// in the middle.
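
The ejection machinery added above is easier to see on a toy scope tree. A sketch: Scope stands in for LexicalScope, and children are visited in reverse so the deepest, latest scope claims each block first:

#include <vector>

struct Scope {                  // illustrative stand-in for LexicalScope
  unsigned DFSOut;              // depth-first exit number
  std::vector<Scope> Children;
  std::vector<unsigned> Blocks; // blocks with assignments in this scope
};

// Record, per block, the DFSOut of the last scope that uses it; once a DFS
// emission pass leaves that scope, the block's tables can be freed.
void buildEjectionMap(const Scope &S, std::vector<unsigned> &Eject) {
  for (auto It = S.Children.rbegin(), E = S.Children.rend(); It != E; ++It)
    buildEjectionMap(*It, Eject);
  for (unsigned BB : S.Blocks)
    if (Eject[BB] == 0)         // first writer wins: latest scope runs first
      Eject[BB] = S.DFSOut;
}
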
@@ -2944,24 +3175,24 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
assert(MaxNumBlocks >= 0);
++MaxNumBlocks;
+ initialSetup(MF);
+
MLocTransfer.resize(MaxNumBlocks);
vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr));
SavedLiveIns.resize(MaxNumBlocks);
- initialSetup(MF);
-
produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks);
// Allocate and initialize two array-of-arrays for the live-in and live-out
// machine values. The outer dimension is the block number; while the inner
// dimension is a LocIdx from MLocTracker.
- ValueIDNum **MOutLocs = new ValueIDNum *[MaxNumBlocks];
- ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
+ FuncValueTable MOutLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
+ FuncValueTable MInLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
unsigned NumLocs = MTracker->getNumLocs();
for (int i = 0; i < MaxNumBlocks; ++i) {
// These all auto-initialize to ValueIDNum::EmptyValue
- MOutLocs[i] = new ValueIDNum[NumLocs];
- MInLocs[i] = new ValueIDNum[NumLocs];
+ MOutLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
+ MInLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
}
// Solve the machine value dataflow problem using the MLocTransfer function,
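
The ownership change in this hunk, reduced to its essentials — int stands in for ValueIDNum, and the names are ours:

#include <memory>

using Row  = std::unique_ptr<int[]>; // one block's locations
using Grid = std::unique_ptr<Row[]>; // per-function table of blocks

// make_unique<T[]> value-initialises each element, and the Grid frees itself
// on every exit path, so the manual delete[] loops removed later in this
// diff become unnecessary.
Grid makeGrid(unsigned Blocks, unsigned Locs) {
  Grid G = std::make_unique<Row[]>(Blocks);
  for (unsigned B = 0; B != Blocks; ++B)
    G[B] = std::make_unique<int[]>(Locs);
  return G;
}
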
@@ -2974,7 +3205,10 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// either live-through machine values, or PHIs.
for (auto &DBG_PHI : DebugPHINumToValue) {
// Identify unresolved block-live-ins.
- ValueIDNum &Num = DBG_PHI.ValueRead;
+ if (!DBG_PHI.ValueRead)
+ continue;
+
+ ValueIDNum &Num = *DBG_PHI.ValueRead;
if (!Num.isPHI())
continue;
@@ -2995,7 +3229,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
MTracker->loadFromArray(MInLocs[CurBB], CurBB);
CurInst = 1;
for (auto &MI : MBB) {
- process(MI, MOutLocs, MInLocs);
+ process(MI, MOutLocs.get(), MInLocs.get());
++CurInst;
}
MTracker->reset();
@@ -3051,32 +3285,13 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
<< VarAssignCount
<< " variable assignments, exceeding limits.\n");
} else {
- // Compute the extended ranges, iterating over scopes. There might be
- // something to be said for ordering them by size/locality, but that's for
- // the future. For each scope, solve the variable value problem, producing
- // a map of variables to values in SavedLiveIns.
- for (auto &P : ScopeToVars) {
- buildVLocValueMap(ScopeToDILocation[P.first], P.second,
- ScopeToAssignBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
- vlocs);
- }
-
- // Using the computed value locations and variable values for each block,
- // create the DBG_VALUE instructions representing the extended variable
- // locations.
- emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
-
- // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
- Changed = TTracker->Transfers.size() != 0;
- }
-
- // Common clean-up of memory.
- for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
- delete[] MOutLocs[Idx];
- delete[] MInLocs[Idx];
+ // Solve the variable value problem and emit locations to blocks using a
+ // lexical-scope depth-first search; this is functionally identical to the
+ // per-scope buildVLocValueMap loop and emitLocations pass it replaces.
+ Changed = depthFirstVLocAndEmit(
+ MaxNumBlocks, ScopeToDILocation, ScopeToVars, ScopeToAssignBlocks,
+ SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC);
}
- delete[] MOutLocs;
- delete[] MInLocs;
delete MTracker;
delete TTracker;
@@ -3092,6 +3307,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
DebugPHINumToValue.clear();
OverlapFragments.clear();
SeenFragments.clear();
+ SeenDbgPHIs.clear();
return Changed;
}
@@ -3193,9 +3409,10 @@ public:
/// Machine location where any PHI must occur.
LocIdx Loc;
/// Table of live-in machine value numbers for blocks / locations.
- ValueIDNum **MLiveIns;
+ const ValueTable *MLiveIns;
- LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {}
+ LDVSSAUpdater(LocIdx L, const ValueTable *MLiveIns)
+ : Loc(L), MLiveIns(MLiveIns) {}
void reset() {
for (auto &Block : BlockMap)
@@ -3352,11 +3569,28 @@ public:
} // end namespace llvm
-Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns,
- MachineInstr &Here,
- uint64_t InstrNum) {
+Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
+ MachineFunction &MF, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
+ assert(MLiveOuts && MLiveIns &&
+ "Tried to resolve DBG_PHI before location "
+ "tables allocated?");
+
+ // This function will be called twice per DBG_INSTR_REF, and might end up
+ // computing lots of SSA information: memoize it.
+ auto SeenDbgPHIIt = SeenDbgPHIs.find(&Here);
+ if (SeenDbgPHIIt != SeenDbgPHIs.end())
+ return SeenDbgPHIIt->second;
+
+ Optional<ValueIDNum> Result =
+ resolveDbgPHIsImpl(MF, MLiveOuts, MLiveIns, Here, InstrNum);
+ SeenDbgPHIs.insert({&Here, Result});
+ return Result;
+}
+
+Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
+ MachineFunction &MF, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
// Pick out records of DBG_PHI instructions that have been observed. If there
// are none, then we cannot compute a value number.
auto RangePair = std::equal_range(DebugPHINumToValue.begin(),
@@ -3368,17 +3602,24 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
if (LowerIt == UpperIt)
return None;
+ // If any DBG_PHIs referred to a location we didn't understand, don't try to
+ // compute a value. There might be scenarios where we could recover a value
+ // for some range of DBG_INSTR_REFs, but at this point we can have high
+ // confidence that we've seen a bug.
+ auto DBGPHIRange = make_range(LowerIt, UpperIt);
+ for (const DebugPHIRecord &DBG_PHI : DBGPHIRange)
+ if (!DBG_PHI.ValueRead)
+ return None;
+
// If there's only one DBG_PHI, then that is our value number.
if (std::distance(LowerIt, UpperIt) == 1)
- return LowerIt->ValueRead;
-
- auto DBGPHIRange = make_range(LowerIt, UpperIt);
+ return *LowerIt->ValueRead;
// Pick out the location (physreg, slot) where any PHIs must occur. It's
// technically possible for us to merge values in different registers in each
// block, but highly unlikely that LLVM will generate such code after register
// allocation.
- LocIdx Loc = LowerIt->ReadLoc;
+ LocIdx Loc = *LowerIt->ReadLoc;
// We have several DBG_PHIs, and a use position (the Here inst). All each
// DBG_PHI does is identify a value at a program position. We can treat each
@@ -3397,7 +3638,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
// for the SSAUpdater.
for (const auto &DBG_PHI : DBGPHIRange) {
LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
- const ValueIDNum &Num = DBG_PHI.ValueRead;
+ const ValueIDNum &Num = *DBG_PHI.ValueRead;
AvailableValues.insert(std::make_pair(Block, Num.asU64()));
}
@@ -3431,7 +3672,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
// Define all the input DBG_PHI values in ValidatedValues.
for (const auto &DBG_PHI : DBGPHIRange) {
LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
- const ValueIDNum &Num = DBG_PHI.ValueRead;
+ const ValueIDNum &Num = *DBG_PHI.ValueRead;
ValidatedValues.insert(std::make_pair(Block, Num));
}
@@ -3456,7 +3697,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
return None;
ValueIDNum ValueToCheck;
- ValueIDNum *BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
+ const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
auto VVal = ValidatedValues.find(PHIIt.first);
if (VVal == ValidatedValues.end()) {
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index e7383209c027..70aae47c8bdc 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -10,17 +10,14 @@
#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "LiveDebugValues.h"
@@ -171,6 +168,13 @@ public:
static ValueIDNum TombstoneValue;
};
+/// Type for a table of values in a block.
+using ValueTable = std::unique_ptr<ValueIDNum[]>;
+
+/// Type for a table-of-table-of-values, i.e., the collection of either
+/// live-in or live-out values for each block in the function.
+using FuncValueTable = std::unique_ptr<ValueTable[]>;
+
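ValueTable and FuncValueTable describe a block-by-location matrix owned through unique_ptr: the outer array is indexed by block number, the inner by LocIdx. A minimal sketch of allocating and indexing such a table, assuming illustrative sizes and names:

#include <cstdint>
#include <memory>

struct ValueIDNum { uint64_t V = 0; };
using ValueTable = std::unique_ptr<ValueIDNum[]>;
using FuncValueTable = std::unique_ptr<ValueTable[]>;

// One inner table per block, one slot per tracked location.
FuncValueTable makeTable(unsigned NumBlocks, unsigned NumLocs) {
  FuncValueTable T = std::make_unique<ValueTable[]>(NumBlocks);
  for (unsigned BB = 0; BB != NumBlocks; ++BB)
    T[BB] = std::make_unique<ValueIDNum[]>(NumLocs); // value-initialised
  return T;
}

int main() {
  FuncValueTable LiveIns = makeTable(/*NumBlocks=*/4, /*NumLocs=*/32);
  ValueIDNum LiveIn = LiveIns[2][5]; // outer: block number, inner: LocIdx
  (void)LiveIn;
}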
/// Thin wrapper around an integer -- designed to give more type safety to
/// spill location numbers.
class SpillLocationNo {
@@ -192,7 +196,7 @@ public:
};
/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
-/// the the value, and Boolean of whether or not it's indirect.
+/// the value, and Boolean of whether or not it's indirect.
class DbgValueProperties {
public:
DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
@@ -507,7 +511,7 @@ public:
/// Load values for each location from array of ValueIDNums. Take current
/// bbnum just in case we read a value from a hitherto untouched register.
- void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+ void loadFromArray(ValueTable &Locs, unsigned NewCurBB) {
CurBB = NewCurBB;
// Iterate over all tracked locations, and load each locations live-in
// value into our local index.
@@ -616,7 +620,9 @@ public:
void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
/// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
- SpillLocationNo getOrTrackSpillLoc(SpillLoc L);
+  /// Returns None in scenarios where a spill slot could be tracked, but
+ /// we would likely run into resource limitations.
+ Optional<SpillLocationNo> getOrTrackSpillLoc(SpillLoc L);
// Get LocIdx of a spill ID.
LocIdx getSpillMLoc(unsigned SpillID) {
@@ -627,6 +633,19 @@ public:
/// Return true if Idx is a spill machine location.
bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+ /// How large is this location (aka, how wide is a value defined there?).
+ unsigned getLocSizeInBits(LocIdx L) const {
+ unsigned ID = LocIdxToLocID[L];
+ if (!isSpill(L)) {
+ return TRI.getRegSizeInBits(Register(ID), MF.getRegInfo());
+ } else {
+      // The slot location on the stack is uninteresting; we care about the
+ // position of the value within the slot (which comes with a size).
+ StackSlotPos Pos = locIDToSpillIdx(ID);
+ return Pos.first;
+ }
+ }
+
MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
MLocIterator end() {
@@ -678,7 +697,7 @@ public:
/// movement of values between locations inside of a block is handled at a
/// much later stage, in the TransferTracker class.
MapVector<DebugVariable, DbgValue> Vars;
- DenseMap<DebugVariable, const DILocation *> Scopes;
+ SmallDenseMap<DebugVariable, const DILocation *, 8> Scopes;
MachineBasicBlock *MBB = nullptr;
const OverlapMap &OverlappingFragments;
DbgValueProperties EmptyProperties;
@@ -747,6 +766,11 @@ public:
Scopes[Overlapped] = Loc;
}
}
+
+ void clear() {
+ Vars.clear();
+ Scopes.clear();
+ }
};
// XXX XXX docs
@@ -844,10 +868,16 @@ private:
/// Record of where we observed a DBG_PHI instruction.
class DebugPHIRecord {
public:
- uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
- MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
- ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
- LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+ /// Instruction number of this DBG_PHI.
+ uint64_t InstrNum;
+ /// Block where DBG_PHI occurred.
+ MachineBasicBlock *MBB;
+ /// The value number read by the DBG_PHI -- or None if it didn't refer to
+ /// a value.
+ Optional<ValueIDNum> ValueRead;
+ /// Register/Stack location the DBG_PHI reads -- or None if it referred to
+ /// something unexpected.
+ Optional<LocIdx> ReadLoc;
operator unsigned() const { return InstrNum; }
};
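Because ValueRead and ReadLoc are now Optional, a DBG_PHI that read an unrecognised location leaves a None in its record, and resolveDbgPHIsImpl refuses to compute anything when any record is incomplete. A tiny sketch of that early-bail shape (std::optional standing in for llvm::Optional; the real code goes on to run SSA construction rather than returning the first value):

#include <optional>
#include <vector>

struct Record { std::optional<unsigned> ValueRead; };

// Bail out if any record is incomplete; otherwise use the data.
std::optional<unsigned> combine(const std::vector<Record> &Recs) {
  for (const Record &R : Recs)
    if (!R.ValueRead)
      return std::nullopt; // one unreadable DBG_PHI poisons the lot
  if (Recs.empty())
    return std::nullopt;
  return Recs.front().ValueRead; // stand-in for the real SSA solve
}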
@@ -862,6 +892,12 @@ private:
OverlapMap OverlapFragments;
VarToFragments SeenFragments;
+ /// Mapping of DBG_INSTR_REF instructions to their values, for those
+ /// DBG_INSTR_REFs that call resolveDbgPHIs. These variable references solve
+  /// a mini SSA problem caused by DBG_PHIs being cloned; this collection
+  /// caches the result.
+ DenseMap<MachineInstr *, Optional<ValueIDNum>> SeenDbgPHIs;
+
/// True if we need to examine call instructions for stack clobbers. We
/// normally assume that they don't clobber SP, but stack probes on Windows
/// do.
@@ -873,7 +909,8 @@ private:
StringRef StackProbeSymbolName;
/// Tests whether this instruction is a spill to a stack slot.
- bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+ Optional<SpillLocationNo> isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF);
/// Decide if @MI is a spill instruction and return true if it is. We use 2
/// criteria to make this decision:
@@ -891,11 +928,12 @@ private:
/// Given a spill instruction, extract the spill slot information, ensure it's
/// tracked, and return the spill number.
- SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI);
+ Optional<SpillLocationNo>
+ extractSpillBaseRegAndOffset(const MachineInstr &MI);
/// Observe a single instruction while stepping through a block.
- void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
- ValueIDNum **MLiveIns = nullptr);
+ void process(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
/// Examines whether \p MI is a DBG_VALUE and notifies trackers.
/// \returns true if MI was recognized and processed.
@@ -903,8 +941,8 @@ private:
/// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
/// \returns true if MI was recognized and processed.
- bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns);
+ bool transferDebugInstrRef(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
/// Stores value-information about where this PHI occurred, and what
/// instruction number is associated with it.
@@ -936,9 +974,15 @@ private:
/// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
/// \returns The machine value number at position Here, or None.
Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns, MachineInstr &Here,
- uint64_t InstrNum);
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
+ MachineInstr &Here, uint64_t InstrNum);
+
+ Optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
+ MachineInstr &Here,
+ uint64_t InstrNum);
/// Step through the function, recording register definitions and movements
/// in an MLocTracker. Convert the observations into a per-block transfer
@@ -954,8 +998,8 @@ private:
/// live-out arrays to the (initialized to zero) multidimensional arrays in
/// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
/// number, the inner by LocIdx.
- void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
- ValueIDNum **MOutLocs,
+ void buildMLocValueMap(MachineFunction &MF, FuncValueTable &MInLocs,
+ FuncValueTable &MOutLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer);
/// Examine the stack indexes (i.e. offsets within the stack) to find the
@@ -966,7 +1010,7 @@ private:
/// the IDF of each register.
void placeMLocPHIs(MachineFunction &MF,
SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
- ValueIDNum **MInLocs,
+ FuncValueTable &MInLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer);
/// Propagate variable values to blocks in the common case where there's
@@ -997,7 +1041,7 @@ private:
/// is true, revisiting this block is necessary.
bool mlocJoin(MachineBasicBlock &MBB,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs);
+ FuncValueTable &OutLocs, ValueTable &InLocs);
/// Produce a set of blocks that are in the current lexical scope. This means
/// those blocks that contain instructions "in" the scope, blocks where
@@ -1025,11 +1069,11 @@ private:
/// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
/// locations through.
void buildVLocValueMap(const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
- SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
- LiveInsT &Output, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- SmallVectorImpl<VLocTracker> &AllTheVLocs);
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs,
+ FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
  /// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
/// live-in values coming from predecessors live-outs, and replaces any PHIs
@@ -1047,21 +1091,9 @@ private:
/// \returns Value ID of a machine PHI if an appropriate one is available.
Optional<ValueIDNum>
pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
- const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
- /// Given the solutions to the two dataflow problems, machine value locations
- /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
- /// TransferTracker class over the function to produce live-in and transfer
- /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
- /// order given by AllVarsNumbering -- this could be any stable order, but
- /// right now "order of appearence in function, when explored in RPO", so
- /// that we can compare explictly against VarLocBasedImpl.
- void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
- const TargetPassConfig &TPC);
-
/// Take collections of DBG_VALUE instructions stored in TTracker, and
/// install them into their output blocks. Preserves a stable order of
/// DBG_VALUEs produced (which would otherwise cause nondeterminism) through
@@ -1072,6 +1104,28 @@ private:
/// RPOT block ordering.
void initialSetup(MachineFunction &MF);
+ /// Produce a map of the last lexical scope that uses a block, using the
+  /// scope's DFSOut number. Mapping is block-number to DFSOut.
+  /// \p EjectionMap Pre-allocated vector in which to install the built map.
+ /// \p ScopeToDILocation Mapping of LexicalScopes to their DILocations.
+ /// \p AssignBlocks Map of blocks where assignments happen for a scope.
+ void makeDepthFirstEjectionMap(SmallVectorImpl<unsigned> &EjectionMap,
+ const ScopeToDILocT &ScopeToDILocation,
+ ScopeToAssignBlocksT &AssignBlocks);
+
+ /// When determining per-block variable values and emitting to DBG_VALUEs,
+  /// this function explores by lexical scope depth. Doing so means that
+  /// per-block information can be fully computed before exploration finishes,
+ /// allowing us to emit it and free data structures earlier than otherwise.
+ /// It's also good for locality.
+ bool depthFirstVLocAndEmit(
+ unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
+ const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC);
+
bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
TargetPassConfig *TPC, unsigned InputBBLimit,
unsigned InputDbgValLimit) override;
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 8f697611a82c..141008ac2296 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -8,14 +8,16 @@
#include "LiveDebugValues.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
/// \file LiveDebugValues.cpp
///
@@ -65,7 +67,7 @@ public:
static char ID;
LiveDebugValues();
- ~LiveDebugValues() {}
+ ~LiveDebugValues() = default;
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -123,6 +125,11 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
}
bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
+  // Enable by default on x86_64; disable if explicitly turned off on cmdline.
+ if (T.getArch() == llvm::Triple::x86_64 &&
+ ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE)
+ return true;
+
// Enable if explicitly requested on command line.
return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
}
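The tristate flag therefore behaves as: on x86_64, instruction referencing is on unless explicitly disabled; on other targets it is off unless explicitly enabled. A small sketch of the same decision, with a plain enum standing in for cl::boolOrDefault:

enum class Tristate { Unset, True, False };

bool shouldUseInstrRef(bool IsX86_64, Tristate Flag) {
  if (IsX86_64 && Flag != Tristate::False)
    return true;                 // default-on unless explicitly disabled
  return Flag == Tristate::True; // elsewhere: opt-in only
}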
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index 8f0b2ec3e1fc..6cc1685c0022 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -9,12 +9,11 @@
#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/ADT/Triple.h"
-
namespace llvm {
+class MachineDominatorTree;
+class MachineFunction;
+class TargetPassConfig;
+class Triple;
// Inline namespace for types / symbols shared between different
// LiveDebugValues implementations.
@@ -28,7 +27,7 @@ public:
virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
TargetPassConfig *TPC, unsigned InputBBLimit,
unsigned InputDbgValLimit) = 0;
- virtual ~LDVImpl() {}
+ virtual ~LDVImpl() = default;
};
} // namespace SharedLiveDebugValues
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 42a0967bce3f..24c00b8a10ec 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -118,18 +118,15 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/UniqueVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -137,16 +134,11 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
@@ -922,14 +914,14 @@ private:
std::unique_ptr<VarLocSet> &VLS = Locs[MBB];
if (!VLS)
VLS = std::make_unique<VarLocSet>(Alloc);
- return *VLS.get();
+ return *VLS;
}
const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB,
const VarLocInMBB &Locs) const {
auto It = Locs.find(MBB);
assert(It != Locs.end() && "MBB not in map");
- return *It->second.get();
+ return *It->second;
}
/// Tests whether this instruction is a spill to a stack location.
@@ -1035,9 +1027,9 @@ public:
// Implementation
//===----------------------------------------------------------------------===//
-VarLocBasedLDV::VarLocBasedLDV() { }
+VarLocBasedLDV::VarLocBasedLDV() = default;
-VarLocBasedLDV::~VarLocBasedLDV() { }
+VarLocBasedLDV::~VarLocBasedLDV() = default;
/// Erase a variable from the set of open ranges, and additionally erase any
/// fragments that may overlap it. If the VarLoc is a backup location, erase
@@ -1948,7 +1940,7 @@ bool VarLocBasedLDV::join(
// Just copy over the Out locs to incoming locs for the first visited
// predecessor, and for all other predecessors join the Out locs.
- VarLocSet &OutLocVLS = *OL->second.get();
+ VarLocSet &OutLocVLS = *OL->second;
if (!NumVisited)
InLocsT = OutLocVLS;
else
@@ -2007,7 +1999,7 @@ void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs,
for (auto &Iter : PendingInLocs) {
// Map is keyed on a constant pointer, unwrap it so we can insert insts.
auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first);
- VarLocSet &Pending = *Iter.second.get();
+ VarLocSet &Pending = *Iter.second;
SmallVector<VarLoc, 32> VarLocs;
collectAllVarLocs(VarLocs, Pending, VarLocIDs);
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 6d806135240e..35cf25330186 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -38,11 +39,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -50,15 +49,12 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -976,7 +972,7 @@ void UserValue::extendDef(
if (Segment->end < Stop) {
Stop = Segment->end;
Kills = {Stop, {LII.first}};
- } else if (Segment->end == Stop && Kills.hasValue()) {
+ } else if (Segment->end == Stop && Kills) {
// If multiple locations end at the same place, track all of them in
// Kills.
Kills->second.push_back(LII.first);
@@ -1854,16 +1850,33 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
unsigned SpillSize, SpillOffset;
- // Test whether this location is legal with the given subreg.
+ unsigned regSizeInBits = TRI->getRegSizeInBits(*TRC);
+ if (SubReg)
+ regSizeInBits = TRI->getSubRegIdxSize(SubReg);
+
+ // Test whether this location is legal with the given subreg. If the
+      // subregister has a nonzero offset, drop this location; it's too
+      // complex to describe (TODO: future work).
bool Success =
TII->getStackSlotRange(TRC, SubReg, SpillSize, SpillOffset, *MF);
- if (Success) {
+ if (Success && SpillOffset == 0) {
auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(),
TII->get(TargetOpcode::DBG_PHI));
Builder.addFrameIndex(VRM->getStackSlot(Reg));
Builder.addImm(InstNum);
+ // Record how large the original value is. The stack slot might be
+ // merged and altered during optimisation, but we will want to know how
+      // large the value is at this DBG_PHI.
+ Builder.addImm(regSizeInBits);
+ }
+
+ LLVM_DEBUG(
+ if (SpillOffset != 0) {
+ dbgs() << "DBG_PHI for Vreg " << Reg << " subreg " << SubReg <<
+ " has nonzero offset\n";
}
+ );
}
// If there was no mapping for a value ID, it's optimized out. Create no
// DBG_PHI, and any variables using this value will become optimized out.
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index 9ded0fb6ae0a..9378aaeb181c 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -348,23 +348,8 @@ private:
//===----------------------------------------------------------------------===//
LiveRange::iterator LiveRange::find(SlotIndex Pos) {
- // This algorithm is basically std::upper_bound.
- // Unfortunately, std::upper_bound cannot be used with mixed types until we
- // adopt C++0x. Many libraries can do it, but not all.
- if (empty() || Pos >= endIndex())
- return end();
- iterator I = begin();
- size_t Len = size();
- do {
- size_t Mid = Len >> 1;
- if (Pos < I[Mid].end) {
- Len = Mid;
- } else {
- I += Mid + 1;
- Len -= Mid + 1;
- }
- } while (Len);
- return I;
+ return llvm::partition_point(*this,
+ [&](const Segment &X) { return X.end <= Pos; });
}
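llvm::partition_point forwards to std::partition_point, which performs the same binary search the removed loop hand-rolled: it returns the first segment whose end is greater than Pos, i.e. a mixed-type upper_bound. A standalone illustration over plain integers:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> SegmentEnds = {4, 8, 15, 16, 23};
  int Pos = 9;
  // First element for which the predicate is false, i.e. first End > Pos;
  // everything with End <= Pos sits before the partition point.
  auto It = std::partition_point(SegmentEnds.begin(), SegmentEnds.end(),
                                 [&](int End) { return End <= Pos; });
  assert(It != SegmentEnds.end() && *It == 15);
  (void)It;
}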
VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) {
diff --git a/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index 2756086cb8b1..3176d73b35f6 100644
--- a/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -11,13 +11,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervalCalc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -25,12 +21,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
-#include <iterator>
-#include <tuple>
-#include <utility>
using namespace llvm;
diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 50b31e1eb247..11a4ecf0bef9 100644
--- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -26,7 +26,8 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
// Merge a LiveInterval's segments. Guarantee no overlaps.
-void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
+void LiveIntervalUnion::unify(const LiveInterval &VirtReg,
+ const LiveRange &Range) {
if (Range.empty())
return;
++Tag;
@@ -53,7 +54,8 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
}
// Remove a live virtual register's segments from this union.
-void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) {
+void LiveIntervalUnion::extract(const LiveInterval &VirtReg,
+ const LiveRange &Range) {
if (Range.empty())
return;
++Tag;
@@ -99,7 +101,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
}
#endif //!NDEBUG
-LiveInterval *LiveIntervalUnion::getOneVReg() const {
+const LiveInterval *LiveIntervalUnion::getOneVReg() const {
if (empty())
return nullptr;
for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
@@ -111,7 +113,8 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
-bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+bool LiveIntervalUnion::Query::isSeenInterference(
+ const LiveInterval *VirtReg) const {
return is_contained(InterferingVRegs, VirtReg);
}
@@ -147,14 +150,14 @@ LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) {
}
LiveRange::const_iterator LREnd = LR->end();
- LiveInterval *RecentReg = nullptr;
+ const LiveInterval *RecentReg = nullptr;
while (LiveUnionI.valid()) {
assert(LRI != LREnd && "Reached end of LR");
// Check for overlapping interference.
while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) {
// This is an overlap, record the interfering register.
- LiveInterval *VReg = LiveUnionI.value();
+ const LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
RecentReg = VReg;
InterferingVRegs.push_back(VReg);
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 9571afa434c1..7d825a8bf853 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -33,22 +33,20 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/StackMaps.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -149,7 +147,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
getRegUnit(i);
}
LLVM_DEBUG(dump());
- return true;
+ return false;
}
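The return value of runOnMachineFunction tells the pass manager whether the pass modified the function; LiveIntervals only computes analysis results, so returning true here was wrong. A minimal sketch of that contract, with hypothetical stand-in types:

struct Function { int Size = 3; };

// Stand-in for the pass-manager contract: run() reports whether it
// mutated its input. Pure analyses must report false.
struct LivenessAnalysis {
  int ComputedInfo = 0;
  bool run(const Function &F) {
    ComputedInfo = F.Size * 2; // derive data, touch nothing in F
    return false;              // no modification
  }
};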
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
@@ -500,7 +498,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Create new live ranges with only minimal live segments per def.
LiveRange NewLR;
- createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end()));
+ createSegmentsForValues(NewLR, li->vnis());
extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone());
// Move the trimmed segments back.
@@ -604,7 +602,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
// Create a new live ranges with only minimal live segments per def.
LiveRange NewLR;
- createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
+ createSegmentsForValues(NewLR, SR.vnis());
extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask);
// Move the trimmed ranges back.
@@ -913,11 +911,11 @@ static bool hasLiveThroughUse(const MachineInstr *MI, Register Reg) {
return false;
}
-bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+bool LiveIntervals::checkRegMaskInterference(const LiveInterval &LI,
BitVector &UsableRegs) {
if (LI.empty())
return false;
- LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+ LiveInterval::const_iterator LiveI = LI.begin(), LiveE = LI.end();
  // Use smaller arrays for local live ranges.
ArrayRef<SlotIndex> Slots;
diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 3ef28042acb0..26f6e1ede1ad 100644
--- a/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -20,11 +20,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 05768140cbdf..58eb4110f153 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -371,7 +371,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
const MachineOperand &MO = MI->getOperand(i-1);
if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
continue;
- MI->RemoveOperand(i-1);
+ MI->removeOperand(i-1);
}
LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
} else {
diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 054f4370b609..8e56985246db 100644
--- a/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 4c0172a930b5..6ca7f00a7885 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -78,13 +78,13 @@ void LiveRegMatrix::releaseMemory() {
template <typename Callable>
static bool foreachUnit(const TargetRegisterInfo *TRI,
- LiveInterval &VRegInterval, MCRegister PhysReg,
+ const LiveInterval &VRegInterval, MCRegister PhysReg,
Callable Func) {
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = (*Units).first;
LaneBitmask Mask = (*Units).second;
- for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
+ for (const LiveInterval::SubRange &S : VRegInterval.subranges()) {
if ((S.LaneMask & Mask).any()) {
if (Func(Unit, S))
return true;
@@ -101,7 +101,7 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
return false;
}
-void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
+void LiveRegMatrix::assign(const LiveInterval &VirtReg, MCRegister PhysReg) {
LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to "
<< printReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment");
@@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
LLVM_DEBUG(dbgs() << '\n');
}
-void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+void LiveRegMatrix::unassign(const LiveInterval &VirtReg) {
Register PhysReg = VRM->getPhys(VirtReg.reg());
LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI)
<< " from " << printReg(PhysReg, TRI) << ':');
@@ -143,7 +143,7 @@ bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
return false;
}
-bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+bool LiveRegMatrix::checkRegMaskInterference(const LiveInterval &VirtReg,
MCRegister PhysReg) {
// Check if the cached information is valid.
// The same BitVector can be reused for all PhysRegs.
@@ -161,7 +161,7 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
}
-bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+bool LiveRegMatrix::checkRegUnitInterference(const LiveInterval &VirtReg,
MCRegister PhysReg) {
if (VirtReg.empty())
return false;
@@ -183,7 +183,8 @@ LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
}
LiveRegMatrix::InterferenceKind
-LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) {
+LiveRegMatrix::checkInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg) {
if (VirtReg.empty())
return IK_Free;
@@ -237,7 +238,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
}
Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
- LiveInterval *VRegInterval = nullptr;
+ const LiveInterval *VRegInterval = nullptr;
for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
if ((VRegInterval = Matrix[*Unit].getOneVReg()))
return VRegInterval->reg();
diff --git a/llvm/lib/CodeGen/LiveStacks.cpp b/llvm/lib/CodeGen/LiveStacks.cpp
index 8df84ebf4f06..8fc5a929d77b 100644
--- a/llvm/lib/CodeGen/LiveStacks.cpp
+++ b/llvm/lib/CodeGen/LiveStacks.cpp
@@ -13,12 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveStacks.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "livestacks"
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 37fd3e4853ac..5f54d7cc8472 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -118,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
// If the target doesn't want/need this pass, or if there are no locals
// to consider, early exit.
if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF))
- return true;
+ return false;
// Make sure we have enough space to store the local offsets.
LocalOffsets.resize(MFI.getObjectIndexEnd());
@@ -344,7 +343,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineBasicBlock *Entry = &Fn.front();
- unsigned BaseReg = 0;
+ Register BaseReg;
int64_t BaseOffset = 0;
// Loop through the frame references and allocate for them as necessary.
@@ -414,20 +413,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
continue;
}
- const MachineFunction *MF = MI.getMF();
- const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
- BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
-
- LLVM_DEBUG(dbgs() << " Materializing base register"
- << " at frame local offset "
- << LocalOffset + InstrOffset);
-
// Tell the target to insert the instruction to initialize
// the base register.
// MachineBasicBlock::iterator InsertionPt = Entry->begin();
BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
- LLVM_DEBUG(dbgs() << " into " << printReg(BaseReg, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << " Materialized base register at frame local offset "
+ << LocalOffset + InstrOffset
+ << " into " << printReg(BaseReg, TRI) << '\n');
// The base register already includes any offset specified
// by the instruction, so account for that so it doesn't get
@@ -437,7 +430,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
++NumBaseRegisters;
UsedBaseReg = true;
}
- assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+ assert(BaseReg && "Unable to allocate virtual base register!");
// Modify the instruction to use the new base register rather
// than the frame index operand.
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index dce64ab9f5ca..b47c96e50831 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp
index a06d1d6255c7..984dc452fbfd 100644
--- a/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 3ec8c627f131..eea24d8e9353 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -27,15 +27,12 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <queue>
-
using namespace llvm;
#define DEBUG_TYPE "mir-canonicalizer"
@@ -106,10 +103,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
- llvm::sort(StringInstrMap,
- [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
- return (a.first < b.first);
- });
+ llvm::sort(StringInstrMap, llvm::less_first());
for (auto &II : StringInstrMap) {
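llvm::less_first is a ready-made comparator that orders pairs by their first member, replacing the one-off lambda. The same idea with the standard library (a sketch, using std::sort on std::pair):

#include <algorithm>
#include <cassert>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<std::string, int>> M = {
      {"mov", 2}, {"add", 0}, {"cmp", 1}};
  // Equivalent of llvm::less_first(): compare .first members only.
  std::sort(M.begin(), M.end(),
            [](const auto &A, const auto &B) { return A.first < B.first; });
  assert(M.front().first == "add" && M.back().first == "mov");
}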
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index bf78594e9b23..3152102410d7 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -15,12 +15,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
-#include <unordered_map>
using namespace llvm;
using namespace sampleprof;
@@ -68,6 +70,8 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (!EnableFSDiscriminator)
return false;
+ if (!MF.getFunction().isDebugInfoForProfiling())
+ return false;
bool Changed = false;
using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
@@ -131,6 +135,7 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (Changed) {
createFSDiscriminatorVariable(MF.getFunction().getParent());
LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n");
+ (void) NumNewD;
}
return Changed;
diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp
index 9f61dd9ef243..bc65700aba06 100644
--- a/llvm/lib/CodeGen/MIRNamerPass.cpp
+++ b/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -18,11 +18,7 @@
#include "MIRVRegNamerUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 0ca820f160aa..b0daa20913f5 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
-#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>
@@ -250,7 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("dereferenceable", MIToken::kw_dereferenceable)
.Case("invariant", MIToken::kw_invariant)
.Case("align", MIToken::kw_align)
- .Case("basealign", MIToken::kw_align)
+ .Case("basealign", MIToken::kw_basealign)
.Case("addrspace", MIToken::kw_addrspace)
.Case("stack", MIToken::kw_stack)
.Case("got", MIToken::kw_got)
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 6477965bdc21..40ae7053ea09 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -26,8 +26,6 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -38,6 +36,8 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -60,7 +60,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -69,10 +68,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
@@ -744,7 +741,7 @@ bool MIParser::parseBasicBlockDefinition(
MBB->setIsEHPad(IsLandingPad);
MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
- if (SectionID.hasValue()) {
+ if (SectionID) {
MBB->setSectionID(SectionID.getValue());
MF.setBBSectionsType(BasicBlockSection::List);
}
@@ -1094,11 +1091,23 @@ bool MIParser::parse(MachineInstr *&MI) {
return true;
}
- // TODO: Check for extraneous machine operands.
MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
MI->setFlags(Flags);
- for (const auto &Operand : Operands)
+
+ unsigned NumExplicitOps = 0;
+ for (const auto &Operand : Operands) {
+ bool IsImplicitOp = Operand.Operand.isReg() && Operand.Operand.isImplicit();
+ if (!IsImplicitOp) {
+ if (!MCID.isVariadic() && NumExplicitOps >= MCID.getNumOperands() &&
+ !Operand.Operand.isValidExcessOperand())
+ return error(Operand.Begin, "too many operands for instruction");
+
+ ++NumExplicitOps;
+ }
+
MI->addOperand(MF, Operand.Operand);
+ }
+
if (assignRegisterTies(*MI, Operands))
return true;
if (PreInstrSymbol)
@@ -1609,7 +1618,7 @@ bool MIParser::assignRegisterTies(MachineInstr &MI,
continue;
// The parser ensures that this operand is a register use, so we just have
// to check the tied-def operand.
- unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+ unsigned DefIdx = *Operands[I].TiedDefIdx;
if (DefIdx >= E)
return error(Operands[I].Begin,
Twine("use of invalid tied-def operand index '" +
@@ -1714,6 +1723,15 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
RegInfo->Kind == VRegInfo::REGBANK)
return error("generic virtual registers must have a type");
}
+
+ if (Flags & RegState::Define) {
+ if (Flags & RegState::Kill)
+ return error("cannot have a killed def operand");
+ } else {
+ if (Flags & RegState::Dead)
+ return error("cannot have a dead use operand");
+ }
+
Dest = MachineOperand::CreateReg(
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
@@ -2689,19 +2707,19 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
return true;
uint32_t *Mask = MF.allocateRegMask();
- while (true) {
- if (Token.isNot(MIToken::NamedRegister))
- return error("expected a named register");
- Register Reg;
- if (parseNamedRegister(Reg))
- return true;
- lex();
- Mask[Reg / 32] |= 1U << (Reg % 32);
+ do {
+ if (Token.isNot(MIToken::rparen)) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ Register Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ }
+
// TODO: Report an error if the same register is used more than once.
- if (Token.isNot(MIToken::comma))
- break;
- lex();
- }
+ } while (consumeIfPresent(MIToken::comma));
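Restructured as a do/while, the loop now also accepts an empty or trailing-comma register list: each iteration parses at most one element, and consumeIfPresent(comma) decides whether to iterate again. A minimal standalone sketch of that parse shape over a token vector (hypothetical tokens, standard library only):

#include <cassert>
#include <string>
#include <vector>

// Parse "(a,b,c)"-style lists where an element may be absent, mirroring
// the do/while-consumeIfPresent shape above.
std::vector<std::string> parseList(const std::vector<std::string> &Toks) {
  std::vector<std::string> Out;
  size_t I = 0;
  assert(Toks[I] == "(");
  ++I;
  do {
    if (Toks[I] != ")") {                    // optional element
      Out.push_back(Toks[I]);
      ++I;
    }
  } while (Toks[I] == "," && (++I, true));   // consumeIfPresent(comma)
  assert(Toks[I] == ")");
  return Out;
}

int main() {
  auto R = parseList({"(", "r0", ",", "r1", ")"});
  assert(R.size() == 2 && R[1] == "r1");
}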
if (expectAndConsume(MIToken::rparen))
return true;
@@ -3269,11 +3287,21 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
MDNode *Range = nullptr;
while (consumeIfPresent(MIToken::comma)) {
switch (Token.kind()) {
- case MIToken::kw_align:
+ case MIToken::kw_align: {
// align is printed if it is different than size.
- if (parseAlignment(BaseAlignment))
+ uint64_t Alignment;
+ if (parseAlignment(Alignment))
return true;
+ if (Ptr.Offset & (Alignment - 1)) {
+ // MachineMemOperand::getAlign never returns a value greater than the
+ // alignment of offset, so this just guards against hand-written MIR
+ // that specifies a large "align" value when it should probably use
+ // "basealign" instead.
+ return error("specified alignment is more aligned than offset");
+ }
+ BaseAlignment = Alignment;
break;
+ }
case MIToken::kw_basealign:
// basealign is printed if it is different than align.
if (parseAlignment(BaseAlignment))
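The new offset check exploits that alignments are powers of two: Offset & (Alignment - 1) is nonzero exactly when Offset is not a multiple of Alignment, which is when the "more aligned than offset" error fires. A small standalone illustration (values arbitrary):

#include <cassert>
#include <cstdint>

// True when Offset is a multiple of the power-of-two Align.
bool offsetIsAligned(int64_t Offset, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of 2");
  return (Offset & (Align - 1)) == 0;
}

int main() {
  assert(offsetIsAligned(16, 8));  // 16 % 8 == 0 -> accepted
  assert(!offsetIsAligned(12, 8)); // 12 % 8 != 0 -> rejected with an error
}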
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index f144639770bc..4944cb46c5b5 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -13,13 +13,10 @@
#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -29,7 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
@@ -46,6 +43,8 @@
using namespace llvm;
namespace llvm {
+class MDNode;
+class RegisterBank;
/// This class implements the parsing of LLVM IR that's embedded inside a MIR
/// file.
@@ -459,6 +458,12 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasWinCFI(YamlMF.HasWinCFI);
+ MF.setCallsEHReturn(YamlMF.CallsEHReturn);
+ MF.setCallsUnwindInit(YamlMF.CallsUnwindInit);
+ MF.setHasEHCatchret(YamlMF.HasEHCatchret);
+ MF.setHasEHScopes(YamlMF.HasEHScopes);
+ MF.setHasEHFunclets(YamlMF.HasEHFunclets);
+
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
if (YamlMF.RegBankSelected)
@@ -638,7 +643,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
// be saved for the caller).
if (YamlMF.CalleeSavedRegisters) {
SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
- for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ for (const auto &RegSource : *YamlMF.CalleeSavedRegisters) {
Register Reg;
if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
return error(Error, RegSource.SourceRange);
@@ -809,7 +814,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
Object.CalleeSavedRestored, ObjectIdx))
return true;
if (Object.LocalOffset)
- MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
+ MFI.mapLocalFrameObject(ObjectIdx, *Object.LocalOffset);
if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
return true;
}
@@ -826,6 +831,15 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
return error(Error, YamlMFI.StackProtector.SourceRange);
MFI.setStackProtectorIndex(FI);
}
+
+ if (!YamlMFI.FunctionContext.Value.empty()) {
+ SMDiagnostic Error;
+ int FI;
+ if (parseStackObjectReference(PFS, FI, YamlMFI.FunctionContext.Value, Error))
+ return error(Error, YamlMFI.FunctionContext.SourceRange);
+ MFI.setFunctionContextIndex(FI);
+ }
+
return false;
}
@@ -909,7 +923,7 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
return error(Error, YamlConstant.Value.SourceRange);
const Align PrefTypeAlign =
M.getDataLayout().getPrefTypeAlign(Value->getType());
- const Align Alignment = YamlConstant.Alignment.getValueOr(PrefTypeAlign);
+ const Align Alignment = YamlConstant.Alignment.value_or(PrefTypeAlign);
unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
.second)
@@ -1023,7 +1037,7 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
: Impl(std::move(Impl)) {}
-MIRParser::~MIRParser() {}
+MIRParser::~MIRParser() = default;
std::unique_ptr<Module>
MIRParser::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index dc72f83ad0e4..25823b1567f7 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -13,14 +13,11 @@
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,29 +29,19 @@
#include "llvm/CodeGen/MachineModuleSlotTracker.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRPrintingPasses.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -63,7 +50,6 @@
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -209,6 +195,12 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasWinCFI = MF.hasWinCFI();
+ YamlMF.CallsEHReturn = MF.callsEHReturn();
+ YamlMF.CallsUnwindInit = MF.callsUnwindInit();
+ YamlMF.HasEHCatchret = MF.hasEHCatchret();
+ YamlMF.HasEHScopes = MF.hasEHScopes();
+ YamlMF.HasEHFunclets = MF.hasEHFunclets();
+
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
YamlMF.RegBankSelected = MF.getProperties().hasProperty(
@@ -489,6 +481,12 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
.printStackObjectReference(MFI.getStackProtectorIndex());
}
+ if (MFI.hasFunctionContextIndex()) {
+ raw_string_ostream StrOS(YMF.FrameInfo.FunctionContext.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printStackObjectReference(MFI.getFunctionContextIndex());
+ }
+
// Print the debug variable information.
for (const MachineFunction::VariableDbgInfo &DebugVar :
MF.getVariableDbgInfo()) {
@@ -693,11 +691,11 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
// Print the live in registers.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- if (MRI.tracksLiveness() && !MBB.livein_empty()) {
+ if (!MBB.livein_empty()) {
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
OS.indent(2) << "liveins: ";
bool First = true;
- for (const auto &LI : MBB.liveins()) {
+ for (const auto &LI : MBB.liveins_dbg()) {
if (!First)
OS << ", ";
First = false;
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index b742ad9823c9..a8996a586909 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -15,7 +15,15 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 5862504109f0..a2abe71a6bd7 100644
--- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -10,7 +10,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineStableHash.h"
#include "llvm/IR/Constants.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 33782c755eb0..7daf9025d303 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -10,17 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "AllocationOrder.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocGreedy.h"
-#include "RegAllocScore.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
+#endif
#include "llvm/Analysis/ReleaseModeModelRunner.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -28,13 +30,11 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Config/config.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
#include <array>
#include <memory>
@@ -46,10 +46,16 @@ using namespace llvm;
// Generated header in release (AOT) mode
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
#include "RegallocEvictModel.h"
+using CompiledModelType = RegallocEvictModel;
+#else
+using CompiledModelType = NoopSavedModelImpl;
#endif
// Options that only make sense in development mode
#ifdef LLVM_HAVE_TF_API
+#include "RegAllocScore.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+
static cl::opt<std::string> TrainingLog(
"regalloc-training-log", cl::Hidden,
cl::desc("Training log for the register allocator eviction model"));
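
The hunk above makes the model type a compile-time choice: with an AOT-compiled model linked in, CompiledModelType is the generated class; otherwise a no-op stand-in keeps the release-mode runner compiling. A reduced sketch of the shape (HAVE_AOT_MODEL, GeneratedModel, and NoopModel are hypothetical names):

#if defined(HAVE_AOT_MODEL)
#include "GeneratedModel.h" // would define class GeneratedModel
using ModelType = GeneratedModel;
#else
struct NoopModel {}; // inert placeholder filling the same role
using ModelType = NoopModel;
#endif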
@@ -60,6 +66,8 @@ static cl::opt<std::string> ModelUnderTraining(
#endif // #ifdef LLVM_HAVE_TF_API
+extern cl::opt<unsigned> EvictInterferenceCutoff;
+
/// The score injection pass.
/// This pass calculates the score for a function and inserts it in the log, but
/// this happens only in development mode. It's a no-op otherwise.
@@ -240,8 +248,8 @@ using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
/// The ML evictor (commonalities between release and development mode)
class MLEvictAdvisor : public RegAllocEvictionAdvisor {
public:
- MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner,
- const MachineBlockFrequencyInfo &MBFI,
+ MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops);
protected:
@@ -257,14 +265,16 @@ protected:
/// if we're just capturing the log of the default advisor, it needs to call
/// the latter instead, so we need to pass all the necessary parameters for
/// it. In the development case, it will also log.
- virtual int64_t tryFindEvictionCandidatePosition(
- LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
- uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const;
+ virtual int64_t
+ tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const;
  /// Load the features of the given VirtReg (allocated or not) at column Pos,
  /// but if the register can't be evicted, return false instead.
bool
- loadInterferenceFeatures(LiveInterval &VirtReg, MCRegister PhysReg,
+ loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
bool IsHint, const SmallVirtRegSet &FixedRegisters,
std::array<float, FeatureIDs::FeatureCount> &Largest,
size_t Pos) const;
@@ -273,24 +283,24 @@ private:
static float getInitialQueueSize(const MachineFunction &MF);
MCRegister tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) const override;
- void extractFeatures(const SmallVectorImpl<LiveInterval *> &Intervals,
+ void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
std::array<float, FeatureIDs::FeatureCount> &Largest,
size_t Pos, int64_t IsHint, int64_t LocalIntfsCount,
float NrUrgent) const;
// Point-in-time: we didn't learn this, so we always delegate to the default.
bool canEvictHintInterference(
- LiveInterval &VirtReg, MCRegister PhysReg,
+ const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const override {
return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
FixedRegisters);
}
- const LIFeatureComponents
+ const LIFeatureComponents &
getLIFeatureComponents(const LiveInterval &LI) const;
// Hold on to a default advisor for:
@@ -306,17 +316,21 @@ private:
// This could be static and shared, but its initialization is non-trivial.
std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
const float InitialQSize;
+
+ using RegID = unsigned;
+ mutable DenseMap<RegID, LIFeatureComponents> CachedFeatures;
};
+#define _DECL_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(#name, shape),
+
+static const std::vector<TensorSpec> InputFeatures{
+ {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
+};
+#undef _DECL_FEATURES
// ===================================
// Release (AOT) - specifics
// ===================================
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
-const std::array<std::string, FeatureIDs::FeatureCount> FeatureNames{
-#define _GETNAME(_, NAME, __, ___) #NAME,
- RA_EVICT_FEATURES_LIST(_GETNAME)
-#undef _GETNAME
-};
class ReleaseModeEvictionAdvisorAnalysis final
: public RegAllocEvictionAdvisorAnalysis {
public:
@@ -335,17 +349,16 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
if (!Runner)
- Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>(
- MF.getFunction().getContext(), FeatureNames, DecisionName);
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
return std::make_unique<MLEvictAdvisor>(
MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
getAnalysis<MachineLoopInfo>());
}
- std::unique_ptr<ReleaseModeModelRunner<RegallocEvictModel>> Runner;
+ std::unique_ptr<ReleaseModeModelRunner<CompiledModelType>> Runner;
};
-#endif
// ===================================
// Development mode-specifics
@@ -353,13 +366,6 @@ private:
//
// Features we log
#ifdef LLVM_HAVE_TF_API
-#define _DECL_FEATURES(type, name, shape, _) \
- TensorSpec::createSpec<type>(#name, shape),
-
-static const std::vector<TensorSpec> InputFeatures{
- {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
-};
-#undef _DECL_FEATURES
static const TensorSpec Output =
TensorSpec::createSpec<int64_t>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
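
The hunk above deletes the _DECL_FEATURES block from the development-only section because it moved up to shared scope. The pattern itself is an X-macro: RA_EVICT_FEATURES_LIST applies the supplied macro to every feature tuple, stamping out one TensorSpec per feature. A reduced sketch of the mechanism, with a hypothetical two-entry list:

#include "llvm/Analysis/TensorSpec.h"
#include <cstdint>
#include <vector>

// Hypothetical feature list; each entry is (type, name, shape).
#define MY_FEATURES(M)                                                         \
  M(int64_t, mask, {32})                                                       \
  M(float, progress, {1})

#define DECL_SPEC(type, name, shape)                                           \
  llvm::TensorSpec::createSpec<type>(#name, shape),

static const std::vector<llvm::TensorSpec> Specs{MY_FEATURES(DECL_SPEC)};
#undef DECL_SPEC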
@@ -380,7 +386,7 @@ static const std::vector<TensorSpec> TrainingInputFeatures{
class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
public:
- DevelopmentModeEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
+ DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops, Logger *Log)
@@ -388,8 +394,8 @@ public:
private:
int64_t tryFindEvictionCandidatePosition(
- LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
- uint8_t CostPerUseLimit,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) const override;
Logger *const Log;
@@ -436,7 +442,7 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
LLVMContext &Ctx = MF.getFunction().getContext();
if (ModelUnderTraining.empty() && TrainingLog.empty()) {
Ctx.emitError("Regalloc development mode should be requested with at "
@@ -496,7 +502,7 @@ float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
return Ret;
}
-MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
+MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops)
@@ -514,7 +520,7 @@ MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
}
int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
- LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
+ const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
const SmallVirtRegSet &) const {
int64_t Ret = Runner->evaluate<int64_t>();
assert(Ret >= 0);
@@ -523,7 +529,7 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
}
bool MLEvictAdvisor::loadInterferenceFeatures(
- LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest,
size_t Pos) const {
// It is only possible to evict virtual register interference.
@@ -539,16 +545,18 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
// The cascade tracking is the same as in the default advisor
unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
- SmallVector<LiveInterval *, MaxInterferences> InterferingIntervals;
+ SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // Unlike the default heuristic, we don't make any assumptions about
    // what having more than 10 results in the query may mean.
- const auto &IFIntervals = Q.interferingVRegs();
+ const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
if (IFIntervals.empty() && InterferingIntervals.empty())
continue;
+ if (IFIntervals.size() >= EvictInterferenceCutoff)
+ return false;
InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
- for (LiveInterval *Intf : reverse(IFIntervals)) {
+ for (const LiveInterval *Intf : reverse(IFIntervals)) {
assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
// This is the same set of legality checks as in the default case: don't
@@ -587,7 +595,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
}
MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
if (!MaybeOrderLimit)
@@ -652,7 +660,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
// decision making process.
Regs[CandidateVirtRegPos].second = !MustFindEviction;
if (!MustFindEviction)
- extractFeatures(SmallVector<LiveInterval *, 1>(1, &VirtReg), Largest,
+ extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0,
/*NrUrgent*/ 0.0);
assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
@@ -686,9 +694,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
return Regs[CandidatePos].first;
}
-const LIFeatureComponents
+const LIFeatureComponents &
MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
- LIFeatureComponents Ret;
+ RegID ID = LI.reg().id();
+ LIFeatureComponents Empty;
+ auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
+ LIFeatureComponents &Ret = I.first->getSecond();
+ if (!I.second)
+ return Ret;
+
SmallPtrSet<MachineInstr *, 8> Visited;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -733,7 +747,7 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
// Overall, this currently mimics what we do for weight calculation, but instead
 // of accumulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures(
- const SmallVectorImpl<LiveInterval *> &Intervals,
+ const SmallVectorImpl<const LiveInterval *> &Intervals,
std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos,
int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
int64_t NrDefsAndUses = 0;
@@ -769,7 +783,7 @@ void MLEvictAdvisor::extractFeatures(
if (LI.endIndex() > EndSI)
EndSI = LI.endIndex();
- const LIFeatureComponents LIFC = getLIFeatureComponents(LI);
+ const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
NrDefsAndUses += LIFC.NrDefsAndUses;
@@ -831,8 +845,9 @@ RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
}
int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
- LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
- uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const {
int64_t Ret = 0;
if (isa<ModelUnderTrainingRunner>(getRunner())) {
Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
@@ -885,11 +900,9 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
}
#endif // #ifdef LLVM_HAVE_TF_API
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
return new ReleaseModeEvictionAdvisorAnalysis();
}
-#endif
// In all cases except development mode, we don't need scoring.
#if !defined(LLVM_HAVE_TF_API)
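
The getLIFeatureComponents hunks above memoize per-register feature computation and return a reference into a DenseMap cache. The load-bearing idiom is DenseMap::insert returning an {iterator, inserted} pair, so one lookup both finds an existing entry and claims the slot for a new one. A minimal sketch under assumed names (Features, computeFeatures):

#include "llvm/ADT/DenseMap.h"

struct Features {
  unsigned NrDefsAndUses;
};

// Stand-in for the real, expensive per-interval scan.
Features computeFeatures(unsigned RegID) {
  Features F;
  F.NrDefsAndUses = RegID % 7;
  return F;
}

const Features &getCachedFeatures(llvm::DenseMap<unsigned, Features> &Cache,
                                  unsigned RegID) {
  auto It = Cache.insert({RegID, Features{}});
  Features &Slot = It.first->getSecond();
  if (It.second) // true only on first insertion: fill the slot exactly once
    Slot = computeFeatures(RegID);
  // The reference is only stable until the next insertion rehashes the map.
  return Slot;
}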
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 8c9d00d08c6a..c186d0ba9969 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -26,12 +26,10 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -53,8 +51,7 @@ MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
}
-MachineBasicBlock::~MachineBasicBlock() {
-}
+MachineBasicBlock::~MachineBasicBlock() = default;
/// Return the MCSymbol for this basic block.
MCSymbol *MachineBasicBlock::getSymbol() const {
@@ -135,7 +132,7 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
for (MachineInstr &MI : N->instrs())
- MI.AddRegOperandsToUseLists(RegInfo);
+ MI.addRegOperandsToUseLists(RegInfo);
}
void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -153,7 +150,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
// Add the instruction's register operands to their corresponding
// use/def lists.
MachineFunction *MF = Parent->getParent();
- N->AddRegOperandsToUseLists(MF->getRegInfo());
+ N->addRegOperandsToUseLists(MF->getRegInfo());
MF->handleInsertion(*N);
}
@@ -165,7 +162,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
// Remove from the use/def lists.
if (MachineFunction *MF = N->getMF()) {
MF->handleRemoval(*N);
- N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+ N->removeRegOperandsFromUseLists(MF->getRegInfo());
}
N->setParent(nullptr);
@@ -918,6 +915,10 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
return std::next(I) == MachineFunction::const_iterator(MBB);
}
+const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const {
+ return Successors.size() == 1 ? Successors[0] : nullptr;
+}
+
MachineBasicBlock *MachineBasicBlock::getFallThrough() {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
@@ -1620,6 +1621,16 @@ MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const {
return liveout_iterator(*this, ExceptionPointer, ExceptionSelector, false);
}
+bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const {
+ unsigned Cntr = 0;
+ auto R = instructionsWithoutDebug(begin(), end());
+ for (auto I = R.begin(), E = R.end(); I != E; ++I) {
+ if (++Cntr > Limit)
+ return true;
+ }
+ return false;
+}
+
const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
const MBBSectionID
MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
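
The new sizeWithoutDebugLargerThan above is a bounded count: it answers "more than Limit real instructions?" without walking the whole block. The same early-exit idiom in self-contained form (Instr and the vector container are stand-ins for the MBB instruction list):

#include <vector>

struct Instr {
  bool IsDebug;
};

// Returns true once more than Limit non-debug instructions are seen,
// without ever counting the entire block.
bool sizeWithoutDebugLargerThan(const std::vector<Instr> &Block,
                                unsigned Limit) {
  unsigned Count = 0;
  for (const Instr &I : Block) {
    if (I.IsDebug)
      continue; // debug instructions don't count toward the limit
    if (++Count > Limit)
      return true; // early exit: the rest of the block is irrelevant
  }
  return false;
}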
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c93ffaabf74c..4cc84f22bdde 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -34,13 +34,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TailDuplicator.h"
@@ -50,6 +50,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
@@ -200,10 +201,8 @@ static cl::opt<unsigned> TriangleChainCount(
cl::init(2),
cl::Hidden);
-static cl::opt<bool> EnableExtTspBlockPlacement(
- "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
- cl::desc("Enable machine block placement based on the ext-tsp model, "
- "optimizing I-cache utilization."));
+extern cl::opt<bool> EnableExtTspBlockPlacement;
+extern cl::opt<bool> ApplyExtTspWithoutProfile;
namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
@@ -3422,7 +3421,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
// Apply a post-processing optimizing block placement.
- if (MF.size() >= 3 && EnableExtTspBlockPlacement) {
+ if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
+ (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
// Find a new placement and modify the layout of the blocks in the function.
applyExtTsp();
@@ -3660,6 +3660,9 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
if (std::next(F.begin()) == F.end())
return false;
+ if (!isFunctionInPrintList(F.getName()))
+ return false;
+
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
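
The block-placement hunk above replaces a local definition of EnableExtTspBlockPlacement with an extern declaration, so the flag is defined once (in the ext-tsp code) and shared across files. A sketch of the pattern with a hypothetical flag name:

#include "llvm/Support/CommandLine.h"

// In exactly one .cpp file -- the definition that owns the flag:
llvm::cl::opt<bool> EnableMyPlacement("enable-my-placement", llvm::cl::Hidden,
                                      llvm::cl::init(false));

// In any other .cpp file that reads it -- a declaration of the same object:
// extern llvm::cl::opt<bool> EnableMyPlacement;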
diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index c9f762f9a6e7..a84377d70855 100644
--- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -12,10 +12,8 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index 0fcb07252d0e..e60fd9f7883a 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -34,7 +34,6 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -91,6 +90,11 @@ namespace {
AU.addPreserved<MachineBlockFrequencyInfo>();
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
void releaseMemory() override {
ScopeMap.clear();
PREMap.clear();
diff --git a/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/llvm/lib/CodeGen/MachineCheckDebugify.cpp
index bd7f0f862947..1e5b8dd0bbb0 100644
--- a/llvm/lib/CodeGen/MachineCheckDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -11,13 +11,14 @@
 /// DILocalVariable which mir-debugify generated before.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Utils/Debugify.h"
+#include "llvm/Pass.h"
#define DEBUG_TYPE "mir-check-debugify"
@@ -27,9 +28,6 @@ namespace {
struct CheckDebugMachineModule : public ModulePass {
bool runOnModule(Module &M) override {
- MachineModuleInfo &MMI =
- getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
-
NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
if (!NMD) {
errs() << "WARNING: Please run mir-debugify to generate "
@@ -37,6 +35,9 @@ struct CheckDebugMachineModule : public ModulePass {
return false;
}
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
->getZExtValue();
@@ -106,8 +107,7 @@ struct CheckDebugMachineModule : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- AU.setPreservesCFG();
+ AU.setPreservesAll();
}
static char ID; // Pass identification.
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 72ab9ee4f388..722a709af240 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -278,6 +277,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
case MachineCombinerPattern::REASSOC_XA_YB:
case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ case MachineCombinerPattern::SUBADD_OP1:
+ case MachineCombinerPattern::SUBADD_OP2:
return CombinerObjective::MustReduceDepth;
case MachineCombinerPattern::REASSOC_XY_BCA:
case MachineCombinerPattern::REASSOC_XY_BAC:
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 57fbe4112e47..66f0eb83e57c 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -83,8 +83,24 @@ STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
"Controls which register COPYs are forwarded");
+static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false),
+ cl::Hidden);
+
namespace {
+static Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ if (UseCopyInstr)
+ return TII.isCopyInstr(MI);
+
+ if (MI.isCopy())
+ return Optional<DestSourcePair>(
+ DestSourcePair{MI.getOperand(0), MI.getOperand(1)});
+
+ return None;
+}
+
class CopyTracker {
struct CopyInfo {
MachineInstr *MI;
@@ -110,7 +126,8 @@ public:
}
/// Remove register from copy maps.
- void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
+ void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
    // Since Reg might be a subreg of some registers, invalidating Reg alone
    // is not enough. We have to find the COPY that defines Reg, or the
    // registers defined by Reg, and invalidate all of them.
@@ -120,8 +137,13 @@ public:
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
if (MachineInstr *MI = I->second.MI) {
- RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg());
- RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg());
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ assert(CopyOperands && "Expect copy");
+
+ RegsToInvalidate.insert(
+ CopyOperands->Destination->getReg().asMCReg());
+ RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg());
}
RegsToInvalidate.insert(I->second.DefRegs.begin(),
I->second.DefRegs.end());
@@ -133,7 +155,8 @@ public:
}
/// Clobber a single register, removing it from the tracker's copy maps.
- void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
+ void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
@@ -142,8 +165,12 @@ public:
markRegsUnavailable(I->second.DefRegs, TRI);
// When we clobber the destination of a copy, we need to clobber the
// whole register it defined.
- if (MachineInstr *MI = I->second.MI)
- markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI);
+ if (MachineInstr *MI = I->second.MI) {
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
+ TRI);
+ }
// Now we can erase the copy.
Copies.erase(I);
}
@@ -151,11 +178,13 @@ public:
}
/// Add this copy's registers into the tracker's copy maps.
- void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
- assert(MI->isCopy() && "Tracking non-copy?");
+ void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr);
+ assert(CopyOperands && "Tracking non-copy?");
- MCRegister Def = MI->getOperand(0).getReg().asMCReg();
- MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+ MCRegister Src = CopyOperands->Source->getReg().asMCReg();
+ MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
// Remember Def is defined by the copy.
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
@@ -198,15 +227,22 @@ public:
}
MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
MCRegUnitIterator RUI(Reg, &TRI);
MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
- if (!AvailCopy ||
- !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg))
+
+ if (!AvailCopy)
+ return nullptr;
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+ Register AvailSrc = CopyOperands->Source->getReg();
+ Register AvailDef = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(AvailSrc, Reg))
return nullptr;
- Register AvailSrc = AvailCopy->getOperand(1).getReg();
- Register AvailDef = AvailCopy->getOperand(0).getReg();
for (const MachineInstr &MI :
make_range(AvailCopy->getReverseIterator(), I.getReverseIterator()))
for (const MachineOperand &MO : MI.operands())
@@ -219,20 +255,26 @@ public:
}
MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
// We check the first RegUnit here, since we'll only be interested in the
// copy if it copies the entire register anyway.
MCRegUnitIterator RUI(Reg, &TRI);
MachineInstr *AvailCopy =
findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
- if (!AvailCopy ||
- !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg))
+
+ if (!AvailCopy)
+ return nullptr;
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+ Register AvailSrc = CopyOperands->Source->getReg();
+ Register AvailDef = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(AvailDef, Reg))
return nullptr;
// Check that the available copy isn't clobbered by any regmasks between
// itself and the destination.
- Register AvailSrc = AvailCopy->getOperand(1).getReg();
- Register AvailDef = AvailCopy->getOperand(0).getReg();
for (const MachineInstr &MI :
make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
for (const MachineOperand &MO : MI.operands())
@@ -253,10 +295,14 @@ class MachineCopyPropagation : public MachineFunctionPass {
const TargetInstrInfo *TII;
const MachineRegisterInfo *MRI;
+  // Whether to recognize copies via TII.isCopyInstr (which also matches
+  // target-specific copy-like instructions) rather than only MI.isCopy().
+ bool UseCopyInstr;
+
public:
static char ID; // Pass identification, replacement for typeid
- MachineCopyPropagation() : MachineFunctionPass(ID) {
+ MachineCopyPropagation(bool CopyInstr = false)
+ : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) {
initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
}
@@ -334,9 +380,13 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
/// isNopCopy("ecx = COPY eax", AX, CX) == true
/// isNopCopy("ecx = COPY eax", AH, CL) == false
static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src,
- MCRegister Def, const TargetRegisterInfo *TRI) {
- MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg();
- MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg();
+ MCRegister Def, const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII, bool UseCopyInstr) {
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(PreviousCopy, *TII, UseCopyInstr);
+ MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg();
+ MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg();
if (Src == PreviousSrc && Def == PreviousDef)
return true;
if (!TRI->isSubRegister(PreviousSrc, Src))
@@ -356,22 +406,26 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
return false;
// Search for an existing copy.
- MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI);
+ MachineInstr *PrevCopy =
+ Tracker.findAvailCopy(Copy, Def, *TRI, *TII, UseCopyInstr);
if (!PrevCopy)
return false;
+ auto PrevCopyOperands = isCopyInstr(*PrevCopy, *TII, UseCopyInstr);
// Check that the existing copy uses the correct sub registers.
- if (PrevCopy->getOperand(0).isDead())
+ if (PrevCopyOperands->Destination->isDead())
return false;
- if (!isNopCopy(*PrevCopy, Src, Def, TRI))
+ if (!isNopCopy(*PrevCopy, Src, Def, TRI, TII, UseCopyInstr))
return false;
LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
// Copy was redundantly redefining either Src or Def. Remove earlier kill
// flags between Copy and PrevCopy because the value will be reused now.
- assert(Copy.isCopy());
- Register CopyDef = Copy.getOperand(0).getReg();
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ assert(CopyOperands);
+
+ Register CopyDef = CopyOperands->Destination->getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
make_range(PrevCopy->getIterator(), Copy.getIterator()))
@@ -385,7 +439,9 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
- Register Def = Copy.getOperand(0).getReg();
+
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
if (const TargetRegisterClass *URC =
UseI.getRegClassConstraint(UseIdx, TII, TRI))
@@ -403,7 +459,8 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI,
unsigned UseIdx) {
- Register CopySrcReg = Copy.getOperand(1).getReg();
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ Register CopySrcReg = CopyOperands->Source->getReg();
// If the new register meets the opcode register constraints, then allow
// forwarding.
@@ -411,34 +468,10 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
UseI.getRegClassConstraint(UseIdx, TII, TRI))
return URC->contains(CopySrcReg);
- if (!UseI.isCopy())
+ auto UseICopyOperands = isCopyInstr(UseI, *TII, UseCopyInstr);
+ if (!UseICopyOperands)
return false;
- const TargetRegisterClass *CopySrcRC =
- TRI->getMinimalPhysRegClass(CopySrcReg);
- const TargetRegisterClass *UseDstRC =
- TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
- const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
-
- // If cross copy register class is not the same as copy source register class
- // then it is not possible to copy the register directly and requires a cross
- // register class copy. Fowarding this copy without checking register class of
- // UseDst may create additional cross register copies when expanding the copy
- // instruction in later passes.
- if (CopySrcRC != CrossCopyRC) {
- const TargetRegisterClass *CopyDstRC =
- TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
-
- // Check if UseDstRC matches the necessary register class to copy from
- // CopySrc's register class. If so then forwarding the copy will not
- // introduce any cross-class copys. Else if CopyDstRC matches then keep the
- // copy and do not forward. If neither UseDstRC or CopyDstRC matches then
- // we may need a cross register copy later but we do not worry about it
- // here.
- if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
- return false;
- }
-
/// COPYs don't have register class constraints, so if the user instruction
/// is a COPY, we just try to avoid introducing additional cross-class
/// COPYs. For example:
@@ -455,12 +488,34 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
///
/// so we have reduced the number of cross-class COPYs and potentially
/// introduced a nop COPY that can be removed.
- const TargetRegisterClass *SuperRC = UseDstRC;
- for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
- SuperRC; SuperRC = *SuperRCI++)
- if (SuperRC->contains(CopySrcReg))
- return true;
+ // Allow forwarding if src and dst belong to any common class, so long as they
+ // don't belong to any (possibly smaller) common class that requires copies to
+ // go via a different class.
+ Register UseDstReg = UseICopyOperands->Destination->getReg();
+ bool Found = false;
+ bool IsCrossClass = false;
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(CopySrcReg) && RC->contains(UseDstReg)) {
+ Found = true;
+ if (TRI->getCrossCopyRegClass(RC) != RC) {
+ IsCrossClass = true;
+ break;
+ }
+ }
+ }
+ if (!Found)
+ return false;
+ if (!IsCrossClass)
+ return true;
+ // The forwarded copy would be cross-class. Only do this if the original copy
+ // was also cross-class.
+ Register CopyDstReg = CopyOperands->Destination->getReg();
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) &&
+ TRI->getCrossCopyRegClass(RC) != RC)
+ return true;
+ }
return false;
}
@@ -527,13 +582,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!MOUse.isRenamable())
continue;
- MachineInstr *Copy =
- Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI);
+ MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(),
+ *TRI, *TII, UseCopyInstr);
if (!Copy)
continue;
- Register CopyDstReg = Copy->getOperand(0).getReg();
- const MachineOperand &CopySrc = Copy->getOperand(1);
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register CopyDstReg = CopyOperands->Destination->getReg();
+ const MachineOperand &CopySrc = *CopyOperands->Source;
Register CopySrcReg = CopySrc.getReg();
// FIXME: Don't handle partial uses of wider COPYs yet.
@@ -557,7 +614,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
// Check that the instruction is not a copy that partially overwrites the
// original copy source that we are about to use. The tracker mechanism
// cannot cope with that.
- if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) &&
+ if (isCopyInstr(MI, *TII, UseCopyInstr) &&
+ MI.modifiesRegister(CopySrcReg, TRI) &&
!MI.definesRegister(CopySrcReg)) {
LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI);
continue;
@@ -596,76 +654,82 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Analyze copies (which don't overlap themselves).
- if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(),
- MI.getOperand(1).getReg())) {
- assert(MI.getOperand(0).getReg().isPhysical() &&
- MI.getOperand(1).getReg().isPhysical() &&
- "MachineCopyPropagation should be run after register allocation!");
-
- MCRegister Def = MI.getOperand(0).getReg().asMCReg();
- MCRegister Src = MI.getOperand(1).getReg().asMCReg();
-
- // The two copies cancel out and the source of the first copy
- // hasn't been overridden, eliminate the second one. e.g.
- // %ecx = COPY %eax
- // ... nothing clobbered eax.
- // %eax = COPY %ecx
- // =>
- // %ecx = COPY %eax
- //
- // or
- //
- // %ecx = COPY %eax
- // ... nothing clobbered eax.
- // %ecx = COPY %eax
- // =>
- // %ecx = COPY %eax
- if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
- continue;
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ if (CopyOperands) {
+
+ Register RegSrc = CopyOperands->Source->getReg();
+ Register RegDef = CopyOperands->Destination->getReg();
+
+ if (!TRI->regsOverlap(RegDef, RegSrc)) {
+ assert(RegDef.isPhysical() && RegSrc.isPhysical() &&
+ "MachineCopyPropagation should be run after register allocation!");
+
+ MCRegister Def = RegDef.asMCReg();
+ MCRegister Src = RegSrc.asMCReg();
+
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %eax = COPY %ecx
+ // =>
+ // %ecx = COPY %eax
+ //
+ // or
+ //
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %ecx = COPY %eax
+ // =>
+ // %ecx = COPY %eax
+ if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
+ continue;
- forwardUses(MI);
+ forwardUses(MI);
+
+ // Src may have been changed by forwardUses()
+ CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ Src = CopyOperands->Source->getReg().asMCReg();
+
+ // If Src is defined by a previous copy, the previous copy cannot be
+ // eliminated.
+ ReadRegister(Src, MI, RegularUse);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+ if (!Reg)
+ continue;
+ ReadRegister(Reg, MI, RegularUse);
+ }
- // Src may have been changed by forwardUses()
- Src = MI.getOperand(1).getReg().asMCReg();
+ LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
+
+ // Copy is now a candidate for deletion.
+ if (!MRI->isReserved(Def))
+ MaybeDeadCopies.insert(&MI);
+
+      // If 'Def' was previously the source of another copy, that earlier
+      // copy's source is no longer available. e.g.
+ // %xmm9 = copy %xmm2
+ // ...
+ // %xmm2 = copy %xmm0
+ // ...
+ // %xmm2 = copy %xmm9
+ Tracker.clobberRegister(Def, *TRI, *TII, UseCopyInstr);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+ if (!Reg)
+ continue;
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ }
- // If Src is defined by a previous copy, the previous copy cannot be
- // eliminated.
- ReadRegister(Src, MI, RegularUse);
- for (const MachineOperand &MO : MI.implicit_operands()) {
- if (!MO.isReg() || !MO.readsReg())
- continue;
- MCRegister Reg = MO.getReg().asMCReg();
- if (!Reg)
- continue;
- ReadRegister(Reg, MI, RegularUse);
- }
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
- LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
-
- // Copy is now a candidate for deletion.
- if (!MRI->isReserved(Def))
- MaybeDeadCopies.insert(&MI);
-
- // If 'Def' is previously source of another copy, then this earlier copy's
- // source is no longer available. e.g.
- // %xmm9 = copy %xmm2
- // ...
- // %xmm2 = copy %xmm0
- // ...
- // %xmm2 = copy %xmm9
- Tracker.clobberRegister(Def, *TRI);
- for (const MachineOperand &MO : MI.implicit_operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
- MCRegister Reg = MO.getReg().asMCReg();
- if (!Reg)
- continue;
- Tracker.clobberRegister(Reg, *TRI);
+ continue;
}
-
- Tracker.trackCopy(&MI, *TRI);
-
- continue;
}
// Clobber any earlyclobber regs first.
@@ -677,7 +741,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// later.
if (MO.isTied())
ReadRegister(Reg, MI, RegularUse);
- Tracker.clobberRegister(Reg, *TRI);
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
}
forwardUses(MI);
@@ -713,7 +777,9 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDeadCopies.begin();
DI != MaybeDeadCopies.end();) {
MachineInstr *MaybeDead = *DI;
- MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg();
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
+ MCRegister Reg = CopyOperands->Destination->getReg().asMCReg();
assert(!MRI->isReserved(Reg));
if (!RegMask->clobbersPhysReg(Reg)) {
@@ -726,7 +792,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Make sure we invalidate any entries in the copy maps before erasing
// the instruction.
- Tracker.clobberRegister(Reg, *TRI);
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
// erase() will return the next valid iterator pointing to the next
// element after the erased one.
@@ -739,7 +805,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Any previous copy definition or reading the Defs is no longer available.
for (MCRegister Reg : Defs)
- Tracker.clobberRegister(Reg, *TRI);
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
}
// If MBB doesn't have successors, delete the copies whose defs are not used.
@@ -749,12 +815,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
- assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
+ assert(CopyOperands);
+
+ Register SrcReg = CopyOperands->Source->getReg();
+ Register DestReg = CopyOperands->Destination->getReg();
+ assert(!MRI->isReserved(DestReg));
// Update matching debug values, if any.
- assert(MaybeDead->isCopy());
- Register SrcReg = MaybeDead->getOperand(1).getReg();
- Register DestReg = MaybeDead->getOperand(0).getReg();
SmallVector<MachineInstr *> MaybeDeadDbgUsers(
CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end());
MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(),
@@ -772,10 +842,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
}
static bool isBackwardPropagatableCopy(MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- assert(MI.isCopy() && "MI is expected to be a COPY");
- Register Def = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, TII, UseCopyInstr);
+ assert(CopyOperands && "MI is expected to be a COPY");
+
+ Register Def = CopyOperands->Destination->getReg();
+ Register Src = CopyOperands->Source->getReg();
if (!Def || !Src)
return false;
@@ -783,7 +857,7 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI,
if (MRI.isReserved(Def) || MRI.isReserved(Src))
return false;
- return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill();
+ return CopyOperands->Source->isRenamable() && CopyOperands->Source->isKill();
}
void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
@@ -808,13 +882,15 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
if (!MODef.isRenamable())
continue;
- MachineInstr *Copy =
- Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI);
+ MachineInstr *Copy = Tracker.findAvailBackwardCopy(
+ MI, MODef.getReg().asMCReg(), *TRI, *TII, UseCopyInstr);
if (!Copy)
continue;
- Register Def = Copy->getOperand(0).getReg();
- Register Src = Copy->getOperand(1).getReg();
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
+ Register Src = CopyOperands->Source->getReg();
if (MODef.getReg() != Src)
continue;
@@ -833,7 +909,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
<< MI << " from " << *Copy);
MODef.setReg(Def);
- MODef.setIsRenamable(Copy->getOperand(0).isRenamable());
+ MODef.setIsRenamable(CopyOperands->Destination->isRenamable());
LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
MaybeDeadCopies.insert(Copy);
@@ -849,20 +925,23 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
// Ignore non-trivial COPYs.
- if (MI.isCopy() && MI.getNumOperands() == 2 &&
- !TRI->regsOverlap(MI.getOperand(0).getReg(),
- MI.getOperand(1).getReg())) {
-
- MCRegister Def = MI.getOperand(0).getReg().asMCReg();
- MCRegister Src = MI.getOperand(1).getReg().asMCReg();
-
- // Unlike forward cp, we don't invoke propagateDefs here,
- // just let forward cp do COPY-to-COPY propagation.
- if (isBackwardPropagatableCopy(MI, *MRI)) {
- Tracker.invalidateRegister(Src, *TRI);
- Tracker.invalidateRegister(Def, *TRI);
- Tracker.trackCopy(&MI, *TRI);
- continue;
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ if (CopyOperands && MI.getNumOperands() == 2) {
+ Register DefReg = CopyOperands->Destination->getReg();
+ Register SrcReg = CopyOperands->Source->getReg();
+
+ if (!TRI->regsOverlap(DefReg, SrcReg)) {
+ MCRegister Def = DefReg.asMCReg();
+ MCRegister Src = SrcReg.asMCReg();
+
+        // Unlike forward copy propagation, we don't invoke propagateDefs
+        // here; we just let the forward pass do COPY-to-COPY propagation.
+ if (isBackwardPropagatableCopy(MI, *MRI, *TII, UseCopyInstr)) {
+ Tracker.invalidateRegister(Src, *TRI, *TII, UseCopyInstr);
+ Tracker.invalidateRegister(Def, *TRI, *TII, UseCopyInstr);
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+ continue;
+ }
}
}
@@ -872,7 +951,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
- Tracker.invalidateRegister(Reg, *TRI);
+ Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr);
}
propagateDefs(MI);
@@ -884,7 +963,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
continue;
if (MO.isDef())
- Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
+ UseCopyInstr);
if (MO.readsReg()) {
if (MO.isDebug()) {
@@ -898,7 +978,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
}
}
} else {
- Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
+ UseCopyInstr);
}
}
}
@@ -906,8 +987,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
for (auto *Copy : MaybeDeadCopies) {
- Register Src = Copy->getOperand(1).getReg();
- Register Def = Copy->getOperand(0).getReg();
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
SmallVector<MachineInstr *> MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(),
CopyDbgUsers[Copy].end());
@@ -938,3 +1021,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+MachineFunctionPass *
+llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) {
+ return new MachineCopyPropagation(UseCopyInstr);
+}
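
Nearly every hunk in the MachineCopyPropagation diff replaces direct MI.isCopy() plus getOperand(0)/getOperand(1) access with the optional-returning helper introduced at the top of the file, which can also recognize target-specific copy-like instructions through TII.isCopyInstr. A hedged sketch of the call-site shape this imposes (assumes the pass's TII and UseCopyInstr members are in scope):

// An arbitrary MachineInstr may not be a copy, so the result is optional.
if (Optional<DestSourcePair> Ops = isCopyInstr(MI, *TII, UseCopyInstr)) {
  Register Dst = Ops->Destination->getReg();
  Register Src = Ops->Source->getReg();
  // ... use Dst/Src where getOperand(0)/getOperand(1) were used before ...
}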
diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
index 42a5e2b7af01..6871ac35b300 100644
--- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -8,50 +8,15 @@
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/ADT/GenericCycleImpl.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineSSAContext.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;
template class llvm::GenericCycleInfo<llvm::MachineSSAContext>;
template class llvm::GenericCycle<llvm::MachineSSAContext>;
-namespace {
-
-/// Legacy analysis pass which computes a \ref MachineCycleInfo.
-class MachineCycleInfoWrapperPass : public MachineFunctionPass {
- MachineFunction *F = nullptr;
- MachineCycleInfo CI;
-
-public:
- static char ID;
-
- MachineCycleInfoWrapperPass();
-
- MachineCycleInfo &getCycleInfo() { return CI; }
- const MachineCycleInfo &getCycleInfo() const { return CI; }
-
- bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void releaseMemory() override;
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
- // TODO: verify analysis
-};
-
-class MachineCycleInfoPrinterPass : public MachineFunctionPass {
-public:
- static char ID;
-
- MachineCycleInfoPrinterPass();
-
- bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-};
-
-} // namespace
-
char MachineCycleInfoWrapperPass::ID = 0;
MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
@@ -87,6 +52,16 @@ void MachineCycleInfoWrapperPass::releaseMemory() {
F = nullptr;
}
+class MachineCycleInfoPrinterPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineCycleInfoPrinterPass();
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
char MachineCycleInfoPrinterPass::ID = 0;
MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass()
@@ -111,3 +86,62 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) {
CI.print(errs());
return false;
}
+
+bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
+ MachineFunction *MF = I.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetInstrInfo *TII = ST.getInstrInfo();
+
+ // The instruction is cycle invariant if all of its operands are.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+    // An instruction that uses or defines a physical register can't, for
+    // example, be hoisted, so mark this as not invariant.
+ if (Register::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ // However, if the physreg is known to always be caller saved/restored
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+ !TII->isIgnorableUse(MO))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead can't be moved.
+ return false;
+ } else if (any_of(Cycle->getEntries(),
+ [&](const MachineBasicBlock *Block) {
+ return Block->isLiveIn(Reg);
+ })) {
+ // If the reg is live into any header of the cycle we can't hoist an
+ // instruction which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!");
+
+ // If the cycle contains the definition of an operand, then the instruction
+ // isn't cycle invariant.
+ if (Cycle->contains(MRI->getVRegDef(Reg)->getParent()))
+ return false;
+ }
+
+ // If we got this far, the instruction is cycle invariant!
+ return true;
+}
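
A hypothetical caller's view of the helper added above, assuming a MachineCycleInfo CI already computed for the current function: look up the innermost cycle containing the instruction's block, then query invariance before attempting any code motion.

// CI is an assumed, precomputed MachineCycleInfo for this function.
if (MachineCycle *C = CI.getCycle(MI.getParent())) {
  if (isCycleInvariant(C, MI)) {
    // MI computes the same value on every iteration of C; a pass could
    // consider hoisting it out of the cycle here.
  }
}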
diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp
index 599a81847592..b726a032ca18 100644
--- a/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -16,14 +16,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/Debugify.h"
diff --git a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
index a39dc79baaa8..346cfedde390 100644
--- a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -7,10 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineDominanceFrontier.h"
-#include "llvm/Analysis/DominanceFrontierImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp
index 28cff2a4f3f3..0632cde9c6f4 100644
--- a/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/llvm/lib/CodeGen/MachineDominators.cpp
@@ -15,6 +15,8 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index fd5ea5cad072..f58996ea90c6 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -44,7 +44,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -61,7 +60,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
@@ -109,6 +107,27 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
llvm_unreachable("Invalid machine function property");
}
+void setUnsafeStackSize(const Function &F, MachineFrameInfo &FrameInfo) {
+ if (!F.hasFnAttribute(Attribute::SafeStack))
+ return;
+
+ auto *Existing =
+ dyn_cast_or_null<MDTuple>(F.getMetadata(LLVMContext::MD_annotation));
+
+ if (!Existing || Existing->getNumOperands() != 2)
+ return;
+
+ auto *MetadataName = "unsafe-stack-size";
+ if (auto &N = Existing->getOperand(0)) {
+ if (cast<MDString>(N.get())->getString() == MetadataName) {
+ if (auto &Op = Existing->getOperand(1)) {
+ auto Val = mdconst::extract<ConstantInt>(Op)->getZExtValue();
+ FrameInfo.setUnsafeStackSize(Val);
+ }
+ }
+ }
+}
+
// Pin the vtable to this file.
void MachineFunction::Delegate::anchor() {}
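
setUnsafeStackSize above consumes a two-operand !annotation tuple of the form !{!"unsafe-stack-size", i64 <bytes>}. For orientation, a hedged sketch of how a producer might attach that metadata on the IR side (tagUnsafeStackSize is hypothetical; the real producer lives elsewhere in the tree):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"

void tagUnsafeStackSize(llvm::Function &F, uint64_t Bytes) {
  llvm::LLVMContext &Ctx = F.getContext();
  llvm::MDBuilder MDB(Ctx);
  llvm::Metadata *Ops[] = {
      MDB.createString("unsafe-stack-size"),
      MDB.createConstant(
          llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), Bytes))};
  F.setMetadata(llvm::LLVMContext::MD_annotation, llvm::MDTuple::get(Ctx, Ops));
}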
@@ -133,11 +152,11 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->deleteMachineBasicBlock(MBB);
}
-static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
+static inline Align getFnStackAlignment(const TargetSubtargetInfo *STI,
const Function &F) {
if (auto MA = F.getFnStackAlign())
- return MA->value();
- return STI->getFrameLowering()->getStackAlign().value();
+ return *MA;
+ return STI->getFrameLowering()->getStackAlign();
}
MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target,
@@ -177,6 +196,8 @@ void MachineFunction::init() {
/*ForcedRealign=*/CanRealignSP &&
F.hasFnAttribute(Attribute::StackAlignment));
+ setUnsafeStackSize(F, *FrameInfo);
+
if (F.hasFnAttribute(Attribute::StackAlignment))
FrameInfo->ensureMaxAlignment(*F.getFnStackAlign());
@@ -208,9 +229,7 @@ void MachineFunction::init() {
"Can't create a MachineFunction using a Module with a "
"Target-incompatible DataLayout attached\n");
- PSVManager =
- std::make_unique<PseudoSourceValueManager>(*(getSubtarget().
- getInstrInfo()));
+ PSVManager = std::make_unique<PseudoSourceValueManager>(getTarget());
}
MachineFunction::~MachineFunction() {
@@ -837,25 +856,6 @@ void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) {
LP.TypeIds.push_back(0);
}
-void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad,
- const Function *Filter,
- const BlockAddress *RecoverBA) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Filter;
- Handler.RecoverBA = RecoverBA;
- LP.SEHHandlers.push_back(Handler);
-}
-
-void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
- const Function *Cleanup) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Cleanup;
- Handler.RecoverBA = nullptr;
- LP.SEHHandlers.push_back(Handler);
-}
-
void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym,
ArrayRef<unsigned> Sites) {
LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
@@ -1012,7 +1012,32 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
}
}
-auto MachineFunction::salvageCopySSA(MachineInstr &MI)
+auto MachineFunction::salvageCopySSA(
+ MachineInstr &MI, DenseMap<Register, DebugInstrOperandPair> &DbgPHICache)
+ -> DebugInstrOperandPair {
+ const TargetInstrInfo &TII = *getSubtarget().getInstrInfo();
+
+ // Check whether this copy-like instruction has already been salvaged into
+ // an operand pair.
+ Register Dest;
+ if (auto CopyDstSrc = TII.isCopyInstr(MI)) {
+ Dest = CopyDstSrc->Destination->getReg();
+ } else {
+ assert(MI.isSubregToReg());
+ Dest = MI.getOperand(0).getReg();
+ }
+
+ auto CacheIt = DbgPHICache.find(Dest);
+ if (CacheIt != DbgPHICache.end())
+ return CacheIt->second;
+
+ // Calculate the instruction number to use, or install a DBG_PHI.
+ auto OperandPair = salvageCopySSAImpl(MI);
+ DbgPHICache.insert({Dest, OperandPair});
+ return OperandPair;
+}
+
+auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
-> DebugInstrOperandPair {
MachineRegisterInfo &MRI = getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
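The caching wrapper above is a plain memoization: one map lookup on the hot path, compute and insert on a miss, keyed by the copy's destination register so repeated debug references through the same copy reuse a single DBG_PHI. The pattern in isolation, with placeholder types (a sketch, not patch code):

  #include "llvm/ADT/DenseMap.h"
  #include <utility>

  // Find-or-compute over a DenseMap. Compute runs first and the result is
  // inserted afterwards, mirroring salvageCopySSA(): the computation itself
  // may recurse and touch the cache.
  template <typename Key, typename Value, typename ComputeFn>
  Value &findOrCompute(llvm::DenseMap<Key, Value> &Cache, const Key &K,
                       ComputeFn Compute) {
    auto It = Cache.find(K);
    if (It != Cache.end())
      return It->second;
    Value V = Compute(K);
    return Cache.insert({K, std::move(V)}).first->second;
  }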
@@ -1141,26 +1166,13 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
MachineBasicBlock &InsertBB = *CurInst->getParent();
// We reached the start of the block before finding a defining instruction.
- // It could be from a constant register, otherwise it must be an argument.
- if (TRI.isConstantPhysReg(State.first)) {
- // We can produce a DBG_PHI that identifies the constant physreg. Doesn't
- // matter where we put it, as it's constant valued.
- assert(CurInst->isCopy());
- } else if (State.first == TRI.getFrameRegister(*this)) {
- // LLVM IR is allowed to read the framepointer by calling a
- // llvm.frameaddress.* intrinsic. We can support this by emitting a
- // DBG_PHI $fp. This isn't ideal, because it extends the behaviours /
- // position that DBG_PHIs appear at, limiting what can be done later.
- // TODO: see if there's a better way of expressing these variable
- // locations.
- ;
- } else {
- // Assert that this is the entry block, or an EH pad. If it isn't, then
- // there is some code construct we don't recognise that deals with physregs
- // across blocks.
- assert(!State.first.isVirtual());
- assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad());
- }
+ // There are numerous scenarios where this can happen:
+ // * Constant physical registers,
+  // * Several intrinsics that allow LLVM-IR to read arbitrary registers,
+ // * Arguments in the entry block,
+ // * Exception handling landing pads.
+ // Validating all of them is too difficult, so just insert a DBG_PHI reading
+ // the variable value at this position, rather than checking it makes sense.
// Create DBG_PHI for specified physreg.
auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
@@ -1181,9 +1193,7 @@ void MachineFunction::finalizeDebugInstrRefs() {
MI.getOperand(1).ChangeToRegister(0, false);
};
- if (!useDebugInstrRef())
- return;
-
+ DenseMap<Register, DebugInstrOperandPair> ArgDbgPHIs;
for (auto &MBB : *this) {
for (auto &MI : MBB) {
if (!MI.isDebugRef() || !MI.getOperand(0).isReg())
@@ -1206,7 +1216,7 @@ void MachineFunction::finalizeDebugInstrRefs() {
// instruction that defines the source value, see salvageCopySSA docs
// for why this is important.
if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) {
- auto Result = salvageCopySSA(DefMI);
+ auto Result = salvageCopySSA(DefMI, ArgDbgPHIs);
MI.getOperand(0).ChangeToImmediate(Result.first);
MI.getOperand(1).setImm(Result.second);
} else {
diff --git a/llvm/lib/CodeGen/MachineFunctionPass.cpp b/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 16cde1f601f9..99494122d608 100644
--- a/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 0e0eb8b8e00f..81c97ba6a086 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -24,7 +24,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -34,7 +33,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -82,7 +80,7 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
- if (!Count.hasValue())
+ if (!Count)
return true;
if (PercentileCutoff > 0) {
@@ -108,9 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// We don't want to proceed further for cold functions
// or functions of unknown hotness. Lukewarm functions have no prefix.
Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
- if (SectionPrefix.hasValue() &&
- (SectionPrefix.getValue().equals("unlikely") ||
- SectionPrefix.getValue().equals("unknown"))) {
+ if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") ||
+ SectionPrefix.getValue().equals("unknown"))) {
return false;
}
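Several hunks in this patch migrate llvm::Optional from hasValue()/getValueOr() to the contextual bool conversion and the std::optional-style value_or(). The spellings are equivalent; a small sketch of the before/after forms (illustrative only):

  #include "llvm/ADT/Optional.h"
  #include <cstdint>

  void spellings(llvm::Optional<uint64_t> Count) {
    if (!Count.hasValue()) { /* old spelling */ }
    if (!Count)            { /* new spelling, same meaning */ }
    uint64_t A = Count.getValueOr(0); // old
    uint64_t B = Count.value_or(0);   // new, matches std::optional
    (void)A; (void)B;
  }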
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 85b266afceef..31f45e194a97 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -11,19 +11,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -38,42 +33,30 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <iterator>
#include <utility>
using namespace llvm;
@@ -163,19 +146,13 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
return nullptr;
}
-/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
-/// this instruction from their respective use lists. This requires that the
-/// operands already be on their use lists.
-void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+void MachineInstr::removeRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
for (MachineOperand &MO : operands())
if (MO.isReg())
MRI.removeRegOperandFromUseList(&MO);
}
-/// AddRegOperandsToUseLists - Add all of the register operands in
-/// this instruction from their respective use lists. This requires that the
-/// operands not be on their use lists yet.
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+void MachineInstr::addRegOperandsToUseLists(MachineRegisterInfo &MRI) {
for (MachineOperand &MO : operands())
if (MO.isReg())
MRI.addRegOperandToUseList(&MO);
@@ -232,16 +209,12 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
}
}
-#ifndef NDEBUG
- bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
- Op.getType() == MachineOperand::MO_MCSymbol;
// OpNo now points at the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
// RegMask operands go between the explicit and implicit operands.
- assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
- OpNo < MCID->getNumOperands() || isDebugOp) &&
+ assert((MCID->isVariadic() || OpNo < MCID->getNumOperands() ||
+ Op.isValidExcessOperand()) &&
"Trying to add an operand to a machine instr that is already done!");
-#endif
MachineRegisterInfo *MRI = getRegInfo();
@@ -300,10 +273,7 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
}
}
-/// RemoveOperand - Erase an operand from an instruction, leaving it with one
-/// fewer operand than it started with.
-///
-void MachineInstr::RemoveOperand(unsigned OpNo) {
+void MachineInstr::removeOperand(unsigned OpNo) {
assert(OpNo < getNumOperands() && "Invalid operand number");
untieRegOperand(OpNo);
@@ -1401,11 +1371,10 @@ bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const {
continue;
// A load from a constant PseudoSourceValue is invariant.
- if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
if (PSV->isConstant(&MFI))
continue;
-
- if (const Value *V = MMO->getValue()) {
+ } else if (const Value *V = MMO->getValue()) {
// If we have an AliasAnalysis, ask it whether the memory is constant.
if (AA &&
AA->pointsToConstantMemory(
@@ -1904,7 +1873,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg,
unsigned OpIdx = DeadOps.back();
if (getOperand(OpIdx).isImplicit() &&
(!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
- RemoveOperand(OpIdx);
+ removeOperand(OpIdx);
else
getOperand(OpIdx).setIsKill(false);
DeadOps.pop_back();
@@ -1969,7 +1938,7 @@ bool MachineInstr::addRegisterDead(Register Reg,
unsigned OpIdx = DeadOps.back();
if (getOperand(OpIdx).isImplicit() &&
(!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
- RemoveOperand(OpIdx);
+ removeOperand(OpIdx);
else
getOperand(OpIdx).setIsDead(false);
DeadOps.pop_back();
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 759cff179790..2f1d7b976264 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -16,7 +16,8 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include <utility>
using namespace llvm;
@@ -109,7 +110,7 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
MachineBasicBlock::instr_iterator LastMI) {
for (auto MII = FirstMI; MII != LastMI; ++MII)
- if (MII->getDebugLoc().get())
+ if (MII->getDebugLoc())
return MII->getDebugLoc();
return DebugLoc();
}
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 500cf8e0b79b..00d75f8231c7 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -240,7 +240,7 @@ namespace {
void ExitScopeIfDone(
MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
+ const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
@@ -696,19 +696,16 @@ void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
/// destroy ancestors which are now done.
void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ const DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
if (OpenChildren[Node])
return;
- // Pop scope.
- ExitScope(Node->getBlock());
-
- // Now traverse upwards to pop ancestors whose offsprings are all done.
- while (MachineDomTreeNode *Parent = ParentMap[Node]) {
- unsigned Left = --OpenChildren[Parent];
- if (Left != 0)
+  for (;;) {
+    ExitScope(Node->getBlock());
+    // Now traverse upwards to pop ancestors whose offspring are all done.
+ MachineDomTreeNode *Parent = ParentMap.lookup(Node);
+ if (!Parent || --OpenChildren[Parent] != 0)
break;
- ExitScope(Parent->getBlock());
Node = Parent;
}
}
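Once ParentMap is passed by const reference, operator[] is no longer usable: on a DenseMap it default-constructs a missing entry and therefore needs a mutable map. The patch switches to lookup(), which returns a value-initialized result without inserting. A sketch of the difference:

  #include "llvm/ADT/DenseMap.h"

  void lookupVsSubscript(llvm::DenseMap<int, int *> &Mutable,
                         const llvm::DenseMap<int, int *> &Const) {
    int *A = Mutable[42];      // inserts {42, nullptr} if 42 is missing
    int *B = Const.lookup(42); // returns nullptr if missing; no insertion
    (void)A; (void)B;
  }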
@@ -999,6 +996,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
if (I.isConvergent())
return false;
+ if (!TII->shouldHoist(I, CurLoop))
+ return false;
+
return true;
}
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 9b96bc5e5e7f..5cbded4b9264 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -17,13 +17,12 @@
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineLoopUtils.cpp b/llvm/lib/CodeGen/MachineLoopUtils.cpp
index fdcc8472f1c2..0e8335d4974d 100644
--- a/llvm/lib/CodeGen/MachineLoopUtils.cpp
+++ b/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -64,7 +63,11 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
if (Use.getParent()->getParent() != Loop)
Uses.push_back(&Use);
for (auto *Use : Uses) {
- MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+ assert(ConstrainRegClass &&
+ "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
Use->setReg(R);
}
}
@@ -90,25 +93,24 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
if (Remaps.count(R))
R = Remaps[R];
OrigPhi.getOperand(InitRegIdx).setReg(R);
- MI.RemoveOperand(LoopRegIdx + 1);
- MI.RemoveOperand(LoopRegIdx + 0);
+ MI.removeOperand(LoopRegIdx + 1);
+ MI.removeOperand(LoopRegIdx + 0);
} else {
// When peeling back, the initial value is the loop-carried value from
// the original loop.
Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg();
MI.getOperand(LoopRegIdx).setReg(LoopReg);
- MI.RemoveOperand(InitRegIdx + 1);
- MI.RemoveOperand(InitRegIdx + 0);
+ MI.removeOperand(InitRegIdx + 1);
+ MI.removeOperand(InitRegIdx + 0);
}
}
DebugLoc DL;
if (Direction == LPD_Front) {
- Preheader->replaceSuccessor(Loop, NewBB);
+ Preheader->ReplaceUsesOfBlockWith(Loop, NewBB);
NewBB->addSuccessor(Loop);
Loop->replacePhiUsesWith(Preheader, NewBB);
- if (TII->removeBranch(*Preheader) > 0)
- TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL);
+ Preheader->updateTerminator(Loop);
TII->removeBranch(*NewBB);
TII->insertBranch(*NewBB, Loop, nullptr, {}, DL);
} else {
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 31d4fc7d02bf..23d55a5df9f5 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -7,27 +7,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -40,174 +31,24 @@
using namespace llvm;
using namespace llvm::dwarf;
+static cl::opt<bool>
+ DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
// Out of line virtual method.
MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
-namespace llvm {
-
-class MMIAddrLabelMapCallbackPtr final : CallbackVH {
- MMIAddrLabelMap *Map = nullptr;
-
-public:
- MMIAddrLabelMapCallbackPtr() = default;
- MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
-
- void setPtr(BasicBlock *BB) {
- ValueHandleBase::operator=(BB);
- }
-
- void setMap(MMIAddrLabelMap *map) { Map = map; }
-
- void deleted() override;
- void allUsesReplacedWith(Value *V2) override;
-};
-
-class MMIAddrLabelMap {
- MCContext &Context;
- struct AddrLabelSymEntry {
- /// The symbols for the label.
- TinyPtrVector<MCSymbol *> Symbols;
-
- Function *Fn; // The containing function of the BasicBlock.
- unsigned Index; // The index in BBCallbacks for the BasicBlock.
- };
-
- DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
-
- /// Callbacks for the BasicBlock's that we have entries for. We use this so
- /// we get notified if a block is deleted or RAUWd.
- std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
-
- /// This is a per-function list of symbols whose corresponding BasicBlock got
- /// deleted. These symbols need to be emitted at some point in the file, so
- /// AsmPrinter emits them after the function body.
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>
- DeletedAddrLabelsNeedingEmission;
-
-public:
- MMIAddrLabelMap(MCContext &context) : Context(context) {}
-
- ~MMIAddrLabelMap() {
- assert(DeletedAddrLabelsNeedingEmission.empty() &&
- "Some labels for deleted blocks never got emitted");
- }
-
- ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
-
- void takeDeletedSymbolsForFunction(Function *F,
- std::vector<MCSymbol*> &Result);
-
- void UpdateForDeletedBlock(BasicBlock *BB);
- void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
-};
-
-} // end namespace llvm
-
-ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
- assert(BB->hasAddressTaken() &&
- "Shouldn't get label for block without address taken");
- AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
- // If we already had an entry for this block, just return it.
- if (!Entry.Symbols.empty()) {
- assert(BB->getParent() == Entry.Fn && "Parent changed");
- return Entry.Symbols;
- }
-
- // Otherwise, this is a new entry, create a new symbol for it and add an
- // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
- BBCallbacks.emplace_back(BB);
- BBCallbacks.back().setMap(this);
- Entry.Index = BBCallbacks.size() - 1;
- Entry.Fn = BB->getParent();
- MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol()
- : Context.createTempSymbol();
- Entry.Symbols.push_back(Sym);
- return Entry.Symbols;
-}
-
-/// If we have any deleted symbols for F, return them.
-void MMIAddrLabelMap::
-takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I =
- DeletedAddrLabelsNeedingEmission.find(F);
-
- // If there are no entries for the function, just return.
- if (I == DeletedAddrLabelsNeedingEmission.end()) return;
-
- // Otherwise, take the list.
- std::swap(Result, I->second);
- DeletedAddrLabelsNeedingEmission.erase(I);
-}
-
-void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
- // If the block got deleted, there is no need for the symbol. If the symbol
- // was already emitted, we can just forget about it, otherwise we need to
- // queue it up for later emission when the function is output.
- AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]);
- AddrLabelSymbols.erase(BB);
- assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?");
- BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
-
- assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
- "Block/parent mismatch");
-
- for (MCSymbol *Sym : Entry.Symbols) {
- if (Sym->isDefined())
- return;
-
- // If the block is not yet defined, we need to emit it at the end of the
- // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
- // for the containing Function. Since the block is being deleted, its
- // parent may already be removed, we have to get the function from 'Entry'.
- DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
- }
-}
-
-void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
- // Get the entry for the RAUW'd block and remove it from our map.
- AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]);
- AddrLabelSymbols.erase(Old);
- assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?");
-
- AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
-
- // If New is not address taken, just move our symbol over to it.
- if (NewEntry.Symbols.empty()) {
- BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
- NewEntry = std::move(OldEntry); // Set New's entry.
- return;
- }
-
- BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
-
- // Otherwise, we need to add the old symbols to the new block's set.
- llvm::append_range(NewEntry.Symbols, OldEntry.Symbols);
-}
-
-void MMIAddrLabelMapCallbackPtr::deleted() {
- Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
-}
-
-void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
- Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
-}
-
void MachineModuleInfo::initialize() {
ObjFileMMI = nullptr;
CurCallSite = 0;
NextFnNum = 0;
- UsesMSVCFloatingPoint = UsesMorestackAddr = false;
- HasSplitStack = HasNosplitStack = false;
- AddrLabelSymbols = nullptr;
+ UsesMSVCFloatingPoint = false;
+ DbgInfoAvailable = false;
}
void MachineModuleInfo::finalize() {
Personalities.clear();
- delete AddrLabelSymbols;
- AddrLabelSymbols = nullptr;
-
Context.reset();
// We don't clear the ExternalContext.
@@ -219,16 +60,11 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
: TM(std::move(MMI.TM)),
Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(),
MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr,
- nullptr, false),
+ &MMI.TM.Options.MCOptions, false),
MachineFunctions(std::move(MMI.MachineFunctions)) {
Context.setObjectFileInfo(MMI.TM.getObjFileLowering());
ObjFileMMI = MMI.ObjFileMMI;
CurCallSite = MMI.CurCallSite;
- UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint;
- UsesMorestackAddr = MMI.UsesMorestackAddr;
- HasSplitStack = MMI.HasSplitStack;
- HasNosplitStack = MMI.HasNosplitStack;
- AddrLabelSymbols = MMI.AddrLabelSymbols;
ExternalContext = MMI.ExternalContext;
TheModule = MMI.TheModule;
}
@@ -236,7 +72,7 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
: TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
- nullptr, nullptr, false) {
+ nullptr, &TM->Options.MCOptions, false) {
Context.setObjectFileInfo(TM->getObjFileLowering());
initialize();
}
@@ -245,7 +81,7 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
MCContext *ExtContext)
: TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
- nullptr, nullptr, false),
+ nullptr, &TM->Options.MCOptions, false),
ExternalContext(ExtContext) {
Context.setObjectFileInfo(TM->getObjFileLowering());
initialize();
@@ -253,25 +89,6 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
MachineModuleInfo::~MachineModuleInfo() { finalize(); }
-//===- Address of Block Management ----------------------------------------===//
-
-ArrayRef<MCSymbol *>
-MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
- // Lazily create AddrLabelSymbols.
- if (!AddrLabelSymbols)
- AddrLabelSymbols = new MMIAddrLabelMap(getContext());
- return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
-}
-
-void MachineModuleInfo::
-takeDeletedSymbolsForFunction(const Function *F,
- std::vector<MCSymbol*> &Result) {
- // If no blocks have had their addresses taken, we're done.
- if (!AddrLabelSymbols) return;
- return AddrLabelSymbols->
- takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
-}
-
/// \name Exception Handling
/// \{
@@ -318,6 +135,13 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) {
LastResult = nullptr;
}
+void MachineModuleInfo::insertFunction(const Function &F,
+ std::unique_ptr<MachineFunction> &&MF) {
+ auto I = MachineFunctions.insert(std::make_pair(&F, std::move(MF)));
+ assert(I.second && "machine function already mapped");
+ (void)I;
+}
+
namespace {
/// This pass frees the MachineFunction object associated with a Function.
@@ -409,7 +233,8 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
Ctx.diagnose(
DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
});
- MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
+ !M.debug_compile_units().empty();
return false;
}
@@ -424,6 +249,7 @@ MachineModuleInfo MachineModuleAnalysis::run(Module &M,
ModuleAnalysisManager &) {
MachineModuleInfo MMI(TM);
MMI.TheModule = &M;
- MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
+ !M.debug_compile_units().empty();
return MMI;
}
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 680dbe54ffaf..46ad1de78c46 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -14,9 +14,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MIRFormatter.h"
-#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 5347a7b0d890..631768ec986c 100644
--- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -53,10 +53,8 @@ void MachineOptimizationRemarkEmitter::emit(
LLVMContext &Ctx = MF.getFunction().getContext();
// Only emit it if its hotness meets the threshold.
- if (OptDiag.getHotness().getValueOr(0) <
- Ctx.getDiagnosticsHotnessThreshold()) {
+ if (OptDiag.getHotness().value_or(0) < Ctx.getDiagnosticsHotnessThreshold())
return;
- }
Ctx.diagnose(OptDiag);
}
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 7783b5e0d3cc..5da68abc8f6a 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,6 +59,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -82,9 +84,17 @@ using namespace llvm;
using namespace ore;
using namespace outliner;
+// Statistics for outlined functions.
STATISTIC(NumOutlined, "Number of candidates outlined");
STATISTIC(FunctionsCreated, "Number of functions created");
+// Statistics for instruction mapping.
+STATISTIC(NumLegalInUnsignedVec, "Number of legal instrs in unsigned vector");
+STATISTIC(NumIllegalInUnsignedVec,
+ "Number of illegal instrs in unsigned vector");
+STATISTIC(NumInvisible, "Number of invisible instrs in unsigned vector");
+STATISTIC(UnsignedVecSize, "Size of unsigned vector");
+
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
// functions. Since the outliner is confined to a single module (modulo LTO),
@@ -188,6 +198,8 @@ struct InstructionMapper {
assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
"Tried to assign DenseMap tombstone or empty key to instruction.");
+ // Statistics.
+ ++NumLegalInUnsignedVec;
return MINumber;
}
@@ -215,6 +227,8 @@ struct InstructionMapper {
InstrListForMBB.push_back(It);
UnsignedVecForMBB.push_back(IllegalInstrNumber);
IllegalInstrNumber--;
+ // Statistics.
+ ++NumIllegalInUnsignedVec;
assert(LegalInstrNumber < IllegalInstrNumber &&
"Instruction mapping overflow!");
@@ -293,6 +307,7 @@ struct InstructionMapper {
case InstrType::Invisible:
// Normally this is set by mapTo(Blah)Unsigned, but we just want to
// skip this instruction. So, unset the flag here.
+ ++NumInvisible;
AddedIllegalLastTime = false;
break;
}
@@ -623,6 +638,15 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
+ // Set uwtable, so we generate eh_frame.
+ UWTableKind UW = std::accumulate(
+ OF.Candidates.cbegin(), OF.Candidates.cend(), UWTableKind::None,
+ [](UWTableKind K, const outliner::Candidate &C) {
+ return std::max(K, C.getMF()->getFunction().getUWTableKind());
+ });
+ if (UW != UWTableKind::None)
+ F->setUWTableKind(UW);
+
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
Builder.CreateRetVoid();
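The uwtable hunk above folds the strongest unwind-table requirement across all candidates with std::accumulate, i.e. a max-reduction starting from UWTableKind::None. The same shape with a stand-in enum (a sketch, assuming only that the enumerators are ordered by strength, as UWTableKind's are):

  #include <algorithm>
  #include <numeric>
  #include <vector>

  enum class Kind { None = 0, Sync = 1, Async = 2 };

  // Fold a range down to its maximum, starting from the "none" sentinel.
  Kind strongestKind(const std::vector<Kind> &Kinds) {
    return std::accumulate(Kinds.begin(), Kinds.end(), Kind::None,
                           [](Kind Acc, Kind K) { return std::max(Acc, K); });
  }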
@@ -641,17 +665,20 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
++I) {
if (I->isDebugInstr())
continue;
- MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+
+ // Don't keep debug information for outlined instructions.
+ auto DL = DebugLoc();
if (I->isCFIInstruction()) {
- unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex();
+ unsigned CFIIndex = I->getOperand(0).getCFIIndex();
MCCFIInstruction CFI = Instrs[CFIIndex];
- (void)MF.addFrameInst(CFI);
+ BuildMI(MBB, MBB.end(), DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(MF.addFrameInst(CFI));
+ } else {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ NewMI->dropMemRefs(MF);
+ NewMI->setDebugLoc(DL);
+ MBB.insert(MBB.end(), NewMI);
}
- NewMI->dropMemRefs(MF);
-
- // Don't keep debug information for outlined instructions.
- NewMI->setDebugLoc(DebugLoc());
- MBB.insert(MBB.end(), NewMI);
}
// Set normal properties for a late MachineFunction.
@@ -831,9 +858,10 @@ bool MachineOutliner::outline(Module &M,
MBB.erase(std::next(StartIt), std::next(EndIt));
// Keep track of what we removed by marking them all as -1.
- std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
- Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
- [](unsigned &I) { I = static_cast<unsigned>(-1); });
+ for (unsigned &I :
+ llvm::make_range(Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1))
+ I = static_cast<unsigned>(-1);
OutlinedSomething = true;
// Statistics.
@@ -896,6 +924,9 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
// MBB is suitable for outlining. Map it to a list of unsigneds.
Mapper.convertToUnsignedVec(MBB, *TII);
}
+
+ // Statistics.
+ UnsignedVecSize = Mapper.UnsignedVec.size();
}
}
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 762395542b40..8d500398f55e 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -29,6 +29,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -43,6 +44,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -55,7 +57,6 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/CodeGen/RegisterPressure.h"
@@ -66,7 +67,6 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -109,7 +109,6 @@ STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
/// A command line option to turn software pipelining on or off.
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
- cl::ZeroOrMore,
cl::desc("Enable Software Pipelining"));
/// A command line option to enable SWP at -Os.
@@ -147,8 +146,8 @@ static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
#endif
static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
- cl::ReallyHidden, cl::init(false),
- cl::ZeroOrMore, cl::desc("Ignore RecMII"));
+ cl::ReallyHidden,
+ cl::desc("Ignore RecMII"));
static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
cl::init(false));
@@ -169,10 +168,9 @@ static cl::opt<bool> ExperimentalCodeGen(
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
-cl::opt<bool>
- SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
- cl::init(true), cl::ZeroOrMore,
- cl::desc("Enable CopyToPhi DAG Mutation"));
+cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+ cl::init(true),
+ cl::desc("Enable CopyToPhi DAG Mutation"));
} // end namespace llvm
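The cl::ZeroOrMore removals in this file (and elsewhere in the patch) follow from ZeroOrMore becoming the default occurrence behavior for cl::opt, which makes the explicit flag redundant. A typical declaration after the cleanup looks like this; the option name is illustrative:

  #include "llvm/Support/CommandLine.h"

  // ZeroOrMore is now implied; Hidden keeps the flag out of -help.
  static llvm::cl::opt<bool>
      ExampleFlag("example-flag", llvm::cl::Hidden, llvm::cl::init(false),
                  llvm::cl::desc("An illustrative boolean option"));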
@@ -255,6 +253,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
<< "Failed to pipeline loop";
});
+ LI.LoopPipelinerInfo.reset();
return Changed;
}
@@ -262,6 +261,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
Changed = swingModuloScheduler(L);
+ LI.LoopPipelinerInfo.reset();
return Changed;
}
@@ -354,7 +354,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
- if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
+ LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
+ if (!LI.LoopPipelinerInfo) {
LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
NumFailLoop++;
ORE->emit([&]() {
@@ -419,7 +420,7 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
- II_setByPragma);
+ II_setByPragma, LI.LoopPipelinerInfo.get());
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
@@ -513,7 +514,7 @@ void SwingSchedulerDAG::schedule() {
// Don't pipeline large loops.
if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
- << ", we don't pipleline large loops\n");
+ << ", we don't pipeline large loops\n");
NumFailLargeMaxMII++;
Pass.ORE->emit([&]() {
return MachineOptimizationRemarkAnalysis(
@@ -1297,8 +1298,7 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
for (auto W : AdjK[V]) {
if (W < S)
continue;
- if (B[W].count(SV) == 0)
- B[W].insert(SV);
+ B[W].insert(SV);
}
}
Stack.pop_back();
@@ -1422,7 +1422,7 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
/// We ignore the back-edge recurrence in order to avoid unbounded recursion
/// in the calculation of the ASAP, ALAP, etc functions.
static bool ignoreDependence(const SDep &D, bool isPred) {
- if (D.isArtificial())
+ if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
return true;
return D.getKind() == SDep::Anti && isPred;
}
@@ -1471,6 +1471,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
SUnit *SU = &SUnits[I];
for (const SDep &S : SU->Succs) {
SUnit *succ = S.getSUnit();
+ if (succ->isBoundaryNode())
+ continue;
if (S.getLatency() == 0)
zeroLatencyHeight =
std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
@@ -1575,7 +1577,9 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
return Path.contains(Cur);
bool FoundPath = false;
for (auto &SI : Cur->Succs)
- FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ if (!ignoreDependence(SI, false))
+ FoundPath |=
+ computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
for (auto &PI : Cur->Preds)
if (PI.getKind() == SDep::Anti)
FoundPath |=
@@ -1663,7 +1667,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
LLVM_DEBUG(
dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
<< TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
- << ":" << RPDelta.Excess.getUnitInc());
+ << ":" << RPDelta.Excess.getUnitInc() << "\n");
NS.setExceedPressure(SU);
break;
}
@@ -1718,7 +1722,7 @@ void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
}
/// Add the nodes that do not belong to a recurrence set into groups
-/// based upon connected componenets.
+/// based upon connected components.
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
SetVector<SUnit *> NodesAdded;
SmallPtrSet<SUnit *, 8> Visited;
@@ -1788,7 +1792,8 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
NodesAdded.insert(SU);
for (auto &SI : SU->Succs) {
SUnit *Successor = SI.getSUnit();
- if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
+ if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
+ NodesAdded.count(Successor) == 0)
addConnectedNodes(Successor, NewSet, NodesAdded);
}
for (auto &PI : SU->Preds) {
@@ -1803,8 +1808,7 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
SmallSetVector<SUnit *, 8> &Result) {
Result.clear();
- for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
- SUnit *SU = Set1[i];
+ for (SUnit *SU : Set1) {
if (Set2.count(SU) != 0)
Result.insert(SU);
}
@@ -2080,6 +2084,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
});
} while (++NI != NE && scheduleFound);
+ // If a schedule is found, ensure non-pipelined instructions are in stage 0
+ if (scheduleFound)
+ scheduleFound =
+ Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);
+
// If a schedule is found, check if it is a valid schedule too.
if (scheduleFound)
scheduleFound = Schedule.isValidSchedule(this);
@@ -2263,7 +2272,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
bool isSucc) {
if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
- Dep.isArtificial())
+ Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
return false;
if (!SwpPruneLoopCarried)
@@ -2430,7 +2439,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
while (!Worklist.empty()) {
const SDep &Cur = Worklist.pop_back_val();
SUnit *SuccSU = Cur.getSUnit();
- if (Visited.count(SuccSU))
+ if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
continue;
std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
if (it == InstrToCycle.end())
@@ -2697,21 +2706,91 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
return false;
}
+/// Determine transitive dependences of unpipelineable instructions
+SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
+ SmallSet<SUnit *, 8> DoNotPipeline;
+ SmallVector<SUnit *, 8> Worklist;
+
+ for (auto &SU : SSD->SUnits)
+ if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
+ Worklist.push_back(&SU);
+
+ while (!Worklist.empty()) {
+ auto SU = Worklist.pop_back_val();
+ if (DoNotPipeline.count(SU))
+ continue;
+ LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
+ DoNotPipeline.insert(SU);
+ for (auto &Dep : SU->Preds)
+ Worklist.push_back(Dep.getSUnit());
+ if (SU->getInstr()->isPHI())
+ for (auto &Dep : SU->Succs)
+ if (Dep.getKind() == SDep::Anti)
+ Worklist.push_back(Dep.getSUnit());
+ }
+ return DoNotPipeline;
+}
+
+// Determine all instructions upon which any unpipelineable instruction depends
+// and ensure that they are in stage 0. If unable to do so, return false.
+bool SMSchedule::normalizeNonPipelinedInstructions(
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
+ SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI);
+
+ int NewLastCycle = INT_MIN;
+ for (SUnit &SU : SSD->SUnits) {
+ if (!SU.isInstr())
+ continue;
+ if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) {
+ NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
+ continue;
+ }
+
+ // Put the non-pipelined instruction as early as possible in the schedule
+ int NewCycle = getFirstCycle();
+ for (auto &Dep : SU.Preds)
+ NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
+
+ int OldCycle = InstrToCycle[&SU];
+ if (OldCycle != NewCycle) {
+ InstrToCycle[&SU] = NewCycle;
+ auto &OldS = getInstructions(OldCycle);
+ llvm::erase_value(OldS, &SU);
+ getInstructions(NewCycle).emplace_back(&SU);
+ LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
+ << ") is not pipelined; moving from cycle " << OldCycle
+ << " to " << NewCycle << " Instr:" << *SU.getInstr());
+ }
+ NewLastCycle = std::max(NewLastCycle, NewCycle);
+ }
+ LastCycle = NewLastCycle;
+ return true;
+}
+
// Check if the generated schedule is valid. This function checks if
// an instruction that uses a physical register is scheduled in a
// different stage than the definition. The pipeliner does not handle
// physical register values that may cross a basic block boundary.
+// Furthermore, if a physical def/use pair is assigned to the same
+// cycle, orderDependence does not guarantee def/use ordering, so that
+// case should be considered invalid. (The test checks for both
+// earlier and same-cycle use to be more robust.)
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
for (SUnit &SU : SSD->SUnits) {
if (!SU.hasPhysRegDefs)
continue;
int StageDef = stageScheduled(&SU);
+ int CycleDef = InstrToCycle[&SU];
assert(StageDef != -1 && "Instruction should have been scheduled.");
for (auto &SI : SU.Succs)
- if (SI.isAssignedRegDep())
- if (Register::isPhysicalRegister(SI.getReg()))
+ if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
+ if (Register::isPhysicalRegister(SI.getReg())) {
if (stageScheduled(SI.getSUnit()) != StageDef)
return false;
+ if (InstrToCycle[SI.getSUnit()] <= CycleDef)
+ return false;
+ }
}
return true;
}
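computeUnpipelineableNodes() above is a standard worklist closure: seed with the nodes the target refuses to pipeline, then absorb predecessors (and anti-successors of PHIs) until a fixed point. The core pattern in isolation, with a placeholder Node type standing in for SUnit:

  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/SmallVector.h"

  struct Node { llvm::SmallVector<Node *, 4> Preds; };

  // Transitive closure over predecessor edges from a seed worklist.
  llvm::SmallSet<Node *, 8> closure(llvm::SmallVector<Node *, 8> Worklist) {
    llvm::SmallSet<Node *, 8> Closed;
    while (!Worklist.empty()) {
      Node *N = Worklist.pop_back_val();
      if (!Closed.insert(N).second)
        continue; // already visited
      for (Node *P : N->Preds)
        Worklist.push_back(P);
    }
    return Closed;
  }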
@@ -2998,7 +3077,7 @@ bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
if (!SCDesc->isValid()) {
LLVM_DEBUG({
dbgs() << "No valid Schedule Class Desc for schedClass!\n";
- dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+ dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
});
return true;
}
@@ -3038,7 +3117,7 @@ void ResourceManager::reserveResources(const MCInstrDesc *MID) {
if (!SCDesc->isValid()) {
LLVM_DEBUG({
dbgs() << "No valid Schedule Class Desc for schedClass!\n";
- dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+ dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
});
return;
}
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 1a4ad53ddf81..511bb80052c2 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -651,3 +650,18 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
}
return false;
}
+
+bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isArgumentRegister(MF, Reg);
+}
+
+bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isFixedRegister(MF, Reg);
+}
+
+bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg);
+}
diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp
index 8db893535daf..01cea85ecc7c 100644
--- a/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -14,7 +14,9 @@
#include "llvm/CodeGen/MachineSSAContext.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index b043d4c1b0c1..4e00a211713e 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
@@ -752,7 +751,7 @@ void ScheduleDAGMI::moveInstruction(
}
bool ScheduleDAGMI::checkSchedLimit() {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
CurrentTop = CurrentBottom;
return false;
@@ -920,12 +919,10 @@ void ScheduleDAGMI::placeDebugValues() {
MachineBasicBlock::iterator OrigPrevMI = P.second;
if (&*RegionBegin == DbgValue)
++RegionBegin;
- BB->splice(++OrigPrevMI, BB, DbgValue);
- if (OrigPrevMI == std::prev(RegionEnd))
+ BB->splice(std::next(OrigPrevMI), BB, DbgValue);
+ if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
RegionEnd = DbgValue;
}
- DbgValues.clear();
- FirstDbgValue = nullptr;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
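The placeDebugValues() hunk above replaces ++OrigPrevMI with std::next(OrigPrevMI): operator++ mutates the cached iterator, so the later comparison against it was testing a corrupted value, while std::next computes the successor without touching it. The same pitfall in miniature, using std::list as a stand-in for the instruction list:

  #include <iterator>
  #include <list>

  void spliceAfter(std::list<int> &L, std::list<int>::iterator Cached,
                   std::list<int>::iterator Elt) {
    // Buggy shape: ++Cached computes the position but also corrupts Cached
    // for any later use:
    //   L.splice(++Cached, L, Elt);
    // Fixed shape: compute the successor without modifying Cached.
    L.splice(std::next(Cached), L, Elt);
  }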
@@ -2008,7 +2005,7 @@ void SchedBoundary::reset() {
ReservedCycles.clear();
ReservedCyclesIndex.clear();
ResourceGroupSubUnitMasks.clear();
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
// reserved (SchedBoundary::ReservedCycles).
@@ -2196,7 +2193,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
unsigned NRCycle, InstanceIdx;
std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
if (NRCycle > CurrCycle) {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
@@ -2263,7 +2260,7 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
unsigned Idx) {
assert(SU->getInstr() && "Scheduled SUnit must have instr");
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
// ReadyCycle was been bumped up to the CurrCycle when this node was
// scheduled, but CurrCycle may have been eagerly advanced immediately after
// scheduling, so may now be greater than ReadyCycle.
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 0dbbc218e946..006ba9273dfb 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -16,17 +16,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -93,18 +96,18 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold(
cl::init(20), cl::Hidden);
static cl::opt<bool>
-SinkInstsIntoLoop("sink-insts-to-avoid-spills",
- cl::desc("Sink instructions into loops to avoid "
- "register spills"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<unsigned> SinkIntoLoopLimit(
- "machine-sink-loop-limit",
- cl::desc("The maximum number of instructions considered for loop sinking."),
+ SinkInstsIntoCycle("sink-insts-to-avoid-spills",
+ cl::desc("Sink instructions into cycles to avoid "
+ "register spills"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> SinkIntoCycleLimit(
+ "machine-sink-cycle-limit",
+ cl::desc("The maximum number of instructions considered for cycle sinking."),
cl::init(50), cl::Hidden);
STATISTIC(NumSunk, "Number of machine instructions sunk");
-STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop");
+STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
@@ -117,7 +120,7 @@ namespace {
MachineRegisterInfo *MRI; // Machine register information
MachineDominatorTree *DT; // Machine dominator tree
MachinePostDominatorTree *PDT; // Machine post dominator tree
- MachineLoopInfo *LI;
+ MachineCycleInfo *CI;
MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
@@ -178,8 +181,9 @@ namespace {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addPreserved<MachineCycleInfoWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfo>();
@@ -230,9 +234,9 @@ namespace {
MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
- void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
- SmallVectorImpl<MachineInstr *> &Candidates);
- bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+ void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
+ SmallVectorImpl<MachineInstr *> &Candidates);
+ bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -259,7 +263,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
@@ -376,26 +380,27 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
return false;
}
-void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
+void MachineSinking::FindCycleSinkCandidates(
+ MachineCycle *Cycle, MachineBasicBlock *BB,
SmallVectorImpl<MachineInstr *> &Candidates) {
for (auto &MI : *BB) {
- LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI);
+ LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
if (!TII->shouldSink(MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this "
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
"target\n");
continue;
}
- if (!L->isLoopInvariant(MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
+ if (!isCycleInvariant(Cycle, MI)) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
continue;
}
bool DontMoveAcrossStore = true;
if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
continue;
}
if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
+      LLVM_DEBUG(dbgs() << "CycleSink: Don't sink GOT or constant pool loads\n");
continue;
}
if (MI.isConvergent())
@@ -407,7 +412,7 @@ void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *B
if (!MRI->hasOneDef(MO.getReg()))
continue;
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
Candidates.push_back(&MI);
}
}
@@ -423,7 +428,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
PDT = &getAnalysis<MachinePostDominatorTree>();
- LI = &getAnalysis<MachineLoopInfo>();
+ CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -461,32 +466,33 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
EverMadeChange = true;
}
- if (SinkInstsIntoLoop) {
- SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
- for (auto *L : Loops) {
- MachineBasicBlock *Preheader = LI->findLoopPreheader(L);
+ if (SinkInstsIntoCycle) {
+ SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
+ CI->toplevel_end());
+ for (auto *Cycle : Cycles) {
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
if (!Preheader) {
- LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
continue;
}
SmallVector<MachineInstr *, 8> Candidates;
- FindLoopSinkCandidates(L, Preheader, Candidates);
+ FindCycleSinkCandidates(Cycle, Preheader, Candidates);
// Walk the candidates in reverse order so that we start with the use
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
unsigned i = 0;
for (MachineInstr *I : llvm::reverse(Candidates)) {
- if (i++ == SinkIntoLoopLimit) {
- LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
+ if (i++ == SinkIntoCycleLimit) {
+          LLVM_DEBUG(dbgs() << "CycleSink: Reached limit of instructions to "
                                "be analysed.\n");
break;
}
- if (!SinkIntoLoop(L, *I))
+ if (!SinkIntoCycle(Cycle, *I))
break;
EverMadeChange = true;
- ++NumLoopSunk;
+ ++NumCycleSunk;
}
}
}
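The driver walks each top-level cycle, requires a preheader to pull candidates from, and processes the candidate list in reverse under a hard budget; reverse order means the tail of a def-use chain is tried first, so an instruction is only sunk after everything that depends on it. A minimal sketch of that bounded reverse walk (trySink and the element type are stand-ins):

    #include <vector>

    template <typename T, typename Fn>
    unsigned sinkUpTo(std::vector<T *> &Candidates, unsigned Limit, Fn trySink) {
      unsigned Seen = 0, Sunk = 0;
      for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
        if (Seen++ == Limit)
          break;          // analysis budget exhausted
        if (!trySink(*It))
          break;          // remaining entries feed this one; give up
        ++Sunk;
      }
      return Sunk;
    }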
@@ -508,12 +514,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
- // unreachable loop there may be nowhere to stop.
+ // unreachable cycle there may be nowhere to stop.
if (!DT->isReachableFromEntry(&MBB)) return false;
bool MadeChange = false;
- // Cache all successors, sorted by frequency info and loop depth.
+ // Cache all successors, sorted by frequency info and cycle depth.
AllSuccsCache AllSuccessors;
// Walk the basic block bottom-up. Remember if we saw a store.
@@ -632,13 +638,16 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
return false;
- // Avoid breaking back edge. From == To means backedge for single BB loop.
+ // Avoid breaking back edge. From == To means backedge for single BB cycle.
if (!SplitEdges || FromBB == ToBB)
return false;
- // Check for backedges of more "complex" loops.
- if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
- LI->isLoopHeader(ToBB))
+ MachineCycle *FromCycle = CI->getCycle(FromBB);
+ MachineCycle *ToCycle = CI->getCycle(ToBB);
+
+ // Check for backedges of more "complex" cycles.
+ if (FromCycle == ToCycle && FromCycle &&
+ (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
return false;
// It's not always legal to break critical edges and sink the computation
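The rewritten backedge test only trusts the header comparison when the cycle is reducible: a reducible cycle has a unique header, so an in-cycle edge targeting it is a backedge, while an irreducible cycle has several entry blocks and any edge that stays inside it must be treated conservatively. A toy model of that decision, with Cycle standing in for MachineCycle:

    struct Cycle {
      bool Reducible;
      const void *Header; // unique entry block, meaningful when Reducible
    };

    // From/To are the innermost cycles of the edge's endpoints (may be null).
    bool mayBeBackedge(const Cycle *From, const Cycle *To, const void *ToBB) {
      if (!From || From != To)
        return false;               // edge does not stay inside one cycle
      if (!From->Reducible)
        return true;                // no unique header: assume the worst
      return From->Header == ToBB;  // classic natural-loop backedge
    }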
@@ -741,9 +750,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
if (!PDT->dominates(SuccToSinkTo, MBB))
return true;
- // It is profitable to sink an instruction from a deeper loop to a shallower
- // loop, even if the latter post-dominates the former (PR21115).
- if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
+ // It is profitable to sink an instruction from a deeper cycle to a shallower
+ // cycle, even if the latter post-dominates the former (PR21115).
+ if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
return true;
// Check if only use in post dominated block is PHI instruction.
@@ -764,11 +773,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
- MachineLoop *ML = LI->getLoopFor(MBB);
+ MachineCycle *MCycle = CI->getCycle(MBB);
- // If the instruction is not inside a loop, it is not profitable to sink MI to
+ // If the instruction is not inside a cycle, it is not profitable to sink MI to
  // the post-dominating block SuccToSinkTo.
- if (!ML)
+ if (!MCycle)
return false;
auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
@@ -786,7 +795,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
return false;
};
- // If this instruction is inside a loop and sinking this instruction can make
+  // If this instruction is inside a cycle and sinking it can shorten the
   // live ranges of more registers, it is still profitable.
for (const MachineOperand &MO : MI.operands()) {
// Ignore non-register operands.
@@ -814,14 +823,17 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
return false;
} else {
MachineInstr *DefMI = MRI->getVRegDef(Reg);
- // DefMI is defined outside of loop. There should be no live range
- // impact for this operand. Defination outside of loop means:
- // 1: defination is outside of loop.
- // 2: defination is in this loop, but it is a PHI in the loop header.
- if (LI->getLoopFor(DefMI->getParent()) != ML ||
- (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+ if (!DefMI)
+ continue;
+ MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
+      // DefMI is defined outside of the cycle. There should be no live range
+      // impact for this operand. A definition outside of the cycle means:
+      // 1: the definition is outside of the cycle.
+      // 2: the definition is in this cycle, but it is a PHI in the cycle header.
+ if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
+ Cycle->getHeader() == DefMI->getParent()))
continue;
- // The DefMI is defined inside the loop.
+ // The DefMI is defined inside the cycle.
// If sinking this operand makes some register pressure set exceed limit,
// it is not profitable.
if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
@@ -831,8 +843,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
}
}
- // If MI is in loop and all its operands are alive across the whole loop or if
- // no operand sinking make register pressure set exceed limit, it is
+ // If MI is in cycle and all its operands are alive across the whole cycle or
+ // if no operand sinking make register pressure set exceed limit, it is
// profitable to sink MI.
return true;
}
@@ -864,14 +876,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccs.push_back(DTChild->getBlock());
}
- // Sort Successors according to their loop depth or block frequency info.
+ // Sort Successors according to their cycle depth or block frequency info.
llvm::stable_sort(
AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
return HasBlockFreq ? LHSFreq < RHSFreq
- : LI->getLoopDepth(L) < LI->getLoopDepth(R);
+ : CI->getCycleDepth(L) < CI->getCycleDepth(R);
});
auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
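The comparator prefers block frequencies only when both sides have a nonzero (i.e. reliable) count, and otherwise falls back to cycle depth; the stable sort keeps the original successor order for ties. The same ordering in standalone form:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Succ {
      uint64_t Freq;  // 0 encodes "no reliable profile data"
      unsigned Depth; // cycle nesting depth
    };

    void sortSuccessors(std::vector<Succ> &S) {
      std::stable_sort(S.begin(), S.end(), [](const Succ &L, const Succ &R) {
        bool HaveFreq = L.Freq != 0 && R.Freq != 0;
        return HaveFreq ? L.Freq < R.Freq : L.Depth < R.Depth;
      });
    }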
@@ -886,7 +898,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) {
assert (MBB && "Invalid MachineBasicBlock!");
- // Loop over all the operands of the specified instruction. If there is
+ // loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
// SuccToSinkTo - This is the successor to sink this instruction to, once we
@@ -933,7 +945,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// Otherwise, we should look at all the successors and decide which one
// we should sink to. If we have reliable block frequency information
// (frequency != 0) available, give successors with smaller frequencies
- // higher priority, otherwise prioritize smaller loop depths.
+ // higher priority, otherwise prioritize smaller cycle depths.
for (MachineBasicBlock *SuccBlock :
GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
bool LocalUse = false;
@@ -956,7 +968,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
}
// It is not possible to sink an instruction into its own block. This can
- // happen with loops.
+ // happen with cycles.
if (MBB == SuccToSinkTo)
return nullptr;
@@ -1081,8 +1093,7 @@ using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<MIRegs> &DbgValuesToSink) {
-
+ ArrayRef<MIRegs> DbgValuesToSink) {
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
// wrong location information.
@@ -1101,7 +1112,7 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
// DBG_VALUE location as 'undef', indicating that any earlier variable
// location should be terminated as we've optimised away the value at this
// point.
- for (auto DbgValueToSink : DbgValuesToSink) {
+ for (const auto &DbgValueToSink : DbgValuesToSink) {
MachineInstr *DbgMI = DbgValueToSink.first;
MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
SuccToSinkTo.insert(InsertPos, NewDbgMI);
@@ -1166,7 +1177,7 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
// If this BB is too big or the block number in straight line between From
// and To is too big, stop searching to save compiling time.
- if (BB->size() > SinkLoadInstsPerBlockThreshold ||
+ if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
for (auto *DomBB : HandledDomBlocks) {
if (DomBB != BB && DT->dominates(DomBB, BB))
@@ -1211,69 +1222,78 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
return HasAliasedStore;
}
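Switching from size() to sizeWithoutDebugLargerThan makes this bailout threshold independent of how many DBG_* instructions a block carries, so compiling with or without debug info cannot flip the codegen decision; the helper also stops counting as soon as the limit is crossed. A standalone model of that behavior:

    #include <cstddef>
    #include <vector>

    struct Instr { bool IsDebug; };

    bool sizeWithoutDebugLargerThan(const std::vector<Instr> &BB, size_t Limit) {
      size_t N = 0;
      for (const Instr &I : BB)
        if (!I.IsDebug && ++N > Limit)
          return true; // early exit once the threshold is crossed
      return false;
    }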
-/// Sink instructions into loops if profitable. This especially tries to prevent
-/// register spills caused by register pressure if there is little to no
-/// overhead moving instructions into loops.
-bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
- LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I);
- MachineBasicBlock *Preheader = L->getLoopPreheader();
- assert(Preheader && "Loop sink needs a preheader block");
+/// Sink instructions into cycles if profitable. This especially tries to
+/// prevent register spills caused by register pressure if there is little to no
+/// overhead moving instructions into cycles.
+bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+ assert(Preheader && "Cycle sink needs a preheader block");
MachineBasicBlock *SinkBlock = nullptr;
bool CanSink = true;
const MachineOperand &MO = I.getOperand(0);
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
- LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI);
- if (!L->contains(&MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
+ if (!Cycle->contains(MI.getParent())) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
CanSink = false;
break;
}
// FIXME: Come up with a proper cost model that estimates whether sinking
- // the instruction (and thus possibly executing it on every loop
+ // the instruction (and thus possibly executing it on every cycle
// iteration) is more expensive than a register.
// For now assumes that copies are cheap and thus almost always worth it.
if (!MI.isCopy()) {
- LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
CanSink = false;
break;
}
if (!SinkBlock) {
SinkBlock = MI.getParent();
- LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
+ LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
<< printMBBReference(*SinkBlock) << "\n");
continue;
}
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
CanSink = false;
break;
}
- LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " <<
+ LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " <<
printMBBReference(*SinkBlock) << "\n");
}
if (!CanSink) {
- LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
return false;
}
if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
return false;
}
if (SinkBlock == Preheader) {
- LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
+ LLVM_DEBUG(
+ dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
return false;
}
- if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) {
- LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n");
+ if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
+ LLVM_DEBUG(
+        dbgs() << "CycleSink: Not sinking, block too large to analyse.\n");
return false;
}
- LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
- SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
+ LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
+ SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
+ I);
+
+  // Conservatively clear any kill flags on uses of the sunk instruction.
+ for (MachineOperand &MO : I.operands()) {
+ if (MO.isReg() && MO.readsReg())
+ RegsToClearKillFlags.insert(MO.getReg());
+ }
// The instruction is moved from its basic block, so do not retain the
// debug information.
@@ -1282,6 +1302,45 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
return true;
}
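SinkIntoCycle picks the sink block by folding the nearest common dominator over the blocks of all uses, bailing out if any use lies outside the cycle or is not a cheap copy. The folding step in isolation, with Block and ncd standing in for MachineBasicBlock and MachineDominatorTree::findNearestCommonDominator:

    #include <vector>

    struct Block;
    using NCDFn = Block *(*)(Block *, Block *);

    Block *chooseSinkBlock(const std::vector<Block *> &UseBlocks, NCDFn ncd) {
      Block *Sink = nullptr;
      for (Block *B : UseBlocks) {
        Sink = Sink ? ncd(Sink, B) : B; // the first use seeds the answer
        if (!Sink)
          return nullptr; // no common dominator: give up, as the pass does
      }
      return Sink; // also null when there are no uses at all
    }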
+/// Return true if a target defined block prologue instruction interferes
+/// with a sink candidate.
+static bool blockPrologueInterferes(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator End,
+ MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ const MachineRegisterInfo *MRI) {
+ if (BB->begin() == End)
+ return false; // no prologue
+ for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
+ // Only check target defined prologue instructions
+ if (!TII->isBasicBlockPrologue(*PI))
+ continue;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ if (Register::isPhysicalRegister(Reg) &&
+ (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
+ continue;
+ if (PI->modifiesRegister(Reg, TRI))
+ return true;
+ } else {
+ if (PI->readsRegister(Reg, TRI))
+ return true;
+ // Check for interference with non-dead defs
+ auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
+ if (DefOp && !DefOp->isDead())
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
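+// blockPrologueInterferes scans the target-defined prologue instructions ahead
+// of the chosen insertion point and rejects the sink when the candidate would
+// read a register the prologue writes, or would write a register the prologue
+// reads or defines non-dead (ignorable and constant-physreg uses excepted).
+// The core conflict rule, as a standalone boolean sketch:
+//
+//   // CandIsUse: the candidate reads Reg; otherwise it defines Reg.
+//   bool interferes(bool CandIsUse, bool PrologueReads, bool PrologueWrites,
+//                   bool PrologueDefIsDead) {
+//     if (CandIsUse)
+//       return PrologueWrites; // read must not observe the prologue's def
+//     return PrologueReads || (PrologueWrites && !PrologueDefIsDead);
+//   }
+//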
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -1356,9 +1415,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
TryBreak = true;
}
- // Don't sink instructions into a loop.
- if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
- LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ // Don't sink instructions into a cycle.
+ if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
+ (!CI->getCycle(SuccToSinkTo)->isReducible() ||
+ CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
+ LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
TryBreak = true;
}
@@ -1393,9 +1454,12 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
}
// Determine where to insert into. Skip phi nodes.
- MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
- while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
- ++InsertPos;
+ MachineBasicBlock::iterator InsertPos =
+ SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
+ LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
+ return false;
+ }
// Collect debug users of any vreg that this inst defines.
SmallVector<MIRegs, 4> DbgUsersToSink;
@@ -1684,14 +1748,6 @@ static bool hasRegisterDependency(MachineInstr *MI,
return HasRegDependency;
}
-static SmallSet<MCRegister, 4> getRegUnits(MCRegister Reg,
- const TargetRegisterInfo *TRI) {
- SmallSet<MCRegister, 4> RegUnits;
- for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI)
- RegUnits.insert(*RI);
- return RegUnits;
-}
-
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
MachineFunction &MF,
const TargetRegisterInfo *TRI,
@@ -1737,14 +1793,15 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
// Record debug use of each reg unit.
- SmallSet<MCRegister, 4> RegUnits = getRegUnits(MO.getReg(), TRI);
- for (MCRegister Reg : RegUnits)
- MIUnits[Reg].push_back(MO.getReg());
+ for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid();
+ ++RI)
+ MIUnits[*RI].push_back(MO.getReg());
}
}
if (IsValid) {
- for (auto RegOps : MIUnits)
- SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});
+ for (auto &RegOps : MIUnits)
+ SeenDbgInstrs[RegOps.first].emplace_back(&MI,
+ std::move(RegOps.second));
}
continue;
}
@@ -1791,22 +1848,29 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
if (!MO.isReg() || !MO.isDef())
continue;
- SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (MCRegister Reg : Units) {
- for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) {
+ for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) {
+ for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) {
auto &Regs = DbgValsToSinkMap[MIRegs.first];
for (unsigned Reg : MIRegs.second)
Regs.push_back(Reg);
}
}
}
- SmallVector<MIRegs, 4> DbgValsToSink(DbgValsToSinkMap.begin(),
- DbgValsToSinkMap.end());
+ auto DbgValsToSink = DbgValsToSinkMap.takeVector();
+
+ LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
+
+ MachineBasicBlock::iterator InsertPos =
+ SuccBB->SkipPHIsAndLabels(SuccBB->begin());
+ if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
+ LLVM_DEBUG(
+ dbgs() << " *** Not sinking: prologue interference\n");
+ continue;
+ }
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
- MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 0803c2b8b85a..a85dbf1de1ee 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -12,29 +12,30 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineStableHash.h"
-#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/MIRFormatter.h"
-#include "llvm/CodeGen/MIRPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/StableHashing.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/IRPrintingPasses.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/ModuleSlotTracker.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/ErrorHandling.h"
#define DEBUG_TYPE "machine-stable-hash"
@@ -64,7 +65,10 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_Register:
if (Register::isVirtualRegister(MO.getReg())) {
const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo();
- return MRI.getVRegDef(MO.getReg())->getOpcode();
+ SmallVector<unsigned> DefOpcodes;
+ for (auto &Def : MRI.def_instructions(MO.getReg()))
+ DefOpcodes.push_back(Def.getOpcode());
+ return hash_combine_range(DefOpcodes.begin(), DefOpcodes.end());
}
// Register operands don't have target flags.
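The old hash keyed a virtual-register operand on the single opcode returned by getVRegDef, which is only well-defined in SSA; the replacement folds the opcodes of every defining instruction into one value, so the hash stays total and deterministic when a vreg has multiple definitions. The combining idea in standalone form (the FNV-style mix is illustrative, not LLVM's actual stable-hash function):

    #include <cstdint>
    #include <vector>

    uint64_t mix(uint64_t H, uint64_t V) {
      return (H ^ V) * 1099511628211ULL; // FNV-1a style step
    }

    uint64_t hashDefOpcodes(const std::vector<unsigned> &DefOpcodes) {
      uint64_t H = 1469598103934665603ULL; // FNV offset basis
      for (unsigned Op : DefOpcodes)
        H = mix(H, Op); // order-sensitive, but def order is deterministic
      return H;
    }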
@@ -192,3 +196,21 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
return stable_hash_combine_range(HashComponents.begin(),
HashComponents.end());
}
+
+stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
+ SmallVector<stable_hash> HashComponents;
+ // TODO: Hash more stuff like block alignment and branch probabilities.
+ for (auto &MI : MBB)
+ HashComponents.push_back(stableHashValue(MI));
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
+
+stable_hash llvm::stableHashValue(const MachineFunction &MF) {
+ SmallVector<stable_hash> HashComponents;
+ // TODO: Hash lots more stuff like function alignment and stack objects.
+ for (auto &MBB : MF)
+ HashComponents.push_back(stableHashValue(MBB));
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
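The two new overloads make the stable hash structural: a block hashes the sequence of its instruction hashes, and a function hashes the sequence of its block hashes, so identical programs hash identically across runs regardless of pointer values. A compact standalone model of that layering (the mixing function is again illustrative):

    #include <cstdint>
    #include <vector>

    using Hash = uint64_t;

    Hash combine(const std::vector<Hash> &Parts) {
      Hash H = 1469598103934665603ULL;
      for (Hash P : Parts)
        H = (H ^ P) * 1099511628211ULL; // deterministic, order-sensitive
      return H;
    }

    struct Instr  { Hash stableHash() const { return 0; /* per-instr hash */ } };
    struct BlockT { std::vector<Instr> Instrs; };
    struct FuncT  { std::vector<BlockT> Blocks; };

    Hash stableHash(const BlockT &B) {
      std::vector<Hash> Parts;
      for (const Instr &I : B.Instrs)
        Parts.push_back(I.stableHash());
      return combine(Parts);
    }

    Hash stableHash(const FuncT &F) {
      std::vector<Hash> Parts;
      for (const BlockT &B : F.Blocks)
        Parts.push_back(stableHash(B));
      return combine(Parts);
    }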
diff --git a/llvm/lib/CodeGen/MachineStripDebug.cpp b/llvm/lib/CodeGen/MachineStripDebug.cpp
index 86cf4999d4b0..6128248a028e 100644
--- a/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -10,10 +10,10 @@
/// tests can be debugified without affecting the output MIR.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Debugify.h"
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index c9d3e473062b..db04f2bcc095 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -32,10 +32,10 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -48,6 +48,8 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -55,12 +57,14 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCTargetOptions.h"
@@ -95,6 +99,7 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
+ const RegisterBankInfo *RBI;
unsigned foundErrors;
@@ -370,6 +375,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
TM = &MF.getTarget();
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
+ RBI = MF.getSubtarget().getRegBankInfo();
MRI = &MF.getRegInfo();
const bool isFunctionFailedISel = MF.getProperties().hasProperty(
@@ -442,7 +448,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &Op = MI.getOperand(I);
if (Op.getParent() != &MI) {
- // Make sure to use correct addOperand / RemoveOperand / ChangeTo
+ // Make sure to use correct addOperand / removeOperand / ChangeTo
// functions when replacing operands of a MachineInstr.
report("Instruction has operand with wrong parent set", &MI);
}
@@ -1000,17 +1006,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- if (MRI->getRegBankOrNull(Src) != MRI->getRegBankOrNull(Dst)) {
- report(
- Twine(OpcName, " source and destination register banks must match"),
- MI);
+ const RegisterBank *SrcRB = RBI->getRegBank(Src, *MRI, *TRI);
+ const RegisterBank *DstRB = RBI->getRegBank(Dst, *MRI, *TRI);
+
+ // Allow only the source bank to be set.
+ if ((SrcRB && DstRB && SrcRB != DstRB) || (DstRB && !SrcRB)) {
+ report(Twine(OpcName, " cannot change register bank"), MI);
break;
}
- if (MRI->getRegClassOrNull(Src) != MRI->getRegClassOrNull(Dst))
+ // Don't allow a class change. Do allow member class->regbank.
+ const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(Dst);
+ if (DstRC && DstRC != MRI->getRegClassOrNull(Src)) {
report(
Twine(OpcName, " source and destination register classes must match"),
MI);
+ break;
+ }
break;
}
@@ -1072,6 +1084,18 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (ValTy.getSizeInBytes() < MMO.getSize())
report("store memory size cannot exceed value size", MI);
}
+
+ const AtomicOrdering Order = MMO.getSuccessOrdering();
+ if (Opc == TargetOpcode::G_STORE) {
+ if (Order == AtomicOrdering::Acquire ||
+ Order == AtomicOrdering::AcquireRelease)
+ report("atomic store cannot use acquire ordering", MI);
+
+ } else {
+ if (Order == AtomicOrdering::Release ||
+ Order == AtomicOrdering::AcquireRelease)
+ report("atomic load cannot use release ordering", MI);
+ }
}
break;
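The new verifier rule encodes the LLVM atomic model: acquire semantics belong to reads and release semantics to writes, so a plain atomic store may be monotonic, release, or sequentially consistent but never acquire, and a plain atomic load may be monotonic, acquire, or sequentially consistent but never release (acquire-release is reserved for read-modify-write operations). The same rule as a standalone predicate:

    enum class Ordering {
      NotAtomic, Unordered, Monotonic, Acquire, Release,
      AcquireRelease, SequentiallyConsistent
    };

    bool isValidPlainAtomicOrdering(bool IsStore, Ordering O) {
      if (O == Ordering::AcquireRelease)
        return false;                    // only for read-modify-write ops
      return IsStore ? O != Ordering::Acquire : O != Ordering::Release;
    }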
@@ -1628,6 +1652,43 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
verifyAllRegOpsScalar(*MI, *MRI);
break;
}
+ case TargetOpcode::G_IS_FPCLASS: {
+ LLT DestTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT DestEltTy = DestTy.getScalarType();
+ if (!DestEltTy.isScalar()) {
+ report("Destination must be a scalar or vector of scalars", MI);
+ break;
+ }
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT SrcEltTy = SrcTy.getScalarType();
+ if (!SrcEltTy.isScalar()) {
+ report("Source must be a scalar or vector of scalars", MI);
+ break;
+ }
+ if (!verifyVectorElementMatch(DestTy, SrcTy, MI))
+ break;
+ const MachineOperand &TestMO = MI->getOperand(2);
+ if (!TestMO.isImm()) {
+ report("floating-point class set (operand 2) must be an immediate", MI);
+ break;
+ }
+ int64_t Test = TestMO.getImm();
+ if (Test < 0 || Test > fcAllFlags) {
+ report("Incorrect floating-point class set (operand 2)", MI);
+ break;
+ }
+ const MachineOperand &SemanticsMO = MI->getOperand(3);
+ if (!SemanticsMO.isImm()) {
+ report("floating-point semantics (operand 3) must be an immediate", MI);
+ break;
+ }
+ int64_t Semantics = SemanticsMO.getImm();
+ if (Semantics < 0 || Semantics > APFloat::S_MaxSemantics) {
+ report("Incorrect floating-point semantics (operand 3)", MI);
+ break;
+ }
+ break;
+ }
default:
break;
}
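The operand-2 range check relies on fcAllFlags being a contiguous low bitmask covering every floating-point class bit, so for a non-negative immediate, Test > fcAllFlags is exactly "some bit outside the mask is set". The equivalent explicit form, as a sketch under that contiguity assumption:

    #include <cstdint>

    // Valid iff Test is non-negative and uses only bits inside AllFlags;
    // equivalent to (Test >= 0 && Test <= AllFlags) for a contiguous mask.
    bool isValidClassTest(int64_t Test, int64_t AllFlags) {
      return Test >= 0 && (Test & ~AllFlags) == 0;
    }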
@@ -1912,6 +1973,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugInstr())
checkLiveness(MO, MONum);
+ if (MO->isDef() && MO->isUndef() && !MO->getSubReg() &&
+ MO->getReg().isVirtual()) // TODO: Apply to physregs too
+ report("Undef virtual register def operands require a subregister", MO, MONum);
+
// Verify the consistency of tied operands.
if (MO->isTied()) {
unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
@@ -2148,6 +2213,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
break;
+ case MachineOperand::MO_CFIIndex:
+ if (MO->getCFIIndex() >= MF->getFrameInstructions().size())
+ report("CFI instruction has invalid index", MO, MONum);
+ break;
+
default:
break;
}
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index b0760322064c..fa5df68b8abc 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -12,11 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index f91a9d2c3a32..3245d9649be1 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -11,6 +11,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
@@ -157,7 +158,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
SmallVector<MachineBasicBlock *, 4> EpilogBBs;
// Generate the epilog instructions to complete the pipeline.
- generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
+ generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
// We need this step because the register allocation doesn't handle some
// situations well, so we insert copies to help out.
@@ -239,11 +240,9 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
/// Generate the pipeline epilog code. The epilog code finishes the iterations
/// that were started in either the prolog or the kernel. We create a basic
/// block for each stage that needs to complete.
-void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
- MachineBasicBlock *KernelBB,
- ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs,
- MBBVectorTy &PrologBBs) {
+void ModuloScheduleExpander::generateEpilog(
+ unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
+ ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
// We need to change the branch from the kernel to the first epilog block, so
// this call to analyze branch uses the kernel rather than the original BB.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -313,7 +312,12 @@ void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
// Create a branch to the new epilog from the kernel.
// Remove the original branch and add a new branch to the epilog.
TII->removeBranch(*KernelBB);
- TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ assert((OrigBB == TBB || OrigBB == FBB) &&
+ "Unable to determine looping branch direction");
+ if (OrigBB != TBB)
+ TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
+ else
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
// Add a branch to the loop exit.
if (EpilogBBs.size() > 0) {
MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
@@ -813,8 +817,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
break;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
if (MI.getOperand(i + 1).getMBB() == Incoming) {
- MI.RemoveOperand(i + 1);
- MI.RemoveOperand(i);
+ MI.removeOperand(i + 1);
+ MI.removeOperand(i);
break;
}
}
@@ -846,7 +850,7 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
Optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
unsigned numAdded = 0;
- if (!StaticallyGreater.hasValue()) {
+ if (!StaticallyGreater) {
Prolog->addSuccessor(Epilog);
numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
} else if (*StaticallyGreater == false) {
@@ -999,7 +1003,7 @@ MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
}
/// Update the machine instruction with new virtual registers. This
-/// function may change the defintions and/or uses.
+/// function may change the definitions and/or uses.
void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
bool LastDef,
unsigned CurStageNum,
@@ -1159,8 +1163,17 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
ReplaceReg = NewReg;
if (ReplaceReg) {
- MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
- UseOp.setReg(ReplaceReg);
+ const TargetRegisterClass *NRC =
+ MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+ if (NRC)
+ UseOp.setReg(ReplaceReg);
+ else {
+ Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ SplitReg)
+ .addReg(ReplaceReg);
+ UseOp.setReg(SplitReg);
+ }
}
}
}
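constrainRegClass returns null when no register class satisfies both the existing constraints on ReplaceReg and the class required at the rewritten use; the fallback then materializes a COPY into a fresh virtual register of the required class and rewrites the use to read that copy. The decision in isolation, as a toy template where insertCopy is a hypothetical helper standing in for the BuildMI call above:

    struct RegClass;
    struct Reg { unsigned Id; };

    // Returns the register the rewritten operand should use.
    template <typename MRIT>
    Reg constrainOrCopy(MRIT &MRI, Reg ReplaceReg, const RegClass *RC) {
      if (MRI.constrainRegClass(ReplaceReg, RC))
        return ReplaceReg;               // classes were compatible
      Reg Split = MRI.createVirtualRegister(RC);
      MRI.insertCopy(Split, ReplaceReg); // hypothetical: emit the COPY
      return Split;
    }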
@@ -1205,8 +1218,12 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
MI.eraseFromParent();
Changed = true;
} else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
- MRI.constrainRegClass(MI.getOperand(1).getReg(),
- MRI.getRegClass(MI.getOperand(0).getReg()));
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(MI.getOperand(1).getReg(),
+ MRI.getRegClass(MI.getOperand(0).getReg()));
+ assert(ConstrainRegClass &&
+ "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
MRI.replaceRegWith(MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
if (LIS)
@@ -1404,7 +1421,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
while (DefaultI != Defaults.rend())
LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
- if (IllegalPhiDefault.hasValue()) {
+ if (IllegalPhiDefault) {
// The consumer optionally consumes LoopProducer in the same iteration
// (because the producer is scheduled at an earlier cycle than the consumer)
// or the initial value. To facilitate this we create an illegal block here
@@ -1414,7 +1431,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
Register R = MRI.createVirtualRegister(RC);
MachineInstr *IllegalPhi =
BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
- .addReg(IllegalPhiDefault.getValue())
+ .addReg(*IllegalPhiDefault)
.addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
.addReg(LoopReg)
.addMBB(BB); // Block choice is arbitrary and has no effect.
@@ -1430,7 +1447,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
const TargetRegisterClass *RC) {
// If the init register is not undef, try and find an existing phi.
- if (InitReg.hasValue()) {
+ if (InitReg) {
auto I = Phis.find({LoopReg, InitReg.getValue()});
if (I != Phis.end())
return I->second;
@@ -1446,7 +1463,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
auto I = UndefPhis.find(LoopReg);
if (I != UndefPhis.end()) {
Register R = I->second;
- if (!InitReg.hasValue())
+ if (!InitReg)
// Found a phi taking undef as input, and this input is undef so return
// without any more changes.
return R;
@@ -1454,7 +1471,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
MachineInstr *MI = MRI.getVRegDef(R);
MI->getOperand(1).setReg(InitReg.getValue());
Phis.insert({{LoopReg, InitReg.getValue()}, R});
- MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ assert(ConstrainRegClass && "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
UndefPhis.erase(I);
return R;
}
@@ -1463,14 +1483,18 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
if (!RC)
RC = MRI.getRegClass(LoopReg);
Register R = MRI.createVirtualRegister(RC);
- if (InitReg.hasValue())
- MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ if (InitReg) {
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ assert(ConstrainRegClass && "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
+ }
BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
- .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
+ .addReg(InitReg ? *InitReg : undef(RC))
.addMBB(PreheaderBB)
.addReg(LoopReg)
.addMBB(BB);
- if (!InitReg.hasValue())
+ if (!InitReg)
UndefPhis[LoopReg] = R;
else
Phis[{LoopReg, *InitReg}] = R;
@@ -1793,10 +1817,10 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// Iterate in reverse order over all instructions, remapping as we go.
for (MachineBasicBlock *B : reverse(Blocks)) {
- for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
+ for (auto I = B->instr_rbegin();
I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
- MachineInstr *MI = &*I++;
- rewriteUsesOf(MI);
+ MachineBasicBlock::reverse_instr_iterator MI = I++;
+ rewriteUsesOf(&*MI);
}
}
for (auto *MI : IllegalPhisToDelete) {
@@ -1919,7 +1943,7 @@ void PeelingModuloScheduleExpander::fixupBranches() {
TII->removeBranch(*Prolog);
Optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
- if (!StaticallyGreater.hasValue()) {
+ if (!StaticallyGreater) {
LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
// Dynamically branch based on Cond.
TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
@@ -1929,8 +1953,8 @@ void PeelingModuloScheduleExpander::fixupBranches() {
// blocks. Leave it to unreachable-block-elim to clean up.
Prolog->removeSuccessor(Fallthrough);
for (MachineInstr &P : Fallthrough->phis()) {
- P.RemoveOperand(2);
- P.RemoveOperand(1);
+ P.removeOperand(2);
+ P.removeOperand(1);
}
TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
KernelDisposed = true;
@@ -1939,8 +1963,8 @@ void PeelingModuloScheduleExpander::fixupBranches() {
// Prolog always falls through; remove incoming values in epilog.
Prolog->removeSuccessor(Epilog);
for (MachineInstr &P : Epilog->phis()) {
- P.RemoveOperand(4);
- P.RemoveOperand(3);
+ P.removeOperand(4);
+ P.removeOperand(3);
}
}
}
diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
index db5217469fba..7304bfef55cb 100644
--- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
+++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -25,7 +25,7 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
Entry.Symbol = nullptr;
CurrentEndOffset += S.size() + 1;
}
- return DwarfStringPoolEntryRef(*I.first, true);
+ return DwarfStringPoolEntryRef(*I.first);
}
StringRef NonRelocatableStringpool::internString(StringRef S) {
@@ -44,7 +44,7 @@ NonRelocatableStringpool::getEntriesForEmission() const {
Result.reserve(Strings.size());
for (const auto &E : Strings)
if (E.getValue().isIndexed())
- Result.emplace_back(E, true);
+ Result.emplace_back(E);
llvm::sort(Result, [](const DwarfStringPoolEntryRef A,
const DwarfStringPoolEntryRef B) {
return A.getIndex() < B.getIndex();
diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp
index 8a6cf47c0d89..d5d262e4047a 100644
--- a/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 7693ab417de9..7709095cd683 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -31,9 +31,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp
index 3e32afaafa6e..43b23368ead2 100644
--- a/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/llvm/lib/CodeGen/ParallelCG.cpp
@@ -16,8 +16,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SplitModule.h"
diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp
index ca44b7a53982..0f9da0637ced 100644
--- a/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -14,11 +14,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index f9b16d2630d6..31e37c4cd7e3 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -90,7 +90,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -214,8 +213,9 @@ namespace {
const SmallSet<Register, 2> &TargetReg,
RecurrenceCycle &RC);
- /// If copy instruction \p MI is a virtual register copy, track it in
- /// the set \p CopyMIs. If this virtual register was previously seen as a
+ /// If copy instruction \p MI is a virtual register copy or a copy of a
+ /// constant physical register to a virtual register, track it in the
+ /// set \p CopyMIs. If this virtual register was previously seen as a
/// copy, replace the uses of this copy with the previously seen copy's
/// destination register.
bool foldRedundantCopy(MachineInstr &MI,
@@ -810,7 +810,7 @@ protected:
unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
public:
Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
- virtual ~Rewriter() {}
+ virtual ~Rewriter() = default;
/// Get the next rewritable source (SrcReg, SrcSubReg) and
/// the related value that it affects (DstReg, DstSubReg).
@@ -1022,7 +1022,7 @@ public:
CurrentSrcIdx = -1;
// Rewrite the operation as a COPY.
// Get rid of the sub-register index.
- CopyLike.RemoveOperand(2);
+ CopyLike.removeOperand(2);
// Morph the operation into a COPY.
CopyLike.setDesc(TII.get(TargetOpcode::COPY));
return true;
@@ -1412,7 +1412,7 @@ bool PeepholeOptimizer::foldRedundantCopy(
Register SrcReg = MI.getOperand(1).getReg();
unsigned SrcSubReg = MI.getOperand(1).getSubReg();
- if (!SrcReg.isVirtual())
+ if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
return false;
Register DstReg = MI.getOperand(0).getReg();
@@ -1643,8 +1643,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// without any intervening re-definition of $physreg.
DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
- // Set of pairs of virtual registers and their subregs that are copied
- // from.
+ // Set of copies to virtual registers keyed by source register. Never
+ // holds any physreg which requires def tracking.
DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
bool IsLoopHeader = MLI->isLoopHeader(&MBB);
diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 82ed386db827..97b1532300b1 100644
--- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -28,14 +28,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-hazard-rec"
@@ -72,10 +69,11 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
TII->CreateTargetPostRAHazardRecognizer(Fn));
// Return if the target has not implemented a hazard recognizer.
- if (!HazardRec.get())
+ if (!HazardRec)
return false;
// Loop over all of the basic blocks
+ bool Changed = false;
for (auto &MBB : Fn) {
// We do not call HazardRec->reset() here to make sure we are handling noop
// hazards at the start of basic blocks.
@@ -85,6 +83,8 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
HazardRec->EmitNoops(NumPreNoops);
TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
NumNoops += NumPreNoops;
+ if (NumPreNoops)
+ Changed = true;
HazardRec->EmitInstruction(&MI);
if (HazardRec->atIssueLimit()) {
@@ -92,5 +92,5 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
}
}
}
- return true;
+ return Changed;
}
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index aac46cb22084..98fc7e07a1b4 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -25,18 +25,16 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -72,7 +70,7 @@ DebugMod("postra-sched-debugmod",
cl::desc("Debug control MBBs that are scheduled"),
cl::init(0), cl::Hidden);
-AntiDepBreaker::~AntiDepBreaker() { }
+AntiDepBreaker::~AntiDepBreaker() = default;
namespace {
class PostRAScheduler : public MachineFunctionPass {
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 74b903f99284..1115c2a27956 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -18,10 +18,8 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index d232ca3a69c3..7327f9e52efc 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -11,10 +11,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,6 +46,11 @@ public:
void getAnalysisUsage(AnalysisUsage &au) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
};
} // end anonymous namespace
@@ -124,7 +130,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// Using instr wasn't found, it could be in another block.
// Leave the physreg IMPLICIT_DEF, but trim any extra operands.
for (unsigned i = MI->getNumOperands() - 1; i; --i)
- MI->RemoveOperand(i);
+ MI->removeOperand(i);
LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);
}
@@ -140,7 +146,6 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
assert(WorkList.empty() && "Inconsistent worklist state");
for (MachineBasicBlock &MBB : MF) {
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 8d8a6126dad0..1a0f296d5fdc 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -55,10 +55,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -130,6 +128,7 @@ private:
void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj);
void insertPrologEpilogCode(MachineFunction &MF);
+ void insertZeroCallUsedRegs(MachineFunction &MF);
};
} // end anonymous namespace
@@ -284,6 +283,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
assert(!Failed && "Invalid warn-stack-size fn attr value");
(void)Failed;
}
+ if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) {
+ StackSize += MFI.getUnsafeStackSize();
+ }
if (StackSize > Threshold) {
DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
F.getContext().diagnose(DiagStackSize);
@@ -837,8 +839,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Adjust 'Offset' to point to the end of last fixed sized preallocated
// object.
for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
continue;
int64_t FixedOff;
@@ -855,47 +857,34 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
if (FixedOff > Offset) Offset = FixedOff;
}
+ Align MaxAlign = MFI.getMaxAlign();
// First assign frame offsets to stack objects that are used to spill
// callee saved registers.
- if (StackGrowsDown && MaxCSFrameIndex >= MinCSFrameIndex) {
- for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
- continue;
+ if (MaxCSFrameIndex >= MinCSFrameIndex) {
+ for (unsigned i = 0; i <= MaxCSFrameIndex - MinCSFrameIndex; ++i) {
+ unsigned FrameIndex =
+ StackGrowsDown ? MinCSFrameIndex + i : MaxCSFrameIndex - i;
- // If the stack grows down, we need to add the size to find the lowest
- // address of the object.
- Offset += MFI.getObjectSize(i);
-
- // Adjust to alignment boundary
- Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew);
-
- LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
- MFI.setObjectOffset(i, -Offset); // Set the computed offset
- }
- } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
- // Be careful about underflow in comparisons agains MinCSFrameIndex.
- for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(FrameIndex) != TargetStackID::Default)
continue;
- if (MFI.isDeadObjectIndex(i))
+ // TODO: should this just be if (MFI.isDeadObjectIndex(FrameIndex))
+ if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex))
continue;
- // Adjust to alignment boundary
- Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew);
-
- LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
- MFI.setObjectOffset(i, Offset);
- Offset += MFI.getObjectSize(i);
+ AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign,
+ Skew);
}
}
+ assert(MaxAlign == MFI.getMaxAlign() &&
+ "MFI.getMaxAlign should already account for all callee-saved "
+ "registers without a fixed stack slot");
+
// FixedCSEnd is the stack offset to the end of the fixed and callee-save
// stack area.
int64_t FixedCSEnd = Offset;
- Align MaxAlign = MFI.getMaxAlign();
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
@@ -982,8 +971,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
continue;
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
continue;
switch (MFI.getObjectSSPLayout(i)) {
@@ -1036,8 +1025,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (ProtectedObjs.count(i))
continue;
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
continue;
// Add the objects that we need to allocate to our working set.
@@ -1145,6 +1134,9 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
TFI.emitEpilogue(MF, *RestoreBlock);
+ // Zero call used registers before restoring callee-saved registers.
+ insertZeroCallUsedRegs(MF);
+
for (MachineBasicBlock *SaveBlock : SaveBlocks)
TFI.inlineStackProbe(MF, *SaveBlock);
@@ -1155,11 +1147,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
if (MF.shouldSplitStack()) {
for (MachineBasicBlock *SaveBlock : SaveBlocks)
TFI.adjustForSegmentedStacks(MF, *SaveBlock);
- // Record that there are split-stack functions, so we will emit a
- // special section to tell the linker.
- MF.getMMI().setHasSplitStack(true);
- } else
- MF.getMMI().setHasNosplitStack(true);
+ }
// Emit additional code that is required to explicitly handle the stack in
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
@@ -1171,6 +1159,120 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
TFI.adjustForHiPEPrologue(MF, *SaveBlock);
}
+/// insertZeroCallUsedRegs - Zero out call used registers.
+void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+
+ if (!F.hasFnAttribute("zero-call-used-regs"))
+ return;
+
+ using namespace ZeroCallUsedRegs;
+
+ ZeroCallUsedRegsKind ZeroRegsKind =
+ StringSwitch<ZeroCallUsedRegsKind>(
+ F.getFnAttribute("zero-call-used-regs").getValueAsString())
+ .Case("skip", ZeroCallUsedRegsKind::Skip)
+ .Case("used-gpr-arg", ZeroCallUsedRegsKind::UsedGPRArg)
+ .Case("used-gpr", ZeroCallUsedRegsKind::UsedGPR)
+ .Case("used-arg", ZeroCallUsedRegsKind::UsedArg)
+ .Case("used", ZeroCallUsedRegsKind::Used)
+ .Case("all-gpr-arg", ZeroCallUsedRegsKind::AllGPRArg)
+ .Case("all-gpr", ZeroCallUsedRegsKind::AllGPR)
+ .Case("all-arg", ZeroCallUsedRegsKind::AllArg)
+ .Case("all", ZeroCallUsedRegsKind::All);
+
+ if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip)
+ return;
+
+ const bool OnlyGPR = static_cast<unsigned>(ZeroRegsKind) & ONLY_GPR;
+ const bool OnlyUsed = static_cast<unsigned>(ZeroRegsKind) & ONLY_USED;
+ const bool OnlyArg = static_cast<unsigned>(ZeroRegsKind) & ONLY_ARG;
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const BitVector AllocatableSet(TRI.getAllocatableSet(MF));
+
+ // Mark all used registers.
+ BitVector UsedRegs(TRI.getNumRegs());
+ if (OnlyUsed)
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ MCRegister Reg = MO.getReg();
+ if (AllocatableSet[Reg] && !MO.isImplicit() &&
+ (MO.isDef() || MO.isUse()))
+ UsedRegs.set(Reg);
+ }
+
+ BitVector RegsToZero(TRI.getNumRegs());
+ for (MCRegister Reg : AllocatableSet.set_bits()) {
+ // Skip over fixed registers.
+ if (TRI.isFixedRegister(MF, Reg))
+ continue;
+
+ // Want only general purpose registers.
+ if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg))
+ continue;
+
+ // Want only used registers.
+ if (OnlyUsed && !UsedRegs[Reg])
+ continue;
+
+ // Want only registers used for arguments.
+ if (OnlyArg && !TRI.isArgumentRegister(MF, Reg))
+ continue;
+
+ RegsToZero.set(Reg);
+ }
+
+ // Don't clear registers that are live when leaving the function.
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB.terminators()) {
+ if (!MI.isReturn())
+ continue;
+
+ for (const auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg()))
+ RegsToZero.reset(SReg);
+ }
+ }
+
+ // Don't need to clear registers that are used/clobbered by terminating
+ // instructions.
+ for (const MachineBasicBlock &MBB : MF) {
+ if (!MBB.isReturnBlock())
+ continue;
+
+ MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+ for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E;
+ ++I) {
+ for (const MachineOperand &MO : I->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ for (const MCPhysReg &Reg :
+ TRI.sub_and_superregs_inclusive(MO.getReg()))
+ RegsToZero.reset(Reg);
+ }
+ }
+ }
+
+ // Don't clear registers that are reset before exiting.
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
+ for (MCRegister Reg : TRI.sub_and_superregs_inclusive(CSI.getReg()))
+ RegsToZero.reset(Reg);
+
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.isReturnBlock())
+ TFI.emitZeroCallUsedRegs(RegsToZero, MBB);
+}
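As an aside, the Only* flags above work because the ZeroCallUsedRegsKind values are bitmasks. A minimal standalone sketch (bit values assumed here, mirroring what the ZeroCallUsedRegs namespace defines) of how a single kind implies all three filters:

    // Assumed bit values; the real constants live in the ZeroCallUsedRegs
    // namespace consumed by the function above.
    #include <cassert>
    constexpr unsigned ONLY_USED = 1u << 1, ONLY_GPR = 1u << 2, ONLY_ARG = 1u << 3;
    constexpr unsigned UsedGPRArg = ONLY_USED | ONLY_GPR | ONLY_ARG;
    int main() {
      assert(UsedGPRArg & ONLY_GPR);  // "used-gpr-arg" keeps only GPRs...
      assert(UsedGPRArg & ONLY_USED); // ...that the function actually used...
      assert(UsedGPRArg & ONLY_ARG);  // ...and that pass arguments.
    }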
+
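For context, the "zero-call-used-regs" string attribute read by insertZeroCallUsedRegs() is normally attached by the front end. A hedged illustration, assuming a compiler that supports the GCC/Clang zero_call_used_regs function attribute:

    // Hedged illustration (not part of the patch): the front end lowers the
    // source-level attribute to the "zero-call-used-regs" IR fn attribute.
    __attribute__((zero_call_used_regs("used-gpr")))
    int scrub(int x) {
      return x * 2; // used call-clobbered GPRs are zeroed in this epilogue
    }

    int main() { return scrub(21) == 42 ? 0 : 1; }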
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
void PEI::replaceFrameIndices(MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index 5f69f9194125..86ea3ec67178 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -18,11 +18,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCPseudoProbe.h"
-#include "llvm/Target/TargetMachine.h"
-#include <unordered_set>
#define DEBUG_TYPE "pseudo-probe-inserter"
diff --git a/llvm/lib/CodeGen/PseudoSourceValue.cpp b/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 74e721dbd138..40c52b9d9707 100644
--- a/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,26 +11,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
using namespace llvm;
static const char *const PSVNames[] = {
"Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
"GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
+PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetMachine &TM)
: Kind(Kind) {
- AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind);
+ AddressSpace = TM.getAddressSpaceForPseudoSourceKind(Kind);
}
-
-PseudoSourceValue::~PseudoSourceValue() {}
+PseudoSourceValue::~PseudoSourceValue() = default;
void PseudoSourceValue::printCustom(raw_ostream &O) const {
if (Kind < TargetCustom)
@@ -79,9 +76,9 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
OS << "FixedStack" << FI;
}
-CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(
- unsigned Kind, const TargetInstrInfo &TII)
- : PseudoSourceValue(Kind, TII) {}
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(unsigned Kind,
+ const TargetMachine &TM)
+ : PseudoSourceValue(Kind, TM) {}
bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
return false;
@@ -96,20 +93,17 @@ bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
}
GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
- const GlobalValue *GV,
- const TargetInstrInfo &TII)
- : CallEntryPseudoSourceValue(GlobalValueCallEntry, TII), GV(GV) {}
+ const GlobalValue *GV, const TargetMachine &TM)
+ : CallEntryPseudoSourceValue(GlobalValueCallEntry, TM), GV(GV) {}
ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(
- const char *ES, const TargetInstrInfo &TII)
- : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TII), ES(ES) {}
+ const char *ES, const TargetMachine &TM)
+ : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TM), ES(ES) {}
-PseudoSourceValueManager::PseudoSourceValueManager(
- const TargetInstrInfo &TIInfo)
- : TII(TIInfo),
- StackPSV(PseudoSourceValue::Stack, TII),
- GOTPSV(PseudoSourceValue::GOT, TII),
- JumpTablePSV(PseudoSourceValue::JumpTable, TII),
- ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {}
+PseudoSourceValueManager::PseudoSourceValueManager(const TargetMachine &TMInfo)
+ : TM(TMInfo), StackPSV(PseudoSourceValue::Stack, TM),
+ GOTPSV(PseudoSourceValue::GOT, TM),
+ JumpTablePSV(PseudoSourceValue::JumpTable, TM),
+ ConstantPoolPSV(PseudoSourceValue::ConstantPool, TM) {}
const PseudoSourceValue *PseudoSourceValueManager::getStack() {
return &StackPSV;
@@ -129,7 +123,7 @@ const PseudoSourceValue *
PseudoSourceValueManager::getFixedStack(int FI) {
std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
if (!V)
- V = std::make_unique<FixedStackPseudoSourceValue>(FI, TII);
+ V = std::make_unique<FixedStackPseudoSourceValue>(FI, TM);
return V.get();
}
@@ -138,7 +132,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
GlobalCallEntries[GV];
if (!E)
- E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
+ E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TM);
return E.get();
}
@@ -147,6 +141,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
ExternalCallEntries[ES];
if (!E)
- E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
+ E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TM);
return E.get();
}
diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index 882f8e91bf1d..ec383b9b1c65 100644
--- a/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,6 +8,7 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
+#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -18,7 +19,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -27,8 +27,6 @@
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -979,18 +977,6 @@ RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll());
}
-RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
- if (AR.Reg == BR.Reg) {
- LaneBitmask M = AR.Mask & BR.Mask;
- return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
- }
- // This isn't strictly correct, because the overlap may happen in the
- // part masked out.
- if (PRI.alias(AR, BR))
- return AR;
- return RegisterRef();
-}
-
// For each stack in the map DefM, push the delimiter for block B on it.
void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
// Push block delimiters.
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index d704cf7b3213..2fd947086b4d 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,6 +22,7 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -32,14 +33,12 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -341,9 +340,8 @@ Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
continue;
NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG);
- if (Visited.count(PA.Id))
+ if (!Visited.insert(PA.Id).second)
continue;
- Visited.insert(PA.Id);
// Go over all phi uses and get the reaching defs for each use.
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
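The Visited change in the hunk above is the standard set-insertion idiom: insert() returns {iterator, bool}, and the bool is true only on first insertion, so the count()-then-insert() pair collapses into a single lookup. A self-contained sketch:

    #include <cassert>
    #include <set>
    int main() {
      std::set<int> Visited;
      assert(Visited.insert(42).second);  // first visit: inserted
      assert(!Visited.insert(42).second); // revisit: already present, skip
    }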
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 1264e6021b6e..69db8bad54f9 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -34,12 +34,7 @@ static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg,
const TargetRegisterInfo *TRI) {
if (!isValidRegUse(MO))
return false;
- if (MO.getReg() == PhysReg)
- return true;
- for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
- if (MO.getReg() == *R)
- return true;
- return false;
+ return TRI->regsOverlap(MO.getReg(), PhysReg);
}
static bool isValidRegDef(const MachineOperand &MO) {
@@ -50,12 +45,7 @@ static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
const TargetRegisterInfo *TRI) {
if (!isValidRegDef(MO))
return false;
- if (MO.getReg() == PhysReg)
- return true;
- for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
- if (MO.getReg() == *R)
- return true;
- return false;
+ return TRI->regsOverlap(MO.getReg(), PhysReg);
}
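Both hunks above lean on the same equivalence: TRI->regsOverlap(A, B) holds exactly when A == B or the two registers alias, which is what the removed MCRegAliasIterator loops computed by hand. A standalone model (not the LLVM API; the alias relation is hypothetical):

    #include <cassert>
    #include <initializer_list>

    static bool aliases(unsigned A, unsigned B) {
      // Hypothetical alias relation: regs 1 and 2 overlap (think RAX/EAX).
      return (A == 1 && B == 2) || (A == 2 && B == 1);
    }
    static bool regsOverlap(unsigned A, unsigned B) {
      return A == B || aliases(A, B);
    }
    // The removed form: compare against PhysReg, then walk its aliases.
    static bool oldLoopForm(unsigned MOReg, unsigned PhysReg) {
      if (MOReg == PhysReg)
        return true;
      for (unsigned R : {1u, 2u, 3u}) // stand-in for MCRegAliasIterator
        if (aliases(PhysReg, R) && MOReg == R)
          return true;
      return false;
    }

    int main() {
      for (unsigned A : {1u, 2u, 3u})
        for (unsigned B : {1u, 2u, 3u})
          assert(regsOverlap(A, B) == oldLoopForm(A, B));
    }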
void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index d891d4c2ffbb..0c18814189eb 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -85,7 +85,7 @@ void RegAllocBase::allocatePhysRegs() {
seedLiveRegs();
// Continue assigning vregs one at a time to available physical registers.
- while (LiveInterval *VirtReg = dequeue()) {
+ while (const LiveInterval *VirtReg = dequeue()) {
assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned");
// Unused registers can appear when the spiller coalesces snippets.
@@ -140,10 +140,7 @@ void RegAllocBase::allocatePhysRegs() {
// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front());
- continue;
- }
-
- if (AvailablePhysReg)
+ } else if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
for (Register Reg : SplitVRegs) {
@@ -176,7 +173,7 @@ void RegAllocBase::postOptimization() {
DeadRemats.clear();
}
-void RegAllocBase::enqueue(LiveInterval *LI) {
+void RegAllocBase::enqueue(const LiveInterval *LI) {
const Register Reg = LI->reg();
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h
index 1fb56dbaebb7..a8bf305a50c9 100644
--- a/llvm/lib/CodeGen/RegAllocBase.h
+++ b/llvm/lib/CodeGen/RegAllocBase.h
@@ -96,19 +96,19 @@ protected:
virtual Spiller &spiller() = 0;
/// enqueue - Add VirtReg to the priority queue of unassigned registers.
- virtual void enqueueImpl(LiveInterval *LI) = 0;
+ virtual void enqueueImpl(const LiveInterval *LI) = 0;
/// enqueue - Add VirtReg to the priority queue of unassigned registers.
- void enqueue(LiveInterval *LI);
+ void enqueue(const LiveInterval *LI);
/// dequeue - Return the next unassigned register, or NULL.
- virtual LiveInterval *dequeue() = 0;
+ virtual const LiveInterval *dequeue() = 0;
// A RegAlloc pass should override this to provide the allocation heuristics.
// Each call must guarantee forward progress by returning an available PhysReg
// or new set of split live virtual registers. It is up to the splitter to
// converge quickly toward fully spilled live ranges.
- virtual MCRegister selectOrSplit(LiveInterval &VirtReg,
+ virtual MCRegister selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &splitLVRs) = 0;
// Use this group name for NamedRegionTimer.
@@ -116,7 +116,7 @@ protected:
static const char TimerGroupDescription[];
/// Method called when the allocator is about to remove a LiveInterval.
- virtual void aboutToRemoveInterval(LiveInterval &LI) {}
+ virtual void aboutToRemoveInterval(const LiveInterval &LI) {}
public:
/// VerifyEnabled - True when -verify-regalloc is given.
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index a9816b13e798..7defdf04aec8 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -22,9 +22,7 @@
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/Spiller.h"
@@ -33,7 +31,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
#include <queue>
using namespace llvm;
@@ -45,7 +42,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
namespace {
struct CompSpillWeight {
- bool operator()(LiveInterval *A, LiveInterval *B) const {
+ bool operator()(const LiveInterval *A, const LiveInterval *B) const {
return A->weight() < B->weight();
}
};
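Because std::priority_queue is a max-heap, this strict-weak ordering dequeues the interval with the largest spill weight first. A self-contained model of that dequeue order (weights hypothetical, not the LLVM types):

    #include <cstdio>
    #include <queue>
    #include <vector>

    struct LI { float W; };
    struct Cmp {
      bool operator()(const LI *A, const LI *B) const { return A->W < B->W; }
    };

    int main() {
      LI A{1.0f}, B{3.0f}, C{2.0f};
      std::priority_queue<const LI *, std::vector<const LI *>, Cmp> Q;
      for (const LI *P : {&A, &B, &C})
        Q.push(P);
      while (!Q.empty()) { // prints 3.0, 2.0, 1.0: heaviest first
        std::printf("%.1f\n", Q.top()->W);
        Q.pop();
      }
    }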
@@ -65,8 +62,9 @@ class RABasic : public MachineFunctionPass,
// state
std::unique_ptr<Spiller> SpillerInstance;
- std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
- CompSpillWeight> Queue;
+ std::priority_queue<const LiveInterval *, std::vector<const LiveInterval *>,
+ CompSpillWeight>
+ Queue;
// Scratch space. Allocated here to avoid repeated malloc calls in
// selectOrSplit().
@@ -88,19 +86,17 @@ public:
Spiller &spiller() override { return *SpillerInstance; }
- void enqueueImpl(LiveInterval *LI) override {
- Queue.push(LI);
- }
+ void enqueueImpl(const LiveInterval *LI) override { Queue.push(LI); }
- LiveInterval *dequeue() override {
+ const LiveInterval *dequeue() override {
if (Queue.empty())
return nullptr;
- LiveInterval *LI = Queue.top();
+ const LiveInterval *LI = Queue.top();
Queue.pop();
return LI;
}
- MCRegister selectOrSplit(LiveInterval &VirtReg,
+ MCRegister selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &SplitVRegs) override;
/// Perform register allocation.
@@ -119,7 +115,7 @@ public:
// Helper for spilling all live virtual registers currently unified under preg
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
+ bool spillInterferences(const LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs);
static char ID;
@@ -208,16 +204,17 @@ void RABasic::releaseMemory() {
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
+bool RABasic::spillInterferences(const LiveInterval &VirtReg,
+ MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
- SmallVector<LiveInterval*, 8> Intfs;
+ SmallVector<const LiveInterval *, 8> Intfs;
// Collect interferences assigned to any alias of the physical register.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- for (auto *Intf : reverse(Q.interferingVRegs())) {
+ for (const auto *Intf : reverse(Q.interferingVRegs())) {
if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
Intfs.push_back(Intf);
@@ -229,7 +226,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
// Spill each interfering vreg allocated to PhysReg or an alias.
for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
- LiveInterval &Spill = *Intfs[i];
+ const LiveInterval &Spill = *Intfs[i];
// Skip duplicates.
if (!VRM->hasPhys(Spill.reg()))
@@ -258,7 +255,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
// |vregs| * |machineregs|. And since the number of interference tests is
// minimal, there is no value in caching them outside the scope of
// selectOrSplit().
-MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg,
+MCRegister RABasic::selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &SplitVRegs) {
// Populate a list of physical register spill candidates.
SmallVector<MCRegister, 8> PhysRegSpillCands;
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index fc5d1104a999..ee03feda796f 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -11,13 +11,14 @@
//===----------------------------------------------------------------------===//
#include "RegAllocEvictionAdvisor.h"
+#include "AllocationOrder.h"
#include "RegAllocGreedy.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
@@ -25,7 +26,7 @@
using namespace llvm;
static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode(
- "regalloc-enable-advisor", cl::Hidden, cl::ZeroOrMore,
+ "regalloc-enable-advisor", cl::Hidden,
cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default),
cl::desc("Enable regalloc advisor mode"),
cl::values(
@@ -42,6 +43,14 @@ static cl::opt<bool> EnableLocalReassignment(
"may be compile time intensive"),
cl::init(false));
+cl::opt<unsigned> EvictInterferenceCutoff(
+ "regalloc-eviction-max-interference-cutoff", cl::Hidden,
+ cl::desc("Number of interferences after which we declare "
+ "an interference unevictable and bail out. This "
+ "is a compilation cost-saving consideration. To "
+ "disable, pass a very large number."),
+ cl::init(10));
+
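A hedged usage note: as a cl::opt, this knob should be reachable from any tool that parses LLVM options, for instance

    llc -regalloc=greedy -regalloc-eviction-max-interference-cutoff=32 input.ll

and, per its description, passing a very large value effectively disables the cutoff.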
#define DEBUG_TYPE "regalloc"
#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
#define LLVM_HAVE_TF_AOT
@@ -66,7 +75,7 @@ public:
private:
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
return std::make_unique<DefaultEvictionAdvisor>(MF, RA);
}
bool doInitialization(Module &M) override {
@@ -113,7 +122,7 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
llvm_unreachable("Unknown advisor kind");
}
-RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF,
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
const RAGreedy &RA)
: MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()),
LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
@@ -136,8 +145,8 @@ RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF,
/// register.
/// @param B The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
-bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
- LiveInterval &B,
+bool DefaultEvictionAdvisor::shouldEvict(const LiveInterval &A, bool IsHint,
+ const LiveInterval &B,
bool BreaksHint) const {
bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
@@ -156,7 +165,7 @@ bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
/// canEvictHintInterference - return true if the interference for VirtReg
/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
bool DefaultEvictionAdvisor::canEvictHintInterference(
- LiveInterval &VirtReg, MCRegister PhysReg,
+ const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const {
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
@@ -174,7 +183,7 @@ bool DefaultEvictionAdvisor::canEvictHintInterference(
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
- LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
@@ -195,12 +204,12 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there are 10 or more interferences, chances are one is heavier.
- const auto &Interferences = Q.interferingVRegs(10);
- if (Interferences.size() >= 10)
+ const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff);
+ if (Interferences.size() >= EvictInterferenceCutoff)
return false;
// Check if any interfering live range is heavier than MaxWeight.
- for (LiveInterval *Intf : reverse(Interferences)) {
+ for (const LiveInterval *Intf : reverse(Interferences)) {
assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
@@ -227,7 +236,10 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
- if (Cascade <= IntfCascade) {
+ if (Cascade == IntfCascade)
+ return false;
+
+ if (Cascade < IntfCascade) {
if (!Urgent)
return false;
// We permit breaking cascades for urgent evictions. It should be the
@@ -261,7 +273,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
}
MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 1f40386db8da..d57b0ca6d53d 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -9,19 +9,25 @@
#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
-#include "AllocationOrder.h"
-#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/Pass.h"
namespace llvm {
+class AllocationOrder;
+class LiveInterval;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineFunction;
+class MachineRegisterInfo;
+class RegisterClassInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
using SmallVirtRegSet = SmallSet<Register, 16>;
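The include pruning above follows the usual forward-declaration rule: a header that only names a type by pointer or reference can declare it instead of pulling in its full definition, cutting rebuild fan-out. A minimal sketch:

    class LiveInterval; // declaration alone is enough for the uses below
    struct AdvisorSketch {
      const LiveInterval *Best = nullptr; // pointer member: OK
      void note(const LiveInterval &LI);  // reference parameter: OK
      // sizeof(LiveInterval) or LI.weight() would need the real #include.
    };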
@@ -99,15 +105,14 @@ public:
/// Find a physical register that can be freed by evicting the FixedRegisters,
/// or return NoRegister. The eviction decision is assumed to be correct (i.e.
/// no fixed live ranges are evicted) and profitable.
- virtual MCRegister
- tryFindEvictionCandidate(LiveInterval &VirtReg, const AllocationOrder &Order,
- uint8_t CostPerUseLimit,
- const SmallVirtRegSet &FixedRegisters) const = 0;
+ virtual MCRegister tryFindEvictionCandidate(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const = 0;
/// Find out if we can evict the live ranges occupying the given PhysReg,
/// which is a hint (preferred register) for VirtReg.
virtual bool
- canEvictHintInterference(LiveInterval &VirtReg, MCRegister PhysReg,
+ canEvictHintInterference(const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const = 0;
/// Returns true if the given \p PhysReg is a callee saved register and has
@@ -115,9 +120,9 @@ public:
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
protected:
- RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA);
+ RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
- Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
+ Register canReassign(const LiveInterval &VirtReg, Register PrevReg) const;
// Get the upper limit of elements in the given Order we need to analyze.
// TODO: this is a heuristic; we could consider learning it.
@@ -173,7 +178,7 @@ public:
/// Get an advisor for the given context (i.e. machine function, etc)
virtual std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) = 0;
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
AdvisorMode getAdvisorMode() const { return Mode; }
protected:
@@ -200,19 +205,20 @@ RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
// out of RegAllocGreedy.cpp
class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
public:
- DefaultEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA)
+ DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA)
: RegAllocEvictionAdvisor(MF, RA) {}
private:
- MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
- uint8_t,
+ MCRegister tryFindEvictionCandidate(const LiveInterval &,
+ const AllocationOrder &, uint8_t,
const SmallVirtRegSet &) const override;
- bool canEvictHintInterference(LiveInterval &, MCRegister,
+ bool canEvictHintInterference(const LiveInterval &, MCRegister,
const SmallVirtRegSet &) const override;
- bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
+ bool canEvictInterferenceBasedOnCost(const LiveInterval &, MCRegister, bool,
EvictionCost &,
const SmallVirtRegSet &) const;
- bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
+ bool shouldEvict(const LiveInterval &A, bool, const LiveInterval &B,
+ bool) const;
};
} // namespace llvm
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 6653145d3d2a..72ceaa768803 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -35,14 +35,9 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -364,7 +359,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
// If this block loops back to itself, it is necessary to check whether the
// use comes after the def.
if (MBB->isSuccessor(MBB)) {
- SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
+ // Find the first def in the self loop MBB.
+ for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+ if (DefInst.getParent() != MBB) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ } else {
+ if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef))
+ SelfLoopDef = &DefInst;
+ }
+ }
if (!SelfLoopDef) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
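The rewritten mayLiveOut logic above no longer assumes a unique def: any def outside the self-looping block pessimistically marks the vreg as live across blocks; otherwise the earliest in-block def is kept. A standalone model of that selection (types hypothetical):

    #include <cassert>
    #include <optional>
    #include <vector>

    struct Def { int Block; int Pos; };

    // Returns the earliest in-block def, or nullopt with LiveOut set when
    // some def lives in another block.
    static std::optional<Def> firstInBlockDef(const std::vector<Def> &Defs,
                                              int MBB, bool &LiveOut) {
      std::optional<Def> First;
      for (const Def &D : Defs) {
        if (D.Block != MBB) { LiveOut = true; return std::nullopt; }
        if (!First || D.Pos < First->Pos)
          First = D;
      }
      return First;
    }

    int main() {
      bool LiveOut = false;
      auto D = firstInBlockDef({{1, 7}, {1, 3}}, /*MBB=*/1, LiveOut);
      assert(!LiveOut && D && D->Pos == 3); // earliest def wins
    }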
@@ -1117,6 +1121,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
RegMasks.clear();
BundleVirtRegsMap.clear();
+ auto TiedOpIsUndef = [&](const MachineOperand &MO, unsigned Idx) {
+ assert(MO.isTied());
+ unsigned TiedIdx = MI.findTiedOperandIdx(Idx);
+ const MachineOperand &TiedMO = MI.getOperand(TiedIdx);
+ return TiedMO.isUndef();
+ };
// Scan for special cases; Apply pre-assigned register defs to state.
bool HasPhysRegUse = false;
bool HasRegMask = false;
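The TiedOpIsUndef lambda above feeds the live-through decisions later in this function: a def tied to an undef use needs no live-through handling, since no incoming value has to survive the instruction. A standalone model of the combined condition (operand layout hypothetical):

    #include <cassert>
    struct Op { bool Tied; bool TiedUseIsUndef; bool SubReg; bool Undef; };
    static bool needsLiveThrough(const Op &MO) {
      return (MO.Tied && !MO.TiedUseIsUndef) || (MO.SubReg && !MO.Undef);
    }
    int main() {
      assert(needsLiveThrough({true, false, false, false}));  // tied, live use
      assert(!needsLiveThrough({true, true, false, false}));  // tied to undef
      assert(needsLiveThrough({false, false, true, false}));  // subreg def
    }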
@@ -1124,7 +1134,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
bool HasDef = false;
bool HasEarlyClobber = false;
bool NeedToAssignLiveThroughs = false;
- for (MachineOperand &MO : MI.operands()) {
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
if (MO.isReg()) {
Register Reg = MO.getReg();
if (Reg.isVirtual()) {
@@ -1135,7 +1146,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
HasEarlyClobber = true;
NeedToAssignLiveThroughs = true;
}
- if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef()))
+ if ((MO.isTied() && !TiedOpIsUndef(MO, I)) ||
+ (MO.getSubReg() != 0 && !MO.isUndef()))
NeedToAssignLiveThroughs = true;
}
} else if (Reg.isPhysical()) {
@@ -1235,7 +1247,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(OpIdx);
LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
unsigned Reg = MO.getReg();
- if (MO.isEarlyClobber() || MO.isTied() ||
+ if (MO.isEarlyClobber() ||
+ (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
(MO.getSubReg() && !MO.isUndef())) {
defineLiveThroughVirtReg(MI, OpIdx, Reg);
} else {
@@ -1258,7 +1271,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free registers occupied by defs.
// Iterate operands in reverse order, so we see the implicit super register
// defs first (we added them earlier in case of <def,read-undef>).
- for (MachineOperand &MO : llvm::reverse(MI.operands())) {
+ for (signed I = MI.getNumOperands() - 1; I >= 0; --I) {
+ MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1273,7 +1287,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
"tied def assigned to clobbered register");
// Do not free tied operands and early clobbers.
- if (MO.isTied() || MO.isEarlyClobber())
+ if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber())
continue;
Register Reg = MO.getReg();
if (!Reg)
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 7870574df5b2..2efb98ae200d 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -21,9 +21,7 @@
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -62,6 +60,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
@@ -71,13 +70,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <memory>
-#include <queue>
-#include <tuple>
#include <utility>
using namespace llvm;
@@ -127,11 +122,18 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
cl::desc("Cost for first time use of callee-saved register."),
cl::init(0), cl::Hidden);
-static cl::opt<bool> ConsiderLocalIntervalCost(
- "consider-local-interval-cost", cl::Hidden,
- cl::desc("Consider the cost of local intervals created by a split "
- "candidate when choosing the best split candidate."),
- cl::init(false));
+static cl::opt<unsigned long> GrowRegionComplexityBudget(
+ "grow-region-complexity-budget",
+ cl::desc("growRegion() does not scale with the number of BB edges, so "
+ "limit its budget and bail out once we reach the limit."),
+ cl::init(10000), cl::Hidden);
+
+static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
+ "greedy-regclass-priority-trumps-globalness",
+ cl::desc("Change the greedy register allocator's live range priority "
+ "calculation to make the AllocationPriority of the register class "
+ "more important then whether the range is global"),
+ cl::Hidden);
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -277,9 +279,9 @@ void RAGreedy::releaseMemory() {
GlobalCand.clear();
}
-void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); }
+void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
-void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
+void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
// Prioritize live ranges by size, assigning larger ranges first.
// The queue holds (size, reg) pairs.
const unsigned Size = LI->getSize();
@@ -308,8 +310,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- bool ForceGlobal = !ReverseLocal &&
- (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
+ bool ForceGlobal =
+ !ReverseLocal && (Size / SlotIndex::InstrDist) >
+ (2 * RegClassInfo.getNumAllocatableRegs(&RC));
+ unsigned GlobalBit = 0;
if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -324,15 +328,18 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// large blocks on targets with many physical registers.
Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
}
- Prio |= RC.AllocationPriority << 24;
} else {
// Allocate global and split ranges in long->short order. Long ranges that
// don't fit should be spilled (or split) ASAP so they don't create
// interference. Mark a bit to prioritize global above local ranges.
- Prio = (1u << 29) + Size;
-
- Prio |= RC.AllocationPriority << 24;
+ Prio = Size;
+ GlobalBit = 1;
}
+ if (RegClassPriorityTrumpsGlobalness)
+ Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
+ else
+ Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
+
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
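The two branches above pack the same fields into Prio in different orders. A worked sketch (field values hypothetical) showing how the new option lifts AllocationPriority above the global bit:

    #include <cstdio>
    int main() {
      unsigned Size = 1000, AllocPrio = 2, GlobalBit = 1;
      // Default: bit 31 marker, global bit at 29, class priority at 24-28.
      unsigned Default = Size | (GlobalBit << 29) | (AllocPrio << 24) | (1u << 31);
      // Option set: class priority at 25-29 now outranks the global bit at 24.
      unsigned Trumps  = Size | (AllocPrio << 25) | (GlobalBit << 24) | (1u << 31);
      // Prints: default: 0xa20003e8  class-first: 0x850003e8
      std::printf("default: 0x%08x  class-first: 0x%08x\n", Default, Trumps);
    }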
@@ -345,9 +352,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
CurQueue.push(std::make_pair(Prio, ~Reg));
}
-LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
+const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
-LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
+const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
if (CurQueue.empty())
return nullptr;
LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
@@ -360,10 +367,10 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
//===----------------------------------------------------------------------===//
/// tryAssign - Try to assign VirtReg to an available register.
-MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
- AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs,
- const SmallVirtRegSet &FixedRegisters) {
+MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
MCRegister PhysReg;
for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
assert(*I);
@@ -413,7 +420,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
+Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
Register PrevReg) const {
auto Order =
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
@@ -440,94 +447,11 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
return PhysReg;
}
-/// Return true if all interferences between VirtReg and PhysReg between
-/// Start and End can be evicted.
-///
-/// \param VirtReg Live range that is about to be assigned.
-/// \param PhysReg Desired register for assignment.
-/// \param Start Start of range to look for interferences.
-/// \param End End of range to look for interferences.
-/// \param MaxCost Only look for cheaper candidates and update with new cost
-/// when returning true.
-/// \return True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
- MCRegister PhysReg, SlotIndex Start,
- SlotIndex End,
- EvictionCost &MaxCost) const {
- EvictionCost Cost;
-
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
-
- // Check if any interfering live range is heavier than MaxWeight.
- for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
- // Check if interference overlast the segment in interest.
- if (!Intf->overlaps(Start, End))
- continue;
-
- // Cannot evict non virtual reg interference.
- if (!Register::isVirtualRegister(Intf->reg()))
- return false;
- // Never evict spill products. They cannot split or spill.
- if (ExtraInfo->getStage(*Intf) == RS_Done)
- return false;
-
- // Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
- // Update eviction cost.
- Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
- // Abort if this would be too expensive.
- if (!(Cost < MaxCost))
- return false;
- }
- }
-
- if (Cost.MaxWeight == 0)
- return false;
-
- MaxCost = Cost;
- return true;
-}
-
-/// Return the physical register that will be best
-/// candidate for eviction by a local split interval that will be created
-/// between Start and End.
-///
-/// \param Order The allocation order
-/// \param VirtReg Live range that is about to be assigned.
-/// \param Start Start of range to look for interferences
-/// \param End End of range to look for interferences
-/// \param BestEvictweight The eviction cost of that eviction
-/// \return The PhysReg which is the best candidate for eviction and the
-/// eviction cost in BestEvictweight
-MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
- const LiveInterval &VirtReg,
- SlotIndex Start, SlotIndex End,
- float *BestEvictweight) const {
- EvictionCost BestEvictCost;
- BestEvictCost.setMax();
- BestEvictCost.MaxWeight = VirtReg.weight();
- MCRegister BestEvicteePhys;
-
- // Go over all physical registers and find the best candidate for eviction
- for (MCRegister PhysReg : Order.getOrder()) {
-
- if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
- BestEvictCost))
- continue;
-
- // Best so far.
- BestEvicteePhys = PhysReg;
- }
- *BestEvictweight = BestEvictCost.MaxWeight;
- return BestEvicteePhys;
-}
-
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
-void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
+void RAGreedy::evictInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg,
SmallVectorImpl<Register> &NewVRegs) {
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
@@ -538,25 +462,23 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
<< " interference: Cascade " << Cascade << '\n');
// Collect all interfering virtregs first.
- SmallVector<LiveInterval*, 8> Intfs;
+ SmallVector<const LiveInterval *, 8> Intfs;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// We usually have the interfering VRegs cached so collectInterferingVRegs()
// should be fast. We may need to recalculate when different physregs
// overlap the same register unit, since we would have queried different
// SubRanges against it.
- ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+ ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
// Evict them second. This will invalidate the queries.
- for (LiveInterval *Intf : Intfs) {
+ for (const LiveInterval *Intf : Intfs) {
// The same VirtReg may be present in multiple RegUnits. Skip duplicates.
if (!VRM->hasPhys(Intf->reg()))
continue;
- LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
-
Matrix->unassign(*Intf);
assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
@@ -624,7 +546,8 @@ bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
/// @return Physreg to assign VirtReg, or 0.
-MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
+MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
uint8_t CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) {
@@ -782,12 +705,17 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
unsigned Visited = 0;
#endif
+ unsigned long Budget = GrowRegionComplexityBudget;
while (true) {
ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
// Find new through blocks in the periphery of PrefRegBundles.
for (unsigned Bundle : NewBundles) {
// Look at all blocks connected to Bundle in the full graph.
ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ // Limit compilation time by bailing out after we use all our budget.
+ if (Blocks.size() >= Budget)
+ return false;
+ Budget -= Blocks.size();
for (unsigned Block : Blocks) {
if (!Todo.test(Block))
continue;
@@ -887,147 +815,14 @@ BlockFrequency RAGreedy::calcSpillCost() {
return Cost;
}
-/// Check if splitting Evictee will create a local split interval in
-/// basic block number BBNumber that may cause a bad eviction chain. This is
-/// intended to prevent bad eviction sequences like:
-/// movl %ebp, 8(%esp) # 4-byte Spill
-/// movl %ecx, %ebp
-/// movl %ebx, %ecx
-/// movl %edi, %ebx
-/// movl %edx, %edi
-/// cltd
-/// idivl %esi
-/// movl %edi, %edx
-/// movl %ebx, %edi
-/// movl %ecx, %ebx
-/// movl %ebp, %ecx
-/// movl 16(%esp), %ebp # 4 - byte Reload
-///
-/// Such sequences are created in 2 scenarios:
-///
-/// Scenario #1:
-/// %0 is evicted from physreg0 by %1.
-/// Evictee %0 is intended for region splitting with split candidate
-/// physreg0 (the reg %0 was evicted from).
-/// Region splitting creates a local interval because of interference with the
-/// evictor %1 (normally region splitting creates 2 interval, the "by reg"
-/// and "by stack" intervals and local interval created when interference
-/// occurs).
-/// One of the split intervals ends up evicting %2 from physreg1.
-/// Evictee %2 is intended for region splitting with split candidate
-/// physreg1.
-/// One of the split intervals ends up evicting %3 from physreg2, etc.
-///
-/// Scenario #2
-/// %0 is evicted from physreg0 by %1.
-/// %2 is evicted from physreg2 by %3 etc.
-/// Evictee %0 is intended for region splitting with split candidate
-/// physreg1.
-/// Region splitting creates a local interval because of interference with the
-/// evictor %1.
-/// One of the split intervals ends up evicting back original evictor %1
-/// from physreg0 (the reg %0 was evicted from).
-/// Another evictee %2 is intended for region splitting with split candidate
-/// physreg1.
-/// One of the split intervals ends up evicting %3 from physreg2, etc.
-///
-/// \param Evictee The register considered to be split.
-/// \param Cand The split candidate that determines the physical register
-/// we are splitting for and the interferences.
-/// \param BBNumber The number of a BB for which the region split process will
-/// create a local split interval.
-/// \param Order The physical registers that may get evicted by a split
-/// artifact of Evictee.
-/// \return True if splitting Evictee may cause a bad eviction chain, false
-/// otherwise.
-bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
- GlobalSplitCandidate &Cand,
- unsigned BBNumber,
- const AllocationOrder &Order) {
- EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
- unsigned Evictor = VregEvictorInfo.first;
- MCRegister PhysReg = VregEvictorInfo.second;
-
- // No actual evictor.
- if (!Evictor || !PhysReg)
- return false;
-
- float MaxWeight = 0;
- MCRegister FutureEvictedPhysReg =
- getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
- Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
-
- // The bad eviction chain occurs when either the split candidate is the
- // evicting reg or one of the split artifact will evict the evicting reg.
- if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
- return false;
-
- Cand.Intf.moveToBlock(BBNumber);
-
- // Check to see if the Evictor contains interference (with Evictee) in the
- // given BB. If so, this interference caused the eviction of Evictee from
- // PhysReg. This suggest that we will create a local interval during the
- // region split to avoid this interference This local interval may cause a bad
- // eviction chain.
- if (!LIS->hasInterval(Evictor))
- return false;
- LiveInterval &EvictorLI = LIS->getInterval(Evictor);
- if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
- return false;
-
- // Now, check to see if the local interval we will create is going to be
- // expensive enough to evict somebody If so, this may cause a bad eviction
- // chain.
- float splitArtifactWeight =
- VRAI->futureWeight(LIS->getInterval(Evictee),
- Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
- if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
- return false;
-
- return true;
-}
-
-/// Check if splitting VirtRegToSplit will create a local split interval
-/// in basic block number BBNumber that may cause a spill.
-///
-/// \param VirtRegToSplit The register considered to be split.
-/// \param Cand The split candidate that determines the physical
-/// register we are splitting for and the interferences.
-/// \param BBNumber The number of a BB for which the region split process
-/// will create a local split interval.
-/// \param Order The physical registers that may get evicted by a
-/// split artifact of VirtRegToSplit.
-/// \return True if splitting VirtRegToSplit may cause a spill, false
-/// otherwise.
-bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
- GlobalSplitCandidate &Cand,
- unsigned BBNumber,
- const AllocationOrder &Order) {
- Cand.Intf.moveToBlock(BBNumber);
-
- // Check if the local interval will find a non interfereing assignment.
- for (auto PhysReg : Order.getOrder()) {
- if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(),
- Cand.Intf.last(), PhysReg))
- return false;
- }
-
- // The local interval is not able to find non interferencing assignment
- // and not able to evict a less worthy interval, therfore, it can cause a
- // spill.
- return true;
-}
-
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
- const AllocationOrder &Order,
- bool *CanCauseEvictionChain) {
+ const AllocationOrder &Order) {
BlockFrequency GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
- Register VirtRegToSplit = SA->getParent().reg();
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned I = 0; I != UseBlocks.size(); ++I) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
@@ -1037,29 +832,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
unsigned Ins = 0;
Cand.Intf.moveToBlock(BC.Number);
- // Check wheather a local interval is going to be created during the region
- // split. Calculate adavanced spilt cost (cost of local intervals) if option
- // is enabled.
- if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn &&
- BI.LiveOut && RegIn && RegOut) {
-
- if (CanCauseEvictionChain &&
- splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
- // This interference causes our eviction from this assignment, we might
- // evict somebody else and eventually someone will spill, add that cost.
- // See splitCanCauseEvictionChain for detailed description of scenarios.
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
-
- *CanCauseEvictionChain = true;
-
- } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number,
- Order)) {
- // This interference causes local interval to spill, add that cost.
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
- }
- }
if (BI.LiveIn)
Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
@@ -1080,20 +852,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
if (Cand.Intf.hasInterference()) {
GlobalCost += SpillPlacer->getBlockFrequency(Number);
GlobalCost += SpillPlacer->getBlockFrequency(Number);
-
- // Check wheather a local interval is going to be created during the
- // region split.
- if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
- splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
- // This interference cause our eviction from this assignment, we might
- // evict somebody else, add that cost.
- // See splitCanCauseEvictionChain for detailed description of
- // scenarios.
- GlobalCost += SpillPlacer->getBlockFrequency(Number);
- GlobalCost += SpillPlacer->getBlockFrequency(Number);
-
- *CanCauseEvictionChain = true;
- }
}
continue;
}
@@ -1253,7 +1011,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
MF->verify(this, "After splitting live range around region");
}
-MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
+MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
@@ -1276,19 +1034,8 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
}
- bool CanCauseEvictionChain = false;
- unsigned BestCand =
- calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
- false /*IgnoreCSR*/, &CanCauseEvictionChain);
-
- // Split candidates with compact regions can cause a bad eviction sequence.
- // See splitCanCauseEvictionChain for detailed description of scenarios.
- // To avoid it, we need to comapre the cost with the spill cost and not the
- // current max frequency.
- if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
- CanCauseEvictionChain) {
- return MCRegister::NoRegister;
- }
+ unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+ NumCands, false /*IgnoreCSR*/);
// No solutions found, fall back to single block splitting.
if (!HasCompact && BestCand == NoCand)
@@ -1297,11 +1044,11 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}
-unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
+unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
- unsigned &NumCands, bool IgnoreCSR,
- bool *CanCauseEvictionChain) {
+ unsigned &NumCands,
+ bool IgnoreCSR) {
unsigned BestCand = NoCand;
for (MCPhysReg PhysReg : Order) {
assert(PhysReg);
@@ -1364,8 +1111,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
continue;
}
- bool HasEvictionChain = false;
- Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
+ Cost += calcGlobalSplitCost(Cand, Order);
LLVM_DEBUG({
dbgs() << ", total = ";
MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
@@ -1376,28 +1122,14 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (Cost < BestCost) {
BestCand = NumCands;
BestCost = Cost;
- // See splitCanCauseEvictionChain for detailed description of bad
- // eviction chain scenarios.
- if (CanCauseEvictionChain)
- *CanCauseEvictionChain = HasEvictionChain;
}
++NumCands;
}
- if (CanCauseEvictionChain && BestCand != NoCand) {
- // See splitCanCauseEvictionChain for detailed description of bad
- // eviction chain scenarios.
- LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
- << printReg(VirtReg.reg(), TRI) << " may ");
- if (!(*CanCauseEvictionChain))
- LLVM_DEBUG(dbgs() << "not ");
- LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
- }
-
return BestCand;
}
-unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact,
SmallVectorImpl<Register> &NewVRegs) {
SmallVector<unsigned, 8> UsedCands;
@@ -1444,7 +1176,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
/// they don't allocate.
-unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
Register Reg = VirtReg.reg();
@@ -1507,9 +1240,9 @@ static unsigned getNumAllocatableRegsForConstraints(
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
-unsigned
-RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs) {
+unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
// There is no point to this if there are no larger sub-classes.
if (!RegClassInfo.isProperSubClass(CurRC))
@@ -1529,7 +1262,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
const TargetRegisterClass *SuperRC =
TRI->getLargestLegalSuperClass(CurRC, *MF);
- unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
+ unsigned SuperRCNumAllocatableRegs =
+ RegClassInfo.getNumAllocatableRegs(SuperRC);
// Split around every non-copy instruction if this split will relax
// the constraints on the virtual register.
// Otherwise, splitting just inserts uncoalescable copies that do not help
@@ -1539,7 +1273,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (MI->isFullCopy() ||
SuperRCNumAllocatableRegs ==
getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
- TII, TRI, RCI)) {
+ TII, TRI, RegClassInfo)) {
LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
@@ -1649,7 +1383,8 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
-unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
// TODO: the function currently only handles a single UseBlock; it should be
// possible to generalize.
@@ -1879,7 +1614,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
-unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
// Ranges must be Split2 or less.
@@ -1928,6 +1663,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
return false;
}
+/// Return true if the existing assignment of \p Intf overlaps, but is not
+/// the same as, \p PhysReg.
+static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
+ const VirtRegMap &VRM,
+ MCRegister PhysReg,
+ const LiveInterval &Intf) {
+ MCRegister AssignedReg = VRM.getPhys(Intf.reg());
+ if (PhysReg == AssignedReg)
+ return false;
+ return TRI.regsOverlap(PhysReg, AssignedReg);
+}
+
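For readers outside the allocator context, here is a self-contained sketch of the predicate just added, with a toy aliasing model standing in for TargetRegisterInfo::regsOverlap (the register numbering is purely illustrative):

#include <cassert>

// Toy stand-ins: registers 1 and 2 are overlapping tuple members sharing a
// unit; register 4 is disjoint. The real code queries TargetRegisterInfo.
using Reg = unsigned;

static bool regsOverlap(Reg A, Reg B) {
  return A == B || ((A == 1 || A == 2) && (B == 1 || B == 2));
}

// Mirrors assignedRegPartiallyOverlaps: true only when the two registers
// share units without being identical.
static bool partiallyOverlaps(Reg Assigned, Reg Candidate) {
  if (Assigned == Candidate)
    return false;
  return regsOverlap(Assigned, Candidate);
}

int main() {
  assert(!partiallyOverlaps(1, 1)); // identical assignment
  assert(partiallyOverlaps(1, 2));  // aliasing tuple members
  assert(!partiallyOverlaps(1, 4)); // disjoint registers
}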
/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
@@ -1937,8 +1684,8 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
- MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates,
- const SmallVirtRegSet &FixedRegisters) {
+ MCRegister PhysReg, const LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
@@ -1952,13 +1699,21 @@ bool RAGreedy::mayRecolorAllInterferences(
CutOffInfo |= CO_Interf;
return false;
}
- for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
- // If Intf is done and sit on the same register class as VirtReg,
- // it would not be recolorable as it is in the same state as VirtReg.
- // However, if VirtReg has tied defs and Intf doesn't, then
+ for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ // If Intf is done and sits on the same register class as VirtReg, it
+ // would not be recolorable as it is in the same state as
+ // VirtReg. However there are at least two exceptions.
+ //
+ // If VirtReg has tied defs and Intf doesn't, then
// there is still a point in examining if it can be recolorable.
+ //
+ // Additionally, if the register class has overlapping tuple members, it
+ // may still be recolorable using a different tuple. This is more likely
+ // if the existing assignment aliases with the candidate.
+ //
if (((ExtraInfo->getStage(*Intf) == RS_Done &&
- MRI->getRegClass(Intf->reg()) == CurRC) &&
+ MRI->getRegClass(Intf->reg()) == CurRC &&
+ !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
!(hasTiedDef(MRI, VirtReg.reg()) &&
!hasTiedDef(MRI, Intf->reg()))) ||
FixedRegisters.count(Intf->reg())) {
@@ -2008,18 +1763,26 @@ bool RAGreedy::mayRecolorAllInterferences(
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
+///
+/// \p RecolorStack tracks the original assignments of successfully recolored
+/// registers.
+///
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
-unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
+unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
unsigned Depth) {
if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
return ~0u;
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+
+ const ssize_t EntryStackSize = RecolorStack.size();
+
// Ranges must be Done.
assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
"Last chance recoloring should really be last chance");
@@ -2035,9 +1798,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// Set of Live intervals that will need to be recolored.
SmallLISet RecoloringCandidates;
- // Record the original mapping virtual register to physical register in case
- // the recoloring fails.
- DenseMap<Register, MCRegister> VirtRegToPhysReg;
+
// Mark VirtReg as fixed, i.e., it will not be recolored past this point in
// this recoloring "session".
assert(!FixedRegisters.count(VirtReg.reg()));
@@ -2049,7 +1810,6 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
<< printReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
- VirtRegToPhysReg.clear();
CurrentNewVRegs.clear();
// It is only possible to recolor virtual register interference.
@@ -2069,18 +1829,19 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
continue;
}
- // RecoloringCandidates contains all the virtual registers that interfer
- // with VirtReg on PhysReg (or one of its aliases).
- // Enqueue them for recoloring and perform the actual recoloring.
+ // RecoloringCandidates contains all the virtual registers that interfere
+ // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
+ // recoloring and perform the actual recoloring.
PQueue RecoloringQueue;
- for (LiveInterval *RC : RecoloringCandidates) {
+ for (const LiveInterval *RC : RecoloringCandidates) {
Register ItVirtReg = RC->reg();
enqueue(RecoloringQueue, RC);
assert(VRM->hasPhys(ItVirtReg) &&
"Interferences are supposed to be with allocated variables");
// Record the current allocation.
- VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
+ RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
+
// unset the related struct.
Matrix->unassign(*RC);
}
@@ -2095,7 +1856,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// at this point for the next physical register.
SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
- FixedRegisters, Depth)) {
+ FixedRegisters, RecolorStack, Depth)) {
// Push the queued vregs into the main queue.
for (Register NewVReg : CurrentNewVRegs)
NewVRegs.push_back(NewVReg);
@@ -2122,13 +1883,31 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
NewVRegs.push_back(R);
}
- for (LiveInterval *RC : RecoloringCandidates) {
- Register ItVirtReg = RC->reg();
- if (VRM->hasPhys(ItVirtReg))
- Matrix->unassign(*RC);
- MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
- Matrix->assign(*RC, ItPhysReg);
+ // Roll back our unsuccessful recoloring. Also roll back any successful
+ // recolorings in any recursive recoloring attempts, since it's possible
+ // they would have introduced conflicts with assignments we will be
+ // restoring further up the stack. Perform all unassignments prior to
+ // reassigning, since sub-recolorings may have conflicted with the registers
+ // we are going to restore to their original assignments.
+ for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
+ const LiveInterval *LI;
+ MCRegister PhysReg;
+ std::tie(LI, PhysReg) = RecolorStack[I];
+
+ if (VRM->hasPhys(LI->reg()))
+ Matrix->unassign(*LI);
}
+
+ for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
+ const LiveInterval *LI;
+ MCRegister PhysReg;
+ std::tie(LI, PhysReg) = RecolorStack[I];
+ if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
+ Matrix->assign(*LI, PhysReg);
+ }
+
+ // Pop the stack of recoloring attempts.
+ RecolorStack.resize(EntryStackSize);
}
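The ordering constraint in the rollback above is subtle, so here is a minimal sketch of the same two-phase pattern with hypothetical stand-in types (Interval and Snapshot are placeholders, not the allocator's API): everything recorded since entry is unassigned first, newest to oldest, and only then are the original assignments restored, so a restore can never collide with a mapping a deeper recoloring attempt still holds.

#include <cstddef>
#include <utility>
#include <vector>

struct Interval { bool Assigned = false; unsigned Phys = 0; };
using Snapshot = std::vector<std::pair<Interval *, unsigned>>;

static void rollBack(Snapshot &Stack, size_t EntrySize) {
  // Phase 1: clear everything recorded by this attempt, newest first.
  for (size_t I = Stack.size(); I > EntrySize; --I)
    Stack[I - 1].first->Assigned = false;
  // Phase 2: restore the original assignments recorded at push time.
  for (size_t I = EntrySize; I != Stack.size(); ++I) {
    Stack[I].first->Assigned = true;
    Stack[I].first->Phys = Stack[I].second;
  }
  // Pop this attempt's records.
  Stack.resize(EntrySize);
}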
// Last chance recoloring did not work either, give up.
@@ -2146,12 +1925,13 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
unsigned Depth) {
while (!RecoloringQueue.empty()) {
- LiveInterval *LI = dequeue(RecoloringQueue);
+ const LiveInterval *LI = dequeue(RecoloringQueue);
LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
- MCRegister PhysReg =
- selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
+ MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
+ RecolorStack, Depth + 1);
// When splitting happens, the live-range may actually be empty.
// In that case, this is okay to continue the recoloring even
// if we did not find an alternative color for it. Indeed,
@@ -2178,12 +1958,14 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
// Main Entry Point
//===----------------------------------------------------------------------===//
-MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs) {
CutOffInfo = CO_None;
LLVMContext &Ctx = MF->getFunction().getContext();
SmallVirtRegSet FixedRegisters;
- MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ RecoloringStack RecolorStack;
+ MCRegister Reg =
+ selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
if (Reg == ~0U && (CutOffInfo != CO_None)) {
uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
if (CutOffEncountered == CO_Depth)
@@ -2208,10 +1990,9 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
-MCRegister
-RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
- MCRegister PhysReg, uint8_t &CostPerUseLimit,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister RAGreedy::tryAssignCSRFirstTime(
+ const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
+ uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
@@ -2243,7 +2024,7 @@ RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
return PhysReg;
}
-void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
+void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
// Do not keep invalid information around.
SetOfBrokenHints.remove(&LI);
}
@@ -2317,7 +2098,7 @@ BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
-void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
+void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
// We have a broken hint, check if it is possible to fix it by
// reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
// some register and PhysReg may be available for the other live-ranges.
@@ -2431,7 +2212,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
/// This is likely that we can assign the same register for b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
- for (LiveInterval *LI : SetOfBrokenHints) {
+ for (const LiveInterval *LI : SetOfBrokenHints) {
assert(Register::isVirtualRegister(LI->reg()) &&
"Recoloring is possible only for virtual registers");
// Some dead defs may be around (e.g., because of debug uses).
@@ -2442,9 +2223,10 @@ void RAGreedy::tryHintsRecoloring() {
}
}
-MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
unsigned Depth) {
uint8_t CostPerUseLimit = uint8_t(~0u);
// First try assigning a free register.
@@ -2452,8 +2234,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
if (MCRegister PhysReg =
tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
- // If VirtReg got an assignment, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg());
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
@@ -2488,9 +2268,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// copy-related live-ranges.
if (Hint && Hint != PhysReg)
SetOfBrokenHints.insert(&VirtReg);
- // If VirtReg eviction someone, the eviction info for it as an evictee is
- // no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg());
return PhysReg;
}
@@ -2510,18 +2287,16 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Try splitting VirtReg or interferences.
unsigned NewVRegSizeBefore = NewVRegs.size();
Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
- if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
- // If VirtReg got split, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg());
+ if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
return PhysReg;
- }
}
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class will report it.
- if (Stage >= RS_Done || !VirtReg.isSpillable())
+ if (Stage >= RS_Done || !VirtReg.isSpillable()) {
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
- Depth);
+ RecolorStack, Depth);
+ }
// Finally spill VirtReg itself.
if ((EnableDeferredSpilling ||
@@ -2713,19 +2488,27 @@ void RAGreedy::reportStats() {
}
}
+bool RAGreedy::hasVirtRegAlloc() {
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (!RC)
+ continue;
+ if (ShouldAllocateClass(*TRI, *RC))
+ return true;
+ }
+
+ return false;
+}
+
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: " << mf.getName() << '\n');
MF = &mf;
- TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
- RCI.runOnMachineFunction(mf);
-
- EnableAdvancedRASplitCost =
- ConsiderLocalIntervalCost.getNumOccurrences()
- ? ConsiderLocalIntervalCost
- : MF->getSubtarget().enableAdvancedRASplitCost();
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
@@ -2733,6 +2516,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
RegAllocBase::init(getAnalysis<VirtRegMap>(),
getAnalysis<LiveIntervals>(),
getAnalysis<LiveRegMatrix>());
+
+ // Early return if there is no virtual register to be allocated to a
+ // physical register.
+ if (!hasVirtRegAlloc())
+ return false;
+
Indexes = &getAnalysis<SlotIndexes>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
@@ -2746,6 +2535,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
RegCosts = TRI->getRegisterCosts(*MF);
+ RegClassPriorityTrumpsGlobalness =
+ GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
+ ? GreedyRegClassPriorityTrumpsGlobalness
+ : TRI->regClassPriorityTrumpsGlobalness(*MF);
ExtraInfo.emplace();
EvictAdvisor =
@@ -2764,7 +2557,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
- LastEvicted.clear();
allocatePhysRegs();
tryHintsRecoloring();
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index e9a5fe635f26..358e74541a54 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -12,9 +12,7 @@
#ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_
#define LLVM_CODEGEN_REGALLOCGREEDY_H_
-#include "AllocationOrder.h"
#include "InterferenceCache.h"
-#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
@@ -23,52 +21,44 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalUnion.h"
-#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/LiveStacks.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/Spiller.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/BranchProbability.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
-#include <cassert>
#include <cstdint>
#include <memory>
#include <queue>
-#include <tuple>
#include <utility>
namespace llvm {
+class AllocationOrder;
+class AnalysisUsage;
+class EdgeBundles;
+class LiveDebugVariables;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineDominatorTree;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineOptimizationRemarkEmitter;
+class MachineOptimizationRemarkMissed;
+class SlotIndex;
+class SlotIndexes;
+class TargetInstrInfo;
+class VirtRegMap;
+
class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
public RegAllocBase,
private LiveRangeEdit::Delegate {
@@ -162,15 +152,18 @@ public:
private:
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
- using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
+ using SmallLISet = SmallPtrSet<const LiveInterval *, 4>;
+
+ // We need to track all tentative recolorings so we can roll back any
+ // successful and unsuccessful recoloring attempts.
+ using RecoloringStack =
+ SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
// context
MachineFunction *MF;
// Shortcuts to some useful interface.
const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- RegisterClassInfo RCI;
// analyses
SlotIndexes *Indexes;
@@ -210,57 +203,6 @@ private:
static const char *const StageName[];
#endif
- /// EvictionTrack - Keeps track of past evictions in order to optimize region
- /// split decision.
- class EvictionTrack {
-
- public:
- using EvictorInfo =
- std::pair<Register /* evictor */, MCRegister /* physreg */>;
- using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>;
-
- private:
- /// Each Vreg that has been evicted in the last stage of selectOrSplit will
- /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
- EvicteeInfo Evictees;
-
- public:
- /// Clear all eviction information.
- void clear() { Evictees.clear(); }
-
- /// Clear eviction information for the given evictee Vreg.
- /// E.g. when Vreg get's a new allocation, the old eviction info is no
- /// longer relevant.
- /// \param Evictee The evictee Vreg for whom we want to clear collected
- /// eviction info.
- void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); }
-
- /// Track new eviction.
- /// The Evictor vreg has evicted the Evictee vreg from Physreg.
- /// \param PhysReg The physical register Evictee was evicted from.
- /// \param Evictor The evictor Vreg that evicted Evictee.
- /// \param Evictee The evictee Vreg.
- void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) {
- Evictees[Evictee].first = Evictor;
- Evictees[Evictee].second = PhysReg;
- }
-
- /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
- /// \param Evictee The evictee vreg.
- /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
- /// nobody has evicted Evictee from PhysReg.
- EvictorInfo getEvictor(Register Evictee) {
- if (Evictees.count(Evictee)) {
- return Evictees[Evictee];
- }
-
- return EvictorInfo(0, 0);
- }
- };
-
- // Keeps track of past evictions in order to optimize region split decision.
- EvictionTrack LastEvicted;
-
// splitting state.
std::unique_ptr<SplitAnalysis> SA;
std::unique_ptr<SplitEditor> SE;
@@ -320,17 +262,17 @@ private:
/// Callee-save register cost, calculated once per machine function.
BlockFrequency CSRCost;
- /// Enable or not the consideration of the cost of local intervals created
- /// by a split candidate when choosing the best split candidate.
- bool EnableAdvancedRASplitCost;
-
/// Set of broken hints that may be reconciled later because of eviction.
- SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
+ SmallSetVector<const LiveInterval *, 8> SetOfBrokenHints;
/// The register cost values. This list will be recreated for each Machine
/// Function
ArrayRef<uint8_t> RegCosts;
+ /// Flags for the live range priority calculation, determined once per
+ /// machine function.
+ bool RegClassPriorityTrumpsGlobalness;
+
public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
@@ -341,11 +283,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
void releaseMemory() override;
Spiller &spiller() override { return *SpillerInstance; }
- void enqueueImpl(LiveInterval *LI) override;
- LiveInterval *dequeue() override;
- MCRegister selectOrSplit(LiveInterval &,
+ void enqueueImpl(const LiveInterval *LI) override;
+ const LiveInterval *dequeue() override;
+ MCRegister selectOrSplit(const LiveInterval &,
SmallVectorImpl<Register> &) override;
- void aboutToRemoveInterval(LiveInterval &) override;
+ void aboutToRemoveInterval(const LiveInterval &) override;
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
@@ -363,81 +305,70 @@ public:
static char ID;
private:
- MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned = 0);
+ MCRegister selectOrSplitImpl(const LiveInterval &,
+ SmallVectorImpl<Register> &, SmallVirtRegSet &,
+ RecoloringStack &, unsigned = 0);
bool LRE_CanEraseVirtReg(Register) override;
void LRE_WillShrinkVirtReg(Register) override;
void LRE_DidCloneVirtReg(Register, Register) override;
- void enqueue(PQueue &CurQueue, LiveInterval *LI);
- LiveInterval *dequeue(PQueue &CurQueue);
+ void enqueue(PQueue &CurQueue, const LiveInterval *LI);
+ const LiveInterval *dequeue(PQueue &CurQueue);
+ bool hasVirtRegAlloc();
BlockFrequency calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &);
bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
bool growRegion(GlobalSplitCandidate &Cand);
- bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
- unsigned BBNumber,
- const AllocationOrder &Order);
- bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
- GlobalSplitCandidate &Cand, unsigned BBNumber,
- const AllocationOrder &Order);
BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
- const AllocationOrder &Order,
- bool *CanCauseEvictionChain);
+ const AllocationOrder &Order);
bool calcCompactRegion(GlobalSplitCandidate &);
void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>);
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
- bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
- MCRegister PhysReg, SlotIndex Start,
- SlotIndex End, EvictionCost &MaxCost) const;
- MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
- const LiveInterval &VirtReg,
- SlotIndex Start, SlotIndex End,
- float *BestEvictWeight) const;
- void evictInterference(LiveInterval &, MCRegister,
+ void evictInterference(const LiveInterval &, MCRegister,
SmallVectorImpl<Register> &);
- bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
+ bool mayRecolorAllInterferences(MCRegister PhysReg,
+ const LiveInterval &VirtReg,
SmallLISet &RecoloringCandidates,
const SmallVirtRegSet &FixedRegisters);
- MCRegister tryAssign(LiveInterval &, AllocationOrder &,
+ MCRegister tryAssign(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, const SmallVirtRegSet &);
- MCRegister tryEvict(LiveInterval &, AllocationOrder &,
+ MCRegister tryEvict(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, uint8_t,
const SmallVirtRegSet &);
- MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
+ MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
/// Calculate cost of region splitting.
- unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
+ unsigned calculateRegionSplitCost(const LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
- unsigned &NumCands, bool IgnoreCSR,
- bool *CanCauseEvictionChain = nullptr);
+ unsigned &NumCands, bool IgnoreCSR);
/// Perform region splitting.
- unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+ unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact, SmallVectorImpl<Register> &NewVRegs);
/// Check other options before using a callee-saved register for the first
/// time.
- MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
+ MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg,
AllocationOrder &Order, MCRegister PhysReg,
uint8_t &CostPerUseLimit,
SmallVectorImpl<Register> &NewVRegs);
void initializeCSRCost();
- unsigned tryBlockSplit(LiveInterval &, AllocationOrder &,
+ unsigned tryBlockSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
- unsigned tryInstructionSplit(LiveInterval &, AllocationOrder &,
+ unsigned tryInstructionSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
- unsigned tryLocalSplit(LiveInterval &, AllocationOrder &,
+ unsigned tryLocalSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
- unsigned trySplit(LiveInterval &, AllocationOrder &,
+ unsigned trySplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, const SmallVirtRegSet &);
- unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
+ unsigned tryLastChanceRecoloring(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned);
+ SmallVirtRegSet &, RecoloringStack &,
+ unsigned);
bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned);
- void tryHintRecoloring(LiveInterval &);
+ SmallVirtRegSet &, RecoloringStack &, unsigned);
+ void tryHintRecoloring(const LiveInterval &);
void tryHintsRecoloring();
/// Model the information carried by one end of a copy.
diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 93be8f689d57..8c262130fb70 100644
--- a/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -847,6 +847,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
while (!PBQPAllocComplete) {
LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
+ (void) Round;
PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
initializeGraph(G, VRM, *VRegSpiller);
diff --git a/llvm/lib/CodeGen/RegAllocScore.cpp b/llvm/lib/CodeGen/RegAllocScore.cpp
index 740890831617..32fa5e07dd16 100644
--- a/llvm/lib/CodeGen/RegAllocScore.cpp
+++ b/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -13,19 +13,19 @@
//===----------------------------------------------------------------------===//
#include "RegAllocScore.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdint>
-#include <numeric>
-#include <vector>
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
cl::opt<double> CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden);
diff --git a/llvm/lib/CodeGen/RegAllocScore.h b/llvm/lib/CodeGen/RegAllocScore.h
index 3c28bb61189d..2bcd0b5895bf 100644
--- a/llvm/lib/CodeGen/RegAllocScore.h
+++ b/llvm/lib/CodeGen/RegAllocScore.h
@@ -15,21 +15,16 @@
#ifndef LLVM_CODEGEN_REGALLOCSCORE_H_
#define LLVM_CODEGEN_REGALLOCSCORE_H_
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/IR/Module.h"
-#include <cassert>
-#include <cstdint>
-#include <limits>
+#include "llvm/ADT/STLFunctionalExtras.h"
namespace llvm {
+class AAResults;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MachineInstr;
+
/// Regalloc score.
class RegAllocScore final {
double CopyCounts = 0.0;
diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 5a79ac44dcf4..16afd15e29e4 100644
--- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -17,16 +17,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
index 800d952469a5..d356962e0d78 100644
--- a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -19,8 +19,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -29,7 +29,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/llvm/lib/CodeGen/RegisterBank.cpp
index 5c4d18ad79c5..512b21aeacaf 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/llvm/lib/CodeGen/RegisterBank.cpp
@@ -9,7 +9,7 @@
/// This file implements the RegisterBank class.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 650500c7eb31..de851ffc7fdc 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -9,20 +9,17 @@
/// This file implements the RegisterBankInfo class.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 65a65b9cae95..374fcc9a6014 100644
--- a/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -44,9 +43,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
bool Update = false;
MF = &mf;
+ auto &STI = MF->getSubtarget();
+
// Allocate new array the first time we see a new target.
- if (MF->getSubtarget().getRegisterInfo() != TRI) {
- TRI = MF->getSubtarget().getRegisterInfo();
+ if (STI.getRegisterInfo() != TRI) {
+ TRI = STI.getRegisterInfo();
RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
Update = true;
}
@@ -68,6 +69,18 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
CalleeSavedRegs = CSR;
+ // Even if CSR list is same, we could have had a different allocation order
+ // if ignoreCSRForAllocationOrder is evaluated differently.
+ BitVector CSRHintsForAllocOrder(TRI->getNumRegs());
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ CSRHintsForAllocOrder[*AI] = STI.ignoreCSRForAllocationOrder(mf, *AI);
+ if (IgnoreCSRForAllocOrder.size() != CSRHintsForAllocOrder.size() ||
+ IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) {
+ Update = true;
+ IgnoreCSRForAllocOrder = CSRHintsForAllocOrder;
+ }
+
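The intent of the hunk above is cache invalidation: even when the CSR list itself is unchanged, the allocation order must be recomputed if the per-register ignoreCSRForAllocationOrder answers differ. A condensed sketch of that idiom, with std::vector<bool> standing in for llvm::BitVector:

#include <vector>

struct AllocOrderCache {
  std::vector<bool> IgnoreCSRForAllocOrder;

  // Returns true when the cached allocation order must be rebuilt.
  bool refresh(const std::vector<bool> &NewHints) {
    if (IgnoreCSRForAllocOrder == NewHints)
      return false; // identical hints: cached order stays valid
    IgnoreCSRForAllocOrder = NewHints;
    return true;
  }
};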
RegCosts = TRI->getRegisterCosts(*MF);
// Different reserved registers?
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index a917b0d27d4a..930d05324440 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1647,7 +1647,7 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
MachineOperand &MO = CopyMI->getOperand(i-1);
if (MO.isReg() && MO.isUse())
- CopyMI->RemoveOperand(i-1);
+ CopyMI->removeOperand(i-1);
}
LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
"implicit def\n");
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index 424ad7419165..289d31be2d2d 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -37,11 +37,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
-#include <string>
#include <utility>
using namespace llvm;
diff --git a/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 6858d7233bc5..9d9cdf9edbb3 100644
--- a/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -22,8 +22,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
-#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
diff --git a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
index 49859aeec78b..01886e40a4a3 100644
--- a/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
+++ b/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -12,13 +12,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
/// \file RemoveRedundantDebugValues.cpp
///
diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 0872ec303460..466022ae0ac1 100644
--- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -33,9 +33,9 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 0ff045fa787e..87b8ac59bdba 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -1,4 +1,4 @@
-//=== ReplaceWithVeclib.cpp - Replace vector instrinsics with veclib calls ===//
+//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -110,7 +109,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
auto *ArgType = Arg.value()->getType();
// Vector calls to intrinsics can still have
// scalar operands for specific arguments.
- if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
+ if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
ScalarTypes.push_back(ArgType);
} else {
// The argument in this place should be a vector if
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 3d8a7eecce18..e7116ec3ea28 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -17,7 +17,6 @@
#include "SafeStackLayout.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -49,10 +48,10 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -97,31 +96,12 @@ static cl::opt<bool>
SafeStackUsePointerAddress("safestack-use-pointer-address",
cl::init(false), cl::Hidden);
-// Disabled by default due to PR32143.
static cl::opt<bool> ClColoring("safe-stack-coloring",
cl::desc("enable safe stack coloring"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
namespace {
-/// Rewrite an SCEV expression for a memory access address to an expression that
-/// represents offset from the given alloca.
-///
-/// The implementation simply replaces all mentions of the alloca with zero.
-class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
- const Value *AllocaPtr;
-
-public:
- AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
- : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
-
- const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- if (Expr->getValue() == AllocaPtr)
- return SE.getZero(Expr->getType());
- return Expr;
- }
-};
-
/// The SafeStack pass splits the stack of each function into the safe
/// stack, which is only accessed through memory safe dereferences (as
/// determined statically), and the unsafe stack, which contains all
@@ -147,7 +127,7 @@ class SafeStack {
///
/// 16 seems like a reasonable upper bound on the alignment of objects that we
/// might expect to appear on the stack on most common targets.
- static constexpr uint64_t StackAlignment = 16;
+ static constexpr Align StackAlignment = Align::Constant<16>();
/// Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -221,7 +201,7 @@ public:
bool run();
};
-constexpr uint64_t SafeStack::StackAlignment;
+constexpr Align SafeStack::StackAlignment;
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
@@ -236,9 +216,18 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
const Value *AllocaPtr, uint64_t AllocaSize) {
- AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
- const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+ const SCEV *AddrExpr = SE.getSCEV(Addr);
+ const auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(AddrExpr));
+ if (!Base || Base->getValue() != AllocaPtr) {
+ LLVM_DEBUG(
+ dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << "SCEV " << *AddrExpr << " not directly based on alloca\n");
+ return false;
+ }
+ const SCEV *Expr = SE.removePointerBase(AddrExpr);
uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType());
ConstantRange AccessStartRange = SE.getUnsignedRange(Expr);
ConstantRange SizeRange =
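The range test this rewritten code performs is easier to see with plain integers in place of SCEV constant ranges; roughly (a simplification that ignores wrapping in the symbolic ranges):

#include <cstdint>

// An access of AccessSize bytes at an offset in [OffsetMin, OffsetMax] past
// the alloca base is safe only if every possible offset keeps the access
// inside the AllocaSize-byte object.
static bool accessFits(int64_t OffsetMin, int64_t OffsetMax,
                       uint64_t AccessSize, uint64_t AllocaSize) {
  if (OffsetMin < 0)
    return false; // could reach below the object
  return uint64_t(OffsetMax) + AccessSize <= AllocaSize;
}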
@@ -645,6 +634,13 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
// FIXME: no need to update BasePointer in leaf functions.
unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment);
+ MDBuilder MDB(F.getContext());
+ SmallVector<Metadata *, 2> Data;
+ Data.push_back(MDB.createString("unsafe-stack-size"));
+ Data.push_back(MDB.createConstant(ConstantInt::get(Int32Ty, FrameSize)));
+ MDNode *MD = MDTuple::get(F.getContext(), Data);
+ F.setMetadata(LLVMContext::MD_annotation, MD);
+
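As a hypothetical consumer (not part of this patch), a later pass could recover the recorded frame size from the annotation; the helper name below is made up for illustration:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <cstdint>
using namespace llvm;

// Illustrative reader for the "unsafe-stack-size" annotation set above.
static uint64_t readUnsafeStackSize(const Function &F) {
  MDNode *MD = F.getMetadata(LLVMContext::MD_annotation);
  if (!MD || MD->getNumOperands() != 2)
    return 0;
  auto *Key = dyn_cast<MDString>(MD->getOperand(0));
  if (!Key || Key->getString() != "unsafe-stack-size")
    return 0;
  if (auto *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)))
    return CI->getZExtValue();
  return 0;
}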
// Update shadow stack pointer in the function epilogue.
IRB.SetInsertPoint(BasePointer->getNextNode());
@@ -677,13 +673,12 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
SP = IRB.CreateSub(SP, Size);
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
- uint64_t Align =
- std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
- StackAlignment);
+ auto Align = std::max(std::max(DL.getPrefTypeAlign(Ty), AI->getAlign()),
+ StackAlignment);
- assert(isPowerOf2_32(Align));
Value *NewTop = IRB.CreateIntToPtr(
- IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))),
+ IRB.CreateAnd(SP,
+ ConstantInt::get(IntPtrTy, ~uint64_t(Align.value() - 1))),
StackPtrTy);
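The AND with ~(Align - 1) above is the standard round-down trick for a downward-growing stack; a tiny standalone check:

#include <cassert>
#include <cstdint>

// Clearing the low bits rounds SP down to a power-of-two alignment.
static uint64_t alignDown(uint64_t SP, uint64_t Alignment) {
  return SP & ~(Alignment - 1);
}

int main() {
  assert(alignDown(1000, 16) == 992);
  assert(alignDown(992, 16) == 992); // already aligned: unchanged
}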
// Save the stack pointer.
diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp
index 602afcfa9001..f821145f4b63 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -11,7 +11,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/CodeGen/SafeStackLayout.h b/llvm/lib/CodeGen/SafeStackLayout.h
index 4ac7af2059f5..6126c7a67854 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/llvm/lib/CodeGen/SafeStackLayout.h
@@ -52,7 +52,7 @@ class StackLayout {
void layoutObject(StackObject &Obj);
public:
- StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {}
+ StackLayout(Align StackAlignment) : MaxAlignment(StackAlignment) {}
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0e8e8338b46d..07dcc34fbf15 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
@@ -40,9 +39,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
@@ -65,9 +61,9 @@ using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
-static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
- cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable use of AA during MI DAG construction"));
+static cl::opt<bool>
+ EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::desc("Enable use of AA during MI DAG construction"));
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
diff --git a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 05b2a3764cca..e7b14944acfe 100644
--- a/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -10,13 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
new file mode 100644
index 000000000000..c199b6a6cca8
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -0,0 +1,989 @@
+//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts selects to conditional jumps when profitable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ScaledNumber.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <algorithm>
+#include <memory>
+#include <queue>
+#include <stack>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "select-optimize"
+
+STATISTIC(NumSelectOptAnalyzed,
+ "Number of select groups considered for conversion to branch");
+STATISTIC(NumSelectConvertedExpColdOperand,
+ "Number of select groups converted due to expensive cold operand");
+STATISTIC(NumSelectConvertedHighPred,
+ "Number of select groups converted due to high-predictability");
+STATISTIC(NumSelectUnPred,
+ "Number of select groups not converted due to unpredictability");
+STATISTIC(NumSelectColdBB,
+ "Number of select groups not converted due to cold basic block");
+STATISTIC(NumSelectConvertedLoop,
+ "Number of select groups converted due to loop-level analysis");
+STATISTIC(NumSelectsConverted, "Number of selects converted");
+
+static cl::opt<unsigned> ColdOperandThreshold(
+ "cold-operand-threshold",
+ cl::desc("Maximum frequency of path for an operand to be considered cold."),
+ cl::init(20), cl::Hidden);
+
+static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
+ "cold-operand-max-cost-multiplier",
+ cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
+ "slice of a cold operand to be considered inexpensive."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned>
+ GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
+ cl::desc("Gradient gain threshold (%)."),
+ cl::init(25), cl::Hidden);
+
+static cl::opt<unsigned>
+ GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
+ cl::desc("Minimum gain per loop (in cycles) threshold."),
+ cl::init(4), cl::Hidden);
+
+static cl::opt<unsigned> GainRelativeThreshold(
+ "select-opti-loop-relative-gain-threshold",
+ cl::desc(
+ "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
+ cl::init(8), cl::Hidden);
+
+static cl::opt<unsigned> MispredictDefaultRate(
+ "mispredict-default-rate", cl::Hidden, cl::init(25),
+ cl::desc("Default mispredict rate (initialized to 25%)."));
+
+static cl::opt<bool>
+ DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable loop-level heuristics."));
+
+namespace {
+
+class SelectOptimize : public FunctionPass {
+ const TargetMachine *TM = nullptr;
+ const TargetSubtargetInfo *TSI;
+ const TargetLowering *TLI = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ const LoopInfo *LI;
+ DominatorTree *DT;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ ProfileSummaryInfo *PSI;
+ OptimizationRemarkEmitter *ORE;
+ TargetSchedModel TSchedModel;
+
+public:
+ static char ID;
+
+ SelectOptimize() : FunctionPass(ID) {
+ initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+
+private:
+ // Select groups consist of consecutive select instructions with the same
+ // condition.
+ using SelectGroup = SmallVector<SelectInst *, 2>;
+ using SelectGroups = SmallVector<SelectGroup, 2>;
+
+ using Scaled64 = ScaledNumber<uint64_t>;
+
+ struct CostInfo {
+ /// Predicated cost (with selects as conditional moves).
+ Scaled64 PredCost;
+ /// Non-predicated cost (with selects converted to branches).
+ Scaled64 NonPredCost;
+ };
+
+ // Converts select instructions of a function to conditional jumps when deemed
+ // profitable. Returns true if at least one select was converted.
+ bool optimizeSelects(Function &F);
+
+ // Heuristics for determining which select instructions can be profitably
+ // converted to branches. Separate heuristics for selects in inner-most loops
+ // and the rest of code regions (base heuristics for non-inner-most loop
+ // regions).
+ void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
+ void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
+
+ // Converts to branches the select groups that were deemed
+ // profitable-to-convert.
+ void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
+
+ // Splits selects of a given basic block into select groups.
+ void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
+
+ // Determines for which select groups it is profitable to convert to branches
+ // (base and inner-most-loop heuristics).
+ void findProfitableSIGroupsBase(SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups);
+ void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups);
+
+ // Determines if a select group should be converted to a branch (base
+ // heuristics).
+ bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
+
+ // Returns true if the dependence slice of the cold value operand (if any)
+ // of any select in the given group contains expensive instructions.
+ bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
+
+ // For a given source instruction, collect its backwards dependence slice
+ // consisting of instructions exclusively computed for producing the operands
+ // of the source instruction.
+ void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
+ bool ForSinking = false);
+
+ // Returns true if the condition of the select is highly predictable.
+ bool isSelectHighlyPredictable(const SelectInst *SI);
+
+ // Loop-level checks to determine if a non-predicated version (with branches)
+ // of the given loop is more profitable than its predicated version.
+ bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
+
+ // Computes instruction and loop-critical-path costs for both the predicated
+ // and non-predicated version of the given loop.
+ bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
+ DenseMap<const Instruction *, CostInfo> &InstCostMap,
+ CostInfo *LoopCost);
+
+ // Returns a set of all the select instructions in the given select groups.
+ SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
+
+ // Returns the latency cost of a given instruction.
+ Optional<uint64_t> computeInstCost(const Instruction *I);
+
+ // Returns the misprediction cost of a given select when converted to branch.
+ Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
+
+ // Returns the cost of a branch when the prediction is correct.
+ Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI);
+
+ // Returns true if the target architecture supports lowering a given select.
+ bool isSelectKindSupported(SelectInst *SI);
+};
+} // namespace
+
+char SelectOptimize::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+ false)
+
+FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
+
+bool SelectOptimize::runOnFunction(Function &F) {
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ TSI = TM->getSubtargetImpl(F);
+ TLI = TSI->getTargetLowering();
+
+ // If none of the select types is supported then skip this pass.
+ // This is an optimization pass. Legality issues will be handled by
+ // instruction selection.
+ if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+ !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+ !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+ return false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.reset(new BranchProbabilityInfo(F, *LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ TSchedModel.init(TSI);
+
+ // When optimizing for size, selects are preferable to branches.
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+ return false;
+
+ return optimizeSelects(F);
+}
+
+bool SelectOptimize::optimizeSelects(Function &F) {
+ // Determine for which select groups it is profitable to convert to branches.
+ SelectGroups ProfSIGroups;
+ // Base heuristics apply only to non-loops and outer loops.
+ optimizeSelectsBase(F, ProfSIGroups);
+ // Separate heuristics for inner-most loops.
+ optimizeSelectsInnerLoops(F, ProfSIGroups);
+
+ // Convert to branches the select groups that were deemed
+ // profitable-to-convert.
+ convertProfitableSIGroups(ProfSIGroups);
+
+ // Code modified if at least one select group was converted.
+ return !ProfSIGroups.empty();
+}
+
+void SelectOptimize::optimizeSelectsBase(Function &F,
+ SelectGroups &ProfSIGroups) {
+ // Collect all the select groups.
+ SelectGroups SIGroups;
+ for (BasicBlock &BB : F) {
+ // Base heuristics apply only to non-loops and outer loops.
+ Loop *L = LI->getLoopFor(&BB);
+ if (L && L->isInnermost())
+ continue;
+ collectSelectGroups(BB, SIGroups);
+ }
+
+ // Determine for which select groups it is profitable to convert to branches.
+ findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
+}
+
+void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
+ SelectGroups &ProfSIGroups) {
+ SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
+ // Need to check size on each iteration as we accumulate child loops.
+ for (unsigned long i = 0; i < Loops.size(); ++i)
+ for (Loop *ChildL : Loops[i]->getSubLoops())
+ Loops.push_back(ChildL);
+
+ for (Loop *L : Loops) {
+ if (!L->isInnermost())
+ continue;
+
+ SelectGroups SIGroups;
+ for (BasicBlock *BB : L->getBlocks())
+ collectSelectGroups(*BB, SIGroups);
+
+ findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
+ }
+}
+
+/// If \p isTrue is true, return the true value of \p SI, otherwise return the
+/// false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instructions in \p Selects, look through the defining select
+/// instruction until the true/false value is not defined in \p Selects.
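+/// For example, given
+///   %s1 = select i1 %c, i32 %a, i32 %b
+///   %s2 = select i1 %c, i32 %s1, i32 %d
+/// with \p Selects = {%s1, %s2}, the true value of %s2 resolves to %a (looking
+/// through %s1) and its false value is %d.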
+static Value *
+getTrueOrFalseValue(SelectInst *SI, bool isTrue,
+ const SmallPtrSet<const Instruction *, 2> &Selects) {
+ Value *V = nullptr;
+ for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+ DefSI = dyn_cast<SelectInst>(V)) {
+ assert(DefSI->getCondition() == SI->getCondition() &&
+ "The condition of DefSI does not match with SI");
+ V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ }
+ assert(V && "Failed to get select true/false value");
+ return V;
+}
+
+void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+ for (SelectGroup &ASI : ProfSIGroups) {
+ // The code transformation here is a modified version of the sinking
+ // transformation in CodeGenPrepare::optimizeSelectInst with a more
+ // aggressive strategy of which instructions to sink.
+ //
+ // TODO: eliminate the redundancy of logic transforming selects to branches
+ // by removing CodeGenPrepare::optimizeSelectInst and optimizing selects
+ // here for all cases (with and without profile information).
+
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %cmp.frozen = freeze %cmp
+ // br i1 %cmp.frozen, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // %cmp should be frozen, otherwise it may introduce undefined behavior.
+ // In addition, we may sink instructions that produce %c or %d into the
+ // destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
+ // Find all the instructions that can be soundly sunk to the true/false
+ // blocks. These are instructions that are computed solely for producing the
+ // operands of the select instructions in the group and can be sunk without
+ // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
+ // with side effects).
+ SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
+ typedef std::stack<Instruction *>::size_type StackSizeType;
+ StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
+ for (SelectInst *SI : ASI) {
+ // For each select, compute the sinkable dependence chains of the true and
+ // false operands.
+ if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
+ std::stack<Instruction *> TrueSlice;
+ getExclBackwardsSlice(TI, TrueSlice, true);
+ maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
+ TrueSlices.push_back(TrueSlice);
+ }
+ if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
+ std::stack<Instruction *> FalseSlice;
+ getExclBackwardsSlice(FI, FalseSlice, true);
+ maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
+ FalseSlices.push_back(FalseSlice);
+ }
+ }
+ // In the case of multiple select instructions in the same group, the order
+ // of non-dependent instructions (instructions of different dependence
+ // slices) in the true/false blocks appears to affect performance.
+ // Interleaving the slices experimentally seems to be the optimal approach.
+ // This interleaving scheduling allows for more ILP (at the cost of a small
+ // increase in register pressure) compared to a simple ordering of one whole
+ // chain after another. One would expect that this ordering would not matter
+ // since the backend scheduler should take care of it, but apparently the
+ // scheduler fails to deliver optimal ILP with a naive ordering here.
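+ // For example, two slices whose pop order is [A1, A2] and [B1, B2] are
+ // emitted as A1, B1, A2, B2 rather than A1, A2, B1, B2.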
+ SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
+ for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
+ for (auto &S : TrueSlices) {
+ if (!S.empty()) {
+ TrueSlicesInterleaved.push_back(S.top());
+ S.pop();
+ }
+ }
+ }
+ for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
+ for (auto &S : FalseSlices) {
+ if (!S.empty()) {
+ FalseSlicesInterleaved.push_back(S.top());
+ S.pop();
+ }
+ }
+ }
+
+ // We split the block containing the select(s) into two blocks.
+ SelectInst *SI = ASI.front();
+ SelectInst *LastSI = ASI.back();
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+ BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+ // Delete the unconditional branch that was just created by the split.
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // Move any debug/pseudo instructions that were in-between the select
+ // group to the newly-created end block.
+ SmallVector<Instruction *, 2> DebugPseudoINS;
+ auto DIt = SI->getIterator();
+ while (&*DIt != LastSI) {
+ if (DIt->isDebugOrPseudoInst())
+ DebugPseudoINS.push_back(&*DIt);
+ DIt++;
+ }
+ for (auto DI : DebugPseudoINS) {
+ DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+ }
+
+ // These are the new basic blocks for the conditional branch.
+ // At least one will become an actual new basic block.
+ BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
+ BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
+ if (!TrueSlicesInterleaved.empty()) {
+ TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ TrueBranch->setDebugLoc(LastSI->getDebugLoc());
+ for (Instruction *TrueInst : TrueSlicesInterleaved)
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (!FalseSlicesInterleaved.empty()) {
+ FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(LastSI->getDebugLoc());
+ for (Instruction *FalseInst : FalseSlicesInterleaved)
+ FalseInst->moveBefore(FalseBranch);
+ }
+ // If there was nothing to sink, then arbitrarily choose the 'false' side
+ // for a new input value to the PHI.
+ if (TrueBlock == FalseBlock) {
+ assert(TrueBlock == nullptr &&
+ "Unexpected basic block transform while optimizing select");
+
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+ EndBlock->getParent(), EndBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
+ }
+
+ // Insert the real conditional branch based on the original condition.
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ BasicBlock *TT, *FT;
+ if (TrueBlock == nullptr) {
+ TT = EndBlock;
+ FT = FalseBlock;
+ TrueBlock = StartBlock;
+ } else if (FalseBlock == nullptr) {
+ TT = TrueBlock;
+ FT = EndBlock;
+ FalseBlock = StartBlock;
+ } else {
+ TT = TrueBlock;
+ FT = FalseBlock;
+ }
+ IRBuilder<> IB(SI);
+ auto *CondFr =
+ IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
+ IB.CreateCondBr(CondFr, TT, FT, SI);
+
+ SmallPtrSet<const Instruction *, 2> INS;
+ INS.insert(ASI.begin(), ASI.end());
+ // Use a reverse iterator because a later select may use the value of an
+ // earlier select, and we need to propagate the value through the earlier
+ // select to get the PHI operand.
+ for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
+ SelectInst *SI = *It;
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PN->takeName(SI);
+ PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+ PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+ PN->setDebugLoc(SI->getDebugLoc());
+
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+ INS.erase(SI);
+ ++NumSelectsConverted;
+ }
+ }
+}
+
+void SelectOptimize::collectSelectGroups(BasicBlock &BB,
+ SelectGroups &SIGroups) {
+ BasicBlock::iterator BBIt = BB.begin();
+ while (BBIt != BB.end()) {
+ Instruction *I = &*BBIt++;
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ SelectGroup SIGroup;
+ SIGroup.push_back(SI);
+ while (BBIt != BB.end()) {
+ Instruction *NI = &*BBIt;
+ SelectInst *NSI = dyn_cast<SelectInst>(NI);
+ if (NSI && SI->getCondition() == NSI->getCondition()) {
+ SIGroup.push_back(NSI);
+ } else if (!NI->isDebugOrPseudoInst()) {
+ // Debug/pseudo instructions should be skipped and not prevent the
+ // formation of a select group.
+ break;
+ }
+ ++BBIt;
+ }
+
+ // If the select type is not supported, there is no point in optimizing it;
+ // instruction selection will take care of it.
+ if (!isSelectKindSupported(SI))
+ continue;
+
+ SIGroups.push_back(SIGroup);
+ }
+ }
+}
+
+void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups) {
+ for (SelectGroup &ASI : SIGroups) {
+ ++NumSelectOptAnalyzed;
+ if (isConvertToBranchProfitableBase(ASI))
+ ProfSIGroups.push_back(ASI);
+ }
+}
+
+void SelectOptimize::findProfitableSIGroupsInnerLoops(
+ const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
+ NumSelectOptAnalyzed += SIGroups.size();
+ // For each select group in an inner-most loop,
+ // a branch is preferable to a select/conditional-move if:
+ // i) conversion to branches for all the select groups of the loop satisfies
+ // loop-level heuristics including reducing the loop's critical path by
+ // some threshold (see SelectOptimize::checkLoopHeuristics); and
+ // ii) the total cost of the select group is cheaper with a branch compared
+ // to its predicated version. The cost is in terms of latency and the cost
+ // of a select group is the cost of its most expensive select instruction
+ // (assuming infinite resources and thus fully leveraging available ILP).
+
+ DenseMap<const Instruction *, CostInfo> InstCostMap;
+ CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
+ {Scaled64::getZero(), Scaled64::getZero()}};
+ if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
+ !checkLoopHeuristics(L, LoopCost)) {
+ return;
+ }
+
+ for (SelectGroup &ASI : SIGroups) {
+ // Assuming infinite resources, the cost of a group of instructions is the
+ // cost of the most expensive instruction of the group.
+ Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
+ for (SelectInst *SI : ASI) {
+ SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
+ BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
+ }
+ if (BranchCost < SelectCost) {
+ OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
+ OR << "Profitable to convert to branch (loop analysis). BranchCost="
+ << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
+ << ". ";
+ ORE->emit(OR);
+ ++NumSelectConvertedLoop;
+ ProfSIGroups.push_back(ASI);
+ } else {
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+ ORmiss << "Select is more profitable (loop analysis). BranchCost="
+ << BranchCost.toString()
+ << ", SelectCost=" << SelectCost.toString() << ". ";
+ ORE->emit(ORmiss);
+ }
+ }
+}
+
+bool SelectOptimize::isConvertToBranchProfitableBase(
+ const SmallVector<SelectInst *, 2> &ASI) {
+ SelectInst *SI = ASI.front();
+ OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
+
+ // Skip cold basic blocks. It is better to optimize them for size.
+ if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+ ++NumSelectColdBB;
+ ORmiss << "Not converted to branch because of cold basic block. ";
+ ORE->emit(ORmiss);
+ return false;
+ }
+
+ // If unpredictable, branch form is less profitable.
+ if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
+ ++NumSelectUnPred;
+ ORmiss << "Not converted to branch because of unpredictable branch. ";
+ ORE->emit(ORmiss);
+ return false;
+ }
+
+ // If highly predictable, branch form is more profitable, unless a
+ // predictable select is inexpensive in the target architecture.
+ if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
+ ++NumSelectConvertedHighPred;
+ OR << "Converted to branch because of highly predictable branch. ";
+ ORE->emit(OR);
+ return true;
+ }
+
+ // Look for expensive instructions in the cold operand's (if any) dependence
+ // slice of any of the selects in the group.
+ if (hasExpensiveColdOperand(ASI)) {
+ ++NumSelectConvertedExpColdOperand;
+ OR << "Converted to branch because of expensive cold operand.";
+ ORE->emit(OR);
+ return true;
+ }
+
+ ORmiss << "Not profitable to convert to branch (base heuristic).";
+ ORE->emit(ORmiss);
+ return false;
+}
+
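+// Returns the quotient rounded to the nearest integer; for example,
+// divideNearest(7, 2) yields 4 whereas plain integer division yields 3.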
+static InstructionCost divideNearest(InstructionCost Numerator,
+ uint64_t Denominator) {
+ return (Numerator + (Denominator / 2)) / Denominator;
+}
+
+bool SelectOptimize::hasExpensiveColdOperand(
+ const SmallVector<SelectInst *, 2> &ASI) {
+ bool ColdOperand = false;
+ uint64_t TrueWeight, FalseWeight, TotalWeight;
+ if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
+ TotalWeight = TrueWeight + FalseWeight;
+ // Is there a path with frequency < ColdOperandThreshold% (default: 20%)?
+ ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
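+ // For example (illustrative weights): TrueWeight = 10, FalseWeight = 90
+ // gives 100 * 20 > 100 * 10, so the true path is cold.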
+ } else if (PSI->hasProfileSummary()) {
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+ ORmiss << "Profile data available but missing branch-weights metadata for "
+ "select instruction. ";
+ ORE->emit(ORmiss);
+ }
+ if (!ColdOperand)
+ return false;
+ // Check if the cold path's dependence slice is expensive for any of the
+ // selects of the group.
+ for (SelectInst *SI : ASI) {
+ Instruction *ColdI = nullptr;
+ uint64_t HotWeight;
+ if (TrueWeight < FalseWeight) {
+ ColdI = dyn_cast<Instruction>(SI->getTrueValue());
+ HotWeight = FalseWeight;
+ } else {
+ ColdI = dyn_cast<Instruction>(SI->getFalseValue());
+ HotWeight = TrueWeight;
+ }
+ if (ColdI) {
+ std::stack<Instruction *> ColdSlice;
+ getExclBackwardsSlice(ColdI, ColdSlice);
+ InstructionCost SliceCost = 0;
+ while (!ColdSlice.empty()) {
+ SliceCost += TTI->getInstructionCost(ColdSlice.top(),
+ TargetTransformInfo::TCK_Latency);
+ ColdSlice.pop();
+ }
+ // The colder the cold value operand of the select is, the more expensive
+ // the cmov becomes for computing the cold value operand every time. Thus,
+ // the colder the cold operand is, the more its cost counts.
+ // Get nearest integer cost adjusted for coldness.
+ InstructionCost AdjSliceCost =
+ divideNearest(SliceCost * HotWeight, TotalWeight);
+ if (AdjSliceCost >=
+ ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
+ return true;
+ }
+ }
+ return false;
+}
+
+// For a given source instruction, collect its backwards dependence slice
+// consisting of instructions exclusively computed for the purpose of producing
+// the operands of the source instruction. As an approximation
+// (sufficiently-accurate in practice), we populate this set with the
+// instructions of the backwards dependence slice that only have one use and
+// form a one-use chain that leads to the source instruction.
+void SelectOptimize::getExclBackwardsSlice(Instruction *I,
+ std::stack<Instruction *> &Slice,
+ bool ForSinking) {
+ SmallPtrSet<Instruction *, 2> Visited;
+ std::queue<Instruction *> Worklist;
+ Worklist.push(I);
+ while (!Worklist.empty()) {
+ Instruction *II = Worklist.front();
+ Worklist.pop();
+
+ // Avoid cycles.
+ if (!Visited.insert(II).second)
+ continue;
+
+ if (!II->hasOneUse())
+ continue;
+
+ // Cannot soundly sink instructions with side-effects.
+ // Terminator or phi instructions cannot be sunk.
+ // Avoid sinking other select instructions (should be handled separately).
+ if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
+ isa<SelectInst>(II) || isa<PHINode>(II)))
+ continue;
+
+ // Avoid considering instructions with less frequency than the source
+ // instruction (i.e., avoid colder code regions of the dependence slice).
+ if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
+ continue;
+
+ // Eligible one-use instruction added to the dependence slice.
+ Slice.push(II);
+
+ // Explore all the operands of the current instruction to expand the slice.
+ for (unsigned k = 0; k < II->getNumOperands(); ++k)
+ if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
+ Worklist.push(OpI);
+ }
+}
+
+bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
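+ // For example (illustrative, assuming a 99% predictable-branch threshold):
+ // branch weights of 1000/3 give a probability of 1000/1003, roughly 99.7%,
+ // which makes the select highly predictable.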
+ uint64_t TrueWeight, FalseWeight;
+ if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t Max = std::max(TrueWeight, FalseWeight);
+ uint64_t Sum = TrueWeight + FalseWeight;
+ if (Sum != 0) {
+ auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+ if (Probability > TTI->getPredictableBranchThreshold())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool SelectOptimize::checkLoopHeuristics(const Loop *L,
+ const CostInfo LoopCost[2]) {
+ // Loop-level checks to determine if a non-predicated version (with branches)
+ // of the loop is more profitable than its predicated version.
+
+ if (DisableLoopLevelHeuristics)
+ return true;
+
+ OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
+ L->getHeader()->getFirstNonPHI());
+
+ if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
+ LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
+ ORmissL << "No select conversion in the loop due to no reduction of loop's "
+ "critical path. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+
+ Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
+ LoopCost[1].PredCost - LoopCost[1].NonPredCost};
+
+ // To be profitable, converting to branches needs to reduce the loop's
+ // critical path by at least some threshold (an absolute gain of
+ // GainCycleThreshold cycles and a relative gain of 12.5%).
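+ // (With the default GainRelativeThreshold of 8, the relative check below,
+ // Gain * 8 < PredCost, is equivalent to Gain / PredCost < 1/8 = 12.5%.)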
+ if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
+ Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
+ Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
+ ORmissL << "No select conversion in the loop due to small reduction of "
+ "loop's critical path. Gain="
+ << Gain[1].toString()
+ << ", RelativeGain=" << RelativeGain.toString() << "%. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+
+ // If the loop's critical path involves loop-carried dependences, the gradient
+ // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
+ // This check ensures that the latency reduction for the loop's critical path
+ // keeps growing at a sufficient rate beyond the two analyzed loop
+ // iterations.
+ if (Gain[1] > Gain[0]) {
+ Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
+ (LoopCost[1].PredCost - LoopCost[0].PredCost);
+ if (GradientGain < Scaled64::get(GainGradientThreshold)) {
+ ORmissL << "No select conversion in the loop due to small gradient gain. "
+ "GradientGain="
+ << GradientGain.toString() << "%. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+ }
+ // If the gain decreases, it is not profitable to convert.
+ else if (Gain[1] < Gain[0]) {
+ ORmissL
+ << "No select conversion in the loop due to negative gradient gain. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+
+ // Non-predicated version of the loop is more profitable than its
+ // predicated version.
+ return true;
+}
+
+// Computes instruction and loop-critical-path costs for both the predicated
+// and non-predicated version of the given loop.
+// Returns false if unable to compute these costs due to invalid cost of loop
+// instruction(s).
+bool SelectOptimize::computeLoopCosts(
+ const Loop *L, const SelectGroups &SIGroups,
+ DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
+ const auto &SIset = getSIset(SIGroups);
+ // Compute instruction and loop-critical-path costs across two iterations for
+ // both predicated and non-predicated version.
+ const unsigned Iterations = 2;
+ for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
+ // Cost of the loop's critical path.
+ CostInfo &MaxCost = LoopCost[Iter];
+ for (BasicBlock *BB : L->getBlocks()) {
+ for (const Instruction &I : *BB) {
+ if (I.isDebugOrPseudoInst())
+ continue;
+ // Compute the predicated and non-predicated cost of the instruction.
+ Scaled64 IPredCost = Scaled64::getZero(),
+ INonPredCost = Scaled64::getZero();
+
+ // Assume infinite resources that allow us to fully exploit the available
+ // instruction-level parallelism.
+ // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
+ for (const Use &U : I.operands()) {
+ auto UI = dyn_cast<Instruction>(U.get());
+ if (!UI)
+ continue;
+ if (InstCostMap.count(UI)) {
+ IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
+ INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
+ }
+ }
+ auto ILatency = computeInstCost(&I);
+ if (!ILatency) {
+ OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
+ ORmissL << "Invalid instruction cost preventing analysis and "
+ "optimization of the inner-most loop containing this "
+ "instruction. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+ IPredCost += Scaled64::get(ILatency.getValue());
+ INonPredCost += Scaled64::get(ILatency.getValue());
+
+ // For a select that can be converted to branch,
+ // compute its cost as a branch (non-predicated cost).
+ //
+ // BranchCost = PredictedPathCost + MispredictCost
+ // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
+ // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
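+ // For example (illustrative): TrueOpCost = 2, FalseOpCost = 6, no branch
+ // weights, CondCost = 0, and a 14-cycle penalty at the default 25% rate
+ // give max(2*3 + 6, 6*3 + 2) / 4 + 14 * 25 / 100 = 5 + 3.5 = 8.5.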
+ if (SIset.contains(&I)) {
+ auto SI = dyn_cast<SelectInst>(&I);
+
+ Scaled64 TrueOpCost = Scaled64::getZero(),
+ FalseOpCost = Scaled64::getZero();
+ if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
+ if (InstCostMap.count(TI))
+ TrueOpCost = InstCostMap[TI].NonPredCost;
+ if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
+ if (InstCostMap.count(FI))
+ FalseOpCost = InstCostMap[FI].NonPredCost;
+ Scaled64 PredictedPathCost =
+ getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
+
+ Scaled64 CondCost = Scaled64::getZero();
+ if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
+ if (InstCostMap.count(CI))
+ CondCost = InstCostMap[CI].NonPredCost;
+ Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
+
+ INonPredCost = PredictedPathCost + MispredictCost;
+ }
+
+ InstCostMap[&I] = {IPredCost, INonPredCost};
+ MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
+ MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
+ }
+ }
+ }
+ return true;
+}
+
+SmallPtrSet<const Instruction *, 2>
+SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+ SmallPtrSet<const Instruction *, 2> SIset;
+ for (const SelectGroup &ASI : SIGroups)
+ for (const SelectInst *SI : ASI)
+ SIset.insert(SI);
+ return SIset;
+}
+
+Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+ InstructionCost ICost =
+ TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ if (auto OC = ICost.getValue())
+ return Optional<uint64_t>(*OC);
+ return Optional<uint64_t>(None);
+}
+
+ScaledNumber<uint64_t>
+SelectOptimize::getMispredictionCost(const SelectInst *SI,
+ const Scaled64 CondCost) {
+ uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
+
+ // Account for the misprediction rate when using a branch
+ // (conservatively set to 25% by default).
+ uint64_t MispredictRate = MispredictDefaultRate;
+ // If the select condition is obviously predictable, then the misprediction
+ // rate is zero.
+ if (isSelectHighlyPredictable(SI))
+ MispredictRate = 0;
+
+ // CondCost is included to account for cases where the computation of the
+ // condition is part of a long dependence chain (potentially loop-carried)
+ // that would delay detection of a misprediction and increase its cost.
+ Scaled64 MispredictCost =
+ std::max(Scaled64::get(MispredictPenalty), CondCost) *
+ Scaled64::get(MispredictRate);
+ MispredictCost /= Scaled64::get(100);
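+ // For example (illustrative numbers): with a 14-cycle penalty, a CondCost
+ // of 20, and the default 25% rate, the cost is max(14, 20) * 25 / 100 = 5.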
+
+ return MispredictCost;
+}
+
+// Returns the cost of a branch when the prediction is correct.
+// TrueCost * TrueProbability + FalseCost * FalseProbability.
+ScaledNumber<uint64_t>
+SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI) {
+ Scaled64 PredPathCost;
+ uint64_t TrueWeight, FalseWeight;
+ if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t SumWeight = TrueWeight + FalseWeight;
+ if (SumWeight != 0) {
+ PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
+ FalseCost * Scaled64::get(FalseWeight);
+ PredPathCost /= Scaled64::get(SumWeight);
+ return PredPathCost;
+ }
+ }
+ // Without branch weight metadata, we assume 75% for one path and 25% for
+ // the other, and pick the result with the bigger cost.
+ PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
+ FalseCost * Scaled64::get(3) + TrueCost);
+ PredPathCost /= Scaled64::get(4);
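+ // For example (illustrative): TrueCost = 8 and FalseCost = 2 give
+ // max(8*3 + 2, 2*3 + 8) / 4 = 26 / 4 = 6.5.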
+ return PredPathCost;
+}
+
+bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+ if (VectorCond)
+ return false;
+ TargetLowering::SelectSupportKind SelectKind;
+ if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+ return TLI->isSelectSupported(SelectKind);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 041d7e5b4a4a..aa688d9dda3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -35,7 +35,6 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -52,7 +51,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -426,6 +424,7 @@ namespace {
SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
+ SDValue visitAVG(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitMULO(SDNode *N);
@@ -511,6 +510,7 @@ namespace {
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
+ SDValue visitFP_TO_BF16(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitVPOp(SDNode *N);
@@ -520,7 +520,9 @@ namespace {
SDValue XformToShuffleWithZero(SDNode *N);
bool reassociationCanBreakAddressingModePattern(unsigned Opc,
- const SDLoc &DL, SDValue N0,
+ const SDLoc &DL,
+ SDNode *N,
+ SDValue N0,
SDValue N1);
SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1);
@@ -570,6 +572,8 @@ namespace {
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue BuildSREMPow2(SDNode *N);
+ SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
@@ -583,11 +587,11 @@ namespace {
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
- SDValue InnerPos, SDValue InnerNeg,
+ SDValue InnerPos, SDValue InnerNeg, bool HasPos,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
- SDValue InnerPos, SDValue InnerNeg,
+ SDValue InnerPos, SDValue InnerNeg, bool HasPos,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
@@ -665,9 +669,8 @@ namespace {
/// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
/// MulNode is the original multiply, AddNode is (add x, c1),
/// and ConstNode is c2.
- bool isMulAddWithConstProfitable(SDNode *MulNode,
- SDValue &AddNode,
- SDValue &ConstNode);
+ bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
+ SDValue ConstNode);
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
@@ -880,8 +883,8 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
- LHS = LHS.zextOrSelf(Bits);
- RHS = RHS.zextOrSelf(Bits);
+ LHS = LHS.zext(Bits);
+ RHS = RHS.zext(Bits);
}
// Return true if this node is a setcc, or is a select_cc
@@ -926,7 +929,7 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
- if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
return true;
return false;
}
@@ -996,6 +999,7 @@ static bool canSplitIdx(LoadSDNode *LD) {
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL,
+ SDNode *N,
SDValue N0,
SDValue N1) {
// Currently this only tries to ensure we don't undo the GEP splits done by
@@ -1004,33 +1008,62 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).
- if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
- return false;
+ // (load/store (add, (add, x, y), offset2)) ->
+ // (load/store (add, (add, x, offset2), y)).
- if (N0.hasOneUse())
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
- auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
- if (!C1 || !C2)
+ if (!C2)
return false;
- const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
- if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+ if (C2APIntVal.getSignificantBits() > 64)
return false;
- const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
- if (CombinedValueIntVal.getBitWidth() > 64)
- return false;
- const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
-
- for (SDNode *Node : N0->uses()) {
- auto LoadStore = dyn_cast<MemSDNode>(Node);
- if (LoadStore) {
- // Is x[offset2] already not a legal addressing mode? If so then
- // reassociating the constants breaks nothing (we test offset2 because
- // that's the one we hope to fold into the load or store).
+ if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (N0.hasOneUse())
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getSignificantBits() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N->uses()) {
+ if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
+ }
+ }
+ } else {
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
+ if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+ return false;
+
+ for (SDNode *Node : N->uses()) {
+ auto *LoadStore = dyn_cast<MemSDNode>(Node);
+ if (!LoadStore)
+ return false;
+
+ // Is x[offset2] a legal addressing mode? If so then reassociating the
+ // constants breaks the address pattern.
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
@@ -1038,13 +1071,9 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
- continue;
-
- // Would x[offset1+offset2] still be a legal addressing mode?
- AM.BaseOffs = CombinedValue;
- if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
- return true;
+ return false;
}
+ return true;
}
return false;
@@ -1072,11 +1101,51 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
- return DAG.getNode(Opc, DL, VT, OpNode, N01);
- return SDValue();
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
+ return DAG.getNode(Opc, DL, VT, OpNode, N01);
}
}
+
+ // Check for repeated operand logic simplifications.
+ if (Opc == ISD::AND || Opc == ISD::OR) {
+ // (N00 & N01) & N00 --> N00 & N01
+ // (N00 & N01) & N01 --> N00 & N01
+ // (N00 | N01) | N00 --> N00 | N01
+ // (N00 | N01) | N01 --> N00 | N01
+ if (N1 == N00 || N1 == N01)
+ return N0;
+ }
+ if (Opc == ISD::XOR) {
+ // (N00 ^ N01) ^ N00 --> N01
+ if (N1 == N00)
+ return N01;
+ // (N00 ^ N01) ^ N01 --> N00
+ if (N1 == N01)
+ return N00;
+ }
+
+ if (TLI.isReassocProfitable(DAG, N0, N1)) {
+ if (N1 != N01) {
+ // Reassociate if (op N00, N1) already exists
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
+ // If Op (Op N00, N1), N01 already exists, we need to stop
+ // reassociating to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
+ }
+ }
+
+ if (N1 != N00) {
+ // Reassociate if (op N01, N1) already exists
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
+ // If Op (Op N01, N1), N00 already exists, we need to stop
+ // reassociating to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
+ }
+ }
+ }
+
return SDValue();
}
@@ -1103,7 +1172,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
- To[0].getNode()->dump(&DAG);
+ To[0].dump(&DAG);
dbgs() << " and " << NumTo - 1 << " other values\n");
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
@@ -1115,10 +1184,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
- if (To[i].getNode()) {
- AddToWorklist(To[i].getNode());
- AddUsersToWorklist(To[i].getNode());
- }
+ if (To[i].getNode())
+ AddToWorklistWithUsers(To[i].getNode());
}
}
@@ -1134,9 +1201,8 @@ void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace the old value with the new one.
++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
@@ -1149,7 +1215,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
- if (TLO.Old.getNode()->use_empty())
+ if (TLO.Old->use_empty())
deleteAndRecombine(TLO.Old.getNode());
}
@@ -1196,7 +1262,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
- Trunc.getNode()->dump(&DAG); dbgs() << '\n');
+ Trunc.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
@@ -1295,7 +1361,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
@@ -1322,7 +1388,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
// If operands have a use ordering, make sure we deal with
// predecessor first.
- if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
+ if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
std::swap(N0, N1);
std::swap(NN0, NN1);
}
@@ -1363,11 +1429,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
bool Replace = false;
SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
@@ -1379,6 +1444,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
return SDValue();
SDLoc DL(Op);
+ SDValue N1 = Op.getOperand(1);
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
@@ -1414,7 +1480,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) {
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
- LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
@@ -1455,7 +1521,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
- Result.getNode()->dump(&DAG); dbgs() << '\n');
+ Result.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
@@ -1569,9 +1635,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
- if (N->getNumValues() == RV.getNode()->getNumValues())
+ if (N->getNumValues() == RV->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
@@ -1635,6 +1701,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU: return visitAVG(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO:
@@ -1724,6 +1794,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
case ISD::FREEZE: return visitFREEZE(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
@@ -2072,8 +2143,9 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
return false;
VT = ST->getMemoryVT();
AS = ST->getAddressSpace();
- } else
+ } else {
return false;
+ }
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
@@ -2094,17 +2166,100 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
else
// [reg +/- reg]
AM.Scale = 1;
- } else
+ } else {
return false;
+ }
return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
VT.getTypeForEVT(*DAG.getContext()), AS);
}
+/// This inverts a canonicalization in IR that replaces a variable select arm
+/// with an identity constant. Codegen improves if we re-use the variable
+/// operand rather than load a constant. This can also be converted into a
+/// masked vector operation if the target supports it.
+static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
+ bool ShouldCommuteOperands) {
+ // Match a select as operand 1. The identity constant that we are looking for
+ // is only valid as operand 1 of a non-commutative binop.
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (ShouldCommuteOperands)
+ std::swap(N0, N1);
+
+ // TODO: Should this apply to scalar select too?
+ if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ SDValue Cond = N1.getOperand(0);
+ SDValue TVal = N1.getOperand(1);
+ SDValue FVal = N1.getOperand(2);
+
+ // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
+ // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
+ // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
+ auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
+ if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
+ switch (Opcode) {
+ case ISD::FADD: // X + -0.0 --> X
+ return C->isZero() && C->isNegative();
+ case ISD::FSUB: // X - 0.0 --> X
+ return C->isZero() && !C->isNegative();
+ case ISD::FMUL: // X * 1.0 --> X
+ case ISD::FDIV: // X / 1.0 --> X
+ return C->isExactlyValue(1.0);
+ }
+ }
+ if (ConstantSDNode *C = isConstOrConstSplat(V)) {
+ switch (Opcode) {
+ case ISD::ADD: // X + 0 --> X
+ case ISD::SUB: // X - 0 --> X
+ case ISD::SHL: // X << 0 --> X
+ case ISD::SRA: // X s>> 0 --> X
+ case ISD::SRL: // X u>> 0 --> X
+ return C->isZero();
+ case ISD::MUL: // X * 1 --> X
+ return C->isOne();
+ }
+ }
+ return false;
+ };
+
+ // This transform increases uses of N0, so freeze it to be safe.
+ // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
+ if (isIdentityConstantForOpcode(Opcode, TVal)) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
+ return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
+ }
+ // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
+ if (isIdentityConstantForOpcode(Opcode, FVal)) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
+ return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
"Unexpected binary operator");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto BinOpcode = BO->getOpcode();
+ EVT VT = BO->getValueType(0);
+ if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
+ if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
+ return Sel;
+
+ if (TLI.isCommutativeBinOp(BO->getOpcode()))
+ if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
+ return Sel;
+ }
+
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
// TODO: Handle ISD::SELECT_CC.
@@ -2133,7 +2288,6 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
// propagate non constant operands into select. I.e.:
// and (select Cond, 0, -1), X --> select Cond, 0, X
// or X, (select Cond, -1, 0) --> select Cond, -1, X
- auto BinOpcode = BO->getOpcode();
bool CanFoldNonConst =
(BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
(isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
@@ -2145,8 +2299,6 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
- EVT VT = BO->getValueType(0);
-
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
// Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
@@ -2249,6 +2401,15 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
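+
+// For example, (or (shl x, 4), 1) behaves like (add (shl x, 4), 1) because
+// the operands share no set bits, and (xor x, INT_MIN) behaves like
+// (add x, INT_MIN) because adding the minimum signed value only flips the
+// sign bit.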
+static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
+ unsigned Opcode = V.getOpcode();
+ if (Opcode == ISD::OR)
+ return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
+ if (Opcode == ISD::XOR)
+ return isMinSignedConstant(V.getOperand(1));
+ return false;
+}
+
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
@@ -2287,66 +2448,60 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
if (isNullConstant(N1))
return N0;
- if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
// fold ((A-c1)+c2) -> (A+(c2-c1))
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
- SDValue Sub =
- DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
- assert(Sub && "Constant folding failed");
+ if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
- }
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- SDValue Add =
- DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
- assert(Add && "Constant folding failed");
+ if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
- }
+ }
- // add (sext i1 X), 1 -> zext (not i1 X)
- // We don't transform this pattern:
- // add (zext i1 X), -1 -> sext (not i1 X)
- // because most (?) targets generate better code for the zext form.
- if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
- isOneOrOneSplat(N1)) {
- SDValue X = N0.getOperand(0);
- if ((!LegalOperations ||
- (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
- TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
- X.getScalarValueSizeInBits() == 1) {
- SDValue Not = DAG.getNOT(DL, X, X.getValueType());
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
- }
+ // add (sext i1 X), 1 -> zext (not i1 X)
+ // We don't transform this pattern:
+ // add (zext i1 X), -1 -> sext (not i1 X)
+ // because most (?) targets generate better code for the zext form.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ isOneOrOneSplat(N1)) {
+ SDValue X = N0.getOperand(0);
+ if ((!LegalOperations ||
+ (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+ X.getScalarValueSizeInBits() == 1) {
+ SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
+ }
- // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
- // equivalent to (add x, c0).
- if (N0.getOpcode() == ISD::OR &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
- DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
- if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
- {N1, N0.getOperand(1)}))
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
- }
+ // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
+ // iff (or x, c0) is equivalent to (add x, c0).
+ // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
+ // iff (xor x, c0) is equivalent to (add x, c0).
+ if (isADDLike(N0, DAG)) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
}
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate add
- if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+ if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
// equivalent to (add x, c).
+ // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
+ // equivalent to (add x, c).
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
- if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
- DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+ if (isADDLike(N0, DAG) && N0.hasOneUse() &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
return DAG.getNode(ISD::ADD, DL, VT,
DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
@@ -2406,7 +2561,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
- if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0->hasOneUse() && N1->hasOneUse()) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
@@ -2459,8 +2615,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// add (add x, y), 1
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
- if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
- N0.getOpcode() == ISD::ADD) {
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
+ N0.hasOneUse()) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
@@ -2468,7 +2624,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
}
// (x - y) + -1 -> add (xor y, -1), x
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isAllOnesOrAllOnesSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
@@ -2565,7 +2721,8 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- // TODO SimplifyVBinOp
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
// fold (add_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
@@ -2611,7 +2768,7 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
- EVT VT = V.getNode()->getValueType(0);
+ EVT VT = V->getValueType(0);
if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
return SDValue();
@@ -2664,27 +2821,27 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
// add (add x, 1), y
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
- if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
- N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
+ N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
}
- // Hoist one-use subtraction by non-opaque constant:
- // (x - C) + y -> (x + y) - C
- // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
- return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
- }
- // Hoist one-use subtraction from non-opaque constant:
- // (C - x) + y -> (y - x) + C
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
- SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
+ // Hoist one-use subtraction by non-opaque constant:
+ // (x - C) + y -> (x + y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+ // Hoist one-use subtraction from non-opaque constant:
+ // (C - x) + y -> (y - x) + C
+ if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ }
}
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
@@ -3060,21 +3217,26 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
- SDValue Carry0, SDValue Carry1, SDNode *N) {
- if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
+ SDValue N0, SDValue N1, SDNode *N) {
+ SDValue Carry0 = getAsCarry(TLI, N0);
+ if (!Carry0)
+ return SDValue();
+ SDValue Carry1 = getAsCarry(TLI, N1);
+ if (!Carry1)
return SDValue();
+
unsigned Opcode = Carry0.getOpcode();
if (Opcode != Carry1.getOpcode())
return SDValue();
if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
return SDValue();
- // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
- // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
- // the above ASCII art.)
- if (Carry1.getOperand(0) != Carry0.getValue(0) &&
- Carry1.getOperand(1) != Carry0.getValue(0))
+ // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
+ // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
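+ // (If Carry1 already feeds Carry0, then Carry1 is the top add/sub, so the
+ // swap below restores the expected order.)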
+ if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
std::swap(Carry0, Carry1);
+
+ // Check that the nodes are connected in the expected way.
if (Carry1.getOperand(0) != Carry0.getValue(0) &&
Carry1.getOperand(1) != Carry0.getValue(0))
return SDValue();
@@ -3254,9 +3416,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ auto PeekThroughFreeze = [](SDValue N) {
+ if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
+ return N->getOperand(0);
+ return N;
+ };
+
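+ // A one-use FREEZE yields the same value as its operand at its single use,
+ // so (sub x, (freeze x)) can still fold to zero like (sub x, x).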
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
- if (N0 == N1)
+ if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c3
@@ -3314,7 +3482,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Convert 0 - abs(x).
- if (N1->getOpcode() == ISD::ABS &&
+ if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
!TLI.isOperationLegalOrCustom(ISD::ABS, VT))
if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
return Result;
@@ -3352,44 +3520,31 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0.getOperand(0);
// fold (A+C1)-C2 -> A+(C1-C2)
- if (N0.getOpcode() == ISD::ADD &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC =
- DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
- assert(NewC && "Constant folding failed");
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+ if (N0.getOpcode() == ISD::ADD) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
}
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD) {
SDValue N11 = N1.getOperand(1);
- if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
- assert(NewC && "Constant folding failed");
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
- }
}
// fold (A-C1)-C2 -> A-(C1+C2)
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC =
- DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
- assert(NewC && "Constant folding failed");
- return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
}
// fold (c1-A)-c2 -> (c1-c2)-A
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
- SDValue NewC =
- DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
- assert(NewC && "Constant folding failed");
- return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
}
// fold ((A+(B+or-C))-B) -> A+or-C
@@ -3584,6 +3739,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // As with the previous fold, prefer add for more folding potential.
+ // Subtracting SMIN/0 is the same as adding SMIN/0:
+ // N0 - (X << BW-1) --> N0 + (X << BW-1)
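+ // ((X << BW-1) is either 0 or the minimum signed value, and -SMIN == SMIN
+ // in two's complement, so subtracting it equals adding it.)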
+ if (N1.getOpcode() == ISD::SHL) {
+ ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
+ if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+ }
+
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
@@ -3619,7 +3783,8 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- // TODO SimplifyVBinOp
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
// fold (sub_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
@@ -3770,19 +3935,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// fold (mul c1, c2) -> c1*c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
@@ -3790,7 +3956,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
@@ -3817,17 +3983,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return NewSel;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnes()) {
- SDLoc DL(N);
+ if (N1IsConst && ConstValue1.isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
- }
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) &&
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
- SDLoc DL(N);
SDValue LogBase2 = BuildLogBase2(N1, DL);
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
@@ -3837,7 +4000,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
- SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, DL, VT,
@@ -3882,7 +4044,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
ShAmt += TZeros;
assert(ShAmt < VT.getScalarSizeInBits() &&
"multiply-by-constant generated out of bounds shift");
- SDLoc DL(N);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
SDValue R =
@@ -3897,12 +4058,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N0.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
- if (isConstantOrConstantVector(C3))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
+ return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
@@ -3912,18 +4071,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N0.getOperand(1)) &&
- N0.getNode()->hasOneUse()) {
+ isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1.getOperand(1)) &&
- N1.getNode()->hasOneUse()) {
+ N1->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
}
}
@@ -3932,18 +4090,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOpcode() == ISD::ADD &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT,
- N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT,
- N0.getOperand(1), N1));
+ return DAG.getNode(
+ ISD::ADD, DL, VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
- return DAG.getVScale(SDLoc(N), VT, C0 * C1);
+ return DAG.getVScale(DL, VT, C0 * C1);
}
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
@@ -3952,7 +4109,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
- return DAG.getStepVector(SDLoc(N), VT, NewStep);
+ return DAG.getStepVector(DL, VT, NewStep);
}
// Fold ((mul x, 0/undef) -> 0,
@@ -3974,7 +4131,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
- SDLoc DL(N);
EVT LegalSVT = N1.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
@@ -3987,7 +4143,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// reassociate mul
- if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -4050,7 +4206,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
- for (SDNode *User : Op0.getNode()->uses()) {
+ for (SDNode *User : Op0->uses()) {
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
@@ -4190,12 +4346,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- EVT CCVT = getSetCCResultType(VT);
- unsigned BitWidth = VT.getScalarSizeInBits();
-
+static bool isDivisorPowerOfTwo(SDValue Divisor) {
// Helper for determining whether a value is a power-of-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
@@ -4208,11 +4359,20 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
return false;
};
+ return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
+}
+
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
- if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
+ if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
@@ -4368,6 +4528,16 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}
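+/// Try to produce a faster expansion for (srem x, pow2) via the target's
+/// BuildSREMPow2 hook, but only when no matching sdiv node exists whose
+/// result the generic rem-as-div-mul-sub rewrite could reuse.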
+SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
+ if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
+ !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
+ // Target-specific implementation of srem x, pow2.
+ if (SDValue Res = BuildSREMPow2(N))
+ return Res;
+ }
+ return SDValue();
+}
+
// Handles ISD::SREM and ISD::UREM.
SDValue DAGCombiner::visitREM(SDNode *N) {
unsigned Opcode = N->getOpcode();
@@ -4384,10 +4554,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
- // fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->isAllOnes())
- return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
- DAG.getConstant(0, DL, VT), N0);
+ // fold (urem X, -1) -> select(FX == -1, 0, FX)
+ // Freeze the numerator to avoid a miscompile with an undefined value.
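+ // (For i8: (urem x, 255) is x for any x in [0, 254] and 0 when x == 255.)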
+ if (!isSigned && N1C && N1C->isAllOnes()) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
+ }
if (SDValue V = simplifyDivRem(N, DAG))
return V;
@@ -4428,6 +4601,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
// combine will not return a DIVREM. Regardless, checking cheapness here
// makes sense since the simplification results in fatter code.
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+ if (isSigned) {
+ // Check if we can build a faster implementation for srem.
+ if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
+ return OptimizedRem;
+ }
+
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
@@ -4587,6 +4766,46 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAVG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (avg c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (avgfloor x, 0) -> x >> 1
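+ // ((avgfloor x, 0) computes (x + 0) >> 1, so the signed variant needs an
+ // arithmetic shift and the unsigned variant a logical shift.)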
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
+ if (Opcode == ISD::AVGFLOORS)
+ return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
+ if (Opcode == ISD::AVGFLOORU)
+ return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
+ }
+ }
+
+ // fold (avg x, undef) -> x
+ if (N0.isUndef())
+ return N1;
+ if (N1.isUndef())
+ return N0;
+
+ // TODO: If we use avg for scalars anywhere, we can add (avgfloor x, 0) -> x >> 1.
+
+ return SDValue();
+}
+
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
@@ -4745,7 +4964,9 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
// (mulo x, 2) -> (addo x, x)
- if (N1C && N1C->getAPIntValue() == 2)
+ // FIXME: This needs a freeze.
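+ // (In i2 the splat constant 2 is really -2, the minimum signed value, so
+ // the saddo form would be wrong; hence the scalar-size guard below.)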
+ if (N1C && N1C->getAPIntValue() == 2 &&
+ (!IsSigned || VT.getScalarSizeInBits() > 2))
return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
N->getVTList(), N0, N0);
@@ -4802,8 +5023,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
return 0;
const APInt &C1 = N1C->getAPIntValue();
const APInt &C2 = N3C->getAPIntValue();
- if (C1.getBitWidth() < C2.getBitWidth() ||
- C1 != C2.sextOrSelf(C1.getBitWidth()))
+ if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
return 0;
return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
};
@@ -4910,7 +5130,7 @@ static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
const APInt &C1 = N1C->getAPIntValue();
const APInt &C3 = N3C->getAPIntValue();
if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
- C1 != C3.zextOrSelf(C1.getBitWidth()))
+ C1 != C3.zext(C1.getBitWidth()))
return SDValue();
unsigned BW = (C1 + 1).exactLogBase2();
@@ -4940,6 +5160,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
+ // If the operands are the same, this is a no-op.
+ if (N0 == N1)
+ return N0;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5245,29 +5469,27 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
}
// Turn compare of constants whose difference is 1 bit into add+and+setcc.
- // TODO - support non-uniform vector amounts.
if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
// Match a shared variable operand and 2 non-opaque constant operands.
- ConstantSDNode *C0 = isConstOrConstSplat(LR);
- ConstantSDNode *C1 = isConstOrConstSplat(RR);
- if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
+ auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
+ // The difference of the constants must be a single bit.
const APInt &CMax =
APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
const APInt &CMin =
APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
- // The difference of the constants must be a single bit.
- if ((CMax - CMin).isPowerOf2()) {
- // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
- // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
- SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
- SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
- SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
- SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
- SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
- SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
- SDValue Zero = DAG.getConstant(0, DL, OpVT);
- return DAG.getSetCC(DL, VT, And, Zero, CC0);
- }
+ return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
+ };
+ if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
+ // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
+ // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
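+ // e.g. (x != 9) & (x != 13): CMin = 9, CMax = 13, CMax - CMin = 4, and
+ // ((x - 9) & ~4) == 0 exactly when x is 9 or 13.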
+ SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
+ SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
+ SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
+ SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, And, Zero, CC0);
}
}
}
@@ -5769,6 +5991,9 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
if (ShiftAmt.uge(VTBitWidth))
return SDValue();
+ if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1)))
+ return SDValue();
+
// Turn this into a bit-test pattern using mask op + setcc:
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
SDLoc DL(And);
@@ -5815,6 +6040,53 @@ static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
}
+/// Given a bitwise logic operation N with a matching bitwise logic operand,
+/// fold a pattern where 2 of the source operands are identically shifted
+/// values. For example:
+/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
+static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
+ SelectionDAG &DAG) {
+ unsigned LogicOpcode = N->getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+ LogicOpcode == ISD::XOR) &&
+ "Expected bitwise logic operation");
+
+ if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
+ return SDValue();
+
+ // Match another bitwise logic op and a shift.
+ unsigned ShiftOpcode = ShiftOp.getOpcode();
+ if (LogicOp.getOpcode() != LogicOpcode ||
+ !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
+ ShiftOpcode == ISD::SRA))
+ return SDValue();
+
+ // Match another shift op inside the first logic operand. Handle both commuted
+ // possibilities.
+ // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+ // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+ SDValue X1 = ShiftOp.getOperand(0);
+ SDValue Y = ShiftOp.getOperand(1);
+ SDValue X0, Z;
+ if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
+ LogicOp.getOperand(0).getOperand(1) == Y) {
+ X0 = LogicOp.getOperand(0).getOperand(0);
+ Z = LogicOp.getOperand(1);
+ } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
+ LogicOp.getOperand(1).getOperand(1) == Y) {
+ X0 = LogicOp.getOperand(1).getOperand(0);
+ Z = LogicOp.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
+ SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5848,27 +6120,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
return N0;
- // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
+ // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
- auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
- if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
- N0.hasOneUse() && N1.hasOneUse()) {
+ ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
+ if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
+ Splat && N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
// For this AND to be a zero extension of the masked load, the splat
// value must mask the bottom bits of the extended element type.
- if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
- uint64_t ElementSize =
- LoadVT.getVectorElementType().getScalarSizeInBits();
- if (Splat->getAPIntValue().isMask(ElementSize)) {
- return DAG.getMaskedLoad(
- ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
- MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
- LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
- ISD::ZEXTLOAD, MLoad->isExpandingLoad());
- }
+ uint64_t ElementSize =
+ LoadVT.getVectorElementType().getScalarSizeInBits();
+ if (Splat->getAPIntValue().isMask(ElementSize)) {
+ return DAG.getMaskedLoad(
+ ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+ MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
+ LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
+ ISD::ZEXTLOAD, MLoad->isExpandingLoad());
}
}
}
@@ -5944,7 +6214,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
APInt Constant = APInt::getZero(1);
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ if (const ConstantSDNode *C = isConstOrConstSplat(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
APInt SplatValue, SplatUndef;
@@ -6084,6 +6354,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+ if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+ return R;
+
// Masking the negated extension of a boolean is just the zero-extended
// boolean:
// and (sub 0, zext(bool X)), 1 --> zext(bool X)
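+ // (zext(bool) is 0 or 1; (sub 0, that) is 0 or -1; masking with 1 restores
+ // the original 0 or 1.)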
@@ -6142,9 +6417,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
return Shifts;
- if (TLI.hasBitTest(N0, N1))
- if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
- return V;
+ if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+ return V;
// Recognize the following pattern:
//
@@ -6194,11 +6468,11 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool LookPassAnd0 = false;
bool LookPassAnd1 = false;
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
- std::swap(N0, N1);
+ std::swap(N0, N1);
if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
- std::swap(N0, N1);
+ std::swap(N0, N1);
if (N0.getOpcode() == ISD::AND) {
- if (!N0.getNode()->hasOneUse())
+ if (!N0->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
// Also handle 0xffff since the LHS is guaranteed to have zeros there.
@@ -6211,7 +6485,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
}
if (N1.getOpcode() == ISD::AND) {
- if (!N1.getNode()->hasOneUse())
+ if (!N1->hasOneUse())
return SDValue();
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C || N11C->getZExtValue() != 0xFF)
@@ -6224,7 +6498,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
- if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
+ if (!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -6237,7 +6511,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
// Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
SDValue N00 = N0->getOperand(0);
if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
- if (!N00.getNode()->hasOneUse())
+ if (!N00->hasOneUse())
return SDValue();
ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
if (!N001C || N001C->getZExtValue() != 0xFF)
@@ -6248,7 +6522,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
SDValue N10 = N1->getOperand(0);
if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
- if (!N10.getNode()->hasOneUse())
+ if (!N10->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
// Also allow 0xFFFF since the bits will be shifted out. This is needed
@@ -6266,19 +6540,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
// Make sure everything beyond the low halfword gets set to zero since the SRL
// 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
- if (DemandHighBits && OpSizeInBits > 16) {
+ if (OpSizeInBits > 16) {
// If the left-shift isn't masked out then the only way this is a bswap is
// if all bits beyond the low 8 are 0. In that case the entire pattern
// reduces to a left shift anyway: leave it for other parts of the combiner.
- if (!LookPassAnd0)
+ if (DemandHighBits && !LookPassAnd0)
return SDValue();
// However, if the right shift isn't masked out then it might be because
- // it's not needed. See if we can spot that too.
- if (!LookPassAnd1 &&
- !DAG.MaskedValueIsZero(
- N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
- return SDValue();
+ // it's not needed. See if we can spot that too. If the high bits aren't
+ // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
+ // upper bits to be zero.
+ if (!LookPassAnd1) {
+ unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
+ if (!DAG.MaskedValueIsZero(N10,
+ APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
+ return SDValue();
+ }
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
@@ -6298,7 +6576,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
- if (!N.getNode()->hasOneUse())
+ if (!N->hasOneUse())
return false;
unsigned Opc = N.getOpcode();
@@ -6485,8 +6763,9 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
!(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
return SDValue();
- } else
+ } else {
return SDValue();
+ }
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
@@ -6524,7 +6803,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
// Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ (N0->hasOneUse() || N1->hasOneUse())) {
// We can only do this xform if we know that bits from X that are set in C2
// but not in C1 are already zero. Likewise for Y.
if (const ConstantSDNode *N0O1C =
@@ -6552,7 +6831,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
N1.getOpcode() == ISD::AND &&
N0.getOperand(0) == N1.getOperand(0) &&
// Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ (N0->hasOneUse() || N1->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
@@ -6567,14 +6846,38 @@ static SDValue visitORCommutative(
EVT VT = N0.getValueType();
if (N0.getOpcode() == ISD::AND) {
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
- if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+ // TODO: Set AllowUndefs = true.
+ if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0),
+ /* AllowUndefs */ false) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
- if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+ if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1),
+ /* AllowUndefs */ false) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
}
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+
+ auto peekThroughZext = [](SDValue V) {
+ if (V->getOpcode() == ISD::ZERO_EXTEND)
+ return V->getOperand(0);
+ return V;
+ };
+
+ // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+ if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
+
+ // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
+ if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
+ N0.getOperand(1) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
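+ // In both cases the funnel-shift result already ORs in the plain shift's
+ // bits, so repeating the shift in the 'or' is redundant.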
+
return SDValue();
}
@@ -6611,11 +6914,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
- // Do this only if the resulting shuffle is legal.
- if (isa<ShuffleVectorSDNode>(N0) &&
- isa<ShuffleVectorSDNode>(N1) &&
- // Avoid folding a node with illegal type.
- TLI.isTypeLegal(VT)) {
+ // Do this only if the resulting type / shuffle is legal.
+ auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
+ auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
+ if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
@@ -6624,11 +6926,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
- const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
- const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
bool CanFold = true;
int NumElts = VT.getVectorNumElements();
- SmallVector<int, 4> Mask(NumElts);
+ SmallVector<int, 4> Mask(NumElts, -1);
for (int i = 0; i != NumElts; ++i) {
int M0 = SV0->getMaskElt(i);
@@ -6640,10 +6940,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// If one element is zero and the other side is undef, keep undef.
// This also handles the case that both are undef.
- if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
- Mask[i] = -1;
+ if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
continue;
- }
// Make sure only one of the elements is zero.
if (M0Zero == M1Zero) {
@@ -6711,7 +7009,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
};
- if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
{N1, N0.getOperand(1)})) {
@@ -7031,8 +7329,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// Neg with outer conversions stripped away.
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
- SDValue InnerNeg, unsigned PosOpcode,
- unsigned NegOpcode, const SDLoc &DL) {
+ SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL) {
// fold (or (shl x, (*ext y)),
// (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y) or (rotr x, (sub 32, y))
@@ -7043,7 +7342,6 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
EVT VT = Shifted.getValueType();
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
/*IsRotate*/ true)) {
- bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
}
@@ -7059,8 +7357,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
SDValue Neg, SDValue InnerPos,
- SDValue InnerNeg, unsigned PosOpcode,
- unsigned NegOpcode, const SDLoc &DL) {
+ SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL) {
EVT VT = N0.getValueType();
unsigned EltBits = VT.getScalarSizeInBits();
@@ -7072,7 +7371,6 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
- bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);
}
@@ -7134,6 +7432,16 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
+
+ // If the type is going to be promoted and the target has enabled custom
+ // lowering for rotate, allow matching rotate by non-constants. Only allow
+ // this for scalar types.
+ if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
+ TargetLowering::TypePromoteInteger) {
+ HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
+ HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
+ }
+
if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
@@ -7187,11 +7495,6 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
- // TODO: Support pre-legalization funnel-shift by constant.
- bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
- if (!IsRotate && !(HasFSHL || HasFSHR))
- return SDValue(); // Requires funnel shift support.
-
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
@@ -7205,27 +7508,12 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
- // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
- // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
- // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
- // iff C1+C2 == EltSizeInBits
auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
- if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
- SDValue Res;
- if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
- bool UseROTL = !LegalOperations || HasROTL;
- Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- UseROTL ? LHSShiftAmt : RHSShiftAmt);
- } else {
- bool UseFSHL = !LegalOperations || HasFSHL;
- Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
- RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
- }
+ auto ApplyMasks = [&](SDValue Res) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
@@ -7246,6 +7534,71 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
}
return Res;
+ };
+
+ // TODO: Support pre-legalization funnel-shift by constant.
+ bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
+ if (!IsRotate && !(HasFSHL || HasFSHR)) {
+ if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ // Look for a disguised rotate by constant.
+ // The common shifted operand X may be hidden inside another 'or'.
+ SDValue X, Y;
+ auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
+ if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
+ return false;
+ if (CommonOp == Or.getOperand(0)) {
+ X = CommonOp;
+ Y = Or.getOperand(1);
+ return true;
+ }
+ if (CommonOp == Or.getOperand(1)) {
+ X = CommonOp;
+ Y = Or.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ SDValue Res;
+ if (matchOr(LHSShiftArg, RHSShiftArg)) {
+ // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
+ Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
+ } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
+ // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
+ Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
+ } else {
+ return SDValue();
+ }
+
+ return ApplyMasks(Res);
+ }
+
+ return SDValue(); // Requires funnel shift support.
+ }
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
+ // iff C1+C2 == EltSizeInBits
+ if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ SDValue Res;
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
+
+ return ApplyMasks(Res);
}
// Even pre-legalization, we can't easily rotate/funnel-shift by a variable
@@ -7276,26 +7629,26 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (IsRotate && (HasROTL || HasROTR)) {
SDValue TryL =
MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
- RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
- LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
if (TryR)
return TryR;
}
SDValue TryL =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
- LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
+ LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
- RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
+ RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
if (TryR)
return TryR;
@@ -7810,7 +8163,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// little endian value load
Optional<bool> IsBigEndian = isBigEndian(
makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
- if (!IsBigEndian.hasValue())
+ if (!IsBigEndian)
return SDValue();
assert(FirstByteProvider && "must be set");
@@ -8017,6 +8370,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
+ // look for 'add-like' folds:
+ // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
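+ // (Flipping the sign bit with xor is identical to adding it: the carry out
+ // of the top bit is discarded.)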
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ isMinSignedConstant(N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
@@ -8182,6 +8542,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+ if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+ return R;
+
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
return MM;
@@ -8412,7 +8777,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
}
unsigned NextOp = N0.getOpcode();
- // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
+
+ // fold (rot* (rot* x, c2), c1)
+ // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
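+ // e.g. i8: (rotl (rotl x, 9), 10) -> (rotl x, ((9 % 8) + (10 % 8)) % 8)
+ // = (rotl x, 3).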
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
@@ -8420,14 +8787,19 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
EVT ShiftVT = C1->getValueType(0);
bool SameSide = (N->getOpcode() == NextOp);
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
- if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
- CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
- SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
- SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
- ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
- return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
- CombinedShiftNorm);
- }
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+ SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N1, BitsizeC});
+ SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N0.getOperand(1), BitsizeC});
+ if (Norm1 && Norm2)
+ if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+ CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
+ return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+ CombinedShiftNorm);
+ }
}
}
return SDValue();
@@ -8587,52 +8959,63 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
- // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
- // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
- // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
- N0->getFlags().hasExact()) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t C1 = N0C1->getZExtValue();
- uint64_t C2 = N1C->getZExtValue();
- SDLoc DL(N);
- if (C1 <= C2)
- return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(C2 - C1, DL, ShiftVT));
- return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
- DAG.getConstant(C1 - C2, DL, ShiftVT));
+ if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
+ auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ const APInt &LHSC = LHS->getAPIntValue();
+ const APInt &RHSC = RHS->getAPIntValue();
+ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+ LHSC.getZExtValue() <= RHSC.getZExtValue();
+ };
+
+ SDLoc DL(N);
+
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+ // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 >= C2
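+ // e.g. (shl (srl exact X, 3), 5) -> (shl X, 2): 'exact' guarantees the
+ // shifted-out low bits were zero, so no information is lost.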
+ if (N0->getFlags().hasExact()) {
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ }
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
+ }
}
- }
- // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
- // (and (srl x, (sub c1, c2), MASK)
- // Only fold this if the inner shift has no other uses -- if it does, folding
- // this will increase the total number of instructions.
- // TODO - drop hasOneUse requirement if c1 == c2?
- // TODO - support non-uniform vector shift amounts.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
- uint64_t c1 = N0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
- SDValue Shift;
- if (c2 > c1) {
- Mask <<= c2 - c1;
- SDLoc DL(N);
- Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, DL, ShiftVT));
- } else {
- Mask.lshrInPlace(c1 - c2);
- SDLoc DL(N);
- Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, DL, ShiftVT));
- }
- SDLoc DL(N0);
- return DAG.getNode(ISD::AND, DL, VT, Shift,
- DAG.getConstant(Mask, DL, VT));
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does,
+ // folding this will increase the total number of instructions.
+ if (N0.getOpcode() == ISD::SRL &&
+ (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
}
}
@@ -8651,7 +9034,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0.getNode()->hasOneUse() &&
+ N0->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
TLI.isDesirableToCommuteWithShift(N, Level)) {
@@ -8663,14 +9046,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
- if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
- isConstantOrConstantVector(N1, /* No Opaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
- SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
- if (isConstantOrConstantVector(Shl))
+ if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Shl =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && !N1C->isOpaque())
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
@@ -8956,8 +9339,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
if (LargeShift->getAPIntValue() == TruncBits) {
SDLoc DL(N);
- SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
- getShiftAmountTy(LargeVT));
+ EVT LargeShiftVT = getShiftAmountTy(LargeVT);
+ SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
+ Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
+ DAG.getConstant(TruncBits, DL, LargeShiftVT));
SDValue SRA =
DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
@@ -8996,6 +9381,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return V;
EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold (srl c1, c2) -> c1 >>u c2
@@ -9037,7 +9423,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
- EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
}
@@ -9081,15 +9466,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
- // fold (srl (shl x, c), c) -> (and x, cst2)
- // TODO - (srl (shl x, c1), c2).
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
- SDLoc DL(N);
- SDValue Mask =
- DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
- AddToWorklist(Mask.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+ // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
+ // (and (srl x, (sub c2, c1)), MASK)
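+ // e.g. i8: (srl (shl x, 5), 3) -> (and (shl x, 2), 0b00011100), keeping
+ // exactly the bits the shift pair would keep.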
+ if (N0.getOpcode() == ISD::SHL &&
+ (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ const APInt &LHSC = LHS->getAPIntValue();
+ const APInt &RHSC = RHS->getAPIntValue();
+ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+ LHSC.getZExtValue() <= RHSC.getZExtValue();
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
@@ -9345,6 +9756,21 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
return C;
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
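+  // A saturating shift left cannot saturate when the shift provably loses no
+  // information, e.g. with 9 known sign bits an i32 sshlsat by up to 8 is
+  // exact, so it can be relaxed to a plain shl.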
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
+ // fold (sshlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::SSHLSAT && N1C &&
+ N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+
+ // fold (ushlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::USHLSAT && N1C &&
+ N1C->getAPIntValue().ule(
+ DAG.computeKnownBits(N0).countMinLeadingZeros()))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+ }
+
return SDValue();
}
@@ -9368,18 +9794,27 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
return SDValue();
+ EVT VT = N->getValueType(0);
EVT VT1 = Op0.getOperand(0).getValueType();
EVT VT2 = Op1.getOperand(0).getValueType();
- // Check if the operands are of same type and valid size.
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
- if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
- return SDValue();
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
- SDValue ABD =
- DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
+ // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
+ // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
+ // NOTE: Extensions must be equivalent.
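+  // For the narrow-ABD form below, the absolute difference fits in the narrow
+  // type as an unsigned value, so zero-extending its result is correct.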
+ if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+ SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
+ }
+
+ // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
+ // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
+ if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
+ return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
+
+ return SDValue();
}
SDValue DAGCombiner::visitABS(SDNode *N) {
@@ -9405,24 +9840,60 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (bswap c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::BSWAP, DL, VT, N0);
// fold (bswap (bswap x)) -> x
if (N0.getOpcode() == ISD::BSWAP)
- return N0->getOperand(0);
+ return N0.getOperand(0);
// Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
// isn't supported, it will be expanded to bswap followed by a manual reversal
// of bits in each byte. By placing bswaps before bitreverse, we can remove
// the two bswaps if the bitreverse gets expanded.
if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
- SDLoc DL(N);
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
}
+ // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
+  // iff c >= bw/2 (i.e. the lower half of the shl result is known zero)
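+  // e.g. i64: bswap (shl x, 48) -> zext (bswap (trunc (shl x, 16)))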
+ unsigned BW = VT.getScalarSizeInBits();
+ if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
+ auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
+ if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+ ShAmt->getZExtValue() >= (BW / 2) &&
+ (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
+ TLI.isTruncateFree(VT, HalfVT) &&
+ (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
+ SDValue Res = N0.getOperand(0);
+ if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
+ Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
+ Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+ }
+
+ // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+ // inverse-shift-of-bswap:
+ // bswap (X u<< C) --> (bswap X) u>> C
+ // bswap (X u>> C) --> (bswap X) u<< C
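+  // This is valid because shifting by a multiple of 8 bits commutes with
+  // bswap if the shift direction is inverted, e.g. for i32:
+  //   bswap (x << 8) == (bswap x) >> 8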
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ N0.hasOneUse()) {
+ auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+ ShAmt->getZExtValue() % 8 == 0) {
+ SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+ unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
+ return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
+ }
+ }
+
return SDValue();
}
@@ -9673,7 +10144,8 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
if (C1Val.isPowerOf2() && C2Val.isZero()) {
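+      // e.g. select Cond, 8, 0 --> shl (zext Cond), 3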
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+ SDValue ShAmtC =
+ DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
}
@@ -9956,7 +10428,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
// Any flags available in a select/setcc fold will be on the setcc as they
// migrated from fcmp
- Flags = N0.getNode()->getFlags();
+ Flags = N0->getFlags();
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
N2, N0.getOperand(2));
SelectNode->setFlags(Flags);
@@ -10029,14 +10501,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
}
-bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
+ SelectionDAG &DAG) {
if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
return false;
+ // Only perform the transformation when existing operands can be reused.
+ if (IndexIsScaled)
+ return false;
+
// For now we check only the LHS of the add.
SDValue LHS = Index.getOperand(0);
SDValue SplatVal = DAG.getSplatValue(LHS);
- if (!SplatVal)
+ if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType())
return false;
BasePtr = SplatVal;
@@ -10045,23 +10522,29 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
}
// Fold sext/zext of index into index type.
-bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
- bool Scaled, SelectionDAG &DAG) {
+bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
+ SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // It's always safe to look through zero extends.
if (Index.getOpcode() == ISD::ZERO_EXTEND) {
SDValue Op = Index.getOperand(0);
- MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
+ IndexType = ISD::UNSIGNED_SCALED;
Index = Op;
return true;
}
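+    // A zero-extended index is known non-negative, so even when the extend
+    // itself must stay, a signed index type can be relaxed to unsigned.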
+ if (ISD::isIndexTypeSigned(IndexType)) {
+ IndexType = ISD::UNSIGNED_SCALED;
+ return true;
+ }
}
- if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+ // It's only safe to look through sign extends when Index is signed.
+ if (Index.getOpcode() == ISD::SIGN_EXTEND &&
+ ISD::isIndexTypeSigned(IndexType)) {
SDValue Op = Index.getOperand(0);
- MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
Index = Op;
return true;
}
@@ -10078,24 +10561,25 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
SDValue Scale = MSC->getScale();
SDValue StoreVal = MSC->getValue();
SDValue BasePtr = MSC->getBasePtr();
+ ISD::MemIndexType IndexType = MSC->getIndexType();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
- if (refineUniformBase(BasePtr, Index, DAG)) {
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedScatter(
- DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
- MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
}
- if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
+ if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedScatter(
- DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
- MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
}
return SDValue();
@@ -10150,7 +10634,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// If this is a TRUNC followed by a masked store, fold this into a masked
// truncating store. We can do this even if this is already a masked
// truncstore.
- if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
+ if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
MST->isUnindexed() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
MST->getMemoryVT(), LegalOperations)) {
@@ -10173,26 +10657,25 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
SDValue Scale = MGT->getScale();
SDValue PassThru = MGT->getPassThru();
SDValue BasePtr = MGT->getBasePtr();
+ ISD::MemIndexType IndexType = MGT->getIndexType();
SDLoc DL(N);
// Zap gathers with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, PassThru, MGT->getChain());
- if (refineUniformBase(BasePtr, Index, DAG)) {
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
- MGT->getMemoryVT(), DL, Ops,
- MGT->getMemOperand(), MGT->getIndexType(),
- MGT->getExtensionType());
+ return DAG.getMaskedGather(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
}
- if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
+ if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
- MGT->getMemoryVT(), DL, Ops,
- MGT->getMemOperand(), MGT->getIndexType(),
- MGT->getExtensionType());
+ return DAG.getMaskedGather(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
}
return SDValue();
@@ -10446,23 +10929,25 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
Other = N1;
}
+ // zext(x) >= y ? trunc(zext(x) - y) : 0
+ // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
+ // zext(x) > y ? trunc(zext(x) - y) : 0
+ // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
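+  // usubsat computes max(a - b, 0), which is exactly what this select
+  // produces; the umin clamp ensures truncating y does not discard bits that
+  // would change the result.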
+ if (Other && Other.getOpcode() == ISD::TRUNCATE &&
+ Other.getOperand(0).getOpcode() == ISD::SUB &&
+ (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
+ SDValue OpLHS = Other.getOperand(0).getOperand(0);
+ SDValue OpRHS = Other.getOperand(0).getOperand(1);
+ if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
+ if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
+ DAG, DL))
+ return R;
+ }
+
if (Other && Other.getNumOperands() == 2) {
SDValue CondRHS = RHS;
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
- if (Other.getOpcode() == ISD::SUB &&
- LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
- OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
- // Look for a general sub with unsigned saturation first.
- // zext(x) >= y ? x - trunc(y) : 0
- // --> usubsat(x,trunc(umin(y,SatLimit)))
- // zext(x) > y ? x - trunc(y) : 0
- // --> usubsat(x,trunc(umin(y,SatLimit)))
- if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
- return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
- DL);
- }
-
if (OpLHS == LHS) {
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> usubsat x, y
@@ -10493,8 +10978,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
- // FIXME: Would it be better to use computeKnownBits to determine
- // whether it's safe to decanonicalize the xor?
+ // FIXME: Would it be better to use computeKnownBits to
+ // determine whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> usubsat x, C
APInt SplatValue;
if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
@@ -10560,17 +11045,18 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
CC, SDLoc(N), false)) {
AddToWorklist(SCC.getNode());
- if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
- if (!SCCC->isZero())
- return N2; // cond always true -> true val
- else
- return N3; // cond always false -> false val
- } else if (SCC->isUndef()) {
- // When the condition is UNDEF, just return the first operand. This is
- // coherent the DAG creation, no setcc node is created in this case
+ // cond always true -> true val
+ // cond always false -> false val
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
+ return SCCC->isZero() ? N3 : N2;
+
+    // When the condition is UNDEF, just return the first operand. This is
+    // consistent with DAG creation, where no setcc node is created in this
+    // case.
+ if (SCC->isUndef())
return N2;
- } else if (SCC.getOpcode() == ISD::SETCC) {
- // Fold to a simpler select_cc
+
+ // Fold to a simpler select_cc
+ if (SCC.getOpcode() == ISD::SETCC) {
SDValue SelectOp = DAG.getNode(
ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
SCC.getOperand(1), N2, N3, SCC.getOperand(2));
@@ -10853,9 +11339,8 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
- for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
- UE = N0.getNode()->use_end();
- UI != UE; ++UI) {
+ for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
+ ++UI) {
SDNode *User = *UI;
if (User == N)
continue;
@@ -11187,9 +11672,12 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
bool LegalOperations, SDNode *N, SDValue N0,
ISD::LoadExtType ExtLoadType,
ISD::NodeType ExtOpc) {
+  // TODO: isFixedLengthVector() should be removed, with any negative effects
+  // on code generation addressed through that target's implementation of
+  // isVectorLoadExtDesirable().
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
- ((LegalOperations || VT.isVector() ||
+ ((LegalOperations || VT.isFixedLengthVector() ||
!cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
@@ -11413,6 +11901,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+  // sext(undef) = 0 because the top bits will all be the same.
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -11582,10 +12074,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Return SDValue here as the xor should have already been replaced in
// this sext.
return SDValue();
- } else {
- // Return a new sext with the new xor.
- return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
}
+
+ // Return a new sext with the new xor.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
}
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
@@ -11658,6 +12150,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // zext(undef) = 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -11917,6 +12413,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // aext(undef) = undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -11954,11 +12454,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
SDLoc DL(N);
- SDValue X = N0.getOperand(0).getOperand(0);
- X = DAG.getAnyExtOrTrunc(X, DL, VT);
- APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, DL, VT,
- X, DAG.getConstant(Mask, DL, VT));
+ SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
+ assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
+ return DAG.getNode(ISD::AND, DL, VT, X, Y);
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
@@ -12086,13 +12585,9 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
// This eliminates the later assert:
// assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
// assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
+ SDLoc DL(N);
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
- assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
- "Asserting zero/sign-extended bits to a type larger than the "
- "truncated destination does not provide information");
-
- SDLoc DL(N);
EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
@@ -12108,10 +12603,6 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
Opcode == ISD::AssertZext) {
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
- assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
- "Asserting zero/sign-extended bits to a type larger than the "
- "truncated destination does not provide information");
-
if (AssertVT.bitsLT(BigA_AssertVT)) {
SDLoc DL(N);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
@@ -12229,13 +12720,11 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
unsigned ActiveBits = 0;
if (Mask.isMask()) {
ActiveBits = Mask.countTrailingOnes();
- } else if (Mask.isShiftedMask()) {
- ShAmt = Mask.countTrailingZeros();
- APInt ShiftedMask = Mask.lshr(ShAmt);
- ActiveBits = ShiftedMask.countTrailingOnes();
+ } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
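+    // This isShiftedMask overload also returns the mask's trailing-zero count
+    // (ShAmt) and the length of the run of set bits (ActiveBits).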
HasShiftedOffset = true;
- } else
+ } else {
return SDValue();
+ }
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -12852,21 +13341,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
- // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
- // When the adde's carry is not used.
- if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
- N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
- // We only do for addcarry before legalize operation
- ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
- TLI.isOperationLegal(N0.getOpcode(), VT))) {
- SDLoc SL(N);
- auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
- auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
- auto VTs = DAG.getVTList(VT, N0->getValueType(1));
- return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
- }
-
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
@@ -12911,6 +13385,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
break;
+ case ISD::ADDE:
+ case ISD::ADDCARRY:
+ // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+ // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
+ // When the adde's carry is not used.
+ // We only do for addcarry before legalize operation
+ if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+ TLI.isOperationLegal(N0.getOpcode(), VT)) &&
+ N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
+ SDLoc DL(N);
+ SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
+ return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
+ }
+ break;
case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must ensure that we don't
@@ -13044,7 +13534,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
(!LegalTypes ||
(!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
TLI.isTypeLegal(VT.getVectorElementType()))) &&
- N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
@@ -13112,8 +13602,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
- N0.getNode()->hasOneUse() && VT.isInteger() &&
- !VT.isVector() && !N0.getValueType().isVector()) {
+ N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
+ !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(NewConv.getNode());
@@ -13161,9 +13651,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// (xor (bitcast cst), (bitcast x)), 0),
// signbit)
// (xor (bitcast cst) (build_pair flipbit, flipbit))
- if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(0)) &&
- VT.isInteger() && !VT.isVector()) {
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
+ !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
@@ -13245,8 +13735,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (Op.getOpcode() == ISD::BITCAST &&
Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
- if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
- ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ if (Op.isUndef() || isAnyConstantBuildVector(Op))
return DAG.getBitcast(VT, Op);
return SDValue();
};
@@ -13286,6 +13775,14 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
+ // Fold freeze(bitcast(x)) -> bitcast(freeze(x)).
+ // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold.
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getBitcast(N->getValueType(0),
+ DAG.getNode(ISD::FREEZE, SDLoc(N0),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0)));
+
return SDValue();
}
@@ -13377,7 +13874,7 @@ static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
// Returns true if `N` can assume no infinities involved in its computation.
static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
- return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
+ return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
}
/// Try to perform FMA combining on a given FADD node.
@@ -13431,7 +13928,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
- if (N0.getNode()->use_size() > N1.getNode()->use_size())
+ if (N0->use_size() > N1->use_size())
std::swap(N0, N1);
}
@@ -13661,7 +14158,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
- (N0.getNode()->use_size() > N1.getNode()->use_size())) {
+ (N0->use_size() > N1->use_size())) {
// fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
if (SDValue V = tryToFoldXSubYZ(N0, N1))
return V;
@@ -14784,7 +15281,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// fold (frem c1, c2) -> fmod(c1,c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
return C;
-
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -15107,7 +15604,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
// This means this is also safe for a signed input and unsigned output, since
// a negative input would lead to undefined behavior.
unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
- unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits();
unsigned ActualSize = std::min(InputSize, OutputSize);
const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
@@ -15198,7 +15695,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
- if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
@@ -15642,7 +16139,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
- Ptr.getNode()->hasOneUse())
+ Ptr->hasOneUse())
return false;
// Ask the target to do addressing mode selection.
@@ -15702,8 +16199,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
- for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
- UE = BasePtr.getNode()->use_end();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end();
UI != UE; ++UI) {
SDUse &Use = UI.getUse();
// Skip the use that is Ptr and uses of other results from BasePtr's
@@ -15741,7 +16238,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Now check for #3 and #4.
bool RealUse = false;
- for (SDNode *Use : Ptr.getNode()->uses()) {
+ for (SDNode *Use : Ptr->uses()) {
if (Use == N)
continue;
if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
@@ -15774,7 +16271,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
++PreIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
- Result.getNode()->dump(&DAG); dbgs() << '\n');
+ Result.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -15864,7 +16361,7 @@ static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
return false;
SmallPtrSet<const SDNode *, 32> Visited;
- for (SDNode *Use : BasePtr.getNode()->uses()) {
+ for (SDNode *Use : BasePtr->uses()) {
if (Use == Ptr.getNode())
continue;
@@ -15901,7 +16398,7 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
const TargetLowering &TLI) {
if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
IsMasked, Ptr, TLI) ||
- Ptr.getNode()->hasOneUse())
+ Ptr->hasOneUse())
return nullptr;
// Try turning it into a post-indexed load / store except when
@@ -15961,9 +16458,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
- dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ Result.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -16204,7 +16700,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
- dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
+ dbgs() << "\nWith chain: "; Chain.dump(&DAG);
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
@@ -16235,7 +16731,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else
Index = DAG.getUNDEF(N->getValueType(1));
LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
- dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; Undef.dump(&DAG);
dbgs() << " and 2 other values\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
@@ -16947,11 +17443,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
- // legalization (and the target doesn't explicitly think this is a bad idea).
+ // legalization. If the source type is legal, but the store type isn't, see
+ // if we can use a truncating store.
MVT VT = MVT::getIntegerVT(NumBytes * 8);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!DC->isTypeLegal(VT))
+ bool UseTruncStore;
+ if (DC->isTypeLegal(VT))
+ UseTruncStore = false;
+ else if (TLI.isTypeLegal(IVal.getValueType()) &&
+ TLI.isTruncStoreLegal(IVal.getValueType(), VT))
+ UseTruncStore = true;
+ else
return SDValue();
+ // Check that the target doesn't think this is a bad idea.
if (St->getMemOperand() &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand()))
@@ -16979,10 +17483,15 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
}
+ ++OpsNarrowed;
+ if (UseTruncStore)
+ return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ VT, St->getOriginalAlign());
+
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
- ++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
St->getPointerInfo().getWithOffset(StOffset),
@@ -17003,11 +17512,15 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
SDValue Ptr = ST->getBasePtr();
EVT VT = Value.getValueType();
- if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ if (ST->isTruncatingStore() || VT.isVector())
return SDValue();
unsigned Opc = Value.getOpcode();
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ !Value.hasOneUse())
+ return SDValue();
+
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
@@ -17032,8 +17545,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (!EnableReduceLoadOpStoreWidth)
return SDValue();
- if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
- Value.getOperand(1).getOpcode() != ISD::Constant)
+ if (Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N0 = Value.getOperand(0);
@@ -17189,14 +17701,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
// (A + c1) * c3
// (A + c2) * c3
// We're checking for cases where we have common "c3 * A" expressions.
-bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
- SDValue &AddNode,
- SDValue &ConstNode) {
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
+ SDValue ConstNode) {
APInt Val;
// If the add only has one use, and the target thinks the folding is
// profitable or does not lead to worse code, this would be OK to do.
- if (AddNode.getNode()->hasOneUse() &&
+ if (AddNode->hasOneUse() &&
TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
return true;
@@ -17330,7 +17841,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
if (isa<ConstantFPSDNode>(Val)) {
// Not clear how to truncate FP values.
return false;
- } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+ }
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Val))
Val = DAG.getConstant(C->getAPIntValue()
.zextOrTrunc(Val.getValueSizeInBits())
.zextOrTrunc(ElementSizeBits),
@@ -17424,7 +17937,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
if (!UseTrunc) {
NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
- FirstInChain->getAlign(), Flags.getValue(), AAInfo);
+ FirstInChain->getAlign(), *Flags, AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -17436,7 +17949,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), Flags.getValue(), AAInfo);
+ FirstInChain->getAlign(), *Flags, AAInfo);
}
// Replace all merged stores with the new store.
@@ -17604,11 +18117,9 @@ void DAGCombiner::getStoreMergeCandidates(
}
}
-// We need to check that merging these stores does not cause a loop in
-// the DAG. Any store candidate may depend on another candidate
-// indirectly through its operand (we already consider dependencies
-// through the chain). Check in parallel by searching up from
-// non-chain operands of candidates.
+// We need to check that merging these stores does not cause a loop in the
+// DAG. Any store candidate may depend on another candidate indirectly through
+// its operands. Check in parallel by searching up from operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode) {
@@ -17642,8 +18153,13 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SDNode *N = StoreNodes[i].MemNode;
// Of the 4 Store Operands:
// * Chain (Op 0) -> We have already considered these
- // in candidate selection and can be
- // safely ignored
+ // in candidate selection, but only by following the
+ // chain dependencies. We could still have a chain
+    //                   dependency to a load that has a non-chain dep to
+    //                   another load that depends on a store, etc. So it is
+    //                   possible to have dependencies that consist of a mix
+    //                   of chain and non-chain deps, and we need to include
+    //                   chain operands in the analysis here.
// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
// * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                    but aren't necessarily from the same base node, so
@@ -17651,7 +18167,7 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// * (Op 3) -> Represents the pre or post-indexing offset (or undef for
// non-indexed stores). Not constant on all targets (e.g. ARM)
// and so can participate in a cycle.
- for (unsigned j = 1; j < N->getNumOperands(); ++j)
+ for (unsigned j = 0; j < N->getNumOperands(); ++j)
Worklist.push_back(N->getOperand(j).getNode());
}
// Search through DAG. We can stop early if we find a store node.
@@ -17726,7 +18242,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ Align FirstStoreAlign = FirstInChain->getAlign();
unsigned LastLegalType = 1;
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
@@ -17814,7 +18330,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(NumSkip < FirstZeroAfterNonZero) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
@@ -17853,7 +18369,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ Align FirstStoreAlign = FirstInChain->getAlign();
unsigned NumStoresToMerge = 1;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
// Find a legal type for the vector store.
@@ -17884,7 +18400,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
// improved. Drop as many candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
@@ -18181,7 +18697,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
for (unsigned i = 0; i < NumElem; ++i) {
SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
- if (Val.getNode()->use_empty())
+ if (Val->use_empty())
recursivelyDeleteUnusedNodes(Val.getNode());
}
@@ -18331,6 +18847,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
default:
llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
+ case MVT::bf16:
case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
@@ -18338,7 +18855,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- ;
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), SDLoc(CFP),
MVT::i32);
@@ -18350,7 +18866,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
- ;
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, DL, Tmp,
@@ -18544,7 +19059,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// truncating store. We can do this even if this is already a truncstore.
if ((Value.getOpcode() == ISD::FP_ROUND ||
Value.getOpcode() == ISD::TRUNCATE) &&
- Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ Value->hasOneUse() && ST->isUnindexed() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
ST->getMemoryVT(), LegalOperations)) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
@@ -18807,6 +19322,14 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
}
}
+ // If we failed to find a match, see if we can replace an UNDEF shuffle
+ // operand.
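+  // Lane i of the replacement operand is then addressed as Mask.size() + i,
+  // i.e. as an element of the shuffle's second operand.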
+ if (ElementOffset == -1 && Y.isUndef() &&
+ InsertVal0.getValueType() == Y.getValueType()) {
+ ElementOffset = Mask.size();
+ Y = InsertVal0;
+ }
+
if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
@@ -18905,10 +19428,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
if (VT.isScalableVector())
return DAG.getSplatVector(VT, DL, InVal);
- else {
- SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
- return DAG.getBuildVector(VT, DL, Ops);
- }
+
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
}
return SDValue();
}
@@ -18920,9 +19442,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
+
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
+ // Handle <1 x ???> vector insertion special cases.
+ if (VT.getVectorNumElements() == 1) {
+ // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ InVal.getOperand(0).getValueType() == VT &&
+ isNullConstant(InVal.getOperand(1)))
+ return InVal.getOperand(0);
+ }
+
// Canonicalize insert_vector_elt dag nodes.
// Example:
// (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
@@ -18943,36 +19475,84 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
- // If we can't generate a legal BUILD_VECTOR, exit
- if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return SDValue();
+ // Attempt to fold the insertion into a legal BUILD_VECTOR.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
+ assert(Ops.size() == NumElts && "Unexpected vector size");
- // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
- // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
- // vector elements.
- SmallVector<SDValue, 8> Ops;
- // Do not combine these two vectors if the output vector will not replace
- // the input vector.
- if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
- Ops.append(InVec.getNode()->op_begin(),
- InVec.getNode()->op_end());
- } else if (InVec.isUndef()) {
- Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
- } else {
- return SDValue();
- }
- assert(Ops.size() == NumElts && "Unexpected vector size");
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ Ops[Elt] =
+ OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+ }
+
+ // Return the new vector
+ return DAG.getBuildVector(VT, DL, Ops);
+ };
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+
+ // Do not combine these two vectors if the output vector will not replace
+ // the input vector.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
+ Ops.append(InVec->op_begin(), InVec->op_end());
+ return UpdateBuildVector(Ops);
+ }
+
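+    // A SCALAR_TO_VECTOR node defines element 0 and leaves the remaining
+    // elements undefined, so it unpacks to one value followed by undefs.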
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
+ Ops.push_back(InVec.getOperand(0));
+ Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
+ return UpdateBuildVector(Ops);
+ }
+
+ if (InVec.isUndef()) {
+ Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
+ return UpdateBuildVector(Ops);
+ }
+
+    // If we're inserting into the end of a vector as part of a sequence, see
+ // if we can create a BUILD_VECTOR by following the sequence back up the
+ // chain.
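+    // e.g. for v4i32:
+    //   insert(insert(insert(insert(x, a, 0), b, 1), c, 2), d, 3)
+    //     -> build_vector(a, b, c, d)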
+ if (Elt == (NumElts - 1)) {
+ SmallVector<SDValue> ReverseInsertions;
+ ReverseInsertions.push_back(InVal);
+
+ EVT MaxEltVT = InVal.getValueType();
+ SDValue CurVec = InVec;
+ for (unsigned I = 1; I != NumElts; ++I) {
+ if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
+ break;
+
+ auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
+ if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
+ break;
+ SDValue CurVal = CurVec.getOperand(1);
+ ReverseInsertions.push_back(CurVal);
+ if (VT.isInteger()) {
+ EVT CurValVT = CurVal.getValueType();
+ MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
+ }
+ CurVec = CurVec.getOperand(0);
+ }
- // Insert the element
- if (Elt < Ops.size()) {
- // All the operands of BUILD_VECTOR must have the same type;
- // we enforce that here.
- EVT OpVT = Ops[0].getValueType();
- Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+ if (ReverseInsertions.size() == NumElts) {
+ for (unsigned I = 0; I != NumElts; ++I) {
+ SDValue Val = ReverseInsertions[(NumElts - 1) - I];
+ Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val;
+ Ops.push_back(Val);
+ }
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+ }
}
- // Return the new vector
- return DAG.getBuildVector(VT, DL, Ops);
+ return SDValue();
}
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
@@ -19021,47 +19601,33 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
- // The replacement we need to do here is a little tricky: we need to
- // replace an extractelement of a load with a load.
- // Use ReplaceAllUsesOfValuesWith to do the replacement.
- // Note that this replacement assumes that the extractvalue is the only
- // use of the load; that's okay because we don't want to perform this
- // transformation in other cases anyway.
+ // We are replacing a vector load with a scalar load. The new load must have
+ // identical memory op ordering to the original.
SDValue Load;
- SDValue Chain;
if (ResultVT.bitsGT(VecEltVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
- VecEltVT)
- ? ISD::ZEXTLOAD
- : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
- OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
- Alignment, OriginalLoad->getMemOperand()->getFlags(),
+ ISD::LoadExtType ExtType =
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
OriginalLoad->getAAInfo());
- Chain = Load.getValue(1);
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
} else {
- Load = DAG.getLoad(
- VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
- OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
- Chain = Load.getValue(1);
+    // The result type is narrower than or the same width as the vector element.
+ Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
+ Alignment, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
if (ResultVT.bitsLT(VecEltVT))
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
else
Load = DAG.getBitcast(ResultVT, Load);
}
- WorklistRemover DeadNodes(*this);
- SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
- SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorklist(EVE);
- // Since we're explicitly calling ReplaceAllUses, add the new node to the
- // worklist explicitly as well.
- AddToWorklistWithUsers(Load.getNode());
++OpsNarrowed;
- return SDValue(EVE, 0);
+ return Load;
}
/// Transform a vector binary operation into a scalar binary operation by moving
@@ -19073,7 +19639,7 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
SDValue Index = ExtElt->getOperand(1);
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
- Vec.getNode()->getNumValues() != 1)
+ Vec->getNumValues() != 1)
return SDValue();
// Targets may want to avoid this to prevent an expensive register transfer.
@@ -19129,8 +19695,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = VecOp.getOperand(0);
if (InOp.getValueType() != ScalarVT) {
- assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
- return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
+ InOp.getValueType().bitsGT(ScalarVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
}
return InOp;
}
@@ -19588,7 +20155,7 @@ SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
if (!isa<ConstantSDNode>(ShiftAmtVal))
return SDValue();
- uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
+ uint64_t ShiftAmt = In.getConstantOperandVal(1);
// The extracted value is not extracted at the right position
if (ShiftAmt != i * ScalarTypeBitsize)
@@ -20029,18 +20596,39 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
int Left = 2 * In;
int Right = 2 * In + 1;
SmallVector<int, 8> Mask(NumElems, -1);
- for (unsigned i = 0; i != NumElems; ++i) {
- if (VectorMask[i] == Left) {
- Mask[i] = i;
- VectorMask[i] = In;
- } else if (VectorMask[i] == Right) {
- Mask[i] = i + NumElems;
- VectorMask[i] = In;
+ SDValue L = Shuffles[Left];
+ ArrayRef<int> LMask;
+ bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ L.use_empty() && L.getOperand(1).isUndef() &&
+ L.getOperand(0).getValueType() == L.getValueType();
+ if (IsLeftShuffle) {
+ LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
+ L = L.getOperand(0);
+ }
+ SDValue R = Shuffles[Right];
+ ArrayRef<int> RMask;
+ bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ R.use_empty() && R.getOperand(1).isUndef() &&
+ R.getOperand(0).getValueType() == R.getValueType();
+ if (IsRightShuffle) {
+ RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
+ R = R.getOperand(0);
+ }
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (VectorMask[I] == Left) {
+ Mask[I] = I;
+ if (IsLeftShuffle)
+ Mask[I] = LMask[I];
+ VectorMask[I] = In;
+ } else if (VectorMask[I] == Right) {
+ Mask[I] = I + NumElems;
+ if (IsRightShuffle)
+ Mask[I] = RMask[I] + NumElems;
+ VectorMask[I] = In;
}
}
- Shuffles[In] =
- DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
+ Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
}
}
return Shuffles[0];
@@ -20628,7 +21216,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = Extract->getOperand(0);
unsigned BinOpcode = BinOp.getOpcode();
- if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
+ if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
return SDValue();
EVT VecVT = BinOp.getValueType();
@@ -20677,7 +21265,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
unsigned BOpcode = BinOp.getOpcode();
- if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
+ if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
return SDValue();
// Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
@@ -20736,8 +21324,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
BinOp.getOperand(0), NewExtIndex);
SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(1), NewExtIndex);
- SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
- BinOp.getNode()->getFlags());
+ SDValue NarrowBinOp =
+ DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
return DAG.getBitcast(VT, NarrowBinOp);
}
@@ -21018,6 +21606,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
}
+  // ty1 extract_subvector(ty2 splat(V)) -> ty1 splat(V)
+ if (V.getOpcode() == ISD::SPLAT_VECTOR)
+ if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
+ return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
+
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
if (V.getOpcode() == ISD::BITCAST &&
@@ -21383,9 +21977,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT != VT.getScalarType())
for (SDValue &Op : Ops)
- Op = TLI.isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
- : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+ Op = Op.isUndef() ? DAG.getUNDEF(SVT)
+ : (TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
@@ -21515,6 +22110,13 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
if (!Shuf->getOperand(1).isUndef())
return SDValue();
+
+  // If the inner operand is a known splat with no undefs, just return that
+  // directly.
+ // TODO: Create DemandedElts mask from Shuf's mask.
+ // TODO: Allow undef elements and merge with the shuffle code below.
+ if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
+ return Shuf->getOperand(0);
+
auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
if (!Splat || !Splat->isSplat())
return SDValue();
@@ -21561,6 +22163,53 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
NewMask);
}
+// Combine shuffles of bitcasts into a shuffle of the bitcast source type,
+// provided the mask can be widened to operate on the larger lanes.
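+// e.g. shuffle (v4i32 bitcast (v2i64 X)), undef, <2,3,0,1>
+//        -> bitcast (shuffle X, undef, <1,0>)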
+static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ SDValue Op0 = SVN->getOperand(0);
+ SDValue Op1 = SVN->getOperand(1);
+ EVT VT = SVN->getValueType(0);
+ if (Op0.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ EVT InVT = Op0.getOperand(0).getValueType();
+ if (!InVT.isVector() ||
+ (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
+ Op1.getOperand(0).getValueType() != InVT)))
+ return SDValue();
+ if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
+ (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
+ return SDValue();
+
+ int VTLanes = VT.getVectorNumElements();
+ int InLanes = InVT.getVectorNumElements();
+ if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
+ (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
+ return SDValue();
+ int Factor = VTLanes / InLanes;
+
+  // Check that each group of lanes in the mask is either all undef or forms a
+  // valid mask for the wider lane type.
+ ArrayRef<int> Mask = SVN->getMask();
+ SmallVector<int> NewMask;
+ if (!widenShuffleMaskElts(Factor, Mask, NewMask))
+ return SDValue();
+
+ if (!TLI.isShuffleMaskLegal(NewMask, InVT))
+ return SDValue();
+
+ // Create the new shuffle with the new mask and bitcast it back to the
+ // original type.
+ SDLoc DL(SVN);
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
+ SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
+ return DAG.getBitcast(VT, NewShuf);
+}
+
/// Combine shuffle of shuffle of the form:
/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
@@ -21772,7 +22421,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
int SplatIndex = SVN->getSplatIndex();
if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
- TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+ TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
// splat (vector_bo L, R), Index -->
// splat (scalar_bo (extelt L, Index), (extelt R, Index))
SDValue L = N0.getOperand(0), R = N0.getOperand(1);
@@ -21781,13 +22430,26 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
- SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
- N0.getNode()->getFlags());
+ SDValue NewBO =
+ DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
}
+ // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
+ // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
+ N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
+ if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
+ if (Idx->getAPIntValue() == SplatIndex)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+ }
+
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look though conversions that change things like v4f32 to v2f64.
@@ -22011,6 +22673,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // Match shuffles of bitcasts, so long as the mask can be treated as the
+ // larger type.
+ if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
+ return V;
+
// Compute the combined shuffle mask for a shuffle with SV0 as the first
// operand, and SV1 as the second operand.
// i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
@@ -22342,6 +23009,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
return N1.getOperand(0);
+  // Simplify inserts of a splatted scalar into an undef vector:
+ // insert_subvector undef, (splat X), N2 -> splat X
+ if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+
// If we are inserting a bitcast value into an undef, with the same
// number of elements, just use the bitcast input of the extract.
// i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
@@ -22489,6 +23161,16 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_bf16 (bf16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::BF16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
@@ -22516,6 +23198,19 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
}
+ // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
+ // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Subvec = N0.getOperand(1);
+ if ((Opcode == ISD::VECREDUCE_OR &&
+ (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
+ (Opcode == ISD::VECREDUCE_AND &&
+ (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
+ }
+
return SDValue();
}
@@ -22819,7 +23514,7 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
- const SDNodeFlags Flags = N0.getNode()->getFlags();
+ const SDNodeFlags Flags = N0->getFlags();
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
@@ -23489,6 +24184,27 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return SDValue();
}
+/// Given an ISD::SREM node expressing a remainder by constant power of 2,
+/// return a DAG expression that will generate the same value.
+SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isZero())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
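TLI.BuildSREMPow2 is target-specific, but a typical lowering for a power-of-two divisor is the classic bias-and-mask sequence; a minimal scalar sketch (assuming 1 <= K < 32 and arithmetic right shift of negatives, which is not necessarily what any given target emits):

    #include <cassert>
    #include <cstdint>

    // x % (1 << K) for signed x without a divide: bias negative inputs by
    // 2^K - 1 so the masking rounds toward zero, then subtract.
    int32_t sremPow2(int32_t X, unsigned K) {
      assert(K >= 1 && K < 32);
      uint32_t M = (1u << K) - 1;
      uint32_t Sign = static_cast<uint32_t>(X >> 31); // 0 or all-ones
      uint32_t Bias = Sign >> (32 - K);               // 0 or 2^K - 1
      uint32_t Quot = (static_cast<uint32_t>(X) + Bias) & ~M;
      return X - static_cast<int32_t>(Quot);
    }
    // sremPow2(-5, 2) == -1, matching C's -5 % 4.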
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
@@ -23798,9 +24514,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
auto &Size0 = MUC0.NumBytes;
auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
- OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
- SrcValOffset1 % *Size1 == 0) {
+ Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 &&
+ SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
@@ -23819,8 +24534,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
- Size0.hasValue() && Size1.hasValue()) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
+ Size1) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
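The Overlap values rebase both access ranges onto the smaller starting offset before handing the sizes to alias analysis; a toy illustration of that computation (helper name invented):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Length of an access of Size bytes at Off, measured from the common
    // base min(Off, OtherOff).
    int64_t rebasedLength(int64_t Size, int64_t Off, int64_t OtherOff) {
      return Size + Off - std::min(Off, OtherOff);
    }

    int main() {
      assert(rebasedLength(4, 8, 0) == 12); // [8,12) seen from base 0
      assert(rebasedLength(4, 0, 8) == 4);  // already at the base
    }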
@@ -23853,7 +24568,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
unsigned Depth = 0;
// Attempt to improve chain by a single step
- std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+ auto ImproveChain = [&](SDValue &C) -> bool {
switch (C.getOpcode()) {
case ISD::EntryToken:
// No need to mark EntryToken.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index d8ef79fe9a7b..ff5779967e22 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -72,7 +72,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -94,7 +93,6 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1265,7 +1263,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
// the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
- if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
+ if (UseInstrRefDebugInfo && Op->isReg()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
auto *NewExpr =
@@ -1324,7 +1322,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs.
- if (FuncInfo.MF->useDebugInstrRef()) {
+ if (UseInstrRefDebugInfo) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
}
@@ -1408,16 +1406,6 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) {
}
bool FastISel::selectBitCast(const User *I) {
- // If the bitcast doesn't change the type, just use the operand value.
- if (I->getType() == I->getOperand(0)->getType()) {
- Register Reg = getRegForValue(I->getOperand(0));
- if (!Reg)
- return false;
- updateValueMap(I, Reg);
- return true;
- }
-
- // Bitcasts of other values become reg-reg copies or BITCAST operators.
EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
EVT DstEVT = TLI.getValueType(DL, I->getType());
if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
@@ -1431,23 +1419,14 @@ bool FastISel::selectBitCast(const User *I) {
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
- // First, try to perform the bitcast by inserting a reg-reg copy.
- Register ResultReg;
+ // If the bitcast doesn't change the type, just use the operand value.
if (SrcVT == DstVT) {
- const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
- const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
- // Don't attempt a cross-class copy. It will likely fail.
- if (SrcClass == DstClass) {
- ResultReg = createResultReg(DstClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
- }
+ updateValueMap(I, Op0);
+ return true;
}
- // If the reg-reg copy failed, select a BITCAST opcode.
- if (!ResultReg)
- ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0);
-
+ // Otherwise, select a BITCAST opcode.
+ Register ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0);
if (!ResultReg)
return false;
@@ -2251,6 +2230,11 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
if (!MRI.hasOneUse(LoadReg))
return false;
+ // If the register has fixups, there may be additional uses through a
+ // different alias of the register.
+ if (FuncInfo.RegsWithFixups.contains(LoadReg))
+ return false;
+
MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
MachineInstr *User = RI->getParent();
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 85c6eca5775e..aa9c77f9cabf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -31,13 +31,10 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
#include <algorithm>
using namespace llvm;
@@ -57,7 +54,7 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
return false;
}
-static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+static ISD::NodeType getPreferredExtendForValue(const Instruction *I) {
// For the users of the source value being used for compare instruction, if
// the number of signed predicate is greater than unsigned predicate, we
// prefer to use SIGN_EXTEND.
@@ -67,7 +64,7 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) {
// can be exposed.
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
unsigned NumOfSigned = 0, NumOfUnsigned = 0;
- for (const User *U : V->users()) {
+ for (const User *U : I->users()) {
if (const auto *CI = dyn_cast<CmpInst>(U)) {
NumOfSigned += CI->isSigned();
NumOfUnsigned += CI->isUnsigned();
@@ -448,9 +445,14 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
unsigned BitWidth = IntVT.getSizeInBits();
- Register DestReg = ValueMap[PN];
- if (!Register::isVirtualRegister(DestReg))
+ auto It = ValueMap.find(PN);
+ if (It == ValueMap.end())
return;
+
+ Register DestReg = It->second;
+ if (DestReg == 0)
+    return;
+ assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg");
LiveOutRegInfo.grow(DestReg);
LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -462,7 +464,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ APInt Val;
+ if (TLI->signExtendConstant(CI))
+ Val = CI->getValue().sext(BitWidth);
+ else
+ Val = CI->getValue().zext(BitWidth);
DestLOI.NumSignBits = Val.getNumSignBits();
DestLOI.Known = KnownBits::makeConstant(Val);
} else {
@@ -494,7 +500,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ APInt Val;
+ if (TLI->signExtendConstant(CI))
+ Val = CI->getValue().sext(BitWidth);
+ else
+ Val = CI->getValue().zext(BitWidth);
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
DestLOI.Known.Zero &= ~Val;
DestLOI.Known.One &= Val;
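The sext/zext choice feeds directly into NumSignBits: sign-extending a negative constant preserves its sign bits while zero-extending destroys them. A standalone check (helper written for illustration; assumes arithmetic right shift of negatives):

    #include <cassert>
    #include <cstdint>

    // Number of leading bits equal to the sign bit (at least 1).
    unsigned numSignBits(int32_t V) {
      unsigned N = 1;
      while (N < 32 && (((V >> (31 - N)) ^ (V >> 31)) & 1) == 0)
        ++N;
      return N;
    }

    int main() {
      int16_t C = -1;
      assert(numSignBits(int32_t(C)) == 32);          // sext keeps sign info
      assert(numSignBits(int32_t(0x0000FFFF)) == 16); // zext loses it
    }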
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 331e0325aea3..3d3b504c6abd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -14,22 +14,18 @@
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/PseudoProbe.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -321,8 +317,15 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
if (OpRC) {
+ unsigned MinNumRegs = MinRCSize;
+ // Don't apply any RC size limit for IMPLICIT_DEF. Each use has a unique
+ // virtual register.
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+ MinNumRegs = 0;
+
const TargetRegisterClass *ConstrainedRC
- = MRI->constrainRegClass(VReg, OpRC, MinRCSize);
+ = MRI->constrainRegClass(VReg, OpRC, MinNumRegs);
if (!ConstrainedRC) {
OpRC = TRI->getAllocatableClass(OpRC);
assert(OpRC && "Constraints cannot be fulfilled for allocation");
@@ -1341,11 +1344,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
- MachineBasicBlock::iterator insertpos)
+ MachineBasicBlock::iterator insertpos,
+ bool UseInstrRefDebugInfo)
: MF(mbb->getParent()), MRI(&MF->getRegInfo()),
TII(MF->getSubtarget().getInstrInfo()),
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
InsertPos(insertpos) {
- EmitDebugInstrRefs = MF->useDebugInstrRef();
+ EmitDebugInstrRefs = UseInstrRefDebugInfo;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index ac8a70156522..ced8f064b9be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -154,7 +154,8 @@ public:
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
- MachineBasicBlock::iterator insertpos);
+ MachineBasicBlock::iterator insertpos,
+ bool UseInstrRefDebugInfo);
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 54481b94fdd8..8bdc9410d131 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -45,7 +46,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <tuple>
@@ -142,12 +142,10 @@ private:
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
- RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32,
- RTLIB::Libcall Call_I64,
- RTLIB::Libcall Call_I128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128,
+ RTLIB::Libcall Call_IEXT);
void ExpandArgFPLibCall(SDNode *Node,
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -1000,6 +998,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -1036,14 +1035,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC:
+ case ISD::VP_SETCC:
case ISD::BR_CC: {
- unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
- Node->getOpcode() == ISD::STRICT_FSETCC ? 3 :
- Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 :
- Node->getOpcode() == ISD::SETCC ? 2 : 1;
- unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 :
- Node->getOpcode() == ISD::STRICT_FSETCC ? 1 :
- Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0;
+ unsigned Opc = Node->getOpcode();
+ unsigned CCOperand = Opc == ISD::SELECT_CC ? 4
+ : Opc == ISD::STRICT_FSETCC ? 3
+ : Opc == ISD::STRICT_FSETCCS ? 3
+ : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2
+ : 1;
+ unsigned CompareOperand = Opc == ISD::BR_CC ? 2
+ : Opc == ISD::STRICT_FSETCC ? 1
+ : Opc == ISD::STRICT_FSETCCS ? 1
+ : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
@@ -1174,6 +1177,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOpcode(),
cast<VPStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStridedStoreSDNode>(Node)->getValue().getValueType());
+ break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1187,6 +1195,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::IS_FPCLASS:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
@@ -1212,7 +1221,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
- Action = TargetLowering::Legal;
+ Action = TLI.getCustomOperationAction(*Node);
} else {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
}
@@ -1723,16 +1732,14 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl,
SDValue Chain) {
- unsigned SrcSize = SrcOp.getValueSizeInBits();
- unsigned SlotSize = SlotVT.getSizeInBits();
- unsigned DestSize = DestVT.getSizeInBits();
+ EVT SrcVT = SrcOp.getValueType();
Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
// Don't convert with stack if the load/store is expensive.
- if ((SrcSize > SlotSize &&
+ if ((SrcVT.bitsGT(SlotVT) &&
!TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) ||
- (SlotSize < DestSize &&
+ (SlotVT.bitsLT(DestVT) &&
!TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT)))
return SDValue();
@@ -1750,20 +1757,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
// later than DestVT.
SDValue Store;
- if (SrcSize > SlotSize)
+ if (SrcVT.bitsGT(SlotVT))
Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
SlotVT, SrcAlign);
else {
- assert(SrcSize == SlotSize && "Invalid store");
- Store =
- DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+ assert(SrcVT.bitsEq(SlotVT) && "Invalid store");
+ Store = DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
}
// Result is a load from the stack slot.
- if (SlotSize == DestSize)
+ if (SlotVT.bitsEq(DestVT))
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
- assert(SlotSize < DestSize && "Unknown extension!");
+ assert(SlotVT.bitsLT(DestVT) && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
DestAlign);
}
@@ -2101,15 +2107,17 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}
-SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
- RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32,
- RTLIB::Libcall Call_I64,
- RTLIB::Libcall Call_I128) {
+SDValue SelectionDAGLegalize::ExpandIntLibCall(
+ SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
+  default:
+    LC = Call_IEXT;
+    break;
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2144,7 +2152,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
+  default:
+    LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT;
+    break;
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2893,6 +2905,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getValueType(0), dl)))
Results.push_back(Tmp1);
break;
+ case ISD::BF16_TO_FP: {
+    // Always expand bf16 to f32 casts; they lower to ext + shift.
+ SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op);
+ Op = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, Op,
+ DAG.getConstant(16, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op);
+ Results.push_back(Op);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
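The BF16_TO_FP expansion above works because bf16 is exactly the high half of an IEEE binary32, which is also why the fp_to_bf16(bf16_to_fp x) -> x fold earlier in this patch is safe. A scalar model of both directions (the truncating narrowing here only demonstrates the round trip; the libcall path rounds properly):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    float bf16ToFloat(uint16_t B) {
      uint32_t Bits = static_cast<uint32_t>(B) << 16; // ext + shift
      float F;
      std::memcpy(&F, &Bits, sizeof F);               // bitcast
      return F;
    }

    uint16_t floatToBF16Trunc(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof Bits);
      return static_cast<uint16_t>(Bits >> 16);       // keep the top half
    }

    int main() {
      uint16_t B = 0x3FC0; // bf16 1.5
      assert(bf16ToFloat(B) == 1.5f);
      assert(floatToBF16Trunc(bf16ToFloat(B)) == B);  // round trip is exact
    }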
@@ -2904,7 +2928,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.
// TODO: Do this for vectors too?
- if (ExtraVT.getSizeInBits() == 1) {
+ if (ExtraVT.isScalarInteger() && ExtraVT.getSizeInBits() == 1) {
SDValue One = DAG.getConstant(1, dl, VT);
SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One);
SDValue Zero = DAG.getConstant(0, dl, VT);
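The i1 case guarded above completes, just past this context, as sext_inreg(x, i1) = 0 - (x & 1), replicating bit 0 across the word; a one-line scalar check (helper name invented):

    #include <cassert>
    #include <cstdint>

    // sign_extend_inreg from i1: 0 - (x & 1) yields 0 or all-ones.
    int32_t sextInRegI1(int32_t X) { return 0 - (X & 1); }

    int main() {
      assert(sextInRegI1(0x1234) == 0);  // low bit clear -> 0
      assert(sextInRegI1(0x1235) == -1); // low bit set   -> all-ones
    }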
@@ -3135,6 +3159,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::FABS:
Results.push_back(ExpandFABS(Node));
break;
+ case ISD::IS_FPCLASS: {
+ auto CNode = cast<ConstantSDNode>(Node->getOperand(1));
+ auto Test = static_cast<FPClassTest>(CNode->getZExtValue());
+ if (SDValue Expanded =
+ TLI.expandIS_FPCLASS(Node->getValueType(0), Node->getOperand(0),
+ Test, Node->getFlags(), SDLoc(Node), DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
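expandIS_FPCLASS typically lowers the class test to integer bit tests on the value's representation; as a flavor of that output, an "is infinity" test for f32 written by hand (illustration only, not the hook's actual emission):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    bool isInfBits(float F) {
      uint32_t B;
      std::memcpy(&B, &F, sizeof B);           // bitcast f32 -> i32
      return (B & 0x7fffffffu) == 0x7f800000u; // drop sign; exp all-ones, frac 0
    }

    int main() {
      assert(isInfBits(std::numeric_limits<float>::infinity()));
      assert(!isInfBits(1.0f));
    }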
@@ -3577,18 +3610,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::SETCC:
+ case ISD::VP_SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
- bool IsStrict = Node->getOpcode() != ISD::SETCC;
+ bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
+ bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS;
bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
unsigned Offset = IsStrict ? 1 : 0;
Tmp1 = Node->getOperand(0 + Offset);
Tmp2 = Node->getOperand(1 + Offset);
Tmp3 = Node->getOperand(2 + Offset);
- bool Legalized =
- TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3,
- NeedInvert, dl, Chain, IsSignaling);
+ SDValue Mask, EVL;
+ if (IsVP) {
+ Mask = Node->getOperand(3 + Offset);
+ EVL = Node->getOperand(4 + Offset);
+ }
+ bool Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Mask, EVL, NeedInvert, dl,
+ Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3598,6 +3639,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
{Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags());
Chain = Tmp1.getValue(1);
+ } else if (IsVP) {
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0),
+ {Tmp1, Tmp2, Tmp3, Mask, EVL}, Node->getFlags());
} else {
Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1,
Tmp2, Tmp3, Node->getFlags());
@@ -3606,8 +3650,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
- if (NeedInvert)
- Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
+ if (NeedInvert) {
+ if (!IsVP)
+ Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
+ else
+ Tmp1 =
+ DAG.getVPLogicalNOT(dl, Tmp1, Mask, EVL, Tmp1->getValueType(0));
+ }
Results.push_back(Tmp1);
if (IsStrict)
@@ -3622,21 +3671,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
+ // FIXME: This drops the mask/evl for VP_SETCC.
EVT VT = Node->getValueType(0);
- int TrueValue;
- switch (TLI.getBooleanContents(Tmp1.getValueType())) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- TrueValue = 1;
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- TrueValue = -1;
- break;
- }
+ EVT Tmp1VT = Tmp1.getValueType();
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
- DAG.getConstant(TrueValue, dl, VT),
- DAG.getConstant(0, dl, VT),
- Tmp3);
+ DAG.getBoolConstant(true, dl, VT, Tmp1VT),
+ DAG.getBoolConstant(false, dl, VT, Tmp1VT), Tmp3);
Tmp1->setFlags(Node->getFlags());
Results.push_back(Tmp1);
break;
@@ -3692,7 +3732,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (!Legalized) {
Legalized = TLI.LegalizeSetCCCondCode(
DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
- NeedInvert, dl, Chain);
+ /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3725,9 +3765,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(3); // RHS
Tmp4 = Node->getOperand(1); // CC
- bool Legalized =
- TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()),
- Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain);
+ bool Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4,
+ /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
@@ -4068,12 +4108,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
- SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node),
- Node->getValueType(0),
- Node->getOperand(1));
- Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node),
- Node->getValueType(0), Node->getOperand(0),
- Exponent));
+ if (Node->isStrictFPOpcode()) {
+ SDValue Exponent =
+ DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(Node),
+ {Node->getValueType(0), Node->getValueType(1)},
+ {Node->getOperand(0), Node->getOperand(2)});
+ SDValue FPOW =
+ DAG.getNode(ISD::STRICT_FPOW, SDLoc(Node),
+ {Node->getValueType(0), Node->getValueType(1)},
+ {Exponent.getValue(1), Node->getOperand(1), Exponent});
+ Results.push_back(FPOW);
+ Results.push_back(FPOW.getValue(1));
+ } else {
+ SDValue Exponent =
+ DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), Node->getValueType(0),
+ Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node),
+ Node->getValueType(0),
+ Node->getOperand(0), Exponent));
+ }
break;
}
unsigned Offset = Node->isStrictFPOpcode() ? 1 : 0;
@@ -4176,6 +4229,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(LC, Node, false));
break;
}
+ case ISD::FP_TO_BF16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::bf16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_bf16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
@@ -4315,28 +4375,24 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SUB_PPCF128, Results);
break;
case ISD::SREM:
- Results.push_back(ExpandIntLibCall(Node, true,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT));
break;
case ISD::UREM:
- Results.push_back(ExpandIntLibCall(Node, false,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT));
break;
case ISD::SDIV:
- Results.push_back(ExpandIntLibCall(Node, true,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT));
break;
case ISD::UDIV:
- Results.push_back(ExpandIntLibCall(Node, false,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT));
break;
case ISD::SDIVREM:
case ISD::UDIVREM:
@@ -4344,10 +4400,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandDivRemLibCall(Node, Results);
break;
case ISD::MUL:
- Results.push_back(ExpandIntLibCall(Node, false,
- RTLIB::MUL_I8,
- RTLIB::MUL_I16, RTLIB::MUL_I32,
- RTLIB::MUL_I64, RTLIB::MUL_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT));
break;
case ISD::CTLZ_ZERO_UNDEF:
switch (Node->getSimpleValueType(0).SimpleTy) {
@@ -4700,6 +4755,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FREM:
case ISD::STRICT_FPOW:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
@@ -4724,6 +4785,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FMA:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(3)});
+ Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+ Tmp2.getValue(1), Tmp3.getValue(1));
+ Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp4, Tmp1, Tmp2, Tmp3});
+ Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp4);
+ Results.push_back(Tmp4.getValue(1));
+ break;
case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4740,6 +4817,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
break;
}
+ case ISD::STRICT_FPOWI:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1, Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp3);
+ Results.push_back(Tmp3.getValue(1));
+ break;
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FRINT:
@@ -4764,12 +4851,19 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FROUND:
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FSQRT:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG2:
case ISD::STRICT_FLOG10:
case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
{Node->getOperand(0), Node->getOperand(1)});
Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6bf38d7296a8..f464208cd9dc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -273,6 +273,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
+ if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG))
+ return SoftenFloatRes_SELECT_CC(SelCC.getNode());
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FMIN_F32,
RTLIB::FMIN_F64,
@@ -282,6 +284,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
+ if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG))
+ return SoftenFloatRes_SELECT_CC(SelCC.getNode());
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FMAX_F32,
RTLIB::FMAX_F64,
@@ -830,6 +834,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
+ case ISD::FP_TO_BF16:
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::STRICT_FP_TO_SINT:
@@ -881,16 +886,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
+ N->getOpcode() == ISD::FP_TO_BF16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
- EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||
- N->getOpcode() == ISD::STRICT_FP_TO_FP16)
- ? MVT::f16
- : RVT;
+ EVT FloatRVT = RVT;
+ if (N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ FloatRVT = MVT::f16;
+ else if (N->getOpcode() == ISD::FP_TO_BF16)
+ FloatRVT = MVT::bf16;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
@@ -2064,9 +2072,13 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f16) {
- return ISD::FP16_TO_FP;
+ return ISD::FP16_TO_FP;
} else if (RetVT == MVT::f16) {
- return ISD::FP_TO_FP16;
+ return ISD::FP_TO_FP16;
+ } else if (OpVT == MVT::bf16) {
+ return ISD::BF16_TO_FP;
+ } else if (RetVT == MVT::bf16) {
+ return ISD::FP_TO_BF16;
}
report_fatal_error("Attempt at an invalid promotion-related conversion");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8c7b90b6cd33..69fd83bcd7b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -78,6 +78,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT:
case ISD::VSELECT:
case ISD::VP_SELECT:
+ case ISD::VP_MERGE:
Res = PromoteIntRes_Select(N);
break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
@@ -97,6 +98,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break;
case ISD::SRL:
case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break;
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@@ -115,11 +117,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
case ISD::BUILD_VECTOR:
- Res = PromoteIntRes_BUILD_VECTOR(N); break;
- case ISD::SCALAR_TO_VECTOR:
- Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ Res = PromoteIntRes_BUILD_VECTOR(N);
+ break;
case ISD::SPLAT_VECTOR:
- Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_ScalarOp(N);
+ break;
case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
@@ -133,6 +136,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+ case ISD::VP_FPTOSI:
+ case ISD::VP_FPTOUI:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
@@ -262,6 +267,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
+
+ case ISD::IS_FPCLASS:
+ Res = PromoteIntRes_IS_FPCLASS(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -435,10 +444,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// interesting bits will end up at the wrong place.
if (DAG.getDataLayout().isBigEndian()) {
unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout());
assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!");
Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res,
- DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
+ DAG.getShiftAmountConstant(ShiftAmt, NOutVT, dl));
}
return Res;
}
@@ -446,13 +454,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// as the widened input type would be a legal type, we can widen the bitcast
// and handle the promotion after.
if (NOutVT.isVector()) {
- unsigned WidenInSize = NInVT.getSizeInBits();
- unsigned OutSize = OutVT.getSizeInBits();
- if (WidenInSize % OutSize == 0) {
- unsigned Scale = WidenInSize / OutSize;
- EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
- OutVT.getVectorElementType(),
- OutVT.getVectorNumElements() * Scale);
+ TypeSize WidenInSize = NInVT.getSizeInBits();
+ TypeSize OutSize = OutVT.getSizeInBits();
+ if (WidenInSize.hasKnownScalarFactor(OutSize)) {
+ unsigned Scale = WidenInSize.getKnownScalarFactor(OutSize);
+ EVT WideOutVT =
+ EVT::getVectorVT(*DAG.getContext(), OutVT.getVectorElementType(),
+ OutVT.getVectorElementCount() * Scale);
if (isTypeLegal(WideOutVT)) {
InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
@@ -490,9 +498,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl, ShiftVT));
+ DAG.getShiftAmountConstant(DiffBits, NVT, dl));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -512,10 +519,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl, ShiftVT));
+ DAG.getShiftAmountConstant(DiffBits, NVT, dl));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -666,6 +672,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
NewOpc = ISD::STRICT_FP_TO_SINT;
+ if (N->getOpcode() == ISD::VP_FPTOUI &&
+ !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT))
+ NewOpc = ISD::VP_FPTOSI;
+
SDValue Res;
if (N->isStrictFPOpcode()) {
Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
@@ -673,8 +684,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- } else
+ } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) {
+ Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1),
+ N->getOperand(2)});
+ } else {
Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+ }
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
@@ -684,8 +699,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// before legalization: fp-to-uint16, 65534. -> 0xfffe
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
- N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
- ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ N->getOpcode() == ISD::STRICT_FP_TO_UINT ||
+ N->getOpcode() == ISD::VP_FPTOUI)
+ ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
@@ -889,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
}
unsigned SHLAmount = NewBits - OldBits;
- EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
- SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ SDValue ShiftAmount =
+ DAG.getShiftAmountConstant(SHLAmount, PromotedType, dl);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
if (!IsShift)
@@ -939,14 +957,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// which is extends the values that we clamp to on saturation. This could be
// resolved by shifting one of the operands the same amount, which would
// also shift the result we compare against, then shifting back.
- EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
- Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
- DAG.getConstant(DiffSize, dl, ShiftTy));
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getShiftAmountConstant(DiffSize, PromotedType, dl));
SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
return DAG.getNode(ShiftOp, dl, PromotedType, Result,
- DAG.getConstant(DiffSize, dl, ShiftTy));
+ DAG.getShiftAmountConstant(DiffSize, PromotedType, dl));
}
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
@@ -1043,17 +1061,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
TargetLowering::LegalizeAction Action =
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
- EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
unsigned Diff = PromotedType.getScalarSizeInBits() -
N->getValueType(0).getScalarSizeInBits();
if (Saturating)
- Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
- DAG.getConstant(Diff, dl, ShiftTy));
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getShiftAmountConstant(Diff, PromotedType, dl));
SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
- DAG.getConstant(Diff, dl, ShiftTy));
+ DAG.getShiftAmountConstant(Diff, PromotedType, dl));
return Res;
}
}
@@ -1110,11 +1128,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) {
SDValue RHS = GetPromotedInteger(N->getOperand(2));
unsigned Opcode = N->getOpcode();
- return Opcode == ISD::VP_SELECT
- ? DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS,
- N->getOperand(3))
- : DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS,
- RHS);
+ if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
+ return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS,
+ N->getOperand(3));
+ return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
@@ -1167,6 +1184,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Arg = N->getOperand(0);
+ SDValue Test = N->getOperand(1);
+ EVT NResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
@@ -1265,7 +1290,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
- SDValue Amt = GetPromotedInteger(N->getOperand(2));
+ SDValue Amt = N->getOperand(2);
+ if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
+ Amt = ZExtPromotedInteger(Amt);
+ EVT AmtVT = Amt.getValueType();
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
@@ -1276,7 +1304,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
unsigned NewBits = VT.getScalarSizeInBits();
// Amount has to be interpreted modulo the old bit width.
- Amt = DAG.getNode(ISD::UREM, DL, VT, Amt, DAG.getConstant(OldBits, DL, VT));
+ Amt = DAG.getNode(ISD::UREM, DL, AmtVT, Amt,
+ DAG.getConstant(OldBits, DL, AmtVT));
// If the promoted type is twice the size (or more), then we use the
// traditional funnel 'double' shift codegen. This isn't necessary if the
@@ -1296,13 +1325,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
}
// Shift Lo up to occupy the upper bits of the promoted type.
- SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT);
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
// Increase Amount to shift the result into the lower bits of the promoted
// type.
if (IsFSHR)
- Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, ShiftOffset);
+ Amt = DAG.getNode(ISD::ADD, DL, AmtVT, Amt, ShiftOffset);
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
}
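The "double shift" strategy above is easiest to see in scalar form: once Lo occupies the top OldBits of the promoted value, a single wide funnel shift (with the amount biased for FSHR) yields the narrow result in the low bits. A model of fshl on i8 carried out in 32 bits (helper invented; assumes the promoted width is at least twice the original):

    #include <cassert>
    #include <cstdint>

    // fshl(Hi, Lo, Amt) on i8 with one 32-bit shift: concatenate Hi:Lo and
    // take the 8 bits that sit Amt positions below the top of the pair.
    uint8_t fshl8(uint8_t Hi, uint8_t Lo, unsigned Amt) {
      Amt %= 8;                                     // amount is modulo OldBits
      uint32_t Pair = (static_cast<uint32_t>(Hi) << 8) | Lo;
      return static_cast<uint8_t>(Pair >> (8 - Amt));
    }

    int main() {
      assert(fshl8(0xAB, 0xCD, 0) == 0xAB); // no shift: just Hi
      assert(fshl8(0xAB, 0xCD, 4) == 0xBC); // top nibble of Lo shifted in
    }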
@@ -1336,11 +1365,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts.divideCoefficientBy(2));
- EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
- EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
-
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+ } else {
+ assert(N->getOpcode() == ISD::VP_TRUNCATE &&
+ "Expected VP_TRUNCATE opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ EOp1 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp1, MaskLo, EVLLo);
+ EOp2 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp2, MaskHi, EVLHi);
+ }
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
+  // TODO: Handle VP_TRUNCATE when the input is widened (TypeWidenVector) on
+  // some targets.
case TargetLowering::TypeWidenVector: {
SDValue WideInOp = GetWidenedVector(InOp);
@@ -1362,6 +1403,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
}
// Truncate to NVT instead of VT
+ if (N->getOpcode() == ISD::VP_TRUNCATE)
+ return DAG.getNode(ISD::VP_TRUNCATE, dl, NVT, Res, N->getOperand(1),
+ N->getOperand(2));
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
}
@@ -1432,6 +1476,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
}
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If a larger ABS or SMAX isn't supported by the target, try to expand now.
+  // If we expand later, we'll end up sign extending more than just the sra
+  // input in the sra+xor+sub expansion.
+ if (!OVT.isVector() &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::ABS, NVT) &&
+ !TLI.isOperationLegal(ISD::SMAX, NVT)) {
+ if (SDValue Res = TLI.expandABS(N, DAG))
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Res);
+ }
+
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
}
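For reference, the sra+xor+sub expansion the comment refers to is the branchless absolute value; expanding before promotion keeps the sign extension confined to the sra input (sketch assumes arithmetic right shift of negatives):

    #include <cassert>
    #include <cstdint>

    // abs(x) = (x ^ (x >> 31)) - (x >> 31): the arithmetic shift yields 0 or
    // all-ones, so the xor conditionally complements and the sub adds the +1.
    int32_t absExpand(int32_t X) {
      int32_t T = X >> 31;
      return (X ^ T) - T;
    }

    int main() {
      assert(absExpand(-7) == 7);
      assert(absExpand(7) == 7);
    }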
@@ -1466,9 +1523,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());
- SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
- DAG.getConstant(Shift, DL, ShiftTy));
+ SDValue Hi =
+ DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getShiftAmountConstant(Shift, Mul.getValueType(), DL));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, DL, Hi.getValueType()),
ISD::SETNE);
@@ -1498,7 +1555,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
- return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits()));
+ return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
@@ -1578,16 +1635,19 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::INSERT_VECTOR_ELT:
- Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
- case ISD::SCALAR_TO_VECTOR:
- Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);
+ break;
case ISD::SPLAT_VECTOR:
- Res = PromoteIntOp_SPLAT_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_ScalarOp(N);
+ break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::VP_SETCC:
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::VP_SITOFP:
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
@@ -1600,8 +1660,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
OpNo); break;
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
+ case ISD::VP_UITOFP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -1614,6 +1676,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ case ISD::FSHL:
+ case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break;
+
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
@@ -1848,20 +1913,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
N->getOperand(1), Idx), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
- // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
- // the operand in place.
+SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
+ // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
+ // so just promote the operand in place.
return SDValue(DAG.UpdateNodeOperands(N,
GetPromotedInteger(N->getOperand(0))), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
- // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
- // operand in place.
- return SDValue(
- DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
-}
-
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
@@ -1900,7 +1958,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
// The CC (#2) is always legal.
- return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+ if (N->getOpcode() == ISD::SETCC)
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)),
+ 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
@@ -1908,6 +1973,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
ZExtPromotedInteger(N->getOperand(1))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_FunnelShift(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ ZExtPromotedInteger(N->getOperand(2))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
SDLoc dl(N);
@@ -1917,6 +1987,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ if (N->getOpcode() == ISD::VP_SITOFP)
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0)),
+ N->getOperand(1), N->getOperand(2)),
+ 0);
return SDValue(DAG.UpdateNodeOperands(N,
SExtPromotedInteger(N->getOperand(0))), 0);
}
@@ -1980,8 +2055,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
unsigned OpNo) {
-
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValueType(0);
@@ -2010,6 +2085,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
bool TruncateStore = N->isTruncatingStore();
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValue().getValueType();
@@ -2021,9 +2097,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
-
- N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(),
- N->getMemoryVT(), NewOps[OpNo]));
} else {
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
TruncateStore = true;
@@ -2036,10 +2109,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
+ if (N->getOpcode() == ISD::VP_TRUNCATE)
+ return DAG.getNode(ISD::VP_TRUNCATE, SDLoc(N), N->getValueType(0), Op,
+ N->getOperand(1), N->getOperand(2));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ if (N->getOpcode() == ISD::VP_UITOFP)
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0)),
+ N->getOperand(1), N->getOperand(2)),
+ 0);
return SDValue(DAG.UpdateNodeOperands(N,
ZExtPromotedInteger(N->getOperand(0))), 0);
}
@@ -2468,7 +2549,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
EVT ShTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::SHL) {
- if (Amt.ugt(VTBits)) {
+ if (Amt.uge(VTBits)) {
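+      // An amount equal to the full bit width is degenerate (undefined for
+      // the IR shift), so fold it to zero along with larger amounts.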
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getConstant(0, DL, NVT);
@@ -2489,7 +2570,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
}
if (N->getOpcode() == ISD::SRL) {
- if (Amt.ugt(VTBits)) {
+ if (Amt.uge(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRL, DL,
@@ -2510,7 +2591,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
- if (Amt.ugt(VTBits)) {
+ if (Amt.uge(VTBits)) {
Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt.ugt(NVTBits)) {
@@ -3132,24 +3213,23 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
- // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
- // use in LegalizeDAG. The ADD part of the expansion is based on
- // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
- // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
+ // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we
+ // use in LegalizeDAG. The SUB part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that
+ // SUBCARRY is LegalOrCustom. Each of the pieces here can be further expanded
// if needed. Shift expansion has a special case for filling with sign bits
// so that we will only end up with one SRA.
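+  // In scalar form this is the classic branch-free abs (a sketch, not the
+  // exact DAG): s = x >> (bits - 1); abs = (x ^ s) - s. The subtract is then
+  // carried across the two halves with USUBO/SUBCARRY below.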
- bool HasAddCarry = TLI.isOperationLegalOrCustom(
- ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
- if (HasAddCarry) {
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- SDValue Sign =
- DAG.getNode(ISD::SRA, dl, NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
+ bool HasSubCarry = TLI.isOperationLegalOrCustom(
+ ISD::SUBCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasSubCarry) {
+ SDValue Sign = DAG.getNode(
+ ISD::SRA, dl, NVT, Hi,
+ DAG.getShiftAmountConstant(NVT.getSizeInBits() - 1, NVT, dl));
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
- Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
- Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, Lo, Sign);
+ Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
return;
}
@@ -3160,8 +3240,8 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
- SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
- DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
+ SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, dl, NVT), ISD::SETLT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
}
@@ -3223,12 +3303,11 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
+ DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -3535,8 +3614,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
+ SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
@@ -3667,7 +3745,6 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
unsigned NVTSize = NVT.getScalarSizeInBits();
assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
"the size of the current value type");
- EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
// After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
@@ -3690,7 +3767,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
// shifting.
uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
if (Scale % NVTSize) {
- SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+ SDValue ShiftAmount = DAG.getShiftAmountConstant(Scale % NVTSize, NVT, dl);
Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
ShiftAmount);
Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
@@ -3731,8 +3808,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
if (!Signed) {
if (Scale < NVTSize) {
// Overflow happened if ((HH | (HL >> Scale)) != 0).
- SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
- DAG.getConstant(Scale, dl, ShiftTy));
+ SDValue HLAdjusted =
+ DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getShiftAmountConstant(Scale, NVT, dl));
SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
} else if (Scale == NVTSize) {
@@ -3740,9 +3818,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
} else if (Scale < VTSize) {
// Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
- SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
- DAG.getConstant(Scale - NVTSize, dl,
- ShiftTy));
+ SDValue HLAdjusted =
+ DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getShiftAmountConstant(Scale - NVTSize, NVT, dl));
SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
} else
llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
@@ -3901,6 +3979,70 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
ReplaceValueWith(SDValue(Node, 1), Ovf);
}
+// Emit a call to __udivei4 and friends, which require the arguments to be
+// passed on the stack, plus an extra argument giving the bit width of the
+// operands. Returns the result of the call operation.
+static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI,
+ const RTLIB::Libcall &LC,
+ SelectionDAG &DAG, SDNode *N,
+ const SDLoc &DL, const EVT &VT) {
+
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // The signature of __udivei4 is
+ // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b,
+ // unsigned int bits)
+ EVT ArgVT = N->op_begin()->getValueType();
+ assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 &&
+ "Unexpected argument type for lowering");
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
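+  // The result comes back through the first pointer argument, so allocate a
+  // stack slot to receive it and pass its address as 'quo'.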
+ SDValue Output = DAG.CreateStackTemporary(ArgVT);
+ Entry.Node = Output;
+ Entry.Ty = ArgTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ for (const llvm::SDUse &Op : N->ops()) {
+ SDValue StackPtr = DAG.CreateStackTemporary(ArgVT);
+ InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo());
+ Entry.Node = StackPtr;
+ Entry.Ty = ArgTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+ }
+
+ int Bits = N->getOperand(0)
+ .getValueType()
+ .getTypeForEVT(*DAG.getContext())
+ ->getIntegerBitWidth();
+ Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.IsSExt = false;
+ Entry.IsZExt = true;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC),
+ Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args))
+ .setDiscardResult();
+
+ SDValue Chain = TLI.LowerCallTo(CLI).second;
+
+ return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo());
+}
+
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -3922,6 +4064,14 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::SDIV_I128;
+
+ else {
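+    // No fixed-width libcall exists past i128, so fall back to the
+    // bit-width-parameterized helper above, which passes everything on the
+    // stack.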
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4113,6 +4263,14 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::SREM_I128;
+
+ else {
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4288,6 +4446,14 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::UDIV_I128;
+
+ else {
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4315,6 +4481,14 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::UREM_I128;
+
+ else {
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5060,7 +5234,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
return DAG.getBuildVector(NOutVT, dl, Ops);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
SDLoc dl(N);
@@ -5070,35 +5244,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
- EVT NOutVTElem = NOutVT.getVectorElementType();
-
- SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
-
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
-}
-
-SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
- SDLoc dl(N);
-
- SDValue SplatVal = N->getOperand(0);
-
- assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
-
- EVT OutVT = N->getValueType(0);
- EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
- assert(NOutVT.isVector() && "Type must be promoted to a vector type");
EVT NOutElemVT = NOutVT.getVectorElementType();
- SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
- return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
+ return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
SDLoc dl(N);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
- assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ assert(NOutVT.isScalableVector() &&
+ "Type must be promoted to a scalable vector type");
APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
return DAG.getStepVector(dl, NOutVT,
StepVal.sext(NOutVT.getScalarSizeInBits()));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 03dcd0f6d2c9..8fe9a83b9c3d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -13,10 +13,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "SDNodeDbgValue.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -86,46 +83,49 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
auto ResId = ValueToIdMap.lookup(Res);
unsigned Mapped = 0;
- if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {
- Mapped |= 1;
- // Check that remapped values are only used by nodes marked NewNode.
- for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
- UI != UE; ++UI)
- if (UI.getUse().getResNo() == i)
- assert(UI->getNodeId() == NewNode &&
- "Remapped value has non-trivial use!");
-
- // Check that the final result of applying ReplacedValues is not
- // marked NewNode.
- auto NewValId = ReplacedValues[ResId];
- auto I = ReplacedValues.find(NewValId);
- while (I != ReplacedValues.end()) {
- NewValId = I->second;
+ if (ResId) {
+ auto I = ReplacedValues.find(ResId);
+ if (I != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ auto NewValId = I->second;
I = ReplacedValues.find(NewValId);
+ while (I != ReplacedValues.end()) {
+ NewValId = I->second;
+ I = ReplacedValues.find(NewValId);
+ }
+ SDValue NewVal = getSDValue(NewValId);
+ (void)NewVal;
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
}
- SDValue NewVal = getSDValue(NewValId);
- (void)NewVal;
- assert(NewVal.getNode()->getNodeId() != NewNode &&
- "ReplacedValues maps to a new node!");
+ if (PromotedIntegers.count(ResId))
+ Mapped |= 2;
+ if (SoftenedFloats.count(ResId))
+ Mapped |= 4;
+ if (ScalarizedVectors.count(ResId))
+ Mapped |= 8;
+ if (ExpandedIntegers.count(ResId))
+ Mapped |= 16;
+ if (ExpandedFloats.count(ResId))
+ Mapped |= 32;
+ if (SplitVectors.count(ResId))
+ Mapped |= 64;
+ if (WidenedVectors.count(ResId))
+ Mapped |= 128;
+ if (PromotedFloats.count(ResId))
+ Mapped |= 256;
+ if (SoftPromotedHalfs.count(ResId))
+ Mapped |= 512;
}
- if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end())
- Mapped |= 2;
- if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end())
- Mapped |= 4;
- if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end())
- Mapped |= 8;
- if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end())
- Mapped |= 16;
- if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end())
- Mapped |= 32;
- if (ResId && SplitVectors.find(ResId) != SplitVectors.end())
- Mapped |= 64;
- if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end())
- Mapped |= 128;
- if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
- Mapped |= 256;
- if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end())
- Mapped |= 512;
if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
@@ -143,8 +143,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
}
} else {
if (Mapped == 0) {
- dbgs() << "Processed value not in any map!";
- Failed = true;
+ SDValue NodeById = IdToValueMap.lookup(ResId);
+ // It is possible the node has been remapped to another node and had
+ // its Id updated in the Value to Id table. The node it remapped to
+ // may not have been processed yet. Look up the Id in the Id to Value
+ // table and re-check the Processed state. If the node hasn't been
+ // remapped we'll get the same state as we got earlier.
+ if (NodeById->getNodeId() == Processed) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ }
} else if (Mapped & (Mapped - 1)) {
dbgs() << "Value in multiple maps!";
Failed = true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4d8daa82d8c0..de320290bda9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
namespace llvm {
@@ -309,8 +308,7 @@ private:
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
- SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
- SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_ScalarOp(SDNode *N);
SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
@@ -362,6 +360,7 @@ private:
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
+ SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -377,12 +376,12 @@ private:
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
- SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
- SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_ScalarOp(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_FunnelShift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
@@ -784,6 +783,7 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
SDValue ScalarizeVecRes_FIX(SDNode *N);
@@ -850,6 +850,7 @@ private:
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
@@ -960,6 +961,7 @@ private:
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
@@ -985,6 +987,7 @@ private:
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_IS_FPCLASS(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue WidenVecOp_VP_REDUCE(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index abf6a3ac6916..842ffa2aa23e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -26,11 +26,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -41,7 +39,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -464,6 +461,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VPID: { \
EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
: Node->getOperand(LEGALPOS).getValueType(); \
+ if (ISD::VPID == ISD::VP_SETCC) { \
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
+ Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
+ if (Action != TargetLowering::Legal) \
+ break; \
+ } \
Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
} break;
#include "llvm/IR/VPIntrinsics.def"
@@ -747,6 +750,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandFSUB(Node, Results);
return;
case ISD::SETCC:
+ case ISD::VP_SETCC:
ExpandSETCC(Node, Results);
return;
case ISD::ABS:
@@ -1050,10 +1054,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
// Shuffle the incoming lanes into the correct position, and pull all other
// lanes from the zero vector.
- SmallVector<int, 16> ShuffleMask;
- ShuffleMask.reserve(NumSrcElements);
- for (int i = 0; i < NumSrcElements; ++i)
- ShuffleMask.push_back(i);
+ auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
int ExtLaneScale = NumSrcElements / NumElements;
int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
@@ -1423,6 +1424,7 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node,
void VectorLegalizer::ExpandSETCC(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
bool NeedInvert = false;
+ bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
SDLoc dl(Node);
MVT OpVT = Node->getOperand(0).getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
@@ -1436,20 +1438,36 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue CC = Node->getOperand(2);
- bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
- RHS, CC, NeedInvert, dl, Chain);
+ SDValue Mask, EVL;
+ if (IsVP) {
+ Mask = Node->getOperand(3);
+ EVL = Node->getOperand(4);
+ }
+
+ bool Legalized =
+ TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
+ EVL, NeedInvert, dl, Chain);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
// condition code, create a new SETCC node.
- if (CC.getNode())
- LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
- Node->getFlags());
+ if (CC.getNode()) {
+ if (!IsVP)
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+ else
+ LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
+ {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
+ }
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
- if (NeedInvert)
- LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ if (NeedInvert) {
+ if (!IsVP)
+ LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ else
+ LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
+ }
} else {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0bd44ce4c872..fa555be00ded 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -20,7 +20,9 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
@@ -64,6 +66,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -231,9 +234,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
// Now process the remaining operands.
for (unsigned i = 1; i < NumOpers; ++i) {
SDValue Oper = N->getOperand(i);
+ EVT OperVT = Oper.getValueType();
- if (Oper.getValueType().isVector())
- Oper = GetScalarizedVector(Oper);
+ if (OperVT.isVector()) {
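+      // The operand's own type action may differ from the result's; if it is
+      // not being scalarized, extract lane 0 by hand instead.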
+ if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector)
+ Oper = GetScalarizedVector(Oper);
+ else
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperVT.getVectorElementType(), Oper,
+ DAG.getVectorIdxConstant(0, dl));
+ }
Opers[i] = Oper;
}
@@ -582,6 +592,29 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
return DAG.getNode(ExtendCode, DL, NVT, Res);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Arg = N->getOperand(0);
+ SDValue Test = N->getOperand(1);
+ EVT ArgVT = Arg.getValueType();
+ EVT ResultVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) {
+ Arg = GetScalarizedVector(Arg);
+ } else {
+ EVT VT = ArgVT.getVectorElementType();
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Arg,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
+ SDValue Res =
+ DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags());
+  // Vectors may have different boolean contents than scalars. Promote the
+  // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT));
+ return DAG.getNode(ExtendCode, DL, ResultVT, Res);
+}
//===----------------------------------------------------------------------===//
// Operand Vector Scalarization <1 x ty> -> ty.
@@ -926,6 +959,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
@@ -949,6 +983,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
break;
case ISD::SETCC:
+ case ISD::VP_SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
case ISD::VECTOR_REVERSE:
@@ -988,13 +1023,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
- case ISD::FNEG:
+ case ISD::FNEG: case ISD::VP_FNEG:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FP_EXTEND:
+ case ISD::VP_FP_EXTEND:
case ISD::FP_ROUND:
+ case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
+ case ISD::VP_FPTOSI:
case ISD::FP_TO_UINT:
+ case ISD::VP_FPTOUI:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
@@ -1002,8 +1041,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT:
case ISD::FTRUNC:
case ISD::SINT_TO_FP:
+ case ISD::VP_SITOFP:
case ISD::TRUNCATE:
+ case ISD::VP_TRUNCATE:
case ISD::UINT_TO_FP:
+ case ISD::VP_UITOFP:
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -1011,6 +1053,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
SplitVecRes_ExtendOp(N, Lo, Hi);
break;
@@ -1053,7 +1097,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ROTR:
SplitVecRes_BinOp(N, Lo, Hi);
break;
- case ISD::FMA:
+ case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
case ISD::FSHR:
SplitVecRes_TernaryOp(N, Lo, Hi);
@@ -1175,10 +1219,28 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo,
- Op2Lo, N->getFlags());
- Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi,
- Op2Hi, N->getFlags());
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() == 3) {
+    Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo,
+                     Flags);
+    Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi,
+                     Flags);
+ return;
+ }
+
+ assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
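+  // For a VP node, split the mask like any other vector operand and divide
+  // the explicit vector length between the two halves.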
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(),
+ {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(),
+ {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags);
}
void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
@@ -1398,6 +1460,19 @@ void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
}
+void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ SDValue ArgLo, ArgHi;
+ SDValue Test = N->getOperand(1);
+ GetSplitVector(N->getOperand(0), ArgLo, ArgHi);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags());
+ Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags());
+}
+
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -2043,8 +2118,20 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
else
std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+ if (N->getOpcode() == ISD::SETCC) {
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+ } else {
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo,
+ EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi,
+ EVLHi);
+ }
}
void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
@@ -2056,22 +2143,37 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
- unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
- EVT InVT = N->getOperand(OpNo).getValueType();
+ EVT InVT = N->getOperand(0).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(N->getOperand(OpNo), Lo, Hi);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
else
- std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
- if (N->getOpcode() == ISD::FP_ROUND) {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1),
- N->getFlags());
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1),
- N->getFlags());
- } else {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags());
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags());
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() <= 2) {
+ if (Opcode == ISD::FP_ROUND) {
+ Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
+ } else {
+ Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags);
+ }
+ return;
}
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
}
void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
@@ -2107,14 +2209,34 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
N->dump(&DAG); dbgs() << "\n");
+ if (!N->isVPOpcode()) {
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+    // Get the low and high halves of the new, one-step-extended vector.
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+
// Extend the source vector by one step.
SDValue NewSrc =
- DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
// Get the low and high halves of the new, extended one step, vector.
std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
// Extend those vector halves the rest of the way.
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo});
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi});
return;
}
}
@@ -2126,108 +2248,352 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
SDValue &Lo, SDValue &Hi) {
// The low and high parts of the original input give four input vectors.
SDValue Inputs[4];
- SDLoc dl(N);
+ SDLoc DL(N);
GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
EVT NewVT = Inputs[0].getValueType();
unsigned NewElts = NewVT.getVectorNumElements();
+ auto &&IsConstant = [](const SDValue &N) {
+ APInt SplatValue;
+ return N.getResNo() == 0 &&
+ (ISD::isConstantSplatVector(N.getNode(), SplatValue) ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
+ };
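+  // Compose a BUILD_VECTOR directly from two BUILD_VECTOR inputs according
+  // to the mask, rather than shuffling two constant vectors.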
+ auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1,
+ SDValue &Input2,
+ ArrayRef<int> Mask) {
+ assert(Input1->getOpcode() == ISD::BUILD_VECTOR &&
+ Input2->getOpcode() == ISD::BUILD_VECTOR &&
+ "Expected build vector node.");
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT));
+ for (unsigned I = 0; I < NewElts; ++I) {
+ if (Mask[I] == UndefMaskElem)
+ continue;
+ unsigned Idx = Mask[I];
+ if (Idx >= NewElts)
+ Ops[I] = Input2.getOperand(Idx - NewElts);
+ else
+ Ops[I] = Input1.getOperand(Idx);
+      // Operands of the BUILD_VECTOR may be wider than the element type;
+      // truncate them to EltVT.
+ if (Ops[I].getValueType().bitsGT(EltVT))
+ Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]);
+ }
+ return DAG.getBuildVector(NewVT, DL, Ops);
+ };
+
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
- SmallVector<int, 16> Ops;
- for (unsigned High = 0; High < 2; ++High) {
- SDValue &Output = High ? Hi : Lo;
-
- // Build a shuffle mask for the output, discovering on the fly which
- // input vectors to use as shuffle operands (recorded in InputUsed).
- // If building a suitable shuffle vector proves too hard, then bail
- // out with useBuildVector set.
- unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
- unsigned FirstMaskIdx = High * NewElts;
- bool useBuildVector = false;
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
-
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
-
- if (Input >= array_lengthof(Inputs)) {
- // The mask element does not index into any input vector.
- Ops.push_back(-1);
+ SmallVector<int> OrigMask(N->getMask().begin(), N->getMask().end());
+ // Try to pack incoming shuffles/inputs.
+ auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts,
+ &DL](SmallVectorImpl<int> &Mask) {
+ // Check if all inputs are shuffles of the same operands or non-shuffles.
+ MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs;
+ for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) {
+ SDValue Input = Inputs[Idx];
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode());
+ if (!Shuffle ||
+ Input.getOperand(0).getValueType() != Input.getValueType())
+ continue;
+ ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))]
+ .push_back(Idx);
+ ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))]
+ .push_back(Idx);
+ }
+ for (auto &P : ShufflesIdxs) {
+ if (P.second.size() < 2)
continue;
+      // Use the shuffles' operands instead of the shuffles themselves.
+ // 1. Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ auto *Shuffle =
+ dyn_cast<ShuffleVectorSDNode>(Inputs[SrcRegIdx].getNode());
+ if (!Shuffle || !is_contained(P.second, SrcRegIdx))
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ Idx = MaskElt % NewElts +
+ P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first
+ ? 0
+ : 1] *
+ NewElts;
}
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
-
- // Find or create a shuffle vector operand to hold this input.
- unsigned OpNo;
- for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
- if (InputUsed[OpNo] == Input) {
- // This input vector is already an operand.
- break;
- } else if (InputUsed[OpNo] == -1U) {
- // Create a new operand for this input vector.
- InputUsed[OpNo] = Input;
- break;
+ // 2. Update inputs.
+ Inputs[P.second[0]] = P.first.first;
+ Inputs[P.second[1]] = P.first.second;
+ // Clear the pair data.
+ P.second.clear();
+ ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear();
+ }
+ // Check if any concat_vectors can be simplified.
+ SmallBitVector UsedSubVector(2 * array_lengthof(Inputs));
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ TargetLowering::LegalizeTypeAction TypeAction =
+ getTypeAction(Inputs[SrcRegIdx].getValueType());
+ if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS &&
+ Inputs[SrcRegIdx].getNumOperands() == 2 &&
+ !Inputs[SrcRegIdx].getOperand(1).isUndef() &&
+ (TypeAction == TargetLowering::TypeLegal ||
+ TypeAction == TargetLowering::TypeWidenVector))
+ UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2));
+ }
+ if (UsedSubVector.count() > 1) {
+ SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1))
+ continue;
+ if (Pairs.empty() || Pairs.back().size() == 2)
+ Pairs.emplace_back();
+ if (UsedSubVector.test(2 * I)) {
+ Pairs.back().emplace_back(I, 0);
+ } else {
+          assert(UsedSubVector.test(2 * I + 1) &&
+                 "Expected one of the subvectors to be used.");
+ Pairs.back().emplace_back(I, 1);
}
}
-
- if (OpNo >= array_lengthof(InputUsed)) {
- // More than two input vectors used! Give up on trying to create a
- // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
- useBuildVector = true;
- break;
+ if (!Pairs.empty() && Pairs.front().size() > 1) {
+ // Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ auto *It = find_if(
+ Pairs, [SrcRegIdx](ArrayRef<std::pair<unsigned, int>> Idxs) {
+ return Idxs.front().first == SrcRegIdx ||
+ Idxs.back().first == SrcRegIdx;
+ });
+ if (It == Pairs.end())
+ continue;
+ Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) +
+ (SrcRegIdx == It->front().first ? 0 : (NewElts / 2));
+ }
+ // Adjust inputs.
+ for (ArrayRef<std::pair<unsigned, int>> Idxs : Pairs) {
+ Inputs[Idxs.front().first] = DAG.getNode(
+ ISD::CONCAT_VECTORS, DL,
+ Inputs[Idxs.front().first].getValueType(),
+ Inputs[Idxs.front().first].getOperand(Idxs.front().second),
+ Inputs[Idxs.back().first].getOperand(Idxs.back().second));
+ }
}
-
- // Add the mask index for the new shuffle vector.
- Ops.push_back(Idx + OpNo * NewElts);
}
-
- if (useBuildVector) {
- EVT EltVT = NewVT.getVectorElementType();
- SmallVector<SDValue, 16> SVOps;
-
- // Extract the input elements by hand.
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
-
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
-
- if (Input >= array_lengthof(Inputs)) {
- // The mask element is "undef" or indexes off the end of the input.
- SVOps.push_back(DAG.getUNDEF(EltVT));
+ bool Changed;
+ do {
+    // Try to remove extra shuffles (except broadcasts) and shuffles whose
+    // operands are already used by other inputs.
+ Changed = false;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
+ if (!Shuffle)
continue;
+ if (Shuffle->getOperand(0).getValueType() != NewVT)
+ continue;
+ int Op = -1;
+ if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() &&
+ !Shuffle->isSplat()) {
+ Op = 0;
+ } else if (!Inputs[I].hasOneUse() &&
+ !Shuffle->getOperand(1).isUndef()) {
+ // Find the only used operand, if possible.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ int OpIdx = MaskElt / NewElts;
+ if (Op == -1) {
+ Op = OpIdx;
+ continue;
+ }
+ if (Op != OpIdx) {
+ Op = -1;
+ break;
+ }
+ }
+ }
+ if (Op < 0) {
+ // Try to check if one of the shuffle operands is used already.
+ for (int OpIdx = 0; OpIdx < 2; ++OpIdx) {
+ if (Shuffle->getOperand(OpIdx).isUndef())
+ continue;
+ auto *It = find(Inputs, Shuffle->getOperand(OpIdx));
+ if (It == std::end(Inputs))
+ continue;
+ int FoundOp = std::distance(std::begin(Inputs), It);
+ // Found that operand is used already.
+ // 1. Fix the mask for the reused operand.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ int MaskIdx = MaskElt / NewElts;
+ if (OpIdx == MaskIdx)
+ Idx = MaskElt % NewElts + FoundOp * NewElts;
+ }
+ // 2. Set Op to the unused OpIdx.
+ Op = (OpIdx + 1) % 2;
+ break;
+ }
+ }
+ if (Op >= 0) {
+ Changed = true;
+ Inputs[I] = Shuffle->getOperand(Op);
+ // Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ int OpIdx = MaskElt / NewElts;
+ if (OpIdx != Op)
+ continue;
+ Idx = MaskElt % NewElts + SrcRegIdx * NewElts;
+ }
}
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
-
- // Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Inputs[Input],
- DAG.getVectorIdxConstant(Idx, dl)));
}
-
- // Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getBuildVector(NewVT, dl, SVOps);
- } else if (InputUsed[0] == -1U) {
- // No input vectors were used! The result is undefined.
- Output = DAG.getUNDEF(NewVT);
- } else {
- SDValue Op0 = Inputs[InputUsed[0]];
- // If only one input was used, use an undefined vector for the other.
- SDValue Op1 = InputUsed[1] == -1U ?
- DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
- // At least one input vector was used. Create a new shuffle vector.
- Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
+ } while (Changed);
+ };
+ TryPeekThroughShufflesInputs(OrigMask);
+  // Process unique inputs.
+ auto &&MakeUniqueInputs = [&Inputs, &IsConstant,
+ NewElts](SmallVectorImpl<int> &Mask) {
+ SetVector<SDValue> UniqueInputs;
+ SetVector<SDValue> UniqueConstantInputs;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ if (IsConstant(Inputs[I]))
+ UniqueConstantInputs.insert(Inputs[I]);
+ else if (!Inputs[I].isUndef())
+ UniqueInputs.insert(Inputs[I]);
+ }
+    // Adjust the mask in case of reused inputs. Constant inputs must be
+    // inserted first, as their position affects the final outcome.
+ if (UniqueInputs.size() != array_lengthof(Inputs)) {
+ auto &&UniqueVec = UniqueInputs.takeVector();
+ auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
+ unsigned ConstNum = UniqueConstantVec.size();
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
+ if (It != UniqueConstantVec.end()) {
+ Idx = (Idx % NewElts) +
+ NewElts * std::distance(UniqueConstantVec.begin(), It);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ continue;
+ }
+ const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]);
+ assert(RegIt != UniqueVec.end() && "Cannot find non-const value.");
+ Idx = (Idx % NewElts) +
+ NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ }
+ copy(UniqueConstantVec, std::begin(Inputs));
+ copy(UniqueVec, std::next(std::begin(Inputs), ConstNum));
}
+ };
+ MakeUniqueInputs(OrigMask);
+ SDValue OrigInputs[4];
+ copy(Inputs, std::begin(OrigInputs));
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
- Ops.clear();
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands.
+ unsigned FirstMaskIdx = High * NewElts;
+ SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem);
+ copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
+ assert(!Output && "Expected default initialized initial value.");
+ TryPeekThroughShufflesInputs(Mask);
+ MakeUniqueInputs(Mask);
+ SDValue TmpInputs[4];
+ copy(Inputs, std::begin(TmpInputs));
+ // Track changes in the output registers.
+ int UsedIdx = -1;
+ bool SecondIteration = false;
+ auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) {
+ if (UsedIdx < 0) {
+ UsedIdx = Idx;
+ return false;
+ }
+ if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx)
+ SecondIteration = true;
+ return SecondIteration;
+ };
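+    // processShuffleMasks inspects the mask for each destination register and
+    // invokes one of three callbacks: the result is all-undef, needs only a
+    // single input, or needs a shuffle of two inputs.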
+ processShuffleMasks(
+ Mask, array_lengthof(Inputs), array_lengthof(Inputs),
+ /*NumOfUsedRegs=*/1,
+ [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); },
+ [&Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) {
+ if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx],
+ DAG.getUNDEF(NewVT), Mask);
+ Inputs[Idx] = Output;
+ },
+ [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &TmpInputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx1, unsigned Idx2) {
+ if (AccumulateResults(Idx1)) {
+ if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1],
+ Inputs[Idx2], Mask);
+ } else {
+ if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1],
+ TmpInputs[Idx2], Mask);
+ }
+ Inputs[Idx1] = Output;
+ });
+ copy(OrigInputs, std::begin(Inputs));
}
}
@@ -2268,6 +2634,32 @@ void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue InLo, InHi;
+ GetSplitVector(N->getOperand(0), InLo, InHi);
+ SDLoc DL(N);
+
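+  // Reversing a split vector swaps the two halves and reverses each half.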
+ Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
+ Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Expanded = TLI.expandVectorSplice(N, DAG);
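+  // Expand the splice at full width, then carve the two halves back out with
+  // EXTRACT_SUBVECTOR.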
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
+
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
@@ -2294,16 +2686,19 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
report_fatal_error("Do not know how to split this operator's "
"operand!\n");
+ case ISD::VP_SETCC:
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::STRICT_FP_ROUND:
+ case ISD::VP_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
@@ -2543,6 +2938,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
+ } else if (N->getNumOperands() == 3) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
@@ -3128,8 +3531,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
- LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
- HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ if (N->getOpcode() == ISD::SETCC) {
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ } else {
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+ LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1,
+ N->getOperand(2), MaskLo, EVLLo);
+ HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
+ N->getOperand(2), MaskHi, EVLHi);
+ }
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
EVT OpVT = N->getOperand(0).getValueType();
@@ -3160,6 +3575,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else if (N->getOpcode() == ISD::VP_FP_ROUND) {
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL);
+ Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
+ Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
} else {
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
@@ -3204,6 +3626,22 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
return;
SDValue Res = SDValue();
+
+ auto unrollExpandedOp = [&]() {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements.
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ return true;
+ }
+ return false;
+ };
+
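The unrollExpandedOp helper captures a cost heuristic reused below for FPOW, FREM and the unary FP opcodes: widening pads the vector with undef lanes, and if the widened op later expands to scalar libcalls, those padding lanes cost real calls. A sketch of the decision, where the two flags stand in for the TLI legality queries made in the lambda:

    // Unrolling first emits one scalar op per *original* lane; widening
    // first would emit one per *padded* lane once the wide op expands.
    bool shouldUnrollBeforeWidening(bool WideOpLegalOrCustom,
                                    bool ScalarOpExpands) {
      return !WideOpLegalOrCustom && ScalarOpExpands;
    }

E.g. widening a v3f64 fpow to v4f64 and then expanding would issue four pow() libcalls, one of them on an undef lane; unrolling first issues three.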
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
@@ -3223,6 +3661,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
Res = WidenVecRes_ScalarOp(N);
@@ -3235,6 +3674,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Select(N);
break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::VP_SETCC:
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE:
@@ -3280,6 +3720,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
// Vector-predicated binary op widening. Note that -- unlike the
// unpredicated versions -- we don't have to worry about trapping on
// operations like UDIV, FADD, etc., as we pass on the original vector
@@ -3297,12 +3741,19 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Binary(N);
break;
+ case ISD::FPOW:
+ case ISD::FREM:
+ if (unrollExpandedOp())
+ break;
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those
+ // like any other binary ops.
+ LLVM_FALLTHROUGH;
+
case ISD::FADD:
case ISD::FMUL:
- case ISD::FPOW:
case ISD::FSUB:
case ISD::FDIV:
- case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -3338,6 +3789,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_FCOPYSIGN(N);
break;
+ case ISD::IS_FPCLASS:
+ Res = WidenVecRes_IS_FPCLASS(N);
+ break;
+
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
@@ -3350,14 +3805,23 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::FP_EXTEND:
+ case ISD::VP_FP_EXTEND:
case ISD::FP_ROUND:
+ case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
+ case ISD::VP_FPTOSI:
case ISD::FP_TO_UINT:
+ case ISD::VP_FPTOUI:
case ISD::SIGN_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
case ISD::SINT_TO_FP:
+ case ISD::VP_SITOFP:
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
+ case ISD::VP_UITOFP:
case ISD::ZERO_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
Res = WidenVecRes_Convert(N);
break;
@@ -3381,23 +3845,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
- case ISD::FTRUNC: {
- // We're going to widen this vector op to a legal type by padding with undef
- // elements. If the wide vector op is eventually going to be expanded to
- // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
- // libcalls on the undef elements.
- EVT VT = N->getValueType(0);
- EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
- TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
- Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ case ISD::FTRUNC:
+ if (unrollExpandedOp())
break;
- }
- }
- // If the target has custom/legal support for the scalar FP intrinsic ops
- // (they are probably not destined to become libcalls), then widen those like
- // any other unary ops.
- LLVM_FALLTHROUGH;
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those
+ // like any other unary ops.
+ LLVM_FALLTHROUGH;
case ISD::ABS:
case ISD::BITREVERSE:
@@ -3407,13 +3861,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- case ISD::FNEG:
+ case ISD::FNEG: case ISD::VP_FNEG:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
- case ISD::FMA:
+ case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
@@ -3432,7 +3886,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
SDValue InOp3 = GetWidenedVector(N->getOperand(2));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+ if (N->getNumOperands() == 3)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+
+ assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, InOp3, Mask, N->getOperand(4)});
}
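WidenVecRes_Ternary pads the data operands with undef lanes, so for the VP form the mask must be widened to the same element count; as long as the widened mask is false on the padding lanes, the junk lanes stay inactive. A plain-C++ model of that mask widening, assuming false padding:

    #include <cstddef>
    #include <vector>

    // Pad a predicate with 'false' so lanes introduced by widening
    // can never become active.
    std::vector<bool> widenMask(std::vector<bool> M, std::size_t WideLen) {
      M.resize(WideLen, false);
      return M;
    }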
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
@@ -3552,7 +4015,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorMinNumElements();
const SDNodeFlags Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
@@ -3566,6 +4029,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
+ // FIXME: Improve support for scalable vectors.
+ assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");
+
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
@@ -3826,6 +4292,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ if (N->getNumOperands() == 3) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2));
+ }
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
@@ -4007,6 +4479,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
}
+SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Arg = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)},
+ N->getFlags());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -4018,7 +4497,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
// Unary op widening.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT,
+ {InOp, Mask, N->getOperand(2)});
}
SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
@@ -4243,11 +4731,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
- EVT VT = N->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- SDValue InOp = N->getOperand(0);
- SDValue Idx = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
SDLoc dl(N);
auto InOpTypeAction = getTypeAction(InOp.getValueType());
@@ -4264,6 +4752,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// Check if we can extract from the vector.
unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
unsigned InNumElts = InVT.getVectorMinNumElements();
+ unsigned VTNumElts = VT.getVectorMinNumElements();
+ assert(IdxVal % VTNumElts == 0 &&
+ "Expected Idx to be a multiple of subvector minimum vector length");
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
@@ -4277,8 +4768,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// nxv2i64 extract_subvector(nxv16i64, 8)
// nxv2i64 extract_subvector(nxv16i64, 10)
// undef)
- unsigned VTNElts = VT.getVectorMinNumElements();
- unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+ unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts);
assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
"down type's element count");
EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
@@ -4287,7 +4777,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
SmallVector<SDValue> Parts;
unsigned I = 0;
- for (; I < VTNElts / GCD; ++I)
+ for (; I < VTNumElts / GCD; ++I)
Parts.push_back(
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
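The decomposition above rests on a small arithmetic fact: gcd(VTNumElts, WidenNumElts) evenly divides both counts, so VTNumElts / GCD extracts of GCD elements each exactly cover the original subvector while every piece remains addressable in the widened type. A standalone check of the piece count:

    #include <numeric>

    // Number of GCD-sized pieces covering the original subvector,
    // mirroring the extraction loop above.
    unsigned numParts(unsigned VTNumElts, unsigned WidenNumElts) {
      return VTNumElts / std::gcd(VTNumElts, WidenNumElts);
    }

For instance, extracting 6 elements when the widened type holds 4 gives gcd(6, 4) = 2, i.e. three 2-element extracts at offsets Idx, Idx + 2 and Idx + 4.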
@@ -4304,9 +4794,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- for (i = 0; i < NumElts; ++i)
+ for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getVectorIdxConstant(IdxVal + i, dl));
@@ -4783,10 +5272,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
- ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
- N->getOperand(3))
- : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
+ if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
+ N->getOperand(3));
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
@@ -4832,13 +5321,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
N->getOperand(0).getValueType().isVector() &&
"Operands must be vectors");
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "can not widen non-vector type");
- EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), WidenNumElts);
+ EVT WidenInVT =
+ EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC);
// The input and output types often differ here, and it could be that while
// we'd prefer to widen the result type, the input operands have been split.
@@ -4865,8 +5354,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
InOp2.getValueType() == WidenInVT &&
"Input not widened to expected type!");
(void)WidenInVT;
- return DAG.getNode(ISD::SETCC, SDLoc(N),
- WidenVT, InOp1, InOp2, N->getOperand(2));
+ if (N->getOpcode() == ISD::VP_SETCC) {
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
+ return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+ N->getOperand(2), Mask, N->getOperand(4));
+ }
+ return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+ N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
@@ -4946,6 +5441,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
+ case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -5098,6 +5594,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N);
}
+SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ EVT ResultVT = N->getValueType(0);
+ SDValue Test = N->getOperand(1);
+ SDValue WideArg = GetWidenedVector(N->getOperand(0));
+
+ // Process this node similarly to SETCC.
+ EVT WideResultVT = getSetCCResultType(WideArg.getValueType());
+ if (ResultVT.getScalarType() == MVT::i1)
+ WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideResultVT.getVectorNumElements());
+
+ SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT,
+ {WideArg, Test}, N->getFlags());
+
+ // Extract the needed results from the result vector.
+ EVT ResVT =
+ EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(),
+ ResultVT.getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, WideNode,
+ DAG.getVectorIdxConstant(0, DL));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, ResultVT, CC);
+}
+
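IS_FPCLASS tests every lane of its first operand against a class bitmask (NaN, infinity, zero, ...) supplied as the second, immediate operand and produces a boolean per lane, which is why the widening above processes it like a SETCC. A hedged scalar model with illustrative mask bits (the real FPClassTest encoding differs):

    #include <cmath>

    // Illustrative class bits; not LLVM's encoding.
    enum : unsigned { kNan = 1u << 0, kInf = 1u << 1, kZero = 1u << 2 };

    bool isFPClass(double X, unsigned Test) {
      if ((Test & kNan) && std::isnan(X)) return true;
      if ((Test & kInf) && std::isinf(X)) return true;
      if ((Test & kZero) && X == 0.0) return true;
      return false;
    }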
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
  // The result is legal, but the input is illegal.
EVT VT = N->getValueType(0);
@@ -5192,11 +5716,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
SDLoc dl(N);
// Check if we can convert between two legal vector types and extract.
- unsigned InWidenSize = InWidenVT.getSizeInBits();
- unsigned Size = VT.getSizeInBits();
+ TypeSize InWidenSize = InWidenVT.getSizeInBits();
+ TypeSize Size = VT.getSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
- if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
- unsigned NewNumElts = InWidenSize / Size;
+ if (!VT.isVector() && VT != MVT::x86mmx &&
+ InWidenSize.hasKnownScalarFactor(Size)) {
+ unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size);
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
@@ -5211,9 +5736,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
// having to copy via memory.
if (VT.isVector()) {
EVT EltVT = VT.getVectorElementType();
- unsigned EltSize = EltVT.getSizeInBits();
- if (InWidenSize % EltSize == 0) {
- unsigned NewNumElts = InWidenSize / EltSize;
+ unsigned EltSize = EltVT.getFixedSizeInBits();
+ if (InWidenSize.isKnownMultipleOf(EltSize)) {
+ ElementCount NewNumElts =
+ (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits())
+ .divideCoefficientBy(EltSize);
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
@@ -5266,18 +5793,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue SubVec = N->getOperand(1);
SDValue InVec = N->getOperand(0);
- if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector)
- InVec = GetWidenedVector(InVec);
-
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
SubVec = GetWidenedVector(SubVec);
- if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() &&
+ if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
N->getConstantOperandVal(2) == 0)
- return SubVec;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
+ N->getOperand(2));
report_fatal_error("Don't know how to widen the operands for "
"INSERT_SUBVECTOR");
@@ -5500,11 +6026,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) {
Mask = GetWidenedMask(Mask, WideEC);
WideMemVT = EVT::getVectorVT(*DAG.getContext(),
VPSC->getMemoryVT().getScalarType(), WideEC);
- } else if (OpNo == 4) {
+ } else if (OpNo == 3) {
// Just widen the index. It's allowed to have extra elements.
Index = GetWidenedVector(Index);
} else
- llvm_unreachable("Can't widen this operand of mscatter");
+ llvm_unreachable("Can't widen this operand of VP_SCATTER");
SDValue Ops[] = {
VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask,
@@ -5597,8 +6123,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
assert(NeutralElem && "Neutral element must exist");
// Pad the vector with the neutral element.
- unsigned OrigElts = OrigVT.getVectorNumElements();
- unsigned WideElts = WideVT.getVectorNumElements();
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+ }
+
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
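Padding a reduction input only works because the new lanes hold the operation's neutral element (0 for add/or/xor, ~0 for and, 1 for mul, the type maximum for umin, and so on), so they cannot change the result. For scalable vectors the lane count is unknown at compile time, hence the GCD-sized neutral splats inserted subvector-wise above. A scalar demonstration of the invariant:

    #include <cstdint>
    #include <vector>

    uint32_t reduceAdd(const std::vector<uint32_t> &V) {
      uint32_t R = 0;
      for (uint32_t X : V)
        R += X; // appending any number of zeros (add's neutral element)
      return R; // leaves R unchanged
    }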
@@ -5622,8 +6160,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
// Pad the vector with the neutral element.
- unsigned OrigElts = OrigVT.getVectorNumElements();
- unsigned WideElts = WideVT.getVectorNumElements();
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
+ }
+
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
@@ -5795,7 +6345,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
unsigned LdAlign =
- (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
// Find the vector type that can load from.
Optional<EVT> FirstVT =
@@ -6103,7 +6653,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
EVT InVT = InOp.getValueType();
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
- assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ assert(InVT.isScalableVector() == NVT.isScalableVector() &&
"cannot modify scalable vectors in this way");
SDLoc dl(InOp);
@@ -6111,10 +6661,10 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
if (InVT == NVT)
return InOp;
- unsigned InNumElts = InVT.getVectorNumElements();
- unsigned WidenNumElts = NVT.getVectorNumElements();
- if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
- unsigned NumConcat = WidenNumElts / InNumElts;
+ ElementCount InEC = InVT.getVectorElementCount();
+ ElementCount WidenEC = NVT.getVectorElementCount();
+ if (WidenEC.hasKnownScalarFactor(InEC)) {
+ unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
DAG.getUNDEF(InVT);
@@ -6125,10 +6675,16 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
- if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ if (InEC.hasKnownScalarFactor(WidenEC))
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
DAG.getVectorIdxConstant(0, dl));
+ assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ "Scalable vectors should have been handled already.");
+
+ unsigned InNumElts = InEC.getFixedValue();
+ unsigned WidenNumElts = WidenEC.getFixedValue();
+
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = NVT.getVectorElementType();
@@ -6144,29 +6700,3 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
Ops[Idx] = FillVal;
return DAG.getBuildVector(NVT, dl, Ops);
}
-
-void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDValue InLo, InHi;
- GetSplitVector(N->getOperand(0), InLo, InHi);
- SDLoc DL(N);
-
- Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
- Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
-}
-
-void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
- SDValue Expanded = TLI.expandVectorSplice(N, DAG);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
- DAG.getVectorIdxConstant(0, DL));
- Hi =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
- DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
-}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 3d5c4c5b1cae..e0e8d503ca92 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -28,21 +27,18 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "scheduler"
-static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
- cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable use of DFA during scheduling"));
+static cl::opt<bool>
+ DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::desc("Disable use of DFA during scheduling"));
static cl::opt<int> RegPressureThreshold(
- "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
- cl::desc("Track reg pressure and switch priority to in-depth"));
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
: Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index f64b332a7fef..9fcf692babdc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/DataTypes.h"
#include <utility>
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 1b89864116cb..78fc407e9573 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -11,16 +11,14 @@
//===----------------------------------------------------------------------===//
#include "InstrEmitter.h"
-#include "ScheduleDAGSDNodes.h"
#include "SDNodeDbgValue.h"
-#include "llvm/ADT/STLExtras.h"
+#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -442,17 +440,29 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
- std::vector<SUnit*> &LiveRegDefs,
+ std::vector<SUnit *> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ const SDNode *Node = nullptr) {
bool Added = false;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
- if (RegAdded.insert(*AI).second) {
- LRegs.push_back(*AI);
- Added = true;
- }
+ // Check if Reg is live.
+ if (!LiveRegDefs[*AI])
+ continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AI] == SU)
+ continue;
+
+ // Also allow multiple uses of a def coming from the same source node.
+ if (Node && LiveRegDefs[*AI]->getNode() == Node)
+ continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AI).second) {
+ LRegs.push_back(*AI);
+ Added = true;
}
}
return Added;
@@ -504,6 +514,15 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
continue;
}
+
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (Reg.isPhysical()) {
+ SDNode *SrcNode = Node->getOperand(2).getNode();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
+ }
+ }
+
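The CopyToReg handling added here (and mirrored in ScheduleDAGRRList.cpp below) passes the copy's source node into CheckForLiveRegDef so that a live physical-register def is not reported as interference against uses fed by that same node. A plain model of the relaxed test, with hypothetical types:

    // Model: a live def of a register only blocks scheduling when it
    // originates from a different node than the one being copied from.
    struct LiveDef { const void *DefNode; };

    bool interferes(const LiveDef *Live, const void *SrcNode) {
      if (!Live)
        return false;                  // register not live at all
      return Live->DefNode != SrcNode; // same-node defs may be reused
    }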
if (!Node->isMachineOpcode())
continue;
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
@@ -758,7 +777,8 @@ void ScheduleDAGLinearize::Schedule() {
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
+ DAG->getUseInstrRefDebugInfo());
DenseMap<SDValue, Register> VRBaseMap;
LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7a5e8ac6075e..8a04ce7535a1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1294,11 +1294,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
-static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
- SUnit **LiveRegDefs,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ const SDNode *Node = nullptr) {
for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
// Check if Ref is live.
@@ -1307,6 +1307,10 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
// Allow multiple uses of the same def.
if (LiveRegDefs[*AliasI] == SU) continue;
+ // Also allow multiple uses of a def coming from the same source node.
+ if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
+ continue;
+
// Add Reg to the set of interfering live regs.
if (RegAdded.insert(*AliasI).second) {
LRegs.push_back(*AliasI);
@@ -1387,6 +1391,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
continue;
}
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (Reg.isPhysical()) {
+ SDNode *SrcNode = Node->getOperand(2).getNode();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
+ SrcNode);
+ }
+ }
+
if (!Node->isMachineOpcode())
continue;
// If we're in the middle of scheduling a call, don't begin scheduling
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 55f6f288f3e3..2a10157b404e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -843,7 +843,8 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
+ DAG->getUseInstrRefDebugInfo());
DenseMap<SDValue, Register> VRBaseMap;
DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
@@ -883,7 +884,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (MI->isCandidateForCallSiteEntry() &&
DAG->getTarget().Options.EmitCallSiteInfo)
- MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+ MF.addCallArgsForwardingRegs(MI, DAG->getCallSiteInfo(Node));
if (DAG->getNoMergeSiteInfo(Node)) {
MI->setFlag(MachineInstr::MIFlag::NoMerge);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 10940478010e..1ba1fd65b8c9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -19,19 +19,15 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <climits>
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d5998d166d25..b3b8756ae9ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -24,9 +24,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -55,7 +53,6 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
@@ -144,11 +141,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
unsigned EltSize =
N->getValueType(0).getVectorElementType().getSizeInBits();
if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize);
+ SplatVal = Op0->getAPIntValue().trunc(EltSize);
return true;
}
if (auto *Op0 = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
- SplatVal = Op0->getValueAPF().bitcastToAPInt().truncOrSelf(EltSize);
+ SplatVal = Op0->getValueAPF().bitcastToAPInt().trunc(EltSize);
return true;
}
}
@@ -714,6 +711,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(LD->getMemOperand()->getFlags());
break;
}
case ISD::STORE: {
@@ -721,6 +719,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
break;
}
case ISD::VP_LOAD: {
@@ -728,6 +727,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ELD->getMemoryVT().getRawBits());
ID.AddInteger(ELD->getRawSubclassData());
ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ELD->getMemOperand()->getFlags());
break;
}
case ISD::VP_STORE: {
@@ -735,6 +735,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(EST->getMemoryVT().getRawBits());
ID.AddInteger(EST->getRawSubclassData());
ID.AddInteger(EST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(EST->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: {
+ const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N);
+ ID.AddInteger(SLD->getMemoryVT().getRawBits());
+ ID.AddInteger(SLD->getRawSubclassData());
+ ID.AddInteger(SLD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE: {
+ const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+ ID.AddInteger(SST->getMemoryVT().getRawBits());
+ ID.AddInteger(SST->getRawSubclassData());
+ ID.AddInteger(SST->getPointerInfo().getAddrSpace());
break;
}
case ISD::VP_GATHER: {
@@ -742,6 +757,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(EG->getMemoryVT().getRawBits());
ID.AddInteger(EG->getRawSubclassData());
ID.AddInteger(EG->getPointerInfo().getAddrSpace());
+ ID.AddInteger(EG->getMemOperand()->getFlags());
break;
}
case ISD::VP_SCATTER: {
@@ -749,6 +765,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ES->getMemoryVT().getRawBits());
ID.AddInteger(ES->getRawSubclassData());
ID.AddInteger(ES->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ES->getMemOperand()->getFlags());
break;
}
case ISD::MLOAD: {
@@ -756,6 +773,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MLD->getMemoryVT().getRawBits());
ID.AddInteger(MLD->getRawSubclassData());
ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MLD->getMemOperand()->getFlags());
break;
}
case ISD::MSTORE: {
@@ -763,6 +781,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MST->getMemoryVT().getRawBits());
ID.AddInteger(MST->getRawSubclassData());
ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MST->getMemOperand()->getFlags());
break;
}
case ISD::MGATHER: {
@@ -770,6 +789,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MG->getMemoryVT().getRawBits());
ID.AddInteger(MG->getRawSubclassData());
ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MG->getMemOperand()->getFlags());
break;
}
case ISD::MSCATTER: {
@@ -777,6 +797,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MS->getMemoryVT().getRawBits());
ID.AddInteger(MS->getRawSubclassData());
ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MS->getMemOperand()->getFlags());
break;
}
case ISD::ATOMIC_CMP_SWAP:
@@ -799,11 +820,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ ID.AddInteger(AT->getMemOperand()->getFlags());
break;
}
case ISD::PREFETCH: {
const MemSDNode *PF = cast<MemSDNode>(N);
ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+ ID.AddInteger(PF->getMemOperand()->getFlags());
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -821,11 +844,18 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(BA->getTargetFlags());
break;
}
+ case ISD::AssertAlign:
+ ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
+ break;
} // end switch (N->getOpcode())
- // Target specific memory nodes could also have address spaces to check.
- if (N->isTargetMemoryOpcode())
- ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+ // Target specific memory nodes could also have address spaces and flags
+ // to check.
+ if (N->isTargetMemoryOpcode()) {
+ const MemSDNode *MN = cast<MemSDNode>(N);
+ ID.AddInteger(MN->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MN->getMemOperand()->getFlags());
+ }
}
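All of the ID.AddInteger(...->getFlags()) additions above serve CSE correctness: the FoldingSet ID is the key under which SelectionDAG unifies nodes, so two memory nodes that differ only in MachineMemOperand flags (say, one volatile and one not) must hash differently or they would be merged into a single node. A model of the enlarged key, with hypothetical field names:

    #include <tuple>

    struct MemNodeKey {
      unsigned MemVTRawBits;
      unsigned SubclassData;
      unsigned AddrSpace;
      unsigned MMOFlags; // newly part of the key
      bool operator==(const MemNodeKey &O) const {
        return std::tie(MemVTRawBits, SubclassData, AddrSpace, MMOFlags) ==
               std::tie(O.MemVTRawBits, O.SubclassData, O.AddrSpace,
                        O.MMOFlags);
      }
    };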
/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
@@ -1395,6 +1425,12 @@ SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
+SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val,
+ SDValue Mask, SDValue EVL, EVT VT) {
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
+ return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
+}
+
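getVPLogicalNOT mirrors getLogicalNOT: a logical NOT is an XOR with the all-true boolean constant, here predicated by the caller's mask and EVL so that inactive lanes are untouched. A scalar model of the predicated inversion:

    #include <cstddef>
    #include <vector>

    // Invert only the lanes the predicate and EVL cover; other lanes keep
    // whatever they held (their contents are unspecified in VP terms).
    std::vector<bool> vpNot(std::vector<bool> V, const std::vector<bool> &M,
                            std::size_t EVL) {
      for (std::size_t I = 0; I < EVL && I < V.size(); ++I)
        if (M[I])
          V[I] = !V[I]; // xor with true
      return V;
    }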
SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
EVT OpVT) {
if (!V)
@@ -2433,23 +2469,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
if (VT.isScalableVector())
return SDValue();
- APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnes(VT.getVectorNumElements())
- : APInt(1, 1);
- return GetDemandedBits(V, DemandedBits, DemandedElts);
-}
-
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by DemandedBits are used in the elements specified by
-/// DemandedElts.
-/// TODO: really we should be making this into the DAG equivalent of
-/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
- const APInt &DemandedElts) {
switch (V.getOpcode()) {
default:
- return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
- *this);
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this);
case ISD::Constant: {
const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
@@ -2469,8 +2491,8 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
if (Amt >= DemandedBits.getBitWidth())
break;
APInt SrcDemandedBits = DemandedBits << Amt;
- if (SDValue SimplifyLHS =
- GetDemandedBits(V.getOperand(0), SrcDemandedBits))
+ if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits(
+ V.getOperand(0), SrcDemandedBits, *this))
return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
V.getOperand(1));
}
@@ -2503,6 +2525,14 @@ bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
}
+/// MaskedVectorIsZero - Return true if 'V' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts,
+ unsigned Depth /* = 0 */) const {
+ APInt Mask = APInt::getAllOnes(V.getScalarValueSizeInBits());
+ return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
unsigned Depth) const {
@@ -2587,9 +2617,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return true;
}
case ISD::VECTOR_SHUFFLE: {
- // Check if this is a shuffle node doing a splat.
- // TODO: Do we need to handle shuffle(splat, undef, mask)?
- int SplatIndex = -1;
+ // Check if this is a shuffle node doing a splat or a shuffle of a splat.
+ APInt DemandedLHS = APInt::getNullValue(NumElts);
+ APInt DemandedRHS = APInt::getNullValue(NumElts);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
@@ -2599,11 +2629,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
if (!DemandedElts[i])
continue;
- if (0 <= SplatIndex && SplatIndex != M)
- return false;
- SplatIndex = M;
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
}
- return true;
+
+ // If we demand elements from neither op, or from both ops, we can't
+ // conclude there is a single-source splat.
+ if ((DemandedLHS.isZero() && DemandedRHS.isZero()) ||
+ (!DemandedLHS.isZero() && !DemandedRHS.isZero()))
+ return false;
+
+ // See if the demanded elts of the source op form a splat, or we demand
+ // only one element, which is trivially a splat.
+ // TODO: Handle source-op splats with undefs.
+ auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
+ APInt SrcUndefs;
+ return (SrcElts.countPopulation() == 1) ||
+ (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
+ (SrcElts & SrcUndefs).isZero());
+ };
+ if (!DemandedLHS.isZero())
+ return CheckSplatSrc(V.getOperand(0), DemandedLHS);
+ return CheckSplatSrc(V.getOperand(1), DemandedRHS);
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
@@ -2614,7 +2663,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
uint64_t Idx = V.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt UndefSrcElts;
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
return true;
@@ -2631,9 +2680,49 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return false;
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt UndefSrcElts;
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
- UndefElts = UndefSrcElts.truncOrSelf(NumElts);
+ UndefElts = UndefSrcElts.trunc(NumElts);
+ return true;
+ }
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = V.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // Ignore bitcasts from unsupported types.
+ // TODO: Add fp support?
+ if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger())
+ break;
+
+ // Bitcast 'small element' vector to 'large element' vector.
+ if ((BitWidth % SrcBitWidth) == 0) {
+ // See if each sub element is a splat.
+ unsigned Scale = BitWidth / SrcBitWidth;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt ScaledDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+ for (unsigned I = 0; I != Scale; ++I) {
+ APInt SubUndefElts;
+ APInt SubDemandedElt = APInt::getOneBitSet(Scale, I);
+ APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt);
+ SubDemandedElts &= ScaledDemandedElts;
+ if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
+ return false;
+
+ // We can't merge the undef bits with a "MatchAnyBits" rule here, because
+ // some operations use only part of the source value.
+ // Take llvm.fshl.* for example:
+ // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32
+ // t2: v2i64 = bitcast t1
+ // t5: v2i64 = fshl t3, t4, t2
+ // We cannot convert t2 to {i64 undef, i64 undef}.
+ UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts,
+ /*MatchAllBits=*/true);
+ }
return true;
}
break;
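The BITCAST case rests on the observation that each wide lane is a concatenation of Scale narrow sublanes, so the wide vector is a splat iff the narrow lanes at each sublane position form a splat across all demanded wide lanes. A standalone model of the demanded-mask scaling (the real helper is APIntOps::ScaleBitMask; this sketch assumes at most 64 source lanes):

    #include <cstdint>

    // Expand a demanded mask over NumElts wide lanes to the narrow lanes
    // backing them, where NumSrcElts = NumElts * Scale.
    uint64_t scaleBitMask(uint64_t Demanded, unsigned NumElts,
                          unsigned NumSrcElts) {
      unsigned Scale = NumSrcElts / NumElts;
      uint64_t Out = 0;
      for (unsigned I = 0; I != NumElts; ++I)
        if (Demanded & (1ull << I))
          Out |= ((1ull << Scale) - 1) << (I * Scale);
      return Out;
    }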
@@ -2978,7 +3067,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
break;
}
@@ -3083,9 +3172,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
// TODO: SelfMultiply can be poison, but not undef.
- SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
- Op.getOperand(0), DemandedElts, false, Depth + 1);
+ if (SelfMultiply)
+ SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
+ Op.getOperand(0), DemandedElts, false, Depth + 1);
Known = KnownBits::mul(Known, Known2, SelfMultiply);
+
+ // If the multiplication is known not to overflow, the product of a number
+ // with itself is non-negative. Only do this if we haven't already computed
+ // the opposite value for the sign bit.
+ if (Op->getFlags().hasNoSignedWrap() &&
+ Op.getOperand(0) == Op.getOperand(1) &&
+ !Known.isNegative())
+ Known.makeNonNegative();
break;
}
case ISD::MULHU: {
@@ -3128,6 +3226,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::udiv(Known, Known2);
break;
}
+ case ISD::AVGCEILU: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth + 1);
+ Known2 = Known2.zext(BitWidth + 1);
+ KnownBits One = KnownBits::makeConstant(APInt(1, 1));
+ Known = KnownBits::computeForAddCarry(Known, Known2, One);
+ Known = Known.extractBits(BitWidth, 1);
+ break;
+ }
case ISD::SELECT:
case ISD::VSELECT:
Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
@@ -3330,7 +3438,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
break;
@@ -3342,7 +3450,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
@@ -3358,7 +3466,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ANY_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
Known = Known.anyext(BitWidth);
break;
@@ -3605,6 +3713,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::smax(Known, Known2);
else
Known = KnownBits::smin(Known, Known2);
+
+ // For SMAX, if CstLow is non-negative we know the result will be
+ // non-negative and thus all sign bits are 0.
+ // TODO: There's an equivalent of this for smin with negative constant for
+ // known ones.
+ if (IsMax && CstLow) {
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ if (ValueLow.isNonNegative()) {
+ unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits()));
+ }
+ }
+
break;
}
case ISD::FP_TO_UINT_SAT: {
@@ -3905,7 +4026,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements());
+ APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
Tmp = VTBits - SrcVT.getScalarSizeInBits();
return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
}
@@ -4192,7 +4313,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
@@ -4585,26 +4706,54 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+// Only bits set in Mask must be negated; other bits may be arbitrary.
+SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) {
+ if (isBitwiseNot(V, AllowUndefs))
+ return V.getOperand(0);
+
+ // Handle any_extend (not (truncate X)) pattern, where Mask only sets
+ // bits in the non-extended part.
+ ConstantSDNode *MaskC = isConstOrConstSplat(Mask);
+ if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+ SDValue ExtArg = V.getOperand(0);
+ if (ExtArg.getScalarValueSizeInBits() >=
+ MaskC->getAPIntValue().getActiveBits() &&
+ isBitwiseNot(ExtArg, AllowUndefs) &&
+ ExtArg.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ ExtArg.getOperand(0).getOperand(0).getValueType() == V.getValueType())
+ return ExtArg.getOperand(0).getOperand(0);
+ return SDValue();
+}
+
+static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
+ // Match the masked merge pattern (X & ~M) op (Y & M),
+ // including the degenerate case (X & ~M) op M.
+ auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask,
+ SDValue Other) {
+ if (SDValue NotOperand =
+ getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) {
+ if (Other == NotOperand)
+ return true;
+ if (Other->getOpcode() == ISD::AND)
+ return NotOperand == Other->getOperand(0) ||
+ NotOperand == Other->getOperand(1);
+ }
+ return false;
+ };
+ if (A->getOpcode() == ISD::AND)
+ return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) ||
+ MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B);
+ return false;
+}
+
// FIXME: unify with llvm::haveNoCommonBitsSet.
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
- // Match masked merge pattern (X & ~M) op (Y & M)
- if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) {
- auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) {
- if (isBitwiseNot(NotM, true)) {
- SDValue NotOperand = NotM->getOperand(0);
- return NotOperand == And->getOperand(0) ||
- NotOperand == And->getOperand(1);
- }
- return false;
- };
- if (MatchNoCommonBitsPattern(A->getOperand(0), B) ||
- MatchNoCommonBitsPattern(A->getOperand(1), B) ||
- MatchNoCommonBitsPattern(B->getOperand(0), A) ||
- MatchNoCommonBitsPattern(B->getOperand(1), A))
- return true;
- }
+ if (haveNoCommonBitsSetCommutative(A, B) ||
+ haveNoCommonBitsSetCommutative(B, A))
+ return true;
return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
computeKnownBits(B));
}
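The masked merge pattern is sound because the two sides select complementary bits of M: one side can only set bits where M is 0, the other only where M is 1, so they never share a set bit (and, e.g., an ADD of the two sides equals an OR). The identity, checked in plain C++:

    #include <cstdint>

    // (X & ~M) and (Y & M) partition the bit positions by M, so their
    // AND is zero for all X, Y, M.
    bool disjoint(uint32_t X, uint32_t Y, uint32_t M) {
      return ((X & ~M) & (Y & M)) == 0; // always true
    }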
@@ -4833,9 +4982,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
- case ISD::FP16_TO_FP: {
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP: {
bool Ignored;
- APFloat FPV(APFloat::IEEEhalf(),
+ APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
(Val.getBitWidth() == 16) ? Val : Val.trunc(16));
// This can return overflow, underflow, or inexact; we don't care.
@@ -4909,11 +5060,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
- case ISD::FP_TO_FP16: {
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16: {
bool Ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(APFloat::IEEEhalf(),
+ (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
APFloat::rmNearestTiesToEven, &Ignored);
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
}
@@ -4965,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FREEZE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
+ if (isGuaranteedNotToBeUndefOrPoison(Operand))
+ return Operand;
break;
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
@@ -5114,7 +5269,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid ABS!");
if (OpOpcode == ISD::UNDEF)
- return getUNDEF(VT);
+ return getConstant(0, DL, VT);
break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
@@ -5182,6 +5337,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (Operand.getValueType().getScalarType() == MVT::i1)
return getNOT(DL, Operand, Operand.getValueType());
break;
+ case ISD::VECREDUCE_ADD:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_XOR, DL, VT, Operand);
+ break;
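The VECREDUCE_ADD fold uses the fact that over i1, addition is addition modulo 2: an add-reduction of a boolean vector is its parity, which is exactly an xor-reduction. In scalar form:

    #include <vector>

    // Parity: add-reduction mod 2 over i1 lanes equals xor-reduction.
    bool reduceAddI1(const std::vector<bool> &V) {
      bool R = false;
      for (bool B : V)
        R ^= B;
      return R;
    }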
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
if (Operand.getValueType().getScalarType() == MVT::i1)
@@ -5273,6 +5432,30 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
APInt C2Ext = C2.zext(FullWidth);
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
}
+ case ISD::AVGFLOORS: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGFLOORU: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGCEILS: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGCEILU: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
+ }
}
return llvm::None;
}
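The four averaging folds compute the result in BitWidth + 1 bits so the intermediate sum cannot wrap: the floor variants take (a + b) >> 1, the ceil variants (a + b + 1) >> 1, and the sign/zero extension selects the signed or unsigned flavor; extractBits(BitWidth, 1) is precisely that shift right by one. The same arithmetic on 8-bit lanes, evaluated in a wider type:

    #include <cstdint>

    int8_t avgfloors8(int8_t A, int8_t B) {
      return int8_t((int(A) + B) >> 1); // arithmetic shift gives the floor
    }
    uint8_t avgflooru8(uint8_t A, uint8_t B) {
      return uint8_t((unsigned(A) + B) >> 1);
    }
    int8_t avgceils8(int8_t A, int8_t B) {
      return int8_t((int(A) + B + 1) >> 1);
    }
    uint8_t avgceilu8(uint8_t A, uint8_t B) {
      return uint8_t((unsigned(A) + B + 1) >> 1);
    }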
@@ -5355,7 +5538,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (!FoldAttempt)
return SDValue();
- SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+ SDValue Folded = getConstant(*FoldAttempt, DL, VT);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
@@ -5400,7 +5583,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
if (!Fold)
break;
- RawBits.push_back(Fold.getValue());
+ RawBits.push_back(*Fold);
}
if (RawBits.size() == NumElts.getFixedValue()) {
// We have constant folded, but we need to cast this again back to
@@ -5416,7 +5599,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
if (DstUndefs[I])
continue;
- Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);
+ Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT);
}
return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
}
@@ -5455,9 +5638,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
- // If we are comparing vectors, then the result needs to be a i1 boolean
- // that is then sign-extended back to the legal result type.
+ // If we are comparing vectors, then the result needs to be an i1 boolean that
+ // is then extended back to the legal result type depending on how booleans
+ // are represented.
EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+ ISD::NodeType ExtendCode =
+ (Opcode == ISD::SETCC && SVT != VT.getScalarType())
+ ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT))
+ : ISD::SIGN_EXTEND;
// Find legal integer scalar type for constant promotion and
// ensure that its scalar size is at least as large as source.
@@ -5494,8 +5682,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
// Build vector (integer) scalar operands may need implicit
// truncation - do this before constant folding.
- if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
+ if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) {
+ // Don't create illegally-typed nodes unless they're constants or undef;
+ // if we fail to constant fold, we can't guarantee that the (dead) nodes
+ // we're creating will be cleaned up before being visited for
+ // legalization.
+ if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() &&
+ !isa<ConstantSDNode>(ScalarOp) &&
+ TLI->getTypeAction(*getContext(), InSVT) !=
+ TargetLowering::TypeLegal)
+ return SDValue();
ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+ }
ScalarOps.push_back(ScalarOp);
}
@@ -5505,7 +5703,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
- ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+ ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
@@ -5629,20 +5827,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(Opcode, DL, VT, N1, N2, Flags);
}
+void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
+ SDValue &N2) const {
+ if (!TLI->isCommutativeBinOp(Opcode))
+ return;
+
+ // Canonicalize:
+ // binop(const, nonconst) -> binop(nonconst, const)
+ bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
+ bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
+ bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+ if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
+ std::swap(N1, N2);
+
+ // Canonicalize:
+ // binop(splat(x), step_vector) -> binop(step_vector, splat(x))
+ else if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+ N2.getOpcode() == ISD::STEP_VECTOR)
+ std::swap(N1, N2);
+}
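// Hypothetical effect of the hoisted canonicalization, assuming an i32
// SDValue X and an SDLoc DL in scope (a sketch, not actual DAG output):
//   SDValue C = DAG.getConstant(4, DL, MVT::i32);
//   SDValue N = DAG.getNode(ISD::ADD, DL, MVT::i32, C, X);
//   // getNode runs canonicalizeCommutativeBinop, so N is (add X, 4); the
//   // new rule likewise turns (add splat(s), step_vector) into
//   // (add step_vector, splat(s)).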
+
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
assert(N1.getOpcode() != ISD::DELETED_NODE &&
N2.getOpcode() != ISD::DELETED_NODE &&
"Operand is DELETED_NODE!");
- // Canonicalize constant to RHS if commutative.
- if (TLI->isCommutativeBinOp(Opcode)) {
- bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
- bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
- bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
- bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
- if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
- std::swap(N1, N2);
- }
+
+ canonicalizeCommutativeBinop(Opcode, N1, N2);
auto *N1C = dyn_cast<ConstantSDNode>(N1);
auto *N2C = dyn_cast<ConstantSDNode>(N2);
@@ -5946,6 +6158,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
if (VT == N1.getOperand(1).getValueType())
return N1.getOperand(1);
+ if (VT.isFloatingPoint()) {
+ assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits());
+ return getFPExtendOrRound(N1.getOperand(1), DL, VT);
+ }
return getSExtOrTrunc(N1.getOperand(1), DL, VT);
}
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
@@ -6043,9 +6259,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
std::swap(N1, N2);
} else {
switch (Opcode) {
- case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
return getUNDEF(VT); // fold op(undef, arg2) -> undef
+ case ISD::SIGN_EXTEND_INREG:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
@@ -6534,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
+ NewAlign = NewAlign.previous();
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
@@ -6782,17 +6998,18 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
/// \param Size Number of bytes to write.
/// \param Alignment Alignment of the destination in bytes.
/// \param isVol True if destination is volatile.
+/// \param AlwaysInline Makes sure no function call is generated.
/// \param DstPtrInfo IR information on the memory pointer.
/// \returns New head in the control flow, if lowering was successful, empty
/// SDValue otherwise.
///
/// The function tries to replace 'llvm.memset' intrinsic with several store
/// operations and value calculation code. This is usually profitable for small
-/// memory size.
+/// memory sizes or when the semantics require inlining.
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, Align Alignment, bool isVol,
- MachinePointerInfo DstPtrInfo,
+ bool AlwaysInline, MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo) {
// Turn a memset of undef to nop.
// FIXME: We need to honor volatile even if Src is undef.
@@ -6812,8 +7029,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+ unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
+
if (!TLI.findOptimalMemOpLowering(
- MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ MemOps, Limit,
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();
@@ -6964,10 +7183,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
- SDValue Dst, unsigned DstAlign,
- SDValue Src, unsigned SrcAlign,
- SDValue Size, Type *SizeTy,
- unsigned ElemSz, bool isTailCall,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Emit a library call.
@@ -7067,10 +7285,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
- SDValue Dst, unsigned DstAlign,
- SDValue Src, unsigned SrcAlign,
- SDValue Size, Type *SizeTy,
- unsigned ElemSz, bool isTailCall,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Emit a library call.
@@ -7109,7 +7326,7 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
- bool isVol, bool isTailCall,
+ bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo) {
// Check to see if we should lower the memset to stores first.
@@ -7122,7 +7339,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Alignment,
- isVol, DstPtrInfo, AAInfo);
+ isVol, false, DstPtrInfo, AAInfo);
if (Result.getNode())
return Result;
@@ -7132,45 +7349,75 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemset(
- *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo);
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo);
if (Result.getNode())
return Result;
}
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, true, DstPtrInfo, AAInfo);
+ assert(Result &&
+ "getMemsetStores must return a valid sequence when AlwaysInline");
+ return Result;
+ }
+
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
// Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
- Args.push_back(Entry);
- Entry.Node = Src;
- Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
- Args.push_back(Entry);
- Entry.Node = Size;
- Entry.Ty = getDataLayout().getIntPtrType(*getContext());
- Args.push_back(Entry);
+ auto &Ctx = *getContext();
+ const auto &DL = getDataLayout();
- // FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
- CLI.setDebugLoc(dl)
- .setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
- .setDiscardResult()
- .setTailCall(isTailCall);
+ // FIXME: pass in SDLoc
+ CLI.setDebugLoc(dl).setChain(Chain);
+
+ ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
+ const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
+ const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
+
+ // Helper function to create an Entry from Node and Type.
+ const auto CreateEntry = [](SDValue Node, Type *Ty) {
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Node;
+ Entry.Ty = Ty;
+ return Entry;
+ };
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ // If zeroing out and bzero is present, use it.
+ if (SrcIsZero && BzeroName) {
+ TargetLowering::ArgListTy Args;
+ Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+ CLI.setLibCallee(
+ TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
+ getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
+ } else {
+ TargetLowering::ArgListTy Args;
+ Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
+ Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+ CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(Ctx),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(DL)),
+ std::move(Args));
+ }
+
+ CLI.setDiscardResult().setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
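// The libcall selection above boils down to the following source-level
// decision (a sketch; names are illustrative, and bzero is only chosen when
// the target actually advertises it via RTLIB::BZERO):
//   if (ByteValueIsZero && TargetHasBzero)
//     bzero(Dst, N);
//   else
//     memset(Dst, ByteValue, N);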
SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
- SDValue Dst, unsigned DstAlign,
- SDValue Value, SDValue Size, Type *SizeTy,
- unsigned ElemSz, bool isTailCall,
+ SDValue Dst, SDValue Value, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
MachinePointerInfo DstPtrInfo) {
// Emit a library call.
TargetLowering::ArgListTy Args;
@@ -7214,6 +7461,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void* IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
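// Sketch of why the flags belong in the CSE key: two memory operands that are
// identical except for, say, MOVolatile must produce different
// FoldingSetNodeIDs, otherwise FindNodeOrInsertPos would merge a volatile
// access with a non-volatile one (flag values chosen for illustration):
//   FoldingSetNodeID A, B;
//   A.AddInteger(MachineMemOperand::MOLoad);
//   B.AddInteger(MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile);
//   // A and B now hash differently, so the two nodes stay distinct.
// The same ID.AddInteger(MMO->getFlags()) line is added to every memory-node
// builder below for the same reason.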
@@ -7326,6 +7574,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
Opcode, dl.getIROrder(), VTList, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -7498,6 +7747,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -7599,6 +7849,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -7665,6 +7916,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -7693,6 +7945,7 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
@@ -7750,6 +8003,7 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPLoadSDNode>(E)->refineAlignment(MMO);
@@ -7842,6 +8096,7 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPStoreSDNode>(E)->refineAlignment(MMO);
@@ -7912,6 +8167,7 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPStoreSDNode>(E)->refineAlignment(MMO);
@@ -7942,6 +8198,7 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
@@ -7958,6 +8215,259 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getStridedLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::UnknownSize;
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) {
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL};
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>(
+ DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N =
+ newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+ EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+ Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment,
+ MMOFlags, AAInfo, Ranges, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain,
+ SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+ Undef, Stride, Mask, EVL, VT, MMO, IsExpanding);
+}
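// Hypothetical use of the simplest overload above, with the operands assumed
// to be in scope (a sketch, not a test):
//   SDValue Load =
//       DAG.getStridedLoadVP(MVT::nxv2i32, DL, Chain, Ptr, Stride, Mask,
//                            EVL, MMO, /*IsExpanding=*/false);
//   // Result 0 is the loaded vector; result 1 is the output chain.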
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+ ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+ Stride, Mask, EVL, PtrInfo, MemVT, Alignment,
+ MMOFlags, AAInfo, nullptr, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+ ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+ Stride, Mask, EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *SLD = cast<VPStridedLoadSDNode>(OrigLoad);
+  assert(SLD->getOffset().isUndef() &&
+         "Strided load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ SLD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getStridedLoadVP(
+ AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL, SLD->getChain(),
+ Base, Offset, SLD->getStride(), SLD->getMask(), SLD->getVectorLength(),
+ SLD->getPointerInfo(), SLD->getMemoryVT(), SLD->getAlign(), MMOFlags,
+ SLD->getAAInfo(), nullptr, SLD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr,
+ SDValue Offset, SDValue Stride,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM,
+ bool IsTruncating, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+ DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ VTs, AM, IsTruncating,
+ IsCompressing, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(
+ SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT,
+ Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo);
+ return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT,
+ MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr,
+ SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()),
+ Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED,
+ /*IsTruncating*/ false, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+ DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ VTs, ISD::UNINDEXED, true,
+ IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore,
+ const SDLoc &DL, SDValue Base,
+ SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *SST = cast<VPStridedStoreSDNode>(OrigStore);
+ assert(SST->getOffset().isUndef() &&
+ "Strided store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(),
+ SST->getMask(), SST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(SST->getMemoryVT().getRawBits());
+ ID.AddInteger(SST->getRawSubclassData());
+ ID.AddInteger(SST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStridedStoreSDNode>(
+ DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(),
+ SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
ISD::MemIndexType IndexType) {
@@ -7969,6 +8479,7 @@ SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPGatherSDNode>(E)->refineAlignment(MMO);
@@ -8012,6 +8523,7 @@ SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPScatterSDNode>(E)->refineAlignment(MMO);
@@ -8061,6 +8573,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
@@ -8108,6 +8621,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
@@ -8149,13 +8663,13 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]);
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, MemVT, MMO, IndexType, ExtTy);
createOperands(N, Ops);
@@ -8196,13 +8710,13 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]);
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, MemVT, MMO, IndexType, IsTrunc);
createOperands(N, Ops);
@@ -8400,6 +8914,41 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"LHS/RHS of comparison should match types!");
break;
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ // If it is a VP_ADD/VP_SUB mask operation, turn it into VP_XOR.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ Opcode = ISD::VP_XOR;
+ break;
+ case ISD::VP_MUL:
+ // If it is a VP_MUL mask operation, turn it into VP_AND.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ Opcode = ISD::VP_AND;
+ break;
+ case ISD::VP_REDUCE_MUL:
+ // If it is a VP_REDUCE_MUL mask operation, turn it into VP_REDUCE_AND.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_AND;
+ break;
+ case ISD::VP_REDUCE_ADD:
+ // If it is a VP_REDUCE_ADD mask operation, turn it into VP_REDUCE_XOR.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_XOR;
+ break;
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_UMIN:
+ // If it is a VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation, turn it into
+ // VP_REDUCE_AND.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_AND;
+ break;
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ // If it is a VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation, turn it into
+ // VP_REDUCE_OR.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_OR;
+ break;
}
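// All of the i1 rewrites above are Boolean identities mod 2: add/sub == xor,
// mul == and, smax/umin == and, smin/umax == or, and the reductions follow
// suit (reduce_add is parity, i.e. reduce_xor; reduce_mul is reduce_and).
// A quick truth-table check for the add case (a sketch):
//   for (int A : {0, 1})
//     for (int B : {0, 1})
//       assert(((A + B) & 1) == (A ^ B));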
// Memoize nodes.
@@ -8446,7 +8995,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
if (VTList.NumVTs == 1)
- return getNode(Opcode, DL, VTList.VTs[0], Ops);
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags);
#ifndef NDEBUG
for (auto &Op : Ops)
@@ -9659,19 +10208,36 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
namespace {
- /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
- /// to record information about a use.
- struct UseMemo {
- SDNode *User;
- unsigned Index;
- SDUse *Use;
- };
+/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+/// to record information about a use.
+struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+};
- /// operator< - Sort Memos by User.
- bool operator<(const UseMemo &L, const UseMemo &R) {
- return (intptr_t)L.User < (intptr_t)R.User;
+/// operator< - Sort Memos by User.
+bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+}
+
+/// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node
+/// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that
+/// the node has already been taken care of recursively.
+class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SmallVector<UseMemo, 4> &Uses;
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ for (UseMemo &Memo : Uses)
+ if (Memo.User == N)
+ Memo.User = nullptr;
}
+public:
+ RAUOVWUpdateListener(SelectionDAG &d, SmallVector<UseMemo, 4> &uses)
+ : SelectionDAG::DAGUpdateListener(d), Uses(uses) {}
+};
+
} // end anonymous namespace
bool SelectionDAG::calculateDivergence(SDNode *N) {
@@ -9763,12 +10329,19 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
// Sort the uses, so that all the uses from a given User are together.
llvm::sort(Uses);
+ RAUOVWUpdateListener Listener(*this, Uses);
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
// We know that this user uses some value of From. If it is the right
// value, update it.
SDNode *User = Uses[UseIndex].User;
+ // If the node has been deleted by recursive CSE updates when updating
+ // another node, then just skip this entry.
+ if (User == nullptr) {
+ ++UseIndex;
+ continue;
+ }
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
@@ -9965,6 +10538,11 @@ bool llvm::isOneConstant(SDValue V) {
return Const != nullptr && Const->isOne();
}
+bool llvm::isMinSignedConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isMinSignedValue();
+}
+
SDValue llvm::peekThroughBitcasts(SDValue V) {
while (V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
@@ -10095,10 +10673,9 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
}
bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
- // TODO: may want to use peekThroughBitcast() here.
- unsigned BitWidth = N.getScalarValueSizeInBits();
- ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
- return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
+ ConstantSDNode *C =
+ isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true);
+ return C && C->isOne();
}
bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
@@ -10947,9 +11524,8 @@ bool BuildVectorSDNode::getConstantRawBits(
auto *CInt = dyn_cast<ConstantSDNode>(Op);
auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
assert((CInt || CFP) && "Unknown constant");
- SrcBitElements[I] =
- CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits)
- : CFP->getValueAPF().bitcastToAPInt();
+ SrcBitElements[I] = CInt ? CInt->getAPIntValue().trunc(SrcEltSizeInBits)
+ : CFP->getValueAPF().bitcastToAPInt();
}
// Recast to dst width.
@@ -11068,6 +11644,10 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const {
if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
return N.getNode();
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantFPSDNode>(N.getOperand(0)))
+ return N.getNode();
+
return nullptr;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 6d8252046501..d236433f6fb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -96,7 +96,7 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
return false;
int64_t PtrDiff;
- if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+ if (NumBytes0 && NumBytes1 &&
BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
// If the size of the memory access is unknown, do not use it in the analysis.
// One example of an unknown-size memory access is a load/store of a scalable
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 01230a36e744..37d05cdba76d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -24,25 +24,21 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -89,7 +85,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -102,10 +97,8 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
-#include <cstring>
#include <iterator>
#include <limits>
-#include <numeric>
#include <tuple>
using namespace llvm;
@@ -224,10 +217,10 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
- Hi =
- DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
- DAG.getConstant(Lo.getValueSizeInBits(), DL,
- TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueSizeInBits(), DL,
+ TLI.getShiftAmountTy(
+ TotalVT, DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
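The change above only affects the type used for the shift amount; the
reassembly itself is the usual two-part join. Scalar picture, assuming two
32-bit parts of a 64-bit value (helper name is illustrative):

  uint64_t join(uint32_t Lo, uint32_t Hi) {
    return (uint64_t(Hi) << 32) | uint64_t(Lo);  // Hi shifted by Lo's width
  }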
@@ -276,7 +269,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
- if (AssertOp.hasValue())
+ if (AssertOp)
Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
@@ -330,7 +323,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
- const bool IsABIRegCopy = CallConv.hasValue();
+ const bool IsABIRegCopy = CallConv.has_value();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
@@ -344,7 +337,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ *DAG.getContext(), *CallConv, ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
@@ -566,7 +559,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
- DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+ DAG.getShiftAmountConstant(RoundBits, ValueVT, DL));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
@@ -654,7 +647,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const bool IsABIRegCopy = CallConv.hasValue();
+ const bool IsABIRegCopy = CallConv.has_value();
if (NumParts == 1) {
EVT PartEVT = PartVT;
@@ -733,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
- *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
+ *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt);
if (ValueVT == BuiltVectorTy) {
// Nothing to do.
@@ -1236,7 +1229,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
// in the first place we should not be more successful here). Unless we
// have some test case that proves this to be correct, we should avoid
// calling EmitFuncArgumentDbgValue here.
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl,
+ FuncArgumentDbgValueKind::Value, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
@@ -1367,7 +1361,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
N = UnusedArgNodeMap[V];
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
- if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+ if (!IsVariadic &&
+ EmitFuncArgumentDbgValue(V, Var, Expr, dl,
+ FuncArgumentDbgValueKind::Value, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
@@ -1639,7 +1635,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Ops.push_back(getValue(CV->getOperand(i)));
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
- } else if (isa<ConstantAggregateZero>(C)) {
+ }
+
+ if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
@@ -1651,12 +1649,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ScalableVectorType>(VecTy))
return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
- else {
- SmallVector<SDValue, 16> Ops;
- Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
- }
+
+ SmallVector<SDValue, 16> Ops;
+ Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
+
llvm_unreachable("Unknown vector constant");
}
@@ -1680,11 +1678,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
- if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
+ if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V))
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
- }
+
if (const auto *BB = dyn_cast<BasicBlock>(V))
return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+
llvm_unreachable("Can't get register for value!");
}
@@ -2748,10 +2747,10 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, CallOptions, getCurSDLoc()).second;
- // On PS4, the "return address" must still be within the calling function,
- // even if it's at the very end, so emit an explicit TRAP here.
+ // On PS4/PS5, the "return address" must still be within the calling
+ // function, even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
- if (TM.getTargetTriple().isPS4CPU())
+ if (TM.getTargetTriple().isPS())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
@@ -3150,26 +3149,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
Op1.getValueType(), DAG.getDataLayout());
- // Coerce the shift amount to the right type if we can.
+ // Coerce the shift amount to the right type if we can. This exposes the
+ // truncate or zext to optimization early.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
- unsigned ShiftSize = ShiftTy.getSizeInBits();
- unsigned Op2Size = Op2.getValueSizeInBits();
- SDLoc DL = getCurSDLoc();
-
- // If the operand is smaller than the shift count type, promote it.
- if (ShiftSize > Op2Size)
- Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
-
- // If the operand is larger than the shift count type but the shift
- // count type has enough bits to represent any shift value, truncate
- // it now. This is a common case and it exposes the truncate to
- // optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))
- Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
- // Otherwise we'll need to temporarily settle for some other convenient
- // type. Type legalization will make adjustments once the shiftee is split.
- else
- Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
+ "Unexpected shift type");
+ Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy);
}
bool nuw = false;
@@ -3816,13 +3801,8 @@ void SelectionDAGBuilder::visitInsertValue(const User &I) {
DAG.getVTList(AggValueVTs), Values));
}
-void SelectionDAGBuilder::visitExtractValue(const User &I) {
- ArrayRef<unsigned> Indices;
- if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
- Indices = EV->getIndices();
- else
- Indices = cast<ConstantExpr>(&I)->getIndices();
-
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
@@ -4376,7 +4356,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
- SelectionDAGBuilder *SDB, const BasicBlock *CurBB) {
+ SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
+ uint64_t ElemSize) {
SelectionDAG& DAG = SDB->DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
@@ -4416,9 +4397,16 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
- Scale = DAG.getTargetConstant(
- DL.getTypeAllocSize(GEP->getResultElementType()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+
+ // MGATHER/MSCATTER are only required to support scaling by one or by the
+ // element size. Other scales may be produced using target-specific DAG
+ // combines.
+ uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+ if (ScaleVal != ElemSize && ScaleVal != 1)
+ return false;
+
+ Scale =
+ DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
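Worked example of the new restriction: for a gather of i32 elements
(ElemSize == 4) addressed through a GEP whose result element type is i64,
ScaleVal is 8, which is neither 1 nor ElemSize, so getUniformBase now returns
false and the caller falls back to the splat-base form with a scale of 1.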
@@ -4432,7 +4420,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
EVT VT = Src0.getValueType();
Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
->getMaybeAlignValue()
- .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
+ .value_or(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Base;
@@ -4440,7 +4428,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
- I.getParent());
+ I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
@@ -4451,7 +4439,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -4538,7 +4526,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
->getMaybeAlignValue()
- .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
+ .value_or(DAG.getEVTAlign(VT.getScalarType()));
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
@@ -4548,7 +4536,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
- I.getParent());
+ I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
@@ -4559,7 +4547,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -4678,7 +4666,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
- I.getAlignment() < MemVT.getSizeInBits() / 8)
+ I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
@@ -4730,7 +4718,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlignment() < MemVT.getSizeInBits() / 8)
+ if (I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4781,7 +4769,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
- // Info is set by getTgtMemInstrinsic
+ // Info is set by getTgtMemIntrinsic
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
@@ -4895,7 +4883,8 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(
ISD::SRL, dl, MVT::i32, t0,
- DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ DAG.getConstant(23, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
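Scalar equivalent of GetExponent, assuming IEEE-754 single precision (helper
name is illustrative; needs <cstring> and <cstdint>):

  int exponentOf(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));          // bit pattern of F
    return int((Bits & 0x7f800000u) >> 23) - 127;  // unbias the exponent
  }

For example, exponentOf(8.0f) == 3. The patch itself only swaps the type of
the shift-amount constant from the pointer type to the target's preferred
shift-amount type.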
@@ -4920,10 +4909,11 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
// IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(
- ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
- DAG.getDataLayout())));
+ IntegerPartOfX =
+ DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, dl,
+ DAG.getTargetLoweringInfo().getShiftAmountTy(
+ MVT::i32, DAG.getDataLayout())));
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
@@ -5351,38 +5341,36 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
- // If RHS is a constant, we can expand this out to a multiplication tree,
- // otherwise we end up lowering to a call to __powidf2 (for example). When
- // optimizing for size, we only want to do this if the expansion would produce
- // a small number of multiplies, otherwise we do the full expansion.
+ // If RHS is a constant, we can expand this out to a multiplication tree if
+ // it's beneficial on the target, otherwise we end up lowering to a call to
+ // __powidf2 (for example).
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- // Get the exponent as a positive value.
unsigned Val = RHSC->getSExtValue();
- if ((int)Val < 0) Val = -Val;
// powi(x, 0) -> 1.0
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
- bool OptForSize = DAG.shouldOptForSize();
- if (!OptForSize ||
- // If optimizing for size, don't insert too many multiplies.
- // This inserts up to 5 multiplies.
- countPopulation(Val) + Log2_32(Val) < 7) {
+ if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
+ Val, DAG.shouldOptForSize())) {
+ // Get the exponent as a positive value.
+ if ((int)Val < 0)
+ Val = -Val;
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
// the benefit of being both really simple and much better than a libcall.
- SDValue Res; // Logically starts equal to 1.0
+ SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
// TODO: Intrinsics should have fast-math-flags that propagate to these
// nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
- Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
+ Res =
+ DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
else
- Res = CurSquare; // 1.0*CurSquare.
+ Res = CurSquare; // 1.0*CurSquare.
}
CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
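Scalar model of the binary-decomposition loop above (a sketch that ignores
the negative-exponent handling, which takes the reciprocal of the result
afterwards):

  double powi(double X, unsigned Val) {
    double Res = 1.0, CurSquare = X;
    while (Val) {
      if (Val & 1)
        Res *= CurSquare;       // fold in this bit's power of X
      CurSquare *= CurSquare;   // X^1, X^2, X^4, ...
      Val >>= 1;
    }
    return Res;
  }

For example, powi(x, 13) computes x^8 * x^4 * x, since 13 is 0b1101.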
@@ -5503,7 +5491,7 @@ getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
- DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
+ DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -5537,7 +5525,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
};
- if (!IsDbgDeclare) {
+ if (Kind == FuncArgumentDbgValueKind::Value) {
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic is found in
// the entry block.
@@ -5624,7 +5612,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
- IsIndirect = IsDbgDeclare;
+ IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
}
}
@@ -5672,7 +5660,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
continue;
}
MachineInstr *NewMI =
- MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare);
+ MakeVRegDbgValue(RegAndSize.first, *FragmentExpr,
+ Kind != FuncArgumentDbgValueKind::Value);
FuncInfo.ArgDbgValues.push_back(NewMI);
}
};
@@ -5690,7 +5679,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
Op = MachineOperand::CreateReg(VMI->second, false);
- IsIndirect = IsDbgDeclare;
+ IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
@@ -5712,6 +5701,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
Variable, Expr);
+ // Otherwise, use ArgDbgValues.
FuncInfo.ArgDbgValues.push_back(NewMI);
return true;
}
@@ -5817,16 +5807,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
- TLI.getPointerTy(DAG.getDataLayout()),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::addressofreturnaddress:
- setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I,
+ DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::sponentry:
- setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
- TLI.getFrameIndexTy(DAG.getDataLayout())));
+ setValue(&I,
+ DAG.getNode(ISD::SPONENTRY, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
@@ -5864,7 +5856,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
@@ -5887,7 +5879,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
@@ -5910,10 +5902,28 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
+ SDValue MS = DAG.getMemset(
+ Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
+ isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+ updateDAGForMaybeTailCall(MS);
+ return;
+ }
+ case Intrinsic::memset_inline: {
+ const auto &MSII = cast<MemSetInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Value = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MSII.getDestAlign().valueOrOne();
+ bool isVol = MSII.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
+ /* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
I.getAAMetadata());
- updateDAGForMaybeTailCall(MS);
+ updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove: {
@@ -5924,7 +5934,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memmove defines 0 and 1 to both mean no alignment.
Align DstAlign = MMI.getDestAlign().valueOrOne();
Align SrcAlign = MMI.getSourceAlign().valueOrOne();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
@@ -5943,15 +5953,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- unsigned DstAlign = MI.getDestAlignment();
- unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
- SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
- SrcAlign, Length, LengthTy, ElemSz, isTC,
- MachinePointerInfo(MI.getRawDest()),
- MachinePointerInfo(MI.getRawSource()));
+ SDValue MC =
+ DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5961,15 +5969,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- unsigned DstAlign = MI.getDestAlignment();
- unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
- SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
- SrcAlign, Length, LengthTy, ElemSz, isTC,
- MachinePointerInfo(MI.getRawDest()),
- MachinePointerInfo(MI.getRawSource()));
+ SDValue MC =
+ DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5979,13 +5985,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
- unsigned DstAlign = MI.getDestAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
- SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
- LengthTy, ElemSz, isTC,
- MachinePointerInfo(MI.getRawDest()));
+ SDValue MC =
+ DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
@@ -6085,7 +6090,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
+ FuncArgumentDbgValueKind::Declare, N);
return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
@@ -6095,8 +6101,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
- N)) {
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
+ FuncArgumentDbgValueKind::Declare, N)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (could not emit func-arg dbg_value)\n");
}
@@ -6162,8 +6168,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
- assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0));
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
@@ -6343,6 +6348,29 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
#include "llvm/IR/VPIntrinsics.def"
visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
return;
+ case Intrinsic::fptrunc_round: {
+ // Get the last argument (the metadata) and convert it to an integer in
+ // the call.
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Propagate fast-math-flags from IR to node(s).
+ SDNodeFlags Flags;
+ Flags.copyFMF(*cast<FPMathOperator>(&I));
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+ SDValue Result;
+ Result = DAG.getNode(
+ ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant((int)*RoundMode, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I, Result);
+
+ return;
+ }
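// Editor's sketch (not part of the patch): the rounding mode reaches this
// code as a metadata string operand. Assumed IR shape:
//   %r = call half @llvm.fptrunc.round.f16.f32(float %x,
//                                              metadata !"round.towardzero")
// convertStrToRoundingMode() maps the string to a RoundingMode value, which
// is then attached to the FPTRUNC_ROUND node as a target constant.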
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -6397,6 +6425,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Res);
DAG.setRoot(Res.getValue(0));
return;
+ case Intrinsic::is_fpclass: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT DestVT = TLI.getValueType(DLayout, I.getType());
+ EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
+ unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(
+ !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
+ // If ISD::IS_FPCLASS should be expanded, do it right now, because the
+ // expansion can use illegal types. Expanding early allows these types to
+ // be legalized prior to selection.
+ if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
+ SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
+ setValue(&I, Result);
+ return;
+ }
+
+ SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
+ SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
+ setValue(&I, V);
+ return;
+ }
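// Editor's sketch (not part of the patch): the second operand is a bit mask
// selecting which FP classes to test for. Assuming the LangRef bit layout
// (bit 0 = signaling NaN, bit 1 = quiet NaN, ...), a NaN check looks like:
//   %isnan = call i1 @llvm.is.fpclass.f32(float %x, i32 3)  ; fcNan
// When IS_FPCLASS is not legal or custom for ArgVT, expandIS_FPCLASS()
// lowers the test to integer bit tests on the value's representation.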
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -6843,7 +6896,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::invariant_start:
// Discard region information.
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I,
+ DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::invariant_end:
// Discard region information.
@@ -7147,7 +7201,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, SetCC);
return;
}
- case Intrinsic::experimental_vector_insert: {
+ case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
@@ -7164,7 +7218,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Index));
return;
}
- case Intrinsic::experimental_vector_extract: {
+ case Intrinsic::vector_extract: {
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -7242,7 +7296,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
};
SDVTList VTs = DAG.getVTList(ValueVTs);
- fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+ fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
SDNodeFlags Flags;
if (EB == fp::ExceptionBehavior::ebIgnore)
@@ -7307,13 +7361,14 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
-#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
-#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD;
-#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
+ case Intrinsic::VPID: \
+ ResOPC = ISD::VPSD; \
+ break;
#include "llvm/IR/VPIntrinsics.def"
}
- if (!ResOPC.hasValue())
+ if (!ResOPC)
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
@@ -7324,7 +7379,7 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
: ISD::VP_REDUCE_FMUL;
}
- return ResOPC.getValue();
+ return *ResOPC;
}
void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
@@ -7362,11 +7417,12 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent());
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
if (!UniformBase) {
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -7418,11 +7474,12 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent());
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
if (!UniformBase) {
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -7441,18 +7498,104 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
setValue(&VPIntrin, ST);
}
+void SelectionDAGBuilder::visitVPStridedLoad(
+ const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+
+ SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
+ OpValues[2], OpValues[3], MMO,
+ false /*IsExpanding*/);
+
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStridedStore(
+ const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+
+ SDValue ST = DAG.getStridedStoreVP(
+ getMemoryRoot(), DL, OpValues[0], OpValues[1],
+ DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3],
+ OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false,
+ /*IsCompressing*/ false);
+
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
+void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL = getCurSDLoc();
+
+ ISD::CondCode Condition;
+ CmpInst::Predicate CondCode = VPIntrin.getPredicate();
+ bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
+ if (IsFP) {
+ // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
+ // flags, but calls that don't return floating-point types, such as
+ // vp.fcmp, can't be FPMathOperators. This affects constrained fcmp too.
+ Condition = getFCmpCondCode(CondCode);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ } else {
+ Condition = getICmpCondCode(CondCode);
+ }
+
+ SDValue Op1 = getValue(VPIntrin.getOperand(0));
+ SDValue Op2 = getValue(VPIntrin.getOperand(1));
+ // #2 is the condition code
+ SDValue MaskOp = getValue(VPIntrin.getOperand(3));
+ SDValue EVL = getValue(VPIntrin.getOperand(4));
+ MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
+ assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
+ "Unexpected target EVL type");
+ EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL);
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ VPIntrin.getType());
+ setValue(&VPIntrin,
+ DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL));
+}
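// Editor's sketch (not part of the patch): the operand layout assumed above,
// with the predicate carried as metadata operand #2 (hence it is skipped):
//   %c = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                          metadata !"slt",
//                                          <4 x i1> %mask, i32 %evl)
// Operands #3 and #4 are the mask and the explicit vector length (EVL).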
+
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
+ auto IID = VPIntrin.getIntrinsicID();
+
+ if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(&VPIntrin))
+ return visitVPCmp(*CmpI);
+
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
- auto EVLParamPos =
- VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID());
+ auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID);
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
@@ -7469,7 +7612,10 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
switch (Opcode) {
default: {
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
+ SDFlags.copyFMF(*FPMO);
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags);
setValue(&VPIntrin, Result);
break;
}
@@ -7478,10 +7624,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
Opcode == ISD::VP_GATHER);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
+ break;
case ISD::VP_STORE:
case ISD::VP_SCATTER:
visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ visitVPStridedStore(VPIntrin, OpValues);
+ break;
}
}
@@ -7756,7 +7908,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
- const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
+ const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
if (CSize && CSize->getZExtValue() == 0) {
EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
@@ -8277,7 +8429,7 @@ public:
// accessed type.
if (isIndirect) {
OpTy = ParamElemType;
- assert(OpTy && "Indirect opernad must have elementtype attribute");
+ assert(OpTy && "Indirect operand must have elementtype attribute");
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
@@ -8398,8 +8550,9 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- // No work to do for memory operations.
- if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+ // No work to do for memory/address operands.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Address)
return None;
// If this is a constraint for a single physreg, or a constraint for a
@@ -8579,7 +8732,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (OpInfo.hasArg()) {
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
- Type *ParamElemTy = Call.getAttributes().getParamElementType(ArgNo);
+ Type *ParamElemTy = Call.getParamElementType(ArgNo);
EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
DAG.getDataLayout(), ParamElemTy);
OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
@@ -8657,8 +8810,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
- if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
- OpInfo.Type == InlineAsm::isClobber)
+ if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber) ||
+ OpInfo.ConstraintType == TargetLowering::C_Address)
continue;
// If this is a memory input, and if the operand is not indirect, do what we
@@ -8708,7 +8862,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
: OpInfo;
const auto RegError =
getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
- if (RegError.hasValue()) {
+ if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const char *RegName = TRI.getName(RegError.getValue());
@@ -8733,6 +8887,10 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
return false;
};
+ assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
+ (OpInfo.Type == InlineAsm::isInput &&
+ !OpInfo.isMatchingInputConstraint())) &&
+ "Only address as input operand is allowed.");
switch (OpInfo.Type) {
case InlineAsm::isOutput:
@@ -8865,8 +9023,11 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
}
- if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
- assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Address) {
+ assert((OpInfo.isIndirect ||
+ OpInfo.ConstraintType != TargetLowering::C_Memory) &&
+ "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() ==
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
@@ -9004,6 +9165,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
case TargetLowering::C_Memory:
break; // Already handled.
+ case TargetLowering::C_Address:
+ break; // Silence warning.
case TargetLowering::C_Unknown:
assert(false && "Unexpected unknown constraint");
}
@@ -9950,8 +10113,9 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
}
-void
-SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
+ unsigned Reg,
+ ISD::NodeType ExtendType) {
SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
@@ -9966,10 +10130,11 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
- ISD::NodeType ExtendType = ISD::ANY_EXTEND;
- auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
- if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
- ExtendType = PreferredExtendIt->second;
+ if (ExtendType == ISD::ANY_EXTEND) {
+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
+ ExtendType = PreferredExtendIt->second;
+ }
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
@@ -10542,6 +10707,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
@@ -10579,7 +10745,13 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
- CopyValueToVirtualRegister(C, RegOut);
+ // We need to zero/sign extend ConstantInt phi operands to match
+ // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ CopyValueToVirtualRegister(C, RegOut, ExtendType);
}
Reg = RegOut;
} else {
@@ -10599,7 +10771,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Remember that this register needs to added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ea48042a5dcf..72cca3d9b001 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -284,7 +284,8 @@ public:
return CurInst ? CurInst->getDebugLoc() : DebugLoc();
}
- void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg,
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND);
void visit(const Instruction &I);
@@ -527,7 +528,7 @@ private:
void visitInsertElement(const User &I);
void visitShuffleVector(const User &I);
- void visitExtractValue(const User &I);
+ void visitExtractValue(const ExtractValueInst &I);
void visitInsertValue(const User &I);
void visitLandingPad(const LandingPadInst &LP);
@@ -570,6 +571,11 @@ private:
SmallVector<SDValue, 7> &OpValues, bool IsGather);
void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SmallVector<SDValue, 7> &OpValues, bool IsScatter);
+ void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVectorImpl<SDValue> &OpValues);
+ void visitVPStridedStore(const VPIntrinsic &VPIntrin,
+ SmallVectorImpl<SDValue> &OpValues);
+ void visitVPCmp(const VPCmpIntrinsic &VPIntrin);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
@@ -602,12 +608,22 @@ private:
void emitInlineAsmError(const CallBase &Call, const Twine &Message);
+ /// An enum that tells EmitFuncArgumentDbgValue which kind of intrinsic a
+ /// function-argument debug value originally came from. This controls the
+ /// internal behavior of EmitFuncArgumentDbgValue.
+ enum class FuncArgumentDbgValueKind {
+ Value, // This was originally a llvm.dbg.value.
+ Addr, // This was originally a llvm.dbg.addr.
+ Declare, // This was originally a llvm.dbg.declare.
+ };
+
/// If V is a function argument then create corresponding DBG_VALUE machine
/// instruction for it now. At the end of instruction selection, they will be
/// inserted to the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
DIExpression *Expr, DILocation *DL,
- bool IsDbgDeclare, const SDValue &N);
+ FuncArgumentDbgValueKind Kind,
+ const SDValue &N);
/// Return the next block after MBB, or nullptr if there is none.
MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
@@ -673,9 +689,7 @@ struct RegsForValue {
const DataLayout &DL, unsigned Reg, Type *Ty,
Optional<CallingConv::ID> CC);
- bool isABIMangled() const {
- return CallConv.hasValue();
- }
+ bool isABIMangled() const { return CallConv.has_value(); }
/// Add the specified values to this one.
void append(const RegsForValue &RHS) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 77e9e53668f9..bbfc6e5ef64f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -10,9 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -45,7 +45,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "SDNodeDbgValue.h"
#include <cstdint>
#include <iterator>
@@ -231,6 +230,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::MUL: return "mul";
case ISD::MULHU: return "mulhu";
case ISD::MULHS: return "mulhs";
+ case ISD::AVGFLOORU: return "avgflooru";
+ case ISD::AVGFLOORS: return "avgfloors";
+ case ISD::AVGCEILU: return "avgceilu";
+ case ISD::AVGCEILS: return "avgceils";
case ISD::ABDS: return "abds";
case ISD::ABDU: return "abdu";
case ISD::SDIV: return "sdiv";
@@ -267,6 +270,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
case ISD::FCANONICALIZE: return "fcanonicalize";
+ case ISD::IS_FPCLASS: return "is_fpclass";
case ISD::FPOW: return "fpow";
case ISD::STRICT_FPOW: return "strict_fpow";
case ISD::SMIN: return "smin";
@@ -361,6 +365,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
+ case ISD::BF16_TO_FP: return "bf16_to_fp";
+ case ISD::FP_TO_BF16: return "fp_to_bf16";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
@@ -814,6 +820,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
if (LN->hasOffset())
OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
+ } else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) {
+ OS << '<' << AA->getAlign().value() << '>';
}
if (VerboseDAGDumping) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3c786904620a..2b63359c2b1b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -15,11 +15,9 @@
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -29,6 +27,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -69,7 +68,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -82,7 +80,6 @@
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -370,8 +367,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
// PHI.
for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
- if (!CE || !CE->canTrap()) continue;
+ Constant *C = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (!C || !C->canTrap()) continue;
// The only case we have to worry about is when the edge is critical.
// Since this block has a PHI Node, we assume it has multiple input
@@ -425,6 +422,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const Function &Fn = mf.getFunction();
MF = &mf;
+ // Decide what flavour of variable location debug-info will be used, before
+ // we change the optimisation level.
+ UseInstrRefDebugInfo = mf.useDebugInstrRef();
+ CurDAG->useInstrRefDebugInfo(UseInstrRefDebugInfo);
+
// Reset the target options before resetting the optimization
// level below.
// FIXME: This is a horrible hack and should be processed via
@@ -654,7 +656,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// For debug-info, in instruction referencing mode, we need to perform some
// post-isel maintenence.
- MF->finalizeDebugInstrRefs();
+ if (UseInstrRefDebugInfo)
+ MF->finalizeDebugInstrRefs();
// Determine if there are any calls in this machine function.
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -703,6 +706,7 @@ static void reportFastISelFailure(MachineFunction &MF,
report_fatal_error(Twine(R.getMsg()));
ORE.emit(R);
+ LLVM_DEBUG(dbgs() << R.getMsg() << "\n");
}
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
@@ -1380,6 +1384,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (TM.Options.EnableFastISel) {
LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
+ if (FastIS)
+ FastIS->useInstrRefDebugInfo(UseInstrRefDebugInfo);
}
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -1519,6 +1525,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
BeforeInst->hasOneUse() &&
FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
// If we succeeded, don't re-select the load.
+ LLVM_DEBUG(dbgs()
+ << "FastISel folded load: " << *BeforeInst << "\n");
BI = std::next(BasicBlock::const_iterator(BeforeInst));
--NumFastIselRemaining;
++NumFastIselSuccess;
@@ -3264,6 +3272,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ // If the chained node is not the root, we can't fold it if it has
+ // multiple uses.
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
@@ -3301,6 +3311,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ // If the chained node is not the root, we can't fold it if it has
+ // multiple uses.
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
@@ -3439,12 +3451,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// such nodes must have a chain, it suffices to check ChainNodesMatched.
// We need to perform this check before potentially modifying one of the
// nodes via MorphNode.
- bool MayRaiseFPException = false;
- for (auto *N : ChainNodesMatched)
- if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) {
- MayRaiseFPException = true;
- break;
- }
+ bool MayRaiseFPException =
+ llvm::any_of(ChainNodesMatched, [this](SDNode *N) {
+ return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept();
+ });
// Create the node.
MachineSDNode *Res = nullptr;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index d022e2a23ea0..b66eeb6d2bb1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -13,15 +13,11 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "dag-printer"
@@ -181,11 +177,11 @@ LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName,
/// clearGraphAttrs - Clear all previously defined node graph attributes.
/// Intended to be used from a debugging tool (eg. gdb).
void SelectionDAG::clearGraphAttrs() {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
NodeGraphAttrs.clear();
#else
- errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::clearGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
#endif
}
@@ -193,11 +189,11 @@ void SelectionDAG::clearGraphAttrs() {
/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
///
void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
NodeGraphAttrs[N] = Attrs;
#else
- errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
#endif
}
@@ -205,7 +201,7 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
/// Used from getNodeAttributes.
std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
std::map<const SDNode *, std::string>::const_iterator I =
NodeGraphAttrs.find(N);
@@ -214,8 +210,8 @@ std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
else
return "";
#else
- errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::getGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
return std::string();
#endif
}
@@ -223,11 +219,11 @@ std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
/// setGraphColor - Convenience for setting node color attribute.
///
void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
NodeGraphAttrs[N] = std::string("color=") + Color;
#else
- errs() << "SelectionDAG::setGraphColor is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setGraphColor is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
#endif
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index dfda7d8b9f81..19a52fde44c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,7 +17,10 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -27,6 +30,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -168,7 +172,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
const auto &RelocationMap =
Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()];
- auto It = RelocationMap.find(Relocate->getDerivedPtr());
+ auto It = RelocationMap.find(Relocate);
if (It == RelocationMap.end())
return None;
@@ -192,10 +196,10 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
for (auto &IncomingValue : Phi->incoming_values()) {
Optional<int> SpillSlot =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
- if (!SpillSlot.hasValue())
+ if (!SpillSlot)
return None;
- if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
+ if (MergedResult && *MergedResult != *SpillSlot)
return None;
MergedResult = SpillSlot;
@@ -276,7 +280,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
const int LookUpDepth = 6;
Optional<int> Index =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
- if (!Index.hasValue())
+ if (!Index)
return;
const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots;
@@ -526,14 +530,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
GCStrategy &S = GFI->getStrategy();
for (const Value *V : SI.Bases) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt.hasValue()) {
+ if (Opt) {
assert(Opt.getValue() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : SI.Ptrs) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt.hasValue()) {
+ if (Opt) {
assert(Opt.getValue() &&
"non gc managed derived pointer found in statepoint");
}
@@ -880,8 +884,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
- // For values lowered to tied-defs, create the virtual registers. Note that
- // for simplicity, we *always* create a vreg even within a single block.
+ // For values lowered to tied-defs, create the virtual registers if they are
+ // used in other blocks. For a local gc.relocate, record the appropriate
+ // statepoint result in StatepointLoweringState.
DenseMap<SDValue, Register> VirtRegs;
for (const auto *Relocate : SI.GCRelocates) {
Value *Derived = Relocate->getDerivedPtr();
@@ -889,12 +894,23 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
if (!LowerAsVReg.count(SD))
continue;
+ SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]);
+
+ // Handle local relocate. Note that different relocates might
+ // map to the same SDValue.
+ if (SI.StatepointInstr->getParent() == Relocate->getParent()) {
+ SDValue Res = StatepointLowering.getLocation(SD);
+ if (Res)
+ assert(Res == Relocated);
+ else
+ StatepointLowering.setLocation(SD, Relocated);
+ continue;
+ }
+
// Handle multiple gc.relocates of the same input efficiently.
if (VirtRegs.count(SD))
continue;
- SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]);
-
auto *RetTy = Relocate->getType();
Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
@@ -915,8 +931,13 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
SDValue SDV = getValue(V);
SDValue Loc = StatepointLowering.getLocation(SDV);
+ bool IsLocal = (Relocate->getParent() == StatepointInstr->getParent());
+
RecordType Record;
- if (LowerAsVReg.count(SDV)) {
+ if (IsLocal && LowerAsVReg.count(SDV)) {
+ // Result is already stored in StatepointLowering
+ Record.type = RecordType::SDValueNode;
+ } else if (LowerAsVReg.count(SDV)) {
Record.type = RecordType::VReg;
assert(VirtRegs.count(SDV));
Record.payload.Reg = VirtRegs[SDV];
@@ -932,7 +953,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
if (Relocate->getParent() != StatepointInstr->getParent())
ExportFromCurrentBlock(V);
}
- RelocationMap[V] = Record;
+ RelocationMap[Relocate] = Record;
}
@@ -1148,8 +1169,8 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes());
- SI.ID = SD.StatepointID.getValueOr(DefaultID);
- SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
+ SI.ID = SD.StatepointID.value_or(DefaultID);
+ SI.NumPatchBytes = SD.NumPatchBytes.value_or(0);
SI.DeoptState =
ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
@@ -1210,11 +1231,19 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
const Value *DerivedPtr = Relocate.getDerivedPtr();
auto &RelocationMap =
FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()];
- auto SlotIt = RelocationMap.find(DerivedPtr);
+ auto SlotIt = RelocationMap.find(&Relocate);
assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value");
const RecordType &Record = SlotIt->second;
// If relocation was done via a virtual register.
+ if (Record.type == RecordType::SDValueNode) {
+ assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() &&
+ "Nonlocal gc.relocate mapped via SDValue");
+ SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr));
+ assert(SDV.getNode() && "empty SDValue");
+ setValue(&Relocate, SDV);
+ return;
+ }
if (Record.type == RecordType::VReg) {
Register InReg = Record.payload.Reg;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f6d1fa87676f..a6b471ea22b7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -13,13 +13,13 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -30,7 +30,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
@@ -94,6 +93,8 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
// (We look for a CopyFromReg reading a virtual register that is used
// for the function live-in value of register Reg)
SDValue Value = OutVals[I];
+ if (Value->getOpcode() == ISD::AssertZext)
+ Value = Value.getOperand(0);
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
@@ -121,7 +122,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamStackAlign(ArgIdx);
IndirectType = nullptr;
- assert(IsByVal + IsPreallocated + IsInAlloca <= 1 &&
+ assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
"multiple ABI attributes?");
if (IsByVal) {
IndirectType = Call->getParamByValType(ArgIdx);
@@ -132,6 +133,8 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IndirectType = Call->getParamPreallocatedType(ArgIdx);
if (IsInAlloca)
IndirectType = Call->getParamInAllocaType(ArgIdx);
+ if (IsSRet)
+ IndirectType = Call->getParamStructRetType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -193,7 +196,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
- if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
+ if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
+ Op.getSrcAlign() < Op.getDstAlign())
return false;
EVT VT = getOptimalMemOpType(Op, FuncAttributes);
@@ -905,6 +909,132 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
Depth);
}
+// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
+// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
+static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
+ "SRL or SRA node is required here!");
+ // Is the right shift using an immediate value of 1?
+ ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
+ if (!N1C || !N1C->isOne())
+ return SDValue();
+
+ // We are looking for an avgfloor
+ // add(ext, ext)
+ // or one of these as an avgceil
+ // add(add(ext, ext), 1)
+ // add(add(ext, 1), ext)
+ // add(ext, add(ext, 1))
+ SDValue Add = Op.getOperand(0);
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue ExtOpA = Add.getOperand(0);
+ SDValue ExtOpB = Add.getOperand(1);
+ auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
+ ConstantSDNode *ConstOp;
+ if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op2;
+ ExtOpB = Op3;
+ return true;
+ }
+ if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op1;
+ ExtOpB = Op3;
+ return true;
+ }
+ if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op1;
+ ExtOpB = Op2;
+ return true;
+ }
+ return false;
+ };
+ bool IsCeil =
+ (ExtOpA.getOpcode() == ISD::ADD &&
+ MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
+ (ExtOpB.getOpcode() == ISD::ADD &&
+ MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));
+
+ // If the shift is signed (sra):
+ // - Needs >= 2 sign bit for both operands.
+ // - Needs >= 2 zero bits.
+ // If the shift is unsigned (srl):
+ // - Needs >= 1 zero bit for both operands.
+ // - Needs 1 demanded bit zero and >= 2 sign bits.
+ unsigned ShiftOpc = Op.getOpcode();
+ bool IsSigned = false;
+ unsigned KnownBits;
+ unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
+ unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
+ unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
+ unsigned NumZeroA =
+ DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
+ unsigned NumZeroB =
+ DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
+ unsigned NumZero = std::min(NumZeroA, NumZeroB);
+
+ switch (ShiftOpc) {
+ default:
+ llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
+ case ISD::SRA: {
+ if (NumZero >= 2 && NumSigned < NumZero) {
+ IsSigned = false;
+ KnownBits = NumZero;
+ break;
+ }
+ if (NumSigned >= 1) {
+ IsSigned = true;
+ KnownBits = NumSigned;
+ break;
+ }
+ return SDValue();
+ }
+ case ISD::SRL: {
+ if (NumZero >= 1 && NumSigned < NumZero) {
+ IsSigned = false;
+ KnownBits = NumZero;
+ break;
+ }
+ if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
+ IsSigned = true;
+ KnownBits = NumSigned;
+ break;
+ }
+ return SDValue();
+ }
+ }
+
+ unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
+ : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
+
+ // Find the smallest power-2 type that is legal for this vector size and
+ // operation, given the original type size and the number of known sign/zero
+ // bits.
+ EVT VT = Op.getValueType();
+ unsigned MinWidth =
+ std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
+ if (VT.isVector())
+ NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue ResultAVG =
+ DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
+ DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
+ return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
+ ResultAVG);
+}
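// Editor's sketch (not part of the patch): the known sign/zero bits above
// guarantee the widened add cannot overflow, which is what makes the
// narrowing sound. For unsigned 8-bit inputs extended to 16 bits:
//   uint16_t Wide = (uint16_t)A + (uint16_t)B;    // Cannot wrap.
//   uint8_t FloorAvg = (uint8_t)(Wide >> 1);      // avgflooru(A, B)
//   uint8_t CeilAvg = (uint8_t)((Wide + 1) >> 1); // avgceilu(A, B)
// so shr(add(ext(A), ext(B)[, 1]), 1) can be rewritten as
// ext(avgfloor/avgceil(A, B)) on the smallest legal type.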
+
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -989,7 +1119,7 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits SrcKnown;
SDValue Src = Op.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
- APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
+ APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
@@ -1105,7 +1235,7 @@ bool TargetLowering::SimplifyDemandedBits(
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
Depth + 1))
@@ -1409,6 +1539,19 @@ bool TargetLowering::SimplifyDemandedBits(
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
+ case ISD::VSELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ Known = KnownBits::commonBits(Known, Known2);
+ break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
@@ -1542,6 +1685,16 @@ bool TargetLowering::SimplifyDemandedBits(
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// Try shrinking the operation as long as the shift amount will still be
// in range.
if ((ShAmt < DemandedBits.getActiveBits()) &&
@@ -1567,6 +1720,11 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
+ // Try to match AVG patterns.
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
@@ -1633,6 +1791,11 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+ // Try to match AVG patterns.
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
@@ -1727,6 +1890,22 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
+ !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
+ DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
+ DemandedOp1, Op2);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
@@ -1899,7 +2078,8 @@ bool TargetLowering::SimplifyDemandedBits(
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
- if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1965,7 +2145,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
- APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
@@ -2002,7 +2182,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
- APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
@@ -2046,7 +2226,7 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
- APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
@@ -2265,9 +2445,27 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::MUL:
- // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
- if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
- return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ if (DemandedBits.isPowerOf2()) {
+ // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
+ // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
+ // odd (has LSB set), then the left-shifted low bit of X is the answer.
+ unsigned CTZ = DemandedBits.countTrailingZeros();
+ ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
+ if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
+ SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
+ SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
+ return TLO.CombineTo(Op, Shl);
+ }
+ }
+ // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
+ // X * X is odd iff X is odd.
+ // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
+ if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
+ SDValue One = TLO.DAG.getConstant(1, dl, VT);
+ SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
+ return TLO.CombineTo(Op, And1);
+ }
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
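
The rewritten ISD::MUL case generalizes the old bit[1]-only fold: the two low bits of a square are fully determined, since (2k)^2 = 4k^2 and (2k+1)^2 = 4k^2 + 4k + 1. A quick exhaustive check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x <= 0xFFFF; ++x) {
        uint32_t Sq = x * x;
        assert((Sq & 1) == (x & 1)); // X * X is odd iff X is odd
        assert((Sq & 2) == 0);       // bit 1 of a square is always zero
      }
      return 0;
    }
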
@@ -2330,6 +2528,49 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, NewOp);
}
+  // Match a multiply with a disguised negated-power-of-2 and convert it to
+  // an equivalent shift-left amount.
+ // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
+ auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
+ if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
+ return 0;
+
+ // Don't touch opaque constants. Also, ignore zero and power-of-2
+ // multiplies. Those will get folded later.
+ ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
+ if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
+ !MulC->getAPIntValue().isPowerOf2()) {
+ APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
+ if (UnmaskedC.isNegatedPowerOf2())
+ return (-UnmaskedC).logBase2();
+ }
+ return 0;
+ };
+
+ auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
+ SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
+ SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
+ SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
+ return TLO.CombineTo(Op, Res);
+ };
+
+ if (isOperationLegalOrCustom(ISD::SHL, VT)) {
+ if (Op.getOpcode() == ISD::ADD) {
+ // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op0))
+ return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
+ // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op1))
+ return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
+ }
+ if (Op.getOpcode() == ISD::SUB) {
+ // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op1))
+ return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
+ }
+ }
+
LLVM_FALLTHROUGH;
}
default:
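
The fold above is plain modular arithmetic: multiplying by a negated power of two is a shift followed by a negation, which the surrounding add/sub absorbs. A sketch of the ADD case with an assumed MulC of -8 (so log2(-MulC) = 3):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t MulC = (uint32_t)-8; // a negated power of 2
      for (uint32_t x = 0; x < 4096; ++x)
        for (uint32_t y = 0; y < 4096; y += 17) {
          uint32_t MulAdd = x * MulC + y; // (X * MulC) + Op1
          uint32_t SubShl = y - (x << 3); // Op1 - (X << log2(-MulC))
          assert(MulAdd == SubShl);
        }
      return 0;
    }
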
@@ -2347,7 +2588,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we know the value of all of the demanded bits, return this as a
// constant.
- if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+ if (!isTargetCanonicalConstantNode(Op) &&
+ DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNode *Op :
@@ -2370,13 +2612,12 @@ bool TargetLowering::SimplifyDemandedBits(
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
const APInt &DemandedElts,
- APInt &KnownUndef,
- APInt &KnownZero,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
+ APInt KnownUndef, KnownZero;
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
if (Simplified) {
@@ -2447,6 +2688,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef = KnownZero = APInt::getZero(NumElts);
+ const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
+ if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
+ return false;
+
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
return false;
@@ -2565,6 +2810,21 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
TLO, Depth + 1))
return true;
+
+      // The bitcast has split each wide element into a number of narrow
+      // subelements. We have just computed the Known bits for wide
+      // elements. See if element splitting results in some subelements
+      // being zero; record this only for demanded elements.
+ for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
+ if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
+ .isAllOnes())
+ continue;
+ for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
+ unsigned Elt = Scale * SrcElt + SubElt;
+ if (DemandedElts[Elt])
+ KnownZero.setBit(Elt);
+ }
+ }
}
// If the src element is zero/undef then all the output elements will be -
@@ -2646,6 +2906,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef.insertBits(SubUndef, i * NumSubElts);
KnownZero.insertBits(SubZero, i * NumSubElts);
}
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedElts.isAllOnes()) {
+ bool FoundNewSub = false;
+ SmallVector<SDValue, 2> DemandedSubOps;
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ SDValue SubOp = Op.getOperand(i);
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
+ SubOp, SubElts, TLO.DAG, Depth + 1);
+ DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
+ FoundNewSub = NewSubOp ? true : FoundNewSub;
+ }
+ if (FoundNewSub) {
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
break;
}
case ISD::INSERT_SUBVECTOR: {
@@ -2699,7 +2978,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
@@ -2858,7 +3137,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
APInt SrcUndef, SrcZero;
SDValue Src = Op.getOperand(0);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
@@ -3618,6 +3897,115 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return SDValue();
}
+static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
+ return SDValue();
+
+ auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
+ if (!C1 || !(C1->isZero() || C1->isAllOnes()))
+ return SDValue();
+
+ auto getRotateSource = [](SDValue X) {
+ if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
+ return X.getOperand(0);
+ return SDValue();
+ };
+
+ // Peek through a rotated value compared against 0 or -1:
+ // (rot X, Y) == 0/-1 --> X == 0/-1
+ // (rot X, Y) != 0/-1 --> X != 0/-1
+ if (SDValue R = getRotateSource(N0))
+ return DAG.getSetCC(dl, VT, R, N1, Cond);
+
+ // Peek through an 'or' of a rotated value compared against 0:
+ // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
+ // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
+ //
+ // TODO: Add the 'and' with -1 sibling.
+ // TODO: Recurse through a series of 'or' ops to find the rotate.
+ EVT OpVT = N0.getValueType();
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
+ if (SDValue R = getRotateSource(N0.getOperand(0))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ if (SDValue R = getRotateSource(N0.getOperand(1))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // If we are testing for all-bits-clear, we might be able to do that with
+ // less shifting since bit-order does not matter.
+ if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
+ return SDValue();
+
+ auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
+ if (!C1 || !C1->isZero())
+ return SDValue();
+
+ if (!N0.hasOneUse() ||
+ (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
+ return SDValue();
+
+ unsigned BitWidth = N0.getScalarValueSizeInBits();
+ auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
+ if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+ return SDValue();
+
+ // Canonicalize fshr as fshl to reduce pattern-matching.
+ unsigned ShAmt = ShAmtC->getZExtValue();
+ if (N0.getOpcode() == ISD::FSHR)
+ ShAmt = BitWidth - ShAmt;
+
+ // Match an 'or' with a specific operand 'Other' in either commuted variant.
+ SDValue X, Y;
+ auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
+ if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
+ return false;
+ if (Or.getOperand(0) == Other) {
+ X = Or.getOperand(0);
+ Y = Or.getOperand(1);
+ return true;
+ }
+ if (Or.getOperand(1) == Other) {
+ X = Or.getOperand(1);
+ Y = Or.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ EVT OpVT = N0.getValueType();
+ EVT ShAmtVT = N0.getOperand(2).getValueType();
+ SDValue F0 = N0.getOperand(0);
+ SDValue F1 = N0.getOperand(1);
+ if (matchOr(F0, F1)) {
+ // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
+ SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ if (matchOr(F1, F0)) {
+ // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
+ SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+
+ return SDValue();
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
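
foldSetCCWithRotate works because a rotate merely permutes bits: it maps 0 to 0 and -1 to -1, and no other value to either, so the rotate can be peeled off an equality test against those constants (the funnel-shift fold below rests on the same all-bits-clear reasoning). A minimal check:

    #include <cassert>
    #include <cstdint>

    static uint32_t rotl(uint32_t x, unsigned s) {
      s &= 31;
      return s == 0 ? x : (x << s) | (x >> (32 - s));
    }

    int main() {
      for (unsigned s = 0; s < 32; ++s) {
        assert(rotl(0u, s) == 0u);          // (rot X, Y) == 0  iff X == 0
        assert(rotl(~0u, s) == ~0u);        // (rot X, Y) == -1 iff X == -1
        assert(rotl(0x00010000u, s) != 0u); // nonzero inputs stay nonzero
      }
      return 0;
    }
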
@@ -3632,13 +4020,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
return Fold;
+ bool N0ConstOrSplat =
+ isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+ bool N1ConstOrSplat =
+ isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
// FIXME: We can't yet fold constant scalable vector splats, so avoid an
// infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
- if (isConstOrConstSplat(N0) &&
- (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
+ if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -3647,13 +4039,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// -- but in reverse order -- then try to commute the operands of this setcc
// to match. A matching pair of setcc (cmp) and sub may be combined into 1
// instruction on some targets.
- if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+ if (!N0ConstOrSplat && !N1ConstOrSplat &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
!DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+ if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
+ return V;
+
+ if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
+ return V;
+
if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
@@ -4399,37 +4797,30 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
- if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(RHSC->getAPIntValue()-
- LHSR->getAPIntValue(),
- dl, N0.getValueType()), Cond);
- }
-
- // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
- if (N0.getOpcode() == ISD::XOR)
- // If we know that all of the inverted bits are zero, don't bother
- // performing the inversion.
- if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
- return
- DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(LHSR->getAPIntValue() ^
- RHSC->getAPIntValue(),
- dl, N0.getValueType()),
- Cond);
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+
+ // Turn (X^C1) == C2 --> X == C1^C2
+ if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
}
// Turn (C1-X) == C2 --> X == C1-C2
- if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
- if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
- return
- DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(SUBC->getAPIntValue() -
- RHSC->getAPIntValue(),
- dl, N0.getValueType()),
- Cond);
- }
- }
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
// Could RHSC fold directly into a compare?
if (RHSC->getValueType(0).getSizeInBits() <= 64)
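
The (X ^ C1) == C2 rewrite no longer needs the MaskedValueIsZero guard from the deleted lines: xor-by-C1 is an involution, so the equality always transfers (the added one-use check only keeps the DAG from growing). Exhaustive over i8 with arbitrary sample constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 0x5A, C2 = 0x33;
      for (uint32_t x = 0; x < 256; ++x)
        assert(((x ^ C1) == C2) == (x == (C1 ^ C2)));
      return 0;
    }
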
@@ -4582,13 +4973,14 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
+ case 'p': // Address.
+ return C_Address;
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
return C_Immediate;
case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
- case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
@@ -4826,8 +5218,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
if (OpInfo.CallOperandVal) {
llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
if (OpInfo.isIndirect) {
- OpTy = Call.getAttributes().getParamElementType(ArgNo);
- assert(OpTy && "Indirect opernad must have elementtype attribute");
+ OpTy = Call.getParamElementType(ArgNo);
+ assert(OpTy && "Indirect operand must have elementtype attribute");
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
@@ -4962,6 +5354,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_RegisterClass:
return 2;
case TargetLowering::C_Memory:
+ case TargetLowering::C_Address:
return 3;
}
llvm_unreachable("Invalid constraint type");
@@ -5232,6 +5625,17 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}
+SDValue
+TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SREM as SREM
+ return SDValue();
+}
+
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -7016,6 +7420,30 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return true;
}
+SDValue
+TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
+ Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
+ "Wrong opcode");
+
+ if (Node->getFlags().hasNoNaNs()) {
+ ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+ SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
+ // Copy FMF flags, but always set the no-signed-zeros flag
+ // as this is implied by the FMINNUM/FMAXNUM semantics.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoSignedZeros(true);
+ SelCC->setFlags(Flags);
+ return SelCC;
+ }
+
+ return SDValue();
+}
+
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
SelectionDAG &DAG) const {
SDLoc dl(Node);
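
The compare+select emitted by createSelectForFMINNUM_FMAXNUM agrees with fmin/fmax only once NaNs are excluded, and only up to the sign of zero (hence the forced no-signed-zeros flag). A small sanity check over non-NaN values, deliberately avoiding the -0.0/+0.0 pair:

    #include <cassert>
    #include <cmath>

    int main() {
      const double Vals[] = {-1.5, 0.0, 2.25, 1e300};
      for (double a : Vals)
        for (double b : Vals) {
          double Sel = a < b ? a : b;     // getSelectCC(..., SETLT)
          assert(Sel == std::fmin(a, b)); // matches libm fmin on non-NaN input
        }
      return 0;
    }
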
@@ -7058,29 +7486,234 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
}
}
- // If none of the above worked, but there are no NaNs, then expand to
- // a compare/select sequence. This is required for correctness since
- // InstCombine might have canonicalized a fcmp+select sequence to a
- // FMINNUM/FMAXNUM node. If we were to fall through to the default
- // expansion to libcall, we might introduce a link-time dependency
- // on libm into a file that originally did not have one.
- if (Node->getFlags().hasNoNaNs()) {
- ISD::CondCode Pred =
- Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
- SDValue Op1 = Node->getOperand(0);
- SDValue Op2 = Node->getOperand(1);
- SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
- // Copy FMF flags, but always set the no-signed-zeros flag
- // as this is implied by the FMINNUM/FMAXNUM semantics.
- SDNodeFlags Flags = Node->getFlags();
- Flags.setNoSignedZeros(true);
- SelCC->setFlags(Flags);
+ if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
return SelCC;
- }
return SDValue();
}
+SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
+ unsigned Test, SDNodeFlags Flags,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ EVT OperandVT = Op.getValueType();
+ assert(OperandVT.isFloatingPoint());
+
+  // Degenerate cases.
+ if (Test == 0)
+ return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
+ if ((Test & fcAllFlags) == fcAllFlags)
+ return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
+
+  // PPC double-double is a pair of doubles, of which the high-order part
+  // determines the value class.
+ if (OperandVT == MVT::ppcf128) {
+ Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
+ DAG.getConstant(1, DL, MVT::i32));
+ OperandVT = MVT::f64;
+ }
+
+  // Some checks may be represented as the inversion of a simpler check, for
+  // example
+ // "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+ if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
+ IsInverted = true;
+ Test = InvertedCheck;
+ }
+
+ // Floating-point type properties.
+ EVT ScalarFloatVT = OperandVT.getScalarType();
+ const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
+ const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
+ bool IsF80 = (ScalarFloatVT == MVT::f80);
+
+ // Some checks can be implemented using float comparisons, if floating point
+ // exceptions are ignored.
+ if (Flags.hasNoFPExcept() &&
+ isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
+ if (Test == fcZero)
+ return DAG.getSetCC(DL, ResultVT, Op,
+ DAG.getConstantFP(0.0, DL, OperandVT),
+ IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ if (Test == fcNan)
+ return DAG.getSetCC(DL, ResultVT, Op, Op,
+ IsInverted ? ISD::SETO : ISD::SETUO);
+ }
+
+ // In the general case use integer operations.
+ unsigned BitSize = OperandVT.getScalarSizeInBits();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
+ if (OperandVT.isVector())
+ IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
+ OperandVT.getVectorElementCount());
+ SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
+
+ // Various masks.
+ APInt SignBit = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
+ const unsigned ExplicitIntBitInF80 = 63;
+ APInt ExpMask = Inf;
+ if (IsF80)
+ ExpMask.clearBit(ExplicitIntBitInF80);
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
+ APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits());
+
+ SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
+ SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
+ SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
+ SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
+ SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
+ SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
+
+ SDValue Res;
+ const auto appendResult = [&](SDValue PartialRes) {
+ if (PartialRes) {
+ if (Res)
+ Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
+ else
+ Res = PartialRes;
+ }
+ };
+
+ SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
+ const auto getIntBitIsSet = [&]() -> SDValue {
+ if (!IntBitIsSetV) {
+ APInt IntBitMask(BitSize, 0);
+ IntBitMask.setBit(ExplicitIntBitInF80);
+ SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
+ SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
+ IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
+ }
+ return IntBitIsSetV;
+ };
+
+ // Split the value into sign bit and absolute value.
+ SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
+  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETLT);
+
+ // Tests that involve more than one class should be processed first.
+ SDValue PartialRes;
+
+ if (IsF80)
+ ; // Detect finite numbers of f80 by checking individual classes because
+ // they have different settings of the explicit integer bit.
+ else if ((Test & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) < exp_mask
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ Test &= ~fcFinite;
+ } else if ((Test & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V < exp_mask
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
+ Test &= ~fcPosFinite;
+ } else if ((Test & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ Test &= ~fcNegFinite;
+ }
+ appendResult(PartialRes);
+
+ // Check for individual classes.
+
+ if (unsigned PartialCheck = Test & fcZero) {
+ if (PartialCheck == fcPosZero)
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
+ else if (PartialCheck == fcZero)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
+ else // ISD::fcNegZero
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcInf) {
+ if (PartialCheck == fcPosInf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
+ else if (PartialCheck == fcInf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
+ else { // ISD::fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
+ }
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcNan) {
+ APInt InfWithQnanBit = Inf | QNaNBitMask;
+ SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) > int(inf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
+ if (IsF80) {
+ // Recognize unsupported values as NaNs for compatibility with glibc.
+        // For such values, (exp(V) == 0) == int_bit.
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
+ SDValue ExpIsZero =
+ DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+ SDValue IsPseudo =
+ DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
+ PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
+ }
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
+ PartialRes =
+ DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
+ } else { // ISD::fcSNan
+ // issignaling(V) ==> abs(V) > unsigned(Inf) &&
+ // abs(V) < (unsigned(Inf) | quiet_bit)
+ SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
+ SDValue IsNotQnan =
+ DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
+ }
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
+ SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
+ SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
+ SDValue VMinusOneV =
+ DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
+ PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
+ if (PartialCheck == fcNegSubnormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcNormal) {
+ // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
+ APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
+ SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
+ SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
+ APInt ExpLimit = ExpMask - ExpLSB;
+ SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
+ PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
+ if (PartialCheck == fcNegNormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ else if (PartialCheck == fcPosNormal) {
+ SDValue PosSignV =
+ DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
+ }
+ if (IsF80)
+ PartialRes =
+ DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
+ appendResult(PartialRes);
+ }
+
+ if (!Res)
+ return DAG.getConstant(IsInverted, DL, ResultVT);
+ if (IsInverted)
+ Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
+ return Res;
+}
+
// Only expand vector types if we have the appropriate vector bit operations.
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
assert(VT.isVector() && "Expected vector type");
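
expandIS_FPCLASS leans on the layout of IEEE floats: with the sign masked off, NaNs are exactly the patterns above the infinity encoding, and infinities equal it. The same comparisons in plain C++ for binary32 (assuming IEEE-754 float):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static uint32_t bits(float F) {
      uint32_t U;
      std::memcpy(&U, &F, sizeof(U));
      return U;
    }

    int main() {
      const uint32_t ValueMask = 0x7FFFFFFF; // all bits but sign
      const uint32_t Inf = 0x7F800000;       // exponent all-ones, mantissa zero
      const float Samples[] = {0.0f,     -0.0f,     1.0f, -2.5f,
                               INFINITY, -INFINITY, NAN,  1e-45f};
      for (float F : Samples) {
        uint32_t Abs = bits(F) & ValueMask;
        assert((Abs > Inf) == std::isnan(F));  // isnan(V) ==> abs(V) > int(inf)
        assert((Abs == Inf) == std::isinf(F)); // isinf(V) ==> abs(V) == int(inf)
      }
      return 0;
    }
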
@@ -7116,8 +7749,6 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
SDValue Mask0F =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
// v = v - ((v >> 1) & 0x55555555...)
Op = DAG.getNode(ISD::SUB, dl, VT, Op,
@@ -7137,13 +7768,28 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(4, dl, ShVT))),
Mask0F);
- // v = (v * 0x01010101...) >> (Len - 8)
- if (Len > 8)
- Op =
- DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
- return Op;
+ if (Len <= 8)
+ return Op;
+
+ // Avoid the multiply if we only have 2 bytes to add.
+ // TODO: Only doing this for scalars because vectors weren't as obviously
+ // improved.
+ if (Len == 16 && !VT.isVector()) {
+ // v = (v + (v >> 8)) & 0x00FF;
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(8, dl, ShVT))),
+ DAG.getConstant(0xFF, dl, VT));
+ }
+
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ return DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
}
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
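
The new Len == 16 shortcut works because, after the nibble step, each byte already holds its own popcount (at most 8), so folding the high byte into the low byte cannot carry; the multiply is only needed to sum three or more bytes. A check against the GCC/Clang builtin:

    #include <cassert>
    #include <cstdint>

    // 16-bit popcount via the SWAR steps of expandCTPOP, with the final
    // multiply replaced by the single add+mask from the scalar Len==16 path.
    static unsigned popcount16(uint16_t V) {
      uint32_t X = V;
      X = X - ((X >> 1) & 0x5555);
      X = (X & 0x3333) + ((X >> 2) & 0x3333);
      X = (X + (X >> 4)) & 0x0F0F;
      return (X + (X >> 8)) & 0x00FF; // v = (v + (v >> 8)) & 0x00FF
    }

    int main() {
      for (uint32_t V = 0; V <= 0xFFFF; ++V)
        assert(popcount16((uint16_t)V) == (unsigned)__builtin_popcount(V));
      return 0;
    }
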
@@ -7265,6 +7911,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::UMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
@@ -7272,6 +7919,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
+ Op = DAG.getFreeze(Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
return DAG.getNode(ISD::SMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
@@ -7285,16 +7933,17 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return SDValue();
+ Op = DAG.getFreeze(Op);
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
- if (!IsNegative) {
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- }
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+
+ // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
+ if (!IsNegative)
+ return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
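
The restructured tail routes both paths through one xor: with Y = X >> (bits-1), abs(X) is (X ^ Y) - Y and -abs(X) is Y - (X ^ Y); the getFreeze calls keep the now twice-used Op from observing two different poison values. The arithmetic itself (using the arithmetic right shift mainstream compilers provide for signed ints):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    int main() {
      for (int32_t x = -100000; x <= 100000; ++x) {
        int32_t y = x >> 31;                    // sign spread: 0 or -1
        assert(((x ^ y) - y) == std::abs(x));   // abs(x)  = sub(xor(X, Y), Y)
        assert((y - (x ^ y)) == -std::abs(x));  // -abs(x) = sub(Y, xor(X, Y))
      }
      return 0;
    }
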
@@ -8041,23 +8690,6 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
return SDValue();
}
-// Convert redundant addressing modes (e.g. scaling is redundant
-// when accessing bytes).
-ISD::MemIndexType
-TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
- SDValue Offsets) const {
- bool IsScaledIndex =
- (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
- bool IsSignedIndex =
- (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
-
- // Scaling is unimportant for bytes, canonicalize to unscaled.
- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
- return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
-
- return IndexType;
-}
-
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
@@ -8473,8 +9105,20 @@ void TargetLowering::expandUADDSUBO(
EVT ResultType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
- ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
- SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ SDValue SetCC;
+ if (IsAdd && isOneConstant(RHS)) {
+    // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially
+    // reduces the live range of X. We assume comparing with 0 is cheap.
+    // The general case (X + C) < C is not necessarily beneficial: although it
+    // reduces the live range of X, it may require materializing the
+    // constant C.
+ SetCC =
+ DAG.getSetCC(dl, SetCCType, Result,
+ DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
+ } else {
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ }
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
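
The rationale for the special case: X + 1 overflows exactly when the sum wraps to zero, so the overflow flag can compare the result against 0 instead of against the (possibly long-lived) operand X:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Samples[] = {0u, 1u, 0x7FFFFFFFu, 0xFFFFFFFEu, 0xFFFFFFFFu};
      for (uint32_t X : Samples) {
        uint32_t Res = X + 1;
        bool OvfGeneric = Res < X;  // the SETULT form used in the general case
        bool OvfSpecial = Res == 0; // the SETEQ-with-zero form from this change
        assert(OvfGeneric == OvfSpecial);
      }
      return 0;
    }
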
@@ -8773,11 +9417,11 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// floating-point values.
APInt MinInt, MaxInt;
if (IsSigned) {
- MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
- MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
+ MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
} else {
- MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
- MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
+ MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
+ MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
}
// We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
@@ -8931,13 +9575,16 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
SDValue &LHS, SDValue &RHS,
- SDValue &CC, bool &NeedInvert,
+ SDValue &CC, SDValue Mask,
+ SDValue EVL, bool &NeedInvert,
const SDLoc &dl, SDValue &Chain,
bool IsSignaling) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
+ assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
+ bool IsNonVP = !EVL;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default:
llvm_unreachable("Unknown condition code action!");
@@ -9044,17 +9691,34 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unorder operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ if (IsNonVP) {
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
+ SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
+ }
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ if (IsNonVP) {
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
+ SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
+ }
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
SetCC2.getValue(1));
- LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ if (IsNonVP)
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ else {
+ // Transform the binary opcode to the VP equivalent.
+ assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
+ Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
+ }
RHS = SDValue();
CC = SDValue();
return true;
diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 43a54ce33bf0..5f9ade18f15c 100644
--- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -39,7 +39,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include <cassert>
-#include <cstddef>
#include <string>
#include <utility>
#include <vector>
@@ -362,7 +361,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// For each instruction that escapes...
EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true,
- DTU.hasValue() ? DTU.getPointer() : nullptr);
+ DTU ? DTU.getPointer() : nullptr);
while (IRBuilder<> *AtExit = EE.Next()) {
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
// AtEntry, since that would make the value live for the entire function.
diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 8211e3d6a9dd..1fcee02184a9 100644
--- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -413,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Val = Builder.CreateCall(StackAddrFn, {}, "sp");
Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
- // Call the setup_dispatch instrinsic. It fills in the rest of the jmpbuf.
+ // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
Builder.CreateCall(BuiltinSetupDispatchFn, {});
// Store a pointer to the function context so that the back-end will know
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 7f9518e4c075..140a91ae342b 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -389,17 +389,34 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const {
}
#endif
-LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM,
- LiveInterval &LI) {
- for (LiveInterval::SubRange &S : LI.subranges())
+/// Find a subrange corresponding to the exact lane mask @p LM in the live
+/// interval @p LI. The interval @p LI is assumed to contain such a subrange.
+/// This function is used to find corresponding subranges between the
+/// original interval and the new intervals.
+template <typename T> auto &getSubrangeImpl(LaneBitmask LM, T &LI) {
+ for (auto &S : LI.subranges())
if (S.LaneMask == LM)
return S;
llvm_unreachable("SubRange for this mask not found");
}
-LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM,
- LiveInterval &LI) {
- for (LiveInterval::SubRange &S : LI.subranges())
+LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ LiveInterval &LI) {
+ return getSubrangeImpl(LM, LI);
+}
+
+const LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ const LiveInterval &LI) {
+ return getSubrangeImpl(LM, LI);
+}
+
+/// Find a subrange corresponding to the lane mask @p LM, or a superset of it,
+/// in the live interval @p LI. The interval @p LI is assumed to contain such
+/// a subrange. This function is used to find corresponding subranges between
+/// the original interval and the new intervals.
+const LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM,
+ const LiveInterval &LI) {
+ for (const LiveInterval::SubRange &S : LI.subranges())
if ((S.LaneMask & LM) == LM)
return S;
llvm_unreachable("SubRange for this mask not found");
@@ -566,10 +583,8 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
return Def;
}
-VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
- VNInfo *ParentVNI,
- SlotIndex UseIdx,
- MachineBasicBlock &MBB,
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
+ SlotIndex UseIdx, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
SlotIndex Def;
LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
@@ -937,7 +952,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
void SplitEditor::computeRedundantBackCopies(
DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
- LiveInterval *Parent = &Edit->getParent();
+ const LiveInterval *Parent = &Edit->getParent();
SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
SmallPtrSet<VNInfo *, 8> DominatedVNIs;
@@ -952,7 +967,7 @@ void SplitEditor::computeRedundantBackCopies(
// For VNI aggregation of each ParentVNI, collect dominated, i.e.,
// redundant VNIs to BackCopies.
for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
- VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ const VNInfo *ParentVNI = Parent->getValNumInfo(i);
if (!NotToHoistSet.count(ParentVNI->id))
continue;
SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
@@ -990,7 +1005,7 @@ void SplitEditor::computeRedundantBackCopies(
void SplitEditor::hoistCopies() {
// Get the complement interval, always RegIdx 0.
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
- LiveInterval *Parent = &Edit->getParent();
+ const LiveInterval *Parent = &Edit->getParent();
// Track the nearest common dominator for all back-copies for each ParentVNI,
// indexed by ParentVNI->id.
@@ -1067,7 +1082,7 @@ void SplitEditor::hoistCopies() {
if (!Dom.first || Dom.second.isValid())
continue;
// This value needs a hoisted copy inserted at the end of Dom.first.
- VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ const VNInfo *ParentVNI = Parent->getValNumInfo(i);
MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
// Get a less loopy dominator than Dom.first.
Dom.first = findShallowDominator(Dom.first, DefMBB);
@@ -1237,11 +1252,11 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
SlotIndex LastUse = End.getPrevSlot();
// The predecessor may not have a live-out value. That is OK, like an
// undef PHI operand.
- LiveInterval &PLI = Edit->getParent();
+ const LiveInterval &PLI = Edit->getParent();
// Need the cast because the inputs to ?: would otherwise be deemed
// "incompatible": SubRange vs LiveInterval.
- LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI)
- : static_cast<LiveRange &>(PLI);
+ const LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI)
+ : static_cast<const LiveRange &>(PLI);
if (PSR.liveAt(LastUse))
LIC.extend(LR, End, /*PhysReg=*/0, Undefs);
}
@@ -1254,7 +1269,7 @@ void SplitEditor::extendPHIKillRanges() {
// remove it. Otherwise, extend the live interval to reach the end indexes
// of all predecessor blocks.
- LiveInterval &ParentLI = Edit->getParent();
+ const LiveInterval &ParentLI = Edit->getParent();
for (const VNInfo *V : ParentLI.valnos) {
if (V->isUnused() || !V->isPHIDef())
continue;
@@ -1270,7 +1285,7 @@ void SplitEditor::extendPHIKillRanges() {
SmallVector<SlotIndex, 4> Undefs;
LiveIntervalCalc SubLIC;
- for (LiveInterval::SubRange &PS : ParentLI.subranges()) {
+ for (const LiveInterval::SubRange &PS : ParentLI.subranges()) {
for (const VNInfo *V : PS.valnos) {
if (V->isUnused() || !V->isPHIDef())
continue;
@@ -1337,13 +1352,34 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
continue;
// We may want to extend a live range for a partial redef, or for a use
// tied to an early clobber.
- Idx = Idx.getPrevSlot();
- if (!Edit->getParent().liveAt(Idx))
+ if (!Edit->getParent().liveAt(Idx.getPrevSlot()))
continue;
- } else
- Idx = Idx.getRegSlot(true);
+ } else {
+ assert(MO.isUse());
+ bool IsEarlyClobber = false;
+ if (MO.isTied()) {
+        // We want to extend the live range into the `e` slot rather than the
+        // `r` slot if the tied-def is an early clobber, because the `e` slot
+        // is already contained in the live range of the early-clobber
+        // tied-def operand. For example:
+        //  0  %0 = ...
+        //  16 early-clobber %0 = Op %0 (tied-def 0), ...
+        //  32 ... = Op %0
+        // Before the extension:
+        //  %0 = [0r, 0d) [16e, 32d)
+        // Here we want to extend up to 16e, not 16r: extending to 16r would
+        // do nothing, because 16r is already contained in [16e, 32d).
+ unsigned OpIdx = MI->getOperandNo(&MO);
+ unsigned DefOpIdx = MI->findTiedOperandIdx(OpIdx);
+ const MachineOperand &DefOp = MI->getOperand(DefOpIdx);
+ IsEarlyClobber = DefOp.isEarlyClobber();
+ }
+
+ Idx = Idx.getRegSlot(IsEarlyClobber);
+ }
- SlotIndex Next = Idx.getNextSlot();
+ SlotIndex Next = Idx;
if (LI.hasSubRanges()) {
// We have to delay extending subranges until we have seen all operands
// defining the register. This is because a <def,read-undef> operand
@@ -1510,9 +1546,8 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
// Provide a reverse mapping from original indices to Edit ranges.
if (LRMap) {
- LRMap->clear();
- for (unsigned i = 0, e = Edit->size(); i != e; ++i)
- LRMap->push_back(i);
+ auto Seq = llvm::seq<unsigned>(0, Edit->size());
+ LRMap->assign(Seq.begin(), Seq.end());
}
// Now check if any registers were separated into multiple components.
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index 902546fe16d8..4400a797d38e 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -22,19 +22,19 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Compiler.h"
#include <utility>
namespace llvm {
class AAResults;
+class LiveInterval;
+class LiveRange;
class LiveIntervals;
class LiveRangeEdit;
class MachineBlockFrequencyInfo;
@@ -346,19 +346,6 @@ private:
return LICalc[SpillMode != SM_Partition && RegIdx != 0];
}
- /// Find a subrange corresponding to the exact lane mask @p LM in the live
- /// interval @p LI. The interval @p LI is assumed to contain such a subrange.
- /// This function is used to find corresponding subranges between the
- /// original interval and the new intervals.
- LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
- LiveInterval &LI);
-
- /// Find a subrange corresponding to the lane mask @p LM, or a superset of it,
- /// in the live interval @p LI. The interval @p LI is assumed to contain such
- /// a subrange. This function is used to find corresponding subranges between
- /// the original interval and the new intervals.
- LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI);
-
/// Add a segment to the interval LI for the value number VNI. If LI has
/// subranges, corresponding segments will be added to them as well, but
/// with newly created value numbers. If Original is true, dead def will
@@ -390,10 +377,8 @@ private:
/// defFromParent - Define Reg from ParentVNI at UseIdx using either
/// rematerialization or a COPY from parent. Return the new value.
- VNInfo *defFromParent(unsigned RegIdx,
- VNInfo *ParentVNI,
- SlotIndex UseIdx,
- MachineBasicBlock &MBB,
+ VNInfo *defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
+ SlotIndex UseIdx, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I);
/// removeBackCopies - Remove the copy instructions that defines the values
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 623d5da9831e..11c6bdc69956 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -36,14 +36,12 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Use.h"
@@ -1145,6 +1143,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
+ (void) FixedMemOp;
+ (void) FixedDbg;
+ (void) FixedInstr;
}
void StackColoring::removeInvalidSlotRanges() {
@@ -1319,6 +1320,11 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
int FirstSlot = SortedSlots[I];
int SecondSlot = SortedSlots[J];
+
+ // Objects with different stack IDs cannot be merged.
+ if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
+ continue;
+
LiveInterval *First = &*Intervals[FirstSlot];
LiveInterval *Second = &*Intervals[SecondSlot];
auto &FirstS = LiveStarts[FirstSlot];
diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 3640296adbca..b83c56903133 100644
--- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -17,9 +17,9 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index 36e8f129ea15..6757d6ca4f88 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -721,7 +721,7 @@ void StackMaps::serializeToStackMapSection() {
// Create the section.
MCSection *StackMapSection =
OutContext.getObjectFileInfo()->getStackMapSection();
- OS.SwitchSection(StackMapSection);
+ OS.switchSection(StackMapSection);
// Emit a dummy symbol to force section inclusion.
OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
@@ -732,7 +732,7 @@ void StackMaps::serializeToStackMapSection() {
emitFunctionFrameRecords(OS);
emitConstantPoolEntries(OS);
emitCallsiteEntries(OS);
- OS.AddBlankLine();
+ OS.addBlankLine();
// Clean up.
CSInfos.clear();
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 6765fd274686..510a8e3e4ba2 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -28,8 +28,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -169,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// If this instruction accesses memory make sure it doesn't access beyond
// the bounds of the allocated object.
Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
- if (MemLoc.hasValue() && MemLoc->Size.hasValue() &&
+ if (MemLoc && MemLoc->Size.hasValue() &&
!TypeSize::isKnownGE(AllocSize,
TypeSize::getFixed(MemLoc->Size.getValue())))
return true;
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index 17e6f51d0899..b8c750688914 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp
index 20892a79d35f..bf3d2088e196 100644
--- a/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/llvm/lib/CodeGen/TailDuplication.cpp
@@ -14,14 +14,14 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 68a7b80d6146..ba533a491b9c 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -19,17 +19,15 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -370,8 +368,8 @@ void TailDuplicator::processPHI(
return;
// Remove PredBB from the PHI node.
- MI->RemoveOperand(SrcOpIdx + 1);
- MI->RemoveOperand(SrcOpIdx);
+ MI->removeOperand(SrcOpIdx + 1);
+ MI->removeOperand(SrcOpIdx);
if (MI->getNumOperands() == 1)
MI->eraseFromParent();
}
@@ -385,8 +383,9 @@ void TailDuplicator::duplicateInstruction(
// Allow duplication of CFI instructions.
if (MI->isCFIInstruction()) {
BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
- TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(
- MI->getOperand(0).getCFIIndex());
+ TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(MI->getOperand(0).getCFIIndex())
+ .setMIFlags(MI->getFlags());
return;
}
MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
@@ -496,15 +495,15 @@ void TailDuplicator::updateSuccessorsPHIs(
for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
MachineOperand &MO = MI.getOperand(i + 1);
if (MO.getMBB() == FromBB) {
- MI.RemoveOperand(i + 1);
- MI.RemoveOperand(i);
+ MI.removeOperand(i + 1);
+ MI.removeOperand(i);
}
}
} else
Idx = 0;
// If Idx is set, the operands at Idx and Idx+1 must be removed.
- // We reuse the location to avoid expensive RemoveOperand calls.
+ // We reuse the location to avoid expensive removeOperand calls.
DenseMap<Register, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(Reg);
@@ -541,8 +540,8 @@ void TailDuplicator::updateSuccessorsPHIs(
}
}
if (Idx != 0) {
- MI.RemoveOperand(Idx + 1);
- MI.RemoveOperand(Idx);
+ MI.removeOperand(Idx + 1);
+ MI.removeOperand(Idx);
}
}
}
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index fbf190a52585..9430e86fe44d 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -10,17 +10,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetMachine.h"
@@ -37,6 +37,11 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const
return false;
}
+bool TargetFrameLowering::enableCFIFixup(MachineFunction &MF) const {
+ return MF.needsFrameMoves() &&
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+}
+
/// Returns the displacement from the frame register to the stack
/// frame of the specified index, along with the frame register used
/// (in output arg FrameReg). This is the default implementation which
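
The new enableCFIFixup default above runs the fixup whenever the function needs frame moves and the target does not use Windows CFI. A hedged sketch of how a backend might narrow that further; OurTargetFrameLowering is a hypothetical subclass:

// Hypothetical override: keep the generic gating, but skip the pass at -O0.
bool OurTargetFrameLowering::enableCFIFixup(llvm::MachineFunction &MF) const {
  return llvm::TargetFrameLowering::enableCFIFixup(MF) &&
         MF.getTarget().getOptLevel() != llvm::CodeGenOpt::None;
}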
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 3f22cc4289f2..2a987ee3eedf 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -31,8 +32,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cctype>
using namespace llvm;
@@ -40,8 +39,7 @@ static cl::opt<bool> DisableHazardRecognizer(
"disable-sched-hazard", cl::Hidden, cl::init(false),
cl::desc("Disable hazard detection during preRA scheduling"));
-TargetInstrInfo::~TargetInstrInfo() {
-}
+TargetInstrInfo::~TargetInstrInfo() = default;
const TargetRegisterClass*
TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
@@ -873,11 +871,13 @@ void TargetInstrInfo::reassociateOps(
MachineInstrBuilder MIB1 =
BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
.addReg(RegX, getKillRegState(KillX))
- .addReg(RegY, getKillRegState(KillY));
+ .addReg(RegY, getKillRegState(KillY))
+ .setMIFlags(Prev.getFlags());
MachineInstrBuilder MIB2 =
BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
.addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(true));
+ .addReg(NewVR, getKillRegState(true))
+ .setMIFlags(Root.getFlags());
setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
@@ -1399,7 +1399,7 @@ std::string TargetInstrInfo::createMIROperandComment(
return OS.str();
}
-TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default;
void TargetInstrInfo::mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const {
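
The setMIFlags additions in reassociateOps above close a real gap: reassociation is justified by flags such as reassoc/nsz on the original instructions, and the rebuilt pair previously dropped them. The pattern, sketched with the names from the hunk:

llvm::MachineInstrBuilder MIB1 =
    BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
        .addReg(RegX, llvm::getKillRegState(KillX))
        .addReg(RegY, llvm::getKillRegState(KillY))
        .setMIFlags(Prev.getFlags()); // carry reassoc/nsz etc. forward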
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index ab574232e367..6a595a4c748b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -56,7 +56,6 @@
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
@@ -202,7 +201,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
}
- if (TT.isPS4CPU()) {
+ if (TT.isPS()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
}
@@ -275,6 +274,11 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
return FPROUND_F128_F16;
if (OpVT == MVT::ppcf128)
return FPROUND_PPCF128_F16;
+ } else if (RetVT == MVT::bf16) {
+ if (OpVT == MVT::f32)
+ return FPROUND_F32_BF16;
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_BF16;
} else if (RetVT == MVT::f32) {
if (OpVT == MVT::f64)
return FPROUND_F64_F32;
@@ -740,6 +744,30 @@ void TargetLoweringBase::initActions() {
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
+ // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to
+ // remove this and targets should individually set these types if not legal.
+ for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
+ force_iteration_on_noniterable_enum)) {
+ for (MVT VT : {MVT::i2, MVT::i4})
+ OpActions[(unsigned)VT.SimpleTy][NT] = Expand;
+ }
+ for (MVT AVT : MVT::all_valuetypes()) {
+ for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) {
+ setTruncStoreAction(AVT, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand);
+ }
+ }
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ for (MVT VT : {MVT::i2, MVT::i4}) {
+ setIndexedLoadAction(IM, VT, Expand);
+ setIndexedStoreAction(IM, VT, Expand);
+ setIndexedMaskedLoadAction(IM, VT, Expand);
+ setIndexedMaskedStoreAction(IM, VT, Expand);
+ }
+ }
+
for (MVT VT : MVT::fp_valuetypes()) {
MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
if (IntVT.isValid()) {
@@ -763,85 +791,63 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
// These operations default to expand.
- setOperationAction(ISD::FGETSIGN, VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
- setOperationAction(ISD::FMINNUM, VT, Expand);
- setOperationAction(ISD::FMAXNUM, VT, Expand);
- setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
- setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
- setOperationAction(ISD::FMINIMUM, VT, Expand);
- setOperationAction(ISD::FMAXIMUM, VT, Expand);
- setOperationAction(ISD::FMAD, VT, Expand);
- setOperationAction(ISD::SMIN, VT, Expand);
- setOperationAction(ISD::SMAX, VT, Expand);
- setOperationAction(ISD::UMIN, VT, Expand);
- setOperationAction(ISD::UMAX, VT, Expand);
- setOperationAction(ISD::ABS, VT, Expand);
- setOperationAction(ISD::FSHL, VT, Expand);
- setOperationAction(ISD::FSHR, VT, Expand);
- setOperationAction(ISD::SADDSAT, VT, Expand);
- setOperationAction(ISD::UADDSAT, VT, Expand);
- setOperationAction(ISD::SSUBSAT, VT, Expand);
- setOperationAction(ISD::USUBSAT, VT, Expand);
- setOperationAction(ISD::SSHLSAT, VT, Expand);
- setOperationAction(ISD::USHLSAT, VT, Expand);
- setOperationAction(ISD::SMULFIX, VT, Expand);
- setOperationAction(ISD::SMULFIXSAT, VT, Expand);
- setOperationAction(ISD::UMULFIX, VT, Expand);
- setOperationAction(ISD::UMULFIXSAT, VT, Expand);
- setOperationAction(ISD::SDIVFIX, VT, Expand);
- setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
- setOperationAction(ISD::UDIVFIX, VT, Expand);
- setOperationAction(ISD::UDIVFIXSAT, VT, Expand);
- setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand);
- setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand);
+ setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS,
+ ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
+ ISD::FMINIMUM, ISD::FMAXIMUM,
+ ISD::FMAD, ISD::SMIN,
+ ISD::SMAX, ISD::UMIN,
+ ISD::UMAX, ISD::ABS,
+ ISD::FSHL, ISD::FSHR,
+ ISD::SADDSAT, ISD::UADDSAT,
+ ISD::SSUBSAT, ISD::USUBSAT,
+ ISD::SSHLSAT, ISD::USHLSAT,
+ ISD::SMULFIX, ISD::SMULFIXSAT,
+ ISD::UMULFIX, ISD::UMULFIXSAT,
+ ISD::SDIVFIX, ISD::SDIVFIXSAT,
+ ISD::UDIVFIX, ISD::UDIVFIXSAT,
+ ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
+ ISD::IS_FPCLASS},
+ VT, Expand);
// Overflow operations default to expand
- setOperationAction(ISD::SADDO, VT, Expand);
- setOperationAction(ISD::SSUBO, VT, Expand);
- setOperationAction(ISD::UADDO, VT, Expand);
- setOperationAction(ISD::USUBO, VT, Expand);
- setOperationAction(ISD::SMULO, VT, Expand);
- setOperationAction(ISD::UMULO, VT, Expand);
+ setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO,
+ ISD::SMULO, ISD::UMULO},
+ VT, Expand);
// ADDCARRY operations default to expand
- setOperationAction(ISD::ADDCARRY, VT, Expand);
- setOperationAction(ISD::SUBCARRY, VT, Expand);
- setOperationAction(ISD::SETCCCARRY, VT, Expand);
- setOperationAction(ISD::SADDO_CARRY, VT, Expand);
- setOperationAction(ISD::SSUBO_CARRY, VT, Expand);
+ setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY, ISD::SETCCCARRY,
+ ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
+ VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT,
+ Expand);
+
+ // Halving adds
+ setOperationAction(
+ {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT,
+ Expand);
// Absolute difference
- setOperationAction(ISD::ABDS, VT, Expand);
- setOperationAction(ISD::ABDU, VT, Expand);
+ setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Expand);
- setOperationAction(ISD::BITREVERSE, VT, Expand);
- setOperationAction(ISD::PARITY, VT, Expand);
+ setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FROUND, VT, Expand);
- setOperationAction(ISD::FROUNDEVEN, VT, Expand);
- setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand);
// These operations default to expand for vector types.
- if (VT.isVector()) {
- setOperationAction(ISD::FCOPYSIGN, VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
- setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Expand);
- }
+ if (VT.isVector())
+ setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR},
+ VT, Expand);
// Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@@ -852,21 +858,13 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
// Vector reduction default to expand.
- setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
- setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
- setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
- setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
- setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
- setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand);
+ setOperationAction(
+ {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD,
+ ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
+ ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
+ VT, Expand);
// Named vector shuffles default to expand.
setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
@@ -881,30 +879,16 @@ void TargetLoweringBase::initActions() {
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
- setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
+ setOperationAction(ISD::ConstantFP,
+ {MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
+ Expand);
// These library functions default to expand.
- for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
- setOperationAction(ISD::FCBRT, VT, Expand);
- setOperationAction(ISD::FLOG , VT, Expand);
- setOperationAction(ISD::FLOG2, VT, Expand);
- setOperationAction(ISD::FLOG10, VT, Expand);
- setOperationAction(ISD::FEXP , VT, Expand);
- setOperationAction(ISD::FEXP2, VT, Expand);
- setOperationAction(ISD::FFLOOR, VT, Expand);
- setOperationAction(ISD::FNEARBYINT, VT, Expand);
- setOperationAction(ISD::FCEIL, VT, Expand);
- setOperationAction(ISD::FRINT, VT, Expand);
- setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::LROUND, VT, Expand);
- setOperationAction(ISD::LLROUND, VT, Expand);
- setOperationAction(ISD::LRINT, VT, Expand);
- setOperationAction(ISD::LLRINT, VT, Expand);
- }
+ setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
+ ISD::FEXP2, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
+ ISD::FRINT, ISD::FTRUNC, ISD::LROUND, ISD::LLROUND,
+ ISD::LRINT, ISD::LLRINT},
+ {MVT::f32, MVT::f64, MVT::f128}, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -1394,6 +1378,16 @@ void TargetLoweringBase::computeRegisterProperties(
}
}
+ // Decide how to handle bf16. If the target does not have native bf16 support,
+ // promote it to f32, because there are no bf16 library calls (except for
+ // converting from f32 to bf16).
+ if (!isTypeLegal(MVT::bf16)) {
+ NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::bf16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::bf16, TypePromoteFloat);
+ }
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1647,6 +1641,11 @@ bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
(NumCases * 100 >= Range * MinDensity);
}
+MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context,
+ EVT ConditionVT) const {
+ return getRegisterType(Context, ConditionVT);
+}
+
/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
@@ -2066,9 +2065,11 @@ static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
Name += IsSqrt ? "sqrt" : "div";
- // TODO: Handle "half" or other float types?
+ // TODO: Handle other float types?
if (VT.getScalarType() == MVT::f64) {
Name += "d";
+ } else if (VT.getScalarType() == MVT::f16) {
+ Name += "h";
} else {
assert(VT.getScalarType() == MVT::f32 &&
"Unexpected FP type for reciprocal estimate");
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index ce350034d073..f3d68bd9c92d 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -310,7 +310,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS,
ELF::SHF_EXCLUDE);
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
for (const auto *Operand : LinkerOptions->operands()) {
if (cast<MDNode>(Operand)->getNumOperands() != 2)
@@ -326,7 +326,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
ELF::SHF_MERGE | ELF::SHF_STRINGS, 1);
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
for (const auto *Operand : DependentLibraries->operands()) {
Streamer.emitBytes(
@@ -350,7 +350,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection(
TM->getFunctionSections() ? Name->getString() : StringRef());
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitInt64(GUID->getZExtValue());
Streamer.emitInt64(Hash->getZExtValue());
Streamer.emitULEB128IntValue(Name->getString().size());
@@ -365,11 +365,11 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
GetObjCImageInfo(M, Version, Flags, Section);
if (!Section.empty()) {
auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
Streamer.emitInt32(Version);
Streamer.emitInt32(Flags);
- Streamer.AddBlankLine();
+ Streamer.addBlankLine();
}
emitCGProfileMetadata(Streamer, M);
@@ -399,7 +399,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(),
ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
- Streamer.SwitchSection(Sec);
+ Streamer.switchSection(Sec);
Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value());
Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
@@ -449,6 +449,9 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
+ if (Name == ".llvm.offloading")
+ return SectionKind::getExclude();
+
if (Name.empty() || Name[0] != '.') return K;
// Default implementation based on some magic section names.
@@ -507,9 +510,12 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
static unsigned getELFSectionFlags(SectionKind K) {
unsigned Flags = 0;
- if (!K.isMetadata())
+ if (!K.isMetadata() && !K.isExclude())
Flags |= ELF::SHF_ALLOC;
+ if (K.isExclude())
+ Flags |= ELF::SHF_EXCLUDE;
+
if (K.isText())
Flags |= ELF::SHF_EXECINSTR;
@@ -681,9 +687,10 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
}
if (Retain) {
- if ((Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
- !TM.getTargetTriple().isOSSolaris())
+ if (TM.getTargetTriple().isOSSolaris())
+ Flags |= ELF::SHF_SUNW_NODISCARD;
+ else if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
Flags |= ELF::SHF_GNU_RETAIN;
return NextUniqueID++;
}
@@ -860,12 +867,15 @@ static MCSection *selectELFSectionForGlobal(
EmitUniqueSection = true;
Flags |= ELF::SHF_LINK_ORDER;
}
- if (Retain &&
- (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
- !TM.getTargetTriple().isOSSolaris()) {
- EmitUniqueSection = true;
- Flags |= ELF::SHF_GNU_RETAIN;
+ if (Retain) {
+ if (TM.getTargetTriple().isOSSolaris()) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_SUNW_NODISCARD;
+ } else if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_GNU_RETAIN;
+ }
}
MCSectionELF *Section = selectELFSectionForGlobal(
@@ -1171,6 +1181,15 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
}
+MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ // TODO(yln): Remove -lower-global-dtors-via-cxa-atexit fallback flag
+ // (LowerGlobalDtorsViaCxaAtExit) and always issue a fatal error here.
+ if (TM->Options.LowerGlobalDtorsViaCxaAtExit)
+ report_fatal_error("@llvm.global_dtors should have been lowered already");
+ return StaticDtorSection;
+}
+
void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
// Emit the linker options if present.
@@ -1207,12 +1226,12 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
// Get the section.
MCSectionMachO *S = getContext().getMachOSection(
Segment, Section, TAA, StubSize, SectionKind::getData());
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitLabel(getContext().
getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
Streamer.emitInt32(VersionVal);
Streamer.emitInt32(ImageInfoFlags);
- Streamer.AddBlankLine();
+ Streamer.addBlankLine();
}
static void checkMachOComdat(const GlobalValue *GV) {
@@ -1520,6 +1539,9 @@ getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) {
if (K.isMetadata())
Flags |=
COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isExclude())
+ Flags |=
+ COFF::IMAGE_SCN_LNK_REMOVE | COFF::IMAGE_SCN_MEM_DISCARDABLE;
else if (K.isText())
Flags |=
COFF::IMAGE_SCN_MEM_EXECUTE |
@@ -1755,11 +1777,11 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
Streamer.emitInt32(Version);
Streamer.emitInt32(Flags);
- Streamer.AddBlankLine();
+ Streamer.addBlankLine();
}
emitCGProfileMetadata(Streamer, M);
@@ -1772,7 +1794,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
// spec, this section is a space-separated string containing flags for
// linker.
MCSection *Sec = getDrectveSection();
- Streamer.SwitchSection(Sec);
+ Streamer.switchSection(Sec);
for (const auto *Option : LinkerOptions->operands()) {
for (const auto &Piece : cast<MDNode>(Option)->operands()) {
// Lead with a space for consistency with our dllexport implementation.
@@ -1791,7 +1813,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
getMangler());
OS.flush();
if (!Flags.empty()) {
- Streamer.SwitchSection(getDrectveSection());
+ Streamer.switchSection(getDrectveSection());
Streamer.emitBytes(Flags);
}
Flags.clear();
@@ -1817,7 +1839,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
OS.flush();
if (!Flags.empty()) {
- Streamer.SwitchSection(getDrectveSection());
+ Streamer.switchSection(getDrectveSection());
Streamer.emitBytes(Flags);
}
Flags.clear();
@@ -2170,8 +2192,7 @@ MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- llvm_unreachable("@llvm.global_dtors should have been lowered already");
- return nullptr;
+ report_fatal_error("@llvm.global_dtors should have been lowered already");
}
//===----------------------------------------------------------------------===//
@@ -2544,10 +2565,24 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
XCOFF::XTY_SD));
}
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
+ const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
+ auto *LSDA = cast<MCSectionXCOFF>(LSDASection);
+ if (TM.getFunctionSections()) {
+ // If option -ffunction-sections is on, append the function name to the
+ // name of the LSDA csect so that each function has its own LSDA csect.
+ // This helps the linker to garbage-collect EH info of unused functions.
+ SmallString<128> NameStr = LSDA->getName();
+ raw_svector_ostream(NameStr) << '.' << F.getName();
+ LSDA = getContext().getXCOFFSection(NameStr, LSDA->getKind(),
+ LSDA->getCsectProp());
+ }
+ return LSDA;
+}
//===----------------------------------------------------------------------===//
// GOFF
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {}
+TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() = default;
MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
@@ -2558,8 +2593,8 @@ MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
auto *Symbol = TM.getSymbol(GO);
if (Kind.isBSS())
- return getContext().getGOFFSection(Symbol->getName(),
- SectionKind::getBSS());
+ return getContext().getGOFFSection(Symbol->getName(), SectionKind::getBSS(),
+ nullptr, nullptr);
return getContext().getObjectFileInfo()->getTextSection();
}
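
The Solaris hunks above flip the old behavior: retained sections used to silently lose their marking on Solaris, whereas now they carry SHF_SUNW_NODISCARD there and SHF_GNU_RETAIN elsewhere, when the assembler can encode it. A condensed sketch; retainSectionFlag is our name, not LLVM's:

static unsigned retainSectionFlag(const llvm::Triple &TT,
                                  const llvm::MCAsmInfo &MAI) {
  if (TT.isOSSolaris())
    return llvm::ELF::SHF_SUNW_NODISCARD;
  if (MAI.useIntegratedAssembler() || MAI.binutilsIsAtLeast(2, 36))
    return llvm::ELF::SHF_GNU_RETAIN;
  return 0; // older external assemblers cannot encode retention
}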
diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 0731cf9b28f4..af5d10103f78 100644
--- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -15,7 +15,6 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 05004fb935df..0bd229f4fc68 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
@@ -47,7 +48,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <cassert>
#include <string>
@@ -115,20 +115,18 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
static cl::opt<cl::boolOrDefault>
VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::ZeroOrMore);
-static cl::opt<cl::boolOrDefault> DebugifyAndStripAll(
- "debugify-and-strip-all-safe", cl::Hidden,
- cl::desc(
- "Debugify MIR before and Strip debug after "
- "each pass except those known to be unsafe when debug info is present"),
- cl::ZeroOrMore);
+ cl::desc("Verify generated machine code"));
+static cl::opt<cl::boolOrDefault>
+ DebugifyAndStripAll("debugify-and-strip-all-safe", cl::Hidden,
+ cl::desc("Debugify MIR before and Strip debug after "
+ "each pass except those known to be unsafe "
+ "when debug info is present"));
static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
"debugify-check-and-strip-all-safe", cl::Hidden,
cl::desc(
"Debugify MIR before, by checking and stripping the debug info after, "
- "each pass except those known to be unsafe when debug info is present"),
- cl::ZeroOrMore);
+ "each pass except those known to be unsafe when debug info is "
+ "present"));
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
"enable-machine-outliner", cl::desc("Enable the machine outliner"),
@@ -139,6 +137,11 @@ static cl::opt<RunOutliner> EnableMachineOutliner(
"Disable all outlining"),
// Sentinel value for unspecified option.
clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+// Disable the pass to fix unwind information. Whether the pass is included in
+// the pipeline is controlled via the target options; this option serves as a
+// manual override.
+static cl::opt<bool> DisableCFIFixup("disable-cfi-fixup", cl::Hidden,
+ cl::desc("Disable the CFI fixup pass"));
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -175,12 +178,12 @@ static cl::opt<bool>
// Disable MIRProfileLoader before RegAlloc. This is for debugging and
// tuning purposes.
static cl::opt<bool> DisableRAFSProfileLoader(
- "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
+ "disable-ra-fsprofile-loader", cl::init(false), cl::Hidden,
cl::desc("Disable MIRProfileLoader before RegAlloc"));
// Disable MIRProfileLoader before BlockPlacement. This is for debugging
// and tuning purposes.
static cl::opt<bool> DisableLayoutFSProfileLoader(
- "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
+ "disable-layout-fsprofile-loader", cl::init(false), cl::Hidden,
cl::desc("Disable MIRProfileLoader before BlockPlacement"));
// Specify FSProfile file name.
static cl::opt<std::string>
@@ -256,6 +259,11 @@ static cl::opt<bool> DisableExpandReductions(
"disable-expand-reductions", cl::init(false), cl::Hidden,
cl::desc("Disable the expand reduction intrinsics pass from running"));
+/// Disable the select optimization pass.
+static cl::opt<bool> DisableSelectOptimize(
+ "disable-select-optimize", cl::init(true), cl::Hidden,
+ cl::desc("Disable the select-optimization pass from running"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -490,6 +498,7 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(DisableConstantHoisting)
SET_BOOLEAN_OPTION(DisableCGP)
SET_BOOLEAN_OPTION(DisablePartialLibcallInlining)
+ SET_BOOLEAN_OPTION(DisableSelectOptimize)
SET_BOOLEAN_OPTION(PrintLSR)
SET_BOOLEAN_OPTION(PrintISelInput)
SET_BOOLEAN_OPTION(PrintGCInfo)
@@ -736,21 +745,21 @@ void TargetPassConfig::addPass(Pass *P) {
if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum)
Stopped = true;
if (Started && !Stopped) {
- if (AddingMachinePasses)
+ if (AddingMachinePasses) {
+ // Construct banner message before PM->add() as that may delete the pass.
+ std::string Banner =
+ std::string("After ") + std::string(P->getPassName());
addMachinePrePasses();
- std::string Banner;
- // Construct banner message before PM->add() as that may delete the pass.
- if (AddingMachinePasses)
- Banner = std::string("After ") + std::string(P->getPassName());
- PM->add(P);
- if (AddingMachinePasses)
+ PM->add(P);
addMachinePostPasses(Banner);
+ } else {
+ PM->add(P);
+ }
// Add the passes after the pass P if there is any.
- for (const auto &IP : Impl->InsertedPasses) {
+ for (const auto &IP : Impl->InsertedPasses)
if (IP.TargetPassID == PassID)
addPass(IP.getInsertedPass());
- }
} else {
delete P;
}
@@ -895,6 +904,12 @@ void TargetPassConfig::addIRPasses() {
addPass(&ShadowStackGCLoweringID);
addPass(createLowerConstantIntrinsicsPass());
+ // For MachO, lower @llvm.global_dtors into @llvm.global_ctors with

+ // __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func.
+ if (TM->getTargetTriple().isOSBinFormatMachO() &&
+ TM->Options.LowerGlobalDtorsViaCxaAtExit)
+ addPass(createLowerGlobalDtorsLegacyPass());
+
// Make sure that no unreachable blocks are instruction selected.
addPass(createUnreachableBlockEliminationPass());
@@ -922,6 +937,13 @@ void TargetPassConfig::addIRPasses() {
// Allow disabling it for testing purposes.
if (!DisableExpandReductions)
addPass(createExpandReductionsPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createTLSVariableHoistPass());
+
+ // Convert conditional moves to conditional jumps when profitable.
+ if (getOptLevel() != CodeGenOpt::None && !DisableSelectOptimize)
+ addPass(createSelectOptimizePass());
}
/// Turn exception handling constructs into something the code generators can
@@ -1261,12 +1283,19 @@ void TargetPassConfig::addMachinePasses() {
// FIXME: In principle, BasicBlockSection::Labels and splitting can be used
// together. Update this check once we have addressed any issues.
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
- addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf()));
+ if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
+ addPass(llvm::createBasicBlockSectionsProfileReaderPass(
+ TM->getBBSectionsFuncListBuf()));
+ }
+ addPass(llvm::createBasicBlockSectionsPass());
} else if (TM->Options.EnableMachineFunctionSplitter ||
EnableMachineFunctionSplitter) {
addPass(createMachineFunctionSplitterPass());
}
+ if (!DisableCFIFixup && TM->Options.EnableCFIFixup)
+ addPass(createCFIFixup());
+
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
@@ -1376,6 +1405,11 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
+bool TargetPassConfig::isCustomizedRegAlloc() {
+ return RegAlloc !=
+ (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator;
+}
+
bool TargetPassConfig::addRegAssignAndRewriteFast() {
if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator &&
RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator)
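
The addPass restructuring above is a lifetime fix, not a cleanup: PM->add() may take ownership of and delete the pass, so getPassName() must be captured before the call. The safe ordering, in brief:

std::string Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);                   // may delete P
addMachinePostPasses(Banner); // uses the copied string, never P again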
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 6bcf79547056..ac346585b0f8 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -16,10 +16,11 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp
index ce59452fd1b8..ac07c86cab85 100644
--- a/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -16,7 +16,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index e4520d8ccb1e..ba2c8dda7de5 100644
--- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -45,10 +45,6 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
return true;
}
-bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
- return false;
-}
-
bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index dfd962be2882..c44fd9f97383 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -28,7 +28,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -50,7 +49,6 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
@@ -163,6 +161,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
void eliminateRegSequence(MachineBasicBlock::iterator&);
+ bool processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands);
public:
static char ID; // Pass identification, replacement for typeid
@@ -1629,6 +1628,61 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
}
+// For every tied operand pair this function transforms the statepoint from
+// RegA = STATEPOINT ... RegB(tied-def N)
+// to
+// RegB = STATEPOINT ... RegB(tied-def N)
+// and replaces all uses of RegA with RegB.
+// No extra COPY instruction is necessary because the tied use is killed at
+// STATEPOINT.
+bool TwoAddressInstructionPass::processStatepoint(
+ MachineInstr *MI, TiedOperandMap &TiedOperands) {
+
+ bool NeedCopy = false;
+ for (auto &TO : TiedOperands) {
+ Register RegB = TO.first;
+ if (TO.second.size() != 1) {
+ NeedCopy = true;
+ continue;
+ }
+
+ unsigned SrcIdx = TO.second[0].first;
+ unsigned DstIdx = TO.second[0].second;
+
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ Register RegA = DstMO.getReg();
+
+ assert(RegB == MI->getOperand(SrcIdx).getReg());
+
+ if (RegA == RegB)
+ continue;
+
+ MRI->replaceRegWith(RegA, RegB);
+
+ if (LIS) {
+ VNInfo::Allocator &A = LIS->getVNInfoAllocator();
+ LiveInterval &LI = LIS->getInterval(RegB);
+ for (auto &S : LIS->getInterval(RegA)) {
+ VNInfo *VNI = LI.getNextValue(S.start, A);
+ LiveRange::Segment NewSeg(S.start, S.end, VNI);
+ LI.addSegment(NewSeg);
+ }
+ LIS->removeInterval(RegA);
+ }
+
+ if (LV) {
+ if (MI->getOperand(SrcIdx).isKill())
+ LV->removeVirtualRegisterKilled(RegB, *MI);
+ LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB);
+ LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA);
+ SrcInfo.AliveBlocks |= DstInfo.AliveBlocks;
+ for (auto *KillMI : DstInfo.Kills)
+ LV->addVirtualRegisterKilled(RegB, *KillMI, false);
+ }
+ }
+ return !NeedCopy;
+}
+
/// Reduce two-address instructions to two operands.
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
@@ -1722,6 +1776,14 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
}
+ if (mi->getOpcode() == TargetOpcode::STATEPOINT &&
+ processStatepoint(&*mi, TiedOperands)) {
+ TiedOperands.clear();
+ LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ mi = nmi;
+ continue;
+ }
+
// Now iterate over the information collected above.
for (auto &TO : TiedOperands) {
processTiedPairs(&*mi, TO.second, Dist);
@@ -1733,11 +1795,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// From %reg = INSERT_SUBREG %reg, %subreg, subidx
// To %reg:subidx = COPY %subreg
unsigned SubIdx = mi->getOperand(3).getImm();
- mi->RemoveOperand(3);
+ mi->removeOperand(3);
assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
mi->getOperand(0).setSubReg(SubIdx);
mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
- mi->RemoveOperand(1);
+ mi->removeOperand(1);
mi->setDesc(TII->get(TargetOpcode::COPY));
LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
@@ -1858,7 +1920,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");
MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
- MI.RemoveOperand(j);
+ MI.removeOperand(j);
} else {
if (LIS)
LIS->RemoveMachineInstrFromMaps(MI);
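
A hedged illustration of the statepoint rewrite introduced above, with made-up virtual register numbers:

// Before: the statepoint defines a fresh register tied to the use of %4.
//   %10:gr64 = STATEPOINT ..., %4:gr64(tied-def 0)
// After: the def is renamed to the tied use and %10 disappears entirely.
//   %4:gr64 = STATEPOINT ..., %4:gr64(tied-def 0)
// Remaining uses of %10 are rewritten via MRI->replaceRegWith, and no
// COPY is required because the tied use is killed at the STATEPOINT.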
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 01ea171e5ea2..166a3c413f6a 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -24,15 +24,13 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -44,9 +42,9 @@
using namespace llvm;
-static cl::opt<bool>
-DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false),
- cl::desc("Disable type promotion pass"));
+static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable type promotion pass"));
// The goal of this pass is to enable more efficient code generation for
// operations on narrow types (i.e. types with < 32 bits) and this is a
@@ -103,17 +101,16 @@ DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false),
namespace {
class IRPromoter {
LLVMContext &Ctx;
- IntegerType *OrigTy = nullptr;
unsigned PromotedWidth = 0;
- SetVector<Value*> &Visited;
- SetVector<Value*> &Sources;
- SetVector<Instruction*> &Sinks;
+ SetVector<Value *> &Visited;
+ SetVector<Value *> &Sources;
+ SetVector<Instruction *> &Sinks;
SmallPtrSetImpl<Instruction *> &SafeWrap;
IntegerType *ExtTy = nullptr;
- SmallPtrSet<Value*, 8> NewInsts;
- SmallPtrSet<Instruction*, 4> InstsToRemove;
- DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
- SmallPtrSet<Value*, 8> Promoted;
+ SmallPtrSet<Value *, 8> NewInsts;
+ SmallPtrSet<Instruction *, 4> InstsToRemove;
+ DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap;
+ SmallPtrSet<Value *, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
void ExtendSources();
@@ -123,16 +120,13 @@ class IRPromoter {
void Cleanup();
public:
- IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
+ IRPromoter(LLVMContext &C, unsigned Width,
SetVector<Value *> &visited, SetVector<Value *> &sources,
SetVector<Instruction *> &sinks,
SmallPtrSetImpl<Instruction *> &wrap)
- : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
+ : Ctx(C), PromotedWidth(Width), Visited(visited),
Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
- assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <
- ExtTy->getPrimitiveSizeInBits().getFixedSize() &&
- "Original type not smaller than extended type");
}
void Mutate();
@@ -142,8 +136,8 @@ class TypePromotion : public FunctionPass {
unsigned TypeSize = 0;
LLVMContext *Ctx = nullptr;
unsigned RegisterBitWidth = 0;
- SmallPtrSet<Value*, 16> AllVisited;
- SmallPtrSet<Instruction*, 8> SafeToPromote;
+ SmallPtrSet<Value *, 16> AllVisited;
+ SmallPtrSet<Instruction *, 8> SafeToPromote;
SmallPtrSet<Instruction *, 4> SafeWrap;
// Does V have a result type of the same size as TypeSize?
@@ -190,7 +184,7 @@ public:
bool runOnFunction(Function &F) override;
};
-}
+} // namespace
static bool GenerateSignBits(Instruction *I) {
unsigned Opc = I->getOpcode();
@@ -245,7 +239,7 @@ bool TypePromotion::isSource(Value *V) {
bool TypePromotion::isSink(Value *V) {
// TODO The truncate also isn't actually necessary because we have already
// proved that the data value is kept within the range of the original data
- // type.
+ // type. We currently remove any truncs inserted for handling zext sinks.
// Sinks are:
// - points where the value in the register is being observed, such as an
@@ -269,7 +263,7 @@ bool TypePromotion::isSink(Value *V) {
/// Return whether this instruction can safely wrap.
bool TypePromotion::isSafeWrap(Instruction *I) {
- // We can support a, potentially, wrapping instruction (I) if:
+ // We can support a potentially wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
// - The wrapping value (I) is decreasing, i.e would underflow - wrapping
@@ -356,7 +350,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
if (!OverflowConst.isNonPositive())
return false;
- // Using C1 = OverflowConst and C2 = ICmpConst, we can use either prove that:
+ // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
if (OverflowConst.sgt(ICmpConst)) {
@@ -404,7 +398,7 @@ static bool isPromotedResultSafe(Instruction *I) {
}
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
- SmallVector<Instruction*, 4> Users;
+ SmallVector<Instruction *, 4> Users;
Instruction *InstTo = dyn_cast<Instruction>(To);
bool ReplacedAll = true;
@@ -485,12 +479,18 @@ void IRPromoter::PromoteTree() {
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
- Constant *NewConst = SafeWrap.contains(I)
+ // For subtract, we don't need to sext the constant. We only put it in
+ // SafeWrap because SafeWrap.size() is used elsewhere.
+ // For cmp, we need to sign extend a constant appearing in either
+ // operand. For add, we should only sign extend the RHS.
+ Constant *NewConst = (SafeWrap.contains(I) &&
+ (I->getOpcode() == Instruction::ICmp || i == 1) &&
+ I->getOpcode() != Instruction::Sub)
? ConstantExpr::getSExt(Const, ExtTy)
: ConstantExpr::getZExt(Const, ExtTy);
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
- I->setOperand(i, UndefValue::get(ExtTy));
+ I->setOperand(i, ConstantInt::get(ExtTy, 0));
}
// Mutate the result type, unless this is an icmp or switch.
@@ -506,7 +506,7 @@ void IRPromoter::TruncateSinks() {
IRBuilder<> Builder{Ctx};
- auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * {
if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
return nullptr;
@@ -514,7 +514,7 @@ void IRPromoter::TruncateSinks() {
return nullptr;
LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
- << *V << "\n");
+ << *V << "\n");
Builder.SetInsertPoint(cast<Instruction>(V));
auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
if (Trunc)
@@ -550,6 +550,11 @@ void IRPromoter::TruncateSinks() {
continue;
}
+ // Don't insert a trunc for a zext that can still be legally promoted.
+ if (auto ZExt = dyn_cast<ZExtInst>(I))
+ if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth)
+ continue;
+
// Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Type *Ty = TruncTysMap[I][i];
@@ -576,16 +581,14 @@ void IRPromoter::Cleanup() {
Value *Src = ZExt->getOperand(0);
if (ZExt->getSrcTy() == ZExt->getDestTy()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
- << "\n");
+ << "\n");
ReplaceAllUsersOfWith(ZExt, Src);
continue;
}
- // Unless they produce a value that is narrower than ExtTy, we can
- // replace the result of the zext with the input of a newly inserted
- // trunc.
- if (NewInsts.count(Src) && isa<TruncInst>(Src) &&
- Src->getType() == OrigTy) {
+ // We've inserted a trunc for a zext sink, but we already know that the
+ // input is in range, negating the need for the trunc.
+ if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
auto *Trunc = cast<TruncInst>(Src);
assert(Trunc->getOperand(0)->getType() == ExtTy &&
"expected inserted trunc to be operating on i32");
@@ -615,7 +618,7 @@ void IRPromoter::ConvertTruncs() {
unsigned NumBits = DestTy->getScalarSizeInBits();
ConstantInt *Mask =
- ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
+ ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
if (auto *I = dyn_cast<Instruction>(Masked))
@@ -626,8 +629,8 @@ void IRPromoter::ConvertTruncs() {
}
void IRPromoter::Mutate() {
- LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from "
- << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
+ << PromotedWidth << "-bits\n");
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
@@ -677,8 +680,7 @@ bool TypePromotion::isSupportedType(Value *V) {
if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
- if (!isa<IntegerType>(Ty) ||
- cast<IntegerType>(Ty)->getBitWidth() == 1 ||
+ if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 ||
cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
return false;
@@ -738,13 +740,12 @@ bool TypePromotion::isSupportedValue(Value *V) {
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool TypePromotion::isLegalToPromote(Value *V) {
-
auto *I = dyn_cast<Instruction>(V);
if (!I)
return true;
if (SafeToPromote.count(I))
- return true;
+ return true;
if (isPromotedResultSafe(I) || isSafeWrap(I)) {
SafeToPromote.insert(I);
@@ -765,10 +766,10 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
<< TypeSize << " bits to " << PromotedWidth << "\n");
- SetVector<Value*> WorkList;
- SetVector<Value*> Sources;
- SetVector<Instruction*> Sinks;
- SetVector<Value*> CurrentVisited;
+ SetVector<Value *> WorkList;
+ SetVector<Value *> Sources;
+ SetVector<Instruction *> Sinks;
+ SetVector<Value *> CurrentVisited;
WorkList.insert(V);
// Return true if V was added to the worklist as a supported instruction,
@@ -839,14 +840,15 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
}
}
- LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n";
- for (auto *I : CurrentVisited)
- I->dump();
- );
+ LLVM_DEBUG({
+ dbgs() << "IR Promotion: Visited nodes:\n";
+ for (auto *I : CurrentVisited)
+ I->dump();
+ });
unsigned ToPromote = 0;
unsigned NonFreeArgs = 0;
- SmallPtrSet<BasicBlock*, 4> Blocks;
+ SmallPtrSet<BasicBlock *, 4> Blocks;
for (auto *V : CurrentVisited) {
if (auto *I = dyn_cast<Instruction>(V))
Blocks.insert(I->getParent());
@@ -860,16 +862,16 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
if (Sinks.count(cast<Instruction>(V)))
continue;
- ++ToPromote;
- }
+ ++ToPromote;
+ }
// DAG optimizations should be able to handle these cases better, especially
// for function arguments.
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
return false;
- IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth,
- CurrentVisited, Sources, Sinks, SafeWrap);
+ IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
+ SafeWrap);
Promoter.Mutate();
return true;
}
@@ -893,14 +895,14 @@ bool TypePromotion::runOnFunction(Function &F) {
const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
const TargetTransformInfo &TII =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
RegisterBitWidth =
TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
Ctx = &F.getParent()->getContext();
// Search up from icmps to try to promote their operands.
for (BasicBlock &BB : F) {
- for (auto &I : BB) {
+ for (Instruction &I : BB) {
if (AllVisited.count(&I))
continue;
@@ -909,8 +911,7 @@ bool TypePromotion::runOnFunction(Function &F) {
auto *ICmp = cast<ICmpInst>(&I);
// Skip signed or pointer compares
- if (ICmp->isSigned() ||
- !isa<IntegerType>(ICmp->getOperand(0)->getType()))
+ if (ICmp->isSigned() || !isa<IntegerType>(ICmp->getOperand(0)->getType()))
continue;
LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
@@ -921,13 +922,13 @@ bool TypePromotion::runOnFunction(Function &F) {
if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
break;
- if (TLI->getTypeAction(ICmp->getContext(), SrcVT) !=
+ if (TLI->getTypeAction(*Ctx, SrcVT) !=
TargetLowering::TypePromoteInteger)
break;
- EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT);
+ EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
- << "for promoted type\n");
+ << "for promoted type\n");
break;
}
@@ -936,13 +937,7 @@ bool TypePromotion::runOnFunction(Function &F) {
}
}
}
- LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
- dbgs() << F;
- report_fatal_error("Broken function after type promotion");
- });
}
- if (MadeChange)
- LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n");
AllVisited.clear();
SafeToPromote.clear();
@@ -956,6 +951,4 @@ INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
char TypePromotion::ID = 0;
-FunctionPass *llvm::createTypePromotionPass() {
- return new TypePromotion();
-}
+FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); }
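
ConvertTruncs, touched above, rewrites a trunc that merely narrows a promoted value into a mask, so the whole chain keeps operating at the promoted width. A hedged IR-level sketch for an i8 destination:

// %t = trunc i32 %x to i8        ; before
// %masked = and i32 %x, 255      ; after: 255 == APInt::getMaxValue(8)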
diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 3426a03b6083..5e8514f525e9 100644
--- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -26,16 +26,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -131,8 +125,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
if (start->getOperand(i).isMBB() &&
start->getOperand(i).getMBB() == &BB) {
- start->RemoveOperand(i);
- start->RemoveOperand(i-1);
+ start->removeOperand(i);
+ start->removeOperand(i-1);
}
start++;
@@ -162,8 +156,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
while (phi != BB.end() && phi->isPHI()) {
for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
if (!preds.count(phi->getOperand(i).getMBB())) {
- phi->RemoveOperand(i);
- phi->RemoveOperand(i-1);
+ phi->removeOperand(i);
+ phi->removeOperand(i-1);
ModifiedPHI = true;
}
diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index 5f59cb4643f2..8b5b585090f5 100644
--- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,19 +42,18 @@ using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
- cl::ZeroOrMore, cl::init(false));
+ cl::init(false));
static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
- cl::ZeroOrMore, cl::init(true));
+ cl::init(true));
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
- cl::Hidden, cl::ZeroOrMore,
- cl::init(1));
+ cl::Hidden, cl::init(1));
// Check if the scheduler should penalize instructions that are available to
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
- cl::ZeroOrMore, cl::init(true));
+ cl::init(true));
// This value is used to determine if a register class is a high pressure set.
// We compute the maximum number of registers needed and divide by the total
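
The cl::ZeroOrMore removals here match the ones in TargetPassConfig above: by this point cl::opt tolerates repeated occurrences by default, so the flag had become redundant. The simplified form:

static llvm::cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
                                               llvm::cl::Hidden,
                                               llvm::cl::init(false));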
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 0c42bef82005..f577aff39ea7 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/WithColor.h"
using namespace llvm;
EVT EVT::changeExtendedTypeToInteger() const {
@@ -179,19 +180,22 @@ std::string EVT::getEVTString() const {
/// specified EVT. For integer types, this returns an unsigned type. Note
/// that this will abort for types that cannot be represented.
Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ // clang-format off
switch (V.SimpleTy) {
default:
assert(isExtended() && "Type is not extended!");
return LLVMTy;
case MVT::isVoid: return Type::getVoidTy(Context);
case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i2: return Type::getIntNTy(Context, 2);
+ case MVT::i4: return Type::getIntNTy(Context, 4);
case MVT::i8: return Type::getInt8Ty(Context);
case MVT::i16: return Type::getInt16Ty(Context);
case MVT::i32: return Type::getInt32Ty(Context);
case MVT::i64: return Type::getInt64Ty(Context);
case MVT::i128: return IntegerType::get(Context, 128);
case MVT::f16: return Type::getHalfTy(Context);
- case MVT::bf16: return Type::getBFloatTy(Context);
+ case MVT::bf16: return Type::getBFloatTy(Context);
case MVT::f32: return Type::getFloatTy(Context);
case MVT::f64: return Type::getDoubleTy(Context);
case MVT::f80: return Type::getX86_FP80Ty(Context);
@@ -228,6 +232,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt1Ty(Context), 512);
case MVT::v1024i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1024);
+ case MVT::v128i2:
+ return FixedVectorType::get(Type::getIntNTy(Context, 2), 128);
+ case MVT::v64i4:
+ return FixedVectorType::get(Type::getIntNTy(Context, 4), 64);
case MVT::v1i8:
return FixedVectorType::get(Type::getInt8Ty(Context), 1);
case MVT::v2i8:
@@ -500,6 +508,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return ScalableVectorType::get(Type::getBFloatTy(Context), 4);
case MVT::nxv8bf16:
return ScalableVectorType::get(Type::getBFloatTy(Context), 8);
+ case MVT::nxv16bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 16);
+ case MVT::nxv32bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 32);
case MVT::nxv1f32:
return ScalableVectorType::get(Type::getFloatTy(Context), 1);
case MVT::nxv2f32:
@@ -520,6 +532,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return ScalableVectorType::get(Type::getDoubleTy(Context), 8);
case MVT::Metadata: return Type::getMetadataTy(Context);
}
+ // clang-format on
}
/// Return the value type corresponding to the specified type. This returns all
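The new i2/i4 scalar cases, the v128i2/v64i4 fixed vectors, and the nxv16bf16/nxv32bf16 scalable vectors all reuse existing factories — Type::getIntNTy and the vector types' get() methods — since IR already supports arbitrary-width integers. A small sketch of those calls, assuming LLVM headers are on the include path:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"
  using namespace llvm;

  void makeNarrowTypes(LLVMContext &Ctx) {
    Type *I2 = Type::getIntNTy(Ctx, 2);          // arbitrary-width integer
    Type *V64I4 = FixedVectorType::get(Type::getIntNTy(Ctx, 4), 64);
    Type *NxV16BF16 = ScalableVectorType::get(Type::getBFloatTy(Ctx), 16);
    (void)I2; (void)V64I4; (void)NxV16BF16;
  }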
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c04a7b28eff9..aa6645227edb 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -77,8 +77,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
@@ -212,9 +212,15 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
assert(F.hasPersonalityFn() && "Personality function not found");
- // __wasm_lpad_context global variable
+ // __wasm_lpad_context global variable.
+ // This variable should be thread local. If the target does not support TLS,
+ // we depend on CoalesceFeaturesAndStripAtomics to downgrade it to
+ // a non-thread-local one, in which case we don't allow this object to be
+ // linked with other objects using shared memory.
LPadContextGV = cast<GlobalVariable>(
M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
+ LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
+
LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0,
"lpad_index_gep");
LSDAField =
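The comment added above explains the intent; mechanically, making an existing global thread-local is a one-call change on the GlobalVariable. A sketch under the assumption that no conflicting declaration exists (the global's name here is hypothetical):

  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  GlobalVariable *makeThreadLocalContext(Module &M, Type *Ty) {
    // getOrInsertGlobal returns a Constant*; the cast is safe as long as
    // no prior declaration with a mismatched type exists.
    auto *GV = cast<GlobalVariable>(
        M.getOrInsertGlobal("example_context", Ty));
    GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
    return GV;
  }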
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index d31183e46d65..b835503ee9ed 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -19,14 +19,14 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -1256,4 +1256,4 @@ void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
}
-WinEHFuncInfo::WinEHFuncInfo() {}
+WinEHFuncInfo::WinEHFuncInfo() = default;
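This `= default` change, like the destructor changes in the DWARFLinker and CodeView files below, is behavior-preserving: an explicitly defaulted special member can be trivial where a user-provided empty body cannot, and it documents that nothing was intentionally omitted. In miniature:

  struct Info {
    Info() = default; // preferred over `Info() {}`
  };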
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index b56095ca9a96..50c52190c1f6 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -10,7 +10,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
@@ -19,9 +18,11 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -132,9 +133,9 @@ static bool isTypeTag(uint16_t Tag) {
return false;
}
-AddressesMap::~AddressesMap() {}
+AddressesMap::~AddressesMap() = default;
-DwarfEmitter::~DwarfEmitter() {}
+DwarfEmitter::~DwarfEmitter() = default;
static Optional<StringRef> StripTemplateParameters(StringRef Name) {
// We are looking for template parameters to strip from Name. e.g.
@@ -360,16 +361,16 @@ static bool analyzeContextInfo(
}
Info.ParentIdx = Current.ParentIdx;
- bool InClangModule = CU.isClangModule() || Current.InImportedModule;
- if (CU.hasODR() || InClangModule) {
+ Info.InModuleScope = CU.isClangModule() || Current.InImportedModule;
+ if (CU.hasODR() || Info.InModuleScope) {
if (Current.Context) {
auto PtrInvalidPair = Contexts.getChildDeclContext(
- *Current.Context, Current.Die, CU, InClangModule);
+ *Current.Context, Current.Die, CU, Info.InModuleScope);
Current.Context = PtrInvalidPair.getPointer();
Info.Ctxt =
PtrInvalidPair.getInt() ? nullptr : PtrInvalidPair.getPointer();
if (Info.Ctxt)
- Info.Ctxt->setDefinedInClangModule(InClangModule);
+ Info.Ctxt->setDefinedInClangModule(Info.InModuleScope);
} else
Info.Ctxt = Current.Context = nullptr;
}
@@ -439,8 +440,7 @@ unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
// if the variable has a valid relocation, so that the DIEInfo is filled.
// However, we don't want a static variable in a function to force us to keep
// the enclosing function, unless requested explicitly.
- const bool HasLiveMemoryLocation =
- RelocMgr.hasLiveMemoryLocation(DIE, MyInfo);
+ const bool HasLiveMemoryLocation = RelocMgr.isLiveVariable(DIE, MyInfo);
if (!HasLiveMemoryLocation || ((Flags & TF_InFunctionScope) &&
!LLVM_UNLIKELY(Options.KeepFunctionForStatic)))
return Flags;
@@ -468,8 +468,8 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
if (!LowPc)
return Flags;
- assert(LowPc.hasValue() && "low_pc attribute is not an address.");
- if (!RelocMgr.hasLiveAddressRange(DIE, MyInfo))
+ assert(LowPc && "low_pc attribute is not an address.");
+ if (!RelocMgr.isLiveSubprogram(DIE, MyInfo))
return Flags;
if (Options.Verbose) {
@@ -490,7 +490,7 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
// generation bugs aside, this is really wrong in the case of labels, where
// a label marking the end of a function will have a PC == CU's high_pc.
if (dwarf::toAddress(OrigUnit.getUnitDIE().find(dwarf::DW_AT_high_pc))
- .getValueOr(UINT64_MAX) <= LowPc)
+ .value_or(UINT64_MAX) <= LowPc)
return Flags;
Unit.addLabelLowPc(*LowPc, MyInfo.AddrAdjust);
return Flags | TF_Keep;
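The .getValueOr→.value_or rewrites here (and .hasValue→operator bool in the asserts) follow llvm::Optional's move toward std::optional's vocabulary. A standalone sketch using std::optional, whose interface the LLVM class now mirrors:

  #include <cstdint>
  #include <optional>

  uint64_t highPcOrSentinel(std::optional<uint64_t> HighPc) {
    // Same shape as the patched dwarf::toAddress(...).value_or(UINT64_MAX).
    return HighPc.value_or(UINT64_MAX);
  }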
@@ -616,6 +616,27 @@ void DWARFLinker::lookForChildDIEsToKeep(
}
}
+static bool isODRCanonicalCandidate(const DWARFDie &Die, CompileUnit &CU) {
+ CompileUnit::DIEInfo &Info = CU.getInfo(Die);
+
+ if (!Info.Ctxt || (Die.getTag() == dwarf::DW_TAG_namespace))
+ return false;
+
+ if (!CU.hasODR() && !Info.InModuleScope)
+ return false;
+
+ return !Info.Incomplete && Info.Ctxt != CU.getInfo(Info.ParentIdx).Ctxt;
+}
+
+void DWARFLinker::markODRCanonicalDie(const DWARFDie &Die, CompileUnit &CU) {
+ CompileUnit::DIEInfo &Info = CU.getInfo(Die);
+
+ Info.ODRMarkingDone = true;
+ if (Info.Keep && isODRCanonicalCandidate(Die, CU) &&
+ !Info.Ctxt->hasCanonicalDIE())
+ Info.Ctxt->setHasCanonicalDIE();
+}
+
/// Look at DIEs referenced by the given DIE and decide whether they should be
/// kept. All DIEs referenced through attributes should be kept.
void DWARFLinker::lookForRefDIEsToKeep(
@@ -645,8 +666,6 @@ void DWARFLinker::lookForRefDIEsToKeep(
if (auto RefDie =
resolveDIEReference(File, Units, Val, Die, ReferencedCU)) {
CompileUnit::DIEInfo &Info = ReferencedCU->getInfo(RefDie);
- bool IsModuleRef = Info.Ctxt && Info.Ctxt->getCanonicalDIEOffset() &&
- Info.Ctxt->isDefinedInClangModule();
// If the referenced DIE has a DeclContext that has already been
// emitted, then do not keep the one in this CU. We'll link to
// the canonical DIE in cloneDieReferenceAttribute.
@@ -657,15 +676,14 @@ void DWARFLinker::lookForRefDIEsToKeep(
//
// FIXME: compatibility with dsymutil-classic. There is no
// reason not to unique ref_addr references.
- if (AttrSpec.Form != dwarf::DW_FORM_ref_addr && (UseOdr || IsModuleRef) &&
- Info.Ctxt &&
- Info.Ctxt != ReferencedCU->getInfo(Info.ParentIdx).Ctxt &&
- Info.Ctxt->getCanonicalDIEOffset() && isODRAttribute(AttrSpec.Attr))
+ if (AttrSpec.Form != dwarf::DW_FORM_ref_addr &&
+ isODRAttribute(AttrSpec.Attr) && Info.Ctxt &&
+ Info.Ctxt->hasCanonicalDIE())
continue;
// Keep a module forward declaration if there is no definition.
if (!(isODRAttribute(AttrSpec.Attr) && Info.Ctxt &&
- Info.Ctxt->getCanonicalDIEOffset()))
+ Info.Ctxt->hasCanonicalDIE()))
Info.Prune = false;
ReferencedDIEs.emplace_back(RefDie, *ReferencedCU);
}
@@ -756,6 +774,9 @@ void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
lookForParentDIEsToKeep(Current.AncestorIdx, Current.CU, Current.Flags,
Worklist);
continue;
+ case WorklistItemType::MarkODRCanonicalDie:
+ markODRCanonicalDie(Current.Die, Current.CU);
+ continue;
case WorklistItemType::LookForDIEsToKeep:
break;
}
@@ -778,6 +799,16 @@ void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
Current.Flags = shouldKeepDIE(AddressesMap, Ranges, Current.Die, File,
Current.CU, MyInfo, Current.Flags);
+ // We need to mark the context for the canonical DIE at the end of the
+ // normal traversal (not TF_DependencyWalk), or after the normal traversal
+ // if the DIE was not marked as kept.
+ if (!(Current.Flags & TF_DependencyWalk) ||
+ (MyInfo.ODRMarkingDone && !MyInfo.Keep)) {
+ if (Current.CU.hasODR() || MyInfo.InModuleScope)
+ Worklist.emplace_back(Current.Die, Current.CU,
+ WorklistItemType::MarkODRCanonicalDie);
+ }
+
// Finish by looking for child DIEs. Because of the LIFO worklist we need
// to schedule that work before any subsequent items are added to the
// worklist.
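The ordering trick the two comments above rely on: the worklist is LIFO, so an item pushed earlier is processed later. Queuing MarkODRCanonicalDie before the child DIEs therefore makes the marking run after the whole subtree has been visited. A self-contained illustration:

  #include <cstdio>
  #include <vector>

  enum class Item { Visit, Finalize };

  int main() {
    std::vector<Item> Worklist;
    Worklist.push_back(Item::Finalize); // pushed first, runs second
    Worklist.push_back(Item::Visit);    // pushed last, runs first
    while (!Worklist.empty()) {
      Item I = Worklist.back();
      Worklist.pop_back();
      std::puts(I == Item::Visit ? "visit" : "finalize");
    }
  }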
@@ -845,7 +876,7 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) {
unsigned DWARFLinker::DIECloner::cloneStringAttribute(
DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val,
- const DWARFUnit &U, OffsetsStringPool &StringPool, AttributesInfo &Info) {
+ const DWARFUnit &, OffsetsStringPool &StringPool, AttributesInfo &Info) {
Optional<const char *> String = dwarf::toString(Val);
if (!String)
return 0;
@@ -875,7 +906,6 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
DIE *NewRefDie = nullptr;
CompileUnit *RefUnit = nullptr;
- DeclContext *Ctxt = nullptr;
DWARFDie RefDie =
Linker.resolveDIEReference(File, CompileUnits, Val, InputDIE, RefUnit);
@@ -888,14 +918,14 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
// If we already have emitted an equivalent DeclContext, just point
// at it.
- if (isODRAttribute(AttrSpec.Attr)) {
- Ctxt = RefInfo.Ctxt;
- if (Ctxt && Ctxt->getCanonicalDIEOffset()) {
- DIEInteger Attr(Ctxt->getCanonicalDIEOffset());
- Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
- dwarf::DW_FORM_ref_addr, Attr);
- return U.getRefAddrByteSize();
- }
+ if (isODRAttribute(AttrSpec.Attr) && RefInfo.Ctxt &&
+ RefInfo.Ctxt->getCanonicalDIEOffset()) {
+ assert(RefInfo.Ctxt->hasCanonicalDIE() &&
+ "Offset to canonical die is set, but context is not marked");
+ DIEInteger Attr(RefInfo.Ctxt->getCanonicalDIEOffset());
+ Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+ dwarf::DW_FORM_ref_addr, Attr);
+ return U.getRefAddrByteSize();
}
if (!RefInfo.Clone) {
@@ -925,7 +955,7 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
// A forward reference. Note and fixup later.
Attr = 0xBADDEF;
Unit.noteForwardReference(
- NewRefDie, RefUnit, Ctxt,
+ NewRefDie, RefUnit, RefInfo.Ctxt,
Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
dwarf::DW_FORM_ref_addr, DIEInteger(Attr)));
}
@@ -1356,10 +1386,10 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
assert(Die->getTag() == InputDIE.getTag());
Die->setOffset(OutOffset);
- if ((Unit.hasODR() || Unit.isClangModule()) && !Info.Incomplete &&
- Die->getTag() != dwarf::DW_TAG_namespace && Info.Ctxt &&
- Info.Ctxt != Unit.getInfo(Info.ParentIdx).Ctxt &&
- !Info.Ctxt->getCanonicalDIEOffset()) {
+ if (isODRCanonicalCandidate(InputDIE, Unit) && Info.Ctxt &&
+ (Info.Ctxt->getCanonicalDIEOffset() == 0)) {
+ if (!Info.Ctxt->hasCanonicalDIE())
+ Info.Ctxt->setHasCanonicalDIE();
// We are about to emit a DIE that is the root of its own valid
// DeclContext tree. Make the current offset the canonical offset
// for this context.
@@ -1384,8 +1414,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
DWARFDataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize());
// Modify the copy with relocated addresses.
- if (ObjFile.Addresses->areRelocationsResolved() &&
- ObjFile.Addresses->applyValidRelocs(DIECopy, Offset,
+ if (ObjFile.Addresses->applyValidRelocs(DIECopy, Offset,
Data.isLittleEndian())) {
// If we applied relocations, we store the value of high_pc that was
// potentially stored in the input DIE. If high_pc is an address
@@ -1481,12 +1510,12 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
uint32_t Hash = hashFullyQualifiedName(InputDIE, Unit, File);
uint64_t RuntimeLang =
dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_runtime_class))
- .getValueOr(0);
+ .value_or(0);
bool ObjCClassIsImplementation =
(RuntimeLang == dwarf::DW_LANG_ObjC ||
RuntimeLang == dwarf::DW_LANG_ObjC_plus_plus) &&
dwarf::toUnsigned(InputDIE.find(dwarf::DW_AT_APPLE_objc_complete_type))
- .getValueOr(0);
+ .value_or(0);
Unit.addTypeAccelerator(Die, AttrInfo.Name, ObjCClassIsImplementation,
Hash);
}
@@ -1788,16 +1817,19 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
void DWARFLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) {
switch (Options.TheAccelTableKind) {
- case AccelTableKind::Apple:
+ case DwarfLinkerAccelTableKind::None:
+ // Nothing to do.
+ break;
+ case DwarfLinkerAccelTableKind::Apple:
emitAppleAcceleratorEntriesForUnit(Unit);
break;
- case AccelTableKind::Dwarf:
+ case DwarfLinkerAccelTableKind::Dwarf:
emitDwarfAcceleratorEntriesForUnit(Unit);
break;
- case AccelTableKind::Pub:
+ case DwarfLinkerAccelTableKind::Pub:
emitPubAcceleratorEntriesForUnit(Unit);
break;
- case AccelTableKind::Default:
+ case DwarfLinkerAccelTableKind::Default:
llvm_unreachable("The default must be updated to a concrete value.");
break;
}
@@ -2216,7 +2248,7 @@ uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
}
void DWARFLinker::updateAccelKind(DWARFContext &Dwarf) {
- if (Options.TheAccelTableKind != AccelTableKind::Default)
+ if (Options.TheAccelTableKind != DwarfLinkerAccelTableKind::Default)
return;
auto &DwarfObj = Dwarf.getDWARFObj();
@@ -2342,11 +2374,11 @@ bool DWARFLinker::link() {
// would affect the decision. However, as they're built with the same
// compiler and flags, it is safe to assume that they will follow the
// decision made here.
- if (Options.TheAccelTableKind == AccelTableKind::Default) {
+ if (Options.TheAccelTableKind == DwarfLinkerAccelTableKind::Default) {
if (AtLeastOneDwarfAccelTable && !AtLeastOneAppleAccelTable)
- Options.TheAccelTableKind = AccelTableKind::Dwarf;
+ Options.TheAccelTableKind = DwarfLinkerAccelTableKind::Dwarf;
else
- Options.TheAccelTableKind = AccelTableKind::Apple;
+ Options.TheAccelTableKind = DwarfLinkerAccelTableKind::Apple;
}
for (LinkContext &OptContext : ObjectContexts) {
@@ -2362,6 +2394,10 @@ bool DWARFLinker::link() {
if (!OptContext.File.Dwarf)
continue;
+
+ if (Options.VerifyInputDWARF)
+ verify(OptContext.File);
+
// Look for relocations that correspond to address map entries.
// There was findvalidrelocations previously ... probably we need to gather
@@ -2521,19 +2557,22 @@ bool DWARFLinker::link() {
TheDwarfEmitter->emitAbbrevs(Abbreviations, MaxDwarfVersion);
TheDwarfEmitter->emitStrings(OffsetsStringPool);
switch (Options.TheAccelTableKind) {
- case AccelTableKind::Apple:
+ case DwarfLinkerAccelTableKind::None:
+ // Nothing to do.
+ break;
+ case DwarfLinkerAccelTableKind::Apple:
TheDwarfEmitter->emitAppleNames(AppleNames);
TheDwarfEmitter->emitAppleNamespaces(AppleNamespaces);
TheDwarfEmitter->emitAppleTypes(AppleTypes);
TheDwarfEmitter->emitAppleObjc(AppleObjc);
break;
- case AccelTableKind::Dwarf:
+ case DwarfLinkerAccelTableKind::Dwarf:
TheDwarfEmitter->emitDebugNames(DebugNames);
break;
- case AccelTableKind::Pub:
+ case DwarfLinkerAccelTableKind::Pub:
// Already emitted by emitPubAcceleratorEntriesForUnit.
break;
- case AccelTableKind::Default:
+ case DwarfLinkerAccelTableKind::Default:
llvm_unreachable("Default should have already been resolved.");
break;
}
@@ -2631,4 +2670,15 @@ bool DWARFLinker::link() {
return true;
}
+bool DWARFLinker::verify(const DWARFFile &File) {
+ assert(File.Dwarf);
+
+ DIDumpOptions DumpOpts;
+ if (!File.Dwarf->verify(llvm::outs(), DumpOpts.noImplicitRecursion())) {
+ reportWarning("input verification failed", File);
+ return false;
+ }
+ return true;
+}
+
} // namespace llvm
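The new verify() hook is a thin wrapper over the DWARF verifier, gated by the VerifyInputDWARF option checked earlier in link(). Calling the verifier directly looks like this sketch, assuming a live DWARFContext:

  #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  bool verifyInput(DWARFContext &DC) {
    DIDumpOptions Opts;
    // noImplicitRecursion() keeps the verifier's dumps from chasing
    // referenced DIEs, matching the hunk above.
    return DC.verify(outs(), Opts.noImplicitRecursion());
  }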
diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
index acecb1788d10..e9e8be7fd008 100644
--- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -90,9 +90,11 @@ void CompileUnit::fixupForwardReferences() {
PatchLocation Attr;
DeclContext *Ctxt;
std::tie(RefDie, RefUnit, Ctxt, Attr) = Ref;
- if (Ctxt && Ctxt->getCanonicalDIEOffset())
+ if (Ctxt && Ctxt->hasCanonicalDIE()) {
+ assert(Ctxt->getCanonicalDIEOffset() &&
+ "Canonical die offset is not set");
Attr.set(Ctxt->getCanonicalDIEOffset());
- else
+ } else
Attr.set(RefDie->getOffset() + RefUnit->getStartOffset());
}
}
diff --git a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
index 5ab2ad0780a2..dfdfc5857569 100644
--- a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 99e12fce6513..55ff6b14f945 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -18,7 +18,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/MC/TargetRegistry.h"
@@ -68,7 +67,7 @@ bool DwarfStreamer::init(Triple TheTriple,
if (!MII)
return error("no instr info info for target " + TripleName, Context), false;
- MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, *MC);
+ MCE = TheTarget->createMCCodeEmitter(*MII, *MC);
if (!MCE)
return error("no code emitter for target " + TripleName, Context), false;
@@ -114,10 +113,10 @@ bool DwarfStreamer::init(Triple TheTriple,
return true;
}
-void DwarfStreamer::finish() { MS->Finish(); }
+void DwarfStreamer::finish() { MS->finish(); }
void DwarfStreamer::switchToDebugInfoSection(unsigned DwarfVersion) {
- MS->SwitchSection(MOFI->getDwarfInfoSection());
+ MS->switchSection(MOFI->getDwarfInfoSection());
MC->setDwarfVersion(DwarfVersion);
}
@@ -175,14 +174,14 @@ void DwarfStreamer::emitCompileUnitHeader(CompileUnit &Unit,
void DwarfStreamer::emitAbbrevs(
const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
unsigned DwarfVersion) {
- MS->SwitchSection(MOFI->getDwarfAbbrevSection());
+ MS->switchSection(MOFI->getDwarfAbbrevSection());
MC->setDwarfVersion(DwarfVersion);
Asm->emitDwarfAbbrevs(Abbrevs);
}
/// Recursively emit the DIE tree rooted at \p Die.
void DwarfStreamer::emitDIE(DIE &Die) {
- MS->SwitchSection(MOFI->getDwarfInfoSection());
+ MS->switchSection(MOFI->getDwarfInfoSection());
Asm->emitDwarfDIE(Die);
DebugInfoSectionSize += Die.getSize();
}
@@ -201,7 +200,7 @@ void DwarfStreamer::emitSectionContents(StringRef SecData, StringRef SecName) {
.Default(nullptr);
if (Section) {
- MS->SwitchSection(Section);
+ MS->switchSection(Section);
MS->emitBytes(SecData);
}
@@ -221,7 +220,7 @@ void DwarfStreamer::emitPaperTrailWarningsDie(DIE &Die) {
/// Emit the debug_str section stored in \p Pool.
void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
- Asm->OutStreamer->SwitchSection(MOFI->getDwarfStrSection());
+ Asm->OutStreamer->switchSection(MOFI->getDwarfStrSection());
std::vector<DwarfStringPoolEntryRef> Entries = Pool.getEntriesForEmission();
for (auto Entry : Entries) {
// Emit the string itself.
@@ -233,7 +232,7 @@ void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
#if 0
if (DwarfVersion >= 5) {
// Emit an empty string offset section.
- Asm->OutStreamer->SwitchSection(MOFI->getDwarfStrOffSection());
+ Asm->OutStreamer->switchSection(MOFI->getDwarfStrOffSection());
Asm->emitDwarfUnitLength(4, "Length of String Offsets Set");
Asm->emitInt16(DwarfVersion);
Asm->emitInt16(0);
@@ -256,7 +255,7 @@ void DwarfStreamer::emitDebugNames(
UniqueIdToCuMap[CU.ID] = Id++;
}
- Asm->OutStreamer->SwitchSection(MOFI->getDwarfDebugNamesSection());
+ Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection());
emitDWARF5AccelTable(
Asm.get(), Table, CompUnits,
[&UniqueIdToCuMap](const DWARF5AccelTableStaticData &Entry) {
@@ -266,7 +265,7 @@ void DwarfStreamer::emitDebugNames(
void DwarfStreamer::emitAppleNamespaces(
AccelTable<AppleAccelTableStaticOffsetData> &Table) {
- Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelNamespaceSection());
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelNamespaceSection());
auto *SectionBegin = Asm->createTempSymbol("namespac_begin");
Asm->OutStreamer->emitLabel(SectionBegin);
emitAppleAccelTable(Asm.get(), Table, "namespac", SectionBegin);
@@ -274,7 +273,7 @@ void DwarfStreamer::emitAppleNamespaces(
void DwarfStreamer::emitAppleNames(
AccelTable<AppleAccelTableStaticOffsetData> &Table) {
- Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelNamesSection());
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelNamesSection());
auto *SectionBegin = Asm->createTempSymbol("names_begin");
Asm->OutStreamer->emitLabel(SectionBegin);
emitAppleAccelTable(Asm.get(), Table, "names", SectionBegin);
@@ -282,7 +281,7 @@ void DwarfStreamer::emitAppleNames(
void DwarfStreamer::emitAppleObjc(
AccelTable<AppleAccelTableStaticOffsetData> &Table) {
- Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelObjCSection());
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelObjCSection());
auto *SectionBegin = Asm->createTempSymbol("objc_begin");
Asm->OutStreamer->emitLabel(SectionBegin);
emitAppleAccelTable(Asm.get(), Table, "objc", SectionBegin);
@@ -290,7 +289,7 @@ void DwarfStreamer::emitAppleObjc(
void DwarfStreamer::emitAppleTypes(
AccelTable<AppleAccelTableStaticTypeData> &Table) {
- Asm->OutStreamer->SwitchSection(MOFI->getDwarfAccelTypesSection());
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAccelTypesSection());
auto *SectionBegin = Asm->createTempSymbol("types_begin");
Asm->OutStreamer->emitLabel(SectionBegin);
emitAppleAccelTable(Asm.get(), Table, "types", SectionBegin);
@@ -300,7 +299,7 @@ void DwarfStreamer::emitAppleTypes(
void DwarfStreamer::emitSwiftAST(StringRef Buffer) {
MCSection *SwiftASTSection = MOFI->getDwarfSwiftASTSection();
SwiftASTSection->setAlignment(Align(32));
- MS->SwitchSection(SwiftASTSection);
+ MS->switchSection(SwiftASTSection);
MS->emitBytes(Buffer);
}
@@ -312,7 +311,7 @@ void DwarfStreamer::emitSwiftReflectionSection(
if (ReflectionSection == nullptr)
return;
ReflectionSection->setAlignment(Align(Alignment));
- MS->SwitchSection(ReflectionSection);
+ MS->switchSection(ReflectionSection);
MS->emitBytes(Buffer);
}
@@ -325,7 +324,7 @@ void DwarfStreamer::emitRangesEntries(
const FunctionIntervals::const_iterator &FuncRange,
const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
unsigned AddressSize) {
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
// Offset each range by the right amount.
int64_t PcOffset = Entries.empty() ? 0 : FuncRange.value() + UnitPcOffset;
@@ -377,7 +376,7 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit,
llvm::sort(Ranges);
if (!Ranges.empty()) {
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfARangesSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfARangesSection());
MCSymbol *BeginLabel = Asm->createTempSymbol("Barange");
MCSymbol *EndLabel = Asm->createTempSymbol("Earange");
@@ -419,7 +418,7 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit,
if (!DoDebugRanges)
return;
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
// Offset each range by the right amount.
int64_t PcOffset = -Unit.getLowPc();
// Emit coalesced ranges.
@@ -447,7 +446,7 @@ void DwarfStreamer::emitLocationsForUnit(
if (Attributes.empty())
return;
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLocSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfLocSection());
unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
uint64_t BaseAddressMarker = (AddressSize == 8)
@@ -509,7 +508,7 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
std::vector<DWARFDebugLine::Row> &Rows,
unsigned PointerSize) {
// Switch to the section where the table will be emitted.
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfLineSection());
MCSymbol *LineStartSym = MC->createTempSymbol();
MCSymbol *LineEndSym = MC->createTempSymbol();
@@ -650,7 +649,7 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
/// Copy the debug_line over to the updated binary while unobfuscating the file
/// names and directories.
void DwarfStreamer::translateLineTable(DataExtractor Data, uint64_t Offset) {
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfLineSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfLineSection());
StringRef Contents = Data.getData();
// We have to deconstruct the line table header, because it contains file
// names and directories that need to be translated.
@@ -738,7 +737,7 @@ void DwarfStreamer::emitPubSectionForUnit(
return;
// Start the dwarf pubnames section.
- Asm->OutStreamer->SwitchSection(Sec);
+ Asm->OutStreamer->switchSection(Sec);
MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + SecName + "_begin");
MCSymbol *EndLabel = Asm->createTempSymbol("pub" + SecName + "_end");
@@ -785,7 +784,7 @@ void DwarfStreamer::emitPubTypesForUnit(const CompileUnit &Unit) {
/// Emit a CIE into the debug_frame section.
void DwarfStreamer::emitCIE(StringRef CIEBytes) {
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
MS->emitBytes(CIEBytes);
FrameSectionSize += CIEBytes.size();
@@ -796,7 +795,7 @@ void DwarfStreamer::emitCIE(StringRef CIEBytes) {
/// which will be replaced with the parameter values.
void DwarfStreamer::emitFDE(uint32_t CIEOffset, uint32_t AddrSize,
uint32_t Address, StringRef FDEBytes) {
- MS->SwitchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfFrameSection());
MS->emitIntValue(FDEBytes.size() + 4 + AddrSize, 4);
MS->emitIntValue(CIEOffset, 4);
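All of the churn in this file is the mechanical MCStreamer rename (SwitchSection→switchSection, Finish→finish) bringing the API in line with LLVM's lowerCamelCase convention for functions. The call pattern is unchanged, as in this sketch:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/MC/MCStreamer.h"
  using namespace llvm;

  void emitBlob(MCStreamer &MS, MCSection *Sec, StringRef Bytes) {
    MS.switchSection(Sec); // formerly MS.SwitchSection(Sec)
    MS.emitBytes(Bytes);
  }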
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index f6538c0549d0..34615a73e328 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/Object/Decompressor.h"
+#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
using namespace llvm::object;
@@ -181,7 +182,7 @@ addAllTypesFromDWP(MCStreamer &Out,
const DWARFUnitIndex &TUIndex, MCSection *OutputTypes,
StringRef Types, const UnitIndexEntry &TUEntry,
uint32_t &TypesOffset, unsigned TypesContributionIndex) {
- Out.SwitchSection(OutputTypes);
+ Out.switchSection(OutputTypes);
for (const DWARFUnitIndex::Entry &E : TUIndex.getRows()) {
auto *I = E.getContributions();
if (!I)
@@ -215,7 +216,7 @@ static void addAllTypesFromTypesSection(
MCSection *OutputTypes, const std::vector<StringRef> &TypesSections,
const UnitIndexEntry &CUEntry, uint32_t &TypesOffset) {
for (StringRef Types : TypesSections) {
- Out.SwitchSection(OutputTypes);
+ Out.switchSection(OutputTypes);
uint64_t Offset = 0;
DataExtractor Data(Types, true, 0);
while (Data.isValidOffset(Offset)) {
@@ -373,7 +374,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
Data = DataExtractor(CurStrOffsetSection, true, 0);
- Out.SwitchSection(StrOffsetSection);
+ Out.switchSection(StrOffsetSection);
uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
uint64_t Offset = 0;
@@ -427,7 +428,7 @@ void writeIndex(MCStreamer &Out, MCSection *Section,
++I;
}
- Out.SwitchSection(Section);
+ Out.switchSection(Section);
Out.emitIntValue(IndexVersion, 4); // Version
Out.emitIntValue(Columns, 4); // Columns
Out.emitIntValue(IndexEntries.size(), 4); // Num Units
@@ -526,7 +527,7 @@ Error handleSection(
else if (OutSection == InfoSection)
CurInfoSection.push_back(Contents);
else {
- Out.SwitchSection(OutSection);
+ Out.switchSection(OutSection);
Out.emitBytes(Contents);
}
return Error::success();
@@ -633,7 +634,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
ContributionOffsets[getContributionIndex(DW_SECT_INFO, IndexVersion)];
if (CurCUIndexSection.empty()) {
bool FoundCUUnit = false;
- Out.SwitchSection(InfoSection);
+ Out.switchSection(InfoSection);
for (StringRef Info : CurInfoSection) {
uint64_t UnitOffset = 0;
while (Info.size() > UnitOffset) {
@@ -668,7 +669,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
FoundCUUnit = true;
} else if (Header.UnitType == dwarf::DW_UT_split_type) {
auto P = TypeIndexEntries.insert(
- std::make_pair(Header.Signature.getValue(), Entry));
+ std::make_pair(*Header.Signature, Entry));
if (!P.second)
continue;
}
@@ -703,7 +704,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
utostr(CUIndex.getVersion()) +
" and expecting " + utostr(IndexVersion));
- Out.SwitchSection(InfoSection);
+ Out.switchSection(InfoSection);
for (const DWARFUnitIndex::Entry &E : CUIndex.getRows()) {
auto *I = E.getContributions();
if (!I)
diff --git a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
index 4d8b15530b9e..3ab7f722eaee 100644
--- a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
@@ -8,18 +8,11 @@
#include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <algorithm>
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <cstring>
diff --git a/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index 48b9b0496ffe..2154aa2b8d00 100644
--- a/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -8,8 +8,12 @@
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -80,3 +84,72 @@ Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols,
}
return Error::success();
}
+
+Error CVSymbolVisitor::visitSymbolStreamFiltered(const CVSymbolArray &Symbols,
+ const FilterOptions &Filter) {
+ if (!Filter.SymbolOffset)
+ return visitSymbolStream(Symbols);
+ uint32_t SymbolOffset = *Filter.SymbolOffset;
+ uint32_t ParentRecurseDepth = Filter.ParentRecursiveDepth.value_or(0);
+ uint32_t ChildrenRecurseDepth = Filter.ChildRecursiveDepth.value_or(0);
+ if (!Symbols.isOffsetValid(SymbolOffset))
+ return createStringError(inconvertibleErrorCode(), "Invalid symbol offset");
+ CVSymbol Sym = *Symbols.at(SymbolOffset);
+ uint32_t SymEndOffset =
+ symbolOpensScope(Sym.kind()) ? getScopeEndOffset(Sym) : 0;
+
+ std::vector<uint32_t> ParentOffsets;
+ std::vector<uint32_t> ParentEndOffsets;
+ uint32_t ChildrenDepth = 0;
+ for (auto Begin = Symbols.begin(), End = Symbols.end(); Begin != End;
+ ++Begin) {
+ uint32_t BeginOffset = Begin.offset();
+ CVSymbol BeginSym = *Begin;
+ if (BeginOffset < SymbolOffset) {
+ if (symbolOpensScope(Begin->kind())) {
+ uint32_t EndOffset = getScopeEndOffset(BeginSym);
+ if (SymbolOffset < EndOffset) {
+ ParentOffsets.push_back(BeginOffset);
+ ParentEndOffsets.push_back(EndOffset);
+ }
+ }
+ } else if (BeginOffset == SymbolOffset) {
+ // Found symbol at offset. Visit its parents up to ParentRecurseDepth.
+ if (ParentRecurseDepth >= ParentOffsets.size())
+ ParentRecurseDepth = ParentOffsets.size();
+ uint32_t StartIndex = ParentOffsets.size() - ParentRecurseDepth;
+ while (StartIndex < ParentOffsets.size()) {
+ if (!Symbols.isOffsetValid(ParentOffsets[StartIndex]))
+ break;
+ CVSymbol Parent = *Symbols.at(ParentOffsets[StartIndex]);
+ if (auto EC = visitSymbolRecord(Parent, ParentOffsets[StartIndex]))
+ return EC;
+ ++StartIndex;
+ }
+ if (auto EC = visitSymbolRecord(Sym, SymbolOffset))
+ return EC;
+ } else if (BeginOffset <= SymEndOffset) {
+ if (ChildrenRecurseDepth) {
+ // Visit children.
+ if (symbolEndsScope(Begin->kind()))
+ --ChildrenDepth;
+ if (ChildrenDepth < ChildrenRecurseDepth ||
+ BeginOffset == SymEndOffset) {
+ if (auto EC = visitSymbolRecord(BeginSym, BeginOffset))
+ return EC;
+ }
+ if (symbolOpensScope(Begin->kind()))
+ ++ChildrenDepth;
+ }
+ } else {
+ // Visit parents' ends.
+ if (ParentRecurseDepth && BeginOffset == ParentEndOffsets.back()) {
+ if (auto EC = visitSymbolRecord(BeginSym, BeginOffset))
+ return EC;
+ ParentEndOffsets.pop_back();
+ --ParentRecurseDepth;
+ }
+ }
+ }
+ return Error::success();
+}
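visitSymbolStreamFiltered treats the flat record stream as an implicit tree: a scope-opening record carries the offset of its matching end record, so a single forward pass can collect the target's ancestors, emit the target, then walk a bounded number of child scope levels. A standalone model of the ancestor bookkeeping (types hypothetical, logic mirroring the ParentOffsets/ParentEndOffsets handling above):

  #include <cstdint>
  #include <vector>

  struct Rec { uint32_t Offset; uint32_t ScopeEnd; }; // ScopeEnd == 0: leaf

  // Offsets of records whose scope encloses Target, outer-to-inner.
  std::vector<uint32_t> ancestorsOf(const std::vector<Rec> &Stream,
                                    uint32_t Target) {
    std::vector<uint32_t> Parents;
    for (const Rec &R : Stream) {
      if (R.Offset >= Target)
        break;
      if (R.ScopeEnd != 0 && Target < R.ScopeEnd)
        Parents.push_back(R.Offset);
    }
    return Parents;
  }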
diff --git a/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index dd6f75f97a4a..5da300f710d5 100644
--- a/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -8,11 +8,12 @@
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
-#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 1af59ff679dd..a66f9af98835 100644
--- a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -8,7 +8,9 @@
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
@@ -68,10 +70,10 @@ uint32_t CodeViewRecordIO::maxFieldLength() const {
Optional<uint32_t> Min = Limits.front().bytesRemaining(Offset);
for (auto X : makeArrayRef(Limits).drop_front()) {
Optional<uint32_t> ThisMin = X.bytesRemaining(Offset);
- if (ThisMin.hasValue())
- Min = (Min.hasValue()) ? std::min(*Min, *ThisMin) : *ThisMin;
+ if (ThisMin)
+ Min = Min ? std::min(*Min, *ThisMin) : *ThisMin;
}
- assert(Min.hasValue() && "Every field must have a maximum length!");
+ assert(Min && "Every field must have a maximum length!");
return *Min;
}
@@ -279,17 +281,24 @@ void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value,
// FIXME: There are no test cases covering this function.
// This may be because we always consider enumerators to be unsigned.
// See FIXME at CodeViewDebug.cpp : CodeViewDebug::lowerTypeEnum.
- if (Value >= std::numeric_limits<int8_t>::min()) {
+ if (Value < LF_NUMERIC && Value >= 0) {
+ emitComment(Comment);
+ Streamer->emitIntValue(Value, 2);
+ incrStreamedLen(2);
+ } else if (Value >= std::numeric_limits<int8_t>::min() &&
+ Value <= std::numeric_limits<int8_t>::max()) {
Streamer->emitIntValue(LF_CHAR, 2);
emitComment(Comment);
Streamer->emitIntValue(Value, 1);
incrStreamedLen(3);
- } else if (Value >= std::numeric_limits<int16_t>::min()) {
+ } else if (Value >= std::numeric_limits<int16_t>::min() &&
+ Value <= std::numeric_limits<int16_t>::max()) {
Streamer->emitIntValue(LF_SHORT, 2);
emitComment(Comment);
Streamer->emitIntValue(Value, 2);
incrStreamedLen(4);
- } else if (Value >= std::numeric_limits<int32_t>::min()) {
+ } else if (Value >= std::numeric_limits<int32_t>::min() &&
+ Value <= std::numeric_limits<int32_t>::max()) {
Streamer->emitIntValue(LF_LONG, 2);
emitComment(Comment);
Streamer->emitIntValue(Value, 4);
@@ -328,17 +337,23 @@ void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value,
}
Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) {
- if (Value >= std::numeric_limits<int8_t>::min()) {
+ if (Value < LF_NUMERIC && Value >= 0) {
+ if (auto EC = Writer->writeInteger<int16_t>(Value))
+ return EC;
+ } else if (Value >= std::numeric_limits<int8_t>::min() &&
+ Value <= std::numeric_limits<int8_t>::max()) {
if (auto EC = Writer->writeInteger<uint16_t>(LF_CHAR))
return EC;
if (auto EC = Writer->writeInteger<int8_t>(Value))
return EC;
- } else if (Value >= std::numeric_limits<int16_t>::min()) {
+ } else if (Value >= std::numeric_limits<int16_t>::min() &&
+ Value <= std::numeric_limits<int16_t>::max()) {
if (auto EC = Writer->writeInteger<uint16_t>(LF_SHORT))
return EC;
if (auto EC = Writer->writeInteger<int16_t>(Value))
return EC;
- } else if (Value >= std::numeric_limits<int32_t>::min()) {
+ } else if (Value >= std::numeric_limits<int32_t>::min() &&
+ Value <= std::numeric_limits<int32_t>::max()) {
if (auto EC = Writer->writeInteger<uint16_t>(LF_LONG))
return EC;
if (auto EC = Writer->writeInteger<int32_t>(Value))
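Both hunks in this file fix the same latent truncation bug: the old chain tested only lower bounds, so any value ≥ INT8_MIN — including, say, 100000 — fell into the LF_CHAR branch and was silently truncated to one byte. The fixed logic first stores small non-negative values (below LF_NUMERIC, 0x8000) inline in the two-byte field, then picks the narrowest leaf whose range actually contains the value. A self-contained little-endian sketch of the scheme; the marker constants are the standard CodeView values:

  #include <cstdint>
  #include <limits>
  #include <vector>

  constexpr uint16_t LF_NUMERIC = 0x8000;
  constexpr uint16_t LF_CHAR    = 0x8000;
  constexpr uint16_t LF_SHORT   = 0x8001;
  constexpr uint16_t LF_LONG    = 0x8003;

  static void append16(std::vector<uint8_t> &Out, uint16_t V) {
    Out.push_back(uint8_t(V & 0xff));
    Out.push_back(uint8_t(V >> 8));
  }

  // Encode a signed value the way the fixed writeEncodedSignedInteger does.
  void encodeSigned(std::vector<uint8_t> &Out, int64_t V) {
    if (V >= 0 && V < LF_NUMERIC) {
      append16(Out, uint16_t(V)); // small value stored inline
    } else if (V >= std::numeric_limits<int8_t>::min() &&
               V <= std::numeric_limits<int8_t>::max()) {
      append16(Out, LF_CHAR);
      Out.push_back(uint8_t(int8_t(V)));
    } else if (V >= std::numeric_limits<int16_t>::min() &&
               V <= std::numeric_limits<int16_t>::max()) {
      append16(Out, LF_SHORT);
      append16(Out, uint16_t(int16_t(V)));
    } else if (V >= std::numeric_limits<int32_t>::min() &&
               V <= std::numeric_limits<int32_t>::max()) {
      append16(Out, LF_LONG);
      uint32_t U = uint32_t(int32_t(V));
      append16(Out, uint16_t(U & 0xffff));
      append16(Out, uint16_t(U >> 16));
    }
    // 64-bit values would take a quadword leaf, elided in this sketch.
  }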
diff --git a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
index c7b1c65f2f9a..a3dbb3954d5c 100644
--- a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
+++ b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
@@ -46,10 +46,10 @@ static inline TypeLeafKind getTypeLeafKind(ContinuationRecordKind CK) {
ContinuationRecordBuilder::ContinuationRecordBuilder()
: SegmentWriter(Buffer), Mapping(SegmentWriter) {}
-ContinuationRecordBuilder::~ContinuationRecordBuilder() {}
+ContinuationRecordBuilder::~ContinuationRecordBuilder() = default;
void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) {
- assert(!Kind.hasValue());
+ assert(!Kind);
Kind = RecordKind;
Buffer.clear();
SegmentWriter.setOffset(0);
@@ -76,7 +76,7 @@ void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) {
template <typename RecordType>
void ContinuationRecordBuilder::writeMemberType(RecordType &Record) {
- assert(Kind.hasValue());
+ assert(Kind);
uint32_t OriginalOffset = SegmentWriter.getOffset();
CVMemberRecord CVMR;
@@ -158,7 +158,7 @@ CVType ContinuationRecordBuilder::createSegmentRecord(
RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(Data.data());
Prefix->RecordLen = Data.size() - sizeof(RecordPrefix::RecordLen);
- if (RefersTo.hasValue()) {
+ if (RefersTo) {
auto Continuation = Data.take_back(ContinuationLength);
ContinuationRecord *CR =
reinterpret_cast<ContinuationRecord *>(Continuation.data());
diff --git a/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
index b23410409f88..b48f57955db1 100644
--- a/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Error.h"
#include <cstdint>
diff --git a/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
index 9bc69abea102..c083c61d1595 100644
--- a/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -8,6 +8,8 @@
#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
index 48ec7e4ecdd6..665511c592f9 100644
--- a/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp
index 3f93463fe6d6..01581181dfe0 100644
--- a/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp
@@ -10,6 +10,6 @@
using namespace llvm::codeview;
-DebugSubsectionRef::~DebugSubsectionRef() {}
+DebugSubsectionRef::~DebugSubsectionRef() = default;
-DebugSubsection::~DebugSubsection() {}
+DebugSubsection::~DebugSubsection() = default;
diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index 3c8a30101450..adc6cabd7da1 100644
--- a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -13,7 +13,6 @@
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
index 7968b6a2d757..50f6fb93dec1 100644
--- a/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
+++ b/llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
@@ -20,7 +21,7 @@
#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/SwapByteOrder.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp b/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
index c833103663e4..2b20b3e95db6 100644
--- a/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
+#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/CodeView/Formatters.cpp b/llvm/lib/DebugInfo/CodeView/Formatters.cpp
index f1f51bcb39cc..73a589212227 100644
--- a/llvm/lib/DebugInfo/CodeView/Formatters.cpp
+++ b/llvm/lib/DebugInfo/CodeView/Formatters.cpp
@@ -9,8 +9,10 @@
#include "llvm/DebugInfo/CodeView/Formatters.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
index 7cd9ca7498f5..142af382efba 100644
--- a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
@@ -8,18 +8,12 @@
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/None.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <algorithm>
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <cstring>
diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index c0fc3e0ef65a..1d49a1ed4712 100644
--- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -9,11 +9,12 @@
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
diff --git a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
index 13ce3ae82c26..62d228599eae 100644
--- a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
@@ -8,18 +8,13 @@
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/None.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeHashing.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <algorithm>
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <cstring>
diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp
index 1ca899789bef..5fbbc4a5d497 100644
--- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp
@@ -10,9 +10,13 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
+#include "llvm/DebugInfo/CodeView/TypeCollection.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/FormatVariadic.h"
diff --git a/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp b/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
index 63ce302a4e09..d76905df8681 100644
--- a/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
+++ b/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
@@ -13,9 +13,9 @@
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/BinaryByteStream.h"
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
index d963e34628db..cf0c877fdbf8 100644
--- a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
+++ b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
@@ -7,7 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
#include "llvm/Support/BinaryStreamWriter.h"
@@ -29,7 +30,7 @@ static void addPadding(BinaryStreamWriter &Writer) {
SimpleTypeSerializer::SimpleTypeSerializer() : ScratchBuffer(MaxRecordLength) {}
-SimpleTypeSerializer::~SimpleTypeSerializer() {}
+SimpleTypeSerializer::~SimpleTypeSerializer() = default;
template <typename T>
ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
diff --git a/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
index 9e204eec8604..81aa44fb2086 100644
--- a/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
+++ b/llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
diff --git a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 45b63983beb4..cfb12dbae845 100644
--- a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/SymbolDumper.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
@@ -20,8 +20,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ScopedPrinter.h"
-#include <system_error>
-
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp b/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
index 2562c633bb99..d8b350bf26ba 100644
--- a/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
+++ b/llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
@@ -8,7 +8,7 @@
#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index de9bb42b1798..5fb8d497b957 100644
--- a/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -8,9 +8,9 @@
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <cstring>
@@ -24,7 +24,7 @@ SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator,
Mapping(Writer, Container) {}
Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
- assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!");
+ assert(!CurrentSymbol && "Already in a symbol mapping!");
Writer.setOffset(0);
@@ -39,7 +39,7 @@ Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
}
Error SymbolSerializer::visitSymbolEnd(CVSymbol &Record) {
- assert(CurrentSymbol.hasValue() && "Not in a symbol mapping!");
+ assert(CurrentSymbol && "Not in a symbol mapping!");
if (auto EC = Mapping.visitSymbolEnd(Record))
return EC;
diff --git a/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index d5fea5ee5e29..5d27c9f29984 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -8,14 +8,15 @@
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/Formatters.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp b/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp
index 2dbc11a84f0b..fc85d8186eaa 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp
@@ -76,5 +76,6 @@ GloballyHashedType::hashType(ArrayRef<uint8_t> RecordData,
auto TrailingBytes = RecordData.drop_front(Off);
S.update(TrailingBytes);
- return {S.final().take_back(8)};
+ std::array<uint8_t, 20> Hash = S.final();
+ return {ArrayRef<uint8_t>(Hash).take_back(8)};
}
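This adapts to SHA-1's final() now returning std::array<uint8_t, 20> by value rather than a StringRef into the hasher's state: the result must live in a named object before an ArrayRef slice of it is taken, because ArrayRef is a non-owning view. The pattern in isolation:

  #include "llvm/ADT/ArrayRef.h"
  #include <algorithm>
  #include <array>
  #include <cstdint>

  // Keep the owning std::array alive while the view is used; an ArrayRef
  // into a temporary would dangle.
  std::array<uint8_t, 8> lastEightBytes(const std::array<uint8_t, 20> &Hash) {
    llvm::ArrayRef<uint8_t> Tail = llvm::ArrayRef<uint8_t>(Hash).take_back(8);
    std::array<uint8_t, 8> Out{};
    std::copy(Tail.begin(), Tail.end(), Out.begin());
    return Out;
  }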
diff --git a/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp b/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
index 604d342448d3..3aead9d50041 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeIndex.cpp
@@ -33,6 +33,7 @@ static const SimpleTypeEntry SimpleTypeNames[] = {
{"wchar_t*", SimpleTypeKind::WideCharacter},
{"char16_t*", SimpleTypeKind::Character16},
{"char32_t*", SimpleTypeKind::Character32},
+ {"char8_t*", SimpleTypeKind::Character8},
{"__int8*", SimpleTypeKind::SByte},
{"unsigned __int8*", SimpleTypeKind::Byte},
{"short*", SimpleTypeKind::Int16Short},
diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index d272999bdab8..27f63b9edcd0 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -7,10 +7,28 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
@@ -210,8 +228,8 @@ static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name,
}
Error TypeRecordMapping::visitTypeBegin(CVType &CVR) {
- assert(!TypeKind.hasValue() && "Already in a type mapping!");
- assert(!MemberKind.hasValue() && "Already in a member mapping!");
+ assert(!TypeKind && "Already in a type mapping!");
+ assert(!MemberKind && "Already in a member mapping!");
// FieldList and MethodList records can be any length because they can be
// split with continuation records. All other record types cannot be
@@ -242,8 +260,8 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR, TypeIndex Index) {
}
Error TypeRecordMapping::visitTypeEnd(CVType &Record) {
- assert(TypeKind.hasValue() && "Not in a type mapping!");
- assert(!MemberKind.hasValue() && "Still in a member mapping!");
+ assert(TypeKind && "Not in a type mapping!");
+ assert(!MemberKind && "Still in a member mapping!");
error(IO.endRecord());
@@ -252,8 +270,8 @@ Error TypeRecordMapping::visitTypeEnd(CVType &Record) {
}
Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) {
- assert(TypeKind.hasValue() && "Not in a type mapping!");
- assert(!MemberKind.hasValue() && "Already in a member mapping!");
+ assert(TypeKind && "Not in a type mapping!");
+ assert(!MemberKind && "Already in a member mapping!");
// The largest possible subrecord is one in which there is a record prefix,
// followed by the subrecord, followed by a continuation, and that entire
@@ -278,8 +296,8 @@ Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) {
}
Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
- assert(TypeKind.hasValue() && "Not in a type mapping!");
- assert(MemberKind.hasValue() && "Not in a member mapping!");
+ assert(TypeKind && "Not in a type mapping!");
+ assert(MemberKind && "Not in a member mapping!");
if (IO.isReading()) {
if (auto EC = IO.skipPadding())
diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index 587a68142a4a..7ddfb7ab2f8d 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
@@ -487,7 +487,7 @@ Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
EP))
return joinErrors(std::move(EC), errorCorruptRecord());
- if (PCHSignature.hasValue())
+ if (PCHSignature)
return errorCorruptRecord();
PCHSignature.emplace(EP.getSignature());
return false;
diff --git a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index e517e8846d69..910a32730e39 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -8,9 +8,10 @@
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
-#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
-#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index 1be5a752453a..e2ea5910932d 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -11,10 +11,10 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index c77d4d4d989c..5727b3bdb05c 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -10,7 +10,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/Errc.h"
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
index 25d2e852a7fe..2d6c145f9237 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index d68ecd4f8a42..6461f2ac031d 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -7,8 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index ef50ad53650a..c785026f8461 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -15,6 +16,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
@@ -29,7 +31,11 @@
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
+#include "llvm/DebugInfo/DWARF/DWARFListTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -115,7 +121,7 @@ collectContributionData(DWARFContext::unit_iterator_range Units) {
const Optional<StrOffsetsContributionDescriptor> &R) {
if (L && R)
return L->Base < R->Base;
- return R.hasValue();
+ return R.has_value();
});
// Uniquify contributions, as it is possible that units (specifically
@@ -383,7 +389,7 @@ void DWARFContext::dump(
OS << '\n' << Name << " contents:\n";
if (auto DumpOffset = DumpOffsets[DIDT_ID_DebugInfo])
for (const auto &U : Units)
- U->getDIEForOffset(DumpOffset.getValue())
+ U->getDIEForOffset(*DumpOffset)
.dump(OS, 0, DumpOpts.noImplicitRecursion());
else
for (const auto &U : Units)
@@ -763,6 +769,10 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) {
DWARFVerifier verifier(OS, *this, DumpOpts);
Success &= verifier.handleDebugAbbrev();
+ if (DumpOpts.DumpType & DIDT_DebugCUIndex)
+ Success &= verifier.handleDebugCUIndex();
+ if (DumpOpts.DumpType & DIDT_DebugTUIndex)
+ Success &= verifier.handleDebugTUIndex();
if (DumpOpts.DumpType & DIDT_DebugInfo)
Success &= verifier.handleDebugInfo();
if (DumpOpts.DumpType & DIDT_DebugLine)
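The verifier now honours the CU/TU index bits in DIDumpOptions::DumpType. A hypothetical driver snippet (Ctx stands for an existing DWARFContext; only the flag wiring is taken from the hunk above):

#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/raw_ostream.h"

bool verifyIndexes(llvm::DWARFContext &Ctx) {
  llvm::DIDumpOptions Opts;
  // Restrict verification to the .debug_cu_index / .debug_tu_index sections.
  Opts.DumpType = llvm::DIDT_DebugCUIndex | llvm::DIDT_DebugTUIndex;
  return Ctx.verify(llvm::errs(), Opts);
}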
@@ -993,6 +1003,22 @@ Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit(
RecoverableErrorHandler);
}
+void DWARFContext::clearLineTableForUnit(DWARFUnit *U) {
+ if (!Line)
+ return;
+
+ auto UnitDIE = U->getUnitDIE();
+ if (!UnitDIE)
+ return;
+
+ auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list));
+ if (!Offset)
+ return;
+
+ uint64_t stmtOffset = *Offset + U->getLineTableOffset();
+ Line->clearLineTable(stmtOffset);
+}
+
void DWARFContext::parseNormalUnits() {
if (!NormalUnits.empty())
return;
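clearLineTableForUnit() keys the erase on the unit's DW_AT_stmt_list value plus its line-table offset, so the next lookup reparses the table from the section. A hypothetical invalidation flow:

#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"

const llvm::DWARFDebugLine::LineTable *
reparseLineTable(llvm::DWARFContext &Ctx, llvm::DWARFUnit *U) {
  Ctx.clearLineTableForUnit(U);      // drop the cached table, if any
  return Ctx.getLineTableForUnit(U); // parsed afresh on demand
}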
@@ -1027,7 +1053,25 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
// First, get the offset of the compile unit.
uint64_t CUOffset = getDebugAranges()->findAddress(Address);
// Retrieve the compile unit.
- return getCompileUnitForOffset(CUOffset);
+ if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset))
+ return OffsetCU;
+
+ // Global variables are often not found by the above search, for one of two
+ // reasons:
+ // 1. .debug_aranges may not include global variables. Clang appears to
+ // emit globals into the aranges, but gcc does not.
+ // 2. Even if the global variable is in a .debug_aranges set, it may not
+ // fall within the [start, end) addresses described by the parent
+ // compile unit.
+ //
+ // So, we walk the CUs and their child DIEs manually, looking for the
+ // specific global variable.
+ for (std::unique_ptr<DWARFUnit> &CU : compile_units()) {
+ if (DWARFDie Die = CU->getVariableForAddress(Address)) {
+ return static_cast<DWARFCompileUnit *>(CU.get());
+ }
+ }
+ return nullptr;
}
DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) {
@@ -1097,64 +1141,6 @@ static bool getFunctionNameAndStartLineForAddress(
return FoundResult;
}
-static Optional<uint64_t> getTypeSize(DWARFDie Type, uint64_t PointerSize) {
- if (auto SizeAttr = Type.find(DW_AT_byte_size))
- if (Optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant())
- return Size;
-
- switch (Type.getTag()) {
- case DW_TAG_pointer_type:
- case DW_TAG_reference_type:
- case DW_TAG_rvalue_reference_type:
- return PointerSize;
- case DW_TAG_ptr_to_member_type: {
- if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type))
- if (BaseType.getTag() == DW_TAG_subroutine_type)
- return 2 * PointerSize;
- return PointerSize;
- }
- case DW_TAG_const_type:
- case DW_TAG_immutable_type:
- case DW_TAG_volatile_type:
- case DW_TAG_restrict_type:
- case DW_TAG_typedef: {
- if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type))
- return getTypeSize(BaseType, PointerSize);
- break;
- }
- case DW_TAG_array_type: {
- DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type);
- if (!BaseType)
- return Optional<uint64_t>();
- Optional<uint64_t> BaseSize = getTypeSize(BaseType, PointerSize);
- if (!BaseSize)
- return Optional<uint64_t>();
- uint64_t Size = *BaseSize;
- for (DWARFDie Child : Type) {
- if (Child.getTag() != DW_TAG_subrange_type)
- continue;
-
- if (auto ElemCountAttr = Child.find(DW_AT_count))
- if (Optional<uint64_t> ElemCount =
- ElemCountAttr->getAsUnsignedConstant())
- Size *= *ElemCount;
- if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound))
- if (Optional<int64_t> UpperBound =
- UpperBoundAttr->getAsSignedConstant()) {
- int64_t LowerBound = 0;
- if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound))
- LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0);
- Size *= *UpperBound - LowerBound + 1;
- }
- }
- return Size;
- }
- default:
- break;
- }
- return Optional<uint64_t>();
-}
-
static Optional<int64_t>
getExpressionFrameOffset(ArrayRef<uint8_t> Expr,
Optional<unsigned> FrameBaseReg) {
@@ -1215,7 +1201,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram,
if (Optional<const char *> Name = dwarf::toString(*NameAttr))
Local.Name = *Name;
if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type))
- Local.Size = getTypeSize(Type, getCUAddrSize());
+ Local.Size = Type.getTypeSize(getCUAddrSize());
if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) {
if (const auto *LT = CU->getContext().getLineTableForUnit(CU))
LT->getFileNameByIndex(
@@ -1256,7 +1242,6 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) {
DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Spec) {
DILineInfo Result;
-
DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
if (!CU)
return Result;
@@ -1271,6 +1256,22 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address,
Spec.FLIKind, Result);
}
}
+
+ return Result;
+}
+
+DILineInfo
+DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) {
+ DILineInfo Result;
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+ if (!CU)
+ return Result;
+
+ if (DWARFDie Die = CU->getVariableForAddress(Address.Address)) {
+ Result.FileName = Die.getDeclFile(FileLineInfoKind::AbsoluteFilePath);
+ Result.Line = Die.getDeclLine();
+ }
+
return Result;
}
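A hypothetical caller of the new data-address entry point; unlike getLineInfoForAddress() it reports the declaration site of the variable covering the address rather than a line-table row:

#include <cstdint>
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/raw_ostream.h"

void printGlobalDecl(llvm::DWARFContext &Ctx, uint64_t GlobalAddr) {
  llvm::DILineInfo Info = Ctx.getLineInfoForDataAddress(
      {GlobalAddr, llvm::object::SectionedAddress::UndefSection});
  if (Info.Line != 0) // stays 0 when no variable DIE covers the address
    llvm::errs() << Info.FileName << ":" << Info.Line << "\n";
}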
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
index da6f6ad903f4..b18b64382b41 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -7,7 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/Support/Errc.h"
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index 5b1c62e6a259..81fac4763ec1 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/Errc.h"
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index 1a1b8ea0976f..49ee27db6d54 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -7,10 +7,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
-#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
-#include "llvm/Support/DataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -20,15 +22,15 @@ using namespace llvm;
void DWARFDebugAranges::extract(
DWARFDataExtractor DebugArangesData,
- function_ref<void(Error)> RecoverableErrorHandler) {
+ function_ref<void(Error)> RecoverableErrorHandler,
+ function_ref<void(Error)> WarningHandler) {
if (!DebugArangesData.isValidOffset(0))
return;
uint64_t Offset = 0;
DWARFDebugArangeSet Set;
while (DebugArangesData.isValidOffset(Offset)) {
- if (Error E =
- Set.extract(DebugArangesData, &Offset, RecoverableErrorHandler)) {
+ if (Error E = Set.extract(DebugArangesData, &Offset, WarningHandler)) {
RecoverableErrorHandler(std::move(E));
return;
}
@@ -50,7 +52,8 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
// Extract aranges from .debug_aranges section.
DWARFDataExtractor ArangesData(CTX->getDWARFObj().getArangesSection(),
CTX->isLittleEndian(), 0);
- extract(ArangesData, CTX->getRecoverableErrorHandler());
+ extract(ArangesData, CTX->getRecoverableErrorHandler(),
+ CTX->getWarningHandler());
// Generate aranges from DIEs: even if .debug_aranges section is present,
// it may describe only a small subset of compilation units, so we need to
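extract() now reports a malformed arange set through the warning handler while a hard failure still aborts the scan through the recoverable-error handler. A self-contained sketch of that two-channel reporting (extract() itself is an internal member of DWARFDebugAranges, so only the handler shape is shown):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

static void report(llvm::Error E, const char *Banner) {
  llvm::logAllUnhandledErrors(std::move(E), llvm::errs(), Banner);
}

void demoHandlers() {
  auto Warn = [](llvm::Error E) { report(std::move(E), "warning: "); };
  auto Fail = [](llvm::Error E) { report(std::move(E), "error: "); };
  Warn(llvm::createStringError(llvm::inconvertibleErrorCode(),
                               "malformed arange set skipped"));
  Fail(llvm::createStringError(llvm::inconvertibleErrorCode(),
                               "aranges scan aborted"));
}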
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 92a461dbd941..cf9057c99dbd 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -12,8 +12,9 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
@@ -1100,8 +1101,8 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) {
default:
return createStringError(
errc::invalid_argument,
- "unknown augmentation character in entry at 0x%" PRIx64,
- StartOffset);
+ "unknown augmentation character %c in entry at 0x%" PRIx64,
+ AugmentationString[i], StartOffset);
case 'L':
LSDAPointerEncoding = Data.getU8(&Offset);
break;
@@ -1137,10 +1138,14 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) {
// B-Key is used for signing functions associated with this
// augmentation string
break;
+ // This stack frame contains MTE-tagged data, so it needs to be
+ // untagged on unwind.
+ case 'G':
+ break;
}
}
- if (AugmentationLength.hasValue()) {
+ if (AugmentationLength) {
if (Offset != EndAugmentationOffset)
return createStringError(errc::invalid_argument,
"parsing augmentation data at 0x%" PRIx64
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 385bde51e2e7..7dbeebc2770f 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -9,10 +9,11 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
#include <cstddef>
#include <cstdint>
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index f36d3f87257a..2e0780e249aa 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -12,12 +12,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -29,6 +29,10 @@
using namespace llvm;
using namespace dwarf;
+namespace llvm {
+class DWARFContext;
+}
+
using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
namespace {
@@ -337,7 +341,7 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
errc::invalid_argument,
"failed to parse file entry because the MD5 hash is invalid");
std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16,
- FileEntry.Checksum.Bytes.begin());
+ FileEntry.Checksum.begin());
break;
default:
break;
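The checksum copy above relies on MD5::MD5Result being array-like in this tree (hence .begin() replacing the old nested .Bytes member). A hedged sketch of working with the digest bytes directly:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <cstdint>

// Storage keeps the 16 digest bytes alive for the returned view.
llvm::ArrayRef<uint8_t> digestBytes(llvm::StringRef Input,
                                    llvm::MD5::MD5Result &Storage) {
  llvm::MD5 Hash;
  Hash.update(Input);
  Hash.final(Storage);
  return llvm::ArrayRef<uint8_t>(Storage.data(), Storage.size());
}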
@@ -597,6 +601,10 @@ Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable(
return LT;
}
+void DWARFDebugLine::clearLineTable(uint64_t Offset) {
+ LineTableMap.erase(Offset);
+}
+
static StringRef getOpcodeName(uint8_t Opcode, uint8_t OpcodeBase) {
assert(Opcode != 0);
if (Opcode < OpcodeBase)
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index f39c7871d603..b68af4cfafef 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -9,13 +9,13 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cinttypes>
@@ -24,6 +24,10 @@
using namespace llvm;
using object::SectionedAddress;
+namespace llvm {
+class DWARFObject;
+}
+
namespace {
class DWARFLocationInterpreter {
Optional<object::SectionedAddress> Base;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index 7a81d7ff064b..80daea64814a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -7,9 +7,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
@@ -112,7 +115,7 @@ Error DWARFDebugMacro::parseImpl(
if (IsMacro && Data.isValidOffset(Offset)) {
// Keep a mapping from macro contribution to CUs; this will
// be needed while retrieving macros in DW_MACRO_define_strx form.
- for (const auto &U : Units.getValue())
+ for (const auto &U : *Units)
if (auto CUDIE = U->getUnitDIE())
// Skip units that do not contribute to the macro section.
if (auto MacroOffset = toSectionOffset(CUDIE.find(DW_AT_macros)))
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index ec7889a3728a..96c546250974 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -14,19 +14,20 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypePrinter.h"
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <cstdint>
@@ -106,586 +107,10 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue,
.print(OS, DumpOpts, MRI, U);
}
-static DWARFDie resolveReferencedType(DWARFDie D,
- dwarf::Attribute Attr = DW_AT_type) {
- return D.getAttributeValueAsReferencedDie(Attr).resolveTypeUnitReference();
-}
static DWARFDie resolveReferencedType(DWARFDie D, DWARFFormValue F) {
return D.getAttributeValueAsReferencedDie(F).resolveTypeUnitReference();
}
-namespace {
-
-// FIXME: We should have pretty printers per language. Currently we print
-// everything as if it was C++ and fall back to the TAG type name.
-struct DWARFTypePrinter {
- raw_ostream &OS;
- bool Word = true;
- bool EndedWithTemplate = false;
-
- DWARFTypePrinter(raw_ostream &OS) : OS(OS) {}
-
- /// Dump the name encoded in the type tag.
- void appendTypeTagName(dwarf::Tag T) {
- StringRef TagStr = TagString(T);
- static constexpr StringRef Prefix = "DW_TAG_";
- static constexpr StringRef Suffix = "_type";
- if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix))
- return;
- OS << TagStr.substr(Prefix.size(),
- TagStr.size() - (Prefix.size() + Suffix.size()))
- << " ";
- }
-
- void appendArrayType(const DWARFDie &D) {
- for (const DWARFDie &C : D.children()) {
- if (C.getTag() != DW_TAG_subrange_type)
- continue;
- Optional<uint64_t> LB;
- Optional<uint64_t> Count;
- Optional<uint64_t> UB;
- Optional<unsigned> DefaultLB;
- if (Optional<DWARFFormValue> L = C.find(DW_AT_lower_bound))
- LB = L->getAsUnsignedConstant();
- if (Optional<DWARFFormValue> CountV = C.find(DW_AT_count))
- Count = CountV->getAsUnsignedConstant();
- if (Optional<DWARFFormValue> UpperV = C.find(DW_AT_upper_bound))
- UB = UpperV->getAsUnsignedConstant();
- if (Optional<DWARFFormValue> LV =
- D.getDwarfUnit()->getUnitDIE().find(DW_AT_language))
- if (Optional<uint64_t> LC = LV->getAsUnsignedConstant())
- if ((DefaultLB =
- LanguageLowerBound(static_cast<dwarf::SourceLanguage>(*LC))))
- if (LB && *LB == *DefaultLB)
- LB = None;
- if (!LB && !Count && !UB)
- OS << "[]";
- else if (!LB && (Count || UB) && DefaultLB)
- OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']';
- else {
- OS << "[[";
- if (LB)
- OS << *LB;
- else
- OS << '?';
- OS << ", ";
- if (Count)
- if (LB)
- OS << *LB + *Count;
- else
- OS << "? + " << *Count;
- else if (UB)
- OS << *UB + 1;
- else
- OS << '?';
- OS << ")]";
- }
- }
- EndedWithTemplate = false;
- }
-
- DWARFDie skipQualifiers(DWARFDie D) {
- while (D && (D.getTag() == DW_TAG_const_type ||
- D.getTag() == DW_TAG_volatile_type))
- D = resolveReferencedType(D);
- return D;
- }
-
- bool needsParens(DWARFDie D) {
- D = skipQualifiers(D);
- return D && (D.getTag() == DW_TAG_subroutine_type || D.getTag() == DW_TAG_array_type);
- }
-
- void appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner, StringRef Ptr) {
- appendQualifiedNameBefore(Inner);
- if (Word)
- OS << ' ';
- if (needsParens(Inner))
- OS << '(';
- OS << Ptr;
- Word = false;
- EndedWithTemplate = false;
- }
-
- DWARFDie
- appendUnqualifiedNameBefore(DWARFDie D,
- std::string *OriginalFullName = nullptr) {
- Word = true;
- if (!D) {
- OS << "void";
- return DWARFDie();
- }
- DWARFDie InnerDIE;
- auto Inner = [&] { return InnerDIE = resolveReferencedType(D); };
- const dwarf::Tag T = D.getTag();
- switch (T) {
- case DW_TAG_pointer_type: {
- appendPointerLikeTypeBefore(D, Inner(), "*");
- break;
- }
- case DW_TAG_subroutine_type: {
- appendQualifiedNameBefore(Inner());
- if (Word) {
- OS << ' ';
- }
- Word = false;
- break;
- }
- case DW_TAG_array_type: {
- appendQualifiedNameBefore(Inner());
- break;
- }
- case DW_TAG_reference_type:
- appendPointerLikeTypeBefore(D, Inner(), "&");
- break;
- case DW_TAG_rvalue_reference_type:
- appendPointerLikeTypeBefore(D, Inner(), "&&");
- break;
- case DW_TAG_ptr_to_member_type: {
- appendQualifiedNameBefore(Inner());
- if (needsParens(InnerDIE))
- OS << '(';
- else if (Word)
- OS << ' ';
- if (DWARFDie Cont = resolveReferencedType(D, DW_AT_containing_type)) {
- appendQualifiedName(Cont);
- OS << "::";
- }
- OS << "*";
- Word = false;
- break;
- }
- case DW_TAG_const_type:
- case DW_TAG_volatile_type:
- appendConstVolatileQualifierBefore(D);
- break;
- case DW_TAG_namespace: {
- if (const char *Name = dwarf::toString(D.find(DW_AT_name), nullptr))
- OS << Name;
- else
- OS << "(anonymous namespace)";
- break;
- }
- case DW_TAG_unspecified_type: {
- StringRef TypeName = D.getShortName();
- if (TypeName == "decltype(nullptr)")
- TypeName = "std::nullptr_t";
- Word = true;
- OS << TypeName;
- EndedWithTemplate = false;
- break;
- }
- /*
- case DW_TAG_structure_type:
- case DW_TAG_class_type:
- case DW_TAG_enumeration_type:
- case DW_TAG_base_type:
- */
- default: {
- const char *NamePtr = dwarf::toString(D.find(DW_AT_name), nullptr);
- if (!NamePtr) {
- appendTypeTagName(D.getTag());
- return DWARFDie();
- }
- Word = true;
- StringRef Name = NamePtr;
- static constexpr StringRef MangledPrefix = "_STN";
- if (Name.startswith(MangledPrefix)) {
- Name = Name.drop_front(MangledPrefix.size());
- auto Separator = Name.find('|');
- assert(Separator != StringRef::npos);
- StringRef BaseName = Name.substr(0, Separator);
- StringRef TemplateArgs = Name.substr(Separator + 1);
- if (OriginalFullName)
- *OriginalFullName = (BaseName + TemplateArgs).str();
- Name = BaseName;
- } else
- EndedWithTemplate = Name.endswith(">");
- OS << Name;
- // This check would be insufficient for operator overloads like
- // "operator>>" - but for now Clang doesn't try to simplify them, so this
- // is OK. Add more nuanced operator overload handling here if/when needed.
- if (Name.endswith(">"))
- break;
- if (!appendTemplateParameters(D))
- break;
-
- if (EndedWithTemplate)
- OS << ' ';
- OS << '>';
- EndedWithTemplate = true;
- Word = true;
- break;
- }
- }
- return InnerDIE;
- }
-
- void appendUnqualifiedNameAfter(DWARFDie D, DWARFDie Inner,
- bool SkipFirstParamIfArtificial = false) {
- if (!D)
- return;
- switch (D.getTag()) {
- case DW_TAG_subroutine_type: {
- appendSubroutineNameAfter(D, Inner, SkipFirstParamIfArtificial, false,
- false);
- break;
- }
- case DW_TAG_array_type: {
- appendArrayType(D);
- break;
- }
- case DW_TAG_const_type:
- case DW_TAG_volatile_type:
- appendConstVolatileQualifierAfter(D);
- break;
- case DW_TAG_ptr_to_member_type:
- case DW_TAG_reference_type:
- case DW_TAG_rvalue_reference_type:
- case DW_TAG_pointer_type: {
- if (needsParens(Inner))
- OS << ')';
- appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner),
- /*SkipFirstParamIfArtificial=*/D.getTag() ==
- DW_TAG_ptr_to_member_type);
- break;
- }
- /*
- case DW_TAG_structure_type:
- case DW_TAG_class_type:
- case DW_TAG_enumeration_type:
- case DW_TAG_base_type:
- case DW_TAG_namespace:
- */
- default:
- break;
- }
- }
-
- void appendQualifiedName(DWARFDie D) {
- if (D)
- appendScopes(D.getParent());
- appendUnqualifiedName(D);
- }
- DWARFDie appendQualifiedNameBefore(DWARFDie D) {
- if (D)
- appendScopes(D.getParent());
- return appendUnqualifiedNameBefore(D);
- }
- bool appendTemplateParameters(DWARFDie D, bool *FirstParameter = nullptr) {
- bool FirstParameterValue = true;
- bool IsTemplate = false;
- if (!FirstParameter)
- FirstParameter = &FirstParameterValue;
- for (const DWARFDie &C : D) {
- auto Sep = [&] {
- if (*FirstParameter)
- OS << '<';
- else
- OS << ", ";
- IsTemplate = true;
- EndedWithTemplate = false;
- *FirstParameter = false;
- };
- if (C.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
- IsTemplate = true;
- appendTemplateParameters(C, FirstParameter);
- }
- if (C.getTag() == dwarf::DW_TAG_template_value_parameter) {
- DWARFDie T = resolveReferencedType(C);
- Sep();
- if (T.getTag() == DW_TAG_enumeration_type) {
- auto V = C.find(DW_AT_const_value);
- bool FoundEnumerator = false;
- for (const DWARFDie &Enumerator : T) {
- auto EV = Enumerator.find(DW_AT_const_value);
- if (V && EV &&
- V->getAsSignedConstant() == EV->getAsSignedConstant()) {
- if (T.find(DW_AT_enum_class)) {
- appendQualifiedName(T);
- OS << "::";
- } else
- appendScopes(T.getParent());
- OS << Enumerator.getShortName();
- FoundEnumerator = true;
- break;
- }
- }
- if (FoundEnumerator)
- continue;
- OS << '(';
- appendQualifiedName(T);
- OS << ')';
- OS << to_string(*V->getAsSignedConstant());
- continue;
- }
- // /Maybe/ we could do pointer type parameters, looking for the
- // symbol in the ELF symbol table to get back to the variable...
- // but probably not worth it.
- if (T.getTag() == DW_TAG_pointer_type)
- continue;
- const char *RawName = dwarf::toString(T.find(DW_AT_name), nullptr);
- assert(RawName);
- StringRef Name = RawName;
- auto V = C.find(DW_AT_const_value);
- bool IsQualifiedChar = false;
- if (Name == "bool") {
- OS << (*V->getAsUnsignedConstant() ? "true" : "false");
- } else if (Name == "short") {
- OS << "(short)";
- OS << to_string(*V->getAsSignedConstant());
- } else if (Name == "unsigned short") {
- OS << "(unsigned short)";
- OS << to_string(*V->getAsSignedConstant());
- } else if (Name == "int")
- OS << to_string(*V->getAsSignedConstant());
- else if (Name == "long") {
- OS << to_string(*V->getAsSignedConstant());
- OS << "L";
- } else if (Name == "long long") {
- OS << to_string(*V->getAsSignedConstant());
- OS << "LL";
- } else if (Name == "unsigned int") {
- OS << to_string(*V->getAsUnsignedConstant());
- OS << "U";
- } else if (Name == "unsigned long") {
- OS << to_string(*V->getAsUnsignedConstant());
- OS << "UL";
- } else if (Name == "unsigned long long") {
- OS << to_string(*V->getAsUnsignedConstant());
- OS << "ULL";
- } else if (Name == "char" ||
- (IsQualifiedChar =
- (Name == "unsigned char" || Name == "signed char"))) {
- // FIXME: check T's DW_AT_type to see if it's signed or not (since
- // char signedness is implementation defined).
- auto Val = *V->getAsSignedConstant();
- // Copied/hacked up from Clang's CharacterLiteral::print - incomplete
- // (doesn't actually support different character types/widths, sign
- // handling's not done, and doesn't correctly test if a character is
- // printable or needs to use a numeric escape sequence instead)
- if (IsQualifiedChar) {
- OS << '(';
- OS << Name;
- OS << ')';
- }
- switch (Val) {
- case '\\':
- OS << "'\\\\'";
- break;
- case '\'':
- OS << "'\\''";
- break;
- case '\a':
- // TODO: K&R: the meaning of '\\a' is different in traditional C
- OS << "'\\a'";
- break;
- case '\b':
- OS << "'\\b'";
- break;
- case '\f':
- OS << "'\\f'";
- break;
- case '\n':
- OS << "'\\n'";
- break;
- case '\r':
- OS << "'\\r'";
- break;
- case '\t':
- OS << "'\\t'";
- break;
- case '\v':
- OS << "'\\v'";
- break;
- default:
- if ((Val & ~0xFFu) == ~0xFFu)
- Val &= 0xFFu;
- if (Val < 127 && Val >= 32) {
- OS << "'";
- OS << (char)Val;
- OS << "'";
- } else if (Val < 256)
- OS << to_string(llvm::format("'\\x%02x'", Val));
- else if (Val <= 0xFFFF)
- OS << to_string(llvm::format("'\\u%04x'", Val));
- else
- OS << to_string(llvm::format("'\\U%08x'", Val));
- }
- }
- continue;
- }
- if (C.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
- const char *RawName =
- dwarf::toString(C.find(DW_AT_GNU_template_name), nullptr);
- assert(RawName);
- StringRef Name = RawName;
- Sep();
- OS << Name;
- continue;
- }
- if (C.getTag() != dwarf::DW_TAG_template_type_parameter)
- continue;
- auto TypeAttr = C.find(DW_AT_type);
- Sep();
- appendQualifiedName(TypeAttr ? resolveReferencedType(C, *TypeAttr)
- : DWARFDie());
- }
- if (IsTemplate && *FirstParameter && FirstParameter == &FirstParameterValue)
- OS << '<';
- return IsTemplate;
- }
- void decomposeConstVolatile(DWARFDie &N, DWARFDie &T, DWARFDie &C,
- DWARFDie &V) {
- (N.getTag() == DW_TAG_const_type ? C : V) = N;
- T = resolveReferencedType(N);
- if (T) {
- auto Tag = T.getTag();
- if (Tag == DW_TAG_const_type) {
- C = T;
- T = resolveReferencedType(T);
- } else if (Tag == DW_TAG_volatile_type) {
- V = T;
- T = resolveReferencedType(T);
- }
- }
- }
- void appendConstVolatileQualifierAfter(DWARFDie N) {
- DWARFDie C;
- DWARFDie V;
- DWARFDie T;
- decomposeConstVolatile(N, T, C, V);
- if (T && T.getTag() == DW_TAG_subroutine_type)
- appendSubroutineNameAfter(T, resolveReferencedType(T), false, C.isValid(),
- V.isValid());
- else
- appendUnqualifiedNameAfter(T, resolveReferencedType(T));
- }
- void appendConstVolatileQualifierBefore(DWARFDie N) {
- DWARFDie C;
- DWARFDie V;
- DWARFDie T;
- decomposeConstVolatile(N, T, C, V);
- bool Subroutine = T && T.getTag() == DW_TAG_subroutine_type;
- DWARFDie A = T;
- while (A && A.getTag() == DW_TAG_array_type)
- A = resolveReferencedType(A);
- bool Leading =
- (!A || (A.getTag() != DW_TAG_pointer_type &&
- A.getTag() != llvm::dwarf::DW_TAG_ptr_to_member_type)) &&
- !Subroutine;
- if (Leading) {
- if (C)
- OS << "const ";
- if (V)
- OS << "volatile ";
- }
- appendQualifiedNameBefore(T);
- if (!Leading && !Subroutine) {
- Word = true;
- if (C)
- OS << "const";
- if (V) {
- if (C)
- OS << ' ';
- OS << "volatile";
- }
- }
- }
-
- /// Recursively append the DIE type name when applicable.
- void appendUnqualifiedName(DWARFDie D,
- std::string *OriginalFullName = nullptr) {
- // FIXME: We should have pretty printers per language. Currently we print
- // everything as if it was C++ and fall back to the TAG type name.
- DWARFDie Inner = appendUnqualifiedNameBefore(D, OriginalFullName);
- appendUnqualifiedNameAfter(D, Inner);
- }
-
- void appendSubroutineNameAfter(DWARFDie D, DWARFDie Inner,
- bool SkipFirstParamIfArtificial, bool Const,
- bool Volatile) {
- DWARFDie FirstParamIfArtificial;
- OS << '(';
- EndedWithTemplate = false;
- bool First = true;
- bool RealFirst = true;
- for (DWARFDie P : D) {
- if (P.getTag() != DW_TAG_formal_parameter &&
- P.getTag() != DW_TAG_unspecified_parameters)
- return;
- DWARFDie T = resolveReferencedType(P);
- if (SkipFirstParamIfArtificial && RealFirst && P.find(DW_AT_artificial)) {
- FirstParamIfArtificial = T;
- RealFirst = false;
- continue;
- }
- if (!First) {
- OS << ", ";
- }
- First = false;
- if (P.getTag() == DW_TAG_unspecified_parameters)
- OS << "...";
- else
- appendQualifiedName(T);
- }
- EndedWithTemplate = false;
- OS << ')';
- if (FirstParamIfArtificial) {
- if (DWARFDie P = FirstParamIfArtificial) {
- if (P.getTag() == DW_TAG_pointer_type) {
- DWARFDie C;
- DWARFDie V;
- auto CVStep = [&](DWARFDie CV) {
- if (DWARFDie U = resolveReferencedType(CV)) {
- if (U.getTag() == DW_TAG_const_type)
- return C = U;
- if (U.getTag() == DW_TAG_volatile_type)
- return V = U;
- }
- return DWARFDie();
- };
- if (DWARFDie CV = CVStep(P)) {
- CVStep(CV);
- }
- if (C)
- OS << " const";
- if (V)
- OS << " volatile";
- }
- }
- } else {
- if (Const)
- OS << " const";
- if (Volatile)
- OS << " volatile";
- }
- if (D.find(DW_AT_reference))
- OS << " &";
- if (D.find(DW_AT_rvalue_reference))
- OS << " &&";
- appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner));
- }
- void appendScopes(DWARFDie D) {
- if (D.getTag() == DW_TAG_compile_unit)
- return;
- if (D.getTag() == DW_TAG_type_unit)
- return;
- if (D.getTag() == DW_TAG_skeleton_unit)
- return;
- if (D.getTag() == DW_TAG_subprogram)
- return;
- if (D.getTag() == DW_TAG_lexical_block)
- return;
- D = D.resolveTypeUnitReference();
- if (DWARFDie P = D.getParent())
- appendScopes(P);
- appendUnqualifiedName(D);
- OS << "::";
- }
-};
-} // anonymous namespace
-
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFAttribute &AttrValue, unsigned Indent,
DIDumpOptions DumpOpts) {
@@ -713,8 +138,7 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
Color = HighlightColor::String;
if (const auto *LT = U->getContext().getLineTableForUnit(U))
if (LT->getFileNameByIndex(
- FormValue.getAsUnsignedConstant().getValue(),
- U->getCompilationDir(),
+ *FormValue.getAsUnsignedConstant(), U->getCompilationDir(),
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) {
File = '"' + File + '"';
Name = File;
@@ -768,7 +192,7 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
Die.getAttributeValueAsReferencedDie(FormValue).getName(
DINameKind::LinkageName))
OS << Space << "\"" << Name << '\"';
- } else if (Attr == DW_AT_type) {
+ } else if (Attr == DW_AT_type || Attr == DW_AT_containing_type) {
DWARFDie D = resolveReferencedType(Die, FormValue);
if (D && !D.isNULL()) {
OS << Space << "\"";
@@ -1061,6 +485,66 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0);
}
+Optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) {
+ if (auto SizeAttr = find(DW_AT_byte_size))
+ if (Optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant())
+ return Size;
+
+ switch (getTag()) {
+ case DW_TAG_pointer_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_rvalue_reference_type:
+ return PointerSize;
+ case DW_TAG_ptr_to_member_type: {
+ if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type))
+ if (BaseType.getTag() == DW_TAG_subroutine_type)
+ return 2 * PointerSize;
+ return PointerSize;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_immutable_type:
+ case DW_TAG_volatile_type:
+ case DW_TAG_restrict_type:
+ case DW_TAG_typedef: {
+ if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type))
+ return BaseType.getTypeSize(PointerSize);
+ break;
+ }
+ case DW_TAG_array_type: {
+ DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type);
+ if (!BaseType)
+ return None;
+ Optional<uint64_t> BaseSize = BaseType.getTypeSize(PointerSize);
+ if (!BaseSize)
+ return None;
+ uint64_t Size = *BaseSize;
+ for (DWARFDie Child : *this) {
+ if (Child.getTag() != DW_TAG_subrange_type)
+ continue;
+
+ if (auto ElemCountAttr = Child.find(DW_AT_count))
+ if (Optional<uint64_t> ElemCount =
+ ElemCountAttr->getAsUnsignedConstant())
+ Size *= *ElemCount;
+ if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound))
+ if (Optional<int64_t> UpperBound =
+ UpperBoundAttr->getAsSignedConstant()) {
+ int64_t LowerBound = 0;
+ if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound))
+ LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0);
+ Size *= *UpperBound - LowerBound + 1;
+ }
+ }
+ return Size;
+ }
+ default:
+ if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type))
+ return BaseType.getTypeSize(PointerSize);
+ break;
+ }
+ return None;
+}
+
/// Helper to dump a DIE with all of its parents, but no siblings.
static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent,
DIDumpOptions DumpOpts, unsigned Depth = 0) {
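getTypeSize() now lives on DWARFDie (and its new default case walks through DW_AT_type), so any holder of a DIE can query it. A hypothetical use, sizing a variable's type with the unit's address size as the pointer width:

#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/raw_ostream.h"

void printTypeSize(llvm::DWARFDie VarDie, llvm::DWARFUnit &U) {
  if (llvm::DWARFDie T =
          VarDie.getAttributeValueAsReferencedDie(llvm::dwarf::DW_AT_type))
    if (llvm::Optional<uint64_t> Size = T.getTypeSize(U.getAddressByteSize()))
      llvm::errs() << "type occupies " << *Size << " bytes\n";
}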
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 86991a3949dd..1fecd5ee6902 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -13,7 +13,10 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index ace7000f07b2..3f140d21c53c 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -9,10 +9,10 @@
#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <cstdint>
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
new file mode 100644
index 000000000000..86cc07b0d0f2
--- /dev/null
+++ b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
@@ -0,0 +1,608 @@
+#include "llvm/DebugInfo/DWARF/DWARFTypePrinter.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/ScopedPrinter.h"
+namespace llvm {
+using namespace dwarf;
+void DWARFTypePrinter::appendTypeTagName(dwarf::Tag T) {
+ StringRef TagStr = TagString(T);
+ static constexpr StringRef Prefix = "DW_TAG_";
+ static constexpr StringRef Suffix = "_type";
+ if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix))
+ return;
+ OS << TagStr.substr(Prefix.size(),
+ TagStr.size() - (Prefix.size() + Suffix.size()))
+ << " ";
+}
+
+void DWARFTypePrinter::appendArrayType(const DWARFDie &D) {
+ for (const DWARFDie &C : D.children()) {
+ if (C.getTag() != DW_TAG_subrange_type)
+ continue;
+ Optional<uint64_t> LB;
+ Optional<uint64_t> Count;
+ Optional<uint64_t> UB;
+ Optional<unsigned> DefaultLB;
+ if (Optional<DWARFFormValue> L = C.find(DW_AT_lower_bound))
+ LB = L->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> CountV = C.find(DW_AT_count))
+ Count = CountV->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> UpperV = C.find(DW_AT_upper_bound))
+ UB = UpperV->getAsUnsignedConstant();
+ if (Optional<DWARFFormValue> LV =
+ D.getDwarfUnit()->getUnitDIE().find(DW_AT_language))
+ if (Optional<uint64_t> LC = LV->getAsUnsignedConstant())
+ if ((DefaultLB =
+ LanguageLowerBound(static_cast<dwarf::SourceLanguage>(*LC))))
+ if (LB && *LB == *DefaultLB)
+ LB = None;
+ if (!LB && !Count && !UB)
+ OS << "[]";
+ else if (!LB && (Count || UB) && DefaultLB)
+ OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']';
+ else {
+ OS << "[[";
+ if (LB)
+ OS << *LB;
+ else
+ OS << '?';
+ OS << ", ";
+ if (Count)
+ if (LB)
+ OS << *LB + *Count;
+ else
+ OS << "? + " << *Count;
+ else if (UB)
+ OS << *UB + 1;
+ else
+ OS << '?';
+ OS << ")]";
+ }
+ }
+ EndedWithTemplate = false;
+}
+
+static DWARFDie resolveReferencedType(DWARFDie D,
+ dwarf::Attribute Attr = DW_AT_type) {
+ return D.getAttributeValueAsReferencedDie(Attr).resolveTypeUnitReference();
+}
+static DWARFDie resolveReferencedType(DWARFDie D, DWARFFormValue F) {
+ return D.getAttributeValueAsReferencedDie(F).resolveTypeUnitReference();
+}
+DWARFDie DWARFTypePrinter::skipQualifiers(DWARFDie D) {
+ while (D && (D.getTag() == DW_TAG_const_type ||
+ D.getTag() == DW_TAG_volatile_type))
+ D = resolveReferencedType(D);
+ return D;
+}
+
+bool DWARFTypePrinter::needsParens(DWARFDie D) {
+ D = skipQualifiers(D);
+ return D && (D.getTag() == DW_TAG_subroutine_type ||
+ D.getTag() == DW_TAG_array_type);
+}
+
+void DWARFTypePrinter::appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner,
+ StringRef Ptr) {
+ appendQualifiedNameBefore(Inner);
+ if (Word)
+ OS << ' ';
+ if (needsParens(Inner))
+ OS << '(';
+ OS << Ptr;
+ Word = false;
+ EndedWithTemplate = false;
+}
+
+DWARFDie
+DWARFTypePrinter::appendUnqualifiedNameBefore(DWARFDie D,
+ std::string *OriginalFullName) {
+ Word = true;
+ if (!D) {
+ OS << "void";
+ return DWARFDie();
+ }
+ DWARFDie InnerDIE;
+ auto Inner = [&] { return InnerDIE = resolveReferencedType(D); };
+ const dwarf::Tag T = D.getTag();
+ switch (T) {
+ case DW_TAG_pointer_type: {
+ appendPointerLikeTypeBefore(D, Inner(), "*");
+ break;
+ }
+ case DW_TAG_subroutine_type: {
+ appendQualifiedNameBefore(Inner());
+ if (Word) {
+ OS << ' ';
+ }
+ Word = false;
+ break;
+ }
+ case DW_TAG_array_type: {
+ appendQualifiedNameBefore(Inner());
+ break;
+ }
+ case DW_TAG_reference_type:
+ appendPointerLikeTypeBefore(D, Inner(), "&");
+ break;
+ case DW_TAG_rvalue_reference_type:
+ appendPointerLikeTypeBefore(D, Inner(), "&&");
+ break;
+ case DW_TAG_ptr_to_member_type: {
+ appendQualifiedNameBefore(Inner());
+ if (needsParens(InnerDIE))
+ OS << '(';
+ else if (Word)
+ OS << ' ';
+ if (DWARFDie Cont = resolveReferencedType(D, DW_AT_containing_type)) {
+ appendQualifiedName(Cont);
+ EndedWithTemplate = false;
+ OS << "::";
+ }
+ OS << "*";
+ Word = false;
+ break;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ appendConstVolatileQualifierBefore(D);
+ break;
+ case DW_TAG_namespace: {
+ if (const char *Name = dwarf::toString(D.find(DW_AT_name), nullptr))
+ OS << Name;
+ else
+ OS << "(anonymous namespace)";
+ break;
+ }
+ case DW_TAG_unspecified_type: {
+ StringRef TypeName = D.getShortName();
+ if (TypeName == "decltype(nullptr)")
+ TypeName = "std::nullptr_t";
+ Word = true;
+ OS << TypeName;
+ EndedWithTemplate = false;
+ break;
+ }
+ /*
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_base_type:
+ */
+ default: {
+ const char *NamePtr = dwarf::toString(D.find(DW_AT_name), nullptr);
+ if (!NamePtr) {
+ appendTypeTagName(D.getTag());
+ return DWARFDie();
+ }
+ Word = true;
+ StringRef Name = NamePtr;
+ static constexpr StringRef MangledPrefix = "_STN|";
+ if (Name.startswith(MangledPrefix)) {
+ Name = Name.drop_front(MangledPrefix.size());
+ auto Separator = Name.find('|');
+ assert(Separator != StringRef::npos);
+ StringRef BaseName = Name.substr(0, Separator);
+ StringRef TemplateArgs = Name.substr(Separator + 1);
+ if (OriginalFullName)
+ *OriginalFullName = (BaseName + TemplateArgs).str();
+ Name = BaseName;
+ } else
+ EndedWithTemplate = Name.endswith(">");
+ OS << Name;
+ // This check would be insufficient for operator overloads like
+ // "operator>>" - but for now Clang doesn't try to simplify them, so this
+ // is OK. Add more nuanced operator overload handling here if/when needed.
+ if (Name.endswith(">"))
+ break;
+ if (!appendTemplateParameters(D))
+ break;
+
+ if (EndedWithTemplate)
+ OS << ' ';
+ OS << '>';
+ EndedWithTemplate = true;
+ Word = true;
+ break;
+ }
+ }
+ return InnerDIE;
+}
+
+void DWARFTypePrinter::appendUnqualifiedNameAfter(
+ DWARFDie D, DWARFDie Inner, bool SkipFirstParamIfArtificial) {
+ if (!D)
+ return;
+ switch (D.getTag()) {
+ case DW_TAG_subroutine_type: {
+ appendSubroutineNameAfter(D, Inner, SkipFirstParamIfArtificial, false,
+ false);
+ break;
+ }
+ case DW_TAG_array_type: {
+ appendArrayType(D);
+ break;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ appendConstVolatileQualifierAfter(D);
+ break;
+ case DW_TAG_ptr_to_member_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_rvalue_reference_type:
+ case DW_TAG_pointer_type: {
+ if (needsParens(Inner))
+ OS << ')';
+ appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner),
+ /*SkipFirstParamIfArtificial=*/D.getTag() ==
+ DW_TAG_ptr_to_member_type);
+ break;
+ }
+ /*
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_base_type:
+ case DW_TAG_namespace:
+ */
+ default:
+ break;
+ }
+}
+
+void DWARFTypePrinter::appendQualifiedName(DWARFDie D) {
+ if (D)
+ appendScopes(D.getParent());
+ appendUnqualifiedName(D);
+}
+DWARFDie DWARFTypePrinter::appendQualifiedNameBefore(DWARFDie D) {
+ if (D)
+ appendScopes(D.getParent());
+ return appendUnqualifiedNameBefore(D);
+}
+bool DWARFTypePrinter::appendTemplateParameters(DWARFDie D,
+ bool *FirstParameter) {
+ bool FirstParameterValue = true;
+ bool IsTemplate = false;
+ if (!FirstParameter)
+ FirstParameter = &FirstParameterValue;
+ for (const DWARFDie &C : D) {
+ auto Sep = [&] {
+ if (*FirstParameter)
+ OS << '<';
+ else
+ OS << ", ";
+ IsTemplate = true;
+ EndedWithTemplate = false;
+ *FirstParameter = false;
+ };
+ if (C.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ IsTemplate = true;
+ appendTemplateParameters(C, FirstParameter);
+ }
+ if (C.getTag() == dwarf::DW_TAG_template_value_parameter) {
+ DWARFDie T = resolveReferencedType(C);
+ Sep();
+ if (T.getTag() == DW_TAG_enumeration_type) {
+ OS << '(';
+ appendQualifiedName(T);
+ OS << ')';
+ auto V = C.find(DW_AT_const_value);
+ OS << std::to_string(*V->getAsSignedConstant());
+ continue;
+ }
+ // /Maybe/ we could do pointer type parameters, looking for the
+ // symbol in the ELF symbol table to get back to the variable...
+ // but probably not worth it.
+ if (T.getTag() == DW_TAG_pointer_type)
+ continue;
+ const char *RawName = dwarf::toString(T.find(DW_AT_name), nullptr);
+ assert(RawName);
+ StringRef Name = RawName;
+ auto V = C.find(DW_AT_const_value);
+ bool IsQualifiedChar = false;
+ if (Name == "bool") {
+ OS << (*V->getAsUnsignedConstant() ? "true" : "false");
+ } else if (Name == "short") {
+ OS << "(short)";
+ OS << std::to_string(*V->getAsSignedConstant());
+ } else if (Name == "unsigned short") {
+ OS << "(unsigned short)";
+ OS << std::to_string(*V->getAsSignedConstant());
+ } else if (Name == "int")
+ OS << std::to_string(*V->getAsSignedConstant());
+ else if (Name == "long") {
+ OS << std::to_string(*V->getAsSignedConstant());
+ OS << "L";
+ } else if (Name == "long long") {
+ OS << std::to_string(*V->getAsSignedConstant());
+ OS << "LL";
+ } else if (Name == "unsigned int") {
+ OS << std::to_string(*V->getAsUnsignedConstant());
+ OS << "U";
+ } else if (Name == "unsigned long") {
+ OS << std::to_string(*V->getAsUnsignedConstant());
+ OS << "UL";
+ } else if (Name == "unsigned long long") {
+ OS << std::to_string(*V->getAsUnsignedConstant());
+ OS << "ULL";
+ } else if (Name == "char" ||
+ (IsQualifiedChar =
+ (Name == "unsigned char" || Name == "signed char"))) {
+ // FIXME: check T's DW_AT_type to see if it's signed or not (since
+ // char signedness is implementation defined).
+ auto Val = *V->getAsSignedConstant();
+ // Copied/hacked up from Clang's CharacterLiteral::print - incomplete
+ // (doesn't actually support different character types/widths, sign
+ // handling's not done, and doesn't correctly test if a character is
+ // printable or needs to use a numeric escape sequence instead)
+ if (IsQualifiedChar) {
+ OS << '(';
+ OS << Name;
+ OS << ')';
+ }
+ switch (Val) {
+ case '\\':
+ OS << "'\\\\'";
+ break;
+ case '\'':
+ OS << "'\\''";
+ break;
+ case '\a':
+ // TODO: K&R: the meaning of '\\a' is different in traditional C
+ OS << "'\\a'";
+ break;
+ case '\b':
+ OS << "'\\b'";
+ break;
+ case '\f':
+ OS << "'\\f'";
+ break;
+ case '\n':
+ OS << "'\\n'";
+ break;
+ case '\r':
+ OS << "'\\r'";
+ break;
+ case '\t':
+ OS << "'\\t'";
+ break;
+ case '\v':
+ OS << "'\\v'";
+ break;
+ default:
+ if ((Val & ~0xFFu) == ~0xFFu)
+ Val &= 0xFFu;
+ if (Val < 127 && Val >= 32) {
+ OS << "'";
+ OS << (char)Val;
+ OS << "'";
+ } else if (Val < 256)
+ OS << to_string(llvm::format("'\\x%02x'", Val));
+ else if (Val <= 0xFFFF)
+ OS << to_string(llvm::format("'\\u%04x'", Val));
+ else
+ OS << to_string(llvm::format("'\\U%08x'", Val));
+ }
+ }
+ continue;
+ }
+ if (C.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ const char *RawName =
+ dwarf::toString(C.find(DW_AT_GNU_template_name), nullptr);
+ assert(RawName);
+ StringRef Name = RawName;
+ Sep();
+ OS << Name;
+ continue;
+ }
+ if (C.getTag() != dwarf::DW_TAG_template_type_parameter)
+ continue;
+ auto TypeAttr = C.find(DW_AT_type);
+ Sep();
+ appendQualifiedName(TypeAttr ? resolveReferencedType(C, *TypeAttr)
+ : DWARFDie());
+ }
+ if (IsTemplate && *FirstParameter && FirstParameter == &FirstParameterValue) {
+ OS << '<';
+ EndedWithTemplate = false;
+ }
+ return IsTemplate;
+}
+void DWARFTypePrinter::decomposeConstVolatile(DWARFDie &N, DWARFDie &T,
+ DWARFDie &C, DWARFDie &V) {
+ (N.getTag() == DW_TAG_const_type ? C : V) = N;
+ T = resolveReferencedType(N);
+ if (T) {
+ auto Tag = T.getTag();
+ if (Tag == DW_TAG_const_type) {
+ C = T;
+ T = resolveReferencedType(T);
+ } else if (Tag == DW_TAG_volatile_type) {
+ V = T;
+ T = resolveReferencedType(T);
+ }
+ }
+}
+void DWARFTypePrinter::appendConstVolatileQualifierAfter(DWARFDie N) {
+ DWARFDie C;
+ DWARFDie V;
+ DWARFDie T;
+ decomposeConstVolatile(N, T, C, V);
+ if (T && T.getTag() == DW_TAG_subroutine_type)
+ appendSubroutineNameAfter(T, resolveReferencedType(T), false, C.isValid(),
+ V.isValid());
+ else
+ appendUnqualifiedNameAfter(T, resolveReferencedType(T));
+}
+void DWARFTypePrinter::appendConstVolatileQualifierBefore(DWARFDie N) {
+ DWARFDie C;
+ DWARFDie V;
+ DWARFDie T;
+ decomposeConstVolatile(N, T, C, V);
+ bool Subroutine = T && T.getTag() == DW_TAG_subroutine_type;
+ DWARFDie A = T;
+ while (A && A.getTag() == DW_TAG_array_type)
+ A = resolveReferencedType(A);
+ bool Leading =
+ (!A || (A.getTag() != DW_TAG_pointer_type &&
+ A.getTag() != llvm::dwarf::DW_TAG_ptr_to_member_type)) &&
+ !Subroutine;
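+  // The placement mirrors C++ spelling: e.g. "const int" prints the
+  // qualifier before the type, while "int *const" prints it after, because
+  // the qualified entity there is the pointer itself.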
+ if (Leading) {
+ if (C)
+ OS << "const ";
+ if (V)
+ OS << "volatile ";
+ }
+ appendQualifiedNameBefore(T);
+ if (!Leading && !Subroutine) {
+ Word = true;
+ if (C)
+ OS << "const";
+ if (V) {
+ if (C)
+ OS << ' ';
+ OS << "volatile";
+ }
+ }
+}
+void DWARFTypePrinter::appendUnqualifiedName(DWARFDie D,
+ std::string *OriginalFullName) {
+ // FIXME: We should have pretty printers per language. Currently we print
+ // everything as if it was C++ and fall back to the TAG type name.
+ DWARFDie Inner = appendUnqualifiedNameBefore(D, OriginalFullName);
+ appendUnqualifiedNameAfter(D, Inner);
+}
+void DWARFTypePrinter::appendSubroutineNameAfter(
+ DWARFDie D, DWARFDie Inner, bool SkipFirstParamIfArtificial, bool Const,
+ bool Volatile) {
+ DWARFDie FirstParamIfArtificial;
+ OS << '(';
+ EndedWithTemplate = false;
+ bool First = true;
+ bool RealFirst = true;
+ for (DWARFDie P : D) {
+ if (P.getTag() != DW_TAG_formal_parameter &&
+ P.getTag() != DW_TAG_unspecified_parameters)
+ return;
+ DWARFDie T = resolveReferencedType(P);
+ if (SkipFirstParamIfArtificial && RealFirst && P.find(DW_AT_artificial)) {
+ FirstParamIfArtificial = T;
+ RealFirst = false;
+ continue;
+ }
+ if (!First) {
+ OS << ", ";
+ }
+ First = false;
+ if (P.getTag() == DW_TAG_unspecified_parameters)
+ OS << "...";
+ else
+ appendQualifiedName(T);
+ }
+ EndedWithTemplate = false;
+ OS << ')';
+ if (FirstParamIfArtificial) {
+ if (DWARFDie P = FirstParamIfArtificial) {
+ if (P.getTag() == DW_TAG_pointer_type) {
+ auto CVStep = [&](DWARFDie CV) {
+ if (DWARFDie U = resolveReferencedType(CV)) {
+ Const |= U.getTag() == DW_TAG_const_type;
+ Volatile |= U.getTag() == DW_TAG_volatile_type;
+ return U;
+ }
+ return DWARFDie();
+ };
+ if (DWARFDie CV = CVStep(P)) {
+ CVStep(CV);
+ }
+ }
+ }
+ }
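+  // For C++ methods, the artificial first parameter is the implicit 'this'
+  // pointer; the steps above fold its pointee's cv-qualifiers into
+  // Const/Volatile so they print as trailing method qualifiers below.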
+
+ if (auto CC = D.find(DW_AT_calling_convention)) {
+ switch (*CC->getAsUnsignedConstant()) {
+ case CallingConvention::DW_CC_BORLAND_stdcall:
+ OS << " __attribute__((stdcall))";
+ break;
+ case CallingConvention::DW_CC_BORLAND_msfastcall:
+ OS << " __attribute__((fastcall))";
+ break;
+ case CallingConvention::DW_CC_BORLAND_thiscall:
+ OS << " __attribute__((thiscall))";
+ break;
+ case CallingConvention::DW_CC_LLVM_vectorcall:
+ OS << " __attribute__((vectorcall))";
+ break;
+ case CallingConvention::DW_CC_BORLAND_pascal:
+ OS << " __attribute__((pascal))";
+ break;
+ case CallingConvention::DW_CC_LLVM_Win64:
+ OS << " __attribute__((ms_abi))";
+ break;
+ case CallingConvention::DW_CC_LLVM_X86_64SysV:
+ OS << " __attribute__((sysv_abi))";
+ break;
+ case CallingConvention::DW_CC_LLVM_AAPCS:
+ // AArch64VectorCall missing?
+ OS << " __attribute__((pcs(\"aapcs\")))";
+ break;
+ case CallingConvention::DW_CC_LLVM_AAPCS_VFP:
+ OS << " __attribute__((pcs(\"aapcs-vfp\")))";
+ break;
+ case CallingConvention::DW_CC_LLVM_IntelOclBicc:
+ OS << " __attribute__((intel_ocl_bicc))";
+ break;
+ case CallingConvention::DW_CC_LLVM_SpirFunction:
+ case CallingConvention::DW_CC_LLVM_OpenCLKernel:
+      // These aren't available as attributes, but maybe we should still
+      // render them somehow. (Clang doesn't render them either, but that's
+      // an issue for template names too: the DWARF names of templates
+      // instantiated with function types using these calling conventions
+      // won't have distinct names, so that would need fixing as well.)
+ break;
+ case CallingConvention::DW_CC_LLVM_Swift:
+ // SwiftAsync missing
+ OS << " __attribute__((swiftcall))";
+ break;
+ case CallingConvention::DW_CC_LLVM_PreserveMost:
+ OS << " __attribute__((preserve_most))";
+ break;
+ case CallingConvention::DW_CC_LLVM_PreserveAll:
+ OS << " __attribute__((preserve_all))";
+ break;
+ case CallingConvention::DW_CC_LLVM_X86RegCall:
+ OS << " __attribute__((regcall))";
+ break;
+ }
+ }
+
+ if (Const)
+ OS << " const";
+ if (Volatile)
+ OS << " volatile";
+ if (D.find(DW_AT_reference))
+ OS << " &";
+ if (D.find(DW_AT_rvalue_reference))
+ OS << " &&";
+
+ appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner));
+}
+void DWARFTypePrinter::appendScopes(DWARFDie D) {
+ if (D.getTag() == DW_TAG_compile_unit)
+ return;
+ if (D.getTag() == DW_TAG_type_unit)
+ return;
+ if (D.getTag() == DW_TAG_skeleton_unit)
+ return;
+ if (D.getTag() == DW_TAG_subprogram)
+ return;
+ if (D.getTag() == DW_TAG_lexical_block)
+ return;
+ D = D.resolveTypeUnitReference();
+ if (DWARFDie P = D.getParent())
+ appendScopes(P);
+ appendUnqualifiedName(D);
+ OS << "::";
+}
+} // namespace llvm
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index a301b65dd444..fe16ca06132b 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -8,9 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/DebugInfo/DIContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cinttypes>
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index eed0a60ec75e..74667fcb92bc 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -9,15 +9,23 @@
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFListTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Path.h"
@@ -25,7 +33,6 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <cstdio>
#include <utility>
#include <vector>
@@ -79,7 +86,14 @@ void DWARFUnitVector::addUnitsImpl(
if (!IndexEntry && IsDWO) {
const DWARFUnitIndex &Index = getDWARFUnitIndex(
Context, Header.isTypeUnit() ? DW_SECT_EXT_TYPES : DW_SECT_INFO);
- IndexEntry = Index.getFromOffset(Header.getOffset());
+ if (Index) {
+ if (Header.isTypeUnit())
+ IndexEntry = Index.getFromHash(Header.getTypeHash());
+ else if (auto DWOId = Header.getDWOId())
+ IndexEntry = Index.getFromHash(*DWOId);
+ }
+ if (!IndexEntry)
+ IndexEntry = Index.getFromOffset(Header.getOffset());
}
if (IndexEntry && !Header.applyIndexEntry(IndexEntry))
return nullptr;
@@ -366,6 +380,9 @@ void DWARFUnit::clear() {
AddrOffsetSectionBase = None;
SU = nullptr;
clearDIEs(false);
+ AddrDieMap.clear();
+ if (DWO)
+ DWO->clear();
DWO.reset();
}
@@ -407,7 +424,7 @@ void DWARFUnit::extractDIEsToVector(
assert((Parents.back() == UINT32_MAX || Parents.back() <= Dies.size()) &&
"Wrong parent index");
- // Extract die. Stop if any error occured.
+ // Extract die. Stop if any error occurred.
if (!DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset,
Parents.back()))
break;
@@ -607,7 +624,7 @@ bool DWARFUnit::parseDWO() {
DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase);
if (getVersion() == 4) {
auto DWORangesBase = UnitDie.getRangesBaseAttribute();
- DWO->setRangesSection(RangeSection, DWORangesBase.getValueOr(0));
+ DWO->setRangesSection(RangeSection, DWORangesBase.value_or(0));
}
return true;
@@ -735,6 +752,100 @@ DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) {
return R->second.second;
}
+void DWARFUnit::updateVariableDieMap(DWARFDie Die) {
+ for (DWARFDie Child : Die) {
+ if (isType(Child.getTag()))
+ continue;
+ updateVariableDieMap(Child);
+ }
+
+ if (Die.getTag() != DW_TAG_variable)
+ return;
+
+ Expected<DWARFLocationExpressionsVector> Locations =
+ Die.getLocations(DW_AT_location);
+ if (!Locations) {
+ // Missing DW_AT_location is fine here.
+ consumeError(Locations.takeError());
+ return;
+ }
+
+ uint64_t Address = UINT64_MAX;
+
+ for (const DWARFLocationExpression &Location : *Locations) {
+ uint8_t AddressSize = getAddressByteSize();
+ DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize);
+ DWARFExpression Expr(Data, AddressSize);
+ auto It = Expr.begin();
+ if (It == Expr.end())
+ continue;
+
+ // Match exactly the main sequence used to describe global variables:
+ // `DW_OP_addr[x] [+ DW_OP_plus_uconst]`. Currently, this is the sequence
+ // that LLVM produces for DILocalVariables and DIGlobalVariables. If, in
+    // the future, the DWARF producer (`DwarfCompileUnit::addLocationAttribute()`
+    // is a good starting point) is extended to use further expressions, this
+    // code needs to be updated.
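+    // For example, a DW_AT_location of [DW_OP_addr 0x1000,
+    // DW_OP_plus_uconst 0x10] resolves to address 0x1010 below (the
+    // addresses are illustrative).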
+ uint64_t LocationAddr;
+ if (It->getCode() == dwarf::DW_OP_addr) {
+ LocationAddr = It->getRawOperand(0);
+ } else if (It->getCode() == dwarf::DW_OP_addrx) {
+ uint64_t DebugAddrOffset = It->getRawOperand(0);
+ if (auto Pointer = getAddrOffsetSectionItem(DebugAddrOffset)) {
+ LocationAddr = Pointer->Address;
+ }
+ } else {
+ continue;
+ }
+
+ // Read the optional 2nd operand, a DW_OP_plus_uconst.
+ if (++It != Expr.end()) {
+ if (It->getCode() != dwarf::DW_OP_plus_uconst)
+ continue;
+
+ LocationAddr += It->getRawOperand(0);
+
+      // Probe for a 3rd operand; if one exists, bail.
+ if (++It != Expr.end())
+ continue;
+ }
+
+ Address = LocationAddr;
+ break;
+ }
+
+ // Get the size of the global variable. If all else fails (i.e. the global has
+ // no type), then we use a size of one to still allow symbolization of the
+ // exact address.
+ uint64_t GVSize = 1;
+ if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type))
+ if (Optional<uint64_t> Size = Die.getTypeSize(getAddressByteSize()))
+ GVSize = *Size;
+
+ if (Address != UINT64_MAX)
+ VariableDieMap[Address] = {Address + GVSize, Die};
+}
+
+DWARFDie DWARFUnit::getVariableForAddress(uint64_t Address) {
+ extractDIEsIfNeeded(false);
+
+ auto RootDie = getUnitDIE();
+
+ auto RootLookup = RootsParsedForVariables.insert(RootDie.getOffset());
+ if (RootLookup.second)
+ updateVariableDieMap(RootDie);
+
+ auto R = VariableDieMap.upper_bound(Address);
+ if (R == VariableDieMap.begin())
+ return DWARFDie();
+
+  // The entry before upper_bound's result is the only candidate that may
+  // contain Address; verify that it actually does.
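+  // E.g. with entries {0x1000 -> (0x1010, D1), 0x2000 -> (0x2020, D2)}, a
+  // lookup of 0x1008 lands on the 0x2000 entry, steps back to 0x1000, and
+  // returns D1 since 0x1008 < 0x1010 (addresses illustrative).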
+ --R;
+ if (Address >= R->second.first)
+ return DWARFDie();
+ return R->second.second;
+}
+
void
DWARFUnit::getInlinedChainForAddress(uint64_t Address,
SmallVectorImpl<DWARFDie> &InlinedChain) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index d27fd08db14e..d161beef2202 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index ca7ac785b550..c704f8f583af 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -6,17 +6,28 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFAttribute.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Object/Error.h"
#include "llvm/Support/DJB.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,6 +39,10 @@ using namespace llvm;
using namespace dwarf;
using namespace object;
+namespace llvm {
+class DWARFDebugInfoEntry;
+}
+
Optional<DWARFAddressRange>
DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
auto Begin = Ranges.begin();
@@ -381,6 +396,59 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S) {
return NumDebugInfoErrors;
}
+unsigned DWARFVerifier::verifyIndex(StringRef Name,
+ DWARFSectionKind InfoColumnKind,
+ StringRef IndexStr) {
+ if (IndexStr.empty())
+ return 0;
+ OS << "Verifying " << Name << "...\n";
+ DWARFUnitIndex Index(InfoColumnKind);
+ DataExtractor D(IndexStr, DCtx.isLittleEndian(), 0);
+ if (!Index.parse(D))
+ return 1;
+ using MapType = IntervalMap<uint32_t, uint64_t>;
+ MapType::Allocator Alloc;
+ std::vector<std::unique_ptr<MapType>> Sections(Index.getColumnKinds().size());
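+  // One interval map per column, keyed by section offset and carrying the
+  // owning unit signature; e.g. two units whose contributions to the same
+  // column are [0x0,0x40) and [0x30,0x80) would overlap and trigger the
+  // error below (offsets illustrative).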
+ for (const DWARFUnitIndex::Entry &E : Index.getRows()) {
+ uint64_t Sig = E.getSignature();
+ if (!E.getContributions())
+ continue;
+ for (auto E : enumerate(InfoColumnKind == DW_SECT_INFO
+ ? makeArrayRef(E.getContributions(),
+ Index.getColumnKinds().size())
+ : makeArrayRef(E.getContribution(), 1))) {
+ const DWARFUnitIndex::Entry::SectionContribution &SC = E.value();
+ int Col = E.index();
+ if (SC.Length == 0)
+ continue;
+ if (!Sections[Col])
+ Sections[Col] = std::make_unique<MapType>(Alloc);
+ auto &M = *Sections[Col];
+ auto I = M.find(SC.Offset);
+ if (I != M.end() && I.start() < (SC.Offset + SC.Length)) {
+ error() << llvm::formatv(
+ "overlapping index entries for entries {0:x16} "
+ "and {1:x16} for column {2}\n",
+ *I, Sig, toString(Index.getColumnKinds()[Col]));
+ return 1;
+ }
+ M.insert(SC.Offset, SC.Offset + SC.Length - 1, Sig);
+ }
+ }
+
+ return 0;
+}
+
+bool DWARFVerifier::handleDebugCUIndex() {
+ return verifyIndex(".debug_cu_index", DWARFSectionKind::DW_SECT_INFO,
+ DCtx.getDWARFObj().getCUIndexSection()) == 0;
+}
+
+bool DWARFVerifier::handleDebugTUIndex() {
+ return verifyIndex(".debug_tu_index", DWARFSectionKind::DW_SECT_EXT_TYPES,
+ DCtx.getDWARFObj().getTUIndexSection()) == 0;
+}
+
bool DWARFVerifier::handleDebugInfo() {
const DWARFObject &DObj = DCtx.getDWARFObj();
unsigned NumErrors = 0;
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 6eef6f84ab40..473a69b34ac3 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -10,6 +10,7 @@
#include <unordered_set>
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ThreadPool.h"
@@ -287,12 +288,12 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
// linker problems or LTO or other DWARF re-linking so it is worth emitting
// an error, but not worth stopping the creation of the GSYM.
if (!FI.Range.contains(RowAddress)) {
- if (RowAddress < FI.Range.Start) {
+ if (RowAddress < FI.Range.start()) {
Log << "error: DIE has a start address whose LowPC is between the "
"line table Row[" << RowIndex << "] with address "
<< HEX64(RowAddress) << " and the next one.\n";
Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
- RowAddress = FI.Range.Start;
+ RowAddress = FI.Range.start();
} else {
continue;
}
@@ -403,8 +404,7 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
}
FunctionInfo FI;
- FI.setStartAddress(Range.LowPC);
- FI.setEndAddress(Range.HighPC);
+ FI.Range = {Range.LowPC, Range.HighPC};
FI.Name = *NameIndex;
if (CUI.LineTable) {
convertFunctionLineTable(OS, CUI, Die, Gsym, FI);
@@ -427,11 +427,28 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
Error DwarfTransformer::convert(uint32_t NumThreads) {
size_t NumBefore = Gsym.getNumFunctionInfos();
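+  // With split DWARF the skeleton CU carries little content, so prefer the
+  // DWO unit DIE when it can be loaded; DW_AT_dwo_name / DW_AT_GNU_dwo_name
+  // is consulted only to name the .dwo file in the warning.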
+ auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
+ DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
+ if (llvm::Optional<uint64_t> DWOId = DwarfUnit.getDWOId()) {
+ DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
+ if (!DWOCU->isDWOUnit()) {
+ std::string DWOName = dwarf::toString(
+ DwarfUnit.getUnitDIE().find(
+ {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
+ "");
+ Log << "warning: Unable to retrieve DWO .debug_info section for "
+ << DWOName << "\n";
+ } else {
+ ReturnDie = DWOCU->getUnitDIE(false);
+ }
+ }
+ return ReturnDie;
+ };
if (NumThreads == 1) {
// Parse all DWARF data from this thread, use the same string/file table
// for everything
for (const auto &CU : DICtx.compile_units()) {
- DWARFDie Die = CU->getUnitDIE(false);
+ DWARFDie Die = getDie(*CU);
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
handleDie(Log, CUI, Die);
}
@@ -456,7 +473,7 @@ Error DwarfTransformer::convert(uint32_t NumThreads) {
// Now convert all DWARF to GSYM in a thread pool.
std::mutex LogMutex;
for (const auto &CU : DICtx.compile_units()) {
- DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/);
+ DWARFDie Die = getDie(*CU);
if (Die) {
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
pool.async([this, CUI, &LogMutex, Die]() mutable {
diff --git a/llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp b/llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp
new file mode 100644
index 000000000000..4a42100c86da
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/ExtractRanges.cpp
@@ -0,0 +1,79 @@
+//===- ExtractRanges.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/DataExtractor.h"
+#include <algorithm>
+#include <inttypes.h>
+
+namespace llvm {
+namespace gsym {
+
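+// A range is serialized as a (ULEB128 start-offset, ULEB128 size) pair
+// relative to BaseAddr; e.g. with BaseAddr = 0x1000, the range
+// [0x1010, 0x1030) encodes as ULEB(0x10) followed by ULEB(0x20) (values
+// illustrative).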
+void encodeRange(const AddressRange &Range, FileWriter &O, uint64_t BaseAddr) {
+ assert(Range.start() >= BaseAddr);
+ O.writeULEB(Range.start() - BaseAddr);
+ O.writeULEB(Range.size());
+}
+
+AddressRange decodeRange(DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t &Offset) {
+ const uint64_t AddrOffset = Data.getULEB128(&Offset);
+ const uint64_t Size = Data.getULEB128(&Offset);
+ const uint64_t StartAddr = BaseAddr + AddrOffset;
+
+ return {StartAddr, StartAddr + Size};
+}
+
+void encodeRanges(const AddressRanges &Ranges, FileWriter &O,
+ uint64_t BaseAddr) {
+ O.writeULEB(Ranges.size());
+ if (Ranges.empty())
+ return;
+ for (auto Range : Ranges)
+ encodeRange(Range, O, BaseAddr);
+}
+
+void decodeRanges(AddressRanges &Ranges, DataExtractor &Data, uint64_t BaseAddr,
+ uint64_t &Offset) {
+ Ranges.clear();
+ uint64_t NumRanges = Data.getULEB128(&Offset);
+ Ranges.reserve(NumRanges);
+ for (uint64_t RangeIdx = 0; RangeIdx < NumRanges; RangeIdx++)
+ Ranges.insert(decodeRange(Data, BaseAddr, Offset));
+}
+
+void skipRange(DataExtractor &Data, uint64_t &Offset) {
+ Data.getULEB128(&Offset);
+ Data.getULEB128(&Offset);
+}
+
+uint64_t skipRanges(DataExtractor &Data, uint64_t &Offset) {
+ uint64_t NumRanges = Data.getULEB128(&Offset);
+ for (uint64_t I = 0; I < NumRanges; ++I)
+ skipRange(Data, Offset);
+ return NumRanges;
+}
+
+} // namespace gsym
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R) {
+ return OS << '[' << HEX64(R.start()) << " - " << HEX64(R.end()) << ")";
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR) {
+ size_t Size = AR.size();
+ for (size_t I = 0; I < Size; ++I) {
+ if (I)
+ OS << ' ';
+ OS << AR[I];
+ }
+ return OS;
+}
+
+} // namespace llvm
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index cef1b9498c5c..4f5d240cdf72 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -36,12 +36,11 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
uint64_t BaseAddr) {
FunctionInfo FI;
- FI.Range.Start = BaseAddr;
uint64_t Offset = 0;
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
return createStringError(std::errc::io_error,
"0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset);
- FI.Range.End = FI.Range.Start + Data.getU32(&Offset);
+ FI.Range = {BaseAddr, BaseAddr + Data.getU32(&Offset)};
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
return createStringError(std::errc::io_error,
"0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset);
@@ -109,13 +108,13 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
// Write the name of this function as a uint32_t string table offset.
O.writeU32(Name);
- if (OptLineTable.hasValue()) {
+ if (OptLineTable) {
O.writeU32(InfoType::LineTableInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the LineTable out with the number of bytes that were written.
O.writeU32(0);
const auto StartOffset = O.tell();
- llvm::Error err = OptLineTable->encode(O, Range.Start);
+ llvm::Error err = OptLineTable->encode(O, Range.start());
if (err)
return std::move(err);
const auto Length = O.tell() - StartOffset;
@@ -127,13 +126,13 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
}
// Write out the inline function info if we have any and if it is valid.
- if (Inline.hasValue()) {
+ if (Inline) {
O.writeU32(InfoType::InlineInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the LineTable out with the number of bytes that were written.
O.writeU32(0);
const auto StartOffset = O.tell();
- llvm::Error err = Inline->encode(O, Range.Start);
+ llvm::Error err = Inline->encode(O, Range.start());
if (err)
return std::move(err);
const auto Length = O.tell() - StartOffset;
@@ -157,9 +156,8 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
uint64_t Addr) {
LookupResult LR;
LR.LookupAddr = Addr;
- LR.FuncRange.Start = FuncAddr;
uint64_t Offset = 0;
- LR.FuncRange.End = FuncAddr + Data.getU32(&Offset);
+ LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(&Offset)};
uint32_t NameOffset = Data.getU32(&Offset);
// The "lookup" functions doesn't report errors as accurately as the "decode"
// function as it is meant to be fast. For more accurage errors we could call
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 1c20a59469dc..8281938770cf 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -271,7 +271,7 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
}
}
} else if (Prev.Range.size() == 0 &&
- Curr.Range.contains(Prev.Range.Start)) {
+ Curr.Range.contains(Prev.Range.start())) {
if (!Quiet) {
OS << "warning: removing symbol:\n"
<< Prev << "\nKeeping:\n"
@@ -291,8 +291,8 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
// has no size when doing lookups.
if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
if (auto Range =
- ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) {
- Funcs.back().Range.End = Range->End;
+ ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
+ Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
}
}
OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 2ad18bf63d5d..0c585cc8d306 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -48,7 +48,7 @@ llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
llvm::Expected<llvm::gsym::GsymReader>
GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
- if (!MemBuffer.get())
+ if (!MemBuffer)
return createStringError(std::errc::invalid_argument,
"invalid memory buffer");
GsymReader GR(std::move(MemBuffer));
diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
index 21679b1b78aa..f7c4637a8a5b 100644
--- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -75,7 +75,7 @@ llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr
static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) {
if (!SkippedRanges) {
- if (AddressRanges::skip(Data, Offset) == 0)
+ if (skipRanges(Data, Offset) == 0)
return false;
}
bool HasChildren = Data.getU8(&Offset) != 0;
@@ -109,7 +109,7 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs,
llvm::Error &Err) {
InlineInfo Inline;
- Inline.Ranges.decode(Data, BaseAddr, Offset);
+ decodeRanges(Inline.Ranges, Data, BaseAddr, Offset);
if (Inline.Ranges.empty())
return true;
// Check if the address is contained within the inline information, and if
@@ -128,7 +128,7 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
if (HasChildren) {
// Child address ranges are encoded relative to the first address in the
// parent InlineInfo object.
- const auto ChildBaseAddr = Inline.Ranges[0].Start;
+ const auto ChildBaseAddr = Inline.Ranges[0].start();
bool Done = false;
while (!Done)
Done = lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err);
@@ -150,7 +150,7 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
SrcLoc.Base = GR.getString(CallFile->Base);
SrcLoc.Line = Inline.CallLine;
SrcLocs.back().Name = GR.getString(Inline.Name);
- SrcLocs.back().Offset = Addr - Inline.Ranges[0].Start;
+ SrcLocs.back().Offset = Addr - Inline.Ranges[0].start();
SrcLocs.push_back(SrcLoc);
}
return true;
@@ -182,7 +182,7 @@ static llvm::Expected<InlineInfo> decode(DataExtractor &Data, uint64_t &Offset,
if (!Data.isValidOffset(Offset))
return createStringError(std::errc::io_error,
"0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset);
- Inline.Ranges.decode(Data, BaseAddr, Offset);
+ decodeRanges(Inline.Ranges, Data, BaseAddr, Offset);
if (Inline.Ranges.empty())
return Inline;
if (!Data.isValidOffsetForDataOfSize(Offset, 1))
@@ -205,7 +205,7 @@ static llvm::Expected<InlineInfo> decode(DataExtractor &Data, uint64_t &Offset,
if (HasChildren) {
// Child address ranges are encoded relative to the first address in the
// parent InlineInfo object.
- const auto ChildBaseAddr = Inline.Ranges[0].Start;
+ const auto ChildBaseAddr = Inline.Ranges[0].start();
while (true) {
llvm::Expected<InlineInfo> Child = decode(Data, Offset, ChildBaseAddr);
if (!Child)
@@ -232,7 +232,7 @@ llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const {
if (!isValid())
return createStringError(std::errc::invalid_argument,
"attempted to encode invalid InlineInfo object");
- Ranges.encode(O, BaseAddr);
+ encodeRanges(Ranges, O, BaseAddr);
bool HasChildren = !Children.empty();
O.writeU8(HasChildren);
O.writeU32(Name);
@@ -242,7 +242,7 @@ llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const {
// Child address ranges are encoded as relative to the first
// address in the Ranges for this object. This keeps the offsets
// small and allows for efficient encoding using ULEB offsets.
- const uint64_t ChildBaseAddr = Ranges[0].Start;
+ const uint64_t ChildBaseAddr = Ranges[0].start();
for (const auto &Child : Children) {
// Make sure all child address ranges are contained in the parent address
// ranges.
diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
index 8a624226b1d3..00a5b1bbfaa5 100644
--- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
+++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/GSYM/LookupResult.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -42,7 +43,7 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const SourceLocation &SL) {
OS << " @ ";
if (!SL.Dir.empty()) {
OS << SL.Dir;
- if (SL.Dir.contains('\\') and not SL.Dir.contains('/'))
+ if (SL.Dir.contains('\\') && !SL.Dir.contains('/'))
OS << '\\';
else
OS << '/';
diff --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp
deleted file mode 100644
index c1e8eccd0daa..000000000000
--- a/llvm/lib/DebugInfo/GSYM/Range.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-//===- Range.cpp ------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/GSYM/Range.h"
-#include "llvm/DebugInfo/GSYM/FileWriter.h"
-#include "llvm/Support/DataExtractor.h"
-#include <algorithm>
-#include <inttypes.h>
-
-using namespace llvm;
-using namespace gsym;
-
-
-void AddressRanges::insert(AddressRange Range) {
- if (Range.size() == 0)
- return;
-
- auto It = llvm::upper_bound(Ranges, Range);
- auto It2 = It;
- while (It2 != Ranges.end() && It2->Start < Range.End)
- ++It2;
- if (It != It2) {
- Range.End = std::max(Range.End, It2[-1].End);
- It = Ranges.erase(It, It2);
- }
- if (It != Ranges.begin() && Range.Start < It[-1].End)
- It[-1].End = std::max(It[-1].End, Range.End);
- else
- Ranges.insert(It, Range);
-}
-
-bool AddressRanges::contains(uint64_t Addr) const {
- auto It = std::partition_point(
- Ranges.begin(), Ranges.end(),
- [=](const AddressRange &R) { return R.Start <= Addr; });
- return It != Ranges.begin() && Addr < It[-1].End;
-}
-
-bool AddressRanges::contains(AddressRange Range) const {
- if (Range.size() == 0)
- return false;
- auto It = std::partition_point(
- Ranges.begin(), Ranges.end(),
- [=](const AddressRange &R) { return R.Start <= Range.Start; });
- if (It == Ranges.begin())
- return false;
- return Range.End <= It[-1].End;
-}
-
-Optional<AddressRange>
-AddressRanges::getRangeThatContains(uint64_t Addr) const {
- auto It = std::partition_point(
- Ranges.begin(), Ranges.end(),
- [=](const AddressRange &R) { return R.Start <= Addr; });
- if (It != Ranges.begin() && Addr < It[-1].End)
- return It[-1];
- return llvm::None;
-}
-
-raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) {
- return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")";
-}
-
-raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) {
- size_t Size = AR.size();
- for (size_t I = 0; I < Size; ++I) {
- if (I)
- OS << ' ';
- OS << AR[I];
- }
- return OS;
-}
-
-void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const {
- assert(Start >= BaseAddr);
- O.writeULEB(Start - BaseAddr);
- O.writeULEB(size());
-}
-
-void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr,
- uint64_t &Offset) {
- const uint64_t AddrOffset = Data.getULEB128(&Offset);
- const uint64_t Size = Data.getULEB128(&Offset);
- const uint64_t StartAddr = BaseAddr + AddrOffset;
- Start = StartAddr;
- End = StartAddr + Size;
-}
-
-void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const {
- O.writeULEB(Ranges.size());
- if (Ranges.empty())
- return;
- for (auto Range : Ranges)
- Range.encode(O, BaseAddr);
-}
-
-void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr,
- uint64_t &Offset) {
- clear();
- uint64_t NumRanges = Data.getULEB128(&Offset);
- if (NumRanges == 0)
- return;
- Ranges.resize(NumRanges);
- for (auto &Range : Ranges)
- Range.decode(Data, BaseAddr, Offset);
-}
-
-void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) {
- Data.getULEB128(&Offset);
- Data.getULEB128(&Offset);
-}
-
-uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) {
- uint64_t NumRanges = Data.getULEB128(&Offset);
- for (uint64_t I=0; I<NumRanges; ++I)
- AddressRange::skip(Data, Offset);
- return NumRanges;
-}
diff --git a/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp b/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
index 00fc70ca5a54..94935d63452e 100644
--- a/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -8,7 +8,6 @@
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index b6f11a942a26..c12ac38c2317 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -10,12 +10,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
-#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
-#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamWriter.h"
@@ -25,6 +23,12 @@ using namespace llvm::codeview;
using namespace llvm::msf;
using namespace llvm::pdb;
+namespace llvm {
+namespace codeview {
+class DebugSubsection;
+}
+} // namespace llvm
+
static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize,
uint32_t C13Size) {
uint32_t Size = sizeof(uint32_t); // Signature
@@ -44,7 +48,7 @@ DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName,
Layout.Mod = ModIndex;
}
-DbiModuleDescriptorBuilder::~DbiModuleDescriptorBuilder() {}
+DbiModuleDescriptorBuilder::~DbiModuleDescriptorBuilder() = default;
uint16_t DbiModuleDescriptorBuilder::getStreamIndex() const {
return Layout.ModDiStream;
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
index 5cf014e881cd..009cd113f652 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <algorithm>
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 4eb16804171d..1a2267334049 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -9,7 +9,6 @@
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
@@ -20,7 +19,6 @@
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
-#include <algorithm>
#include <cstddef>
#include <cstdint>
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 0584966a98c5..3a719bd07c8a 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -14,7 +14,6 @@
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
-#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/BinaryStreamWriter.h"
@@ -30,7 +29,7 @@ DbiStreamBuilder::DbiStreamBuilder(msf::MSFBuilder &Msf)
PdbDllVersion(0), PdbDllRbld(0), Flags(0), MachineType(PDB_Machine::x86),
Header(nullptr) {}
-DbiStreamBuilder::~DbiStreamBuilder() {}
+DbiStreamBuilder::~DbiStreamBuilder() = default;
void DbiStreamBuilder::setVersionHeader(PdbRaw_DbiVer V) { VerHeader = V; }
@@ -72,7 +71,7 @@ void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) {
}
void DbiStreamBuilder::addNewFpoData(const codeview::FrameData &FD) {
- if (!NewFpoData.hasValue())
+ if (!NewFpoData)
NewFpoData.emplace(false);
NewFpoData->addFrameData(FD);
@@ -286,7 +285,7 @@ Error DbiStreamBuilder::finalize() {
}
Error DbiStreamBuilder::finalizeMsfLayout() {
- if (NewFpoData.hasValue()) {
+ if (NewFpoData) {
DbgStreams[(int)DbgHeaderType::NewFPO].emplace();
DbgStreams[(int)DbgHeaderType::NewFPO]->Size =
NewFpoData->calculateSerializedSize();
@@ -307,7 +306,7 @@ Error DbiStreamBuilder::finalizeMsfLayout() {
}
for (auto &S : DbgStreams) {
- if (!S.hasValue())
+ if (!S)
continue;
auto ExpectedIndex = Msf.addStream(S->Size);
if (!ExpectedIndex)
@@ -428,14 +427,14 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
for (auto &Stream : DbgStreams) {
uint16_t StreamNumber = kInvalidStreamIndex;
- if (Stream.hasValue())
+ if (Stream)
StreamNumber = Stream->StreamNumber;
if (auto EC = Writer.writeInteger(StreamNumber))
return EC;
}
for (auto &Stream : DbgStreams) {
- if (!Stream.hasValue())
+ if (!Stream)
continue;
assert(Stream->StreamNumber != kInvalidStreamIndex);
diff --git a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp
index 37192ba36a04..32bad9cea7ce 100644
--- a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/PDB/Native/EnumTables.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/Support/ScopedPrinter.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/tools/llvm-pdbutil/FormatUtil.cpp b/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp
index b4837398f1d0..a167d45982a9 100644
--- a/llvm/tools/llvm-pdbutil/FormatUtil.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "FormatUtil.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
+
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -18,58 +19,6 @@ using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::pdb;
-std::string llvm::pdb::truncateStringBack(StringRef S, uint32_t MaxLen) {
- if (MaxLen == 0 || S.size() <= MaxLen || S.size() <= 3)
- return std::string(S);
-
- assert(MaxLen >= 3);
- uint32_t FinalLen = std::min<size_t>(S.size(), MaxLen - 3);
- S = S.take_front(FinalLen);
- return std::string(S) + std::string("...");
-}
-
-std::string llvm::pdb::truncateStringMiddle(StringRef S, uint32_t MaxLen) {
- if (MaxLen == 0 || S.size() <= MaxLen || S.size() <= 3)
- return std::string(S);
-
- assert(MaxLen >= 3);
- uint32_t FinalLen = std::min<size_t>(S.size(), MaxLen - 3);
- StringRef Front = S.take_front(FinalLen / 2);
- StringRef Back = S.take_back(Front.size());
- return std::string(Front) + std::string("...") + std::string(Back);
-}
-
-std::string llvm::pdb::truncateStringFront(StringRef S, uint32_t MaxLen) {
- if (MaxLen == 0 || S.size() <= MaxLen || S.size() <= 3)
- return std::string(S);
-
- assert(MaxLen >= 3);
- S = S.take_back(MaxLen - 3);
- return std::string("...") + std::string(S);
-}
-
-std::string llvm::pdb::truncateQuotedNameFront(StringRef Label, StringRef Name,
- uint32_t MaxLen) {
- uint32_t RequiredExtraChars = Label.size() + 1 + 2;
- if (MaxLen == 0 || RequiredExtraChars + Name.size() <= MaxLen)
- return formatv("{0} \"{1}\"", Label, Name).str();
-
- assert(MaxLen >= RequiredExtraChars);
- std::string TN = truncateStringFront(Name, MaxLen - RequiredExtraChars);
- return formatv("{0} \"{1}\"", Label, TN).str();
-}
-
-std::string llvm::pdb::truncateQuotedNameBack(StringRef Label, StringRef Name,
- uint32_t MaxLen) {
- uint32_t RequiredExtraChars = Label.size() + 1 + 2;
- if (MaxLen == 0 || RequiredExtraChars + Name.size() <= MaxLen)
- return formatv("{0} \"{1}\"", Label, Name).str();
-
- assert(MaxLen >= RequiredExtraChars);
- std::string TN = truncateStringBack(Name, MaxLen - RequiredExtraChars);
- return formatv("{0} \"{1}\"", Label, TN).str();
-}
-
std::string llvm::pdb::typesetItemList(ArrayRef<std::string> Opts,
uint32_t IndentLevel, uint32_t GroupSize,
StringRef Sep) {
diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 9084e689d165..262873c6e6ab 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -14,7 +14,7 @@
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
-#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
@@ -22,6 +22,7 @@
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Parallel.h"
@@ -196,7 +197,7 @@ void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) {
void GSIHashStreamBuilder::finalizeBuckets(
uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) {
// Hash every name in parallel.
- parallelForEachN(0, Records.size(), [&](size_t I) {
+ parallelFor(0, Records.size(), [&](size_t I) {
Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH);
});
@@ -231,7 +232,7 @@ void GSIHashStreamBuilder::finalizeBuckets(
// bucket can properly early-out when it detects the record won't be found.
// The algorithm used here corresponds to the function
// caseInsensitiveComparePchPchCchCch in the reference implementation.
- parallelForEachN(0, IPHR_HASH, [&](size_t I) {
+ parallelFor(0, IPHR_HASH, [&](size_t I) {
auto B = HashRecords.begin() + BucketStarts[I];
auto E = HashRecords.begin() + BucketCursors[I];
if (B == E)
@@ -286,7 +287,7 @@ GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
: Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),
GSH(std::make_unique<GSIHashStreamBuilder>()) {}
-GSIStreamBuilder::~GSIStreamBuilder() {}
+GSIStreamBuilder::~GSIStreamBuilder() = default;
uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
uint32_t Size = 0;
diff --git a/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
index f27d60f46815..7217fe38be55 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
@@ -21,6 +21,7 @@
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
@@ -141,14 +142,12 @@ readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
return joinErrors(std::move(EC),
make_error<RawError>(raw_error_code::corrupt_file,
"Could not read a bitmap."));
- uint32_t NumBuckets1 = 0;
uint32_t CompressedBucketIdx = 0;
for (uint32_t I = 0; I <= IPHR_HASH; ++I) {
uint8_t WordIdx = I / 32;
uint8_t BitIdx = I % 32;
bool IsSet = HashBitmap[WordIdx] & (1U << BitIdx);
if (IsSet) {
- ++NumBuckets1;
BucketMap[I] = CompressedBucketIdx++;
} else {
BucketMap[I] = -1;
diff --git a/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp b/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp
index dfdcdf1f4eaf..030a59821914 100644
--- a/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/HashTable.cpp
@@ -7,14 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
-#include <algorithm>
-#include <cassert>
#include <cstdint>
#include <utility>
diff --git a/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp b/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp
index f41bb32d69af..927a0ffee28c 100644
--- a/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -7,8 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
@@ -16,7 +14,7 @@
using namespace llvm;
using namespace llvm::codeview;
-using namespace llvm::msf;
using namespace llvm::pdb;
InfoStream::InfoStream(std::unique_ptr<BinaryStream> Stream)
diff --git a/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index 42daa7cae799..e8f5a451b08e 100644
--- a/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -10,11 +10,9 @@
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
index 3f4101db7b93..f1e8adeb1b21 100644
--- a/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
@@ -9,7 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
diff --git a/llvm/tools/llvm-pdbutil/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
index 40b35625b6f8..495b25077737 100644
--- a/llvm/tools/llvm-pdbutil/InputFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
@@ -6,16 +6,16 @@
//
//===----------------------------------------------------------------------===//
-#include "InputFile.h"
-
-#include "FormatUtil.h"
-#include "LinePrinter.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
@@ -31,14 +31,16 @@ using namespace llvm::codeview;
using namespace llvm::object;
using namespace llvm::pdb;
-InputFile::InputFile() {}
-InputFile::~InputFile() {}
-
-static Expected<ModuleDebugStreamRef>
-getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
- ExitOnError Err("Unexpected error: ");
+InputFile::InputFile() = default;
+InputFile::~InputFile() = default;
- auto &Dbi = Err(File.getPDBDbiStream());
+Expected<ModuleDebugStreamRef>
+llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
+ uint32_t Index) {
+ Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
+ if (!DbiOrErr)
+ return DbiOrErr.takeError();
+ DbiStream &Dbi = *DbiOrErr;
const auto &Modules = Dbi.modules();
if (Index >= Modules.getModuleCount())
return make_error<RawError>(raw_error_code::index_out_of_bounds,
@@ -63,6 +65,30 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
return std::move(ModS);
}
+Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
+ uint32_t Index) {
+ Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
+ if (!DbiOrErr)
+ return DbiOrErr.takeError();
+ DbiStream &Dbi = *DbiOrErr;
+ const auto &Modules = Dbi.modules();
+ auto Modi = Modules.getModuleDescriptor(Index);
+
+ uint16_t ModiStream = Modi.getModuleStreamIndex();
+ if (ModiStream == kInvalidStreamIndex)
+ return make_error<RawError>(raw_error_code::no_stream,
+ "Module stream not present");
+
+ auto ModStreamData = File.createIndexedStream(ModiStream);
+
+ ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
+ if (Error Err = ModS.reload())
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid module stream");
+
+ return std::move(ModS);
+}
+
static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
StringRef Name,
BinaryStreamReader &Reader) {
@@ -120,7 +146,7 @@ static std::string formatChecksumKind(FileChecksumKind Kind) {
}
template <typename... Args>
-static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
+static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
if (Append)
Printer.format(std::forward<Args>(args)...);
else
@@ -209,6 +235,26 @@ Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
return SC.strings().getString(Offset);
}
+Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
+  StringRef Name;
+  if (!SC.hasChecksums())
+    return Name;
+
+  auto Iter = SC.checksums().getArray().at(Offset);
+  if (Iter == SC.checksums().getArray().end())
+    return Name;
+
+  uint32_t FO = Iter->FileNameOffset;
+  auto ExpectedFile = getNameFromStringTable(FO);
+  if (!ExpectedFile) {
+    // Consume the error so the unchecked Expected doesn't assert; we fall
+    // back to an empty name.
+    consumeError(ExpectedFile.takeError());
+    return Name;
+  }
+
+  return *ExpectedFile;
+}
+
void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
bool Append) const {
auto FC = ChecksumsByFile.find(File);
@@ -479,7 +525,7 @@ SymbolGroupIterator &SymbolGroupIterator::operator++() {
}
void SymbolGroupIterator::scanToNextDebugS() {
- assert(SectionIter.hasValue());
+ assert(SectionIter);
auto End = Value.File->obj().section_end();
auto &Iter = *SectionIter;
assert(!isEnd());
@@ -499,12 +545,43 @@ bool SymbolGroupIterator::isEnd() const {
if (!Value.File)
return true;
if (Value.File->isPdb()) {
- auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
+ DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
uint32_t Count = Dbi.modules().getModuleCount();
assert(Index <= Count);
return Index == Count;
}
- assert(SectionIter.hasValue());
+ assert(SectionIter);
return *SectionIter == Value.File->obj().section_end();
}
+
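+// Heuristic for the "just my code" filter: object files always count as
+// user code, while PDB modules with linker- or CRT-looking names (e.g.
+// "Import:..." entries, "* linker *", MSVC CRT build paths) are skipped.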
+static bool isMyCode(const SymbolGroup &Group) {
+ if (Group.getFile().isObj())
+ return true;
+
+ StringRef Name = Group.name();
+ if (Name.startswith("Import:"))
+ return false;
+ if (Name.endswith_insensitive(".dll"))
+ return false;
+ if (Name.equals_insensitive("* linker *"))
+ return false;
+ if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools"))
+ return false;
+ if (Name.startswith_insensitive("f:\\dd\\vctools\\crt"))
+ return false;
+ return true;
+}
+
+bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
+ const FilterOptions &Filters) {
+ if (Filters.JustMyCode && !isMyCode(Group))
+ return false;
+
+ // If the arg was not specified on the command line, always dump all modules.
+ if (!Filters.DumpModi)
+ return true;
+
+ // Otherwise, only dump if this is the same module specified.
+ return (Filters.DumpModi == Idx);
+}
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.cpp b/llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp
index dd6ca5bf41b1..c12fedc23833 100644
--- a/llvm/tools/llvm-pdbutil/LinePrinter.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/LinePrinter.cpp
@@ -6,15 +6,18 @@
//
//===----------------------------------------------------------------------===//
-#include "LinePrinter.h"
-
-#include "llvm-pdbutil.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/UDTLayout.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatAdapters.h"
@@ -46,25 +49,27 @@ bool IsItemExcluded(llvm::StringRef Item,
return false;
}
-}
+} // namespace
using namespace llvm;
-LinePrinter::LinePrinter(int Indent, bool UseColor, llvm::raw_ostream &Stream)
- : OS(Stream), IndentSpaces(Indent), CurrentIndent(0), UseColor(UseColor) {
- SetFilters(ExcludeTypeFilters, opts::pretty::ExcludeTypes.begin(),
- opts::pretty::ExcludeTypes.end());
- SetFilters(ExcludeSymbolFilters, opts::pretty::ExcludeSymbols.begin(),
- opts::pretty::ExcludeSymbols.end());
- SetFilters(ExcludeCompilandFilters, opts::pretty::ExcludeCompilands.begin(),
- opts::pretty::ExcludeCompilands.end());
-
- SetFilters(IncludeTypeFilters, opts::pretty::IncludeTypes.begin(),
- opts::pretty::IncludeTypes.end());
- SetFilters(IncludeSymbolFilters, opts::pretty::IncludeSymbols.begin(),
- opts::pretty::IncludeSymbols.end());
- SetFilters(IncludeCompilandFilters, opts::pretty::IncludeCompilands.begin(),
- opts::pretty::IncludeCompilands.end());
+LinePrinter::LinePrinter(int Indent, bool UseColor, llvm::raw_ostream &Stream,
+ const FilterOptions &Filters)
+ : OS(Stream), IndentSpaces(Indent), CurrentIndent(0), UseColor(UseColor),
+ Filters(Filters) {
+ SetFilters(ExcludeTypeFilters, Filters.ExcludeTypes.begin(),
+ Filters.ExcludeTypes.end());
+ SetFilters(ExcludeSymbolFilters, Filters.ExcludeSymbols.begin(),
+ Filters.ExcludeSymbols.end());
+ SetFilters(ExcludeCompilandFilters, Filters.ExcludeCompilands.begin(),
+ Filters.ExcludeCompilands.end());
+
+ SetFilters(IncludeTypeFilters, Filters.IncludeTypes.begin(),
+ Filters.IncludeTypes.end());
+ SetFilters(IncludeSymbolFilters, Filters.IncludeSymbols.begin(),
+ Filters.IncludeSymbols.end());
+ SetFilters(IncludeCompilandFilters, Filters.IncludeCompilands.begin(),
+ Filters.IncludeCompilands.end());
}
void LinePrinter::Indent(uint32_t Amount) {
@@ -94,7 +99,7 @@ void LinePrinter::printLine(const Twine &T) {
bool LinePrinter::IsClassExcluded(const ClassLayout &Class) {
if (IsTypeExcluded(Class.getName(), Class.getSize()))
return true;
- if (Class.deepPaddingSize() < opts::pretty::PaddingThreshold)
+ if (Class.deepPaddingSize() < Filters.PaddingThreshold)
return true;
return false;
}
@@ -272,7 +277,7 @@ void LinePrinter::formatMsfStreamBlocks(
bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size) {
if (IsItemExcluded(TypeName, IncludeTypeFilters, ExcludeTypeFilters))
return true;
- if (Size < opts::pretty::SizeThreshold)
+ if (Size < Filters.SizeThreshold)
return true;
return false;
}
diff --git a/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 1445f0bd9e1b..f0e96a7cd659 100644
--- a/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -10,16 +10,17 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
-#include <algorithm>
#include <cstdint>
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 1d873b87b347..500923e57fbb 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -7,21 +7,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <tuple>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index 7717f062eac1..d24364312b31 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -9,8 +9,6 @@
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/ADT/STLExtras.h"
-
namespace llvm {
namespace pdb {
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
index 54646867bc5f..b861fc2435b8 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
@@ -8,13 +8,15 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
-#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
index 5e6412275063..65e253ed115f 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -8,9 +8,11 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
-#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
namespace llvm {
namespace pdb {
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp
index 1e4b07646335..b912bf77e579 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp
@@ -8,13 +8,11 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h"
+
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index c6621924b516..7108b8efff83 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -8,13 +8,10 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
-#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
namespace llvm {
namespace pdb {
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp
index feede1dbc958..24fe2244cfc5 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp
@@ -8,11 +8,11 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
index 2524e10cb6c5..6912b8dc838e 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
@@ -8,13 +8,16 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index 895f8943157a..ae0f66c31fde 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -8,14 +8,14 @@
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
index 7f3b35c297b4..b1caa5add5b3 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
@@ -8,11 +8,15 @@
#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -25,7 +29,7 @@ NativeFunctionSymbol::NativeFunctionSymbol(NativeSession &Session,
: NativeRawSymbol(Session, PDB_SymType::Function, Id), Sym(Sym),
RecordOffset(Offset) {}
-NativeFunctionSymbol::~NativeFunctionSymbol() {}
+NativeFunctionSymbol::~NativeFunctionSymbol() = default;
void NativeFunctionSymbol::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
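
The ~NativeFunctionSymbol() = default change here repeats across most files in this series: LLVM style prefers "= default" over an empty body because it states explicitly that the destructor carries no user logic, while keeping the out-of-line definition that anchors the class's vtable to one translation unit. A generic sketch of the pattern; Widget is a made-up name:

// Widget.h (hypothetical)
class Widget {
public:
  virtual ~Widget(); // declared only; defined out of line below
  virtual void dump() const;
};

// Widget.cpp (hypothetical)
// Same behavior as `Widget::~Widget() {}`, but the defaulted form
// documents that nothing happens here beyond member destruction.
Widget::~Widget() = default;
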
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp
index 8314353c3890..99ec627fcd26 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp
@@ -12,8 +12,14 @@
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h"
+#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -25,7 +31,7 @@ NativeInlineSiteSymbol::NativeInlineSiteSymbol(
: NativeRawSymbol(Session, PDB_SymType::InlineSite, Id), Sym(Sym),
ParentAddr(ParentAddr) {}
-NativeInlineSiteSymbol::~NativeInlineSiteSymbol() {}
+NativeInlineSiteSymbol::~NativeInlineSiteSymbol() = default;
void NativeInlineSiteSymbol::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
@@ -98,29 +104,81 @@ void NativeInlineSiteSymbol::getLineOffset(uint32_t OffsetInFunc,
LineOffset = 0;
FileOffset = 0;
uint32_t CodeOffset = 0;
+ Optional<uint32_t> CodeOffsetBase;
+ Optional<uint32_t> CodeOffsetEnd;
+ Optional<int32_t> CurLineOffset;
+ Optional<int32_t> NextLineOffset;
+ Optional<uint32_t> NextFileOffset;
+ auto UpdateCodeOffset = [&](uint32_t Delta) {
+ if (!CodeOffsetBase)
+ CodeOffsetBase = CodeOffset;
+ else if (!CodeOffsetEnd)
+ CodeOffsetEnd = *CodeOffsetBase + Delta;
+ };
+ auto UpdateLineOffset = [&](int32_t Delta) {
+ LineOffset += Delta;
+ if (!CodeOffsetBase || !CurLineOffset)
+ CurLineOffset = LineOffset;
+ else
+ NextLineOffset = LineOffset;
+ };
+ auto UpdateFileOffset = [&](uint32_t Offset) {
+ if (!CodeOffsetBase)
+ FileOffset = Offset;
+ else
+ NextFileOffset = Offset;
+ };
+ auto ValidateAndReset = [&]() {
+ // Current range is finished. Check if OffsetInFunc is in the range.
+ if (CodeOffsetBase && CodeOffsetEnd && CurLineOffset) {
+ if (CodeOffsetBase <= OffsetInFunc && OffsetInFunc < CodeOffsetEnd) {
+ LineOffset = *CurLineOffset;
+ return true;
+ }
+ // Set base, end, file offset and line offset for next range.
+ if (NextFileOffset)
+ FileOffset = *NextFileOffset;
+ if (NextLineOffset) {
+ CurLineOffset = NextLineOffset;
+ NextLineOffset = None;
+ }
+ CodeOffsetBase = CodeOffsetEnd;
+ CodeOffsetEnd = NextFileOffset = None;
+ }
+ return false;
+ };
for (const auto &Annot : Sym.annotations()) {
switch (Annot.OpCode) {
case BinaryAnnotationsOpCode::CodeOffset:
case BinaryAnnotationsOpCode::ChangeCodeOffset:
- case BinaryAnnotationsOpCode::ChangeCodeLength:
+ case BinaryAnnotationsOpCode::ChangeCodeOffsetBase:
CodeOffset += Annot.U1;
+ UpdateCodeOffset(Annot.U1);
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeLength:
+ UpdateCodeOffset(Annot.U1);
break;
case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
CodeOffset += Annot.U2;
+ UpdateCodeOffset(Annot.U2);
+ UpdateCodeOffset(Annot.U1);
break;
case BinaryAnnotationsOpCode::ChangeLineOffset:
+ UpdateLineOffset(Annot.S1);
+ break;
case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
CodeOffset += Annot.U1;
- LineOffset += Annot.S1;
+ UpdateCodeOffset(Annot.U1);
+ UpdateLineOffset(Annot.S1);
break;
case BinaryAnnotationsOpCode::ChangeFile:
- FileOffset = Annot.U1;
+ UpdateFileOffset(Annot.U1);
break;
default:
break;
}
- if (CodeOffset >= OffsetInFunc)
+ if (ValidateAndReset())
return;
}
}
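
The rewritten getLineOffset above no longer stops at the first annotation whose running code offset passes OffsetInFunc; it reconstructs [base, end) code ranges from the ChangeCodeOffset/ChangeCodeLength annotations, carrying file and line deltas alongside, and reports the line offset of the range that actually contains the query. Stripped of the llvm::Optional bookkeeping, the containment test reduces to the following; this is a conceptual model, not the shipped code:

#include <vector>

// One reconstructed inline-site range: code offsets [Base, End) within
// the inlining function, plus the accumulated line offset for that span.
struct InlineeRange {
  unsigned Base, End;
  int LineOffset;
};

// Return true and set LineOffsetOut if OffsetInFunc falls in some range.
static bool findLineOffset(const std::vector<InlineeRange> &Ranges,
                           unsigned OffsetInFunc, int &LineOffsetOut) {
  for (const InlineeRange &R : Ranges) {
    if (R.Base <= OffsetInFunc && OffsetInFunc < R.End) {
      LineOffsetOut = R.LineOffset;
      return true;
    }
  }
  return false;
}
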
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
index 155ed0cdb828..aa7d6ac6f29d 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
index 1265e688b867..339af6108009 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp
@@ -9,8 +9,7 @@
#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -20,7 +19,7 @@ NativePublicSymbol::NativePublicSymbol(NativeSession &Session, SymIndexId Id,
const codeview::PublicSym32 &Sym)
: NativeRawSymbol(Session, PDB_SymType::PublicSymbol, Id), Sym(Sym) {}
-NativePublicSymbol::~NativePublicSymbol() {}
+NativePublicSymbol::~NativePublicSymbol() = default;
void NativePublicSymbol::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 2ad552470b61..89f9f9836fec 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -10,7 +10,6 @@
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 7212a0e65035..cf314c3bede3 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -8,31 +8,33 @@
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
-#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -45,6 +47,12 @@ using namespace llvm;
using namespace llvm::msf;
using namespace llvm::pdb;
+namespace llvm {
+namespace codeview {
+union DebugInfo;
+}
+} // namespace llvm
+
static DbiStream *getDbiStreamPtr(PDBFile &File) {
Expected<DbiStream &> DbiS = File.getPDBDbiStream();
if (DbiS)
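
The small namespace block added above forward-declares codeview::DebugInfo instead of pulling in a header for it, in keeping with the include-pruning theme of this commit: a forward declaration suffices wherever the file only forms pointers or references to the type. A minimal illustration; the function name is made up:

namespace llvm {
namespace codeview {
union DebugInfo; // no #include needed until members are accessed
} // namespace codeview
} // namespace llvm

// Fine with only the forward declaration: the type is used by pointer.
void recordDebugInfo(const llvm::codeview::DebugInfo *DI);
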
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
index fd813dee6b9f..8d6f8ebebf4c 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
@@ -8,6 +8,8 @@
#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
index e5f1dcaf801e..a6e8cbf71548 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
@@ -8,7 +8,7 @@
#include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
@@ -22,7 +22,7 @@ NativeSymbolEnumerator::NativeSymbolEnumerator(
: NativeRawSymbol(Session, PDB_SymType::Data, Id), Parent(Parent),
Record(std::move(Record)) {}
-NativeSymbolEnumerator::~NativeSymbolEnumerator() {}
+NativeSymbolEnumerator::~NativeSymbolEnumerator() = default;
void NativeSymbolEnumerator::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
index 63ac9fae0e87..e98f357ac485 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
@@ -8,9 +8,10 @@
#include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
-#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -21,7 +22,7 @@ NativeTypeArray::NativeTypeArray(NativeSession &Session, SymIndexId Id,
codeview::ArrayRecord Record)
: NativeRawSymbol(Session, PDB_SymType::ArrayType, Id), Record(Record),
Index(TI) {}
-NativeTypeArray::~NativeTypeArray() {}
+NativeTypeArray::~NativeTypeArray() = default;
void NativeTypeArray::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
index a08663aa91ba..80f892c7b118 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
-#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -19,7 +18,7 @@ NativeTypeBuiltin::NativeTypeBuiltin(NativeSession &PDBSession, SymIndexId Id,
: NativeRawSymbol(PDBSession, PDB_SymType::BuiltinType, Id),
Session(PDBSession), Mods(Mods), Type(T), Length(L) {}
-NativeTypeBuiltin::~NativeTypeBuiltin() {}
+NativeTypeBuiltin::~NativeTypeBuiltin() = default;
void NativeTypeBuiltin::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
index aaec3a5e7c60..ec37d276e66b 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
@@ -9,8 +9,9 @@
#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h"
#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
@@ -18,8 +19,6 @@
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-#include "llvm/Support/FormatVariadic.h"
-
#include <cassert>
using namespace llvm;
@@ -68,10 +67,13 @@ NativeEnumEnumEnumerators::NativeEnumEnumEnumerators(
ContinuationIndex = ClassParent.getEnumRecord().FieldList;
while (ContinuationIndex) {
- CVType FieldList = Types.getType(*ContinuationIndex);
- assert(FieldList.kind() == LF_FIELDLIST);
+ CVType FieldListCVT = Types.getType(*ContinuationIndex);
+ assert(FieldListCVT.kind() == LF_FIELDLIST);
ContinuationIndex.reset();
- cantFail(visitMemberRecordStream(FieldList.data(), *this));
+ FieldListRecord FieldList;
+ cantFail(TypeDeserializer::deserializeAs<FieldListRecord>(FieldListCVT,
+ FieldList));
+ cantFail(visitMemberRecordStream(FieldList.Data, *this));
}
}
@@ -123,7 +125,7 @@ NativeTypeEnum::NativeTypeEnum(NativeSession &Session, SymIndexId Id,
: NativeRawSymbol(Session, PDB_SymType::Enum, Id),
UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {}
-NativeTypeEnum::~NativeTypeEnum() {}
+NativeTypeEnum::~NativeTypeEnum() = default;
void NativeTypeEnum::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
@@ -138,7 +140,7 @@ void NativeTypeEnum::dump(raw_ostream &OS, int Indent,
dumpSymbolField(OS, "name", getName(), Indent);
dumpSymbolIdField(OS, "typeId", getTypeId(), Indent, Session,
PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields);
- if (Modifiers.hasValue())
+ if (Modifiers)
dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent,
Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields,
RecurseIdFields);
@@ -206,6 +208,8 @@ PDB_BuiltinType NativeTypeEnum::getBuiltinType() const {
return PDB_BuiltinType::Char16;
case SimpleTypeKind::Character32:
return PDB_BuiltinType::Char32;
+ case SimpleTypeKind::Character8:
+ return PDB_BuiltinType::Char8;
case SimpleTypeKind::Int128:
case SimpleTypeKind::Int128Oct:
case SimpleTypeKind::Int16:
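
The NativeTypeEnum hunk above stops handing the raw LF_FIELDLIST bytes to visitMemberRecordStream and first deserializes them into a typed FieldListRecord. Extracted into a standalone helper, the pattern looks like this; cantFail mirrors the call site, where the record is already known to be well-formed:

#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Error.h"
#include <cassert>

using namespace llvm;
using namespace llvm::codeview;

// Decode a CVType known to be an LF_FIELDLIST into its typed record.
static FieldListRecord decodeFieldList(CVType FieldListCVT) {
  assert(FieldListCVT.kind() == LF_FIELDLIST && "not a field list");
  FieldListRecord FieldList;
  cantFail(TypeDeserializer::deserializeAs<FieldListRecord>(FieldListCVT,
                                                            FieldList));
  return FieldList;
}
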
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
index f98a4c3043eb..7db3f1c63128 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
@@ -10,9 +10,10 @@
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
-#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -96,7 +97,7 @@ void NativeTypeFunctionSig::initialize() {
}
}
-NativeTypeFunctionSig::~NativeTypeFunctionSig() {}
+NativeTypeFunctionSig::~NativeTypeFunctionSig() = default;
void NativeTypeFunctionSig::initializeArgList(codeview::TypeIndex ArgListTI) {
TpiStream &Tpi = cantFail(Session.getPDBFile().getPDBTpiStream());
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
index 32dcfc235954..14b903ccef5a 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
@@ -7,8 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeTypePointer.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include <cassert>
@@ -29,7 +30,7 @@ NativeTypePointer::NativeTypePointer(NativeSession &Session, SymIndexId Id,
: NativeRawSymbol(Session, PDB_SymType::PointerType, Id), TI(TI),
Record(std::move(Record)) {}
-NativeTypePointer::~NativeTypePointer() {}
+NativeTypePointer::~NativeTypePointer() = default;
void NativeTypePointer::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
index 72964a9e0d4d..11cd349b72ca 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp
@@ -1,4 +1,6 @@
#include "llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -9,7 +11,7 @@ NativeTypeTypedef::NativeTypeTypedef(NativeSession &Session, SymIndexId Id,
: NativeRawSymbol(Session, PDB_SymType::Typedef, Id),
Record(std::move(Typedef)) {}
-NativeTypeTypedef::~NativeTypeTypedef() {}
+NativeTypeTypedef::~NativeTypeTypedef() = default;
void NativeTypeTypedef::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
index 917ec14e58d6..b708fb644e7a 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
@@ -7,10 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h"
-
-#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
-
-#include <cassert>
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -32,7 +33,7 @@ NativeTypeUDT::NativeTypeUDT(NativeSession &Session, SymIndexId Id,
: NativeRawSymbol(Session, PDB_SymType::UDT, Id),
UnmodifiedType(&UnmodifiedType), Modifiers(std::move(Modifier)) {}
-NativeTypeUDT::~NativeTypeUDT() {}
+NativeTypeUDT::~NativeTypeUDT() = default;
void NativeTypeUDT::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
@@ -44,7 +45,7 @@ void NativeTypeUDT::dump(raw_ostream &OS, int Indent,
dumpSymbolIdField(OS, "lexicalParentId", 0, Indent, Session,
PdbSymbolIdField::LexicalParent, ShowIdFields,
RecurseIdFields);
- if (Modifiers.hasValue())
+ if (Modifiers)
dumpSymbolIdField(OS, "unmodifiedTypeId", getUnmodifiedTypeId(), Indent,
Session, PdbSymbolIdField::UnmodifiedType, ShowIdFields,
RecurseIdFields);
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp
index 837fe19ec88c..63bb3f046e23 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp
@@ -1,4 +1,7 @@
#include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -10,7 +13,7 @@ NativeTypeVTShape::NativeTypeVTShape(NativeSession &Session, SymIndexId Id,
: NativeRawSymbol(Session, PDB_SymType::VTableShape, Id), TI(TI),
Record(std::move(SR)) {}
-NativeTypeVTShape::~NativeTypeVTShape() {}
+NativeTypeVTShape::~NativeTypeVTShape() = default;
void NativeTypeVTShape::dump(raw_ostream &OS, int Indent,
PdbSymbolIdField ShowIdFields,
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 5c61530c470d..471d183a5f53 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -8,7 +8,6 @@
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index f33125474e3a..641043a8e186 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -7,34 +7,41 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
-#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
-#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
-#include "llvm/Support/BinaryStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/CRC.h"
-#include "llvm/Support/Chrono.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/xxhash.h"
+#include <ctime>
+
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::msf;
using namespace llvm::pdb;
using namespace llvm::support;
+namespace llvm {
+class WritableBinaryStream;
+}
+
PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator)
: Allocator(Allocator), InjectedSourceHashTraits(Strings),
InjectedSourceTable(2) {}
-PDBFileBuilder::~PDBFileBuilder() {}
+PDBFileBuilder::~PDBFileBuilder() = default;
Error PDBFileBuilder::initialize(uint32_t BlockSize) {
auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize);
@@ -348,7 +355,7 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
H->Age = Info->getAge();
H->Guid = Info->getGuid();
Optional<uint32_t> Sig = Info->getSignature();
- H->Signature = Sig.hasValue() ? *Sig : time(nullptr);
+ H->Signature = Sig ? *Sig : time(nullptr);
}
return Buffer.commit();
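
The `Sig ? *Sig : time(nullptr)` change above is one of many spots in this series where Optional<T>::hasValue() gives way to the equivalent contextual conversion to bool. A minimal sketch of the idiom; the function and fallback are made up:

#include "llvm/ADT/Optional.h"
#include <cstdint>
#include <ctime>

// Pick the recorded signature if present, else fall back to the clock.
// `Sig ? ...` is the contextual-bool form of Sig.hasValue().
static uint32_t pickSignature(llvm::Optional<uint32_t> Sig) {
  return Sig ? *Sig : static_cast<uint32_t>(std::time(nullptr));
}
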
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index 2be1656e06bb..5bd12f50f1d7 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -8,7 +8,6 @@
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index f7f36901e4d4..45a5bdb48f01 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -71,7 +71,7 @@ static uint32_t computeBucketCount(uint32_t NumStrings) {
// This list contains all StringCount, BucketCount pairs where BucketCount was
// just incremented. It ends before the first BucketCount entry where
// BucketCount * 3 would overflow a 32-bit unsigned int.
- static std::map<uint32_t, uint32_t> StringsToBuckets = {
+ static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
{0, 1},
{1, 2},
{2, 4},
@@ -124,8 +124,9 @@ static uint32_t computeBucketCount(uint32_t NumStrings) {
{517197275, 1034394550},
{775795913, 1551591826},
{1163693870, 2327387740}};
- auto Entry = StringsToBuckets.lower_bound(NumStrings);
- assert(Entry != StringsToBuckets.end());
+ const auto *Entry = llvm::lower_bound(
+ StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
+ assert(Entry != std::end(StringsToBuckets));
return Entry->second;
}
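
Swapping the static std::map for a sorted constant array above removes a global with a runtime constructor; llvm::lower_bound with llvm::less_first performs the same ceiling lookup on the flat array, comparing only the first element of each pair. A self-contained miniature with an arbitrary three-entry table:

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

// Return the second element of the first entry whose key is >= Key.
static uint32_t lookupCeiling(uint32_t Key) {
  // Sorted by key; less_first ignores the second element, so probing
  // with (Key, 0U) finds the first entry whose key is >= Key.
  static const std::pair<uint32_t, uint32_t> Table[] = {
      {0, 1}, {4, 8}, {16, 32}};
  const auto *Entry =
      llvm::lower_bound(Table, std::make_pair(Key, 0U), llvm::less_first());
  assert(Entry != std::end(Table) && "Key exceeds table range");
  return Entry->second;
}
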
diff --git a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index a33bf03bf8fb..c7b9f443da5e 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -22,14 +22,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-#include <algorithm>
#include <cstdint>
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
index f9e67014477e..f89f09aa3399 100644
--- a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp
@@ -1,20 +1,25 @@
#include "llvm/DebugInfo/PDB/Native/SymbolCache.h"
-#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
+#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
-#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
+#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h"
-#include "llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h"
#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
@@ -32,7 +37,6 @@
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -60,6 +64,7 @@ static const struct BuiltinTypeEntry {
{codeview::SimpleTypeKind::WideCharacter, PDB_BuiltinType::WCharT, 2},
{codeview::SimpleTypeKind::Character16, PDB_BuiltinType::Char16, 2},
{codeview::SimpleTypeKind::Character32, PDB_BuiltinType::Char32, 4},
+ {codeview::SimpleTypeKind::Character8, PDB_BuiltinType::Char8, 1},
{codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1},
{codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1},
{codeview::SimpleTypeKind::Float32, PDB_BuiltinType::Float, 4},
diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp
index 003840b6e67e..5802d1c77527 100644
--- a/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp
@@ -8,10 +8,7 @@
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
@@ -22,7 +19,7 @@ using namespace llvm::pdb;
SymbolStream::SymbolStream(std::unique_ptr<MappedBlockStream> Stream)
: Stream(std::move(Stream)) {}
-SymbolStream::~SymbolStream() {}
+SymbolStream::~SymbolStream() = default;
Error SymbolStream::reload() {
BinaryStreamReader Reader(*Stream);
diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 5f4f497690b6..986e45e050c7 100644
--- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -9,17 +9,13 @@
#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
diff --git a/llvm/lib/DebugInfo/PDB/PDB.cpp b/llvm/lib/DebugInfo/PDB/PDB.cpp
index e5b7731f6f4a..d106ba8fefc1 100644
--- a/llvm/lib/DebugInfo/PDB/PDB.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDB.cpp
@@ -15,7 +15,6 @@
#endif
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/llvm/lib/DebugInfo/PDB/PDBContext.cpp
index 0ebb70e010d5..e600fb7385f1 100644
--- a/llvm/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBContext.cpp
@@ -14,6 +14,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Object/COFF.h"
using namespace llvm;
@@ -62,6 +64,13 @@ DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address,
return Result;
}
+DILineInfo
+PDBContext::getLineInfoForDataAddress(object::SectionedAddress Address) {
+ // Unimplemented. S_GDATA and S_LDATA in CodeView (used to describe global
+ // variables) aren't capable of carrying line information.
+ return DILineInfo();
+}
+
DILineInfoTable
PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address,
uint64_t Size,
diff --git a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
index a6d7ca0da7a9..571510e6bad9 100644
--- a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/PDBExtras.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -64,6 +63,7 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, HResult, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char16, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char32, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_BuiltinType, Char8, OS)
}
return OS;
}
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
index d6bc7ee9c951..4eb5af9bd292 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
@@ -43,7 +44,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
#include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
-#include <algorithm>
#include <memory>
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
index 0fa83efb7ae0..089f4de0f422 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index 9452282a8817..49ee4937521b 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolBlock.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index 529100b23ba5..bd60489b6bed 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -9,10 +9,11 @@
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h"
-#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Path.h"
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index 0d86dfe1e632..f775ac949cd8 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index 61f119405fd9..2c2ed59c1726 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -10,9 +10,7 @@
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index 6c9a4aa76c3d..405b07c2b689 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -10,9 +10,6 @@
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
index d2b82111ccd5..c604b5cd3a6a 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -7,12 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBSectionContrib.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
index c85756c43e47..3887c23b18ef 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -8,10 +8,10 @@
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
-
-#include <utility>
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index cb0329bc0ed7..59d57e83fc10 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -10,7 +10,9 @@
#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index 66433dc17b49..5c72e3f62121 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index fe32c93c0121..fd537a9eeea4 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -8,10 +8,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
index 1fffe69a0c83..896719a6a8e2 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index 08697683f641..a00b1be40e18 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -8,10 +8,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
index 6483858183e5..42502a55ef76 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
index a0d521abe43f..bb4eb43f22e5 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index 08467059b5e1..539c3547a4b0 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -8,10 +8,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
index a0dd9ef601c0..eca2a09c1f77 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index 6723894c90ea..a616b4e26cb1 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 4a25a391f278..2828ce4df3f8 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -10,9 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
index b9fdf6aec811..db8ca327da1e 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -8,11 +8,10 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index 4ffea42cbb0a..d4bd9996d786 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
index 683e93548fb1..acda57f44e33 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index e80e6c716572..fa6e630e3c45 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
index 462fc315359b..9e238c7caa37 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -8,11 +8,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
-#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
index 70749d9bf5f5..c2ce21c6ca69 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
index d302c29a3bec..122111d32027 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -8,16 +8,8 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
-#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
index 4e2a45116d51..a4d81888e457 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -10,8 +10,6 @@
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <utility>
-
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index 78957620e083..835a86e165af 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
index 650d01183171..85294a4cded2 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index 74afbdb18086..98aaaa9b10b9 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -9,9 +9,6 @@
#include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/lib/DebugInfo/PDB/UDTLayout.cpp b/llvm/lib/DebugInfo/PDB/UDTLayout.cpp
index 55854bb49888..6e388834f199 100644
--- a/llvm/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/llvm/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -10,6 +10,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
@@ -17,6 +19,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
diff --git a/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp b/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp
new file mode 100644
index 000000000000..119830de595a
--- /dev/null
+++ b/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp
@@ -0,0 +1,57 @@
+//===-- lib/DebugInfo/Symbolize/DIFetcher.cpp -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the implementation of the local debug info fetcher, which
+/// searches local debug-file directories.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/DIFetcher.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace symbolize {
+
+Optional<std::string>
+LocalDIFetcher::fetchBuildID(ArrayRef<uint8_t> BuildID) const {
+ auto GetDebugPath = [&](StringRef Directory) {
+ SmallString<128> Path{Directory};
+ sys::path::append(Path, ".build-id",
+ llvm::toHex(BuildID[0], /*LowerCase=*/true),
+ llvm::toHex(BuildID.slice(1), /*LowerCase=*/true));
+ Path += ".debug";
+ return Path;
+ };
+ if (DebugFileDirectory.empty()) {
+ SmallString<128> Path = GetDebugPath(
+#if defined(__NetBSD__)
+ // Try /usr/libdata/debug/.build-id/../...
+ "/usr/libdata/debug"
+#else
+ // Try /usr/lib/debug/.build-id/../...
+ "/usr/lib/debug"
+#endif
+ );
+ if (llvm::sys::fs::exists(Path))
+ return std::string(Path);
+ } else {
+ for (const auto &Directory : DebugFileDirectory) {
+ // Try <debug-file-directory>/.build-id/../...
+ SmallString<128> Path = GetDebugPath(Directory);
+ if (llvm::sys::fs::exists(Path))
+ return std::string(Path);
+ }
+ }
+ return None;
+}
+
+} // namespace symbolize
+} // namespace llvm
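For orientation, here is a minimal sketch of calling this fetcher directly. It assumes, as the member use above suggests, that LocalDIFetcher is constructed from the list of debug-file directories; the directory and the build ID bytes are placeholders:

#include "llvm/DebugInfo/Symbolize/DIFetcher.h"

#include <string>
#include <vector>

using namespace llvm;
using namespace llvm::symbolize;

// Looks for <dir>/.build-id/ab/cdef....debug under the given roots.
Optional<std::string> findDebugFile() {
  std::vector<std::string> Dirs = {"/usr/lib/debug"}; // assumed search root
  LocalDIFetcher Fetcher(Dirs);
  const uint8_t BuildID[] = {0xab, 0xcd, 0xef, 0x12}; // placeholder build ID
  return Fetcher.fetchBuildID(BuildID);
}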
diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index e29968d113bd..877380213f21 100644
--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -16,9 +16,7 @@
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cmath>
@@ -208,6 +206,10 @@ void PlainPrinterBase::print(const Request &Request, const DIGlobal &Global) {
Name = DILineInfo::Addr2LineBadString;
OS << Name << "\n";
OS << Global.Start << " " << Global.Size << "\n";
+ if (Global.DeclFile.empty())
+ OS << "??:?\n";
+ else
+ OS << Global.DeclFile << ":" << Global.DeclLine << "\n";
printFooter();
}
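In other words, plain-text DATA symbolization now carries a third line giving the declaration site when debug info provides one. Illustratively (values made up), output for a global changes from

some_global
4608 4

to

some_global
4608 4
/tmp/test.cpp:3

with "??:?" printed when no declaration location is known.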
diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
new file mode 100644
index 000000000000..9bc65e763287
--- /dev/null
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -0,0 +1,202 @@
+//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the log symbolizer markup data model and parser.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+
+namespace llvm {
+namespace symbolize {
+
+// Matches the following:
+// "\033[0m"
+// "\033[1m"
+// "\033[30m" -- "\033[37m"
+static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
+
+MarkupParser::MarkupParser(StringSet<> MultilineTags)
+ : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
+
+static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
+ return Str.take_front(Pos - Str.begin());
+}
+static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
+ Str = Str.drop_front(Pos - Str.begin());
+}
+
+void MarkupParser::parseLine(StringRef Line) {
+ Buffer.clear();
+ NextIdx = 0;
+ FinishedMultiline.clear();
+ this->Line = Line;
+}
+
+Optional<MarkupNode> MarkupParser::nextNode() {
+ // Pull something out of the buffer if possible.
+ if (!Buffer.empty()) {
+ if (NextIdx < Buffer.size())
+ return std::move(Buffer[NextIdx++]);
+ NextIdx = 0;
+ Buffer.clear();
+ }
+
+ // The buffer is empty, so parse the next bit of the line.
+
+ if (Line.empty())
+ return None;
+
+ if (!InProgressMultiline.empty()) {
+ if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
+ llvm::append_range(InProgressMultiline, *MultilineEnd);
+ assert(FinishedMultiline.empty() &&
+ "At most one multi-line element can be finished at a time.");
+ FinishedMultiline.swap(InProgressMultiline);
+ // Parse the multi-line element as if it were contiguous.
+ advanceTo(Line, MultilineEnd->end());
+ return *parseElement(FinishedMultiline);
+ }
+
+ // The whole line is part of the multi-line element.
+ llvm::append_range(InProgressMultiline, Line);
+ Line = Line.drop_front(Line.size());
+ return None;
+ }
+
+ // Find the first valid markup element, if any.
+ if (Optional<MarkupNode> Element = parseElement(Line)) {
+ parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
+ Buffer.push_back(std::move(*Element));
+ advanceTo(Line, Element->Text.end());
+ return nextNode();
+ }
+
+ // Since there were no valid elements remaining, see if the line opens a
+ // multi-line element.
+ if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
+ // Emit any text before the element.
+ parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
+
+ // Begin recording the multi-line element.
+ llvm::append_range(InProgressMultiline, *MultilineBegin);
+ Line = Line.drop_front(Line.size());
+ return nextNode();
+ }
+
+ // The line doesn't contain any more markup elements, so emit it as text.
+ parseTextOutsideMarkup(Line);
+ Line = Line.drop_front(Line.size());
+ return nextNode();
+}
+
+void MarkupParser::flush() {
+ if (InProgressMultiline.empty())
+ return;
+ FinishedMultiline.swap(InProgressMultiline);
+ parseTextOutsideMarkup(FinishedMultiline);
+}
+
+// Finds and returns the next valid markup element in the given line. Returns
+// None if the line contains no valid elements.
+Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
+ while (true) {
+ // Find next element using begin and end markers.
+ size_t BeginPos = Line.find("{{{");
+ if (BeginPos == StringRef::npos)
+ return None;
+ size_t EndPos = Line.find("}}}", BeginPos + 3);
+ if (EndPos == StringRef::npos)
+ return None;
+ EndPos += 3;
+ MarkupNode Element;
+ Element.Text = Line.slice(BeginPos, EndPos);
+ Line = Line.substr(EndPos);
+
+ // Parse tag.
+ StringRef Content = Element.Text.drop_front(3).drop_back(3);
+ StringRef FieldsContent;
+ std::tie(Element.Tag, FieldsContent) = Content.split(':');
+ if (Element.Tag.empty())
+ continue;
+
+ // Parse fields.
+ if (!FieldsContent.empty())
+ FieldsContent.split(Element.Fields, ":");
+ else if (Content.back() == ':')
+ Element.Fields.push_back(FieldsContent);
+
+ return Element;
+ }
+}
+
+static MarkupNode textNode(StringRef Text) {
+ MarkupNode Node;
+ Node.Text = Text;
+ return Node;
+}
+
+// Parses a region of text known to be outside any markup elements. Such text
+// may still contain SGR control codes, so the region is further subdivided into
+// control codes and true text regions.
+void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
+ if (Text.empty())
+ return;
+ SmallVector<StringRef> Matches;
+ while (SGRSyntax.match(Text, &Matches)) {
+ // Emit any text before the SGR element.
+ if (Matches.begin()->begin() != Text.begin())
+ Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin())));
+
+ Buffer.push_back(textNode(*Matches.begin()));
+ advanceTo(Text, Matches.begin()->end());
+ }
+ if (!Text.empty())
+ Buffer.push_back(textNode(Text));
+}
+
+// Given that a line doesn't contain any valid markup, checks whether it ends
+// with the start of a multi-line element. If so, returns the beginning.
+Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
+ // A multi-line begin marker must be the last one on the line.
+ size_t BeginPos = Line.rfind("{{{");
+ if (BeginPos == StringRef::npos)
+ return None;
+ size_t BeginTagPos = BeginPos + 3;
+
+ // If there are any end markers afterwards, the begin marker cannot belong to
+ // a multi-line element.
+ size_t EndPos = Line.find("}}}", BeginTagPos);
+ if (EndPos != StringRef::npos)
+ return None;
+
+ // Check whether the tag is registered multi-line.
+ size_t EndTagPos = Line.find(':', BeginTagPos);
+ if (EndTagPos == StringRef::npos)
+ return None;
+ StringRef Tag = Line.slice(BeginTagPos, EndTagPos);
+ if (!MultilineTags.contains(Tag))
+ return None;
+ return Line.substr(BeginPos);
+}
+
+// See if the line begins with the ending of an in-progress multi-line element.
+// If so, return the ending.
+Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
+ size_t EndPos = Line.find("}}}");
+ if (EndPos == StringRef::npos)
+ return None;
+ return Line.take_front(EndPos + 3);
+}
+
+} // end namespace symbolize
+} // end namespace llvm
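A rough driving loop for this parser, under the assumption that an empty multi-line tag set is acceptable to the constructor shown above:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::symbolize;

// Parses each input line and dumps the nodes the parser produces.
void dumpMarkup(ArrayRef<StringRef> Lines) {
  MarkupParser Parser{StringSet<>()}; // no multi-line tags registered
  for (StringRef Line : Lines) {
    Parser.parseLine(Line);
    while (Optional<MarkupNode> Node = Parser.nextNode())
      outs() << "tag=\"" << Node->Tag << "\" text=\"" << Node->Text << "\"\n";
  }
  // Flush any unterminated multi-line element back out as plain text,
  // then drain the buffered nodes.
  Parser.flush();
  while (Optional<MarkupNode> Node = Parser.nextNode())
    outs() << "text=\"" << Node->Text << "\"\n";
}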
diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
new file mode 100644
index 000000000000..3363fe5e531f
--- /dev/null
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -0,0 +1,143 @@
+//===-- lib/DebugInfo/Symbolize/MarkupFilter.cpp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the implementation of a filter that replaces symbolizer
+/// markup with human-readable expressions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
+
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::symbolize;
+
+MarkupFilter::MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled)
+ : OS(OS), ColorsEnabled(ColorsEnabled.value_or(
+ WithColor::defaultAutoDetectFunction()(OS))) {}
+
+void MarkupFilter::beginLine(StringRef Line) {
+ this->Line = Line;
+ resetColor();
+}
+
+void MarkupFilter::filter(const MarkupNode &Node) {
+ if (!checkTag(Node))
+ return;
+
+ if (trySGR(Node))
+ return;
+
+ if (Node.Tag == "symbol") {
+ if (!checkNumFields(Node, 1))
+ return;
+ highlight();
+ OS << llvm::demangle(Node.Fields.front().str());
+ restoreColor();
+ return;
+ }
+
+ OS << Node.Text;
+}
+
+bool MarkupFilter::trySGR(const MarkupNode &Node) {
+ if (Node.Text == "\033[0m") {
+ resetColor();
+ return true;
+ }
+ if (Node.Text == "\033[1m") {
+ Bold = true;
+ if (ColorsEnabled)
+ OS.changeColor(raw_ostream::Colors::SAVEDCOLOR, Bold);
+ return true;
+ }
+ auto SGRColor = StringSwitch<Optional<raw_ostream::Colors>>(Node.Text)
+ .Case("\033[30m", raw_ostream::Colors::BLACK)
+ .Case("\033[31m", raw_ostream::Colors::RED)
+ .Case("\033[32m", raw_ostream::Colors::GREEN)
+ .Case("\033[33m", raw_ostream::Colors::YELLOW)
+ .Case("\033[34m", raw_ostream::Colors::BLUE)
+ .Case("\033[35m", raw_ostream::Colors::MAGENTA)
+ .Case("\033[36m", raw_ostream::Colors::CYAN)
+ .Case("\033[37m", raw_ostream::Colors::WHITE)
+ .Default(llvm::None);
+ if (SGRColor) {
+ Color = *SGRColor;
+ if (ColorsEnabled)
+ OS.changeColor(*Color);
+ return true;
+ }
+
+ return false;
+}
+
+// Begin highlighting text by picking a different color than the current color
+// state.
+void MarkupFilter::highlight() {
+ if (!ColorsEnabled)
+ return;
+ OS.changeColor(Color == raw_ostream::Colors::BLUE ? raw_ostream::Colors::CYAN
+ : raw_ostream::Colors::BLUE,
+ Bold);
+}
+
+// Set the output stream's color to the current color and bold state of the SGR
+// abstract machine.
+void MarkupFilter::restoreColor() {
+ if (!ColorsEnabled)
+ return;
+ if (Color) {
+ OS.changeColor(*Color, Bold);
+ } else {
+ OS.resetColor();
+ if (Bold)
+ OS.changeColor(raw_ostream::Colors::SAVEDCOLOR, Bold);
+ }
+}
+
+// Set the SGR and output stream's color and bold states back to the default.
+void MarkupFilter::resetColor() {
+ if (!Color && !Bold)
+ return;
+ Color.reset();
+ Bold = false;
+ if (ColorsEnabled)
+ OS.resetColor();
+}
+
+bool MarkupFilter::checkTag(const MarkupNode &Node) const {
+ if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) {
+ WithColor::error(errs()) << "tags must be all lowercase characters\n";
+ reportLocation(Node.Tag.begin());
+ return false;
+ }
+ return true;
+}
+
+bool MarkupFilter::checkNumFields(const MarkupNode &Node, size_t Size) const {
+ if (Node.Fields.size() != Size) {
+ WithColor::error(errs()) << "expected " << Size << " fields; found "
+ << Node.Fields.size() << "\n";
+ reportLocation(Node.Tag.end());
+ return false;
+ }
+ return true;
+}
+
+void MarkupFilter::reportLocation(StringRef::iterator Loc) const {
+ errs() << Line;
+ WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^';
+ errs() << '\n';
+}
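A sketch of how the parser and filter plausibly cooperate in a line-oriented driver; for instance, an input element {{{symbol:_Z3foov}}} should come out as the demangled "foo()". The wiring is assumed from the two APIs above, with error handling omitted:

// Runs one log line through parser and filter.
void filterLine(MarkupParser &Parser, MarkupFilter &Filter, StringRef Line) {
  Parser.parseLine(Line);
  Filter.beginLine(Line); // resets the SGR color state for the new line
  while (Optional<MarkupNode> Node = Parser.nextNode())
    Filter.filter(*Node);
}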
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index a9c78830fa59..d8ee9264b64f 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "SymbolizableObjectFile.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -327,6 +327,14 @@ DIGlobal SymbolizableObjectFile::symbolizeData(
std::string FileName;
getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size,
FileName);
+ Res.DeclFile = FileName;
+
+  // Try to get a better filename:lineno pair from the debuginfo, if present.
+ DILineInfo DL = DebugInfoContext->getLineInfoForDataAddress(ModuleOffset);
+ if (DL.Line != 0) {
+ Res.DeclFile = DL.FileName;
+ Res.DeclLine = DL.Line;
+ }
return Res;
}
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 5ec79df17fed..d2ff8aa7c995 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -12,22 +12,19 @@
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
-#include "SymbolizableObjectFile.h"
-
#include "llvm/ADT/STLExtras.h"
-#include "llvm/BinaryFormat/COFF.h"
-#include "llvm/Config/config.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
-#include "llvm/Debuginfod/Debuginfod.h"
+#include "llvm/DebugInfo/Symbolize/DIFetcher.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compression.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
@@ -38,8 +35,20 @@
#include <cstring>
namespace llvm {
+namespace codeview {
+union DebugInfo;
+}
+namespace object {
+template <class ELFT> class ELFFile;
+}
namespace symbolize {
+LLVMSymbolizer::LLVMSymbolizer() = default;
+
+LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) : Opts(Opts) {}
+
+LLVMSymbolizer::~LLVMSymbolizer() = default;
+
template <typename T>
Expected<DILineInfo>
LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier,
@@ -81,6 +90,12 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
return symbolizeCodeCommon(ModuleName, ModuleOffset);
}
+Expected<DILineInfo>
+LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset) {
+ return symbolizeCodeCommon(BuildID, ModuleOffset);
+}
+
template <typename T>
Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon(
const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) {
@@ -124,6 +139,12 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset);
}
+Expected<DIInliningInfo>
+LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset) {
+ return symbolizeInlinedCodeCommon(BuildID, ModuleOffset);
+}
+
template <typename T>
Expected<DIGlobal>
LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier,
@@ -163,6 +184,12 @@ LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
return symbolizeDataCommon(ModuleName, ModuleOffset);
}
+Expected<DIGlobal>
+LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset) {
+ return symbolizeDataCommon(BuildID, ModuleOffset);
+}
+
template <typename T>
Expected<std::vector<DILocal>>
LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier,
@@ -198,11 +225,20 @@ LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName,
return symbolizeFrameCommon(ModuleName, ModuleOffset);
}
+Expected<std::vector<DILocal>>
+LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
+ object::SectionedAddress ModuleOffset) {
+ return symbolizeFrameCommon(BuildID, ModuleOffset);
+}
+
void LLVMSymbolizer::flush() {
ObjectForUBPathAndArch.clear();
+ LRUBinaries.clear();
+ CacheSize = 0;
BinaryForPath.clear();
ObjectPairForPathArch.clear();
Modules.clear();
+ BuildIDPaths.clear();
}
namespace {
@@ -230,51 +266,6 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer()));
}
-bool findDebugBinary(const std::string &OrigPath,
- const std::string &DebuglinkName, uint32_t CRCHash,
- const std::string &FallbackDebugPath,
- std::string &Result) {
- SmallString<16> OrigDir(OrigPath);
- llvm::sys::path::remove_filename(OrigDir);
- SmallString<16> DebugPath = OrigDir;
- // Try relative/path/to/original_binary/debuglink_name
- llvm::sys::path::append(DebugPath, DebuglinkName);
- if (checkFileCRC(DebugPath, CRCHash)) {
- Result = std::string(DebugPath.str());
- return true;
- }
- // Try relative/path/to/original_binary/.debug/debuglink_name
- DebugPath = OrigDir;
- llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
- if (checkFileCRC(DebugPath, CRCHash)) {
- Result = std::string(DebugPath.str());
- return true;
- }
- // Make the path absolute so that lookups will go to
- // "/usr/lib/debug/full/path/to/debug", not
- // "/usr/lib/debug/to/debug"
- llvm::sys::fs::make_absolute(OrigDir);
- if (!FallbackDebugPath.empty()) {
- // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
- DebugPath = FallbackDebugPath;
- } else {
-#if defined(__NetBSD__)
- // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
- DebugPath = "/usr/libdata/debug";
-#else
- // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
- DebugPath = "/usr/lib/debug";
-#endif
- }
- llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
- DebuglinkName);
- if (checkFileCRC(DebugPath, CRCHash)) {
- Result = std::string(DebugPath.str());
- return true;
- }
- return false;
-}
-
bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
uint32_t &CRCHash) {
if (!Obj)
@@ -351,50 +342,6 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
return BuildID;
}
-bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory,
- const ArrayRef<uint8_t> BuildID, std::string &Result) {
- auto getDebugPath = [&](StringRef Directory) {
- SmallString<128> Path{Directory};
- sys::path::append(Path, ".build-id",
- llvm::toHex(BuildID[0], /*LowerCase=*/true),
- llvm::toHex(BuildID.slice(1), /*LowerCase=*/true));
- Path += ".debug";
- return Path;
- };
- if (DebugFileDirectory.empty()) {
- SmallString<128> Path = getDebugPath(
-#if defined(__NetBSD__)
- // Try /usr/libdata/debug/.build-id/../...
- "/usr/libdata/debug"
-#else
- // Try /usr/lib/debug/.build-id/../...
- "/usr/lib/debug"
-#endif
- );
- if (llvm::sys::fs::exists(Path)) {
- Result = std::string(Path.str());
- return true;
- }
- } else {
- for (const auto &Directory : DebugFileDirectory) {
- // Try <debug-file-directory>/.build-id/../...
- SmallString<128> Path = getDebugPath(Directory);
- if (llvm::sys::fs::exists(Path)) {
- Result = std::string(Path.str());
- return true;
- }
- }
- }
- // Try debuginfod client cache and known servers.
- Expected<std::string> PathOrErr = getCachedOrDownloadDebuginfo(BuildID);
- if (!PathOrErr) {
- consumeError(PathOrErr.takeError());
- return false;
- }
- Result = *PathOrErr;
- return true;
-}
-
} // end anonymous namespace
ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
@@ -437,8 +384,7 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
std::string DebugBinaryPath;
if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
return nullptr;
- if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath,
- DebugBinaryPath))
+ if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
return nullptr;
auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
if (!DbgObjOrErr) {
@@ -458,7 +404,7 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
if (BuildID->size() < 2)
return nullptr;
std::string DebugBinaryPath;
- if (!findDebugBinary(Opts.DebugFileDirectory, *BuildID, DebugBinaryPath))
+ if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath))
return nullptr;
auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
if (!DbgObjOrErr) {
@@ -468,12 +414,97 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
return DbgObjOrErr.get();
}
+bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath,
+ const std::string &DebuglinkName,
+ uint32_t CRCHash, std::string &Result) {
+ SmallString<16> OrigDir(OrigPath);
+ llvm::sys::path::remove_filename(OrigDir);
+ SmallString<16> DebugPath = OrigDir;
+ // Try relative/path/to/original_binary/debuglink_name
+ llvm::sys::path::append(DebugPath, DebuglinkName);
+ if (checkFileCRC(DebugPath, CRCHash)) {
+ Result = std::string(DebugPath.str());
+ return true;
+ }
+ // Try relative/path/to/original_binary/.debug/debuglink_name
+ DebugPath = OrigDir;
+ llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
+ if (checkFileCRC(DebugPath, CRCHash)) {
+ Result = std::string(DebugPath.str());
+ return true;
+ }
+ // Make the path absolute so that lookups will go to
+ // "/usr/lib/debug/full/path/to/debug", not
+ // "/usr/lib/debug/to/debug"
+ llvm::sys::fs::make_absolute(OrigDir);
+ if (!Opts.FallbackDebugPath.empty()) {
+ // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
+ DebugPath = Opts.FallbackDebugPath;
+ } else {
+#if defined(__NetBSD__)
+ // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
+ DebugPath = "/usr/libdata/debug";
+#else
+ // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
+ DebugPath = "/usr/lib/debug";
+#endif
+ }
+ llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
+ DebuglinkName);
+ if (checkFileCRC(DebugPath, CRCHash)) {
+ Result = std::string(DebugPath.str());
+ return true;
+ }
+ return false;
+}
+
+static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) {
+ return StringRef(reinterpret_cast<const char *>(BuildID.data()),
+ BuildID.size());
+}
+
+bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID,
+ std::string &Result) {
+ StringRef BuildIDStr = getBuildIDStr(BuildID);
+ auto I = BuildIDPaths.find(BuildIDStr);
+ if (I != BuildIDPaths.end()) {
+ Result = I->second;
+ return true;
+ }
+ auto recordPath = [&](StringRef Path) {
+ Result = Path.str();
+ auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result});
+ assert(InsertResult.second);
+ (void)InsertResult;
+ };
+
+ Optional<std::string> Path;
+ Path = LocalDIFetcher(Opts.DebugFileDirectory).fetchBuildID(BuildID);
+ if (Path) {
+ recordPath(*Path);
+ return true;
+ }
+
+ // Try caller-provided debug info fetchers.
+ for (const std::unique_ptr<DIFetcher> &Fetcher : DIFetchers) {
+ Path = Fetcher->fetchBuildID(BuildID);
+ if (Path) {
+ recordPath(*Path);
+ return true;
+ }
+ }
+
+ return false;
+}
+
Expected<LLVMSymbolizer::ObjectPair>
LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
const std::string &ArchName) {
auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
- if (I != ObjectPairForPathArch.end())
+ if (I != ObjectPairForPathArch.end()) {
+ recordAccess(BinaryForPath.find(Path)->second);
return I->second;
+ }
auto ObjOrErr = getOrCreateObject(Path, ArchName);
if (!ObjOrErr) {
@@ -495,7 +526,12 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
if (!DbgObj)
DbgObj = Obj;
ObjectPair Res = std::make_pair(Obj, DbgObj);
- ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
+ std::string DbgObjPath = DbgObj->getFileName().str();
+ auto Pair =
+ ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
+ BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() {
+ ObjectPairForPathArch.erase(I);
+ });
return Res;
}
@@ -505,13 +541,19 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path,
Binary *Bin;
auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
if (!Pair.second) {
- Bin = Pair.first->second.getBinary();
+ Bin = Pair.first->second->getBinary();
+ recordAccess(Pair.first->second);
} else {
Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
if (!BinOrErr)
return BinOrErr.takeError();
- Pair.first->second = std::move(BinOrErr.get());
- Bin = Pair.first->second.getBinary();
+
+ CachedBinary &CachedBin = Pair.first->second;
+ CachedBin = std::move(BinOrErr.get());
+ CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
+ LRUBinaries.push_back(CachedBin);
+ CacheSize += CachedBin.size();
+ Bin = CachedBin->getBinary();
}
if (!Bin)
@@ -530,8 +572,10 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path,
return ObjOrErr.takeError();
}
ObjectFile *Res = ObjOrErr->get();
- ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
- std::move(ObjOrErr.get()));
+ auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
+ std::move(ObjOrErr.get()));
+ BinaryForPath.find(Path)->second.pushEvictor(
+ [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
return Res;
}
if (Bin->isObject()) {
@@ -559,10 +603,6 @@ LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
Expected<SymbolizableModule *>
LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
- auto I = Modules.find(ModuleName);
- if (I != Modules.end())
- return I->second.get();
-
std::string BinaryName = ModuleName;
std::string ArchName = Opts.DefaultArch;
size_t ColonPos = ModuleName.find_last_of(':');
@@ -574,6 +614,13 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
ArchName = ArchStr;
}
}
+
+ auto I = Modules.find(ModuleName);
+ if (I != Modules.end()) {
+ recordAccess(BinaryForPath.find(BinaryName)->second);
+ return I->second.get();
+ }
+
auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
if (!ObjectsOrErr) {
// Failed to find valid object file.
@@ -608,7 +655,15 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
Context = DWARFContext::create(
*Objects.second, DWARFContext::ProcessDebugRelocations::Process,
nullptr, Opts.DWPName);
- return createModuleInfo(Objects.first, std::move(Context), ModuleName);
+ auto ModuleOrErr =
+ createModuleInfo(Objects.first, std::move(Context), ModuleName);
+ if (ModuleOrErr) {
+ auto I = Modules.find(ModuleName);
+ BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() {
+ Modules.erase(I);
+ });
+ }
+ return ModuleOrErr;
}
Expected<SymbolizableModule *>
@@ -623,6 +678,17 @@ LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
return createModuleInfo(&Obj, std::move(Context), ObjName);
}
+Expected<SymbolizableModule *>
+LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
+ std::string Path;
+ if (!getOrFindDebugBinary(BuildID, Path)) {
+ return createStringError(errc::no_such_file_or_directory,
+ Twine("could not find build ID '") +
+ toHex(BuildID) + "'");
+ }
+ return getOrCreateModuleInfo(Path);
+}
+
namespace {
// Undo these various manglings for Win32 extern "C" functions:
@@ -680,5 +746,35 @@ LLVMSymbolizer::DemangleName(const std::string &Name,
return Name;
}
+void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
+ if (Bin->getBinary())
+ LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
+}
+
+void LLVMSymbolizer::pruneCache() {
+  // Evict binaries in LRU order until the cache size is within the configured
+  // maximum or at most one binary remains. The MRU binary is always kept, to
+  // avoid thrashing when it alone exceeds the cache size.
+ while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
+ std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
+ CachedBinary &Bin = LRUBinaries.front();
+ CacheSize -= Bin.size();
+ LRUBinaries.pop_front();
+ Bin.evict();
+ }
+}
+
+void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
+ if (Evictor) {
+ this->Evictor = [OldEvictor = std::move(this->Evictor),
+ NewEvictor = std::move(NewEvictor)]() {
+ NewEvictor();
+ OldEvictor();
+ };
+ } else {
+ this->Evictor = std::move(NewEvictor);
+ }
+}
+
} // namespace symbolize
} // namespace llvm
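Taken together: BinaryForPath entries now own evictor callbacks that unlink dependent cache entries, and pruneCache() walks LRUBinaries to enforce the cache budget. A plausible client-side sketch, assuming MaxCacheSize is the Options field used above and that pruneCache() may be called between symbolization batches:

#include "llvm/DebugInfo/Symbolize/Symbolize.h"

using namespace llvm::symbolize;

void symbolizeBatches() {
  LLVMSymbolizer::Options Opts;
  Opts.MaxCacheSize = 512 * 1024 * 1024; // illustrative 512 MiB budget
  LLVMSymbolizer Symbolizer(Opts);
  // ... symbolize a batch of addresses ...
  Symbolizer.pruneCache(); // evicts LRU binaries beyond the budget, keeps MRU
}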
diff --git a/llvm/lib/Debuginfod/DIFetcher.cpp b/llvm/lib/Debuginfod/DIFetcher.cpp
new file mode 100644
index 000000000000..f0c134654534
--- /dev/null
+++ b/llvm/lib/Debuginfod/DIFetcher.cpp
@@ -0,0 +1,28 @@
+//===- llvm/DebugInfod/DIFetcher.cpp - Debug info fetcher -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines a DIFetcher implementation for obtaining debug info
+/// from debuginfod.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Debuginfod/DIFetcher.h"
+
+#include "llvm/Debuginfod/Debuginfod.h"
+
+using namespace llvm;
+
+Optional<std::string>
+DebuginfodDIFetcher::fetchBuildID(ArrayRef<uint8_t> BuildID) const {
+ Expected<std::string> PathOrErr = getCachedOrDownloadDebuginfo(BuildID);
+ if (PathOrErr)
+ return *PathOrErr;
+ consumeError(PathOrErr.takeError());
+ return None;
+}
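A sketch of hooking this fetcher into the symbolizer; the addDIFetcher registration call is an assumption inferred from the caller-provided DIFetchers loop in Symbolize.cpp above:

#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Debuginfod/DIFetcher.h"

#include <memory>

// Falls back to debuginfod servers when local .build-id lookup fails.
void enableDebuginfod(llvm::symbolize::LLVMSymbolizer &Symbolizer) {
  Symbolizer.addDIFetcher(std::make_unique<llvm::DebuginfodDIFetcher>());
}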
diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index 27614572766d..7b1c36fdbe09 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -115,6 +115,41 @@ Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
getDefaultDebuginfodTimeout());
}
+namespace {
+
+/// A simple handler which streams the returned data to a cache file. The cache
+/// file is only created if a 200 OK status is observed.
+class StreamedHTTPResponseHandler : public HTTPResponseHandler {
+ using CreateStreamFn =
+ std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
+ CreateStreamFn CreateStream;
+ HTTPClient &Client;
+ std::unique_ptr<CachedFileStream> FileStream;
+
+public:
+ StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
+ : CreateStream(CreateStream), Client(Client) {}
+ virtual ~StreamedHTTPResponseHandler() = default;
+
+ Error handleBodyChunk(StringRef BodyChunk) override;
+};
+
+} // namespace
+
+Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
+ if (!FileStream) {
+ if (Client.responseCode() != 200)
+ return Error::success();
+ Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
+ CreateStream();
+ if (!FileStreamOrError)
+ return FileStreamOrError.takeError();
+ FileStream = std::move(*FileStreamOrError);
+ }
+ *FileStream->OS << BodyChunk;
+ return Error::success();
+}
+
Expected<std::string> getCachedOrDownloadArtifact(
StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
@@ -155,28 +190,18 @@ Expected<std::string> getCachedOrDownloadArtifact(
SmallString<64> ArtifactUrl;
sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
- Expected<HTTPResponseBuffer> ResponseOrErr = Client.get(ArtifactUrl);
- if (!ResponseOrErr)
- return ResponseOrErr.takeError();
+    // Perform the HTTP request and, if successful, write the response body to
+ // the cache.
+ StreamedHTTPResponseHandler Handler([&]() { return CacheAddStream(Task); },
+ Client);
+ HTTPRequest Request(ArtifactUrl);
+ Error Err = Client.perform(Request, Handler);
+ if (Err)
+ return std::move(Err);
- HTTPResponseBuffer &Response = *ResponseOrErr;
- if (Response.Code != 200)
+ if (Client.responseCode() != 200)
continue;
- // We have retrieved the artifact from this server, and now add it to the
- // file cache.
- Expected<std::unique_ptr<CachedFileStream>> FileStreamOrErr =
- CacheAddStream(Task);
- if (!FileStreamOrErr)
- return FileStreamOrErr.takeError();
- std::unique_ptr<CachedFileStream> &FileStream = *FileStreamOrErr;
- if (!Response.Body)
- return createStringError(
- errc::io_error, "Unallocated MemoryBuffer in HTTPResponseBuffer.");
-
- *FileStream->OS << StringRef(Response.Body->getBufferStart(),
- Response.Body->getBufferSize());
-
// Return the path to the artifact on disk.
return std::string(AbsCachedArtifactPath);
}
diff --git a/llvm/lib/Debuginfod/HTTPClient.cpp b/llvm/lib/Debuginfod/HTTPClient.cpp
index 65f457933b92..3376eaa7cd0d 100644
--- a/llvm/lib/Debuginfod/HTTPClient.cpp
+++ b/llvm/lib/Debuginfod/HTTPClient.cpp
@@ -7,9 +7,8 @@
//===----------------------------------------------------------------------===//
///
/// \file
-///
-/// This file defines the methods of the HTTPRequest, HTTPClient, and
-/// BufferedHTTPResponseHandler classes.
+/// This file defines the implementation of the HTTPClient library for issuing
+/// HTTP requests and handling the responses.
///
//===----------------------------------------------------------------------===//
@@ -34,44 +33,6 @@ bool operator==(const HTTPRequest &A, const HTTPRequest &B) {
HTTPResponseHandler::~HTTPResponseHandler() = default;
-static inline bool parseContentLengthHeader(StringRef LineRef,
- size_t &ContentLength) {
- // Content-Length is a mandatory header, and the only one we handle.
- return LineRef.consume_front("Content-Length: ") &&
- to_integer(LineRef.trim(), ContentLength, 10);
-}
-
-Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) {
- if (ResponseBuffer.Body)
- return Error::success();
-
- size_t ContentLength;
- if (parseContentLengthHeader(HeaderLine, ContentLength))
- ResponseBuffer.Body =
- WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength);
-
- return Error::success();
-}
-
-Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
- if (!ResponseBuffer.Body)
- return createStringError(errc::io_error,
- "Unallocated response buffer. HTTP Body data "
- "received before Content-Length header.");
- if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize())
- return createStringError(errc::io_error,
- "Content size exceeds buffer size.");
- memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(),
- BodyChunk.size());
- Offset += BodyChunk.size();
- return Error::success();
-}
-
-Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) {
- ResponseBuffer.Code = Code;
- return Error::success();
-}
-
bool HTTPClient::IsInitialized = false;
class HTTPClientCleanup {
@@ -80,18 +41,6 @@ public:
};
static const HTTPClientCleanup Cleanup;
-Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) {
- BufferedHTTPResponseHandler Handler;
- if (Error Err = perform(Request, Handler))
- return std::move(Err);
- return std::move(Handler.ResponseBuffer);
-}
-
-Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) {
- HTTPRequest Request(Url);
- return perform(Request);
-}
-
#ifdef LLVM_ENABLE_CURL
bool HTTPClient::isAvailable() { return true; }
@@ -128,18 +77,6 @@ struct CurlHTTPRequest {
llvm::Error ErrorState = Error::success();
};
-static size_t curlHeaderFunction(char *Contents, size_t Size, size_t NMemb,
- CurlHTTPRequest *CurlRequest) {
- assert(Size == 1 && "The Size passed by libCURL to CURLOPT_HEADERFUNCTION "
- "should always be 1.");
- if (Error Err =
- CurlRequest->Handler.handleHeaderLine(StringRef(Contents, NMemb))) {
- CurlRequest->storeError(std::move(Err));
- return 0;
- }
- return NMemb;
-}
-
static size_t curlWriteFunction(char *Contents, size_t Size, size_t NMemb,
CurlHTTPRequest *CurlRequest) {
Size *= NMemb;
@@ -156,10 +93,10 @@ HTTPClient::HTTPClient() {
"Must call HTTPClient::initialize() at the beginning of main().");
if (Curl)
return;
- assert((Curl = curl_easy_init()) && "Curl could not be initialized.");
+ Curl = curl_easy_init();
+ assert(Curl && "Curl could not be initialized");
// Set the callback hooks.
curl_easy_setopt(Curl, CURLOPT_WRITEFUNCTION, curlWriteFunction);
- curl_easy_setopt(Curl, CURLOPT_HEADERFUNCTION, curlHeaderFunction);
}
HTTPClient::~HTTPClient() { curl_easy_cleanup(Curl); }
@@ -176,22 +113,19 @@ Error HTTPClient::perform(const HTTPRequest &Request,
CurlHTTPRequest CurlRequest(Handler);
curl_easy_setopt(Curl, CURLOPT_WRITEDATA, &CurlRequest);
- curl_easy_setopt(Curl, CURLOPT_HEADERDATA, &CurlRequest);
CURLcode CurlRes = curl_easy_perform(Curl);
if (CurlRes != CURLE_OK)
return joinErrors(std::move(CurlRequest.ErrorState),
createStringError(errc::io_error,
"curl_easy_perform() failed: %s\n",
curl_easy_strerror(CurlRes)));
- if (CurlRequest.ErrorState)
- return std::move(CurlRequest.ErrorState);
+ return std::move(CurlRequest.ErrorState);
+}
- unsigned Code;
+unsigned HTTPClient::responseCode() {
+ long Code = 0;
curl_easy_getinfo(Curl, CURLINFO_RESPONSE_CODE, &Code);
- if (Error Err = Handler.handleStatusCode(Code))
- return joinErrors(std::move(CurlRequest.ErrorState), std::move(Err));
-
- return std::move(CurlRequest.ErrorState);
+ return Code;
}
#else
@@ -213,4 +147,8 @@ Error HTTPClient::perform(const HTTPRequest &Request,
llvm_unreachable("No HTTP Client implementation available.");
}
+unsigned HTTPClient::responseCode() {
+ llvm_unreachable("No HTTP Client implementation available.");
+}
+
#endif
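After this change, a caller supplies its own HTTPResponseHandler and reads the status via responseCode(), as Debuginfod.cpp now does. A minimal sketch, assuming HTTPClient::initialize() has already run (the constructor asserts on this):

#include "llvm/Debuginfod/HTTPClient.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"

#include <string>

using namespace llvm;

// Accumulates the response body in a string.
struct StringHandler : public HTTPResponseHandler {
  std::string Body;
  Error handleBodyChunk(StringRef Chunk) override {
    Body += Chunk.str();
    return Error::success();
  }
};

Expected<std::string> fetchUrl(StringRef Url) {
  HTTPClient Client;
  StringHandler Handler;
  if (Error Err = Client.perform(HTTPRequest(Url), Handler))
    return std::move(Err);
  if (Client.responseCode() != 200)
    return createStringError(errc::io_error, "unexpected HTTP status");
  return Handler.Body;
}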
diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp
index 13aa2864c183..9d128424cabf 100644
--- a/llvm/lib/Demangle/Demangle.cpp
+++ b/llvm/lib/Demangle/Demangle.cpp
@@ -51,7 +51,7 @@ bool llvm::nonMicrosoftDemangle(const char *MangledName, std::string &Result) {
if (isItaniumEncoding(MangledName))
Demangled = itaniumDemangle(MangledName, nullptr, nullptr, nullptr);
else if (isRustEncoding(MangledName))
- Demangled = rustDemangle(MangledName, nullptr, nullptr, nullptr);
+ Demangled = rustDemangle(MangledName);
else if (isDLangEncoding(MangledName))
Demangled = dlangDemangle(MangledName);
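The Rust path reflects rustDemangle's simplified signature (see the RustDemangle.cpp hunk below): it now takes only the mangled name and returns a malloc'ed string, or nullptr on failure. A usage sketch with an illustrative v0 symbol:

#include "llvm/Demangle/Demangle.h"

#include <cstdio>
#include <cstdlib>

void demo() {
  // Placeholder symbol; any valid "_R..." name demangles to a Rust path.
  if (char *Demangled = llvm::rustDemangle("_RNvC6_123foo3bar")) {
    std::puts(Demangled); // expected to print something like "123foo::bar"
    std::free(Demangled); // the caller owns the buffer
  }
}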
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index 1a5db755e37b..1c9209d8f369 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -172,6 +172,50 @@ struct DumpVisitor {
return printStr("TemplateParamKind::Template");
}
}
+ void print(Node::Prec P) {
+ switch (P) {
+ case Node::Prec::Primary:
+ return printStr("Node::Prec::Primary");
+ case Node::Prec::Postfix:
+ return printStr("Node::Prec::Postfix");
+ case Node::Prec::Unary:
+ return printStr("Node::Prec::Unary");
+ case Node::Prec::Cast:
+ return printStr("Node::Prec::Cast");
+ case Node::Prec::PtrMem:
+ return printStr("Node::Prec::PtrMem");
+ case Node::Prec::Multiplicative:
+ return printStr("Node::Prec::Multiplicative");
+ case Node::Prec::Additive:
+ return printStr("Node::Prec::Additive");
+ case Node::Prec::Shift:
+ return printStr("Node::Prec::Shift");
+ case Node::Prec::Spaceship:
+ return printStr("Node::Prec::Spaceship");
+ case Node::Prec::Relational:
+ return printStr("Node::Prec::Relational");
+ case Node::Prec::Equality:
+ return printStr("Node::Prec::Equality");
+ case Node::Prec::And:
+ return printStr("Node::Prec::And");
+ case Node::Prec::Xor:
+ return printStr("Node::Prec::Xor");
+ case Node::Prec::Ior:
+ return printStr("Node::Prec::Ior");
+ case Node::Prec::AndIf:
+ return printStr("Node::Prec::AndIf");
+ case Node::Prec::OrIf:
+ return printStr("Node::Prec::OrIf");
+ case Node::Prec::Conditional:
+ return printStr("Node::Prec::Conditional");
+ case Node::Prec::Assign:
+ return printStr("Node::Prec::Assign");
+ case Node::Prec::Comma:
+ return printStr("Node::Prec::Comma");
+ case Node::Prec::Default:
+ return printStr("Node::Prec::Default");
+ }
+ }
void newLine() {
printStr("\n");
@@ -404,8 +448,8 @@ char *ItaniumPartialDemangler::getFunctionBaseName(char *Buf, size_t *N) const {
case Node::KAbiTagAttr:
Name = static_cast<const AbiTagAttr *>(Name)->Base;
continue;
- case Node::KStdQualifiedName:
- Name = static_cast<const StdQualifiedName *>(Name)->Child;
+ case Node::KModuleEntity:
+ Name = static_cast<const ModuleEntity *>(Name)->Name;
continue;
case Node::KNestedName:
Name = static_cast<const NestedName *>(Name)->Name;
@@ -445,10 +489,10 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
break;
}
+ if (Name->getKind() == Node::KModuleEntity)
+ Name = static_cast<const ModuleEntity *>(Name)->Name;
+
switch (Name->getKind()) {
- case Node::KStdQualifiedName:
- OB += "std";
- break;
case Node::KNestedName:
static_cast<const NestedName *>(Name)->Qual->print(OB);
break;
@@ -550,8 +594,8 @@ bool ItaniumPartialDemangler::isCtorOrDtor() const {
case Node::KNestedName:
N = static_cast<const NestedName *>(N)->Name;
break;
- case Node::KStdQualifiedName:
- N = static_cast<const StdQualifiedName *>(N)->Child;
+ case Node::KModuleEntity:
+ N = static_cast<const ModuleEntity *>(N)->Name;
break;
}
}
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index d8da3b48e25b..b4e98a20f389 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -245,8 +245,8 @@ demanglePointerCVQualifiers(StringView &MangledName) {
}
StringView Demangler::copyString(StringView Borrowed) {
- char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
- std::strcpy(Stable, Borrowed.begin());
+ char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
+ std::memcpy(Stable, Borrowed.begin(), Borrowed.size());
return {Stable, Borrowed.size()};
}
@@ -823,11 +823,15 @@ SymbolNode *Demangler::parse(StringView &MangledName) {
}
TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
- if (!MangledName.consumeFront(".?A"))
+ if (!MangledName.consumeFront(".?A")) {
+ Error = true;
return nullptr;
+ }
MangledName.consumeFront(".?A");
- if (MangledName.empty())
+ if (MangledName.empty()) {
+ Error = true;
return nullptr;
+ }
return demangleClassType(MangledName);
}
@@ -970,12 +974,9 @@ void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
// FIXME: Propagate out-of-memory as an error?
std::terminate();
Identifier->output(OB, OF_Default);
- OB << '\0';
- char *Name = OB.getBuffer();
-
- StringView Owned = copyString(Name);
+ StringView Owned = copyString(OB);
memorizeString(Owned);
- std::free(Name);
+ std::free(OB.getBuffer());
}
IdentifierNode *
@@ -1279,7 +1280,6 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
bool IsWcharT = false;
bool IsNegative = false;
size_t CrcEndPos = 0;
- char *ResultBuffer = nullptr;
EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
@@ -1375,10 +1375,8 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
}
}
- OB << '\0';
- ResultBuffer = OB.getBuffer();
- Result->DecodedString = copyString(ResultBuffer);
- std::free(ResultBuffer);
+ Result->DecodedString = copyString(OB);
+ std::free(OB.getBuffer());
return Result;
StringLiteralError:
@@ -1455,10 +1453,9 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
Scope->output(OB, OF_Default);
OB << '\'';
OB << "::`" << Number << "'";
- OB << '\0';
- char *Result = OB.getBuffer();
- Identifier->Name = copyString(Result);
- std::free(Result);
+
+ Identifier->Name = copyString(OB);
+ std::free(OB.getBuffer());
return Identifier;
}
@@ -2322,8 +2319,8 @@ void Demangler::dumpBackReferences() {
TypeNode *T = Backrefs.FunctionParams[I];
T->output(OB, OF_Default);
- std::printf(" [%d] - %.*s\n", (int)I, (int)OB.getCurrentPosition(),
- OB.getBuffer());
+ StringView B = OB;
+ std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.begin());
}
std::free(OB.getBuffer());
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index d07d05a08c55..494cdabad41f 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -121,8 +121,8 @@ std::string Node::toString(OutputFlags Flags) const {
OutputBuffer OB;
initializeOutputBuffer(nullptr, nullptr, OB, 1024);
this->output(OB, Flags);
- OB << '\0';
- std::string Owned(OB.getBuffer());
+ StringView SV = OB;
+ std::string Owned(SV.begin(), SV.end());
std::free(OB.getBuffer());
return Owned;
}
diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
index dcac0bd63859..32b10db2a968 100644
--- a/llvm/lib/Demangle/RustDemangle.cpp
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -24,8 +24,8 @@
using namespace llvm;
using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::ScopedOverride;
using llvm::itanium_demangle::StringView;
-using llvm::itanium_demangle::SwapAndRestore;
namespace {
@@ -119,7 +119,7 @@ private:
if (!Print)
return;
- SwapAndRestore<size_t> SavePosition(Position, Position);
+ ScopedOverride<size_t> SavePosition(Position, Position);
Position = Backref;
Demangler();
}
@@ -147,57 +147,27 @@ private:
} // namespace
-char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
- int *Status) {
- if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
- if (Status != nullptr)
- *Status = demangle_invalid_args;
+char *llvm::rustDemangle(const char *MangledName) {
+ if (MangledName == nullptr)
return nullptr;
- }
// Return early if mangled name doesn't look like a Rust symbol.
StringView Mangled(MangledName);
- if (!Mangled.startsWith("_R")) {
- if (Status != nullptr)
- *Status = demangle_invalid_mangled_name;
+ if (!Mangled.startsWith("_R"))
return nullptr;
- }
Demangler D;
- if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) {
- if (Status != nullptr)
- *Status = demangle_memory_alloc_failure;
+ if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024))
return nullptr;
- }
if (!D.demangle(Mangled)) {
- if (Status != nullptr)
- *Status = demangle_invalid_mangled_name;
std::free(D.Output.getBuffer());
return nullptr;
}
D.Output += '\0';
- char *Demangled = D.Output.getBuffer();
- size_t DemangledLen = D.Output.getCurrentPosition();
-
- if (Buf != nullptr) {
- if (DemangledLen <= *N) {
- std::memcpy(Buf, Demangled, DemangledLen);
- std::free(Demangled);
- Demangled = Buf;
- } else {
- std::free(Buf);
- }
- }
-
- if (N != nullptr)
- *N = DemangledLen;
-
- if (Status != nullptr)
- *Status = demangle_success;
- return Demangled;
+ return D.Output.getBuffer();
}
Demangler::Demangler(size_t MaxRecursionLevel)
@@ -241,7 +211,7 @@ bool Demangler::demangle(StringView Mangled) {
demanglePath(IsInType::No);
if (Position != Input.size()) {
- SwapAndRestore<bool> SavePrint(Print, false);
+ ScopedOverride<bool> SavePrint(Print, false);
demanglePath(IsInType::No);
}
@@ -279,7 +249,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
Error = true;
return false;
}
- SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
+ ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
switch (consume()) {
case 'C': {
@@ -380,7 +350,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
// <impl-path> = [<disambiguator>] <path>
// <disambiguator> = "s" <base-62-number>
void Demangler::demangleImplPath(IsInType InType) {
- SwapAndRestore<bool> SavePrint(Print, false);
+ ScopedOverride<bool> SavePrint(Print, false);
parseOptionalBase62Number('s');
demanglePath(InType);
}
@@ -574,7 +544,7 @@ void Demangler::demangleType() {
Error = true;
return;
}
- SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
+ ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
size_t Start = Position;
char C = consume();
@@ -657,7 +627,7 @@ void Demangler::demangleType() {
// <abi> = "C"
// | <undisambiguated-identifier>
void Demangler::demangleFnSig() {
- SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
+ ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
demangleOptionalBinder();
if (consumeIf('U'))
@@ -699,7 +669,7 @@ void Demangler::demangleFnSig() {
// <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
void Demangler::demangleDynBounds() {
- SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
+ ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
print("dyn ");
demangleOptionalBinder();
for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
@@ -763,7 +733,7 @@ void Demangler::demangleConst() {
Error = true;
return;
}
- SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
+ ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
char C = consume();
BasicType Type;
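ScopedOverride, the new name for SwapAndRestore used throughout this file, is an RAII guard: it saves a variable's current value, installs a new one, and restores the original at scope exit. A self-contained sketch of the pattern (not the LLVM implementation itself):

#include <utility>

template <typename T> class ScopedOverrideSketch {
  T &Target;
  T Saved;

public:
  ScopedOverrideSketch(T &Target, T NewValue)
      : Target(Target), Saved(std::move(Target)) {
    Target = std::move(NewValue);
  }
  ~ScopedOverrideSketch() { Target = std::move(Saved); }
};

void example(bool &Print) {
  ScopedOverrideSketch<bool> SavePrint(Print, false); // silence printing
  // ... recurse without emitting output ...
} // Print restored here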
diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 1fb37ce7c57c..29a623ebe449 100644
--- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include <mutex>
@@ -70,7 +71,7 @@ LLVM_ATTRIBUTE_USED void requiredSymbolDefinitionsFromOrcTargetProcess() {
}
struct RegisteredObjectInfo {
- RegisteredObjectInfo() {}
+ RegisteredObjectInfo() = default;
RegisteredObjectInfo(std::size_t Size, jit_code_entry *Entry,
OwningBinary<ObjectFile> Obj)
@@ -96,7 +97,7 @@ class GDBJITRegistrationListener : public JITEventListener {
public:
/// Instantiates the JIT service.
- GDBJITRegistrationListener() {}
+ GDBJITRegistrationListener() = default;
/// Unregisters each object that was previously registered and releases all
/// internal resources.
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index fd7fa21df196..3dfe736dc5be 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -37,7 +37,7 @@ class AllocaHolder {
std::vector<void *> Allocations;
public:
- AllocaHolder() {}
+ AllocaHolder() = default;
// Make this type move-only.
AllocaHolder(AllocaHolder &&) = default;
diff --git a/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
new file mode 100644
index 000000000000..0fc366bf505f
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
@@ -0,0 +1,117 @@
+//===--- DWARFRecordSectionSplitter.cpp - JITLink DWARF record splitter ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
+#include "llvm/Support/BinaryStreamReader.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+DWARFRecordSectionSplitter::DWARFRecordSectionSplitter(StringRef SectionName)
+ : SectionName(SectionName) {}
+
+Error DWARFRecordSectionSplitter::operator()(LinkGraph &G) {
+ auto *Section = G.findSectionByName(SectionName);
+
+ if (!Section) {
+ LLVM_DEBUG({
+ dbgs() << "DWARFRecordSectionSplitter: No " << SectionName
+ << " section. Nothing to do\n";
+ });
+ return Error::success();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "DWARFRecordSectionSplitter: Processing " << SectionName
+ << "...\n";
+ });
+
+ DenseMap<Block *, LinkGraph::SplitBlockCache> Caches;
+
+ {
+ // Pre-build the split caches.
+ for (auto *B : Section->blocks())
+ Caches[B] = LinkGraph::SplitBlockCache::value_type();
+ for (auto *Sym : Section->symbols())
+ Caches[&Sym->getBlock()]->push_back(Sym);
+ for (auto *B : Section->blocks())
+ llvm::sort(*Caches[B], [](const Symbol *LHS, const Symbol *RHS) {
+ return LHS->getOffset() > RHS->getOffset();
+ });
+ }
+
+ // Iterate over blocks (we do this by iterating over Caches entries rather
+ // than Section->blocks(), as we will be inserting new blocks along the way,
+ // which would invalidate iterators in the latter sequence).
+ for (auto &KV : Caches) {
+ auto &B = *KV.first;
+ auto &BCache = KV.second;
+ if (auto Err = processBlock(G, B, BCache))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+Error DWARFRecordSectionSplitter::processBlock(
+ LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache) {
+ LLVM_DEBUG(dbgs() << " Processing block at " << B.getAddress() << "\n");
+
+ // Section should not contain zero-fill blocks.
+ if (B.isZeroFill())
+ return make_error<JITLinkError>("Unexpected zero-fill block in " +
+ SectionName + " section");
+
+ if (B.getSize() == 0) {
+ LLVM_DEBUG(dbgs() << " Block is empty. Skipping.\n");
+ return Error::success();
+ }
+
+ BinaryStreamReader BlockReader(
+ StringRef(B.getContent().data(), B.getContent().size()),
+ G.getEndianness());
+
+ while (true) {
+ uint64_t RecordStartOffset = BlockReader.getOffset();
+
+ LLVM_DEBUG({
+ dbgs() << " Processing CFI record at "
+ << formatv("{0:x16}", B.getAddress()) << "\n";
+ });
+
+ uint32_t Length;
+ if (auto Err = BlockReader.readInteger(Length))
+ return Err;
+ if (Length != 0xffffffff) {
+ if (auto Err = BlockReader.skip(Length))
+ return Err;
+ } else {
+ uint64_t ExtendedLength;
+ if (auto Err = BlockReader.readInteger(ExtendedLength))
+ return Err;
+ if (auto Err = BlockReader.skip(ExtendedLength))
+ return Err;
+ }
+
+ // If this was the last record in the block then there's nothing left to split.
+ if (BlockReader.empty()) {
+ LLVM_DEBUG(dbgs() << " Extracted " << B << "\n");
+ return Error::success();
+ }
+
+ uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset;
+ auto &NewBlock = G.splitBlock(B, BlockSize);
+ (void)NewBlock;
+ LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n");
+ }
+}
+
+} // namespace jitlink
+} // namespace llvm
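// A minimal standalone sketch (not from the patch; names hypothetical) of the
// record-length convention that processBlock() above relies on: every DWARF
// CFI record opens with a 32-bit length, and the sentinel 0xffffffff means a
// 64-bit extended length follows. Byte order is assumed to match the host.
#include <cstdint>
#include <cstring>

// Returns the total record size in bytes (length header + payload).
static uint64_t cfiRecordSize(const char *Data) {
  uint32_t Length;
  std::memcpy(&Length, Data, sizeof(Length));
  if (Length != 0xffffffff)
    return sizeof(uint32_t) + Length; // 4-byte header + payload.
  uint64_t ExtendedLength;
  std::memcpy(&ExtendedLength, Data + sizeof(uint32_t),
              sizeof(ExtendedLength));
  // 4-byte sentinel + 8-byte extended length + payload.
  return sizeof(uint32_t) + sizeof(uint64_t) + ExtendedLength;
}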
diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index 2ae193595fc0..b1492cd74508 100644
--- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -10,6 +10,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -18,109 +19,13 @@
namespace llvm {
namespace jitlink {
-EHFrameSplitter::EHFrameSplitter(StringRef EHFrameSectionName)
- : EHFrameSectionName(EHFrameSectionName) {}
-
-Error EHFrameSplitter::operator()(LinkGraph &G) {
- auto *EHFrame = G.findSectionByName(EHFrameSectionName);
-
- if (!EHFrame) {
- LLVM_DEBUG({
- dbgs() << "EHFrameSplitter: No " << EHFrameSectionName
- << " section. Nothing to do\n";
- });
- return Error::success();
- }
-
- LLVM_DEBUG({
- dbgs() << "EHFrameSplitter: Processing " << EHFrameSectionName << "...\n";
- });
-
- DenseMap<Block *, LinkGraph::SplitBlockCache> Caches;
-
- {
- // Pre-build the split caches.
- for (auto *B : EHFrame->blocks())
- Caches[B] = LinkGraph::SplitBlockCache::value_type();
- for (auto *Sym : EHFrame->symbols())
- Caches[&Sym->getBlock()]->push_back(Sym);
- for (auto *B : EHFrame->blocks())
- llvm::sort(*Caches[B], [](const Symbol *LHS, const Symbol *RHS) {
- return LHS->getOffset() > RHS->getOffset();
- });
- }
-
- // Iterate over blocks (we do this by iterating over Caches entries rather
- // than EHFrame->blocks() as we will be inserting new blocks along the way,
- // which would invalidate iterators in the latter sequence.
- for (auto &KV : Caches) {
- auto &B = *KV.first;
- auto &BCache = KV.second;
- if (auto Err = processBlock(G, B, BCache))
- return Err;
- }
-
- return Error::success();
-}
-
-Error EHFrameSplitter::processBlock(LinkGraph &G, Block &B,
- LinkGraph::SplitBlockCache &Cache) {
- LLVM_DEBUG(dbgs() << " Processing block at " << B.getAddress() << "\n");
-
- // eh-frame should not contain zero-fill blocks.
- if (B.isZeroFill())
- return make_error<JITLinkError>("Unexpected zero-fill block in " +
- EHFrameSectionName + " section");
-
- if (B.getSize() == 0) {
- LLVM_DEBUG(dbgs() << " Block is empty. Skipping.\n");
- return Error::success();
- }
-
- BinaryStreamReader BlockReader(
- StringRef(B.getContent().data(), B.getContent().size()),
- G.getEndianness());
-
- while (true) {
- uint64_t RecordStartOffset = BlockReader.getOffset();
-
- LLVM_DEBUG({
- dbgs() << " Processing CFI record at "
- << formatv("{0:x16}", B.getAddress()) << "\n";
- });
-
- uint32_t Length;
- if (auto Err = BlockReader.readInteger(Length))
- return Err;
- if (Length != 0xffffffff) {
- if (auto Err = BlockReader.skip(Length))
- return Err;
- } else {
- uint64_t ExtendedLength;
- if (auto Err = BlockReader.readInteger(ExtendedLength))
- return Err;
- if (auto Err = BlockReader.skip(ExtendedLength))
- return Err;
- }
-
- // If this was the last block then there's nothing to split
- if (BlockReader.empty()) {
- LLVM_DEBUG(dbgs() << " Extracted " << B << "\n");
- return Error::success();
- }
-
- uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset;
- auto &NewBlock = G.splitBlock(B, BlockSize);
- (void)NewBlock;
- LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n");
- }
-}
-
EHFrameEdgeFixer::EHFrameEdgeFixer(StringRef EHFrameSectionName,
- unsigned PointerSize, Edge::Kind Delta64,
- Edge::Kind Delta32, Edge::Kind NegDelta32)
+ unsigned PointerSize, Edge::Kind Pointer32,
+ Edge::Kind Pointer64, Edge::Kind Delta32,
+ Edge::Kind Delta64, Edge::Kind NegDelta32)
: EHFrameSectionName(EHFrameSectionName), PointerSize(PointerSize),
- Delta64(Delta64), Delta32(Delta32), NegDelta32(NegDelta32) {}
+ Pointer32(Pointer32), Pointer64(Pointer64), Delta32(Delta32),
+ Delta64(Delta64), NegDelta32(NegDelta32) {}
Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
auto *EHFrame = G.findSectionByName(EHFrameSectionName);
@@ -147,7 +52,16 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
// Build a map of all blocks and symbols in the text sections. We will use
// these for finding / building edge targets when processing FDEs.
for (auto &Sec : G.sections()) {
- PC.AddrToSyms.addSymbols(Sec.symbols());
+ // Just record the most-canonical symbol (for eh-frame purposes) at each
+ // address.
+ for (auto *Sym : Sec.symbols()) {
+ auto &CurSym = PC.AddrToSym[Sym->getAddress()];
+ if (!CurSym || (std::make_tuple(Sym->getLinkage(), Sym->getScope(),
+ !Sym->hasName(), Sym->getName()) <
+ std::make_tuple(CurSym->getLinkage(), CurSym->getScope(),
+ !CurSym->hasName(), CurSym->getName())))
+ CurSym = Sym;
+ }
if (auto Err = PC.AddrToBlock.addBlocks(Sec.blocks(),
BlockAddressMap::includeNonNull))
return Err;
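// A minimal standalone model (not from the patch; enums and names are
// hypothetical) of the "most canonical" choice above. The comparison is a
// lexicographic tuple ordering in which smaller wins: strong linkage beats
// weak, broader scope beats narrower, a named symbol beats an unnamed one
// (!hasName() is false < true), and ties fall back to name order.
#include <string>
#include <tuple>

enum class Linkage { Strong, Weak };         // Strong compares smaller.
enum class Scope { Default, Hidden, Local }; // Broader compares smaller.

struct SymModel {
  Linkage L;
  Scope S;
  bool HasName;
  std::string Name;
};

static bool moreCanonical(const SymModel &A, const SymModel &B) {
  return std::make_tuple(A.L, A.S, !A.HasName, A.Name) <
         std::make_tuple(B.L, B.S, !B.HasName, B.Name);
}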
@@ -172,10 +86,7 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) {
Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) {
- LLVM_DEBUG({
- dbgs() << " Processing block at " << formatv("{0:x16}", B.getAddress())
- << "\n";
- });
+ LLVM_DEBUG(dbgs() << " Processing block at " << B.getAddress() << "\n");
// eh-frame should not contain zero-fill blocks.
if (B.isZeroFill())
@@ -209,7 +120,7 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) {
LLVM_DEBUG({
dbgs() << " Processing CFI record at "
- << formatv("{0:x16}", B.getAddress() + RecordStartOffset) << "\n";
+ << (B.getAddress() + RecordStartOffset) << "\n";
});
// Get the record length.
@@ -244,7 +155,7 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) {
if (CIEDelta == 0) {
if (auto Err = processCIE(PC, B, RecordStartOffset,
CIEDeltaFieldOffset + RecordRemaining,
- CIEDeltaFieldOffset))
+ CIEDeltaFieldOffset, BlockEdges))
return Err;
} else {
if (auto Err = processFDE(PC, B, RecordStartOffset,
@@ -263,7 +174,8 @@ Error EHFrameEdgeFixer::processBlock(ParseContext &PC, Block &B) {
Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
size_t RecordOffset, size_t RecordLength,
- size_t CIEDeltaFieldOffset) {
+ size_t CIEDeltaFieldOffset,
+ const BlockEdgeMap &BlockEdges) {
LLVM_DEBUG(dbgs() << " Record is CIE\n");
@@ -301,10 +213,6 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
uint64_t CodeAlignmentFactor = 0;
if (auto Err = RecordReader.readULEB128(CodeAlignmentFactor))
return Err;
- if (CodeAlignmentFactor != 1)
- return make_error<JITLinkError>("Unsupported CIE code alignment factor " +
- Twine(CodeAlignmentFactor) +
- " (expected 1)");
}
// Read and validate the data alignment factor.
@@ -312,76 +220,65 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
int64_t DataAlignmentFactor = 0;
if (auto Err = RecordReader.readSLEB128(DataAlignmentFactor))
return Err;
- if (DataAlignmentFactor != -8)
- return make_error<JITLinkError>("Unsupported CIE data alignment factor " +
- Twine(DataAlignmentFactor) +
- " (expected -8)");
}
// Skip the return address register field.
if (auto Err = RecordReader.skip(1))
return Err;
- uint64_t AugmentationDataLength = 0;
- if (auto Err = RecordReader.readULEB128(AugmentationDataLength))
- return Err;
+ if (AugInfo->AugmentationDataPresent) {
- uint32_t AugmentationDataStartOffset = RecordReader.getOffset();
+ CIEInfo.AugmentationDataPresent = true;
- uint8_t *NextField = &AugInfo->Fields[0];
- while (uint8_t Field = *NextField++) {
- switch (Field) {
- case 'L': {
- CIEInfo.FDEsHaveLSDAField = true;
- uint8_t LSDAPointerEncoding;
- if (auto Err = RecordReader.readInteger(LSDAPointerEncoding))
- return Err;
- if (!isSupportedPointerEncoding(LSDAPointerEncoding))
- return make_error<JITLinkError>(
- "Unsupported LSDA pointer encoding " +
- formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " +
- formatv("{0:x16}", CIESymbol.getAddress()));
- CIEInfo.LSDAPointerEncoding = LSDAPointerEncoding;
- break;
- }
- case 'P': {
- uint8_t PersonalityPointerEncoding = 0;
- if (auto Err = RecordReader.readInteger(PersonalityPointerEncoding))
- return Err;
- if (PersonalityPointerEncoding !=
- (dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4))
- return make_error<JITLinkError>(
- "Unspported personality pointer "
- "encoding " +
- formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " +
- formatv("{0:x16}", CIESymbol.getAddress()));
- uint32_t PersonalityPointerAddress;
- if (auto Err = RecordReader.readInteger(PersonalityPointerAddress))
- return Err;
- break;
- }
- case 'R': {
- uint8_t FDEPointerEncoding;
- if (auto Err = RecordReader.readInteger(FDEPointerEncoding))
- return Err;
- if (!isSupportedPointerEncoding(FDEPointerEncoding))
- return make_error<JITLinkError>(
- "Unsupported FDE pointer encoding " +
- formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " +
- formatv("{0:x16}", CIESymbol.getAddress()));
- CIEInfo.FDEPointerEncoding = FDEPointerEncoding;
- break;
- }
- default:
- llvm_unreachable("Invalid augmentation string field");
+ uint64_t AugmentationDataLength = 0;
+ if (auto Err = RecordReader.readULEB128(AugmentationDataLength))
+ return Err;
+
+ uint32_t AugmentationDataStartOffset = RecordReader.getOffset();
+
+ uint8_t *NextField = &AugInfo->Fields[0];
+ while (uint8_t Field = *NextField++) {
+ switch (Field) {
+ case 'L':
+ CIEInfo.LSDAPresent = true;
+ if (auto PE = readPointerEncoding(RecordReader, B, "LSDA"))
+ CIEInfo.LSDAEncoding = *PE;
+ else
+ return PE.takeError();
+ break;
+ case 'P': {
+ auto PersonalityPointerEncoding =
+ readPointerEncoding(RecordReader, B, "personality");
+ if (!PersonalityPointerEncoding)
+ return PersonalityPointerEncoding.takeError();
+ if (auto Err =
+ getOrCreateEncodedPointerEdge(
+ PC, BlockEdges, *PersonalityPointerEncoding, RecordReader,
+ B, RecordOffset + RecordReader.getOffset(), "personality")
+ .takeError())
+ return Err;
+ break;
+ }
+ case 'R':
+ if (auto PE = readPointerEncoding(RecordReader, B, "address")) {
+ CIEInfo.AddressEncoding = *PE;
+ if (CIEInfo.AddressEncoding == dwarf::DW_EH_PE_omit)
+ return make_error<JITLinkError>(
+ "Invalid address encoding DW_EH_PE_omit in CIE at " +
+ formatv("{0:x}", (B.getAddress() + RecordOffset).getValue()));
+ } else
+ return PE.takeError();
+ break;
+ default:
+ llvm_unreachable("Invalid augmentation string field");
+ }
}
- }
- if (RecordReader.getOffset() - AugmentationDataStartOffset >
- AugmentationDataLength)
- return make_error<JITLinkError>("Read past the end of the augmentation "
- "data while parsing fields");
+ if (RecordReader.getOffset() - AugmentationDataStartOffset >
+ AugmentationDataLength)
+ return make_error<JITLinkError>("Read past the end of the augmentation "
+ "data while parsing fields");
+ }
assert(!PC.CIEInfos.count(CIESymbol.getAddress()) &&
"Multiple CIEs recorded at the same address?");
@@ -394,7 +291,7 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
size_t RecordOffset, size_t RecordLength,
size_t CIEDeltaFieldOffset,
uint32_t CIEDelta,
- BlockEdgeMap &BlockEdges) {
+ const BlockEdgeMap &BlockEdges) {
LLVM_DEBUG(dbgs() << " Record is FDE\n");
orc::ExecutorAddr RecordAddress = B.getAddress() + RecordOffset;
@@ -422,8 +319,8 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
LLVM_DEBUG({
dbgs() << " Adding edge at "
- << formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset)
- << " to CIE at: " << formatv("{0:x16}", CIEAddress) << "\n";
+ << (RecordAddress + CIEDeltaFieldOffset)
+ << " to CIE at: " << CIEAddress << "\n";
});
if (auto CIEInfoOrErr = PC.findCIEInfo(CIEAddress))
CIEInfo = *CIEInfoOrErr;
@@ -435,8 +332,8 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
} else {
LLVM_DEBUG({
dbgs() << " Already has edge at "
- << formatv("{0:x16}", RecordAddress + CIEDeltaFieldOffset)
- << " to CIE at " << formatv("{0:x16}", CIEAddress) << "\n";
+ << (RecordAddress + CIEDeltaFieldOffset) << " to CIE at "
+ << CIEAddress << "\n";
});
auto &EI = CIEEdgeItr->second;
if (EI.Addend)
@@ -451,107 +348,41 @@ Error EHFrameEdgeFixer::processFDE(ParseContext &PC, Block &B,
}
}
- {
- // Process the PC-Begin field.
- Block *PCBeginBlock = nullptr;
- orc::ExecutorAddrDiff PCBeginFieldOffset = RecordReader.getOffset();
- auto PCEdgeItr = BlockEdges.find(RecordOffset + PCBeginFieldOffset);
- if (PCEdgeItr == BlockEdges.end()) {
- auto PCBeginPtrInfo =
- readEncodedPointer(CIEInfo->FDEPointerEncoding,
- RecordAddress + PCBeginFieldOffset, RecordReader);
- if (!PCBeginPtrInfo)
- return PCBeginPtrInfo.takeError();
- orc::ExecutorAddr PCBegin = PCBeginPtrInfo->first;
- Edge::Kind PCBeginEdgeKind = PCBeginPtrInfo->second;
- LLVM_DEBUG({
- dbgs() << " Adding edge at "
- << (RecordAddress + PCBeginFieldOffset) << " to PC at "
- << formatv("{0:x16}", PCBegin) << "\n";
- });
- auto PCBeginSym = getOrCreateSymbol(PC, PCBegin);
- if (!PCBeginSym)
- return PCBeginSym.takeError();
- B.addEdge(PCBeginEdgeKind, RecordOffset + PCBeginFieldOffset, *PCBeginSym,
- 0);
- PCBeginBlock = &PCBeginSym->getBlock();
- } else {
- auto &EI = PCEdgeItr->second;
- LLVM_DEBUG({
- dbgs() << " Already has edge at "
- << formatv("{0:x16}", RecordAddress + PCBeginFieldOffset)
- << " to PC at " << formatv("{0:x16}", EI.Target->getAddress());
- if (EI.Addend)
- dbgs() << " + " << formatv("{0:x16}", EI.Addend);
- dbgs() << "\n";
- });
-
- // Make sure the existing edge points at a defined block.
- if (!EI.Target->isDefined()) {
- auto EdgeAddr = RecordAddress + PCBeginFieldOffset;
- return make_error<JITLinkError>("FDE edge at " +
- formatv("{0:x16}", EdgeAddr) +
- " points at external block");
- }
- PCBeginBlock = &EI.Target->getBlock();
- if (auto Err = RecordReader.skip(
- getPointerEncodingDataSize(CIEInfo->FDEPointerEncoding)))
- return Err;
- }
-
+ // Process the PC-Begin field.
+ LLVM_DEBUG({
+ dbgs() << " Processing PC-begin at "
+ << (RecordAddress + RecordReader.getOffset()) << "\n";
+ });
+ if (auto PCBegin = getOrCreateEncodedPointerEdge(
+ PC, BlockEdges, CIEInfo->AddressEncoding, RecordReader, B,
+ RecordReader.getOffset(), "PC begin")) {
+ assert(*PCBegin && "PC-begin symbol not set");
// Add a keep-alive edge from the FDE target to the FDE to ensure that the
// FDE is kept alive if its target is.
- assert(PCBeginBlock && "PC-begin block not recorded");
LLVM_DEBUG({
dbgs() << " Adding keep-alive edge from target at "
- << formatv("{0:x16}", PCBeginBlock->getAddress()) << " to FDE at "
- << formatv("{0:x16}", RecordAddress) << "\n";
+ << (*PCBegin)->getBlock().getAddress() << " to FDE at "
+ << RecordAddress << "\n";
});
- PCBeginBlock->addEdge(Edge::KeepAlive, 0, FDESymbol, 0);
- }
+ (*PCBegin)->getBlock().addEdge(Edge::KeepAlive, 0, FDESymbol, 0);
+ } else
+ return PCBegin.takeError();
// Skip over the PC range size field.
- if (auto Err = RecordReader.skip(
- getPointerEncodingDataSize(CIEInfo->FDEPointerEncoding)))
+ if (auto Err = skipEncodedPointer(CIEInfo->AddressEncoding, RecordReader))
return Err;
- if (CIEInfo->FDEsHaveLSDAField) {
+ if (CIEInfo->AugmentationDataPresent) {
uint64_t AugmentationDataSize;
if (auto Err = RecordReader.readULEB128(AugmentationDataSize))
return Err;
- orc::ExecutorAddrDiff LSDAFieldOffset = RecordReader.getOffset();
- auto LSDAEdgeItr = BlockEdges.find(RecordOffset + LSDAFieldOffset);
- if (LSDAEdgeItr == BlockEdges.end()) {
- auto LSDAPointerInfo =
- readEncodedPointer(CIEInfo->LSDAPointerEncoding,
- RecordAddress + LSDAFieldOffset, RecordReader);
- if (!LSDAPointerInfo)
- return LSDAPointerInfo.takeError();
- orc::ExecutorAddr LSDA = LSDAPointerInfo->first;
- Edge::Kind LSDAEdgeKind = LSDAPointerInfo->second;
- auto LSDASym = getOrCreateSymbol(PC, LSDA);
- if (!LSDASym)
- return LSDASym.takeError();
- LLVM_DEBUG({
- dbgs() << " Adding edge at "
- << formatv("{0:x16}", RecordAddress + LSDAFieldOffset)
- << " to LSDA at " << formatv("{0:x16}", LSDA) << "\n";
- });
- B.addEdge(LSDAEdgeKind, RecordOffset + LSDAFieldOffset, *LSDASym, 0);
- } else {
- LLVM_DEBUG({
- auto &EI = LSDAEdgeItr->second;
- dbgs() << " Already has edge at "
- << formatv("{0:x16}", RecordAddress + LSDAFieldOffset)
- << " to LSDA at " << formatv("{0:x16}", EI.Target->getAddress());
- if (EI.Addend)
- dbgs() << " + " << formatv("{0:x16}", EI.Addend);
- dbgs() << "\n";
- });
- if (auto Err = RecordReader.skip(AugmentationDataSize))
+ if (CIEInfo->LSDAPresent)
+ if (auto Err = getOrCreateEncodedPointerEdge(
+ PC, BlockEdges, CIEInfo->LSDAEncoding, RecordReader, B,
+ RecordReader.getOffset(), "LSDA")
+ .takeError())
return Err;
- }
} else {
LLVM_DEBUG(dbgs() << " Record does not have LSDA field.\n");
}
@@ -600,129 +431,163 @@ EHFrameEdgeFixer::parseAugmentationString(BinaryStreamReader &RecordReader) {
return std::move(AugInfo);
}
-bool EHFrameEdgeFixer::isSupportedPointerEncoding(uint8_t PointerEncoding) {
+Expected<uint8_t> EHFrameEdgeFixer::readPointerEncoding(BinaryStreamReader &R,
+ Block &InBlock,
+ const char *FieldName) {
using namespace dwarf;
- // We only support PC-rel for now.
- if ((PointerEncoding & 0x70) != DW_EH_PE_pcrel)
- return false;
-
- // readEncodedPointer does not handle indirect.
- if (PointerEncoding & DW_EH_PE_indirect)
- return false;
+ uint8_t PointerEncoding;
+ if (auto Err = R.readInteger(PointerEncoding))
+ return std::move(Err);
- // Supported datatypes.
+ bool Supported = true;
switch (PointerEncoding & 0xf) {
- case DW_EH_PE_absptr:
- case DW_EH_PE_udata4:
- case DW_EH_PE_udata8:
- case DW_EH_PE_sdata4:
- case DW_EH_PE_sdata8:
- return true;
+ case DW_EH_PE_uleb128:
+ case DW_EH_PE_udata2:
+ case DW_EH_PE_sleb128:
+ case DW_EH_PE_sdata2:
+ Supported = false;
+ break;
+ }
+ if (Supported) {
+ switch (PointerEncoding & 0x70) {
+ case DW_EH_PE_textrel:
+ case DW_EH_PE_datarel:
+ case DW_EH_PE_funcrel:
+ case DW_EH_PE_aligned:
+ Supported = false;
+ break;
+ }
}
- return false;
+ if (Supported)
+ return PointerEncoding;
+
+ return make_error<JITLinkError>("Unsupported pointer encoding " +
+ formatv("{0:x2}", PointerEncoding) + " for " +
+ FieldName + "in CFI record at " +
+ formatv("{0:x16}", InBlock.getAddress()));
}
-unsigned EHFrameEdgeFixer::getPointerEncodingDataSize(uint8_t PointerEncoding) {
+Error EHFrameEdgeFixer::skipEncodedPointer(uint8_t PointerEncoding,
+ BinaryStreamReader &RecordReader) {
using namespace dwarf;
- assert(isSupportedPointerEncoding(PointerEncoding) &&
- "Unsupported pointer encoding");
+ // Switch absptr to corresponding udata encoding.
+ if ((PointerEncoding & 0xf) == DW_EH_PE_absptr)
+ PointerEncoding |= (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4;
+
switch (PointerEncoding & 0xf) {
- case DW_EH_PE_absptr:
- return PointerSize;
case DW_EH_PE_udata4:
case DW_EH_PE_sdata4:
- return 4;
+ if (auto Err = RecordReader.skip(4))
+ return Err;
+ break;
case DW_EH_PE_udata8:
case DW_EH_PE_sdata8:
- return 8;
+ if (auto Err = RecordReader.skip(8))
+ return Err;
+ break;
default:
- llvm_unreachable("Unsupported encoding");
+ llvm_unreachable("Unrecognized encoding");
}
+ return Error::success();
}
-Expected<std::pair<orc::ExecutorAddr, Edge::Kind>>
-EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding,
- orc::ExecutorAddr PointerFieldAddress,
- BinaryStreamReader &RecordReader) {
- assert(isSupportedPointerEncoding(PointerEncoding) &&
- "Unsupported pointer encoding");
-
+Expected<Symbol *> EHFrameEdgeFixer::getOrCreateEncodedPointerEdge(
+ ParseContext &PC, const BlockEdgeMap &BlockEdges, uint8_t PointerEncoding,
+ BinaryStreamReader &RecordReader, Block &BlockToFix,
+ size_t PointerFieldOffset, const char *FieldName) {
using namespace dwarf;
- // Isolate data type, remap absptr to udata4 or udata8. This relies on us
- // having verified that the graph uses 32-bit or 64-bit pointers only at the
- // start of this pass.
- uint8_t EffectiveType = PointerEncoding & 0xf;
- if (EffectiveType == DW_EH_PE_absptr)
- EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4;
+ if (PointerEncoding == DW_EH_PE_omit)
+ return nullptr;
+
+ // If there's already an edge here then just skip the encoded pointer and
+ // return the edge's target.
+ {
+ auto EdgeI = BlockEdges.find(PointerFieldOffset);
+ if (EdgeI != BlockEdges.end()) {
+ LLVM_DEBUG({
+ dbgs() << " Existing edge at "
+ << (BlockToFix.getAddress() + PointerFieldOffset) << " to "
+ << FieldName << " at " << EdgeI->second.Target->getAddress();
+ if (EdgeI->second.Target->hasName())
+ dbgs() << " (" << EdgeI->second.Target->getName() << ")";
+ dbgs() << "\n";
+ });
+ if (auto Err = skipEncodedPointer(PointerEncoding, RecordReader))
+ return std::move(Err);
+ return EdgeI->second.Target;
+ }
+ }
+
+ // Switch absptr to corresponding udata encoding.
+ if ((PointerEncoding & 0xf) == DW_EH_PE_absptr)
+ PointerEncoding |= (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4;
- orc::ExecutorAddr Addr;
- Edge::Kind PointerEdgeKind = Edge::Invalid;
- switch (EffectiveType) {
+ // We need to create an edge. Start by reading the field value.
+ uint64_t FieldValue;
+ bool Is64Bit = false;
+ switch (PointerEncoding & 0xf) {
case DW_EH_PE_udata4: {
uint32_t Val;
if (auto Err = RecordReader.readInteger(Val))
return std::move(Err);
- Addr = PointerFieldAddress + Val;
- PointerEdgeKind = Delta32;
- break;
- }
- case DW_EH_PE_udata8: {
- uint64_t Val;
- if (auto Err = RecordReader.readInteger(Val))
- return std::move(Err);
- Addr = PointerFieldAddress + Val;
- PointerEdgeKind = Delta64;
+ FieldValue = Val;
break;
}
case DW_EH_PE_sdata4: {
- int32_t Val;
+ uint32_t Val;
if (auto Err = RecordReader.readInteger(Val))
return std::move(Err);
- Addr = PointerFieldAddress + Val;
- PointerEdgeKind = Delta32;
+ FieldValue = Val;
break;
}
- case DW_EH_PE_sdata8: {
- int64_t Val;
- if (auto Err = RecordReader.readInteger(Val))
+ case DW_EH_PE_udata8:
+ case DW_EH_PE_sdata8:
+ Is64Bit = true;
+ if (auto Err = RecordReader.readInteger(FieldValue))
return std::move(Err);
- Addr = PointerFieldAddress + Val;
- PointerEdgeKind = Delta64;
break;
- }
+ default:
+ llvm_unreachable("Unsupported encoding");
}
- if (PointerEdgeKind == Edge::Invalid)
- return make_error<JITLinkError>(
- "Unspported edge kind for encoded pointer at " +
- formatv("{0:x}", PointerFieldAddress));
+ // Find the edge target and edge kind to use.
+ orc::ExecutorAddr Target;
+ Edge::Kind PtrEdgeKind = Edge::Invalid;
+ if ((PointerEncoding & 0x70) == DW_EH_PE_pcrel) {
+ Target = BlockToFix.getAddress() + PointerFieldOffset;
+ PtrEdgeKind = Is64Bit ? Delta64 : Delta32;
+ } else
+ PtrEdgeKind = Is64Bit ? Pointer64 : Pointer32;
+ Target += FieldValue;
+
+ // Find or create a symbol to point the edge at.
+ auto TargetSym = getOrCreateSymbol(PC, Target);
+ if (!TargetSym)
+ return TargetSym.takeError();
+ BlockToFix.addEdge(PtrEdgeKind, PointerFieldOffset, *TargetSym, 0);
- return std::make_pair(Addr, Delta64);
+ LLVM_DEBUG({
+ dbgs() << " Adding edge at "
+ << (BlockToFix.getAddress() + PointerFieldOffset) << " to "
+ << FieldName << " at " << TargetSym->getAddress();
+ if (TargetSym->hasName())
+ dbgs() << " (" << TargetSym->getName() << ")";
+ dbgs() << "\n";
+ });
+
+ return &*TargetSym;
}
Expected<Symbol &> EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC,
orc::ExecutorAddr Addr) {
- Symbol *CanonicalSym = nullptr;
-
- auto UpdateCanonicalSym = [&](Symbol *Sym) {
- if (!CanonicalSym || Sym->getLinkage() < CanonicalSym->getLinkage() ||
- Sym->getScope() < CanonicalSym->getScope() ||
- (Sym->hasName() && !CanonicalSym->hasName()) ||
- Sym->getName() < CanonicalSym->getName())
- CanonicalSym = Sym;
- };
-
- if (auto *SymbolsAtAddr = PC.AddrToSyms.getSymbolsAt(Addr))
- for (auto *Sym : *SymbolsAtAddr)
- UpdateCanonicalSym(Sym);
-
- // If we found an existing symbol at the given address then use it.
- if (CanonicalSym)
- return *CanonicalSym;
+ // See whether we have a canonical symbol for the given address already.
+ auto CanonicalSymI = PC.AddrToSym.find(Addr);
+ if (CanonicalSymI != PC.AddrToSym.end())
+ return *CanonicalSymI->second;
// Otherwise search for a block covering the address and create a new symbol.
auto *B = PC.AddrToBlock.getBlockCovering(Addr);
@@ -730,7 +595,10 @@ Expected<Symbol &> EHFrameEdgeFixer::getOrCreateSymbol(ParseContext &PC,
return make_error<JITLinkError>("No symbol or block covering address " +
formatv("{0:x16}", Addr));
- return PC.G.addAnonymousSymbol(*B, Addr - B->getAddress(), 0, false, false);
+ auto &S =
+ PC.G.addAnonymousSymbol(*B, Addr - B->getAddress(), 0, false, false);
+ PC.AddrToSym[S.getAddress()] = &S;
+ return S;
}
char EHFrameNullTerminator::NullTerminatorBlockContent[4] = {0, 0, 0, 0};
@@ -756,7 +624,7 @@ Error EHFrameNullTerminator::operator()(LinkGraph &G) {
return Error::success();
}
-EHFrameRegistrar::~EHFrameRegistrar() {}
+EHFrameRegistrar::~EHFrameRegistrar() = default;
Error InProcessEHFrameRegistrar::registerEHFrames(
orc::ExecutorAddrRange EHFrameSection) {
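// A minimal sketch (not from the patch; helper names are hypothetical) of the
// DW_EH_PE_* byte layout that readPointerEncoding() above filters on: the low
// nibble selects the data format, bits 4-6 select how the value is applied
// (absolute, pc-relative, ...), and bit 7 marks indirection.
#include <cstdint>

static uint8_t peFormat(uint8_t Enc) { return Enc & 0x0f; }      // udata4, sdata8, ...
static uint8_t peApplication(uint8_t Enc) { return Enc & 0x70; } // absptr, pcrel, ...
static bool peIsIndirect(uint8_t Enc) { return (Enc & 0x80) != 0; }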
diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
index ef4b47b9aa28..55cf7fc63ee7 100644
--- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
+++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
@@ -21,27 +21,16 @@
namespace llvm {
namespace jitlink {
-/// A LinkGraph pass that splits blocks in an eh-frame section into sub-blocks
-/// representing individual eh-frames.
-/// EHFrameSplitter should not be run without EHFrameEdgeFixer, which is
-/// responsible for adding FDE-to-CIE edges.
-class EHFrameSplitter {
-public:
- EHFrameSplitter(StringRef EHFrameSectionName);
- Error operator()(LinkGraph &G);
-
-private:
- Error processBlock(LinkGraph &G, Block &B, LinkGraph::SplitBlockCache &Cache);
-
- StringRef EHFrameSectionName;
-};
-
/// A LinkGraph pass that adds missing FDE-to-CIE, FDE-to-PC and FDE-to-LSDA
/// edges.
class EHFrameEdgeFixer {
public:
+ /// Create an eh-frame edge fixer.
+ /// If a given edge-kind is not supported on the target architecture then
+ /// Edge::Invalid should be used.
EHFrameEdgeFixer(StringRef EHFrameSectionName, unsigned PointerSize,
- Edge::Kind Delta64, Edge::Kind Delta32,
+ Edge::Kind Pointer32, Edge::Kind Pointer64,
+ Edge::Kind Delta32, Edge::Kind Delta64,
Edge::Kind NegDelta32);
Error operator()(LinkGraph &G);
@@ -57,9 +46,10 @@ private:
CIEInformation() = default;
CIEInformation(Symbol &CIESymbol) : CIESymbol(&CIESymbol) {}
Symbol *CIESymbol = nullptr;
- bool FDEsHaveLSDAField = false;
- uint8_t FDEPointerEncoding = 0;
- uint8_t LSDAPointerEncoding = 0;
+ bool AugmentationDataPresent = false;
+ bool LSDAPresent = false;
+ uint8_t LSDAEncoding = 0;
+ uint8_t AddressEncoding = 0;
};
struct EdgeTarget {
@@ -87,33 +77,38 @@ private:
LinkGraph &G;
CIEInfosMap CIEInfos;
BlockAddressMap AddrToBlock;
- SymbolAddressMap AddrToSyms;
+ DenseMap<orc::ExecutorAddr, Symbol *> AddrToSym;
};
Error processBlock(ParseContext &PC, Block &B);
Error processCIE(ParseContext &PC, Block &B, size_t RecordOffset,
- size_t RecordLength, size_t CIEDeltaFieldOffset);
+ size_t RecordLength, size_t CIEDeltaFieldOffset,
+ const BlockEdgeMap &BlockEdges);
Error processFDE(ParseContext &PC, Block &B, size_t RecordOffset,
size_t RecordLength, size_t CIEDeltaFieldOffset,
- uint32_t CIEDelta, BlockEdgeMap &BlockEdges);
+ uint32_t CIEDelta, const BlockEdgeMap &BlockEdges);
Expected<AugmentationInfo>
parseAugmentationString(BinaryStreamReader &RecordReader);
- static bool isSupportedPointerEncoding(uint8_t PointerEncoding);
- unsigned getPointerEncodingDataSize(uint8_t PointerEncoding);
- Expected<std::pair<orc::ExecutorAddr, Edge::Kind>>
- readEncodedPointer(uint8_t PointerEncoding,
- orc::ExecutorAddr PointerFieldAddress,
- BinaryStreamReader &RecordReader);
+ Expected<uint8_t> readPointerEncoding(BinaryStreamReader &RecordReader,
+ Block &InBlock, const char *FieldName);
+ Error skipEncodedPointer(uint8_t PointerEncoding,
+ BinaryStreamReader &RecordReader);
+ Expected<Symbol *> getOrCreateEncodedPointerEdge(
+ ParseContext &PC, const BlockEdgeMap &BlockEdges, uint8_t PointerEncoding,
+ BinaryStreamReader &RecordReader, Block &BlockToFix,
+ size_t PointerFieldOffset, const char *FieldName);
Expected<Symbol &> getOrCreateSymbol(ParseContext &PC,
orc::ExecutorAddr Addr);
StringRef EHFrameSectionName;
unsigned PointerSize;
- Edge::Kind Delta64;
+ Edge::Kind Pointer32;
+ Edge::Kind Pointer64;
Edge::Kind Delta32;
+ Edge::Kind Delta64;
Edge::Kind NegDelta32;
};
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp
index 2194a4fbf1f4..5a983c219627 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp
@@ -27,7 +27,7 @@ namespace jitlink {
StringRef ELFLinkGraphBuilderBase::CommonSectionName(".common");
ArrayRef<const char *> ELFLinkGraphBuilderBase::DwarfSectionNames = DWSecNames;
-ELFLinkGraphBuilderBase::~ELFLinkGraphBuilderBase() {}
+ELFLinkGraphBuilderBase::~ELFLinkGraphBuilderBase() = default;
} // end namespace jitlink
} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
index dd3eb97c21a0..98da3f155c3e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
@@ -11,20 +11,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
+#include "EHFrameSupportImpl.h"
#include "ELFLinkGraphBuilder.h"
#include "JITLinkGeneric.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
#include "llvm/ExecutionEngine/JITLink/aarch64.h"
#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Endian.h"
#define DEBUG_TYPE "jitlink"
using namespace llvm;
using namespace llvm::jitlink;
-namespace llvm {
-namespace jitlink {
+namespace {
class ELFJITLinker_aarch64 : public JITLinker<ELFJITLinker_aarch64> {
friend class JITLinker<ELFJITLinker_aarch64>;
@@ -37,50 +38,77 @@ public:
private:
Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
- using namespace aarch64;
- using namespace llvm::support;
-
- char *BlockWorkingMem = B.getAlreadyMutableContent().data();
- char *FixupPtr = BlockWorkingMem + E.getOffset();
- auto FixupAddress = B.getAddress() + E.getOffset();
- switch (E.getKind()) {
- case aarch64::R_AARCH64_CALL26: {
- assert((FixupAddress.getValue() & 0x3) == 0 &&
- "Call-inst is not 32-bit aligned");
- int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
-
- if (static_cast<uint64_t>(Value) & 0x3)
- return make_error<JITLinkError>("Call target is not 32-bit aligned");
-
- if (!isInt<28>(Value))
- return makeTargetOutOfRangeError(G, B, E);
-
- uint32_t RawInstr = *(little32_t *)FixupPtr;
- assert((RawInstr & 0x7fffffff) == 0x14000000 &&
- "RawInstr isn't a B or BR immediate instruction");
- uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
- uint32_t FixedInstr = RawInstr | Imm;
- *(little32_t *)FixupPtr = FixedInstr;
- break;
- }
- }
- return Error::success();
+ return aarch64::applyFixup(G, B, E);
}
};
template <typename ELFT>
class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder<ELFT> {
private:
- static Expected<aarch64::EdgeKind_aarch64>
+ enum ELFAArch64RelocationKind : Edge::Kind {
+ ELFCall26 = Edge::FirstRelocation,
+ ELFAdrPage21,
+ ELFAddAbs12,
+ ELFLdSt8Abs12,
+ ELFLdSt16Abs12,
+ ELFLdSt32Abs12,
+ ELFLdSt64Abs12,
+ ELFLdSt128Abs12,
+ ELFMovwAbsG0,
+ ELFMovwAbsG1,
+ ELFMovwAbsG2,
+ ELFMovwAbsG3,
+ ELFAbs64,
+ ELFPrel32,
+ ELFPrel64,
+ ELFAdrGOTPage21,
+ ELFLd64GOTLo12,
+ };
+
+ static Expected<ELFAArch64RelocationKind>
getRelocationKind(const uint32_t Type) {
using namespace aarch64;
switch (Type) {
case ELF::R_AARCH64_CALL26:
- return EdgeKind_aarch64::R_AARCH64_CALL26;
+ case ELF::R_AARCH64_JUMP26:
+ return ELFCall26;
+ case ELF::R_AARCH64_ADR_PREL_PG_HI21:
+ return ELFAdrPage21;
+ case ELF::R_AARCH64_ADD_ABS_LO12_NC:
+ return ELFAddAbs12;
+ case ELF::R_AARCH64_LDST8_ABS_LO12_NC:
+ return ELFLdSt8Abs12;
+ case ELF::R_AARCH64_LDST16_ABS_LO12_NC:
+ return ELFLdSt16Abs12;
+ case ELF::R_AARCH64_LDST32_ABS_LO12_NC:
+ return ELFLdSt32Abs12;
+ case ELF::R_AARCH64_LDST64_ABS_LO12_NC:
+ return ELFLdSt64Abs12;
+ case ELF::R_AARCH64_LDST128_ABS_LO12_NC:
+ return ELFLdSt128Abs12;
+ case ELF::R_AARCH64_MOVW_UABS_G0_NC:
+ return ELFMovwAbsG0;
+ case ELF::R_AARCH64_MOVW_UABS_G1_NC:
+ return ELFMovwAbsG1;
+ case ELF::R_AARCH64_MOVW_UABS_G2_NC:
+ return ELFMovwAbsG2;
+ case ELF::R_AARCH64_MOVW_UABS_G3:
+ return ELFMovwAbsG3;
+ case ELF::R_AARCH64_ABS64:
+ return ELFAbs64;
+ case ELF::R_AARCH64_PREL32:
+ return ELFPrel32;
+ case ELF::R_AARCH64_PREL64:
+ return ELFPrel64;
+ case ELF::R_AARCH64_ADR_GOT_PAGE:
+ return ELFAdrGOTPage21;
+ case ELF::R_AARCH64_LD64_GOT_LO12_NC:
+ return ELFLd64GOTLo12;
}
- return make_error<JITLinkError>("Unsupported aarch64 relocation:" +
- formatv("{0:d}", Type));
+ return make_error<JITLinkError>(
+ "Unsupported aarch64 relocation:" + formatv("{0:d}: ", Type) +
+ object::getELFRelocationTypeName(ELF::EM_AARCH64, Type));
}
Error addRelocations() override {
@@ -99,6 +127,7 @@ private:
Error addSingleRelocation(const typename ELFT::Rela &Rel,
const typename ELFT::Shdr &FixupSect,
Block &BlockToFix) {
+ using support::ulittle32_t;
using Base = ELFLinkGraphBuilder<ELFT>;
uint32_t SymbolIndex = Rel.getSymbol(false);
@@ -116,18 +145,159 @@ private:
inconvertibleErrorCode());
uint32_t Type = Rel.getType(false);
- Expected<aarch64::EdgeKind_aarch64> Kind = getRelocationKind(Type);
- if (!Kind)
- return Kind.takeError();
+ Expected<ELFAArch64RelocationKind> RelocKind = getRelocationKind(Type);
+ if (!RelocKind)
+ return RelocKind.takeError();
int64_t Addend = Rel.r_addend;
orc::ExecutorAddr FixupAddress =
orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset;
Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
- Edge GE(*Kind, Offset, *GraphSymbol, Addend);
+
+ // Get a pointer to the fixup content.
+ const void *FixupContent = BlockToFix.getContent().data() +
+ (FixupAddress - BlockToFix.getAddress());
+
+ Edge::Kind Kind = Edge::Invalid;
+
+ switch (*RelocKind) {
+ case ELFCall26: {
+ Kind = aarch64::Branch26;
+ break;
+ }
+ case ELFAdrPage21: {
+ Kind = aarch64::Page21;
+ break;
+ }
+ case ELFAddAbs12: {
+ Kind = aarch64::PageOffset12;
+ break;
+ }
+ case ELFLdSt8Abs12: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isLoadStoreImm12(Instr) ||
+ aarch64::getPageOffset12Shift(Instr) != 0)
+ return make_error<JITLinkError>(
+ "R_AARCH64_LDST8_ABS_LO12_NC target is not a "
+ "LDRB/STRB (imm12) instruction");
+
+ Kind = aarch64::PageOffset12;
+ break;
+ }
+ case ELFLdSt16Abs12: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isLoadStoreImm12(Instr) ||
+ aarch64::getPageOffset12Shift(Instr) != 1)
+ return make_error<JITLinkError>(
+ "R_AARCH64_LDST16_ABS_LO12_NC target is not a "
+ "LDRH/STRH (imm12) instruction");
+
+ Kind = aarch64::PageOffset12;
+ break;
+ }
+ case ELFLdSt32Abs12: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isLoadStoreImm12(Instr) ||
+ aarch64::getPageOffset12Shift(Instr) != 2)
+ return make_error<JITLinkError>(
+ "R_AARCH64_LDST32_ABS_LO12_NC target is not a "
+ "LDR/STR (imm12, 32 bit) instruction");
+
+ Kind = aarch64::PageOffset12;
+ break;
+ }
+ case ELFLdSt64Abs12: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isLoadStoreImm12(Instr) ||
+ aarch64::getPageOffset12Shift(Instr) != 3)
+ return make_error<JITLinkError>(
+ "R_AARCH64_LDST64_ABS_LO12_NC target is not a "
+ "LDR/STR (imm12, 64 bit) instruction");
+
+ Kind = aarch64::PageOffset12;
+ break;
+ }
+ case ELFLdSt128Abs12: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isLoadStoreImm12(Instr) ||
+ aarch64::getPageOffset12Shift(Instr) != 4)
+ return make_error<JITLinkError>(
+ "R_AARCH64_LDST128_ABS_LO12_NC target is not a "
+ "LDR/STR (imm12, 128 bit) instruction");
+
+ Kind = aarch64::PageOffset12;
+ break;
+ }
+ case ELFMovwAbsG0: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isMoveWideImm16(Instr) ||
+ aarch64::getMoveWide16Shift(Instr) != 0)
+ return make_error<JITLinkError>(
+ "R_AARCH64_MOVW_UABS_G0_NC target is not a "
+ "MOVK/MOVZ (imm16, LSL #0) instruction");
+
+ Kind = aarch64::MoveWide16;
+ break;
+ }
+ case ELFMovwAbsG1: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isMoveWideImm16(Instr) ||
+ aarch64::getMoveWide16Shift(Instr) != 16)
+ return make_error<JITLinkError>(
+ "R_AARCH64_MOVW_UABS_G1_NC target is not a "
+ "MOVK/MOVZ (imm16, LSL #16) instruction");
+
+ Kind = aarch64::MoveWide16;
+ break;
+ }
+ case ELFMovwAbsG2: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isMoveWideImm16(Instr) ||
+ aarch64::getMoveWide16Shift(Instr) != 32)
+ return make_error<JITLinkError>(
+ "R_AARCH64_MOVW_UABS_G2_NC target is not a "
+ "MOVK/MOVZ (imm16, LSL #32) instruction");
+
+ Kind = aarch64::MoveWide16;
+ break;
+ }
+ case ELFMovwAbsG3: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isMoveWideImm16(Instr) ||
+ aarch64::getMoveWide16Shift(Instr) != 48)
+ return make_error<JITLinkError>(
+ "R_AARCH64_MOVW_UABS_G3 target is not a "
+ "MOVK/MOVZ (imm16, LSL #48) instruction");
+
+ Kind = aarch64::MoveWide16;
+ break;
+ }
+ case ELFAbs64: {
+ Kind = aarch64::Pointer64;
+ break;
+ }
+ case ELFPrel32: {
+ Kind = aarch64::Delta32;
+ break;
+ }
+ case ELFPrel64: {
+ Kind = aarch64::Delta64;
+ break;
+ }
+ case ELFAdrGOTPage21: {
+ Kind = aarch64::GOTPage21;
+ break;
+ }
+ case ELFLd64GOTLo12: {
+ Kind = aarch64::GOTPageOffset12;
+ break;
+ }
+ }
+
+ Edge GE(Kind, Offset, *GraphSymbol, Addend);
LLVM_DEBUG({
dbgs() << " ";
- printEdge(dbgs(), BlockToFix, GE, aarch64::getEdgeKindName(*Kind));
+ printEdge(dbgs(), BlockToFix, GE, aarch64::getEdgeKindName(Kind));
dbgs() << "\n";
});
@@ -135,6 +305,48 @@ private:
return Error::success();
}
+ /// Return the string name of the given ELF aarch64 edge kind.
+ const char *getELFAArch64RelocationKindName(Edge::Kind R) {
+ switch (R) {
+ case ELFCall26:
+ return "ELFCall26";
+ case ELFAdrPage21:
+ return "ELFAdrPage21";
+ case ELFAddAbs12:
+ return "ELFAddAbs12";
+ case ELFLdSt8Abs12:
+ return "ELFLdSt8Abs12";
+ case ELFLdSt16Abs12:
+ return "ELFLdSt16Abs12";
+ case ELFLdSt32Abs12:
+ return "ELFLdSt32Abs12";
+ case ELFLdSt64Abs12:
+ return "ELFLdSt64Abs12";
+ case ELFLdSt128Abs12:
+ return "ELFLdSt128Abs12";
+ case ELFMovwAbsG0:
+ return "ELFMovwAbsG0";
+ case ELFMovwAbsG1:
+ return "ELFMovwAbsG1";
+ case ELFMovwAbsG2:
+ return "ELFMovwAbsG2";
+ case ELFMovwAbsG3:
+ return "ELFMovwAbsG3";
+ case ELFAbs64:
+ return "ELFAbs64";
+ case ELFPrel32:
+ return "ELFPrel32";
+ case ELFPrel64:
+ return "ELFPrel64";
+ case ELFAdrGOTPage21:
+ return "ELFAdrGOTPage21";
+ case ELFLd64GOTLo12:
+ return "ELFLd64GOTLo12";
+ default:
+ return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
+ }
+ }
+
public:
ELFLinkGraphBuilder_aarch64(StringRef FileName,
const object::ELFFile<ELFT> &Obj, const Triple T)
@@ -142,6 +354,20 @@ public:
aarch64::getEdgeKindName) {}
};
+Error buildTables_ELF_aarch64(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+
+ aarch64::GOTTableManager GOT;
+ aarch64::PLTTableManager PLT(GOT);
+ visitExistingEdges(G, GOT, PLT);
+ return Error::success();
+}
+
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer) {
LLVM_DEBUG({
@@ -168,11 +394,22 @@ void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
PassConfiguration Config;
const Triple &TT = G->getTargetTriple();
if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ // Add eh-frame passes.
+ Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame"));
+ Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
+ ".eh_frame", 8, aarch64::Pointer32, aarch64::Pointer64,
+ aarch64::Delta32, aarch64::Delta64, aarch64::NegDelta32));
+
+ // Add a mark-live pass.
if (auto MarkLive = Ctx->getMarkLivePass(TT))
Config.PrePrunePasses.push_back(std::move(MarkLive));
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
+
+ // Add an in-place GOT/Stubs build pass.
+ Config.PostPrunePasses.push_back(buildTables_ELF_aarch64);
}
+
if (auto Err = Ctx->modifyPassConfig(*G, Config))
return Ctx->notifyFailed(std::move(Err));
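// A simplified model (an assumption for illustration, not the decoder in
// aarch64.h) of why the ELFLdSt*Abs12 cases above check a per-width shift: an
// AArch64 unsigned-immediate load/store scales its 12-bit offset by the
// access size, so each R_AARCH64_LDST<N>_ABS_LO12_NC relocation must land on
// an instruction whose implicit shift is log2(N/8). The scalar access size
// comes from the 'size' field in bits [31:30]; the real helper also handles
// the 128-bit vector form via the opc/V bits.
#include <cstdint>

static unsigned ldStImm12ShiftSketch(uint32_t Instr) {
  return (Instr >> 30) & 0x3; // 0 = byte, 1 = half, 2 = word, 3 = doubleword.
}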
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index f83001417e94..197ab71f5274 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -160,23 +160,16 @@ static Expected<const Edge &> getRISCVPCRelHi20(const Edge &E) {
}
static uint32_t extractBits(uint32_t Num, unsigned Low, unsigned Size) {
- return (Num & (((1ULL << (Size + 1)) - 1) << Low)) >> Low;
+ return (Num & (((1ULL << Size) - 1) << Low)) >> Low;
}
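// A worked sketch (not from the patch) of the corrected mask above: the old
// `(1ULL << (Size + 1)) - 1` covered Size + 1 bits, one too many; the mask
// must cover exactly Size bits starting at bit Low.
#include <cassert>
#include <cstdint>

static uint32_t extractBitsSketch(uint32_t Num, unsigned Low, unsigned Size) {
  return (Num & (((1ULL << Size) - 1) << Low)) >> Low;
}

static void extractBitsExample() {
  // Bits [4:2] of 0b1101100 are 0b011.
  assert(extractBitsSketch(0b1101100u, 2, 3) == 0b011u);
}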
-inline Error checkAlignment(llvm::orc::ExecutorAddr loc, uint64_t v, int n,
- const Edge &E) {
- if (v & (n - 1))
- return make_error<JITLinkError>("0x" + llvm::utohexstr(loc.getValue()) +
- " improper alignment for relocation " +
- formatv("{0:d}", E.getKind()) + ": 0x" +
- llvm::utohexstr(v) + " is not aligned to " +
- Twine(n) + " bytes");
- return Error::success();
+static inline bool isAlignmentCorrect(uint64_t Value, int N) {
+ return (Value & (N - 1)) ? false : true;
}
-static inline bool isInRangeForImmS32(int64_t Value) {
- return (Value >= std::numeric_limits<int32_t>::min() &&
- Value <= std::numeric_limits<int32_t>::max());
+// Requires 0 < N <= 64.
+static inline bool isInRangeForImm(int64_t Value, int N) {
+ return Value == llvm::SignExtend64(Value, N);
}
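// An equivalent check (a sketch, not from the patch) for what isInRangeForImm
// computes: Value fits an N-bit signed immediate exactly when it survives the
// SignExtend64 round-trip. For N == 12, 2047 maps to 2047 (in range) while
// 2048 sign-extends to -2048 (out of range).
#include <cstdint>

static bool fitsSignedBits(int64_t Value, int N) {
  int64_t Min = -(int64_t(1) << (N - 1));
  int64_t Max = (int64_t(1) << (N - 1)) - 1;
  return Value >= Min && Value <= Max;
}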
class ELFJITLinker_riscv : public JITLinker<ELFJITLinker_riscv> {
@@ -208,23 +201,36 @@ private:
}
case R_RISCV_BRANCH: {
int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- Error AlignmentIssue = checkAlignment(FixupAddress, Value, 2, E);
- if (AlignmentIssue) {
- return AlignmentIssue;
- }
- int64_t Lo = Value & 0xFFF;
- uint32_t Imm31_25 = extractBits(Lo, 5, 6) << 25 | extractBits(Lo, 12, 1)
- << 31;
- uint32_t Imm11_7 = extractBits(Lo, 1, 4) << 8 | extractBits(Lo, 11, 1)
- << 7;
+ if (LLVM_UNLIKELY(!isInRangeForImm(Value >> 1, 12)))
+ return makeTargetOutOfRangeError(G, B, E);
+ if (LLVM_UNLIKELY(!isAlignmentCorrect(Value, 2)))
+ return makeAlignmentError(FixupAddress, Value, 2, E);
+ uint32_t Imm31_25 =
+ extractBits(Value, 5, 6) << 25 | extractBits(Value, 12, 1) << 31;
+ uint32_t Imm11_7 =
+ extractBits(Value, 1, 4) << 8 | extractBits(Value, 11, 1) << 7;
uint32_t RawInstr = *(little32_t *)FixupPtr;
*(little32_t *)FixupPtr = (RawInstr & 0x1FFF07F) | Imm31_25 | Imm11_7;
break;
}
+ case R_RISCV_JAL: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
+ if (LLVM_UNLIKELY(!isInRangeForImm(Value >> 1, 20)))
+ return makeTargetOutOfRangeError(G, B, E);
+ if (LLVM_UNLIKELY(!isAlignmentCorrect(Value, 2)))
+ return makeAlignmentError(FixupAddress, Value, 2, E);
+ uint32_t Imm20 = extractBits(Value, 20, 1) << 31;
+ uint32_t Imm10_1 = extractBits(Value, 1, 10) << 21;
+ uint32_t Imm11 = extractBits(Value, 11, 1) << 20;
+ uint32_t Imm19_12 = extractBits(Value, 12, 8) << 12;
+ uint32_t RawInstr = *(little32_t *)FixupPtr;
+ *(little32_t *)FixupPtr = RawInstr | Imm20 | Imm10_1 | Imm11 | Imm19_12;
+ break;
+ }
case R_RISCV_HI20: {
int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue();
int64_t Hi = Value + 0x800;
- if (LLVM_UNLIKELY(!isInRangeForImmS32(Hi)))
+ if (LLVM_UNLIKELY(!isInRangeForImm(Hi, 32)))
return makeTargetOutOfRangeError(G, B, E);
uint32_t RawInstr = *(little32_t *)FixupPtr;
*(little32_t *)FixupPtr =
@@ -244,7 +250,7 @@ private:
case R_RISCV_CALL: {
int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
int64_t Hi = Value + 0x800;
- if (LLVM_UNLIKELY(!isInRangeForImmS32(Hi)))
+ if (LLVM_UNLIKELY(!isInRangeForImm(Hi, 32)))
return makeTargetOutOfRangeError(G, B, E);
int32_t Lo = Value & 0xFFF;
uint32_t RawInstrAuipc = *(little32_t *)FixupPtr;
@@ -258,7 +264,7 @@ private:
case R_RISCV_PCREL_HI20: {
int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
int64_t Hi = Value + 0x800;
- if (LLVM_UNLIKELY(!isInRangeForImmS32(Hi)))
+ if (LLVM_UNLIKELY(!isInRangeForImm(Hi, 32)))
return makeTargetOutOfRangeError(G, B, E);
uint32_t RawInstr = *(little32_t *)FixupPtr;
*(little32_t *)FixupPtr =
@@ -359,6 +365,13 @@ private:
*FixupPtr = static_cast<uint8_t>(Value);
break;
}
+ case R_RISCV_SUB6: {
+ int64_t Value =
+ *(reinterpret_cast<const uint8_t *>(FixupAddress.getValue())) & 0x3f;
+ Value -= E.getTarget().getAddress().getValue() - E.getAddend();
+ *FixupPtr = (*FixupPtr & 0xc0) | (static_cast<uint8_t>(Value) & 0x3f);
+ break;
+ }
case R_RISCV_SET6: {
int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue();
uint32_t RawData = *(little32_t *)FixupPtr;
@@ -410,6 +423,8 @@ private:
return EdgeKind_riscv::R_RISCV_64;
case ELF::R_RISCV_BRANCH:
return EdgeKind_riscv::R_RISCV_BRANCH;
+ case ELF::R_RISCV_JAL:
+ return EdgeKind_riscv::R_RISCV_JAL;
case ELF::R_RISCV_HI20:
return EdgeKind_riscv::R_RISCV_HI20;
case ELF::R_RISCV_LO12_I:
@@ -442,6 +457,8 @@ private:
return EdgeKind_riscv::R_RISCV_SUB16;
case ELF::R_RISCV_SUB8:
return EdgeKind_riscv::R_RISCV_SUB8;
+ case ELF::R_RISCV_SUB6:
+ return EdgeKind_riscv::R_RISCV_SUB6;
case ELF::R_RISCV_SET6:
return EdgeKind_riscv::R_RISCV_SET6;
case ELF::R_RISCV_SET8:
@@ -454,8 +471,9 @@ private:
return EdgeKind_riscv::R_RISCV_32_PCREL;
}
- return make_error<JITLinkError>("Unsupported riscv relocation:" +
- formatv("{0:d}", Type));
+ return make_error<JITLinkError>(
+ "Unsupported riscv relocation:" + formatv("{0:d}: ", Type) +
+ object::getELFRelocationTypeName(ELF::EM_RISCV, Type));
}
Error addRelocations() override {
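// A standalone model (not from the patch; names hypothetical) of the J-type
// immediate scatter performed by the R_RISCV_JAL case above: a 21-bit,
// 2-byte-aligned offset is packed as imm[20|10:1|11|19:12] into instruction
// bits [31|30:21|20|19:12].
#include <cstdint>

static uint32_t bitsOf(uint32_t V, unsigned Lo, unsigned N) {
  return (V >> Lo) & ((1u << N) - 1);
}

static uint32_t encodeJTypeImm(int32_t Offset) {
  uint32_t V = static_cast<uint32_t>(Offset);
  return bitsOf(V, 20, 1) << 31 | bitsOf(V, 1, 10) << 21 |
         bitsOf(V, 11, 1) << 20 | bitsOf(V, 12, 8) << 12;
}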
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
index 79d2cdbb30f1..8f21274bd1a3 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/TableManager.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
@@ -96,17 +97,6 @@ Error buildTables_ELF_x86_64(LinkGraph &G) {
}
} // namespace
-static const char *getELFX86_64RelocName(uint32_t Type) {
- switch (Type) {
-#define ELF_RELOC(Name, Number) \
- case Number: \
- return #Name;
-#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
-#undef ELF_RELOC
- }
- return "Unrecognized ELF/x86-64 relocation type";
-}
-
namespace llvm {
namespace jitlink {
@@ -145,9 +135,9 @@ private:
case ELF::R_X86_64_TLSGD:
return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32TLV;
}
- return make_error<JITLinkError>("Unsupported x86-64 relocation type " +
- formatv("{0:d}: ", Type) +
- getELFX86_64RelocName(Type));
+ return make_error<JITLinkError>(
+ "Unsupported x86-64 relocation type " + formatv("{0:d}: ", Type) +
+ object::getELFRelocationTypeName(ELF::EM_X86_64, Type));
}
Error addRelocations() override {
@@ -379,10 +369,10 @@ void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
- Config.PrePrunePasses.push_back(EHFrameSplitter(".eh_frame"));
- Config.PrePrunePasses.push_back(
- EHFrameEdgeFixer(".eh_frame", x86_64::PointerSize, x86_64::Delta64,
- x86_64::Delta32, x86_64::NegDelta32));
+ Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame"));
+ Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
+ ".eh_frame", x86_64::PointerSize, x86_64::Pointer32, x86_64::Pointer64,
+ x86_64::Delta32, x86_64::Delta64, x86_64::NegDelta32));
Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame"));
// Construct a JITLinker and run the link function.
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index 78a603cfed17..43efe0725cfe 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -336,7 +336,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF) {
void JITLinkAsyncLookupContinuation::anchor() {}
-JITLinkContext::~JITLinkContext() {}
+JITLinkContext::~JITLinkContext() = default;
bool JITLinkContext::shouldAddDefaultTargetPasses(const Triple &TT) const {
return true;
@@ -393,6 +393,15 @@ Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B,
return make_error<JITLinkError>(std::move(ErrMsg));
}
+Error makeAlignmentError(llvm::orc::ExecutorAddr Loc, uint64_t Value, int N,
+ const Edge &E) {
+ return make_error<JITLinkError>("0x" + llvm::utohexstr(Loc.getValue()) +
+ " improper alignment for relocation " +
+ formatv("{0:d}", E.getKind()) + ": 0x" +
+ llvm::utohexstr(Value) +
+ " is not aligned to " + Twine(N) + " bytes");
+}
+
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromObject(MemoryBufferRef ObjectBuffer) {
auto Magic = identify_magic(ObjectBuffer.getBuffer());
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 35ee050c8566..6d321a080829 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -20,7 +20,7 @@
namespace llvm {
namespace jitlink {
-JITLinkerBase::~JITLinkerBase() {}
+JITLinkerBase::~JITLinkerBase() = default;
void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 9315ac4f6120..acb759d6ce79 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -211,7 +211,7 @@ SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
SimpleSegmentAlloc::SimpleSegmentAlloc(SimpleSegmentAlloc &&) = default;
SimpleSegmentAlloc &
SimpleSegmentAlloc::operator=(SimpleSegmentAlloc &&) = default;
-SimpleSegmentAlloc::~SimpleSegmentAlloc() {}
+SimpleSegmentAlloc::~SimpleSegmentAlloc() = default;
SimpleSegmentAlloc::SegmentInfo SimpleSegmentAlloc::getSegInfo(AllocGroup AG) {
auto I = ContentBlocks.find(AG);
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index 62574604458c..1bf12f438be0 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -19,7 +19,7 @@ static const char *CommonSectionName = "__common";
namespace llvm {
namespace jitlink {
-MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
+MachOLinkGraphBuilder::~MachOLinkGraphBuilder() = default;
Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
@@ -368,7 +368,7 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
Twine(KV.first));
NSym.GraphSymbol = &G->addAbsoluteSymbol(
*NSym.Name, orc::ExecutorAddr(NSym.Value), 0, Linkage::Strong,
- Scope::Default, NSym.Desc & MachO::N_NO_DEAD_STRIP);
+ getScope(*NSym.Name, NSym.Type), NSym.Desc & MachO::N_NO_DEAD_STRIP);
break;
case MachO::N_SECT:
SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym);
@@ -644,17 +644,27 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
// Scan section for null characters.
for (size_t I = 0; I != NSec.Size; ++I)
if (NSec.Data[I] == '\0') {
- orc::ExecutorAddrDiff BlockEnd = I + 1;
- size_t BlockSize = BlockEnd - BlockStart;
+ size_t BlockSize = I + 1 - BlockStart;
// Create a block for this null terminated string.
auto &B = G->createContentBlock(*NSec.GraphSection,
{NSec.Data + BlockStart, BlockSize},
- NSec.Address + BlockStart, 1, 0);
+ NSec.Address + BlockStart, NSec.Alignment,
+ BlockStart % NSec.Alignment);
LLVM_DEBUG({
- dbgs() << " Created block " << formatv("{0:x}", B.getAddress())
- << " -- " << formatv("{0:x}", B.getAddress() + B.getSize())
- << " for \"" << StringRef(B.getContent().data()) << "\"\n";
+ dbgs() << " Created block " << B.getRange()
+ << ", align = " << B.getAlignment()
+ << ", align-ofs = " << B.getAlignmentOffset() << " for \"";
+ for (size_t J = 0; J != std::min(B.getSize(), size_t(16)); ++J)
+ switch (B.getContent()[J]) {
+ case '\0': break;
+ case '\n': dbgs() << "\\n"; break;
+ case '\t': dbgs() << "\\t"; break;
+ default: dbgs() << B.getContent()[J]; break;
+ }
+ if (B.getSize() > 16)
+ dbgs() << "...";
+ dbgs() << "\"\n";
});
// If there's no symbol at the start of this block then create one.
@@ -663,15 +673,13 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false);
setCanonicalSymbol(NSec, S);
LLVM_DEBUG({
- dbgs() << " Adding anonymous symbol for c-string block "
- << formatv("{0:x16} -- {1:x16}", S.getAddress(),
- S.getAddress() + BlockSize)
- << "\n";
+ dbgs() << " Adding symbol for c-string block " << B.getRange()
+ << ": <anonymous symbol> at offset 0\n";
});
}
// Process any remaining symbols that point into this block.
- auto LastCanonicalAddr = B.getAddress() + BlockEnd;
+ auto LastCanonicalAddr = B.getAddress() + BlockSize;
while (!NSyms.empty() && orc::ExecutorAddr(NSyms.back()->Value) <
B.getAddress() + BlockSize) {
auto &NSym = *NSyms.back();
@@ -686,8 +694,15 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
LastCanonicalAddr = orc::ExecutorAddr(NSym.Value);
}
- createStandardGraphSymbol(NSym, B, SymSize, SectionIsText, SymLive,
- IsCanonical);
+ auto &Sym = createStandardGraphSymbol(NSym, B, SymSize, SectionIsText,
+ SymLive, IsCanonical);
+ (void)Sym;
+ LLVM_DEBUG({
+ dbgs() << " Adding symbol for c-string block " << B.getRange()
+ << ": "
+ << (Sym.hasName() ? Sym.getName() : "<anonymous symbol>")
+ << " at offset " << formatv("{0:x}", Sym.getOffset()) << "\n";
+ });
NSyms.pop_back();
}
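// A minimal standalone sketch (not from the patch) of the scan above: a
// __cstring section is carved into one block per NUL-terminated string, each
// block spanning [start, terminating '\0'].
#include <string_view>
#include <vector>

static std::vector<std::string_view> splitCStrings(std::string_view Data) {
  std::vector<std::string_view> Blocks;
  size_t BlockStart = 0;
  for (size_t I = 0; I != Data.size(); ++I)
    if (Data[I] == '\0') {
      Blocks.push_back(Data.substr(BlockStart, I + 1 - BlockStart));
      BlockStart = I + 1;
    }
  return Blocks;
}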
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 3ca2e40c7263..dd50314d3ed7 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -11,15 +11,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h"
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
#include "MachOLinkGraphBuilder.h"
-#include "PerGraphGOTAndPLTStubsBuilder.h"
#define DEBUG_TYPE "jitlink"
using namespace llvm;
using namespace llvm::jitlink;
-using namespace llvm::jitlink::MachO_arm64_Edges;
namespace {
@@ -27,19 +27,39 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder {
public:
MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj)
: MachOLinkGraphBuilder(Obj, Triple("arm64-apple-darwin"),
- getMachOARM64RelocationKindName),
+ aarch64::getEdgeKindName),
NumSymbols(Obj.getSymtabLoadCommand().nsyms) {}
private:
+ enum MachOARM64RelocationKind : Edge::Kind {
+ MachOBranch26 = Edge::FirstRelocation,
+ MachOPointer32,
+ MachOPointer64,
+ MachOPointer64Anon,
+ MachOPage21,
+ MachOPageOffset12,
+ MachOGOTPage21,
+ MachOGOTPageOffset12,
+ MachOTLVPage21,
+ MachOTLVPageOffset12,
+ MachOPointerToGOT,
+ MachOPairedAddend,
+ MachOLDRLiteral19,
+ MachODelta32,
+ MachODelta64,
+ MachONegDelta32,
+ MachONegDelta64,
+ };
+
static Expected<MachOARM64RelocationKind>
getRelocationKind(const MachO::relocation_info &RI) {
switch (RI.r_type) {
case MachO::ARM64_RELOC_UNSIGNED:
if (!RI.r_pcrel) {
if (RI.r_length == 3)
- return RI.r_extern ? Pointer64 : Pointer64Anon;
+ return RI.r_extern ? MachOPointer64 : MachOPointer64Anon;
else if (RI.r_length == 2)
- return Pointer32;
+ return MachOPointer32;
}
break;
case MachO::ARM64_RELOC_SUBTRACTOR:
@@ -48,46 +68,46 @@ private:
// They may be turned into NegDelta<W> by parsePairRelocation.
if (!RI.r_pcrel && RI.r_extern) {
if (RI.r_length == 2)
- return Delta32;
+ return MachODelta32;
else if (RI.r_length == 3)
- return Delta64;
+ return MachODelta64;
}
break;
case MachO::ARM64_RELOC_BRANCH26:
if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return Branch26;
+ return MachOBranch26;
break;
case MachO::ARM64_RELOC_PAGE21:
if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return Page21;
+ return MachOPage21;
break;
case MachO::ARM64_RELOC_PAGEOFF12:
if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return PageOffset12;
+ return MachOPageOffset12;
break;
case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return GOTPage21;
+ return MachOGOTPage21;
break;
case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return GOTPageOffset12;
+ return MachOGOTPageOffset12;
break;
case MachO::ARM64_RELOC_POINTER_TO_GOT:
if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return PointerToGOT;
+ return MachOPointerToGOT;
break;
case MachO::ARM64_RELOC_ADDEND:
if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2)
- return PairedAddend;
+ return MachOPairedAddend;
break;
case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return TLVPage21;
+ return MachOTLVPage21;
break;
case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
- return TLVPageOffset12;
+ return MachOTLVPageOffset12;
break;
}
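The classifier above only maps a MachO relocation to an edge kind when r_type and all three attribute bits (r_pcrel, r_extern, r_length) agree; any other combination falls through to the error below. A standalone sketch of the pattern, assuming the standard MachO r_type values (ARM64_RELOC_UNSIGNED = 0, ARM64_RELOC_BRANCH26 = 2):

    #include <cstdint>
    #include <optional>

    // RelocInfo stands in for MachO::relocation_info in this sketch.
    struct RelocInfo { uint8_t Type; bool PCRel; bool Extern; uint8_t Length; };
    enum Kind { KBranch26, KPointer64 };

    std::optional<Kind> classify(const RelocInfo &RI) {
      // BRANCH26: must be pc-relative, extern, 4-byte (length code 2).
      if (RI.Type == 2 && RI.PCRel && RI.Extern && RI.Length == 2)
        return KBranch26;
      // UNSIGNED 8-byte (length code 3): absolute, extern pointer.
      if (RI.Type == 0 && !RI.PCRel && RI.Extern && RI.Length == 3)
        return KPointer64;
      return std::nullopt; // unsupported attribute combination
    }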
@@ -101,8 +121,7 @@ private:
", length=" + formatv("{0:d}", RI.r_length));
}
- using PairRelocInfo =
- std::tuple<MachOARM64RelocationKind, Symbol *, uint64_t>;
+ using PairRelocInfo = std::tuple<Edge::Kind, Symbol *, uint64_t>;
// Parses paired SUBTRACTOR/UNSIGNED relocations and, on success,
// returns the edge kind and addend to be used.
@@ -114,8 +133,8 @@ private:
object::relocation_iterator &RelEnd) {
using namespace support;
- assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) ||
- (SubtractorKind == Delta64 && SubRI.r_length == 3)) &&
+ assert(((SubtractorKind == MachODelta32 && SubRI.r_length == 2) ||
+ (SubtractorKind == MachODelta64 && SubRI.r_length == 3)) &&
"Subtractor kind should match length");
assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern");
assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel");
@@ -165,17 +184,18 @@ private:
FixupValue -= ToSymbol->getAddress().getValue();
}
- MachOARM64RelocationKind DeltaKind;
+ Edge::Kind DeltaKind;
Symbol *TargetSymbol;
uint64_t Addend;
if (&BlockToFix == &FromSymbol->getAddressable()) {
TargetSymbol = ToSymbol;
- DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32;
+ DeltaKind = (SubRI.r_length == 3) ? aarch64::Delta64 : aarch64::Delta32;
Addend = FixupValue + (FixupAddress - FromSymbol->getAddress());
// FIXME: handle extern 'from'.
} else if (&BlockToFix == &ToSymbol->getAddressable()) {
TargetSymbol = &*FromSymbol;
- DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32;
+ DeltaKind =
+ (SubRI.r_length == 3) ? aarch64::NegDelta64 : aarch64::NegDelta32;
Addend = FixupValue - (FixupAddress - ToSymbol->getAddress());
} else {
// BlockToFix was neither FromSymbol nor ToSymbol.
@@ -229,9 +249,9 @@ private:
MachO::relocation_info RI = getRelocationInfo(RelItr);
// Validate the relocation kind.
- auto Kind = getRelocationKind(RI);
- if (!Kind)
- return Kind.takeError();
+ auto MachORelocKind = getRelocationKind(RI);
+ if (!MachORelocKind)
+ return MachORelocKind.takeError();
// Find the address of the value to fix up.
orc::ExecutorAddr FixupAddress =
@@ -255,6 +275,8 @@ private:
return make_error<JITLinkError>(
"Relocation content extends past end of fixup block");
+ Edge::Kind Kind = Edge::Invalid;
+
// Get a pointer to the fixup content.
const char *FixupContent = BlockToFix->getContent().data() +
(FixupAddress - BlockToFix->getAddress());
@@ -263,7 +285,7 @@ private:
Symbol *TargetSymbol = nullptr;
uint64_t Addend = 0;
- if (*Kind == PairedAddend) {
+ if (*MachORelocKind == MachOPairedAddend) {
// If this is an Addend relocation then process it and move to the
// paired reloc.
@@ -275,19 +297,21 @@ private:
++RelItr;
RI = getRelocationInfo(RelItr);
- Kind = getRelocationKind(RI);
- if (!Kind)
- return Kind.takeError();
+ MachORelocKind = getRelocationKind(RI);
+ if (!MachORelocKind)
+ return MachORelocKind.takeError();
- if (*Kind != Branch26 && *Kind != Page21 && *Kind != PageOffset12)
+ if (*MachORelocKind != MachOBranch26 &&
+ *MachORelocKind != MachOPage21 &&
+ *MachORelocKind != MachOPageOffset12)
return make_error<JITLinkError>(
"Invalid relocation pair: Addend + " +
- StringRef(getMachOARM64RelocationKindName(*Kind)));
+ StringRef(getMachOARM64RelocationKindName(*MachORelocKind)));
LLVM_DEBUG({
dbgs() << " Addend: value = " << formatv("{0:x6}", Addend)
- << ", pair is " << getMachOARM64RelocationKindName(*Kind)
- << "\n";
+ << ", pair is "
+ << getMachOARM64RelocationKindName(*MachORelocKind) << "\n";
});
// Find the address of the value to fix up.
@@ -298,8 +322,8 @@ private:
"different target");
}
- switch (*Kind) {
- case Branch26: {
+ switch (*MachORelocKind) {
+ case MachOBranch26: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
@@ -308,23 +332,26 @@ private:
if ((Instr & 0x7fffffff) != 0x14000000)
return make_error<JITLinkError>("BRANCH26 target is not a B or BL "
"instruction with a zero addend");
+ Kind = aarch64::Branch26;
break;
}
- case Pointer32:
+ case MachOPointer32:
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
return TargetSymbolOrErr.takeError();
Addend = *(const ulittle32_t *)FixupContent;
+ Kind = aarch64::Pointer32;
break;
- case Pointer64:
+ case MachOPointer64:
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
return TargetSymbolOrErr.takeError();
Addend = *(const ulittle64_t *)FixupContent;
+ Kind = aarch64::Pointer64;
break;
- case Pointer64Anon: {
+ case MachOPointer64Anon: {
orc::ExecutorAddr TargetAddress(*(const ulittle64_t *)FixupContent);
auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
if (!TargetNSec)
@@ -335,11 +362,12 @@ private:
else
return TargetSymbolOrErr.takeError();
Addend = TargetAddress - TargetSymbol->getAddress();
+ Kind = aarch64::Pointer64Anon;
break;
}
- case Page21:
- case TLVPage21:
- case GOTPage21: {
+ case MachOPage21:
+ case MachOTLVPage21:
+ case MachOGOTPage21: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
@@ -349,9 +377,17 @@ private:
return make_error<JITLinkError>("PAGE21/GOTPAGE21 target is not an "
"ADRP instruction with a zero "
"addend");
+
+ if (*MachORelocKind == MachOPage21) {
+ Kind = aarch64::Page21;
+ } else if (*MachORelocKind == MachOTLVPage21) {
+ Kind = aarch64::TLVPage21;
+ } else if (*MachORelocKind == MachOGOTPage21) {
+ Kind = aarch64::GOTPage21;
+ }
break;
}
- case PageOffset12: {
+ case MachOPageOffset12: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
@@ -361,10 +397,11 @@ private:
if (EncodedAddend != 0)
return make_error<JITLinkError>("GOTPAGEOFF12 target has non-zero "
"encoded addend");
+ Kind = aarch64::PageOffset12;
break;
}
- case TLVPageOffset12:
- case GOTPageOffset12: {
+ case MachOTLVPageOffset12:
+ case MachOGOTPageOffset12: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
@@ -374,27 +411,35 @@ private:
return make_error<JITLinkError>("GOTPAGEOFF12 target is not an LDR "
"immediate instruction with a zero "
"addend");
+
+ if (*MachORelocKind == MachOTLVPageOffset12) {
+ Kind = aarch64::TLVPageOffset12;
+ } else if (*MachORelocKind == MachOGOTPageOffset12) {
+ Kind = aarch64::GOTPageOffset12;
+ }
break;
}
- case PointerToGOT:
+ case MachOPointerToGOT:
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
else
return TargetSymbolOrErr.takeError();
+
+ Kind = aarch64::PointerToGOT;
break;
- case Delta32:
- case Delta64: {
+ case MachODelta32:
+ case MachODelta64: {
// We use Delta32/Delta64 to represent SUBTRACTOR relocations.
// parsePairRelocation handles the paired reloc, and returns the
// edge kind to be used (either Delta32/Delta64, or
// NegDelta32/NegDelta64, depending on the direction of the
// subtraction) along with the addend.
auto PairInfo =
- parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress,
- FixupContent, ++RelItr, RelEnd);
+ parsePairRelocation(*BlockToFix, *MachORelocKind, RI,
+ FixupAddress, FixupContent, ++RelItr, RelEnd);
if (!PairInfo)
return PairInfo.takeError();
- std::tie(*Kind, TargetSymbol, Addend) = *PairInfo;
+ std::tie(Kind, TargetSymbol, Addend) = *PairInfo;
assert(TargetSymbol && "No target symbol from parsePairRelocation?");
break;
}
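The Delta/NegDelta choice made in parsePairRelocation follows directly from the fixup formulas used at apply time (now shared via aarch64::applyFixup): Delta encodes Target - Fixup + Addend, NegDelta encodes Fixup - Target + Addend. A tiny standalone check with made-up addresses:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Target = 0x2000, Fixup = 0x1000;
      int64_t Addend = 8;
      // Fixup lives in the 'from' block: Delta<W>.
      int64_t Delta = (int64_t)(Target - Fixup) + Addend;
      // Fixup lives in the 'to' block: NegDelta<W>.
      int64_t NegDelta = (int64_t)(Fixup - Target) + Addend;
      assert(Delta == 0x1008 && NegDelta == -0xff8);
      return 0;
    }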
@@ -405,108 +450,59 @@ private:
LLVM_DEBUG({
dbgs() << " ";
- Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
+ Edge GE(Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
Addend);
- printEdge(dbgs(), *BlockToFix, GE,
- getMachOARM64RelocationKindName(*Kind));
+ printEdge(dbgs(), *BlockToFix, GE, aarch64::getEdgeKindName(Kind));
dbgs() << "\n";
});
- BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
+ BlockToFix->addEdge(Kind, FixupAddress - BlockToFix->getAddress(),
*TargetSymbol, Addend);
}
}
return Error::success();
}
- unsigned NumSymbols = 0;
-};
-
-class PerGraphGOTAndPLTStubsBuilder_MachO_arm64
- : public PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_MachO_arm64> {
-public:
- using PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_MachO_arm64>::PerGraphGOTAndPLTStubsBuilder;
-
- bool isGOTEdgeToFix(Edge &E) const {
- return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
- E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12 ||
- E.getKind() == PointerToGOT;
- }
-
- Symbol &createGOTEntry(Symbol &Target) {
- auto &GOTEntryBlock = G.createContentBlock(
- getGOTSection(), getGOTEntryBlockContent(), orc::ExecutorAddr(), 8, 0);
- GOTEntryBlock.addEdge(Pointer64, 0, Target, 0);
- return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
- }
-
- void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
- if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
- E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12) {
- // Update the target, but leave the edge addend as-is.
- E.setTarget(GOTEntry);
- } else if (E.getKind() == PointerToGOT) {
- E.setTarget(GOTEntry);
- E.setKind(Delta32);
- } else
- llvm_unreachable("Not a GOT edge?");
- }
-
- bool isExternalBranchEdge(Edge &E) {
- return E.getKind() == Branch26 && !E.getTarget().isDefined();
- }
-
- Symbol &createPLTStub(Symbol &Target) {
- auto &StubContentBlock = G.createContentBlock(
- getStubsSection(), getStubBlockContent(), orc::ExecutorAddr(), 1, 0);
- // Re-use GOT entries for stub targets.
- auto &GOTEntrySymbol = getGOTEntry(Target);
- StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0);
- return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false);
- }
-
- void fixPLTEdge(Edge &E, Symbol &Stub) {
- assert(E.getKind() == Branch26 && "Not a Branch32 edge?");
- assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?");
- E.setTarget(Stub);
- }
-
-private:
- Section &getGOTSection() {
- if (!GOTSection)
- GOTSection = &G.createSection("$__GOT", MemProt::Read | MemProt::Exec);
- return *GOTSection;
- }
-
- Section &getStubsSection() {
- if (!StubsSection)
- StubsSection =
- &G.createSection("$__STUBS", MemProt::Read | MemProt::Exec);
- return *StubsSection;
- }
-
- ArrayRef<char> getGOTEntryBlockContent() {
- return {reinterpret_cast<const char *>(NullGOTEntryContent),
- sizeof(NullGOTEntryContent)};
- }
-
- ArrayRef<char> getStubBlockContent() {
- return {reinterpret_cast<const char *>(StubContent), sizeof(StubContent)};
+ /// Return the string name of the given MachO arm64 edge kind.
+ const char *getMachOARM64RelocationKindName(Edge::Kind R) {
+ switch (R) {
+ case MachOBranch26:
+ return "MachOBranch26";
+ case MachOPointer64:
+ return "MachOPointer64";
+ case MachOPointer64Anon:
+ return "MachOPointer64Anon";
+ case MachOPage21:
+ return "MachOPage21";
+ case MachOPageOffset12:
+ return "MachOPageOffset12";
+ case MachOGOTPage21:
+ return "MachOGOTPage21";
+ case MachOGOTPageOffset12:
+ return "MachOGOTPageOffset12";
+ case MachOTLVPage21:
+ return "MachOTLVPage21";
+ case MachOTLVPageOffset12:
+ return "MachOTLVPageOffset12";
+ case MachOPointerToGOT:
+ return "MachOPointerToGOT";
+ case MachOPairedAddend:
+ return "MachOPairedAddend";
+ case MachOLDRLiteral19:
+ return "MachOLDRLiteral19";
+ case MachODelta32:
+ return "MachODelta32";
+ case MachODelta64:
+ return "MachODelta64";
+ case MachONegDelta32:
+ return "MachONegDelta32";
+ case MachONegDelta64:
+ return "MachONegDelta64";
+ default:
+ return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
+ }
}
- static const uint8_t NullGOTEntryContent[8];
- static const uint8_t StubContent[8];
- Section *GOTSection = nullptr;
- Section *StubsSection = nullptr;
-};
-
-const uint8_t
- PerGraphGOTAndPLTStubsBuilder_MachO_arm64::NullGOTEntryContent[8] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-const uint8_t PerGraphGOTAndPLTStubsBuilder_MachO_arm64::StubContent[8] = {
- 0x10, 0x00, 0x00, 0x58, // LDR x16, <literal>
- 0x00, 0x02, 0x1f, 0xd6 // BR x16
+ unsigned NumSymbols = 0;
};
} // namespace
@@ -514,6 +510,15 @@ const uint8_t PerGraphGOTAndPLTStubsBuilder_MachO_arm64::StubContent[8] = {
namespace llvm {
namespace jitlink {
+Error buildTables_MachO_arm64(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+
+ aarch64::GOTTableManager GOT;
+ aarch64::PLTTableManager PLT(GOT);
+ visitExistingEdges(G, GOT, PLT);
+ return Error::success();
+}
+
class MachOJITLinker_arm64 : public JITLinker<MachOJITLinker_arm64> {
friend class JITLinker<MachOJITLinker_arm64>;
@@ -524,162 +529,8 @@ public:
: JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {}
private:
-
- static unsigned getPageOffset12Shift(uint32_t Instr) {
- constexpr uint32_t LoadStoreImm12Mask = 0x3b000000;
- constexpr uint32_t Vec128Mask = 0x04800000;
-
- if ((Instr & LoadStoreImm12Mask) == 0x39000000) {
- uint32_t ImplicitShift = Instr >> 30;
- if (ImplicitShift == 0)
- if ((Instr & Vec128Mask) == Vec128Mask)
- ImplicitShift = 4;
-
- return ImplicitShift;
- }
-
- return 0;
- }
-
Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
- using namespace support;
-
- char *BlockWorkingMem = B.getAlreadyMutableContent().data();
- char *FixupPtr = BlockWorkingMem + E.getOffset();
- orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset();
-
- switch (E.getKind()) {
- case Branch26: {
- assert((FixupAddress.getValue() & 0x3) == 0 &&
- "Branch-inst is not 32-bit aligned");
-
- int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
-
- if (static_cast<uint64_t>(Value) & 0x3)
- return make_error<JITLinkError>("Branch26 target is not 32-bit "
- "aligned");
-
- if (Value < -(1 << 27) || Value > ((1 << 27) - 1))
- return makeTargetOutOfRangeError(G, B, E);
-
- uint32_t RawInstr = *(little32_t *)FixupPtr;
- assert((RawInstr & 0x7fffffff) == 0x14000000 &&
- "RawInstr isn't a B or BR immediate instruction");
- uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
- uint32_t FixedInstr = RawInstr | Imm;
- *(little32_t *)FixupPtr = FixedInstr;
- break;
- }
- case Pointer32: {
- uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
- if (Value > std::numeric_limits<uint32_t>::max())
- return makeTargetOutOfRangeError(G, B, E);
- *(ulittle32_t *)FixupPtr = Value;
- break;
- }
- case Pointer64:
- case Pointer64Anon: {
- uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
- *(ulittle64_t *)FixupPtr = Value;
- break;
- }
- case Page21:
- case TLVPage21:
- case GOTPage21: {
- assert((E.getKind() != GOTPage21 || E.getAddend() == 0) &&
- "GOTPAGE21 with non-zero addend");
- uint64_t TargetPage =
- (E.getTarget().getAddress().getValue() + E.getAddend()) &
- ~static_cast<uint64_t>(4096 - 1);
- uint64_t PCPage =
- FixupAddress.getValue() & ~static_cast<uint64_t>(4096 - 1);
-
- int64_t PageDelta = TargetPage - PCPage;
- if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1))
- return makeTargetOutOfRangeError(G, B, E);
-
- uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
- assert((RawInstr & 0xffffffe0) == 0x90000000 &&
- "RawInstr isn't an ADRP instruction");
- uint32_t ImmLo = (static_cast<uint64_t>(PageDelta) >> 12) & 0x3;
- uint32_t ImmHi = (static_cast<uint64_t>(PageDelta) >> 14) & 0x7ffff;
- uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5);
- *(ulittle32_t *)FixupPtr = FixedInstr;
- break;
- }
- case PageOffset12: {
- uint64_t TargetOffset =
- (E.getTarget().getAddress() + E.getAddend()).getValue() & 0xfff;
-
- uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
- unsigned ImmShift = getPageOffset12Shift(RawInstr);
-
- if (TargetOffset & ((1 << ImmShift) - 1))
- return make_error<JITLinkError>("PAGEOFF12 target is not aligned");
-
- uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10;
- uint32_t FixedInstr = RawInstr | EncodedImm;
- *(ulittle32_t *)FixupPtr = FixedInstr;
- break;
- }
- case TLVPageOffset12:
- case GOTPageOffset12: {
- assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend");
-
- uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
- assert((RawInstr & 0xfffffc00) == 0xf9400000 &&
- "RawInstr isn't a 64-bit LDR immediate");
-
- uint32_t TargetOffset = E.getTarget().getAddress().getValue() & 0xfff;
- assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned");
- uint32_t EncodedImm = (TargetOffset >> 3) << 10;
- uint32_t FixedInstr = RawInstr | EncodedImm;
- *(ulittle32_t *)FixupPtr = FixedInstr;
- break;
- }
- case LDRLiteral19: {
- assert((FixupAddress.getValue() & 0x3) == 0 &&
- "LDR is not 32-bit aligned");
- assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend");
- uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
- assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal");
- int64_t Delta = E.getTarget().getAddress() - FixupAddress;
- if (Delta & 0x3)
- return make_error<JITLinkError>("LDR literal target is not 32-bit "
- "aligned");
- if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1))
- return makeTargetOutOfRangeError(G, B, E);
-
- uint32_t EncodedImm =
- ((static_cast<uint32_t>(Delta) >> 2) & 0x7ffff) << 5;
- uint32_t FixedInstr = RawInstr | EncodedImm;
- *(ulittle32_t *)FixupPtr = FixedInstr;
- break;
- }
- case Delta32:
- case Delta64:
- case NegDelta32:
- case NegDelta64: {
- int64_t Value;
- if (E.getKind() == Delta32 || E.getKind() == Delta64)
- Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
- else
- Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
-
- if (E.getKind() == Delta32 || E.getKind() == NegDelta32) {
- if (Value < std::numeric_limits<int32_t>::min() ||
- Value > std::numeric_limits<int32_t>::max())
- return makeTargetOutOfRangeError(G, B, E);
- *(little32_t *)FixupPtr = Value;
- } else
- *(little64_t *)FixupPtr = Value;
- break;
- }
- default:
- llvm_unreachable("Unrecognized edge kind");
- }
-
- return Error::success();
+ return aarch64::applyFixup(G, B, E);
}
uint64_t NullValue = 0;
@@ -712,13 +563,14 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
// Add eh-frame passes.
// FIXME: Prune eh-frames for which compact-unwind is available once
// we support compact-unwind registration with libunwind.
- Config.PrePrunePasses.push_back(EHFrameSplitter("__TEXT,__eh_frame"));
Config.PrePrunePasses.push_back(
- EHFrameEdgeFixer("__TEXT,__eh_frame", 8, Delta64, Delta32, NegDelta32));
+ DWARFRecordSectionSplitter("__TEXT,__eh_frame"));
+ Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
+ "__TEXT,__eh_frame", 8, aarch64::Pointer32, aarch64::Pointer64,
+ aarch64::Delta32, aarch64::Delta64, aarch64::NegDelta32));
// Add an in-place GOT/Stubs pass.
- Config.PostPrunePasses.push_back(
- PerGraphGOTAndPLTStubsBuilder_MachO_arm64::asPass);
+ Config.PostPrunePasses.push_back(buildTables_MachO_arm64);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
@@ -728,44 +580,5 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
MachOJITLinker_arm64::link(std::move(Ctx), std::move(G), std::move(Config));
}
-const char *getMachOARM64RelocationKindName(Edge::Kind R) {
- switch (R) {
- case Branch26:
- return "Branch26";
- case Pointer64:
- return "Pointer64";
- case Pointer64Anon:
- return "Pointer64Anon";
- case Page21:
- return "Page21";
- case PageOffset12:
- return "PageOffset12";
- case GOTPage21:
- return "GOTPage21";
- case GOTPageOffset12:
- return "GOTPageOffset12";
- case TLVPage21:
- return "TLVPage21";
- case TLVPageOffset12:
- return "TLVPageOffset12";
- case PointerToGOT:
- return "PointerToGOT";
- case PairedAddend:
- return "PairedAddend";
- case LDRLiteral19:
- return "LDRLiteral19";
- case Delta32:
- return "Delta32";
- case Delta64:
- return "Delta64";
- case NegDelta32:
- return "NegDelta32";
- case NegDelta64:
- return "NegDelta64";
- default:
- return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
- }
-}
-
} // end namespace jitlink
} // end namespace llvm
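The net effect of this file's changes is that the ~150-line PerGraphGOTAndPLTStubsBuilder specialization is replaced by the generic table managers plus the shared aarch64::applyFixup. A hedged sketch of the resulting pass shape, with myarch as a hypothetical stand-in namespace:

    // Sketch only; mirrors buildTables_MachO_arm64 above.
    Error buildTables_MyFormat(jitlink::LinkGraph &G) {
      myarch::GOTTableManager GOT;      // rewrites GOT/TLV edges, emits entries
      myarch::PLTTableManager PLT(GOT); // rewrites external branches, emits stubs
      jitlink::visitExistingEdges(G, GOT, PLT); // single walk over all edges
      return Error::success();
    }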
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 82afaa3aa3c5..6dfd5548fcfd 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "MachOLinkGraphBuilder.h"
-#include "PerGraphGOTAndPLTStubsBuilder.h"
#define DEBUG_TYPE "jitlink"
@@ -504,12 +504,13 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
}
LinkGraphPassFunction createEHFrameSplitterPass_MachO_x86_64() {
- return EHFrameSplitter("__TEXT,__eh_frame");
+ return DWARFRecordSectionSplitter("__TEXT,__eh_frame");
}
LinkGraphPassFunction createEHFrameEdgeFixerPass_MachO_x86_64() {
return EHFrameEdgeFixer("__TEXT,__eh_frame", x86_64::PointerSize,
- x86_64::Delta64, x86_64::Delta32, x86_64::NegDelta32);
+ x86_64::Pointer32, x86_64::Pointer64, x86_64::Delta32,
+ x86_64::Delta64, x86_64::NegDelta32);
}
} // end namespace jitlink
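Both MachO backends now hand EHFrameEdgeFixer a full kind set, absolute pointers as well as deltas, because eh-frame encodings may use either. A hedged sketch of the constructor shape, with myarch as a hypothetical stand-in namespace:

    // Sketch: section name, pointer size, then the five edge kinds the
    // fixer may emit. `myarch` is hypothetical.
    jitlink::LinkGraphPassFunction makeEHFrameFixer() {
      return jitlink::EHFrameEdgeFixer("__TEXT,__eh_frame", myarch::PointerSize,
                                       myarch::Pointer32, myarch::Pointer64,
                                       myarch::Delta32, myarch::Delta64,
                                       myarch::NegDelta32);
    }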
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
index 6dccc4811885..28a6f9ce90d9 100644
--- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
@@ -18,13 +18,55 @@ namespace llvm {
namespace jitlink {
namespace aarch64 {
-const char *getEdgeKindName(Edge::Kind K) {
- switch (K) {
- case R_AARCH64_CALL26:
- return "R_AARCH64_CALL26";
+const uint8_t NullGOTEntryContent[8] = {0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00};
+
+const uint8_t StubContent[8] = {
+ 0x10, 0x00, 0x00, 0x58, // LDR x16, <literal>
+ 0x00, 0x02, 0x1f, 0xd6 // BR x16
+};
+
+const char *getEdgeKindName(Edge::Kind R) {
+ switch (R) {
+ case Branch26:
+ return "Branch26";
+ case Pointer64:
+ return "Pointer64";
+ case Pointer64Anon:
+ return "Pointer64Anon";
+ case Page21:
+ return "Page21";
+ case PageOffset12:
+ return "PageOffset12";
+ case MoveWide16:
+ return "MoveWide16";
+ case GOTPage21:
+ return "GOTPage21";
+ case GOTPageOffset12:
+ return "GOTPageOffset12";
+ case TLVPage21:
+ return "TLVPage21";
+ case TLVPageOffset12:
+ return "TLVPageOffset12";
+ case PointerToGOT:
+ return "PointerToGOT";
+ case PairedAddend:
+ return "PairedAddend";
+ case LDRLiteral19:
+ return "LDRLiteral19";
+ case Delta32:
+ return "Delta32";
+ case Delta64:
+ return "Delta64";
+ case NegDelta32:
+ return "NegDelta32";
+ case NegDelta64:
+ return "NegDelta64";
+ default:
+ return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
}
- return getGenericEdgeKindName(K);
}
+
} // namespace aarch64
} // namespace jitlink
} // namespace llvm
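The StubContent bytes exported here are the same two instructions ("LDR x16, <literal>; BR x16") that the deleted per-graph builder embedded, wired up the same way. A hedged sketch mirroring the removed createPLTStub (G, StubsSection, and GOTEntrySymbol are assumed to be in scope):

    // Sketch: an 8-byte stub block whose LDR is pointed at the target's
    // GOT entry via an LDRLiteral19 edge at offset 0.
    auto &StubBlock = G.createContentBlock(
        StubsSection,
        {reinterpret_cast<const char *>(aarch64::StubContent), 8},
        orc::ExecutorAddr(), /*Alignment=*/1, /*AlignmentOffset=*/0);
    StubBlock.addEdge(aarch64::LDRLiteral19, 0, GOTEntrySymbol, 0);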
diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
index 3ce2cf10a24c..3848cc6b5f01 100644
--- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
@@ -26,6 +26,8 @@ const char *getEdgeKindName(Edge::Kind K) {
return "R_RISCV_64";
case R_RISCV_BRANCH:
return "R_RISCV_BRANCH";
+ case R_RISCV_JAL:
+ return "R_RISCV_JAL";
case R_RISCV_HI20:
return "R_RISCV_HI20";
case R_RISCV_LO12_I:
@@ -56,6 +58,8 @@ const char *getEdgeKindName(Edge::Kind K) {
return "R_RISCV_SUB16";
case R_RISCV_SUB8:
return "R_RISCV_SUB8";
+ case R_RISCV_SUB6:
+ return "R_RISCV_SUB6";
case R_RISCV_SET6:
return "R_RISCV_SET6";
case R_RISCV_SET8:
diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index ed912280ac82..4ac901daa5c8 100644
--- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DynamicLibrary.h"
diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index a5dd420c9132..f6c4cdbb8c91 100644
--- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -72,8 +72,7 @@ class MCJIT : public ExecutionEngine {
class OwningModuleContainer {
public:
- OwningModuleContainer() {
- }
+ OwningModuleContainer() = default;
~OwningModuleContainer() {
freeModulePtrSet(AddedModules);
freeModulePtrSet(LoadedModules);
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
index f34247005258..fad7428e1f90 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -12,6 +12,7 @@
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index e5cb8103919a..dd80630a33c1 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -62,7 +62,7 @@ void ResourceTracker::makeDefunct() {
JDAndFlag.store(Val);
}
-ResourceManager::~ResourceManager() {}
+ResourceManager::~ResourceManager() = default;
ResourceTrackerDefunct::ResourceTrackerDefunct(ResourceTrackerSP RT)
: RT(std::move(RT)) {}
@@ -76,9 +76,21 @@ void ResourceTrackerDefunct::log(raw_ostream &OS) const {
}
FailedToMaterialize::FailedToMaterialize(
+ std::shared_ptr<SymbolStringPool> SSP,
std::shared_ptr<SymbolDependenceMap> Symbols)
- : Symbols(std::move(Symbols)) {
+ : SSP(std::move(SSP)), Symbols(std::move(Symbols)) {
+ assert(this->SSP && "String pool cannot be null");
assert(!this->Symbols->empty() && "Cannot fail to resolve an empty set");
+
+ // FIXME: Use a new dep-map type for FailedToMaterialize errors so that we
+ // don't have to manually retain/release.
+ for (auto &KV : *this->Symbols)
+ KV.first->Retain();
+}
+
+FailedToMaterialize::~FailedToMaterialize() {
+ for (auto &KV : *Symbols)
+ KV.first->Release();
}
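The manual Retain/Release added above exists because the error object may outlive the structures that own the pooled strings it names; bumping the ref-counts keeps the pool entries alive for as long as the error does. A generic standalone sketch of the ownership pattern (PoolEntry is a hypothetical stand-in for SymbolStringPool entries):

    #include <utility>
    #include <vector>

    struct PoolEntry {
      int RefCount = 0;
      void Retain() { ++RefCount; }
      void Release() { --RefCount; }
    };

    class FailedError {
      std::vector<PoolEntry *> Symbols;
    public:
      explicit FailedError(std::vector<PoolEntry *> Syms)
          : Symbols(std::move(Syms)) {
        for (auto *E : Symbols)
          E->Retain(); // keep pool entries alive while the error exists
      }
      ~FailedError() {
        for (auto *E : Symbols)
          E->Release(); // balanced on destruction
      }
    };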
std::error_code FailedToMaterialize::convertToErrorCode() const {
@@ -251,9 +263,21 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
void AbsoluteSymbolsMaterializationUnit::materialize(
std::unique_ptr<MaterializationResponsibility> R) {
- // No dependencies, so these calls can't fail.
- cantFail(R->notifyResolved(Symbols));
- cantFail(R->notifyEmitted());
+ // Even though these are just absolute symbols we need to check for failure
+ // to resolve/emit: the tracker for these symbols may have been removed while
+ // the materialization was in flight (e.g. due to a failure in some action
+ // triggered by the queries attached to the resolution/emission of these
+ // symbols).
+ if (auto Err = R->notifyResolved(Symbols)) {
+ R->getExecutionSession().reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
+ if (auto Err = R->notifyEmitted()) {
+ R->getExecutionSession().reportError(std::move(Err));
+ R->failMaterialization();
+ return;
+ }
}
void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
@@ -485,13 +509,16 @@ Expected<SymbolAliasMap> buildSimpleReexportsAliasMap(JITDylib &SourceJD,
class InProgressLookupState {
public:
+ // FIXME: Reduce the number of SymbolStringPtrs here. See
+ // https://github.com/llvm/llvm-project/issues/55576.
+
InProgressLookupState(LookupKind K, JITDylibSearchOrder SearchOrder,
SymbolLookupSet LookupSet, SymbolState RequiredState)
: K(K), SearchOrder(std::move(SearchOrder)),
LookupSet(std::move(LookupSet)), RequiredState(RequiredState) {
DefGeneratorCandidates = this->LookupSet;
}
- virtual ~InProgressLookupState() {}
+ virtual ~InProgressLookupState() = default;
virtual void complete(std::unique_ptr<InProgressLookupState> IPLS) = 0;
virtual void fail(Error Err) = 0;
@@ -609,7 +636,7 @@ void LookupState::continueLookup(Error Err) {
ES.OL_applyQueryPhase1(std::move(IPLS), std::move(Err));
}
-DefinitionGenerator::~DefinitionGenerator() {}
+DefinitionGenerator::~DefinitionGenerator() = default;
JITDylib::~JITDylib() {
LLVM_DEBUG(dbgs() << "Destroying JITDylib " << getName() << "\n");
@@ -959,6 +986,7 @@ Error JITDylib::resolve(MaterializationResponsibility &MR,
auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
(*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
return make_error<FailedToMaterialize>(
+ getExecutionSession().getSymbolStringPool(),
std::move(FailedSymbolsDepMap));
}
@@ -1036,6 +1064,7 @@ Error JITDylib::emit(MaterializationResponsibility &MR,
auto FailedSymbolsDepMap = std::make_shared<SymbolDependenceMap>();
(*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState);
return make_error<FailedToMaterialize>(
+ getExecutionSession().getSymbolStringPool(),
std::move(FailedSymbolsDepMap));
}
@@ -1411,12 +1440,11 @@ void JITDylib::dump(raw_ostream &OS) {
for (auto &KV : Symbols) {
OS << " \"" << *KV.first << "\": ";
if (auto Addr = KV.second.getAddress())
- OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags()
- << " ";
+ OS << format("0x%016" PRIx64, Addr);
else
OS << "<not resolved> ";
- OS << KV.second.getFlags() << " " << KV.second.getState();
+ OS << " " << KV.second.getFlags() << " " << KV.second.getState();
if (KV.second.hasMaterializerAttached()) {
OS << " (Materializer ";
@@ -1751,7 +1779,7 @@ void JITDylib::transferEmittedNodeDependencies(
}
}
-Platform::~Platform() {}
+Platform::~Platform() = default;
Expected<DenseMap<JITDylib *, SymbolMap>> Platform::lookupInitSymbols(
ExecutionSession &ES,
@@ -1858,6 +1886,12 @@ ExecutionSession::ExecutionSession(std::unique_ptr<ExecutorProcessControl> EPC)
this->EPC->ES = this;
}
+ExecutionSession::~ExecutionSession() {
+ // You must call endSession prior to destroying the session.
+ assert(!SessionOpen &&
+ "Session still open. Did you forget to call endSession?");
+}
+
Error ExecutionSession::endSession() {
LLVM_DEBUG(dbgs() << "Ending ExecutionSession " << this << "\n");
@@ -1869,7 +1903,7 @@ Error ExecutionSession::endSession() {
// TODO: notify platform? run static deinits?
Error Err = Error::success();
- for (auto &JD : JITDylibsToClose)
+ for (auto &JD : reverse(JITDylibsToClose))
Err = joinErrors(std::move(Err), JD->clear());
Err = joinErrors(std::move(Err), EPC->disconnect());
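Two shutdown fixes land here: JITDylibs are now cleared in reverse creation order, and (via the new destructor assert above) sessions must be closed explicitly before destruction. A hedged usage sketch:

    // Sketch: end the session before the ExecutionSession goes out of
    // scope; the new destructor asserts that SessionOpen is false.
    ExecutionSession ES(std::move(EPC)); // EPC: unique_ptr<ExecutorProcessControl>
    // ... define JITDylibs, add modules, run lookups ...
    if (auto Err = ES.endSession())
      logAllUnhandledErrors(std::move(Err), errs(), "ExecutionSession: ");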
@@ -1987,9 +2021,8 @@ JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) {
for (auto &KV : llvm::reverse(Result.back()->LinkOrder)) {
auto &JD = *KV.first;
- if (Visited.count(&JD))
+ if (!Visited.insert(&JD).second)
continue;
- Visited.insert(&JD);
WorkStack.push_back(&JD);
}
}
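The Visited change above is the usual set-insertion idiom: insert() returns {iterator, bool}, so one hash lookup both tests and records membership. For comparison:

    // Before: two lookups into the set.
    if (Visited.count(&JD))
      continue;
    Visited.insert(&JD);

    // After: one lookup; .second is false when &JD was already present.
    if (!Visited.insert(&JD).second)
      continue;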
@@ -2071,7 +2104,7 @@ void ExecutionSession::lookup(
Expected<SymbolMap>
ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder,
- const SymbolLookupSet &Symbols, LookupKind K,
+ SymbolLookupSet Symbols, LookupKind K,
SymbolState RequiredState,
RegisterDependenciesFunction RegisterDependencies) {
#if LLVM_ENABLE_THREADS
@@ -2103,7 +2136,7 @@ ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder,
#endif
// Perform the asynchronous lookup.
- lookup(K, SearchOrder, Symbols, RequiredState, NotifyComplete,
+ lookup(K, SearchOrder, std::move(Symbols), RequiredState, NotifyComplete,
RegisterDependencies);
#if LLVM_ENABLE_THREADS
@@ -2257,7 +2290,8 @@ Error ExecutionSession::removeResourceTracker(ResourceTracker &RT) {
joinErrors(std::move(Err), L->handleRemoveResources(RT.getKeyUnsafe()));
for (auto &Q : QueriesToFail)
- Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbols));
+ Q->handleFailed(
+ make_error<FailedToMaterialize>(getSymbolStringPool(), FailedSymbols));
return Err;
}
@@ -2337,7 +2371,8 @@ Error ExecutionSession::IL_updateCandidatesFor(
if (SymI->second.getFlags().hasError()) {
auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
(*FailedSymbolsMap)[&JD] = {Name};
- return make_error<FailedToMaterialize>(std::move(FailedSymbolsMap));
+ return make_error<FailedToMaterialize>(getSymbolStringPool(),
+ std::move(FailedSymbolsMap));
}
// Otherwise this is a match. Remove it from the candidate set.
@@ -2611,7 +2646,7 @@ void ExecutionSession::OL_completeLookup(
auto FailedSymbolsMap = std::make_shared<SymbolDependenceMap>();
(*FailedSymbolsMap)[&JD] = {Name};
return make_error<FailedToMaterialize>(
- std::move(FailedSymbolsMap));
+ getSymbolStringPool(), std::move(FailedSymbolsMap));
}
// Otherwise this is a match.
@@ -2947,7 +2982,8 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
});
for (auto &Q : FailedQueries)
- Q->handleFailed(make_error<FailedToMaterialize>(FailedSymbols));
+ Q->handleFailed(
+ make_error<FailedToMaterialize>(getSymbolStringPool(), FailedSymbols));
}
Error ExecutionSession::OL_replace(MaterializationResponsibility &MR,
diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
index 4ff6b7fd54df..1e68ea1225e6 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
@@ -42,7 +42,7 @@ class DebugObjectSection {
public:
virtual void setTargetMemoryRange(SectionRange Range) = 0;
virtual void dump(raw_ostream &OS, StringRef Name) {}
- virtual ~DebugObjectSection() {}
+ virtual ~DebugObjectSection() = default;
};
template <typename ELFT>
diff --git a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
index 5b386a458f1f..028bd245fb55 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
@@ -297,6 +297,13 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) {
llvm_unreachable("Invalid state");
}
+raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP) {
+ std::lock_guard<std::mutex> Lock(SSP.PoolMutex);
+ for (auto &KV : SSP.Pool)
+ OS << KV.first() << ": " << KV.second << "\n";
+ return OS;
+}
+
DumpObjects::DumpObjects(std::string DumpDir, std::string IdentifierOverride)
: DumpDir(std::move(DumpDir)),
IdentifierOverride(std::move(IdentifierOverride)) {
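The new operator<< for SymbolStringPool (a friend, so it can take the pool lock) gives a quick way to audit entries and their ref-counts when chasing string-pool leaks. A hedged usage sketch, assuming an ExecutionSession ES in scope:

    // Sketch: print every pooled name with its current ref-count.
    dbgs() << *ES.getSymbolStringPool();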
diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
index 6916ee4a827f..3c44fe81b4a9 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
@@ -48,7 +48,7 @@ public:
MachODebugObjectSynthesizerBase(LinkGraph &G, ExecutorAddr RegisterActionAddr)
: G(G), RegisterActionAddr(RegisterActionAddr) {}
- virtual ~MachODebugObjectSynthesizerBase() {}
+ virtual ~MachODebugObjectSynthesizerBase() = default;
Error preserveDebugSections() {
if (G.findSectionByName(SynthDebugSectionName)) {
@@ -349,10 +349,11 @@ public:
}
SectionRange R(MachOContainerBlock->getSection());
- G.allocActions().push_back({cantFail(shared::WrapperFunctionCall::Create<
- SPSArgList<SPSExecutorAddrRange>>(
- RegisterActionAddr, R.getRange())),
- {}});
+ G.allocActions().push_back(
+ {cantFail(shared::WrapperFunctionCall::Create<
+ shared::SPSArgList<shared::SPSExecutorAddrRange>>(
+ RegisterActionAddr, R.getRange())),
+ {}});
return Error::success();
}
diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
index d02760703f06..e476c549412a 100644
--- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
@@ -10,6 +10,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
@@ -47,6 +48,11 @@ public:
Endianness = support::endianness::little;
EdgeKind = jitlink::x86_64::Pointer64;
break;
+ case Triple::aarch64:
+ PointerSize = 8;
+ Endianness = support::endianness::little;
+ EdgeKind = jitlink::aarch64::Pointer64;
+ break;
default:
llvm_unreachable("Unrecognized architecture");
}
@@ -95,8 +101,6 @@ StringRef InitArrayFuncSectionName = ".init_array";
StringRef ThreadBSSSectionName = ".tbss";
StringRef ThreadDataSectionName = ".tdata";
-StringRef InitSectionNames[] = {InitArrayFuncSectionName};
-
} // end anonymous namespace
namespace llvm {
@@ -117,8 +121,12 @@ ELFNixPlatform::Create(ExecutionSession &ES,
inconvertibleErrorCode());
// Create default aliases if the caller didn't supply any.
- if (!RuntimeAliases)
- RuntimeAliases = standardPlatformAliases(ES);
+ if (!RuntimeAliases) {
+ auto StandardRuntimeAliases = standardPlatformAliases(ES, PlatformJD);
+ if (!StandardRuntimeAliases)
+ return StandardRuntimeAliases.takeError();
+ RuntimeAliases = std::move(*StandardRuntimeAliases);
+ }
// Define the aliases.
if (auto Err = PlatformJD.define(symbolAliases(std::move(*RuntimeAliases))))
@@ -189,10 +197,53 @@ static void addAliases(ExecutionSession &ES, SymbolAliasMap &Aliases,
}
}
-SymbolAliasMap ELFNixPlatform::standardPlatformAliases(ExecutionSession &ES) {
+Expected<SymbolAliasMap>
+ELFNixPlatform::standardPlatformAliases(ExecutionSession &ES,
+ JITDylib &PlatformJD) {
SymbolAliasMap Aliases;
addAliases(ES, Aliases, requiredCXXAliases());
addAliases(ES, Aliases, standardRuntimeUtilityAliases());
+
+  // Determine whether the libunwind extended-API function for
+ // dynamically registering an entire .eh_frame section is available.
+ // If it is not, we assume that libgcc_s is being used, and alias to
+ // its __register_frame with the same functionality.
+ auto RTRegisterFrame = ES.intern("__orc_rt_register_eh_frame_section");
+ auto LibUnwindRegisterFrame = ES.intern("__unw_add_dynamic_eh_frame_section");
+ auto RTDeregisterFrame = ES.intern("__orc_rt_deregister_eh_frame_section");
+ auto LibUnwindDeregisterFrame =
+ ES.intern("__unw_remove_dynamic_eh_frame_section");
+ auto SM = ES.lookup(makeJITDylibSearchOrder(&PlatformJD),
+ SymbolLookupSet()
+ .add(LibUnwindRegisterFrame,
+ SymbolLookupFlags::WeaklyReferencedSymbol)
+ .add(LibUnwindDeregisterFrame,
+ SymbolLookupFlags::WeaklyReferencedSymbol));
+ if (!SM) { // Weak-ref means no "missing symbol" errors, so this must be
+ // something more serious that we should report.
+ return SM.takeError();
+ } else if (SM->size() == 2) {
+ LLVM_DEBUG({
+ dbgs() << "Using libunwind " << LibUnwindRegisterFrame
+ << " for unwind info registration\n";
+ });
+ Aliases[std::move(RTRegisterFrame)] = {LibUnwindRegisterFrame,
+ JITSymbolFlags::Exported};
+ Aliases[std::move(RTDeregisterFrame)] = {LibUnwindDeregisterFrame,
+ JITSymbolFlags::Exported};
+ } else {
+ // Since LLVM libunwind is not present, we assume that unwinding
+    // is provided by libgcc.
+ LLVM_DEBUG({
+ dbgs() << "Using libgcc __register_frame"
+ << " for unwind info registration\n";
+ });
+ Aliases[std::move(RTRegisterFrame)] = {ES.intern("__register_frame"),
+ JITSymbolFlags::Exported};
+ Aliases[std::move(RTDeregisterFrame)] = {ES.intern("__deregister_frame"),
+ JITSymbolFlags::Exported};
+ }
+
return Aliases;
}
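The probe above works because WeaklyReferencedSymbol lookups simply omit missing symbols from the result rather than failing, so SM->size() == 2 means both libunwind entry points resolved. The same pattern can test for any optional symbol; a hedged sketch (the symbol name is hypothetical, ES and JD assumed in scope):

    // Sketch: probe for an optional symbol without treating "absent" as an
    // error. Only a real lookup failure propagates.
    auto Probe = ES.lookup(
        makeJITDylibSearchOrder(&JD),
        SymbolLookupSet().add(ES.intern("__optional_hook"), // hypothetical
                              SymbolLookupFlags::WeaklyReferencedSymbol));
    if (!Probe)
      return Probe.takeError();
    bool HaveHook = Probe->size() == 1;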
@@ -210,6 +261,10 @@ ELFNixPlatform::standardRuntimeUtilityAliases() {
static const std::pair<const char *, const char *>
StandardRuntimeUtilityAliases[] = {
{"__orc_rt_run_program", "__orc_rt_elfnix_run_program"},
+ {"__orc_rt_jit_dlerror", "__orc_rt_elfnix_jit_dlerror"},
+ {"__orc_rt_jit_dlopen", "__orc_rt_elfnix_jit_dlopen"},
+ {"__orc_rt_jit_dlclose", "__orc_rt_elfnix_jit_dlclose"},
+ {"__orc_rt_jit_dlsym", "__orc_rt_elfnix_jit_dlsym"},
{"__orc_rt_log_error", "__orc_rt_log_error_to_stderr"}};
return ArrayRef<std::pair<const char *, const char *>>(
@@ -217,16 +272,16 @@ ELFNixPlatform::standardRuntimeUtilityAliases() {
}
bool ELFNixPlatform::isInitializerSection(StringRef SecName) {
- for (auto &Name : InitSectionNames) {
- if (Name.equals(SecName))
- return true;
- }
+ if (SecName.consume_front(InitArrayFuncSectionName) &&
+ (SecName.empty() || SecName[0] == '.'))
+ return true;
return false;
}
bool ELFNixPlatform::supportedTarget(const Triple &TT) {
switch (TT.getArch()) {
case Triple::x86_64:
+ case Triple::aarch64:
return true;
default:
return false;
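The prefix test in isInitializerSection above accepts both plain ".init_array" and priority-suffixed ".init_array.N" sections while rejecting lookalike names. A standalone sketch of the rule with a few test cases:

    #include <cassert>
    #include <string>

    static bool isInitArrayName(std::string S) {
      const std::string Prefix = ".init_array";
      if (S.compare(0, Prefix.size(), Prefix) != 0)
        return false;
      S.erase(0, Prefix.size());
      return S.empty() || S[0] == '.'; // bare name or priority suffix
    }

    int main() {
      assert(isInitArrayName(".init_array"));
      assert(isInitArrayName(".init_array.100")); // priority-suffixed
      assert(!isInitArrayName(".init_arrays"));   // lookalike rejected
      return 0;
    }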
@@ -723,16 +778,15 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections(
jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
JITLinkSymbolSet InitSectionSymbols;
- for (auto &InitSectionName : InitSectionNames) {
+ for (auto &InitSection : G.sections()) {
// Skip non-init sections.
- auto *InitSection = G.findSectionByName(InitSectionName);
- if (!InitSection)
+ if (!isInitializerSection(InitSection.getName()))
continue;
// Make a pass over live symbols in the section: those blocks are already
// preserved.
DenseSet<jitlink::Block *> AlreadyLiveBlocks;
- for (auto &Sym : InitSection->symbols()) {
+ for (auto &Sym : InitSection.symbols()) {
auto &B = Sym->getBlock();
if (Sym->isLive() && Sym->getOffset() == 0 &&
Sym->getSize() == B.getSize() && !AlreadyLiveBlocks.count(&B)) {
@@ -742,7 +796,7 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections(
}
// Add anonymous symbols to preserve any not-already-preserved blocks.
- for (auto *B : InitSection->blocks())
+ for (auto *B : InitSection.blocks())
if (!AlreadyLiveBlocks.count(B))
InitSectionSymbols.insert(
&G.addAnonymousSymbol(*B, 0, B->getSize(), false, true));
@@ -763,9 +817,9 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections(
LLVM_DEBUG({ dbgs() << "ELFNixPlatform::registerInitSections\n"; });
- for (auto InitSectionName : InitSectionNames) {
- if (auto *Sec = G.findSectionByName(InitSectionName)) {
- InitSections.push_back(Sec);
+ for (auto &Sec : G.sections()) {
+ if (isInitializerSection(Sec.getName())) {
+ InitSections.push_back(&Sec);
}
}
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
index f3fe0555fa75..c591acdd646b 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
@@ -45,7 +45,8 @@ createJITLoaderGDBRegistrar(ExecutionSession &ES) {
Error EPCDebugObjectRegistrar::registerDebugObject(
ExecutorAddrRange TargetMem) {
- return ES.callSPSWrapper<void(SPSExecutorAddrRange)>(RegisterFn, TargetMem);
+ return ES.callSPSWrapper<void(shared::SPSExecutorAddrRange)>(RegisterFn,
+ TargetMem);
}
} // namespace orc
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index b901a2d2da23..48aaab96e71f 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -88,7 +88,6 @@ EPCTrampolinePool::EPCTrampolinePool(EPCIndirectionUtils &EPCIU)
}
Error EPCTrampolinePool::deallocatePool() {
- Error Err = Error::success();
std::promise<MSVCPError> DeallocResultP;
auto DeallocResultF = DeallocResultP.get_future();
@@ -234,7 +233,7 @@ Error EPCIndirectStubsManager::updatePointer(StringRef Name,
namespace llvm {
namespace orc {
-EPCIndirectionUtils::ABISupport::~ABISupport() {}
+EPCIndirectionUtils::ABISupport::~ABISupport() = default;
Expected<std::unique_ptr<EPCIndirectionUtils>>
EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
@@ -261,6 +260,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
case Triple::mips64el:
return CreateWithABI<OrcMips64>(EPC);
+ case Triple::riscv64:
+ return CreateWithABI<OrcRiscv64>(EPC);
+
case Triple::x86_64:
if (TT.getOS() == Triple::OSType::Win32)
return CreateWithABI<OrcX86_64_Win32>(EPC);
@@ -302,7 +304,8 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
return Alloc.takeError();
auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
- ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr.getValue(),
+ ResolverBlockAddr = SegInfo.Addr.getValue();
+ ABI->writeResolverCode(SegInfo.WorkingMem.data(), ResolverBlockAddr,
ReentryFnAddr, ReentryCtxAddr);
auto FA = Alloc->finalize();
@@ -310,7 +313,7 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
return FA.takeError();
ResolverBlock = std::move(*FA);
- return SegInfo.Addr.getValue();
+ return ResolverBlockAddr;
}
std::unique_ptr<IndirectStubsManager>
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index ae2d47fb8c5e..95cf89ec3f8b 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -62,7 +62,7 @@ CtorDtorIterator::Element CtorDtorIterator::operator*() const {
break;
} else if (ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(FuncC)) {
if (CE->isCast())
- FuncC = dyn_cast_or_null<ConstantExpr>(CE->getOperand(0));
+ FuncC = CE->getOperand(0);
else
break;
} else {
@@ -273,10 +273,10 @@ Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
StaticLibraryDefinitionGenerator::Load(
ObjectLayer &L, const char *FileName,
GetObjectFileInterface GetObjFileInterface) {
- auto ArchiveBuffer = errorOrToExpected(MemoryBuffer::getFile(FileName));
+ auto ArchiveBuffer = MemoryBuffer::getFile(FileName);
if (!ArchiveBuffer)
- return ArchiveBuffer.takeError();
+ return createFileError(FileName, ArchiveBuffer.getError());
return Create(L, std::move(*ArchiveBuffer), std::move(GetObjFileInterface));
}
@@ -288,7 +288,7 @@ StaticLibraryDefinitionGenerator::Load(
auto B = object::createBinary(FileName);
if (!B)
- return B.takeError();
+ return createFileError(FileName, B.takeError());
// If this is a regular archive then create an instance from it.
if (isa<object::Archive>(B->getBinary()))
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
index 2eb835551adb..412b9f95ea62 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
@@ -19,9 +19,9 @@
namespace llvm {
namespace orc {
-ExecutorProcessControl::MemoryAccess::~MemoryAccess() {}
+ExecutorProcessControl::MemoryAccess::~MemoryAccess() = default;
-ExecutorProcessControl::~ExecutorProcessControl() {}
+ExecutorProcessControl::~ExecutorProcessControl() = default;
SelfExecutorProcessControl::SelfExecutorProcessControl(
std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D,
diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index aadc437c80c4..69aba1fff59a 100644
--- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -11,7 +11,7 @@
namespace llvm {
namespace orc {
-IRCompileLayer::IRCompiler::~IRCompiler() {}
+IRCompileLayer::IRCompiler::~IRCompiler() = default;
IRCompileLayer::IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
std::unique_ptr<IRCompiler> Compile)
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 7a71d2f781d7..38cab526704f 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -59,7 +59,7 @@ private:
namespace llvm {
namespace orc {
-TrampolinePool::~TrampolinePool() {}
+TrampolinePool::~TrampolinePool() = default;
void IndirectStubsManager::anchor() {}
Expected<JITTargetAddress>
@@ -152,6 +152,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
return CCMgrT::Create(ES, ErrorHandlerAddress);
}
+ case Triple::riscv64: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcRiscv64> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+
case Triple::x86_64: {
if (T.getOS() == Triple::OSType::Win32) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
@@ -206,6 +211,12 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
orc::LocalIndirectStubsManager<orc::OrcMips64>>();
};
+ case Triple::riscv64:
+ return []() {
+ return std::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcRiscv64>>();
+ };
+
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
@@ -431,8 +442,7 @@ Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
auto RelocOffInInstr =
MIA.getMemoryOperandRelocationOffset(Instr, InstrSize);
- if (!RelocOffInInstr.hasValue() ||
- InstrSize - RelocOffInInstr.getValue() != 4) {
+ if (!RelocOffInInstr || InstrSize - *RelocOffInInstr != 4) {
LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at "
<< InstrStart);
continue;
diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index 0fbf79b8a56d..c60f4b3b263c 100644
--- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -19,6 +19,7 @@ JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT)
: TT(std::move(TT)) {
Options.EmulatedTLS = true;
Options.ExplicitEmulatedTLS = true;
+ Options.UseInitArray = true;
}
Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 91949c9d7eeb..6d67e6d87b56 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -143,7 +143,7 @@ public:
JITEvaluatedSymbol(pointerToJITTargetAddress(this),
JITSymbolFlags::Exported);
StdInterposes[J.mangleAndIntern("__lljit.cxa_atexit_helper")] =
- JITEvaluatedSymbol(pointerToJITTargetAddress(registerAtExitHelper),
+ JITEvaluatedSymbol(pointerToJITTargetAddress(registerCxaAtExitHelper),
JITSymbolFlags());
cantFail(
@@ -162,6 +162,9 @@ public:
PerJDInterposes[J.mangleAndIntern("__lljit.run_atexits_helper")] =
JITEvaluatedSymbol(pointerToJITTargetAddress(runAtExitsHelper),
JITSymbolFlags());
+ PerJDInterposes[J.mangleAndIntern("__lljit.atexit_helper")] =
+ JITEvaluatedSymbol(pointerToJITTargetAddress(registerAtExitHelper),
+ JITSymbolFlags());
cantFail(JD.define(absoluteSymbols(std::move(PerJDInterposes))));
auto Ctx = std::make_unique<LLVMContext>();
@@ -190,6 +193,14 @@ public:
GlobalValue::HiddenVisibility, "__lljit.run_atexits_helper",
{PlatformInstanceDecl, DSOHandle});
+ auto *IntTy = Type::getIntNTy(*Ctx, sizeof(int) * CHAR_BIT);
+ auto *AtExitCallbackTy = FunctionType::get(VoidTy, {}, false);
+ auto *AtExitCallbackPtrTy = PointerType::getUnqual(AtExitCallbackTy);
+ addHelperAndWrapper(*M, "atexit",
+ FunctionType::get(IntTy, {AtExitCallbackPtrTy}, false),
+ GlobalValue::HiddenVisibility, "__lljit.atexit_helper",
+ {PlatformInstanceDecl, DSOHandle});
+
return J.addIRModule(JD, ThreadSafeModule(std::move(M), std::move(Ctx)));
}
@@ -413,16 +424,25 @@ private:
.takeError();
}
- static void registerAtExitHelper(void *Self, void (*F)(void *), void *Ctx,
- void *DSOHandle) {
+ static void registerCxaAtExitHelper(void *Self, void (*F)(void *), void *Ctx,
+ void *DSOHandle) {
LLVM_DEBUG({
- dbgs() << "Registering atexit function " << (void *)F << " for JD "
+ dbgs() << "Registering cxa atexit function " << (void *)F << " for JD "
<< (*static_cast<JITDylib **>(DSOHandle))->getName() << "\n";
});
static_cast<GenericLLVMIRPlatformSupport *>(Self)->AtExitMgr.registerAtExit(
F, Ctx, DSOHandle);
}
+ static void registerAtExitHelper(void *Self, void *DSOHandle, void (*F)()) {
+ LLVM_DEBUG({
+ dbgs() << "Registering atexit function " << (void *)F << " for JD "
+ << (*static_cast<JITDylib **>(DSOHandle))->getName() << "\n";
+ });
+ static_cast<GenericLLVMIRPlatformSupport *>(Self)->AtExitMgr.registerAtExit(
+ reinterpret_cast<void (*)(void *)>(F), nullptr, DSOHandle);
+ }
+
static void runAtExitsHelper(void *Self, void *DSOHandle) {
LLVM_DEBUG({
dbgs() << "Running atexit functions for JD "
@@ -450,12 +470,12 @@ private:
auto *IntTy = Type::getIntNTy(*Ctx, sizeof(int) * CHAR_BIT);
auto *VoidTy = Type::getVoidTy(*Ctx);
auto *BytePtrTy = PointerType::getUnqual(Int8Ty);
- auto *AtExitCallbackTy = FunctionType::get(VoidTy, {BytePtrTy}, false);
- auto *AtExitCallbackPtrTy = PointerType::getUnqual(AtExitCallbackTy);
+ auto *CxaAtExitCallbackTy = FunctionType::get(VoidTy, {BytePtrTy}, false);
+ auto *CxaAtExitCallbackPtrTy = PointerType::getUnqual(CxaAtExitCallbackTy);
addHelperAndWrapper(
*M, "__cxa_atexit",
- FunctionType::get(IntTy, {AtExitCallbackPtrTy, BytePtrTy, BytePtrTy},
+ FunctionType::get(IntTy, {CxaAtExitCallbackPtrTy, BytePtrTy, BytePtrTy},
false),
GlobalValue::DefaultVisibility, "__lljit.cxa_atexit_helper",
{PlatformInstanceDecl});
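The helper split above lets JIT'd code call both __cxa_atexit (handler takes a context pointer) and plain atexit (handler takes none); the latter is stored in the former's slot by casting the function pointer and recording a null context, exactly as registerAtExitHelper does. A standalone sketch of that adapter pattern:

    #include <cstdio>
    #include <vector>

    struct AtExitEntry { void (*F)(void *); void *Ctx; };
    static std::vector<AtExitEntry> Registry;

    static void registerCxaStyle(void (*F)(void *), void *Ctx) {
      Registry.push_back({F, Ctx});
    }

    // Plain handlers are adapted by casting, with a null context. Strictly
    // speaking, calling through the mismatched pointer type is not portable
    // C++, but it mirrors the reinterpret_cast the helper above relies on.
    static void registerPlainStyle(void (*F)()) {
      registerCxaStyle(reinterpret_cast<void (*)(void *)>(F), nullptr);
    }

    static void sayBye() { std::puts("bye"); }

    int main() {
      registerPlainStyle(&sayBye);
      for (auto It = Registry.rbegin(); It != Registry.rend(); ++It)
        It->F(It->Ctx); // run in reverse registration order
      return 0;
    }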
@@ -521,11 +541,7 @@ GlobalCtorDtorScraper::operator()(ThreadSafeModule TSM,
for (auto E : COrDtors)
InitsOrDeInits.push_back(std::make_pair(E.Func, E.Priority));
- llvm::sort(InitsOrDeInits,
- [](const std::pair<Function *, unsigned> &LHS,
- const std::pair<Function *, unsigned> &RHS) {
- return LHS.first < RHS.first;
- });
+ llvm::sort(InitsOrDeInits, llvm::less_second());
auto *InitOrDeInitFuncEntryBlock =
BasicBlock::Create(Ctx, "entry", InitOrDeInitFunc);
@@ -589,7 +605,7 @@ void LLJIT::PlatformSupport::setInitTransform(
J.InitHelperTransformLayer->setTransform(std::move(T));
}
-LLJIT::PlatformSupport::~PlatformSupport() {}
+LLJIT::PlatformSupport::~PlatformSupport() = default;
Error LLJITBuilderState::prepareForConstruction() {
@@ -701,10 +717,14 @@ Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
return addObjectFile(JD.getDefaultResourceTracker(), std::move(Obj));
}
-Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
- SymbolStringPtr Name) {
- return ES->lookup(
- makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols), Name);
+Expected<ExecutorAddr> LLJIT::lookupLinkerMangled(JITDylib &JD,
+ SymbolStringPtr Name) {
+ if (auto Sym = ES->lookup(
+ makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols),
+ Name))
+ return ExecutorAddr(Sym->getAddress());
+ else
+ return Sym.takeError();
}
Expected<std::unique_ptr<ObjectLayer>>
@@ -897,7 +917,7 @@ LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) {
LCTMgr = std::move(S.LCTMgr);
else {
if (auto LCTMgrOrErr = createLocalLazyCallThroughManager(
- S.TT, *ES, S.LazyCompileFailureAddr))
+ S.TT, *ES, S.LazyCompileFailureAddr.getValue()))
LCTMgr = std::move(*LCTMgrOrErr);
else {
Err = LCTMgrOrErr.takeError();
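
The new registerAtExitHelper above adapts plain atexit handlers, which take no
argument, to the existing __cxa_atexit-style manager: it casts the handler to
void(*)(void*) and passes a null context, relying on the platform calling
convention to ignore the unused argument, exactly as the helper's
reinterpret_cast does. A minimal self-contained sketch of that adapter pattern
(plain C++; the manager class is illustrative, not the ORC API):

#include <vector>

struct AtExitManager {
  // __cxa_atexit-style entry point: the handler takes a context pointer.
  void registerCxaAtExit(void (*F)(void *), void *Ctx, void *DSOHandle) {
    Handlers.push_back({F, Ctx, DSOHandle});
  }
  // Plain atexit-style entry point: adapt a void() handler by dropping the
  // context, as registerAtExitHelper does.
  void registerAtExit(void (*F)(), void *DSOHandle) {
    registerCxaAtExit(reinterpret_cast<void (*)(void *)>(F), nullptr,
                      DSOHandle);
  }
  // Run handlers for one DSO in reverse registration order.
  void runAtExits(void *DSOHandle) {
    for (auto It = Handlers.rbegin(); It != Handlers.rend(); ++It)
      if (It->DSOHandle == DSOHandle)
        It->F(It->Ctx);
  }

private:
  struct Entry {
    void (*F)(void *);
    void *Ctx;
    void *DSOHandle;
  };
  std::vector<Entry> Handlers;
};
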
diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index adb8861793b1..4a50f2d7a153 100644
--- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -19,7 +19,7 @@
namespace llvm {
namespace orc {
-IRLayer::~IRLayer() {}
+IRLayer::~IRLayer() = default;
Error IRLayer::add(ResourceTrackerSP RT, ThreadSafeModule TSM) {
assert(RT && "RT can not be null");
@@ -158,7 +158,7 @@ char ObjectLayer::ID;
ObjectLayer::ObjectLayer(ExecutionSession &ES) : ES(ES) {}
-ObjectLayer::~ObjectLayer() {}
+ObjectLayer::~ObjectLayer() = default;
Error ObjectLayer::add(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> O,
MaterializationUnit::Interface I) {
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 66453e6a632f..20b655bdf4b1 100644
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -131,6 +131,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
case Triple::mips64el:
return LocalLazyCallThroughManager::Create<OrcMips64>(ES, ErrorHandlerAddr);
+ case Triple::riscv64:
+ return LocalLazyCallThroughManager::Create<OrcRiscv64>(ES,
+ ErrorHandlerAddr);
+
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32)
return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(
diff --git a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
index 44cb78c773c9..3452267e4df4 100644
--- a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
@@ -24,7 +24,7 @@ void lookupAndRecordAddrs(
Symbols.add(KV.first, LookupFlags);
ES.lookup(
- K, SearchOrder, Symbols, SymbolState::Ready,
+ K, SearchOrder, std::move(Symbols), SymbolState::Ready,
[Pairs = std::move(Pairs),
OnRec = std::move(OnRecorded)](Expected<SymbolMap> Result) mutable {
if (!Result)
@@ -47,7 +47,7 @@ Error lookupAndRecordAddrs(
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
lookupAndRecordAddrs([&](Error Err) { ResultP.set_value(std::move(Err)); },
- ES, K, SearchOrder, Pairs, LookupFlags);
+ ES, K, SearchOrder, std::move(Pairs), LookupFlags);
return ResultF.get();
}
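
The blocking overload above is the usual promise/future bridge over a
callback-based API; the hunk also moves Symbols and Pairs into the
asynchronous call to avoid copying them. A stripped-down sketch of the same
idiom (lookupAsync and its int result are illustrative, not the ORC API):

#include <functional>
#include <future>
#include <string>

// Callback-based API: delivers its result asynchronously.
void lookupAsync(const std::string &Name, std::function<void(int)> OnDone) {
  OnDone(42); // stand-in for work completing on another thread
}

// Blocking wrapper: a promise/future pair turns the callback into a return
// value, mirroring the ResultP/ResultF pattern above.
int lookupBlocking(const std::string &Name) {
  std::promise<int> P;
  auto F = P.get_future();
  lookupAsync(Name, [&](int R) { P.set_value(R); });
  return F.get();
}
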
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index a364719855b4..d5274b06a76f 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -22,6 +22,39 @@ using namespace llvm;
using namespace llvm::orc;
using namespace llvm::orc::shared;
+namespace llvm {
+namespace orc {
+namespace shared {
+
+using SPSMachOJITDylibDepInfo = SPSTuple<bool, SPSSequence<SPSExecutorAddr>>;
+using SPSMachOJITDylibDepInfoMap =
+ SPSSequence<SPSTuple<SPSExecutorAddr, SPSMachOJITDylibDepInfo>>;
+
+template <>
+class SPSSerializationTraits<SPSMachOJITDylibDepInfo,
+ MachOPlatform::MachOJITDylibDepInfo> {
+public:
+ static size_t size(const MachOPlatform::MachOJITDylibDepInfo &DDI) {
+ return SPSMachOJITDylibDepInfo::AsArgList::size(DDI.Sealed, DDI.DepHeaders);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const MachOPlatform::MachOJITDylibDepInfo &DDI) {
+ return SPSMachOJITDylibDepInfo::AsArgList::serialize(OB, DDI.Sealed,
+ DDI.DepHeaders);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ MachOPlatform::MachOJITDylibDepInfo &DDI) {
+ return SPSMachOJITDylibDepInfo::AsArgList::deserialize(IB, DDI.Sealed,
+ DDI.DepHeaders);
+ }
+};
+
+} // namespace shared
+} // namespace orc
+} // namespace llvm
+
namespace {
class MachOHeaderMaterializationUnit : public MaterializationUnit {
@@ -199,11 +232,25 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
}
Error MachOPlatform::setupJITDylib(JITDylib &JD) {
- return JD.define(std::make_unique<MachOHeaderMaterializationUnit>(
- *this, MachOHeaderStartSymbol));
+ if (auto Err = JD.define(std::make_unique<MachOHeaderMaterializationUnit>(
+ *this, MachOHeaderStartSymbol)))
+ return Err;
+
+ return ES.lookup({&JD}, MachOHeaderStartSymbol).takeError();
}
-Error MachOPlatform::teardownJITDylib(JITDylib &JD) { return Error::success(); }
+Error MachOPlatform::teardownJITDylib(JITDylib &JD) {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ auto I = JITDylibToHeaderAddr.find(&JD);
+ if (I != JITDylibToHeaderAddr.end()) {
+ assert(HeaderAddrToJITDylib.count(I->second) &&
+ "HeaderAddrToJITDylib missing entry");
+ HeaderAddrToJITDylib.erase(I->second);
+ JITDylibToHeaderAddr.erase(I);
+ }
+ JITDylibToPThreadKey.erase(&JD);
+ return Error::success();
+}
Error MachOPlatform::notifyAdding(ResourceTracker &RT,
const MaterializationUnit &MU) {
@@ -255,6 +302,10 @@ MachOPlatform::standardRuntimeUtilityAliases() {
static const std::pair<const char *, const char *>
StandardRuntimeUtilityAliases[] = {
{"___orc_rt_run_program", "___orc_rt_macho_run_program"},
+ {"___orc_rt_jit_dlerror", "___orc_rt_macho_jit_dlerror"},
+ {"___orc_rt_jit_dlopen", "___orc_rt_macho_jit_dlopen"},
+ {"___orc_rt_jit_dlclose", "___orc_rt_macho_jit_dlclose"},
+ {"___orc_rt_jit_dlsym", "___orc_rt_macho_jit_dlsym"},
{"___orc_rt_log_error", "___orc_rt_log_error_to_stderr"}};
return ArrayRef<std::pair<const char *, const char *>>(
@@ -305,16 +356,6 @@ MachOPlatform::MachOPlatform(
State = BootstrapPhase2;
- // PlatformJD hasn't been 'set-up' by the platform yet (since we're creating
- // the platform now), so set it up.
- if (auto E2 = setupJITDylib(PlatformJD)) {
- Err = std::move(E2);
- return;
- }
-
- RegisteredInitSymbols[&PlatformJD].add(
- MachOHeaderStartSymbol, SymbolLookupFlags::WeaklyReferencedSymbol);
-
// Associate wrapper function tags with JIT-side function implementations.
if (auto E2 = associateRuntimeSupportFunctions(PlatformJD)) {
Err = std::move(E2);
@@ -329,23 +370,24 @@ MachOPlatform::MachOPlatform(
return;
}
+ // PlatformJD hasn't been set up by the platform yet (since we're creating
+ // the platform now), so set it up.
+ if (auto E2 = setupJITDylib(PlatformJD)) {
+ Err = std::move(E2);
+ return;
+ }
+
State = Initialized;
}
Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
ExecutionSession::JITDispatchHandlerAssociationMap WFs;
- using GetInitializersSPSSig =
- SPSExpected<SPSMachOJITDylibInitializerSequence>(SPSString);
- WFs[ES.intern("___orc_rt_macho_get_initializers_tag")] =
- ES.wrapAsyncWithSPS<GetInitializersSPSSig>(
- this, &MachOPlatform::rt_getInitializers);
-
- using GetDeinitializersSPSSig =
- SPSExpected<SPSMachOJITDylibDeinitializerSequence>(SPSExecutorAddr);
- WFs[ES.intern("___orc_rt_macho_get_deinitializers_tag")] =
- ES.wrapAsyncWithSPS<GetDeinitializersSPSSig>(
- this, &MachOPlatform::rt_getDeinitializers);
+ using PushInitializersSPSSig =
+ SPSExpected<SPSMachOJITDylibDepInfoMap>(SPSExecutorAddr);
+ WFs[ES.intern("___orc_rt_macho_push_initializers_tag")] =
+ ES.wrapAsyncWithSPS<PushInitializersSPSSig>(
+ this, &MachOPlatform::rt_pushInitializers);
using LookupSymbolSPSSig =
SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, SPSString);
@@ -356,53 +398,83 @@ Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
return ES.registerJITDispatchHandlers(PlatformJD, std::move(WFs));
}
-void MachOPlatform::getInitializersBuildSequencePhase(
- SendInitializerSequenceFn SendResult, JITDylib &JD,
- std::vector<JITDylibSP> DFSLinkOrder) {
- MachOJITDylibInitializerSequence FullInitSeq;
- {
- std::lock_guard<std::mutex> Lock(PlatformMutex);
- for (auto &InitJD : reverse(DFSLinkOrder)) {
- LLVM_DEBUG({
- dbgs() << "MachOPlatform: Appending inits for \"" << InitJD->getName()
- << "\" to sequence\n";
- });
- auto ISItr = InitSeqs.find(InitJD.get());
- if (ISItr != InitSeqs.end()) {
- FullInitSeq.emplace_back(std::move(ISItr->second));
- InitSeqs.erase(ISItr);
- }
- }
- }
-
- SendResult(std::move(FullInitSeq));
-}
-
-void MachOPlatform::getInitializersLookupPhase(
- SendInitializerSequenceFn SendResult, JITDylib &JD) {
-
- auto DFSLinkOrder = JD.getDFSLinkOrder();
- if (!DFSLinkOrder) {
- SendResult(DFSLinkOrder.takeError());
- return;
- }
-
+void MachOPlatform::pushInitializersLoop(
+ PushInitializersSendResultFn SendResult, JITDylibSP JD) {
DenseMap<JITDylib *, SymbolLookupSet> NewInitSymbols;
+ DenseMap<JITDylib *, SmallVector<JITDylib *>> JDDepMap;
+ SmallVector<JITDylib *, 16> Worklist({JD.get()});
+
ES.runSessionLocked([&]() {
- for (auto &InitJD : *DFSLinkOrder) {
- auto RISItr = RegisteredInitSymbols.find(InitJD.get());
+ while (!Worklist.empty()) {
+ // FIXME: Check for defunct dylibs.
+
+ auto DepJD = Worklist.back();
+ Worklist.pop_back();
+
+ // If we've already visited this JITDylib on this iteration then continue.
+ if (JDDepMap.count(DepJD))
+ continue;
+
+ // Add dep info.
+ auto &DM = JDDepMap[DepJD];
+ DepJD->withLinkOrderDo([&](const JITDylibSearchOrder &O) {
+ for (auto &KV : O) {
+ if (KV.first == DepJD)
+ continue;
+ DM.push_back(KV.first);
+ Worklist.push_back(KV.first);
+ }
+ });
+
+ // Add any registered init symbols.
+ auto RISItr = RegisteredInitSymbols.find(DepJD);
if (RISItr != RegisteredInitSymbols.end()) {
- NewInitSymbols[InitJD.get()] = std::move(RISItr->second);
+ NewInitSymbols[DepJD] = std::move(RISItr->second);
RegisteredInitSymbols.erase(RISItr);
}
}
});
- // If there are no further init symbols to look up then move on to the next
- // phase.
+ // If there are no further init symbols to look up then send the link order
+ // (as a list of header addresses) to the caller.
if (NewInitSymbols.empty()) {
- getInitializersBuildSequencePhase(std::move(SendResult), JD,
- std::move(*DFSLinkOrder));
+
+ // To make the list intelligible to the runtime we need to convert all
+ // JITDylib pointers to their header addresses.
+ DenseMap<JITDylib *, ExecutorAddr> HeaderAddrs;
+ HeaderAddrs.reserve(JDDepMap.size());
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ for (auto &KV : JDDepMap) {
+ auto I = JITDylibToHeaderAddr.find(KV.first);
+ if (I == JITDylibToHeaderAddr.end()) {
+ // The header address should have been materialized by the previous
+ // round, but we need to handle the pathological case where someone
+ // removes the symbol on another thread while we're running.
+ SendResult(
+ make_error<StringError>("JITDylib " + KV.first->getName() +
+ " has no registered header address",
+ inconvertibleErrorCode()));
+ return;
+ }
+ HeaderAddrs[KV.first] = I->second;
+ }
+ }
+
+ // Build the dep info map to return.
+ MachOJITDylibDepInfoMap DIM;
+ DIM.reserve(JDDepMap.size());
+ for (auto &KV : JDDepMap) {
+ assert(HeaderAddrs.count(KV.first) && "Missing header addr");
+ auto H = HeaderAddrs[KV.first];
+ MachOJITDylibDepInfo DepInfo;
+ for (auto &Dep : KV.second) {
+ assert(HeaderAddrs.count(Dep) && "Missing header addr");
+ DepInfo.DepHeaders.push_back(HeaderAddrs[Dep]);
+ }
+ DIM.push_back(std::make_pair(H, std::move(DepInfo)));
+ }
+ SendResult(DIM);
return;
}
@@ -412,58 +484,38 @@ void MachOPlatform::getInitializersLookupPhase(
if (Err)
SendResult(std::move(Err));
else
- getInitializersLookupPhase(std::move(SendResult), JD);
+ pushInitializersLoop(std::move(SendResult), JD);
},
ES, std::move(NewInitSymbols));
}
-void MachOPlatform::rt_getInitializers(SendInitializerSequenceFn SendResult,
- StringRef JDName) {
- LLVM_DEBUG({
- dbgs() << "MachOPlatform::rt_getInitializers(\"" << JDName << "\")\n";
- });
-
- JITDylib *JD = ES.getJITDylibByName(JDName);
- if (!JD) {
- LLVM_DEBUG({
- dbgs() << " No such JITDylib \"" << JDName << "\". Sending error.\n";
- });
- SendResult(make_error<StringError>("No JITDylib named " + JDName,
- inconvertibleErrorCode()));
- return;
- }
-
- getInitializersLookupPhase(std::move(SendResult), *JD);
-}
-
-void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
- ExecutorAddr Handle) {
- LLVM_DEBUG({
- dbgs() << "MachOPlatform::rt_getDeinitializers(\""
- << formatv("{0:x}", Handle.getValue()) << "\")\n";
- });
-
- JITDylib *JD = nullptr;
-
+void MachOPlatform::rt_pushInitializers(PushInitializersSendResultFn SendResult,
+ ExecutorAddr JDHeaderAddr) {
+ JITDylibSP JD;
{
std::lock_guard<std::mutex> Lock(PlatformMutex);
- auto I = HeaderAddrToJITDylib.find(Handle);
+ auto I = HeaderAddrToJITDylib.find(JDHeaderAddr);
if (I != HeaderAddrToJITDylib.end())
JD = I->second;
}
+ LLVM_DEBUG({
+ dbgs() << "MachOPlatform::rt_pushInitializers(" << JDHeaderAddr << ") ";
+ if (JD)
+ dbgs() << "pushing initializers for " << JD->getName() << "\n";
+ else
+ dbgs() << "No JITDylib for header address.\n";
+ });
+
if (!JD) {
- LLVM_DEBUG({
- dbgs() << " No JITDylib for handle "
- << formatv("{0:x}", Handle.getValue()) << "\n";
- });
- SendResult(make_error<StringError>("No JITDylib associated with handle " +
- formatv("{0:x}", Handle.getValue()),
- inconvertibleErrorCode()));
+ SendResult(
+ make_error<StringError>("No JITDylib with header addr " +
+ formatv("{0:x}", JDHeaderAddr.getValue()),
+ inconvertibleErrorCode()));
return;
}
- SendResult(MachOJITDylibDeinitializerSequence());
+ pushInitializersLoop(std::move(SendResult), JD);
}
void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
@@ -526,10 +578,14 @@ Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) {
&orc_rt_macho_platform_bootstrap},
{ES.intern("___orc_rt_macho_platform_shutdown"),
&orc_rt_macho_platform_shutdown},
- {ES.intern("___orc_rt_macho_register_thread_data_section"),
- &orc_rt_macho_register_thread_data_section},
- {ES.intern("___orc_rt_macho_deregister_thread_data_section"),
- &orc_rt_macho_deregister_thread_data_section},
+ {ES.intern("___orc_rt_macho_register_jitdylib"),
+ &orc_rt_macho_register_jitdylib},
+ {ES.intern("___orc_rt_macho_deregister_jitdylib"),
+ &orc_rt_macho_deregister_jitdylib},
+ {ES.intern("___orc_rt_macho_register_object_platform_sections"),
+ &orc_rt_macho_register_object_platform_sections},
+ {ES.intern("___orc_rt_macho_deregister_object_platform_sections"),
+ &orc_rt_macho_deregister_object_platform_sections},
{ES.intern("___orc_rt_macho_create_pthread_key"),
&orc_rt_macho_create_pthread_key}}))
return Err;
@@ -537,45 +593,6 @@ Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) {
return ES.callSPSWrapper<void()>(orc_rt_macho_platform_bootstrap);
}
-Error MachOPlatform::registerInitInfo(
- JITDylib &JD, ExecutorAddr ObjCImageInfoAddr,
- ArrayRef<jitlink::Section *> InitSections) {
-
- std::unique_lock<std::mutex> Lock(PlatformMutex);
-
- MachOJITDylibInitializers *InitSeq = nullptr;
- {
- auto I = InitSeqs.find(&JD);
- if (I == InitSeqs.end()) {
- // If there's no init sequence entry yet then we need to look up the
- // header symbol to force creation of one.
- Lock.unlock();
-
- auto SearchOrder =
- JD.withLinkOrderDo([](const JITDylibSearchOrder &SO) { return SO; });
- if (auto Err = ES.lookup(SearchOrder, MachOHeaderStartSymbol).takeError())
- return Err;
-
- Lock.lock();
- I = InitSeqs.find(&JD);
- assert(I != InitSeqs.end() &&
- "Entry missing after header symbol lookup?");
- }
- InitSeq = &I->second;
- }
-
- InitSeq->ObjCImageInfoAddress = ObjCImageInfoAddr;
-
- for (auto *Sec : InitSections) {
- // FIXME: Avoid copy here.
- jitlink::SectionRange R(*Sec);
- InitSeq->InitSections[Sec->getName()].push_back(
- {ExecutorAddr(R.getStart()), ExecutorAddr(R.getEnd())});
- }
-
- return Error::success();
-}
-
Expected<uint64_t> MachOPlatform::createPThreadKey() {
if (!orc_rt_macho_create_pthread_key)
return make_error<StringError>(
@@ -617,11 +634,6 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
return Err;
return processObjCImageInfo(G, MR);
});
-
- Config.PostFixupPasses.push_back(
- [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
- return registerInitSections(G, JD);
- });
}
// --- Add passes for eh-frame and TLV support ---
@@ -639,10 +651,12 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
return fixTLVSectionsAndEdges(G, JD);
});
- // Add a pass to register the final addresses of the eh-frame and TLV sections
- // with the runtime.
- Config.PostFixupPasses.push_back(
- [this](jitlink::LinkGraph &G) { return registerEHAndTLVSections(G); });
+ // Add a pass to register the final addresses of any special sections in the
+ // object with the runtime.
+ Config.PostAllocationPasses.push_back(
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return registerObjectPlatformSections(G, JD);
+ });
}
ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap
@@ -661,7 +675,6 @@ MachOPlatform::MachOPlatformPlugin::getSyntheticSymbolDependencies(
Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol(
jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
-
auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) {
return Sym->getName() == *MP.MachOHeaderStartSymbol;
});
@@ -670,10 +683,14 @@ Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol(
auto &JD = MR.getTargetJITDylib();
std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
auto HeaderAddr = (*I)->getAddress();
+ MP.JITDylibToHeaderAddr[&JD] = HeaderAddr;
MP.HeaderAddrToJITDylib[HeaderAddr] = &JD;
- assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists");
- MP.InitSeqs.insert(
- std::make_pair(&JD, MachOJITDylibInitializers(JD.getName(), HeaderAddr)));
+ G.allocActions().push_back(
+ {cantFail(
+ WrapperFunctionCall::Create<SPSArgList<SPSString, SPSExecutorAddr>>(
+ MP.orc_rt_macho_register_jitdylib, JD.getName(), HeaderAddr)),
+ cantFail(WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddr>>(
+ MP.orc_rt_macho_deregister_jitdylib, HeaderAddr))});
return Error::success();
}
@@ -792,37 +809,6 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
return Error::success();
}
-Error MachOPlatform::MachOPlatformPlugin::registerInitSections(
- jitlink::LinkGraph &G, JITDylib &JD) {
-
- ExecutorAddr ObjCImageInfoAddr;
- SmallVector<jitlink::Section *> InitSections;
-
- if (auto *ObjCImageInfoSec = G.findSectionByName(ObjCImageInfoSectionName)) {
- if (auto Addr = jitlink::SectionRange(*ObjCImageInfoSec).getStart())
- ObjCImageInfoAddr = Addr;
- }
-
- for (auto InitSectionName : InitSectionNames)
- if (auto *Sec = G.findSectionByName(InitSectionName))
- InitSections.push_back(Sec);
-
- // Dump the scraped inits.
- LLVM_DEBUG({
- dbgs() << "MachOPlatform: Scraped " << G.getName() << " init sections:\n";
- if (ObjCImageInfoAddr)
- dbgs() << " " << ObjCImageInfoSectionName << ": "
- << formatv("{0:x}", ObjCImageInfoAddr.getValue()) << "\n";
- for (auto *Sec : InitSections) {
- jitlink::SectionRange R(*Sec);
- dbgs() << " " << Sec->getName() << ": "
- << formatv("[ {0:x} -- {1:x} ]", R.getStart(), R.getEnd()) << "\n";
- }
- });
-
- return MP.registerInitInfo(JD, ObjCImageInfoAddr, InitSections);
-}
-
Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges(
jitlink::LinkGraph &G, JITDylib &JD) {
@@ -879,11 +865,10 @@ Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges(
return Error::success();
}
-Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections(
- jitlink::LinkGraph &G) {
+Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
+ jitlink::LinkGraph &G, JITDylib &JD) {
- // Add a pass to register the final addresses of the eh-frame and TLV sections
- // with the runtime.
+ // Add an action to register the eh-frame.
if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
jitlink::SectionRange R(*EHFrameSection);
if (!R.empty())
@@ -912,6 +897,8 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections(
ThreadDataSection = ThreadBSSSection;
}
+ SmallVector<std::pair<StringRef, ExecutorAddrRange>, 8> MachOPlatformSecs;
+
// Having merged thread BSS (if present) and thread data (if present),
// record the resulting section range.
if (ThreadDataSection) {
@@ -922,16 +909,64 @@ Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections(
"MachOPlatform has not finished booting",
inconvertibleErrorCode());
- G.allocActions().push_back(
- {cantFail(
- WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddrRange>>(
- MP.orc_rt_macho_register_thread_data_section, R.getRange())),
- cantFail(
- WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddrRange>>(
- MP.orc_rt_macho_deregister_thread_data_section,
- R.getRange()))});
+ MachOPlatformSecs.push_back({ThreadDataSectionName, R.getRange()});
+ }
+ }
+
+ // If any platform sections were found then add an allocation action to call
+ // the registration function.
+ StringRef PlatformSections[] = {
+ ModInitFuncSectionName, ObjCClassListSectionName,
+ ObjCImageInfoSectionName, ObjCSelRefsSectionName,
+ Swift5ProtoSectionName, Swift5ProtosSectionName,
+ Swift5TypesSectionName,
+ };
+
+ for (auto &SecName : PlatformSections) {
+ auto *Sec = G.findSectionByName(SecName);
+ if (!Sec)
+ continue;
+ jitlink::SectionRange R(*Sec);
+ if (R.empty())
+ continue;
+
+ MachOPlatformSecs.push_back({SecName, R.getRange()});
+ }
+
+ if (!MachOPlatformSecs.empty()) {
+ Optional<ExecutorAddr> HeaderAddr;
+ {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ auto I = MP.JITDylibToHeaderAddr.find(&JD);
+ if (I != MP.JITDylibToHeaderAddr.end())
+ HeaderAddr = I->second;
}
+
+ if (!HeaderAddr)
+ return make_error<StringError>("Missing header for " + JD.getName(),
+ inconvertibleErrorCode());
+
+ // Dump the scraped platform sections.
+ LLVM_DEBUG({
+ dbgs() << "MachOPlatform: Scraped " << G.getName() << " init sections:\n";
+ for (auto &KV : MachOPlatformSecs)
+ dbgs() << " " << KV.first << ": " << KV.second << "\n";
+ });
+
+ using SPSRegisterObjectPlatformSectionsArgs =
+ SPSArgList<SPSExecutorAddr,
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRange>>>;
+ G.allocActions().push_back(
+ {cantFail(
+ WrapperFunctionCall::Create<SPSRegisterObjectPlatformSectionsArgs>(
+ MP.orc_rt_macho_register_object_platform_sections, *HeaderAddr,
+ MachOPlatformSecs)),
+ cantFail(
+ WrapperFunctionCall::Create<SPSRegisterObjectPlatformSectionsArgs>(
+ MP.orc_rt_macho_deregister_object_platform_sections,
+ *HeaderAddr, MachOPlatformSecs))});
}
+
return Error::success();
}
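
pushInitializersLoop replaces the old DFS-link-order walk: it runs an explicit
worklist over each JITDylib's link order, recording direct dependencies per
dylib, and finally converts the visited set to header addresses for the
runtime. A minimal sketch of the traversal, with strings standing in for
JITDylib pointers (illustrative only):

#include <map>
#include <string>
#include <vector>

// Each dylib lists the dylibs in its link order (itself first, by convention).
using DepGraph = std::map<std::string, std::vector<std::string>>;

DepGraph collectDepInfo(const DepGraph &LinkOrders, const std::string &Root) {
  DepGraph DepMap; // dylib -> direct deps, like JDDepMap above
  std::vector<std::string> Worklist{Root};
  while (!Worklist.empty()) {
    auto JD = Worklist.back();
    Worklist.pop_back();
    if (DepMap.count(JD)) // already visited on this pass
      continue;
    auto &Deps = DepMap[JD];
    auto It = LinkOrders.find(JD);
    if (It == LinkOrders.end())
      continue;
    for (const auto &Dep : It->second) {
      if (Dep == JD) // skip the dylib's own entry in its link order
        continue;
      Deps.push_back(Dep);
      Worklist.push_back(Dep);
    }
  }
  return DepMap;
}
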
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
new file mode 100644
index 000000000000..8b3fbd7117e2
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -0,0 +1,152 @@
+//===- MemoryMapper.cpp - Cross-process memory mapper -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/MemoryMapper.h"
+
+namespace llvm {
+namespace orc {
+
+MemoryMapper::~MemoryMapper() = default;
+
+void InProcessMemoryMapper::reserve(size_t NumBytes,
+ OnReservedFunction OnReserved) {
+ std::error_code EC;
+ auto MB = sys::Memory::allocateMappedMemory(
+ NumBytes, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
+
+ if (EC)
+ return OnReserved(errorCodeToError(EC));
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Reservations[MB.base()].Size = MB.allocatedSize();
+ }
+
+ OnReserved(
+ ExecutorAddrRange(ExecutorAddr::fromPtr(MB.base()), MB.allocatedSize()));
+}
+
+char *InProcessMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) {
+ return Addr.toPtr<char *>();
+}
+
+void InProcessMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
+ OnInitializedFunction OnInitialized) {
+ ExecutorAddr MinAddr(~0ULL);
+
+ for (auto &Segment : AI.Segments) {
+ auto Base = AI.MappingBase + Segment.Offset;
+ auto Size = Segment.ContentSize + Segment.ZeroFillSize;
+
+ if (Base < MinAddr)
+ MinAddr = Base;
+
+ std::memset((Base + Segment.ContentSize).toPtr<void *>(), 0,
+ Segment.ZeroFillSize);
+
+ if (auto EC = sys::Memory::protectMappedMemory({Base.toPtr<void *>(), Size},
+ Segment.Prot)) {
+ return OnInitialized(errorCodeToError(EC));
+ }
+ if (Segment.Prot & sys::Memory::MF_EXEC)
+ sys::Memory::InvalidateInstructionCache(Base.toPtr<void *>(), Size);
+ }
+
+ auto DeinitializeActions = shared::runFinalizeActions(AI.Actions);
+ if (!DeinitializeActions)
+ return OnInitialized(DeinitializeActions.takeError());
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Allocations[MinAddr].DeinitializationActions =
+ std::move(*DeinitializeActions);
+ Reservations[AI.MappingBase.toPtr<void *>()].Allocations.push_back(MinAddr);
+ }
+
+ OnInitialized(MinAddr);
+}
+
+void InProcessMemoryMapper::deinitialize(
+ ArrayRef<ExecutorAddr> Bases,
+ MemoryMapper::OnDeinitializedFunction OnDeinitialized) {
+ Error AllErr = Error::success();
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ for (auto Base : Bases) {
+
+ if (Error Err = shared::runDeallocActions(
+ Allocations[Base].DeinitializationActions)) {
+ AllErr = joinErrors(std::move(AllErr), std::move(Err));
+ }
+
+ Allocations.erase(Base);
+ }
+ }
+
+ OnDeinitialized(std::move(AllErr));
+}
+
+void InProcessMemoryMapper::release(ArrayRef<ExecutorAddr> Bases,
+ OnReleasedFunction OnReleased) {
+ Error Err = Error::success();
+
+ for (auto Base : Bases) {
+ std::vector<ExecutorAddr> AllocAddrs;
+ size_t Size;
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ auto &R = Reservations[Base.toPtr<void *>()];
+ Size = R.Size;
+ AllocAddrs.swap(R.Allocations);
+ }
+
+ // Deinitialize sub-allocations.
+ std::promise<MSVCPError> P;
+ auto F = P.get_future();
+ deinitialize(AllocAddrs, [&](Error Err) { P.set_value(std::move(Err)); });
+ if (Error E = F.get()) {
+ Err = joinErrors(std::move(Err), std::move(E));
+ }
+
+ // Free the memory.
+ auto MB = sys::MemoryBlock(Base.toPtr<void *>(), Size);
+
+ auto EC = sys::Memory::releaseMappedMemory(MB);
+ if (EC) {
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ }
+
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Reservations.erase(Base.toPtr<void *>());
+ }
+
+ OnReleased(std::move(Err));
+}
+
+InProcessMemoryMapper::~InProcessMemoryMapper() {
+ std::vector<ExecutorAddr> ReservationAddrs;
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ ReservationAddrs.reserve(Reservations.size());
+ for (const auto &R : Reservations) {
+ ReservationAddrs.push_back(ExecutorAddr::fromPtr(R.getFirst()));
+ }
+ }
+
+ std::promise<MSVCPError> P;
+ auto F = P.get_future();
+ release(ReservationAddrs, [&](Error Err) { P.set_value(std::move(Err)); });
+ cantFail(F.get());
+}
+
+} // namespace orc
+
+} // namespace llvm
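
initialize() above finalizes each segment in place: zero-fill the tail beyond
the copied content, flip the segment to its final protections, and invalidate
the instruction cache for executable ranges. A condensed sketch of that
per-segment step using the same llvm::sys::Memory calls (the Segment struct
is an illustrative stand-in for MemoryMapper::AllocInfo's segment entries):

#include "llvm/Support/Error.h"
#include "llvm/Support/Memory.h"
#include <cstring>

using namespace llvm;

struct Segment {
  size_t Offset;       // offset from the mapping base
  size_t ContentSize;  // bytes of content already copied in
  size_t ZeroFillSize; // trailing bytes that must read as zero
  unsigned Prot;       // final sys::Memory protection flags
};

Error finalizeSegment(char *MappingBase, const Segment &S) {
  char *Base = MappingBase + S.Offset;
  size_t Size = S.ContentSize + S.ZeroFillSize;
  // Zero-fill the tail beyond the copied content.
  std::memset(Base + S.ContentSize, 0, S.ZeroFillSize);
  // Apply the final protections to the whole segment.
  if (auto EC = sys::Memory::protectMappedMemory(sys::MemoryBlock(Base, Size),
                                                 S.Prot))
    return errorCodeToError(EC);
  // Executable memory needs an icache flush on some targets.
  if (S.Prot & sys::Memory::MF_EXEC)
    sys::Memory::InvalidateInstructionCache(Base, Size);
  return Error::success();
}
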
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
index c1ad569dd65d..394a555e453b 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
@@ -63,7 +63,6 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES,
auto Name = Sym.getName();
if (!Name)
return Name.takeError();
- auto InternedName = ES.intern(*Name);
auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
if (!SymFlags)
return SymFlags.takeError();
@@ -72,7 +71,7 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES,
if (Name->startswith("l"))
*SymFlags &= ~JITSymbolFlags::Exported;
- I.SymbolFlags[InternedName] = std::move(*SymFlags);
+ I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags);
}
for (auto &Sec : Obj.sections()) {
@@ -121,7 +120,7 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES,
auto Name = Sym.getName();
if (!Name)
return Name.takeError();
- auto InternedName = ES.intern(*Name);
+
auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
if (!SymFlags)
return SymFlags.takeError();
@@ -130,7 +129,7 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES,
if (Sym.getBinding() == ELF::STB_GNU_UNIQUE)
*SymFlags |= JITSymbolFlags::Weak;
- I.SymbolFlags[InternedName] = std::move(*SymFlags);
+ I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags);
}
SymbolStringPtr InitSymbol;
@@ -175,12 +174,12 @@ getGenericObjectFileSymbolInfo(ExecutionSession &ES,
auto Name = Sym.getName();
if (!Name)
return Name.takeError();
- auto InternedName = ES.intern(*Name);
+
auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
if (!SymFlags)
return SymFlags.takeError();
- I.SymbolFlags[InternedName] = std::move(*SymFlags);
+ I.SymbolFlags[ES.intern(*Name)] = std::move(*SymFlags);
}
return I;
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index 32c5998a789b..5ddb35cbafd5 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -78,9 +78,12 @@ private:
}
static bool hasELFInitSection(LinkGraph &G) {
- for (auto &Sec : G.sections())
- if (Sec.getName() == ".init_array")
+ for (auto &Sec : G.sections()) {
+ auto SecName = Sec.getName();
+ if (SecName.consume_front(".init_array") &&
+ (SecName.empty() || SecName[0] == '.'))
return true;
+ }
return false;
}
@@ -226,12 +229,13 @@ public:
}
for (auto *Sym : G.absolute_symbols())
- if (Sym->hasName()) {
+ if (Sym->hasName() && Sym->getScope() != Scope::Local) {
auto InternedName = ES.intern(Sym->getName());
JITSymbolFlags Flags;
- Flags |= JITSymbolFlags::Absolute;
if (Sym->isCallable())
Flags |= JITSymbolFlags::Callable;
+ if (Sym->getScope() == Scope::Default)
+ Flags |= JITSymbolFlags::Exported;
if (Sym->getLinkage() == Linkage::Weak)
Flags |= JITSymbolFlags::Weak;
InternedResult[InternedName] =
@@ -607,7 +611,7 @@ private:
DenseMap<SymbolStringPtr, SymbolNameSet> InternalNamedSymbolDeps;
};
-ObjectLinkingLayer::Plugin::~Plugin() {}
+ObjectLinkingLayer::Plugin::~Plugin() = default;
char ObjectLinkingLayer::ID;
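
hasELFInitSection now accepts ".init_array" itself plus prioritized variants
such as ".init_array.100", while still rejecting unrelated names that merely
share the prefix. The acceptance test restated standalone (std::string_view
in place of StringRef::consume_front):

#include <cassert>
#include <string_view>

bool isInitArraySection(std::string_view Name) {
  constexpr std::string_view Prefix = ".init_array";
  if (Name.substr(0, Prefix.size()) != Prefix)
    return false;
  Name.remove_prefix(Prefix.size());
  // Exact match, or a '.'-separated suffix such as a priority.
  return Name.empty() || Name.front() == '.';
}

int main() {
  assert(isInitArraySection(".init_array"));
  assert(isInitArraySection(".init_array.100"));
  assert(!isInitArraySection(".init_arrays")); // prefix alone is not enough
  assert(!isInitArraySection(".ctors"));
}
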
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index 18b3c5e12b1c..ef764a3f0d7f 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -906,5 +906,176 @@ void OrcMips64::writeIndirectStubsBlock(
Stub[8 * I + 7] = 0x00000000; // nop
}
}
+
+void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
+ JITTargetAddress ResolverTargetAddress,
+ JITTargetAddress ReentryFnAddr,
+ JITTargetAddress ReentryCtxAddr) {
+
+ const uint32_t ResolverCode[] = {
+ 0xef810113, // 0x00: addi sp,sp,-264
+ 0x00813023, // 0x04: sd s0,0(sp)
+ 0x00913423, // 0x08: sd s1,8(sp)
+ 0x01213823, // 0x0c: sd s2,16(sp)
+ 0x01313c23, // 0x10: sd s3,24(sp)
+ 0x03413023, // 0x14: sd s4,32(sp)
+ 0x03513423, // 0x18: sd s5,40(sp)
+ 0x03613823, // 0x1c: sd s6,48(sp)
+ 0x03713c23, // 0x20: sd s7,56(sp)
+ 0x05813023, // 0x24: sd s8,64(sp)
+ 0x05913423, // 0x28: sd s9,72(sp)
+ 0x05a13823, // 0x2c: sd s10,80(sp)
+ 0x05b13c23, // 0x30: sd s11,88(sp)
+ 0x06113023, // 0x34: sd ra,96(sp)
+ 0x06a13423, // 0x38: sd a0,104(sp)
+ 0x06b13823, // 0x3c: sd a1,112(sp)
+ 0x06c13c23, // 0x40: sd a2,120(sp)
+ 0x08d13023, // 0x44: sd a3,128(sp)
+ 0x08e13423, // 0x48: sd a4,136(sp)
+ 0x08f13823, // 0x4c: sd a5,144(sp)
+ 0x09013c23, // 0x50: sd a6,152(sp)
+ 0x0b113023, // 0x54: sd a7,160(sp)
+ 0x0a813427, // 0x58: fsd fs0,168(sp)
+ 0x0a913827, // 0x5c: fsd fs1,176(sp)
+ 0x0b213c27, // 0x60: fsd fs2,184(sp)
+ 0x0d313027, // 0x64: fsd fs3,192(sp)
+ 0x0d413427, // 0x68: fsd fs4,200(sp)
+ 0x0d513827, // 0x6c: fsd fs5,208(sp)
+ 0x0d613c27, // 0x70: fsd fs6,216(sp)
+ 0x0f713027, // 0x74: fsd fs7,224(sp)
+ 0x0f813427, // 0x78: fsd fs8,232(sp)
+ 0x0f913827, // 0x7c: fsd fs9,240(sp)
+ 0x0fa13c27, // 0x80: fsd fs10,248(sp)
+ 0x11b13027, // 0x84: fsd fs11,256(sp)
+ 0x00000517, // 0x88: auipc a0,0x0
+ 0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
+ 0x00030593, // 0x90: mv a1,t1
+ 0xff458593, // 0x94: addi a1,a1,-12
+ 0x00000617, // 0x98: auipc a2,0x0
+ 0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
+ 0x000600e7, // 0xa0: jalr a2
+ 0x00050293, // 0xa4: mv t0,a0
+ 0x00013403, // 0xa8: ld s0,0(sp)
+ 0x00813483, // 0xac: ld s1,8(sp)
+ 0x01013903, // 0xb0: ld s2,16(sp)
+ 0x01813983, // 0xb4: ld s3,24(sp)
+ 0x02013a03, // 0xb8: ld s4,32(sp)
+ 0x02813a83, // 0xbc: ld s5,40(sp)
+ 0x03013b03, // 0xc0: ld s6,48(sp)
+ 0x03813b83, // 0xc4: ld s7,56(sp)
+ 0x04013c03, // 0xc8: ld s8,64(sp)
+ 0x04813c83, // 0xcc: ld s9,72(sp)
+ 0x05013d03, // 0xd0: ld s10,80(sp)
+ 0x05813d83, // 0xd4: ld s11,88(sp)
+ 0x06013083, // 0xd8: ld ra,96(sp)
+ 0x06813503, // 0xdc: ld a0,104(sp)
+ 0x07013583, // 0xe0: ld a1,112(sp)
+ 0x07813603, // 0xe4: ld a2,120(sp)
+ 0x08013683, // 0xe8: ld a3,128(sp)
+ 0x08813703, // 0xec: ld a4,136(sp)
+ 0x09013783, // 0xf0: ld a5,144(sp)
+ 0x09813803, // 0xf4: ld a6,152(sp)
+ 0x0a013883, // 0xf8: ld a7,160(sp)
+ 0x0a813407, // 0xfc: fld fs0,168(sp)
+ 0x0b013487, // 0x100: fld fs1,176(sp)
+ 0x0b813907, // 0x104: fld fs2,184(sp)
+ 0x0c013987, // 0x108: fld fs3,192(sp)
+ 0x0c813a07, // 0x10c: fld fs4,200(sp)
+ 0x0d013a87, // 0x110: fld fs5,208(sp)
+ 0x0d813b07, // 0x114: fld fs6,216(sp)
+ 0x0e013b87, // 0x118: fld fs7,224(sp)
+ 0x0e813c07, // 0x11c: fld fs8,232(sp)
+ 0x0f013c87, // 0x120: fld fs9,240(sp)
+ 0x0f813d07, // 0x124: fld fs10,248(sp)
+ 0x10013d87, // 0x128: fld fs11,256(sp)
+ 0x10810113, // 0x12c: addi sp,sp,264
+ 0x00028067, // 0x130: jr t0
+ 0x12345678, // 0x134: padding to align to 8 bytes
+ 0x12345678, // 0x138: Lreentry_ctx_ptr:
+ 0xdeadbeef, // 0x13c: .quad 0
+ 0x98765432, // 0x140: Lreentry_fn_ptr:
+ 0xcafef00d // 0x144: .quad 0
+ };
+
+ const unsigned ReentryCtxAddrOffset = 0x138;
+ const unsigned ReentryFnAddrOffset = 0x140;
+
+ memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
+ memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
+ sizeof(uint64_t));
+ memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
+ sizeof(uint64_t));
+}
+
+void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
+ JITTargetAddress TrampolineBlockTargetAddress,
+ JITTargetAddress ResolverAddr,
+ unsigned NumTrampolines) {
+
+ unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
+
+ memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
+ sizeof(uint64_t));
+
+ uint32_t *Trampolines =
+ reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
+ for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
+ uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000;
+ uint32_t Lo12 = OffsetToPtr - Hi20;
+ Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
+ Trampolines[4 * I + 1] =
+ 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
+ Trampolines[4 * I + 2] = 0x00028367; // jalr t1, t0
+ Trampolines[4 * I + 3] = 0xdeadface; // padding
+ }
+}
+
+void OrcRiscv64::writeIndirectStubsBlock(
+ char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+ JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // auipc t0, %hi(ptr1) ; PC-rel address of ptr1
+ // ld t0, %lo(ptr1)(t0) ; Load target from ptr1
+ // jr t0 ; Jump to resolver
+ // .quad 0 ; Pad to 16 bytes
+ // stub2:
+ // auipc t0, %hi(ptr2) ; PC-rel address of ptr2
+ // ld t0, %lo(ptr2)(t0) ; Load target from ptr2
+ // jr t0 ; Jump to resolver
+ // .quad 0
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .quad 0x0
+ // ptr2:
+ // .quad 0x0
+ //
+ // ...
+
+ assert(stubAndPointerRangesOk<OrcRiscv64>(
+ StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
+ "PointersBlock is out of range");
+
+ uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
+
+ for (unsigned I = 0; I < NumStubs; ++I) {
+ uint64_t PtrDisplacement =
+ PointersBlockTargetAddress - StubsBlockTargetAddress;
+ uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000;
+ uint32_t Lo12 = PtrDisplacement - Hi20;
+ Stub[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
+ Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
+ Stub[4 * I + 2] = 0x00028067; // jr t0
+ Stub[4 * I + 3] = 0xfeedbeef; // padding
+ PointersBlockTargetAddress += PointerSize;
+ StubsBlockTargetAddress += StubSize;
+ }
+}
+
} // End namespace orc.
} // End namespace llvm.
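
Both the trampoline and stub writers split a PC-relative displacement for the
auipc/ld pair: Hi20 = (Disp + 0x800) & 0xFFFFF000 rounds to the nearest 4 KiB
page boundary so that the remainder Lo12 = Disp - Hi20 always fits the
sign-extended 12-bit immediate of the following ld. A self-contained check of
that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t Disp : {0, 8, 0x7ff, 0x800, 0x12345, -0x800, -0x12345}) {
    uint32_t Hi20 = (static_cast<uint32_t>(Disp) + 0x800) & 0xFFFFF000;
    int32_t Lo12 = Disp - static_cast<int32_t>(Hi20);
    assert(Lo12 >= -2048 && Lo12 <= 2047); // fits the 12-bit immediate
    assert(static_cast<int32_t>(Hi20) + Lo12 == Disp); // recombines exactly
  }
}
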
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index 71be8dfdc004..b7eab6b85ecf 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -106,82 +106,6 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJITBuilder, LLVMOrcLLJITBuilderRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJIT, LLVMOrcLLJITRef)
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)
-namespace llvm {
-namespace orc {
-
-class CAPIDefinitionGenerator final : public DefinitionGenerator {
-public:
- CAPIDefinitionGenerator(
- void *Ctx,
- LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate)
- : Ctx(Ctx), TryToGenerate(TryToGenerate) {}
-
- Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
- JITDylibLookupFlags JDLookupFlags,
- const SymbolLookupSet &LookupSet) override {
-
- // Take the lookup state.
- LLVMOrcLookupStateRef LSR = ::wrap(OrcV2CAPIHelper::extractLookupState(LS));
-
- // Translate the lookup kind.
- LLVMOrcLookupKind CLookupKind;
- switch (K) {
- case LookupKind::Static:
- CLookupKind = LLVMOrcLookupKindStatic;
- break;
- case LookupKind::DLSym:
- CLookupKind = LLVMOrcLookupKindDLSym;
- break;
- }
-
- // Translate the JITDylibSearchFlags.
- LLVMOrcJITDylibLookupFlags CJDLookupFlags;
- switch (JDLookupFlags) {
- case JITDylibLookupFlags::MatchExportedSymbolsOnly:
- CJDLookupFlags = LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly;
- break;
- case JITDylibLookupFlags::MatchAllSymbols:
- CJDLookupFlags = LLVMOrcJITDylibLookupFlagsMatchAllSymbols;
- break;
- }
-
- // Translate the lookup set.
- std::vector<LLVMOrcCLookupSetElement> CLookupSet;
- CLookupSet.reserve(LookupSet.size());
- for (auto &KV : LookupSet) {
- LLVMOrcSymbolLookupFlags SLF;
- LLVMOrcSymbolStringPoolEntryRef Name =
- ::wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first));
- switch (KV.second) {
- case SymbolLookupFlags::RequiredSymbol:
- SLF = LLVMOrcSymbolLookupFlagsRequiredSymbol;
- break;
- case SymbolLookupFlags::WeaklyReferencedSymbol:
- SLF = LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol;
- break;
- }
- CLookupSet.push_back({Name, SLF});
- }
-
- // Run the C TryToGenerate function.
- auto Err = unwrap(TryToGenerate(::wrap(this), Ctx, &LSR, CLookupKind,
- ::wrap(&JD), CJDLookupFlags,
- CLookupSet.data(), CLookupSet.size()));
-
- // Restore the lookup state.
- OrcV2CAPIHelper::resetLookupState(LS, ::unwrap(LSR));
-
- return Err;
- }
-
-private:
- void *Ctx;
- LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate;
-};
-
-} // end namespace orc
-} // end namespace llvm
-
namespace {
class OrcCAPIMaterializationUnit : public llvm::orc::MaterializationUnit {
@@ -282,8 +206,134 @@ toSymbolDependenceMap(LLVMOrcCDependenceMapPairs Pairs, size_t NumPairs) {
return SDM;
}
+static LookupKind toLookupKind(LLVMOrcLookupKind K) {
+ switch (K) {
+ case LLVMOrcLookupKindStatic:
+ return LookupKind::Static;
+ case LLVMOrcLookupKindDLSym:
+ return LookupKind::DLSym;
+ }
+ llvm_unreachable("unrecognized LLVMOrcLookupKind value");
+}
+
+static LLVMOrcLookupKind fromLookupKind(LookupKind K) {
+ switch (K) {
+ case LookupKind::Static:
+ return LLVMOrcLookupKindStatic;
+ case LookupKind::DLSym:
+ return LLVMOrcLookupKindDLSym;
+ }
+ llvm_unreachable("unrecognized LookupKind value");
+}
+
+static JITDylibLookupFlags
+toJITDylibLookupFlags(LLVMOrcJITDylibLookupFlags LF) {
+ switch (LF) {
+ case LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly:
+ return JITDylibLookupFlags::MatchExportedSymbolsOnly;
+ case LLVMOrcJITDylibLookupFlagsMatchAllSymbols:
+ return JITDylibLookupFlags::MatchAllSymbols;
+ }
+ llvm_unreachable("unrecognized LLVMOrcJITDylibLookupFlags value");
+}
+
+static LLVMOrcJITDylibLookupFlags
+fromJITDylibLookupFlags(JITDylibLookupFlags LF) {
+ switch (LF) {
+ case JITDylibLookupFlags::MatchExportedSymbolsOnly:
+ return LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly;
+ case JITDylibLookupFlags::MatchAllSymbols:
+ return LLVMOrcJITDylibLookupFlagsMatchAllSymbols;
+ }
+ llvm_unreachable("unrecognized JITDylibLookupFlags value");
+}
+
+static SymbolLookupFlags toSymbolLookupFlags(LLVMOrcSymbolLookupFlags SLF) {
+ switch (SLF) {
+ case LLVMOrcSymbolLookupFlagsRequiredSymbol:
+ return SymbolLookupFlags::RequiredSymbol;
+ case LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol:
+ return SymbolLookupFlags::WeaklyReferencedSymbol;
+ }
+ llvm_unreachable("unrecognized LLVMOrcSymbolLookupFlags value");
+}
+
+static LLVMOrcSymbolLookupFlags fromSymbolLookupFlags(SymbolLookupFlags SLF) {
+ switch (SLF) {
+ case SymbolLookupFlags::RequiredSymbol:
+ return LLVMOrcSymbolLookupFlagsRequiredSymbol;
+ case SymbolLookupFlags::WeaklyReferencedSymbol:
+ return LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol;
+ }
+ llvm_unreachable("unrecognized SymbolLookupFlags value");
+}
+
+static LLVMJITEvaluatedSymbol
+fromJITEvaluatedSymbol(const JITEvaluatedSymbol &S) {
+ return {S.getAddress(), fromJITSymbolFlags(S.getFlags())};
+}
+
} // end anonymous namespace
+namespace llvm {
+namespace orc {
+
+class CAPIDefinitionGenerator final : public DefinitionGenerator {
+public:
+ CAPIDefinitionGenerator(
+ LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose, void *Ctx,
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate)
+ : Dispose(Dispose), Ctx(Ctx), TryToGenerate(TryToGenerate) {}
+
+ ~CAPIDefinitionGenerator() {
+ if (Dispose)
+ Dispose(Ctx);
+ }
+
+ Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
+ JITDylibLookupFlags JDLookupFlags,
+ const SymbolLookupSet &LookupSet) override {
+
+ // Take the lookup state.
+ LLVMOrcLookupStateRef LSR = ::wrap(OrcV2CAPIHelper::extractLookupState(LS));
+
+ // Translate the lookup kind.
+ LLVMOrcLookupKind CLookupKind = fromLookupKind(K);
+
+ // Translate the JITDylibLookupFlags.
+ LLVMOrcJITDylibLookupFlags CJDLookupFlags =
+ fromJITDylibLookupFlags(JDLookupFlags);
+
+ // Translate the lookup set.
+ std::vector<LLVMOrcCLookupSetElement> CLookupSet;
+ CLookupSet.reserve(LookupSet.size());
+ for (auto &KV : LookupSet) {
+ LLVMOrcSymbolStringPoolEntryRef Name =
+ ::wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first));
+ LLVMOrcSymbolLookupFlags SLF = fromSymbolLookupFlags(KV.second);
+ CLookupSet.push_back({Name, SLF});
+ }
+
+ // Run the C TryToGenerate function.
+ auto Err = unwrap(TryToGenerate(::wrap(this), Ctx, &LSR, CLookupKind,
+ ::wrap(&JD), CJDLookupFlags,
+ CLookupSet.data(), CLookupSet.size()));
+
+ // Restore the lookup state.
+ OrcV2CAPIHelper::resetLookupState(LS, ::unwrap(LSR));
+
+ return Err;
+ }
+
+private:
+ LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose;
+ void *Ctx;
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction TryToGenerate;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
void LLVMOrcExecutionSessionSetErrorReporter(
LLVMOrcExecutionSessionRef ES, LLVMOrcErrorReporterFunction ReportError,
void *Ctx) {
@@ -307,6 +357,42 @@ LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name) {
OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(ES)->intern(Name)));
}
+void LLVMOrcExecutionSessionLookup(
+ LLVMOrcExecutionSessionRef ES, LLVMOrcLookupKind K,
+ LLVMOrcCJITDylibSearchOrder SearchOrder, size_t SearchOrderSize,
+ LLVMOrcCLookupSet Symbols, size_t SymbolsSize,
+ LLVMOrcExecutionSessionLookupHandleResultFunction HandleResult, void *Ctx) {
+ assert(ES && "ES cannot be null");
+ assert(SearchOrder && "SearchOrder cannot be null");
+ assert(Symbols && "Symbols cannot be null");
+ assert(HandleResult && "HandleResult cannot be null");
+
+ JITDylibSearchOrder SO;
+ for (size_t I = 0; I != SearchOrderSize; ++I)
+ SO.push_back({unwrap(SearchOrder[I].JD),
+ toJITDylibLookupFlags(SearchOrder[I].JDLookupFlags)});
+
+ SymbolLookupSet SLS;
+ for (size_t I = 0; I != SymbolsSize; ++I)
+ SLS.add(OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Symbols[I].Name)),
+ toSymbolLookupFlags(Symbols[I].LookupFlags));
+
+ unwrap(ES)->lookup(
+ toLookupKind(K), SO, std::move(SLS), SymbolState::Ready,
+ [HandleResult, Ctx](Expected<SymbolMap> Result) {
+ if (Result) {
+ SmallVector<LLVMOrcCSymbolMapPair> CResult;
+ for (auto &KV : *Result)
+ CResult.push_back(LLVMOrcCSymbolMapPair{
+ wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first)),
+ fromJITEvaluatedSymbol(KV.second)});
+ HandleResult(LLVMErrorSuccess, CResult.data(), CResult.size(), Ctx);
+ } else
+ HandleResult(wrap(Result.takeError()), nullptr, 0, Ctx);
+ },
+ NoDependenciesToRegister);
+}
+
void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S) {
OrcV2CAPIHelper::retainPoolEntry(unwrap(S));
}
@@ -589,11 +675,19 @@ void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD,
}
LLVMOrcDefinitionGeneratorRef LLVMOrcCreateCustomCAPIDefinitionGenerator(
- LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx) {
- auto DG = std::make_unique<CAPIDefinitionGenerator>(Ctx, F);
+ LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx,
+ LLVMOrcDisposeCAPIDefinitionGeneratorFunction Dispose) {
+ auto DG = std::make_unique<CAPIDefinitionGenerator>(Dispose, Ctx, F);
return wrap(DG.release());
}
+void LLVMOrcLookupStateContinueLookup(LLVMOrcLookupStateRef S,
+ LLVMErrorRef Err) {
+ LookupState LS;
+ OrcV2CAPIHelper::resetLookupState(LS, ::unwrap(S));
+ LS.continueLookup(unwrap(Err));
+}
+
LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
LLVMOrcDefinitionGeneratorRef *Result, char GlobalPrefix,
LLVMOrcSymbolPredicate Filter, void *FilterCtx) {
@@ -951,7 +1045,7 @@ LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J,
return wrap(Sym.takeError());
}
- *Result = Sym->getAddress();
+ *Result = Sym->getValue();
return LLVMErrorSuccess;
}
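
LLVMOrcCreateCustomCAPIDefinitionGenerator now takes a dispose callback, so a
generator can own its context and free it when the generator is destroyed. A
minimal sketch of a client using the new signature (tryToGenerate, disposeCtx
and addGenerator are illustrative names; this generator simply declines to
generate anything):

#include "llvm-c/Orc.h"
#include <cstdlib>

// Returning success without adding definitions lets the lookup fall through
// to other generators on the JITDylib.
static LLVMErrorRef tryToGenerate(LLVMOrcDefinitionGeneratorRef G, void *Ctx,
                                  LLVMOrcLookupStateRef *LS,
                                  LLVMOrcLookupKind K, LLVMOrcJITDylibRef JD,
                                  LLVMOrcJITDylibLookupFlags Flags,
                                  LLVMOrcCLookupSetElement *LookupSet,
                                  size_t LookupSetSize) {
  return LLVMErrorSuccess;
}

// Called when the generator is destroyed; frees the client state.
static void disposeCtx(void *Ctx) { std::free(Ctx); }

void addGenerator(LLVMOrcJITDylibRef JD) {
  void *Ctx = std::malloc(64); // hypothetical client state
  LLVMOrcDefinitionGeneratorRef DG = LLVMOrcCreateCustomCAPIDefinitionGenerator(
      tryToGenerate, Ctx, disposeCtx);
  LLVMOrcJITDylibAddGenerator(JD, DG);
}
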
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
index 64fc717b7b56..2bb204e688fc 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
@@ -43,8 +43,8 @@ const char *DispatchFnName = "__llvm_orc_SimpleRemoteEPC_dispatch_fn";
} // end namespace SimpleRemoteEPCDefaultBootstrapSymbolNames
-SimpleRemoteEPCTransportClient::~SimpleRemoteEPCTransportClient() {}
-SimpleRemoteEPCTransport::~SimpleRemoteEPCTransport() {}
+SimpleRemoteEPCTransportClient::~SimpleRemoteEPCTransportClient() = default;
+SimpleRemoteEPCTransport::~SimpleRemoteEPCTransport() = default;
Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>>
FDSimpleRemoteEPCTransport::Create(SimpleRemoteEPCTransportClient &C, int InFD,
diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
index 0b4755fe23cf..b52d01318c0d 100644
--- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
@@ -85,7 +85,7 @@ void IRSpeculationLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
auto IRNames = QueryAnalysis(Fn);
// Instrument and register if Query has result
- if (IRNames.hasValue()) {
+ if (IRNames) {
// Emit globals for each function.
auto LoadValueTy = Type::getInt8Ty(MContext);
@@ -126,7 +126,7 @@ void IRSpeculationLayer::emit(std::unique_ptr<MaterializationResponsibility> R,
assert(Mutator.GetInsertBlock()->getParent() == &Fn &&
"IR builder association mismatch?");
- S.registerSymbols(internToJITSymbols(IRNames.getValue()),
+ S.registerSymbols(internToJITSymbols(*IRNames),
&R->getTargetJITDylib());
}
}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
index b6b21bde1182..8ab0af3eab6e 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
@@ -22,9 +22,9 @@ using namespace llvm::orc::shared;
namespace llvm {
namespace orc {
-ExecutorBootstrapService::~ExecutorBootstrapService() {}
+ExecutorBootstrapService::~ExecutorBootstrapService() = default;
-SimpleRemoteEPCServer::Dispatcher::~Dispatcher() {}
+SimpleRemoteEPCServer::Dispatcher::~Dispatcher() = default;
#if LLVM_ENABLE_THREADS
void SimpleRemoteEPCServer::ThreadDispatcher::dispatch(
diff --git a/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp
index 111c84ec87ed..11a99986f2ee 100644
--- a/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp
@@ -16,7 +16,7 @@ char GenericNamedTask::ID = 0;
const char *GenericNamedTask::DefaultDescription = "Generic Task";
void Task::anchor() {}
-TaskDispatcher::~TaskDispatcher() {}
+TaskDispatcher::~TaskDispatcher() = default;
void InPlaceTaskDispatcher::dispatch(std::unique_ptr<Task> T) { T->run(); }
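
The recurring change from '{}' to '= default' in this diff keeps each
destructor defined out of line, so it still serves as the class's key
function and anchors the vtable in a single translation unit, while stating
explicitly that there is no body to maintain. A schematic example (Widget is
illustrative):

// Widget.h
struct Widget {
  virtual ~Widget(); // declared here, defined out of line
  virtual void run() {}
};

// Widget.cpp
Widget::~Widget() = default; // key function: the vtable is emitted here
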
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index 9c8d402364c6..bc42eebf3fec 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -29,7 +29,7 @@
namespace llvm {
-RTDyldMemoryManager::~RTDyldMemoryManager() {}
+RTDyldMemoryManager::~RTDyldMemoryManager() = default;
#if defined(HAVE_REGISTER_FRAME) && defined(HAVE_DEREGISTER_FRAME) && \
!defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
@@ -95,18 +95,16 @@ void RTDyldMemoryManager::registerEHFramesInProcess(uint8_t *Addr,
// and projects/libunwind/src/UnwindLevel1-gcc-ext.c.
const char *P = (const char *)Addr;
const char *End = P + Size;
- do {
+ while (P != End)
P = processFDE(P, false);
- } while(P != End);
}
void RTDyldMemoryManager::deregisterEHFramesInProcess(uint8_t *Addr,
size_t Size) {
const char *P = (const char *)Addr;
const char *End = P + Size;
- do {
+ while (P != End)
P = processFDE(P, true);
- } while(P != End);
}
#else
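
The switch from do/while to while above matters for the empty case: with
Size == 0 the old loop called processFDE once on a zero-length buffer before
testing the condition. A trivial illustration (processOne stands in for
processFDE):

#include <cassert>

const char *processOne(const char *P) { return P + 4; }

int main() {
  const char *Buf = nullptr;
  const char *P = Buf, *End = Buf; // empty range
  while (P != End) // guards before the first iteration
    P = processOne(P);
  assert(P == End); // the body never ran
}
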
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 3f38d26869d4..2e0cba849165 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -66,7 +66,7 @@ std::error_code RuntimeDyldError::convertToErrorCode() const {
}
// Empty out-of-line virtual destructor as the key function.
-RuntimeDyldImpl::~RuntimeDyldImpl() {}
+RuntimeDyldImpl::~RuntimeDyldImpl() = default;
// Pin LoadedObjectInfo's vtables to this file.
void RuntimeDyld::LoadedObjectInfo::anchor() {}
@@ -1311,7 +1311,7 @@ RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr,
ProcessAllSections = false;
}
-RuntimeDyld::~RuntimeDyld() {}
+RuntimeDyld::~RuntimeDyld() = default;
static std::unique_ptr<RuntimeDyldCOFF>
createRuntimeDyldCOFF(
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 33db23408cf2..ae1bb5a1da4b 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <cctype>
#include <memory>
@@ -892,7 +893,7 @@ RuntimeDyldChecker::RuntimeDyldChecker(
std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter,
ErrStream)) {}
-RuntimeDyldChecker::~RuntimeDyldChecker() {}
+RuntimeDyldChecker::~RuntimeDyldChecker() = default;
bool RuntimeDyldChecker::check(StringRef CheckExpr) const {
return Impl->check(CheckExpr);
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index f92618afdff6..da1102fc9f07 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -216,7 +216,7 @@ namespace llvm {
RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
JITSymbolResolver &Resolver)
: RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {}
-RuntimeDyldELF::~RuntimeDyldELF() {}
+RuntimeDyldELF::~RuntimeDyldELF() = default;
void RuntimeDyldELF::registerEHFrames() {
for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) {
@@ -446,6 +446,13 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
write(isBE, TargetPtr, static_cast<uint32_t>(Result));
break;
}
+ case ELF::R_AARCH64_PREL16: {
+ uint64_t Result = Value + Addend - FinalAddress;
+ assert(static_cast<int64_t>(Result) >= INT16_MIN &&
+ static_cast<int64_t>(Result) <= UINT16_MAX);
+ write(isBE, TargetPtr, static_cast<uint16_t>(Result & 0xffffU));
+ break;
+ }
case ELF::R_AARCH64_PREL32: {
uint64_t Result = Value + Addend - FinalAddress;
assert(static_cast<int64_t>(Result) >= INT32_MIN &&
diff --git a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index 56b232b9dbcd..b23e33039c35 100644
--- a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -238,7 +238,7 @@ SectionMemoryManager::~SectionMemoryManager() {
}
}
-SectionMemoryManager::MemoryMapper::~MemoryMapper() {}
+SectionMemoryManager::MemoryMapper::~MemoryMapper() = default;
void SectionMemoryManager::anchor() {}
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index 6186af444e73..bf13b6c325ec 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -1651,6 +1651,8 @@ std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
switch (Kind) {
case Check::CheckNone:
return "invalid";
+ case Check::CheckMisspelled:
+ return "misspelled";
case Check::CheckPlain:
if (Count > 1)
return WithModifiers("-COUNT");
@@ -1680,7 +1682,8 @@ std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
}
static std::pair<Check::FileCheckType, StringRef>
-FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
+FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix,
+ bool &Misspelled) {
if (Buffer.size() <= Prefix.size())
return {Check::CheckNone, StringRef()};
@@ -1722,7 +1725,9 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
if (Rest.front() == '{')
return ConsumeModifiers(Check::CheckPlain);
- if (!Rest.consume_front("-"))
+ if (Rest.consume_front("_"))
+ Misspelled = true;
+ else if (!Rest.consume_front("-"))
return {Check::CheckNone, StringRef()};
if (Rest.consume_front("COUNT-")) {
@@ -1766,6 +1771,15 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
return {Check::CheckNone, Rest};
}
+static std::pair<Check::FileCheckType, StringRef>
+FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
+ bool Misspelled = false;
+ auto Res = FindCheckType(Req, Buffer, Prefix, Misspelled);
+ if (Res.first != Check::CheckNone && Misspelled)
+ return {Check::CheckMisspelled, Res.second};
+ return Res;
+}
+
// From the given position, find the next character after the word.
static size_t SkipWord(StringRef Str, size_t Loc) {
while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
@@ -1939,6 +1953,16 @@ bool FileCheck::readCheckFile(
Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
: AfterSuffix;
+ // Complain about misspelled directives.
+ if (CheckTy == Check::CheckMisspelled) {
+ StringRef UsedDirective(UsedPrefix.data(),
+ AfterSuffix.data() - UsedPrefix.data());
+ SM.PrintMessage(SMLoc::getFromPointer(UsedDirective.data()),
+ SourceMgr::DK_Error,
+ "misspelled directive '" + UsedDirective + "'");
+ return true;
+ }
+
// Complain about useful-looking but unsupported suffixes.
if (CheckTy == Check::CheckBadNot) {
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
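
The misspelled-directive path catches an underscore where the dash belongs
(e.g. "CHECK_NEXT:"), turning a previously ignored line into a hard error. A
compact restatement of the classification logic (illustrative; the real
suffix parsing is elided):

#include <cassert>
#include <string_view>

enum class CheckTy { None, Misspelled, Plain };

// Classify the text that follows a check prefix such as "CHECK".
CheckTy classify(std::string_view AfterPrefix) {
  bool Misspelled = false;
  if (!AfterPrefix.empty() && AfterPrefix.front() == '_')
    Misspelled = true;
  else if (AfterPrefix.empty() || AfterPrefix.front() != '-')
    return CheckTy::None;
  // ... suffix parsing elided ...
  return Misspelled ? CheckTy::Misspelled : CheckTy::Plain;
}

int main() {
  assert(classify("-NEXT:") == CheckTy::Plain);
  assert(classify("_NEXT:") == CheckTy::Misspelled); // now reported
  assert(classify("ING:") == CheckTy::None); // "CHECKING" is not a directive
}
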
diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
index 11d8da097c6c..6e8856f481af 100644
--- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/OpenMP/OMPContext.h"
-#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -214,7 +213,7 @@ static int isVariantApplicableInContextHelper(
});
Optional<bool> Result = HandleTrait(Property, IsActiveTrait);
- if (Result.hasValue())
+ if (Result)
return Result.getValue();
}
@@ -235,7 +234,7 @@ static int isVariantApplicableInContextHelper(
ConstructMatches->push_back(ConstructIdx - 1);
Optional<bool> Result = HandleTrait(Property, FoundInOrder);
- if (Result.hasValue())
+ if (Result)
return Result.getValue();
if (!FoundInOrder) {
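
These hasValue() cleanups rely on Optional converting to bool by presence,
not by the contained value, which matters precisely for Optional<bool> as
used here. A two-line demonstration with std::optional, which has the same
semantics:

#include <cassert>
#include <optional>

int main() {
  std::optional<bool> Result = false;
  assert(Result);           // engaged, even though the value is false
  assert(*Result == false); // reading the value still needs a dereference
}
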
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 99001269e1f8..9b08a24e14d4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -15,15 +15,15 @@
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
@@ -31,17 +31,14 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Error.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <cstdint>
-#include <sstream>
#define DEBUG_TYPE "openmp-ir-builder"
@@ -72,8 +69,263 @@ static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
return false;
return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
}
+
+static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) {
+ // Valid ordered/unordered and base algorithm combinations.
+ switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
+ case OMPScheduleType::UnorderedStaticChunked:
+ case OMPScheduleType::UnorderedStatic:
+ case OMPScheduleType::UnorderedDynamicChunked:
+ case OMPScheduleType::UnorderedGuidedChunked:
+ case OMPScheduleType::UnorderedRuntime:
+ case OMPScheduleType::UnorderedAuto:
+ case OMPScheduleType::UnorderedTrapezoidal:
+ case OMPScheduleType::UnorderedGreedy:
+ case OMPScheduleType::UnorderedBalanced:
+ case OMPScheduleType::UnorderedGuidedIterativeChunked:
+ case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
+ case OMPScheduleType::UnorderedSteal:
+ case OMPScheduleType::UnorderedStaticBalancedChunked:
+ case OMPScheduleType::UnorderedGuidedSimd:
+ case OMPScheduleType::UnorderedRuntimeSimd:
+ case OMPScheduleType::OrderedStaticChunked:
+ case OMPScheduleType::OrderedStatic:
+ case OMPScheduleType::OrderedDynamicChunked:
+ case OMPScheduleType::OrderedGuidedChunked:
+ case OMPScheduleType::OrderedRuntime:
+ case OMPScheduleType::OrderedAuto:
+ case OMPScheduleType::OrderdTrapezoidal:
+ case OMPScheduleType::NomergeUnorderedStaticChunked:
+ case OMPScheduleType::NomergeUnorderedStatic:
+ case OMPScheduleType::NomergeUnorderedDynamicChunked:
+ case OMPScheduleType::NomergeUnorderedGuidedChunked:
+ case OMPScheduleType::NomergeUnorderedRuntime:
+ case OMPScheduleType::NomergeUnorderedAuto:
+ case OMPScheduleType::NomergeUnorderedTrapezoidal:
+ case OMPScheduleType::NomergeUnorderedGreedy:
+ case OMPScheduleType::NomergeUnorderedBalanced:
+ case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
+ case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
+ case OMPScheduleType::NomergeUnorderedSteal:
+ case OMPScheduleType::NomergeOrderedStaticChunked:
+ case OMPScheduleType::NomergeOrderedStatic:
+ case OMPScheduleType::NomergeOrderedDynamicChunked:
+ case OMPScheduleType::NomergeOrderedGuidedChunked:
+ case OMPScheduleType::NomergeOrderedRuntime:
+ case OMPScheduleType::NomergeOrderedAuto:
+ case OMPScheduleType::NomergeOrderedTrapezoidal:
+ break;
+ default:
+ return false;
+ }
+
+ // Must not set both monotonicity modifiers at the same time.
+ OMPScheduleType MonotonicityFlags =
+ SchedType & OMPScheduleType::MonotonicityMask;
+ if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
+ return false;
+
+ return true;
+}
#endif
+/// Determine which scheduling algorithm to use from the schedule clause
+/// arguments.
+static OMPScheduleType
+getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
+ bool HasSimdModifier) {
+ // Currently, the default schedule is static.
+ switch (ClauseKind) {
+ case OMP_SCHEDULE_Default:
+ case OMP_SCHEDULE_Static:
+ return HasChunks ? OMPScheduleType::BaseStaticChunked
+ : OMPScheduleType::BaseStatic;
+ case OMP_SCHEDULE_Dynamic:
+ return OMPScheduleType::BaseDynamicChunked;
+ case OMP_SCHEDULE_Guided:
+ return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
+ : OMPScheduleType::BaseGuidedChunked;
+ case OMP_SCHEDULE_Auto:
+ return llvm::omp::OMPScheduleType::BaseAuto;
+ case OMP_SCHEDULE_Runtime:
+ return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
+ : OMPScheduleType::BaseRuntime;
+ }
+ llvm_unreachable("unhandled schedule clause argument");
+}
+
+/// Adds ordering modifier flags to schedule type.
+static OMPScheduleType
+getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType,
+ bool HasOrderedClause) {
+ assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
+ OMPScheduleType::None &&
+ "Must not have ordering nor monotonicity flags already set");
+
+ OMPScheduleType OrderingModifier = HasOrderedClause
+ ? OMPScheduleType::ModifierOrdered
+ : OMPScheduleType::ModifierUnordered;
+ OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
+
+ // Unsupported combinations are mapped to supported ones.
+ if (OrderingScheduleType ==
+ (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
+ return OMPScheduleType::OrderedGuidedChunked;
+ else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
+ OMPScheduleType::ModifierOrdered))
+ return OMPScheduleType::OrderedRuntime;
+
+ return OrderingScheduleType;
+}
+
+/// Adds monotonicity modifier flags to schedule type.
+static OMPScheduleType
+getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType,
+ bool HasSimdModifier, bool HasMonotonic,
+ bool HasNonmonotonic, bool HasOrderedClause) {
+ assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
+ OMPScheduleType::None &&
+ "Must not have monotonicity flags already set");
+ assert((!HasMonotonic || !HasNonmonotonic) &&
+ "Monotonic and Nonmonotonic are contradicting each other");
+
+ if (HasMonotonic) {
+ return ScheduleType | OMPScheduleType::ModifierMonotonic;
+ } else if (HasNonmonotonic) {
+ return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
+ } else {
+ // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
+ // If the static schedule kind is specified or if the ordered clause is
+ // specified, and if the nonmonotonic modifier is not specified, the
+ // effect is as if the monotonic modifier is specified. Otherwise, unless
+ // the monotonic modifier is specified, the effect is as if the
+ // nonmonotonic modifier is specified.
+ OMPScheduleType BaseScheduleType =
+ ScheduleType & ~OMPScheduleType::ModifierMask;
+ if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
+ (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
+ HasOrderedClause) {
+ // The monotonic modifier is the default in the OpenMP runtime library, so
+ // there is no need to set it.
+ return ScheduleType;
+ } else {
+ return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
+ }
+ }
+}
+
+/// Determine the schedule type using schedule and ordering clause arguments.
+static OMPScheduleType
+computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
+ bool HasSimdModifier, bool HasMonotonicModifier,
+ bool HasNonmonotonicModifier, bool HasOrderedClause) {
+ OMPScheduleType BaseSchedule =
+ getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier);
+ OMPScheduleType OrderedSchedule =
+ getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
+ OMPScheduleType Result = getOpenMPMonotonicityScheduleType(
+ OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
+ HasNonmonotonicModifier, HasOrderedClause);
+
+ assert(isValidWorkshareLoopScheduleType(Result));
+ return Result;
+}
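+
+// Worked example (informative): 'schedule(dynamic, 4)' without an ordered
+// clause maps to BaseDynamicChunked, stays unordered, and the OpenMP 5.1
+// default rule then adds the nonmonotonic modifier, yielding
+// UnorderedDynamicChunked | ModifierNonmonotonic. Plain 'schedule(static)'
+// becomes UnorderedStatic with no monotonicity bit, since monotonic is
+// already the runtime's default for static schedules.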
+
+/// Make \p Source branch to \p Target.
+///
+/// Handles two situations:
+/// * \p Source already has an unconditional branch.
+/// * \p Source is a degenerate block (no terminator because the BB is
+/// the current head of the IR construction).
+static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
+ if (Instruction *Term = Source->getTerminator()) {
+ auto *Br = cast<BranchInst>(Term);
+ assert(!Br->isConditional() &&
+ "BB's terminator must be an unconditional branch (or degenerate)");
+ BasicBlock *Succ = Br->getSuccessor(0);
+ Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
+ Br->setSuccessor(0, Target);
+ return;
+ }
+
+ auto *NewBr = BranchInst::Create(Target, Source);
+ NewBr->setDebugLoc(DL);
+}
+
+void llvm::spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
+ bool CreateBranch) {
+ assert(New->getFirstInsertionPt() == New->begin() &&
+ "Target BB must not have PHI nodes");
+
+ // Move instructions to new block.
+ BasicBlock *Old = IP.getBlock();
+ New->getInstList().splice(New->begin(), Old->getInstList(), IP.getPoint(),
+ Old->end());
+
+ if (CreateBranch)
+ BranchInst::Create(New, Old);
+}
+
+void llvm::spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) {
+ DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
+ BasicBlock *Old = Builder.GetInsertBlock();
+
+ spliceBB(Builder.saveIP(), New, CreateBranch);
+ if (CreateBranch)
+ Builder.SetInsertPoint(Old->getTerminator());
+ else
+ Builder.SetInsertPoint(Old);
+
+ // SetInsertPoint also updates the Builder's debug location, but we want to
+ // keep the one the Builder was configured to use.
+ Builder.SetCurrentDebugLocation(DebugLoc);
+}
+
+BasicBlock *llvm::splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
+ llvm::Twine Name) {
+ BasicBlock *Old = IP.getBlock();
+ BasicBlock *New = BasicBlock::Create(
+ Old->getContext(), Name.isTriviallyEmpty() ? Old->getName() : Name,
+ Old->getParent(), Old->getNextNode());
+ spliceBB(IP, New, CreateBranch);
+ New->replaceSuccessorsPhiUsesWith(Old, New);
+ return New;
+}
+
+BasicBlock *llvm::splitBB(IRBuilderBase &Builder, bool CreateBranch,
+ llvm::Twine Name) {
+ DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
+ BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
+ if (CreateBranch)
+ Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+ else
+ Builder.SetInsertPoint(Builder.GetInsertBlock());
+ // SetInsertPoint also updates the Builder's debug location, but we want to
+ // keep the one the Builder was configured to use.
+ Builder.SetCurrentDebugLocation(DebugLoc);
+ return New;
+}
+
+BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch,
+ llvm::Twine Name) {
+ DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
+ BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
+ if (CreateBranch)
+ Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+ else
+ Builder.SetInsertPoint(Builder.GetInsertBlock());
+ // SetInsertPoint also updates the Builder's debug location, but we want to
+ // keep the one the Builder was configured to use.
+ Builder.SetCurrentDebugLocation(DebugLoc);
+ return New;
+}
+
+BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
+ llvm::Twine Suffix) {
+ BasicBlock *Old = Builder.GetInsertBlock();
+ return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
+}
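+
+// Usage sketch (illustrative only; 'Builder' is any configured IRBuilder<>):
+// ```
+// // Split at the current insertion point. The new block gets the old
+// // block's name plus ".cont", and the Builder keeps inserting into the
+// // predecessor, just before the newly created branch.
+// BasicBlock *Cont =
+//     splitBBWithSuffix(Builder, /*CreateBranch=*/true, ".cont");
+// ```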
+
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
LLVMContext &Ctx = Fn.getContext();
@@ -199,6 +451,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
/* AssumptionCache */ nullptr,
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
+ /* AllocaBlock*/ OI.OuterAllocaBB,
/* Suffix */ ".omp_par");
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
@@ -500,6 +753,44 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
return Builder.saveIP();
}
+void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name,
+ uint64_t Size, int32_t Flags,
+ StringRef SectionName) {
+ Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Type *SizeTy = M.getDataLayout().getIntPtrType(M.getContext());
+
+ Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
+
+ // Create the constant string used to look up the symbol on the device.
+ auto *Str =
+ new llvm::GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, AddrName,
+ ".omp_offloading.entry_name");
+ Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ // Construct the offloading entry.
+ Constant *EntryData[] = {
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, Int8PtrTy),
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, Int8PtrTy),
+ ConstantInt::get(SizeTy, Size),
+ ConstantInt::get(Int32Ty, Flags),
+ ConstantInt::get(Int32Ty, 0),
+ };
+ Constant *EntryInitializer =
+ ConstantStruct::get(OpenMPIRBuilder::OffloadEntry, EntryData);
+
+ auto *Entry = new GlobalVariable(
+ M, OpenMPIRBuilder::OffloadEntry,
+ /* isConstant = */ true, GlobalValue::WeakAnyLinkage, EntryInitializer,
+ ".omp_offloading.entry." + Name, nullptr, GlobalValue::NotThreadLocal,
+ M.getDataLayout().getDefaultGlobalsAddressSpace());
+
+ // The entry has to be created in the section the linker expects it to be in.
+ Entry->setSection(SectionName);
+ Entry->setAlignment(Align(1));
+}
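+
+// For reference, each emitted entry roughly corresponds to this C struct used
+// by the device runtime (a sketch; the authoritative layout is the
+// OpenMPIRBuilder::OffloadEntry type used above):
+// ```
+// struct __tgt_offload_entry {
+//   void *addr;       // device-visible symbol address (Addr)
+//   char *name;       // points at the .omp_offloading.entry_name string
+//   size_t size;      // Size argument
+//   int32_t flags;    // Flags argument
+//   int32_t reserved; // always 0 here
+// };
+// ```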
+
void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
omp::Directive CanceledDirective,
FinalizeCallbackTy ExitCB) {
@@ -670,7 +961,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
// Let the caller create the body.
assert(BodyGenCB && "Expected body generation callback!");
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
- BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
+ BodyGenCB(InnerAllocaIP, CodeGenIP);
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
@@ -777,6 +1068,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
FiniCB(PreFiniIP);
+ OI.OuterAllocaBB = OuterAllocaBlock;
OI.EntryBB = PRegEntryBB;
OI.ExitBB = PRegExitBB;
@@ -800,6 +1092,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
/* AssumptionCache */ nullptr,
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
+ /* AllocationBlock */ OuterAllocaBlock,
/* Suffix */ ".omp_par");
// Find inputs to, outputs from the code region.
@@ -960,10 +1253,185 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
emitTaskyieldImpl(Loc);
}
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createTask(const LocationDescription &Loc,
+ InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
+ bool Tied, Value *Final) {
+ if (!updateToLocation(Loc))
+ return InsertPointTy();
+
+ // The current basic block is split into four basic blocks. After outlining,
+ // they will be mapped as follows:
+ // ```
+ // def current_fn() {
+ // current_basic_block:
+ // br label %task.exit
+ // task.exit:
+ // ; instructions after task
+ // }
+ // def outlined_fn() {
+ // task.alloca:
+ // br label %task.body
+ // task.body:
+ // ret void
+ // }
+ // ```
+ BasicBlock *TaskExitBB = splitBB(Builder, /*CreateBranch=*/true, "task.exit");
+ BasicBlock *TaskBodyBB = splitBB(Builder, /*CreateBranch=*/true, "task.body");
+ BasicBlock *TaskAllocaBB =
+ splitBB(Builder, /*CreateBranch=*/true, "task.alloca");
+
+ OutlineInfo OI;
+ OI.EntryBB = TaskAllocaBB;
+ OI.OuterAllocaBB = AllocaIP.getBlock();
+ OI.ExitBB = TaskExitBB;
+ OI.PostOutlineCB = [this, &Loc, Tied, Final](Function &OutlinedFn) {
+ // The input IR here looks like the following:
+ // ```
+ // func @current_fn() {
+ // outlined_fn(%args)
+ // }
+ // func @outlined_fn(%args) { ... }
+ // ```
+ //
+ // This is changed to the following:
+ //
+ // ```
+ // func @current_fn() {
+ // runtime_call(..., wrapper_fn, ...)
+ // }
+ // func @wrapper_fn(..., %args) {
+ // outlined_fn(%args)
+ // }
+ // func @outlined_fn(%args) { ... }
+ // ```
+
+ // The stale call instruction will be replaced with a runtime call that
+ // takes the wrapper function as the task entry point.
+ assert(OutlinedFn.getNumUses() == 1 &&
+ "there must be a single user for the outlined function");
+ CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
+
+ // HasTaskData is true if any variables are captured in the outlined region,
+ // false otherwise.
+ bool HasTaskData = StaleCI->arg_size() > 0;
+ Builder.SetInsertPoint(StaleCI);
+
+ // Gather the arguments for emitting the runtime call for
+ // @__kmpc_omp_task_alloc
+ Function *TaskAllocFn =
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+
+ // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID) for the runtime
+ // call.
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadID = getOrCreateThreadID(Ident);
+
+ // Argument - `flags`
+ // Task is tied iff (Flags & 1) == 1.
+ // Task is untied iff (Flags & 1) == 0.
+ // Task is final iff (Flags & 2) == 2.
+ // Task is not final iff (Flags & 2) == 0.
+ // TODO: Handle the other flags.
+ Value *Flags = Builder.getInt32(Tied);
+ if (Final) {
+ Value *FinalFlag =
+ Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
+ Flags = Builder.CreateOr(FinalFlag, Flags);
+ }
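+ // E.g. a tied task whose 'final' clause evaluates to true ends up with
+ // Flags == 3 (tied bit | final bit).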
+
+ // Argument - `sizeof_kmp_task_t` (TaskSize)
+ // TaskSize refers to the size in bytes of the kmp_task_t data structure,
+ // including private vars accessed in the task.
+ Value *TaskSize = Builder.getInt64(0);
+ if (HasTaskData) {
+ AllocaInst *ArgStructAlloca =
+ dyn_cast<AllocaInst>(StaleCI->getArgOperand(0));
+ assert(ArgStructAlloca &&
+ "Unable to find the alloca instruction corresponding to arguments "
+ "for extracted function");
+ StructType *ArgStructType =
+ dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
+ assert(ArgStructType && "Unable to find struct type corresponding to "
+ "arguments for extracted function");
+ TaskSize =
+ Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
+ }
+
+ // TODO: Argument - sizeof_shareds
+
+ // Argument - task_entry (the wrapper function)
+ // If the outlined function has some captured variables (i.e. HasTaskData is
+ // true), then the wrapper function will have an additional argument (the
+ // struct containing captured variables). Otherwise, no such argument will
+ // be present.
+ SmallVector<Type *> WrapperArgTys{Builder.getInt32Ty()};
+ if (HasTaskData)
+ WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType());
+ FunctionCallee WrapperFuncVal = M.getOrInsertFunction(
+ (Twine(OutlinedFn.getName()) + ".wrapper").str(),
+ FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false));
+ Function *WrapperFunc = dyn_cast<Function>(WrapperFuncVal.getCallee());
+ PointerType *WrapperFuncBitcastType =
+ FunctionType::get(Builder.getInt32Ty(),
+ {Builder.getInt32Ty(), Builder.getInt8PtrTy()}, false)
+ ->getPointerTo();
+ Value *WrapperFuncBitcast =
+ ConstantExpr::getBitCast(WrapperFunc, WrapperFuncBitcastType);
+
+ // Emit the @__kmpc_omp_task_alloc runtime call
+ // The runtime call returns a pointer to an area where the task captured
+ // variables must be copied before the task is run (NewTaskData)
+ CallInst *NewTaskData = Builder.CreateCall(
+ TaskAllocFn,
+ {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
+ /*sizeof_task=*/TaskSize, /*sizeof_shared=*/Builder.getInt64(0),
+ /*task_func=*/WrapperFuncBitcast});
+
+ // Copy the arguments for outlined function
+ if (HasTaskData) {
+ Value *TaskData = StaleCI->getArgOperand(0);
+ Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
+ Builder.CreateMemCpy(NewTaskData, Alignment, TaskData, Alignment,
+ TaskSize);
+ }
+
+ // Emit the @__kmpc_omp_task runtime call to spawn the task
+ Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
+ Builder.CreateCall(TaskFn, {Ident, ThreadID, NewTaskData});
+
+ StaleCI->eraseFromParent();
+
+ // Emit the body for wrapper function
+ BasicBlock *WrapperEntryBB =
+ BasicBlock::Create(M.getContext(), "", WrapperFunc);
+ Builder.SetInsertPoint(WrapperEntryBB);
+ if (HasTaskData)
+ Builder.CreateCall(&OutlinedFn, {WrapperFunc->getArg(1)});
+ else
+ Builder.CreateCall(&OutlinedFn);
+ Builder.CreateRet(Builder.getInt32(0));
+ };
+
+ addOutlineInfo(std::move(OI));
+
+ InsertPointTy TaskAllocaIP =
+ InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
+ InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
+ BodyGenCB(TaskAllocaIP, TaskBodyIP);
+ Builder.SetInsertPoint(TaskExitBB);
+
+ return Builder.saveIP();
+}
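+
+// Usage sketch (illustrative; 'OMPBuilder', 'Loc' and 'AllocaIP' are assumed
+// to be in scope):
+// ```
+// auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+//   Builder.restoreIP(CodeGenIP);
+//   // ... emit the task body here ...
+// };
+// Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
+//                                         /*Tied=*/false, /*Final=*/nullptr));
+// ```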
+
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
const LocationDescription &Loc, InsertPointTy AllocaIP,
ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
+ assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required");
+
if (!updateToLocation(Loc))
return Loc.IP;
@@ -1006,26 +1474,25 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
// section_loop.after:
// <FiniCB>;
auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
- auto *CurFn = CodeGenIP.getBlock()->getParent();
- auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
- auto *ForExitBB = CodeGenIP.getBlock()
- ->getSinglePredecessor()
- ->getTerminator()
- ->getSuccessor(1);
- SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
Builder.restoreIP(CodeGenIP);
+ BasicBlock *Continue =
+ splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after");
+ Function *CurFn = Continue->getParent();
+ SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, Continue);
+
unsigned CaseNumber = 0;
for (auto SectionCB : SectionCBs) {
- auto *CaseBB = BasicBlock::Create(M.getContext(),
- "omp_section_loop.body.case", CurFn);
+ BasicBlock *CaseBB = BasicBlock::Create(
+ M.getContext(), "omp_section_loop.body.case", CurFn, Continue);
SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
Builder.SetInsertPoint(CaseBB);
- SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
+ BranchInst *CaseEndBr = Builder.CreateBr(Continue);
+ SectionCB(InsertPointTy(),
+ {CaseEndBr->getParent(), CaseEndBr->getIterator()});
CaseNumber++;
}
// remove the existing terminator from body BB since there can be no
// terminators after switch/case
- CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
};
// Loop body ends here
// LowerBound, UpperBound, and Stride for createCanonicalLoop
@@ -1035,29 +1502,22 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
Value *ST = ConstantInt::get(I32Ty, 1);
llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
- Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
- AllocaIP = Builder.saveIP();
InsertPointTy AfterIP =
applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);
- BasicBlock *LoopAfterBB = AfterIP.getBlock();
- Instruction *SplitPos = LoopAfterBB->getTerminator();
- if (!isa_and_nonnull<BranchInst>(SplitPos))
- SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
- // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
- // which requires a BB with branch
- BasicBlock *ExitBB =
- LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
- SplitPos->eraseFromParent();
// Apply the finalization callback in LoopAfterBB
auto FiniInfo = FinalizationStack.pop_back_val();
assert(FiniInfo.DK == OMPD_sections &&
"Unexpected finalization stack state!");
- Builder.SetInsertPoint(LoopAfterBB->getTerminator());
- FiniInfo.FiniCB(Builder.saveIP());
- Builder.SetInsertPoint(ExitBB);
+ if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
+ Builder.restoreIP(AfterIP);
+ BasicBlock *FiniBB =
+ splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini");
+ CB(Builder.saveIP());
+ AfterIP = {FiniBB, FiniBB->begin()};
+ }
- return Builder.saveIP();
+ return AfterIP;
}
OpenMPIRBuilder::InsertPointTy
@@ -1402,10 +1862,8 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
// Split the loop at the insertion point: Branch to the preheader and move
// every following instruction to after the loop (the After BB). Also, the
// new successor is the loop's after block.
+ spliceBB(Builder, After, /*CreateBranch=*/false);
Builder.CreateBr(CL->getPreheader());
- After->getInstList().splice(After->begin(), BB->getInstList(),
- Builder.GetInsertPoint(), BB->end());
- After->replaceSuccessorsPhiUsesWith(BB, After);
}
// Emit the body content. We do it after connecting the loop to the CFG to
@@ -1506,20 +1964,10 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
-// Sets the number of loop iterations to the given value. This value must be
-// valid in the condition block (i.e., defined in the preheader) and is
-// interpreted as an unsigned integer.
-void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
- Instruction *CmpI = &CLI->getCond()->front();
- assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
- CmpI->setOperand(1, TripCount);
- CLI->assertOK();
-}
-
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
InsertPointTy AllocaIP,
- bool NeedsBarrier, Value *Chunk) {
+ bool NeedsBarrier) {
assert(CLI->isValid() && "Requires a valid canonical loop");
assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
"Require dedicated allocate IP");
@@ -1559,38 +2007,31 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
Builder.CreateStore(UpperBound, PUpperBound);
Builder.CreateStore(One, PStride);
- // FIXME: schedule(static) is NOT the same as schedule(static,1)
- if (!Chunk)
- Chunk = One;
-
Value *ThreadNum = getOrCreateThreadID(SrcLoc);
- Constant *SchedulingType =
- ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
+ Constant *SchedulingType = ConstantInt::get(
+ I32Type, static_cast<int>(OMPScheduleType::UnorderedStatic));
// Call the "init" function and update the trip count of the loop with the
// value it produced.
Builder.CreateCall(StaticInit,
{SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
- PUpperBound, PStride, One, Chunk});
+ PUpperBound, PStride, One, Zero});
Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
- setCanonicalLoopTripCount(CLI, TripCount);
+ CLI->setTripCount(TripCount);
// Update all uses of the induction variable except the one in the condition
// block that compares it with the actual upper bound, and the increment in
// the latch block.
- // TODO: this can eventually move to CanonicalLoopInfo or to a new
- // CanonicalLoopInfoUpdater interface.
- Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
- Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
- IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
- auto *Instr = dyn_cast<Instruction>(U.getUser());
- return !Instr ||
- (Instr->getParent() != CLI->getCond() &&
- Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
+
+ CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
+ Builder.SetInsertPoint(CLI->getBody(),
+ CLI->getBody()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(DL);
+ return Builder.CreateAdd(OldIV, LowerBound);
});
// In the "exit" block, call the "fini" function.
@@ -1610,11 +2051,198 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
return AfterIP;
}
-OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, bool NeedsBarrier) {
- // Currently only supports static schedules.
- return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
+ DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+ bool NeedsBarrier, Value *ChunkSize) {
+ assert(CLI->isValid() && "Requires a valid canonical loop");
+ assert(ChunkSize && "Chunk size is required");
+
+ LLVMContext &Ctx = CLI->getFunction()->getContext();
+ Value *IV = CLI->getIndVar();
+ Value *OrigTripCount = CLI->getTripCount();
+ Type *IVTy = IV->getType();
+ assert(IVTy->getIntegerBitWidth() <= 64 &&
+ "Max supported tripcount bitwidth is 64 bits");
+ Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32 ? Type::getInt32Ty(Ctx)
+ : Type::getInt64Ty(Ctx);
+ Type *I32Type = Type::getInt32Ty(M.getContext());
+ Constant *Zero = ConstantInt::get(InternalIVTy, 0);
+ Constant *One = ConstantInt::get(InternalIVTy, 1);
+
+ // Declare useful OpenMP runtime functions.
+ FunctionCallee StaticInit =
+ getKmpcForStaticInitForType(InternalIVTy, M, *this);
+ FunctionCallee StaticFini =
+ getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
+
+ // Allocate space for computed loop bounds as expected by the "init" function.
+ Builder.restoreIP(AllocaIP);
+ Builder.SetCurrentDebugLocation(DL);
+ Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
+ Value *PLowerBound =
+ Builder.CreateAlloca(InternalIVTy, nullptr, "p.lowerbound");
+ Value *PUpperBound =
+ Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound");
+ Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride");
+
+ // Set up the source location value for the OpenMP runtime.
+ Builder.restoreIP(CLI->getPreheaderIP());
+ Builder.SetCurrentDebugLocation(DL);
+
+ // TODO: Detect overflow in ubsan or max-out with current tripcount.
+ Value *CastedChunkSize =
+ Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy, "chunksize");
+ Value *CastedTripCount =
+ Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");
+
+ Constant *SchedulingType = ConstantInt::get(
+ I32Type, static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
+ Builder.CreateStore(Zero, PLowerBound);
+ Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
+ Builder.CreateStore(OrigUpperBound, PUpperBound);
+ Builder.CreateStore(One, PStride);
+
+ // Call the "init" function and update the trip count of the loop with the
+ // value it produced.
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
+ Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadNum = getOrCreateThreadID(SrcLoc);
+ Builder.CreateCall(StaticInit,
+ {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum,
+ /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter,
+ /*plower=*/PLowerBound, /*pupper=*/PUpperBound,
+ /*pstride=*/PStride, /*incr=*/One,
+ /*chunk=*/CastedChunkSize});
+
+ // Load values written by the "init" function.
+ Value *FirstChunkStart =
+ Builder.CreateLoad(InternalIVTy, PLowerBound, "omp_firstchunk.lb");
+ Value *FirstChunkStop =
+ Builder.CreateLoad(InternalIVTy, PUpperBound, "omp_firstchunk.ub");
+ Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
+ Value *ChunkRange =
+ Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range");
+ Value *NextChunkStride =
+ Builder.CreateLoad(InternalIVTy, PStride, "omp_dispatch.stride");
+
+ // Create outer "dispatch" loop for enumerating the chunks.
+ BasicBlock *DispatchEnter = splitBB(Builder, true);
+ Value *DispatchCounter;
+ CanonicalLoopInfo *DispatchCLI = createCanonicalLoop(
+ {Builder.saveIP(), DL},
+ [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; },
+ FirstChunkStart, CastedTripCount, NextChunkStride,
+ /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{},
+ "dispatch");
+
+ // Remember the BasicBlocks of the dispatch loop we need, then invalidate it
+ // so we do not have to preserve the canonical invariant.
+ BasicBlock *DispatchBody = DispatchCLI->getBody();
+ BasicBlock *DispatchLatch = DispatchCLI->getLatch();
+ BasicBlock *DispatchExit = DispatchCLI->getExit();
+ BasicBlock *DispatchAfter = DispatchCLI->getAfter();
+ DispatchCLI->invalidate();
+
+ // Rewire the original loop to become the chunk loop inside the dispatch loop.
+ redirectTo(DispatchAfter, CLI->getAfter(), DL);
+ redirectTo(CLI->getExit(), DispatchLatch, DL);
+ redirectTo(DispatchBody, DispatchEnter, DL);
+
+ // Prepare the prolog of the chunk loop.
+ Builder.restoreIP(CLI->getPreheaderIP());
+ Builder.SetCurrentDebugLocation(DL);
+
+ // Compute the number of iterations of the chunk loop.
+ Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
+ Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
+ Value *IsLastChunk =
+ Builder.CreateICmpUGE(ChunkEnd, CastedTripCount, "omp_chunk.is_last");
+ Value *CountUntilOrigTripCount =
+ Builder.CreateSub(CastedTripCount, DispatchCounter);
+ Value *ChunkTripCount = Builder.CreateSelect(
+ IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount");
+ Value *BackcastedChunkTC =
+ Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc");
+ CLI->setTripCount(BackcastedChunkTC);
+
+ // Update all uses of the induction variable except the one in the condition
+ // block that compares it with the actual upper bound, and the increment in
+ // the latch block.
+ Value *BackcastedDispatchCounter =
+ Builder.CreateTrunc(DispatchCounter, IVTy, "omp_dispatch.iv.trunc");
+ CLI->mapIndVar([&](Instruction *) -> Value * {
+ Builder.restoreIP(CLI->getBodyIP());
+ return Builder.CreateAdd(IV, BackcastedDispatchCounter);
+ });
+
+ // In the "exit" block, call the "fini" function.
+ Builder.SetInsertPoint(DispatchExit, DispatchExit->getFirstInsertionPt());
+ Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
+
+ // Add the barrier if requested.
+ if (NeedsBarrier)
+ createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for,
+ /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false);
+
+#ifndef NDEBUG
+ // Even though we currently do not support applying additional methods to it,
+ // the chunk loop should remain a canonical loop.
+ CLI->assertOK();
+#endif
+
+ return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
+}
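+
+// The resulting control flow is roughly the following loop nest (an informal
+// sketch; names match the value names created above):
+// ```
+// for (i = omp_firstchunk.lb; i < tripcount; i += omp_dispatch.stride) {
+//   // dispatch loop: enumerates the chunks assigned to this thread
+//   for (j = 0; j < omp_chunk.tripcount; ++j)
+//     body(i + j); // original loop with remapped induction variable
+// }
+// ```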
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
+ DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+ bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind,
+ llvm::Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier,
+ bool HasNonmonotonicModifier, bool HasOrderedClause) {
+ OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
+ SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
+ HasNonmonotonicModifier, HasOrderedClause);
+
+ bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
+ OMPScheduleType::ModifierOrdered;
+ switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
+ case OMPScheduleType::BaseStatic:
+ assert(!ChunkSize && "No chunk size with static-chunked schedule");
+ if (IsOrdered)
+ return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
+ NeedsBarrier, ChunkSize);
+ // FIXME: Monotonicity ignored?
+ return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
+
+ case OMPScheduleType::BaseStaticChunked:
+ if (IsOrdered)
+ return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
+ NeedsBarrier, ChunkSize);
+ // FIXME: Monotonicity ignored?
+ return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,
+ ChunkSize);
+
+ case OMPScheduleType::BaseRuntime:
+ case OMPScheduleType::BaseAuto:
+ case OMPScheduleType::BaseGreedy:
+ case OMPScheduleType::BaseBalanced:
+ case OMPScheduleType::BaseSteal:
+ case OMPScheduleType::BaseGuidedSimd:
+ case OMPScheduleType::BaseRuntimeSimd:
+ assert(!ChunkSize &&
+ "schedule type does not support user-defined chunk sizes");
+ LLVM_FALLTHROUGH;
+ case OMPScheduleType::BaseDynamicChunked:
+ case OMPScheduleType::BaseGuidedChunked:
+ case OMPScheduleType::BaseGuidedIterativeChunked:
+ case OMPScheduleType::BaseGuidedAnalyticalChunked:
+ case OMPScheduleType::BaseStaticBalancedChunked:
+ return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
+ NeedsBarrier, ChunkSize);
+
+ default:
+ llvm_unreachable("Unknown/unimplemented schedule kind");
+ }
}
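+
+// Example (informative): '#pragma omp for schedule(guided) ordered' computes
+// OrderedGuidedChunked above and dispatches to applyDynamicWorkshareLoop,
+// whereas plain 'schedule(static)' takes the applyStaticWorkshareLoop path.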
/// Returns an LLVM function to call for initializing loop bounds using OpenMP
@@ -1649,12 +2277,32 @@ getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
+/// Returns an LLVM function to call for finalizing the dynamic loop,
+/// depending on `type`. Only i32 and i64 are supported by the runtime. Always
+/// interpret integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee
+getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
+ unsigned Bitwidth = Ty->getIntegerBitWidth();
+ if (Bitwidth == 32)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
+ llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
assert(CLI->isValid() && "Requires a valid canonical loop");
assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
"Require dedicated allocate IP");
+ assert(isValidWorkshareLoopScheduleType(SchedType) &&
+ "Require valid schedule type");
+
+ bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
+ OMPScheduleType::ModifierOrdered;
// Set up the source location value for OpenMP runtime.
Builder.SetCurrentDebugLocation(DL);
@@ -1692,6 +2340,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
BasicBlock *Header = CLI->getHeader();
BasicBlock *Exit = CLI->getExit();
BasicBlock *Cond = CLI->getCond();
+ BasicBlock *Latch = CLI->getLatch();
InsertPointTy AfterIP = CLI->getAfterIP();
// The CLI will be "broken" in the code below, as the loop is no longer
@@ -1751,6 +2400,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
assert(BI->getSuccessor(1) == Exit);
BI->setSuccessor(1, OuterCond);
+ // Call the "fini" function if "ordered" is present in wsloop directive.
+ if (Ordered) {
+ Builder.SetInsertPoint(&Latch->back());
+ FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
+ Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
+ }
+
// Add the barrier if requested.
if (NeedsBarrier) {
Builder.SetInsertPoint(&Exit->back());
@@ -1763,27 +2419,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
return AfterIP;
}
-/// Make \p Source branch to \p Target.
-///
-/// Handles two situations:
-/// * \p Source already has an unconditional branch.
-/// * \p Source is a degenerate block (no terminator because the BB is
-/// the current head of the IR construction).
-static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
- if (Instruction *Term = Source->getTerminator()) {
- auto *Br = cast<BranchInst>(Term);
- assert(!Br->isConditional() &&
- "BB's terminator must be an unconditional branch (or degenerate)");
- BasicBlock *Succ = Br->getSuccessor(0);
- Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
- Br->setSuccessor(0, Target);
- return;
- }
-
- auto *NewBr = BranchInst::Create(Target, Source);
- NewBr->setDebugLoc(DL);
-}
-
/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
/// after this \p OldTarget will be orphaned.
static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
@@ -2385,16 +3020,17 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
- unsigned LoopSize =
+ InstructionCost LoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
+ LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n");
// Loop is not unrollable if the loop contains certain instructions.
- if (NotDuplicatable || Convergent) {
+ if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) {
LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
return 1;
}
+ unsigned LoopSize = *LoopSizeIC.getValue();
// TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
// be able to use it.
@@ -2506,10 +3142,9 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
return Builder.saveIP();
}
-OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
- BodyGenCallbackTy BodyGenCB,
- FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
+ const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -2537,9 +3172,16 @@ OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
// .... single region ...
// __kmpc_end_single
// }
-
- return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
- /*Conditional*/ true, /*hasFinalize*/ true);
+ // __kmpc_barrier
+
+ EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+ /*Conditional*/ true,
+ /*hasFinalize*/ true);
+ if (!IsNowait)
+ createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
+ /* CheckCancelFlag */ false);
+ return Builder.saveIP();
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
@@ -2674,48 +3316,28 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
// generate body
BodyGenCB(/* AllocaIP */ InsertPointTy(),
- /* CodeGenIP */ Builder.saveIP(), *FiniBB);
-
- // If we didn't emit a branch to FiniBB during body generation, it means
- // FiniBB is unreachable (e.g. while(1);). stop generating all the
- // unreachable blocks, and remove anything we are not going to use.
- auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
- if (SkipEmittingRegion) {
- FiniBB->eraseFromParent();
- ExitCall->eraseFromParent();
- // Discard finalization if we have it.
- if (HasFinalize) {
- assert(!FinalizationStack.empty() &&
- "Unexpected finalization stack state!");
- FinalizationStack.pop_back();
- }
- } else {
- // emit exit call and do any needed finalization.
- auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
- assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
- FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
- "Unexpected control flow graph state!!");
- emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
- assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
- "Unexpected Control Flow State!");
- MergeBlockIntoPredecessor(FiniBB);
- }
+ /* CodeGenIP */ Builder.saveIP());
+
+ // emit exit call and do any needed finalization.
+ auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
+ assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
+ FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
+ "Unexpected control flow graph state!!");
+ emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
+ assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
+ "Unexpected Control Flow State!");
+ MergeBlockIntoPredecessor(FiniBB);
// If we are skipping the region of a non conditional, remove the exit
// block, and clear the builder's insertion point.
assert(SplitPos->getParent() == ExitBB &&
"Unexpected Insertion point location!");
- if (!Conditional && SkipEmittingRegion) {
- ExitBB->eraseFromParent();
- Builder.ClearInsertionPoint();
- } else {
- auto merged = MergeBlockIntoPredecessor(ExitBB);
- BasicBlock *ExitPredBB = SplitPos->getParent();
- auto InsertBB = merged ? ExitPredBB : ExitBB;
- if (!isa_and_nonnull<BranchInst>(SplitPos))
- SplitPos->eraseFromParent();
- Builder.SetInsertPoint(InsertBB);
- }
+ auto merged = MergeBlockIntoPredecessor(ExitBB);
+ BasicBlock *ExitPredBB = SplitPos->getParent();
+ auto InsertBB = merged ? ExitPredBB : ExitBB;
+ if (!isa_and_nonnull<BranchInst>(SplitPos))
+ SplitPos->eraseFromParent();
+ Builder.SetInsertPoint(InsertBB);
return Builder.saveIP();
}
@@ -3171,6 +3793,7 @@ bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
}
break;
case Write:
+ case Compare:
case Update:
if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
AO == AtomicOrdering::SequentiallyConsistent) {
@@ -3290,9 +3913,10 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
- const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
+ const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
+ assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
if (!updateToLocation(Loc))
return Loc.IP;
@@ -3309,7 +3933,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
"OpenMP atomic does not support LT or GT operations");
});
- emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
+ emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile, IsXBinopExpr);
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
return Builder.saveIP();
@@ -3344,20 +3968,39 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
}
std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
- Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
+ InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
- bool DoCmpExch =
- ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
- (RMWOp == AtomicRMWInst::FSub) ||
- (RMWOp == AtomicRMWInst::Sub && !IsXBinopExpr);
+ // TODO: handle the case where XElemTy is not byte-sized or not a power of 2
+ // or a complex datatype.
+ bool emitRMWOp = false;
+ switch (RMWOp) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Nand:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ case AtomicRMWInst::Xchg:
+ emitRMWOp = XElemTy;
+ break;
+ case AtomicRMWInst::Sub:
+ emitRMWOp = (IsXBinopExpr && XElemTy);
+ break;
+ default:
+ emitRMWOp = false;
+ }
+ emitRMWOp &= XElemTy->isIntegerTy();
std::pair<Value *, Value *> Res;
- if (XElemTy->isIntegerTy() && !DoCmpExch) {
+ if (emitRMWOp) {
Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
// not needed except in case of postfix captures. Generate anyway for
// consistency with the else part. Will be removed with any DCE pass.
- Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
+ // AtomicRMWInst::Xchg does not have a corresponding instruction.
+ if (RMWOp == AtomicRMWInst::Xchg)
+ Res.second = Res.first;
+ else
+ Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
} else {
unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
IntegerType *IntCastTy =
@@ -3380,12 +4023,12 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
X->getName() + ".atomic.cont");
ContBB->getTerminator()->eraseFromParent();
+ Builder.restoreIP(AllocaIP);
+ AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
+ NewAtomicAddr->setName(X->getName() + "x.new.val");
Builder.SetInsertPoint(ContBB);
llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
PHI->addIncoming(OldVal, CurBB);
- AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
- NewAtomicAddr->setName(X->getName() + "x.new.val");
- NewAtomicAddr->moveBefore(AllocIP);
IntegerType *NewAtomicCastTy =
IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
bool IsIntTy = XElemTy->isIntegerTy();
@@ -3407,7 +4050,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
Value *Upd = UpdateOp(OldExprVal, Builder);
Builder.CreateStore(Upd, NewAtomicAddr);
- LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
+ LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicIntAddr);
Value *XAddr =
(IsIntTy)
? X
@@ -3415,7 +4058,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
AtomicOrdering Failure =
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
- XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
+ XAddr, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
Result->setVolatile(VolatileX);
Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
@@ -3439,7 +4082,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
- const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
+ const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
@@ -3450,7 +4093,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
- Type *XElemTy = XTy->getPointerElementType();
+ Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
"OMP atomic capture expected a scalar type");
@@ -3462,7 +4105,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
// 'x' is simply atomically rewritten with 'expr'.
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
std::pair<Value *, Value *> Result =
- emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
+ emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile, IsXBinopExpr);
Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
@@ -3472,6 +4115,155 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
return Builder.saveIP();
}
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
+ const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
+ AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
+ omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
+ bool IsFailOnly) {
+
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ assert(X.Var->getType()->isPointerTy() &&
+ "OMP atomic expects a pointer to target memory");
+ assert((X.ElemTy->isIntegerTy() || X.ElemTy->isPointerTy()) &&
+ "OMP atomic compare expected a integer scalar type");
+ // compare capture
+ if (V.Var) {
+ assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
+ assert(V.ElemTy == X.ElemTy && "x and v must be of same type");
+ }
+
+ if (Op == OMPAtomicCompareOp::EQ) {
+ AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+ AtomicCmpXchgInst *Result =
+ Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
+ if (V.Var) {
+ Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0);
+ assert(OldValue->getType() == V.ElemTy &&
+ "OldValue and V must be of same type");
+ if (IsPostfixUpdate) {
+ Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
+ } else {
+ Value *SuccessOrFail = Builder.CreateExtractValue(Result, /*Idxs=*/1);
+ if (IsFailOnly) {
+ // CurBB----
+ // | |
+ // v |
+ // ContBB |
+ // | |
+ // v |
+ // ExitBB <-
+ //
+ // where ContBB only contains the store of old value to 'v'.
+ BasicBlock *CurBB = Builder.GetInsertBlock();
+ Instruction *CurBBTI = CurBB->getTerminator();
+ CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
+ BasicBlock *ExitBB = CurBB->splitBasicBlock(
+ CurBBTI, X.Var->getName() + ".atomic.exit");
+ BasicBlock *ContBB = CurBB->splitBasicBlock(
+ CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
+ ContBB->getTerminator()->eraseFromParent();
+ CurBB->getTerminator()->eraseFromParent();
+
+ Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
+
+ Builder.SetInsertPoint(ContBB);
+ Builder.CreateStore(OldValue, V.Var);
+ Builder.CreateBr(ExitBB);
+
+ // If ExitBB still ends in the unreachable we created above, drop it and
+ // keep appending to ExitBB; otherwise emit before its real terminator.
+ // (Binding ExitTI inside the if-condition would leave it null in the
+ // else branch and make SetInsertPoint dereference a null pointer.)
+ Instruction *ExitTI = ExitBB->getTerminator();
+ if (isa_and_nonnull<UnreachableInst>(ExitTI)) {
+ CurBBTI->eraseFromParent();
+ Builder.SetInsertPoint(ExitBB);
+ } else {
+ Builder.SetInsertPoint(ExitTI);
+ }
+ } else {
+ Value *CapturedValue =
+ Builder.CreateSelect(SuccessOrFail, E, OldValue);
+ Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
+ }
+ }
+ }
+ // The comparison result has to be stored.
+ if (R.Var) {
+ assert(R.Var->getType()->isPointerTy() &&
+ "r.var must be of pointer type");
+ assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
+
+ Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
+ Value *ResultCast = R.IsSigned
+ ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
+ : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
+ Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
+ }
+ } else {
+ assert((Op == OMPAtomicCompareOp::MAX || Op == OMPAtomicCompareOp::MIN) &&
+ "Op should be either max or min at this point");
+ assert(!IsFailOnly && "IsFailOnly is only valid when the comparison is ==");
+
+ // Reverse the ordop as the OpenMP forms are different from LLVM forms.
+ // Let's take max as example.
+ // OpenMP form:
+ // x = x > expr ? expr : x;
+ // LLVM form:
+ // *ptr = *ptr > val ? *ptr : val;
+ // We need to transform to LLVM form.
+ // x = x <= expr ? x : expr;
+ AtomicRMWInst::BinOp NewOp;
+ if (IsXBinopExpr) {
+ if (X.IsSigned)
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min
+ : AtomicRMWInst::Max;
+ else
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin
+ : AtomicRMWInst::UMax;
+ } else {
+ if (X.IsSigned)
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max
+ : AtomicRMWInst::Min;
+ else
+ NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMax
+ : AtomicRMWInst::UMin;
+ }
+
+ AtomicRMWInst *OldValue =
+ Builder.CreateAtomicRMW(NewOp, X.Var, E, MaybeAlign(), AO);
+ if (V.Var) {
+ Value *CapturedValue = nullptr;
+ if (IsPostfixUpdate) {
+ CapturedValue = OldValue;
+ } else {
+ CmpInst::Predicate Pred;
+ switch (NewOp) {
+ case AtomicRMWInst::Max:
+ Pred = CmpInst::ICMP_SGT;
+ break;
+ case AtomicRMWInst::UMax:
+ Pred = CmpInst::ICMP_UGT;
+ break;
+ case AtomicRMWInst::Min:
+ Pred = CmpInst::ICMP_SLT;
+ break;
+ case AtomicRMWInst::UMin:
+ Pred = CmpInst::ICMP_ULT;
+ break;
+ default:
+ llvm_unreachable("unexpected comparison op");
+ }
+ Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue, E);
+ CapturedValue = Builder.CreateSelect(NonAtomicCmp, E, OldValue);
+ }
+ Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
+ }
+ }
+
+ checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
+
+ return Builder.saveIP();
+}
+
GlobalVariable *
OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
std::string VarName) {
@@ -3543,6 +4335,51 @@ BasicBlock *CanonicalLoopInfo::getPreheader() const {
llvm_unreachable("Missing preheader");
}
+void CanonicalLoopInfo::setTripCount(Value *TripCount) {
+ assert(isValid() && "Requires a valid canonical loop");
+
+ Instruction *CmpI = &getCond()->front();
+ assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
+ CmpI->setOperand(1, TripCount);
+
+#ifndef NDEBUG
+ assertOK();
+#endif
+}
+
+void CanonicalLoopInfo::mapIndVar(
+ llvm::function_ref<Value *(Instruction *)> Updater) {
+ assert(isValid() && "Requires a valid canonical loop");
+
+ Instruction *OldIV = getIndVar();
+
+ // Record all uses excluding those introduced by the updater. Uses by the
+ // CanonicalLoopInfo itself to keep track of the number of iterations are
+ // excluded.
+ SmallVector<Use *> ReplacableUses;
+ for (Use &U : OldIV->uses()) {
+ auto *User = dyn_cast<Instruction>(U.getUser());
+ if (!User)
+ continue;
+ if (User->getParent() == getCond())
+ continue;
+ if (User->getParent() == getLatch())
+ continue;
+ ReplaceableUses.push_back(&U);
+ }
+
+ // Run the updater that may introduce new uses
+ Value *NewIV = Updater(OldIV);
+
+ // Replace the old uses with the value returned by the updater.
+ for (Use *U : ReplaceableUses)
+ U->set(NewIV);
+
+#ifndef NDEBUG
+ assertOK();
+#endif
+}
+
void CanonicalLoopInfo::assertOK() const {
#ifndef NDEBUG
// No constraints if this object currently does not describe a loop.
diff --git a/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/llvm/lib/FuzzMutate/FuzzerCLI.cpp
index 879d5a10b37b..90a1a35e2e3e 100644
--- a/llvm/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/llvm/lib/FuzzMutate/FuzzerCLI.cpp
@@ -9,16 +9,9 @@
#include "llvm/FuzzMutate/FuzzerCLI.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/IR/Verifier.h"
using namespace llvm;
@@ -166,44 +159,3 @@ int llvm::runFuzzerOnInputs(int ArgC, char *ArgV[], FuzzerTestFun TestOne,
}
return 0;
}
-
-std::unique_ptr<Module> llvm::parseModule(
- const uint8_t *Data, size_t Size, LLVMContext &Context) {
-
- if (Size <= 1)
- // We get bogus data given an empty corpus - just create a new module.
- return std::make_unique<Module>("M", Context);
-
- auto Buffer = MemoryBuffer::getMemBuffer(
- StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
- /*RequiresNullTerminator=*/false);
-
- SMDiagnostic Err;
- auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
- if (Error E = M.takeError()) {
- errs() << toString(std::move(E)) << "\n";
- return nullptr;
- }
- return std::move(M.get());
-}
-
-size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
- std::string Buf;
- {
- raw_string_ostream OS(Buf);
- WriteBitcodeToFile(M, OS);
- }
- if (Buf.size() > MaxSize)
- return 0;
- memcpy(Dest, Buf.data(), Buf.size());
- return Buf.size();
-}
-
-std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
- LLVMContext &Context) {
- auto M = parseModule(Data, Size, Context);
- if (!M || verifyModule(*M, &errs()))
- return nullptr;
-
- return M;
-}
diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp
index 0cd0f538fdbc..b62a326a40cc 100644
--- a/llvm/lib/FuzzMutate/IRMutator.cpp
+++ b/llvm/lib/FuzzMutate/IRMutator.cpp
@@ -9,6 +9,8 @@
#include "llvm/FuzzMutate/IRMutator.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/FuzzMutate/Operations.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/FuzzMutate/RandomIRBuilder.h"
@@ -17,7 +19,9 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/Scalar/DCE.h"
using namespace llvm;
@@ -33,14 +37,15 @@ static void createEmptyFunction(Module &M) {
}
void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
- if (M.empty())
- createEmptyFunction(M);
-
auto RS = makeSampler<Function *>(IB.Rand);
for (Function &F : M)
if (!F.isDeclaration())
RS.sample(&F, /*Weight=*/1);
- mutate(*RS.getSelection(), IB);
+
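+ // No function bodies to mutate: seed the module with an empty function
+ // instead.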
+ if (RS.isEmpty())
+ createEmptyFunction(M);
+ else
+ mutate(*RS.getSelection(), IB);
}
void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
@@ -243,3 +248,44 @@ void InstModificationIRStrategy::mutate(Instruction &Inst,
if (RS)
RS.getSelection()();
}
+
+std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
+ LLVMContext &Context) {
+
+ if (Size <= 1)
+ // We get bogus data given an empty corpus - just create a new module.
+ return std::make_unique<Module>("M", Context);
+
+ auto Buffer = MemoryBuffer::getMemBuffer(
+ StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
+ /*RequiresNullTerminator=*/false);
+
+ auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
+ if (Error E = M.takeError()) {
+ errs() << toString(std::move(E)) << "\n";
+ return nullptr;
+ }
+ return std::move(M.get());
+}
+
+size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
+ std::string Buf;
+ {
+ raw_string_ostream OS(Buf);
+ WriteBitcodeToFile(M, OS);
+ }
+ if (Buf.size() > MaxSize)
+ return 0;
+ memcpy(Dest, Buf.data(), Buf.size());
+ return Buf.size();
+}
+
+std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
+ LLVMContext &Context) {
+ auto M = parseModule(Data, Size, Context);
+ if (!M || verifyModule(*M, &errs()))
+ return nullptr;
+
+ return M;
+}
diff --git a/llvm/lib/FuzzMutate/Operations.cpp b/llvm/lib/FuzzMutate/Operations.cpp
index 221a3a84b49b..7443d49967c5 100644
--- a/llvm/lib/FuzzMutate/Operations.cpp
+++ b/llvm/lib/FuzzMutate/Operations.cpp
@@ -169,14 +169,21 @@ OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) {
OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) {
auto buildGEP = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
- Type *Ty = Srcs[0]->getType()->getPointerElementType();
- auto Indices = makeArrayRef(Srcs).drop_front(1);
+ // TODO: It would be better to generate a random type here, rather than
+ // generating a random value and picking its type.
+ Type *Ty = Srcs[0]->getType()->isOpaquePointerTy()
+ ? Srcs[1]->getType()
+ : Srcs[0]->getType()->getNonOpaquePointerElementType();
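+ // Srcs[1] only donates the GEP's source element type; the actual operands
+ // are the pointer (Srcs[0]) and the index (Srcs[2]), so drop the first two.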
+ auto Indices = makeArrayRef(Srcs).drop_front(2);
return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst);
};
// TODO: Handle aggregates and vectors
// TODO: Support multiple indices.
// TODO: Try to avoid meaningless accesses.
- return {Weight, {sizedPtrType(), anyIntType()}, buildGEP};
+ SourcePred sizedType(
+ [](ArrayRef<Value *>, const Value *V) { return V->getType()->isSized(); },
+ None);
+ return {Weight, {sizedPtrType(), sizedType, anyIntType()}, buildGEP};
}
static uint64_t getAggregateNumElements(Type *T) {
@@ -302,12 +309,12 @@ static SourcePred validShuffleVectorIndex() {
return ShuffleVectorInst::isValidOperands(Cur[0], Cur[1], V);
};
auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) {
- auto *FirstTy = cast<FixedVectorType>(Cur[0]->getType());
+ auto *FirstTy = cast<VectorType>(Cur[0]->getType());
auto *Int32Ty = Type::getInt32Ty(Cur[0]->getContext());
// TODO: It's straightforward to make up reasonable values, but listing them
// exhaustively would be insane. Come up with a couple of sensible ones.
return std::vector<Constant *>{UndefValue::get(
- FixedVectorType::get(Int32Ty, FirstTy->getNumElements()))};
+ VectorType::get(Int32Ty, FirstTy->getElementCount()))};
};
return {Pred, Make};
}
diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index 27c3bdfb22a8..9ac31ebccb99 100644
--- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -8,10 +8,10 @@
#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/FuzzMutate/OpDescriptor.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -53,8 +53,11 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
IP = ++I->getIterator();
assert(IP != BB.end() && "guaranteed by findPointer");
}
- auto *NewLoad =
- new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L", &*IP);
+ // For opaque pointers, pick the type independently.
+ Type *AccessTy = Ptr->getType()->isOpaquePointerTy()
+ ? RS.getSelection()->getType()
+ : Ptr->getType()->getNonOpaquePointerElementType();
+ auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP);
// Only sample this load if it really matches the descriptor
if (Pred.matches(Srcs, NewLoad))
@@ -139,9 +142,12 @@ Value *RandomIRBuilder::findPointer(BasicBlock &BB,
if (Inst->isTerminator())
return false;
- if (auto PtrTy = dyn_cast<PointerType>(Inst->getType())) {
+ if (auto *PtrTy = dyn_cast<PointerType>(Inst->getType())) {
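+ // An opaque pointer always qualifies, since the access type is chosen
+ // independently of the pointee type.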
+ if (PtrTy->isOpaque())
+ return true;
+
// We can never generate loads from non-first-class or non-sized types
- Type *ElemTy = PtrTy->getPointerElementType();
+ Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
if (!ElemTy->isSized() || !ElemTy->isFirstClassType())
return false;
diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp
index 2e41799e13e9..b7a10846a0d3 100644
--- a/llvm/lib/IR/AbstractCallSite.cpp
+++ b/llvm/lib/IR/AbstractCallSite.cpp
@@ -16,7 +16,6 @@
#include "llvm/IR/AbstractCallSite.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 179754e275b0..596348ddb462 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -223,9 +223,7 @@ predictValueUseListOrder(const Value *V, unsigned ID, const OrderMap &OM) {
return LU->getOperandNo() > RU->getOperandNo();
});
- if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) {
- return L.second < R.second;
- }))
+ if (llvm::is_sorted(List, llvm::less_second()))
// Order is already correct.
return {};
@@ -612,6 +610,11 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
OS << '>';
return;
}
+ case Type::DXILPointerTyID:
+ // DXIL pointer types are only handled by the DirectX backend. To avoid
+ // extra dependencies we just print the pointer's address here.
+ OS << "dxil-ptr (" << Ty << ")";
+ return;
}
llvm_unreachable("Invalid TypeID");
}
@@ -641,7 +644,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
OS << '>';
}
-AbstractSlotTrackerStorage::~AbstractSlotTrackerStorage() {}
+AbstractSlotTrackerStorage::~AbstractSlotTrackerStorage() = default;
namespace llvm {
@@ -1290,7 +1293,7 @@ struct AsmWriterContext {
/// prints a Metadata as operand.
virtual void onWriteMetadataAsOperand(const Metadata *) {}
- virtual ~AsmWriterContext() {}
+ virtual ~AsmWriterContext() = default;
};
} // end anonymous namespace
@@ -2072,7 +2075,7 @@ static void writeDIFile(raw_ostream &Out, const DIFile *N, AsmWriterContext &) {
// Print all values for checksum together, or not at all.
if (N->getChecksum())
Printer.printChecksum(*N->getChecksum());
- Printer.printString("source", N->getSource().getValueOr(StringRef()),
+ Printer.printString("source", N->getSource().value_or(StringRef()),
/* ShouldSkipEmpty */ true);
Out << ")";
}
@@ -2131,6 +2134,7 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
Printer.printMetadata("retainedNodes", N->getRawRetainedNodes());
Printer.printMetadata("thrownTypes", N->getRawThrownTypes());
Printer.printMetadata("annotations", N->getRawAnnotations());
+ Printer.printString("targetFuncName", N->getTargetFuncName());
Out << ")";
}
@@ -3531,6 +3535,19 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
Out << '"';
}
+ using SanitizerMetadata = llvm::GlobalValue::SanitizerMetadata;
+ if (GV->hasSanitizerMetadata()) {
+ SanitizerMetadata MD = GV->getSanitizerMetadata();
+ if (MD.NoAddress)
+ Out << ", no_sanitize_address";
+ if (MD.NoHWAddress)
+ Out << ", no_sanitize_hwaddress";
+ if (MD.NoMemtag)
+ Out << ", no_sanitize_memtag";
+ if (MD.IsDynInit)
+ Out << ", sanitize_address_dyninit";
+ }
+
maybePrintComdat(Out, *GV);
if (MaybeAlign A = GV->getAlign())
Out << ", align " << A->value();
@@ -4708,9 +4725,8 @@ struct MDTreeAsmWriterContext : public AsmWriterContext {
: AsmWriterContext(TP, ST, M), Level(0U), Visited({InitMD}), MainOS(OS) {}
void onWriteMetadataAsOperand(const Metadata *MD) override {
- if (Visited.count(MD))
+ if (!Visited.insert(MD).second)
return;
- Visited.insert(MD);
std::string Str;
raw_string_ostream SS(Str);
diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp
index 3d24ae062841..27977d5d56b0 100644
--- a/llvm/lib/IR/Assumptions.cpp
+++ b/llvm/lib/IR/Assumptions.cpp
@@ -107,4 +107,5 @@ StringSet<> llvm::KnownAssumptionStrings({
"omp_no_openmp_routines", // OpenMP 5.1
"omp_no_parallelism", // OpenMP 5.1
"ompx_spmd_amenable", // OpenMPOpt extension
+ "ompx_no_call_asm", // OpenMPOpt extension
});
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 1153fb827b56..5eb958f5786a 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -255,6 +255,8 @@ public:
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
unsigned getVScaleRangeMin() const;
Optional<unsigned> getVScaleRangeMax() const;
+ UWTableKind getUWTableKind() const;
+ AllocFnKind getAllocKind() const;
std::string getAsString(bool InAttrGrp) const;
Type *getAttributeType(Attribute::AttrKind Kind) const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 43fde64c3734..6d9f94b5eefd 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -28,7 +28,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -56,12 +55,11 @@ static const unsigned AllocSizeNumElemsNotPresent = -1;
static uint64_t packAllocSizeArgs(unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg) {
- assert((!NumElemsArg.hasValue() ||
- *NumElemsArg != AllocSizeNumElemsNotPresent) &&
+ assert((!NumElemsArg || *NumElemsArg != AllocSizeNumElemsNotPresent) &&
"Attempting to pack a reserved value");
return uint64_t(ElemSizeArg) << 32 |
- NumElemsArg.getValueOr(AllocSizeNumElemsNotPresent);
+ NumElemsArg.value_or(AllocSizeNumElemsNotPresent);
}
static std::pair<unsigned, Optional<unsigned>>
@@ -77,7 +75,7 @@ unpackAllocSizeArgs(uint64_t Num) {
static uint64_t packVScaleRangeArgs(unsigned MinValue,
Optional<unsigned> MaxValue) {
- return uint64_t(MinValue) << 32 | MaxValue.getValueOr(0);
+ return uint64_t(MinValue) << 32 | MaxValue.value_or(0);
}
static std::pair<unsigned, Optional<unsigned>>
@@ -205,6 +203,11 @@ Attribute Attribute::getWithInAllocaType(LLVMContext &Context, Type *Ty) {
return get(Context, InAlloca, Ty);
}
+Attribute Attribute::getWithUWTableKind(LLVMContext &Context,
+ UWTableKind Kind) {
+ return get(Context, UWTable, uint64_t(Kind));
+}
+
Attribute
Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg) {
@@ -366,6 +369,18 @@ Optional<unsigned> Attribute::getVScaleRangeMax() const {
return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second;
}
+UWTableKind Attribute::getUWTableKind() const {
+ assert(hasAttribute(Attribute::UWTable) &&
+ "Trying to get unwind table kind from non-uwtable attribute");
+ return UWTableKind(pImpl->getValueAsInt());
+}
+
+AllocFnKind Attribute::getAllocKind() const {
+ assert(hasAttribute(Attribute::AllocKind) &&
+ "Trying to get allockind value from non-allockind attribute");
+ return AllocFnKind(pImpl->getValueAsInt());
+}
+
std::string Attribute::getAsString(bool InAttrGrp) const {
if (!pImpl) return {};
@@ -422,7 +437,38 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
unsigned MinValue = getVScaleRangeMin();
Optional<unsigned> MaxValue = getVScaleRangeMax();
return ("vscale_range(" + Twine(MinValue) + "," +
- Twine(MaxValue.getValueOr(0)) + ")")
+ Twine(MaxValue.value_or(0)) + ")")
+ .str();
+ }
+
+ if (hasAttribute(Attribute::UWTable)) {
+ UWTableKind Kind = getUWTableKind();
+ if (Kind != UWTableKind::None) {
+ return Kind == UWTableKind::Default
+ ? "uwtable"
+ : ("uwtable(" +
+ Twine(Kind == UWTableKind::Sync ? "sync" : "async") + ")")
+ .str();
+ }
+ }
+
+ if (hasAttribute(Attribute::AllocKind)) {
+ AllocFnKind Kind = getAllocKind();
+ SmallVector<StringRef> parts;
+ if ((Kind & AllocFnKind::Alloc) != AllocFnKind::Unknown)
+ parts.push_back("alloc");
+ if ((Kind & AllocFnKind::Realloc) != AllocFnKind::Unknown)
+ parts.push_back("realloc");
+ if ((Kind & AllocFnKind::Free) != AllocFnKind::Unknown)
+ parts.push_back("free");
+ if ((Kind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown)
+ parts.push_back("uninitialized");
+ if ((Kind & AllocFnKind::Zeroed) != AllocFnKind::Unknown)
+ parts.push_back("zeroed");
+ if ((Kind & AllocFnKind::Aligned) != AllocFnKind::Unknown)
+ parts.push_back("aligned");
+ return ("allockind(\"" +
+ Twine(llvm::join(parts.begin(), parts.end(), ",")) + "\")")
.str();
}
@@ -710,6 +756,14 @@ Optional<unsigned> AttributeSet::getVScaleRangeMax() const {
return SetNode ? SetNode->getVScaleRangeMax() : None;
}
+UWTableKind AttributeSet::getUWTableKind() const {
+ return SetNode ? SetNode->getUWTableKind() : UWTableKind::None;
+}
+
+AllocFnKind AttributeSet::getAllocKind() const {
+ return SetNode ? SetNode->getAllocKind() : AllocFnKind::Unknown;
+}
+
std::string AttributeSet::getAsString(bool InAttrGrp) const {
return SetNode ? SetNode->getAsString(InAttrGrp) : "";
}
@@ -876,6 +930,18 @@ Optional<unsigned> AttributeSetNode::getVScaleRangeMax() const {
return None;
}
+UWTableKind AttributeSetNode::getUWTableKind() const {
+ if (auto A = findEnumAttribute(Attribute::UWTable))
+ return A->getUWTableKind();
+ return UWTableKind::None;
+}
+
+AllocFnKind AttributeSetNode::getAllocKind() const {
+ if (auto A = findEnumAttribute(Attribute::AllocKind))
+ return A->getAllocKind();
+ return AllocFnKind::Unknown;
+}
+
std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
std::string Str;
for (iterator I = begin(), E = end(); I != E; ++I) {
@@ -987,11 +1053,7 @@ AttributeList::get(LLVMContext &C,
if (Attrs.empty())
return {};
- assert(llvm::is_sorted(Attrs,
- [](const std::pair<unsigned, Attribute> &LHS,
- const std::pair<unsigned, Attribute> &RHS) {
- return LHS.first < RHS.first;
- }) &&
+ assert(llvm::is_sorted(Attrs, llvm::less_first()) &&
"Misordered Attributes list!");
assert(llvm::all_of(Attrs,
[](const std::pair<unsigned, Attribute> &Pair) {
@@ -1024,11 +1086,7 @@ AttributeList::get(LLVMContext &C,
if (Attrs.empty())
return {};
- assert(llvm::is_sorted(Attrs,
- [](const std::pair<unsigned, AttributeSet> &LHS,
- const std::pair<unsigned, AttributeSet> &RHS) {
- return LHS.first < RHS.first;
- }) &&
+ assert(llvm::is_sorted(Attrs, llvm::less_first()) &&
"Misordered Attributes list!");
assert(llvm::none_of(Attrs,
[](const std::pair<unsigned, AttributeSet> &Pair) {
@@ -1428,6 +1486,14 @@ AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
return getParamAttrs(Index).getDereferenceableOrNullBytes();
}
+UWTableKind AttributeList::getUWTableKind() const {
+ return getFnAttrs().getUWTableKind();
+}
+
+AllocFnKind AttributeList::getAllocKind() const {
+ return getFnAttrs().getAllocKind();
+}
+
std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const {
return getAttributes(Index).getAsString(InAttrGrp);
}
@@ -1649,6 +1715,16 @@ AttrBuilder &AttrBuilder::addVScaleRangeAttrFromRawRepr(uint64_t RawArgs) {
return addRawIntAttr(Attribute::VScaleRange, RawArgs);
}
+AttrBuilder &AttrBuilder::addUWTableAttr(UWTableKind Kind) {
+ if (Kind == UWTableKind::None)
+ return *this;
+ return addRawIntAttr(Attribute::UWTable, uint64_t(Kind));
+}
+
+AttrBuilder &AttrBuilder::addAllocKindAttr(AllocFnKind Kind) {
+ return addRawIntAttr(Attribute::AllocKind, static_cast<uint64_t>(Kind));
+}
+
Type *AttrBuilder::getTypeAttr(Attribute::AttrKind Kind) const {
assert(Attribute::isTypeAttrKind(Kind) && "Not a type attribute");
Attribute A = getAttribute(Kind);
@@ -1732,39 +1808,51 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const {
//===----------------------------------------------------------------------===//
/// Which attributes cannot be applied to a type.
-AttributeMask AttributeFuncs::typeIncompatible(Type *Ty) {
+AttributeMask AttributeFuncs::typeIncompatible(Type *Ty,
+ AttributeSafetyKind ASK) {
AttributeMask Incompatible;
- if (!Ty->isIntegerTy())
+ if (!Ty->isIntegerTy()) {
// Attributes that only apply to integers.
- Incompatible.addAttribute(Attribute::SExt)
- .addAttribute(Attribute::ZExt);
+ if (ASK & ASK_SAFE_TO_DROP)
+ Incompatible.addAttribute(Attribute::AllocAlign);
+ if (ASK & ASK_UNSAFE_TO_DROP)
+ Incompatible.addAttribute(Attribute::SExt).addAttribute(Attribute::ZExt);
+ }
- if (!Ty->isPointerTy())
+ if (!Ty->isPointerTy()) {
// Attributes that only apply to pointers.
- Incompatible.addAttribute(Attribute::Nest)
- .addAttribute(Attribute::NoAlias)
- .addAttribute(Attribute::NoCapture)
- .addAttribute(Attribute::NonNull)
- .addAttribute(Attribute::ReadNone)
- .addAttribute(Attribute::ReadOnly)
- .addAttribute(Attribute::SwiftError)
- .addAttribute(Attribute::Dereferenceable)
- .addAttribute(Attribute::DereferenceableOrNull)
- .addAttribute(Attribute::Preallocated)
- .addAttribute(Attribute::InAlloca)
- .addAttribute(Attribute::ByVal)
- .addAttribute(Attribute::StructRet)
- .addAttribute(Attribute::ByRef)
- .addAttribute(Attribute::ElementType);
-
- if (!Ty->isPtrOrPtrVectorTy())
+ if (ASK & ASK_SAFE_TO_DROP)
+ Incompatible.addAttribute(Attribute::NoAlias)
+ .addAttribute(Attribute::NoCapture)
+ .addAttribute(Attribute::NonNull)
+ .addAttribute(Attribute::ReadNone)
+ .addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::Dereferenceable)
+ .addAttribute(Attribute::DereferenceableOrNull);
+ if (ASK & ASK_UNSAFE_TO_DROP)
+ Incompatible.addAttribute(Attribute::Nest)
+ .addAttribute(Attribute::SwiftError)
+ .addAttribute(Attribute::Preallocated)
+ .addAttribute(Attribute::InAlloca)
+ .addAttribute(Attribute::ByVal)
+ .addAttribute(Attribute::StructRet)
+ .addAttribute(Attribute::ByRef)
+ .addAttribute(Attribute::ElementType)
+ .addAttribute(Attribute::AllocatedPointer);
+ }
+
// Attributes that only apply to pointers or vectors of pointers.
- Incompatible.addAttribute(Attribute::Alignment);
+ if (!Ty->isPtrOrPtrVectorTy()) {
+ if (ASK & ASK_SAFE_TO_DROP)
+ Incompatible.addAttribute(Attribute::Alignment);
+ }
// Some attributes can apply to all "values" but there are no `void` values.
- if (Ty->isVoidTy())
- Incompatible.addAttribute(Attribute::NoUndef);
+ if (Ty->isVoidTy()) {
+ if (ASK & ASK_SAFE_TO_DROP)
+ Incompatible.addAttribute(Attribute::NoUndef);
+ }
return Incompatible;
}
@@ -1976,3 +2064,14 @@ void AttributeFuncs::mergeAttributesForOutlining(Function &Base,
// that aspect in the merged function.
mergeFnAttrs(Base, ToMerge);
}
+
+void AttributeFuncs::updateMinLegalVectorWidthAttr(Function &Fn,
+ uint64_t Width) {
+ Attribute Attr = Fn.getFnAttribute("min-legal-vector-width");
+ if (Attr.isValid()) {
+ uint64_t OldWidth = 0;
+ Attr.getValueAsString().getAsInteger(0, OldWidth);
+ if (Width > OldWidth)
+ Fn.addFnAttr("min-legal-vector-width", llvm::utostr(Width));
+ }
+}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 11839c7572e3..75594f90c926 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -14,6 +14,7 @@
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -575,19 +576,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
F->arg_begin()->getType());
return true;
}
- static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
- if (vldRegex.match(Name)) {
- auto fArgs = F->getFunctionType()->params();
- SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
- // Can't use Intrinsic::getDeclaration here as the return types might
- // then only be structurally equal.
- FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
- StringRef Suffix =
- F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
- NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
- "llvm." + Name + "." + Suffix, F->getParent());
- return true;
- }
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
@@ -760,6 +748,23 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
break;
}
case 'e': {
+ if (Name.startswith("experimental.vector.extract.")) {
+ rename(F);
+ Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::vector_extract, Tys);
+ return true;
+ }
+
+ if (Name.startswith("experimental.vector.insert.")) {
+ rename(F);
+ auto Args = F->getFunctionType()->params();
+ Type *Tys[] = {Args[0], Args[1]};
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::vector_insert, Tys);
+ return true;
+ }
+
SmallVector<StringRef, 2> Groups;
static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
if (R.match(Name, &Groups)) {
@@ -1016,10 +1021,35 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
+
+ auto *ST = dyn_cast<StructType>(F->getReturnType());
+ if (ST && (!ST->isLiteral() || ST->isPacked())) {
+ // Replace return type with literal non-packed struct. Only do this for
+ // intrinsics declared to return a struct, not for intrinsics with
+ // overloaded return type, in which case the exact struct type will be
+ // mangled into the name.
+ SmallVector<Intrinsic::IITDescriptor> Desc;
+ Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
+ if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
+ auto *FT = F->getFunctionType();
+ auto *NewST = StructType::get(ST->getContext(), ST->elements());
+ auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
+ std::string Name = F->getName().str();
+ rename(F);
+ NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
+ Name, F->getParent());
+
+ // The new function may also need remangling.
+ if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
+ NewFn = *Result;
+ return true;
+ }
+ }
+
// Remangle our intrinsic since we upgrade the mangling
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != None) {
- NewFn = Result.getValue();
+ NewFn = *Result;
return true;
}
@@ -1237,7 +1267,7 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
return EmitX86Select(Builder, Mask, Align, Passthru);
}
-static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
+static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
bool ZeroMask, bool IndexForm) {
Type *Ty = CI.getType();
unsigned VecWidth = Ty->getPrimitiveSizeInBits();
@@ -1298,7 +1328,7 @@ static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
-static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
+static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Type *Ty = CI.getType();
Value *Op0 = CI.getOperand(0);
@@ -1314,7 +1344,7 @@ static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
return Res;
}
-static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
+static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
bool IsRotateRight) {
Type *Ty = CI.getType();
Value *Src = CI.getArgOperand(0);
@@ -1341,7 +1371,7 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
return Res;
}
-static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
+static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
bool IsSigned) {
Type *Ty = CI.getType();
Value *LHS = CI.getArgOperand(0);
@@ -1380,7 +1410,7 @@ static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
return Ext;
}
-static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
+static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
bool IsShiftRight, bool ZeroMask) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
@@ -1459,7 +1489,7 @@ static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
-static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
+static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
@@ -1469,7 +1499,7 @@ static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
return Res;
}
-static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
+static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
Type *Ty = CI.getType();
// Arguments have a vXi32 type so cast to vXi64.
@@ -1521,7 +1551,7 @@ static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
-static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
+static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
@@ -1553,7 +1583,7 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
}
// Replace a masked intrinsic with an older unmasked intrinsic.
-static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
+static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
Value *Rep = Builder.CreateCall(Intrin,
@@ -1561,7 +1591,7 @@ static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
-static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
+static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
Value* A = CI.getArgOperand(0);
Value* B = CI.getArgOperand(1);
Value* Src = CI.getArgOperand(2);
@@ -1576,7 +1606,7 @@ static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
}
-static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
+static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
@@ -1586,7 +1616,7 @@ static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
- CallInst &CI, Value *&Rep) {
+ CallBase &CI, Value *&Rep) {
Name = Name.substr(12); // Remove avx512.mask.
unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
@@ -1834,7 +1864,7 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
}
}
-static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F,
+static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
IRBuilder<> &Builder) {
if (Name == "mve.vctp64.old") {
// Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
@@ -1921,12 +1951,12 @@ static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F,
Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return Builder.CreateCall(Fn, Ops, CI->getName());
}
- llvm_unreachable("Unknown function for ARM CallInst upgrade.");
+ llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
-void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
+void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);
@@ -3774,7 +3804,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsARM) {
Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
} else {
- llvm_unreachable("Unknown function for CallInst upgrade.");
+ llvm_unreachable("Unknown function for CallBase upgrade.");
}
if (Rep)
@@ -3783,12 +3813,33 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- const auto &DefaultCase = [&NewFn, &CI]() -> void {
- // Handle generic mangling change, but nothing else
- assert(
- (CI->getCalledFunction()->getName() != NewFn->getName()) &&
- "Unknown function for CallInst upgrade and isn't just a name change");
- CI->setCalledFunction(NewFn);
+ const auto &DefaultCase = [&]() -> void {
+ if (CI->getFunctionType() == NewFn->getFunctionType()) {
+ // Handle generic mangling change.
+ assert(
+ (CI->getCalledFunction()->getName() != NewFn->getName()) &&
+ "Unknown function for CallBase upgrade and isn't just a name change");
+ CI->setCalledFunction(NewFn);
+ return;
+ }
+
+ // This must be an upgrade from a named to a literal struct.
+ auto *OldST = cast<StructType>(CI->getType());
+ assert(OldST != NewFn->getReturnType() && "Return type must have changed");
+ assert(OldST->getNumElements() ==
+ cast<StructType>(NewFn->getReturnType())->getNumElements() &&
+ "Must have same number of elements");
+
+ SmallVector<Value *> Args(CI->args());
+ Value *NewCI = Builder.CreateCall(NewFn, Args);
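+ // Repackage the literal-struct result into the old named struct type,
+ // element by element, so existing users keep the type they expect.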
+ Value *Res = PoisonValue::get(OldST);
+ for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
+ Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
+ Res = Builder.CreateInsertValue(Res, Elem, Idx);
+ }
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return;
};
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
@@ -3796,13 +3847,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
DefaultCase();
return;
}
- case Intrinsic::arm_neon_vld1:
- case Intrinsic::arm_neon_vld2:
- case Intrinsic::arm_neon_vld3:
- case Intrinsic::arm_neon_vld4:
- case Intrinsic::arm_neon_vld2lane:
- case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
@@ -3885,8 +3929,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::ptr_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
- assert(CI->arg_size() == 4 &&
- "Before LLVM 12.0 this intrinsic took four arguments");
+ if (CI->arg_size() != 4) {
+ DefaultCase();
+ return;
+ }
+
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
NewFn,
@@ -4047,6 +4094,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(4)};
NewCall = Builder.CreateCall(NewFn, Args);
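+ // Carry the old attributes over, dropping those of the removed alignment
+ // operand (old operand 3).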
+ AttributeList OldAttrs = CI->getAttributes();
+ AttributeList NewAttrs = AttributeList::get(
+ C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
+ {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
+ OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
+ NewCall->setAttributes(NewAttrs);
auto *MemCI = cast<MemIntrinsic>(NewCall);
// All mem intrinsics support dest alignment.
const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
@@ -4074,8 +4127,8 @@ void llvm::UpgradeCallsToIntrinsic(Function *F) {
// Replace all users of the old function with the new function or new
// instructions. This is not a range loop because the call is deleted.
for (User *U : make_early_inc_range(F->users()))
- if (CallInst *CI = dyn_cast<CallInst>(U))
- UpgradeIntrinsicCall(CI, NewFn);
+ if (CallBase *CB = dyn_cast<CallBase>(U))
+ UpgradeIntrinsicCall(CB, NewFn);
// Remove old function, no longer used, from the module.
F->eraseFromParent();
@@ -4126,7 +4179,7 @@ Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
return nullptr;
}
-Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
+Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
if (Opc != Instruction::BitCast)
return nullptr;
@@ -4358,6 +4411,24 @@ bool llvm::UpgradeModuleFlags(Module &M) {
}
}
}
+
+ // Upgrade branch protection and return address signing module flags. The
+ // module flag behavior for these fields were Error and now they are Min.
+ if (ID->getString() == "branch-target-enforcement" ||
+ ID->getString().startswith("sign-return-address")) {
+ if (auto *Behavior =
+ mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
+ if (Behavior->getLimitedValue() == Module::Error) {
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Metadata *Ops[3] = {
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
+ Op->getOperand(1), Op->getOperand(2)};
+ ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
+ Changed = true;
+ }
+ }
+ }
+
// Upgrade the Objective-C Image Info section: remove the whitespace in the
// section name so that llvm-lto does not complain about mismatching
// module flags that are functionally the same.
@@ -4469,7 +4540,7 @@ namespace {
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
- StrictFPUpgradeVisitor() {}
+ StrictFPUpgradeVisitor() = default;
void visitCallBase(CallBase &Call) {
if (!Call.isStrictFP())
@@ -4492,13 +4563,6 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
SFPV.visit(F);
}
- if (F.getCallingConv() == CallingConv::X86_INTR &&
- !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
- Type *ByValTy = F.getArg(0)->getType()->getPointerElementType();
- Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
- F.addParamAttr(0, NewAttr);
- }
-
// Remove all incompatible attributes from the function.
F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
for (auto &Arg : F.args())
@@ -4628,3 +4692,15 @@ void llvm::UpgradeAttributes(AttrBuilder &B) {
B.addAttribute(Attribute::NullPointerIsValid);
}
}
+
+void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
+
+ // clang.arc.attachedcall bundles are now required to have an operand.
+ // If they don't, it's okay to drop them entirely: when there is an operand,
+ // the "attachedcall" is meaningful and required, but without an operand,
+ // it's just a marker NOP. Dropping it merely prevents an optimization.
+ erase_if(Bundles, [&](OperandBundleDef &OBD) {
+ return OBD.getTag() == "clang.arc.attachedcall" &&
+ OBD.inputs().empty();
+ });
+}
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 99e3afaa8ba8..f064ff503eba 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -148,12 +148,6 @@ const Module *BasicBlock::getModule() const {
return getParent()->getParent();
}
-const Instruction *BasicBlock::getTerminator() const {
- if (InstList.empty() || !InstList.back().isTerminator())
- return nullptr;
- return &InstList.back();
-}
-
const CallInst *BasicBlock::getTerminatingMustTailCall() const {
if (InstList.empty())
return nullptr;
diff --git a/llvm/lib/IR/BuiltinGCs.cpp b/llvm/lib/IR/BuiltinGCs.cpp
index 31ee86383e78..e9ef034c488f 100644
--- a/llvm/lib/IR/BuiltinGCs.cpp
+++ b/llvm/lib/IR/BuiltinGCs.cpp
@@ -53,7 +53,7 @@ public:
/// while introducing only minor runtime overhead.
class ShadowStackGC : public GCStrategy {
public:
- ShadowStackGC() {}
+ ShadowStackGC() = default;
};
/// A GCStrategy which serves as an example for the usage of a statepoint based
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 936b1fc2ff6f..41b4f2919221 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -16,7 +16,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ConstantFold.h"
+#include "llvm/IR/ConstantFold.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
@@ -379,7 +379,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
opc != Instruction::AddrSpaceCast &&
// Do not fold bitcast (gep) with inrange index, as this loses
// information.
- !cast<GEPOperator>(CE)->getInRangeIndex().hasValue() &&
+ !cast<GEPOperator>(CE)->getInRangeIndex() &&
// Do not fold if the gep type is a vector, as bitcasting
// operand 0 of a vector gep will result in a bitcast between
// different sizes.
@@ -435,14 +435,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
bool ignored;
APFloat Val = FPC->getValueAPF();
- Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf() :
- DestTy->isFloatTy() ? APFloat::IEEEsingle() :
- DestTy->isDoubleTy() ? APFloat::IEEEdouble() :
- DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended() :
- DestTy->isFP128Ty() ? APFloat::IEEEquad() :
- DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble() :
- APFloat::Bogus(),
- APFloat::rmNearestTiesToEven, &ignored);
+ Val.convert(DestTy->getFltSemantics(), APFloat::rmNearestTiesToEven,
+ &ignored);
return ConstantFP::get(V->getContext(), Val);
}
return nullptr; // Can't fold.
@@ -683,6 +677,11 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
if (isa<UndefValue>(Idx))
return PoisonValue::get(Val->getType());
+ // Inserting null into all zeros is still all zeros.
+ // TODO: This is true for undef and poison splats too.
+ if (isa<ConstantAggregateZero>(Val) && Elt->isNullValue())
+ return Val;
+
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return nullptr;
@@ -724,7 +723,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
// Undefined shuffle mask -> undefined value.
if (all_of(Mask, [](int Elt) { return Elt == UndefMaskElem; })) {
- return UndefValue::get(FixedVectorType::get(EltTy, MaskNumElts));
+ return UndefValue::get(VectorType::get(EltTy, MaskEltCount));
}
// If the mask is all zeros this is a splat, no need to go through all
@@ -2036,8 +2035,18 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
// If inbounds, we can choose an out-of-bounds pointer as a base pointer.
return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy);
- Constant *Idx0 = cast<Constant>(Idxs[0]);
- if (Idxs.size() == 1 && (Idx0->isNullValue() || isa<UndefValue>(Idx0)))
+ auto IsNoOp = [&]() {
+ // For non-opaque pointers having multiple indices will change the result
+ // type of the GEP.
+ if (!C->getType()->getScalarType()->isOpaquePointerTy() && Idxs.size() != 1)
+ return false;
+
+ return all_of(Idxs, [](Value *Idx) {
+ Constant *IdxC = cast<Constant>(Idx);
+ return IdxC->isNullValue() || isa<UndefValue>(IdxC);
+ });
+ };
+ if (IsNoOp())
return GEPTy->isVectorTy() && !C->getType()->isVectorTy()
? ConstantVector::getSplat(
cast<VectorType>(GEPTy)->getElementCount(), C)
@@ -2090,6 +2099,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
// i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
//
// Don't fold if the cast is changing address spaces.
+ Constant *Idx0 = cast<Constant>(Idxs[0]);
if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) {
PointerType *SrcPtrTy =
dyn_cast<PointerType>(CE->getOperand(0)->getType());
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index a0f2179bddb4..9d239101d8fd 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -75,6 +75,24 @@ ConstantRange ConstantRange::fromKnownBits(const KnownBits &Known,
return ConstantRange(Lower, Upper + 1);
}
+KnownBits ConstantRange::toKnownBits() const {
+ // TODO: We could return conflicting known bits here, but consumers are
+ // likely not prepared for that.
+ if (isEmptySet())
+ return KnownBits(getBitWidth());
+
+ // We can only retain the top bits that are the same between min and max.
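+ // For example, for the range [4, 7) the bounds 0b100 and 0b110 first
+ // differ at bit 1, so only bit 2 is known (and is set).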
+ APInt Min = getUnsignedMin();
+ APInt Max = getUnsignedMax();
+ KnownBits Known = KnownBits::makeConstant(Min);
+ if (Optional<unsigned> DifferentBit =
+ APIntOps::GetMostSignificantDifferentBit(Min, Max)) {
+ Known.Zero.clearLowBits(*DifferentBit + 1);
+ Known.One.clearLowBits(*DifferentBit + 1);
+ }
+ return Known;
+}
+
ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
const ConstantRange &CR) {
if (CR.isEmptySet())
@@ -721,15 +739,23 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
case Instruction::UIToFP: {
// TODO: use input range if available
auto BW = getBitWidth();
- APInt Min = APInt::getMinValue(BW).zextOrSelf(ResultBitWidth);
- APInt Max = APInt::getMaxValue(BW).zextOrSelf(ResultBitWidth);
+ APInt Min = APInt::getMinValue(BW);
+ APInt Max = APInt::getMaxValue(BW);
+ if (ResultBitWidth > BW) {
+ Min = Min.zext(ResultBitWidth);
+ Max = Max.zext(ResultBitWidth);
+ }
return ConstantRange(std::move(Min), std::move(Max));
}
case Instruction::SIToFP: {
// TODO: use input range if available
auto BW = getBitWidth();
- APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(ResultBitWidth);
- APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(ResultBitWidth);
+ APInt SMin = APInt::getSignedMinValue(BW);
+ APInt SMax = APInt::getSignedMaxValue(BW);
+ if (ResultBitWidth > BW) {
+ SMin = SMin.sext(ResultBitWidth);
+ SMax = SMax.sext(ResultBitWidth);
+ }
return ConstantRange(std::move(SMin), std::move(SMax));
}
case Instruction::FPTrunc:
@@ -1212,7 +1238,10 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
// separately by combining division results with the appropriate signs.
APInt Zero = APInt::getZero(getBitWidth());
APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
- ConstantRange PosFilter(APInt(getBitWidth(), 1), SignedMin);
+ // There are no positive 1-bit values. The 1 would get interpreted as -1.
+ ConstantRange PosFilter =
+ getBitWidth() == 1 ? getEmpty()
+ : ConstantRange(APInt(getBitWidth(), 1), SignedMin);
ConstantRange NegFilter(SignedMin, Zero);
ConstantRange PosL = intersectWith(PosFilter);
ConstantRange NegL = intersectWith(NegFilter);
@@ -1368,34 +1397,29 @@ ConstantRange ConstantRange::binaryNot() const {
return ConstantRange(APInt::getAllOnes(getBitWidth())).sub(*this);
}
-ConstantRange
-ConstantRange::binaryAnd(const ConstantRange &Other) const {
+ConstantRange ConstantRange::binaryAnd(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return getEmpty();
- // Use APInt's implementation of AND for single element ranges.
- if (isSingleElement() && Other.isSingleElement())
- return {*getSingleElement() & *Other.getSingleElement()};
-
- // TODO: replace this with something less conservative
-
- APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
- return getNonEmpty(APInt::getZero(getBitWidth()), std::move(umin) + 1);
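+ // An AND result never exceeds either operand, so additionally clamp the
+ // known-bits range to [0, umin(max, Other.max) + 1).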
+ ConstantRange KnownBitsRange =
+ fromKnownBits(toKnownBits() & Other.toKnownBits(), false);
+ ConstantRange UMinUMaxRange =
+ getNonEmpty(APInt::getZero(getBitWidth()),
+ APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1);
+ return KnownBitsRange.intersectWith(UMinUMaxRange);
}
-ConstantRange
-ConstantRange::binaryOr(const ConstantRange &Other) const {
+ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return getEmpty();
- // Use APInt's implementation of OR for single element ranges.
- if (isSingleElement() && Other.isSingleElement())
- return {*getSingleElement() | *Other.getSingleElement()};
-
- // TODO: replace this with something less conservative
-
- APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
- return getNonEmpty(std::move(umax), APInt::getZero(getBitWidth()));
+ ConstantRange KnownBitsRange =
+ fromKnownBits(toKnownBits() | Other.toKnownBits(), false);
+ // Upper wrapped range.
+ ConstantRange UMaxUMinRange =
+ getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()),
+ APInt::getZero(getBitWidth()));
+ return KnownBitsRange.intersectWith(UMaxUMinRange);
}
ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const {
@@ -1412,8 +1436,7 @@ ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const {
if (isSingleElement() && getSingleElement()->isAllOnes())
return Other.binaryNot();
- // TODO: replace this with something less conservative
- return getFull();
+ return fromKnownBits(toKnownBits() ^ Other.toKnownBits(), /*IsSigned*/false);
}
ConstantRange
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index b862a159127f..0bf5e09d6647 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -11,12 +11,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Constants.h"
-#include "ConstantFold.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -27,7 +27,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -353,26 +352,14 @@ Constant *Constant::getNullValue(Type *Ty) {
case Type::IntegerTyID:
return ConstantInt::get(Ty, 0);
case Type::HalfTyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEhalf()));
case Type::BFloatTyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::BFloat()));
case Type::FloatTyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEsingle()));
case Type::DoubleTyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEdouble()));
case Type::X86_FP80TyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::x87DoubleExtended()));
case Type::FP128TyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEquad()));
case Type::PPC_FP128TyID:
- return ConstantFP::get(Ty->getContext(), APFloat(APFloat::PPCDoubleDouble(),
- APInt::getZero(128)));
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(Ty->getFltSemantics()));
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
case Type::StructTyID:
@@ -560,8 +547,6 @@ void llvm::deleteConstant(Constant *C) {
delete static_cast<InsertElementConstantExpr *>(C);
else if (isa<ShuffleVectorConstantExpr>(C))
delete static_cast<ShuffleVectorConstantExpr *>(C);
- else if (isa<ExtractValueConstantExpr>(C))
- delete static_cast<ExtractValueConstantExpr *>(C);
else if (isa<InsertValueConstantExpr>(C))
delete static_cast<InsertValueConstantExpr *>(C);
else if (isa<GetElementPtrConstantExpr>(C))
@@ -577,38 +562,47 @@ void llvm::deleteConstant(Constant *C) {
}
static bool canTrapImpl(const Constant *C,
- SmallPtrSetImpl<const ConstantExpr *> &NonTrappingOps) {
- assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
- // The only thing that could possibly trap are constant exprs.
+ SmallPtrSetImpl<const Constant *> &NonTrappingOps) {
+ assert(C->getType()->isFirstClassType() &&
+ "Cannot evaluate non-first-class types!");
+ // ConstantExpr or ConstantAggregate trap if any operands can trap.
+ if (isa<ConstantExpr>(C) || isa<ConstantAggregate>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ const Constant *Op = cast<Constant>(C->getOperand(i));
+ if (isa<ConstantExpr>(Op) || isa<ConstantAggregate>(Op)) {
+ if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps))
+ return true;
+ }
+ }
+ }
+
+ // The only leaves that can trap are constant expressions.
const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
if (!CE)
return false;
- // ConstantExpr traps if any operands can trap.
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
- if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
- if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps))
- return true;
- }
- }
-
// Otherwise, only specific operations can trap.
switch (CE->getOpcode()) {
default:
return false;
- case Instruction::UDiv:
case Instruction::SDiv:
- case Instruction::URem:
case Instruction::SRem:
- // Div and rem can trap if the RHS is not known to be non-zero.
- if (!isa<ConstantInt>(CE->getOperand(1)) ||CE->getOperand(1)->isNullValue())
+ // Signed div/rem can trap for SignedMin / -1.
+ if (!CE->getOperand(0)->isNotMinSignedValue() &&
+ (!isa<ConstantInt>(CE->getOperand(1)) ||
+ CE->getOperand(1)->isAllOnesValue()))
return true;
- return false;
+ LLVM_FALLTHROUGH;
+ case Instruction::UDiv:
+ case Instruction::URem:
+ // Div and rem can trap if the RHS is not known to be non-zero.
+ return !isa<ConstantInt>(CE->getOperand(1)) ||
+ CE->getOperand(1)->isNullValue();
}
}
bool Constant::canTrap() const {
- SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
+ SmallPtrSet<const Constant *, 4> NonTrappingOps;
return canTrapImpl(this, NonTrappingOps);
}
@@ -742,9 +736,13 @@ static bool constantIsDead(const Constant *C, bool RemoveDeadUsers) {
++I;
}
- if (RemoveDeadUsers)
+ if (RemoveDeadUsers) {
+ // If C is only used by metadata, it should not be preserved but should
+ // have its uses replaced.
+ ReplaceableMetadataImpl::SalvageDebugInfo(*C);
const_cast<Constant *>(C)->destroyConstant();
-
+ }
+
return true;
}
@@ -1046,9 +1044,9 @@ Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) {
return C;
}
-Constant *ConstantFP::getNegativeZero(Type *Ty) {
+Constant *ConstantFP::getZero(Type *Ty, bool Negative) {
const fltSemantics &Semantics = Ty->getScalarType()->getFltSemantics();
- APFloat NegZero = APFloat::getZero(Semantics, /*Negative=*/true);
+ APFloat NegZero = APFloat::getZero(Semantics, Negative);
Constant *C = get(Ty->getContext(), NegZero);
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
@@ -1057,7 +1055,6 @@ Constant *ConstantFP::getNegativeZero(Type *Ty) {
return C;
}
-
Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
if (Ty->isFPOrFPVectorTy())
return getNegativeZero(Ty);
@@ -1492,15 +1489,10 @@ bool ConstantExpr::isCompare() const {
}
bool ConstantExpr::hasIndices() const {
- return getOpcode() == Instruction::ExtractValue ||
- getOpcode() == Instruction::InsertValue;
+ return getOpcode() == Instruction::InsertValue;
}
ArrayRef<unsigned> ConstantExpr::getIndices() const {
- if (const ExtractValueConstantExpr *EVCE =
- dyn_cast<ExtractValueConstantExpr>(this))
- return EVCE->Indices;
-
return cast<InsertValueConstantExpr>(this)->Indices;
}
@@ -1550,8 +1542,6 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
case Instruction::InsertValue:
return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices(),
OnlyIfReducedTy);
- case Instruction::ExtractValue:
- return ConstantExpr::getExtractValue(Ops[0], getIndices(), OnlyIfReducedTy);
case Instruction::FNeg:
return ConstantExpr::getFNeg(Ops[0]);
case Instruction::ShuffleVector:
@@ -2065,6 +2055,17 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) {
return getTrunc(C, Ty);
}
+Constant *ConstantExpr::getSExtOrTrunc(Constant *C, Type *Ty) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Can only sign extend/truncate integers!");
+ Type *CTy = C->getType();
+ if (CTy->getScalarSizeInBits() < Ty->getScalarSizeInBits())
+ return getSExt(C, Ty);
+ if (CTy->getScalarSizeInBits() > Ty->getScalarSizeInBits())
+ return getTrunc(C, Ty);
+ return C;
+}
+
Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) &&
@@ -2233,8 +2234,8 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
"PtrToInt destination must be integer or integer vector");
assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
if (isa<VectorType>(C->getType()))
- assert(cast<FixedVectorType>(C->getType())->getNumElements() ==
- cast<FixedVectorType>(DstTy)->getNumElements() &&
+ assert(cast<VectorType>(C->getType())->getElementCount() ==
+ cast<VectorType>(DstTy)->getElementCount() &&
"Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::PtrToInt, C, DstTy, OnlyIfReduced);
}
@@ -2667,30 +2668,6 @@ Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
-Constant *ConstantExpr::getExtractValue(Constant *Agg, ArrayRef<unsigned> Idxs,
- Type *OnlyIfReducedTy) {
- assert(Agg->getType()->isFirstClassType() &&
- "Tried to create extractelement operation on non-first-class type!");
-
- Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
- (void)ReqTy;
- assert(ReqTy && "extractvalue indices invalid!");
-
- assert(Agg->getType()->isFirstClassType() &&
- "Non-first-class type for constant extractvalue expression");
- if (Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs))
- return FC;
-
- if (OnlyIfReducedTy == ReqTy)
- return nullptr;
-
- Constant *ArgVec[] = { Agg };
- const ConstantExprKeyType Key(Instruction::ExtractValue, ArgVec, 0, 0, Idxs);
-
- LLVMContextImpl *pImpl = Agg->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
@@ -2833,7 +2810,7 @@ Constant *ConstantExpr::getExactLogBase2(Constant *C) {
}
Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
- bool AllowRHSConstant) {
+ bool AllowRHSConstant, bool NSZ) {
assert(Instruction::isBinaryOp(Opcode) && "Only binops allowed");
// Commutative opcodes: it does not matter if AllowRHSConstant is set.
@@ -2848,8 +2825,7 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
case Instruction::And: // X & -1 = X
return Constant::getAllOnesValue(Ty);
case Instruction::FAdd: // X + -0.0 = X
- // TODO: If the fadd has 'nsz', should we return +0.0?
- return ConstantFP::getNegativeZero(Ty);
+ return ConstantFP::getZero(Ty, !NSZ);
case Instruction::FMul: // X * 1.0 = X
return ConstantFP::get(Ty, 1.0);
default:
@@ -3544,8 +3520,6 @@ Instruction *ConstantExpr::getAsInstruction(Instruction *InsertBefore) const {
case Instruction::InsertValue:
return InsertValueInst::Create(Ops[0], Ops[1], getIndices(), "",
InsertBefore);
- case Instruction::ExtractValue:
- return ExtractValueInst::Create(Ops[0], getIndices(), "", InsertBefore);
case Instruction::ShuffleVector:
return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask(), "",
InsertBefore);
diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h
index 4056c5748081..21ef1c0d9f64 100644
--- a/llvm/lib/IR/ConstantsContext.h
+++ b/llvm/lib/IR/ConstantsContext.h
@@ -209,36 +209,6 @@ public:
}
};
-/// ExtractValueConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// extractvalue constant exprs.
-class ExtractValueConstantExpr final : public ConstantExpr {
-public:
- ExtractValueConstantExpr(Constant *Agg, ArrayRef<unsigned> IdxList,
- Type *DestTy)
- : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
- Indices(IdxList.begin(), IdxList.end()) {
- Op<0>() = Agg;
- }
-
- // allocate space for exactly one operand
- void *operator new(size_t S) { return User::operator new(S, 1); }
- void operator delete(void *Ptr) { User::operator delete(Ptr); }
-
- /// Indices - These identify which value to extract.
- const SmallVector<unsigned, 4> Indices;
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- static bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::ExtractValue;
- }
- static bool classof(const Value *V) {
- return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
- }
-};
-
/// InsertValueConstantExpr - This class is private to
/// Constants.cpp, and is used behind the scenes to implement
/// insertvalue constant exprs.
@@ -363,11 +333,6 @@ struct OperandTraits<ShuffleVectorConstantExpr>
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
template <>
-struct OperandTraits<ExtractValueConstantExpr>
- : public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
-
-template <>
struct OperandTraits<InsertValueConstantExpr>
: public FixedNumOperandTraits<InsertValueConstantExpr, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
@@ -620,8 +585,6 @@ public:
return new ShuffleVectorConstantExpr(Ops[0], Ops[1], ShuffleMask);
case Instruction::InsertValue:
return new InsertValueConstantExpr(Ops[0], Ops[1], Indexes, Ty);
- case Instruction::ExtractValue:
- return new ExtractValueConstantExpr(Ops[0], Indexes, Ty);
case Instruction::GetElementPtr:
return GetElementPtrConstantExpr::Create(ExplicitTy, Ops[0], Ops.slice(1),
Ty, SubclassOptionalData);
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 7ed156d552b1..4b9189ca5baa 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -115,6 +115,10 @@ void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard) {
unwrap(C)->setDiscardValueNames(Discard);
}
+void LLVMContextSetOpaquePointers(LLVMContextRef C, LLVMBool OpaquePointers) {
+ unwrap(C)->setOpaquePointers(OpaquePointers);
+}
+
void LLVMContextDispose(LLVMContextRef C) {
delete unwrap(C);
}
@@ -534,6 +538,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMTokenTypeKind;
case Type::ScalableVectorTyID:
return LLVMScalableVectorTypeKind;
+ case Type::DXILPointerTyID:
+ llvm_unreachable("DXIL pointers are unsupported via the C API");
}
llvm_unreachable("Unhandled TypeID.");
}
@@ -786,6 +792,10 @@ LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
}
+LLVMBool LLVMPointerTypeIsOpaque(LLVMTypeRef Ty) {
+ return unwrap(Ty)->isOpaquePointerTy();
+}
+
LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
return wrap(FixedVectorType::get(unwrap(ElementType), ElementCount));
}
@@ -798,7 +808,7 @@ LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType,
LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) {
auto *Ty = unwrap<Type>(WrappedTy);
if (auto *PTy = dyn_cast<PointerType>(Ty))
- return wrap(PTy->getPointerElementType());
+ return wrap(PTy->getNonOpaquePointerElementType());
if (auto *ATy = dyn_cast<ArrayType>(Ty))
return wrap(ATy->getElementType());
return wrap(cast<VectorType>(Ty)->getElementType());
@@ -822,6 +832,10 @@ unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
/*--.. Operations on other types ...........................................--*/
+LLVMTypeRef LLVMPointerTypeInContext(LLVMContextRef C, unsigned AddressSpace) {
+ return wrap(PointerType::get(*unwrap(C), AddressSpace));
+}
+
LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
return wrap(Type::getVoidTy(*unwrap(C)));
}
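Together with LLVMContextSetOpaquePointers and LLVMPointerTypeIsOpaque added earlier in this file, this gives C API clients a way to work with typeless pointers end to end. A hedged sketch of how the three entry points compose (the demo function is illustrative):

#include "llvm-c/Core.h"

void opaquePtrDemo(void) {
  LLVMContextRef C = LLVMContextCreate();
  LLVMContextSetOpaquePointers(C, 1);
  LLVMTypeRef P = LLVMPointerTypeInContext(C, /*AddressSpace=*/0);
  if (LLVMPointerTypeIsOpaque(P)) {
    /* Opaque pointers carry no element type, so LLVMGetElementType
       does not apply here. */
  }
  LLVMContextDispose(C);
}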
@@ -1431,6 +1445,10 @@ LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
DontNullTerminate);
}
+LLVMValueRef LLVMGetAggregateElement(LLVMValueRef C, unsigned Idx) {
+ return wrap(unwrap<Constant>(C)->getAggregateElement(Idx));
+}
+
LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx) {
return wrap(unwrap<ConstantDataSequential>(C)->getElementAsConstant(idx));
}
@@ -1857,12 +1875,6 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
IntMask));
}
-LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
- unsigned NumIdx) {
- return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
- makeArrayRef(IdxList, NumIdx)));
-}
-
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
LLVMValueRef ElementValueConstant,
unsigned *IdxList, unsigned NumIdx) {
@@ -2061,13 +2073,13 @@ LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global) {
unsigned LLVMGetAlignment(LLVMValueRef V) {
Value *P = unwrap<Value>(V);
if (GlobalObject *GV = dyn_cast<GlobalObject>(P))
- return GV->getAlignment();
+ return GV->getAlign() ? GV->getAlign()->value() : 0;
if (AllocaInst *AI = dyn_cast<AllocaInst>(P))
- return AI->getAlignment();
+ return AI->getAlign().value();
if (LoadInst *LI = dyn_cast<LoadInst>(P))
- return LI->getAlignment();
+ return LI->getAlign().value();
if (StoreInst *SI = dyn_cast<StoreInst>(P))
- return SI->getAlignment();
+ return SI->getAlign().value();
if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(P))
return RMWI->getAlign().value();
if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(P))
@@ -3919,6 +3931,12 @@ LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name));
}
+LLVMOpcode LLVMGetCastOpcode(LLVMValueRef Src, LLVMBool SrcIsSigned,
+ LLVMTypeRef DestTy, LLVMBool DestIsSigned) {
+ return map_to_llvmopcode(CastInst::getCastOpcode(
+ unwrap(Src), SrcIsSigned, unwrap(DestTy), DestIsSigned));
+}
+
/*--.. Comparisons .........................................................--*/
LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
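LLVMGetCastOpcode mirrors CastInst::getCastOpcode, so a client no longer needs to reimplement the signedness and width rules to pick a cast. A small sketch pairing it with the existing LLVMBuildCast (helper name is illustrative):

#include "llvm-c/Core.h"

LLVMValueRef buildSignedCast(LLVMBuilderRef B, LLVMValueRef V,
                             LLVMTypeRef DestTy) {
  LLVMOpcode Op = LLVMGetCastOpcode(V, /*SrcIsSigned=*/1, DestTy,
                                    /*DestIsSigned=*/1);
  return LLVMBuildCast(B, Op, V, DestTy, "cast");
}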
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index dc5768dd4f26..34ffc9425281 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
using namespace llvm::dwarf;
@@ -293,6 +292,22 @@ DIStringType *DIBuilder::createStringType(StringRef Name, uint64_t SizeInBits) {
SizeInBits, 0);
}
+DIStringType *DIBuilder::createStringType(StringRef Name,
+ DIVariable *StringLength,
+ DIExpression *StrLocationExp) {
+ assert(!Name.empty() && "Unable to create type without name");
+ return DIStringType::get(VMContext, dwarf::DW_TAG_string_type, Name,
+ StringLength, nullptr, StrLocationExp, 0, 0, 0);
+}
+
+DIStringType *DIBuilder::createStringType(StringRef Name,
+ DIExpression *StringLengthExp,
+ DIExpression *StrLocationExp) {
+ assert(!Name.empty() && "Unable to create type without name");
+ return DIStringType::get(VMContext, dwarf::DW_TAG_string_type, Name, nullptr,
+ StringLengthExp, StrLocationExp, 0, 0, 0);
+}
+
DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0,
0, 0, None, DINode::FlagZero);
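The two createStringType overloads above cover strings whose length is only known at run time, as with Fortran deferred-length characters: one takes the length as a DIVariable, the other as a DIExpression. A hedged sketch against those signatures (the type name and the null location expression are illustrative):

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

DIStringType *deferredLenString(DIBuilder &DIB, DIVariable *LenVar) {
  // Length is read from LenVar at run time; no extra location expression.
  return DIB.createStringType("CHARACTER_1", LenVar,
                              /*StrLocationExp=*/nullptr);
}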
@@ -831,14 +846,15 @@ DISubprogram *DIBuilder::createFunction(
unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags,
DITemplateParameterArray TParams, DISubprogram *Decl,
- DITypeArray ThrownTypes, DINodeArray Annotations) {
+ DITypeArray ThrownTypes, DINodeArray Annotations,
+ StringRef TargetFuncName) {
bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
auto *Node = getSubprogram(
/*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context),
Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags,
SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl,
MDTuple::getTemporary(VMContext, None).release(), ThrownTypes,
- Annotations);
+ Annotations, TargetFuncName);
if (IsDefinition)
AllSubprograms.push_back(Node);
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index b9fc5261fefe..50799327c78a 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -15,6 +15,7 @@
#include "MetadataImpl.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -26,7 +27,7 @@ using namespace llvm;
namespace llvm {
// Use FS-AFDO discriminator.
cl::opt<bool> EnableFSDiscriminator(
- "enable-fs-discriminator", cl::Hidden, cl::init(false),
+ "enable-fs-discriminator", cl::Hidden,
cl::desc("Enable adding flow sensitive discriminators"));
} // namespace llvm
@@ -77,8 +78,8 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
Ops.push_back(Scope);
if (InlinedAt)
Ops.push_back(InlinedAt);
- return storeImpl(new (Ops.size()) DILocation(Context, Storage, Line, Column,
- Ops, ImplicitCode),
+ return storeImpl(new (Ops.size(), Storage) DILocation(
+ Context, Storage, Line, Column, Ops, ImplicitCode),
Storage, Context.pImpl->DILocations);
}
@@ -180,6 +181,7 @@ void DILocation::decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
CI = getUnsignedFromPrefixEncoding(
getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
}
+dwarf::Tag DINode::getTag() const { return (dwarf::Tag)SubclassData16; }
DINode::DIFlags DINode::getFlag(StringRef Flag) {
return StringSwitch<DIFlags>(Flag)
@@ -282,6 +284,7 @@ static bool isCanonical(const MDString *S) {
}
#endif
+dwarf::Tag GenericDINode::getTag() const { return (dwarf::Tag)SubclassData16; }
GenericDINode *GenericDINode::getImpl(LLVMContext &Context, unsigned Tag,
MDString *Header,
ArrayRef<Metadata *> DwarfOps,
@@ -301,7 +304,7 @@ GenericDINode *GenericDINode::getImpl(LLVMContext &Context, unsigned Tag,
// Use a nullptr for empty headers.
assert(isCanonical(Header) && "Expected canonical MDString");
Metadata *PreOps[] = {Header};
- return storeImpl(new (DwarfOps.size() + 1) GenericDINode(
+ return storeImpl(new (DwarfOps.size() + 1, Storage) GenericDINode(
Context, Storage, Hash, Tag, PreOps, DwarfOps),
Storage, Context.pImpl->GenericDINodes);
}
@@ -326,20 +329,25 @@ void GenericDINode::recalculateHash() {
} \
} while (false)
#define DEFINE_GETIMPL_STORE(CLASS, ARGS, OPS) \
- return storeImpl(new (array_lengthof(OPS)) \
+ return storeImpl(new (array_lengthof(OPS), Storage) \
CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS), \
Storage, Context.pImpl->CLASS##s)
#define DEFINE_GETIMPL_STORE_NO_OPS(CLASS, ARGS) \
- return storeImpl(new (0u) CLASS(Context, Storage, UNWRAP_ARGS(ARGS)), \
+ return storeImpl(new (0u, Storage) \
+ CLASS(Context, Storage, UNWRAP_ARGS(ARGS)), \
Storage, Context.pImpl->CLASS##s)
#define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS) \
- return storeImpl(new (array_lengthof(OPS)) CLASS(Context, Storage, OPS), \
+ return storeImpl(new (array_lengthof(OPS), Storage) \
+ CLASS(Context, Storage, OPS), \
Storage, Context.pImpl->CLASS##s)
#define DEFINE_GETIMPL_STORE_N(CLASS, ARGS, OPS, NUM_OPS) \
- return storeImpl(new (NUM_OPS) \
+ return storeImpl(new (NUM_OPS, Storage) \
CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS), \
Storage, Context.pImpl->CLASS##s)
+DISubrange::DISubrange(LLVMContext &C, StorageType Storage,
+ ArrayRef<Metadata *> Ops)
+ : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, Ops) {}
DISubrange *DISubrange::getImpl(LLVMContext &Context, int64_t Count, int64_t Lo,
StorageType Storage, bool ShouldCreate) {
auto *CountNode = ConstantAsMetadata::get(
@@ -450,6 +458,10 @@ DISubrange::BoundType DISubrange::getStride() const {
return BoundType();
}
+DIGenericSubrange::DIGenericSubrange(LLVMContext &C, StorageType Storage,
+ ArrayRef<Metadata *> Ops)
+ : DINode(C, DIGenericSubrangeKind, Storage, dwarf::DW_TAG_generic_subrange,
+ Ops) {}
DIGenericSubrange *DIGenericSubrange::getImpl(LLVMContext &Context,
Metadata *CountNode, Metadata *LB,
@@ -529,6 +541,13 @@ DIGenericSubrange::BoundType DIGenericSubrange::getStride() const {
return BoundType();
}
+DIEnumerator::DIEnumerator(LLVMContext &C, StorageType Storage,
+ const APInt &Value, bool IsUnsigned,
+ ArrayRef<Metadata *> Ops)
+ : DINode(C, DIEnumeratorKind, Storage, dwarf::DW_TAG_enumerator, Ops),
+ Value(Value) {
+ SubclassData32 = IsUnsigned;
+}
DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, const APInt &Value,
bool IsUnsigned, MDString *Name,
StorageType Storage, bool ShouldCreate) {
@@ -580,6 +599,36 @@ DIStringType *DIStringType::getImpl(LLVMContext &Context, unsigned Tag,
DEFINE_GETIMPL_STORE(DIStringType, (Tag, SizeInBits, AlignInBits, Encoding),
Ops);
}
+DIType *DIDerivedType::getClassType() const {
+ assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ return cast_or_null<DIType>(getExtraData());
+}
+uint32_t DIDerivedType::getVBPtrOffset() const {
+ assert(getTag() == dwarf::DW_TAG_inheritance);
+ if (auto *CM = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(CM->getValue()))
+ return static_cast<uint32_t>(CI->getZExtValue());
+ return 0;
+}
+Constant *DIDerivedType::getStorageOffsetInBits() const {
+ assert(getTag() == dwarf::DW_TAG_member && isBitField());
+ if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ return C->getValue();
+ return nullptr;
+}
+
+Constant *DIDerivedType::getConstant() const {
+ assert(getTag() == dwarf::DW_TAG_member && isStaticMember());
+ if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ return C->getValue();
+ return nullptr;
+}
+Constant *DIDerivedType::getDiscriminantValue() const {
+ assert(getTag() == dwarf::DW_TAG_member && !isStaticMember());
+ if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ return C->getValue();
+ return nullptr;
+}
DIDerivedType *DIDerivedType::getImpl(
LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
@@ -701,6 +750,12 @@ DICompositeType *DICompositeType::getODRTypeIfExists(LLVMContext &Context,
return nullptr;
return Context.pImpl->DITypeMap->lookup(&Identifier);
}
+DISubroutineType::DISubroutineType(LLVMContext &C, StorageType Storage,
+ DIFlags Flags, uint8_t CC,
+ ArrayRef<Metadata *> Ops)
+ : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type, 0,
+ 0, 0, 0, Flags, Ops),
+ CC(CC) {}
DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, DIFlags Flags,
uint8_t CC, Metadata *TypeArray,
@@ -711,6 +766,12 @@ DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, DIFlags Flags,
DEFINE_GETIMPL_STORE(DISubroutineType, (Flags, CC), Ops);
}
+DIFile::DIFile(LLVMContext &C, StorageType Storage,
+ Optional<ChecksumInfo<MDString *>> CS, Optional<MDString *> Src,
+ ArrayRef<Metadata *> Ops)
+ : DIScope(C, DIFileKind, Storage, dwarf::DW_TAG_file_type, Ops),
+ Checksum(CS), Source(Src) {}
+
// FIXME: Implement this string-enum correspondence with a .def file and macros,
// so that the association is explicit rather than implied.
static const char *ChecksumKindName[DIFile::CSK_Last] = {
@@ -746,9 +807,23 @@ DIFile *DIFile::getImpl(LLVMContext &Context, MDString *Filename,
assert((!Source || isCanonical(*Source)) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIFile, (Filename, Directory, CS, Source));
Metadata *Ops[] = {Filename, Directory, CS ? CS->Value : nullptr,
- Source.getValueOr(nullptr)};
+ Source.value_or(nullptr)};
DEFINE_GETIMPL_STORE(DIFile, (CS, Source), Ops);
}
+DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage,
+ unsigned SourceLanguage, bool IsOptimized,
+ unsigned RuntimeVersion, unsigned EmissionKind,
+ uint64_t DWOId, bool SplitDebugInlining,
+ bool DebugInfoForProfiling, unsigned NameTableKind,
+ bool RangesBaseAddress, ArrayRef<Metadata *> Ops)
+ : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
+ SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
+ RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), DWOId(DWOId),
+ SplitDebugInlining(SplitDebugInlining),
+ DebugInfoForProfiling(DebugInfoForProfiling),
+ NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) {
+ assert(Storage != Uniqued);
+}
DICompileUnit *DICompileUnit::getImpl(
LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
@@ -775,7 +850,7 @@ DICompileUnit *DICompileUnit::getImpl(
Macros,
SysRoot,
SDK};
- return storeImpl(new (array_lengthof(Ops)) DICompileUnit(
+ return storeImpl(new (array_lengthof(Ops), Storage) DICompileUnit(
Context, Storage, SourceLanguage, IsOptimized,
RuntimeVersion, EmissionKind, DWOId, SplitDebugInlining,
DebugInfoForProfiling, NameTableKind, RangesBaseAddress,
@@ -827,6 +902,30 @@ const char *DICompileUnit::nameTableKindString(DebugNameTableKind NTK) {
}
return nullptr;
}
+DISubprogram::DISubprogram(LLVMContext &C, StorageType Storage, unsigned Line,
+ unsigned ScopeLine, unsigned VirtualIndex,
+ int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags,
+ ArrayRef<Metadata *> Ops)
+ : DILocalScope(C, DISubprogramKind, Storage, dwarf::DW_TAG_subprogram, Ops),
+ Line(Line), ScopeLine(ScopeLine), VirtualIndex(VirtualIndex),
+ ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags) {
+ static_assert(dwarf::DW_VIRTUALITY_max < 4, "Virtuality out of range");
+}
+DISubprogram::DISPFlags
+DISubprogram::toSPFlags(bool IsLocalToUnit, bool IsDefinition, bool IsOptimized,
+ unsigned Virtuality, bool IsMainSubprogram) {
+ // We're assuming virtuality is the low-order field.
+ static_assert(int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
+ int(SPFlagPureVirtual) ==
+ int(dwarf::DW_VIRTUALITY_pure_virtual),
+ "Virtuality constant mismatch");
+ return static_cast<DISPFlags>(
+ (Virtuality & SPFlagVirtuality) |
+ (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
+ (IsDefinition ? SPFlagDefinition : SPFlagZero) |
+ (IsOptimized ? SPFlagOptimized : SPFlagZero) |
+ (IsMainSubprogram ? SPFlagMainSubprogram : SPFlagZero));
+}
DISubprogram *DILocalScope::getSubprogram() const {
if (auto *Block = dyn_cast<DILexicalBlockBase>(this))
@@ -881,27 +980,33 @@ DISubprogram *DISubprogram::getImpl(
unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex,
int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes, Metadata *Annotations, StorageType Storage,
- bool ShouldCreate) {
+ Metadata *ThrownTypes, Metadata *Annotations, MDString *TargetFuncName,
+ StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
+ assert(isCanonical(TargetFuncName) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DISubprogram,
(Scope, Name, LinkageName, File, Line, Type, ScopeLine,
ContainingType, VirtualIndex, ThisAdjustment, Flags,
SPFlags, Unit, TemplateParams, Declaration,
- RetainedNodes, ThrownTypes, Annotations));
- SmallVector<Metadata *, 12> Ops = {
+ RetainedNodes, ThrownTypes, Annotations,
+ TargetFuncName));
+ SmallVector<Metadata *, 13> Ops = {
File, Scope, Name, LinkageName,
Type, Unit, Declaration, RetainedNodes,
- ContainingType, TemplateParams, ThrownTypes, Annotations};
- if (!Annotations) {
+ ContainingType, TemplateParams, ThrownTypes, Annotations,
+ TargetFuncName};
+ if (!TargetFuncName) {
Ops.pop_back();
- if (!ThrownTypes) {
+ if (!Annotations) {
Ops.pop_back();
- if (!TemplateParams) {
+ if (!ThrownTypes) {
Ops.pop_back();
- if (!ContainingType)
+ if (!TemplateParams) {
Ops.pop_back();
+ if (!ContainingType)
+ Ops.pop_back();
+ }
}
}
}
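The pop_back ladder above encodes a tail-trimming convention: the last five operands (ContainingType through the new TargetFuncName) are optional, stored in a fixed order, and a null may be dropped only when everything behind it was dropped too. The same logic as a loop, with hypothetical names:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// MinSize is the count of mandatory leading operands (8 for DISubprogram,
// leaving the 5 optional trailing slots eligible for trimming).
static void dropTrailingNulls(SmallVectorImpl<Metadata *> &Ops,
                              unsigned MinSize) {
  while (Ops.size() > MinSize && Ops.back() == nullptr)
    Ops.pop_back();
}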
@@ -915,6 +1020,10 @@ bool DISubprogram::describes(const Function *F) const {
assert(F && "Invalid function");
return F->getSubprogram() == this;
}
+DILexicalBlockBase::DILexicalBlockBase(LLVMContext &C, unsigned ID,
+ StorageType Storage,
+ ArrayRef<Metadata *> Ops)
+ : DILocalScope(C, ID, Storage, dwarf::DW_TAG_lexical_block, Ops) {}
DILexicalBlock *DILexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *File, unsigned Line,
@@ -940,6 +1049,10 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context,
DEFINE_GETIMPL_STORE(DILexicalBlockFile, (Discriminator), Ops);
}
+DINamespace::DINamespace(LLVMContext &Context, StorageType Storage,
+ bool ExportSymbols, ArrayRef<Metadata *> Ops)
+ : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops),
+ ExportSymbols(ExportSymbols) {}
DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, bool ExportSymbols,
StorageType Storage, bool ShouldCreate) {
@@ -950,6 +1063,11 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
DEFINE_GETIMPL_STORE(DINamespace, (ExportSymbols), Ops);
}
+DICommonBlock::DICommonBlock(LLVMContext &Context, StorageType Storage,
+ unsigned LineNo, ArrayRef<Metadata *> Ops)
+ : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
+ Ops),
+ LineNo(LineNo) {}
DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *Decl, MDString *Name,
Metadata *File, unsigned LineNo,
@@ -961,6 +1079,10 @@ DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
DEFINE_GETIMPL_STORE(DICommonBlock, (LineNo), Ops);
}
+DIModule::DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo,
+ bool IsDecl, ArrayRef<Metadata *> Ops)
+ : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops),
+ LineNo(LineNo), IsDecl(IsDecl) {}
DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File,
Metadata *Scope, MDString *Name,
MDString *ConfigurationMacros,
@@ -974,6 +1096,13 @@ DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File,
IncludePath, APINotesFile};
DEFINE_GETIMPL_STORE(DIModule, (LineNo, IsDecl), Ops);
}
+DITemplateTypeParameter::DITemplateTypeParameter(LLVMContext &Context,
+ StorageType Storage,
+ bool IsDefault,
+ ArrayRef<Metadata *> Ops)
+ : DITemplateParameter(Context, DITemplateTypeParameterKind, Storage,
+ dwarf::DW_TAG_template_type_parameter, IsDefault,
+ Ops) {}
DITemplateTypeParameter *
DITemplateTypeParameter::getImpl(LLVMContext &Context, MDString *Name,
@@ -1039,6 +1168,11 @@ DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags, AlignInBits), Ops);
}
+DIVariable::DIVariable(LLVMContext &C, unsigned ID, StorageType Storage,
+ signed Line, ArrayRef<Metadata *> Ops,
+ uint32_t AlignInBits)
+ : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line),
+ AlignInBits(AlignInBits) {}
Optional<uint64_t> DIVariable::getSizeInBits() const {
// This is used by the Verifier so be mindful of broken types.
const Metadata *RawType = getRawType();
@@ -1062,6 +1196,9 @@ Optional<uint64_t> DIVariable::getSizeInBits() const {
return None;
}
+DILabel::DILabel(LLVMContext &C, StorageType Storage, unsigned Line,
+ ArrayRef<Metadata *> Ops)
+ : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate) {
@@ -1078,6 +1215,12 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context,
DEFINE_GETIMPL_LOOKUP(DIExpression, (Elements));
DEFINE_GETIMPL_STORE_NO_OPS(DIExpression, (Elements));
}
+bool DIExpression::isEntryValue() const {
+ return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_LLVM_entry_value;
+}
+bool DIExpression::startsWithDeref() const {
+ return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref;
+}
unsigned DIExpression::ExprOperand::getSize() const {
uint64_t Op = getOp();
@@ -1439,7 +1582,7 @@ DIExpression *DIExpression::appendToStack(const DIExpression *Expr,
//
// Match .* DW_OP_stack_value (DW_OP_LLVM_fragment A B)?.
Optional<FragmentInfo> FI = Expr->getFragmentInfo();
- unsigned DropUntilStackValue = FI.hasValue() ? 3 : 0;
+ unsigned DropUntilStackValue = FI ? 3 : 0;
ArrayRef<uint64_t> ExprOpsBeforeFragment =
Expr->getElements().drop_back(DropUntilStackValue);
bool NeedsDeref = (Expr->getNumElements() > DropUntilStackValue) &&
@@ -1597,6 +1740,11 @@ DIGlobalVariableExpression::getImpl(LLVMContext &Context, Metadata *Variable,
Metadata *Ops[] = {Variable, Expression};
DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIGlobalVariableExpression, Ops);
}
+DIObjCProperty::DIObjCProperty(LLVMContext &C, StorageType Storage,
+ unsigned Line, unsigned Attributes,
+ ArrayRef<Metadata *> Ops)
+ : DINode(C, DIObjCPropertyKind, Storage, dwarf::DW_TAG_APPLE_property, Ops),
+ Line(Line), Attributes(Attributes) {}
DIObjCProperty *DIObjCProperty::getImpl(
LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line,
diff --git a/llvm/lib/IR/DiagnosticHandler.cpp b/llvm/lib/IR/DiagnosticHandler.cpp
index 7b40728a34e8..683eade50291 100644
--- a/llvm/lib/IR/DiagnosticHandler.cpp
+++ b/llvm/lib/IR/DiagnosticHandler.cpp
@@ -47,8 +47,7 @@ static cl::opt<PassRemarksOpt, true, cl::parser<std::string>> PassRemarks(
"pass-remarks", cl::value_desc("pattern"),
cl::desc("Enable optimization remarks from passes whose name match "
"the given regular expression"),
- cl::Hidden, cl::location(PassRemarksPassedOptLoc), cl::ValueRequired,
- cl::ZeroOrMore);
+ cl::Hidden, cl::location(PassRemarksPassedOptLoc), cl::ValueRequired);
// -pass-remarks-missed
// Command line flag to enable emitOptimizationRemarkMissed()
@@ -56,8 +55,7 @@ static cl::opt<PassRemarksOpt, true, cl::parser<std::string>> PassRemarksMissed(
"pass-remarks-missed", cl::value_desc("pattern"),
cl::desc("Enable missed optimization remarks from passes whose name match "
"the given regular expression"),
- cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired,
- cl::ZeroOrMore);
+ cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired);
// -pass-remarks-analysis
// Command line flag to enable emitOptimizationRemarkAnalysis()
@@ -67,8 +65,7 @@ static cl::opt<PassRemarksOpt, true, cl::parser<std::string>>
cl::desc(
"Enable optimization analysis remarks from passes whose name match "
"the given regular expression"),
- cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired,
- cl::ZeroOrMore);
+ cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired);
}
bool DiagnosticHandler::isAnalysisRemarkEnabled(StringRef PassName) const {
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index f46f0fdd947d..50fe6829ad86 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -393,6 +393,17 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
return OS.str();
}
+DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst,
+ Twine &Msg)
+ : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning,
+ *Inst->getParent()->getParent(),
+ Inst->getDebugLoc()),
+ Msg(Msg) {}
+
+void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const {
+ DP << getLocationStr() << ": " << getMsg();
+}
+
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
diff --git a/llvm/lib/IR/Dominators.cpp b/llvm/lib/IR/Dominators.cpp
index aac8936c7bd6..09be2a8ef605 100644
--- a/llvm/lib/IR/Dominators.cpp
+++ b/llvm/lib/IR/Dominators.cpp
@@ -25,7 +25,6 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp
index c6e0938e71a6..48ee84080e98 100644
--- a/llvm/lib/IR/FPEnv.cpp
+++ b/llvm/lib/IR/FPEnv.cpp
@@ -14,6 +14,9 @@
#include "llvm/IR/FPEnv.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
namespace llvm {
@@ -82,4 +85,46 @@ convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
}
return ExceptStr;
}
+
+Intrinsic::ID getConstrainedIntrinsicID(const Instruction &Instr) {
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ switch (Instr.getOpcode()) {
+ case Instruction::FCmp:
+ // Unlike other instructions, FCmp can be mapped to one of two intrinsic
+ // functions. We choose the non-signaling variant.
+ IID = Intrinsic::experimental_constrained_fcmp;
+ break;
+
+ // Instructions
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Instruction::NAME: \
+ IID = Intrinsic::INTRINSIC; \
+ break;
+#define FUNCTION(NAME, NARG, ROUND_MODE, INTRINSIC)
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)
+#include "llvm/IR/ConstrainedOps.def"
+
+ // Intrinsic calls.
+ case Instruction::Call:
+ if (auto *IntrinCall = dyn_cast<IntrinsicInst>(&Instr)) {
+ switch (IntrinCall->getIntrinsicID()) {
+#define FUNCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::NAME: \
+ IID = Intrinsic::INTRINSIC; \
+ break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)
+#include "llvm/IR/ConstrainedOps.def"
+ default:
+ break;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return IID;
+}
+
} // namespace llvm
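getConstrainedIntrinsicID gives strictfp lowering a single query for the constrained counterpart of an instruction, returning Intrinsic::not_intrinsic when no mapping exists; the switch is generated from ConstrainedOps.def. A sketch, assuming the matching declaration lands in FPEnv.h:

#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// True if I could be rewritten as a constrained intrinsic call.
bool hasConstrainedForm(const Instruction &I) {
  return getConstrainedIntrinsicID(I) != Intrinsic::not_intrinsic;
}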
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 726ba80da41b..53df94366760 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
@@ -339,8 +340,9 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty,
Module *M) {
auto *F = new Function(Ty, Linkage, AddrSpace, N, M);
AttrBuilder B(F->getContext());
- if (M->getUwtable())
- B.addAttribute(Attribute::UWTable);
+ UWTableKind UWTable = M->getUwtable();
+ if (UWTable != UWTableKind::None)
+ B.addUWTableAttr(UWTable);
switch (M->getFramePointer()) {
case FramePointerKind::None:
// 0 ("none") is the default.
@@ -926,25 +928,25 @@ std::string Intrinsic::getNameNoUnnamedTypes(ID Id, ArrayRef<Type *> Tys) {
enum IIT_Info {
// Common values should be encoded with 0-15.
IIT_Done = 0,
- IIT_I1 = 1,
- IIT_I8 = 2,
- IIT_I16 = 3,
- IIT_I32 = 4,
- IIT_I64 = 5,
- IIT_F16 = 6,
- IIT_F32 = 7,
- IIT_F64 = 8,
- IIT_V2 = 9,
- IIT_V4 = 10,
- IIT_V8 = 11,
- IIT_V16 = 12,
- IIT_V32 = 13,
- IIT_PTR = 14,
- IIT_ARG = 15,
+ IIT_I1 = 1,
+ IIT_I8 = 2,
+ IIT_I16 = 3,
+ IIT_I32 = 4,
+ IIT_I64 = 5,
+ IIT_F16 = 6,
+ IIT_F32 = 7,
+ IIT_F64 = 8,
+ IIT_V2 = 9,
+ IIT_V4 = 10,
+ IIT_V8 = 11,
+ IIT_V16 = 12,
+ IIT_V32 = 13,
+ IIT_PTR = 14,
+ IIT_ARG = 15,
// Values from 16+ are only encodable with the inefficient encoding.
- IIT_V64 = 16,
- IIT_MMX = 17,
+ IIT_V64 = 16,
+ IIT_MMX = 17,
IIT_TOKEN = 18,
IIT_METADATA = 19,
IIT_EMPTYSTRUCT = 20,
@@ -955,7 +957,7 @@ enum IIT_Info {
IIT_EXTEND_ARG = 25,
IIT_TRUNC_ARG = 26,
IIT_ANYPTR = 27,
- IIT_V1 = 28,
+ IIT_V1 = 28,
IIT_VARARG = 29,
IIT_HALF_VEC_ARG = 30,
IIT_SAME_VEC_WIDTH_ARG = 31,
@@ -978,11 +980,14 @@ enum IIT_Info {
IIT_BF16 = 48,
IIT_STRUCT9 = 49,
IIT_V256 = 50,
- IIT_AMX = 51,
+ IIT_AMX = 51,
IIT_PPCF128 = 52,
IIT_V3 = 53,
IIT_EXTERNREF = 54,
- IIT_FUNCREF = 55
+ IIT_FUNCREF = 55,
+ IIT_ANYPTR_TO_ELT = 56,
+ IIT_I2 = 57,
+ IIT_I4 = 58,
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -1035,6 +1040,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_I1:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1));
return;
+ case IIT_I2:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 2));
+ return;
+ case IIT_I4:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 4));
+ return;
case IIT_I8:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
return;
@@ -1156,6 +1167,13 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
return;
}
+ case IIT_ANYPTR_TO_ELT: {
+ unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(
+ IITDescriptor::get(IITDescriptor::AnyPtrToElt, ArgNo, RefNo));
+ return;
+ }
case IIT_VEC_OF_ANYPTRS_TO_ELT: {
unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
@@ -1347,6 +1365,9 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::VecOfAnyPtrsToElt:
// Return the overloaded type (which determines the pointers address space)
return Tys[D.getOverloadArgNumber()];
+ case IITDescriptor::AnyPtrToElt:
+ // Return the overloaded type (which determines the pointer's address space)
+ return Tys[D.getOverloadArgNumber()];
}
llvm_unreachable("unhandled");
}
@@ -1406,10 +1427,10 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
.getCallee());
}
-// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
-#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+// This defines the "Intrinsic::getIntrinsicForClangBuiltin()" method.
+#define GET_LLVM_INTRINSIC_FOR_CLANG_BUILTIN
#include "llvm/IR/IntrinsicImpl.inc"
-#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#undef GET_LLVM_INTRINSIC_FOR_CLANG_BUILTIN
// This defines the "Intrinsic::getIntrinsicForMSBuiltin()" method.
#define GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
@@ -1463,19 +1484,37 @@ static bool matchIntrinsicType(
PointerType *PT = dyn_cast<PointerType>(Ty);
if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
return true;
- if (!PT->isOpaque())
+ if (!PT->isOpaque()) {
+ /* Manually consume a pointer to empty struct descriptor, which is
+ * used for externref. We don't want to enforce that the struct is
+ * anonymous in this case. (This renders externref intrinsics
+ * non-unique, but this will go away with opaque pointers anyway.) */
+ if (Infos.front().Kind == IITDescriptor::Struct &&
+ Infos.front().Struct_NumElements == 0) {
+ Infos = Infos.slice(1);
+ return false;
+ }
return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos,
ArgTys, DeferredChecks, IsDeferredCheck);
+ }
// Consume IIT descriptors relating to the pointer element type.
- while (Infos.front().Kind == IITDescriptor::Pointer)
+ // FIXME: Intrinsic type matching of nested single value types or even
+ // aggregates doesn't work properly with opaque pointers but hopefully
+ // doesn't happen in practice.
+ while (Infos.front().Kind == IITDescriptor::Pointer ||
+ Infos.front().Kind == IITDescriptor::Vector)
Infos = Infos.slice(1);
+ assert((Infos.front().Kind != IITDescriptor::Argument ||
+ Infos.front().getArgumentKind() == IITDescriptor::AK_MatchType) &&
+ "Unsupported polymorphic pointer type with opaque pointer");
Infos = Infos.slice(1);
return false;
}
case IITDescriptor::Struct: {
StructType *ST = dyn_cast<StructType>(Ty);
- if (!ST || ST->getNumElements() != D.Struct_NumElements)
+ if (!ST || !ST->isLiteral() || ST->isPacked() ||
+ ST->getNumElements() != D.Struct_NumElements)
return true;
for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
@@ -1587,6 +1626,30 @@ static bool matchIntrinsicType(
return !ThisArgType->isOpaqueOrPointeeTypeMatches(
ReferenceType->getElementType());
}
+ case IITDescriptor::AnyPtrToElt: {
+ unsigned RefArgNumber = D.getRefArgNumber();
+ if (RefArgNumber >= ArgTys.size()) {
+ if (IsDeferredCheck)
+ return true;
+ // If forward referencing, already add the pointer type and
+ // defer the checks for later.
+ ArgTys.push_back(Ty);
+ return DeferCheck(Ty);
+ }
+
+ if (!IsDeferredCheck) {
+ assert(D.getOverloadArgNumber() == ArgTys.size() &&
+ "Table consistency error");
+ ArgTys.push_back(Ty);
+ }
+
+ auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
+ auto *ThisArgType = dyn_cast<PointerType>(Ty);
+ if (!ThisArgType || !ReferenceType)
+ return true;
+ return !ThisArgType->isOpaqueOrPointeeTypeMatches(
+ ReferenceType->getElementType());
+ }
case IITDescriptor::VecOfAnyPtrsToElt: {
unsigned RefArgNumber = D.getRefArgNumber();
if (RefArgNumber >= ArgTys.size()) {
@@ -1802,7 +1865,7 @@ bool Function::hasAddressTaken(const User **PutOffender,
*PutOffender = FU;
return true;
}
- if (!Call->isCallee(&U)) {
+ if (!Call->isCallee(&U) || Call->getFunctionType() != getFunctionType()) {
if (IgnoreARCAttachedCall &&
Call->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall,
U.getOperandNo()))
@@ -1909,7 +1972,7 @@ void Function::setEntryCount(ProfileCount Count,
const DenseSet<GlobalValue::GUID> *S) {
#if !defined(NDEBUG)
auto PrevCount = getEntryCount();
- assert(!PrevCount.hasValue() || PrevCount->getType() == Count.getType());
+ assert(!PrevCount || PrevCount->getType() == Count.getType());
#endif
auto ImportGUIDs = getImportGUIDs();
diff --git a/llvm/lib/IR/GVMaterializer.cpp b/llvm/lib/IR/GVMaterializer.cpp
index 35397309a103..dc3b0e0fc236 100644
--- a/llvm/lib/IR/GVMaterializer.cpp
+++ b/llvm/lib/IR/GVMaterializer.cpp
@@ -14,4 +14,4 @@
#include "llvm/IR/GVMaterializer.h"
using namespace llvm;
-GVMaterializer::~GVMaterializer() {}
+GVMaterializer::~GVMaterializer() = default;
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 47e8bc0a916d..3265050261c8 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -67,6 +67,10 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
setDLLStorageClass(Src->getDLLStorageClass());
setDSOLocal(Src->isDSOLocal());
setPartition(Src->getPartition());
+ if (Src->hasSanitizerMetadata())
+ setSanitizerMetadata(Src->getSanitizerMetadata());
+ else
+ removeSanitizerMetadata();
}
void GlobalValue::removeFromParent() {
@@ -217,6 +221,25 @@ void GlobalValue::setPartition(StringRef S) {
HasPartition = !S.empty();
}
+using SanitizerMetadata = GlobalValue::SanitizerMetadata;
+const SanitizerMetadata &GlobalValue::getSanitizerMetadata() const {
+ assert(hasSanitizerMetadata());
+ assert(getContext().pImpl->GlobalValueSanitizerMetadata.count(this));
+ return getContext().pImpl->GlobalValueSanitizerMetadata[this];
+}
+
+void GlobalValue::setSanitizerMetadata(SanitizerMetadata Meta) {
+ getContext().pImpl->GlobalValueSanitizerMetadata[this] = Meta;
+ HasSanitizerMetadata = true;
+}
+
+void GlobalValue::removeSanitizerMetadata() {
+ DenseMap<const GlobalValue *, SanitizerMetadata> &MetadataMap =
+ getContext().pImpl->GlobalValueSanitizerMetadata;
+ MetadataMap.erase(this);
+ HasSanitizerMetadata = false;
+}
+
StringRef GlobalObject::getSectionImpl() const {
assert(hasSection());
return getContext().pImpl->GlobalObjectSections[this];
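Sanitizer metadata follows the same side-table pattern as partitions and sections: the payload lives in an LLVMContextImpl DenseMap and the global itself only carries a presence bit, so absent metadata costs nothing per value. Mirroring the copyAttributesFrom change above, a sketch of transferring it between globals (helper name is illustrative):

#include "llvm/IR/GlobalValue.h"
using namespace llvm;

void copySanitizerMD(GlobalValue &Dst, const GlobalValue &Src) {
  if (Src.hasSanitizerMetadata())
    Dst.setSanitizerMetadata(Src.getSanitizerMetadata());
  else
    Dst.removeSanitizerMetadata(); // also erases the map entry, if any
}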
@@ -262,7 +285,7 @@ bool GlobalObject::canIncreaseAlignment() const {
// alignment specified. (If it is assigned a section, the global
// could be densely packed with other objects in the section, and
// increasing the alignment could cause padding issues.)
- if (hasSection() && getAlign().hasValue())
+ if (hasSection() && getAlign())
return false;
// On ELF platforms, we're further restricted in that we can't
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 4e8f1b506811..d0c622fe2389 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/None.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -68,6 +69,21 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
return CreateBitCast(Ptr, getInt8PtrTy(PT->getAddressSpace()));
}
+DebugLoc IRBuilderBase::getCurrentDebugLocation() const {
+ for (auto &KV : MetadataToCopy)
+ if (KV.first == LLVMContext::MD_dbg)
+ return {cast<DILocation>(KV.second)};
+
+ return {};
+}
+void IRBuilderBase::SetInstDebugLocation(Instruction *I) const {
+ for (const auto &KV : MetadataToCopy)
+ if (KV.first == LLVMContext::MD_dbg) {
+ I->setDebugLoc(DebugLoc(KV.second));
+ return;
+ }
+}
+
static CallInst *createCallHelper(Function *Callee, ArrayRef<Value *> Ops,
IRBuilderBase *Builder,
const Twine &Name = "",
@@ -133,7 +149,36 @@ CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
CallInst *CI = createCallHelper(TheFn, Ops, this);
if (Align)
- cast<MemSetInst>(CI)->setDestAlignment(Align->value());
+ cast<MemSetInst>(CI)->setDestAlignment(*Align);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ if (ScopeTag)
+ CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+
+ if (NoAliasTag)
+ CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign,
+ Value *Val, Value *Size,
+ bool IsVolatile, MDNode *TBAATag,
+ MDNode *ScopeTag,
+ MDNode *NoAliasTag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)};
+ Type *Tys[] = {Dst->getType(), Size->getType()};
+ Module *M = BB->getParent()->getParent();
+ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset_inline, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ if (DstAlign)
+ cast<MemSetInlineInst>(CI)->setDestAlignment(*DstAlign);
// Set the TBAA info if present.
if (TBAATag)
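CreateMemSetInline emits llvm.memset.inline, which guarantees the lowering is expanded inline rather than turned into a libc memset call. A hedged sketch, assuming the trailing MDNode parameters are defaulted in the header as they are for CreateMemSet:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Zero 16 bytes at Dst without risking a call to memset().
void zero16Inline(IRBuilderBase &B, Value *Dst) {
  B.CreateMemSetInline(Dst, MaybeAlign(16), B.getInt8(0), B.getInt64(16),
                       /*IsVolatile=*/false);
}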
@@ -672,34 +717,29 @@ getStatepointBundles(Optional<ArrayRef<T1>> TransitionArgs,
template <typename T0, typename T1, typename T2, typename T3>
static CallInst *CreateGCStatepointCallCommon(
IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualCallee, uint32_t Flags, ArrayRef<T0> CallArgs,
- Optional<ArrayRef<T1>> TransitionArgs,
- Optional<ArrayRef<T2>> DeoptArgs, ArrayRef<T3> GCArgs,
- const Twine &Name) {
- // Extract out the type of the callee.
- auto *FuncPtrType = cast<PointerType>(ActualCallee->getType());
- assert(isa<FunctionType>(FuncPtrType->getPointerElementType()) &&
- "actual callee must be a callable value");
-
+ FunctionCallee ActualCallee, uint32_t Flags, ArrayRef<T0> CallArgs,
+ Optional<ArrayRef<T1>> TransitionArgs, Optional<ArrayRef<T2>> DeoptArgs,
+ ArrayRef<T3> GCArgs, const Twine &Name) {
Module *M = Builder->GetInsertBlock()->getParent()->getParent();
// Fill in the one generic type'd argument (the function is also vararg)
- Type *ArgTypes[] = { FuncPtrType };
Function *FnStatepoint =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint,
- ArgTypes);
-
- std::vector<Value *> Args =
- getStatepointArgs(*Builder, ID, NumPatchBytes, ActualCallee, Flags,
- CallArgs);
-
- return Builder->CreateCall(FnStatepoint, Args,
- getStatepointBundles(TransitionArgs, DeoptArgs,
- GCArgs),
- Name);
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint,
+ {ActualCallee.getCallee()->getType()});
+
+ std::vector<Value *> Args = getStatepointArgs(
+ *Builder, ID, NumPatchBytes, ActualCallee.getCallee(), Flags, CallArgs);
+
+ CallInst *CI = Builder->CreateCall(
+ FnStatepoint, Args,
+ getStatepointBundles(TransitionArgs, DeoptArgs, GCArgs), Name);
+ CI->addParamAttr(2,
+ Attribute::get(Builder->getContext(), Attribute::ElementType,
+ ActualCallee.getFunctionType()));
+ return CI;
}
CallInst *IRBuilderBase::CreateGCStatepointCall(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee,
+ uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee,
ArrayRef<Value *> CallArgs, Optional<ArrayRef<Value *>> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name) {
return CreateGCStatepointCallCommon<Value *, Value *, Value *, Value *>(
@@ -708,17 +748,17 @@ CallInst *IRBuilderBase::CreateGCStatepointCall(
}
CallInst *IRBuilderBase::CreateGCStatepointCall(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags,
- ArrayRef<Value *> CallArgs, Optional<ArrayRef<Use>> TransitionArgs,
- Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs,
- const Twine &Name) {
+ uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee,
+ uint32_t Flags, ArrayRef<Value *> CallArgs,
+ Optional<ArrayRef<Use>> TransitionArgs, Optional<ArrayRef<Use>> DeoptArgs,
+ ArrayRef<Value *> GCArgs, const Twine &Name) {
return CreateGCStatepointCallCommon<Value *, Use, Use, Value *>(
this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs,
DeoptArgs, GCArgs, Name);
}
CallInst *IRBuilderBase::CreateGCStatepointCall(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee,
+ uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualCallee,
ArrayRef<Use> CallArgs, Optional<ArrayRef<Value *>> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name) {
return CreateGCStatepointCallCommon<Use, Value *, Value *, Value *>(
@@ -729,32 +769,31 @@ CallInst *IRBuilderBase::CreateGCStatepointCall(
template <typename T0, typename T1, typename T2, typename T3>
static InvokeInst *CreateGCStatepointInvokeCommon(
IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes,
- Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest,
- uint32_t Flags, ArrayRef<T0> InvokeArgs,
+ FunctionCallee ActualInvokee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, uint32_t Flags, ArrayRef<T0> InvokeArgs,
Optional<ArrayRef<T1>> TransitionArgs, Optional<ArrayRef<T2>> DeoptArgs,
ArrayRef<T3> GCArgs, const Twine &Name) {
- // Extract out the type of the callee.
- auto *FuncPtrType = cast<PointerType>(ActualInvokee->getType());
- assert(isa<FunctionType>(FuncPtrType->getPointerElementType()) &&
- "actual callee must be a callable value");
-
Module *M = Builder->GetInsertBlock()->getParent()->getParent();
// Fill in the one generic type'd argument (the function is also vararg)
- Function *FnStatepoint = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_gc_statepoint, {FuncPtrType});
+ Function *FnStatepoint =
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint,
+ {ActualInvokee.getCallee()->getType()});
std::vector<Value *> Args =
- getStatepointArgs(*Builder, ID, NumPatchBytes, ActualInvokee, Flags,
- InvokeArgs);
+ getStatepointArgs(*Builder, ID, NumPatchBytes, ActualInvokee.getCallee(),
+ Flags, InvokeArgs);
- return Builder->CreateInvoke(FnStatepoint, NormalDest, UnwindDest, Args,
- getStatepointBundles(TransitionArgs, DeoptArgs,
- GCArgs),
- Name);
+ InvokeInst *II = Builder->CreateInvoke(
+ FnStatepoint, NormalDest, UnwindDest, Args,
+ getStatepointBundles(TransitionArgs, DeoptArgs, GCArgs), Name);
+ II->addParamAttr(2,
+ Attribute::get(Builder->getContext(), Attribute::ElementType,
+ ActualInvokee.getFunctionType()));
+ return II;
}
InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee,
BasicBlock *NormalDest, BasicBlock *UnwindDest,
ArrayRef<Value *> InvokeArgs, Optional<ArrayRef<Value *>> DeoptArgs,
ArrayRef<Value *> GCArgs, const Twine &Name) {
@@ -765,19 +804,21 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
}
InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee,
BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags,
ArrayRef<Value *> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs,
- Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name) {
+ Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs,
+ const Twine &Name) {
return CreateGCStatepointInvokeCommon<Value *, Use, Use, Value *>(
this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags,
InvokeArgs, TransitionArgs, DeoptArgs, GCArgs, Name);
}
InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
- uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee,
+ uint64_t ID, uint32_t NumPatchBytes, FunctionCallee ActualInvokee,
BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef<Use> InvokeArgs,
- Optional<ArrayRef<Value *>> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name) {
+ Optional<ArrayRef<Value *>> DeoptArgs, ArrayRef<Value *> GCArgs,
+ const Twine &Name) {
return CreateGCStatepointInvokeCommon<Use, Value *, Value *, Value *>(
this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest,
uint32_t(StatepointFlags::None), InvokeArgs, None, DeoptArgs, GCArgs,
@@ -785,31 +826,26 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke(
}
CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint,
- Type *ResultType,
- const Twine &Name) {
- Intrinsic::ID ID = Intrinsic::experimental_gc_result;
- Module *M = BB->getParent()->getParent();
- Type *Types[] = {ResultType};
- Function *FnGCResult = Intrinsic::getDeclaration(M, ID, Types);
+ Type *ResultType, const Twine &Name) {
+ Intrinsic::ID ID = Intrinsic::experimental_gc_result;
+ Module *M = BB->getParent()->getParent();
+ Type *Types[] = {ResultType};
+ Function *FnGCResult = Intrinsic::getDeclaration(M, ID, Types);
- Value *Args[] = {Statepoint};
- return createCallHelper(FnGCResult, Args, this, Name);
+ Value *Args[] = {Statepoint};
+ return createCallHelper(FnGCResult, Args, this, Name);
}
CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
- int BaseOffset,
- int DerivedOffset,
- Type *ResultType,
- const Twine &Name) {
- Module *M = BB->getParent()->getParent();
- Type *Types[] = {ResultType};
- Function *FnGCRelocate =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+ int BaseOffset, int DerivedOffset,
+ Type *ResultType, const Twine &Name) {
+ Module *M = BB->getParent()->getParent();
+ Type *Types[] = {ResultType};
+ Function *FnGCRelocate =
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
- Value *Args[] = {Statepoint,
- getInt32(BaseOffset),
- getInt32(DerivedOffset)};
- return createCallHelper(FnGCRelocate, Args, this, Name);
+ Value *Args[] = {Statepoint, getInt32(BaseOffset), getInt32(DerivedOffset)};
+ return createCallHelper(FnGCRelocate, Args, this, Name);
}
CallInst *IRBuilderBase::CreateGCGetPointerBase(Value *DerivedPtr,
@@ -1262,8 +1298,8 @@ CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL,
return CreateAlignmentAssumptionHelper(DL, PtrValue, Alignment, OffsetValue);
}
-IRBuilderDefaultInserter::~IRBuilderDefaultInserter() {}
-IRBuilderCallbackInserter::~IRBuilderCallbackInserter() {}
-IRBuilderFolder::~IRBuilderFolder() {}
+IRBuilderDefaultInserter::~IRBuilderDefaultInserter() = default;
+IRBuilderCallbackInserter::~IRBuilderCallbackInserter() = default;
+IRBuilderFolder::~IRBuilderFolder() = default;
void ConstantFolder::anchor() {}
void NoFolder::anchor() {}
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 36a20679863b..bf76c89f26ca 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -492,6 +492,9 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
if (const ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I1))
return SVI->getShuffleMask() ==
cast<ShuffleVectorInst>(I2)->getShuffleMask();
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I1))
+ return GEP->getSourceElementType() ==
+ cast<GetElementPtrInst>(I2)->getSourceElementType();
return true;
}
@@ -695,7 +698,7 @@ bool Instruction::mayHaveSideEffects() const {
bool Instruction::isSafeToRemove() const {
return (!isa<CallInst>(this) || !this->mayHaveSideEffects()) &&
- !this->isTerminator();
+ !this->isTerminator() && !this->isEHPad();
}
bool Instruction::willReturn() const {
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 7798af3b19b9..6a91edb75dd2 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -128,7 +128,7 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
// If the PHI node is dead, because it has zero entries, nuke it now.
if (getNumOperands() == 0 && DeletePHIIfEmpty) {
// If anyone is using this PHI, make them use a dummy value instead...
- replaceAllUsesWith(UndefValue::get(getType()));
+ replaceAllUsesWith(PoisonValue::get(getType()));
eraseFromParent();
}
return Removed;
@@ -325,13 +325,13 @@ bool CallBase::isReturnNonNull() const {
return false;
}
-Value *CallBase::getReturnedArgOperand() const {
+Value *CallBase::getArgOperandWithAttribute(Attribute::AttrKind Kind) const {
unsigned Index;
- if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index))
+ if (Attrs.hasAttrSomewhere(Kind, &Index))
return getArgOperand(Index - AttributeList::FirstArgIndex);
if (const Function *F = getCalledFunction())
- if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index))
+ if (F->getAttributes().hasAttrSomewhere(Kind, &Index))
return getArgOperand(Index - AttributeList::FirstArgIndex);
return nullptr;
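getArgOperandWithAttribute generalizes the removed getReturnedArgOperand to any argument attribute, checking the call site's attributes first and then the callee's declaration. The old behavior is recovered by passing Attribute::Returned:

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Equivalent of the old CB.getReturnedArgOperand().
Value *returnedArg(const CallBase &CB) {
  return CB.getArgOperandWithAttribute(Attribute::Returned);
}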
@@ -372,6 +372,27 @@ bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const {
return false;
}
+template <typename AK>
+Attribute CallBase::getFnAttrOnCalledFunction(AK Kind) const {
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the call instruction.
+ if (isFnAttrDisallowedByOpBundle(Kind))
+ return Attribute();
+ Value *V = getCalledOperand();
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == BitCast)
+ V = CE->getOperand(0);
+
+ if (auto *F = dyn_cast<Function>(V))
+ return F->getAttributes().getFnAttr(Kind);
+
+ return Attribute();
+}
+
+template Attribute
+CallBase::getFnAttrOnCalledFunction(Attribute::AttrKind Kind) const;
+template Attribute CallBase::getFnAttrOnCalledFunction(StringRef Kind) const;
+
void CallBase::getOperandBundlesAsDefs(
SmallVectorImpl<OperandBundleDef> &Defs) const {
for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i)
@@ -482,9 +503,10 @@ CallBase *CallBase::removeOperandBundle(CallBase *CB, uint32_t ID,
bool CallBase::hasReadingOperandBundles() const {
// Implementation note: this is a conservative implementation of operand
- // bundle semantics, where *any* non-assume operand bundle forces a callsite
- // to be at least readonly.
- return hasOperandBundles() && getIntrinsicID() != Intrinsic::assume;
+ // bundle semantics, where *any* non-assume operand bundle (other than
+ // ptrauth) forces a callsite to be at least readonly.
+ return hasOperandBundlesOtherThan(LLVMContext::OB_ptrauth) &&
+ getIntrinsicID() != Intrinsic::assume;
}
//===----------------------------------------------------------------------===//
@@ -2194,7 +2216,13 @@ bool ShuffleVectorInst::isIdentityMask(ArrayRef<int> Mask) {
bool ShuffleVectorInst::isReverseMask(ArrayRef<int> Mask) {
if (!isSingleSourceMask(Mask))
return false;
- for (int i = 0, NumElts = Mask.size(); i < NumElts; ++i) {
+
+ // The number of elements in the mask must be at least 2.
+ int NumElts = Mask.size();
+ if (NumElts < 2)
+ return false;
+
+ for (int i = 0; i < NumElts; ++i) {
if (Mask[i] == -1)
continue;
if (Mask[i] != (NumElts - 1 - i) && Mask[i] != (NumElts + NumElts - 1 - i))
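A worked example for the new guard: with four elements the reverse mask is <3,2,1,0>, while a one-element mask such as <0> is degenerate and now rejected.

// Sketch; assumes the static ShuffleVectorInst helpers as changed above.
#include "llvm/IR/Instructions.h"
#include <cassert>
using namespace llvm;

static void reverseMaskExamples() {
  assert(ShuffleVectorInst::isReverseMask({3, 2, 1, 0})); // reversed
  assert(!ShuffleVectorInst::isReverseMask({0}));         // now rejected: NumElts < 2
}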
@@ -3060,16 +3088,18 @@ unsigned CastInst::isEliminableCastPair(
return 0;
}
case 8: {
- // ext, trunc -> bitcast, if the SrcTy and DstTy are same size
+ // ext, trunc -> bitcast, if the SrcTy and DstTy are the same
// ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
// ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
unsigned SrcSize = SrcTy->getScalarSizeInBits();
unsigned DstSize = DstTy->getScalarSizeInBits();
- if (SrcSize == DstSize)
+ if (SrcTy == DstTy)
return Instruction::BitCast;
- else if (SrcSize < DstSize)
+ if (SrcSize < DstSize)
return firstOp;
- return secondOp;
+ if (SrcSize > DstSize)
+ return secondOp;
+ return 0;
}
case 9:
// zext, sext -> zext, because sext can't sign extend after zext
@@ -4447,7 +4477,7 @@ void SwitchInstProfUpdateWrapper::addCase(
Weights.getValue()[SI.getNumSuccessors() - 1] = *W;
} else if (Weights) {
Changed = true;
- Weights.getValue().push_back(W.getValueOr(0));
+ Weights.getValue().push_back(W.value_or(0));
}
if (Weights)
assert(SI.getNumSuccessors() == Weights->size() &&
@@ -4467,7 +4497,7 @@ SwitchInstProfUpdateWrapper::CaseWeightOpt
SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) {
if (!Weights)
return None;
- return Weights.getValue()[idx];
+ return (*Weights)[idx];
}
void SwitchInstProfUpdateWrapper::setSuccessorWeight(
@@ -4479,7 +4509,7 @@ void SwitchInstProfUpdateWrapper::setSuccessorWeight(
Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
if (Weights) {
- auto &OldW = Weights.getValue()[idx];
+ auto &OldW = (*Weights)[idx];
if (*W != OldW) {
Changed = true;
OldW = *W;
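The Optional<T> spelling migration applied throughout this commit, shown once as a generic sketch (not a specific call site):

#include "llvm/ADT/Optional.h"
#include <cstdint>
using namespace llvm;

static uint32_t weightOrZero(Optional<uint32_t> W) {
  // New std::optional-compatible spellings replace the old accessors:
  return W.value_or(0); // was: W.getValueOr(0); likewise *W replaces W.getValue()
}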
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index e27758c5de02..b132a9dcb812 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -236,8 +236,8 @@ bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const {
return true;
}
-FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
- Metadata *MD = cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
+static FCmpInst::Predicate getFPPredicateFromMD(const Value *Op) {
+ Metadata *MD = cast<MetadataAsValue>(Op)->getMetadata();
if (!MD || !isa<MDString>(MD))
return FCmpInst::BAD_FCMP_PREDICATE;
return StringSwitch<FCmpInst::Predicate>(cast<MDString>(MD)->getString())
@@ -258,6 +258,10 @@ FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
.Default(FCmpInst::BAD_FCMP_PREDICATE);
}
+FCmpInst::Predicate ConstrainedFPCmpIntrinsic::getPredicate() const {
+ return getFPPredicateFromMD(getArgOperand(2));
+}
+
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
@@ -299,13 +303,18 @@ ElementCount VPIntrinsic::getStaticVectorLength() const {
};
Value *VPMask = getMaskParam();
- assert(VPMask && "No mask param?");
+ if (!VPMask) {
+ assert((getIntrinsicID() == Intrinsic::vp_merge ||
+ getIntrinsicID() == Intrinsic::vp_select) &&
+ "Unexpected VP intrinsic without mask operand");
+ return GetVectorLengthOfType(getType());
+ }
return GetVectorLengthOfType(VPMask->getType());
}
Value *VPIntrinsic::getMaskParam() const {
if (auto MaskPos = getMaskParamPos(getIntrinsicID()))
- return getArgOperand(MaskPos.getValue());
+ return getArgOperand(*MaskPos);
return nullptr;
}
@@ -316,7 +325,7 @@ void VPIntrinsic::setMaskParam(Value *NewMask) {
Value *VPIntrinsic::getVectorLengthParam() const {
if (auto EVLPos = getVectorLengthParamPos(getIntrinsicID()))
- return getArgOperand(EVLPos.getValue());
+ return getArgOperand(*EVLPos);
return nullptr;
}
@@ -354,7 +363,7 @@ VPIntrinsic::getVectorLengthParamPos(Intrinsic::ID IntrinsicID) {
/// scatter.
MaybeAlign VPIntrinsic::getPointerAlignment() const {
Optional<unsigned> PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID());
- assert(PtrParamOpt.hasValue() && "no pointer argument!");
+ assert(PtrParamOpt && "no pointer argument!");
return getParamAlign(PtrParamOpt.getValue());
}
@@ -380,7 +389,7 @@ Optional<unsigned> VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
/// \return The data (payload) operand of this store or scatter.
Value *VPIntrinsic::getMemoryDataParam() const {
auto DataParamOpt = getMemoryDataParamPos(getIntrinsicID());
- if (!DataParamOpt.hasValue())
+ if (!DataParamOpt)
return nullptr;
return getArgOperand(DataParamOpt.getValue());
}
@@ -492,6 +501,20 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
break;
}
+ case Intrinsic::vp_trunc:
+ case Intrinsic::vp_sext:
+ case Intrinsic::vp_zext:
+ case Intrinsic::vp_fptoui:
+ case Intrinsic::vp_fptosi:
+ case Intrinsic::vp_uitofp:
+ case Intrinsic::vp_sitofp:
+ case Intrinsic::vp_fptrunc:
+ case Intrinsic::vp_fpext:
+ case Intrinsic::vp_ptrtoint:
+ case Intrinsic::vp_inttoptr:
+ VPFunc =
+ Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[0]->getType()});
+ break;
case Intrinsic::vp_merge:
case Intrinsic::vp_select:
VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()});
@@ -500,6 +523,10 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
VPFunc = Intrinsic::getDeclaration(
M, VPID, {ReturnType, Params[0]->getType()});
break;
+ case Intrinsic::experimental_vp_strided_load:
+ VPFunc = Intrinsic::getDeclaration(
+ M, VPID, {ReturnType, Params[0]->getType(), Params[1]->getType()});
+ break;
case Intrinsic::vp_gather:
VPFunc = Intrinsic::getDeclaration(
M, VPID, {ReturnType, Params[0]->getType()});
@@ -508,6 +535,11 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
VPFunc = Intrinsic::getDeclaration(
M, VPID, {Params[0]->getType(), Params[1]->getType()});
break;
+ case Intrinsic::experimental_vp_strided_store:
+ VPFunc = Intrinsic::getDeclaration(
+ M, VPID,
+ {Params[0]->getType(), Params[1]->getType(), Params[2]->getType()});
+ break;
case Intrinsic::vp_scatter:
VPFunc = Intrinsic::getDeclaration(
M, VPID, {Params[0]->getType(), Params[1]->getType()});
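A sketch of requesting one of the newly handled declarations; the argument values are assumed to be in scope:

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static Function *vpTruncDecl(Module *M, Type *RetTy, Value *Src, Value *Mask,
                             Value *EVL) {
  // vp.trunc is overloaded on both the result and the source vector type.
  return VPIntrinsic::getDeclarationForParams(M, Intrinsic::vp_trunc, RetTy,
                                              {Src, Mask, EVL});
}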
@@ -529,6 +561,67 @@ bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) {
return false;
}
+bool VPCastIntrinsic::isVPCast(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_CASTOP return true;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return false;
+}
+
+bool VPCmpIntrinsic::isVPCmp(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_CMP(CCPOS, ...) return true;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return false;
+}
+
+static ICmpInst::Predicate getIntPredicateFromMD(const Value *Op) {
+ Metadata *MD = cast<MetadataAsValue>(Op)->getMetadata();
+ if (!MD || !isa<MDString>(MD))
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ return StringSwitch<ICmpInst::Predicate>(cast<MDString>(MD)->getString())
+ .Case("eq", ICmpInst::ICMP_EQ)
+ .Case("ne", ICmpInst::ICMP_NE)
+ .Case("ugt", ICmpInst::ICMP_UGT)
+ .Case("uge", ICmpInst::ICMP_UGE)
+ .Case("ult", ICmpInst::ICMP_ULT)
+ .Case("ule", ICmpInst::ICMP_ULE)
+ .Case("sgt", ICmpInst::ICMP_SGT)
+ .Case("sge", ICmpInst::ICMP_SGE)
+ .Case("slt", ICmpInst::ICMP_SLT)
+ .Case("sle", ICmpInst::ICMP_SLE)
+ .Default(ICmpInst::BAD_ICMP_PREDICATE);
+}
+
+CmpInst::Predicate VPCmpIntrinsic::getPredicate() const {
+ bool IsFP = true;
+ Optional<unsigned> CCArgIdx;
+ switch (getIntrinsicID()) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_CMP(CCPOS, ISFP) \
+ CCArgIdx = CCPOS; \
+ IsFP = ISFP; \
+ break;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ assert(CCArgIdx && "Unexpected vector-predicated comparison");
+ return IsFP ? getFPPredicateFromMD(getArgOperand(*CCArgIdx))
+ : getIntPredicateFromMD(getArgOperand(*CCArgIdx));
+}
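A hedged consumer of the new accessor: the comparison predicate travels as an MDString operand (e.g. !"slt") and is decoded back into a CmpInst predicate.

// Sketch only; the surrounding pass logic is assumed.
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool isSignedLessThan(const VPCmpIntrinsic &VPC) {
  return VPC.getPredicate() == ICmpInst::ICMP_SLT;
}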
+
unsigned VPReductionIntrinsic::getVectorParamPos() const {
return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID());
}
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index e19ead98a616..4a1d5d3dcdf6 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -82,6 +82,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
"clang.arc.attachedcall operand bundle id drifted!");
(void)ClangAttachedCall;
+ auto *PtrauthEntry = pImpl->getOrInsertBundleTag("ptrauth");
+ assert(PtrauthEntry->second == LLVMContext::OB_ptrauth &&
+ "ptrauth operand bundle id drifted!");
+ (void)PtrauthEntry;
+
SyncScope::ID SingleThreadSSID =
pImpl->getOrInsertSyncScopeID("singlethread");
assert(SingleThreadSSID == SyncScope::SingleThread &&
@@ -133,13 +138,25 @@ bool LLVMContext::getDiagnosticsHotnessRequested() const {
void LLVMContext::setDiagnosticsHotnessThreshold(Optional<uint64_t> Threshold) {
pImpl->DiagnosticsHotnessThreshold = Threshold;
}
-
+void LLVMContext::setMisExpectWarningRequested(bool Requested) {
+ pImpl->MisExpectWarningRequested = Requested;
+}
+bool LLVMContext::getMisExpectWarningRequested() const {
+ return pImpl->MisExpectWarningRequested;
+}
uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const {
- return pImpl->DiagnosticsHotnessThreshold.getValueOr(UINT64_MAX);
+ return pImpl->DiagnosticsHotnessThreshold.value_or(UINT64_MAX);
+}
+void LLVMContext::setDiagnosticsMisExpectTolerance(
+ Optional<uint64_t> Tolerance) {
+ pImpl->DiagnosticsMisExpectTolerance = Tolerance;
+}
+uint64_t LLVMContext::getDiagnosticsMisExpectTolerance() const {
+ return pImpl->DiagnosticsMisExpectTolerance.value_or(0);
}
bool LLVMContext::isDiagnosticsHotnessThresholdSetFromPSI() const {
- return !pImpl->DiagnosticsHotnessThreshold.hasValue();
+ return !pImpl->DiagnosticsHotnessThreshold.has_value();
}
remarks::RemarkStreamer *LLVMContext::getMainRemarkStreamer() {
@@ -346,12 +363,18 @@ std::unique_ptr<DiagnosticHandler> LLVMContext::getDiagnosticHandler() {
return std::move(pImpl->DiagHandler);
}
-void LLVMContext::enableOpaquePointers() const {
- assert(pImpl->PointerTypes.empty() && pImpl->ASPointerTypes.empty() &&
- "Must be called before creating any pointer types");
- pImpl->setOpaquePointers(true);
+bool LLVMContext::hasSetOpaquePointersValue() const {
+ return pImpl->hasOpaquePointersValue();
+}
+
+void LLVMContext::setOpaquePointers(bool Enable) const {
+ pImpl->setOpaquePointers(Enable);
}
bool LLVMContext::supportsTypedPointers() const {
return !pImpl->getOpaquePointers();
}
+
+Any &LLVMContext::getTargetData() const {
+ return pImpl->TargetDataStorage;
+}
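A usage sketch for the reworked opaque-pointer switch (the call site is assumed, not part of this commit):

#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static void forceTypedPointers(LLVMContext &Ctx) {
  // The mode can be set before any pointer type exists; setting it twice to
  // different values asserts, so query first.
  if (!Ctx.hasSetOpaquePointersValue())
    Ctx.setOpaquePointers(false);
}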
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 8f9530290459..06b3a3afef9d 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -36,7 +36,7 @@ using namespace llvm;
static cl::opt<bool>
OpaquePointersCL("opaque-pointers", cl::desc("Use opaque pointers"),
- cl::init(false));
+ cl::init(true));
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
: DiagHandler(std::make_unique<DiagnosticHandler>()),
@@ -47,7 +47,11 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID),
PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID),
X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8),
- Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {}
+ Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {
+ if (OpaquePointersCL.getNumOccurrences()) {
+ OpaquePointers = OpaquePointersCL;
+ }
+}
LLVMContextImpl::~LLVMContextImpl() {
// NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -245,10 +249,18 @@ void LLVMContextImpl::setOptPassGate(OptPassGate& OPG) {
this->OPG = &OPG;
}
+bool LLVMContextImpl::hasOpaquePointersValue() {
+ return OpaquePointers.has_value();
+}
+
bool LLVMContextImpl::getOpaquePointers() {
- if (LLVM_UNLIKELY(!(OpaquePointers.hasValue())))
+ if (LLVM_UNLIKELY(!OpaquePointers))
OpaquePointers = OpaquePointersCL;
return *OpaquePointers;
}
-void LLVMContextImpl::setOpaquePointers(bool OP) { OpaquePointers = OP; }
+void LLVMContextImpl::setOpaquePointers(bool OP) {
+ assert((!OpaquePointers || OpaquePointers.getValue() == OP) &&
+ "Cannot change opaque pointers mode once set");
+ OpaquePointers = OP;
+}
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 70242f4d8f20..47add940f603 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -17,6 +17,7 @@
#include "ConstantsContext.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Any.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
@@ -686,7 +687,7 @@ template <> struct MDNodeKeyImpl<DIFile> {
unsigned getHashValue() const {
return hash_combine(Filename, Directory, Checksum ? Checksum->Kind : 0,
Checksum ? Checksum->Value : nullptr,
- Source.getValueOr(nullptr));
+ Source.value_or(nullptr));
}
};
@@ -709,6 +710,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Metadata *RetainedNodes;
Metadata *ThrownTypes;
Metadata *Annotations;
+ MDString *TargetFuncName;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
@@ -716,14 +718,15 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
unsigned SPFlags, Metadata *Unit, Metadata *TemplateParams,
Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes, Metadata *Annotations)
+ Metadata *ThrownTypes, Metadata *Annotations,
+ MDString *TargetFuncName)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), ScopeLine(ScopeLine),
ContainingType(ContainingType), VirtualIndex(VirtualIndex),
ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags),
Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration),
RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes),
- Annotations(Annotations) {}
+ Annotations(Annotations), TargetFuncName(TargetFuncName) {}
MDNodeKeyImpl(const DISubprogram *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
@@ -736,7 +739,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Declaration(N->getRawDeclaration()),
RetainedNodes(N->getRawRetainedNodes()),
ThrownTypes(N->getRawThrownTypes()),
- Annotations(N->getRawAnnotations()) {}
+ Annotations(N->getRawAnnotations()),
+ TargetFuncName(N->getRawTargetFuncName()) {}
bool isKeyOf(const DISubprogram *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
@@ -752,7 +756,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Declaration == RHS->getRawDeclaration() &&
RetainedNodes == RHS->getRawRetainedNodes() &&
ThrownTypes == RHS->getRawThrownTypes() &&
- Annotations == RHS->getRawAnnotations();
+ Annotations == RHS->getRawAnnotations() &&
+ TargetFuncName == RHS->getRawTargetFuncName();
}
bool isDefinition() const { return SPFlags & DISubprogram::SPFlagDefinition; }
@@ -1380,12 +1385,19 @@ public:
/// If threshold option is not specified, it is disabled (0) by default.
Optional<uint64_t> DiagnosticsHotnessThreshold = 0;
+ /// The percentage of difference between profiling branch weights and
+ /// llvm.expect branch weights to tolerate when emitting MisExpect diagnostics
+ Optional<uint64_t> DiagnosticsMisExpectTolerance = 0;
+ bool MisExpectWarningRequested = false;
+
/// The specialized remark streamer used by LLVM's OptimizationRemarkEmitter.
std::unique_ptr<LLVMRemarkStreamer> LLVMRS;
LLVMContext::YieldCallbackTy YieldCallback = nullptr;
void *YieldOpaqueHandle = nullptr;
+ DenseMap<const Value *, ValueName *> ValueNames;
+
using IntMapTy =
DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>;
IntMapTy IntConstants;
@@ -1402,8 +1414,6 @@ public:
DenseMap<Value *, ValueAsMetadata *> ValuesAsMetadata;
DenseMap<Metadata *, MetadataAsValue *> MetadataAsValues;
- DenseMap<const Value *, ValueName *> ValueNames;
-
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
DenseSet<CLASS *, CLASS##Info> CLASS##s;
#include "llvm/IR/Metadata.def"
@@ -1450,14 +1460,14 @@ public:
ConstantInt *TheTrueVal = nullptr;
ConstantInt *TheFalseVal = nullptr;
- std::unique_ptr<ConstantTokenNone> TheNoneToken;
-
// Basic type instances.
Type VoidTy, LabelTy, HalfTy, BFloatTy, FloatTy, DoubleTy, MetadataTy,
TokenTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy, X86_AMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
+ std::unique_ptr<ConstantTokenNone> TheNoneToken;
+
BumpPtrAllocator Alloc;
UniqueStringSaver Saver{Alloc};
@@ -1493,6 +1503,9 @@ public:
/// Collection of per-GlobalValue partitions used in this context.
DenseMap<const GlobalValue *, StringRef> GlobalValuePartitions;
+ DenseMap<const GlobalValue *, GlobalValue::SanitizerMetadata>
+ GlobalValueSanitizerMetadata;
+
/// DiscriminatorTable - This table maps file:line locations to an
/// integer representing the next DWARF path discriminator to assign to
/// instructions in different blocks at the same location.
@@ -1555,8 +1568,11 @@ public:
// TODO: clean up the following after we no longer support non-opaque pointer
// types.
bool getOpaquePointers();
+ bool hasOpaquePointersValue();
void setOpaquePointers(bool OP);
+ llvm::Any TargetDataStorage;
+
private:
Optional<bool> OpaquePointers;
};
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 08cf909a83f9..ef3465177647 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -29,10 +29,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#ifdef EXPENSIVE_CHECKS
-#include "llvm/IR/StructuralHash.h"
-#endif
-
using namespace llvm;
// See PassManagers.h for Pass Manager infrastructure overview.
@@ -1429,12 +1425,12 @@ bool FPPassManager::runOnFunction(Function &F) {
PassManagerPrettyStackEntry X(FP, F);
TimeRegion PassTimer(getPassTimer(FP));
#ifdef EXPENSIVE_CHECKS
- uint64_t RefHash = StructuralHash(F);
+ uint64_t RefHash = FP->structuralHash(F);
#endif
LocalChanged |= FP->runOnFunction(F);
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
- if (!LocalChanged && (RefHash != StructuralHash(F))) {
+ if (!LocalChanged && (RefHash != FP->structuralHash(F))) {
llvm::errs() << "Pass modifies its input and doesn't report it: "
<< FP->getPassName() << "\n";
llvm_unreachable("Pass modifies its input and doesn't report it");
@@ -1543,13 +1539,13 @@ MPPassManager::runOnModule(Module &M) {
TimeRegion PassTimer(getPassTimer(MP));
#ifdef EXPENSIVE_CHECKS
- uint64_t RefHash = StructuralHash(M);
+ uint64_t RefHash = MP->structuralHash(M);
#endif
LocalChanged |= MP->runOnModule(M);
#ifdef EXPENSIVE_CHECKS
- assert((LocalChanged || (RefHash == StructuralHash(M))) &&
+ assert((LocalChanged || (RefHash == MP->structuralHash(M))) &&
"Pass modifies its input and doesn't report it.");
#endif
@@ -1767,4 +1763,4 @@ void FunctionPass::assignPassManager(PMStack &PMS,
PM->add(this);
}
-legacy::PassManagerBase::~PassManagerBase() {}
+legacy::PassManagerBase::~PassManagerBase() = default;
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 35af8490287b..fc59fda9fe22 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -150,6 +150,14 @@ MDNode *MDBuilder::mergeCallbackEncodings(MDNode *ExistingCallbacks,
return MDNode::get(Context, Ops);
}
+MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig,
+ Constant *RTTI) {
+ SmallVector<Metadata *, 4> Ops;
+ Ops.push_back(createConstant(PrologueSig));
+ Ops.push_back(createConstant(RTTI));
+ return MDNode::get(Context, Ops);
+}
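A hedged usage sketch for the new MDBuilder helper (the call site is assumed):

#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static MDNode *rttiPrologueMD(MDBuilder &MDB, Constant *Sig, Constant *RTTI) {
  // Packs the prologue signature and RTTI constants into a 2-operand MDNode.
  return MDB.createRTTIPointerPrologue(Sig, RTTI);
}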
+
MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
SmallVector<Metadata *, 3> Args(1, nullptr);
if (Extra)
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index 2399ea27ee9d..b8e3e40e4c1d 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -144,7 +144,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
// Mangle functions with Microsoft calling conventions specially. Only do
// this mangling for x86_64 vectorcall and 32-bit x86.
- const Function *MSFunc = dyn_cast<Function>(GV);
+ const Function *MSFunc = dyn_cast_or_null<Function>(GV->getAliaseeObject());
// Don't add byte count suffixes when '\01' or '?' are in the first
// character.
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 226718ecac28..ae2401026ebf 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -245,6 +245,36 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New,
"Reference without owner must be direct");
}
+void ReplaceableMetadataImpl::SalvageDebugInfo(const Constant &C) {
+ if (!C.isUsedByMetadata()) {
+ return;
+ }
+
+ LLVMContext &Context = C.getType()->getContext();
+ auto &Store = Context.pImpl->ValuesAsMetadata;
+ auto I = Store.find(&C);
+ ValueAsMetadata *MD = I->second;
+ using UseTy =
+ std::pair<void *, std::pair<MetadataTracking::OwnerTy, uint64_t>>;
+ // Copy out the uses first; the value of the Constant used by debug info
+ // metadata is updated to undef below.
+ SmallVector<UseTy, 8> Uses(MD->UseMap.begin(), MD->UseMap.end());
+
+ for (const auto &Pair : Uses) {
+ MetadataTracking::OwnerTy Owner = Pair.second.first;
+ if (!Owner)
+ continue;
+ if (!Owner.is<Metadata *>())
+ continue;
+ auto *OwnerMD = dyn_cast<MDNode>(Owner.get<Metadata *>());
+ if (!OwnerMD)
+ continue;
+ if (isa<DINode>(OwnerMD)) {
+ OwnerMD->handleChangedOperand(
+ Pair.first, ValueAsMetadata::get(UndefValue::get(C.getType())));
+ }
+ }
+}
+
void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
if (UseMap.empty())
return;
@@ -252,9 +282,7 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
// Copy out uses since UseMap will get touched below.
using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
- llvm::sort(Uses, [](const UseTy &L, const UseTy &R) {
- return L.second.second < R.second.second;
- });
+ llvm::sort(Uses, llvm::less_second());
for (const auto &Pair : Uses) {
// Check that this Ref hasn't disappeared after RAUW (when updating a
// previous Ref).
@@ -493,35 +521,26 @@ StringRef MDString::getString() const {
"Alignment is insufficient after objects prepended to " #CLASS);
#include "llvm/IR/Metadata.def"
-void *MDNode::operator new(size_t Size, unsigned NumOps) {
- size_t OpSize = NumOps * sizeof(MDOperand);
+void *MDNode::operator new(size_t Size, size_t NumOps, StorageType Storage) {
// uint64_t is the most aligned type we need to support (ensured by static_assert
// above)
- OpSize = alignTo(OpSize, alignof(uint64_t));
- void *Ptr = reinterpret_cast<char *>(::operator new(OpSize + Size)) + OpSize;
- MDOperand *O = static_cast<MDOperand *>(Ptr);
- for (MDOperand *E = O - NumOps; O != E; --O)
- (void)new (O - 1) MDOperand;
- return Ptr;
+ size_t AllocSize =
+ alignTo(Header::getAllocSize(Storage, NumOps), alignof(uint64_t));
+ char *Mem = reinterpret_cast<char *>(::operator new(AllocSize + Size));
+ Header *H = new (Mem + AllocSize - sizeof(Header)) Header(NumOps, Storage);
+ return reinterpret_cast<void *>(H + 1);
}
-// Repress memory sanitization, due to use-after-destroy by operator
-// delete. Bug report 24578 identifies this issue.
-LLVM_NO_SANITIZE_MEMORY_ATTRIBUTE void MDNode::operator delete(void *Mem) {
- MDNode *N = static_cast<MDNode *>(Mem);
- size_t OpSize = N->NumOperands * sizeof(MDOperand);
- OpSize = alignTo(OpSize, alignof(uint64_t));
-
- MDOperand *O = static_cast<MDOperand *>(Mem);
- for (MDOperand *E = O - N->NumOperands; O != E; --O)
- (O - 1)->~MDOperand();
- ::operator delete(reinterpret_cast<char *>(Mem) - OpSize);
+void MDNode::operator delete(void *N) {
+ Header *H = reinterpret_cast<Header *>(N) - 1;
+ void *Mem = H->getAllocation();
+ H->~Header();
+ ::operator delete(Mem);
}
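An illustrative picture of the new co-allocation scheme, inferred from operator new/delete above (not code from the patch):

//   small node:  [MDOperand 0 .. MDOperand SmallSize-1][Header][MDNode ...]
//   large node:  [LargeStorageVector (inline object)   ][Header][MDNode ...]
//
// operator new returns H + 1, so the Header sits immediately before the node
// object, and the small operands (or the large-storage vector) immediately
// before the Header.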
MDNode::MDNode(LLVMContext &Context, unsigned ID, StorageType Storage,
ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2)
- : Metadata(ID, Storage), NumOperands(Ops1.size() + Ops2.size()),
- NumUnresolved(0), Context(Context) {
+ : Metadata(ID, Storage), Context(Context) {
unsigned Op = 0;
for (Metadata *MD : Ops1)
setOperand(Op++, MD);
@@ -547,6 +566,87 @@ TempMDNode MDNode::clone() const {
}
}
+MDNode::Header::Header(size_t NumOps, StorageType Storage) {
+ IsLarge = isLarge(NumOps);
+ IsResizable = isResizable(Storage);
+ SmallSize = getSmallSize(NumOps, IsResizable, IsLarge);
+ if (IsLarge) {
+ SmallNumOps = 0;
+ new (getLargePtr()) LargeStorageVector();
+ getLarge().resize(NumOps);
+ return;
+ }
+ SmallNumOps = NumOps;
+ MDOperand *O = reinterpret_cast<MDOperand *>(this) - SmallSize;
+ for (MDOperand *E = O + SmallSize; O != E;)
+ (void)new (O++) MDOperand();
+}
+
+MDNode::Header::~Header() {
+ if (IsLarge) {
+ getLarge().~LargeStorageVector();
+ return;
+ }
+ MDOperand *O = reinterpret_cast<MDOperand *>(this);
+ for (MDOperand *E = O - SmallSize; O != E; --O)
+ (void)(O - 1)->~MDOperand();
+}
+
+void *MDNode::Header::getLargePtr() const {
+ static_assert(alignof(LargeStorageVector) <= alignof(Header),
+ "LargeStorageVector too strongly aligned");
+ return reinterpret_cast<char *>(const_cast<Header *>(this)) -
+ sizeof(LargeStorageVector);
+}
+
+void *MDNode::Header::getSmallPtr() {
+ static_assert(alignof(MDOperand) <= alignof(Header),
+ "MDOperand too strongly aligned");
+ return reinterpret_cast<char *>(const_cast<Header *>(this)) -
+ sizeof(MDOperand) * SmallSize;
+}
+
+void MDNode::Header::resize(size_t NumOps) {
+ assert(IsResizable && "Node is not resizable");
+ if (operands().size() == NumOps)
+ return;
+
+ if (IsLarge)
+ getLarge().resize(NumOps);
+ else if (NumOps <= SmallSize)
+ resizeSmall(NumOps);
+ else
+ resizeSmallToLarge(NumOps);
+}
+
+void MDNode::Header::resizeSmall(size_t NumOps) {
+ assert(!IsLarge && "Expected a small MDNode");
+ assert(NumOps <= SmallSize && "NumOps too large for small resize");
+
+ MutableArrayRef<MDOperand> ExistingOps = operands();
+ assert(NumOps != ExistingOps.size() && "Expected a different size");
+
+ int NumNew = (int)NumOps - (int)ExistingOps.size();
+ MDOperand *O = ExistingOps.end();
+ for (int I = 0, E = NumNew; I < E; ++I)
+ (O++)->reset();
+ for (int I = 0, E = NumNew; I > E; --I)
+ (--O)->reset();
+ SmallNumOps = NumOps;
+ assert(O == operands().end() && "Operands not (un)initialized until the end");
+}
+
+void MDNode::Header::resizeSmallToLarge(size_t NumOps) {
+ assert(!IsLarge && "Expected a small MDNode");
+ assert(NumOps > SmallSize && "Expected NumOps to be larger than allocation");
+ LargeStorageVector NewOps;
+ NewOps.resize(NumOps);
+ llvm::move(operands(), NewOps.begin());
+ resizeSmall(0);
+ new (getLargePtr()) LargeStorageVector(std::move(NewOps));
+ IsLarge = true;
+}
+
static bool isOperandUnresolved(Metadata *Op) {
if (auto *N = dyn_cast_or_null<MDNode>(Op))
return !N->isResolved();
@@ -554,9 +654,9 @@ static bool isOperandUnresolved(Metadata *Op) {
}
void MDNode::countUnresolvedOperands() {
- assert(NumUnresolved == 0 && "Expected unresolved ops to be uncounted");
+ assert(getNumUnresolved() == 0 && "Expected unresolved ops to be uncounted");
assert(isUniqued() && "Expected this to be uniqued");
- NumUnresolved = count_if(operands(), isOperandUnresolved);
+ setNumUnresolved(count_if(operands(), isOperandUnresolved));
}
void MDNode::makeUniqued() {
@@ -570,7 +670,7 @@ void MDNode::makeUniqued() {
// Make this 'uniqued'.
Storage = Uniqued;
countUnresolvedOperands();
- if (!NumUnresolved) {
+ if (!getNumUnresolved()) {
dropReplaceableUses();
assert(isResolved() && "Expected this to be resolved");
}
@@ -594,14 +694,14 @@ void MDNode::resolve() {
assert(isUniqued() && "Expected this to be uniqued");
assert(!isResolved() && "Expected this to be unresolved");
- NumUnresolved = 0;
+ setNumUnresolved(0);
dropReplaceableUses();
assert(isResolved() && "Expected this to be resolved");
}
void MDNode::dropReplaceableUses() {
- assert(!NumUnresolved && "Unexpected unresolved operand");
+ assert(!getNumUnresolved() && "Unexpected unresolved operand");
// Drop any RAUW support.
if (Context.hasReplaceableUses())
@@ -610,13 +710,13 @@ void MDNode::dropReplaceableUses() {
void MDNode::resolveAfterOperandChange(Metadata *Old, Metadata *New) {
assert(isUniqued() && "Expected this to be uniqued");
- assert(NumUnresolved != 0 && "Expected unresolved operands");
+ assert(getNumUnresolved() != 0 && "Expected unresolved operands");
// Check if an operand was resolved.
if (!isOperandUnresolved(Old)) {
if (isOperandUnresolved(New))
// An operand was un-resolved!
- ++NumUnresolved;
+ setNumUnresolved(getNumUnresolved() + 1);
} else if (!isOperandUnresolved(New))
decrementUnresolvedOperandCount();
}
@@ -627,7 +727,8 @@ void MDNode::decrementUnresolvedOperandCount() {
return;
assert(isUniqued() && "Expected this to be uniqued");
- if (--NumUnresolved)
+ setNumUnresolved(getNumUnresolved() - 1);
+ if (getNumUnresolved())
return;
// Last unresolved operand has just been resolved.
@@ -702,7 +803,7 @@ void MDTuple::recalculateHash() {
}
void MDNode::dropAllReferences() {
- for (unsigned I = 0, E = NumOperands; I != E; ++I)
+ for (unsigned I = 0, E = getNumOperands(); I != E; ++I)
setOperand(I, nullptr);
if (Context.hasReplaceableUses()) {
Context.getReplaceableUses()->resolveAllUses(/* ResolveUsers */ false);
@@ -838,7 +939,8 @@ MDTuple *MDTuple::getImpl(LLVMContext &Context, ArrayRef<Metadata *> MDs,
assert(ShouldCreate && "Expected non-uniqued nodes to always be created");
}
- return storeImpl(new (MDs.size()) MDTuple(Context, Storage, Hash, MDs),
+ return storeImpl(new (MDs.size(), Storage)
+ MDTuple(Context, Storage, Hash, MDs),
Storage, Context.pImpl->MDTuples);
}
@@ -850,7 +952,7 @@ void MDNode::deleteTemporary(MDNode *N) {
void MDNode::storeDistinctInContext() {
assert(!Context.hasReplaceableUses() && "Unexpected replaceable uses");
- assert(!NumUnresolved && "Unexpected unresolved nodes");
+ assert(!getNumUnresolved() && "Unexpected unresolved nodes");
Storage = Distinct;
assert(isResolved() && "Expected this to be resolved");
@@ -883,7 +985,7 @@ void MDNode::replaceOperandWith(unsigned I, Metadata *New) {
}
void MDNode::setOperand(unsigned I, Metadata *New) {
- assert(I < NumOperands);
+ assert(I < getNumOperands());
mutable_begin()[I].reset(New, isUniqued() ? this : nullptr);
}
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 4974b372db2a..5cd74d53da75 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -71,8 +71,7 @@ template class llvm::SymbolTableListTraits<GlobalIFunc>;
Module::Module(StringRef MID, LLVMContext &C)
: Context(C), ValSymTab(std::make_unique<ValueSymbolTable>(-1)),
- Materializer(), ModuleID(std::string(MID)),
- SourceFileName(std::string(MID)), DL("") {
+ ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL("") {
Context.addModule(this);
}
@@ -671,12 +670,15 @@ void Module::setRtLibUseGOT() {
addModuleFlag(ModFlagBehavior::Max, "RtLibUseGOT", 1);
}
-bool Module::getUwtable() const {
- auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("uwtable"));
- return Val && (cast<ConstantInt>(Val->getValue())->getZExtValue() > 0);
+UWTableKind Module::getUwtable() const {
+ if (auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("uwtable")))
+ return UWTableKind(cast<ConstantInt>(Val->getValue())->getZExtValue());
+ return UWTableKind::None;
}
-void Module::setUwtable() { addModuleFlag(ModFlagBehavior::Max, "uwtable", 1); }
+void Module::setUwtable(UWTableKind Kind) {
+ addModuleFlag(ModFlagBehavior::Max, "uwtable", uint32_t(Kind));
+}
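A sketch of the module-level uwtable flag, which now carries a kind rather than a boolean (usage assumed):

#include "llvm/IR/Module.h"
using namespace llvm;

static void requireAsyncUnwindTables(Module &M) {
  if (M.getUwtable() == UWTableKind::None)
    M.setUwtable(UWTableKind::Async); // ModFlagBehavior::Max keeps the stronger kind
}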
FramePointerKind Module::getFramePointer() const {
auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("frame-pointer"));
@@ -734,7 +736,7 @@ void Module::setOverrideStackAlignment(unsigned Align) {
addModuleFlag(ModFlagBehavior::Error, "override-stack-alignment", Align);
}
-void Module::setSDKVersion(const VersionTuple &V) {
+static void addSDKVersionMD(const VersionTuple &V, Module &M, StringRef Name) {
SmallVector<unsigned, 3> Entries;
Entries.push_back(V.getMajor());
if (auto Minor = V.getMinor()) {
@@ -744,8 +746,12 @@ void Module::setSDKVersion(const VersionTuple &V) {
// Ignore the 'build' component as it can't be represented in the object
// file.
}
- addModuleFlag(ModFlagBehavior::Warning, "SDK Version",
- ConstantDataArray::get(Context, Entries));
+ M.addModuleFlag(Module::ModFlagBehavior::Warning, Name,
+ ConstantDataArray::get(M.getContext(), Entries));
+}
+
+void Module::setSDKVersion(const VersionTuple &V) {
+ addSDKVersionMD(V, *this, "SDK Version");
}
static VersionTuple getSDKVersionMD(Metadata *MD) {
@@ -818,6 +824,15 @@ StringRef Module::getDarwinTargetVariantTriple() const {
return "";
}
+void Module::setDarwinTargetVariantTriple(StringRef T) {
+ addModuleFlag(ModFlagBehavior::Override, "darwin.target_variant.triple",
+ MDString::get(getContext(), T));
+}
+
VersionTuple Module::getDarwinTargetVariantSDKVersion() const {
return getSDKVersionMD(getModuleFlag("darwin.target_variant.SDK Version"));
}
+
+void Module::setDarwinTargetVariantSDKVersion(VersionTuple Version) {
+ addSDKVersionMD(Version, *this, "darwin.target_variant.SDK Version");
+}
diff --git a/llvm/lib/IR/Pass.cpp b/llvm/lib/IR/Pass.cpp
index 755ea57c63fd..fe0bfd81a81e 100644
--- a/llvm/lib/IR/Pass.cpp
+++ b/llvm/lib/IR/Pass.cpp
@@ -27,6 +27,10 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/StructuralHash.h"
+#endif
+
using namespace llvm;
#define DEBUG_TYPE "ir"
@@ -133,6 +137,12 @@ LLVM_DUMP_METHOD void Pass::dump() const {
}
#endif
+#ifdef EXPENSIVE_CHECKS
+uint64_t Pass::structuralHash(Module &M) const { return StructuralHash(M); }
+
+uint64_t Pass::structuralHash(Function &F) const { return StructuralHash(F); }
+#endif
+
//===----------------------------------------------------------------------===//
// ImmutablePass Implementation
//
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index d2f676192e7f..069da26e63b1 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueMap.h"
diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp
index d8634e0ac7dd..5d3fa28f7d0a 100644
--- a/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -357,6 +357,17 @@ static enum BaseType getBaseType(const Value *Val) {
Worklist.push_back(SI->getFalseValue());
continue;
}
+ if (const auto *GCRelocate = dyn_cast<GCRelocateInst>(V)) {
+ // GCRelocates do not change null-ness or constant-ness of the value.
+ // So we can continue with the derived pointer this instruction relocates.
+ Worklist.push_back(GCRelocate->getDerivedPtr());
+ continue;
+ }
+ if (const auto *FI = dyn_cast<FreezeInst>(V)) {
+ // Freeze does not change null-ness or constant-ness of the value.
+ Worklist.push_back(FI->getOperand(0));
+ continue;
+ }
if (isa<Constant>(V)) {
// We found at least one base pointer which is non-null, so this derived
// pointer is not exclusively derived from null.
diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp
index 601a9df5279e..99a89386d75f 100644
--- a/llvm/lib/IR/Use.cpp
+++ b/llvm/lib/IR/Use.cpp
@@ -11,10 +11,6 @@
namespace llvm {
-class User;
-template <typename> struct simplify_type;
-class Value;
-
void Use::swap(Use &RHS) {
if (Val == RHS.Val)
return;
diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp
index 68489075cd88..637af7aaa245 100644
--- a/llvm/lib/IR/User.cpp
+++ b/llvm/lib/IR/User.cpp
@@ -18,8 +18,9 @@ class BasicBlock;
// User Class
//===----------------------------------------------------------------------===//
-void User::replaceUsesOfWith(Value *From, Value *To) {
- if (From == To) return; // Duh what?
+bool User::replaceUsesOfWith(Value *From, Value *To) {
+ bool Changed = false;
+ if (From == To) return Changed; // Duh what?
assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
"Cannot call User::replaceUsesOfWith on a constant!");
@@ -30,11 +31,16 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
// "To", adding "this" to the uses list of To, and
// most importantly, removing "this" from the use list of "From".
setOperand(i, To);
+ Changed = true;
}
if (auto DVI = dyn_cast_or_null<DbgVariableIntrinsic>(this)) {
- if (is_contained(DVI->location_ops(), From))
+ if (is_contained(DVI->location_ops(), From)) {
DVI->replaceVariableLocationOp(From, To);
+ Changed = true;
+ }
}
+
+ return Changed;
}
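The new return value lets callers propagate change tracking, e.g. in a sketch like this (caller assumed):

#include "llvm/IR/User.h"
using namespace llvm;

static bool rewriteOperand(User &U, Value *From, Value *To) {
  // True iff an operand or a debug-variable location was actually rewritten.
  return U.replaceUsesOfWith(From, To);
}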
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 18aef37e2023..3990536f3da5 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -28,7 +28,6 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -377,6 +376,7 @@ void Value::setName(const Twine &NewName) {
}
void Value::takeName(Value *V) {
+ assert(V != this && "Illegal call to this->takeName(this)!");
ValueSymbolTable *ST = nullptr;
// If this value has a name, drop it.
if (hasName()) {
@@ -408,7 +408,7 @@ void Value::takeName(Value *V) {
}
}
- // Get V's ST, this should always succed, because V has a name.
+ // Get V's ST, this should always succeed, because V has a name.
ValueSymbolTable *VST;
bool Failure = getSymTab(V, VST);
assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
@@ -963,6 +963,9 @@ Align Value::getPointerAlignment(const DataLayout &DL) const {
return Align(CI->getLimitedValue());
}
} else if (auto *CstPtr = dyn_cast<Constant>(this)) {
+ // Strip pointer casts to avoid creating an unnecessary ptrtoint expression
+ // if the only "reduction" is combining a bitcast + ptrtoint.
+ CstPtr = CstPtr->stripPointerCasts();
if (auto *CstInt = dyn_cast_or_null<ConstantInt>(ConstantExpr::getPtrToInt(
const_cast<Constant *>(CstPtr), DL.getIntPtrType(getType()),
/*OnlyIfReduced=*/true))) {
@@ -1017,20 +1020,16 @@ bool Value::isSwiftError() const {
}
bool Value::isTransitiveUsedByMetadataOnly() const {
- if (use_empty())
- return false;
- llvm::SmallVector<const User *, 32> WorkList;
- llvm::SmallPtrSet<const User *, 32> Visited;
- WorkList.insert(WorkList.begin(), user_begin(), user_end());
+ SmallVector<const User *, 32> WorkList(user_begin(), user_end());
+ SmallPtrSet<const User *, 32> Visited(user_begin(), user_end());
while (!WorkList.empty()) {
const User *U = WorkList.pop_back_val();
- Visited.insert(U);
// If it is transitively used by a global value or a non-constant value,
// it's obviously not only used by metadata.
if (!isa<Constant>(U) || isa<GlobalValue>(U))
return false;
for (const User *UU : U->users())
- if (!Visited.count(UU))
+ if (Visited.insert(UU).second)
WorkList.push_back(UU);
}
return true;
diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp
new file mode 100644
index 000000000000..e7be7a98a593
--- /dev/null
+++ b/llvm/lib/IR/VectorBuilder.cpp
@@ -0,0 +1,103 @@
+//===- VectorBuilder.cpp - Builder for VP Intrinsics ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VectorBuilder class, which is used as a convenient
+// way to create VP intrinsics as if they were LLVM instructions with a
+// consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/IR/FPEnv.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/Intrinsics.h>
+#include <llvm/IR/VectorBuilder.h>
+
+namespace llvm {
+
+void VectorBuilder::handleError(const char *ErrorMsg) const {
+ if (ErrorHandling == Behavior::SilentlyReturnNone)
+ return;
+ report_fatal_error(ErrorMsg);
+}
+
+Module &VectorBuilder::getModule() const {
+ return *Builder.GetInsertBlock()->getModule();
+}
+
+Value *VectorBuilder::getAllTrueMask() {
+ auto *BoolTy = Builder.getInt1Ty();
+ auto *MaskTy = VectorType::get(BoolTy, StaticVectorLength);
+ return ConstantInt::getAllOnesValue(MaskTy);
+}
+
+Value &VectorBuilder::requestMask() {
+ if (Mask)
+ return *Mask;
+
+ return *getAllTrueMask();
+}
+
+Value &VectorBuilder::requestEVL() {
+ if (ExplicitVectorLength)
+ return *ExplicitVectorLength;
+
+ assert(!StaticVectorLength.isScalable() && "TODO vscale lowering");
+ auto *IntTy = Builder.getInt32Ty();
+ return *ConstantInt::get(IntTy, StaticVectorLength.getFixedValue());
+}
+
+Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
+ ArrayRef<Value *> InstOpArray,
+ const Twine &Name) {
+ auto VPID = VPIntrinsic::getForOpcode(Opcode);
+ if (VPID == Intrinsic::not_intrinsic)
+ return returnWithError<Value *>("No VPIntrinsic for this opcode");
+
+ auto MaskPosOpt = VPIntrinsic::getMaskParamPos(VPID);
+ auto VLenPosOpt = VPIntrinsic::getVectorLengthParamPos(VPID);
+ size_t NumInstParams = InstOpArray.size();
+ size_t NumVPParams =
+ NumInstParams + MaskPosOpt.has_value() + VLenPosOpt.has_value();
+
+ SmallVector<Value *, 6> IntrinParams;
+
+ // Whether the mask and vlen parameters sit at the end of the parameter list.
+ bool TrailingMaskAndVLen =
+ std::min<size_t>(MaskPosOpt.value_or(NumInstParams),
+ VLenPosOpt.value_or(NumInstParams)) >= NumInstParams;
+
+ if (TrailingMaskAndVLen) {
+ // Fast path for trailing mask, vector length.
+ IntrinParams.append(InstOpArray.begin(), InstOpArray.end());
+ IntrinParams.resize(NumVPParams);
+ } else {
+ IntrinParams.resize(NumVPParams);
+ // Insert mask and evl operands in between the instruction operands.
+ for (size_t VPParamIdx = 0, ParamIdx = 0; VPParamIdx < NumVPParams;
+ ++VPParamIdx) {
+ if ((MaskPosOpt && MaskPosOpt.value_or(NumVPParams) == VPParamIdx) ||
+ (VLenPosOpt && VLenPosOpt.value_or(NumVPParams) == VPParamIdx))
+ continue;
+ assert(ParamIdx < NumInstParams);
+ IntrinParams[VPParamIdx] = InstOpArray[ParamIdx++];
+ }
+ }
+
+ if (MaskPosOpt)
+ IntrinParams[*MaskPosOpt] = &requestMask();
+ if (VLenPosOpt)
+ IntrinParams[*VLenPosOpt] = &requestEVL();
+
+ auto *VPDecl = VPIntrinsic::getDeclarationForParams(&getModule(), VPID,
+ ReturnTy, IntrinParams);
+ return Builder.CreateCall(VPDecl, IntrinParams, Name);
+}
+
+} // namespace llvm
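A hedged usage sketch for the new builder; setMask()/setEVL() and the constructor signature are assumed from the accompanying VectorBuilder.h, which is not part of this hunk:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/VectorBuilder.h"
using namespace llvm;

static Value *emitVPAdd(IRBuilder<> &B, Value *L, Value *R, Value *M, Value *E) {
  VectorBuilder VB(B);
  VB.setMask(M).setEVL(E);
  // Maps Instruction::Add to llvm.vp.add and slots the mask and EVL into the
  // correct operand positions via createVectorInstruction().
  return VB.createVectorInstruction(Instruction::Add, L->getType(), {L, R});
}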
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 989d01e2e395..75d02f4c8c82 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -84,6 +84,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -100,7 +102,6 @@
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -278,6 +279,12 @@ namespace {
class Verifier : public InstVisitor<Verifier>, VerifierSupport {
friend class InstVisitor<Verifier>;
+ // ISD::ArgFlagsTy::MemAlign only has 4 bits for alignment, so
+ // the alignment size should not exceed 2^15. Since encode(Align)
+ // adds 1 to the shift value, the alignment size should not
+ // exceed 2^14, otherwise it cannot be properly lowered
+ // in the backend.
+ static constexpr unsigned ParamMaxAlignment = 1 << 14;
DominatorTree DT;
/// When verifying a basic block, keep track of all of the
@@ -465,6 +472,7 @@ private:
void visitAnnotationMetadata(MDNode *Annotation);
void visitAliasScopeMetadata(const MDNode *MD);
void visitAliasScopeListMetadata(const MDNode *MD);
+ void visitAccessGroupMetadata(const MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -521,6 +529,7 @@ private:
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
void visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call);
void visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI);
+ void visitVPIntrinsic(VPIntrinsic &VPI);
void visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII);
void visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI);
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
@@ -587,17 +596,27 @@ private:
} // end anonymous namespace
/// We know that cond should be true; if not, print an error message.
-#define Assert(C, ...) \
- do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
+#define Check(C, ...) \
+ do { \
+ if (!(C)) { \
+ CheckFailed(__VA_ARGS__); \
+ return; \
+ } \
+ } while (false)
/// We know that a debug info condition should be true; if not, print
/// an error message.
-#define AssertDI(C, ...) \
- do { if (!(C)) { DebugInfoCheckFailed(__VA_ARGS__); return; } } while (false)
+#define CheckDI(C, ...) \
+ do { \
+ if (!(C)) { \
+ DebugInfoCheckFailed(__VA_ARGS__); \
+ return; \
+ } \
+ } while (false)
void Verifier::visit(Instruction &I) {
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Assert(I.getOperand(i) != nullptr, "Operand is null", &I);
+ Check(I.getOperand(i) != nullptr, "Operand is null", &I);
InstVisitor<Verifier>::visit(I);
}
@@ -620,43 +639,43 @@ static void forEachUser(const Value *User,
}
void Verifier::visitGlobalValue(const GlobalValue &GV) {
- Assert(!GV.isDeclaration() || GV.hasValidDeclarationLinkage(),
- "Global is external, but doesn't have external or weak linkage!", &GV);
+ Check(!GV.isDeclaration() || GV.hasValidDeclarationLinkage(),
+ "Global is external, but doesn't have external or weak linkage!", &GV);
if (const GlobalObject *GO = dyn_cast<GlobalObject>(&GV)) {
if (MaybeAlign A = GO->getAlign()) {
- Assert(A->value() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", GO);
+ Check(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", GO);
}
}
- Assert(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
- "Only global variables can have appending linkage!", &GV);
+ Check(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
if (GV.hasAppendingLinkage()) {
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
- Assert(GVar && GVar->getValueType()->isArrayTy(),
- "Only global arrays can have appending linkage!", GVar);
+ Check(GVar && GVar->getValueType()->isArrayTy(),
+ "Only global arrays can have appending linkage!", GVar);
}
if (GV.isDeclarationForLinker())
- Assert(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV);
+ Check(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV);
if (GV.hasDLLImportStorageClass()) {
- Assert(!GV.isDSOLocal(),
- "GlobalValue with DLLImport Storage is dso_local!", &GV);
+ Check(!GV.isDSOLocal(), "GlobalValue with DLLImport Storage is dso_local!",
+ &GV);
- Assert((GV.isDeclaration() &&
- (GV.hasExternalLinkage() || GV.hasExternalWeakLinkage())) ||
- GV.hasAvailableExternallyLinkage(),
- "Global is marked as dllimport, but not external", &GV);
+ Check((GV.isDeclaration() &&
+ (GV.hasExternalLinkage() || GV.hasExternalWeakLinkage())) ||
+ GV.hasAvailableExternallyLinkage(),
+ "Global is marked as dllimport, but not external", &GV);
}
if (GV.isImplicitDSOLocal())
- Assert(GV.isDSOLocal(),
- "GlobalValue with local linkage or non-default "
- "visibility must be dso_local!",
- &GV);
+ Check(GV.isDSOLocal(),
+ "GlobalValue with local linkage or non-default "
+ "visibility must be dso_local!",
+ &GV);
forEachUser(&GV, GlobalValueVisited, [&](const Value *V) -> bool {
if (const Instruction *I = dyn_cast<Instruction>(V)) {
@@ -680,25 +699,25 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (GV.hasInitializer()) {
- Assert(GV.getInitializer()->getType() == GV.getValueType(),
- "Global variable initializer type does not match global "
- "variable type!",
- &GV);
+ Check(GV.getInitializer()->getType() == GV.getValueType(),
+ "Global variable initializer type does not match global "
+ "variable type!",
+ &GV);
// If the global has common linkage, it must have a zero initializer and
// cannot be constant.
if (GV.hasCommonLinkage()) {
- Assert(GV.getInitializer()->isNullValue(),
- "'common' global must have a zero initializer!", &GV);
- Assert(!GV.isConstant(), "'common' global may not be marked constant!",
- &GV);
- Assert(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV);
+ Check(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Check(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
+ Check(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV);
}
}
if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
GV.getName() == "llvm.global_dtors")) {
- Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
+ Check(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
// Don't worry about emitting an error for it not being an array,
// visitGlobalValue will complain on appending non-array.
if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getValueType())) {
@@ -706,42 +725,41 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
PointerType *FuncPtrTy =
FunctionType::get(Type::getVoidTy(Context), false)->
getPointerTo(DL.getProgramAddressSpace());
- Assert(STy &&
- (STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
- STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
- STy->getTypeAtIndex(1) == FuncPtrTy,
- "wrong type for intrinsic global variable", &GV);
- Assert(STy->getNumElements() == 3,
- "the third field of the element type is mandatory, "
- "specify i8* null to migrate from the obsoleted 2-field form");
+ Check(STy && (STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
+ Check(STy->getNumElements() == 3,
+ "the third field of the element type is mandatory, "
+ "specify i8* null to migrate from the obsoleted 2-field form");
Type *ETy = STy->getTypeAtIndex(2);
Type *Int8Ty = Type::getInt8Ty(ETy->getContext());
- Assert(ETy->isPointerTy() &&
- cast<PointerType>(ETy)->isOpaqueOrPointeeTypeMatches(Int8Ty),
- "wrong type for intrinsic global variable", &GV);
+ Check(ETy->isPointerTy() &&
+ cast<PointerType>(ETy)->isOpaqueOrPointeeTypeMatches(Int8Ty),
+ "wrong type for intrinsic global variable", &GV);
}
}
if (GV.hasName() && (GV.getName() == "llvm.used" ||
GV.getName() == "llvm.compiler.used")) {
- Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
+ Check(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
Type *GVType = GV.getValueType();
if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) {
PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType());
- Assert(PTy, "wrong type for intrinsic global variable", &GV);
+ Check(PTy, "wrong type for intrinsic global variable", &GV);
if (GV.hasInitializer()) {
const Constant *Init = GV.getInitializer();
const ConstantArray *InitArray = dyn_cast<ConstantArray>(Init);
- Assert(InitArray, "wrong initalizer for intrinsic global variable",
- Init);
+ Check(InitArray, "wrong initalizer for intrinsic global variable",
+ Init);
for (Value *Op : InitArray->operands()) {
Value *V = Op->stripPointerCasts();
- Assert(isa<GlobalVariable>(V) || isa<Function>(V) ||
- isa<GlobalAlias>(V),
- Twine("invalid ") + GV.getName() + " member", V);
- Assert(V->hasName(),
- Twine("members of ") + GV.getName() + " must be named", V);
+ Check(isa<GlobalVariable>(V) || isa<Function>(V) ||
+ isa<GlobalAlias>(V),
+ Twine("invalid ") + GV.getName() + " member", V);
+ Check(V->hasName(),
+ Twine("members of ") + GV.getName() + " must be named", V);
}
}
}
@@ -754,20 +772,20 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (auto *GVE = dyn_cast<DIGlobalVariableExpression>(MD))
visitDIGlobalVariableExpression(*GVE);
else
- AssertDI(false, "!dbg attachment of global variable must be a "
- "DIGlobalVariableExpression");
+ CheckDI(false, "!dbg attachment of global variable must be a "
+ "DIGlobalVariableExpression");
}
// Scalable vectors cannot be global variables, since we don't know
// the runtime size. If the global is an array containing scalable vectors,
// that will be caught by the isValidElementType methods in StructType or
// ArrayType instead.
- Assert(!isa<ScalableVectorType>(GV.getValueType()),
- "Globals cannot contain scalable vectors", &GV);
+ Check(!isa<ScalableVectorType>(GV.getValueType()),
+ "Globals cannot contain scalable vectors", &GV);
if (auto *STy = dyn_cast<StructType>(GV.getValueType()))
- Assert(!STy->containsScalableVectorType(),
- "Globals cannot contain scalable vectors", &GV);
+ Check(!STy->containsScalableVectorType(),
+ "Globals cannot contain scalable vectors", &GV);
if (!GV.hasInitializer()) {
visitGlobalValue(GV);
@@ -789,14 +807,14 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) {
void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
const GlobalAlias &GA, const Constant &C) {
if (const auto *GV = dyn_cast<GlobalValue>(&C)) {
- Assert(!GV->isDeclarationForLinker(), "Alias must point to a definition",
- &GA);
+ Check(!GV->isDeclarationForLinker(), "Alias must point to a definition",
+ &GA);
if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) {
- Assert(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
+ Check(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
- Assert(!GA2->isInterposable(), "Alias cannot point to an interposable alias",
- &GA);
+ Check(!GA2->isInterposable(),
+ "Alias cannot point to an interposable alias", &GA);
} else {
// Only continue verifying subexpressions of GlobalAliases.
// Do not recurse into global initializers.
@@ -817,17 +835,17 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
}
void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
- Assert(GlobalAlias::isValidLinkage(GA.getLinkage()),
- "Alias should have private, internal, linkonce, weak, linkonce_odr, "
- "weak_odr, or external linkage!",
- &GA);
+ Check(GlobalAlias::isValidLinkage(GA.getLinkage()),
+ "Alias should have private, internal, linkonce, weak, linkonce_odr, "
+ "weak_odr, or external linkage!",
+ &GA);
const Constant *Aliasee = GA.getAliasee();
- Assert(Aliasee, "Aliasee cannot be NULL!", &GA);
- Assert(GA.getType() == Aliasee->getType(),
- "Alias and aliasee types should match!", &GA);
+ Check(Aliasee, "Aliasee cannot be NULL!", &GA);
+ Check(GA.getType() == Aliasee->getType(),
+ "Alias and aliasee types should match!", &GA);
- Assert(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee),
- "Aliasee should be either GlobalValue or ConstantExpr", &GA);
+ Check(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee),
+ "Aliasee should be either GlobalValue or ConstantExpr", &GA);
visitAliaseeSubExpr(GA, *Aliasee);
@@ -835,30 +853,35 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
}
void Verifier::visitGlobalIFunc(const GlobalIFunc &GI) {
+ Check(GlobalIFunc::isValidLinkage(GI.getLinkage()),
+ "IFunc should have private, internal, linkonce, weak, linkonce_odr, "
+ "weak_odr, or external linkage!",
+ &GI);
// Pierce through ConstantExprs and GlobalAliases and check that the resolver
- // has a Function
+ // is a Function definition.
const Function *Resolver = GI.getResolverFunction();
- Assert(Resolver, "IFunc must have a Function resolver", &GI);
+ Check(Resolver, "IFunc must have a Function resolver", &GI);
+ Check(!Resolver->isDeclarationForLinker(),
+ "IFunc resolver must be a definition", &GI);
// Check that the immediate resolver operand (prior to any bitcasts) has the
- // correct type
+ // correct type.
const Type *ResolverTy = GI.getResolver()->getType();
const Type *ResolverFuncTy =
GlobalIFunc::getResolverFunctionType(GI.getValueType());
- Assert(ResolverTy == ResolverFuncTy->getPointerTo(),
- "IFunc resolver has incorrect type", &GI);
+ Check(ResolverTy == ResolverFuncTy->getPointerTo(),
+ "IFunc resolver has incorrect type", &GI);
}
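
The two checks added to visitGlobalIFunc (valid linkage, resolver must be a
definition) are easy to exercise from the C++ API. A minimal sketch using the
public llvm::verifyModule entry point; the module and symbol names are
illustrative:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("ifunc-check", Ctx);
      auto *FnTy = FunctionType::get(Type::getVoidTy(Ctx), false);
      // The resolver must return a pointer to the ifunc's value type...
      auto *ResolverTy =
          FunctionType::get(PointerType::getUnqual(FnTy), false);
      // ...and, after this change, must also be a definition. A bare
      // declaration now fails with "IFunc resolver must be a definition".
      Function *Resolver = Function::Create(
          ResolverTy, GlobalValue::ExternalLinkage, "resolver", M);
      GlobalIFunc::create(FnTy, /*AddressSpace=*/0,
                          GlobalValue::ExternalLinkage, "ifn", Resolver, &M);
      // verifyModule returns true when the module is broken.
      return verifyModule(M, &errs()) ? 0 : 1;
    }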
void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
// There used to be various other llvm.dbg.* nodes, but we don't support
// upgrading them and we want to reserve the namespace for future uses.
if (NMD.getName().startswith("llvm.dbg."))
- AssertDI(NMD.getName() == "llvm.dbg.cu",
- "unrecognized named metadata node in the llvm.dbg namespace",
- &NMD);
+ CheckDI(NMD.getName() == "llvm.dbg.cu",
+ "unrecognized named metadata node in the llvm.dbg namespace", &NMD);
for (const MDNode *MD : NMD.operands()) {
if (NMD.getName() == "llvm.dbg.cu")
- AssertDI(MD && isa<DICompileUnit>(MD), "invalid compile unit", &NMD, MD);
+ CheckDI(MD && isa<DICompileUnit>(MD), "invalid compile unit", &NMD, MD);
if (!MD)
continue;
@@ -873,8 +896,8 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
if (!MDNodes.insert(&MD).second)
return;
- Assert(&MD.getContext() == &Context,
- "MDNode context does not match Module context!", &MD);
+ Check(&MD.getContext() == &Context,
+ "MDNode context does not match Module context!", &MD);
switch (MD.getMetadataID()) {
default:
@@ -891,10 +914,10 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
for (const Metadata *Op : MD.operands()) {
if (!Op)
continue;
- Assert(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!",
- &MD, Op);
- AssertDI(!isa<DILocation>(Op) || AllowLocs == AreDebugLocsAllowed::Yes,
- "DILocation not allowed within this metadata node", &MD, Op);
+ Check(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!",
+ &MD, Op);
+ CheckDI(!isa<DILocation>(Op) || AllowLocs == AreDebugLocsAllowed::Yes,
+ "DILocation not allowed within this metadata node", &MD, Op);
if (auto *N = dyn_cast<MDNode>(Op)) {
visitMDNode(*N, AllowLocs);
continue;
@@ -906,26 +929,26 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
}
// Check these last, so we diagnose problems in operands first.
- Assert(!MD.isTemporary(), "Expected no forward declarations!", &MD);
- Assert(MD.isResolved(), "All nodes should be resolved!", &MD);
+ Check(!MD.isTemporary(), "Expected no forward declarations!", &MD);
+ Check(MD.isResolved(), "All nodes should be resolved!", &MD);
}
void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) {
- Assert(MD.getValue(), "Expected valid value", &MD);
- Assert(!MD.getValue()->getType()->isMetadataTy(),
- "Unexpected metadata round-trip through values", &MD, MD.getValue());
+ Check(MD.getValue(), "Expected valid value", &MD);
+ Check(!MD.getValue()->getType()->isMetadataTy(),
+ "Unexpected metadata round-trip through values", &MD, MD.getValue());
auto *L = dyn_cast<LocalAsMetadata>(&MD);
if (!L)
return;
- Assert(F, "function-local metadata used outside a function", L);
+ Check(F, "function-local metadata used outside a function", L);
// If this was an instruction, bb, or argument, verify that it is in the
// function that we expect.
Function *ActualF = nullptr;
if (Instruction *I = dyn_cast<Instruction>(L->getValue())) {
- Assert(I->getParent(), "function-local metadata not in basic block", L, I);
+ Check(I->getParent(), "function-local metadata not in basic block", L, I);
ActualF = I->getParent()->getParent();
} else if (BasicBlock *BB = dyn_cast<BasicBlock>(L->getValue()))
ActualF = BB->getParent();
@@ -933,7 +956,7 @@ void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) {
ActualF = A->getParent();
assert(ActualF && "Unimplemented function local metadata case!");
- Assert(ActualF == F, "function-local metadata used in wrong function", L);
+ Check(ActualF == F, "function-local metadata used in wrong function", L);
}
void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) {
@@ -957,125 +980,125 @@ static bool isScope(const Metadata *MD) { return !MD || isa<DIScope>(MD); }
static bool isDINode(const Metadata *MD) { return !MD || isa<DINode>(MD); }
void Verifier::visitDILocation(const DILocation &N) {
- AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
- "location requires a valid scope", &N, N.getRawScope());
+ CheckDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ "location requires a valid scope", &N, N.getRawScope());
if (auto *IA = N.getRawInlinedAt())
- AssertDI(isa<DILocation>(IA), "inlined-at should be a location", &N, IA);
+ CheckDI(isa<DILocation>(IA), "inlined-at should be a location", &N, IA);
if (auto *SP = dyn_cast<DISubprogram>(N.getRawScope()))
- AssertDI(SP->isDefinition(), "scope points into the type hierarchy", &N);
+ CheckDI(SP->isDefinition(), "scope points into the type hierarchy", &N);
}
void Verifier::visitGenericDINode(const GenericDINode &N) {
- AssertDI(N.getTag(), "invalid tag", &N);
+ CheckDI(N.getTag(), "invalid tag", &N);
}
void Verifier::visitDIScope(const DIScope &N) {
if (auto *F = N.getRawFile())
- AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+ CheckDI(isa<DIFile>(F), "invalid file", &N, F);
}
void Verifier::visitDISubrange(const DISubrange &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N);
bool HasAssumedSizedArraySupport = dwarf::isFortran(CurrentSourceLang);
- AssertDI(HasAssumedSizedArraySupport || N.getRawCountNode() ||
- N.getRawUpperBound(),
- "Subrange must contain count or upperBound", &N);
- AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(),
- "Subrange can have any one of count or upperBound", &N);
+ CheckDI(HasAssumedSizedArraySupport || N.getRawCountNode() ||
+ N.getRawUpperBound(),
+ "Subrange must contain count or upperBound", &N);
+ CheckDI(!N.getRawCountNode() || !N.getRawUpperBound(),
+ "Subrange can have any one of count or upperBound", &N);
auto *CBound = N.getRawCountNode();
- AssertDI(!CBound || isa<ConstantAsMetadata>(CBound) ||
- isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
- "Count must be signed constant or DIVariable or DIExpression", &N);
+ CheckDI(!CBound || isa<ConstantAsMetadata>(CBound) ||
+ isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
+ "Count must be signed constant or DIVariable or DIExpression", &N);
auto Count = N.getCount();
- AssertDI(!Count || !Count.is<ConstantInt *>() ||
- Count.get<ConstantInt *>()->getSExtValue() >= -1,
- "invalid subrange count", &N);
+ CheckDI(!Count || !Count.is<ConstantInt *>() ||
+ Count.get<ConstantInt *>()->getSExtValue() >= -1,
+ "invalid subrange count", &N);
auto *LBound = N.getRawLowerBound();
- AssertDI(!LBound || isa<ConstantAsMetadata>(LBound) ||
- isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
- "LowerBound must be signed constant or DIVariable or DIExpression",
- &N);
+ CheckDI(!LBound || isa<ConstantAsMetadata>(LBound) ||
+ isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
+ "LowerBound must be signed constant or DIVariable or DIExpression",
+ &N);
auto *UBound = N.getRawUpperBound();
- AssertDI(!UBound || isa<ConstantAsMetadata>(UBound) ||
- isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
- "UpperBound must be signed constant or DIVariable or DIExpression",
- &N);
+ CheckDI(!UBound || isa<ConstantAsMetadata>(UBound) ||
+ isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
+ "UpperBound must be signed constant or DIVariable or DIExpression",
+ &N);
auto *Stride = N.getRawStride();
- AssertDI(!Stride || isa<ConstantAsMetadata>(Stride) ||
- isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
- "Stride must be signed constant or DIVariable or DIExpression", &N);
+ CheckDI(!Stride || isa<ConstantAsMetadata>(Stride) ||
+ isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
+ "Stride must be signed constant or DIVariable or DIExpression", &N);
}
void Verifier::visitDIGenericSubrange(const DIGenericSubrange &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_generic_subrange, "invalid tag", &N);
- AssertDI(N.getRawCountNode() || N.getRawUpperBound(),
- "GenericSubrange must contain count or upperBound", &N);
- AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(),
- "GenericSubrange can have any one of count or upperBound", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_generic_subrange, "invalid tag", &N);
+ CheckDI(N.getRawCountNode() || N.getRawUpperBound(),
+ "GenericSubrange must contain count or upperBound", &N);
+ CheckDI(!N.getRawCountNode() || !N.getRawUpperBound(),
+ "GenericSubrange can have any one of count or upperBound", &N);
auto *CBound = N.getRawCountNode();
- AssertDI(!CBound || isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
- "Count must be signed constant or DIVariable or DIExpression", &N);
+ CheckDI(!CBound || isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
+ "Count must be signed constant or DIVariable or DIExpression", &N);
auto *LBound = N.getRawLowerBound();
- AssertDI(LBound, "GenericSubrange must contain lowerBound", &N);
- AssertDI(isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
- "LowerBound must be signed constant or DIVariable or DIExpression",
- &N);
+ CheckDI(LBound, "GenericSubrange must contain lowerBound", &N);
+ CheckDI(isa<DIVariable>(LBound) || isa<DIExpression>(LBound),
+ "LowerBound must be signed constant or DIVariable or DIExpression",
+ &N);
auto *UBound = N.getRawUpperBound();
- AssertDI(!UBound || isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
- "UpperBound must be signed constant or DIVariable or DIExpression",
- &N);
+ CheckDI(!UBound || isa<DIVariable>(UBound) || isa<DIExpression>(UBound),
+ "UpperBound must be signed constant or DIVariable or DIExpression",
+ &N);
auto *Stride = N.getRawStride();
- AssertDI(Stride, "GenericSubrange must contain stride", &N);
- AssertDI(isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
- "Stride must be signed constant or DIVariable or DIExpression", &N);
+ CheckDI(Stride, "GenericSubrange must contain stride", &N);
+ CheckDI(isa<DIVariable>(Stride) || isa<DIExpression>(Stride),
+ "Stride must be signed constant or DIVariable or DIExpression", &N);
}
void Verifier::visitDIEnumerator(const DIEnumerator &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
}
void Verifier::visitDIBasicType(const DIBasicType &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_base_type ||
- N.getTag() == dwarf::DW_TAG_unspecified_type ||
- N.getTag() == dwarf::DW_TAG_string_type,
- "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_base_type ||
+ N.getTag() == dwarf::DW_TAG_unspecified_type ||
+ N.getTag() == dwarf::DW_TAG_string_type,
+ "invalid tag", &N);
}
void Verifier::visitDIStringType(const DIStringType &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_string_type, "invalid tag", &N);
- AssertDI(!(N.isBigEndian() && N.isLittleEndian()) ,
- "has conflicting flags", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_string_type, "invalid tag", &N);
+ CheckDI(!(N.isBigEndian() && N.isLittleEndian()), "has conflicting flags",
+ &N);
}
void Verifier::visitDIDerivedType(const DIDerivedType &N) {
// Common scope checks.
visitDIScope(N);
- AssertDI(N.getTag() == dwarf::DW_TAG_typedef ||
- N.getTag() == dwarf::DW_TAG_pointer_type ||
- N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
- N.getTag() == dwarf::DW_TAG_reference_type ||
- N.getTag() == dwarf::DW_TAG_rvalue_reference_type ||
- N.getTag() == dwarf::DW_TAG_const_type ||
- N.getTag() == dwarf::DW_TAG_immutable_type ||
- N.getTag() == dwarf::DW_TAG_volatile_type ||
- N.getTag() == dwarf::DW_TAG_restrict_type ||
- N.getTag() == dwarf::DW_TAG_atomic_type ||
- N.getTag() == dwarf::DW_TAG_member ||
- N.getTag() == dwarf::DW_TAG_inheritance ||
- N.getTag() == dwarf::DW_TAG_friend ||
- N.getTag() == dwarf::DW_TAG_set_type,
- "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_typedef ||
+ N.getTag() == dwarf::DW_TAG_pointer_type ||
+ N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
+ N.getTag() == dwarf::DW_TAG_reference_type ||
+ N.getTag() == dwarf::DW_TAG_rvalue_reference_type ||
+ N.getTag() == dwarf::DW_TAG_const_type ||
+ N.getTag() == dwarf::DW_TAG_immutable_type ||
+ N.getTag() == dwarf::DW_TAG_volatile_type ||
+ N.getTag() == dwarf::DW_TAG_restrict_type ||
+ N.getTag() == dwarf::DW_TAG_atomic_type ||
+ N.getTag() == dwarf::DW_TAG_member ||
+ N.getTag() == dwarf::DW_TAG_inheritance ||
+ N.getTag() == dwarf::DW_TAG_friend ||
+ N.getTag() == dwarf::DW_TAG_set_type,
+ "invalid tag", &N);
if (N.getTag() == dwarf::DW_TAG_ptr_to_member_type) {
- AssertDI(isType(N.getRawExtraData()), "invalid pointer to member type", &N,
- N.getRawExtraData());
+ CheckDI(isType(N.getRawExtraData()), "invalid pointer to member type", &N,
+ N.getRawExtraData());
}
if (N.getTag() == dwarf::DW_TAG_set_type) {
if (auto *T = N.getRawBaseType()) {
auto *Enum = dyn_cast_or_null<DICompositeType>(T);
auto *Basic = dyn_cast_or_null<DIBasicType>(T);
- AssertDI(
+ CheckDI(
(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type) ||
(Basic && (Basic->getEncoding() == dwarf::DW_ATE_unsigned ||
Basic->getEncoding() == dwarf::DW_ATE_signed ||
@@ -1086,16 +1109,16 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
}
}
- AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
- AssertDI(isType(N.getRawBaseType()), "invalid base type", &N,
- N.getRawBaseType());
+ CheckDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
+ CheckDI(isType(N.getRawBaseType()), "invalid base type", &N,
+ N.getRawBaseType());
if (N.getDWARFAddressSpace()) {
- AssertDI(N.getTag() == dwarf::DW_TAG_pointer_type ||
- N.getTag() == dwarf::DW_TAG_reference_type ||
- N.getTag() == dwarf::DW_TAG_rvalue_reference_type,
- "DWARF address space only applies to pointer or reference types",
- &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_pointer_type ||
+ N.getTag() == dwarf::DW_TAG_reference_type ||
+ N.getTag() == dwarf::DW_TAG_rvalue_reference_type,
+ "DWARF address space only applies to pointer or reference types",
+ &N);
}
}
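
On the DWARF address-space restriction just above: DIBuilder exposes the
field only on its pointer- and reference-type helpers, which is the supported
way to produce metadata that passes this check. A sketch, assuming DIB is a
live DIBuilder and ElemTy an existing DIType*:

    // dwarfAddressSpace may only appear on pointer or reference types;
    // attaching it to any other derived type fails verification.
    DIDerivedType *PtrTy =
        DIB.createPointerType(ElemTy, /*SizeInBits=*/64,
                              /*AlignInBits=*/0,
                              /*DWARFAddressSpace=*/1);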
@@ -1109,10 +1132,10 @@ static bool hasConflictingReferenceFlags(unsigned Flags) {
void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) {
auto *Params = dyn_cast<MDTuple>(&RawParams);
- AssertDI(Params, "invalid template params", &N, &RawParams);
+ CheckDI(Params, "invalid template params", &N, &RawParams);
for (Metadata *Op : Params->operands()) {
- AssertDI(Op && isa<DITemplateParameter>(Op), "invalid template parameter",
- &N, Params, Op);
+ CheckDI(Op && isa<DITemplateParameter>(Op), "invalid template parameter",
+ &N, Params, Op);
}
}
@@ -1120,83 +1143,83 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
// Common scope checks.
visitDIScope(N);
- AssertDI(N.getTag() == dwarf::DW_TAG_array_type ||
- N.getTag() == dwarf::DW_TAG_structure_type ||
- N.getTag() == dwarf::DW_TAG_union_type ||
- N.getTag() == dwarf::DW_TAG_enumeration_type ||
- N.getTag() == dwarf::DW_TAG_class_type ||
- N.getTag() == dwarf::DW_TAG_variant_part ||
- N.getTag() == dwarf::DW_TAG_namelist,
- "invalid tag", &N);
-
- AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
- AssertDI(isType(N.getRawBaseType()), "invalid base type", &N,
- N.getRawBaseType());
-
- AssertDI(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
- "invalid composite elements", &N, N.getRawElements());
- AssertDI(isType(N.getRawVTableHolder()), "invalid vtable holder", &N,
- N.getRawVTableHolder());
- AssertDI(!hasConflictingReferenceFlags(N.getFlags()),
- "invalid reference flags", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_array_type ||
+ N.getTag() == dwarf::DW_TAG_structure_type ||
+ N.getTag() == dwarf::DW_TAG_union_type ||
+ N.getTag() == dwarf::DW_TAG_enumeration_type ||
+ N.getTag() == dwarf::DW_TAG_class_type ||
+ N.getTag() == dwarf::DW_TAG_variant_part ||
+ N.getTag() == dwarf::DW_TAG_namelist,
+ "invalid tag", &N);
+
+ CheckDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
+ CheckDI(isType(N.getRawBaseType()), "invalid base type", &N,
+ N.getRawBaseType());
+
+ CheckDI(!N.getRawElements() || isa<MDTuple>(N.getRawElements()),
+ "invalid composite elements", &N, N.getRawElements());
+ CheckDI(isType(N.getRawVTableHolder()), "invalid vtable holder", &N,
+ N.getRawVTableHolder());
+ CheckDI(!hasConflictingReferenceFlags(N.getFlags()),
+ "invalid reference flags", &N);
unsigned DIBlockByRefStruct = 1 << 4;
- AssertDI((N.getFlags() & DIBlockByRefStruct) == 0,
- "DIBlockByRefStruct on DICompositeType is no longer supported", &N);
+ CheckDI((N.getFlags() & DIBlockByRefStruct) == 0,
+ "DIBlockByRefStruct on DICompositeType is no longer supported", &N);
if (N.isVector()) {
const DINodeArray Elements = N.getElements();
- AssertDI(Elements.size() == 1 &&
- Elements[0]->getTag() == dwarf::DW_TAG_subrange_type,
- "invalid vector, expected one element of type subrange", &N);
+ CheckDI(Elements.size() == 1 &&
+ Elements[0]->getTag() == dwarf::DW_TAG_subrange_type,
+ "invalid vector, expected one element of type subrange", &N);
}
if (auto *Params = N.getRawTemplateParams())
visitTemplateParams(N, *Params);
if (auto *D = N.getRawDiscriminator()) {
- AssertDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
- "discriminator can only appear on variant part");
+ CheckDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
+ "discriminator can only appear on variant part");
}
if (N.getRawDataLocation()) {
- AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
- "dataLocation can only appear in array type");
+ CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+ "dataLocation can only appear in array type");
}
if (N.getRawAssociated()) {
- AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
- "associated can only appear in array type");
+ CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+ "associated can only appear in array type");
}
if (N.getRawAllocated()) {
- AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
- "allocated can only appear in array type");
+ CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+ "allocated can only appear in array type");
}
if (N.getRawRank()) {
- AssertDI(N.getTag() == dwarf::DW_TAG_array_type,
- "rank can only appear in array type");
+ CheckDI(N.getTag() == dwarf::DW_TAG_array_type,
+ "rank can only appear in array type");
}
}
void Verifier::visitDISubroutineType(const DISubroutineType &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
if (auto *Types = N.getRawTypeArray()) {
- AssertDI(isa<MDTuple>(Types), "invalid composite elements", &N, Types);
+ CheckDI(isa<MDTuple>(Types), "invalid composite elements", &N, Types);
for (Metadata *Ty : N.getTypeArray()->operands()) {
- AssertDI(isType(Ty), "invalid subroutine type ref", &N, Types, Ty);
+ CheckDI(isType(Ty), "invalid subroutine type ref", &N, Types, Ty);
}
}
- AssertDI(!hasConflictingReferenceFlags(N.getFlags()),
- "invalid reference flags", &N);
+ CheckDI(!hasConflictingReferenceFlags(N.getFlags()),
+ "invalid reference flags", &N);
}
void Verifier::visitDIFile(const DIFile &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
Optional<DIFile::ChecksumInfo<StringRef>> Checksum = N.getChecksum();
if (Checksum) {
- AssertDI(Checksum->Kind <= DIFile::ChecksumKind::CSK_Last,
- "invalid checksum kind", &N);
+ CheckDI(Checksum->Kind <= DIFile::ChecksumKind::CSK_Last,
+ "invalid checksum kind", &N);
size_t Size;
switch (Checksum->Kind) {
case DIFile::CSK_MD5:
@@ -1209,137 +1232,137 @@ void Verifier::visitDIFile(const DIFile &N) {
Size = 64;
break;
}
- AssertDI(Checksum->Value.size() == Size, "invalid checksum length", &N);
- AssertDI(Checksum->Value.find_if_not(llvm::isHexDigit) == StringRef::npos,
- "invalid checksum", &N);
+ CheckDI(Checksum->Value.size() == Size, "invalid checksum length", &N);
+ CheckDI(Checksum->Value.find_if_not(llvm::isHexDigit) == StringRef::npos,
+ "invalid checksum", &N);
}
}
void Verifier::visitDICompileUnit(const DICompileUnit &N) {
- AssertDI(N.isDistinct(), "compile units must be distinct", &N);
- AssertDI(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
+ CheckDI(N.isDistinct(), "compile units must be distinct", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
// Don't bother verifying the compilation directory or producer string
// as those could be empty.
- AssertDI(N.getRawFile() && isa<DIFile>(N.getRawFile()), "invalid file", &N,
- N.getRawFile());
- AssertDI(!N.getFile()->getFilename().empty(), "invalid filename", &N,
- N.getFile());
+ CheckDI(N.getRawFile() && isa<DIFile>(N.getRawFile()), "invalid file", &N,
+ N.getRawFile());
+ CheckDI(!N.getFile()->getFilename().empty(), "invalid filename", &N,
+ N.getFile());
CurrentSourceLang = (dwarf::SourceLanguage)N.getSourceLanguage();
verifySourceDebugInfo(N, *N.getFile());
- AssertDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
- "invalid emission kind", &N);
+ CheckDI((N.getEmissionKind() <= DICompileUnit::LastEmissionKind),
+ "invalid emission kind", &N);
if (auto *Array = N.getRawEnumTypes()) {
- AssertDI(isa<MDTuple>(Array), "invalid enum list", &N, Array);
+ CheckDI(isa<MDTuple>(Array), "invalid enum list", &N, Array);
for (Metadata *Op : N.getEnumTypes()->operands()) {
auto *Enum = dyn_cast_or_null<DICompositeType>(Op);
- AssertDI(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type,
- "invalid enum type", &N, N.getEnumTypes(), Op);
+ CheckDI(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type,
+ "invalid enum type", &N, N.getEnumTypes(), Op);
}
}
if (auto *Array = N.getRawRetainedTypes()) {
- AssertDI(isa<MDTuple>(Array), "invalid retained type list", &N, Array);
+ CheckDI(isa<MDTuple>(Array), "invalid retained type list", &N, Array);
for (Metadata *Op : N.getRetainedTypes()->operands()) {
- AssertDI(Op && (isa<DIType>(Op) ||
- (isa<DISubprogram>(Op) &&
- !cast<DISubprogram>(Op)->isDefinition())),
- "invalid retained type", &N, Op);
+ CheckDI(
+ Op && (isa<DIType>(Op) || (isa<DISubprogram>(Op) &&
+ !cast<DISubprogram>(Op)->isDefinition())),
+ "invalid retained type", &N, Op);
}
}
if (auto *Array = N.getRawGlobalVariables()) {
- AssertDI(isa<MDTuple>(Array), "invalid global variable list", &N, Array);
+ CheckDI(isa<MDTuple>(Array), "invalid global variable list", &N, Array);
for (Metadata *Op : N.getGlobalVariables()->operands()) {
- AssertDI(Op && (isa<DIGlobalVariableExpression>(Op)),
- "invalid global variable ref", &N, Op);
+ CheckDI(Op && (isa<DIGlobalVariableExpression>(Op)),
+ "invalid global variable ref", &N, Op);
}
}
if (auto *Array = N.getRawImportedEntities()) {
- AssertDI(isa<MDTuple>(Array), "invalid imported entity list", &N, Array);
+ CheckDI(isa<MDTuple>(Array), "invalid imported entity list", &N, Array);
for (Metadata *Op : N.getImportedEntities()->operands()) {
- AssertDI(Op && isa<DIImportedEntity>(Op), "invalid imported entity ref",
- &N, Op);
+ CheckDI(Op && isa<DIImportedEntity>(Op), "invalid imported entity ref",
+ &N, Op);
}
}
if (auto *Array = N.getRawMacros()) {
- AssertDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
+ CheckDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
for (Metadata *Op : N.getMacros()->operands()) {
- AssertDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
+ CheckDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
}
}
CUVisited.insert(&N);
}
void Verifier::visitDISubprogram(const DISubprogram &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
- AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
+ CheckDI(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
+ CheckDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
if (auto *F = N.getRawFile())
- AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+ CheckDI(isa<DIFile>(F), "invalid file", &N, F);
else
- AssertDI(N.getLine() == 0, "line specified with no file", &N, N.getLine());
+ CheckDI(N.getLine() == 0, "line specified with no file", &N, N.getLine());
if (auto *T = N.getRawType())
- AssertDI(isa<DISubroutineType>(T), "invalid subroutine type", &N, T);
- AssertDI(isType(N.getRawContainingType()), "invalid containing type", &N,
- N.getRawContainingType());
+ CheckDI(isa<DISubroutineType>(T), "invalid subroutine type", &N, T);
+ CheckDI(isType(N.getRawContainingType()), "invalid containing type", &N,
+ N.getRawContainingType());
if (auto *Params = N.getRawTemplateParams())
visitTemplateParams(N, *Params);
if (auto *S = N.getRawDeclaration())
- AssertDI(isa<DISubprogram>(S) && !cast<DISubprogram>(S)->isDefinition(),
- "invalid subprogram declaration", &N, S);
+ CheckDI(isa<DISubprogram>(S) && !cast<DISubprogram>(S)->isDefinition(),
+ "invalid subprogram declaration", &N, S);
if (auto *RawNode = N.getRawRetainedNodes()) {
auto *Node = dyn_cast<MDTuple>(RawNode);
- AssertDI(Node, "invalid retained nodes list", &N, RawNode);
+ CheckDI(Node, "invalid retained nodes list", &N, RawNode);
for (Metadata *Op : Node->operands()) {
- AssertDI(Op && (isa<DILocalVariable>(Op) || isa<DILabel>(Op)),
- "invalid retained nodes, expected DILocalVariable or DILabel",
- &N, Node, Op);
+ CheckDI(Op && (isa<DILocalVariable>(Op) || isa<DILabel>(Op)),
+ "invalid retained nodes, expected DILocalVariable or DILabel", &N,
+ Node, Op);
}
}
- AssertDI(!hasConflictingReferenceFlags(N.getFlags()),
- "invalid reference flags", &N);
+ CheckDI(!hasConflictingReferenceFlags(N.getFlags()),
+ "invalid reference flags", &N);
auto *Unit = N.getRawUnit();
if (N.isDefinition()) {
// Subprogram definitions (not part of the type hierarchy).
- AssertDI(N.isDistinct(), "subprogram definitions must be distinct", &N);
- AssertDI(Unit, "subprogram definitions must have a compile unit", &N);
- AssertDI(isa<DICompileUnit>(Unit), "invalid unit type", &N, Unit);
+ CheckDI(N.isDistinct(), "subprogram definitions must be distinct", &N);
+ CheckDI(Unit, "subprogram definitions must have a compile unit", &N);
+ CheckDI(isa<DICompileUnit>(Unit), "invalid unit type", &N, Unit);
if (N.getFile())
verifySourceDebugInfo(*N.getUnit(), *N.getFile());
} else {
// Subprogram declarations (part of the type hierarchy).
- AssertDI(!Unit, "subprogram declarations must not have a compile unit", &N);
+ CheckDI(!Unit, "subprogram declarations must not have a compile unit", &N);
}
if (auto *RawThrownTypes = N.getRawThrownTypes()) {
auto *ThrownTypes = dyn_cast<MDTuple>(RawThrownTypes);
- AssertDI(ThrownTypes, "invalid thrown types list", &N, RawThrownTypes);
+ CheckDI(ThrownTypes, "invalid thrown types list", &N, RawThrownTypes);
for (Metadata *Op : ThrownTypes->operands())
- AssertDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes,
- Op);
+ CheckDI(Op && isa<DIType>(Op), "invalid thrown type", &N, ThrownTypes,
+ Op);
}
if (N.areAllCallsDescribed())
- AssertDI(N.isDefinition(),
- "DIFlagAllCallsDescribed must be attached to a definition");
+ CheckDI(N.isDefinition(),
+ "DIFlagAllCallsDescribed must be attached to a definition");
}
void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
- AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
- "invalid local scope", &N, N.getRawScope());
+ CheckDI(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
+ CheckDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ "invalid local scope", &N, N.getRawScope());
if (auto *SP = dyn_cast<DISubprogram>(N.getRawScope()))
- AssertDI(SP->isDefinition(), "scope points into the type hierarchy", &N);
+ CheckDI(SP->isDefinition(), "scope points into the type hierarchy", &N);
}
void Verifier::visitDILexicalBlock(const DILexicalBlock &N) {
visitDILexicalBlockBase(N);
- AssertDI(N.getLine() || !N.getColumn(),
- "cannot have column info without line info", &N);
+ CheckDI(N.getLine() || !N.getColumn(),
+ "cannot have column info without line info", &N);
}
void Verifier::visitDILexicalBlockFile(const DILexicalBlockFile &N) {
@@ -1347,95 +1370,95 @@ void Verifier::visitDILexicalBlockFile(const DILexicalBlockFile &N) {
}
void Verifier::visitDICommonBlock(const DICommonBlock &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_common_block, "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_common_block, "invalid tag", &N);
if (auto *S = N.getRawScope())
- AssertDI(isa<DIScope>(S), "invalid scope ref", &N, S);
+ CheckDI(isa<DIScope>(S), "invalid scope ref", &N, S);
if (auto *S = N.getRawDecl())
- AssertDI(isa<DIGlobalVariable>(S), "invalid declaration", &N, S);
+ CheckDI(isa<DIGlobalVariable>(S), "invalid declaration", &N, S);
}
void Verifier::visitDINamespace(const DINamespace &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
if (auto *S = N.getRawScope())
- AssertDI(isa<DIScope>(S), "invalid scope ref", &N, S);
+ CheckDI(isa<DIScope>(S), "invalid scope ref", &N, S);
}
void Verifier::visitDIMacro(const DIMacro &N) {
- AssertDI(N.getMacinfoType() == dwarf::DW_MACINFO_define ||
- N.getMacinfoType() == dwarf::DW_MACINFO_undef,
- "invalid macinfo type", &N);
- AssertDI(!N.getName().empty(), "anonymous macro", &N);
+ CheckDI(N.getMacinfoType() == dwarf::DW_MACINFO_define ||
+ N.getMacinfoType() == dwarf::DW_MACINFO_undef,
+ "invalid macinfo type", &N);
+ CheckDI(!N.getName().empty(), "anonymous macro", &N);
if (!N.getValue().empty()) {
assert(N.getValue().data()[0] != ' ' && "Macro value has a space prefix");
}
}
void Verifier::visitDIMacroFile(const DIMacroFile &N) {
- AssertDI(N.getMacinfoType() == dwarf::DW_MACINFO_start_file,
- "invalid macinfo type", &N);
+ CheckDI(N.getMacinfoType() == dwarf::DW_MACINFO_start_file,
+ "invalid macinfo type", &N);
if (auto *F = N.getRawFile())
- AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+ CheckDI(isa<DIFile>(F), "invalid file", &N, F);
if (auto *Array = N.getRawElements()) {
- AssertDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
+ CheckDI(isa<MDTuple>(Array), "invalid macro list", &N, Array);
for (Metadata *Op : N.getElements()->operands()) {
- AssertDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
+ CheckDI(Op && isa<DIMacroNode>(Op), "invalid macro ref", &N, Op);
}
}
}
void Verifier::visitDIArgList(const DIArgList &N) {
- AssertDI(!N.getNumOperands(),
- "DIArgList should have no operands other than a list of "
- "ValueAsMetadata",
- &N);
+ CheckDI(!N.getNumOperands(),
+ "DIArgList should have no operands other than a list of "
+ "ValueAsMetadata",
+ &N);
}
void Verifier::visitDIModule(const DIModule &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N);
- AssertDI(!N.getName().empty(), "anonymous module", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N);
+ CheckDI(!N.getName().empty(), "anonymous module", &N);
}
void Verifier::visitDITemplateParameter(const DITemplateParameter &N) {
- AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
+ CheckDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
}
void Verifier::visitDITemplateTypeParameter(const DITemplateTypeParameter &N) {
visitDITemplateParameter(N);
- AssertDI(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
- &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
+ &N);
}
void Verifier::visitDITemplateValueParameter(
const DITemplateValueParameter &N) {
visitDITemplateParameter(N);
- AssertDI(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
- N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
- N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
- "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
+ N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
+ N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
+ "invalid tag", &N);
}
void Verifier::visitDIVariable(const DIVariable &N) {
if (auto *S = N.getRawScope())
- AssertDI(isa<DIScope>(S), "invalid scope", &N, S);
+ CheckDI(isa<DIScope>(S), "invalid scope", &N, S);
if (auto *F = N.getRawFile())
- AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+ CheckDI(isa<DIFile>(F), "invalid file", &N, F);
}
void Verifier::visitDIGlobalVariable(const DIGlobalVariable &N) {
// Checks common to all variables.
visitDIVariable(N);
- AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
- AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
- // Assert only if the global variable is not an extern
+ CheckDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
+ CheckDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
+ // Check only if the global variable is not an extern
if (N.isDefinition())
- AssertDI(N.getType(), "missing global variable type", &N);
+ CheckDI(N.getType(), "missing global variable type", &N);
if (auto *Member = N.getRawStaticDataMemberDeclaration()) {
- AssertDI(isa<DIDerivedType>(Member),
- "invalid static data member declaration", &N, Member);
+ CheckDI(isa<DIDerivedType>(Member),
+ "invalid static data member declaration", &N, Member);
}
}
@@ -1443,32 +1466,32 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) {
// Checks common to all variables.
visitDIVariable(N);
- AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
- AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
- AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
- "local variable requires a valid scope", &N, N.getRawScope());
+ CheckDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
+ CheckDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
+ CheckDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ "local variable requires a valid scope", &N, N.getRawScope());
if (auto Ty = N.getType())
- AssertDI(!isa<DISubroutineType>(Ty), "invalid type", &N, N.getType());
+ CheckDI(!isa<DISubroutineType>(Ty), "invalid type", &N, N.getType());
}
void Verifier::visitDILabel(const DILabel &N) {
if (auto *S = N.getRawScope())
- AssertDI(isa<DIScope>(S), "invalid scope", &N, S);
+ CheckDI(isa<DIScope>(S), "invalid scope", &N, S);
if (auto *F = N.getRawFile())
- AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+ CheckDI(isa<DIFile>(F), "invalid file", &N, F);
- AssertDI(N.getTag() == dwarf::DW_TAG_label, "invalid tag", &N);
- AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
- "label requires a valid scope", &N, N.getRawScope());
+ CheckDI(N.getTag() == dwarf::DW_TAG_label, "invalid tag", &N);
+ CheckDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+ "label requires a valid scope", &N, N.getRawScope());
}
void Verifier::visitDIExpression(const DIExpression &N) {
- AssertDI(N.isValid(), "invalid expression", &N);
+ CheckDI(N.isValid(), "invalid expression", &N);
}
void Verifier::visitDIGlobalVariableExpression(
const DIGlobalVariableExpression &GVE) {
- AssertDI(GVE.getVariable(), "missing variable");
+ CheckDI(GVE.getVariable(), "missing variable");
if (auto *Var = GVE.getVariable())
visitDIGlobalVariable(*Var);
if (auto *Expr = GVE.getExpression()) {
@@ -1479,21 +1502,21 @@ void Verifier::visitDIGlobalVariableExpression(
}
void Verifier::visitDIObjCProperty(const DIObjCProperty &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
if (auto *T = N.getRawType())
- AssertDI(isType(T), "invalid type ref", &N, T);
+ CheckDI(isType(T), "invalid type ref", &N, T);
if (auto *F = N.getRawFile())
- AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+ CheckDI(isa<DIFile>(F), "invalid file", &N, F);
}
void Verifier::visitDIImportedEntity(const DIImportedEntity &N) {
- AssertDI(N.getTag() == dwarf::DW_TAG_imported_module ||
- N.getTag() == dwarf::DW_TAG_imported_declaration,
- "invalid tag", &N);
+ CheckDI(N.getTag() == dwarf::DW_TAG_imported_module ||
+ N.getTag() == dwarf::DW_TAG_imported_declaration,
+ "invalid tag", &N);
if (auto *S = N.getRawScope())
- AssertDI(isa<DIScope>(S), "invalid scope for imported entity", &N, S);
- AssertDI(isDINode(N.getRawEntity()), "invalid imported entity", &N,
- N.getRawEntity());
+ CheckDI(isa<DIScope>(S), "invalid scope for imported entity", &N, S);
+ CheckDI(isDINode(N.getRawEntity()), "invalid imported entity", &N,
+ N.getRawEntity());
}
void Verifier::visitComdat(const Comdat &C) {
@@ -1501,8 +1524,8 @@ void Verifier::visitComdat(const Comdat &C) {
// Entities with private linkage don't have entries in the symbol table.
if (TT.isOSBinFormatCOFF())
if (const GlobalValue *GV = M.getNamedValue(C.getName()))
- Assert(!GV->hasPrivateLinkage(),
- "comdat global value has private linkage", GV);
+ Check(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
+ GV);
}
void Verifier::visitModuleIdents() {
@@ -1513,12 +1536,12 @@ void Verifier::visitModuleIdents() {
// llvm.ident takes a list of metadata entries. Each entry has only one string.
// Scan each llvm.ident entry and make sure that this requirement is met.
for (const MDNode *N : Idents->operands()) {
- Assert(N->getNumOperands() == 1,
- "incorrect number of operands in llvm.ident metadata", N);
- Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
- ("invalid value for llvm.ident metadata entry operand"
- "(the operand should be a string)"),
- N->getOperand(0));
+ Check(N->getNumOperands() == 1,
+ "incorrect number of operands in llvm.ident metadata", N);
+ Check(dyn_cast_or_null<MDString>(N->getOperand(0)),
+ ("invalid value for llvm.ident metadata entry operand"
+ "(the operand should be a string)"),
+ N->getOperand(0));
}
}
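
For reference, the shape these checks demand -- every llvm.ident operand a
one-operand node wrapping a single string -- falls out of the standard
metadata API. A sketch, assuming a Module M and LLVMContext Ctx; the version
string is illustrative:

    // One entry, one operand, and that operand an MDString -- exactly
    // what the checks above require.
    NamedMDNode *Idents = M.getOrInsertNamedMetadata("llvm.ident");
    Idents->addOperand(MDNode::get(
        Ctx, {MDString::get(Ctx, "clang version 14.0.0")}));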
@@ -1531,12 +1554,12 @@ void Verifier::visitModuleCommandLines() {
// string. Scan each llvm.commandline entry and make sure that this
// requirement is met.
for (const MDNode *N : CommandLines->operands()) {
- Assert(N->getNumOperands() == 1,
- "incorrect number of operands in llvm.commandline metadata", N);
- Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
- ("invalid value for llvm.commandline metadata entry operand"
- "(the operand should be a string)"),
- N->getOperand(0));
+ Check(N->getNumOperands() == 1,
+ "incorrect number of operands in llvm.commandline metadata", N);
+ Check(dyn_cast_or_null<MDString>(N->getOperand(0)),
+ ("invalid value for llvm.commandline metadata entry operand"
+ "(the operand should be a string)"),
+ N->getOperand(0));
}
}
@@ -1577,21 +1600,20 @@ Verifier::visitModuleFlag(const MDNode *Op,
SmallVectorImpl<const MDNode *> &Requirements) {
// Each module flag should have three arguments, the merge behavior (a
// constant int), the flag ID (an MDString), and the value.
- Assert(Op->getNumOperands() == 3,
- "incorrect number of operands in module flag", Op);
+ Check(Op->getNumOperands() == 3,
+ "incorrect number of operands in module flag", Op);
Module::ModFlagBehavior MFB;
if (!Module::isValidModFlagBehavior(Op->getOperand(0), MFB)) {
- Assert(
- mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)),
- "invalid behavior operand in module flag (expected constant integer)",
- Op->getOperand(0));
- Assert(false,
- "invalid behavior operand in module flag (unexpected constant)",
- Op->getOperand(0));
+ Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)),
+ "invalid behavior operand in module flag (expected constant integer)",
+ Op->getOperand(0));
+ Check(false,
+ "invalid behavior operand in module flag (unexpected constant)",
+ Op->getOperand(0));
}
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
- Assert(ID, "invalid ID operand in module flag (expected metadata string)",
- Op->getOperand(1));
+ Check(ID, "invalid ID operand in module flag (expected metadata string)",
+ Op->getOperand(1));
// Check the values for behaviors with additional requirements.
switch (MFB) {
@@ -1601,10 +1623,17 @@ Verifier::visitModuleFlag(const MDNode *Op,
// These behavior types accept any value.
break;
+ case Module::Min: {
+ Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
+ "invalid value for 'min' module flag (expected constant integer)",
+ Op->getOperand(2));
+ break;
+ }
+
case Module::Max: {
- Assert(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
- "invalid value for 'max' module flag (expected constant integer)",
- Op->getOperand(2));
+ Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
+ "invalid value for 'max' module flag (expected constant integer)",
+ Op->getOperand(2));
break;
}
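
Min is the newly accepted merge behavior here; like Max it requires a
constant-integer value. The simplest way to emit a well-formed flag is
Module::addModuleFlag, which wraps the integer as ConstantAsMetadata and so
satisfies the new check (the flag name below is illustrative):

    // A module flag with 'min' merge behavior: when two modules carrying
    // the flag are linked, the smaller value wins.
    M.addModuleFlag(Module::Min, "small_data_limit", 0);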
@@ -1612,13 +1641,13 @@ Verifier::visitModuleFlag(const MDNode *Op,
// The value should itself be an MDNode with two operands, a flag ID (an
// MDString), and a value.
MDNode *Value = dyn_cast<MDNode>(Op->getOperand(2));
- Assert(Value && Value->getNumOperands() == 2,
- "invalid value for 'require' module flag (expected metadata pair)",
- Op->getOperand(2));
- Assert(isa<MDString>(Value->getOperand(0)),
- ("invalid value for 'require' module flag "
- "(first value operand should be a string)"),
- Value->getOperand(0));
+ Check(Value && Value->getNumOperands() == 2,
+ "invalid value for 'require' module flag (expected metadata pair)",
+ Op->getOperand(2));
+ Check(isa<MDString>(Value->getOperand(0)),
+ ("invalid value for 'require' module flag "
+ "(first value operand should be a string)"),
+ Value->getOperand(0));
// Append it to the list of requirements, to check once all module flags are
// scanned.
@@ -1629,10 +1658,10 @@ Verifier::visitModuleFlag(const MDNode *Op,
case Module::Append:
case Module::AppendUnique: {
// These behavior types require the operand be an MDNode.
- Assert(isa<MDNode>(Op->getOperand(2)),
- "invalid value for 'append'-type module flag "
- "(expected a metadata node)",
- Op->getOperand(2));
+ Check(isa<MDNode>(Op->getOperand(2)),
+ "invalid value for 'append'-type module flag "
+ "(expected a metadata node)",
+ Op->getOperand(2));
break;
}
}
@@ -1640,29 +1669,29 @@ Verifier::visitModuleFlag(const MDNode *Op,
// Unless this is a "requires" flag, check the ID is unique.
if (MFB != Module::Require) {
bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
- Assert(Inserted,
- "module flag identifiers must be unique (or of 'require' type)", ID);
+ Check(Inserted,
+ "module flag identifiers must be unique (or of 'require' type)", ID);
}
if (ID->getString() == "wchar_size") {
ConstantInt *Value
= mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2));
- Assert(Value, "wchar_size metadata requires constant integer argument");
+ Check(Value, "wchar_size metadata requires constant integer argument");
}
if (ID->getString() == "Linker Options") {
// If the llvm.linker.options named metadata exists, we assume that the
// bitcode reader has upgraded the module flag. Otherwise the flag might
// have been created by a client directly.
- Assert(M.getNamedMetadata("llvm.linker.options"),
- "'Linker Options' named metadata no longer supported");
+ Check(M.getNamedMetadata("llvm.linker.options"),
+ "'Linker Options' named metadata no longer supported");
}
if (ID->getString() == "SemanticInterposition") {
ConstantInt *Value =
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2));
- Assert(Value,
- "SemanticInterposition metadata requires constant integer argument");
+ Check(Value,
+ "SemanticInterposition metadata requires constant integer argument");
}
if (ID->getString() == "CG Profile") {
@@ -1676,16 +1705,16 @@ void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) {
if (!FuncMDO)
return;
auto F = dyn_cast<ValueAsMetadata>(FuncMDO);
- Assert(F && isa<Function>(F->getValue()->stripPointerCasts()),
- "expected a Function or null", FuncMDO);
+ Check(F && isa<Function>(F->getValue()->stripPointerCasts()),
+ "expected a Function or null", FuncMDO);
};
auto Node = dyn_cast_or_null<MDNode>(MDO);
- Assert(Node && Node->getNumOperands() == 3, "expected a MDNode triple", MDO);
+ Check(Node && Node->getNumOperands() == 3, "expected a MDNode triple", MDO);
CheckFunction(Node->getOperand(0));
CheckFunction(Node->getOperand(1));
auto Count = dyn_cast_or_null<ConstantAsMetadata>(Node->getOperand(2));
- Assert(Count && Count->getType()->isIntegerTy(),
- "expected an integer constant", Node->getOperand(2));
+ Check(Count && Count->getType()->isIntegerTy(),
+ "expected an integer constant", Node->getOperand(2));
}
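
Put together, a "CG Profile" entry that satisfies these checks is a
three-operand node: two function (or null) operands and an integer call
count. A sketch, assuming Caller and Callee are Function* in scope and Ctx is
their LLVMContext:

    // {caller, callee, count} -- the count must be wrapped as
    // ConstantAsMetadata or "expected an integer constant" fires.
    Metadata *Ops[] = {
        ValueAsMetadata::get(Caller), ValueAsMetadata::get(Callee),
        ConstantAsMetadata::get(
            ConstantInt::get(Type::getInt64Ty(Ctx), 42))};
    MDNode *Entry = MDNode::get(Ctx, Ops);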
void Verifier::verifyAttributeTypes(AttributeSet Attrs, const Value *V) {
@@ -1724,15 +1753,14 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
verifyAttributeTypes(Attrs, V);
for (Attribute Attr : Attrs)
- Assert(Attr.isStringAttribute() ||
- Attribute::canUseAsParamAttr(Attr.getKindAsEnum()),
- "Attribute '" + Attr.getAsString() +
- "' does not apply to parameters",
- V);
+ Check(Attr.isStringAttribute() ||
+ Attribute::canUseAsParamAttr(Attr.getKindAsEnum()),
+ "Attribute '" + Attr.getAsString() + "' does not apply to parameters",
+ V);
if (Attrs.hasAttribute(Attribute::ImmArg)) {
- Assert(Attrs.getNumAttributes() == 1,
- "Attribute 'immarg' is incompatible with other attributes", V);
+ Check(Attrs.getNumAttributes() == 1,
+ "Attribute 'immarg' is incompatible with other attributes", V);
}
// Check for mutually incompatible attributes. Only inreg is compatible with
@@ -1745,52 +1773,52 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
Attrs.hasAttribute(Attribute::InReg);
AttrCount += Attrs.hasAttribute(Attribute::Nest);
AttrCount += Attrs.hasAttribute(Attribute::ByRef);
- Assert(AttrCount <= 1,
- "Attributes 'byval', 'inalloca', 'preallocated', 'inreg', 'nest', "
- "'byref', and 'sret' are incompatible!",
- V);
-
- Assert(!(Attrs.hasAttribute(Attribute::InAlloca) &&
- Attrs.hasAttribute(Attribute::ReadOnly)),
- "Attributes "
- "'inalloca and readonly' are incompatible!",
- V);
-
- Assert(!(Attrs.hasAttribute(Attribute::StructRet) &&
- Attrs.hasAttribute(Attribute::Returned)),
- "Attributes "
- "'sret and returned' are incompatible!",
- V);
-
- Assert(!(Attrs.hasAttribute(Attribute::ZExt) &&
- Attrs.hasAttribute(Attribute::SExt)),
- "Attributes "
- "'zeroext and signext' are incompatible!",
- V);
-
- Assert(!(Attrs.hasAttribute(Attribute::ReadNone) &&
- Attrs.hasAttribute(Attribute::ReadOnly)),
- "Attributes "
- "'readnone and readonly' are incompatible!",
- V);
-
- Assert(!(Attrs.hasAttribute(Attribute::ReadNone) &&
- Attrs.hasAttribute(Attribute::WriteOnly)),
- "Attributes "
- "'readnone and writeonly' are incompatible!",
- V);
-
- Assert(!(Attrs.hasAttribute(Attribute::ReadOnly) &&
- Attrs.hasAttribute(Attribute::WriteOnly)),
- "Attributes "
- "'readonly and writeonly' are incompatible!",
- V);
-
- Assert(!(Attrs.hasAttribute(Attribute::NoInline) &&
- Attrs.hasAttribute(Attribute::AlwaysInline)),
- "Attributes "
- "'noinline and alwaysinline' are incompatible!",
- V);
+ Check(AttrCount <= 1,
+ "Attributes 'byval', 'inalloca', 'preallocated', 'inreg', 'nest', "
+ "'byref', and 'sret' are incompatible!",
+ V);
+
+ Check(!(Attrs.hasAttribute(Attribute::InAlloca) &&
+ Attrs.hasAttribute(Attribute::ReadOnly)),
+ "Attributes "
+ "'inalloca and readonly' are incompatible!",
+ V);
+
+ Check(!(Attrs.hasAttribute(Attribute::StructRet) &&
+ Attrs.hasAttribute(Attribute::Returned)),
+ "Attributes "
+ "'sret and returned' are incompatible!",
+ V);
+
+ Check(!(Attrs.hasAttribute(Attribute::ZExt) &&
+ Attrs.hasAttribute(Attribute::SExt)),
+ "Attributes "
+ "'zeroext and signext' are incompatible!",
+ V);
+
+ Check(!(Attrs.hasAttribute(Attribute::ReadNone) &&
+ Attrs.hasAttribute(Attribute::ReadOnly)),
+ "Attributes "
+ "'readnone and readonly' are incompatible!",
+ V);
+
+ Check(!(Attrs.hasAttribute(Attribute::ReadNone) &&
+ Attrs.hasAttribute(Attribute::WriteOnly)),
+ "Attributes "
+ "'readnone and writeonly' are incompatible!",
+ V);
+
+ Check(!(Attrs.hasAttribute(Attribute::ReadOnly) &&
+ Attrs.hasAttribute(Attribute::WriteOnly)),
+ "Attributes "
+ "'readonly and writeonly' are incompatible!",
+ V);
+
+ Check(!(Attrs.hasAttribute(Attribute::NoInline) &&
+ Attrs.hasAttribute(Attribute::AlwaysInline)),
+ "Attributes "
+ "'noinline and alwaysinline' are incompatible!",
+ V);
AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
for (Attribute Attr : Attrs) {
@@ -1804,55 +1832,61 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
if (Attrs.hasAttribute(Attribute::ByVal)) {
+ if (Attrs.hasAttribute(Attribute::Alignment)) {
+ Align AttrAlign = Attrs.getAlignment().valueOrOne();
+ Align MaxAlign(ParamMaxAlignment);
+ Check(AttrAlign <= MaxAlign,
+ "Attribute 'align' exceed the max size 2^14", V);
+ }
SmallPtrSet<Type *, 4> Visited;
- Assert(Attrs.getByValType()->isSized(&Visited),
- "Attribute 'byval' does not support unsized types!", V);
+ Check(Attrs.getByValType()->isSized(&Visited),
+ "Attribute 'byval' does not support unsized types!", V);
}
if (Attrs.hasAttribute(Attribute::ByRef)) {
SmallPtrSet<Type *, 4> Visited;
- Assert(Attrs.getByRefType()->isSized(&Visited),
- "Attribute 'byref' does not support unsized types!", V);
+ Check(Attrs.getByRefType()->isSized(&Visited),
+ "Attribute 'byref' does not support unsized types!", V);
}
if (Attrs.hasAttribute(Attribute::InAlloca)) {
SmallPtrSet<Type *, 4> Visited;
- Assert(Attrs.getInAllocaType()->isSized(&Visited),
- "Attribute 'inalloca' does not support unsized types!", V);
+ Check(Attrs.getInAllocaType()->isSized(&Visited),
+ "Attribute 'inalloca' does not support unsized types!", V);
}
if (Attrs.hasAttribute(Attribute::Preallocated)) {
SmallPtrSet<Type *, 4> Visited;
- Assert(Attrs.getPreallocatedType()->isSized(&Visited),
- "Attribute 'preallocated' does not support unsized types!", V);
+ Check(Attrs.getPreallocatedType()->isSized(&Visited),
+ "Attribute 'preallocated' does not support unsized types!", V);
}
if (!PTy->isOpaque()) {
if (!isa<PointerType>(PTy->getNonOpaquePointerElementType()))
- Assert(!Attrs.hasAttribute(Attribute::SwiftError),
- "Attribute 'swifterror' only applies to parameters "
- "with pointer to pointer type!",
- V);
+ Check(!Attrs.hasAttribute(Attribute::SwiftError),
+ "Attribute 'swifterror' only applies to parameters "
+ "with pointer to pointer type!",
+ V);
if (Attrs.hasAttribute(Attribute::ByRef)) {
- Assert(Attrs.getByRefType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'byref' type does not match parameter!", V);
+ Check(Attrs.getByRefType() == PTy->getNonOpaquePointerElementType(),
+ "Attribute 'byref' type does not match parameter!", V);
}
if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
- Assert(Attrs.getByValType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'byval' type does not match parameter!", V);
+ Check(Attrs.getByValType() == PTy->getNonOpaquePointerElementType(),
+ "Attribute 'byval' type does not match parameter!", V);
}
if (Attrs.hasAttribute(Attribute::Preallocated)) {
- Assert(Attrs.getPreallocatedType() ==
- PTy->getNonOpaquePointerElementType(),
- "Attribute 'preallocated' type does not match parameter!", V);
+ Check(Attrs.getPreallocatedType() ==
+ PTy->getNonOpaquePointerElementType(),
+ "Attribute 'preallocated' type does not match parameter!", V);
}
if (Attrs.hasAttribute(Attribute::InAlloca)) {
- Assert(Attrs.getInAllocaType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'inalloca' type does not match parameter!", V);
+ Check(Attrs.getInAllocaType() == PTy->getNonOpaquePointerElementType(),
+ "Attribute 'inalloca' type does not match parameter!", V);
}
if (Attrs.hasAttribute(Attribute::ElementType)) {
- Assert(Attrs.getElementType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'elementtype' type does not match parameter!", V);
+ Check(Attrs.getElementType() == PTy->getNonOpaquePointerElementType(),
+ "Attribute 'elementtype' type does not match parameter!", V);
}
}
}
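
The new block at the top of this hunk caps the 'align' attribute on byval
parameters at ParamMaxAlignment (2^14). A sketch of an attribute set that now
fails, assuming Ctx is the LLVMContext and F a function whose first parameter
is a pointer:

    AttrBuilder B(Ctx);
    B.addByValAttr(Type::getInt32Ty(Ctx));  // byval(i32)
    B.addAlignmentAttr(Align(1ULL << 15));  // 2^15 > ParamMaxAlignment
    F->addParamAttrs(0, B);                 // verifier now rejects F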
@@ -1877,14 +1911,14 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
return;
if (AttributeListsVisited.insert(Attrs.getRawPointer()).second) {
- Assert(Attrs.hasParentContext(Context),
- "Attribute list does not match Module context!", &Attrs, V);
+ Check(Attrs.hasParentContext(Context),
+ "Attribute list does not match Module context!", &Attrs, V);
for (const auto &AttrSet : Attrs) {
- Assert(!AttrSet.hasAttributes() || AttrSet.hasParentContext(Context),
- "Attribute set does not match Module context!", &AttrSet, V);
+ Check(!AttrSet.hasAttributes() || AttrSet.hasParentContext(Context),
+ "Attribute set does not match Module context!", &AttrSet, V);
for (const auto &A : AttrSet) {
- Assert(A.hasParentContext(Context),
- "Attribute does not match Module context!", &A, V);
+ Check(A.hasParentContext(Context),
+ "Attribute does not match Module context!", &A, V);
}
}
}
@@ -1899,11 +1933,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
// Verify return value attributes.
AttributeSet RetAttrs = Attrs.getRetAttrs();
for (Attribute RetAttr : RetAttrs)
- Assert(RetAttr.isStringAttribute() ||
- Attribute::canUseAsRetAttr(RetAttr.getKindAsEnum()),
- "Attribute '" + RetAttr.getAsString() +
- "' does not apply to function return values",
- V);
+ Check(RetAttr.isStringAttribute() ||
+ Attribute::canUseAsRetAttr(RetAttr.getKindAsEnum()),
+ "Attribute '" + RetAttr.getAsString() +
+ "' does not apply to function return values",
+ V);
verifyParameterAttrs(RetAttrs, FT->getReturnType(), V);
@@ -1913,56 +1947,55 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
AttributeSet ArgAttrs = Attrs.getParamAttrs(i);
if (!IsIntrinsic) {
- Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
- "immarg attribute only applies to intrinsics",V);
+ Check(!ArgAttrs.hasAttribute(Attribute::ImmArg),
+ "immarg attribute only applies to intrinsics", V);
if (!IsInlineAsm)
- Assert(!ArgAttrs.hasAttribute(Attribute::ElementType),
- "Attribute 'elementtype' can only be applied to intrinsics"
- " and inline asm.", V);
+ Check(!ArgAttrs.hasAttribute(Attribute::ElementType),
+ "Attribute 'elementtype' can only be applied to intrinsics"
+ " and inline asm.",
+ V);
}
verifyParameterAttrs(ArgAttrs, Ty, V);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
- Assert(!SawNest, "More than one parameter has attribute nest!", V);
+ Check(!SawNest, "More than one parameter has attribute nest!", V);
SawNest = true;
}
if (ArgAttrs.hasAttribute(Attribute::Returned)) {
- Assert(!SawReturned, "More than one parameter has attribute returned!",
- V);
- Assert(Ty->canLosslesslyBitCastTo(FT->getReturnType()),
- "Incompatible argument and return types for 'returned' attribute",
- V);
+ Check(!SawReturned, "More than one parameter has attribute returned!", V);
+ Check(Ty->canLosslesslyBitCastTo(FT->getReturnType()),
+ "Incompatible argument and return types for 'returned' attribute",
+ V);
SawReturned = true;
}
if (ArgAttrs.hasAttribute(Attribute::StructRet)) {
- Assert(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
- Assert(i == 0 || i == 1,
- "Attribute 'sret' is not on first or second parameter!", V);
+ Check(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
+ Check(i == 0 || i == 1,
+ "Attribute 'sret' is not on first or second parameter!", V);
SawSRet = true;
}
if (ArgAttrs.hasAttribute(Attribute::SwiftSelf)) {
- Assert(!SawSwiftSelf, "Cannot have multiple 'swiftself' parameters!", V);
+ Check(!SawSwiftSelf, "Cannot have multiple 'swiftself' parameters!", V);
SawSwiftSelf = true;
}
if (ArgAttrs.hasAttribute(Attribute::SwiftAsync)) {
- Assert(!SawSwiftAsync, "Cannot have multiple 'swiftasync' parameters!", V);
+ Check(!SawSwiftAsync, "Cannot have multiple 'swiftasync' parameters!", V);
SawSwiftAsync = true;
}
if (ArgAttrs.hasAttribute(Attribute::SwiftError)) {
- Assert(!SawSwiftError, "Cannot have multiple 'swifterror' parameters!",
- V);
+ Check(!SawSwiftError, "Cannot have multiple 'swifterror' parameters!", V);
SawSwiftError = true;
}
if (ArgAttrs.hasAttribute(Attribute::InAlloca)) {
- Assert(i == FT->getNumParams() - 1,
- "inalloca isn't on the last parameter!", V);
+ Check(i == FT->getNumParams() - 1,
+ "inalloca isn't on the last parameter!", V);
}
}
@@ -1971,53 +2004,53 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
verifyAttributeTypes(Attrs.getFnAttrs(), V);
for (Attribute FnAttr : Attrs.getFnAttrs())
- Assert(FnAttr.isStringAttribute() ||
- Attribute::canUseAsFnAttr(FnAttr.getKindAsEnum()),
- "Attribute '" + FnAttr.getAsString() +
- "' does not apply to functions!",
- V);
-
- Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
- Attrs.hasFnAttr(Attribute::ReadOnly)),
- "Attributes 'readnone and readonly' are incompatible!", V);
-
- Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
- Attrs.hasFnAttr(Attribute::WriteOnly)),
- "Attributes 'readnone and writeonly' are incompatible!", V);
-
- Assert(!(Attrs.hasFnAttr(Attribute::ReadOnly) &&
- Attrs.hasFnAttr(Attribute::WriteOnly)),
- "Attributes 'readonly and writeonly' are incompatible!", V);
-
- Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
- Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)),
- "Attributes 'readnone and inaccessiblemem_or_argmemonly' are "
- "incompatible!",
- V);
-
- Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
- Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)),
- "Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
-
- Assert(!(Attrs.hasFnAttr(Attribute::NoInline) &&
- Attrs.hasFnAttr(Attribute::AlwaysInline)),
- "Attributes 'noinline and alwaysinline' are incompatible!", V);
+ Check(FnAttr.isStringAttribute() ||
+ Attribute::canUseAsFnAttr(FnAttr.getKindAsEnum()),
+ "Attribute '" + FnAttr.getAsString() +
+ "' does not apply to functions!",
+ V);
+
+ Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::ReadOnly)),
+ "Attributes 'readnone and readonly' are incompatible!", V);
+
+ Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::WriteOnly)),
+ "Attributes 'readnone and writeonly' are incompatible!", V);
+
+ Check(!(Attrs.hasFnAttr(Attribute::ReadOnly) &&
+ Attrs.hasFnAttr(Attribute::WriteOnly)),
+ "Attributes 'readonly and writeonly' are incompatible!", V);
+
+ Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)),
+ "Attributes 'readnone and inaccessiblemem_or_argmemonly' are "
+ "incompatible!",
+ V);
+
+ Check(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)),
+ "Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
+
+ Check(!(Attrs.hasFnAttr(Attribute::NoInline) &&
+ Attrs.hasFnAttr(Attribute::AlwaysInline)),
+ "Attributes 'noinline and alwaysinline' are incompatible!", V);
if (Attrs.hasFnAttr(Attribute::OptimizeNone)) {
- Assert(Attrs.hasFnAttr(Attribute::NoInline),
- "Attribute 'optnone' requires 'noinline'!", V);
+ Check(Attrs.hasFnAttr(Attribute::NoInline),
+ "Attribute 'optnone' requires 'noinline'!", V);
- Assert(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
- "Attributes 'optsize and optnone' are incompatible!", V);
+ Check(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
+ "Attributes 'optsize and optnone' are incompatible!", V);
- Assert(!Attrs.hasFnAttr(Attribute::MinSize),
- "Attributes 'minsize and optnone' are incompatible!", V);
+ Check(!Attrs.hasFnAttr(Attribute::MinSize),
+ "Attributes 'minsize and optnone' are incompatible!", V);
}
if (Attrs.hasFnAttr(Attribute::JumpTable)) {
const GlobalValue *GV = cast<GlobalValue>(V);
- Assert(GV->hasGlobalUnnamedAddr(),
- "Attribute 'jumptable' requires 'unnamed_addr'", V);
+ Check(GV->hasGlobalUnnamedAddr(),
+ "Attribute 'jumptable' requires 'unnamed_addr'", V);
}
if (Attrs.hasFnAttr(Attribute::AllocSize)) {
@@ -2047,6 +2080,25 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
return;
}
+ if (Attrs.hasFnAttr(Attribute::AllocKind)) {
+ AllocFnKind K = Attrs.getAllocKind();
+ AllocFnKind Type =
+ K & (AllocFnKind::Alloc | AllocFnKind::Realloc | AllocFnKind::Free);
+ if (!is_contained(
+ {AllocFnKind::Alloc, AllocFnKind::Realloc, AllocFnKind::Free},
+ Type))
+ CheckFailed(
+ "'allockind()' requires exactly one of alloc, realloc, and free");
+ if ((Type == AllocFnKind::Free) &&
+ ((K & (AllocFnKind::Uninitialized | AllocFnKind::Zeroed |
+ AllocFnKind::Aligned)) != AllocFnKind::Unknown))
+ CheckFailed("'allockind(\"free\")' doesn't allow uninitialized, zeroed, "
+ "or aligned modifiers.");
+ AllocFnKind ZeroedUninit = AllocFnKind::Uninitialized | AllocFnKind::Zeroed;
+ if ((K & ZeroedUninit) == ZeroedUninit)
+ CheckFailed("'allockind()' can't be both zeroed and uninitialized");
+ }
+
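
The new allockind verification is pure bit arithmetic. A self-contained sketch of its three rules; the flag values mirror what llvm/IR/Attributes.h is assumed to define and are not taken from this diff:

#include <cstdio>

enum AllocFnKindBits : unsigned {
  AFK_Unknown       = 0,
  AFK_Alloc         = 1u << 0,
  AFK_Realloc       = 1u << 1,
  AFK_Free          = 1u << 2,
  AFK_Uninitialized = 1u << 3,
  AFK_Zeroed        = 1u << 4,
  AFK_Aligned       = 1u << 5,
};

static bool isValidAllocKind(unsigned K) {
  // Rule 1: exactly one of alloc, realloc, free.
  unsigned Type = K & (AFK_Alloc | AFK_Realloc | AFK_Free);
  if (Type != AFK_Alloc && Type != AFK_Realloc && Type != AFK_Free)
    return false;
  // Rule 2: free admits no uninitialized/zeroed/aligned modifiers.
  if (Type == AFK_Free && (K & (AFK_Uninitialized | AFK_Zeroed | AFK_Aligned)))
    return false;
  // Rule 3: zeroed and uninitialized are mutually exclusive.
  unsigned ZU = AFK_Uninitialized | AFK_Zeroed;
  return (K & ZU) != ZU;
}

int main() {
  std::printf("%d\n", isValidAllocKind(AFK_Alloc | AFK_Zeroed));   // 1
  std::printf("%d\n", isValidAllocKind(AFK_Free | AFK_Zeroed));    // 0
  std::printf("%d\n", isValidAllocKind(AFK_Alloc | AFK_Realloc));  // 0
  std::printf("%d\n", isValidAllocKind(AFK_Alloc | AFK_Zeroed |
                                       AFK_Uninitialized));        // 0
}
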
if (Attrs.hasFnAttr(Attribute::VScaleRange)) {
unsigned VScaleMin = Attrs.getFnAttrs().getVScaleRangeMin();
if (VScaleMin == 0)
@@ -2073,27 +2125,27 @@ void Verifier::verifyFunctionMetadata(
for (const auto &Pair : MDs) {
if (Pair.first == LLVMContext::MD_prof) {
MDNode *MD = Pair.second;
- Assert(MD->getNumOperands() >= 2,
- "!prof annotations should have no less than 2 operands", MD);
+ Check(MD->getNumOperands() >= 2,
+ "!prof annotations should have no less than 2 operands", MD);
// Check first operand.
- Assert(MD->getOperand(0) != nullptr, "first operand should not be null",
- MD);
- Assert(isa<MDString>(MD->getOperand(0)),
- "expected string with name of the !prof annotation", MD);
+ Check(MD->getOperand(0) != nullptr, "first operand should not be null",
+ MD);
+ Check(isa<MDString>(MD->getOperand(0)),
+ "expected string with name of the !prof annotation", MD);
MDString *MDS = cast<MDString>(MD->getOperand(0));
StringRef ProfName = MDS->getString();
- Assert(ProfName.equals("function_entry_count") ||
- ProfName.equals("synthetic_function_entry_count"),
- "first operand should be 'function_entry_count'"
- " or 'synthetic_function_entry_count'",
- MD);
+ Check(ProfName.equals("function_entry_count") ||
+ ProfName.equals("synthetic_function_entry_count"),
+ "first operand should be 'function_entry_count'"
+ " or 'synthetic_function_entry_count'",
+ MD);
// Check second operand.
- Assert(MD->getOperand(1) != nullptr, "second operand should not be null",
- MD);
- Assert(isa<ConstantAsMetadata>(MD->getOperand(1)),
- "expected integer argument to function_entry_count", MD);
+ Check(MD->getOperand(1) != nullptr, "second operand should not be null",
+ MD);
+ Check(isa<ConstantAsMetadata>(MD->getOperand(1)),
+ "expected integer argument to function_entry_count", MD);
}
}
}
@@ -2115,8 +2167,8 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) {
if (const auto *GV = dyn_cast<GlobalValue>(C)) {
// Global Values get visited separately, but we do need to make sure
// that the global value is in the correct module
- Assert(GV->getParent() == &M, "Referencing global in another module!",
- EntryC, &M, GV, GV->getParent());
+ Check(GV->getParent() == &M, "Referencing global in another module!",
+ EntryC, &M, GV, GV->getParent());
continue;
}
@@ -2134,9 +2186,9 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) {
void Verifier::visitConstantExpr(const ConstantExpr *CE) {
if (CE->getOpcode() == Instruction::BitCast)
- Assert(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
- CE->getType()),
- "Invalid bitcast", CE);
+ Check(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
+ CE->getType()),
+ "Invalid bitcast", CE);
}
bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
@@ -2155,17 +2207,17 @@ void Verifier::verifyInlineAsmCall(const CallBase &Call) {
if (CI.isIndirect) {
const Value *Arg = Call.getArgOperand(ArgNo);
- Assert(Arg->getType()->isPointerTy(),
- "Operand for indirect constraint must have pointer type",
- &Call);
+ Check(Arg->getType()->isPointerTy(),
+ "Operand for indirect constraint must have pointer type", &Call);
- Assert(Call.getAttributes().getParamElementType(ArgNo),
- "Operand for indirect constraint must have elementtype attribute",
- &Call);
+ Check(Call.getParamElementType(ArgNo),
+ "Operand for indirect constraint must have elementtype attribute",
+ &Call);
} else {
- Assert(!Call.paramHasAttr(ArgNo, Attribute::ElementType),
- "Elementtype attribute can only be applied for indirect "
- "constraints", &Call);
+ Check(!Call.paramHasAttr(ArgNo, Attribute::ElementType),
+ "Elementtype attribute can only be applied for indirect "
+ "constraints",
+ &Call);
}
ArgNo++;
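
What the indirect-constraint branch above expects from callers, sketched as C++ comments (the IR line is an illustration of the elementtype form used with opaque pointers, not text from this commit):

// An indirect ("=*m" style) inline asm operand must be a pointer carrying an
// elementtype attribute that names the pointed-to type, e.g.
//
//   call void asm "movl $$1, $0", "=*m"(ptr elementtype(i32) %p)
//
// Call.getParamElementType(ArgNo) then yields i32, and returns null when the
// attribute is absent, which is the case the Check above rejects.
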
@@ -2178,50 +2230,50 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
Call.getCalledFunction()->getIntrinsicID() ==
Intrinsic::experimental_gc_statepoint);
- Assert(!Call.doesNotAccessMemory() && !Call.onlyReadsMemory() &&
- !Call.onlyAccessesArgMemory(),
- "gc.statepoint must read and write all memory to preserve "
- "reordering restrictions required by safepoint semantics",
- Call);
+ Check(!Call.doesNotAccessMemory() && !Call.onlyReadsMemory() &&
+ !Call.onlyAccessesArgMemory(),
+ "gc.statepoint must read and write all memory to preserve "
+ "reordering restrictions required by safepoint semantics",
+ Call);
const int64_t NumPatchBytes =
cast<ConstantInt>(Call.getArgOperand(1))->getSExtValue();
assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
- Assert(NumPatchBytes >= 0,
- "gc.statepoint number of patchable bytes must be "
- "positive",
- Call);
-
- const Value *Target = Call.getArgOperand(2);
- auto *PT = dyn_cast<PointerType>(Target->getType());
- Assert(PT && PT->getPointerElementType()->isFunctionTy(),
- "gc.statepoint callee must be of function pointer type", Call, Target);
- FunctionType *TargetFuncType =
- cast<FunctionType>(PT->getPointerElementType());
+ Check(NumPatchBytes >= 0,
+ "gc.statepoint number of patchable bytes must be "
+ "positive",
+ Call);
+
+ Type *TargetElemType = Call.getParamElementType(2);
+ Check(TargetElemType,
+ "gc.statepoint callee argument must have elementtype attribute", Call);
+ FunctionType *TargetFuncType = dyn_cast<FunctionType>(TargetElemType);
+ Check(TargetFuncType,
+ "gc.statepoint callee elementtype must be function type", Call);
const int NumCallArgs = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
- Assert(NumCallArgs >= 0,
- "gc.statepoint number of arguments to underlying call "
- "must be positive",
- Call);
+ Check(NumCallArgs >= 0,
+ "gc.statepoint number of arguments to underlying call "
+ "must be positive",
+ Call);
const int NumParams = (int)TargetFuncType->getNumParams();
if (TargetFuncType->isVarArg()) {
- Assert(NumCallArgs >= NumParams,
- "gc.statepoint mismatch in number of vararg call args", Call);
+ Check(NumCallArgs >= NumParams,
+ "gc.statepoint mismatch in number of vararg call args", Call);
// TODO: Remove this limitation
- Assert(TargetFuncType->getReturnType()->isVoidTy(),
- "gc.statepoint doesn't support wrapping non-void "
- "vararg functions yet",
- Call);
+ Check(TargetFuncType->getReturnType()->isVoidTy(),
+ "gc.statepoint doesn't support wrapping non-void "
+ "vararg functions yet",
+ Call);
} else
- Assert(NumCallArgs == NumParams,
- "gc.statepoint mismatch in number of call args", Call);
+ Check(NumCallArgs == NumParams,
+ "gc.statepoint mismatch in number of call args", Call);
const uint64_t Flags
= cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
- Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
- "unknown flag used in gc.statepoint flags argument", Call);
+ Check((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
+ "unknown flag used in gc.statepoint flags argument", Call);
// Verify that the types of the call parameter arguments match
// the type of the wrapped callee.
@@ -2229,63 +2281,62 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
for (int i = 0; i < NumParams; i++) {
Type *ParamType = TargetFuncType->getParamType(i);
Type *ArgType = Call.getArgOperand(5 + i)->getType();
- Assert(ArgType == ParamType,
- "gc.statepoint call argument does not match wrapped "
- "function type",
- Call);
+ Check(ArgType == ParamType,
+ "gc.statepoint call argument does not match wrapped "
+ "function type",
+ Call);
if (TargetFuncType->isVarArg()) {
AttributeSet ArgAttrs = Attrs.getParamAttrs(5 + i);
- Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
- "Attribute 'sret' cannot be used for vararg call arguments!",
- Call);
+ Check(!ArgAttrs.hasAttribute(Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!", Call);
}
}
const int EndCallArgsInx = 4 + NumCallArgs;
const Value *NumTransitionArgsV = Call.getArgOperand(EndCallArgsInx + 1);
- Assert(isa<ConstantInt>(NumTransitionArgsV),
- "gc.statepoint number of transition arguments "
- "must be constant integer",
- Call);
+ Check(isa<ConstantInt>(NumTransitionArgsV),
+ "gc.statepoint number of transition arguments "
+ "must be constant integer",
+ Call);
const int NumTransitionArgs =
cast<ConstantInt>(NumTransitionArgsV)->getZExtValue();
- Assert(NumTransitionArgs == 0,
- "gc.statepoint w/inline transition bundle is deprecated", Call);
+ Check(NumTransitionArgs == 0,
+ "gc.statepoint w/inline transition bundle is deprecated", Call);
const int EndTransitionArgsInx = EndCallArgsInx + 1 + NumTransitionArgs;
const Value *NumDeoptArgsV = Call.getArgOperand(EndTransitionArgsInx + 1);
- Assert(isa<ConstantInt>(NumDeoptArgsV),
- "gc.statepoint number of deoptimization arguments "
- "must be constant integer",
- Call);
+ Check(isa<ConstantInt>(NumDeoptArgsV),
+ "gc.statepoint number of deoptimization arguments "
+ "must be constant integer",
+ Call);
const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
- Assert(NumDeoptArgs == 0,
- "gc.statepoint w/inline deopt operands is deprecated", Call);
+ Check(NumDeoptArgs == 0,
+ "gc.statepoint w/inline deopt operands is deprecated", Call);
const int ExpectedNumArgs = 7 + NumCallArgs;
- Assert(ExpectedNumArgs == (int)Call.arg_size(),
- "gc.statepoint too many arguments", Call);
+ Check(ExpectedNumArgs == (int)Call.arg_size(),
+ "gc.statepoint too many arguments", Call);
// Check that the only uses of this gc.statepoint are gc.result or
// gc.relocate calls which are tied to this statepoint and thus part
// of the same statepoint sequence
for (const User *U : Call.users()) {
const CallInst *UserCall = dyn_cast<const CallInst>(U);
- Assert(UserCall, "illegal use of statepoint token", Call, U);
+ Check(UserCall, "illegal use of statepoint token", Call, U);
if (!UserCall)
continue;
- Assert(isa<GCRelocateInst>(UserCall) || isa<GCResultInst>(UserCall),
- "gc.result or gc.relocate are the only value uses "
- "of a gc.statepoint",
- Call, U);
+ Check(isa<GCRelocateInst>(UserCall) || isa<GCResultInst>(UserCall),
+ "gc.result or gc.relocate are the only value uses "
+ "of a gc.statepoint",
+ Call, U);
if (isa<GCResultInst>(UserCall)) {
- Assert(UserCall->getArgOperand(0) == &Call,
- "gc.result connected to wrong gc.statepoint", Call, UserCall);
+ Check(UserCall->getArgOperand(0) == &Call,
+ "gc.result connected to wrong gc.statepoint", Call, UserCall);
} else if (isa<GCRelocateInst>(UserCall)) {
- Assert(UserCall->getArgOperand(0) == &Call,
- "gc.relocate connected to wrong gc.statepoint", Call, UserCall);
+ Check(UserCall->getArgOperand(0) == &Call,
+ "gc.relocate connected to wrong gc.statepoint", Call, UserCall);
}
}
@@ -2304,11 +2355,11 @@ void Verifier::verifyFrameRecoverIndices() {
Function *F = Counts.first;
unsigned EscapedObjectCount = Counts.second.first;
unsigned MaxRecoveredIndex = Counts.second.second;
- Assert(MaxRecoveredIndex <= EscapedObjectCount,
- "all indices passed to llvm.localrecover must be less than the "
- "number of arguments passed to llvm.localescape in the parent "
- "function",
- F);
+ Check(MaxRecoveredIndex <= EscapedObjectCount,
+ "all indices passed to llvm.localrecover must be less than the "
+ "number of arguments passed to llvm.localescape in the parent "
+ "function",
+ F);
}
}
@@ -2345,8 +2396,8 @@ void Verifier::verifySiblingFuncletUnwinds() {
CycleNodes.push_back(CycleTerminator);
CyclePad = getSuccPad(CycleTerminator);
} while (CyclePad != SuccPad);
- Assert(false, "EH pads can't handle each other's exceptions",
- ArrayRef<Instruction *>(CycleNodes));
+ Check(false, "EH pads can't handle each other's exceptions",
+ ArrayRef<Instruction *>(CycleNodes));
}
// Don't re-walk a node we've already checked
if (!Visited.insert(SuccPad).second)
@@ -2374,24 +2425,24 @@ void Verifier::visitFunction(const Function &F) {
FunctionType *FT = F.getFunctionType();
unsigned NumArgs = F.arg_size();
- Assert(&Context == &F.getContext(),
- "Function context does not match Module context!", &F);
+ Check(&Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
- Assert(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
- Assert(FT->getNumParams() == NumArgs,
- "# formal arguments must match # of arguments for function type!", &F,
- FT);
- Assert(F.getReturnType()->isFirstClassType() ||
- F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(),
- "Functions cannot return aggregate values!", &F);
+ Check(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+ Check(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!", &F,
+ FT);
+ Check(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(),
+ "Functions cannot return aggregate values!", &F);
- Assert(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
- "Invalid struct return type!", &F);
+ Check(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+ "Invalid struct return type!", &F);
AttributeList Attrs = F.getAttributes();
- Assert(verifyAttributeCount(Attrs, FT->getNumParams()),
- "Attribute after last parameter!", &F);
+ Check(verifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attribute after last parameter!", &F);
bool IsIntrinsic = F.isIntrinsic();
@@ -2401,11 +2452,11 @@ void Verifier::visitFunction(const Function &F) {
// On function declarations/definitions, we do not support the builtin
// attribute. We do not check this in VerifyFunctionAttrs since that is
// checking for Attributes that can/can not ever be on functions.
- Assert(!Attrs.hasFnAttr(Attribute::Builtin),
- "Attribute 'builtin' can only be applied to a callsite.", &F);
+ Check(!Attrs.hasFnAttr(Attribute::Builtin),
+ "Attribute 'builtin' can only be applied to a callsite.", &F);
- Assert(!Attrs.hasAttrSomewhere(Attribute::ElementType),
- "Attribute 'elementtype' can only be applied to a callsite.", &F);
+ Check(!Attrs.hasAttrSomewhere(Attribute::ElementType),
+ "Attribute 'elementtype' can only be applied to a callsite.", &F);
// Check that this function meets the restrictions on this calling convention.
// Sometimes varargs is used for perfectly forwarding thunks, so some of these
@@ -2415,38 +2466,37 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::C:
break;
case CallingConv::X86_INTR: {
- Assert(F.arg_empty() || Attrs.hasParamAttr(0, Attribute::ByVal),
- "Calling convention parameter requires byval", &F);
+ Check(F.arg_empty() || Attrs.hasParamAttr(0, Attribute::ByVal),
+ "Calling convention parameter requires byval", &F);
break;
}
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
- Assert(F.getReturnType()->isVoidTy(),
- "Calling convention requires void return type", &F);
+ Check(F.getReturnType()->isVoidTy(),
+ "Calling convention requires void return type", &F);
LLVM_FALLTHROUGH;
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
- Assert(!F.hasStructRetAttr(),
- "Calling convention does not allow sret", &F);
+ Check(!F.hasStructRetAttr(), "Calling convention does not allow sret", &F);
if (F.getCallingConv() != CallingConv::SPIR_KERNEL) {
const unsigned StackAS = DL.getAllocaAddrSpace();
unsigned i = 0;
for (const Argument &Arg : F.args()) {
- Assert(!Attrs.hasParamAttr(i, Attribute::ByVal),
- "Calling convention disallows byval", &F);
- Assert(!Attrs.hasParamAttr(i, Attribute::Preallocated),
- "Calling convention disallows preallocated", &F);
- Assert(!Attrs.hasParamAttr(i, Attribute::InAlloca),
- "Calling convention disallows inalloca", &F);
+ Check(!Attrs.hasParamAttr(i, Attribute::ByVal),
+ "Calling convention disallows byval", &F);
+ Check(!Attrs.hasParamAttr(i, Attribute::Preallocated),
+ "Calling convention disallows preallocated", &F);
+ Check(!Attrs.hasParamAttr(i, Attribute::InAlloca),
+ "Calling convention disallows inalloca", &F);
if (Attrs.hasParamAttr(i, Attribute::ByRef)) {
// FIXME: Should also disallow LDS and GDS, but we don't have the enum
// value here.
- Assert(Arg.getType()->getPointerAddressSpace() != StackAS,
- "Calling convention disallows stack byref", &F);
+ Check(Arg.getType()->getPointerAddressSpace() != StackAS,
+ "Calling convention disallows stack byref", &F);
}
++i;
@@ -2459,27 +2509,28 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::Intel_OCL_BI:
case CallingConv::PTX_Kernel:
case CallingConv::PTX_Device:
- Assert(!F.isVarArg(), "Calling convention does not support varargs or "
- "perfect forwarding!",
- &F);
+ Check(!F.isVarArg(),
+ "Calling convention does not support varargs or "
+ "perfect forwarding!",
+ &F);
break;
}
// Check that the argument values match the function type for this function...
unsigned i = 0;
for (const Argument &Arg : F.args()) {
- Assert(Arg.getType() == FT->getParamType(i),
- "Argument value does not match function argument type!", &Arg,
- FT->getParamType(i));
- Assert(Arg.getType()->isFirstClassType(),
- "Function arguments must have first-class types!", &Arg);
+ Check(Arg.getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!", &Arg,
+ FT->getParamType(i));
+ Check(Arg.getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", &Arg);
if (!IsIntrinsic) {
- Assert(!Arg.getType()->isMetadataTy(),
- "Function takes metadata but isn't an intrinsic", &Arg, &F);
- Assert(!Arg.getType()->isTokenTy(),
- "Function takes token but isn't an intrinsic", &Arg, &F);
- Assert(!Arg.getType()->isX86_AMXTy(),
- "Function takes x86_amx but isn't an intrinsic", &Arg, &F);
+ Check(!Arg.getType()->isMetadataTy(),
+ "Function takes metadata but isn't an intrinsic", &Arg, &F);
+ Check(!Arg.getType()->isTokenTy(),
+ "Function takes token but isn't an intrinsic", &Arg, &F);
+ Check(!Arg.getType()->isX86_AMXTy(),
+ "Function takes x86_amx but isn't an intrinsic", &Arg, &F);
}
// Check that swifterror argument is only used by loads and stores.
@@ -2490,10 +2541,10 @@ void Verifier::visitFunction(const Function &F) {
}
if (!IsIntrinsic) {
- Assert(!F.getReturnType()->isTokenTy(),
- "Function returns a token but isn't an intrinsic", &F);
- Assert(!F.getReturnType()->isX86_AMXTy(),
- "Function returns a x86_amx but isn't an intrinsic", &F);
+ Check(!F.getReturnType()->isTokenTy(),
+ "Function returns a token but isn't an intrinsic", &F);
+ Check(!F.getReturnType()->isX86_AMXTy(),
+ "Function returns a x86_amx but isn't an intrinsic", &F);
}
// Get the function metadata attachments.
@@ -2506,44 +2557,44 @@ void Verifier::visitFunction(const Function &F) {
if (F.hasPersonalityFn()) {
auto *Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
if (Per)
- Assert(Per->getParent() == F.getParent(),
- "Referencing personality function in another module!",
- &F, F.getParent(), Per, Per->getParent());
+ Check(Per->getParent() == F.getParent(),
+ "Referencing personality function in another module!", &F,
+ F.getParent(), Per, Per->getParent());
}
if (F.isMaterializable()) {
// Function has a body somewhere we can't see.
- Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F,
- MDs.empty() ? nullptr : MDs.front().second);
+ Check(MDs.empty(), "unmaterialized function cannot have metadata", &F,
+ MDs.empty() ? nullptr : MDs.front().second);
} else if (F.isDeclaration()) {
for (const auto &I : MDs) {
// This is used for call site debug information.
- AssertDI(I.first != LLVMContext::MD_dbg ||
- !cast<DISubprogram>(I.second)->isDistinct(),
- "function declaration may only have a unique !dbg attachment",
- &F);
- Assert(I.first != LLVMContext::MD_prof,
- "function declaration may not have a !prof attachment", &F);
+ CheckDI(I.first != LLVMContext::MD_dbg ||
+ !cast<DISubprogram>(I.second)->isDistinct(),
+ "function declaration may only have a unique !dbg attachment",
+ &F);
+ Check(I.first != LLVMContext::MD_prof,
+ "function declaration may not have a !prof attachment", &F);
// Verify the metadata itself.
visitMDNode(*I.second, AreDebugLocsAllowed::Yes);
}
- Assert(!F.hasPersonalityFn(),
- "Function declaration shouldn't have a personality routine", &F);
+ Check(!F.hasPersonalityFn(),
+ "Function declaration shouldn't have a personality routine", &F);
} else {
// Verify that this function (which has a body) is not named "llvm.*". It
// is not legal to define intrinsics.
- Assert(!IsIntrinsic, "llvm intrinsics cannot be defined!", &F);
+ Check(!IsIntrinsic, "llvm intrinsics cannot be defined!", &F);
// Check the entry node
const BasicBlock *Entry = &F.getEntryBlock();
- Assert(pred_empty(Entry),
- "Entry block to function must not have predecessors!", Entry);
+ Check(pred_empty(Entry),
+ "Entry block to function must not have predecessors!", Entry);
// The address of the entry block cannot be taken, unless it is dead.
if (Entry->hasAddressTaken()) {
- Assert(!BlockAddress::lookup(Entry)->isConstantUsed(),
- "blockaddress may not be used with the entry block!", Entry);
+ Check(!BlockAddress::lookup(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
}
unsigned NumDebugAttachments = 0, NumProfAttachments = 0;
@@ -2556,26 +2607,26 @@ void Verifier::visitFunction(const Function &F) {
break;
case LLVMContext::MD_dbg: {
++NumDebugAttachments;
- AssertDI(NumDebugAttachments == 1,
- "function must have a single !dbg attachment", &F, I.second);
- AssertDI(isa<DISubprogram>(I.second),
- "function !dbg attachment must be a subprogram", &F, I.second);
- AssertDI(cast<DISubprogram>(I.second)->isDistinct(),
- "function definition may only have a distinct !dbg attachment",
- &F);
+ CheckDI(NumDebugAttachments == 1,
+ "function must have a single !dbg attachment", &F, I.second);
+ CheckDI(isa<DISubprogram>(I.second),
+ "function !dbg attachment must be a subprogram", &F, I.second);
+ CheckDI(cast<DISubprogram>(I.second)->isDistinct(),
+ "function definition may only have a distinct !dbg attachment",
+ &F);
auto *SP = cast<DISubprogram>(I.second);
const Function *&AttachedTo = DISubprogramAttachments[SP];
- AssertDI(!AttachedTo || AttachedTo == &F,
- "DISubprogram attached to more than one function", SP, &F);
+ CheckDI(!AttachedTo || AttachedTo == &F,
+ "DISubprogram attached to more than one function", SP, &F);
AttachedTo = &F;
AllowLocs = AreDebugLocsAllowed::Yes;
break;
}
case LLVMContext::MD_prof:
++NumProfAttachments;
- Assert(NumProfAttachments == 1,
- "function must have a single !prof attachment", &F, I.second);
+ Check(NumProfAttachments == 1,
+ "function must have a single !prof attachment", &F, I.second);
break;
}
@@ -2592,28 +2643,27 @@ void Verifier::visitFunction(const Function &F) {
const User *U;
if (F.hasAddressTaken(&U, false, true, false,
/*IgnoreARCAttachedCall=*/true))
- Assert(false, "Invalid user of intrinsic instruction!", U);
+ Check(false, "Invalid user of intrinsic instruction!", U);
}
// Check intrinsics' signatures.
switch (F.getIntrinsicID()) {
case Intrinsic::experimental_gc_get_pointer_base: {
FunctionType *FT = F.getFunctionType();
- Assert(FT->getNumParams() == 1, "wrong number of parameters", F);
- Assert(isa<PointerType>(F.getReturnType()),
- "gc.get.pointer.base must return a pointer", F);
- Assert(FT->getParamType(0) == F.getReturnType(),
- "gc.get.pointer.base operand and result must be of the same type",
- F);
+ Check(FT->getNumParams() == 1, "wrong number of parameters", F);
+ Check(isa<PointerType>(F.getReturnType()),
+ "gc.get.pointer.base must return a pointer", F);
+ Check(FT->getParamType(0) == F.getReturnType(),
+ "gc.get.pointer.base operand and result must be of the same type", F);
break;
}
case Intrinsic::experimental_gc_get_pointer_offset: {
FunctionType *FT = F.getFunctionType();
- Assert(FT->getNumParams() == 1, "wrong number of parameters", F);
- Assert(isa<PointerType>(FT->getParamType(0)),
- "gc.get.pointer.offset operand must be a pointer", F);
- Assert(F.getReturnType()->isIntegerTy(),
- "gc.get.pointer.offset must return integer", F);
+ Check(FT->getNumParams() == 1, "wrong number of parameters", F);
+ Check(isa<PointerType>(FT->getParamType(0)),
+ "gc.get.pointer.offset operand must be a pointer", F);
+ Check(F.getReturnType()->isIntegerTy(),
+ "gc.get.pointer.offset must return integer", F);
break;
}
}
@@ -2638,12 +2688,11 @@ void Verifier::visitFunction(const Function &F) {
return;
Metadata *Parent = DL->getRawScope();
- AssertDI(Parent && isa<DILocalScope>(Parent),
- "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
- Parent);
+ CheckDI(Parent && isa<DILocalScope>(Parent),
+ "DILocation's scope must be a DILocalScope", N, &F, &I, DL, Parent);
DILocalScope *Scope = DL->getInlinedAtScope();
- Assert(Scope, "Failed to find DILocalScope", DL);
+ Check(Scope, "Failed to find DILocalScope", DL);
if (!Seen.insert(Scope).second)
return;
@@ -2655,9 +2704,9 @@ void Verifier::visitFunction(const Function &F) {
if (SP && ((Scope != SP) && !Seen.insert(SP).second))
return;
- AssertDI(SP->describes(&F),
- "!dbg attachment points at wrong subprogram for function", N, &F,
- &I, DL, Scope, SP);
+ CheckDI(SP->describes(&F),
+ "!dbg attachment points at wrong subprogram for function", N, &F,
+ &I, DL, Scope, SP);
};
for (auto &BB : F)
for (auto &I : BB) {
@@ -2677,7 +2726,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
InstsInThisBlock.clear();
// Ensure that basic blocks have terminators!
- Assert(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+ Check(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
// Check constraints that this basic block imposes on all of the PHI nodes in
// it.
@@ -2686,10 +2735,10 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
llvm::sort(Preds);
for (const PHINode &PN : BB.phis()) {
- Assert(PN.getNumIncomingValues() == Preds.size(),
- "PHINode should have one entry for each predecessor of its "
- "parent basic block!",
- &PN);
+ Check(PN.getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!",
+ &PN);
// Get and sort all incoming values in the PHI node...
Values.clear();
@@ -2704,17 +2753,17 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
// particular basic block in this PHI node, that the incoming values are
// all identical.
//
- Assert(i == 0 || Values[i].first != Values[i - 1].first ||
- Values[i].second == Values[i - 1].second,
- "PHI node has multiple entries for the same basic block with "
- "different incoming values!",
- &PN, Values[i].first, Values[i].second, Values[i - 1].second);
+ Check(i == 0 || Values[i].first != Values[i - 1].first ||
+ Values[i].second == Values[i - 1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!",
+ &PN, Values[i].first, Values[i].second, Values[i - 1].second);
// Check to make sure that the predecessors and PHI node entries are
// matched up.
- Assert(Values[i].first == Preds[i],
- "PHI node entries do not match predecessors!", &PN,
- Values[i].first, Preds[i]);
+ Check(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", &PN,
+ Values[i].first, Preds[i]);
}
}
}
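
A hedged end-to-end sketch of the per-predecessor PHI check above: the C++ builder API happily constructs a PHI that covers only one of its block's two predecessors, leaving the verifier as the only guard (all names illustrative):

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  auto *FT = FunctionType::get(Type::getInt32Ty(Ctx),
                               {Type::getInt1Ty(Ctx)}, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", M);
  auto *Entry = BasicBlock::Create(Ctx, "entry", F);
  auto *A = BasicBlock::Create(Ctx, "a", F);
  auto *Join = BasicBlock::Create(Ctx, "join", F);

  IRBuilder<> B(Entry);
  B.CreateCondBr(F->getArg(0), A, Join); // %join has two preds: %entry, %a
  B.SetInsertPoint(A);
  B.CreateBr(Join);
  B.SetInsertPoint(Join);
  PHINode *P = B.CreatePHI(B.getInt32Ty(), 2);
  P->addIncoming(B.getInt32(0), A); // no entry for %entry: broken on purpose
  B.CreateRet(P);

  if (verifyFunction(*F, &errs())) // true: reports the missing PHI entry
    errs() << "verifier rejected @" << F->getName() << "\n";
}
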
@@ -2722,21 +2771,21 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
// Check that all instructions have their parent pointers set up correctly.
for (auto &I : BB)
{
- Assert(I.getParent() == &BB, "Instruction has bogus parent pointer!");
+ Check(I.getParent() == &BB, "Instruction has bogus parent pointer!");
}
}
void Verifier::visitTerminator(Instruction &I) {
// Ensure that terminators only exist at the end of the basic block.
- Assert(&I == I.getParent()->getTerminator(),
- "Terminator found in the middle of a basic block!", I.getParent());
+ Check(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
visitInstruction(I);
}
void Verifier::visitBranchInst(BranchInst &BI) {
if (BI.isConditional()) {
- Assert(BI.getCondition()->getType()->isIntegerTy(1),
- "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ Check(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
}
visitTerminator(BI);
}
@@ -2745,15 +2794,15 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
Function *F = RI.getParent()->getParent();
unsigned N = RI.getNumOperands();
if (F->getReturnType()->isVoidTy())
- Assert(N == 0,
- "Found return instr that returns non-void in Function of void "
- "return type!",
- &RI, F->getReturnType());
+ Check(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!",
+ &RI, F->getReturnType());
else
- Assert(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
- "Function return type does not match operand "
- "type of return inst!",
- &RI, F->getReturnType());
+ Check(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!",
+ &RI, F->getReturnType());
// Check to make sure that the return value has necessary properties for
// terminators...
@@ -2761,46 +2810,45 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
}
void Verifier::visitSwitchInst(SwitchInst &SI) {
- Assert(SI.getType()->isVoidTy(), "Switch must have void result type!", &SI);
+ Check(SI.getType()->isVoidTy(), "Switch must have void result type!", &SI);
// Check to make sure that all of the constants in the switch instruction
// have the same type as the switched-on value.
Type *SwitchTy = SI.getCondition()->getType();
SmallPtrSet<ConstantInt*, 32> Constants;
for (auto &Case : SI.cases()) {
- Assert(Case.getCaseValue()->getType() == SwitchTy,
- "Switch constants must all be same type as switch value!", &SI);
- Assert(Constants.insert(Case.getCaseValue()).second,
- "Duplicate integer as switch case", &SI, Case.getCaseValue());
+ Check(Case.getCaseValue()->getType() == SwitchTy,
+ "Switch constants must all be same type as switch value!", &SI);
+ Check(Constants.insert(Case.getCaseValue()).second,
+ "Duplicate integer as switch case", &SI, Case.getCaseValue());
}
visitTerminator(SI);
}
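
Likewise, SwitchInst::addCase() does not reject duplicate case values, so the "Duplicate integer as switch case" check above is the only guard on the C++ API path. A hedged sketch (names illustrative):

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  auto *FT = FunctionType::get(Type::getVoidTy(Ctx),
                               {Type::getInt32Ty(Ctx)}, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "f", M);
  auto *Entry = BasicBlock::Create(Ctx, "entry", F);
  auto *Dflt = BasicBlock::Create(Ctx, "dflt", F);
  auto *Case = BasicBlock::Create(Ctx, "case", F);

  IRBuilder<> B(Entry);
  SwitchInst *SI = B.CreateSwitch(F->getArg(0), Dflt, 2);
  SI->addCase(B.getInt32(7), Case);
  SI->addCase(B.getInt32(7), Case); // duplicate case value, broken on purpose
  B.SetInsertPoint(Dflt);
  B.CreateRetVoid();
  B.SetInsertPoint(Case);
  B.CreateRetVoid();

  if (verifyFunction(*F, &errs()))
    errs() << "verifier rejected @" << F->getName() << "\n";
}
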
void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
- Assert(BI.getAddress()->getType()->isPointerTy(),
- "Indirectbr operand must have pointer type!", &BI);
+ Check(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
- Assert(BI.getDestination(i)->getType()->isLabelTy(),
- "Indirectbr destinations must all have pointer type!", &BI);
+ Check(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
visitTerminator(BI);
}
void Verifier::visitCallBrInst(CallBrInst &CBI) {
- Assert(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!",
- &CBI);
+ Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
- Assert(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+ Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
for (unsigned i = 0, e = CBI.getNumSuccessors(); i != e; ++i)
- Assert(CBI.getSuccessor(i)->getType()->isLabelTy(),
- "Callbr successors must all have pointer type!", &CBI);
+ Check(CBI.getSuccessor(i)->getType()->isLabelTy(),
+ "Callbr successors must all have pointer type!", &CBI);
for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) {
- Assert(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)),
- "Using an unescaped label as a callbr argument!", &CBI);
+ Check(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)),
+ "Using an unescaped label as a callbr argument!", &CBI);
if (isa<BasicBlock>(CBI.getOperand(i)))
for (unsigned j = i + 1; j != e; ++j)
- Assert(CBI.getOperand(i) != CBI.getOperand(j),
- "Duplicate callbr destination!", &CBI);
+ Check(CBI.getOperand(i) != CBI.getOperand(j),
+ "Duplicate callbr destination!", &CBI);
}
{
SmallPtrSet<BasicBlock *, 4> ArgBBs;
@@ -2808,7 +2856,7 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
if (auto *BA = dyn_cast<BlockAddress>(V))
ArgBBs.insert(BA->getBasicBlock());
for (BasicBlock *BB : CBI.getIndirectDests())
- Assert(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI);
+ Check(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI);
}
verifyInlineAsmCall(CBI);
@@ -2816,12 +2864,12 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
}
void Verifier::visitSelectInst(SelectInst &SI) {
- Assert(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
- SI.getOperand(2)),
- "Invalid operands for select instruction!", &SI);
+ Check(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
- Assert(SI.getTrueValue()->getType() == SI.getType(),
- "Select values must have same type as select instruction!", &SI);
+ Check(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
visitInstruction(SI);
}
@@ -2829,7 +2877,7 @@ void Verifier::visitSelectInst(SelectInst &SI) {
/// a pass, if any exist, it's an error.
///
void Verifier::visitUserOp1(Instruction &I) {
- Assert(false, "User-defined operators should not live outside of a pass!", &I);
+ Check(false, "User-defined operators should not live outside of a pass!", &I);
}
void Verifier::visitTruncInst(TruncInst &I) {
@@ -2841,11 +2889,11 @@ void Verifier::visitTruncInst(TruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
- Assert(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
- Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "trunc source and destination must both be a vector or neither", &I);
- Assert(SrcBitSize > DestBitSize, "DestTy too big for Trunc", &I);
+ Check(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Check(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "trunc source and destination must both be a vector or neither", &I);
+ Check(SrcBitSize > DestBitSize, "DestTy too big for Trunc", &I);
visitInstruction(I);
}
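
The cast visitors in this and the following hunks repeat one pattern over different type classes and size relations; a recap reconstructed from the checks themselves:

// Shared shape of the integer/FP cast checks:
//   trunc   : int -> int, SrcBits > DestBits
//   zext    : int -> int, SrcBits < DestBits
//   sext    : int -> int, SrcBits < DestBits
//   fptrunc : fp  -> fp,  SrcBits > DestBits
//   fpext   : fp  -> fp,  SrcBits < DestBits
// In every case the source and destination must both be vectors or both be
// scalars, and the int<->fp conversions additionally require matching
// element counts when both sides are vectors.
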
@@ -2856,14 +2904,14 @@ void Verifier::visitZExtInst(ZExtInst &I) {
Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- Assert(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
- Assert(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
- Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "zext source and destination must both be a vector or neither", &I);
+ Check(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Check(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "zext source and destination must both be a vector or neither", &I);
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert(SrcBitSize < DestBitSize, "Type too small for ZExt", &I);
+ Check(SrcBitSize < DestBitSize, "Type too small for ZExt", &I);
visitInstruction(I);
}
@@ -2877,11 +2925,11 @@ void Verifier::visitSExtInst(SExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
- Assert(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
- Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "sext source and destination must both be a vector or neither", &I);
- Assert(SrcBitSize < DestBitSize, "Type too small for SExt", &I);
+ Check(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Check(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "sext source and destination must both be a vector or neither", &I);
+ Check(SrcBitSize < DestBitSize, "Type too small for SExt", &I);
visitInstruction(I);
}
@@ -2894,11 +2942,11 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert(SrcTy->isFPOrFPVectorTy(), "FPTrunc only operates on FP", &I);
- Assert(DestTy->isFPOrFPVectorTy(), "FPTrunc only produces an FP", &I);
- Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fptrunc source and destination must both be a vector or neither", &I);
- Assert(SrcBitSize > DestBitSize, "DestTy too big for FPTrunc", &I);
+ Check(SrcTy->isFPOrFPVectorTy(), "FPTrunc only operates on FP", &I);
+ Check(DestTy->isFPOrFPVectorTy(), "FPTrunc only produces an FP", &I);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fptrunc source and destination must both be a vector or neither", &I);
+ Check(SrcBitSize > DestBitSize, "DestTy too big for FPTrunc", &I);
visitInstruction(I);
}
@@ -2912,11 +2960,11 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert(SrcTy->isFPOrFPVectorTy(), "FPExt only operates on FP", &I);
- Assert(DestTy->isFPOrFPVectorTy(), "FPExt only produces an FP", &I);
- Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fpext source and destination must both be a vector or neither", &I);
- Assert(SrcBitSize < DestBitSize, "DestTy too small for FPExt", &I);
+ Check(SrcTy->isFPOrFPVectorTy(), "FPExt only operates on FP", &I);
+ Check(DestTy->isFPOrFPVectorTy(), "FPExt only produces an FP", &I);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fpext source and destination must both be a vector or neither", &I);
+ Check(SrcBitSize < DestBitSize, "DestTy too small for FPExt", &I);
visitInstruction(I);
}
@@ -2929,17 +2977,17 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert(SrcVec == DstVec,
- "UIToFP source and dest must both be vector or scalar", &I);
- Assert(SrcTy->isIntOrIntVectorTy(),
- "UIToFP source must be integer or integer vector", &I);
- Assert(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector",
- &I);
+ Check(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Check(SrcTy->isIntOrIntVectorTy(),
+ "UIToFP source must be integer or integer vector", &I);
+ Check(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector",
+ &I);
if (SrcVec && DstVec)
- Assert(cast<VectorType>(SrcTy)->getElementCount() ==
- cast<VectorType>(DestTy)->getElementCount(),
- "UIToFP source and dest vector length mismatch", &I);
+ Check(cast<VectorType>(SrcTy)->getElementCount() ==
+ cast<VectorType>(DestTy)->getElementCount(),
+ "UIToFP source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -2952,17 +3000,17 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert(SrcVec == DstVec,
- "SIToFP source and dest must both be vector or scalar", &I);
- Assert(SrcTy->isIntOrIntVectorTy(),
- "SIToFP source must be integer or integer vector", &I);
- Assert(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector",
- &I);
+ Check(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Check(SrcTy->isIntOrIntVectorTy(),
+ "SIToFP source must be integer or integer vector", &I);
+ Check(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector",
+ &I);
if (SrcVec && DstVec)
- Assert(cast<VectorType>(SrcTy)->getElementCount() ==
- cast<VectorType>(DestTy)->getElementCount(),
- "SIToFP source and dest vector length mismatch", &I);
+ Check(cast<VectorType>(SrcTy)->getElementCount() ==
+ cast<VectorType>(DestTy)->getElementCount(),
+ "SIToFP source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -2975,17 +3023,16 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert(SrcVec == DstVec,
- "FPToUI source and dest must both be vector or scalar", &I);
- Assert(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
- &I);
- Assert(DestTy->isIntOrIntVectorTy(),
- "FPToUI result must be integer or integer vector", &I);
+ Check(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Check(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", &I);
+ Check(DestTy->isIntOrIntVectorTy(),
+ "FPToUI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
- Assert(cast<VectorType>(SrcTy)->getElementCount() ==
- cast<VectorType>(DestTy)->getElementCount(),
- "FPToUI source and dest vector length mismatch", &I);
+ Check(cast<VectorType>(SrcTy)->getElementCount() ==
+ cast<VectorType>(DestTy)->getElementCount(),
+ "FPToUI source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -2998,17 +3045,16 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert(SrcVec == DstVec,
- "FPToSI source and dest must both be vector or scalar", &I);
- Assert(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector",
- &I);
- Assert(DestTy->isIntOrIntVectorTy(),
- "FPToSI result must be integer or integer vector", &I);
+ Check(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Check(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", &I);
+ Check(DestTy->isIntOrIntVectorTy(),
+ "FPToSI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
- Assert(cast<VectorType>(SrcTy)->getElementCount() ==
- cast<VectorType>(DestTy)->getElementCount(),
- "FPToSI source and dest vector length mismatch", &I);
+ Check(cast<VectorType>(SrcTy)->getElementCount() ==
+ cast<VectorType>(DestTy)->getElementCount(),
+ "FPToSI source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -3018,17 +3064,17 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I);
+ Check(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I);
- Assert(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I);
- Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch",
- &I);
+ Check(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch",
+ &I);
if (SrcTy->isVectorTy()) {
auto *VSrc = cast<VectorType>(SrcTy);
auto *VDest = cast<VectorType>(DestTy);
- Assert(VSrc->getElementCount() == VDest->getElementCount(),
- "PtrToInt Vector width mismatch", &I);
+ Check(VSrc->getElementCount() == VDest->getElementCount(),
+ "PtrToInt Vector width mismatch", &I);
}
visitInstruction(I);
@@ -3039,23 +3085,22 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert(SrcTy->isIntOrIntVectorTy(),
- "IntToPtr source must be an integral", &I);
- Assert(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I);
+ Check(SrcTy->isIntOrIntVectorTy(), "IntToPtr source must be an integral", &I);
+ Check(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I);
- Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch",
- &I);
+ Check(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch",
+ &I);
if (SrcTy->isVectorTy()) {
auto *VSrc = cast<VectorType>(SrcTy);
auto *VDest = cast<VectorType>(DestTy);
- Assert(VSrc->getElementCount() == VDest->getElementCount(),
- "IntToPtr Vector width mismatch", &I);
+ Check(VSrc->getElementCount() == VDest->getElementCount(),
+ "IntToPtr Vector width mismatch", &I);
}
visitInstruction(I);
}
void Verifier::visitBitCastInst(BitCastInst &I) {
- Assert(
+ Check(
CastInst::castIsValid(Instruction::BitCast, I.getOperand(0), I.getType()),
"Invalid bitcast", &I);
visitInstruction(I);
@@ -3065,16 +3110,16 @@ void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert(SrcTy->isPtrOrPtrVectorTy(), "AddrSpaceCast source must be a pointer",
- &I);
- Assert(DestTy->isPtrOrPtrVectorTy(), "AddrSpaceCast result must be a pointer",
- &I);
- Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
- "AddrSpaceCast must be between different address spaces", &I);
+ Check(SrcTy->isPtrOrPtrVectorTy(), "AddrSpaceCast source must be a pointer",
+ &I);
+ Check(DestTy->isPtrOrPtrVectorTy(), "AddrSpaceCast result must be a pointer",
+ &I);
+ Check(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
+ "AddrSpaceCast must be between different address spaces", &I);
if (auto *SrcVTy = dyn_cast<VectorType>(SrcTy))
- Assert(SrcVTy->getElementCount() ==
- cast<VectorType>(DestTy)->getElementCount(),
- "AddrSpaceCast vector pointer number of elements mismatch", &I);
+ Check(SrcVTy->getElementCount() ==
+ cast<VectorType>(DestTy)->getElementCount(),
+ "AddrSpaceCast vector pointer number of elements mismatch", &I);
visitInstruction(I);
}
@@ -3085,18 +3130,18 @@ void Verifier::visitPHINode(PHINode &PN) {
// This can be tested by checking whether the instruction before this is
// either nonexistent (because this is begin()) or is a PHI node. If not,
// then there is some other instruction before a PHI.
- Assert(&PN == &PN.getParent()->front() ||
- isa<PHINode>(--BasicBlock::iterator(&PN)),
- "PHI nodes not grouped at top of basic block!", &PN, PN.getParent());
+ Check(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!", &PN, PN.getParent());
// Check that a PHI doesn't yield a Token.
- Assert(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!");
+ Check(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!");
// Check that all of the values of the PHI node have the same type as the
// result, and that the incoming blocks are really basic blocks.
for (Value *IncValue : PN.incoming_values()) {
- Assert(PN.getType() == IncValue->getType(),
- "PHI node operands are not the same type as the result!", &PN);
+ Check(PN.getType() == IncValue->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
}
// All other PHI node constraints are checked in the visitBasicBlock method.
@@ -3105,54 +3150,68 @@ void Verifier::visitPHINode(PHINode &PN) {
}
void Verifier::visitCallBase(CallBase &Call) {
- Assert(Call.getCalledOperand()->getType()->isPointerTy(),
- "Called function must be a pointer!", Call);
+ Check(Call.getCalledOperand()->getType()->isPointerTy(),
+ "Called function must be a pointer!", Call);
PointerType *FPTy = cast<PointerType>(Call.getCalledOperand()->getType());
- Assert(FPTy->isOpaqueOrPointeeTypeMatches(Call.getFunctionType()),
- "Called function is not the same type as the call!", Call);
+ Check(FPTy->isOpaqueOrPointeeTypeMatches(Call.getFunctionType()),
+ "Called function is not the same type as the call!", Call);
FunctionType *FTy = Call.getFunctionType();
// Verify that the correct number of arguments are being passed
if (FTy->isVarArg())
- Assert(Call.arg_size() >= FTy->getNumParams(),
- "Called function requires more parameters than were provided!",
- Call);
+ Check(Call.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!", Call);
else
- Assert(Call.arg_size() == FTy->getNumParams(),
- "Incorrect number of arguments passed to called function!", Call);
+ Check(Call.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", Call);
// Verify that all arguments to the call match the function type.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Assert(Call.getArgOperand(i)->getType() == FTy->getParamType(i),
- "Call parameter type does not match function signature!",
- Call.getArgOperand(i), FTy->getParamType(i), Call);
+ Check(Call.getArgOperand(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ Call.getArgOperand(i), FTy->getParamType(i), Call);
AttributeList Attrs = Call.getAttributes();
- Assert(verifyAttributeCount(Attrs, Call.arg_size()),
- "Attribute after last parameter!", Call);
+ Check(verifyAttributeCount(Attrs, Call.arg_size()),
+ "Attribute after last parameter!", Call);
+
+ auto VerifyTypeAlign = [&](Type *Ty, const Twine &Message) {
+ if (!Ty->isSized())
+ return;
+ Align ABIAlign = DL.getABITypeAlign(Ty);
+ Align MaxAlign(ParamMaxAlignment);
+ Check(ABIAlign <= MaxAlign,
+ "Incorrect alignment of " + Message + " to called function!", Call);
+ };
+
+ VerifyTypeAlign(FTy->getReturnType(), "return type");
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ Type *Ty = FTy->getParamType(i);
+ VerifyTypeAlign(Ty, "argument passed");
+ }
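
The VerifyTypeAlign lambda is new functionality in this hunk: every sized return and parameter type in the callee's signature must have an ABI alignment no larger than a fixed cap. A standalone sketch; the cap value is an assumption (ParamMaxAlignment is believed to be 1 << 14, which this diff does not show):

#include <cstdint>
#include <cstdio>

// Assumed value of the cap, for illustration only.
static constexpr uint64_t ParamMaxAlignment = 1u << 14;

// Mirrors the lambda: unsized types are skipped, sized ones must fit the cap.
static bool typeAlignOK(bool IsSized, uint64_t ABITypeAlign) {
  return !IsSized || ABITypeAlign <= ParamMaxAlignment;
}

int main() {
  std::printf("%d\n", typeAlignOK(true, 16));        // 1: ordinary alignment
  std::printf("%d\n", typeAlignOK(true, 1u << 20));  // 0: over the cap
  std::printf("%d\n", typeAlignOK(false, 0));        // 1: unsized, skipped
}
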
Function *Callee =
dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
bool IsIntrinsic = Callee && Callee->isIntrinsic();
if (IsIntrinsic)
- Assert(Callee->getValueType() == FTy,
- "Intrinsic called with incompatible signature", Call);
+ Check(Callee->getValueType() == FTy,
+ "Intrinsic called with incompatible signature", Call);
if (Attrs.hasFnAttr(Attribute::Speculatable)) {
// Don't allow speculatable on call sites, unless the underlying function
// declaration is also speculatable.
- Assert(Callee && Callee->isSpeculatable(),
- "speculatable attribute may not apply to call sites", Call);
+ Check(Callee && Callee->isSpeculatable(),
+ "speculatable attribute may not apply to call sites", Call);
}
if (Attrs.hasFnAttr(Attribute::Preallocated)) {
- Assert(Call.getCalledFunction()->getIntrinsicID() ==
- Intrinsic::call_preallocated_arg,
- "preallocated as a call site attribute can only be on "
- "llvm.call.preallocated.arg");
+ Check(Call.getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::call_preallocated_arg,
+ "preallocated as a call site attribute can only be on "
+ "llvm.call.preallocated.arg");
}
// Verify call attributes.
@@ -3164,8 +3223,8 @@ void Verifier::visitCallBase(CallBase &Call) {
if (Call.hasInAllocaArgument()) {
Value *InAllocaArg = Call.getArgOperand(FTy->getNumParams() - 1);
if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets()))
- Assert(AI->isUsedWithInAlloca(),
- "inalloca argument for call has mismatched alloca", AI, Call);
+ Check(AI->isUsedWithInAlloca(),
+ "inalloca argument for call has mismatched alloca", AI, Call);
}
// For each argument of the callsite, if it has the swifterror argument,
@@ -3175,31 +3234,30 @@ void Verifier::visitCallBase(CallBase &Call) {
if (Call.paramHasAttr(i, Attribute::SwiftError)) {
Value *SwiftErrorArg = Call.getArgOperand(i);
if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
- Assert(AI->isSwiftError(),
- "swifterror argument for call has mismatched alloca", AI, Call);
+ Check(AI->isSwiftError(),
+ "swifterror argument for call has mismatched alloca", AI, Call);
continue;
}
auto ArgI = dyn_cast<Argument>(SwiftErrorArg);
- Assert(ArgI,
- "swifterror argument should come from an alloca or parameter",
- SwiftErrorArg, Call);
- Assert(ArgI->hasSwiftErrorAttr(),
- "swifterror argument for call has mismatched parameter", ArgI,
- Call);
+ Check(ArgI, "swifterror argument should come from an alloca or parameter",
+ SwiftErrorArg, Call);
+ Check(ArgI->hasSwiftErrorAttr(),
+ "swifterror argument for call has mismatched parameter", ArgI,
+ Call);
}
if (Attrs.hasParamAttr(i, Attribute::ImmArg)) {
// Don't allow immarg on call sites, unless the underlying declaration
// also has the matching immarg.
- Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
- "immarg may not apply only to call sites",
- Call.getArgOperand(i), Call);
+ Check(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
+ "immarg may not apply only to call sites", Call.getArgOperand(i),
+ Call);
}
if (Call.paramHasAttr(i, Attribute::ImmArg)) {
Value *ArgVal = Call.getArgOperand(i);
- Assert(isa<ConstantInt>(ArgVal) || isa<ConstantFP>(ArgVal),
- "immarg operand has non-immediate parameter", ArgVal, Call);
+ Check(isa<ConstantInt>(ArgVal) || isa<ConstantFP>(ArgVal),
+ "immarg operand has non-immediate parameter", ArgVal, Call);
}
if (Call.paramHasAttr(i, Attribute::Preallocated)) {
@@ -3207,10 +3265,10 @@ void Verifier::visitCallBase(CallBase &Call) {
bool hasOB =
Call.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0;
bool isMustTail = Call.isMustTailCall();
- Assert(hasOB != isMustTail,
- "preallocated operand either requires a preallocated bundle or "
- "the call to be musttail (but not both)",
- ArgVal, Call);
+ Check(hasOB != isMustTail,
+ "preallocated operand either requires a preallocated bundle or "
+ "the call to be musttail (but not both)",
+ ArgVal, Call);
}
}
@@ -3233,17 +3291,17 @@ void Verifier::visitCallBase(CallBase &Call) {
verifyParameterAttrs(ArgAttrs, Ty, &Call);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
- Assert(!SawNest, "More than one parameter has attribute nest!", Call);
+ Check(!SawNest, "More than one parameter has attribute nest!", Call);
SawNest = true;
}
if (ArgAttrs.hasAttribute(Attribute::Returned)) {
- Assert(!SawReturned, "More than one parameter has attribute returned!",
- Call);
- Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
- "Incompatible argument and return types for 'returned' "
- "attribute",
- Call);
+ Check(!SawReturned, "More than one parameter has attribute returned!",
+ Call);
+ Check(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
+ "Incompatible argument and return types for 'returned' "
+ "attribute",
+ Call);
SawReturned = true;
}
@@ -3252,32 +3310,32 @@ void Verifier::visitCallBase(CallBase &Call) {
if (!Call.getCalledFunction() ||
Call.getCalledFunction()->getIntrinsicID() !=
Intrinsic::experimental_gc_statepoint)
- Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
- "Attribute 'sret' cannot be used for vararg call arguments!",
- Call);
+ Check(!ArgAttrs.hasAttribute(Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!",
+ Call);
if (ArgAttrs.hasAttribute(Attribute::InAlloca))
- Assert(Idx == Call.arg_size() - 1,
- "inalloca isn't on the last argument!", Call);
+ Check(Idx == Call.arg_size() - 1,
+ "inalloca isn't on the last argument!", Call);
}
}
// Verify that there's no metadata unless it's a direct call to an intrinsic.
if (!IsIntrinsic) {
for (Type *ParamTy : FTy->params()) {
- Assert(!ParamTy->isMetadataTy(),
- "Function has metadata parameter but isn't an intrinsic", Call);
- Assert(!ParamTy->isTokenTy(),
- "Function has token parameter but isn't an intrinsic", Call);
+ Check(!ParamTy->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", Call);
+ Check(!ParamTy->isTokenTy(),
+ "Function has token parameter but isn't an intrinsic", Call);
}
}
// Verify that indirect calls don't return tokens.
if (!Call.getCalledFunction()) {
- Assert(!FTy->getReturnType()->isTokenTy(),
- "Return type cannot be token for indirect call!");
- Assert(!FTy->getReturnType()->isX86_AMXTy(),
- "Return type cannot be x86_amx for indirect call!");
+ Check(!FTy->getReturnType()->isTokenTy(),
+ "Return type cannot be token for indirect call!");
+ Check(!FTy->getReturnType()->isX86_AMXTy(),
+ "Return type cannot be x86_amx for indirect call!");
}
if (Function *F = Call.getCalledFunction())
@@ -3285,69 +3343,83 @@ void Verifier::visitCallBase(CallBase &Call) {
visitIntrinsicCall(ID, Call);
// Verify that a callsite has at most one "deopt", at most one "funclet", at
- // most one "gc-transition", at most one "cfguardtarget",
- // and at most one "preallocated" operand bundle.
+ // most one "gc-transition", at most one "cfguardtarget", at most one
+ // "preallocated" operand bundle, and at most one "ptrauth" operand bundle.
bool FoundDeoptBundle = false, FoundFuncletBundle = false,
FoundGCTransitionBundle = false, FoundCFGuardTargetBundle = false,
FoundPreallocatedBundle = false, FoundGCLiveBundle = false,
+ FoundPtrauthBundle = false,
FoundAttachedCallBundle = false;
for (unsigned i = 0, e = Call.getNumOperandBundles(); i < e; ++i) {
OperandBundleUse BU = Call.getOperandBundleAt(i);
uint32_t Tag = BU.getTagID();
if (Tag == LLVMContext::OB_deopt) {
- Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", Call);
+ Check(!FoundDeoptBundle, "Multiple deopt operand bundles", Call);
FoundDeoptBundle = true;
} else if (Tag == LLVMContext::OB_gc_transition) {
- Assert(!FoundGCTransitionBundle, "Multiple gc-transition operand bundles",
- Call);
+ Check(!FoundGCTransitionBundle, "Multiple gc-transition operand bundles",
+ Call);
FoundGCTransitionBundle = true;
} else if (Tag == LLVMContext::OB_funclet) {
- Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", Call);
+ Check(!FoundFuncletBundle, "Multiple funclet operand bundles", Call);
FoundFuncletBundle = true;
- Assert(BU.Inputs.size() == 1,
- "Expected exactly one funclet bundle operand", Call);
- Assert(isa<FuncletPadInst>(BU.Inputs.front()),
- "Funclet bundle operands should correspond to a FuncletPadInst",
- Call);
+ Check(BU.Inputs.size() == 1,
+ "Expected exactly one funclet bundle operand", Call);
+ Check(isa<FuncletPadInst>(BU.Inputs.front()),
+ "Funclet bundle operands should correspond to a FuncletPadInst",
+ Call);
} else if (Tag == LLVMContext::OB_cfguardtarget) {
- Assert(!FoundCFGuardTargetBundle,
- "Multiple CFGuardTarget operand bundles", Call);
+ Check(!FoundCFGuardTargetBundle, "Multiple CFGuardTarget operand bundles",
+ Call);
FoundCFGuardTargetBundle = true;
- Assert(BU.Inputs.size() == 1,
- "Expected exactly one cfguardtarget bundle operand", Call);
+ Check(BU.Inputs.size() == 1,
+ "Expected exactly one cfguardtarget bundle operand", Call);
+ } else if (Tag == LLVMContext::OB_ptrauth) {
+ Check(!FoundPtrauthBundle, "Multiple ptrauth operand bundles", Call);
+ FoundPtrauthBundle = true;
+ Check(BU.Inputs.size() == 2,
+ "Expected exactly two ptrauth bundle operands", Call);
+ Check(isa<ConstantInt>(BU.Inputs[0]) &&
+ BU.Inputs[0]->getType()->isIntegerTy(32),
+ "Ptrauth bundle key operand must be an i32 constant", Call);
+ Check(BU.Inputs[1]->getType()->isIntegerTy(64),
+ "Ptrauth bundle discriminator operand must be an i64", Call);
} else if (Tag == LLVMContext::OB_preallocated) {
- Assert(!FoundPreallocatedBundle, "Multiple preallocated operand bundles",
- Call);
+ Check(!FoundPreallocatedBundle, "Multiple preallocated operand bundles",
+ Call);
FoundPreallocatedBundle = true;
- Assert(BU.Inputs.size() == 1,
- "Expected exactly one preallocated bundle operand", Call);
+ Check(BU.Inputs.size() == 1,
+ "Expected exactly one preallocated bundle operand", Call);
auto Input = dyn_cast<IntrinsicInst>(BU.Inputs.front());
- Assert(Input &&
- Input->getIntrinsicID() == Intrinsic::call_preallocated_setup,
- "\"preallocated\" argument must be a token from "
- "llvm.call.preallocated.setup",
- Call);
+ Check(Input &&
+ Input->getIntrinsicID() == Intrinsic::call_preallocated_setup,
+ "\"preallocated\" argument must be a token from "
+ "llvm.call.preallocated.setup",
+ Call);
} else if (Tag == LLVMContext::OB_gc_live) {
- Assert(!FoundGCLiveBundle, "Multiple gc-live operand bundles",
- Call);
+ Check(!FoundGCLiveBundle, "Multiple gc-live operand bundles", Call);
FoundGCLiveBundle = true;
} else if (Tag == LLVMContext::OB_clang_arc_attachedcall) {
- Assert(!FoundAttachedCallBundle,
- "Multiple \"clang.arc.attachedcall\" operand bundles", Call);
+ Check(!FoundAttachedCallBundle,
+ "Multiple \"clang.arc.attachedcall\" operand bundles", Call);
FoundAttachedCallBundle = true;
verifyAttachedCallBundle(Call, BU);
}
}
+ // Verify that callee and callsite agree on whether to use pointer auth.
+ Check(!(Call.getCalledFunction() && FoundPtrauthBundle),
+ "Direct call cannot have a ptrauth bundle", Call);
+
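// Illustrative sketch (editorial note, not part of the patch): IR shape the
// new ptrauth bundle checks accept. The key must be a constant i32, the
// discriminator an i64, and the bundle may only appear on indirect calls:
//   call void %fptr() [ "ptrauth"(i32 0, i64 %disc) ]
// A direct call carrying a "ptrauth" bundle is rejected by the check above.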
// Verify that each inlinable callsite of a debug-info-bearing function in a
// debug-info-bearing function has a debug location attached to it. Failure to
// do so causes assertion failures when the inliner sets up inline scope info.
if (Call.getFunction()->getSubprogram() && Call.getCalledFunction() &&
Call.getCalledFunction()->getSubprogram())
- AssertDI(Call.getDebugLoc(),
- "inlinable function call in a function with "
- "debug info must have a !dbg location",
- Call);
+ CheckDI(Call.getDebugLoc(),
+ "inlinable function call in a function with "
+ "debug info must have a !dbg location",
+ Call);
if (Call.isInlineAsm())
verifyInlineAsmCall(Call);
@@ -3357,16 +3429,16 @@ void Verifier::visitCallBase(CallBase &Call) {
void Verifier::verifyTailCCMustTailAttrs(const AttrBuilder &Attrs,
StringRef Context) {
- Assert(!Attrs.contains(Attribute::InAlloca),
- Twine("inalloca attribute not allowed in ") + Context);
- Assert(!Attrs.contains(Attribute::InReg),
- Twine("inreg attribute not allowed in ") + Context);
- Assert(!Attrs.contains(Attribute::SwiftError),
- Twine("swifterror attribute not allowed in ") + Context);
- Assert(!Attrs.contains(Attribute::Preallocated),
- Twine("preallocated attribute not allowed in ") + Context);
- Assert(!Attrs.contains(Attribute::ByRef),
- Twine("byref attribute not allowed in ") + Context);
+ Check(!Attrs.contains(Attribute::InAlloca),
+ Twine("inalloca attribute not allowed in ") + Context);
+ Check(!Attrs.contains(Attribute::InReg),
+ Twine("inreg attribute not allowed in ") + Context);
+ Check(!Attrs.contains(Attribute::SwiftError),
+ Twine("swifterror attribute not allowed in ") + Context);
+ Check(!Attrs.contains(Attribute::Preallocated),
+ Twine("preallocated attribute not allowed in ") + Context);
+ Check(!Attrs.contains(Attribute::ByRef),
+ Twine("byref attribute not allowed in ") + Context);
}
/// Two types are "congruent" if they are identical, or if they are both pointer
@@ -3403,19 +3475,19 @@ static AttrBuilder getParameterABIAttributes(LLVMContext& C, unsigned I, Attribu
}
void Verifier::verifyMustTailCall(CallInst &CI) {
- Assert(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
+ Check(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
Function *F = CI.getParent()->getParent();
FunctionType *CallerTy = F->getFunctionType();
FunctionType *CalleeTy = CI.getFunctionType();
- Assert(CallerTy->isVarArg() == CalleeTy->isVarArg(),
- "cannot guarantee tail call due to mismatched varargs", &CI);
- Assert(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
- "cannot guarantee tail call due to mismatched return types", &CI);
+ Check(CallerTy->isVarArg() == CalleeTy->isVarArg(),
+ "cannot guarantee tail call due to mismatched varargs", &CI);
+ Check(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
+ "cannot guarantee tail call due to mismatched return types", &CI);
// - The calling conventions of the caller and callee must match.
- Assert(F->getCallingConv() == CI.getCallingConv(),
- "cannot guarantee tail call due to mismatched calling conv", &CI);
+ Check(F->getCallingConv() == CI.getCallingConv(),
+ "cannot guarantee tail call due to mismatched calling conv", &CI);
// - The call must immediately precede a :ref:`ret <i_ret>` instruction,
// or a pointer bitcast followed by a ret instruction.
@@ -3426,19 +3498,18 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// Handle the optional bitcast.
if (BitCastInst *BI = dyn_cast_or_null<BitCastInst>(Next)) {
- Assert(BI->getOperand(0) == RetVal,
- "bitcast following musttail call must use the call", BI);
+ Check(BI->getOperand(0) == RetVal,
+ "bitcast following musttail call must use the call", BI);
RetVal = BI;
Next = BI->getNextNode();
}
// Check the return.
ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
- Assert(Ret, "musttail call must precede a ret with an optional bitcast",
- &CI);
- Assert(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal ||
- isa<UndefValue>(Ret->getReturnValue()),
- "musttail call result must be returned", Ret);
+ Check(Ret, "musttail call must precede a ret with an optional bitcast", &CI);
+ Check(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal ||
+ isa<UndefValue>(Ret->getReturnValue()),
+ "musttail call result must be returned", Ret);
AttributeList CallerAttrs = F->getAttributes();
AttributeList CalleeAttrs = CI.getAttributes();
@@ -3460,8 +3531,8 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
verifyTailCCMustTailAttrs(ABIAttrs, Context);
}
// - Varargs functions are not allowed
- Assert(!CallerTy->isVarArg(), Twine("cannot guarantee ") + CCName +
- " tail call for varargs function");
+ Check(!CallerTy->isVarArg(), Twine("cannot guarantee ") + CCName +
+ " tail call for varargs function");
return;
}
@@ -3469,11 +3540,10 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// parameters or return types may differ in pointee type, but not
// address space.
if (!CI.getCalledFunction() || !CI.getCalledFunction()->isIntrinsic()) {
- Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
- "cannot guarantee tail call due to mismatched parameter counts",
- &CI);
+ Check(CallerTy->getNumParams() == CalleeTy->getNumParams(),
+ "cannot guarantee tail call due to mismatched parameter counts", &CI);
for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
- Assert(
+ Check(
isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
"cannot guarantee tail call due to mismatched parameter types", &CI);
}
@@ -3484,10 +3554,10 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder CallerABIAttrs = getParameterABIAttributes(F->getContext(), I, CallerAttrs);
AttrBuilder CalleeABIAttrs = getParameterABIAttributes(F->getContext(), I, CalleeAttrs);
- Assert(CallerABIAttrs == CalleeABIAttrs,
- "cannot guarantee tail call due to mismatched ABI impacting "
- "function attributes",
- &CI, CI.getOperand(I));
+ Check(CallerABIAttrs == CalleeABIAttrs,
+ "cannot guarantee tail call due to mismatched ABI impacting "
+ "function attributes",
+ &CI, CI.getOperand(I));
}
}
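// Illustrative sketch (editorial note, not part of the patch): the shape
// these musttail checks require -- the call must feed, at most through one
// bitcast, straight into the ret:
//   %r = musttail call i32 @callee(i32 %x)
//   ret i32 %r
// Any intervening instruction, or returning a different value, fails
// "musttail call result must be returned".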
@@ -3503,7 +3573,7 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
// Verify that the first non-PHI instruction of the unwind destination is an
// exception handling instruction.
- Assert(
+ Check(
II.getUnwindDest()->isEHPad(),
"The unwind destination does not have an exception handling instruction!",
&II);
@@ -3514,17 +3584,17 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
/// visitUnaryOperator - Check the argument to the unary operator.
///
void Verifier::visitUnaryOperator(UnaryOperator &U) {
- Assert(U.getType() == U.getOperand(0)->getType(),
- "Unary operators must have same type for"
- "operands and result!",
- &U);
+ Check(U.getType() == U.getOperand(0)->getType(),
+ "Unary operators must have same type for"
+ "operands and result!",
+ &U);
switch (U.getOpcode()) {
// Check that floating-point arithmetic operators are only used with
// floating-point operands.
case Instruction::FNeg:
- Assert(U.getType()->isFPOrFPVectorTy(),
- "FNeg operator only works with float types!", &U);
+ Check(U.getType()->isFPOrFPVectorTy(),
+ "FNeg operator only works with float types!", &U);
break;
default:
llvm_unreachable("Unknown UnaryOperator opcode!");
@@ -3537,8 +3607,8 @@ void Verifier::visitUnaryOperator(UnaryOperator &U) {
/// of the same type!
///
void Verifier::visitBinaryOperator(BinaryOperator &B) {
- Assert(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
- "Both operands to a binary operator are not of the same type!", &B);
+ Check(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
switch (B.getOpcode()) {
// Check that integer arithmetic operators are only used with
@@ -3550,12 +3620,12 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::UDiv:
case Instruction::SRem:
case Instruction::URem:
- Assert(B.getType()->isIntOrIntVectorTy(),
- "Integer arithmetic operators only work with integral types!", &B);
- Assert(B.getType() == B.getOperand(0)->getType(),
- "Integer arithmetic operators must have same type "
- "for operands and result!",
- &B);
+ Check(B.getType()->isIntOrIntVectorTy(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Check(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!",
+ &B);
break;
// Check that floating-point arithmetic operators are only used with
// floating-point operands.
@@ -3564,32 +3634,31 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
- Assert(B.getType()->isFPOrFPVectorTy(),
- "Floating-point arithmetic operators only work with "
- "floating-point types!",
- &B);
- Assert(B.getType() == B.getOperand(0)->getType(),
- "Floating-point arithmetic operators must have same type "
- "for operands and result!",
- &B);
+ Check(B.getType()->isFPOrFPVectorTy(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!",
+ &B);
+ Check(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!",
+ &B);
break;
// Check that logical operators are only used with integral operands.
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- Assert(B.getType()->isIntOrIntVectorTy(),
- "Logical operators only work with integral types!", &B);
- Assert(B.getType() == B.getOperand(0)->getType(),
- "Logical operators must have same type for operands and result!",
- &B);
+ Check(B.getType()->isIntOrIntVectorTy(),
+ "Logical operators only work with integral types!", &B);
+ Check(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!", &B);
break;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- Assert(B.getType()->isIntOrIntVectorTy(),
- "Shifts only work with integral types!", &B);
- Assert(B.getType() == B.getOperand(0)->getType(),
- "Shift return type must be same as operands!", &B);
+ Check(B.getType()->isIntOrIntVectorTy(),
+ "Shifts only work with integral types!", &B);
+ Check(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
break;
default:
llvm_unreachable("Unknown BinaryOperator opcode!");
@@ -3602,14 +3671,13 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
// Check that the operands are the same type
Type *Op0Ty = IC.getOperand(0)->getType();
Type *Op1Ty = IC.getOperand(1)->getType();
- Assert(Op0Ty == Op1Ty,
- "Both operands to ICmp instruction are not of the same type!", &IC);
+ Check(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(),
- "Invalid operand types for ICmp instruction", &IC);
+ Check(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(),
+ "Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
- Assert(IC.isIntPredicate(),
- "Invalid predicate in ICmp instruction!", &IC);
+ Check(IC.isIntPredicate(), "Invalid predicate in ICmp instruction!", &IC);
visitInstruction(IC);
}
@@ -3618,63 +3686,61 @@ void Verifier::visitFCmpInst(FCmpInst &FC) {
// Check that the operands are the same type
Type *Op0Ty = FC.getOperand(0)->getType();
Type *Op1Ty = FC.getOperand(1)->getType();
- Assert(Op0Ty == Op1Ty,
- "Both operands to FCmp instruction are not of the same type!", &FC);
+ Check(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
// Check that the operands are the right type
- Assert(Op0Ty->isFPOrFPVectorTy(),
- "Invalid operand types for FCmp instruction", &FC);
+ Check(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction",
+ &FC);
// Check that the predicate is valid.
- Assert(FC.isFPPredicate(),
- "Invalid predicate in FCmp instruction!", &FC);
+ Check(FC.isFPPredicate(), "Invalid predicate in FCmp instruction!", &FC);
visitInstruction(FC);
}
void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
- Assert(
- ExtractElementInst::isValidOperands(EI.getOperand(0), EI.getOperand(1)),
- "Invalid extractelement operands!", &EI);
+ Check(ExtractElementInst::isValidOperands(EI.getOperand(0), EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
visitInstruction(EI);
}
void Verifier::visitInsertElementInst(InsertElementInst &IE) {
- Assert(InsertElementInst::isValidOperands(IE.getOperand(0), IE.getOperand(1),
- IE.getOperand(2)),
- "Invalid insertelement operands!", &IE);
+ Check(InsertElementInst::isValidOperands(IE.getOperand(0), IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
visitInstruction(IE);
}
void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
- Assert(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
- SV.getShuffleMask()),
- "Invalid shufflevector operands!", &SV);
+ Check(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getShuffleMask()),
+ "Invalid shufflevector operands!", &SV);
visitInstruction(SV);
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
- Assert(isa<PointerType>(TargetTy),
- "GEP base pointer is not a vector or a vector of pointers", &GEP);
- Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
+ Check(isa<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Check(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
SmallVector<Value *, 16> Idxs(GEP.indices());
- Assert(all_of(
- Idxs, [](Value* V) { return V->getType()->isIntOrIntVectorTy(); }),
+ Check(
+ all_of(Idxs, [](Value *V) { return V->getType()->isIntOrIntVectorTy(); }),
"GEP indexes must be integers", &GEP);
Type *ElTy =
GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
- Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+ Check(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert(GEP.getType()->isPtrOrPtrVectorTy() &&
- GEP.getResultElementType() == ElTy,
- "GEP is not of right type for indices!", &GEP, ElTy);
+ Check(GEP.getType()->isPtrOrPtrVectorTy() &&
+ GEP.getResultElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
if (auto *GEPVTy = dyn_cast<VectorType>(GEP.getType())) {
// Additional checks for vector GEPs.
ElementCount GEPWidth = GEPVTy->getElementCount();
if (GEP.getPointerOperandType()->isVectorTy())
- Assert(
+ Check(
GEPWidth ==
cast<VectorType>(GEP.getPointerOperandType())->getElementCount(),
"Vector GEP result width doesn't match operand's", &GEP);
@@ -3682,16 +3748,16 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *IndexTy = Idx->getType();
if (auto *IndexVTy = dyn_cast<VectorType>(IndexTy)) {
ElementCount IndexWidth = IndexVTy->getElementCount();
- Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP);
+ Check(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP);
}
- Assert(IndexTy->isIntOrIntVectorTy(),
- "All GEP indices should be of integer type");
+ Check(IndexTy->isIntOrIntVectorTy(),
+ "All GEP indices should be of integer type");
}
}
if (auto *PTy = dyn_cast<PointerType>(GEP.getType())) {
- Assert(GEP.getAddressSpace() == PTy->getAddressSpace(),
- "GEP address space doesn't match type", &GEP);
+ Check(GEP.getAddressSpace() == PTy->getAddressSpace(),
+ "GEP address space doesn't match type", &GEP);
}
visitInstruction(GEP);
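// Illustrative sketch (editorial note): for vector GEPs the result, the
// pointer operand, and any vector index must agree on element count, e.g.:
//   %p = getelementptr i32, <4 x ptr> %base, <4 x i64> %idx   ; all 4 wide
// Pairing a <4 x ptr> base with a <2 x i64> index trips
// "Invalid GEP index vector width".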
@@ -3706,33 +3772,33 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
"precondition violation");
unsigned NumOperands = Range->getNumOperands();
- Assert(NumOperands % 2 == 0, "Unfinished range!", Range);
+ Check(NumOperands % 2 == 0, "Unfinished range!", Range);
unsigned NumRanges = NumOperands / 2;
- Assert(NumRanges >= 1, "It should have at least one range!", Range);
+ Check(NumRanges >= 1, "It should have at least one range!", Range);
ConstantRange LastRange(1, true); // Dummy initial value
for (unsigned i = 0; i < NumRanges; ++i) {
ConstantInt *Low =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i));
- Assert(Low, "The lower limit must be an integer!", Low);
+ Check(Low, "The lower limit must be an integer!", Low);
ConstantInt *High =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i + 1));
- Assert(High, "The upper limit must be an integer!", High);
- Assert(High->getType() == Low->getType() && High->getType() == Ty,
- "Range types must match instruction type!", &I);
+ Check(High, "The upper limit must be an integer!", High);
+ Check(High->getType() == Low->getType() && High->getType() == Ty,
+ "Range types must match instruction type!", &I);
APInt HighV = High->getValue();
APInt LowV = Low->getValue();
ConstantRange CurRange(LowV, HighV);
- Assert(!CurRange.isEmptySet() && !CurRange.isFullSet(),
- "Range must not be empty!", Range);
+ Check(!CurRange.isEmptySet() && !CurRange.isFullSet(),
+ "Range must not be empty!", Range);
if (i != 0) {
- Assert(CurRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
- Range);
- Assert(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
- Range);
+ Check(CurRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Check(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
+ Range);
+ Check(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
+ Range);
}
LastRange = ConstantRange(LowV, HighV);
}
@@ -3742,41 +3808,41 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
APInt FirstHigh =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(1))->getValue();
ConstantRange FirstRange(FirstLow, FirstHigh);
- Assert(FirstRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
- Range);
+ Check(FirstRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Check(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
+ Range);
}
}
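// Illustrative sketch (editorial note): a well-formed !range attachment per
// these checks -- pairs of [Lo, Hi) limits of the loaded type, in order,
// non-overlapping, and not contiguous (contiguous pairs must be merged):
//   %v = load i32, ptr %p, !range !0
//   !0 = !{i32 0, i32 10, i32 20, i32 30}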
void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
unsigned Size = DL.getTypeSizeInBits(Ty);
- Assert(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I);
- Assert(!(Size & (Size - 1)),
- "atomic memory access' operand must have a power-of-two size", Ty, I);
+ Check(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I);
+ Check(!(Size & (Size - 1)),
+ "atomic memory access' operand must have a power-of-two size", Ty, I);
}
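// Illustrative note (editorial): this size rule admits i8/i16/i32/... and
// rejects e.g. an atomic access of i1 (not byte-sized) or i48 (byte-sized
// but not a power of two), independent of the ordering checks elsewhere.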
void Verifier::visitLoadInst(LoadInst &LI) {
PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
- Assert(PTy, "Load operand must be a pointer.", &LI);
+ Check(PTy, "Load operand must be a pointer.", &LI);
Type *ElTy = LI.getType();
if (MaybeAlign A = LI.getAlign()) {
- Assert(A->value() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &LI);
+ Check(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &LI);
}
- Assert(ElTy->isSized(), "loading unsized types is not allowed", &LI);
+ Check(ElTy->isSized(), "loading unsized types is not allowed", &LI);
if (LI.isAtomic()) {
- Assert(LI.getOrdering() != AtomicOrdering::Release &&
- LI.getOrdering() != AtomicOrdering::AcquireRelease,
- "Load cannot have Release ordering", &LI);
- Assert(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
- "atomic load operand must have integer, pointer, or floating point "
- "type!",
- ElTy, &LI);
+ Check(LI.getOrdering() != AtomicOrdering::Release &&
+ LI.getOrdering() != AtomicOrdering::AcquireRelease,
+ "Load cannot have Release ordering", &LI);
+ Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
+ "atomic load operand must have integer, pointer, or floating point "
+ "type!",
+ ElTy, &LI);
checkAtomicMemAccessSize(ElTy, &LI);
} else {
- Assert(LI.getSyncScopeID() == SyncScope::System,
- "Non-atomic load cannot have SynchronizationScope specified", &LI);
+ Check(LI.getSyncScopeID() == SyncScope::System,
+ "Non-atomic load cannot have SynchronizationScope specified", &LI);
}
visitInstruction(LI);
@@ -3784,27 +3850,27 @@ void Verifier::visitLoadInst(LoadInst &LI) {
void Verifier::visitStoreInst(StoreInst &SI) {
PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
- Assert(PTy, "Store operand must be a pointer.", &SI);
+ Check(PTy, "Store operand must be a pointer.", &SI);
Type *ElTy = SI.getOperand(0)->getType();
- Assert(PTy->isOpaqueOrPointeeTypeMatches(ElTy),
- "Stored value type does not match pointer operand type!", &SI, ElTy);
+ Check(PTy->isOpaqueOrPointeeTypeMatches(ElTy),
+ "Stored value type does not match pointer operand type!", &SI, ElTy);
if (MaybeAlign A = SI.getAlign()) {
- Assert(A->value() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &SI);
+ Check(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &SI);
}
- Assert(ElTy->isSized(), "storing unsized types is not allowed", &SI);
+ Check(ElTy->isSized(), "storing unsized types is not allowed", &SI);
if (SI.isAtomic()) {
- Assert(SI.getOrdering() != AtomicOrdering::Acquire &&
- SI.getOrdering() != AtomicOrdering::AcquireRelease,
- "Store cannot have Acquire ordering", &SI);
- Assert(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
- "atomic store operand must have integer, pointer, or floating point "
- "type!",
- ElTy, &SI);
+ Check(SI.getOrdering() != AtomicOrdering::Acquire &&
+ SI.getOrdering() != AtomicOrdering::AcquireRelease,
+ "Store cannot have Acquire ordering", &SI);
+ Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
+ "atomic store operand must have integer, pointer, or floating point "
+ "type!",
+ ElTy, &SI);
checkAtomicMemAccessSize(ElTy, &SI);
} else {
- Assert(SI.getSyncScopeID() == SyncScope::System,
- "Non-atomic store cannot have SynchronizationScope specified", &SI);
+ Check(SI.getSyncScopeID() == SyncScope::System,
+ "Non-atomic store cannot have SynchronizationScope specified", &SI);
}
visitInstruction(SI);
}
@@ -3814,10 +3880,10 @@ void Verifier::verifySwiftErrorCall(CallBase &Call,
const Value *SwiftErrorVal) {
for (const auto &I : llvm::enumerate(Call.args())) {
if (I.value() == SwiftErrorVal) {
- Assert(Call.paramHasAttr(I.index(), Attribute::SwiftError),
- "swifterror value when used in a callsite should be marked "
- "with swifterror attribute",
- SwiftErrorVal, Call);
+ Check(Call.paramHasAttr(I.index(), Attribute::SwiftError),
+ "swifterror value when used in a callsite should be marked "
+ "with swifterror attribute",
+ SwiftErrorVal, Call);
}
}
}
@@ -3826,16 +3892,17 @@ void Verifier::verifySwiftErrorValue(const Value *SwiftErrorVal) {
// Check that swifterror value is only used by loads, stores, or as
// a swifterror argument.
for (const User *U : SwiftErrorVal->users()) {
- Assert(isa<LoadInst>(U) || isa<StoreInst>(U) || isa<CallInst>(U) ||
- isa<InvokeInst>(U),
- "swifterror value can only be loaded and stored from, or "
- "as a swifterror argument!",
- SwiftErrorVal, U);
+ Check(isa<LoadInst>(U) || isa<StoreInst>(U) || isa<CallInst>(U) ||
+ isa<InvokeInst>(U),
+ "swifterror value can only be loaded and stored from, or "
+ "as a swifterror argument!",
+ SwiftErrorVal, U);
// If it is used by a store, check it is the second operand.
if (auto StoreI = dyn_cast<StoreInst>(U))
- Assert(StoreI->getOperand(1) == SwiftErrorVal,
- "swifterror value should be the second operand when used "
- "by stores", SwiftErrorVal, U);
+ Check(StoreI->getOperand(1) == SwiftErrorVal,
+ "swifterror value should be the second operand when used "
+ "by stores",
+ SwiftErrorVal, U);
if (auto *Call = dyn_cast<CallBase>(U))
verifySwiftErrorCall(*const_cast<CallBase *>(Call), SwiftErrorVal);
}
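// Illustrative sketch (editorial note): the only uses these checks permit
// for a swifterror value are loads, stores that use it as the pointer
// operand, and calls that mark the matching parameter, e.g.:
//   call void @g(ptr swifterror %err)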
@@ -3843,16 +3910,20 @@ void Verifier::verifySwiftErrorValue(const Value *SwiftErrorVal) {
void Verifier::visitAllocaInst(AllocaInst &AI) {
SmallPtrSet<Type*, 4> Visited;
- Assert(AI.getAllocatedType()->isSized(&Visited),
- "Cannot allocate unsized type", &AI);
- Assert(AI.getArraySize()->getType()->isIntegerTy(),
- "Alloca array size must have integer type", &AI);
+ Check(AI.getAllocatedType()->isSized(&Visited),
+ "Cannot allocate unsized type", &AI);
+ Check(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
if (MaybeAlign A = AI.getAlign()) {
- Assert(A->value() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &AI);
+ Check(A->value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &AI);
}
if (AI.isSwiftError()) {
+ Check(AI.getAllocatedType()->isPointerTy(),
+ "swifterror alloca must have pointer type", &AI);
+ Check(!AI.isArrayAllocation(),
+ "swifterror alloca must not be array allocation", &AI);
verifySwiftErrorValue(&AI);
}
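// Illustrative sketch (editorial note): the two new alloca checks accept
//   %err = alloca swifterror ptr
// and reject a non-pointer element type or an array form such as
//   %bad = alloca swifterror ptr, i32 4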
@@ -3861,64 +3932,65 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
Type *ElTy = CXI.getOperand(1)->getType();
- Assert(ElTy->isIntOrPtrTy(),
- "cmpxchg operand must have integer or pointer type", ElTy, &CXI);
+ Check(ElTy->isIntOrPtrTy(),
+ "cmpxchg operand must have integer or pointer type", ElTy, &CXI);
checkAtomicMemAccessSize(ElTy, &CXI);
visitInstruction(CXI);
}
void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
- Assert(RMWI.getOrdering() != AtomicOrdering::Unordered,
- "atomicrmw instructions cannot be unordered.", &RMWI);
+ Check(RMWI.getOrdering() != AtomicOrdering::Unordered,
+ "atomicrmw instructions cannot be unordered.", &RMWI);
auto Op = RMWI.getOperation();
Type *ElTy = RMWI.getOperand(1)->getType();
if (Op == AtomicRMWInst::Xchg) {
- Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " +
- AtomicRMWInst::getOperationName(Op) +
- " operand must have integer or floating point type!",
- &RMWI, ElTy);
+ Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy() ||
+ ElTy->isPointerTy(),
+ "atomicrmw " + AtomicRMWInst::getOperationName(Op) +
+ " operand must have integer or floating point type!",
+ &RMWI, ElTy);
} else if (AtomicRMWInst::isFPOperation(Op)) {
- Assert(ElTy->isFloatingPointTy(), "atomicrmw " +
- AtomicRMWInst::getOperationName(Op) +
- " operand must have floating point type!",
- &RMWI, ElTy);
+ Check(ElTy->isFloatingPointTy(),
+ "atomicrmw " + AtomicRMWInst::getOperationName(Op) +
+ " operand must have floating point type!",
+ &RMWI, ElTy);
} else {
- Assert(ElTy->isIntegerTy(), "atomicrmw " +
- AtomicRMWInst::getOperationName(Op) +
- " operand must have integer type!",
- &RMWI, ElTy);
+ Check(ElTy->isIntegerTy(),
+ "atomicrmw " + AtomicRMWInst::getOperationName(Op) +
+ " operand must have integer type!",
+ &RMWI, ElTy);
}
checkAtomicMemAccessSize(ElTy, &RMWI);
- Assert(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP,
- "Invalid binary operation!", &RMWI);
+ Check(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP,
+ "Invalid binary operation!", &RMWI);
visitInstruction(RMWI);
}
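// Illustrative note (editorial): with the relaxed xchg check above, IR such
// as the following now verifies (assuming the target handles pointer-sized
// atomics):
//   %old = atomicrmw xchg ptr %p, ptr %v seq_cst
// while e.g. `atomicrmw add` still requires an integer operand.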
void Verifier::visitFenceInst(FenceInst &FI) {
const AtomicOrdering Ordering = FI.getOrdering();
- Assert(Ordering == AtomicOrdering::Acquire ||
- Ordering == AtomicOrdering::Release ||
- Ordering == AtomicOrdering::AcquireRelease ||
- Ordering == AtomicOrdering::SequentiallyConsistent,
- "fence instructions may only have acquire, release, acq_rel, or "
- "seq_cst ordering.",
- &FI);
+ Check(Ordering == AtomicOrdering::Acquire ||
+ Ordering == AtomicOrdering::Release ||
+ Ordering == AtomicOrdering::AcquireRelease ||
+ Ordering == AtomicOrdering::SequentiallyConsistent,
+ "fence instructions may only have acquire, release, acq_rel, or "
+ "seq_cst ordering.",
+ &FI);
visitInstruction(FI);
}
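// Illustrative note (editorial): `fence acquire`, `fence release`,
// `fence acq_rel`, and `fence seq_cst` all verify; a monotonic or unordered
// fence is rejected by the ordering check above.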
void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
- Assert(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
- EVI.getIndices()) == EVI.getType(),
- "Invalid ExtractValueInst operands!", &EVI);
+ Check(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.getIndices()) == EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
visitInstruction(EVI);
}
void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
- Assert(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
- IVI.getIndices()) ==
- IVI.getOperand(1)->getType(),
- "Invalid InsertValueInst operands!", &IVI);
+ Check(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.getIndices()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
visitInstruction(IVI);
}
@@ -3936,7 +4008,7 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
BasicBlock *BB = I.getParent();
Function *F = BB->getParent();
- Assert(BB != &F->getEntryBlock(), "EH pad cannot be in entry block.", &I);
+ Check(BB != &F->getEntryBlock(), "EH pad cannot be in entry block.", &I);
if (auto *LPI = dyn_cast<LandingPadInst>(&I)) {
// The landingpad instruction defines its parent as a landing pad block. The
@@ -3944,22 +4016,22 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
// invoke.
for (BasicBlock *PredBB : predecessors(BB)) {
const auto *II = dyn_cast<InvokeInst>(PredBB->getTerminator());
- Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "Block containing LandingPadInst must be jumped to "
- "only by the unwind edge of an invoke.",
- LPI);
+ Check(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "Block containing LandingPadInst must be jumped to "
+ "only by the unwind edge of an invoke.",
+ LPI);
}
return;
}
if (auto *CPI = dyn_cast<CatchPadInst>(&I)) {
if (!pred_empty(BB))
- Assert(BB->getUniquePredecessor() == CPI->getCatchSwitch()->getParent(),
- "Block containg CatchPadInst must be jumped to "
- "only by its catchswitch.",
- CPI);
- Assert(BB != CPI->getCatchSwitch()->getUnwindDest(),
- "Catchswitch cannot unwind to one of its catchpads",
- CPI->getCatchSwitch(), CPI);
+ Check(BB->getUniquePredecessor() == CPI->getCatchSwitch()->getParent(),
+ "Block containg CatchPadInst must be jumped to "
+ "only by its catchswitch.",
+ CPI);
+ Check(BB != CPI->getCatchSwitch()->getUnwindDest(),
+ "Catchswitch cannot unwind to one of its catchpads",
+ CPI->getCatchSwitch(), CPI);
return;
}
@@ -3971,39 +4043,39 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
Instruction *TI = PredBB->getTerminator();
Value *FromPad;
if (auto *II = dyn_cast<InvokeInst>(TI)) {
- Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "EH pad must be jumped to via an unwind edge", ToPad, II);
+ Check(II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "EH pad must be jumped to via an unwind edge", ToPad, II);
if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet))
FromPad = Bundle->Inputs[0];
else
FromPad = ConstantTokenNone::get(II->getContext());
} else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
FromPad = CRI->getOperand(0);
- Assert(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI);
+ Check(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI);
} else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
FromPad = CSI;
} else {
- Assert(false, "EH pad must be jumped to via an unwind edge", ToPad, TI);
+ Check(false, "EH pad must be jumped to via an unwind edge", ToPad, TI);
}
// The edge may exit from zero or more nested pads.
SmallSet<Value *, 8> Seen;
for (;; FromPad = getParentPad(FromPad)) {
- Assert(FromPad != ToPad,
- "EH pad cannot handle exceptions raised within it", FromPad, TI);
+ Check(FromPad != ToPad,
+ "EH pad cannot handle exceptions raised within it", FromPad, TI);
if (FromPad == ToPadParent) {
// This is a legal unwind edge.
break;
}
- Assert(!isa<ConstantTokenNone>(FromPad),
- "A single unwind edge may only enter one EH pad", TI);
- Assert(Seen.insert(FromPad).second,
- "EH pad jumps through a cycle of pads", FromPad);
+ Check(!isa<ConstantTokenNone>(FromPad),
+ "A single unwind edge may only enter one EH pad", TI);
+ Check(Seen.insert(FromPad).second, "EH pad jumps through a cycle of pads",
+ FromPad);
// This will be diagnosed on the corresponding instruction already. We
// need the extra check here to make sure getParentPad() works.
- Assert(isa<FuncletPadInst>(FromPad) || isa<CatchSwitchInst>(FromPad),
- "Parent pad must be catchpad/cleanuppad/catchswitch", TI);
+ Check(isa<FuncletPadInst>(FromPad) || isa<CatchSwitchInst>(FromPad),
+ "Parent pad must be catchpad/cleanuppad/catchswitch", TI);
}
}
}
@@ -4011,38 +4083,37 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
// The landingpad instruction is ill-formed if it doesn't have any clauses and
// isn't a cleanup.
- Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(),
- "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
+ Check(LPI.getNumClauses() > 0 || LPI.isCleanup(),
+ "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
visitEHPadPredecessors(LPI);
if (!LandingPadResultTy)
LandingPadResultTy = LPI.getType();
else
- Assert(LandingPadResultTy == LPI.getType(),
- "The landingpad instruction should have a consistent result type "
- "inside a function.",
- &LPI);
+ Check(LandingPadResultTy == LPI.getType(),
+ "The landingpad instruction should have a consistent result type "
+ "inside a function.",
+ &LPI);
Function *F = LPI.getParent()->getParent();
- Assert(F->hasPersonalityFn(),
- "LandingPadInst needs to be in a function with a personality.", &LPI);
+ Check(F->hasPersonalityFn(),
+ "LandingPadInst needs to be in a function with a personality.", &LPI);
// The landingpad instruction must be the first non-PHI instruction in the
// block.
- Assert(LPI.getParent()->getLandingPadInst() == &LPI,
- "LandingPadInst not the first non-PHI instruction in the block.",
- &LPI);
+ Check(LPI.getParent()->getLandingPadInst() == &LPI,
+ "LandingPadInst not the first non-PHI instruction in the block.", &LPI);
for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
Constant *Clause = LPI.getClause(i);
if (LPI.isCatch(i)) {
- Assert(isa<PointerType>(Clause->getType()),
- "Catch operand does not have pointer type!", &LPI);
+ Check(isa<PointerType>(Clause->getType()),
+ "Catch operand does not have pointer type!", &LPI);
} else {
- Assert(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
- Assert(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
- "Filter operand is not an array of constants!", &LPI);
+ Check(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
+ Check(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
+ "Filter operand is not an array of constants!", &LPI);
}
}
@@ -4050,16 +4121,16 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
}
void Verifier::visitResumeInst(ResumeInst &RI) {
- Assert(RI.getFunction()->hasPersonalityFn(),
- "ResumeInst needs to be in a function with a personality.", &RI);
+ Check(RI.getFunction()->hasPersonalityFn(),
+ "ResumeInst needs to be in a function with a personality.", &RI);
if (!LandingPadResultTy)
LandingPadResultTy = RI.getValue()->getType();
else
- Assert(LandingPadResultTy == RI.getValue()->getType(),
- "The resume instruction should have a consistent result type "
- "inside a function.",
- &RI);
+ Check(LandingPadResultTy == RI.getValue()->getType(),
+ "The resume instruction should have a consistent result type "
+ "inside a function.",
+ &RI);
visitTerminator(RI);
}
@@ -4068,26 +4139,26 @@ void Verifier::visitCatchPadInst(CatchPadInst &CPI) {
BasicBlock *BB = CPI.getParent();
Function *F = BB->getParent();
- Assert(F->hasPersonalityFn(),
- "CatchPadInst needs to be in a function with a personality.", &CPI);
+ Check(F->hasPersonalityFn(),
+ "CatchPadInst needs to be in a function with a personality.", &CPI);
- Assert(isa<CatchSwitchInst>(CPI.getParentPad()),
- "CatchPadInst needs to be directly nested in a CatchSwitchInst.",
- CPI.getParentPad());
+ Check(isa<CatchSwitchInst>(CPI.getParentPad()),
+ "CatchPadInst needs to be directly nested in a CatchSwitchInst.",
+ CPI.getParentPad());
// The catchpad instruction must be the first non-PHI instruction in the
// block.
- Assert(BB->getFirstNonPHI() == &CPI,
- "CatchPadInst not the first non-PHI instruction in the block.", &CPI);
+ Check(BB->getFirstNonPHI() == &CPI,
+ "CatchPadInst not the first non-PHI instruction in the block.", &CPI);
visitEHPadPredecessors(CPI);
visitFuncletPadInst(CPI);
}
void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) {
- Assert(isa<CatchPadInst>(CatchReturn.getOperand(0)),
- "CatchReturnInst needs to be provided a CatchPad", &CatchReturn,
- CatchReturn.getOperand(0));
+ Check(isa<CatchPadInst>(CatchReturn.getOperand(0)),
+ "CatchReturnInst needs to be provided a CatchPad", &CatchReturn,
+ CatchReturn.getOperand(0));
visitTerminator(CatchReturn);
}
@@ -4096,18 +4167,17 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
BasicBlock *BB = CPI.getParent();
Function *F = BB->getParent();
- Assert(F->hasPersonalityFn(),
- "CleanupPadInst needs to be in a function with a personality.", &CPI);
+ Check(F->hasPersonalityFn(),
+ "CleanupPadInst needs to be in a function with a personality.", &CPI);
// The cleanuppad instruction must be the first non-PHI instruction in the
// block.
- Assert(BB->getFirstNonPHI() == &CPI,
- "CleanupPadInst not the first non-PHI instruction in the block.",
- &CPI);
+ Check(BB->getFirstNonPHI() == &CPI,
+ "CleanupPadInst not the first non-PHI instruction in the block.", &CPI);
auto *ParentPad = CPI.getParentPad();
- Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
- "CleanupPadInst has an invalid parent.", &CPI);
+ Check(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
+ "CleanupPadInst has an invalid parent.", &CPI);
visitEHPadPredecessors(CPI);
visitFuncletPadInst(CPI);
@@ -4121,8 +4191,8 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
while (!Worklist.empty()) {
FuncletPadInst *CurrentPad = Worklist.pop_back_val();
- Assert(Seen.insert(CurrentPad).second,
- "FuncletPadInst must not be nested within itself", CurrentPad);
+ Check(Seen.insert(CurrentPad).second,
+ "FuncletPadInst must not be nested within itself", CurrentPad);
Value *UnresolvedAncestorPad = nullptr;
for (User *U : CurrentPad->users()) {
BasicBlock *UnwindDest;
@@ -4150,7 +4220,7 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
Worklist.push_back(CPI);
continue;
} else {
- Assert(isa<CatchReturnInst>(U), "Bogus funclet pad use", U);
+ Check(isa<CatchReturnInst>(U), "Bogus funclet pad use", U);
continue;
}
@@ -4200,10 +4270,11 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
// This unwind edge exits FPI. Make sure it agrees with other
// such edges.
if (FirstUser) {
- Assert(UnwindPad == FirstUnwindPad, "Unwind edges out of a funclet "
- "pad must have the same unwind "
- "dest",
- &FPI, U, FirstUser);
+ Check(UnwindPad == FirstUnwindPad,
+ "Unwind edges out of a funclet "
+ "pad must have the same unwind "
+ "dest",
+ &FPI, U, FirstUser);
} else {
FirstUser = U;
FirstUnwindPad = UnwindPad;
@@ -4262,10 +4333,10 @@ void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
SwitchUnwindPad = SwitchUnwindDest->getFirstNonPHI();
else
SwitchUnwindPad = ConstantTokenNone::get(FPI.getContext());
- Assert(SwitchUnwindPad == FirstUnwindPad,
- "Unwind edges out of a catch must have the same unwind dest as "
- "the parent catchswitch",
- &FPI, FirstUser, CatchSwitch);
+ Check(SwitchUnwindPad == FirstUnwindPad,
+ "Unwind edges out of a catch must have the same unwind dest as "
+ "the parent catchswitch",
+ &FPI, FirstUser, CatchSwitch);
}
}
@@ -4276,38 +4347,38 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
BasicBlock *BB = CatchSwitch.getParent();
Function *F = BB->getParent();
- Assert(F->hasPersonalityFn(),
- "CatchSwitchInst needs to be in a function with a personality.",
- &CatchSwitch);
+ Check(F->hasPersonalityFn(),
+ "CatchSwitchInst needs to be in a function with a personality.",
+ &CatchSwitch);
// The catchswitch instruction must be the first non-PHI instruction in the
// block.
- Assert(BB->getFirstNonPHI() == &CatchSwitch,
- "CatchSwitchInst not the first non-PHI instruction in the block.",
- &CatchSwitch);
+ Check(BB->getFirstNonPHI() == &CatchSwitch,
+ "CatchSwitchInst not the first non-PHI instruction in the block.",
+ &CatchSwitch);
auto *ParentPad = CatchSwitch.getParentPad();
- Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
- "CatchSwitchInst has an invalid parent.", ParentPad);
+ Check(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
+ "CatchSwitchInst has an invalid parent.", ParentPad);
if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) {
Instruction *I = UnwindDest->getFirstNonPHI();
- Assert(I->isEHPad() && !isa<LandingPadInst>(I),
- "CatchSwitchInst must unwind to an EH block which is not a "
- "landingpad.",
- &CatchSwitch);
+ Check(I->isEHPad() && !isa<LandingPadInst>(I),
+ "CatchSwitchInst must unwind to an EH block which is not a "
+ "landingpad.",
+ &CatchSwitch);
// Record catchswitch sibling unwinds for verifySiblingFuncletUnwinds
if (getParentPad(I) == ParentPad)
SiblingFuncletInfo[&CatchSwitch] = &CatchSwitch;
}
- Assert(CatchSwitch.getNumHandlers() != 0,
- "CatchSwitchInst cannot have empty handler list", &CatchSwitch);
+ Check(CatchSwitch.getNumHandlers() != 0,
+ "CatchSwitchInst cannot have empty handler list", &CatchSwitch);
for (BasicBlock *Handler : CatchSwitch.handlers()) {
- Assert(isa<CatchPadInst>(Handler->getFirstNonPHI()),
- "CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler);
+ Check(isa<CatchPadInst>(Handler->getFirstNonPHI()),
+ "CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler);
}
visitEHPadPredecessors(CatchSwitch);
@@ -4315,16 +4386,16 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
}
void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) {
- Assert(isa<CleanupPadInst>(CRI.getOperand(0)),
- "CleanupReturnInst needs to be provided a CleanupPad", &CRI,
- CRI.getOperand(0));
+ Check(isa<CleanupPadInst>(CRI.getOperand(0)),
+ "CleanupReturnInst needs to be provided a CleanupPad", &CRI,
+ CRI.getOperand(0));
if (BasicBlock *UnwindDest = CRI.getUnwindDest()) {
Instruction *I = UnwindDest->getFirstNonPHI();
- Assert(I->isEHPad() && !isa<LandingPadInst>(I),
- "CleanupReturnInst must unwind to an EH block which is not a "
- "landingpad.",
- &CRI);
+ Check(I->isEHPad() && !isa<LandingPadInst>(I),
+ "CleanupReturnInst must unwind to an EH block which is not a "
+ "landingpad.",
+ &CRI);
}
visitTerminator(CRI);
@@ -4351,39 +4422,45 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
return;
const Use &U = I.getOperandUse(i);
- Assert(DT.dominates(Op, U),
- "Instruction does not dominate all uses!", Op, &I);
+ Check(DT.dominates(Op, U), "Instruction does not dominate all uses!", Op, &I);
}
void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) {
- Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null "
- "apply only to pointer types", &I);
- Assert((isa<LoadInst>(I) || isa<IntToPtrInst>(I)),
- "dereferenceable, dereferenceable_or_null apply only to load"
- " and inttoptr instructions, use attributes for calls or invokes", &I);
- Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null "
- "take one operand!", &I);
+ Check(I.getType()->isPointerTy(),
+ "dereferenceable, dereferenceable_or_null "
+ "apply only to pointer types",
+ &I);
+ Check((isa<LoadInst>(I) || isa<IntToPtrInst>(I)),
+ "dereferenceable, dereferenceable_or_null apply only to load"
+ " and inttoptr instructions, use attributes for calls or invokes",
+ &I);
+ Check(MD->getNumOperands() == 1,
+ "dereferenceable, dereferenceable_or_null "
+ "take one operand!",
+ &I);
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
- Assert(CI && CI->getType()->isIntegerTy(64), "dereferenceable, "
- "dereferenceable_or_null metadata value must be an i64!", &I);
+ Check(CI && CI->getType()->isIntegerTy(64),
+ "dereferenceable, "
+ "dereferenceable_or_null metadata value must be an i64!",
+ &I);
}
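// Illustrative sketch (editorial note): a well-formed attachment per these
// checks -- exactly one i64 operand, and only on load or inttoptr:
//   %v = load ptr, ptr %p, !dereferenceable !0
//   !0 = !{i64 8}
// Calls and invokes use the parameter attribute instead.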
void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
- Assert(MD->getNumOperands() >= 2,
- "!prof annotations should have no less than 2 operands", MD);
+ Check(MD->getNumOperands() >= 2,
+ "!prof annotations should have no less than 2 operands", MD);
// Check first operand.
- Assert(MD->getOperand(0) != nullptr, "first operand should not be null", MD);
- Assert(isa<MDString>(MD->getOperand(0)),
- "expected string with name of the !prof annotation", MD);
+ Check(MD->getOperand(0) != nullptr, "first operand should not be null", MD);
+ Check(isa<MDString>(MD->getOperand(0)),
+ "expected string with name of the !prof annotation", MD);
MDString *MDS = cast<MDString>(MD->getOperand(0));
StringRef ProfName = MDS->getString();
// Check consistency of !prof branch_weights metadata.
if (ProfName.equals("branch_weights")) {
if (isa<InvokeInst>(&I)) {
- Assert(MD->getNumOperands() == 2 || MD->getNumOperands() == 3,
- "Wrong number of InvokeInst branch_weights operands", MD);
+ Check(MD->getNumOperands() == 2 || MD->getNumOperands() == 3,
+ "Wrong number of InvokeInst branch_weights operands", MD);
} else {
unsigned ExpectedNumOperands = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
@@ -4400,94 +4477,112 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
CheckFailed("!prof branch_weights are not allowed for this instruction",
MD);
- Assert(MD->getNumOperands() == 1 + ExpectedNumOperands,
- "Wrong number of operands", MD);
+ Check(MD->getNumOperands() == 1 + ExpectedNumOperands,
+ "Wrong number of operands", MD);
}
for (unsigned i = 1; i < MD->getNumOperands(); ++i) {
auto &MDO = MD->getOperand(i);
- Assert(MDO, "second operand should not be null", MD);
- Assert(mdconst::dyn_extract<ConstantInt>(MDO),
- "!prof brunch_weights operand is not a const int");
+ Check(MDO, "second operand should not be null", MD);
+ Check(mdconst::dyn_extract<ConstantInt>(MDO),
+ "!prof brunch_weights operand is not a const int");
}
}
}
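// Illustrative sketch (editorial note): branch_weights metadata matching
// these checks -- one constant-int weight per successor:
//   br i1 %c, label %hot, label %cold, !prof !0
//   !0 = !{!"branch_weights", i32 97, i32 3}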
void Verifier::visitAnnotationMetadata(MDNode *Annotation) {
- Assert(isa<MDTuple>(Annotation), "annotation must be a tuple");
- Assert(Annotation->getNumOperands() >= 1,
- "annotation must have at least one operand");
+ Check(isa<MDTuple>(Annotation), "annotation must be a tuple");
+ Check(Annotation->getNumOperands() >= 1,
+ "annotation must have at least one operand");
for (const MDOperand &Op : Annotation->operands())
- Assert(isa<MDString>(Op.get()), "operands must be strings");
+ Check(isa<MDString>(Op.get()), "operands must be strings");
}
void Verifier::visitAliasScopeMetadata(const MDNode *MD) {
unsigned NumOps = MD->getNumOperands();
- Assert(NumOps >= 2 && NumOps <= 3, "scope must have two or three operands",
- MD);
- Assert(MD->getOperand(0).get() == MD || isa<MDString>(MD->getOperand(0)),
- "first scope operand must be self-referential or string", MD);
+ Check(NumOps >= 2 && NumOps <= 3, "scope must have two or three operands",
+ MD);
+ Check(MD->getOperand(0).get() == MD || isa<MDString>(MD->getOperand(0)),
+ "first scope operand must be self-referential or string", MD);
if (NumOps == 3)
- Assert(isa<MDString>(MD->getOperand(2)),
- "third scope operand must be string (if used)", MD);
+ Check(isa<MDString>(MD->getOperand(2)),
+ "third scope operand must be string (if used)", MD);
MDNode *Domain = dyn_cast<MDNode>(MD->getOperand(1));
- Assert(Domain != nullptr, "second scope operand must be MDNode", MD);
+ Check(Domain != nullptr, "second scope operand must be MDNode", MD);
unsigned NumDomainOps = Domain->getNumOperands();
- Assert(NumDomainOps >= 1 && NumDomainOps <= 2,
- "domain must have one or two operands", Domain);
- Assert(Domain->getOperand(0).get() == Domain ||
- isa<MDString>(Domain->getOperand(0)),
- "first domain operand must be self-referential or string", Domain);
+ Check(NumDomainOps >= 1 && NumDomainOps <= 2,
+ "domain must have one or two operands", Domain);
+ Check(Domain->getOperand(0).get() == Domain ||
+ isa<MDString>(Domain->getOperand(0)),
+ "first domain operand must be self-referential or string", Domain);
if (NumDomainOps == 2)
- Assert(isa<MDString>(Domain->getOperand(1)),
- "second domain operand must be string (if used)", Domain);
+ Check(isa<MDString>(Domain->getOperand(1)),
+ "second domain operand must be string (if used)", Domain);
}
void Verifier::visitAliasScopeListMetadata(const MDNode *MD) {
for (const MDOperand &Op : MD->operands()) {
const MDNode *OpMD = dyn_cast<MDNode>(Op);
- Assert(OpMD != nullptr, "scope list must consist of MDNodes", MD);
+ Check(OpMD != nullptr, "scope list must consist of MDNodes", MD);
visitAliasScopeMetadata(OpMD);
}
}
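// Illustrative sketch (editorial note): the scope/domain shape these checks
// expect -- self-referential (or string-named) nodes, each scope pointing at
// its domain:
//   !0 = distinct !{!0}        ; domain
//   !1 = distinct !{!1, !0}    ; scope in domain !0
//   !2 = !{!1}                 ; scope list used by !alias.scope / !noalias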
+void Verifier::visitAccessGroupMetadata(const MDNode *MD) {
+ auto IsValidAccessScope = [](const MDNode *MD) {
+ return MD->getNumOperands() == 0 && MD->isDistinct();
+ };
+
+ // It must be either an access scope itself...
+ if (IsValidAccessScope(MD))
+ return;
+
+ // ...or a list of access scopes.
+ for (const MDOperand &Op : MD->operands()) {
+ const MDNode *OpMD = dyn_cast<MDNode>(Op);
+ Check(OpMD != nullptr, "Access scope list must consist of MDNodes", MD);
+ Check(IsValidAccessScope(OpMD),
+ "Access scope list contains invalid access scope", MD);
+ }
+}
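// A minimal sketch of access-group metadata these checks accept, assuming an
// LLVMContext &Ctx: each access scope is a distinct MDNode with no operands,
// and several scopes may be listed in an ordinary tuple:
//
//   MDNode *AG0 = MDNode::getDistinct(Ctx, None);
//   MDNode *AG1 = MDNode::getDistinct(Ctx, None);
//   MDNode *List = MDNode::get(Ctx, {AG0, AG1});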
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
BasicBlock *BB = I.getParent();
- Assert(BB, "Instruction not embedded in basic block!", &I);
+ Check(BB, "Instruction not embedded in basic block!", &I);
if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
for (User *U : I.users()) {
- Assert(U != (User *)&I || !DT.isReachableFromEntry(BB),
- "Only PHI nodes may reference their own value!", &I);
+ Check(U != (User *)&I || !DT.isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
}
}
// Check that void typed values don't have names
- Assert(!I.getType()->isVoidTy() || !I.hasName(),
- "Instruction has a name, but provides a void value!", &I);
+ Check(!I.getType()->isVoidTy() || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
// Check that the return value of the instruction is either void or a legal
// value type.
- Assert(I.getType()->isVoidTy() || I.getType()->isFirstClassType(),
- "Instruction returns a non-scalar type!", &I);
+ Check(I.getType()->isVoidTy() || I.getType()->isFirstClassType(),
+ "Instruction returns a non-scalar type!", &I);
// Check that the instruction doesn't produce metadata. Calls are already
// checked against the callee type.
- Assert(!I.getType()->isMetadataTy() || isa<CallInst>(I) || isa<InvokeInst>(I),
- "Invalid use of metadata!", &I);
+ Check(!I.getType()->isMetadataTy() || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
// Check that all uses of the instruction, if they are instructions
// themselves, actually have parent basic blocks. If the use is not an
// instruction, it is an error!
for (Use &U : I.uses()) {
if (Instruction *Used = dyn_cast<Instruction>(U.getUser()))
- Assert(Used->getParent() != nullptr,
- "Instruction referencing"
- " instruction not embedded in a basic block!",
- &I, Used);
+ Check(Used->getParent() != nullptr,
+ "Instruction referencing"
+ " instruction not embedded in a basic block!",
+ &I, Used);
else {
CheckFailed("Use of instruction is not an instruction!", U);
return;
@@ -4499,12 +4594,12 @@ void Verifier::visitInstruction(Instruction &I) {
const CallBase *CBI = dyn_cast<CallBase>(&I);
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- Assert(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
+ Check(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
// Check to make sure that only first-class-values are operands to
// instructions.
if (!I.getOperand(i)->getType()->isFirstClassType()) {
- Assert(false, "Instruction operands must be first-class values!", &I);
+ Check(false, "Instruction operands must be first-class values!", &I);
}
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
@@ -4520,43 +4615,43 @@ void Verifier::visitInstruction(Instruction &I) {
// taken. Ignore cases where the address of the intrinsic function is used
// as the argument of operand bundle "clang.arc.attachedcall" as those
// cases are handled in verifyAttachedCallBundle.
- Assert((!F->isIntrinsic() ||
- (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
- IsAttachedCallOperand(F, CBI, i)),
- "Cannot take the address of an intrinsic!", &I);
- Assert(
- !F->isIntrinsic() || isa<CallInst>(I) ||
- F->getIntrinsicID() == Intrinsic::donothing ||
- F->getIntrinsicID() == Intrinsic::seh_try_begin ||
- F->getIntrinsicID() == Intrinsic::seh_try_end ||
- F->getIntrinsicID() == Intrinsic::seh_scope_begin ||
- F->getIntrinsicID() == Intrinsic::seh_scope_end ||
- F->getIntrinsicID() == Intrinsic::coro_resume ||
- F->getIntrinsicID() == Intrinsic::coro_destroy ||
- F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
- F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
- F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
- F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
- IsAttachedCallOperand(F, CBI, i),
- "Cannot invoke an intrinsic other than donothing, patchpoint, "
- "statepoint, coro_resume, coro_destroy or clang.arc.attachedcall",
- &I);
- Assert(F->getParent() == &M, "Referencing function in another module!",
- &I, &M, F, F->getParent());
+ Check((!F->isIntrinsic() ||
+ (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
+ IsAttachedCallOperand(F, CBI, i)),
+ "Cannot take the address of an intrinsic!", &I);
+ Check(!F->isIntrinsic() || isa<CallInst>(I) ||
+ F->getIntrinsicID() == Intrinsic::donothing ||
+ F->getIntrinsicID() == Intrinsic::seh_try_begin ||
+ F->getIntrinsicID() == Intrinsic::seh_try_end ||
+ F->getIntrinsicID() == Intrinsic::seh_scope_begin ||
+ F->getIntrinsicID() == Intrinsic::seh_scope_end ||
+ F->getIntrinsicID() == Intrinsic::coro_resume ||
+ F->getIntrinsicID() == Intrinsic::coro_destroy ||
+ F->getIntrinsicID() ==
+ Intrinsic::experimental_patchpoint_void ||
+ F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
+ F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
+ F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
+ IsAttachedCallOperand(F, CBI, i),
+ "Cannot invoke an intrinsic other than donothing, patchpoint, "
+ "statepoint, coro_resume, coro_destroy or clang.arc.attachedcall",
+ &I);
+ Check(F->getParent() == &M, "Referencing function in another module!", &I,
+ &M, F, F->getParent());
} else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
- Assert(OpBB->getParent() == BB->getParent(),
- "Referring to a basic block in another function!", &I);
+ Check(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
} else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
- Assert(OpArg->getParent() == BB->getParent(),
- "Referring to an argument in another function!", &I);
+ Check(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
- Assert(GV->getParent() == &M, "Referencing global in another module!", &I,
- &M, GV, GV->getParent());
+ Check(GV->getParent() == &M, "Referencing global in another module!", &I,
+ &M, GV, GV->getParent());
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i),
- "Cannot take the address of an inline asm!", &I);
+ Check(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i),
+ "Cannot take the address of an inline asm!", &I);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I.getOperand(i))) {
if (CE->getType()->isPtrOrPtrVectorTy()) {
// If we have a ConstantExpr pointer, we need to see if it came from an
@@ -4567,39 +4662,39 @@ void Verifier::visitInstruction(Instruction &I) {
}
if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) {
- Assert(I.getType()->isFPOrFPVectorTy(),
- "fpmath requires a floating point result!", &I);
- Assert(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
+ Check(I.getType()->isFPOrFPVectorTy(),
+ "fpmath requires a floating point result!", &I);
+ Check(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
if (ConstantFP *CFP0 =
mdconst::dyn_extract_or_null<ConstantFP>(MD->getOperand(0))) {
const APFloat &Accuracy = CFP0->getValueAPF();
- Assert(&Accuracy.getSemantics() == &APFloat::IEEEsingle(),
- "fpmath accuracy must have float type", &I);
- Assert(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
- "fpmath accuracy not a positive number!", &I);
+ Check(&Accuracy.getSemantics() == &APFloat::IEEEsingle(),
+ "fpmath accuracy must have float type", &I);
+ Check(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
+ "fpmath accuracy not a positive number!", &I);
} else {
- Assert(false, "invalid fpmath accuracy!", &I);
+ Check(false, "invalid fpmath accuracy!", &I);
}
}
if (MDNode *Range = I.getMetadata(LLVMContext::MD_range)) {
- Assert(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
- "Ranges are only for loads, calls and invokes!", &I);
+ Check(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Ranges are only for loads, calls and invokes!", &I);
visitRangeMetadata(I, Range, I.getType());
}
if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
- Assert(isa<LoadInst>(I) || isa<StoreInst>(I),
- "invariant.group metadata is only for loads and stores", &I);
+ Check(isa<LoadInst>(I) || isa<StoreInst>(I),
+ "invariant.group metadata is only for loads and stores", &I);
}
if (I.getMetadata(LLVMContext::MD_nonnull)) {
- Assert(I.getType()->isPointerTy(), "nonnull applies only to pointer types",
- &I);
- Assert(isa<LoadInst>(I),
- "nonnull applies only to load instructions, use attributes"
- " for calls or invokes",
- &I);
+ Check(I.getType()->isPointerTy(), "nonnull applies only to pointer types",
+ &I);
+ Check(isa<LoadInst>(I),
+ "nonnull applies only to load instructions, use attributes"
+ " for calls or invokes",
+ &I);
}
if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable))
@@ -4616,20 +4711,25 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope))
visitAliasScopeListMetadata(MD);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_access_group))
+ visitAccessGroupMetadata(MD);
+
if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) {
- Assert(I.getType()->isPointerTy(), "align applies only to pointer types",
- &I);
- Assert(isa<LoadInst>(I), "align applies only to load instructions, "
- "use attributes for calls or invokes", &I);
- Assert(AlignMD->getNumOperands() == 1, "align takes one operand!", &I);
+ Check(I.getType()->isPointerTy(), "align applies only to pointer types",
+ &I);
+ Check(isa<LoadInst>(I),
+ "align applies only to load instructions, "
+ "use attributes for calls or invokes",
+ &I);
+ Check(AlignMD->getNumOperands() == 1, "align takes one operand!", &I);
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(AlignMD->getOperand(0));
- Assert(CI && CI->getType()->isIntegerTy(64),
- "align metadata value must be an i64!", &I);
+ Check(CI && CI->getType()->isIntegerTy(64),
+ "align metadata value must be an i64!", &I);
uint64_t Align = CI->getZExtValue();
- Assert(isPowerOf2_64(Align),
- "align metadata value must be a power of 2!", &I);
- Assert(Align <= Value::MaximumAlignment,
- "alignment is larger that implementation defined limit", &I);
+ Check(isPowerOf2_64(Align), "align metadata value must be a power of 2!",
+ &I);
+ Check(Align <= Value::MaximumAlignment,
+ "alignment is larger that implementation defined limit", &I);
}
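// A minimal sketch of an !align attachment that passes these checks, assuming
// an LLVMContext &Ctx and a LoadInst *LI returning a pointer:
//
//   Metadata *Ops[] = {ConstantAsMetadata::get(
//       ConstantInt::get(Type::getInt64Ty(Ctx), 8))};
//   LI->setMetadata(LLVMContext::MD_align, MDNode::get(Ctx, Ops));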
if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof))
@@ -4639,7 +4739,7 @@ void Verifier::visitInstruction(Instruction &I) {
visitAnnotationMetadata(Annotation);
if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
- AssertDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
+ CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N);
visitMDNode(*N, AreDebugLocsAllowed::Yes);
}
@@ -4665,8 +4765,8 @@ void Verifier::visitInstruction(Instruction &I) {
/// Allow intrinsics to be verified in different ways.
void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Function *IF = Call.getCalledFunction();
- Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!",
- IF);
+ Check(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
// Verify that the intrinsic prototype lines up with what the .td files
// describe.
@@ -4681,21 +4781,21 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
SmallVector<Type *, 4> ArgTys;
Intrinsic::MatchIntrinsicTypesResult Res =
Intrinsic::matchIntrinsicSignature(IFTy, TableRef, ArgTys);
- Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchRet,
- "Intrinsic has incorrect return type!", IF);
- Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchArg,
- "Intrinsic has incorrect argument type!", IF);
+ Check(Res != Intrinsic::MatchIntrinsicTypes_NoMatchRet,
+ "Intrinsic has incorrect return type!", IF);
+ Check(Res != Intrinsic::MatchIntrinsicTypes_NoMatchArg,
+ "Intrinsic has incorrect argument type!", IF);
// Verify if the intrinsic call matches the vararg property.
if (IsVarArg)
- Assert(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef),
- "Intrinsic was not defined with variable arguments!", IF);
+ Check(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef),
+ "Intrinsic was not defined with variable arguments!", IF);
else
- Assert(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef),
- "Callsite was not defined with variable arguments!", IF);
+ Check(!Intrinsic::matchIntrinsicVarArg(IsVarArg, TableRef),
+ "Callsite was not defined with variable arguments!", IF);
// All descriptors should be absorbed by now.
- Assert(TableRef.empty(), "Intrinsic has too few arguments!", IF);
+ Check(TableRef.empty(), "Intrinsic has too few arguments!", IF);
// Now that we have the intrinsic ID and the actual argument types (and we
// know they are legal for the intrinsic!) get the intrinsic name through the
@@ -4703,11 +4803,11 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
// the name.
const std::string ExpectedName =
Intrinsic::getName(ID, ArgTys, IF->getParent(), IFTy);
- Assert(ExpectedName == IF->getName(),
- "Intrinsic name not mangled correctly for type arguments! "
- "Should be: " +
- ExpectedName,
- IF);
+ Check(ExpectedName == IF->getName(),
+ "Intrinsic name not mangled correctly for type arguments! "
+ "Should be: " +
+ ExpectedName,
+ IF);
// If the intrinsic takes MDNode arguments, verify that they are either global
// or are local to *this* function.
@@ -4715,8 +4815,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
if (auto *MD = dyn_cast<MetadataAsValue>(V))
visitMetadataAsValue(*MD, Call.getCaller());
if (auto *Const = dyn_cast<Constant>(V))
- Assert(!Const->getType()->isX86_AMXTy(),
- "const x86_amx is not allowed in argument!");
+ Check(!Const->getType()->isX86_AMXTy(),
+ "const x86_amx is not allowed in argument!");
}
switch (ID) {
@@ -4724,36 +4824,35 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
case Intrinsic::assume: {
for (auto &Elem : Call.bundle_op_infos()) {
- Assert(Elem.Tag->getKey() == "ignore" ||
- Attribute::isExistingAttribute(Elem.Tag->getKey()),
- "tags must be valid attribute names", Call);
+ Check(Elem.Tag->getKey() == "ignore" ||
+ Attribute::isExistingAttribute(Elem.Tag->getKey()),
+ "tags must be valid attribute names", Call);
Attribute::AttrKind Kind =
Attribute::getAttrKindFromName(Elem.Tag->getKey());
unsigned ArgCount = Elem.End - Elem.Begin;
if (Kind == Attribute::Alignment) {
- Assert(ArgCount <= 3 && ArgCount >= 2,
- "alignment assumptions should have 2 or 3 arguments", Call);
- Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
- "first argument should be a pointer", Call);
- Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
- "second argument should be an integer", Call);
+ Check(ArgCount <= 3 && ArgCount >= 2,
+ "alignment assumptions should have 2 or 3 arguments", Call);
+ Check(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
+ "first argument should be a pointer", Call);
+ Check(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
+ "second argument should be an integer", Call);
if (ArgCount == 3)
- Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(),
- "third argument should be an integer if present", Call);
+ Check(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(),
+ "third argument should be an integer if present", Call);
return;
}
- Assert(ArgCount <= 2, "too many arguments", Call);
+ Check(ArgCount <= 2, "too many arguments", Call);
if (Kind == Attribute::None)
break;
if (Attribute::isIntAttrKind(Kind)) {
- Assert(ArgCount == 2, "this attribute should have 2 arguments", Call);
- Assert(isa<ConstantInt>(Call.getOperand(Elem.Begin + 1)),
- "the second argument should be a constant integral value", Call);
+ Check(ArgCount == 2, "this attribute should have 2 arguments", Call);
+ Check(isa<ConstantInt>(Call.getOperand(Elem.Begin + 1)),
+ "the second argument should be a constant integral value", Call);
} else if (Attribute::canUseAsParamAttr(Kind)) {
- Assert((ArgCount) == 1, "this attribute should have one argument",
- Call);
+ Check((ArgCount) == 1, "this attribute should have one argument", Call);
} else if (Attribute::canUseAsFnAttr(Kind)) {
- Assert((ArgCount) == 0, "this attribute has no argument", Call);
+ Check((ArgCount) == 0, "this attribute has no argument", Call);
}
}
break;
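// A minimal sketch of an assume bundle the alignment path above accepts:
// a pointer plus an integer alignment (a third integer offset is optional):
//
//   call void @llvm.assume(i1 true) [ "align"(i8* %p, i64 16) ]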
@@ -4763,23 +4862,47 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
if (isa<ConstantPointerNull>(InfoArg))
break;
auto *GV = dyn_cast<GlobalVariable>(InfoArg);
- Assert(GV && GV->isConstant() && GV->hasDefinitiveInitializer(),
- "info argument of llvm.coro.id must refer to an initialized "
- "constant");
+ Check(GV && GV->isConstant() && GV->hasDefinitiveInitializer(),
+ "info argument of llvm.coro.id must refer to an initialized "
+ "constant");
Constant *Init = GV->getInitializer();
- Assert(isa<ConstantStruct>(Init) || isa<ConstantArray>(Init),
- "info argument of llvm.coro.id must refer to either a struct or "
- "an array");
+ Check(isa<ConstantStruct>(Init) || isa<ConstantArray>(Init),
+ "info argument of llvm.coro.id must refer to either a struct or "
+ "an array");
break;
}
+ case Intrinsic::fptrunc_round: {
+ // Check the rounding mode
+ Metadata *MD = nullptr;
+ auto *MAV = dyn_cast<MetadataAsValue>(Call.getOperand(1));
+ if (MAV)
+ MD = MAV->getMetadata();
+
+ Check(MD != nullptr, "missing rounding mode argument", Call);
+
+ Check(isa<MDString>(MD),
+ ("invalid value for llvm.fptrunc.round metadata operand"
+ " (the operand should be a string)"),
+ MD);
+
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+ Check(RoundMode && *RoundMode != RoundingMode::Dynamic,
+ "unsupported rounding mode argument", Call);
+ break;
+ }
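// A minimal sketch of a call these checks accept; the suffix assumes the
// usual result-then-source overload mangling:
//
//   %h = call half @llvm.fptrunc.round.f16.f32(float %x,
//                                              metadata !"round.tonearest")
//
// "round.tonearest" converts to a supported, non-dynamic rounding mode.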
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#include "llvm/IR/VPIntrinsics.def"
+ visitVPIntrinsic(cast<VPIntrinsic>(Call));
+ break;
#define INSTRUCTION(NAME, NARGS, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(Call));
break;
case Intrinsic::dbg_declare: // llvm.dbg.declare
- Assert(isa<MetadataAsValue>(Call.getArgOperand(0)),
- "invalid llvm.dbg.declare intrinsic call 1", Call);
+ Check(isa<MetadataAsValue>(Call.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", Call);
visitDbgIntrinsic("declare", cast<DbgVariableIntrinsic>(Call));
break;
case Intrinsic::dbg_addr: // llvm.dbg.addr
@@ -4794,18 +4917,19 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::memcpy:
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
- case Intrinsic::memset: {
+ case Intrinsic::memset:
+ case Intrinsic::memset_inline: {
const auto *MI = cast<MemIntrinsic>(&Call);
auto IsValidAlignment = [&](unsigned Alignment) -> bool {
return Alignment == 0 || isPowerOf2_32(Alignment);
};
- Assert(IsValidAlignment(MI->getDestAlignment()),
- "alignment of arg 0 of memory intrinsic must be 0 or a power of 2",
- Call);
+ Check(IsValidAlignment(MI->getDestAlignment()),
+ "alignment of arg 0 of memory intrinsic must be 0 or a power of 2",
+ Call);
if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) {
- Assert(IsValidAlignment(MTI->getSourceAlignment()),
- "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
- Call);
+ Check(IsValidAlignment(MTI->getSourceAlignment()),
+ "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
+ Call);
}
break;
@@ -4818,50 +4942,50 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
ConstantInt *ElementSizeCI =
cast<ConstantInt>(AMI->getRawElementSizeInBytes());
const APInt &ElementSizeVal = ElementSizeCI->getValue();
- Assert(ElementSizeVal.isPowerOf2(),
- "element size of the element-wise atomic memory intrinsic "
- "must be a power of 2",
- Call);
+ Check(ElementSizeVal.isPowerOf2(),
+ "element size of the element-wise atomic memory intrinsic "
+ "must be a power of 2",
+ Call);
auto IsValidAlignment = [&](uint64_t Alignment) {
return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
};
uint64_t DstAlignment = AMI->getDestAlignment();
- Assert(IsValidAlignment(DstAlignment),
- "incorrect alignment of the destination argument", Call);
+ Check(IsValidAlignment(DstAlignment),
+ "incorrect alignment of the destination argument", Call);
if (const auto *AMT = dyn_cast<AtomicMemTransferInst>(AMI)) {
uint64_t SrcAlignment = AMT->getSourceAlignment();
- Assert(IsValidAlignment(SrcAlignment),
- "incorrect alignment of the source argument", Call);
+ Check(IsValidAlignment(SrcAlignment),
+ "incorrect alignment of the source argument", Call);
}
break;
}
case Intrinsic::call_preallocated_setup: {
auto *NumArgs = dyn_cast<ConstantInt>(Call.getArgOperand(0));
- Assert(NumArgs != nullptr,
- "llvm.call.preallocated.setup argument must be a constant");
+ Check(NumArgs != nullptr,
+ "llvm.call.preallocated.setup argument must be a constant");
bool FoundCall = false;
for (User *U : Call.users()) {
auto *UseCall = dyn_cast<CallBase>(U);
- Assert(UseCall != nullptr,
- "Uses of llvm.call.preallocated.setup must be calls");
+ Check(UseCall != nullptr,
+ "Uses of llvm.call.preallocated.setup must be calls");
const Function *Fn = UseCall->getCalledFunction();
if (Fn && Fn->getIntrinsicID() == Intrinsic::call_preallocated_arg) {
auto *AllocArgIndex = dyn_cast<ConstantInt>(UseCall->getArgOperand(1));
- Assert(AllocArgIndex != nullptr,
- "llvm.call.preallocated.alloc arg index must be a constant");
+ Check(AllocArgIndex != nullptr,
+ "llvm.call.preallocated.alloc arg index must be a constant");
auto AllocArgIndexInt = AllocArgIndex->getValue();
- Assert(AllocArgIndexInt.sge(0) &&
- AllocArgIndexInt.slt(NumArgs->getValue()),
- "llvm.call.preallocated.alloc arg index must be between 0 and "
- "corresponding "
- "llvm.call.preallocated.setup's argument count");
+ Check(AllocArgIndexInt.sge(0) &&
+ AllocArgIndexInt.slt(NumArgs->getValue()),
+ "llvm.call.preallocated.alloc arg index must be between 0 and "
+ "corresponding "
+ "llvm.call.preallocated.setup's argument count");
} else if (Fn && Fn->getIntrinsicID() ==
Intrinsic::call_preallocated_teardown) {
// nothing to do
} else {
- Assert(!FoundCall, "Can have at most one call corresponding to a "
- "llvm.call.preallocated.setup");
+ Check(!FoundCall, "Can have at most one call corresponding to a "
+ "llvm.call.preallocated.setup");
FoundCall = true;
size_t NumPreallocatedArgs = 0;
for (unsigned i = 0; i < UseCall->arg_size(); i++) {
@@ -4869,14 +4993,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
++NumPreallocatedArgs;
}
}
- Assert(NumPreallocatedArgs != 0,
- "cannot use preallocated intrinsics on a call without "
- "preallocated arguments");
- Assert(NumArgs->equalsInt(NumPreallocatedArgs),
- "llvm.call.preallocated.setup arg size must be equal to number "
- "of preallocated arguments "
- "at call site",
- Call, *UseCall);
+ Check(NumPreallocatedArgs != 0,
+ "cannot use preallocated intrinsics on a call without "
+ "preallocated arguments");
+ Check(NumArgs->equalsInt(NumPreallocatedArgs),
+ "llvm.call.preallocated.setup arg size must be equal to number "
+ "of preallocated arguments "
+ "at call site",
+ Call, *UseCall);
// getOperandBundle() cannot be called if more than one of the operand
// bundle exists. There is already a check elsewhere for this, so skip
// here if we see more than one.
@@ -4886,33 +5010,33 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
auto PreallocatedBundle =
UseCall->getOperandBundle(LLVMContext::OB_preallocated);
- Assert(PreallocatedBundle,
- "Use of llvm.call.preallocated.setup outside intrinsics "
- "must be in \"preallocated\" operand bundle");
- Assert(PreallocatedBundle->Inputs.front().get() == &Call,
- "preallocated bundle must have token from corresponding "
- "llvm.call.preallocated.setup");
+ Check(PreallocatedBundle,
+ "Use of llvm.call.preallocated.setup outside intrinsics "
+ "must be in \"preallocated\" operand bundle");
+ Check(PreallocatedBundle->Inputs.front().get() == &Call,
+ "preallocated bundle must have token from corresponding "
+ "llvm.call.preallocated.setup");
}
}
break;
}
case Intrinsic::call_preallocated_arg: {
auto *Token = dyn_cast<CallBase>(Call.getArgOperand(0));
- Assert(Token && Token->getCalledFunction()->getIntrinsicID() ==
- Intrinsic::call_preallocated_setup,
- "llvm.call.preallocated.arg token argument must be a "
- "llvm.call.preallocated.setup");
- Assert(Call.hasFnAttr(Attribute::Preallocated),
- "llvm.call.preallocated.arg must be called with a \"preallocated\" "
- "call site attribute");
+ Check(Token && Token->getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::call_preallocated_setup,
+ "llvm.call.preallocated.arg token argument must be a "
+ "llvm.call.preallocated.setup");
+ Check(Call.hasFnAttr(Attribute::Preallocated),
+ "llvm.call.preallocated.arg must be called with a \"preallocated\" "
+ "call site attribute");
break;
}
case Intrinsic::call_preallocated_teardown: {
auto *Token = dyn_cast<CallBase>(Call.getArgOperand(0));
- Assert(Token && Token->getCalledFunction()->getIntrinsicID() ==
- Intrinsic::call_preallocated_setup,
- "llvm.call.preallocated.teardown token argument must be a "
- "llvm.call.preallocated.setup");
+ Check(Token && Token->getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::call_preallocated_setup,
+ "llvm.call.preallocated.teardown token argument must be a "
+ "llvm.call.preallocated.setup");
break;
}
case Intrinsic::gcroot:
@@ -4921,46 +5045,46 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
dyn_cast<AllocaInst>(Call.getArgOperand(0)->stripPointerCasts());
- Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", Call);
- Assert(isa<Constant>(Call.getArgOperand(1)),
- "llvm.gcroot parameter #2 must be a constant.", Call);
+ Check(AI, "llvm.gcroot parameter #1 must be an alloca.", Call);
+ Check(isa<Constant>(Call.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", Call);
if (!AI->getAllocatedType()->isPointerTy()) {
- Assert(!isa<ConstantPointerNull>(Call.getArgOperand(1)),
- "llvm.gcroot parameter #1 must either be a pointer alloca, "
- "or argument #2 must be a non-null constant.",
- Call);
+ Check(!isa<ConstantPointerNull>(Call.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.",
+ Call);
}
}
- Assert(Call.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", Call);
+ Check(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
break;
case Intrinsic::init_trampoline:
- Assert(isa<Function>(Call.getArgOperand(1)->stripPointerCasts()),
- "llvm.init_trampoline parameter #2 must resolve to a function.",
- Call);
+ Check(isa<Function>(Call.getArgOperand(1)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ Call);
break;
case Intrinsic::prefetch:
- Assert(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
- cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
- "invalid arguments to llvm.prefetch", Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch", Call);
break;
case Intrinsic::stackprotector:
- Assert(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
- "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
+ Check(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
break;
case Intrinsic::localescape: {
BasicBlock *BB = Call.getParent();
- Assert(BB == &BB->getParent()->front(),
- "llvm.localescape used outside of entry block", Call);
- Assert(!SawFrameEscape,
- "multiple calls to llvm.localescape in one function", Call);
+ Check(BB == &BB->getParent()->front(),
+ "llvm.localescape used outside of entry block", Call);
+ Check(!SawFrameEscape, "multiple calls to llvm.localescape in one function",
+ Call);
for (Value *Arg : Call.args()) {
if (isa<ConstantPointerNull>(Arg))
continue; // Null values are allowed as placeholders.
auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
- Assert(AI && AI->isStaticAlloca(),
- "llvm.localescape only accepts static allocas", Call);
+ Check(AI && AI->isStaticAlloca(),
+ "llvm.localescape only accepts static allocas", Call);
}
FrameEscapeInfo[BB->getParent()].first = Call.arg_size();
SawFrameEscape = true;
@@ -4969,10 +5093,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::localrecover: {
Value *FnArg = Call.getArgOperand(0)->stripPointerCasts();
Function *Fn = dyn_cast<Function>(FnArg);
- Assert(Fn && !Fn->isDeclaration(),
- "llvm.localrecover first "
- "argument must be function defined in this module",
- Call);
+ Check(Fn && !Fn->isDeclaration(),
+ "llvm.localrecover first "
+ "argument must be function defined in this module",
+ Call);
auto *IdxArg = cast<ConstantInt>(Call.getArgOperand(2));
auto &Entry = FrameEscapeInfo[Fn];
Entry.second = unsigned(
@@ -4982,39 +5106,38 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::experimental_gc_statepoint:
if (auto *CI = dyn_cast<CallInst>(&Call))
- Assert(!CI->isInlineAsm(),
- "gc.statepoint support for inline assembly unimplemented", CI);
- Assert(Call.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", Call);
+ Check(!CI->isInlineAsm(),
+ "gc.statepoint support for inline assembly unimplemented", CI);
+ Check(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
verifyStatepoint(Call);
break;
case Intrinsic::experimental_gc_result: {
- Assert(Call.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", Call);
+ Check(Call.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", Call);
// Are we tied to a statepoint properly?
const auto *StatepointCall = dyn_cast<CallBase>(Call.getArgOperand(0));
const Function *StatepointFn =
StatepointCall ? StatepointCall->getCalledFunction() : nullptr;
- Assert(StatepointFn && StatepointFn->isDeclaration() &&
- StatepointFn->getIntrinsicID() ==
- Intrinsic::experimental_gc_statepoint,
- "gc.result operand #1 must be from a statepoint", Call,
- Call.getArgOperand(0));
-
- // Assert that result type matches wrapped callee.
- const Value *Target = StatepointCall->getArgOperand(2);
- auto *PT = cast<PointerType>(Target->getType());
- auto *TargetFuncType = cast<FunctionType>(PT->getPointerElementType());
- Assert(Call.getType() == TargetFuncType->getReturnType(),
- "gc.result result type does not match wrapped callee", Call);
+ Check(StatepointFn && StatepointFn->isDeclaration() &&
+ StatepointFn->getIntrinsicID() ==
+ Intrinsic::experimental_gc_statepoint,
+ "gc.result operand #1 must be from a statepoint", Call,
+ Call.getArgOperand(0));
+
+ // Check that result type matches wrapped callee.
+ auto *TargetFuncType =
+ cast<FunctionType>(StatepointCall->getParamElementType(2));
+ Check(Call.getType() == TargetFuncType->getReturnType(),
+ "gc.result result type does not match wrapped callee", Call);
break;
}
case Intrinsic::experimental_gc_relocate: {
- Assert(Call.arg_size() == 3, "wrong number of arguments", Call);
+ Check(Call.arg_size() == 3, "wrong number of arguments", Call);
- Assert(isa<PointerType>(Call.getType()->getScalarType()),
- "gc.relocate must return a pointer or a vector of pointers", Call);
+ Check(isa<PointerType>(Call.getType()->getScalarType()),
+ "gc.relocate must return a pointer or a vector of pointers", Call);
// Check that this relocate is correctly tied to the statepoint
@@ -5027,19 +5150,19 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
// Landingpad relocates should have only one predecessor with invoke
// statepoint terminator
- Assert(InvokeBB, "safepoints should have unique landingpads",
- LandingPad->getParent());
- Assert(InvokeBB->getTerminator(), "safepoint block should be well formed",
- InvokeBB);
- Assert(isa<GCStatepointInst>(InvokeBB->getTerminator()),
- "gc relocate should be linked to a statepoint", InvokeBB);
+ Check(InvokeBB, "safepoints should have unique landingpads",
+ LandingPad->getParent());
+ Check(InvokeBB->getTerminator(), "safepoint block should be well formed",
+ InvokeBB);
+ Check(isa<GCStatepointInst>(InvokeBB->getTerminator()),
+ "gc relocate should be linked to a statepoint", InvokeBB);
} else {
// In all other cases relocate should be tied to the statepoint directly.
// This covers relocates on a normal return path of invoke statepoint and
// relocates of a call statepoint.
auto Token = Call.getArgOperand(0);
- Assert(isa<GCStatepointInst>(Token),
- "gc relocate is incorrectly tied to the statepoint", Call, Token);
+ Check(isa<GCStatepointInst>(Token),
+ "gc relocate is incorrectly tied to the statepoint", Call, Token);
}
// Verify rest of the relocate arguments.
@@ -5048,22 +5171,22 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
// Both the base and derived must be piped through the safepoint.
Value *Base = Call.getArgOperand(1);
- Assert(isa<ConstantInt>(Base),
- "gc.relocate operand #2 must be integer offset", Call);
+ Check(isa<ConstantInt>(Base),
+ "gc.relocate operand #2 must be integer offset", Call);
Value *Derived = Call.getArgOperand(2);
- Assert(isa<ConstantInt>(Derived),
- "gc.relocate operand #3 must be integer offset", Call);
+ Check(isa<ConstantInt>(Derived),
+ "gc.relocate operand #3 must be integer offset", Call);
const uint64_t BaseIndex = cast<ConstantInt>(Base)->getZExtValue();
const uint64_t DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
// Check the bounds
if (auto Opt = StatepointCall.getOperandBundle(LLVMContext::OB_gc_live)) {
- Assert(BaseIndex < Opt->Inputs.size(),
- "gc.relocate: statepoint base index out of bounds", Call);
- Assert(DerivedIndex < Opt->Inputs.size(),
- "gc.relocate: statepoint derived index out of bounds", Call);
+ Check(BaseIndex < Opt->Inputs.size(),
+ "gc.relocate: statepoint base index out of bounds", Call);
+ Check(DerivedIndex < Opt->Inputs.size(),
+ "gc.relocate: statepoint derived index out of bounds", Call);
}
// Relocated value must be either a pointer type or vector-of-pointer type,
@@ -5071,15 +5194,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
// relocated pointer. It can be casted to the correct type later if it's
// desired. However, they must have the same address space and 'vectorness'
GCRelocateInst &Relocate = cast<GCRelocateInst>(Call);
- Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(),
- "gc.relocate: relocated value must be a gc pointer", Call);
+ Check(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(),
+ "gc.relocate: relocated value must be a gc pointer", Call);
auto ResultType = Call.getType();
auto DerivedType = Relocate.getDerivedPtr()->getType();
- Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(),
- "gc.relocate: vector relocates to vector and pointer to pointer",
- Call);
- Assert(
+ Check(ResultType->isVectorTy() == DerivedType->isVectorTy(),
+ "gc.relocate: vector relocates to vector and pointer to pointer",
+ Call);
+ Check(
ResultType->getPointerAddressSpace() ==
DerivedType->getPointerAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space",
@@ -5088,39 +5211,43 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
case Intrinsic::eh_exceptioncode:
case Intrinsic::eh_exceptionpointer: {
- Assert(isa<CatchPadInst>(Call.getArgOperand(0)),
- "eh.exceptionpointer argument must be a catchpad", Call);
+ Check(isa<CatchPadInst>(Call.getArgOperand(0)),
+ "eh.exceptionpointer argument must be a catchpad", Call);
break;
}
case Intrinsic::get_active_lane_mask: {
- Assert(Call.getType()->isVectorTy(), "get_active_lane_mask: must return a "
- "vector", Call);
+ Check(Call.getType()->isVectorTy(),
+ "get_active_lane_mask: must return a "
+ "vector",
+ Call);
auto *ElemTy = Call.getType()->getScalarType();
- Assert(ElemTy->isIntegerTy(1), "get_active_lane_mask: element type is not "
- "i1", Call);
+ Check(ElemTy->isIntegerTy(1),
+ "get_active_lane_mask: element type is not "
+ "i1",
+ Call);
break;
}
case Intrinsic::masked_load: {
- Assert(Call.getType()->isVectorTy(), "masked_load: must return a vector",
- Call);
+ Check(Call.getType()->isVectorTy(), "masked_load: must return a vector",
+ Call);
Value *Ptr = Call.getArgOperand(0);
ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(1));
Value *Mask = Call.getArgOperand(2);
Value *PassThru = Call.getArgOperand(3);
- Assert(Mask->getType()->isVectorTy(), "masked_load: mask must be vector",
- Call);
- Assert(Alignment->getValue().isPowerOf2(),
- "masked_load: alignment must be a power of 2", Call);
+ Check(Mask->getType()->isVectorTy(), "masked_load: mask must be vector",
+ Call);
+ Check(Alignment->getValue().isPowerOf2(),
+ "masked_load: alignment must be a power of 2", Call);
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
- Assert(PtrTy->isOpaqueOrPointeeTypeMatches(Call.getType()),
- "masked_load: return must match pointer type", Call);
- Assert(PassThru->getType() == Call.getType(),
- "masked_load: pass through and return type must match", Call);
- Assert(cast<VectorType>(Mask->getType())->getElementCount() ==
- cast<VectorType>(Call.getType())->getElementCount(),
- "masked_load: vector mask must be same length as return", Call);
+ Check(PtrTy->isOpaqueOrPointeeTypeMatches(Call.getType()),
+ "masked_load: return must match pointer type", Call);
+ Check(PassThru->getType() == Call.getType(),
+ "masked_load: pass through and return type must match", Call);
+ Check(cast<VectorType>(Mask->getType())->getElementCount() ==
+ cast<VectorType>(Call.getType())->getElementCount(),
+ "masked_load: vector mask must be same length as return", Call);
break;
}
case Intrinsic::masked_store: {
@@ -5128,61 +5255,61 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Value *Ptr = Call.getArgOperand(1);
ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(2));
Value *Mask = Call.getArgOperand(3);
- Assert(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
- Call);
- Assert(Alignment->getValue().isPowerOf2(),
- "masked_store: alignment must be a power of 2", Call);
+ Check(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
+ Call);
+ Check(Alignment->getValue().isPowerOf2(),
+ "masked_store: alignment must be a power of 2", Call);
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
- Assert(PtrTy->isOpaqueOrPointeeTypeMatches(Val->getType()),
- "masked_store: storee must match pointer type", Call);
- Assert(cast<VectorType>(Mask->getType())->getElementCount() ==
- cast<VectorType>(Val->getType())->getElementCount(),
- "masked_store: vector mask must be same length as value", Call);
+ Check(PtrTy->isOpaqueOrPointeeTypeMatches(Val->getType()),
+ "masked_store: storee must match pointer type", Call);
+ Check(cast<VectorType>(Mask->getType())->getElementCount() ==
+ cast<VectorType>(Val->getType())->getElementCount(),
+ "masked_store: vector mask must be same length as value", Call);
break;
}
case Intrinsic::masked_gather: {
const APInt &Alignment =
cast<ConstantInt>(Call.getArgOperand(1))->getValue();
- Assert(Alignment.isZero() || Alignment.isPowerOf2(),
- "masked_gather: alignment must be 0 or a power of 2", Call);
+ Check(Alignment.isZero() || Alignment.isPowerOf2(),
+ "masked_gather: alignment must be 0 or a power of 2", Call);
break;
}
case Intrinsic::masked_scatter: {
const APInt &Alignment =
cast<ConstantInt>(Call.getArgOperand(2))->getValue();
- Assert(Alignment.isZero() || Alignment.isPowerOf2(),
- "masked_scatter: alignment must be 0 or a power of 2", Call);
+ Check(Alignment.isZero() || Alignment.isPowerOf2(),
+ "masked_scatter: alignment must be 0 or a power of 2", Call);
break;
}
case Intrinsic::experimental_guard: {
- Assert(isa<CallInst>(Call), "experimental_guard cannot be invoked", Call);
- Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
- "experimental_guard must have exactly one "
- "\"deopt\" operand bundle");
+ Check(isa<CallInst>(Call), "experimental_guard cannot be invoked", Call);
+ Check(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+ "experimental_guard must have exactly one "
+ "\"deopt\" operand bundle");
break;
}
case Intrinsic::experimental_deoptimize: {
- Assert(isa<CallInst>(Call), "experimental_deoptimize cannot be invoked",
- Call);
- Assert(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
- "experimental_deoptimize must have exactly one "
- "\"deopt\" operand bundle");
- Assert(Call.getType() == Call.getFunction()->getReturnType(),
- "experimental_deoptimize return type must match caller return type");
+ Check(isa<CallInst>(Call), "experimental_deoptimize cannot be invoked",
+ Call);
+ Check(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
+ "experimental_deoptimize must have exactly one "
+ "\"deopt\" operand bundle");
+ Check(Call.getType() == Call.getFunction()->getReturnType(),
+ "experimental_deoptimize return type must match caller return type");
if (isa<CallInst>(Call)) {
auto *RI = dyn_cast<ReturnInst>(Call.getNextNode());
- Assert(RI,
- "calls to experimental_deoptimize must be followed by a return");
+ Check(RI,
+ "calls to experimental_deoptimize must be followed by a return");
if (!Call.getType()->isVoidTy() && RI)
- Assert(RI->getReturnValue() == &Call,
- "calls to experimental_deoptimize must be followed by a return "
- "of the value computed by experimental_deoptimize");
+ Check(RI->getReturnValue() == &Call,
+ "calls to experimental_deoptimize must be followed by a return "
+ "of the value computed by experimental_deoptimize");
}
break;
@@ -5197,15 +5324,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin: {
Type *ArgTy = Call.getArgOperand(0)->getType();
- Assert(ArgTy->isIntOrIntVectorTy() && ArgTy->isVectorTy(),
- "Intrinsic has incorrect argument type!");
+ Check(ArgTy->isIntOrIntVectorTy() && ArgTy->isVectorTy(),
+ "Intrinsic has incorrect argument type!");
break;
}
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin: {
Type *ArgTy = Call.getArgOperand(0)->getType();
- Assert(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(),
- "Intrinsic has incorrect argument type!");
+ Check(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(),
+ "Intrinsic has incorrect argument type!");
break;
}
case Intrinsic::vector_reduce_fadd:
@@ -5213,8 +5340,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
// Unlike the other reductions, the first argument is a start value. The
// second argument is the vector to be reduced.
Type *ArgTy = Call.getArgOperand(1)->getType();
- Assert(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(),
- "Intrinsic has incorrect argument type!");
+ Check(ArgTy->isFPOrFPVectorTy() && ArgTy->isVectorTy(),
+ "Intrinsic has incorrect argument type!");
break;
}
case Intrinsic::smul_fix:
@@ -5227,27 +5354,26 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::udiv_fix_sat: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
- Assert(Op1->getType()->isIntOrIntVectorTy(),
- "first operand of [us][mul|div]_fix[_sat] must be an int type or "
- "vector of ints");
- Assert(Op2->getType()->isIntOrIntVectorTy(),
- "second operand of [us][mul|div]_fix[_sat] must be an int type or "
- "vector of ints");
+ Check(Op1->getType()->isIntOrIntVectorTy(),
+ "first operand of [us][mul|div]_fix[_sat] must be an int type or "
+ "vector of ints");
+ Check(Op2->getType()->isIntOrIntVectorTy(),
+ "second operand of [us][mul|div]_fix[_sat] must be an int type or "
+ "vector of ints");
auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
- Assert(Op3->getType()->getBitWidth() <= 32,
- "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits");
+ Check(Op3->getType()->getBitWidth() <= 32,
+ "third argument of [us][mul|div]_fix[_sat] must fit within 32 bits");
if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat ||
ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) {
- Assert(
- Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
- "the scale of s[mul|div]_fix[_sat] must be less than the width of "
- "the operands");
+ Check(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
+ "the scale of s[mul|div]_fix[_sat] must be less than the width of "
+ "the operands");
} else {
- Assert(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(),
- "the scale of u[mul|div]_fix[_sat] must be less than or equal "
- "to the width of the operands");
+ Check(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(),
+ "the scale of u[mul|div]_fix[_sat] must be less than or equal "
+ "to the width of the operands");
}
break;
}
@@ -5257,22 +5383,22 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::llrint: {
Type *ValTy = Call.getArgOperand(0)->getType();
Type *ResultTy = Call.getType();
- Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
- "Intrinsic does not support vectors", &Call);
+ Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
+ "Intrinsic does not support vectors", &Call);
break;
}
case Intrinsic::bswap: {
Type *Ty = Call.getType();
unsigned Size = Ty->getScalarSizeInBits();
- Assert(Size % 16 == 0, "bswap must be an even number of bytes", &Call);
+ Check(Size % 16 == 0, "bswap must be an even number of bytes", &Call);
break;
}
case Intrinsic::invariant_start: {
ConstantInt *InvariantSize = dyn_cast<ConstantInt>(Call.getArgOperand(0));
- Assert(InvariantSize &&
- (!InvariantSize->isNegative() || InvariantSize->isMinusOne()),
- "invariant_start parameter must be -1, 0 or a positive number",
- &Call);
+ Check(InvariantSize &&
+ (!InvariantSize->isNegative() || InvariantSize->isMinusOne()),
+ "invariant_start parameter must be -1, 0 or a positive number",
+ &Call);
break;
}
case Intrinsic::matrix_multiply:
@@ -5333,27 +5459,29 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
llvm_unreachable("unexpected intrinsic");
}
- Assert(ResultTy->getElementType()->isIntegerTy() ||
- ResultTy->getElementType()->isFloatingPointTy(),
- "Result type must be an integer or floating-point type!", IF);
+ Check(ResultTy->getElementType()->isIntegerTy() ||
+ ResultTy->getElementType()->isFloatingPointTy(),
+ "Result type must be an integer or floating-point type!", IF);
if (Op0ElemTy)
- Assert(ResultTy->getElementType() == Op0ElemTy,
- "Vector element type mismatch of the result and first operand "
- "vector!", IF);
+ Check(ResultTy->getElementType() == Op0ElemTy,
+ "Vector element type mismatch of the result and first operand "
+ "vector!",
+ IF);
if (Op1ElemTy)
- Assert(ResultTy->getElementType() == Op1ElemTy,
- "Vector element type mismatch of the result and second operand "
- "vector!", IF);
+ Check(ResultTy->getElementType() == Op1ElemTy,
+ "Vector element type mismatch of the result and second operand "
+ "vector!",
+ IF);
- Assert(cast<FixedVectorType>(ResultTy)->getNumElements() ==
- NumRows->getZExtValue() * NumColumns->getZExtValue(),
- "Result of a matrix operation does not fit in the returned vector!");
+ Check(cast<FixedVectorType>(ResultTy)->getNumElements() ==
+ NumRows->getZExtValue() * NumColumns->getZExtValue(),
+ "Result of a matrix operation does not fit in the returned vector!");
if (Stride)
- Assert(Stride->getZExtValue() >= NumRows->getZExtValue(),
- "Stride must be greater or equal than the number of rows!", IF);
+ Check(Stride->getZExtValue() >= NumRows->getZExtValue(),
+ "Stride must be greater or equal than the number of rows!", IF);
break;
}
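// A minimal sketch of a multiply these checks accept (a 2x3 matrix times a
// 3x4 matrix yields 2x4, i.e. 8 elements; the overload suffixes are assumed):
//
//   %c = call <8 x float> @llvm.matrix.multiply.v8f32.v6f32.v12f32(
//            <6 x float> %a, <12 x float> %b, i32 2, i32 3, i32 4)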
@@ -5366,25 +5494,25 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
if (Attrs.hasFnAttr(Attribute::VScaleRange))
KnownMinNumElements *= Attrs.getFnAttrs().getVScaleRangeMin();
}
- Assert((Idx < 0 && std::abs(Idx) <= KnownMinNumElements) ||
- (Idx >= 0 && Idx < KnownMinNumElements),
- "The splice index exceeds the range [-VL, VL-1] where VL is the "
- "known minimum number of elements in the vector. For scalable "
- "vectors the minimum number of elements is determined from "
- "vscale_range.",
- &Call);
+ Check((Idx < 0 && std::abs(Idx) <= KnownMinNumElements) ||
+ (Idx >= 0 && Idx < KnownMinNumElements),
+ "The splice index exceeds the range [-VL, VL-1] where VL is the "
+ "known minimum number of elements in the vector. For scalable "
+ "vectors the minimum number of elements is determined from "
+ "vscale_range.",
+ &Call);
break;
}
case Intrinsic::experimental_stepvector: {
VectorType *VecTy = dyn_cast<VectorType>(Call.getType());
- Assert(VecTy && VecTy->getScalarType()->isIntegerTy() &&
- VecTy->getScalarSizeInBits() >= 8,
- "experimental_stepvector only supported for vectors of integers "
- "with a bitwidth of at least 8.",
- &Call);
+ Check(VecTy && VecTy->getScalarType()->isIntegerTy() &&
+ VecTy->getScalarSizeInBits() >= 8,
+ "experimental_stepvector only supported for vectors of integers "
+ "with a bitwidth of at least 8.",
+ &Call);
break;
}
- case Intrinsic::experimental_vector_insert: {
+ case Intrinsic::vector_insert: {
Value *Vec = Call.getArgOperand(0);
Value *SubVec = Call.getArgOperand(1);
Value *Idx = Call.getArgOperand(2);
@@ -5395,27 +5523,26 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
ElementCount VecEC = VecTy->getElementCount();
ElementCount SubVecEC = SubVecTy->getElementCount();
- Assert(VecTy->getElementType() == SubVecTy->getElementType(),
- "experimental_vector_insert parameters must have the same element "
- "type.",
- &Call);
- Assert(IdxN % SubVecEC.getKnownMinValue() == 0,
- "experimental_vector_insert index must be a constant multiple of "
- "the subvector's known minimum vector length.");
+ Check(VecTy->getElementType() == SubVecTy->getElementType(),
+ "vector_insert parameters must have the same element "
+ "type.",
+ &Call);
+ Check(IdxN % SubVecEC.getKnownMinValue() == 0,
+ "vector_insert index must be a constant multiple of "
+ "the subvector's known minimum vector length.");
// If this insertion is not the 'mixed' case where a fixed vector is
// inserted into a scalable vector, ensure that the insertion of the
// subvector does not overrun the parent vector.
if (VecEC.isScalable() == SubVecEC.isScalable()) {
- Assert(
- IdxN < VecEC.getKnownMinValue() &&
- IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
- "subvector operand of experimental_vector_insert would overrun the "
- "vector being inserted into.");
+ Check(IdxN < VecEC.getKnownMinValue() &&
+ IdxN + SubVecEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+ "subvector operand of vector_insert would overrun the "
+ "vector being inserted into.");
}
break;
}
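// A minimal sketch of a fixed-width insert these checks accept: index 2 is a
// multiple of the subvector length, and 2 + 2 <= 8 so nothing overruns:
//
//   %v = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec,
//                                                       <2 x i32> %sub, i64 2)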
- case Intrinsic::experimental_vector_extract: {
+ case Intrinsic::vector_extract: {
Value *Vec = Call.getArgOperand(0);
Value *Idx = Call.getArgOperand(1);
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
@@ -5426,21 +5553,21 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
ElementCount VecEC = VecTy->getElementCount();
ElementCount ResultEC = ResultTy->getElementCount();
- Assert(ResultTy->getElementType() == VecTy->getElementType(),
- "experimental_vector_extract result must have the same element "
- "type as the input vector.",
- &Call);
- Assert(IdxN % ResultEC.getKnownMinValue() == 0,
- "experimental_vector_extract index must be a constant multiple of "
- "the result type's known minimum vector length.");
+ Check(ResultTy->getElementType() == VecTy->getElementType(),
+ "vector_extract result must have the same element "
+ "type as the input vector.",
+ &Call);
+ Check(IdxN % ResultEC.getKnownMinValue() == 0,
+ "vector_extract index must be a constant multiple of "
+ "the result type's known minimum vector length.");
// If this extraction is not the 'mixed' case where a fixed vector is
// extracted from a scalable vector, ensure that the extraction does not
// overrun the parent vector.
if (VecEC.isScalable() == ResultEC.isScalable()) {
- Assert(IdxN < VecEC.getKnownMinValue() &&
- IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
- "experimental_vector_extract would overrun.");
+ Check(IdxN < VecEC.getKnownMinValue() &&
+ IdxN + ResultEC.getKnownMinValue() <= VecEC.getKnownMinValue(),
+ "vector_extract would overrun.");
}
break;
}
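// A minimal sketch of an extract these checks accept: index 4 is a multiple
// of the result length, and 4 + 2 <= 8 stays within the source vector:
//
//   %s = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %vec,
//                                                        i64 4)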
@@ -5449,11 +5576,24 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
case Intrinsic::preserve_array_access_index:
- case Intrinsic::preserve_struct_access_index: {
- Type *ElemTy = Call.getAttributes().getParamElementType(0);
- Assert(ElemTy,
- "Intrinsic requires elementtype attribute on first argument.",
- &Call);
+ case Intrinsic::preserve_struct_access_index:
+ case Intrinsic::aarch64_ldaxr:
+ case Intrinsic::aarch64_ldxr:
+ case Intrinsic::arm_ldaex:
+ case Intrinsic::arm_ldrex: {
+ Type *ElemTy = Call.getParamElementType(0);
+ Check(ElemTy, "Intrinsic requires elementtype attribute on first argument.",
+ &Call);
+ break;
+ }
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr:
+ case Intrinsic::arm_stlex:
+ case Intrinsic::arm_strex: {
+ Type *ElemTy = Call.getAttributes().getParamElementType(1);
+ Check(ElemTy,
+ "Intrinsic requires elementtype attribute on second argument.",
+ &Call);
break;
}
};
@@ -5478,6 +5618,101 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
return nullptr;
}
+void Verifier::visitVPIntrinsic(VPIntrinsic &VPI) {
+ if (auto *VPCast = dyn_cast<VPCastIntrinsic>(&VPI)) {
+ auto *RetTy = cast<VectorType>(VPCast->getType());
+ auto *ValTy = cast<VectorType>(VPCast->getOperand(0)->getType());
+ Check(RetTy->getElementCount() == ValTy->getElementCount(),
+ "VP cast intrinsic first argument and result vector lengths must be "
+ "equal",
+ *VPCast);
+
+ switch (VPCast->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unknown VP cast intrinsic");
+ case Intrinsic::vp_trunc:
+ Check(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
+ "llvm.vp.trunc intrinsic first argument and result element type "
+ "must be integer",
+ *VPCast);
+ Check(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
+ "llvm.vp.trunc intrinsic the bit size of first argument must be "
+ "larger than the bit size of the return type",
+ *VPCast);
+ break;
+ case Intrinsic::vp_zext:
+ case Intrinsic::vp_sext:
+ Check(RetTy->isIntOrIntVectorTy() && ValTy->isIntOrIntVectorTy(),
+ "llvm.vp.zext or llvm.vp.sext intrinsic first argument and result "
+ "element type must be integer",
+ *VPCast);
+ Check(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
+ "llvm.vp.zext or llvm.vp.sext intrinsic the bit size of first "
+ "argument must be smaller than the bit size of the return type",
+ *VPCast);
+ break;
+ case Intrinsic::vp_fptoui:
+ case Intrinsic::vp_fptosi:
+ Check(
+ RetTy->isIntOrIntVectorTy() && ValTy->isFPOrFPVectorTy(),
+ "llvm.vp.fptoui or llvm.vp.fptosi intrinsic first argument element "
+ "type must be floating-point and result element type must be integer",
+ *VPCast);
+ break;
+ case Intrinsic::vp_uitofp:
+ case Intrinsic::vp_sitofp:
+ Check(
+ RetTy->isFPOrFPVectorTy() && ValTy->isIntOrIntVectorTy(),
+ "llvm.vp.uitofp or llvm.vp.sitofp intrinsic first argument element "
+ "type must be integer and result element type must be floating-point",
+ *VPCast);
+ break;
+ case Intrinsic::vp_fptrunc:
+ Check(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
+ "llvm.vp.fptrunc intrinsic first argument and result element type "
+ "must be floating-point",
+ *VPCast);
+ Check(RetTy->getScalarSizeInBits() < ValTy->getScalarSizeInBits(),
+ "llvm.vp.fptrunc intrinsic the bit size of first argument must be "
+ "larger than the bit size of the return type",
+ *VPCast);
+ break;
+ case Intrinsic::vp_fpext:
+ Check(RetTy->isFPOrFPVectorTy() && ValTy->isFPOrFPVectorTy(),
+ "llvm.vp.fpext intrinsic first argument and result element type "
+ "must be floating-point",
+ *VPCast);
+ Check(RetTy->getScalarSizeInBits() > ValTy->getScalarSizeInBits(),
+ "llvm.vp.fpext intrinsic the bit size of first argument must be "
+ "smaller than the bit size of the return type",
+ *VPCast);
+ break;
+ case Intrinsic::vp_ptrtoint:
+ Check(RetTy->isIntOrIntVectorTy() && ValTy->isPtrOrPtrVectorTy(),
+ "llvm.vp.ptrtoint intrinsic first argument element type must be "
+ "pointer and result element type must be integer",
+ *VPCast);
+ break;
+ case Intrinsic::vp_inttoptr:
+ Check(RetTy->isPtrOrPtrVectorTy() && ValTy->isIntOrIntVectorTy(),
+ "llvm.vp.inttoptr intrinsic first argument element type must be "
+ "integer and result element type must be pointer",
+ *VPCast);
+ break;
+ }
+ }
+ if (VPI.getIntrinsicID() == Intrinsic::vp_fcmp) {
+ auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
+ Check(CmpInst::isFPPredicate(Pred),
+ "invalid predicate for VP FP comparison intrinsic", &VPI);
+ }
+ if (VPI.getIntrinsicID() == Intrinsic::vp_icmp) {
+ auto Pred = cast<VPCmpIntrinsic>(&VPI)->getPredicate();
+ Check(CmpInst::isIntPredicate(Pred),
+ "invalid predicate for VP integer comparison intrinsic", &VPI);
+ }
+}
+
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
unsigned NumOperands;
bool HasRoundingMD;
@@ -5495,16 +5730,16 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
// Compare intrinsics carry an extra predicate metadata operand.
if (isa<ConstrainedFPCmpIntrinsic>(FPI))
NumOperands += 1;
- Assert((FPI.arg_size() == NumOperands),
- "invalid arguments for constrained FP intrinsic", &FPI);
+ Check((FPI.arg_size() == NumOperands),
+ "invalid arguments for constrained FP intrinsic", &FPI);
switch (FPI.getIntrinsicID()) {
case Intrinsic::experimental_constrained_lrint:
case Intrinsic::experimental_constrained_llrint: {
Type *ValTy = FPI.getArgOperand(0)->getType();
Type *ResultTy = FPI.getType();
- Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
- "Intrinsic does not support vectors", &FPI);
+ Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
+ "Intrinsic does not support vectors", &FPI);
}
break;
@@ -5512,16 +5747,16 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
case Intrinsic::experimental_constrained_llround: {
Type *ValTy = FPI.getArgOperand(0)->getType();
Type *ResultTy = FPI.getType();
- Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
- "Intrinsic does not support vectors", &FPI);
+ Check(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
+ "Intrinsic does not support vectors", &FPI);
break;
}
case Intrinsic::experimental_constrained_fcmp:
case Intrinsic::experimental_constrained_fcmps: {
auto Pred = cast<ConstrainedFPCmpIntrinsic>(&FPI)->getPredicate();
- Assert(CmpInst::isFPPredicate(Pred),
- "invalid predicate for constrained FP comparison intrinsic", &FPI);
+ Check(CmpInst::isFPPredicate(Pred),
+ "invalid predicate for constrained FP comparison intrinsic", &FPI);
break;
}
@@ -5529,21 +5764,21 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
case Intrinsic::experimental_constrained_fptoui: {
Value *Operand = FPI.getArgOperand(0);
uint64_t NumSrcElem = 0;
- Assert(Operand->getType()->isFPOrFPVectorTy(),
- "Intrinsic first argument must be floating point", &FPI);
+ Check(Operand->getType()->isFPOrFPVectorTy(),
+ "Intrinsic first argument must be floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
NumSrcElem = cast<FixedVectorType>(OperandT)->getNumElements();
}
Operand = &FPI;
- Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
- "Intrinsic first argument and result disagree on vector use", &FPI);
- Assert(Operand->getType()->isIntOrIntVectorTy(),
- "Intrinsic result must be an integer", &FPI);
+ Check((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+ "Intrinsic first argument and result disagree on vector use", &FPI);
+ Check(Operand->getType()->isIntOrIntVectorTy(),
+ "Intrinsic result must be an integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- Assert(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
- "Intrinsic first argument and result vector lengths must be equal",
- &FPI);
+ Check(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
+ "Intrinsic first argument and result vector lengths must be equal",
+ &FPI);
}
}
break;
@@ -5552,21 +5787,21 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
case Intrinsic::experimental_constrained_uitofp: {
Value *Operand = FPI.getArgOperand(0);
uint64_t NumSrcElem = 0;
- Assert(Operand->getType()->isIntOrIntVectorTy(),
- "Intrinsic first argument must be integer", &FPI);
+ Check(Operand->getType()->isIntOrIntVectorTy(),
+ "Intrinsic first argument must be integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
NumSrcElem = cast<FixedVectorType>(OperandT)->getNumElements();
}
Operand = &FPI;
- Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
- "Intrinsic first argument and result disagree on vector use", &FPI);
- Assert(Operand->getType()->isFPOrFPVectorTy(),
- "Intrinsic result must be a floating point", &FPI);
+ Check((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+ "Intrinsic first argument and result disagree on vector use", &FPI);
+ Check(Operand->getType()->isFPOrFPVectorTy(),
+ "Intrinsic result must be a floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- Assert(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
- "Intrinsic first argument and result vector lengths must be equal",
- &FPI);
+ Check(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
+ "Intrinsic first argument and result vector lengths must be equal",
+ &FPI);
}
} break;
@@ -5576,26 +5811,26 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Type *OperandTy = Operand->getType();
Value *Result = &FPI;
Type *ResultTy = Result->getType();
- Assert(OperandTy->isFPOrFPVectorTy(),
- "Intrinsic first argument must be FP or FP vector", &FPI);
- Assert(ResultTy->isFPOrFPVectorTy(),
- "Intrinsic result must be FP or FP vector", &FPI);
- Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
- "Intrinsic first argument and result disagree on vector use", &FPI);
+ Check(OperandTy->isFPOrFPVectorTy(),
+ "Intrinsic first argument must be FP or FP vector", &FPI);
+ Check(ResultTy->isFPOrFPVectorTy(),
+ "Intrinsic result must be FP or FP vector", &FPI);
+ Check(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
+ "Intrinsic first argument and result disagree on vector use", &FPI);
if (OperandTy->isVectorTy()) {
- Assert(cast<FixedVectorType>(OperandTy)->getNumElements() ==
- cast<FixedVectorType>(ResultTy)->getNumElements(),
- "Intrinsic first argument and result vector lengths must be equal",
- &FPI);
+ Check(cast<FixedVectorType>(OperandTy)->getNumElements() ==
+ cast<FixedVectorType>(ResultTy)->getNumElements(),
+ "Intrinsic first argument and result vector lengths must be equal",
+ &FPI);
}
if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
- Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(),
- "Intrinsic first argument's type must be larger than result type",
- &FPI);
+ Check(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(),
+ "Intrinsic first argument's type must be larger than result type",
+ &FPI);
} else {
- Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(),
- "Intrinsic first argument's type must be smaller than result type",
- &FPI);
+ Check(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(),
+ "Intrinsic first argument's type must be smaller than result type",
+ &FPI);
}
}
break;
@@ -5609,25 +5844,25 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
// match the specification in the intrinsic call table. Thus, no
// argument type check is needed here.
- Assert(FPI.getExceptionBehavior().hasValue(),
- "invalid exception behavior argument", &FPI);
+ Check(FPI.getExceptionBehavior().has_value(),
+ "invalid exception behavior argument", &FPI);
if (HasRoundingMD) {
- Assert(FPI.getRoundingMode().hasValue(),
- "invalid rounding mode argument", &FPI);
+ Check(FPI.getRoundingMode().has_value(), "invalid rounding mode argument",
+ &FPI);
}
}
void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) {
auto *MD = DII.getRawLocation();
- AssertDI(isa<ValueAsMetadata>(MD) || isa<DIArgList>(MD) ||
- (isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
- "invalid llvm.dbg." + Kind + " intrinsic address/value", &DII, MD);
- AssertDI(isa<DILocalVariable>(DII.getRawVariable()),
- "invalid llvm.dbg." + Kind + " intrinsic variable", &DII,
- DII.getRawVariable());
- AssertDI(isa<DIExpression>(DII.getRawExpression()),
- "invalid llvm.dbg." + Kind + " intrinsic expression", &DII,
- DII.getRawExpression());
+ CheckDI(isa<ValueAsMetadata>(MD) || isa<DIArgList>(MD) ||
+ (isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
+ "invalid llvm.dbg." + Kind + " intrinsic address/value", &DII, MD);
+ CheckDI(isa<DILocalVariable>(DII.getRawVariable()),
+ "invalid llvm.dbg." + Kind + " intrinsic variable", &DII,
+ DII.getRawVariable());
+ CheckDI(isa<DIExpression>(DII.getRawExpression()),
+ "invalid llvm.dbg." + Kind + " intrinsic expression", &DII,
+ DII.getRawExpression());
// Ignore broken !dbg attachments; they're checked elsewhere.
if (MDNode *N = DII.getDebugLoc().getAsMDNode())
@@ -5640,29 +5875,30 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) {
// The scopes for variables and !dbg attachments must agree.
DILocalVariable *Var = DII.getVariable();
DILocation *Loc = DII.getDebugLoc();
- AssertDI(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment",
- &DII, BB, F);
+ CheckDI(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment",
+ &DII, BB, F);
DISubprogram *VarSP = getSubprogram(Var->getRawScope());
DISubprogram *LocSP = getSubprogram(Loc->getRawScope());
if (!VarSP || !LocSP)
return; // Broken scope chains are checked elsewhere.
- AssertDI(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind +
- " variable and !dbg attachment",
- &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
- Loc->getScope()->getSubprogram());
+ CheckDI(VarSP == LocSP,
+ "mismatched subprogram between llvm.dbg." + Kind +
+ " variable and !dbg attachment",
+ &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
+ Loc->getScope()->getSubprogram());
// This check is redundant with one in visitLocalVariable().
- AssertDI(isType(Var->getRawType()), "invalid type ref", Var,
- Var->getRawType());
+ CheckDI(isType(Var->getRawType()), "invalid type ref", Var,
+ Var->getRawType());
verifyFnArgs(DII);
}
void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) {
- AssertDI(isa<DILabel>(DLI.getRawLabel()),
- "invalid llvm.dbg." + Kind + " intrinsic variable", &DLI,
- DLI.getRawLabel());
+ CheckDI(isa<DILabel>(DLI.getRawLabel()),
+ "invalid llvm.dbg." + Kind + " intrinsic variable", &DLI,
+ DLI.getRawLabel());
// Ignore broken !dbg attachments; they're checked elsewhere.
if (MDNode *N = DLI.getDebugLoc().getAsMDNode())
@@ -5675,18 +5911,19 @@ void Verifier::visitDbgLabelIntrinsic(StringRef Kind, DbgLabelInst &DLI) {
// The scopes for variables and !dbg attachments must agree.
DILabel *Label = DLI.getLabel();
DILocation *Loc = DLI.getDebugLoc();
- Assert(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment",
- &DLI, BB, F);
+ Check(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment", &DLI,
+ BB, F);
DISubprogram *LabelSP = getSubprogram(Label->getRawScope());
DISubprogram *LocSP = getSubprogram(Loc->getRawScope());
if (!LabelSP || !LocSP)
return;
- AssertDI(LabelSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind +
- " label and !dbg attachment",
- &DLI, BB, F, Label, Label->getScope()->getSubprogram(), Loc,
- Loc->getScope()->getSubprogram());
+ CheckDI(LabelSP == LocSP,
+ "mismatched subprogram between llvm.dbg." + Kind +
+ " label and !dbg attachment",
+ &DLI, BB, F, Label, Label->getScope()->getSubprogram(), Loc,
+ Loc->getScope()->getSubprogram());
}
void Verifier::verifyFragmentExpression(const DbgVariableIntrinsic &I) {
@@ -5726,9 +5963,9 @@ void Verifier::verifyFragmentExpression(const DIVariable &V,
unsigned FragSize = Fragment.SizeInBits;
unsigned FragOffset = Fragment.OffsetInBits;
- AssertDI(FragSize + FragOffset <= *VarSize,
- "fragment is larger than or outside of variable", Desc, &V);
- AssertDI(FragSize != *VarSize, "fragment covers entire variable", Desc, &V);
+ CheckDI(FragSize + FragOffset <= *VarSize,
+ "fragment is larger than or outside of variable", Desc, &V);
+ CheckDI(FragSize != *VarSize, "fragment covers entire variable", Desc, &V);
}
void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) {
@@ -5743,7 +5980,7 @@ void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) {
return;
DILocalVariable *Var = I.getVariable();
- AssertDI(Var, "dbg intrinsic without variable");
+ CheckDI(Var, "dbg intrinsic without variable");
unsigned ArgNo = Var->getArg();
if (!ArgNo)
@@ -5756,8 +5993,8 @@ void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) {
auto *Prev = DebugFnArgs[ArgNo - 1];
DebugFnArgs[ArgNo - 1] = Var;
- AssertDI(!Prev || (Prev == Var), "conflicting debug info for argument", &I,
- Prev, Var);
+ CheckDI(!Prev || (Prev == Var), "conflicting debug info for argument", &I,
+ Prev, Var);
}
void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) {
@@ -5767,7 +6004,7 @@ void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) {
if (!E || !E->isValid())
return;
- AssertDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I);
+ CheckDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I);
}
void Verifier::verifyCompileUnits() {
@@ -5781,7 +6018,7 @@ void Verifier::verifyCompileUnits() {
if (CUs)
Listed.insert(CUs->op_begin(), CUs->op_end());
for (auto *CU : CUVisited)
- AssertDI(Listed.count(CU), "DICompileUnit not listed in llvm.dbg.cu", CU);
+ CheckDI(Listed.count(CU), "DICompileUnit not listed in llvm.dbg.cu", CU);
CUVisited.clear();
}
@@ -5791,10 +6028,10 @@ void Verifier::verifyDeoptimizeCallingConvs() {
const Function *First = DeoptimizeDeclarations[0];
for (auto *F : makeArrayRef(DeoptimizeDeclarations).slice(1)) {
- Assert(First->getCallingConv() == F->getCallingConv(),
- "All llvm.experimental.deoptimize declarations must have the same "
- "calling convention",
- First, F);
+ Check(First->getCallingConv() == F->getCallingConv(),
+ "All llvm.experimental.deoptimize declarations must have the same "
+ "calling convention",
+ First, F);
}
}
@@ -5802,39 +6039,39 @@ void Verifier::verifyAttachedCallBundle(const CallBase &Call,
const OperandBundleUse &BU) {
FunctionType *FTy = Call.getFunctionType();
- Assert((FTy->getReturnType()->isPointerTy() ||
- (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())),
- "a call with operand bundle \"clang.arc.attachedcall\" must call a "
- "function returning a pointer or a non-returning function that has a "
- "void return type",
- Call);
+ Check((FTy->getReturnType()->isPointerTy() ||
+ (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())),
+ "a call with operand bundle \"clang.arc.attachedcall\" must call a "
+ "function returning a pointer or a non-returning function that has a "
+ "void return type",
+ Call);
- Assert(BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()),
- "operand bundle \"clang.arc.attachedcall\" requires one function as "
- "an argument",
- Call);
+ Check(BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()),
+ "operand bundle \"clang.arc.attachedcall\" requires one function as "
+ "an argument",
+ Call);
auto *Fn = cast<Function>(BU.Inputs.front());
Intrinsic::ID IID = Fn->getIntrinsicID();
if (IID) {
- Assert((IID == Intrinsic::objc_retainAutoreleasedReturnValue ||
- IID == Intrinsic::objc_unsafeClaimAutoreleasedReturnValue),
- "invalid function argument", Call);
+ Check((IID == Intrinsic::objc_retainAutoreleasedReturnValue ||
+ IID == Intrinsic::objc_unsafeClaimAutoreleasedReturnValue),
+ "invalid function argument", Call);
} else {
StringRef FnName = Fn->getName();
- Assert((FnName == "objc_retainAutoreleasedReturnValue" ||
- FnName == "objc_unsafeClaimAutoreleasedReturnValue"),
- "invalid function argument", Call);
+ Check((FnName == "objc_retainAutoreleasedReturnValue" ||
+ FnName == "objc_unsafeClaimAutoreleasedReturnValue"),
+ "invalid function argument", Call);
}
}
void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) {
- bool HasSource = F.getSource().hasValue();
+ bool HasSource = F.getSource().has_value();
if (!HasSourceDebugInfo.count(&U))
HasSourceDebugInfo[&U] = HasSource;
- AssertDI(HasSource == HasSourceDebugInfo[&U],
- "inconsistent use of embedded source");
+ CheckDI(HasSource == HasSourceDebugInfo[&U],
+ "inconsistent use of embedded source");
}
void Verifier::verifyNoAliasScopeDecl() {
@@ -5847,16 +6084,15 @@ void Verifier::verifyNoAliasScopeDecl() {
"Not a llvm.experimental.noalias.scope.decl ?");
const auto *ScopeListMV = dyn_cast<MetadataAsValue>(
II->getOperand(Intrinsic::NoAliasScopeDeclScopeArg));
- Assert(ScopeListMV != nullptr,
- "llvm.experimental.noalias.scope.decl must have a MetadataAsValue "
- "argument",
- II);
+ Check(ScopeListMV != nullptr,
+ "llvm.experimental.noalias.scope.decl must have a MetadataAsValue "
+ "argument",
+ II);
const auto *ScopeListMD = dyn_cast<MDNode>(ScopeListMV->getMetadata());
- Assert(ScopeListMD != nullptr, "!id.scope.list must point to an MDNode",
- II);
- Assert(ScopeListMD->getNumOperands() == 1,
- "!id.scope.list must point to a list with a single scope", II);
+ Check(ScopeListMD != nullptr, "!id.scope.list must point to an MDNode", II);
+ Check(ScopeListMD->getNumOperands() == 1,
+ "!id.scope.list must point to a list with a single scope", II);
visitAliasScopeListMetadata(ScopeListMD);
}
@@ -5899,10 +6135,10 @@ void Verifier::verifyNoAliasScopeDecl() {
for (auto *I : llvm::make_range(ItCurrent, ItNext))
for (auto *J : llvm::make_range(ItCurrent, ItNext))
if (I != J)
- Assert(!DT.dominates(I, J),
- "llvm.experimental.noalias.scope.decl dominates another one "
- "with the same scope",
- I);
+ Check(!DT.dominates(I, J),
+ "llvm.experimental.noalias.scope.decl dominates another one "
+ "with the same scope",
+ I);
ItCurrent = ItNext;
}
}
@@ -5995,7 +6231,7 @@ template <typename... Tys> void TBAAVerifier::CheckFailed(Tys &&... Args) {
return Diagnostic->CheckFailed(Args...);
}
-#define AssertTBAA(C, ...) \
+#define CheckTBAA(C, ...) \
do { \
if (!(C)) { \
CheckFailed(__VA_ARGS__); \
@@ -6185,7 +6421,7 @@ MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I,
// Scalar nodes have only one possible "field" -- their parent in the access
// hierarchy. Offset must be zero at this point, but our caller is supposed
- // to Assert that.
+ // to check that.
if (BaseNode->getNumOperands() == 2)
return cast<MDNode>(BaseNode->getOperand(1));
@@ -6227,17 +6463,17 @@ static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
}
bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
- AssertTBAA(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
- isa<VAArgInst>(I) || isa<AtomicRMWInst>(I) ||
- isa<AtomicCmpXchgInst>(I),
- "This instruction shall not have a TBAA access tag!", &I);
+ CheckTBAA(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
+ isa<VAArgInst>(I) || isa<AtomicRMWInst>(I) ||
+ isa<AtomicCmpXchgInst>(I),
+ "This instruction shall not have a TBAA access tag!", &I);
bool IsStructPathTBAA =
isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
- AssertTBAA(
- IsStructPathTBAA,
- "Old-style TBAA is no longer allowed, use struct-path TBAA instead", &I);
+ CheckTBAA(IsStructPathTBAA,
+ "Old-style TBAA is no longer allowed, use struct-path TBAA instead",
+ &I);
MDNode *BaseNode = dyn_cast_or_null<MDNode>(MD->getOperand(0));
MDNode *AccessType = dyn_cast_or_null<MDNode>(MD->getOperand(1));
@@ -6245,18 +6481,18 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
bool IsNewFormat = isNewFormatTBAATypeNode(AccessType);
if (IsNewFormat) {
- AssertTBAA(MD->getNumOperands() == 4 || MD->getNumOperands() == 5,
- "Access tag metadata must have either 4 or 5 operands", &I, MD);
+ CheckTBAA(MD->getNumOperands() == 4 || MD->getNumOperands() == 5,
+ "Access tag metadata must have either 4 or 5 operands", &I, MD);
} else {
- AssertTBAA(MD->getNumOperands() < 5,
- "Struct tag metadata must have either 3 or 4 operands", &I, MD);
+ CheckTBAA(MD->getNumOperands() < 5,
+ "Struct tag metadata must have either 3 or 4 operands", &I, MD);
}
// Check the access size field.
if (IsNewFormat) {
auto *AccessSizeNode = mdconst::dyn_extract_or_null<ConstantInt>(
MD->getOperand(3));
- AssertTBAA(AccessSizeNode, "Access size field must be a constant", &I, MD);
+ CheckTBAA(AccessSizeNode, "Access size field must be a constant", &I, MD);
}
// Check the immutability flag.
@@ -6264,28 +6500,28 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
if (MD->getNumOperands() == ImmutabilityFlagOpNo + 1) {
auto *IsImmutableCI = mdconst::dyn_extract_or_null<ConstantInt>(
MD->getOperand(ImmutabilityFlagOpNo));
- AssertTBAA(IsImmutableCI,
- "Immutability tag on struct tag metadata must be a constant",
- &I, MD);
- AssertTBAA(
+ CheckTBAA(IsImmutableCI,
+ "Immutability tag on struct tag metadata must be a constant", &I,
+ MD);
+ CheckTBAA(
IsImmutableCI->isZero() || IsImmutableCI->isOne(),
"Immutability part of the struct tag metadata must be either 0 or 1",
&I, MD);
}
- AssertTBAA(BaseNode && AccessType,
- "Malformed struct tag metadata: base and access-type "
- "should be non-null and point to Metadata nodes",
- &I, MD, BaseNode, AccessType);
+ CheckTBAA(BaseNode && AccessType,
+ "Malformed struct tag metadata: base and access-type "
+ "should be non-null and point to Metadata nodes",
+ &I, MD, BaseNode, AccessType);
if (!IsNewFormat) {
- AssertTBAA(isValidScalarTBAANode(AccessType),
- "Access type node must be a valid scalar type", &I, MD,
- AccessType);
+ CheckTBAA(isValidScalarTBAANode(AccessType),
+ "Access type node must be a valid scalar type", &I, MD,
+ AccessType);
}
auto *OffsetCI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2));
- AssertTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
+ CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
APInt Offset = OffsetCI->getValue();
bool SeenAccessTypeInPath = false;
@@ -6313,21 +6549,21 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
SeenAccessTypeInPath |= BaseNode == AccessType;
if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
- AssertTBAA(Offset == 0, "Offset not zero at the point of scalar access",
- &I, MD, &Offset);
+ CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access",
+ &I, MD, &Offset);
- AssertTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
- (BaseNodeBitWidth == 0 && Offset == 0) ||
- (IsNewFormat && BaseNodeBitWidth == ~0u),
- "Access bit-width not the same as description bit-width", &I, MD,
- BaseNodeBitWidth, Offset.getBitWidth());
+ CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
+ (BaseNodeBitWidth == 0 && Offset == 0) ||
+ (IsNewFormat && BaseNodeBitWidth == ~0u),
+ "Access bit-width not the same as description bit-width", &I, MD,
+ BaseNodeBitWidth, Offset.getBitWidth());
if (IsNewFormat && SeenAccessTypeInPath)
break;
}
- AssertTBAA(SeenAccessTypeInPath, "Did not see access type in access path!",
- &I, MD);
+ CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
+ MD);
return true;
}
diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
index cb72f57f7bde..13801cd2cbc0 100644
--- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp
+++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -17,7 +17,6 @@
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Process.h"
using llvm::object::ELFObjectFile;
@@ -195,7 +194,7 @@ public:
for (const std::string &Lib : Stub.NeededLibs)
DynStr.Content.add(Lib);
if (Stub.SoName)
- DynStr.Content.add(Stub.SoName.getValue());
+ DynStr.Content.add(*Stub.SoName);
std::vector<OutputSection<ELFT> *> Sections = {&DynSym, &DynStr, &DynTab,
&ShStrTab};
@@ -218,7 +217,8 @@ public:
// time as long as it is not SHN_UNDEF. Set shndx to 1, which
// points to ".dynsym".
uint16_t Shndx = Sym.Undefined ? SHN_UNDEF : 1;
- DynSym.Content.add(DynStr.Content.getOffset(Sym.Name), Sym.Size, Bind,
+ uint64_t Size = Sym.Size.value_or(0);
+ DynSym.Content.add(DynStr.Content.getOffset(Sym.Name), Size, Bind,
convertIFSSymbolTypeToELF(Sym.Type), 0, Shndx);
}
DynSym.Size = DynSym.Content.getSize();
@@ -226,11 +226,12 @@ public:
// Populate dynamic table.
size_t DynSymIndex = DynTab.Content.addAddr(DT_SYMTAB, 0);
size_t DynStrIndex = DynTab.Content.addAddr(DT_STRTAB, 0);
+ DynTab.Content.addValue(DT_STRSZ, DynSym.Size);
for (const std::string &Lib : Stub.NeededLibs)
DynTab.Content.addValue(DT_NEEDED, DynStr.Content.getOffset(Lib));
if (Stub.SoName)
DynTab.Content.addValue(DT_SONAME,
- DynStr.Content.getOffset(Stub.SoName.getValue()));
+ DynStr.Content.getOffset(*Stub.SoName));
DynTab.Size = DynTab.Content.getSize();
// Calculate sections' addresses and offsets.
uint64_t CurrentOffset = sizeof(Elf_Ehdr);
@@ -249,8 +250,7 @@ public:
fillStrTabShdr(ShStrTab);
// Finish initializing the ELF header.
- initELFHeader<ELFT>(ElfHeader,
- static_cast<uint16_t>(Stub.Target.Arch.getValue()));
+ initELFHeader<ELFT>(ElfHeader, static_cast<uint16_t>(*Stub.Target.Arch));
ElfHeader.e_shstrndx = ShStrTab.Index;
ElfHeader.e_shnum = LastSection->Index + 1;
ElfHeader.e_shoff =
@@ -334,6 +334,89 @@ private:
write(Data + shdrOffset(Sec), Sec.Shdr);
}
};
+
+/// This function takes an error and appends a string of text to the end of
+/// that error. Since appending to an Error's message is not supported
+/// directly, this function technically creates a new error with the combined
+/// message and consumes the old error.
+///
+/// @param Err Source error.
+/// @param After Text to append at the end of Err's error message.
+Error appendToError(Error Err, StringRef After) {
+ std::string Message;
+ raw_string_ostream Stream(Message);
+ Stream << Err;
+ Stream << " " << After;
+ consumeError(std::move(Err));
+ return createError(Stream.str());
+}
+
+template <class ELFT> class DynSym {
+ using Elf_Shdr_Range = typename ELFT::ShdrRange;
+ using Elf_Shdr = typename ELFT::Shdr;
+
+public:
+ static Expected<DynSym> create(const ELFFile<ELFT> &ElfFile,
+ const DynamicEntries &DynEnt) {
+ Expected<Elf_Shdr_Range> Shdrs = ElfFile.sections();
+ if (!Shdrs)
+ return Shdrs.takeError();
+ return DynSym(ElfFile, DynEnt, *Shdrs);
+ }
+
+ Expected<const uint8_t *> getDynSym() {
+ if (DynSymHdr)
+ return ElfFile.base() + DynSymHdr->sh_offset;
+ return getDynamicData(DynEnt.DynSymAddr, "dynamic symbol table");
+ }
+
+ Expected<StringRef> getDynStr() {
+ if (DynSymHdr)
+ return ElfFile.getStringTableForSymtab(*DynSymHdr, Shdrs);
+ Expected<const uint8_t *> DataOrErr = getDynamicData(
+ DynEnt.StrTabAddr, "dynamic string table", DynEnt.StrSize);
+ if (!DataOrErr)
+ return DataOrErr.takeError();
+ return StringRef(reinterpret_cast<const char *>(*DataOrErr),
+ DynEnt.StrSize);
+ }
+
+private:
+ DynSym(const ELFFile<ELFT> &ElfFile, const DynamicEntries &DynEnt,
+ Elf_Shdr_Range Shdrs)
+ : ElfFile(ElfFile), DynEnt(DynEnt), Shdrs(Shdrs),
+ DynSymHdr(findDynSymHdr()) {}
+
+ const Elf_Shdr *findDynSymHdr() {
+ for (const Elf_Shdr &Sec : Shdrs)
+ if (Sec.sh_type == SHT_DYNSYM) {
+ // If multiple .dynsym sections are present, use the first one.
+ // This behavior aligns with llvm::object::ELFFile::getDynSymtabSize().
+ return &Sec;
+ }
+ return nullptr;
+ }
+
+ Expected<const uint8_t *> getDynamicData(uint64_t EntAddr, StringRef Name,
+ uint64_t Size = 0) {
+ Expected<const uint8_t *> SecPtr = ElfFile.toMappedAddr(EntAddr);
+ if (!SecPtr)
+ return appendToError(
+ SecPtr.takeError(),
+ ("when locating " + Name + " section contents").str());
+ Expected<const uint8_t *> SecEndPtr = ElfFile.toMappedAddr(EntAddr + Size);
+ if (!SecEndPtr)
+ return appendToError(
+ SecEndPtr.takeError(),
+ ("when locating " + Name + " section contents").str());
+ return *SecPtr;
+ }
+
+ const ELFFile<ELFT> &ElfFile;
+ const DynamicEntries &DynEnt;
+ Elf_Shdr_Range Shdrs;
+ const Elf_Shdr *DynSymHdr;
+};
} // end anonymous namespace
/// This function behaves similarly to StringRef::substr(), but attempts to
@@ -353,22 +436,6 @@ static Expected<StringRef> terminatedSubstr(StringRef Str, size_t Offset) {
return Str.substr(Offset, StrLen);
}
-/// This function takes an error, and appends a string of text to the end of
-/// that error. Since "appending" to an Error isn't supported behavior of an
-/// Error, this function technically creates a new error with the combined
-/// message and consumes the old error.
-///
-/// @param Err Source error.
-/// @param After Text to append at the end of Err's error message.
-Error appendToError(Error Err, StringRef After) {
- std::string Message;
- raw_string_ostream Stream(Message);
- Stream << Err;
- Stream << " " << After;
- consumeError(std::move(Err));
- return createError(Stream.str());
-}
-
/// This function populates a DynamicEntries struct using an ELFT::DynRange.
/// After populating the struct, the members are validated with
/// some basic correctness checks.
@@ -425,7 +492,7 @@ static Error populateDynamic(DynamicEntries &Dyn,
return createError(
"Couldn't locate dynamic symbol table (no DT_SYMTAB entry)");
}
- if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) {
+ if (Dyn.SONameOffset && *Dyn.SONameOffset >= Dyn.StrSize) {
return createStringError(object_error::parse_failed,
"DT_SONAME string offset (0x%016" PRIx64
") outside of dynamic string table",
@@ -507,7 +574,6 @@ template <class ELFT>
static Expected<std::unique_ptr<IFSStub>>
buildStub(const ELFObjectFile<ELFT> &ElfObj) {
using Elf_Dyn_Range = typename ELFT::DynRange;
- using Elf_Phdr_Range = typename ELFT::PhdrRange;
using Elf_Sym_Range = typename ELFT::SymRange;
using Elf_Sym = typename ELFT::Sym;
std::unique_ptr<IFSStub> DestStub = std::make_unique<IFSStub>();
@@ -518,25 +584,19 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
return DynTable.takeError();
}
- // Fetch program headers.
- Expected<Elf_Phdr_Range> PHdrs = ElfFile.program_headers();
- if (!PHdrs) {
- return PHdrs.takeError();
- }
-
// Collect relevant .dynamic entries.
DynamicEntries DynEnt;
if (Error Err = populateDynamic<ELFT>(DynEnt, *DynTable))
return std::move(Err);
+ Expected<DynSym<ELFT>> EDynSym = DynSym<ELFT>::create(ElfFile, DynEnt);
+ if (!EDynSym)
+ return EDynSym.takeError();
- // Get pointer to in-memory location of .dynstr section.
- Expected<const uint8_t *> DynStrPtr = ElfFile.toMappedAddr(DynEnt.StrTabAddr);
- if (!DynStrPtr)
- return appendToError(DynStrPtr.takeError(),
- "when locating .dynstr section contents");
+ Expected<StringRef> EDynStr = EDynSym->getDynStr();
+ if (!EDynStr)
+ return EDynStr.takeError();
- StringRef DynStr(reinterpret_cast<const char *>(DynStrPtr.get()),
- DynEnt.StrSize);
+ StringRef DynStr = *EDynStr;
// Populate Arch from ELF header.
DestStub->Target.Arch = static_cast<IFSArch>(ElfFile.getHeader().e_machine);
@@ -547,7 +607,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
DestStub->Target.ObjectFormat = "ELF";
// Populate SoName from .dynamic entries and dynamic string table.
- if (DynEnt.SONameOffset.hasValue()) {
+ if (DynEnt.SONameOffset) {
Expected<StringRef> NameOrErr =
terminatedSubstr(DynStr, *DynEnt.SONameOffset);
if (!NameOrErr) {
@@ -572,8 +632,7 @@ buildStub(const ELFObjectFile<ELFT> &ElfObj) {
return SymCount.takeError();
if (*SymCount > 0) {
// Get pointer to in-memory location of .dynsym section.
- Expected<const uint8_t *> DynSymPtr =
- ElfFile.toMappedAddr(DynEnt.DynSymAddr);
+ Expected<const uint8_t *> DynSymPtr = EDynSym->getDynSym();
if (!DynSymPtr)
return appendToError(DynSymPtr.takeError(),
"when locating .dynsym section contents");
diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp
index 4ccbb18ca04a..71189e79360e 100644
--- a/llvm/lib/InterfaceStub/IFSHandler.cpp
+++ b/llvm/lib/InterfaceStub/IFSHandler.cpp
@@ -7,14 +7,17 @@
//===-----------------------------------------------------------------------===/
#include "llvm/InterfaceStub/IFSHandler.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/InterfaceStub/IFSStub.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/YAMLTraits.h"
+#include <functional>
using namespace llvm;
using namespace llvm::ifs;
@@ -115,11 +118,12 @@ template <> struct MappingTraits<IFSSymbol> {
IO.mapRequired("Type", Symbol.Type);
// The need for symbol size depends on the symbol type.
if (Symbol.Type == IFSSymbolType::NoType) {
- IO.mapOptional("Size", Symbol.Size, (uint64_t)0);
- } else if (Symbol.Type == IFSSymbolType::Func) {
- Symbol.Size = 0;
- } else {
- IO.mapRequired("Size", Symbol.Size);
+ // Size is None, so we are reading it in, or it is nonzero, so we
+ // should emit it.
+ if (!Symbol.Size || *Symbol.Size)
+ IO.mapOptional("Size", Symbol.Size);
+ } else if (Symbol.Type != IFSSymbolType::Func) {
+ IO.mapOptional("Size", Symbol.Size);
}
IO.mapOptional("Undefined", Symbol.Undefined, false);
IO.mapOptional("Weak", Symbol.Weak, false);
@@ -189,7 +193,7 @@ Expected<std::unique_ptr<IFSStub>> ifs::readIFSFromBuffer(StringRef Buf) {
std::make_error_code(std::errc::invalid_argument));
if (Stub->Target.ArchString) {
Stub->Target.Arch =
- ELF::convertArchNameToEMachine(Stub->Target.ArchString.getValue());
+ ELF::convertArchNameToEMachine(*Stub->Target.ArchString);
}
return std::move(Stub);
}
@@ -262,7 +266,7 @@ Error ifs::validateIFSTarget(IFSStub &Stub, bool ParseTriple) {
ValidationEC);
}
if (ParseTriple) {
- IFSTarget TargetFromTriple = parseTriple(Stub.Target.Triple.getValue());
+ IFSTarget TargetFromTriple = parseTriple(*Stub.Target.Triple);
Stub.Target.Arch = TargetFromTriple.Arch;
Stub.Target.BitWidth = TargetFromTriple.BitWidth;
Stub.Target.Endianness = TargetFromTriple.Endianness;
@@ -328,12 +332,28 @@ void ifs::stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch,
}
}
-void ifs::stripIFSUndefinedSymbols(IFSStub &Stub) {
- for (auto Iter = Stub.Symbols.begin(); Iter != Stub.Symbols.end();) {
- if (Iter->Undefined) {
- Iter = Stub.Symbols.erase(Iter);
- } else {
- Iter++;
- }
+Error ifs::filterIFSSyms(IFSStub &Stub, bool StripUndefined,
+ const std::vector<std::string> &Exclude) {
+ std::function<bool(const IFSSymbol &)> Filter = [](const IFSSymbol &) {
+ return false;
+ };
+
+ if (StripUndefined) {
+ Filter = [Filter](const IFSSymbol &Sym) {
+ return Sym.Undefined || Filter(Sym);
+ };
+ }
+
+ for (StringRef Glob : Exclude) {
+ Expected<llvm::GlobPattern> PatternOrErr = llvm::GlobPattern::create(Glob);
+ if (!PatternOrErr)
+ return PatternOrErr.takeError();
+ Filter = [Pattern = *PatternOrErr, Filter](const IFSSymbol &Sym) {
+ return Pattern.match(Sym.Name) || Filter(Sym);
+ };
}
+
+ llvm::erase_if(Stub.Symbols, Filter);
+
+ return Error::success();
}
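The new filterIFSSyms builds its predicate by repeatedly capturing the previous std::function by value, so every exclusion glob (and the optional undefined-symbol check) ORs onto the chain before a single erase_if pass. The same idiom in plain C++, with string equality standing in for GlobPattern::match:

#include <functional>
#include <string>
#include <vector>

int main() {
  // Start with a predicate that rejects nothing.
  std::function<bool(const std::string &)> Filter =
      [](const std::string &) { return false; };

  // Each exclusion captures the previous Filter by value and ORs itself
  // on top, mirroring the GlobPattern loop in filterIFSSyms.
  for (std::string Banned : {std::string("foo"), std::string("bar")})
    Filter = [Banned, Filter](const std::string &S) {
      return S == Banned || Filter(S);
    };

  std::vector<std::string> Syms = {"foo", "baz", "bar"};
  std::erase_if(Syms, Filter); // C++20; the diff uses llvm::erase_if
  // Syms now holds only "baz".
  return Syms.size() == 1 ? 0 : 1;
}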
diff --git a/llvm/lib/InterfaceStub/IFSStub.cpp b/llvm/lib/InterfaceStub/IFSStub.cpp
index 1ce7a66869b8..f043f7e9e383 100644
--- a/llvm/lib/InterfaceStub/IFSStub.cpp
+++ b/llvm/lib/InterfaceStub/IFSStub.cpp
@@ -8,7 +8,7 @@
#include "llvm/InterfaceStub/IFSStub.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
using namespace llvm::ifs;
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 418aad26fdd6..a9e04ba760ca 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -134,7 +134,6 @@ void llvm::computeLTOCacheKey(
AddUnsigned(Conf.CGOptLevel);
AddUnsigned(Conf.CGFileType);
AddUnsigned(Conf.OptLevel);
- AddUnsigned(Conf.UseNewPM);
AddUnsigned(Conf.Freestanding);
AddString(Conf.OptPipeline);
AddString(Conf.AAPipeline);
@@ -640,11 +639,11 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
if (!LTOInfo)
return LTOInfo.takeError();
- if (EnableSplitLTOUnit.hasValue()) {
+ if (EnableSplitLTOUnit) {
// If only some modules were split, flag this in the index so that
// we can skip or error on optimizations that need consistently split
// modules (whole program devirt and lower type tests).
- if (EnableSplitLTOUnit.getValue() != LTOInfo->EnableSplitLTOUnit)
+ if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit)
ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
} else
EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
@@ -820,9 +819,10 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
// For now they aren't reported correctly by ModuleSymbolTable.
auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
- MaybeAlign SymAlign(Sym.getCommonAlignment());
- if (SymAlign)
- CommonRes.Align = max(*SymAlign, CommonRes.Align);
+ if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
+ const Align SymAlign(SymAlignValue);
+ CommonRes.Align = std::max(SymAlign, CommonRes.Align.valueOrOne());
+ }
CommonRes.Prevailing |= Res.Prevailing;
}
}
@@ -885,8 +885,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
Keep.push_back(GV);
}
- return RegularLTO.Mover->move(std::move(Mod.M), Keep,
- [](GlobalValue &, IRMover::ValueAdder) {},
+ return RegularLTO.Mover->move(std::move(Mod.M), Keep, nullptr,
/* IsPerformingImport */ false);
}
@@ -1162,14 +1161,18 @@ protected:
const Config &Conf;
ModuleSummaryIndex &CombinedIndex;
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries;
+ lto::IndexWriteCallback OnWrite;
+ bool ShouldEmitImportsFiles;
public:
ThinBackendProc(const Config &Conf, ModuleSummaryIndex &CombinedIndex,
- const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries)
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ lto::IndexWriteCallback OnWrite, bool ShouldEmitImportsFiles)
: Conf(Conf), CombinedIndex(CombinedIndex),
- ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {}
+ ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries),
+ OnWrite(OnWrite), ShouldEmitImportsFiles(ShouldEmitImportsFiles) {}
- virtual ~ThinBackendProc() {}
+ virtual ~ThinBackendProc() = default;
virtual Error start(
unsigned Task, BitcodeModule BM,
const FunctionImporter::ImportMapTy &ImportList,
@@ -1178,6 +1181,30 @@ public:
MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
virtual Error wait() = 0;
virtual unsigned getThreadCount() = 0;
+
+ // Write sharded indices and (optionally) imports to disk
+ Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
+ llvm::StringRef ModulePath,
+ const std::string &NewModulePath) {
+ std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
+ std::error_code EC;
+ gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
+ ImportList, ModuleToSummariesForIndex);
+
+ raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
+ sys::fs::OpenFlags::OF_None);
+ if (EC)
+ return errorCodeToError(EC);
+ writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
+
+ if (ShouldEmitImportsFiles) {
+ EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
+ ModuleToSummariesForIndex);
+ if (EC)
+ return errorCodeToError(EC);
+ }
+ return Error::success();
+ }
};
namespace {
@@ -1191,15 +1218,19 @@ class InProcessThinBackend : public ThinBackendProc {
Optional<Error> Err;
std::mutex ErrMu;
+ bool ShouldEmitIndexFiles;
+
public:
InProcessThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, FileCache Cache)
- : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
+ AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
+ bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
+ : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
+ OnWrite, ShouldEmitImportsFiles),
BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)),
- Cache(std::move(Cache)) {
+ Cache(std::move(Cache)), ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
for (auto &Name : CombinedIndex.cfiFunctionDefs())
CfiFunctionDefs.insert(
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
@@ -1228,6 +1259,11 @@ public:
auto ModuleID = BM.getModuleIdentifier();
+ if (ShouldEmitIndexFiles) {
+ if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str()))
+ return E;
+ }
+
if (!Cache || !CombinedIndex.modulePaths().count(ModuleID) ||
all_of(CombinedIndex.getModuleHash(ModuleID),
[](uint32_t V) { return V == 0; }))
@@ -1286,6 +1322,9 @@ public:
},
BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList),
std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap));
+
+ if (OnWrite)
+ OnWrite(std::string(ModulePath));
return Error::success();
}
@@ -1303,13 +1342,16 @@ public:
};
} // end anonymous namespace
-ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism) {
+ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
+ lto::IndexWriteCallback OnWrite,
+ bool ShouldEmitIndexFiles,
+ bool ShouldEmitImportsFiles) {
return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, FileCache Cache) {
return std::make_unique<InProcessThinBackend>(
Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream,
- Cache);
+ Cache, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles);
};
}
@@ -1336,9 +1378,7 @@ std::string lto::getThinLTOOutputFile(const std::string &Path,
namespace {
class WriteIndexesThinBackend : public ThinBackendProc {
std::string OldPrefix, NewPrefix;
- bool ShouldEmitImportsFiles;
raw_fd_ostream *LinkedObjectsFile;
- lto::IndexWriteCallback OnWrite;
public:
WriteIndexesThinBackend(
@@ -1346,10 +1386,10 @@ public:
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
- : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
+ : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
+ OnWrite, ShouldEmitImportsFiles),
OldPrefix(OldPrefix), NewPrefix(NewPrefix),
- ShouldEmitImportsFiles(ShouldEmitImportsFiles),
- LinkedObjectsFile(LinkedObjectsFile), OnWrite(OnWrite) {}
+ LinkedObjectsFile(LinkedObjectsFile) {}
Error start(
unsigned Task, BitcodeModule BM,
@@ -1364,23 +1404,8 @@ public:
if (LinkedObjectsFile)
*LinkedObjectsFile << NewModulePath << '\n';
- std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
- gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportList, ModuleToSummariesForIndex);
-
- std::error_code EC;
- raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
- sys::fs::OpenFlags::OF_None);
- if (EC)
- return errorCodeToError(EC);
- writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
-
- if (ShouldEmitImportsFiles) {
- EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
- ModuleToSummariesForIndex);
- if (EC)
- return errorCodeToError(EC);
- }
+ if (auto E = emitFiles(ImportList, ModulePath, NewModulePath))
+ return E;
if (OnWrite)
OnWrite(std::string(ModulePath));
@@ -1621,9 +1646,8 @@ lto::setupStatsFile(StringRef StatsFilename) {
// is to sort them by size so that the largest modules get scheduled as soon
// as possible. This is purely a compile-time optimization.
std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
- std::vector<int> ModulesOrdering;
- ModulesOrdering.resize(R.size());
- std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+ auto Seq = llvm::seq<int>(0, R.size());
+ std::vector<int> ModulesOrdering(Seq.begin(), Seq.end());
llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
auto LSize = R[LeftIndex]->getBuffer().size();
auto RSize = R[RightIndex]->getBuffer().size();
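generateModulesOrdering now seeds the index vector with llvm::seq instead of resize-plus-iota, then sorts descending by buffer size so the largest (and presumably slowest) modules are scheduled first. The heuristic in isolation, using plain STL stand-ins:

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Order indices so larger inputs come first; Sizes[i] stands in for
// R[i]->getBuffer().size() from the diff.
std::vector<int> orderBySize(const std::vector<std::size_t> &Sizes) {
  std::vector<int> Order(Sizes.size());
  std::iota(Order.begin(), Order.end(), 0); // plain-STL stand-in for llvm::seq
  std::sort(Order.begin(), Order.end(),
            [&](int L, int R) { return Sizes[L] > Sizes[R]; });
  return Order;
}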
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 3877def53c3f..5d50e92ae377 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -18,7 +18,6 @@
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
@@ -41,8 +40,6 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
@@ -298,6 +295,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
report_fatal_error(Twine("unable to parse pass pipeline description '") +
Conf.OptPipeline + "': " + toString(std::move(Err)));
}
+ } else if (Conf.UseDefaultPipeline) {
+ MPM.addPass(PB.buildPerModuleDefaultPipeline(OL));
} else if (IsThinLTO) {
MPM.addPass(PB.buildThinLTODefaultPipeline(OL, ImportSummary));
} else {
@@ -310,39 +309,6 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
MPM.run(Mod, MAM);
}
-static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
- bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary) {
- legacy::PassManager passes;
- passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
-
- PassManagerBuilder PMB;
- PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
- if (Conf.Freestanding)
- PMB.LibraryInfo->disableAllFunctions();
- PMB.Inliner = createFunctionInliningPass();
- PMB.ExportSummary = ExportSummary;
- PMB.ImportSummary = ImportSummary;
- // Unconditionally verify input since it is not verified before this
- // point and has unknown origin.
- PMB.VerifyInput = true;
- PMB.VerifyOutput = !Conf.DisableVerify;
- PMB.LoopVectorize = true;
- PMB.SLPVectorize = true;
- PMB.OptLevel = Conf.OptLevel;
- PMB.PGOSampleUse = Conf.SampleProfile;
- PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr;
- if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) {
- PMB.EnablePGOCSInstrUse = true;
- PMB.PGOInstrUse = Conf.CSIRProfile;
- }
- if (IsThinLTO)
- PMB.populateThinLTOPassManager(passes);
- else
- PMB.populateLTOPassManager(passes);
- passes.run(Mod);
-}
-
bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary,
@@ -365,12 +331,8 @@ bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
/*Cmdline*/ CmdArgs);
}
// FIXME: Plumb the combined index into the new pass manager.
- if (Conf.UseNewPM || !Conf.OptPipeline.empty()) {
- runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
- ImportSummary);
- } else {
- runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
- }
+ runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
+ ImportSummary);
return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
}
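With runOldPMPasses deleted, every LTO backend funnels through runNewPMPasses. The analysis-manager plumbing that path depends on (also visible in the ThinLTOCodeGenerator hunk further below) looks roughly like the following sketch; the module and optimization level are assumed to come from the caller:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

static void runDefaultPipeline(Module &M, OptimizationLevel OL) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // The Conf.UseDefaultPipeline branch added above selects this pipeline.
  ModulePassManager MPM = PB.buildPerModuleDefaultPipeline(OL);
  MPM.run(M, MAM);
}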
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index fdc9896aca78..2abf249cbd62 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -66,11 +66,7 @@
using namespace llvm;
const char* LTOCodeGenerator::getVersionString() {
-#ifdef LLVM_VERSION_INFO
- return PACKAGE_NAME " version " PACKAGE_VERSION ", " LLVM_VERSION_INFO;
-#else
return PACKAGE_NAME " version " PACKAGE_VERSION;
-#endif
}
namespace llvm {
@@ -132,7 +128,7 @@ LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context)
};
}
-LTOCodeGenerator::~LTOCodeGenerator() {}
+LTOCodeGenerator::~LTOCodeGenerator() = default;
void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) {
for (const StringRef &Undef : Mod->getAsmUndefinedRefs())
diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp
index 4cc1b307c553..5ad5e857296d 100644
--- a/llvm/lib/LTO/LTOModule.cpp
+++ b/llvm/lib/LTO/LTOModule.cpp
@@ -50,7 +50,7 @@ LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef,
SymTab.addModule(Mod.get());
}
-LTOModule::~LTOModule() {}
+LTOModule::~LTOModule() = default;
/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
diff --git a/llvm/lib/LTO/SummaryBasedOptimizations.cpp b/llvm/lib/LTO/SummaryBasedOptimizations.cpp
index 9e9d5c84d50d..bd3565771c29 100644
--- a/llvm/lib/LTO/SummaryBasedOptimizations.cpp
+++ b/llvm/lib/LTO/SummaryBasedOptimizations.cpp
@@ -55,7 +55,7 @@ void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
};
auto GetEntryCount = [](ValueInfo V) {
if (V.getSummaryList().size()) {
- auto S = V.getSummaryList().front().get()->getBaseObject();
+ auto S = V.getSummaryList().front()->getBaseObject();
auto *F = cast<FunctionSummary>(S);
return F->entryCount();
} else {
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 37e85b6af6ba..a1041b3c85f5 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -21,16 +21,15 @@
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/Verifier.h"
@@ -54,11 +53,9 @@
#include "llvm/Support/Threading.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
@@ -239,38 +236,7 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
static void optimizeModule(Module &TheModule, TargetMachine &TM,
unsigned OptLevel, bool Freestanding,
- ModuleSummaryIndex *Index) {
- // Populate the PassManager
- PassManagerBuilder PMB;
- PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
- if (Freestanding)
- PMB.LibraryInfo->disableAllFunctions();
- PMB.Inliner = createFunctionInliningPass();
- // FIXME: should get it from the bitcode?
- PMB.OptLevel = OptLevel;
- PMB.LoopVectorize = true;
- PMB.SLPVectorize = true;
- // Already did this in verifyLoadedModule().
- PMB.VerifyInput = false;
- PMB.VerifyOutput = false;
- PMB.ImportSummary = Index;
-
- legacy::PassManager PM;
-
- // Add the TTI (required to inform the vectorizer about register size for
- // instance)
- PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
-
- // Add optimizations
- PMB.populateThinLTOPassManager(PM);
-
- PM.run(TheModule);
-}
-
-static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM,
- unsigned OptLevel, bool Freestanding,
- bool DebugPassManager,
- ModuleSummaryIndex *Index) {
+ bool DebugPassManager, ModuleSummaryIndex *Index) {
Optional<PGOOptions> PGOOpt;
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
@@ -485,7 +451,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
bool DisableCodeGen, StringRef SaveTempsDir,
bool Freestanding, unsigned OptLevel, unsigned count,
- bool UseNewPM, bool DebugPassManager) {
+ bool DebugPassManager) {
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
@@ -525,11 +491,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
}
- if (UseNewPM)
- optimizeModuleNewPM(TheModule, TM, OptLevel, Freestanding, DebugPassManager,
- &Index);
- else
- optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index);
+ optimizeModule(TheModule, TM, OptLevel, Freestanding, DebugPassManager,
+ &Index);
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
@@ -953,7 +916,7 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
// Optimize now
optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding,
- nullptr);
+ DebugPassManager, nullptr);
}
/// Write out the generated object file, either from CacheEntryPath or from
@@ -1216,7 +1179,7 @@ void ThinLTOCodeGenerator::run() {
ExportList, GUIDPreservedSymbols,
ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count,
- UseNewPM, DebugPassManager);
+ DebugPassManager);
// Commit to the cache (if enabled)
CacheEntry.write(*OutputBuffer);
diff --git a/llvm/lib/LineEditor/LineEditor.cpp b/llvm/lib/LineEditor/LineEditor.cpp
index 37c4b79f8e29..09ec65a1d9c9 100644
--- a/llvm/lib/LineEditor/LineEditor.cpp
+++ b/llvm/lib/LineEditor/LineEditor.cpp
@@ -29,8 +29,8 @@ std::string LineEditor::getDefaultHistoryPath(StringRef ProgName) {
return std::string();
}
-LineEditor::CompleterConcept::~CompleterConcept() {}
-LineEditor::ListCompleterConcept::~ListCompleterConcept() {}
+LineEditor::CompleterConcept::~CompleterConcept() = default;
+LineEditor::ListCompleterConcept::~ListCompleterConcept() = default;
std::string LineEditor::ListCompleterConcept::getCommonPrefix(
const std::vector<Completion> &Comps) {
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index b475ea81d107..5a819e2d736c 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -9,19 +9,24 @@
#include "llvm/Linker/IRMover.h"
#include "LinkDiagnosticInfo.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GVMaterializer.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
-#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <utility>
using namespace llvm;
@@ -381,7 +386,7 @@ class IRLinker {
std::unique_ptr<Module> SrcM;
/// See IRMover::move().
- std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor;
+ IRMover::LazyCallback AddLazyFor;
TypeMapTy TypeMap;
GlobalValueMaterializer GValMaterializer;
@@ -524,8 +529,7 @@ public:
IRLinker(Module &DstM, MDMapT &SharedMDs,
IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr<Module> SrcM,
ArrayRef<GlobalValue *> ValuesToLink,
- std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor,
- bool IsPerformingImport)
+ IRMover::LazyCallback AddLazyFor, bool IsPerformingImport)
: DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)),
TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this),
SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport),
@@ -987,10 +991,11 @@ bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) {
// Callback to the client to give a chance to lazily add the Global to the
// list of value to link.
bool LazilyAdded = false;
- AddLazyFor(SGV, [this, &LazilyAdded](GlobalValue &GV) {
- maybeAdd(&GV);
- LazilyAdded = true;
- });
+ if (AddLazyFor)
+ AddLazyFor(SGV, [this, &LazilyAdded](GlobalValue &GV) {
+ maybeAdd(&GV);
+ LazilyAdded = true;
+ });
return LazilyAdded;
}
@@ -1041,7 +1046,7 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
if (Function *F = dyn_cast<Function>(NewGV))
if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) {
NewGV->eraseFromParent();
- NewGV = Remangled.getValue();
+ NewGV = *Remangled;
NeedsRenaming = false;
}
@@ -1229,8 +1234,15 @@ void IRLinker::linkNamedMDNodes() {
continue;
// Don't import pseudo probe descriptors here for thinLTO. They will be
// emitted by the originating module.
- if (IsPerformingImport && NMD.getName() == PseudoProbeDescMetadataName)
+ if (IsPerformingImport && NMD.getName() == PseudoProbeDescMetadataName) {
+ if (!DstM.getNamedMetadata(NMD.getName()))
+ emitWarning("Pseudo-probe ignored: source module '" +
+ SrcM->getModuleIdentifier() +
+ "' is compiled with -fpseudo-probe-for-profiling while "
+ "destination module '" +
+ DstM.getModuleIdentifier() + "' is not\n");
continue;
+ }
NamedMDNode *DestNMD = DstM.getOrInsertNamedMetadata(NMD.getName());
// Add Src elements into Dest node.
for (const MDNode *Op : NMD.operands())
@@ -1245,6 +1257,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
if (!SrcModFlags)
return Error::success();
+ // Check for module flag updates before doing anything.
+ UpgradeModuleFlags(*SrcM);
+
// If the destination module doesn't have module flags yet, then just copy
// over the source module's flags.
NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata();
@@ -1327,11 +1342,15 @@ Error IRLinker::linkModuleFlagsMetadata() {
// Diagnose inconsistent merge behavior types.
if (SrcBehaviorValue != DstBehaviorValue) {
+ bool MinAndWarn = (SrcBehaviorValue == Module::Min &&
+ DstBehaviorValue == Module::Warning) ||
+ (DstBehaviorValue == Module::Min &&
+ SrcBehaviorValue == Module::Warning);
bool MaxAndWarn = (SrcBehaviorValue == Module::Max &&
DstBehaviorValue == Module::Warning) ||
(DstBehaviorValue == Module::Max &&
SrcBehaviorValue == Module::Warning);
- if (!MaxAndWarn)
+ if (!(MaxAndWarn || MinAndWarn))
return stringErr("linking module flags '" + ID->getString() +
"': IDs have conflicting behaviors in '" +
SrcM->getModuleIdentifier() + "' and '" +
@@ -1360,6 +1379,25 @@ Error IRLinker::linkModuleFlagsMetadata() {
emitWarning(Str);
}
+ // Choose the minimum if either source or destination request Min behavior.
+ if (DstBehaviorValue == Module::Min || SrcBehaviorValue == Module::Min) {
+ ConstantInt *DstValue =
+ mdconst::extract<ConstantInt>(DstOp->getOperand(2));
+ ConstantInt *SrcValue =
+ mdconst::extract<ConstantInt>(SrcOp->getOperand(2));
+
+ // The resulting flag should have Min behavior and contain the smaller of
+ // the source and destination values.
+ Metadata *FlagOps[] = {
+ (DstBehaviorValue != Module::Min ? SrcOp : DstOp)->getOperand(0), ID,
+ (SrcValue->getZExtValue() < DstValue->getZExtValue() ? SrcOp : DstOp)
+ ->getOperand(2)};
+ MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
+ DstModFlags->setOperand(DstIndex, Flag);
+ Flags[ID].first = Flag;
+ continue;
+ }
+
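
The new Min case mirrors the existing Max handling that follows: the merged flag keeps Min behavior and takes the smaller value. A minimal sketch of the observable effect, using an illustrative flag key that is not part of this patch:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

void minFlagSketch() {
  LLVMContext Ctx;
  Module Dst("dst", Ctx), Src("src", Ctx);
  Dst.addModuleFlag(Module::Min, "branch-protection", 2); // illustrative key
  Src.addModuleFlag(Module::Min, "branch-protection", 1);
  // After linking Src into Dst, Dst carries {Min, "branch-protection", 1}:
  // the smaller value wins and the Min behavior is preserved.
}
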
// Choose the maximum if either source or destination request Max behavior.
if (DstBehaviorValue == Module::Max || SrcBehaviorValue == Module::Max) {
ConstantInt *DstValue =
@@ -1673,10 +1711,9 @@ IRMover::IRMover(Module &M) : Composite(M) {
}
}
-Error IRMover::move(
- std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
- std::function<void(GlobalValue &, ValueAdder Add)> AddLazyFor,
- bool IsPerformingImport) {
+Error IRMover::move(std::unique_ptr<Module> Src,
+ ArrayRef<GlobalValue *> ValuesToLink,
+ LazyCallback AddLazyFor, bool IsPerformingImport) {
IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes,
std::move(Src), ValuesToLink, std::move(AddLazyFor),
IsPerformingImport);
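
Since LazyCallback is now a nullable named type, a client without a lazy-add policy can pass a default-constructed callback, and shouldLink() above skips the invocation entirely. A minimal sketch of the updated call shape; Dst, Src, and the choice of ValuesToLink are assumed, and the lambda body is illustrative:

#include "llvm/Linker/IRMover.h"
using namespace llvm;

static Error linkWithLazyAdds(Module &Dst, std::unique_ptr<Module> Src,
                              ArrayRef<GlobalValue *> ValuesToLink) {
  IRMover Mover(Dst);
  return Mover.move(std::move(Src), ValuesToLink,
                    IRMover::LazyCallback(
                        [](GlobalValue &GV, IRMover::ValueAdder Add) {
                          // Call Add(GV) to pull additional globals in lazily.
                        }),
                    /*IsPerformingImport=*/false);
}
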
diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp
index f9f51bf17d95..17c3f09a23b7 100644
--- a/llvm/lib/Linker/LinkModules.cpp
+++ b/llvm/lib/Linker/LinkModules.cpp
@@ -14,7 +14,6 @@
#include "llvm-c/Linker.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Comdat.h"
-#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -573,11 +572,13 @@ bool ModuleLinker::run() {
// FIXME: Propagate Errors through to the caller instead of emitting
// diagnostics.
bool HasErrors = false;
- if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
- [this](GlobalValue &GV, IRMover::ValueAdder Add) {
- addLazyFor(GV, Add);
- },
- /* IsPerformingImport */ false)) {
+ if (Error E =
+ Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
+ IRMover::LazyCallback(
+ [this](GlobalValue &GV, IRMover::ValueAdder Add) {
+ addLazyFor(GV, Add);
+ }),
+ /* IsPerformingImport */ false)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
HasErrors = true;
diff --git a/llvm/lib/MC/ConstantPools.cpp b/llvm/lib/MC/ConstantPools.cpp
index d8a08a4bd439..c3ab88b94476 100644
--- a/llvm/lib/MC/ConstantPools.cpp
+++ b/llvm/lib/MC/ConstantPools.cpp
@@ -39,25 +39,38 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
unsigned Size, SMLoc Loc) {
const MCConstantExpr *C = dyn_cast<MCConstantExpr>(Value);
+ const MCSymbolRefExpr *S = dyn_cast<MCSymbolRefExpr>(Value);
// Check if there is an existing entry for the same constant. If so, reuse it.
- auto Itr = C ? CachedEntries.find(C->getValue()) : CachedEntries.end();
- if (Itr != CachedEntries.end())
- return Itr->second;
+ if (C) {
+ auto CItr = CachedConstantEntries.find(C->getValue());
+ if (CItr != CachedConstantEntries.end())
+ return CItr->second;
+ }
+
+ // Check if there is an existing entry for the same symbol. If so, reuse it.
+ if (S) {
+ auto SItr = CachedSymbolEntries.find(&(S->getSymbol()));
+ if (SItr != CachedSymbolEntries.end())
+ return SItr->second;
+ }
MCSymbol *CPEntryLabel = Context.createTempSymbol();
Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc));
const auto SymRef = MCSymbolRefExpr::create(CPEntryLabel, Context);
if (C)
- CachedEntries[C->getValue()] = SymRef;
+ CachedConstantEntries[C->getValue()] = SymRef;
+ if (S)
+ CachedSymbolEntries[&(S->getSymbol())] = SymRef;
return SymRef;
}
bool ConstantPool::empty() { return Entries.empty(); }
void ConstantPool::clearCache() {
- CachedEntries.clear();
+ CachedConstantEntries.clear();
+ CachedSymbolEntries.clear();
}
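
With the second cache, repeated references to the same symbol through the constant pool now share one entry, just as equal constants already did. A sketch, assuming an MCContext Ctx and an MCSymbol *Sym in scope:

ConstantPool CP;
const MCExpr *A =
    CP.addEntry(MCSymbolRefExpr::create(Sym, Ctx), Ctx, /*Size=*/4, SMLoc());
const MCExpr *B =
    CP.addEntry(MCSymbolRefExpr::create(Sym, Ctx), Ctx, /*Size=*/4, SMLoc());
assert(A == B && "second reference hits CachedSymbolEntries");
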
//
@@ -79,7 +92,7 @@ AssemblerConstantPools::getOrCreateConstantPool(MCSection *Section) {
static void emitConstantPool(MCStreamer &Streamer, MCSection *Section,
ConstantPool &CP) {
if (!CP.empty()) {
- Streamer.SwitchSection(Section);
+ Streamer.switchSection(Section);
CP.emitEntries(Streamer);
}
}
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 883735fcc293..eda495693595 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -13,10 +13,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -28,18 +28,18 @@
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFragment.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Alignment.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -47,8 +47,6 @@
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -223,8 +221,6 @@ class ELFObjectWriter : public MCObjectWriter {
DenseMap<const MCSymbolELF *, const MCSymbolELF *> Renames;
bool SeenGnuAbi = false;
- bool EmitAddrsigSection = false;
- std::vector<const MCSymbol *> AddrsigSyms;
bool hasRelocationAddend() const;
@@ -264,10 +260,6 @@ public:
void markGnuAbi() override { SeenGnuAbi = true; }
bool seenGnuAbi() const { return SeenGnuAbi; }
- void emitAddrsigSection() override { EmitAddrsigSection = true; }
- void addAddrsigSymbol(const MCSymbol *Sym) override {
- AddrsigSyms.push_back(Sym);
- }
friend struct ELFWriter;
};
@@ -549,9 +541,27 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex,
uint64_t Size = 0;
const MCExpr *ESize = MSD.Symbol->getSize();
- if (!ESize && Base)
+ if (!ESize && Base) {
+ // For expressions like .set y, x+1, if y's size is unset, inherit from x.
ESize = Base->getSize();
+ // For `.size x, 2; y = x; .size y, 1; z = y; z1 = z; .symver y, y@v1`, z,
+ // z1, and y@v1's st_size equals y's. However, `Base` is `x`, which will give
+ // us 2. Follow the MCSymbolRefExpr assignment chain, which covers most
+ // needs. MCBinaryExpr is not handled.
+ const MCSymbolELF *Sym = &Symbol;
+ while (Sym->isVariable()) {
+ if (auto *Expr =
+ dyn_cast<MCSymbolRefExpr>(Sym->getVariableValue(false))) {
+ Sym = cast<MCSymbolELF>(&Expr->getSymbol());
+ if (!Sym->getSize())
+ continue;
+ ESize = Sym->getSize();
+ }
+ break;
+ }
+ }
+
if (ESize) {
int64_t Res;
if (!ESize->evaluateKnownAbsolute(Res, Layout))
@@ -850,13 +860,9 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
auto &MC = Asm.getContext();
const auto &MAI = MC.getAsmInfo();
- // Compressing debug_frame requires handling alignment fragments which is
- // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow
- // for writing to arbitrary buffers) for little benefit.
bool CompressionEnabled =
MAI->compressDebugSections() != DebugCompressionType::None;
- if (!CompressionEnabled || !SectionName.startswith(".debug_") ||
- SectionName == ".debug_frame") {
+ if (!CompressionEnabled || !SectionName.startswith(".debug_")) {
Asm.writeSectionData(W.OS, &Section, Layout);
return;
}
@@ -870,13 +876,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
Asm.writeSectionData(VecOS, &Section, Layout);
SmallVector<char, 128> CompressedContents;
- if (Error E = zlib::compress(
- StringRef(UncompressedData.data(), UncompressedData.size()),
- CompressedContents)) {
- consumeError(std::move(E));
- W.OS << UncompressedData;
- return;
- }
+ zlib::compress(StringRef(UncompressedData.data(), UncompressedData.size()),
+ CompressedContents);
bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z;
if (!maybeWriteCompression(UncompressedData.size(), CompressedContents,
@@ -1336,6 +1337,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
// can update it.
return true;
case ELF::STB_GLOBAL:
+ case ELF::STB_GNU_UNIQUE:
// Global ELF symbols can be preempted by the dynamic linker. The relocation
// has to point to the symbol for a reason analogous to the STB_WEAK case.
return true;
diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp
index 7989dd57907c..4ed9d8593336 100644
--- a/llvm/lib/MC/MCAsmBackend.cpp
+++ b/llvm/lib/MC/MCAsmBackend.cpp
@@ -8,11 +8,13 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
+#include "llvm/MC/MCDXContainerWriter.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSPIRVObjectWriter.h"
#include "llvm/MC/MCWasmObjectWriter.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/MC/MCXCOFFObjectWriter.h"
@@ -39,12 +41,18 @@ MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
case Triple::COFF:
return createWinCOFFObjectWriter(
cast<MCWinCOFFObjectTargetWriter>(std::move(TW)), OS);
+ case Triple::SPIRV:
+ return createSPIRVObjectWriter(
+ cast<MCSPIRVObjectTargetWriter>(std::move(TW)), OS);
case Triple::Wasm:
return createWasmObjectWriter(cast<MCWasmObjectTargetWriter>(std::move(TW)),
OS);
case Triple::XCOFF:
return createXCOFFObjectWriter(
cast<MCXCOFFObjectTargetWriter>(std::move(TW)), OS);
+ case Triple::DXContainer:
+ return createDXContainerObjectWriter(
+ cast<MCDXContainerTargetWriter>(std::move(TW)), OS);
default:
llvm_unreachable("unexpected object format");
}
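
With these cases added, the same Triple-keyed dispatch covers the new formats. A sketch; the triple string is illustrative and assumes the default object format for dxil targets is DXContainer:

#include "llvm/ADT/Triple.h"
using namespace llvm;

Triple TT("dxil-pc-shadermodel6.3-library"); // illustrative DXIL triple
bool IsDXC = TT.getObjectFormat() == Triple::DXContainer; // expected: true
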
diff --git a/llvm/lib/MC/MCAsmInfo.cpp b/llvm/lib/MC/MCAsmInfo.cpp
index f52503d7b160..b8d0021ed432 100644
--- a/llvm/lib/MC/MCAsmInfo.cpp
+++ b/llvm/lib/MC/MCAsmInfo.cpp
@@ -114,7 +114,10 @@ MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
}
bool MCAsmInfo::isAcceptableChar(char C) const {
- return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '@';
+ if (C == '@')
+ return doesAllowAtInName();
+
+ return isAlnum(C) || C == '_' || C == '$' || C == '.';
}
bool MCAsmInfo::isValidUnquotedName(StringRef Name) const {
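
The practical effect: '@' is accepted in unquoted names only when the target opts in. A sketch, where MAI stands for any concrete MCAsmInfo whose doesAllowAtInName() returns false:

bool Plain = MAI.isValidUnquotedName("foo");      // true: every char accepted
bool AtName = MAI.isValidUnquotedName("foo@plt"); // false: '@' now rejected
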
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 61ec941f50b8..6f8934d66ef4 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -31,13 +30,13 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include <cctype>
using namespace llvm;
@@ -127,7 +126,7 @@ public:
/// Return a raw_ostream that comments can be written to.
/// Unlike AddComment, you are required to terminate comments with \n if you
/// use this method.
- raw_ostream &GetCommentOS() override {
+ raw_ostream &getCommentOS() override {
if (!IsVerboseAsm)
return nulls(); // Discard comments unless in verbose asm mode.
return CommentStream;
@@ -139,9 +138,7 @@ public:
void emitExplicitComments() override;
/// Emit a blank line to a .s file to pretty it up.
- void AddBlankLine() override {
- EmitEOL();
- }
+ void addBlankLine() override { EmitEOL(); }
/// @name MCStreamer Interface
/// @{
@@ -180,15 +177,15 @@ public:
bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
void emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override;
- void EmitCOFFSymbolStorageClass(int StorageClass) override;
- void EmitCOFFSymbolType(int Type) override;
- void EndCOFFSymbolDef() override;
- void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
- void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override;
- void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
- void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
- void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+ void beginCOFFSymbolDef(const MCSymbol *Symbol) override;
+ void emitCOFFSymbolStorageClass(int StorageClass) override;
+ void emitCOFFSymbolType(int Type) override;
+ void endCOFFSymbolDef() override;
+ void emitCOFFSafeSEH(MCSymbol const *Symbol) override;
+ void emitCOFFSymbolIndex(MCSymbol const *Symbol) override;
+ void emitCOFFSectionIndex(MCSymbol const *Symbol) override;
+ void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
+ void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
void emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
MCSymbol *CsectSym,
unsigned ByteAlign) override;
@@ -198,6 +195,8 @@ public:
void emitXCOFFRenameDirective(const MCSymbol *Name,
StringRef Rename) override;
+ void emitXCOFFRefDirective(StringRef Name) override;
+
void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
@@ -276,11 +275,11 @@ public:
StringRef FileName) override;
MCSymbol *getDwarfLineTableSymbol(unsigned CUID) override;
- bool EmitCVFileDirective(unsigned FileNo, StringRef Filename,
+ bool emitCVFileDirective(unsigned FileNo, StringRef Filename,
ArrayRef<uint8_t> Checksum,
unsigned ChecksumKind) override;
- bool EmitCVFuncIdDirective(unsigned FuncId) override;
- bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
+ bool emitCVFuncIdDirective(unsigned FuncId) override;
+ bool emitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
unsigned IAFile, unsigned IALine,
unsigned IACol, SMLoc Loc) override;
void emitCVLocDirective(unsigned FunctionId, unsigned FileNo, unsigned Line,
@@ -316,10 +315,11 @@ public:
void emitCVStringTableDirective() override;
void emitCVFileChecksumsDirective() override;
void emitCVFileChecksumOffsetDirective(unsigned FileNo) override;
- void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override;
+ void emitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override;
void emitIdent(StringRef IdentString) override;
void emitCFIBKeyFrame() override;
+ void emitCFIMTETaggedFrame() override;
void emitCFISections(bool EH, bool Debug) override;
void emitCFIDefCfa(int64_t Register, int64_t Offset) override;
void emitCFIDefCfaOffset(int64_t Offset) override;
@@ -344,25 +344,25 @@ public:
void emitCFINegateRAState() override;
void emitCFIReturnColumn(int64_t Register) override;
- void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override;
- void EmitWinCFIEndProc(SMLoc Loc) override;
- void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override;
- void EmitWinCFIStartChained(SMLoc Loc) override;
- void EmitWinCFIEndChained(SMLoc Loc) override;
- void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) override;
- void EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
+ void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override;
+ void emitWinCFIEndProc(SMLoc Loc) override;
+ void emitWinCFIFuncletOrFuncEnd(SMLoc Loc) override;
+ void emitWinCFIStartChained(SMLoc Loc) override;
+ void emitWinCFIEndChained(SMLoc Loc) override;
+ void emitWinCFIPushReg(MCRegister Register, SMLoc Loc) override;
+ void emitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc) override;
- void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) override;
- void EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
+ void emitWinCFIAllocStack(unsigned Size, SMLoc Loc) override;
+ void emitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc) override;
- void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
+ void emitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc) override;
- void EmitWinCFIPushFrame(bool Code, SMLoc Loc) override;
- void EmitWinCFIEndProlog(SMLoc Loc) override;
+ void emitWinCFIPushFrame(bool Code, SMLoc Loc) override;
+ void emitWinCFIEndProlog(SMLoc Loc) override;
- void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except,
+ void emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except,
SMLoc Loc) override;
- void EmitWinEHHandlerData(SMLoc Loc) override;
+ void emitWinEHHandlerData(SMLoc Loc) override;
void emitCGProfileEntry(const MCSymbolRefExpr *From,
const MCSymbolRefExpr *To, uint64_t Count) override;
@@ -502,7 +502,7 @@ void MCAsmStreamer::changeSection(MCSection *Section,
if (MCTargetStreamer *TS = getTargetStreamer()) {
TS->changeSection(getCurrentSectionOnly(), Section, Subsection, OS);
} else {
- Section->PrintSwitchToSection(*MAI, getContext().getTargetTriple(), OS,
+ Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS,
Subsection);
}
}
@@ -761,6 +761,8 @@ bool MCAsmStreamer::emitSymbolAttribute(MCSymbol *Symbol,
case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
case MCSA_Cold:
// Assemblers currently do not support a .cold directive.
+ case MCSA_Exported:
+ // Non-AIX assemblers currently do not support exported visibility.
return false;
}
@@ -787,47 +789,47 @@ void MCAsmStreamer::emitSyntaxDirective() {
// with may have a value of prefix or noprefix.
}
-void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+void MCAsmStreamer::beginCOFFSymbolDef(const MCSymbol *Symbol) {
OS << "\t.def\t";
Symbol->print(OS, MAI);
OS << ';';
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {
+void MCAsmStreamer::emitCOFFSymbolStorageClass(int StorageClass) {
OS << "\t.scl\t" << StorageClass << ';';
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFSymbolType (int Type) {
+void MCAsmStreamer::emitCOFFSymbolType(int Type) {
OS << "\t.type\t" << Type << ';';
EmitEOL();
}
-void MCAsmStreamer::EndCOFFSymbolDef() {
+void MCAsmStreamer::endCOFFSymbolDef() {
OS << "\t.endef";
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
+void MCAsmStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) {
OS << "\t.safeseh\t";
Symbol->print(OS, MAI);
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {
+void MCAsmStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) {
OS << "\t.symidx\t";
Symbol->print(OS, MAI);
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
+void MCAsmStreamer::emitCOFFSectionIndex(MCSymbol const *Symbol) {
OS << "\t.secidx\t";
Symbol->print(OS, MAI);
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {
+void MCAsmStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {
OS << "\t.secrel32\t";
Symbol->print(OS, MAI);
if (Offset != 0)
@@ -835,7 +837,7 @@ void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {
+void MCAsmStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {
OS << "\t.rva\t";
Symbol->print(OS, MAI);
if (Offset > 0)
@@ -903,6 +905,9 @@ void MCAsmStreamer::emitXCOFFSymbolLinkageWithVisibility(
case MCSA_Protected:
OS << ",protected";
break;
+ case MCSA_Exported:
+ OS << ",exported";
+ break;
default:
report_fatal_error("unexpected value for Visibility type");
}
@@ -931,6 +936,11 @@ void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
EmitEOL();
}
+void MCAsmStreamer::emitXCOFFRefDirective(StringRef Name) {
+ OS << "\t.ref " << Name;
+ EmitEOL();
+}
+
void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(MAI->hasDotTypeDotSizeDirective());
OS << "\t.size\t";
@@ -988,7 +998,7 @@ void MCAsmStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment,
SMLoc Loc) {
if (Symbol)
- AssignFragment(Symbol, &Section->getDummyFragment());
+ assignFragment(Symbol, &Section->getDummyFragment());
// Note: a .zerofill directive does not switch sections.
OS << ".zerofill ";
@@ -1015,7 +1025,7 @@ void MCAsmStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
// e.g. _a.
void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
- AssignFragment(Symbol, &Section->getDummyFragment());
+ assignFragment(Symbol, &Section->getDummyFragment());
assert(Symbol && "Symbol shouldn't be NULL!");
// Instead of using the Section we'll just use the shortcut.
@@ -1643,7 +1653,7 @@ MCSymbol *MCAsmStreamer::getDwarfLineTableSymbol(unsigned CUID) {
return MCStreamer::getDwarfLineTableSymbol(0);
}
-bool MCAsmStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename,
+bool MCAsmStreamer::emitCVFileDirective(unsigned FileNo, StringRef Filename,
ArrayRef<uint8_t> Checksum,
unsigned ChecksumKind) {
if (!getContext().getCVContext().addFile(*this, FileNo, Filename, Checksum,
@@ -1666,19 +1676,19 @@ bool MCAsmStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename,
return true;
}
-bool MCAsmStreamer::EmitCVFuncIdDirective(unsigned FuncId) {
+bool MCAsmStreamer::emitCVFuncIdDirective(unsigned FuncId) {
OS << "\t.cv_func_id " << FuncId << '\n';
- return MCStreamer::EmitCVFuncIdDirective(FuncId);
+ return MCStreamer::emitCVFuncIdDirective(FuncId);
}
-bool MCAsmStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId,
+bool MCAsmStreamer::emitCVInlineSiteIdDirective(unsigned FunctionId,
unsigned IAFunc,
unsigned IAFile,
unsigned IALine, unsigned IACol,
SMLoc Loc) {
OS << "\t.cv_inline_site_id " << FunctionId << " within " << IAFunc
<< " inlined_at " << IAFile << ' ' << IALine << ' ' << IACol << '\n';
- return MCStreamer::EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
+ return MCStreamer::emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
IALine, IACol, Loc);
}
@@ -1795,7 +1805,7 @@ void MCAsmStreamer::emitCVFileChecksumOffsetDirective(unsigned FileNo) {
EmitEOL();
}
-void MCAsmStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) {
+void MCAsmStreamer::emitCVFPOData(const MCSymbol *ProcSym, SMLoc L) {
OS << "\t.cv_fpo_data\t";
ProcSym->print(OS, MAI);
EmitEOL();
@@ -2016,59 +2026,69 @@ void MCAsmStreamer::emitCFIBKeyFrame() {
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
- MCStreamer::EmitWinCFIStartProc(Symbol, Loc);
+void MCAsmStreamer::emitCFIMTETaggedFrame() {
+ MCStreamer::emitCFIMTETaggedFrame();
+ OS << "\t.cfi_mte_tagged_frame";
+ EmitEOL();
+}
+
+void MCAsmStreamer::emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
+ MCStreamer::emitWinCFIStartProc(Symbol, Loc);
OS << ".seh_proc ";
Symbol->print(OS, MAI);
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIEndProc(SMLoc Loc) {
- MCStreamer::EmitWinCFIEndProc(Loc);
+void MCAsmStreamer::emitWinCFIEndProc(SMLoc Loc) {
+ MCStreamer::emitWinCFIEndProc(Loc);
OS << "\t.seh_endproc";
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
- MCStreamer::EmitWinCFIFuncletOrFuncEnd(Loc);
+void MCAsmStreamer::emitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
+ MCStreamer::emitWinCFIFuncletOrFuncEnd(Loc);
OS << "\t.seh_endfunclet";
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIStartChained(SMLoc Loc) {
- MCStreamer::EmitWinCFIStartChained(Loc);
+void MCAsmStreamer::emitWinCFIStartChained(SMLoc Loc) {
+ MCStreamer::emitWinCFIStartChained(Loc);
OS << "\t.seh_startchained";
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIEndChained(SMLoc Loc) {
- MCStreamer::EmitWinCFIEndChained(Loc);
+void MCAsmStreamer::emitWinCFIEndChained(SMLoc Loc) {
+ MCStreamer::emitWinCFIEndChained(Loc);
OS << "\t.seh_endchained";
EmitEOL();
}
-void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind,
+void MCAsmStreamer::emitWinEHHandler(const MCSymbol *Sym, bool Unwind,
bool Except, SMLoc Loc) {
- MCStreamer::EmitWinEHHandler(Sym, Unwind, Except, Loc);
+ MCStreamer::emitWinEHHandler(Sym, Unwind, Except, Loc);
OS << "\t.seh_handler ";
Sym->print(OS, MAI);
+ char Marker = '@';
+ const Triple &T = getContext().getTargetTriple();
+ if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
+ Marker = '%';
if (Unwind)
- OS << ", @unwind";
+ OS << ", " << Marker << "unwind";
if (Except)
- OS << ", @except";
+ OS << ", " << Marker << "except";
EmitEOL();
}
-void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) {
- MCStreamer::EmitWinEHHandlerData(Loc);
+void MCAsmStreamer::emitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::emitWinEHHandlerData(Loc);
- // Switch sections. Don't call SwitchSection directly, because that will
+ // Switch sections. Don't call switchSection directly, because that will
// cause the section switch to be visible in the emitted assembly.
// We only do this so the section switch that terminates the handler
// data block is visible.
@@ -2081,23 +2101,23 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) {
MCSection *TextSec = &CurFrame->Function->getSection();
MCSection *XData = getAssociatedXDataSection(TextSec);
- SwitchSectionNoChange(XData);
+ switchSectionNoChange(XData);
OS << "\t.seh_handlerdata";
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) {
- MCStreamer::EmitWinCFIPushReg(Register, Loc);
+void MCAsmStreamer::emitWinCFIPushReg(MCRegister Register, SMLoc Loc) {
+ MCStreamer::emitWinCFIPushReg(Register, Loc);
OS << "\t.seh_pushreg ";
InstPrinter->printRegName(OS, Register);
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
+void MCAsmStreamer::emitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc) {
- MCStreamer::EmitWinCFISetFrame(Register, Offset, Loc);
+ MCStreamer::emitWinCFISetFrame(Register, Offset, Loc);
OS << "\t.seh_setframe ";
InstPrinter->printRegName(OS, Register);
@@ -2105,16 +2125,16 @@ void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) {
- MCStreamer::EmitWinCFIAllocStack(Size, Loc);
+void MCAsmStreamer::emitWinCFIAllocStack(unsigned Size, SMLoc Loc) {
+ MCStreamer::emitWinCFIAllocStack(Size, Loc);
OS << "\t.seh_stackalloc " << Size;
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
+void MCAsmStreamer::emitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc) {
- MCStreamer::EmitWinCFISaveReg(Register, Offset, Loc);
+ MCStreamer::emitWinCFISaveReg(Register, Offset, Loc);
OS << "\t.seh_savereg ";
InstPrinter->printRegName(OS, Register);
@@ -2122,9 +2142,9 @@ void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
+void MCAsmStreamer::emitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc) {
- MCStreamer::EmitWinCFISaveXMM(Register, Offset, Loc);
+ MCStreamer::emitWinCFISaveXMM(Register, Offset, Loc);
OS << "\t.seh_savexmm ";
InstPrinter->printRegName(OS, Register);
@@ -2132,8 +2152,8 @@ void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) {
- MCStreamer::EmitWinCFIPushFrame(Code, Loc);
+void MCAsmStreamer::emitWinCFIPushFrame(bool Code, SMLoc Loc) {
+ MCStreamer::emitWinCFIPushFrame(Code, Loc);
OS << "\t.seh_pushframe";
if (Code)
@@ -2141,8 +2161,8 @@ void MCAsmStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) {
EmitEOL();
}
-void MCAsmStreamer::EmitWinCFIEndProlog(SMLoc Loc) {
- MCStreamer::EmitWinCFIEndProlog(Loc);
+void MCAsmStreamer::emitWinCFIEndProlog(SMLoc Loc) {
+ MCStreamer::emitWinCFIEndProlog(Loc);
OS << "\t.seh_endprologue";
EmitEOL();
@@ -2161,7 +2181,7 @@ void MCAsmStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From,
void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
const MCSubtargetInfo &STI) {
- raw_ostream &OS = GetCommentOS();
+ raw_ostream &OS = getCommentOS();
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
@@ -2245,8 +2265,10 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
MCFixup &F = Fixups[i];
const MCFixupKindInfo &Info =
getAssembler().getBackend().getFixupKindInfo(F.getKind());
- OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset()
- << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n";
+ OS << " fixup " << char('A' + i) << " - "
+ << "offset: " << F.getOffset() << ", value: ";
+ F.getValue()->print(OS, MAI);
+ OS << ", kind: " << Info.Name << "\n";
}
}
@@ -2265,8 +2287,8 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst,
// Show the MCInst if enabled.
if (ShowInst) {
- Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
- GetCommentOS() << "\n";
+ Inst.dump_pretty(getCommentOS(), InstPrinter.get(), "\n ");
+ getCommentOS() << "\n";
}
if(getTargetStreamer())
@@ -2276,7 +2298,7 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst,
StringRef Comments = CommentToEmit;
if (Comments.size() && Comments.back() != '\n')
- GetCommentOS() << "\n";
+ getCommentOS() << "\n";
EmitEOL();
}
@@ -2365,7 +2387,7 @@ void MCAsmStreamer::finishImpl() {
if (!Tables.empty()) {
assert(Tables.size() == 1 && "asm output only supports one line table");
if (auto *Label = Tables.begin()->second.getLabel()) {
- SwitchSection(getContext().getObjectFileInfo()->getDwarfLineSection());
+ switchSection(getContext().getObjectFileInfo()->getDwarfLineSection());
emitLabel(Label);
}
}
@@ -2492,7 +2514,7 @@ void MCAsmStreamer::doFinalizationAtSectionEnd(MCSection *Section) {
if (MAI->usesDwarfFileAndLocDirectives())
return;
- SwitchSectionNoChange(Section);
+ switchSectionNoChange(Section);
MCSymbol *Sym = getCurrentSectionOnly()->getEndSymbol(getContext());
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index a8837bbf57c7..a33d7ea9ebfe 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -27,7 +27,6 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Alignment.h"
@@ -36,16 +35,18 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
-#include <cstring>
#include <tuple>
#include <utility>
using namespace llvm;
+namespace llvm {
+class MCSubtargetInfo;
+}
+
#define DEBUG_TYPE "assembler"
namespace {
@@ -330,11 +331,11 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_Align: {
const MCAlignFragment &AF = cast<MCAlignFragment>(F);
unsigned Offset = Layout.getFragmentOffset(&AF);
- unsigned Size = offsetToAlignment(Offset, Align(AF.getAlignment()));
+ unsigned Size = offsetToAlignment(Offset, AF.getAlignment());
// Insert extra Nops for code alignment if the target defines the
// shouldInsertExtraNopBytesForCodeAlign target hook.
- if (AF.getParent()->UseCodeAlign() && AF.hasEmitNops() &&
+ if (AF.getParent()->useCodeAlign() && AF.hasEmitNops() &&
getBackend().shouldInsertExtraNopBytesForCodeAlign(AF, Size))
return Size;
@@ -342,7 +343,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
// minimum nop size.
if (Size > 0 && AF.hasEmitNops()) {
while (Size % getBackend().getMinimumNopSize())
- Size += AF.getAlignment();
+ Size += AF.getAlignment().value();
}
if (Size > AF.getMaxBytesToEmit())
return 0;
@@ -873,7 +874,7 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
MCAlignFragment &AF = cast<MCAlignFragment>(Frag);
// Insert fixup type for code alignment if the target defines the
// shouldInsertFixupForCodeAlign target hook.
- if (Sec.UseCodeAlign() && AF.hasEmitNops())
+ if (Sec.useCodeAlign() && AF.hasEmitNops())
getBackend().shouldInsertFixupForCodeAlign(*this, Layout, AF);
continue;
}
diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp
index 3da1a9c3e331..375d54696cb2 100644
--- a/llvm/lib/MC/MCCodeView.cpp
+++ b/llvm/lib/MC/MCCodeView.cpp
@@ -17,6 +17,7 @@
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCValue.h"
@@ -25,7 +26,7 @@
using namespace llvm;
using namespace llvm::codeview;
-CodeViewContext::CodeViewContext() {}
+CodeViewContext::CodeViewContext() = default;
CodeViewContext::~CodeViewContext() {
// If someone inserted strings into the string table but never actually
@@ -334,8 +335,8 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
OS.emitInt32(uint32_t(DebugSubsectionKind::Lines));
OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4);
OS.emitLabel(LineBegin);
- OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0);
- OS.EmitCOFFSectionIndex(FuncBegin);
+ OS.emitCOFFSecRel32(FuncBegin, /*Offset=*/0);
+ OS.emitCOFFSectionIndex(FuncBegin);
// Actual line info.
std::vector<MCCVLoc> Locs = getFunctionLineEntries(FuncId);
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index eafcee1e0607..4be84ca7feb5 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCContext.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -15,21 +16,25 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCLabel.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionDXContainer.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionGOFF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionSPIRV.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -37,13 +42,14 @@
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MCSymbolXCOFF.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Signals.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -103,6 +109,12 @@ MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
case Triple::GOFF:
Env = IsGOFF;
break;
+ case Triple::DXContainer:
+ Env = IsDXContainer;
+ break;
+ case Triple::SPIRV:
+ Env = IsSPIRV;
+ break;
case Triple::UnknownObjectFormat:
report_fatal_error("Cannot initialize MC for unknown object file format.");
break;
@@ -134,11 +146,14 @@ void MCContext::reset() {
// Call the destructors so the fragments are freed
COFFAllocator.DestroyAll();
+ DXCAllocator.DestroyAll();
ELFAllocator.DestroyAll();
GOFFAllocator.DestroyAll();
MachOAllocator.DestroyAll();
+ WasmAllocator.DestroyAll();
XCOFFAllocator.DestroyAll();
MCInstAllocator.DestroyAll();
+ SPIRVAllocator.DestroyAll();
MCSubtargetAllocator.DestroyAll();
InlineAsmUsedLabelNames.clear();
@@ -163,6 +178,7 @@ void MCContext::reset() {
COFFUniquingMap.clear();
WasmUniquingMap.clear();
XCOFFUniquingMap.clear();
+ DXCUniquingMap.clear();
ELFEntrySizeMap.clear();
ELFSeenGenericMergeableSections.clear();
@@ -243,6 +259,11 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name,
return new (Name, *this) MCSymbolWasm(Name, IsTemporary);
case MCContext::IsXCOFF:
return createXCOFFSymbolImpl(Name, IsTemporary);
+ case MCContext::IsDXContainer:
+ break;
+ case MCContext::IsSPIRV:
+ return new (Name, *this)
+ MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
}
return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name,
IsTemporary);
@@ -616,11 +637,14 @@ Optional<unsigned> MCContext::getELFUniqueIDForEntsize(StringRef SectionName,
return (I != ELFEntrySizeMap.end()) ? Optional<unsigned>(I->second) : None;
}
-MCSectionGOFF *MCContext::getGOFFSection(StringRef Section, SectionKind Kind) {
+MCSectionGOFF *MCContext::getGOFFSection(StringRef Section, SectionKind Kind,
+ MCSection *Parent,
+ const MCExpr *SubsectionId) {
// Do the lookup. If we don't have a hit, return a new section.
auto &GOFFSection = GOFFUniquingMap[Section.str()];
if (!GOFFSection)
- GOFFSection = new (GOFFAllocator.Allocate()) MCSectionGOFF(Section, Kind);
+ GOFFSection = new (GOFFAllocator.Allocate())
+ MCSectionGOFF(Section, Kind, Parent, SubsectionId);
return GOFFSection;
}
@@ -732,13 +756,19 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
return Result;
}
+bool MCContext::hasXCOFFSection(StringRef Section,
+ XCOFF::CsectProperties CsectProp) const {
+ return XCOFFUniquingMap.count(
+ XCOFFSectionKey(Section.str(), CsectProp.MappingClass)) != 0;
+}
+
MCSectionXCOFF *MCContext::getXCOFFSection(
StringRef Section, SectionKind Kind,
Optional<XCOFF::CsectProperties> CsectProp, bool MultiSymbolsAllowed,
const char *BeginSymName,
Optional<XCOFF::DwarfSectionSubtypeFlags> DwarfSectionSubtypeFlags) {
- bool IsDwarfSec = DwarfSectionSubtypeFlags.hasValue();
- assert((IsDwarfSec != CsectProp.hasValue()) && "Invalid XCOFF section!");
+ bool IsDwarfSec = DwarfSectionSubtypeFlags.has_value();
+ assert((IsDwarfSec != CsectProp.has_value()) && "Invalid XCOFF section!");
// Do the lookup. If we have a hit, return it.
auto IterBool = XCOFFUniquingMap.insert(std::make_pair(
@@ -796,6 +826,44 @@ MCSectionXCOFF *MCContext::getXCOFFSection(
return Result;
}
+MCSectionSPIRV *MCContext::getSPIRVSection() {
+ MCSymbol *Begin = nullptr;
+ MCSectionSPIRV *Result = new (SPIRVAllocator.Allocate())
+ MCSectionSPIRV(SectionKind::getText(), Begin);
+
+ auto *F = new MCDataFragment();
+ Result->getFragmentList().insert(Result->begin(), F);
+ F->setParent(Result);
+
+ if (Begin)
+ Begin->setFragment(F);
+
+ return Result;
+}
+
+MCSectionDXContainer *MCContext::getDXContainerSection(StringRef Section,
+ SectionKind K) {
+ // Do the lookup; if we have a hit, return it.
+ auto ItInsertedPair = DXCUniquingMap.try_emplace(Section);
+ if (!ItInsertedPair.second)
+ return ItInsertedPair.first->second;
+
+ auto MapIt = ItInsertedPair.first;
+ // Grab the name from the StringMap. Since the Section is going to keep a
+ // copy of this StringRef we need to make sure the underlying string stays
+ // alive as long as we need it.
+ StringRef Name = MapIt->first();
+ MapIt->second =
+ new (DXCAllocator.Allocate()) MCSectionDXContainer(Name, K, nullptr);
+
+ // The first fragment will store the header
+ auto *F = new MCDataFragment();
+ MapIt->second->getFragmentList().insert(MapIt->second->begin(), F);
+ F->setParent(MapIt->second);
+
+ return MapIt->second;
+}
+
MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) {
return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI);
}
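
The uniquing code above relies on a StringMap property worth noting: the map owns the key bytes, so the StringRef returned by MapIt->first() stays valid as long as the entry lives. A standalone sketch:

#include "llvm/ADT/StringMap.h"
using namespace llvm;

void keyStabilitySketch() {
  StringMap<int> Sections;
  auto It = Sections.try_emplace("SFI0", 0).first;
  StringRef StableName = It->first(); // backed by the map's own key storage
  // StableName stays valid until the entry is erased or the map is destroyed.
  (void)StableName;
}
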
@@ -835,6 +903,12 @@ void MCContext::RemapDebugPaths() {
// Dwarf Management
//===----------------------------------------------------------------------===//
+EmitDwarfUnwindType MCContext::emitDwarfUnwindInfo() const {
+ if (!TargetOptions)
+ return EmitDwarfUnwindType::Default;
+ return TargetOptions->EmitDwarfUnwind;
+}
+
void MCContext::setGenDwarfRootFile(StringRef InputFileName, StringRef Buffer) {
// MCDwarf needs the root file as well as the compilation directory.
// If we find a '.file 0' directive that will supersede these values.
@@ -906,9 +980,9 @@ void MCContext::finalizeDwarfSections(MCStreamer &MCOS) {
}
CodeViewContext &MCContext::getCVContext() {
- if (!CVContext.get())
+ if (!CVContext)
CVContext.reset(new CodeViewContext);
- return *CVContext.get();
+ return *CVContext;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/MC/MCDXContainerStreamer.cpp b/llvm/lib/MC/MCDXContainerStreamer.cpp
new file mode 100644
index 000000000000..3cb452f3dfa5
--- /dev/null
+++ b/llvm/lib/MC/MCDXContainerStreamer.cpp
@@ -0,0 +1,31 @@
+//===- lib/MC/MCDXContainerStreamer.cpp - DXContainer Impl ----*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the object streamer for DXContainer files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDXContainerStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+void MCDXContainerStreamer::emitInstToData(const MCInst &,
+ const MCSubtargetInfo &) {}
+
+MCStreamer *llvm::createDXContainerStreamer(
+ MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll) {
+ auto *S = new MCDXContainerStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(CE));
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/llvm/lib/MC/MCDXContainerWriter.cpp b/llvm/lib/MC/MCDXContainerWriter.cpp
new file mode 100644
index 000000000000..f5dad702d6f6
--- /dev/null
+++ b/llvm/lib/MC/MCDXContainerWriter.cpp
@@ -0,0 +1,143 @@
+//===- llvm/MC/MCDXContainerWriter.cpp - DXContainer Writer -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDXContainerWriter.h"
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+MCDXContainerTargetWriter::~MCDXContainerTargetWriter() {}
+
+namespace {
+class DXContainerObjectWriter : public MCObjectWriter {
+ ::support::endian::Writer W;
+
+ /// The target specific DXContainer writer instance.
+ std::unique_ptr<MCDXContainerTargetWriter> TargetObjectWriter;
+
+public:
+ DXContainerObjectWriter(std::unique_ptr<MCDXContainerTargetWriter> MOTW,
+ raw_pwrite_stream &OS)
+ : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {}
+
+ ~DXContainerObjectWriter() override {}
+
+private:
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) override {}
+
+ void executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) override {}
+
+ uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+};
+} // namespace
+
+uint64_t DXContainerObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // Start the file size as the header plus the size of the part offsets.
+ // Presently DXContainer files usually contain 7-10 parts. Reserving space for
+ // 16 part offsets gives us a little room for growth.
+ llvm::SmallVector<uint64_t, 16> PartOffsets;
+ uint64_t PartOffset = 0;
+ for (const MCSection &Sec : Asm) {
+ uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
+ // Skip empty sections.
+ if (SectionSize == 0)
+ continue;
+
+ assert(SectionSize < std::numeric_limits<uint32_t>::max() &&
+ "Section size too large for DXContainer");
+
+ PartOffsets.push_back(PartOffset);
+ PartOffset += sizeof(dxbc::PartHeader) + SectionSize;
+ PartOffset = alignTo(PartOffset, Align(4ul));
+ }
+ assert(PartOffset < std::numeric_limits<uint32_t>::max() &&
+ "Part data too large for DXContainer");
+
+ uint64_t PartStart =
+ sizeof(dxbc::Header) + (PartOffsets.size() * sizeof(uint32_t));
+ uint64_t FileSize = PartStart + PartOffset;
+ assert(FileSize < std::numeric_limits<uint32_t>::max() &&
+ "File size too large for DXContainer");
+
+ // Write the header.
+ W.write<char>({'D', 'X', 'B', 'C'});
+ // Write 16-bytes of 0's for the hash.
+ W.OS.write_zeros(16);
+ // Write 1.0 for file format version.
+ W.write<uint16_t>(1u);
+ W.write<uint16_t>(0u);
+ // Write the file size.
+ W.write<uint32_t>(static_cast<uint32_t>(FileSize));
+ // Write the number of parts.
+ W.write<uint32_t>(static_cast<uint32_t>(PartOffsets.size()));
+ // Write the offsets for the part headers for each part.
+ for (uint64_t Offset : PartOffsets)
+ W.write<uint32_t>(static_cast<uint32_t>(PartStart + Offset));
+
+ for (const MCSection &Sec : Asm) {
+ uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
+ // Skip empty sections.
+ if (SectionSize == 0)
+ continue;
+
+ unsigned Start = W.OS.tell();
+ // Write section header.
+ W.write<char>(ArrayRef<char>(Sec.getName().data(), 4));
+
+ uint64_t PartSize = SectionSize + sizeof(dxbc::PartHeader);
+
+ if (Sec.getName() == "DXIL")
+ PartSize += sizeof(dxbc::ProgramHeader);
+ // DXContainer parts should be 4-byte aligned.
+ PartSize = alignTo(PartSize, Align(4));
+ W.write<uint32_t>(static_cast<uint32_t>(PartSize));
+ if (Sec.getName() == "DXIL") {
+ dxbc::ProgramHeader Header;
+ memset(reinterpret_cast<void *>(&Header), 0, sizeof(dxbc::ProgramHeader));
+
+ const Triple &TT = Asm.getContext().getTargetTriple();
+ VersionTuple Version = TT.getOSVersion();
+ Header.MajorVersion = static_cast<uint8_t>(Version.getMajor());
+ if (Version.getMinor())
+ Header.MinorVersion = static_cast<uint8_t>(*Version.getMinor());
+ if (TT.hasEnvironment())
+ Header.ShaderKind =
+ static_cast<uint16_t>(TT.getEnvironment() - Triple::Pixel);
+
+ // The program header's size field is in 32-bit words.
+ Header.Size = (SectionSize + sizeof(dxbc::ProgramHeader) + 3) / 4;
+ memcpy(Header.Bitcode.Magic, "DXIL", 4);
+ Header.Bitcode.Offset = sizeof(dxbc::BitcodeHeader);
+ Header.Bitcode.Size = SectionSize;
+ if (sys::IsBigEndianHost)
+ Header.swapBytes();
+ W.write<char>(ArrayRef<char>(reinterpret_cast<char *>(&Header),
+ sizeof(dxbc::ProgramHeader)));
+ }
+ Asm.writeSectionData(W.OS, &Sec, Layout);
+ unsigned Size = W.OS.tell() - Start;
+ W.OS.write_zeros(offsetToAlignment(Size, Align(4)));
+ }
+ return 0;
+}
+
+std::unique_ptr<MCObjectWriter> llvm::createDXContainerObjectWriter(
+ std::unique_ptr<MCDXContainerTargetWriter> MOTW, raw_pwrite_stream &OS) {
+ return std::make_unique<DXContainerObjectWriter>(std::move(MOTW), OS);
+}
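
A worked example of the layout arithmetic above, assuming the in-tree dxbc struct sizes (Header = 32 bytes, PartHeader = 8 bytes) and two parts of 100 and 36 bytes:

// Part 0: offset 0;   PartOffset = 8 + 100 = 108 (already 4-byte aligned)
// Part 1: offset 108; PartOffset = 108 + 8 + 36 = 152
// PartStart = 32 (Header) + 2 * 4 (offset words) = 40
// FileSize  = 40 + 152 = 192
// Offsets written into the file: 40 + 0 = 40 and 40 + 108 = 148
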
diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index aaa3b747682c..f0c61840e413 100644
--- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -30,7 +30,6 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <cstddef>
#include <cstring>
using namespace llvm;
diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.h b/llvm/lib/MC/MCDisassembler/Disassembler.h
index e5aab53a7613..3cb2479d388f 100644
--- a/llvm/lib/MC/MCDisassembler/Disassembler.h
+++ b/llvm/lib/MC/MCDisassembler/Disassembler.h
@@ -16,7 +16,7 @@
#ifndef LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H
#define LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H
-#include "llvm-c/Disassembler.h"
+#include "llvm-c/DisassemblerTypes.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
index a58e8f6d9bcc..0c041186936d 100644
--- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -8,9 +8,6 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
using namespace llvm;
@@ -25,11 +22,12 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
- uint64_t Offset,
+ uint64_t Offset, uint64_t OpSize,
uint64_t InstSize) const {
if (Symbolizer)
- return Symbolizer->tryAddingSymbolicOperand(
- Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize);
+ return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value,
+ Address, IsBranch, Offset,
+ OpSize, InstSize);
return false;
}
@@ -85,10 +83,11 @@ bool XCOFFSymbolInfo::operator<(const XCOFFSymbolInfo &SymInfo) const {
return SymInfo.IsLabel;
// Symbols with a StorageMappingClass have higher priority than those without.
- if (StorageMappingClass.hasValue() != SymInfo.StorageMappingClass.hasValue())
- return SymInfo.StorageMappingClass.hasValue();
+ if (StorageMappingClass.has_value() !=
+ SymInfo.StorageMappingClass.has_value())
+ return SymInfo.StorageMappingClass.has_value();
- if (StorageMappingClass.hasValue()) {
+ if (StorageMappingClass) {
return getSMCPriority(StorageMappingClass.getValue()) <
getSMCPriority(SymInfo.StorageMappingClass.getValue());
}
diff --git a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 7befef86303c..e3f4cdd21557 100644
--- a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -31,19 +31,15 @@ class Triple;
// is found an MCExpr is created with that, else an MCExpr with Value is
// created. This function returns true if it adds an operand to the MCInst and
// false otherwise.
-bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
- raw_ostream &cStream,
- int64_t Value,
- uint64_t Address,
- bool IsBranch,
- uint64_t Offset,
- uint64_t InstSize) {
+bool MCExternalSymbolizer::tryAddingSymbolicOperand(
+ MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address,
+ bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
struct LLVMOpInfo1 SymbolicOp;
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
if (!GetOpInfo ||
- !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
+ !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) {
// Clear SymbolicOp.Value from above and also all other fields.
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
@@ -53,10 +49,10 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
// that always makes sense to guess. But in the case of an immediate it is
// a bit more questionable if it is an address of a symbol or some other
// reference. So if the immediate Value comes from a width of 1 byte,
- // InstSize, we will not guess it is an address of a symbol. Because in
+ // OpSize, we will not guess it is an address of a symbol. Because in
// object files assembled starting at address 0 this usually leads to
// incorrect symbolication.
- if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
+ if (!SymbolLookUp || (OpSize == 1 && !IsBranch))
return false;
uint64_t ReferenceType;
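
For C-API clients the extra parameter threads through the op-info callback. A sketch of a conforming callback under the widened signature; the body is illustrative:

// OpSize is the width of the operand being symbolized; InstSize is the
// width of the whole instruction. Returning 0 means no symbolic info.
static int getOpInfo(void *DisInfo, uint64_t PC, uint64_t Offset,
                     uint64_t OpSize, uint64_t InstSize, int TagType,
                     void *TagBuf) {
  return 0;
}
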
diff --git a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index 735be23206e4..137c44680080 100644
--- a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
-#include "llvm-c/Disassembler.h"
+#include "llvm-c/DisassemblerTypes.h"
#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 2cb5a000f88a..4cbb9981fde2 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -269,7 +269,7 @@ void MCDwarfLineTable::emit(MCStreamer *MCOS, MCDwarfLineTableParams Params) {
LineStr = MCDwarfLineStr(context);
// Switch to the section where the table will be emitted into.
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfLineSection());
// Handle the rest of the Compile Units.
for (const auto &CUIDTablePair : LineTables) {
@@ -285,7 +285,7 @@ void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params,
if (!HasSplitLineTable)
return;
Optional<MCDwarfLineStr> NoLineStr(None);
- MCOS.SwitchSection(Section);
+ MCOS.switchSection(Section);
MCOS.emitLabel(Header.Emit(&MCOS, Params, None, NoLineStr).second);
}
@@ -332,14 +332,20 @@ static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) {
void MCDwarfLineStr::emitSection(MCStreamer *MCOS) {
// Switch to the .debug_line_str section.
- MCOS->SwitchSection(
+ MCOS->switchSection(
MCOS->getContext().getObjectFileInfo()->getDwarfLineStrSection());
+ SmallString<0> Data = getFinalizedData();
+ MCOS->emitBinaryData(Data.str());
+}
+
+SmallString<0> MCDwarfLineStr::getFinalizedData() {
// Emit the strings without perturbing the offsets we used.
- LineStrings.finalizeInOrder();
+ if (!LineStrings.isFinalized())
+ LineStrings.finalizeInOrder();
SmallString<0> Data;
Data.resize(LineStrings.getSize());
LineStrings.write((uint8_t *)Data.data());
- MCOS->emitBinaryData(Data.str());
+ return Data;
}
void MCDwarfLineStr::emitRef(MCStreamer *MCOS, StringRef Path) {
@@ -387,16 +393,14 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
if (EmitMD5) {
const MD5::MD5Result &Cksum = *DwarfFile.Checksum;
MCOS->emitBinaryData(
- StringRef(reinterpret_cast<const char *>(Cksum.Bytes.data()),
- Cksum.Bytes.size()));
+ StringRef(reinterpret_cast<const char *>(Cksum.data()), Cksum.size()));
}
if (HasSource) {
if (LineStr)
- LineStr->emitRef(MCOS, DwarfFile.Source.getValueOr(StringRef()));
+ LineStr->emitRef(MCOS, DwarfFile.Source.value_or(StringRef()));
else {
- MCOS->emitBytes(
- DwarfFile.Source.getValueOr(StringRef())); // Source and...
- MCOS->emitBytes(StringRef("\0", 1)); // its null terminator.
+ MCOS->emitBytes(DwarfFile.Source.value_or(StringRef())); // Source and...
+ MCOS->emitBytes(StringRef("\0", 1)); // its null terminator.
}
}
}
@@ -583,7 +587,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
// Keep track of whether any or all files have an MD5 checksum.
// If any files have embedded source, they all must.
if (MCDwarfFiles.empty()) {
- trackMD5Usage(Checksum.hasValue());
+ trackMD5Usage(Checksum.has_value());
HasSource = (Source != None);
}
if (DwarfVersion >= 5 && isRootFile(RootFile, Directory, FileName, Checksum))
@@ -646,7 +650,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
File.Name = std::string(FileName);
File.DirIndex = DirIndex;
File.Checksum = Checksum;
- trackMD5Usage(Checksum.hasValue());
+ trackMD5Usage(Checksum.has_value());
File.Source = Source;
if (Source)
HasSource = true;
@@ -764,7 +768,7 @@ static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
// the data for .debug_abbrev section which contains three DIEs.
static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCContext &context = MCOS->getContext();
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
// DW_TAG_compile_unit DIE abbrev (1).
MCOS->emitULEB128IntValue(1);
@@ -817,7 +821,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
auto &Sections = context.getGenDwarfSectionSyms();
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfARangesSection());
unsigned UnitLengthBytes =
dwarf::getUnitLengthFieldByteSize(context.getDwarfFormat());
@@ -896,7 +900,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
const MCSymbol *RangesSymbol) {
MCContext &context = MCOS->getContext();
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfInfoSection());
// Create a symbol at the start and end of this section used in here for the
// expression to calculate the length in the header.
@@ -1073,7 +1077,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) {
MCSymbol *RangesSymbol;
if (MCOS->getContext().getDwarfVersion() >= 5) {
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRnglistsSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfRnglistsSection());
MCSymbol *EndSymbol = mcdwarf::emitListsTableHeaderStart(*MCOS);
MCOS->AddComment("Offset entry count");
MCOS->emitInt32(0);
@@ -1093,7 +1097,7 @@ static MCSymbol *emitGenDwarfRanges(MCStreamer *MCOS) {
MCOS->emitInt8(dwarf::DW_RLE_end_of_list);
MCOS->emitLabel(EndSymbol);
} else {
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfRangesSection());
RangesSymbol = context.createTempSymbol("debug_ranges_start");
MCOS->emitLabel(RangesSymbol);
for (MCSection *Sec : Sections) {
@@ -1154,18 +1158,18 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
MCOS->getContext().getDwarfVersion() >= 3;
CreateDwarfSectionSymbols |= UseRangesSection;
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfInfoSection());
if (CreateDwarfSectionSymbols) {
InfoSectionSymbol = context.createTempSymbol();
MCOS->emitLabel(InfoSectionSymbol);
}
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
if (CreateDwarfSectionSymbols) {
AbbrevSectionSymbol = context.createTempSymbol();
MCOS->emitLabel(AbbrevSectionSymbol);
}
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+ MCOS->switchSection(context.getObjectFileInfo()->getDwarfARangesSection());
// Output the data for .debug_aranges section.
EmitGenDwarfAranges(MCOS, InfoSectionSymbol);
@@ -1599,6 +1603,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCDwarfFrameInfo &Frame) {
Augmentation += "S";
if (Frame.IsBKeyFrame)
Augmentation += "B";
+ if (Frame.IsMTETaggedFrame)
+ Augmentation += "G";
Streamer.emitBytes(Augmentation);
}
Streamer.emitInt8(0);
@@ -1835,8 +1841,6 @@ template <> struct DenseMapInfo<CIEKey> {
void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
bool IsEH) {
- Streamer.generateCompactUnwindEncodings(MAB);
-
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
const MCAsmInfo *AsmInfo = Context.getAsmInfo();
@@ -1846,11 +1850,12 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
// Emit the compact unwind info if available.
bool NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame();
if (IsEH && MOFI->getCompactUnwindSection()) {
+ Streamer.generateCompactUnwindEncodings(MAB);
bool SectionEmitted = false;
for (const MCDwarfFrameInfo &Frame : FrameArray) {
if (Frame.CompactUnwindEncoding == 0) continue;
if (!SectionEmitted) {
- Streamer.SwitchSection(MOFI->getCompactUnwindSection());
+ Streamer.switchSection(MOFI->getCompactUnwindSection());
Streamer.emitValueToAlignment(AsmInfo->getCodePointerSize());
SectionEmitted = true;
}
@@ -1867,7 +1872,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
IsEH ? *const_cast<MCObjectFileInfo *>(MOFI)->getEHFrameSection()
: *MOFI->getDwarfFrameSection();
- Streamer.SwitchSection(&Section);
+ Streamer.switchSection(&Section);
MCSymbol *SectionStart = Context.createTempSymbol();
Streamer.emitLabel(SectionStart);
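
Most of the churn in this file is mechanical: MCStreamer::SwitchSection and friends move to lowerCamelCase, and llvm::Optional accessors migrate to their std::optional-compatible spellings. A minimal sketch of the accessor migration, using an illustrative Optional<StringRef> like MCDwarfFile::Source:

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"

void readSource(const llvm::Optional<llvm::StringRef> &Source) {
  bool Known = Source.has_value();                        // was: hasValue()
  llvm::StringRef S = Source.value_or(llvm::StringRef()); // was: getValueOr()
  (void)Known;
  (void)S;
}
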
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index fbf3c860368a..ca7f28e1386e 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -90,11 +90,11 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
MCContext &Ctx = getContext();
- SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
+ switchSection(Ctx.getObjectFileInfo()->getTextSection());
emitCodeAlignment(Ctx.getObjectFileInfo()->getTextSectionAlignment(), &STI);
if (NoExecStack)
- SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
+ switchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
}
void MCELFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
@@ -215,6 +215,7 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_WeakDefAutoPrivate:
case MCSA_Invalid:
case MCSA_IndirectSymbol:
+ case MCSA_Exported:
return false;
case MCSA_NoDeadStrip:
@@ -317,13 +318,13 @@ void MCELFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
MCSection &Section = *getAssembler().getContext().getELFSection(
".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
MCSectionSubPair P = getCurrentSection();
- SwitchSection(&Section);
+ switchSection(&Section);
emitValueToAlignment(ByteAlignment, 0, 1, 0);
emitLabel(Symbol);
emitZeros(Size);
- SwitchSection(P.first, P.second);
+ switchSection(P.first, P.second);
} else {
if(Symbol->declareCommon(Size, ByteAlignment))
report_fatal_error(Twine("Symbol: ") + Symbol->getName() +
@@ -381,15 +382,15 @@ void MCELFStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From,
void MCELFStreamer::emitIdent(StringRef IdentString) {
MCSection *Comment = getAssembler().getContext().getELFSection(
".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1);
- PushSection();
- SwitchSection(Comment);
+ pushSection();
+ switchSection(Comment);
if (!SeenIdent) {
emitInt8(0);
SeenIdent = true;
}
emitBytes(IdentString);
emitInt8(0);
- PopSection();
+ popSection();
}
void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
@@ -511,8 +512,8 @@ void MCELFStreamer::finalizeCGProfile() {
MCSection *CGProfile = getAssembler().getContext().getELFSection(
".llvm.call-graph-profile", ELF::SHT_LLVM_CALL_GRAPH_PROFILE,
ELF::SHF_EXCLUDE, /*sizeof(Elf_CGProfile_Impl<>)=*/8);
- PushSection();
- SwitchSection(CGProfile);
+ pushSection();
+ switchSection(CGProfile);
uint64_t Offset = 0;
for (MCAssembler::CGProfileEntry &E : Asm.CGProfile) {
finalizeCGProfileEntry(E.From, Offset);
@@ -520,7 +521,7 @@ void MCELFStreamer::finalizeCGProfile() {
emitIntValue(E.Count, sizeof(uint64_t));
Offset += sizeof(uint64_t);
}
- PopSection();
+ popSection();
}
void MCELFStreamer::emitInstToFragment(const MCInst &Inst,
@@ -832,10 +833,10 @@ void MCELFStreamer::createAttributesSection(
// Switch section to AttributeSection or get/create the section.
if (AttributeSection) {
- SwitchSection(AttributeSection);
+ switchSection(AttributeSection);
} else {
AttributeSection = getContext().getELFSection(Section, Type, 0);
- SwitchSection(AttributeSection);
+ switchSection(AttributeSection);
// Format version
emitInt8(0x41);
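
emitIdent and finalizeCGProfile above both rely on the renamed save/emit/restore idiom on the streamer. A minimal sketch of that pattern with the new method names, for an arbitrary side section:

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCStreamer.h"

// Emit a few bytes into Side without disturbing the caller's section.
void emitIntoSideSection(llvm::MCStreamer &S, llvm::MCSection *Side,
                         llvm::StringRef Bytes) {
  S.pushSection();       // save the current (section, subsection) pair
  S.switchSection(Side); // renamed from SwitchSection
  S.emitBytes(Bytes);
  S.popSection();        // renamed from PopSection; restores the saved pair
}
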
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 10d494b5ac61..45a3d938257a 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -8,7 +8,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -76,8 +75,9 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
const MCSymbol &Sym = SRE.getSymbol();
// Parenthesize names that start with $ so that they don't look like
// absolute names.
- bool UseParens =
- !InParens && !Sym.getName().empty() && Sym.getName()[0] == '$';
+ bool UseParens = MAI && MAI->useParensForDollarSignNames() && !InParens &&
+ !Sym.getName().empty() && Sym.getName()[0] == '$';
+
if (UseParens) {
OS << '(';
Sym.print(OS, MAI);
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 4634de863b2f..4e6459c5d6e4 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -376,7 +376,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
if (AF->hasEmitNops())
OS << " (emit nops)";
OS << "\n ";
- OS << " Alignment:" << AF->getAlignment()
+ OS << " Alignment:" << AF->getAlignment().value()
<< " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
<< " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
break;
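
The .value() call appears because MCAlignFragment now stores an llvm::Align instead of a raw unsigned (see the matching MCObjectStreamer change further below). A two-line sketch of the type:

#include "llvm/Support/Alignment.h"

llvm::Align A(16);          // asserts that the argument is a power of two
uint64_t Bytes = A.value(); // 16; raw streams need the integer form back
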
diff --git a/llvm/lib/MC/MCInstPrinter.cpp b/llvm/lib/MC/MCInstPrinter.cpp
index 7ce92b968f47..843afe359529 100644
--- a/llvm/lib/MC/MCInstPrinter.cpp
+++ b/llvm/lib/MC/MCInstPrinter.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index 4ed1c6286a72..85434b15bb5e 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -9,11 +9,12 @@
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
#include <cstdint>
+namespace llvm {
+class MCSubtargetInfo;
+}
+
using namespace llvm;
bool MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
diff --git a/llvm/lib/MC/MCInstrDesc.cpp b/llvm/lib/MC/MCInstrDesc.cpp
index b5c43f5edc0d..49a4a2cb546a 100644
--- a/llvm/lib/MC/MCInstrDesc.cpp
+++ b/llvm/lib/MC/MCInstrDesc.cpp
@@ -14,7 +14,6 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
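
This file and MCInstrAnalysis.cpp above follow the same include-hygiene pattern that recurs throughout the commit: when a translation unit only names a type through pointers or references, the heavyweight header is dropped in favor of a forward declaration. A minimal sketch of why that compiles:

namespace llvm {
class MCSubtargetInfo; // forward declaration; no header needed
}

// Pointer and reference uses require only the declaration above; member
// access or sizeof would need the full #include again.
void noteSTI(const llvm::MCSubtargetInfo *STI) { (void)STI; }
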
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index 88aeeb980738..9f22b9b0a866 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -10,7 +10,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -19,17 +18,16 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFragment.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -37,6 +35,13 @@
#include <cassert>
#include <vector>
+namespace llvm {
+class MCInst;
+class MCStreamer;
+class MCSubtargetInfo;
+class Triple;
+} // namespace llvm
+
using namespace llvm;
namespace {
@@ -126,6 +131,7 @@ public:
void finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE);
void finalizeCGProfile();
+ void createAddrSigSection();
};
} // end anonymous namespace.
@@ -353,6 +359,7 @@ bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym,
case MCSA_Weak:
case MCSA_Local:
case MCSA_LGlobal:
+ case MCSA_Exported:
return false;
case MCSA_Global:
@@ -455,8 +462,8 @@ void MCMachOStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
// section.
}
- PushSection();
- SwitchSection(Section);
+ pushSection();
+ switchSection(Section);
// The symbol may not be present, which only creates the section.
if (Symbol) {
@@ -464,7 +471,7 @@ void MCMachOStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
emitLabel(Symbol);
emitZeros(Size);
}
- PopSection();
+ popSection();
}
// This should always be called with the thread local bss section. Like the
@@ -524,6 +531,7 @@ void MCMachOStreamer::finishImpl() {
finalizeCGProfile();
+ createAddrSigSection();
this->MCObjectStreamer::finishImpl();
}
@@ -574,3 +582,16 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
S->getAssembler().setRelaxAll(true);
return S;
}
+
+// Create the AddrSig section and first data fragment here as its layout needs
+// to be computed immediately after in order for it to be exported correctly.
+void MCMachOStreamer::createAddrSigSection() {
+ MCAssembler &Asm = getAssembler();
+ MCObjectWriter &writer = Asm.getWriter();
+ if (!writer.getEmitAddrsigSection())
+ return;
+ MCSection *AddrSigSection =
+ Asm.getContext().getObjectFileInfo()->getAddrSigSection();
+ Asm.registerSection(*AddrSigSection);
+ new MCDataFragment(AddrSigSection);
+}
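
createAddrSigSection shows the standard way to force an initially empty section into the final object: register it with the assembler and parent one MCDataFragment into it so layout has something to place. A hedged sketch of the idiom in isolation:

#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCSection.h"

// Make Sec participate in layout even if nothing else writes to it.
void materializeEmptySection(llvm::MCAssembler &Asm, llvm::MCSection *Sec) {
  Asm.registerSection(*Sec);
  new llvm::MCDataFragment(Sec); // the fragment links itself into Sec
}
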
diff --git a/llvm/lib/MC/MCNullStreamer.cpp b/llvm/lib/MC/MCNullStreamer.cpp
index 40b7eba58b03..83e8962451d5 100644
--- a/llvm/lib/MC/MCNullStreamer.cpp
+++ b/llvm/lib/MC/MCNullStreamer.cpp
@@ -7,9 +7,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/SMLoc.h"
+namespace llvm {
+class MCContext;
+class MCExpr;
+class MCSection;
+class MCSymbol;
+} // namespace llvm
using namespace llvm;
@@ -36,10 +42,10 @@ namespace {
uint64_t Size = 0, unsigned ByteAlignment = 0,
SMLoc Loc = SMLoc()) override {}
void emitGPRel32Value(const MCExpr *Value) override {}
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void EmitCOFFSymbolStorageClass(int StorageClass) override {}
- void EmitCOFFSymbolType(int Type) override {}
- void EndCOFFSymbolDef() override {}
+ void beginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void emitCOFFSymbolStorageClass(int StorageClass) override {}
+ void emitCOFFSymbolType(int Type) override {}
+ void endCOFFSymbolDef() override {}
void
emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, MCSymbolAttr Linkage,
MCSymbolAttr Visibility) override {}
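
The four renamed no-op overrides correspond to the .def/.scl/.type/.endef directive sequence COFF uses to describe a symbol. A sketch of a caller driving the renamed hooks; the storage-class and type values are illustrative COFF constants, not taken from this diff:

#include "llvm/MC/MCStreamer.h"

void emitCOFFFunctionDef(llvm::MCStreamer &S, llvm::MCSymbol *Sym) {
  S.beginCOFFSymbolDef(Sym);       // .def  Sym
  S.emitCOFFSymbolStorageClass(2); // .scl  2  (external; illustrative)
  S.emitCOFFSymbolType(0x20);      // .type 32 (function; illustrative)
  S.endCOFFSymbolDef();            // .endef
}
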
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index b7890e7f0937..d6fe952c0c1d 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -16,11 +16,14 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionDXContainer.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionGOFF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionSPIRV.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -62,8 +65,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
(T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
SupportsCompactUnwindWithoutEHFrame = true;
- if (T.isWatchABI())
+ switch (Ctx->emitDwarfUnwindInfo()) {
+ case EmitDwarfUnwindType::Always:
+ OmitDwarfIfHaveCompactUnwind = false;
+ break;
+ case EmitDwarfUnwindType::NoCompactUnwind:
OmitDwarfIfHaveCompactUnwind = true;
+ break;
+ case EmitDwarfUnwindType::Default:
+ OmitDwarfIfHaveCompactUnwind =
+ T.isWatchABI() || SupportsCompactUnwindWithoutEHFrame;
+ break;
+ }
FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
@@ -180,6 +193,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
MachO::S_THREAD_LOCAL_VARIABLE_POINTERS,
SectionKind::getMetadata());
+ AddrSigSection = Ctx->getMachOSection("__DATA", "__llvm_addrsig", 0,
+ SectionKind::getData());
+
// Exception Handling.
LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
SectionKind::getReadOnlyWithRel());
@@ -518,8 +534,13 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
}
void MCObjectFileInfo::initGOFFMCObjectFileInfo(const Triple &T) {
- TextSection = Ctx->getGOFFSection(".text", SectionKind::getText());
- BSSSection = Ctx->getGOFFSection(".bss", SectionKind::getBSS());
+ TextSection =
+ Ctx->getGOFFSection(".text", SectionKind::getText(), nullptr, nullptr);
+ BSSSection =
+ Ctx->getGOFFSection(".bss", SectionKind::getBSS(), nullptr, nullptr);
+ PPA1Section =
+ Ctx->getGOFFSection(".ppa1", SectionKind::getMetadata(), TextSection,
+ MCConstantExpr::create(GOFF::SK_PPA1, *Ctx));
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -554,8 +575,9 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::aarch64) {
- // On Windows 64 with SEH, the LSDA is emitted into the .xdata section
+ if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::aarch64 ||
+ T.getArch() == Triple::arm || T.getArch() == Triple::thumb) {
+ // On Windows with SEH, the LSDA is emitted into the .xdata section
LSDASection = nullptr;
} else {
LSDASection = Ctx->getCOFFSection(".gcc_except_table",
@@ -803,6 +825,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
SectionKind::getReadOnly());
}
+void MCObjectFileInfo::initSPIRVMCObjectFileInfo(const Triple &T) {
+ // Put everything in a single binary section.
+ TextSection = Ctx->getSPIRVSection();
+}
+
void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
TextSection = Ctx->getWasmSection(".text", SectionKind::getText());
DataSection = Ctx->getWasmSection(".data", SectionKind::getData());
@@ -993,7 +1020,12 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
/* MultiSymbolsAllowed */ true, ".dwmac", XCOFF::SSUBTYP_DWMAC);
}
-MCObjectFileInfo::~MCObjectFileInfo() {}
+void MCObjectFileInfo::initDXContainerObjectFileInfo(const Triple &T) {
+ // At the moment the DXBC section should end up empty.
+ TextSection = Ctx->getDXContainerSection("DXBC", SectionKind::getText());
+}
+
+MCObjectFileInfo::~MCObjectFileInfo() = default;
void MCObjectFileInfo::initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
bool LargeCodeModel) {
@@ -1031,12 +1063,18 @@ void MCObjectFileInfo::initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
case MCContext::IsGOFF:
initGOFFMCObjectFileInfo(TheTriple);
break;
+ case MCContext::IsSPIRV:
+ initSPIRVMCObjectFileInfo(TheTriple);
+ break;
case MCContext::IsWasm:
initWasmMCObjectFileInfo(TheTriple);
break;
case MCContext::IsXCOFF:
initXCOFFMCObjectFileInfo(TheTriple);
break;
+ case MCContext::IsDXContainer:
+ initDXContainerObjectFileInfo(TheTriple);
+ break;
}
}
@@ -1052,7 +1090,9 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
case Triple::MachO:
case Triple::COFF:
case Triple::GOFF:
+ case Triple::SPIRV:
case Triple::XCOFF:
+ case Triple::DXContainer:
case Triple::UnknownObjectFormat:
report_fatal_error("Cannot get DWARF comdat section for this object file "
"format: not implemented.");
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index ebbbd6ad4e16..0c4ed201a0c5 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -37,7 +36,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context,
setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
}
-MCObjectStreamer::~MCObjectStreamer() {}
+MCObjectStreamer::~MCObjectStreamer() = default;
// AssemblerPtr is used for evaluation of expressions and causes
// difference between asm and object outputs. Return nullptr to in
@@ -561,7 +560,7 @@ void MCObjectStreamer::emitDwarfLineEndEntry(MCSection *Section,
// Switch back the dwarf line section, in case endSection had to switch the
// section.
MCContext &Ctx = getContext();
- SwitchSection(Ctx.getObjectFileInfo()->getDwarfLineSection());
+ switchSection(Ctx.getObjectFileInfo()->getDwarfLineSection());
const MCAsmInfo *AsmInfo = Ctx.getAsmInfo();
emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
@@ -648,7 +647,8 @@ void MCObjectStreamer::emitValueToAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
+ insert(new MCAlignFragment(Align(ByteAlignment), Value, ValueSize,
+ MaxBytesToEmit));
// Update the maximum alignment on the current section if necessary.
MCSection *CurSec = getCurrentSectionOnly();
@@ -796,7 +796,7 @@ MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name,
const MCExpr *Expr, SMLoc Loc,
const MCSubtargetInfo &STI) {
Optional<MCFixupKind> MaybeKind = Assembler->getBackend().getFixupKind(Name);
- if (!MaybeKind.hasValue())
+ if (!MaybeKind)
return std::make_pair(true, std::string("unknown relocation name"));
MCFixupKind Kind = *MaybeKind;
diff --git a/llvm/lib/MC/MCObjectWriter.cpp b/llvm/lib/MC/MCObjectWriter.cpp
index a058bbe0ba0b..89ff5800da5b 100644
--- a/llvm/lib/MC/MCObjectWriter.cpp
+++ b/llvm/lib/MC/MCObjectWriter.cpp
@@ -7,10 +7,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCSymbol.h"
+namespace llvm {
+class MCSection;
+}
using namespace llvm;
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index bf9b9e916d6f..c3bc3bff6fa2 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -251,12 +251,12 @@ AsmToken AsmLexer::LexLineComment() {
}
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
- // Skip ULL, UL, U, L and LL suffices.
- if (CurPtr[0] == 'U')
+ // Skip case-insensitive ULL, UL, U, L and LL suffixes.
+ if (CurPtr[0] == 'U' || CurPtr[0] == 'u')
++CurPtr;
- if (CurPtr[0] == 'L')
+ if (CurPtr[0] == 'L' || CurPtr[0] == 'l')
++CurPtr;
- if (CurPtr[0] == 'L')
+ if (CurPtr[0] == 'L' || CurPtr[0] == 'l')
++CurPtr;
}
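
The suffix skipper previously matched only uppercase letters, so lowercase C-style literals left stray characters behind in the token stream. Illustrative inputs that now lex to the same value:

// All of these now produce the integer 4096 cleanly; before the change the
// lowercase forms tripped over the leftover suffix characters.
const char *Literals[] = {"0x1000", "0x1000U", "0x1000ul", "0x1000uLL"};
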
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 0cea491f227d..ccc8e80e76ff 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -33,7 +33,6 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
@@ -541,6 +540,7 @@ private:
DK_PSEUDO_PROBE,
DK_LTO_DISCARD,
DK_LTO_SET_CONDITIONAL,
+ DK_CFI_MTE_TAGGED_FRAME,
DK_END
};
@@ -793,12 +793,19 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
case MCContext::IsGOFF:
PlatformParser.reset(createGOFFAsmParser());
break;
+ case MCContext::IsSPIRV:
+ report_fatal_error(
+ "Need to implement createSPIRVAsmParser for SPIRV format.");
+ break;
case MCContext::IsWasm:
PlatformParser.reset(createWasmAsmParser());
break;
case MCContext::IsXCOFF:
PlatformParser.reset(createXCOFFAsmParser());
break;
+ case MCContext::IsDXContainer:
+ llvm_unreachable("DXContainer is not supported yet");
+ break;
}
PlatformParser->Initialize(*this);
@@ -1067,7 +1074,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
if (auto *TS = Out.getTargetStreamer())
TS->emitConstantPools();
- Out.Finish(Lexer.getLoc());
+ Out.finish(Lexer.getLoc());
}
return HadError || getContext().hadError();
@@ -1780,7 +1787,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// if this is a line comment we can drop it safely
if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
getTok().getString().front() == '\n')
- Out.AddBlankLine();
+ Out.addBlankLine();
Lex();
return false;
}
@@ -1937,7 +1944,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
}
// Consume any end of statement token, if present, to avoid spurious
- // AddBlankLine calls().
+ // addBlankLine calls().
if (getTok().is(AsmToken::EndOfStatement)) {
Lex();
}
@@ -3445,10 +3452,14 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// up to one.
if (Alignment == 0)
Alignment = 1;
- if (!isPowerOf2_64(Alignment))
+ else if (!isPowerOf2_64(Alignment)) {
ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
- if (!isUInt<32>(Alignment))
+ Alignment = PowerOf2Floor(Alignment);
+ }
+ if (!isUInt<32>(Alignment)) {
ReturnVal |= Error(AlignmentLoc, "alignment must be smaller than 2**32");
+ Alignment = 1u << 31;
+ }
}
// Diagnose non-sensical max bytes to align.
@@ -3471,9 +3482,9 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// directive.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
- bool UseCodeAlign = Section->UseCodeAlign();
+ bool useCodeAlign = Section->useCodeAlign();
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
- ValueSize == 1 && UseCodeAlign) {
+ ValueSize == 1 && useCodeAlign) {
getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
MaxBytesToFill);
} else {
@@ -3571,8 +3582,8 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
if (HasMD5) {
MD5::MD5Result Sum;
for (unsigned i = 0; i != 8; ++i) {
- Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
- Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+ Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+ Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
}
CKMem = Sum;
}
@@ -3743,8 +3754,7 @@ bool AsmParser::parseDirectiveCVFile() {
parseEscapedString(Checksum) ||
parseIntToken(ChecksumKind,
"expected checksum kind in '.cv_file' directive") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cv_file' directive"))
+ parseEOL())
return true;
}
@@ -3754,7 +3764,7 @@ bool AsmParser::parseDirectiveCVFile() {
ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
Checksum.size());
- if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
+ if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
static_cast<uint8_t>(ChecksumKind)))
return Error(FileNumberLoc, "file number already allocated");
@@ -3790,12 +3800,10 @@ bool AsmParser::parseDirectiveCVFuncId() {
SMLoc FunctionIdLoc = getTok().getLoc();
int64_t FunctionId;
- if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cv_func_id' directive"))
+ if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
return true;
- if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
+ if (!getStreamer().emitCVFuncIdDirective(FunctionId))
return Error(FunctionIdLoc, "function id already allocated");
return false;
@@ -3851,11 +3859,10 @@ bool AsmParser::parseDirectiveCVInlineSiteId() {
Lex();
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cv_inline_site_id' directive"))
+ if (parseEOL())
return true;
- if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
+ if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
IALine, IACol, FunctionIdLoc))
return Error(FunctionIdLoc, "function id already allocated");
@@ -3976,7 +3983,7 @@ bool AsmParser::parseDirectiveCVInlineLinetable() {
"expected identifier in directive"))
return true;
- if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+ if (parseEOL())
return true;
MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
@@ -4137,7 +4144,7 @@ bool AsmParser::parseDirectiveCVFileChecksumOffset() {
int64_t FileNo;
if (parseIntToken(FileNo, "expected identifier in directive"))
return true;
- if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+ if (parseEOL())
return true;
getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
return false;
@@ -4153,7 +4160,7 @@ bool AsmParser::parseDirectiveCVFPOData() {
if (parseEOL())
return true;
MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
- getStreamer().EmitCVFPOData(ProcSym, DirLoc);
+ getStreamer().emitCVFPOData(ProcSym, DirLoc);
return false;
}
@@ -5550,6 +5557,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
+ DirectiveKindMap[".cfi_mte_tagged_frame"] = DK_CFI_MTE_TAGGED_FRAME;
DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
DirectiveKindMap[".macro"] = DK_MACRO;
@@ -6022,22 +6030,25 @@ bool AsmParser::parseMSInlineAsm(
}
bool isOutput = (i == 1) && Desc.mayStore();
+ bool Restricted = Operand.isMemUseUpRegs();
SMLoc Start = SMLoc::getFromPointer(SymName.data());
- int64_t Size = Operand.isMemPlaceholder(Desc) ? 0 : SymName.size();
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand.needAddressOf());
OutputConstraints.push_back(("=" + Constraint).str());
- AsmStrRewrites.emplace_back(AOK_Output, Start, Size);
+ AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size(), 0,
+ Restricted);
} else {
InputDecls.push_back(OpDecl);
InputDeclsAddressOf.push_back(Operand.needAddressOf());
InputConstraints.push_back(Constraint.str());
if (Desc.OpInfo[i - 1].isBranchTarget())
- AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
+ AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size(), 0,
+ Restricted);
else
- AsmStrRewrites.emplace_back(AOK_Input, Start, Size);
+ AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size(), 0,
+ Restricted);
}
}
@@ -6152,17 +6163,19 @@ bool AsmParser::parseMSInlineAsm(
OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
break;
case AOK_Input:
- if (AR.Len)
- OS << '$' << InputIdx;
- ++InputIdx;
+ if (AR.IntelExpRestricted)
+ OS << "${" << InputIdx++ << ":P}";
+ else
+ OS << '$' << InputIdx++;
break;
case AOK_CallInput:
OS << "${" << InputIdx++ << ":P}";
break;
case AOK_Output:
- if (AR.Len)
- OS << '$' << OutputIdx;
- ++OutputIdx;
+ if (AR.IntelExpRestricted)
+ OS << "${" << OutputIdx++ << ":P}";
+ else
+ OS << '$' << OutputIdx++;
break;
case AOK_SizeDirective:
switch (AR.Val) {
@@ -6299,7 +6312,7 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info,
// if this is a line comment we can drop it safely
if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
getTok().getString().front() == '\n')
- Out.AddBlankLine();
+ Out.addBlankLine();
Lex();
return false;
}
@@ -6315,7 +6328,7 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info,
if (Lexer.is(AsmToken::EndOfStatement)) {
if (getTok().getString().front() == '\n' ||
getTok().getString().front() == '\r') {
- Out.AddBlankLine();
+ Out.addBlankLine();
Lex();
return false;
}
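
Two behavioural fixes ride along in the AsmParser hunks above. The MS inline-asm rewriter now prints operands flagged isMemUseUpRegs with the ${N:P} template modifier (previously used only for call targets), which asks the backend to print the bare symbol rather than a decorated memory operand. And parseDirectiveAlign keeps going after diagnosing a bad alignment instead of emitting with the rejected value; a sketch of the recovery arithmetic, assuming the clamping mirrors the hunk above:

#include "llvm/Support/MathExtras.h"

uint64_t Alignment = 24;                      // e.g. ".align 24" -> diagnosed
if (!llvm::isPowerOf2_64(Alignment))
  Alignment = llvm::PowerOf2Floor(Alignment); // continues as ".align 16"
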
diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 0077c91cfdbd..b78595f5bab4 100644
--- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -13,11 +13,8 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/SectionKind.h"
@@ -322,7 +319,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
return TokError("unexpected token in section switching directive");
Lex();
- getStreamer().SwitchSection(getContext().getCOFFSection(
+ getStreamer().switchSection(getContext().getCOFFSection(
Section, Characteristics, Kind, COMDATSymName, Type));
return false;
@@ -419,7 +416,7 @@ bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
- getStreamer().BeginCOFFSymbolDef(Sym);
+ getStreamer().beginCOFFSymbolDef(Sym);
Lex();
return false;
@@ -434,7 +431,7 @@ bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
return TokError("unexpected token in directive");
Lex();
- getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+ getStreamer().emitCOFFSymbolStorageClass(SymbolStorageClass);
return false;
}
@@ -447,13 +444,13 @@ bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
return TokError("unexpected token in directive");
Lex();
- getStreamer().EmitCOFFSymbolType(Type);
+ getStreamer().emitCOFFSymbolType(Type);
return false;
}
bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
Lex();
- getStreamer().EndCOFFSymbolDef();
+ getStreamer().endCOFFSymbolDef();
return false;
}
@@ -482,7 +479,7 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitCOFFSecRel32(Symbol, Offset);
+ getStreamer().emitCOFFSecRel32(Symbol, Offset);
return false;
}
@@ -508,7 +505,7 @@ bool COFFAsmParser::ParseDirectiveRVA(StringRef, SMLoc) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
- getStreamer().EmitCOFFImgRel32(Symbol, Offset);
+ getStreamer().emitCOFFImgRel32(Symbol, Offset);
return false;
};
@@ -528,7 +525,7 @@ bool COFFAsmParser::ParseDirectiveSafeSEH(StringRef, SMLoc) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitCOFFSafeSEH(Symbol);
+ getStreamer().emitCOFFSafeSEH(Symbol);
return false;
}
@@ -543,7 +540,7 @@ bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitCOFFSectionIndex(Symbol);
+ getStreamer().emitCOFFSectionIndex(Symbol);
return false;
}
@@ -558,7 +555,7 @@ bool COFFAsmParser::ParseDirectiveSymIdx(StringRef, SMLoc) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitCOFFSymbolIndex(Symbol);
+ getStreamer().emitCOFFSymbolIndex(Symbol);
return false;
}
@@ -621,31 +618,31 @@ bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc Loc) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitWinCFIStartProc(Symbol, Loc);
+ getStreamer().emitWinCFIStartProc(Symbol, Loc);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc Loc) {
Lex();
- getStreamer().EmitWinCFIEndProc(Loc);
+ getStreamer().emitWinCFIEndProc(Loc);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveEndFuncletOrFunc(StringRef, SMLoc Loc) {
Lex();
- getStreamer().EmitWinCFIFuncletOrFuncEnd(Loc);
+ getStreamer().emitWinCFIFuncletOrFuncEnd(Loc);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc Loc) {
Lex();
- getStreamer().EmitWinCFIStartChained(Loc);
+ getStreamer().emitWinCFIStartChained(Loc);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc Loc) {
Lex();
- getStreamer().EmitWinCFIEndChained(Loc);
+ getStreamer().emitWinCFIEndChained(Loc);
return false;
}
@@ -671,13 +668,13 @@ bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc Loc) {
MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitWinEHHandler(handler, unwind, except, Loc);
+ getStreamer().emitWinEHHandler(handler, unwind, except, Loc);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc Loc) {
Lex();
- getStreamer().EmitWinEHHandlerData();
+ getStreamer().emitWinEHHandlerData();
return false;
}
@@ -690,20 +687,20 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) {
return TokError("unexpected token in directive");
Lex();
- getStreamer().EmitWinCFIAllocStack(Size, Loc);
+ getStreamer().emitWinCFIAllocStack(Size, Loc);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc Loc) {
Lex();
- getStreamer().EmitWinCFIEndProlog(Loc);
+ getStreamer().emitWinCFIEndProlog(Loc);
return false;
}
bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
StringRef identifier;
- if (getLexer().isNot(AsmToken::At))
- return TokError("a handler attribute must begin with '@'");
+ if (getLexer().isNot(AsmToken::At) && getLexer().isNot(AsmToken::Percent))
+ return TokError("a handler attribute must begin with '@' or '%'");
SMLoc startLoc = getLexer().getLoc();
Lex();
if (getParser().parseIdentifier(identifier))
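
Accepting '%' alongside '@' matters on targets whose assembler dialect already assigns '@' a lexical meaning (ARM, for instance, treats it as a comment character). Illustrative forms, assuming standard SEH handler syntax:

const char *HandlerForms[] = {
    ".seh_handler __C_specific_handler, @unwind, @except", // accepted before
    ".seh_handler __C_specific_handler, %unwind, %except", // accepted now
};
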
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 9da880f3b2ea..c5fedef40782 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -7,25 +7,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCParser/MCAsmParserUtils.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/SMLoc.h"
-#include <cassert>
#include <cstdint>
-#include <limits>
#include <utility>
using namespace llvm;
@@ -245,7 +238,7 @@ bool COFFMasmParser::ParseSectionSwitch(StringRef Section,
return TokError("unexpected token in section switching directive");
Lex();
- getStreamer().SwitchSection(getContext().getCOFFSection(
+ getStreamer().switchSection(getContext().getCOFFSection(
Section, Characteristics, Kind, COMDATSymName, Type));
return false;
@@ -273,7 +266,7 @@ bool COFFMasmParser::ParseDirectiveSegment(StringRef Directive, SMLoc Loc) {
COFF::IMAGE_SCN_MEM_READ;
}
SectionKind Kind = computeSectionKind(Flags);
- getStreamer().SwitchSection(getContext().getCOFFSection(
+ getStreamer().switchSection(getContext().getCOFFSection(
SectionName, Flags, Kind, "", (COFF::COMDATType)(0)));
return false;
}
@@ -300,13 +293,13 @@ bool COFFMasmParser::ParseDirectiveIncludelib(StringRef Directive, SMLoc Loc) {
unsigned Flags = COFF::IMAGE_SCN_MEM_PRELOAD | COFF::IMAGE_SCN_MEM_16BIT;
SectionKind Kind = computeSectionKind(Flags);
- getStreamer().PushSection();
- getStreamer().SwitchSection(getContext().getCOFFSection(
+ getStreamer().pushSection();
+ getStreamer().switchSection(getContext().getCOFFSection(
".drectve", Flags, Kind, "", (COFF::COMDATType)(0)));
getStreamer().emitBytes("/DEFAULTLIB:");
getStreamer().emitBytes(Lib);
getStreamer().emitBytes(" ");
- getStreamer().PopSection();
+ getStreamer().popSection();
return false;
}
@@ -343,7 +336,7 @@ bool COFFMasmParser::ParseDirectiveProc(StringRef Directive, SMLoc Loc) {
getTok().getString().equals_insensitive("frame")) {
Lex();
Framed = true;
- getStreamer().EmitWinCFIStartProc(Sym, Loc);
+ getStreamer().emitWinCFIStartProc(Sym, Loc);
}
getStreamer().emitLabel(Sym, Loc);
@@ -364,7 +357,7 @@ bool COFFMasmParser::ParseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
CurrentProcedure + "'");
if (CurrentProcedureFramed) {
- getStreamer().EmitWinCFIEndProc(Loc);
+ getStreamer().emitWinCFIEndProc(Loc);
}
CurrentProcedure = "";
CurrentProcedureFramed = false;
@@ -398,13 +391,13 @@ bool COFFMasmParser::ParseSEHDirectiveAllocStack(StringRef Directive,
return Error(SizeLoc, "expected integer size");
if (Size % 8 != 0)
return Error(SizeLoc, "stack size must be a multiple of 8");
- getStreamer().EmitWinCFIAllocStack(static_cast<unsigned>(Size), Loc);
+ getStreamer().emitWinCFIAllocStack(static_cast<unsigned>(Size), Loc);
return false;
}
bool COFFMasmParser::ParseSEHDirectiveEndProlog(StringRef Directive,
SMLoc Loc) {
- getStreamer().EmitWinCFIEndProlog(Loc);
+ getStreamer().emitWinCFIEndProlog(Loc);
return false;
}
diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 308b3842c61e..bc59531eecb8 100644
--- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
@@ -15,7 +14,6 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
@@ -29,7 +27,6 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
@@ -483,7 +480,7 @@ bool DarwinAsmParser::parseSectionSwitch(StringRef Segment, StringRef Section,
// FIXME: Arch specific.
bool isText = TAA & MachO::S_ATTR_PURE_INSTRUCTIONS;
- getStreamer().SwitchSection(getContext().getMachOSection(
+ getStreamer().switchSection(getContext().getMachOSection(
Segment, Section, TAA, StubSize,
isText ? SectionKind::getText() : SectionKind::getData()));
@@ -722,7 +719,7 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) {
// FIXME: Arch specific.
bool isText = Segment == "__TEXT"; // FIXME: Hack.
- getStreamer().SwitchSection(getContext().getMachOSection(
+ getStreamer().switchSection(getContext().getMachOSection(
Segment, Section, TAA, StubSize,
isText ? SectionKind::getText() : SectionKind::getData()));
return false;
@@ -731,10 +728,10 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) {
/// ParseDirectivePushSection:
/// ::= .pushsection identifier (',' identifier)*
bool DarwinAsmParser::parseDirectivePushSection(StringRef S, SMLoc Loc) {
- getStreamer().PushSection();
+ getStreamer().pushSection();
if (parseDirectiveSection(S, Loc)) {
- getStreamer().PopSection();
+ getStreamer().popSection();
return true;
}
@@ -744,7 +741,7 @@ bool DarwinAsmParser::parseDirectivePushSection(StringRef S, SMLoc Loc) {
/// ParseDirectivePopSection:
/// ::= .popsection
bool DarwinAsmParser::parseDirectivePopSection(StringRef, SMLoc) {
- if (!getStreamer().PopSection())
+ if (!getStreamer().popSection())
return TokError(".popsection without corresponding .pushsection");
return false;
}
@@ -755,7 +752,7 @@ bool DarwinAsmParser::parseDirectivePrevious(StringRef DirName, SMLoc) {
MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
if (!PreviousSection.first)
return TokError(".previous without corresponding .section");
- getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
+ getStreamer().switchSection(PreviousSection.first, PreviousSection.second);
return false;
}
@@ -1152,11 +1149,12 @@ static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
case MachO::PLATFORM_TVOS: return Triple::TvOS;
case MachO::PLATFORM_WATCHOS: return Triple::WatchOS;
case MachO::PLATFORM_BRIDGEOS: /* silence warning */ break;
+ case MachO::PLATFORM_DRIVERKIT:
+ return Triple::DriverKit;
case MachO::PLATFORM_MACCATALYST: return Triple::IOS;
case MachO::PLATFORM_IOSSIMULATOR: /* silence warning */ break;
case MachO::PLATFORM_TVOSSIMULATOR: /* silence warning */ break;
case MachO::PLATFORM_WATCHOSSIMULATOR: /* silence warning */ break;
- case MachO::PLATFORM_DRIVERKIT: /* silence warning */ break;
}
llvm_unreachable("Invalid mach-o platform type");
}
@@ -1175,6 +1173,7 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
.Case("tvos", MachO::PLATFORM_TVOS)
.Case("watchos", MachO::PLATFORM_WATCHOS)
.Case("macCatalyst", MachO::PLATFORM_MACCATALYST)
+ .Case("driverkit", MachO::PLATFORM_DRIVERKIT)
.Default(0);
if (Platform == 0)
return Error(PlatformLoc, "unknown platform name");
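
With both hunks in place, DriverKit round-trips through the Darwin parser: MachO::PLATFORM_DRIVERKIT now maps to Triple::DriverKit instead of falling into the unreachable, and a directive of the following shape (version numbers illustrative) is accepted where it previously produced "unknown platform name":

const char *Directive = ".build_version driverkit, 19, 0"; // now parses
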
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index e814cf003656..04a234be3b47 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -12,11 +12,9 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -214,7 +212,7 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
}
Lex();
- getStreamer().SwitchSection(getContext().getELFSection(Section, Type, Flags),
+ getStreamer().switchSection(getContext().getELFSection(Section, Type, Flags),
Subsection);
return false;
@@ -284,7 +282,8 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
return false;
}
-static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
+static unsigned parseSectionFlags(const Triple &TT, StringRef flagsStr,
+ bool *UseLastGroup) {
unsigned flags = 0;
// If a valid numerical value is set for the section flag, use it verbatim
@@ -333,7 +332,10 @@ static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
flags |= ELF::SHF_GROUP;
break;
case 'R':
- flags |= ELF::SHF_GNU_RETAIN;
+ if (TT.isOSSolaris())
+ flags |= ELF::SHF_SUNW_NODISCARD;
+ else
+ flags |= ELF::SHF_GNU_RETAIN;
break;
case '?':
*UseLastGroup = true;
@@ -377,10 +379,10 @@ unsigned ELFAsmParser::parseSunStyleSectionFlags() {
bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
- getStreamer().PushSection();
+ getStreamer().pushSection();
if (ParseSectionArguments(/*IsPush=*/true, loc)) {
- getStreamer().PopSection();
+ getStreamer().popSection();
return true;
}
@@ -388,7 +390,7 @@ bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
}
bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
- if (!getStreamer().PopSection())
+ if (!getStreamer().popSection())
return TokError(".popsection without corresponding .pushsection");
return false;
}
@@ -571,7 +573,8 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
} else {
StringRef FlagsStr = getTok().getStringContents();
Lex();
- extraFlags = parseSectionFlags(FlagsStr, &UseLastGroup);
+ extraFlags = parseSectionFlags(getContext().getTargetTriple(), FlagsStr,
+ &UseLastGroup);
}
if (extraFlags == -1U)
@@ -675,7 +678,7 @@ EndStmt:
MCSectionELF *Section =
getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
IsComdat, UniqueID, LinkedToSym);
- getStreamer().SwitchSection(Section, Subsection);
+ getStreamer().switchSection(Section, Subsection);
// Check that flags are used consistently. However, the GNU assembler permits
// to leave out in subsequent uses of the same sections; for compatibility,
// do likewise.
@@ -715,7 +718,7 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
if (PreviousSection.first == nullptr)
return TokError(".previous without corresponding .section");
- getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
+ getStreamer().switchSection(PreviousSection.first, PreviousSection.second);
return false;
}
@@ -857,15 +860,15 @@ bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) {
MCSection *Note = getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
- getStreamer().PushSection();
- getStreamer().SwitchSection(Note);
+ getStreamer().pushSection();
+ getStreamer().switchSection(Note);
getStreamer().emitInt32(Data.size() + 1); // namesz
getStreamer().emitInt32(0); // descsz = 0 (no description).
getStreamer().emitInt32(1); // type = NT_VERSION
getStreamer().emitBytes(Data); // name
getStreamer().emitInt8(0); // NUL
getStreamer().emitValueToAlignment(4);
- getStreamer().PopSection();
+ getStreamer().popSection();
return false;
}
@@ -907,7 +910,7 @@ bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) {
Lex();
- getStreamer().SubSection(Subsection);
+ getStreamer().subSection(Subsection);
return false;
}
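
parseSectionFlags now needs the target triple because the same flag letter maps to different SHF_* bits per OS: on Solaris 'R' selects the native SHF_SUNW_NODISCARD, while elsewhere it keeps meaning SHF_GNU_RETAIN. An illustrative directive whose emitted section flag is now triple-dependent:

// "R" marks the section as not to be discarded; which bit is emitted
// depends on the triple passed into parseSectionFlags above.
const char *Retained = ".section .init_array,\"awR\",@init_array";
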
diff --git a/llvm/lib/MC/MCParser/GOFFAsmParser.cpp b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
index c2a7eaee8029..c3fc04607273 100644
--- a/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
@@ -6,16 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSectionGOFF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolGOFF.h"
using namespace llvm;
@@ -31,7 +22,7 @@ class GOFFAsmParser : public MCAsmParserExtension {
}
public:
- GOFFAsmParser() {}
+ GOFFAsmParser() = default;
void Initialize(MCAsmParser &Parser) override {
// Call the base implementation.
diff --git a/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 497055bc1760..632c52479d70 100644
--- a/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -9,7 +9,6 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp
index d797c2d3f288..7fc1dbf56f98 100644
--- a/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -25,7 +25,7 @@ cl::opt<unsigned> AsmMacroMaxNestingDepth(
"asm-macro-max-nesting-depth", cl::init(20), cl::Hidden,
cl::desc("The maximum nesting depth allowed for assembly macros."));
-MCAsmParser::MCAsmParser() {}
+MCAsmParser::MCAsmParser() = default;
MCAsmParser::~MCAsmParser() = default;
diff --git a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
index 0b5046cd8fad..f5a10ce9805b 100644
--- a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -8,6 +8,8 @@
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index f9433240743d..8c582d225e30 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
@@ -36,21 +35,19 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCParser/MCAsmParserUtils.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -64,7 +61,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <cctype>
#include <climits>
#include <cstddef>
#include <cstdint>
@@ -980,6 +976,8 @@ private:
bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
bool parseDirectiveNestedEnds();
+ bool parseDirectiveExtern();
+
/// Parse a directive like ".globl" which accepts a single symbol (which
/// should be a label or an external).
bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
@@ -1192,7 +1190,7 @@ bool MasmParser::expandMacros() {
}
}
- if (!ExpandedValue.hasValue())
+ if (!ExpandedValue)
return true;
std::unique_ptr<MemoryBuffer> Instantiation =
MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
@@ -1431,7 +1429,7 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Finalize the output stream if there are no errors and if the client wants
// us to.
if (!HadError && !NoFinalize)
- Out.Finish(Lexer.getLoc());
+ Out.finish(Lexer.getLoc());
return HadError || getContext().hadError();
}
@@ -2094,7 +2092,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
// If this is a line comment we can drop it safely.
if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
getTok().getString().front() == '\n')
- Out.AddBlankLine();
+ Out.addBlankLine();
Lex();
return false;
}
@@ -2283,7 +2281,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
}
// Consume any end of statement token, if present, to avoid spurious
- // AddBlankLine calls().
+ // addBlankLine calls().
if (getTok().is(AsmToken::EndOfStatement)) {
Lex();
}
@@ -2409,8 +2407,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
case DK_ORG:
return parseDirectiveOrg();
case DK_EXTERN:
- eatToEndOfStatement(); // .extern is the default, ignore it.
- return false;
+ return parseDirectiveExtern();
case DK_PUBLIC:
return parseDirectiveSymbolAttribute(MCSA_Global);
case DK_COMM:
@@ -2905,7 +2902,7 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
if (Body[Pos] == '&')
break;
if (isMacroParameterChar(Body[Pos])) {
- if (!CurrentQuote.hasValue())
+ if (!CurrentQuote)
break;
if (IdentifierPos == End)
IdentifierPos = Pos;
@@ -2914,7 +2911,7 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
}
// Track quotation status
- if (!CurrentQuote.hasValue()) {
+ if (!CurrentQuote) {
if (Body[Pos] == '\'' || Body[Pos] == '"')
CurrentQuote = Body[Pos];
} else if (Body[Pos] == CurrentQuote) {
@@ -3333,7 +3330,7 @@ bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
ParseStatementInfo Info(&AsmStrRewrites);
bool Parsed = parseStatement(Info, nullptr);
- if (!Parsed && Info.ExitValue.hasValue()) {
+ if (!Parsed && Info.ExitValue) {
ExitValue = std::move(*Info.ExitValue);
break;
}
@@ -3628,7 +3625,7 @@ bool MasmParser::parseTextItem(std::string &Data) {
if (BuiltinIt != BuiltinSymbolMap.end()) {
llvm::Optional<std::string> BuiltinText =
evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
- if (!BuiltinText.hasValue()) {
+ if (!BuiltinText) {
// Not a text macro; break without substituting
break;
}
@@ -4242,7 +4239,7 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure,
auto &FieldInitializers = Initializer.FieldInitializers;
size_t FieldIndex = 0;
- if (EndToken.hasValue()) {
+ if (EndToken) {
// Initialize all fields with given initializers.
while (getTok().isNot(EndToken.getValue()) &&
FieldIndex < Structure.Fields.size()) {
@@ -4275,7 +4272,7 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure,
FieldInitializers.push_back(Field.Contents);
}
- if (EndToken.hasValue()) {
+ if (EndToken) {
if (EndToken.getValue() == AsmToken::Greater)
return parseAngleBracketClose();
@@ -4763,7 +4760,7 @@ bool MasmParser::emitAlignTo(int64_t Alignment) {
// directive.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign()) {
+ if (Section->useCodeAlign()) {
getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
/*MaxBytesToEmit=*/0);
} else {
@@ -4911,8 +4908,8 @@ bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
if (HasMD5) {
MD5::MD5Result Sum;
for (unsigned i = 0; i != 8; ++i) {
- Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
- Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+ Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+ Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
}
CKMem = Sum;
}
@@ -4952,8 +4949,7 @@ bool MasmParser::parseDirectiveLine() {
(void)LineNumber;
// FIXME: Do something with the .line.
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.line' directive"))
+ if (parseEOL())
return true;
return false;
@@ -5086,8 +5082,7 @@ bool MasmParser::parseDirectiveCVFile() {
parseEscapedString(Checksum) ||
parseIntToken(ChecksumKind,
"expected checksum kind in '.cv_file' directive") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cv_file' directive"))
+ parseEOL())
return true;
}
@@ -5097,7 +5092,7 @@ bool MasmParser::parseDirectiveCVFile() {
ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
Checksum.size());
- if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
+ if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
static_cast<uint8_t>(ChecksumKind)))
return Error(FileNumberLoc, "file number already allocated");
@@ -5133,12 +5128,10 @@ bool MasmParser::parseDirectiveCVFuncId() {
SMLoc FunctionIdLoc = getTok().getLoc();
int64_t FunctionId;
- if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cv_func_id' directive"))
+ if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
return true;
- if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
+ if (!getStreamer().emitCVFuncIdDirective(FunctionId))
return Error(FunctionIdLoc, "function id already allocated");
return false;
@@ -5194,11 +5187,10 @@ bool MasmParser::parseDirectiveCVInlineSiteId() {
Lex();
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cv_inline_site_id' directive"))
+ if (parseEOL())
return true;
- if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
+ if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
IALine, IACol, FunctionIdLoc))
return Error(FunctionIdLoc, "function id already allocated");
@@ -5321,7 +5313,7 @@ bool MasmParser::parseDirectiveCVInlineLinetable() {
"expected identifier in directive"))
return true;
- if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+ if (parseEOL())
return true;
MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
@@ -5482,7 +5474,7 @@ bool MasmParser::parseDirectiveCVFileChecksumOffset() {
int64_t FileNo;
if (parseIntToken(FileNo, "expected identifier in directive"))
return true;
- if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
+ if (parseEOL())
return true;
getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
return false;
@@ -5498,7 +5490,7 @@ bool MasmParser::parseDirectiveCVFPOData() {
if (parseEOL("unexpected tokens"))
return addErrorSuffix(" in '.cv_fpo_data' directive");
MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
- getStreamer().EmitCVFPOData(ProcSym, DirLoc);
+ getStreamer().emitCVFPOData(ProcSym, DirLoc);
return false;
}
@@ -5791,8 +5783,7 @@ bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
/// parseDirectiveCFISignalFrame
/// ::= .cfi_signal_frame
bool MasmParser::parseDirectiveCFISignalFrame() {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cfi_signal_frame'"))
+ if (parseEOL())
return true;
getStreamer().emitCFISignalFrame();
@@ -6023,6 +6014,39 @@ bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
return false;
}
+bool MasmParser::parseDirectiveExtern() {
+ // .extern is the default - but we still need to take any provided type info.
+ auto parseOp = [&]() -> bool {
+ StringRef Name;
+ SMLoc NameLoc = getTok().getLoc();
+ if (parseIdentifier(Name))
+ return Error(NameLoc, "expected name");
+ if (parseToken(AsmToken::Colon))
+ return true;
+
+ StringRef TypeName;
+ SMLoc TypeLoc = getTok().getLoc();
+ if (parseIdentifier(TypeName))
+ return Error(TypeLoc, "expected type");
+ if (!TypeName.equals_insensitive("proc")) {
+ AsmTypeInfo Type;
+ if (lookUpType(TypeName, Type))
+ return Error(TypeLoc, "unrecognized type");
+ KnownType[Name.lower()] = Type;
+ }
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ Sym->setExternal(true);
+ getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
+
+ return false;
+ };
+
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in directive 'extern'");
+ return false;
+}
+
/// parseDirectiveSymbolAttribute
/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
@@ -6091,8 +6115,7 @@ bool MasmParser::parseDirectiveComm(bool IsLocal) {
}
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.comm' or '.lcomm' directive"))
+ if (parseEOL())
return true;
// NOTE: a size of zero for a .comm should create an undefined symbol
@@ -6138,8 +6161,7 @@ bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
Lex(); // eat end of statement
} while (
!StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
- return parseToken(AsmToken::EndOfStatement,
- "unexpected token in 'comment' directive");
+ return parseEOL();
}
/// parseDirectiveInclude
@@ -6173,9 +6195,7 @@ bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
eatToEndOfStatement();
} else {
int64_t ExprValue;
- if (parseAbsoluteExpression(ExprValue) ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.if' directive"))
+ if (parseAbsoluteExpression(ExprValue) || parseEOL())
return true;
switch (DirKind) {
@@ -6208,8 +6228,7 @@ bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
if (parseTextItem(Str))
return TokError("expected text item parameter for 'ifb' directive");
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in 'ifb' directive"))
+ if (parseEOL())
return true;
TheCondState.CondMet = ExpectBlank == Str.empty();
@@ -6275,7 +6294,7 @@ bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
if (!is_defined) {
StringRef Name;
if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
- parseToken(AsmToken::EndOfStatement, "unexpected token in 'ifdef'"))
+ parseEOL())
return true;
if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
@@ -6316,8 +6335,7 @@ bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
if (parseAbsoluteExpression(ExprValue))
return true;
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.elseif' directive"))
+ if (parseEOL())
return true;
switch (DirKind) {
@@ -6360,8 +6378,7 @@ bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
return TokError("expected text item parameter for 'elseifnb' directive");
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in 'elseifb' directive"))
+ if (parseEOL())
return true;
TheCondState.CondMet = ExpectBlank == Str.empty();
@@ -6398,8 +6415,7 @@ bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
StringRef Name;
if (check(parseIdentifier(Name),
"expected identifier after 'elseifdef'") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in 'elseifdef'"))
+ parseEOL())
return true;
if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
@@ -6475,8 +6491,7 @@ bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
/// parseDirectiveElse
/// ::= else
bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in 'else' directive"))
+ if (parseEOL())
return true;
if (TheCondState.TheCond != AsmCond::IfCond &&
@@ -6498,8 +6513,7 @@ bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
/// parseDirectiveEnd
/// ::= end
bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in 'end' directive"))
+ if (parseEOL())
return true;
while (Lexer.isNot(AsmToken::Eof))
@@ -6687,8 +6701,7 @@ bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
/// parseDirectiveEndIf
/// ::= .endif
bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.endif' directive"))
+ if (parseEOL())
return true;
if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
@@ -6982,9 +6995,7 @@ bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
}
- if (check(Count < 0, CountLoc, "Count is negative") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '" + Dir + "' directive"))
+ if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
return true;
// Lex the repeat definition.
@@ -7099,7 +7110,7 @@ bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
if (parseToken(AsmToken::Greater,
"values in '" + Dir +
"' directive must be enclosed in angle brackets") ||
- parseToken(AsmToken::EndOfStatement, "expected End of Statement"))
+ parseEOL())
return true;
// Lex the for definition.
@@ -7149,7 +7160,7 @@ bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
}
Argument.resize(End);
}
- if (parseToken(AsmToken::EndOfStatement, "expected end of statement"))
+ if (parseEOL())
return true;
// Lex the irpc definition.
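
The MasmParser changes above are dominated by two mechanical migrations: streamer entry points move to lowerCamelCase (Finish/AddBlankLine become finish/addBlankLine), and llvm::Optional drops its hasValue()/getValue() spelling in favor of the std::optional-style contextual bool and operator*. A minimal sketch of the Optional side, written against std::optional (whose interface llvm::Optional mirrors here):

#include <iostream>
#include <optional>

int main() {
  std::optional<char> CurrentQuote; // starts disengaged, like the parser's

  // Old style: if (!CurrentQuote.hasValue()); new style: contextual bool.
  if (!CurrentQuote)
    CurrentQuote = '"';

  // Old style: CurrentQuote.getValue(); new style: operator*.
  std::cout << *CurrentQuote << '\n';
}

The other recurring edit, folding parseToken(AsmToken::EndOfStatement, "...") into parseEOL(), is a pure refactor: parseEOL supplies a uniform end-of-line diagnostic so each directive no longer spells out its own message.
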
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 833530bef3bf..a84d00d82b76 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -21,11 +21,11 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
-#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -53,6 +53,7 @@ public:
this->MCAsmParserExtension::Initialize(*Parser);
addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveText>(".text");
+ addDirectiveHandler<&WasmAsmParser::parseSectionDirectiveData>(".data");
addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
@@ -90,6 +91,12 @@ public:
return false;
}
+ bool parseSectionDirectiveData(StringRef, SMLoc) {
+ auto *S = getContext().getObjectFileInfo()->getDataSection();
+ getStreamer().switchSection(S);
+ return false;
+ }
+
uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
uint32_t flags = 0;
for (char C : FlagStr) {
@@ -181,7 +188,7 @@ public:
// TODO: Parse UniqueID
MCSectionWasm *WS = getContext().getWasmSection(
- Name, Kind.getValue(), Flags, GroupName, MCContext::GenericSectionID);
+ Name, *Kind, Flags, GroupName, MCContext::GenericSectionID);
if (WS->getSegmentFlags() != Flags)
Parser->Error(loc, "changed section flags for " + Name +
@@ -194,7 +201,7 @@ public:
WS->setPassive();
}
- getStreamer().SwitchSection(WS);
+ getStreamer().switchSection(WS);
return false;
}
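
The new .data handler above is one line of real work: resolve the well-known data section from MCObjectFileInfo and switch the streamer to it; addDirectiveHandler wires the directive name to the member function. A generic, self-contained sketch of that table-driven dispatch (MiniParser and its names are invented for illustration):

#include <functional>
#include <map>
#include <string>

struct MiniParser {
  // Directive name -> parsing callback, as addDirectiveHandler does.
  std::map<std::string, std::function<bool()>> Handlers;

  void addHandler(std::string Name, std::function<bool()> Fn) {
    Handlers[std::move(Name)] = std::move(Fn);
  }
  // Returns true on error, matching the MCAsmParserExtension convention.
  bool parse(const std::string &Directive) {
    auto It = Handlers.find(Directive);
    return It == Handlers.end() ? true : It->second();
  }
};

int main() {
  MiniParser P;
  P.addHandler(".data", [] { /* switch to the data section */ return false; });
  return P.parse(".data"); // 0 on success
}
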
diff --git a/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp b/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp
index 7494fe07734c..d20a65f6a476 100644
--- a/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp
@@ -8,15 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/BinaryFormat/XCOFF.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSectionXCOFF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSymbolXCOFF.h"
-#include "llvm/Support/MachineValueType.h"
using namespace llvm;
@@ -35,7 +28,7 @@ class XCOFFAsmParser : public MCAsmParserExtension {
}
public:
- XCOFFAsmParser() {}
+ XCOFFAsmParser() = default;
void Initialize(MCAsmParser &P) override {
Parser = &P;
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index ebf38327f4dc..5277ce87bee0 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -9,9 +9,10 @@
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
@@ -182,7 +183,7 @@ void MCPseudoProbeSection::emit(MCObjectStreamer *MCOS) {
if (auto *S =
Ctx.getObjectFileInfo()->getPseudoProbeSection(ProbeSec.first)) {
// Switch to the .pseudoprobe section or a comdat group.
- MCOS->SwitchSection(S);
+ MCOS->switchSection(S);
// Emit probes grouped by GUID.
ProbeSec.second.emit(MCOS, LastProbe);
}
@@ -229,8 +230,7 @@ void MCDecodedPseudoProbe::getInlineContext(
// It will add the string of each node's inline site during iteration.
// Note that it won't include the probe's own function (leaf location).
while (Cur->hasInlineSite()) {
- StringRef FuncName =
- getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite));
+ StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid);
ContextStack.emplace_back(
MCPseduoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite)));
Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
@@ -357,8 +357,9 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
return true;
}
-bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
- std::size_t Size) {
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
+ MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr,
+ std::unordered_set<uint64_t> &GuildFilter) {
// The pseudo_probe section encodes an inline forest and each tree has a
// format like:
// FUNCTION BODY (one for each uninlined function present in the text
@@ -389,101 +390,110 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
// FUNCTION BODY
// A FUNCTION BODY entry describing the inlined function.
- Data = Start;
- End = Data + Size;
-
- MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot;
- MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot;
- uint64_t LastAddr = 0;
uint32_t Index = 0;
- // A DFS-based decoding
- while (Data < End) {
- if (Root == Cur) {
- // Use a sequential id for top level inliner.
- Index = Root->getChildren().size();
- } else {
- // Read inline site for inlinees
- auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
- if (!ErrorOrIndex)
- return false;
- Index = std::move(*ErrorOrIndex);
- }
- // Switch/add to a new tree node(inlinee)
- Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));
- // Read guid
- auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
- if (!ErrorOrCurGuid)
+ if (Cur == &DummyInlineRoot) {
+ // Use a sequential id for top level inliner.
+ Index = Cur->getChildren().size();
+ } else {
+ // Read inline site for inlinees
+ auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrIndex)
return false;
- Cur->Guid = std::move(*ErrorOrCurGuid);
- // Read number of probes in the current node.
- auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
- if (!ErrorOrNodeCount)
+ Index = std::move(*ErrorOrIndex);
+ }
+
+ // Read guid
+ auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
+ if (!ErrorOrCurGuid)
+ return false;
+ uint64_t Guid = std::move(*ErrorOrCurGuid);
+
+ // Decide if top-level node should be discarded.
+ if (Cur == &DummyInlineRoot && !GuildFilter.empty() &&
+ !GuildFilter.count(Guid))
+ Cur = nullptr;
+
+ // If the incoming node is null, all of its child nodes should be discarded.
+ if (Cur) {
+ // Switch/add to a new tree node (inlinee).
+ Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index));
+ Cur->Guid = Guid;
+ }
+
+ // Read number of probes in the current node.
+ auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrNodeCount)
+ return false;
+ uint32_t NodeCount = std::move(*ErrorOrNodeCount);
+ // Read number of direct inlinees
+ auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrCurChildrenToProcess)
+ return false;
+ // Read all probes in this node
+ for (std::size_t I = 0; I < NodeCount; I++) {
+ // Read index
+ auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrIndex)
return false;
- uint32_t NodeCount = std::move(*ErrorOrNodeCount);
- // Read number of direct inlinees
- auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
- if (!ErrorOrCurChildrenToProcess)
+ uint32_t Index = std::move(*ErrorOrIndex);
+ // Read type | flag.
+ auto ErrorOrValue = readUnencodedNumber<uint8_t>();
+ if (!ErrorOrValue)
return false;
- Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
- // Read all probes in this node
- for (std::size_t I = 0; I < NodeCount; I++) {
- // Read index
- auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
- if (!ErrorOrIndex)
+ uint8_t Value = std::move(*ErrorOrValue);
+ uint8_t Kind = Value & 0xf;
+ uint8_t Attr = (Value & 0x70) >> 4;
+ // Read address
+ uint64_t Addr = 0;
+ if (Value & 0x80) {
+ auto ErrorOrOffset = readSignedNumber<int64_t>();
+ if (!ErrorOrOffset)
return false;
- uint32_t Index = std::move(*ErrorOrIndex);
- // Read type | flag.
- auto ErrorOrValue = readUnencodedNumber<uint8_t>();
- if (!ErrorOrValue)
+ int64_t Offset = std::move(*ErrorOrOffset);
+ Addr = LastAddr + Offset;
+ } else {
+ auto ErrorOrAddr = readUnencodedNumber<int64_t>();
+ if (!ErrorOrAddr)
return false;
- uint8_t Value = std::move(*ErrorOrValue);
- uint8_t Kind = Value & 0xf;
- uint8_t Attr = (Value & 0x70) >> 4;
- // Read address
- uint64_t Addr = 0;
- if (Value & 0x80) {
- auto ErrorOrOffset = readSignedNumber<int64_t>();
- if (!ErrorOrOffset)
- return false;
- int64_t Offset = std::move(*ErrorOrOffset);
- Addr = LastAddr + Offset;
- } else {
- auto ErrorOrAddr = readUnencodedNumber<int64_t>();
- if (!ErrorOrAddr)
- return false;
- Addr = std::move(*ErrorOrAddr);
- }
+ Addr = std::move(*ErrorOrAddr);
+ }
+
+ if (Cur) {
// Populate Address2ProbesMap
auto &Probes = Address2ProbesMap[Addr];
Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
Cur);
Cur->addProbes(&Probes.back());
- LastAddr = Addr;
}
+ LastAddr = Addr;
+ }
- // Look for the parent for the next node by subtracting the current
- // node count from tree counts along the parent chain. The first node
- // in the chain that has a non-zero tree count is the target.
- while (Cur != Root) {
- if (Cur->ChildrenToProcess == 0) {
- Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
- if (Cur != Root) {
- assert(Cur->ChildrenToProcess > 0 &&
- "Should have some unprocessed nodes");
- Cur->ChildrenToProcess -= 1;
- }
- } else {
- break;
- }
- }
+ uint32_t ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
+ for (uint32_t I = 0; I < ChildrenToProcess; I++) {
+ buildAddress2ProbeMap(Cur, LastAddr, GuildFilter);
}
+ return true;
+}
+
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
+ const uint8_t *Start, std::size_t Size,
+ std::unordered_set<uint64_t> &GuildFilter) {
+ Data = Start;
+ End = Data + Size;
+ uint64_t LastAddr = 0;
+ while (Data < End)
+ buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuildFilter);
assert(Data == End && "Have unprocessed data in pseudo_probe section");
- assert(Cur == Root &&
- " Cur should point to root when the forest is fully built up");
return true;
}
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
+ std::size_t Size) {
+ std::unordered_set<uint64_t> GuildFilter;
+ return buildAddress2ProbeMap(Start, Size, GuildFilter);
+}
+
void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
OS << "Pseudo Probe Desc:\n";
// Make the output deterministic
@@ -563,5 +573,5 @@ const MCPseudoProbeFuncDesc *MCPseudoProbeDecoder::getInlinerDescForProbe(
MCDecodedPseudoProbeInlineTree *InlinerNode = Probe->getInlineTreeNode();
if (!InlinerNode->hasInlineSite())
return nullptr;
- return getFuncDescForGUID(std::get<0>(InlinerNode->ISite));
+ return getFuncDescForGUID(InlinerNode->Parent->Guid);
}
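
The buildAddress2ProbeMap rewrite above replaces the manual parent-chain bookkeeping (the ChildrenToProcess counters) with recursion: each call decodes one inline-tree node, recurses once per direct inlinee, and a filtered-out top-level GUID is decoded with a null tree pointer so its bytes are still consumed without materializing nodes. A hedged, self-contained sketch of that shape (the flat uint64_t stream and all names are invented; the real decoder reads LEB128-encoded fields):

#include <cstdint>
#include <memory>
#include <unordered_set>
#include <vector>

struct Node {
  uint64_t Guid = 0;
  std::vector<std::unique_ptr<Node>> Children;
};

// Decode one node. A null Parent means "consume the bytes, keep nothing",
// which is how a filtered top-level GUID propagates to its whole subtree.
void decodeNode(const uint64_t *&Data, Node *Parent, bool TopLevel,
                const std::unordered_set<uint64_t> &Filter) {
  uint64_t Guid = *Data++;
  uint64_t NumChildren = *Data++;

  Node *Cur = nullptr;
  bool Drop = TopLevel && !Filter.empty() && !Filter.count(Guid);
  if (Parent && !Drop) {
    Parent->Children.push_back(std::make_unique<Node>());
    Cur = Parent->Children.back().get();
    Cur->Guid = Guid;
  }
  for (uint64_t I = 0; I < NumChildren; ++I)
    decodeNode(Data, Cur, /*TopLevel=*/false, Filter);
}

int main() {
  // Stream: root GUID 1 with one child (GUID 2, no children).
  const uint64_t Stream[] = {1, 1, 2, 0};
  const uint64_t *Data = Stream;
  Node Root;
  std::unordered_set<uint64_t> Filter; // empty filter keeps everything
  decodeNode(Data, &Root, /*TopLevel=*/true, Filter);
  return Root.Children.size() == 1 ? 0 : 1;
}
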
diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp
index d491c0eb7e06..d6c4fe10fc98 100644
--- a/llvm/lib/MC/MCRegisterInfo.cpp
+++ b/llvm/lib/MC/MCRegisterInfo.cpp
@@ -122,3 +122,14 @@ int MCRegisterInfo::getCodeViewRegNum(MCRegister RegNum) const {
: Twine(RegNum)));
return I->second;
}
+
+bool MCRegisterInfo::regsOverlap(MCRegister RegA, MCRegister RegB) const {
+ // Regunits are numerically ordered. Find a common unit.
+ MCRegUnitIterator RUA(RegA, this);
+ MCRegUnitIterator RUB(RegB, this);
+ do {
+ if (*RUA == *RUB)
+ return true;
+ } while (*RUA < *RUB ? (++RUA).isValid() : (++RUB).isValid());
+ return false;
+}
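
regsOverlap works because register units are numerically ordered: two registers overlap exactly when a merge-style scan of their unit lists finds a common value, advancing whichever iterator is behind. The same idea on plain sorted vectors, as a sketch:

#include <cstddef>
#include <vector>

// True if two ascending sequences share an element; this mirrors the
// MCRegUnitIterator walk in regsOverlap above.
bool sortedOverlap(const std::vector<int> &A, const std::vector<int> &B) {
  size_t IA = 0, IB = 0;
  while (IA < A.size() && IB < B.size()) {
    if (A[IA] == B[IB])
      return true;
    if (A[IA] < B[IB])
      ++IA; // advance the smaller side
    else
      ++IB;
  }
  return false;
}

int main() {
  return sortedOverlap({1, 4, 7}, {2, 4, 6}) ? 0 : 1; // common unit: 4
}
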
diff --git a/llvm/lib/MC/MCSPIRVStreamer.cpp b/llvm/lib/MC/MCSPIRVStreamer.cpp
new file mode 100644
index 000000000000..863db7f36f29
--- /dev/null
+++ b/llvm/lib/MC/MCSPIRVStreamer.cpp
@@ -0,0 +1,45 @@
+//===- lib/MC/MCSPIRVStreamer.cpp - SPIR-V Object Output ------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits SPIR-V .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSPIRVStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+void MCSPIRVStreamer::emitInstToData(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ MCAssembler &Assembler = getAssembler();
+ SmallVector<MCFixup, 0> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+
+ // Append the encoded instruction to the current data fragment (or create a
+ // new such fragment if the current fragment is not a data fragment).
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->setHasInstructions(STI);
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+MCStreamer *llvm::createSPIRVStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll) {
+ MCSPIRVStreamer *S = new MCSPIRVStreamer(Context, std::move(MAB),
+ std::move(OW), std::move(CE));
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index db08e2044113..98eb7eada064 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -98,7 +98,7 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
double Temp = NumUnits * 1.0 / I->Cycles;
Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
}
- if (Throughput.hasValue())
+ if (Throughput)
return 1.0 / Throughput.getValue();
// If no throughput value was calculated, assume that we can execute at the
@@ -142,7 +142,7 @@ MCSchedModel::getReciprocalThroughput(unsigned SchedClass,
double Temp = countPopulation(I->getUnits()) * 1.0 / I->getCycles();
Throughput = Throughput ? std::min(Throughput.getValue(), Temp) : Temp;
}
- if (Throughput.hasValue())
+ if (Throughput)
return 1.0 / Throughput.getValue();
// If there are no execution resources specified for this class, then assume
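
Both getReciprocalThroughput overloads use the same accumulate-then-invert idiom the hunks above touch: a disengaged Optional means "no resource seen yet", each resource folds in via min, and the result is inverted only if anything was recorded. A sketch of the idiom with std::optional (the 1.0 fallback is an assumption for illustration; the real code computes per-target defaults):

#include <algorithm>
#include <optional>
#include <vector>

double reciprocalThroughput(const std::vector<double> &PerResource) {
  std::optional<double> Throughput; // disengaged: nothing recorded yet
  for (double T : PerResource)
    Throughput = Throughput ? std::min(*Throughput, T) : T;
  return Throughput ? 1.0 / *Throughput : 1.0; // fallback is assumed
}

int main() { return reciprocalThroughput({0.5, 0.25}) == 4.0 ? 0 : 1; }
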
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 8342abacec09..7547558fe6e2 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSection.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCContext.h"
@@ -15,7 +16,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <utility>
using namespace llvm;
diff --git a/llvm/lib/MC/MCSectionCOFF.cpp b/llvm/lib/MC/MCSectionCOFF.cpp
index 387bf2c884e5..f7ca0375544a 100644
--- a/llvm/lib/MC/MCSectionCOFF.cpp
+++ b/llvm/lib/MC/MCSectionCOFF.cpp
@@ -14,9 +14,9 @@
using namespace llvm;
-// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// shouldOmitSectionDirective - Decides whether a '.section' directive
// should be printed before the section name
-bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name,
const MCAsmInfo &MAI) const {
if (COMDATSymbol)
return false;
@@ -34,11 +34,11 @@ void MCSectionCOFF::setSelection(int Selection) const {
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
}
-void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+void MCSectionCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
// standard sections don't require the '.section'
- if (ShouldOmitSectionDirective(getName(), MAI)) {
+ if (shouldOmitSectionDirective(getName(), MAI)) {
OS << '\t' << getName() << '\n';
return;
}
@@ -104,9 +104,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << '\n';
}
-bool MCSectionCOFF::UseCodeAlign() const {
- return getKind().isText();
-}
+bool MCSectionCOFF::useCodeAlign() const { return getKind().isText(); }
bool MCSectionCOFF::isVirtualSection() const {
return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
diff --git a/llvm/lib/MC/MCSectionDXContainer.cpp b/llvm/lib/MC/MCSectionDXContainer.cpp
new file mode 100644
index 000000000000..065b506c21ce
--- /dev/null
+++ b/llvm/lib/MC/MCSectionDXContainer.cpp
@@ -0,0 +1,15 @@
+//===- lib/MC/MCSectionDXContainer.cpp - DXContainer Section --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionDXContainer.h"
+
+using namespace llvm;
+
+void MCSectionDXContainer::printSwitchToSection(const MCAsmInfo &,
+ const Triple &, raw_ostream &,
+ const MCExpr *) const {}
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index d18876507cd7..27dc1826819b 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
// Decides whether a '.section' directive
// should be printed before the section name.
-bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
+bool MCSectionELF::shouldOmitSectionDirective(StringRef Name,
const MCAsmInfo &MAI) const {
if (isUnique())
return false;
@@ -50,10 +50,10 @@ static void printName(raw_ostream &OS, StringRef Name) {
OS << '"';
}
-void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
- if (ShouldOmitSectionDirective(getName(), MAI)) {
+ if (shouldOmitSectionDirective(getName(), MAI)) {
OS << '\t' << getName();
if (Subsection) {
OS << '\t';
@@ -105,6 +105,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
if (Flags & ELF::SHF_GNU_RETAIN)
OS << 'R';
+ // If there are os-specific flags, print them.
+ if (T.isOSSolaris())
+ if (Flags & ELF::SHF_SUNW_NODISCARD)
+ OS << 'R';
+
// If there are target-specific flags, print them.
Triple::ArchType Arch = T.getArch();
if (Arch == Triple::xcore) {
@@ -160,6 +165,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << "llvm_sympart";
else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP)
OS << "llvm_bb_addr_map";
+ else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP_V0)
+ OS << "llvm_bb_addr_map_v0";
else
report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
" for section " + getName());
@@ -196,7 +203,7 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
}
}
-bool MCSectionELF::UseCodeAlign() const {
+bool MCSectionELF::useCodeAlign() const {
return getFlags() & ELF::SHF_EXECINSTR;
}
diff --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp
index d914e64ca23a..1c210fb0f4c8 100644
--- a/llvm/lib/MC/MCSectionMachO.cpp
+++ b/llvm/lib/MC/MCSectionMachO.cpp
@@ -7,9 +7,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCContext.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/raw_ostream.h"
-#include <cctype>
+
+namespace llvm {
+class MCAsmInfo;
+class MCExpr;
+class MCSymbol;
+class Triple;
+} // namespace llvm
+
using namespace llvm;
/// SectionTypeDescriptors - These are strings that describe the various section
@@ -19,7 +26,7 @@ static constexpr struct {
StringLiteral AssemblerName, EnumName;
} SectionTypeDescriptors[MachO::LAST_KNOWN_SECTION_TYPE + 1] = {
{StringLiteral("regular"), StringLiteral("S_REGULAR")}, // 0x00
- {StringLiteral(""), StringLiteral("S_ZEROFILL")}, // 0x01
+ {StringLiteral("zerofill"), StringLiteral("S_ZEROFILL")}, // 0x01
{StringLiteral("cstring_literals"),
StringLiteral("S_CSTRING_LITERALS")}, // 0x02
{StringLiteral("4byte_literals"),
@@ -95,7 +102,7 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
}
}
-void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
OS << "\t.section\t" << getSegmentName() << ',' << getName();
@@ -159,7 +166,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << '\n';
}
-bool MCSectionMachO::UseCodeAlign() const {
+bool MCSectionMachO::useCodeAlign() const {
return hasAttribute(MachO::S_ATTR_PURE_INSTRUCTIONS);
}
diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp
index 459913263268..e90f401b1efa 100644
--- a/llvm/lib/MC/MCSectionWasm.cpp
+++ b/llvm/lib/MC/MCSectionWasm.cpp
@@ -9,7 +9,6 @@
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,7 +44,7 @@ static void printName(raw_ostream &OS, StringRef Name) {
OS << '"';
}
-void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
@@ -102,6 +101,6 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
}
}
-bool MCSectionWasm::UseCodeAlign() const { return false; }
+bool MCSectionWasm::useCodeAlign() const { return false; }
bool MCSectionWasm::isVirtualSection() const { return false; }
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
index 2ff4839d3706..ee8fa04c421f 100644
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -8,10 +8,12 @@
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
+class MCExpr;
+class Triple;
+} // namespace llvm
using namespace llvm;
@@ -22,7 +24,7 @@ void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const {
<< '\n';
}
-void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
if (getKind().isText()) {
@@ -117,7 +119,7 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
report_fatal_error("Printing for this SectionKind is unimplemented.");
}
-bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); }
+bool MCSectionXCOFF::useCodeAlign() const { return getKind().isText(); }
bool MCSectionXCOFF::isVirtualSection() const {
// DWARF sections are always not virtual.
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index a14f0de65a9d..a229d282dabe 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -59,7 +60,7 @@ void MCTargetStreamer::changeSection(const MCSection *CurSection,
MCSection *Section,
const MCExpr *Subsection,
raw_ostream &OS) {
- Section->PrintSwitchToSection(*Streamer.getContext().getAsmInfo(),
+ Section->printSwitchToSection(*Streamer.getContext().getAsmInfo(),
Streamer.getContext().getTargetTriple(), OS,
Subsection);
}
@@ -96,7 +97,7 @@ MCStreamer::MCStreamer(MCContext &Ctx)
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
-MCStreamer::~MCStreamer() {}
+MCStreamer::~MCStreamer() = default;
void MCStreamer::reset() {
DwarfFrameInfos.clear();
@@ -107,7 +108,7 @@ void MCStreamer::reset() {
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
-raw_ostream &MCStreamer::GetCommentOS() {
+raw_ostream &MCStreamer::getCommentOS() {
// By default, discard comments.
return nulls();
}
@@ -186,7 +187,7 @@ void MCStreamer::emitSymbolValue(const MCSymbol *Sym, unsigned Size,
if (!IsSectionRelative)
emitValueImpl(MCSymbolRefExpr::create(Sym, getContext()), Size);
else
- EmitCOFFSecRel32(Sym, /*Offset=*/0);
+ emitCOFFSecRel32(Sym, /*Offset=*/0);
}
void MCStreamer::emitDTPRel64Value(const MCExpr *Value) {
@@ -251,6 +252,13 @@ void MCStreamer::emitCFIBKeyFrame() {
CurFrame->IsBKeyFrame = true;
}
+void MCStreamer::emitCFIMTETaggedFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+ if (!CurFrame)
+ return;
+ CurFrame->IsMTETaggedFrame = true;
+}
+
void MCStreamer::emitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
@@ -283,18 +291,18 @@ MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() {
return &DwarfFrameInfos.back();
}
-bool MCStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename,
+bool MCStreamer::emitCVFileDirective(unsigned FileNo, StringRef Filename,
ArrayRef<uint8_t> Checksum,
unsigned ChecksumKind) {
return getContext().getCVContext().addFile(*this, FileNo, Filename, Checksum,
ChecksumKind);
}
-bool MCStreamer::EmitCVFuncIdDirective(unsigned FunctionId) {
+bool MCStreamer::emitCVFuncIdDirective(unsigned FunctionId) {
return getContext().getCVContext().recordFunctionId(FunctionId);
}
-bool MCStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId,
+bool MCStreamer::emitCVInlineSiteIdDirective(unsigned FunctionId,
unsigned IAFunc, unsigned IAFile,
unsigned IALine, unsigned IACol,
SMLoc Loc) {
@@ -400,10 +408,10 @@ void MCStreamer::emitEHSymAttributes(const MCSymbol *Symbol,
}
void MCStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
- SwitchSection(getContext().getObjectFileInfo()->getTextSection());
+ switchSection(getContext().getObjectFileInfo()->getTextSection());
}
-void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) {
+void MCStreamer::assignFragment(MCSymbol *Symbol, MCFragment *Fragment) {
assert(Fragment);
Symbol->setFragment(Fragment);
@@ -698,7 +706,7 @@ WinEH::FrameInfo *MCStreamer::EnsureValidWinFrameInfo(SMLoc Loc) {
return CurrentWinFrameInfo;
}
-void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
+void MCStreamer::emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
const MCAsmInfo *MAI = Context.getAsmInfo();
if (!MAI->usesWindowsCFI())
return getContext().reportError(
@@ -716,7 +724,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) {
CurrentWinFrameInfo->TextSection = getCurrentSectionOnly();
}
-void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) {
+void MCStreamer::emitWinCFIEndProc(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -730,11 +738,11 @@ void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) {
for (size_t I = CurrentProcWinFrameInfoStartIndex, E = WinFrameInfos.size();
I != E; ++I)
- EmitWindowsUnwindTables(WinFrameInfos[I].get());
- SwitchSection(CurFrame->TextSection);
+ emitWindowsUnwindTables(WinFrameInfos[I].get());
+ switchSection(CurFrame->TextSection);
}
-void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
+void MCStreamer::emitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -745,7 +753,7 @@ void MCStreamer::EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) {
CurFrame->FuncletOrFuncEnd = Label;
}
-void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) {
+void MCStreamer::emitWinCFIStartChained(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -758,7 +766,7 @@ void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) {
CurrentWinFrameInfo->TextSection = getCurrentSectionOnly();
}
-void MCStreamer::EmitWinCFIEndChained(SMLoc Loc) {
+void MCStreamer::emitWinCFIEndChained(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -772,7 +780,7 @@ void MCStreamer::EmitWinCFIEndChained(SMLoc Loc) {
CurrentWinFrameInfo = const_cast<WinEH::FrameInfo *>(CurFrame->ChainedParent);
}
-void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except,
+void MCStreamer::emitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except,
SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -789,7 +797,7 @@ void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except,
CurFrame->HandlesExceptions = true;
}
-void MCStreamer::EmitWinEHHandlerData(SMLoc Loc) {
+void MCStreamer::emitWinEHHandlerData(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -853,7 +861,7 @@ static unsigned encodeSEHRegNum(MCContext &Ctx, MCRegister Reg) {
return Ctx.getRegisterInfo()->getSEHRegNum(Reg);
}
-void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) {
+void MCStreamer::emitWinCFIPushReg(MCRegister Register, SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -865,7 +873,7 @@ void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
+void MCStreamer::emitWinCFISetFrame(MCRegister Register, unsigned Offset,
SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -887,7 +895,7 @@ void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset,
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) {
+void MCStreamer::emitWinCFIAllocStack(unsigned Size, SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -904,7 +912,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
+void MCStreamer::emitWinCFISaveReg(MCRegister Register, unsigned Offset,
SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -921,7 +929,7 @@ void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset,
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
+void MCStreamer::emitWinCFISaveXMM(MCRegister Register, unsigned Offset,
SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
@@ -936,7 +944,7 @@ void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset,
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) {
+void MCStreamer::emitWinCFIPushFrame(bool Code, SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -950,7 +958,7 @@ void MCStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) {
+void MCStreamer::emitWinCFIEndProlog(SMLoc Loc) {
WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc);
if (!CurFrame)
return;
@@ -960,15 +968,15 @@ void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) {
CurFrame->PrologEnd = Label;
}
-void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {}
+void MCStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) {}
-void MCStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {}
+void MCStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) {}
-void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {}
+void MCStreamer::emitCOFFSectionIndex(MCSymbol const *Symbol) {}
-void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {}
+void MCStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {}
-void MCStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {}
+void MCStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {}
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
@@ -987,13 +995,11 @@ void MCStreamer::emitRawText(const Twine &T) {
emitRawTextImpl(T.toStringRef(Str));
}
-void MCStreamer::EmitWindowsUnwindTables() {
-}
+void MCStreamer::emitWindowsUnwindTables() {}
-void MCStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
-}
+void MCStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) {}
-void MCStreamer::Finish(SMLoc EndLoc) {
+void MCStreamer::finish(SMLoc EndLoc) {
if ((!DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End) ||
(!WinFrameInfos.empty() && !WinFrameInfos.back()->End)) {
getContext().reportError(EndLoc, "Unfinished frame!");
@@ -1145,20 +1151,20 @@ void MCStreamer::emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi,
void MCStreamer::emitAssemblerFlag(MCAssemblerFlag Flag) {}
void MCStreamer::emitThumbFunc(MCSymbol *Func) {}
void MCStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
-void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+void MCStreamer::beginCOFFSymbolDef(const MCSymbol *Symbol) {
llvm_unreachable("this directive only supported on COFF targets");
}
-void MCStreamer::EndCOFFSymbolDef() {
+void MCStreamer::endCOFFSymbolDef() {
llvm_unreachable("this directive only supported on COFF targets");
}
void MCStreamer::emitFileDirective(StringRef Filename) {}
void MCStreamer::emitFileDirective(StringRef Filename, StringRef CompilerVerion,
StringRef TimeStamp, StringRef Description) {
}
-void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+void MCStreamer::emitCOFFSymbolStorageClass(int StorageClass) {
llvm_unreachable("this directive only supported on COFF targets");
}
-void MCStreamer::EmitCOFFSymbolType(int Type) {
+void MCStreamer::emitCOFFSymbolType(int Type) {
llvm_unreachable("this directive only supported on COFF targets");
}
void MCStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
@@ -1180,6 +1186,10 @@ void MCStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
"XCOFF targets");
}
+void MCStreamer::emitXCOFFRefDirective(StringRef Name) {
+ llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets");
+}
+
void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void MCStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym,
StringRef Name, bool KeepOriginalSym) {}
@@ -1212,7 +1222,7 @@ void MCStreamer::emitBundleLock(bool AlignToEnd) {}
void MCStreamer::finishImpl() {}
void MCStreamer::emitBundleUnlock() {}
-void MCStreamer::SwitchSection(MCSection *Section, const MCExpr *Subsection) {
+void MCStreamer::switchSection(MCSection *Section, const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
MCSectionSubPair curSection = SectionStack.back().first;
SectionStack.back().second = curSection;
@@ -1233,7 +1243,7 @@ MCSymbol *MCStreamer::endSection(MCSection *Section) {
if (Sym->isInSection())
return Sym;
- SwitchSection(Section);
+ switchSection(Section);
emitLabel(Sym);
return Sym;
}
@@ -1281,6 +1291,9 @@ static VersionTuple getMachoBuildVersionSupportedOS(const Triple &Target) {
return VersionTuple(12);
case Triple::WatchOS:
return VersionTuple(5);
+ case Triple::DriverKit:
+ // DriverKit always uses the build version load command.
+ return VersionTuple();
default:
break;
}
@@ -1305,6 +1318,8 @@ getMachoBuildVersionPlatformType(const Triple &Target) {
case Triple::WatchOS:
return Target.isSimulatorEnvironment() ? MachO::PLATFORM_WATCHOSSIMULATOR
: MachO::PLATFORM_WATCHOS;
+ case Triple::DriverKit:
+ return MachO::PLATFORM_DRIVERKIT;
default:
break;
}
@@ -1334,6 +1349,9 @@ void MCStreamer::emitVersionForTarget(
case Triple::WatchOS:
Version = Target.getWatchOSVersion();
break;
+ case Triple::DriverKit:
+ Version = Target.getDriverKitVersion();
+ break;
default:
llvm_unreachable("unexpected OS type");
}
@@ -1353,15 +1371,14 @@ void MCStreamer::emitVersionForTarget(
emitDarwinTargetVariantBuildVersion(
getMachoBuildVersionPlatformType(Target),
LinkedTargetVersion.getMajor(),
- LinkedTargetVersion.getMinor().getValueOr(0),
- LinkedTargetVersion.getSubminor().getValueOr(0), SDKVersion);
+ LinkedTargetVersion.getMinor().value_or(0),
+ LinkedTargetVersion.getSubminor().value_or(0), SDKVersion);
return;
}
emitBuildVersion(getMachoBuildVersionPlatformType(Target),
LinkedTargetVersion.getMajor(),
- LinkedTargetVersion.getMinor().getValueOr(0),
- LinkedTargetVersion.getSubminor().getValueOr(0),
- SDKVersion);
+ LinkedTargetVersion.getMinor().value_or(0),
+ LinkedTargetVersion.getSubminor().value_or(0), SDKVersion);
ShouldEmitBuildVersion = true;
}
@@ -1372,8 +1389,8 @@ void MCStreamer::emitVersionForTarget(
emitDarwinTargetVariantBuildVersion(
getMachoBuildVersionPlatformType(*TVT),
TVLinkedTargetVersion.getMajor(),
- TVLinkedTargetVersion.getMinor().getValueOr(0),
- TVLinkedTargetVersion.getSubminor().getValueOr(0),
+ TVLinkedTargetVersion.getMinor().value_or(0),
+ TVLinkedTargetVersion.getSubminor().value_or(0),
DarwinTargetVariantSDKVersion);
}
}
@@ -1383,6 +1400,6 @@ void MCStreamer::emitVersionForTarget(
emitVersionMin(getMachoVersionMinLoadCommandType(Target),
LinkedTargetVersion.getMajor(),
- LinkedTargetVersion.getMinor().getValueOr(0),
- LinkedTargetVersion.getSubminor().getValueOr(0), SDKVersion);
+ LinkedTargetVersion.getMinor().value_or(0),
+ LinkedTargetVersion.getSubminor().value_or(0), SDKVersion);
}
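
The emitVersionForTarget hunks flatten a version tuple whose minor and subminor components may be absent, and the migration from getValueOr(0) to value_or(0) keeps that defaulting behavior. A sketch of the pattern (this VersionTuple is a stand-in; llvm::VersionTuple exposes the same optional getMinor/getSubminor accessors):

#include <cstdio>
#include <optional>

struct VersionTuple {
  unsigned Major = 0;
  std::optional<unsigned> Minor, Subminor;
};

int main() {
  VersionTuple V{13, 4, std::nullopt};
  // Absent components default to 0 when the load command is emitted.
  std::printf("%u.%u.%u\n", V.Major, V.Minor.value_or(0u),
              V.Subminor.value_or(0u)); // prints 13.4.0
}
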
diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp
index 67cab9a92722..4017225a81c4 100644
--- a/llvm/lib/MC/MCSymbol.cpp
+++ b/llvm/lib/MC/MCSymbol.cpp
@@ -11,7 +11,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/MC/MCSymbolELF.cpp b/llvm/lib/MC/MCSymbolELF.cpp
index 1830b87fd856..820a91f57c17 100644
--- a/llvm/lib/MC/MCSymbolELF.cpp
+++ b/llvm/lib/MC/MCSymbolELF.cpp
@@ -8,7 +8,6 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/MCFixupKindInfo.h"
namespace llvm {
diff --git a/llvm/lib/MC/MCTargetOptions.cpp b/llvm/lib/MC/MCTargetOptions.cpp
index eb57917ee8fd..c2946da3ee66 100644
--- a/llvm/lib/MC/MCTargetOptions.cpp
+++ b/llvm/lib/MC/MCTargetOptions.cpp
@@ -13,11 +13,12 @@ using namespace llvm;
MCTargetOptions::MCTargetOptions()
: MCRelaxAll(false), MCNoExecStack(false), MCFatalWarnings(false),
- MCNoWarn(false), MCNoDeprecatedWarn(false),
- MCNoTypeCheck(false), MCSaveTempLabels(false),
- MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false),
+ MCNoWarn(false), MCNoDeprecatedWarn(false), MCNoTypeCheck(false),
+ MCSaveTempLabels(false), MCIncrementalLinkerCompatible(false),
ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
- PreserveAsmComments(true), Dwarf64(false) {}
+ PreserveAsmComments(true), Dwarf64(false),
+ EmitDwarfUnwind(EmitDwarfUnwindType::Default),
+ MCUseDwarfDirectory(DefaultDwarfDirectory) {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
index 762c8d43063c..a310dc894021 100644
--- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
+++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
@@ -1,5 +1,4 @@
-//===-- MCTargetOptionsCommandFlags.cpp --------------------------*- C++
-//-*-===//
+//===-- MCTargetOptionsCommandFlags.cpp -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -39,6 +38,7 @@ MCOPT_EXP(bool, RelaxAll)
MCOPT(bool, IncrementalLinkerCompatible)
MCOPT(int, DwarfVersion)
MCOPT(bool, Dwarf64)
+MCOPT(EmitDwarfUnwindType, EmitDwarfUnwind)
MCOPT(bool, ShowMCInst)
MCOPT(bool, FatalWarnings)
MCOPT(bool, NoWarn)
@@ -73,6 +73,19 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() {
cl::desc("Generate debugging info in the 64-bit DWARF format"));
MCBINDOPT(Dwarf64);
+ static cl::opt<EmitDwarfUnwindType> EmitDwarfUnwind(
+ "emit-dwarf-unwind", cl::desc("Whether to emit DWARF EH frame entries."),
+ cl::init(EmitDwarfUnwindType::Default),
+ cl::values(clEnumValN(EmitDwarfUnwindType::Always, "always",
+ "Always emit EH frame entries"),
+ clEnumValN(EmitDwarfUnwindType::NoCompactUnwind,
+ "no-compact-unwind",
+ "Only emit EH frame entries when compact unwind is "
+ "not available"),
+ clEnumValN(EmitDwarfUnwindType::Default, "default",
+ "Use target platform default")));
+ MCBINDOPT(EmitDwarfUnwind);
+
static cl::opt<bool> ShowMCInst(
"asm-show-inst",
cl::desc("Emit internal instruction representation to assembly file"));
@@ -116,5 +129,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() {
Options.MCNoWarn = getNoWarn();
Options.MCNoDeprecatedWarn = getNoDeprecatedWarn();
Options.MCNoTypeCheck = getNoTypeCheck();
+ Options.EmitDwarfUnwind = getEmitDwarfUnwind();
+
return Options;
}
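
The -emit-dwarf-unwind plumbing above follows the standard cl::opt enum recipe: declare the option with clEnumValN mappings inside the registration hook, bind it with MCBINDOPT, and copy the parsed value into MCTargetOptions. A free-standing sketch of the same recipe (EmitMode and the -emit-mode flag are invented; only the cl:: API is real):

#include "llvm/Support/CommandLine.h"

using namespace llvm;

enum class EmitMode { Always, Never, Default };

static cl::opt<EmitMode> Mode(
    "emit-mode", cl::desc("When to emit the extra tables"),
    cl::init(EmitMode::Default),
    cl::values(clEnumValN(EmitMode::Always, "always", "Always emit"),
               clEnumValN(EmitMode::Never, "never", "Never emit"),
               clEnumValN(EmitMode::Default, "default",
                          "Use the platform default")));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return Mode == EmitMode::Default ? 0 : 1;
}
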
diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp
index 90249fb7380a..ce948c7435f5 100644
--- a/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/llvm/lib/MC/MCWasmStreamer.cpp
@@ -11,27 +11,30 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCWasmStreamer.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
-#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+namespace llvm {
+class MCContext;
+class MCStreamer;
+class MCSubtargetInfo;
+} // namespace llvm
+
using namespace llvm;
MCWasmStreamer::~MCWasmStreamer() = default; // anchor.
@@ -118,6 +121,7 @@ bool MCWasmStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_Invalid:
case MCSA_IndirectSymbol:
case MCSA_Protected:
+ case MCSA_Exported:
return false;
case MCSA_Hidden:
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index 2a93c352c68a..ffabe0fe8978 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -7,15 +7,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCWin64EH.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Win64EH.h"
+namespace llvm {
+class MCSection;
+}
using namespace llvm;
@@ -226,14 +228,14 @@ void llvm::Win64EH::UnwindEmitter::Emit(MCStreamer &Streamer) const {
// Emit the unwind info structs first.
for (const auto &CFI : Streamer.getWinFrameInfos()) {
MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection);
- Streamer.SwitchSection(XData);
+ Streamer.switchSection(XData);
::EmitUnwindInfo(Streamer, CFI.get());
}
// Now emit RUNTIME_FUNCTION entries.
for (const auto &CFI : Streamer.getWinFrameInfos()) {
MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection);
- Streamer.SwitchSection(PData);
+ Streamer.switchSection(PData);
EmitRuntimeFunction(Streamer, CFI.get());
}
}
@@ -244,13 +246,26 @@ void llvm::Win64EH::UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
// Switch sections (the static function above is meant to be called from
// here and from Emit()).
MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection);
- Streamer.SwitchSection(XData);
+ Streamer.switchSection(XData);
::EmitUnwindInfo(Streamer, info);
}
-static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
- const MCSymbol *RHS) {
+static const MCExpr *GetSubDivExpr(MCStreamer &Streamer, const MCSymbol *LHS,
+ const MCSymbol *RHS, int Div) {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Expr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
+ MCSymbolRefExpr::create(RHS, Context), Context);
+ if (Div != 1)
+ Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(Div, Context),
+ Context);
+ return Expr;
+}
+
+static Optional<int64_t> GetOptionalAbsDifference(MCStreamer &Streamer,
+ const MCSymbol *LHS,
+ const MCSymbol *RHS) {
MCContext &Context = Streamer.getContext();
const MCExpr *Diff =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
@@ -261,10 +276,18 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
// unusual constructs, like an inline asm with an alignment directive.
int64_t value;
if (!Diff->evaluateAsAbsolute(value, OS->getAssembler()))
- report_fatal_error("Failed to evaluate function length in SEH unwind info");
+ return None;
return value;
}
+static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
+ const MCSymbol *RHS) {
+ Optional<int64_t> MaybeDiff = GetOptionalAbsDifference(Streamer, LHS, RHS);
+ if (!MaybeDiff)
+ report_fatal_error("Failed to evaluate function length in SEH unwind info");
+ return *MaybeDiff;
+}
+
static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
uint32_t Count = 0;
for (const auto &I : Insns) {
@@ -350,7 +373,7 @@ static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
// Unwind opcode encodings and restrictions are documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
-static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
+static void ARM64EmitUnwindCode(MCStreamer &streamer,
const WinEH::Instruction &inst) {
uint8_t b, reg;
switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
@@ -513,7 +536,7 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
}
// Returns the epilog symbol of an epilog with the exact same unwind code
-// sequence, if it exists. Otherwise, returns nulltpr.
+// sequence, if it exists. Otherwise, returns nullptr.
// EpilogInstrs - Unwind codes for the current epilog.
// Epilogs - Epilogs that potentially match the current epilog.
static MCSymbol*
@@ -524,18 +547,16 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
auto InstrsIter = info->EpilogMap.find(EpilogStart);
assert(InstrsIter != info->EpilogMap.end() &&
"Epilog not found in EpilogMap");
- const auto &Instrs = InstrsIter->second;
+ const auto &Instrs = InstrsIter->second.Instructions;
if (Instrs.size() != EpilogInstrs.size())
continue;
bool Match = true;
for (unsigned i = 0; i < Instrs.size(); ++i)
- if (Instrs[i].Operation != EpilogInstrs[i].Operation ||
- Instrs[i].Offset != EpilogInstrs[i].Offset ||
- Instrs[i].Register != EpilogInstrs[i].Register) {
- Match = false;
- break;
+ if (Instrs[i] != EpilogInstrs[i]) {
+ Match = false;
+ break;
}
if (Match)
@@ -544,8 +565,8 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
return nullptr;
}
-static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
- bool Reverse) {
+static void simplifyARM64Opcodes(std::vector<WinEH::Instruction> &Instructions,
+ bool Reverse) {
unsigned PrevOffset = -1;
unsigned PrevRegister = -1;
@@ -606,26 +627,37 @@ static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
}
}
-static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
- int PrologCodeBytes) {
- // Can only pack if there's one single epilog
- if (info->EpilogMap.size() != 1)
- return -1;
-
- const std::vector<WinEH::Instruction> &Epilog =
- info->EpilogMap.begin()->second;
-
- // Can pack if the epilog is a subset of the prolog but not vice versa
- if (Epilog.size() > info->Instructions.size())
+// Check if an epilog exists as a subset of the end of a prolog (backwards).
+static int
+getARM64OffsetInProlog(const std::vector<WinEH::Instruction> &Prolog,
+ const std::vector<WinEH::Instruction> &Epilog) {
+ // Can't find an epilog as a subset if it is longer than the prolog.
+ if (Epilog.size() > Prolog.size())
return -1;
// Check that the epilog actually is a perfect match for the end (backwards)
// of the prolog.
for (int I = Epilog.size() - 1; I >= 0; I--) {
- if (info->Instructions[I] != Epilog[Epilog.size() - 1 - I])
+ if (Prolog[I] != Epilog[Epilog.size() - 1 - I])
return -1;
}
+ // If the epilog was a subset of the prolog, find its offset.
+ if (Epilog.size() == Prolog.size())
+ return 0;
+ return ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction>(
+ &Prolog[Epilog.size()], Prolog.size() - Epilog.size()));
+}
+
+static int checkARM64PackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
+ int PrologCodeBytes) {
+ // Can only pack if there's a single epilog
+ if (info->EpilogMap.size() != 1)
+ return -1;
+
+ const std::vector<WinEH::Instruction> &Epilog =
+ info->EpilogMap.begin()->second.Instructions;
+
// Check that the epilog actually is at the very end of the function,
// otherwise it can't be packed.
uint32_t DistanceFromEnd = (uint32_t)GetAbsDifference(
@@ -633,24 +665,33 @@ static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
if (DistanceFromEnd / 4 != Epilog.size())
return -1;
- int Offset = Epilog.size() == info->Instructions.size()
- ? 0
- : ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction>(
- &info->Instructions[Epilog.size()],
- info->Instructions.size() - Epilog.size()));
+ int RetVal = -1;
+ // Even if we don't end up sharing opcodes with the prolog, we can still
+ // write the offset as a packed offset, if the single epilog is located at
+ // the end of the function and the offset (pointing after the prolog) fits
+ // as a packed offset.
+ if (PrologCodeBytes <= 31 &&
+ PrologCodeBytes + ARM64CountOfUnwindCodes(Epilog) <= 124)
+ RetVal = PrologCodeBytes;
+
+ int Offset = getARM64OffsetInProlog(info->Instructions, Epilog);
+ if (Offset < 0)
+ return RetVal;
// Check that the offset and prolog size fits in the first word; it's
// unclear whether the epilog count in the extension word can be taken
// as packed epilog offset.
if (Offset > 31 || PrologCodeBytes > 124)
- return -1;
+ return RetVal;
+ // As we choose to express the epilog as part of the prolog, remove the
+ // epilog from the map, so we don't try to emit its opcodes.
info->EpilogMap.clear();
return Offset;
}
-static bool tryPackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength,
- int PackedEpilogOffset) {
+static bool tryARM64PackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength,
+ int PackedEpilogOffset) {
if (PackedEpilogOffset == 0) {
// Fully symmetric prolog and epilog, should be ok for packed format.
// For CR=3, the corresponding synthesized epilog actually lacks the
@@ -842,6 +883,16 @@ static bool tryPackedUnwind(WinEH::FrameInfo *info, uint32_t FuncLength,
if (Nops != 0 && Nops != 4)
return false;
int H = Nops == 4;
+ // There's an inconsistency regarding packed unwind info with homed
+ // parameters; according to the documentation, the epilog shouldn't have
+ // the same corresponding nops (and thus, to set the H bit, we should
+ // require an epilog which isn't exactly symmetrical - we shouldn't accept
+ // an exactly mirrored epilog for those cases), but in practice,
+ // RtlVirtualUnwind behaves as if it does expect the epilogue to contain
+ // the same nops. See https://github.com/llvm/llvm-project/issues/54879.
+ // To play it safe, don't produce packed unwind info with homed parameters.
+ if (H)
+ return false;
int IntSZ = 8 * RegI;
if (StandaloneLR)
IntSZ += 8;
@@ -901,9 +952,9 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
return;
}
- simplifyOpcodes(info->Instructions, false);
+ simplifyARM64Opcodes(info->Instructions, false);
for (auto &I : info->EpilogMap)
- simplifyOpcodes(I.second, true);
+ simplifyARM64Opcodes(I.second.Instructions, true);
MCContext &context = streamer.getContext();
MCSymbol *Label = context.createTempSymbol();
@@ -951,10 +1002,12 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
uint32_t TotalCodeBytes = PrologCodeBytes;
- int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes);
+ int PackedEpilogOffset =
+ checkARM64PackedEpilog(streamer, info, PrologCodeBytes);
- if (PackedEpilogOffset >= 0 && !info->HandlesExceptions &&
- FuncLength <= 0x7ff && TryPacked) {
+ if (PackedEpilogOffset >= 0 &&
+ uint32_t(PackedEpilogOffset) < PrologCodeBytes &&
+ !info->HandlesExceptions && FuncLength <= 0x7ff && TryPacked) {
// Matching prolog/epilog and no exception handlers; check if the
// prolog matches the patterns that can be described by the packed
// format.
@@ -963,7 +1016,7 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
// unwind info there. Keep using that as indicator that this unwind
// info has been generated already.
- if (tryPackedUnwind(info, FuncLength, PackedEpilogOffset))
+ if (tryARM64PackedUnwind(info, FuncLength, PackedEpilogOffset))
return;
}
@@ -974,11 +1027,12 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
for (auto &I : info->EpilogMap) {
MCSymbol *EpilogStart = I.first;
- auto &EpilogInstrs = I.second;
+ auto &EpilogInstrs = I.second.Instructions;
uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
MCSymbol* MatchingEpilog =
FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
+ int PrologOffset;
if (MatchingEpilog) {
assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
"Duplicate epilog not found");
@@ -986,6 +1040,12 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
// Clear the unwind codes in the EpilogMap, so that they don't get output
// in the logic below.
EpilogInstrs.clear();
+ } else if ((PrologOffset = getARM64OffsetInProlog(info->Instructions,
+ EpilogInstrs)) >= 0) {
+ EpilogInfo[EpilogStart] = PrologOffset;
+ // Clear the unwind codes in the EpilogMap, so that they don't get output
+ // in the logic below.
+ EpilogInstrs.clear();
} else {
EpilogInfo[EpilogStart] = TotalCodeBytes;
TotalCodeBytes += CodeBytes;
@@ -1016,8 +1076,6 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
// Extended Code Words, Extended Epilog Count
if (ExtensionWord) {
// FIXME: We should be able to split unwind info into multiple sections.
- // FIXME: We should share epilog codes across epilogs, where possible,
- // which would make this issue show up less frequently.
if (CodeWords > 0xFF || EpilogCount > 0xFFFF)
report_fatal_error("SEH unwind data splitting not yet implemented");
uint32_t row2 = 0x0;
@@ -1026,17 +1084,19 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
streamer.emitInt32(row2);
}
- // Epilog Start Index, Epilog Start Offset
- for (auto &I : EpilogInfo) {
- MCSymbol *EpilogStart = I.first;
- uint32_t EpilogIndex = I.second;
- uint32_t EpilogOffset =
- (uint32_t)GetAbsDifference(streamer, EpilogStart, info->Begin);
- if (EpilogOffset)
- EpilogOffset /= 4;
- uint32_t row3 = EpilogOffset;
- row3 |= (EpilogIndex & 0x3FF) << 22;
- streamer.emitInt32(row3);
+ if (PackedEpilogOffset < 0) {
+ // Epilog Start Index, Epilog Start Offset
+ for (auto &I : EpilogInfo) {
+ MCSymbol *EpilogStart = I.first;
+ uint32_t EpilogIndex = I.second;
+ uint32_t EpilogOffset =
+ (uint32_t)GetAbsDifference(streamer, EpilogStart, info->Begin);
+ if (EpilogOffset)
+ EpilogOffset /= 4;
+ uint32_t row3 = EpilogOffset;
+ row3 |= (EpilogIndex & 0x3FF) << 22;
+ streamer.emitInt32(row3);
+ }
}
// Emit prolog unwind instructions (in reverse order).
@@ -1044,14 +1104,14 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
for (uint8_t c = 0; c < numInst; ++c) {
WinEH::Instruction inst = info->Instructions.back();
info->Instructions.pop_back();
- ARM64EmitUnwindCode(streamer, info->Begin, inst);
+ ARM64EmitUnwindCode(streamer, inst);
}
// Emit epilog unwind instructions
for (auto &I : info->EpilogMap) {
- auto &EpilogInstrs = I.second;
+ auto &EpilogInstrs = I.second.Instructions;
for (const WinEH::Instruction &inst : EpilogInstrs)
- ARM64EmitUnwindCode(streamer, info->Begin, inst);
+ ARM64EmitUnwindCode(streamer, inst);
}
int32_t BytesMod = CodeWords * 4 - TotalCodeBytes;
@@ -1066,8 +1126,1087 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
4);
}
-static void ARM64EmitRuntimeFunction(MCStreamer &streamer,
- const WinEH::FrameInfo *info) {
+static uint32_t ARMCountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
+ uint32_t Count = 0;
+ for (const auto &I : Insns) {
+ switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
+ default:
+ llvm_unreachable("Unsupported ARM unwind code");
+ case Win64EH::UOP_AllocSmall:
+ Count += 1;
+ break;
+ case Win64EH::UOP_AllocLarge:
+ Count += 3;
+ break;
+ case Win64EH::UOP_AllocHuge:
+ Count += 4;
+ break;
+ case Win64EH::UOP_WideAllocMedium:
+ Count += 2;
+ break;
+ case Win64EH::UOP_WideAllocLarge:
+ Count += 3;
+ break;
+ case Win64EH::UOP_WideAllocHuge:
+ Count += 4;
+ break;
+ case Win64EH::UOP_WideSaveRegMask:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveSP:
+ Count += 1;
+ break;
+ case Win64EH::UOP_SaveRegsR4R7LR:
+ Count += 1;
+ break;
+ case Win64EH::UOP_WideSaveRegsR4R11LR:
+ Count += 1;
+ break;
+ case Win64EH::UOP_SaveFRegD8D15:
+ Count += 1;
+ break;
+ case Win64EH::UOP_SaveRegMask:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveLR:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegD0D15:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegD16D31:
+ Count += 2;
+ break;
+ case Win64EH::UOP_Nop:
+ case Win64EH::UOP_WideNop:
+ case Win64EH::UOP_End:
+ case Win64EH::UOP_EndNop:
+ case Win64EH::UOP_WideEndNop:
+ Count += 1;
+ break;
+ case Win64EH::UOP_Custom: {
+ int J;
+ for (J = 3; J > 0; J--)
+ if (I.Offset & (0xffu << (8 * J)))
+ break;
+ Count += J + 1;
+ break;
+ }
+ }
+ }
+ return Count;
+}
+
+static uint32_t ARMCountOfInstructionBytes(ArrayRef<WinEH::Instruction> Insns,
+ bool *HasCustom = nullptr) {
+ uint32_t Count = 0;
+ for (const auto &I : Insns) {
+ switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
+ default:
+ llvm_unreachable("Unsupported ARM unwind code");
+ case Win64EH::UOP_AllocSmall:
+ case Win64EH::UOP_AllocLarge:
+ case Win64EH::UOP_AllocHuge:
+ Count += 2;
+ break;
+ case Win64EH::UOP_WideAllocMedium:
+ case Win64EH::UOP_WideAllocLarge:
+ case Win64EH::UOP_WideAllocHuge:
+ Count += 4;
+ break;
+ case Win64EH::UOP_WideSaveRegMask:
+ case Win64EH::UOP_WideSaveRegsR4R11LR:
+ Count += 4;
+ break;
+ case Win64EH::UOP_SaveSP:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveRegMask:
+ case Win64EH::UOP_SaveRegsR4R7LR:
+ Count += 2;
+ break;
+ case Win64EH::UOP_SaveFRegD8D15:
+ case Win64EH::UOP_SaveFRegD0D15:
+ case Win64EH::UOP_SaveFRegD16D31:
+ Count += 4;
+ break;
+ case Win64EH::UOP_SaveLR:
+ Count += 4;
+ break;
+ case Win64EH::UOP_Nop:
+ case Win64EH::UOP_EndNop:
+ Count += 2;
+ break;
+ case Win64EH::UOP_WideNop:
+ case Win64EH::UOP_WideEndNop:
+ Count += 4;
+ break;
+ case Win64EH::UOP_End:
+ // This doesn't map to any instruction
+ break;
+ case Win64EH::UOP_Custom:
+ // We can't reason about what instructions this maps to; return a
+ // phony number to make sure we don't accidentally do epilog packing.
+ Count += 1000;
+ if (HasCustom)
+ *HasCustom = true;
+ break;
+ }
+ }
+ return Count;
+}
+
+static void checkARMInstructions(MCStreamer &Streamer,
+ ArrayRef<WinEH::Instruction> Insns,
+ const MCSymbol *Begin, const MCSymbol *End,
+ StringRef Name, StringRef Type) {
+ if (!End)
+ return;
+ Optional<int64_t> MaybeDistance =
+ GetOptionalAbsDifference(Streamer, End, Begin);
+ if (!MaybeDistance)
+ return;
+ uint32_t Distance = (uint32_t)*MaybeDistance;
+ bool HasCustom = false;
+ uint32_t InstructionBytes = ARMCountOfInstructionBytes(Insns, &HasCustom);
+ if (HasCustom)
+ return;
+ if (Distance != InstructionBytes) {
+ Streamer.getContext().reportError(
+ SMLoc(), "Incorrect size for " + Name + " " + Type + ": " +
+ Twine(Distance) +
+ " bytes of instructions in range, but .seh directives "
+ "corresponding to " +
+ Twine(InstructionBytes) + " bytes\n");
+ }
+}
+
+static bool isARMTerminator(const WinEH::Instruction &inst) {
+ switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
+ case Win64EH::UOP_End:
+ case Win64EH::UOP_EndNop:
+ case Win64EH::UOP_WideEndNop:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Unwind opcode encodings and restrictions are documented at
+// https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling
+static void ARMEmitUnwindCode(MCStreamer &streamer,
+ const WinEH::Instruction &inst) {
+ uint32_t w, lr;
+ int i;
+ switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
+ default:
+ llvm_unreachable("Unsupported ARM unwind code");
+ case Win64EH::UOP_AllocSmall:
+ assert((inst.Offset & 3) == 0);
+ assert(inst.Offset / 4 <= 0x7f);
+ streamer.emitInt8(inst.Offset / 4);
+ break;
+ case Win64EH::UOP_WideSaveRegMask:
+ assert((inst.Register & ~0x5fff) == 0);
+ lr = (inst.Register >> 14) & 1;
+ w = 0x8000 | (inst.Register & 0x1fff) | (lr << 13);
+ streamer.emitInt8((w >> 8) & 0xff);
+ streamer.emitInt8((w >> 0) & 0xff);
+ break;
+ case Win64EH::UOP_SaveSP:
+ assert(inst.Register <= 0x0f);
+ streamer.emitInt8(0xc0 | inst.Register);
+ break;
+ case Win64EH::UOP_SaveRegsR4R7LR:
+ assert(inst.Register >= 4 && inst.Register <= 7);
+ assert(inst.Offset <= 1);
+ streamer.emitInt8(0xd0 | (inst.Register - 4) | (inst.Offset << 2));
+ break;
+ case Win64EH::UOP_WideSaveRegsR4R11LR:
+ assert(inst.Register >= 8 && inst.Register <= 11);
+ assert(inst.Offset <= 1);
+ streamer.emitInt8(0xd8 | (inst.Register - 8) | (inst.Offset << 2));
+ break;
+ case Win64EH::UOP_SaveFRegD8D15:
+ assert(inst.Register >= 8 && inst.Register <= 15);
+ streamer.emitInt8(0xe0 | (inst.Register - 8));
+ break;
+ case Win64EH::UOP_WideAllocMedium:
+ assert((inst.Offset & 3) == 0);
+ assert(inst.Offset / 4 <= 0x3ff);
+ w = 0xe800 | (inst.Offset / 4);
+ streamer.emitInt8((w >> 8) & 0xff);
+ streamer.emitInt8((w >> 0) & 0xff);
+ break;
+ case Win64EH::UOP_SaveRegMask:
+ assert((inst.Register & ~0x40ff) == 0);
+ lr = (inst.Register >> 14) & 1;
+ w = 0xec00 | (inst.Register & 0x0ff) | (lr << 8);
+ streamer.emitInt8((w >> 8) & 0xff);
+ streamer.emitInt8((w >> 0) & 0xff);
+ break;
+ case Win64EH::UOP_SaveLR:
+ assert((inst.Offset & 3) == 0);
+ assert(inst.Offset / 4 <= 0x0f);
+ streamer.emitInt8(0xef);
+ streamer.emitInt8(inst.Offset / 4);
+ break;
+ case Win64EH::UOP_SaveFRegD0D15:
+ assert(inst.Register <= 15);
+ assert(inst.Offset <= 15);
+ assert(inst.Register <= inst.Offset);
+ streamer.emitInt8(0xf5);
+ streamer.emitInt8((inst.Register << 4) | inst.Offset);
+ break;
+ case Win64EH::UOP_SaveFRegD16D31:
+ assert(inst.Register >= 16 && inst.Register <= 31);
+ assert(inst.Offset >= 16 && inst.Offset <= 31);
+ assert(inst.Register <= inst.Offset);
+ streamer.emitInt8(0xf6);
+ streamer.emitInt8(((inst.Register - 16) << 4) | (inst.Offset - 16));
+ break;
+ case Win64EH::UOP_AllocLarge:
+ assert((inst.Offset & 3) == 0);
+ assert(inst.Offset / 4 <= 0xffff);
+ w = inst.Offset / 4;
+ streamer.emitInt8(0xf7);
+ streamer.emitInt8((w >> 8) & 0xff);
+ streamer.emitInt8((w >> 0) & 0xff);
+ break;
+ case Win64EH::UOP_AllocHuge:
+ assert((inst.Offset & 3) == 0);
+ assert(inst.Offset / 4 <= 0xffffff);
+ w = inst.Offset / 4;
+ streamer.emitInt8(0xf8);
+ streamer.emitInt8((w >> 16) & 0xff);
+ streamer.emitInt8((w >> 8) & 0xff);
+ streamer.emitInt8((w >> 0) & 0xff);
+ break;
+ case Win64EH::UOP_WideAllocLarge:
+ assert((inst.Offset & 3) == 0);
+ assert(inst.Offset / 4 <= 0xffff);
+ w = inst.Offset / 4;
+ streamer.emitInt8(0xf9);
+ streamer.emitInt8((w >> 8) & 0xff);
+ streamer.emitInt8((w >> 0) & 0xff);
+ break;
+ case Win64EH::UOP_WideAllocHuge:
+ assert((inst.Offset & 3) == 0);
+ assert(inst.Offset / 4 <= 0xffffff);
+ w = inst.Offset / 4;
+ streamer.emitInt8(0xfa);
+ streamer.emitInt8((w >> 16) & 0xff);
+ streamer.emitInt8((w >> 8) & 0xff);
+ streamer.emitInt8((w >> 0) & 0xff);
+ break;
+ case Win64EH::UOP_Nop:
+ streamer.emitInt8(0xfb);
+ break;
+ case Win64EH::UOP_WideNop:
+ streamer.emitInt8(0xfc);
+ break;
+ case Win64EH::UOP_EndNop:
+ streamer.emitInt8(0xfd);
+ break;
+ case Win64EH::UOP_WideEndNop:
+ streamer.emitInt8(0xfe);
+ break;
+ case Win64EH::UOP_End:
+ streamer.emitInt8(0xff);
+ break;
+ case Win64EH::UOP_Custom:
+ for (i = 3; i > 0; i--)
+ if (inst.Offset & (0xffu << (8 * i)))
+ break;
+ for (; i >= 0; i--)
+ streamer.emitInt8((inst.Offset >> (8 * i)) & 0xff);
+ break;
+ }
+}
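As a concrete check against the encodings above: UOP_AllocSmall with Offset 64 emits the single byte 0x10 (64 / 4), and UOP_WideAllocMedium with Offset 32 emits the pair 0xE8 0x08. A hedged sketch, assuming WinEH::Instruction's (Op, Label, Reg, Offset) constructor:

// Illustrative only; byte values follow the ARM encodings cited above.
WinEH::Instruction AllocSmall(Win64EH::UOP_AllocSmall, /*Label=*/nullptr,
                              /*Reg=*/0, /*Offset=*/64);
ARMEmitUnwindCode(streamer, AllocSmall); // emits 0x10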
+
+// Check if an epilog exists as a subset of the end of a prolog (backwards).
+// An epilog may end with one out of three different end opcodes; if this
+// is the first epilog that shares opcodes with the prolog, we can tolerate
+// that this opcode differs (and the caller will update the prolog to use
+// the same end opcode as the epilog). If another epilog already shares
+// opcodes with the prolog, the ending opcode must be a strict match.
+static int getARMOffsetInProlog(const std::vector<WinEH::Instruction> &Prolog,
+ const std::vector<WinEH::Instruction> &Epilog,
+ bool CanTweakProlog) {
+ // Can't find an epilog as a subset if it is longer than the prolog.
+ if (Epilog.size() > Prolog.size())
+ return -1;
+
+ // Check that the epilog actually is a perfect match for the end (backwards)
+ // of the prolog.
+ // If we can adjust the prolog afterwards, don't check that the end opcodes
+ // match.
+ int EndIdx = CanTweakProlog ? 1 : 0;
+ for (int I = Epilog.size() - 1; I >= EndIdx; I--) {
+ // TODO: Could also allow minor mismatches, e.g. "add sp, #16" vs
+ // "push {r0-r3}".
+ if (Prolog[I] != Epilog[Epilog.size() - 1 - I])
+ return -1;
+ }
+
+ if (CanTweakProlog) {
+ // Check that both prolog and epilog end with an expected end opcode.
+ if (Prolog.front().Operation != Win64EH::UOP_End)
+ return -1;
+ if (Epilog.back().Operation != Win64EH::UOP_End &&
+ Epilog.back().Operation != Win64EH::UOP_EndNop &&
+ Epilog.back().Operation != Win64EH::UOP_WideEndNop)
+ return -1;
+ }
+
+ // If the epilog was a subset of the prolog, find its offset.
+ if (Epilog.size() == Prolog.size())
+ return 0;
+ return ARMCountOfUnwindCodes(ArrayRef<WinEH::Instruction>(
+ &Prolog[Epilog.size()], Prolog.size() - Epilog.size()));
+}
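For example, take a prolog and epilog whose stored opcode vectors mirror each other except for the end opcode; on the first shared epilog the mismatch is tolerated:

// Worked example (a sketch; vectors as stored, end opcode first in Prolog):
//   Prolog: { UOP_End,        UOP_SaveRegsR4R7LR, UOP_AllocSmall }
//   Epilog: { UOP_AllocSmall, UOP_SaveRegsR4R7LR, UOP_EndNop     }
// With CanTweakProlog, the trailing UOP_EndNop is accepted against the
// prolog's UOP_End; the sizes match, so the offset is 0, and the caller
// then rewrites the prolog's end opcode to UOP_EndNop.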
+
+static int checkARMPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
+ int PrologCodeBytes) {
+ // Can only pack if there's a single epilog
+ if (info->EpilogMap.size() != 1)
+ return -1;
+
+ const WinEH::FrameInfo::Epilog &EpilogInfo = info->EpilogMap.begin()->second;
+ // Can only pack if the epilog is unconditional
+ if (EpilogInfo.Condition != 0xe) // ARMCC::AL
+ return -1;
+
+ const std::vector<WinEH::Instruction> &Epilog = EpilogInfo.Instructions;
+ // Make sure we have at least the trailing end opcode
+ if (info->Instructions.empty() || Epilog.empty())
+ return -1;
+
+ // Check that the epilog actually is at the very end of the function,
+ // otherwise it can't be packed.
+ Optional<int64_t> MaybeDistance = GetOptionalAbsDifference(
+ streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first);
+ if (!MaybeDistance)
+ return -1;
+ uint32_t DistanceFromEnd = (uint32_t)*MaybeDistance;
+ uint32_t InstructionBytes = ARMCountOfInstructionBytes(Epilog);
+ if (DistanceFromEnd != InstructionBytes)
+ return -1;
+
+ int RetVal = -1;
+ // Even if we don't end up sharing opcodes with the prolog, we can still
+ // write the offset as a packed offset, if the single epilog is located at
+ // the end of the function and the offset (pointing after the prolog) fits
+ // as a packed offset.
+ if (PrologCodeBytes <= 31 &&
+ PrologCodeBytes + ARMCountOfUnwindCodes(Epilog) <= 63)
+ RetVal = PrologCodeBytes;
+
+ int Offset =
+ getARMOffsetInProlog(info->Instructions, Epilog, /*CanTweakProlog=*/true);
+ if (Offset < 0)
+ return RetVal;
+
+ // Check that the offset and prolog size fits in the first word; it's
+ // unclear whether the epilog count in the extension word can be taken
+ // as packed epilog offset.
+ if (Offset > 31 || PrologCodeBytes > 63)
+ return RetVal;
+
+ // Replace the regular end opcode of the prolog with the one from the
+ // epilog.
+ info->Instructions.front() = Epilog.back();
+
+ // As we choose to express the epilog as part of the prolog, remove the
+ // epilog from the map, so we don't try to emit its opcodes.
+ info->EpilogMap.clear();
+ return Offset;
+}
+
+static bool parseRegMask(unsigned Mask, bool &HasLR, bool &HasR11,
+ unsigned &Folded, int &IntRegs) {
+ if (Mask & (1 << 14)) {
+ HasLR = true;
+ Mask &= ~(1 << 14);
+ }
+ if (Mask & (1 << 11)) {
+ HasR11 = true;
+ Mask &= ~(1 << 11);
+ }
+ Folded = 0;
+ IntRegs = -1;
+ if (!Mask)
+ return true;
+ int First = 0;
+ // Shift right until we have the bits at the bottom
+ while ((Mask & 1) == 0) {
+ First++;
+ Mask >>= 1;
+ }
+ if ((Mask & (Mask + 1)) != 0)
+ return false; // Not a consecutive series of bits? Can't be packed.
+ // Count the bits
+ int N = 0;
+ while (Mask & (1 << N))
+ N++;
+ if (First < 4) {
+ if (First + N < 4)
+ return false;
+ Folded = 4 - First;
+ N -= Folded;
+ First = 4;
+ }
+ if (First > 4)
+ return false; // Can't be packed
+ if (N >= 1)
+ IntRegs = N - 1;
+ return true;
+}
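For instance, the mask for push {r1-r5, lr} sets bits 1-5 and bit 14; parseRegMask folds r1-r3 into the stack adjustment and leaves r4-r5 as the canonical register range. A sketch of that call:

// Sketch: 0x403E sets bits 1-5 (r1-r5) and bit 14 (lr).
bool HasLR = false, HasR11 = false;
unsigned Folded;
int IntRegs;
bool OK = parseRegMask(0x403E, HasLR, HasR11, Folded, IntRegs);
// OK == true, HasLR == true, HasR11 == false, Folded == 3 (r1-r3 become
// 12 bytes of folded stack adjust), IntRegs == 1 (saved range is r4-r5).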
+
+static bool tryARMPackedUnwind(MCStreamer &streamer, WinEH::FrameInfo *info,
+ uint32_t FuncLength) {
+ int Step = 0;
+ bool Homing = false;
+ bool HasR11 = false;
+ bool HasChain = false;
+ bool HasLR = false;
+ int IntRegs = -1; // r4 - r(4+N)
+ int FloatRegs = -1; // d8 - d(8+N)
+ unsigned PF = 0; // Number of extra pushed registers
+ unsigned StackAdjust = 0;
+ // Iterate over the prolog and check that all opcodes exactly match
+ // the canonical order and form.
+ for (const WinEH::Instruction &Inst : info->Instructions) {
+ switch (Inst.Operation) {
+ default:
+ llvm_unreachable("Unsupported ARM unwind code");
+ case Win64EH::UOP_Custom:
+ case Win64EH::UOP_AllocLarge:
+ case Win64EH::UOP_AllocHuge:
+ case Win64EH::UOP_WideAllocLarge:
+ case Win64EH::UOP_WideAllocHuge:
+ case Win64EH::UOP_SaveFRegD0D15:
+ case Win64EH::UOP_SaveFRegD16D31:
+ // Can't be packed
+ return false;
+ case Win64EH::UOP_SaveSP:
+ // Can't be packed; we can't rely on restoring sp from r11 when
+ // unwinding a packed prologue.
+ return false;
+ case Win64EH::UOP_SaveLR:
+ // Can't be present in a packed prologue
+ return false;
+
+ case Win64EH::UOP_End:
+ case Win64EH::UOP_EndNop:
+ case Win64EH::UOP_WideEndNop:
+ if (Step != 0)
+ return false;
+ Step = 1;
+ break;
+
+ case Win64EH::UOP_SaveRegsR4R7LR:
+ case Win64EH::UOP_WideSaveRegsR4R11LR:
+ // push {r4-r11,lr}
+ if (Step != 1 && Step != 2)
+ return false;
+ assert(Inst.Register >= 4 && Inst.Register <= 11); // r4-rX
+ assert(Inst.Offset <= 1); // LR
+ IntRegs = Inst.Register - 4;
+ if (Inst.Register == 11) {
+ HasR11 = true;
+ IntRegs--;
+ }
+ if (Inst.Offset)
+ HasLR = true;
+ Step = 3;
+ break;
+
+ case Win64EH::UOP_SaveRegMask:
+ if (Step == 1 && Inst.Register == 0x0f) {
+ // push {r0-r3}
+ Homing = true;
+ Step = 2;
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case Win64EH::UOP_WideSaveRegMask:
+ if (Step != 1 && Step != 2)
+ return false;
+ // push {r4-r9,r11,lr}
+ // push {r11,lr}
+ // push {r1-r5}
+ if (!parseRegMask(Inst.Register, HasLR, HasR11, PF, IntRegs))
+ return false;
+ Step = 3;
+ break;
+
+ case Win64EH::UOP_Nop:
+ // mov r11, sp
+ if (Step != 3 || !HasR11 || IntRegs >= 0 || PF > 0)
+ return false;
+ HasChain = true;
+ Step = 4;
+ break;
+ case Win64EH::UOP_WideNop:
+ // add.w r11, sp, #xx
+ if (Step != 3 || !HasR11 || (IntRegs < 0 && PF == 0))
+ return false;
+ HasChain = true;
+ Step = 4;
+ break;
+
+ case Win64EH::UOP_SaveFRegD8D15:
+ if (Step != 1 && Step != 2 && Step != 3 && Step != 4)
+ return false;
+ assert(Inst.Register >= 8 && Inst.Register <= 15);
+ if (Inst.Register == 15)
+ return false; // Can't pack this case, R==7 means no IntRegs
+ if (IntRegs >= 0)
+ return false;
+ FloatRegs = Inst.Register - 8;
+ Step = 5;
+ break;
+
+ case Win64EH::UOP_AllocSmall:
+ case Win64EH::UOP_WideAllocMedium:
+ if (Step != 1 && Step != 2 && Step != 3 && Step != 4 && Step != 5)
+ return false;
+ if (PF > 0) // Can't have both folded and explicit stack allocation
+ return false;
+ if (Inst.Offset / 4 >= 0x3f4)
+ return false;
+ StackAdjust = Inst.Offset / 4;
+ Step = 6;
+ break;
+ }
+ }
+ if (HasR11 && !HasChain) {
+ if (IntRegs + 4 == 10) {
+ // r11 stored, but not chaining; can be packed if already saving r4-r10
+ // and we can fit r11 into this range.
+ IntRegs++;
+ HasR11 = false;
+ } else
+ return false;
+ }
+ if (HasChain && !HasLR)
+ return false;
+
+ // Packed unwind info can't express multiple epilogues.
+ if (info->EpilogMap.size() > 1)
+ return false;
+
+ unsigned EF = 0;
+ int Ret = 0;
+ if (info->EpilogMap.size() == 0) {
+ Ret = 3; // No epilogue
+ } else {
+ // As the prologue and epilogue aren't exact mirrors of each other,
+ // we have to check the epilogue too and see if it matches what we've
+ // concluded from the prologue.
+ const WinEH::FrameInfo::Epilog &EpilogInfo =
+ info->EpilogMap.begin()->second;
+ if (EpilogInfo.Condition != 0xe) // ARMCC::AL
+ return false;
+ const std::vector<WinEH::Instruction> &Epilog = EpilogInfo.Instructions;
+ Optional<int64_t> MaybeDistance = GetOptionalAbsDifference(
+ streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first);
+ if (!MaybeDistance)
+ return false;
+ uint32_t DistanceFromEnd = (uint32_t)*MaybeDistance;
+ uint32_t InstructionBytes = ARMCountOfInstructionBytes(Epilog);
+ if (DistanceFromEnd != InstructionBytes)
+ return false;
+
+ bool GotStackAdjust = false;
+ bool GotFloatRegs = false;
+ bool GotIntRegs = false;
+ bool GotHomingRestore = false;
+ bool GotLRRestore = false;
+ bool NeedsReturn = false;
+ bool GotReturn = false;
+
+ Step = 6;
+ for (const WinEH::Instruction &Inst : Epilog) {
+ switch (Inst.Operation) {
+ default:
+ llvm_unreachable("Unsupported ARM unwind code");
+ case Win64EH::UOP_Custom:
+ case Win64EH::UOP_AllocLarge:
+ case Win64EH::UOP_AllocHuge:
+ case Win64EH::UOP_WideAllocLarge:
+ case Win64EH::UOP_WideAllocHuge:
+ case Win64EH::UOP_SaveFRegD0D15:
+ case Win64EH::UOP_SaveFRegD16D31:
+ case Win64EH::UOP_SaveSP:
+ case Win64EH::UOP_Nop:
+ case Win64EH::UOP_WideNop:
+ // Can't be packed in an epilogue
+ return false;
+
+ case Win64EH::UOP_AllocSmall:
+ case Win64EH::UOP_WideAllocMedium:
+ if (Inst.Offset / 4 >= 0x3f4)
+ return false;
+ if (Step == 6) {
+ if (Homing && FloatRegs < 0 && IntRegs < 0 && StackAdjust == 0 &&
+ PF == 0 && Inst.Offset == 16) {
+ GotHomingRestore = true;
+ Step = 10;
+ } else {
+ if (StackAdjust > 0) {
+ // Got stack adjust in prologue too; must match.
+ if (StackAdjust != Inst.Offset / 4)
+ return false;
+ GotStackAdjust = true;
+ } else if (PF == Inst.Offset / 4) {
+ // Folded prologue, non-folded epilogue
+ StackAdjust = Inst.Offset / 4;
+ GotStackAdjust = true;
+ } else {
+ // StackAdjust == 0 in prologue, mismatch
+ return false;
+ }
+ Step = 7;
+ }
+ } else if (Step == 7 || Step == 8 || Step == 9) {
+ if (!Homing || Inst.Offset != 16)
+ return false;
+ GotHomingRestore = true;
+ Step = 10;
+ } else
+ return false;
+ break;
+
+ case Win64EH::UOP_SaveFRegD8D15:
+ if (Step != 6 && Step != 7)
+ return false;
+ assert(Inst.Register >= 8 && Inst.Register <= 15);
+ if (FloatRegs != (int)(Inst.Register - 8))
+ return false;
+ GotFloatRegs = true;
+ Step = 8;
+ break;
+
+ case Win64EH::UOP_SaveRegsR4R7LR:
+ case Win64EH::UOP_WideSaveRegsR4R11LR: {
+ // push {r4-r11,lr}
+ if (Step != 6 && Step != 7 && Step != 8)
+ return false;
+ assert(Inst.Register >= 4 && Inst.Register <= 11); // r4-rX
+ assert(Inst.Offset <= 1); // LR
+ if (Homing && HasLR) {
+ // If homing and LR is backed up, we can either restore LR here
+ // and return with Ret == 1 or 2, or return with SaveLR below
+ if (Inst.Offset) {
+ GotLRRestore = true;
+ NeedsReturn = true;
+ } else {
+ // Expecting a separate SaveLR below
+ }
+ } else {
+ if (HasLR != (Inst.Offset == 1))
+ return false;
+ }
+ GotLRRestore = Inst.Offset == 1;
+ if (IntRegs < 0) // This opcode must include r4
+ return false;
+ int Expected = IntRegs;
+ if (HasChain) {
+ // Can't express r11 here unless IntRegs describe r4-r10
+ if (IntRegs != 6)
+ return false;
+ Expected++;
+ }
+ if (Expected != (int)(Inst.Register - 4))
+ return false;
+ GotIntRegs = true;
+ Step = 9;
+ break;
+ }
+
+ case Win64EH::UOP_SaveRegMask:
+ case Win64EH::UOP_WideSaveRegMask: {
+ if (Step != 6 && Step != 7 && Step != 8)
+ return false;
+ // push {r4-r9,r11,lr}
+ // push {r11,lr}
+ // push {r1-r5}
+ bool CurHasLR = false, CurHasR11 = false;
+ int Regs;
+ if (!parseRegMask(Inst.Register, CurHasLR, CurHasR11, EF, Regs))
+ return false;
+ if (EF > 0) {
+ if (EF != PF && EF != StackAdjust)
+ return false;
+ }
+ if (Homing && HasLR) {
+ // If homing and LR is backed up, we can either restore LR here
+ // and return with Ret == 1 or 2, or return with SaveLR below
+ if (CurHasLR) {
+ GotLRRestore = true;
+ NeedsReturn = true;
+ } else {
+ // Expecting a separate SaveLR below
+ }
+ } else {
+ if (CurHasLR != HasLR)
+ return false;
+ GotLRRestore = CurHasLR;
+ }
+ int Expected = IntRegs;
+ if (HasChain) {
+ // If we have chaining, the mask must have included r11.
+ if (!CurHasR11)
+ return false;
+ } else if (Expected == 7) {
+ // If we don't have chaining, the mask could still include r11,
+ // expressed as part of IntRegs instead.
+ Expected--;
+ if (!CurHasR11)
+ return false;
+ } else {
+ // Neither HasChain nor r11 included in IntRegs, must not have r11
+ // here either.
+ if (CurHasR11)
+ return false;
+ }
+ if (Expected != Regs)
+ return false;
+ GotIntRegs = true;
+ Step = 9;
+ break;
+ }
+
+ case Win64EH::UOP_SaveLR:
+ if (Step != 6 && Step != 7 && Step != 8 && Step != 9)
+ return false;
+ if (!Homing || Inst.Offset != 20 || GotLRRestore)
+ return false;
+ GotLRRestore = true;
+ GotHomingRestore = true;
+ Step = 10;
+ break;
+
+ case Win64EH::UOP_EndNop:
+ case Win64EH::UOP_WideEndNop:
+ GotReturn = true;
+ Ret = (Inst.Operation == Win64EH::UOP_EndNop) ? 1 : 2;
+ LLVM_FALLTHROUGH;
+ case Win64EH::UOP_End:
+ if (Step != 6 && Step != 7 && Step != 8 && Step != 9 && Step != 10)
+ return false;
+ Step = 11;
+ break;
+ }
+ }
+
+ if (Step != 11)
+ return false;
+ if (StackAdjust > 0 && !GotStackAdjust && EF == 0)
+ return false;
+ if (FloatRegs >= 0 && !GotFloatRegs)
+ return false;
+ if (IntRegs >= 0 && !GotIntRegs)
+ return false;
+ if (Homing && !GotHomingRestore)
+ return false;
+ if (HasLR && !GotLRRestore)
+ return false;
+ if (NeedsReturn && !GotReturn)
+ return false;
+ }
+
+ assert(PF == 0 || EF == 0 ||
+ StackAdjust == 0); // Can't have adjust in all three
+ if (PF > 0 || EF > 0) {
+ StackAdjust = PF > 0 ? (PF - 1) : (EF - 1);
+ assert(StackAdjust <= 3);
+ StackAdjust |= 0x3f0;
+ if (PF > 0)
+ StackAdjust |= 1 << 2;
+ if (EF > 0)
+ StackAdjust |= 1 << 3;
+ }
+
+ assert(FuncLength <= 0x7FF && "FuncLength should have been checked earlier");
+ int Flag = info->Fragment ? 0x02 : 0x01;
+ int H = Homing ? 1 : 0;
+ int L = HasLR ? 1 : 0;
+ int C = HasChain ? 1 : 0;
+ assert(IntRegs < 0 || FloatRegs < 0);
+ unsigned Reg, R;
+ if (IntRegs >= 0) {
+ Reg = IntRegs;
+ assert(Reg <= 7);
+ R = 0;
+ } else if (FloatRegs >= 0) {
+ Reg = FloatRegs;
+ assert(Reg < 7);
+ R = 1;
+ } else {
+ // No int or float regs stored (except possibly R11,LR)
+ Reg = 7;
+ R = 1;
+ }
+ info->PackedInfo |= Flag << 0;
+ info->PackedInfo |= (FuncLength & 0x7FF) << 2;
+ info->PackedInfo |= (Ret & 0x3) << 13;
+ info->PackedInfo |= H << 15;
+ info->PackedInfo |= Reg << 16;
+ info->PackedInfo |= R << 19;
+ info->PackedInfo |= L << 20;
+ info->PackedInfo |= C << 21;
+ assert(StackAdjust <= 0x3ff);
+ info->PackedInfo |= StackAdjust << 22;
+ return true;
+}
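To make the bit packing above concrete, here is one worked assignment (a sketch; the field values are chosen purely for illustration):

// Flag=1 (packed), FuncLength=0x20 (a 64-byte function in halfwords),
// Ret=1 (16-bit return), H=0, Reg=7 with R=1 (no saved registers),
// L=0, C=0, StackAdjust=2 (8 bytes):
//   0x1 | (0x20 << 2) | (1 << 13) | (7 << 16) | (1 << 19) | (2 << 22)
//     == 0x008F2081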
+
+// Populate the .xdata section. The format of .xdata on ARM is documented at
+// https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling
+static void ARMEmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
+ bool TryPacked = true) {
+ // If this UNWIND_INFO already has a symbol, it's already been emitted.
+ if (info->Symbol)
+ return;
+ // If there's no unwind info here (not even a terminating UOP_End), the
+ // unwind info is considered bogus and skipped. If this was done in
+ // response to an explicit .seh_handlerdata, the associated trailing
+ // handler data is left orphaned in the xdata section.
+ if (info->empty()) {
+ info->EmitAttempted = true;
+ return;
+ }
+ if (info->EmitAttempted) {
+ // If we tried to emit unwind info before (due to an explicit
+ // .seh_handlerdata directive), but skipped it (because there was no
+ // valid information to emit at the time), and it later got valid unwind
+ // opcodes, we can't emit it here, because the trailing handler data
+ // was already emitted elsewhere in the xdata section.
+ streamer.getContext().reportError(
+ SMLoc(), "Earlier .seh_handlerdata for " + info->Function->getName() +
+ " skipped due to no unwind info at the time "
+ "(.seh_handlerdata too early?), but the function later "
+ "did get unwind info that can't be emitted");
+ return;
+ }
+
+ MCContext &context = streamer.getContext();
+ MCSymbol *Label = context.createTempSymbol();
+
+ streamer.emitValueToAlignment(4);
+ streamer.emitLabel(Label);
+ info->Symbol = Label;
+
+ if (!info->PrologEnd)
+ streamer.getContext().reportError(SMLoc(), "Prologue in " +
+ info->Function->getName() +
+ " not correctly terminated");
+
+ if (info->PrologEnd && !info->Fragment)
+ checkARMInstructions(streamer, info->Instructions, info->Begin,
+ info->PrologEnd, info->Function->getName(),
+ "prologue");
+ for (auto &I : info->EpilogMap) {
+ MCSymbol *EpilogStart = I.first;
+ auto &Epilog = I.second;
+ checkARMInstructions(streamer, Epilog.Instructions, EpilogStart, Epilog.End,
+ info->Function->getName(), "epilogue");
+ if (Epilog.Instructions.empty() ||
+ !isARMTerminator(Epilog.Instructions.back()))
+ streamer.getContext().reportError(
+ SMLoc(), "Epilogue in " + info->Function->getName() +
+ " not correctly terminated");
+ }
+
+ Optional<int64_t> RawFuncLength;
+ const MCExpr *FuncLengthExpr = nullptr;
+ if (!info->FuncletOrFuncEnd) {
+ report_fatal_error("FuncletOrFuncEnd not set");
+ } else {
+ // As the size of many thumb2 instructions isn't known until later,
+ // we can't always rely on being able to calculate the absolute
+ // length of the function here. If we can't calculate it, defer it
+ // to a relocation.
+ //
+ // In such a case, we won't know if the function is too long so that
+ // the unwind info would need to be split (but this isn't implemented
+ // anyway).
+ RawFuncLength =
+ GetOptionalAbsDifference(streamer, info->FuncletOrFuncEnd, info->Begin);
+ if (!RawFuncLength)
+ FuncLengthExpr =
+ GetSubDivExpr(streamer, info->FuncletOrFuncEnd, info->Begin, 2);
+ }
+ uint32_t FuncLength = 0;
+ if (RawFuncLength)
+ FuncLength = (uint32_t)*RawFuncLength / 2;
+ if (FuncLength > 0x3FFFF)
+ report_fatal_error("SEH unwind data splitting not yet implemented");
+ uint32_t PrologCodeBytes = ARMCountOfUnwindCodes(info->Instructions);
+ uint32_t TotalCodeBytes = PrologCodeBytes;
+
+ if (!info->HandlesExceptions && RawFuncLength && FuncLength <= 0x7ff &&
+ TryPacked) {
+ // No exception handlers; check if the prolog and epilog matches the
+ // patterns that can be described by the packed format. If we don't
+ // know the exact function length yet, we can't do this.
+
+ // info->Symbol was already set even if we didn't actually write any
+ // unwind info there. Keep using that as indicator that this unwind
+ // info has been generated already.
+
+ if (tryARMPackedUnwind(streamer, info, FuncLength))
+ return;
+ }
+
+ int PackedEpilogOffset =
+ checkARMPackedEpilog(streamer, info, PrologCodeBytes);
+
+ // Process epilogs.
+ MapVector<MCSymbol *, uint32_t> EpilogInfo;
+ // Epilogs processed so far.
+ std::vector<MCSymbol *> AddedEpilogs;
+
+ bool CanTweakProlog = true;
+ for (auto &I : info->EpilogMap) {
+ MCSymbol *EpilogStart = I.first;
+ auto &EpilogInstrs = I.second.Instructions;
+ uint32_t CodeBytes = ARMCountOfUnwindCodes(EpilogInstrs);
+
+ MCSymbol *MatchingEpilog =
+ FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
+ int PrologOffset;
+ if (MatchingEpilog) {
+ assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+ "Duplicate epilog not found");
+ EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
+ // Clear the unwind codes in the EpilogMap, so that they don't get output
+ // in the logic below.
+ EpilogInstrs.clear();
+ } else if ((PrologOffset = getARMOffsetInProlog(
+ info->Instructions, EpilogInstrs, CanTweakProlog)) >= 0) {
+ if (CanTweakProlog) {
+ // Replace the regular end opcode of the prolog with the one from the
+ // epilog.
+ info->Instructions.front() = EpilogInstrs.back();
+ // Later epilogs need a strict match for the end opcode.
+ CanTweakProlog = false;
+ }
+ EpilogInfo[EpilogStart] = PrologOffset;
+ // Clear the unwind codes in the EpilogMap, so that they don't get output
+ // in the logic below.
+ EpilogInstrs.clear();
+ } else {
+ EpilogInfo[EpilogStart] = TotalCodeBytes;
+ TotalCodeBytes += CodeBytes;
+ AddedEpilogs.push_back(EpilogStart);
+ }
+ }
+
+ // Code Words, Epilog count, F, E, X, Vers, Function Length
+ uint32_t row1 = 0x0;
+ uint32_t CodeWords = TotalCodeBytes / 4;
+ uint32_t CodeWordsMod = TotalCodeBytes % 4;
+ if (CodeWordsMod)
+ CodeWords++;
+ uint32_t EpilogCount =
+ PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size();
+ bool ExtensionWord = EpilogCount > 31 || CodeWords > 15;
+ if (!ExtensionWord) {
+ row1 |= (EpilogCount & 0x1F) << 23;
+ row1 |= (CodeWords & 0x0F) << 28;
+ }
+ if (info->HandlesExceptions) // X
+ row1 |= 1 << 20;
+ if (PackedEpilogOffset >= 0) // E
+ row1 |= 1 << 21;
+ if (info->Fragment) // F
+ row1 |= 1 << 22;
+ row1 |= FuncLength & 0x3FFFF;
+ if (RawFuncLength)
+ streamer.emitInt32(row1);
+ else
+ streamer.emitValue(
+ MCBinaryExpr::createOr(FuncLengthExpr,
+ MCConstantExpr::create(row1, context), context),
+ 4);
+
+ // Extended Code Words, Extended Epilog Count
+ if (ExtensionWord) {
+ // FIXME: We should be able to split unwind info into multiple sections.
+ if (CodeWords > 0xFF || EpilogCount > 0xFFFF)
+ report_fatal_error("SEH unwind data splitting not yet implemented");
+ uint32_t row2 = 0x0;
+ row2 |= (CodeWords & 0xFF) << 16;
+ row2 |= (EpilogCount & 0xFFFF);
+ streamer.emitInt32(row2);
+ }
+
+ if (PackedEpilogOffset < 0) {
+ // Epilog Start Index, Epilog Start Offset
+ for (auto &I : EpilogInfo) {
+ MCSymbol *EpilogStart = I.first;
+ uint32_t EpilogIndex = I.second;
+
+ Optional<int64_t> MaybeEpilogOffset =
+ GetOptionalAbsDifference(streamer, EpilogStart, info->Begin);
+ const MCExpr *OffsetExpr = nullptr;
+ uint32_t EpilogOffset = 0;
+ if (MaybeEpilogOffset)
+ EpilogOffset = *MaybeEpilogOffset / 2;
+ else
+ OffsetExpr = GetSubDivExpr(streamer, EpilogStart, info->Begin, 2);
+
+ assert(info->EpilogMap.find(EpilogStart) != info->EpilogMap.end());
+ unsigned Condition = info->EpilogMap[EpilogStart].Condition;
+ assert(Condition <= 0xf);
+
+ uint32_t row3 = EpilogOffset;
+ row3 |= Condition << 20;
+ row3 |= (EpilogIndex & 0x3FF) << 24;
+ if (MaybeEpilogOffset)
+ streamer.emitInt32(row3);
+ else
+ streamer.emitValue(
+ MCBinaryExpr::createOr(
+ OffsetExpr, MCConstantExpr::create(row3, context), context),
+ 4);
+ }
+ }
+
+ // Emit prolog unwind instructions (in reverse order).
+ uint8_t numInst = info->Instructions.size();
+ for (uint8_t c = 0; c < numInst; ++c) {
+ WinEH::Instruction inst = info->Instructions.back();
+ info->Instructions.pop_back();
+ ARMEmitUnwindCode(streamer, inst);
+ }
+
+ // Emit epilog unwind instructions
+ for (auto &I : info->EpilogMap) {
+ auto &EpilogInstrs = I.second.Instructions;
+ for (uint32_t i = 0; i < EpilogInstrs.size(); i++) {
+ WinEH::Instruction inst = EpilogInstrs[i];
+ ARMEmitUnwindCode(streamer, inst);
+ }
+ }
+
+ int32_t BytesMod = CodeWords * 4 - TotalCodeBytes;
+ assert(BytesMod >= 0);
+ for (int i = 0; i < BytesMod; i++)
+ streamer.emitInt8(0xFB);
+
+ if (info->HandlesExceptions)
+ streamer.emitValue(
+ MCSymbolRefExpr::create(info->ExceptionHandler,
+ MCSymbolRefExpr::VK_COFF_IMGREL32, context),
+ 4);
+}
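When no extension word is needed, the first .xdata word assembled above packs FuncLength into bits 0-17, X/E/F into bits 20-22, the epilog count into bits 23-27 and the code-word count into bits 28-31. A worked instance (values chosen for illustration):

//   FuncLength=0x100 (512 halfwords), X=E=F=0, EpilogCount=2, CodeWords=1:
//   0x100 | (2 << 23) | (1 << 28) == 0x11000100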
+
+static void ARMEmitRuntimeFunction(MCStreamer &streamer,
+ const WinEH::FrameInfo *info) {
MCContext &context = streamer.getContext();
streamer.emitValueToAlignment(4);
@@ -1088,7 +2227,7 @@ void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const {
if (Info->empty())
continue;
MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection);
- Streamer.SwitchSection(XData);
+ Streamer.switchSection(XData);
ARM64EmitUnwindInfo(Streamer, Info);
}
@@ -1101,8 +2240,8 @@ void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const {
if (!Info->Symbol)
continue;
MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection);
- Streamer.SwitchSection(PData);
- ARM64EmitRuntimeFunction(Streamer, Info);
+ Streamer.switchSection(PData);
+ ARMEmitRuntimeFunction(Streamer, Info);
}
}
@@ -1116,12 +2255,57 @@ void llvm::Win64EH::ARM64UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
// end hasn't been marked yet, the xdata function length won't cover the
// whole function, only up to this point.
if (!info->FuncletOrFuncEnd) {
- Streamer.SwitchSection(info->TextSection);
+ Streamer.switchSection(info->TextSection);
info->FuncletOrFuncEnd = Streamer.emitCFILabel();
}
// Switch sections (the static function above is meant to be called from
// here and from Emit()).
MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection);
- Streamer.SwitchSection(XData);
+ Streamer.switchSection(XData);
ARM64EmitUnwindInfo(Streamer, info, /* TryPacked = */ !HandlerData);
}
+
+void llvm::Win64EH::ARMUnwindEmitter::Emit(MCStreamer &Streamer) const {
+ // Emit the unwind info structs first.
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ WinEH::FrameInfo *Info = CFI.get();
+ if (Info->empty())
+ continue;
+ MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection);
+ Streamer.switchSection(XData);
+ ARMEmitUnwindInfo(Streamer, Info);
+ }
+
+ // Now emit RUNTIME_FUNCTION entries.
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
+ WinEH::FrameInfo *Info = CFI.get();
+ // ARMEmitUnwindInfo above clears the info struct, so we can't check
+ // empty here. But if a Symbol is set, we should create the corresponding
+ // pdata entry.
+ if (!Info->Symbol)
+ continue;
+ MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection);
+ Streamer.switchSection(PData);
+ ARMEmitRuntimeFunction(Streamer, Info);
+ }
+}
+
+void llvm::Win64EH::ARMUnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
+ WinEH::FrameInfo *info,
+ bool HandlerData) const {
+ // Called if there's an .seh_handlerdata directive before the end of the
+ // function. This forces writing the xdata record already here - and
+ // in this case, the function isn't actually ended already, but the xdata
+ // record needs to know the function length. In these cases, if the funclet
+ // end hasn't been marked yet, the xdata function length won't cover the
+ // whole function, only up to this point.
+ if (!info->FuncletOrFuncEnd) {
+ Streamer.switchSection(info->TextSection);
+ info->FuncletOrFuncEnd = Streamer.emitCFILabel();
+ }
+ // Switch sections (the static function above is meant to be called from
+ // here and from Emit()).
+ MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection);
+ Streamer.switchSection(XData);
+ ARMEmitUnwindInfo(Streamer, info, /* TryPacked = */ !HandlerData);
+}
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 0dfe5a5c2bdb..ad883131eae1 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
@@ -27,14 +28,12 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbolCOFF.h"
-#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <cassert>
#include <cstdint>
using namespace llvm;
@@ -71,16 +70,16 @@ void MCWinCOFFStreamer::initSections(bool NoExecStack,
// FIXME: this is identical to the ELF one.
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
- SwitchSection(getContext().getObjectFileInfo()->getTextSection());
+ switchSection(getContext().getObjectFileInfo()->getTextSection());
emitCodeAlignment(4, &STI);
- SwitchSection(getContext().getObjectFileInfo()->getDataSection());
+ switchSection(getContext().getObjectFileInfo()->getDataSection());
emitCodeAlignment(4, &STI);
- SwitchSection(getContext().getObjectFileInfo()->getBSSSection());
+ switchSection(getContext().getObjectFileInfo()->getBSSSection());
emitCodeAlignment(4, &STI);
- SwitchSection(getContext().getObjectFileInfo()->getTextSection());
+ switchSection(getContext().getObjectFileInfo()->getTextSection());
}
void MCWinCOFFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
@@ -134,7 +133,7 @@ void MCWinCOFFStreamer::emitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
llvm_unreachable("not implemented");
}
-void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *S) {
+void MCWinCOFFStreamer::beginCOFFSymbolDef(MCSymbol const *S) {
auto *Symbol = cast<MCSymbolCOFF>(S);
if (CurSymbol)
Error("starting a new symbol definition without completing the "
@@ -142,7 +141,7 @@ void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *S) {
CurSymbol = Symbol;
}
-void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+void MCWinCOFFStreamer::emitCOFFSymbolStorageClass(int StorageClass) {
if (!CurSymbol) {
Error("storage class specified outside of symbol definition");
return;
@@ -158,7 +157,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
cast<MCSymbolCOFF>(CurSymbol)->setClass((uint16_t)StorageClass);
}
-void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+void MCWinCOFFStreamer::emitCOFFSymbolType(int Type) {
if (!CurSymbol) {
Error("symbol type specified outside of a symbol definition");
return;
@@ -173,13 +172,13 @@ void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) {
cast<MCSymbolCOFF>(CurSymbol)->setType((uint16_t)Type);
}
-void MCWinCOFFStreamer::EndCOFFSymbolDef() {
+void MCWinCOFFStreamer::endCOFFSymbolDef() {
if (!CurSymbol)
Error("ending symbol definition without starting one");
CurSymbol = nullptr;
}
-void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) {
// SafeSEH is a feature specific to 32-bit x86. It does not exist (and is
// unnecessary) on all platforms which use table-based exception dispatch.
if (getContext().getTargetTriple().getArch() != Triple::x86)
@@ -205,7 +204,7 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
}
-void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) {
MCSection *Sec = getCurrentSectionOnly();
getAssembler().registerSection(*Sec);
if (Sec->getAlignment() < 4)
@@ -216,7 +215,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {
getAssembler().registerSymbol(*Symbol);
}
-void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) {
+void MCWinCOFFStreamer::emitCOFFSectionIndex(const MCSymbol *Symbol) {
visitUsedSymbol(*Symbol);
MCDataFragment *DF = getOrCreateDataFragment();
const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext());
@@ -225,7 +224,7 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) {
DF->getContents().resize(DF->getContents().size() + 2, 0);
}
-void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol,
+void MCWinCOFFStreamer::emitCOFFSecRel32(const MCSymbol *Symbol,
uint64_t Offset) {
visitUsedSymbol(*Symbol);
MCDataFragment *DF = getOrCreateDataFragment();
@@ -243,7 +242,7 @@ void MCWinCOFFStreamer::EmitCOFFSecRel32(const MCSymbol *Symbol,
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
-void MCWinCOFFStreamer::EmitCOFFImgRel32(const MCSymbol *Symbol,
+void MCWinCOFFStreamer::emitCOFFImgRel32(const MCSymbol *Symbol,
int64_t Offset) {
visitUsedSymbol(*Symbol);
MCDataFragment *DF = getOrCreateDataFragment();
@@ -287,10 +286,10 @@ void MCWinCOFFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
OS << " -aligncomm:\"" << Symbol->getName() << "\","
<< Log2_32_Ceil(ByteAlignment);
- PushSection();
- SwitchSection(MFI->getDrectveSection());
+ pushSection();
+ switchSection(MFI->getDrectveSection());
emitBytes(Directive);
- PopSection();
+ popSection();
}
}
@@ -299,13 +298,13 @@ void MCWinCOFFStreamer::emitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
auto *Symbol = cast<MCSymbolCOFF>(S);
MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
- PushSection();
- SwitchSection(Section);
+ pushSection();
+ switchSection(Section);
emitValueToAlignment(ByteAlignment, 0, 1, 0);
emitLabel(Symbol);
Symbol->setExternal(false);
emitZeros(Size);
- PopSection();
+ popSection();
}
void MCWinCOFFStreamer::emitWeakReference(MCSymbol *AliasS,
@@ -334,7 +333,7 @@ void MCWinCOFFStreamer::emitIdent(StringRef IdentString) {
llvm_unreachable("not implemented");
}
-void MCWinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
+void MCWinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) {
llvm_unreachable("not implemented");
}
diff --git a/llvm/lib/MC/MCWinEH.cpp b/llvm/lib/MC/MCWinEH.cpp
index e58a0b2cf654..1a6d5a3b562e 100644
--- a/llvm/lib/MC/MCWinEH.cpp
+++ b/llvm/lib/MC/MCWinEH.cpp
@@ -7,18 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCWinEH.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/COFF.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
namespace llvm {
namespace WinEH {
-UnwindEmitter::~UnwindEmitter() {}
+UnwindEmitter::~UnwindEmitter() = default;
}
}
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index 90604782de13..a4a42279d6e2 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -13,12 +13,14 @@
#include "llvm/MC/MCXCOFFStreamer.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -54,6 +56,9 @@ bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym,
case llvm::MCSA_Protected:
Symbol->setVisibilityType(XCOFF::SYM_V_PROTECTED);
break;
+ case llvm::MCSA_Exported:
+ Symbol->setVisibilityType(XCOFF::SYM_V_EXPORTED);
+ break;
default:
report_fatal_error("Not implemented yet.");
}
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 56bb03ad8d42..78d0d9cec556 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -29,6 +30,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -751,6 +753,24 @@ static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
llvm_unreachable("Invalid mc version min type");
}
+// Encode addrsig data as symbol indexes in variable length encoding.
+void MachObjectWriter::writeAddrsigSection(MCAssembler &Asm) {
+ MCSection *AddrSigSection =
+ Asm.getContext().getObjectFileInfo()->getAddrSigSection();
+ MCSection::FragmentListType &fragmentList = AddrSigSection->getFragmentList();
+ if (!fragmentList.size())
+ return;
+
+ assert(fragmentList.size() == 1);
+ MCFragment *pFragment = &*fragmentList.begin();
+ MCDataFragment *pDataFragment = dyn_cast_or_null<MCDataFragment>(pFragment);
+ assert(pDataFragment);
+
+ raw_svector_ostream OS(pDataFragment->getContents());
+ for (const MCSymbol *sym : this->getAddrsigSyms())
+ encodeULEB128(sym->getIndex(), OS);
+}
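encodeULEB128 writes seven payload bits per byte, least-significant group first, with the top bit set on every byte except the last. A self-contained sketch of the encoding used above:

// A symbol index of 300 (0b1'0010'1100) encodes as 0xAC 0x02.
SmallString<8> Buf;
raw_svector_ostream OS(Buf);
encodeULEB128(300, OS); // Buf now holds { 0xAC, 0x02 }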
+
uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint64_t StartOffset = W.OS.tell();
@@ -758,6 +778,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
// Compute symbol table information and bind symbol indices.
computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);
+ writeAddrsigSection(Asm);
if (!Asm.CGProfile.empty()) {
MCSection *CGProfileSection = Asm.getContext().getMachOSection(
@@ -894,8 +915,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
[&](const MCAssembler::VersionInfoType &VersionInfo) {
auto EncodeVersion = [](VersionTuple V) -> uint32_t {
assert(!V.empty() && "empty version");
- unsigned Update = V.getSubminor().getValueOr(0);
- unsigned Minor = V.getMinor().getValueOr(0);
+ unsigned Update = V.getSubminor().value_or(0);
+ unsigned Minor = V.getMinor().value_or(0);
assert(Update < 256 && "unencodable update target version");
assert(Minor < 256 && "unencodable minor target version");
assert(V.getMajor() < 65536 && "unencodable major target version");
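The writeAddrsigSection hook above streams one ULEB128-encoded symbol index per
address-significant symbol into the addrsig section. A minimal standalone
sketch of that byte format (the same wire format LLVM's encodeULEB128
produces; the example indexes are made up):

#include <cstdint>
#include <vector>

// Append Value as unsigned LEB128: 7 value bits per byte, high bit set on
// every byte except the last.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

int main() {
  std::vector<uint8_t> Payload;
  for (uint64_t Index : {3u, 17u, 300u}) // hypothetical symbol indexes
    encodeULEB128(Index, Payload);
  // 3 -> 0x03, 17 -> 0x11, 300 -> 0xAC 0x02
  return Payload.size() == 4 ? 0 : 1;
}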
diff --git a/llvm/lib/MC/SPIRVObjectWriter.cpp b/llvm/lib/MC/SPIRVObjectWriter.cpp
new file mode 100644
index 000000000000..4a07740e8d14
--- /dev/null
+++ b/llvm/lib/MC/SPIRVObjectWriter.cpp
@@ -0,0 +1,76 @@
+//===- llvm/lib/MC/SPIRVObjectWriter.cpp - SPIR-V Object Writer -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCSPIRVObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+class SPIRVObjectWriter : public MCObjectWriter {
+ support::endian::Writer W;
+
+ /// The target specific SPIR-V writer instance.
+ std::unique_ptr<MCSPIRVObjectTargetWriter> TargetObjectWriter;
+
+public:
+ SPIRVObjectWriter(std::unique_ptr<MCSPIRVObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS)
+ : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {}
+
+ ~SPIRVObjectWriter() override = default;
+
+private:
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) override {}
+
+ void executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) override {}
+
+ uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+ void writeHeader(const MCAssembler &Asm);
+};
+
+void SPIRVObjectWriter::writeHeader(const MCAssembler &Asm) {
+ constexpr uint32_t MagicNumber = 0x07230203;
+
+ // TODO: set the version on a min-necessary basis (just like the translator
+ // does); this requires some refactoring of MCAssembler::VersionInfoType.
+ constexpr uint32_t Major = 1;
+ constexpr uint32_t Minor = 0;
+ constexpr uint32_t VersionNumber = 0 | (Major << 16) | (Minor << 8);
+ // TODO: check if we could use anything other than 0 (the spec allows it).
+ constexpr uint32_t GeneratorMagicNumber = 0;
+ // TODO: do not hardcode this value either.
+ constexpr uint32_t Bound = 900;
+ constexpr uint32_t Schema = 0;
+
+ W.write<uint32_t>(MagicNumber);
+ W.write<uint32_t>(VersionNumber);
+ W.write<uint32_t>(GeneratorMagicNumber);
+ W.write<uint32_t>(Bound);
+ W.write<uint32_t>(Schema);
+}
+
+uint64_t SPIRVObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t StartOffset = W.OS.tell();
+ writeHeader(Asm);
+ for (const MCSection &S : Asm)
+ Asm.writeSectionData(W.OS, &S, Layout);
+ return W.OS.tell() - StartOffset;
+}
+
+std::unique_ptr<MCObjectWriter>
+llvm::createSPIRVObjectWriter(std::unique_ptr<MCSPIRVObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS) {
+ return std::make_unique<SPIRVObjectWriter>(std::move(MOTW), OS);
+}
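For orientation, the five header words written by writeHeader above can be
unpacked as below; a sketch using the same constants, with the version layout
(0 | Major << 16 | Minor << 8) taken from the SPIR-V specification:

#include <cstdint>
#include <cstdio>

struct SPIRVHeader {
  uint32_t Magic, Version, Generator, Bound, Schema;
};

int main() {
  SPIRVHeader H = {0x07230203, (1u << 16) | (0u << 8), 0, 900, 0};
  unsigned Major = (H.Version >> 16) & 0xff;
  unsigned Minor = (H.Version >> 8) & 0xff;
  std::printf("SPIR-V %u.%u, bound %u\n", Major, Minor, H.Bound);
  return H.Magic == 0x07230203 ? 0 : 1;
}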
diff --git a/llvm/lib/MC/SubtargetFeature.cpp b/llvm/lib/MC/SubtargetFeature.cpp
index 3155adcf2674..d53cc2f7e37b 100644
--- a/llvm/lib/MC/SubtargetFeature.cpp
+++ b/llvm/lib/MC/SubtargetFeature.cpp
@@ -20,10 +20,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstring>
-#include <iterator>
#include <string>
#include <vector>
diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index 09684b1e5ad2..57444fd23784 100644
--- a/llvm/lib/MC/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -33,7 +33,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
[&](const Target &T) { return ArchName == T.getName(); });
if (I == targets().end()) {
- Error = "error: invalid target '" + ArchName + "'.\n";
+ Error = "invalid target '" + ArchName + "'.\n";
return nullptr;
}
@@ -49,7 +49,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
std::string TempError;
TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError);
if (!TheTarget) {
- Error = ": error: unable to get target for '"
+ Error = "unable to get target for '"
+ TheTriple.getTriple()
+ "', see --version and --triple.\n";
return nullptr;
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 636c1d238932..7cc11d24f286 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/BinaryFormat/WasmTraits.h"
#include "llvm/Config/llvm-config.h"
@@ -31,7 +30,6 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/StringSaver.h"
#include <vector>
using namespace llvm;
@@ -125,12 +123,11 @@ struct WasmCustomSection {
StringRef Name;
MCSectionWasm *Section;
- uint32_t OutputContentsOffset;
- uint32_t OutputIndex;
+ uint32_t OutputContentsOffset = 0;
+ uint32_t OutputIndex = InvalidIndex;
WasmCustomSection(StringRef Name, MCSectionWasm *Section)
- : Name(Name), Section(Section), OutputContentsOffset(0),
- OutputIndex(InvalidIndex) {}
+ : Name(Name), Section(Section) {}
};
#if !defined(NDEBUG)
@@ -140,36 +137,58 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
}
#endif
-// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
+// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded
// to allow patching.
-template <int W>
-void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
uint8_t Buffer[W];
- unsigned SizeLen = encodeULEB128(X, Buffer, W);
+ unsigned SizeLen = encodeULEB128(Value, Buffer, W);
assert(SizeLen == W);
Stream.pwrite((char *)Buffer, SizeLen, Offset);
}
-// Write X as an signed LEB value at offset Offset in Stream, padded
+// Write Value as a signed LEB value at offset Offset in Stream, padded
// to allow patching.
-template <int W>
-void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
uint8_t Buffer[W];
- unsigned SizeLen = encodeSLEB128(X, Buffer, W);
+ unsigned SizeLen = encodeSLEB128(Value, Buffer, W);
assert(SizeLen == W);
Stream.pwrite((char *)Buffer, SizeLen, Offset);
}
-// Write X as a plain integer value at offset Offset in Stream.
-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value,
+ uint64_t Offset) {
+ writePatchableULEB<uint32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value,
+ uint64_t Offset) {
+ writePatchableSLEB<int32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value,
+ uint64_t Offset) {
+ writePatchableULEB<uint64_t, 10>(Stream, Value, Offset);
+}
+
+static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value,
+ uint64_t Offset) {
+ writePatchableSLEB<int64_t, 10>(Stream, Value, Offset);
+}
+
+// Write Value as a plain integer value at offset Offset in Stream.
+static void patchI32(raw_pwrite_stream &Stream, uint32_t Value,
+ uint64_t Offset) {
uint8_t Buffer[4];
- support::endian::write32le(Buffer, X);
+ support::endian::write32le(Buffer, Value);
Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
}
-static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+static void patchI64(raw_pwrite_stream &Stream, uint64_t Value,
+ uint64_t Offset) {
uint8_t Buffer[8];
- support::endian::write64le(Buffer, X);
+ support::endian::write64le(Buffer, Value);
Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
}
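The writePatchable* helpers exist because fields such as payload_len are
reserved first and rewritten later, so the LEB encoding must occupy a fixed
width. A standalone sketch of the unsigned case, assuming the same 5-byte
width used for 32-bit fields:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Encode Value as ULEB128 padded to exactly W bytes so the field can be
// patched in place once the final value is known.
static void encodeULEB128Padded(uint64_t Value, uint8_t *Buf, unsigned W) {
  for (unsigned I = 0; I < W; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I + 1 != W)
      Byte |= 0x80; // continuation bit keeps the width fixed
    Buf[I] = Byte;
  }
  assert(Value == 0 && "value does not fit in W bytes");
}

int main() {
  uint8_t Field[5];
  encodeULEB128Padded(0, Field, 5);    // reserve the field up front
  encodeULEB128Padded(1234, Field, 5); // patch once the size is known
  std::printf("%02x %02x %02x %02x %02x\n", Field[0], Field[1], Field[2],
              Field[3], Field[4]);     // d2 89 80 80 00
  return 0;
}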
@@ -423,8 +442,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
// Write the final section size to the payload_len field, which follows
// the section id byte.
- writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W->OS), Size,
- Section.SizeOffset);
+ writePatchableU32(static_cast<raw_pwrite_stream &>(W->OS), Size,
+ Section.SizeOffset);
}
// Emit the Wasm header.
@@ -755,7 +774,7 @@ void WasmObjectWriter::applyRelocations(
RelEntry.Offset;
LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");
- auto Value = getProvisionalValue(RelEntry, Layout);
+ uint64_t Value = getProvisionalValue(RelEntry, Layout);
switch (RelEntry.Type) {
case wasm::R_WASM_FUNCTION_INDEX_LEB:
@@ -764,10 +783,10 @@ void WasmObjectWriter::applyRelocations(
case wasm::R_WASM_MEMORY_ADDR_LEB:
case wasm::R_WASM_TAG_INDEX_LEB:
case wasm::R_WASM_TABLE_NUMBER_LEB:
- writePatchableLEB<5>(Stream, Value, Offset);
+ writePatchableU32(Stream, Value, Offset);
break;
case wasm::R_WASM_MEMORY_ADDR_LEB64:
- writePatchableLEB<10>(Stream, Value, Offset);
+ writePatchableU64(Stream, Value, Offset);
break;
case wasm::R_WASM_TABLE_INDEX_I32:
case wasm::R_WASM_MEMORY_ADDR_I32:
@@ -787,14 +806,14 @@ void WasmObjectWriter::applyRelocations(
case wasm::R_WASM_MEMORY_ADDR_SLEB:
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB:
- writePatchableSLEB<5>(Stream, Value, Offset);
+ writePatchableS32(Stream, Value, Offset);
break;
case wasm::R_WASM_TABLE_INDEX_SLEB64:
case wasm::R_WASM_TABLE_INDEX_REL_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:
case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64:
- writePatchableSLEB<10>(Stream, Value, Offset);
+ writePatchableS64(Stream, Value, Offset);
break;
default:
llvm_unreachable("invalid relocation type");
@@ -912,25 +931,29 @@ void WasmObjectWriter::writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals) {
for (const wasm::WasmGlobal &Global : Globals) {
encodeULEB128(Global.Type.Type, W->OS);
W->OS << char(Global.Type.Mutable);
- W->OS << char(Global.InitExpr.Opcode);
- switch (Global.Type.Type) {
- case wasm::WASM_TYPE_I32:
- encodeSLEB128(0, W->OS);
- break;
- case wasm::WASM_TYPE_I64:
- encodeSLEB128(0, W->OS);
- break;
- case wasm::WASM_TYPE_F32:
- writeI32(0);
- break;
- case wasm::WASM_TYPE_F64:
- writeI64(0);
- break;
- case wasm::WASM_TYPE_EXTERNREF:
- writeValueType(wasm::ValType::EXTERNREF);
- break;
- default:
- llvm_unreachable("unexpected type");
+ if (Global.InitExpr.Extended) {
+ llvm_unreachable("extected init expressions not supported");
+ } else {
+ W->OS << char(Global.InitExpr.Inst.Opcode);
+ switch (Global.Type.Type) {
+ case wasm::WASM_TYPE_I32:
+ encodeSLEB128(0, W->OS);
+ break;
+ case wasm::WASM_TYPE_I64:
+ encodeSLEB128(0, W->OS);
+ break;
+ case wasm::WASM_TYPE_F32:
+ writeI32(0);
+ break;
+ case wasm::WASM_TYPE_F64:
+ writeI64(0);
+ break;
+ case wasm::WASM_TYPE_EXTERNREF:
+ writeValueType(wasm::ValType::EXTERNREF);
+ break;
+ default:
+ llvm_unreachable("unexpected type");
+ }
}
W->OS << char(wasm::WASM_OPCODE_END);
}
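For reference, the bytes the loop above produces for one non-extended i32
global entry; a sketch with opcode values taken from the WebAssembly binary
format:

#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> Entry;
  Entry.push_back(0x7f); // valtype i32 (wasm::WASM_TYPE_I32)
  Entry.push_back(0x01); // mutable
  Entry.push_back(0x41); // i32.const (wasm::WASM_OPCODE_I32_CONST)
  Entry.push_back(0x00); // SLEB128(0), the provisional init value
  Entry.push_back(0x0b); // end (wasm::WASM_OPCODE_END)
  return Entry.size() == 5 ? 0 : 1;
}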
@@ -1547,9 +1570,9 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
continue;
const auto &WS = static_cast<const MCSymbolWasm &>(S);
- LLVM_DEBUG(dbgs()
- << "MCSymbol: "
- << toString(WS.getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA))
+ LLVM_DEBUG(
+ dbgs() << "MCSymbol: "
+ << toString(WS.getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA))
<< " '" << S << "'"
<< " isDefined=" << S.isDefined() << " isExternal="
<< S.isExternal() << " isTemporary=" << S.isTemporary()
@@ -1639,21 +1662,22 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
wasm::WasmGlobal Global;
Global.Type = WS.getGlobalType();
Global.Index = NumGlobalImports + Globals.size();
+ Global.InitExpr.Extended = false;
switch (Global.Type.Type) {
case wasm::WASM_TYPE_I32:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_I32_CONST;
+ Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST;
break;
case wasm::WASM_TYPE_I64:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_I64_CONST;
+ Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_I64_CONST;
break;
case wasm::WASM_TYPE_F32:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_F32_CONST;
+ Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_F32_CONST;
break;
case wasm::WASM_TYPE_F64:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_F64_CONST;
+ Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_F64_CONST;
break;
case wasm::WASM_TYPE_EXTERNREF:
- Global.InitExpr.Opcode = wasm::WASM_OPCODE_REF_NULL;
+ Global.InitExpr.Inst.Opcode = wasm::WASM_OPCODE_REF_NULL;
break;
default:
llvm_unreachable("unexpected type");
@@ -1785,7 +1809,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
wasm::WasmSymbolInfo Info;
Info.Name = WS.getName();
- Info.Kind = WS.getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA);
+ Info.Kind = WS.getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA);
Info.Flags = Flags;
if (!WS.isData()) {
assert(WasmIndices.count(&WS) > 0);
@@ -1852,7 +1876,8 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
const MCFragment &AlignFrag = *IT;
if (AlignFrag.getKind() != MCFragment::FT_Align)
report_fatal_error(".init_array section should be aligned");
- if (cast<MCAlignFragment>(AlignFrag).getAlignment() != (is64Bit() ? 8 : 4))
+ if (cast<MCAlignFragment>(AlignFrag).getAlignment() !=
+ Align(is64Bit() ? 8 : 4))
report_fatal_error(".init_array section should be aligned for pointers");
const MCFragment &Frag = *std::next(IT);
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 73c687331d30..33e496b7a864 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -41,7 +41,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <cstring>
#include <ctime>
@@ -155,9 +154,7 @@ public:
bool UseBigObj;
bool UseOffsetLabels = false;
- bool EmitAddrsigSection = false;
MCSectionCOFF *AddrsigSection;
- std::vector<const MCSymbol *> AddrsigSyms;
MCSectionCOFF *CGProfileSection = nullptr;
@@ -221,11 +218,6 @@ public:
void assignSectionNumbers();
void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout);
- void emitAddrsigSection() override { EmitAddrsigSection = true; }
- void addAddrsigSymbol(const MCSymbol *Sym) override {
- AddrsigSyms.push_back(Sym);
- }
-
uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
@@ -452,32 +444,6 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
Sym->MC = &MCSym;
}
-// Maximum offsets for different string table entry encodings.
-enum : unsigned { Max7DecimalOffset = 9999999U };
-enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6, including 0
-
-// Encode a string table entry offset in base 64, padded to 6 chars, and
-// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ...
-// Buffer must be at least 8 bytes large. No terminating null appended.
-static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
- assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset &&
- "Illegal section name encoding for value");
-
- static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789+/";
-
- Buffer[0] = '/';
- Buffer[1] = '/';
-
- char *Ptr = Buffer + 7;
- for (unsigned i = 0; i < 6; ++i) {
- unsigned Rem = Value % 64;
- Value /= 64;
- *(Ptr--) = Alphabet[Rem];
- }
-}
-
void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
if (S.Name.size() <= COFF::NameSize) {
std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
@@ -485,19 +451,8 @@ void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
}
uint64_t StringTableEntry = Strings.getOffset(S.Name);
- if (StringTableEntry <= Max7DecimalOffset) {
- SmallVector<char, COFF::NameSize> Buffer;
- Twine('/').concat(Twine(StringTableEntry)).toVector(Buffer);
- assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
- std::memcpy(S.Header.Name, Buffer.data(), Buffer.size());
- return;
- }
- if (StringTableEntry <= MaxBase64Offset) {
- // Starting with 10,000,000, offsets are encoded as base64.
- encodeBase64StringEntry(S.Header.Name, StringTableEntry);
- return;
- }
- report_fatal_error("COFF string table is greater than 64 GB.");
+ if (!COFF::encodeSectionName(S.Header.Name, StringTableEntry))
+ report_fatal_error("COFF string table is greater than 64 GB.");
}
void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) {
@@ -1003,7 +958,7 @@ void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
for (const auto &Section : Asm) {
COFFSection *Sec = SectionMap[&Section];
- if (Sec->Number == -1)
+ if (!Sec || Sec->Number == -1)
continue;
Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
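The block deleted above is the long-section-name scheme that
COFF::encodeSectionName now centralizes: names longer than eight bytes become
"/<decimal offset>" up to 9,999,999, then "//<6 base64 digits>" (most
significant digit first) up to 64^6 - 1. A standalone sketch of that logic,
not the shared helper itself:

#include <cstdint>
#include <cstdio>
#include <cstring>

static bool encodeSectionName(char Name[8], uint64_t Offset) {
  if (Offset <= 9999999u) { // "/9999999" still fits in eight bytes
    char Buf[16];
    int Len = std::snprintf(Buf, sizeof(Buf), "/%llu",
                            (unsigned long long)Offset);
    std::memset(Name, 0, 8);
    std::memcpy(Name, Buf, Len);
    return true;
  }
  if (Offset <= 0xFFFFFFFFFULL) { // 64^6 - 1
    static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                   "abcdefghijklmnopqrstuvwxyz"
                                   "0123456789+/";
    Name[0] = Name[1] = '/';
    for (int I = 7; I >= 2; --I) { // least significant digit last
      Name[I] = Alphabet[Offset % 64];
      Offset /= 64;
    }
    return true;
  }
  return false; // caller reports the "greater than 64 GB" fatal error
}

int main() {
  char Name[8];
  bool OK = encodeSectionName(Name, 42) && Name[1] == '4';
  OK &= encodeSectionName(Name, 10000000) && Name[1] == '/';
  return OK ? 0 : 1;
}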
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 177253d7a9d7..977e77bf67fd 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -22,8 +22,9 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCXCOFFObjectWriter.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <deque>
@@ -65,6 +66,10 @@ struct Symbol {
const MCSymbolXCOFF *const MCSym;
uint32_t SymbolTableIndex;
+ XCOFF::VisibilityType getVisibilityType() const {
+ return MCSym->getVisibilityType();
+ }
+
XCOFF::StorageClass getStorageClass() const {
return MCSym->getStorageClass();
}
@@ -77,12 +82,15 @@ struct Symbol {
struct XCOFFSection {
const MCSectionXCOFF *const MCSec;
uint32_t SymbolTableIndex;
- uint32_t Address;
- uint32_t Size;
+ uint64_t Address;
+ uint64_t Size;
SmallVector<Symbol, 1> Syms;
SmallVector<XCOFFRelocation, 1> Relocations;
StringRef getSymbolTableName() const { return MCSec->getSymbolTableName(); }
+ XCOFF::VisibilityType getVisibilityType() const {
+ return MCSec->getVisibilityType();
+ }
XCOFFSection(const MCSectionXCOFF *MCSec)
: MCSec(MCSec), SymbolTableIndex(-1), Address(-1), Size(0) {}
};
@@ -100,10 +108,10 @@ struct SectionEntry {
char Name[XCOFF::NameSize];
// The physical/virtual address of the section. For an object file
// these values are equivalent.
- uint32_t Address;
- uint32_t Size;
- uint32_t FileOffsetToData;
- uint32_t FileOffsetToRelocations;
+ uint64_t Address;
+ uint64_t Size;
+ uint64_t FileOffsetToData;
+ uint64_t FileOffsetToRelocations;
uint32_t RelocationCount;
int32_t Flags;
@@ -136,7 +144,7 @@ struct SectionEntry {
Index = UninitializedIndex;
}
- virtual ~SectionEntry() {}
+ virtual ~SectionEntry() = default;
};
// Represents the data related to a section excluding the csects that make up
@@ -165,16 +173,21 @@ struct CsectSectionEntry : public SectionEntry {
Group->clear();
}
- virtual ~CsectSectionEntry() {}
+ virtual ~CsectSectionEntry() = default;
};
struct DwarfSectionEntry : public SectionEntry {
// For DWARF section entry.
std::unique_ptr<XCOFFSection> DwarfSect;
+ // For a DWARF section, we must use the real size in the section header;
+ // MemorySize is the size the DWARF section occupies, including padding.
+ uint32_t MemorySize;
+
DwarfSectionEntry(StringRef N, int32_t Flags,
std::unique_ptr<XCOFFSection> Sect)
- : SectionEntry(N, Flags | XCOFF::STYP_DWARF), DwarfSect(std::move(Sect)) {
+ : SectionEntry(N, Flags | XCOFF::STYP_DWARF), DwarfSect(std::move(Sect)),
+ MemorySize(0) {
assert(DwarfSect->MCSec->isDwarfSect() &&
"This should be a DWARF section!");
assert(N.size() <= XCOFF::NameSize && "section name too long");
@@ -183,20 +196,24 @@ struct DwarfSectionEntry : public SectionEntry {
DwarfSectionEntry(DwarfSectionEntry &&s) = default;
- virtual ~DwarfSectionEntry() {}
+ virtual ~DwarfSectionEntry() = default;
};
class XCOFFObjectWriter : public MCObjectWriter {
uint32_t SymbolTableEntryCount = 0;
- uint32_t SymbolTableOffset = 0;
+ uint64_t SymbolTableOffset = 0;
uint16_t SectionCount = 0;
- uint32_t RelocationEntryOffset = 0;
+ uint64_t RelocationEntryOffset = 0;
+ std::vector<std::pair<std::string, size_t>> FileNames;
support::endian::Writer W;
std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter;
StringTableBuilder Strings;
+ const uint64_t MaxRawDataSize =
+ TargetObjectWriter->is64Bit() ? UINT64_MAX : UINT32_MAX;
+
// Maps the MCSection representation to its corresponding XCOFFSection
// wrapper. Needed for finding the XCOFFSection to insert an MCSymbol into
// from its containing MCSectionXCOFF.
@@ -244,26 +261,39 @@ class XCOFFObjectWriter : public MCObjectWriter {
uint64_t writeObject(MCAssembler &, const MCAsmLayout &) override;
- static bool nameShouldBeInStringTable(const StringRef &);
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool nameShouldBeInStringTable(const StringRef &);
void writeSymbolName(const StringRef &);
- void writeSymbolTableEntryForCsectMemberLabel(const Symbol &,
- const XCOFFSection &, int16_t,
- uint64_t);
- void writeSymbolTableEntryForControlSection(const XCOFFSection &, int16_t,
- XCOFF::StorageClass);
- void writeSymbolTableEntryForDwarfSection(const XCOFFSection &, int16_t);
+
+ void writeSymbolEntryForCsectMemberLabel(const Symbol &SymbolRef,
+ const XCOFFSection &CSectionRef,
+ int16_t SectionIndex,
+ uint64_t SymbolOffset);
+ void writeSymbolEntryForControlSection(const XCOFFSection &CSectionRef,
+ int16_t SectionIndex,
+ XCOFF::StorageClass StorageClass);
+ void writeSymbolEntryForDwarfSection(const XCOFFSection &DwarfSectionRef,
+ int16_t SectionIndex);
void writeFileHeader();
void writeSectionHeaderTable();
void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout);
void writeSectionForControlSectionEntry(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const CsectSectionEntry &CsectEntry,
- uint32_t &CurrentAddressLocation);
+ uint64_t &CurrentAddressLocation);
void writeSectionForDwarfSectionEntry(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const DwarfSectionEntry &DwarfEntry,
- uint32_t &CurrentAddressLocation);
+ uint64_t &CurrentAddressLocation);
void writeSymbolTable(const MCAsmLayout &Layout);
+ void writeSymbolAuxDwarfEntry(uint64_t LengthOfSectionPortion,
+ uint64_t NumberOfRelocEnt = 0);
+ void writeSymbolAuxCsectEntry(uint64_t SectionOrLength,
+ uint8_t SymbolAlignmentAndType,
+ uint8_t StorageMappingClass);
+ void writeSymbolEntry(StringRef SymbolName, uint64_t Value,
+ int16_t SectionNumber, uint16_t SymbolType,
+ uint8_t StorageClass, uint8_t NumberOfAuxEntries = 1);
void writeRelocations();
void writeRelocation(XCOFFRelocation Reloc, const XCOFFSection &Section);
@@ -278,10 +308,8 @@ class XCOFFObjectWriter : public MCObjectWriter {
void assignAddressesAndIndices(const MCAsmLayout &);
void finalizeSectionInfo();
- bool
- needsAuxiliaryHeader() const { /* TODO aux header support not implemented. */
- return false;
- }
+ // TODO aux header support not implemented.
+ bool needsAuxiliaryHeader() const { return false; }
// Returns the size of the auxiliary header to be written to the object file.
size_t auxiliaryHeaderSize() const {
@@ -293,6 +321,10 @@ class XCOFFObjectWriter : public MCObjectWriter {
public:
XCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
+
+ void writeWord(uint64_t Word) {
+ is64Bit() ? W.write<uint64_t>(Word) : W.write<uint32_t>(Word);
+ }
};
XCOFFObjectWriter::XCOFFObjectWriter(
@@ -396,9 +428,6 @@ static MCSectionXCOFF *getContainingCsect(const MCSymbolXCOFF *XSym) {
void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
- if (TargetObjectWriter->is64Bit())
- report_fatal_error("64-bit XCOFF object files are not supported yet.");
-
for (const auto &S : Asm) {
const auto *MCSec = cast<const MCSectionXCOFF>(&S);
assert(SectionMap.find(MCSec) == SectionMap.end() &&
@@ -424,7 +453,7 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
SectionMap[MCSec] = DwarfSec.get();
DwarfSectionEntry SecEntry(MCSec->getName(),
- MCSec->getDwarfSubtypeFlags().getValue(),
+ *MCSec->getDwarfSubtypeFlags(),
std::move(DwarfSec));
DwarfSections.push_back(std::move(SecEntry));
} else
@@ -470,6 +499,15 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
Strings.add(XSym->getSymbolTableName());
}
+ FileNames = Asm.getFileNames();
+ // Emit ".file" as the source file name when there is no file name.
+ if (FileNames.empty())
+ FileNames.emplace_back(".file", 0);
+ for (const std::pair<std::string, size_t> &F : FileNames) {
+ if (nameShouldBeInStringTable(F.first))
+ Strings.add(F.first);
+ }
+
Strings.finalize();
assignAddressesAndIndices(Layout);
}
@@ -547,10 +585,9 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
FixedValue = TOCEntryOffset;
}
- assert(
- (TargetObjectWriter->is64Bit() ||
- Fixup.getOffset() <= UINT32_MAX - Layout.getFragmentOffset(Fragment)) &&
- "Fragment offset + fixup offset is overflowed in 32-bit mode.");
+ assert((Fixup.getOffset() <=
+ MaxRawDataSize - Layout.getFragmentOffset(Fragment)) &&
+ "Fragment offset + fixup offset is overflowed.");
uint32_t FixupOffsetInCsect =
Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
@@ -590,7 +627,7 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
void XCOFFObjectWriter::writeSections(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
- uint32_t CurrentAddressLocation = 0;
+ uint64_t CurrentAddressLocation = 0;
for (const auto *Section : Sections)
writeSectionForControlSectionEntry(Asm, Layout, *Section,
CurrentAddressLocation);
@@ -607,9 +644,6 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
if (Asm.isIncrementalLinkerCompatible())
report_fatal_error("Incremental linking not supported for XCOFF.");
- if (TargetObjectWriter->is64Bit())
- report_fatal_error("64-bit XCOFF object files are not supported yet.");
-
finalizeSectionInfo();
uint64_t StartOffset = W.OS.tell();
@@ -617,7 +651,6 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
writeSectionHeaderTable();
writeSections(Asm, Layout);
writeRelocations();
-
writeSymbolTable(Layout);
// Write the string table.
Strings.write(W.OS);
@@ -626,142 +659,130 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
}
bool XCOFFObjectWriter::nameShouldBeInStringTable(const StringRef &SymbolName) {
- return SymbolName.size() > XCOFF::NameSize;
+ return SymbolName.size() > XCOFF::NameSize || is64Bit();
}
void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) {
+ // Either zeros plus a string table offset, or the symbol name itself.
if (nameShouldBeInStringTable(SymbolName)) {
W.write<int32_t>(0);
W.write<uint32_t>(Strings.getOffset(SymbolName));
} else {
- char Name[XCOFF::NameSize+1];
+ char Name[XCOFF::NameSize + 1];
std::strncpy(Name, SymbolName.data(), XCOFF::NameSize);
ArrayRef<char> NameRef(Name, XCOFF::NameSize);
W.write(NameRef);
}
}
-void XCOFFObjectWriter::writeSymbolTableEntryForCsectMemberLabel(
- const Symbol &SymbolRef, const XCOFFSection &CSectionRef,
- int16_t SectionIndex, uint64_t SymbolOffset) {
- // Name or Zeros and string table offset
- writeSymbolName(SymbolRef.getSymbolTableName());
- assert(SymbolOffset <= UINT32_MAX - CSectionRef.Address &&
- "Symbol address overflows.");
- W.write<uint32_t>(CSectionRef.Address + SymbolOffset);
- W.write<int16_t>(SectionIndex);
+void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value,
+ int16_t SectionNumber,
+ uint16_t SymbolType,
+ uint8_t StorageClass,
+ uint8_t NumberOfAuxEntries) {
+ if (is64Bit()) {
+ W.write<uint64_t>(Value);
+ W.write<uint32_t>(Strings.getOffset(SymbolName));
+ } else {
+ writeSymbolName(SymbolName);
+ W.write<uint32_t>(Value);
+ }
+ W.write<int16_t>(SectionNumber);
// Basic/Derived type. See the description of the n_type field for symbol
// table entries for a detailed description. Since we don't yet support
// visibility, and all other bits are either optionally set or reserved, this
// is always zero.
- // TODO FIXME How to assert a symbol's visibilty is default?
+ if (SymbolType != 0)
+ report_fatal_error("Emitting non-zero visibilities is not supported yet.");
// TODO Set the function indicator (bit 10, 0x0020) for functions
// when debugging is enabled.
- W.write<uint16_t>(0);
- W.write<uint8_t>(SymbolRef.getStorageClass());
- // Always 1 aux entry for now.
- W.write<uint8_t>(1);
-
- // Now output the auxiliary entry.
- W.write<uint32_t>(CSectionRef.SymbolTableIndex);
- // Parameter typecheck hash. Not supported.
- W.write<uint32_t>(0);
- // Typecheck section number. Not supported.
- W.write<uint16_t>(0);
- // Symbol type: Label
- W.write<uint8_t>(XCOFF::XTY_LD);
- // Storage mapping class.
- W.write<uint8_t>(CSectionRef.MCSec->getMappingClass());
- // Reserved (x_stab).
- W.write<uint32_t>(0);
- // Reserved (x_snstab).
- W.write<uint16_t>(0);
+ W.write<uint16_t>(SymbolType);
+ W.write<uint8_t>(StorageClass);
+ W.write<uint8_t>(NumberOfAuxEntries);
}
-void XCOFFObjectWriter::writeSymbolTableEntryForDwarfSection(
+void XCOFFObjectWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength,
+ uint8_t SymbolAlignmentAndType,
+ uint8_t StorageMappingClass) {
+ W.write<uint32_t>(is64Bit() ? Lo_32(SectionOrLength) : SectionOrLength);
+ W.write<uint32_t>(0); // ParameterHashIndex
+ W.write<uint16_t>(0); // TypeChkSectNum
+ W.write<uint8_t>(SymbolAlignmentAndType);
+ W.write<uint8_t>(StorageMappingClass);
+ if (is64Bit()) {
+ W.write<uint32_t>(Hi_32(SectionOrLength));
+ W.OS.write_zeros(1); // Reserved
+ W.write<uint8_t>(XCOFF::AUX_CSECT);
+ } else {
+ W.write<uint32_t>(0); // StabInfoIndex
+ W.write<uint16_t>(0); // StabSectNum
+ }
+}
+
+void XCOFFObjectWriter::writeSymbolAuxDwarfEntry(
+ uint64_t LengthOfSectionPortion, uint64_t NumberOfRelocEnt) {
+ writeWord(LengthOfSectionPortion);
+ if (!is64Bit())
+ W.OS.write_zeros(4); // Reserved
+ writeWord(NumberOfRelocEnt);
+ if (is64Bit()) {
+ W.OS.write_zeros(1); // Reserved
+ W.write<uint8_t>(XCOFF::AUX_SECT);
+ } else {
+ W.OS.write_zeros(6); // Reserved
+ }
+}
+
+void XCOFFObjectWriter::writeSymbolEntryForCsectMemberLabel(
+ const Symbol &SymbolRef, const XCOFFSection &CSectionRef,
+ int16_t SectionIndex, uint64_t SymbolOffset) {
+ assert(SymbolOffset <= MaxRawDataSize - CSectionRef.Address &&
+ "Symbol address overflowed.");
+
+ writeSymbolEntry(SymbolRef.getSymbolTableName(),
+ CSectionRef.Address + SymbolOffset, SectionIndex,
+ SymbolRef.getVisibilityType(), SymbolRef.getStorageClass());
+
+ writeSymbolAuxCsectEntry(CSectionRef.SymbolTableIndex, XCOFF::XTY_LD,
+ CSectionRef.MCSec->getMappingClass());
+}
+
+void XCOFFObjectWriter::writeSymbolEntryForDwarfSection(
const XCOFFSection &DwarfSectionRef, int16_t SectionIndex) {
assert(DwarfSectionRef.MCSec->isDwarfSect() && "Not a DWARF section!");
- // n_name, n_zeros, n_offset
- writeSymbolName(DwarfSectionRef.getSymbolTableName());
- // n_value
- W.write<uint32_t>(0);
- // n_scnum
- W.write<int16_t>(SectionIndex);
- // n_type
- W.write<uint16_t>(0);
- // n_sclass
- W.write<uint8_t>(XCOFF::C_DWARF);
- // Always 1 aux entry for now.
- W.write<uint8_t>(1);
-
- // Now output the auxiliary entry.
- // x_scnlen
- W.write<uint32_t>(DwarfSectionRef.Size);
- // Reserved
- W.write<uint32_t>(0);
- // x_nreloc. Set to 0 for now.
- W.write<uint32_t>(0);
- // Reserved
- W.write<uint32_t>(0);
- // Reserved
- W.write<uint16_t>(0);
+ writeSymbolEntry(DwarfSectionRef.getSymbolTableName(), /*Value=*/0,
+ SectionIndex, /*SymbolType=*/0, XCOFF::C_DWARF);
+
+ writeSymbolAuxDwarfEntry(DwarfSectionRef.Size);
}
-void XCOFFObjectWriter::writeSymbolTableEntryForControlSection(
+void XCOFFObjectWriter::writeSymbolEntryForControlSection(
const XCOFFSection &CSectionRef, int16_t SectionIndex,
XCOFF::StorageClass StorageClass) {
- // n_name, n_zeros, n_offset
- writeSymbolName(CSectionRef.getSymbolTableName());
- // n_value
- W.write<uint32_t>(CSectionRef.Address);
- // n_scnum
- W.write<int16_t>(SectionIndex);
- // Basic/Derived type. See the description of the n_type field for symbol
- // table entries for a detailed description. Since we don't yet support
- // visibility, and all other bits are either optionally set or reserved, this
- // is always zero.
- // TODO FIXME How to assert a symbol's visibilty is default?
- // TODO Set the function indicator (bit 10, 0x0020) for functions
- // when debugging is enabled.
- W.write<uint16_t>(0);
- // n_sclass
- W.write<uint8_t>(StorageClass);
- // Always 1 aux entry for now.
- W.write<uint8_t>(1);
-
- // Now output the auxiliary entry.
- W.write<uint32_t>(CSectionRef.Size);
- // Parameter typecheck hash. Not supported.
- W.write<uint32_t>(0);
- // Typecheck section number. Not supported.
- W.write<uint16_t>(0);
- // Symbol type.
- W.write<uint8_t>(getEncodedType(CSectionRef.MCSec));
- // Storage mapping class.
- W.write<uint8_t>(CSectionRef.MCSec->getMappingClass());
- // Reserved (x_stab).
- W.write<uint32_t>(0);
- // Reserved (x_snstab).
- W.write<uint16_t>(0);
+ writeSymbolEntry(CSectionRef.getSymbolTableName(), CSectionRef.Address,
+ SectionIndex, CSectionRef.getVisibilityType(), StorageClass);
+
+ writeSymbolAuxCsectEntry(CSectionRef.Size, getEncodedType(CSectionRef.MCSec),
+ CSectionRef.MCSec->getMappingClass());
}
void XCOFFObjectWriter::writeFileHeader() {
- // Magic.
- W.write<uint16_t>(0x01df);
- // Number of sections.
+ W.write<uint16_t>(is64Bit() ? XCOFF::XCOFF64 : XCOFF::XCOFF32);
W.write<uint16_t>(SectionCount);
- // Timestamp field. For reproducible output we write a 0, which represents no
- // timestamp.
- W.write<int32_t>(0);
- // Byte Offset to the start of the symbol table.
- W.write<uint32_t>(SymbolTableOffset);
- // Number of entries in the symbol table.
- W.write<int32_t>(SymbolTableEntryCount);
- // Size of the optional header.
- W.write<uint16_t>(0);
- // Flags.
- W.write<uint16_t>(0);
+ W.write<int32_t>(0); // TimeStamp
+ writeWord(SymbolTableOffset);
+ if (is64Bit()) {
+ W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object
+ // file that is not to be loaded.
+ W.write<uint16_t>(0); // Flags
+ W.write<int32_t>(SymbolTableEntryCount);
+ } else {
+ W.write<int32_t>(SymbolTableEntryCount);
+ W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object
+ // file that is not to be loaded.
+ W.write<uint16_t>(0); // Flags
+ }
}
void XCOFFObjectWriter::writeSectionHeaderTable() {
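Both symbol-table layouts that writeSymbolEntry now handles are fixed 18-byte
records; only the placement of the name and value fields differs. A standalone
sketch (big-endian, field names per the XCOFF specification; this mirrors the
write order above rather than reusing the LLVM code):

#include <cstdint>
#include <vector>

// XCOFF data is big-endian; emit the most significant byte first.
static void writeBE(std::vector<uint8_t> &Out, uint64_t V, unsigned Bytes) {
  for (unsigned I = 0; I < Bytes; ++I)
    Out.push_back(uint8_t(V >> (8 * (Bytes - 1 - I))));
}

static void writeSymbolEntry(std::vector<uint8_t> &Out, bool Is64,
                             uint64_t Value, uint32_t NameOffset,
                             int16_t SectionNumber, uint16_t SymbolType,
                             uint8_t StorageClass, uint8_t NumAux) {
  if (Is64) {
    writeBE(Out, Value, 8);      // n_value
    writeBE(Out, NameOffset, 4); // n_offset; XCOFF64 names always live in
                                 // the string table
  } else {
    writeBE(Out, 0, 4);          // n_zeroes (long-name form of n_name)
    writeBE(Out, NameOffset, 4); // n_offset
    writeBE(Out, Value, 4);      // n_value
  }
  writeBE(Out, uint16_t(SectionNumber), 2); // n_scnum
  writeBE(Out, SymbolType, 2);              // n_type
  writeBE(Out, StorageClass, 1);            // n_sclass
  writeBE(Out, NumAux, 1);                  // n_numaux
}

int main() {
  std::vector<uint8_t> Out;
  writeSymbolEntry(Out, /*Is64=*/true, 0x1000, 4, 1, 0, /*C_EXT=*/2, 1);
  return Out.size() == 18 ? 0 : 1; // both formats are 18 bytes
}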
@@ -777,28 +798,25 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
// Write the Physical Address and Virtual Address. In an object file these
// are the same.
// We use 0 for DWARF sections' Physical and Virtual Addresses.
- if (!IsDwarf) {
- W.write<uint32_t>(Sec->Address);
- W.write<uint32_t>(Sec->Address);
+ writeWord(IsDwarf ? 0 : Sec->Address);
+ writeWord(IsDwarf ? 0 : Sec->Address);
+
+ writeWord(Sec->Size);
+ writeWord(Sec->FileOffsetToData);
+ writeWord(Sec->FileOffsetToRelocations);
+ writeWord(0); // FileOffsetToLineNumberInfo. Not supported yet.
+
+ if (is64Bit()) {
+ W.write<uint32_t>(Sec->RelocationCount);
+ W.write<uint32_t>(0); // NumberOfLineNumbers. Not supported yet.
+ W.write<int32_t>(Sec->Flags);
+ W.OS.write_zeros(4);
} else {
- W.write<uint32_t>(0);
- W.write<uint32_t>(0);
+ W.write<uint16_t>(Sec->RelocationCount);
+ W.write<uint16_t>(0); // NumberOfLineNumbers. Not supported yet.
+ W.write<int32_t>(Sec->Flags);
}
- W.write<uint32_t>(Sec->Size);
- W.write<uint32_t>(Sec->FileOffsetToData);
- W.write<uint32_t>(Sec->FileOffsetToRelocations);
-
- // Line number pointer. Not supported yet.
- W.write<uint32_t>(0);
-
- W.write<uint16_t>(Sec->RelocationCount);
-
- // Line number counts. Not supported yet.
- W.write<uint16_t>(0);
-
- W.write<int32_t>(Sec->Flags);
-
return true;
};
@@ -811,11 +829,11 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc,
const XCOFFSection &Section) {
if (Section.MCSec->isCsect())
- W.write<uint32_t>(Section.Address + Reloc.FixupOffsetInCsect);
+ writeWord(Section.Address + Reloc.FixupOffsetInCsect);
else {
// DWARF sections' address is set to 0.
assert(Section.MCSec->isDwarfSect() && "unsupported section type!");
- W.write<uint32_t>(Reloc.FixupOffsetInCsect);
+ writeWord(Reloc.FixupOffsetInCsect);
}
W.write<uint32_t>(Reloc.SymbolTableIndex);
W.write<uint8_t>(Reloc.SignAndSize);
@@ -845,34 +863,18 @@ void XCOFFObjectWriter::writeRelocations() {
}
void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
- // Write symbol 0 as C_FILE.
- // FIXME: support 64-bit C_FILE symbol.
- //
- // n_name. The n_name of a C_FILE symbol is the source filename when no
- // auxiliary entries are present. The source filename is alternatively
- // provided by an auxiliary entry, in which case the n_name of the C_FILE
- // symbol is `.file`.
- // FIXME: add the real source filename.
- writeSymbolName(".file");
- // n_value. The n_value of a C_FILE symbol is its symbol table index.
- W.write<uint32_t>(0);
- // n_scnum. N_DEBUG is a reserved section number for indicating a special
- // symbolic debugging symbol.
- W.write<int16_t>(XCOFF::ReservedSectionNum::N_DEBUG);
- // n_type. The n_type field of a C_FILE symbol encodes the source language and
- // CPU version info; zero indicates no info.
- W.write<uint16_t>(0);
- // n_sclass. The C_FILE symbol provides source file-name information,
- // source-language ID and CPU-version ID information and some other optional
- // infos.
- W.write<uint8_t>(XCOFF::C_FILE);
- // n_numaux. No aux entry for now.
- W.write<uint8_t>(0);
+ // Write C_FILE symbols.
+ // The n_name of a C_FILE symbol is the source file's name when no auxiliary
+ // entries are present.
+ for (const std::pair<std::string, size_t> &F : FileNames) {
+ writeSymbolEntry(F.first, /*Value=*/0, XCOFF::ReservedSectionNum::N_DEBUG,
+ /*SymbolType=*/0, XCOFF::C_FILE,
+ /*NumberOfAuxEntries=*/0);
+ }
for (const auto &Csect : UndefinedCsects) {
- writeSymbolTableEntryForControlSection(Csect,
- XCOFF::ReservedSectionNum::N_UNDEF,
- Csect.MCSec->getStorageClass());
+ writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
+ Csect.MCSec->getStorageClass());
}
for (const auto *Section : Sections) {
@@ -887,19 +889,19 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
const int16_t SectionIndex = Section->Index;
for (const auto &Csect : *Group) {
// Write out the control section first and then each symbol in it.
- writeSymbolTableEntryForControlSection(Csect, SectionIndex,
- Csect.MCSec->getStorageClass());
+ writeSymbolEntryForControlSection(Csect, SectionIndex,
+ Csect.MCSec->getStorageClass());
for (const auto &Sym : Csect.Syms)
- writeSymbolTableEntryForCsectMemberLabel(
+ writeSymbolEntryForCsectMemberLabel(
Sym, Csect, SectionIndex, Layout.getSymbolOffset(*(Sym.MCSym)));
}
}
}
for (const auto &DwarfSection : DwarfSections)
- writeSymbolTableEntryForDwarfSection(*DwarfSection.DwarfSect,
- DwarfSection.Index);
+ writeSymbolEntryForDwarfSection(*DwarfSection.DwarfSect,
+ DwarfSection.Index);
}
void XCOFFObjectWriter::finalizeSectionInfo() {
@@ -914,8 +916,10 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
for (auto &Csect : *Group) {
const size_t CsectRelocCount = Csect.Relocations.size();
- if (CsectRelocCount >= XCOFF::RelocOverflow ||
- Section->RelocationCount >= XCOFF::RelocOverflow - CsectRelocCount)
+ // An XCOFF64 file may not contain an overflow section header.
+ if (!is64Bit() && (CsectRelocCount >= XCOFF::RelocOverflow ||
+ Section->RelocationCount >=
+ XCOFF::RelocOverflow - CsectRelocCount))
report_fatal_error(
"relocation entries overflowed; overflow section is "
"not implemented yet");
@@ -938,10 +942,12 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
return false;
Sec->FileOffsetToRelocations = RawPointer;
- const uint32_t RelocationSizeInSec =
- Sec->RelocationCount * XCOFF::RelocationSerializationSize32;
+ const uint64_t RelocationSizeInSec =
+ Sec->RelocationCount * (is64Bit()
+ ? XCOFF::RelocationSerializationSize64
+ : XCOFF::RelocationSerializationSize32);
RawPointer += RelocationSizeInSec;
- if (RawPointer > UINT32_MAX)
+ if (RawPointer > MaxRawDataSize)
report_fatal_error("Relocation data overflowed this object file.");
return true;
@@ -960,8 +966,8 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
}
void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
- // The first symbol table entry (at index 0) is for the file name.
- uint32_t SymbolTableIndex = 1;
+ // The symbol table starts with all the C_FILE symbols.
+ uint32_t SymbolTableIndex = FileNames.size();
// Calculate indices for undefined symbols.
for (auto &Csect : UndefinedCsects) {
@@ -976,10 +982,11 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
// The address corresponds to the address of sections and symbols in the
// object file. We place the shared address 0 immediately after the
// section header table.
- uint32_t Address = 0;
+ uint64_t Address = 0;
// Section indices are 1-based in XCOFF.
int32_t SectionIndex = 1;
bool HasTDataSection = false;
+ uint32_t PaddingsBeforeDwarf = 0;
for (auto *Section : Sections) {
const bool IsEmpty =
@@ -1039,6 +1046,19 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
Section->Size = Address - Section->Address;
}
+ // Start to generate DWARF sections. Sections other than DWARF sections use
+ // DefaultSectionAlign as the default alignment, while DWARF sections have
+ // their own alignments. If these two alignments differ, we need some
+ // padding here and must record the padding bytes for the FileOffsetToData
+ // calculation.
+ if (!DwarfSections.empty())
+ PaddingsBeforeDwarf =
+ alignTo(Address,
+ (*DwarfSections.begin()).DwarfSect->MCSec->getAlignment()) -
+ Address;
+
+ DwarfSectionEntry *LastDwarfSection = nullptr;
+
for (auto &DwarfSection : DwarfSections) {
assert((SectionIndex <= MaxSectionIndex) && "Section index overflow!");
@@ -1066,40 +1086,52 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
// For DWARF section, we must use the real size which may be not aligned.
DwarfSection.Size = DwarfSect.Size = Layout.getSectionAddressSize(MCSec);
- // Make the Address align to default alignment for follow section.
- Address = alignTo(DwarfSect.Address + DwarfSect.Size, DefaultSectionAlign);
+ Address = DwarfSection.Address + DwarfSection.Size;
+
+ if (LastDwarfSection)
+ LastDwarfSection->MemorySize =
+ DwarfSection.Address - LastDwarfSection->Address;
+ LastDwarfSection = &DwarfSection;
+ }
+ if (LastDwarfSection) {
+ // Make the final DWARF section address align to the default section
+ // alignment for the contents that follow.
+ Address = alignTo(LastDwarfSection->Address + LastDwarfSection->Size,
+ DefaultSectionAlign);
+ LastDwarfSection->MemorySize = Address - LastDwarfSection->Address;
}
SymbolTableEntryCount = SymbolTableIndex;
// Calculate the RawPointer value for each section.
- uint64_t RawPointer = XCOFF::FileHeaderSize32 + auxiliaryHeaderSize() +
- SectionCount * XCOFF::SectionHeaderSize32;
+ uint64_t RawPointer =
+ (is64Bit() ? (XCOFF::FileHeaderSize64 +
+ SectionCount * XCOFF::SectionHeaderSize64)
+ : (XCOFF::FileHeaderSize32 +
+ SectionCount * XCOFF::SectionHeaderSize32)) +
+ auxiliaryHeaderSize();
+
for (auto *Sec : Sections) {
if (Sec->Index == SectionEntry::UninitializedIndex || Sec->IsVirtual)
continue;
Sec->FileOffsetToData = RawPointer;
RawPointer += Sec->Size;
- if (RawPointer > UINT32_MAX)
+ if (RawPointer > MaxRawDataSize)
report_fatal_error("Section raw data overflowed this object file.");
}
- for (auto &DwarfSection : DwarfSections) {
- // Address of csect sections are always aligned to DefaultSectionAlign, but
- // address of DWARF section are aligned to Section alignment which may be
- // bigger than DefaultSectionAlign, need to execlude the padding bits.
- RawPointer =
- alignTo(RawPointer, DwarfSection.DwarfSect->MCSec->getAlignment());
+ // Increase the raw pointer for the padding bytes between csect sections and
+ // DWARF sections.
+ if (!DwarfSections.empty())
+ RawPointer += PaddingsBeforeDwarf;
+ for (auto &DwarfSection : DwarfSections) {
DwarfSection.FileOffsetToData = RawPointer;
- // Some section entries, like DWARF section size is not aligned, so
- // RawPointer may be not aligned.
- RawPointer += DwarfSection.Size;
- // Make sure RawPointer is aligned.
- RawPointer = alignTo(RawPointer, DefaultSectionAlign);
- assert(RawPointer <= UINT32_MAX &&
+ RawPointer += DwarfSection.MemorySize;
+
+ assert(RawPointer <= MaxRawDataSize &&
"Section raw data overflowed this object file.");
}
@@ -1108,7 +1140,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
void XCOFFObjectWriter::writeSectionForControlSectionEntry(
const MCAssembler &Asm, const MCAsmLayout &Layout,
- const CsectSectionEntry &CsectEntry, uint32_t &CurrentAddressLocation) {
+ const CsectSectionEntry &CsectEntry, uint64_t &CurrentAddressLocation) {
// Nothing to write for this Section.
if (CsectEntry.Index == SectionEntry::UninitializedIndex)
return;
@@ -1146,7 +1178,7 @@ void XCOFFObjectWriter::writeSectionForControlSectionEntry(
// The size of the tail padding in a section is the end virtual address of
// the current section minus the end virtual address of the last csect
// in that section.
- if (uint32_t PaddingSize =
+ if (uint64_t PaddingSize =
CsectEntry.Address + CsectEntry.Size - CurrentAddressLocation) {
W.OS.write_zeros(PaddingSize);
CurrentAddressLocation += PaddingSize;
@@ -1155,7 +1187,7 @@ void XCOFFObjectWriter::writeSectionForControlSectionEntry(
void XCOFFObjectWriter::writeSectionForDwarfSectionEntry(
const MCAssembler &Asm, const MCAsmLayout &Layout,
- const DwarfSectionEntry &DwarfEntry, uint32_t &CurrentAddressLocation) {
+ const DwarfSectionEntry &DwarfEntry, uint64_t &CurrentAddressLocation) {
// There could be a gap (without corresponding zero padding) between
// sections. For example DWARF section alignment is bigger than
// DefaultSectionAlign.
@@ -1163,7 +1195,7 @@ void XCOFFObjectWriter::writeSectionForDwarfSectionEntry(
"CurrentAddressLocation should be less than or equal to section "
"address.");
- if (uint32_t PaddingSize = DwarfEntry.Address - CurrentAddressLocation)
+ if (uint64_t PaddingSize = DwarfEntry.Address - CurrentAddressLocation)
W.OS.write_zeros(PaddingSize);
if (DwarfEntry.Size)
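The raw-pointer bookkeeping above reduces to: headers first, then csect raw
data, then the recorded padding so the first DWARF section starts aligned. A
worked sketch with the XCOFF header sizes (20/40 bytes for the 32-bit file and
section headers, 24/72 for 64-bit); the byte counts are illustrative:

#include <cstdint>

static uint64_t headerBytes(bool Is64, unsigned SectionCount) {
  return Is64 ? 24 + 72ull * SectionCount : 20 + 40ull * SectionCount;
}

int main() {
  // Hypothetical object: three sections, 0x200 bytes of csect data, and
  // 8 padding bytes before the first DWARF section.
  uint64_t RawPointer = headerBytes(/*Is64=*/true, 3); // no auxiliary header
  RawPointer += 0x200; // csect section raw data
  RawPointer += 8;     // PaddingsBeforeDwarf
  return RawPointer == 24 + 72 * 3 + 0x200 + 8 ? 0 : 1;
}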
diff --git a/llvm/lib/MCA/CustomBehaviour.cpp b/llvm/lib/MCA/CustomBehaviour.cpp
index a9ea8edff059..a10a2f5c56f0 100644
--- a/llvm/lib/MCA/CustomBehaviour.cpp
+++ b/llvm/lib/MCA/CustomBehaviour.cpp
@@ -16,7 +16,7 @@
namespace llvm {
namespace mca {
-CustomBehaviour::~CustomBehaviour() {}
+CustomBehaviour::~CustomBehaviour() = default;
unsigned CustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
const InstRef &IR) {
diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
index 121d320f10e6..bdc8b3d0e390 100644
--- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -39,7 +39,7 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
}
}
-LSUnitBase::~LSUnitBase() {}
+LSUnitBase::~LSUnitBase() = default;
void LSUnitBase::cycleEvent() {
for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups)
@@ -67,17 +67,17 @@ void LSUnitBase::dump() const {
#endif
unsigned LSUnit::dispatch(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- bool IsStoreBarrier = IR.getInstruction()->isAStoreBarrier();
- bool IsLoadBarrier = IR.getInstruction()->isALoadBarrier();
- assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
+ const Instruction &IS = *IR.getInstruction();
+ bool IsStoreBarrier = IS.isAStoreBarrier();
+ bool IsLoadBarrier = IS.isALoadBarrier();
+ assert((IS.getMayLoad() || IS.getMayStore()) && "Not a memory operation!");
- if (Desc.MayLoad)
+ if (IS.getMayLoad())
acquireLQSlot();
- if (Desc.MayStore)
+ if (IS.getMayStore())
acquireSQSlot();
- if (Desc.MayStore) {
+ if (IS.getMayStore()) {
unsigned NewGID = createMemoryGroup();
MemoryGroup &NewGroup = getGroup(NewGID);
NewGroup.addInstruction();
@@ -115,7 +115,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
if (IsStoreBarrier)
CurrentStoreBarrierGroupID = NewGID;
- if (Desc.MayLoad) {
+ if (IS.getMayLoad()) {
CurrentLoadGroupID = NewGID;
if (IsLoadBarrier)
CurrentLoadBarrierGroupID = NewGID;
@@ -124,7 +124,7 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
return NewGID;
}
- assert(Desc.MayLoad && "Expected a load!");
+ assert(IS.getMayLoad() && "Expected a load!");
unsigned ImmediateLoadDominator =
std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID);
@@ -194,10 +194,10 @@ unsigned LSUnit::dispatch(const InstRef &IR) {
}
LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- if (Desc.MayLoad && isLQFull())
+ const Instruction &IS = *IR.getInstruction();
+ if (IS.getMayLoad() && isLQFull())
return LSUnit::LSU_LQUEUE_FULL;
- if (Desc.MayStore && isSQFull())
+ if (IS.getMayStore() && isSQFull())
return LSUnit::LSU_SQUEUE_FULL;
return LSUnit::LSU_AVAILABLE;
}
@@ -212,9 +212,9 @@ void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
}
void LSUnitBase::onInstructionRetired(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- bool IsALoad = Desc.MayLoad;
- bool IsAStore = Desc.MayStore;
+ const Instruction &IS = *IR.getInstruction();
+ bool IsALoad = IS.getMayLoad();
+ bool IsAStore = IS.getMayStore();
assert((IsALoad || IsAStore) && "Expected a memory operation!");
if (IsALoad) {
diff --git a/llvm/lib/MCA/IncrementalSourceMgr.cpp b/llvm/lib/MCA/IncrementalSourceMgr.cpp
new file mode 100644
index 000000000000..10b86b501a2e
--- /dev/null
+++ b/llvm/lib/MCA/IncrementalSourceMgr.cpp
@@ -0,0 +1,51 @@
+//===-------------------- IncrementalSourceMgr.cpp ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the implementation of IncrementalSourceMgr.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/IncrementalSourceMgr.h"
+#ifndef NDEBUG
+#include "llvm/Support/Format.h"
+#endif
+
+using namespace llvm;
+using namespace llvm::mca;
+
+void IncrementalSourceMgr::clear() {
+ Staging.clear();
+ InstStorage.clear();
+ TotalCounter = 0U;
+ EOS = false;
+}
+
+void IncrementalSourceMgr::updateNext() {
+ ++TotalCounter;
+ Instruction *I = Staging.front();
+ Staging.pop_front();
+ I->reset();
+
+ if (InstFreedCB)
+ InstFreedCB(I);
+}
+
+#ifndef NDEBUG
+void IncrementalSourceMgr::printStatistic(raw_ostream &OS) {
+ unsigned MaxInstStorageSize = InstStorage.size();
+ if (MaxInstStorageSize <= TotalCounter) {
+ auto Ratio = double(MaxInstStorageSize) / double(TotalCounter);
+ OS << "Cache ratio = " << MaxInstStorageSize << " / " << TotalCounter
+ << llvm::format(" (%.2f%%)", (1.0 - Ratio) * 100.0) << "\n";
+ } else {
+ OS << "Error: Number of created instructions "
+ << "are larger than the number of issued instructions\n";
+ }
+}
+#endif
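A worked instance of the statistic printed above: if 1200 instructions were
issued in total but only 150 Instruction objects were ever materialized, 87.5%
of the issues reused a recycled object (the numbers are illustrative):

#include <cstdio>

int main() {
  unsigned MaxInstStorageSize = 150, TotalCounter = 1200;
  double Ratio = double(MaxInstStorageSize) / double(TotalCounter);
  std::printf("Cache ratio = %u / %u (%.2f%%)\n", MaxInstStorageSize,
              TotalCounter, (1.0 - Ratio) * 100.0);
  return 0;
}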
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index d8283f8d2682..45acea253587 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -14,16 +14,19 @@
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
-#define DEBUG_TYPE "llvm-mca"
+#define DEBUG_TYPE "llvm-mca-instrbuilder"
namespace llvm {
namespace mca {
+char RecycledInstErr::ID = 0;
+
InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
const llvm::MCInstrInfo &mcii,
const llvm::MCRegisterInfo &mri,
@@ -572,6 +575,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+ LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');
// Create a new empty descriptor.
std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
@@ -593,13 +597,6 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
FirstReturnInst = false;
}
- ID->MayLoad = MCDesc.mayLoad();
- ID->MayStore = MCDesc.mayStore();
- ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
- ID->BeginGroup = SCDesc.BeginGroup;
- ID->EndGroup = SCDesc.EndGroup;
- ID->RetireOOO = SCDesc.RetireOOO;
-
initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
computeMaxLatency(*ID, MCDesc, SCDesc, STI);
@@ -618,7 +615,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
// Now add the new descriptor.
bool IsVariadic = MCDesc.isVariadic();
- if (!IsVariadic && !IsVariant) {
+ if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
Descriptors[MCI.getOpcode()] = std::move(ID);
return *Descriptors[MCI.getOpcode()];
}
@@ -638,14 +635,43 @@ InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
return createInstrDescImpl(MCI);
}
+STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");
+
Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
if (!DescOrErr)
return DescOrErr.takeError();
const InstrDesc &D = *DescOrErr;
- std::unique_ptr<Instruction> NewIS =
- std::make_unique<Instruction>(D, MCI.getOpcode());
+ Instruction *NewIS = nullptr;
+ std::unique_ptr<Instruction> CreatedIS;
+ bool IsInstRecycled = false;
+
+ if (!D.IsRecyclable)
+ ++NumVariantInst;
+
+ if (D.IsRecyclable && InstRecycleCB) {
+ if (auto *I = InstRecycleCB(D)) {
+ NewIS = I;
+ NewIS->reset();
+ IsInstRecycled = true;
+ }
+ }
+ if (!IsInstRecycled) {
+ CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
+ NewIS = CreatedIS.get();
+ }
+
+ const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+ const MCSchedClassDesc &SCDesc =
+ *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);
+
+ NewIS->setMayLoad(MCDesc.mayLoad());
+ NewIS->setMayStore(MCDesc.mayStore());
+ NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
+ NewIS->setBeginGroup(SCDesc.BeginGroup);
+ NewIS->setEndGroup(SCDesc.EndGroup);
+ NewIS->setRetireOOO(SCDesc.RetireOOO);
// Check if this is a dependency breaking instruction.
APInt Mask;
@@ -663,6 +689,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// Initialize Reads first.
MCPhysReg RegID = 0;
+ size_t Idx = 0U;
for (const ReadDescriptor &RD : D.Reads) {
if (!RD.isImplicitRead()) {
// explicit read.
@@ -681,15 +708,22 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
continue;
// Okay, this is a register operand. Create a ReadState for it.
- NewIS->getUses().emplace_back(RD, RegID);
- ReadState &RS = NewIS->getUses().back();
+ ReadState *RS = nullptr;
+ if (IsInstRecycled && Idx < NewIS->getUses().size()) {
+ NewIS->getUses()[Idx] = ReadState(RD, RegID);
+ RS = &NewIS->getUses()[Idx++];
+ } else {
+ NewIS->getUses().emplace_back(RD, RegID);
+ RS = &NewIS->getUses().back();
+ ++Idx;
+ }
if (IsDepBreaking) {
// A mask of all zeroes means: explicit input operands are not
// independent.
if (Mask.isZero()) {
if (!RD.isImplicitRead())
- RS.setIndependentFromDef();
+ RS->setIndependentFromDef();
} else {
// Check if this register operand is independent according to `Mask`.
// Note that Mask may not have enough bits to describe all explicit and
@@ -699,15 +733,21 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
if (Mask.getBitWidth() > RD.UseIndex) {
// Okay. This map describe register use `RD.UseIndex`.
if (Mask[RD.UseIndex])
- RS.setIndependentFromDef();
+ RS->setIndependentFromDef();
}
}
}
}
+ if (IsInstRecycled && Idx < NewIS->getUses().size())
+ NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);
// Early exit if there are no writes.
- if (D.Writes.empty())
- return std::move(NewIS);
+ if (D.Writes.empty()) {
+ if (IsInstRecycled)
+ return llvm::make_error<RecycledInstErr>(NewIS);
+ else
+ return std::move(CreatedIS);
+ }
// Track register writes that implicitly clear the upper portion of the
// underlying super-registers using an APInt.
@@ -720,6 +760,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
// Initialize writes.
unsigned WriteIndex = 0;
+ Idx = 0U;
for (const WriteDescriptor &WD : D.Writes) {
RegID = WD.isImplicitWrite() ? WD.RegisterID
: MCI.getOperand(WD.OpIndex).getReg();
@@ -730,13 +771,26 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
}
assert(RegID && "Expected a valid register ID!");
- NewIS->getDefs().emplace_back(WD, RegID,
- /* ClearsSuperRegs */ WriteMask[WriteIndex],
- /* WritesZero */ IsZeroIdiom);
+ if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
+ NewIS->getDefs()[Idx++] =
+ WriteState(WD, RegID,
+ /* ClearsSuperRegs */ WriteMask[WriteIndex],
+ /* WritesZero */ IsZeroIdiom);
+ } else {
+ NewIS->getDefs().emplace_back(WD, RegID,
+ /* ClearsSuperRegs */ WriteMask[WriteIndex],
+ /* WritesZero */ IsZeroIdiom);
+ ++Idx;
+ }
++WriteIndex;
}
+ if (IsInstRecycled && Idx < NewIS->getDefs().size())
+ NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);
- return std::move(NewIS);
+ if (IsInstRecycled)
+ return llvm::make_error<RecycledInstErr>(NewIS);
+ else
+ return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm
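
Note on the recycling path added above: createInstruction now asks the
recycle callback for a dead Instruction before allocating, and a recycled
object travels back through a RecycledInstErr "error" precisely so the
caller never takes ownership of it a second time; only fresh allocations
are returned as owning pointers. A minimal, self-contained sketch of that
recycle-or-allocate shape (toy types, not the actual MCA classes):

    #include <functional>
    #include <memory>
    #include <vector>

    struct ToyInst {
      void reset() { /* wipe per-run state; keep cached descriptors */ }
    };

    // Ask the recycle callback for a dead object first; heap-allocate only
    // when none is available. The pool owns every fresh allocation.
    ToyInst *getOrCreate(const std::function<ToyInst *()> &RecycleCB,
                         std::vector<std::unique_ptr<ToyInst>> &Pool) {
      if (RecycleCB)
        if (ToyInst *I = RecycleCB()) {
          I->reset(); // reused object: clear transient state only
          return I;
        }
      Pool.push_back(std::make_unique<ToyInst>());
      return Pool.back().get();
    }
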
diff --git a/llvm/lib/MCA/Instruction.cpp b/llvm/lib/MCA/Instruction.cpp
index e658b869a67e..d4adfce59713 100644
--- a/llvm/lib/MCA/Instruction.cpp
+++ b/llvm/lib/MCA/Instruction.cpp
@@ -148,6 +148,18 @@ const CriticalDependency &Instruction::computeCriticalRegDep() {
return CriticalRegDep;
}
+void Instruction::reset() {
+  // Note that this does not clear the read/write descriptors or other
+  // non-trivial fields.
+ Stage = IS_INVALID;
+ CyclesLeft = UNKNOWN_CYCLES;
+ clearOptimizableMove();
+ RCUTokenID = 0;
+ LSUTokenID = 0;
+ CriticalResourceMask = 0;
+ IsEliminated = false;
+}
+
void Instruction::dispatch(unsigned RCUToken) {
assert(Stage == IS_INVALID);
Stage = IS_DISPATCHED;
diff --git a/llvm/lib/MCA/Pipeline.cpp b/llvm/lib/MCA/Pipeline.cpp
index 22b9d0799f77..c94fe1422a69 100644
--- a/llvm/lib/MCA/Pipeline.cpp
+++ b/llvm/lib/MCA/Pipeline.cpp
@@ -38,7 +38,8 @@ Expected<unsigned> Pipeline::run() {
assert(!Stages.empty() && "Unexpected empty pipeline found!");
do {
- notifyCycleBegin();
+ if (!isPaused())
+ notifyCycleBegin();
if (Error Err = runCycle())
return std::move(Err);
notifyCycleEnd();
@@ -53,15 +54,25 @@ Error Pipeline::runCycle() {
// Update stages before we start processing new instructions.
for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
const std::unique_ptr<Stage> &S = *I;
- Err = S->cycleStart();
+ if (isPaused())
+ Err = S->cycleResume();
+ else
+ Err = S->cycleStart();
}
+ CurrentState = State::Started;
+
// Now fetch and execute new instructions.
InstRef IR;
Stage &FirstStage = *Stages[0];
while (!Err && FirstStage.isAvailable(IR))
Err = FirstStage.execute(IR);
+ if (Err.isA<InstStreamPause>()) {
+ CurrentState = State::Paused;
+ return Err;
+ }
+
// Update stages in preparation for a new cycle.
for (const std::unique_ptr<Stage> &S : Stages) {
Err = S->cycleEnd();
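
The pause handling above turns Pipeline::run into a small state machine
(the enum appears to be Created/Started/Paused, going by the assignments
in this hunk): a cycle interrupted by an InstStreamPause error skips
notifyCycleBegin on re-entry and resumes its stages through cycleResume()
instead of cycleStart(). A rough sketch of that control flow, with toy
stand-ins for the MCA types:

    enum class State { Created, Started, Paused };

    struct ToyPipeline {
      State Current = State::Created;
      bool isPaused() const { return Current == State::Paused; }

      // Returns true when the cycle ran to completion, false when it paused.
      bool runCycle(bool StreamStalled) {
        if (isPaused())
          resumeStages(); // cycleResume() analogue
        else
          startStages();  // cycleStart() analogue
        Current = State::Started;
        if (StreamStalled) {       // InstStreamPause analogue
          Current = State::Paused; // remember we stopped mid-cycle
          return false;            // caller skips the end-of-cycle hooks
        }
        endStages(); // cycleEnd() analogue
        return true;
      }
      void startStages() {}
      void resumeStages() {}
      void endStages() {}
    };
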
diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp
index 66228bd5a862..10e433bf1689 100644
--- a/llvm/lib/MCA/Stages/DispatchStage.cpp
+++ b/llvm/lib/MCA/Stages/DispatchStage.cpp
@@ -78,7 +78,6 @@ bool DispatchStage::canDispatch(const InstRef &IR) const {
Error DispatchStage::dispatch(InstRef IR) {
assert(!CarryOver && "Cannot dispatch another instruction!");
Instruction &IS = *IR.getInstruction();
- const InstrDesc &Desc = IS.getDesc();
const unsigned NumMicroOps = IS.getNumMicroOps();
if (NumMicroOps > DispatchWidth) {
assert(AvailableEntries == DispatchWidth);
@@ -91,7 +90,7 @@ Error DispatchStage::dispatch(InstRef IR) {
}
  // Check if this instruction ends the dispatch group.
- if (Desc.EndGroup)
+ if (IS.getEndGroup())
AvailableEntries = 0;
// Check if this is an optimizable reg-reg move or an XCHG-like instruction.
@@ -159,12 +158,11 @@ bool DispatchStage::isAvailable(const InstRef &IR) const {
const Instruction &Inst = *IR.getInstruction();
unsigned NumMicroOps = Inst.getNumMicroOps();
- const InstrDesc &Desc = Inst.getDesc();
unsigned Required = std::min(NumMicroOps, DispatchWidth);
if (Required > AvailableEntries)
return false;
- if (Desc.BeginGroup && AvailableEntries != DispatchWidth)
+ if (Inst.getBeginGroup() && AvailableEntries != DispatchWidth)
return false;
// The dispatch logic doesn't internally buffer instructions. It only accepts
diff --git a/llvm/lib/MCA/Stages/EntryStage.cpp b/llvm/lib/MCA/Stages/EntryStage.cpp
index 66135790a4cd..6b3fbb8c6236 100644
--- a/llvm/lib/MCA/Stages/EntryStage.cpp
+++ b/llvm/lib/MCA/Stages/EntryStage.cpp
@@ -19,7 +19,7 @@ namespace llvm {
namespace mca {
bool EntryStage::hasWorkToComplete() const {
- return static_cast<bool>(CurrentInstruction);
+ return static_cast<bool>(CurrentInstruction) || !SM.isEnd();
}
bool EntryStage::isAvailable(const InstRef & /* unused */) const {
@@ -28,15 +28,20 @@ bool EntryStage::isAvailable(const InstRef & /* unused */) const {
return false;
}
-void EntryStage::getNextInstruction() {
+Error EntryStage::getNextInstruction() {
assert(!CurrentInstruction && "There is already an instruction to process!");
- if (!SM.hasNext())
- return;
+ if (!SM.hasNext()) {
+ if (!SM.isEnd())
+ return llvm::make_error<InstStreamPause>();
+ else
+ return llvm::ErrorSuccess();
+ }
SourceRef SR = SM.peekNext();
std::unique_ptr<Instruction> Inst = std::make_unique<Instruction>(SR.second);
CurrentInstruction = InstRef(SR.first, Inst.get());
Instructions.emplace_back(std::move(Inst));
SM.updateNext();
+ return llvm::ErrorSuccess();
}
llvm::Error EntryStage::execute(InstRef & /*unused */) {
@@ -46,16 +51,20 @@ llvm::Error EntryStage::execute(InstRef & /*unused */) {
// Move the program counter.
CurrentInstruction.invalidate();
- getNextInstruction();
- return llvm::ErrorSuccess();
+ return getNextInstruction();
}
llvm::Error EntryStage::cycleStart() {
if (!CurrentInstruction)
- getNextInstruction();
+ return getNextInstruction();
return llvm::ErrorSuccess();
}
+llvm::Error EntryStage::cycleResume() {
+ assert(!CurrentInstruction);
+ return getNextInstruction();
+}
+
llvm::Error EntryStage::cycleEnd() {
// Find the first instruction which hasn't been retired.
auto Range = make_range(&Instructions[NumRetired], Instructions.end());
diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp
index 2b11f73b19df..369e2f5a4ef1 100644
--- a/llvm/lib/MCA/Stages/ExecuteStage.cpp
+++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -165,8 +165,8 @@ static void verifyInstructionEliminated(const InstRef &IR) {
// Ensure that instructions eliminated at register renaming stage are in a
// consistent state.
- const InstrDesc &Desc = Inst.getDesc();
- assert(!Desc.MayLoad && !Desc.MayStore && "Cannot eliminate a memory op!");
+ assert(!Inst.getMayLoad() && !Inst.getMayStore() &&
+ "Cannot eliminate a memory op!");
}
#endif
diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index abfbc80f17c9..0f1737dc3cbc 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -63,7 +63,6 @@ bool InOrderIssueStage::isAvailable(const InstRef &IR) const {
const Instruction &Inst = *IR.getInstruction();
unsigned NumMicroOps = Inst.getNumMicroOps();
- const InstrDesc &Desc = Inst.getDesc();
bool ShouldCarryOver = NumMicroOps > getIssueWidth();
if (Bandwidth < NumMicroOps && !ShouldCarryOver)
@@ -71,7 +70,7 @@ bool InOrderIssueStage::isAvailable(const InstRef &IR) const {
  // An instruction with BeginGroup must be the first instruction issued in a
  // cycle.
- if (Desc.BeginGroup && NumIssued != 0)
+ if (Inst.getBeginGroup() && NumIssued != 0)
return false;
return true;
@@ -140,7 +139,7 @@ bool InOrderIssueStage::canExecute(const InstRef &IR) {
}
if (LastWriteBackCycle) {
- if (!IR.getInstruction()->getDesc().RetireOOO) {
+ if (!IR.getInstruction()->getRetireOOO()) {
unsigned NextWriteBackCycle = findFirstWriteBackCycle(IR);
// Delay the instruction to ensure that writes happen in program order.
if (NextWriteBackCycle < LastWriteBackCycle) {
@@ -254,7 +253,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
LLVM_DEBUG(dbgs() << "[N] Carry over #" << IR << " \n");
} else {
NumIssued += NumMicroOps;
- Bandwidth = Desc.EndGroup ? 0 : Bandwidth - NumMicroOps;
+ Bandwidth = IS.getEndGroup() ? 0 : Bandwidth - NumMicroOps;
}
// If the instruction has a latency of 0, we need to handle
@@ -272,7 +271,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
IssuedInst.push_back(IR);
- if (!IR.getInstruction()->getDesc().RetireOOO)
+ if (!IR.getInstruction()->getRetireOOO())
LastWriteBackCycle = IS.getCyclesLeft();
return llvm::ErrorSuccess();
@@ -325,7 +324,7 @@ void InOrderIssueStage::updateCarriedOver() {
LLVM_DEBUG(dbgs() << "[N] Carry over (complete) #" << CarriedOver << " \n");
- if (CarriedOver.getInstruction()->getDesc().EndGroup)
+ if (CarriedOver.getInstruction()->getEndGroup())
Bandwidth = 0;
else
Bandwidth -= CarryOver;
diff --git a/llvm/lib/MCA/Stages/Stage.cpp b/llvm/lib/MCA/Stages/Stage.cpp
index ed512ac9711c..5613d4d6bd07 100644
--- a/llvm/lib/MCA/Stages/Stage.cpp
+++ b/llvm/lib/MCA/Stages/Stage.cpp
@@ -24,5 +24,6 @@ void Stage::addListener(HWEventListener *Listener) {
Listeners.insert(Listener);
}
+char InstStreamPause::ID = 0;
} // namespace mca
} // namespace llvm
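
The out-of-line "char InstStreamPause::ID = 0;" is the one definition
LLVM's Error RTTI requires; the class itself (declared in Stage.h, not
shown in this hunk) presumably follows the standard ErrorInfo recipe,
roughly:

    #include "llvm/Support/Error.h"
    using namespace llvm;

    // A custom Error used as an in-band "pause" signal rather than a real
    // failure; callers detect it with Err.isA<InstStreamPause>(), as
    // Pipeline::runCycle does above. The log text here is illustrative.
    class InstStreamPause : public ErrorInfo<InstStreamPause> {
    public:
      static char ID; // the address of ID is the type tag for isA<>

      void log(raw_ostream &OS) const override { OS << "stream paused"; }
      std::error_code convertToErrorCode() const override {
        return inconvertibleErrorCode();
      }
    };
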
diff --git a/llvm/lib/ObjCopy/Archive.cpp b/llvm/lib/ObjCopy/Archive.cpp
new file mode 100644
index 000000000000..742ca0b890cf
--- /dev/null
+++ b/llvm/lib/ObjCopy/Archive.cpp
@@ -0,0 +1,110 @@
+//===- Archive.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Archive.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/ObjCopy.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+
+namespace llvm {
+namespace objcopy {
+
+using namespace llvm::object;
+
+Expected<std::vector<NewArchiveMember>>
+createNewArchiveMembers(const MultiFormatConfig &Config, const Archive &Ar) {
+ std::vector<NewArchiveMember> NewArchiveMembers;
+ Error Err = Error::success();
+ for (const Archive::Child &Child : Ar.children(Err)) {
+ Expected<StringRef> ChildNameOrErr = Child.getName();
+ if (!ChildNameOrErr)
+ return createFileError(Ar.getFileName(), ChildNameOrErr.takeError());
+
+ Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
+ if (!ChildOrErr)
+ return createFileError(Ar.getFileName() + "(" + *ChildNameOrErr + ")",
+ ChildOrErr.takeError());
+
+ SmallVector<char, 0> Buffer;
+ raw_svector_ostream MemStream(Buffer);
+
+ if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MemStream))
+ return std::move(E);
+
+ Expected<NewArchiveMember> Member = NewArchiveMember::getOldMember(
+ Child, Config.getCommonConfig().DeterministicArchives);
+ if (!Member)
+ return createFileError(Ar.getFileName(), Member.takeError());
+
+ Member->Buf = std::make_unique<SmallVectorMemoryBuffer>(
+ std::move(Buffer), ChildNameOrErr.get());
+ Member->MemberName = Member->Buf->getBufferIdentifier();
+ NewArchiveMembers.push_back(std::move(*Member));
+ }
+ if (Err)
+ return createFileError(Config.getCommonConfig().InputFilename,
+ std::move(Err));
+ return std::move(NewArchiveMembers);
+}
+
+// For regular archives this function simply calls llvm::writeArchive.
+// For thin archives it writes the archive file itself as well as its members.
+static Error deepWriteArchive(StringRef ArcName,
+ ArrayRef<NewArchiveMember> NewMembers,
+ bool WriteSymtab, object::Archive::Kind Kind,
+ bool Deterministic, bool Thin) {
+ if (Kind == object::Archive::K_BSD && !NewMembers.empty() &&
+ NewMembers.front().detectKindFromObject() == object::Archive::K_DARWIN)
+ Kind = object::Archive::K_DARWIN;
+
+ if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind,
+ Deterministic, Thin))
+ return createFileError(ArcName, std::move(E));
+
+ if (!Thin)
+ return Error::success();
+
+ for (const NewArchiveMember &Member : NewMembers) {
+    // For regular files (as is the case for deepWriteArchive),
+    // FileOutputBuffer::create returns an OnDiskBuffer. OnDiskBuffer writes
+    // to a temporary file that it renames into place, so no duplicated
+    // in-memory buffer actually exists here. For now, in-memory buffers
+    // cannot be avoided entirely, since NewArchiveMember still requires
+    // them even though writeArchive does not write them to disk.
+ Expected<std::unique_ptr<FileOutputBuffer>> FB =
+ FileOutputBuffer::create(Member.MemberName, Member.Buf->getBufferSize(),
+ FileOutputBuffer::F_executable);
+ if (!FB)
+ return FB.takeError();
+ std::copy(Member.Buf->getBufferStart(), Member.Buf->getBufferEnd(),
+ (*FB)->getBufferStart());
+ if (Error E = (*FB)->commit())
+ return E;
+ }
+ return Error::success();
+}
+
+Error executeObjcopyOnArchive(const MultiFormatConfig &Config,
+ const object::Archive &Ar) {
+ Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
+ createNewArchiveMembers(Config, Ar);
+ if (!NewArchiveMembersOrErr)
+ return NewArchiveMembersOrErr.takeError();
+ const CommonConfig &CommonConfig = Config.getCommonConfig();
+ return deepWriteArchive(CommonConfig.OutputFilename, *NewArchiveMembersOrErr,
+ Ar.hasSymbolTable(), Ar.kind(),
+ CommonConfig.DeterministicArchives, Ar.isThin());
+}
+
+} // end namespace objcopy
+} // end namespace llvm
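
One idiom in createNewArchiveMembers above is worth spelling out:
Archive::children takes an Error out-parameter that only becomes
meaningful once iteration stops, so it has to be checked after the loop.
Stripped to its skeleton (assuming only the llvm::object::Archive API
already used above):

    #include "llvm/Object/Archive.h"
    using namespace llvm;

    static Error visitMembers(const object::Archive &Ar) {
      Error Err = Error::success();
      for (const object::Archive::Child &Child : Ar.children(Err)) {
        (void)Child; // per-member work goes here; a failed read sets Err
                     // and terminates the iteration early
      }
      return Err; // inspect (or propagate) only after the loop finishes
    }
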
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.h b/llvm/lib/ObjCopy/Archive.h
index 182c95dc64c8..08aae563505c 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.h
+++ b/llvm/lib/ObjCopy/Archive.h
@@ -1,4 +1,4 @@
-//===- llvm-objcopy.h -------------------------------------------*- C++ -*-===//
+//===- Archive.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,24 +6,21 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_OBJCOPY_H
-#define LLVM_TOOLS_OBJCOPY_OBJCOPY_H
+#ifndef LLVM_LIB_OBJCOPY_ARCHIVE_H
+#define LLVM_LIB_OBJCOPY_ARCHIVE_H
+#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
+#include <vector>
namespace llvm {
-
-struct NewArchiveMember;
-
-namespace object {
-
-class Archive;
-
-} // end namespace object
-
namespace objcopy {
+
class MultiFormatConfig;
+
+/// Applies the transformations described by \p Config to
+/// each member in archive \p Ar.
+/// \returns Vector of transformed archive members.
Expected<std::vector<NewArchiveMember>>
createNewArchiveMembers(const MultiFormatConfig &Config,
const object::Archive &Ar);
@@ -31,4 +28,4 @@ createNewArchiveMembers(const MultiFormatConfig &Config,
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_OBJCOPY_H
+#endif // LLVM_LIB_OBJCOPY_ARCHIVE_H
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
index e0039cd3a675..cda93ce0fb3c 100644
--- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
@@ -6,12 +6,12 @@
//
//===----------------------------------------------------------------------===//
-#include "COFFObjcopy.h"
-#include "COFFConfig.h"
-#include "CommonConfig.h"
-#include "Object.h"
-#include "Reader.h"
-#include "Writer.h"
+#include "llvm/ObjCopy/COFF/COFFObjcopy.h"
+#include "COFFObject.h"
+#include "COFFReader.h"
+#include "COFFWriter.h"
+#include "llvm/ObjCopy/COFF/COFFConfig.h"
+#include "llvm/ObjCopy/CommonConfig.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
@@ -230,27 +230,41 @@ static Error handleArgs(const CommonConfig &Config,
It->second.NewFlags, Sec.Header.Characteristics);
}
- for (const auto &Flag : Config.AddSection) {
- StringRef SecName, FileName;
- std::tie(SecName, FileName) = Flag.split("=");
-
- auto BufOrErr = MemoryBuffer::getFile(FileName);
- if (!BufOrErr)
- return createFileError(FileName, errorCodeToError(BufOrErr.getError()));
- auto Buf = std::move(*BufOrErr);
-
+ for (const NewSectionInfo &NewSection : Config.AddSection) {
uint32_t Characteristics;
- const auto It = Config.SetSectionFlags.find(SecName);
+ const auto It = Config.SetSectionFlags.find(NewSection.SectionName);
if (It != Config.SetSectionFlags.end())
Characteristics = flagsToCharacteristics(It->second.NewFlags, 0);
else
Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES;
- addSection(
- Obj, SecName,
- makeArrayRef(reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
- Buf->getBufferSize()),
- Characteristics);
+ addSection(Obj, NewSection.SectionName,
+ makeArrayRef(reinterpret_cast<const uint8_t *>(
+ NewSection.SectionData->getBufferStart()),
+ NewSection.SectionData->getBufferSize()),
+ Characteristics);
+ }
+
+ for (const NewSectionInfo &NewSection : Config.UpdateSection) {
+ auto It = llvm::find_if(Obj.getMutableSections(), [&](auto &Sec) {
+ return Sec.Name == NewSection.SectionName;
+ });
+ if (It == Obj.getMutableSections().end())
+ return createStringError(errc::invalid_argument,
+ "could not find section with name '%s'",
+ NewSection.SectionName.str().c_str());
+ size_t ContentSize = It->getContents().size();
+ if (!ContentSize)
+ return createStringError(
+ errc::invalid_argument,
+ "section '%s' cannot be updated because it does not have contents",
+ NewSection.SectionName.str().c_str());
+ if (ContentSize < NewSection.SectionData->getBufferSize())
+ return createStringError(
+ errc::invalid_argument,
+ "new section cannot be larger than previous section");
+ It->setOwnedContents({NewSection.SectionData->getBufferStart(),
+ NewSection.SectionData->getBufferEnd()});
}
if (!Config.AddGnuDebugLink.empty())
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
index ec2628c7eca9..1d27b7eaa891 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
@@ -1,4 +1,4 @@
-//===- Object.cpp ---------------------------------------------------------===//
+//===- COFFObject.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Object.h"
+#include "COFFObject.h"
#include "llvm/ADT/DenseSet.h"
#include <algorithm>
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/lib/ObjCopy/COFF/COFFObject.h
index 0e854b58cbdb..66c0a19429ce 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.h
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.h
@@ -1,4 +1,4 @@
-//===- Object.h -------------------------------------------------*- C++ -*-===//
+//===- COFFObject.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
-#define LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
+#ifndef LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H
+#define LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -54,6 +54,7 @@ struct Section {
void setOwnedContents(std::vector<uint8_t> &&Data) {
ContentsRef = ArrayRef<uint8_t>();
OwnedContents = std::move(Data);
+ Header.SizeOfRawData = OwnedContents.size();
}
void clearContents() {
@@ -208,4 +209,4 @@ void copyPeHeader(PeHeader1Ty &Dest, const PeHeader2Ty &Src) {
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_COFF_OBJECT_H
+#endif // LLVM_LIB_OBJCOPY_COFF_COFFOBJECT_H
diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
index d1beacb3bd67..44bf303078dd 100644
--- a/llvm/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
@@ -1,4 +1,4 @@
-//===- Reader.cpp ---------------------------------------------------------===//
+//===- COFFReader.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Reader.h"
-#include "Object.h"
+#include "COFFReader.h"
+#include "COFFObject.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.h b/llvm/lib/ObjCopy/COFF/COFFReader.h
index 48c050b6ea11..b4957f844392 100644
--- a/llvm/tools/llvm-objcopy/COFF/Reader.h
+++ b/llvm/lib/ObjCopy/COFF/COFFReader.h
@@ -1,4 +1,4 @@
-//===- Reader.h -------------------------------------------------*- C++ -*-===//
+//===- COFFReader.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_COFF_READER_H
-#define LLVM_TOOLS_OBJCOPY_COFF_READER_H
+#ifndef LLVM_LIB_OBJCOPY_COFF_COFFREADER_H
+#define LLVM_LIB_OBJCOPY_COFF_COFFREADER_H
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
@@ -38,4 +38,4 @@ public:
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_COFF_READER_H
+#endif // LLVM_LIB_OBJCOPY_COFF_COFFREADER_H
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
index cbd0e4261238..88eb4d14ba25 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
@@ -1,4 +1,4 @@
-//===- Writer.cpp ---------------------------------------------------------===//
+//===- COFFWriter.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Writer.h"
-#include "Object.h"
+#include "COFFWriter.h"
+#include "COFFObject.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -116,7 +116,7 @@ void COFFWriter::layoutSections() {
}
}
-size_t COFFWriter::finalizeStringTable() {
+Expected<size_t> COFFWriter::finalizeStringTable() {
for (const auto &S : Obj.getSections())
if (S.Name.size() > COFF::NameSize)
StrTabBuilder.add(S.Name);
@@ -129,11 +129,16 @@ size_t COFFWriter::finalizeStringTable() {
for (auto &S : Obj.getMutableSections()) {
memset(S.Header.Name, 0, sizeof(S.Header.Name));
- if (S.Name.size() > COFF::NameSize) {
- snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
- (int)StrTabBuilder.getOffset(S.Name));
- } else {
+ if (S.Name.size() <= COFF::NameSize) {
+ // Short names can go in the field directly.
memcpy(S.Header.Name, S.Name.data(), S.Name.size());
+ } else {
+ // Offset of the section name in the string table.
+ size_t Offset = StrTabBuilder.getOffset(S.Name);
+ if (!COFF::encodeSectionName(S.Header.Name, Offset))
+ return createStringError(object_error::invalid_section_index,
+ "COFF string table is greater than 64GB, "
+ "unable to encode section name offset");
}
}
for (auto &S : Obj.getMutableSymbols()) {
@@ -219,7 +224,11 @@ Error COFFWriter::finalize(bool IsBigObj) {
Obj.PeHeader.CheckSum = 0;
}
- size_t StrTabSize = finalizeStringTable();
+ Expected<size_t> StrTabSizeOrErr = finalizeStringTable();
+ if (!StrTabSizeOrErr)
+ return StrTabSizeOrErr.takeError();
+
+ size_t StrTabSize = *StrTabSizeOrErr;
size_t PointerToSymbolTable = FileSize;
// StrTabSize <= 4 is the size of an empty string table, only consisting
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h
index eed43b3e5814..b7dca69e9a81 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.h
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h
@@ -1,4 +1,4 @@
-//===- Writer.h -------------------------------------------------*- C++ -*-===//
+//===- COFFWriter.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
-#define LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
+#ifndef LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H
+#define LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Error.h"
@@ -35,7 +35,7 @@ class COFFWriter {
Error finalizeRelocTargets();
Error finalizeSymbolContents();
void layoutSections();
- size_t finalizeStringTable();
+ Expected<size_t> finalizeStringTable();
Error finalize(bool IsBigObj);
@@ -60,4 +60,4 @@ public:
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
+#endif // LLVM_LIB_OBJCOPY_COFF_COFFWRITER_H
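
Context for the finalizeStringTable change in COFFWriter.cpp above: PE/COFF
stores a section name longer than eight bytes as an offset into the string
table, written into the 8-byte header field as "/<decimal>" when the offset
fits, or, for larger tables, a denser "//<base64>" form, topping out around
64 GB; past that no encoding fits, which is the failure the writer now
propagates instead of letting snprintf truncate. A hedged usage sketch of
the helper the new code relies on:

    #include "llvm/BinaryFormat/COFF.h"
    #include <cstdint>
    #include <cstring>

    // Encode a string-table offset into an 8-byte COFF section-name field.
    // Returns false when the offset is too large to represent, matching
    // the error path added in finalizeStringTable.
    bool setLongName(char (&Name)[llvm::COFF::NameSize], uint64_t Offset) {
      std::memset(Name, 0, sizeof(Name));
      return llvm::COFF::encodeSectionName(Name, Offset);
    }
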
diff --git a/llvm/lib/ObjCopy/CommonConfig.cpp b/llvm/lib/ObjCopy/CommonConfig.cpp
new file mode 100644
index 000000000000..e85715d0c44c
--- /dev/null
+++ b/llvm/lib/ObjCopy/CommonConfig.cpp
@@ -0,0 +1,50 @@
+//===- CommonConfig.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/CommonConfig.h"
+
+namespace llvm {
+namespace objcopy {
+
+Expected<NameOrPattern>
+NameOrPattern::create(StringRef Pattern, MatchStyle MS,
+ function_ref<Error(Error)> ErrorCallback) {
+ switch (MS) {
+ case MatchStyle::Literal:
+ return NameOrPattern(Pattern);
+ case MatchStyle::Wildcard: {
+ SmallVector<char, 32> Data;
+ bool IsPositiveMatch = true;
+ if (Pattern[0] == '!') {
+ IsPositiveMatch = false;
+ Pattern = Pattern.drop_front();
+ }
+ Expected<GlobPattern> GlobOrErr = GlobPattern::create(Pattern);
+
+ // If we couldn't create it as a glob, report the error, but try again
+ // with a literal if the error reporting is non-fatal.
+ if (!GlobOrErr) {
+ if (Error E = ErrorCallback(GlobOrErr.takeError()))
+ return std::move(E);
+ return create(Pattern, MatchStyle::Literal, ErrorCallback);
+ }
+
+ return NameOrPattern(std::make_shared<GlobPattern>(*GlobOrErr),
+ IsPositiveMatch);
+ }
+ case MatchStyle::Regex: {
+ SmallVector<char, 32> Data;
+ return NameOrPattern(std::make_shared<Regex>(
+ ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)));
+ }
+ }
+ llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum");
+}
+
+} // end namespace objcopy
+} // end namespace llvm
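
A hypothetical call site for the factory above, showing the wildcard style
(literal text is returned as-is, a leading '!' flips the match to negative,
and the regex style is anchored with ^...$ by create() itself):

    #include "llvm/ObjCopy/CommonConfig.h"
    using namespace llvm;
    using namespace llvm::objcopy;

    static Expected<NameOrPattern> makeSectionMatcher(StringRef Pat) {
      // Propagating the error unchanged makes a glob-compile failure
      // fatal; returning Error::success() from the callback would instead
      // fall back to a literal match, as the code above shows.
      return NameOrPattern::create(Pat, MatchStyle::Wildcard,
                                   [](Error E) { return E; });
    }
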
diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp
new file mode 100644
index 000000000000..9d8883a15c0b
--- /dev/null
+++ b/llvm/lib/ObjCopy/ConfigManager.cpp
@@ -0,0 +1,97 @@
+//===- ConfigManager.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/ConfigManager.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+
+Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
+ if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() ||
+ !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() ||
+ !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
+ !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
+ !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
+ Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
+ Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
+ Common.DecompressDebugSections ||
+ Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
+ return createStringError(llvm::errc::invalid_argument,
+ "option is not supported for COFF");
+
+ return COFF;
+}
+
+Expected<const MachOConfig &> ConfigManager::getMachOConfig() const {
+ if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() ||
+ !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() ||
+ !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() ||
+ !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
+ !Common.UnneededSymbolsToRemove.empty() ||
+ !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
+ Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU ||
+ Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
+ Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded ||
+ Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
+ return createStringError(llvm::errc::invalid_argument,
+ "option is not supported for MachO");
+
+ return MachO;
+}
+
+Expected<const WasmConfig &> ConfigManager::getWasmConfig() const {
+ if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition ||
+ !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() ||
+ Common.DiscardMode != DiscardType::None || !Common.SymbolsToAdd.empty() ||
+ !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToLocalize.empty() ||
+ !Common.SymbolsToKeep.empty() || !Common.SymbolsToRemove.empty() ||
+ !Common.UnneededSymbolsToRemove.empty() ||
+ !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
+ !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
+ !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty())
+ return createStringError(llvm::errc::invalid_argument,
+ "only flags for section dumping, removal, and "
+ "addition are supported");
+
+ return Wasm;
+}
+
+Expected<const XCOFFConfig &> ConfigManager::getXCOFFConfig() const {
+ if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition ||
+ !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() ||
+ Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() ||
+ !Common.DumpSection.empty() || !Common.SymbolsToAdd.empty() ||
+ !Common.KeepSection.empty() || !Common.OnlySection.empty() ||
+ !Common.ToRemove.empty() || !Common.SymbolsToGlobalize.empty() ||
+ !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
+ !Common.SymbolsToRemove.empty() ||
+ !Common.UnneededSymbolsToRemove.empty() ||
+ !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
+ !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
+ !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty() ||
+ Common.ExtractDWO || Common.ExtractMainPartition ||
+ Common.OnlyKeepDebug || Common.PreserveDates || Common.StripAllGNU ||
+ Common.StripDWO || Common.StripDebug || Common.StripNonAlloc ||
+ Common.StripSections || Common.Weaken || Common.StripUnneeded ||
+ Common.DecompressDebugSections) {
+ return createStringError(
+ llvm::errc::invalid_argument,
+ "no flags are supported yet, only basic copying is allowed");
+ }
+
+ return XCOFF;
+}
+
+} // end namespace objcopy
+} // end namespace llvm
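
These accessors turn "option X is unsupported for format Y" into a single
check at dispatch time: a driver asks for the per-format view of the
configuration and fails fast if any incompatible generic flag was set.
Sketch of a caller (hypothetical function, real ConfigManager API from
this file):

    #include "llvm/ObjCopy/ConfigManager.h"
    using namespace llvm;
    using namespace llvm::objcopy;

    // Reject unsupported generic flags before doing any real COFF work.
    static Error copyCOFF(const ConfigManager &CM) {
      Expected<const COFFConfig &> COFFCfg = CM.getCOFFConfig();
      if (!COFFCfg)
        return COFFCfg.takeError(); // "option is not supported for COFF"
      // ... run the COFF-specific pipeline with *COFFCfg ...
      return Error::success();
    }
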
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index f8521fa0d5b7..2d388f8a867e 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -6,11 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ELFObjcopy.h"
-#include "CommonConfig.h"
-#include "ELFConfig.h"
-#include "Object.h"
-#include "llvm-objcopy.h"
+#include "llvm/ObjCopy/ELF/ELFObjcopy.h"
+#include "ELFObject.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
@@ -20,6 +17,8 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ELF/ELFConfig.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
@@ -54,8 +53,7 @@ using namespace llvm::object;
using SectionPred = std::function<bool(const SectionBase &Sec)>;
static bool isDebugSection(const SectionBase &Sec) {
- return StringRef(Sec.Name).startswith(".debug") ||
- StringRef(Sec.Name).startswith(".zdebug") || Sec.Name == ".gdb_index";
+ return StringRef(Sec.Name).startswith(".debug") || Sec.Name == ".gdb_index";
}
static bool isDWOSection(const SectionBase &Sec) {
@@ -169,7 +167,7 @@ static std::unique_ptr<Writer> createWriter(const CommonConfig &Config,
template <class... Ts>
static Error makeStringError(std::error_code EC, const Twine &Msg,
- Ts &&... Args) {
+ Ts &&...Args) {
std::string FullMsg = (EC.message() + ": " + Msg).str();
return createStringError(EC, FullMsg.c_str(), std::forward<Ts>(Args)...);
}
@@ -303,10 +301,11 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config,
Sym.getShndx() != SHN_UNDEF)
Sym.Binding = STB_GLOBAL;
- if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding == STB_GLOBAL)
+ // SymbolsToWeaken applies to both STB_GLOBAL and STB_GNU_UNIQUE.
+ if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding != STB_LOCAL)
Sym.Binding = STB_WEAK;
- if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
+ if (Config.Weaken && Sym.Binding != STB_LOCAL &&
Sym.getShndx() != SHN_UNDEF)
Sym.Binding = STB_WEAK;
@@ -510,12 +509,8 @@ static Error replaceAndRemoveSections(const CommonConfig &Config,
if (Error Err = replaceDebugSections(
Obj, isCompressable,
[&Config, &Obj](const SectionBase *S) -> Expected<SectionBase *> {
- Expected<CompressedSection> NewSection =
- CompressedSection::create(*S, Config.CompressionType);
- if (!NewSection)
- return NewSection.takeError();
-
- return &Obj.addSection<CompressedSection>(std::move(*NewSection));
+ return &Obj.addSection<CompressedSection>(
+ CompressedSection(*S, Config.CompressionType));
}))
return Err;
} else if (Config.DecompressDebugSections) {
@@ -587,19 +582,12 @@ static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
}
static Error
-handleUserSection(StringRef Flag,
+handleUserSection(const NewSectionInfo &NewSection,
function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
- std::pair<StringRef, StringRef> SecPair = Flag.split("=");
- StringRef SecName = SecPair.first;
- StringRef File = SecPair.second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = MemoryBuffer::getFile(File);
- if (!BufOrErr)
- return createFileError(File, errorCodeToError(BufOrErr.getError()));
- std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
- ArrayRef<uint8_t> Data(
- reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
- Buf->getBufferSize());
- return F(SecName, Data);
+ ArrayRef<uint8_t> Data(reinterpret_cast<const uint8_t *>(
+ NewSection.SectionData->getBufferStart()),
+ NewSection.SectionData->getBufferSize());
+ return F(NewSection.SectionName, Data);
}
// This function handles the high level operations of GNU objcopy including
@@ -650,7 +638,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
if (Iter != Config.SectionsToRename.end()) {
const SectionRename &SR = Iter->second;
Sec.Name = std::string(SR.NewName);
- if (SR.NewFlags.hasValue())
+ if (SR.NewFlags)
setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
RenamedSections.insert(&Sec);
} else if (RelocSec && !(Sec.Flags & SHF_ALLOC))
@@ -718,7 +706,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
if (Sec.Flags & SHF_ALLOC && Sec.Type != SHT_NOTE)
Sec.Type = SHT_NOBITS;
- for (const auto &Flag : Config.AddSection) {
+ for (const NewSectionInfo &AddedSection : Config.AddSection) {
auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
OwnedDataSection &NewSection =
Obj.addSection<OwnedDataSection>(Name, Data);
@@ -726,15 +714,15 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
NewSection.Type = SHT_NOTE;
return Error::success();
};
- if (Error E = handleUserSection(Flag, AddSection))
+ if (Error E = handleUserSection(AddedSection, AddSection))
return E;
}
- for (StringRef Flag : Config.UpdateSection) {
+ for (const NewSectionInfo &NewSection : Config.UpdateSection) {
auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
return Obj.updateSection(Name, Data);
};
- if (Error E = handleUserSection(Flag, UpdateSection))
+ if (Error E = handleUserSection(NewSection, UpdateSection))
return E;
}
@@ -785,7 +773,7 @@ Error objcopy::elf::executeObjcopyOnIHex(const CommonConfig &Config,
return Obj.takeError();
const ElfType OutputElfType =
- getOutputElfType(Config.OutputArch.getValueOr(MachineInfo()));
+ getOutputElfType(Config.OutputArch.value_or(MachineInfo()));
if (Error E = handleArgs(Config, ELFConfig, **Obj))
return E;
return writeOutput(Config, **Obj, Out, OutputElfType);
@@ -803,7 +791,7 @@ Error objcopy::elf::executeObjcopyOnRawBinary(const CommonConfig &Config,
// Prefer OutputArch (-O<format>) if set; otherwise fall back to BinaryArch
// (-B<arch>).
const ElfType OutputElfType =
- getOutputElfType(Config.OutputArch.getValueOr(MachineInfo()));
+ getOutputElfType(Config.OutputArch.value_or(MachineInfo()));
if (Error E = handleArgs(Config, ELFConfig, **Obj))
return E;
return writeOutput(Config, **Obj, Out, OutputElfType);
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 659e12bf0306..b241bd817ff5 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -1,4 +1,4 @@
-//===- Object.cpp ---------------------------------------------------------===//
+//===- ELFObject.cpp ------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Object.h"
+#include "ELFObject.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -519,78 +519,42 @@ Error BinarySectionWriter::visit(const CompressedSection &Sec) {
template <class ELFT>
Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
- if (Sec.CompressionType == DebugCompressionType::None) {
+ Elf_Chdr_Impl<ELFT> Chdr;
+ switch (Sec.CompressionType) {
+ case DebugCompressionType::None:
std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
return Error::success();
- }
-
- if (Sec.CompressionType == DebugCompressionType::GNU) {
- const char *Magic = "ZLIB";
- memcpy(Buf, Magic, strlen(Magic));
- Buf += strlen(Magic);
- const uint64_t DecompressedSize =
- support::endian::read64be(&Sec.DecompressedSize);
- memcpy(Buf, &DecompressedSize, sizeof(DecompressedSize));
- Buf += sizeof(DecompressedSize);
- } else {
- Elf_Chdr_Impl<ELFT> Chdr;
+ case DebugCompressionType::GNU:
+ llvm_unreachable("unexpected zlib-gnu");
+ break;
+ case DebugCompressionType::Z:
Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB;
- Chdr.ch_size = Sec.DecompressedSize;
- Chdr.ch_addralign = Sec.DecompressedAlign;
- memcpy(Buf, &Chdr, sizeof(Chdr));
- Buf += sizeof(Chdr);
+ break;
}
+ Chdr.ch_size = Sec.DecompressedSize;
+ Chdr.ch_addralign = Sec.DecompressedAlign;
+ memcpy(Buf, &Chdr, sizeof(Chdr));
+ Buf += sizeof(Chdr);
std::copy(Sec.CompressedData.begin(), Sec.CompressedData.end(), Buf);
return Error::success();
}
-Expected<CompressedSection>
-CompressedSection::create(const SectionBase &Sec,
- DebugCompressionType CompressionType) {
- Error Err = Error::success();
- CompressedSection Section(Sec, CompressionType, Err);
-
- if (Err)
- return std::move(Err);
-
- return Section;
-}
-Expected<CompressedSection>
-CompressedSection::create(ArrayRef<uint8_t> CompressedData,
- uint64_t DecompressedSize,
- uint64_t DecompressedAlign) {
- return CompressedSection(CompressedData, DecompressedSize, DecompressedAlign);
-}
-
CompressedSection::CompressedSection(const SectionBase &Sec,
- DebugCompressionType CompressionType,
- Error &OutErr)
+ DebugCompressionType CompressionType)
: SectionBase(Sec), CompressionType(CompressionType),
DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
- ErrorAsOutParameter EAO(&OutErr);
-
- if (Error Err = zlib::compress(
- StringRef(reinterpret_cast<const char *>(OriginalData.data()),
- OriginalData.size()),
- CompressedData)) {
- OutErr = createStringError(llvm::errc::invalid_argument,
- "'" + Name + "': " + toString(std::move(Err)));
- return;
- }
-
- size_t ChdrSize;
- if (CompressionType == DebugCompressionType::GNU) {
- Name = ".z" + Sec.Name.substr(1);
- ChdrSize = sizeof("ZLIB") - 1 + sizeof(uint64_t);
- } else {
- Flags |= ELF::SHF_COMPRESSED;
- ChdrSize =
- std::max(std::max(sizeof(object::Elf_Chdr_Impl<object::ELF64LE>),
- sizeof(object::Elf_Chdr_Impl<object::ELF64BE>)),
- std::max(sizeof(object::Elf_Chdr_Impl<object::ELF32LE>),
- sizeof(object::Elf_Chdr_Impl<object::ELF32BE>)));
- }
+ zlib::compress(StringRef(reinterpret_cast<const char *>(OriginalData.data()),
+ OriginalData.size()),
+ CompressedData);
+
+ assert(CompressionType != DebugCompressionType::None);
+ Flags |= ELF::SHF_COMPRESSED;
+ size_t ChdrSize =
+ std::max(std::max(sizeof(object::Elf_Chdr_Impl<object::ELF64LE>),
+ sizeof(object::Elf_Chdr_Impl<object::ELF64BE>)),
+ std::max(sizeof(object::Elf_Chdr_Impl<object::ELF32LE>),
+ sizeof(object::Elf_Chdr_Impl<object::ELF32BE>)));
Size = ChdrSize + CompressedData.size();
Align = 8;
}
@@ -681,6 +645,15 @@ static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) {
return Index == SHN_AMDGPU_LDS;
}
+ if (Machine == EM_MIPS) {
+ switch (Index) {
+ case SHN_MIPS_ACOMMON:
+ case SHN_MIPS_SCOMMON:
+ case SHN_MIPS_SUNDEFINED:
+ return true;
+ }
+ }
+
if (Machine == EM_HEXAGON) {
switch (Index) {
case SHN_HEXAGON_SCOMMON:
@@ -767,8 +740,8 @@ Error SymbolTableSection::removeSectionReferences(
}
void SymbolTableSection::updateSymbols(function_ref<void(Symbol &)> Callable) {
- std::for_each(std::begin(Symbols) + 1, std::end(Symbols),
- [Callable](SymPtr &Sym) { Callable(*Sym); });
+ for (SymPtr &Sym : llvm::drop_begin(Symbols))
+ Callable(*Sym);
std::stable_partition(
std::begin(Symbols), std::end(Symbols),
[](const SymPtr &Sym) { return Sym->Binding == STB_LOCAL; });
@@ -1768,12 +1741,8 @@ Expected<SectionBase &> ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
uint64_t DecompressedSize, DecompressedAlign;
std::tie(DecompressedSize, DecompressedAlign) =
getDecompressedSizeAndAlignment<ELFT>(*Data);
- Expected<CompressedSection> NewSection =
- CompressedSection::create(*Data, DecompressedSize, DecompressedAlign);
- if (!NewSection)
- return NewSection.takeError();
-
- return Obj.addSection<CompressedSection>(std::move(*NewSection));
+ return Obj.addSection<CompressedSection>(
+ CompressedSection(*Data, DecompressedSize, DecompressedAlign));
}
return Obj.addSection<Section>(*Data);
@@ -1813,9 +1782,9 @@ template <class ELFT> Error ELFBuilder<ELFT>::readSectionHeaders() {
Sec->EntrySize = Shdr.sh_entsize;
Sec->Index = Index++;
Sec->OriginalIndex = Sec->Index;
- Sec->OriginalData =
- ArrayRef<uint8_t>(ElfFile.base() + Shdr.sh_offset,
- (Shdr.sh_type == SHT_NOBITS) ? (size_t)0 : Shdr.sh_size);
+ Sec->OriginalData = ArrayRef<uint8_t>(
+ ElfFile.base() + Shdr.sh_offset,
+ (Shdr.sh_type == SHT_NOBITS) ? (size_t)0 : Shdr.sh_size);
}
return Error::success();
@@ -1937,9 +1906,9 @@ template <class ELFT> Error ELFBuilder<ELFT>::build(bool EnsureSymtab) {
return readProgramHeaders(*HeadersFile);
}
-Writer::~Writer() {}
+Writer::~Writer() = default;
-Reader::~Reader() {}
+Reader::~Reader() = default;
Expected<std::unique_ptr<Object>>
BinaryReader::create(bool /*EnsureSymtab*/) const {
@@ -2156,7 +2125,7 @@ Error Object::updateSection(StringRef Name, ArrayRef<uint8_t> Data) {
if (!OldSec->hasContents())
return createStringError(
errc::invalid_argument,
- "section '%s' can't be updated because it does not have contents",
+ "section '%s' cannot be updated because it does not have contents",
Name.str().c_str());
if (Data.size() > OldSec->Size && OldSec->ParentSegment)
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 681ab8f56381..f33bbb029c9b 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -1,4 +1,4 @@
-//===- Object.h -------------------------------------------------*- C++ -*-===//
+//===- ELFObject.h ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,15 +6,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H
-#define LLVM_TOOLS_OBJCOPY_OBJECT_H
+#ifndef LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
+#define LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
-#include "CommonConfig.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/ObjCopy/CommonConfig.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileOutputBuffer.h"
@@ -542,11 +542,10 @@ class CompressedSection : public SectionBase {
SmallVector<char, 128> CompressedData;
public:
- static Expected<CompressedSection>
- create(const SectionBase &Sec, DebugCompressionType CompressionType);
- static Expected<CompressedSection> create(ArrayRef<uint8_t> CompressedData,
- uint64_t DecompressedSize,
- uint64_t DecompressedAlign);
+ CompressedSection(const SectionBase &Sec,
+ DebugCompressionType CompressionType);
+ CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
+ uint64_t DecompressedAlign);
uint64_t getDecompressedSize() const { return DecompressedSize; }
uint64_t getDecompressedAlign() const { return DecompressedAlign; }
@@ -555,15 +554,8 @@ public:
Error accept(MutableSectionVisitor &Visitor) override;
static bool classof(const SectionBase *S) {
- return (S->OriginalFlags & ELF::SHF_COMPRESSED) ||
- (StringRef(S->Name).startswith(".zdebug"));
+ return S->OriginalFlags & ELF::SHF_COMPRESSED;
}
-
-private:
- CompressedSection(const SectionBase &Sec,
- DebugCompressionType CompressionType, Error &Err);
- CompressedSection(ArrayRef<uint8_t> CompressedData, uint64_t DecompressedSize,
- uint64_t DecompressedAlign);
};
class DecompressedSection : public SectionBase {
@@ -575,8 +567,6 @@ public:
Size = Sec.getDecompressedSize();
Align = Sec.getDecompressedAlign();
Flags = OriginalFlags = (Flags & ~ELF::SHF_COMPRESSED);
- if (StringRef(Name).startswith(".zdebug"))
- Name = "." + Name.substr(2);
}
Error accept(SectionVisitor &Visitor) const override;
@@ -628,6 +618,11 @@ enum SymbolShndxType {
SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
+ SYMBOL_MIPS_ACOMMON = ELF::SHN_MIPS_ACOMMON,
+ SYMBOL_MIPS_TEXT = ELF::SHN_MIPS_TEXT,
+ SYMBOL_MIPS_DATA = ELF::SHN_MIPS_DATA,
+ SYMBOL_MIPS_SCOMMON = ELF::SHN_MIPS_SCOMMON,
+ SYMBOL_MIPS_SUNDEFINED = ELF::SHN_MIPS_SUNDEFINED,
SYMBOL_HIPROC = ELF::SHN_HIPROC,
SYMBOL_LOOS = ELF::SHN_LOOS,
SYMBOL_HIOS = ELF::SHN_HIOS,
@@ -998,7 +993,7 @@ class IHexReader : public Reader {
std::move(E));
}
template <typename... Ts>
- Error parseError(size_t LineNo, char const *Fmt, const Ts &... Vals) const {
+ Error parseError(size_t LineNo, char const *Fmt, const Ts &...Vals) const {
Error E = createStringError(errc::invalid_argument, Fmt, Vals...);
return parseError(LineNo, std::move(E));
}
@@ -1088,7 +1083,7 @@ public:
std::function<bool(const SectionBase &)> ToRemove);
Error replaceSections(const DenseMap<SectionBase *, SectionBase *> &FromTo);
Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
- template <class T, class... Ts> T &addSection(Ts &&... Args) {
+ template <class T, class... Ts> T &addSection(Ts &&...Args) {
auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
auto Ptr = Sec.get();
MustBeRelocatable |= isa<RelocationSection>(*Ptr);
@@ -1110,4 +1105,4 @@ public:
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_OBJCOPY_OBJECT_H
+#endif // LLVM_LIB_OBJCOPY_ELF_ELFOBJECT_H
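
Narrowing CompressedSection::classof to SHF_COMPRESSED alone (now that
.zdebug handling is gone) is load-bearing: classof is what drives
LLVM-style RTTI over the SectionBase hierarchy. A sketch of a consumer,
assuming Object exposes an iterable sections() range as it does elsewhere
in this file:

    #include "llvm/Support/Casting.h"
    using namespace llvm;
    using namespace llvm::objcopy::elf;

    // Only sections whose original flags carry SHF_COMPRESSED pass the
    // dyn_cast, because that is exactly what classof() tests.
    static void forEachCompressed(const Object &Obj) {
      for (const SectionBase &Sec : Obj.sections())
        if (auto *CS = dyn_cast<CompressedSection>(&Sec))
          (void)CS->getDecompressedSize();
    }
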
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp
index 6b731abd9ed9..6b731abd9ed9 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h
index 44d03b4af7e8..8d8716df22bb 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
+++ b/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
-#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
-#include "MachOObjcopy.h"
-#include "Object.h"
+#include "MachOObject.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
namespace llvm {
namespace objcopy {
@@ -87,11 +87,11 @@ public:
StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
- const CodeSignatureInfo &getCodeSignature() { return CodeSignature; }
+ const CodeSignatureInfo &getCodeSignature() const { return CodeSignature; }
};
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
index 0f92ca516bef..5db03a4e268e 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
@@ -6,14 +6,15 @@
//
//===----------------------------------------------------------------------===//
-#include "MachOObjcopy.h"
-#include "../llvm-objcopy.h"
-#include "CommonConfig.h"
-#include "MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "Archive.h"
#include "MachOReader.h"
#include "MachOWriter.h"
-#include "MultiFormatConfig.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/ObjCopy.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/MachOUniversalWriter.h"
@@ -98,7 +99,7 @@ static void updateAndRemoveSymbols(const CommonConfig &Config,
Sym.Name = std::string(I->getValue());
}
- auto RemovePred = [Config, MachOConfig,
+ auto RemovePred = [&Config, &MachOConfig,
&Obj](const std::unique_ptr<SymbolEntry> &N) {
if (N->Referenced)
return false;
@@ -257,6 +258,20 @@ static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
if (!MachOConfig.RPathToPrepend.empty())
Obj.updateLoadCommandIndexes();
+ // Remove any empty segments if required.
+ if (!MachOConfig.EmptySegmentsToRemove.empty()) {
+ auto RemovePred = [&MachOConfig](const LoadCommand &LC) {
+ if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 ||
+ LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) {
+ return LC.Sections.empty() &&
+ MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName());
+ }
+ return false;
+ };
+ if (Error E = Obj.removeLoadCommands(RemovePred))
+ return E;
+ }
+
return Error::success();
}
@@ -282,17 +297,12 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
SecName.str().c_str());
}
-static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
- MemoryBuffer::getFile(Filename);
- if (!BufOrErr)
- return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
- std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
-
- std::pair<StringRef, StringRef> Pair = SecName.split(',');
+static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
+ std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
StringRef TargetSegName = Pair.first;
Section Sec(TargetSegName, Pair.second);
- Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer());
+ Sec.Content =
+ Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
Sec.Size = Sec.Content.size();
  // Add the new section to an existing segment.
@@ -341,24 +351,18 @@ static Expected<Section &> findSection(StringRef SecName, Object &O) {
return *FoundSec->get();
}
-static Error updateSection(StringRef SecName, StringRef Filename, Object &O) {
- Expected<Section &> SecToUpdateOrErr = findSection(SecName, O);
+static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
+ Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
if (!SecToUpdateOrErr)
return SecToUpdateOrErr.takeError();
Section &Sec = *SecToUpdateOrErr;
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
- MemoryBuffer::getFile(Filename);
- if (!BufOrErr)
- return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
- std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
-
- if (Buf->getBufferSize() > Sec.Size)
+ if (NewSection.SectionData->getBufferSize() > Sec.Size)
return createStringError(
errc::invalid_argument,
"new section cannot be larger than previous section");
- Sec.Content = O.NewSectionsContents.save(Buf->getBuffer());
+ Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
Sec.Size = Sec.Content.size();
return Error::success();
}
@@ -410,23 +414,17 @@ static Error handleArgs(const CommonConfig &Config,
for (std::unique_ptr<Section> &Sec : LC.Sections)
Sec->Relocations.clear();
- for (const auto &Flag : Config.AddSection) {
- std::pair<StringRef, StringRef> SecPair = Flag.split("=");
- StringRef SecName = SecPair.first;
- StringRef File = SecPair.second;
- if (Error E = isValidMachOCannonicalName(SecName))
+ for (const NewSectionInfo &NewSection : Config.AddSection) {
+ if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
return E;
- if (Error E = addSection(SecName, File, Obj))
+ if (Error E = addSection(NewSection, Obj))
return E;
}
- for (const auto &Flag : Config.UpdateSection) {
- StringRef SectionName;
- StringRef FileName;
- std::tie(SectionName, FileName) = Flag.split('=');
- if (Error E = isValidMachOCannonicalName(SectionName))
+ for (const NewSectionInfo &NewSection : Config.UpdateSection) {
+ if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
return E;
- if (Error E = updateSection(SectionName, FileName, Obj))
+ if (Error E = updateSection(NewSection, Obj))
return E;
}
@@ -485,9 +483,12 @@ Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
createNewArchiveMembers(Config, **ArOrErr);
if (!NewArchiveMembersOrErr)
return NewArchiveMembersOrErr.takeError();
+ auto Kind = (*ArOrErr)->kind();
+ if (Kind == object::Archive::K_BSD)
+ Kind = object::Archive::K_DARWIN;
Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
writeArchiveToBuffer(*NewArchiveMembersOrErr,
- (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
+ (*ArOrErr)->hasSymbolTable(), Kind,
Config.getCommonConfig().DeterministicArchives,
(*ArOrErr)->isThin());
if (!OutputBufferOrErr)
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/lib/ObjCopy/MachO/MachOObject.cpp
index 6312adbbc9f7..56f31e456198 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.cpp
+++ b/llvm/lib/ObjCopy/MachO/MachOObject.cpp
@@ -1,4 +1,4 @@
-//===- Object.cpp - Mach-O object file model --------------------*- C++ -*-===//
+//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Object.h"
+#include "MachOObject.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <unordered_set>
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/lib/ObjCopy/MachO/MachOObject.h
index 13aaf42634b0..df9261b76e4d 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/llvm/lib/ObjCopy/MachO/MachOObject.h
@@ -1,4 +1,4 @@
-//===- Object.h - Mach-O object file model ----------------------*- C++ -*-===//
+//===- MachOObject.h - Mach-O object file model -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_OBJCOPY_MACHO_OBJECT_H
-#define LLVM_OBJCOPY_MACHO_OBJECT_H
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
@@ -371,4 +371,4 @@ struct Object {
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_OBJCOPY_MACHO_OBJECT_H
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOOBJECT_H
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/lib/ObjCopy/MachO/MachOReader.cpp
index d68d1692997a..94459a436094 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/llvm/lib/ObjCopy/MachO/MachOReader.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MachOReader.h"
-#include "Object.h"
+#include "MachOObject.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/Errc.h"
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/lib/ObjCopy/MachO/MachOReader.h
index b29e86ca642e..ef374aa9efae 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h
+++ b/llvm/lib/ObjCopy/MachO/MachOReader.h
@@ -6,9 +6,12 @@
//
//===----------------------------------------------------------------------===//
-#include "MachOObjcopy.h"
-#include "Object.h"
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H
+
+#include "MachOObject.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
#include "llvm/Object/MachO.h"
#include <memory>
@@ -55,3 +58,5 @@ public:
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOREADER_H
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp
index 52f20794cc57..bc633285e03c 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp
@@ -8,7 +8,7 @@
#include "MachOWriter.h"
#include "MachOLayoutBuilder.h"
-#include "Object.h"
+#include "MachOObject.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/MachO.h"
@@ -94,64 +94,18 @@ size_t MachOWriter::totalSize() const {
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
}
- if (O.CodeSignatureCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.CodeSignatureCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
- if (LinkEditDataCommand.dataoff)
- Ends.push_back(LinkEditDataCommand.dataoff +
- LinkEditDataCommand.datasize);
- }
-
- if (O.DataInCodeCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.DataInCodeCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Ends.push_back(LinkEditDataCommand.dataoff +
- LinkEditDataCommand.datasize);
- }
-
- if (O.LinkerOptimizationHintCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.LinkerOptimizationHintCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Ends.push_back(LinkEditDataCommand.dataoff +
- LinkEditDataCommand.datasize);
- }
-
- if (O.FunctionStartsCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.FunctionStartsCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Ends.push_back(LinkEditDataCommand.dataoff +
- LinkEditDataCommand.datasize);
- }
-
- if (O.ChainedFixupsCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.ChainedFixupsCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Ends.push_back(LinkEditDataCommand.dataoff +
- LinkEditDataCommand.datasize);
- }
-
- if (O.ExportsTrieCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.ExportsTrieCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Ends.push_back(LinkEditDataCommand.dataoff +
- LinkEditDataCommand.datasize);
- }
+ for (Optional<size_t> LinkEditDataCommandIndex :
+ {O.CodeSignatureCommandIndex, O.DataInCodeCommandIndex,
+ O.LinkerOptimizationHintCommandIndex, O.FunctionStartsCommandIndex,
+ O.ChainedFixupsCommandIndex, O.ExportsTrieCommandIndex})
+ if (LinkEditDataCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*LinkEditDataCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+ if (LinkEditDataCommand.dataoff)
+ Ends.push_back(LinkEditDataCommand.dataoff +
+ LinkEditDataCommand.datasize);
+ }
  // Otherwise, use the last section / relocation.
for (const LoadCommand &LC : O.LoadCommands)
@@ -566,11 +520,12 @@ void MachOWriter::writeCodeSignatureData() {
uint8_t *CurrHashWritePosition = HashWriteStart;
while (CurrHashReadPosition < HashReadEnd) {
StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition),
- std::min(HashReadEnd - CurrHashReadPosition,
- static_cast<ssize_t>(CodeSignature.BlockSize)));
+ std::min(static_cast<size_t>(HashReadEnd
+ - CurrHashReadPosition),
+ static_cast<size_t>(CodeSignature.BlockSize)));
SHA256 Hasher;
Hasher.update(Block);
- StringRef Hash = Hasher.final();
+ std::array<uint8_t, 32> Hash = Hasher.final();
assert(Hash.size() == CodeSignature.HashSize);
memcpy(CurrHashWritePosition, Hash.data(), CodeSignature.HashSize);
CurrHashReadPosition += CodeSignature.BlockSize;
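
The code-signature loop above hashes the signed region one BlockSize chunk at a time, and the change tracks SHA256::final() now returning a fixed 32-byte array rather than a StringRef. A standalone sketch of the same chunking, assuming llvm/Support/SHA256.h:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/SHA256.h"
    #include <algorithm>
    #include <array>

    static void hashBlocks(llvm::ArrayRef<uint8_t> Data, size_t BlockSize,
                           llvm::SmallVectorImpl<uint8_t> &Out) {
      for (size_t Off = 0; Off < Data.size(); Off += BlockSize) {
        llvm::SHA256 Hasher;
        // Hash min(BlockSize, bytes remaining), mirroring the loop above.
        Hasher.update(Data.slice(Off, std::min(BlockSize, Data.size() - Off)));
        std::array<uint8_t, 32> Hash = Hasher.final();
        Out.append(Hash.begin(), Hash.end());
      }
    }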
@@ -658,69 +613,29 @@ void MachOWriter::writeTail() {
&MachOWriter::writeIndirectSymbolTable);
}
- if (O.CodeSignatureCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.CodeSignatureCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Queue.emplace_back(LinkEditDataCommand.dataoff,
- &MachOWriter::writeCodeSignatureData);
- }
-
- if (O.DataInCodeCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.DataInCodeCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Queue.emplace_back(LinkEditDataCommand.dataoff,
- &MachOWriter::writeDataInCodeData);
- }
-
- if (O.LinkerOptimizationHintCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.LinkerOptimizationHintCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Queue.emplace_back(LinkEditDataCommand.dataoff,
- &MachOWriter::writeLinkerOptimizationHint);
- }
-
- if (O.FunctionStartsCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.FunctionStartsCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Queue.emplace_back(LinkEditDataCommand.dataoff,
- &MachOWriter::writeFunctionStartsData);
- }
-
- if (O.ChainedFixupsCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.ChainedFixupsCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Queue.emplace_back(LinkEditDataCommand.dataoff,
- &MachOWriter::writeChainedFixupsData);
- }
-
- if (O.ExportsTrieCommandIndex) {
- const MachO::linkedit_data_command &LinkEditDataCommand =
- O.LoadCommands[*O.ExportsTrieCommandIndex]
- .MachOLoadCommand.linkedit_data_command_data;
-
- if (LinkEditDataCommand.dataoff)
- Queue.emplace_back(LinkEditDataCommand.dataoff,
- &MachOWriter::writeExportsTrieData);
+ std::initializer_list<std::pair<Optional<size_t>, WriteHandlerType>>
+ LinkEditDataCommandWriters = {
+ {O.CodeSignatureCommandIndex, &MachOWriter::writeCodeSignatureData},
+ {O.DataInCodeCommandIndex, &MachOWriter::writeDataInCodeData},
+ {O.LinkerOptimizationHintCommandIndex,
+ &MachOWriter::writeLinkerOptimizationHint},
+ {O.FunctionStartsCommandIndex, &MachOWriter::writeFunctionStartsData},
+ {O.ChainedFixupsCommandIndex, &MachOWriter::writeChainedFixupsData},
+ {O.ExportsTrieCommandIndex, &MachOWriter::writeExportsTrieData}};
+ for (const auto &W : LinkEditDataCommandWriters) {
+ Optional<size_t> LinkEditDataCommandIndex;
+ WriteHandlerType WriteHandler;
+ std::tie(LinkEditDataCommandIndex, WriteHandler) = W;
+ if (LinkEditDataCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*LinkEditDataCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+ if (LinkEditDataCommand.dataoff)
+ Queue.emplace_back(LinkEditDataCommand.dataoff, WriteHandler);
+ }
}
- llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
- return LHS.first < RHS.first;
- });
+ llvm::sort(Queue, llvm::less_first());
for (auto WriteOp : Queue)
(this->*WriteOp.second)();
@@ -735,7 +650,6 @@ Error MachOWriter::write() {
return createStringError(errc::not_enough_memory,
"failed to allocate memory buffer of " +
Twine::utohexstr(TotalSize) + " bytes");
- memset(Buf->getBufferStart(), 0, totalSize());
writeHeader();
writeLoadCommands();
writeSections();
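
Both totalSize() and writeTail() above replace six copies of the same if-block with one table of (optional command index, handler) pairs, and the hand-rolled comparator becomes llvm::less_first(), which orders pair-like values by their first element. The shape of the pattern, as a self-contained sketch in plain C++:

    #include <cstddef>
    #include <initializer_list>
    #include <optional>
    #include <utility>

    struct Writer {
      void writeA() {}
      void writeB() {}
      using Handler = void (Writer::*)();
      void dispatch(std::optional<std::size_t> A, std::optional<std::size_t> B) {
        std::initializer_list<std::pair<std::optional<std::size_t>, Handler>>
            Writers = {{A, &Writer::writeA}, {B, &Writer::writeB}};
        for (const auto &W : Writers)
          if (W.first) // Skip load commands that are absent.
            (this->*W.second)();
      }
    };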
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/lib/ObjCopy/MachO/MachOWriter.h
index a172534dac8a..a54c10294246 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
+++ b/llvm/lib/ObjCopy/MachO/MachOWriter.h
@@ -6,10 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H
+#define LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H
+
#include "MachOLayoutBuilder.h"
-#include "MachOObjcopy.h"
-#include "Object.h"
+#include "MachOObject.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
#include "llvm/Object/MachO.h"
namespace llvm {
@@ -69,3 +72,5 @@ public:
} // end namespace macho
} // end namespace objcopy
} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_MACHO_MACHOWRITER_H
diff --git a/llvm/lib/ObjCopy/ObjCopy.cpp b/llvm/lib/ObjCopy/ObjCopy.cpp
new file mode 100644
index 000000000000..16968d202265
--- /dev/null
+++ b/llvm/lib/ObjCopy/ObjCopy.cpp
@@ -0,0 +1,90 @@
+//===- ObjCopy.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/ObjCopy.h"
+#include "llvm/ObjCopy/COFF/COFFConfig.h"
+#include "llvm/ObjCopy/COFF/COFFObjcopy.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ELF/ELFConfig.h"
+#include "llvm/ObjCopy/ELF/ELFObjcopy.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "llvm/ObjCopy/MultiFormatConfig.h"
+#include "llvm/ObjCopy/wasm/WasmConfig.h"
+#include "llvm/ObjCopy/wasm/WasmObjcopy.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFObjcopy.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/Wasm.h"
+#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+
+namespace llvm {
+namespace objcopy {
+
+using namespace llvm::object;
+
+/// The function executeObjcopyOnBinary does the dispatch based on the format
+/// of the input binary (ELF, Mach-O, Mach-O universal, COFF, wasm, or XCOFF).
+Error executeObjcopyOnBinary(const MultiFormatConfig &Config,
+ object::Binary &In, raw_ostream &Out) {
+ if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In)) {
+ Expected<const ELFConfig &> ELFConfig = Config.getELFConfig();
+ if (!ELFConfig)
+ return ELFConfig.takeError();
+
+ return elf::executeObjcopyOnBinary(Config.getCommonConfig(), *ELFConfig,
+ *ELFBinary, Out);
+ }
+ if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In)) {
+ Expected<const COFFConfig &> COFFConfig = Config.getCOFFConfig();
+ if (!COFFConfig)
+ return COFFConfig.takeError();
+
+ return coff::executeObjcopyOnBinary(Config.getCommonConfig(), *COFFConfig,
+ *COFFBinary, Out);
+ }
+ if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In)) {
+ Expected<const MachOConfig &> MachOConfig = Config.getMachOConfig();
+ if (!MachOConfig)
+ return MachOConfig.takeError();
+
+ return macho::executeObjcopyOnBinary(Config.getCommonConfig(), *MachOConfig,
+ *MachOBinary, Out);
+ }
+ if (auto *MachOUniversalBinary =
+ dyn_cast<object::MachOUniversalBinary>(&In)) {
+ return macho::executeObjcopyOnMachOUniversalBinary(
+ Config, *MachOUniversalBinary, Out);
+ }
+ if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In)) {
+ Expected<const WasmConfig &> WasmConfig = Config.getWasmConfig();
+ if (!WasmConfig)
+ return WasmConfig.takeError();
+
+ return objcopy::wasm::executeObjcopyOnBinary(Config.getCommonConfig(),
+ *WasmConfig, *WasmBinary, Out);
+ }
+ if (auto *XCOFFBinary = dyn_cast<object::XCOFFObjectFile>(&In)) {
+ Expected<const XCOFFConfig &> XCOFFConfig = Config.getXCOFFConfig();
+ if (!XCOFFConfig)
+ return XCOFFConfig.takeError();
+
+ return xcoff::executeObjcopyOnBinary(Config.getCommonConfig(), *XCOFFConfig,
+ *XCOFFBinary, Out);
+ }
+ return createStringError(object_error::invalid_file_type,
+ "unsupported object file format");
+}
+
+} // end namespace objcopy
+} // end namespace llvm
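
executeObjcopyOnBinary is the new single library entry point: it dyn_casts the parsed binary to each supported format and forwards to that backend with the matching per-format config. A hypothetical caller, inside a function returning llvm::Error (the Config object stands for any MultiFormatConfig implementation, and the file names are placeholders):

    // Sketch: parse, open the output, and dispatch; error handling abbreviated.
    Expected<object::OwningBinary<object::Binary>> BinOrErr =
        object::createBinary("input.o");
    if (!BinOrErr)
      return BinOrErr.takeError();
    std::error_code EC;
    raw_fd_ostream Out("output.o", EC);
    if (EC)
      return errorCodeToError(EC);
    return objcopy::executeObjcopyOnBinary(Config, *BinOrErr->getBinary(), Out);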
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp b/llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp
new file mode 100644
index 000000000000..f6e29bd315cb
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFObjcopy.cpp
@@ -0,0 +1,45 @@
+//===- XCOFFObjcopy.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFConfig.h"
+#include "llvm/ObjCopy/XCOFF/XCOFFObjcopy.h"
+#include "llvm/Support/Errc.h"
+#include "XCOFFObject.h"
+#include "XCOFFReader.h"
+#include "XCOFFWriter.h"
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+static Error handleArgs(const CommonConfig &Config, Object &Obj) {
+ return Error::success();
+}
+
+Error executeObjcopyOnBinary(const CommonConfig &Config, const XCOFFConfig &,
+ XCOFFObjectFile &In, raw_ostream &Out) {
+ XCOFFReader Reader(In);
+ Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create();
+ if (!ObjOrErr)
+ return createFileError(Config.InputFilename, ObjOrErr.takeError());
+ Object *Obj = ObjOrErr->get();
+ assert(Obj && "Unable to deserialize XCOFF object");
+ if (Error E = handleArgs(Config, *Obj))
+ return createFileError(Config.InputFilename, std::move(E));
+ XCOFFWriter Writer(*Obj, Out);
+ if (Error E = Writer.write())
+ return createFileError(Config.OutputFilename, std::move(E));
+ return Error::success();
+}
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFObject.h b/llvm/lib/ObjCopy/XCOFF/XCOFFObject.h
new file mode 100644
index 000000000000..3c68b6d3878f
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFObject.h
@@ -0,0 +1,48 @@
+//===- XCOFFObject.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H
+#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/XCOFFObjectFile.h"
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+struct Section {
+ XCOFFSectionHeader32 SectionHeader;
+ ArrayRef<uint8_t> Contents;
+ std::vector<XCOFFRelocation32> Relocations;
+};
+
+struct Symbol {
+ XCOFFSymbolEntry32 Sym;
+  // For now, auxiliary symbol entries are kept as one opaque binary blob;
+  // the individual entry kinds are not distinguished.
+ StringRef AuxSymbolEntries;
+};
+
+struct Object {
+ XCOFFFileHeader32 FileHeader;
+ XCOFFAuxiliaryHeader32 OptionalFileHeader;
+ std::vector<Section> Sections;
+ std::vector<Symbol> Symbols;
+ StringRef StringTable;
+};
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFOBJECT_H
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp
new file mode 100644
index 000000000000..8ad3021a0342
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp
@@ -0,0 +1,101 @@
+//===- XCOFFReader.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCOFFReader.h"
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+Error XCOFFReader::readSections(Object &Obj) const {
+ ArrayRef<XCOFFSectionHeader32> Sections = XCOFFObj.sections32();
+ for (const XCOFFSectionHeader32 &Sec : Sections) {
+ Section ReadSec;
+ // Section header.
+ ReadSec.SectionHeader = Sec;
+ DataRefImpl SectionDRI;
+ SectionDRI.p = reinterpret_cast<uintptr_t>(&Sec);
+
+ // Section data.
+ if (Sec.SectionSize) {
+ Expected<ArrayRef<uint8_t>> ContentsRef =
+ XCOFFObj.getSectionContents(SectionDRI);
+ if (!ContentsRef)
+ return ContentsRef.takeError();
+ ReadSec.Contents = ContentsRef.get();
+ }
+
+ // Relocations.
+ if (Sec.NumberOfRelocations) {
+ auto Relocations =
+ XCOFFObj.relocations<XCOFFSectionHeader32, XCOFFRelocation32>(Sec);
+ if (!Relocations)
+ return Relocations.takeError();
+ for (const XCOFFRelocation32 &Rel : Relocations.get())
+ ReadSec.Relocations.push_back(Rel);
+ }
+
+ Obj.Sections.push_back(std::move(ReadSec));
+ }
+ return Error::success();
+}
+
+Error XCOFFReader::readSymbols(Object &Obj) const {
+ std::vector<Symbol> Symbols;
+ Symbols.reserve(XCOFFObj.getNumberOfSymbolTableEntries());
+ for (SymbolRef Sym : XCOFFObj.symbols()) {
+ Symbol ReadSym;
+ DataRefImpl SymbolDRI = Sym.getRawDataRefImpl();
+ XCOFFSymbolRef SymbolEntRef = XCOFFObj.toSymbolRef(SymbolDRI);
+ ReadSym.Sym = *SymbolEntRef.getSymbol32();
+ // Auxiliary entries.
+ if (SymbolEntRef.getNumberOfAuxEntries()) {
+ const char *Start = reinterpret_cast<const char *>(
+ SymbolDRI.p + XCOFF::SymbolTableEntrySize);
+ Expected<StringRef> RawAuxEntriesOrError = XCOFFObj.getRawData(
+ Start,
+ XCOFF::SymbolTableEntrySize * SymbolEntRef.getNumberOfAuxEntries(),
+ StringRef("symbol"));
+ if (!RawAuxEntriesOrError)
+ return RawAuxEntriesOrError.takeError();
+ ReadSym.AuxSymbolEntries = RawAuxEntriesOrError.get();
+ }
+ Obj.Symbols.push_back(std::move(ReadSym));
+ }
+ return Error::success();
+}
+
+Expected<std::unique_ptr<Object>> XCOFFReader::create() const {
+ auto Obj = std::make_unique<Object>();
+ // Only 32-bit supported now.
+ if (XCOFFObj.is64Bit())
+ return createStringError(object_error::invalid_file_type,
+ "64-bit XCOFF is not supported yet");
+ // Read the file header.
+ Obj->FileHeader = *XCOFFObj.fileHeader32();
+ // Read the optional header.
+ if (XCOFFObj.getOptionalHeaderSize())
+ Obj->OptionalFileHeader = *XCOFFObj.auxiliaryHeader32();
+ // Read each section.
+ Obj->Sections.reserve(XCOFFObj.getNumberOfSections());
+ if (Error E = readSections(*Obj))
+ return std::move(E);
+ // Read each symbol.
+ Obj->Symbols.reserve(XCOFFObj.getRawNumberOfSymbolTableEntries32());
+ if (Error E = readSymbols(*Obj))
+ return std::move(E);
+ // String table.
+ Obj->StringTable = XCOFFObj.getStringTable();
+ return std::move(Obj);
+}
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFReader.h b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.h
new file mode 100644
index 000000000000..63a8d8579d37
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.h
@@ -0,0 +1,35 @@
+//===- XCOFFReader.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H
+#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H
+
+#include "XCOFFObject.h"
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+class XCOFFReader {
+public:
+ explicit XCOFFReader(const XCOFFObjectFile &O) : XCOFFObj(O) {}
+ Expected<std::unique_ptr<Object>> create() const;
+
+private:
+ const XCOFFObjectFile &XCOFFObj;
+ Error readSections(Object &Obj) const;
+ Error readSymbols(Object &Obj) const;
+};
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFREADER_H
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp
new file mode 100644
index 000000000000..bae3128822e2
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.cpp
@@ -0,0 +1,125 @@
+//===- XCOFFWriter.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Errc.h"
+#include "XCOFFWriter.h"
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+using namespace object;
+
+void XCOFFWriter::finalizeHeaders() {
+ // File header.
+ FileSize += sizeof(XCOFFFileHeader32);
+ // Optional file header.
+ FileSize += Obj.FileHeader.AuxHeaderSize;
+ // Section headers.
+ FileSize += sizeof(XCOFFSectionHeader32) * Obj.Sections.size();
+}
+
+void XCOFFWriter::finalizeSections() {
+ for (const Section &Sec : Obj.Sections) {
+ // Section data.
+ FileSize += Sec.Contents.size();
+ // Relocations.
+ FileSize +=
+ Sec.SectionHeader.NumberOfRelocations * sizeof(XCOFFRelocation32);
+ }
+}
+
+void XCOFFWriter::finalizeSymbolStringTable() {
+ assert(Obj.FileHeader.SymbolTableOffset >= FileSize);
+ FileSize = Obj.FileHeader.SymbolTableOffset;
+ // Symbols and auxiliary entries.
+ FileSize +=
+ Obj.FileHeader.NumberOfSymTableEntries * XCOFF::SymbolTableEntrySize;
+ // String table.
+ FileSize += Obj.StringTable.size();
+}
+
+void XCOFFWriter::finalize() {
+ FileSize = 0;
+ finalizeHeaders();
+ finalizeSections();
+ finalizeSymbolStringTable();
+}
+
+void XCOFFWriter::writeHeaders() {
+ // Write the file header.
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+ memcpy(Ptr, &Obj.FileHeader, sizeof(XCOFFFileHeader32));
+ Ptr += sizeof(XCOFFFileHeader32);
+
+ // Write the optional header.
+ if (Obj.FileHeader.AuxHeaderSize) {
+ memcpy(Ptr, &Obj.OptionalFileHeader, Obj.FileHeader.AuxHeaderSize);
+ Ptr += Obj.FileHeader.AuxHeaderSize;
+ }
+
+ // Write section headers.
+ for (const Section &Sec : Obj.Sections) {
+ memcpy(Ptr, &Sec.SectionHeader, sizeof(XCOFFSectionHeader32));
+ Ptr += sizeof(XCOFFSectionHeader32);
+ }
+}
+
+void XCOFFWriter::writeSections() {
+ // Write section data.
+ for (const Section &Sec : Obj.Sections) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
+ Sec.SectionHeader.FileOffsetToRawData;
+ Ptr = std::copy(Sec.Contents.begin(), Sec.Contents.end(), Ptr);
+ }
+
+ // Write relocations.
+ for (const Section &Sec : Obj.Sections) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
+ Sec.SectionHeader.FileOffsetToRelocationInfo;
+ for (const XCOFFRelocation32 &Rel : Sec.Relocations) {
+ memcpy(Ptr, &Rel, sizeof(XCOFFRelocation32));
+ Ptr += sizeof(XCOFFRelocation32);
+ }
+ }
+}
+
+void XCOFFWriter::writeSymbolStringTable() {
+ // Write symbols.
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
+ Obj.FileHeader.SymbolTableOffset;
+ for (const Symbol &Sym : Obj.Symbols) {
+ memcpy(Ptr, &Sym.Sym, XCOFF::SymbolTableEntrySize);
+ Ptr += XCOFF::SymbolTableEntrySize;
+ // Auxiliary symbols.
+ memcpy(Ptr, Sym.AuxSymbolEntries.data(), Sym.AuxSymbolEntries.size());
+ Ptr += Sym.AuxSymbolEntries.size();
+ }
+ // Write the string table.
+ memcpy(Ptr, Obj.StringTable.data(), Obj.StringTable.size());
+ Ptr += Obj.StringTable.size();
+}
+
+Error XCOFFWriter::write() {
+ finalize();
+ Buf = WritableMemoryBuffer::getNewMemBuffer(FileSize);
+ if (!Buf)
+ return createStringError(errc::not_enough_memory,
+ "failed to allocate memory buffer of " +
+ Twine::utohexstr(FileSize) + " bytes");
+
+ writeHeaders();
+ writeSections();
+ writeSymbolStringTable();
+ Out.write(Buf->getBufferStart(), Buf->getBufferSize());
+ return Error::success();
+}
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
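
XCOFFWriter is deliberately two-pass: finalize() walks the object once to compute FileSize (headers, section contents, relocations, then rebases to SymbolTableOffset for the symbols plus string table), and only then is the output buffer allocated and filled in place. Illustrative arithmetic for a minimal 32-bit object with one 16-byte section and no relocations (field sizes per the XCOFF32 format):

    finalizeHeaders():  20 (file header) + 0 (aux header) + 40 (1 section header)
    finalizeSections(): + 16 (contents) + 0 (relocations)            => 76
    finalizeSymbolStringTable(): FileSize = SymbolTableOffset, then
        + NumberOfSymTableEntries * 18 + StringTable.size()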
diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
new file mode 100644
index 000000000000..54c7b5f3ccbe
--- /dev/null
+++ b/llvm/lib/ObjCopy/XCOFF/XCOFFWriter.h
@@ -0,0 +1,48 @@
+//===- XCOFFWriter.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H
+#define LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "XCOFFObject.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace xcoff {
+
+class XCOFFWriter {
+public:
+ virtual ~XCOFFWriter() {}
+ XCOFFWriter(Object &Obj, raw_ostream &Out) : Obj(Obj), Out(Out) {}
+ Error write();
+
+private:
+ Object &Obj;
+ raw_ostream &Out;
+ std::unique_ptr<WritableMemoryBuffer> Buf;
+ size_t FileSize;
+
+ void finalizeHeaders();
+ void finalizeSections();
+ void finalizeSymbolStringTable();
+ void finalize();
+
+ void writeHeaders();
+ void writeSections();
+ void writeSymbolStringTable();
+};
+
+} // end namespace xcoff
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJCOPY_XCOFF_XCOFFWRITER_H
diff --git a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp b/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp
index 397d09757e54..6877cd68bee4 100644
--- a/llvm/tools/llvm-objcopy/wasm/WasmObjcopy.cpp
+++ b/llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#include "WasmObjcopy.h"
-#include "CommonConfig.h"
-#include "Object.h"
-#include "Reader.h"
-#include "Writer.h"
+#include "llvm/ObjCopy/wasm/WasmObjcopy.h"
+#include "WasmObject.h"
+#include "WasmReader.h"
+#include "WasmWriter.h"
+#include "llvm/ObjCopy/CommonConfig.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileOutputBuffer.h"
@@ -121,21 +121,19 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
removeSections(Config, Obj);
- for (StringRef Flag : Config.AddSection) {
- StringRef SecName, FileName;
- std::tie(SecName, FileName) = Flag.split("=");
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
- MemoryBuffer::getFile(FileName);
- if (!BufOrErr)
- return createFileError(FileName, errorCodeToError(BufOrErr.getError()));
+ for (const NewSectionInfo &NewSection : Config.AddSection) {
Section Sec;
Sec.SectionType = llvm::wasm::WASM_SEC_CUSTOM;
- Sec.Name = SecName;
- std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+ Sec.Name = NewSection.SectionName;
+
+ std::unique_ptr<MemoryBuffer> BufferCopy = MemoryBuffer::getMemBufferCopy(
+ NewSection.SectionData->getBufferStart(),
+ NewSection.SectionData->getBufferIdentifier());
Sec.Contents = makeArrayRef<uint8_t>(
- reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
- Buf->getBufferSize());
- Obj.addSectionWithOwnedContents(Sec, std::move(Buf));
+ reinterpret_cast<const uint8_t *>(BufferCopy->getBufferStart()),
+ BufferCopy->getBufferSize());
+
+ Obj.addSectionWithOwnedContents(Sec, std::move(BufferCopy));
}
return Error::success();
diff --git a/llvm/tools/llvm-objcopy/wasm/Object.cpp b/llvm/lib/ObjCopy/wasm/WasmObject.cpp
index e7a2956fedca..28a2de6e6e4f 100644
--- a/llvm/tools/llvm-objcopy/wasm/Object.cpp
+++ b/llvm/lib/ObjCopy/wasm/WasmObject.cpp
@@ -1,4 +1,4 @@
-//===- Object.cpp ---------------------------------------------------------===//
+//===- WasmObject.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Object.h"
+#include "WasmObject.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/tools/llvm-objcopy/wasm/Object.h b/llvm/lib/ObjCopy/wasm/WasmObject.h
index 9db91c41e2e2..9bc5831926c6 100644
--- a/llvm/tools/llvm-objcopy/wasm/Object.h
+++ b/llvm/lib/ObjCopy/wasm/WasmObject.h
@@ -1,4 +1,4 @@
-//===- Object.h -------------------------------------------------*- C++ -*-===//
+//===- WasmObject.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H
+#ifndef LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H
+#define LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
@@ -44,4 +44,4 @@ private:
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_OBJECT_H
+#endif // LLVM_LIB_OBJCOPY_WASM_WASMOBJECT_H
diff --git a/llvm/tools/llvm-objcopy/wasm/Reader.cpp b/llvm/lib/ObjCopy/wasm/WasmReader.cpp
index 13fa84ad8020..6e7d8b5591c9 100644
--- a/llvm/tools/llvm-objcopy/wasm/Reader.cpp
+++ b/llvm/lib/ObjCopy/wasm/WasmReader.cpp
@@ -1,4 +1,4 @@
-//===- Reader.cpp ---------------------------------------------------------===//
+//===- WasmReader.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Reader.h"
+#include "WasmReader.h"
namespace llvm {
namespace objcopy {
@@ -24,6 +24,12 @@ Expected<std::unique_ptr<Object>> Reader::create() const {
const WasmSection &WS = WasmObj.getWasmSection(Sec);
Obj->Sections.push_back(
{static_cast<uint8_t>(WS.Type), WS.Name, WS.Content});
+ // Give known sections standard names to allow them to be selected. (Custom
+ // sections already have their names filled in by the parser).
+ Section &ReaderSec = Obj->Sections.back();
+ if (ReaderSec.SectionType > WASM_SEC_CUSTOM &&
+ ReaderSec.SectionType <= WASM_SEC_LAST_KNOWN)
+ ReaderSec.Name = sectionTypeToString(ReaderSec.SectionType);
}
return std::move(Obj);
}
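
The new naming pass gives non-custom sections synthetic names so that name-based options (--only-section, --remove-section and friends) can address them; custom sections keep their parsed names. The exact spellings come from sectionTypeToString, which presumably maps the numeric ids to names such as these (the spellings are an assumption, not quoted from this commit):

    // Sketch of the effect (names assumed, per sectionTypeToString):
    //   {SectionType: WASM_SEC_TYPE,   Name: ""}     -> Name = "TYPE"
    //   {SectionType: WASM_SEC_CODE,   Name: ""}     -> Name = "CODE"
    //   {SectionType: WASM_SEC_CUSTOM, Name: "foo"}  -> unchanged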
diff --git a/llvm/tools/llvm-objcopy/wasm/Reader.h b/llvm/lib/ObjCopy/wasm/WasmReader.h
index 2dcf7dde029a..d71660fa2b65 100644
--- a/llvm/tools/llvm-objcopy/wasm/Reader.h
+++ b/llvm/lib/ObjCopy/wasm/WasmReader.h
@@ -1,4 +1,4 @@
-//===- Reader.h -------------------------------------------------*- C++ -*-===//
+//===- WasmReader.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H
+#ifndef LLVM_LIB_OBJCOPY_WASM_WASMREADER_H
+#define LLVM_LIB_OBJCOPY_WASM_WASMREADER_H
-#include "Object.h"
+#include "WasmObject.h"
namespace llvm {
namespace objcopy {
@@ -28,4 +28,4 @@ private:
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_READER_H
+#endif // LLVM_LIB_OBJCOPY_WASM_WASMREADER_H
diff --git a/llvm/tools/llvm-objcopy/wasm/Writer.cpp b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp
index 2fad9e60c50f..fdcd441cc798 100644
--- a/llvm/tools/llvm-objcopy/wasm/Writer.cpp
+++ b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp
@@ -1,4 +1,4 @@
-//===- Writer.cpp ---------------------------------------------------------===//
+//===- WasmWriter.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Writer.h"
+#include "WasmWriter.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
diff --git a/llvm/tools/llvm-objcopy/wasm/Writer.h b/llvm/lib/ObjCopy/wasm/WasmWriter.h
index 4404cd8caf84..14bbcf88875e 100644
--- a/llvm/tools/llvm-objcopy/wasm/Writer.h
+++ b/llvm/lib/ObjCopy/wasm/WasmWriter.h
@@ -1,4 +1,4 @@
-//===- Writer.h -------------------------------------------------*- C++ -*-===//
+//===- WasmWriter.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H
+#ifndef LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H
+#define LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H
-#include "Object.h"
+#include "WasmObject.h"
#include <cstdint>
#include <vector>
@@ -46,4 +46,4 @@ private:
} // end namespace objcopy
} // end namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_WASM_WRITER_H
+#endif // LLVM_LIB_OBJCOPY_WASM_WASMWRITER_H
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 9a4ef055faa4..ad03f9cae9f8 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -30,7 +31,6 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <cstring>
#include <memory>
#include <string>
#include <system_error>
@@ -257,6 +257,14 @@ Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
return Name;
if (Name.size() == 2 && Name[1] == '/') // String table.
return Name;
+  // System libraries from the Windows SDK for Windows 11 contain this special
+  // member. It looks like a CFG guard: we just skip it for now.
+ if (Name.equals("/<XFGHASHMAP>/"))
+ return Name;
+ // Some libraries (e.g., arm64rt.lib) from the Windows WDK
+ // (version 10.0.22000.0) contain this undocumented special member.
+ if (Name.equals("/<ECSYMBOLS>/"))
+ return Name;
// It's a long name.
// Get the string table offset.
std::size_t StringOffset;
@@ -922,6 +930,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
Err = Error::success();
}
+object::Archive::Kind Archive::getDefaultKindForHost() {
+ Triple HostTriple(sys::getProcessTriple());
+ return HostTriple.isOSDarwin()
+ ? object::Archive::K_DARWIN
+ : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG
+ : object::Archive::K_GNU);
+}
+
Archive::child_iterator Archive::child_begin(Error &Err,
bool SkipInternal) const {
if (isEmpty())
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 053b3dafed95..dbf5052cdac0 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -18,16 +18,19 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Error.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
-#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
@@ -44,6 +47,40 @@ NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef)
: Buf(MemoryBuffer::getMemBuffer(BufRef, false)),
MemberName(BufRef.getBufferIdentifier()) {}
+object::Archive::Kind NewArchiveMember::detectKindFromObject() const {
+ auto MemBufferRef = this->Buf->getMemBufferRef();
+ Expected<std::unique_ptr<object::ObjectFile>> OptionalObject =
+ object::ObjectFile::createObjectFile(MemBufferRef);
+
+ if (OptionalObject)
+ return isa<object::MachOObjectFile>(**OptionalObject)
+ ? object::Archive::K_DARWIN
+ : (isa<object::XCOFFObjectFile>(**OptionalObject)
+ ? object::Archive::K_AIXBIG
+ : object::Archive::K_GNU);
+
+ // Squelch the error in case we had a non-object file.
+ consumeError(OptionalObject.takeError());
+
+ // If we're adding a bitcode file to the archive, detect the Archive kind
+ // based on the target triple.
+ LLVMContext Context;
+ if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) {
+ if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
+ MemBufferRef, file_magic::bitcode, &Context)) {
+ auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr);
+ return Triple(IRObject.getTargetTriple()).isOSDarwin()
+ ? object::Archive::K_DARWIN
+ : object::Archive::K_GNU;
+ } else {
+ // Squelch the error in case this was not a SymbolicFile.
+ consumeError(ObjOrErr.takeError());
+ }
+ }
+
+ return object::Archive::getDefaultKindForHost();
+}
+
Expected<NewArchiveMember>
NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
bool Deterministic) {
@@ -128,16 +165,20 @@ static bool isDarwin(object::Archive::Kind Kind) {
Kind == object::Archive::K_DARWIN64;
}
+static bool isAIXBigArchive(object::Archive::Kind Kind) {
+ return Kind == object::Archive::K_AIXBIG;
+}
+
static bool isBSDLike(object::Archive::Kind Kind) {
switch (Kind) {
case object::Archive::K_GNU:
case object::Archive::K_GNU64:
+ case object::Archive::K_AIXBIG:
return false;
case object::Archive::K_BSD:
case object::Archive::K_DARWIN:
case object::Archive::K_DARWIN64:
return true;
- case object::Archive::K_AIXBIG:
case object::Archive::K_COFF:
break;
}
@@ -190,6 +231,31 @@ printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name,
Out.write(uint8_t(0));
}
+static void
+printBigArchiveMemberHeader(raw_ostream &Out, StringRef Name,
+ const sys::TimePoint<std::chrono::seconds> &ModTime,
+ unsigned UID, unsigned GID, unsigned Perms,
+ uint64_t Size, unsigned PrevOffset,
+ unsigned NextOffset) {
+ unsigned NameLen = Name.size();
+
+ printWithSpacePadding(Out, Size, 20); // File member size
+ printWithSpacePadding(Out, NextOffset, 20); // Next member header offset
+ printWithSpacePadding(Out, PrevOffset, 20); // Previous member header offset
+ printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); // File member date
+ // The big archive format has 12 chars for uid and gid.
+ printWithSpacePadding(Out, UID % 1000000000000, 12); // UID
+ printWithSpacePadding(Out, GID % 1000000000000, 12); // GID
+ printWithSpacePadding(Out, format("%o", Perms), 12); // Permission
+ printWithSpacePadding(Out, NameLen, 4); // Name length
+ if (NameLen) {
+ printWithSpacePadding(Out, Name, NameLen); // Name
+ if (NameLen % 2)
+ Out.write(uint8_t(0)); // Null byte padding
+ }
+ Out << "`\n"; // Terminator
+}
+
static bool useStringTable(bool Thin, StringRef Name) {
return Thin || Name.size() >= 16 || Name.contains('/');
}
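
printBigArchiveMemberHeader above serializes every header field as space-padded ASCII decimal (permissions in octal), in the order dictated by the AIX big-archive member header. Read back as a struct, the layout it emits looks like this (a sketch mirroring object::BigArMemHdrType; the field names here are assumptions):

    struct BigArMemHdr {          // all fields space-padded ASCII
      char Size[20];              // member data size
      char NextOffset[20];        // offset of the next member header
      char PrevOffset[20];        // offset of the previous member header
      char LastModified[12];
      char UID[12];               // truncated to 12 digits, hence the % 10^12
      char GID[12];
      char AccessMode[12];        // octal permissions
      char NameLen[4];
      // name, padded to even length, then the "`\n" terminator
    };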
@@ -200,8 +266,8 @@ static bool is64BitKind(object::Archive::Kind Kind) {
case object::Archive::K_BSD:
case object::Archive::K_DARWIN:
case object::Archive::K_COFF:
- case object::Archive::K_AIXBIG:
return false;
+ case object::Archive::K_AIXBIG:
case object::Archive::K_DARWIN64:
case object::Archive::K_GNU64:
return true;
@@ -305,7 +371,11 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly.
// We do this for all bsd formats because it simplifies aligning members.
- uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
+ // For the big archive format, the symbol table is the last member, so there
+ // is no need to align.
+ uint32_t Pad = isAIXBigArchive(Kind)
+ ? 0
+ : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
Size += Pad;
if (Padding)
*Padding = Pad;
@@ -313,11 +383,15 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
}
static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
- bool Deterministic, uint64_t Size) {
+ bool Deterministic, uint64_t Size,
+ uint64_t PrevMemberOffset = 0) {
if (isBSDLike(Kind)) {
const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
Size);
+ } else if (isAIXBigArchive(Kind)) {
+ printBigArchiveMemberHeader(Out, "", now(Deterministic), 0, 0,
+ 0, Size, PrevMemberOffset, 0);
} else {
const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
@@ -326,7 +400,8 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, ArrayRef<MemberData> Members,
- StringRef StringTable) {
+ StringRef StringTable,
+ uint64_t PrevMemberOffset = 0) {
// We don't write a symbol table on an archive with no members -- except on
// Darwin, where the linker will abort unless the archive has a symbol table.
if (StringTable.empty() && !isDarwin(Kind))
@@ -339,9 +414,10 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
uint32_t Pad;
uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad);
- writeSymbolTableHeader(Out, Kind, Deterministic, Size);
+ writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset);
- uint64_t Pos = Out.tell() + Size;
+ uint64_t Pos = isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr)
+ : Out.tell() + Size;
if (isBSDLike(Kind))
printNBits(Out, Kind, NumSyms * 2 * OffsetSize);
@@ -410,9 +486,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
- // This ignores the symbol table, but we only need the value mod 8 and the
- // symbol table is aligned to be a multiple of 8 bytes
- uint64_t Pos = 0;
+ uint64_t Pos =
+ isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0;
std::vector<MemberData> Ret;
bool HasObject = false;
@@ -472,6 +547,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
Entry.second = Entry.second > 1 ? 1 : 0;
}
+ // The big archive format needs to know the offset of the previous member
+ // header.
+ unsigned PrevOffset = 0;
for (const NewArchiveMember &M : NewMembers) {
std::string Header;
raw_string_ostream Out(Header);
@@ -504,8 +582,16 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
std::move(StringMsg), object::object_error::parse_failed);
}
- printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
- ModTime, Size);
+ if (isAIXBigArchive(Kind)) {
+ unsigned NextOffset = Pos + sizeof(object::BigArMemHdrType) +
+ alignTo(M.MemberName.size(), 2) + alignTo(Size, 2);
+ printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
+ M.Perms, Size, PrevOffset, NextOffset);
+ PrevOffset = Pos;
+ } else {
+ printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
+ ModTime, Size);
+ }
Out.flush();
std::vector<unsigned> Symbols;
@@ -589,22 +675,25 @@ static Error writeArchiveToStream(raw_ostream &Out,
return E;
std::vector<MemberData> &Data = *DataOrErr;
- if (!StringTableBuf.empty())
+ if (!StringTableBuf.empty() && !isAIXBigArchive(Kind))
Data.insert(Data.begin(), computeStringTable(StringTableBuf));
// We would like to detect if we need to switch to a 64-bit symbol table.
- if (WriteSymtab) {
- uint64_t MaxOffset = 8; // For the file signature.
- uint64_t LastOffset = MaxOffset;
- uint64_t NumSyms = 0;
- for (const auto &M : Data) {
- // Record the start of the member's offset
- LastOffset = MaxOffset;
- // Account for the size of each part associated with the member.
- MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size();
- NumSyms += M.Symbols.size();
- }
+ uint64_t LastMemberEndOffset =
+ isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 8;
+ uint64_t LastMemberHeaderOffset = LastMemberEndOffset;
+ uint64_t NumSyms = 0;
+ for (const auto &M : Data) {
+    // Record the offset at which this member's header starts.
+ LastMemberHeaderOffset = LastMemberEndOffset;
+ // Account for the size of each part associated with the member.
+ LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size();
+ NumSyms += M.Symbols.size();
+ }
+  // The symbol table is placed at the end of a big archive file; the other
+  // archive formats put it at the start of the archive file.
+ if (WriteSymtab && !isAIXBigArchive(Kind)) {
// We assume 32-bit offsets to see if 32-bit symbols are possible or not.
uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf);
auto computeSymbolTableHeaderSize =
@@ -614,7 +703,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize);
return TmpBuf.size();
};
- LastOffset += computeSymbolTableHeaderSize() + SymtabSize;
+ LastMemberHeaderOffset += computeSymbolTableHeaderSize() + SymtabSize;
// The SYM64 format is used when an archive's member offsets are larger than
// 32-bits can hold. The need for this shift in format is detected by
@@ -628,10 +717,10 @@ static Error writeArchiveToStream(raw_ostream &Out,
if (Sym64Env)
StringRef(Sym64Env).getAsInteger(10, Sym64Threshold);
- // If LastOffset isn't going to fit in a 32-bit varible we need to switch
- // to 64-bit. Note that the file can be larger than 4GB as long as the last
- // member starts before the 4GB offset.
- if (LastOffset >= Sym64Threshold) {
+    // If LastMemberHeaderOffset isn't going to fit in a 32-bit variable we need
+ // to switch to 64-bit. Note that the file can be larger than 4GB as long as
+ // the last member starts before the 4GB offset.
+ if (LastMemberHeaderOffset >= Sym64Threshold) {
if (Kind == object::Archive::K_DARWIN)
Kind = object::Archive::K_DARWIN64;
else
@@ -641,15 +730,92 @@ static Error writeArchiveToStream(raw_ostream &Out,
if (Thin)
Out << "!<thin>\n";
+ else if (isAIXBigArchive(Kind))
+ Out << "<bigaf>\n";
else
Out << "!<arch>\n";
- if (WriteSymtab)
- writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf);
+ if (!isAIXBigArchive(Kind)) {
+ if (WriteSymtab)
+ writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf);
+ for (const MemberData &M : Data)
+ Out << M.Header << M.Data << M.Padding;
+ } else {
+ // For the big archive (AIX) format, compute a table of member names and
+ // offsets, used in the member table.
+ uint64_t MemberTableNameStrTblSize = 0;
+ std::vector<size_t> MemberOffsets;
+ std::vector<StringRef> MemberNames;
+ // Loop across object to find offset and names.
+ uint64_t MemberEndOffset = sizeof(object::BigArchive::FixLenHdr);
+ for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) {
+ const NewArchiveMember &Member = NewMembers[I];
+ MemberTableNameStrTblSize += Member.MemberName.size() + 1;
+ MemberOffsets.push_back(MemberEndOffset);
+ MemberNames.push_back(Member.MemberName);
+      // Each file member name is terminated with "`\n"; the terminator's
+      // length is already included in sizeof(BigArMemHdrType).
+ MemberEndOffset += sizeof(object::BigArMemHdrType) +
+ alignTo(Data[I].Data.size(), 2) +
+ alignTo(Member.MemberName.size(), 2);
+ }
- for (const MemberData &M : Data)
- Out << M.Header << M.Data << M.Padding;
+ // AIX member table size.
+ unsigned MemberTableSize = 20 + // Number of members field
+ 20 * MemberOffsets.size() +
+ MemberTableNameStrTblSize;
+
+ unsigned GlobalSymbolOffset =
+ (WriteSymtab && NumSyms > 0)
+ ? LastMemberEndOffset +
+ alignTo(sizeof(object::BigArMemHdrType) + MemberTableSize, 2)
+ : 0;
+
+ // Fixed Sized Header.
+ printWithSpacePadding(Out, NewMembers.size() ? LastMemberEndOffset : 0,
+ 20); // Offset to member table
+ // If there are no file members in the archive, there will be no global
+ // symbol table.
+ printWithSpacePadding(Out, NewMembers.size() ? GlobalSymbolOffset : 0, 20);
+ printWithSpacePadding(
+ Out, 0,
+        20); // Offset to 64-bit global symbol table - Not supported yet
+ printWithSpacePadding(
+ Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0,
+ 20); // Offset to first archive member
+ printWithSpacePadding(Out, NewMembers.size() ? LastMemberHeaderOffset : 0,
+ 20); // Offset to last archive member
+ printWithSpacePadding(
+ Out, 0,
+ 20); // Offset to first member of free list - Not supported yet
+
+ for (const MemberData &M : Data) {
+ Out << M.Header << M.Data;
+ if (M.Data.size() % 2)
+ Out << '\0';
+ }
+ if (NewMembers.size()) {
+ // Member table.
+ printBigArchiveMemberHeader(Out, "", sys::toTimePoint(0), 0, 0, 0,
+ MemberTableSize, LastMemberHeaderOffset,
+ GlobalSymbolOffset);
+ printWithSpacePadding(Out, MemberOffsets.size(), 20); // Number of members
+ for (uint64_t MemberOffset : MemberOffsets)
+ printWithSpacePadding(Out, MemberOffset,
+ 20); // Offset to member file header.
+ for (StringRef MemberName : MemberNames)
+ Out << MemberName << '\0'; // Member file name, null byte padding.
+
+ if (MemberTableNameStrTblSize % 2)
+ Out << '\0'; // Name table must be tail padded to an even number of
+ // bytes.
+
+ if (WriteSymtab && NumSyms > 0)
+ writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf,
+ LastMemberEndOffset);
+ }
+ }
Out.flush();
return Error::success();
}
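
Putting the pieces together, the big-archive branch above lays the file out as:

    "<bigaf>\n"                    magic
    fixed-length header            six 20-char fields: member table offset,
                                   32-bit global symbol table offset, 64-bit
                                   symbol table offset (0, unsupported),
                                   first member offset, last member offset,
                                   free-list offset (0, unsupported)
    file members                   each: header + name + "`\n", data padded to 2
    member table                   count, member offsets, NUL-separated names
    32-bit global symbol table     written last, only when symbols exist

This is also why the symbol table hunks thread PrevMemberOffset through writeSymbolTable: in this format the symbol table is itself a member whose header must point back at the member table.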
diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp
index 143554344256..8065e3eb1d85 100644
--- a/llvm/lib/Object/Binary.cpp
+++ b/llvm/lib/Object/Binary.cpp
@@ -18,14 +18,13 @@
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/Minidump.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/OffloadBinary.h"
#include "llvm/Object/TapiUniversal.h"
#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
#include <memory>
#include <system_error>
@@ -84,9 +83,13 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
// PDB does not support the Binary interface.
return errorCodeToError(object_error::invalid_file_type);
case file_magic::unknown:
+ case file_magic::cuda_fatbinary:
case file_magic::coff_cl_gl_object:
+ case file_magic::dxcontainer_object:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
+ case file_magic::offload_binary:
+ return OffloadBinary::create(Buffer);
case file_magic::minidump:
return MinidumpFile::create(Buffer);
case file_magic::tapi_file:
diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp
index 69bbf70b43a1..91ecea11511d 100644
--- a/llvm/lib/Object/COFFImportFile.cpp
+++ b/llvm/lib/Object/COFFImportFile.cpp
@@ -12,10 +12,14 @@
#include "llvm/Object/COFFImportFile.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
#include <cstdint>
diff --git a/llvm/lib/Object/COFFModuleDefinition.cpp b/llvm/lib/Object/COFFModuleDefinition.cpp
index 55ddd3baca2b..0666970d5c60 100644
--- a/llvm/lib/Object/COFFModuleDefinition.cpp
+++ b/llvm/lib/Object/COFFModuleDefinition.cpp
@@ -17,12 +17,10 @@
#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm::COFF;
using namespace llvm;
diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp
index 354b3c0d5577..1a4bb329201a 100644
--- a/llvm/lib/Object/COFFObjectFile.cpp
+++ b/llvm/lib/Object/COFFObjectFile.cpp
@@ -25,7 +25,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
@@ -447,7 +447,8 @@ Error COFFObjectFile::initSymbolTablePtr() {
// Check that the string table is null terminated if has any in it.
if (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "string table missing null terminator");
return Error::success();
}
@@ -469,23 +470,43 @@ Error COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const {
}
// Returns the file offset for the given RVA.
-Error COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const {
+Error COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res,
+ const char *ErrorContext) const {
for (const SectionRef &S : sections()) {
const coff_section *Section = getCOFFSection(S);
uint32_t SectionStart = Section->VirtualAddress;
uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize;
if (SectionStart <= Addr && Addr < SectionEnd) {
+ // A table/directory entry can be pointing to somewhere in a stripped
+ // section, in an object that went through `objcopy --only-keep-debug`.
+ // In this case we don't want to cause the parsing of the object file to
+ // fail, otherwise it will be impossible to use this object as debug info
+ // in LLDB. Return SectionStrippedError here so that
+ // COFFObjectFile::initialize can ignore the error.
+ // It is somewhat common for binaries to have RVAs pointing outside of
+ // the provided raw data. Instead of rejecting such binaries, just
+ // treat the section as stripped for these purposes.
+ if (Section->SizeOfRawData < Section->VirtualSize &&
+ Addr >= SectionStart + Section->SizeOfRawData) {
+ return make_error<SectionStrippedError>();
+ }
uint32_t Offset = Addr - SectionStart;
Res = reinterpret_cast<uintptr_t>(base()) + Section->PointerToRawData +
Offset;
return Error::success();
}
}
- return errorCodeToError(object_error::parse_failed);
+ if (ErrorContext)
+ return createStringError(object_error::parse_failed,
+ "RVA 0x%" PRIx32 " for %s not found", Addr,
+ ErrorContext);
+ return createStringError(object_error::parse_failed,
+ "RVA 0x%" PRIx32 " not found", Addr);
}
Error COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size,
- ArrayRef<uint8_t> &Contents) const {
+ ArrayRef<uint8_t> &Contents,
+ const char *ErrorContext) const {
for (const SectionRef &S : sections()) {
const coff_section *Section = getCOFFSection(S);
uint32_t SectionStart = Section->VirtualAddress;
@@ -501,7 +522,12 @@ Error COFFObjectFile::getRvaAndSizeAsBytes(uint32_t RVA, uint32_t Size,
return Error::success();
}
}
- return errorCodeToError(object_error::parse_failed);
+ if (ErrorContext)
+ return createStringError(object_error::parse_failed,
+ "RVA 0x%" PRIx32 " for %s not found", RVA,
+ ErrorContext);
+ return createStringError(object_error::parse_failed,
+ "RVA 0x%" PRIx32 " not found", RVA);
}
// Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name
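
A caller-side sketch of the new error plumbing follows; the RVA value and table name are made up, and only getRvaPtr, SectionStrippedError, and the standard llvm::Error helpers touched by this patch are assumed:

  // Hypothetical caller: resolve an RVA, tolerating stripped sections.
  uintptr_t Ptr = 0;
  if (Error E = Obj->getRvaPtr(0x2000, Ptr, "example table")) {
    if (E.isA<SectionStrippedError>())
      consumeError(std::move(E)); // debug-only object; not a parse failure
    else
      return E; // e.g. "RVA 0x2000 for example table not found"
  }
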
@@ -521,11 +547,12 @@ Error COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir,
const codeview::DebugInfo *&PDBInfo,
StringRef &PDBFileName) const {
ArrayRef<uint8_t> InfoBytes;
- if (Error E = getRvaAndSizeAsBytes(
- DebugDir->AddressOfRawData, DebugDir->SizeOfData, InfoBytes))
+ if (Error E =
+ getRvaAndSizeAsBytes(DebugDir->AddressOfRawData, DebugDir->SizeOfData,
+ InfoBytes, "PDB info"))
return E;
if (InfoBytes.size() < sizeof(*PDBInfo) + 1)
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed, "PDB info too small");
PDBInfo = reinterpret_cast<const codeview::DebugInfo *>(InfoBytes.data());
InfoBytes = InfoBytes.drop_front(sizeof(*PDBInfo));
PDBFileName = StringRef(reinterpret_cast<const char *>(InfoBytes.data()),
@@ -563,7 +590,7 @@ Error COFFObjectFile::initImportTablePtr() {
// Find the section that contains the RVA. This is needed because the RVA is
// the import table's memory address, which is different from its file offset.
uintptr_t IntPtr = 0;
- if (Error E = getRvaPtr(ImportTableRva, IntPtr))
+ if (Error E = getRvaPtr(ImportTableRva, IntPtr, "import table"))
return E;
if (Error E = checkOffset(Data, IntPtr, DataEntry->Size))
return E;
@@ -586,8 +613,11 @@ Error COFFObjectFile::initDelayImportTablePtr() {
sizeof(delay_import_directory_table_entry) - 1;
uintptr_t IntPtr = 0;
- if (Error E = getRvaPtr(RVA, IntPtr))
+ if (Error E = getRvaPtr(RVA, IntPtr, "delay import table"))
return E;
+ if (Error E = checkOffset(Data, IntPtr, DataEntry->Size))
+ return E;
+
DelayImportDirectory = reinterpret_cast<
const delay_import_directory_table_entry *>(IntPtr);
return Error::success();
@@ -607,8 +637,11 @@ Error COFFObjectFile::initExportTablePtr() {
uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress;
uintptr_t IntPtr = 0;
- if (Error E = getRvaPtr(ExportTableRva, IntPtr))
+ if (Error E = getRvaPtr(ExportTableRva, IntPtr, "export table"))
return E;
+ if (Error E = checkOffset(Data, IntPtr, DataEntry->Size))
+ return E;
+
ExportDirectory =
reinterpret_cast<const export_directory_table_entry *>(IntPtr);
return Error::success();
@@ -623,8 +656,12 @@ Error COFFObjectFile::initBaseRelocPtr() {
return Error::success();
uintptr_t IntPtr = 0;
- if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
+ if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr,
+ "base reloc table"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr, DataEntry->Size))
return E;
+
BaseRelocHeader = reinterpret_cast<const coff_base_reloc_block_header *>(
IntPtr);
BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>(
@@ -646,11 +683,16 @@ Error COFFObjectFile::initDebugDirectoryPtr() {
// Check that the size is a multiple of the entry size.
if (DataEntry->Size % sizeof(debug_directory) != 0)
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "debug directory has uneven size");
uintptr_t IntPtr = 0;
- if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
+ if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr,
+ "debug directory"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr, DataEntry->Size))
return E;
+
DebugDirectoryBegin = reinterpret_cast<const debug_directory *>(IntPtr);
DebugDirectoryEnd = reinterpret_cast<const debug_directory *>(
IntPtr + DataEntry->Size);
@@ -680,7 +722,10 @@ Error COFFObjectFile::initTLSDirectoryPtr() {
static_cast<uint32_t>(DataEntry->Size), DirSize);
uintptr_t IntPtr = 0;
- if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
+ if (Error E =
+ getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr, "TLS directory"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr, DataEntry->Size))
return E;
if (is64())
@@ -701,7 +746,10 @@ Error COFFObjectFile::initLoadConfigPtr() {
if (DataEntry->RelativeVirtualAddress == 0)
return Error::success();
uintptr_t IntPtr = 0;
- if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
+ if (Error E = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr,
+ "load config table"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr, DataEntry->Size))
return E;
LoadConfig = (const void *)IntPtr;
@@ -727,6 +775,14 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object)
DebugDirectoryBegin(nullptr), DebugDirectoryEnd(nullptr),
TLSDirectory32(nullptr), TLSDirectory64(nullptr) {}
+static Error ignoreStrippedErrors(Error E) {
+ if (E.isA<SectionStrippedError>()) {
+ consumeError(std::move(E));
+ return Error::success();
+ }
+ return E;
+}
+
Error COFFObjectFile::initialize() {
// Check that we at least have enough room for a header.
std::error_code EC;
@@ -749,7 +805,8 @@ Error COFFObjectFile::initialize() {
CurPtr = DH->AddressOfNewExeHeader;
// Check the PE magic bytes. ("PE\0\0")
if (memcmp(base() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) != 0) {
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "incorrect PE magic");
}
CurPtr += sizeof(COFF::PEMagic); // Skip the PE magic bytes.
HasPEHeader = true;
@@ -805,7 +862,8 @@ Error COFFObjectFile::initialize() {
DataDirSize = sizeof(data_directory) * PE32PlusHeader->NumberOfRvaAndSize;
} else {
// It's neither PE32 nor PE32+.
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "incorrect PE magic");
}
if (Error E = getObject(DataDirectory, Data, DataDirAddr, DataDirSize))
return E;
@@ -834,33 +892,34 @@ Error COFFObjectFile::initialize() {
} else {
// We had better not have any symbols if we don't have a symbol table.
if (getNumberOfSymbols() != 0) {
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "symbol table missing");
}
}
// Initialize the pointer to the beginning of the import table.
- if (Error E = initImportTablePtr())
+ if (Error E = ignoreStrippedErrors(initImportTablePtr()))
return E;
- if (Error E = initDelayImportTablePtr())
+ if (Error E = ignoreStrippedErrors(initDelayImportTablePtr()))
return E;
// Initialize the pointer to the export table.
- if (Error E = initExportTablePtr())
+ if (Error E = ignoreStrippedErrors(initExportTablePtr()))
return E;
// Initialize the pointer to the base relocation table.
- if (Error E = initBaseRelocPtr())
+ if (Error E = ignoreStrippedErrors(initBaseRelocPtr()))
return E;
// Initialize the pointer to the debug directory.
- if (Error E = initDebugDirectoryPtr())
+ if (Error E = ignoreStrippedErrors(initDebugDirectoryPtr()))
return E;
// Initialize the pointer to the TLS directory.
- if (Error E = initTLSDirectoryPtr())
+ if (Error E = ignoreStrippedErrors(initTLSDirectoryPtr()))
return E;
- if (Error E = initLoadConfigPtr())
+ if (Error E = ignoreStrippedErrors(initLoadConfigPtr()))
return E;
return Error::success();
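
The isA<SectionStrippedError>() dispatch above relies on LLVM's ErrorInfo RTTI. A minimal sketch of such a subclass, shown only to illustrate the mechanism (the real SectionStrippedError is declared in llvm/Object/COFF.h):

  // Sketch: an Error subclass that Error::isA<>() can identify.
  class SectionStrippedError
      : public ErrorInfo<SectionStrippedError, BinaryError> {
  public:
    SectionStrippedError() { setErrorCode(object_error::section_stripped); }
    static char ID;
  };
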
@@ -1021,13 +1080,14 @@ Expected<const coff_section *> COFFObjectFile::getSection(int32_t Index) const {
// We already verified the section table data, so no need to check again.
return SectionTable + (Index - 1);
}
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "section index out of bounds");
}
Expected<StringRef> COFFObjectFile::getString(uint32_t Offset) const {
if (StringTableSize <= 4)
// Tried to get a string from an empty string table.
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed, "string table empty");
if (Offset >= StringTableSize)
return errorCodeToError(object_error::unexpected_eof);
return StringRef(StringTable + Offset);
@@ -1086,13 +1146,7 @@ uint32_t COFFObjectFile::getSymbolIndex(COFFSymbolRef Symbol) const {
Expected<StringRef>
COFFObjectFile::getSectionName(const coff_section *Sec) const {
- StringRef Name;
- if (Sec->Name[COFF::NameSize - 1] == 0)
- // Null terminated, let ::strlen figure out the length.
- Name = Sec->Name;
- else
- // Not null terminated, use all 8 bytes.
- Name = StringRef(Sec->Name, COFF::NameSize);
+ StringRef Name = StringRef(Sec->Name, COFF::NameSize).split('\0').first;
// Check for string table entry. First byte is '/'.
if (Name.startswith("/")) {
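
The split('\0') one-liner covers both encodings of a COFF short section name; a quick sketch of the two cases:

  // Padded with NULs: split at the first NUL keeps ".text".
  StringRef Padded(".text\0\0\0", 8);
  assert(Padded.split('\0').first == ".text");
  // All 8 bytes used: no NUL present, so the full name survives.
  StringRef Full(".textbss", 8);
  assert(Full.split('\0').first == ".textbss");
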
@@ -1414,7 +1468,8 @@ ImportDirectoryEntryRef::lookup_table_symbols() const {
Error ImportDirectoryEntryRef::getName(StringRef &Result) const {
uintptr_t IntPtr = 0;
- if (Error E = OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr))
+ if (Error E = OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr,
+ "import directory name"))
return E;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return Error::success();
@@ -1460,7 +1515,8 @@ DelayImportDirectoryEntryRef::imported_symbols() const {
Error DelayImportDirectoryEntryRef::getName(StringRef &Result) const {
uintptr_t IntPtr = 0;
- if (Error E = OwningObject->getRvaPtr(Table[Index].Name, IntPtr))
+ if (Error E = OwningObject->getRvaPtr(Table[Index].Name, IntPtr,
+ "delay import directory name"))
return E;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return Error::success();
@@ -1477,7 +1533,7 @@ Error DelayImportDirectoryEntryRef::getImportAddress(int AddrIndex,
uint32_t RVA = Table[Index].DelayImportAddressTable +
AddrIndex * (OwningObject->is64() ? 8 : 4);
uintptr_t IntPtr = 0;
- if (Error E = OwningObject->getRvaPtr(RVA, IntPtr))
+ if (Error E = OwningObject->getRvaPtr(RVA, IntPtr, "import address"))
return E;
if (OwningObject->is64())
Result = *reinterpret_cast<const ulittle64_t *>(IntPtr);
@@ -1499,7 +1555,8 @@ void ExportDirectoryEntryRef::moveNext() {
// by ordinal, the empty string is set as a result.
Error ExportDirectoryEntryRef::getDllName(StringRef &Result) const {
uintptr_t IntPtr = 0;
- if (Error E = OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr))
+ if (Error E =
+ OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr, "dll name"))
return E;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return Error::success();
@@ -1520,8 +1577,8 @@ Error ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const {
// Returns the address of the current export symbol.
Error ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const {
uintptr_t IntPtr = 0;
- if (Error EC =
- OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, IntPtr))
+ if (Error EC = OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA,
+ IntPtr, "export address"))
return EC;
const export_address_table_entry *entry =
reinterpret_cast<const export_address_table_entry *>(IntPtr);
@@ -1534,8 +1591,8 @@ Error ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const {
Error
ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
uintptr_t IntPtr = 0;
- if (Error EC =
- OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr))
+ if (Error EC = OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr,
+ "export ordinal table"))
return EC;
const ulittle16_t *Start = reinterpret_cast<const ulittle16_t *>(IntPtr);
@@ -1545,11 +1602,12 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
I < E; ++I, ++Offset) {
if (*I != Index)
continue;
- if (Error EC =
- OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr))
+ if (Error EC = OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr,
+ "export table entry"))
return EC;
const ulittle32_t *NamePtr = reinterpret_cast<const ulittle32_t *>(IntPtr);
- if (Error EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr))
+ if (Error EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr,
+ "export symbol name"))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return Error::success();
@@ -1562,7 +1620,8 @@ Error ExportDirectoryEntryRef::isForwarder(bool &Result) const {
const data_directory *DataEntry =
OwningObject->getDataDirectory(COFF::EXPORT_TABLE);
if (!DataEntry)
- return errorCodeToError(object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "export table missing");
uint32_t RVA;
if (auto EC = getExportRVA(RVA))
return EC;
@@ -1577,7 +1636,7 @@ Error ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const {
if (auto EC = getExportRVA(RVA))
return EC;
uintptr_t IntPtr = 0;
- if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr, "export forward target"))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return Error::success();
@@ -1606,7 +1665,7 @@ Error ImportedSymbolRef::getSymbolName(StringRef &Result) const {
RVA = Entry64[Index].getHintNameRVA();
}
uintptr_t IntPtr = 0;
- if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr, "import symbol name"))
return EC;
// +2 because the first two bytes are the hint.
Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2));
@@ -1645,7 +1704,7 @@ Error ImportedSymbolRef::getOrdinal(uint16_t &Result) const {
RVA = Entry64[Index].getHintNameRVA();
}
uintptr_t IntPtr = 0;
- if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ if (Error EC = OwningObject->getRvaPtr(RVA, IntPtr, "import symbol ordinal"))
return EC;
Result = *reinterpret_cast<const ulittle16_t *>(IntPtr);
return Error::success();
diff --git a/llvm/lib/Object/DXContainer.cpp b/llvm/lib/Object/DXContainer.cpp
new file mode 100644
index 000000000000..ca859c1f69ae
--- /dev/null
+++ b/llvm/lib/Object/DXContainer.cpp
@@ -0,0 +1,111 @@
+//===- DXContainer.cpp - DXContainer object file implementation -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/DXContainer.h"
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Object/Error.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+static Error parseFailed(const Twine &Msg) {
+ return make_error<GenericBinaryError>(Msg.str(), object_error::parse_failed);
+}
+
+template <typename T>
+static Error readStruct(StringRef Buffer, const char *Src, T &Struct) {
+ // Don't read before the beginning or past the end of the file
+ if (Src < Buffer.begin() || Src + sizeof(T) > Buffer.end())
+ return parseFailed("Reading structure out of file bounds");
+
+ memcpy(&Struct, Src, sizeof(T));
+ // DXContainer is always little endian
+ if (sys::IsBigEndianHost)
+ Struct.swapBytes();
+ return Error::success();
+}
+
+template <typename T>
+static Error readInteger(StringRef Buffer, const char *Src, T &Val) {
+ static_assert(std::is_integral<T>::value,
+ "Cannot call readInteger on non-integral type.");
+ assert(reinterpret_cast<uintptr_t>(Src) % alignof(T) == 0 &&
+ "Unaligned read of value from buffer!");
+ // Don't read before the beginning or past the end of the file
+ if (Src < Buffer.begin() || Src + sizeof(T) > Buffer.end())
+ return parseFailed("Reading structure out of file bounds");
+
+ Val = *reinterpret_cast<const T *>(Src);
+ // DXContainer is always little endian
+ if (sys::IsBigEndianHost)
+ sys::swapByteOrder(Val);
+ return Error::success();
+}
+
+DXContainer::DXContainer(MemoryBufferRef O) : Data(O) {}
+
+Error DXContainer::parseHeader() {
+ return readStruct(Data.getBuffer(), Data.getBuffer().data(), Header);
+}
+
+Error DXContainer::parseDXILHeader(uint32_t Offset) {
+ if (DXIL)
+ return parseFailed("More than one DXIL part is present in the file");
+ const char *Current = Data.getBuffer().data() + Offset;
+ dxbc::ProgramHeader Header;
+ if (Error Err = readStruct(Data.getBuffer(), Current, Header))
+ return Err;
+ Current += offsetof(dxbc::ProgramHeader, Bitcode) + Header.Bitcode.Offset;
+ DXIL.emplace(std::make_pair(Header, Current));
+ return Error::success();
+}
+
+Error DXContainer::parsePartOffsets() {
+ const char *Current = Data.getBuffer().data() + sizeof(dxbc::Header);
+ for (uint32_t Part = 0; Part < Header.PartCount; ++Part) {
+ uint32_t PartOffset;
+ if (Error Err = readInteger(Data.getBuffer(), Current, PartOffset))
+ return Err;
+ Current += sizeof(uint32_t);
+ // We need to ensure that each part offset leaves enough space for a part
+ // header. To prevent overflow, we subtract the part header size from the
+ // buffer size, rather than adding to the offset. Since the file header is
+ // larger than the part header we can't reach this code unless the buffer
+ // is larger than the part header, so this can't underflow.
+ if (PartOffset > Data.getBufferSize() - sizeof(dxbc::PartHeader))
+ return parseFailed("Part offset points beyond boundary of the file");
+ PartOffsets.push_back(PartOffset);
+
+ // If this isn't a DXIL part, stop here.
+ if (Data.getBuffer().substr(PartOffset, 4) != "DXIL")
+ continue;
+ if (Error Err = parseDXILHeader(PartOffset + sizeof(dxbc::PartHeader)))
+ return Err;
+ }
+ return Error::success();
+}
+
+Expected<DXContainer> DXContainer::create(MemoryBufferRef Object) {
+ DXContainer Container(Object);
+ if (Error Err = Container.parseHeader())
+ return std::move(Err);
+ if (Error Err = Container.parsePartOffsets())
+ return std::move(Err);
+ return Container;
+}
+
+void DXContainer::PartIterator::updateIteratorImpl(const uint32_t Offset) {
+ StringRef Buffer = Container.Data.getBuffer();
+ const char *Current = Buffer.data() + Offset;
+ // Offsets are validated during parsing, so all offsets in the container are
+ // valid and contain enough readable data to read a header.
+ cantFail(readStruct(Buffer, Current, IteratorState.Part));
+ IteratorState.Data =
+ StringRef(Current + sizeof(dxbc::PartHeader), IteratorState.Part.Size);
+ IteratorState.Offset = Offset;
+}
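
A hedged consumer sketch of the new file; only create() and the validation behavior shown above are assumed:

  // Hypothetical driver: parse a container and propagate parse errors.
  Expected<DXContainer> C = DXContainer::create(Buffer);
  if (!C)
    return C.takeError();
  // Part offsets were bounds-checked in parsePartOffsets(), so later
  // iteration over parts can cantFail() its header reads.
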
diff --git a/llvm/lib/Object/Decompressor.cpp b/llvm/lib/Object/Decompressor.cpp
index 11efd857d1a1..de067ed59ac5 100644
--- a/llvm/lib/Object/Decompressor.cpp
+++ b/llvm/lib/Object/Decompressor.cpp
@@ -8,7 +8,7 @@
#include "llvm/Object/Decompressor.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Endian.h"
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 6e56da1a31f3..6acf4543be5a 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -166,6 +166,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
break;
}
break;
+ case ELF::EM_LOONGARCH:
+ switch (Type) {
+#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def"
+ default:
+ break;
+ }
+ break;
default:
break;
}
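
The .def include is the usual X-macro expansion: ELF_RELOC is defined earlier in this function to emit one case per entry, so each line of LoongArch.def becomes a switch case. A sketch of one expansion (macro shape approximate):

  // #define ELF_RELOC(name, value) case ELF::name: return #name;
  // ELF_RELOC(R_LARCH_32, 1) then expands to roughly:
  //   case ELF::R_LARCH_32:
  //     return "R_LARCH_32";
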
@@ -288,6 +295,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_EHDR);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP_V0);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
@@ -561,11 +569,9 @@ Expected<typename ELFT::DynRange> ELFFile<ELFT>::dynamicEntries() const {
}
if (Dyn.empty())
- // TODO: this error is untested.
return createError("invalid empty dynamic section");
if (Dyn.back().d_tag != ELF::DT_NULL)
- // TODO: this error is untested.
return createError("dynamic sections must be DT_NULL terminated");
return Dyn;
@@ -635,7 +641,6 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
DataExtractor::Cursor Cur(0);
Error ULEBSizeErr = Error::success();
-
// Helper to extract and decode the next ULEB128 value as uint32_t.
// Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the uint32_t
// limit.
@@ -655,18 +660,34 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
return static_cast<uint32_t>(Value);
};
+ uint8_t Version = 0;
while (!ULEBSizeErr && Cur && Cur.tell() < Content.size()) {
+ if (Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) {
+ Version = Data.getU8(Cur);
+ if (!Cur)
+ break;
+ if (Version > 1)
+ return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " +
+ Twine(static_cast<int>(Version)));
+ Data.getU8(Cur); // Feature byte
+ }
uintX_t Address = static_cast<uintX_t>(Data.getAddress(Cur));
uint32_t NumBlocks = ReadULEB128AsUInt32();
std::vector<BBAddrMap::BBEntry> BBEntries;
+ uint32_t PrevBBEndOffset = 0;
for (uint32_t BlockID = 0; !ULEBSizeErr && Cur && (BlockID < NumBlocks);
++BlockID) {
uint32_t Offset = ReadULEB128AsUInt32();
uint32_t Size = ReadULEB128AsUInt32();
uint32_t Metadata = ReadULEB128AsUInt32();
+ if (Version >= 1) {
+ // Offset is calculated relative to the end of the previous BB.
+ Offset += PrevBBEndOffset;
+ PrevBBEndOffset = Offset + Size;
+ }
BBEntries.push_back({Offset, Size, Metadata});
}
- FunctionEntries.push_back({Address, BBEntries});
+ FunctionEntries.push_back({Address, std::move(BBEntries)});
}
// Either Cur is in the error state, or ULEBSizeErr is set (not both), but
// we join the two errors here to be safe.
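
A worked example of the version >= 1 delta encoding, with made-up values:

  // Raw (Offset, Size) pairs as read:  (0, 16), (4, 8), (0, 12)
  // Block 0: Offset = 0 + 0  = 0    PrevBBEndOffset = 0  + 16 = 16
  // Block 1: Offset = 4 + 16 = 20   PrevBBEndOffset = 20 + 8  = 28
  // Block 2: Offset = 0 + 28 = 28   PrevBBEndOffset = 28 + 12 = 40
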
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index cf1f12d9a9a7..38de669f1d3d 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -21,7 +21,6 @@
#include "llvm/Object/Error.h"
#include "llvm/Support/ARMAttributeParser.h"
#include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributeParser.h"
@@ -31,7 +30,6 @@
#include <cstdint>
#include <memory>
#include <string>
-#include <system_error>
#include <utility>
using namespace llvm;
@@ -169,11 +167,11 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
bool isV7 = false;
Optional<unsigned> Attr =
Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
- if (Attr.hasValue())
+ if (Attr)
isV7 = Attr.getValue() == ARMBuildAttrs::v7;
Attr = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
- if (Attr.hasValue()) {
+ if (Attr) {
switch (Attr.getValue()) {
case ARMBuildAttrs::ApplicationProfile:
Features.AddFeature("aclass");
@@ -192,7 +190,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
}
Attr = Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
- if (Attr.hasValue()) {
+ if (Attr) {
switch (Attr.getValue()) {
default:
break;
@@ -207,7 +205,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
}
Attr = Attributes.getAttributeValue(ARMBuildAttrs::FP_arch);
- if (Attr.hasValue()) {
+ if (Attr) {
switch (Attr.getValue()) {
default:
break;
@@ -231,7 +229,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
}
Attr = Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch);
- if (Attr.hasValue()) {
+ if (Attr) {
switch (Attr.getValue()) {
default:
break;
@@ -250,7 +248,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
}
Attr = Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch);
- if (Attr.hasValue()) {
+ if (Attr) {
switch (Attr.getValue()) {
default:
break;
@@ -269,7 +267,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
}
Attr = Attributes.getAttributeValue(ARMBuildAttrs::DIV_use);
- if (Attr.hasValue()) {
+ if (Attr) {
switch (Attr.getValue()) {
default:
break;
@@ -305,11 +303,11 @@ SubtargetFeatures ELFObjectFileBase::getRISCVFeatures() const {
}
Optional<StringRef> Attr = Attributes.getAttributeString(RISCVAttrs::ARCH);
- if (Attr.hasValue()) {
+ if (Attr) {
// The Arch pattern is [rv32|rv64][i|e]version(_[m|a|f|d|c]version)*
// Version string pattern is (major)p(minor). Major and minor are optional.
// For example, a version number could be 2p0, 2, or p92.
- StringRef Arch = Attr.getValue();
+ StringRef Arch = *Attr;
if (Arch.consume_front("rv32"))
Features.AddFeature("64bit", false);
else if (Arch.consume_front("rv64"))
@@ -360,6 +358,8 @@ Optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
switch (getEMachine()) {
case ELF::EM_AMDGPU:
return getAMDGPUCPUName();
+ case ELF::EM_PPC64:
+ return StringRef("future");
default:
return None;
}
@@ -461,6 +461,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90a";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:
return "gfx90c";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
+ return "gfx940";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
@@ -483,6 +485,18 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1034";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035:
return "gfx1035";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036:
+ return "gfx1036";
+
+ // AMDGCN GFX11.
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100:
+ return "gfx1100";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101:
+ return "gfx1101";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102:
+ return "gfx1102";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103:
+ return "gfx1103";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
@@ -509,7 +523,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
Optional<unsigned> Attr =
Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
- if (Attr.hasValue()) {
+ if (Attr) {
switch (Attr.getValue()) {
case ARMBuildAttrs::v4:
Triple += "v4";
@@ -541,7 +555,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
case ARMBuildAttrs::v7: {
Optional<unsigned> ArchProfileAttr =
Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
- if (ArchProfileAttr.hasValue() &&
+ if (ArchProfileAttr &&
ArchProfileAttr.getValue() == ARMBuildAttrs::MicroControllerProfile)
Triple += "v7m";
else
@@ -572,6 +586,9 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
case ARMBuildAttrs::v8_1_M_Main:
Triple += "v8.1m.main";
break;
+ case ARMBuildAttrs::v9_A:
+ Triple += "v9a";
+ break;
}
}
if (!isLittleEndian())
@@ -656,6 +673,36 @@ ELFObjectFileBase::getPltAddresses() const {
}
template <class ELFT>
+Expected<std::vector<BBAddrMap>>
+readBBAddrMapImpl(const ELFFile<ELFT> &EF,
+ Optional<unsigned> TextSectionIndex) {
+ using Elf_Shdr = typename ELFT::Shdr;
+ std::vector<BBAddrMap> BBAddrMaps;
+ const auto &Sections = cantFail(EF.sections());
+ for (const Elf_Shdr &Sec : Sections) {
+ if (Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP &&
+ Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP_V0)
+ continue;
+ if (TextSectionIndex) {
+ Expected<const Elf_Shdr *> TextSecOrErr = EF.getSection(Sec.sh_link);
+ if (!TextSecOrErr)
+ return createError("unable to get the linked-to section for " +
+ describe(EF, Sec) + ": " +
+ toString(TextSecOrErr.takeError()));
+ if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr))
+ continue;
+ }
+ Expected<std::vector<BBAddrMap>> BBAddrMapOrErr = EF.decodeBBAddrMap(Sec);
+ if (!BBAddrMapOrErr)
+ return createError("unable to read " + describe(EF, Sec) + ": " +
+ toString(BBAddrMapOrErr.takeError()));
+ std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(),
+ std::back_inserter(BBAddrMaps));
+ }
+ return BBAddrMaps;
+}
+
+template <class ELFT>
static Expected<std::vector<VersionEntry>>
readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
@@ -723,3 +770,17 @@ ELFObjectFileBase::readDynsymVersions() const {
return readDynsymVersionsImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
Symbols);
}
+
+Expected<std::vector<BBAddrMap>>
+ELFObjectFileBase::readBBAddrMap(Optional<unsigned> TextSectionIndex) const {
+ if (const auto *Obj = dyn_cast<ELF32LEObjectFile>(this))
+ return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+ if (const auto *Obj = dyn_cast<ELF64LEObjectFile>(this))
+ return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+ if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this))
+ return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+ if (const auto *Obj = cast<ELF64BEObjectFile>(this))
+ return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
+ else
+ llvm_unreachable("Unsupported binary format");
+}
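
A caller-side sketch of the new entry point (BBAddrMap field names per llvm/Object/ELFTypes.h):

  // Hypothetical use: dump maps whose SHT_LLVM_BB_ADDR_MAP section links
  // to text section index 1.
  Expected<std::vector<BBAddrMap>> MapsOrErr = ObjFile->readBBAddrMap(1);
  if (!MapsOrErr)
    return MapsOrErr.takeError();
  for (const BBAddrMap &Map : *MapsOrErr)
    outs() << format_hex(Map.Addr, 10) << ": " << Map.BBEntries.size()
           << " basic blocks\n";
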
diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp
index bc75bc6c0445..6d1e3f2a59d0 100644
--- a/llvm/lib/Object/Error.cpp
+++ b/llvm/lib/Object/Error.cpp
@@ -52,6 +52,8 @@ std::string _object_error_category::message(int EV) const {
return "Bitcode section not found in object file";
case object_error::invalid_symbol_index:
return "Invalid symbol index";
+ case object_error::section_stripped:
+ return "Section has been stripped from the object file";
}
llvm_unreachable("An enumerator of object_error does not have a message "
"defined.");
diff --git a/llvm/lib/Object/IRObjectFile.cpp b/llvm/lib/Object/IRObjectFile.cpp
index c653262791cc..091930988bd0 100644
--- a/llvm/lib/Object/IRObjectFile.cpp
+++ b/llvm/lib/Object/IRObjectFile.cpp
@@ -11,20 +11,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/IRObjectFile.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/IR/GVMaterializer.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace object;
+namespace llvm {
+class LLVMContext;
+class raw_ostream;
+} // namespace llvm
+
IRObjectFile::IRObjectFile(MemoryBufferRef Object,
std::vector<std::unique_ptr<Module>> Mods)
: SymbolicFile(Binary::ID_IR, Object), Mods(std::move(Mods)) {
@@ -32,7 +32,7 @@ IRObjectFile::IRObjectFile(MemoryBufferRef Object,
SymTab.addModule(M.get());
}
-IRObjectFile::~IRObjectFile() {}
+IRObjectFile::~IRObjectFile() = default;
static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) {
return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p);
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index dea3d90d3560..5a7ecdb1fc25 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Allocator.h"
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 3d95b18f4672..2f463a1bd458 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -34,7 +34,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
@@ -1303,7 +1303,6 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
}
const char *DyldIdLoadCmd = nullptr;
- const char *FuncStartsLoadCmd = nullptr;
const char *SplitInfoLoadCmd = nullptr;
const char *CodeSignDrsLoadCmd = nullptr;
const char *CodeSignLoadCmd = nullptr;
@@ -1381,6 +1380,11 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd,
"LC_DYLD_INFO_ONLY", Elements)))
return;
+ } else if (Load.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS) {
+ if ((Err = checkLinkeditDataCommand(
+ *this, Load, I, &DyldChainedFixupsLoadCmd,
+ "LC_DYLD_CHAINED_FIXUPS", Elements, "chained fixups")))
+ return;
} else if (Load.C.cmd == MachO::LC_UUID) {
if (Load.C.cmdsize != sizeof(MachO::uuid_command)) {
Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect "
@@ -1596,9 +1600,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
// Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported.
} else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) {
- if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
- &TwoLevelHintsLoadCmd, Elements)))
- return;
+ if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
+ &TwoLevelHintsLoadCmd, Elements)))
+ return;
} else if (Load.C.cmd == MachO::LC_IDENT) {
// Note: LC_IDENT is ignored.
continue;
@@ -2993,7 +2997,9 @@ void ExportEntry::pushNode(uint64_t offset) {
return;
}
if (O != nullptr) {
- if (State.Other > O->getLibraryCount()) {
+ // Only positive numbers represent library ordinals. Zero and negative
+ // numbers have special meaning (see BindSpecialDylib).
+ if ((int64_t)State.Other > 0 && State.Other > O->getLibraryCount()) {
*E = malformedError(
"bad library ordinal: " + Twine((int)State.Other) + " (max " +
Twine((int)O->getLibraryCount()) +
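
The sign check matters because zero and negative ordinals select special binding behavior rather than a library; a sketch of the signed interpretation (constants are the standard ones from llvm/BinaryFormat/MachO.h):

  // Ordinal > 0: 1-based index into the dylib load commands.
  //   0  (BIND_SPECIAL_DYLIB_SELF)            -> bind to this image
  //  -1  (BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) -> look up in the main binary
  //  -2  (BIND_SPECIAL_DYLIB_FLAT_LOOKUP)     -> flat namespace lookup
  int64_t Ordinal = (int64_t)State.Other;
  bool NeedsRangeCheck = Ordinal > 0; // special values skip the max check
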
@@ -3186,6 +3192,106 @@ iterator_range<export_iterator> MachOObjectFile::exports(Error &Err) const {
return exports(Err, getDyldInfoExportsTrie(), this);
}
+MachOAbstractFixupEntry::MachOAbstractFixupEntry(Error *E,
+ const MachOObjectFile *O)
+ : E(E), O(O) {
+ // Cache the vmaddress of __TEXT
+ for (const auto &Command : O->load_commands()) {
+ if (Command.C.cmd == MachO::LC_SEGMENT) {
+ MachO::segment_command SLC = O->getSegmentLoadCommand(Command);
+ if (StringRef(SLC.segname) == StringRef("__TEXT")) {
+ TextAddress = SLC.vmaddr;
+ break;
+ }
+ } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
+ MachO::segment_command_64 SLC_64 = O->getSegment64LoadCommand(Command);
+ if (StringRef(SLC_64.segname) == StringRef("__TEXT")) {
+ TextAddress = SLC_64.vmaddr;
+ break;
+ }
+ }
+ }
+}
+
+int32_t MachOAbstractFixupEntry::segmentIndex() const { return SegmentIndex; }
+
+uint64_t MachOAbstractFixupEntry::segmentOffset() const {
+ return SegmentOffset;
+}
+
+uint64_t MachOAbstractFixupEntry::segmentAddress() const {
+ return O->BindRebaseAddress(SegmentIndex, 0);
+}
+
+StringRef MachOAbstractFixupEntry::segmentName() const {
+ return O->BindRebaseSegmentName(SegmentIndex);
+}
+
+StringRef MachOAbstractFixupEntry::sectionName() const {
+ return O->BindRebaseSectionName(SegmentIndex, SegmentOffset);
+}
+
+uint64_t MachOAbstractFixupEntry::address() const {
+ return O->BindRebaseAddress(SegmentIndex, SegmentOffset);
+}
+
+StringRef MachOAbstractFixupEntry::symbolName() const { return SymbolName; }
+
+int64_t MachOAbstractFixupEntry::addend() const { return Addend; }
+
+uint32_t MachOAbstractFixupEntry::flags() const { return Flags; }
+
+int MachOAbstractFixupEntry::ordinal() const { return Ordinal; }
+
+StringRef MachOAbstractFixupEntry::typeName() const { return "unknown"; }
+
+void MachOAbstractFixupEntry::moveToFirst() {
+ SegmentOffset = 0;
+ SegmentIndex = -1;
+ Ordinal = 0;
+ Flags = 0;
+ Addend = 0;
+ Done = false;
+}
+
+void MachOAbstractFixupEntry::moveToEnd() { Done = true; }
+
+MachOChainedFixupEntry::MachOChainedFixupEntry(Error *E,
+ const MachOObjectFile *O,
+ bool Parse)
+ : MachOAbstractFixupEntry(E, O) {
+ ErrorAsOutParameter e(E);
+ if (!Parse)
+ return;
+ if (auto FixupTargetsOrErr = O->getDyldChainedFixupTargets())
+ FixupTargets = *FixupTargetsOrErr;
+ else {
+ *E = FixupTargetsOrErr.takeError();
+ return;
+ }
+}
+
+void MachOChainedFixupEntry::moveToFirst() {
+ MachOAbstractFixupEntry::moveToFirst();
+ FixupIndex = 0;
+ moveNext();
+}
+
+void MachOChainedFixupEntry::moveToEnd() {
+ MachOAbstractFixupEntry::moveToEnd();
+}
+
+void MachOChainedFixupEntry::moveNext() { Done = true; }
+
+bool MachOChainedFixupEntry::operator==(
+ const MachOChainedFixupEntry &Other) const {
+ if (Done == Other.Done)
+ return true;
+ if (FixupIndex == Other.FixupIndex)
+ return true;
+ return false;
+}
+
MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> Bytes, bool is64Bit)
: E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()),
@@ -4194,6 +4300,16 @@ iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) {
MachOBindEntry::Kind::Weak);
}
+iterator_range<fixup_iterator> MachOObjectFile::fixupTable(Error &Err) {
+ MachOChainedFixupEntry Start(&Err, this, true);
+ Start.moveToFirst();
+
+ MachOChainedFixupEntry Finish(&Err, this, false);
+ Finish.moveToEnd();
+
+ return make_range(fixup_iterator(Start), fixup_iterator(Finish));
+}
+
MachOObjectFile::load_command_iterator
MachOObjectFile::begin_load_commands() const {
return LoadCommands.begin();
@@ -4649,6 +4765,72 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
}
+Expected<Optional<MachO::dyld_chained_fixups_header>>
+MachOObjectFile::getChainedFixupsHeader() const {
+ // Load the dyld chained fixups load command.
+ if (!DyldChainedFixupsLoadCmd)
+ return llvm::None;
+ auto DyldChainedFixupsOrErr = getStructOrErr<MachO::linkedit_data_command>(
+ *this, DyldChainedFixupsLoadCmd);
+ if (!DyldChainedFixupsOrErr)
+ return DyldChainedFixupsOrErr.takeError();
+ MachO::linkedit_data_command DyldChainedFixups = DyldChainedFixupsOrErr.get();
+
+ // If the load command is present but the data offset has been zeroed out,
+ // as is the case for dylib stubs, return None (no error).
+ uint64_t CFHeaderOffset = DyldChainedFixups.dataoff;
+ if (CFHeaderOffset == 0)
+ return llvm::None;
+
+ // Load the dyld chained fixups header.
+ const char *CFHeaderPtr = getPtr(*this, CFHeaderOffset);
+ auto CFHeaderOrErr =
+ getStructOrErr<MachO::dyld_chained_fixups_header>(*this, CFHeaderPtr);
+ if (!CFHeaderOrErr)
+ return CFHeaderOrErr.takeError();
+ MachO::dyld_chained_fixups_header CFHeader = CFHeaderOrErr.get();
+
+ // Reject unknown chained fixup formats.
+ if (CFHeader.fixups_version != 0)
+ return malformedError(Twine("bad chained fixups: unknown version: ") +
+ Twine(CFHeader.fixups_version));
+ if (CFHeader.imports_format < 1 || CFHeader.imports_format > 3)
+ return malformedError(
+ Twine("bad chained fixups: unknown imports format: ") +
+ Twine(CFHeader.imports_format));
+
+ // Validate the image format.
+ //
+ // Load the image starts.
+ uint64_t CFImageStartsOffset = (CFHeaderOffset + CFHeader.starts_offset);
+ if (CFHeader.starts_offset < sizeof(MachO::dyld_chained_fixups_header)) {
+ return malformedError(Twine("bad chained fixups: image starts offset ") +
+ Twine(CFHeader.starts_offset) +
+ " overlaps with chained fixups header");
+ }
+ uint32_t EndOffset = DyldChainedFixups.dataoff + DyldChainedFixups.datasize;
+ if (CFImageStartsOffset + sizeof(MachO::dyld_chained_starts_in_image) >
+ EndOffset) {
+ return malformedError(Twine("bad chained fixups: image starts end ") +
+ Twine(CFImageStartsOffset +
+ sizeof(MachO::dyld_chained_starts_in_image)) +
+ " extends past end " + Twine(EndOffset));
+ }
+
+ return CFHeader;
+}
+
+Expected<std::vector<ChainedFixupTarget>>
+MachOObjectFile::getDyldChainedFixupTargets() const {
+ auto CFHeaderOrErr = getChainedFixupsHeader();
+ if (!CFHeaderOrErr)
+ return CFHeaderOrErr.takeError();
+ std::vector<ChainedFixupTarget> Targets;
+ if (!(*CFHeaderOrErr))
+ return Targets;
+ return Targets;
+}
+
ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
if (!DyldInfoLoadCmd)
return None;
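
Because the result is an Expected<Optional<...>>, callers unwrap twice; a hypothetical caller-side sketch:

  // Distinguish "malformed" from "not present".
  auto HeaderOrErr = O->getChainedFixupsHeader();
  if (!HeaderOrErr)
    return HeaderOrErr.takeError(); // malformed command or header
  if (!*HeaderOrErr)
    return Error::success();        // present but zeroed (dylib stub)
  const MachO::dyld_chained_fixups_header &H = **HeaderOrErr;
  (void)H.imports_format;           // 1..3 per the validation above
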
@@ -4663,6 +4845,21 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
return makeArrayRef(Ptr, DyldInfo.export_size);
}
+SmallVector<uint64_t> MachOObjectFile::getFunctionStarts() const {
+ if (!FuncStartsLoadCmd)
+ return {};
+
+ auto InfoOrErr =
+ getStructOrErr<MachO::linkedit_data_command>(*this, FuncStartsLoadCmd);
+ if (!InfoOrErr)
+ return {};
+
+ MachO::linkedit_data_command Info = InfoOrErr.get();
+ SmallVector<uint64_t, 8> FunctionStarts;
+ this->ReadULEB128s(Info.dataoff, FunctionStarts);
+ return std::move(FunctionStarts);
+}
+
ArrayRef<uint8_t> MachOObjectFile::getUuid() const {
if (!UuidLoadCmd)
return None;
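
LC_FUNCTION_STARTS encodes ULEB128 deltas terminated by a zero byte, and ReadULEB128s accumulates them into absolute offsets. A worked decode, assuming that encoding:

  // Stream: 90 20 | 40 | 30 | 00   (three deltas, then the terminator)
  //   0x90 0x20 -> delta 0x1010 -> start 0x1010
  //   0x40      -> delta 0x40   -> start 0x1050
  //   0x30      -> delta 0x30   -> start 0x1080
  // getFunctionStarts() would return {0x1010, 0x1050, 0x1080}.
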
@@ -4778,3 +4975,23 @@ MachOObjectFile::mapReflectionSectionNameToEnumValue(
.Default(llvm::binaryformat::Swift5ReflectionSectionKind::unknown);
#undef HANDLE_SWIFT_SECTION
}
+
+bool MachOObjectFile::isMachOPairedReloc(uint64_t RelocType, uint64_t Arch) {
+ switch (Arch) {
+ case Triple::x86:
+ return RelocType == MachO::GENERIC_RELOC_SECTDIFF ||
+ RelocType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
+ case Triple::x86_64:
+ return RelocType == MachO::X86_64_RELOC_SUBTRACTOR;
+ case Triple::arm:
+ case Triple::thumb:
+ return RelocType == MachO::ARM_RELOC_SECTDIFF ||
+ RelocType == MachO::ARM_RELOC_LOCAL_SECTDIFF ||
+ RelocType == MachO::ARM_RELOC_HALF ||
+ RelocType == MachO::ARM_RELOC_HALF_SECTDIFF;
+ case Triple::aarch64:
+ return RelocType == MachO::ARM64_RELOC_SUBTRACTOR;
+ default:
+ return false;
+ }
+}
diff --git a/llvm/lib/Object/MachOUniversal.cpp b/llvm/lib/Object/MachOUniversal.cpp
index f3ce005e6ef9..c2c2b67814dc 100644
--- a/llvm/lib/Object/MachOUniversal.cpp
+++ b/llvm/lib/Object/MachOUniversal.cpp
@@ -15,9 +15,9 @@
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/type_traits.h"
using namespace llvm;
using namespace object;
diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp
index ae1ff09a4f8f..333706baf8c1 100644
--- a/llvm/lib/Object/MachOUniversalWriter.cpp
+++ b/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -12,13 +12,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/MachOUniversalWriter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
-#include "llvm/Object/Error.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace object;
@@ -205,7 +213,7 @@ Expected<Slice> Slice::create(const Archive &A, LLVMContext *LLVMCtx) {
.c_str());
if (MFO) {
- Slice ArchiveSlice(*(MFO.get()), MFO->is64Bit() ? 3 : 2);
+ Slice ArchiveSlice(*(MFO), MFO->is64Bit() ? 3 : 2);
ArchiveSlice.B = &A;
return ArchiveSlice;
}
diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp
index 954d1f09f4e9..11274a7fcc16 100644
--- a/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -15,7 +15,6 @@
#include "llvm/Object/ModuleSymbolTable.h"
#include "RecordStreamer.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -27,7 +26,6 @@
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -39,7 +37,6 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp
index 576eb8d069d6..d5e67160dfa3 100644
--- a/llvm/lib/Object/Object.cpp
+++ b/llvm/lib/Object/Object.cpp
@@ -120,6 +120,8 @@ LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR) {
return LLVMBinaryTypeMachO64L;
case ID_MachO64B:
return LLVMBinaryTypeMachO64B;
+ case ID_Offload:
+ return LLVMBinaryTypeOffload;
case ID_Wasm:
return LLVMBinaryTypeWasm;
case ID_StartObjects:
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index 6fd02f3b9592..1be8f11751be 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -21,10 +21,9 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cstdint>
#include <memory>
#include <system_error>
@@ -147,6 +146,9 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
case file_magic::pdb:
case file_magic::minidump:
case file_magic::goff_object:
+ case file_magic::cuda_fatbinary:
+ case file_magic::offload_binary:
+ case file_magic::dxcontainer_object:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::tapi_file:
return errorCodeToError(object_error::invalid_file_type);
@@ -198,3 +200,12 @@ ObjectFile::createObjectFile(StringRef ObjectPath) {
return OwningBinary<ObjectFile>(std::move(Obj), std::move(Buffer));
}
+
+bool ObjectFile::isReflectionSectionStrippable(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflectionSectionKind)
+ const {
+ using llvm::binaryformat::Swift5ReflectionSectionKind;
+ return ReflectionSectionKind == Swift5ReflectionSectionKind::fieldmd ||
+ ReflectionSectionKind == Swift5ReflectionSectionKind::reflstr ||
+ ReflectionSectionKind == Swift5ReflectionSectionKind::assocty;
+}
diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp
new file mode 100644
index 000000000000..21946ec2d6fb
--- /dev/null
+++ b/llvm/lib/Object/OffloadBinary.cpp
@@ -0,0 +1,164 @@
+//===- OffloadBinary.cpp - Utilities for handling offloading code -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/OffloadBinary.h"
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/FileOutputBuffer.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+Expected<std::unique_ptr<OffloadBinary>>
+OffloadBinary::create(MemoryBufferRef Buf) {
+ if (Buf.getBufferSize() < sizeof(Header) + sizeof(Entry))
+ return errorCodeToError(object_error::parse_failed);
+
+ // Check for 0x10FF1OAD magic bytes.
+ if (identify_magic(Buf.getBuffer()) != file_magic::offload_binary)
+ return errorCodeToError(object_error::parse_failed);
+
+ // Make sure that the data has sufficient alignment.
+ if (!isAddrAligned(Align(getAlignment()), Buf.getBufferStart()))
+ return errorCodeToError(object_error::parse_failed);
+
+ const char *Start = Buf.getBufferStart();
+ const Header *TheHeader = reinterpret_cast<const Header *>(Start);
+ if (TheHeader->Version != OffloadBinary::Version)
+ return errorCodeToError(object_error::parse_failed);
+
+ if (TheHeader->Size > Buf.getBufferSize() ||
+ TheHeader->EntryOffset > TheHeader->Size - sizeof(Entry) ||
+ TheHeader->EntrySize > TheHeader->Size - sizeof(Header))
+ return errorCodeToError(object_error::unexpected_eof);
+
+ const Entry *TheEntry =
+ reinterpret_cast<const Entry *>(&Start[TheHeader->EntryOffset]);
+
+ if (TheEntry->ImageOffset > Buf.getBufferSize() ||
+ TheEntry->StringOffset > Buf.getBufferSize())
+ return errorCodeToError(object_error::unexpected_eof);
+
+ return std::unique_ptr<OffloadBinary>(
+ new OffloadBinary(Buf, TheHeader, TheEntry));
+}
+
+std::unique_ptr<MemoryBuffer>
+OffloadBinary::write(const OffloadingImage &OffloadingData) {
+ // Create a null-terminated string table with all the used strings.
+ StringTableBuilder StrTab(StringTableBuilder::ELF);
+ for (auto &KeyAndValue : OffloadingData.StringData) {
+ StrTab.add(KeyAndValue.getKey());
+ StrTab.add(KeyAndValue.getValue());
+ }
+ StrTab.finalize();
+
+ uint64_t StringEntrySize =
+ sizeof(StringEntry) * OffloadingData.StringData.size();
+
+ // Make sure the image we're wrapping around is aligned as well.
+ uint64_t BinaryDataSize = alignTo(sizeof(Header) + sizeof(Entry) +
+ StringEntrySize + StrTab.getSize(),
+ getAlignment());
+
+ // Create the header and fill in the offsets. The entry will be directly
+ // placed after the header in memory. Align the size to the alignment of the
+ // header so this can be placed contiguously in a single section.
+ Header TheHeader;
+ TheHeader.Size = alignTo(
+ BinaryDataSize + OffloadingData.Image->getBufferSize(), getAlignment());
+ TheHeader.EntryOffset = sizeof(Header);
+ TheHeader.EntrySize = sizeof(Entry);
+
+ // Create the entry using the string table offsets. The string table will be
+ // placed directly after the entry in memory, and the image after that.
+ Entry TheEntry;
+ TheEntry.TheImageKind = OffloadingData.TheImageKind;
+ TheEntry.TheOffloadKind = OffloadingData.TheOffloadKind;
+ TheEntry.Flags = OffloadingData.Flags;
+ TheEntry.StringOffset = sizeof(Header) + sizeof(Entry);
+ TheEntry.NumStrings = OffloadingData.StringData.size();
+
+ TheEntry.ImageOffset = BinaryDataSize;
+ TheEntry.ImageSize = OffloadingData.Image->getBufferSize();
+
+ SmallVector<char> Data;
+ Data.reserve(TheHeader.Size);
+ raw_svector_ostream OS(Data);
+ OS << StringRef(reinterpret_cast<char *>(&TheHeader), sizeof(Header));
+ OS << StringRef(reinterpret_cast<char *>(&TheEntry), sizeof(Entry));
+ for (auto &KeyAndValue : OffloadingData.StringData) {
+ uint64_t Offset = sizeof(Header) + sizeof(Entry) + StringEntrySize;
+ StringEntry Map{Offset + StrTab.getOffset(KeyAndValue.getKey()),
+ Offset + StrTab.getOffset(KeyAndValue.getValue())};
+ OS << StringRef(reinterpret_cast<char *>(&Map), sizeof(StringEntry));
+ }
+ StrTab.write(OS);
+ // Add padding to required image alignment.
+ OS.write_zeros(TheEntry.ImageOffset - OS.tell());
+ OS << OffloadingData.Image->getBuffer();
+
+ // Add final padding to required alignment.
+ assert(TheHeader.Size >= OS.tell() && "Too much data written?");
+ OS.write_zeros(TheHeader.Size - OS.tell());
+ assert(TheHeader.Size == OS.tell() && "Size mismatch");
+
+ return MemoryBuffer::getMemBufferCopy(OS.str());
+}
+
+OffloadKind object::getOffloadKind(StringRef Name) {
+ return llvm::StringSwitch<OffloadKind>(Name)
+ .Case("openmp", OFK_OpenMP)
+ .Case("cuda", OFK_Cuda)
+ .Case("hip", OFK_HIP)
+ .Default(OFK_None);
+}
+
+StringRef object::getOffloadKindName(OffloadKind Kind) {
+ switch (Kind) {
+ case OFK_OpenMP:
+ return "openmp";
+ case OFK_Cuda:
+ return "cuda";
+ case OFK_HIP:
+ return "hip";
+ default:
+ return "none";
+ }
+}
+
+ImageKind object::getImageKind(StringRef Name) {
+ return llvm::StringSwitch<ImageKind>(Name)
+ .Case("o", IMG_Object)
+ .Case("bc", IMG_Bitcode)
+ .Case("cubin", IMG_Cubin)
+ .Case("fatbin", IMG_Fatbinary)
+ .Case("s", IMG_PTX)
+ .Default(IMG_None);
+}
+
+StringRef object::getImageKindName(ImageKind Kind) {
+ switch (Kind) {
+ case IMG_Object:
+ return "o";
+ case IMG_Bitcode:
+ return "bc";
+ case IMG_Cubin:
+ return "cubin";
+ case IMG_Fatbinary:
+ return "fatbin";
+ case IMG_PTX:
+ return "s";
+ default:
+ return "";
+ }
+}
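
A hedged round-trip sketch; the OffloadingImage field shapes are inferred from write() above, DeviceCode is a hypothetical buffer of device code, and the "triple" key is illustrative rather than a required name:

  // Hypothetical round trip: serialize one image, then reparse it.
  OffloadingImage Img;
  Img.TheImageKind = IMG_Object;
  Img.TheOffloadKind = OFK_OpenMP;
  Img.StringData["triple"] = "nvptx64-nvidia-cuda"; // illustrative entry
  Img.Image = MemoryBuffer::getMemBufferCopy(DeviceCode);
  std::unique_ptr<MemoryBuffer> Buf = OffloadBinary::write(Img);
  Expected<std::unique_ptr<OffloadBinary>> BinOrErr =
      OffloadBinary::create(Buf->getMemBufferRef());
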
diff --git a/llvm/lib/Object/RecordStreamer.h b/llvm/lib/Object/RecordStreamer.h
index 957d80f33bf4..5c6541e5052d 100644
--- a/llvm/lib/Object/RecordStreamer.h
+++ b/llvm/lib/Object/RecordStreamer.h
@@ -57,10 +57,10 @@ public:
// Ignore COFF-specific directives; we do not need any information from them,
// but the default implementation of these methods crashes, so we override
// them with versions that do nothing.
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void EmitCOFFSymbolStorageClass(int StorageClass) override {}
- void EmitCOFFSymbolType(int Type) override {}
- void EndCOFFSymbolDef() override {}
+ void beginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void emitCOFFSymbolStorageClass(int StorageClass) override {}
+ void emitCOFFSymbolType(int Type) override {}
+ void endCOFFSymbolDef() override {}
/// Record .symver aliases for later processing.
void emitELFSymverDirective(const MCSymbol *OriginalSym, StringRef Name,
diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp
index 00a45e2c5d4e..e14301663df3 100644
--- a/llvm/lib/Object/RelocationResolver.cpp
+++ b/llvm/lib/Object/RelocationResolver.cpp
@@ -11,6 +11,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/RelocationResolver.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <vector>
namespace llvm {
namespace object {
@@ -63,6 +78,7 @@ static bool supportsAArch64(uint64_t Type) {
switch (Type) {
case ELF::R_AARCH64_ABS32:
case ELF::R_AARCH64_ABS64:
+ case ELF::R_AARCH64_PREL16:
case ELF::R_AARCH64_PREL32:
case ELF::R_AARCH64_PREL64:
return true;
@@ -78,6 +94,8 @@ static uint64_t resolveAArch64(uint64_t Type, uint64_t Offset, uint64_t S,
return (S + Addend) & 0xFFFFFFFF;
case ELF::R_AARCH64_ABS64:
return S + Addend;
+ case ELF::R_AARCH64_PREL16:
+ return (S + Addend - Offset) & 0xFFFF;
case ELF::R_AARCH64_PREL32:
return (S + Addend - Offset) & 0xFFFFFFFF;
case ELF::R_AARCH64_PREL64:
@@ -468,6 +486,31 @@ static uint64_t resolveRISCV(uint64_t Type, uint64_t Offset, uint64_t S,
}
}
+static bool supportsCSKY(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_CKCORE_NONE:
+ case ELF::R_CKCORE_ADDR32:
+ case ELF::R_CKCORE_PCREL32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveCSKY(uint64_t Type, uint64_t Offset, uint64_t S,
+ uint64_t LocData, int64_t Addend) {
+ switch (Type) {
+ case ELF::R_CKCORE_NONE:
+ return LocData;
+ case ELF::R_CKCORE_ADDR32:
+ return (S + Addend) & 0xFFFFFFFF;
+ case ELF::R_CKCORE_PCREL32:
+ return (S + Addend - Offset) & 0xFFFFFFFF;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
static bool supportsCOFFX86(uint64_t Type) {
switch (Type) {
case COFF::IMAGE_REL_I386_SECREL:
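
A worked example of the PC-relative arithmetic in resolveCSKY, with made-up values:

  // R_CKCORE_PCREL32:
  //   S (symbol) = 0x1000, Addend = 0x10, Offset (P) = 0x0800
  //   result = (S + A - P) & 0xFFFFFFFF = 0x1000 + 0x10 - 0x800 = 0x810
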
@@ -715,6 +758,8 @@ getRelocationResolver(const ObjectFile &Obj) {
return {supportsHexagon, resolveHexagon};
case Triple::riscv32:
return {supportsRISCV, resolveRISCV};
+ case Triple::csky:
+ return {supportsCSKY, resolveCSKY};
default:
return {nullptr, nullptr};
}
diff --git a/llvm/lib/Object/SymbolicFile.cpp b/llvm/lib/Object/SymbolicFile.cpp
index 58db5b672914..05f47cfbf2ff 100644
--- a/llvm/lib/Object/SymbolicFile.cpp
+++ b/llvm/lib/Object/SymbolicFile.cpp
@@ -17,18 +17,17 @@
#include "llvm/Object/Error.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
#include <memory>
using namespace llvm;
using namespace object;
+namespace llvm {
+class LLVMContext;
+}
+
SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source)
: Binary(Type, Source) {}
diff --git a/llvm/lib/Object/TapiFile.cpp b/llvm/lib/Object/TapiFile.cpp
index 83568e8d823a..596445a09e85 100644
--- a/llvm/lib/Object/TapiFile.cpp
+++ b/llvm/lib/Object/TapiFile.cpp
@@ -12,8 +12,12 @@
#include "llvm/Object/TapiFile.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/TextAPI/ArchitectureSet.h"
+#include "llvm/TextAPI/InterfaceFile.h"
+#include "llvm/TextAPI/Platform.h"
#include "llvm/TextAPI/Symbol.h"
using namespace llvm;
diff --git a/llvm/lib/Object/TapiUniversal.cpp b/llvm/lib/Object/TapiUniversal.cpp
index d73d93f6bd53..bf96b57f0321 100644
--- a/llvm/lib/Object/TapiUniversal.cpp
+++ b/llvm/lib/Object/TapiUniversal.cpp
@@ -13,7 +13,8 @@
#include "llvm/Object/TapiUniversal.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Object/TapiFile.h"
+#include "llvm/TextAPI/ArchitectureSet.h"
#include "llvm/TextAPI/TextAPIReader.h"
using namespace llvm;
@@ -47,7 +48,7 @@ TapiUniversal::~TapiUniversal() = default;
Expected<std::unique_ptr<TapiFile>>
TapiUniversal::ObjectForArch::getAsObjectFile() const {
return std::unique_ptr<TapiFile>(new TapiFile(Parent->getMemoryBufferRef(),
- *Parent->ParsedFile.get(),
+ *Parent->ParsedFile,
Parent->Libraries[Index].Arch));
}
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 6a19b159f3d5..ce816b097691 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -8,7 +8,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
@@ -30,7 +29,6 @@
#include <cassert>
#include <cstdint>
#include <cstring>
-#include <system_error>
#define DEBUG_TYPE "wasm-object"
@@ -166,23 +164,25 @@ static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) {
static Error readInitExpr(wasm::WasmInitExpr &Expr,
WasmObjectFile::ReadContext &Ctx) {
- Expr.Opcode = readOpcode(Ctx);
+ auto Start = Ctx.Ptr;
- switch (Expr.Opcode) {
+ Expr.Extended = false;
+ Expr.Inst.Opcode = readOpcode(Ctx);
+ switch (Expr.Inst.Opcode) {
case wasm::WASM_OPCODE_I32_CONST:
- Expr.Value.Int32 = readVarint32(Ctx);
+ Expr.Inst.Value.Int32 = readVarint32(Ctx);
break;
case wasm::WASM_OPCODE_I64_CONST:
- Expr.Value.Int64 = readVarint64(Ctx);
+ Expr.Inst.Value.Int64 = readVarint64(Ctx);
break;
case wasm::WASM_OPCODE_F32_CONST:
- Expr.Value.Float32 = readFloat32(Ctx);
+ Expr.Inst.Value.Float32 = readFloat32(Ctx);
break;
case wasm::WASM_OPCODE_F64_CONST:
- Expr.Value.Float64 = readFloat64(Ctx);
+ Expr.Inst.Value.Float64 = readFloat64(Ctx);
break;
case wasm::WASM_OPCODE_GLOBAL_GET:
- Expr.Value.Global = readULEB128(Ctx);
+ Expr.Inst.Value.Global = readULEB128(Ctx);
break;
case wasm::WASM_OPCODE_REF_NULL: {
wasm::ValType Ty = static_cast<wasm::ValType>(readULEB128(Ctx));
@@ -193,15 +193,46 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
break;
}
default:
- return make_error<GenericBinaryError>("invalid opcode in init_expr",
- object_error::parse_failed);
+ Expr.Extended = true;
}
- uint8_t EndOpcode = readOpcode(Ctx);
- if (EndOpcode != wasm::WASM_OPCODE_END) {
- return make_error<GenericBinaryError>("invalid init_expr",
- object_error::parse_failed);
+ if (!Expr.Extended) {
+ uint8_t EndOpcode = readOpcode(Ctx);
+ if (EndOpcode != wasm::WASM_OPCODE_END)
+ Expr.Extended = true;
+ }
+
+ if (Expr.Extended) {
+ Ctx.Ptr = Start;
+ while (true) {
+ uint8_t Opcode = readOpcode(Ctx);
+ switch (Opcode) {
+ case wasm::WASM_OPCODE_I32_CONST:
+ case wasm::WASM_OPCODE_GLOBAL_GET:
+ case wasm::WASM_OPCODE_REF_NULL:
+ case wasm::WASM_OPCODE_I64_CONST:
+ case wasm::WASM_OPCODE_F32_CONST:
+ case wasm::WASM_OPCODE_F64_CONST:
+ readULEB128(Ctx);
+ break;
+ case wasm::WASM_OPCODE_I32_ADD:
+ case wasm::WASM_OPCODE_I32_SUB:
+ case wasm::WASM_OPCODE_I32_MUL:
+ case wasm::WASM_OPCODE_I64_ADD:
+ case wasm::WASM_OPCODE_I64_SUB:
+ case wasm::WASM_OPCODE_I64_MUL:
+ break;
+ case wasm::WASM_OPCODE_END:
+ Expr.Body = ArrayRef<uint8_t>(Start, Ctx.Ptr - Start);
+ return Error::success();
+ default:
+ return make_error<GenericBinaryError>(
+ Twine("invalid opcode in init_expr: ") + Twine(unsigned(Opcode)),
+ object_error::parse_failed);
+ }
+ }
}
+
return Error::success();
}
@@ -420,10 +451,6 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
llvm::DenseSet<uint64_t> SeenFunctions;
llvm::DenseSet<uint64_t> SeenGlobals;
llvm::DenseSet<uint64_t> SeenSegments;
- if (Functions.size() && !SeenCodeSection) {
- return make_error<GenericBinaryError>("names must come after code section",
- object_error::parse_failed);
- }
while (Ctx.Ptr < Ctx.End) {
uint8_t Type = readUint8(Ctx);
@@ -443,7 +470,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>(
"function named more than once", object_error::parse_failed);
if (!isValidFunctionIndex(Index) || Name.empty())
- return make_error<GenericBinaryError>("invalid name entry",
+ return make_error<GenericBinaryError>("invalid function name entry",
object_error::parse_failed);
if (isDefinedFunctionIndex(Index))
@@ -454,7 +481,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>("global named more than once",
object_error::parse_failed);
if (!isValidGlobalIndex(Index) || Name.empty())
- return make_error<GenericBinaryError>("invalid name entry",
+ return make_error<GenericBinaryError>("invalid global name entry",
object_error::parse_failed);
} else {
nameType = wasm::NameType::DATA_SEGMENT;
@@ -462,7 +489,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
return make_error<GenericBinaryError>(
"segment named more than once", object_error::parse_failed);
if (Index > DataSegments.size())
- return make_error<GenericBinaryError>("invalid named data segment",
+ return make_error<GenericBinaryError>("invalid data segment name entry",
object_error::parse_failed);
}
DebugNames.push_back(wasm::WasmDebugName{nameType, Index, Name});
@@ -488,11 +515,6 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
HasLinkingSection = true;
- if (Functions.size() && !SeenCodeSection) {
- return make_error<GenericBinaryError>(
- "linking data must come after code section",
- object_error::parse_failed);
- }
LinkingData.Version = readVaruint32(Ctx);
if (LinkingData.Version != wasm::WasmMetadataVersion) {
@@ -1379,7 +1401,6 @@ Error WasmObjectFile::parseStartSection(ReadContext &Ctx) {
}
Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
- SeenCodeSection = true;
CodeSection = Sections.size();
uint32_t FunctionCount = readVaruint32(Ctx);
if (FunctionCount != Functions.size()) {
@@ -1443,8 +1464,9 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
object_error::parse_failed);
if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) {
- Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST;
- Segment.Offset.Value.Int32 = 0;
+ Segment.Offset.Extended = false;
+ Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST;
+ Segment.Offset.Inst.Value.Int32 = 0;
} else {
if (Error Err = readInitExpr(Segment.Offset, Ctx))
return Err;
@@ -1488,7 +1510,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
DataSection = Sections.size();
uint32_t Count = readVaruint32(Ctx);
- if (DataCount && Count != DataCount.getValue())
+ if (DataCount && Count != *DataCount)
return make_error<GenericBinaryError>(
"number of data segments does not match DataCount section");
DataSegments.reserve(Count);
@@ -1503,8 +1525,9 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
if (Error Err = readInitExpr(Segment.Data.Offset, Ctx))
return Err;
} else {
- Segment.Data.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST;
- Segment.Data.Offset.Value.Int32 = 0;
+ Segment.Data.Offset.Extended = false;
+ Segment.Data.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST;
+ Segment.Data.Offset.Inst.Value.Int32 = 0;
}
uint32_t Size = readVaruint32(Ctx);
if (Size > (size_t)(Ctx.End - Ctx.Ptr))
@@ -1602,10 +1625,12 @@ uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
// offset within the segment.
uint32_t SegmentIndex = Sym.Info.DataRef.Segment;
const wasm::WasmDataSegment &Segment = DataSegments[SegmentIndex].Data;
- if (Segment.Offset.Opcode == wasm::WASM_OPCODE_I32_CONST) {
- return Segment.Offset.Value.Int32 + Sym.Info.DataRef.Offset;
- } else if (Segment.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST) {
- return Segment.Offset.Value.Int64 + Sym.Info.DataRef.Offset;
+ if (Segment.Offset.Extended) {
+ llvm_unreachable("extended init exprs not supported");
+ } else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_I32_CONST) {
+ return Segment.Offset.Inst.Value.Int32 + Sym.Info.DataRef.Offset;
+ } else if (Segment.Offset.Inst.Opcode == wasm::WASM_OPCODE_I64_CONST) {
+ return Segment.Offset.Inst.Value.Int64 + Sym.Info.DataRef.Offset;
} else {
llvm_unreachable("unknown init expr opcode");
}
@@ -1692,29 +1717,11 @@ void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; }
Expected<StringRef> WasmObjectFile::getSectionName(DataRefImpl Sec) const {
const WasmSection &S = Sections[Sec.d.a];
-#define ECase(X) \
- case wasm::WASM_SEC_##X: \
- return #X;
- switch (S.Type) {
- ECase(TYPE);
- ECase(IMPORT);
- ECase(FUNCTION);
- ECase(TABLE);
- ECase(MEMORY);
- ECase(GLOBAL);
- ECase(TAG);
- ECase(EXPORT);
- ECase(START);
- ECase(ELEM);
- ECase(CODE);
- ECase(DATA);
- ECase(DATACOUNT);
- case wasm::WASM_SEC_CUSTOM:
+ if (S.Type == wasm::WASM_SEC_CUSTOM)
return S.Name;
- default:
+ if (S.Type > wasm::WASM_SEC_LAST_KNOWN)
return createStringError(object_error::invalid_section_index, "");
- }
-#undef ECase
+ return wasm::sectionTypeToString(S.Type);
}
uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
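// A sketch of the invariant the readers above establish: a non-extended init
// expr is a single constant instruction, so segment bases stay directly
// computable, while Extended exprs carry only an opaque Body and must be
// rejected (segmentBase is a hypothetical helper name).
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>

static uint64_t segmentBase(const llvm::wasm::WasmInitExpr &Offset) {
  assert(!Offset.Extended && "extended init exprs have no single constant");
  switch (Offset.Inst.Opcode) {
  case llvm::wasm::WASM_OPCODE_I32_CONST:
    return Offset.Inst.Value.Int32;
  case llvm::wasm::WASM_OPCODE_I64_CONST:
    return Offset.Inst.Value.Int64;
  default:
    llvm_unreachable("unknown init expr opcode");
  }
}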
diff --git a/llvm/lib/Object/WindowsResource.cpp b/llvm/lib/Object/WindowsResource.cpp
index 2a69c6c46b59..d50f149629c3 100644
--- a/llvm/lib/Object/WindowsResource.cpp
+++ b/llvm/lib/Object/WindowsResource.cpp
@@ -12,13 +12,11 @@
#include "llvm/Object/WindowsResource.h"
#include "llvm/Object/COFF.h"
-#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include <ctime>
#include <queue>
-#include <system_error>
using namespace llvm;
using namespace object;
diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp
index f2f6d700ddd8..ff39fe1794c0 100644
--- a/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -615,6 +615,16 @@ Expected<uint32_t> XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const {
if (XCOFFSym.getSectionNumber() == XCOFF::N_UNDEF)
Result |= SymbolRef::SF_Undefined;
+ // There is no visibility in the old 32-bit XCOFF object file interpretation.
+ if (is64Bit() || (auxiliaryHeader32() && (auxiliaryHeader32()->getVersion() ==
+ NEW_XCOFF_INTERPRET))) {
+ uint16_t SymType = XCOFFSym.getSymbolType();
+ if ((SymType & VISIBILITY_MASK) == SYM_V_HIDDEN)
+ Result |= SymbolRef::SF_Hidden;
+
+ if ((SymType & VISIBILITY_MASK) == SYM_V_EXPORTED)
+ Result |= SymbolRef::SF_Exported;
+ }
return Result;
}
@@ -699,6 +709,19 @@ bool XCOFFObjectFile::is64Bit() const {
return Binary::ID_XCOFF64 == getType();
}
+Expected<StringRef> XCOFFObjectFile::getRawData(const char *Start,
+ uint64_t Size,
+ StringRef Name) const {
+ uintptr_t StartPtr = reinterpret_cast<uintptr_t>(Start);
+ // TODO: this path is untested.
+ if (Error E = Binary::checkOffset(Data, StartPtr, Size))
+ return createError(toString(std::move(E)) + ": " + Name.data() +
+ " data with offset 0x" + Twine::utohexstr(StartPtr) +
+ " and size 0x" + Twine::utohexstr(Size) +
+ " goes past the end of the file");
+ return StringRef(Start, Size);
+}
+
uint16_t XCOFFObjectFile::getMagic() const {
return is64Bit() ? fileHeader64()->Magic : fileHeader32()->Magic;
}
@@ -1319,7 +1342,7 @@ XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size,
NumOfCtlAnchors = DE.getU32(Cur);
if (Cur && NumOfCtlAnchors) {
SmallVector<uint32_t, 8> Disp;
- Disp.reserve(NumOfCtlAnchors.getValue());
+ Disp.reserve(*NumOfCtlAnchors);
for (uint32_t I = 0; I < NumOfCtlAnchors && Cur; ++I)
Disp.push_back(DE.getU32(Cur));
if (Cur)
@@ -1346,7 +1369,7 @@ XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size,
return;
}
VecExt = TBVecExtOrErr.get();
- VectorParmsNum = VecExt.getValue().getNumberOfVectorParms();
+ VectorParmsNum = VecExt->getNumberOfVectorParms();
}
}
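// For context: the visibility check added to getSymbolFlags() masks the
// symbol type with VISIBILITY_MASK and compares against the SYM_V_* values,
// so the field matches at most one visibility. A minimal sketch, assuming
// these constants come from llvm/BinaryFormat/XCOFF.h:
#include "llvm/BinaryFormat/XCOFF.h"
#include <cstdint>

static bool isHiddenXCOFFSym(uint16_t SymType) {
  // The 3-bit visibility field holds mutually exclusive values, so at most
  // one of SF_Hidden/SF_Exported is set by the code above.
  return (SymType & llvm::XCOFF::VISIBILITY_MASK) == llvm::XCOFF::SYM_V_HIDDEN;
}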
diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp
index d884e2fd55cd..72d7db665d0e 100644
--- a/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/Object/COFF.h"
#include "llvm/ObjectYAML/ObjectYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
@@ -45,7 +46,7 @@ struct COFFParser {
COFF::MaxNumberOfSections16;
}
- bool isPE() const { return Obj.OptionalHeader.hasValue(); }
+ bool isPE() const { return Obj.OptionalHeader.has_value(); }
bool is64Bit() const {
return Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 ||
Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64;
@@ -236,7 +237,7 @@ static bool layoutCOFF(COFFParser &CP) {
if (S.SectionData.binary_size() == 0)
S.SectionData = CodeViewYAML::toDebugT(S.DebugP, CP.Allocator, S.Name);
} else if (S.Name == ".debug$H") {
- if (S.DebugH.hasValue() && S.SectionData.binary_size() == 0)
+ if (S.DebugH && S.SectionData.binary_size() == 0)
S.SectionData = CodeViewYAML::toDebugH(*S.DebugH, CP.Allocator);
}
@@ -456,7 +457,7 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
CP.Obj.OptionalHeader->DataDirectories;
uint32_t NumDataDir = sizeof(CP.Obj.OptionalHeader->DataDirectories) /
sizeof(Optional<COFF::DataDirectory>);
- if (I >= NumDataDir || !DataDirectories[I].hasValue()) {
+ if (I >= NumDataDir || !DataDirectories[I]) {
OS << zeros(uint32_t(0));
OS << zeros(uint32_t(0));
} else {
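// The Optional churn throughout this commit is a mechanical move to
// llvm::Optional's std::optional-style spellings; the old and new forms are
// equivalent:
//   Opt.hasValue()     ->  Opt.has_value()   (or simply `if (Opt)`)
//   Opt.getValue()     ->  *Opt
//   Opt.getValueOr(X)  ->  Opt.value_or(X)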
diff --git a/llvm/lib/ObjectYAML/COFFYAML.cpp b/llvm/lib/ObjectYAML/COFFYAML.cpp
index 6e5cdce89060..099ddb2b9665 100644
--- a/llvm/lib/ObjectYAML/COFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/COFFYAML.cpp
@@ -75,6 +75,9 @@ void ScalarEnumerationTraits<COFF::MachineTypes>::enumeration(
ECase(IMAGE_FILE_MACHINE_POWERPC);
ECase(IMAGE_FILE_MACHINE_POWERPCFP);
ECase(IMAGE_FILE_MACHINE_R4000);
+ ECase(IMAGE_FILE_MACHINE_RISCV32);
+ ECase(IMAGE_FILE_MACHINE_RISCV64);
+ ECase(IMAGE_FILE_MACHINE_RISCV128);
ECase(IMAGE_FILE_MACHINE_SH3);
ECase(IMAGE_FILE_MACHINE_SH3DSP);
ECase(IMAGE_FILE_MACHINE_SH4);
diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 6b6a1176628b..b1ad10d425cc 100644
--- a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -25,6 +25,7 @@
#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cstdint>
diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 49b24e21cf60..e4e2b2a6d21a 100644
--- a/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/llvm/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -490,7 +490,10 @@ private:
Error LeafRecordImpl<FieldListRecord>::fromCodeViewRecord(CVType Type) {
MemberRecordConversionVisitor V(Members);
- return visitMemberRecordStream(Type.content(), V);
+ FieldListRecord FieldList;
+ cantFail(TypeDeserializer::deserializeAs<FieldListRecord>(Type,
+ FieldList));
+ return visitMemberRecordStream(FieldList.Data, V);
}
CVType LeafRecordImpl<FieldListRecord>::toCodeViewRecord(
diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
index eec733c7d7f9..c0e2cdd54f07 100644
--- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
@@ -423,7 +423,7 @@ Error DWARFYAML::emitDebugInfo(raw_ostream &OS, const DWARFYAML::Data &DI) {
std::string EntryBuffer;
raw_string_ostream EntryBufferOS(EntryBuffer);
- uint64_t AbbrevTableID = Unit.AbbrevTableID.getValueOr(I);
+ uint64_t AbbrevTableID = Unit.AbbrevTableID.value_or(I);
for (const DWARFYAML::Entry &Entry : Unit.Entries) {
if (Expected<uint64_t> EntryLength =
writeDIE(DI, I, AbbrevTableID, Params, Entry, EntryBufferOS,
@@ -507,7 +507,7 @@ static void writeExtendedOpcode(const DWARFYAML::LineTableOpcode &Op,
for (auto OpByte : Op.UnknownOpcodeData)
writeInteger((uint8_t)OpByte, OpBufferOS, IsLittleEndian);
}
- uint64_t ExtLen = Op.ExtLen.getValueOr(OpBuffer.size());
+ uint64_t ExtLen = Op.ExtLen.value_or(OpBuffer.size());
encodeULEB128(ExtLen, OS);
OS.write(OpBuffer.data(), OpBuffer.size());
}
@@ -582,7 +582,7 @@ Error DWARFYAML::emitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) {
writeInteger(LineTable.LineRange, BufferOS, DI.IsLittleEndian);
std::vector<uint8_t> StandardOpcodeLengths =
- LineTable.StandardOpcodeLengths.getValueOr(
+ LineTable.StandardOpcodeLengths.value_or(
getStandardOpcodeLengths(LineTable.Version, LineTable.OpcodeBase));
uint8_t OpcodeBase = LineTable.OpcodeBase
? *LineTable.OpcodeBase
diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp
index 2591bf4d5af4..37116ada9901 100644
--- a/llvm/lib/ObjectYAML/DWARFYAML.cpp
+++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp
@@ -62,7 +62,7 @@ DWARFYAML::Data::getAbbrevTableInfoByID(uint64_t ID) const {
for (auto &AbbrevTable : enumerate(DebugAbbrev)) {
// If the abbrev table's ID isn't specified, we use the index as its ID.
uint64_t AbbrevTableID =
- AbbrevTable.value().ID.getValueOr(AbbrevTable.index());
+ AbbrevTable.value().ID.value_or(AbbrevTable.index());
auto It = AbbrevTableInfoMap.insert(
{AbbrevTableID, AbbrevTableInfo{/*Index=*/AbbrevTable.index(),
/*Offset=*/AbbrevTableOffset}});
diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
new file mode 100644
index 000000000000..9834b036de90
--- /dev/null
+++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
@@ -0,0 +1,190 @@
+//===- DXContainerEmitter.cpp - Convert YAML to a DXContainer -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Binary emitter for converting YAML into a DXContainer binary.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class DXContainerWriter {
+public:
+ DXContainerWriter(DXContainerYAML::Object &ObjectFile)
+ : ObjectFile(ObjectFile) {}
+
+ Error write(raw_ostream &OS);
+
+private:
+ DXContainerYAML::Object &ObjectFile;
+
+ Error computePartOffsets();
+ Error validatePartOffsets();
+ Error validateSize(uint32_t Computed);
+
+ void writeHeader(raw_ostream &OS);
+ void writeParts(raw_ostream &OS);
+};
+} // namespace
+
+Error DXContainerWriter::validateSize(uint32_t Computed) {
+ if (!ObjectFile.Header.FileSize)
+ ObjectFile.Header.FileSize = Computed;
+ else if (*ObjectFile.Header.FileSize < Computed)
+ return createStringError(errc::result_out_of_range,
+ "File size specified is too small.");
+ return Error::success();
+}
+
+Error DXContainerWriter::validatePartOffsets() {
+ if (ObjectFile.Parts.size() != ObjectFile.Header.PartOffsets->size())
+ return createStringError(
+ errc::invalid_argument,
+ "Mismatch between number of parts and part offsets.");
+ uint32_t RollingOffset =
+ sizeof(dxbc::Header) + (ObjectFile.Header.PartCount * sizeof(uint32_t));
+ for (auto I : llvm::zip(ObjectFile.Parts, *ObjectFile.Header.PartOffsets)) {
+ if (RollingOffset > std::get<1>(I))
+ return createStringError(errc::invalid_argument,
+ "Offset mismatch, not enough space for data.");
+ RollingOffset =
+ std::get<1>(I) + sizeof(dxbc::PartHeader) + std::get<0>(I).Size;
+ }
+ if (Error Err = validateSize(RollingOffset))
+ return Err;
+
+ return Error::success();
+}
+
+Error DXContainerWriter::computePartOffsets() {
+ if (ObjectFile.Header.PartOffsets)
+ return validatePartOffsets();
+ uint32_t RollingOffset =
+ sizeof(dxbc::Header) + (ObjectFile.Header.PartCount * sizeof(uint32_t));
+ ObjectFile.Header.PartOffsets = std::vector<uint32_t>();
+ for (const auto &Part : ObjectFile.Parts) {
+ ObjectFile.Header.PartOffsets->push_back(RollingOffset);
+ RollingOffset += sizeof(dxbc::PartHeader) + Part.Size;
+ }
+ if (Error Err = validateSize(RollingOffset))
+ return Err;
+
+ return Error::success();
+}
+
+void DXContainerWriter::writeHeader(raw_ostream &OS) {
+ dxbc::Header Header;
+ memcpy(Header.Magic, "DXBC", 4);
+ memcpy(Header.FileHash.Digest, ObjectFile.Header.Hash.data(), 16);
+ Header.Version.Major = ObjectFile.Header.Version.Major;
+ Header.Version.Minor = ObjectFile.Header.Version.Minor;
+ Header.FileSize = *ObjectFile.Header.FileSize;
+ Header.PartCount = ObjectFile.Parts.size();
+ if (sys::IsBigEndianHost)
+ Header.swapBytes();
+ OS.write(reinterpret_cast<char *>(&Header), sizeof(Header));
+ SmallVector<uint32_t> Offsets(ObjectFile.Header.PartOffsets->begin(),
+ ObjectFile.Header.PartOffsets->end());
+ if (sys::IsBigEndianHost)
+ for (auto &O : Offsets)
+ sys::swapByteOrder(O);
+ OS.write(reinterpret_cast<char *>(Offsets.data()),
+ Offsets.size() * sizeof(uint32_t));
+}
+
+void DXContainerWriter::writeParts(raw_ostream &OS) {
+ uint32_t RollingOffset =
+ sizeof(dxbc::Header) + (ObjectFile.Header.PartCount * sizeof(uint32_t));
+ for (auto I : llvm::zip(ObjectFile.Parts, *ObjectFile.Header.PartOffsets)) {
+ if (RollingOffset < std::get<1>(I)) {
+ uint32_t PadBytes = std::get<1>(I) - RollingOffset;
+ OS.write_zeros(PadBytes);
+ }
+ DXContainerYAML::Part P = std::get<0>(I);
+ OS.write(P.Name.c_str(), 4);
+ if (sys::IsBigEndianHost)
+ sys::swapByteOrder(P.Size);
+ OS.write(reinterpret_cast<const char *>(&P.Size), sizeof(uint32_t));
+ RollingOffset = std::get<1>(I) + sizeof(dxbc::PartHeader);
+
+ if (P.Name == "DXIL" && P.Program) {
+ dxbc::ProgramHeader Header;
+ Header.MajorVersion = P.Program->MajorVersion;
+ Header.MinorVersion = P.Program->MinorVersion;
+ Header.Unused = 0;
+ Header.ShaderKind = P.Program->ShaderKind;
+ memcpy(Header.Bitcode.Magic, "DXIL", 4);
+ Header.Bitcode.MajorVersion = P.Program->DXILMajorVersion;
+ Header.Bitcode.MinorVersion = P.Program->DXILMinorVersion;
+ Header.Bitcode.Unused = 0;
+
+ // Compute the optional fields if needed...
+ if (P.Program->DXILOffset)
+ Header.Bitcode.Offset = P.Program->DXILOffset.getValue();
+ else
+ Header.Bitcode.Offset = sizeof(dxbc::BitcodeHeader);
+
+ if (P.Program->DXILSize)
+ Header.Bitcode.Size = P.Program->DXILSize.getValue();
+ else
+ Header.Bitcode.Size = P.Program->DXIL ? P.Program->DXIL->size() : 0;
+
+ if (P.Program->Size)
+ Header.Size = P.Program->Size.getValue();
+ else
+ Header.Size = sizeof(dxbc::ProgramHeader) + Header.Bitcode.Size;
+
+ uint32_t BitcodeOffset = Header.Bitcode.Offset;
+ if (sys::IsBigEndianHost)
+ Header.swapBytes();
+ OS.write(reinterpret_cast<const char *>(&Header),
+ sizeof(dxbc::ProgramHeader));
+ if (P.Program->DXIL) {
+ if (BitcodeOffset > sizeof(dxbc::BitcodeHeader)) {
+ uint32_t PadBytes = BitcodeOffset - sizeof(dxbc::BitcodeHeader);
+ OS.write_zeros(PadBytes);
+ }
+ OS.write(reinterpret_cast<char *>(P.Program->DXIL->data()),
+ P.Program->DXIL->size());
+ }
+ }
+ }
+}
+
+Error DXContainerWriter::write(raw_ostream &OS) {
+ if (Error Err = computePartOffsets())
+ return Err;
+ writeHeader(OS);
+ writeParts(OS);
+ return Error::success();
+}
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2dxcontainer(DXContainerYAML::Object &Doc, raw_ostream &Out,
+ ErrorHandler EH) {
+ DXContainerWriter Writer(Doc);
+ if (Error Err = Writer.write(Out)) {
+ handleAllErrors(std::move(Err),
+ [&](const ErrorInfoBase &Err) { EH(Err.message()); });
+ return false;
+ }
+ return true;
+}
+
+} // namespace yaml
+} // namespace llvm
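// For reference, the rolling-offset layout computePartOffsets() builds above,
// written out: part i starts after the fixed header, the part-offset table,
// and every preceding part's header plus payload:
//   Offset(i) = sizeof(dxbc::Header) + PartCount * sizeof(uint32_t)
//             + sum over j < i of (sizeof(dxbc::PartHeader) + Part[j].Size)
// validatePartOffsets() enforces the same inequality when offsets are given
// explicitly in the YAML.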
diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
new file mode 100644
index 000000000000..7952fa4bf0e8
--- /dev/null
+++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
@@ -0,0 +1,61 @@
+//===- DXContainerYAML.cpp - DXContainer YAMLIO implementation ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of
+// DXContainerYAML.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/DXContainerYAML.h"
+
+namespace llvm {
+namespace yaml {
+
+void MappingTraits<DXContainerYAML::VersionTuple>::mapping(
+ IO &IO, DXContainerYAML::VersionTuple &Version) {
+ IO.mapRequired("Major", Version.Major);
+ IO.mapRequired("Minor", Version.Minor);
+}
+
+void MappingTraits<DXContainerYAML::FileHeader>::mapping(
+ IO &IO, DXContainerYAML::FileHeader &Header) {
+ IO.mapRequired("Hash", Header.Hash);
+ IO.mapRequired("Version", Header.Version);
+ IO.mapOptional("FileSize", Header.FileSize);
+ IO.mapRequired("PartCount", Header.PartCount);
+ IO.mapOptional("PartOffsets", Header.PartOffsets);
+}
+
+void MappingTraits<DXContainerYAML::DXILProgram>::mapping(
+ IO &IO, DXContainerYAML::DXILProgram &Program) {
+ IO.mapRequired("MajorVersion", Program.MajorVersion);
+ IO.mapRequired("MinorVersion", Program.MinorVersion);
+ IO.mapRequired("ShaderKind", Program.ShaderKind);
+ IO.mapOptional("Size", Program.Size);
+ IO.mapRequired("DXILMajorVersion", Program.DXILMajorVersion);
+ IO.mapRequired("DXILMinorVersion", Program.DXILMinorVersion);
+ IO.mapOptional("DXILSize", Program.DXILSize);
+ IO.mapOptional("DXIL", Program.DXIL);
+}
+
+void MappingTraits<DXContainerYAML::Part>::mapping(IO &IO,
+ DXContainerYAML::Part &P) {
+ IO.mapRequired("Name", P.Name);
+ IO.mapRequired("Size", P.Size);
+ IO.mapOptional("Program", P.Program);
+}
+
+void MappingTraits<DXContainerYAML::Object>::mapping(
+ IO &IO, DXContainerYAML::Object &Obj) {
+ IO.mapTag("!dxcontainer", true);
+ IO.mapRequired("Header", Obj.Header);
+ IO.mapRequired("Parts", Obj.Parts);
+}
+
+} // namespace yaml
+} // namespace llvm
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index e378be3892fe..f5611ed1197b 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -412,7 +412,7 @@ ELFState<ELFT>::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH)
}
// TODO: Only create the .strtab here if any symbols have been requested.
ImplicitSections.insert(".strtab");
- if (!SecHdrTable || !SecHdrTable->NoHeaders.getValueOr(false))
+ if (!SecHdrTable || !SecHdrTable->NoHeaders.value_or(false))
ImplicitSections.insert(SectionHeaderStringTableName);
// Insert placeholders for implicit sections that are not
@@ -596,12 +596,11 @@ unsigned ELFState<ELFT>::toSectionIndex(StringRef S, StringRef LocSec,
const ELFYAML::SectionHeaderTable &SectionHeaders =
Doc.getSectionHeaderTable();
if (SectionHeaders.IsImplicit ||
- (SectionHeaders.NoHeaders && !SectionHeaders.NoHeaders.getValue()) ||
+ (SectionHeaders.NoHeaders && !*SectionHeaders.NoHeaders) ||
SectionHeaders.isDefault())
return Index;
- assert(!SectionHeaders.NoHeaders.getValueOr(false) ||
- !SectionHeaders.Sections);
+ assert(!SectionHeaders.NoHeaders.value_or(false) || !SectionHeaders.Sections);
size_t FirstExcluded =
SectionHeaders.Sections ? SectionHeaders.Sections->size() : 0;
if (Index > FirstExcluded) {
@@ -771,7 +770,7 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
if (ELFYAML::SectionHeaderTable *S =
dyn_cast<ELFYAML::SectionHeaderTable>(D.get())) {
- if (S->NoHeaders.getValueOr(false))
+ if (S->NoHeaders.value_or(false))
continue;
if (!S->Offset)
@@ -808,7 +807,7 @@ void ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
SHeader.sh_entsize = *Sec->EntSize;
else
SHeader.sh_entsize = ELFYAML::getDefaultShEntSize<ELFT>(
- Doc.Header.Machine.getValueOr(ELF::EM_NONE), Sec->Type, Sec->Name);
+ Doc.Header.Machine.value_or(ELF::EM_NONE), Sec->Type, Sec->Name);
// We have a few sections like string or symbol tables that are usually
// added implicitly to the end. However, if they are explicitly specified
@@ -958,9 +957,9 @@ ELFState<ELFT>::toELFSymbols(ArrayRef<ELFYAML::Symbol> Symbols,
else if (Sym.Index)
Symbol.st_shndx = *Sym.Index;
- Symbol.st_value = Sym.Value.getValueOr(yaml::Hex64(0));
+ Symbol.st_value = Sym.Value.value_or(yaml::Hex64(0));
Symbol.st_other = Sym.Other ? *Sym.Other : 0;
- Symbol.st_size = Sym.Size.getValueOr(yaml::Hex64(0));
+ Symbol.st_size = Sym.Size.value_or(yaml::Hex64(0));
}
return Ret;
@@ -1394,12 +1393,22 @@ void ELFState<ELFT>::writeSectionContent(
return;
for (const ELFYAML::BBAddrMapEntry &E : *Section.Entries) {
+ // Write version and feature values.
+ if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) {
+ if (E.Version > 1)
+ WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: "
+ << static_cast<int>(E.Version)
+ << "; encoding using the most recent version";
+ CBA.write(E.Version);
+ CBA.write(E.Feature);
+ SHeader.sh_size += 2;
+ }
// Write the address of the function.
CBA.write<uintX_t>(E.Address, ELFT::TargetEndianness);
// Write number of BBEntries (number of basic blocks in the function). This
// is overridden by the 'NumBlocks' YAML field when specified.
uint64_t NumBlocks =
- E.NumBlocks.getValueOr(E.BBEntries ? E.BBEntries->size() : 0);
+ E.NumBlocks.value_or(E.BBEntries ? E.BBEntries->size() : 0);
SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks);
// Write all BBEntries.
if (!E.BBEntries)
@@ -1486,10 +1495,10 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
return;
CBA.write<uint32_t>(
- Section.NBucket.getValueOr(llvm::yaml::Hex64(Section.Bucket->size())),
+ Section.NBucket.value_or(llvm::yaml::Hex64(Section.Bucket->size())),
ELFT::TargetEndianness);
CBA.write<uint32_t>(
- Section.NChain.getValueOr(llvm::yaml::Hex64(Section.Chain->size())),
+ Section.NChain.value_or(llvm::yaml::Hex64(Section.Chain->size())),
ELFT::TargetEndianness);
for (uint32_t Val : *Section.Bucket)
@@ -1518,10 +1527,10 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
const ELFYAML::VerdefEntry &E = (*Section.Entries)[I];
Elf_Verdef VerDef;
- VerDef.vd_version = E.Version.getValueOr(1);
- VerDef.vd_flags = E.Flags.getValueOr(0);
- VerDef.vd_ndx = E.VersionNdx.getValueOr(0);
- VerDef.vd_hash = E.Hash.getValueOr(0);
+ VerDef.vd_version = E.Version.value_or(1);
+ VerDef.vd_flags = E.Flags.value_or(0);
+ VerDef.vd_ndx = E.VersionNdx.value_or(0);
+ VerDef.vd_hash = E.Hash.value_or(0);
VerDef.vd_aux = sizeof(Elf_Verdef);
VerDef.vd_cnt = E.VerNames.size();
if (I == Section.Entries->size() - 1)
@@ -1830,7 +1839,7 @@ template <class ELFT> void ELFState<ELFT>::buildSectionIndex() {
if (!ExcludedSectionHeaders.insert(Hdr.Name).second)
llvm_unreachable("buildSectionIndex() failed");
- if (SectionHeaders.NoHeaders.getValueOr(false))
+ if (SectionHeaders.NoHeaders.value_or(false))
for (const ELFYAML::Section *S : Sections)
if (!ExcludedSectionHeaders.insert(S->Name).second)
llvm_unreachable("buildSectionIndex() failed");
@@ -1960,7 +1969,7 @@ bool ELFState<ELFT>::writeELF(raw_ostream &OS, ELFYAML::Object &Doc,
writeArrayData(OS, makeArrayRef(PHeaders));
const ELFYAML::SectionHeaderTable &SHT = Doc.getSectionHeaderTable();
- if (!SHT.NoHeaders.getValueOr(false))
+ if (!SHT.NoHeaders.value_or(false))
CBA.updateDataAt(*SHT.Offset, SHeaders.data(),
SHT.getNumHeaders(SHeaders.size()) * sizeof(Elf_Shdr));
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index d597148b98ab..cdd180cdc15d 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -29,6 +29,8 @@ namespace llvm {
ELFYAML::Chunk::~Chunk() = default;
namespace ELFYAML {
+ELF_ELFOSABI Object::getOSAbi() const { return Header.OSABI; }
+
unsigned Object::getMachine() const {
if (Header.Machine)
return *Header.Machine;
@@ -175,6 +177,10 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_AMD_PAL_METADATA);
// AMDGPU specific notes. (Code Object V3)
ECase(NT_AMDGPU_METADATA);
+ // Android specific notes.
+ ECase(NT_ANDROID_TYPE_IDENT);
+ ECase(NT_ANDROID_TYPE_KUSER);
+ ECase(NT_ANDROID_TYPE_MEMTAG);
#undef ECase
IO.enumFallback<Hex32>(Value);
}
@@ -344,6 +350,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(
ECase(EM_BPF);
ECase(EM_VE);
ECase(EM_CSKY);
+ ECase(EM_LOONGARCH);
#undef ECase
IO.enumFallback<Hex16>(Value);
}
@@ -560,6 +567,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
@@ -570,6 +578,11 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1033, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1034, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1035, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1036, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1100, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1101, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1102, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1103, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
@@ -641,6 +654,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_LLVM_SYMPART);
ECase(SHT_LLVM_PART_EHDR);
ECase(SHT_LLVM_PART_PHDR);
+ ECase(SHT_LLVM_BB_ADDR_MAP_V0);
ECase(SHT_LLVM_BB_ADDR_MAP);
ECase(SHT_GNU_ATTRIBUTES);
ECase(SHT_GNU_HASH);
@@ -705,7 +719,14 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
BCase(SHF_GROUP);
BCase(SHF_TLS);
BCase(SHF_COMPRESSED);
- BCase(SHF_GNU_RETAIN);
+ switch (Object->getOSAbi()) {
+ case ELF::ELFOSABI_SOLARIS:
+ BCase(SHF_SUNW_NODISCARD);
+ break;
+ default:
+ BCase(SHF_GNU_RETAIN);
+ break;
+ }
switch (Object->getMachine()) {
case ELF::EM_ARM:
BCase(SHF_ARM_PURECODE);
@@ -735,6 +756,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration(
IO &IO, ELFYAML::ELF_SHN &Value) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+ assert(Object && "The IO context is not initialized");
#define ECase(X) IO.enumCase(Value, #X, ELF::X)
ECase(SHN_UNDEF);
ECase(SHN_LORESERVE);
@@ -747,6 +770,15 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration(
ECase(SHN_XINDEX);
ECase(SHN_HIRESERVE);
ECase(SHN_AMDGPU_LDS);
+
+ if (!IO.outputting() || Object->getMachine() == ELF::EM_MIPS) {
+ ECase(SHN_MIPS_ACOMMON);
+ ECase(SHN_MIPS_TEXT);
+ ECase(SHN_MIPS_DATA);
+ ECase(SHN_MIPS_SCOMMON);
+ ECase(SHN_MIPS_SUNDEFINED);
+ }
+
ECase(SHN_HEXAGON_SCOMMON);
ECase(SHN_HEXAGON_SCOMMON_1);
ECase(SHN_HEXAGON_SCOMMON_2);
@@ -839,12 +871,18 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
case ELF::EM_CSKY:
#include "llvm/BinaryFormat/ELFRelocs/CSKY.def"
break;
+ case ELF::EM_PPC:
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def"
+ break;
case ELF::EM_PPC64:
#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
break;
case ELF::EM_68K:
#include "llvm/BinaryFormat/ELFRelocs/M68k.def"
break;
+ case ELF::EM_LOONGARCH:
+#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def"
+ break;
default:
// Nothing to do.
break;
@@ -1298,7 +1336,7 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
// We also support reading a content as array of bytes using the ContentArray
// key. obj2yaml never prints this field.
- assert(!IO.outputting() || !Section.ContentBuf.hasValue());
+ assert(!IO.outputting() || !Section.ContentBuf);
IO.mapOptional("ContentArray", Section.ContentBuf);
if (Section.ContentBuf) {
if (Section.Content)
@@ -1327,8 +1365,7 @@ static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) {
// obj2yaml does not dump these fields. They can be used to override nchain
// and nbucket values for creating broken sections.
- assert(!IO.outputting() ||
- (!Section.NBucket.hasValue() && !Section.NChain.hasValue()));
+ assert(!IO.outputting() || (!Section.NBucket && !Section.NChain));
IO.mapOptional("NChain", Section.NChain);
IO.mapOptional("NBucket", Section.NBucket);
}
@@ -1603,6 +1640,7 @@ void MappingTraits<std::unique_ptr<ELFYAML::Chunk>>::mapping(
Section.reset(new ELFYAML::CallGraphProfileSection());
sectionMapping(IO, *cast<ELFYAML::CallGraphProfileSection>(Section.get()));
break;
+ case ELF::SHT_LLVM_BB_ADDR_MAP_V0:
case ELF::SHT_LLVM_BB_ADDR_MAP:
if (!IO.outputting())
Section.reset(new ELFYAML::BBAddrMapSection());
@@ -1732,6 +1770,8 @@ void MappingTraits<ELFYAML::StackSizeEntry>::mapping(
void MappingTraits<ELFYAML::BBAddrMapEntry>::mapping(
IO &IO, ELFYAML::BBAddrMapEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapRequired("Version", E.Version);
+ IO.mapOptional("Feature", E.Feature, Hex8(0));
IO.mapOptional("Address", E.Address, Hex64(0));
IO.mapOptional("NumBlocks", E.NumBlocks);
IO.mapOptional("BBEntries", E.BBEntries);
diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index b9fad2982828..3d06f3d0bf86 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -55,6 +55,7 @@ private:
void writeStringTable(raw_ostream &OS);
void writeExportTrie(raw_ostream &OS);
void writeDynamicSymbolTable(raw_ostream &OS);
+ void writeFunctionStarts(raw_ostream &OS);
void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry);
void ZeroToOffset(raw_ostream &OS, size_t offset);
@@ -484,6 +485,7 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) {
MachO::dyld_info_command *DyldInfoOnlyCmd = nullptr;
MachO::symtab_command *SymtabCmd = nullptr;
MachO::dysymtab_command *DSymtabCmd = nullptr;
+ MachO::linkedit_data_command *FunctionStartsCmd = nullptr;
for (auto &LC : Obj.LoadCommands) {
switch (LC.Data.load_command_data.cmd) {
case MachO::LC_SYMTAB:
@@ -511,12 +513,15 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) {
WriteQueue.push_back(std::make_pair(
DSymtabCmd->indirectsymoff, &MachOWriter::writeDynamicSymbolTable));
break;
+ case MachO::LC_FUNCTION_STARTS:
+ FunctionStartsCmd = &LC.Data.linkedit_data_command_data;
+ WriteQueue.push_back(std::make_pair(FunctionStartsCmd->dataoff,
+ &MachOWriter::writeFunctionStarts));
+ break;
}
}
- llvm::sort(WriteQueue, [](const writeOperation &a, const writeOperation &b) {
- return a.first < b.first;
- });
+ llvm::sort(WriteQueue, llvm::less_first());
for (auto writeOp : WriteQueue) {
ZeroToOffset(OS, writeOp.first);
@@ -569,6 +574,17 @@ void MachOWriter::writeDynamicSymbolTable(raw_ostream &OS) {
sizeof(yaml::Hex32::BaseType));
}
+void MachOWriter::writeFunctionStarts(raw_ostream &OS) {
+ uint64_t Addr = 0;
+ for (uint64_t NextAddr : Obj.LinkEdit.FunctionStarts) {
+ uint64_t Delta = NextAddr - Addr;
+ encodeULEB128(Delta, OS);
+ Addr = NextAddr;
+ }
+
+ OS.write('\0');
+}
+
class UniversalWriter {
public:
UniversalWriter(yaml::YamlObjectFile &ObjectFile)
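// A sketch of the inverse of writeFunctionStarts() above: the payload is a
// sequence of ULEB128 deltas between successive addresses, closed by the
// trailing zero byte. Buf/Size are assumed to describe the LC_FUNCTION_STARTS
// payload, and decodeFunctionStarts is a hypothetical helper name.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/LEB128.h"

static llvm::SmallVector<uint64_t, 8> decodeFunctionStarts(const uint8_t *Buf,
                                                           size_t Size) {
  llvm::SmallVector<uint64_t, 8> Starts;
  uint64_t Addr = 0;
  const uint8_t *P = Buf, *End = Buf + Size;
  while (P != End) {
    unsigned Len;
    uint64_t Delta = llvm::decodeULEB128(P, &Len, End);
    P += Len;
    if (Delta == 0)
      break; // the terminator byte written by writeFunctionStarts()
    Addr += Delta;
    Starts.push_back(Addr);
  }
  return Starts;
}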
diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index f32009458110..b6f3b53a42b3 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -26,10 +26,10 @@ namespace llvm {
MachOYAML::LoadCommand::~LoadCommand() = default;
bool MachOYAML::LinkEditData::isEmpty() const {
- return 0 ==
- RebaseOpcodes.size() + BindOpcodes.size() + WeakBindOpcodes.size() +
- LazyBindOpcodes.size() + ExportTrie.Children.size() +
- NameList.size() + StringTable.size();
+ return 0 == RebaseOpcodes.size() + BindOpcodes.size() +
+ WeakBindOpcodes.size() + LazyBindOpcodes.size() +
+ ExportTrie.Children.size() + NameList.size() +
+ StringTable.size() + FunctionStarts.size();
}
namespace yaml {
@@ -165,6 +165,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping(
IO.mapOptional("NameList", LinkEditData.NameList);
IO.mapOptional("StringTable", LinkEditData.StringTable);
IO.mapOptional("IndirectSymbols", LinkEditData.IndirectSymbols);
+ IO.mapOptional("FunctionStarts", LinkEditData.FunctionStarts);
}
void MappingTraits<MachOYAML::RebaseOpcode>::mapping(
diff --git a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
index bbfd2cd8cbab..9505473a2415 100644
--- a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
@@ -219,7 +219,7 @@ static Directory layout(BlobAllocator &File, Stream &S) {
// If DataEnd is not set, we assume everything we generated is a part of the
// stream.
Result.Location.DataSize =
- DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
+ DataEnd.value_or(File.tell()) - Result.Location.RVA;
return Result;
}
diff --git a/llvm/lib/ObjectYAML/ObjectYAML.cpp b/llvm/lib/ObjectYAML/ObjectYAML.cpp
index 63769d2eba0e..d57e5583016b 100644
--- a/llvm/lib/ObjectYAML/ObjectYAML.cpp
+++ b/llvm/lib/ObjectYAML/ObjectYAML.cpp
@@ -56,12 +56,19 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
} else if (IO.mapTag("!minidump")) {
ObjectFile.Minidump.reset(new MinidumpYAML::Object());
MappingTraits<MinidumpYAML::Object>::mapping(IO, *ObjectFile.Minidump);
+ } else if (IO.mapTag("!Offload")) {
+ ObjectFile.Offload.reset(new OffloadYAML::Binary());
+ MappingTraits<OffloadYAML::Binary>::mapping(IO, *ObjectFile.Offload);
} else if (IO.mapTag("!WASM")) {
ObjectFile.Wasm.reset(new WasmYAML::Object());
MappingTraits<WasmYAML::Object>::mapping(IO, *ObjectFile.Wasm);
} else if (IO.mapTag("!XCOFF")) {
ObjectFile.Xcoff.reset(new XCOFFYAML::Object());
MappingTraits<XCOFFYAML::Object>::mapping(IO, *ObjectFile.Xcoff);
+ } else if (IO.mapTag("!dxcontainer")) {
+ ObjectFile.DXContainer.reset(new DXContainerYAML::Object());
+ MappingTraits<DXContainerYAML::Object>::mapping(IO,
+ *ObjectFile.DXContainer);
} else if (const Node *N = In.getCurrentNode()) {
if (N->getRawTag().empty())
IO.setError("YAML Object File missing document type tag!");
diff --git a/llvm/lib/ObjectYAML/OffloadEmitter.cpp b/llvm/lib/ObjectYAML/OffloadEmitter.cpp
new file mode 100644
index 000000000000..3ffbc4ff0e11
--- /dev/null
+++ b/llvm/lib/ObjectYAML/OffloadEmitter.cpp
@@ -0,0 +1,68 @@
+//===- OffloadEmitter.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/OffloadBinary.h"
+#include "llvm/ObjectYAML/OffloadYAML.h"
+#include "llvm/ObjectYAML/yaml2obj.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace OffloadYAML;
+
+namespace llvm {
+namespace yaml {
+
+bool yaml2offload(Binary &Doc, raw_ostream &Out, ErrorHandler EH) {
+ for (const auto &Member : Doc.Members) {
+ object::OffloadBinary::OffloadingImage Image{};
+ if (Member.ImageKind)
+ Image.TheImageKind = *Member.ImageKind;
+ if (Member.OffloadKind)
+ Image.TheOffloadKind = *Member.OffloadKind;
+ if (Member.Flags)
+ Image.Flags = *Member.Flags;
+
+ StringMap<StringRef> &StringData = Image.StringData;
+ if (Member.StringEntries) {
+ for (const auto &Entry : *Member.StringEntries) {
+ StringData[Entry.Key] = Entry.Value;
+ }
+ }
+
+ SmallVector<char, 1024> Data;
+ raw_svector_ostream OS(Data);
+ if (Member.Content)
+ Member.Content->writeAsBinary(OS);
+ Image.Image = MemoryBuffer::getMemBufferCopy(OS.str());
+
+ std::unique_ptr<MemoryBuffer> Binary = object::OffloadBinary::write(Image);
+
+ // Copy the data to a new buffer so we can modify the bytes directly.
+ SmallVector<char> NewBuffer;
+ std::copy(Binary->getBufferStart(), Binary->getBufferEnd(),
+ std::back_inserter(NewBuffer));
+ auto *TheHeader =
+ reinterpret_cast<object::OffloadBinary::Header *>(&NewBuffer[0]);
+ if (Doc.Version)
+ TheHeader->Version = *Doc.Version;
+ if (Doc.Size)
+ TheHeader->Size = *Doc.Size;
+ if (Doc.EntryOffset)
+ TheHeader->EntryOffset = *Doc.EntryOffset;
+ if (Doc.EntrySize)
+ TheHeader->EntrySize = *Doc.EntrySize;
+
+ Out.write(NewBuffer.begin(), NewBuffer.size());
+ }
+
+ return true;
+}
+
+} // namespace yaml
+} // namespace llvm
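// Note on the design above: yaml2offload() patches the already-serialized
// header bytes so a YAML file can force out-of-spec Version/Size/EntryOffset/
// EntrySize values for testing. A round trip through the parser might look
// like the following (OffloadBinary::create from llvm/Object/OffloadBinary.h
// is an assumption here):
//   Expected<std::unique_ptr<object::OffloadBinary>> B =
//       object::OffloadBinary::create(MemoryBufferRef(Data, "yaml"));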
diff --git a/llvm/lib/ObjectYAML/OffloadYAML.cpp b/llvm/lib/ObjectYAML/OffloadYAML.cpp
new file mode 100644
index 000000000000..d5a0edde2179
--- /dev/null
+++ b/llvm/lib/ObjectYAML/OffloadYAML.cpp
@@ -0,0 +1,78 @@
+//===- OffloadYAML.cpp - Offload Binary YAMLIO implementation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of offload
+// binaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/OffloadYAML.h"
+
+namespace llvm {
+
+namespace yaml {
+
+void ScalarEnumerationTraits<object::ImageKind>::enumeration(
+ IO &IO, object::ImageKind &Value) {
+#define ECase(X) IO.enumCase(Value, #X, object::X)
+ ECase(IMG_None);
+ ECase(IMG_Object);
+ ECase(IMG_Bitcode);
+ ECase(IMG_Cubin);
+ ECase(IMG_Fatbinary);
+ ECase(IMG_PTX);
+ ECase(IMG_LAST);
+#undef ECase
+ IO.enumFallback<Hex16>(Value);
+}
+
+void ScalarEnumerationTraits<object::OffloadKind>::enumeration(
+ IO &IO, object::OffloadKind &Value) {
+#define ECase(X) IO.enumCase(Value, #X, object::X)
+ ECase(OFK_None);
+ ECase(OFK_OpenMP);
+ ECase(OFK_Cuda);
+ ECase(OFK_HIP);
+ ECase(OFK_LAST);
+#undef ECase
+ IO.enumFallback<Hex16>(Value);
+}
+
+void MappingTraits<OffloadYAML::Binary>::mapping(IO &IO,
+ OffloadYAML::Binary &O) {
+ assert(!IO.getContext() && "The IO context is initialized already");
+ IO.setContext(&O);
+ IO.mapTag("!Offload", true);
+ IO.mapOptional("Version", O.Version);
+ IO.mapOptional("Size", O.Size);
+ IO.mapOptional("EntryOffset", O.EntryOffset);
+ IO.mapOptional("EntrySize", O.EntrySize);
+ IO.mapRequired("Members", O.Members);
+ IO.setContext(nullptr);
+}
+
+void MappingTraits<OffloadYAML::Binary::StringEntry>::mapping(
+ IO &IO, OffloadYAML::Binary::StringEntry &SE) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapRequired("Key", SE.Key);
+ IO.mapRequired("Value", SE.Value);
+}
+
+void MappingTraits<OffloadYAML::Binary::Member>::mapping(
+ IO &IO, OffloadYAML::Binary::Member &M) {
+ assert(IO.getContext() && "The IO context is not initialized");
+ IO.mapOptional("ImageKind", M.ImageKind);
+ IO.mapOptional("OffloadKind", M.OffloadKind);
+ IO.mapOptional("Flags", M.Flags);
+ IO.mapOptional("String", M.StringEntries);
+ IO.mapOptional("Content", M.Content);
+}
+
+} // namespace yaml
+
+} // namespace llvm
diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 2aa2ef3e5541..6230312eff7b 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -33,7 +33,7 @@ private:
void writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec,
uint32_t SectionIndex);
- void writeInitExpr(raw_ostream &OS, const wasm::WasmInitExpr &InitExpr);
+ void writeInitExpr(raw_ostream &OS, const WasmYAML::InitExpr &InitExpr);
void writeSectionContent(raw_ostream &OS, WasmYAML::CustomSection &Section);
void writeSectionContent(raw_ostream &OS, WasmYAML::TypeSection &Section);
@@ -129,29 +129,34 @@ void WasmWriter::reportError(const Twine &Msg) {
}
void WasmWriter::writeInitExpr(raw_ostream &OS,
- const wasm::WasmInitExpr &InitExpr) {
- writeUint8(OS, InitExpr.Opcode);
- switch (InitExpr.Opcode) {
- case wasm::WASM_OPCODE_I32_CONST:
- encodeSLEB128(InitExpr.Value.Int32, OS);
- break;
- case wasm::WASM_OPCODE_I64_CONST:
- encodeSLEB128(InitExpr.Value.Int64, OS);
- break;
- case wasm::WASM_OPCODE_F32_CONST:
- writeUint32(OS, InitExpr.Value.Float32);
- break;
- case wasm::WASM_OPCODE_F64_CONST:
- writeUint64(OS, InitExpr.Value.Float64);
- break;
- case wasm::WASM_OPCODE_GLOBAL_GET:
- encodeULEB128(InitExpr.Value.Global, OS);
- break;
- default:
- reportError("unknown opcode in init_expr: " + Twine(InitExpr.Opcode));
- return;
+ const WasmYAML::InitExpr &InitExpr) {
+ if (InitExpr.Extended) {
+ InitExpr.Body.writeAsBinary(OS);
+ } else {
+ writeUint8(OS, InitExpr.Inst.Opcode);
+ switch (InitExpr.Inst.Opcode) {
+ case wasm::WASM_OPCODE_I32_CONST:
+ encodeSLEB128(InitExpr.Inst.Value.Int32, OS);
+ break;
+ case wasm::WASM_OPCODE_I64_CONST:
+ encodeSLEB128(InitExpr.Inst.Value.Int64, OS);
+ break;
+ case wasm::WASM_OPCODE_F32_CONST:
+ writeUint32(OS, InitExpr.Inst.Value.Float32);
+ break;
+ case wasm::WASM_OPCODE_F64_CONST:
+ writeUint64(OS, InitExpr.Inst.Value.Float64);
+ break;
+ case wasm::WASM_OPCODE_GLOBAL_GET:
+ encodeULEB128(InitExpr.Inst.Value.Global, OS);
+ break;
+ default:
+ reportError("unknown opcode in init_expr: " +
+ Twine(InitExpr.Inst.Opcode));
+ return;
+ }
+ writeUint8(OS, wasm::WASM_OPCODE_END);
}
- writeUint8(OS, wasm::WASM_OPCODE_END);
}
void WasmWriter::writeSectionContent(raw_ostream &OS,
@@ -187,13 +192,10 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
// SYMBOL_TABLE subsection
if (Section.SymbolTable.size()) {
writeUint8(OS, wasm::WASM_SYMBOL_TABLE);
-
encodeULEB128(Section.SymbolTable.size(), SubSection.getStream());
-#ifndef NDEBUG
- uint32_t SymbolIndex = 0;
-#endif
- for (const WasmYAML::SymbolInfo &Info : Section.SymbolTable) {
- assert(Info.Index == SymbolIndex++);
+ for (auto Sym : llvm::enumerate(Section.SymbolTable)) {
+ const WasmYAML::SymbolInfo &Info = Sym.value();
+ assert(Info.Index == Sym.index());
writeUint8(SubSection.getStream(), Info.Kind);
encodeULEB128(Info.Flags, SubSection.getStream());
switch (Info.Kind) {
@@ -481,7 +483,7 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
++ExpectedIndex;
writeUint8(OS, Global.Type);
writeUint8(OS, Global.Mutable);
- writeInitExpr(OS, Global.InitExpr);
+ writeInitExpr(OS, Global.Init);
}
}
diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 3f0172ebf361..7ca422487df2 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -367,8 +367,7 @@ void MappingTraits<WasmYAML::LocalDecl>::mapping(
void MappingTraits<WasmYAML::Limits>::mapping(IO &IO,
WasmYAML::Limits &Limits) {
- if (!IO.outputting() || Limits.Flags)
- IO.mapOptional("Flags", Limits.Flags);
+ IO.mapOptional("Flags", Limits.Flags, 0);
IO.mapRequired("Minimum", Limits.Minimum);
if (!IO.outputting() || Limits.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
IO.mapOptional("Maximum", Limits.Maximum);
@@ -376,8 +375,7 @@ void MappingTraits<WasmYAML::Limits>::mapping(IO &IO,
void MappingTraits<WasmYAML::ElemSegment>::mapping(
IO &IO, WasmYAML::ElemSegment &Segment) {
- if (!IO.outputting() || Segment.Flags)
- IO.mapOptional("Flags", Segment.Flags);
+ IO.mapOptional("Flags", Segment.Flags, 0);
if (!IO.outputting() ||
Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER)
IO.mapOptional("TableNumber", Segment.TableNumber);
@@ -420,35 +418,40 @@ void MappingTraits<WasmYAML::Global>::mapping(IO &IO,
IO.mapRequired("Index", Global.Index);
IO.mapRequired("Type", Global.Type);
IO.mapRequired("Mutable", Global.Mutable);
- IO.mapRequired("InitExpr", Global.InitExpr);
+ IO.mapRequired("InitExpr", Global.Init);
}
-void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
- wasm::WasmInitExpr &Expr) {
- WasmYAML::Opcode Op = Expr.Opcode;
- IO.mapRequired("Opcode", Op);
- Expr.Opcode = Op;
- switch (Expr.Opcode) {
- case wasm::WASM_OPCODE_I32_CONST:
- IO.mapRequired("Value", Expr.Value.Int32);
- break;
- case wasm::WASM_OPCODE_I64_CONST:
- IO.mapRequired("Value", Expr.Value.Int64);
- break;
- case wasm::WASM_OPCODE_F32_CONST:
- IO.mapRequired("Value", Expr.Value.Float32);
- break;
- case wasm::WASM_OPCODE_F64_CONST:
- IO.mapRequired("Value", Expr.Value.Float64);
- break;
- case wasm::WASM_OPCODE_GLOBAL_GET:
- IO.mapRequired("Index", Expr.Value.Global);
- break;
- case wasm::WASM_OPCODE_REF_NULL: {
- WasmYAML::ValueType Ty = wasm::WASM_TYPE_EXTERNREF;
- IO.mapRequired("Type", Ty);
- break;
- }
+void MappingTraits<WasmYAML::InitExpr>::mapping(IO &IO,
+ WasmYAML::InitExpr &Expr) {
+ IO.mapOptional("Extended", Expr.Extended, false);
+ if (Expr.Extended) {
+ IO.mapRequired("Body", Expr.Body);
+ } else {
+ WasmYAML::Opcode Op = Expr.Inst.Opcode;
+ IO.mapRequired("Opcode", Op);
+ Expr.Inst.Opcode = Op;
+ switch (Expr.Inst.Opcode) {
+ case wasm::WASM_OPCODE_I32_CONST:
+ IO.mapRequired("Value", Expr.Inst.Value.Int32);
+ break;
+ case wasm::WASM_OPCODE_I64_CONST:
+ IO.mapRequired("Value", Expr.Inst.Value.Int64);
+ break;
+ case wasm::WASM_OPCODE_F32_CONST:
+ IO.mapRequired("Value", Expr.Inst.Value.Float32);
+ break;
+ case wasm::WASM_OPCODE_F64_CONST:
+ IO.mapRequired("Value", Expr.Inst.Value.Float64);
+ break;
+ case wasm::WASM_OPCODE_GLOBAL_GET:
+ IO.mapRequired("Index", Expr.Inst.Value.Global);
+ break;
+ case wasm::WASM_OPCODE_REF_NULL: {
+ WasmYAML::ValueType Ty = wasm::WASM_TYPE_EXTERNREF;
+ IO.mapRequired("Type", Ty);
+ break;
+ }
+ }
}
}
@@ -464,8 +467,8 @@ void MappingTraits<WasmYAML::DataSegment>::mapping(
if ((Segment.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
IO.mapRequired("Offset", Segment.Offset);
} else {
- Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST;
- Segment.Offset.Value.Int32 = 0;
+ Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST;
+ Segment.Offset.Inst.Value.Int32 = 0;
}
IO.mapRequired("Content", Segment.Content);
}
diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
index 2a7204d3f773..1ceac6c05893 100644
--- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
@@ -212,8 +212,8 @@ bool XCOFFWriter::initStringTable() {
for (const std::unique_ptr<XCOFFYAML::AuxSymbolEnt> &AuxSym :
YamlSym.AuxEntries) {
if (auto AS = dyn_cast<XCOFFYAML::FileAuxEnt>(AuxSym.get()))
- if (nameShouldBeInStringTable(AS->FileNameOrString.getValueOr("")))
- StrTblBuilder.add(AS->FileNameOrString.getValueOr(""));
+ if (nameShouldBeInStringTable(AS->FileNameOrString.value_or("")))
+ StrTblBuilder.add(AS->FileNameOrString.value_or(""));
}
}
@@ -247,8 +247,7 @@ bool XCOFFWriter::initFileHeader(uint64_t CurrentOffset) {
Twine(AuxCount));
return false;
}
- YamlSym.NumberOfAuxEntries =
- YamlSym.NumberOfAuxEntries.getValueOr(AuxCount);
+ YamlSym.NumberOfAuxEntries = YamlSym.NumberOfAuxEntries.value_or(AuxCount);
// Add the number of auxiliary symbols to the total number.
InitFileHdr.NumberOfSymTableEntries += *YamlSym.NumberOfAuxEntries;
}
@@ -378,59 +377,60 @@ void XCOFFWriter::writeFileHeader() {
}
void XCOFFWriter::writeAuxFileHeader() {
- W.write<uint16_t>(InitAuxFileHdr.Magic.getValueOr(yaml::Hex16(1)));
- W.write<uint16_t>(InitAuxFileHdr.Version.getValueOr(yaml::Hex16(1)));
+ W.write<uint16_t>(InitAuxFileHdr.Magic.value_or(yaml::Hex16(1)));
+ W.write<uint16_t>(InitAuxFileHdr.Version.value_or(yaml::Hex16(1)));
if (Is64Bit) {
W.OS.write_zeros(4); // Reserved for debugger.
- W.write<uint64_t>(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0)));
- W.write<uint64_t>(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0)));
- W.write<uint64_t>(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.TextStartAddr.value_or(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.DataStartAddr.value_or(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.TOCAnchorAddr.value_or(yaml::Hex64(0)));
} else {
- W.write<uint32_t>(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0)));
- W.write<uint32_t>(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0)));
- W.write<uint32_t>(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0)));
- W.write<uint32_t>(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0)));
- W.write<uint32_t>(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0)));
- W.write<uint32_t>(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0)));
- W.write<uint32_t>(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.TextSize.value_or(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.InitDataSize.value_or(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.BssDataSize.value_or(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.EntryPointAddr.value_or(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.TextStartAddr.value_or(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.DataStartAddr.value_or(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.TOCAnchorAddr.value_or(yaml::Hex64(0)));
}
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfEntryPoint.getValueOr(0));
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfText.getValueOr(0));
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfData.getValueOr(0));
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfTOC.getValueOr(0));
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfLoader.getValueOr(0));
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfBSS.getValueOr(0));
- W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfText.getValueOr(yaml::Hex16(0)));
- W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfData.getValueOr(yaml::Hex16(0)));
- W.write<uint16_t>(InitAuxFileHdr.ModuleType.getValueOr(yaml::Hex16(0)));
- W.write<uint8_t>(InitAuxFileHdr.CpuFlag.getValueOr(yaml::Hex8(0)));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfEntryPoint.value_or(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfText.value_or(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfData.value_or(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTOC.value_or(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfLoader.value_or(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfBSS.value_or(0));
+ W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfText.value_or(yaml::Hex16(0)));
+ W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfData.value_or(yaml::Hex16(0)));
+ W.write<uint16_t>(InitAuxFileHdr.ModuleType.value_or(yaml::Hex16(0)));
+ W.write<uint8_t>(InitAuxFileHdr.CpuFlag.value_or(yaml::Hex8(0)));
W.write<uint8_t>(0); // Reserved for CPU type.
if (Is64Bit) {
- W.write<uint8_t>(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0)));
- W.write<uint8_t>(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0)));
- W.write<uint8_t>(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.TextPageSize.value_or(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.DataPageSize.value_or(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.StackPageSize.value_or(yaml::Hex8(0)));
W.write<uint8_t>(
- InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0x80)));
- W.write<uint64_t>(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0)));
- W.write<uint64_t>(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0)));
- W.write<uint64_t>(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0)));
- W.write<uint64_t>(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0)));
- W.write<uint64_t>(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0)));
- W.write<uint64_t>(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0)));
+ InitAuxFileHdr.FlagAndTDataAlignment.value_or(yaml::Hex8(0x80)));
+ W.write<uint64_t>(InitAuxFileHdr.TextSize.value_or(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.InitDataSize.value_or(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.BssDataSize.value_or(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.EntryPointAddr.value_or(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.MaxStackSize.value_or(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.MaxDataSize.value_or(yaml::Hex64(0)));
} else {
- W.write<uint32_t>(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0)));
- W.write<uint32_t>(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.MaxStackSize.value_or(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.MaxDataSize.value_or(yaml::Hex64(0)));
W.OS.write_zeros(4); // Reserved for debugger.
- W.write<uint8_t>(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0)));
- W.write<uint8_t>(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0)));
- W.write<uint8_t>(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.TextPageSize.value_or(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.DataPageSize.value_or(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.StackPageSize.value_or(yaml::Hex8(0)));
W.write<uint8_t>(
- InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0)));
+ InitAuxFileHdr.FlagAndTDataAlignment.value_or(yaml::Hex8(0)));
}
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfTData.getValueOr(0));
- W.write<uint16_t>(InitAuxFileHdr.SecNumOfTBSS.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTData.value_or(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTBSS.value_or(0));
if (Is64Bit) {
- W.write<uint16_t>(InitAuxFileHdr.Flag.getValueOr(yaml::Hex16(XCOFF::SHR_SYMTAB)));
+ W.write<uint16_t>(
+ InitAuxFileHdr.Flag.value_or(yaml::Hex16(XCOFF::SHR_SYMTAB)));
if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize64)
W.OS.write_zeros(InitFileHdr.AuxHeaderSize - XCOFF::AuxFileHeaderSize64);
} else if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize32) {
@@ -526,52 +526,52 @@ bool XCOFFWriter::writeRelocations() {
void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::CsectAuxEnt &AuxSym) {
if (Is64Bit) {
- W.write<uint32_t>(AuxSym.SectionOrLengthLo.getValueOr(0));
- W.write<uint32_t>(AuxSym.ParameterHashIndex.getValueOr(0));
- W.write<uint16_t>(AuxSym.TypeChkSectNum.getValueOr(0));
- W.write<uint8_t>(AuxSym.SymbolAlignmentAndType.getValueOr(0));
- W.write<uint8_t>(AuxSym.StorageMappingClass.getValueOr(XCOFF::XMC_PR));
- W.write<uint32_t>(AuxSym.SectionOrLengthHi.getValueOr(0));
+ W.write<uint32_t>(AuxSym.SectionOrLengthLo.value_or(0));
+ W.write<uint32_t>(AuxSym.ParameterHashIndex.value_or(0));
+ W.write<uint16_t>(AuxSym.TypeChkSectNum.value_or(0));
+ W.write<uint8_t>(AuxSym.SymbolAlignmentAndType.value_or(0));
+ W.write<uint8_t>(AuxSym.StorageMappingClass.value_or(XCOFF::XMC_PR));
+ W.write<uint32_t>(AuxSym.SectionOrLengthHi.value_or(0));
W.write<uint8_t>(0);
W.write<uint8_t>(XCOFF::AUX_CSECT);
} else {
- W.write<uint32_t>(AuxSym.SectionOrLength.getValueOr(0));
- W.write<uint32_t>(AuxSym.ParameterHashIndex.getValueOr(0));
- W.write<uint16_t>(AuxSym.TypeChkSectNum.getValueOr(0));
- W.write<uint8_t>(AuxSym.SymbolAlignmentAndType.getValueOr(0));
- W.write<uint8_t>(AuxSym.StorageMappingClass.getValueOr(XCOFF::XMC_PR));
- W.write<uint32_t>(AuxSym.StabInfoIndex.getValueOr(0));
- W.write<uint16_t>(AuxSym.StabSectNum.getValueOr(0));
+ W.write<uint32_t>(AuxSym.SectionOrLength.value_or(0));
+ W.write<uint32_t>(AuxSym.ParameterHashIndex.value_or(0));
+ W.write<uint16_t>(AuxSym.TypeChkSectNum.value_or(0));
+ W.write<uint8_t>(AuxSym.SymbolAlignmentAndType.value_or(0));
+ W.write<uint8_t>(AuxSym.StorageMappingClass.value_or(XCOFF::XMC_PR));
+ W.write<uint32_t>(AuxSym.StabInfoIndex.value_or(0));
+ W.write<uint16_t>(AuxSym.StabSectNum.value_or(0));
}
}
void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::ExcpetionAuxEnt &AuxSym) {
assert(Is64Bit && "can't write the exception auxiliary symbol for XCOFF32");
- W.write<uint64_t>(AuxSym.OffsetToExceptionTbl.getValueOr(0));
- W.write<uint32_t>(AuxSym.SizeOfFunction.getValueOr(0));
- W.write<uint32_t>(AuxSym.SymIdxOfNextBeyond.getValueOr(0));
+ W.write<uint64_t>(AuxSym.OffsetToExceptionTbl.value_or(0));
+ W.write<uint32_t>(AuxSym.SizeOfFunction.value_or(0));
+ W.write<uint32_t>(AuxSym.SymIdxOfNextBeyond.value_or(0));
W.write<uint8_t>(0);
W.write<uint8_t>(XCOFF::AUX_EXCEPT);
}
void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FunctionAuxEnt &AuxSym) {
if (Is64Bit) {
- W.write<uint64_t>(AuxSym.PtrToLineNum.getValueOr(0));
- W.write<uint32_t>(AuxSym.SizeOfFunction.getValueOr(0));
- W.write<uint32_t>(AuxSym.SymIdxOfNextBeyond.getValueOr(0));
+ W.write<uint64_t>(AuxSym.PtrToLineNum.value_or(0));
+ W.write<uint32_t>(AuxSym.SizeOfFunction.value_or(0));
+ W.write<uint32_t>(AuxSym.SymIdxOfNextBeyond.value_or(0));
W.write<uint8_t>(0);
W.write<uint8_t>(XCOFF::AUX_FCN);
} else {
- W.write<uint32_t>(AuxSym.OffsetToExceptionTbl.getValueOr(0));
- W.write<uint32_t>(AuxSym.SizeOfFunction.getValueOr(0));
- W.write<uint32_t>(AuxSym.PtrToLineNum.getValueOr(0));
- W.write<uint32_t>(AuxSym.SymIdxOfNextBeyond.getValueOr(0));
+ W.write<uint32_t>(AuxSym.OffsetToExceptionTbl.value_or(0));
+ W.write<uint32_t>(AuxSym.SizeOfFunction.value_or(0));
+ W.write<uint32_t>(AuxSym.PtrToLineNum.value_or(0));
+ W.write<uint32_t>(AuxSym.SymIdxOfNextBeyond.value_or(0));
W.OS.write_zeros(2);
}
}
void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FileAuxEnt &AuxSym) {
- StringRef FileName = AuxSym.FileNameOrString.getValueOr("");
+ StringRef FileName = AuxSym.FileNameOrString.value_or("");
if (nameShouldBeInStringTable(FileName)) {
W.write<int32_t>(0);
W.write<uint32_t>(StrTblBuilder.getOffset(FileName));
@@ -579,7 +579,7 @@ void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FileAuxEnt &AuxSym) {
writeName(FileName, W);
}
W.OS.write_zeros(XCOFF::FileNamePadSize);
- W.write<uint8_t>(AuxSym.FileStringType.getValueOr(XCOFF::XFT_FN));
+ W.write<uint8_t>(AuxSym.FileStringType.value_or(XCOFF::XFT_FN));
if (Is64Bit) {
W.OS.write_zeros(2);
W.write<uint8_t>(XCOFF::AUX_FILE);
@@ -590,36 +590,36 @@ void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::FileAuxEnt &AuxSym) {
void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::BlockAuxEnt &AuxSym) {
if (Is64Bit) {
- W.write<uint32_t>(AuxSym.LineNum.getValueOr(0));
+ W.write<uint32_t>(AuxSym.LineNum.value_or(0));
W.OS.write_zeros(13);
W.write<uint8_t>(XCOFF::AUX_SYM);
} else {
W.OS.write_zeros(2);
- W.write<uint16_t>(AuxSym.LineNumHi.getValueOr(0));
- W.write<uint16_t>(AuxSym.LineNumLo.getValueOr(0));
+ W.write<uint16_t>(AuxSym.LineNumHi.value_or(0));
+ W.write<uint16_t>(AuxSym.LineNumLo.value_or(0));
W.OS.write_zeros(12);
}
}
void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::SectAuxEntForDWARF &AuxSym) {
if (Is64Bit) {
- W.write<uint64_t>(AuxSym.LengthOfSectionPortion.getValueOr(0));
- W.write<uint64_t>(AuxSym.NumberOfRelocEnt.getValueOr(0));
+ W.write<uint64_t>(AuxSym.LengthOfSectionPortion.value_or(0));
+ W.write<uint64_t>(AuxSym.NumberOfRelocEnt.value_or(0));
W.write<uint8_t>(0);
W.write<uint8_t>(XCOFF::AUX_SECT);
} else {
- W.write<uint32_t>(AuxSym.LengthOfSectionPortion.getValueOr(0));
+ W.write<uint32_t>(AuxSym.LengthOfSectionPortion.value_or(0));
W.OS.write_zeros(4);
- W.write<uint32_t>(AuxSym.NumberOfRelocEnt.getValueOr(0));
+ W.write<uint32_t>(AuxSym.NumberOfRelocEnt.value_or(0));
W.OS.write_zeros(6);
}
}
void XCOFFWriter::writeAuxSymbol(const XCOFFYAML::SectAuxEntForStat &AuxSym) {
assert(!Is64Bit && "can't write the stat auxiliary symbol for XCOFF64");
- W.write<uint32_t>(AuxSym.SectionLength.getValueOr(0));
- W.write<uint16_t>(AuxSym.NumberOfRelocEnt.getValueOr(0));
- W.write<uint16_t>(AuxSym.NumberOfLineNum.getValueOr(0));
+ W.write<uint32_t>(AuxSym.SectionLength.value_or(0));
+ W.write<uint16_t>(AuxSym.NumberOfRelocEnt.value_or(0));
+ W.write<uint16_t>(AuxSym.NumberOfLineNum.value_or(0));
W.OS.write_zeros(10);
}
@@ -686,7 +686,7 @@ bool XCOFFWriter::writeSymbols() {
W.write<uint16_t>(YamlSym.Type);
W.write<uint8_t>(YamlSym.StorageClass);
- uint8_t NumOfAuxSym = YamlSym.NumberOfAuxEntries.getValueOr(0);
+ uint8_t NumOfAuxSym = YamlSym.NumberOfAuxEntries.value_or(0);
W.write<uint8_t>(NumOfAuxSym);
if (!NumOfAuxSym && !YamlSym.AuxEntries.size())
diff --git a/llvm/lib/ObjectYAML/yaml2obj.cpp b/llvm/lib/ObjectYAML/yaml2obj.cpp
index d19fa0a52530..06050e246fbf 100644
--- a/llvm/lib/ObjectYAML/yaml2obj.cpp
+++ b/llvm/lib/ObjectYAML/yaml2obj.cpp
@@ -42,10 +42,14 @@ bool convertYAML(yaml::Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler,
return yaml2macho(Doc, Out, ErrHandler);
if (Doc.Minidump)
return yaml2minidump(*Doc.Minidump, Out, ErrHandler);
+ if (Doc.Offload)
+ return yaml2offload(*Doc.Offload, Out, ErrHandler);
if (Doc.Wasm)
return yaml2wasm(*Doc.Wasm, Out, ErrHandler);
if (Doc.Xcoff)
return yaml2xcoff(*Doc.Xcoff, Out, ErrHandler);
+ if (Doc.DXContainer)
+ return yaml2dxcontainer(*Doc.DXContainer, Out, ErrHandler);
ErrHandler("unknown document type");
return false;
diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp
index ad7be5fbec19..fab0fb07cbc8 100644
--- a/llvm/lib/Option/ArgList.cpp
+++ b/llvm/lib/Option/ArgList.cpp
@@ -95,6 +95,13 @@ std::vector<std::string> ArgList::getAllArgValues(OptSpecifier Id) const {
return std::vector<std::string>(Values.begin(), Values.end());
}
+void ArgList::addOptInFlag(ArgStringList &Output, OptSpecifier Pos,
+ OptSpecifier Neg) const {
+ if (Arg *A = getLastArg(Pos, Neg))
+ if (A->getOption().matches(Pos))
+ A->render(*this, Output);
+}
+
void ArgList::AddAllArgsExcept(ArgStringList &Output,
ArrayRef<OptSpecifier> Ids,
ArrayRef<OptSpecifier> ExcludeIds) const {
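
The new addOptInFlag renders the positive flag only when it wins the usual last-one-wins contest against its negative counterpart. A hedged usage sketch, where Args is an ArgList and OPT_ffoo/OPT_fno_foo are placeholder option IDs, not real options:

    // Emits "-ffoo" into CmdArgs iff the last of {-ffoo, -fno-foo} on the
    // command line was -ffoo; emits nothing otherwise.
    ArgStringList CmdArgs;
    Args.addOptInFlag(CmdArgs, options::OPT_ffoo, options::OPT_fno_foo);
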
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 015ca1eec4df..42fde3752724 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallPrinter.h"
#include "llvm/Analysis/CostModel.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/DDG.h"
@@ -185,7 +186,7 @@
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
-#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
@@ -212,6 +213,7 @@
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
+#include "llvm/Transforms/Scalar/TLSVariableHoist.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
@@ -229,11 +231,13 @@
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/LowerGlobalDtors.h"
#include "llvm/Transforms/Utils/LowerInvoke.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
#include "llvm/Transforms/Utils/StripGCRelocates.h"
#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
@@ -371,6 +375,17 @@ bool shouldPopulateClassToPassNames() {
!printAfterPasses().empty();
}
+// A pass for testing -print-on-crash.
+// DO NOT USE THIS EXCEPT FOR TESTING!
+class TriggerCrashPass : public PassInfoMixin<TriggerCrashPass> {
+public:
+ PreservedAnalyses run(Module &, ModuleAnalysisManager &) {
+ abort();
+ return PreservedAnalyses::all();
+ }
+ static StringRef name() { return "TriggerCrashPass"; }
+};
+
} // namespace
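
TriggerCrashPass is registered as "trigger-crash" in PassRegistry.def below and pairs with the -print-on-crash instrumentation added to StandardInstrumentations.cpp. A hedged sketch of driving it programmatically, using the standard new-pass-manager boilerplate (the pipeline deliberately aborts inside run()):

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Support/Error.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("crash-demo", Ctx);
      // Usual new-PM analysis-manager setup.
      LoopAnalysisManager LAM;
      FunctionAnalysisManager FAM;
      CGSCCAnalysisManager CGAM;
      ModuleAnalysisManager MAM;
      PassBuilder PB;
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
      ModulePassManager MPM;
      cantFail(PB.parsePassPipeline(MPM, "trigger-crash"));
      MPM.run(M, MAM); // abort()s here; -print-on-crash would dump the IR
    }
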
PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
@@ -585,6 +600,10 @@ Expected<bool> parseInlinerPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "only-mandatory", "InlinerPass");
}
+Expected<bool> parseCoroSplitPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "reuse-storage", "CoroSplitPass");
+}
+
Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "memssa", "EarlyCSE");
}
@@ -679,6 +698,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "forward-switch-cond") {
Result.forwardSwitchCondToPhi(Enable);
+ } else if (ParamName == "switch-range-to-icmp") {
+ Result.convertSwitchRangeToICmp(Enable);
} else if (ParamName == "switch-to-lookup") {
Result.convertSwitchToLookupTable(Enable);
} else if (ParamName == "keep-loops") {
@@ -747,6 +768,24 @@ Expected<std::pair<bool, bool>> parseLoopUnswitchOptions(StringRef Params) {
return Result;
}
+Expected<LICMOptions> parseLICMOptions(StringRef Params) {
+ LICMOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "allowspeculation") {
+ Result.AllowSpeculation = Enable;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid LICM pass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) {
bool Result = false;
while (!Params.empty()) {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 93637c890c4f..a5345172aae1 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -32,6 +32,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
+#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
@@ -140,7 +141,7 @@ static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
"Use release mode (AOT-compiled model).")));
static cl::opt<bool> EnableSyntheticCounts(
- "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ "enable-npm-synthetic-counts", cl::Hidden,
cl::desc("Run synthetic function entry count generation "
"pass"));
@@ -150,8 +151,7 @@ static cl::opt<bool>
cl::Hidden,
cl::desc("Enable inline deferral during PGO"));
-static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
+static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden,
cl::desc("Enable memory profiler"));
static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
@@ -159,13 +159,13 @@ static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
cl::desc("Enable module inliner"));
static cl::opt<bool> PerformMandatoryInliningsFirst(
- "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ "mandatory-inlining-first", cl::init(true), cl::Hidden,
cl::desc("Perform mandatory inlinings module-wide, before performing "
"inlining."));
static cl::opt<bool> EnableO3NonTrivialUnswitching(
"enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
+ cl::desc("Enable non-trivial loop unswitching for -O3"));
static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
"eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
@@ -233,9 +233,7 @@ void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
// Helper to add AnnotationRemarksPass.
static void addAnnotationRemarksPass(ModulePassManager &MPM) {
- FunctionPassManager FPM;
- FPM.addPass(AnnotationRemarksPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
}
// Helper to check if the current compilation phase is preparing for LTO
@@ -259,14 +257,16 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
// Hoisting of scalars and load expressions.
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
FPM.addPass(LibCallsShrinkWrapPass());
invokePeepholeEPCallbacks(FPM, Level);
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
// Form canonically associated expression trees, and simplify the trees using
// basic mathematical properties. For example, this will form (nearly)
@@ -291,14 +291,19 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LoopSimplifyCFGPass());
// Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated.
+  // to reduce the amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time, as LICM would
+  // destroy metadata that would not need to be destroyed if it ran
+  // after loop rotation.
// TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/false));
LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
LPM1.addPass(SimpleLoopUnswitchPass());
if (EnableLoopFlatten)
LPM1.addPass(LoopFlattenPass());
@@ -335,7 +340,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
@@ -373,7 +379,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
// the simplifications and basic cleanup after all the simplifications.
// TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
@@ -408,7 +415,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Global value numbering based sinking.
if (EnableGVNSink) {
FPM.addPass(GVNSinkPass());
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
}
if (EnableConstraintElimination)
@@ -421,7 +429,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
if (Level == OptimizationLevel::O3)
FPM.addPass(AggressiveInstCombinePass());
@@ -438,7 +447,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(PGOMemOPSizeOpt());
FPM.addPass(TailCallElimPass());
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
// Form canonically associated expression trees, and simplify the trees using
// basic mathematical properties. For example, this will form (nearly)
@@ -463,15 +473,20 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LoopSimplifyCFGPass());
// Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated.
+  // to reduce the amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time, as LICM would
+  // destroy metadata that would not need to be destroyed if it ran
+  // after loop rotation.
// TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/false));
// Disable header duplication in loop rotation at -Oz.
LPM1.addPass(
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
LPM1.addPass(
SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
EnableO3NonTrivialUnswitching));
@@ -510,7 +525,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
// The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
@@ -567,7 +583,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
FPM.addPass(CoroElidePass());
@@ -575,8 +592,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
- FPM.addPass(SimplifyCFGPass(
- SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+ .convertSwitchRangeToICmp(true)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
@@ -596,7 +615,8 @@ void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
OptimizationLevel Level, bool RunProfileGen,
bool IsCS, std::string ProfileFile,
- std::string ProfileRemappingFile) {
+ std::string ProfileRemappingFile,
+ ThinOrFullLTOPhase LTOPhase) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
if (!IsCS && !DisablePreInliner) {
InlineParams IP;
@@ -608,13 +628,16 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
// performance testing.
// FIXME: this comment is cargo culted from the old pass manager, revisit).
IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
- ModuleInlinerWrapperPass MIWP(IP);
+ ModuleInlinerWrapperPass MIWP(
+ IP, /* MandatoryFirst */ true,
+ InlineContext{LTOPhase, InlinePass::EarlyInliner});
CGSCCPassManager &CGPipeline = MIWP.getPM();
FunctionPassManager FPM;
FPM.addPass(SROAPass());
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
- FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove basic blocks.
FPM.addPass(InstCombinePass()); // Combine silly sequences.
invokePeepholeEPCallbacks(FPM, Level);
@@ -641,13 +664,13 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
// Perform PGO instrumentation.
MPM.addPass(PGOInstrumentationGen(IsCS));
- FunctionPassManager FPM;
// Disable header duplication in loop rotation at -Oz.
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
- PTO.EagerlyInvalidateAnalyses));
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ createFunctionToLoopPassAdaptor(
+ LoopRotatePass(Level != OptimizationLevel::Oz),
+ /*UseMemorySSA=*/false,
+ /*UseBlockFrequencyInfo=*/false),
+ PTO.EagerlyInvalidateAnalyses));
// Add the profile lowering pass.
InstrProfOptions Options;
@@ -692,6 +715,12 @@ ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
InlineParams IP = getInlineParamsFromOptLevel(Level);
+ // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
+  // disable hot callsite inlining (as much as possible [1]) because it makes
+ // profile annotation in the backend inaccurate.
+ //
+ // [1] Note the cost of a function could be below zero due to erased
+ // prologue / epilogue.
if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
@@ -699,8 +728,10 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
if (PGOOpt)
IP.EnableDeferral = EnablePGOInlineDeferral;
- ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
- UseInlineAdvisor, MaxDevirtIterations);
+ ModuleInlinerWrapperPass MIWP(
+ IP, PerformMandatoryInliningsFirst,
+ InlineContext{Phase, InlinePass::CGSCCInliner},
+ UseInlineAdvisor, MaxDevirtIterations);
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
@@ -765,6 +796,12 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
ModulePassManager MPM;
InlineParams IP = getInlineParamsFromOptLevel(Level);
+ // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
+  // disable hot callsite inlining (as much as possible [1]) because it makes
+ // profile annotation in the backend inaccurate.
+ //
+ // [1] Note the cost of a function could be below zero due to erased
+ // prologue / epilogue.
if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
@@ -780,7 +817,7 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
// inline deferral logic in module inliner.
IP.EnableDeferral = false;
- MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor));
+ MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
MPM.addPass(createModuleToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, Phase),
@@ -832,6 +869,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Do basic inference of function attributes from known properties of system
// libraries and other oracles.
MPM.addPass(InferFunctionAttrsPass());
+ MPM.addPass(CoroEarlyPass());
// Create an early function pass manager to cleanup the output of the
// frontend.
@@ -842,7 +880,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
EarlyFPM.addPass(SimplifyCFGPass());
EarlyFPM.addPass(SROAPass());
EarlyFPM.addPass(EarlyCSEPass());
- EarlyFPM.addPass(CoroEarlyPass());
if (Level == OptimizationLevel::O3)
EarlyFPM.addPass(CallSiteSplittingPass());
@@ -928,7 +965,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
GlobalCleanupPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
- GlobalCleanupPM.addPass(SimplifyCFGPass());
+ GlobalCleanupPM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
PTO.EagerlyInvalidateAnalyses));
@@ -939,7 +977,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level,
/* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
/* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
+ PGOOpt->ProfileRemappingFile, Phase);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
@@ -955,6 +993,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
else
MPM.addPass(buildInlinerPipeline(Level, Phase));
+ MPM.addPass(CoroCleanupPass());
+
if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
MPM.addPass(ModuleMemProfilerPass());
@@ -1007,7 +1047,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
ExtraPasses.addPass(CorrelatedValuePropagationPass());
ExtraPasses.addPass(InstCombinePass());
LoopPassManager LPM;
- LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
ExtraPasses.addPass(
@@ -1015,7 +1056,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
ExtraPasses.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
- ExtraPasses.addPass(SimplifyCFGPass());
+ ExtraPasses.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
ExtraPasses.addPass(InstCombinePass());
FPM.addPass(std::move(ExtraPasses));
}
@@ -1031,6 +1073,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
// before SLP vectorization.
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
+ .convertSwitchRangeToICmp(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
@@ -1073,7 +1116,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true),
/*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
}
@@ -1087,7 +1131,9 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
ModulePassManager
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
+ ThinOrFullLTOPhase LTOPhase) {
+ const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
+ LTOPhase == ThinOrFullLTOPhase::FullLTOPreLink);
ModulePassManager MPM;
// Optimize globals now that the module is fully simplified.
@@ -1127,21 +1173,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
+ PGOOpt->ProfileRemappingFile, LTOPhase);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
+ PGOOpt->ProfileRemappingFile, LTOPhase);
}
- // Re-require GloblasAA here prior to function passes. This is particularly
+ // Re-compute GlobalsAA here prior to function passes. This is particularly
// useful as the above will have inlined, DCE'ed, and function-attr
// propagated everything. We should at this point have a reasonably minimal
// and richly annotated call graph. By computing aliasing and mod/ref
// information for all local globals here, the late loop passes and notably
// the vectorizer will be able to use them to help recognize vectorizable
// memory operations.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ MPM.addPass(RecomputeGlobalsAAPass());
+
+ for (auto &C : OptimizerEarlyEPCallbacks)
+ C(MPM, Level);
FunctionPassManager OptimizePM;
OptimizePM.addPass(Float2IntPass());
@@ -1202,9 +1251,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- OptimizePM.addPass(SimplifyCFGPass());
-
- OptimizePM.addPass(CoroCleanupPass());
+ OptimizePM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
@@ -1230,9 +1278,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
- if (PTO.CallGraphProfile)
- MPM.addPass(CGProfilePass());
-
// Now we need to do some global optimization transforms.
// FIXME: It would seem like these should come first in the optimization
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
@@ -1240,6 +1285,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
MPM.addPass(GlobalDCEPass());
MPM.addPass(ConstantMergePass());
+ if (PTO.CallGraphProfile && !LTOPreLink)
+ MPM.addPass(CGProfilePass());
+
// TODO: Relative look table converter pass caused an issue when full lto is
// enabled. See https://reviews.llvm.org/D94355 for more details.
// Until the issue fixed, disable this pass during pre-linking phase.
@@ -1270,13 +1318,14 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+ const ThinOrFullLTOPhase LTOPhase = LTOPreLink
+ ? ThinOrFullLTOPhase::FullLTOPreLink
+ : ThinOrFullLTOPhase::None;
// Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
- : ThinOrFullLTOPhase::None));
+ MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));
// Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
+ MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
PGOOpt->Action == PGOOptions::SampleUse)
@@ -1330,11 +1379,6 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// Reduce the size of the IR as much as possible.
MPM.addPass(GlobalOptPass());
- // Module simplification splits coroutines, but does not fully clean up
- // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
- // on these, we schedule the cleanup here.
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
-
if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
PGOOpt->Action == PGOOptions::SampleUse)
MPM.addPass(PseudoProbeUpdatePass());
@@ -1400,7 +1444,8 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPostLink));
// Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level));
+ MPM.addPass(buildModuleOptimizationPipeline(
+ Level, ThinOrFullLTOPhase::ThinLTOPostLink));
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1425,6 +1470,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
+ for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
+ C(MPM, Level);
+
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
MPM.addPass(CrossDSOCFIPass());
@@ -1438,6 +1486,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// in ICP.
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+ for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
+ C(MPM, Level);
+
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
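
The FullLinkTimeOptimization{Early,Last} loops added above give full LTO the same kind of extension points the other pipelines already expose. A hedged sketch of registering one, assuming the matching register hook that PassBuilder provides for each callback vector (the GlobalDCE choice is arbitrary):

    PB.registerFullLinkTimeOptimizationLastEPCallback(
        [](ModulePassManager &MPM, OptimizationLevel Level) {
          MPM.addPass(GlobalDCEPass());
        });
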
@@ -1469,10 +1520,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(InferFunctionAttrsPass());
if (Level.getSpeedupLevel() > 1) {
- FunctionPassManager EarlyFPM;
- EarlyFPM.addPass(CallSiteSplittingPass());
MPM.addPass(createModuleToFunctionPassAdaptor(
- std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
+ CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
// Indirect call promotion. This should promote all the targets that are
// left by the earlier promotion pass that promotes intra-module targets.
@@ -1519,6 +1568,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+ for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
+ C(MPM, Level);
+
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1556,7 +1608,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
// Run the inliner now.
- MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
+ MPM.addPass(ModuleInlinerWrapperPass(
+ getInlineParamsFromOptLevel(Level),
+ /* MandatoryFirst */ true,
+ InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
+ InlinePass::CGSCCInliner}));
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
@@ -1573,7 +1629,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
- FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+ FPM.addPass(JumpThreadingPass());
// Do a post inline PGO instrumentation and use pass. This is a context
// sensitive PGO pass.
@@ -1581,11 +1637,13 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
+ PGOOpt->ProfileRemappingFile,
+ ThinOrFullLTOPhase::FullLTOPostLink);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
+ PGOOpt->ProfileRemappingFile,
+ ThinOrFullLTOPhase::FullLTOPostLink);
}
// Break up allocas
@@ -1612,7 +1670,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
FunctionPassManager MainFPM;
MainFPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true),
       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
if (RunNewGVN)
@@ -1656,7 +1715,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
invokePeepholeEPCallbacks(MainFPM, Level);
- MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+ MainFPM.addPass(JumpThreadingPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
PTO.EagerlyInvalidateAnalyses));
@@ -1676,8 +1735,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Add late LTO optimization passes.
// Delete basic blocks, which optimization passes may have killed.
- MPM.addPass(createModuleToFunctionPassAdaptor(
- SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
+ MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
+ true))));
   // Drop bodies of available externally objects to improve GlobalDCE.
MPM.addPass(EliminateAvailableExternallyPass());
@@ -1688,6 +1748,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
+ if (PTO.CallGraphProfile)
+ MPM.addPass(CGProfilePass());
+
+ for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
+ C(MPM, Level);
+
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1770,6 +1836,10 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
if (!FPM.isEmpty())
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
+
+ for (auto &C : OptimizerEarlyEPCallbacks)
+ C(MPM, Level);
+
if (!VectorizerStartEPCallbacks.empty()) {
FunctionPassManager FPM;
for (auto &C : VectorizerStartEPCallbacks)
@@ -1778,11 +1848,14 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
+ ModulePassManager CoroPM;
+ CoroPM.addPass(CoroEarlyPass());
CGSCCPassManager CGPM;
CGPM.addPass(CoroSplitPass());
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+ CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ CoroPM.addPass(CoroCleanupPass());
+ CoroPM.addPass(GlobalDCEPass());
+ MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 8e0af11b854d..7c29bffbc327 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -26,7 +26,6 @@ MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
MODULE_ANALYSIS("verify", VerifierAnalysis())
MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
-MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis())
MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis())
@@ -50,9 +49,12 @@ MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
MODULE_PASS("cg-profile", CGProfilePass())
MODULE_PASS("check-debugify", NewPMCheckDebugifyPass())
MODULE_PASS("constmerge", ConstantMergePass())
+MODULE_PASS("coro-early", CoroEarlyPass())
+MODULE_PASS("coro-cleanup", CoroCleanupPass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("debugify", NewPMDebugifyPass())
+MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("extract-blocks", BlockExtractorPass())
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
@@ -64,6 +66,7 @@ MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
+MODULE_PASS("inliner-ml-advisor-release", ModuleInlinerWrapperPass(getInlineParams(), true, {}, InliningAdvisorMode::Release, 0))
MODULE_PASS("print<inline-advisor>", InlineAdvisorAnalysisPrinterPass(dbgs()))
MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass(
getInlineParams(),
@@ -76,6 +79,7 @@ MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
MODULE_PASS("iroutliner", IROutlinerPass())
MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
+MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
MODULE_PASS("metarenamer", MetaRenamerPass())
MODULE_PASS("mergefunc", MergeFunctionsPass())
@@ -94,6 +98,7 @@ MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs()))
MODULE_PASS("print-must-be-executed-contexts", MustBeExecutedContextPrinterPass(dbgs()))
MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs()))
MODULE_PASS("print<module-debuginfo>", ModuleDebugInfoPrinterPass(dbgs()))
+MODULE_PASS("recompute-globalsaa", RecomputeGlobalsAAPass())
MODULE_PASS("rel-lookup-table-converter", RelLookupTableConverterPass())
MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
@@ -109,7 +114,9 @@ MODULE_PASS("strip-debug-declare", StripDebugDeclarePass())
MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass())
MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
+MODULE_PASS("trigger-crash", TriggerCrashPass())
MODULE_PASS("verify", VerifierPass())
+MODULE_PASS("view-callgraph", CallGraphViewerPass())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
MODULE_PASS("dfsan", DataFlowSanitizerPass())
MODULE_PASS("msan-module", ModuleMemorySanitizerPass({}))
@@ -165,7 +172,6 @@ CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass())
CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
-CGSCC_PASS("coro-split", CoroSplitPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
#undef CGSCC_PASS
@@ -179,6 +185,13 @@ CGSCC_PASS_WITH_PARAMS("inline",
},
parseInlinerPassOptions,
"only-mandatory")
+CGSCC_PASS_WITH_PARAMS("coro-split",
+ "CoroSplitPass",
+ [](bool OptimizeFrame) {
+ return CoroSplitPass(OptimizeFrame);
+ },
+ parseCoroSplitPassOptions,
+ "reuse-storage")
#undef CGSCC_PASS_WITH_PARAMS
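
coro-split moves from a fixed CGSCC_PASS entry to a parameterized registration so pipelines can opt into frame-storage reuse. A hedged sketch of the two equivalent spellings (the boolean is the OptimizeFrame constructor argument visible in the lambda above):

    // Textual:  -passes='cgscc(coro-split<reuse-storage>)'
    // Programmatic:
    CGSCCPassManager CGPM;
    CGPM.addPass(CoroSplitPass(/*OptimizeFrame=*/true));
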
#ifndef FUNCTION_ANALYSIS
@@ -247,9 +260,7 @@ FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass())
FUNCTION_PASS("chr", ControlHeightReductionPass())
-FUNCTION_PASS("coro-early", CoroEarlyPass())
FUNCTION_PASS("coro-elide", CoroElidePass())
-FUNCTION_PASS("coro-cleanup", CoroCleanupPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("dfa-jump-threading", DFAJumpThreadingPass())
@@ -257,8 +268,14 @@ FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("dot-cfg", CFGPrinterPass())
FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
-FUNCTION_PASS("dot-dom", DomTreePrinterPass())
-FUNCTION_PASS("dot-dom-only", DomTreeOnlyPrinterPass())
+FUNCTION_PASS("dot-dom", DomPrinter())
+FUNCTION_PASS("dot-dom-only", DomOnlyPrinter())
+FUNCTION_PASS("dot-post-dom", PostDomPrinter())
+FUNCTION_PASS("dot-post-dom-only", PostDomOnlyPrinter())
+FUNCTION_PASS("view-dom", DomViewer())
+FUNCTION_PASS("view-dom-only", DomOnlyViewer())
+FUNCTION_PASS("view-post-dom", PostDomViewer())
+FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
FUNCTION_PASS("flattencfg", FlattenCFGPass())
FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
@@ -361,6 +378,7 @@ FUNCTION_PASS("verify<safepoint-ir>", SafepointIRVerifierPass())
FUNCTION_PASS("verify<scalar-evolution>", ScalarEvolutionVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
+FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("memprof", MemProfilerPass())
@@ -402,13 +420,6 @@ FUNCTION_PASS_WITH_PARAMS("loop-unroll",
"no-profile-peeling;profile-peeling;"
"no-runtime;runtime;"
"no-upperbound;upperbound")
-FUNCTION_PASS_WITH_PARAMS("asan",
- "AddressSanitizerPass",
- [](AddressSanitizerOptions Opts) {
- return AddressSanitizerPass(Opts);
- },
- parseASanPassOptions,
- "kernel")
FUNCTION_PASS_WITH_PARAMS("msan",
"MemorySanitizerPass",
[](MemorySanitizerOptions Opts) {
@@ -423,6 +434,7 @@ FUNCTION_PASS_WITH_PARAMS("simplifycfg",
},
parseSimplifyCFGOptions,
"no-forward-switch-cond;forward-switch-cond;"
+ "no-switch-range-to-icmp;switch-range-to-icmp;"
"no-switch-to-lookup;switch-to-lookup;"
"no-keep-loops;keep-loops;"
"no-hoist-common-insts;hoist-common-insts;"
@@ -466,7 +478,6 @@ FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>",
#ifndef LOOPNEST_PASS
#define LOOPNEST_PASS(NAME, CREATE_PASS)
#endif
-LOOPNEST_PASS("lnicm", LNICMPass())
LOOPNEST_PASS("loop-flatten", LoopFlattenPass())
LOOPNEST_PASS("loop-interchange", LoopInterchangePass())
LOOPNEST_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
@@ -489,7 +500,6 @@ LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
LOOP_PASS("dot-ddg", DDGDotPrinterPass())
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
-LOOP_PASS("licm", LICMPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
LOOP_PASS("loop-rotate", LoopRotatePass())
@@ -522,4 +532,18 @@ LOOP_PASS_WITH_PARAMS("simple-loop-unswitch",
},
parseLoopUnswitchOptions,
"nontrivial;no-nontrivial;trivial;no-trivial")
+
+LOOP_PASS_WITH_PARAMS("licm", "LICMPass",
+ [](LICMOptions Params) {
+ return LICMPass(Params);
+ },
+ parseLICMOptions,
+ "allowspeculation");
+
+LOOP_PASS_WITH_PARAMS("lnicm", "LNICMPass",
+ [](LICMOptions Params) {
+ return LNICMPass(Params);
+ },
+ parseLICMOptions,
+ "allowspeculation");
#undef LOOP_PASS_WITH_PARAMS
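
licm and lnicm likewise move from plain LOOP_PASS entries to parameterized ones wired to parseLICMOptions from PassBuilder.cpp above, so speculative hoisting can be toggled per pipeline (the pipeline changes earlier in this patch pass AllowSpeculation explicitly at each scheduling site). A hedged sketch:

    LoopPassManager LPM;
    cantFail(PB.parsePassPipeline(LPM, "licm<no-allowspeculation>"));
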
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index c42b1cb26f13..ab9f8bf9c957 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
@@ -27,12 +28,14 @@
#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Regex.h"
+#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include <unordered_map>
#include <unordered_set>
@@ -164,6 +167,12 @@ static cl::opt<std::string> DotCfgDir(
cl::desc("Generate dot files into specified directory for changed IRs"),
cl::Hidden, cl::init("./"));
+// An option to print the IR that was being processed when a pass crashes.
+static cl::opt<bool>
+ PrintCrashIR("print-on-crash",
+ cl::desc("Print the last form of the IR before crash"),
+ cl::init(false), cl::Hidden);
+
namespace {
// Perform a system based diff between \p Before and \p After, using
@@ -439,19 +448,11 @@ const Module *getModuleForComparison(Any IR) {
return nullptr;
}
-} // namespace
-
-template <typename T> ChangeReporter<T>::~ChangeReporter() {
- assert(BeforeStack.empty() && "Problem with Change Printer stack.");
-}
-
-template <typename T>
-bool ChangeReporter<T>::isInterestingFunction(const Function &F) {
+bool isInterestingFunction(const Function &F) {
return isFunctionInPrintList(F.getName());
}
-template <typename T>
-bool ChangeReporter<T>::isInterestingPass(StringRef PassID) {
+bool isInterestingPass(StringRef PassID) {
if (isIgnored(PassID))
return false;
@@ -462,8 +463,7 @@ bool ChangeReporter<T>::isInterestingPass(StringRef PassID) {
// Return true when this is a pass on IR for which printing
// of changes is desired.
-template <typename T>
-bool ChangeReporter<T>::isInteresting(Any IR, StringRef PassID) {
+bool isInteresting(Any IR, StringRef PassID) {
if (!isInterestingPass(PassID))
return false;
if (any_isa<const Function *>(IR))
@@ -471,6 +471,12 @@ bool ChangeReporter<T>::isInteresting(Any IR, StringRef PassID) {
return true;
}
+} // namespace
+
+template <typename T> ChangeReporter<T>::~ChangeReporter() {
+ assert(BeforeStack.empty() && "Problem with Change Printer stack.");
+}
+
template <typename T>
void ChangeReporter<T>::saveIRBeforePass(Any IR, StringRef PassID) {
// Always need to place something on the stack because invalidated passes
@@ -587,7 +593,7 @@ void TextChangeReporter<T>::handleIgnored(StringRef PassID, std::string &Name) {
Out << formatv("*** IR Pass {0} on {1} ignored ***\n", PassID, Name);
}
-IRChangedPrinter::~IRChangedPrinter() {}
+IRChangedPrinter::~IRChangedPrinter() = default;
void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
if (PrintChanged == ChangePrinter::PrintChangedVerbose ||
@@ -1186,7 +1192,7 @@ void VerifyInstrumentation::registerCallbacks(
if (DebugLogging)
dbgs() << "Verifying function " << F->getName() << "\n";
- if (verifyFunction(*F))
+ if (verifyFunction(*F, &errs()))
report_fatal_error("Broken function found, compilation aborted!");
} else if (any_isa<const Module *>(IR) ||
any_isa<const LazyCallGraph::SCC *>(IR)) {
@@ -1201,13 +1207,13 @@ void VerifyInstrumentation::registerCallbacks(
if (DebugLogging)
dbgs() << "Verifying module " << M->getName() << "\n";
- if (verifyModule(*M))
+ if (verifyModule(*M, &errs()))
report_fatal_error("Broken module found, compilation aborted!");
}
});
}
-InLineChangePrinter::~InLineChangePrinter() {}
+InLineChangePrinter::~InLineChangePrinter() = default;
void InLineChangePrinter::generateIRRepresentation(Any IR, StringRef PassID,
IRDataT<EmptyData> &D) {
@@ -2117,6 +2123,51 @@ StandardInstrumentations::StandardInstrumentations(
ChangePrinter::PrintChangedDotCfgVerbose),
Verify(DebugLogging), VerifyEach(VerifyEach) {}
+PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter =
+ nullptr;
+
+void PrintCrashIRInstrumentation::reportCrashIR() { dbgs() << SavedIR; }
+
+void PrintCrashIRInstrumentation::SignalHandler(void *) {
+  // Called by signal handlers, so do not lock here.
+ // Is the PrintCrashIRInstrumentation still alive?
+ if (!CrashReporter)
+ return;
+
+ assert(PrintCrashIR && "Did not expect to get here without option set.");
+ CrashReporter->reportCrashIR();
+}
+
+PrintCrashIRInstrumentation::~PrintCrashIRInstrumentation() {
+ if (!CrashReporter)
+ return;
+
+ assert(PrintCrashIR && "Did not expect to get here without option set.");
+ CrashReporter = nullptr;
+}
+
+void PrintCrashIRInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ if (!PrintCrashIR || CrashReporter)
+ return;
+
+ sys::AddSignalHandler(SignalHandler, nullptr);
+ CrashReporter = this;
+
+ PIC.registerBeforeNonSkippedPassCallback([this](StringRef PassID, Any IR) {
+ SavedIR.clear();
+ raw_string_ostream OS(SavedIR);
+ OS << formatv("*** Dump of {0}IR Before Last Pass {1}",
+ llvm::forcePrintModuleIR() ? "Module " : "", PassID);
+ if (!isInteresting(IR, PassID)) {
+ OS << " Filtered Out ***\n";
+ return;
+ }
+ OS << " Started ***\n";
+ unwrapAndPrint(OS, IR);
+ });
+}
+
void StandardInstrumentations::registerCallbacks(
PassInstrumentationCallbacks &PIC, FunctionAnalysisManager *FAM) {
PrintIR.registerCallbacks(PIC);
@@ -2132,6 +2183,7 @@ void StandardInstrumentations::registerCallbacks(
Verify.registerCallbacks(PIC);
PrintChangedDiff.registerCallbacks(PIC);
WebsiteChangeReporter.registerCallbacks(PIC);
+ PrintCrashIR.registerCallbacks(PIC);
}
template class ChangeReporter<std::string>;
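
// Aside (not part of the patch): the crash-dump pattern that the new
// PrintCrashIRInstrumentation above relies on, reduced to a minimal
// standalone sketch. sys::AddSignalHandler and errs() are real LLVM APIs;
// the saved state and function names here are hypothetical scaffolding.
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

static std::string SavedState; // refreshed before each unit of work

static void dumpOnCrash(void *) {
  // Runs from a signal handler: avoid locks and heap allocation.
  llvm::errs() << SavedState;
}

void installCrashDump() {
  llvm::sys::AddSignalHandler(dumpOnCrash, /*Cookie=*/nullptr);
}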
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index 94c2bee3590c..f9e58fd6afa5 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -123,13 +123,15 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
return C;
}
-Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) {
- return simplify(get(CounterExpression(CounterExpression::Add, LHS, RHS)));
+Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS, bool Simplify) {
+ auto Cnt = get(CounterExpression(CounterExpression::Add, LHS, RHS));
+ return Simplify ? simplify(Cnt) : Cnt;
}
-Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) {
- return simplify(
- get(CounterExpression(CounterExpression::Subtract, LHS, RHS)));
+Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS,
+ bool Simplify) {
+ auto Cnt = get(CounterExpression(CounterExpression::Subtract, LHS, RHS));
+ return Simplify ? simplify(Cnt) : Cnt;
}
void CounterMappingContext::dump(const Counter &C, raw_ostream &OS) const {
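
// Aside (not part of the patch): how the new Simplify parameter changes call
// sites, assuming the declarations in
// llvm/ProfileData/Coverage/CoverageMapping.h and a default of Simplify=true.
void counterBuilderExample() {
  using llvm::coverage::Counter;
  llvm::coverage::CounterExpressionBuilder Builder;
  Counter A = Counter::getCounter(0);
  Counter B = Counter::getCounter(1);
  Counter Sum = Builder.add(A, B);                     // simplified, as before
  Counter Raw = Builder.add(A, B, /*Simplify=*/false); // raw expression tree
  (void)Sum;
  (void)Raw;
}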
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index c6691e321b3c..1a187795a8a0 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/Error.h"
@@ -174,7 +175,8 @@ Error RawCoverageFilenamesReader::readUncompressed(CovMapVersion Version,
else
P.assign(CWD);
llvm::sys::path::append(P, Filename);
- Filenames.push_back(static_cast<std::string>(P));
+ sys::path::remove_dots(P, /*remove_dot_dot=*/true);
+ Filenames.push_back(static_cast<std::string>(P.str()));
}
}
}
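
// Aside (not part of the patch): the effect of the added remove_dots call on
// a reconstructed coverage path (a small sketch).
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

void normalizePathExample() {
  llvm::SmallString<128> P("dir/subdir/../other/./file.c");
  llvm::sys::path::remove_dots(P, /*remove_dot_dot=*/true);
  // P is now "dir/other/file.c"
}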
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index ceb2d7dcb5b9..781a2901dbb9 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -49,12 +49,8 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) {
SmallString<128> CompressedStr;
bool doCompression =
Compress && zlib::isAvailable() && DoInstrProfNameCompression;
- if (doCompression) {
- auto E =
- zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression);
- if (E)
- report_bad_alloc_error("Failed to zlib compress coverage data");
- }
+ if (doCompression)
+ zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression);
// ::= <num-filenames>
// <uncompressed-len>
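
// Aside (not part of the patch): after this change the caller only gates on
// availability; in this revision zlib::compress handles allocation failure
// internally rather than returning an Error. A sketch, assuming
// llvm/Support/Compression.h:
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Compression.h"

void compressExample(llvm::StringRef Input, llvm::SmallString<128> &Out) {
  if (llvm::zlib::isAvailable())
    llvm::zlib::compress(Input, Out, llvm::zlib::BestSizeCompression);
}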
diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index 72d1addab01e..feacf40b8d0a 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -13,6 +13,7 @@
#include "llvm/ProfileData/GCOV.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/Debug.h"
@@ -23,7 +24,6 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <system_error>
-#include <unordered_map>
using namespace llvm;
@@ -663,6 +663,8 @@ void Context::collectFunction(GCOVFunction &f, Summary &summary) {
if (f.startLine >= si.startLineToFunctions.size())
si.startLineToFunctions.resize(f.startLine + 1);
si.startLineToFunctions[f.startLine].push_back(&f);
+ SmallSet<uint32_t, 16> lines;
+ SmallSet<uint32_t, 16> linesExec;
for (const GCOVBlock &b : f.blocksRange()) {
if (b.lines.empty())
continue;
@@ -671,9 +673,9 @@ void Context::collectFunction(GCOVFunction &f, Summary &summary) {
si.lines.resize(maxLineNum + 1);
for (uint32_t lineNum : b.lines) {
LineInfo &line = si.lines[lineNum];
- if (!line.exists)
+ if (lines.insert(lineNum).second)
++summary.lines;
- if (line.count == 0 && b.count)
+ if (b.count && linesExec.insert(lineNum).second)
++summary.linesExec;
line.exists = true;
line.count += b.count;
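
// Aside (not part of the patch): why the SmallSets above fix the summary
// counts. Several blocks may cover the same source line, so lines/linesExec
// must be counted once per function rather than once per block;
// insert().second is true only for the first occurrence.
#include "llvm/ADT/SmallSet.h"
#include <cstdint>

unsigned countDistinctLines(const uint32_t *Lines, unsigned N) {
  llvm::SmallSet<uint32_t, 16> Seen;
  unsigned Distinct = 0;
  for (unsigned I = 0; I < N; ++I)
    if (Seen.insert(Lines[I]).second) // first time this line is seen
      ++Distinct;
  return Distinct;
}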
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 07d467305ae5..48ac5ce0d607 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -51,6 +51,7 @@
#include <memory>
#include <string>
#include <system_error>
+#include <type_traits>
#include <utility>
#include <vector>
@@ -466,12 +467,8 @@ Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
}
SmallString<128> CompressedNameStrings;
- Error E = zlib::compress(StringRef(UncompressedNameStrings),
- CompressedNameStrings, zlib::BestSizeCompression);
- if (E) {
- consumeError(std::move(E));
- return make_error<InstrProfError>(instrprof_error::compress_failed);
- }
+ zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
+ zlib::BestSizeCompression);
return WriteStringToResult(CompressedNameStrings.size(),
CompressedNameStrings);
@@ -1311,4 +1308,76 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
}
}
+namespace IndexedInstrProf {
+// A C++14 compatible version of the offsetof macro.
+template <typename T1, typename T2>
+inline size_t constexpr offsetOf(T1 T2::*Member) {
+ constexpr T2 Object{};
+ return size_t(&(Object.*Member)) - size_t(&Object);
+}
+
+static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
+ return *reinterpret_cast<const uint64_t *>(Buffer + Offset);
+}
+
+uint64_t Header::formatVersion() const {
+ using namespace support;
+ return endian::byte_swap<uint64_t, little>(Version);
+}
+
+Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
+ using namespace support;
+ static_assert(std::is_standard_layout<Header>::value,
+ "The header should be standard layout type since we use offset "
+ "of fields to read.");
+ Header H;
+
+ H.Magic = read(Buffer, offsetOf(&Header::Magic));
+ // Check the magic number.
+ uint64_t Magic = endian::byte_swap<uint64_t, little>(H.Magic);
+ if (Magic != IndexedInstrProf::Magic)
+ return make_error<InstrProfError>(instrprof_error::bad_magic);
+
+ // Read the version.
+ H.Version = read(Buffer, offsetOf(&Header::Version));
+ if (GET_VERSION(H.formatVersion()) >
+ IndexedInstrProf::ProfVersion::CurrentVersion)
+ return make_error<InstrProfError>(instrprof_error::unsupported_version);
+
+ switch (GET_VERSION(H.formatVersion())) {
+ // When a new field is added in the header add a case statement here to
+ // populate it.
+ static_assert(
+ IndexedInstrProf::ProfVersion::CurrentVersion == Version8,
+ "Please update the reading code below if a new field has been added, "
+ "if not add a case statement to fall through to the latest version.");
+ case 8ull:
+ H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
+ LLVM_FALLTHROUGH;
+ default: // Version7 (when the backwards compatible header was introduced).
+ H.HashType = read(Buffer, offsetOf(&Header::HashType));
+ H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
+ }
+
+ return H;
+}
+
+size_t Header::size() const {
+ switch (GET_VERSION(formatVersion())) {
+ // When a new field is added to the header add a case statement here to
+ // compute the size as offset of the new field + size of the new field. This
+ // relies on the field being added to the end of the list.
+ static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8,
+ "Please update the size computation below if a new field has "
+ "been added to the header, if not add a case statement to "
+ "fall through to the latest version.");
+ case 8ull:
+ return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset);
+ default: // Version7 (when the backwards compatible header was introduced).
+ return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset);
+ }
+}
+
+} // namespace IndexedInstrProf
+
} // end namespace llvm
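
// Aside (not part of the patch): the offsetOf helper above in isolation.
// Unlike the offsetof macro it takes a pointer-to-member, so no macro is
// needed; the pointer-to-integer arithmetic keeps it out of constant
// evaluation in practice, so treat it as a run-time helper for
// standard-layout types.
#include <cstddef>
#include <cstdint>

struct Hdr { uint64_t Magic; uint64_t Version; }; // illustrative stand-in

template <typename T1, typename T2>
inline size_t constexpr offsetOf(T1 T2::*Member) {
  constexpr T2 Object{};
  return size_t(&(Object.*Member)) - size_t(&Object);
}

// offsetOf(&Hdr::Version) == 8 on typical 64-bit ABIs.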
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index 8e38a6869d07..4b8212c546f7 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -7,10 +7,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProfCorrelator.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
#define DEBUG_TYPE "correlator"
@@ -279,7 +284,7 @@ void DwarfInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl() {
LLVM_DEBUG(Die.dump(dbgs()));
}
this->addProbe(*FunctionName, *CFGHash, *CounterPtr - CountersStart,
- FunctionPtr.getValueOr(0), *NumCounters);
+ FunctionPtr.value_or(0), *NumCounters);
};
for (auto &CU : DICtx->normal_units())
for (const auto &Entry : CU->dies())
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 138b1532d778..ee8989979a26 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -14,11 +14,11 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -27,7 +27,6 @@
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/SymbolRemappingReader.h"
#include <algorithm>
-#include <cctype>
#include <cstddef>
#include <cstdint>
#include <limits>
@@ -43,13 +42,13 @@ using namespace llvm;
static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
InstrProfKind ProfileKind = InstrProfKind::Unknown;
if (Version & VARIANT_MASK_IR_PROF) {
- ProfileKind |= InstrProfKind::IR;
+ ProfileKind |= InstrProfKind::IRInstrumentation;
}
if (Version & VARIANT_MASK_CSIR_PROF) {
- ProfileKind |= InstrProfKind::CS;
+ ProfileKind |= InstrProfKind::ContextSensitive;
}
if (Version & VARIANT_MASK_INSTR_ENTRY) {
- ProfileKind |= InstrProfKind::BB;
+ ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
}
if (Version & VARIANT_MASK_BYTE_COVERAGE) {
ProfileKind |= InstrProfKind::SingleByteCoverage;
@@ -57,6 +56,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
ProfileKind |= InstrProfKind::FunctionEntryOnly;
}
+ if (Version & VARIANT_MASK_MEMPROF) {
+ ProfileKind |= InstrProfKind::MemProf;
+ }
return ProfileKind;
}
@@ -153,14 +155,6 @@ IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
return std::move(Result);
}
-void InstrProfIterator::Increment() {
- if (auto E = Reader->readNextRecord(Record)) {
- // Handle errors in the reader.
- InstrProfError::take(std::move(E));
- *this = InstrProfIterator();
- }
-}
-
bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
// Verify that this really looks like plain ASCII text by checking a
// 'reasonable' number of characters (up to profile magic size).
@@ -180,16 +174,16 @@ Error TextInstrProfReader::readHeader() {
while (Line->startswith(":")) {
StringRef Str = Line->substr(1);
if (Str.equals_insensitive("ir"))
- ProfileKind |= InstrProfKind::IR;
+ ProfileKind |= InstrProfKind::IRInstrumentation;
else if (Str.equals_insensitive("fe"))
- ProfileKind |= InstrProfKind::FE;
+ ProfileKind |= InstrProfKind::FrontendInstrumentation;
else if (Str.equals_insensitive("csir")) {
- ProfileKind |= InstrProfKind::IR;
- ProfileKind |= InstrProfKind::CS;
+ ProfileKind |= InstrProfKind::IRInstrumentation;
+ ProfileKind |= InstrProfKind::ContextSensitive;
} else if (Str.equals_insensitive("entry_first"))
- ProfileKind |= InstrProfKind::BB;
+ ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
else if (Str.equals_insensitive("not_entry_first"))
- ProfileKind &= ~InstrProfKind::BB;
+ ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
else
return error(instrprof_error::bad_header);
++Line;
@@ -454,7 +448,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
return error(instrprof_error::bad_header);
std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
- if (Error E = createSymtab(*NewSymtab.get()))
+ if (Error E = createSymtab(*NewSymtab))
return E;
Symtab = std::move(NewSymtab);
@@ -942,24 +936,17 @@ Error IndexedInstrProfReader::readHeader() {
if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
return error(instrprof_error::truncated);
- auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
- Cur += sizeof(IndexedInstrProf::Header);
+ auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
+ if (!HeaderOr)
+ return HeaderOr.takeError();
- // Check the magic number.
- uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
- if (Magic != IndexedInstrProf::Magic)
- return error(instrprof_error::bad_magic);
-
- // Read the version.
- uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
- if (GET_VERSION(FormatVersion) >
- IndexedInstrProf::ProfVersion::CurrentVersion)
- return error(instrprof_error::unsupported_version);
+ const IndexedInstrProf::Header *Header = &HeaderOr.get();
+ Cur += Header->size();
- Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
+ Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
/* UseCS */ false);
- if (FormatVersion & VARIANT_MASK_CSIR_PROF)
- Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
+ if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF)
+ Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
/* UseCS */ true);
// Read the hash type and start offset.
@@ -970,10 +957,46 @@ Error IndexedInstrProfReader::readHeader() {
uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
- // The rest of the file is an on disk hash table.
- auto IndexPtr =
- std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
- Start + HashOffset, Cur, Start, HashType, FormatVersion);
+ // The hash table with profile counts comes next.
+ auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
+ Start + HashOffset, Cur, Start, HashType, Header->formatVersion());
+
+  // The MemProfOffset field in the header is only valid when the format
+  // version is 8 or higher, the version in which it was introduced.
+ if (GET_VERSION(Header->formatVersion()) >= 8 &&
+ Header->formatVersion() & VARIANT_MASK_MEMPROF) {
+ uint64_t MemProfOffset =
+ endian::byte_swap<uint64_t, little>(Header->MemProfOffset);
+
+ const unsigned char *Ptr = Start + MemProfOffset;
+ // The value returned from RecordTableGenerator.Emit.
+ const uint64_t RecordTableOffset =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ // The offset in the stream right before invoking FrameTableGenerator.Emit.
+ const uint64_t FramePayloadOffset =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ // The value returned from FrameTableGenerator.Emit.
+ const uint64_t FrameTableOffset =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+
+ // Read the schema.
+ auto SchemaOr = memprof::readMemProfSchema(Ptr);
+ if (!SchemaOr)
+ return SchemaOr.takeError();
+ Schema = SchemaOr.get();
+
+ // Now initialize the table reader with a pointer into data buffer.
+ MemProfRecordTable.reset(MemProfRecordHashTable::Create(
+ /*Buckets=*/Start + RecordTableOffset,
+ /*Payload=*/Ptr,
+ /*Base=*/Start, memprof::RecordLookupTrait(Schema)));
+
+ // Initialize the frame table reader with the payload and bucket offsets.
+ MemProfFrameTable.reset(MemProfFrameHashTable::Create(
+ /*Buckets=*/Start + FrameTableOffset,
+ /*Payload=*/Start + FramePayloadOffset,
+ /*Base=*/Start, memprof::FrameLookupTrait()));
+ }
// Load the remapping table now if requested.
if (RemappingBuffer) {
@@ -991,16 +1014,16 @@ Error IndexedInstrProfReader::readHeader() {
}
InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
- if (Symtab.get())
- return *Symtab.get();
+ if (Symtab)
+ return *Symtab;
std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
- if (Error E = Index->populateSymtab(*NewSymtab.get())) {
+ if (Error E = Index->populateSymtab(*NewSymtab)) {
consumeError(error(InstrProfError::take(std::move(E))));
}
Symtab = std::move(NewSymtab);
- return *Symtab.get();
+ return *Symtab;
}
Expected<InstrProfRecord>
@@ -1019,6 +1042,43 @@ IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
return error(instrprof_error::hash_mismatch);
}
+Expected<memprof::MemProfRecord>
+IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
+ // TODO: Add memprof specific errors.
+ if (MemProfRecordTable == nullptr)
+ return make_error<InstrProfError>(instrprof_error::invalid_prof,
+ "no memprof data available in profile");
+ auto Iter = MemProfRecordTable->find(FuncNameHash);
+ if (Iter == MemProfRecordTable->end())
+ return make_error<InstrProfError>(
+ instrprof_error::unknown_function,
+ "memprof record not found for function hash " + Twine(FuncNameHash));
+
+  // Set up a callback to convert frame ids to frames using the on-disk
+  // frame data hash table.
+ memprof::FrameId LastUnmappedFrameId = 0;
+ bool HasFrameMappingError = false;
+ auto IdToFrameCallback = [&](const memprof::FrameId Id) {
+ auto FrIter = MemProfFrameTable->find(Id);
+ if (FrIter == MemProfFrameTable->end()) {
+ LastUnmappedFrameId = Id;
+ HasFrameMappingError = true;
+ return memprof::Frame(0, 0, 0, false);
+ }
+ return *FrIter;
+ };
+
+ memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
+
+ // Check that all frame ids were successfully converted to frames.
+ if (HasFrameMappingError) {
+ return make_error<InstrProfError>(instrprof_error::hash_mismatch,
+ "memprof frame not found for frame id " +
+ Twine(LastUnmappedFrameId));
+ }
+ return Record;
+}
+
Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
uint64_t FuncHash,
std::vector<uint64_t> &Counts) {
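
// Aside (not part of the patch): a hedged usage sketch for the new
// getMemProfRecord API. Reader is assumed to be an already-initialized
// IndexedInstrProfReader, FuncNameHash the indexed hash of the function name,
// and MemProfRecord::print as used elsewhere in this patch.
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

void queryMemProf(llvm::IndexedInstrProfReader &Reader, uint64_t FuncNameHash) {
  llvm::Expected<llvm::memprof::MemProfRecord> RecOr =
      Reader.getMemProfRecord(FuncNameHash);
  if (!RecOr) {
    llvm::logAllUnhandledErrors(RecOr.takeError(), llvm::errs(), "memprof: ");
    return;
  }
  RecOr->print(llvm::errs());
}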
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 8ded1c0426e5..cd4e8900c963 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
@@ -23,7 +24,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
@@ -32,7 +32,6 @@
#include <vector>
using namespace llvm;
-extern cl::opt<bool> DebugInfoCorrelate;
// A struct to define how the data stream should be patched. For Indexed
// profiling, only uint64_t data type is needed.
@@ -64,11 +63,16 @@ public:
if (IsFDOStream) {
raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
+ const uint64_t LastPos = FDOStream.tell();
for (int K = 0; K < NItems; K++) {
FDOStream.seek(P[K].Pos);
for (int I = 0; I < P[K].N; I++)
write(P[K].D[I]);
}
+      // Reset the stream to the last position after patching so that users
+      // don't accidentally overwrite data. This makes it consistent with
+      // the string stream below, which replaces the data in place.
+ FDOStream.seek(LastPos);
} else {
raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
std::string &Data = SOStream.str(); // with flush
@@ -249,11 +253,51 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
Dest.sortValueData();
}
+void InstrProfWriter::addMemProfRecord(
+ const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {
+ auto Result = MemProfRecordData.insert({Id, Record});
+ // If we inserted a new record then we are done.
+ if (Result.second) {
+ return;
+ }
+ memprof::IndexedMemProfRecord &Existing = Result.first->second;
+ Existing.merge(Record);
+}
+
+bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
+ const memprof::Frame &Frame,
+ function_ref<void(Error)> Warn) {
+ auto Result = MemProfFrameData.insert({Id, Frame});
+ // If a mapping already exists for the current frame id and it does not
+ // match the new mapping provided then reset the existing contents and bail
+ // out. We don't support the merging of memprof data whose Frame -> Id
+ // mapping across profiles is inconsistent.
+ if (!Result.second && Result.first->second != Frame) {
+ Warn(make_error<InstrProfError>(instrprof_error::malformed,
+ "frame to id mapping mismatch"));
+ return false;
+ }
+ return true;
+}
+
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn) {
for (auto &I : IPW.FunctionData)
for (auto &Func : I.getValue())
addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
+
+ MemProfFrameData.reserve(IPW.MemProfFrameData.size());
+ for (auto &I : IPW.MemProfFrameData) {
+ // If we weren't able to add the frame mappings then it doesn't make sense
+ // to try to merge the records from this profile.
+ if (!addMemProfFrame(I.first, I.second, Warn))
+ return;
+ }
+
+ MemProfRecordData.reserve(IPW.MemProfRecordData.size());
+ for (auto &I : IPW.MemProfRecordData) {
+ addMemProfRecord(I.first, I.second);
+ }
}
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
@@ -298,30 +342,34 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
Generator.insert(I.getKey(), &I.getValue());
+
// Write the header.
IndexedInstrProf::Header Header;
Header.Magic = IndexedInstrProf::Magic;
Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion;
- if (static_cast<bool>(ProfileKind & InstrProfKind::IR))
+ if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
Header.Version |= VARIANT_MASK_IR_PROF;
- if (static_cast<bool>(ProfileKind & InstrProfKind::CS))
+ if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
Header.Version |= VARIANT_MASK_CSIR_PROF;
- if (static_cast<bool>(ProfileKind & InstrProfKind::BB))
+ if (static_cast<bool>(ProfileKind &
+ InstrProfKind::FunctionEntryInstrumentation))
Header.Version |= VARIANT_MASK_INSTR_ENTRY;
if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
+ Header.Version |= VARIANT_MASK_MEMPROF;
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
Header.HashOffset = 0;
+ Header.MemProfOffset = 0;
int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
- // Only write out all the fields except 'HashOffset'. We need
- // to remember the offset of that field to allow back patching
- // later.
- for (int I = 0; I < N - 1; I++)
+ // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We
+ // need to remember the offset of these fields to allow back patching later.
+ for (int I = 0; I < N - 2; I++)
OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
// Save the location of Header.HashOffset field in \c OS.
@@ -329,6 +377,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// Reserve the space for HashOffset field.
OS.write(0);
+  // Save the location of the MemProf profile data. It is stored in two
+  // parts: the schema and a separate on-disk chained hash table.
+ uint64_t MemProfSectionOffset = OS.tell();
+ // Reserve space for the MemProf table field to be patched later if this
+ // profile contains memory profile information.
+ OS.write(0);
+
// Reserve space to write profile summary data.
uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
@@ -338,7 +393,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.write(0);
uint64_t CSSummaryOffset = 0;
uint64_t CSSummarySize = 0;
- if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
CSSummaryOffset = OS.tell();
CSSummarySize = SummarySize / sizeof(uint64_t);
for (unsigned I = 0; I < CSSummarySize; I++)
@@ -348,6 +403,63 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// Write the hash table.
uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
+  // Write the MemProf profile data if we have it. This includes a simple
+  // schema with the format described below, followed by the hash tables:
+ // uint64_t RecordTableOffset = RecordTableGenerator.Emit
+ // uint64_t FramePayloadOffset = Stream offset before emitting the frame table
+ // uint64_t FrameTableOffset = FrameTableGenerator.Emit
+ // uint64_t Num schema entries
+ // uint64_t Schema entry 0
+ // uint64_t Schema entry 1
+ // ....
+ // uint64_t Schema entry N - 1
+ // OnDiskChainedHashTable MemProfRecordData
+ // OnDiskChainedHashTable MemProfFrameData
+ uint64_t MemProfSectionStart = 0;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
+ MemProfSectionStart = OS.tell();
+ OS.write(0ULL); // Reserve space for the memprof record table offset.
+ OS.write(0ULL); // Reserve space for the memprof frame payload offset.
+ OS.write(0ULL); // Reserve space for the memprof frame table offset.
+
+ auto Schema = memprof::PortableMemInfoBlock::getSchema();
+ OS.write(static_cast<uint64_t>(Schema.size()));
+ for (const auto Id : Schema) {
+ OS.write(static_cast<uint64_t>(Id));
+ }
+
+ auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>();
+ RecordWriter->Schema = &Schema;
+ OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
+ RecordTableGenerator;
+ for (auto &I : MemProfRecordData) {
+ // Insert the key (func hash) and value (memprof record).
+ RecordTableGenerator.insert(I.first, I.second);
+ }
+
+ uint64_t RecordTableOffset =
+ RecordTableGenerator.Emit(OS.OS, *RecordWriter);
+
+ uint64_t FramePayloadOffset = OS.tell();
+
+ auto FrameWriter = std::make_unique<memprof::FrameWriterTrait>();
+ OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
+ FrameTableGenerator;
+ for (auto &I : MemProfFrameData) {
+ // Insert the key (frame id) and value (frame contents).
+ FrameTableGenerator.insert(I.first, I.second);
+ }
+
+ uint64_t FrameTableOffset = FrameTableGenerator.Emit(OS.OS, *FrameWriter);
+
+ PatchItem PatchItems[] = {
+ {MemProfSectionStart, &RecordTableOffset, 1},
+ {MemProfSectionStart + sizeof(uint64_t), &FramePayloadOffset, 1},
+ {MemProfSectionStart + 2 * sizeof(uint64_t), &FrameTableOffset, 1},
+ };
+ OS.patch(PatchItems, 3);
+ }
+
// Allocate space for data to be serialized out.
std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
IndexedInstrProf::allocSummary(SummarySize);
@@ -359,7 +471,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// For Context Sensitive summary.
std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
- if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
setSummary(TheCSSummary.get(), *CSPS);
@@ -370,6 +482,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
PatchItem PatchItems[] = {
// Patch the Header.HashOffset field.
{HashTableStartFieldOffset, &HashTableStart, 1},
+ // Patch the Header.MemProfOffset (=0 for profiles without MemProf data).
+ {MemProfSectionOffset, &MemProfSectionStart, 1},
// Patch the summary data.
{SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
(int)(SummarySize / sizeof(uint64_t))},
@@ -472,12 +586,13 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
// Check CS first since it implies an IR level profile.
- if (static_cast<bool>(ProfileKind & InstrProfKind::CS))
+ if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
OS << "# CSIR level Instrumentation Flag\n:csir\n";
- else if (static_cast<bool>(ProfileKind & InstrProfKind::IR))
+ else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
OS << "# IR level Instrumentation Flag\n:ir\n";
- if (static_cast<bool>(ProfileKind & InstrProfKind::BB))
+ if (static_cast<bool>(ProfileKind &
+ InstrProfKind::FunctionEntryInstrumentation))
OS << "# Always instrument the function entry block\n:entry_first\n";
InstrProfSymtab Symtab;
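
// Aside (not part of the patch): the reserve-and-patch pattern writeImpl uses
// for HashOffset/MemProfOffset, in miniature, including the seek-back that
// the ProfOStream::patch fix above makes explicit. A sketch over
// llvm::raw_fd_ostream only.
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

void reserveAndPatch(llvm::raw_fd_ostream &OS) {
  const uint64_t PatchPos = OS.tell();
  uint64_t Zero = 0;
  OS.write(reinterpret_cast<const char *>(&Zero), sizeof(Zero)); // placeholder

  const uint64_t SectionStart = OS.tell();
  // ... emit the section payload here ...

  const uint64_t EndPos = OS.tell();
  OS.seek(PatchPos);
  OS.write(reinterpret_cast<const char *>(&SectionStart), sizeof(SectionStart));
  OS.seek(EndPos); // restore so later writes do not clobber the payload
}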
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
new file mode 100644
index 000000000000..3d44cf0b4c37
--- /dev/null
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -0,0 +1,110 @@
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+namespace llvm {
+namespace memprof {
+
+void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
+ raw_ostream &OS) {
+ using namespace support;
+
+ endian::Writer LE(OS, little);
+
+ LE.write<uint64_t>(AllocSites.size());
+ for (const IndexedAllocationInfo &N : AllocSites) {
+ LE.write<uint64_t>(N.CallStack.size());
+ for (const FrameId &Id : N.CallStack)
+ LE.write<FrameId>(Id);
+ N.Info.serialize(Schema, OS);
+ }
+
+ // Related contexts.
+ LE.write<uint64_t>(CallSites.size());
+ for (const auto &Frames : CallSites) {
+ LE.write<uint64_t>(Frames.size());
+ for (const FrameId &Id : Frames)
+ LE.write<FrameId>(Id);
+ }
+}
+
+IndexedMemProfRecord
+IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
+ const unsigned char *Ptr) {
+ using namespace support;
+
+ IndexedMemProfRecord Record;
+
+ // Read the meminfo nodes.
+ const uint64_t NumNodes = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ for (uint64_t I = 0; I < NumNodes; I++) {
+ IndexedAllocationInfo Node;
+ const uint64_t NumFrames =
+ endian::readNext<uint64_t, little, unaligned>(Ptr);
+ for (uint64_t J = 0; J < NumFrames; J++) {
+ const FrameId Id = endian::readNext<FrameId, little, unaligned>(Ptr);
+ Node.CallStack.push_back(Id);
+ }
+ Node.Info.deserialize(Schema, Ptr);
+ Ptr += PortableMemInfoBlock::serializedSize();
+ Record.AllocSites.push_back(Node);
+ }
+
+ // Read the callsite information.
+ const uint64_t NumCtxs = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ for (uint64_t J = 0; J < NumCtxs; J++) {
+ const uint64_t NumFrames =
+ endian::readNext<uint64_t, little, unaligned>(Ptr);
+ llvm::SmallVector<FrameId> Frames;
+ Frames.reserve(NumFrames);
+ for (uint64_t K = 0; K < NumFrames; K++) {
+ const FrameId Id = endian::readNext<FrameId, little, unaligned>(Ptr);
+ Frames.push_back(Id);
+ }
+ Record.CallSites.push_back(Frames);
+ }
+
+ return Record;
+}
+
+GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
+ const auto Pos = FunctionName.find(".llvm.");
+
+  // We use the function GUID, which we expect to be a uint64_t. At
+  // this time, it is the lower 64 bits of the MD5 of the function
+  // name. Any .llvm. suffix is trimmed since it is added by thinLTO
+  // global promotion; by the time the profile is consumed, these
+  // suffixes will not be present.
+ return Function::getGUID(FunctionName.take_front(Pos));
+}
+
+Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
+ using namespace support;
+
+ const unsigned char *Ptr = Buffer;
+ const uint64_t NumSchemaIds =
+ endian::readNext<uint64_t, little, unaligned>(Ptr);
+ if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "memprof schema invalid");
+ }
+
+ MemProfSchema Result;
+ for (size_t I = 0; I < NumSchemaIds; I++) {
+ const uint64_t Tag = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ if (Tag >= static_cast<uint64_t>(Meta::Size)) {
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "memprof schema invalid");
+ }
+ Result.push_back(static_cast<Meta>(Tag));
+ }
+  // Advance the buffer to one past the schema if we succeeded.
+ Buffer = Ptr;
+ return Result;
+}
+
+} // namespace memprof
+} // namespace llvm
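
// Aside (not part of the patch): the ".llvm." trimming in getGUID above.
// StringRef::find returns npos when no suffix is present, and
// take_front(npos) keeps the whole name, so both cases fall out of one call.
#include "llvm/ADT/StringRef.h"

llvm::StringRef baseName(llvm::StringRef FunctionName) {
  return FunctionName.take_front(FunctionName.find(".llvm."));
  // "foo.llvm.123" -> "foo"; "foo" -> "foo"
}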
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index bbb640cfaee8..755e25b355a8 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -10,20 +10,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Type.h"
+#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
cl::opt<bool> UseContextLessSummary(
- "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+ "profile-summary-contextless", cl::Hidden,
cl::desc("Merge context profiles before calculating thresholds."));
// The following two parameters determine the threshold for a count to be
@@ -34,38 +30,38 @@ cl::opt<bool> UseContextLessSummary(
// threshold for determining cold count (everything <= this threshold is
// considered cold).
cl::opt<int> ProfileSummaryCutoffHot(
- "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
+ "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000),
cl::desc("A count is hot if it exceeds the minimum count to"
" reach this percentile of total counts."));
cl::opt<int> ProfileSummaryCutoffCold(
- "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
+ "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999),
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
- cl::init(15000), cl::ZeroOrMore,
+ cl::init(15000),
cl::desc("The code working set size is considered huge if the number of"
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
"profile-summary-large-working-set-size-threshold", cl::Hidden,
- cl::init(12500), cl::ZeroOrMore,
+ cl::init(12500),
cl::desc("The code working set size is considered large if the number of"
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
// The next two options override the counts derived from summary computation and
// are useful for debugging purposes.
-cl::opt<int> ProfileSummaryHotCount(
- "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
+cl::opt<uint64_t> ProfileSummaryHotCount(
+ "profile-summary-hot-count", cl::ReallyHidden,
cl::desc("A fixed hot count that overrides the count derived from"
" profile-summary-cutoff-hot"));
-cl::opt<int> ProfileSummaryColdCount(
- "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
+cl::opt<uint64_t> ProfileSummaryColdCount(
+ "profile-summary-cold-count", cl::ReallyHidden,
cl::desc("A fixed cold count that overrides the count derived from"
" profile-summary-cutoff-cold"));
@@ -110,7 +106,13 @@ void SampleProfileSummaryBuilder::addRecord(
NumFunctions++;
if (FS.getHeadSamples() > MaxFunctionCount)
MaxFunctionCount = FS.getHeadSamples();
+ } else if (FS.getContext().hasAttribute(
+ sampleprof::ContextDuplicatedIntoBase)) {
+ // Do not recount callee samples if they are already merged into their base
+ // profiles. This can happen to CS nested profile.
+ return;
}
+
for (const auto &I : FS.getBodySamples()) {
uint64_t Count = I.second.getSamples();
addCount(Count);
@@ -194,7 +196,7 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
// more function profiles each with lower counts, which in turn leads to lower
// hot thresholds. To compensate for that, by default we merge context
// profiles before computing profile summary.
- if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCSFlat &&
+ if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
!UseContextLessSummary.getNumOccurrences())) {
for (const auto &I : Profiles) {
ContextLessProfiles[I.second.getName()].merge(I.second);
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index f8d13c74fac3..2423fd38e9a2 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -10,69 +10,55 @@
//
//===----------------------------------------------------------------------===//
+#include <algorithm>
#include <cstdint>
+#include <memory>
#include <type_traits>
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Path.h"
+
+#define DEBUG_TYPE "memprof"
namespace llvm {
namespace memprof {
namespace {
-
-struct Summary {
- uint64_t Version;
- uint64_t TotalSizeBytes;
- uint64_t NumSegments;
- uint64_t NumMIBInfo;
- uint64_t NumStackOffsets;
-};
-
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
static_assert(std::is_pod<T>::value, "Not a pod type.");
assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
return *reinterpret_cast<const T *>(Ptr);
}
-Summary computeSummary(const char *Start) {
- auto *H = reinterpret_cast<const Header *>(Start);
-
- // Check alignment while reading the number of items in each section.
- return Summary{
- H->Version,
- H->TotalSize,
- alignedRead(Start + H->SegmentOffset),
- alignedRead(Start + H->MIBOffset),
- alignedRead(Start + H->StackOffset),
- };
-}
-
-} // namespace
-
-Expected<std::unique_ptr<RawMemProfReader>>
-RawMemProfReader::create(const Twine &Path) {
- auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
- if (std::error_code EC = BufferOr.getError())
- return errorCodeToError(EC);
-
- std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+Error checkBuffer(const MemoryBuffer &Buffer) {
+ if (!RawMemProfReader::hasFormat(Buffer))
+ return make_error<InstrProfError>(instrprof_error::bad_magic);
- if (Buffer->getBufferSize() == 0)
+ if (Buffer.getBufferSize() == 0)
return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
- if (!RawMemProfReader::hasFormat(*Buffer))
- return make_error<InstrProfError>(instrprof_error::bad_magic);
-
- if (Buffer->getBufferSize() < sizeof(Header)) {
+ if (Buffer.getBufferSize() < sizeof(Header)) {
return make_error<InstrProfError>(instrprof_error::truncated);
}
// The size of the buffer can be > header total size since we allow repeated
// serialization of memprof profiles to the same file.
uint64_t TotalSize = 0;
- const char *Next = Buffer->getBufferStart();
- while (Next < Buffer->getBufferEnd()) {
+ const char *Next = Buffer.getBufferStart();
+ while (Next < Buffer.getBufferEnd()) {
auto *H = reinterpret_cast<const Header *>(Next);
if (H->Version != MEMPROF_RAW_VERSION) {
return make_error<InstrProfError>(instrprof_error::unsupported_version);
@@ -82,11 +68,143 @@ RawMemProfReader::create(const Twine &Path) {
Next += H->TotalSize;
}
- if (Buffer->getBufferSize() != TotalSize) {
+ if (Buffer.getBufferSize() != TotalSize) {
return make_error<InstrProfError>(instrprof_error::malformed);
}
+ return Error::success();
+}
+
+llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
+ using namespace support;
+
+ const uint64_t NumItemsToRead =
+ endian::readNext<uint64_t, little, unaligned>(Ptr);
+ llvm::SmallVector<SegmentEntry> Items;
+ for (uint64_t I = 0; I < NumItemsToRead; I++) {
+ Items.push_back(*reinterpret_cast<const SegmentEntry *>(
+ Ptr + I * sizeof(SegmentEntry)));
+ }
+ return Items;
+}
+
+llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
+readMemInfoBlocks(const char *Ptr) {
+ using namespace support;
+
+ const uint64_t NumItemsToRead =
+ endian::readNext<uint64_t, little, unaligned>(Ptr);
+ llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
+ for (uint64_t I = 0; I < NumItemsToRead; I++) {
+ const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
+ Items.push_back({Id, MIB});
+    // Only advance by sizeof(MemInfoBlock); readNext already advanced Ptr
+    // past the Id.
+ Ptr += sizeof(MemInfoBlock);
+ }
+ return Items;
+}
+
+CallStackMap readStackInfo(const char *Ptr) {
+ using namespace support;
+
+ const uint64_t NumItemsToRead =
+ endian::readNext<uint64_t, little, unaligned>(Ptr);
+ CallStackMap Items;
+
+ for (uint64_t I = 0; I < NumItemsToRead; I++) {
+ const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
+
+ SmallVector<uint64_t> CallStack;
+ for (uint64_t J = 0; J < NumPCs; J++) {
+ CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
+ }
+
+ Items[StackId] = CallStack;
+ }
+ return Items;
+}
+
+// Merges the contents of stack information in \p From to \p To. Returns true if
+// any stack ids observed previously map to a different set of program counter
+// addresses.
+bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
+ for (const auto &IdStack : From) {
+ auto I = To.find(IdStack.first);
+ if (I == To.end()) {
+ To[IdStack.first] = IdStack.second;
+ } else {
+ // Check that the PCs are the same (in order).
+ if (IdStack.second != I->second)
+ return true;
+ }
+ }
+ return false;
+}
- return std::make_unique<RawMemProfReader>(std::move(Buffer));
+Error report(Error E, const StringRef Context) {
+ return joinErrors(createStringError(inconvertibleErrorCode(), Context),
+ std::move(E));
+}
+
+bool isRuntimePath(const StringRef Path) {
+ return StringRef(llvm::sys::path::convert_to_slash(Path))
+ .contains("memprof/memprof_");
+}
+
+std::string getBuildIdString(const SegmentEntry &Entry) {
+ constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
+ constexpr uint8_t Zeros[Size] = {0};
+ // If the build id is unset print a helpful string instead of all zeros.
+ if (memcmp(Entry.BuildId, Zeros, Size) == 0)
+ return "<None>";
+
+ std::string Str;
+ raw_string_ostream OS(Str);
+ for (size_t I = 0; I < Size; I++) {
+ OS << format_hex_no_prefix(Entry.BuildId[I], 2);
+ }
+ return OS.str();
+}
+} // namespace
+
+Expected<std::unique_ptr<RawMemProfReader>>
+RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
+ bool KeepName) {
+ auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+ if (std::error_code EC = BufferOr.getError())
+ return report(errorCodeToError(EC), Path.getSingleStringRef());
+
+ std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+ if (Error E = checkBuffer(*Buffer))
+ return report(std::move(E), Path.getSingleStringRef());
+
+ if (ProfiledBinary.empty())
+ return report(
+ errorCodeToError(make_error_code(std::errc::invalid_argument)),
+ "Path to profiled binary is empty!");
+
+ auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
+ if (!BinaryOr) {
+ return report(BinaryOr.takeError(), ProfiledBinary);
+ }
+
+  // Use new here since the constructor is private.
+ std::unique_ptr<RawMemProfReader> Reader(
+ new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
+ if (Error E = Reader->initialize(std::move(Buffer))) {
+ return std::move(E);
+ }
+ return std::move(Reader);
+}
+
+bool RawMemProfReader::hasFormat(const StringRef Path) {
+ auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+ if (!BufferOr)
+ return false;
+
+ std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+ return hasFormat(*Buffer);
}
bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
@@ -98,24 +216,343 @@ bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
return Magic == MEMPROF_RAW_MAGIC_64;
}
-void RawMemProfReader::printSummaries(raw_ostream &OS) const {
- int Count = 0;
+void RawMemProfReader::printYAML(raw_ostream &OS) {
+ uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
+ for (const auto &KV : FunctionProfileData) {
+ const size_t NumAllocSites = KV.second.AllocSites.size();
+ if (NumAllocSites > 0) {
+ NumAllocFunctions++;
+ NumMibInfo += NumAllocSites;
+ }
+ }
+
+ OS << "MemprofProfile:\n";
+ OS << " Summary:\n";
+ OS << " Version: " << MEMPROF_RAW_VERSION << "\n";
+ OS << " NumSegments: " << SegmentInfo.size() << "\n";
+ OS << " NumMibInfo: " << NumMibInfo << "\n";
+ OS << " NumAllocFunctions: " << NumAllocFunctions << "\n";
+ OS << " NumStackOffsets: " << StackMap.size() << "\n";
+ // Print out the segment information.
+ OS << " Segments:\n";
+ for (const auto &Entry : SegmentInfo) {
+ OS << " -\n";
+ OS << " BuildId: " << getBuildIdString(Entry) << "\n";
+ OS << " Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
+ OS << " End: 0x" << llvm::utohexstr(Entry.End) << "\n";
+ OS << " Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
+ }
+ // Print out the merged contents of the profiles.
+ OS << " Records:\n";
+ for (const auto &Entry : *this) {
+ OS << " -\n";
+ OS << " FunctionGUID: " << Entry.first << "\n";
+ Entry.second.print(OS);
+ }
+}
+
+Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
+ const StringRef FileName = Binary.getBinary()->getFileName();
+
+ auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
+ if (!ElfObject) {
+ return report(make_error<StringError>(Twine("Not an ELF file: "),
+ inconvertibleErrorCode()),
+ FileName);
+ }
+
+  // Check whether the profiled binary was built with position independent
+  // code (PIC). For now we emit an error message until symbolization support
+  // is added for PIC.
+  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
+  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
+  auto PHdrsOr = ElfFile.program_headers();
+  if (!PHdrsOr)
+    return report(
+        make_error<StringError>(Twine("Could not read program headers: "),
+                                inconvertibleErrorCode()),
+        FileName);
+  auto FirstLoadHeader = PHdrsOr->begin();
+  while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD)
+    ++FirstLoadHeader;
+  if (FirstLoadHeader->p_vaddr == 0)
+    return report(
+        make_error<StringError>(Twine("Unsupported position independent code"),
+                                inconvertibleErrorCode()),
+        FileName);
+
+ auto Triple = ElfObject->makeTriple();
+ if (!Triple.isX86())
+ return report(make_error<StringError>(Twine("Unsupported target: ") +
+ Triple.getArchName(),
+ inconvertibleErrorCode()),
+ FileName);
+
+ auto *Object = cast<object::ObjectFile>(Binary.getBinary());
+ std::unique_ptr<DIContext> Context = DWARFContext::create(
+ *Object, DWARFContext::ProcessDebugRelocations::Process);
+
+ auto SOFOr = symbolize::SymbolizableObjectFile::create(
+ Object, std::move(Context), /*UntagAddresses=*/false);
+ if (!SOFOr)
+ return report(SOFOr.takeError(), FileName);
+ Symbolizer = std::move(SOFOr.get());
+
+ if (Error E = readRawProfile(std::move(DataBuffer)))
+ return E;
+
+ if (Error E = symbolizeAndFilterStackFrames())
+ return E;
+
+ return mapRawProfileToRecords();
+}
+
+Error RawMemProfReader::mapRawProfileToRecords() {
+ // Hold a mapping from function to each callsite location we encounter within
+ // it that is part of some dynamic allocation context. The location is stored
+ // as a pointer to a symbolized list of inline frames.
+ using LocationPtr = const llvm::SmallVector<FrameId> *;
+ llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
+ PerFunctionCallSites;
+
+ // Convert the raw profile callstack data into memprof records. While doing so
+ // keep track of related contexts so that we can fill these in later.
+ for (const auto &Entry : CallstackProfileData) {
+ const uint64_t StackId = Entry.first;
+
+ auto It = StackMap.find(StackId);
+ if (It == StackMap.end())
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "memprof callstack record does not contain id: " + Twine(StackId));
+
+ // Construct the symbolized callstack.
+ llvm::SmallVector<FrameId> Callstack;
+ Callstack.reserve(It->getSecond().size());
+
+ llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
+ for (size_t I = 0; I < Addresses.size(); I++) {
+ const uint64_t Address = Addresses[I];
+ assert(SymbolizedFrame.count(Address) > 0 &&
+ "Address not found in SymbolizedFrame map");
+ const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];
+
+ assert(!idToFrame(Frames.back()).IsInlineFrame &&
+ "The last frame should not be inlined");
+
+      // Record the callsites for each function. Skip the first frame of the
+      // first address since it is the allocation site itself, which is
+      // already recorded as an alloc site.
+ for (size_t J = 0; J < Frames.size(); J++) {
+ if (I == 0 && J == 0)
+ continue;
+ // We attach the entire bottom-up frame here for the callsite even
+ // though we only need the frames up to and including the frame for
+ // Frames[J].Function. This will enable better deduplication for
+ // compression in the future.
+ const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
+ PerFunctionCallSites[Guid].insert(&Frames);
+ }
+
+ // Add all the frames to the current allocation callstack.
+ Callstack.append(Frames.begin(), Frames.end());
+ }
+
+ // We attach the memprof record to each function bottom-up including the
+ // first non-inline frame.
+ for (size_t I = 0; /*Break out using the condition below*/; I++) {
+ const Frame &F = idToFrame(Callstack[I]);
+ auto Result =
+ FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
+ IndexedMemProfRecord &Record = Result.first->second;
+ Record.AllocSites.emplace_back(Callstack, Entry.second);
+
+ if (!F.IsInlineFrame)
+ break;
+ }
+ }
+
+ // Fill in the related callsites per function.
+ for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
+ I != E; I++) {
+ const GlobalValue::GUID Id = I->first;
+ // Some functions may have only callsite data and no allocation data. Here
+ // we insert a new entry for callsite data if we need to.
+ auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
+ IndexedMemProfRecord &Record = Result.first->second;
+ for (LocationPtr Loc : I->getSecond()) {
+ Record.CallSites.push_back(*Loc);
+ }
+ }
+
+ return Error::success();
+}
+
+Error RawMemProfReader::symbolizeAndFilterStackFrames() {
+ // The specifier to use when symbolization is requested.
+ const DILineInfoSpecifier Specifier(
+ DILineInfoSpecifier::FileLineInfoKind::RawValue,
+ DILineInfoSpecifier::FunctionNameKind::LinkageName);
+
+ // For entries where all PCs in the callstack are discarded, we erase the
+ // entry from the stack map.
+ llvm::SmallVector<uint64_t> EntriesToErase;
+ // We keep track of all prior discarded entries so that we can avoid invoking
+ // the symbolizer for such entries.
+ llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
+ for (auto &Entry : StackMap) {
+ for (const uint64_t VAddr : Entry.getSecond()) {
+ // Check if we have already symbolized and cached the result or if we
+ // don't want to attempt symbolization since we know this address is bad.
+ // In this case the address is also removed from the current callstack.
+ if (SymbolizedFrame.count(VAddr) > 0 ||
+ AllVAddrsToDiscard.contains(VAddr))
+ continue;
+
+ Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
+ getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
+ if (!DIOr)
+ return DIOr.takeError();
+ DIInliningInfo DI = DIOr.get();
+
+ // Drop frames which we can't symbolize or if they belong to the runtime.
+ if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
+ isRuntimePath(DI.getFrame(0).FileName)) {
+ AllVAddrsToDiscard.insert(VAddr);
+ continue;
+ }
+
+ for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
+ I++) {
+ const auto &DIFrame = DI.getFrame(I);
+ const uint64_t Guid =
+ IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
+ const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
+ // Only the last entry is not an inlined location.
+ I != NumFrames - 1);
+ // Here we retain a mapping from the GUID to symbol name instead of
+ // adding it to the frame object directly to reduce memory overhead.
+ // This is because there can be many unique frames, particularly for
+ // callsite frames.
+ if (KeepSymbolName)
+ GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+
+ const FrameId Hash = F.hash();
+ IdToFrame.insert({Hash, F});
+ SymbolizedFrame[VAddr].push_back(Hash);
+ }
+ }
+
+ auto &CallStack = Entry.getSecond();
+ llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
+ return AllVAddrsToDiscard.contains(A);
+ });
+ if (CallStack.empty())
+ EntriesToErase.push_back(Entry.getFirst());
+ }
+
+ // Drop the entries where the callstack is empty.
+ for (const uint64_t Id : EntriesToErase) {
+ StackMap.erase(Id);
+ CallstackProfileData.erase(Id);
+ }
+
+ if (StackMap.empty())
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "no entries in callstack map after symbolization");
+
+ return Error::success();
+}
+
+Error RawMemProfReader::readRawProfile(
+ std::unique_ptr<MemoryBuffer> DataBuffer) {
const char *Next = DataBuffer->getBufferStart();
+
while (Next < DataBuffer->getBufferEnd()) {
- auto Summary = computeSummary(Next);
- OS << "MemProf Profile " << ++Count << "\n";
- OS << " Version: " << Summary.Version << "\n";
- OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
- OS << " NumSegments: " << Summary.NumSegments << "\n";
- OS << " NumMIBInfo: " << Summary.NumMIBInfo << "\n";
- OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n";
- // TODO: Print the build ids once we can record them using the
- // sanitizer_procmaps library for linux.
+ auto *Header = reinterpret_cast<const memprof::Header *>(Next);
- auto *H = reinterpret_cast<const Header *>(Next);
- Next += H->TotalSize;
+ // Read in the segment information and check whether it is the same across
+ // all profiles in this binary file.
+ const llvm::SmallVector<SegmentEntry> Entries =
+ readSegmentEntries(Next + Header->SegmentOffset);
+ if (!SegmentInfo.empty() && SegmentInfo != Entries) {
+ // We do not expect segment information to change when deserializing from
+ // the same binary profile file. This can happen if dynamic libraries are
+ // loaded/unloaded between profile dumps.
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "memprof raw profile has different segment information");
+ }
+ SegmentInfo.assign(Entries.begin(), Entries.end());
+
+ // Read in the MemInfoBlocks. Merge them based on stack id; we assume that
+ // raw profiles in the same binary file come from the same process, so the
+ // stackdepot ids are the same.
+ for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
+ if (CallstackProfileData.count(Value.first)) {
+ CallstackProfileData[Value.first].Merge(Value.second);
+ } else {
+ CallstackProfileData[Value.first] = Value.second;
+ }
+ }
+
+ // Read in the callstack for each id. For multiple raw profiles in the same
+ // file, we expect the callstack to be the same for a given id.
+ const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
+ if (StackMap.empty()) {
+ StackMap = CSM;
+ } else {
+ if (mergeStackMap(CSM, StackMap))
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "memprof raw profile got different call stack for same id");
+ }
+
+ Next += Header->TotalSize;
+ }
+
+ return Error::success();
+}
+
+object::SectionedAddress
+RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
+ LLVM_DEBUG({
+ SegmentEntry *ContainingSegment = nullptr;
+ for (auto &SE : SegmentInfo) {
+ if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
+ ContainingSegment = &SE;
+ }
}
+
+ // Ensure that the virtual address is valid.
+ assert(ContainingSegment && "Could not find a segment entry");
+ });
+
+ // TODO: Compute the file offset based on the maps and program headers. For
+ // now this only works for non PIE binaries.
+ return object::SectionedAddress{VirtualAddress};
}
+Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
+ if (FunctionProfileData.empty())
+ return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
+ if (Iter == FunctionProfileData.end())
+ return make_error<InstrProfError>(instrprof_error::eof);
+
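+ // Convert each FrameId back to a Frame, attaching the symbol name kept in
+ // GuidToSymbolName when KeepSymbolName is set.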
+ auto IdToFrameCallback = [this](const FrameId Id) {
+ Frame F = this->idToFrame(Id);
+ if (!this->KeepSymbolName)
+ return F;
+ auto Iter = this->GuidToSymbolName.find(F.Function);
+ assert(Iter != this->GuidToSymbolName.end());
+ F.SymbolName = Iter->getSecond();
+ return F;
+ };
+
+ const IndexedMemProfRecord &IndexedRecord = Iter->second;
+ GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
+ Iter++;
+ return Error::success();
+}
} // namespace memprof
} // namespace llvm
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 9b01a386a360..f794e64a13e7 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -19,9 +19,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
@@ -31,22 +29,21 @@ using namespace llvm;
using namespace sampleprof;
static cl::opt<uint64_t> ProfileSymbolListCutOff(
- "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1), cl::ZeroOrMore,
+ "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1),
cl::desc("Cutoff value about how many symbols in profile symbol list "
"will be used. This is very useful for performance debugging"));
cl::opt<bool> GenerateMergedBaseProfiles(
- "generate-merged-base-profiles", cl::init(true), cl::ZeroOrMore,
+ "generate-merged-base-profiles",
cl::desc("When generating nested context-sensitive profiles, always "
"generate extra base profile for function with all its context "
"profiles merged into it."));
namespace llvm {
namespace sampleprof {
-SampleProfileFormat FunctionSamples::Format;
bool FunctionSamples::ProfileIsProbeBased = false;
-bool FunctionSamples::ProfileIsCSFlat = false;
-bool FunctionSamples::ProfileIsCSNested = false;
+bool FunctionSamples::ProfileIsCS = false;
+bool FunctionSamples::ProfileIsPreInlined = false;
bool FunctionSamples::UseMD5 = false;
bool FunctionSamples::HasUniqSuffix = true;
bool FunctionSamples::ProfileIsFS = false;
@@ -88,8 +85,6 @@ class SampleProfErrorCategoryType : public std::error_category {
return "Counter overflow";
case sampleprof_error::ostream_seek_unsupported:
return "Ostream does not support seek";
- case sampleprof_error::compress_failed:
- return "Compress failure";
case sampleprof_error::uncompress_failed:
return "Uncompress failure";
case sampleprof_error::zlib_unavailable:
@@ -523,6 +518,12 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile);
NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
+ // Remove the corresponding body sample for the callsite and update the
+ // total weight.
+ auto Count = NodeProfile->removeCalledTargetAndBodySample(
+ ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
+ OrigChildContext.getName());
+ NodeProfile->removeTotalSamples(Count);
}
// Separate child profile to be a standalone profile, if the current parent
@@ -531,13 +532,14 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
// thus done optionally. It is seen that duplicating context profiles into
// base profiles improves the code quality for thinlto build by allowing a
// profile in the prelink phase for to-be-fully-inlined functions.
- if (!NodeProfile || GenerateMergedBaseProfiles)
+ if (!NodeProfile) {
ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
-
- // Contexts coming with a `ContextShouldBeInlined` attribute indicate this
- // is a preinliner-computed profile.
- if (OrigChildContext.hasAttribute(ContextShouldBeInlined))
- FunctionSamples::ProfileIsCSNested = true;
+ } else if (GenerateMergedBaseProfiles) {
+ ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+ auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
+ SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
+ ContextDuplicatedIntoBase);
+ }
// Remove the original child profile.
ProfileMap.erase(OrigChildContext);
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 80c02faaba04..280e3c6cb8d1 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
@@ -39,7 +40,6 @@
#include <cstdint>
#include <limits>
#include <memory>
-#include <set>
#include <system_error>
#include <vector>
@@ -348,7 +348,7 @@ std::error_code SampleProfileReaderText::readImpl() {
}
FProfile.getContext().setAllAttributes(Attributes);
if (Attributes & (uint32_t)ContextShouldBeInlined)
- ProfileIsCSNested = true;
+ ProfileIsPreInlined = true;
DepthMetadata = Depth;
break;
}
@@ -358,14 +358,14 @@ std::error_code SampleProfileReaderText::readImpl() {
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile");
- ProfileIsCSFlat = (CSProfileCount > 0);
+ ProfileIsCS = (CSProfileCount > 0);
assert((TopLevelProbeProfileCount == 0 ||
TopLevelProbeProfileCount == Profiles.size()) &&
"Cannot have both probe-based profiles and regular profiles");
ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
- FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat;
- FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
+ FunctionSamples::ProfileIsCS = ProfileIsCS;
+ FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
if (Result == sampleprof_error::success)
computeSummary();
@@ -630,7 +630,7 @@ SampleProfileReaderExtBinaryBase::readContextFromTable() {
ErrorOr<SampleContext>
SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
- if (ProfileIsCSFlat) {
+ if (ProfileIsCS) {
auto FContext(readContextFromTable());
if (std::error_code EC = FContext.getError())
return EC;
@@ -654,9 +654,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
Summary->setPartialProfile(true);
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
- FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat = true;
- if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
- FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
+ FunctionSamples::ProfileIsCS = ProfileIsCS = true;
+ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
+ FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
FunctionSamples::ProfileIsFS = ProfileIsFS = true;
break;
@@ -777,7 +777,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
}
- if (ProfileIsCSFlat) {
+ if (ProfileIsCS) {
DenseSet<uint64_t> FuncGuidsToUse;
if (useMD5()) {
for (auto Name : FuncsToUse)
@@ -847,7 +847,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile");
- assert((!CSProfileCount || ProfileIsCSFlat) &&
+ assert((!CSProfileCount || ProfileIsCS) &&
"Section flag should be consistent with actual profile");
return sampleprof_error::success;
}
@@ -1105,7 +1105,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
FProfile->getContext().setAllAttributes(*Attributes);
}
- if (!ProfileIsCSFlat) {
+ if (!ProfileIsCS) {
// Read all the attributes for inlined function calls.
auto NumCallsites = readNumber<uint32_t>();
if (std::error_code EC = NumCallsites.getError())
@@ -1275,8 +1275,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
Flags.append("partial,");
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
Flags.append("context,");
- if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
- Flags.append("context-nested,");
+ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
+ Flags.append("preInlined,");
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
Flags.append("fs-discriminator,");
break;
@@ -1828,7 +1828,7 @@ SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
SampleProfileReader &Reader,
LLVMContext &C) {
auto Remappings = std::make_unique<SymbolRemappingReader>();
- if (Error E = Remappings->read(*B.get())) {
+ if (Error E = Remappings->read(*B)) {
handleAllErrors(
std::move(E), [&](const SymbolRemappingParseError &ParseError) {
C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
@@ -1882,7 +1882,6 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
Reader->Remapper = std::move(ReaderOrErr.get());
}
- FunctionSamples::Format = Reader->getFormat();
if (std::error_code EC = Reader->readHeader()) {
return EC;
}
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index b575425d4e94..8ec6b7ebc29e 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -19,7 +19,6 @@
#include "llvm/ProfileData/SampleProfWriter.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Compression.h"
@@ -87,10 +86,8 @@ std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
return sampleprof_error::success;
auto &OS = *OutputStream;
SmallString<128> CompressedStrings;
- llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
- zlib::BestSizeCompression);
- if (E)
- return sampleprof_error::compress_failed;
+ zlib::compress(UncompressedStrings, CompressedStrings,
+ zlib::BestSizeCompression);
encodeULEB128(UncompressedStrings.size(), OS);
encodeULEB128(CompressedStrings.size(), OS);
OS << CompressedStrings.str();
@@ -172,7 +169,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
return (std::error_code)sampleprof_error::success;
};
- if (FunctionSamples::ProfileIsCSFlat) {
+ if (FunctionSamples::ProfileIsCS) {
// Sort the contexts before writing them out. This is to help fast load all
// context profiles for a function as well as their callee contexts which
// can help profile-guided importing for ThinLTO.
@@ -202,11 +199,11 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
if (FunctionSamples::ProfileIsProbeBased)
encodeULEB128(FunctionProfile.getFunctionHash(), OS);
- if (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested) {
+ if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined) {
encodeULEB128(FunctionProfile.getContext().getAllAttributes(), OS);
}
- if (!FunctionSamples::ProfileIsCSFlat) {
+ if (!FunctionSamples::ProfileIsCS) {
// Recursively emit attributes for all callee samples.
uint64_t NumCallsites = 0;
for (const auto &J : FunctionProfile.getCallsiteSamples())
@@ -228,8 +225,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
const SampleProfileMap &Profiles) {
- if (!FunctionSamples::ProfileIsProbeBased &&
- !FunctionSamples::ProfileIsCSFlat && !FunctionSamples::ProfileIsCSNested)
+ if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS &&
+ !FunctionSamples::ProfileIsPreInlined)
return sampleprof_error::success;
for (const auto &Entry : Profiles) {
if (std::error_code EC = writeFuncMetadata(Entry.second))
@@ -324,12 +321,12 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
if (Type == SecFuncMetadata &&
- (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested))
+ (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined))
addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute);
- if (Type == SecProfSummary && FunctionSamples::ProfileIsCSFlat)
+ if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
- if (Type == SecProfSummary && FunctionSamples::ProfileIsCSNested)
- addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsCSNested);
+ if (Type == SecProfSummary && FunctionSamples::ProfileIsPreInlined)
+ addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
@@ -471,7 +468,7 @@ SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
/// it needs to be parsed by the SampleProfileReaderText class.
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
auto &OS = *OutputStream;
- if (FunctionSamples::ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
else
OS << S.getName() << ":" << S.getTotalSamples();
@@ -871,8 +868,7 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
std::unique_ptr<SampleProfileWriter> Writer;
// Currently only Text and Extended Binary format are supported for CSSPGO.
- if ((FunctionSamples::ProfileIsCSFlat ||
- FunctionSamples::ProfileIsProbeBased) &&
+ if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) &&
(Format == SPF_Binary || Format == SPF_Compact_Binary))
return sampleprof_error::unsupported_writing_format;
diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
index 0810bf531db8..5a77a25b1569 100644
--- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/BitstreamRemarkSerializer.h"
+#include "llvm/Remarks/Remark.h"
using namespace llvm;
using namespace llvm::remarks;
diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp
index 62f80918ea1d..cbe966794c49 100644
--- a/llvm/lib/Remarks/RemarkLinker.cpp
+++ b/llvm/lib/Remarks/RemarkLinker.cpp
@@ -17,11 +17,14 @@
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Remarks/RemarkSerializer.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::remarks;
+namespace llvm {
+class raw_ostream;
+}
+
static Expected<StringRef>
getRemarksSectionName(const object::ObjectFile &Obj) {
if (Obj.isMachO())
@@ -63,7 +66,7 @@ void RemarkLinker::setExternalFilePrependPath(StringRef PrependPathIn) {
}
// Discard remarks with no source location.
-static bool shouldKeepRemark(const Remark &R) { return R.Loc.hasValue(); }
+static bool shouldKeepRemark(const Remark &R) { return R.Loc.has_value(); }
Error RemarkLinker::link(StringRef Buffer, Optional<Format> RemarkFormat) {
if (!RemarkFormat) {
diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index f36767efcbf4..fc0612fb76e2 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -118,7 +118,7 @@ struct CParser {
: createRemarkParser(ParserFormat, Buf))) {}
void handleError(Error E) { Err.emplace(toString(std::move(E))); }
- bool hasError() const { return Err.hasValue(); }
+ bool hasError() const { return Err.has_value(); }
const char *getMessage() const { return Err ? Err->c_str() : nullptr; };
};
} // namespace
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 9e965aa4f6c4..fff2b655e821 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/YAMLRemarkSerializer.h"
+#include "llvm/Remarks/Remark.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;
@@ -58,8 +59,7 @@ template <> struct MappingTraits<remarks::Remark *> {
if (auto *Serializer = dyn_cast<YAMLStrTabRemarkSerializer>(
reinterpret_cast<RemarkSerializer *>(io.getContext()))) {
- assert(Serializer->StrTab.hasValue() &&
- "YAMLStrTabSerializer with no StrTab.");
+ assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab.");
StringTable &StrTab = *Serializer->StrTab;
unsigned PassID = StrTab.add(Remark->PassName).first;
unsigned NameID = StrTab.add(Remark->RemarkName).first;
@@ -83,8 +83,7 @@ template <> struct MappingTraits<RemarkLocation> {
if (auto *Serializer = dyn_cast<YAMLStrTabRemarkSerializer>(
reinterpret_cast<RemarkSerializer *>(io.getContext()))) {
- assert(Serializer->StrTab.hasValue() &&
- "YAMLStrTabSerializer with no StrTab.");
+ assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab.");
StringTable &StrTab = *Serializer->StrTab;
unsigned FileID = StrTab.add(File).first;
io.mapRequired("File", FileID);
@@ -138,8 +137,7 @@ template <> struct MappingTraits<Argument> {
if (auto *Serializer = dyn_cast<YAMLStrTabRemarkSerializer>(
reinterpret_cast<RemarkSerializer *>(io.getContext()))) {
- assert(Serializer->StrTab.hasValue() &&
- "YAMLStrTabSerializer with no StrTab.");
+ assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab.");
StringTable &StrTab = *Serializer->StrTab;
auto ValueID = StrTab.add(A.Val).first;
io.mapRequired(A.Key.data(), ValueID);
diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp
index cdf7c8ade9aa..e2579bf53260 100644
--- a/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/llvm/lib/Support/AArch64TargetParser.cpp
@@ -64,62 +64,14 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions,
if (Extensions == AArch64::AEK_INVALID)
return false;
- if (Extensions & AEK_FP)
- Features.push_back("+fp-armv8");
- if (Extensions & AEK_SIMD)
- Features.push_back("+neon");
- if (Extensions & AEK_CRC)
- Features.push_back("+crc");
- if (Extensions & AEK_CRYPTO)
- Features.push_back("+crypto");
- if (Extensions & AEK_DOTPROD)
- Features.push_back("+dotprod");
- if (Extensions & AEK_FP16FML)
- Features.push_back("+fp16fml");
- if (Extensions & AEK_FP16)
- Features.push_back("+fullfp16");
- if (Extensions & AEK_PROFILE)
- Features.push_back("+spe");
- if (Extensions & AEK_RAS)
- Features.push_back("+ras");
- if (Extensions & AEK_LSE)
- Features.push_back("+lse");
- if (Extensions & AEK_RDM)
- Features.push_back("+rdm");
- if (Extensions & AEK_SVE)
- Features.push_back("+sve");
- if (Extensions & AEK_SVE2)
- Features.push_back("+sve2");
- if (Extensions & AEK_SVE2AES)
- Features.push_back("+sve2-aes");
- if (Extensions & AEK_SVE2SM4)
- Features.push_back("+sve2-sm4");
- if (Extensions & AEK_SVE2SHA3)
- Features.push_back("+sve2-sha3");
- if (Extensions & AEK_SVE2BITPERM)
- Features.push_back("+sve2-bitperm");
- if (Extensions & AArch64::AEK_TME)
- Features.push_back("+tme");
- if (Extensions & AEK_RCPC)
- Features.push_back("+rcpc");
- if (Extensions & AEK_BRBE)
- Features.push_back("+brbe");
- if (Extensions & AEK_PAUTH)
- Features.push_back("+pauth");
- if (Extensions & AEK_FLAGM)
- Features.push_back("+flagm");
- if (Extensions & AArch64::AEK_SME)
- Features.push_back("+sme");
- if (Extensions & AArch64::AEK_SMEF64)
- Features.push_back("+sme-f64");
- if (Extensions & AArch64::AEK_SMEI64)
- Features.push_back("+sme-i64");
- if (Extensions & AArch64::AEK_HBC)
- Features.push_back("+hbc");
- if (Extensions & AArch64::AEK_MOPS)
- Features.push_back("+mops");
- if (Extensions & AArch64::AEK_PERFMON)
- Features.push_back("+perfmon");
+#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \
+ if (Extensions & ID) { \
+ const char *feature = FEATURE; \
+ /* INVALID and NONE have no feature name. */ \
+ if (feature) \
+ Features.push_back(feature); \
+ }
+#include "../../include/llvm/Support/AArch64TargetParser.def"
return true;
}
diff --git a/llvm/lib/Support/APFixedPoint.cpp b/llvm/lib/Support/APFixedPoint.cpp
index 61b30b5c5c60..f1d07184793c 100644
--- a/llvm/lib/Support/APFixedPoint.cpp
+++ b/llvm/lib/Support/APFixedPoint.cpp
@@ -233,11 +233,11 @@ APFixedPoint APFixedPoint::mul(const APFixedPoint &Other,
// Widen the LHS and RHS so we can perform a full multiplication.
unsigned Wide = CommonFXSema.getWidth() * 2;
if (CommonFXSema.isSigned()) {
- ThisVal = ThisVal.sextOrSelf(Wide);
- OtherVal = OtherVal.sextOrSelf(Wide);
+ ThisVal = ThisVal.sext(Wide);
+ OtherVal = OtherVal.sext(Wide);
} else {
- ThisVal = ThisVal.zextOrSelf(Wide);
- OtherVal = OtherVal.zextOrSelf(Wide);
+ ThisVal = ThisVal.zext(Wide);
+ OtherVal = OtherVal.zext(Wide);
}
// Perform the full multiplication and downscale to get the same scale.
@@ -290,11 +290,11 @@ APFixedPoint APFixedPoint::div(const APFixedPoint &Other,
// Widen the LHS and RHS so we can perform a full division.
unsigned Wide = CommonFXSema.getWidth() * 2;
if (CommonFXSema.isSigned()) {
- ThisVal = ThisVal.sextOrSelf(Wide);
- OtherVal = OtherVal.sextOrSelf(Wide);
+ ThisVal = ThisVal.sext(Wide);
+ OtherVal = OtherVal.sext(Wide);
} else {
- ThisVal = ThisVal.zextOrSelf(Wide);
- OtherVal = OtherVal.zextOrSelf(Wide);
+ ThisVal = ThisVal.zext(Wide);
+ OtherVal = OtherVal.zext(Wide);
}
// Upscale to compensate for the loss of precision from division, and
@@ -340,9 +340,9 @@ APFixedPoint APFixedPoint::shl(unsigned Amt, bool *Overflow) const {
// Widen the LHS.
unsigned Wide = Sema.getWidth() * 2;
if (Sema.isSigned())
- ThisVal = ThisVal.sextOrSelf(Wide);
+ ThisVal = ThisVal.sext(Wide);
else
- ThisVal = ThisVal.zextOrSelf(Wide);
+ ThisVal = ThisVal.zext(Wide);
// Clamp the shift amount at the original width, and perform the shift.
Amt = std::min(Amt, ThisVal.getBitWidth());
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 4b75c9db8526..2ae28fe066cd 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -2213,8 +2213,11 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
// when truncating from PowerPC double-double to double format), the
// right shift could lose result mantissa bits. Adjust exponent instead
// of performing excessive shift.
+ // Also do a similar trick in case shifting a denormal would produce a zero
+ // significand, as that case isn't handled correctly by normalize.
if (shift < 0 && isFiniteNonZero()) {
- int exponentChange = significandMSB() + 1 - fromSemantics.precision;
+ int omsb = significandMSB() + 1;
+ int exponentChange = omsb - fromSemantics.precision;
if (exponent + exponentChange < toSemantics.minExponent)
exponentChange = toSemantics.minExponent - exponent;
if (exponentChange < shift)
@@ -2222,6 +2225,10 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
if (exponentChange < 0) {
shift -= exponentChange;
exponent += exponentChange;
+ } else if (omsb <= -shift) {
+ exponentChange = omsb + shift - 1; // leave at least one bit set
+ shift -= exponentChange;
+ exponent += exponentChange;
}
}
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index b536e9a9a6d0..f74178b1ba4e 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -343,7 +343,7 @@ void APInt::flipAllBitsSlowCase() {
/// In the slow case, we know the result is large.
APInt APInt::concatSlowCase(const APInt &NewLSB) const {
unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
- APInt Result = NewLSB.zextOrSelf(NewWidth);
+ APInt Result = NewLSB.zext(NewWidth);
Result.insertBits(*this, NewLSB.getBitWidth());
return Result;
}
@@ -502,12 +502,51 @@ uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
return retBits;
}
+unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
+ assert(!Str.empty() && "Invalid string length");
+ size_t StrLen = Str.size();
+
+ // Each computation below needs to know if it's negative.
+ unsigned IsNegative = false;
+ if (Str[0] == '-' || Str[0] == '+') {
+ IsNegative = Str[0] == '-';
+ StrLen--;
+ assert(StrLen && "String is only a sign, needs a value.");
+ }
+
+ // For radixes of power-of-two values, the bits required is accurately and
+ // easily computed.
+ if (Radix == 2)
+ return StrLen + IsNegative;
+ if (Radix == 8)
+ return StrLen * 3 + IsNegative;
+ if (Radix == 16)
+ return StrLen * 4 + IsNegative;
+
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large. This avoids the assertion in the constructor. This
+ // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+ // bits in that case.
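+ // For radix 10, 64/18 (about 3.56) bits per digit safely overestimates
+ // log2(10) (about 3.32).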
+ if (Radix == 10)
+ return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative;
+
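+ // For radix 36, 16/3 (about 5.34) bits per digit safely overestimates
+ // log2(36) (about 5.17).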
+ assert(Radix == 36);
+ return (StrLen == 1 ? 7 : StrLen * 16 / 3) + IsNegative;
+}
+
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
- assert(!str.empty() && "Invalid string length");
- assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
- radix == 36) &&
- "Radix should be 2, 8, 10, 16, or 36!");
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large.
+ unsigned sufficient = getSufficientBitsNeeded(str, radix);
+
+ // For bases 2, 8, and 16, the sufficient number of bits is exact and we can
+ // return the value directly. For bases 10 and 36, we need to do extra work.
+ if (radix == 2 || radix == 8 || radix == 16)
+ return sufficient;
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
size_t slen = str.size();
// Each computation below needs to know if it's negative.
@@ -519,28 +558,6 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(slen && "String is only a sign, needs a value.");
}
- // For radixes of power-of-two values, the bits required is accurately and
- // easily computed
- if (radix == 2)
- return slen + isNegative;
- if (radix == 8)
- return slen * 3 + isNegative;
- if (radix == 16)
- return slen * 4 + isNegative;
-
- // FIXME: base 36
-
- // This is grossly inefficient but accurate. We could probably do something
- // with a computation of roughly slen*64/20 and then adjust by the value of
- // the first few digits. But, I'm not sure how accurate that could be.
-
- // Compute a sufficient number of bits that is always large enough but might
- // be too large. This avoids the assertion in the constructor. This
- // calculation doesn't work appropriately for the numbers 0-9, so just use 4
- // bits in that case.
- unsigned sufficient
- = radix == 10? (slen == 1 ? 4 : slen * 64/18)
- : (slen == 1 ? 7 : slen * 16/3);
// Convert to the actual binary value.
APInt tmp(sufficient, StringRef(p, slen), radix);
@@ -595,7 +612,7 @@ APInt APInt::getLoBits(unsigned numBits) const {
APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
- APInt Val = V.zextOrSelf(NewLen);
+ APInt Val = V.zext(NewLen);
for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
Val |= Val << I;
@@ -879,11 +896,14 @@ double APInt::roundToDouble(bool isSigned) const {
// Truncate to new width.
APInt APInt::trunc(unsigned width) const {
- assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width <= BitWidth && "Invalid APInt Truncate request");
if (width <= APINT_BITS_PER_WORD)
return APInt(width, getRawData()[0]);
+ if (width == BitWidth)
+ return *this;
+
APInt Result(getMemory(getNumWords(width)), width);
// Copy full words.
@@ -901,7 +921,7 @@ APInt APInt::trunc(unsigned width) const {
// Truncate to new width with unsigned saturation.
APInt APInt::truncUSat(unsigned width) const {
- assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width <= BitWidth && "Invalid APInt Truncate request");
// Can we just losslessly truncate it?
if (isIntN(width))
@@ -912,7 +932,7 @@ APInt APInt::truncUSat(unsigned width) const {
// Truncate to new width with signed saturation.
APInt APInt::truncSSat(unsigned width) const {
- assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width <= BitWidth && "Invalid APInt Truncate request");
// Can we just losslessly truncate it?
if (isSignedIntN(width))
@@ -924,11 +944,14 @@ APInt APInt::truncSSat(unsigned width) const {
// Sign extend to a new width.
APInt APInt::sext(unsigned Width) const {
- assert(Width > BitWidth && "Invalid APInt SignExtend request");
+ assert(Width >= BitWidth && "Invalid APInt SignExtend request");
if (Width <= APINT_BITS_PER_WORD)
return APInt(Width, SignExtend64(U.VAL, BitWidth));
+ if (Width == BitWidth)
+ return *this;
+
APInt Result(getMemory(getNumWords(Width)), Width);
// Copy words.
@@ -948,11 +971,14 @@ APInt APInt::sext(unsigned Width) const {
// Zero extend to a new width.
APInt APInt::zext(unsigned width) const {
- assert(width > BitWidth && "Invalid APInt ZeroExtend request");
+ assert(width >= BitWidth && "Invalid APInt ZeroExtend request");
if (width <= APINT_BITS_PER_WORD)
return APInt(width, U.VAL);
+ if (width == BitWidth)
+ return *this;
+
APInt Result(getMemory(getNumWords(width)), width);
// Copy words.
@@ -981,24 +1007,6 @@ APInt APInt::sextOrTrunc(unsigned width) const {
return *this;
}
-APInt APInt::truncOrSelf(unsigned width) const {
- if (BitWidth > width)
- return trunc(width);
- return *this;
-}
-
-APInt APInt::zextOrSelf(unsigned width) const {
- if (BitWidth < width)
- return zext(width);
- return *this;
-}
-
-APInt APInt::sextOrSelf(unsigned width) const {
- if (BitWidth < width)
- return sext(width);
- return *this;
-}
-
/// Arithmetic right-shift this APInt by shiftAmt.
/// Arithmetic right-shift function.
void APInt::ashrInPlace(const APInt &shiftAmt) {
@@ -2960,7 +2968,8 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
}
-APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth) {
+APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth,
+ bool MatchAllBits) {
unsigned OldBitWidth = A.getBitWidth();
assert((((OldBitWidth % NewBitWidth) == 0) ||
((NewBitWidth % OldBitWidth) == 0)) &&
@@ -2984,11 +2993,16 @@ APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth) {
if (A[i])
NewA.setBits(i * Scale, (i + 1) * Scale);
} else {
- // Merge bits - if any old bit is set, then set scale equivalent new bit.
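+ // Merge bits. With MatchAllBits, a new bit is set only when every old bit
+ // in its group is set; otherwise any set old bit suffices. For example,
+ // scaling A=0b1101 from 4 to 2 bits gives 0b10 with MatchAllBits and 0b11
+ // without.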
unsigned Scale = OldBitWidth / NewBitWidth;
- for (unsigned i = 0; i != NewBitWidth; ++i)
- if (!A.extractBits(Scale, i * Scale).isZero())
- NewA.setBit(i);
+ for (unsigned i = 0; i != NewBitWidth; ++i) {
+ if (MatchAllBits) {
+ if (A.extractBits(Scale, i * Scale).isAllOnes())
+ NewA.setBit(i);
+ } else {
+ if (!A.extractBits(Scale, i * Scale).isZero())
+ NewA.setBit(i);
+ }
+ }
}
return NewA;
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index 9ba224cee0ca..adb5d3f0964d 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -87,7 +87,7 @@ Error ARMAttributeParser::CPU_arch(AttrType tag) {
"ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M",
"ARM v7E-M", "ARM v8", nullptr,
"ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr,
- "ARM v8.1-M Mainline"
+ "ARM v8.1-M Mainline", "ARM v9-A"
};
return parseStringAttribute("CPU_arch", tag, makeArrayRef(strings));
}
diff --git a/llvm/lib/Support/ARMWinEH.cpp b/llvm/lib/Support/ARMWinEH.cpp
index 8e7fa1149082..29c7a28541f2 100644
--- a/llvm/lib/Support/ARMWinEH.cpp
+++ b/llvm/lib/Support/ARMWinEH.cpp
@@ -11,22 +11,35 @@
namespace llvm {
namespace ARM {
namespace WinEH {
-std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF) {
+std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF,
+ bool Prologue) {
uint8_t NumRegisters = RF.Reg();
uint8_t RegistersVFP = RF.R();
uint8_t LinkRegister = RF.L();
uint8_t ChainedFrame = RF.C();
- uint16_t GPRMask = (ChainedFrame << 11) | (LinkRegister << 14);
+ uint16_t GPRMask = (ChainedFrame << 11);
uint32_t VFPMask = 0;
+ if (Prologue) {
+ GPRMask |= (LinkRegister << 14);
+ } else {
+ // If Ret != 0, we pop into Lr and return later
+ if (RF.Ret() != ReturnType::RT_POP)
+ GPRMask |= (LinkRegister << 14);
+ else if (!RF.H()) // If H == 0, we pop directly into Pc
+ GPRMask |= (LinkRegister << 15);
+ // else, Ret == 0 && H == 1, we pop into Pc separately afterwards
+ }
+
if (RegistersVFP)
VFPMask |= (((1 << ((NumRegisters + 1) % 8)) - 1) << 8);
else
GPRMask |= (((1 << (NumRegisters + 1)) - 1) << 4);
- if (PrologueFolding(RF))
- GPRMask |= (((1 << (NumRegisters + 1)) - 1) << (~RF.StackAdjust() & 0x3));
+ if ((PrologueFolding(RF) && Prologue) || (EpilogueFolding(RF) && !Prologue))
+ GPRMask |= (((1 << ((RF.StackAdjust() & 0x3) + 1)) - 1)
+ << (~RF.StackAdjust() & 0x3));
return std::make_pair(GPRMask, VFPMask);
}
diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp
new file mode 100644
index 000000000000..5ba011bac4e9
--- /dev/null
+++ b/llvm/lib/Support/AddressRanges.cpp
@@ -0,0 +1,59 @@
+//===- AddressRanges.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/AddressRanges.h"
+#include "llvm/ADT/STLExtras.h"
+#include <inttypes.h>
+
+using namespace llvm;
+
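+// insert() keeps Ranges sorted and coalesces overlapping ranges. For example,
+// inserting [10,20) into {[5,12), [18,25)} collapses all three into the
+// single range [5,25); ranges that merely touch are kept separate.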
+void AddressRanges::insert(AddressRange Range) {
+ if (Range.size() == 0)
+ return;
+
+ auto It = llvm::upper_bound(Ranges, Range);
+ auto It2 = It;
+ while (It2 != Ranges.end() && It2->start() < Range.end())
+ ++It2;
+ if (It != It2) {
+ Range = {Range.start(), std::max(Range.end(), It2[-1].end())};
+ It = Ranges.erase(It, It2);
+ }
+ if (It != Ranges.begin() && Range.start() < It[-1].end())
+ It[-1] = {It[-1].start(), std::max(It[-1].end(), Range.end())};
+ else
+ Ranges.insert(It, Range);
+}
+
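+// partition_point returns the first range whose start is greater than Addr,
+// so the only candidate containing Addr is the range just before it.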
+bool AddressRanges::contains(uint64_t Addr) const {
+ auto It = std::partition_point(
+ Ranges.begin(), Ranges.end(),
+ [=](const AddressRange &R) { return R.start() <= Addr; });
+ return It != Ranges.begin() && Addr < It[-1].end();
+}
+
+bool AddressRanges::contains(AddressRange Range) const {
+ if (Range.size() == 0)
+ return false;
+ auto It = std::partition_point(
+ Ranges.begin(), Ranges.end(),
+ [=](const AddressRange &R) { return R.start() <= Range.start(); });
+ if (It == Ranges.begin())
+ return false;
+ return Range.end() <= It[-1].end();
+}
+
+Optional<AddressRange>
+AddressRanges::getRangeThatContains(uint64_t Addr) const {
+ auto It = std::partition_point(
+ Ranges.begin(), Ranges.end(),
+ [=](const AddressRange &R) { return R.start() <= Addr; });
+ if (It != Ranges.begin() && Addr < It[-1].end())
+ return It[-1];
+ return llvm::None;
+}
diff --git a/llvm/lib/Support/BLAKE3/LICENSE b/llvm/lib/Support/BLAKE3/LICENSE
new file mode 100644
index 000000000000..f5892efc3b9b
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/LICENSE
@@ -0,0 +1,330 @@
+This work is released into the public domain with CC0 1.0. Alternatively, it is
+licensed under the Apache License 2.0.
+
+-------------------------------------------------------------------------------
+
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+ HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display,
+ communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+ likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+ v. rights protecting the extraction, dissemination, use and reuse of data
+ in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation
+ thereof, including any amended or successor version of such
+ directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+ world based on applicable law or treaty, and any national
+ implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+ warranties of any kind concerning the Work, express, implied,
+ statutory or otherwise, including without limitation warranties of
+ title, merchantability, fitness for a particular purpose, non
+ infringement, or the absence of latent or other defects, accuracy, or
+ the present or absence of errors, whether or not discoverable, all to
+ the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without
+ limitation any person's Copyright and Related Rights in the Work.
+ Further, Affirmer disclaims responsibility for obtaining any necessary
+ consents, permissions or other rights required for any use of the
+ Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to
+ this CC0 or use of the Work.
+
+-------------------------------------------------------------------------------
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2019 Jack O'Connor and Samuel Neves
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/llvm/lib/Support/BLAKE3/README.md b/llvm/lib/Support/BLAKE3/README.md
new file mode 100644
index 000000000000..319a7514e8b5
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/README.md
@@ -0,0 +1,296 @@
+Implementation of BLAKE3, originating from https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c
+
+# Example
+
+An example program that hashes bytes from standard input and prints the
+result:
+
+Using the C++ API:
+
+```c++
+#include "llvm/Support/BLAKE3.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int main() {
+ // Initialize the hasher.
+ llvm::BLAKE3 hasher;
+
+ // Read input bytes from stdin.
+ char buf[65536];
+ while (1) {
+ ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
+ if (n > 0) {
+ hasher.update(llvm::StringRef(buf, n));
+ } else if (n == 0) {
+ break; // end of file
+ } else {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ exit(1);
+ }
+ }
+
+ // Finalize the hash. Default output length is 32 bytes.
+ auto output = hasher.final();
+
+ // Print the hash as hexadecimal.
+ for (uint8_t byte : output) {
+ printf("%02x", byte);
+ }
+ printf("\n");
+ return 0;
+}
+```
+
+Using the C API:
+
+```c
+#include "llvm-c/blake3.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int main() {
+ // Initialize the hasher.
+ llvm_blake3_hasher hasher;
+ llvm_blake3_hasher_init(&hasher);
+
+ // Read input bytes from stdin.
+ unsigned char buf[65536];
+ while (1) {
+ ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
+ if (n > 0) {
+ llvm_blake3_hasher_update(&hasher, buf, n);
+ } else if (n == 0) {
+ break; // end of file
+ } else {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ exit(1);
+ }
+ }
+
+ // Finalize the hash. LLVM_BLAKE3_OUT_LEN is the default output length, 32 bytes.
+ uint8_t output[LLVM_BLAKE3_OUT_LEN];
+ llvm_blake3_hasher_finalize(&hasher, output, LLVM_BLAKE3_OUT_LEN);
+
+ // Print the hash as hexadecimal.
+ for (size_t i = 0; i < LLVM_BLAKE3_OUT_LEN; i++) {
+ printf("%02x", output[i]);
+ }
+ printf("\n");
+ return 0;
+}
+```
+
+# API
+
+## The Class/Struct
+
+```c++
+class BLAKE3 {
+ // API
+private:
+ llvm_blake3_hasher Hasher;
+};
+```
+```c
+typedef struct {
+ // private fields
+} llvm_blake3_hasher;
+```
+
+An incremental BLAKE3 hashing state, which can accept any number of
+updates. This implementation doesn't allocate any heap memory, but
+`sizeof(llvm_blake3_hasher)` itself is relatively large, currently 1912 bytes
+on x86-64. This size can be reduced by restricting the maximum input
+length, as described in Section 5.4 of [the BLAKE3
+spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf),
+but this implementation doesn't currently support that strategy.
+
+## Common API Functions
+
+```c++
+BLAKE3::BLAKE3();
+
+void BLAKE3::init();
+```
+```c
+void llvm_blake3_hasher_init(
+ llvm_blake3_hasher *self);
+```
+
+Initialize a `llvm_blake3_hasher` in the default hashing mode.
+
+---
+
+```c++
+void BLAKE3::update(ArrayRef<uint8_t> Data);
+
+void BLAKE3::update(StringRef Str);
+```
+```c
+void llvm_blake3_hasher_update(
+ llvm_blake3_hasher *self,
+ const void *input,
+ size_t input_len);
+```
+
+Add input to the hasher. This can be called any number of times.
+
+---
+
+```c++
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+using BLAKE3Result = std::array<uint8_t, NumBytes>;
+
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+void BLAKE3::final(BLAKE3Result<NumBytes> &Result);
+
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+BLAKE3Result<NumBytes> BLAKE3::final();
+```
+```c
+void llvm_blake3_hasher_finalize(
+ const llvm_blake3_hasher *self,
+ uint8_t *out,
+ size_t out_len);
+```
+
+Finalize the hasher and return an output of any length, given in bytes.
+This doesn't modify the hasher itself, and it's possible to finalize
+again after adding more input. The constant `LLVM_BLAKE3_OUT_LEN` provides
+the default output length, 32 bytes, which is recommended for most
+callers.
+
+Outputs shorter than the default length of 32 bytes (256 bits) provide
+less security. An N-bit BLAKE3 output is intended to provide N bits of
+first and second preimage resistance and N/2 bits of collision
+resistance, for any N up to 256. Longer outputs don't provide any
+additional security.
+
+Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly
+requesting a short output is equivalent to truncating the default-length
+output. (Note that this differs from BLAKE2, where digests of different
+lengths are unrelated rather than prefixes of each other.)
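+
+For example, a minimal sketch (using only the C API functions documented
+above) showing that a short output is a prefix of the default one:
+
+```c
+#include "llvm-c/blake3.h"
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+int main() {
+  llvm_blake3_hasher hasher;
+  llvm_blake3_hasher_init(&hasher);
+  llvm_blake3_hasher_update(&hasher, "hello", 5);
+
+  uint8_t full[LLVM_BLAKE3_OUT_LEN];
+  uint8_t truncated[16];
+  llvm_blake3_hasher_finalize(&hasher, full, sizeof(full));
+  // Finalizing doesn't modify the hasher, so finalizing again is fine.
+  llvm_blake3_hasher_finalize(&hasher, truncated, sizeof(truncated));
+
+  // The 16-byte output equals the first 16 bytes of the 32-byte output.
+  assert(memcmp(full, truncated, sizeof(truncated)) == 0);
+  return 0;
+}
+```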
+
+## Less Common API Functions
+
+```c
+void llvm_blake3_hasher_init_keyed(
+ llvm_blake3_hasher *self,
+ const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
+```
+
+Initialize a `llvm_blake3_hasher` in the keyed hashing mode. The key must be
+exactly 32 bytes.
+
+---
+
+```c
+void llvm_blake3_hasher_init_derive_key(
+ llvm_blake3_hasher *self,
+ const char *context);
+```
+
+Initialize a `llvm_blake3_hasher` in the key derivation mode. The context
+string is given as an initialization parameter, and afterwards input key
+material should be given with `llvm_blake3_hasher_update`. The context string
+is a null-terminated C string which should be **hardcoded, globally
+unique, and application-specific**. The context string should not
+include any dynamic input like salts, nonces, or identifiers read from a
+database at runtime. A good default format for the context string is
+`"[application] [commit timestamp] [purpose]"`, e.g., `"example.com
+2019-12-25 16:18:03 session tokens v1"`.
+
+This function is intended for application code written in C. For
+language bindings, see `llvm_blake3_hasher_init_derive_key_raw` below.
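+
+A minimal sketch of deriving a subkey in this mode; the context string and
+the helper name `derive_subkey` here are illustrative, not part of the API:
+
+```c
+#include "llvm-c/blake3.h"
+#include <stddef.h>
+#include <stdint.h>
+
+// Derive a 32-byte subkey from input key material.
+void derive_subkey(const uint8_t *key_material, size_t key_material_len,
+                   uint8_t out[LLVM_BLAKE3_OUT_LEN]) {
+  llvm_blake3_hasher hasher;
+  llvm_blake3_hasher_init_derive_key(
+      &hasher, "example.com 2019-12-25 16:18:03 session tokens v1");
+  llvm_blake3_hasher_update(&hasher, key_material, key_material_len);
+  llvm_blake3_hasher_finalize(&hasher, out, LLVM_BLAKE3_OUT_LEN);
+}
+```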
+
+---
+
+```c
+void llvm_blake3_hasher_init_derive_key_raw(
+ llvm_blake3_hasher *self,
+ const void *context,
+ size_t context_len);
+```
+
+As `llvm_blake3_hasher_init_derive_key` above, except that the context string
+is given as a pointer to an array of arbitrary bytes with a provided
+length. This is intended for writing language bindings, where C string
+conversion would add unnecessary overhead and new error cases. Unicode
+strings should be encoded as UTF-8.
+
+Application code in C should prefer `llvm_blake3_hasher_init_derive_key`,
+which takes the context as a C string. If you need to use arbitrary
+bytes as a context string in application code, consider whether you're
+violating the requirement that context strings should be hardcoded.
+
+---
+
+```c
+void llvm_blake3_hasher_finalize_seek(
+ const llvm_blake3_hasher *self,
+ uint64_t seek,
+ uint8_t *out,
+ size_t out_len);
+```
+
+The same as `llvm_blake3_hasher_finalize`, but with an additional `seek`
+parameter for the starting byte position in the output stream. To
+efficiently stream a large output without allocating memory, call this
+function in a loop, incrementing `seek` by the output length each time.
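+
+For example, a sketch that streams the first 1 MiB of extended output to
+stdout in 64-byte pieces (the helper name `stream_output` is illustrative):
+
+```c
+#include "llvm-c/blake3.h"
+#include <stdint.h>
+#include <stdio.h>
+
+void stream_output(const llvm_blake3_hasher *hasher) {
+  uint8_t block[64];
+  for (uint64_t seek = 0; seek < 1024 * 1024; seek += sizeof(block)) {
+    llvm_blake3_hasher_finalize_seek(hasher, seek, block, sizeof(block));
+    fwrite(block, 1, sizeof(block), stdout);
+  }
+}
+```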
+
+---
+
+```c
+void llvm_blake3_hasher_reset(
+ llvm_blake3_hasher *self);
+```
+
+Reset the hasher to its initial state, prior to any calls to
+`llvm_blake3_hasher_update`. Currently this is no different from calling
+`llvm_blake3_hasher_init` or similar again. However, if this implementation gains
+multithreading support in the future, and if `llvm_blake3_hasher` holds (optional)
+threading resources, this function will reuse those resources.
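+
+A sketch of reusing one hasher for two independent messages (the helper
+name `hash_two` is illustrative):
+
+```c
+#include "llvm-c/blake3.h"
+#include <stddef.h>
+#include <stdint.h>
+
+void hash_two(const uint8_t *a, size_t a_len, const uint8_t *b, size_t b_len,
+              uint8_t out_a[LLVM_BLAKE3_OUT_LEN],
+              uint8_t out_b[LLVM_BLAKE3_OUT_LEN]) {
+  llvm_blake3_hasher hasher;
+  llvm_blake3_hasher_init(&hasher);
+  llvm_blake3_hasher_update(&hasher, a, a_len);
+  llvm_blake3_hasher_finalize(&hasher, out_a, LLVM_BLAKE3_OUT_LEN);
+  // Reset instead of re-initializing, so future threading resources (see
+  // above) could be reused.
+  llvm_blake3_hasher_reset(&hasher);
+  llvm_blake3_hasher_update(&hasher, b, b_len);
+  llvm_blake3_hasher_finalize(&hasher, out_b, LLVM_BLAKE3_OUT_LEN);
+}
+```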
+
+
+# Building
+
+This implementation is just C and assembly files.
+
+## x86
+
+Dynamic dispatch is enabled by default on x86. The implementation will
+query the CPU at runtime to detect SIMD support, and it will use the
+widest instruction set available. By default, `blake3_dispatch.c`
+expects to be linked with code for five different instruction sets:
+portable C, SSE2, SSE4.1, AVX2, and AVX-512.
+
+For each of the x86 SIMD instruction sets, four versions are available:
+three flavors of assembly (Unix, Windows MSVC, and Windows GNU) and one
+version using C intrinsics. The assembly versions are generally
+preferred. They perform better, they perform more consistently across
+different compilers, and they build more quickly. On the other hand, the
+assembly versions are x86\_64-only, and you need to select the right
+flavor for your target platform.
+
+## ARM NEON
+
+The NEON implementation is enabled by default on AArch64, but not on
+other ARM targets, since not all of them support it. To enable it, set
+`BLAKE3_USE_NEON=1`.
+
+To explicitly disable using NEON instructions on AArch64, set
+`BLAKE3_USE_NEON=0`.
+
+## Other Platforms
+
+The portable implementation should work on most other architectures.
+
+# Multithreading
+
+The implementation doesn't currently support multithreading.
diff --git a/llvm/lib/Support/BLAKE3/blake3.c b/llvm/lib/Support/BLAKE3/blake3.c
new file mode 100644
index 000000000000..a369452a3e75
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3.c
@@ -0,0 +1,627 @@
+/*===-- blake3.c - BLAKE3 C Implementation ------------------------*- C -*-===*\
+|* *|
+|* Released into the public domain with CC0 1.0 *|
+|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info. *|
+|* SPDX-License-Identifier: CC0-1.0 *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "blake3_impl.h"
+
+const char *llvm_blake3_version(void) { return BLAKE3_VERSION_STRING; }
+
+INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
+ uint8_t flags) {
+ memcpy(self->cv, key, BLAKE3_KEY_LEN);
+ self->chunk_counter = 0;
+ memset(self->buf, 0, BLAKE3_BLOCK_LEN);
+ self->buf_len = 0;
+ self->blocks_compressed = 0;
+ self->flags = flags;
+}
+
+INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8],
+ uint64_t chunk_counter) {
+ memcpy(self->cv, key, BLAKE3_KEY_LEN);
+ self->chunk_counter = chunk_counter;
+ self->blocks_compressed = 0;
+ memset(self->buf, 0, BLAKE3_BLOCK_LEN);
+ self->buf_len = 0;
+}
+
+INLINE size_t chunk_state_len(const blake3_chunk_state *self) {
+ return (BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed) +
+ ((size_t)self->buf_len);
+}
+
+INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self,
+ const uint8_t *input, size_t input_len) {
+ size_t take = BLAKE3_BLOCK_LEN - ((size_t)self->buf_len);
+ if (take > input_len) {
+ take = input_len;
+ }
+ uint8_t *dest = self->buf + ((size_t)self->buf_len);
+ memcpy(dest, input, take);
+ self->buf_len += (uint8_t)take;
+ return take;
+}
+
+INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) {
+ if (self->blocks_compressed == 0) {
+ return CHUNK_START;
+ } else {
+ return 0;
+ }
+}
+
+typedef struct {
+ uint32_t input_cv[8];
+ uint64_t counter;
+ uint8_t block[BLAKE3_BLOCK_LEN];
+ uint8_t block_len;
+ uint8_t flags;
+} output_t;
+
+INLINE output_t make_output(const uint32_t input_cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags) {
+ output_t ret;
+ memcpy(ret.input_cv, input_cv, 32);
+ memcpy(ret.block, block, BLAKE3_BLOCK_LEN);
+ ret.block_len = block_len;
+ ret.counter = counter;
+ ret.flags = flags;
+ return ret;
+}
+
+// Chaining values within a given chunk (specifically the compress_in_place
+// interface) are represented as words. This avoids unnecessary bytes<->words
+// conversion overhead in the portable implementation. However, the hash_many
+// interface handles both user input and parent node blocks, so it accepts
+// bytes. For that reason, chaining values in the CV stack are represented as
+// bytes.
+INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
+ uint32_t cv_words[8];
+ memcpy(cv_words, self->input_cv, 32);
+ blake3_compress_in_place(cv_words, self->block, self->block_len,
+ self->counter, self->flags);
+ store_cv_words(cv, cv_words);
+}
+
+INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
+ size_t out_len) {
+ uint64_t output_block_counter = seek / 64;
+ size_t offset_within_block = seek % 64;
+ uint8_t wide_buf[64];
+ while (out_len > 0) {
+ blake3_compress_xof(self->input_cv, self->block, self->block_len,
+ output_block_counter, self->flags | ROOT, wide_buf);
+ size_t available_bytes = 64 - offset_within_block;
+ size_t memcpy_len;
+ if (out_len > available_bytes) {
+ memcpy_len = available_bytes;
+ } else {
+ memcpy_len = out_len;
+ }
+ memcpy(out, wide_buf + offset_within_block, memcpy_len);
+ out += memcpy_len;
+ out_len -= memcpy_len;
+ output_block_counter += 1;
+ offset_within_block = 0;
+ }
+}
+
+INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
+ size_t input_len) {
+ if (self->buf_len > 0) {
+ size_t take = chunk_state_fill_buf(self, input, input_len);
+ input += take;
+ input_len -= take;
+ if (input_len > 0) {
+ blake3_compress_in_place(
+ self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter,
+ self->flags | chunk_state_maybe_start_flag(self));
+ self->blocks_compressed += 1;
+ self->buf_len = 0;
+ memset(self->buf, 0, BLAKE3_BLOCK_LEN);
+ }
+ }
+
+ while (input_len > BLAKE3_BLOCK_LEN) {
+ blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
+ self->chunk_counter,
+ self->flags | chunk_state_maybe_start_flag(self));
+ self->blocks_compressed += 1;
+ input += BLAKE3_BLOCK_LEN;
+ input_len -= BLAKE3_BLOCK_LEN;
+ }
+
+ size_t take = chunk_state_fill_buf(self, input, input_len);
+ input += take;
+ input_len -= take;
+}
+
+INLINE output_t chunk_state_output(const blake3_chunk_state *self) {
+ uint8_t block_flags =
+ self->flags | chunk_state_maybe_start_flag(self) | CHUNK_END;
+ return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter,
+ block_flags);
+}
+
+INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
+ const uint32_t key[8], uint8_t flags) {
+ return make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT);
+}
+
+// Given some input larger than one chunk, return the number of bytes that
+// should go in the left subtree. This is the largest power-of-2 number of
+// chunks that leaves at least 1 byte for the right subtree.
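+// For example, a 5-chunk input splits into 4 chunks on the left and 1 chunk
+// on the right, and a 4-chunk-plus-1-byte input splits into 4 chunks on the
+// left and 1 byte on the right.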
+INLINE size_t left_len(size_t content_len) {
+ // Subtract 1 to reserve at least one byte for the right side. content_len
+ // should always be greater than BLAKE3_CHUNK_LEN.
+ size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN;
+ return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN;
+}
+
+// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time
+// on a single thread. Write out the chunk chaining values and return the
+// number of chunks hashed. These chunks are never the root and never empty;
+// those cases use a different codepath.
+INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len,
+ const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags,
+ uint8_t *out) {
+#if defined(BLAKE3_TESTING)
+ assert(0 < input_len);
+ assert(input_len <= MAX_SIMD_DEGREE * BLAKE3_CHUNK_LEN);
+#endif
+
+ const uint8_t *chunks_array[MAX_SIMD_DEGREE];
+ size_t input_position = 0;
+ size_t chunks_array_len = 0;
+ while (input_len - input_position >= BLAKE3_CHUNK_LEN) {
+ chunks_array[chunks_array_len] = &input[input_position];
+ input_position += BLAKE3_CHUNK_LEN;
+ chunks_array_len += 1;
+ }
+
+ blake3_hash_many(chunks_array, chunks_array_len,
+ BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter,
+ true, flags, CHUNK_START, CHUNK_END, out);
+
+ // Hash the remaining partial chunk, if there is one. Note that the empty
+ // chunk (meaning the empty message) is a different codepath.
+ if (input_len > input_position) {
+ uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;
+ blake3_chunk_state chunk_state;
+ chunk_state_init(&chunk_state, key, flags);
+ chunk_state.chunk_counter = counter;
+ chunk_state_update(&chunk_state, &input[input_position],
+ input_len - input_position);
+ output_t output = chunk_state_output(&chunk_state);
+ output_chaining_value(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]);
+ return chunks_array_len + 1;
+ } else {
+ return chunks_array_len;
+ }
+}
+
+// Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time
+// on a single thread. Write out the parent chaining values and return the
+// number of parents hashed. (If there's an odd input chaining value left over,
+// return it as an additional output.) These parents are never the root and
+// never empty; those cases use a different codepath.
+INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
+ size_t num_chaining_values,
+ const uint32_t key[8], uint8_t flags,
+ uint8_t *out) {
+#if defined(BLAKE3_TESTING)
+ assert(2 <= num_chaining_values);
+ assert(num_chaining_values <= 2 * MAX_SIMD_DEGREE_OR_2);
+#endif
+
+ const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2];
+ size_t parents_array_len = 0;
+ while (num_chaining_values - (2 * parents_array_len) >= 2) {
+ parents_array[parents_array_len] =
+ &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN];
+ parents_array_len += 1;
+ }
+
+ blake3_hash_many(parents_array, parents_array_len, 1, key,
+ 0, // Parents always use counter 0.
+ false, flags | PARENT,
+ 0, // Parents have no start flags.
+ 0, // Parents have no end flags.
+ out);
+
+ // If there's an odd child left over, it becomes an output.
+ if (num_chaining_values > 2 * parents_array_len) {
+ memcpy(&out[parents_array_len * BLAKE3_OUT_LEN],
+ &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN],
+ BLAKE3_OUT_LEN);
+ return parents_array_len + 1;
+ } else {
+ return parents_array_len;
+ }
+}
+
+// The wide helper function writes out an array of chaining values and returns
+// the length of that array. The number of chaining values returned is the
+// dynamically detected SIMD degree, at most MAX_SIMD_DEGREE, or fewer if the
+// input is shorter than that many chunks. The reason for maintaining a wide
+// array of chaining values going back up the tree is to allow the
+// implementation to hash as many parents in parallel as possible.
+//
+// As a special case when the SIMD degree is 1, this function will still return
+// at least 2 outputs. This guarantees that this function doesn't perform the
+// root compression. (If it did, it would use the wrong flags, and also we
+// wouldn't be able to implement extendable output.) Note that this function is
+// not used when the whole input is only 1 chunk long; that's a different
+// codepath.
+//
+// Why not just have the caller split the input on the first update(), instead
+// of implementing this special rule? Because we don't want to limit SIMD or
+// multi-threading parallelism for that update().
+static size_t blake3_compress_subtree_wide(const uint8_t *input,
+ size_t input_len,
+ const uint32_t key[8],
+ uint64_t chunk_counter,
+ uint8_t flags, uint8_t *out) {
+ // Note that the single chunk case does *not* bump the SIMD degree up to 2
+ // when it is 1. If this implementation adds multi-threading in the future,
+ // this gives us the option of multi-threading even the 2-chunk case, which
+ // can help performance on smaller platforms.
+ if (input_len <= blake3_simd_degree() * BLAKE3_CHUNK_LEN) {
+ return compress_chunks_parallel(input, input_len, key, chunk_counter, flags,
+ out);
+ }
+
+ // With more than simd_degree chunks, we need to recurse. Start by dividing
+ // the input into left and right subtrees. (Note that this is only optimal
+ // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree
+ // of 3 or something, we'll need a more complicated strategy.)
+ size_t left_input_len = left_len(input_len);
+ size_t right_input_len = input_len - left_input_len;
+ const uint8_t *right_input = &input[left_input_len];
+ uint64_t right_chunk_counter =
+ chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
+
+ // Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2 to
+ // account for the special case of returning 2 outputs when the SIMD degree
+ // is 1.
+ uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+ size_t degree = blake3_simd_degree();
+ if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
+ // The special case: We always use a degree of at least two, to make
+ // sure there are two outputs. Except, as noted above, at the chunk
+ // level, where we allow degree=1. (Note that the 1-chunk-input case is
+ // a different codepath.)
+ degree = 2;
+ }
+ uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
+
+ // Recurse! If this implementation adds multi-threading support in the
+ // future, this is where it will go.
+ size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key,
+ chunk_counter, flags, cv_array);
+ size_t right_n = blake3_compress_subtree_wide(
+ right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);
+
+ // The special case again. If simd_degree=1, then we'll have left_n=1 and
+ // right_n=1. Rather than compressing them into a single output, return
+ // them directly, to make sure we always have at least two outputs.
+ if (left_n == 1) {
+ memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+ return 2;
+ }
+
+ // Otherwise, do one layer of parent node compression.
+ size_t num_chaining_values = left_n + right_n;
+ return compress_parents_parallel(cv_array, num_chaining_values, key, flags,
+ out);
+}
+
+// Hash a subtree with compress_subtree_wide(), and then condense the resulting
+// list of chaining values down to a single parent node. Don't compress that
+// last parent node, however. Instead, return its message bytes (the
+// concatenated chaining values of its children). This is necessary when the
+// first call to update() supplies a complete subtree, because the topmost
+// parent node of that subtree could end up being the root. It's also necessary
+// for extended output in the general case.
+//
+// As with compress_subtree_wide(), this function is not used on inputs of 1
+// chunk or less. That's a different codepath.
+INLINE void compress_subtree_to_parent_node(
+ const uint8_t *input, size_t input_len, const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) {
+#if defined(BLAKE3_TESTING)
+ assert(input_len > BLAKE3_CHUNK_LEN);
+#endif
+
+ uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
+ size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
+ chunk_counter, flags, cv_array);
+ assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
+
+ // If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
+ // compress_subtree_wide() returns more than 2 chaining values. Condense
+ // them into 2 by forming parent nodes repeatedly.
+ uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
+ // The second half of this loop condition is always true, and we just
+ // asserted it above. But GCC can't tell that it's always true, and if NDEBUG
+ // is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
+ // warnings here. GCC 8.5 is particularly sensitive, so if you're changing
+ // this code, test it against that version.
+ while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
+ num_cvs =
+ compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
+ memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
+ }
+ memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
+}
+
+INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
+ uint8_t flags) {
+ memcpy(self->key, key, BLAKE3_KEY_LEN);
+ chunk_state_init(&self->chunk, key, flags);
+ self->cv_stack_len = 0;
+}
+
+void llvm_blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
+
+void llvm_blake3_hasher_init_keyed(blake3_hasher *self,
+ const uint8_t key[BLAKE3_KEY_LEN]) {
+ uint32_t key_words[8];
+ load_key_words(key, key_words);
+ hasher_init_base(self, key_words, KEYED_HASH);
+}
+
+void llvm_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
+ size_t context_len) {
+ blake3_hasher context_hasher;
+ hasher_init_base(&context_hasher, IV, DERIVE_KEY_CONTEXT);
+ llvm_blake3_hasher_update(&context_hasher, context, context_len);
+ uint8_t context_key[BLAKE3_KEY_LEN];
+ llvm_blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
+ uint32_t context_key_words[8];
+ load_key_words(context_key, context_key_words);
+ hasher_init_base(self, context_key_words, DERIVE_KEY_MATERIAL);
+}
+
+void llvm_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
+ llvm_blake3_hasher_init_derive_key_raw(self, context, strlen(context));
+}
+
+// As described in hasher_push_cv() below, we do "lazy merging", delaying
+// merges until right before the next CV is about to be added. This is
+// different from the reference implementation. Another difference is that we
+// aren't always merging 1 chunk at a time. Instead, each CV might represent
+// any power-of-two number of chunks, as long as the smaller-above-larger stack
+// order is maintained. Instead of the "count the trailing 0-bits" algorithm
+// described in the spec, we use a "count the total number of 1-bits" variant
+// that doesn't require us to retain the subtree size of the CV on top of the
+// stack. The principle is the same: each CV that should remain in the stack is
+// represented by a 1-bit in the total number of chunks (or bytes) so far.
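+// For example, after 6 chunks (binary 110) the count has two 1-bits, so the
+// stack keeps exactly two CVs: one for a 4-chunk subtree and one for a
+// 2-chunk subtree.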
+INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
+ size_t post_merge_stack_len = (size_t)popcnt(total_len);
+ while (self->cv_stack_len > post_merge_stack_len) {
+ uint8_t *parent_node =
+ &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN];
+ output_t output = parent_output(parent_node, self->key, self->chunk.flags);
+ output_chaining_value(&output, parent_node);
+ self->cv_stack_len -= 1;
+ }
+}
+
+// In reference_impl.rs, we merge the new CV with existing CVs from the stack
+// before pushing it. We can do that because we know more input is coming, so
+// we know none of the merges are root.
+//
+// This setting is different. We want to feed as much input as possible to
+// compress_subtree_wide(), without setting aside anything for the chunk_state.
+// If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once
+// as a single subtree, if at all possible.
+//
+// This leads to two problems:
+// 1) This 64 KiB input might be the only call that ever gets made to update.
+// In this case, the root node of the 64 KiB subtree would be the root node
+// of the whole tree, and it would need to be ROOT finalized. We can't
+// compress it until we know.
+// 2) This 64 KiB input might complete a larger tree, whose root node is
+//    similarly going to be the root of the whole tree. For example, maybe
+//    we have 192 KiB (that is, 128 + 64) hashed so far. We can't compress the
+// node at the root of the 256 KiB subtree until we know how to finalize it.
+//
+// The second problem is solved with "lazy merging". That is, when we're about
+// to add a CV to the stack, we don't merge it with anything first, as the
+// reference impl does. Instead we do merges using the *previous* CV that was
+// added, which is sitting on top of the stack, and we put the new CV
+// (unmerged) on top of the stack afterwards. This guarantees that we never
+// merge the root node until finalize().
+//
+// Solving the first problem requires an additional tool,
+// compress_subtree_to_parent_node(). That function always returns the top
+// *two* chaining values of the subtree it's compressing. We then do lazy
+// merging with each of them separately, so that the second CV will always
+// remain unmerged. (That also helps us support extendable output when we're
+// hashing an input all-at-once.)
+INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
+ uint64_t chunk_counter) {
+ hasher_merge_cv_stack(self, chunk_counter);
+ memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv,
+ BLAKE3_OUT_LEN);
+ self->cv_stack_len += 1;
+}
+
+void llvm_blake3_hasher_update(blake3_hasher *self, const void *input,
+ size_t input_len) {
+ // Explicitly checking for zero avoids causing UB by passing a null pointer
+ // to memcpy. This comes up in practice with things like:
+ // std::vector<uint8_t> v;
+ // blake3_hasher_update(&hasher, v.data(), v.size());
+ if (input_len == 0) {
+ return;
+ }
+
+ const uint8_t *input_bytes = (const uint8_t *)input;
+
+ // If we have some partial chunk bytes in the internal chunk_state, we need
+ // to finish that chunk first.
+ if (chunk_state_len(&self->chunk) > 0) {
+ size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk);
+ if (take > input_len) {
+ take = input_len;
+ }
+ chunk_state_update(&self->chunk, input_bytes, take);
+ input_bytes += take;
+ input_len -= take;
+ // If we've filled the current chunk and there's more coming, finalize this
+ // chunk and proceed. In this case we know it's not the root.
+ if (input_len > 0) {
+ output_t output = chunk_state_output(&self->chunk);
+ uint8_t chunk_cv[32];
+ output_chaining_value(&output, chunk_cv);
+ hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter);
+ chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1);
+ } else {
+ return;
+ }
+ }
+
+ // Now the chunk_state is clear, and we have more input. If there's more than
+ // a single chunk (so, definitely not the root chunk), hash the largest whole
+ // subtree we can, with the full benefits of SIMD (and maybe in the future,
+ // multi-threading) parallelism. Two restrictions:
+ // - The subtree has to be a power-of-2 number of chunks. Only subtrees along
+ // the right edge can be incomplete, and we don't know where the right edge
+ // is going to be until we get to finalize().
+ // - The subtree must evenly divide the total number of chunks up until this
+ // point (if total is not 0). If the current incomplete subtree is only
+ // waiting for 1 more chunk, we can't hash a subtree of 4 chunks. We have
+ // to complete the current subtree first.
+ // Because we might need to break up the input to form powers of 2, or to
+ // evenly divide what we already have, this part runs in a loop.
+ while (input_len > BLAKE3_CHUNK_LEN) {
+ size_t subtree_len = round_down_to_power_of_2(input_len);
+ uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
+ // Shrink the subtree_len until it evenly divides the count so far. We know
+ // that subtree_len itself is a power of 2, so we can use a bitmasking
+ // trick instead of an actual remainder operation. (Note that if the caller
+ // consistently passes power-of-2 inputs of the same size, as is hopefully
+ // typical, this loop condition will always fail, and subtree_len will
+ // always be the full length of the input.)
+ //
+ // An aside: We don't have to shrink subtree_len quite this much. For
+ // example, if count_so_far is 1, we could pass 2 chunks to
+ // compress_subtree_to_parent_node. Since we'll get 2 CVs back, we'll still
+ // get the right answer in the end, and we might get to use 2-way SIMD
+  // parallelism. The problem with this optimization is that it gets us
+ // stuck always hashing 2 chunks. The total number of chunks will remain
+ // odd, and we'll never graduate to higher degrees of parallelism. See
+ // https://github.com/BLAKE3-team/BLAKE3/issues/69.
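+    //
+    // For example, with count_so_far at 1 chunk (1024 bytes) and 4096 bytes
+    // of input, subtree_len shrinks 4096 -> 2048 -> 1024: 4095 & 1024 and
+    // 2047 & 1024 are nonzero, but 1023 & 1024 is zero.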
+ while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
+ subtree_len /= 2;
+ }
+ // The shrunken subtree_len might now be 1 chunk long. If so, hash that one
+ // chunk by itself. Otherwise, compress the subtree into a pair of CVs.
+ uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
+ if (subtree_len <= BLAKE3_CHUNK_LEN) {
+ blake3_chunk_state chunk_state;
+ chunk_state_init(&chunk_state, self->key, self->chunk.flags);
+ chunk_state.chunk_counter = self->chunk.chunk_counter;
+ chunk_state_update(&chunk_state, input_bytes, subtree_len);
+ output_t output = chunk_state_output(&chunk_state);
+ uint8_t cv[BLAKE3_OUT_LEN];
+ output_chaining_value(&output, cv);
+ hasher_push_cv(self, cv, chunk_state.chunk_counter);
+ } else {
+ // This is the high-performance happy path, though getting here depends
+ // on the caller giving us a long enough input.
+ uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
+ compress_subtree_to_parent_node(input_bytes, subtree_len, self->key,
+ self->chunk.chunk_counter,
+ self->chunk.flags, cv_pair);
+ hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);
+ hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN],
+ self->chunk.chunk_counter + (subtree_chunks / 2));
+ }
+ self->chunk.chunk_counter += subtree_chunks;
+ input_bytes += subtree_len;
+ input_len -= subtree_len;
+ }
+
+ // If there's any remaining input less than a full chunk, add it to the chunk
+ // state. In that case, also do a final merge loop to make sure the subtree
+ // stack doesn't contain any unmerged pairs. The remaining input means we
+ // know these merges are non-root. This merge loop isn't strictly necessary
+  // here, because hasher_push_cv already does its own merge loop, but it
+  // simplifies llvm_blake3_hasher_finalize below.
+ if (input_len > 0) {
+ chunk_state_update(&self->chunk, input_bytes, input_len);
+ hasher_merge_cv_stack(self, self->chunk.chunk_counter);
+ }
+}
+
+void llvm_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
+ size_t out_len) {
+ llvm_blake3_hasher_finalize_seek(self, 0, out, out_len);
+#if LLVM_MEMORY_SANITIZER_BUILD
+ // Avoid false positives due to uninstrumented assembly code.
+ __msan_unpoison(out, out_len);
+#endif
+}
+
+void llvm_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
+ uint8_t *out, size_t out_len) {
+ // Explicitly checking for zero avoids causing UB by passing a null pointer
+ // to memcpy. This comes up in practice with things like:
+ // std::vector<uint8_t> v;
+ // blake3_hasher_finalize(&hasher, v.data(), v.size());
+ if (out_len == 0) {
+ return;
+ }
+
+ // If the subtree stack is empty, then the current chunk is the root.
+ if (self->cv_stack_len == 0) {
+ output_t output = chunk_state_output(&self->chunk);
+ output_root_bytes(&output, seek, out, out_len);
+ return;
+ }
+ // If there are any bytes in the chunk state, finalize that chunk and do a
+ // roll-up merge between that chunk hash and every subtree in the stack. In
+ // this case, the extra merge loop at the end of blake3_hasher_update
+ // guarantees that none of the subtrees in the stack need to be merged with
+ // each other first. Otherwise, if there are no bytes in the chunk state,
+ // then the top of the stack is a chunk hash, and we start the merge from
+ // that.
+ output_t output;
+ size_t cvs_remaining;
+ if (chunk_state_len(&self->chunk) > 0) {
+ cvs_remaining = self->cv_stack_len;
+ output = chunk_state_output(&self->chunk);
+ } else {
+ // There are always at least 2 CVs in the stack in this case.
+ cvs_remaining = self->cv_stack_len - 2;
+ output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key,
+ self->chunk.flags);
+ }
+ while (cvs_remaining > 0) {
+ cvs_remaining -= 1;
+ uint8_t parent_block[BLAKE3_BLOCK_LEN];
+ memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32);
+ output_chaining_value(&output, &parent_block[32]);
+ output = parent_output(parent_block, self->key, self->chunk.flags);
+ }
+ output_root_bytes(&output, seek, out, out_len);
+}
+
+void llvm_blake3_hasher_reset(blake3_hasher *self) {
+ chunk_state_reset(&self->chunk, self->key, 0);
+ self->cv_stack_len = 0;
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2.c b/llvm/lib/Support/BLAKE3/blake3_avx2.c
new file mode 100644
index 000000000000..e76aa1a3aeb3
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2.c
@@ -0,0 +1,326 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 8
+
+INLINE __m256i loadu(const uint8_t src[32]) {
+ return _mm256_loadu_si256((const __m256i *)src);
+}
+
+INLINE void storeu(__m256i src, uint8_t dest[32]) {
+ _mm256_storeu_si256((__m256i *)dest, src);
+}
+
+INLINE __m256i addv(__m256i a, __m256i b) { return _mm256_add_epi32(a, b); }
+
+// Note that clang-format mangles the name "xor", presumably because `xor` is
+// an alternative operator token in C++.
+INLINE __m256i xorv(__m256i a, __m256i b) { return _mm256_xor_si256(a, b); }
+
+INLINE __m256i set1(uint32_t x) { return _mm256_set1_epi32((int32_t)x); }
+
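+// Rotations by 16 and 8 are whole-byte rotations, implemented as byte
+// shuffles; rotations by 12 and 7 aren't byte-aligned, so they fall back to
+// a shift/or pair.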
+INLINE __m256i rot16(__m256i x) {
+ return _mm256_shuffle_epi8(
+ x, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
+ 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2));
+}
+
+INLINE __m256i rot12(__m256i x) {
+ return _mm256_or_si256(_mm256_srli_epi32(x, 12), _mm256_slli_epi32(x, 32 - 12));
+}
+
+INLINE __m256i rot8(__m256i x) {
+ return _mm256_shuffle_epi8(
+ x, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1,
+ 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
+}
+
+INLINE __m256i rot7(__m256i x) {
+ return _mm256_or_si256(_mm256_srli_epi32(x, 7), _mm256_slli_epi32(x, 32 - 7));
+}
+
+INLINE void round_fn(__m256i v[16], __m256i m[16], size_t r) {
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+ v[0] = addv(v[0], v[4]);
+ v[1] = addv(v[1], v[5]);
+ v[2] = addv(v[2], v[6]);
+ v[3] = addv(v[3], v[7]);
+ v[12] = xorv(v[12], v[0]);
+ v[13] = xorv(v[13], v[1]);
+ v[14] = xorv(v[14], v[2]);
+ v[15] = xorv(v[15], v[3]);
+ v[12] = rot16(v[12]);
+ v[13] = rot16(v[13]);
+ v[14] = rot16(v[14]);
+ v[15] = rot16(v[15]);
+ v[8] = addv(v[8], v[12]);
+ v[9] = addv(v[9], v[13]);
+ v[10] = addv(v[10], v[14]);
+ v[11] = addv(v[11], v[15]);
+ v[4] = xorv(v[4], v[8]);
+ v[5] = xorv(v[5], v[9]);
+ v[6] = xorv(v[6], v[10]);
+ v[7] = xorv(v[7], v[11]);
+ v[4] = rot12(v[4]);
+ v[5] = rot12(v[5]);
+ v[6] = rot12(v[6]);
+ v[7] = rot12(v[7]);
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+ v[0] = addv(v[0], v[4]);
+ v[1] = addv(v[1], v[5]);
+ v[2] = addv(v[2], v[6]);
+ v[3] = addv(v[3], v[7]);
+ v[12] = xorv(v[12], v[0]);
+ v[13] = xorv(v[13], v[1]);
+ v[14] = xorv(v[14], v[2]);
+ v[15] = xorv(v[15], v[3]);
+ v[12] = rot8(v[12]);
+ v[13] = rot8(v[13]);
+ v[14] = rot8(v[14]);
+ v[15] = rot8(v[15]);
+ v[8] = addv(v[8], v[12]);
+ v[9] = addv(v[9], v[13]);
+ v[10] = addv(v[10], v[14]);
+ v[11] = addv(v[11], v[15]);
+ v[4] = xorv(v[4], v[8]);
+ v[5] = xorv(v[5], v[9]);
+ v[6] = xorv(v[6], v[10]);
+ v[7] = xorv(v[7], v[11]);
+ v[4] = rot7(v[4]);
+ v[5] = rot7(v[5]);
+ v[6] = rot7(v[6]);
+ v[7] = rot7(v[7]);
+
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+ v[0] = addv(v[0], v[5]);
+ v[1] = addv(v[1], v[6]);
+ v[2] = addv(v[2], v[7]);
+ v[3] = addv(v[3], v[4]);
+ v[15] = xorv(v[15], v[0]);
+ v[12] = xorv(v[12], v[1]);
+ v[13] = xorv(v[13], v[2]);
+ v[14] = xorv(v[14], v[3]);
+ v[15] = rot16(v[15]);
+ v[12] = rot16(v[12]);
+ v[13] = rot16(v[13]);
+ v[14] = rot16(v[14]);
+ v[10] = addv(v[10], v[15]);
+ v[11] = addv(v[11], v[12]);
+ v[8] = addv(v[8], v[13]);
+ v[9] = addv(v[9], v[14]);
+ v[5] = xorv(v[5], v[10]);
+ v[6] = xorv(v[6], v[11]);
+ v[7] = xorv(v[7], v[8]);
+ v[4] = xorv(v[4], v[9]);
+ v[5] = rot12(v[5]);
+ v[6] = rot12(v[6]);
+ v[7] = rot12(v[7]);
+ v[4] = rot12(v[4]);
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+ v[0] = addv(v[0], v[5]);
+ v[1] = addv(v[1], v[6]);
+ v[2] = addv(v[2], v[7]);
+ v[3] = addv(v[3], v[4]);
+ v[15] = xorv(v[15], v[0]);
+ v[12] = xorv(v[12], v[1]);
+ v[13] = xorv(v[13], v[2]);
+ v[14] = xorv(v[14], v[3]);
+ v[15] = rot8(v[15]);
+ v[12] = rot8(v[12]);
+ v[13] = rot8(v[13]);
+ v[14] = rot8(v[14]);
+ v[10] = addv(v[10], v[15]);
+ v[11] = addv(v[11], v[12]);
+ v[8] = addv(v[8], v[13]);
+ v[9] = addv(v[9], v[14]);
+ v[5] = xorv(v[5], v[10]);
+ v[6] = xorv(v[6], v[11]);
+ v[7] = xorv(v[7], v[8]);
+ v[4] = xorv(v[4], v[9]);
+ v[5] = rot7(v[5]);
+ v[6] = rot7(v[6]);
+ v[7] = rot7(v[7]);
+ v[4] = rot7(v[4]);
+}
+
+INLINE void transpose_vecs(__m256i vecs[DEGREE]) {
+ // Interleave 32-bit lanes. The low unpack is lanes 00/11/44/55, and the high
+ // is 22/33/66/77.
+ __m256i ab_0145 = _mm256_unpacklo_epi32(vecs[0], vecs[1]);
+ __m256i ab_2367 = _mm256_unpackhi_epi32(vecs[0], vecs[1]);
+ __m256i cd_0145 = _mm256_unpacklo_epi32(vecs[2], vecs[3]);
+ __m256i cd_2367 = _mm256_unpackhi_epi32(vecs[2], vecs[3]);
+ __m256i ef_0145 = _mm256_unpacklo_epi32(vecs[4], vecs[5]);
+ __m256i ef_2367 = _mm256_unpackhi_epi32(vecs[4], vecs[5]);
+ __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
+ __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
+
+  // Interleave 64-bit lanes. The low unpack is lanes 00/22 and the high is
+ // 11/33.
+ __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
+ __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
+ __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);
+ __m256i abcd_37 = _mm256_unpackhi_epi64(ab_2367, cd_2367);
+ __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);
+ __m256i efgh_15 = _mm256_unpackhi_epi64(ef_0145, gh_0145);
+ __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);
+ __m256i efgh_37 = _mm256_unpackhi_epi64(ef_2367, gh_2367);
+
+ // Interleave 128-bit lanes.
+ vecs[0] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x20);
+ vecs[1] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x20);
+ vecs[2] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x20);
+ vecs[3] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x20);
+ vecs[4] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x31);
+ vecs[5] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x31);
+ vecs[6] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x31);
+ vecs[7] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x31);
+}
+
+INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
+ size_t block_offset, __m256i out[16]) {
+ out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m256i)]);
+ out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m256i)]);
+ out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m256i)]);
+ out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m256i)]);
+ out[4] = loadu(&inputs[4][block_offset + 0 * sizeof(__m256i)]);
+ out[5] = loadu(&inputs[5][block_offset + 0 * sizeof(__m256i)]);
+ out[6] = loadu(&inputs[6][block_offset + 0 * sizeof(__m256i)]);
+ out[7] = loadu(&inputs[7][block_offset + 0 * sizeof(__m256i)]);
+ out[8] = loadu(&inputs[0][block_offset + 1 * sizeof(__m256i)]);
+ out[9] = loadu(&inputs[1][block_offset + 1 * sizeof(__m256i)]);
+ out[10] = loadu(&inputs[2][block_offset + 1 * sizeof(__m256i)]);
+ out[11] = loadu(&inputs[3][block_offset + 1 * sizeof(__m256i)]);
+ out[12] = loadu(&inputs[4][block_offset + 1 * sizeof(__m256i)]);
+ out[13] = loadu(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
+ out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
+ out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
+ for (size_t i = 0; i < 8; ++i) {
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+ }
+ transpose_vecs(&out[0]);
+ transpose_vecs(&out[8]);
+}
+
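+// Build the per-lane low/high counter words. When increment_counter is set,
+// lane i gets counter + i. AVX2 has no unsigned 32-bit compare, so overflow
+// of the low word is detected with a signed compare after flipping the sign
+// bits, and the resulting all-ones carry is subtracted from the high word
+// (adding 1 where the low word wrapped).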
+INLINE void load_counters(uint64_t counter, bool increment_counter,
+ __m256i *out_lo, __m256i *out_hi) {
+ const __m256i mask = _mm256_set1_epi32(-(int32_t)increment_counter);
+ const __m256i add0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ const __m256i add1 = _mm256_and_si256(mask, add0);
+ __m256i l = _mm256_add_epi32(_mm256_set1_epi32((int32_t)counter), add1);
+ __m256i carry = _mm256_cmpgt_epi32(_mm256_xor_si256(add1, _mm256_set1_epi32(0x80000000)),
+ _mm256_xor_si256( l, _mm256_set1_epi32(0x80000000)));
+ __m256i h = _mm256_sub_epi32(_mm256_set1_epi32((int32_t)(counter >> 32)), carry);
+ *out_lo = l;
+ *out_hi = h;
+}
+
+static
+void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ __m256i h_vecs[8] = {
+ set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
+ set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
+ };
+ __m256i counter_low_vec, counter_high_vec;
+ load_counters(counter, increment_counter, &counter_low_vec,
+ &counter_high_vec);
+ uint8_t block_flags = flags | flags_start;
+
+ for (size_t block = 0; block < blocks; block++) {
+ if (block + 1 == blocks) {
+ block_flags |= flags_end;
+ }
+ __m256i block_len_vec = set1(BLAKE3_BLOCK_LEN);
+ __m256i block_flags_vec = set1(block_flags);
+ __m256i msg_vecs[16];
+ transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+ __m256i v[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]),
+ counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+ };
+ round_fn(v, msg_vecs, 0);
+ round_fn(v, msg_vecs, 1);
+ round_fn(v, msg_vecs, 2);
+ round_fn(v, msg_vecs, 3);
+ round_fn(v, msg_vecs, 4);
+ round_fn(v, msg_vecs, 5);
+ round_fn(v, msg_vecs, 6);
+ h_vecs[0] = xorv(v[0], v[8]);
+ h_vecs[1] = xorv(v[1], v[9]);
+ h_vecs[2] = xorv(v[2], v[10]);
+ h_vecs[3] = xorv(v[3], v[11]);
+ h_vecs[4] = xorv(v[4], v[12]);
+ h_vecs[5] = xorv(v[5], v[13]);
+ h_vecs[6] = xorv(v[6], v[14]);
+ h_vecs[7] = xorv(v[7], v[15]);
+
+ block_flags = flags;
+ }
+
+ transpose_vecs(h_vecs);
+ storeu(h_vecs[0], &out[0 * sizeof(__m256i)]);
+ storeu(h_vecs[1], &out[1 * sizeof(__m256i)]);
+ storeu(h_vecs[2], &out[2 * sizeof(__m256i)]);
+ storeu(h_vecs[3], &out[3 * sizeof(__m256i)]);
+ storeu(h_vecs[4], &out[4 * sizeof(__m256i)]);
+ storeu(h_vecs[5], &out[5 * sizeof(__m256i)]);
+ storeu(h_vecs[6], &out[6 * sizeof(__m256i)]);
+ storeu(h_vecs[7], &out[7 * sizeof(__m256i)]);
+}
+
+#if !defined(BLAKE3_NO_SSE41)
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+#else
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+#endif
+
+void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out) {
+ while (num_inputs >= DEGREE) {
+ blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += DEGREE;
+ }
+ inputs += DEGREE;
+ num_inputs -= DEGREE;
+ out = &out[DEGREE * BLAKE3_OUT_LEN];
+ }
+#if !defined(BLAKE3_NO_SSE41)
+ blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+#else
+ blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end,
+ out);
+#endif
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
new file mode 100644
index 000000000000..449e07492832
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
@@ -0,0 +1,1826 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN _blake3_hash_many_avx2
+HIDDEN blake3_hash_many_avx2
+.global _blake3_hash_many_avx2
+.global blake3_hash_many_avx2
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+ .p2align 6
+_blake3_hash_many_avx2:
+blake3_hash_many_avx2:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 680
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9d
+ vmovd xmm0, r9d
+ vpbroadcastd ymm0, xmm0
+ vmovdqa ymmword ptr [rsp+0x280], ymm0
+ vpand ymm1, ymm0, ymmword ptr [ADD0+rip]
+ vpand ymm2, ymm0, ymmword ptr [ADD1+rip]
+ vmovdqa ymmword ptr [rsp+0x220], ymm2
+ vmovd xmm2, r8d
+ vpbroadcastd ymm2, xmm2
+ vpaddd ymm2, ymm2, ymm1
+ vmovdqa ymmword ptr [rsp+0x240], ymm2
+ vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm1, ymm2
+ shr r8, 32
+ vmovd xmm3, r8d
+ vpbroadcastd ymm3, xmm3
+ vpsubd ymm3, ymm3, ymm2
+ vmovdqa ymmword ptr [rsp+0x260], ymm3
+ shl rdx, 6
+ mov qword ptr [rsp+0x2A0], rdx
+ cmp rsi, 8
+ jc 3f
+2:
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+.p2align 5
+9:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x2A0]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x200], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x20], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x40], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x60], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x80], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0xA0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0xC0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0xE0], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x100], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x120], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x140], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x160], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x180], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x1A0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x1C0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x1E0], ymm11
+ vpbroadcastd ymm15, dword ptr [rsp+0x200]
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm0, ymmword ptr [rsp+0x240]
+ vpxor ymm13, ymm1, ymmword ptr [rsp+0x260]
+ vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpxor ymm15, ymm3, ymm15
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip]
+ vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip]
+ vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip]
+ vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip]
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
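+ # EFLAGS still hold the block-count compare from the top of the loop
+ # (vector ops and movzx leave flags untouched), so jne loops until the
+ # last 64-byte block; eax reloads the base flags byte for the next pass.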
+ movzx eax, byte ptr [rbp+0x38]
+ jne 9b
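+ # All blocks done: fetch the output pointer, then transpose the eight
+ # per-lane state vectors into eight contiguous 32-byte hashes.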
+ mov rbx, qword ptr [rbp+0x50]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
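+ # Advance the per-lane 64-bit block counters: add the increments to the
+ # low words, flip sign bits so signed vpcmpgtd detects unsigned overflow,
+ # and subtract the resulting all-ones mask from the high words (carry).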
+ vmovdqa ymm0, ymmword ptr [rsp+0x220]
+ vpaddd ymm1, ymm0, ymmword ptr [rsp+0x240]
+ vmovdqa ymmword ptr [rsp+0x240], ymm1
+ vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm0, ymm2
+ vmovdqa ymm0, ymmword ptr [rsp+0x260]
+ vpsubd ymm2, ymm0, ymm2
+ vmovdqa ymmword ptr [rsp+0x260], ymm2
+ add rdi, 64
+ add rbx, 256
+ mov qword ptr [rbp+0x50], rbx
+ sub rsi, 8
+ cmp rsi, 8
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ vzeroupper
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
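+# Tail: the remaining num_inputs % 8 inputs are handled by a 4-way pass,
+# then a 2-way pass, then a single-input pass, selected by bit tests on rsi.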
+.p2align 5
+3:
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, qword ptr [rsp+0x2A0]
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ test rsi, 0x4
+ je 3f
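+ # 4-way tail: each 256-bit register holds the 128-bit state of two inputs,
+ # so two ymm state sets cover all four remaining inputs.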
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovdqa ymm8, ymm0
+ vmovdqa ymm9, ymm1
+ vbroadcasti128 ymm12, xmmword ptr [rsp+0x240]
+ vbroadcasti128 ymm13, xmmword ptr [rsp+0x260]
+ vpunpckldq ymm14, ymm12, ymm13
+ vpunpckhdq ymm15, ymm12, ymm13
+ vpermq ymm14, ymm14, 0x50
+ vpermq ymm15, ymm15, 0x50
+ vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpblendd ymm14, ymm14, ymm12, 0x44
+ vpblendd ymm15, ymm15, ymm12, 0x44
+ vmovdqa ymmword ptr [rsp], ymm14
+ vmovdqa ymmword ptr [rsp+0x20], ymm15
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vmovups ymm2, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm2, ymm3, 136
+ vshufps ymm5, ymm2, ymm3, 221
+ vmovups ymm2, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm2, ymm3, 136
+ vshufps ymm7, ymm2, ymm3, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ vmovups ymm10, ymmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01
+ vshufps ymm12, ymm10, ymm11, 136
+ vshufps ymm13, ymm10, ymm11, 221
+ vmovups ymm10, ymmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01
+ vshufps ymm14, ymm10, ymm11, 136
+ vshufps ymm15, ymm10, ymm11, 221
+ vpshufd ymm14, ymm14, 0x93
+ vpshufd ymm15, ymm15, 0x93
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ vpbroadcastd ymm2, dword ptr [rsp+0x200]
+ vmovdqa ymm3, ymmword ptr [rsp]
+ vmovdqa ymm11, ymmword ptr [rsp+0x20]
+ vpblendd ymm3, ymm3, ymm2, 0x88
+ vpblendd ymm11, ymm11, ymm2, 0x88
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa ymm10, ymm2
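+ # Seven BLAKE3 rounds per block; al counts them down.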
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm8, ymm8, ymm12
+ vmovdqa ymmword ptr [rsp+0x40], ymm4
+ nop
+ vmovdqa ymmword ptr [rsp+0x60], ymm12
+ nop
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vmovdqa ymmword ptr [rsp+0x80], ymm5
+ vmovdqa ymmword ptr [rsp+0xA0], ymm13
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm8, ymm8, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpshufd ymm10, ymm10, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm8, ymm8, ymm14
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm8, ymm8, ymm15
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm8, ymm8, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ vpshufd ymm10, ymm10, 0x93
+ dec al
+ je 9f
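+ # Not the last round: apply the BLAKE3 message-word permutation to the
+ # cached message vectors before the next round.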
+ vmovdqa ymm4, ymmword ptr [rsp+0x40]
+ vmovdqa ymm5, ymmword ptr [rsp+0x80]
+ vshufps ymm12, ymm4, ymm5, 214
+ vpshufd ymm13, ymm4, 0x0F
+ vpshufd ymm4, ymm12, 0x39
+ vshufps ymm12, ymm6, ymm7, 250
+ vpblendd ymm13, ymm13, ymm12, 0xAA
+ vpunpcklqdq ymm12, ymm7, ymm5
+ vpblendd ymm12, ymm12, ymm6, 0x88
+ vpshufd ymm12, ymm12, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymmword ptr [rsp+0x40], ymm13
+ vmovdqa ymmword ptr [rsp+0x80], ymm12
+ vmovdqa ymm12, ymmword ptr [rsp+0x60]
+ vmovdqa ymm13, ymmword ptr [rsp+0xA0]
+ vshufps ymm5, ymm12, ymm13, 214
+ vpshufd ymm6, ymm12, 0x0F
+ vpshufd ymm12, ymm5, 0x39
+ vshufps ymm5, ymm14, ymm15, 250
+ vpblendd ymm6, ymm6, ymm5, 0xAA
+ vpunpcklqdq ymm5, ymm15, ymm13
+ vpblendd ymm5, ymm5, ymm14, 0x88
+ vpshufd ymm5, ymm5, 0x78
+ vpunpckhdq ymm13, ymm13, ymm15
+ vpunpckldq ymm14, ymm14, ymm13
+ vpshufd ymm15, ymm14, 0x1E
+ vmovdqa ymm13, ymm6
+ vmovdqa ymm14, ymm5
+ vmovdqa ymm5, ymmword ptr [rsp+0x40]
+ vmovdqa ymm6, ymmword ptr [rsp+0x80]
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ vpxor ymm8, ymm8, ymm10
+ vpxor ymm9, ymm9, ymm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqu xmmword ptr [rbx+0x40], xmm8
+ vmovdqu xmmword ptr [rbx+0x50], xmm9
+ vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01
+ vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01
+ vmovaps xmm8, xmmword ptr [rsp+0x280]
+ vmovaps xmm0, xmmword ptr [rsp+0x240]
+ vmovaps xmm1, xmmword ptr [rsp+0x250]
+ vmovaps xmm2, xmmword ptr [rsp+0x260]
+ vmovaps xmm3, xmmword ptr [rsp+0x270]
+ vblendvps xmm0, xmm0, xmm1, xmm8
+ vblendvps xmm2, xmm2, xmm3, xmm8
+ vmovaps xmmword ptr [rsp+0x240], xmm0
+ vmovaps xmmword ptr [rsp+0x260], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
+ test rsi, 0x2
+ je 3f
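+ # 2-way tail: one input per 128-bit lane of a single ymm state set.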
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp+0x240]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x260], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x244]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x264], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ vbroadcasti128 ymm14, xmmword ptr [ROT16+rip]
+ vbroadcasti128 ymm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x200]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovaps ymm8, ymmword ptr [rsp+0x280]
+ vmovaps ymm0, ymmword ptr [rsp+0x240]
+ vmovups ymm1, ymmword ptr [rsp+0x248]
+ vmovaps ymm2, ymmword ptr [rsp+0x260]
+ vmovups ymm3, ymmword ptr [rsp+0x268]
+ vblendvps ymm0, ymm0, ymm1, ymm8
+ vblendvps ymm2, ymm2, ymm3, ymm8
+ vmovaps ymmword ptr [rsp+0x240], ymm0
+ vmovaps ymmword ptr [rsp+0x260], ymm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
+ test rsi, 0x1
+ je 4b
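+ # Last single input: plain 128-bit compression in xmm registers.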
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm3, dword ptr [rsp+0x240]
+ vpinsrd xmm3, xmm3, dword ptr [rsp+0x260], 1
+ vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm14, xmmword ptr [ROT16+rip]
+ vmovdqa xmm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa xmm3, xmm13
+ vpinsrd xmm3, xmm3, eax, 3
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata
+#endif
+.p2align 6
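+# Rip-relative constants. ADD0 holds the per-lane counter offsets 0..7 and
+# ADD1 the +8 stride per 8-way iteration; both get masked with the
+# increment_counter flag before use.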
+ADD0:
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ADD1:
+ .long 8, 8, 8, 8, 8, 8, 8, 8
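+# Broadcast copies of the first four BLAKE3 IV words (the SHA-256 h0..h3
+# constants), one full-width vector per word.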
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
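+# vpshufb masks that rotate every 32-bit word right by 16 and 8 bits; the
+# 12- and 7-bit rotates are not byte-aligned and use shift/or pairs instead.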
+ROT16:
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8:
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
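+# Sign-bit mask: XORing both operands with it turns signed vpcmpgtd into an
+# unsigned compare, used above for counter-overflow detection.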
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
+
+#endif
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
new file mode 100644
index 000000000000..bb58d2ae64b1
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
@@ -0,0 +1,1817 @@
+.intel_syntax noprefix
+.global _blake3_hash_many_avx2
+.global blake3_hash_many_avx2
+.section .text
+ .p2align 6
+_blake3_hash_many_avx2:
+blake3_hash_many_avx2:
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 880
+ and rsp, 0xFFFFFFFFFFFFFFC0
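+ # xmm6-xmm15 are callee-saved in the Windows x64 ABI; spill them to the
+ # aligned stack area before the body clobbers every vector register.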
+ vmovdqa xmmword ptr [rsp+0x2D0], xmm6
+ vmovdqa xmmword ptr [rsp+0x2E0], xmm7
+ vmovdqa xmmword ptr [rsp+0x2F0], xmm8
+ vmovdqa xmmword ptr [rsp+0x300], xmm9
+ vmovdqa xmmword ptr [rsp+0x310], xmm10
+ vmovdqa xmmword ptr [rsp+0x320], xmm11
+ vmovdqa xmmword ptr [rsp+0x330], xmm12
+ vmovdqa xmmword ptr [rsp+0x340], xmm13
+ vmovdqa xmmword ptr [rsp+0x350], xmm14
+ vmovdqa xmmword ptr [rsp+0x360], xmm15
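+ # Remap the Microsoft x64 argument registers (rcx, rdx, r8, r9) onto the
+ # System V registers the function body uses; the counter and
+ # increment_counter arguments come from the stack.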
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+0x68]
+ movzx r9, byte ptr [rbp+0x70]
+ neg r9d
+ vmovd xmm0, r9d
+ vpbroadcastd ymm0, xmm0
+ vmovdqa ymmword ptr [rsp+0x260], ymm0
+ vpand ymm1, ymm0, ymmword ptr [ADD0+rip]
+ vpand ymm2, ymm0, ymmword ptr [ADD1+rip]
+ vmovdqa ymmword ptr [rsp+0x2A0], ymm2
+ vmovd xmm2, r8d
+ vpbroadcastd ymm2, xmm2
+ vpaddd ymm2, ymm2, ymm1
+ vmovdqa ymmword ptr [rsp+0x220], ymm2
+ vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm1, ymm2
+ shr r8, 32
+ vmovd xmm3, r8d
+ vpbroadcastd ymm3, xmm3
+ vpsubd ymm3, ymm3, ymm2
+ vmovdqa ymmword ptr [rsp+0x240], ymm3
+ shl rdx, 6
+ mov qword ptr [rsp+0x2C0], rdx
+ cmp rsi, 8
+ jc 3f
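+# 8-way main loop: broadcast the eight key words across all lanes and hash
+# one 64-byte block from each of eight inputs per iteration.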
+2:
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x78]
+ movzx ebx, byte ptr [rbp+0x80]
+ or eax, ebx
+ xor edx, edx
+.p2align 5
+9:
+ movzx ebx, byte ptr [rbp+0x88]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x2C0]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x200], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x20], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x40], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x60], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x80], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0xA0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0xC0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0xE0], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x100], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x120], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x140], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x160], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+0x180], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0x1A0], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0x1C0], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0x1E0], ymm11
+ vpbroadcastd ymm15, dword ptr [rsp+0x200]
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm0, ymmword ptr [rsp+0x220]
+ vpxor ymm13, ymm1, ymmword ptr [rsp+0x240]
+ vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpxor ymm15, ymm3, ymm15
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip]
+ vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip]
+ vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip]
+ vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip]
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+0x200], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8+rip]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+0x78]
+ jne 9b
+ mov rbx, qword ptr [rbp+0x90]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp+0x2A0]
+ vpaddd ymm1, ymm0, ymmword ptr [rsp+0x220]
+ vmovdqa ymmword ptr [rsp+0x220], ymm1
+ vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip]
+ vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip]
+ vpcmpgtd ymm2, ymm0, ymm2
+ vmovdqa ymm0, ymmword ptr [rsp+0x240]
+ vpsubd ymm2, ymm0, ymm2
+ vmovdqa ymmword ptr [rsp+0x240], ymm2
+ add rdi, 64
+ add rbx, 256
+ mov qword ptr [rbp+0x90], rbx
+ sub rsi, 8
+ cmp rsi, 8
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
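+ # Epilogue: restore the xmm registers spilled in the prologue,
+ # unwind the stack frame, and return.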
+ vzeroupper
+ vmovdqa xmm6, xmmword ptr [rsp+0x2D0]
+ vmovdqa xmm7, xmmword ptr [rsp+0x2E0]
+ vmovdqa xmm8, xmmword ptr [rsp+0x2F0]
+ vmovdqa xmm9, xmmword ptr [rsp+0x300]
+ vmovdqa xmm10, xmmword ptr [rsp+0x310]
+ vmovdqa xmm11, xmmword ptr [rsp+0x320]
+ vmovdqa xmm12, xmmword ptr [rsp+0x330]
+ vmovdqa xmm13, xmmword ptr [rsp+0x340]
+ vmovdqa xmm14, xmmword ptr [rsp+0x350]
+ vmovdqa xmm15, xmmword ptr [rsp+0x360]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
+3:
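+ # Fewer than 8 inputs remain: hash 4, then 2, then the final 1,
+ # falling through between the numbered tail blocks.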
+ mov rbx, qword ptr [rbp+0x90]
+ mov r15, qword ptr [rsp+0x2C0]
+ movzx r13d, byte ptr [rbp+0x78]
+ movzx r12d, byte ptr [rbp+0x88]
+ test rsi, 0x4
+ je 3f
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovdqa ymm8, ymm0
+ vmovdqa ymm9, ymm1
+ vbroadcasti128 ymm12, xmmword ptr [rsp+0x220]
+ vbroadcasti128 ymm13, xmmword ptr [rsp+0x240]
+ vpunpckldq ymm14, ymm12, ymm13
+ vpunpckhdq ymm15, ymm12, ymm13
+ vpermq ymm14, ymm14, 0x50
+ vpermq ymm15, ymm15, 0x50
+ vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpblendd ymm14, ymm14, ymm12, 0x44
+ vpblendd ymm15, ymm15, ymm12, 0x44
+ vmovdqa ymmword ptr [rsp], ymm14
+ vmovdqa ymmword ptr [rsp+0x20], ymm15
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vmovups ymm2, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm2, ymm3, 136
+ vshufps ymm5, ymm2, ymm3, 221
+ vmovups ymm2, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm3, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm2, ymm3, 136
+ vshufps ymm7, ymm2, ymm3, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ vmovups ymm10, ymmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01
+ vshufps ymm12, ymm10, ymm11, 136
+ vshufps ymm13, ymm10, ymm11, 221
+ vmovups ymm10, ymmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01
+ vmovups ymm11, ymmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01
+ vshufps ymm14, ymm10, ymm11, 136
+ vshufps ymm15, ymm10, ymm11, 221
+ vpshufd ymm14, ymm14, 0x93
+ vpshufd ymm15, ymm15, 0x93
+ vpbroadcastd ymm2, dword ptr [rsp+0x200]
+ vmovdqa ymm3, ymmword ptr [rsp]
+ vmovdqa ymm11, ymmword ptr [rsp+0x20]
+ vpblendd ymm3, ymm3, ymm2, 0x88
+ vpblendd ymm11, ymm11, ymm2, 0x88
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa ymm10, ymm2
+ mov al, 7
+9:
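+ # Seven-round compression loop: ymm0-3 and ymm8-11 each pack two
+ # 128-bit states, covering four inputs per pass.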
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm8, ymm8, ymm12
+ vmovdqa ymmword ptr [rsp+0x40], ymm4
+ nop
+ vmovdqa ymmword ptr [rsp+0x60], ymm12
+ nop
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vmovdqa ymmword ptr [rsp+0x80], ymm5
+ vmovdqa ymmword ptr [rsp+0xA0], ymm13
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm8, ymm8, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpshufd ymm10, ymm10, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm8, ymm8, ymm14
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm8, ymm8, ymm15
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8+rip]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm8, ymm8, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm11, ymm11, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ vpshufd ymm10, ymm10, 0x93
+ dec al
+ je 9f
+ vmovdqa ymm4, ymmword ptr [rsp+0x40]
+ vmovdqa ymm5, ymmword ptr [rsp+0x80]
+ vshufps ymm12, ymm4, ymm5, 214
+ vpshufd ymm13, ymm4, 0x0F
+ vpshufd ymm4, ymm12, 0x39
+ vshufps ymm12, ymm6, ymm7, 250
+ vpblendd ymm13, ymm13, ymm12, 0xAA
+ vpunpcklqdq ymm12, ymm7, ymm5
+ vpblendd ymm12, ymm12, ymm6, 0x88
+ vpshufd ymm12, ymm12, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymmword ptr [rsp+0x40], ymm13
+ vmovdqa ymmword ptr [rsp+0x80], ymm12
+ vmovdqa ymm12, ymmword ptr [rsp+0x60]
+ vmovdqa ymm13, ymmword ptr [rsp+0xA0]
+ vshufps ymm5, ymm12, ymm13, 214
+ vpshufd ymm6, ymm12, 0x0F
+ vpshufd ymm12, ymm5, 0x39
+ vshufps ymm5, ymm14, ymm15, 250
+ vpblendd ymm6, ymm6, ymm5, 0xAA
+ vpunpcklqdq ymm5, ymm15, ymm13
+ vpblendd ymm5, ymm5, ymm14, 0x88
+ vpshufd ymm5, ymm5, 0x78
+ vpunpckhdq ymm13, ymm13, ymm15
+ vpunpckldq ymm14, ymm14, ymm13
+ vpshufd ymm15, ymm14, 0x1E
+ vmovdqa ymm13, ymm6
+ vmovdqa ymm14, ymm5
+ vmovdqa ymm5, ymmword ptr [rsp+0x40]
+ vmovdqa ymm6, ymmword ptr [rsp+0x80]
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ vpxor ymm8, ymm8, ymm10
+ vpxor ymm9, ymm9, ymm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqu xmmword ptr [rbx+0x40], xmm8
+ vmovdqu xmmword ptr [rbx+0x50], xmm9
+ vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01
+ vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01
+ vmovaps xmm8, xmmword ptr [rsp+0x260]
+ vmovaps xmm0, xmmword ptr [rsp+0x220]
+ vmovaps xmm1, xmmword ptr [rsp+0x230]
+ vmovaps xmm2, xmmword ptr [rsp+0x240]
+ vmovaps xmm3, xmmword ptr [rsp+0x250]
+ vblendvps xmm0, xmm0, xmm1, xmm8
+ vblendvps xmm2, xmm2, xmm3, xmm8
+ vmovaps xmmword ptr [rsp+0x220], xmm0
+ vmovaps xmmword ptr [rsp+0x240], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
+ test rsi, 0x2
+ je 3f
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp+0x220]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x240], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x224]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x244], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ vbroadcasti128 ymm14, xmmword ptr [ROT16+rip]
+ vbroadcasti128 ymm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x200], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x200]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
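+ # Seven-round compression loop for the two remaining inputs, one
+ # state per 128-bit ymm lane.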
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovaps ymm8, ymmword ptr [rsp+0x260]
+ vmovaps ymm0, ymmword ptr [rsp+0x220]
+ vmovups ymm1, ymmword ptr [rsp+0x228]
+ vmovaps ymm2, ymmword ptr [rsp+0x240]
+ vmovups ymm3, ymmword ptr [rsp+0x248]
+ vblendvps ymm0, ymm0, ymm1, ymm8
+ vblendvps ymm2, ymm2, ymm3, ymm8
+ vmovaps ymmword ptr [rsp+0x220], ymm0
+ vmovaps ymmword ptr [rsp+0x240], ymm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
+ test rsi, 0x1
+ je 4b
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm3, dword ptr [rsp+0x220]
+ vpinsrd xmm3, xmm3, dword ptr [rsp+0x240], 1
+ vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm14, xmmword ptr [ROT16+rip]
+ vmovdqa xmm15, xmmword ptr [ROT8+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovdqa xmm3, xmm13
+ vpinsrd xmm3, xmm3, eax, 3
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
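+ # Seven-round compression loop for the last input, on xmm
+ # registers only.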
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+.section .rodata
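+# Constants: lane counters (ADD0/ADD1), broadcast IV words, the block
+# length, byte-shuffle masks for the rotate-by-16 and rotate-by-8
+# steps, and the MSB mask used to emulate an unsigned compare when
+# propagating the block-counter carry.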
+.p2align 6
+ADD0:
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ADD1:
+ .long 8, 8, 8, 8, 8, 8, 8, 8
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
+ .long 0x00000040, 0x00000040, 0x00000040, 0x00000040
+ROT16:
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8:
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
+
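The hand-written kernels in these files are long but extremely regular: every vpaddd/vpxor/vpshufb and vpsrld/vpslld/vpor run is one step of the BLAKE3 G function, vectorized either eight ways (one input per 32-bit lane) or, in the tail paths, with whole 128-bit state rows of one or two inputs per register. The vpshufb tables ROT16 and ROT8 implement the byte-aligned rotations, and the shift pairs (12/20 and 7/25) implement the odd-sized ones. As a reading aid, here is a minimal scalar sketch of that primitive in C; it is illustrative only and not part of the patch (names follow the BLAKE3 specification, and the actual scalar fallback in this import is the portable C implementation):

#include <stdint.h>

/* Rotate a 32-bit word right by n bits (n is always 16, 12, 8, or 7
 * here, so the shift counts stay in range). */
static inline uint32_t rotr32(uint32_t x, unsigned n) {
  return (x >> n) | (x << (32 - n));
}

/* One BLAKE3 G function on the 16-word state. Each of the seven rounds
 * (the "mov al, 7" / "dec al" loops above) applies this four times down
 * the columns and four times along the diagonals of the 4x4 state; the
 * vshufps/vpblendd/vpshufd blocks between rounds apply the message-word
 * permutation. */
static void g(uint32_t s[16], unsigned a, unsigned b, unsigned c,
              unsigned d, uint32_t mx, uint32_t my) {
  s[a] = s[a] + s[b] + mx;
  s[d] = rotr32(s[d] ^ s[a], 16);
  s[c] = s[c] + s[d];
  s[b] = rotr32(s[b] ^ s[c], 12);
  s[a] = s[a] + s[b] + my;
  s[d] = rotr32(s[d] ^ s[a], 8);
  s[c] = s[c] + s[d];
  s[b] = rotr32(s[b] ^ s[c], 7);
}

The MSVC-syntax file that follows is the same algorithm assembled for the Windows toolchain, with the xmm6-xmm15 spills and rdi/rsi saves that the Windows calling convention requires.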
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm
new file mode 100644
index 000000000000..352298edd2e8
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm
@@ -0,0 +1,1828 @@
+public _blake3_hash_many_avx2
+public blake3_hash_many_avx2
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_avx2 PROC
+_blake3_hash_many_avx2 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 880
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ vmovdqa xmmword ptr [rsp+2D0H], xmm6
+ vmovdqa xmmword ptr [rsp+2E0H], xmm7
+ vmovdqa xmmword ptr [rsp+2F0H], xmm8
+ vmovdqa xmmword ptr [rsp+300H], xmm9
+ vmovdqa xmmword ptr [rsp+310H], xmm10
+ vmovdqa xmmword ptr [rsp+320H], xmm11
+ vmovdqa xmmword ptr [rsp+330H], xmm12
+ vmovdqa xmmword ptr [rsp+340H], xmm13
+ vmovdqa xmmword ptr [rsp+350H], xmm14
+ vmovdqa xmmword ptr [rsp+360H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9d
+ vmovd xmm0, r9d
+ vpbroadcastd ymm0, xmm0
+ vmovdqa ymmword ptr [rsp+260H], ymm0
+ vpand ymm1, ymm0, ymmword ptr [ADD0]
+ vpand ymm2, ymm0, ymmword ptr [ADD1]
+ vmovdqa ymmword ptr [rsp+2A0H], ymm2
+ vmovd xmm2, r8d
+ vpbroadcastd ymm2, xmm2
+ vpaddd ymm2, ymm2, ymm1
+ vmovdqa ymmword ptr [rsp+220H], ymm2
+ vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK]
+ vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK]
+ vpcmpgtd ymm2, ymm1, ymm2
+ shr r8, 32
+ vmovd xmm3, r8d
+ vpbroadcastd ymm3, xmm3
+ vpsubd ymm3, ymm3, ymm2
+ vmovdqa ymmword ptr [rsp+240H], ymm3
+ shl rdx, 6
+ mov qword ptr [rsp+2C0H], rdx
+ cmp rsi, 8
+ jc final7blocks
+outerloop8:
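+ ; Eight-way setup: broadcast the eight key/chaining-value words
+ ; across ymm0-7 and load the eight input pointers into r8-r15.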
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+4H]
+ vpbroadcastd ymm2, dword ptr [rcx+8H]
+ vpbroadcastd ymm3, dword ptr [rcx+0CH]
+ vpbroadcastd ymm4, dword ptr [rcx+10H]
+ vpbroadcastd ymm5, dword ptr [rcx+14H]
+ vpbroadcastd ymm6, dword ptr [rcx+18H]
+ vpbroadcastd ymm7, dword ptr [rcx+1CH]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+20H]
+ mov r13, qword ptr [rdi+28H]
+ mov r14, qword ptr [rdi+30H]
+ mov r15, qword ptr [rdi+38H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
+ALIGN 16
+innerloop8:
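+ ; Select this block's flag byte (adding the end flag on the final
+ ; block), then load and transpose a 64-byte block from each input
+ ; into the message vectors on the stack.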
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+2C0H]
+ cmove eax, ebx
+ mov dword ptr [rsp+200H], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-40H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-40H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+20H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+40H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+60H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-30H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-30H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+80H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+0A0H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+0C0H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+0E0H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-20H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-20H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+100H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+120H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+140H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+160H], ymm11
+ vmovups xmm8, xmmword ptr [r8+rdx-10H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-10H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm8, ymm12, ymm14, 136
+ vmovaps ymmword ptr [rsp+180H], ymm8
+ vshufps ymm9, ymm12, ymm14, 221
+ vmovaps ymmword ptr [rsp+1A0H], ymm9
+ vshufps ymm10, ymm13, ymm15, 136
+ vmovaps ymmword ptr [rsp+1C0H], ymm10
+ vshufps ymm11, ymm13, ymm15, 221
+ vmovaps ymmword ptr [rsp+1E0H], ymm11
+ vpbroadcastd ymm15, dword ptr [rsp+200H]
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+80H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm0, ymmword ptr [rsp+220H]
+ vpxor ymm13, ymm1, ymmword ptr [rsp+240H]
+ vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN]
+ vpxor ymm15, ymm3, ymm15
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0]
+ vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1]
+ vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2]
+ vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3]
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+20H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+100H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+180H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+120H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+40H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+20H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+120H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+160H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+60H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+80H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+40H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+160H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+140H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+60H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+80H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0A0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+100H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+180H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+120H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+1E0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+140H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+0E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+40H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+60H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+20H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+120H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+160H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+100H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1E0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+180H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+20H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1A0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+40H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+80H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+60H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+140H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+0C0H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+160H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+0A0H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+20H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+100H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1E0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+120H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0C0H]
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxor ymm12, ymm12, ymm0
+ vpxor ymm13, ymm13, ymm1
+ vpxor ymm14, ymm14, ymm2
+ vpxor ymm15, ymm15, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpshufb ymm15, ymm15, ymm8
+ vpaddd ymm8, ymm12, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxor ymm4, ymm4, ymm8
+ vpxor ymm5, ymm5, ymm9
+ vpxor ymm6, ymm6, ymm10
+ vpxor ymm7, ymm7, ymm11
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+1C0H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+40H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+60H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+0E0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT16]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vmovdqa ymmword ptr [rsp+200H], ymm8
+ vpsrld ymm8, ymm5, 12
+ vpslld ymm5, ymm5, 20
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 12
+ vpslld ymm6, ymm6, 20
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 12
+ vpslld ymm7, ymm7, 20
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 12
+ vpslld ymm4, ymm4, 20
+ vpor ymm4, ymm4, ymm8
+ vpaddd ymm0, ymm0, ymmword ptr [rsp+140H]
+ vpaddd ymm1, ymm1, ymmword ptr [rsp+180H]
+ vpaddd ymm2, ymm2, ymmword ptr [rsp+80H]
+ vpaddd ymm3, ymm3, ymmword ptr [rsp+1A0H]
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxor ymm15, ymm15, ymm0
+ vpxor ymm12, ymm12, ymm1
+ vpxor ymm13, ymm13, ymm2
+ vpxor ymm14, ymm14, ymm3
+ vbroadcasti128 ymm8, xmmword ptr [ROT8]
+ vpshufb ymm15, ymm15, ymm8
+ vpshufb ymm12, ymm12, ymm8
+ vpshufb ymm13, ymm13, ymm8
+ vpshufb ymm14, ymm14, ymm8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm13, ymmword ptr [rsp+200H]
+ vpaddd ymm9, ymm9, ymm14
+ vpxor ymm5, ymm5, ymm10
+ vpxor ymm6, ymm6, ymm11
+ vpxor ymm7, ymm7, ymm8
+ vpxor ymm4, ymm4, ymm9
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpsrld ymm8, ymm5, 7
+ vpslld ymm5, ymm5, 25
+ vpor ymm5, ymm5, ymm8
+ vpsrld ymm8, ymm6, 7
+ vpslld ymm6, ymm6, 25
+ vpor ymm6, ymm6, ymm8
+ vpsrld ymm8, ymm7, 7
+ vpslld ymm7, ymm7, 25
+ vpor ymm7, ymm7, ymm8
+ vpsrld ymm8, ymm4, 7
+ vpslld ymm4, ymm4, 25
+ vpor ymm4, ymm4, ymm8
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
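+        ; End of the unrolled rounds: fold the state halves together. v[0..3]
+        ; were xored with v[8..11] just before the final rotations; v[4..7] xor
+        ; v[12..15] here, yielding the eight output chaining-value vectors.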
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop8
+ mov rbx, qword ptr [rbp+90H]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0CCH
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0CCH
+ vblendps ymm3, ymm12, ymm9, 0CCH
+ vperm2f128 ymm12, ymm1, ymm2, 20H
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0CCH
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 20H
+ vmovups ymmword ptr [rbx+20H], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0CCH
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0CCH
+ vblendps ymm14, ymm14, ymm13, 0CCH
+ vperm2f128 ymm8, ymm10, ymm14, 20H
+ vmovups ymmword ptr [rbx+40H], ymm8
+ vblendps ymm15, ymm13, ymm15, 0CCH
+ vperm2f128 ymm13, ymm6, ymm15, 20H
+ vmovups ymmword ptr [rbx+60H], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 31H
+ vperm2f128 ymm11, ymm3, ymm4, 31H
+ vmovups ymmword ptr [rbx+80H], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 31H
+ vperm2f128 ymm15, ymm6, ymm15, 31H
+ vmovups ymmword ptr [rbx+0A0H], ymm11
+ vmovups ymmword ptr [rbx+0C0H], ymm14
+ vmovups ymmword ptr [rbx+0E0H], ymm15
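+        ; Advance the per-lane 64-bit block counters, kept split as eight low
+        ; words at [rsp+220H] and eight high words at [rsp+240H]. XORing both
+        ; compare operands with CMP_MSB_MASK makes the signed vpcmpgtd act as
+        ; an unsigned overflow test; subtracting its all-ones result from the
+        ; high words propagates the carry.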
+ vmovdqa ymm0, ymmword ptr [rsp+2A0H]
+ vpaddd ymm1, ymm0, ymmword ptr [rsp+220H]
+ vmovdqa ymmword ptr [rsp+220H], ymm1
+ vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK]
+ vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK]
+ vpcmpgtd ymm2, ymm0, ymm2
+ vmovdqa ymm0, ymmword ptr [rsp+240H]
+ vpsubd ymm2, ymm0, ymm2
+ vmovdqa ymmword ptr [rsp+240H], ymm2
+ add rdi, 64
+ add rbx, 256
+ mov qword ptr [rbp+90H], rbx
+ sub rsi, 8
+ cmp rsi, 8
+ jnc outerloop8
+ test rsi, rsi
+ jnz final7blocks
+unwind:
+ vzeroupper
+ vmovdqa xmm6, xmmword ptr [rsp+2D0H]
+ vmovdqa xmm7, xmmword ptr [rsp+2E0H]
+ vmovdqa xmm8, xmmword ptr [rsp+2F0H]
+ vmovdqa xmm9, xmmword ptr [rsp+300H]
+ vmovdqa xmm10, xmmword ptr [rsp+310H]
+ vmovdqa xmm11, xmmword ptr [rsp+320H]
+ vmovdqa xmm12, xmmword ptr [rsp+330H]
+ vmovdqa xmm13, xmmword ptr [rsp+340H]
+ vmovdqa xmm14, xmmword ptr [rsp+350H]
+ vmovdqa xmm15, xmmword ptr [rsp+360H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final7blocks:
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, qword ptr [rsp+2C0H]
+ movzx r13d, byte ptr [rbp+78H]
+ movzx r12d, byte ptr [rbp+88H]
+ test rsi, 4H
+ je final3blocks
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H]
+ vmovdqa ymm8, ymm0
+ vmovdqa ymm9, ymm1
+ vbroadcasti128 ymm12, xmmword ptr [rsp+220H]
+ vbroadcasti128 ymm13, xmmword ptr [rsp+240H]
+ vpunpckldq ymm14, ymm12, ymm13
+ vpunpckhdq ymm15, ymm12, ymm13
+ vpermq ymm14, ymm14, 50H
+ vpermq ymm15, ymm15, 50H
+ vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN]
+ vpblendd ymm14, ymm14, ymm12, 44H
+ vpblendd ymm15, ymm15, ymm12, 44H
+ vmovdqa ymmword ptr [rsp], ymm14
+ vmovdqa ymmword ptr [rsp+20H], ymm15
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+200H], eax
+ vmovups ymm2, ymmword ptr [r8+rdx-40H]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-40H], 01H
+ vmovups ymm3, ymmword ptr [r8+rdx-30H]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm2, ymm3, 136
+ vshufps ymm5, ymm2, ymm3, 221
+ vmovups ymm2, ymmword ptr [r8+rdx-20H]
+ vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-20H], 01H
+ vmovups ymm3, ymmword ptr [r8+rdx-10H]
+ vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm2, ymm3, 136
+ vshufps ymm7, ymm2, ymm3, 221
+ vpshufd ymm6, ymm6, 93H
+ vpshufd ymm7, ymm7, 93H
+ vmovups ymm10, ymmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-40H], 01H
+ vmovups ymm11, ymmword ptr [r10+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-30H], 01H
+ vshufps ymm12, ymm10, ymm11, 136
+ vshufps ymm13, ymm10, ymm11, 221
+ vmovups ymm10, ymmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-20H], 01H
+ vmovups ymm11, ymmword ptr [r10+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-10H], 01H
+ vshufps ymm14, ymm10, ymm11, 136
+ vshufps ymm15, ymm10, ymm11, 221
+ vpshufd ymm14, ymm14, 93H
+ vpshufd ymm15, ymm15, 93H
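+        ; Message setup: the vshufps selectors 136/221 split each 64-byte block
+        ; into its even and odd words, and the 93H vpshufd pre-rotates the last
+        ; two groups, mirroring the round-1 message layout of the C compress_pre.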
+ vpbroadcastd ymm2, dword ptr [rsp+200H]
+ vmovdqa ymm3, ymmword ptr [rsp]
+ vmovdqa ymm11, ymmword ptr [rsp+20H]
+ vpblendd ymm3, ymm3, ymm2, 88H
+ vpblendd ymm11, ymm11, ymm2, 88H
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV]
+ vmovdqa ymm10, ymm2
+ mov al, 7
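+        ; al counts down the seven compression rounds.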
+roundloop4:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm8, ymm8, ymm12
+ vmovdqa ymmword ptr [rsp+40H], ymm4
+ nop
+ vmovdqa ymmword ptr [rsp+60H], ymm12
+ nop
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vmovdqa ymmword ptr [rsp+80H], ymm5
+ vmovdqa ymmword ptr [rsp+0A0H], ymm13
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 93H
+ vpshufd ymm8, ymm8, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm11, ymm11, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpshufd ymm10, ymm10, 39H
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm8, ymm8, ymm14
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT16]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 12
+ vpslld ymm9, ymm9, 20
+ vpor ymm9, ymm9, ymm4
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm8, ymm8, ymm15
+ vpaddd ymm0, ymm0, ymm1
+ vpaddd ymm8, ymm8, ymm9
+ vpxor ymm3, ymm3, ymm0
+ vpxor ymm11, ymm11, ymm8
+ vbroadcasti128 ymm4, xmmword ptr [ROT8]
+ vpshufb ymm3, ymm3, ymm4
+ vpshufb ymm11, ymm11, ymm4
+ vpaddd ymm2, ymm2, ymm3
+ vpaddd ymm10, ymm10, ymm11
+ vpxor ymm1, ymm1, ymm2
+ vpxor ymm9, ymm9, ymm10
+ vpsrld ymm4, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm4
+ vpsrld ymm4, ymm9, 7
+ vpslld ymm9, ymm9, 25
+ vpor ymm9, ymm9, ymm4
+ vpshufd ymm0, ymm0, 39H
+ vpshufd ymm8, ymm8, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm11, ymm11, 4EH
+ vpshufd ymm2, ymm2, 93H
+ vpshufd ymm10, ymm10, 93H
+ dec al
+ je endroundloop4
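+        ; Not the last round: apply the fixed BLAKE3 message-word permutation
+        ; to the transposed message vectors before looping.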
+ vmovdqa ymm4, ymmword ptr [rsp+40H]
+ vmovdqa ymm5, ymmword ptr [rsp+80H]
+ vshufps ymm12, ymm4, ymm5, 214
+ vpshufd ymm13, ymm4, 0FH
+ vpshufd ymm4, ymm12, 39H
+ vshufps ymm12, ymm6, ymm7, 250
+ vpblendd ymm13, ymm13, ymm12, 0AAH
+ vpunpcklqdq ymm12, ymm7, ymm5
+ vpblendd ymm12, ymm12, ymm6, 88H
+ vpshufd ymm12, ymm12, 78H
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH
+ vmovdqa ymmword ptr [rsp+40H], ymm13
+ vmovdqa ymmword ptr [rsp+80H], ymm12
+ vmovdqa ymm12, ymmword ptr [rsp+60H]
+ vmovdqa ymm13, ymmword ptr [rsp+0A0H]
+ vshufps ymm5, ymm12, ymm13, 214
+ vpshufd ymm6, ymm12, 0FH
+ vpshufd ymm12, ymm5, 39H
+ vshufps ymm5, ymm14, ymm15, 250
+ vpblendd ymm6, ymm6, ymm5, 0AAH
+ vpunpcklqdq ymm5, ymm15, ymm13
+ vpblendd ymm5, ymm5, ymm14, 88H
+ vpshufd ymm5, ymm5, 78H
+ vpunpckhdq ymm13, ymm13, ymm15
+ vpunpckldq ymm14, ymm14, ymm13
+ vpshufd ymm15, ymm14, 1EH
+ vmovdqa ymm13, ymm6
+ vmovdqa ymm14, ymm5
+ vmovdqa ymm5, ymmword ptr [rsp+40H]
+ vmovdqa ymm6, ymmword ptr [rsp+80H]
+ jmp roundloop4
+endroundloop4:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ vpxor ymm8, ymm8, ymm10
+ vpxor ymm9, ymm9, ymm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop4
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovdqu xmmword ptr [rbx+40H], xmm8
+ vmovdqu xmmword ptr [rbx+50H], xmm9
+ vextracti128 xmmword ptr [rbx+60H], ymm8, 01H
+ vextracti128 xmmword ptr [rbx+70H], ymm9, 01H
+ vmovaps xmm8, xmmword ptr [rsp+260H]
+ vmovaps xmm0, xmmword ptr [rsp+220H]
+ vmovaps xmm1, xmmword ptr [rsp+230H]
+ vmovaps xmm2, xmmword ptr [rsp+240H]
+ vmovaps xmm3, xmmword ptr [rsp+250H]
+ vblendvps xmm0, xmm0, xmm1, xmm8
+ vblendvps xmm2, xmm2, xmm3, xmm8
+ vmovaps xmmword ptr [rsp+220H], xmm0
+ vmovaps xmmword ptr [rsp+240H], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+final3blocks:
+ test rsi, 2H
+ je final1blocks
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H]
+ vmovd xmm13, dword ptr [rsp+220H]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+240H], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovd xmm14, dword ptr [rsp+224H]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+244H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vinserti128 ymm13, ymm13, xmm14, 01H
+ vbroadcasti128 ymm14, xmmword ptr [ROT16]
+ vbroadcasti128 ymm15, xmmword ptr [ROT8]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+200H], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV]
+ vpbroadcastd ymm8, dword ptr [rsp+200H]
+ vpblendd ymm3, ymm13, ymm8, 88H
+ vmovups ymm8, ymmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 93H
+ vpshufd ymm7, ymm7, 93H
+ mov al, 7
+roundloop2:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm14
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 12
+ vpslld ymm1, ymm1, 20
+ vpor ymm1, ymm1, ymm8
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxor ymm3, ymm3, ymm0
+ vpshufb ymm3, ymm3, ymm15
+ vpaddd ymm2, ymm2, ymm3
+ vpxor ymm1, ymm1, ymm2
+ vpsrld ymm8, ymm1, 7
+ vpslld ymm1, ymm1, 25
+ vpor ymm1, ymm1, ymm8
+ vpshufd ymm0, ymm0, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 93H
+ dec al
+ jz endroundloop2
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0FH
+ vpshufd ymm4, ymm8, 39H
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0AAH
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 88H
+ vpshufd ymm8, ymm8, 78H
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp roundloop2
+endroundloop2:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovaps ymm8, ymmword ptr [rsp+260H]
+ vmovaps ymm0, ymmword ptr [rsp+220H]
+ vmovups ymm1, ymmword ptr [rsp+228H]
+ vmovaps ymm2, ymmword ptr [rsp+240H]
+ vmovups ymm3, ymmword ptr [rsp+248H]
+ vblendvps ymm0, ymm0, ymm1, ymm8
+ vblendvps ymm2, ymm2, ymm3, ymm8
+ vmovaps ymmword ptr [rsp+220H], ymm0
+ vmovaps ymmword ptr [rsp+240H], ymm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+final1blocks:
+ test rsi, 1H
+ je unwind
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ vmovd xmm3, dword ptr [rsp+220H]
+ vpinsrd xmm3, xmm3, dword ptr [rsp+240H], 1
+ vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovdqa xmm14, xmmword ptr [ROT16]
+ vmovdqa xmm15, xmmword ptr [ROT8]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vmovdqa xmm2, xmmword ptr [BLAKE3_IV]
+ vmovdqa xmm3, xmm13
+ vpinsrd xmm3, xmm3, eax, 3
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vmovups xmm9, xmmword ptr [r8+rdx-30H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vmovups xmm9, xmmword ptr [r8+rdx-10H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+roundloop1:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm14
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 12
+ vpslld xmm1, xmm1, 20
+ vpor xmm1, xmm1, xmm8
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxor xmm3, xmm3, xmm0
+ vpshufb xmm3, xmm3, xmm15
+ vpaddd xmm2, xmm2, xmm3
+ vpxor xmm1, xmm1, xmm2
+ vpsrld xmm8, xmm1, 7
+ vpslld xmm1, xmm1, 25
+ vpor xmm1, xmm1, xmm8
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+
+_blake3_hash_many_avx2 ENDP
+blake3_hash_many_avx2 ENDP
+_TEXT ENDS
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+ADD0:
+ dd 0, 1, 2, 3, 4, 5, 6, 7
+
+ADD1:
+ dd 8 dup (8)
+
+BLAKE3_IV_0:
+ dd 8 dup (6A09E667H)
+
+BLAKE3_IV_1:
+ dd 8 dup (0BB67AE85H)
+
+BLAKE3_IV_2:
+ dd 8 dup (3C6EF372H)
+
+BLAKE3_IV_3:
+ dd 8 dup (0A54FF53AH)
+
+BLAKE3_BLOCK_LEN:
+ dd 8 dup (64)
+
+ROT16:
+ db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+
+ROT8:
+ db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
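+; ROT16 and ROT8 are vpshufb masks: within each 128-bit lane they rotate every
+; 32-bit word right by 16 or 8 bits in a single shuffle, instead of the
+; shift/shift/or sequence used for the 12- and 7-bit rotations.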
+
+CMP_MSB_MASK:
+ dd 8 dup(80000000H)
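+; XORing a value with CMP_MSB_MASK flips its sign bit, so signed vpcmpgtd
+; behaves as an unsigned greater-than; the counter-carry code above relies on
+; this.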
+
+BLAKE3_IV:
+ dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
+
+_RDATA ENDS
+END
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512.c b/llvm/lib/Support/BLAKE3/blake3_avx512.c
new file mode 100644
index 000000000000..9c35b08c439a
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512.c
@@ -0,0 +1,1207 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define _mm_shuffle_ps2(a, b, c) \
+ (_mm_castps_si128( \
+ _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
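+// _mm_shuffle_ps2 borrows the FP shuffle (SHUFPS) to pick two dwords from each
+// of two integer vectors; the casts only reinterpret bits, though some CPUs
+// charge a small int/FP domain-crossing latency for mixing the two.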
+
+INLINE __m128i loadu_128(const uint8_t src[16]) {
+ return _mm_loadu_si128((const __m128i *)src);
+}
+
+INLINE __m256i loadu_256(const uint8_t src[32]) {
+ return _mm256_loadu_si256((const __m256i *)src);
+}
+
+INLINE __m512i loadu_512(const uint8_t src[64]) {
+ return _mm512_loadu_si512((const __m512i *)src);
+}
+
+INLINE void storeu_128(__m128i src, uint8_t dest[16]) {
+ _mm_storeu_si128((__m128i *)dest, src);
+}
+
+INLINE void storeu_256(__m256i src, uint8_t dest[32]) {
+ _mm256_storeu_si256((__m256i *)dest, src);
+}
+
+INLINE __m128i add_128(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
+
+INLINE __m256i add_256(__m256i a, __m256i b) { return _mm256_add_epi32(a, b); }
+
+INLINE __m512i add_512(__m512i a, __m512i b) { return _mm512_add_epi32(a, b); }
+
+INLINE __m128i xor_128(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
+
+INLINE __m256i xor_256(__m256i a, __m256i b) { return _mm256_xor_si256(a, b); }
+
+INLINE __m512i xor_512(__m512i a, __m512i b) { return _mm512_xor_si512(a, b); }
+
+INLINE __m128i set1_128(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
+
+INLINE __m256i set1_256(uint32_t x) { return _mm256_set1_epi32((int32_t)x); }
+
+INLINE __m512i set1_512(uint32_t x) { return _mm512_set1_epi32((int32_t)x); }
+
+INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+ return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
+}
+
+INLINE __m128i rot16_128(__m128i x) { return _mm_ror_epi32(x, 16); }
+
+INLINE __m256i rot16_256(__m256i x) { return _mm256_ror_epi32(x, 16); }
+
+INLINE __m512i rot16_512(__m512i x) { return _mm512_ror_epi32(x, 16); }
+
+INLINE __m128i rot12_128(__m128i x) { return _mm_ror_epi32(x, 12); }
+
+INLINE __m256i rot12_256(__m256i x) { return _mm256_ror_epi32(x, 12); }
+
+INLINE __m512i rot12_512(__m512i x) { return _mm512_ror_epi32(x, 12); }
+
+INLINE __m128i rot8_128(__m128i x) { return _mm_ror_epi32(x, 8); }
+
+INLINE __m256i rot8_256(__m256i x) { return _mm256_ror_epi32(x, 8); }
+
+INLINE __m512i rot8_512(__m512i x) { return _mm512_ror_epi32(x, 8); }
+
+INLINE __m128i rot7_128(__m128i x) { return _mm_ror_epi32(x, 7); }
+
+INLINE __m256i rot7_256(__m256i x) { return _mm256_ror_epi32(x, 7); }
+
+INLINE __m512i rot7_512(__m512i x) { return _mm512_ror_epi32(x, 7); }
+
+/*
+ * ----------------------------------------------------------------------------
+ * compress_avx512
+ * ----------------------------------------------------------------------------
+ */
+
+INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+ __m128i m) {
+ *row0 = add_128(add_128(*row0, m), *row1);
+ *row3 = xor_128(*row3, *row0);
+ *row3 = rot16_128(*row3);
+ *row2 = add_128(*row2, *row3);
+ *row1 = xor_128(*row1, *row2);
+ *row1 = rot12_128(*row1);
+}
+
+INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+ __m128i m) {
+ *row0 = add_128(add_128(*row0, m), *row1);
+ *row3 = xor_128(*row3, *row0);
+ *row3 = rot8_128(*row3);
+ *row2 = add_128(*row2, *row3);
+ *row1 = xor_128(*row1, *row2);
+ *row1 = rot7_128(*row1);
+}
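+
+// g1 and g2 are the two halves of the BLAKE3 G function: g1 does the 16/12-bit
+// rotation half, g2 the 8/7-bit half. As a minimal scalar sketch (rotr32 being
+// a hypothetical right-rotate helper, not part of this file):
+//   a += b + mx; d = rotr32(d ^ a, 16); c += d; b = rotr32(b ^ c, 12);
+//   a += b + my; d = rotr32(d ^ a, 8);  c += d; b = rotr32(b ^ c, 7);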
+
+// Note the optimization here of leaving row1 as the unrotated row, rather than
+// row0. All the message loads below are adjusted to compensate for this. See
+// discussion at https://github.com/sneves/blake2-avx2/pull/4
+INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+ *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
+ *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+ *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
+}
+
+INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+ *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
+ *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+ *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
+}
+
+INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter, uint8_t flags) {
+ rows[0] = loadu_128((uint8_t *)&cv[0]);
+ rows[1] = loadu_128((uint8_t *)&cv[4]);
+ rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
+ rows[3] = set4(counter_low(counter), counter_high(counter),
+ (uint32_t)block_len, (uint32_t)flags);
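+  // rows[0..1] hold the caller's chaining value, rows[2] the first four IV
+  // words, and rows[3] the counter, block length, and flags, matching the
+  // BLAKE3 state layout.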
+
+ __m128i m0 = loadu_128(&block[sizeof(__m128i) * 0]);
+ __m128i m1 = loadu_128(&block[sizeof(__m128i) * 1]);
+ __m128i m2 = loadu_128(&block[sizeof(__m128i) * 2]);
+ __m128i m3 = loadu_128(&block[sizeof(__m128i) * 3]);
+
+ __m128i t0, t1, t2, t3, tt;
+
+ // Round 1. The first round permutes the message words from the original
+ // input order, into the groups that get mixed in parallel.
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8
+ t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9
+ t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 2. This round and all following rounds apply a fixed permutation
+ // to the message words from the round before.
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 3
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 4
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 5
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 6
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 7
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+}
+
+void blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]) {
+ __m128i rows[4];
+ compress_pre(rows, cv, block, block_len, counter, flags);
+ storeu_128(xor_128(rows[0], rows[2]), &out[0]);
+ storeu_128(xor_128(rows[1], rows[3]), &out[16]);
+ storeu_128(xor_128(rows[2], loadu_128((uint8_t *)&cv[0])), &out[32]);
+ storeu_128(xor_128(rows[3], loadu_128((uint8_t *)&cv[4])), &out[48]);
+}
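+// Note: out[0..31] above is the regular compressed chaining value; out[32..63]
+// xors the final state rows 2 and 3 with the input CV, giving BLAKE3's
+// extended (XOF) output.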
+
+void blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags) {
+ __m128i rows[4];
+ compress_pre(rows, cv, block, block_len, counter, flags);
+ storeu_128(xor_128(rows[0], rows[2]), (uint8_t *)&cv[0]);
+ storeu_128(xor_128(rows[1], rows[3]), (uint8_t *)&cv[4]);
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash4_avx512
+ * ----------------------------------------------------------------------------
+ */
+
+INLINE void round_fn4(__m128i v[16], __m128i m[16], size_t r) {
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+ v[0] = add_128(v[0], v[4]);
+ v[1] = add_128(v[1], v[5]);
+ v[2] = add_128(v[2], v[6]);
+ v[3] = add_128(v[3], v[7]);
+ v[12] = xor_128(v[12], v[0]);
+ v[13] = xor_128(v[13], v[1]);
+ v[14] = xor_128(v[14], v[2]);
+ v[15] = xor_128(v[15], v[3]);
+ v[12] = rot16_128(v[12]);
+ v[13] = rot16_128(v[13]);
+ v[14] = rot16_128(v[14]);
+ v[15] = rot16_128(v[15]);
+ v[8] = add_128(v[8], v[12]);
+ v[9] = add_128(v[9], v[13]);
+ v[10] = add_128(v[10], v[14]);
+ v[11] = add_128(v[11], v[15]);
+ v[4] = xor_128(v[4], v[8]);
+ v[5] = xor_128(v[5], v[9]);
+ v[6] = xor_128(v[6], v[10]);
+ v[7] = xor_128(v[7], v[11]);
+ v[4] = rot12_128(v[4]);
+ v[5] = rot12_128(v[5]);
+ v[6] = rot12_128(v[6]);
+ v[7] = rot12_128(v[7]);
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+ v[0] = add_128(v[0], v[4]);
+ v[1] = add_128(v[1], v[5]);
+ v[2] = add_128(v[2], v[6]);
+ v[3] = add_128(v[3], v[7]);
+ v[12] = xor_128(v[12], v[0]);
+ v[13] = xor_128(v[13], v[1]);
+ v[14] = xor_128(v[14], v[2]);
+ v[15] = xor_128(v[15], v[3]);
+ v[12] = rot8_128(v[12]);
+ v[13] = rot8_128(v[13]);
+ v[14] = rot8_128(v[14]);
+ v[15] = rot8_128(v[15]);
+ v[8] = add_128(v[8], v[12]);
+ v[9] = add_128(v[9], v[13]);
+ v[10] = add_128(v[10], v[14]);
+ v[11] = add_128(v[11], v[15]);
+ v[4] = xor_128(v[4], v[8]);
+ v[5] = xor_128(v[5], v[9]);
+ v[6] = xor_128(v[6], v[10]);
+ v[7] = xor_128(v[7], v[11]);
+ v[4] = rot7_128(v[4]);
+ v[5] = rot7_128(v[5]);
+ v[6] = rot7_128(v[6]);
+ v[7] = rot7_128(v[7]);
+
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+ v[0] = add_128(v[0], v[5]);
+ v[1] = add_128(v[1], v[6]);
+ v[2] = add_128(v[2], v[7]);
+ v[3] = add_128(v[3], v[4]);
+ v[15] = xor_128(v[15], v[0]);
+ v[12] = xor_128(v[12], v[1]);
+ v[13] = xor_128(v[13], v[2]);
+ v[14] = xor_128(v[14], v[3]);
+ v[15] = rot16_128(v[15]);
+ v[12] = rot16_128(v[12]);
+ v[13] = rot16_128(v[13]);
+ v[14] = rot16_128(v[14]);
+ v[10] = add_128(v[10], v[15]);
+ v[11] = add_128(v[11], v[12]);
+ v[8] = add_128(v[8], v[13]);
+ v[9] = add_128(v[9], v[14]);
+ v[5] = xor_128(v[5], v[10]);
+ v[6] = xor_128(v[6], v[11]);
+ v[7] = xor_128(v[7], v[8]);
+ v[4] = xor_128(v[4], v[9]);
+ v[5] = rot12_128(v[5]);
+ v[6] = rot12_128(v[6]);
+ v[7] = rot12_128(v[7]);
+ v[4] = rot12_128(v[4]);
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+ v[0] = add_128(v[0], v[5]);
+ v[1] = add_128(v[1], v[6]);
+ v[2] = add_128(v[2], v[7]);
+ v[3] = add_128(v[3], v[4]);
+ v[15] = xor_128(v[15], v[0]);
+ v[12] = xor_128(v[12], v[1]);
+ v[13] = xor_128(v[13], v[2]);
+ v[14] = xor_128(v[14], v[3]);
+ v[15] = rot8_128(v[15]);
+ v[12] = rot8_128(v[12]);
+ v[13] = rot8_128(v[13]);
+ v[14] = rot8_128(v[14]);
+ v[10] = add_128(v[10], v[15]);
+ v[11] = add_128(v[11], v[12]);
+ v[8] = add_128(v[8], v[13]);
+ v[9] = add_128(v[9], v[14]);
+ v[5] = xor_128(v[5], v[10]);
+ v[6] = xor_128(v[6], v[11]);
+ v[7] = xor_128(v[7], v[8]);
+ v[4] = xor_128(v[4], v[9]);
+ v[5] = rot7_128(v[5]);
+ v[6] = rot7_128(v[6]);
+ v[7] = rot7_128(v[7]);
+ v[4] = rot7_128(v[4]);
+}
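+
+// round_fn4 is the 4-way transposed round: the first half mixes columns
+// (v[i] with v[i+4], v[i+8], v[i+12]) and the second half mixes diagonals by
+// rotating the index pattern (v[0] with v[5], v[10], v[15], and so on) rather
+// than physically shuffling rows as the compress_pre path does.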
+
+INLINE void transpose_vecs_128(__m128i vecs[4]) {
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
+ // 22/33. Note that this doesn't split the vector into two lanes, as the
+ // AVX2 counterparts do.
+ __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+
+ // Interleave 64-bit lanes.
+ __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
+ __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
+ __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
+ __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
+
+ vecs[0] = abcd_0;
+ vecs[1] = abcd_1;
+ vecs[2] = abcd_2;
+ vecs[3] = abcd_3;
+}
+
+INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
+ size_t block_offset, __m128i out[16]) {
+ out[0] = loadu_128(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
+ out[1] = loadu_128(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
+ out[2] = loadu_128(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
+ out[3] = loadu_128(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
+ out[4] = loadu_128(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
+ out[5] = loadu_128(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
+ out[6] = loadu_128(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
+ out[7] = loadu_128(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
+ out[8] = loadu_128(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
+ out[9] = loadu_128(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
+ out[10] = loadu_128(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
+ out[11] = loadu_128(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
+ out[12] = loadu_128(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
+ out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
+ out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
+ out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
+ for (size_t i = 0; i < 4; ++i) {
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+ }
+ transpose_vecs_128(&out[0]);
+ transpose_vecs_128(&out[4]);
+ transpose_vecs_128(&out[8]);
+ transpose_vecs_128(&out[12]);
+}
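+
+// transpose_msg_vecs4 gathers four 16-byte vectors from each input and
+// transposes them so lane i of every message vector belongs to input i; the
+// prefetch touches data 256 bytes (four blocks) ahead of the current block.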
+
+INLINE void load_counters4(uint64_t counter, bool increment_counter,
+ __m128i *out_lo, __m128i *out_hi) {
+ uint64_t mask = (increment_counter ? ~0 : 0);
+ __m256i mask_vec = _mm256_set1_epi64x(mask);
+ __m256i deltas = _mm256_setr_epi64x(0, 1, 2, 3);
+ deltas = _mm256_and_si256(mask_vec, deltas);
+ __m256i counters =
+ _mm256_add_epi64(_mm256_set1_epi64x((int64_t)counter), deltas);
+ *out_lo = _mm256_cvtepi64_epi32(counters);
+ *out_hi = _mm256_cvtepi64_epi32(_mm256_srli_epi64(counters, 32));
+}
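+
+// load_counters4 is branch-free: the mask zeroes the lane deltas when the
+// counter should not increment. The _mm256_cvtepi64_epi32 narrowing (VPMOVQD)
+// needs AVX-512VL, which is why even this 4-wide path lives in the AVX-512
+// file.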
+
+static
+void blake3_hash4_avx512(const uint8_t *const *inputs, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ __m128i h_vecs[8] = {
+ set1_128(key[0]), set1_128(key[1]), set1_128(key[2]), set1_128(key[3]),
+ set1_128(key[4]), set1_128(key[5]), set1_128(key[6]), set1_128(key[7]),
+ };
+ __m128i counter_low_vec, counter_high_vec;
+ load_counters4(counter, increment_counter, &counter_low_vec,
+ &counter_high_vec);
+ uint8_t block_flags = flags | flags_start;
+
+ for (size_t block = 0; block < blocks; block++) {
+ if (block + 1 == blocks) {
+ block_flags |= flags_end;
+ }
+ __m128i block_len_vec = set1_128(BLAKE3_BLOCK_LEN);
+ __m128i block_flags_vec = set1_128(block_flags);
+ __m128i msg_vecs[16];
+ transpose_msg_vecs4(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+ __m128i v[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1_128(IV[0]), set1_128(IV[1]), set1_128(IV[2]), set1_128(IV[3]),
+ counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+ };
+ round_fn4(v, msg_vecs, 0);
+ round_fn4(v, msg_vecs, 1);
+ round_fn4(v, msg_vecs, 2);
+ round_fn4(v, msg_vecs, 3);
+ round_fn4(v, msg_vecs, 4);
+ round_fn4(v, msg_vecs, 5);
+ round_fn4(v, msg_vecs, 6);
+ h_vecs[0] = xor_128(v[0], v[8]);
+ h_vecs[1] = xor_128(v[1], v[9]);
+ h_vecs[2] = xor_128(v[2], v[10]);
+ h_vecs[3] = xor_128(v[3], v[11]);
+ h_vecs[4] = xor_128(v[4], v[12]);
+ h_vecs[5] = xor_128(v[5], v[13]);
+ h_vecs[6] = xor_128(v[6], v[14]);
+ h_vecs[7] = xor_128(v[7], v[15]);
+
+ block_flags = flags;
+ }
+
+ transpose_vecs_128(&h_vecs[0]);
+ transpose_vecs_128(&h_vecs[4]);
+ // The first four vecs now contain the first half of each output, and the
+ // second four vecs contain the second half of each output.
+ storeu_128(h_vecs[0], &out[0 * sizeof(__m128i)]);
+ storeu_128(h_vecs[4], &out[1 * sizeof(__m128i)]);
+ storeu_128(h_vecs[1], &out[2 * sizeof(__m128i)]);
+ storeu_128(h_vecs[5], &out[3 * sizeof(__m128i)]);
+ storeu_128(h_vecs[2], &out[4 * sizeof(__m128i)]);
+ storeu_128(h_vecs[6], &out[5 * sizeof(__m128i)]);
+ storeu_128(h_vecs[3], &out[6 * sizeof(__m128i)]);
+ storeu_128(h_vecs[7], &out[7 * sizeof(__m128i)]);
+}
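+
+// The XOR of the two state halves above (v[0..7] ^ v[8..15]) is the truncated
+// BLAKE3 compression output; it becomes the chaining value fed into the next
+// block of the same input. The 8- and 16-lane kernels below repeat the same
+// structure at wider vector widths.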
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash8_avx512
+ * ----------------------------------------------------------------------------
+ */
+
+INLINE void round_fn8(__m256i v[16], __m256i m[16], size_t r) {
+ v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+ v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+ v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+ v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+ v[0] = add_256(v[0], v[4]);
+ v[1] = add_256(v[1], v[5]);
+ v[2] = add_256(v[2], v[6]);
+ v[3] = add_256(v[3], v[7]);
+ v[12] = xor_256(v[12], v[0]);
+ v[13] = xor_256(v[13], v[1]);
+ v[14] = xor_256(v[14], v[2]);
+ v[15] = xor_256(v[15], v[3]);
+ v[12] = rot16_256(v[12]);
+ v[13] = rot16_256(v[13]);
+ v[14] = rot16_256(v[14]);
+ v[15] = rot16_256(v[15]);
+ v[8] = add_256(v[8], v[12]);
+ v[9] = add_256(v[9], v[13]);
+ v[10] = add_256(v[10], v[14]);
+ v[11] = add_256(v[11], v[15]);
+ v[4] = xor_256(v[4], v[8]);
+ v[5] = xor_256(v[5], v[9]);
+ v[6] = xor_256(v[6], v[10]);
+ v[7] = xor_256(v[7], v[11]);
+ v[4] = rot12_256(v[4]);
+ v[5] = rot12_256(v[5]);
+ v[6] = rot12_256(v[6]);
+ v[7] = rot12_256(v[7]);
+ v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+ v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+ v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+ v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+ v[0] = add_256(v[0], v[4]);
+ v[1] = add_256(v[1], v[5]);
+ v[2] = add_256(v[2], v[6]);
+ v[3] = add_256(v[3], v[7]);
+ v[12] = xor_256(v[12], v[0]);
+ v[13] = xor_256(v[13], v[1]);
+ v[14] = xor_256(v[14], v[2]);
+ v[15] = xor_256(v[15], v[3]);
+ v[12] = rot8_256(v[12]);
+ v[13] = rot8_256(v[13]);
+ v[14] = rot8_256(v[14]);
+ v[15] = rot8_256(v[15]);
+ v[8] = add_256(v[8], v[12]);
+ v[9] = add_256(v[9], v[13]);
+ v[10] = add_256(v[10], v[14]);
+ v[11] = add_256(v[11], v[15]);
+ v[4] = xor_256(v[4], v[8]);
+ v[5] = xor_256(v[5], v[9]);
+ v[6] = xor_256(v[6], v[10]);
+ v[7] = xor_256(v[7], v[11]);
+ v[4] = rot7_256(v[4]);
+ v[5] = rot7_256(v[5]);
+ v[6] = rot7_256(v[6]);
+ v[7] = rot7_256(v[7]);
+
+ v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+ v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+ v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+ v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+ v[0] = add_256(v[0], v[5]);
+ v[1] = add_256(v[1], v[6]);
+ v[2] = add_256(v[2], v[7]);
+ v[3] = add_256(v[3], v[4]);
+ v[15] = xor_256(v[15], v[0]);
+ v[12] = xor_256(v[12], v[1]);
+ v[13] = xor_256(v[13], v[2]);
+ v[14] = xor_256(v[14], v[3]);
+ v[15] = rot16_256(v[15]);
+ v[12] = rot16_256(v[12]);
+ v[13] = rot16_256(v[13]);
+ v[14] = rot16_256(v[14]);
+ v[10] = add_256(v[10], v[15]);
+ v[11] = add_256(v[11], v[12]);
+ v[8] = add_256(v[8], v[13]);
+ v[9] = add_256(v[9], v[14]);
+ v[5] = xor_256(v[5], v[10]);
+ v[6] = xor_256(v[6], v[11]);
+ v[7] = xor_256(v[7], v[8]);
+ v[4] = xor_256(v[4], v[9]);
+ v[5] = rot12_256(v[5]);
+ v[6] = rot12_256(v[6]);
+ v[7] = rot12_256(v[7]);
+ v[4] = rot12_256(v[4]);
+ v[0] = add_256(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+ v[1] = add_256(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+ v[2] = add_256(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+ v[3] = add_256(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+ v[0] = add_256(v[0], v[5]);
+ v[1] = add_256(v[1], v[6]);
+ v[2] = add_256(v[2], v[7]);
+ v[3] = add_256(v[3], v[4]);
+ v[15] = xor_256(v[15], v[0]);
+ v[12] = xor_256(v[12], v[1]);
+ v[13] = xor_256(v[13], v[2]);
+ v[14] = xor_256(v[14], v[3]);
+ v[15] = rot8_256(v[15]);
+ v[12] = rot8_256(v[12]);
+ v[13] = rot8_256(v[13]);
+ v[14] = rot8_256(v[14]);
+ v[10] = add_256(v[10], v[15]);
+ v[11] = add_256(v[11], v[12]);
+ v[8] = add_256(v[8], v[13]);
+ v[9] = add_256(v[9], v[14]);
+ v[5] = xor_256(v[5], v[10]);
+ v[6] = xor_256(v[6], v[11]);
+ v[7] = xor_256(v[7], v[8]);
+ v[4] = xor_256(v[4], v[9]);
+ v[5] = rot7_256(v[5]);
+ v[6] = rot7_256(v[6]);
+ v[7] = rot7_256(v[7]);
+ v[4] = rot7_256(v[4]);
+}
+
+INLINE void transpose_vecs_256(__m256i vecs[8]) {
+ // Interleave 32-bit lanes. The low unpack is lanes 00/11/44/55, and the high
+ // is 22/33/66/77.
+ __m256i ab_0145 = _mm256_unpacklo_epi32(vecs[0], vecs[1]);
+ __m256i ab_2367 = _mm256_unpackhi_epi32(vecs[0], vecs[1]);
+ __m256i cd_0145 = _mm256_unpacklo_epi32(vecs[2], vecs[3]);
+ __m256i cd_2367 = _mm256_unpackhi_epi32(vecs[2], vecs[3]);
+ __m256i ef_0145 = _mm256_unpacklo_epi32(vecs[4], vecs[5]);
+ __m256i ef_2367 = _mm256_unpackhi_epi32(vecs[4], vecs[5]);
+ __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
+ __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
+
+ // Interleave 64-bit lanes. The low unpack is lanes 00/22 and the high is
+ // 11/33.
+ __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
+ __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
+ __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);
+ __m256i abcd_37 = _mm256_unpackhi_epi64(ab_2367, cd_2367);
+ __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);
+ __m256i efgh_15 = _mm256_unpackhi_epi64(ef_0145, gh_0145);
+ __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);
+ __m256i efgh_37 = _mm256_unpackhi_epi64(ef_2367, gh_2367);
+
+ // Interleave 128-bit lanes.
+ vecs[0] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x20);
+ vecs[1] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x20);
+ vecs[2] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x20);
+ vecs[3] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x20);
+ vecs[4] = _mm256_permute2x128_si256(abcd_04, efgh_04, 0x31);
+ vecs[5] = _mm256_permute2x128_si256(abcd_15, efgh_15, 0x31);
+ vecs[6] = _mm256_permute2x128_si256(abcd_26, efgh_26, 0x31);
+ vecs[7] = _mm256_permute2x128_si256(abcd_37, efgh_37, 0x31);
+}
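+
+// The three interleave stages (32-bit, 64-bit, then 128-bit) compose an 8x8
+// transpose of 32-bit words: afterwards vecs[i] holds word i of each of the
+// eight original vectors.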
+
+INLINE void transpose_msg_vecs8(const uint8_t *const *inputs,
+ size_t block_offset, __m256i out[16]) {
+ out[0] = loadu_256(&inputs[0][block_offset + 0 * sizeof(__m256i)]);
+ out[1] = loadu_256(&inputs[1][block_offset + 0 * sizeof(__m256i)]);
+ out[2] = loadu_256(&inputs[2][block_offset + 0 * sizeof(__m256i)]);
+ out[3] = loadu_256(&inputs[3][block_offset + 0 * sizeof(__m256i)]);
+ out[4] = loadu_256(&inputs[4][block_offset + 0 * sizeof(__m256i)]);
+ out[5] = loadu_256(&inputs[5][block_offset + 0 * sizeof(__m256i)]);
+ out[6] = loadu_256(&inputs[6][block_offset + 0 * sizeof(__m256i)]);
+ out[7] = loadu_256(&inputs[7][block_offset + 0 * sizeof(__m256i)]);
+ out[8] = loadu_256(&inputs[0][block_offset + 1 * sizeof(__m256i)]);
+ out[9] = loadu_256(&inputs[1][block_offset + 1 * sizeof(__m256i)]);
+ out[10] = loadu_256(&inputs[2][block_offset + 1 * sizeof(__m256i)]);
+ out[11] = loadu_256(&inputs[3][block_offset + 1 * sizeof(__m256i)]);
+ out[12] = loadu_256(&inputs[4][block_offset + 1 * sizeof(__m256i)]);
+ out[13] = loadu_256(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
+ out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
+ out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
+ for (size_t i = 0; i < 8; ++i) {
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+ }
+ transpose_vecs_256(&out[0]);
+ transpose_vecs_256(&out[8]);
+}
+
+INLINE void load_counters8(uint64_t counter, bool increment_counter,
+ __m256i *out_lo, __m256i *out_hi) {
+ uint64_t mask = (increment_counter ? ~0 : 0);
+ __m512i mask_vec = _mm512_set1_epi64(mask);
+ __m512i deltas = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ deltas = _mm512_and_si512(mask_vec, deltas);
+ __m512i counters =
+ _mm512_add_epi64(_mm512_set1_epi64((int64_t)counter), deltas);
+ *out_lo = _mm512_cvtepi64_epi32(counters);
+ *out_hi = _mm512_cvtepi64_epi32(_mm512_srli_epi64(counters, 32));
+}
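+
+// Same scheme as load_counters4, widened to eight lanes: eight 64-bit counters
+// are built in one __m512i and narrowed into their low and high 32-bit halves.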
+
+static
+void blake3_hash8_avx512(const uint8_t *const *inputs, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ __m256i h_vecs[8] = {
+ set1_256(key[0]), set1_256(key[1]), set1_256(key[2]), set1_256(key[3]),
+ set1_256(key[4]), set1_256(key[5]), set1_256(key[6]), set1_256(key[7]),
+ };
+ __m256i counter_low_vec, counter_high_vec;
+ load_counters8(counter, increment_counter, &counter_low_vec,
+ &counter_high_vec);
+ uint8_t block_flags = flags | flags_start;
+
+ for (size_t block = 0; block < blocks; block++) {
+ if (block + 1 == blocks) {
+ block_flags |= flags_end;
+ }
+ __m256i block_len_vec = set1_256(BLAKE3_BLOCK_LEN);
+ __m256i block_flags_vec = set1_256(block_flags);
+ __m256i msg_vecs[16];
+ transpose_msg_vecs8(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+ __m256i v[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1_256(IV[0]), set1_256(IV[1]), set1_256(IV[2]), set1_256(IV[3]),
+ counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+ };
+ round_fn8(v, msg_vecs, 0);
+ round_fn8(v, msg_vecs, 1);
+ round_fn8(v, msg_vecs, 2);
+ round_fn8(v, msg_vecs, 3);
+ round_fn8(v, msg_vecs, 4);
+ round_fn8(v, msg_vecs, 5);
+ round_fn8(v, msg_vecs, 6);
+ h_vecs[0] = xor_256(v[0], v[8]);
+ h_vecs[1] = xor_256(v[1], v[9]);
+ h_vecs[2] = xor_256(v[2], v[10]);
+ h_vecs[3] = xor_256(v[3], v[11]);
+ h_vecs[4] = xor_256(v[4], v[12]);
+ h_vecs[5] = xor_256(v[5], v[13]);
+ h_vecs[6] = xor_256(v[6], v[14]);
+ h_vecs[7] = xor_256(v[7], v[15]);
+
+ block_flags = flags;
+ }
+
+ transpose_vecs_256(h_vecs);
+ storeu_256(h_vecs[0], &out[0 * sizeof(__m256i)]);
+ storeu_256(h_vecs[1], &out[1 * sizeof(__m256i)]);
+ storeu_256(h_vecs[2], &out[2 * sizeof(__m256i)]);
+ storeu_256(h_vecs[3], &out[3 * sizeof(__m256i)]);
+ storeu_256(h_vecs[4], &out[4 * sizeof(__m256i)]);
+ storeu_256(h_vecs[5], &out[5 * sizeof(__m256i)]);
+ storeu_256(h_vecs[6], &out[6 * sizeof(__m256i)]);
+ storeu_256(h_vecs[7], &out[7 * sizeof(__m256i)]);
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash16_avx512
+ * ----------------------------------------------------------------------------
+ */
+
+INLINE void round_fn16(__m512i v[16], __m512i m[16], size_t r) {
+ v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+ v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+ v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+ v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+ v[0] = add_512(v[0], v[4]);
+ v[1] = add_512(v[1], v[5]);
+ v[2] = add_512(v[2], v[6]);
+ v[3] = add_512(v[3], v[7]);
+ v[12] = xor_512(v[12], v[0]);
+ v[13] = xor_512(v[13], v[1]);
+ v[14] = xor_512(v[14], v[2]);
+ v[15] = xor_512(v[15], v[3]);
+ v[12] = rot16_512(v[12]);
+ v[13] = rot16_512(v[13]);
+ v[14] = rot16_512(v[14]);
+ v[15] = rot16_512(v[15]);
+ v[8] = add_512(v[8], v[12]);
+ v[9] = add_512(v[9], v[13]);
+ v[10] = add_512(v[10], v[14]);
+ v[11] = add_512(v[11], v[15]);
+ v[4] = xor_512(v[4], v[8]);
+ v[5] = xor_512(v[5], v[9]);
+ v[6] = xor_512(v[6], v[10]);
+ v[7] = xor_512(v[7], v[11]);
+ v[4] = rot12_512(v[4]);
+ v[5] = rot12_512(v[5]);
+ v[6] = rot12_512(v[6]);
+ v[7] = rot12_512(v[7]);
+ v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+ v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+ v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+ v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+ v[0] = add_512(v[0], v[4]);
+ v[1] = add_512(v[1], v[5]);
+ v[2] = add_512(v[2], v[6]);
+ v[3] = add_512(v[3], v[7]);
+ v[12] = xor_512(v[12], v[0]);
+ v[13] = xor_512(v[13], v[1]);
+ v[14] = xor_512(v[14], v[2]);
+ v[15] = xor_512(v[15], v[3]);
+ v[12] = rot8_512(v[12]);
+ v[13] = rot8_512(v[13]);
+ v[14] = rot8_512(v[14]);
+ v[15] = rot8_512(v[15]);
+ v[8] = add_512(v[8], v[12]);
+ v[9] = add_512(v[9], v[13]);
+ v[10] = add_512(v[10], v[14]);
+ v[11] = add_512(v[11], v[15]);
+ v[4] = xor_512(v[4], v[8]);
+ v[5] = xor_512(v[5], v[9]);
+ v[6] = xor_512(v[6], v[10]);
+ v[7] = xor_512(v[7], v[11]);
+ v[4] = rot7_512(v[4]);
+ v[5] = rot7_512(v[5]);
+ v[6] = rot7_512(v[6]);
+ v[7] = rot7_512(v[7]);
+
+ v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+ v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+ v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+ v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+ v[0] = add_512(v[0], v[5]);
+ v[1] = add_512(v[1], v[6]);
+ v[2] = add_512(v[2], v[7]);
+ v[3] = add_512(v[3], v[4]);
+ v[15] = xor_512(v[15], v[0]);
+ v[12] = xor_512(v[12], v[1]);
+ v[13] = xor_512(v[13], v[2]);
+ v[14] = xor_512(v[14], v[3]);
+ v[15] = rot16_512(v[15]);
+ v[12] = rot16_512(v[12]);
+ v[13] = rot16_512(v[13]);
+ v[14] = rot16_512(v[14]);
+ v[10] = add_512(v[10], v[15]);
+ v[11] = add_512(v[11], v[12]);
+ v[8] = add_512(v[8], v[13]);
+ v[9] = add_512(v[9], v[14]);
+ v[5] = xor_512(v[5], v[10]);
+ v[6] = xor_512(v[6], v[11]);
+ v[7] = xor_512(v[7], v[8]);
+ v[4] = xor_512(v[4], v[9]);
+ v[5] = rot12_512(v[5]);
+ v[6] = rot12_512(v[6]);
+ v[7] = rot12_512(v[7]);
+ v[4] = rot12_512(v[4]);
+ v[0] = add_512(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+ v[1] = add_512(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+ v[2] = add_512(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+ v[3] = add_512(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+ v[0] = add_512(v[0], v[5]);
+ v[1] = add_512(v[1], v[6]);
+ v[2] = add_512(v[2], v[7]);
+ v[3] = add_512(v[3], v[4]);
+ v[15] = xor_512(v[15], v[0]);
+ v[12] = xor_512(v[12], v[1]);
+ v[13] = xor_512(v[13], v[2]);
+ v[14] = xor_512(v[14], v[3]);
+ v[15] = rot8_512(v[15]);
+ v[12] = rot8_512(v[12]);
+ v[13] = rot8_512(v[13]);
+ v[14] = rot8_512(v[14]);
+ v[10] = add_512(v[10], v[15]);
+ v[11] = add_512(v[11], v[12]);
+ v[8] = add_512(v[8], v[13]);
+ v[9] = add_512(v[9], v[14]);
+ v[5] = xor_512(v[5], v[10]);
+ v[6] = xor_512(v[6], v[11]);
+ v[7] = xor_512(v[7], v[8]);
+ v[4] = xor_512(v[4], v[9]);
+ v[5] = rot7_512(v[5]);
+ v[6] = rot7_512(v[6]);
+ v[7] = rot7_512(v[7]);
+ v[4] = rot7_512(v[4]);
+}
+
+// 0b10001000, or lanes a0/a2/b0/b2 in little-endian order
+#define LO_IMM8 0x88
+
+INLINE __m512i unpack_lo_128(__m512i a, __m512i b) {
+ return _mm512_shuffle_i32x4(a, b, LO_IMM8);
+}
+
+// 0b11011101, or lanes a1/a3/b1/b3 in little-endian order
+#define HI_IMM8 0xdd
+
+INLINE __m512i unpack_hi_128(__m512i a, __m512i b) {
+ return _mm512_shuffle_i32x4(a, b, HI_IMM8);
+}
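+
+// _mm512_shuffle_i32x4 selects two 128-bit lanes from each source: imm8 bits
+// [1:0] and [3:2] pick the lanes of a for the low half of the result, and bits
+// [5:4] and [7:6] pick the lanes of b for the high half. With 0x88 and 0xdd,
+// these helpers act as unpack-low and unpack-high at 128-bit granularity.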
+
+INLINE void transpose_vecs_512(__m512i vecs[16]) {
+ // Interleave 32-bit lanes. The _0 unpack is lanes
+ // 0/0/1/1/4/4/5/5/8/8/9/9/12/12/13/13, and the _2 unpack is lanes
+ // 2/2/3/3/6/6/7/7/10/10/11/11/14/14/15/15.
+ __m512i ab_0 = _mm512_unpacklo_epi32(vecs[0], vecs[1]);
+ __m512i ab_2 = _mm512_unpackhi_epi32(vecs[0], vecs[1]);
+ __m512i cd_0 = _mm512_unpacklo_epi32(vecs[2], vecs[3]);
+ __m512i cd_2 = _mm512_unpackhi_epi32(vecs[2], vecs[3]);
+ __m512i ef_0 = _mm512_unpacklo_epi32(vecs[4], vecs[5]);
+ __m512i ef_2 = _mm512_unpackhi_epi32(vecs[4], vecs[5]);
+ __m512i gh_0 = _mm512_unpacklo_epi32(vecs[6], vecs[7]);
+ __m512i gh_2 = _mm512_unpackhi_epi32(vecs[6], vecs[7]);
+ __m512i ij_0 = _mm512_unpacklo_epi32(vecs[8], vecs[9]);
+ __m512i ij_2 = _mm512_unpackhi_epi32(vecs[8], vecs[9]);
+ __m512i kl_0 = _mm512_unpacklo_epi32(vecs[10], vecs[11]);
+ __m512i kl_2 = _mm512_unpackhi_epi32(vecs[10], vecs[11]);
+ __m512i mn_0 = _mm512_unpacklo_epi32(vecs[12], vecs[13]);
+ __m512i mn_2 = _mm512_unpackhi_epi32(vecs[12], vecs[13]);
+ __m512i op_0 = _mm512_unpacklo_epi32(vecs[14], vecs[15]);
+ __m512i op_2 = _mm512_unpackhi_epi32(vecs[14], vecs[15]);
+
+ // Interleave 64-bit lanes. The _0 unpack is lanes
+ // 0/0/0/0/4/4/4/4/8/8/8/8/12/12/12/12, the _1 unpack is lanes
+ // 1/1/1/1/5/5/5/5/9/9/9/9/13/13/13/13, the _2 unpack is lanes
+ // 2/2/2/2/6/6/6/6/10/10/10/10/14/14/14/14, and the _3 unpack is lanes
+ // 3/3/3/3/7/7/7/7/11/11/11/11/15/15/15/15.
+ __m512i abcd_0 = _mm512_unpacklo_epi64(ab_0, cd_0);
+ __m512i abcd_1 = _mm512_unpackhi_epi64(ab_0, cd_0);
+ __m512i abcd_2 = _mm512_unpacklo_epi64(ab_2, cd_2);
+ __m512i abcd_3 = _mm512_unpackhi_epi64(ab_2, cd_2);
+ __m512i efgh_0 = _mm512_unpacklo_epi64(ef_0, gh_0);
+ __m512i efgh_1 = _mm512_unpackhi_epi64(ef_0, gh_0);
+ __m512i efgh_2 = _mm512_unpacklo_epi64(ef_2, gh_2);
+ __m512i efgh_3 = _mm512_unpackhi_epi64(ef_2, gh_2);
+ __m512i ijkl_0 = _mm512_unpacklo_epi64(ij_0, kl_0);
+ __m512i ijkl_1 = _mm512_unpackhi_epi64(ij_0, kl_0);
+ __m512i ijkl_2 = _mm512_unpacklo_epi64(ij_2, kl_2);
+ __m512i ijkl_3 = _mm512_unpackhi_epi64(ij_2, kl_2);
+ __m512i mnop_0 = _mm512_unpacklo_epi64(mn_0, op_0);
+ __m512i mnop_1 = _mm512_unpackhi_epi64(mn_0, op_0);
+ __m512i mnop_2 = _mm512_unpacklo_epi64(mn_2, op_2);
+ __m512i mnop_3 = _mm512_unpackhi_epi64(mn_2, op_2);
+
+ // Interleave 128-bit lanes. The _0 unpack is
+ // 0/0/0/0/8/8/8/8/0/0/0/0/8/8/8/8, the _1 unpack is
+ // 1/1/1/1/9/9/9/9/1/1/1/1/9/9/9/9, and so on.
+ __m512i abcdefgh_0 = unpack_lo_128(abcd_0, efgh_0);
+ __m512i abcdefgh_1 = unpack_lo_128(abcd_1, efgh_1);
+ __m512i abcdefgh_2 = unpack_lo_128(abcd_2, efgh_2);
+ __m512i abcdefgh_3 = unpack_lo_128(abcd_3, efgh_3);
+ __m512i abcdefgh_4 = unpack_hi_128(abcd_0, efgh_0);
+ __m512i abcdefgh_5 = unpack_hi_128(abcd_1, efgh_1);
+ __m512i abcdefgh_6 = unpack_hi_128(abcd_2, efgh_2);
+ __m512i abcdefgh_7 = unpack_hi_128(abcd_3, efgh_3);
+ __m512i ijklmnop_0 = unpack_lo_128(ijkl_0, mnop_0);
+ __m512i ijklmnop_1 = unpack_lo_128(ijkl_1, mnop_1);
+ __m512i ijklmnop_2 = unpack_lo_128(ijkl_2, mnop_2);
+ __m512i ijklmnop_3 = unpack_lo_128(ijkl_3, mnop_3);
+ __m512i ijklmnop_4 = unpack_hi_128(ijkl_0, mnop_0);
+ __m512i ijklmnop_5 = unpack_hi_128(ijkl_1, mnop_1);
+ __m512i ijklmnop_6 = unpack_hi_128(ijkl_2, mnop_2);
+ __m512i ijklmnop_7 = unpack_hi_128(ijkl_3, mnop_3);
+
+ // Interleave 128-bit lanes again for the final outputs.
+ vecs[0] = unpack_lo_128(abcdefgh_0, ijklmnop_0);
+ vecs[1] = unpack_lo_128(abcdefgh_1, ijklmnop_1);
+ vecs[2] = unpack_lo_128(abcdefgh_2, ijklmnop_2);
+ vecs[3] = unpack_lo_128(abcdefgh_3, ijklmnop_3);
+ vecs[4] = unpack_lo_128(abcdefgh_4, ijklmnop_4);
+ vecs[5] = unpack_lo_128(abcdefgh_5, ijklmnop_5);
+ vecs[6] = unpack_lo_128(abcdefgh_6, ijklmnop_6);
+ vecs[7] = unpack_lo_128(abcdefgh_7, ijklmnop_7);
+ vecs[8] = unpack_hi_128(abcdefgh_0, ijklmnop_0);
+ vecs[9] = unpack_hi_128(abcdefgh_1, ijklmnop_1);
+ vecs[10] = unpack_hi_128(abcdefgh_2, ijklmnop_2);
+ vecs[11] = unpack_hi_128(abcdefgh_3, ijklmnop_3);
+ vecs[12] = unpack_hi_128(abcdefgh_4, ijklmnop_4);
+ vecs[13] = unpack_hi_128(abcdefgh_5, ijklmnop_5);
+ vecs[14] = unpack_hi_128(abcdefgh_6, ijklmnop_6);
+ vecs[15] = unpack_hi_128(abcdefgh_7, ijklmnop_7);
+}
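+
+// As in the 128- and 256-bit variants, the four interleave stages compose a
+// full 16x16 transpose of 32-bit words: afterwards vecs[i] holds word i of
+// each of the sixteen original vectors.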
+
+INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
+ size_t block_offset, __m512i out[16]) {
+ out[0] = loadu_512(&inputs[0][block_offset]);
+ out[1] = loadu_512(&inputs[1][block_offset]);
+ out[2] = loadu_512(&inputs[2][block_offset]);
+ out[3] = loadu_512(&inputs[3][block_offset]);
+ out[4] = loadu_512(&inputs[4][block_offset]);
+ out[5] = loadu_512(&inputs[5][block_offset]);
+ out[6] = loadu_512(&inputs[6][block_offset]);
+ out[7] = loadu_512(&inputs[7][block_offset]);
+ out[8] = loadu_512(&inputs[8][block_offset]);
+ out[9] = loadu_512(&inputs[9][block_offset]);
+ out[10] = loadu_512(&inputs[10][block_offset]);
+ out[11] = loadu_512(&inputs[11][block_offset]);
+ out[12] = loadu_512(&inputs[12][block_offset]);
+ out[13] = loadu_512(&inputs[13][block_offset]);
+ out[14] = loadu_512(&inputs[14][block_offset]);
+ out[15] = loadu_512(&inputs[15][block_offset]);
+ for (size_t i = 0; i < 16; ++i) {
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+ }
+ transpose_vecs_512(out);
+}
+
+INLINE void load_counters16(uint64_t counter, bool increment_counter,
+ __m512i *out_lo, __m512i *out_hi) {
+ const __m512i mask = _mm512_set1_epi32(-(int32_t)increment_counter);
+ const __m512i add0 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ const __m512i add1 = _mm512_and_si512(mask, add0);
+ __m512i l = _mm512_add_epi32(_mm512_set1_epi32((int32_t)counter), add1);
+ __mmask16 carry = _mm512_cmp_epu32_mask(l, add1, _MM_CMPINT_LT);
+ __m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32((int32_t)(counter >> 32)), carry, _mm512_set1_epi32((int32_t)(counter >> 32)), _mm512_set1_epi32(1));
+ *out_lo = l;
+ *out_hi = h;
+}
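+
+// Unlike the 4- and 8-lane loaders, this one works directly in 32-bit lanes:
+// it adds the per-lane deltas to the low counter words, detects wraparound
+// with an unsigned compare (l < add1 implies overflow), and uses the resulting
+// mask to add the carry into the high words.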
+
+static
+void blake3_hash16_avx512(const uint8_t *const *inputs, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end,
+ uint8_t *out) {
+ __m512i h_vecs[8] = {
+ set1_512(key[0]), set1_512(key[1]), set1_512(key[2]), set1_512(key[3]),
+ set1_512(key[4]), set1_512(key[5]), set1_512(key[6]), set1_512(key[7]),
+ };
+ __m512i counter_low_vec, counter_high_vec;
+ load_counters16(counter, increment_counter, &counter_low_vec,
+ &counter_high_vec);
+ uint8_t block_flags = flags | flags_start;
+
+ for (size_t block = 0; block < blocks; block++) {
+ if (block + 1 == blocks) {
+ block_flags |= flags_end;
+ }
+ __m512i block_len_vec = set1_512(BLAKE3_BLOCK_LEN);
+ __m512i block_flags_vec = set1_512(block_flags);
+ __m512i msg_vecs[16];
+ transpose_msg_vecs16(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+ __m512i v[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1_512(IV[0]), set1_512(IV[1]), set1_512(IV[2]), set1_512(IV[3]),
+ counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+ };
+ round_fn16(v, msg_vecs, 0);
+ round_fn16(v, msg_vecs, 1);
+ round_fn16(v, msg_vecs, 2);
+ round_fn16(v, msg_vecs, 3);
+ round_fn16(v, msg_vecs, 4);
+ round_fn16(v, msg_vecs, 5);
+ round_fn16(v, msg_vecs, 6);
+ h_vecs[0] = xor_512(v[0], v[8]);
+ h_vecs[1] = xor_512(v[1], v[9]);
+ h_vecs[2] = xor_512(v[2], v[10]);
+ h_vecs[3] = xor_512(v[3], v[11]);
+ h_vecs[4] = xor_512(v[4], v[12]);
+ h_vecs[5] = xor_512(v[5], v[13]);
+ h_vecs[6] = xor_512(v[6], v[14]);
+ h_vecs[7] = xor_512(v[7], v[15]);
+
+ block_flags = flags;
+ }
+
+ // transpose_vecs_512 operates on a 16x16 matrix of words, but we only have 8
+ // state vectors. Pad the matrix with zeros. After transposition, store the
+ // lower half of each vector.
+ __m512i padded[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1_512(0), set1_512(0), set1_512(0), set1_512(0),
+ set1_512(0), set1_512(0), set1_512(0), set1_512(0),
+ };
+ transpose_vecs_512(padded);
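+ // (__mmask8)-1 is the all-ones mask, so each store below writes the full low
+ // 256 bits (words 0..7) of the corresponding padded vector.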
+ _mm256_mask_storeu_epi32(&out[0 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[0]));
+ _mm256_mask_storeu_epi32(&out[1 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[1]));
+ _mm256_mask_storeu_epi32(&out[2 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[2]));
+ _mm256_mask_storeu_epi32(&out[3 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[3]));
+ _mm256_mask_storeu_epi32(&out[4 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[4]));
+ _mm256_mask_storeu_epi32(&out[5 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[5]));
+ _mm256_mask_storeu_epi32(&out[6 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[6]));
+ _mm256_mask_storeu_epi32(&out[7 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[7]));
+ _mm256_mask_storeu_epi32(&out[8 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[8]));
+ _mm256_mask_storeu_epi32(&out[9 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[9]));
+ _mm256_mask_storeu_epi32(&out[10 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[10]));
+ _mm256_mask_storeu_epi32(&out[11 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[11]));
+ _mm256_mask_storeu_epi32(&out[12 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[12]));
+ _mm256_mask_storeu_epi32(&out[13 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[13]));
+ _mm256_mask_storeu_epi32(&out[14 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[14]));
+ _mm256_mask_storeu_epi32(&out[15 * sizeof(__m256i)], (__mmask8)-1, _mm512_castsi512_si256(padded[15]));
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash_many_avx512
+ * ----------------------------------------------------------------------------
+ */
+
+INLINE void hash_one_avx512(const uint8_t *input, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+ uint32_t cv[8];
+ memcpy(cv, key, BLAKE3_KEY_LEN);
+ uint8_t block_flags = flags | flags_start;
+ while (blocks > 0) {
+ if (blocks == 1) {
+ block_flags |= flags_end;
+ }
+ blake3_compress_in_place_avx512(cv, input, BLAKE3_BLOCK_LEN, counter,
+ block_flags);
+ input = &input[BLAKE3_BLOCK_LEN];
+ blocks -= 1;
+ block_flags = flags;
+ }
+ memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
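+// Dispatcher for the AVX-512 backend: peel off batches of 16, 8, and 4 inputs
+// for the wide kernels above, then hash any remaining inputs one at a time.
+// When increment_counter is set, the counter advances by the batch size so
+// each input keeps its own block counter.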
+void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out) {
+ while (num_inputs >= 16) {
+ blake3_hash16_avx512(inputs, blocks, key, counter, increment_counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += 16;
+ }
+ inputs += 16;
+ num_inputs -= 16;
+ out = &out[16 * BLAKE3_OUT_LEN];
+ }
+ while (num_inputs >= 8) {
+ blake3_hash8_avx512(inputs, blocks, key, counter, increment_counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += 8;
+ }
+ inputs += 8;
+ num_inputs -= 8;
+ out = &out[8 * BLAKE3_OUT_LEN];
+ }
+ while (num_inputs >= 4) {
+ blake3_hash4_avx512(inputs, blocks, key, counter, increment_counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += 4;
+ }
+ inputs += 4;
+ num_inputs -= 4;
+ out = &out[4 * BLAKE3_OUT_LEN];
+ }
+ while (num_inputs > 0) {
+ hash_one_avx512(inputs[0], blocks, key, counter, flags, flags_start,
+ flags_end, out);
+ if (increment_counter) {
+ counter += 1;
+ }
+ inputs += 1;
+ num_inputs -= 1;
+ out = &out[BLAKE3_OUT_LEN];
+ }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
new file mode 100644
index 000000000000..3afc0e2250e2
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
@@ -0,0 +1,2601 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN _blake3_hash_many_avx512
+HIDDEN blake3_hash_many_avx512
+HIDDEN blake3_compress_in_place_avx512
+HIDDEN _blake3_compress_in_place_avx512
+HIDDEN blake3_compress_xof_avx512
+HIDDEN _blake3_compress_xof_avx512
+.global _blake3_hash_many_avx512
+.global blake3_hash_many_avx512
+.global blake3_compress_in_place_avx512
+.global _blake3_compress_in_place_avx512
+.global blake3_compress_xof_avx512
+.global _blake3_compress_xof_avx512
+
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+.p2align 6
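+# System V AMD64 arguments on entry: rdi = inputs, rsi = num_inputs,
+# rdx = blocks, rcx = key, r8 = counter, r9 = increment_counter. The remaining
+# parameters (flags at rbp+0x38, flags_start at rbp+0x40, flags_end at
+# rbp+0x48, out at rbp+0x50) are read from the caller's stack after the
+# prologue below.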
+_blake3_hash_many_avx512:
+blake3_hash_many_avx512:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 144
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9
+ kmovw k1, r9d
+ vmovd xmm0, r8d
+ vpbroadcastd ymm0, xmm0
+ shr r8, 32
+ vmovd xmm1, r8d
+ vpbroadcastd ymm1, xmm1
+ vmovdqa ymm4, ymm1
+ vmovdqa ymm5, ymm1
+ vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip]
+ vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip]
+ vpcmpltud k2, ymm2, ymm0
+ vpcmpltud k3, ymm3, ymm0
+ vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8}
+ vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8}
+ knotw k2, k1
+ vmovdqa32 ymm2 {k2}, ymm0
+ vmovdqa32 ymm3 {k2}, ymm0
+ vmovdqa32 ymm4 {k2}, ymm1
+ vmovdqa32 ymm5 {k2}, ymm1
+ vmovdqa ymmword ptr [rsp], ymm2
+ vmovdqa ymmword ptr [rsp+0x1*0x20], ymm3
+ vmovdqa ymmword ptr [rsp+0x2*0x20], ymm4
+ vmovdqa ymmword ptr [rsp+0x3*0x20], ymm5
+ shl rdx, 6
+ mov qword ptr [rsp+0x80], rdx
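+ # rdx now holds blocks * 64 (BLAKE3_BLOCK_LEN); the byte length saved at
+ # [rsp+0x80] is the loop bound used to detect each input's final block.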
+ cmp rsi, 16
+ jc 3f
+2:
+ vpbroadcastd zmm0, dword ptr [rcx]
+ vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4]
+ vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4]
+ vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4]
+ vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4]
+ vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4]
+ vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4]
+ vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+.p2align 5
+9:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm8, zmm16, zmm17
+ vpunpckhqdq zmm9, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm10, zmm18, zmm19
+ vpunpckhqdq zmm11, zmm18, zmm19
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm12, zmm16, zmm17
+ vpunpckhqdq zmm13, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm14, zmm18, zmm19
+ vpunpckhqdq zmm15, zmm18, zmm19
+ vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
+ vmovdqa32 zmm31, zmmword ptr [INDEX1+rip]
+ vshufps zmm16, zmm8, zmm10, 136
+ vshufps zmm17, zmm12, zmm14, 136
+ vmovdqa32 zmm20, zmm16
+ vpermt2d zmm16, zmm27, zmm17
+ vpermt2d zmm20, zmm31, zmm17
+ vshufps zmm17, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm21, zmm17
+ vpermt2d zmm17, zmm27, zmm30
+ vpermt2d zmm21, zmm31, zmm30
+ vshufps zmm18, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm22, zmm18
+ vpermt2d zmm18, zmm27, zmm8
+ vpermt2d zmm22, zmm31, zmm8
+ vshufps zmm19, zmm9, zmm11, 221
+ vshufps zmm8, zmm13, zmm15, 221
+ vmovdqa32 zmm23, zmm19
+ vpermt2d zmm19, zmm27, zmm8
+ vpermt2d zmm23, zmm31, zmm8
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm8, zmm24, zmm25
+ vpunpckhqdq zmm9, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm10, zmm24, zmm25
+ vpunpckhqdq zmm11, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm12, zmm24, zmm25
+ vpunpckhqdq zmm13, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm14, zmm24, zmm25
+ vpunpckhqdq zmm15, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ vshufps zmm24, zmm8, zmm10, 136
+ vshufps zmm30, zmm12, zmm14, 136
+ vmovdqa32 zmm28, zmm24
+ vpermt2d zmm24, zmm27, zmm30
+ vpermt2d zmm28, zmm31, zmm30
+ vshufps zmm25, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm29, zmm25
+ vpermt2d zmm25, zmm27, zmm30
+ vpermt2d zmm29, zmm31, zmm30
+ vshufps zmm26, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm30, zmm26
+ vpermt2d zmm26, zmm27, zmm8
+ vpermt2d zmm30, zmm31, zmm8
+ vshufps zmm8, zmm9, zmm11, 221
+ vshufps zmm10, zmm13, zmm15, 221
+ vpermi2d zmm27, zmm8, zmm10
+ vpermi2d zmm31, zmm8, zmm10
+ vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa32 zmm12, zmmword ptr [rsp]
+ vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40]
+ vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4]
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm24
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm23
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm27
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm21
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm28
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm26
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm22
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm31
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpxord zmm0, zmm0, zmm8
+ vpxord zmm1, zmm1, zmm9
+ vpxord zmm2, zmm2, zmm10
+ vpxord zmm3, zmm3, zmm11
+ vpxord zmm4, zmm4, zmm12
+ vpxord zmm5, zmm5, zmm13
+ vpxord zmm6, zmm6, zmm14
+ vpxord zmm7, zmm7, zmm15
+ movzx eax, byte ptr [rbp+0x38]
+ jne 9b
+ mov rbx, qword ptr [rbp+0x50]
+ vpunpckldq zmm16, zmm0, zmm1
+ vpunpckhdq zmm17, zmm0, zmm1
+ vpunpckldq zmm18, zmm2, zmm3
+ vpunpckhdq zmm19, zmm2, zmm3
+ vpunpckldq zmm20, zmm4, zmm5
+ vpunpckhdq zmm21, zmm4, zmm5
+ vpunpckldq zmm22, zmm6, zmm7
+ vpunpckhdq zmm23, zmm6, zmm7
+ vpunpcklqdq zmm0, zmm16, zmm18
+ vpunpckhqdq zmm1, zmm16, zmm18
+ vpunpcklqdq zmm2, zmm17, zmm19
+ vpunpckhqdq zmm3, zmm17, zmm19
+ vpunpcklqdq zmm4, zmm20, zmm22
+ vpunpckhqdq zmm5, zmm20, zmm22
+ vpunpcklqdq zmm6, zmm21, zmm23
+ vpunpckhqdq zmm7, zmm21, zmm23
+ vshufi32x4 zmm16, zmm0, zmm4, 0x88
+ vshufi32x4 zmm17, zmm1, zmm5, 0x88
+ vshufi32x4 zmm18, zmm2, zmm6, 0x88
+ vshufi32x4 zmm19, zmm3, zmm7, 0x88
+ vshufi32x4 zmm20, zmm0, zmm4, 0xDD
+ vshufi32x4 zmm21, zmm1, zmm5, 0xDD
+ vshufi32x4 zmm22, zmm2, zmm6, 0xDD
+ vshufi32x4 zmm23, zmm3, zmm7, 0xDD
+ vshufi32x4 zmm0, zmm16, zmm17, 0x88
+ vshufi32x4 zmm1, zmm18, zmm19, 0x88
+ vshufi32x4 zmm2, zmm20, zmm21, 0x88
+ vshufi32x4 zmm3, zmm22, zmm23, 0x88
+ vshufi32x4 zmm4, zmm16, zmm17, 0xDD
+ vshufi32x4 zmm5, zmm18, zmm19, 0xDD
+ vshufi32x4 zmm6, zmm20, zmm21, 0xDD
+ vshufi32x4 zmm7, zmm22, zmm23, 0xDD
+ vmovdqu32 zmmword ptr [rbx], zmm0
+ vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1
+ vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2
+ vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3
+ vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4
+ vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5
+ vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6
+ vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7
+ vmovdqa32 zmm0, zmmword ptr [rsp]
+ vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40]
+ vmovdqa32 zmm2, zmm0
+ vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16}
+ vpcmpltud k2, zmm2, zmm0
+ vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}
+ vmovdqa32 zmmword ptr [rsp], zmm2
+ vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1
+ add rdi, 128
+ add rbx, 512
+ mov qword ptr [rbp+0x50], rbx
+ sub rsi, 16
+ cmp rsi, 16
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ vzeroupper
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
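(Editorial aside, not part of the diff: the masked adds near the top of this 16-way loop — the ADD16/ADD1 sequence — maintain one 64-bit block counter per lane, split across two vectors of 32-bit halves: a broadcast add of 16 to the low halves, a vpcmpltud unsigned-wrap test, and a masked add of 1 into the high halves. A minimal scalar sketch of that carry step, with hypothetical names and ignoring the k1 increment_counter mask:

    #include <stdint.h>

    /* Advance 16 per-lane 64-bit counters stored as split 32-bit
     * halves: the broadcast ADD16 add, then the vpcmpltud/ADD1 carry. */
    static void bump_counters(uint32_t lo[16], uint32_t hi[16]) {
        for (int i = 0; i < 16; i++) {
            uint32_t old_lo = lo[i];
            lo[i] = old_lo + 16;     /* vpaddd ... ADD16 {1to16}       */
            if (lo[i] < old_lo)      /* vpcmpltud: unsigned overflow   */
                hi[i] += 1;          /* vpaddd {k2} ... ADD1 {1to16}   */
        }
    }
)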
+.p2align 6
+3:
+ test esi, 0x8
+ je 3f
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x38]
+ movzx ebx, byte ptr [rbp+0x40]
+ or eax, ebx
+ xor edx, edx
+2:
+ movzx ebx, byte ptr [rbp+0x48]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm16, ymm12, ymm14, 136
+ vshufps ymm17, ymm12, ymm14, 221
+ vshufps ymm18, ymm13, ymm15, 136
+ vshufps ymm19, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm20, ymm12, ymm14, 136
+ vshufps ymm21, ymm12, ymm14, 221
+ vshufps ymm22, ymm13, ymm15, 136
+ vshufps ymm23, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm24, ymm12, ymm14, 136
+ vshufps ymm25, ymm12, ymm14, 221
+ vshufps ymm26, ymm13, ymm15, 136
+ vshufps ymm27, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm28, ymm12, ymm14, 136
+ vshufps ymm29, ymm12, ymm14, 221
+ vshufps ymm30, ymm13, ymm15, 136
+ vshufps ymm31, ymm13, ymm15, 221
+ vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa ymm12, ymmword ptr [rsp]
+ vmovdqa ymm13, ymmword ptr [rsp+0x40]
+ vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd ymm15, dword ptr [rsp+0x88]
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm24
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm23
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm27
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm21
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm28
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm26
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm22
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm31
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+0x38]
+ jne 2b
+ mov rbx, qword ptr [rbp+0x50]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp]
+ vmovdqa ymm2, ymmword ptr [rsp+0x2*0x20]
+ vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20]
+ vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20]
+ vmovdqa ymmword ptr [rsp], ymm0
+ vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2
+ add rbx, 256
+ mov qword ptr [rbp+0x50], rbx
+ add rdi, 64
+ sub rsi, 8
+3:
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, qword ptr [rsp+0x80]
+ movzx r13, byte ptr [rbp+0x38]
+ movzx r12, byte ptr [rbp+0x48]
+ test esi, 0x4
+ je 3f
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx]
+ vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10]
+ vmovdqa xmm12, xmmword ptr [rsp]
+ vmovdqa xmm13, xmmword ptr [rsp+0x4*0x10]
+ vpunpckldq xmm14, xmm12, xmm13
+ vpunpckhdq xmm15, xmm12, xmm13
+ vpermq ymm14, ymm14, 0xDC
+ vpermq ymm15, ymm15, 0xDC
+ vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
+ mov eax, 17476
+ kmovw k2, eax
+ vpblendmd zmm13 {k2}, zmm13, zmm12
+ vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov eax, 43690
+ kmovw k3, eax
+ mov eax, 34952
+ kmovw k4, eax
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vmovdqa32 zmm2, zmm15
+ vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4]
+ vpblendmd zmm3 {k4}, zmm13, zmm8
+ vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x30]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03
+ vshufps zmm4, zmm8, zmm9, 136
+ vshufps zmm5, zmm8, zmm9, 221
+ vmovups zmm8, zmmword ptr [r8+rdx-0x20]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x10]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03
+ vshufps zmm6, zmm8, zmm9, 136
+ vshufps zmm7, zmm8, zmm9, 221
+ vpshufd zmm6, zmm6, 0x93
+ vpshufd zmm7, zmm7, 0x93
+ mov al, 7
+9:
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x93
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x39
+ vpaddd zmm0, zmm0, zmm6
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm7
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x39
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x93
+ dec al
+ jz 9f
+ vshufps zmm8, zmm4, zmm5, 214
+ vpshufd zmm9, zmm4, 0x0F
+ vpshufd zmm4, zmm8, 0x39
+ vshufps zmm8, zmm6, zmm7, 250
+ vpblendmd zmm9 {k3}, zmm9, zmm8
+ vpunpcklqdq zmm8, zmm7, zmm5
+ vpblendmd zmm8 {k4}, zmm8, zmm6
+ vpshufd zmm8, zmm8, 0x78
+ vpunpckhdq zmm5, zmm5, zmm7
+ vpunpckldq zmm6, zmm6, zmm5
+ vpshufd zmm7, zmm6, 0x1E
+ vmovdqa32 zmm5, zmm9
+ vmovdqa32 zmm6, zmm8
+ jmp 9b
+9:
+ vpxord zmm0, zmm0, zmm2
+ vpxord zmm1, zmm1, zmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03
+ vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x40]
+ vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10]
+ vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x40], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
+ test esi, 0x2
+ je 3f
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x4]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x88]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x4*0x10]
+ vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8]
+ vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm14, dword ptr [rsp]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vpinsrd xmm3, xmm14, eax, 3
+ vmovdqa xmm2, xmm15
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+.p2align 6
+_blake3_compress_in_place_avx512:
+blake3_compress_in_place_avx512:
+ _CET_ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi]
+ vmovdqu xmm1, xmmword ptr [rdi+0x10]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vmovdqu xmmword ptr [rdi], xmm0
+ vmovdqu xmmword ptr [rdi+0x10], xmm1
+ ret
+
+.p2align 6
+_blake3_compress_xof_avx512:
+blake3_compress_xof_avx512:
+ _CET_ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi]
+ vmovdqu xmm1, xmmword ptr [rdi+0x10]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vpxor xmm2, xmm2, [rdi]
+ vpxor xmm3, xmm3, [rdi+0x10]
+ vmovdqu xmmword ptr [r9], xmm0
+ vmovdqu xmmword ptr [r9+0x10], xmm1
+ vmovdqu xmmword ptr [r9+0x20], xmm2
+ vmovdqu xmmword ptr [r9+0x30], xmm3
+ ret
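(Editorial aside, not part of the diff: the four vpxor steps just before the stores implement the BLAKE3 feed-forward — the first 32 output bytes are the xor of the two state halves, i.e. the new chaining value, and the next 32 are the upper state half xored with the input chaining value reloaded from [rdi]. A scalar sketch of that finalization, with hypothetical names; v is the 16-word state after seven rounds, cv the input chaining value:

    /* Feed-forward producing the 64-byte XOF output block. */
    for (int i = 0; i < 8; i++) {
        out[i]     = v[i] ^ v[i + 8];    /* vpxor xmm0/xmm1            */
        out[i + 8] = v[i + 8] ^ cv[i];   /* vpxor xmm2/xmm3 with [rdi] */
    }
)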
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata
+#endif
+.p2align 6
+INDEX0:
+ .long 0, 1, 2, 3, 16, 17, 18, 19
+ .long 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+ .long 4, 5, 6, 7, 20, 21, 22, 23
+ .long 12, 13, 14, 15, 28, 29, 30, 31
+ADD0:
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ .long 8, 9, 10, 11, 12, 13, 14, 15
+ADD1: .long 1
+
+ADD16: .long 16
+BLAKE3_BLOCK_LEN:
+ .long 64
+.p2align 6
+BLAKE3_IV:
+BLAKE3_IV_0:
+ .long 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A
+
+#endif
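(Editorial aside, not part of the diff: for reference while reading either file, each cluster of vpaddd/vpxord/vprord with rotate counts 16, 12, 8, 7 is one BLAKE3 quarter-round ("G") applied across every SIMD lane at once, and seven such rounds with a fixed message-word permutation make up the compression function. A minimal scalar sketch of G, following the public BLAKE3 reference implementation:

    #include <stddef.h>
    #include <stdint.h>

    static inline uint32_t rotr32(uint32_t w, uint32_t c) {
        return (w >> c) | (w << (32 - c));
    }

    /* One quarter-round over state words a, b, c, d and two message
     * words mx, my; the rotate counts match the vprord immediates. */
    static inline void g(uint32_t st[16], size_t a, size_t b, size_t c,
                         size_t d, uint32_t mx, uint32_t my) {
        st[a] = st[a] + st[b] + mx;
        st[d] = rotr32(st[d] ^ st[a], 16);
        st[c] = st[c] + st[d];
        st[b] = rotr32(st[b] ^ st[c], 12);
        st[a] = st[a] + st[b] + my;
        st[d] = rotr32(st[d] ^ st[a], 8);
        st[c] = st[c] + st[d];
        st[b] = rotr32(st[b] ^ st[c], 7);
    }

The hash_many kernels simply run sixteen (zmm), eight (ymm), or four, two, or one (xmm) such states in parallel, which is why each instruction above appears in groups of four registers.)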
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
new file mode 100644
index 000000000000..e10b9f36cbcc
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
@@ -0,0 +1,2615 @@
+.intel_syntax noprefix
+
+.global _blake3_hash_many_avx512
+.global blake3_hash_many_avx512
+.global blake3_compress_in_place_avx512
+.global _blake3_compress_in_place_avx512
+.global blake3_compress_xof_avx512
+.global _blake3_compress_xof_avx512
+
+.section .text
+.p2align 6
+_blake3_hash_many_avx512:
+blake3_hash_many_avx512:
+ push r15
+ push r14
+ push r13
+ push r12
+ push rdi
+ push rsi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 304
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ vmovdqa xmmword ptr [rsp+0x90], xmm6
+ vmovdqa xmmword ptr [rsp+0xA0], xmm7
+ vmovdqa xmmword ptr [rsp+0xB0], xmm8
+ vmovdqa xmmword ptr [rsp+0xC0], xmm9
+ vmovdqa xmmword ptr [rsp+0xD0], xmm10
+ vmovdqa xmmword ptr [rsp+0xE0], xmm11
+ vmovdqa xmmword ptr [rsp+0xF0], xmm12
+ vmovdqa xmmword ptr [rsp+0x100], xmm13
+ vmovdqa xmmword ptr [rsp+0x110], xmm14
+ vmovdqa xmmword ptr [rsp+0x120], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+0x68]
+ movzx r9, byte ptr [rbp+0x70]
+ neg r9
+ kmovw k1, r9d
+ vmovd xmm0, r8d
+ vpbroadcastd ymm0, xmm0
+ shr r8, 32
+ vmovd xmm1, r8d
+ vpbroadcastd ymm1, xmm1
+ vmovdqa ymm4, ymm1
+ vmovdqa ymm5, ymm1
+ vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip]
+ vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip]
+ vpcmpltud k2, ymm2, ymm0
+ vpcmpltud k3, ymm3, ymm0
+ vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8}
+ vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8}
+ knotw k2, k1
+ vmovdqa32 ymm2 {k2}, ymm0
+ vmovdqa32 ymm3 {k2}, ymm0
+ vmovdqa32 ymm4 {k2}, ymm1
+ vmovdqa32 ymm5 {k2}, ymm1
+ vmovdqa ymmword ptr [rsp], ymm2
+ vmovdqa ymmword ptr [rsp+0x20], ymm3
+ vmovdqa ymmword ptr [rsp+0x40], ymm4
+ vmovdqa ymmword ptr [rsp+0x60], ymm5
+ shl rdx, 6
+ mov qword ptr [rsp+0x80], rdx
+ cmp rsi, 16
+ jc 3f
+2:
+ vpbroadcastd zmm0, dword ptr [rcx]
+ vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4]
+ vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4]
+ vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4]
+ vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4]
+ vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4]
+ vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4]
+ vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4]
+ movzx eax, byte ptr [rbp+0x78]
+ movzx ebx, byte ptr [rbp+0x80]
+ or eax, ebx
+ xor edx, edx
+.p2align 5
+9:
+ movzx ebx, byte ptr [rbp+0x88]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm8, zmm16, zmm17
+ vpunpckhqdq zmm9, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm10, zmm18, zmm19
+ vpunpckhqdq zmm11, zmm18, zmm19
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
+ vpunpcklqdq zmm12, zmm16, zmm17
+ vpunpckhqdq zmm13, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
+ vpunpcklqdq zmm14, zmm18, zmm19
+ vpunpckhqdq zmm15, zmm18, zmm19
+ vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
+ vmovdqa32 zmm31, zmmword ptr [INDEX1+rip]
+ vshufps zmm16, zmm8, zmm10, 136
+ vshufps zmm17, zmm12, zmm14, 136
+ vmovdqa32 zmm20, zmm16
+ vpermt2d zmm16, zmm27, zmm17
+ vpermt2d zmm20, zmm31, zmm17
+ vshufps zmm17, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm21, zmm17
+ vpermt2d zmm17, zmm27, zmm30
+ vpermt2d zmm21, zmm31, zmm30
+ vshufps zmm18, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm22, zmm18
+ vpermt2d zmm18, zmm27, zmm8
+ vpermt2d zmm22, zmm31, zmm8
+ vshufps zmm19, zmm9, zmm11, 221
+ vshufps zmm8, zmm13, zmm15, 221
+ vmovdqa32 zmm23, zmm19
+ vpermt2d zmm19, zmm27, zmm8
+ vpermt2d zmm23, zmm31, zmm8
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x40]
+ mov r13, qword ptr [rdi+0x48]
+ mov r14, qword ptr [rdi+0x50]
+ mov r15, qword ptr [rdi+0x58]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm8, zmm24, zmm25
+ vpunpckhqdq zmm9, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm10, zmm24, zmm25
+ vpunpckhqdq zmm11, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ mov r8, qword ptr [rdi+0x20]
+ mov r9, qword ptr [rdi+0x28]
+ mov r10, qword ptr [rdi+0x30]
+ mov r11, qword ptr [rdi+0x38]
+ mov r12, qword ptr [rdi+0x60]
+ mov r13, qword ptr [rdi+0x68]
+ mov r14, qword ptr [rdi+0x70]
+ mov r15, qword ptr [rdi+0x78]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm12, zmm24, zmm25
+ vpunpckhqdq zmm13, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
+ vpunpcklqdq zmm14, zmm24, zmm25
+ vpunpckhqdq zmm15, zmm24, zmm25
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r12+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r13+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r14+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ prefetcht0 [r15+rdx+0x80]
+ vshufps zmm24, zmm8, zmm10, 136
+ vshufps zmm30, zmm12, zmm14, 136
+ vmovdqa32 zmm28, zmm24
+ vpermt2d zmm24, zmm27, zmm30
+ vpermt2d zmm28, zmm31, zmm30
+ vshufps zmm25, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm29, zmm25
+ vpermt2d zmm25, zmm27, zmm30
+ vpermt2d zmm29, zmm31, zmm30
+ vshufps zmm26, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm30, zmm26
+ vpermt2d zmm26, zmm27, zmm8
+ vpermt2d zmm30, zmm31, zmm8
+ vshufps zmm8, zmm9, zmm11, 221
+ vshufps zmm10, zmm13, zmm15, 221
+ vpermi2d zmm27, zmm8, zmm10
+ vpermi2d zmm31, zmm8, zmm10
+ vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa32 zmm12, zmmword ptr [rsp]
+ vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40]
+ vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4]
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm24
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm23
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm27
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm21
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm28
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm26
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm22
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm31
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
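+ # Round 7 done: feed forward by XORing the two halves of the state; the
+ # low half (zmm0-zmm7) becomes the 16 lanes' new chaining values.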
+ vpxord zmm0, zmm0, zmm8
+ vpxord zmm1, zmm1, zmm9
+ vpxord zmm2, zmm2, zmm10
+ vpxord zmm3, zmm3, zmm11
+ vpxord zmm4, zmm4, zmm12
+ vpxord zmm5, zmm5, zmm13
+ vpxord zmm6, zmm6, zmm14
+ vpxord zmm7, zmm7, zmm15
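+ # Loop back while 64-byte blocks remain; RFLAGS still holds the result
+ # of the cmp rdx, qword ptr [rsp+0x80] earlier in this loop (the vector
+ # ops and movzx in between do not modify flags).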
+ movzx eax, byte ptr [rbp+0x78]
+ jne 9b
+ mov rbx, qword ptr [rbp+0x90]
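+ # Transpose the eight output vectors so that each input's 8-word
+ # chaining value is contiguous, then store all 16 CVs (512 bytes).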
+ vpunpckldq zmm16, zmm0, zmm1
+ vpunpckhdq zmm17, zmm0, zmm1
+ vpunpckldq zmm18, zmm2, zmm3
+ vpunpckhdq zmm19, zmm2, zmm3
+ vpunpckldq zmm20, zmm4, zmm5
+ vpunpckhdq zmm21, zmm4, zmm5
+ vpunpckldq zmm22, zmm6, zmm7
+ vpunpckhdq zmm23, zmm6, zmm7
+ vpunpcklqdq zmm0, zmm16, zmm18
+ vpunpckhqdq zmm1, zmm16, zmm18
+ vpunpcklqdq zmm2, zmm17, zmm19
+ vpunpckhqdq zmm3, zmm17, zmm19
+ vpunpcklqdq zmm4, zmm20, zmm22
+ vpunpckhqdq zmm5, zmm20, zmm22
+ vpunpcklqdq zmm6, zmm21, zmm23
+ vpunpckhqdq zmm7, zmm21, zmm23
+ vshufi32x4 zmm16, zmm0, zmm4, 0x88
+ vshufi32x4 zmm17, zmm1, zmm5, 0x88
+ vshufi32x4 zmm18, zmm2, zmm6, 0x88
+ vshufi32x4 zmm19, zmm3, zmm7, 0x88
+ vshufi32x4 zmm20, zmm0, zmm4, 0xDD
+ vshufi32x4 zmm21, zmm1, zmm5, 0xDD
+ vshufi32x4 zmm22, zmm2, zmm6, 0xDD
+ vshufi32x4 zmm23, zmm3, zmm7, 0xDD
+ vshufi32x4 zmm0, zmm16, zmm17, 0x88
+ vshufi32x4 zmm1, zmm18, zmm19, 0x88
+ vshufi32x4 zmm2, zmm20, zmm21, 0x88
+ vshufi32x4 zmm3, zmm22, zmm23, 0x88
+ vshufi32x4 zmm4, zmm16, zmm17, 0xDD
+ vshufi32x4 zmm5, zmm18, zmm19, 0xDD
+ vshufi32x4 zmm6, zmm20, zmm21, 0xDD
+ vshufi32x4 zmm7, zmm22, zmm23, 0xDD
+ vmovdqu32 zmmword ptr [rbx], zmm0
+ vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1
+ vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2
+ vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3
+ vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4
+ vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5
+ vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6
+ vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7
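+ # Advance the per-lane block counters by 16; k1 is presumably the
+ # increment_counter mask set up in the prologue, and vpcmpltud detects
+ # 32-bit wraparound so k2 can carry into the high counter words.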
+ vmovdqa32 zmm0, zmmword ptr [rsp]
+ vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40]
+ vmovdqa32 zmm2, zmm0
+ vpaddd zmm2 {k1}, zmm0, dword ptr [ADD16+rip] {1to16}
+ vpcmpltud k2, zmm2, zmm0
+ vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}
+ vmovdqa32 zmmword ptr [rsp], zmm2
+ vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1
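+ # Step to the next group of 16 inputs: 16 pointers (128 bytes) and
+ # 16 output chaining values (512 bytes).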
+ add rdi, 128
+ add rbx, 512
+ mov qword ptr [rbp+0x90], rbx
+ sub rsi, 16
+ cmp rsi, 16
+ jnc 2b
+ test rsi, rsi
+ jne 3f
+4:
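+ # Epilogue: restore the nonvolatile xmm6-xmm15 (Windows x64 ABI) and
+ # pop the saved general-purpose registers.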
+ vzeroupper
+ vmovdqa xmm6, xmmword ptr [rsp+0x90]
+ vmovdqa xmm7, xmmword ptr [rsp+0xA0]
+ vmovdqa xmm8, xmmword ptr [rsp+0xB0]
+ vmovdqa xmm9, xmmword ptr [rsp+0xC0]
+ vmovdqa xmm10, xmmword ptr [rsp+0xD0]
+ vmovdqa xmm11, xmmword ptr [rsp+0xE0]
+ vmovdqa xmm12, xmmword ptr [rsp+0xF0]
+ vmovdqa xmm13, xmmword ptr [rsp+0x100]
+ vmovdqa xmm14, xmmword ptr [rsp+0x110]
+ vmovdqa xmm15, xmmword ptr [rsp+0x120]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rsi
+ pop rdi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
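+# Fewer than 16 inputs remain: fall through to progressively narrower
+# paths that handle 8, 4, 2, and finally 1 input.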
+.p2align 6
+3:
+ test esi, 0x8
+ je 3f
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+0x4]
+ vpbroadcastd ymm2, dword ptr [rcx+0x8]
+ vpbroadcastd ymm3, dword ptr [rcx+0xC]
+ vpbroadcastd ymm4, dword ptr [rcx+0x10]
+ vpbroadcastd ymm5, dword ptr [rcx+0x14]
+ vpbroadcastd ymm6, dword ptr [rcx+0x18]
+ vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov r12, qword ptr [rdi+0x20]
+ mov r13, qword ptr [rdi+0x28]
+ mov r14, qword ptr [rdi+0x30]
+ mov r15, qword ptr [rdi+0x38]
+ movzx eax, byte ptr [rbp+0x78]
+ movzx ebx, byte ptr [rbp+0x80]
+ or eax, ebx
+ xor edx, edx
+2:
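+ # Per-block flag selection: OR in the end flags byte ([rbp+0x88]) only
+ # when rdx reaches the total input length, i.e. on the last block.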
+ movzx ebx, byte ptr [rbp+0x88]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+0x80]
+ cmove eax, ebx
+ mov dword ptr [rsp+0x88], eax
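+ # Gather one 64-byte block from each of the 8 inputs (r8-r15) and
+ # transpose it into sixteen message-word vectors, ymm16-ymm31.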
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x40]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x40]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x40]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm16, ymm12, ymm14, 136
+ vshufps ymm17, ymm12, ymm14, 221
+ vshufps ymm18, ymm13, ymm15, 136
+ vshufps ymm19, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x30]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x30]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm20, ymm12, ymm14, 136
+ vshufps ymm21, ymm12, ymm14, 221
+ vshufps ymm22, ymm13, ymm15, 136
+ vshufps ymm23, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x20]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x20]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x20]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm24, ymm12, ymm14, 136
+ vshufps ymm25, ymm12, ymm14, 221
+ vshufps ymm26, ymm13, ymm15, 136
+ vshufps ymm27, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
+ vmovups xmm9, xmmword ptr [r9+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-0x10]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
+ vmovups xmm11, xmmword ptr [r11+rdx-0x10]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm28, ymm12, ymm14, 136
+ vshufps ymm29, ymm12, ymm14, 221
+ vshufps ymm30, ymm13, ymm15, 136
+ vshufps ymm31, ymm13, ymm15, 221
+ vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip]
+ vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip]
+ vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip]
+ vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip]
+ vmovdqa ymm12, ymmword ptr [rsp]
+ vmovdqa ymm13, ymmword ptr [rsp+0x40]
+ vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vpbroadcastd ymm15, dword ptr [rsp+0x88]
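+ # The second half of the state (ymm8-ymm15) is now IV words, counter
+ # lo/hi, block length, and flags; the seven unrolled rounds follow.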
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm24
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm23
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm27
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm21
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm28
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm26
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm22
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm31
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+0x78]
+ jne 2b
+ mov rbx, qword ptr [rbp+0x90]
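+ # Transpose the eight 8-word outputs back to per-input order and store
+ # the 8 chaining values (256 bytes).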
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0xCC
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0xCC
+ vblendps ymm3, ymm12, ymm9, 0xCC
+ vperm2f128 ymm12, ymm1, ymm2, 0x20
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0xCC
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 0x20
+ vmovups ymmword ptr [rbx+0x20], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0xCC
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0xCC
+ vblendps ymm14, ymm14, ymm13, 0xCC
+ vperm2f128 ymm8, ymm10, ymm14, 0x20
+ vmovups ymmword ptr [rbx+0x40], ymm8
+ vblendps ymm15, ymm13, ymm15, 0xCC
+ vperm2f128 ymm13, ymm6, ymm15, 0x20
+ vmovups ymmword ptr [rbx+0x60], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 0x31
+ vperm2f128 ymm11, ymm3, ymm4, 0x31
+ vmovups ymmword ptr [rbx+0x80], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 0x31
+ vperm2f128 ymm15, ymm6, ymm15, 0x31
+ vmovups ymmword ptr [rbx+0xA0], ymm11
+ vmovups ymmword ptr [rbx+0xC0], ymm14
+ vmovups ymmword ptr [rbx+0xE0], ymm15
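+ # Slide the saved counter vectors forward by eight lanes for the
+ # remaining inputs (merge under k1, presumably the increment_counter
+ # mask).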
+ vmovdqa ymm0, ymmword ptr [rsp]
+ vmovdqa ymm2, ymmword ptr [rsp+0x40]
+ vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20]
+ vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20]
+ vmovdqa ymmword ptr [rsp], ymm0
+ vmovdqa ymmword ptr [rsp+0x40], ymm2
+ add rbx, 256
+ mov qword ptr [rbp+0x90], rbx
+ add rdi, 64
+ sub rsi, 8
+3:
+ mov rbx, qword ptr [rbp+0x90]
+ mov r15, qword ptr [rsp+0x80]
+ movzx r13, byte ptr [rbp+0x78]
+ movzx r12, byte ptr [rbp+0x88]
+ test esi, 0x4
+ je 3f
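+ # 4-way path: one input per 128-bit lane of a zmm register, with the
+ # input chaining value broadcast to all four lanes.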
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx]
+ vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10]
+ vmovdqa xmm12, xmmword ptr [rsp]
+ vmovdqa xmm13, xmmword ptr [rsp+0x40]
+ vpunpckldq xmm14, xmm12, xmm13
+ vpunpckhdq xmm15, xmm12, xmm13
+ vpermq ymm14, ymm14, 0xDC
+ vpermq ymm15, ymm15, 0xDC
+ vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
+ mov eax, 17476
+ kmovw k2, eax
+ vpblendmd zmm13 {k2}, zmm13, zmm12
+ vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ mov eax, 43690
+ kmovw k3, eax
+ mov eax, 34952
+ kmovw k4, eax
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vmovdqa32 zmm2, zmm15
+ vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4]
+ vpblendmd zmm3 {k4}, zmm13, zmm8
+ vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x30]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03
+ vshufps zmm4, zmm8, zmm9, 136
+ vshufps zmm5, zmm8, zmm9, 221
+ vmovups zmm8, zmmword ptr [r8+rdx-0x20]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03
+ vmovups zmm9, zmmword ptr [r8+rdx-0x10]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03
+ vshufps zmm6, zmm8, zmm9, 136
+ vshufps zmm7, zmm8, zmm9, 221
+ vpshufd zmm6, zmm6, 0x93
+ vpshufd zmm7, zmm7, 0x93
+ mov al, 7
+9:
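+ # One BLAKE3 round: the G function on the four columns, diagonalize
+ # with vpshufd, G on the diagonals, un-diagonalize; the rotations are
+ # 16, 12, 8, and 7.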
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x93
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x39
+ vpaddd zmm0, zmm0, zmm6
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm7
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 0x39
+ vpshufd zmm3, zmm3, 0x4E
+ vpshufd zmm2, zmm2, 0x93
+ dec al
+ jz 9f
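+ # Apply BLAKE3's fixed message permutation to the four message vectors
+ # before the next round.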
+ vshufps zmm8, zmm4, zmm5, 214
+ vpshufd zmm9, zmm4, 0x0F
+ vpshufd zmm4, zmm8, 0x39
+ vshufps zmm8, zmm6, zmm7, 250
+ vpblendmd zmm9 {k3}, zmm9, zmm8
+ vpunpcklqdq zmm8, zmm7, zmm5
+ vpblendmd zmm8 {k4}, zmm8, zmm6
+ vpshufd zmm8, zmm8, 0x78
+ vpunpckhdq zmm5, zmm5, zmm7
+ vpunpckldq zmm6, zmm6, zmm5
+ vpshufd zmm7, zmm6, 0x1E
+ vmovdqa32 zmm5, zmm9
+ vmovdqa32 zmm6, zmm8
+ jmp 9b
+9:
+ vpxord zmm0, zmm0, zmm2
+ vpxord zmm1, zmm1, zmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02
+ vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03
+ vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x40]
+ vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10]
+ vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x40], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+3:
+ test esi, 0x2
+ je 3f
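+ # 2-way path: one input per 128-bit lane of a ymm register.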
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
+ vmovd xmm13, dword ptr [rsp]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovd xmm14, dword ptr [rsp+0x4]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vinserti128 ymm13, ymm13, xmm14, 0x01
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+0x88], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
+ vpbroadcastd ymm8, dword ptr [rsp+0x88]
+ vpblendd ymm3, ymm13, ymm8, 0x88
+ vmovups ymm8, ymmword ptr [r8+rdx-0x40]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x30]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-0x20]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
+ vmovups ymm9, ymmword ptr [r8+rdx-0x10]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 0x93
+ vpshufd ymm7, ymm7, 0x93
+ mov al, 7
+9:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x93
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x39
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 0x39
+ vpshufd ymm3, ymm3, 0x4E
+ vpshufd ymm2, ymm2, 0x93
+ dec al
+ jz 9f
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0x0F
+ vpshufd ymm4, ymm8, 0x39
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0xAA
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 0x88
+ vpshufd ymm8, ymm8, 0x78
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 0x1E
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp 9b
+9:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
+ vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+0x40]
+ vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8]
+ vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+0x40], xmm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
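+ # Last remaining input: plain 128-bit (xmm) compression loop.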
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ vmovd xmm14, dword ptr [rsp]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+.p2align 5
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vpinsrd xmm3, xmm14, eax, 3
+ vmovdqa xmm2, xmm15
+ vmovups xmm8, xmmword ptr [r8+rdx-0x40]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x30]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+
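+# Compress one 64-byte block in place. Windows x64 arguments (names per
+# the upstream C prototype): rcx = cv, rdx = block, r8b = block_len,
+# r9 = counter, flags byte on the stack; the new CV overwrites [rcx].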
+.p2align 6
+_blake3_compress_in_place_avx512:
+blake3_compress_in_place_avx512:
+ sub rsp, 72
+ vmovdqa xmmword ptr [rsp], xmm6
+ vmovdqa xmmword ptr [rsp+0x10], xmm7
+ vmovdqa xmmword ptr [rsp+0x20], xmm8
+ vmovdqa xmmword ptr [rsp+0x30], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ movzx eax, byte ptr [rsp+0x70]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ vmovq xmm3, r9
+ vmovq xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovups xmm8, xmmword ptr [rdx]
+ vmovups xmm9, xmmword ptr [rdx+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rdx+0x20]
+ vmovups xmm9, xmmword ptr [rdx+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vmovdqu xmmword ptr [rcx], xmm0
+ vmovdqu xmmword ptr [rcx+0x10], xmm1
+ vmovdqa xmm6, xmmword ptr [rsp]
+ vmovdqa xmm7, xmmword ptr [rsp+0x10]
+ vmovdqa xmm8, xmmword ptr [rsp+0x20]
+ vmovdqa xmm9, xmmword ptr [rsp+0x30]
+ add rsp, 72
+ ret
+
+
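+# Annotation: same setup as compress_in_place above, but the sixth argument
+# (loaded into r10 from the stack) is a 64-byte output buffer. After the
+# rounds, xmm0/xmm1 give the first 32 output bytes, and xmm2/xmm3, XORed with
+# the original chaining value, give the second 32 bytes.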
+.p2align 6
+_blake3_compress_xof_avx512:
+blake3_compress_xof_avx512:
+ sub rsp, 72
+ vmovdqa xmmword ptr [rsp], xmm6
+ vmovdqa xmmword ptr [rsp+0x10], xmm7
+ vmovdqa xmmword ptr [rsp+0x20], xmm8
+ vmovdqa xmmword ptr [rsp+0x30], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+0x10]
+ movzx eax, byte ptr [rsp+0x70]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+0x78]
+ shl rax, 32
+ add r8, rax
+ vmovq xmm3, r9
+ vmovq xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ vmovups xmm8, xmmword ptr [rdx]
+ vmovups xmm9, xmmword ptr [rdx+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rdx+0x20]
+ vmovups xmm9, xmmword ptr [rdx+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vpxor xmm2, xmm2, xmmword ptr [rcx]
+ vpxor xmm3, xmm3, xmmword ptr [rcx+0x10]
+ vmovdqu xmmword ptr [r10], xmm0
+ vmovdqu xmmword ptr [r10+0x10], xmm1
+ vmovdqu xmmword ptr [r10+0x20], xmm2
+ vmovdqu xmmword ptr [r10+0x30], xmm3
+ vmovdqa xmm6, xmmword ptr [rsp]
+ vmovdqa xmm7, xmmword ptr [rsp+0x10]
+ vmovdqa xmm8, xmmword ptr [rsp+0x20]
+ vmovdqa xmm9, xmmword ptr [rsp+0x30]
+ add rsp, 72
+ ret
+
+.section .rodata
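+# Annotation: INDEX0/INDEX1 drive the vpermt2d message transposition across
+# lanes; ADD0 holds the per-lane counter offsets 0-15, ADD1 the carry
+# increment for the counter high words, ADD16 the per-iteration stride.
+# BLAKE3_IV_0..3 are the first four words of the BLAKE3 IV (the same values
+# as SHA-256's initial hash words).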
+.p2align 6
+INDEX0:
+ .long 0, 1, 2, 3, 16, 17, 18, 19
+ .long 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+ .long 4, 5, 6, 7, 20, 21, 22, 23
+ .long 12, 13, 14, 15, 28, 29, 30, 31
+ADD0:
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ .long 8, 9, 10, 11, 12, 13, 14, 15
+ADD1: .long 1
+
+ADD16: .long 16
+BLAKE3_BLOCK_LEN:
+ .long 64
+.p2align 6
+BLAKE3_IV:
+BLAKE3_IV_0:
+ .long 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm
new file mode 100644
index 000000000000..b19efbaaeb36
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm
@@ -0,0 +1,2634 @@
+public _blake3_hash_many_avx512
+public blake3_hash_many_avx512
+public blake3_compress_in_place_avx512
+public _blake3_compress_in_place_avx512
+public blake3_compress_xof_avx512
+public _blake3_compress_xof_avx512
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_avx512 PROC
+_blake3_hash_many_avx512 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rdi
+ push rsi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 304
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ vmovdqa xmmword ptr [rsp+90H], xmm6
+ vmovdqa xmmword ptr [rsp+0A0H], xmm7
+ vmovdqa xmmword ptr [rsp+0B0H], xmm8
+ vmovdqa xmmword ptr [rsp+0C0H], xmm9
+ vmovdqa xmmword ptr [rsp+0D0H], xmm10
+ vmovdqa xmmword ptr [rsp+0E0H], xmm11
+ vmovdqa xmmword ptr [rsp+0F0H], xmm12
+ vmovdqa xmmword ptr [rsp+100H], xmm13
+ vmovdqa xmmword ptr [rsp+110H], xmm14
+ vmovdqa xmmword ptr [rsp+120H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9
+ kmovw k1, r9d
+ vmovd xmm0, r8d
+ vpbroadcastd ymm0, xmm0
+ shr r8, 32
+ vmovd xmm1, r8d
+ vpbroadcastd ymm1, xmm1
+ vmovdqa ymm4, ymm1
+ vmovdqa ymm5, ymm1
+ vpaddd ymm2, ymm0, ymmword ptr [ADD0]
+ vpaddd ymm3, ymm0, ymmword ptr [ADD0+32]
+ vpcmpud k2, ymm2, ymm0, 1
+ vpcmpud k3, ymm3, ymm0, 1
+ ; XXX: ml64.exe does not currently understand the syntax. We use a workaround.
+ vpbroadcastd ymm6, dword ptr [ADD1]
+ vpaddd ymm4 {k2}, ymm4, ymm6
+ vpaddd ymm5 {k3}, ymm5, ymm6
+ ; vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1] {1to8}
+ ; vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1] {1to8}
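+ ; The workaround: ml64.exe does not accept the EVEX embedded-broadcast
+ ; operand form ({1to8}), so the scalar is first broadcast into a register
+ ; with vpbroadcastd and used as an ordinary vector operand; the commented
+ ; lines show the intended single-instruction form. The masked adds carry a 1
+ ; into the counter high words for lanes whose low word wrapped (k2/k3 come
+ ; from the unsigned less-than compares above).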
+ knotw k2, k1
+ vmovdqa32 ymm2 {k2}, ymm0
+ vmovdqa32 ymm3 {k2}, ymm0
+ vmovdqa32 ymm4 {k2}, ymm1
+ vmovdqa32 ymm5 {k2}, ymm1
+ vmovdqa ymmword ptr [rsp], ymm2
+ vmovdqa ymmword ptr [rsp+20H], ymm3
+ vmovdqa ymmword ptr [rsp+40H], ymm4
+ vmovdqa ymmword ptr [rsp+60H], ymm5
+ shl rdx, 6
+ mov qword ptr [rsp+80H], rdx
+ cmp rsi, 16
+ jc final15blocks
+outerloop16:
+ vpbroadcastd zmm0, dword ptr [rcx]
+ vpbroadcastd zmm1, dword ptr [rcx+1H*4H]
+ vpbroadcastd zmm2, dword ptr [rcx+2H*4H]
+ vpbroadcastd zmm3, dword ptr [rcx+3H*4H]
+ vpbroadcastd zmm4, dword ptr [rcx+4H*4H]
+ vpbroadcastd zmm5, dword ptr [rcx+5H*4H]
+ vpbroadcastd zmm6, dword ptr [rcx+6H*4H]
+ vpbroadcastd zmm7, dword ptr [rcx+7H*4H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
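+ ; Main 16-way loop: each iteration compresses one 64-byte block from all 16
+ ; inputs. Message words are loaded 32 bytes per input and transposed into
+ ; zmm16-zmm31 with vpunpck{l,h}qdq plus vpermt2d against INDEX0/INDEX1. On
+ ; the last block (rdx reaches the byte count at [rsp+80H]) the end flag byte
+ ; at [rbp+88H] is merged into eax via cmove.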
+ALIGN 16
+innerloop16:
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+80H]
+ cmove eax, ebx
+ mov dword ptr [rsp+88H], eax
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+40H]
+ mov r13, qword ptr [rdi+48H]
+ mov r14, qword ptr [rdi+50H]
+ mov r15, qword ptr [rdi+58H]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
+ vpunpcklqdq zmm8, zmm16, zmm17
+ vpunpckhqdq zmm9, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
+ vpunpcklqdq zmm10, zmm18, zmm19
+ vpunpckhqdq zmm11, zmm18, zmm19
+ mov r8, qword ptr [rdi+20H]
+ mov r9, qword ptr [rdi+28H]
+ mov r10, qword ptr [rdi+30H]
+ mov r11, qword ptr [rdi+38H]
+ mov r12, qword ptr [rdi+60H]
+ mov r13, qword ptr [rdi+68H]
+ mov r14, qword ptr [rdi+70H]
+ mov r15, qword ptr [rdi+78H]
+ vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
+ vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
+ vpunpcklqdq zmm12, zmm16, zmm17
+ vpunpckhqdq zmm13, zmm16, zmm17
+ vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
+ vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
+ vpunpcklqdq zmm14, zmm18, zmm19
+ vpunpckhqdq zmm15, zmm18, zmm19
+ vmovdqa32 zmm27, zmmword ptr [INDEX0]
+ vmovdqa32 zmm31, zmmword ptr [INDEX1]
+ vshufps zmm16, zmm8, zmm10, 136
+ vshufps zmm17, zmm12, zmm14, 136
+ vmovdqa32 zmm20, zmm16
+ vpermt2d zmm16, zmm27, zmm17
+ vpermt2d zmm20, zmm31, zmm17
+ vshufps zmm17, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm21, zmm17
+ vpermt2d zmm17, zmm27, zmm30
+ vpermt2d zmm21, zmm31, zmm30
+ vshufps zmm18, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm22, zmm18
+ vpermt2d zmm18, zmm27, zmm8
+ vpermt2d zmm22, zmm31, zmm8
+ vshufps zmm19, zmm9, zmm11, 221
+ vshufps zmm8, zmm13, zmm15, 221
+ vmovdqa32 zmm23, zmm19
+ vpermt2d zmm19, zmm27, zmm8
+ vpermt2d zmm23, zmm31, zmm8
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+40H]
+ mov r13, qword ptr [rdi+48H]
+ mov r14, qword ptr [rdi+50H]
+ mov r15, qword ptr [rdi+58H]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
+ vpunpcklqdq zmm8, zmm24, zmm25
+ vpunpckhqdq zmm9, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
+ vpunpcklqdq zmm10, zmm24, zmm25
+ vpunpckhqdq zmm11, zmm24, zmm25
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ mov r8, qword ptr [rdi+20H]
+ mov r9, qword ptr [rdi+28H]
+ mov r10, qword ptr [rdi+30H]
+ mov r11, qword ptr [rdi+38H]
+ mov r12, qword ptr [rdi+60H]
+ mov r13, qword ptr [rdi+68H]
+ mov r14, qword ptr [rdi+70H]
+ mov r15, qword ptr [rdi+78H]
+ vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
+ vpunpcklqdq zmm12, zmm24, zmm25
+ vpunpckhqdq zmm13, zmm24, zmm25
+ vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
+ vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
+ vpunpcklqdq zmm14, zmm24, zmm25
+ vpunpckhqdq zmm15, zmm24, zmm25
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r12+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r13+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r14+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ prefetcht0 byte ptr [r15+rdx+80H]
+ vshufps zmm24, zmm8, zmm10, 136
+ vshufps zmm30, zmm12, zmm14, 136
+ vmovdqa32 zmm28, zmm24
+ vpermt2d zmm24, zmm27, zmm30
+ vpermt2d zmm28, zmm31, zmm30
+ vshufps zmm25, zmm8, zmm10, 221
+ vshufps zmm30, zmm12, zmm14, 221
+ vmovdqa32 zmm29, zmm25
+ vpermt2d zmm25, zmm27, zmm30
+ vpermt2d zmm29, zmm31, zmm30
+ vshufps zmm26, zmm9, zmm11, 136
+ vshufps zmm8, zmm13, zmm15, 136
+ vmovdqa32 zmm30, zmm26
+ vpermt2d zmm26, zmm27, zmm8
+ vpermt2d zmm30, zmm31, zmm8
+ vshufps zmm8, zmm9, zmm11, 221
+ vshufps zmm10, zmm13, zmm15, 221
+ vpermi2d zmm27, zmm8, zmm10
+ vpermi2d zmm31, zmm8, zmm10
+ vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0]
+ vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1]
+ vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2]
+ vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3]
+ vmovdqa32 zmm12, zmmword ptr [rsp]
+ vmovdqa32 zmm13, zmmword ptr [rsp+1H*40H]
+ vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN]
+ vpbroadcastd zmm15, dword ptr [rsp+22H*4H]
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm24
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm23
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm17
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm29
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm22
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm27
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm21
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm30
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm20
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm21
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm16
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm28
+ vpaddd zmm1, zmm1, zmm25
+ vpaddd zmm2, zmm2, zmm31
+ vpaddd zmm3, zmm3, zmm30
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm26
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm23
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm16
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm18
+ vpaddd zmm1, zmm1, zmm19
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm25
+ vpaddd zmm1, zmm1, zmm27
+ vpaddd zmm2, zmm2, zmm24
+ vpaddd zmm3, zmm3, zmm31
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm28
+ vpaddd zmm3, zmm3, zmm17
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm29
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm18
+ vpaddd zmm3, zmm3, zmm20
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm19
+ vpaddd zmm1, zmm1, zmm26
+ vpaddd zmm2, zmm2, zmm22
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpaddd zmm0, zmm0, zmm27
+ vpaddd zmm1, zmm1, zmm21
+ vpaddd zmm2, zmm2, zmm17
+ vpaddd zmm3, zmm3, zmm24
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vprord zmm15, zmm15, 16
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 12
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vpaddd zmm0, zmm0, zmm31
+ vpaddd zmm1, zmm1, zmm16
+ vpaddd zmm2, zmm2, zmm25
+ vpaddd zmm3, zmm3, zmm22
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm1, zmm1, zmm5
+ vpaddd zmm2, zmm2, zmm6
+ vpaddd zmm3, zmm3, zmm7
+ vpxord zmm12, zmm12, zmm0
+ vpxord zmm13, zmm13, zmm1
+ vpxord zmm14, zmm14, zmm2
+ vpxord zmm15, zmm15, zmm3
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vprord zmm15, zmm15, 8
+ vpaddd zmm8, zmm8, zmm12
+ vpaddd zmm9, zmm9, zmm13
+ vpaddd zmm10, zmm10, zmm14
+ vpaddd zmm11, zmm11, zmm15
+ vpxord zmm4, zmm4, zmm8
+ vpxord zmm5, zmm5, zmm9
+ vpxord zmm6, zmm6, zmm10
+ vpxord zmm7, zmm7, zmm11
+ vprord zmm4, zmm4, 7
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vpaddd zmm0, zmm0, zmm30
+ vpaddd zmm1, zmm1, zmm18
+ vpaddd zmm2, zmm2, zmm19
+ vpaddd zmm3, zmm3, zmm23
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 16
+ vprord zmm12, zmm12, 16
+ vprord zmm13, zmm13, 16
+ vprord zmm14, zmm14, 16
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 12
+ vprord zmm6, zmm6, 12
+ vprord zmm7, zmm7, 12
+ vprord zmm4, zmm4, 12
+ vpaddd zmm0, zmm0, zmm26
+ vpaddd zmm1, zmm1, zmm28
+ vpaddd zmm2, zmm2, zmm20
+ vpaddd zmm3, zmm3, zmm29
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm1, zmm1, zmm6
+ vpaddd zmm2, zmm2, zmm7
+ vpaddd zmm3, zmm3, zmm4
+ vpxord zmm15, zmm15, zmm0
+ vpxord zmm12, zmm12, zmm1
+ vpxord zmm13, zmm13, zmm2
+ vpxord zmm14, zmm14, zmm3
+ vprord zmm15, zmm15, 8
+ vprord zmm12, zmm12, 8
+ vprord zmm13, zmm13, 8
+ vprord zmm14, zmm14, 8
+ vpaddd zmm10, zmm10, zmm15
+ vpaddd zmm11, zmm11, zmm12
+ vpaddd zmm8, zmm8, zmm13
+ vpaddd zmm9, zmm9, zmm14
+ vpxord zmm5, zmm5, zmm10
+ vpxord zmm6, zmm6, zmm11
+ vpxord zmm7, zmm7, zmm8
+ vpxord zmm4, zmm4, zmm9
+ vprord zmm5, zmm5, 7
+ vprord zmm6, zmm6, 7
+ vprord zmm7, zmm7, 7
+ vprord zmm4, zmm4, 7
+ vpxord zmm0, zmm0, zmm8
+ vpxord zmm1, zmm1, zmm9
+ vpxord zmm2, zmm2, zmm10
+ vpxord zmm3, zmm3, zmm11
+ vpxord zmm4, zmm4, zmm12
+ vpxord zmm5, zmm5, zmm13
+ vpxord zmm6, zmm6, zmm14
+ vpxord zmm7, zmm7, zmm15
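+ ; Neither movzx nor the vector instructions above write RFLAGS, so the jne
+ ; below still tests the cmp against [rsp+80H] at the top of innerloop16.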
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop16
+ mov rbx, qword ptr [rbp+90H]
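+ ; Transpose the 16x16 dword state so each input's eight output words become
+ ; contiguous: vpunpck{l,h}dq/qdq interleave within 128-bit lanes, vshufi32x4
+ ; rearranges the lanes, and the sixteen 32-byte hashes are stored at [rbx].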
+ vpunpckldq zmm16, zmm0, zmm1
+ vpunpckhdq zmm17, zmm0, zmm1
+ vpunpckldq zmm18, zmm2, zmm3
+ vpunpckhdq zmm19, zmm2, zmm3
+ vpunpckldq zmm20, zmm4, zmm5
+ vpunpckhdq zmm21, zmm4, zmm5
+ vpunpckldq zmm22, zmm6, zmm7
+ vpunpckhdq zmm23, zmm6, zmm7
+ vpunpcklqdq zmm0, zmm16, zmm18
+ vpunpckhqdq zmm1, zmm16, zmm18
+ vpunpcklqdq zmm2, zmm17, zmm19
+ vpunpckhqdq zmm3, zmm17, zmm19
+ vpunpcklqdq zmm4, zmm20, zmm22
+ vpunpckhqdq zmm5, zmm20, zmm22
+ vpunpcklqdq zmm6, zmm21, zmm23
+ vpunpckhqdq zmm7, zmm21, zmm23
+ vshufi32x4 zmm16, zmm0, zmm4, 88H
+ vshufi32x4 zmm17, zmm1, zmm5, 88H
+ vshufi32x4 zmm18, zmm2, zmm6, 88H
+ vshufi32x4 zmm19, zmm3, zmm7, 88H
+ vshufi32x4 zmm20, zmm0, zmm4, 0DDH
+ vshufi32x4 zmm21, zmm1, zmm5, 0DDH
+ vshufi32x4 zmm22, zmm2, zmm6, 0DDH
+ vshufi32x4 zmm23, zmm3, zmm7, 0DDH
+ vshufi32x4 zmm0, zmm16, zmm17, 88H
+ vshufi32x4 zmm1, zmm18, zmm19, 88H
+ vshufi32x4 zmm2, zmm20, zmm21, 88H
+ vshufi32x4 zmm3, zmm22, zmm23, 88H
+ vshufi32x4 zmm4, zmm16, zmm17, 0DDH
+ vshufi32x4 zmm5, zmm18, zmm19, 0DDH
+ vshufi32x4 zmm6, zmm20, zmm21, 0DDH
+ vshufi32x4 zmm7, zmm22, zmm23, 0DDH
+ vmovdqu32 zmmword ptr [rbx], zmm0
+ vmovdqu32 zmmword ptr [rbx+1H*40H], zmm1
+ vmovdqu32 zmmword ptr [rbx+2H*40H], zmm2
+ vmovdqu32 zmmword ptr [rbx+3H*40H], zmm3
+ vmovdqu32 zmmword ptr [rbx+4H*40H], zmm4
+ vmovdqu32 zmmword ptr [rbx+5H*40H], zmm5
+ vmovdqu32 zmmword ptr [rbx+6H*40H], zmm6
+ vmovdqu32 zmmword ptr [rbx+7H*40H], zmm7
+ vmovdqa32 zmm0, zmmword ptr [rsp]
+ vmovdqa32 zmm1, zmmword ptr [rsp+1H*40H]
+ vmovdqa32 zmm2, zmm0
+ ; XXX: ml64.exe does not currently understand the syntax. We use a workaround.
+ vpbroadcastd zmm4, dword ptr [ADD16]
+ vpbroadcastd zmm5, dword ptr [ADD1]
+ vpaddd zmm2{k1}, zmm0, zmm4
+ ; vpaddd zmm2{k1}, zmm0, dword ptr [ADD16] ; {1to16}
+ vpcmpud k2, zmm2, zmm0, 1
+ vpaddd zmm1 {k2}, zmm1, zmm5
+ ; vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1] ; {1to16}
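+ ; Advance the low counter words by 16 for the next 16 blocks. k1 gates the
+ ; increment (it was built from the boolean stack argument in the prologue),
+ ; and k2 carries a 1 into the high words where the low word wrapped.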
+ vmovdqa32 zmmword ptr [rsp], zmm2
+ vmovdqa32 zmmword ptr [rsp+1H*40H], zmm1
+ add rdi, 128
+ add rbx, 512
+ mov qword ptr [rbp+90H], rbx
+ sub rsi, 16
+ cmp rsi, 16
+ jnc outerloop16
+ test rsi, rsi
+ jne final15blocks
+unwind:
+ vzeroupper
+ vmovdqa xmm6, xmmword ptr [rsp+90H]
+ vmovdqa xmm7, xmmword ptr [rsp+0A0H]
+ vmovdqa xmm8, xmmword ptr [rsp+0B0H]
+ vmovdqa xmm9, xmmword ptr [rsp+0C0H]
+ vmovdqa xmm10, xmmword ptr [rsp+0D0H]
+ vmovdqa xmm11, xmmword ptr [rsp+0E0H]
+ vmovdqa xmm12, xmmword ptr [rsp+0F0H]
+ vmovdqa xmm13, xmmword ptr [rsp+100H]
+ vmovdqa xmm14, xmmword ptr [rsp+110H]
+ vmovdqa xmm15, xmmword ptr [rsp+120H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rsi
+ pop rdi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final15blocks:
+ test esi, 8H
+ je final7blocks
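+ ; 8-input tail: the same round structure using 256-bit registers, with state
+ ; in ymm0-ymm15. Blocks are loaded 16 bytes at a time with vmovups and
+ ; vinsertf128, then shuffled into message vectors ymm16-ymm31 (registers
+ ; 16-31 need the EVEX encodings provided by AVX-512VL).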
+ vpbroadcastd ymm0, dword ptr [rcx]
+ vpbroadcastd ymm1, dword ptr [rcx+4H]
+ vpbroadcastd ymm2, dword ptr [rcx+8H]
+ vpbroadcastd ymm3, dword ptr [rcx+0CH]
+ vpbroadcastd ymm4, dword ptr [rcx+10H]
+ vpbroadcastd ymm5, dword ptr [rcx+14H]
+ vpbroadcastd ymm6, dword ptr [rcx+18H]
+ vpbroadcastd ymm7, dword ptr [rcx+1CH]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov r12, qword ptr [rdi+20H]
+ mov r13, qword ptr [rdi+28H]
+ mov r14, qword ptr [rdi+30H]
+ mov r15, qword ptr [rdi+38H]
+ movzx eax, byte ptr [rbp+78H]
+ movzx ebx, byte ptr [rbp+80H]
+ or eax, ebx
+ xor edx, edx
+innerloop8:
+ movzx ebx, byte ptr [rbp+88H]
+ or ebx, eax
+ add rdx, 64
+ cmp rdx, qword ptr [rsp+80H]
+ cmove eax, ebx
+ mov dword ptr [rsp+88H], eax
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-40H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-40H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-40H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-40H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-40H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-40H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-40H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm16, ymm12, ymm14, 136
+ vshufps ymm17, ymm12, ymm14, 221
+ vshufps ymm18, ymm13, ymm15, 136
+ vshufps ymm19, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-30H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-30H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-30H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-30H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-30H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-30H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-30H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm20, ymm12, ymm14, 136
+ vshufps ymm21, ymm12, ymm14, 221
+ vshufps ymm22, ymm13, ymm15, 136
+ vshufps ymm23, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-20H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-20H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-20H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-20H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-20H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-20H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-20H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm24, ymm12, ymm14, 136
+ vshufps ymm25, ymm12, ymm14, 221
+ vshufps ymm26, ymm13, ymm15, 136
+ vshufps ymm27, ymm13, ymm15, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-10H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-10H], 01H
+ vmovups xmm9, xmmword ptr [r9+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-10H], 01H
+ vunpcklpd ymm12, ymm8, ymm9
+ vunpckhpd ymm13, ymm8, ymm9
+ vmovups xmm10, xmmword ptr [r10+rdx-10H]
+ vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-10H], 01H
+ vmovups xmm11, xmmword ptr [r11+rdx-10H]
+ vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-10H], 01H
+ vunpcklpd ymm14, ymm10, ymm11
+ vunpckhpd ymm15, ymm10, ymm11
+ vshufps ymm28, ymm12, ymm14, 136
+ vshufps ymm29, ymm12, ymm14, 221
+ vshufps ymm30, ymm13, ymm15, 136
+ vshufps ymm31, ymm13, ymm15, 221
+ vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0]
+ vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1]
+ vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2]
+ vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3]
+ vmovdqa ymm12, ymmword ptr [rsp]
+ vmovdqa ymm13, ymmword ptr [rsp+40H]
+ vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN]
+ vpbroadcastd ymm15, dword ptr [rsp+88H]
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm24
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm23
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm17
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm29
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm22
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm27
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm21
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm30
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm20
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm21
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm16
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm28
+ vpaddd ymm1, ymm1, ymm25
+ vpaddd ymm2, ymm2, ymm31
+ vpaddd ymm3, ymm3, ymm30
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm26
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm23
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm16
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm18
+ vpaddd ymm1, ymm1, ymm19
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm25
+ vpaddd ymm1, ymm1, ymm27
+ vpaddd ymm2, ymm2, ymm24
+ vpaddd ymm3, ymm3, ymm31
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm28
+ vpaddd ymm3, ymm3, ymm17
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm29
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm18
+ vpaddd ymm3, ymm3, ymm20
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm19
+ vpaddd ymm1, ymm1, ymm26
+ vpaddd ymm2, ymm2, ymm22
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpaddd ymm0, ymm0, ymm27
+ vpaddd ymm1, ymm1, ymm21
+ vpaddd ymm2, ymm2, ymm17
+ vpaddd ymm3, ymm3, ymm24
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vprord ymm15, ymm15, 16
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 12
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vpaddd ymm0, ymm0, ymm31
+ vpaddd ymm1, ymm1, ymm16
+ vpaddd ymm2, ymm2, ymm25
+ vpaddd ymm3, ymm3, ymm22
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm1, ymm1, ymm5
+ vpaddd ymm2, ymm2, ymm6
+ vpaddd ymm3, ymm3, ymm7
+ vpxord ymm12, ymm12, ymm0
+ vpxord ymm13, ymm13, ymm1
+ vpxord ymm14, ymm14, ymm2
+ vpxord ymm15, ymm15, ymm3
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vprord ymm15, ymm15, 8
+ vpaddd ymm8, ymm8, ymm12
+ vpaddd ymm9, ymm9, ymm13
+ vpaddd ymm10, ymm10, ymm14
+ vpaddd ymm11, ymm11, ymm15
+ vpxord ymm4, ymm4, ymm8
+ vpxord ymm5, ymm5, ymm9
+ vpxord ymm6, ymm6, ymm10
+ vpxord ymm7, ymm7, ymm11
+ vprord ymm4, ymm4, 7
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vpaddd ymm0, ymm0, ymm30
+ vpaddd ymm1, ymm1, ymm18
+ vpaddd ymm2, ymm2, ymm19
+ vpaddd ymm3, ymm3, ymm23
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 16
+ vprord ymm12, ymm12, 16
+ vprord ymm13, ymm13, 16
+ vprord ymm14, ymm14, 16
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 12
+ vprord ymm6, ymm6, 12
+ vprord ymm7, ymm7, 12
+ vprord ymm4, ymm4, 12
+ vpaddd ymm0, ymm0, ymm26
+ vpaddd ymm1, ymm1, ymm28
+ vpaddd ymm2, ymm2, ymm20
+ vpaddd ymm3, ymm3, ymm29
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm1, ymm1, ymm6
+ vpaddd ymm2, ymm2, ymm7
+ vpaddd ymm3, ymm3, ymm4
+ vpxord ymm15, ymm15, ymm0
+ vpxord ymm12, ymm12, ymm1
+ vpxord ymm13, ymm13, ymm2
+ vpxord ymm14, ymm14, ymm3
+ vprord ymm15, ymm15, 8
+ vprord ymm12, ymm12, 8
+ vprord ymm13, ymm13, 8
+ vprord ymm14, ymm14, 8
+ vpaddd ymm10, ymm10, ymm15
+ vpaddd ymm11, ymm11, ymm12
+ vpaddd ymm8, ymm8, ymm13
+ vpaddd ymm9, ymm9, ymm14
+ vpxord ymm5, ymm5, ymm10
+ vpxord ymm6, ymm6, ymm11
+ vpxord ymm7, ymm7, ymm8
+ vpxord ymm4, ymm4, ymm9
+ vprord ymm5, ymm5, 7
+ vprord ymm6, ymm6, 7
+ vprord ymm7, ymm7, 7
+ vprord ymm4, ymm4, 7
+ vpxor ymm0, ymm0, ymm8
+ vpxor ymm1, ymm1, ymm9
+ vpxor ymm2, ymm2, ymm10
+ vpxor ymm3, ymm3, ymm11
+ vpxor ymm4, ymm4, ymm12
+ vpxor ymm5, ymm5, ymm13
+ vpxor ymm6, ymm6, ymm14
+ vpxor ymm7, ymm7, ymm15
+ movzx eax, byte ptr [rbp+78H]
+ jne innerloop8
+ mov rbx, qword ptr [rbp+90H]
+ vunpcklps ymm8, ymm0, ymm1
+ vunpcklps ymm9, ymm2, ymm3
+ vunpckhps ymm10, ymm0, ymm1
+ vunpcklps ymm11, ymm4, ymm5
+ vunpcklps ymm0, ymm6, ymm7
+ vshufps ymm12, ymm8, ymm9, 78
+ vblendps ymm1, ymm8, ymm12, 0CCH
+ vshufps ymm8, ymm11, ymm0, 78
+ vunpckhps ymm13, ymm2, ymm3
+ vblendps ymm2, ymm11, ymm8, 0CCH
+ vblendps ymm3, ymm12, ymm9, 0CCH
+ vperm2f128 ymm12, ymm1, ymm2, 20H
+ vmovups ymmword ptr [rbx], ymm12
+ vunpckhps ymm14, ymm4, ymm5
+ vblendps ymm4, ymm8, ymm0, 0CCH
+ vunpckhps ymm15, ymm6, ymm7
+ vperm2f128 ymm7, ymm3, ymm4, 20H
+ vmovups ymmword ptr [rbx+20H], ymm7
+ vshufps ymm5, ymm10, ymm13, 78
+ vblendps ymm6, ymm5, ymm13, 0CCH
+ vshufps ymm13, ymm14, ymm15, 78
+ vblendps ymm10, ymm10, ymm5, 0CCH
+ vblendps ymm14, ymm14, ymm13, 0CCH
+ vperm2f128 ymm8, ymm10, ymm14, 20H
+ vmovups ymmword ptr [rbx+40H], ymm8
+ vblendps ymm15, ymm13, ymm15, 0CCH
+ vperm2f128 ymm13, ymm6, ymm15, 20H
+ vmovups ymmword ptr [rbx+60H], ymm13
+ vperm2f128 ymm9, ymm1, ymm2, 31H
+ vperm2f128 ymm11, ymm3, ymm4, 31H
+ vmovups ymmword ptr [rbx+80H], ymm9
+ vperm2f128 ymm14, ymm10, ymm14, 31H
+ vperm2f128 ymm15, ymm6, ymm15, 31H
+ vmovups ymmword ptr [rbx+0A0H], ymm11
+ vmovups ymmword ptr [rbx+0C0H], ymm14
+ vmovups ymmword ptr [rbx+0E0H], ymm15
+ vmovdqa ymm0, ymmword ptr [rsp]
+ vmovdqa ymm2, ymmword ptr [rsp+40H]
+ vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+1H*20H]
+ vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+3H*20H]
+ vmovdqa ymmword ptr [rsp], ymm0
+ vmovdqa ymmword ptr [rsp+40H], ymm2
+ add rbx, 256
+ mov qword ptr [rbp+90H], rbx
+ add rdi, 64
+ sub rsi, 8
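+; Fewer than eight inputs remain; peel off groups of 4, then 2, then 1 below.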
+final7blocks:
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, qword ptr [rsp+80H]
+ movzx r13, byte ptr [rbp+78H]
+ movzx r12, byte ptr [rbp+88H]
+ test esi, 4H
+ je final3blocks
+ vbroadcasti32x4 zmm0, xmmword ptr [rcx]
+ vbroadcasti32x4 zmm1, xmmword ptr [rcx+1H*10H]
+ vmovdqa xmm12, xmmword ptr [rsp]
+ vmovdqa xmm13, xmmword ptr [rsp+40H]
+ vpunpckldq xmm14, xmm12, xmm13
+ vpunpckhdq xmm15, xmm12, xmm13
+ vpermq ymm14, ymm14, 0DCH
+ vpermq ymm15, ymm15, 0DCH
+ vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
+ vinserti64x4 zmm13, zmm14, ymm15, 01H
+ mov eax, 17476
+ kmovw k2, eax
+ vpblendmd zmm13 {k2}, zmm13, zmm12
+ vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV]
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ mov eax, 43690
+ kmovw k3, eax
+ mov eax, 34952
+ kmovw k4, eax
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+88H], eax
+ vmovdqa32 zmm2, zmm15
+ vpbroadcastd zmm8, dword ptr [rsp+22H*4H]
+ vpblendmd zmm3 {k4}, zmm13, zmm8
+ vmovups zmm8, zmmword ptr [r8+rdx-1H*40H]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-4H*10H], 01H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-4H*10H], 02H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-4H*10H], 03H
+ vmovups zmm9, zmmword ptr [r8+rdx-30H]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-3H*10H], 01H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-3H*10H], 02H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-3H*10H], 03H
+ vshufps zmm4, zmm8, zmm9, 136
+ vshufps zmm5, zmm8, zmm9, 221
+ vmovups zmm8, zmmword ptr [r8+rdx-20H]
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-2H*10H], 01H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-2H*10H], 02H
+ vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-2H*10H], 03H
+ vmovups zmm9, zmmword ptr [r8+rdx-10H]
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-1H*10H], 01H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-1H*10H], 02H
+ vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-1H*10H], 03H
+ vshufps zmm6, zmm8, zmm9, 136
+ vshufps zmm7, zmm8, zmm9, 221
+ vpshufd zmm6, zmm6, 93H
+ vpshufd zmm7, zmm7, 93H
+ mov al, 7
+roundloop4:
+ vpaddd zmm0, zmm0, zmm4
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm5
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 93H
+ vpshufd zmm3, zmm3, 4EH
+ vpshufd zmm2, zmm2, 39H
+ vpaddd zmm0, zmm0, zmm6
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 16
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 12
+ vpaddd zmm0, zmm0, zmm7
+ vpaddd zmm0, zmm0, zmm1
+ vpxord zmm3, zmm3, zmm0
+ vprord zmm3, zmm3, 8
+ vpaddd zmm2, zmm2, zmm3
+ vpxord zmm1, zmm1, zmm2
+ vprord zmm1, zmm1, 7
+ vpshufd zmm0, zmm0, 39H
+ vpshufd zmm3, zmm3, 4EH
+ vpshufd zmm2, zmm2, 93H
+ dec al
+ jz endroundloop4
+ vshufps zmm8, zmm4, zmm5, 214
+ vpshufd zmm9, zmm4, 0FH
+ vpshufd zmm4, zmm8, 39H
+ vshufps zmm8, zmm6, zmm7, 250
+ vpblendmd zmm9 {k3}, zmm9, zmm8
+ vpunpcklqdq zmm8, zmm7, zmm5
+ vpblendmd zmm8 {k4}, zmm8, zmm6
+ vpshufd zmm8, zmm8, 78H
+ vpunpckhdq zmm5, zmm5, zmm7
+ vpunpckldq zmm6, zmm6, zmm5
+ vpshufd zmm7, zmm6, 1EH
+ vmovdqa32 zmm5, zmm9
+ vmovdqa32 zmm6, zmm8
+ jmp roundloop4
+endroundloop4:
+ vpxord zmm0, zmm0, zmm2
+ vpxord zmm1, zmm1, zmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop4
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vextracti32x4 xmmword ptr [rbx+4H*10H], zmm0, 02H
+ vextracti32x4 xmmword ptr [rbx+5H*10H], zmm1, 02H
+ vextracti32x4 xmmword ptr [rbx+6H*10H], zmm0, 03H
+ vextracti32x4 xmmword ptr [rbx+7H*10H], zmm1, 03H
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+40H]
+ vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+1H*10H]
+ vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+5H*10H]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+40H], xmm2
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
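+; Handle a remaining pair of inputs with 256-bit registers.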
+final3blocks:
+ test esi, 2H
+ je final1block
+ vbroadcasti128 ymm0, xmmword ptr [rcx]
+ vbroadcasti128 ymm1, xmmword ptr [rcx+10H]
+ vmovd xmm13, dword ptr [rsp]
+ vpinsrd xmm13, xmm13, dword ptr [rsp+40H], 1
+ vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovd xmm14, dword ptr [rsp+4H]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+44H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vinserti128 ymm13, ymm13, xmm14, 01H
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ mov dword ptr [rsp+88H], eax
+ vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV]
+ vpbroadcastd ymm8, dword ptr [rsp+88H]
+ vpblendd ymm3, ymm13, ymm8, 88H
+ vmovups ymm8, ymmword ptr [r8+rdx-40H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-40H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-30H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-30H], 01H
+ vshufps ymm4, ymm8, ymm9, 136
+ vshufps ymm5, ymm8, ymm9, 221
+ vmovups ymm8, ymmword ptr [r8+rdx-20H]
+ vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-20H], 01H
+ vmovups ymm9, ymmword ptr [r8+rdx-10H]
+ vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-10H], 01H
+ vshufps ymm6, ymm8, ymm9, 136
+ vshufps ymm7, ymm8, ymm9, 221
+ vpshufd ymm6, ymm6, 93H
+ vpshufd ymm7, ymm7, 93H
+ mov al, 7
+roundloop2:
+ vpaddd ymm0, ymm0, ymm4
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm5
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 93H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 39H
+ vpaddd ymm0, ymm0, ymm6
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 16
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 12
+ vpaddd ymm0, ymm0, ymm7
+ vpaddd ymm0, ymm0, ymm1
+ vpxord ymm3, ymm3, ymm0
+ vprord ymm3, ymm3, 8
+ vpaddd ymm2, ymm2, ymm3
+ vpxord ymm1, ymm1, ymm2
+ vprord ymm1, ymm1, 7
+ vpshufd ymm0, ymm0, 39H
+ vpshufd ymm3, ymm3, 4EH
+ vpshufd ymm2, ymm2, 93H
+ dec al
+ jz endroundloop2
+ vshufps ymm8, ymm4, ymm5, 214
+ vpshufd ymm9, ymm4, 0FH
+ vpshufd ymm4, ymm8, 39H
+ vshufps ymm8, ymm6, ymm7, 250
+ vpblendd ymm9, ymm9, ymm8, 0AAH
+ vpunpcklqdq ymm8, ymm7, ymm5
+ vpblendd ymm8, ymm8, ymm6, 88H
+ vpshufd ymm8, ymm8, 78H
+ vpunpckhdq ymm5, ymm5, ymm7
+ vpunpckldq ymm6, ymm6, ymm5
+ vpshufd ymm7, ymm6, 1EH
+ vmovdqa ymm5, ymm9
+ vmovdqa ymm6, ymm8
+ jmp roundloop2
+endroundloop2:
+ vpxor ymm0, ymm0, ymm2
+ vpxor ymm1, ymm1, ymm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ vextracti128 xmmword ptr [rbx+20H], ymm0, 01H
+ vextracti128 xmmword ptr [rbx+30H], ymm1, 01H
+ vmovdqa xmm0, xmmword ptr [rsp]
+ vmovdqa xmm2, xmmword ptr [rsp+40H]
+ vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+8H]
+ vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+48H]
+ vmovdqa xmmword ptr [rsp], xmm0
+ vmovdqa xmmword ptr [rsp+40H], xmm2
+ add rbx, 64
+ add rdi, 16
+ sub rsi, 2
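+; Hash the last single input, one 64-byte block at a time, in XMM registers.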
+final1block:
+ test esi, 1H
+ je unwind
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ vmovd xmm14, dword ptr [rsp]
+ vpinsrd xmm14, xmm14, dword ptr [rsp+40H], 1
+ vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ vmovdqa xmm15, xmmword ptr [BLAKE3_IV]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+ALIGN 16
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ vpinsrd xmm3, xmm14, eax, 3
+ vmovdqa xmm2, xmm15
+ vmovups xmm8, xmmword ptr [r8+rdx-40H]
+ vmovups xmm9, xmmword ptr [r8+rdx-30H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-20H]
+ vmovups xmm9, xmmword ptr [r8+rdx-10H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+roundloop1:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+
+_blake3_hash_many_avx512 ENDP
+blake3_hash_many_avx512 ENDP
+
+ALIGN 16
+blake3_compress_in_place_avx512 PROC
+_blake3_compress_in_place_avx512 PROC
+ sub rsp, 72
+ vmovdqa xmmword ptr [rsp], xmm6
+ vmovdqa xmmword ptr [rsp+10H], xmm7
+ vmovdqa xmmword ptr [rsp+20H], xmm8
+ vmovdqa xmmword ptr [rsp+30H], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ movzx eax, byte ptr [rsp+70H]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ vmovq xmm3, r9
+ vmovq xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV]
+ vmovups xmm8, xmmword ptr [rdx]
+ vmovups xmm9, xmmword ptr [rdx+10H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rdx+20H]
+ vmovups xmm9, xmmword ptr [rdx+30H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+@@:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp @B
+@@:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vmovdqu xmmword ptr [rcx], xmm0
+ vmovdqu xmmword ptr [rcx+10H], xmm1
+ vmovdqa xmm6, xmmword ptr [rsp]
+ vmovdqa xmm7, xmmword ptr [rsp+10H]
+ vmovdqa xmm8, xmmword ptr [rsp+20H]
+ vmovdqa xmm9, xmmword ptr [rsp+30H]
+ add rsp, 72
+ ret
+_blake3_compress_in_place_avx512 ENDP
+blake3_compress_in_place_avx512 ENDP
+
+ALIGN 16
+blake3_compress_xof_avx512 PROC
+_blake3_compress_xof_avx512 PROC
+ sub rsp, 72
+ vmovdqa xmmword ptr [rsp], xmm6
+ vmovdqa xmmword ptr [rsp+10H], xmm7
+ vmovdqa xmmword ptr [rsp+20H], xmm8
+ vmovdqa xmmword ptr [rsp+30H], xmm9
+ vmovdqu xmm0, xmmword ptr [rcx]
+ vmovdqu xmm1, xmmword ptr [rcx+10H]
+ movzx eax, byte ptr [rsp+70H]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+78H]
+ shl rax, 32
+ add r8, rax
+ vmovq xmm3, r9
+ vmovq xmm4, r8
+ vpunpcklqdq xmm3, xmm3, xmm4
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV]
+ vmovups xmm8, xmmword ptr [rdx]
+ vmovups xmm9, xmmword ptr [rdx+10H]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [rdx+20H]
+ vmovups xmm9, xmmword ptr [rdx+30H]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 93H
+ vpshufd xmm7, xmm7, 93H
+ mov al, 7
+@@:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 93H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 39H
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 39H
+ vpshufd xmm3, xmm3, 4EH
+ vpshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0FH
+ vpshufd xmm4, xmm8, 39H
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0AAH
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 88H
+ vpshufd xmm8, xmm8, 78H
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 1EH
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp @B
+@@:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vpxor xmm2, xmm2, xmmword ptr [rcx]
+ vpxor xmm3, xmm3, xmmword ptr [rcx+10H]
+ vmovdqu xmmword ptr [r10], xmm0
+ vmovdqu xmmword ptr [r10+10H], xmm1
+ vmovdqu xmmword ptr [r10+20H], xmm2
+ vmovdqu xmmword ptr [r10+30H], xmm3
+ vmovdqa xmm6, xmmword ptr [rsp]
+ vmovdqa xmm7, xmmword ptr [rsp+10H]
+ vmovdqa xmm8, xmmword ptr [rsp+20H]
+ vmovdqa xmm9, xmmword ptr [rsp+30H]
+ add rsp, 72
+ ret
+_blake3_compress_xof_avx512 ENDP
+blake3_compress_xof_avx512 ENDP
+
+_TEXT ENDS
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+INDEX0:
+ dd 0, 1, 2, 3, 16, 17, 18, 19
+ dd 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+ dd 4, 5, 6, 7, 20, 21, 22, 23
+ dd 12, 13, 14, 15, 28, 29, 30, 31
+ADD0:
+ dd 0, 1, 2, 3, 4, 5, 6, 7
+ dd 8, 9, 10, 11, 12, 13, 14, 15
+ADD1:
+ dd 1
+ADD16:
+ dd 16
+BLAKE3_BLOCK_LEN:
+ dd 64
+ALIGN 64
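+; The first four words of the BLAKE3 IV (shared with SHA-256's initial state);
+; the compression setup above only needs these four.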
+BLAKE3_IV:
+BLAKE3_IV_0:
+ dd 06A09E667H
+BLAKE3_IV_1:
+ dd 0BB67AE85H
+BLAKE3_IV_2:
+ dd 03C6EF372H
+BLAKE3_IV_3:
+ dd 0A54FF53AH
+
+_RDATA ENDS
+END
diff --git a/llvm/lib/Support/BLAKE3/blake3_dispatch.c b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
new file mode 100644
index 000000000000..e96e714225f4
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
@@ -0,0 +1,277 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "blake3_impl.h"
+
+#if defined(IS_X86)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__GNUC__)
+#include <immintrin.h>
+#else
+#error "Unimplemented!"
+#endif
+#endif
+
+#define MAYBE_UNUSED(x) (void)((x))
+
+#if defined(IS_X86)
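+// Reads XCR0, which reports which register states the OS saves and restores;
+// the AVX and AVX-512 checks below are only trustworthy after consulting it.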
+static uint64_t xgetbv(void) {
+#if defined(_MSC_VER)
+ return _xgetbv(0);
+#else
+ uint32_t eax = 0, edx = 0;
+ __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
+ return ((uint64_t)edx << 32) | eax;
+#endif
+}
+
+static void cpuid(uint32_t out[4], uint32_t id) {
+#if defined(_MSC_VER)
+ __cpuid((int *)out, id);
+#elif defined(__i386__) || defined(_M_IX86)
+ __asm__ __volatile__("movl %%ebx, %1\n"
+ "cpuid\n"
+ "xchgl %1, %%ebx\n"
+ : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
+ : "a"(id));
+#else
+ __asm__ __volatile__("cpuid\n"
+ : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+ : "a"(id));
+#endif
+}
+
+static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
+#if defined(_MSC_VER)
+ __cpuidex((int *)out, id, sid);
+#elif defined(__i386__) || defined(_M_IX86)
+ __asm__ __volatile__("movl %%ebx, %1\n"
+ "cpuid\n"
+ "xchgl %1, %%ebx\n"
+ : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
+ : "a"(id), "c"(sid));
+#else
+ __asm__ __volatile__("cpuid\n"
+ : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+ : "a"(id), "c"(sid));
+#endif
+}
+
+#endif
+
+enum cpu_feature {
+ SSE2 = 1 << 0,
+ SSSE3 = 1 << 1,
+ SSE41 = 1 << 2,
+ AVX = 1 << 3,
+ AVX2 = 1 << 4,
+ AVX512F = 1 << 5,
+ AVX512VL = 1 << 6,
+ /* ... */
+ UNDEFINED = 1 << 30
+};
+
+#if !defined(BLAKE3_TESTING)
+static /* Allow the variable to be controlled manually for testing */
+#endif
+ enum cpu_feature g_cpu_features = UNDEFINED;
+
+LLVM_ATTRIBUTE_USED
+#if !defined(BLAKE3_TESTING)
+static
+#endif
+ enum cpu_feature
+ get_cpu_features(void) {
+
+ if (g_cpu_features != UNDEFINED) {
+ return g_cpu_features;
+ } else {
+#if defined(IS_X86)
+ uint32_t regs[4] = {0};
+ uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
+ (void)edx;
+ enum cpu_feature features = 0;
+ cpuid(regs, 0);
+ const int max_id = *eax;
+ cpuid(regs, 1);
+#if defined(__amd64__) || defined(_M_X64)
+ features |= SSE2;
+#else
+ if (*edx & (1UL << 26))
+ features |= SSE2;
+#endif
+  if (*ecx & (1UL << 9)) // bit 9 of ECX is SSSE3 (bit 0 is only SSE3)
+    features |= SSSE3;
+ if (*ecx & (1UL << 19))
+ features |= SSE41;
+
+  if (*ecx & (1UL << 27)) { // OSXSAVE: XGETBV/XCR0 are available
+ const uint64_t mask = xgetbv();
+    if ((mask & 6) == 6) { // XCR0 bits 1-2: the OS saves SSE and AVX state
+ if (*ecx & (1UL << 28))
+ features |= AVX;
+ if (max_id >= 7) {
+ cpuidex(regs, 7, 0);
+ if (*ebx & (1UL << 5))
+ features |= AVX2;
+        if ((mask & 224) == 224) { // XCR0 bits 5-7: opmask, ZMM_Hi256, Hi16_ZMM
+ if (*ebx & (1UL << 31))
+ features |= AVX512VL;
+ if (*ebx & (1UL << 16))
+ features |= AVX512F;
+ }
+ }
+ }
+ }
+ g_cpu_features = features;
+ return features;
+#else
+  /* Runtime NEON detection is not implemented; NEON use is a compile-time
+     choice via BLAKE3_USE_NEON (see blake3_impl.h). */
+ return 0;
+#endif
+ }
+}
+
+void blake3_compress_in_place(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags) {
+#if defined(IS_X86)
+ const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+ if (features & AVX512VL) {
+ blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
+ return;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+ if (features & SSE41) {
+ blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
+ return;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+ if (features & SSE2) {
+ blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
+ return;
+ }
+#endif
+#endif
+ blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
+}
+
+void blake3_compress_xof(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter, uint8_t flags,
+ uint8_t out[64]) {
+#if defined(IS_X86)
+ const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+ if (features & AVX512VL) {
+ blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
+ return;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+ if (features & SSE41) {
+ blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
+ return;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+ if (features & SSE2) {
+ blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
+ return;
+ }
+#endif
+#endif
+ blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
+}
+
+void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+#if defined(IS_X86)
+ const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
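+  // The 16-way kernel mixes ZMM operations with VL-encoded XMM/YMM ones, so
+  // both AVX512F and AVX512VL must be present.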
+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
+ blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end,
+ out);
+ return;
+ }
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+ if (features & AVX2) {
+ blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end,
+ out);
+ return;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+ if (features & SSE41) {
+ blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end,
+ out);
+ return;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+ if (features & SSE2) {
+ blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end,
+ out);
+ return;
+ }
+#endif
+#endif
+
+#if BLAKE3_USE_NEON == 1
+ blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ return;
+#endif
+
+ blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end,
+ out);
+}
+
+// The dynamically detected SIMD degree of the current platform.
+size_t blake3_simd_degree(void) {
+#if defined(IS_X86)
+ const enum cpu_feature features = get_cpu_features();
+ MAYBE_UNUSED(features);
+#if !defined(BLAKE3_NO_AVX512)
+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
+ return 16;
+ }
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+ if (features & AVX2) {
+ return 8;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+ if (features & SSE41) {
+ return 4;
+ }
+#endif
+#if !defined(BLAKE3_NO_SSE2)
+ if (features & SSE2) {
+ return 4;
+ }
+#endif
+#endif
+#if BLAKE3_USE_NEON == 1
+ return 4;
+#endif
+ return 1;
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_impl.h b/llvm/lib/Support/BLAKE3/blake3_impl.h
new file mode 100644
index 000000000000..180d0a6eeda8
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_impl.h
@@ -0,0 +1,312 @@
+#ifndef BLAKE3_IMPL_H
+#define BLAKE3_IMPL_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "llvm-c/blake3.h"
+// For \p LLVM_LIBRARY_VISIBILITY
+#include "llvm/Support/Compiler.h"
+
+// Remove the 'llvm_' prefix for the rest of the internal implementation.
+#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING
+#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN
+#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN
+#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN
+#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN
+#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH
+#define blake3_hasher llvm_blake3_hasher
+#define blake3_chunk_state llvm_blake3_chunk_state
+
+// internal flags
+enum blake3_flags {
+ CHUNK_START = 1 << 0,
+ CHUNK_END = 1 << 1,
+ PARENT = 1 << 2,
+ ROOT = 1 << 3,
+ KEYED_HASH = 1 << 4,
+ DERIVE_KEY_CONTEXT = 1 << 5,
+ DERIVE_KEY_MATERIAL = 1 << 6,
+};
+
+// This C implementation tries to support recent versions of GCC, Clang, and
+// MSVC.
+#if defined(_MSC_VER)
+#define INLINE static __forceinline
+#else
+#define INLINE static inline __attribute__((always_inline))
+#endif
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define IS_X86
+#define IS_X86_64
+#endif
+
+#if defined(__i386__) || defined(_M_IX86)
+#define IS_X86
+#define IS_X86_32
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define IS_AARCH64
+#endif
+
+#if defined(IS_X86)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+#include <immintrin.h>
+#endif
+
+#if !defined(BLAKE3_USE_NEON)
+  // If BLAKE3_USE_NEON is not set manually, autodetect based on AArch64-ness.
+ #if defined(IS_AARCH64)
+ #define BLAKE3_USE_NEON 1
+ #else
+ #define BLAKE3_USE_NEON 0
+ #endif
+#endif
+
+#if defined(IS_X86)
+#define MAX_SIMD_DEGREE 16
+#elif BLAKE3_USE_NEON == 1
+#define MAX_SIMD_DEGREE 4
+#else
+#define MAX_SIMD_DEGREE 1
+#endif
+
+// There are some places where we want a static size that's equal to the
+// MAX_SIMD_DEGREE, but also at least 2.
+#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
+
+static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
+ 0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
+ 0x1F83D9ABUL, 0x5BE0CD19UL};
+
+static const uint8_t MSG_SCHEDULE[7][16] = {
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+ {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
+ {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
+ {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
+ {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
+ {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
+ {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
+};
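+// Row 0 is the identity order; each later row is the row above it passed
+// through the same fixed permutation, so seven rounds reuse one small table.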
+
+/* Find index of the highest set bit */
+/* x is assumed to be nonzero. */
+static unsigned int highest_one(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+ return 63 ^ __builtin_clzll(x);
+#elif defined(_MSC_VER) && defined(IS_X86_64)
+ unsigned long index;
+ _BitScanReverse64(&index, x);
+ return index;
+#elif defined(_MSC_VER) && defined(IS_X86_32)
+ if(x >> 32) {
+ unsigned long index;
+ _BitScanReverse(&index, (unsigned long)(x >> 32));
+ return 32 + index;
+ } else {
+ unsigned long index;
+ _BitScanReverse(&index, (unsigned long)x);
+ return index;
+ }
+#else
+ unsigned int c = 0;
+ if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
+ if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
+ if(x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }
+ if(x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }
+ if(x & 0x000000000000000cULL) { x >>= 2; c += 2; }
+ if(x & 0x0000000000000002ULL) { c += 1; }
+ return c;
+#endif
+}
+
+// Count the number of 1 bits.
+INLINE unsigned int popcnt(uint64_t x) {
+#if defined(__GNUC__) || defined(__clang__)
+ return __builtin_popcountll(x);
+#else
+ unsigned int count = 0;
+ while (x != 0) {
+ count += 1;
+ x &= x - 1;
+ }
+ return count;
+#endif
+}
+
+// Largest power of two less than or equal to x. As a special case, returns 1
+// when x is 0.
+INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
+ return 1ULL << highest_one(x | 1);
+}
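+// For example, round_down_to_power_of_2(12) == 8; the OR with 1 makes the
+// x == 0 special case return 1 instead of violating highest_one's precondition.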
+
+INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
+
+INLINE uint32_t counter_high(uint64_t counter) {
+ return (uint32_t)(counter >> 32);
+}
+
+INLINE uint32_t load32(const void *src) {
+ const uint8_t *p = (const uint8_t *)src;
+ return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
+ ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
+}
+
+INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
+ uint32_t key_words[8]) {
+ key_words[0] = load32(&key[0 * 4]);
+ key_words[1] = load32(&key[1 * 4]);
+ key_words[2] = load32(&key[2 * 4]);
+ key_words[3] = load32(&key[3 * 4]);
+ key_words[4] = load32(&key[4 * 4]);
+ key_words[5] = load32(&key[5 * 4]);
+ key_words[6] = load32(&key[6 * 4]);
+ key_words[7] = load32(&key[7 * 4]);
+}
+
+INLINE void store32(void *dst, uint32_t w) {
+ uint8_t *p = (uint8_t *)dst;
+ p[0] = (uint8_t)(w >> 0);
+ p[1] = (uint8_t)(w >> 8);
+ p[2] = (uint8_t)(w >> 16);
+ p[3] = (uint8_t)(w >> 24);
+}
+
+INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
+ store32(&bytes_out[0 * 4], cv_words[0]);
+ store32(&bytes_out[1 * 4], cv_words[1]);
+ store32(&bytes_out[2 * 4], cv_words[2]);
+ store32(&bytes_out[3 * 4], cv_words[3]);
+ store32(&bytes_out[4 * 4], cv_words[4]);
+ store32(&bytes_out[5 * 4], cv_words[5]);
+ store32(&bytes_out[6 * 4], cv_words[6]);
+ store32(&bytes_out[7 * 4], cv_words[7]);
+}
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter, uint8_t flags,
+ uint8_t out[64]);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+LLVM_LIBRARY_VISIBILITY
+size_t blake3_simd_degree(void);
+
+
+// Declarations for implementation-specific functions.
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_portable(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_portable(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+
+#if defined(IS_X86)
+#if !defined(BLAKE3_NO_SSE2)
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]);
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_SSE41)
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_sse41(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_sse41(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]);
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_AVX2)
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+#endif
+#if !defined(BLAKE3_NO_AVX512)
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]);
+
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+#endif
+#endif
+
+#if BLAKE3_USE_NEON == 1
+LLVM_LIBRARY_VISIBILITY
+void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out);
+#endif
+
+
+#endif /* BLAKE3_IMPL_H */
diff --git a/llvm/lib/Support/BLAKE3/blake3_neon.c b/llvm/lib/Support/BLAKE3/blake3_neon.c
new file mode 100644
index 000000000000..380bbfc3e466
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_neon.c
@@ -0,0 +1,356 @@
+#include "blake3_impl.h"
+
+#if BLAKE3_USE_NEON
+
+#include <arm_neon.h>
+
+#ifdef __ARM_BIG_ENDIAN
+#error "This implementation only supports little-endian ARM."
+// It might be that all we need for big-endian support here is to get the loads
+// and stores right, but step zero would be finding a way to test it in CI.
+#endif
+
+INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
+ // vld1q_u32 has alignment requirements. Don't use it.
+ uint32x4_t x;
+ memcpy(&x, src, 16);
+ return x;
+}
+
+INLINE void storeu_128(uint32x4_t src, uint8_t dest[16]) {
+ // vst1q_u32 has alignment requirements. Don't use it.
+ memcpy(dest, &src, 16);
+}
+
+INLINE uint32x4_t add_128(uint32x4_t a, uint32x4_t b) {
+ return vaddq_u32(a, b);
+}
+
+INLINE uint32x4_t xor_128(uint32x4_t a, uint32x4_t b) {
+ return veorq_u32(a, b);
+}
+
+INLINE uint32x4_t set1_128(uint32_t x) { return vld1q_dup_u32(&x); }
+
+INLINE uint32x4_t set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+ uint32_t array[4] = {a, b, c, d};
+ return vld1q_u32(array);
+}
+
+INLINE uint32x4_t rot16_128(uint32x4_t x) {
+ return vorrq_u32(vshrq_n_u32(x, 16), vshlq_n_u32(x, 32 - 16));
+}
+
+INLINE uint32x4_t rot12_128(uint32x4_t x) {
+ return vorrq_u32(vshrq_n_u32(x, 12), vshlq_n_u32(x, 32 - 12));
+}
+
+INLINE uint32x4_t rot8_128(uint32x4_t x) {
+ return vorrq_u32(vshrq_n_u32(x, 8), vshlq_n_u32(x, 32 - 8));
+}
+
+INLINE uint32x4_t rot7_128(uint32x4_t x) {
+ return vorrq_u32(vshrq_n_u32(x, 7), vshlq_n_u32(x, 32 - 7));
+}
+
+// TODO: compress_neon
+
+// TODO: hash2_neon
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash4_neon
+ * ----------------------------------------------------------------------------
+ */
+
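+// One full BLAKE3 round over four inputs at once: v[j] holds state word j of
+// all four hashes (one per 32-bit lane), and m is transposed the same way.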
+INLINE void round_fn4(uint32x4_t v[16], uint32x4_t m[16], size_t r) {
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+ v[0] = add_128(v[0], v[4]);
+ v[1] = add_128(v[1], v[5]);
+ v[2] = add_128(v[2], v[6]);
+ v[3] = add_128(v[3], v[7]);
+ v[12] = xor_128(v[12], v[0]);
+ v[13] = xor_128(v[13], v[1]);
+ v[14] = xor_128(v[14], v[2]);
+ v[15] = xor_128(v[15], v[3]);
+ v[12] = rot16_128(v[12]);
+ v[13] = rot16_128(v[13]);
+ v[14] = rot16_128(v[14]);
+ v[15] = rot16_128(v[15]);
+ v[8] = add_128(v[8], v[12]);
+ v[9] = add_128(v[9], v[13]);
+ v[10] = add_128(v[10], v[14]);
+ v[11] = add_128(v[11], v[15]);
+ v[4] = xor_128(v[4], v[8]);
+ v[5] = xor_128(v[5], v[9]);
+ v[6] = xor_128(v[6], v[10]);
+ v[7] = xor_128(v[7], v[11]);
+ v[4] = rot12_128(v[4]);
+ v[5] = rot12_128(v[5]);
+ v[6] = rot12_128(v[6]);
+ v[7] = rot12_128(v[7]);
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+ v[0] = add_128(v[0], v[4]);
+ v[1] = add_128(v[1], v[5]);
+ v[2] = add_128(v[2], v[6]);
+ v[3] = add_128(v[3], v[7]);
+ v[12] = xor_128(v[12], v[0]);
+ v[13] = xor_128(v[13], v[1]);
+ v[14] = xor_128(v[14], v[2]);
+ v[15] = xor_128(v[15], v[3]);
+ v[12] = rot8_128(v[12]);
+ v[13] = rot8_128(v[13]);
+ v[14] = rot8_128(v[14]);
+ v[15] = rot8_128(v[15]);
+ v[8] = add_128(v[8], v[12]);
+ v[9] = add_128(v[9], v[13]);
+ v[10] = add_128(v[10], v[14]);
+ v[11] = add_128(v[11], v[15]);
+ v[4] = xor_128(v[4], v[8]);
+ v[5] = xor_128(v[5], v[9]);
+ v[6] = xor_128(v[6], v[10]);
+ v[7] = xor_128(v[7], v[11]);
+ v[4] = rot7_128(v[4]);
+ v[5] = rot7_128(v[5]);
+ v[6] = rot7_128(v[6]);
+ v[7] = rot7_128(v[7]);
+
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+ v[0] = add_128(v[0], v[5]);
+ v[1] = add_128(v[1], v[6]);
+ v[2] = add_128(v[2], v[7]);
+ v[3] = add_128(v[3], v[4]);
+ v[15] = xor_128(v[15], v[0]);
+ v[12] = xor_128(v[12], v[1]);
+ v[13] = xor_128(v[13], v[2]);
+ v[14] = xor_128(v[14], v[3]);
+ v[15] = rot16_128(v[15]);
+ v[12] = rot16_128(v[12]);
+ v[13] = rot16_128(v[13]);
+ v[14] = rot16_128(v[14]);
+ v[10] = add_128(v[10], v[15]);
+ v[11] = add_128(v[11], v[12]);
+ v[8] = add_128(v[8], v[13]);
+ v[9] = add_128(v[9], v[14]);
+ v[5] = xor_128(v[5], v[10]);
+ v[6] = xor_128(v[6], v[11]);
+ v[7] = xor_128(v[7], v[8]);
+ v[4] = xor_128(v[4], v[9]);
+ v[5] = rot12_128(v[5]);
+ v[6] = rot12_128(v[6]);
+ v[7] = rot12_128(v[7]);
+ v[4] = rot12_128(v[4]);
+ v[0] = add_128(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+ v[1] = add_128(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+ v[2] = add_128(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+ v[3] = add_128(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+ v[0] = add_128(v[0], v[5]);
+ v[1] = add_128(v[1], v[6]);
+ v[2] = add_128(v[2], v[7]);
+ v[3] = add_128(v[3], v[4]);
+ v[15] = xor_128(v[15], v[0]);
+ v[12] = xor_128(v[12], v[1]);
+ v[13] = xor_128(v[13], v[2]);
+ v[14] = xor_128(v[14], v[3]);
+ v[15] = rot8_128(v[15]);
+ v[12] = rot8_128(v[12]);
+ v[13] = rot8_128(v[13]);
+ v[14] = rot8_128(v[14]);
+ v[10] = add_128(v[10], v[15]);
+ v[11] = add_128(v[11], v[12]);
+ v[8] = add_128(v[8], v[13]);
+ v[9] = add_128(v[9], v[14]);
+ v[5] = xor_128(v[5], v[10]);
+ v[6] = xor_128(v[6], v[11]);
+ v[7] = xor_128(v[7], v[8]);
+ v[4] = xor_128(v[4], v[9]);
+ v[5] = rot7_128(v[5]);
+ v[6] = rot7_128(v[6]);
+ v[7] = rot7_128(v[7]);
+ v[4] = rot7_128(v[4]);
+}
+
+INLINE void transpose_vecs_128(uint32x4_t vecs[4]) {
+ // Individually transpose the four 2x2 sub-matrices in each corner.
+ uint32x4x2_t rows01 = vtrnq_u32(vecs[0], vecs[1]);
+ uint32x4x2_t rows23 = vtrnq_u32(vecs[2], vecs[3]);
+
+ // Swap the top-right and bottom-left 2x2s (which just got transposed).
+ vecs[0] =
+ vcombine_u32(vget_low_u32(rows01.val[0]), vget_low_u32(rows23.val[0]));
+ vecs[1] =
+ vcombine_u32(vget_low_u32(rows01.val[1]), vget_low_u32(rows23.val[1]));
+ vecs[2] =
+ vcombine_u32(vget_high_u32(rows01.val[0]), vget_high_u32(rows23.val[0]));
+ vecs[3] =
+ vcombine_u32(vget_high_u32(rows01.val[1]), vget_high_u32(rows23.val[1]));
+}
+
+INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
+ size_t block_offset, uint32x4_t out[16]) {
+ out[0] = loadu_128(&inputs[0][block_offset + 0 * sizeof(uint32x4_t)]);
+ out[1] = loadu_128(&inputs[1][block_offset + 0 * sizeof(uint32x4_t)]);
+ out[2] = loadu_128(&inputs[2][block_offset + 0 * sizeof(uint32x4_t)]);
+ out[3] = loadu_128(&inputs[3][block_offset + 0 * sizeof(uint32x4_t)]);
+ out[4] = loadu_128(&inputs[0][block_offset + 1 * sizeof(uint32x4_t)]);
+ out[5] = loadu_128(&inputs[1][block_offset + 1 * sizeof(uint32x4_t)]);
+ out[6] = loadu_128(&inputs[2][block_offset + 1 * sizeof(uint32x4_t)]);
+ out[7] = loadu_128(&inputs[3][block_offset + 1 * sizeof(uint32x4_t)]);
+ out[8] = loadu_128(&inputs[0][block_offset + 2 * sizeof(uint32x4_t)]);
+ out[9] = loadu_128(&inputs[1][block_offset + 2 * sizeof(uint32x4_t)]);
+ out[10] = loadu_128(&inputs[2][block_offset + 2 * sizeof(uint32x4_t)]);
+ out[11] = loadu_128(&inputs[3][block_offset + 2 * sizeof(uint32x4_t)]);
+ out[12] = loadu_128(&inputs[0][block_offset + 3 * sizeof(uint32x4_t)]);
+ out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(uint32x4_t)]);
+ out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(uint32x4_t)]);
+ out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(uint32x4_t)]);
+ transpose_vecs_128(&out[0]);
+ transpose_vecs_128(&out[4]);
+ transpose_vecs_128(&out[8]);
+ transpose_vecs_128(&out[12]);
+}
+
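+// Builds the per-lane counter words; when increment_counter is false, the
+// mask zeroes the lane offsets so every lane sees the same counter value.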
+INLINE void load_counters4(uint64_t counter, bool increment_counter,
+ uint32x4_t *out_low, uint32x4_t *out_high) {
+ uint64_t mask = (increment_counter ? ~0 : 0);
+ *out_low = set4(
+ counter_low(counter + (mask & 0)), counter_low(counter + (mask & 1)),
+ counter_low(counter + (mask & 2)), counter_low(counter + (mask & 3)));
+ *out_high = set4(
+ counter_high(counter + (mask & 0)), counter_high(counter + (mask & 1)),
+ counter_high(counter + (mask & 2)), counter_high(counter + (mask & 3)));
+}
+
+static
+void blake3_hash4_neon(const uint8_t *const *inputs, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ uint32x4_t h_vecs[8] = {
+ set1_128(key[0]), set1_128(key[1]), set1_128(key[2]), set1_128(key[3]),
+ set1_128(key[4]), set1_128(key[5]), set1_128(key[6]), set1_128(key[7]),
+ };
+ uint32x4_t counter_low_vec, counter_high_vec;
+ load_counters4(counter, increment_counter, &counter_low_vec,
+ &counter_high_vec);
+ uint8_t block_flags = flags | flags_start;
+
+ for (size_t block = 0; block < blocks; block++) {
+ if (block + 1 == blocks) {
+ block_flags |= flags_end;
+ }
+ uint32x4_t block_len_vec = set1_128(BLAKE3_BLOCK_LEN);
+ uint32x4_t block_flags_vec = set1_128(block_flags);
+ uint32x4_t msg_vecs[16];
+ transpose_msg_vecs4(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+ uint32x4_t v[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1_128(IV[0]), set1_128(IV[1]), set1_128(IV[2]), set1_128(IV[3]),
+ counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+ };
+ round_fn4(v, msg_vecs, 0);
+ round_fn4(v, msg_vecs, 1);
+ round_fn4(v, msg_vecs, 2);
+ round_fn4(v, msg_vecs, 3);
+ round_fn4(v, msg_vecs, 4);
+ round_fn4(v, msg_vecs, 5);
+ round_fn4(v, msg_vecs, 6);
+ h_vecs[0] = xor_128(v[0], v[8]);
+ h_vecs[1] = xor_128(v[1], v[9]);
+ h_vecs[2] = xor_128(v[2], v[10]);
+ h_vecs[3] = xor_128(v[3], v[11]);
+ h_vecs[4] = xor_128(v[4], v[12]);
+ h_vecs[5] = xor_128(v[5], v[13]);
+ h_vecs[6] = xor_128(v[6], v[14]);
+ h_vecs[7] = xor_128(v[7], v[15]);
+
+ block_flags = flags;
+ }
+
+ transpose_vecs_128(&h_vecs[0]);
+ transpose_vecs_128(&h_vecs[4]);
+ // The first four vecs now contain the first half of each output, and the
+ // second four vecs contain the second half of each output.
+ storeu_128(h_vecs[0], &out[0 * sizeof(uint32x4_t)]);
+ storeu_128(h_vecs[4], &out[1 * sizeof(uint32x4_t)]);
+ storeu_128(h_vecs[1], &out[2 * sizeof(uint32x4_t)]);
+ storeu_128(h_vecs[5], &out[3 * sizeof(uint32x4_t)]);
+ storeu_128(h_vecs[2], &out[4 * sizeof(uint32x4_t)]);
+ storeu_128(h_vecs[6], &out[5 * sizeof(uint32x4_t)]);
+ storeu_128(h_vecs[3], &out[6 * sizeof(uint32x4_t)]);
+ storeu_128(h_vecs[7], &out[7 * sizeof(uint32x4_t)]);
+}
+
+/*
+ * ----------------------------------------------------------------------------
+ * hash_many_neon
+ * ----------------------------------------------------------------------------
+ */
+
+void blake3_compress_in_place_portable(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags);
+
+INLINE void hash_one_neon(const uint8_t *input, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ uint8_t flags, uint8_t flags_start, uint8_t flags_end,
+ uint8_t out[BLAKE3_OUT_LEN]) {
+ uint32_t cv[8];
+ memcpy(cv, key, BLAKE3_KEY_LEN);
+ uint8_t block_flags = flags | flags_start;
+ while (blocks > 0) {
+ if (blocks == 1) {
+ block_flags |= flags_end;
+ }
+ // TODO: Implement compress_neon. However note that according to
+ // https://github.com/BLAKE2/BLAKE2/commit/7965d3e6e1b4193438b8d3a656787587d2579227,
+ // compress_neon might not be any faster than compress_portable.
+ blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
+ block_flags);
+ input = &input[BLAKE3_BLOCK_LEN];
+ blocks -= 1;
+ block_flags = flags;
+ }
+ memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out) {
+ while (num_inputs >= 4) {
+ blake3_hash4_neon(inputs, blocks, key, counter, increment_counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += 4;
+ }
+ inputs += 4;
+ num_inputs -= 4;
+ out = &out[4 * BLAKE3_OUT_LEN];
+ }
+ while (num_inputs > 0) {
+ hash_one_neon(inputs[0], blocks, key, counter, flags, flags_start,
+ flags_end, out);
+ if (increment_counter) {
+ counter += 1;
+ }
+ inputs += 1;
+ num_inputs -= 1;
+ out = &out[BLAKE3_OUT_LEN];
+ }
+}
+
+#endif // BLAKE3_USE_NEON
diff --git a/llvm/lib/Support/BLAKE3/blake3_portable.c b/llvm/lib/Support/BLAKE3/blake3_portable.c
new file mode 100644
index 000000000000..062dd1b47fb6
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_portable.c
@@ -0,0 +1,160 @@
+#include "blake3_impl.h"
+#include <string.h>
+
+INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
+ return (w >> c) | (w << (32 - c));
+}
+
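+// The BLAKE3 quarter-round (the G function): mixes message words x and y into
+// four state words using the rotation constants 16, 12, 8, and 7.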
+INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
+ uint32_t x, uint32_t y) {
+ state[a] = state[a] + state[b] + x;
+ state[d] = rotr32(state[d] ^ state[a], 16);
+ state[c] = state[c] + state[d];
+ state[b] = rotr32(state[b] ^ state[c], 12);
+ state[a] = state[a] + state[b] + y;
+ state[d] = rotr32(state[d] ^ state[a], 8);
+ state[c] = state[c] + state[d];
+ state[b] = rotr32(state[b] ^ state[c], 7);
+}
+
+INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
+ // Select the message schedule based on the round.
+ const uint8_t *schedule = MSG_SCHEDULE[round];
+
+ // Mix the columns.
+ g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
+ g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
+ g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
+ g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
+
+ // Mix the rows.
+ g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
+ g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
+ g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
+ g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
+}
+
+INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter, uint8_t flags) {
+ uint32_t block_words[16];
+ block_words[0] = load32(block + 4 * 0);
+ block_words[1] = load32(block + 4 * 1);
+ block_words[2] = load32(block + 4 * 2);
+ block_words[3] = load32(block + 4 * 3);
+ block_words[4] = load32(block + 4 * 4);
+ block_words[5] = load32(block + 4 * 5);
+ block_words[6] = load32(block + 4 * 6);
+ block_words[7] = load32(block + 4 * 7);
+ block_words[8] = load32(block + 4 * 8);
+ block_words[9] = load32(block + 4 * 9);
+ block_words[10] = load32(block + 4 * 10);
+ block_words[11] = load32(block + 4 * 11);
+ block_words[12] = load32(block + 4 * 12);
+ block_words[13] = load32(block + 4 * 13);
+ block_words[14] = load32(block + 4 * 14);
+ block_words[15] = load32(block + 4 * 15);
+
+ state[0] = cv[0];
+ state[1] = cv[1];
+ state[2] = cv[2];
+ state[3] = cv[3];
+ state[4] = cv[4];
+ state[5] = cv[5];
+ state[6] = cv[6];
+ state[7] = cv[7];
+ state[8] = IV[0];
+ state[9] = IV[1];
+ state[10] = IV[2];
+ state[11] = IV[3];
+ state[12] = counter_low(counter);
+ state[13] = counter_high(counter);
+ state[14] = (uint32_t)block_len;
+ state[15] = (uint32_t)flags;
+
+ round_fn(state, &block_words[0], 0);
+ round_fn(state, &block_words[0], 1);
+ round_fn(state, &block_words[0], 2);
+ round_fn(state, &block_words[0], 3);
+ round_fn(state, &block_words[0], 4);
+ round_fn(state, &block_words[0], 5);
+ round_fn(state, &block_words[0], 6);
+}
+
+void blake3_compress_in_place_portable(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags) {
+ uint32_t state[16];
+ compress_pre(state, cv, block, block_len, counter, flags);
+ cv[0] = state[0] ^ state[8];
+ cv[1] = state[1] ^ state[9];
+ cv[2] = state[2] ^ state[10];
+ cv[3] = state[3] ^ state[11];
+ cv[4] = state[4] ^ state[12];
+ cv[5] = state[5] ^ state[13];
+ cv[6] = state[6] ^ state[14];
+ cv[7] = state[7] ^ state[15];
+}
+
+void blake3_compress_xof_portable(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]) {
+ uint32_t state[16];
+ compress_pre(state, cv, block, block_len, counter, flags);
+
+ store32(&out[0 * 4], state[0] ^ state[8]);
+ store32(&out[1 * 4], state[1] ^ state[9]);
+ store32(&out[2 * 4], state[2] ^ state[10]);
+ store32(&out[3 * 4], state[3] ^ state[11]);
+ store32(&out[4 * 4], state[4] ^ state[12]);
+ store32(&out[5 * 4], state[5] ^ state[13]);
+ store32(&out[6 * 4], state[6] ^ state[14]);
+ store32(&out[7 * 4], state[7] ^ state[15]);
+ store32(&out[8 * 4], state[8] ^ cv[0]);
+ store32(&out[9 * 4], state[9] ^ cv[1]);
+ store32(&out[10 * 4], state[10] ^ cv[2]);
+ store32(&out[11 * 4], state[11] ^ cv[3]);
+ store32(&out[12 * 4], state[12] ^ cv[4]);
+ store32(&out[13 * 4], state[13] ^ cv[5]);
+ store32(&out[14 * 4], state[14] ^ cv[6]);
+ store32(&out[15 * 4], state[15] ^ cv[7]);
+}
+
+INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+ uint32_t cv[8];
+ memcpy(cv, key, BLAKE3_KEY_LEN);
+ uint8_t block_flags = flags | flags_start;
+ while (blocks > 0) {
+ if (blocks == 1) {
+ block_flags |= flags_end;
+ }
+ blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
+ block_flags);
+ input = &input[BLAKE3_BLOCK_LEN];
+ blocks -= 1;
+ block_flags = flags;
+ }
+ store_cv_words(out, cv);
+}
+
+void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out) {
+ while (num_inputs > 0) {
+ hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
+ flags_end, out);
+ if (increment_counter) {
+ counter += 1;
+ }
+ inputs += 1;
+ num_inputs -= 1;
+ out = &out[BLAKE3_OUT_LEN];
+ }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2.c b/llvm/lib/Support/BLAKE3/blake3_sse2.c
new file mode 100644
index 000000000000..f4449ac0b3cd
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2.c
@@ -0,0 +1,566 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 4
+
+#define _mm_shuffle_ps2(a, b, c) \
+ (_mm_castps_si128( \
+ _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
+
+INLINE __m128i loadu(const uint8_t src[16]) {
+ return _mm_loadu_si128((const __m128i *)src);
+}
+
+INLINE void storeu(__m128i src, uint8_t dest[16]) {
+ _mm_storeu_si128((__m128i *)dest, src);
+}
+
+INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
+
+// The name "xorv" is used instead of "xor" because "xor" is a C++ alternative
+// operator token (iso646), which confuses clang-format.
+INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
+
+INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
+
+INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+ return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
+}
+
+INLINE __m128i rot16(__m128i x) {
+ return _mm_shufflehi_epi16(_mm_shufflelo_epi16(x, 0xB1), 0xB1);
+}
+
+INLINE __m128i rot12(__m128i x) {
+ return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12));
+}
+
+INLINE __m128i rot8(__m128i x) {
+ return xorv(_mm_srli_epi32(x, 8), _mm_slli_epi32(x, 32 - 8));
+}
+
+INLINE __m128i rot7(__m128i x) {
+ return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7));
+}
+
+INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+ __m128i m) {
+ *row0 = addv(addv(*row0, m), *row1);
+ *row3 = xorv(*row3, *row0);
+ *row3 = rot16(*row3);
+ *row2 = addv(*row2, *row3);
+ *row1 = xorv(*row1, *row2);
+ *row1 = rot12(*row1);
+}
+
+INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+ __m128i m) {
+ *row0 = addv(addv(*row0, m), *row1);
+ *row3 = xorv(*row3, *row0);
+ *row3 = rot8(*row3);
+ *row2 = addv(*row2, *row3);
+ *row1 = xorv(*row1, *row2);
+ *row1 = rot7(*row1);
+}
+
+// Note the optimization here of leaving row1 as the unrotated row, rather than
+// row0. All the message loads below are adjusted to compensate for this. See
+// discussion at https://github.com/sneves/blake2-avx2/pull/4
+INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+ *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
+ *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+ *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
+}
+
+INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+ *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
+ *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+ *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
+}
+
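+// SSE2 lacks _mm_blend_epi16 (it was added in SSE4.1), so emulate it:
+// broadcast the immediate, turn each selected bit into a full 16-bit mask,
+// and select between the inputs with and/andnot/or.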
+INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) {
+ const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
+ __m128i mask = _mm_set1_epi16(imm8);
+ mask = _mm_and_si128(mask, bits);
+ mask = _mm_cmpeq_epi16(mask, bits);
+ return _mm_or_si128(_mm_and_si128(mask, b), _mm_andnot_si128(mask, a));
+}
+
+INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter, uint8_t flags) {
+ rows[0] = loadu((uint8_t *)&cv[0]);
+ rows[1] = loadu((uint8_t *)&cv[4]);
+ rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
+ rows[3] = set4(counter_low(counter), counter_high(counter),
+ (uint32_t)block_len, (uint32_t)flags);
+
+ __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
+ __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
+ __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
+ __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
+
+ __m128i t0, t1, t2, t3, tt;
+
+ // Round 1. The first round permutes the message words from the original
+  // input order into the groups that get mixed in parallel.
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8
+ t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9
+ t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 2. This round and all following rounds apply a fixed permutation
+ // to the message words from the round before.
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 3
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 4
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 5
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 6
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 7
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+}
+
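+// compress_pre leaves the full 16-word state in rows[]; the two entry points
+// below share it and differ only in finalization. In-place compression xors
+// the two halves together to form the new chaining value, while the XOF
+// variant also folds the original cv back into the upper half.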
+void blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags) {
+ __m128i rows[4];
+ compress_pre(rows, cv, block, block_len, counter, flags);
+ storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]);
+ storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]);
+}
+
+void blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]) {
+ __m128i rows[4];
+ compress_pre(rows, cv, block, block_len, counter, flags);
+ storeu(xorv(rows[0], rows[2]), &out[0]);
+ storeu(xorv(rows[1], rows[3]), &out[16]);
+ storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]);
+ storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]);
+}
+
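+// 4-way round function. Here each __m128i holds one state word across four
+// independent messages, so the diagonal half of the G mixing is expressed by
+// rotating the v[] indices rather than by shuffling within registers.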
+INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+ v[0] = addv(v[0], v[4]);
+ v[1] = addv(v[1], v[5]);
+ v[2] = addv(v[2], v[6]);
+ v[3] = addv(v[3], v[7]);
+ v[12] = xorv(v[12], v[0]);
+ v[13] = xorv(v[13], v[1]);
+ v[14] = xorv(v[14], v[2]);
+ v[15] = xorv(v[15], v[3]);
+ v[12] = rot16(v[12]);
+ v[13] = rot16(v[13]);
+ v[14] = rot16(v[14]);
+ v[15] = rot16(v[15]);
+ v[8] = addv(v[8], v[12]);
+ v[9] = addv(v[9], v[13]);
+ v[10] = addv(v[10], v[14]);
+ v[11] = addv(v[11], v[15]);
+ v[4] = xorv(v[4], v[8]);
+ v[5] = xorv(v[5], v[9]);
+ v[6] = xorv(v[6], v[10]);
+ v[7] = xorv(v[7], v[11]);
+ v[4] = rot12(v[4]);
+ v[5] = rot12(v[5]);
+ v[6] = rot12(v[6]);
+ v[7] = rot12(v[7]);
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+ v[0] = addv(v[0], v[4]);
+ v[1] = addv(v[1], v[5]);
+ v[2] = addv(v[2], v[6]);
+ v[3] = addv(v[3], v[7]);
+ v[12] = xorv(v[12], v[0]);
+ v[13] = xorv(v[13], v[1]);
+ v[14] = xorv(v[14], v[2]);
+ v[15] = xorv(v[15], v[3]);
+ v[12] = rot8(v[12]);
+ v[13] = rot8(v[13]);
+ v[14] = rot8(v[14]);
+ v[15] = rot8(v[15]);
+ v[8] = addv(v[8], v[12]);
+ v[9] = addv(v[9], v[13]);
+ v[10] = addv(v[10], v[14]);
+ v[11] = addv(v[11], v[15]);
+ v[4] = xorv(v[4], v[8]);
+ v[5] = xorv(v[5], v[9]);
+ v[6] = xorv(v[6], v[10]);
+ v[7] = xorv(v[7], v[11]);
+ v[4] = rot7(v[4]);
+ v[5] = rot7(v[5]);
+ v[6] = rot7(v[6]);
+ v[7] = rot7(v[7]);
+
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+ v[0] = addv(v[0], v[5]);
+ v[1] = addv(v[1], v[6]);
+ v[2] = addv(v[2], v[7]);
+ v[3] = addv(v[3], v[4]);
+ v[15] = xorv(v[15], v[0]);
+ v[12] = xorv(v[12], v[1]);
+ v[13] = xorv(v[13], v[2]);
+ v[14] = xorv(v[14], v[3]);
+ v[15] = rot16(v[15]);
+ v[12] = rot16(v[12]);
+ v[13] = rot16(v[13]);
+ v[14] = rot16(v[14]);
+ v[10] = addv(v[10], v[15]);
+ v[11] = addv(v[11], v[12]);
+ v[8] = addv(v[8], v[13]);
+ v[9] = addv(v[9], v[14]);
+ v[5] = xorv(v[5], v[10]);
+ v[6] = xorv(v[6], v[11]);
+ v[7] = xorv(v[7], v[8]);
+ v[4] = xorv(v[4], v[9]);
+ v[5] = rot12(v[5]);
+ v[6] = rot12(v[6]);
+ v[7] = rot12(v[7]);
+ v[4] = rot12(v[4]);
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+ v[0] = addv(v[0], v[5]);
+ v[1] = addv(v[1], v[6]);
+ v[2] = addv(v[2], v[7]);
+ v[3] = addv(v[3], v[4]);
+ v[15] = xorv(v[15], v[0]);
+ v[12] = xorv(v[12], v[1]);
+ v[13] = xorv(v[13], v[2]);
+ v[14] = xorv(v[14], v[3]);
+ v[15] = rot8(v[15]);
+ v[12] = rot8(v[12]);
+ v[13] = rot8(v[13]);
+ v[14] = rot8(v[14]);
+ v[10] = addv(v[10], v[15]);
+ v[11] = addv(v[11], v[12]);
+ v[8] = addv(v[8], v[13]);
+ v[9] = addv(v[9], v[14]);
+ v[5] = xorv(v[5], v[10]);
+ v[6] = xorv(v[6], v[11]);
+ v[7] = xorv(v[7], v[8]);
+ v[4] = xorv(v[4], v[9]);
+ v[5] = rot7(v[5]);
+ v[6] = rot7(v[6]);
+ v[7] = rot7(v[7]);
+ v[4] = rot7(v[4]);
+}
+
+INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
+ // 22/33. Note that this doesn't split the vector into two lanes, as the
+ // AVX2 counterparts do.
+ __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+
+ // Interleave 64-bit lanes.
+ __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
+ __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
+ __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
+ __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
+
+ vecs[0] = abcd_0;
+ vecs[1] = abcd_1;
+ vecs[2] = abcd_2;
+ vecs[3] = abcd_3;
+}
+
+INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
+ size_t block_offset, __m128i out[16]) {
+ out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
+ out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
+ out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
+ out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
+ out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
+ out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
+ out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
+ out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
+ out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
+ out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
+ out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
+ out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
+ out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
+ out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
+ out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
+ out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
+ for (size_t i = 0; i < 4; ++i) {
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+ }
+ transpose_vecs(&out[0]);
+ transpose_vecs(&out[4]);
+ transpose_vecs(&out[8]);
+ transpose_vecs(&out[12]);
+}
+
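+// Add the lane offsets 0..3 to the low counter words, detect unsigned
+// overflow with a signed compare after flipping the sign bits (SSE2 has no
+// unsigned compare), and propagate the carry by subtracting the all-ones
+// compare result from the high words.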
+INLINE void load_counters(uint64_t counter, bool increment_counter,
+ __m128i *out_lo, __m128i *out_hi) {
+ const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
+ const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
+ const __m128i add1 = _mm_and_si128(mask, add0);
+ __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
+  __m128i carry =
+      _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
+                      _mm_xor_si128(l, _mm_set1_epi32(0x80000000)));
+ __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
+ *out_lo = l;
+ *out_hi = h;
+}
+
+static void blake3_hash4_sse2(const uint8_t *const *inputs, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ __m128i h_vecs[8] = {
+ set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
+ set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
+ };
+ __m128i counter_low_vec, counter_high_vec;
+ load_counters(counter, increment_counter, &counter_low_vec,
+ &counter_high_vec);
+ uint8_t block_flags = flags | flags_start;
+
+ for (size_t block = 0; block < blocks; block++) {
+ if (block + 1 == blocks) {
+ block_flags |= flags_end;
+ }
+ __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
+ __m128i block_flags_vec = set1(block_flags);
+ __m128i msg_vecs[16];
+ transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+ __m128i v[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]),
+ counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+ };
+ round_fn(v, msg_vecs, 0);
+ round_fn(v, msg_vecs, 1);
+ round_fn(v, msg_vecs, 2);
+ round_fn(v, msg_vecs, 3);
+ round_fn(v, msg_vecs, 4);
+ round_fn(v, msg_vecs, 5);
+ round_fn(v, msg_vecs, 6);
+ h_vecs[0] = xorv(v[0], v[8]);
+ h_vecs[1] = xorv(v[1], v[9]);
+ h_vecs[2] = xorv(v[2], v[10]);
+ h_vecs[3] = xorv(v[3], v[11]);
+ h_vecs[4] = xorv(v[4], v[12]);
+ h_vecs[5] = xorv(v[5], v[13]);
+ h_vecs[6] = xorv(v[6], v[14]);
+ h_vecs[7] = xorv(v[7], v[15]);
+
+ block_flags = flags;
+ }
+
+ transpose_vecs(&h_vecs[0]);
+ transpose_vecs(&h_vecs[4]);
+ // The first four vecs now contain the first half of each output, and the
+ // second four vecs contain the second half of each output.
+ storeu(h_vecs[0], &out[0 * sizeof(__m128i)]);
+ storeu(h_vecs[4], &out[1 * sizeof(__m128i)]);
+ storeu(h_vecs[1], &out[2 * sizeof(__m128i)]);
+ storeu(h_vecs[5], &out[3 * sizeof(__m128i)]);
+ storeu(h_vecs[2], &out[4 * sizeof(__m128i)]);
+ storeu(h_vecs[6], &out[5 * sizeof(__m128i)]);
+ storeu(h_vecs[3], &out[6 * sizeof(__m128i)]);
+ storeu(h_vecs[7], &out[7 * sizeof(__m128i)]);
+}
+
+INLINE void hash_one_sse2(const uint8_t *input, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+ uint32_t cv[8];
+ memcpy(cv, key, BLAKE3_KEY_LEN);
+ uint8_t block_flags = flags | flags_start;
+ while (blocks > 0) {
+ if (blocks == 1) {
+ block_flags |= flags_end;
+ }
+ blake3_compress_in_place_sse2(cv, input, BLAKE3_BLOCK_LEN, counter,
+ block_flags);
+ input = &input[BLAKE3_BLOCK_LEN];
+ blocks -= 1;
+ block_flags = flags;
+ }
+ memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out) {
+ while (num_inputs >= DEGREE) {
+ blake3_hash4_sse2(inputs, blocks, key, counter, increment_counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += DEGREE;
+ }
+ inputs += DEGREE;
+ num_inputs -= DEGREE;
+ out = &out[DEGREE * BLAKE3_OUT_LEN];
+ }
+ while (num_inputs > 0) {
+ hash_one_sse2(inputs[0], blocks, key, counter, flags, flags_start,
+ flags_end, out);
+ if (increment_counter) {
+ counter += 1;
+ }
+ inputs += 1;
+ num_inputs -= 1;
+ out = &out[BLAKE3_OUT_LEN];
+ }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
new file mode 100644
index 000000000000..0106b13ba851
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
@@ -0,0 +1,2307 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN blake3_hash_many_sse2
+HIDDEN _blake3_hash_many_sse2
+HIDDEN blake3_compress_in_place_sse2
+HIDDEN _blake3_compress_in_place_sse2
+HIDDEN blake3_compress_xof_sse2
+HIDDEN _blake3_compress_xof_sse2
+.global blake3_hash_many_sse2
+.global _blake3_hash_many_sse2
+.global blake3_compress_in_place_sse2
+.global _blake3_compress_in_place_sse2
+.global blake3_compress_xof_sse2
+.global _blake3_compress_xof_sse2
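+# Each entry point is exported both with and without a leading underscore so
+# the same source assembles for ELF and for Mach-O, where C symbols get an
+# underscore prefix.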
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+ .p2align 6
+_blake3_hash_many_sse2:
+blake3_hash_many_sse2:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 360
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ cmp rsi, 4
+ jc 3f
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ movd xmm13, dword ptr [rsp+0x124]
+ punpckldq xmm14, xmm13
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
+ shl rax, 0x20
+ or rax, 0x40
+ movq xmm3, rax
+ movdqa xmmword ptr [rsp+0x20], xmm3
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ punpcklqdq xmm3, xmmword ptr [rsp+0x20]
+ punpcklqdq xmm11, xmmword ptr [rsp+0x20]
+ mov al, 7
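+ /* Editorial note: al counts the seven compression rounds. Each pass
+    through 9: applies the column and then the diagonal G steps to both
+    lanes, and the dec/je block at the bottom permutes the message words
+    for the next round. */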
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm13, xmm12
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ movdqa xmm13, xmm6
+ pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm12, xmm13
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm6, xmm5
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ movdqa xmmword ptr [rsp+0x30], xmm2
+ movdqa xmm2, xmm14
+ pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm5, xmm2
+ movdqa xmm2, xmmword ptr [rsp+0x30]
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ mov eax, dword ptr [rsp+0x130]
+ neg eax
+ mov r10d, dword ptr [rsp+0x110+8*rax]
+ mov r11d, dword ptr [rsp+0x120+8*rax]
+ mov dword ptr [rsp+0x110], r10d
+ mov dword ptr [rsp+0x120], r11d
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl rax, 32
+ or rax, 64
+ movq xmm12, rax
+ movdqa xmm3, xmm13
+ punpcklqdq xmm3, xmm12
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
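+ /* Editorial note (not in upstream): single-block compression in place.
+    Per the SysV ABI the arguments appear to be rdi = 32-byte chaining
+    value, rsi = 64-byte block, edx = block_len, rcx = counter, r8d =
+    flags; block_len and flags are packed into one qword below to form
+    the counter/length/flags row of the state. */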
+.p2align 6
+blake3_compress_in_place_sse2:
+_blake3_compress_in_place_sse2:
+ _CET_ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl r8, 32
+ add rdx, r8
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rdi], xmm0
+ movups xmmword ptr [rdi+0x10], xmm1
+ ret
+
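+ /* Editorial note: the XOF variant of the routine above. It runs the
+    same seven rounds but emits a full 64-byte output to r9: the first
+    half is rows 0,1 xor rows 2,3, and the second half is rows 2,3 xor
+    the input chaining value, which is what extendable output requires. */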
+.p2align 6
+blake3_compress_xof_sse2:
+_blake3_compress_xof_sse2:
+ _CET_ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ movdqu xmm4, xmmword ptr [rdi]
+ movdqu xmm5, xmmword ptr [rdi+0x10]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r9], xmm0
+ movups xmmword ptr [r9+0x10], xmm1
+ movups xmmword ptr [r9+0x20], xmm2
+ movups xmmword ptr [r9+0x30], xmm3
+ ret
+
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata
+#endif
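+ /* Read-only constants (editorial note). BLAKE3_IV holds only the first
+    four IV words, since the other half of the working state is the
+    chaining value; ADD0/ADD1 are the per-lane counter offsets and the
+    group stride, and CMP_MSB_MASK flips sign bits so a signed pcmpgtd
+    can stand in for an unsigned compare when detecting counter carry. */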
+.p2align 6
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ADD0:
+ .long 0, 1, 2, 3
+ADD1:
+ .long 4, 4, 4, 4
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 64, 64, 64, 64
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
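+ /* The PBLENDW_* masks below emulate the SSE4.1 pblendw instruction
+    with pand/pand/por selects, keeping this file SSE2-only (editorial
+    note). */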
+PBLENDW_0x33_MASK:
+ .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xCC_MASK:
+ .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
+PBLENDW_0x3F_MASK:
+ .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xC0_MASK:
+ .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
+
+#endif
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
new file mode 100644
index 000000000000..8852ba5976e1
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
@@ -0,0 +1,2332 @@
+.intel_syntax noprefix
+.global blake3_hash_many_sse2
+.global _blake3_hash_many_sse2
+.global blake3_compress_in_place_sse2
+.global _blake3_compress_in_place_sse2
+.global blake3_compress_xof_sse2
+.global _blake3_compress_xof_sse2
+.section .text
+ .p2align 6
+_blake3_hash_many_sse2:
+blake3_hash_many_sse2:
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 528
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ movdqa xmmword ptr [rsp+0x170], xmm6
+ movdqa xmmword ptr [rsp+0x180], xmm7
+ movdqa xmmword ptr [rsp+0x190], xmm8
+ movdqa xmmword ptr [rsp+0x1A0], xmm9
+ movdqa xmmword ptr [rsp+0x1B0], xmm10
+ movdqa xmmword ptr [rsp+0x1C0], xmm11
+ movdqa xmmword ptr [rsp+0x1D0], xmm12
+ movdqa xmmword ptr [rsp+0x1E0], xmm13
+ movdqa xmmword ptr [rsp+0x1F0], xmm14
+ movdqa xmmword ptr [rsp+0x200], xmm15
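+ /* Win64 ABI: xmm6-xmm15 are callee-saved, hence the spills above. The
+    next four moves put the Microsoft argument registers (rcx, rdx, r8,
+    r9) into their SysV positions so the body can stay in sync with the
+    unix flavor of this file; the remaining arguments are fetched from
+    the caller's stack through rbp (editorial note). */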
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+0x68]
+ movzx r9, byte ptr [rbp+0x70]
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
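+ /* The block above appears to build the per-lane block counters:
+    [rsp+0x110] holds the low halves of counter+{0,1,2,3} (the offsets
+    are masked to zero when increment_counter is false) and [rsp+0x120]
+    the high halves, with the carry recovered by the sign-flipped
+    pcmpgtd trick noted at CMP_MSB_MASK. Editorial comment, not in
+    upstream. */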
+ mov rbx, qword ptr [rbp+0x90]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x78]
+ movzx r12d, byte ptr [rbp+0x88]
+ cmp rsi, 4
+ jc 3f
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
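+ /* Load one 64-byte block from each of the four inputs and transpose
+    4x4 dwords, so each 16-byte slot at [rsp]..[rsp+0xF0] ends up
+    holding the same message word from all four lanes (editorial note). */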
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
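+ /* Editorial note: seven fully unrolled rounds follow. The 32-bit
+    rotations are built from SSE2 primitives: rot16 is pshuflw/pshufhw
+    0xB1, rot12 is psrld 12 / pslld 20, rot8 is psrld 8 / pslld 24, and
+    rot7 is psrld 7 / pslld 25, each pair merged with por or pxor. No
+    instruction between the cmp above and the closing "jne 9b" touches
+    EFLAGS, which is why that branch can reuse the comparison. */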
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0xB1
+ pshufhw xmm15, xmm15, 0xB1
+ pshuflw xmm12, xmm12, 0xB1
+ pshufhw xmm12, xmm12, 0xB1
+ pshuflw xmm13, xmm13, 0xB1
+ pshufhw xmm13, xmm13, 0xB1
+ pshuflw xmm14, xmm14, 0xB1
+ pshufhw xmm14, xmm14, 0xB1
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jne 3f
+4:
+ movdqa xmm6, xmmword ptr [rsp+0x170]
+ movdqa xmm7, xmmword ptr [rsp+0x180]
+ movdqa xmm8, xmmword ptr [rsp+0x190]
+ movdqa xmm9, xmmword ptr [rsp+0x1A0]
+ movdqa xmm10, xmmword ptr [rsp+0x1B0]
+ movdqa xmm11, xmmword ptr [rsp+0x1C0]
+ movdqa xmm12, xmmword ptr [rsp+0x1D0]
+ movdqa xmm13, xmmword ptr [rsp+0x1E0]
+ movdqa xmm14, xmmword ptr [rsp+0x1F0]
+ movdqa xmm15, xmmword ptr [rsp+0x200]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ movd xmm13, dword ptr [rsp+0x124]
+ punpckldq xmm14, xmm13
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
+ shl rax, 0x20
+ or rax, 0x40
+ movq xmm3, rax
+ movdqa xmmword ptr [rsp+0x20], xmm3
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ punpcklqdq xmm3, xmmword ptr [rsp+0x20]
+ punpcklqdq xmm11, xmmword ptr [rsp+0x20]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ pshuflw xmm11, xmm11, 0xB1
+ pshufhw xmm11, xmm11, 0xB1
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm13, xmm12
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ movdqa xmm13, xmm6
+ pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm12, xmm13
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm6, xmm5
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ movdqa xmmword ptr [rsp+0x30], xmm2
+ movdqa xmm2, xmm14
+ pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm5, xmm2
+ movdqa xmm2, xmmword ptr [rsp+0x30]
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ mov eax, dword ptr [rsp+0x130]
+ neg eax
+ mov r10d, dword ptr [rsp+0x110+8*rax]
+ mov r11d, dword ptr [rsp+0x120+8*rax]
+ mov dword ptr [rsp+0x110], r10d
+ mov dword ptr [rsp+0x120], r11d
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ movd xmm14, dword ptr [rsp+0x120]
+ punpckldq xmm13, xmm14
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl rax, 32
+ or rax, 64
+ movq xmm12, rax
+ movdqa xmm3, xmm13
+ punpcklqdq xmm3, xmm12
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+.p2align 6
+blake3_compress_in_place_sse2:
+_blake3_compress_in_place_sse2:
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+0x10], xmm7
+ movdqa xmmword ptr [rsp+0x20], xmm8
+ movdqa xmmword ptr [rsp+0x30], xmm9
+ movdqa xmmword ptr [rsp+0x40], xmm11
+ movdqa xmmword ptr [rsp+0x50], xmm14
+ movdqa xmmword ptr [rsp+0x60], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, byte ptr [rsp+0xA0]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ movq xmm3, r9
+ movq xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+0x20]
+ movups xmm7, xmmword ptr [rdx+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm14, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm14
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rcx], xmm0
+ movups xmmword ptr [rcx+0x10], xmm1
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+0x10]
+ movdqa xmm8, xmmword ptr [rsp+0x20]
+ movdqa xmm9, xmmword ptr [rsp+0x30]
+ movdqa xmm11, xmmword ptr [rsp+0x40]
+ movdqa xmm14, xmmword ptr [rsp+0x50]
+ movdqa xmm15, xmmword ptr [rsp+0x60]
+ add rsp, 120
+ ret
+
+
+.p2align 6
+_blake3_compress_xof_sse2:
+blake3_compress_xof_sse2:
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+0x10], xmm7
+ movdqa xmmword ptr [rsp+0x20], xmm8
+ movdqa xmmword ptr [rsp+0x30], xmm9
+ movdqa xmmword ptr [rsp+0x40], xmm11
+ movdqa xmmword ptr [rsp+0x50], xmm14
+ movdqa xmmword ptr [rsp+0x60], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, byte ptr [rsp+0xA0]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+0xA8]
+ shl rax, 32
+ add r8, rax
+ movq xmm3, r9
+ movq xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+0x20]
+ movups xmm7, xmmword ptr [rdx+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm14, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm14
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ movdqu xmm4, xmmword ptr [rcx]
+ movdqu xmm5, xmmword ptr [rcx+0x10]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r10], xmm0
+ movups xmmword ptr [r10+0x10], xmm1
+ movups xmmword ptr [r10+0x20], xmm2
+ movups xmmword ptr [r10+0x30], xmm3
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+0x10]
+ movdqa xmm8, xmmword ptr [rsp+0x20]
+ movdqa xmm9, xmmword ptr [rsp+0x30]
+ movdqa xmm11, xmmword ptr [rsp+0x40]
+ movdqa xmm14, xmmword ptr [rsp+0x50]
+ movdqa xmm15, xmmword ptr [rsp+0x60]
+ add rsp, 120
+ ret
+
+
+.section .rodata
+.p2align 6
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ADD0:
+ .long 0, 1, 2, 3
+ADD1:
+ .long 4, 4, 4, 4
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 64, 64, 64, 64
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+PBLENDW_0x33_MASK:
+ .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xCC_MASK:
+ .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
+PBLENDW_0x3F_MASK:
+ .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xC0_MASK:
+ .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
new file mode 100644
index 000000000000..507502f11a80
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
@@ -0,0 +1,2350 @@
+public _blake3_hash_many_sse2
+public blake3_hash_many_sse2
+public blake3_compress_in_place_sse2
+public _blake3_compress_in_place_sse2
+public blake3_compress_xof_sse2
+public _blake3_compress_xof_sse2
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_sse2 PROC
+_blake3_hash_many_sse2 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 528
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ movdqa xmmword ptr [rsp+170H], xmm6
+ movdqa xmmword ptr [rsp+180H], xmm7
+ movdqa xmmword ptr [rsp+190H], xmm8
+ movdqa xmmword ptr [rsp+1A0H], xmm9
+ movdqa xmmword ptr [rsp+1B0H], xmm10
+ movdqa xmmword ptr [rsp+1C0H], xmm11
+ movdqa xmmword ptr [rsp+1D0H], xmm12
+ movdqa xmmword ptr [rsp+1E0H], xmm13
+ movdqa xmmword ptr [rsp+1F0H], xmm14
+ movdqa xmmword ptr [rsp+200H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 00H
+ movdqa xmmword ptr [rsp+130H], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0]
+ pand xmm0, xmmword ptr [ADD1]
+ movdqa xmmword ptr [rsp+150H], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 00H
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+110H], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 00H
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+120H], xmm2
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+78H]
+ movzx r12d, byte ptr [rbp+88H]
+ cmp rsi, 4
+ jc final3blocks
+outerloop4:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 00H
+ pshufd xmm1, xmm3, 55H
+ pshufd xmm2, xmm3, 0AAH
+ pshufd xmm3, xmm3, 0FFH
+ movdqu xmm7, xmmword ptr [rcx+10H]
+ pshufd xmm4, xmm7, 00H
+ pshufd xmm5, xmm7, 55H
+ pshufd xmm6, xmm7, 0AAH
+ pshufd xmm7, xmm7, 0FFH
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-40H]
+ movdqu xmm9, xmmword ptr [r9+rdx-40H]
+ movdqu xmm10, xmmword ptr [r10+rdx-40H]
+ movdqu xmm11, xmmword ptr [r11+rdx-40H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+10H], xmm9
+ movdqa xmmword ptr [rsp+20H], xmm12
+ movdqa xmmword ptr [rsp+30H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-30H]
+ movdqu xmm9, xmmword ptr [r9+rdx-30H]
+ movdqu xmm10, xmmword ptr [r10+rdx-30H]
+ movdqu xmm11, xmmword ptr [r11+rdx-30H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+40H], xmm8
+ movdqa xmmword ptr [rsp+50H], xmm9
+ movdqa xmmword ptr [rsp+60H], xmm12
+ movdqa xmmword ptr [rsp+70H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-20H]
+ movdqu xmm9, xmmword ptr [r9+rdx-20H]
+ movdqu xmm10, xmmword ptr [r10+rdx-20H]
+ movdqu xmm11, xmmword ptr [r11+rdx-20H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+80H], xmm8
+ movdqa xmmword ptr [rsp+90H], xmm9
+ movdqa xmmword ptr [rsp+0A0H], xmm12
+ movdqa xmmword ptr [rsp+0B0H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-10H]
+ movdqu xmm9, xmmword ptr [r9+rdx-10H]
+ movdqu xmm10, xmmword ptr [r10+rdx-10H]
+ movdqu xmm11, xmmword ptr [r11+rdx-10H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0C0H], xmm8
+ movdqa xmmword ptr [rsp+0D0H], xmm9
+ movdqa xmmword ptr [rsp+0E0H], xmm12
+ movdqa xmmword ptr [rsp+0F0H], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3]
+ movdqa xmm12, xmmword ptr [rsp+110H]
+ movdqa xmm13, xmmword ptr [rsp+120H]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 00H
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+80H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+70H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0B0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+50H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0C0H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0A0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+60H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0F0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ pshuflw xmm15, xmm15, 0B1H
+ pshufhw xmm15, xmm15, 0B1H
+ pshuflw xmm12, xmm12, 0B1H
+ pshufhw xmm12, xmm12, 0B1H
+ pshuflw xmm13, xmm13, 0B1H
+ pshufhw xmm13, xmm13, 0B1H
+ pshuflw xmm14, xmm14, 0B1H
+ pshufhw xmm14, xmm14, 0B1H
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmm15
+ psrld xmm15, 8
+ pslld xmm8, 24
+ pxor xmm15, xmm8
+ movdqa xmm8, xmm12
+ psrld xmm12, 8
+ pslld xmm8, 24
+ pxor xmm12, xmm8
+ movdqa xmm8, xmm13
+ psrld xmm13, 8
+ pslld xmm8, 24
+ pxor xmm13, xmm8
+ movdqa xmm8, xmm14
+ psrld xmm14, 8
+ pslld xmm8, 24
+ pxor xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne innerloop4
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+20H], xmm1
+ movdqu xmmword ptr [rbx+40H], xmm9
+ movdqu xmmword ptr [rbx+60H], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+10H], xmm4
+ movdqu xmmword ptr [rbx+30H], xmm5
+ movdqu xmmword ptr [rbx+50H], xmm9
+ movdqu xmmword ptr [rbx+70H], xmm7
+ movdqa xmm1, xmmword ptr [rsp+110H]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+150H]
+ movdqa xmmword ptr [rsp+110H], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+120H]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+120H], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc outerloop4
+ test rsi, rsi
+ jne final3blocks
+unwind:
+ movdqa xmm6, xmmword ptr [rsp+170H]
+ movdqa xmm7, xmmword ptr [rsp+180H]
+ movdqa xmm8, xmmword ptr [rsp+190H]
+ movdqa xmm9, xmmword ptr [rsp+1A0H]
+ movdqa xmm10, xmmword ptr [rsp+1B0H]
+ movdqa xmm11, xmmword ptr [rsp+1C0H]
+ movdqa xmm12, xmmword ptr [rsp+1D0H]
+ movdqa xmm13, xmmword ptr [rsp+1E0H]
+ movdqa xmm14, xmmword ptr [rsp+1F0H]
+ movdqa xmm15, xmmword ptr [rsp+200H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final3blocks:
+ test esi, 2H
+ je final1block
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+110H]
+ movd xmm14, dword ptr [rsp+120H]
+ punpckldq xmm13, xmm14
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+114H]
+ movd xmm13, dword ptr [rsp+124H]
+ punpckldq xmm14, xmm13
+ movaps xmmword ptr [rsp+10H], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 93H
+ movups xmm12, xmmword ptr [r9+rdx-40H]
+ movups xmm13, xmmword ptr [r9+rdx-30H]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-20H]
+ movups xmm15, xmmword ptr [r9+rdx-10H]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 93H
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 93H
+ shl rax, 20H
+ or rax, 40H
+ movd xmm3, rax
+ movdqa xmmword ptr [rsp+20H], xmm3
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+10H]
+ punpcklqdq xmm3, xmmword ptr [rsp+20H]
+ punpcklqdq xmm11, xmmword ptr [rsp+20H]
+ mov al, 7
+roundloop2:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+20H], xmm4
+ movaps xmmword ptr [rsp+30H], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ pshuflw xmm11, xmm11, 0B1H
+ pshufhw xmm11, xmm11, 0B1H
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+40H], xmm5
+ movaps xmmword ptr [rsp+50H], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm8, xmm8, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 39H
+ pshufd xmm10, xmm10, 39H
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ pshuflw xmm11, xmm11, 0B1H
+ pshufhw xmm11, xmm11, 0B1H
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movdqa xmm13, xmm3
+ psrld xmm3, 8
+ pslld xmm13, 24
+ pxor xmm3, xmm13
+ movdqa xmm13, xmm11
+ psrld xmm11, 8
+ pslld xmm13, 24
+ pxor xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm8, xmm8, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 93H
+ pshufd xmm10, xmm10, 93H
+ dec al
+ je endroundloop2
+ movdqa xmm12, xmmword ptr [rsp+20H]
+ movdqa xmm5, xmmword ptr [rsp+40H]
+ pshufd xmm13, xmm12, 0FH
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 39H
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pand xmm13, xmmword ptr [PBLENDW_0x33_MASK]
+ pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK]
+ por xmm13, xmm12
+ movdqa xmmword ptr [rsp+20H], xmm13
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ movdqa xmm13, xmm6
+ pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm12, xmm13
+ pshufd xmm12, xmm12, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmmword ptr [rsp+40H], xmm12
+ movdqa xmm5, xmmword ptr [rsp+30H]
+ movdqa xmm13, xmmword ptr [rsp+50H]
+ pshufd xmm6, xmm5, 0FH
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 39H
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pand xmm6, xmmword ptr [PBLENDW_0x33_MASK]
+ pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK]
+ por xmm6, xmm5
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ movdqa xmmword ptr [rsp+30H], xmm2
+ movdqa xmm2, xmm14
+ pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK]
+ pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm5, xmm2
+ movdqa xmm2, xmmword ptr [rsp+30H]
+ pshufd xmm5, xmm5, 78H
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 1EH
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+20H]
+ movdqa xmm6, xmmword ptr [rsp+40H]
+ jmp roundloop2
+endroundloop2:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ movups xmmword ptr [rbx+20H], xmm8
+ movups xmmword ptr [rbx+30H], xmm9
+ mov eax, dword ptr [rsp+130H]
+ neg eax
+ mov r10d, dword ptr [rsp+110H+8*rax]
+ mov r11d, dword ptr [rsp+120H+8*rax]
+ mov dword ptr [rsp+110H], r10d
+ mov dword ptr [rsp+120H], r11d
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+final1block:
+ test esi, 1H
+ je unwind
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movd xmm13, dword ptr [rsp+110H]
+ movd xmm14, dword ptr [rsp+120H]
+ punpckldq xmm13, xmm14
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ shl rax, 32
+ or rax, 64
+ movd xmm12, rax
+ movdqa xmm3, xmm13
+ punpcklqdq xmm3, xmm12
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ mov al, 7
+roundloop1:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+_blake3_hash_many_sse2 ENDP
+blake3_hash_many_sse2 ENDP
+
+blake3_compress_in_place_sse2 PROC
+_blake3_compress_in_place_sse2 PROC
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movzx eax, byte ptr [rsp+0A0H]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ movd xmm3, r9
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+10H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+20H]
+ movups xmm7, xmmword ptr [rdx+30H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ mov al, 7
+@@:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm14, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm8, xmm14
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp @B
+@@:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rcx], xmm0
+ movups xmmword ptr [rcx+10H], xmm1
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120
+ ret
+_blake3_compress_in_place_sse2 ENDP
+blake3_compress_in_place_sse2 ENDP
+
+ALIGN 16
+blake3_compress_xof_sse2 PROC
+_blake3_compress_xof_sse2 PROC
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movzx eax, byte ptr [rsp+0A0H]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+0A8H]
+ shl rax, 32
+ add r8, rax
+ movd xmm3, r9
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+10H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+20H]
+ movups xmm7, xmmword ptr [rdx+30H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ mov al, 7
+@@:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0B1H
+ pshufhw xmm3, xmm3, 0B1H
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm14, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm8, xmm14
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp @B
+@@:
+ movdqu xmm4, xmmword ptr [rcx]
+ movdqu xmm5, xmmword ptr [rcx+10H]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r10], xmm0
+ movups xmmword ptr [r10+10H], xmm1
+ movups xmmword ptr [r10+20H], xmm2
+ movups xmmword ptr [r10+30H], xmm3
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120
+ ret
+_blake3_compress_xof_sse2 ENDP
+blake3_compress_xof_sse2 ENDP
+
+_TEXT ENDS
+
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+BLAKE3_IV:
+ dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
+
+ADD0:
+ dd 0, 1, 2, 3
+
+ADD1:
+ dd 4 dup (4)
+
+BLAKE3_IV_0:
+ dd 4 dup (6A09E667H)
+
+BLAKE3_IV_1:
+ dd 4 dup (0BB67AE85H)
+
+BLAKE3_IV_2:
+ dd 4 dup (3C6EF372H)
+
+BLAKE3_IV_3:
+ dd 4 dup (0A54FF53AH)
+
+BLAKE3_BLOCK_LEN:
+ dd 4 dup (64)
+
+CMP_MSB_MASK:
+ dd 8 dup(80000000H)
+
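+; The PBLENDW_* masks emulate the SSE4.1 pblendw immediates with pand/por,
+; since SSE2 has no blend instruction.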
+PBLENDW_0x33_MASK:
+ dd 0FFFFFFFFH, 000000000H, 0FFFFFFFFH, 000000000H
+PBLENDW_0xCC_MASK:
+ dd 000000000H, 0FFFFFFFFH, 000000000H, 0FFFFFFFFH
+PBLENDW_0x3F_MASK:
+ dd 0FFFFFFFFH, 0FFFFFFFFH, 0FFFFFFFFH, 000000000H
+PBLENDW_0xC0_MASK:
+ dd 000000000H, 000000000H, 000000000H, 0FFFFFFFFH
+
+_RDATA ENDS
+END
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41.c b/llvm/lib/Support/BLAKE3/blake3_sse41.c
new file mode 100644
index 000000000000..87a8dae15ce9
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41.c
@@ -0,0 +1,560 @@
+#include "blake3_impl.h"
+
+#include <immintrin.h>
+
+#define DEGREE 4
+
+#define _mm_shuffle_ps2(a, b, c) \
+ (_mm_castps_si128( \
+ _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
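+// Casting through float lets shufps pick any two 32-bit words from each of
+// two source registers, a cross-register selection that the SSE integer
+// shuffles don't provide directly.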
+
+INLINE __m128i loadu(const uint8_t src[16]) {
+ return _mm_loadu_si128((const __m128i *)src);
+}
+
+INLINE void storeu(__m128i src, uint8_t dest[16]) {
+ _mm_storeu_si128((__m128i *)dest, src);
+}
+
+INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
+
+// Note that clang-format doesn't like the name "xor" for some reason.
+INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
+
+INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
+
+INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
+ return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
+}
+
+INLINE __m128i rot16(__m128i x) {
+ return _mm_shuffle_epi8(
+ x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2));
+}
+
+INLINE __m128i rot12(__m128i x) {
+ return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12));
+}
+
+INLINE __m128i rot8(__m128i x) {
+ return _mm_shuffle_epi8(
+ x, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
+}
+
+INLINE __m128i rot7(__m128i x) {
+ return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7));
+}
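+// rot16 and rot8 move whole bytes, so a single pshufb byte shuffle does the
+// rotation; rot12 and rot7 are not byte-aligned and need the two-shift form.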
+
+INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+ __m128i m) {
+ *row0 = addv(addv(*row0, m), *row1);
+ *row3 = xorv(*row3, *row0);
+ *row3 = rot16(*row3);
+ *row2 = addv(*row2, *row3);
+ *row1 = xorv(*row1, *row2);
+ *row1 = rot12(*row1);
+}
+
+INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
+ __m128i m) {
+ *row0 = addv(addv(*row0, m), *row1);
+ *row3 = xorv(*row3, *row0);
+ *row3 = rot8(*row3);
+ *row2 = addv(*row2, *row3);
+ *row1 = xorv(*row1, *row2);
+ *row1 = rot7(*row1);
+}
+
+// Note the optimization here of leaving row1 as the unrotated row, rather than
+// row0. All the message loads below are adjusted to compensate for this. See
+// discussion at https://github.com/sneves/blake2-avx2/pull/4
+INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+ *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
+ *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+ *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
+}
+
+INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
+ *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
+ *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
+ *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
+}
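+// Concretely, writing a register's lanes as source indices (0, 1, 2, 3):
+// diagonalize leaves row0 as (3, 0, 1, 2), row3 as (2, 3, 0, 1), and row2 as
+// (1, 2, 3, 0), so the g2 mixing acts along diagonals while row1 stays put;
+// undiagonalize applies the inverse rotations.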
+
+INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter, uint8_t flags) {
+ rows[0] = loadu((uint8_t *)&cv[0]);
+ rows[1] = loadu((uint8_t *)&cv[4]);
+ rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
+ rows[3] = set4(counter_low(counter), counter_high(counter),
+ (uint32_t)block_len, (uint32_t)flags);
+
+ __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
+ __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
+ __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
+ __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
+
+ __m128i t0, t1, t2, t3, tt;
+
+ // Round 1. The first round permutes the message words from the original
+ // input order into the groups that get mixed in parallel.
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8
+ t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9
+ t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 2. This round and all following rounds apply a fixed permutation
+ // to the message words from the round before.
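+ // Assuming MSG_SCHEDULE matches the BLAKE3 reference schedule, each new
+ // word i is the previous round's word PERM[i], with
+ // PERM = {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8}.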
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 3
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 4
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 5
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 6
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+ m0 = t0;
+ m1 = t1;
+ m2 = t2;
+ m3 = t3;
+
+ // Round 7
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
+ t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
+ t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
+ tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
+ t1 = _mm_blend_epi16(tt, t1, 0xCC);
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
+ diagonalize(&rows[0], &rows[2], &rows[3]);
+ t2 = _mm_unpacklo_epi64(m3, m1);
+ tt = _mm_blend_epi16(t2, m2, 0xC0);
+ t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
+ g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
+ t3 = _mm_unpackhi_epi32(m1, m3);
+ tt = _mm_unpacklo_epi32(m2, t3);
+ t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
+ g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
+ undiagonalize(&rows[0], &rows[2], &rows[3]);
+}
+
+void blake3_compress_in_place_sse41(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags) {
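+ // The compression output is 16 words; the in-place variant keeps only the
+ // first 8 (rows 0 and 1 xored with rows 2 and 3) as the new chaining value.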
+ __m128i rows[4];
+ compress_pre(rows, cv, block, block_len, counter, flags);
+ storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]);
+ storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]);
+}
+
+void blake3_compress_xof_sse41(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN],
+ uint8_t block_len, uint64_t counter,
+ uint8_t flags, uint8_t out[64]) {
+ __m128i rows[4];
+ compress_pre(rows, cv, block, block_len, counter, flags);
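+ // Full 64-byte XOF output: the first 32 bytes equal the regular compression
+ // output, and the last 32 bytes xor the input cv back into rows 2 and 3,
+ // per the BLAKE3 extended-output construction.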
+ storeu(xorv(rows[0], rows[2]), &out[0]);
+ storeu(xorv(rows[1], rows[3]), &out[16]);
+ storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]);
+ storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]);
+}
+
+INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
+ v[0] = addv(v[0], v[4]);
+ v[1] = addv(v[1], v[5]);
+ v[2] = addv(v[2], v[6]);
+ v[3] = addv(v[3], v[7]);
+ v[12] = xorv(v[12], v[0]);
+ v[13] = xorv(v[13], v[1]);
+ v[14] = xorv(v[14], v[2]);
+ v[15] = xorv(v[15], v[3]);
+ v[12] = rot16(v[12]);
+ v[13] = rot16(v[13]);
+ v[14] = rot16(v[14]);
+ v[15] = rot16(v[15]);
+ v[8] = addv(v[8], v[12]);
+ v[9] = addv(v[9], v[13]);
+ v[10] = addv(v[10], v[14]);
+ v[11] = addv(v[11], v[15]);
+ v[4] = xorv(v[4], v[8]);
+ v[5] = xorv(v[5], v[9]);
+ v[6] = xorv(v[6], v[10]);
+ v[7] = xorv(v[7], v[11]);
+ v[4] = rot12(v[4]);
+ v[5] = rot12(v[5]);
+ v[6] = rot12(v[6]);
+ v[7] = rot12(v[7]);
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
+ v[0] = addv(v[0], v[4]);
+ v[1] = addv(v[1], v[5]);
+ v[2] = addv(v[2], v[6]);
+ v[3] = addv(v[3], v[7]);
+ v[12] = xorv(v[12], v[0]);
+ v[13] = xorv(v[13], v[1]);
+ v[14] = xorv(v[14], v[2]);
+ v[15] = xorv(v[15], v[3]);
+ v[12] = rot8(v[12]);
+ v[13] = rot8(v[13]);
+ v[14] = rot8(v[14]);
+ v[15] = rot8(v[15]);
+ v[8] = addv(v[8], v[12]);
+ v[9] = addv(v[9], v[13]);
+ v[10] = addv(v[10], v[14]);
+ v[11] = addv(v[11], v[15]);
+ v[4] = xorv(v[4], v[8]);
+ v[5] = xorv(v[5], v[9]);
+ v[6] = xorv(v[6], v[10]);
+ v[7] = xorv(v[7], v[11]);
+ v[4] = rot7(v[4]);
+ v[5] = rot7(v[5]);
+ v[6] = rot7(v[6]);
+ v[7] = rot7(v[7]);
+
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
+ v[0] = addv(v[0], v[5]);
+ v[1] = addv(v[1], v[6]);
+ v[2] = addv(v[2], v[7]);
+ v[3] = addv(v[3], v[4]);
+ v[15] = xorv(v[15], v[0]);
+ v[12] = xorv(v[12], v[1]);
+ v[13] = xorv(v[13], v[2]);
+ v[14] = xorv(v[14], v[3]);
+ v[15] = rot16(v[15]);
+ v[12] = rot16(v[12]);
+ v[13] = rot16(v[13]);
+ v[14] = rot16(v[14]);
+ v[10] = addv(v[10], v[15]);
+ v[11] = addv(v[11], v[12]);
+ v[8] = addv(v[8], v[13]);
+ v[9] = addv(v[9], v[14]);
+ v[5] = xorv(v[5], v[10]);
+ v[6] = xorv(v[6], v[11]);
+ v[7] = xorv(v[7], v[8]);
+ v[4] = xorv(v[4], v[9]);
+ v[5] = rot12(v[5]);
+ v[6] = rot12(v[6]);
+ v[7] = rot12(v[7]);
+ v[4] = rot12(v[4]);
+ v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
+ v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
+ v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
+ v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
+ v[0] = addv(v[0], v[5]);
+ v[1] = addv(v[1], v[6]);
+ v[2] = addv(v[2], v[7]);
+ v[3] = addv(v[3], v[4]);
+ v[15] = xorv(v[15], v[0]);
+ v[12] = xorv(v[12], v[1]);
+ v[13] = xorv(v[13], v[2]);
+ v[14] = xorv(v[14], v[3]);
+ v[15] = rot8(v[15]);
+ v[12] = rot8(v[12]);
+ v[13] = rot8(v[13]);
+ v[14] = rot8(v[14]);
+ v[10] = addv(v[10], v[15]);
+ v[11] = addv(v[11], v[12]);
+ v[8] = addv(v[8], v[13]);
+ v[9] = addv(v[9], v[14]);
+ v[5] = xorv(v[5], v[10]);
+ v[6] = xorv(v[6], v[11]);
+ v[7] = xorv(v[7], v[8]);
+ v[4] = xorv(v[4], v[9]);
+ v[5] = rot7(v[5]);
+ v[6] = rot7(v[6]);
+ v[7] = rot7(v[7]);
+ v[4] = rot7(v[4]);
+}
+
+INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
+ // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
+ // 22/33. Note that this doesn't split the vector into two lanes, as the
+ // AVX2 counterparts do.
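+ // For example, with inputs a, b, c, d, the unpacks below leave abcd_0
+ // holding (a0, b0, c0, d0), lane 0 of every input, which is row 0 of the
+ // transposed 4x4 matrix of words.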
+ __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+ __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+ __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+ __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+
+ // Interleave 64-bit lanes.
+ __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
+ __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
+ __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
+ __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
+
+ vecs[0] = abcd_0;
+ vecs[1] = abcd_1;
+ vecs[2] = abcd_2;
+ vecs[3] = abcd_3;
+}
+
+INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
+ size_t block_offset, __m128i out[16]) {
+ out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
+ out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
+ out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
+ out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
+ out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
+ out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
+ out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
+ out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
+ out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
+ out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
+ out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
+ out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
+ out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
+ out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
+ out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
+ out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
+ for (size_t i = 0; i < 4; ++i) {
+ _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
+ }
+ transpose_vecs(&out[0]);
+ transpose_vecs(&out[4]);
+ transpose_vecs(&out[8]);
+ transpose_vecs(&out[12]);
+}
+
+INLINE void load_counters(uint64_t counter, bool increment_counter,
+ __m128i *out_lo, __m128i *out_hi) {
+ const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
+ const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
+ const __m128i add1 = _mm_and_si128(mask, add0);
+ __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
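+ // SSE has no unsigned 32-bit compare, so flip the sign bit of both sides
+ // and use the signed pcmpgtd: a carry lane is all-ones exactly where l
+ // wrapped (l < add1 unsigned); subtracting that -1 below bumps the high
+ // word.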
+ __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
+ _mm_xor_si128(l, _mm_set1_epi32(0x80000000)));
+ __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
+ *out_lo = l;
+ *out_hi = h;
+}
+
+static void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ bool increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+ __m128i h_vecs[8] = {
+ set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
+ set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
+ };
+ __m128i counter_low_vec, counter_high_vec;
+ load_counters(counter, increment_counter, &counter_low_vec,
+ &counter_high_vec);
+ uint8_t block_flags = flags | flags_start;
+
+ for (size_t block = 0; block < blocks; block++) {
+ if (block + 1 == blocks) {
+ block_flags |= flags_end;
+ }
+ __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
+ __m128i block_flags_vec = set1(block_flags);
+ __m128i msg_vecs[16];
+ transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
+
+ __m128i v[16] = {
+ h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
+ h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
+ set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]),
+ counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
+ };
+ round_fn(v, msg_vecs, 0);
+ round_fn(v, msg_vecs, 1);
+ round_fn(v, msg_vecs, 2);
+ round_fn(v, msg_vecs, 3);
+ round_fn(v, msg_vecs, 4);
+ round_fn(v, msg_vecs, 5);
+ round_fn(v, msg_vecs, 6);
+ h_vecs[0] = xorv(v[0], v[8]);
+ h_vecs[1] = xorv(v[1], v[9]);
+ h_vecs[2] = xorv(v[2], v[10]);
+ h_vecs[3] = xorv(v[3], v[11]);
+ h_vecs[4] = xorv(v[4], v[12]);
+ h_vecs[5] = xorv(v[5], v[13]);
+ h_vecs[6] = xorv(v[6], v[14]);
+ h_vecs[7] = xorv(v[7], v[15]);
+
+ block_flags = flags;
+ }
+
+ transpose_vecs(&h_vecs[0]);
+ transpose_vecs(&h_vecs[4]);
+ // The first four vecs now contain the first half of each output, and the
+ // second four vecs contain the second half of each output.
+ storeu(h_vecs[0], &out[0 * sizeof(__m128i)]);
+ storeu(h_vecs[4], &out[1 * sizeof(__m128i)]);
+ storeu(h_vecs[1], &out[2 * sizeof(__m128i)]);
+ storeu(h_vecs[5], &out[3 * sizeof(__m128i)]);
+ storeu(h_vecs[2], &out[4 * sizeof(__m128i)]);
+ storeu(h_vecs[6], &out[5 * sizeof(__m128i)]);
+ storeu(h_vecs[3], &out[6 * sizeof(__m128i)]);
+ storeu(h_vecs[7], &out[7 * sizeof(__m128i)]);
+}
+
+INLINE void hash_one_sse41(const uint8_t *input, size_t blocks,
+ const uint32_t key[8], uint64_t counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
+ uint32_t cv[8];
+ memcpy(cv, key, BLAKE3_KEY_LEN);
+ uint8_t block_flags = flags | flags_start;
+ while (blocks > 0) {
+ if (blocks == 1) {
+ block_flags |= flags_end;
+ }
+ blake3_compress_in_place_sse41(cv, input, BLAKE3_BLOCK_LEN, counter,
+ block_flags);
+ input = &input[BLAKE3_BLOCK_LEN];
+ blocks -= 1;
+ block_flags = flags;
+ }
+ memcpy(out, cv, BLAKE3_OUT_LEN);
+}
+
+void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
+ size_t blocks, const uint32_t key[8],
+ uint64_t counter, bool increment_counter,
+ uint8_t flags, uint8_t flags_start,
+ uint8_t flags_end, uint8_t *out) {
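+ // Hash whole batches of DEGREE (4) inputs through the SIMD path first,
+ // then finish any remainder one input at a time with the scalar helper.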
+ while (num_inputs >= DEGREE) {
+ blake3_hash4_sse41(inputs, blocks, key, counter, increment_counter, flags,
+ flags_start, flags_end, out);
+ if (increment_counter) {
+ counter += DEGREE;
+ }
+ inputs += DEGREE;
+ num_inputs -= DEGREE;
+ out = &out[DEGREE * BLAKE3_OUT_LEN];
+ }
+ while (num_inputs > 0) {
+ hash_one_sse41(inputs[0], blocks, key, counter, flags, flags_start,
+ flags_end, out);
+ if (increment_counter) {
+ counter += 1;
+ }
+ inputs += 1;
+ num_inputs -= 1;
+ out = &out[BLAKE3_OUT_LEN];
+ }
+}
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
new file mode 100644
index 000000000000..4e918c5bb2cc
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
@@ -0,0 +1,2044 @@
+#if defined(__x86_64__)
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if !defined(_CET_ENDBR)
+#define _CET_ENDBR
+#endif
+
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
+.intel_syntax noprefix
+HIDDEN blake3_hash_many_sse41
+HIDDEN _blake3_hash_many_sse41
+HIDDEN blake3_compress_in_place_sse41
+HIDDEN _blake3_compress_in_place_sse41
+HIDDEN blake3_compress_xof_sse41
+HIDDEN _blake3_compress_xof_sse41
+.global blake3_hash_many_sse41
+.global _blake3_hash_many_sse41
+.global blake3_compress_in_place_sse41
+.global _blake3_compress_in_place_sse41
+.global blake3_compress_xof_sse41
+.global _blake3_compress_xof_sse41
+#ifdef __APPLE__
+.text
+#else
+.section .text
+#endif
+ .p2align 6
+_blake3_hash_many_sse41:
+blake3_hash_many_sse41:
+ _CET_ENDBR
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 360
+ and rsp, 0xFFFFFFFFFFFFFFC0
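+/* Turn increment_counter (r9d) into an all-ones or all-zero lane mask,
+   select the per-lane offsets (ADD0) and the per-batch stride (ADD1) with
+   it, then split the 64-bit counter into per-lane low/high words,
+   propagating the carry with the sign-flipped signed compare. */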
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ mov rbx, qword ptr [rbp+0x50]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x38]
+ movzx r12d, byte ptr [rbp+0x48]
+ cmp rsi, 4
+ jc 3f
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
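+ # Block loop done for these four inputs: transpose the eight state
+ # vectors back to per-input order and store one 32-byte output
+ # chaining value per input.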
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jnz 3f
+4:
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
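+# Fewer than four inputs remain (rsi in 1..3): hash two in parallel if
+# bit 1 of the count is set, then fall through to the one-input path.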
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ pinsrd xmm14, dword ptr [rsp+0x124], 1
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ pinsrd xmm3, eax, 3
+ pinsrd xmm11, eax, 3
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm12, xmmword ptr [ROT16+rip]
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm13, xmmword ptr [ROT8+rip]
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pblendw xmm13, xmm12, 0xCC
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ pblendw xmm12, xmm6, 0xC0
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pblendw xmm6, xmm5, 0xCC
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ pblendw xmm5, xmm14, 0xC0
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ movdqa xmm0, xmmword ptr [rsp+0x130]
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm2, xmmword ptr [rsp+0x120]
+ movdqu xmm3, xmmword ptr [rsp+0x118]
+ movdqu xmm4, xmmword ptr [rsp+0x128]
+ blendvps xmm1, xmm3, xmm0
+ blendvps xmm2, xmm4, xmm0
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x40]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm3, xmm13
+ pinsrd xmm3, eax, 3
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
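+# Single-block compression, System V ABI: rdi = 32-byte chaining value
+# (updated in place), rsi = 64-byte block, dl = block length,
+# rcx = 64-bit counter, r8b = domain flags.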
+.p2align 6
+blake3_compress_in_place_sse41:
+_blake3_compress_in_place_sse41:
+ _CET_ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ shl r8, 32
+ add rdx, r8
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rdi], xmm0
+ movups xmmword ptr [rdi+0x10], xmm1
+ ret
+
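+# XOF compression, System V ABI: same arguments as compress_in_place
+# plus r9 = 64-byte output; the input chaining value is xored into the
+# upper half so the full extended output can be produced.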
+.p2align 6
+blake3_compress_xof_sse41:
+_blake3_compress_xof_sse41:
+ _CET_ENDBR
+ movups xmm0, xmmword ptr [rdi]
+ movups xmm1, xmmword ptr [rdi+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ movdqu xmm4, xmmword ptr [rdi]
+ movdqu xmm5, xmmword ptr [rdi+0x10]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r9], xmm0
+ movups xmmword ptr [r9+0x10], xmm1
+ movups xmmword ptr [r9+0x20], xmm2
+ movups xmmword ptr [r9+0x30], xmm3
+ ret
+
+
+#ifdef __APPLE__
+.static_data
+#else
+.section .rodata
+#endif
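+# Read-only data. BLAKE3_IV holds the first four IV words; ROT16 and
+# ROT8 are pshufb masks rotating each 32-bit lane right by 16 and 8
+# bits; ADD0/ADD1 seed and step the per-lane block counters; and
+# CMP_MSB_MASK flips sign bits so pcmpgtd can emulate the unsigned
+# compare used to propagate counter carries.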
+.p2align 6
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ROT16:
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8:
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+ADD0:
+ .long 0, 1, 2, 3
+ADD1:
+ .long 4, 4, 4, 4
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 64, 64, 64, 64
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+
+#endif
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
new file mode 100644
index 000000000000..60d0a4042e71
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
@@ -0,0 +1,2069 @@
+.intel_syntax noprefix
+.global blake3_hash_many_sse41
+.global _blake3_hash_many_sse41
+.global blake3_compress_in_place_sse41
+.global _blake3_compress_in_place_sse41
+.global blake3_compress_xof_sse41
+.global _blake3_compress_xof_sse41
+.section .text
+ .p2align 6
+_blake3_hash_many_sse41:
+blake3_hash_many_sse41:
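+ # Windows GNU entry: xmm6-xmm15 are nonvolatile in the Microsoft x64
+ # ABI, so they are spilled to the aligned frame, and the register
+ # arguments (rcx, rdx, r8, r9) are moved into the System V registers
+ # the shared body expects.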
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 528
+ and rsp, 0xFFFFFFFFFFFFFFC0
+ movdqa xmmword ptr [rsp+0x170], xmm6
+ movdqa xmmword ptr [rsp+0x180], xmm7
+ movdqa xmmword ptr [rsp+0x190], xmm8
+ movdqa xmmword ptr [rsp+0x1A0], xmm9
+ movdqa xmmword ptr [rsp+0x1B0], xmm10
+ movdqa xmmword ptr [rsp+0x1C0], xmm11
+ movdqa xmmword ptr [rsp+0x1D0], xmm12
+ movdqa xmmword ptr [rsp+0x1E0], xmm13
+ movdqa xmmword ptr [rsp+0x1F0], xmm14
+ movdqa xmmword ptr [rsp+0x200], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+0x68]
+ movzx r9, byte ptr [rbp+0x70]
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 0x00
+ movdqa xmmword ptr [rsp+0x130], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0+rip]
+ pand xmm0, xmmword ptr [ADD1+rip]
+ movdqa xmmword ptr [rsp+0x150], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 0x00
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+0x110], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 0x00
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ mov rbx, qword ptr [rbp+0x90]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+0x78]
+ movzx r12d, byte ptr [rbp+0x88]
+ cmp rsi, 4
+ jc 3f
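+# Main loop: while at least four inputs remain, hash four in parallel
+# with the state transposed across the xmm registers.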
+2:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 0x00
+ pshufd xmm1, xmm3, 0x55
+ pshufd xmm2, xmm3, 0xAA
+ pshufd xmm3, xmm3, 0xFF
+ movdqu xmm7, xmmword ptr [rcx+0x10]
+ pshufd xmm4, xmm7, 0x00
+ pshufd xmm5, xmm7, 0x55
+ pshufd xmm6, xmm7, 0xAA
+ pshufd xmm7, xmm7, 0xFF
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ mov r10, qword ptr [rdi+0x10]
+ mov r11, qword ptr [rdi+0x18]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
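+ # Block loop: rdx steps through the inputs 64 bytes at a time. eax
+ # starts as flags|flags_start; flags_end is merged in only on the
+ # final block (cmovne restores the saved copy otherwise).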
+9:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+0x10], xmm9
+ movdqa xmmword ptr [rsp+0x20], xmm12
+ movdqa xmmword ptr [rsp+0x30], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x40], xmm8
+ movdqa xmmword ptr [rsp+0x50], xmm9
+ movdqa xmmword ptr [rsp+0x60], xmm12
+ movdqa xmmword ptr [rsp+0x70], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0x80], xmm8
+ movdqa xmmword ptr [rsp+0x90], xmm9
+ movdqa xmmword ptr [rsp+0xA0], xmm12
+ movdqa xmmword ptr [rsp+0xB0], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0xC0], xmm8
+ movdqa xmmword ptr [rsp+0xD0], xmm9
+ movdqa xmmword ptr [rsp+0xE0], xmm12
+ movdqa xmmword ptr [rsp+0xF0], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
+ movdqa xmm12, xmmword ptr [rsp+0x110]
+ movdqa xmm13, xmmword ptr [rsp+0x120]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 0x00
+ prefetcht0 [r8+rdx+0x80]
+ prefetcht0 [r9+rdx+0x80]
+ prefetcht0 [r10+rdx+0x80]
+ prefetcht0 [r11+rdx+0x80]
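+ # Seven fully unrolled rounds follow: column steps then diagonal
+ # steps, with each round reading its message words from the
+ # transposed stack slots in the BLAKE3 permutation order.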
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x80]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x70]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x10]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0xD0]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x60]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xB0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x50]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0xE0]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x40]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x50]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xC0]
+ paddd xmm1, xmmword ptr [rsp+0x90]
+ paddd xmm2, xmmword ptr [rsp+0xF0]
+ paddd xmm3, xmmword ptr [rsp+0xE0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0xA0]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x70]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x20]
+ paddd xmm1, xmmword ptr [rsp+0x30]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x90]
+ paddd xmm1, xmmword ptr [rsp+0xB0]
+ paddd xmm2, xmmword ptr [rsp+0x80]
+ paddd xmm3, xmmword ptr [rsp+0xF0]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0xC0]
+ paddd xmm3, xmmword ptr [rsp+0x10]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xD0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x20]
+ paddd xmm3, xmmword ptr [rsp+0x40]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0x30]
+ paddd xmm1, xmmword ptr [rsp+0xA0]
+ paddd xmm2, xmmword ptr [rsp+0x60]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xB0]
+ paddd xmm1, xmmword ptr [rsp+0x50]
+ paddd xmm2, xmmword ptr [rsp+0x10]
+ paddd xmm3, xmmword ptr [rsp+0x80]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xF0]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0x90]
+ paddd xmm3, xmmword ptr [rsp+0x60]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xE0]
+ paddd xmm1, xmmword ptr [rsp+0x20]
+ paddd xmm2, xmmword ptr [rsp+0x30]
+ paddd xmm3, xmmword ptr [rsp+0x70]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+0x100], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0xA0]
+ paddd xmm1, xmmword ptr [rsp+0xC0]
+ paddd xmm2, xmmword ptr [rsp+0x40]
+ paddd xmm3, xmmword ptr [rsp+0xD0]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8+rip]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+0x100]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne 9b
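+ # Block loop done: transpose the state back to per-input order and
+ # store one 32-byte output chaining value per input.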
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+0x20], xmm1
+ movdqu xmmword ptr [rbx+0x40], xmm9
+ movdqu xmmword ptr [rbx+0x60], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+0x10], xmm4
+ movdqu xmmword ptr [rbx+0x30], xmm5
+ movdqu xmmword ptr [rbx+0x50], xmm9
+ movdqu xmmword ptr [rbx+0x70], xmm7
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+0x150]
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+0x120]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+0x120], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc 2b
+ test rsi, rsi
+ jne 3f
+4:
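+ # Epilogue: restore the nonvolatile xmm6-xmm15 saved in the prologue
+ # before unwinding the frame and popping the callee-saved GPRs.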
+ movdqa xmm6, xmmword ptr [rsp+0x170]
+ movdqa xmm7, xmmword ptr [rsp+0x180]
+ movdqa xmm8, xmmword ptr [rsp+0x190]
+ movdqa xmm9, xmmword ptr [rsp+0x1A0]
+ movdqa xmm10, xmmword ptr [rsp+0x1B0]
+ movdqa xmm11, xmmword ptr [rsp+0x1C0]
+ movdqa xmm12, xmmword ptr [rsp+0x1D0]
+ movdqa xmm13, xmmword ptr [rsp+0x1E0]
+ movdqa xmm14, xmmword ptr [rsp+0x1F0]
+ movdqa xmm15, xmmword ptr [rsp+0x200]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+.p2align 5
+3:
+ test esi, 0x2
+ je 3f
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+0x114]
+ pinsrd xmm14, dword ptr [rsp+0x124], 1
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmmword ptr [rsp+0x10], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+0x8]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 0x93
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 0x93
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 0x93
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+0x10]
+ pinsrd xmm3, eax, 3
+ pinsrd xmm11, eax, 3
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+0x20], xmm4
+ movaps xmmword ptr [rsp+0x30], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm12, xmmword ptr [ROT16+rip]
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+0x40], xmm5
+ movaps xmmword ptr [rsp+0x50], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm13, xmmword ptr [ROT8+rip]
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm8, xmm8, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ pshufd xmm10, xmm10, 0x39
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm8, xmm8, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm11, xmm11, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ pshufd xmm10, xmm10, 0x93
+ dec al
+ je 9f
+ movdqa xmm12, xmmword ptr [rsp+0x20]
+ movdqa xmm5, xmmword ptr [rsp+0x40]
+ pshufd xmm13, xmm12, 0x0F
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 0x39
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pblendw xmm13, xmm12, 0xCC
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ pblendw xmm12, xmm6, 0xC0
+ pshufd xmm12, xmm12, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmmword ptr [rsp+0x20], xmm13
+ movdqa xmmword ptr [rsp+0x40], xmm12
+ movdqa xmm5, xmmword ptr [rsp+0x30]
+ movdqa xmm13, xmmword ptr [rsp+0x50]
+ pshufd xmm6, xmm5, 0x0F
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 0x39
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pblendw xmm6, xmm5, 0xCC
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ pblendw xmm5, xmm14, 0xC0
+ pshufd xmm5, xmm5, 0x78
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 0x1E
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+0x20]
+ movdqa xmm6, xmmword ptr [rsp+0x40]
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ movups xmmword ptr [rbx+0x20], xmm8
+ movups xmmword ptr [rbx+0x30], xmm9
+ movdqa xmm0, xmmword ptr [rsp+0x130]
+ movdqa xmm1, xmmword ptr [rsp+0x110]
+ movdqa xmm2, xmmword ptr [rsp+0x120]
+ movdqu xmm3, xmmword ptr [rsp+0x118]
+ movdqu xmm4, xmmword ptr [rsp+0x128]
+ blendvps xmm1, xmm3, xmm0
+ blendvps xmm2, xmm4, xmm0
+ movdqa xmmword ptr [rsp+0x110], xmm1
+ movdqa xmmword ptr [rsp+0x120], xmm2
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+3:
+ test esi, 0x1
+ je 4b
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movd xmm13, dword ptr [rsp+0x110]
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+0x80]
+ or eax, r13d
+ xor edx, edx
+2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movaps xmm3, xmm13
+ pinsrd xmm3, eax, 3
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+
+.p2align 6
+blake3_compress_in_place_sse41:
+_blake3_compress_in_place_sse41:
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+0x10], xmm7
+ movdqa xmmword ptr [rsp+0x20], xmm8
+ movdqa xmmword ptr [rsp+0x30], xmm9
+ movdqa xmmword ptr [rsp+0x40], xmm11
+ movdqa xmmword ptr [rsp+0x50], xmm14
+ movdqa xmmword ptr [rsp+0x60], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, byte ptr [rsp+0xA0]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ movq xmm3, r9
+ movq xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+0x20]
+ movups xmm7, xmmword ptr [rdx+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rcx], xmm0
+ movups xmmword ptr [rcx+0x10], xmm1
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+0x10]
+ movdqa xmm8, xmmword ptr [rsp+0x20]
+ movdqa xmm9, xmmword ptr [rsp+0x30]
+ movdqa xmm11, xmmword ptr [rsp+0x40]
+ movdqa xmm14, xmmword ptr [rsp+0x50]
+ movdqa xmm15, xmmword ptr [rsp+0x60]
+ add rsp, 120
+ ret
+
+
+.p2align 6
+_blake3_compress_xof_sse41:
+blake3_compress_xof_sse41:
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+0x10], xmm7
+ movdqa xmmword ptr [rsp+0x20], xmm8
+ movdqa xmmword ptr [rsp+0x30], xmm9
+ movdqa xmmword ptr [rsp+0x40], xmm11
+ movdqa xmmword ptr [rsp+0x50], xmm14
+ movdqa xmmword ptr [rsp+0x60], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+0x10]
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
+ movzx eax, byte ptr [rsp+0xA0]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+0xA8]
+ shl rax, 32
+ add r8, rax
+ movq xmm3, r9
+ movq xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+0x20]
+ movups xmm7, xmmword ptr [rdx+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 0x93
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 0x93
+ movaps xmm14, xmmword ptr [ROT8+rip]
+ movaps xmm15, xmmword ptr [ROT16+rip]
+ mov al, 7
+9:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0xCC
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0xC0
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ movdqu xmm4, xmmword ptr [rcx]
+ movdqu xmm5, xmmword ptr [rcx+0x10]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r10], xmm0
+ movups xmmword ptr [r10+0x10], xmm1
+ movups xmmword ptr [r10+0x20], xmm2
+ movups xmmword ptr [r10+0x30], xmm3
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+0x10]
+ movdqa xmm8, xmmword ptr [rsp+0x20]
+ movdqa xmm9, xmmword ptr [rsp+0x30]
+ movdqa xmm11, xmmword ptr [rsp+0x40]
+ movdqa xmm14, xmmword ptr [rsp+0x50]
+ movdqa xmm15, xmmword ptr [rsp+0x60]
+ add rsp, 120
+ ret
+
+
+.section .rodata
+.p2align 6
+BLAKE3_IV:
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ROT16:
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+ROT8:
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+ADD0:
+ .long 0, 1, 2, 3
+ADD1:
+ .long 4, 4, 4, 4
+BLAKE3_IV_0:
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN:
+ .long 64, 64, 64, 64
+CMP_MSB_MASK:
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm
new file mode 100644
index 000000000000..8966c7b84406
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm
@@ -0,0 +1,2089 @@
+public _blake3_hash_many_sse41
+public blake3_hash_many_sse41
+public blake3_compress_in_place_sse41
+public _blake3_compress_in_place_sse41
+public blake3_compress_xof_sse41
+public _blake3_compress_xof_sse41
+
+_TEXT SEGMENT ALIGN(16) 'CODE'
+
+ALIGN 16
+blake3_hash_many_sse41 PROC
+_blake3_hash_many_sse41 PROC
+ push r15
+ push r14
+ push r13
+ push r12
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ mov rbp, rsp
+ sub rsp, 528
+ and rsp, 0FFFFFFFFFFFFFFC0H
+ movdqa xmmword ptr [rsp+170H], xmm6
+ movdqa xmmword ptr [rsp+180H], xmm7
+ movdqa xmmword ptr [rsp+190H], xmm8
+ movdqa xmmword ptr [rsp+1A0H], xmm9
+ movdqa xmmword ptr [rsp+1B0H], xmm10
+ movdqa xmmword ptr [rsp+1C0H], xmm11
+ movdqa xmmword ptr [rsp+1D0H], xmm12
+ movdqa xmmword ptr [rsp+1E0H], xmm13
+ movdqa xmmword ptr [rsp+1F0H], xmm14
+ movdqa xmmword ptr [rsp+200H], xmm15
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+ mov rcx, r9
+ mov r8, qword ptr [rbp+68H]
+ movzx r9, byte ptr [rbp+70H]
+ neg r9d
+ movd xmm0, r9d
+ pshufd xmm0, xmm0, 00H
+ movdqa xmmword ptr [rsp+130H], xmm0
+ movdqa xmm1, xmm0
+ pand xmm1, xmmword ptr [ADD0]
+ pand xmm0, xmmword ptr [ADD1]
+ movdqa xmmword ptr [rsp+150H], xmm0
+ movd xmm0, r8d
+ pshufd xmm0, xmm0, 00H
+ paddd xmm0, xmm1
+ movdqa xmmword ptr [rsp+110H], xmm0
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm1, xmm0
+ shr r8, 32
+ movd xmm2, r8d
+ pshufd xmm2, xmm2, 00H
+ psubd xmm2, xmm1
+ movdqa xmmword ptr [rsp+120H], xmm2
+ mov rbx, qword ptr [rbp+90H]
+ mov r15, rdx
+ shl r15, 6
+ movzx r13d, byte ptr [rbp+78H]
+ movzx r12d, byte ptr [rbp+88H]
+ cmp rsi, 4
+ jc final3blocks
+outerloop4:
+ movdqu xmm3, xmmword ptr [rcx]
+ pshufd xmm0, xmm3, 00H
+ pshufd xmm1, xmm3, 55H
+ pshufd xmm2, xmm3, 0AAH
+ pshufd xmm3, xmm3, 0FFH
+ movdqu xmm7, xmmword ptr [rcx+10H]
+ pshufd xmm4, xmm7, 00H
+ pshufd xmm5, xmm7, 55H
+ pshufd xmm6, xmm7, 0AAH
+ pshufd xmm7, xmm7, 0FFH
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ mov r10, qword ptr [rdi+10H]
+ mov r11, qword ptr [rdi+18H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop4:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movdqu xmm8, xmmword ptr [r8+rdx-40H]
+ movdqu xmm9, xmmword ptr [r9+rdx-40H]
+ movdqu xmm10, xmmword ptr [r10+rdx-40H]
+ movdqu xmm11, xmmword ptr [r11+rdx-40H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp], xmm8
+ movdqa xmmword ptr [rsp+10H], xmm9
+ movdqa xmmword ptr [rsp+20H], xmm12
+ movdqa xmmword ptr [rsp+30H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-30H]
+ movdqu xmm9, xmmword ptr [r9+rdx-30H]
+ movdqu xmm10, xmmword ptr [r10+rdx-30H]
+ movdqu xmm11, xmmword ptr [r11+rdx-30H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+40H], xmm8
+ movdqa xmmword ptr [rsp+50H], xmm9
+ movdqa xmmword ptr [rsp+60H], xmm12
+ movdqa xmmword ptr [rsp+70H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-20H]
+ movdqu xmm9, xmmword ptr [r9+rdx-20H]
+ movdqu xmm10, xmmword ptr [r10+rdx-20H]
+ movdqu xmm11, xmmword ptr [r11+rdx-20H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+80H], xmm8
+ movdqa xmmword ptr [rsp+90H], xmm9
+ movdqa xmmword ptr [rsp+0A0H], xmm12
+ movdqa xmmword ptr [rsp+0B0H], xmm13
+ movdqu xmm8, xmmword ptr [r8+rdx-10H]
+ movdqu xmm9, xmmword ptr [r9+rdx-10H]
+ movdqu xmm10, xmmword ptr [r10+rdx-10H]
+ movdqu xmm11, xmmword ptr [r11+rdx-10H]
+ movdqa xmm12, xmm8
+ punpckldq xmm8, xmm9
+ punpckhdq xmm12, xmm9
+ movdqa xmm14, xmm10
+ punpckldq xmm10, xmm11
+ punpckhdq xmm14, xmm11
+ movdqa xmm9, xmm8
+ punpcklqdq xmm8, xmm10
+ punpckhqdq xmm9, xmm10
+ movdqa xmm13, xmm12
+ punpcklqdq xmm12, xmm14
+ punpckhqdq xmm13, xmm14
+ movdqa xmmword ptr [rsp+0C0H], xmm8
+ movdqa xmmword ptr [rsp+0D0H], xmm9
+ movdqa xmmword ptr [rsp+0E0H], xmm12
+ movdqa xmmword ptr [rsp+0F0H], xmm13
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1]
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2]
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3]
+ movdqa xmm12, xmmword ptr [rsp+110H]
+ movdqa xmm13, xmmword ptr [rsp+120H]
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN]
+ movd xmm15, eax
+ pshufd xmm15, xmm15, 00H
+ prefetcht0 byte ptr [r8+rdx+80H]
+ prefetcht0 byte ptr [r9+rdx+80H]
+ prefetcht0 byte ptr [r10+rdx+80H]
+ prefetcht0 byte ptr [r11+rdx+80H]
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+80H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+70H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+10H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+0D0H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+60H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0B0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+50H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+0E0H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+40H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+50H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0C0H]
+ paddd xmm1, xmmword ptr [rsp+90H]
+ paddd xmm2, xmmword ptr [rsp+0F0H]
+ paddd xmm3, xmmword ptr [rsp+0E0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+0A0H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+70H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+20H]
+ paddd xmm1, xmmword ptr [rsp+30H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+90H]
+ paddd xmm1, xmmword ptr [rsp+0B0H]
+ paddd xmm2, xmmword ptr [rsp+80H]
+ paddd xmm3, xmmword ptr [rsp+0F0H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+0C0H]
+ paddd xmm3, xmmword ptr [rsp+10H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0D0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+20H]
+ paddd xmm3, xmmword ptr [rsp+40H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+30H]
+ paddd xmm1, xmmword ptr [rsp+0A0H]
+ paddd xmm2, xmmword ptr [rsp+60H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0B0H]
+ paddd xmm1, xmmword ptr [rsp+50H]
+ paddd xmm2, xmmword ptr [rsp+10H]
+ paddd xmm3, xmmword ptr [rsp+80H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0F0H]
+ paddd xmm1, xmmword ptr [rsp]
+ paddd xmm2, xmmword ptr [rsp+90H]
+ paddd xmm3, xmmword ptr [rsp+60H]
+ paddd xmm0, xmm4
+ paddd xmm1, xmm5
+ paddd xmm2, xmm6
+ paddd xmm3, xmm7
+ pxor xmm12, xmm0
+ pxor xmm13, xmm1
+ pxor xmm14, xmm2
+ pxor xmm15, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ pshufb xmm15, xmm8
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm12
+ paddd xmm9, xmm13
+ paddd xmm10, xmm14
+ paddd xmm11, xmm15
+ pxor xmm4, xmm8
+ pxor xmm5, xmm9
+ pxor xmm6, xmm10
+ pxor xmm7, xmm11
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ paddd xmm0, xmmword ptr [rsp+0E0H]
+ paddd xmm1, xmmword ptr [rsp+20H]
+ paddd xmm2, xmmword ptr [rsp+30H]
+ paddd xmm3, xmmword ptr [rsp+70H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT16]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ movdqa xmmword ptr [rsp+100H], xmm8
+ movdqa xmm8, xmm5
+ psrld xmm8, 12
+ pslld xmm5, 20
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 12
+ pslld xmm6, 20
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 12
+ pslld xmm7, 20
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 12
+ pslld xmm4, 20
+ por xmm4, xmm8
+ paddd xmm0, xmmword ptr [rsp+0A0H]
+ paddd xmm1, xmmword ptr [rsp+0C0H]
+ paddd xmm2, xmmword ptr [rsp+40H]
+ paddd xmm3, xmmword ptr [rsp+0D0H]
+ paddd xmm0, xmm5
+ paddd xmm1, xmm6
+ paddd xmm2, xmm7
+ paddd xmm3, xmm4
+ pxor xmm15, xmm0
+ pxor xmm12, xmm1
+ pxor xmm13, xmm2
+ pxor xmm14, xmm3
+ movdqa xmm8, xmmword ptr [ROT8]
+ pshufb xmm15, xmm8
+ pshufb xmm12, xmm8
+ pshufb xmm13, xmm8
+ pshufb xmm14, xmm8
+ paddd xmm10, xmm15
+ paddd xmm11, xmm12
+ movdqa xmm8, xmmword ptr [rsp+100H]
+ paddd xmm8, xmm13
+ paddd xmm9, xmm14
+ pxor xmm5, xmm10
+ pxor xmm6, xmm11
+ pxor xmm7, xmm8
+ pxor xmm4, xmm9
+ pxor xmm0, xmm8
+ pxor xmm1, xmm9
+ pxor xmm2, xmm10
+ pxor xmm3, xmm11
+ movdqa xmm8, xmm5
+ psrld xmm8, 7
+ pslld xmm5, 25
+ por xmm5, xmm8
+ movdqa xmm8, xmm6
+ psrld xmm8, 7
+ pslld xmm6, 25
+ por xmm6, xmm8
+ movdqa xmm8, xmm7
+ psrld xmm8, 7
+ pslld xmm7, 25
+ por xmm7, xmm8
+ movdqa xmm8, xmm4
+ psrld xmm8, 7
+ pslld xmm4, 25
+ por xmm4, xmm8
+ pxor xmm4, xmm12
+ pxor xmm5, xmm13
+ pxor xmm6, xmm14
+ pxor xmm7, xmm15
+ mov eax, r13d
+ jne innerloop4
+ movdqa xmm9, xmm0
+ punpckldq xmm0, xmm1
+ punpckhdq xmm9, xmm1
+ movdqa xmm11, xmm2
+ punpckldq xmm2, xmm3
+ punpckhdq xmm11, xmm3
+ movdqa xmm1, xmm0
+ punpcklqdq xmm0, xmm2
+ punpckhqdq xmm1, xmm2
+ movdqa xmm3, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm3, xmm11
+ movdqu xmmword ptr [rbx], xmm0
+ movdqu xmmword ptr [rbx+20H], xmm1
+ movdqu xmmword ptr [rbx+40H], xmm9
+ movdqu xmmword ptr [rbx+60H], xmm3
+ movdqa xmm9, xmm4
+ punpckldq xmm4, xmm5
+ punpckhdq xmm9, xmm5
+ movdqa xmm11, xmm6
+ punpckldq xmm6, xmm7
+ punpckhdq xmm11, xmm7
+ movdqa xmm5, xmm4
+ punpcklqdq xmm4, xmm6
+ punpckhqdq xmm5, xmm6
+ movdqa xmm7, xmm9
+ punpcklqdq xmm9, xmm11
+ punpckhqdq xmm7, xmm11
+ movdqu xmmword ptr [rbx+10H], xmm4
+ movdqu xmmword ptr [rbx+30H], xmm5
+ movdqu xmmword ptr [rbx+50H], xmm9
+ movdqu xmmword ptr [rbx+70H], xmm7
+ movdqa xmm1, xmmword ptr [rsp+110H]
+ movdqa xmm0, xmm1
+ paddd xmm1, xmmword ptr [rsp+150H]
+ movdqa xmmword ptr [rsp+110H], xmm1
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK]
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK]
+ pcmpgtd xmm0, xmm1
+ movdqa xmm1, xmmword ptr [rsp+120H]
+ psubd xmm1, xmm0
+ movdqa xmmword ptr [rsp+120H], xmm1
+ add rbx, 128
+ add rdi, 32
+ sub rsi, 4
+ cmp rsi, 4
+ jnc outerloop4
+ test rsi, rsi
+ jne final3blocks
+unwind:
+ movdqa xmm6, xmmword ptr [rsp+170H]
+ movdqa xmm7, xmmword ptr [rsp+180H]
+ movdqa xmm8, xmmword ptr [rsp+190H]
+ movdqa xmm9, xmmword ptr [rsp+1A0H]
+ movdqa xmm10, xmmword ptr [rsp+1B0H]
+ movdqa xmm11, xmmword ptr [rsp+1C0H]
+ movdqa xmm12, xmmword ptr [rsp+1D0H]
+ movdqa xmm13, xmmword ptr [rsp+1E0H]
+ movdqa xmm14, xmmword ptr [rsp+1F0H]
+ movdqa xmm15, xmmword ptr [rsp+200H]
+ mov rsp, rbp
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ ret
+ALIGN 16
+final3blocks:
+ test esi, 2H
+ je final1block
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm8, xmm0
+ movaps xmm9, xmm1
+ movd xmm13, dword ptr [rsp+110H]
+ pinsrd xmm13, dword ptr [rsp+120H], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmmword ptr [rsp], xmm13
+ movd xmm14, dword ptr [rsp+114H]
+ pinsrd xmm14, dword ptr [rsp+124H], 1
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmmword ptr [rsp+10H], xmm14
+ mov r8, qword ptr [rdi]
+ mov r9, qword ptr [rdi+8H]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop2:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movaps xmm10, xmm2
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm3, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm3, xmm5, 221
+ movaps xmm5, xmm3
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm3, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm3, xmm7, 221
+ pshufd xmm7, xmm3, 93H
+ movups xmm12, xmmword ptr [r9+rdx-40H]
+ movups xmm13, xmmword ptr [r9+rdx-30H]
+ movaps xmm11, xmm12
+ shufps xmm12, xmm13, 136
+ shufps xmm11, xmm13, 221
+ movaps xmm13, xmm11
+ movups xmm14, xmmword ptr [r9+rdx-20H]
+ movups xmm15, xmmword ptr [r9+rdx-10H]
+ movaps xmm11, xmm14
+ shufps xmm14, xmm15, 136
+ pshufd xmm14, xmm14, 93H
+ shufps xmm11, xmm15, 221
+ pshufd xmm15, xmm11, 93H
+ movaps xmm3, xmmword ptr [rsp]
+ movaps xmm11, xmmword ptr [rsp+10H]
+ pinsrd xmm3, eax, 3
+ pinsrd xmm11, eax, 3
+ mov al, 7
+roundloop2:
+ paddd xmm0, xmm4
+ paddd xmm8, xmm12
+ movaps xmmword ptr [rsp+20H], xmm4
+ movaps xmmword ptr [rsp+30H], xmm12
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm12, xmmword ptr [ROT16]
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm5
+ paddd xmm8, xmm13
+ movaps xmmword ptr [rsp+40H], xmm5
+ movaps xmmword ptr [rsp+50H], xmm13
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ movaps xmm13, xmmword ptr [ROT8]
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm8, xmm8, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 39H
+ pshufd xmm10, xmm10, 39H
+ paddd xmm0, xmm6
+ paddd xmm8, xmm14
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm12
+ pshufb xmm11, xmm12
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 20
+ psrld xmm4, 12
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 20
+ psrld xmm4, 12
+ por xmm9, xmm4
+ paddd xmm0, xmm7
+ paddd xmm8, xmm15
+ paddd xmm0, xmm1
+ paddd xmm8, xmm9
+ pxor xmm3, xmm0
+ pxor xmm11, xmm8
+ pshufb xmm3, xmm13
+ pshufb xmm11, xmm13
+ paddd xmm2, xmm3
+ paddd xmm10, xmm11
+ pxor xmm1, xmm2
+ pxor xmm9, xmm10
+ movdqa xmm4, xmm1
+ pslld xmm1, 25
+ psrld xmm4, 7
+ por xmm1, xmm4
+ movdqa xmm4, xmm9
+ pslld xmm9, 25
+ psrld xmm4, 7
+ por xmm9, xmm4
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm8, xmm8, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm11, xmm11, 4EH
+ pshufd xmm2, xmm2, 93H
+ pshufd xmm10, xmm10, 93H
+ dec al
+ je endroundloop2
+ movdqa xmm12, xmmword ptr [rsp+20H]
+ movdqa xmm5, xmmword ptr [rsp+40H]
+ pshufd xmm13, xmm12, 0FH
+ shufps xmm12, xmm5, 214
+ pshufd xmm4, xmm12, 39H
+ movdqa xmm12, xmm6
+ shufps xmm12, xmm7, 250
+ pblendw xmm13, xmm12, 0CCH
+ movdqa xmm12, xmm7
+ punpcklqdq xmm12, xmm5
+ pblendw xmm12, xmm6, 0C0H
+ pshufd xmm12, xmm12, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmmword ptr [rsp+20H], xmm13
+ movdqa xmmword ptr [rsp+40H], xmm12
+ movdqa xmm5, xmmword ptr [rsp+30H]
+ movdqa xmm13, xmmword ptr [rsp+50H]
+ pshufd xmm6, xmm5, 0FH
+ shufps xmm5, xmm13, 214
+ pshufd xmm12, xmm5, 39H
+ movdqa xmm5, xmm14
+ shufps xmm5, xmm15, 250
+ pblendw xmm6, xmm5, 0CCH
+ movdqa xmm5, xmm15
+ punpcklqdq xmm5, xmm13
+ pblendw xmm5, xmm14, 0C0H
+ pshufd xmm5, xmm5, 78H
+ punpckhdq xmm13, xmm15
+ punpckldq xmm14, xmm13
+ pshufd xmm15, xmm14, 1EH
+ movdqa xmm13, xmm6
+ movdqa xmm14, xmm5
+ movdqa xmm5, xmmword ptr [rsp+20H]
+ movdqa xmm6, xmmword ptr [rsp+40H]
+ jmp roundloop2
+endroundloop2:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm8, xmm10
+ pxor xmm9, xmm11
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop2
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ movups xmmword ptr [rbx+20H], xmm8
+ movups xmmword ptr [rbx+30H], xmm9
+ movdqa xmm0, xmmword ptr [rsp+130H]
+ movdqa xmm1, xmmword ptr [rsp+110H]
+ movdqa xmm2, xmmword ptr [rsp+120H]
+ movdqu xmm3, xmmword ptr [rsp+118H]
+ movdqu xmm4, xmmword ptr [rsp+128H]
+ blendvps xmm1, xmm3, xmm0
+ blendvps xmm2, xmm4, xmm0
+ movdqa xmmword ptr [rsp+110H], xmm1
+ movdqa xmmword ptr [rsp+120H], xmm2
+ add rdi, 16
+ add rbx, 64
+ sub rsi, 2
+final1block:
+ test esi, 1H
+ je unwind
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movd xmm13, dword ptr [rsp+110H]
+ pinsrd xmm13, dword ptr [rsp+120H], 1
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN], 2
+ movaps xmm14, xmmword ptr [ROT8]
+ movaps xmm15, xmmword ptr [ROT16]
+ mov r8, qword ptr [rdi]
+ movzx eax, byte ptr [rbp+80H]
+ or eax, r13d
+ xor edx, edx
+innerloop1:
+ mov r14d, eax
+ or eax, r12d
+ add rdx, 64
+ cmp rdx, r15
+ cmovne eax, r14d
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movaps xmm3, xmm13
+ pinsrd xmm3, eax, 3
+ movups xmm4, xmmword ptr [r8+rdx-40H]
+ movups xmm5, xmmword ptr [r8+rdx-30H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [r8+rdx-20H]
+ movups xmm7, xmmword ptr [r8+rdx-10H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ mov al, 7
+roundloop1:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz endroundloop1
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp roundloop1
+endroundloop1:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne innerloop1
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+10H], xmm1
+ jmp unwind
+_blake3_hash_many_sse41 ENDP
+blake3_hash_many_sse41 ENDP
+
+blake3_compress_in_place_sse41 PROC
+_blake3_compress_in_place_sse41 PROC
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movzx eax, byte ptr [rsp+0A0H]
+ movzx r8d, r8b
+ shl rax, 32
+ add r8, rax
+ movd xmm3, r9
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+10H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+20H]
+ movups xmm7, xmmword ptr [rdx+30H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ movaps xmm14, xmmword ptr [ROT8]
+ movaps xmm15, xmmword ptr [ROT16]
+ mov al, 7
+@@:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp @B
+@@:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ movups xmmword ptr [rcx], xmm0
+ movups xmmword ptr [rcx+10H], xmm1
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120
+ ret
+_blake3_compress_in_place_sse41 ENDP
+blake3_compress_in_place_sse41 ENDP
+
+ALIGN 16
+blake3_compress_xof_sse41 PROC
+_blake3_compress_xof_sse41 PROC
+ sub rsp, 120
+ movdqa xmmword ptr [rsp], xmm6
+ movdqa xmmword ptr [rsp+10H], xmm7
+ movdqa xmmword ptr [rsp+20H], xmm8
+ movdqa xmmword ptr [rsp+30H], xmm9
+ movdqa xmmword ptr [rsp+40H], xmm11
+ movdqa xmmword ptr [rsp+50H], xmm14
+ movdqa xmmword ptr [rsp+60H], xmm15
+ movups xmm0, xmmword ptr [rcx]
+ movups xmm1, xmmword ptr [rcx+10H]
+ movaps xmm2, xmmword ptr [BLAKE3_IV]
+ movzx eax, byte ptr [rsp+0A0H]
+ movzx r8d, r8b
+ mov r10, qword ptr [rsp+0A8H]
+ shl rax, 32
+ add r8, rax
+ movd xmm3, r9
+ movd xmm4, r8
+ punpcklqdq xmm3, xmm4
+ movups xmm4, xmmword ptr [rdx]
+ movups xmm5, xmmword ptr [rdx+10H]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136
+ shufps xmm8, xmm5, 221
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rdx+20H]
+ movups xmm7, xmmword ptr [rdx+30H]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136
+ pshufd xmm6, xmm6, 93H
+ shufps xmm8, xmm7, 221
+ pshufd xmm7, xmm8, 93H
+ movaps xmm14, xmmword ptr [ROT8]
+ movaps xmm15, xmmword ptr [ROT16]
+ mov al, 7
+@@:
+ paddd xmm0, xmm4
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm5
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 93H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 39H
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm15
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshufb xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 39H
+ pshufd xmm3, xmm3, 4EH
+ pshufd xmm2, xmm2, 93H
+ dec al
+ jz @F
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0FH
+ pshufd xmm4, xmm8, 39H
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pblendw xmm9, xmm8, 0CCH
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ pblendw xmm8, xmm6, 0C0H
+ pshufd xmm8, xmm8, 78H
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 1EH
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp @B
+@@:
+ movdqu xmm4, xmmword ptr [rcx]
+ movdqu xmm5, xmmword ptr [rcx+10H]
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ pxor xmm2, xmm4
+ pxor xmm3, xmm5
+ movups xmmword ptr [r10], xmm0
+ movups xmmword ptr [r10+10H], xmm1
+ movups xmmword ptr [r10+20H], xmm2
+ movups xmmword ptr [r10+30H], xmm3
+ movdqa xmm6, xmmword ptr [rsp]
+ movdqa xmm7, xmmword ptr [rsp+10H]
+ movdqa xmm8, xmmword ptr [rsp+20H]
+ movdqa xmm9, xmmword ptr [rsp+30H]
+ movdqa xmm11, xmmword ptr [rsp+40H]
+ movdqa xmm14, xmmword ptr [rsp+50H]
+ movdqa xmm15, xmmword ptr [rsp+60H]
+ add rsp, 120
+ ret
+_blake3_compress_xof_sse41 ENDP
+blake3_compress_xof_sse41 ENDP
+
+_TEXT ENDS
+
+
+_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
+ALIGN 64
+BLAKE3_IV:
+ dd 6A09E667H, 0BB67AE85H, 3C6EF372H, 0A54FF53AH
+
+ADD0:
+ dd 0, 1, 2, 3
+
+ADD1:
+ dd 4 dup (4)
+
+BLAKE3_IV_0:
+ dd 4 dup (6A09E667H)
+
+BLAKE3_IV_1:
+ dd 4 dup (0BB67AE85H)
+
+BLAKE3_IV_2:
+ dd 4 dup (3C6EF372H)
+
+BLAKE3_IV_3:
+ dd 4 dup (0A54FF53AH)
+
+BLAKE3_BLOCK_LEN:
+ dd 4 dup (64)
+
+ROT16:
+ db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+
+ROT8:
+ db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
+
+CMP_MSB_MASK:
+ dd 8 dup(80000000H)
+
+_RDATA ENDS
+END
+
diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp
index 8c9efa0ed9a9..dc4ea200c7be 100644
--- a/llvm/lib/Support/BinaryStreamWriter.cpp
+++ b/llvm/lib/Support/BinaryStreamWriter.cpp
@@ -8,7 +8,6 @@
#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/BinaryStreamError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/LEB128.h"
@@ -94,10 +93,11 @@ BinaryStreamWriter::split(uint64_t Off) const {
Error BinaryStreamWriter::padToAlignment(uint32_t Align) {
uint64_t NewOffset = alignTo(Offset, Align);
- if (NewOffset > getLength())
- return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
+ const uint64_t ZerosSize = 64;
+ static constexpr char Zeros[ZerosSize] = {};
while (Offset < NewOffset)
- if (auto EC = writeInteger('\0'))
- return EC;
+ if (auto E = writeArray(
+ ArrayRef<char>(Zeros, std::min(ZerosSize, NewOffset - Offset))))
+ return E;
return Error::success();
}
diff --git a/llvm/lib/Support/CSKYAttributeParser.cpp b/llvm/lib/Support/CSKYAttributeParser.cpp
new file mode 100644
index 000000000000..ea1ac9232315
--- /dev/null
+++ b/llvm/lib/Support/CSKYAttributeParser.cpp
@@ -0,0 +1,155 @@
+//===-- CSKYAttributeParser.cpp - CSKY Attribute Parser -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CSKYAttributeParser.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Errc.h"
+
+using namespace llvm;
+
+const CSKYAttributeParser::DisplayHandler
+ CSKYAttributeParser::displayRoutines[] = {
+ {
+ CSKYAttrs::CSKY_ARCH_NAME,
+ &ELFAttributeParser::stringAttribute,
+ },
+ {
+ CSKYAttrs::CSKY_CPU_NAME,
+ &ELFAttributeParser::stringAttribute,
+ },
+ {
+ CSKYAttrs::CSKY_ISA_FLAGS,
+ &ELFAttributeParser::integerAttribute,
+ },
+ {
+ CSKYAttrs::CSKY_ISA_EXT_FLAGS,
+ &ELFAttributeParser::integerAttribute,
+ },
+ {
+ CSKYAttrs::CSKY_DSP_VERSION,
+ &CSKYAttributeParser::dspVersion,
+ },
+ {
+ CSKYAttrs::CSKY_VDSP_VERSION,
+ &CSKYAttributeParser::vdspVersion,
+ },
+ {
+ CSKYAttrs::CSKY_FPU_VERSION,
+ &CSKYAttributeParser::fpuVersion,
+ },
+ {
+ CSKYAttrs::CSKY_FPU_ABI,
+ &CSKYAttributeParser::fpuABI,
+ },
+ {
+ CSKYAttrs::CSKY_FPU_ROUNDING,
+ &CSKYAttributeParser::fpuRounding,
+ },
+ {
+ CSKYAttrs::CSKY_FPU_DENORMAL,
+ &CSKYAttributeParser::fpuDenormal,
+ },
+ {
+ CSKYAttrs::CSKY_FPU_EXCEPTION,
+ &CSKYAttributeParser::fpuException,
+ },
+ {
+ CSKYAttrs::CSKY_FPU_NUMBER_MODULE,
+ &ELFAttributeParser::stringAttribute,
+ },
+ {
+ CSKYAttrs::CSKY_FPU_HARDFP,
+ &CSKYAttributeParser::fpuHardFP,
+ }};
+
+Error CSKYAttributeParser::handler(uint64_t tag, bool &handled) {
+ handled = false;
+ for (unsigned AHI = 0, AHE = array_lengthof(displayRoutines); AHI != AHE;
+ ++AHI) {
+ if (uint64_t(displayRoutines[AHI].attribute) == tag) {
+ if (Error e = (this->*displayRoutines[AHI].routine)(tag))
+ return e;
+ handled = true;
+ break;
+ }
+ }
+
+ return Error::success();
+}
+
+Error CSKYAttributeParser::dspVersion(unsigned tag) {
+ static const char *strings[] = {"Error", "DSP Extension", "DSP 2.0"};
+ return parseStringAttribute("Tag_CSKY_DSP_VERSION", tag,
+ makeArrayRef(strings));
+}
+
+Error CSKYAttributeParser::vdspVersion(unsigned tag) {
+ static const char *strings[] = {"Error", "VDSP Version 1", "VDSP Version 2"};
+ return parseStringAttribute("Tag_CSKY_VDSP_VERSION", tag,
+ makeArrayRef(strings));
+}
+
+Error CSKYAttributeParser::fpuVersion(unsigned tag) {
+ static const char *strings[] = {"Error", "FPU Version 1", "FPU Version 2",
+ "FPU Version 3"};
+ return parseStringAttribute("Tag_CSKY_FPU_VERSION", tag,
+ makeArrayRef(strings));
+}
+
+Error CSKYAttributeParser::fpuABI(unsigned tag) {
+ static const char *strings[] = {"Error", "Soft", "SoftFP", "Hard"};
+ return parseStringAttribute("Tag_CSKY_FPU_ABI", tag, makeArrayRef(strings));
+}
+
+Error CSKYAttributeParser::fpuRounding(unsigned tag) {
+ static const char *strings[] = {"None", "Needed"};
+ return parseStringAttribute("Tag_CSKY_FPU_ROUNDING", tag,
+ makeArrayRef(strings));
+}
+
+Error CSKYAttributeParser::fpuDenormal(unsigned tag) {
+ static const char *strings[] = {"None", "Needed"};
+ return parseStringAttribute("Tag_CSKY_FPU_DENORMAL", tag,
+ makeArrayRef(strings));
+}
+
+Error CSKYAttributeParser::fpuException(unsigned tag) {
+ static const char *strings[] = {"None", "Needed"};
+ return parseStringAttribute("Tag_CSKY_FPU_EXCEPTION", tag,
+ makeArrayRef(strings));
+}
+
+Error CSKYAttributeParser::fpuHardFP(unsigned tag) {
+ uint64_t value = de.getULEB128(cursor);
+ ListSeparator LS(" ");
+
+ std::string description;
+
+ if (value & 0x1) {
+ description += LS;
+ description += "Half";
+ }
+ if ((value >> 1) & 0x1) {
+ description += LS;
+ description += "Single";
+ }
+ if ((value >> 2) & 0x1) {
+ description += LS;
+ description += "Double";
+ }
+
+ if (description.empty()) {
+ printAttribute(tag, value, "");
+ return createStringError(errc::invalid_argument,
+ "unknown Tag_CSKY_FPU_HARDFP value: " +
+ Twine(value));
+ }
+
+ printAttribute(tag, value, description);
+ return Error::success();
+}
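fpuHardFP decodes a ULEB128 bitmask in which bits 0, 1, and 2 select "Half",
"Single", and "Double": for example, the value 5 (binary 101) prints
"Half Double", and 0 is rejected as an unknown value. A standalone sketch of
just the bit decoding (hypothetical helper, not part of the patch):

    #include <cstdint>
    #include <string>

    std::string decodeHardFP(uint64_t Value) {
      std::string Desc;
      auto Append = [&](const char *Name) {
        if (!Desc.empty())
          Desc += ' ';
        Desc += Name;
      };
      if (Value & 0x1) Append("Half");
      if (Value & 0x2) Append("Single");
      if (Value & 0x4) Append("Double");
      return Desc; // empty => unknown Tag_CSKY_FPU_HARDFP value
    }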
diff --git a/llvm/lib/Support/CSKYAttributes.cpp b/llvm/lib/Support/CSKYAttributes.cpp
new file mode 100644
index 000000000000..6130517e44e3
--- /dev/null
+++ b/llvm/lib/Support/CSKYAttributes.cpp
@@ -0,0 +1,33 @@
+//===-- CSKYAttributes.cpp - CSKY Attributes ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CSKYAttributes.h"
+
+using namespace llvm;
+using namespace llvm::CSKYAttrs;
+
+static const TagNameItem tagData[] = {
+ {CSKY_ARCH_NAME, "Tag_CSKY_ARCH_NAME"},
+ {CSKY_CPU_NAME, "Tag_CSKY_CPU_NAME"},
+ {CSKY_ISA_FLAGS, "Tag_CSKY_ISA_FLAGS"},
+ {CSKY_ISA_EXT_FLAGS, "Tag_CSKY_ISA_EXT_FLAGS"},
+ {CSKY_DSP_VERSION, "Tag_CSKY_DSP_VERSION"},
+ {CSKY_VDSP_VERSION, "Tag_CSKY_VDSP_VERSION"},
+ {CSKY_FPU_VERSION, "Tag_CSKY_FPU_VERSION"},
+ {CSKY_FPU_ABI, "Tag_CSKY_FPU_ABI"},
+ {CSKY_FPU_ROUNDING, "Tag_CSKY_FPU_ROUNDING"},
+ {CSKY_FPU_DENORMAL, "Tag_CSKY_FPU_DENORMAL"},
+ {CSKY_FPU_EXCEPTION, "Tag_CSKY_FPU_EXCEPTION"},
+ {CSKY_FPU_NUMBER_MODULE, "Tag_CSKY_FPU_NUMBER_MODULE"},
+ {CSKY_FPU_HARDFP, "Tag_CSKY_FPU_HARDFP"}};
+
+constexpr TagNameMap CSKYAttributeTags{tagData};
+const TagNameMap &llvm::CSKYAttrs::getCSKYAttributeTags() {
+ return CSKYAttributeTags;
+}
diff --git a/llvm/lib/Support/CSKYTargetParser.cpp b/llvm/lib/Support/CSKYTargetParser.cpp
new file mode 100644
index 000000000000..7e9d2ca0428d
--- /dev/null
+++ b/llvm/lib/Support/CSKYTargetParser.cpp
@@ -0,0 +1,181 @@
+//===-- CSKYTargetParser.cpp - Parser for CSKY target features -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a target parser to recognise CSKY hardware features
+// such as CPU/ARCH names.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CSKYTargetParser.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+
+bool CSKY::getFPUFeatures(CSKYFPUKind CSKYFPUKind,
+ std::vector<StringRef> &Features) {
+
+ if (CSKYFPUKind >= FK_LAST || CSKYFPUKind == FK_INVALID)
+ return false;
+
+ switch (CSKYFPUKind) {
+ case FK_AUTO:
+ Features.push_back("+fpuv2_sf");
+ Features.push_back("+fpuv2_df");
+ Features.push_back("+fdivdu");
+ break;
+ case FK_FPV2:
+ Features.push_back("+fpuv2_sf");
+ Features.push_back("+fpuv2_df");
+ break;
+ case FK_FPV2_DIVD:
+ Features.push_back("+fpuv2_sf");
+ Features.push_back("+fpuv2_df");
+ Features.push_back("+fdivdu");
+ break;
+ case FK_FPV2_SF:
+ Features.push_back("+fpuv2_sf");
+ break;
+ case FK_FPV3:
+ Features.push_back("+fpuv3_hf");
+ Features.push_back("+fpuv3_hi");
+ Features.push_back("+fpuv3_sf");
+ Features.push_back("+fpuv3_df");
+ break;
+ case FK_FPV3_HF:
+ Features.push_back("+fpuv3_hf");
+ Features.push_back("+fpuv3_hi");
+ break;
+ case FK_FPV3_HSF:
+ Features.push_back("+fpuv3_hf");
+ Features.push_back("+fpuv3_hi");
+ Features.push_back("+fpuv3_sf");
+ break;
+ case FK_FPV3_SDF:
+ Features.push_back("+fpuv3_sf");
+ Features.push_back("+fpuv3_df");
+ break;
+ default:
+ llvm_unreachable("Unknown FPU Kind");
+ return false;
+ }
+
+ return true;
+}
+
+// ======================================================= //
+// Information by ID
+// ======================================================= //
+
+StringRef CSKY::getArchName(ArchKind AK) {
+ return ARCHNames[static_cast<unsigned>(AK)].getName();
+}
+
+// The default CPU name is the same as the arch name.
+StringRef CSKY::getDefaultCPU(StringRef Arch) {
+ ArchKind AK = parseArch(Arch);
+ if (AK == CSKY::ArchKind::INVALID)
+ return StringRef();
+
+ return Arch;
+}
+
+// ======================================================= //
+// Parsers
+// ======================================================= //
+CSKY::ArchKind CSKY::parseArch(StringRef Arch) {
+ for (const auto A : ARCHNames) {
+ if (A.getName() == Arch)
+ return A.ID;
+ }
+
+ return CSKY::ArchKind::INVALID;
+}
+
+CSKY::ArchKind CSKY::parseCPUArch(StringRef CPU) {
+ for (const auto C : CPUNames) {
+ if (CPU == C.getName())
+ return C.ArchID;
+ }
+
+ return CSKY::ArchKind::INVALID;
+}
+
+uint64_t CSKY::parseArchExt(StringRef ArchExt) {
+ for (const auto &A : CSKYARCHExtNames) {
+ if (ArchExt == A.getName())
+ return A.ID;
+ }
+ return AEK_INVALID;
+}
+
+void CSKY::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
+ for (const CpuNames<CSKY::ArchKind> &Arch : CPUNames) {
+ if (Arch.ArchID != CSKY::ArchKind::INVALID)
+ Values.push_back(Arch.getName());
+ }
+}
+
+StringRef CSKY::getFPUName(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return StringRef();
+ return FPUNames[FPUKind].getName();
+}
+
+CSKY::FPUVersion CSKY::getFPUVersion(unsigned FPUKind) {
+ if (FPUKind >= FK_LAST)
+ return FPUVersion::NONE;
+ return FPUNames[FPUKind].FPUVer;
+}
+
+uint64_t CSKY::getDefaultExtensions(StringRef CPU) {
+ return StringSwitch<uint64_t>(CPU)
+#define CSKY_CPU_NAME(NAME, ID, DEFAULT_EXT) \
+ .Case(NAME, ARCHNames[static_cast<unsigned>(ArchKind::ID)].archBaseExt | \
+ DEFAULT_EXT)
+#include "llvm/Support/CSKYTargetParser.def"
+ .Default(CSKY::AEK_INVALID);
+}
+
+StringRef CSKY::getArchExtName(uint64_t ArchExtKind) {
+ for (const auto &AE : CSKYARCHExtNames)
+ if (ArchExtKind == AE.ID)
+ return AE.getName();
+ return StringRef();
+}
+
+static bool stripNegationPrefix(StringRef &Name) {
+ if (Name.startswith("no")) {
+ Name = Name.substr(2);
+ return true;
+ }
+ return false;
+}
+
+StringRef CSKY::getArchExtFeature(StringRef ArchExt) {
+ bool Negated = stripNegationPrefix(ArchExt);
+ for (const auto &AE : CSKYARCHExtNames) {
+ if (AE.Feature && ArchExt == AE.getName())
+ return StringRef(Negated ? AE.NegFeature : AE.Feature);
+ }
+
+ return StringRef();
+}
+
+bool CSKY::getExtensionFeatures(uint64_t Extensions,
+ std::vector<StringRef> &Features) {
+ if (Extensions == CSKY::AEK_INVALID)
+ return false;
+
+ for (const auto &AE : CSKYARCHExtNames) {
+ if ((Extensions & AE.ID) == AE.ID && AE.Feature)
+ Features.push_back(AE.Feature);
+ }
+
+ return true;
+}
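Callers map a parsed -mfpu value onto backend feature strings through
getFPUFeatures. A hedged sketch (FK_FPV2 and the feature names come from the
switch above; the print loop is illustrative):

    #include "llvm/Support/CSKYTargetParser.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>
    using namespace llvm;

    void printFPV2Features() {
      std::vector<StringRef> Features;
      if (CSKY::getFPUFeatures(CSKY::FK_FPV2, Features))
        for (StringRef F : Features)
          outs() << F << "\n"; // "+fpuv2_sf" then "+fpuv2_df"
    }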
diff --git a/llvm/lib/Support/CodeGenCoverage.cpp b/llvm/lib/Support/CodeGenCoverage.cpp
index 73e0fb3edce8..d5ab77b9c66f 100644
--- a/llvm/lib/Support/CodeGenCoverage.cpp
+++ b/llvm/lib/Support/CodeGenCoverage.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
static sys::SmartMutex<true> OutputMutex;
-CodeGenCoverage::CodeGenCoverage() {}
+CodeGenCoverage::CodeGenCoverage() = default;
void CodeGenCoverage::setCovered(uint64_t RuleID) {
if (RuleCoverage.size() <= RuleID)
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 71a6ebf2a72e..eb6c04d987b3 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -166,7 +166,7 @@ public:
// This collects the different subcommands that have been registered.
SmallPtrSet<SubCommand *, 4> RegisteredSubCommands;
- CommandLineParser() : ActiveSubCommand(nullptr) {
+ CommandLineParser() {
registerSubCommand(&*TopLevelSubCommand);
registerSubCommand(&*AllSubCommands);
}
@@ -418,7 +418,7 @@ public:
}
private:
- SubCommand *ActiveSubCommand;
+ SubCommand *ActiveSubCommand = nullptr;
Option *LookupOption(SubCommand &Sub, StringRef &Arg, StringRef &Value);
Option *LookupLongOption(SubCommand &Sub, StringRef &Arg, StringRef &Value,
@@ -918,21 +918,34 @@ static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) {
return I - 1;
}
-// Windows treats whitespace, double quotes, and backslashes specially.
+// Windows treats whitespace, double quotes, and backslashes specially, except
+// when parsing the first token of a full command line, in which case
+// backslashes are not special.
static bool isWindowsSpecialChar(char C) {
return isWhitespaceOrNull(C) || C == '\\' || C == '\"';
}
+static bool isWindowsSpecialCharInCommandName(char C) {
+ return isWhitespaceOrNull(C) || C == '\"';
+}
// Windows tokenization implementation. The implementation is designed to be
// inlined and specialized for the two user entry points.
-static inline void
-tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
- function_ref<void(StringRef)> AddToken,
- bool AlwaysCopy, function_ref<void()> MarkEOL) {
+static inline void tokenizeWindowsCommandLineImpl(
+ StringRef Src, StringSaver &Saver, function_ref<void(StringRef)> AddToken,
+ bool AlwaysCopy, function_ref<void()> MarkEOL, bool InitialCommandName) {
SmallString<128> Token;
+ // Sometimes, this function will be handling a full command line including an
+ // executable pathname at the start. In that situation, the initial pathname
+ // needs different handling from the following arguments, because when
+ // CreateProcess or cmd.exe scans the pathname, it doesn't treat \ as
+ // escaping the quote character, whereas when libc scans the rest of the
+ // command line, it does.
+ bool CommandName = InitialCommandName;
+
// Try to do as much work inside the state machine as possible.
enum { INIT, UNQUOTED, QUOTED } State = INIT;
+
for (size_t I = 0, E = Src.size(); I < E; ++I) {
switch (State) {
case INIT: {
@@ -947,19 +960,29 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
if (I >= E)
break;
size_t Start = I;
- while (I < E && !isWindowsSpecialChar(Src[I]))
- ++I;
+ if (CommandName) {
+ while (I < E && !isWindowsSpecialCharInCommandName(Src[I]))
+ ++I;
+ } else {
+ while (I < E && !isWindowsSpecialChar(Src[I]))
+ ++I;
+ }
StringRef NormalChars = Src.slice(Start, I);
if (I >= E || isWhitespaceOrNull(Src[I])) {
// No special characters: slice out the substring and start the next
// token. Copy the string if the caller asks us to.
AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
- if (I < E && Src[I] == '\n')
+ if (I < E && Src[I] == '\n') {
MarkEOL();
+ CommandName = InitialCommandName;
+ } else {
+ CommandName = false;
+ }
} else if (Src[I] == '\"') {
Token += NormalChars;
State = QUOTED;
} else if (Src[I] == '\\') {
+ assert(!CommandName && "or else we'd have treated it as a normal char");
Token += NormalChars;
I = parseBackslash(Src, I, Token);
State = UNQUOTED;
@@ -976,12 +999,16 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
// token.
AddToken(Saver.save(Token.str()));
Token.clear();
- if (Src[I] == '\n')
+ if (Src[I] == '\n') {
+ CommandName = InitialCommandName;
MarkEOL();
+ } else {
+ CommandName = false;
+ }
State = INIT;
} else if (Src[I] == '\"') {
State = QUOTED;
- } else if (Src[I] == '\\') {
+ } else if (Src[I] == '\\' && !CommandName) {
I = parseBackslash(Src, I, Token);
} else {
Token.push_back(Src[I]);
@@ -999,7 +1026,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
// Otherwise, end the quoted portion and return to the unquoted state.
State = UNQUOTED;
}
- } else if (Src[I] == '\\') {
+ } else if (Src[I] == '\\' && !CommandName) {
I = parseBackslash(Src, I, Token);
} else {
Token.push_back(Src[I]);
@@ -1008,7 +1035,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
}
}
- if (State == UNQUOTED)
+ if (State != INIT)
AddToken(Saver.save(Token.str()));
}
@@ -1021,7 +1048,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
NewArgv.push_back(nullptr);
};
tokenizeWindowsCommandLineImpl(Src, Saver, AddToken,
- /*AlwaysCopy=*/true, OnEOL);
+ /*AlwaysCopy=*/true, OnEOL, false);
}
void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver,
@@ -1029,7 +1056,19 @@ void cl::TokenizeWindowsCommandLineNoCopy(StringRef Src, StringSaver &Saver,
auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok); };
auto OnEOL = []() {};
tokenizeWindowsCommandLineImpl(Src, Saver, AddToken, /*AlwaysCopy=*/false,
- OnEOL);
+ OnEOL, false);
+}
+
+void cl::TokenizeWindowsCommandLineFull(StringRef Src, StringSaver &Saver,
+ SmallVectorImpl<const char *> &NewArgv,
+ bool MarkEOLs) {
+ auto AddToken = [&](StringRef Tok) { NewArgv.push_back(Tok.data()); };
+ auto OnEOL = [&]() {
+ if (MarkEOLs)
+ NewArgv.push_back(nullptr);
+ };
+ tokenizeWindowsCommandLineImpl(Src, Saver, AddToken,
+ /*AlwaysCopy=*/true, OnEOL, true);
}
void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver,
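The new TokenizeWindowsCommandLineFull above is intended for a full command
line whose first token is the program path: in that position backslashes are
literal, so a path such as C:\tools\foo.exe survives as one token, while later
arguments still get the usual libc-style quoting rules. A hedged sketch
(string contents are illustrative):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/StringSaver.h"
    using namespace llvm;

    void splitFullCommandLine() {
      BumpPtrAllocator Alloc;
      StringSaver Saver(Alloc);
      SmallVector<const char *, 4> Argv;
      cl::TokenizeWindowsCommandLineFull("C:\\tools\\foo.exe \"a b\" c", Saver,
                                         Argv, /*MarkEOLs=*/false);
      // Argv == {"C:\tools\foo.exe", "a b", "c"}
    }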
@@ -1737,21 +1776,6 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName, StringRef Value,
if (!MultiArg)
NumOccurrences++; // Increment the number of times we have been seen
- switch (getNumOccurrencesFlag()) {
- case Optional:
- if (NumOccurrences > 1)
- return error("may only occur zero or one times!", ArgName);
- break;
- case Required:
- if (NumOccurrences > 1)
- return error("must occur exactly one time!", ArgName);
- LLVM_FALLTHROUGH;
- case OneOrMore:
- case ZeroOrMore:
- case ConsumeAfter:
- break;
- }
-
return handleOccurrence(pos, ArgName, Value);
}
@@ -2236,7 +2260,7 @@ protected:
public:
explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {}
- virtual ~HelpPrinter() {}
+ virtual ~HelpPrinter() = default;
// Invoke the printer.
void operator=(bool Value) {
@@ -2444,11 +2468,7 @@ public:
#else
OS << "LLVM (http://llvm.org/):\n ";
#endif
- OS << PACKAGE_NAME << " version " << PACKAGE_VERSION;
-#ifdef LLVM_VERSION_INFO
- OS << " " << LLVM_VERSION_INFO;
-#endif
- OS << "\n ";
+ OS << PACKAGE_NAME << " version " << PACKAGE_VERSION << "\n ";
#if LLVM_IS_DEBUG_BUILD
OS << "DEBUG build";
#else
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index ccf6ef4bb662..983a6348bbe4 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -46,18 +46,20 @@ static StringRef convertZlibCodeToString(int Code) {
bool zlib::isAvailable() { return true; }
-Error zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer, int Level) {
+void zlib::compress(StringRef InputBuffer,
+ SmallVectorImpl<char> &CompressedBuffer, int Level) {
unsigned long CompressedSize = ::compressBound(InputBuffer.size());
CompressedBuffer.resize_for_overwrite(CompressedSize);
int Res =
::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
(const Bytef *)InputBuffer.data(), InputBuffer.size(), Level);
+ if (Res == Z_MEM_ERROR)
+ report_bad_alloc_error("Allocation failed");
+ assert(Res == Z_OK);
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(CompressedBuffer.data(), CompressedSize);
CompressedBuffer.truncate(CompressedSize);
- return Res ? createError(convertZlibCodeToString(Res)) : Error::success();
}
Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
@@ -87,8 +89,8 @@ uint32_t zlib::crc32(StringRef Buffer) {
#else
bool zlib::isAvailable() { return false; }
-Error zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer, int Level) {
+void zlib::compress(StringRef InputBuffer,
+ SmallVectorImpl<char> &CompressedBuffer, int Level) {
llvm_unreachable("zlib::compress is unavailable");
}
Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
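Because compressing into a compressBound-sized buffer can only fail on
allocation, zlib::compress now reports that through report_bad_alloc_error and
returns void; only decompression still returns an Error. A hedged round-trip
sketch:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/Compression.h"
    #include "llvm/Support/Error.h"
    using namespace llvm;

    Error roundTrip(StringRef Input) {
      SmallVector<char, 0> Compressed;
      zlib::compress(Input, Compressed); // no Error to check anymore
      SmallVector<char, 0> Decompressed;
      return zlib::uncompress(StringRef(Compressed.data(), Compressed.size()),
                              Decompressed, Input.size());
    }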
diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp
index 392c4c4890e1..9bf3f8f8b897 100644
--- a/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -34,31 +34,31 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
const UTF8 *sourceStart = (const UTF8*)Source.data();
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
+ UTF16 *targetStart = reinterpret_cast<UTF16 *>(ResultPtr);
ConversionFlags flags = strictConversion;
- result = ConvertUTF8toUTF16(
- &sourceStart, sourceStart + Source.size(),
- &targetStart, targetStart + Source.size(), flags);
+ result =
+ ConvertUTF8toUTF16(&sourceStart, sourceStart + Source.size(),
+ &targetStart, targetStart + Source.size(), flags);
if (result == conversionOK)
- ResultPtr = reinterpret_cast<char*>(targetStart);
+ ResultPtr = reinterpret_cast<char *>(targetStart);
else
ErrorPtr = sourceStart;
} else if (WideCharWidth == 4) {
- const UTF8 *sourceStart = (const UTF8*)Source.data();
+ const UTF8 *sourceStart = (const UTF8 *)Source.data();
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
- UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
+ UTF32 *targetStart = reinterpret_cast<UTF32 *>(ResultPtr);
ConversionFlags flags = strictConversion;
- result = ConvertUTF8toUTF32(
- &sourceStart, sourceStart + Source.size(),
- &targetStart, targetStart + Source.size(), flags);
+ result =
+ ConvertUTF8toUTF32(&sourceStart, sourceStart + Source.size(),
+ &targetStart, targetStart + Source.size(), flags);
if (result == conversionOK)
- ResultPtr = reinterpret_cast<char*>(targetStart);
+ ResultPtr = reinterpret_cast<char *>(targetStart);
else
ErrorPtr = sourceStart;
}
- assert((result != targetExhausted)
- && "ConvertUTF8toUTFXX exhausted target buffer");
+ assert((result != targetExhausted) &&
+ "ConvertUTF8toUTFXX exhausted target buffer");
return result == conversionOK;
}
@@ -67,20 +67,18 @@ bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
const UTF32 *SourceEnd = SourceStart + 1;
UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
UTF8 *TargetEnd = TargetStart + 4;
- ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
- &TargetStart, TargetEnd,
- strictConversion);
+ ConversionResult CR = ConvertUTF32toUTF8(
+ &SourceStart, SourceEnd, &TargetStart, TargetEnd, strictConversion);
if (CR != conversionOK)
return false;
- ResultPtr = reinterpret_cast<char*>(TargetStart);
+ ResultPtr = reinterpret_cast<char *>(TargetStart);
return true;
}
bool hasUTF16ByteOrderMark(ArrayRef<char> S) {
- return (S.size() >= 2 &&
- ((S[0] == '\xff' && S[1] == '\xfe') ||
- (S[0] == '\xfe' && S[1] == '\xff')));
+ return (S.size() >= 2 && ((S[0] == '\xff' && S[1] == '\xfe') ||
+ (S[0] == '\xfe' && S[1] == '\xff')));
}
bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
@@ -134,11 +132,69 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
return true;
}
-bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out)
-{
+bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out) {
return convertUTF16ToUTF8String(
llvm::ArrayRef<char>(reinterpret_cast<const char *>(Src.data()),
- Src.size() * sizeof(UTF16)), Out);
+ Src.size() * sizeof(UTF16)),
+ Out);
+}
+
+bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
+ assert(Out.empty());
+
+ // Error out on a byte count that is not a multiple of 4.
+ if (SrcBytes.size() % 4)
+ return false;
+
+ // Avoid OOB by returning early on empty input.
+ if (SrcBytes.empty())
+ return true;
+
+ const UTF32 *Src = reinterpret_cast<const UTF32 *>(SrcBytes.begin());
+ const UTF32 *SrcEnd = reinterpret_cast<const UTF32 *>(SrcBytes.end());
+
+ assert((uintptr_t)Src % sizeof(UTF32) == 0);
+
+ // Byteswap if necessary.
+ std::vector<UTF32> ByteSwapped;
+ if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_SWAPPED) {
+ ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
+ for (UTF32 &I : ByteSwapped)
+ I = llvm::ByteSwap_32(I);
+ Src = &ByteSwapped[0];
+ SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
+ }
+
+ // Skip the BOM for conversion.
+ if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_NATIVE)
+ Src++;
+
+ // Just allocate enough space up front. We'll shrink it later. Allocate
+ // enough that we can fit a null terminator without reallocating.
+ Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT + 1);
+ UTF8 *Dst = reinterpret_cast<UTF8 *>(&Out[0]);
+ UTF8 *DstEnd = Dst + Out.size();
+
+ ConversionResult CR =
+ ConvertUTF32toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion);
+ assert(CR != targetExhausted);
+
+ if (CR != conversionOK) {
+ Out.clear();
+ return false;
+ }
+
+ Out.resize(reinterpret_cast<char *>(Dst) - &Out[0]);
+ Out.push_back(0);
+ Out.pop_back();
+ return true;
+}
+
+bool convertUTF32ToUTF8String(ArrayRef<UTF32> Src, std::string &Out) {
+ return convertUTF32ToUTF8String(
+ llvm::ArrayRef<char>(reinterpret_cast<const char *>(Src.data()),
+ Src.size() * sizeof(UTF32)),
+ Out);
}
bool convertUTF8ToUTF16String(StringRef SrcUTF8,
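The new convertUTF32ToUTF8String overloads above handle byte-swapping and an
optional BOM before converting. A hedged usage sketch; the expected bytes are
the standard UTF-8 encoding of U+1F600:

    #include "llvm/Support/ConvertUTF.h"
    #include <cassert>
    #include <string>
    using namespace llvm;

    void convertEmoji() {
      const UTF32 Src[] = {0x1F600}; // native endianness, no BOM
      std::string Out;
      bool OK = convertUTF32ToUTF8String(makeArrayRef(Src), Out);
      assert(OK && Out == "\xF0\x9F\x98\x80");
      (void)OK;
    }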
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index 2ee3074b840e..292ba63d14aa 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -9,6 +9,7 @@
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadLocal.h"
@@ -16,10 +17,6 @@
#include <mutex>
#include <setjmp.h>
-#if !defined(_MSC_VER) && !defined(_WIN32)
-#include "llvm/Support/ExitCodes.h"
-#endif
-
using namespace llvm;
namespace {
@@ -97,7 +94,7 @@ static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContext>>
static void installExceptionOrSignalHandlers();
static void uninstallExceptionOrSignalHandlers();
-CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {}
+CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() = default;
CrashRecoveryContext::CrashRecoveryContext() {
// On Windows, if abort() was previously triggered (and caught by a previous
@@ -445,7 +442,7 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
llvm_unreachable("Most likely setjmp wasn't called!");
}
-bool CrashRecoveryContext::throwIfCrash(int RetCode) {
+bool CrashRecoveryContext::isCrash(int RetCode) {
#if defined(_WIN32)
// On Windows, the high bits are reserved for kernel return codes. Values
// starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000
@@ -454,12 +451,21 @@ bool CrashRecoveryContext::throwIfCrash(int RetCode) {
unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28;
if (Code != 0xC && Code != 8)
return false;
- ::RaiseException(RetCode, 0, 0, NULL);
#else
// On Unix, signals are represented by return codes of 128 or higher.
// Exit code 128 is a reserved value and should not be raised as a signal.
if (RetCode <= 128)
return false;
+#endif
+ return true;
+}
+
+bool CrashRecoveryContext::throwIfCrash(int RetCode) {
+ if (!isCrash(RetCode))
+ return false;
+#if defined(_WIN32)
+ ::RaiseException(RetCode, 0, 0, NULL);
+#else
llvm::sys::unregisterHandlers();
raise(RetCode - 128);
#endif
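Splitting isCrash out of throwIfCrash lets callers classify a child process
return code without re-raising the signal or exception in their own process.
A sketch, assuming RetCode comes from something like sys::ExecuteAndWait:

    #include "llvm/Support/CrashRecoveryContext.h"
    using namespace llvm;

    const char *classify(int RetCode) {
      if (CrashRecoveryContext::isCrash(RetCode))
        return "crashed"; // >128 on Unix; 0xC.../0x8... codes on Windows
      return RetCode == 0 ? "ok" : "failed";
    }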
diff --git a/llvm/lib/Support/Debug.cpp b/llvm/lib/Support/Debug.cpp
index 5470d931b00b..98a9ac4722b5 100644
--- a/llvm/lib/Support/Debug.cpp
+++ b/llvm/lib/Support/Debug.cpp
@@ -132,7 +132,7 @@ struct CreateDebugOnly {
"debug-only",
cl::desc("Enable a specific type of debug output (comma separated list "
"of types)"),
- cl::Hidden, cl::ZeroOrMore, cl::value_desc("debug string"),
+ cl::Hidden, cl::value_desc("debug string"),
cl::location(DebugOnlyOptLoc), cl::ValueRequired);
}
};
diff --git a/llvm/lib/Support/DebugCounter.cpp b/llvm/lib/Support/DebugCounter.cpp
index f553463be8df..bc2df37e773d 100644
--- a/llvm/lib/Support/DebugCounter.cpp
+++ b/llvm/lib/Support/DebugCounter.cpp
@@ -49,8 +49,7 @@ struct CreateDebugCounterOption {
return new DebugCounterList(
"debug-counter", cl::Hidden,
cl::desc("Comma separated list of debug counter skip and count"),
- cl::CommaSeparated, cl::ZeroOrMore,
- cl::location(DebugCounter::instance()));
+ cl::CommaSeparated, cl::location(DebugCounter::instance()));
}
};
} // namespace
diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp
index a2017a10ab3f..341de244547c 100644
--- a/llvm/lib/Support/DeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -11,8 +11,7 @@
#include <set>
using namespace llvm;
-DeltaAlgorithm::~DeltaAlgorithm() {
-}
+DeltaAlgorithm::~DeltaAlgorithm() = default;
bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) {
if (FailedTestsCache.count(Changes))
diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp
index 2bcdbdcdb9b0..7b9d7abe7545 100644
--- a/llvm/lib/Support/DynamicLibrary.cpp
+++ b/llvm/lib/Support/DynamicLibrary.cpp
@@ -12,14 +12,11 @@
#include "llvm/Support/DynamicLibrary.h"
#include "llvm-c/Support.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
-#include <cstdio>
-#include <cstring>
#include <vector>
using namespace llvm;
@@ -29,14 +26,14 @@ using namespace llvm::sys;
class DynamicLibrary::HandleSet {
typedef std::vector<void *> HandleList;
HandleList Handles;
- void *Process;
+ void *Process = nullptr;
public:
static void *DLOpen(const char *Filename, std::string *Err);
static void DLClose(void *Handle);
static void *DLSym(void *Handle, const char *Symbol);
- HandleSet() : Process(nullptr) {}
+ HandleSet() = default;
~HandleSet();
HandleList::iterator Find(void *Handle) { return find(Handles, Handle); }
diff --git a/llvm/lib/Support/Errno.cpp b/llvm/lib/Support/Errno.cpp
index d18231c6ebf5..7f665be8db6c 100644
--- a/llvm/lib/Support/Errno.cpp
+++ b/llvm/lib/Support/Errno.cpp
@@ -12,8 +12,7 @@
#include "llvm/Support/Errno.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string.h>
+#include <cstring>
#if HAVE_ERRNO_H
#include <errno.h>
diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp
index 80c0e00439a5..b8b3b7424ac6 100644
--- a/llvm/lib/Support/ErrorHandling.cpp
+++ b/llvm/lib/Support/ErrorHandling.cpp
@@ -119,7 +119,10 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
// files registered with RemoveFileOnSignal.
sys::RunInterruptHandlers();
- abort();
+ if (GenCrashDiag)
+ abort();
+ else
+ exit(1);
}
void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler,
diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp
index 489b8d119e6f..eda3eb044901 100644
--- a/llvm/lib/Support/FileUtilities.cpp
+++ b/llvm/lib/Support/FileUtilities.cpp
@@ -17,6 +17,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <cstdlib>
@@ -323,4 +324,69 @@ llvm::Error llvm::writeFileAtomically(
return Error::success();
}
+Expected<FilePermissionsApplier>
+FilePermissionsApplier::create(StringRef InputFilename) {
+ sys::fs::file_status Status;
+
+ if (InputFilename != "-") {
+ if (auto EC = sys::fs::status(InputFilename, Status))
+ return createFileError(InputFilename, EC);
+ } else {
+ Status.permissions(static_cast<sys::fs::perms>(0777));
+ }
+
+ return FilePermissionsApplier(InputFilename, Status);
+}
+
+Error FilePermissionsApplier::apply(
+ StringRef OutputFilename, bool CopyDates,
+ Optional<sys::fs::perms> OverwritePermissions) {
+ sys::fs::file_status Status = InputStatus;
+
+ if (OverwritePermissions)
+ Status.permissions(*OverwritePermissions);
+
+ int FD = 0;
+
+ // Writing to stdout should not be treated as an error here, just
+ // do not set access/modification times or permissions.
+ if (OutputFilename == "-")
+ return Error::success();
+
+ if (std::error_code EC = sys::fs::openFileForWrite(OutputFilename, FD,
+ sys::fs::CD_OpenExisting))
+ return createFileError(OutputFilename, EC);
+
+ if (CopyDates)
+ if (std::error_code EC = sys::fs::setLastAccessAndModificationTime(
+ FD, Status.getLastAccessedTime(), Status.getLastModificationTime()))
+ return createFileError(OutputFilename, EC);
+
+ sys::fs::file_status OStat;
+ if (std::error_code EC = sys::fs::status(FD, OStat))
+ return createFileError(OutputFilename, EC);
+ if (OStat.type() == sys::fs::file_type::regular_file) {
+#ifndef _WIN32
+ // Keep ownership if llvm-objcopy is called under root.
+ if (OutputFilename == InputFilename && OStat.getUser() == 0)
+ sys::fs::changeFileOwnership(FD, Status.getUser(), Status.getGroup());
+#endif
+
+ sys::fs::perms Perm = Status.permissions();
+ if (OutputFilename != InputFilename)
+ Perm = static_cast<sys::fs::perms>(Perm & ~sys::fs::getUmask() & ~06000);
+#ifdef _WIN32
+ if (std::error_code EC = sys::fs::setPermissions(OutputFilename, Perm))
+#else
+ if (std::error_code EC = sys::fs::setPermissions(FD, Perm))
+#endif
+ return createFileError(OutputFilename, EC);
+ }
+
+ if (std::error_code EC = sys::Process::SafelyCloseFileDescriptor(FD))
+ return createFileError(OutputFilename, EC);
+
+ return Error::success();
+}
+
char llvm::AtomicFileWriteError::ID;
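FilePermissionsApplier factors the permission and timestamp copying logic out
of tools like llvm-objcopy: capture the input file's status before writing,
then re-apply it to the finished output. A hedged sketch (filenames are
illustrative):

    #include "llvm/Support/FileUtilities.h"
    using namespace llvm;

    Error preservePermissions() {
      Expected<FilePermissionsApplier> ApplierOrErr =
          FilePermissionsApplier::create("input.o");
      if (!ApplierOrErr)
        return ApplierOrErr.takeError();
      // ... write output.o here ...
      return ApplierOrErr->apply("output.o", /*CopyDates=*/false,
                                 /*OverwritePermissions=*/None);
    }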
diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp
index e3d7168305af..178855289fe8 100644
--- a/llvm/lib/Support/FoldingSet.cpp
+++ b/llvm/lib/Support/FoldingSet.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
@@ -25,12 +24,6 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// FoldingSetNodeIDRef Implementation
-/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
-/// used to lookup the node in the FoldingSetBase.
-unsigned FoldingSetNodeIDRef::ComputeHash() const {
- return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
-}
-
bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
if (Size != RHS.Size) return false;
return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
@@ -49,41 +42,6 @@ bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
/// Add* - Add various data types to Bit data.
///
-void FoldingSetNodeID::AddPointer(const void *Ptr) {
- // Note: this adds pointers to the hash using sizes and endianness that
- // depend on the host. It doesn't matter, however, because hashing on
- // pointer values is inherently unstable. Nothing should depend on the
- // ordering of nodes in the folding set.
- static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long),
- "unexpected pointer size");
- AddInteger(reinterpret_cast<uintptr_t>(Ptr));
-}
-void FoldingSetNodeID::AddInteger(signed I) {
- Bits.push_back(I);
-}
-void FoldingSetNodeID::AddInteger(unsigned I) {
- Bits.push_back(I);
-}
-void FoldingSetNodeID::AddInteger(long I) {
- AddInteger((unsigned long)I);
-}
-void FoldingSetNodeID::AddInteger(unsigned long I) {
- if (sizeof(long) == sizeof(int))
- AddInteger(unsigned(I));
- else if (sizeof(long) == sizeof(long long)) {
- AddInteger((unsigned long long)I);
- } else {
- llvm_unreachable("unexpected sizeof(long)");
- }
-}
-void FoldingSetNodeID::AddInteger(long long I) {
- AddInteger((unsigned long long)I);
-}
-void FoldingSetNodeID::AddInteger(unsigned long long I) {
- AddInteger(unsigned(I));
- AddInteger(unsigned(I >> 32));
-}
-
void FoldingSetNodeID::AddString(StringRef String) {
unsigned Size = String.size();
@@ -145,12 +103,6 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
Bits.append(ID.Bits.begin(), ID.Bits.end());
}
-/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
-/// lookup the node in the FoldingSetBase.
-unsigned FoldingSetNodeID::ComputeHash() const {
- return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
-}
-
/// operator== - Used to compare two nodes to each other.
///
bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
diff --git a/llvm/lib/Support/FormatVariadic.cpp b/llvm/lib/Support/FormatVariadic.cpp
index f6d48bcd50e8..0709d65e81e0 100644
--- a/llvm/lib/Support/FormatVariadic.cpp
+++ b/llvm/lib/Support/FormatVariadic.cpp
@@ -130,7 +130,7 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
StringRef Right = Fmt.substr(BC + 1);
auto RI = parseReplacementItem(Spec);
- if (RI.hasValue())
+ if (RI)
return std::make_pair(*RI, Right);
// If there was an error parsing the replacement item, treat it as an
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index f6003b783245..08e3a27e0173 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -11,20 +11,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Host.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/BCD.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/X86TargetParser.h"
#include "llvm/Support/raw_ostream.h"
-#include <assert.h>
#include <string.h>
// Include the platform-specific parts of this class.
@@ -38,11 +33,16 @@
#ifdef _MSC_VER
#include <intrin.h>
#endif
-#if defined(__APPLE__) && (!defined(__x86_64__))
+#ifdef __MVS__
+#include "llvm/Support/BCD.h"
+#endif
+#if defined(__APPLE__)
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/machine.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
#endif
#ifdef _AIX
#include <sys/systemcfg.h>
@@ -296,6 +296,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
}
}
+ if (Implementer == "0xc0") { // Ampere Computing
+ return StringSwitch<const char *>(Part)
+ .Case("0xac3", "ampere1")
+ .Default("generic");
+ }
+
return "generic";
}
@@ -330,7 +336,7 @@ StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
case 3931:
case 3932:
default:
- return HaveVectorSupport? "arch14" : "zEC12";
+ return HaveVectorSupport? "z16" : "zEC12";
}
}
} // end anonymous namespace
@@ -380,6 +386,26 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
return "generic";
}
+StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
+ // Split the contents of /proc/cpuinfo into lines.
+ SmallVector<StringRef> Lines;
+ ProcCpuinfoContent.split(Lines, "\n");
+
+ // Look for uarch line to determine cpu name
+ StringRef UArch;
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
+ if (Lines[I].startswith("uarch")) {
+ UArch = Lines[I].substr(5).ltrim("\t :");
+ break;
+ }
+ }
+
+ return StringSwitch<const char *>(UArch)
+ .Case("sifive,u74-mc", "sifive-u74")
+ .Case("sifive,bullet0", "sifive-u74")
+ .Default("generic");
+}
+
StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
return "generic";
@@ -1034,9 +1060,9 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 25:
CPU = "znver3";
*Type = X86::AMDFAM19H;
- if (Model <= 0x0f) {
+ if (Model <= 0x0f || Model == 0x21) {
*Subtype = X86::AMDFAM19H_ZNVER3;
- break; // 00h-0Fh: Zen3
+ break; // 00h-0Fh, 21h: Zen3
}
break;
default:
@@ -1299,32 +1325,45 @@ StringRef sys::getHostCPUName() {
bool HaveVectorSupport = CVT[244] & 0x80;
return getCPUNameFromS390Model(Id, HaveVectorSupport);
}
-#elif defined(__APPLE__) && defined(__aarch64__)
-StringRef sys::getHostCPUName() {
- return "cyclone";
-}
-#elif defined(__APPLE__) && defined(__arm__)
-StringRef sys::getHostCPUName() {
- host_basic_info_data_t hostInfo;
- mach_msg_type_number_t infoCount;
+#elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
+#define CPUFAMILY_ARM_SWIFT 0x1e2d6381
+#define CPUFAMILY_ARM_CYCLONE 0x37a09642
+#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
+#define CPUFAMILY_ARM_TWISTER 0x92fb37c8
+#define CPUFAMILY_ARM_HURRICANE 0x67ceee93
+#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
+#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
+#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
+#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
- infoCount = HOST_BASIC_INFO_COUNT;
- mach_port_t hostPort = mach_host_self();
- host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
- &infoCount);
- mach_port_deallocate(mach_task_self(), hostPort);
+StringRef sys::getHostCPUName() {
+ uint32_t Family;
+ size_t Length = sizeof(Family);
+ sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
- if (hostInfo.cpu_type != CPU_TYPE_ARM) {
- assert(false && "CPUType not equal to ARM should not be possible on ARM");
- return "generic";
+ switch (Family) {
+ case CPUFAMILY_ARM_SWIFT:
+ return "swift";
+ case CPUFAMILY_ARM_CYCLONE:
+ return "apple-a7";
+ case CPUFAMILY_ARM_TYPHOON:
+ return "apple-a8";
+ case CPUFAMILY_ARM_TWISTER:
+ return "apple-a9";
+ case CPUFAMILY_ARM_HURRICANE:
+ return "apple-a10";
+ case CPUFAMILY_ARM_MONSOON_MISTRAL:
+ return "apple-a11";
+ case CPUFAMILY_ARM_VORTEX_TEMPEST:
+ return "apple-a12";
+ case CPUFAMILY_ARM_LIGHTNING_THUNDER:
+ return "apple-a13";
+ case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
+ return "apple-m1";
+ default:
+ // Default to the newest CPU we know about.
+ return "apple-m1";
}
- switch (hostInfo.cpu_subtype) {
- case CPU_SUBTYPE_ARM_V7S:
- return "swift";
- default:;
- }
-
- return "generic";
}
#elif defined(_AIX)
StringRef sys::getHostCPUName() {
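The Mach host_info query is replaced by the hw.cpufamily sysctl, which reports
the microarchitecture family directly and covers Apple Silicon. A standalone
sketch of the same query (macOS only; per the defines above, an Apple M1
prints 0x1b588bb3):

    #include <sys/sysctl.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Family = 0;
      size_t Length = sizeof(Family);
      if (sysctlbyname("hw.cpufamily", &Family, &Length, nullptr, 0) == 0)
        printf("hw.cpufamily = 0x%08x\n", Family);
      return 0;
    }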
@@ -1360,6 +1399,11 @@ StringRef sys::getHostCPUName() {
}
#elif defined(__riscv)
StringRef sys::getHostCPUName() {
+#if defined(__linux__)
+ std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
+ StringRef Content = P ? P->getBuffer() : "";
+ return detail::getHostCPUNameForRISCV(Content);
+#else
#if __riscv_xlen == 64
return "generic-rv64";
#elif __riscv_xlen == 32
@@ -1367,6 +1411,7 @@ StringRef sys::getHostCPUName() {
#else
#error "Unhandled value of __riscv_xlen"
#endif
+#endif
}
#else
StringRef sys::getHostCPUName() { return "generic"; }
@@ -1455,9 +1500,6 @@ int computeHostNumPhysicalCores() {
#elif defined(__linux__) && defined(__s390x__)
int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); }
#elif defined(__APPLE__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-
// Gets the number of *physical cores* on the machine.
int computeHostNumPhysicalCores() {
uint32_t count;
@@ -1706,6 +1748,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
.Case("asimd", "neon")
.Case("fp", "fp-armv8")
.Case("crc32", "crc")
+ .Case("atomics", "lse")
+ .Case("sve", "sve")
+ .Case("sve2", "sve2")
#else
.Case("half", "fp16")
.Case("neon", "neon")
diff --git a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
index e6cba26cfcf3..52d5de93ff7d 100644
--- a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
+++ b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
@@ -189,20 +189,6 @@ public:
bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; }
};
-/// Convert St3foo to NSt3fooE so that equivalences naming one also affect the
-/// other.
-template<>
-struct CanonicalizerAllocator::MakeNodeImpl<
- itanium_demangle::StdQualifiedName> {
- CanonicalizerAllocator &Self;
- Node *make(Node *Child) {
- Node *StdNamespace = Self.makeNode<itanium_demangle::NameType>("std");
- if (!StdNamespace)
- return nullptr;
- return Self.makeNode<itanium_demangle::NestedName>(StdNamespace, Child);
- }
-};
-
// FIXME: Also expand built-in substitutions?
using CanonicalizingDemangler =
diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp
index 20babbe56d86..b87e39f0a963 100644
--- a/llvm/lib/Support/JSON.cpp
+++ b/llvm/lib/Support/JSON.cpp
@@ -509,13 +509,25 @@ bool Parser::parseNumber(char First, Value &Out) {
S.push_back(next());
char *End;
// Try first to parse as integer, and if so preserve full 64 bits.
- // strtoll returns long long >= 64 bits, so check it's in range too.
- auto I = std::strtoll(S.c_str(), &End, 10);
- if (End == S.end() && I >= std::numeric_limits<int64_t>::min() &&
- I <= std::numeric_limits<int64_t>::max()) {
+ // We check errno for out-of-range errors and End == S.end() to make
+ // sure that the numeric string is not malformed.
+ errno = 0;
+ int64_t I = std::strtoll(S.c_str(), &End, 10);
+ if (End == S.end() && errno != ERANGE) {
Out = int64_t(I);
return true;
}
+ // strtoull has special handling for negative numbers, but we don't want
+ // that here because negative numbers were already handled in the block
+ // above.
+ if (First != '-') {
+ errno = 0;
+ uint64_t UI = std::strtoull(S.c_str(), &End, 10);
+ if (End == S.end() && errno != ERANGE) {
+ Out = UI;
+ return true;
+ }
+ }
// If it's not an integer
Out = std::strtod(S.c_str(), &End);
return End == S.end() || parseError("Invalid JSON value (number?)");
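The practical effect is that integers in (INT64_MAX, UINT64_MAX] now parse
losslessly instead of being rounded through double. A hedged sketch; the
getAsUINT64 accessor is assumed to be the one added to llvm/Support/JSON.h
alongside this change:

    #include "llvm/Support/JSON.h"
    #include <cassert>
    #include <cstdint>
    using namespace llvm;

    void parseBigInteger() {
      Expected<json::Value> V = json::parse("18446744073709551615"); // 2^64-1
      if (V) {
        if (Optional<uint64_t> U = V->getAsUINT64()) // assumed accessor
          assert(*U == UINT64_MAX);
      } else
        consumeError(V.takeError());
    }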
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 8e154067abc0..9f34405e54fc 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -340,7 +340,7 @@ Optional<bool> KnownBits::eq(const KnownBits &LHS, const KnownBits &RHS) {
Optional<bool> KnownBits::ne(const KnownBits &LHS, const KnownBits &RHS) {
if (Optional<bool> KnownEQ = eq(LHS, RHS))
- return Optional<bool>(!KnownEQ.getValue());
+ return Optional<bool>(!*KnownEQ);
return None;
}
@@ -356,7 +356,7 @@ Optional<bool> KnownBits::ugt(const KnownBits &LHS, const KnownBits &RHS) {
Optional<bool> KnownBits::uge(const KnownBits &LHS, const KnownBits &RHS) {
if (Optional<bool> IsUGT = ugt(RHS, LHS))
- return Optional<bool>(!IsUGT.getValue());
+ return Optional<bool>(!*IsUGT);
return None;
}
@@ -380,7 +380,7 @@ Optional<bool> KnownBits::sgt(const KnownBits &LHS, const KnownBits &RHS) {
Optional<bool> KnownBits::sge(const KnownBits &LHS, const KnownBits &RHS) {
if (Optional<bool> KnownSGT = sgt(RHS, LHS))
- return Optional<bool>(!KnownSGT.getValue());
+ return Optional<bool>(!*KnownSGT);
return None;
}
@@ -413,11 +413,11 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const {
}
KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
- bool SelfMultiply) {
+ bool NoUndefSelfMultiply) {
unsigned BitWidth = LHS.getBitWidth();
assert(BitWidth == RHS.getBitWidth() && !LHS.hasConflict() &&
!RHS.hasConflict() && "Operand mismatch");
- assert((!SelfMultiply || (LHS.One == RHS.One && LHS.Zero == RHS.Zero)) &&
+ assert((!NoUndefSelfMultiply || LHS == RHS) &&
"Self multiplication knownbits mismatch");
// Compute the high known-0 bits by multiplying the unsigned max of each side.
@@ -501,7 +501,7 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
Res.One = BottomKnown.getLoBits(ResultBitsKnown);
// If we're self-multiplying then bit[1] is guaranteed to be zero.
- if (SelfMultiply && BitWidth > 1) {
+ if (NoUndefSelfMultiply && BitWidth > 1) {
assert(Res.One[1] == 0 &&
"Self-multiplication failed Quadratic Reciprocity!");
Res.Zero.setBit(1);
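The "Quadratic Reciprocity" assertion rests on a one-line fact about squares:
writing x = 2a + b with b in {0,1} gives x^2 = 4a^2 + 4ab + b^2, so
x^2 mod 4 = b^2, which is 0 or 1. Bit 1 of x*x is therefore always zero, which
is why a no-undef self-multiply may set Zero's bit 1 unconditionally.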
diff --git a/llvm/lib/Support/LineIterator.cpp b/llvm/lib/Support/LineIterator.cpp
index 7bdf1271ac25..9874d16d19e1 100644
--- a/llvm/lib/Support/LineIterator.cpp
+++ b/llvm/lib/Support/LineIterator.cpp
@@ -38,7 +38,7 @@ line_iterator::line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks,
line_iterator::line_iterator(const MemoryBufferRef &Buffer, bool SkipBlanks,
char CommentMarker)
: Buffer(Buffer.getBufferSize() ? Optional<MemoryBufferRef>(Buffer) : None),
- CommentMarker(CommentMarker), SkipBlanks(SkipBlanks), LineNumber(1),
+ CommentMarker(CommentMarker), SkipBlanks(SkipBlanks),
CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : nullptr,
0) {
// Ensure that if we are constructed on a non-empty memory buffer that it is
diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp
index caadde389504..fdcf34d70ad9 100644
--- a/llvm/lib/Support/MD5.cpp
+++ b/llvm/lib/Support/MD5.cpp
@@ -261,13 +261,13 @@ void MD5::final(MD5Result &Result) {
support::endian::write32le(&Result[12], InternalState.d);
}
-StringRef MD5::final() {
+MD5::MD5Result MD5::final() {
+ MD5Result Result;
final(Result);
- return StringRef(reinterpret_cast<char *>(Result.Bytes.data()),
- Result.Bytes.size());
+ return Result;
}
-StringRef MD5::result() {
+MD5::MD5Result MD5::result() {
auto StateToRestore = InternalState;
auto Hash = final();
@@ -280,15 +280,15 @@ StringRef MD5::result() {
SmallString<32> MD5::MD5Result::digest() const {
SmallString<32> Str;
- toHex(Bytes, /*LowerCase*/ true, Str);
+ toHex(*this, /*LowerCase*/ true, Str);
return Str;
}
void MD5::stringifyResult(MD5Result &Result, SmallVectorImpl<char> &Str) {
- toHex(Result.Bytes, /*LowerCase*/ true, Str);
+ toHex(Result, /*LowerCase*/ true, Str);
}
-std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
+MD5::MD5Result MD5::hash(ArrayRef<uint8_t> Data) {
MD5 Hash;
Hash.update(Data);
MD5::MD5Result Res;
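final() and result() now return the 16-byte MD5Result by value rather than a
StringRef aliasing the hasher's internal state. Usage sketch; the expected hex
digest of "abc" is the RFC 1321 test vector:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/MD5.h"
    using namespace llvm;

    void hashAbc() {
      MD5 Hash;
      Hash.update("abc");
      MD5::MD5Result Result = Hash.final();
      SmallString<32> Hex = Result.digest();
      // Hex == "900150983cd24fb0d6963f7d28e17f72"
    }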
diff --git a/llvm/lib/Support/MathExtras.cpp b/llvm/lib/Support/MathExtras.cpp
index 7efffaa7f8b8..ad44b1a21676 100644
--- a/llvm/lib/Support/MathExtras.cpp
+++ b/llvm/lib/Support/MathExtras.cpp
@@ -15,7 +15,7 @@
#ifdef _MSC_VER
#include <limits>
#else
-#include <math.h>
+#include <cmath>
#endif
namespace llvm {
diff --git a/llvm/lib/Support/Memory.cpp b/llvm/lib/Support/Memory.cpp
index 581484268cd8..f1ba2d0cfe3a 100644
--- a/llvm/lib/Support/Memory.cpp
+++ b/llvm/lib/Support/Memory.cpp
@@ -13,7 +13,6 @@
#include "llvm/Support/Memory.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Valgrind.h"
#ifndef NDEBUG
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 7816779cca1d..9872dfa78b26 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -13,10 +13,9 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/AutoConvert.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Process.h"
@@ -32,13 +31,17 @@
#else
#include <io.h>
#endif
+
+#ifdef __MVS__
+#include "llvm/Support/AutoConvert.h"
+#endif
using namespace llvm;
//===----------------------------------------------------------------------===//
// MemoryBuffer implementation itself.
//===----------------------------------------------------------------------===//
-MemoryBuffer::~MemoryBuffer() { }
+MemoryBuffer::~MemoryBuffer() = default;
/// init - Initialize this MemoryBuffer as a reference to externally allocated
/// memory, memory that we know is already null terminated.
@@ -286,6 +289,8 @@ WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName
StringRef NameRef = BufferName.toStringRef(NameBuf);
size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16);
size_t RealLen = AlignedStringLen + Size + 1;
+ if (RealLen <= Size) // Check for rollover.
+ return nullptr;
char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
if (!Mem)
return nullptr;
@@ -533,4 +538,4 @@ MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
return MemoryBufferRef(Data, Identifier);
}
-SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {}
+SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() = default;
diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp
index 0a797046bb68..8a69f7513255 100644
--- a/llvm/lib/Support/NativeFormatting.cpp
+++ b/llvm/lib/Support/NativeFormatting.cpp
@@ -14,6 +14,10 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#if defined(_WIN32) && !defined(__MINGW32__)
+#include <float.h> // For _fpclass in llvm::write_double.
+#endif
+
using namespace llvm;
template<typename T, std::size_t N>
@@ -133,7 +137,7 @@ void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style,
Optional<size_t> Width) {
const size_t kMaxWidth = 128u;
- size_t W = std::min(kMaxWidth, Width.getValueOr(0u));
+ size_t W = std::min(kMaxWidth, Width.value_or(0u));
unsigned Nibbles = (64 - countLeadingZeros(N) + 3) / 4;
bool Prefix = (Style == HexPrintStyle::PrefixLower ||
@@ -161,7 +165,7 @@ void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style,
void llvm::write_double(raw_ostream &S, double N, FloatStyle Style,
Optional<size_t> Precision) {
- size_t Prec = Precision.getValueOr(getDefaultPrecision(Style));
+ size_t Prec = Precision.value_or(getDefaultPrecision(Style));
if (std::isnan(N)) {
S << "nan";
@@ -258,5 +262,5 @@ size_t llvm::getDefaultPrecision(FloatStyle Style) {
case FloatStyle::Percent:
return 2; // Number of decimal places.
}
- LLVM_BUILTIN_UNREACHABLE;
+ llvm_unreachable("Unknown FloatStyle enum");
}
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 4977c188f934..798d7124e7e9 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -89,7 +89,7 @@ public:
void add(std::function<void()> F) override {
{
std::lock_guard<std::mutex> Lock(Mutex);
- WorkStack.push(F);
+ WorkStack.push(std::move(F));
}
Cond.notify_one();
}
@@ -102,7 +102,7 @@ private:
Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
if (Stop)
break;
- auto Task = WorkStack.top();
+ auto Task = std::move(WorkStack.top());
WorkStack.pop();
Lock.unlock();
Task();
@@ -161,7 +161,7 @@ TaskGroup::~TaskGroup() {
void TaskGroup::spawn(std::function<void()> F) {
if (Parallel) {
L.inc();
- Executor::getDefaultExecutor()->add([&, F] {
+ Executor::getDefaultExecutor()->add([&, F = std::move(F)] {
F();
L.dec();
});
@@ -175,8 +175,8 @@ void TaskGroup::spawn(std::function<void()> F) {
} // namespace llvm
#endif // LLVM_ENABLE_THREADS
-void llvm::parallelForEachN(size_t Begin, size_t End,
- llvm::function_ref<void(size_t)> Fn) {
+void llvm::parallelFor(size_t Begin, size_t End,
+ llvm::function_ref<void(size_t)> Fn) {
// If we have zero or one items, then do not incur the overhead of spinning up
// a task group. They are surprisingly expensive, and because they do not
// support nested parallelism, a single entry task group can block parallel
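Call sites move from parallelForEachN to the shorter parallelFor; the
semantics (half-open [Begin, End), and no task group for fewer than two items)
are unchanged. Sketch:

    #include "llvm/Support/Parallel.h"
    #include <vector>
    using namespace llvm;

    void squareAll(std::vector<unsigned> &Out) {
      parallelFor(0, Out.size(), [&](size_t I) { Out[I] = I * I; });
    }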
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index 63d8d4ee4648..283dc70f2bc9 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -22,7 +22,6 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include <cctype>
-#include <cstring>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
@@ -761,11 +760,15 @@ bool remove_dots(SmallVectorImpl<char> &the_path, bool remove_dot_dot,
}
}
+ SmallString<256> buffer = root;
+ // "root" could be "/", which may need to be translated into "\".
+ make_preferred(buffer, style);
+ needs_change |= root != buffer;
+
// Avoid rewriting the path unless we have to.
if (!needs_change)
return false;
- SmallString<256> buffer = root;
if (!components.empty()) {
buffer += components[0];
for (StringRef C : makeArrayRef(components).drop_front()) {
@@ -1199,9 +1202,18 @@ Error readNativeFileToEOF(file_t FileHandle, SmallVectorImpl<char> &Buffer,
#include "Windows/Path.inc"
#endif
+bool IsLLVMDriver = false;
+
namespace llvm {
namespace sys {
namespace fs {
+
+std::string getMainExecutable(const char *Argv0, void *MainAddr) {
+ if (IsLLVMDriver)
+ return sys::path::stem(Argv0).str();
+ return getMainExecutableImpl(Argv0, MainAddr);
+}
+
TempFile::TempFile(StringRef Name, int FD)
: TmpName(std::string(Name)), FD(FD) {}
TempFile::TempFile(TempFile &&Other) { *this = std::move(Other); }
diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp
index 547b3b73eec2..cf3962ae927b 100644
--- a/llvm/lib/Support/Process.cpp
+++ b/llvm/lib/Support/Process.cpp
@@ -42,7 +42,7 @@ Optional<std::string> Process::FindInEnvPath(StringRef EnvName,
assert(!path::is_absolute(FileName));
Optional<std::string> FoundPath;
Optional<std::string> OptPath = Process::GetEnv(EnvName);
- if (!OptPath.hasValue())
+ if (!OptPath)
return FoundPath;
const char EnvPathSeparatorStr[] = {Separator, '\0'};
diff --git a/llvm/lib/Support/Program.cpp b/llvm/lib/Support/Program.cpp
index c7a59642b27e..0560714a6acd 100644
--- a/llvm/lib/Support/Program.cpp
+++ b/llvm/lib/Support/Program.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/raw_ostream.h"
-#include <system_error>
using namespace llvm;
using namespace sys;
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 2b3395b669b8..7fe04af4696b 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -37,7 +37,7 @@ struct RISCVSupportedExtension {
} // end anonymous namespace
-static constexpr StringLiteral AllStdExts = "mafdqlcbjtpvn";
+static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvn";
static const RISCVSupportedExtension SupportedExtensions[] = {
{"i", RISCVExtensionVersion{2, 0}},
@@ -48,9 +48,16 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"d", RISCVExtensionVersion{2, 0}},
{"c", RISCVExtensionVersion{2, 0}},
+ {"zihintpause", RISCVExtensionVersion{2, 0}},
+
{"zfhmin", RISCVExtensionVersion{1, 0}},
{"zfh", RISCVExtensionVersion{1, 0}},
+ {"zfinx", RISCVExtensionVersion{1, 0}},
+ {"zdinx", RISCVExtensionVersion{1, 0}},
+ {"zhinxmin", RISCVExtensionVersion{1, 0}},
+ {"zhinx", RISCVExtensionVersion{1, 0}},
+
{"zba", RISCVExtensionVersion{1, 0}},
{"zbb", RISCVExtensionVersion{1, 0}},
{"zbc", RISCVExtensionVersion{1, 0}},
@@ -88,6 +95,10 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zve64x", RISCVExtensionVersion{1, 0}},
{"zve64f", RISCVExtensionVersion{1, 0}},
{"zve64d", RISCVExtensionVersion{1, 0}},
+
+ {"zicbom", RISCVExtensionVersion{1, 0}},
+ {"zicboz", RISCVExtensionVersion{1, 0}},
+ {"zicbop", RISCVExtensionVersion{1, 0}},
};
static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
@@ -97,6 +108,7 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
{"zbp", RISCVExtensionVersion{0, 93}},
{"zbr", RISCVExtensionVersion{0, 93}},
{"zbt", RISCVExtensionVersion{0, 93}},
+ {"zvfh", RISCVExtensionVersion{0, 1}},
};
static bool stripExperimentalPrefix(StringRef &Ext) {
@@ -340,7 +352,7 @@ static Error getExtensionVersion(StringRef Ext, StringRef In, unsigned &Major,
if (!MajorStr.empty() && In.consume_front("p")) {
MinorStr = In.take_while(isDigit);
- In = In.substr(MajorStr.size() + 1);
+ In = In.substr(MajorStr.size() + MinorStr.size() - 1);
// Expected 'p' to be followed by minor version number.
if (MinorStr.empty()) {
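For reference, the version syntax being parsed in these hunks is "<major>[p<minor>]", e.g. "zbp0p93" carries version 0.93. A self-contained sketch of that grammar (parseVersion is a hypothetical helper, not the LLVM routine, which also tracks consumed length and experimental-extension checks):

    #include <cctype>
    #include <string>

    // Hypothetical sketch of the "<major>p<minor>" grammar checked above.
    bool parseVersion(const std::string &S, unsigned &Major, unsigned &Minor) {
      size_t I = 0;
      while (I < S.size() && std::isdigit((unsigned char)S[I]))
        ++I;
      if (I == 0)
        return false;                // no major digits at all
      Major = std::stoul(S.substr(0, I));
      Minor = 0;
      if (I < S.size() && S[I] == 'p') {
        size_t J = ++I;
        while (J < S.size() && std::isdigit((unsigned char)S[J]))
          ++J;
        if (J == I)
          return false;              // 'p' must be followed by digits
        Minor = std::stoul(S.substr(I, J - I));
      }
      return true;
    }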
@@ -398,8 +410,8 @@ static Error getExtensionVersion(StringRef Ext, StringRef In, unsigned &Major,
if (!MinorStr.empty())
Error += "." + MinorStr.str();
Error += " for experimental extension '" + Ext.str() +
- "'(this compiler supports " + utostr(SupportedVers.Major) + "." +
- utostr(SupportedVers.Minor) + ")";
+ "' (this compiler supports " + utostr(SupportedVers.Major) +
+ "." + utostr(SupportedVers.Minor) + ")";
return createStringError(errc::invalid_argument, Error);
}
return Error::success();
@@ -686,11 +698,11 @@ Error RISCVISAInfo::checkDependency() {
bool HasE = Exts.count("e") != 0;
bool HasD = Exts.count("d") != 0;
bool HasF = Exts.count("f") != 0;
- bool HasZve32x = Exts.count("zve32x") != 0;
+ bool HasZfinx = Exts.count("zfinx") != 0;
+ bool HasZdinx = Exts.count("zdinx") != 0;
+ bool HasVector = Exts.count("zve32x") != 0;
bool HasZve32f = Exts.count("zve32f") != 0;
bool HasZve64d = Exts.count("zve64d") != 0;
- bool HasV = Exts.count("v") != 0;
- bool HasVector = HasZve32x || HasV;
bool HasZvl = MinVLen != 0;
if (HasE && !IsRv32)
@@ -706,17 +718,22 @@ Error RISCVISAInfo::checkDependency() {
return createStringError(errc::invalid_argument,
"d requires f extension to also be specified");
- // FIXME: Consider Zfinx in the future
- if (HasZve32f && !HasF)
+ if (HasZve32f && !HasF && !HasZfinx)
+ return createStringError(
+ errc::invalid_argument,
+ "zve32f requires f or zfinx extension to also be specified");
+
+ if (HasZve64d && !HasD && !HasZdinx)
return createStringError(
errc::invalid_argument,
- "zve32f requires f extension to also be specified");
+ "zve64d requires d or zdinx extension to also be specified");
- // FIXME: Consider Zdinx in the future
- if (HasZve64d && !HasD)
+ if (Exts.count("zvfh") && !Exts.count("zfh") && !Exts.count("zfhmin") &&
+ !Exts.count("zhinx") && !Exts.count("zhinxmin"))
return createStringError(
errc::invalid_argument,
- "zve64d requires d extension to also be specified");
+ "zvfh requires zfh, zfhmin, zhinx or zhinxmin extension to also be "
+ "specified");
if (HasZvl && !HasVector)
return createStringError(
@@ -730,9 +747,12 @@ Error RISCVISAInfo::checkDependency() {
return Error::success();
}
-static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"};
+static const char *ImpliedExtsV[] = {"zvl128b", "zve64d", "f", "d"};
static const char *ImpliedExtsZfhmin[] = {"f"};
static const char *ImpliedExtsZfh[] = {"f"};
+static const char *ImpliedExtsZdinx[] = {"zfinx"};
+static const char *ImpliedExtsZhinxmin[] = {"zfinx"};
+static const char *ImpliedExtsZhinx[] = {"zfinx"};
static const char *ImpliedExtsZve64d[] = {"zve64f"};
static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
@@ -752,6 +772,7 @@ static const char *ImpliedExtsZvl64b[] = {"zvl32b"};
static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"};
static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"};
static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"};
+static const char *ImpliedExtsZvfh[] = {"zve32f"};
struct ImpliedExtsEntry {
StringLiteral Name;
@@ -767,8 +788,11 @@ struct ImpliedExtsEntry {
// Note: The table needs to be sorted by name.
static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"v"}, {ImpliedExtsV}},
+ {{"zdinx"}, {ImpliedExtsZdinx}},
{{"zfh"}, {ImpliedExtsZfh}},
{{"zfhmin"}, {ImpliedExtsZfhmin}},
+ {{"zhinx"}, {ImpliedExtsZhinx}},
+ {{"zhinxmin"}, {ImpliedExtsZhinxmin}},
{{"zk"}, {ImpliedExtsZk}},
{{"zkn"}, {ImpliedExtsZkn}},
{{"zks"}, {ImpliedExtsZks}},
@@ -777,6 +801,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"zve64d"}, {ImpliedExtsZve64d}},
{{"zve64f"}, {ImpliedExtsZve64f}},
{{"zve64x"}, {ImpliedExtsZve64x}},
+ {{"zvfh"}, {ImpliedExtsZvfh}},
{{"zvl1024b"}, {ImpliedExtsZvl1024b}},
{{"zvl128b"}, {ImpliedExtsZvl128b}},
{{"zvl16384b"}, {ImpliedExtsZvl16384b}},
@@ -826,6 +851,38 @@ void RISCVISAInfo::updateImplication() {
}
}
+struct CombinedExtsEntry {
+ StringLiteral CombineExt;
+ ArrayRef<const char *> RequiredExts;
+};
+
+static constexpr CombinedExtsEntry CombineIntoExts[] = {
+ {{"zk"}, {ImpliedExtsZk}},
+ {{"zkn"}, {ImpliedExtsZkn}},
+ {{"zks"}, {ImpliedExtsZks}},
+};
+
+void RISCVISAInfo::updateCombination() {
+ bool IsNewCombine = false;
+ do {
+ IsNewCombine = false;
+ for (CombinedExtsEntry CombineIntoExt : CombineIntoExts) {
+ auto CombineExt = CombineIntoExt.CombineExt;
+ auto RequiredExts = CombineIntoExt.RequiredExts;
+ if (hasExtension(CombineExt))
+ continue;
+ bool IsAllRequiredFeatureExist = true;
+ for (const char *Ext : RequiredExts)
+ IsAllRequiredFeatureExist &= hasExtension(Ext);
+ if (IsAllRequiredFeatureExist) {
+ auto Version = findDefaultVersion(CombineExt);
+ addExtension(CombineExt, Version->Major, Version->Minor);
+ IsNewCombine = true;
+ }
+ }
+ } while (IsNewCombine);
+}
+
void RISCVISAInfo::updateFLen() {
FLen = 0;
// TODO: Handle q extension.
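A standalone model of the updateCombination() fixed-point loop above, assuming a plain string set in place of the real extension map (combineInto mirrors the CombineIntoExts table):

    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    // Hedged sketch: keep folding groups of extensions into their umbrella
    // name until no further combination fires, so e.g. zkn+zkt+zkr collapse
    // back into zk.
    void combineExtensions(std::set<std::string> &Exts) {
      const std::vector<std::pair<std::string, std::vector<std::string>>>
          combineInto = {
              {"zk", {"zkn", "zkt", "zkr"}},
              {"zkn", {"zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"}},
          };
      bool Changed = true;
      while (Changed) {
        Changed = false;
        for (const auto &[Name, Parts] : combineInto) {
          if (Exts.count(Name))
            continue;
          bool HaveAll = true;
          for (const auto &P : Parts)
            HaveAll &= Exts.count(P) != 0;
          if (HaveAll) {
            Exts.insert(Name);
            Changed = true;
          }
        }
      }
    }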
@@ -862,11 +919,6 @@ void RISCVISAInfo::updateMaxELen() {
ExtName.getAsInteger(10, ZveELen);
MaxELen = std::max(MaxELen, ZveELen);
}
- if (ExtName == "v") {
- MaxELenFp = 64;
- MaxELen = 64;
- return;
- }
}
}
@@ -904,6 +956,7 @@ std::vector<std::string> RISCVISAInfo::toFeatureVector() const {
llvm::Expected<std::unique_ptr<RISCVISAInfo>>
RISCVISAInfo::postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo) {
ISAInfo->updateImplication();
+ ISAInfo->updateCombination();
ISAInfo->updateFLen();
ISAInfo->updateMinVLen();
ISAInfo->updateMaxELen();
@@ -912,3 +965,18 @@ RISCVISAInfo::postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo) {
return std::move(Result);
return std::move(ISAInfo);
}
+
+StringRef RISCVISAInfo::computeDefaultABI() const {
+ if (XLen == 32) {
+ if (hasExtension("d"))
+ return "ilp32d";
+ if (hasExtension("e"))
+ return "ilp32e";
+ return "ilp32";
+ } else if (XLen == 64) {
+ if (hasExtension("d"))
+ return "lp64d";
+ return "lp64";
+ }
+ llvm_unreachable("Invalid XLEN");
+}
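The mapping implemented by computeDefaultABI(), restated as a minimal sketch over a bare set of extension letters (the real RISCVISAInfo also tracks versions); e.g. rv64imafdc defaults to lp64d and rv32imac to ilp32:

    #include <set>
    #include <string>

    // Hedged restatement of the defaulting rule above.
    std::string defaultABI(unsigned XLen, const std::set<char> &Exts) {
      if (XLen == 32) {
        if (Exts.count('d')) return "ilp32d";
        if (Exts.count('e')) return "ilp32e";
        return "ilp32";
      }
      if (Exts.count('d')) return "lp64d"; // XLen == 64
      return "lp64";
    }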
diff --git a/llvm/lib/Support/SHA1.cpp b/llvm/lib/Support/SHA1.cpp
index 5dce44af9ecd..52bae700350d 100644
--- a/llvm/lib/Support/SHA1.cpp
+++ b/llvm/lib/Support/SHA1.cpp
@@ -263,7 +263,7 @@ void SHA1::pad() {
addUncounted(InternalState.ByteCount << 3);
}
-StringRef SHA1::final() {
+void SHA1::final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult) {
// Pad to complete the last block
pad();
@@ -281,12 +281,19 @@ StringRef SHA1::final() {
(((InternalState.State[i]) >> 24) & 0x000000ff);
}
#endif
+}
- // Return pointer to hash (20 characters)
- return StringRef((char *)HashResult, HASH_LENGTH);
+std::array<uint8_t, 20> SHA1::final() {
+ union {
+ std::array<uint32_t, HASH_LENGTH / 4> HashResult;
+ std::array<uint8_t, HASH_LENGTH> ReturnResult;
+ };
+ static_assert(sizeof(HashResult) == sizeof(ReturnResult), "");
+ final(HashResult);
+ return ReturnResult;
}
-StringRef SHA1::result() {
+std::array<uint8_t, 20> SHA1::result() {
auto StateToRestore = InternalState;
auto Hash = final();
@@ -301,9 +308,5 @@ StringRef SHA1::result() {
std::array<uint8_t, 20> SHA1::hash(ArrayRef<uint8_t> Data) {
SHA1 Hash;
Hash.update(Data);
- StringRef S = Hash.final();
-
- std::array<uint8_t, 20> Arr;
- memcpy(Arr.data(), S.data(), S.size());
- return Arr;
+ return Hash.final();
}
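The union above reinterprets the five 32-bit state words as the 20-byte digest without a copy; a strictly equivalent alternative, sketched here for reference, is an explicit memcpy:

    #include <array>
    #include <cstdint>
    #include <cstring>

    // Hedged sketch: the SHA-1 digest is produced as five 32-bit words,
    // and an explicit copy avoids relying on union type punning.
    std::array<uint8_t, 20> wordsToBytes(const std::array<uint32_t, 5> &Words) {
      std::array<uint8_t, 20> Bytes;
      static_assert(sizeof(Words) == sizeof(Bytes), "layout mismatch");
      std::memcpy(Bytes.data(), Words.data(), sizeof(Bytes));
      return Bytes;
    }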
diff --git a/llvm/lib/Support/SHA256.cpp b/llvm/lib/Support/SHA256.cpp
index 3b81506847ec..81d897fb4187 100644
--- a/llvm/lib/Support/SHA256.cpp
+++ b/llvm/lib/Support/SHA256.cpp
@@ -243,7 +243,7 @@ void SHA256::pad() {
addUncounted(len);
}
-StringRef SHA256::final() {
+void SHA256::final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult) {
// Pad to complete the last block
pad();
@@ -261,12 +261,19 @@ StringRef SHA256::final() {
(((InternalState.State[i]) >> 24) & 0x000000ff);
}
#endif
+}
- // Return pointer to hash (32 characters)
- return StringRef((char *)HashResult, HASH_LENGTH);
+std::array<uint8_t, 32> SHA256::final() {
+ union {
+ std::array<uint32_t, HASH_LENGTH / 4> HashResult;
+ std::array<uint8_t, HASH_LENGTH> ReturnResult;
+ };
+ static_assert(sizeof(HashResult) == sizeof(ReturnResult), "");
+ final(HashResult);
+ return ReturnResult;
}
-StringRef SHA256::result() {
+std::array<uint8_t, 32> SHA256::result() {
auto StateToRestore = InternalState;
auto Hash = final();
@@ -281,11 +288,7 @@ StringRef SHA256::result() {
std::array<uint8_t, 32> SHA256::hash(ArrayRef<uint8_t> Data) {
SHA256 Hash;
Hash.update(Data);
- StringRef S = Hash.final();
-
- std::array<uint8_t, 32> Arr;
- memcpy(Arr.data(), S.data(), S.size());
- return Arr;
+ return Hash.final();
}
} // namespace llvm
diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp
index a434e50e8c1f..ef6dd5fdf1d6 100644
--- a/llvm/lib/Support/ScopedPrinter.cpp
+++ b/llvm/lib/Support/ScopedPrinter.cpp
@@ -7,17 +7,10 @@ using namespace llvm::support;
namespace llvm {
raw_ostream &operator<<(raw_ostream &OS, const HexNumber &Value) {
- OS << "0x" << to_hexString(Value.Value);
+ OS << "0x" << utohexstr(Value.Value);
return OS;
}
-std::string to_hexString(uint64_t Value, bool UpperCase) {
- std::string number;
- llvm::raw_string_ostream stream(number);
- stream << format_hex_no_prefix(Value, 1, UpperCase);
- return stream.str();
-}
-
void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str,
ArrayRef<uint8_t> Data, bool Block,
uint32_t StartOffset) {
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index 1d61f2bf7525..a6fd845da869 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -15,7 +15,6 @@
#include "DebugOptions.h"
-#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
@@ -23,15 +22,14 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Mutex.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
+#include <array>
#include <vector>
//===----------------------------------------------------------------------===//
@@ -83,12 +81,20 @@ struct CallbackAndCookie {
enum class Status { Empty, Initializing, Initialized, Executing };
std::atomic<Status> Flag;
};
+
static constexpr size_t MaxSignalHandlerCallbacks = 8;
-static CallbackAndCookie CallBacksToRun[MaxSignalHandlerCallbacks];
+
+// A global array of CallbackAndCookie may not compile with
+// -Werror=global-constructors in C++20 and above.
+static std::array<CallbackAndCookie, MaxSignalHandlerCallbacks> &
+CallBacksToRun() {
+ static std::array<CallbackAndCookie, MaxSignalHandlerCallbacks> callbacks;
+ return callbacks;
+}
// Signal-safe.
void sys::RunSignalHandlers() {
- for (CallbackAndCookie &RunMe : CallBacksToRun) {
+ for (CallbackAndCookie &RunMe : CallBacksToRun()) {
auto Expected = CallbackAndCookie::Status::Initialized;
auto Desired = CallbackAndCookie::Status::Executing;
if (!RunMe.Flag.compare_exchange_strong(Expected, Desired))
@@ -103,7 +109,7 @@ void sys::RunSignalHandlers() {
// Signal-safe.
static void insertSignalHandler(sys::SignalHandlerCallback FnPtr,
void *Cookie) {
- for (CallbackAndCookie &SetMe : CallBacksToRun) {
+ for (CallbackAndCookie &SetMe : CallBacksToRun()) {
auto Expected = CallbackAndCookie::Status::Empty;
auto Desired = CallbackAndCookie::Status::Initializing;
if (!SetMe.Flag.compare_exchange_strong(Expected, Desired))
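A minimal sketch of the pattern adopted here: a function-local static is constructed on first use, so no dynamic initializer runs at program startup and -Werror=global-constructors is satisfied (Callback and callbacks are hypothetical names):

    #include <array>
    #include <atomic>

    struct Callback {
      std::atomic<int> Flag{0};
    };

    // Hypothetical sketch: the array is built lazily on the first call,
    // exactly once, instead of via a global constructor at load time.
    std::array<Callback, 8> &callbacks() {
      static std::array<Callback, 8> CBs;
      return CBs;
    }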
diff --git a/llvm/lib/Support/Signposts.cpp b/llvm/lib/Support/Signposts.cpp
index 074dddc81c80..232b84e965a0 100644
--- a/llvm/lib/Support/Signposts.cpp
+++ b/llvm/lib/Support/Signposts.cpp
@@ -7,8 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Signposts.h"
-
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
+
#if LLVM_SUPPORT_XCODE_SIGNPOSTS
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Mutex.h"
@@ -24,7 +25,7 @@ using namespace llvm;
namespace {
os_log_t *LogCreator() {
os_log_t *X = new os_log_t;
- *X = os_log_create("org.llvm.signposts", OS_LOG_CATEGORY_POINTS_OF_INTEREST);
+ *X = os_log_create("org.llvm.signposts", "toolchain");
return X;
}
struct LogDeleter {
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index 2eb2989b200b..42982b4c8e6c 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -40,6 +40,17 @@ static const size_t TabStop = 8;
unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
SMLoc IncludeLoc,
std::string &IncludedFile) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
+ OpenIncludeFile(Filename, IncludedFile);
+ if (!NewBufOrErr)
+ return 0;
+
+ return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
+}
+
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+SourceMgr::OpenIncludeFile(const std::string &Filename,
+ std::string &IncludedFile) {
IncludedFile = Filename;
ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
MemoryBuffer::getFile(IncludedFile);
@@ -52,10 +63,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
}
- if (!NewBufOrErr)
- return 0;
-
- return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
+ return NewBufOrErr;
}
unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 137b37f2b1c3..0fb65accbf1d 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -198,7 +198,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
return true;
}
-SpecialCaseList::~SpecialCaseList() {}
+SpecialCaseList::~SpecialCaseList() = default;
bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
StringRef Query, StringRef Category) const {
diff --git a/llvm/lib/Support/Statistic.cpp b/llvm/lib/Support/Statistic.cpp
index 95ee885d2f8f..ec12118650c1 100644
--- a/llvm/lib/Support/Statistic.cpp
+++ b/llvm/lib/Support/Statistic.cpp
@@ -192,7 +192,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
// Print all of the statistics.
for (TrackingStatistic *Stat : Stats.Stats)
- OS << format("%*u %-*s - %s\n", MaxValLen, Stat->getValue(),
+ OS << format("%*" PRIu64 " %-*s - %s\n", MaxValLen, Stat->getValue(),
MaxDebugTypeLen, Stat->getDebugType(), Stat->getDesc());
OS << '\n'; // Flush the output stream.
@@ -253,9 +253,9 @@ void llvm::PrintStatistics() {
#endif
}
-const std::vector<std::pair<StringRef, unsigned>> llvm::GetStatistics() {
+const std::vector<std::pair<StringRef, uint64_t>> llvm::GetStatistics() {
sys::SmartScopedLock<true> Reader(*StatLock);
- std::vector<std::pair<StringRef, unsigned>> ReturnStats;
+ std::vector<std::pair<StringRef, uint64_t>> ReturnStats;
for (const auto &Stat : StatInfo->statistics())
ReturnStats.emplace_back(Stat->getName(), Stat->getValue());
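For reference, the PRIu64 change above matters because getValue() now returns uint64_t; a hedged illustration of the portable format macro:

    #include <cinttypes>
    #include <cstdio>

    // "%u" would misread a 64-bit value passed through varargs; PRIu64
    // expands to the correct conversion (e.g. "llu" or "lu" per platform).
    void printStat(uint64_t Value) {
      std::printf("%" PRIu64 "\n", Value);
    }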
diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp
index 012c785b4351..9b2f96fca2cd 100644
--- a/llvm/lib/Support/StringMap.cpp
+++ b/llvm/lib/Support/StringMap.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
/// Returns the number of buckets to allocate to ensure that the DenseMap can
/// accommodate \p NumEntries without need to grow().
-static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
+static inline unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
// Ensure that "NumEntries * 4 < NumBuckets * 3"
if (NumEntries == 0)
return 0;
@@ -27,6 +27,21 @@ static unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
return NextPowerOf2(NumEntries * 4 / 3 + 1);
}
+static inline StringMapEntryBase **createTable(unsigned NewNumBuckets) {
+ auto **Table = static_cast<StringMapEntryBase **>(safe_calloc(
+ NewNumBuckets + 1, sizeof(StringMapEntryBase **) + sizeof(unsigned)));
+
+ // Allocate one extra bucket, set it to look filled so the iterators stop at
+ // end.
+ Table[NewNumBuckets] = (StringMapEntryBase *)2;
+ return Table;
+}
+
+static inline unsigned *getHashTable(StringMapEntryBase **TheTable,
+ unsigned NumBuckets) {
+ return reinterpret_cast<unsigned *>(TheTable + NumBuckets + 1);
+}
+
StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
ItemSize = itemSize;
@@ -54,15 +69,10 @@ void StringMapImpl::init(unsigned InitSize) {
NumItems = 0;
NumTombstones = 0;
- TheTable = static_cast<StringMapEntryBase **>(safe_calloc(
- NewNumBuckets + 1, sizeof(StringMapEntryBase **) + sizeof(unsigned)));
+ TheTable = createTable(NewNumBuckets);
// Set the member only if TheTable was successfully allocated
NumBuckets = NewNumBuckets;
-
- // Allocate one extra bucket, set it to look filled so the iterators stop at
- // end.
- TheTable[NumBuckets] = (StringMapEntryBase *)2;
}
/// LookupBucketFor - Look up the bucket that the specified string should end
@@ -71,14 +81,12 @@ void StringMapImpl::init(unsigned InitSize) {
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string.
unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
- unsigned HTSize = NumBuckets;
- if (HTSize == 0) { // Hash table unallocated so far?
+ // Hash table unallocated so far?
+ if (NumBuckets == 0)
init(16);
- HTSize = NumBuckets;
- }
unsigned FullHashValue = djbHash(Name, 0);
- unsigned BucketNo = FullHashValue & (HTSize - 1);
- unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+ unsigned BucketNo = FullHashValue & (NumBuckets - 1);
+ unsigned *HashTable = getHashTable(TheTable, NumBuckets);
unsigned ProbeAmt = 1;
int FirstTombstone = -1;
@@ -117,7 +125,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
}
// Okay, we didn't find the item. Probe to the next bucket.
- BucketNo = (BucketNo + ProbeAmt) & (HTSize - 1);
+ BucketNo = (BucketNo + ProbeAmt) & (NumBuckets - 1);
// Use quadratic probing, it has fewer clumping artifacts than linear
// probing and has good cache behavior in the common case.
@@ -129,12 +137,11 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
/// in the map, return the bucket number of the key. Otherwise return -1.
/// This does not modify the map.
int StringMapImpl::FindKey(StringRef Key) const {
- unsigned HTSize = NumBuckets;
- if (HTSize == 0)
+ if (NumBuckets == 0)
return -1; // Really empty table?
unsigned FullHashValue = djbHash(Key, 0);
- unsigned BucketNo = FullHashValue & (HTSize - 1);
- unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+ unsigned BucketNo = FullHashValue & (NumBuckets - 1);
+ unsigned *HashTable = getHashTable(TheTable, NumBuckets);
unsigned ProbeAmt = 1;
while (true) {
@@ -161,7 +168,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
}
// Okay, we didn't find the item. Probe to the next bucket.
- BucketNo = (BucketNo + ProbeAmt) & (HTSize - 1);
+ BucketNo = (BucketNo + ProbeAmt) & (NumBuckets - 1);
// Use quadratic probing, it has fewer clumping artifacts than linear
// probing and has good cache behavior in the common case.
@@ -198,8 +205,6 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
/// the appropriate mod-of-hashtable-size.
unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
unsigned NewSize;
- unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
-
// If the hash table is now more than 3/4 full, or if fewer than 1/8 of
// the buckets are empty (meaning that many are filled with tombstones),
// grow/rehash the table.
@@ -213,36 +218,25 @@ unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
}
unsigned NewBucketNo = BucketNo;
- // Allocate one extra bucket which will always be non-empty. This allows the
- // iterators to stop at end.
- auto NewTableArray = static_cast<StringMapEntryBase **>(safe_calloc(
- NewSize + 1, sizeof(StringMapEntryBase *) + sizeof(unsigned)));
-
- unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
- NewTableArray[NewSize] = (StringMapEntryBase *)2;
+ auto **NewTableArray = createTable(NewSize);
+ unsigned *NewHashArray = getHashTable(NewTableArray, NewSize);
+ unsigned *HashTable = getHashTable(TheTable, NumBuckets);
// Rehash all the items into their new buckets. Luckily :) we already have
// the hash values available, so we don't have to rehash any strings.
for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
StringMapEntryBase *Bucket = TheTable[I];
if (Bucket && Bucket != getTombstoneVal()) {
- // Fast case, bucket available.
+ // If the bucket is not available, probe for a spot.
unsigned FullHash = HashTable[I];
unsigned NewBucket = FullHash & (NewSize - 1);
- if (!NewTableArray[NewBucket]) {
- NewTableArray[FullHash & (NewSize - 1)] = Bucket;
- NewHashArray[FullHash & (NewSize - 1)] = FullHash;
- if (I == BucketNo)
- NewBucketNo = NewBucket;
- continue;
+ if (NewTableArray[NewBucket]) {
+ unsigned ProbeSize = 1;
+ do {
+ NewBucket = (NewBucket + ProbeSize++) & (NewSize - 1);
+ } while (NewTableArray[NewBucket]);
}
- // Otherwise probe for a spot.
- unsigned ProbeSize = 1;
- do {
- NewBucket = (NewBucket + ProbeSize++) & (NewSize - 1);
- } while (NewTableArray[NewBucket]);
-
// Finally found a slot. Fill it in.
NewTableArray[NewBucket] = Bucket;
NewHashArray[NewBucket] = FullHash;
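A standalone model of the quadratic probe used in RehashTable() above; the table size must be a power of two so "& (Size - 1)" is a cheap modulo (probeForEmpty is a hypothetical name):

    #include <vector>

    // Hedged sketch: the probe step grows by one on each miss, which
    // spreads collisions better than linear probing while staying
    // cache-friendly for the common short probe.
    unsigned probeForEmpty(const std::vector<const void *> &Table,
                           unsigned StartBucket) {
      unsigned Size = (unsigned)Table.size(); // must be a power of two
      unsigned Bucket = StartBucket & (Size - 1);
      unsigned ProbeAmt = 1;
      while (Table[Bucket])                   // nonnull means occupied
        Bucket = (Bucket + ProbeAmt++) & (Size - 1);
      return Bucket;
    }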
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index 3ed08ed38661..096b2d2d8c07 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -98,6 +98,13 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
AllowReplacements, MaxEditDistance);
}
+unsigned llvm::StringRef::edit_distance_insensitive(
+ StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
+ return llvm::ComputeMappedEditDistance(
+ makeArrayRef(data(), size()), makeArrayRef(Other.data(), Other.size()),
+ llvm::toLower, AllowReplacements, MaxEditDistance);
+}
+
//===----------------------------------------------------------------------===//
// String Operations
//===----------------------------------------------------------------------===//
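edit_distance_insensitive reuses the generic mapped edit distance, piping both strings through toLower before each comparison. A self-contained sketch of that idea with a classic single-row Levenshtein (editDistanceLower is hypothetical):

    #include <algorithm>
    #include <cctype>
    #include <string>
    #include <vector>

    // Hedged sketch: standard edit distance, except both sides are mapped
    // through tolower before comparing, which is how case is ignored.
    unsigned editDistanceLower(const std::string &A, const std::string &B) {
      std::vector<unsigned> Row(B.size() + 1);
      for (unsigned J = 0; J <= B.size(); ++J)
        Row[J] = J;
      for (unsigned I = 1; I <= A.size(); ++I) {
        unsigned Prev = Row[0]++; // Prev holds dist(I-1, J-1)
        for (unsigned J = 1; J <= B.size(); ++J) {
          unsigned Cur = Row[J];
          bool Same = std::tolower((unsigned char)A[I - 1]) ==
                      std::tolower((unsigned char)B[J - 1]);
          Row[J] = Same ? Prev : 1 + std::min({Prev, Row[J], Row[J - 1]});
          Prev = Cur;
        }
      }
      return Row[B.size()];
    }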
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index 0105cd2e8153..e5590d458fed 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -104,6 +104,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
@@ -114,6 +115,11 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
{{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
{{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@@ -217,6 +223,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX909: return {9, 0, 9};
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
+ case GK_GFX940: return {9, 4, 0};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
@@ -227,6 +234,11 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1033: return {10, 3, 3};
case GK_GFX1034: return {10, 3, 4};
case GK_GFX1035: return {10, 3, 5};
+ case GK_GFX1036: return {10, 3, 6};
+ case GK_GFX1100: return {11, 0, 0};
+ case GK_GFX1101: return {11, 0, 1};
+ case GK_GFX1102: return {11, 0, 2};
+ case GK_GFX1103: return {11, 0, 3};
default: return {0, 0, 0};
}
}
@@ -329,21 +341,6 @@ bool getCPUFeaturesExceptStdExt(CPUKind Kind,
return true;
}
-StringRef computeDefaultABIFromArch(const llvm::RISCVISAInfo &ISAInfo) {
- if (ISAInfo.getXLen() == 32) {
- if (ISAInfo.hasExtension("d"))
- return "ilp32d";
- if (ISAInfo.hasExtension("e"))
- return "ilp32e";
- return "ilp32";
- } else if (ISAInfo.getXLen() == 64) {
- if (ISAInfo.hasExtension("d"))
- return "lp64d";
- return "lp64";
- }
- llvm_unreachable("Invalid XLEN");
-}
-
} // namespace RISCV
} // namespace llvm
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index 9f92ae1c7a7c..31461e31c65c 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -24,11 +24,19 @@ using namespace llvm;
#if LLVM_ENABLE_THREADS
+// A note on thread groups: Tasks are by default in no group (represented
+// by a nullptr ThreadPoolTaskGroup pointer in the Tasks queue), and
+// functionality here normally works on all tasks regardless of their group
+// (such functions receive a nullptr ThreadPoolTaskGroup pointer as an
+// argument). A task in a group has a pointer to that ThreadPoolTaskGroup
+// in the Tasks queue, and functions called to work only on tasks from one
+// group take that pointer.
+
ThreadPool::ThreadPool(ThreadPoolStrategy S)
: Strategy(S), MaxThreadCount(S.compute_thread_count()) {}
void ThreadPool::grow(int requested) {
- std::unique_lock<std::mutex> LockGuard(ThreadsLock);
+ llvm::sys::ScopedWriter LockGuard(ThreadsLock);
if (Threads.size() >= MaxThreadCount)
return; // Already hit the max thread pool size.
int newThreadCount = std::min<int>(requested, MaxThreadCount);
@@ -36,52 +44,129 @@ void ThreadPool::grow(int requested) {
int ThreadID = Threads.size();
Threads.emplace_back([this, ThreadID] {
Strategy.apply_thread_strategy(ThreadID);
- while (true) {
- std::function<void()> Task;
- {
- std::unique_lock<std::mutex> LockGuard(QueueLock);
- // Wait for tasks to be pushed in the queue
- QueueCondition.wait(LockGuard,
- [&] { return !EnableFlag || !Tasks.empty(); });
- // Exit condition
- if (!EnableFlag && Tasks.empty())
- return;
- // Yeah, we have a task, grab it and release the lock on the queue
-
- // We first need to signal that we are active before popping the queue
- // in order for wait() to properly detect that even if the queue is
- // empty, there is still a task in flight.
- ++ActiveThreads;
- Task = std::move(Tasks.front());
- Tasks.pop();
- }
- // Run the task we just grabbed
- Task();
-
- bool Notify;
- {
- // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
- std::lock_guard<std::mutex> LockGuard(QueueLock);
- --ActiveThreads;
- Notify = workCompletedUnlocked();
- }
- // Notify task completion if this is the last active thread, in case
- // someone waits on ThreadPool::wait().
- if (Notify)
- CompletionCondition.notify_all();
- }
+ processTasks(nullptr);
});
}
}
+#ifndef NDEBUG
+// The group of the tasks run by the current thread.
+static LLVM_THREAD_LOCAL std::vector<ThreadPoolTaskGroup *>
+ *CurrentThreadTaskGroups = nullptr;
+#endif
+
+// WaitingForGroup == nullptr means all tasks regardless of their group.
+void ThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
+ while (true) {
+ std::function<void()> Task;
+ ThreadPoolTaskGroup *GroupOfTask;
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+ bool workCompletedForGroup = false; // Result of workCompletedUnlocked()
+ // Wait for tasks to be pushed in the queue
+ QueueCondition.wait(LockGuard, [&] {
+ return !EnableFlag || !Tasks.empty() ||
+ (WaitingForGroup != nullptr &&
+ (workCompletedForGroup =
+ workCompletedUnlocked(WaitingForGroup)));
+ });
+ // Exit condition
+ if (!EnableFlag && Tasks.empty())
+ return;
+ if (WaitingForGroup != nullptr && workCompletedForGroup)
+ return;
+ // Yeah, we have a task, grab it and release the lock on the queue
+
+ // We first need to signal that we are active before popping the queue
+ // in order for wait() to properly detect that even if the queue is
+ // empty, there is still a task in flight.
+ ++ActiveThreads;
+ Task = std::move(Tasks.front().first);
+ GroupOfTask = Tasks.front().second;
+ // Need to count active threads in each group separately, ActiveThreads
+ // would never be 0 if waiting for another group inside a wait.
+ if (GroupOfTask != nullptr)
+ ++ActiveGroups[GroupOfTask]; // Increment or set to 1 if new item
+ Tasks.pop_front();
+ }
+#ifndef NDEBUG
+ if (CurrentThreadTaskGroups == nullptr)
+ CurrentThreadTaskGroups = new std::vector<ThreadPoolTaskGroup *>;
+ CurrentThreadTaskGroups->push_back(GroupOfTask);
+#endif
+
+ // Run the task we just grabbed
+ Task();
+
+#ifndef NDEBUG
+ CurrentThreadTaskGroups->pop_back();
+ if (CurrentThreadTaskGroups->empty()) {
+ delete CurrentThreadTaskGroups;
+ CurrentThreadTaskGroups = nullptr;
+ }
+#endif
+
+ bool Notify;
+ bool NotifyGroup;
+ {
+ // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
+ std::lock_guard<std::mutex> LockGuard(QueueLock);
+ --ActiveThreads;
+ if (GroupOfTask != nullptr) {
+ auto A = ActiveGroups.find(GroupOfTask);
+ if (--(A->second) == 0)
+ ActiveGroups.erase(A);
+ }
+ Notify = workCompletedUnlocked(GroupOfTask);
+ NotifyGroup = GroupOfTask != nullptr && Notify;
+ }
+ // Notify task completion if this is the last active thread, in case
+ // someone waits on ThreadPool::wait().
+ if (Notify)
+ CompletionCondition.notify_all();
+ // If this was a task in a group, notify also threads waiting for tasks
+ // in this function on QueueCondition, to make a recursive wait() return
+ // after the group it's been waiting for has finished.
+ if (NotifyGroup)
+ QueueCondition.notify_all();
+ }
+}
+
+bool ThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const {
+ if (Group == nullptr)
+ return !ActiveThreads && Tasks.empty();
+ return ActiveGroups.count(Group) == 0 &&
+ !llvm::any_of(Tasks,
+ [Group](const auto &T) { return T.second == Group; });
+}
+
void ThreadPool::wait() {
+ assert(!isWorkerThread()); // Would deadlock waiting for itself.
// Wait for all threads to complete and the queue to be empty
std::unique_lock<std::mutex> LockGuard(QueueLock);
- CompletionCondition.wait(LockGuard, [&] { return workCompletedUnlocked(); });
+ CompletionCondition.wait(LockGuard,
+ [&] { return workCompletedUnlocked(nullptr); });
+}
+
+void ThreadPool::wait(ThreadPoolTaskGroup &Group) {
+ // Wait for all threads in the group to complete.
+ if (!isWorkerThread()) {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+ CompletionCondition.wait(LockGuard,
+ [&] { return workCompletedUnlocked(&Group); });
+ return;
+ }
+ // Make sure to not deadlock waiting for oneself.
+ assert(CurrentThreadTaskGroups == nullptr ||
+ !llvm::is_contained(*CurrentThreadTaskGroups, &Group));
+ // Handle the case of recursive call from another task in a different group,
+ // in which case process tasks while waiting to keep the thread busy and avoid
+ // possible deadlock.
+ processTasks(&Group);
}
bool ThreadPool::isWorkerThread() const {
- std::unique_lock<std::mutex> LockGuard(ThreadsLock);
+ llvm::sys::ScopedReader LockGuard(ThreadsLock);
llvm::thread::id CurrentThreadId = llvm::this_thread::get_id();
for (const llvm::thread &Thread : Threads)
if (CurrentThreadId == Thread.get_id())
@@ -96,7 +181,7 @@ ThreadPool::~ThreadPool() {
EnableFlag = false;
}
QueueCondition.notify_all();
- std::unique_lock<std::mutex> LockGuard(ThreadsLock);
+ llvm::sys::ScopedReader LockGuard(ThreadsLock);
for (auto &Worker : Threads)
Worker.join();
}
@@ -115,12 +200,18 @@ ThreadPool::ThreadPool(ThreadPoolStrategy S) : MaxThreadCount(1) {
void ThreadPool::wait() {
// Sequential implementation running the tasks
while (!Tasks.empty()) {
- auto Task = std::move(Tasks.front());
- Tasks.pop();
+ auto Task = std::move(Tasks.front().first);
+ Tasks.pop_front();
Task();
}
}
+void ThreadPool::wait(ThreadPoolTaskGroup &) {
+ // Simply wait for all; this works even if recursive (the running task
+ // is already removed from the queue).
+ wait();
+}
+
bool ThreadPool::isWorkerThread() const {
report_fatal_error("LLVM compiled without multithreading");
}
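A hedged usage sketch of the task groups introduced in this file, assuming the ThreadPoolTaskGroup interface added to llvm/Support/ThreadPool.h in the same change:

    #include "llvm/Support/ThreadPool.h"

    // Assumed API from the matching header: a group is created against a
    // pool, async() enqueues into the group, wait() blocks on the group only.
    void runGrouped() {
      llvm::ThreadPool Pool;
      llvm::ThreadPoolTaskGroup Group(Pool);
      for (int I = 0; I < 8; ++I)
        Group.async([I] { /* work item I */ });
      // If called from a worker thread, processTasks(&Group) keeps the
      // thread busy on other tasks instead of deadlocking.
      Group.wait();
    }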
diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp
index 4370adc9c3e0..40a20ccc6583 100644
--- a/llvm/lib/Support/TrigramIndex.cpp
+++ b/llvm/lib/Support/TrigramIndex.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/TrigramIndex.h"
+#include "llvm/ADT/StringRef.h"
#include <set>
using namespace llvm;
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index a9afcc9db96a..6696d158b2c1 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -37,6 +37,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case bpfeb: return "bpfeb";
case bpfel: return "bpfel";
case csky: return "csky";
+ case dxil: return "dxil";
case hexagon: return "hexagon";
case hsail64: return "hsail64";
case hsail: return "hsail";
@@ -44,6 +45,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case lanai: return "lanai";
case le32: return "le32";
case le64: return "le64";
+ case loongarch32: return "loongarch32";
+ case loongarch64: return "loongarch64";
case m68k: return "m68k";
case mips64: return "mips64";
case mips64el: return "mips64el";
@@ -164,6 +167,11 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case ve: return "ve";
case csky: return "csky";
+
+ case loongarch32:
+ case loongarch64: return "loongarch";
+
+ case dxil: return "dx";
}
}
@@ -203,6 +211,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case Contiki: return "contiki";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
+ case DriverKit: return "driverkit";
case ELFIAMCU: return "elfiamcu";
case Emscripten: return "emscripten";
case FreeBSD: return "freebsd";
@@ -222,6 +231,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
case PS4: return "ps4";
+ case PS5: return "ps5";
case RTEMS: return "rtems";
case Solaris: return "solaris";
case TvOS: return "tvos";
@@ -229,6 +239,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case WatchOS: return "watchos";
case Win32: return "windows";
case ZOS: return "zos";
+ case ShaderModel: return "shadermodel";
}
llvm_unreachable("Invalid OSType");
@@ -258,6 +269,21 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case MuslEABIHF: return "musleabihf";
case MuslX32: return "muslx32";
case Simulator: return "simulator";
+ case Pixel: return "pixel";
+ case Vertex: return "vertex";
+ case Geometry: return "geometry";
+ case Hull: return "hull";
+ case Domain: return "domain";
+ case Compute: return "compute";
+ case Library: return "library";
+ case RayGeneration: return "raygeneration";
+ case Intersection: return "intersection";
+ case AnyHit: return "anyhit";
+ case ClosestHit: return "closesthit";
+ case Miss: return "miss";
+ case Callable: return "callable";
+ case Mesh: return "mesh";
+ case Amplification: return "amplification";
}
llvm_unreachable("Invalid EnvironmentType!");
@@ -311,12 +337,14 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("sparc", sparc)
.Case("sparcel", sparcel)
.Case("sparcv9", sparcv9)
+ .Case("s390x", systemz)
.Case("systemz", systemz)
.Case("tce", tce)
.Case("tcele", tcele)
.Case("thumb", thumb)
.Case("thumbeb", thumbeb)
.Case("x86", x86)
+ .Case("i386", x86)
.Case("x86-64", x86_64)
.Case("xcore", xcore)
.Case("nvptx", nvptx)
@@ -340,6 +368,9 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("renderscript64", renderscript64)
.Case("ve", ve)
.Case("csky", csky)
+ .Case("loongarch32", loongarch32)
+ .Case("loongarch64", loongarch64)
+ .Case("dxil", dxil)
.Default(UnknownArch);
}
@@ -464,8 +495,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("hsail64", Triple::hsail64)
.Case("spir", Triple::spir)
.Case("spir64", Triple::spir64)
- .Case("spirv32", Triple::spirv32)
- .Case("spirv64", Triple::spirv64)
+ .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2",
+ "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", Triple::spirv32)
+ .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2",
+ "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", Triple::spirv64)
.StartsWith("kalimba", Triple::kalimba)
.Case("lanai", Triple::lanai)
.Case("renderscript32", Triple::renderscript32)
@@ -475,6 +508,9 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("wasm32", Triple::wasm32)
.Case("wasm64", Triple::wasm64)
.Case("csky", Triple::csky)
+ .Case("loongarch32", Triple::loongarch32)
+ .Case("loongarch64", Triple::loongarch64)
+ .Case("dxil", Triple::dxil)
.Default(Triple::UnknownArch);
// Some architectures require special parsing logic just to compute the
@@ -538,9 +574,11 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("nvcl", Triple::NVCL)
.StartsWith("amdhsa", Triple::AMDHSA)
.StartsWith("ps4", Triple::PS4)
+ .StartsWith("ps5", Triple::PS5)
.StartsWith("elfiamcu", Triple::ELFIAMCU)
.StartsWith("tvos", Triple::TvOS)
.StartsWith("watchos", Triple::WatchOS)
+ .StartsWith("driverkit", Triple::DriverKit)
.StartsWith("mesa3d", Triple::Mesa3D)
.StartsWith("contiki", Triple::Contiki)
.StartsWith("amdpal", Triple::AMDPAL)
@@ -548,6 +586,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("hurd", Triple::Hurd)
.StartsWith("wasi", Triple::WASI)
.StartsWith("emscripten", Triple::Emscripten)
+ .StartsWith("shadermodel", Triple::ShaderModel)
.Default(Triple::UnknownOS);
}
@@ -574,20 +613,36 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("coreclr", Triple::CoreCLR)
.StartsWith("simulator", Triple::Simulator)
.StartsWith("macabi", Triple::MacABI)
+ .StartsWith("pixel", Triple::Pixel)
+ .StartsWith("vertex", Triple::Vertex)
+ .StartsWith("geometry", Triple::Geometry)
+ .StartsWith("hull", Triple::Hull)
+ .StartsWith("domain", Triple::Domain)
+ .StartsWith("compute", Triple::Compute)
+ .StartsWith("library", Triple::Library)
+ .StartsWith("raygeneration", Triple::RayGeneration)
+ .StartsWith("intersection", Triple::Intersection)
+ .StartsWith("anyhit", Triple::AnyHit)
+ .StartsWith("closesthit", Triple::ClosestHit)
+ .StartsWith("miss", Triple::Miss)
+ .StartsWith("callable", Triple::Callable)
+ .StartsWith("mesh", Triple::Mesh)
+ .StartsWith("amplification", Triple::Amplification)
.Default(Triple::UnknownEnvironment);
}
static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
return StringSwitch<Triple::ObjectFormatType>(EnvironmentName)
- // "xcoff" must come before "coff" because of the order-dependendent
- // pattern matching.
- .EndsWith("xcoff", Triple::XCOFF)
- .EndsWith("coff", Triple::COFF)
- .EndsWith("elf", Triple::ELF)
- .EndsWith("goff", Triple::GOFF)
- .EndsWith("macho", Triple::MachO)
- .EndsWith("wasm", Triple::Wasm)
- .Default(Triple::UnknownObjectFormat);
+ // "xcoff" must come before "coff" because of the order-dependendent
+ // pattern matching.
+ .EndsWith("xcoff", Triple::XCOFF)
+ .EndsWith("coff", Triple::COFF)
+ .EndsWith("elf", Triple::ELF)
+ .EndsWith("goff", Triple::GOFF)
+ .EndsWith("macho", Triple::MachO)
+ .EndsWith("wasm", Triple::Wasm)
+ .EndsWith("spirv", Triple::SPIRV)
+ .Default(Triple::UnknownObjectFormat);
}
static Triple::SubArchType parseSubArch(StringRef SubArchName) {
@@ -601,6 +656,16 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
if (SubArchName == "arm64e")
return Triple::AArch64SubArch_arm64e;
+ if (SubArchName.startswith("spirv"))
+ return StringSwitch<Triple::SubArchType>(SubArchName)
+ .EndsWith("v1.0", Triple::SPIRVSubArch_v10)
+ .EndsWith("v1.1", Triple::SPIRVSubArch_v11)
+ .EndsWith("v1.2", Triple::SPIRVSubArch_v12)
+ .EndsWith("v1.3", Triple::SPIRVSubArch_v13)
+ .EndsWith("v1.4", Triple::SPIRVSubArch_v14)
+ .EndsWith("v1.5", Triple::SPIRVSubArch_v15)
+ .Default(Triple::NoSubArch);
+
StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
// For now, this is the small part. Early return.
@@ -688,13 +753,24 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
switch (Kind) {
- case Triple::UnknownObjectFormat: return "";
- case Triple::COFF: return "coff";
- case Triple::ELF: return "elf";
- case Triple::GOFF: return "goff";
- case Triple::MachO: return "macho";
- case Triple::Wasm: return "wasm";
- case Triple::XCOFF: return "xcoff";
+ case Triple::UnknownObjectFormat:
+ return "";
+ case Triple::COFF:
+ return "coff";
+ case Triple::ELF:
+ return "elf";
+ case Triple::GOFF:
+ return "goff";
+ case Triple::MachO:
+ return "macho";
+ case Triple::Wasm:
+ return "wasm";
+ case Triple::XCOFF:
+ return "xcoff";
+ case Triple::DXContainer:
+ return "dxcontainer";
+ case Triple::SPIRV:
+ return "spirv";
}
llvm_unreachable("unknown object format type");
}
@@ -731,6 +807,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::lanai:
case Triple::le32:
case Triple::le64:
+ case Triple::loongarch32:
+ case Triple::loongarch64:
case Triple::m68k:
case Triple::mips64:
case Triple::mips64el:
@@ -776,8 +854,10 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::spirv32:
case Triple::spirv64:
- // TODO: In future this will be Triple::SPIRV.
- return Triple::UnknownObjectFormat;
+ return Triple::SPIRV;
+
+ case Triple::dxil:
+ return Triple::DXContainer;
}
llvm_unreachable("unknown architecture");
}
@@ -1158,6 +1238,8 @@ bool Triple::getMacOSXVersion(VersionTuple &Version) const {
// IOS.
Version = VersionTuple(10, 4);
break;
+ case DriverKit:
+ llvm_unreachable("OSX version isn't relevant for DriverKit");
}
return true;
}
@@ -1182,6 +1264,8 @@ VersionTuple Triple::getiOSVersion() const {
}
case WatchOS:
llvm_unreachable("conflicting triple info");
+ case DriverKit:
+ llvm_unreachable("DriverKit doesn't have an iOS version");
}
}
@@ -1203,6 +1287,20 @@ VersionTuple Triple::getWatchOSVersion() const {
}
case IOS:
llvm_unreachable("conflicting triple info");
+ case DriverKit:
+ llvm_unreachable("DriverKit doesn't have a WatchOS version");
+ }
+}
+
+VersionTuple Triple::getDriverKitVersion() const {
+ switch (getOS()) {
+ default:
+ llvm_unreachable("unexpected OS for Darwin triple");
+ case DriverKit:
+ VersionTuple Version = getOSVersion();
+ if (Version.getMajor() == 0)
+ return Version.withMajorReplaced(19);
+ return Version;
}
}
@@ -1285,11 +1383,13 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::csky:
+ case llvm::Triple::dxil:
case llvm::Triple::hexagon:
case llvm::Triple::hsail:
case llvm::Triple::kalimba:
case llvm::Triple::lanai:
case llvm::Triple::le32:
+ case llvm::Triple::loongarch32:
case llvm::Triple::m68k:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
@@ -1321,6 +1421,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::bpfel:
case llvm::Triple::hsail64:
case llvm::Triple::le64:
+ case llvm::Triple::loongarch64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
@@ -1372,11 +1473,13 @@ Triple Triple::get32BitArchVariant() const {
case Triple::arm:
case Triple::armeb:
case Triple::csky:
+ case Triple::dxil:
case Triple::hexagon:
case Triple::hsail:
case Triple::kalimba:
case Triple::lanai:
case Triple::le32:
+ case Triple::loongarch32:
case Triple::m68k:
case Triple::mips:
case Triple::mipsel:
@@ -1406,6 +1509,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::amdil64: T.setArch(Triple::amdil); break;
case Triple::hsail64: T.setArch(Triple::hsail); break;
case Triple::le64: T.setArch(Triple::le32); break;
+ case Triple::loongarch64: T.setArch(Triple::loongarch32); break;
case Triple::mips64:
T.setArch(Triple::mips, getSubArch());
break;
@@ -1419,7 +1523,9 @@ Triple Triple::get32BitArchVariant() const {
case Triple::riscv64: T.setArch(Triple::riscv32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::spir64: T.setArch(Triple::spir); break;
- case Triple::spirv64: T.setArch(Triple::spirv32); break;
+ case Triple::spirv64:
+ T.setArch(Triple::spirv32, getSubArch());
+ break;
case Triple::wasm64: T.setArch(Triple::wasm32); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
}
@@ -1433,6 +1539,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::arc:
case Triple::avr:
case Triple::csky:
+ case Triple::dxil:
case Triple::hexagon:
case Triple::kalimba:
case Triple::lanai:
@@ -1455,6 +1562,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::bpfel:
case Triple::hsail64:
case Triple::le64:
+ case Triple::loongarch64:
case Triple::mips64:
case Triple::mips64el:
case Triple::nvptx64:
@@ -1478,6 +1586,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::armeb: T.setArch(Triple::aarch64_be); break;
case Triple::hsail: T.setArch(Triple::hsail64); break;
case Triple::le32: T.setArch(Triple::le64); break;
+ case Triple::loongarch32: T.setArch(Triple::loongarch64); break;
case Triple::mips:
T.setArch(Triple::mips64, getSubArch());
break;
@@ -1491,7 +1600,9 @@ Triple Triple::get64BitArchVariant() const {
case Triple::riscv32: T.setArch(Triple::riscv64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::spir: T.setArch(Triple::spir64); break;
- case Triple::spirv32: T.setArch(Triple::spirv64); break;
+ case Triple::spirv32:
+ T.setArch(Triple::spirv64, getSubArch());
+ break;
case Triple::thumb: T.setArch(Triple::aarch64); break;
case Triple::thumbeb: T.setArch(Triple::aarch64_be); break;
case Triple::wasm32: T.setArch(Triple::wasm64); break;
@@ -1511,12 +1622,15 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::amdil64:
case Triple::amdil:
case Triple::avr:
+ case Triple::dxil:
case Triple::hexagon:
case Triple::hsail64:
case Triple::hsail:
case Triple::kalimba:
case Triple::le32:
case Triple::le64:
+ case Triple::loongarch32:
+ case Triple::loongarch64:
case Triple::msp430:
case Triple::nvptx64:
case Triple::nvptx:
@@ -1611,12 +1725,15 @@ bool Triple::isLittleEndian() const {
case Triple::avr:
case Triple::bpfel:
case Triple::csky:
+ case Triple::dxil:
case Triple::hexagon:
case Triple::hsail64:
case Triple::hsail:
case Triple::kalimba:
case Triple::le32:
case Triple::le64:
+ case Triple::loongarch32:
+ case Triple::loongarch64:
case Triple::mips64el:
case Triple::mipsel:
case Triple::msp430:
@@ -1725,6 +1842,8 @@ VersionTuple Triple::getMinimumSupportedOSVersion() const {
if (isSimulatorEnvironment())
return VersionTuple(7, 0, 0);
break;
+ case Triple::DriverKit:
+ return VersionTuple(20, 0, 0);
default:
break;
}
@@ -1755,6 +1874,7 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const {
case llvm::Triple::MacOSX:
case llvm::Triple::TvOS:
case llvm::Triple::WatchOS:
+ case llvm::Triple::DriverKit:
if (MArch == "v7k")
return "cortex-a7";
break;
@@ -1811,3 +1931,33 @@ VersionTuple Triple::getCanonicalVersionForOS(OSType OSKind,
return Version;
}
}
+
+// HLSL triple environment orders are relied on in the front end
+static_assert(Triple::Vertex - Triple::Pixel == 1,
+ "incorrect HLSL stage order");
+static_assert(Triple::Geometry - Triple::Pixel == 2,
+ "incorrect HLSL stage order");
+static_assert(Triple::Hull - Triple::Pixel == 3,
+ "incorrect HLSL stage order");
+static_assert(Triple::Domain - Triple::Pixel == 4,
+ "incorrect HLSL stage order");
+static_assert(Triple::Compute - Triple::Pixel == 5,
+ "incorrect HLSL stage order");
+static_assert(Triple::Library - Triple::Pixel == 6,
+ "incorrect HLSL stage order");
+static_assert(Triple::RayGeneration - Triple::Pixel == 7,
+ "incorrect HLSL stage order");
+static_assert(Triple::Intersection - Triple::Pixel == 8,
+ "incorrect HLSL stage order");
+static_assert(Triple::AnyHit - Triple::Pixel == 9,
+ "incorrect HLSL stage order");
+static_assert(Triple::ClosestHit - Triple::Pixel == 10,
+ "incorrect HLSL stage order");
+static_assert(Triple::Miss - Triple::Pixel == 11,
+ "incorrect HLSL stage order");
+static_assert(Triple::Callable - Triple::Pixel == 12,
+ "incorrect HLSL stage order");
+static_assert(Triple::Mesh - Triple::Pixel == 13,
+ "incorrect HLSL stage order");
+static_assert(Triple::Amplification - Triple::Pixel == 14,
+ "incorrect HLSL stage order");
diff --git a/llvm/lib/Support/TypeSize.cpp b/llvm/lib/Support/TypeSize.cpp
index a80fde83e3bc..8bed9b29cba5 100644
--- a/llvm/lib/Support/TypeSize.cpp
+++ b/llvm/lib/Support/TypeSize.cpp
@@ -21,11 +21,10 @@ struct CreateScalableErrorAsWarning {
/// using the wrong interface on a scalable vector.
static void *call() {
return new cl::opt<bool>(
- "treat-scalable-fixed-error-as-warning", cl::Hidden, cl::init(false),
+ "treat-scalable-fixed-error-as-warning", cl::Hidden,
cl::desc(
"Treat issues where a fixed-width property is requested from a "
- "scalable type as a warning, instead of an error."),
- cl::ZeroOrMore);
+ "scalable type as a warning, instead of an error"));
}
};
} // namespace
diff --git a/llvm/lib/Support/Unicode.cpp b/llvm/lib/Support/Unicode.cpp
index bb6e75555b4c..103710303094 100644
--- a/llvm/lib/Support/Unicode.cpp
+++ b/llvm/lib/Support/Unicode.cpp
@@ -19,197 +19,271 @@ namespace llvm {
namespace sys {
namespace unicode {
+/// Unicode code points of the categories L, M, N, P, S and Zs are considered
+/// printable.
+/// In addition, U+00AD SOFT HYPHEN is also considered printable, as
+/// it's actually displayed on most terminals. \return true if the character is
+/// considered printable.
bool isPrintable(int UCS) {
- // Sorted list of non-overlapping intervals of code points that are not
- // supposed to be printable.
- static const UnicodeCharRange NonPrintableRanges[] = {
- { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F },
- { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B },
- { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 },
- { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 },
- { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF },
- { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D },
- { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C },
- { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F },
- { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F },
- { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF },
- { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 },
- { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 },
- { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB },
- { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 },
- { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 },
- { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E },
- { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 },
- { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B },
- { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A },
- { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D },
- { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 },
- { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 },
- { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB },
- { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF },
- { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 },
- { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 },
- { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 },
- { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A },
- { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E },
- { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 },
- { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 },
- { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 },
- { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD },
- { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF },
- { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 },
- { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 },
- { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C },
- { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 },
- { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 },
- { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 },
- { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 },
- { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 },
- { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD },
- { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 },
- { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D },
- { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 },
- { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F },
- { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 },
- { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 },
- { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 },
- { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 },
- { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E },
- { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 },
- { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 },
- { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 },
- { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC },
- { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 },
- { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB },
- { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 },
- { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD },
- { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC },
- { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 },
- { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 },
- { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F },
- { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF },
- { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 },
- { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C },
- { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF },
- { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D },
- { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F },
- { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F },
- { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF },
- { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F },
- { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF },
- { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F },
- { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F },
- { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF },
- { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F },
- { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F },
- { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F },
- { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C },
- { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF },
- { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F },
- { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 },
- { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E },
- { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 },
- { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 },
- { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F },
- { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 },
- { 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF },
- { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF },
- { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 },
- { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F },
- { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 },
- { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E },
- { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 },
- { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF },
- { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 },
- { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A },
- { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
- { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 },
- { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F },
- { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F },
- { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF },
- { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F },
- { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F },
- { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F },
- { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD },
- { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E },
- { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD },
- { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F },
- { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA },
- { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 },
- { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF },
- { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF },
- { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F },
- { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C },
- { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F },
- { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 },
- { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF },
- { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F },
- { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F },
- { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 },
- { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 },
- { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF },
- { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF },
- { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B },
- { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F },
- { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 },
- { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F },
- { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F },
- { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E },
- { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F },
- { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 },
- { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E },
- { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E },
- { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD },
- { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B },
- { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 },
- { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F },
- { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 },
- { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F },
- { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F },
- { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF },
- { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F },
- { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF },
- { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F },
- { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF },
- { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF },
- { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 },
- { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF },
- { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 },
- { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 },
- { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA },
- { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 },
- { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D },
- { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 },
- { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 },
- { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 },
- { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 },
- { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 },
- { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 },
- { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C },
- { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 },
- { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C },
- { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 },
- { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 },
- { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F },
- { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 },
- { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF },
- { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 },
- { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF },
- { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F },
- { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F },
- { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F },
- { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F },
- { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF },
- { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 },
- { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F },
- { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F },
- { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF },
- { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 },
- { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF }
- };
- static const UnicodeCharSet NonPrintables(NonPrintableRanges);
+ // https://unicode.org/Public/14.0.0/ucdxml/
+ static const UnicodeCharRange PrintableRanges[] = {
+ {0x0020, 0x007E}, {0x00A0, 0x00AC}, {0x00AE, 0x0377},
+ {0x037A, 0x037F}, {0x0384, 0x038A}, {0x038C, 0x038C},
+ {0x038E, 0x03A1}, {0x03A3, 0x052F}, {0x0531, 0x0556},
+ {0x0559, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7},
+ {0x05D0, 0x05EA}, {0x05EF, 0x05F4}, {0x0606, 0x061B},
+ {0x061D, 0x06DC}, {0x06DE, 0x070D}, {0x0710, 0x074A},
+ {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
+ {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
+ {0x0860, 0x086A}, {0x0870, 0x088E}, {0x0898, 0x08E1},
+ {0x08E3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
+ {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
+ {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
+ {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD},
+ {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03},
+ {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
+ {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
+ {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42},
+ {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
+ {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76},
+ {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91},
+ {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
+ {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9},
+ {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3},
+ {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03},
+ {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
+ {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
+ {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
+ {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
+ {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
+ {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
+ {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
+ {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2},
+ {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0},
+ {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C},
+ {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39},
+ {0x0C3C, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
+ {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D},
+ {0x0C60, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C77, 0x0C8C},
+ {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
+ {0x0CB5, 0x0CB9}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8},
+ {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE},
+ {0x0CE0, 0x0CE3}, {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2},
+ {0x0D00, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44},
+ {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63},
+ {0x0D66, 0x0D7F}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96},
+ {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD},
+ {0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},
+ {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},
+ {0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B},
+ {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A},
+ {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD},
+ {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD},
+ {0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47},
+ {0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC},
+ {0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5},
+ {0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x1248},
+ {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258},
+ {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D},
+ {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE},
+ {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6},
+ {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
+ {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
+ {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
+ {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753},
+ {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773},
+ {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9},
+ {0x1800, 0x180D}, {0x180F, 0x1819}, {0x1820, 0x1878},
+ {0x1880, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E},
+ {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1940, 0x1940},
+ {0x1944, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
+ {0x19B0, 0x19C9}, {0x19D0, 0x19DA}, {0x19DE, 0x1A1B},
+ {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89},
+ {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE},
+ {0x1B00, 0x1B4C}, {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3},
+ {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88},
+ {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA},
+ {0x1D00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45},
+ {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
+ {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
+ {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3},
+ {0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4},
+ {0x1FF6, 0x1FFE}, {0x2000, 0x200A}, {0x2010, 0x2027},
+ {0x202F, 0x205F}, {0x2070, 0x2071}, {0x2074, 0x208E},
+ {0x2090, 0x209C}, {0x20A0, 0x20C0}, {0x20D0, 0x20F0},
+ {0x2100, 0x218B}, {0x2190, 0x2426}, {0x2440, 0x244A},
+ {0x2460, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2CF3},
+ {0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D},
+ {0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96},
+ {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6},
+ {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE},
+ {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D},
+ {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5},
+ {0x2FF0, 0x2FFB}, {0x3000, 0x303F}, {0x3041, 0x3096},
+ {0x3099, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E},
+ {0x3190, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0xA48C},
+ {0xA490, 0xA4C6}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7},
+ {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3},
+ {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839},
+ {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9},
+ {0xA8E0, 0xA953}, {0xA95F, 0xA97C}, {0xA980, 0xA9CD},
+ {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
+ {0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2},
+ {0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E},
+ {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E},
+ {0xAB30, 0xAB6B}, {0xAB70, 0xABED}, {0xABF0, 0xABF9},
+ {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
+ {0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06},
+ {0xFB13, 0xFB17}, {0xFB1D, 0xFB36}, {0xFB38, 0xFB3C},
+ {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
+ {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F}, {0xFD92, 0xFDC7},
+ {0xFDCF, 0xFDCF}, {0xFDF0, 0xFE19}, {0xFE20, 0xFE52},
+ {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFE70, 0xFE74},
+ {0xFE76, 0xFEFC}, {0xFF01, 0xFFBE}, {0xFFC2, 0xFFC7},
+ {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC},
+ {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD},
+ {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A},
+ {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D},
+ {0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133},
+ {0x10137, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0},
+ {0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0},
+ {0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A},
+ {0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3},
+ {0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9},
+ {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527},
+ {0x10530, 0x10563}, {0x1056F, 0x1057A}, {0x1057C, 0x1058A},
+ {0x1058C, 0x10592}, {0x10594, 0x10595}, {0x10597, 0x105A1},
+ {0x105A3, 0x105B1}, {0x105B3, 0x105B9}, {0x105BB, 0x105BC},
+ {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767},
+ {0x10780, 0x10785}, {0x10787, 0x107B0}, {0x107B2, 0x107BA},
+ {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
+ {0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855},
+ {0x10857, 0x1089E}, {0x108A7, 0x108AF}, {0x108E0, 0x108F2},
+ {0x108F4, 0x108F5}, {0x108FB, 0x1091B}, {0x1091F, 0x10939},
+ {0x1093F, 0x1093F}, {0x10980, 0x109B7}, {0x109BC, 0x109CF},
+ {0x109D2, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A13},
+ {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A38, 0x10A3A},
+ {0x10A3F, 0x10A48}, {0x10A50, 0x10A58}, {0x10A60, 0x10A9F},
+ {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35},
+ {0x10B39, 0x10B55}, {0x10B58, 0x10B72}, {0x10B78, 0x10B91},
+ {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48},
+ {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27},
+ {0x10D30, 0x10D39}, {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9},
+ {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F27},
+ {0x10F30, 0x10F59}, {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB},
+ {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x11075},
+ {0x1107F, 0x110BC}, {0x110BE, 0x110C2}, {0x110D0, 0x110E8},
+ {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147},
+ {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4},
+ {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286},
+ {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},
+ {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9},
+ {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
+ {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
+ {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348},
+ {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357},
+ {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
+ {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7},
+ {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD},
+ {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C},
+ {0x11680, 0x116B9}, {0x116C0, 0x116C9}, {0x11700, 0x1171A},
+ {0x1171D, 0x1172B}, {0x11730, 0x11746}, {0x11800, 0x1183B},
+ {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909},
+ {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935},
+ {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959},
+ {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4},
+ {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8},
+ {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45},
+ {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7},
+ {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09},
+ {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D},
+ {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65},
+ {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91},
+ {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8},
+ {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
+ {0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
+ {0x12F90, 0x12FF2}, {0x13000, 0x1342E}, {0x14400, 0x14646},
+ {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69},
+ {0x16A6E, 0x16ABE}, {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED},
+ {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45}, {0x16B50, 0x16B59},
+ {0x16B5B, 0x16B61}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F},
+ {0x16E40, 0x16E9A}, {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87},
+ {0x16F8F, 0x16F9F}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1},
+ {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
+ {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},
+ {0x1B000, 0x1B122}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167},
+ {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C},
+ {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BC9F},
+ {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, {0x1CF50, 0x1CFC3},
+ {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D172},
+ {0x1D17B, 0x1D1EA}, {0x1D200, 0x1D245}, {0x1D2E0, 0x1D2F3},
+ {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, {0x1D400, 0x1D454},
+ {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2},
+ {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9},
+ {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505},
+ {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C},
+ {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544},
+ {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5},
+ {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F},
+ {0x1DAA1, 0x1DAAF}, {0x1DF00, 0x1DF1E}, {0x1E000, 0x1E006},
+ {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
+ {0x1E026, 0x1E02A}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D},
+ {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE},
+ {0x1E2C0, 0x1E2F9}, {0x1E2FF, 0x1E2FF}, {0x1E7E0, 0x1E7E6},
+ {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE},
+ {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B},
+ {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4},
+ {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F},
+ {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
+ {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39},
+ {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47},
+ {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F},
+ {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57},
+ {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D},
+ {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64},
+ {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77},
+ {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89},
+ {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},
+ {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B},
+ {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF},
+ {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F100, 0x1F1AD},
+ {0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
+ {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7},
+ {0x1F6DD, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F773},
+ {0x1F780, 0x1F7D8}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0},
+ {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859},
+ {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1},
+ {0x1F900, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA74},
+ {0x1FA78, 0x1FA7C}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAAC},
+ {0x1FAB0, 0x1FABA}, {0x1FAC0, 0x1FAC5}, {0x1FAD0, 0x1FAD9},
+ {0x1FAE0, 0x1FAE7}, {0x1FAF0, 0x1FAF6}, {0x1FB00, 0x1FB92},
+ {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9}, {0x20000, 0x2A6DF},
+ {0x2A700, 0x2B738}, {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1},
+ {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
+ {0xE0100, 0xE01EF}};
+
+ static const UnicodeCharSet Printables(PrintableRanges);
+  // Clang special-cases 0x00AD (SOFT HYPHEN), which is rendered as an actual
+  // hyphen in most terminals.
+ return UCS == 0x00AD || Printables.contains(UCS);
+}
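+
+// For illustration, a few spot checks against the table above: U+0041 falls
+// inside {0x0020, 0x007E} and is printable; U+0378 (unassigned) falls in the
+// gap between {0x00AE, 0x0377} and {0x037A, 0x037F} and is not; U+00AD is
+// only accepted through the soft-hyphen special case.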
+
+/// Unicode code points of the Cf category are considered
+/// formatting characters.
+bool isFormatting(int UCS) {
+
+ // https://unicode.org/Public/14.0.0/ucdxml/
+ static const UnicodeCharRange Cf[] = {
+ {0x00AD, 0x00AD}, {0x0600, 0x0605}, {0x061C, 0x061C},
+ {0x06DD, 0x06DD}, {0x070F, 0x070F}, {0x0890, 0x0891},
+ {0x08E2, 0x08E2}, {0x180E, 0x180E}, {0x200B, 0x200F},
+ {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F},
+ {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0x110BD, 0x110BD},
+ {0x110CD, 0x110CD}, {0x13430, 0x13438}, {0x1BCA0, 0x1BCA3},
+ {0x1D173, 0x1D17A}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F}};
- return UCS >= 0 && UCS <= 0x10FFFF && !NonPrintables.contains(UCS);
+ static const UnicodeCharSet Format(Cf);
+ return Format.contains(UCS);
}
/// Gets the number of positions a character is likely to occupy when output
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
new file mode 100644
index 000000000000..1e8aebf1b8eb
--- /dev/null
+++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
@@ -0,0 +1,551 @@
+//===--- UnicodeNameToCodepoint.cpp - Unicode name to codepoint --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements functions to map the name or alias of a Unicode
+// character to its codepoint.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Unicode.h"
+
+namespace llvm {
+namespace sys {
+namespace unicode {
+
+extern const char *UnicodeNameToCodepointDict;
+extern const uint8_t *UnicodeNameToCodepointIndex;
+extern const std::size_t UnicodeNameToCodepointIndexSize;
+extern const std::size_t UnicodeNameToCodepointLargestNameSize;
+
+using BufferType = SmallString<64>;
+
+struct Node {
+ bool IsRoot = false;
+ char32_t Value = 0xFFFFFFFF;
+ uint32_t ChildrenOffset = 0;
+ bool HasSibling = false;
+ uint32_t Size = 0;
+ StringRef Name;
+ const Node *Parent = nullptr;
+
+ constexpr bool isValid() const {
+ return !Name.empty() || Value == 0xFFFFFFFF;
+ }
+ constexpr bool hasChildren() const { return ChildrenOffset != 0 || IsRoot; }
+
+ std::string fullName() const {
+ std::string S;
+    // Reserve enough space for the names of most Unicode code points.
+    // The chosen value represents the 99th percentile of name sizes as of
+    // Unicode 14.
+ S.reserve(46);
+ const Node *N = this;
+ while (N) {
+ std::reverse_copy(N->Name.begin(), N->Name.end(), std::back_inserter(S));
+ N = N->Parent;
+ }
+ std::reverse(S.begin(), S.end());
+ return S;
+ }
+};
+
+static Node createRoot() {
+ Node N;
+ N.IsRoot = true;
+ N.ChildrenOffset = 1;
+ N.Size = 1;
+ return N;
+}
+
+static Node readNode(uint32_t Offset, const Node *Parent = nullptr) {
+ if (Offset == 0)
+ return createRoot();
+
+ uint32_t Origin = Offset;
+ Node N;
+ N.Parent = Parent;
+ uint8_t NameInfo = UnicodeNameToCodepointIndex[Offset++];
+ if (Offset + 6 >= UnicodeNameToCodepointIndexSize)
+ return N;
+
+ bool LongName = NameInfo & 0x40;
+ bool HasValue = NameInfo & 0x80;
+ std::size_t Size = NameInfo & ~0xC0;
+ if (LongName) {
+ uint32_t NameOffset = (UnicodeNameToCodepointIndex[Offset++] << 8);
+ NameOffset |= UnicodeNameToCodepointIndex[Offset++];
+ N.Name = StringRef(UnicodeNameToCodepointDict + NameOffset, Size);
+ } else {
+ N.Name = StringRef(UnicodeNameToCodepointDict + Size, 1);
+ }
+ if (HasValue) {
+ uint8_t H = UnicodeNameToCodepointIndex[Offset++];
+ uint8_t M = UnicodeNameToCodepointIndex[Offset++];
+ uint8_t L = UnicodeNameToCodepointIndex[Offset++];
+ N.Value = ((H << 16) | (M << 8) | L) >> 3;
+
+ bool HasChildren = L & 0x02;
+ N.HasSibling = L & 0x01;
+
+ if (HasChildren) {
+ N.ChildrenOffset = UnicodeNameToCodepointIndex[Offset++] << 16;
+ N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++] << 8;
+ N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++];
+ }
+ } else {
+ uint8_t H = UnicodeNameToCodepointIndex[Offset++];
+ N.HasSibling = H & 0x80;
+ bool HasChildren = H & 0x40;
+ H &= ~0xC0;
+ if (HasChildren) {
+ N.ChildrenOffset = (H << 16);
+ N.ChildrenOffset |=
+ (uint32_t(UnicodeNameToCodepointIndex[Offset++]) << 8);
+ N.ChildrenOffset |= UnicodeNameToCodepointIndex[Offset++];
+ }
+ }
+ N.Size = Offset - Origin;
+ return N;
+}
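+
+// The node layout, as decoded above, is:
+//   byte 0:      bit 7 = HasValue, bit 6 = LongName, bits 0-5 = name size
+//   if LongName: a 2-byte big-endian offset of the name in the dictionary;
+//     otherwise bits 0-5 of byte 0 index a single dictionary character.
+//   if HasValue: 3 bytes with a 21-bit codepoint in the high bits and
+//     HasChildren (bit 1) and HasSibling (bit 0) in the low bits, followed by
+//     an optional 3-byte big-endian children offset;
+//   otherwise: 1 byte with HasSibling (bit 7), HasChildren (bit 6) and the
+//     top bits of the children offset, followed by 2 more offset bytes.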
+
+static bool startsWith(StringRef Name, StringRef Needle, bool Strict,
+ std::size_t &Consummed, char &PreviousCharInName,
+ char &PreviousCharInNeedle, bool IsPrefix = false) {
+
+ Consummed = 0;
+ if (Strict) {
+ if (!Name.startswith(Needle))
+ return false;
+ Consummed = Needle.size();
+ return true;
+ }
+ if (Needle.empty())
+ return true;
+
+ auto NamePos = Name.begin();
+ auto NeedlePos = Needle.begin();
+
+ char PreviousCharInNameOrigin = PreviousCharInName;
+ char PreviousCharInNeedleOrigin = PreviousCharInNeedle;
+
+ auto IgnoreSpaces = [](auto It, auto End, char &PreviousChar,
+ bool IgnoreEnd = false) {
+ while (It != End) {
+ const auto Next = std::next(It);
+      // Ignore spaces, underscores, and medial hyphens; see
+      // https://unicode.org/reports/tr44/#UAX44-LM2.
+ bool Ignore =
+ *It == ' ' || *It == '_' ||
+ (*It == '-' && isAlnum(PreviousChar) &&
+ ((Next != End && isAlnum(*Next)) || (Next == End && IgnoreEnd)));
+ PreviousChar = *It;
+ if (!Ignore)
+ break;
+ ++It;
+ }
+ return It;
+ };
+
+ while (true) {
+ NamePos = IgnoreSpaces(NamePos, Name.end(), PreviousCharInName);
+ NeedlePos =
+ IgnoreSpaces(NeedlePos, Needle.end(), PreviousCharInNeedle, IsPrefix);
+ if (NeedlePos == Needle.end())
+ break;
+ if (NamePos == Name.end())
+ break;
+ if (toUpper(*NeedlePos) != toUpper(*NamePos))
+ break;
+ NeedlePos++;
+ NamePos++;
+ }
+ Consummed = std::distance(Name.begin(), NamePos);
+ if (NeedlePos != Needle.end()) {
+ PreviousCharInName = PreviousCharInNameOrigin;
+ PreviousCharInNeedle = PreviousCharInNeedleOrigin;
+ }
+ return NeedlePos == Needle.end();
+}
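+
+// Under the loose (non-Strict) rules above, "zerowidth_space" and
+// "zero-width space" both match the needle "ZERO WIDTH SPACE": case is folded
+// through toUpper, and spaces, underscores, and hyphens between two
+// alphanumeric characters are skipped on both sides.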
+
+static std::tuple<Node, bool, uint32_t>
+compareNode(uint32_t Offset, StringRef Name, bool Strict,
+ char PreviousCharInName, char PreviousCharInNeedle,
+ BufferType &Buffer, const Node *Parent = nullptr) {
+ Node N = readNode(Offset, Parent);
+ std::size_t Consummed = 0;
+ bool DoesStartWith =
+ N.IsRoot || startsWith(Name, N.Name, Strict, Consummed,
+ PreviousCharInName, PreviousCharInNeedle);
+ if (!DoesStartWith)
+ return std::make_tuple(N, false, 0);
+
+ if (Name.size() - Consummed == 0 && N.Value != 0xFFFFFFFF)
+ return std::make_tuple(N, true, N.Value);
+
+ if (N.hasChildren()) {
+ uint32_t ChildOffset = N.ChildrenOffset;
+ for (;;) {
+ Node C;
+ bool Matches;
+ uint32_t Value;
+ std::tie(C, Matches, Value) =
+ compareNode(ChildOffset, Name.substr(Consummed), Strict,
+ PreviousCharInName, PreviousCharInNeedle, Buffer, &N);
+ if (Matches) {
+ std::reverse_copy(C.Name.begin(), C.Name.end(),
+ std::back_inserter(Buffer));
+ return std::make_tuple(N, true, Value);
+ }
+ ChildOffset += C.Size;
+ if (!C.HasSibling)
+ break;
+ }
+ }
+ return std::make_tuple(N, false, 0);
+}
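+
+// Children of a node are serialized contiguously: the loop above walks them
+// by advancing ChildOffset by each child's encoded Size until a child without
+// the HasSibling flag is reached. Matched fragments are appended to Buffer in
+// reverse; nameToCodepoint restores the reading order with a final
+// std::reverse.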
+
+static std::tuple<Node, bool, uint32_t>
+compareNode(uint32_t Offset, StringRef Name, bool Strict, BufferType &Buffer) {
+ return compareNode(Offset, Name, Strict, 0, 0, Buffer);
+}
+
+// clang-format off
+constexpr const char *const HangulSyllables[][3] = {
+ { "G", "A", "" },
+ { "GG", "AE", "G" },
+ { "N", "YA", "GG" },
+ { "D", "YAE", "GS" },
+    { "DD", "EO", "N" },
+ { "R", "E", "NJ" },
+ { "M", "YEO", "NH" },
+ { "B", "YE", "D" },
+ { "BB", "O", "L" },
+ { "S", "WA", "LG" },
+ { "SS", "WAE", "LM" },
+ { "", "OE", "LB" },
+ { "J", "YO", "LS" },
+ { "JJ", "U", "LT" },
+ { "C", "WEO", "LP" },
+ { "K", "WE", "LH" },
+ { "T", "WI", "M" },
+ { "P", "YU", "B" },
+ { "H", "EU", "BS" },
+ { 0, "YI", "S" },
+ { 0, "I", "SS" },
+ { 0, 0, "NG" },
+ { 0, 0, "J" },
+ { 0, 0, "C" },
+ { 0, 0, "K" },
+ { 0, 0, "T" },
+ { 0, 0, "P" },
+ { 0, 0, "H" }
+ };
+// clang-format on
+
+// Unicode 14.0
+// 3.12 Conjoining Jamo Behavior Common constants
+constexpr const char32_t SBase = 0xAC00;
+constexpr const uint32_t LCount = 19;
+constexpr const uint32_t VCount = 21;
+constexpr const uint32_t TCount = 28;
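+
+// For example, "HANGUL SYLLABLE GA" decomposes into L = 0 ("G"), V = 0 ("A")
+// and T = 0 (empty trailing consonant), so the composition below yields
+//   SBase + (0 * VCount + 0) * TCount + 0 = 0xAC00,
+// which is U+AC00 HANGUL SYLLABLE GA.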
+
+static std::size_t findSyllable(StringRef Name, bool Strict,
+ char &PreviousInName, int &Pos, int Column) {
+ assert(Column == 0 || Column == 1 || Column == 2);
+ static std::size_t CountPerColumn[] = {LCount, VCount, TCount};
+ char NeedleStart = 0;
+ int Len = -1;
+ int Prev = PreviousInName;
+ for (std::size_t I = 0; I < CountPerColumn[Column]; I++) {
+ StringRef Syllable(HangulSyllables[I][Column]);
+ if (int(Syllable.size()) <= Len)
+ continue;
+ std::size_t Consummed = 0;
+ char PreviousInNameCopy = PreviousInName;
+ bool DoesStartWith = startsWith(Name, Syllable, Strict, Consummed,
+ PreviousInNameCopy, NeedleStart);
+ if (!DoesStartWith)
+ continue;
+ Len = Consummed;
+ Pos = I;
+ Prev = PreviousInNameCopy;
+ }
+ if (Len == -1)
+ return 0;
+ PreviousInName = Prev;
+ return size_t(Len);
+}
+
+static llvm::Optional<char32_t>
+nameToHangulCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
+ Buffer.clear();
+ // Hangul Syllable Decomposition
+ std::size_t Consummed = 0;
+ char NameStart = 0, NeedleStart = 0;
+ bool DoesStartWith = startsWith(Name, "HANGUL SYLLABLE ", Strict, Consummed,
+ NameStart, NeedleStart);
+ if (!DoesStartWith)
+ return None;
+ Name = Name.substr(Consummed);
+ int L = -1, V = -1, T = -1;
+ Name = Name.substr(findSyllable(Name, Strict, NameStart, L, 0));
+ Name = Name.substr(findSyllable(Name, Strict, NameStart, V, 1));
+ Name = Name.substr(findSyllable(Name, Strict, NameStart, T, 2));
+ if (L != -1 && V != -1 && T != -1 && Name.empty()) {
+ if (!Strict) {
+ Buffer.append("HANGUL SYLLABLE ");
+ if (L != -1)
+ Buffer.append(HangulSyllables[L][0]);
+ if (V != -1)
+ Buffer.append(HangulSyllables[V][1]);
+ if (T != -1)
+ Buffer.append(HangulSyllables[T][2]);
+ }
+ return SBase + (std::uint32_t(L) * VCount + std::uint32_t(V)) * TCount +
+ std::uint32_t(T);
+ }
+ // Otherwise, it's an illegal syllable name.
+ return None;
+}
+
+struct GeneratedNamesData {
+ StringRef Prefix;
+ uint32_t Start;
+ uint32_t End;
+};
+
+// Unicode 14.0 Table 4-8. Name Derivation Rule Prefix Strings
+// This needs to be kept in sync with
+// llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp
+static const GeneratedNamesData GeneratedNamesDataTable[] = {
+ {"CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4DBF},
+ {"CJK UNIFIED IDEOGRAPH-", 0x4E00, 0x9FFC},
+ {"CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2A6DD},
+ {"CJK UNIFIED IDEOGRAPH-", 0x2A700, 0x2B734},
+ {"CJK UNIFIED IDEOGRAPH-", 0x2B740, 0x2B81D},
+ {"CJK UNIFIED IDEOGRAPH-", 0x2B820, 0x2CEA1},
+ {"CJK UNIFIED IDEOGRAPH-", 0x2CEB0, 0x2EBE0},
+ {"CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134A},
+ {"TANGUT IDEOGRAPH-", 0x17000, 0x187F7},
+ {"TANGUT IDEOGRAPH-", 0x18D00, 0x18D08},
+ {"KHITAN SMALL SCRIPT CHARACTER-", 0x18B00, 0x18CD5},
+ {"NUSHU CHARACTER-", 0x1B170, 0x1B2FB},
+ {"CJK COMPATIBILITY IDEOGRAPH-", 0xF900, 0xFA6D},
+ {"CJK COMPATIBILITY IDEOGRAPH-", 0xFA70, 0xFAD9},
+ {"CJK COMPATIBILITY IDEOGRAPH-", 0x2F800, 0x2FA1D},
+};
+
+static llvm::Optional<char32_t>
+nameToGeneratedCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
+ for (auto &&Item : GeneratedNamesDataTable) {
+ Buffer.clear();
+ std::size_t Consummed = 0;
+ char NameStart = 0, NeedleStart = 0;
+ bool DoesStartWith = startsWith(Name, Item.Prefix, Strict, Consummed,
+ NameStart, NeedleStart, /*isPrefix*/ true);
+ if (!DoesStartWith)
+ continue;
+ auto Number = Name.substr(Consummed);
+ unsigned long long V = 0;
+    // Strict matching only accepts uppercase hexadecimal digits.
+ if (Strict &&
+ llvm::any_of(Number, [](char C) { return C >= 'a' && C <= 'f'; }))
+ return {};
+ if (getAsUnsignedInteger(Number, 16, V) || V < Item.Start || V > Item.End)
+ continue;
+ if (!Strict) {
+ Buffer.append(Item.Prefix);
+ Buffer.append(utohexstr(V, true));
+ }
+ return V;
+ }
+ return None;
+}
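+
+// For example, "CJK UNIFIED IDEOGRAPH-4E00" matches the prefix of the second
+// table entry, and the remaining "4E00" parses as 0x4E00, which lies in
+// [0x4E00, 0x9FFC], so U+4E00 is returned. Strictly matching
+// "cjk unified ideograph-4e00" fails: the prefix comparison is then
+// case-sensitive and lowercase hexadecimal digits are rejected.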
+
+static llvm::Optional<char32_t> nameToCodepoint(StringRef Name, bool Strict,
+ BufferType &Buffer) {
+ if (Name.empty())
+ return None;
+
+ llvm::Optional<char32_t> Res = nameToHangulCodePoint(Name, Strict, Buffer);
+ if (!Res)
+ Res = nameToGeneratedCodePoint(Name, Strict, Buffer);
+ if (Res)
+ return *Res;
+
+ Buffer.clear();
+ Node Node;
+ bool Matches;
+ uint32_t Value;
+ std::tie(Node, Matches, Value) = compareNode(0, Name, Strict, Buffer);
+ if (Matches) {
+ std::reverse(Buffer.begin(), Buffer.end());
+ // UAX44-LM2. Ignore case, whitespace, underscore ('_'), and all medial
+ // hyphens except the hyphen in U+1180 HANGUL JUNGSEONG O-E.
+ if (!Strict && Value == 0x116c &&
+ Name.find_insensitive("O-E") != StringRef::npos) {
+ Buffer = "HANGUL JUNGSEONG O-E";
+ Value = 0x1180;
+ }
+ return Value;
+ }
+ return None;
+}
+
+llvm::Optional<char32_t> nameToCodepointStrict(StringRef Name) {
+
+ BufferType Buffer;
+ auto Opt = nameToCodepoint(Name, true, Buffer);
+ return Opt;
+}
+
+llvm::Optional<LooseMatchingResult>
+nameToCodepointLooseMatching(StringRef Name) {
+ BufferType Buffer;
+ auto Opt = nameToCodepoint(Name, false, Buffer);
+ if (!Opt)
+ return None;
+ return LooseMatchingResult{*Opt, Buffer};
+}
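+
+// Hypothetical usage sketch: nameToCodepointLooseMatching("latin small
+// letter a") is expected to yield U+0061 together with the canonical
+// spelling "LATIN SMALL LETTER A" recovered in Buffer.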
+
+// Find the Unicode characters whose edit distance to Pattern
+// is shortest, using the Wagner–Fischer algorithm.
+llvm::SmallVector<MatchForCodepointName>
+nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) {
+  // We maintain a fixed-size vector of matches, sorted by distance.
+  // The worst matches (those with the largest distance) are discarded when
+  // new elements are added.
+ std::size_t LargestEditDistance = 0;
+ llvm::SmallVector<MatchForCodepointName> Matches;
+ Matches.reserve(MaxMatchesCount + 1);
+
+ auto Insert = [&](const Node &Node, uint32_t Distance,
+ char32_t Value) -> bool {
+ if (Distance > LargestEditDistance) {
+ if (Matches.size() == MaxMatchesCount)
+ return false;
+ LargestEditDistance = Distance;
+ }
+ // To avoid allocations, the creation of the name is delayed
+ // as much as possible.
+ std::string Name;
+ auto GetName = [&] {
+ if (Name.empty())
+ Name = Node.fullName();
+ return Name;
+ };
+
+ auto It = std::lower_bound(
+ Matches.begin(), Matches.end(), Distance,
+ [&](const MatchForCodepointName &a, std::size_t Distance) {
+ if (Distance == a.Distance)
+ return a.Name < GetName();
+ return a.Distance < Distance;
+ });
+ if (It == Matches.end() && Matches.size() == MaxMatchesCount)
+ return false;
+
+ MatchForCodepointName M{GetName(), Distance, Value};
+ Matches.insert(It, std::move(M));
+ if (Matches.size() > MaxMatchesCount)
+ Matches.pop_back();
+ return true;
+ };
+
+  // We ignore case, spaces, hyphens, etc.,
+  // in both the search pattern and the prospective names.
+ auto Normalize = [](StringRef Name) {
+ std::string Out;
+ Out.reserve(Name.size());
+ for (char C : Name) {
+ if (isAlnum(C))
+ Out.push_back(toUpper(C));
+ }
+ return Out;
+ };
+ std::string NormalizedName = Normalize(Pattern);
+
+ // Allocate a matrix big enough for longest names.
+ const std::size_t Columns =
+ std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) +
+ 1;
+
+ LLVM_ATTRIBUTE_UNUSED static std::size_t Rows =
+ UnicodeNameToCodepointLargestNameSize + 1;
+
+ std::vector<char> Distances(
+ Columns * (UnicodeNameToCodepointLargestNameSize + 1), 0);
+
+ auto Get = [&Distances, Columns](size_t Column, std::size_t Row) -> char & {
+ assert(Column < Columns);
+ assert(Row < Rows);
+ return Distances[Row * Columns + Column];
+ };
+
+ for (std::size_t I = 0; I < Columns; I++)
+ Get(I, 0) = I;
+
+  // Visit the children, filling (and overwriting) the matrix rows for the
+  // name fragment of each node iteratively. The actual name of a potential
+  // match, respecting case and spacing, is only reconstructed (via
+  // Node::fullName) when the match is inserted.
+ auto VisitNode = [&](const Node &N, std::size_t Row,
+ auto &VisitNode) -> void {
+ std::size_t J = 0;
+ for (; J < N.Name.size(); J++) {
+ if (!isAlnum(N.Name[J]))
+ continue;
+
+ Get(0, Row) = Row;
+
+ for (std::size_t I = 1; I < Columns; I++) {
+ const int Delete = Get(I - 1, Row) + 1;
+ const int Insert = Get(I, Row - 1) + 1;
+
+ const int Replace =
+ Get(I - 1, Row - 1) + (NormalizedName[I - 1] != N.Name[J] ? 1 : 0);
+
+ Get(I, Row) = std::min(Insert, std::min(Delete, Replace));
+ }
+
+ Row++;
+ }
+
+ unsigned Cost = Get(Columns - 1, Row - 1);
+ if (N.Value != 0xFFFFFFFF) {
+ Insert(N, Cost, N.Value);
+ }
+
+ if (N.hasChildren()) {
+ auto ChildOffset = N.ChildrenOffset;
+ for (;;) {
+ Node C = readNode(ChildOffset, &N);
+ ChildOffset += C.Size;
+ if (!C.isValid())
+ break;
+ VisitNode(C, Row, VisitNode);
+ if (!C.HasSibling)
+ break;
+ }
+ }
+ };
+
+ Node Root = createRoot();
+ VisitNode(Root, 1, VisitNode);
+ return Matches;
+}
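+
+// Because the trie is visited depth-first and each node fills only the matrix
+// rows below those of its ancestors, siblings sharing a prefix reuse the rows
+// already computed for that prefix; the distance of a candidate name is read
+// from cell (Columns - 1, Row - 1) once its last fragment is processed.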
+
+} // namespace unicode
+
+} // namespace sys
+} // namespace llvm
diff --git a/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp b/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp
new file mode 100644
index 000000000000..86e8378eceb1
--- /dev/null
+++ b/llvm/lib/Support/UnicodeNameToCodepointGenerated.cpp
@@ -0,0 +1,20911 @@
+//===------------- Support/UnicodeNameToCodepointGenerated.cpp ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements mapping the name of a Unicode code point to its value.
+//
+// This file was generated using ./bin/UnicodeNameMappingGenerator.
+// Do not edit manually.
+//
+//===----------------------------------------------------------------------===//
+
+/*
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
+
+See Terms of Use <https://www.unicode.org/copyright.html>
+for definitions of Unicode Inc.’s Data Files and Software.
+
+NOTICE TO USER: Carefully read the following legal agreement.
+BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
+DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
+YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT.
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
+THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2022 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
+*/
+
+#include "llvm/Support/Compiler.h"
+#include <cstddef>
+#include <cstdint>
+namespace llvm {
+namespace sys {
+namespace unicode {
+extern const char *UnicodeNameToCodepointDict;
+extern const uint8_t *UnicodeNameToCodepointIndex;
+extern const std::size_t UnicodeNameToCodepointIndexSize;
+extern const std::size_t UnicodeNameToCodepointLargestNameSize;
+const char *UnicodeNameToCodepointDict =
+ " _-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789OWER RIGHT CURLY BRACKET SECTIONM "
+ "LEFT MEMBER OF DOUBLE VERTICALPER BODY TILTING FROM HIP JOINTSFACE WITH "
+ "SYMBOLS COVERING MOUTHVED STEM PARAGRAPH SIGN ORNAMENTVE LESS-THAN ABOVE "
+ "SLANTED EQUAL KORANIC STOP SIGN ISOLATED FORMROFLEX CLICK WITH RETROFLEX "
+ "HOOKSWIRL BIRGA WITH DOUBLE ORNAMENTOWNWARDS HARPOON WITH BARB RIGHT "
+ "HORIZONTAL STROKES TO THE RIGHT LEFTWARDS TRIANGLE-HEADED "
+ "ARROWFT-POINTING ANGLE QUOTATION MARK LOWER HALF INVERSE MEDIUM "
+ "SHADERONT-TILTED SHADOWED WHITE ARROWDIFIER LETTER LABIALIZATION MARKDIC "
+ "KASHMIRI INDEPENDENT SVARITAMARK WITH LEFT RIGHT ARROW ABOVEOUBLE-LINE "
+ "EQUAL ABOVE LESS-THANL ARABIC LETTER TAH AND TWO DOTSLL BUT UPPER LEFT "
+ "QUADRANT BLACKRIGHT SEMICIRCLE WITH THREE DOTSLAR SIGN WITH OVERLAID "
+ "BACKSLASH CONTAINING SMALL WHITE TRIANGLEEN ARM ENDING IN ARROW POINTING "
+ "LAGAB TIMES U OVER LAGAB TIMES ULOWER LEFT CURLY BRACKET "
+ "SECTIONRIGHTWARDS TRIANGLE-HEADED ARROWTRIANGLE-HEADED RIGHTWARDS ARROW "
+ "DOWNWARDS EQUILATERAL ARROWHEAD DOWNWARDS TRIANGLE-HEADED ARROWER ARROWS "
+ "CIRCLING ANTICLOCKWISEER IGI SHIR OVER SHIR UD OVER UDER TAB NI OVER NI "
+ "DISH OVER DISHESS-THAN ABOVE DOUBLE-LINE EQUALETALLED BLACK AND WHITE "
+ "FLORETTEATHARVAVEDIC INDEPENDENT SVARITAAND MIDDLE RIGHT TO LOWER "
+ "CENTREWO DOTS ABOVE AND TWO DOTS BELOWWO DOTS OVER ONE DOT PUNCTUATIONS "
+ "VERTICALLY BELOW AND SMALL TAHTIMES ASH2 KU OVER HI TIMES ASH2AND "
+ "LEFTWARDS OPEN CIRCLE ARROWSTICAL BAR DOUBLE RIGHT TURNSTILENORMAL FACTOR "
+ "SEMIDIRECT PRODUCTD ARROW WITH TRIANGLE ARROWHEADSSEMICIRCULAR "
+ "ANTICLOCKWISE ARROWINTING DOWNWARDS THEN NORTH EASTHT-POINTING ANGLE "
+ "QUOTATION MARKHUR KAZAKH KIRGHIZ ALEF MAKSURA THIRD WHITE RIGHT POINTING "
+ "INDEX SHADOWED WHITE RIGHTWARDS ARROWIDE AND JOINED WITH "
+ "INTERSECTIONUPPER AND LOWER ONE EIGHTH BLOCKIGHTWARDS HARPOON WITH BARB "
+ "DOWNTER-THAN ABOVE DOUBLE-LINE EQUALH SUPERSCRIPT ALEF ISOLATED "
+ "FORMROXIMATELY NOR ACTUALLY EQUAL TOAISING BOTH HANDS IN CELEBRATIONIRECT "
+ "PRODUCT WITH BOTTOM CLOSEDTOP HALF DIVIDED BY VERTICAL BARGREATER-THAN "
+ "ABOVE SLANTED EQUALTOM-LIGHTED RIGHTWARDS ARROWHEADH HAMZA ABOVE WITH "
+ "ALEF MAKSURA H HORIZONTAL MIDDLE BLACK STRIPERONG CENTRALIZATION STROKE "
+ "BELOW TRIANGULAR THREE QUARTERS BLOCK TORTOISE SHELL BRACKET "
+ "ORNAMENTWNWARDS ARROW WITH TIP LEFTWARDSDED HIGH STOP WITH FILLED "
+ "CENTRETION SIGN WITH CIRCUMFLEX ACCENTS AND UPWARDS OPEN CIRCLE "
+ "ARROWSHAND WITH MIDDLE FINGER EXTENDEDOF UPWARDS TRIANGLE-HEADED "
+ "ARROWLEFTWARDS HARPOON WITH BARB DOWNED ARABIC-INDIC DIGIT FOUR "
+ "BELOWEDIUM SHADE AND RIGHT HALF BLOCKLE-LINE EQUAL ABOVE GREATER-THANARDS "
+ "ARROW ABOVE LEFTWARDS ARROW BAR AT END OF HORIZONTAL STROKEEDIUM SHADE "
+ "AND LOWER HALF BLOCKE TO MIDDLE LEFT TO LOWER CENTREED ARABIC-INDIC DIGIT "
+ "FOUR ABOVEED COMMA QUOTATION MARK ORNAMENTE-POINTED BLACK RIGHTWARDS "
+ "ARROWE CONTAINING BLACK SMALL LOZENGEARDROP-SPOKED PROPELLER ASTERISKE "
+ "SQUARED LATIN CAPITAL LETTER PLE COMMA QUOTATION MARK ORNAMENTUG2 OVER "
+ "TUG2 TUG2 OVER TUG2 PAPARDS HARPOON WITH BARB DOWNWARDS-POINTING ANGLE "
+ "BRACKET ORNAMENTRIANGLE-HEADED OPEN CIRCLE ARROW BETWEEN MIDDLE AND RING "
+ "FINGERSED UPWARDS EQUILATERAL ARROWHEAD-SHADOWED WHITE RIGHTWARDS "
+ "ARROWAISED HAND WITH FINGERS SPLAYEDETALLED OUTLINED BLACK "
+ "FLORETTEACK-TILTED SHADOWED WHITE ARROWTNAMESE ALTERNATE READING MARK "
+ "RINGS OVER ONE RING PUNCTUATIONRIGHTWARDS HARPOON WITH BARB UPAND MIDDLE "
+ "LEFT TO LOWER CENTREONE HUNDRED THIRTY-FIVE DEGREES CROSSING ASH OVER ASH "
+ "OVER ASHUPWARDS HARPOON WITH BARB RIGHTRING OVER TWO RINGS "
+ "PUNCTUATIONLEFTWARDS EQUILATERAL ARROWHEADIN WHITE CIRCLE IN BLACK "
+ "SQUAREMAKSURA WITH SUPERSCRIPT ALEF -HIRAGANA PROLONGED SOUND MARKSAD "
+ "WITH LAM WITH ALEF MAKSURADOWNWARDS AND RIGHTWARDS ARROWEFT SEMICIRCLE "
+ "WITH THREE DOTSGHT FOUR POINTED PINWHEEL STARDOT BELOW AND THREE DOTS "
+ "ABOVEAND JOINED BY DASH WITH SUBSETGREATER-THAN ABOVE EQUALS SIGNINDEX "
+ "THUMB CURVE THUMB INSIDEDIVIDED BY HORIZONTAL BAR AND EART EXCLAMATION "
+ "MARK ORNAMENTHT CENTRALIZATION STROKE BELOWON WITH RIGHTWARDS ARROW "
+ "ABOVEMODIFIER LETTER LEFT HALF RINGOPEN CENTRE EIGHT POINTED STARQAF WITH "
+ "LAM WITH ALEF MAKSURAHIGH-REVERSED-9 QUOTATION MARKMINTON RACQUET AND "
+ "SHUTTLECOCKAGGRAVATED INDEPENDENT SVARITAEXTENDED ARABIC-INDIC DIGIT "
+ "TEVERSED LUNATE EPSILON SYMBOLWITH RIGHTWARDS ARROW AT LEFTONAL INDICATOR "
+ "SYMBOL LETTER OVER RIGHTWARDS ARROW TO BARSUPERSCRIPT ALEF INITIAL "
+ "FORMNS-SERIF INTERROBANG ORNAMENTEFTWARDS HARPOON WITH BARB "
+ "UPSEMICIRCULAR PATH AROUND POLEDOWN MIDDLE THUMB INDEX CROSSDOWN HEAVY "
+ "AND RIGHT UP LIGHTCKED FACE WITH EXPLODING HEAD WITH REVERSED NEGATION "
+ "SLASHLIGHT FOUR POINTED BLACK CUSP DOWN INDEX THUMB HOOK MIDDLEDOT OVER "
+ "TWO DOTS PUNCTUATIONPUNCTUATION CHINOOK FULL STOPUP HEAVY AND RIGHT DOWN "
+ "LIGHTCONTAINING BLACK SMALL CIRCLEACE DIRECTION POSITION NOSE FTING POINT "
+ "RIGHTWARDS ARROWT LITTER IN ITS PLACE SYMBOLOUND-TIPPED RIGHTWARDS "
+ "ARROWISMILLAH AR-RAHMAN AR-RAHEEMDOWN HEAVY AND LEFT UP LIGHTUPWARDS AND "
+ "RIGHTWARDS ARROWRECTANGULAR PATH AROUND POLEEFT ARC GREATER-THAN "
+ "BRACKETMONOGRAMMOS TESSERA DODEKATASALTIRE WITH ROUNDED CORNERSBESIDE AND "
+ "JOINED WITH UNIONMIDDLE RING LITTLE CONJOINEDASTERISKS ALIGNED "
+ "VERTICALLYUP HEAVY AND LEFT DOWN LIGHTUPPER CENTRE TO MIDDLE RIGHTHREE "
+ "HUNDRED FIFTEEN DEGREESLEFTWARDS OF DOWNWARDS ARROWDOUBLE ANUSVARA "
+ "ANTARGOMUKHAHADED WHITE RIGHTWARDS ARROWU ALAYHI WAAALIHEE WA-SALLAMIBE "
+ "SYLLABLE BOUNDARY MARKEREDGE-TAILED RIGHTWARDS ARROWLIQUID MEASURE FIRST "
+ "SUBUNIT-FEATHERED RIGHTWARDS ARROWRIANGULAR ONE QUARTER BLOCKIMPERFECTUM "
+ "CUM PROLATIONE OUR BALLOON-SPOKED ASTERISKEAVY WHITE RIGHTWARDS ARROWIDE "
+ "ARC ANTICLOCKWISE ARROWIDE-HEADED RIGHTWARDS ARROWCIRCLE WITH NORTHWEST "
+ "ARROWBETWEEN TWO HORIZONTAL BARSHEAD MARK WITH MOON AND SUNZERO FOR ODD "
+ "POWERS OF FOURWO DOTS BELOW AND DOT ABOVEHANDED INTERLACED "
+ "PENTAGRAMLESS-THAN ABOVE EQUALS SIGNBRDA RNYING YIG MGO MDUN MABRDA "
+ "RNYING YIG MGO SGAB MARIGHT ARC LESS-THAN BRACKETUPPER MIDDLE LEFT TO "
+ "UPPER CONTINUOUS UNDERLINE SYMBOL AND LEFT SEMICIRCLE ARROWSTALIC LATIN "
+ "CAPITAL LETTER ONE LARGE AND ONE SMALL EYEENTATION FORM FOR VERTICAL "
+ "LARGE EQUILATERAL ARROWHEADEMICIRCULAR CLOCKWISE ARROWFINGER COVERING "
+ "CLOSED LIPSSTRUMENTAL NOTATION SYMBOL-PHARYNGEAL VOICED FRICATIVE BARREE "
+ "WITH TWO DOTS BELOWKATHAKA INDEPENDENT SVARITATWO HUNDRED SEVENTY "
+ "DEGREESDOUBLE PRIME QUOTATION MARKDOUBLE ANGLE QUOTATION MARKRIPLE "
+ "VERTICAL BAR OPERATOR DIVIDED BY HORIZONTAL RULEPPY PERSON RAISING ONE "
+ "HANDWALLPLANE SHOULDER HIP MOVELOWER MIDDLE LEFT TO LOWER FOUR FINGERS "
+ "CONJOINED BENTLOWER TONAL RANGE INDICATORLIGHT CENTRALIZATION "
+ "STROKEYAJURVEDIC MIDLINE SVARITAINDUSTRIAL STANDARD SYMBOLMEEM WITH HAH "
+ "WITH TATWEELDOTTED SUBSTITUTION MARKERCRIPT LIGATURE ET ORNAMENTSSIAN "
+ "ASTROLOGICAL SYMBOL ONOMICAL SYMBOL FOR URANUSOORPLANE SHOULDER HIP "
+ "MOVEHTORA SKLIRON CHROMA VASIS OR APPROXIMATELY EQUAL TOLANTED SOUTH "
+ "ARROW WITH HORIGHT PARENTHESIS ORNAMENTDOTTED LUNATE SIGMA "
+ "SYMBOLDROP-SHADOWED WHITE SQUAREMODIFIER FITZPATRICK TYPE-AND MIDDLE "
+ "FINGERS CROSSEDE ONE-WAY LEFT WAY TRAFFIC GAD OVER GAD GAR OVER GARLINE "
+ "FEED SEPARATOR SYMBOLRIPLE DOT PUNCTUATION MARKLEFTWARDS OF UPWARDS "
+ "ARROWTHREE DOTS ABOVE DOWNWARDSU REVERSED OVER U REVERSEDBLE TENNIS "
+ "PADDLE AND BALLERSTRASS ELLIPTIC FUNCTIONOCKED FEMALE AND MALE SIGN "
+ "WITHIN TRIANGLE ARROWHEADUNEVEN EYES AND WAVY MOUTH LESS THAN THE "
+ "DENOMINATORAND RIGHT ONE EIGHTH BLOCK NEGATED WITH VERTICAL BARJECT "
+ "REPLACEMENT CHARACTERMARRIED PARTNERSHIP SYMBOLIDEOGRAPHIC ITERATION "
+ "MARKOTATED FLORAL HEART BULLETALEF MAKSURA ISOLATED FORMORTHOGONAL "
+ "CROSSHATCH FILLWITH LEFTWARDS ARROW ABOVECLOCKWISE ARROW WITH "
+ "MINUSLLALLAHOU ALAYHE WASSALLAMCAT FACE WITH SMILING EYESOUTLINED "
+ "RIGHTWARDS ARROWINVERTED EXCLAMATION MARKBREVE WITH INVERTED "
+ "BREVEFECTIVENESS OR DISTORTIONOLD ASSYRIAN WORD DIVIDERMBINING "
+ "CRYPTOGRAMMIC DOTLEFT PARENTHESIS ORNAMENTREE-HUNDRED-AND-TWENTIETHSTROKE "
+ "AND TWO DOTS ABOVETERNION INTEGRAL OPERATORRIGHT DIAGONAL HALF BLACKRIPLE "
+ "BIRGA WITH ORNAMENTDOUBLE CANDRABINDU VIRAMAOUBLE BIRGA WITH ORNAMENT "
+ "WITH DOUBLE MIDDLE TILDERANCH BANK IDENTIFICATIONELD HOCKEY STICK AND "
+ "BALL WITH DOUBLE GRAVE ACCENTMULTIPLICATION SIGN BELOWNIVERSAL RECYCLING "
+ "SYMBOLLEFTWARDS ARROW WITH HOOKONE UNDER EIGHTEEN SYMBOLLOW QUILT SQUARE "
+ "ORNAMENTFFICULTY AT THE BEGINNINGBUT NOT ACTUALLY EQUAL TOTTED "
+ "SUBSTITUTION BRACKETTAB OVER TAB GAR OVER GARMEDIUM TRIANGLE ARROWHEAD "
+ "OVER NUN LAGAR TIMES SALRIST CIRCLE HITTING WALL WITH DOUBLE VERTICAL "
+ "BARCROSSING NORTH EAST ARROW WITH CIRCLED ONE OVERLAYCAT FACE WITH CLOSED "
+ "EYESDIAERESIS AND HOOK SYMBOLDRY MEASURE FIRST SUBUNITING ON THE FLOOR "
+ "LAUGHINGAND MALE AND FEMALE SIGNVOICED LARYNGEAL SPIRANTTEARDROP-SPOKED "
+ "ASTERISKTED INTERPOLATION MARKERUPRIGHT RECTANGULAR ZERORIGHTWARDS THEN "
+ "CURVING BLACK LENTICULAR BRACKETIGATURE OPEN ET ORNAMENTARROW POINTING "
+ "DIRECTLY BLIC ADDRESS LOUDSPEAKERCULINE ORDINAL INDICATORING FACE WITH "
+ "OPEN MOUTHMTAVRULI CAPITAL LETTER ARM CIRCLE HITTING WALL WELVE POINTED "
+ "BLACK STARLARGE TRIANGLE ARROWHEADLINE HORIZONTAL ELLIPSISORIZONTAL BAR "
+ "WITH NOTCHWITH UPWARDS ARROW ABOVEONE-HUNDRED-AND-SIXTIETHBUSINESS SUIT "
+ "LEVITATINGPERSCRIPT ALEF MOKHASSASCONSECUTIVE EQUALS SIGNSDESCENDING "
+ "MUSICAL NOTESGLOTTAL STOP WITH STROKEEYES AND HAND OVER MOUTHLICATION "
+ "PROGRAM COMMANDFINGER AND THUMB CROSSEDGREATER-THAN OR EQUAL TOISOSCELES "
+ "RIGHT TRIANGLEWITH CANCELLATION STROKEOTTOM SHADED WHITE "
+ "ARROWOTTOM-SHADED WHITE ARROWDIAGONAL CROSSHATCH FILLUPWARD POINTING "
+ "TRIANGLESINGLE-LINE NOT EQUAL TOSYLLABLE REPETITION MARKT BLACK "
+ "RIGHTWARDS ARROWMALL CIRCLE TO THE RIGHTSMALL ARABIC LETTER TAH DOUBLE "
+ "HORIZONTAL STROKE POINTING BACKHAND INDEXEQUAL TO OR "
+ "GREATER-THANINTERSECTION WITH SERIFSHEAVY BLACK HEART BULLETBERKANAN "
+ "BEORC BJARKAN BCOMPATIBILITY IDEOGRAPH-LEFT DIAGONAL HALF BLACKWO DOTS "
+ "VERTICALLY ABOVEDOWNSCALING FACTOR KIIZH OVER TOP SQUARE "
+ "BRACKETLY-RECYCLED PAPER SYMBOLE PLUS A PLUS SU PLUS NASTROKE THROUGH "
+ "DESCENDERPOINTING DOWNWARDS ABOVESHAPE WITH A DOT INSIDEIVE FINGERS "
+ "SPREAD OPENALGAMATION OR COPRODUCTCIRCUMFLEX ACCENT ABOVEININE ORDINAL "
+ "INDICATORLSCHREIBER PAUSE SYMBOLUPWARDS THEN NORTH WESTLEFT-SHADED WHITE "
+ "ARROWCLUSTER-INITIAL LETTER ALEF MAKSURA FINAL FORMMITIAN CONJUGATE "
+ "MATRIXISTED RIGHTWARDS ARROWSSING DIAGONAL CROSSING YELORUSSIAN-UKRAINIAN "
+ "ISOLIDUS BINARY RELATION WITH HALF-CIRCLE BELOWRIGHT HORIZONTAL SECANTUP "
+ "SPREAD THUMB FORWARDORIGINAL OF OR EQUAL TOPUNCTUATION END OF "
+ "TEXTVERTICAL BISECTING LINERIGHT DIAGONAL ELLIPSISORAH WITH NINE BRANCHES "
+ "POINTING AT THE VIEWERREE VARIATION SELECTOR WO-WAY LEFT WAY TRAFFICWHITE "
+ "FOUR POINTED CUSPHANKED RIGHTWARDS ARROWWESTERN PWO KAREN TONE-ESS "
+ "OUTLINED WHITE STARP WITH EXCLAMATION MARK HUNDRED TWENTY-EIGHTH BARBED "
+ "RIGHTWARDS ARROWRTOISE SHELL BRACKETED OMBINING ANUSVARA ABOVEATTACHING "
+ "VERTICAL OMETDOT BELOW AND DOT ABOVEAVOURING DELICIOUS FOODRAISED "
+ "OMISSION BRACKETPA OVER PA GAR OVER GARGREEK SMALL LETTER IOTAASCENDING "
+ "MUSICAL NOTESIDE ARC CLOCKWISE ARROWAND WOMAN HOLDING HANDSRIGHT-POINTING "
+ "TRIANGLEOVER RIGHTWARDS HARPOON CAKE WITH SWIRL DESIGNZANTINE MUSICAL "
+ "SYMBOL IGHT-SHADED WHITE ARROWHT TRIFOLIATE SNOWFLAKEOVERLAPPING LOGICAL "
+ "ANDHREE POINTED BLACK STARARTY HORN AND PARTY HATCURRENT SYMBOL FORM TWO "
+ "ROTATED NINETY DEGREESUBLE VERTICAL BAR BELOWDOWNWARDS THEN CURVING "
+ "ARABIC LETTER TAH ABOVEANG DEPARTING TONE MARK WITH DECORATIVE COVEROVER "
+ "NU11 BUR OVER BUROVER LEFTWARDS HARPOONUIGHUR KIRGHIZ YEH "
+ "WITSYMPTOTICALLY EQUAL TOOVER SHIR BUR OVER BURCONSONANT MODIFIER "
+ "BARDOMAIN ANTIRESTRICTIONND RECORDING COPYRIGHTTRIPLE VERTICAL "
+ "STROKEUPPER RIGHT AND LOWER DOUBLE SOLIDUS OVERLAYLATIN CAPITAL LETTER "
+ "SLONG HORIZONTAL STROKERIGHT-POINTING FLEURONQUESTION MARK ORNAMENT WITH "
+ "THREE DOTS ABOVEUBSCRIPT SMALL LETTER LOW PARAPHRASE BRACKET WITH SINGLE "
+ "ZAPYATAYAPUNCTUATION KUNDDALIYAUPPER ONE EIGHTH BLOCKARMENIAN ETERNITY "
+ "SIGNDOUBLE VERTICAL STROKEPRECEDED BY APOSTROPHEPOINTING UPWARDS "
+ "BELOWKEEPING STILL MOUNTAINTWO HORIZONTAL STROKESPERSET OF NOR EQUAL "
+ "TODOUBLE-LINED HEAD MARKMNYAM YIG GI MGO RGYANEAST-POINTING AIRPLANEIGEL "
+ "LONG-BRANCH-SOL SDOWNWARDS ZIGZAG ARROWACKSLANTED SOUTH ARROWRECTILINEAR "
+ "BLACK STARI YFESIS TETARTIMORIONREE-CIRCLE ALTERNATE IDOWN-POINTING "
+ "TRIANGLEHEXIFORM LONG ANUSVARANOT INCLUDING THE POLESHORT VERTICAL "
+ "STROKES SYMBOL FOR LIGHTHOUSEUSTOMER ACCOUNT NUMBERIN DEPARTING TONE "
+ "MARKDRESSED TO THE SUBJECTSHORT RIGHTWARDS ARROWLEFT TRIANGLE "
+ "OPERATORALEF WITH LAM WITH YEH RIGHT ARROWHEAD ABOVEING HEAD IN "
+ "SILHOUETTEHORT HORIZONTAL STROKEINDIRECT QUESTION MARKSEMI-VOICED SOUND "
+ "MARKCURLY BRACKET ORNAMENTCJK UNIFIED IDEOGRAPH-TRIPLE RIGHT "
+ "TURNSTILEYIAKENG PUACHUE HMONG WITH CIRCUMFLEX ABOVEWITH HORIZONTAL "
+ "STROKECONSONANT SIGN MEDIAL ROUND A POINT OPERATORWITH JEEM INITIAL "
+ "FORMWASALLAM ISOLATED FORM-ROTATED DIVISION SIGNRROW WITH ROUNDED "
+ "HEADGREATER-THAN DIAERESISWITH VOICED SOUND MARKLE BESIDE VERTICAL "
+ "BARINVERTED SMALL V BELOWINVERTED SMALL V ABOVE OVER STAMPED ENVELOPEBAR "
+ "ABOVE INTERSECTIONREASE FONT SIZE SYMBOLARD SHELL FLOPPY DISKDOWNWARDS "
+ "ARROW ABOVEACUTE AND HOOK SYMBOLEFT-POINTING TRIANGLE-SHAPED BAG "
+ "DELIMITEREFT OPEN BOX OPERATORDOWN HORIZONTAL LIGHTEFT HORIZONTAL "
+ "SECANTDOWN HORIZONTAL HEAVYYIG MGO TSHEG SHAD MA-ROUND NOTEHEAD DOWN "
+ "ABOVE SHORT DOWN TACKAKIA TELOUS ICHIMATOSINVERTED GLOTTAL STOPINVERTED "
+ "BRIDGE BELOWDELIMITER TSHEG BSTARHALF TRIANGULAR COLONHAND INTERIOR "
+ "PRODUCTWO-CIRCLE ALTERNATE IWO-CIRCLE NUKTA ABOVEINTERSECTION "
+ "OPERATORINTERSECTING LOGICAL TILDE OPERATOR ABOVE GRUENT WITH DOT "
+ "ABOVEHOCKEY STICK AND PUCKHORIZONTAL TABULATIONHOUSAND MILLIONS SIGNTHICK "
+ "LETTER SELECTORCTOR OR CROSS PRODUCTCRUCIFORM NUMBER FOURTEEN POINTED "
+ "ASTERISKCROSSE STICK AND BALLXTRA SHORT VOWEL MARKFINAL CONSONANT SIGN "
+ "EIGHT SPOKED ASTERISKELATIONAL COMPOSITIONVOICED ITERATION MARKDOUBLE "
+ "LEFT TURNSTILEEQUAL TO OR LESS-THANER RIGHT CORNER ANGLEALLING DIAGONAL "
+ "SLASHLATTENED OPEN A ABOVEFLATTENED PARENTHESISDIGRAMMOS EX "
+ "DODEKATATRIANGULAR HALF BLOCKWITH INVERTED V ABOVEGHT OPEN BOX "
+ "OPERATORTOUCHING INSIDE MOUTHGRAMMOS OKTO DODEKATAARKENING OF THE "
+ "LIGHTVERY HEAVY BARB ARROW WITH VERTICAL STROKE AND SLANTED PARALLELSH "
+ "AMPERSAND ORNAMENT WITH SHORT RIGHT LEGAND VOWEL LENGTH MARKPAP PLUS PAP "
+ "PLUS LU3RATING SYSTEM COMMANDVERTICAL LINE OVERLAYBOTTOM U-SHAPED ARROWND "
+ "TELEPHONE RECEIVERRISING DIAGONAL SLASHMORPHOLOGICAL DIVIDERSHORT "
+ "LEFTWARDS ARROWMIDDLE RING LITTLE ONSIDE TO SIDE SCISSORSMALE WITH STROKE "
+ "SIGNBUT NOT EQUIVALENT TOARYSTIAN FIVE HUNDREDQUADRANT CIRCULAR ARCRELICT "
+ "HOUSE BUILDINGREVERSED FEATHER MARKLETTER SMALL CAPITAL OP SHADED WHITE "
+ "ARROWOCAL NOTATION SYMBOL-OPPOSING AN PLUS NAGABESIDE RIGHT "
+ "TRIANGLENTISTRY SYMBOL LIGHT OHAMMAD ISOLATED FORMLESS-THAN OR EQUAL "
+ "TOWITH SOROCHYA NOZHKAHAR2 TIMES GAL PLUS RUMAI PALAUNG TONE-5HALF CIRCLE "
+ "WITH DOTPLUS GISH TIMES TAK4VAL WITH OVAL INSIDEINSIDE MOUTH RELAXEDINING "
+ "OBLIQUE STROKEDOUBLE ANGLE BRACKETCRESCENT MOON SYMBOLGRA GCAN -CHAR "
+ "RTAGSENARMONIOS ANTIFONIAA- SHOG GI MGO RGYAN OVER TUR ZA OVER "
+ "ZAUBHAANAHU WA TAAALAAONE MARK SGAW KAREN INVERSE WHITE CIRCLEINVERTED "
+ "CANDRABINDU OVER LAGAR GUNU SHEAND NORTH EAST ARROWWET CULTIVATION "
+ "SIGNSIDEWAYS NOON GHUNNAONCAVE-SIDED DIAMONDBSET OF NOR EQUAL TODOUBLE "
+ "DOT TONE MARKPOTABLE WATER SYMBOLSINGLE DOT TONE MARKIRCLES HITTING WALL "
+ "HREE-DOT NUKTA ABOVEFOUR RAISED KNUCKLESBETWEEN PALM FACINGSANGE "
+ "ANTIRESTRICTIONCURRENCY SYMBOL RIELTRANSPOSITION MARKERSEPARATOR MIDDLE "
+ "DOTSEPARATOR KEY SYMBOLFORMS LIGHT VERTICALOVER LEFTWARDS ARROWTHROUGH "
+ "SMALL CIRCLENIS RACQUET AND BALLWITH FOUR DOTS ABOVESCRIPTION CHARACTER "
+ "CURVED ANGLE BRACKETHORIZONTAL BAR WITH OTLESS J WITH STROKEFINAL "
+ "CONSONANT MARKMULTIPLE PUNCTUATIONINDEX RING LITTLE ONUP-POINTING "
+ "TRIANGLEAND NORTH WEST ARROWDOTLESS HEAD OF KHAHIMAGE OF OR EQUAL "
+ "TOGHTWARDS ARROW BELOWEVERSED ROTATED RANAAND SOUTH EAST ARROWAND SOUTH "
+ "WEST ARROWFIVE SPOKED ASTERISK79 OVER LAK-079 GUNULEFT-TO-RIGHT "
+ "SECANTHIGH RATHA OR LOW PAWORD REPETITION MARKHIGH TONE APOSTROPHEE "
+ "CONSONANT MODIFIERCONSONANT SIGN HAARULEFT AND LOWER RIGHTCENTRE VERTICAL "
+ "LINERIGHT QUADRANT BLACKRIGHT-POINTING ANGLEJUDEO-SPANISH VARIKAKHAMTI "
+ "REDUPLICATIONARXIS KAI FTHORA VOUREAN STANDARD SYMBOLYRENAIC TWO "
+ "DRACHMASLATALIZED HOOK BELOWRIGHT U-SHAPED ARROWLE WITH POPPING "
+ "CORKWARE-FUNCTION SYMBOLLASHING SWEAT SYMBOL WITH HORIZONTAL BARL "
+ "FUNCTIONAL SYMBOL CHEMICAL SYMBOL FOR AND DIAGONAL STROKESTAR WITH "
+ "MIDDLE DOTCHARACTER INTRODUCERDOWN ARROWHEAD BELOWEMESTVENNY ZADERZHKA "
+ "BEGIN LOGOGRAM MARKREVERSED ONE HUNDREDRIGHT ANGLE WITH DOTYIG MGO PHUR "
+ "SHAD MA ABOVE LEFT TRIANGLEOW-9 QUOTATION MARK WITH STRIKETHROUGHGIBBOUS "
+ "MOON SYMBOLTHANG LONG ANUSVARALEADING MCHAN RTAGSVARIATION INDICATORSEVEN "
+ "EIGHTHS BLOCKNETWORKED COMPUTERSKULL AND CROSSBONESLANTED EQUAL ABOVE "
+ "VASTNESS OR WASTINGAHU ALAYHI WA-AALIHNE HUNDRED TWENTY PNDRED POINTS "
+ "SYMBOLRROW NO-BREAK SPACEIGATURE AYIN-DALETHSH PLUS HU PLUS ASHFLOORPLANE "
+ "TWISTINGRATUM SUPER STRATUMOTATED ARDHAVISARGAWOMEN HOLDING HANDSBETWEEN "
+ "MIDDLE RING WITH VERTICAL TAILDOWN POINTING INDEXTIGHTLY-CLOSED "
+ "EYESALTERNATE LAKH MARKD CIRCUMFLEX ACCENTVARIANT WITH SQUARENOGRAPHIC "
+ "FULL STOPGAPPED CIRCLE ARROWUP HORIZONTAL LIGHTLF MADDA OVER "
+ "MADDAREE-QUARTER CIRCLE NORTH ARROW WITH HOANSPOSITION BRACKETSEQUENCE "
+ "INTRODUCERARENTHESIS NOTEHEADHORT STROKE OVERLAYVERTICAL TABULATIONOVER E "
+ "NUN OVER NUNTRANNO MALO POVYSHEUP HORIZONTAL HEAVY AND "
+ "PROSGEGRAMMENIVARIANT FORM ILIMMUFT-POINTING FLEURON LOVE YOU HAND "
+ "SIGNHURISAZ THURS THORN AND RETROFLEX HOOKARTIAL DIFFERENTIALLEFT "
+ "POINTING INDEXTO LOWER RIGHT FILLQUESTION MARK ABOVECIRCLED SANS-SERIF "
+ "HAND COVERING MOUTHWITH YEH FINAL FORMET WITH WHITE CROSSLEFT TO LOWER "
+ "RIGHTATED TELLER MACHINERIGHT TO LOWER LEFTINSIDE CIRCLE BELOWCIRCLED "
+ "WHITE ARROWRY CULTIVATION SIGNURRENCY SYMBOL BAHTITED LIABILITY SIGNVERSE "
+ "FINAL BARLINEUBLE DOT WITHIN DOTVERSAL INTERSECTIONISPUTED END OF AYAHOP "
+ "SEMICIRCLE ARROWDENOMINATOR SIXTEENLEFT U-SHAPED ARROWQUADRUPLE "
+ "CRESCENTSA END LOGOGRAM MARKSYMBOL FOR BEGINNERPREFIXED NASAL SIGN "
+ "FLUTTERING IN WINDC DIGRAPH WITH CURLSTRAIGHT THUMB BENTRIGHT MIDDLE "
+ "STROKETWENTY-FIVE DEGREESSTRATIAN FIFTY MNASIN CHEN SPUNGS SHADTURNED "
+ "SECTION MARKTURNED PADA PISELEH KASKAL U GUNU DISHEVEN POWERS OF FOURDOWN "
+ "AND HORIZONTALIMIDIA SEXTULA SIGNPARAGRAPH SEPARATORARABIC FORM "
+ "SHAPINGILDING CONSTRUCTIONHEAD-SHAPED POINTERNAXIAN FIVE HUNDREDFIVE "
+ "FINGERS SPREAD IN A RECTANGLE BOXLUB-SPOKED ASTERISKMSHELL MOBILE "
+ "PHONETART OF RUB EL HIZBANS-SERIF CAPITAL LING SHIRT WITH SASHSLANTED "
+ "NORTH ARROWMOVES AGAINST CHEEKRAILING MCHAN RTAGSWEST POINTING LEAF OVER "
+ "INVERTED SHUGGLY VERTICAL LINEUM WITH DRUMSTICKSWITH STROKE SYMBOLTO "
+ "LOWER LEFT FILLBAARAKA WA-TAAALAATOP U-SHAPED ARROWGISH CROSSING "
+ "GISHASTROLOGICAL SIGN PERFIXED LETTER RAATIN SMALL LETTER RIST CIRCLE "
+ "FRONT EVERSED CHELYUSTKAABBREVIATION MARK EVENTEEN FULL STOPATERRESTRIAL "
+ "ALIENTYPE A ELECTRONICSARROW SHAFT WIDTH WHITE VERTICAL BAR FOR "
+ "SIMALUNGUN SAU-SHAPED ORNAMENTSQUARTER NOTE STEM ERTICAL BAR "
+ "VIRAMAEPIGRAPHIC LETTER DOUBLE PUNCTUATIONPUNCTUATION BINDU ENTY-TWO "
+ "POINT TWOENTERING TONE MARKASTED SWEET POTATOVARIANT FORM LIMMUGATIVE "
+ "ACKNOWLEDGEWITH JUSTIFICATIONDOWN-OUTPUT SYMBOLOTLESS DALATH RISH NOT "
+ "LITTER SYMBOLOU ALAYHE WASALLAMOUCHTONE TELEPHONE AND NO DOTS ABOVEORK ON "
+ "THE DECAYEDEAST POINTING LEAFTROFLEX HOOK BELOW AND SMASH PRODUCTOW TONE "
+ "APOSTROPHEFORTY-FIVE DEGREESFORKED PARAGRAPHOSVERY SMALL DIAMOND AND "
+ "YPOGEGRAMMENIFIVE EIGHTHS BLOCKPACING CANDRABINDU WITH KAVYKA "
+ "ABOVEIGATURE ZAYIN-YODHJEEM ISOLATED FORMYLLABLE LENGTHENER WITH FLOWING "
+ "SANDSET OVER BUILDINGSKANTAJA NAASIKYAYACUP WITHOUT HANDLEKBAR ISOLATED "
+ "FORMSEPTUPLE CRESCENTSHUNDREDS UNIT MARKNINETEEN FULL STOPCTLY EQUIVALENT "
+ "TOUPPER MIDDLE RIGHTHOUSANDS SEPARATORNISH VERSE DIVIDERNITE PART "
+ "INTEGRALHORIZONTALLY BELOWSMALL CIRCLE ABOVEKOREAN CHARACTER ONORMAL "
+ "SUBGROUP OFCANTILLATION SIGN HOLDING BACK TEARSLOWER MIDDLE RIGHTCOPPER "
+ "ANTIMONIATEAND LOW RIGHT RING THUMB INDEX THUMBCONTINUING "
+ "OVERLAPMATHEMATICAL SPACESINGLE PUNCTUATIONINDEPENDENT VOWEL IN "
+ "POSSESSION SIGN WITH CIRCLE ABOVEITAN SMALL SCRIPT WITH CIRCLE BELOW "
+ "WITH CROSSED-TAILSHAN REDUPLICATIONBOTTOM RIGHT KASRAIGSAW PUZZLE PIECEIX "
+ "SPOKED ASTERISKSYMMETRIC SWAPPING SPREAD THUMB SIDEUP ARROWHEAD "
+ "BELOWTILTING FROM WAISTYPTIAN HIEROGLYPH NYOOGA NAAKSIKYAYABASELINE ROUND "
+ "DOTHAIS LUS NTOG NTOGS PRESSED TOGETHERNYET THYOOM TA-ROLHILOSOPHERS "
+ "SULFURSMALL RED TRIANGLERYUKOVAYA SVETLAYALEFT MIDDLE STROKEUTLINED BLACK "
+ "STARLOSED CIRCLE ARROWLEFT-STEM TONE BARS INSIDE AND ABOVESOUL ISOLATED "
+ "FORMVOCALIZATION MARK WITH BULLET NOSEA PLUS HA PLUS DAPUNCTUATION SIGN "
+ "ALTERNATE NUMBER BUT RELIEVED FACECONSONANT SIGN PA-GAAHLAA TTUDDAAGAMBDA "
+ "WITH STROKEAPLI DYO DODEKATALAGOLITIC LETTER WHITE PARENTHESISDELPHIC "
+ "FIVE MNASINVERTED MCHU CANYEH ISOLATED FORMCONTOURED OUTLINESIGN O WITH "
+ "CROSSPRECEDING SOLIDUS ALTERNATION MARKASTERN PWO KAREN MEEM INITIAL "
+ "FORMPRESSIONLESS FACEPRIZNAK MODIFIER MEDIUM BARB ARROWCIRCLES WITH "
+ "DOTSCONTINUATION SIGNWHITE SHOGI PIECERIATION SELECTOR-CANDRABINDU "
+ "ABOVEEAR SCREEN SYMBOL WITH TILDE ABOVEABBREVIATION SIGNKE BOTTLE AND "
+ "CUPKHAH INITIAL FORMLAPPING LESS-THANSTRAIGHT MOVEMENT AND PALATAL "
+ "HOOKREATIONAL VEHICLEAMPHYLIAN DIGAMMARIGHT HALF CIRCLEVERY SMALL "
+ "SQUARECLOSED LITTLE YUSCOMBINING NUMBER LAH ISOLATED FORM WITH SOUND "
+ "WAVESULAR MEDIUM SHADESQUARED TIMES KURLHOUETTE OF JAPANMANENT PAPER "
+ "SIGNEMICOLON UNDERBARMALL WHITE CIRCLELIAN HIEROGLYPH ALD PERMIC LETTER "
+ "URNED DAMMA BELOWURNED COMMA ABOVEQUAT REVERSED ESHCAL SYMBOL BOTTOMAEUM "
+ "ONE PLETHRON0 WHEELED CHARIOTCANCELLATION MARKTRIPLE DASH ARROWHIRTEEN "
+ "FULL STOPVARIANT FORM IMINVRE TOURNOIS SIGNTHREE SOUND WAVESUP POINTING "
+ "INDEXVARIANT FORM USSUHORIZONTAL DOUBLEHORIZONTAL SINGLEGENERIC "
+ "MATERIALSOURTEEN FULL STOPNG STROKE OVERLAYNFORMATION SOURCEFROM SMALL "
+ "CIRCLEFRACTION ONE HALFBOTTOM HALF BLACKIASTRE MARK ABOVESERVER EYE "
+ "SYMBOLICTED LEFT ENTRY-NEGATIVE CIRCLED IDEOGRAPHIC COMMA OVER ZU PLUS "
+ "SARHAH ISOLATED FORMUP AND HORIZONTALRYBLION BASE SIGNVARIANT FORM "
+ "ASH9TONAL RANGE MARK ONE EIGHTH BLOCK-DENTAL PERCUSSIVEBE WITH "
+ "MERIDIANSGREATER-THAN SIGNGREATER-THAN NOR BRIGHTNESS SYMBOLBERBER "
+ "ACADEMY YAS REVOLVING LIGHTHEART-SHAPED EYES PLUS SHA3 PLUS AOPEN-HEADED "
+ "ARROWWO VERTICAL DOTS WITH NOT EQUAL TOTIAL ARTS UNIFORMING POLE AND "
+ "FISHFACING BABY CHICKVEE WITH UNDERBARY ON BLACK SQUAREAUKAZ LAGU LOGR "
+ "LATHERING TOGETHERINEAR ANNOTATION TARTING FROM SIGNNE EYEBROW "
+ "RAISEDPINWHEEL ASTERISKINITIAL LETTER RAMILITARY AIRPLANEVERAGE WITH "
+ "SLASHTAN ISOLATED FORM GRAVEYARD SYMBOL TO BLACK DIAMONDAND BLACK "
+ "SQUARESOWER NUMERAL SIGNIGHTEEN FULL STOP LAGAB TIMES ASH2NASALIZATION "
+ "MARKFINGER-POST ARROW LAGAR OVER LAGARTERSYLLABIC TSHEGNAUDIZ NYD NAUD "
+ "NTEN THOUSAND SIGNBRACKET EXTENSIONFLICK ALTERNATINGCTION "
+ "APPLICATIONCROSS PUNCTUATIONVARIANT FORM ESHCH WITH UMBRELLAARENTHESES "
+ "ABOVEDOUBLE TURNSTILEDITORIAL CORONISVERY HEAVY SHAFTDOUBLE DOT "
+ "ABOVECONSONANT JOINERVIEWING CEREMONYBOTTOM HALF RINGCORNER "
+ "DOWNWARDSDOUBLE CRESCENTSAFFRICATION MARKUPERSCRIPT ALAPHUP-OUTPUT "
+ "SYMBOLCOMPRESSED ARROWANABAZAR SQUARE UPPER OVER LOWERVOWEL LENGTHENERUP "
+ "MIDDLE HINGEDDOWN RIGHT BARB BOLD GREEK CROSSDEWAYS U BRACKETDOUBLE "
+ "ZAPYATAYAB2 TENU PLUS TABDOTTED CRESCENTSCASIAN ALBANIAN DOUBLE HEAD "
+ "MARKCREAMING IN FEARCORNER LEFTWARDSIFTEEN FULL STOP LIGHT MOON "
+ "ARTASERIFS AT BOTTOMNION WITH SERIFSHYPHENATION MARKSMALL NOON ABOVEIDED "
+ "GREEK CROSSORIZONTAL JOINERIGHTH NOTE STEM IMENSIONAL ANGLEINDEPENDENT "
+ "SHININDEX THUMB SIDEHIGH SPACING DOTMAGNIFYING GLASSRISING TONE MARK "
+ "SMALL ROTATIONS INSERTION POINTRIZONTAL ELLIPSEINES CONVERGING HMATULLAH "
+ "ALAYHESLANTED EQUAL TOSMALL CAPITAL ELHOLDING TOGETHERPEN CENTRE "
+ "CROSSLTERNATE HASANTALOWER OVER UPPERSTUCK-OUT TONGUESTRING "
+ "FRETBOARDSTRAIGHT STRETCHSTICKING OUT FARSTERISK OPERATOR PLUS KAK PLUS "
+ "AADIAN SYLLABICS K PERMITTED HEREO-MINOAN SIGN CMLD ASSYRIAN ONE LEFT "
+ "HALF CIRCLELEFT ARROW ABOVENTAIGANA LETTER SANS-SERIF ARROW OR THE IMAGE "
+ "OFYATHOS BASE SIGNLLOW PAN OF FOODTAKANA-HIRAGANA IPPER-MOUTH FACEIRCLE X "
+ "NOTEHEADLIGHT BARB ARROWLIGHT AND RIGHT ISTOS DISC SIGN OLD WHITE "
+ "CIRCLEIVE POINTED STAROLD TAMIL VIRAMAYIR MKPARAQ MEUNEPSILON "
+ "UNDERBARUDLY CRYING FACEEN MILLIONS SIGNRIGHT DOWN BARB END OF TEXT "
+ "MARKUBJOINED LETTER ENTRE WHITE STARENUMERATION SIGNERCURY SUBLIMATERAYS "
+ "AND DOTTED RIGHT HALF BELOWRIGHT HALF BLACKMIDDLE AND RIGHTMIDDLE AND "
+ "BELOWRAIDO RAD REID R TIMES GAN2 TENUUMBER SIGN ABOVEDVUMYA ZAPYATYMI "
+ "TIMES DISH TENUSHU2 PLUS KASKALRESH-AYIN-DALETHREPETITION MARK-WAVY HAMZA "
+ "BELOWE PLUS GAN2 TENUPLE MEASURE REST AND HEAVY RIGHTULDERED OPEN "
+ "BOXECIMAL SEPARATOR AND LIGHT RIGHTEFORE COMPLETIONRECORD SEPARATORWITH "
+ "HEARING AIDWITH CENTRED DOTSIGN RISING TONE WITH BUNNY EARSWITH LEFT "
+ "UPTURNPRECEDING SUBSETQUALS SIGN BELOWWITH HAMZA ABOVEQ WITH HOOK "
+ "TAILTRIPLE CRESCENTSSITION INDICATORPRECHGESANG STEMNAL DIGIT "
+ "SHAPESEVERSED VISARGA EVERY OTHER TIMEMESTVENNY KLYUCHPLACEHOLDER MARKR "
+ "PLUS GAN2 TENUFALLING DIAGONAL WITH DOT INSIDEPOSTPOSITION MENFFERENCE "
+ "BETWEEN CAPPED MOUNTAINFLOORPLANE SPACEND OF PARAGRAPHMURDA "
+ "MAHAPRANABINDING BRACKETNASALIZED TONE-N-ARY SUMMATIONUSTER NOTEHEAD "
+ "BLOCK DIAGONAL NOON WITH KASRANOON FINAL FORMNO GOOD GESTURENJOINING "
+ "MACRONNA DOUBLE HELIXRIGHT RERENGGANATINATE MYSLITEPERTHO PEORTH PPLUS "
+ "SIGN BELOWATA LINK ESCAPEPRISHTHAMATRA EPUT SYMBOL FOR RIGHTWARDS "
+ "TICKRIGHTWARDS AND QUADRUPLE ARROWQUADRUPLE DASH R WITH FISHHOOKPENSION "
+ "RAILWAYRIGHT HALF RINGVERTICAL SECANTREAMY EYEBROWS RECEPTIVE "
+ "EARTHRECITATIVE MARKREVERSE SOLIDUSREVERSED OPEN EGHT REPEAT SIGNON TOP "
+ "OF MODEMNVERTED UBADAMASALTER PAHLAVI BENT OVER INDEXBELOW LONG "
+ "DASHBELGTHOR SYMBOLODO SOFT HYPHENS IN SILHOUETTES ELEVATUS MARKOGOGRAM "
+ "KHAMTI BAR ABOVE UNIONOLIDUS OPERATORNOT APPROXIMATEOND PLACE "
+ "MEDALONJOINED HINGEDONTOUR INTEGRALORIZONTAL COLONORT EQUALS SIGNOUBLE "
+ "BACKSLASHOW-FALLING TONEOWER HALF BLACKRNAMENT STROKE-RMAN PENNY SIGNPEN "
+ "SQUARED DOTTOP RIGHT FATHADOING CARTWHEELFOUR DOTS WITH FOUR "
+ "ENCLOSURESFRACTION DIGIT FTER COMPLETIONDIGA AELA-PILLADIALYTIKA "
+ "TONOSTRIANGULAR MARKDI ALLAHOU ANHUGEMINATION MARKGGLY LINE "
+ "BELOWDESCENDING TONEFORWARD TILTINGGROUP SEPARATORHAKING PARALLELHALF "
+ "FILL SPACETIP ON THE LEFTHEH MEDIAL FORMTILDE DIAERESISTHROWING A "
+ "KISSDAGESH OR MAPIQHOOKED INDEX UPTHREE DISH TENUHORIZONTAL "
+ "DASHHORIZONTAL FILLEH INITIAL FORMDOWNWARDS TRENDUMAI PALAUNG FAE "
+ "ISOLATED FORME MUSICAL NOTESE OVER INFINITYDOWN SEQUENTIALULTIPLICATION "
+ "XUGMENTATION DOTEFT REPEAT SIGNEFTWARDS ARROWSDOUBLE TRIANGLEUBLE RING "
+ "BELOWERICAN FOOTBALLESIDE LESS-THANU PLUS U PLUS UESSARON CHRONONETIC "
+ "VERSE SIGNTWO WITH STROKEEXPONENT SYMBOLTVIMADUR SYMBOLLONG VOWEL SIGNLD "
+ "TAMIL SHORT LEFT DOWN BARB LEFT HALF BELOWLEFT HALF BLACKCIRCUIT-OUTPUT "
+ "LEFT HAND INDEXLETTER CAPITAL LEVEL TONE MARKLEVEN FULL STOPLIGHT AND "
+ "LEFT LMOST EQUAL TO UR POINTED STARLONG HOOK BELOWCKET CALCULATORLOOK OF "
+ "TRIUMPHLOSED INSULAR GCAPITAL LETTERSSIXTEENTH NOTESMALAKON CHROMA "
+ "MARRYING MAIDENMEEM FINAL FORMBROWS STRAIGHT BREAKING HYPHENMIDDLE "
+ "DIAGONALSHORT OVER LONGINVERTED STROKEHOUSAND STATERSHREE DOTS "
+ "BELOWIAMOND UNDERBARIDING ENCLOSUREIGN PALI VIRAMAIMISEOS "
+ "CHRONOUIMPERFECTA RESTING SYMBOL FOR CORNER WITH DOTINGLE HEAD MARKINUS "
+ "SIGN BELOWINVERTED LAZY SSHITA PLUS GISHIRCUMFLEX BELOWTAI LAING TONE-ITH "
+ "FINGERNAILSIZED WHEELCHAIRSTROKE NOT SIGNKISIM5 TIMES BISTERESIS SYMBOLST "
+ "SYRIAC CROSSST QUARTER MOONSSICAL BUILDINGCLOSED BY CURVELATION "
+ "FUNCTIONXTEEN FULL STOPAMARITAN SOURCE WITH DESCENDER CORNER "
+ "BRACKET-CARRIER LETTERZAIN FINAL FORM OVER SIG4 SHU2 NEPOSTOYANNAYA OVER "
+ "MOUNTAINSVOWEL SEPARATORZERO WITH SLASH TOUCHING INDEX THUMB STRAIGHT "
+ "CLOUD AND RAINYNCHRONOUS IDLE TIMES IGI GUNU WITH RIGHT LEGVOWEL "
+ "SHORTENERWITH DOWN ARROWACHES THE LIMITWITH RAIN DROPSAI LAING DIGIT "
+ "OPERATOR WITH ALMOST EQUAL TOWHITE DOT RIGHTWALLPLANE SPACE PLUS HI PLUS "
+ "A-PIECE SWIMSUIT THROUGH CIRCLE AND LOWER LEFTAMOUNT OF CHECK DEYTEROU "
+ "ICHOU WITH DIAERESIS ALTERNATE FORM-NO-EVIL MONKEY PARESTIGMENON ALIF "
+ "LENGTHENER2 CHARIOT FRAMEALAYHE ASSALLAMAND PARALLEL TOBLACK "
+ "TRIANGLEBLADE SCISSORSPARATED SYMBOLD-UP NEWSPAPERPARTMENT STOREFORWARD "
+ "INDEX INOLOGICAL DOTMOTHETIC ABOVEFINAL ANUSVARAAND COLD SWEATINVERTED "
+ "BIRGASEL LOCOMOTIVEUP RIGHT BARB OVER GUD LUGALINSERTION SIGNVRON "
+ "SNOWFLAKESEPARATOR MARKING HANDS SIGNSMALL TRIANGLEUSPENSION MARKDASIA "
+ "PNEUMATAINFINITY BELOWPAO KAREN TONESHESHIG TIMES IGHTWARDS VANEUNIT "
+ "SEPARATORTRIANGLE WITH XO EKFONITIKONTERMINAL MARK-UNION OPERATORDI "
+ "ALLAAHU ANHWITH LEFT HOOKPPED MIDDLE UPDEYTEROS ICHOSDIAGONAL "
+ "MOUTHTETARTOS ICHOSDIAGONAL PATH PROTECTED AREAMRACHNOTIKHAYARING "
+ "MEGAPHONEGERED TREMOLO-BAG MEMBERSHIP HASER FOR VAVWITH DOT BELOWPEN MARK "
+ "BELOWSMALL LETTER JLOTUS POSITIONSMALL LETTER DBHATTIPROLU AAANGLE "
+ "OPENING SHAN MEDIAL WAPLE WITH HEARTPLETE INFINITYLOWER DIAGONALPLITTING "
+ "APARTED SYMBOL FOR IKHAYA PUTNAYATELPIECE CLOCKWITH FATHATAN CERTAINTY "
+ "SIGNENDED MULTIMAPLEFTWARDS AND CRIFICIAL WINEYOUTHFUL FOLLYEND OF "
+ "SECTIONONE SOUND WAVELEFTWARDS TICKTWO WHITE DOTSSTRONG ISOLATEENNA WITH "
+ "BARSCEPTER OF JOVECENTURIAL SIGNOOTNOTE MARKERTWO ENCLOSURESLESS-THAN NOR "
+ "-HEADED ARROW SPEECH BUBBLESEMIVOWEL SIGN ALLAJALALOUHOUCOLON OPERATORUAL "
+ "WHEELCHAIRSQUIGGLE ARROWOBLIQUE HYPHENERIAL ARAMAIC ERIC "
+ "INDICATOREPENTHETIC YUTLETTER OVERLAPNYI ZLA NAA DAUBHAYATO MUKHAERTICAL "
+ "JOINEROLD RESOLUTIONALF TREE TRUNKVONIC ASTERISKLACE OF SAJDAHLITTLE "
+ "SECTIONOT TILDE ABOVELIGHTLY SMALL UPPED INDEX UPOTHERS CIRCLEDTURKIC "
+ "LETTER FATHATAN ABOVEISED ROUND DOTSECOND SUBUNITLINE EXTENSION1 OVER "
+ "LAK-081ROSS ON SHIELDIRCULAR VIRAMAFFED FLATBREADFFICE BUILDINGOUR OBOLS "
+ "SIGNSMOKING SYMBOLOUSING THUNDERLEVEN TWELFTHSSURROUND FROM OPPOSING "
+ "PIRIGJOINED SQUARESAMNUC PII KUUHORANGE DIAMONDORD SEPARATOR EXCLAMATION "
+ "OHTWO DOT LEADERINVERTED DAMMANORTH ARABIAN -CURRENCY SIGNIWAZ TIR TYR "
+ "TIVE OBOLS SIGNIVE KEY SYMBOLOSITION SYMBOLITA PLUS GISH ISSION "
+ "TICKETSVERTICAL HEAVYSIDE-DOWN FACEZAKAYA LANTERNTIMES OPERATORDIRECTION "
+ "FLIPREH FINAL FORMRD PLACE MEDALAU LENGTH MARKWORD SEPARATOR CROSSING "
+ "ESH2GYPTOLOGICAL AVERTICAL LIGHTDOUBLE-STRUCK DIO MICROPHONEVERTICAL "
+ "ABOVEDOES NOT EXISTGHT WITH STARSGUNU TIMES ASHAFETY SCISSORSHIRD-STAGE "
+ "HLIREATIVE HEAVENTHER CHRISTMASAROUND-PROFILEHREE-LEGGED TEVENIENCE "
+ "STOREQUINARIUS SIGNVERTICAL COLONRIGHT CROSSBARUNDER RELATIONMENSION "
+ "ORIGINTHOUSANDS MARKUND MARK ABOVEZAH WITH MEEM REVERSED-SCHWA WITH LONG "
+ "LEGREE-LINE STAFFMEDIUM DIAMONDTHOUSANDS SIGNTHAKA ANUDATTAAI LENGTH "
+ "MARKTOP HALF BLACK AND DIAERESISTRANSMIT STATEDUN3 GUNU GUNUTHALAN ETHEL "
+ "OTHREE POINTED TIMES SHU TENUMID-LEVEL TONEHESIVE BANDAGERRIAGE RETURN OF "
+ "THE HORNSAPPED PRESENT-ESASA DOTTEDMALO POVYSHE GTER TSHEG MADOUBLE "
+ "STROKEEVERSED DAMMACULATED LORRYHIEROGLYPHIC MESSENIAN TENDVOECHELNAYA "
+ "JES SU NGA ROGYA GRAM SHADOPPOSING NAGARPENTRY PLANETU WAS-SALAAMDOUBLE "
+ "CIRCLEVERLAY MIDDLEAN RUPEE SIGNVERGREEN TREEROTATED BIRGABY "
+ "DEFINITIONURNED W BELOWUPERIMPOSED XLISION SYMBOLUPONDIUS SIGNDOTTED "
+ "ZLAMA IRCLED INDEX NING MOVEMENTIOT SYLLABLE FICATION CARDNINE "
+ "TWELFTHSINVERTED TURNITING THROUGHHINESE TONE YSYNDESMOS NEOIVE SLOW SIGN "
+ "AND SKI BOOTAMUHU ALAYNAAIVE POINT ONEDOUBLE MUCAADHERICAL ANGLEDOUBLE "
+ "HYPHEN AND YEN SIGNMALL LETTER ZOTEHEAD BLACKISH LIRA SIGNNUMERIC SIGN "
+ "MEDIUM SQUARE VARIANT FORMERTION SYMBOLAR WITH QUILLHAKASSIAN CHEARLAUG "
+ "SYMBOLSAMYOK SANNYACIRCLE INSIDESSAGE WAITINGUPSILON WITH U WITH "
+ "STROKENUMERATOR ONEOLVING HEARTSOMAN NUMERAL CHRYSANTHEMUMSTABLE "
+ "SYMBOLL-TYPE SYMBOLOBLIQUE LINE ARCHAIC KOPPAER BOARD FILLS KRYZHEM ON S "
+ "KAI APOTHESHAM DIGIT ONEMASORA CIRCLELATERAL CLICKNTY FULL STOPOGOTYPE "
+ "SIGN S UP TOGETHER-PER-EM SPACE-OR-PLUS SIGNLEFT CROSSBARSAL PLUS "
+ "TUG2ARGOSYNTHETON-OFF CALENDARCITATION MARKTIRTA TUMETESEUROPE-AFRICAYOD "
+ "YOD PATAHCROSSING GAN2WO-LINE STAFFYMBOL TAU RHOKAPYEOUNPIEUPRTABLE "
+ "STEREOSILI PNEUMATACROSSING GABAOON NOTEHEAD CROSSING MUSHARROW "
+ "OVERLAYH-TYPE SYMBOLVERTICAL BARS OPPOSING KUREMPHATIC TONESIGN "
+ "AVAGRAHASIGN PAMUDPODVERTICAL FILLONAL COMPUTERMARKS CHAPTERMELODIC "
+ "QITSACRIPTION TAKESTERTIUS SIGNCRIPTIONAL PAK WORK SYMBOLLEGETOS ICHOSONG "
+ "RIGHT LEGCHECKER BOARDUPWARDS TRENDONG-LEGGED DEONGRATULATIONARRED "
+ "TRIDENTSHESH PLUS KII WITH STROKEGAR FRACTION BAT AND BALL CROSSING "
+ "KA2WITH INTEGRALAUDATE CHRIVIFOREMENTIONEDMODIFIER MARK WITHOUT SNOWED "
+ "PAPERCLIPSZHOU NUMERAL VEN POINT ONENG TERMINATORPPOSING LUGALGAW KAREN "
+ "SHADIAERESIZED UWITH ASTERISKBOHAIRIC KHEIPA NJI PIPAEMED DOUBLE VERBASAN "
+ "LETTER MINDER RIBBONSIA-AUSTRALIA WITH JEGOGANHREE TWELFTHSPAIRED "
+ "ARROWSUSICAL LEIMMA BZHI MIG CANRN PENTATHLONLVEOLAR CLICKTE ORDER "
+ "MARKGIFT ENVELOPEVE-LINE STAFFSMALL LETTERSYUUKALEAPINTURIZONTAL "
+ "TAILEELING PERSON WITH TEE TOPPLUS OPERATORFROWNING FACEIMAGE "
+ "BRACKETRIPLE SVARITAIGHT TWELFTHSRACKETS ABOVEWAVY OVERLINELVE FULL "
+ "STOPTHIRD SUBUNITMINUS WHITE XMINUS SIMILARILE SEPARATORBACKSLASH BARW "
+ "RING INSIDE DIMINUTION-1FINAL SEMKATHEHU FEOH FE FFULL SURROUND HEADED "
+ "ARROWSELECTED AREAUDDISA SIRRAHDIC MARK SIGNBALL AND HOOPUSHING "
+ "UPWARDWAW-AYIN-RESHOUT MIDDLE UP WITH INK PENOURTH SUBUNITRANKS CASKET "
+ "INVERTED FORKVICE CONTROL DIRECTIONAL TROFLEX CLICKRIGHT "
+ "HARPOONAWELLEMET YAZNAP PIZZICATOFINAL LETTER MAILBOX WITH TOP HALF "
+ "RINGANNED LEATHERLOCATION SIGNACCOMMODATION B BAR SYMBOLBOTTOM CORNERFT "
+ "ARROWHEAD TED HAND SIGNUFFLE PRODUCTMULTIOCULAR OQUARTERS SIGNEAVENLY "
+ "EARTHPREPONDERANCEFIXED-FORM RAIFI ROHINGYA LOCK WITH KEYILABIAL "
+ "CLICKINTEREST SIGNWAVY LOW LINEEDIC ANUSVARAMOBILE PHONESVOWEL SIGN "
+ "PABOWING DEEPLY WITH OVERBARUE OF LIBERTY TIMES KASKALLEFT-LIGHTEDVOLTAGE "
+ "SIGNCRESCENT BARSHORT RIKRIKNUITY SYMBOLUPPER CORNERENOS CHRONOUDIGRAPH "
+ "YORIALLPOINT PENDIGRAPH KOTOMPTY CENTRE LU PLUS ESH2DICTION SIGNLEADING "
+ "EYESMPHASIS MARKMEDARY CAMELMBELLISHMENTACE INTEGRALS SUBPUNCTISLUS "
+ "NOTEHEADLOWERED FLAGDOWN NEUTRALN ELEMENT OFENT ARROW POULL NOTEHEAD-MAIL "
+ "SYMBOLUME INTEGRALSHED BARLINESMALL DOUBLELEFT HARPOONCROSSING "
+ "NUNMONOGRAPH UKMUM TIMES PAMEDIUM SHAFTNGLE BARLINEDOUBLE ARROWEGIN "
+ "SEGMENTUBSCRIPT TWOMADDA ABOVE MALL SECTIONAFU LEERAEWAWDATA SQUARESMALL "
+ "TRIPLELICKING LIPSAA AS-SALAAM-DZUD RTAGS DASHED ARROWNORTHERN TSESMILING "
+ "FACEEIGHTH NOTESMIDDLE PIECELL MODIFIER-UN WITH RAYSACUTE ACCENTSECTION "
+ "SIGNLINKING MARKLINGING FIREDOT OPERATORLLE PATTERN NJALA GONDI LIMBS "
+ "DIGITSDOUBLE ARCH WITH INDEX NDING PERSONM NSHUT NYAMLER CONSTANTSH ZIDA "
+ "TENUNCK CONSTANTCROSSING LU2CROSSING KALCROSSING GI4DENTAL CLICKNATURAL "
+ "SIGNENARIUS SIGNNARROW SHAFTDOWN HARPOONDUG TIMES NIUGHT BALLOONMING TO "
+ "MEETNERSHIP SIGNNEPOSTOYANNYMETA STAVROUEMELY HEAVY WITH DAGESHEAGULL "
+ "BELOW SKEWED LEFTLOWER CORNERNOTCHED HOOKNOTCHED TAILEMISOFT SIGNEEPING "
+ "SMALLDE MARK SIGNMANNAZ MAN MUH PLUS GISHSAZ IS ISS IRNAM BCAD MARISTMAS "
+ "TREETEARS OF JOYTE SEPARATOR IN TRIANGLEIN MIDDLE UPBINING MARK PHEME "
+ "JOINERANG KHANG GYBLACK CIRCLEFOUNTAIN PENFORMING ARTSINDEX MIDDLEPOETRY "
+ "MARK-GAW KAREN EURION CHRONONPOUTING FACEIGATURE SHRITERNATE AYINPORT "
+ "CONTROLBEHIND CLOUDUTH-SLAVEY KUTH ARABIAN TRIPLE DANDATRIPLE "
+ "FLAMEBETWEEN LIPSFT RERENGGANINUSOID SIGNUSEATED FACEINVERTEBRATEAND "
+ "OPERATORBRATION MODEAND CRESCENTBRIDGE ABOVEBSCRIPT ALEFOUR TWELFTHSYAN "
+ "NUMERAL IRAGANA HOKAOUGHT BUBBLEFERENCE MARKOUCHES THUMBFEMININE "
+ "DOTBUTTON MOUSEFOLDED HANDSBLOWING FACEBLUE DIAMONDING ENVELOPE "
+ "KLYUCHEVAYAING HITTING ING OPERATORXIRON KLASMAFLAG ON POSTROLLING EYES "
+ "LINE SYMBOLINTEGRATION OVER KASKAL RIGHT DOUBLERED KEYBOARD AND "
+ "PICTUREGUARDED AREAGROUND SLIDEGREEN DRAGONRCHAIC SAMPITHREE HEARTSWITH "
+ "SMALL VRANCHING OUTHEAD-BANDAGEHAND FORMAT RIAL TRAMWAYRIAGE SYMBOLHASIS "
+ "SYMBOLARALLELOGRAMHALF BRACKETREVERSE MARKVER EQUAL TOAR DIAERESISHAH "
+ "WITH DALREN CROSSINGREFACE COLONHIBITED SIGNBAHIRGOMUKHAQUARTER "
+ "SIGNQUARED ARROW CROSSING GUBACK OF HANDQUIRREL TAILIDENTICAL TOGEBA "
+ "KAREN IRING OVERLAYVAKRAHASANYAPROTOS ICHOSGBY FOOTBALLRAFFIC LIGHTHREE "
+ "FINGERSATNAH HAFUKHVICTORY HANDTOP-LIGHTED ATTOOED HEADRAH BEN YOMO6 LONG "
+ "NGGOO-SHAPED SIGNTHODOX CROSSHYPHEN-MINUSRIGHT SINGLETHIC LETTER TRAGRAM "
+ "FOR THETA SYMBOLWIGGLY FENCEOPPOSING LU2 OVER KISIM5OQ NSHUT YUMLARGE "
+ "DOUBLE ON PEDESTALS ABOVE SIGN OVER MIDDLEALT PAN SIGNOPLE HUGGINGOHAZARD "
+ "SIGNLATALIZATIONYOD TRIANGLEOGOGRAM NYAJYOUTHFULNESSON US SYMBOLYMBOL "
+ "BINDU OK HAND SIGNKANA REPEAT CIRCLED PLUSLARGE TRIPLECENDING NODESS-THAN "
+ "SIGNEVERING FACEERPENDICULARKLYUCHEVAYA CK-O-LANTERNOPENING LEFTSUR OVER "
+ "SURKAPPA SYMBOLCIRCLES AND OING TO MEETOID NOTEHEADOTTOM HALF OT "
+ "MONGKEUAEQCHARACTER-1BCABBAGE-TREEALTERNATING FALLING DOTS OVER TWO "
+ "PIIRTY-SECOND BYSMAL WATERONISHED FACEETRETES SIGNLAYING CARDSCHAIR "
+ "SYMBOLKHAMTI TONE-KHMIMIC KHEICHARACTER-18CALENDAR PADCIAN LETTER "
+ "-SIMPLIFIED IVE TWELFTHS OF ANTIMONYROUNDED ZEROHREE BALUDAE WITH "
+ "VEILGRAMMA SIGNHORA DIGIT ULO TWO SUMLACK SULFURTRAIGHT WAWL OF THREADL "
+ "TIMES LAL0 FOOTSTOOL WITH JACKSWHITE JOKERI TIMES NUNI TIMES BADESH "
+ "DIGRAPHACKED COMMATHIRDS SIGNLACKLETTER MACING FACE-OFF SYMBOLLEFT "
+ "SYMBOLLEFT SINGLEXAGRAM FOR ENTHESIZED 6 LONG NGGE-MINUS SIGN WITH "
+ "FLASHE2 TIMES ANLEEP SYMBOLLEAF CLOVERHEELED SHOEWO TWELFTHSHAGGAR "
+ "YAZHLATIN CROSSERCENT SIGNHEAVEN MARKDUATION CAPHEATED FACE WITH "
+ "COMMAEPIDAUREAN HAWH HMONG WITH CARONHANG KHUDAMSINGLE AND 5 LONG "
+ "MBOOLCE TSA CANMBA BAYANNALD SCRIPT XSIMILE SIGNMBLER GLASSLD POLISH "
+ "OLEFT DOUBLESSANGKIYEOKGRAVE-ACUTEACUTE-GRAVEHOKHLOM ON THREE "
+ "TIMESEORGIAN NARSTERED SIGNHLETIC SHOEACTIVE SIGNHITE DRAGONGSUM "
+ "-KHYILDYO CHRONONGUISED FACETONAL MARK UMAN FIGUREWASLA ABOVETIEE "
+ "SHEUOQTIGHT ACUTE WITH DASIASPIRATED FAHIGH STROKELETION MARKJECT "
+ "SYMBOLLON SKEWED JIHVAMULIYAUG RTAGS GYSVASTI SIGNINDICESIMA TRUNCATED "
+ "AEEZING FACELEU SATANGAINDERGARTENJOYOUS LAKEKAARA POLLUFOURTH ROOT WITH "
+ "TRILLZZA WA JALL WITH TITLOUISHED FACELOSED ENTRYSPEED TRAININ EQUAL "
+ "TOLOSING MARKLOTI NAGRI IMULTANEOUSUETTE BREADTUNE "
+ "COOKIEYEORINHIEUHIRCLED TEXTIPLE TONGUEFGHANI SIGNTA EQUAL TOISIGOTHIC "
+ "ZWING NEEDLEFINAL SIGMA-COPPER ORE WRIST FLEXFIRE ENGINEIVERY TRUCKUBLE "
+ "TONGUESYURA SASAKWINKING EYEIX TWELFTHSWE PALAUNG SYMBOL VIDJ WITH "
+ "MAPIQIEN MONSTERKRAINIAN IETRESS SIGN LTED FLOWERGE AT "
+ "NIGHTKTIESELSKABLTERNATE YAXI RADICAL LINE FILLERLU PLUS IGIGENTLE WIND3 "
+ "LONG NGGOTETRAFONIASXESTES SIGNTH-THALATHAEAVER DENE ENG DIGRAPHSTEAMY "
+ "ROOMGHAIN WITH THAM DIGIT LUPOVODNAYAIBLE-CREE YTWO FINGERSEUNJOMNDEUQTY "
+ "THOUSANDILIQUA SIGNEDICAL MASKILCROW SIGNABOVE RIGHTIL "
+ "FRAGMENTXTINGUISHERTENS DIGIT WITH GARDENEN STRAIGHTTRIAN CAMELGAP "
+ "FILLER-SMALL CLOUDSTORIC SITEGAYANUKITTA WITH PLATELT OF CLOTHETEI MAYEK "
+ "TRESVETLAYASECOND MARKPHNAEK MUANRISING DOTSBETA SYMBOLZIGZAG LINEUTH "
+ "CORNERSCURVED BENDRITING HANDBELOW RIGHTPODCHASHIEMUPADHMANIYAUTING "
+ "WHALECROSSING URPARAKALESMABLACK ARROWCROSSING BUCROSSING ENCROSSING "
+ "IMCROSSING PIRIPLE PRIMENSE CHEEKS PROPORTIONCTION MARK CTION "
+ "MARK-PERISPOMENI I ZAPYATOYAWNING FACEDE KIKAKUI VARYS ICHOSQUERED "
+ "FLAGQUIQUADRATEND OF PIECEVYKA ABOVE SHOE STILEND ODD SIGNSHAAYATHIYAVE "
+ "OF PEACEDENT EMBLEMNBLENDED UKRIGHT-LIGHTRIGHT-HAND UNJO WYNN W S "
+ "ZAPYATOYNIKOLSBURG POST OFFICEVA V CHELNUBANK "
+ "SYMBOLDALETH-RESHVAMAGOMUKHAPUT MORTUUMNG LEFT LEGRING LIQUIDDASH SYMBOL "
+ "DECORATIONCAN RGYINGSRPOON ABOVECARET TILDE OF FLOWERSOLD NUBIAN ORT "
+ "BARLINEAMUSED FACEORCE SYMBOLVISARGA ONERYVNIA SIGNCK SEXTANT-OHINGYA "
+ "YEHOF MASHFAATZERO THIRDSOF ENVELOPERUNNING MANONIAN SIGN OVER BULUG "
+ "OVER IDIM CH AND LAMPCHING CHICKCELANDIC-YRCE OF PIZZAOMAN SIYAQ "
+ "CCUMULATIONOPPOSING ENOPPOSING IMOR OPERATORBOTTOM MARKNYIS -KHYILCONTAIN "
+ "AS BREVE BELOWOUTHERN TSEROR-BARRED RONTHISMATAOVERSTRUCK COND "
+ "SCREENNUSVARA ONENUN HAFUKHANUMBER ZEROROKUTASTI ANUMBER SIGNCREDIT "
+ "SIGNNTIMONY ORE PLUS MASH2OUBLE ACUTEBZHI -KHYIL PLUS NUNUZURRENT "
+ "SIGNOUBLE DANDANITIAL IZHECOMBINATIONOUNDED FACEROSS ACCENTBUMPY "
+ "ABOVERCHAIC JNYAMIDDLE STEMASE TO THE AND MACRONDONG "
+ "TSHUGSDOACHASHMEEREAKTHROUGH TIMES ESH2AILLESS PHIRIGHT GUARDMONOCULAR "
+ "OMOVED BELOWDIATONON DIATH PRODUCTRANSMISSIONRIGHT HEAVYRIGHT LIGHTMFON "
+ "PIPAEMME LONG CANMED RGYINGSARAM GONDI UPPER HALFRESPONDS "
+ "TOAESCULAPIUSAESHAE NYAMARM SPIRAL ARMS RAISEDDOLLAR SIGNDOUBLE "
+ "SHADDOUBLE RINGDOUBLE MARKARPEGGIATO AGAZ DAEG DMICAL HEARTMIDDLE "
+ "BENTDOUBLE AND MIDDLE HOOKAGONAL SIGNDESK PERSONSHEQEL SIGNUNIT DIGIT "
+ "MUUSIKATOANMUNCIA SIGNRADITIONAL N THE VERGERACHMA SIGNATION SPACE TACK "
+ "BELOWRA SOMPENG ATION POINTRAISED FLAGRAGGISMATAOTING STAR1 PLASTICSZH "
+ "DIGRAPHFAHRENHEITQUISH QUADOSTAL MARKVEL SLIDERTHMIKON N 1 LONG "
+ "MBEURIPIGMENTIT MBAAKETC WITH DOTROUND DOT HEAVY BEATISMUTH OREGHT "
+ "LIFTERWO SHORTS OUT INDEX URVED OMETBSTRUCTIONHERMOMETERION BOTTLEXED "
+ "BICEPSBROKEN BARHAAPRAANA WING HEARTOUTER JOIN AND BREVEFINAL HETHOUTHERN "
+ "TAATRICHISMAOSSED SHEIVIOUS PAGEAYER BEADS AND ARROWOUND OMEGA AND "
+ "ACUTEFFICULTIESTAIL GLASSATTY WITH OUR FIFTHSRSI SYMBOLTWO SHORTSOON "
+ "LILITHOON SELENAEUTRAL YERSTRUCTION RGE CIRCLEUR YIG MGOUR HUNDREDR2 PLUS "
+ "SUYMBOL AIVAOP NKAARAEKAI SYMBOLKA SATANGAK2 PLUS BUGIMEL-HETHRHO "
+ "SYMBOLETTA-PILLAKINDI MVOPSTRAL SIGNHAMZA MARKI ARCHAIONTYPE COLONOPEN "
+ "SHELFCHAD RTAGSUR CORNERSCH BALLOONRGE SQUARESTROM SIGNTWO THIRDSRESH "
+ "BELOW5 PLASTICS OF DHARMAHEADSTROKEORTHERN TARIGHT SIGNIXTHS DISHROUNDED "
+ "ERF SHE-GOATT AND BOLT3 PLASTICSHUNGARIAN TIMES SIGNTING HEARTEVERSED PE6 "
+ "PLASTICSJONG TILE REVERSED IITH DIGIT SYLLABLE MZU OVER ZUCAPITAL ETOROME "
+ "SIGNVERAGE BOXPLUS BELOWIKRON ISONUTH OR SPYPLUS ERIN2TEMPLATIONHOOK "
+ "ABOVEPLUS NAGA BELOW LEFTWITH SPOONHAN DIGIT FRONT WALLY AND RICEGREE "
+ "SLASHRCHAIC KHAWITH STRAWANGKHANKHUGAGE CLAIMFTOGGOS OUGGING FACERING "
+ "ABOVEILE FOLDERIDDLE MARKIGATING RA DRAWINGS TERNATIVE PRALINEAR "
+ "GBAKURUNENTESE CROSSPPOPOTAMUSRIGHT HOOKIED SHRIMPTRESS AND "
+ "TREFACTIONHREE ABOVEXHEEJ CEEVIDEOGRAPH POLICE CARANGULAR TOTOP "
+ "CORNERGANDA MARKHOTIC HOOKPOUND SIGNIGATURE OEGAS BZUNG TRETCHED "
+ "CROEZENIAN INHERENT A AND MOUSEBOLD SHAFT2 LONG MBOING-SHIFT ANDHI "
+ "MARKING LARGE INITIAL RAROAD OMEGAAUTOMOBILE2 PLASTICSFOR RECORDINDU "
+ "BELOWTAMAN SIGNUSEL HORSEGOLUBCHIK THDAY CAKERED DRAGONTHAPASCAN 2 PLUS "
+ "ASH AND KNIFEUSHED FACEVIE CAMERA LATE FORMICAL TAPERRDHACANDRAWITH "
+ "WINGSASTERISCUSICK FIGUREPASSIMBANG KABA TENUPEDAL MARK7 PLASTICSRKING "
+ "FACE4 PLASTICSRECIPITATEFORMATTINGGUA PI MAOINDEX BENTBLACK "
+ "FLAGASPIRATIONGGRAVATIONBA SATANGALPAPRAANA WITH RAIN WITH PLUSA TANG "
+ "LAIED FINGERSNTITY MARKED FIGURE-N NGGEUAETALENT SIGN WITH "
+ "PAGEENETRATIONNTO SHRINESHMIRI YEHLEFT-HAND -LUE KARANENS SYMBOLLEK ATTAK "
+ "NAKE BELOWEDESTRIANSLENDED YUS POVODNAYALOWER HOOKALEF LAMEDCROSS MARK "
+ "THOUSANDSCROPHONIC UBLE DASH WITH RINGSHARP SIGNLEFT GUARDLEFT "
+ "LIGHTMONOGRAM BLEFT HEAVYMONOFONIASDIRGA MUREEONGCHIEUMMONOSPACE AILED "
+ "BIRD PLUS SHU2EARTH MARKW OR MODELCOMPONENT-COMPONENT OANDAKHIATUPPER "
+ "HOOKNUMBER TENDIATONIKI LTERNATE UA PLUS KURLTIC CROSSSBUB "
+ "-CHALENTHUSIASMLEFT SERIFA PLUS IGIEBENSTIMME WITH LOW DIGIT ZEROMONTH "
+ "SIGNSGOR RTAGSSMALL TAH EIGHTIETHSLONG FINALLONG OVER UP HARPOONZAR "
+ "AMULETNDU TEMPLELONG TSHEGCY MESSAGEDA PLUS HANGUAGE TAGUP OR DOWNUP "
+ "NEUTRALNGLICANA WLLOW HEARTDA SATANGA SCHROEDERSELINE ESHAB2 TIMES EICH "
+ "STARKABATA TREED WITH DOTLOGICAL ORAKKHANGYAOSMILO SIGNNASPIRATEDUNKIA "
+ "SIGNLHAG RTAGSLGIZ EOLHX WITH TAILSPACE MARKCURLED WAWNANGMONTHONOTE WITH "
+ "LET SYMBOLSCAN LINE-ND SEGMENTLINDRICITYLIMITATIONDED PERSONNDA PA "
+ "NJISE-CREE SKLIGHT BULBLIGHT BEATMOTORCYCLE WITH TICKEEKING EYE RGYA "
+ "GRAMCURLY HAIRELT BUCKLE RESUPINUSMEL SYMBOLMALL ALEPHSSANGARAEAON MEDIAL "
+ "E PLUS SUMCISIVENESSADAK BINDILANE MERGE WITH EGGS TIMES SHESS OF MILKU "
+ "CIN HAU UM ROTUNDAKRYZHEVAYAWHOLE NOTEST PALETTEOLON EQUALLACK JOKEROLING "
+ "FACEDUOUS TREEWHITE HAIRRUPEE MARKLA USED ASMEEM ABOVEUMAN EARTHSIDEWAYS "
+ "IZEIRO SIGNU2 PLUS BACIRCLED CAST-FEEDINGOMMA BELOWDOUBLE BARSSANGPIEUPM "
+ "STALLIONMINO TILE OVER KAD5COLATE BARAEDA-PILLAUAM TSHOOJRUDIMENTA "
+ "-SHAPED HASIXTEENTHSEQUIHOPPERALLY MARK LE LETTER ME PLUS "
+ "ENLE-DELAYEDCHECK MARKEARLY FORMUARDEDNESSADDA WITH OF HYGIEIAWHITE "
+ "FLAGMILLE SIGN WITH BASE WITH BELTMADDA MARK SPARKLERHEADSCARFHARD SIGNIA "
+ "SYMBOLHARACTERSSEMICOLONNGER SHIPZ DIGRAPHNCLOSING NFORZANDOSHAB CEEBLOND "
+ "HAIRIDEWAYS UARCHAIC MRFUL FACEQUSHSHAYAXHAUSTIONNG SANDALIDEOGRAM "
+ "QUADCOLONLONG TIP TIMES PAPSEPTEMBERQUEEN OF IALECT-P NDAILING ICE CREAM5 "
+ "CYPERUS5 LONG JO AND TAILWRY SMILEWORDSPACEMRACHNAYAHINOCEROSHOT "
+ "SASAKMAEMGBIEEWRINKLES HIMA SIMARED JOKERMUKPHRENGRCHAIC IIHIYYAALAAREAK "
+ "HERE TIMES HAM HE-GOATRDEL DKARRCHAIC RALVIN SIGNREDNE ON APODEXIAHOOK "
+ "MARKMBROIDERYZAL SASAKMALL RINGHWAZ EH E3 PLUS ANTIMES NA2RIED FACE5 "
+ "BATHTUBLOWER DOTI PLUS LI STREAMERMHANCHOLLR PLUS RA "
+ "TROMIKONMETOBELUSMARK CIM ZAKRYTAYAHREE FOR AND CURLHI SYMBOLMARK SHADNA "
+ "KHONNAXCITEMENTREFORMED AND BELTSIVE FACE TIMES UDISEN-ISEN PLUS LAL "
+ "PLUS KU3ROTATION-OTAL SIGNOF STIMME-STACCATO PLUS GUDT ON BONE PLUS GALS "
+ "DIGRAPHODIASTOLET OF MEATLARGEMENTYRANISMA OKED HEADITRA SIGNZERO "
+ "SIGNOKED TAILLAN SIGN OF BLOODIVE-PULL-IVINATIONNVERTED ROUTH WIND PLUS "
+ "ZA7 PLUS TUROUT MOUTHYEAR SIGNYEH ABOVEYEH WITH OURA SIGNORTH "
+ "WINDTAKHALLUS PLUS SAGSPIRITUS IRST MARKTABE SIGNOCCLUSIONZENE RINGON "
+ "GROUNDL ME HANDKYO TOWERON TEUAEQSTEBASKETRTER MARKRUM CLEF-OO DENNENKU "
+ "RU KHAKSTREPTON OVER LUMONE MARK- OVER BALKEMPHRENGONE THIRDSTRELNAYARTS "
+ "MEDAL0 LONG LEONG GRAVEKING BOOTONGSEONG "
+ "RPORATIONOKOUFISMAORT-TWIG-SSANGSIOS1 CHARIOT OF PAPERJERUSALEMLACKFOOT "
+ "RWARI DDAOM SYMBOLK GESTUREKA- SHOG KAMEYTSA OP HALF OSTAL BALLPLE "
+ "HEARTLITTLE UP GARSHUNILISSANDO IGN NUKTAIGN SAFHAIGN TOMPILINE FACETEH "
+ "ABOVELIGHTNING-AMMONIACIGHTH ASHTED PLANT RICKSHAWNO TELEIAPIDERY HAILE "
+ "TILDE247 DIPTEILIPPINE Y BLOSSOMNIGHT OF NGUN SIGNPROJECTORZIR SASAKSMALL "
+ "YUSPPOSITIONLLABLE OMPPOINTED LLABLE B0NIGGAHITA RA OR RINIHSHVASASOF "
+ "PASUQ FROM BARLIVERANCENING SIGNIGH HAMZAP ELAMITEING LANESP DIGRAPH-LOW "
+ "TONEING STONENTRACTIONINISHMENTROJECTIONINNYIIYHELEFT "
+ "TACKNUSVARAYAPAA-PILLAOW KAVYKATANDSTILL2 GARMENTOVER MUSHLEFT RINGOVER "
+ "GAN2-MID TONENTERPRISEPENTASEMEPENT SIGNIN SQUAREINAL NOTENSERT AT "
+ "INARBORASRNEY PARAY-FOURTH Y-FOURTHSRO WIDTH NTESSENCE-KHYUD "
+ "PAPANYANGGAING CARD ING DOLLSPADE SUITING GLOVEED DIGIT ETRASIMOUEAVY "
+ "DOWNURNED AYBBITE LIPSEBIT SIGNTRESVETLOAVE ARROWETTI BALLCHOSEONG URLY "
+ "LOOPFROM WALLUTRA MARKFACING UPED PLANETABOVE TO UPPER DOTATHAMASATAL "
+ "RUNOUTCORN FACEVIGINTILEUURDHAJA UBSTITUTEANG CITI URNED GANFEH WITH "
+ "TUKWENTISDEPARTUREURAMAZDAABKHASIAN ANTHAKHATDENT AND VERLONG "
+ "AAJANYALANUR-DE-LISACE NOTE ALI GALI VRAKHIYA G IN HOLEA PLUS "
+ "NAVELOPMENTAOS ICHOSCAPITAL QGREATER YANTAYALANBICYCLISTCAPITAL IANSKRIT "
+ "SUE MAEMBAGITTARIUSBIAL SIGNCARTRIDGEDAD WITH B DIGRAPHEIGHT OF "
+ "CRESCENDOVISARGAYAVOCALIC RBEER MUGSVER LUGALD SALTIRETUTEYASATCANG "
+ "TE-UTONE MAI EEN WITH ER BUBBLEVICE MARKBING CANEGRIK SIGNENTRY SAWWITH "
+ "FACEATTACHED EFAIDRIN CAPITAL DANGGEUAETEFORMED TARISTERA HALF NOTEFISH "
+ "TAILEMPTY SETDOWN SIGNDOWN STEPCOIN SIGNADMA GDANBASE UNITWING STAREURO "
+ "SIGNADEG ADEGARM CLOCKAROSHTHI VOETOCHIEFINAL NUNCHANICAL CUBE ROOTCLOSED "
+ "PLESAME DOTALPAPRANAES AKURU EMBEDDINGAFFE FACEFLAT SIGNAF PERSONBOTH "
+ "BENTTREDECILEALAYALAM ERTY LINEBO GYFU GHALSHELETTTED STEMDOWN HANDBO "
+ "BAIMAIHALF SIGNELEGRAPH AISED DOTFINAL NGABRUL SHADFOUR BENTAS MEMBERETER "
+ "SIGNTO CORNERERCIAL ATE AT LEFTUNGSEONG VANAGARI URUZ UR UVINE "
+ "LEAFUPTSTIMMEUVUZHAKKUAINTBRUSHFINAL MEMDRAM SIGNHAIKSUKI "
+ "UNGLASSESCHAVIYANICOMPLETEDWASH TAILUMED HEADELLOWSHIPTRAIGHT UDUS "
+ "RTAGSVEUAENGAMANEROSIS KAIYARAAEVEN OF CHATTAWA OVER "
+ "KGKATAKANAKASRATANETRASEMEL POLISHETA SIGNCK CHARTET SHOESOHM SIGN PLUS "
+ "DI PLUS DUL-LAKUNAEST WINDLA LENGACLIMBING OVER ZIEUFEUAETONE FOR OVER "
+ "MUCHINESE ON CROSSOMMA BARCLOSED TOMANIAN OM NTEUMOLLOWINGBUNDANCEBOX "
+ "TRAYOVER GA2OVER BU FILE BOXBRA FACETTO MARK8 KANAKOYBEYFILIROSSED OANC "
+ "SIGNYENISEI IRD MARKYER YAGHTAI LUE FEBRUARYTAALUJA IS FORM BOL SIGNING "
+ "ROD LANTANGBOT FACETAR EYESOVERRIDEIS WHEELTTENTIONOVER TIROVER SHEOVER "
+ "SAGOVER GI4FINAL THCASSETTE1 BARLEYJACK OF "
+ "JAVIYANISWIMMINGEXCHANGECEILING RSE DUNGJUNCTIONSUPERSETCER "
+ "BALLEVERANCEOO TYPE SUCCEEDSCANDICUSIS-PILLAC SIYAQ OTIFIED YESIEUNG "
+ "NUTILLUCABLEWAYITA MFONOT MBUAETURNED MCAL DISC OTTAVA AMS HORNT NGGEET1 "
+ "HELMETYIDDISH ORM FEED OF YARNOREHEAD ON LEFTNAVIYANIECH YIWNLTRY "
+ "LEGEBEEFILILUB SUITSMA SIGNNCE SIGNM ALLAAHED BRICKULLS LEGNAMENNY "
+ "ZAKRYTOEAIYANNOINA METEKN-JOINERSIX DOTSACKSPACELORRAINEABAAFILIWBOY "
+ "HATABOAFILIDAMMATANLONG BARNG RTAGSDANTAJA LONG S TNEUTRAL E OF "
+ "POOUKEUTNDALOW DOUBNEIFORM LOW STOPNED FOODDDY BEARLOZHITIE "
+ "SLIDINGSIFISTONHAN-AKATDIM GUNUUNG DASHAEN NYAMMON TIMESHORT ERSIGN "
+ "LAEMEM-QOPHUNDERTIEUNDERDOTDIT CARD TTUDDAGMMATION MIONIAN DOCUMENTW "
+ "PRINTSDUSHENNAMALL AXEMY HOUSE TALENTSMANDARINDVISVARAMANGALAMDVANTAGE "
+ "SCOTS SSHKIR KAMARRATANDS-CREE SHOE JOTDIAMONDSWASH KAFDIFONIASME "
+ "BADGEUATRILLOERAL URNER TRUTHALLIANCESALT OF VOLUTION-PHIEUPHUAREG "
+ "YALEANING SQUEEZEDYRILLIC EOUT BOXVOMITINGCOUNCIL COUNTERSA SIGN "
+ "AUBJOINERENICIAN ESH LOOPODESTONE0 BRONZEOCUS OF OCK SALTOCALIC "
+ "MYPORROON-X BELOWOBOOFILICOMBINEDEREVODKAERDIGRISLATION XSNA LDANSE "
+ "WEDGEELEPHANTEK ONKARNITIAL ZD BUBBLESOFTNESSD CROSS NINE OF SCRIPT "
+ "GLKULIZMYUP TRUCKNI ABOVE YUQ NAEUDAWADI SATCHELEGORIAN "
+ "SENTAGONLOCATIVENOTE PAD POLNAYA-KHIEUKHSPERSIONSANYAKA EN NTEUMNRES "
+ "TOSLESS SHALESSER YNOVEMBERS OCHKOM-EM DASHLF RING LFWIDTH RASWADI-CREE "
+ "THCURLICUENO THUMBCURSIVE NO SLASHY BEETLERDEL NAGIMANSIS GBASINNAASTERN "
+ "WGLASNAYAAZHAAKKU CURRENTTO-LEFT ATAKANA XCELLENTVERGENCEATE "
+ "MARKATEBOARDTHOSCOPEBINOVILETICK IN PENTAGONAPITAL FRILLIONSREE "
+ "MARKINAGARI ARTYRIA RED HAIRBACKWARDFRAKTUR BATBEIT QAIRTHRAY "
+ "POPPERHESPIAN REATNESSTHIOPIC BACK YERANS SIGNFRICAN DPAVIYANI ANTENNAAST "
+ "WINDHOP BELLQUINTILEBEVERAGEBER POLEGORAZDO HANDLESAVY BANDTRICOLONGREAT "
+ "SA CEDILLATER FACEIGMOID SWRINKLEDVE SASAK3 ARMOURWRITING RAMMA GGRAUGHTS "
+ "BILLIONSATH MARKHREE OF RASMIAN GARITIC BIEE FONTRI DISHWON "
+ "SIGNAY-NIGHTRIYOOSAN AT DUSK56 TURO2FLOURISHFOR STOPPALOCHKABLE "
+ "SIGNICHAEAN ARCASITEPUSHPIKAZWJ THAJV OVER MAR "
+ "TSHESHARBAHAYZWARAKAYHARMONICBLINEAR PAKPAK ETIRRUP RTISMOS EANE "
+ "TREEARKLEAN BLED CARHAGALL HWO ABOVEPRECEDESHALF GURGENITIVEVESSEL "
+ "BPROSTAYAPUB DAWBPAIRTHRAARSI YEHRESVETLYWN HEARTI SHAKTIING BELL KEMBANG "
+ "FACING ING BOWLTOWARDS ARRIVINGPUN IYEKPTHAHA SOV ROGLF FACE RAMBATAY "
+ "SIGNGOLIAN VAYANNAVE DOT QUEEZE GHEUGHEEL PUMPUBUFILI-WELSH ERNIN "
+ "ANJAEMLILAMITE ZQAPHA D MADDAD MOUTHIBIFILIGRADUALPSTICKSALLOT "
+ "X-TIKEUTSCOOTER CHIKI LASHES CER-WAAXIMATAQUARIUS-CREE RIANGQI LIGHT "
+ "XCOMING 3 OMEGABAMBOOSSOLDIERTRAINERA NAME VAPOURSVANESE "
+ "THESEOSPUSHPINSANDHI CRACKER-MU-MO--SHIFT-3 SPICE3 SWORD-MACRONENSHUETI "
+ "RTAGS6 NGGOOI NTEUMSAMPHAOLE LEAFVOICINGPURPLE A -PHRUSPRINGSCOPTIC "
+ "THIEUTHHYAAUSHNUMBERSSA VAH BAIRKANSAYANNAVAV YODCONTACTEN "
+ "LEAFS-SAJDALEUT KAVOWEL K-THIRTYTHKUQI SANGAN ALESMA GLAGOLIER "
+ "THAN-KIYEOKLEYBALLNTAINS LAYANNALEK TOO3 WHEELLENGTH-TORNADOAS "
+ "SIGNHAARKAADYNAMICSHIFT TMANCHU WO WAENMUNGKAH TEDUNGMARCATOVYSOKO DU "
+ "NJAAWO MARKMASSAGEMRACHNYDIARGONDRIL BUMAAYYAATIKRAMAEAD OREHEXAGONUM "
+ "IYEKMAI SATTIVATE VEW NOWREATHY ASHTRA ACTER TDHALATHE GLASSE DRINKAD "
+ "NECKASH FRODIPLOUNDISIMOUMERICASUN MEUTAETMEUNHANGUL DOFONONSHORT AMINIMA "
+ "MINGKALSIDDHAMARDNESSAHAPAKHARRED OMBOL B0ARRED BREREKANHEADINGWO FOR "
+ "RESILLOHALANTASIGN UD5 NGGEEAELAENGHAYANNA WAAJIBMEETORUAU "
+ "MARKVEMENT-DANCINGDANESE WOLOSONG MASKRAKHANG SHAKERHIUCHUSNESTED "
+ "SERPINADAYANNAUKKAKHANEQUDAADA FACENIKAHITLJUDIJER2 GUNUEIGHT KUP TACKUP "
+ "STEPUP SIGNWORSHIPRA REPAAPEZIUMAUNTLETAULDRON BUTTONUP "
+ "MARKWDRIVERLYGISMAEAVY YAATAEAN ASUTORUDEAVOURRD DISKRD FACENAYANNA "
+ "STRIDESHAKINGNANCIALHI SIGNRDO RJE APLOUNUP HANDRANGKEPRARIETYATH OF ED "
+ "RICEWAZ EOHSEXTILERAYANNAECEMBER SLOWLYTAISYOU3 AREPAYMAIC "
+ "LBULANCESUKUUDOBUFFALOOUR OF RISIMOU9 CLOTH MENDUTRTHIAN OUT HUB2 WOMAN "
+ "MUQDAMJIBWAY ANGKUOQ7 NGUAN OPEN-O MUOMAEONTIEENBLACHKOWIFRUITCELSIUSOP "
+ "MARK KEFULAXOPHONEEULEUNGOVER ANCHEINAP0 WHEATTTHACANANGLONGKAYANNAFINAGH "
+ "0 SPEAROVER DUVILIK BYNAMIC FORKINGRIPPLE "
+ "CHEVRONKEUAERICHIEUCHTROLLEYUSSYERUTTILIK BREVIS YELLOW BERRIES3 "
+ "EIGHTBERGINETALL AAPHUTHAOONGONANANGLED KARO BAONG UEXPLOYAN URFACE "
+ "URGLASSPENGKALCAP TENISIBLE T ASHESRMUKHI ISLANDF SASAKPAYEROKIVE OF "
+ "IMILAR F DAVIDOT NGOMITALIC PECTIVEOT REPHPEGERMAFATIGUE "
+ "OCLOCKORTIETHCANDRA ILLEANNCABINET7 NGGUAITON RA1 ARROWAN MARKJAIN "
+ "OMJARATI TCHFORKJAYANNARRECTUSJECTIVEWIGNYANCAYANNAURATIONTAYANNAJERAN "
+ "JIL DRUMBIG YUSORKHON FAYANNA26 EYYYPAYANNATA MARKOREVMA SYNAGMAIKHAHITY "
+ "GREENORCULUSUT TIMEPERVISEANGOLATCK LIMEPOVODNYSTERINGGENERALFLUENCE9 "
+ "NGGAAKUTAARUKYLISMAESTIVALCLEAVER3 MONTHKPAK "
+ "WAVILLAINKOMBUVATYSCAPEOLAPUK KOQNDONKORONISINNABAR "
+ "FLEXUSOKRYTIEANDERERALTILLOPRENKHACLOTHESLAGIOS "
+ "ROGRESSTHALIYALAK-050OCTOBERIC WANDOCTAGONCOASTERP PIEETICYCLESOGDIAN "
+ "OWILO SL SEGNOBARREKHPPROACHOFFICERST TUBEUYGHUR BORZAYAOF SOAPCLOSE EOX "
+ "BACKICOPTEROX LINEROKEN L2 OLIVEYA LAMPOMERANGPALLAWAPOMOFO "
+ "LONSUMKKURUNIETNAHTATASHEELYAH LI TRYASKAPANSIOSPANESE YAYANNAKHA "
+ "YATGAYANNAFINAL YBOURINGON FACEYANMAR MAELEEIFIED "
+ "ETSECHKABOARDERAMAKKANOW ALEF PLOPHUAM ALEFRY "
+ "FACEARADDOBOWTIEPBOARDDIESISROCKET TIKHYNACLESSICKLEBLINK "
+ "DICINEDOKMAIANCHORRENGTHAPYRUSAJANI PECIALVILIANAPLI MURNAMABISHOPDERMA "
+ "PALUTAPEAKS BURGERAEMMAE AGUNG MURDAASSINGVERTKAC CLEF LONGA "
+ "LELETUNGAAMARBUTADGEHOGN DASHSHAYIMVIRIAMSHMAAMRICORNREMEDYZHITSAASHGABOW "
+ "TIE KAPALAILUREN-NISFARSEOSMPLING MELIKN YANGOTTED-BOFILINSUZ ANOWMANON "
+ "KEYNOZHEKSAUCERNSANAQPOKOJIPOMMEENTEVMANTIIMUCHURCHNTOGENUUMISHCREASECRAYO"
+ "NCHEMA ONOCLEANUARYNOKHUKCHEIKHCUPPEDNOR BUCHESS CUMBER "
+ "QATANBEFILICHIRETCHO CHOBELOSOFOUNDPUFFEDCLOSETS "
+ "TENTOGONEKODHADHANIMALBANWA EPOCHS SHOE EQUIDOCENCEOCIETYCODILE "
+ "DIPLIUYANNAQETANARIISAPQAMATSNKNOWNOITIC "
+ "PWATCHUZEIROBAFILISAADIYCKNESSRAVEL-PEPPERAPISMACARIK CASTLECATAWANEUME "
+ "AKEUAEBGBIEERAKLITATTERYATTIC BISCUSCALATEOSETTERKAANU SPLITAK-668NDA "
+ "TARBITSAPENCILDE DOGAKABATUP BOWNISTERRSHANAPIRIT OOMUUTRSIAN "
+ "CARETNIRUGU RULERRSENICCEVITUNIZKO "
+ "RISEMEUPNAYACHADINCHAMKOANGKATOPITSANGBAT NCH "
+ "FRPICKETRACINGDAGGERRAAKANOPEN POPEN DAUTUMNBETAN OOPED SOUNAP9 MUENKE "
+ "PHOKAYAH UBLE XEUNYAMELLITELIGIONLIGON 2 NGGUI MAIMI HOOKKASAR 2 MBOO6 "
+ "NGGEEUREUTSTROFO-HIEUHEN GHEEUAENAKEYCAP-HIDETEMPUS SPATHIGHAMALIB "
+ "YAMLEVEL-3 NGGAIASMA WEORTHGHETTISPADESGHEUAEEMASTIHORT "
+ "IGOBLINSUCKEDEVENTHLONG EUGGAGEGORGON00-102GO NGUEENTH-INSHIPSURANG4 "
+ "DART4 DEERWRENCH4 KPEEINGAATLISHA HUR PALITIKIUCIBLEHUMBS EIGHTYGLAZ "
+ "HINHALETARGETUDARKA2 KPOOLLIPOPAASHAETOPBARGNANT LAMEDHYOMBO TE USETE "
+ "TSEERMATASTLERSUAEQTUXO NEOSTOLI LASTON7 NGONKRISISLD "
+ "MAPFRAMESUANGXIKNIFE IGGLESGANGIA3 GBEE3 HEEIYRENE STANCYSTANCE7 MBEEKY "
+ "WAYESTAN 7 KAPOU MBITILBOAT7 MBUU7 NDOOIN YEHKUSHU2LAFRONSSLESSET "
+ "KUTILLAGETRIKE 9 NJEEKTIKO 7 GUANLAMADH6 TREEENIKI 0 "
+ "NGGOGEDOLAKILLERTRAPLIIDE ESFORMEE-IEUNGYSTICKINDHI GEADALEU "
+ "MBUTAUROSTHAKKUGGLING0 NYONA-KARA0 NYUNTAU ROEPACT INAGMA-PIEUPSPLIT "
+ "KLITONTERON SPITALINCUNXX FACEA HAAMXIMIZEIEVAN GBASAQTEUWEN0 "
+ "NGGIENTIMAFORTIS1 WINE8 NYENMANYA WN BOWWN BOXSYOUWA8 NYANTUXEDOF "
+ "CLEFDVANCEDUCEUSHERMESIX OF HEISEITIMATEF MARE1 GBOO1 GOLDIXTY "
+ "PHIMAHUGURAMUMADDAHMADR MEYANNAE WAVEIYANNAMALGAMITULUMAGRANTSYNAFIHIBIT "
+ "5 WOOLMALL FWINDOWTIKENOHEUAEP8 MBEEITABLEAFFIX TURBAN1 NDEEFATHA "
+ "HASHKAFAMILYISSIMOHAM AIHAMEDHISSHARHAMILOISSANTAGOGUE5 MERIWO OF ME "
+ "DIEFF OF MECHIK1 HORNTAIKHUTIRYAKITHER HE MGOAESURAT NJAQHALF "
+ "HIRINGUTAMINGEXHALE8 HOOUHO HOISKAPI 4 NGENSIXTHS4 NJOOM BOARM "
+ "BULLHIVETEGS-PA SURED YAKASHED ICEWBERRYED CAPGRASP 4 MUANWORKER6 HUAN6 "
+ "GUEIYIN-DOSWORDSEXISTS4 NYINHINGE EAHMUKXYOOJFLUTEPEAN 8 KPEFEARN8 "
+ "GBUFSAAQRONOSPAATOBREW INNA PASEQ2-VASZHAINPATAKIMMERINTHU1 "
+ "WVIIMGBAFLAGSPCHA LACA7 NEN7 MIN2 NJA2 HEN1 PEEANGELOTHALEYBUSBISAHILLU "
+ "2 NJUPEITHZIDI 8 FEEILVERYAMOKPEN O JERA2 HOOPEN-P1 TWO2 POO2 PTEYECEK2 "
+ "MBUROGOMWISADBORZYTSADI2 SEE MOOD1 YOOTTOCK2 MBEBOOTSFORCEBSTERTTORU1 "
+ "TEE2 MBA1-VASIRACYTUEUMIPEHA7 TWE KAWI7 NIN8 KPOYENAP2 KPA2 "
+ "KPIBLANKTSEEBWINDUBLAKOOUNCEURTLEIPINGWINJA7-"
+ "VASFLICTTSEREHIRIQHISTIHIUTHATIYA4 NDO6 GBARAIDAHOLAM4 TOO4 WUIATAF 4 "
+ "WOO4 VOOWLINETON AGVANGGURE HOLARHIMELRATERGULUSRASHAAWAY "
+ "WU318WUAETAVROSHOTELGORGIQUIRY6 KOOHOUR 32 JE CHWV4 KPU4 MON4 MBO4 LOO4 "
+ "LEERACHYAUTHS4 GBIZSEKAR-RUB CAPO4 ABB AMPS5 NDUHASE-HATHIHAYINHALA AR "
+ "AERIEENRIEULHAINU5 KEEZYGOS5 MBI "
+ "ALLORICEMHANNATINNETIPPIARERUHALQAASPERTILES4-VASHETHERDIONHI "
+ "ROTIGMA5-VASRCHIDRELAARELA "
+ "REIWATKAANREGIATMAAUARTARHADDAAPPLEHAALUASEIAPMUNK3 HINPLUTOPLUTA HAA "
+ "UTIESBENDE GORAPLHAU3 FOOGAMALPPAGE6 WEEBASSAGEAN 6-VASPONSEPOLI "
+ "FUJIZILDEXING GAZE-BEITH3 HON ICONBHADHBHETHRITSITRIOLIKARAIHVUSXTRA- "
+ "ILUTTEGEHIKURUXW XWUTEUXPITER3 BOO7 FUA7 GBEGESH2GADOL7 HUNPI RO7 JEE3 "
+ "RA3HUTA TORSO DEKABAARUTHINGRILLA3 VEEQAAFUI KOIBACUSTRACKGOGI "
+ "TORCH3-VASHROOMI-RESVATOR3 WEI COATHUMP 6 SOO6 SIAICHON6 TA2ICRONBASA "
+ "PTUNEGHULU6 RA2BALAGTRAIFVIET GHNUTPEPETPSILIIARDSIAUDAVAAVU3 "
+ "NDIANNONNNAN -ALAFAAMAEEKEET-BEAMUBURUUBUTSEISMANIS "
+ "FYURIICUBEDEMBICNINTHAADHUSOLVEWBOATLOBE "
+ "SENTONGENTEGALILOMKAEESHIUGUSTVRIDOLOOP UDAATNIEUNEIDONNGUE SARIEGL "
+ "HDAIC NGMANEGIONLOAN ALGARLEASEWFISHLEERIEOPLEEO-EUENUTOUBITOO BOX9-VASO "
+ "RUAO PLA-SIOSLAYARO KAIO ANGSADHENZEUMSAKINALLI "
+ "ALLEY-RINGSALADCROWNNSYONNSUAENSIEESATA "
+ "ENANOSAUILEMLJACTRICNUENGENJETNTXIVENENGOTERIA UNADATUSNTHA A "
+ "YUESPINEUMMERMSHAEMROCKUNGBAMPIRESHOOKSILA3MPAREVZMET TELUMALONMUOY "
+ "SHIMAADULTMAQAFMUCH DSMANMI ROSHTINDKAR "
+ "MISRAMETRYDLINGWAQFAMINGOSICLEAGMA "
+ "MIEUMWAAVUDOTS-"
+ "MELONAEMAEMEEMUMEIZIAEPENDWICHAEREEMENOEMEPETMETEGMMOTHUNOO LURALEATH "
+ "LWAY "
+ "SKATEEBALLDELTANCORADENCEDEPTHUKARADBOATLOURENENOENEMKANASHINEGARDESTYNA "
+ "POSHARUMADYAMAI KMAIZEDHAM E GEEWATTOMAALAACHKAUNITYM "
+ "RAMMAAEHENDEPSHANGEAGLE TABSSHAR2SHARANADA MACUSNABLACHULASUKUNCIEUCF "
+ "COWRUSH CHUTECCEPT1 FANOMBIEEYYAL0 MANAMEKH8 NWAK-020RYASOTUUMUSTNUTOLD "
+ "XRRITOKO LA9 MUNITUALCANUSCEREKSTORMROWN F SOWYIZETF "
+ "EWEKNOBSUQUETCHADACAUSEEURAELATIKRUHUAKARORRUDAA9 DEE0-VASOQPEN9 KUAJANG "
+ "WIANGCAUDA8 RO2EVAL 1 DWEKHAPHEUAEMCHOOLCHOOIRUMP-CHIME0 "
+ "OILRULAIKESH2KERETCHESTCHERYKBALL9 MENU U U0 DWOERKHA8 "
+ "MANCCOLICAKESCLUBSJUDGECAKRAURITYLAGUSESHE3JUDULALOG "
+ "LABOROPLETLABATVITAEFAIHUOBYLAOCADO0 BEELAMDA8-VASESO E9 WVE9 "
+ "WVAJERVISURYAISTLE0 DOO0 JOO0 HEESTARTKUSMACKAGEKURONURINE9 YEEET TUOJKI "
+ "8 NANCLONECALYAORUTOOKARACECAK9 NDEOKEE 9 NDACAANGITHI 9 "
+ "PU2JUEUIOSTERALPHA0 GBOCECEKCLIFF9 NUNL-JUZ9 NONL NET0 GEE0 "
+ "HANCKTIEKWAENFAAFURUISROOKVEYZRRORRROISHYATUKIZETATURUWAIRWAHAYEUXUNAVWAET"
+ "WAAKUNAHSINKRPSEVEUXSIKIRIFYURUSVESTZATAZZY TZELZIZ2VOS "
+ "YUDHRUSISOKARUTUYUKUZELOZAYNUTANSA-ITAXIUTTYTFONXEYNZIETXEIAWAW "
+ "SUABVIDAUON SLURULU SUNGRSO-RT TRUNARUNGROA "
+ "YWAASELFWDERSEEVSEENWULUROARUHURRUKUVIYOVEDESEYEVUEQHEYSHEENHEEPHEROHERUHE"
+ "YNHEYTHHWA2 YAHID HIINHILDHAVEHAYNHWAA2 SOHUEN2 RO2 QOHSHUHWAH2 "
+ "PEIANOIARA2 NOHMI 2 VIHOKEHOM HOPHHOSTHSDA3 MUFFINFIRIFITA3 PA3 MI3 ME3 "
+ "TAEZZO3 YU3 LE3 RIFAIBFASTFEEMFETHFEUQGIBAGIDAGIEAGIR2GOALGORTGROMGRU "
+ "GUINFWAA3 L33 KU3 JO3 JEGAMEGAML3 EEGEDEGGWS3 A3GHOMKMA KOBAKOETKOKEKOKO1 "
+ "KU1 KIKWAA1 IN1 HA1 GA1 DULAAN1 DO1 RAKANGKAPHKCET1 QI1 "
+ "POKICKLFERLFIELIFULIUMLIWNLOLL1 DAKALIILUYIK HINORINY 2 L22 KAIFAT2 "
+ "BUIGERIITOJOT JEONJIIM1 YI1 VU1 SU1 SI1 SAKAAFKAD3KAKOIPODIQAAISI "
+ "1358ARGIAROO7 JAATIMAPAQ7 LUAPON7 KIARA36 NA6 RU6 QABASH6 POBAYI6 LA6 L66 "
+ "JOATYA7 EIAULA7 DD7 DA7 BE6 WU6 SEBAGSBALD8 QE8 PI8 KO8 JIAAMU8 GU8 "
+ "FOAFEL8 EN9 JA9 TA9 TO9 TU9 SO9 SI9 SE9 PI9 PAA IEA-HA8 WE8 SUAACUALTA7 "
+ "VO7 TIAMLA7 REAN X8 DU8 BOAILM7 ZAALDAEAAE5 FE5 FADZHA5 DE5 BB5 AU5 AN5 "
+ "A2EANSEEEEDGER5 LIDIM2EENG5 JU5 IN5 GI4 FI4 NE4 L44 KE4 DO4 WAEENUEETA4 "
+ "ZEEHEH4 WIEIPTEIRTEIWS4 TU4 TE6 DIBUNGBUOYCANOCASECAYNCHAUCHEH6 "
+ "HIBERDBETH6 JE6 HEBOOKBORE6 FU5 VACWAA5 TODAGSDAIR5 TEDDAKDDHI5 OODEAD5 "
+ "NU5 MOCKEN5 WE5 WACOONCOREHUVA5 VECRETNDAPPAWN0 "
+ "BINCERPEEPNAM2NHAYOXIANGA2 OHMNET NEO -UM NDUEPLUMMUASPOLOMPET0 "
+ "NIPLUGPRIL0 PUMMU2QEF NAG NAAUPEUX0 HO0 JUPHABPHIN0 "
+ "KOMVATMUINOBATOFUMOENGODLEOONEOBROO-YOOOTH R SOJI ONA "
+ "WEBNRUAOUBTNPEANOWC-ONE-RAYNJAMORAXNWAANUTSORIINUNGNTOCNTAANSUB0 ZO C "
+ "DRAFE0 SA0 YEQOPAMFAALUMNMARUMESOMARYREIAMIIM028BMIINMLYARGU2LOVOLUIS0 "
+ "WIMEARQHAU0 RADE6DA2UOPZUP8 ID70D42WAU5 "
+ "UCYACWIWOQUEHUEZ8F04-"
+ "0UDYA7AOMSREX9819E3UMXDJAE80DZEVOKAUMAUJAWXZOOZJEB89B576-"
+ "0620AZUVAUAYD6D7ZORQ00PUQQIGQIF7 "
+ "OQARPOQQUFVNOQOTQOFCA9550557BXGCAHBUD5B68 "
+ "AUQAAG-CAIVOYAL2BAU72C5-0VUUBIBIMNYOT18D15514DIWRMU Y00I-IHOJHOX0 E0 "
+ "UL000-0LJE04A0B9LFA1 XSUUJHAK00121JAH1-21-0JEUKUEKAQSIIFOMFLYO "
+ "YOAYXEHTUJFAJOEH3 IFUEES-OIX4 "
+ "EF8CF143-0XAUEZHEYKXAN305X0031CXWVXWG25320BNII-TE3 "
+ "DTJE2DD2-0HHANIB40488309713938291716494B4E1D1AQWR7R0C0D0VDW099F39092G9G3";
+uint8_t UnicodeNameToCodepointIndex_[239405] = {
+ 0x00, 0x05, 0xc0, 0x00, 0x6b, 0x15, 0xc0, 0x00, 0x95, 0x12, 0xc0, 0x00,
+ 0xdd, 0x06, 0xc0, 0x01, 0x03, 0x14, 0xc0, 0x01, 0x27, 0x18, 0xc0, 0x01,
+ 0x41, 0x16, 0xc0, 0x01, 0x57, 0x03, 0xc0, 0x01, 0x7b, 0x04, 0xc0, 0x01,
+ 0xd8, 0x0e, 0xc0, 0x01, 0xfe, 0x17, 0xc0, 0x02, 0x22, 0x0a, 0xc0, 0x02,
+ 0x3f, 0x0b, 0xc0, 0x02, 0x5d, 0x19, 0xc0, 0x02, 0x7d, 0x08, 0xc0, 0x02,
+ 0x95, 0x0f, 0xc0, 0x02, 0xb1, 0x0d, 0xc0, 0x02, 0xd1, 0x10, 0xc0, 0x02,
+ 0xef, 0x1a, 0xc0, 0x03, 0x15, 0x07, 0xc0, 0x03, 0x2d, 0x09, 0xc0, 0x03,
+ 0x84, 0x11, 0xc0, 0x03, 0xa6, 0x1c, 0xc0, 0x04, 0x0a, 0x0c, 0xc0, 0x04,
+ 0x2c, 0x42, 0x00, 0xe3, 0xc0, 0x04, 0x44, 0x1b, 0x40, 0x04, 0x5a, 0x03,
+ 0xc0, 0x04, 0x6e, 0x43, 0x30, 0x23, 0xc0, 0x04, 0x9d, 0x0a, 0xc0, 0x04,
+ 0xaf, 0x14, 0xc0, 0x04, 0xcb, 0x11, 0xc0, 0x04, 0xea, 0x0e, 0xc0, 0x05,
+ 0x25, 0x0b, 0xc0, 0x05, 0x37, 0x17, 0xc0, 0x05, 0x4c, 0x07, 0xc0, 0x05,
+ 0x72, 0x1b, 0x40, 0x05, 0x8a, 0x07, 0xc0, 0x05, 0xa2, 0x0b, 0xc0, 0x05,
+ 0xe9, 0x16, 0xc0, 0x06, 0x07, 0x03, 0xc0, 0x06, 0x24, 0x0d, 0xc0, 0x06,
+ 0x60, 0x0e, 0xc0, 0x06, 0x6e, 0x0a, 0xc0, 0x06, 0x7e, 0x05, 0xc0, 0x06,
+ 0x9a, 0x10, 0xc0, 0x06, 0xaf, 0x11, 0xc0, 0x06, 0xbf, 0x42, 0x00, 0xe3,
+ 0xc0, 0x06, 0xf1, 0x1b, 0xc0, 0x06, 0xfb, 0x12, 0xc0, 0x07, 0x0f, 0x17,
+ 0xc0, 0x07, 0x2e, 0x0f, 0xc0, 0x07, 0x5a, 0x19, 0xc0, 0x07, 0x68, 0xcc,
+ 0x85, 0x35, 0x01, 0x4e, 0x60, 0x14, 0xc0, 0x07, 0x78, 0x0e, 0xc0, 0x07,
+ 0x8a, 0x0b, 0xc0, 0x07, 0x92, 0x03, 0xc0, 0x07, 0xbb, 0x11, 0xc0, 0x07,
+ 0xef, 0x07, 0xc0, 0x08, 0x1d, 0x17, 0xc0, 0x08, 0x3f, 0x4f, 0x62, 0x1f,
+ 0xc0, 0x08, 0x5b, 0x0a, 0x40, 0x08, 0x79, 0x07, 0xc0, 0x08, 0x87, 0x0b,
+ 0xc0, 0x08, 0xbb, 0x14, 0xc0, 0x08, 0xf9, 0x11, 0xc0, 0x09, 0x13, 0x17,
+ 0xc0, 0x09, 0x5f, 0x03, 0xc0, 0x09, 0x71, 0xc2, 0xe6, 0x9f, 0x0f, 0xa6,
+ 0x01, 0xcf, 0x60, 0xb7, 0x0f, 0xcf, 0x60, 0x07, 0xc0, 0x09, 0x96, 0x0b,
+ 0xc0, 0x09, 0xd2, 0x11, 0xc0, 0x0a, 0x02, 0x03, 0xc0, 0x0a, 0x44, 0x17,
+ 0xc0, 0x0a, 0x6c, 0xc9, 0xa9, 0x51, 0x0f, 0xcc, 0x78, 0x03, 0xc0, 0x0a,
+ 0x94, 0x07, 0xc0, 0x0a, 0xa6, 0x0b, 0xc0, 0x0a, 0xbc, 0x11, 0xc0, 0x0a,
+ 0xe4, 0x42, 0x03, 0x66, 0x40, 0x0a, 0xee, 0x03, 0xc0, 0x0a, 0xfa, 0x02,
+ 0xc0, 0x0b, 0x34, 0x17, 0xc0, 0x0b, 0x40, 0x0a, 0xc0, 0x0b, 0x56, 0x11,
+ 0xc0, 0x0b, 0x72, 0x14, 0xc0, 0x0b, 0x9e, 0x07, 0xc0, 0x0b, 0xae, 0x0b,
+ 0xc0, 0x0b, 0xcc, 0x19, 0x40, 0x0c, 0x04, 0x14, 0xc0, 0x0c, 0x14, 0xc2,
+ 0x24, 0xe2, 0x0f, 0xd4, 0x99, 0x06, 0xc0, 0x0c, 0x36, 0x0e, 0xc0, 0x0c,
+ 0x58, 0x17, 0xc0, 0x0c, 0x80, 0xc7, 0x2e, 0x21, 0x01, 0x38, 0x43, 0x00,
+ 0x0c, 0x92, 0x10, 0xc0, 0x0c, 0x96, 0x15, 0xc0, 0x0c, 0xb9, 0x16, 0xc0,
+ 0x0c, 0xcd, 0xc7, 0xc0, 0xa5, 0x01, 0x32, 0x91, 0x44, 0xdf, 0xff, 0xc0,
+ 0x0c, 0xd9, 0x05, 0xc0, 0x0c, 0xfb, 0x12, 0xc0, 0x0d, 0x19, 0xcb, 0x91,
+ 0xe6, 0x01, 0x0a, 0x69, 0x18, 0xc0, 0x0d, 0x27, 0x0f, 0xc0, 0x0d, 0x33,
+ 0xcb, 0x90, 0xff, 0x00, 0x30, 0x59, 0x07, 0xc0, 0x0d, 0x49, 0xc5, 0xd8,
+ 0x44, 0x0f, 0xcf, 0x70, 0x11, 0xc0, 0x0d, 0x55, 0x0e, 0xc0, 0x0d, 0x95,
+ 0x03, 0xc0, 0x0d, 0xa3, 0x0b, 0xc0, 0x0d, 0xd5, 0x07, 0xc0, 0x0e, 0x01,
+ 0x17, 0xc0, 0x0e, 0x2a, 0x14, 0xc0, 0x0e, 0x65, 0x1b, 0xc0, 0x0e, 0x75,
+ 0x49, 0xb4, 0xc7, 0x40, 0x0e, 0x81, 0x11, 0xc0, 0x0e, 0xaf, 0x07, 0xc0,
+ 0x0e, 0xed, 0x0b, 0xc0, 0x0f, 0x22, 0x1b, 0xc0, 0x0f, 0x5b, 0x03, 0xc0,
+ 0x0f, 0x6d, 0xcd, 0x7f, 0x73, 0x01, 0x08, 0xa1, 0xc4, 0x0f, 0x0c, 0x0f,
+ 0xcc, 0xc9, 0x17, 0x40, 0x0f, 0x9a, 0x12, 0xc0, 0x0f, 0xa6, 0x10, 0xc0,
+ 0x0f, 0xc2, 0xc7, 0x57, 0x8b, 0x01, 0x30, 0x13, 0x00, 0x0f, 0xdc, 0xc5,
+ 0x19, 0xdd, 0x01, 0x32, 0x29, 0x48, 0xbe, 0x5a, 0x40, 0x0f, 0xe0, 0x07,
+ 0xc0, 0x0f, 0xec, 0x11, 0xc0, 0x10, 0x10, 0x03, 0xc0, 0x10, 0x3e, 0x0b,
+ 0xc0, 0x10, 0x68, 0x1b, 0xc0, 0x10, 0x92, 0xcb, 0x96, 0x3d, 0x01, 0x05,
+ 0xa1, 0x17, 0x40, 0x10, 0xa2, 0x10, 0xc0, 0x10, 0xb8, 0x42, 0x00, 0x06,
+ 0xc0, 0x10, 0xe4, 0x43, 0x00, 0x89, 0xc0, 0x10, 0xf0, 0x0f, 0xc0, 0x11,
+ 0x00, 0xce, 0x72, 0xc6, 0x0f, 0x9f, 0x71, 0xd3, 0x42, 0xc7, 0x0f, 0xc8,
+ 0xf8, 0x11, 0xc0, 0x11, 0x10, 0x0a, 0xc0, 0x11, 0x2a, 0x0b, 0xc0, 0x11,
+ 0x3f, 0x03, 0xc0, 0x11, 0x5b, 0x07, 0xc0, 0x11, 0x7d, 0x14, 0x40, 0x11,
+ 0x91, 0x0e, 0xc0, 0x11, 0xa1, 0x11, 0xc0, 0x11, 0xba, 0x03, 0xc0, 0x11,
+ 0xe4, 0x14, 0xc0, 0x12, 0x0a, 0x17, 0xc0, 0x12, 0x1c, 0x07, 0xc0, 0x12,
+ 0x32, 0x0b, 0x40, 0x12, 0x46, 0x0b, 0xc0, 0x12, 0x6a, 0x07, 0xc0, 0x12,
+ 0x8b, 0x11, 0xc0, 0x12, 0xbd, 0x03, 0xc0, 0x12, 0xec, 0x17, 0xc0, 0x13,
+ 0x2d, 0x43, 0x15, 0xe9, 0xc0, 0x13, 0x3d, 0x47, 0xca, 0x45, 0x40, 0x13,
+ 0x47, 0x10, 0xc0, 0x13, 0x6b, 0x07, 0xc0, 0x13, 0x77, 0x03, 0xc0, 0x13,
+ 0x84, 0x0a, 0xc0, 0x13, 0xa0, 0x0b, 0xc0, 0x13, 0xbe, 0x11, 0xc0, 0x13,
+ 0xdf, 0xc5, 0xd4, 0x02, 0x01, 0x5f, 0x18, 0x07, 0xc0, 0x13, 0xeb, 0x03,
+ 0xc0, 0x14, 0x20, 0x11, 0xc0, 0x14, 0x4f, 0x56, 0x30, 0x4e, 0xc0, 0x14,
+ 0x74, 0x17, 0xc0, 0x14, 0x8e, 0x45, 0x60, 0x4f, 0xc0, 0x14, 0xa4, 0x43,
+ 0xc2, 0x7e, 0xc0, 0x14, 0xd3, 0x0b, 0x40, 0x14, 0xf9, 0x47, 0xc0, 0xb3,
+ 0xc0, 0x15, 0x05, 0xd3, 0x46, 0x6a, 0x01, 0x19, 0x39, 0xc2, 0x00, 0xbf,
+ 0x01, 0x15, 0xd9, 0xc4, 0xe4, 0x5b, 0x0f, 0xd3, 0xd8, 0x0f, 0xc0, 0x15,
+ 0x11, 0x03, 0xc0, 0x15, 0x1f, 0x09, 0xc0, 0x15, 0x32, 0x1a, 0xc0, 0x15,
+ 0x3c, 0x48, 0xbd, 0x72, 0xc0, 0x15, 0x4a, 0x0e, 0xc0, 0x15, 0x7c, 0x44,
+ 0x00, 0x2d, 0xc0, 0x15, 0x90, 0x10, 0xc0, 0x15, 0x9a, 0xcb, 0x8f, 0xcb,
+ 0x01, 0x1e, 0x79, 0x14, 0xc0, 0x15, 0xb9, 0x42, 0x00, 0xe3, 0xc0, 0x15,
+ 0xcb, 0x15, 0xc0, 0x15, 0xd5, 0x17, 0xc0, 0x15, 0xe1, 0xcc, 0x81, 0xbd,
+ 0x0f, 0xa7, 0x39, 0xcd, 0x76, 0x5c, 0x0f, 0x99, 0x91, 0xc2, 0x0c, 0x43,
+ 0x0f, 0xa2, 0x0b, 0x00, 0x15, 0xed, 0xd0, 0x57, 0xb2, 0x01, 0x70, 0x70,
+ 0x17, 0xc0, 0x15, 0xf7, 0x11, 0xc0, 0x16, 0x13, 0x14, 0xc0, 0x16, 0x2f,
+ 0x07, 0xc0, 0x16, 0x3f, 0x0b, 0xc0, 0x16, 0x62, 0xc4, 0xe0, 0x07, 0x0f,
+ 0xa3, 0xd9, 0x03, 0xc0, 0x16, 0x6c, 0x0e, 0x40, 0x16, 0x78, 0xc5, 0xc8,
+ 0x6f, 0x0f, 0xcd, 0x51, 0x14, 0xc0, 0x16, 0x86, 0x42, 0x02, 0x10, 0xc0,
+ 0x16, 0xa2, 0xc2, 0x09, 0x66, 0x0f, 0xcc, 0x49, 0xc7, 0xc7, 0xf9, 0x0f,
+ 0xb7, 0x11, 0x10, 0xc0, 0x16, 0xae, 0x12, 0xc0, 0x16, 0xc4, 0x0e, 0xc0,
+ 0x16, 0xda, 0x17, 0xc0, 0x16, 0xea, 0x05, 0xc0, 0x16, 0xf4, 0x04, 0xc0,
+ 0x16, 0xfe, 0xc7, 0xb5, 0x83, 0x01, 0x09, 0x31, 0x43, 0x00, 0x5f, 0xc0,
+ 0x17, 0x10, 0x09, 0xc0, 0x17, 0x1a, 0xc8, 0xad, 0x5d, 0x0f, 0xaa, 0x49,
+ 0xce, 0x71, 0x76, 0x0f, 0x9f, 0x11, 0xc3, 0x02, 0x3b, 0x0f, 0x9b, 0x11,
+ 0x9a, 0x0f, 0xa0, 0x11, 0x15, 0xc0, 0x17, 0x26, 0xcb, 0x8a, 0xd6, 0x0f,
+ 0xa2, 0x60, 0xd0, 0x5c, 0x12, 0x0f, 0xc8, 0x81, 0x48, 0xb8, 0x6a, 0xc0,
+ 0x17, 0x32, 0x50, 0x58, 0x72, 0xc0, 0x17, 0x44, 0x4a, 0x17, 0xa1, 0xc0,
+ 0x17, 0x6c, 0x07, 0xc0, 0x17, 0x8c, 0xc5, 0xdc, 0x1d, 0x0f, 0xce, 0xf8,
+ 0x03, 0xc0, 0x17, 0x9e, 0x17, 0xc0, 0x17, 0xb4, 0x11, 0xc0, 0x17, 0xc6,
+ 0xc4, 0xe2, 0x9b, 0x0f, 0xa2, 0xb1, 0xd2, 0x4d, 0x45, 0x0f, 0xcf, 0x48,
+ 0xc6, 0xd1, 0x75, 0x01, 0x35, 0xd9, 0x03, 0xc0, 0x17, 0xd2, 0x46, 0x2c,
+ 0xb4, 0xc0, 0x17, 0xe4, 0xcc, 0x01, 0xbb, 0x00, 0x01, 0x10, 0x0b, 0xc0,
+ 0x17, 0xee, 0x07, 0xc0, 0x17, 0xf8, 0xcb, 0x94, 0xa6, 0x0f, 0xcb, 0x89,
+ 0xc4, 0xe4, 0x2f, 0x0f, 0xd4, 0x00, 0x10, 0xc0, 0x18, 0x0a, 0xc4, 0x26,
+ 0xba, 0x01, 0x37, 0x59, 0x14, 0xc0, 0x18, 0x26, 0x12, 0xc0, 0x18, 0x48,
+ 0x06, 0xc0, 0x18, 0x54, 0x17, 0xc0, 0x18, 0x60, 0x0f, 0xc0, 0x18, 0x6c,
+ 0x0e, 0xc0, 0x18, 0x7b, 0xc4, 0xc7, 0xcb, 0x0f, 0x99, 0xa9, 0x96, 0x0f,
+ 0xa0, 0x42, 0x00, 0x18, 0x87, 0x58, 0x25, 0x43, 0xc0, 0x18, 0x90, 0x48,
+ 0x91, 0xff, 0xc0, 0x18, 0x9a, 0x47, 0x08, 0x5b, 0x40, 0x18, 0xe8, 0x07,
+ 0xc0, 0x19, 0x22, 0x03, 0xc0, 0x19, 0x3c, 0xc4, 0xcc, 0x07, 0x01, 0x37,
+ 0x51, 0x0b, 0xc0, 0x19, 0x50, 0x11, 0xc0, 0x19, 0x71, 0xcc, 0x85, 0x11,
+ 0x0f, 0x9c, 0x20, 0x17, 0xc0, 0x19, 0x83, 0xc2, 0x00, 0x03, 0x0f, 0xcc,
+ 0x01, 0x1b, 0xc0, 0x19, 0x8f, 0x11, 0xc0, 0x19, 0x9b, 0x07, 0xc0, 0x19,
+ 0xb3, 0xc5, 0x72, 0xa4, 0x0f, 0xcc, 0xba, 0x00, 0x19, 0xbf, 0x05, 0xc0,
+ 0x19, 0xc5, 0x0f, 0xc0, 0x19, 0xcf, 0x17, 0xc0, 0x19, 0xe3, 0xc4, 0xe0,
+ 0x13, 0x01, 0x35, 0x81, 0x10, 0xc0, 0x19, 0xf5, 0x14, 0xc0, 0x1a, 0x1b,
+ 0x0e, 0xc0, 0x1a, 0x2d, 0x42, 0x01, 0x25, 0xc0, 0x1a, 0x3c, 0x99, 0x0f,
+ 0xa0, 0x23, 0x00, 0x1a, 0x46, 0x12, 0xc0, 0x1a, 0x4c, 0xc2, 0x00, 0xfe,
+ 0x0f, 0xcf, 0x29, 0xc2, 0x00, 0x74, 0x0f, 0xd4, 0xc8, 0x0b, 0xc0, 0x1a,
+ 0x56, 0x11, 0xc0, 0x1a, 0x62, 0xd1, 0x50, 0xac, 0x01, 0x1c, 0xd1, 0x03,
+ 0x40, 0x1a, 0x7d, 0x42, 0x02, 0xd3, 0xc0, 0x1a, 0x8f, 0xc7, 0xc8, 0xc4,
+ 0x0f, 0x9e, 0xcb, 0x00, 0x1a, 0x99, 0xc4, 0x78, 0xfe, 0x0f, 0x9d, 0x30,
+ 0x42, 0x00, 0x15, 0xc0, 0x1a, 0x9f, 0x48, 0xb9, 0x0a, 0xc0, 0x1a, 0xab,
+ 0x14, 0xc0, 0x1a, 0xbd, 0x12, 0xc0, 0x1a, 0xcb, 0xc7, 0xb3, 0x73, 0x01,
+ 0x10, 0xd9, 0xc6, 0xcc, 0x53, 0x0f, 0xca, 0x91, 0xc9, 0xab, 0x52, 0x0f,
+ 0xcb, 0x48, 0xca, 0xa3, 0x8c, 0x0f, 0xaa, 0x41, 0xc3, 0x20, 0xac, 0x01,
+ 0x35, 0x99, 0x42, 0x00, 0x84, 0xc0, 0x1a, 0xdb, 0x42, 0x01, 0xdd, 0x40,
+ 0x1a, 0xe7, 0x42, 0x05, 0xc0, 0xc0, 0x1a, 0xf3, 0xca, 0xa5, 0x62, 0x01,
+ 0x19, 0x69, 0x47, 0xba, 0x9b, 0xc0, 0x1a, 0xff, 0xc5, 0xdd, 0xfd, 0x0f,
+ 0x98, 0x00, 0x42, 0x00, 0x30, 0xc0, 0x1b, 0x23, 0xc5, 0x65, 0x68, 0x01,
+ 0x18, 0x9b, 0x00, 0x1b, 0x2f, 0xcb, 0x91, 0x20, 0x0f, 0xd5, 0x09, 0x03,
+ 0xc0, 0x1b, 0x35, 0x15, 0xc0, 0x1b, 0x3d, 0x42, 0x02, 0x2f, 0xc0, 0x1b,
+ 0x49, 0xc5, 0xc5, 0x38, 0x01, 0x35, 0xc9, 0x05, 0xc0, 0x1b, 0x59, 0x14,
+ 0xc0, 0x1b, 0x63, 0x07, 0xc0, 0x1b, 0x6f, 0xc3, 0x92, 0x91, 0x01, 0x5f,
+ 0x91, 0xce, 0x6b, 0xaa, 0x01, 0x5f, 0xd9, 0xc4, 0xe0, 0xff, 0x0f, 0xc9,
+ 0x98, 0x10, 0xc0, 0x1b, 0x7b, 0x42, 0x00, 0xbc, 0xc0, 0x1b, 0x8d, 0x1a,
+ 0xc0, 0x1b, 0x99, 0x06, 0xc0, 0x1b, 0xab, 0xd1, 0x51, 0xde, 0x0f, 0xaf,
+ 0xf1, 0x46, 0xc7, 0x36, 0x40, 0x1b, 0xb7, 0x07, 0xc0, 0x1b, 0xc9, 0x03,
+ 0xc0, 0x1b, 0xdb, 0x14, 0xc0, 0x1b, 0xfb, 0x11, 0xc0, 0x1c, 0x09, 0x17,
+ 0xc0, 0x1c, 0x15, 0xca, 0xa1, 0x0c, 0x0f, 0xde, 0x2a, 0x00, 0x1c, 0x27,
+ 0x0e, 0xc0, 0x1c, 0x2b, 0x42, 0x00, 0x33, 0xc0, 0x1c, 0x35, 0x10, 0xc0,
+ 0x1c, 0x41, 0xc6, 0xd0, 0x7f, 0x01, 0x37, 0xa9, 0xc9, 0xb1, 0xe5, 0x01,
+ 0x32, 0x81, 0x16, 0xc0, 0x1c, 0x4d, 0x48, 0x69, 0x46, 0xc0, 0x1c, 0x5c,
+ 0xc7, 0xc6, 0x01, 0x0f, 0x9d, 0xb9, 0xd1, 0x50, 0xdf, 0x0f, 0x9b, 0xb1,
+ 0xc2, 0x00, 0x2c, 0x0f, 0xcb, 0xd9, 0x45, 0x73, 0xa7, 0x40, 0x1c, 0x78,
+ 0x17, 0xc0, 0x1c, 0x84, 0x0b, 0xc0, 0x1c, 0x93, 0xc8, 0xbc, 0xe2, 0x0f,
+ 0xb7, 0xc8, 0x11, 0xc0, 0x1c, 0x9f, 0x07, 0xc0, 0x1c, 0xa7, 0x0b, 0xc0,
+ 0x1c, 0xb7, 0x03, 0x40, 0x1c, 0xc3, 0x14, 0xc0, 0x1c, 0xcf, 0x03, 0xc0,
+ 0x1c, 0xdb, 0x11, 0xc0, 0x1c, 0xf5, 0x0b, 0xc0, 0x1d, 0x19, 0xcd, 0x7f,
+ 0xa7, 0x01, 0x4f, 0x11, 0xc3, 0x2d, 0xa5, 0x0f, 0xa0, 0x88, 0x11, 0xc0,
+ 0x1d, 0x2f, 0x03, 0xc0, 0x1d, 0x3b, 0x14, 0xc0, 0x1d, 0x47, 0xc4, 0xdc,
+ 0xf0, 0x0f, 0x9f, 0x5a, 0x00, 0x1d, 0x5d, 0xcb, 0x90, 0x18, 0x0f, 0xc9,
+ 0x39, 0x42, 0x00, 0x27, 0xc0, 0x1d, 0x63, 0x03, 0x40, 0x1d, 0x7e, 0x17,
+ 0xc0, 0x1d, 0x8a, 0x43, 0x1c, 0x85, 0xc0, 0x1d, 0x96, 0xde, 0x0f, 0x40,
+ 0x0f, 0xa8, 0xe1, 0x46, 0xcf, 0xdd, 0xc0, 0x1d, 0xa8, 0x05, 0xc0, 0x1d,
+ 0xdf, 0x42, 0x00, 0x4b, 0xc0, 0x1d, 0xeb, 0xc6, 0x55, 0xf6, 0x01, 0x06,
+ 0x01, 0x4b, 0x9a, 0x47, 0xc0, 0x1d, 0xfb, 0x46, 0xc9, 0x58, 0x40, 0x1e,
+ 0x07, 0x03, 0xc0, 0x1e, 0x25, 0xc2, 0x02, 0xfb, 0x0f, 0xcc, 0x88, 0x0f,
+ 0xc0, 0x1e, 0x31, 0x10, 0xc0, 0x1e, 0x3d, 0x42, 0x00, 0x2c, 0xc0, 0x1e,
+ 0x49, 0x4b, 0x90, 0xb2, 0x40, 0x1e, 0x55, 0x07, 0xc0, 0x1e, 0x6d, 0x03,
+ 0xc0, 0x1e, 0x7d, 0xcd, 0x77, 0x7a, 0x01, 0x11, 0x13, 0x00, 0x1e, 0x8f,
+ 0x0b, 0xc0, 0x1e, 0x95, 0xd4, 0x3e, 0x58, 0x0f, 0xa5, 0x31, 0x11, 0x40,
+ 0x1e, 0xa4, 0x43, 0x00, 0x67, 0xc0, 0x1e, 0xba, 0x90, 0x01, 0x30, 0x4b,
+ 0x00, 0x1e, 0xca, 0x48, 0xb9, 0xa2, 0xc0, 0x1e, 0xe9, 0xc6, 0xb7, 0x74,
+ 0x01, 0x13, 0xdb, 0x00, 0x1e, 0xfb, 0x42, 0x0e, 0xa6, 0xc0, 0x1e, 0xff,
+ 0x42, 0x15, 0x13, 0xc0, 0x1f, 0x11, 0x15, 0x40, 0x1f, 0x1d, 0x0b, 0xc0,
+ 0x1f, 0x29, 0x03, 0xc0, 0x1f, 0x33, 0xcc, 0x71, 0x94, 0x0f, 0xb5, 0x60,
+ 0xc8, 0xb9, 0x52, 0x01, 0x02, 0x99, 0x03, 0xc0, 0x1f, 0x3f, 0xc5, 0xd4,
+ 0x2a, 0x0f, 0x9e, 0x50, 0x0b, 0xc0, 0x1f, 0x49, 0x11, 0xc0, 0x1f, 0x59,
+ 0x07, 0xc0, 0x1f, 0x75, 0xca, 0x9b, 0xbc, 0x0f, 0xa7, 0xf8, 0x03, 0xc0,
+ 0x1f, 0x94, 0x17, 0x40, 0x1f, 0xa5, 0x10, 0xc0, 0x1f, 0xb8, 0xc2, 0x00,
+ 0x3b, 0x01, 0x36, 0x7b, 0x00, 0x1f, 0xce, 0x15, 0xc0, 0x1f, 0xd4, 0xc7,
+ 0xc7, 0xba, 0x01, 0x16, 0xa3, 0x00, 0x1f, 0xe0, 0x0e, 0xc0, 0x1f, 0xe6,
+ 0x89, 0x0f, 0xa0, 0xb3, 0x00, 0x1f, 0xf6, 0x87, 0x0f, 0xcb, 0x38, 0x42,
+ 0x00, 0xcc, 0xc0, 0x1f, 0xfa, 0x09, 0xc0, 0x20, 0x0a, 0x14, 0xc0, 0x20,
+ 0x17, 0x4a, 0xa6, 0x5c, 0xc0, 0x20, 0x2b, 0x0e, 0xc0, 0x20, 0x50, 0x4b,
+ 0x8e, 0x55, 0xc0, 0x20, 0x5a, 0xc5, 0xdd, 0xda, 0x0f, 0xa7, 0x31, 0xc7,
+ 0x7b, 0xdd, 0x0f, 0xa6, 0x71, 0xc8, 0xb9, 0xba, 0x0f, 0xa1, 0xf1, 0x10,
+ 0x40, 0x20, 0x7c, 0x16, 0xc0, 0x20, 0x88, 0x17, 0xc0, 0x20, 0x98, 0x44,
+ 0x00, 0x28, 0xc0, 0x20, 0xb6, 0x15, 0xc0, 0x20, 0xc0, 0x12, 0xc0, 0x20,
+ 0xd0, 0xcf, 0x66, 0xfc, 0x0f, 0xad, 0x49, 0xcd, 0x79, 0xf7, 0x0f, 0xa7,
+ 0xf1, 0x45, 0x9f, 0x92, 0xc0, 0x20, 0xdc, 0xc4, 0xe4, 0x23, 0x0f, 0xa1,
+ 0x48, 0x14, 0xc0, 0x20, 0xeb, 0x10, 0xc0, 0x21, 0x0e, 0x03, 0xc0, 0x21,
+ 0x2c, 0x15, 0xc0, 0x21, 0x3a, 0xc8, 0xa2, 0x57, 0x0f, 0xb5, 0xb1, 0xc8,
+ 0xbe, 0x6a, 0x0f, 0xcf, 0x59, 0xcc, 0x8a, 0x75, 0x0f, 0xd6, 0x10, 0x44,
+ 0x05, 0x1e, 0xc0, 0x21, 0x46, 0xd8, 0x21, 0xcb, 0x0f, 0xa7, 0x11, 0xc5,
+ 0xc1, 0x02, 0x0f, 0xa6, 0x61, 0x14, 0xc0, 0x21, 0x52, 0xdc, 0x12, 0x71,
+ 0x0f, 0xb5, 0x70, 0x47, 0x34, 0x2f, 0xc0, 0x21, 0x5e, 0x4f, 0x63, 0x87,
+ 0xc0, 0x21, 0x71, 0xd3, 0x45, 0x86, 0x08, 0x5c, 0xd1, 0xcc, 0x45, 0x8d,
+ 0x08, 0x5c, 0xc9, 0x47, 0x02, 0x0e, 0x40, 0x21, 0x7d, 0x49, 0xae, 0x34,
+ 0xc0, 0x21, 0xd8, 0x11, 0xc0, 0x21, 0xe4, 0x03, 0x40, 0x21, 0xf0, 0x18,
+ 0xc0, 0x21, 0xfc, 0xc2, 0x00, 0x29, 0x0f, 0xcc, 0x61, 0x15, 0xc0, 0x22,
+ 0x08, 0x05, 0xc0, 0x22, 0x1a, 0x55, 0x38, 0x15, 0xc0, 0x22, 0x24, 0x0e,
+ 0xc0, 0x22, 0x3c, 0x45, 0x9e, 0xa0, 0xc0, 0x22, 0x4e, 0xce, 0x6b, 0xc6,
+ 0x0f, 0x9f, 0x61, 0xd5, 0x37, 0x82, 0x0f, 0x9e, 0xd1, 0xc9, 0xb3, 0xb9,
+ 0x0f, 0xce, 0x78, 0xc7, 0xc9, 0xc7, 0x0f, 0xd4, 0xa1, 0x44, 0xde, 0xdf,
+ 0xc0, 0x22, 0x60, 0x09, 0xc0, 0x22, 0x6c, 0x18, 0xc0, 0x22, 0x78, 0x46,
+ 0xce, 0x09, 0xc0, 0x22, 0x88, 0x15, 0xc0, 0x22, 0x94, 0x07, 0xc0, 0x22,
+ 0xa4, 0x45, 0x05, 0xbb, 0xc0, 0x22, 0xb0, 0xce, 0x74, 0x40, 0x01, 0x19,
+ 0x89, 0x03, 0xc0, 0x22, 0xbc, 0xd0, 0x5f, 0xe2, 0x01, 0x12, 0x79, 0xc8,
+ 0xb6, 0x42, 0x01, 0x80, 0x18, 0x11, 0xc0, 0x22, 0xc6, 0x03, 0xc0, 0x22,
+ 0xd6, 0xcd, 0x77, 0x39, 0x01, 0x36, 0xd1, 0xc3, 0x00, 0xcb, 0x0f, 0xa2,
+ 0xb9, 0xd2, 0x47, 0x4b, 0x0f, 0xca, 0x08, 0xc2, 0x00, 0x58, 0x0f, 0xcd,
+ 0x21, 0x42, 0x01, 0x48, 0xc0, 0x22, 0xeb, 0x4a, 0xa7, 0x42, 0xc0, 0x22,
+ 0xfb, 0x17, 0xc0, 0x23, 0x07, 0x16, 0xc0, 0x23, 0x13, 0x89, 0x0f, 0xa0,
+ 0xab, 0x00, 0x23, 0x1d, 0x47, 0x73, 0x7e, 0xc0, 0x23, 0x29, 0xc7, 0xae,
+ 0xcf, 0x01, 0x05, 0x59, 0xc6, 0xb9, 0xb4, 0x0f, 0xae, 0x73, 0x00, 0x23,
+ 0x4d, 0xcb, 0x95, 0x14, 0x0f, 0xaa, 0x51, 0x0e, 0xc0, 0x23, 0x53, 0xc2,
+ 0x00, 0xbf, 0x0f, 0xb5, 0x51, 0xd2, 0x49, 0x8b, 0x0f, 0xb5, 0x78, 0x47,
+ 0xc6, 0xe1, 0xc0, 0x23, 0x5f, 0xc6, 0xcb, 0xab, 0x0f, 0xca, 0xf9, 0xc2,
+ 0x00, 0x3b, 0x0f, 0xcc, 0x30, 0x42, 0x01, 0xe2, 0xc0, 0x23, 0x83, 0x44,
+ 0x39, 0x86, 0xc0, 0x23, 0x8d, 0xca, 0xa5, 0x44, 0x01, 0x09, 0xc1, 0xc4,
+ 0xce, 0x23, 0x01, 0x01, 0x03, 0x00, 0x23, 0x99, 0x10, 0xc0, 0x23, 0x9d,
+ 0xce, 0x61, 0x03, 0x00, 0x00, 0x80, 0x18, 0xc0, 0x23, 0xa9, 0x15, 0xc0,
+ 0x23, 0xb5, 0x05, 0xc0, 0x23, 0xc1, 0x45, 0x75, 0x61, 0xc0, 0x23, 0xd9,
+ 0xcc, 0x86, 0xd9, 0x01, 0x01, 0xd9, 0xcd, 0x7c, 0x74, 0x0f, 0x9c, 0xb9,
+ 0x42, 0x00, 0xa9, 0xc0, 0x23, 0xeb, 0x42, 0x04, 0x2b, 0xc0, 0x23, 0xf7,
+ 0x45, 0xdc, 0xc7, 0xc0, 0x24, 0x03, 0xcb, 0x4f, 0x1a, 0x0f, 0xb0, 0x61,
+ 0xd3, 0x1c, 0x59, 0x07, 0xff, 0xe8, 0x43, 0x00, 0x2e, 0xc0, 0x24, 0x19,
+ 0xc2, 0x00, 0x75, 0x0f, 0xa4, 0x6b, 0x00, 0x24, 0x2d, 0xc4, 0x7c, 0x7d,
+ 0x0f, 0x9c, 0x03, 0x00, 0x24, 0x3d, 0x43, 0x00, 0x89, 0xc0, 0x24, 0x43,
+ 0x57, 0x27, 0x2f, 0xc0, 0x24, 0x4f, 0xc7, 0x44, 0xfa, 0x07, 0xef, 0xe1,
+ 0xc3, 0x01, 0x09, 0x0f, 0xca, 0x30, 0xc2, 0x00, 0x3b, 0x0f, 0xd5, 0x43,
+ 0x00, 0x24, 0x5b, 0x42, 0x02, 0xa7, 0xc0, 0x24, 0x61, 0xc8, 0xb6, 0xba,
+ 0x0f, 0xc8, 0xb1, 0x43, 0x0d, 0x05, 0xc0, 0x24, 0x71, 0x46, 0x1c, 0xa1,
+ 0xc0, 0x24, 0x7b, 0x44, 0x12, 0xb8, 0xc0, 0x24, 0x99, 0xd2, 0x49, 0x1f,
+ 0x0f, 0x9b, 0x01, 0xc2, 0x00, 0x40, 0x0f, 0x99, 0xcb, 0x00, 0x24, 0xbf,
+ 0xc5, 0xde, 0x39, 0x0f, 0xa0, 0x99, 0xc5, 0xd9, 0x2a, 0x0f, 0xb5, 0x18,
+ 0xc3, 0xe5, 0x57, 0x0f, 0xd4, 0x91, 0x0b, 0xc0, 0x24, 0xc5, 0x42, 0x01,
+ 0xdd, 0xc0, 0x24, 0xd8, 0x96, 0x0f, 0xa0, 0x03, 0x00, 0x24, 0xe5, 0x05,
+ 0xc0, 0x24, 0xeb, 0xc4, 0xb0, 0x4f, 0x0f, 0xa0, 0x3b, 0x00, 0x24, 0xf7,
+ 0x8f, 0x0f, 0xa0, 0x78, 0xc8, 0xbe, 0xb2, 0x01, 0x05, 0xe9, 0xc8, 0x76,
+ 0x54, 0x01, 0x05, 0x41, 0x43, 0x5d, 0xc0, 0xc0, 0x24, 0xfd, 0x10, 0xc0,
+ 0x25, 0x0f, 0xcc, 0x89, 0x49, 0x0f, 0x9e, 0x49, 0xca, 0xa7, 0xba, 0x01,
+ 0x4f, 0xa1, 0x5a, 0x19, 0xae, 0x40, 0x25, 0x19, 0x51, 0x50, 0x8a, 0xc0,
+ 0x25, 0x3d, 0x42, 0x02, 0x32, 0xc0, 0x25, 0x7c, 0xc5, 0xda, 0x74, 0x0f,
+ 0xce, 0xd8, 0x14, 0xc0, 0x25, 0x9a, 0xc3, 0x0e, 0x6a, 0x01, 0x35, 0xb1,
+ 0x44, 0x02, 0x27, 0xc0, 0x25, 0xac, 0xd5, 0x34, 0x10, 0x01, 0x51, 0x78,
+ 0x07, 0xc0, 0x25, 0xb8, 0xca, 0x89, 0x7b, 0x01, 0x38, 0x61, 0xc3, 0x14,
+ 0x45, 0x01, 0x32, 0x69, 0x43, 0x1c, 0x87, 0xc0, 0x25, 0xc4, 0xcc, 0x86,
+ 0x79, 0x0f, 0xa7, 0x99, 0xc4, 0x87, 0x8b, 0x0f, 0x9d, 0xd9, 0x47, 0xc1,
+ 0xe0, 0x40, 0x25, 0xce, 0x0e, 0xc0, 0x25, 0xda, 0xd0, 0x59, 0xb2, 0x0f,
+ 0xdd, 0xd8, 0x4d, 0x7b, 0x70, 0xc0, 0x25, 0xec, 0xc5, 0xdc, 0x63, 0x01,
+ 0x5f, 0x30, 0x09, 0xc0, 0x26, 0x06, 0xc2, 0x07, 0x49, 0x0f, 0xb4, 0xa9,
+ 0x49, 0xa7, 0x9d, 0xc0, 0x26, 0x16, 0x10, 0xc0, 0x26, 0x22, 0x0f, 0xc0,
+ 0x26, 0x2c, 0x43, 0x26, 0x1e, 0xc0, 0x26, 0x38, 0xc4, 0xde, 0xd3, 0x01,
+ 0x32, 0x49, 0x0d, 0xc0, 0x26, 0x44, 0x42, 0x02, 0x32, 0xc0, 0x26, 0x50,
+ 0xda, 0x1b, 0x82, 0x0f, 0x9e, 0x99, 0xc2, 0x00, 0x99, 0x0f, 0x99, 0x70,
+ 0xc3, 0xe5, 0x18, 0x0f, 0xcc, 0xb1, 0xc5, 0x46, 0xcd, 0x0f, 0xa2, 0xa8,
+ 0x14, 0xc0, 0x26, 0x62, 0xc9, 0xb2, 0x90, 0x01, 0x05, 0x71, 0xc3, 0x17,
+ 0x93, 0x0f, 0x99, 0xb9, 0xcb, 0x8e, 0xb8, 0x0f, 0xca, 0x18, 0x43, 0x02,
+ 0xdf, 0xc0, 0x26, 0x72, 0x0b, 0xc0, 0x26, 0x7a, 0x11, 0xc0, 0x26, 0x84,
+ 0x17, 0xc0, 0x26, 0x90, 0x42, 0x00, 0x29, 0xc0, 0x26, 0x9c, 0x03, 0x40,
+ 0x26, 0xa6, 0xc4, 0xbc, 0xf7, 0x0f, 0xb5, 0xe9, 0x42, 0x00, 0x7f, 0xc0,
+ 0x26, 0xb2, 0x16, 0xc0, 0x26, 0xe8, 0xc9, 0xac, 0x60, 0x0f, 0xaf, 0xe1,
+ 0x57, 0x29, 0x12, 0xc0, 0x26, 0xf4, 0xc4, 0x32, 0xd0, 0x0f, 0x9a, 0x29,
+ 0xc4, 0x5a, 0xfe, 0x0f, 0xa2, 0x29, 0x11, 0x40, 0x27, 0x00, 0x03, 0xc0,
+ 0x27, 0x0f, 0x0b, 0xc0, 0x27, 0x2c, 0x17, 0xc0, 0x27, 0x4a, 0x11, 0x40,
+ 0x27, 0x57, 0x4c, 0x89, 0xf1, 0xc0, 0x27, 0x64, 0x03, 0xc0, 0x27, 0xc4,
+ 0x0e, 0xc0, 0x27, 0xd4, 0x10, 0xc0, 0x27, 0xde, 0xc7, 0xc9, 0x81, 0x0f,
+ 0xcf, 0x51, 0xc8, 0xb9, 0x22, 0x0f, 0xcf, 0xc0, 0x09, 0xc0, 0x27, 0xee,
+ 0x42, 0x00, 0x4e, 0xc0, 0x27, 0xfd, 0xc3, 0x18, 0xb3, 0x00, 0x03, 0xf3,
+ 0x00, 0x28, 0x09, 0x14, 0xc0, 0x28, 0x0d, 0xc2, 0x16, 0x59, 0x01, 0x4f,
+ 0xf3, 0x00, 0x28, 0x1f, 0xc4, 0x00, 0x3b, 0x0f, 0x9d, 0x59, 0xcf, 0x65,
+ 0x3a, 0x01, 0x4e, 0xe9, 0x46, 0xce, 0x3f, 0xc0, 0x28, 0x25, 0x47, 0xc6,
+ 0x39, 0x40, 0x28, 0x54, 0xd7, 0x22, 0x44, 0x01, 0x39, 0xc9, 0x11, 0xc0,
+ 0x28, 0x6c, 0xd7, 0x27, 0x18, 0x0f, 0xa8, 0x00, 0x43, 0x01, 0xa4, 0xc0,
+ 0x28, 0x76, 0xc3, 0x91, 0xe8, 0x01, 0x32, 0x41, 0x85, 0x01, 0x18, 0x91,
+ 0x44, 0x02, 0x8b, 0xc0, 0x28, 0x82, 0x47, 0x2d, 0x4e, 0xc0, 0x28, 0x8c,
+ 0x42, 0x00, 0x43, 0x40, 0x28, 0xbc, 0xce, 0x75, 0x4a, 0x0f, 0xd3, 0xc9,
+ 0xc8, 0xbf, 0x9a, 0x01, 0x31, 0x61, 0xd6, 0x2f, 0x46, 0x01, 0x08, 0x09,
+ 0x0f, 0xc0, 0x28, 0xc8, 0xc3, 0x1f, 0x19, 0x0f, 0xce, 0x89, 0x44, 0x0d,
+ 0xff, 0x40, 0x28, 0xd4, 0x54, 0x3e, 0x94, 0xc0, 0x29, 0x06, 0x46, 0x0c,
+ 0x8e, 0xc0, 0x29, 0x6a, 0x07, 0xc0, 0x29, 0x76, 0xc9, 0xb3, 0x44, 0x01,
+ 0x1f, 0x81, 0x42, 0x00, 0xe6, 0xc0, 0x29, 0x88, 0x4b, 0x66, 0xd0, 0xc0,
+ 0x29, 0x94, 0xcb, 0x91, 0xaf, 0x0f, 0xa3, 0xf0, 0x42, 0x00, 0xf1, 0xc0,
+ 0x29, 0xa3, 0xca, 0x9c, 0xca, 0x01, 0x05, 0x99, 0xc7, 0xc6, 0xb0, 0x0f,
+ 0x9a, 0x30, 0x00, 0x40, 0x29, 0xad, 0x43, 0x10, 0x73, 0xc0, 0x29, 0xb9,
+ 0x96, 0x0f, 0xa0, 0xe3, 0x00, 0x29, 0xc5, 0xca, 0xa4, 0xc2, 0x01, 0x3e,
+ 0x89, 0xc4, 0xca, 0xcf, 0x01, 0x34, 0x99, 0xc2, 0x06, 0x46, 0x01, 0x31,
+ 0x29, 0x09, 0x40, 0x29, 0xd1, 0x16, 0xc0, 0x29, 0xf2, 0x05, 0xc0, 0x2a,
+ 0x02, 0xc7, 0x5a, 0x55, 0x01, 0x15, 0x31, 0xd5, 0x2b, 0xc1, 0x01, 0x12,
+ 0x18, 0xc9, 0xad, 0x5c, 0x01, 0x34, 0xd9, 0xcb, 0x8f, 0x26, 0x0f, 0xa2,
+ 0xf8, 0x47, 0x02, 0x0e, 0xc0, 0x2a, 0x0e, 0x15, 0xc0, 0x2a, 0x55, 0x48,
+ 0xa3, 0x64, 0xc0, 0x2a, 0x61, 0x46, 0x09, 0x97, 0xc0, 0x2a, 0x6d, 0x4b,
+ 0x6f, 0xc7, 0xc0, 0x2a, 0x91, 0x56, 0x30, 0x90, 0x40, 0x2a, 0xae, 0xc8,
+ 0xbc, 0xb2, 0x01, 0x1f, 0x31, 0x42, 0x00, 0x99, 0xc0, 0x2a, 0xb8, 0x47,
+ 0xc2, 0xd5, 0xc0, 0x2a, 0xc4, 0xc9, 0x49, 0x4c, 0x00, 0x00, 0x31, 0x45,
+ 0x31, 0xf0, 0x40, 0x2a, 0xd0, 0x54, 0x3e, 0x80, 0xc0, 0x2a, 0xdc, 0x12,
+ 0xc0, 0x2b, 0x38, 0x11, 0x40, 0x2b, 0x44, 0x46, 0xd0, 0x6d, 0xc0, 0x2b,
+ 0x50, 0xc5, 0xdd, 0x8f, 0x0f, 0xca, 0x88, 0xcf, 0x65, 0xb2, 0x0f, 0x9e,
+ 0x41, 0xd7, 0x26, 0x49, 0x01, 0x51, 0xf9, 0x12, 0xc0, 0x2b, 0x5c, 0xc7,
+ 0xc5, 0x67, 0x0f, 0xb4, 0x88, 0xcc, 0x88, 0x35, 0x0f, 0xb5, 0x09, 0x45,
+ 0xd7, 0x72, 0x40, 0x2b, 0x68, 0x1a, 0xc0, 0x2b, 0x8a, 0x43, 0x1d, 0xbb,
+ 0xc0, 0x2b, 0x96, 0x42, 0x02, 0x10, 0xc0, 0x2b, 0xb2, 0x19, 0xc0, 0x2b,
+ 0xbe, 0x9b, 0x0f, 0xa3, 0x33, 0x00, 0x2b, 0xd1, 0x11, 0xc0, 0x2b, 0xd7,
+ 0xc2, 0x00, 0x50, 0x0f, 0xa5, 0x19, 0xc5, 0xdc, 0x8b, 0x0f, 0xa4, 0x83,
+ 0x00, 0x2b, 0xe4, 0xc2, 0x00, 0xb1, 0x0f, 0xa0, 0xb9, 0xc2, 0x02, 0x6f,
+ 0x0f, 0xcd, 0xa1, 0x47, 0xc9, 0xdc, 0x40, 0x2b, 0xea, 0x11, 0xc0, 0x2b,
+ 0xf6, 0x03, 0xc0, 0x2c, 0x08, 0x42, 0x0f, 0xe1, 0x40, 0x2c, 0x14, 0x10,
+ 0xc0, 0x2c, 0x1e, 0x0e, 0xc0, 0x2c, 0x31, 0x15, 0xc0, 0x2c, 0x3b, 0x06,
+ 0xc0, 0x2c, 0x50, 0xc2, 0x07, 0xb8, 0x0f, 0xa3, 0xb3, 0x00, 0x2c, 0x5c,
+ 0x44, 0x82, 0x11, 0xc0, 0x2c, 0x60, 0x05, 0xc0, 0x2c, 0x84, 0x96, 0x0f,
+ 0xcc, 0x3b, 0x00, 0x2c, 0x94, 0x14, 0xc0, 0x2c, 0xa7, 0x09, 0x40, 0x2c,
+ 0xb1, 0xc3, 0x18, 0x91, 0x0f, 0xcd, 0x61, 0xcc, 0x8a, 0x81, 0x01, 0x31,
+ 0x19, 0x16, 0xc0, 0x2c, 0xc3, 0xc4, 0x56, 0x1d, 0x0f, 0xa2, 0xc9, 0x42,
+ 0x02, 0xa7, 0xc0, 0x2c, 0xcf, 0x14, 0xc0, 0x2c, 0xdb, 0x42, 0x00, 0x76,
+ 0xc0, 0x2c, 0xe5, 0x44, 0x1f, 0x3c, 0x40, 0x2c, 0xf1, 0x03, 0xc0, 0x2c,
+ 0xfb, 0x10, 0xc0, 0x2d, 0x1d, 0xc2, 0x02, 0xa7, 0x0f, 0xa8, 0xa3, 0x00,
+ 0x2d, 0x30, 0x16, 0xc0, 0x2d, 0x3a, 0xc5, 0xdc, 0x95, 0x01, 0x11, 0xa9,
+ 0x07, 0xc0, 0x2d, 0x46, 0x86, 0x0f, 0xb6, 0x79, 0xca, 0x9e, 0x1e, 0x0f,
+ 0xce, 0x18, 0xc4, 0x02, 0x10, 0x0f, 0xce, 0x43, 0x00, 0x2d, 0x52, 0x95,
+ 0x0f, 0xb4, 0x63, 0x00, 0x2d, 0x58, 0x42, 0x02, 0xa7, 0xc0, 0x2d, 0x62,
+ 0x89, 0x0f, 0xa0, 0xdb, 0x00, 0x2d, 0x7a, 0x44, 0xdf, 0xb3, 0xc0, 0x2d,
+ 0x80, 0xd3, 0x46, 0x1e, 0x0f, 0x9e, 0xb9, 0x44, 0x6f, 0xbf, 0xc0, 0x2d,
+ 0x8c, 0xc4, 0x00, 0x3b, 0x0f, 0xd5, 0x19, 0xc5, 0xdc, 0x4f, 0x0f, 0x99,
+ 0x78, 0x0b, 0xc0, 0x2d, 0x96, 0x03, 0xc0, 0x2d, 0xa6, 0x11, 0xc0, 0x2d,
+ 0xb0, 0x07, 0x40, 0x2d, 0xc8, 0x57, 0x2a, 0x54, 0xc0, 0x2d, 0xd2, 0xcd,
+ 0x7c, 0xe9, 0x07, 0xf7, 0xf8, 0xd2, 0x4b, 0x4d, 0x08, 0xe3, 0x61, 0x47,
+ 0x34, 0x2f, 0xc0, 0x2e, 0x26, 0x06, 0xc0, 0x2e, 0x4a, 0x4b, 0x93, 0x30,
+ 0xc0, 0x2e, 0x5c, 0xce, 0x73, 0x1a, 0x08, 0xe2, 0x19, 0x45, 0x00, 0xba,
+ 0xc0, 0x2e, 0x64, 0x4b, 0x6f, 0xc7, 0xc0, 0x2e, 0x74, 0x47, 0x02, 0x0e,
+ 0x40, 0x2e, 0x94, 0x19, 0xc0, 0x2e, 0xfb, 0x43, 0x00, 0x75, 0xc0, 0x2f,
+ 0x05, 0xc5, 0x0a, 0xe2, 0x01, 0x2e, 0x53, 0x00, 0x2f, 0x15, 0x46, 0x19,
+ 0xbb, 0xc0, 0x2f, 0x1b, 0xc2, 0x00, 0x3b, 0x0f, 0xa8, 0x93, 0x00, 0x2f,
+ 0x2d, 0x43, 0x00, 0xc7, 0xc0, 0x2f, 0x39, 0xc6, 0xcf, 0xbf, 0x0f, 0x9b,
+ 0x69, 0xd0, 0x5c, 0xb2, 0x0f, 0xb1, 0x69, 0xc5, 0xd5, 0x01, 0x0f, 0xcc,
+ 0xf1, 0x16, 0x40, 0x2f, 0x45, 0x42, 0x00, 0x4b, 0xc0, 0x2f, 0x51, 0x42,
+ 0x0f, 0x9b, 0xc0, 0x2f, 0x5f, 0x91, 0x01, 0x32, 0x63, 0x00, 0x2f, 0x6b,
+ 0x48, 0x00, 0xcc, 0xc0, 0x2f, 0x71, 0x45, 0xd4, 0x43, 0xc0, 0x2f, 0x9a,
+ 0xc4, 0xe2, 0xa3, 0x0f, 0xa6, 0x91, 0xca, 0x9a, 0xae, 0x0f, 0x9c, 0xd1,
+ 0xc3, 0x13, 0x35, 0x0f, 0x9a, 0x59, 0x89, 0x0f, 0xcd, 0xa8, 0xc7, 0xca,
+ 0x3e, 0x0f, 0xcc, 0x09, 0x09, 0xc0, 0x2f, 0xbc, 0x43, 0x1b, 0x67, 0xc0,
+ 0x2f, 0xc8, 0xc3, 0x00, 0x38, 0x01, 0x32, 0x71, 0xd1, 0x52, 0xee, 0x01,
+ 0x05, 0xb1, 0xc7, 0x77, 0xc1, 0x01, 0x05, 0x21, 0x10, 0xc0, 0x2f, 0xd4,
+ 0x0f, 0xc0, 0x2f, 0xdc, 0xc2, 0x10, 0x3f, 0x0f, 0xaf, 0x13, 0x00, 0x2f,
+ 0xe8, 0xc4, 0x8a, 0x84, 0x0f, 0xcc, 0x70, 0xc8, 0x21, 0xfb, 0x0f, 0xc9,
+ 0x29, 0x45, 0x5b, 0x53, 0xc0, 0x2f, 0xee, 0x4c, 0x8c, 0x61, 0x40, 0x2f,
+ 0xfa, 0x14, 0xc0, 0x30, 0x63, 0x44, 0x0b, 0x13, 0xc0, 0x30, 0x6f, 0xca,
+ 0xa4, 0x54, 0x70, 0x00, 0x09, 0xcf, 0x68, 0xfa, 0x01, 0x31, 0xf3, 0x00,
+ 0x30, 0x83, 0x04, 0xc0, 0x30, 0x87, 0x06, 0xc0, 0x30, 0x93, 0xd5, 0x34,
+ 0x4f, 0x0f, 0xca, 0x69, 0x42, 0x01, 0x7c, 0x40, 0x30, 0x9f, 0xc5, 0xcf,
+ 0x36, 0x0f, 0xcf, 0x99, 0xc3, 0x0c, 0xa5, 0x0f, 0xd6, 0x08, 0x44, 0x00,
+ 0x67, 0xc0, 0x30, 0xd9, 0x46, 0x01, 0x4a, 0xc0, 0x31, 0x0d, 0x4a, 0x01,
+ 0xa9, 0xc0, 0x31, 0x4b, 0xce, 0x72, 0xb8, 0x0f, 0xb2, 0x19, 0x00, 0x40,
+ 0x31, 0x69, 0x0b, 0xc0, 0x31, 0x90, 0xda, 0x1c, 0x6c, 0x01, 0x35, 0x79,
+ 0x06, 0xc0, 0x31, 0xa9, 0xcb, 0x96, 0x1c, 0x0f, 0xb0, 0x91, 0xce, 0x6e,
+ 0xc8, 0x01, 0x5e, 0x88, 0x00, 0x40, 0x31, 0xb5, 0x47, 0x02, 0x0e, 0xc0,
+ 0x31, 0xc1, 0xcc, 0x1d, 0xc7, 0x08, 0x1c, 0xf8, 0x03, 0xc0, 0x32, 0x24,
+ 0x0e, 0xc0, 0x32, 0x32, 0x50, 0x5b, 0xb2, 0xc0, 0x32, 0x42, 0x14, 0xc0,
+ 0x32, 0x84, 0x45, 0xd4, 0x0c, 0xc0, 0x32, 0x8e, 0xc6, 0xcb, 0x57, 0x0f,
+ 0xcc, 0xa1, 0x4b, 0x8d, 0x8f, 0x40, 0x32, 0xa8, 0x14, 0xc0, 0x33, 0x00,
+ 0x16, 0xc0, 0x33, 0x0f, 0x17, 0xc0, 0x33, 0x19, 0xc8, 0x6b, 0xf0, 0x01,
+ 0x11, 0xd9, 0x0e, 0xc0, 0x33, 0x2b, 0xc3, 0x6b, 0x12, 0x0f, 0xa9, 0x51,
+ 0xc6, 0xd1, 0x6f, 0x0f, 0x9f, 0x29, 0x43, 0x6e, 0xfe, 0xc0, 0x33, 0x38,
+ 0xc2, 0x01, 0x25, 0x0f, 0xd4, 0xe8, 0x0f, 0xc0, 0x33, 0x44, 0x10, 0xc0,
+ 0x33, 0x57, 0x42, 0x01, 0x29, 0xc0, 0x33, 0x6b, 0xc7, 0xc4, 0xcd, 0x0f,
+ 0xad, 0xa1, 0x16, 0xc0, 0x33, 0x77, 0xdb, 0x18, 0x8a, 0x0f, 0xb2, 0x59,
+ 0xc3, 0x23, 0x1b, 0x01, 0x5f, 0x09, 0x48, 0xbc, 0x42, 0x40, 0x33, 0x83,
+ 0x42, 0x00, 0x09, 0xc0, 0x33, 0xbf, 0x47, 0x0d, 0xdb, 0xc0, 0x33, 0xc7,
+ 0xcb, 0x93, 0x46, 0x01, 0x37, 0x61, 0xc6, 0xcd, 0x5b, 0x0f, 0x99, 0xd1,
+ 0xca, 0xa4, 0x2c, 0x0f, 0xb6, 0xa9, 0xc9, 0xac, 0xf9, 0x0f, 0xcb, 0xf1,
+ 0xca, 0x9f, 0x40, 0x0f, 0xcc, 0xd8, 0xcf, 0x68, 0xdc, 0x01, 0x1c, 0x71,
+ 0x12, 0xc0, 0x33, 0xdf, 0xc4, 0xe0, 0x5b, 0x01, 0x5e, 0xd0, 0xd3, 0x40,
+ 0x67, 0x0f, 0xa5, 0x79, 0xc9, 0x8c, 0x04, 0x0f, 0xb1, 0x79, 0x96, 0x0f,
+ 0xb6, 0xb1, 0xca, 0x9e, 0xdc, 0x0f, 0xc8, 0xb8, 0x18, 0xc0, 0x33, 0xee,
+ 0x4f, 0x61, 0x20, 0xc0, 0x33, 0xfa, 0x42, 0x00, 0xac, 0xc0, 0x34, 0x0c,
+ 0x15, 0xc0, 0x34, 0x19, 0x08, 0xc0, 0x34, 0x25, 0x05, 0xc0, 0x34, 0x34,
+ 0x06, 0xc0, 0x34, 0x40, 0x46, 0xd2, 0x65, 0xc0, 0x34, 0x4d, 0xc8, 0xb6,
+ 0x1a, 0x0f, 0xa7, 0x28, 0x43, 0x01, 0xad, 0xc0, 0x34, 0x59, 0x49, 0x1c,
+ 0x89, 0x40, 0x34, 0x65, 0xc5, 0xdb, 0x41, 0x01, 0x37, 0xc1, 0xd5, 0x33,
+ 0xbc, 0x0f, 0x9e, 0x91, 0x05, 0x40, 0x34, 0xaf, 0xc6, 0x3c, 0x52, 0x01,
+ 0x15, 0xbb, 0x00, 0x34, 0xbb, 0x92, 0x0f, 0xa3, 0xfa, 0x00, 0x34, 0xc1,
+ 0x14, 0xc0, 0x34, 0xc7, 0xc6, 0x08, 0xea, 0x01, 0x05, 0x49, 0x0f, 0xc0,
+ 0x34, 0xdd, 0xc7, 0xbf, 0xe8, 0x0f, 0xa1, 0xd1, 0xc2, 0x00, 0x6c, 0x0f,
+ 0xd5, 0xa8, 0x43, 0x01, 0xfe, 0xc0, 0x34, 0xec, 0xc3, 0x0e, 0x66, 0x0f,
+ 0xb6, 0xf3, 0x00, 0x34, 0xf6, 0xc3, 0x04, 0x85, 0x0f, 0xa0, 0x58, 0x4a,
+ 0x15, 0x7c, 0xc0, 0x35, 0x02, 0xcc, 0x87, 0xb1, 0x0f, 0xad, 0x71, 0x10,
+ 0xc0, 0x35, 0x26, 0xcb, 0x91, 0xd0, 0x0f, 0xca, 0x01, 0xd2, 0x47, 0x39,
+ 0x01, 0x71, 0xf0, 0x16, 0xc0, 0x35, 0x36, 0x10, 0xc0, 0x35, 0x42, 0x14,
+ 0xc0, 0x35, 0x4e, 0x18, 0xc0, 0x35, 0x5a, 0xc9, 0xac, 0x72, 0x0f, 0xae,
+ 0x89, 0x45, 0xd7, 0x90, 0xc0, 0x35, 0x6c, 0xc4, 0x7f, 0xa8, 0x0f, 0xce,
+ 0x38, 0x06, 0xc0, 0x35, 0x78, 0xcf, 0x68, 0xeb, 0x01, 0x33, 0x81, 0x0b,
+ 0xc0, 0x35, 0x84, 0x44, 0x14, 0x97, 0x40, 0x35, 0x90, 0xca, 0x93, 0xd6,
+ 0x01, 0x38, 0x69, 0x07, 0xc0, 0x35, 0x9c, 0xcd, 0x75, 0x72, 0x0f, 0x9c,
+ 0x08, 0x9b, 0x0f, 0xd5, 0x83, 0x00, 0x35, 0xae, 0x03, 0xc0, 0x35, 0xb4,
+ 0x11, 0xc0, 0x35, 0xc4, 0x07, 0xc0, 0x35, 0xd9, 0xca, 0xa0, 0xc6, 0x0f,
+ 0xb1, 0x98, 0xc6, 0xd1, 0x7b, 0x0f, 0xcc, 0x51, 0x17, 0xc0, 0x35, 0xe5,
+ 0x14, 0xc0, 0x35, 0xef, 0xc2, 0x01, 0xbb, 0x0f, 0xcd, 0xb3, 0x00, 0x36,
+ 0x0b, 0xc4, 0x18, 0xb3, 0x0f, 0xae, 0x01, 0x89, 0x0f, 0x99, 0x5b, 0x00,
+ 0x36, 0x11, 0xc4, 0xe3, 0xc3, 0x0f, 0xd6, 0xa8, 0x05, 0xc0, 0x36, 0x17,
+ 0x42, 0x01, 0x0c, 0xc0, 0x36, 0x29, 0x0e, 0xc0, 0x36, 0x35, 0xca, 0x9c,
+ 0x0c, 0x01, 0x31, 0x59, 0xce, 0x73, 0xd0, 0x0f, 0x9c, 0x29, 0xc3, 0xd3,
+ 0x0e, 0x0f, 0xce, 0xd1, 0xc4, 0xd2, 0xb5, 0x0f, 0xa3, 0x50, 0x07, 0xc0,
+ 0x36, 0x3f, 0x11, 0xc0, 0x36, 0x4b, 0x03, 0xc0, 0x36, 0x60, 0xca, 0x9f,
+ 0x54, 0x0f, 0x9b, 0x20, 0x42, 0x02, 0xa7, 0xc0, 0x36, 0x6c, 0xc7, 0xc0,
+ 0x20, 0x01, 0x37, 0xe9, 0x10, 0xc0, 0x36, 0x76, 0xc2, 0x00, 0x40, 0x01,
+ 0x1e, 0xd8, 0x42, 0x01, 0xa3, 0xc0, 0x36, 0x82, 0x0f, 0xc0, 0x36, 0x8c,
+ 0x03, 0xc0, 0x36, 0x98, 0xc4, 0xe3, 0x9b, 0x0f, 0xc9, 0xd0, 0x14, 0xc0,
+ 0x36, 0xa4, 0x15, 0xc0, 0x36, 0xb1, 0x47, 0xc0, 0x0b, 0xc0, 0x36, 0xbe,
+ 0x45, 0xd5, 0xd3, 0xc0, 0x36, 0xca, 0x0e, 0xc0, 0x36, 0xd6, 0xd9, 0x1e,
+ 0xe6, 0x0f, 0x9e, 0x89, 0xd2, 0x4b, 0xb9, 0x01, 0x50, 0x68, 0xc4, 0xde,
+ 0x8b, 0x0f, 0xd4, 0xf3, 0x00, 0x36, 0xe2, 0x0e, 0xc0, 0x36, 0xe8, 0x43,
+ 0x6c, 0xc3, 0xc0, 0x36, 0xfa, 0x42, 0x07, 0x2f, 0xc0, 0x37, 0x12, 0x06,
+ 0xc0, 0x37, 0x1a, 0x10, 0x40, 0x37, 0x26, 0x49, 0xb3, 0x68, 0xc0, 0x37,
+ 0x34, 0x06, 0xc0, 0x37, 0x40, 0x42, 0x01, 0x1b, 0xc0, 0x37, 0x4a, 0x10,
+ 0xc0, 0x37, 0x54, 0x14, 0xc0, 0x37, 0x66, 0x03, 0xc0, 0x37, 0x78, 0x4b,
+ 0x93, 0x72, 0xc0, 0x37, 0x84, 0xc2, 0x00, 0xa2, 0x0f, 0xa6, 0xe9, 0x0e,
+ 0xc0, 0x37, 0xa8, 0xcd, 0x78, 0x3d, 0x00, 0x04, 0xa8, 0x16, 0xc0, 0x37,
+ 0xb4, 0x17, 0xc0, 0x37, 0xc0, 0x10, 0xc0, 0x37, 0xd5, 0x06, 0xc0, 0x37,
+ 0xee, 0xc3, 0x87, 0x43, 0x0f, 0xaf, 0xf9, 0x11, 0xc0, 0x37, 0xfc, 0x43,
+ 0x0b, 0x09, 0xc0, 0x38, 0x08, 0xca, 0x46, 0x99, 0x0f, 0xa7, 0x8b, 0x00,
+ 0x38, 0x12, 0xca, 0xa0, 0xd0, 0x0f, 0x9d, 0x28, 0x16, 0xc0, 0x38, 0x16,
+ 0x4c, 0x86, 0xb5, 0xc0, 0x38, 0x22, 0x46, 0xce, 0x93, 0xc0, 0x38, 0x47,
+ 0x15, 0xc0, 0x38, 0x65, 0x14, 0xc0, 0x38, 0x7d, 0x0e, 0xc0, 0x38, 0x8f,
+ 0x12, 0xc0, 0x38, 0xa1, 0x90, 0x0f, 0xa3, 0x43, 0x00, 0x38, 0xad, 0x0a,
+ 0xc0, 0x38, 0xdb, 0xc6, 0xd1, 0x87, 0x0f, 0xae, 0xb1, 0xc4, 0x60, 0xb3,
+ 0x00, 0x05, 0x79, 0xc5, 0xdb, 0x28, 0x0f, 0xcd, 0x19, 0x09, 0x40, 0x38,
+ 0xe7, 0x15, 0xc0, 0x38, 0xf7, 0x42, 0x00, 0x72, 0xc0, 0x39, 0x03, 0x43,
+ 0x1c, 0xe7, 0x40, 0x39, 0x0d, 0x06, 0xc0, 0x39, 0x19, 0x47, 0x02, 0x0e,
+ 0x40, 0x39, 0x2b, 0x15, 0xc0, 0x39, 0x8b, 0x0e, 0xc0, 0x39, 0x9d, 0x50,
+ 0x0f, 0x5e, 0xc0, 0x39, 0xa9, 0x16, 0xc0, 0x39, 0xb5, 0x4b, 0x6f, 0xc7,
+ 0xc0, 0x39, 0xc1, 0x4f, 0x30, 0x90, 0xc0, 0x3a, 0x02, 0x46, 0x09, 0x97,
+ 0x40, 0x3a, 0x0c, 0xc2, 0x01, 0xbb, 0x0f, 0xd5, 0x11, 0xcd, 0x7d, 0x37,
+ 0x0f, 0xce, 0x70, 0x9b, 0x0f, 0xa8, 0x8b, 0x00, 0x3a, 0x30, 0xc9, 0xa9,
+ 0xcf, 0x01, 0x09, 0x50, 0x46, 0x5c, 0x02, 0xc0, 0x3a, 0x3f, 0x45, 0xde,
+ 0x2a, 0xc0, 0x3a, 0x49, 0xc3, 0x4d, 0xd4, 0x0f, 0xaa, 0x59, 0x47, 0xc9,
+ 0xff, 0xc0, 0x3a, 0x72, 0x10, 0x40, 0x3a, 0x90, 0x52, 0x4c, 0xeb, 0xc0,
+ 0x3a, 0x9a, 0x48, 0xbb, 0xb2, 0xc0, 0x3a, 0xa6, 0x45, 0xdd, 0xf3, 0xc0,
+ 0x3a, 0xbe, 0x44, 0x2f, 0x1e, 0xc0, 0x3a, 0xde, 0x49, 0xb3, 0x4d, 0x40,
+ 0x3b, 0x00, 0xc6, 0x00, 0xf3, 0x01, 0x05, 0x69, 0xc2, 0x00, 0xcc, 0x0f,
+ 0xa4, 0x7b, 0x00, 0x3b, 0x28, 0xc4, 0x13, 0x35, 0x0f, 0xa2, 0xc1, 0xc7,
+ 0xc5, 0xe5, 0x0f, 0xca, 0xe9, 0xc2, 0x00, 0xac, 0x0f, 0xd4, 0x08, 0xc3,
+ 0x14, 0x6b, 0x0f, 0xa1, 0x41, 0xd4, 0x3d, 0xe0, 0x01, 0x93, 0xf8, 0x15,
+ 0xc0, 0x3b, 0x34, 0x42, 0x00, 0xa4, 0xc0, 0x3b, 0x3e, 0x19, 0xc0, 0x3b,
+ 0x4a, 0x43, 0x11, 0x7f, 0xc0, 0x3b, 0x60, 0xc5, 0xd8, 0x99, 0x01, 0x32,
+ 0x33, 0x00, 0x3b, 0x6c, 0x43, 0x5c, 0xeb, 0xc0, 0x3b, 0x72, 0x46, 0xd3,
+ 0x13, 0xc0, 0x3b, 0x7e, 0xc5, 0xde, 0x70, 0x0f, 0xa2, 0xa1, 0xc7, 0xc4,
+ 0xd4, 0x0f, 0xc8, 0x98, 0xcc, 0x86, 0x55, 0x0f, 0xc9, 0x11, 0xc2, 0x02,
+ 0x35, 0x01, 0x15, 0xe3, 0x00, 0x3b, 0x8e, 0x04, 0xc0, 0x3b, 0x94, 0x0b,
+ 0xc0, 0x3b, 0xa0, 0x47, 0x34, 0xa6, 0xc0, 0x3b, 0xac, 0xd3, 0x40, 0x7a,
+ 0x01, 0x01, 0x79, 0xc8, 0xba, 0x42, 0x0f, 0xa6, 0xd9, 0xca, 0xa4, 0x22,
+ 0x0f, 0xcf, 0xf8, 0x10, 0xc0, 0x3b, 0xb8, 0x94, 0x01, 0x15, 0xeb, 0x00,
+ 0x3b, 0xc2, 0x16, 0xc0, 0x3b, 0xd7, 0x00, 0xc0, 0x3b, 0xe8, 0x42, 0x02,
+ 0x2f, 0xc0, 0x3c, 0x0b, 0xc2, 0x00, 0x40, 0x0f, 0xa2, 0x19, 0xcc, 0x40,
+ 0x81, 0x00, 0x05, 0x00, 0xca, 0xa7, 0x06, 0x0f, 0x0a, 0x79, 0x0e, 0xc0,
+ 0x3c, 0x17, 0x46, 0x09, 0x97, 0xc0, 0x3c, 0x23, 0x15, 0xc0, 0x3c, 0x47,
+ 0x45, 0x28, 0xb1, 0x40, 0x3c, 0x53, 0x44, 0x75, 0x34, 0xc0, 0x3c, 0x6f,
+ 0x0f, 0xc0, 0x3c, 0x7b, 0xca, 0x9d, 0x92, 0x0f, 0xa9, 0x49, 0xc2, 0x02,
+ 0xa7, 0x00, 0x00, 0x00, 0xc5, 0x13, 0x84, 0x01, 0x16, 0x1b, 0x00, 0x3c,
+ 0x87, 0xcc, 0x06, 0xbb, 0x01, 0x16, 0x11, 0x48, 0x19, 0xb9, 0xc0, 0x3c,
+ 0x8d, 0x15, 0xc0, 0x3c, 0x99, 0x05, 0xc0, 0x3c, 0xa5, 0xc7, 0x05, 0xc0,
+ 0x01, 0x10, 0x79, 0xce, 0x72, 0xd4, 0x01, 0x50, 0x49, 0xd2, 0x48, 0x6b,
+ 0x01, 0x57, 0xf8, 0xca, 0xa0, 0x76, 0x00, 0x3f, 0xf9, 0x06, 0xc0, 0x3c,
+ 0xb1, 0x0e, 0xc0, 0x3c, 0xc3, 0xd0, 0x0f, 0x09, 0x00, 0x3f, 0xc9, 0x43,
+ 0x0a, 0x8a, 0xc0, 0x3c, 0xd5, 0x47, 0x10, 0x78, 0xc0, 0x3c, 0xe1, 0xd4,
+ 0x3d, 0x18, 0x00, 0x3f, 0xa0, 0xc3, 0x83, 0x55, 0x0f, 0xcb, 0xb9, 0xce,
+ 0x73, 0x8a, 0x0f, 0x98, 0x18, 0x46, 0x04, 0x8f, 0xc0, 0x3c, 0xed, 0x44,
+ 0x0b, 0x0d, 0x40, 0x3d, 0x0f, 0x44, 0xe4, 0x3b, 0xc0, 0x3d, 0x31, 0x12,
+ 0xc0, 0x3d, 0x3d, 0x00, 0x40, 0x3d, 0x49, 0xc3, 0x01, 0x97, 0x0f, 0xcc,
+ 0x29, 0xcf, 0x68, 0xeb, 0x01, 0x33, 0x89, 0x94, 0x0f, 0xa2, 0x12, 0x00,
+ 0x3d, 0x5b, 0x89, 0x0f, 0xca, 0xd1, 0x52, 0x4d, 0xb1, 0x40, 0x3d, 0x68,
+ 0x16, 0xc0, 0x3d, 0xee, 0x05, 0xc0, 0x3d, 0xf8, 0xd1, 0x50, 0x24, 0x0f,
+ 0xb0, 0x88, 0x15, 0xc0, 0x3e, 0x04, 0x42, 0x00, 0x99, 0xc0, 0x3e, 0x0e,
+ 0xc9, 0xa9, 0x3f, 0x00, 0x9b, 0x09, 0xc9, 0x11, 0xf6, 0x00, 0x9b, 0x11,
+ 0x12, 0xc0, 0x3e, 0x18, 0xcd, 0x2c, 0xb2, 0x00, 0x9b, 0x39, 0x46, 0x09,
+ 0x97, 0xc0, 0x3e, 0x24, 0x47, 0x34, 0x2f, 0xc0, 0x3e, 0x42, 0x4b, 0x8f,
+ 0x68, 0x40, 0x3e, 0x60, 0x07, 0xc0, 0x3e, 0x86, 0x47, 0xc5, 0x60, 0xc0,
+ 0x3e, 0xa1, 0x88, 0x0f, 0xce, 0xe9, 0x4d, 0x7c, 0x67, 0x40, 0x3e, 0xad,
+ 0x00, 0xc0, 0x3f, 0x26, 0xc6, 0x59, 0xd6, 0x01, 0x33, 0x50, 0xc6, 0x31,
+ 0x92, 0x01, 0x38, 0x4b, 0x00, 0x3f, 0x36, 0xca, 0x3a, 0x52, 0x01, 0x1c,
+ 0x31, 0x42, 0x00, 0xa9, 0xc0, 0x3f, 0x3c, 0x00, 0xc0, 0x3f, 0x48, 0xc5,
+ 0xd6, 0x0f, 0x00, 0x00, 0x28, 0x4b, 0x98, 0x4d, 0xc0, 0x3f, 0x5a, 0x4b,
+ 0x97, 0x45, 0xc0, 0x3f, 0x66, 0x48, 0xb6, 0x9a, 0x40, 0x3f, 0x72, 0x42,
+ 0x00, 0x65, 0xc0, 0x3f, 0x7e, 0x0b, 0x40, 0x3f, 0x88, 0x46, 0xd2, 0x05,
+ 0xc0, 0x3f, 0x94, 0xc4, 0x61, 0x0d, 0x00, 0x00, 0xd8, 0xcc, 0x83, 0x9d,
+ 0x01, 0x08, 0x39, 0x42, 0x00, 0x79, 0x40, 0x3f, 0x9e, 0x95, 0x0f, 0xa2,
+ 0x01, 0xc7, 0xb4, 0xd2, 0x0f, 0xa2, 0x98, 0x0b, 0xc0, 0x3f, 0xb0, 0x4c,
+ 0x83, 0x55, 0xc0, 0x3f, 0xbc, 0x42, 0x00, 0xb1, 0xc0, 0x3f, 0xd8, 0x47,
+ 0xc7, 0x12, 0xc0, 0x3f, 0xe4, 0x47, 0xc7, 0xb3, 0x40, 0x40, 0x18, 0xc5,
+ 0xd8, 0x30, 0x0f, 0xcc, 0x69, 0xc4, 0xe0, 0xfb, 0x0f, 0x9e, 0x61, 0x03,
+ 0xc0, 0x40, 0x42, 0xc5, 0xd0, 0x38, 0x0f, 0xcb, 0xe9, 0x4c, 0x89, 0xe5,
+ 0x40, 0x40, 0x4c, 0x07, 0xc0, 0x40, 0xc0, 0x03, 0xc0, 0x40, 0xca, 0x0b,
+ 0xc0, 0x40, 0xe2, 0x11, 0x40, 0x40, 0xee, 0xc2, 0x00, 0xb1, 0x01, 0x34,
+ 0xcb, 0x00, 0x40, 0xfa, 0x0f, 0xc0, 0x41, 0x00, 0x11, 0xc0, 0x41, 0x0c,
+ 0xcf, 0x63, 0x4b, 0x01, 0x05, 0x81, 0xc3, 0x73, 0xfc, 0x0f, 0xce, 0xf1,
+ 0xc7, 0xc8, 0x23, 0x01, 0x80, 0x98, 0xca, 0xa5, 0xee, 0x01, 0x09, 0xb9,
+ 0x14, 0x40, 0x41, 0x18, 0xc6, 0xd2, 0xef, 0x0f, 0x9d, 0x91, 0xc4, 0xbc,
+ 0x5c, 0x0f, 0xce, 0x20, 0x11, 0xc0, 0x41, 0x25, 0xca, 0xa4, 0xae, 0x01,
+ 0x4f, 0x31, 0x03, 0x40, 0x41, 0x37, 0x43, 0x01, 0x95, 0xc0, 0x41, 0x43,
+ 0xd0, 0x5f, 0xc2, 0x01, 0x3e, 0x39, 0xcc, 0x89, 0xc1, 0x01, 0x31, 0x31,
+ 0x0b, 0xc0, 0x41, 0x4f, 0x45, 0x0c, 0x91, 0x40, 0x41, 0x5b, 0xc2, 0x00,
+ 0x29, 0x0f, 0xcd, 0x31, 0x4b, 0x96, 0xd7, 0x40, 0x41, 0x67, 0x47, 0xc0,
+ 0xc1, 0xc0, 0x41, 0x7f, 0x07, 0xc0, 0x41, 0x9d, 0x52, 0x28, 0xce, 0xc0,
+ 0x41, 0xa7, 0xc3, 0x00, 0x44, 0x0f, 0xce, 0x28, 0x07, 0xc0, 0x41, 0xad,
+ 0xc7, 0xc4, 0x10, 0x01, 0x36, 0x71, 0xc8, 0x12, 0x47, 0x01, 0x30, 0x69,
+ 0x42, 0x00, 0x43, 0x40, 0x41, 0xb7, 0x06, 0xc0, 0x41, 0xc6, 0x47, 0xc0,
+ 0x89, 0xc0, 0x41, 0xd0, 0xc3, 0x0d, 0x14, 0x0f, 0xd6, 0x90, 0x16, 0xc0,
+ 0x41, 0xf8, 0xc8, 0xb8, 0x4a, 0x01, 0x09, 0x28, 0x42, 0x00, 0x2a, 0xc0,
+ 0x42, 0x04, 0x16, 0x40, 0x42, 0x28, 0xd1, 0x53, 0xdc, 0x01, 0x1f, 0xf9,
+ 0x46, 0x38, 0xe8, 0xc0, 0x42, 0x34, 0xda, 0x1c, 0x52, 0x07, 0xff, 0xe0,
+ 0x0e, 0xc0, 0x42, 0x40, 0xcb, 0x8e, 0x34, 0x0f, 0xcb, 0xa8, 0x44, 0x78,
+ 0xf3, 0xc0, 0x42, 0x4f, 0xc4, 0xcc, 0x91, 0x00, 0x16, 0xd8, 0x46, 0xd1,
+ 0xbd, 0xc0, 0x42, 0x67, 0x44, 0x3c, 0x52, 0x40, 0x42, 0x73, 0x46, 0xcd,
+ 0x37, 0xc0, 0x42, 0x7f, 0x51, 0x50, 0x35, 0xc0, 0x42, 0xc2, 0x4a, 0x51,
+ 0x89, 0x40, 0x42, 0xda, 0x15, 0xc0, 0x42, 0xf2, 0x42, 0x01, 0x0e, 0xc0,
+ 0x42, 0xfe, 0x48, 0x10, 0xb4, 0xc0, 0x43, 0x0a, 0x45, 0x01, 0xc3, 0xc0,
+ 0x43, 0x16, 0xd4, 0x3b, 0xd8, 0x08, 0xd1, 0x99, 0x47, 0x02, 0x0e, 0xc0,
+ 0x43, 0x2e, 0x46, 0x34, 0x6f, 0x40, 0x43, 0x8a, 0xce, 0x6d, 0xcc, 0x01,
+ 0x17, 0xf9, 0x14, 0xc0, 0x43, 0x96, 0x15, 0xc0, 0x43, 0xa8, 0x45, 0x00,
+ 0x49, 0xc0, 0x43, 0xb4, 0xca, 0x9c, 0xe8, 0x01, 0x4c, 0x11, 0xd6, 0x2c,
+ 0x02, 0x01, 0x53, 0x20, 0x49, 0xaf, 0xe4, 0xc0, 0x43, 0xc0, 0xc2, 0x11,
+ 0xa5, 0x01, 0x5f, 0x11, 0xc8, 0xb6, 0x3a, 0x0f, 0xcc, 0x98, 0x47, 0xca,
+ 0x14, 0xc0, 0x43, 0xd2, 0x47, 0xc0, 0xf2, 0xc0, 0x44, 0x02, 0xcc, 0x8b,
+ 0x41, 0x0f, 0x9c, 0x19, 0x94, 0x0f, 0xd6, 0xc8, 0xc2, 0x00, 0x10, 0x01,
+ 0x35, 0xa9, 0xc5, 0xd7, 0xe5, 0x01, 0x32, 0x19, 0xc6, 0xd1, 0x2d, 0x0f,
+ 0xc9, 0xc8, 0xc6, 0xd1, 0x09, 0x0f, 0xab, 0xc9, 0xc2, 0x00, 0x74, 0x01,
+ 0x50, 0xe8, 0xc9, 0x48, 0xa4, 0x01, 0x33, 0x49, 0x42, 0x02, 0xbc, 0xc0,
+ 0x44, 0x32, 0xd9, 0x1e, 0x37, 0x01, 0x50, 0xb0, 0xcb, 0x5a, 0x97, 0x01,
+ 0x12, 0xf9, 0x00, 0x40, 0x44, 0x3e, 0xc6, 0xcb, 0xb7, 0x01, 0x31, 0x79,
+ 0x00, 0x40, 0x44, 0x4a, 0x45, 0xd4, 0x89, 0xc0, 0x44, 0x56, 0xca, 0xa4,
+ 0x7c, 0x0f, 0xa4, 0xd9, 0xc6, 0x08, 0xea, 0x00, 0x05, 0x28, 0x42, 0x00,
+ 0x89, 0xc0, 0x44, 0x68, 0xc8, 0xb9, 0x1a, 0x0f, 0xcb, 0x59, 0xc2, 0x49,
+ 0x0c, 0x0f, 0xb7, 0xb1, 0x50, 0x5b, 0x52, 0xc0, 0x44, 0x73, 0x06, 0x40,
+ 0x44, 0xf5, 0xc8, 0xb9, 0x32, 0x01, 0x36, 0x81, 0x07, 0xc0, 0x44, 0xff,
+ 0x42, 0x00, 0xa9, 0xc0, 0x45, 0x0c, 0x11, 0xc0, 0x45, 0x1b, 0x12, 0xc0,
+ 0x45, 0x25, 0x14, 0xc0, 0x45, 0x31, 0x4b, 0x8c, 0x62, 0x40, 0x45, 0x3d,
+ 0xc6, 0xcb, 0x75, 0x01, 0x32, 0x89, 0xc6, 0xd2, 0x53, 0x01, 0x71, 0xf8,
+ 0xc5, 0xd1, 0xee, 0x01, 0x31, 0x21, 0xc5, 0xda, 0x47, 0x01, 0x08, 0x30,
+ 0xc9, 0x08, 0xe7, 0x01, 0x31, 0x09, 0x50, 0x59, 0x12, 0x40, 0x45, 0xb5,
+ 0xc3, 0x03, 0xd9, 0x0f, 0xa7, 0xbb, 0x00, 0x45, 0xc1, 0xc4, 0x2a, 0xa0,
+ 0x0f, 0x9e, 0xa8, 0xc5, 0x79, 0x8a, 0x0f, 0xa6, 0x29, 0xc9, 0xac, 0x57,
+ 0x0f, 0xc8, 0xc8, 0xc5, 0x11, 0x55, 0x0f, 0xa1, 0x8a, 0x00, 0x45, 0xc7,
+ 0x42, 0xbe, 0x99, 0xc0, 0x45, 0xcd, 0x08, 0x40, 0x45, 0xd9, 0x14, 0xc0,
+ 0x45, 0xe1, 0x05, 0xc0, 0x45, 0xeb, 0x15, 0xc0, 0x46, 0x05, 0x12, 0xc0,
+ 0x46, 0x29, 0x04, 0xc0, 0x46, 0x35, 0x16, 0xc0, 0x46, 0x4b, 0x46, 0xd0,
+ 0x31, 0xc0, 0x46, 0x63, 0x06, 0xc0, 0x46, 0x6f, 0x0e, 0xc0, 0x46, 0x81,
+ 0x0a, 0xc0, 0x46, 0x8d, 0x0f, 0xc0, 0x46, 0x9f, 0x19, 0xc0, 0x46, 0xa7,
+ 0x08, 0xc0, 0x46, 0xb1, 0x0c, 0xc0, 0x46, 0xbd, 0x07, 0xc0, 0x46, 0xc9,
+ 0x44, 0xe3, 0xb7, 0xc0, 0x46, 0xdb, 0xc3, 0x1a, 0x7c, 0x01, 0x75, 0xc9,
+ 0x09, 0x40, 0x46, 0xeb, 0x96, 0x01, 0x8e, 0x03, 0x00, 0x46, 0xf7, 0xc2,
+ 0x47, 0xa4, 0x01, 0x8e, 0x09, 0xc2, 0xe5, 0x85, 0x01, 0x8e, 0x11, 0xc3,
+ 0xe5, 0x84, 0x01, 0x8e, 0x19, 0x95, 0x01, 0x8e, 0x8b, 0x00, 0x46, 0xfb,
+ 0x8a, 0x01, 0x8e, 0x83, 0x00, 0x47, 0x15, 0x90, 0x01, 0x8e, 0x79, 0x92,
+ 0x01, 0x8e, 0x93, 0x00, 0x47, 0x2d, 0x86, 0x01, 0x8e, 0xa1, 0x93, 0x01,
+ 0x8f, 0x18, 0x42, 0x00, 0x3b, 0xc0, 0x47, 0x39, 0x07, 0xc0, 0x47, 0x48,
+ 0x14, 0xc0, 0x47, 0x54, 0xcb, 0x94, 0xc7, 0x0f, 0x9e, 0x09, 0xc5, 0xdc,
+ 0x45, 0x0f, 0x99, 0x80, 0x0b, 0xc0, 0x47, 0x5e, 0x14, 0xc0, 0x47, 0x68,
+ 0x44, 0xe0, 0xa7, 0xc0, 0x47, 0x74, 0x42, 0x00, 0x47, 0x40, 0x47, 0x9e,
+ 0xc3, 0x01, 0xe7, 0x01, 0x35, 0xb9, 0xc4, 0x79, 0xe6, 0x01, 0x31, 0x39,
+ 0xc5, 0xd7, 0x2c, 0x0f, 0xa1, 0xf9, 0xc4, 0xe3, 0x6f, 0x0f, 0xa0, 0xa1,
+ 0xc2, 0x18, 0xb3, 0x0f, 0xce, 0x92, 0x00, 0x47, 0xbc, 0x48, 0xbe, 0x52,
+ 0xc0, 0x47, 0xc2, 0xca, 0xa7, 0x56, 0x0f, 0x9b, 0x59, 0xc7, 0xc0, 0x6d,
+ 0x0f, 0xcb, 0x10, 0xc3, 0x1c, 0xe6, 0x0f, 0xd3, 0xe1, 0xca, 0xa6, 0xf2,
+ 0x01, 0x05, 0x10, 0x44, 0x00, 0x74, 0xc0, 0x47, 0xce, 0xc9, 0xad, 0x89,
+ 0x0f, 0xa9, 0x70, 0x42, 0x00, 0xcc, 0xc0, 0x47, 0xda, 0xc2, 0x01, 0x48,
+ 0x0f, 0xa2, 0x89, 0xc6, 0xcc, 0xc5, 0x0f, 0xa0, 0x51, 0xc6, 0xd2, 0xd7,
+ 0x0f, 0xca, 0x80, 0xc8, 0xb9, 0x92, 0x0f, 0xa5, 0x99, 0xca, 0x39, 0x0b,
+ 0x0f, 0x98, 0xc8, 0xcd, 0x7b, 0x7d, 0x0f, 0x9e, 0x78, 0xc4, 0x9e, 0x3a,
+ 0x0f, 0xcb, 0x29, 0x0d, 0x40, 0x47, 0xea, 0x47, 0x1d, 0xd4, 0xc0, 0x47,
+ 0xf6, 0xc2, 0x00, 0x3d, 0x01, 0x30, 0x21, 0x12, 0xc0, 0x48, 0x5c, 0x0f,
+ 0x40, 0x48, 0x74, 0x42, 0x00, 0x84, 0xc0, 0x48, 0x7e, 0xce, 0x6e, 0x58,
+ 0x0f, 0xa4, 0x89, 0xcb, 0x96, 0xab, 0x0f, 0xb6, 0x58, 0xc8, 0xb7, 0x4a,
+ 0x01, 0x30, 0x61, 0x16, 0xc0, 0x48, 0x8a, 0xca, 0xa0, 0xe4, 0x01, 0x19,
+ 0x91, 0x4a, 0x9c, 0x3e, 0xc0, 0x48, 0xa2, 0xce, 0x73, 0xfa, 0x0f, 0x9f,
+ 0x51, 0x08, 0xc0, 0x48, 0xae, 0xd5, 0x33, 0xa7, 0x01, 0x53, 0x68, 0xcb,
+ 0x99, 0x29, 0x01, 0x12, 0xc1, 0xc2, 0x00, 0x65, 0x0f, 0xd5, 0xc1, 0xd2,
+ 0x4b, 0xa7, 0x01, 0x72, 0x78, 0xc2, 0x00, 0x45, 0x00, 0x01, 0xd3, 0x00,
+ 0x48, 0xc0, 0xcd, 0x76, 0x9d, 0x0f, 0xa5, 0x28, 0x0b, 0xc0, 0x48, 0xc4,
+ 0xc7, 0xc5, 0x28, 0x0f, 0x9a, 0xd0, 0xc5, 0x11, 0x55, 0x0f, 0xa1, 0x70,
+ 0x1b, 0xc0, 0x48, 0xce, 0x44, 0x1b, 0xaa, 0x40, 0x48, 0xda, 0x46, 0x83,
+ 0x27, 0xc0, 0x48, 0xf8, 0xc6, 0xca, 0x97, 0x0f, 0xa6, 0x58, 0xc7, 0x72,
+ 0xbf, 0x0f, 0xc9, 0x09, 0x42, 0x00, 0x40, 0xc0, 0x49, 0x04, 0x42, 0x00,
+ 0x3b, 0xc0, 0x49, 0x10, 0xc2, 0x04, 0x3d, 0x01, 0x30, 0x0a, 0x00, 0x49,
+ 0x1c, 0xd3, 0x46, 0x90, 0x0f, 0xac, 0x09, 0x42, 0x02, 0xaf, 0xc0, 0x49,
+ 0x22, 0xcf, 0x69, 0x09, 0x0f, 0x9e, 0xd8, 0x42, 0x00, 0x49, 0xc0, 0x49,
+ 0x2e, 0x17, 0x40, 0x49, 0x38, 0xc8, 0xbe, 0x8a, 0x0f, 0x98, 0x30, 0xc3,
+ 0xe5, 0x15, 0x0f, 0xb6, 0x19, 0xc3, 0x01, 0x4b, 0x0f, 0x9b, 0x70, 0x45,
+ 0x00, 0xba, 0xc0, 0x49, 0x4a, 0x51, 0x4e, 0xf2, 0xc0, 0x49, 0x9a, 0x4d,
+ 0x77, 0xc8, 0x40, 0x49, 0xac, 0x0e, 0xc0, 0x49, 0xc6, 0xe0, 0x00, 0xa7,
+ 0x01, 0x3b, 0x09, 0x14, 0x40, 0x49, 0xd2, 0x00, 0xc0, 0x49, 0xde, 0xc3,
+ 0x2e, 0xab, 0x01, 0x5f, 0x01, 0xc4, 0x2a, 0x3e, 0x0f, 0xce, 0x08, 0x42,
+ 0x01, 0x19, 0xc0, 0x49, 0xea, 0xc5, 0x00, 0xb9, 0x00, 0x05, 0x10, 0xc5,
+ 0x00, 0xb9, 0x01, 0x05, 0xa9, 0xc3, 0x12, 0xad, 0x00, 0x05, 0xc0, 0x50,
+ 0x5b, 0x72, 0xc0, 0x49, 0xf6, 0x4d, 0x76, 0xde, 0x40, 0x4a, 0x04, 0x47,
+ 0x02, 0x0e, 0xc0, 0x4a, 0x48, 0x47, 0x0a, 0xda, 0xc0, 0x4a, 0x5a, 0x49,
+ 0x0b, 0x17, 0xc0, 0x4a, 0x66, 0xce, 0x74, 0xb0, 0x00, 0x24, 0x11, 0xc6,
+ 0x4a, 0x9f, 0x05, 0x33, 0xf1, 0xc7, 0xc7, 0x27, 0x05, 0x33, 0xf8, 0xce,
+ 0x74, 0xf6, 0x00, 0x04, 0x99, 0xc5, 0x1d, 0x1d, 0x01, 0x10, 0xb0, 0x49,
+ 0xb0, 0x2c, 0x40, 0x4a, 0x72, 0x8e, 0x0f, 0xcd, 0x69, 0x96, 0x0f, 0xa5,
+ 0xd0, 0xcb, 0x94, 0xd2, 0x01, 0x35, 0xe1, 0xc7, 0xb3, 0x85, 0x07, 0xf2,
+ 0x28, 0xc7, 0xc5, 0x36, 0x01, 0x35, 0xd1, 0x06, 0xc0, 0x4a, 0x96, 0xc5,
+ 0x33, 0x24, 0x00, 0x01, 0xd8, 0x16, 0xc0, 0x4a, 0x9c, 0xcf, 0x62, 0xc4,
+ 0x0f, 0xca, 0x40, 0xc9, 0xb2, 0x7e, 0x01, 0x09, 0x01, 0x45, 0x29, 0x7c,
+ 0x40, 0x4a, 0xa8, 0xc5, 0xda, 0xce, 0x0f, 0x99, 0x89, 0xcf, 0x6b, 0x34,
+ 0x0f, 0xb2, 0x40, 0x43, 0x01, 0x97, 0xc0, 0x4a, 0xae, 0xc6, 0xd2, 0x41,
+ 0x01, 0x11, 0xf9, 0x45, 0xd6, 0x7d, 0x40, 0x4a, 0xb8, 0x48, 0xbe, 0x22,
+ 0xc0, 0x4a, 0xd4, 0xcd, 0x75, 0x65, 0x0f, 0xc8, 0xc0, 0x42, 0x00, 0xaf,
+ 0xc0, 0x4b, 0x26, 0xd5, 0x34, 0x3a, 0x01, 0x39, 0xd1, 0xcd, 0x79, 0x41,
+ 0x01, 0x00, 0x30, 0x45, 0xdb, 0xa0, 0xc0, 0x4b, 0x32, 0x46, 0x39, 0xfb,
+ 0x40, 0x4b, 0x52, 0xcd, 0x7d, 0xd3, 0x01, 0x53, 0x61, 0x43, 0x05, 0xb2,
+ 0xc0, 0x4b, 0x5e, 0x46, 0x00, 0xd4, 0x40, 0x4b, 0x6a, 0xc8, 0xbc, 0xea,
+ 0x0f, 0xd3, 0xd1, 0x42, 0x00, 0xc2, 0xc0, 0x4b, 0x76, 0xd3, 0x41, 0x84,
+ 0x01, 0x71, 0xe0, 0x16, 0xc0, 0x4b, 0x82, 0x14, 0xc0, 0x4b, 0x8e, 0x46,
+ 0xd2, 0xf5, 0xc0, 0x4b, 0x98, 0xcd, 0x31, 0x8b, 0x0f, 0xac, 0x19, 0xc4,
+ 0x01, 0xdd, 0x0f, 0x9e, 0xf9, 0xcc, 0x83, 0x85, 0x0f, 0xce, 0x68, 0xd7,
+ 0x28, 0xb6, 0x01, 0x39, 0x49, 0x03, 0xc0, 0x4b, 0xa4, 0x0b, 0x40, 0x4b,
+ 0xb0, 0xc6, 0xcc, 0xf5, 0x01, 0x1f, 0x89, 0xc8, 0xb5, 0x72, 0x0f, 0xaf,
+ 0x00, 0xce, 0x73, 0x60, 0x0f, 0x9c, 0xc9, 0xc2, 0x00, 0xb0, 0x0f, 0xb6,
+ 0x99, 0xce, 0x71, 0x68, 0x0f, 0xca, 0xc8, 0x00, 0x40, 0x4b, 0xbc, 0x16,
+ 0xc0, 0x4b, 0xc8, 0xca, 0x85, 0xc7, 0x0f, 0xd7, 0x08, 0xc4, 0xba, 0xe0,
+ 0x0f, 0xcc, 0xa9, 0x47, 0xc2, 0xea, 0x40, 0x4b, 0xd4, 0x48, 0x10, 0xc1,
+ 0xc0, 0x4b, 0xf0, 0xc5, 0xdb, 0x0f, 0x0f, 0xcb, 0x50, 0xc3, 0x05, 0x9f,
+ 0x01, 0x32, 0x21, 0xc6, 0xce, 0x6f, 0x0f, 0xb7, 0x82, 0x00, 0x4b, 0xfc,
+ 0x4c, 0x11, 0xe2, 0xc0, 0x4c, 0x02, 0xd1, 0x48, 0x11, 0x00, 0x41, 0xb1,
+ 0x0f, 0xc0, 0x4c, 0x2c, 0x4b, 0x6f, 0xc7, 0xc0, 0x4c, 0x38, 0x47, 0x02,
+ 0x0e, 0x40, 0x4c, 0x5c, 0xc4, 0xde, 0xd7, 0x0f, 0xcd, 0xd1, 0xc3, 0x0e,
+ 0x61, 0x0f, 0xcf, 0xb8, 0xc2, 0x1e, 0xd5, 0x0f, 0xcd, 0x41, 0xc2, 0x02,
+ 0xa7, 0x0f, 0xa4, 0x02, 0x00, 0x4c, 0xb4, 0xc2, 0x00, 0x29, 0x01, 0x37,
+ 0xb9, 0xcd, 0x77, 0x46, 0x0f, 0x9d, 0xf8, 0x16, 0xc0, 0x4c, 0xba, 0x12,
+ 0x40, 0x4c, 0xc4, 0x86, 0x0f, 0xb7, 0xb9, 0xca, 0x9e, 0x3c, 0x0f, 0xab,
+ 0xa9, 0x42, 0x02, 0x37, 0x40, 0x4c, 0xce, 0x46, 0x70, 0xd0, 0xc0, 0x4c,
+ 0xda, 0xcb, 0x96, 0xcc, 0x0f, 0x9a, 0xa8, 0x45, 0x00, 0xdd, 0xc0, 0x4c,
+ 0xe6, 0xce, 0x70, 0x96, 0x05, 0x33, 0x98, 0xc3, 0x15, 0x0f, 0x0f, 0xcc,
+ 0x81, 0xc2, 0x0b, 0x47, 0x0f, 0xc9, 0xb8, 0x14, 0xc0, 0x4c, 0xf2, 0x4c,
+ 0x01, 0xf6, 0xc0, 0x4c, 0xfc, 0xc5, 0xda, 0x6a, 0x01, 0x30, 0xc1, 0x18,
+ 0xc0, 0x4d, 0x0e, 0xd0, 0x5b, 0xf2, 0x0f, 0xca, 0xc0, 0xc3, 0x00, 0x28,
+ 0x0f, 0xb5, 0xf9, 0x42, 0x00, 0x61, 0xc0, 0x4d, 0x1a, 0xd0, 0x5e, 0x42,
+ 0x01, 0x1b, 0xe9, 0xca, 0x9a, 0x72, 0x0f, 0x99, 0x01, 0x46, 0x2a, 0x9f,
+ 0xc0, 0x4d, 0x2e, 0xdd, 0x11, 0x51, 0x0f, 0xc9, 0x78, 0xca, 0xa2, 0x10,
+ 0x01, 0x37, 0x49, 0x43, 0x00, 0x4b, 0xc0, 0x4d, 0x3a, 0x92, 0x0f, 0xb5,
+ 0x11, 0xc3, 0x19, 0x78, 0x0f, 0xb7, 0x08, 0x43, 0xc4, 0x20, 0xc0, 0x4d,
+ 0x46, 0xc4, 0xc0, 0x85, 0x0f, 0xb7, 0xa0, 0xc3, 0x00, 0xca, 0x01, 0x34,
+ 0xb1, 0xc2, 0x15, 0x13, 0x0f, 0xcf, 0x18, 0x44, 0x07, 0x31, 0xc0, 0x4d,
+ 0x52, 0xc4, 0x44, 0xba, 0x01, 0x08, 0x41, 0x07, 0xc0, 0x4d, 0x64, 0xc3,
+ 0x1f, 0x48, 0x0f, 0xa6, 0xe0, 0xc8, 0xbb, 0xba, 0x0f, 0x9c, 0x90, 0xc5,
+ 0x2a, 0x94, 0x01, 0x3a, 0x21, 0xc3, 0x12, 0xb8, 0x01, 0x30, 0x1b, 0x00,
+ 0x4d, 0x70, 0xd0, 0x5f, 0xf2, 0x0f, 0x9e, 0xa1, 0xc7, 0xca, 0x61, 0x0f,
+ 0x9e, 0x10, 0xc2, 0x00, 0x71, 0x0f, 0xa0, 0x61, 0xc2, 0x00, 0x3c, 0x0f,
+ 0xa0, 0x68, 0x43, 0x00, 0x8e, 0xc0, 0x4d, 0x76, 0xd6, 0x2c, 0x18, 0x01,
+ 0x08, 0xb8, 0xd6, 0x1f, 0x7f, 0x0f, 0xb3, 0x53, 0x00, 0x4d, 0x82, 0xc2,
+ 0x11, 0xa5, 0x00, 0x01, 0x7a, 0x00, 0x4d, 0x88, 0x4e, 0x6d, 0x16, 0xc0,
+ 0x4d, 0x8e, 0xdb, 0x15, 0xcc, 0x08, 0xd5, 0x03, 0x00, 0x4d, 0x96, 0x45,
+ 0x01, 0xc3, 0xc0, 0x4d, 0x9c, 0x15, 0xc0, 0x4d, 0xb4, 0xcf, 0x63, 0xff,
+ 0x08, 0xd4, 0xc1, 0x55, 0x34, 0x79, 0xc0, 0x4d, 0xc0, 0x57, 0x26, 0xd3,
+ 0xc0, 0x4d, 0xf0, 0x47, 0x02, 0x0e, 0xc0, 0x4e, 0x00, 0x46, 0x34, 0x6f,
+ 0x40, 0x4e, 0x5a, 0xc8, 0xb7, 0x5a, 0x01, 0x35, 0xe9, 0xc2, 0x01, 0x26,
+ 0x0f, 0xcf, 0x30, 0xd4, 0x3e, 0x44, 0x01, 0x1c, 0xa1, 0x00, 0xc0, 0x4e,
+ 0x66, 0xc4, 0x15, 0x2e, 0x0f, 0xca, 0x70, 0x46, 0x09, 0x97, 0xc0, 0x4e,
+ 0x78, 0x47, 0x02, 0x0e, 0x40, 0x4e, 0x9c, 0x4c, 0x11, 0xe2, 0xc0, 0x4f,
+ 0x16, 0x47, 0x34, 0x2f, 0xc0, 0x4f, 0x28, 0x4a, 0x51, 0x89, 0xc0, 0x4f,
+ 0x35, 0xd0, 0x59, 0xf2, 0x08, 0x7a, 0x29, 0x47, 0x02, 0x0e, 0x40, 0x4f,
+ 0x5f, 0x42, 0x01, 0x19, 0xc0, 0x4f, 0xbc, 0xd8, 0x24, 0x6b, 0x01, 0x3d,
+ 0x38, 0x48, 0x19, 0xd4, 0xc0, 0x4f, 0xc6, 0xc5, 0xda, 0xc9, 0x01, 0x19,
+ 0x78, 0xc6, 0xd2, 0xdd, 0x0f, 0xaa, 0x69, 0xcd, 0x6a, 0x0a, 0x00, 0x00,
+ 0xb0, 0x43, 0x68, 0xf2, 0xc0, 0x50, 0x1a, 0xc3, 0x09, 0x3a, 0x0f, 0xa4,
+ 0x48, 0x47, 0x02, 0x0e, 0xc0, 0x50, 0x72, 0x45, 0x00, 0xba, 0xc0, 0x50,
+ 0xc8, 0x4b, 0x6f, 0xc7, 0xc0, 0x50, 0xd8, 0x4c, 0x85, 0xa1, 0x40, 0x50,
+ 0xee, 0x07, 0xc0, 0x50, 0xfe, 0xca, 0xa4, 0xe0, 0x01, 0x05, 0xb9, 0x42,
+ 0x06, 0x4e, 0x40, 0x51, 0x0a, 0x43, 0x1b, 0x32, 0xc0, 0x51, 0x1f, 0xc6,
+ 0xce, 0xff, 0x0f, 0x9a, 0xe9, 0xc2, 0x00, 0x89, 0x00, 0x01, 0x00, 0x49,
+ 0x6e, 0x41, 0x40, 0x51, 0x2c, 0x44, 0x03, 0xda, 0xc0, 0x51, 0x38, 0xc3,
+ 0x01, 0xe5, 0x0f, 0xab, 0xba, 0x00, 0x51, 0x4a, 0xc9, 0xac, 0xde, 0x0f,
+ 0x9e, 0x29, 0xcb, 0x94, 0x01, 0x0f, 0xa1, 0x99, 0x11, 0xc0, 0x51, 0x50,
+ 0xc3, 0x09, 0x3a, 0x0f, 0xcf, 0xe8, 0x15, 0xc0, 0x51, 0x5a, 0xc4, 0xdf,
+ 0x9b, 0x0f, 0xcd, 0xc1, 0xc7, 0xc8, 0xb6, 0x0f, 0xcd, 0xc8, 0x00, 0xc0,
+ 0x51, 0x66, 0x47, 0xc3, 0xed, 0xc0, 0x51, 0x72, 0xc6, 0x91, 0xd5, 0x0f,
+ 0x99, 0xd9, 0xc4, 0xaf, 0x8f, 0x0f, 0x98, 0x2b, 0x00, 0x51, 0x9c, 0xd2,
+ 0x4a, 0xf3, 0x0f, 0x98, 0x38, 0xc6, 0x07, 0x9a, 0x01, 0x1d, 0x99, 0xc3,
+ 0x00, 0xf1, 0x01, 0x1d, 0x91, 0xcd, 0x7b, 0x97, 0x01, 0x50, 0x58, 0x00,
+ 0x40, 0x51, 0xa2, 0x43, 0x00, 0x3d, 0xc0, 0x51, 0xba, 0x46, 0x07, 0x2f,
+ 0xc0, 0x51, 0xcf, 0xc6, 0xb0, 0xf5, 0x00, 0x00, 0xd0, 0xcc, 0x81, 0x5d,
+ 0x01, 0x11, 0x79, 0xc2, 0x00, 0x29, 0x0f, 0x9e, 0x20, 0xc2, 0x00, 0x0a,
+ 0x0f, 0x9b, 0x19, 0xcf, 0x61, 0x7a, 0x0f, 0xb4, 0xf8, 0x0e, 0xc0, 0x52,
+ 0x09, 0xca, 0xa1, 0x3e, 0x0f, 0xb0, 0x78, 0x42, 0x02, 0xa7, 0xc0, 0x52,
+ 0x13, 0xca, 0x4a, 0x11, 0x01, 0x51, 0x98, 0xd5, 0x36, 0x1d, 0x0f, 0xb3,
+ 0xa9, 0x90, 0x0f, 0xcd, 0x10, 0x42, 0x02, 0x41, 0xc0, 0x52, 0x20, 0x10,
+ 0xc0, 0x52, 0x2c, 0xc2, 0x00, 0x4e, 0x01, 0x01, 0x90, 0xc9, 0xb2, 0x87,
+ 0x0f, 0xcd, 0x79, 0xc7, 0xc7, 0xcf, 0x01, 0x18, 0x29, 0x12, 0xc0, 0x52,
+ 0x39, 0xc7, 0xc4, 0x1e, 0x01, 0x5e, 0xc1, 0xcc, 0x88, 0xb9, 0x0f, 0xb6,
+ 0x38, 0xca, 0x9b, 0xb2, 0x01, 0x1c, 0xb9, 0xc5, 0xbf, 0x4d, 0x01, 0x13,
+ 0xd3, 0x00, 0x52, 0x48, 0x15, 0xc0, 0x52, 0x4c, 0x46, 0xcf, 0xd1, 0xc0,
+ 0x52, 0x58, 0xc4, 0xde, 0xe3, 0x0f, 0xcb, 0x40, 0x05, 0xc0, 0x52, 0x6a,
+ 0xcc, 0x83, 0xb5, 0x01, 0x08, 0x73, 0x00, 0x52, 0x76, 0x1b, 0x40, 0x52,
+ 0x7c, 0xc2, 0x00, 0xf1, 0x01, 0x32, 0x3b, 0x00, 0x52, 0x88, 0x15, 0xc0,
+ 0x52, 0x8e, 0xc4, 0x09, 0x3a, 0x0f, 0xd5, 0x00, 0x42, 0x11, 0xee, 0xc0,
+ 0x52, 0x9d, 0xca, 0x0e, 0x64, 0x01, 0x39, 0x79, 0x07, 0xc0, 0x52, 0xa9,
+ 0xc3, 0x13, 0x4e, 0x0f, 0xd4, 0x28, 0xc8, 0xbb, 0x9a, 0x0f, 0xb7, 0xd8,
+ 0xc3, 0x4c, 0xa1, 0x01, 0x32, 0x99, 0xc3, 0x1a, 0x2e, 0x0f, 0xa9, 0x58,
+ 0xcd, 0x7d, 0x44, 0x01, 0x56, 0xd0, 0xc8, 0xb8, 0xf2, 0x0f, 0xa5, 0x49,
+ 0x8e, 0x0f, 0xa4, 0x51, 0xc9, 0x92, 0xda, 0x00, 0x05, 0xb0, 0x00, 0x40,
+ 0x52, 0xb5, 0xcc, 0x85, 0xe9, 0x0f, 0xb6, 0x11, 0x49, 0xab, 0xa3, 0xc0,
+ 0x52, 0xc1, 0x07, 0x40, 0x52, 0xcd, 0x87, 0x0f, 0xae, 0x7b, 0x00, 0x52,
+ 0xd9, 0xc3, 0x7f, 0x6c, 0x0f, 0xb6, 0xa0, 0x16, 0xc0, 0x52, 0xe5, 0x4b,
+ 0x8d, 0x9a, 0xc0, 0x52, 0xfd, 0x03, 0xc0, 0x53, 0x21, 0xc3, 0x2a, 0xf6,
+ 0x0f, 0xcc, 0xe0, 0xcc, 0x23, 0x33, 0x08, 0xd7, 0xab, 0x00, 0x53, 0x33,
+ 0x0e, 0xc0, 0x53, 0x37, 0xce, 0x75, 0x3c, 0x08, 0xd7, 0x7b, 0x00, 0x53,
+ 0x46, 0x47, 0xc1, 0x07, 0xc0, 0x53, 0x4a, 0xcb, 0x5a, 0x32, 0x08, 0xd7,
+ 0x32, 0x00, 0x53, 0x5c, 0xc3, 0x03, 0x03, 0x01, 0x35, 0xa1, 0x0f, 0x40,
+ 0x53, 0x60, 0x05, 0xc0, 0x53, 0x70, 0x45, 0x00, 0xba, 0xc0, 0x53, 0x7c,
+ 0x47, 0x34, 0x2f, 0xc0, 0x53, 0xb4, 0x46, 0x09, 0x97, 0xc0, 0x53, 0xc4,
+ 0x49, 0xaa, 0x7a, 0xc0, 0x53, 0xe8, 0x47, 0xc1, 0xd2, 0x40, 0x53, 0xfa,
+ 0xc7, 0xc4, 0x3a, 0x0f, 0xa1, 0xe1, 0xc5, 0xdd, 0x44, 0x0f, 0xca, 0xf0,
+ 0x03, 0xc0, 0x54, 0x12, 0xc8, 0x5b, 0xfa, 0x0f, 0x9b, 0x91, 0xc9, 0xad,
+ 0xfe, 0x0f, 0xd5, 0xa0, 0x45, 0x00, 0x73, 0xc0, 0x54, 0x1e, 0xc8, 0xb8,
+ 0x2a, 0x0f, 0x9a, 0xb9, 0xc7, 0x42, 0xd3, 0x00, 0x05, 0x19, 0xcb, 0x95,
+ 0xb9, 0x0f, 0xd6, 0xb9, 0xc2, 0x11, 0xee, 0x0f, 0xa2, 0xe8, 0x15, 0xc0,
+ 0x54, 0x2a, 0x42, 0x00, 0x45, 0x40, 0x54, 0x36, 0xcf, 0x5f, 0x33, 0x01,
+ 0x18, 0xb1, 0x16, 0xc0, 0x54, 0x42, 0xc5, 0xd9, 0x66, 0x01, 0x5f, 0x38,
+ 0x4d, 0x7e, 0xe4, 0xc0, 0x54, 0x4e, 0xc4, 0x13, 0x66, 0x0f, 0x9b, 0xf8,
+ 0xc3, 0x63, 0x7e, 0x0f, 0xb4, 0x9b, 0x00, 0x54, 0x5a, 0xc7, 0xc9, 0x7a,
+ 0x0f, 0xa3, 0x70, 0xca, 0x8b, 0x2b, 0x01, 0x3e, 0x13, 0x00, 0x54, 0x60,
+ 0x15, 0xc0, 0x54, 0x66, 0xd1, 0x51, 0xef, 0x01, 0x33, 0xf1, 0x00, 0xc0,
+ 0x54, 0x78, 0xcc, 0x85, 0x89, 0x0f, 0x9d, 0x69, 0xc9, 0x8e, 0x15, 0x00,
+ 0x01, 0x28, 0xc3, 0xb3, 0xd0, 0x01, 0x38, 0x79, 0xc6, 0x16, 0x32, 0x01,
+ 0x37, 0x21, 0xd6, 0x31, 0x82, 0x0f, 0xac, 0x31, 0xc9, 0xaa, 0xd4, 0x0f,
+ 0xb0, 0xa1, 0xc4, 0xe0, 0x73, 0x0f, 0xa1, 0x38, 0x05, 0xc0, 0x54, 0x8a,
+ 0x94, 0x0f, 0x9a, 0x81, 0xc4, 0xe4, 0x6b, 0x0f, 0xca, 0xe0, 0xc6, 0xa4,
+ 0xe4, 0x01, 0x05, 0x89, 0xc8, 0xb5, 0x6a, 0x01, 0x05, 0x38, 0xcb, 0x9a,
+ 0x31, 0x01, 0x00, 0x41, 0xcf, 0x62, 0x79, 0x01, 0x72, 0x70, 0xc9, 0xad,
+ 0x92, 0x0f, 0xa4, 0xe1, 0xc2, 0x00, 0x40, 0x0f, 0xa2, 0xd8, 0x16, 0xc0,
+ 0x54, 0x9a, 0xc3, 0x05, 0x14, 0x08, 0x5d, 0x4b, 0x00, 0x54, 0xaa, 0xc4,
+ 0x09, 0x9d, 0x08, 0x5d, 0x60, 0xc3, 0x02, 0xa3, 0x08, 0x5c, 0xe1, 0xc5,
+ 0x0d, 0x20, 0x08, 0x5c, 0xd8, 0xc3, 0xb5, 0x3e, 0x08, 0x5c, 0x89, 0x15,
+ 0xc0, 0x54, 0xb0, 0xc2, 0x00, 0x67, 0x08, 0x5c, 0x71, 0xc3, 0x20, 0x18,
+ 0x08, 0x5c, 0x61, 0xc8, 0xb9, 0x7a, 0x08, 0x5c, 0x59, 0xc6, 0xcf, 0xd7,
+ 0x08, 0x5c, 0x51, 0xc4, 0xe0, 0xe7, 0x08, 0x5c, 0x49, 0xc4, 0x4a, 0xb9,
+ 0x08, 0x5c, 0x41, 0xc2, 0x01, 0x7f, 0x08, 0x5c, 0x23, 0x00, 0x54, 0xba,
+ 0xc5, 0x4a, 0xb3, 0x08, 0x5c, 0x31, 0xcd, 0x7e, 0x89, 0x08, 0x5c, 0x29,
+ 0xc6, 0x40, 0x9a, 0x08, 0x5c, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x5c, 0x11,
+ 0xc4, 0xe3, 0x27, 0x08, 0x5c, 0x09, 0xc5, 0xa5, 0xfd, 0x08, 0x5c, 0x00,
+ 0xd2, 0x48, 0xd7, 0x00, 0xb9, 0xb1, 0xd2, 0x4c, 0xa3, 0x00, 0xb9, 0xa8,
+ 0x48, 0xba, 0xd2, 0xc0, 0x54, 0xc0, 0xc3, 0x25, 0xd6, 0x01, 0x5e, 0xd8,
+ 0x46, 0xd3, 0x79, 0xc0, 0x54, 0xd2, 0x50, 0x5c, 0x52, 0x40, 0x54, 0xe8,
+ 0x4c, 0x7e, 0xd8, 0xc0, 0x55, 0x3c, 0x48, 0xb4, 0x80, 0x40, 0x55, 0x52,
+ 0xcc, 0x8b, 0x05, 0x01, 0x30, 0x59, 0x45, 0x74, 0xd9, 0xc0, 0x55, 0x86,
+ 0x42, 0x00, 0x29, 0x40, 0x55, 0x92, 0x0b, 0xc0, 0x55, 0x9f, 0xd6, 0x31,
+ 0xae, 0x0f, 0xae, 0xd8, 0x49, 0x07, 0xbb, 0xc0, 0x55, 0xab, 0xd1, 0x54,
+ 0x42, 0x01, 0x1e, 0x53, 0x00, 0x55, 0xb7, 0xd3, 0x45, 0xd2, 0x01, 0x1e,
+ 0x4a, 0x00, 0x55, 0xbd, 0xcb, 0x91, 0x0a, 0x01, 0x12, 0xe1, 0xc3, 0x1e,
+ 0x36, 0x00, 0x03, 0xf9, 0xcb, 0x91, 0x57, 0x0f, 0xb4, 0xd0, 0xca, 0x9a,
+ 0x90, 0x01, 0x08, 0x49, 0xc7, 0xc5, 0xec, 0x01, 0x08, 0x19, 0xc4, 0x00,
+ 0xba, 0x00, 0x05, 0x80, 0xc4, 0x00, 0x87, 0x0f, 0xb1, 0xa9, 0xc6, 0x00,
+ 0x91, 0x0f, 0xa5, 0x58, 0x48, 0x89, 0xf5, 0xc0, 0x55, 0xc3, 0x43, 0x09,
+ 0x9a, 0x40, 0x55, 0xdc, 0x49, 0xb3, 0x95, 0xc0, 0x56, 0x0c, 0xcb, 0x96,
+ 0x27, 0x01, 0x35, 0x71, 0x0b, 0x40, 0x56, 0x3e, 0x51, 0x53, 0xfe, 0xc0,
+ 0x56, 0x50, 0x53, 0x43, 0x4c, 0x40, 0x56, 0x62, 0x03, 0xc0, 0x56, 0x6e,
+ 0xdb, 0x16, 0xbf, 0x01, 0x1c, 0x11, 0xcb, 0x8f, 0x5d, 0x0f, 0xcb, 0xc0,
+ 0x46, 0x8d, 0x69, 0xc0, 0x56, 0x7a, 0xce, 0x6c, 0x28, 0x0f, 0xb7, 0x90,
+ 0xd7, 0x2a, 0xde, 0x01, 0x1c, 0x99, 0xc3, 0x01, 0xfd, 0x0f, 0x9d, 0x78,
+ 0x0f, 0xc0, 0x56, 0x92, 0xc6, 0x20, 0xab, 0x00, 0x05, 0x40, 0x12, 0xc0,
+ 0x56, 0x9e, 0xca, 0xa6, 0xa2, 0x0f, 0xc9, 0x21, 0xcc, 0x81, 0x45, 0x0f,
+ 0xa1, 0x50, 0xdc, 0x12, 0x55, 0x01, 0x3c, 0xd9, 0xc9, 0x9a, 0x28, 0x01,
+ 0x05, 0x79, 0xc3, 0x1c, 0xd9, 0x0f, 0xa0, 0x4a, 0x00, 0x56, 0xaa, 0x44,
+ 0x01, 0x4a, 0xc0, 0x56, 0xb0, 0x00, 0xc0, 0x56, 0xbc, 0x4a, 0x01, 0xa9,
+ 0x40, 0x56, 0xd7, 0x4a, 0x01, 0x68, 0xc0, 0x56, 0xe9, 0x48, 0x00, 0x5f,
+ 0x40, 0x56, 0xf5, 0x43, 0x00, 0x5b, 0xc0, 0x57, 0x01, 0xc5, 0xd8, 0xb7,
+ 0x0f, 0x9b, 0x48, 0x44, 0x00, 0xde, 0xc0, 0x57, 0x0f, 0x00, 0x40, 0x57,
+ 0x35, 0x43, 0x06, 0x64, 0xc0, 0x57, 0x4d, 0xc5, 0x11, 0x55, 0x0f, 0xa1,
+ 0xb0, 0x4b, 0x97, 0x24, 0xc0, 0x57, 0x65, 0xc7, 0xb7, 0x72, 0x01, 0x14,
+ 0x0b, 0x00, 0x57, 0x74, 0x42, 0x05, 0xc0, 0xc0, 0x57, 0x7a, 0xc5, 0xd4,
+ 0xfc, 0x01, 0x15, 0x71, 0xc6, 0x07, 0xb0, 0x01, 0x11, 0x22, 0x00, 0x57,
+ 0x89, 0x46, 0x00, 0x8b, 0x40, 0x57, 0x8f, 0xc4, 0xe4, 0x07, 0x0f, 0xa1,
+ 0x61, 0xc8, 0x02, 0xe7, 0x00, 0x01, 0x20, 0xdd, 0x11, 0xe2, 0x0d, 0xe4,
+ 0xf9, 0xcb, 0x99, 0x81, 0x0d, 0xe4, 0xf1, 0xd5, 0x33, 0xfb, 0x0d, 0xe4,
+ 0xe9, 0xd1, 0x4f, 0xcf, 0x0d, 0xe4, 0xe1, 0x46, 0xd2, 0x95, 0xc0, 0x57,
+ 0x9e, 0x47, 0x02, 0x0e, 0x40, 0x57, 0xba, 0x43, 0x00, 0xa8, 0xc0, 0x58,
+ 0x57, 0x00, 0x40, 0x58, 0x69, 0xc4, 0x01, 0xe3, 0x01, 0x2c, 0x99, 0xc9,
+ 0xb4, 0xd0, 0x0f, 0xab, 0xb0, 0x00, 0x40, 0x58, 0x75, 0xc3, 0x3e, 0xe1,
+ 0x0f, 0xa4, 0x19, 0xc2, 0x0f, 0x7b, 0x0f, 0x9b, 0x08, 0x44, 0x01, 0xd6,
+ 0xc0, 0x58, 0x81, 0xcd, 0x78, 0x71, 0x0f, 0xa4, 0xf0, 0x42, 0x01, 0x1b,
+ 0xc0, 0x58, 0x8b, 0xc5, 0xd7, 0x7c, 0x01, 0x08, 0xf8, 0x43, 0x1f, 0x3d,
+ 0xc0, 0x58, 0x97, 0xcd, 0x5e, 0x85, 0x00, 0x00, 0xf1, 0xd1, 0x51, 0x34,
+ 0x0f, 0xb4, 0xc9, 0xc4, 0xe2, 0xeb, 0x0f, 0xcf, 0xf0, 0xc6, 0x00, 0x91,
+ 0x01, 0x1e, 0x71, 0xc4, 0x00, 0x49, 0x01, 0x5c, 0x81, 0xc5, 0x00, 0x2c,
+ 0x01, 0x5c, 0x88, 0xc5, 0xd7, 0x1d, 0x0f, 0x9a, 0x71, 0xcd, 0x7c, 0xf6,
+ 0x0f, 0xcf, 0x38, 0x5d, 0x10, 0x69, 0xc0, 0x58, 0xa3, 0xcb, 0x8f, 0x1b,
+ 0x00, 0x05, 0x70, 0xcc, 0x45, 0x8d, 0x05, 0x4a, 0xf9, 0x18, 0xc0, 0x59,
+ 0x0b, 0x4f, 0x30, 0x90, 0xc0, 0x59, 0x17, 0x47, 0x02, 0x0e, 0x40, 0x59,
+ 0x26, 0x00, 0xc0, 0x59, 0x86, 0x46, 0x01, 0x4a, 0xc0, 0x59, 0xd5, 0x02,
+ 0xc0, 0x5a, 0x1c, 0xd5, 0x33, 0x29, 0x01, 0x51, 0xe8, 0x00, 0xc0, 0x5a,
+ 0x38, 0xc8, 0xbf, 0xa2, 0x0f, 0xab, 0x69, 0xc9, 0xb0, 0xaa, 0x0f, 0xd4,
+ 0x80, 0x47, 0x02, 0x5b, 0x40, 0x5a, 0x5c, 0xc4, 0x15, 0x2e, 0x0f, 0x9a,
+ 0xc9, 0xc7, 0xc1, 0x0e, 0x0f, 0x9a, 0xc0, 0xd0, 0x5f, 0xb2, 0x01, 0x49,
+ 0x59, 0xd0, 0x3c, 0x90, 0x01, 0x49, 0x80, 0xc2, 0x00, 0x3d, 0x0f, 0xb4,
+ 0x00, 0xd9, 0x20, 0xda, 0x0f, 0xc9, 0x19, 0x07, 0xc0, 0x5a, 0x74, 0xc9,
+ 0xad, 0x38, 0x0f, 0xcf, 0xd8, 0x00, 0xc0, 0x5a, 0x80, 0x4e, 0x6e, 0x90,
+ 0x40, 0x5a, 0x8c, 0xd3, 0x1c, 0xa7, 0x01, 0x3b, 0x39, 0xd8, 0x25, 0x13,
+ 0x01, 0x3b, 0x29, 0xc9, 0xb1, 0xa6, 0x01, 0x09, 0xd1, 0xdd, 0x11, 0x8b,
+ 0x01, 0x5e, 0x69, 0xd7, 0x28, 0x71, 0x01, 0x5e, 0x78, 0x48, 0x56, 0x9a,
+ 0xc0, 0x5a, 0xaa, 0x15, 0xc0, 0x5a, 0xcf, 0xca, 0x9a, 0x06, 0x08, 0x0c,
+ 0x89, 0x06, 0xc0, 0x5a, 0xd9, 0xce, 0x74, 0x08, 0x08, 0x0c, 0xb9, 0xc7,
+ 0xc2, 0x3b, 0x08, 0x0c, 0xd1, 0xce, 0x6f, 0x70, 0x08, 0x0c, 0xd8, 0xc3,
+ 0x02, 0x10, 0x0f, 0x9f, 0xa8, 0x45, 0xdb, 0x3c, 0xc0, 0x5a, 0xeb, 0x44,
+ 0x0b, 0xe6, 0xc0, 0x5a, 0xf7, 0x90, 0x01, 0x36, 0x32, 0x00, 0x5b, 0x2b,
+ 0x91, 0x0f, 0xa7, 0xdb, 0x00, 0x5b, 0x31, 0xd1, 0x52, 0x77, 0x01, 0x1d,
+ 0xb8, 0xc2, 0x00, 0x44, 0x01, 0x11, 0xb0, 0x44, 0x00, 0x74, 0xc0, 0x5b,
+ 0x3d, 0xc4, 0xe3, 0x7b, 0x0f, 0xcc, 0xe8, 0xc5, 0x11, 0x55, 0x0f, 0xa1,
+ 0x80, 0x49, 0x53, 0xa9, 0xc0, 0x5b, 0x49, 0x47, 0x34, 0x2f, 0xc0, 0x5b,
+ 0x55, 0x46, 0x09, 0x97, 0x40, 0x5b, 0x73, 0x43, 0x00, 0xed, 0xc0, 0x5b,
+ 0x91, 0x10, 0x40, 0x5b, 0xbb, 0xc9, 0xb0, 0xe0, 0x01, 0x5f, 0x99, 0xc6,
+ 0xbc, 0xf4, 0x01, 0x5f, 0xa1, 0xc8, 0xbd, 0xb2, 0x01, 0x5f, 0xa9, 0xc8,
+ 0xbc, 0xf2, 0x01, 0x5f, 0xb1, 0xc8, 0xbb, 0xca, 0x01, 0x5f, 0xb9, 0xc9,
+ 0xb3, 0xcb, 0x01, 0x5f, 0xc0, 0x9e, 0x07, 0xf0, 0x03, 0x00, 0x5b, 0xc7,
+ 0x9f, 0x07, 0xf0, 0x0b, 0x00, 0x5c, 0x0d, 0xa6, 0x07, 0xf0, 0x43, 0x00,
+ 0x5c, 0x47, 0xa5, 0x07, 0xf0, 0x3b, 0x00, 0x5c, 0x6f, 0xa4, 0x07, 0xf0,
+ 0x33, 0x00, 0x5c, 0x97, 0xa3, 0x07, 0xf0, 0x2b, 0x00, 0x5c, 0xbf, 0xa2,
+ 0x07, 0xf0, 0x23, 0x00, 0x5c, 0xe7, 0xa1, 0x07, 0xf0, 0x1b, 0x00, 0x5d,
+ 0x0f, 0xa0, 0x07, 0xf0, 0x12, 0x00, 0x5d, 0x37, 0x42, 0x00, 0x91, 0xc0,
+ 0x5d, 0x5f, 0xc5, 0x0a, 0x8a, 0x05, 0x30, 0x69, 0xc9, 0x11, 0xf6, 0x05,
+ 0x30, 0x71, 0xcd, 0x2c, 0xb2, 0x05, 0x30, 0x79, 0x46, 0x09, 0x97, 0x40,
+ 0x5d, 0x6b, 0x46, 0x05, 0x87, 0xc0, 0x5d, 0x8f, 0x42, 0x00, 0x36, 0xc0,
+ 0x5d, 0xd2, 0xc5, 0xda, 0xdd, 0x01, 0x09, 0x18, 0x45, 0x00, 0xba, 0xc0,
+ 0x5d, 0xe4, 0x45, 0x2b, 0x5f, 0x40, 0x5e, 0x22, 0x5f, 0x0c, 0x84, 0xc0,
+ 0x5e, 0x56, 0xcc, 0x82, 0x7d, 0x01, 0x18, 0xb8, 0xc8, 0xb7, 0x0a, 0x0f,
+ 0xa7, 0xe1, 0x00, 0x40, 0x5e, 0x62, 0x4f, 0x0b, 0x17, 0xc0, 0x5e, 0x6e,
+ 0x4d, 0x29, 0xb9, 0x40, 0x5e, 0xee, 0xcc, 0x81, 0xc9, 0x01, 0x11, 0x81,
+ 0xc7, 0xc2, 0x0a, 0x0f, 0x9e, 0x81, 0xc4, 0xe3, 0x0b, 0x0f, 0x98, 0x58,
+ 0xcb, 0x96, 0x69, 0x01, 0x0c, 0x49, 0xcd, 0x3f, 0xe2, 0x01, 0x0a, 0xf1,
+ 0x08, 0xc0, 0x5f, 0x6e, 0x16, 0xc0, 0x5f, 0x7a, 0x44, 0x05, 0x14, 0x40,
+ 0x5f, 0x86, 0x00, 0xc0, 0x5f, 0xac, 0x46, 0xcc, 0xa1, 0xc0, 0x5f, 0xf6,
+ 0x45, 0xdd, 0x6c, 0x40, 0x60, 0x02, 0xc4, 0x0d, 0x13, 0x0e, 0x9b, 0xc1,
+ 0xc3, 0x05, 0x14, 0x0e, 0x9b, 0xb8, 0x09, 0xc0, 0x60, 0x14, 0xca, 0xa4,
+ 0xb8, 0x0f, 0x9c, 0x58, 0x43, 0x5c, 0x89, 0xc0, 0x60, 0x26, 0xc3, 0x04,
+ 0x85, 0x0f, 0xd6, 0xa0, 0xc5, 0xc4, 0xa4, 0x01, 0x38, 0x39, 0xc9, 0xb1,
+ 0xf7, 0x0f, 0xad, 0x68, 0x43, 0x02, 0x31, 0xc0, 0x60, 0x7a, 0xc8, 0xba,
+ 0xa2, 0x0f, 0xcb, 0x08, 0x45, 0x92, 0x80, 0xc0, 0x60, 0x98, 0x4a, 0xa7,
+ 0xa6, 0xc0, 0x60, 0xbc, 0x45, 0xd8, 0xb2, 0x40, 0x61, 0x22, 0x0d, 0xc0,
+ 0x61, 0x40, 0x44, 0x06, 0xb2, 0xc0, 0x61, 0x4c, 0xc3, 0x0f, 0xed, 0x0f,
+ 0xa1, 0x10, 0x00, 0xc0, 0x61, 0x7a, 0x02, 0x40, 0x61, 0xa4, 0x10, 0xc0,
+ 0x61, 0xb6, 0xce, 0x72, 0xfe, 0x0f, 0xca, 0x48, 0xcc, 0x84, 0x2d, 0x0f,
+ 0xa5, 0x69, 0xc9, 0xa8, 0xc1, 0x0f, 0xd3, 0xa0, 0x44, 0x16, 0xcb, 0xc0,
+ 0x61, 0xc0, 0x44, 0x83, 0x63, 0x40, 0x61, 0xcc, 0x07, 0xc0, 0x61, 0xd8,
+ 0x42, 0x00, 0xa2, 0x40, 0x61, 0xe2, 0x44, 0x0d, 0xde, 0xc0, 0x61, 0xee,
+ 0x42, 0x02, 0x32, 0x40, 0x62, 0x12, 0xd8, 0x22, 0xa3, 0x0f, 0xa8, 0xe9,
+ 0xd6, 0x08, 0x88, 0x01, 0x1f, 0x01, 0xcd, 0x00, 0x32, 0x01, 0x1e, 0xf1,
+ 0xcb, 0x1a, 0x50, 0x01, 0x1e, 0xe1, 0xce, 0x25, 0xad, 0x01, 0x1d, 0xa1,
+ 0x42, 0x00, 0xd0, 0xc0, 0x62, 0x1c, 0x46, 0x00, 0x2c, 0xc0, 0x62, 0x26,
+ 0x45, 0x00, 0x49, 0xc0, 0x62, 0x30, 0x44, 0x13, 0x1d, 0x40, 0x62, 0x3a,
+ 0x42, 0x01, 0x7c, 0xc0, 0x62, 0x49, 0xc9, 0xb0, 0xce, 0x01, 0x19, 0x80,
+ 0x56, 0x30, 0x22, 0xc0, 0x62, 0x55, 0xd6, 0x2c, 0x70, 0x0f, 0x89, 0x50,
+ 0xc2, 0x00, 0x8e, 0x0f, 0xcd, 0xbb, 0x00, 0x62, 0x67, 0xc4, 0x7f, 0x35,
+ 0x0f, 0xcf, 0x80, 0x8f, 0x0f, 0xb4, 0x53, 0x00, 0x62, 0x6d, 0xc2, 0x00,
+ 0x74, 0x0f, 0xb4, 0x31, 0xcc, 0x84, 0xd5, 0x01, 0x09, 0x11, 0x05, 0xc0,
+ 0x62, 0x73, 0x42, 0x05, 0x26, 0x40, 0x62, 0x7f, 0x43, 0x01, 0x95, 0xc0,
+ 0x62, 0x8b, 0x49, 0x89, 0xf4, 0xc0, 0x62, 0x97, 0x44, 0x0b, 0x26, 0xc0,
+ 0x62, 0xbf, 0xc5, 0x33, 0x24, 0x01, 0x02, 0xe9, 0xcb, 0x95, 0x1f, 0x0f,
+ 0xa9, 0x88, 0x87, 0x01, 0x15, 0x43, 0x00, 0x62, 0xf3, 0xc4, 0xe3, 0xd3,
+ 0x0f, 0x9d, 0xd0, 0x12, 0xc0, 0x62, 0xf9, 0xc2, 0x02, 0xa7, 0x0f, 0xce,
+ 0x62, 0x00, 0x63, 0x05, 0x08, 0xc0, 0x63, 0x0b, 0x0e, 0xc0, 0x63, 0x21,
+ 0x06, 0xc0, 0x63, 0x2b, 0x11, 0xc0, 0x63, 0x45, 0x05, 0xc0, 0x63, 0x51,
+ 0x03, 0xc0, 0x63, 0x67, 0x0a, 0xc0, 0x63, 0x7f, 0x15, 0xc0, 0x63, 0x8b,
+ 0x07, 0xc0, 0x63, 0x9b, 0x42, 0x00, 0x74, 0xc0, 0x63, 0xb7, 0x42, 0x01,
+ 0x4a, 0xc0, 0x63, 0xc3, 0x0f, 0xc0, 0x63, 0xcf, 0x09, 0xc0, 0x63, 0xe1,
+ 0xc5, 0xdb, 0xb9, 0x0e, 0x99, 0xd9, 0xd3, 0x40, 0x2e, 0x0e, 0x99, 0xb9,
+ 0x14, 0xc0, 0x63, 0xfc, 0x12, 0xc0, 0x64, 0x06, 0x0d, 0xc0, 0x64, 0x16,
+ 0x04, 0xc0, 0x64, 0x22, 0xc3, 0x85, 0x26, 0x0e, 0x98, 0xe9, 0xcc, 0x8a,
+ 0xb1, 0x0e, 0x98, 0x88, 0x14, 0xc0, 0x64, 0x34, 0xd2, 0x4b, 0x17, 0x0f,
+ 0x9b, 0xa9, 0xc3, 0x3a, 0x48, 0x0f, 0xd6, 0xb0, 0x07, 0xc0, 0x64, 0x40,
+ 0x44, 0xcd, 0xca, 0x40, 0x64, 0x52, 0x96, 0x01, 0x37, 0xd1, 0xc7, 0x80,
+ 0xa2, 0x01, 0x05, 0xc1, 0xd4, 0x3b, 0x60, 0x0f, 0x9d, 0xf0, 0xd7, 0x2a,
+ 0x82, 0x01, 0x3a, 0x29, 0xc2, 0x00, 0x29, 0x0f, 0xa0, 0x2a, 0x00, 0x64,
+ 0x76, 0xc7, 0x17, 0x6b, 0x01, 0x1f, 0x91, 0x47, 0x50, 0x5d, 0x40, 0x64,
+ 0x7c, 0x00, 0x40, 0x64, 0x88, 0x45, 0xd8, 0x17, 0xc0, 0x64, 0x97, 0x4b,
+ 0x96, 0x8a, 0xc0, 0x64, 0xbf, 0xc7, 0x11, 0x53, 0x0f, 0xb1, 0x58, 0x42,
+ 0x00, 0x6f, 0x40, 0x64, 0xcb, 0x15, 0xc0, 0x64, 0xd1, 0x45, 0x01, 0xc3,
+ 0xc0, 0x64, 0xe1, 0x0e, 0xc0, 0x65, 0x2d, 0x52, 0x47, 0xb7, 0xc0, 0x65,
+ 0x39, 0x46, 0x09, 0x97, 0xc0, 0x65, 0x43, 0x4b, 0x6f, 0xc7, 0xc0, 0x65,
+ 0x6d, 0xc9, 0xac, 0x96, 0x00, 0x7d, 0xf3, 0x00, 0x65, 0x9e, 0x52, 0x4c,
+ 0x13, 0x40, 0x65, 0xa4, 0x47, 0x02, 0x0e, 0xc0, 0x65, 0xbc, 0x42, 0x00,
+ 0xa2, 0xc0, 0x65, 0xce, 0xce, 0x6c, 0x6e, 0x01, 0x6b, 0x81, 0xd0, 0x57,
+ 0xe2, 0x01, 0x6b, 0xf8, 0x00, 0xc0, 0x65, 0xd4, 0xc8, 0xbc, 0x32, 0x01,
+ 0x71, 0xd0, 0xd3, 0x46, 0x31, 0x0f, 0xdd, 0x81, 0x4a, 0x03, 0x3d, 0x40,
+ 0x66, 0x16, 0x00, 0xc0, 0x66, 0x28, 0x47, 0x09, 0x90, 0x40, 0x66, 0x8f,
+ 0x47, 0x0a, 0xda, 0xc0, 0x66, 0xa7, 0xc9, 0xb4, 0xbe, 0x00, 0x2c, 0x79,
+ 0xc6, 0x59, 0x92, 0x00, 0x2c, 0x51, 0xc9, 0x11, 0xf6, 0x00, 0x2c, 0x49,
+ 0x03, 0xc0, 0x66, 0xb3, 0xcd, 0x2c, 0xb2, 0x00, 0x2a, 0xf1, 0x05, 0xc0,
+ 0x66, 0xbf, 0x07, 0xc0, 0x66, 0xcb, 0xde, 0x0f, 0x5e, 0x00, 0x2a, 0xc8,
+ 0xca, 0xa6, 0x84, 0x0f, 0x9d, 0x41, 0xcd, 0x75, 0xc0, 0x0f, 0xb4, 0xd8,
+ 0xce, 0x72, 0x9c, 0x0f, 0x9c, 0xf9, 0xc4, 0x7a, 0xfe, 0x01, 0x5f, 0x28,
+ 0x05, 0xc0, 0x66, 0xd7, 0x4d, 0x29, 0xb9, 0xc0, 0x66, 0xe3, 0xcf, 0x6b,
+ 0x52, 0x0f, 0x4a, 0x21, 0xd0, 0x58, 0x92, 0x0f, 0x4a, 0x29, 0x47, 0x63,
+ 0xff, 0xc0, 0x67, 0x63, 0xc5, 0x08, 0x09, 0x0f, 0x4a, 0x39, 0x10, 0xc0,
+ 0x67, 0x6f, 0x46, 0x09, 0x97, 0xc0, 0x67, 0x7b, 0x48, 0x10, 0xb4, 0x40,
+ 0x67, 0x9f, 0x04, 0xc0, 0x67, 0xab, 0x05, 0xc0, 0x67, 0xcc, 0x06, 0xc0,
+ 0x67, 0xe0, 0x12, 0xc0, 0x67, 0xec, 0x16, 0xc0, 0x68, 0x00, 0x14, 0xc0,
+ 0x68, 0x1b, 0x18, 0xc0, 0x68, 0x28, 0x15, 0xc0, 0x68, 0x32, 0x03, 0xc0,
+ 0x68, 0x58, 0x0e, 0xc0, 0x68, 0x86, 0x42, 0x00, 0xec, 0xc0, 0x68, 0x92,
+ 0x0f, 0xc0, 0x68, 0x9e, 0x42, 0x01, 0x4a, 0xc0, 0x68, 0xb3, 0xc5, 0x61,
+ 0xc0, 0x0f, 0xb8, 0x19, 0x43, 0x03, 0xd3, 0xc0, 0x68, 0xbd, 0xc4, 0x83,
+ 0x39, 0x0f, 0xb8, 0x11, 0x09, 0xc0, 0x68, 0xc9, 0x44, 0x1a, 0x05, 0xc0,
+ 0x68, 0xd5, 0xc3, 0xdd, 0x05, 0x0f, 0xba, 0x31, 0xc5, 0xdd, 0xe4, 0x0f,
+ 0xba, 0xa9, 0x0a, 0x40, 0x68, 0xe4, 0xda, 0x1a, 0xcc, 0x01, 0x36, 0xa9,
+ 0xce, 0x72, 0x72, 0x01, 0x1c, 0x38, 0xc4, 0xd9, 0x17, 0x01, 0x34, 0xb9,
+ 0xc8, 0x8d, 0x71, 0x01, 0x09, 0xa9, 0xc2, 0x00, 0x61, 0x00, 0x00, 0x38,
+ 0xce, 0x73, 0xde, 0x01, 0x19, 0x71, 0xc8, 0x07, 0x5f, 0x01, 0x12, 0x60,
+ 0xcb, 0x23, 0xa0, 0x01, 0x12, 0x51, 0xc2, 0x00, 0xf1, 0x01, 0x12, 0x42,
+ 0x00, 0x68, 0xee, 0xc9, 0xae, 0x07, 0x0f, 0xb7, 0xd1, 0x0f, 0x40, 0x68,
+ 0xf4, 0xc8, 0xbf, 0xca, 0x0f, 0xb7, 0x61, 0xc9, 0xb1, 0x1f, 0x0f, 0xb7,
+ 0x58, 0x51, 0x52, 0x22, 0xc0, 0x69, 0x00, 0xcb, 0x99, 0xa2, 0x0f, 0xd6,
+ 0x00, 0x4b, 0x05, 0xf7, 0xc0, 0x69, 0x18, 0xce, 0x6f, 0x54, 0x0f, 0xa7,
+ 0xb0, 0xc2, 0x00, 0x49, 0x01, 0x11, 0x03, 0x00, 0x69, 0x38, 0xca, 0x9d,
+ 0x24, 0x01, 0x09, 0x59, 0xc9, 0x25, 0xca, 0x0f, 0xa5, 0x11, 0xc7, 0xca,
+ 0x84, 0x0f, 0xb1, 0x01, 0xcb, 0x90, 0x7b, 0x0f, 0xb1, 0x38, 0x14, 0xc0,
+ 0x69, 0x3e, 0x44, 0x0b, 0x02, 0xc0, 0x69, 0x4a, 0xcc, 0x8c, 0x01, 0x0f,
+ 0xb1, 0x90, 0xcb, 0x8b, 0x06, 0x01, 0x30, 0x51, 0xc9, 0xa8, 0x43, 0x08,
+ 0x0c, 0xe0, 0x0e, 0xc0, 0x69, 0x55, 0x10, 0xc0, 0x69, 0x5f, 0x06, 0xc0,
+ 0x69, 0x75, 0x16, 0xc0, 0x69, 0x83, 0x05, 0xc0, 0x69, 0x91, 0x83, 0x08,
+ 0xb8, 0x93, 0x00, 0x69, 0x9b, 0x0c, 0xc0, 0x69, 0xa1, 0x04, 0xc0, 0x69,
+ 0xab, 0x09, 0xc0, 0x69, 0xb5, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0x89, 0xc2,
+ 0x0d, 0xf6, 0x08, 0xb8, 0x79, 0xc2, 0x00, 0x39, 0x08, 0xb8, 0x69, 0xc2,
+ 0x01, 0xc3, 0x08, 0xb8, 0x49, 0x12, 0xc0, 0x69, 0xbf, 0x0d, 0x40, 0x69,
+ 0xc9, 0xc8, 0x91, 0x9a, 0x08, 0xb9, 0xf9, 0x44, 0x00, 0xbb, 0x40, 0x69,
+ 0xd3, 0xc5, 0x28, 0xee, 0x08, 0xb9, 0xd9, 0xc2, 0x00, 0xc4, 0x08, 0xb9,
+ 0xd0, 0xc4, 0x26, 0x78, 0x08, 0xb9, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xb9,
+ 0xc1, 0x15, 0xc0, 0x69, 0xe3, 0x08, 0xc0, 0x69, 0xef, 0x16, 0xc0, 0x69,
+ 0xfb, 0xc3, 0x05, 0x14, 0x08, 0xb9, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xb9,
+ 0x80, 0x83, 0x08, 0xb9, 0x03, 0x00, 0x6a, 0x07, 0x91, 0x08, 0xb9, 0x41,
+ 0x87, 0x08, 0xb9, 0x31, 0x97, 0x08, 0xb9, 0x23, 0x00, 0x6a, 0x17, 0x8b,
+ 0x08, 0xb9, 0x12, 0x00, 0x6a, 0x1b, 0x0e, 0xc0, 0x6a, 0x1f, 0xc2, 0x00,
+ 0x39, 0x08, 0xb8, 0xf0, 0xc6, 0x6a, 0xfb, 0x01, 0x08, 0x01, 0xc5, 0xd6,
+ 0xdc, 0x0f, 0xd4, 0xb8, 0xd3, 0x46, 0x0b, 0x01, 0x03, 0x69, 0xd2, 0x4d,
+ 0x69, 0x01, 0x03, 0x58, 0xc4, 0x01, 0x96, 0x01, 0x4c, 0xf9, 0xc5, 0x09,
+ 0x02, 0x00, 0x05, 0xa0, 0x42, 0x00, 0xe3, 0xc0, 0x6a, 0x29, 0xc5, 0xde,
+ 0x3e, 0x01, 0x1b, 0xd3, 0x00, 0x6a, 0x38, 0xc5, 0x9b, 0xd5, 0x01, 0x1b,
+ 0xab, 0x00, 0x6a, 0x3e, 0x0b, 0xc0, 0x6a, 0x44, 0xd0, 0x5c, 0xa2, 0x01,
+ 0x1b, 0xb9, 0x14, 0xc0, 0x6a, 0x53, 0x42, 0x02, 0xae, 0xc0, 0x6a, 0x5f,
+ 0x06, 0xc0, 0x6a, 0x69, 0x15, 0xc0, 0x6a, 0x7b, 0xc5, 0xd7, 0x8b, 0x01,
+ 0x1b, 0x61, 0x05, 0xc0, 0x6a, 0x91, 0xd6, 0x31, 0x14, 0x01, 0x1b, 0x49,
+ 0xcf, 0x64, 0x86, 0x01, 0x1b, 0x41, 0x44, 0x00, 0x49, 0xc0, 0x6a, 0x9d,
+ 0x44, 0xe1, 0x43, 0xc0, 0x6a, 0xa9, 0xcd, 0x7d, 0xed, 0x01, 0x1a, 0x00,
+ 0x42, 0x00, 0x79, 0xc0, 0x6a, 0xb5, 0xd8, 0x23, 0x63, 0x00, 0x04, 0xf8,
+ 0xc7, 0x2d, 0x87, 0x00, 0x01, 0x39, 0xc4, 0x66, 0x29, 0x01, 0x5f, 0x20,
+ 0xd1, 0x48, 0x11, 0x08, 0x59, 0xc9, 0x47, 0x02, 0x0e, 0x40, 0x6a, 0xc1,
+ 0xc4, 0x3d, 0xd8, 0x0f, 0x9f, 0xd1, 0xc6, 0x36, 0x23, 0x00, 0x01, 0x30,
+ 0xca, 0xa7, 0xc4, 0x08, 0x08, 0x11, 0x47, 0x34, 0x2f, 0xc0, 0x6b, 0x42,
+ 0x19, 0xc0, 0x6b, 0x69, 0xd9, 0x20, 0xc1, 0x08, 0x09, 0xe1, 0xdc, 0x14,
+ 0xbd, 0x08, 0x09, 0xe9, 0x48, 0x14, 0xc4, 0x40, 0x6b, 0x75, 0x4a, 0x9f,
+ 0x0e, 0xc0, 0x6b, 0x81, 0xc9, 0xb0, 0x23, 0x0f, 0xca, 0x50, 0xd4, 0x3c,
+ 0xb4, 0x0f, 0xbd, 0x89, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x21, 0x46, 0x01,
+ 0xfc, 0xc0, 0x6b, 0xa3, 0x15, 0xc0, 0x6b, 0xaf, 0xd5, 0x34, 0x8e, 0x0f,
+ 0xbd, 0xe8, 0x43, 0x00, 0x7a, 0xc0, 0x6b, 0xbb, 0xd4, 0x3e, 0x30, 0x0f,
+ 0x9b, 0xf0, 0xc3, 0x1e, 0x19, 0x01, 0x16, 0x43, 0x00, 0x6b, 0xee, 0x0e,
+ 0xc0, 0x6b, 0xf4, 0xca, 0x9b, 0xc6, 0x0f, 0x9f, 0xc8, 0xc8, 0x2f, 0x03,
+ 0x0f, 0xb6, 0x48, 0x8d, 0x0f, 0xab, 0x73, 0x00, 0x6b, 0xfe, 0xc6, 0xc9,
+ 0xcf, 0x0f, 0xd4, 0x18, 0xcb, 0x95, 0xfb, 0x0f, 0x9c, 0xa8, 0x47, 0x02,
+ 0x0e, 0xc0, 0x6c, 0x0b, 0x4d, 0x7f, 0x25, 0x40, 0x6c, 0x95, 0x4b, 0x96,
+ 0x48, 0xc0, 0x6c, 0xa9, 0xc4, 0xae, 0x42, 0x0f, 0x99, 0xe1, 0xc5, 0xd9,
+ 0x98, 0x0f, 0xa1, 0x08, 0x42, 0x00, 0x3b, 0xc0, 0x6c, 0xd0, 0xc9, 0x95,
+ 0x84, 0x01, 0x21, 0x10, 0x00, 0xc0, 0x6c, 0xd8, 0xc7, 0xc6, 0xa2, 0x0f,
+ 0xd6, 0x80, 0xc2, 0x00, 0x81, 0x0f, 0xd4, 0xa9, 0x8d, 0x0f, 0x9f, 0x33,
+ 0x00, 0x6c, 0xe4, 0xc3, 0x09, 0xe5, 0x0f, 0x9a, 0x60, 0x0e, 0xc0, 0x6c,
+ 0xea, 0x46, 0x77, 0x20, 0x40, 0x6c, 0xfa, 0xc3, 0x00, 0x3c, 0x0f, 0xcf,
+ 0xd3, 0x00, 0x6d, 0x30, 0xc5, 0xdb, 0x46, 0x01, 0x35, 0xf1, 0x47, 0xc1,
+ 0x9a, 0x40, 0x6d, 0x36, 0xc3, 0x09, 0x3b, 0x0f, 0xcd, 0x09, 0xde, 0x0f,
+ 0xd6, 0x0f, 0x9f, 0xc0, 0x00, 0x40, 0x6d, 0x48, 0x47, 0x02, 0x0e, 0xc0,
+ 0x6d, 0x60, 0x42, 0x00, 0x99, 0xc0, 0x6d, 0xa5, 0xc7, 0xc0, 0x3c, 0x05,
+ 0x37, 0x91, 0xc9, 0x11, 0xf6, 0x05, 0x37, 0x99, 0xc9, 0xa8, 0x55, 0x05,
+ 0x37, 0xb1, 0xcd, 0x2c, 0xb2, 0x05, 0x37, 0xb8, 0x0d, 0xc0, 0x6d, 0xaf,
+ 0xcb, 0x93, 0x25, 0x0f, 0xa1, 0x59, 0xc2, 0x00, 0x45, 0x0f, 0xca, 0x98,
+ 0x43, 0x40, 0x85, 0xc0, 0x6d, 0xbd, 0xc4, 0xcd, 0x51, 0x0f, 0xa8, 0x59,
+ 0x8a, 0x0f, 0xb6, 0x02, 0x00, 0x6d, 0xd9, 0x00, 0xc0, 0x6d, 0xdf, 0xc8,
+ 0xbd, 0xc2, 0x0f, 0xa4, 0x40, 0xca, 0x9e, 0xc8, 0x0f, 0xb6, 0x21, 0xcb,
+ 0x90, 0xc8, 0x0f, 0xca, 0xb1, 0xc2, 0x05, 0x03, 0x0f, 0xcb, 0x78, 0xc9,
+ 0xb3, 0x0e, 0x01, 0x05, 0xf9, 0xc7, 0x82, 0x99, 0x0f, 0xd7, 0x30, 0xc5,
+ 0xd8, 0xc6, 0x0f, 0x9d, 0x89, 0xc6, 0xd3, 0x97, 0x0f, 0xcf, 0x10, 0xca,
+ 0xa0, 0x94, 0x0f, 0x9c, 0x11, 0x86, 0x0f, 0xa1, 0x30, 0xcf, 0x61, 0xd4,
+ 0x01, 0x4f, 0xc9, 0xc7, 0x27, 0x5d, 0x01, 0x4f, 0xc0, 0x87, 0x0f, 0xb5,
+ 0x91, 0xc3, 0x1d, 0xb1, 0x0f, 0xb5, 0xa0, 0xc3, 0x00, 0x5f, 0x0f, 0xcd,
+ 0x59, 0x44, 0x7c, 0x59, 0xc0, 0x6d, 0xeb, 0xca, 0x9d, 0xba, 0x0f, 0xa4,
+ 0x99, 0xd0, 0x57, 0x82, 0x0f, 0x9e, 0xb1, 0x14, 0xc0, 0x6e, 0x03, 0xc2,
+ 0x05, 0x26, 0x0f, 0xd6, 0xc0, 0xc9, 0xac, 0x45, 0x01, 0x19, 0x63, 0x00,
+ 0x6e, 0x0f, 0x45, 0xb1, 0x74, 0xc0, 0x6e, 0x15, 0x16, 0x40, 0x6e, 0x47,
+ 0x00, 0xc0, 0x6e, 0x53, 0xc8, 0xbd, 0xaa, 0x0f, 0xb6, 0x70, 0xc4, 0x0b,
+ 0xcb, 0x01, 0x13, 0x61, 0xc7, 0x00, 0x90, 0x01, 0x09, 0xb0, 0xc5, 0xb2,
+ 0x39, 0x0f, 0x9b, 0xd1, 0xc3, 0x0f, 0xed, 0x0f, 0xd5, 0x90, 0xc3, 0xe6,
+ 0x11, 0x0f, 0xcc, 0x58, 0xc5, 0x00, 0xef, 0x0f, 0xb4, 0x79, 0x16, 0x40,
+ 0x6e, 0x65, 0xc4, 0xdf, 0x87, 0x01, 0x2e, 0x71, 0xc2, 0x00, 0x3d, 0x01,
+ 0x01, 0x13, 0x00, 0x6e, 0x71, 0xc4, 0x2a, 0xcc, 0x0f, 0xab, 0x5a, 0x00,
+ 0x6e, 0x77, 0x46, 0x77, 0x20, 0x40, 0x6e, 0x7d, 0x4b, 0x6f, 0xc7, 0xc0,
+ 0x6e, 0x95, 0x47, 0x02, 0x0e, 0x40, 0x6e, 0x9d, 0xc4, 0x4c, 0x31, 0x0f,
+ 0xce, 0x59, 0x95, 0x0f, 0xd7, 0x38, 0x06, 0xc0, 0x6e, 0xfb, 0x42, 0x00,
+ 0x07, 0xc0, 0x6f, 0x07, 0xc2, 0x00, 0x3b, 0x0f, 0xcf, 0x88, 0x0b, 0xc0,
+ 0x6f, 0x11, 0x44, 0xdf, 0xf3, 0x40, 0x6f, 0x1b, 0x44, 0x9b, 0x5b, 0xc0,
+ 0x6f, 0x3b, 0xc8, 0xbf, 0x92, 0x0f, 0xc8, 0x71, 0xc5, 0xdd, 0x3f, 0x0f,
+ 0xcb, 0x31, 0xc2, 0x00, 0x7a, 0x0f, 0xcf, 0xc8, 0x03, 0xc0, 0x6f, 0x4d,
+ 0xc2, 0x00, 0x5f, 0x00, 0x16, 0xc0, 0x09, 0xc0, 0x6f, 0x5d, 0x0d, 0xc0,
+ 0x6f, 0x6f, 0x03, 0xc0, 0x6f, 0x92, 0x15, 0xc0, 0x6f, 0xa4, 0x06, 0xc0,
+ 0x6f, 0xc1, 0x1b, 0xc0, 0x6f, 0xd1, 0x08, 0xc0, 0x6f, 0xdb, 0x42, 0x11,
+ 0xee, 0xc0, 0x6f, 0xed, 0x0b, 0xc0, 0x6f, 0xff, 0x07, 0xc0, 0x70, 0x0f,
+ 0x0f, 0xc0, 0x70, 0x31, 0x16, 0xc0, 0x70, 0x3d, 0x0e, 0xc0, 0x70, 0x4f,
+ 0x11, 0xc0, 0x70, 0x59, 0x12, 0xc0, 0x70, 0x71, 0xcc, 0x87, 0x5d, 0x0e,
+ 0x83, 0x51, 0x42, 0x02, 0x41, 0xc0, 0x70, 0x87, 0xc4, 0xc6, 0xc9, 0x0e,
+ 0x82, 0x01, 0x14, 0x40, 0x70, 0x93, 0xc4, 0x26, 0x78, 0x08, 0xe3, 0x13,
+ 0x00, 0x70, 0x9f, 0xc5, 0x06, 0xdb, 0x08, 0xe3, 0x0b, 0x00, 0x70, 0xa5,
+ 0x15, 0xc0, 0x70, 0xa9, 0x08, 0xc0, 0x70, 0xbb, 0x16, 0xc0, 0x70, 0xc3,
+ 0xc3, 0x05, 0x14, 0x08, 0xe2, 0xd0, 0x45, 0x09, 0x98, 0xc0, 0x70, 0xd1,
+ 0xcb, 0x97, 0xf5, 0x08, 0xe2, 0x11, 0xc4, 0x19, 0x53, 0x08, 0xe2, 0x08,
+ 0x9f, 0x08, 0xe2, 0x29, 0x9e, 0x08, 0xe2, 0x20, 0x03, 0xc0, 0x70, 0xf5,
+ 0x42, 0x07, 0xb2, 0xc0, 0x71, 0x01, 0xcb, 0x1e, 0x89, 0x08, 0xe1, 0xe0,
+ 0x03, 0xc0, 0x71, 0x0d, 0x91, 0x08, 0xe1, 0xd1, 0x87, 0x08, 0xe1, 0xc1,
+ 0x48, 0xb2, 0x2d, 0xc0, 0x71, 0x19, 0x97, 0x08, 0xe1, 0x93, 0x00, 0x71,
+ 0x24, 0x8b, 0x08, 0xe1, 0x82, 0x00, 0x71, 0x28, 0xc2, 0x00, 0xd0, 0x08,
+ 0xe1, 0x71, 0x15, 0xc0, 0x71, 0x2c, 0x18, 0xc0, 0x71, 0x3c, 0xc2, 0x00,
+ 0xdb, 0x08, 0xe1, 0x49, 0xc2, 0x00, 0x39, 0x08, 0xe1, 0x41, 0xc2, 0x19,
+ 0x2c, 0x08, 0xe1, 0x39, 0xc2, 0x01, 0xc3, 0x08, 0xe1, 0x31, 0x04, 0xc0,
+ 0x71, 0x46, 0x12, 0xc0, 0x71, 0x50, 0x10, 0xc0, 0x71, 0x5a, 0x06, 0xc0,
+ 0x71, 0x70, 0x16, 0xc0, 0x71, 0x7e, 0x0c, 0xc0, 0x71, 0x8c, 0x05, 0xc0,
+ 0x71, 0x96, 0x09, 0xc0, 0x71, 0xa0, 0x0d, 0xc0, 0x71, 0xaa, 0x83, 0x08,
+ 0xe0, 0x03, 0x00, 0x71, 0xb4, 0x91, 0x08, 0xe0, 0x61, 0x87, 0x08, 0xe0,
+ 0x51, 0x97, 0x08, 0xe0, 0x23, 0x00, 0x71, 0xc0, 0x8b, 0x08, 0xe0, 0x12,
+ 0x00, 0x71, 0xc4, 0x43, 0x00, 0x29, 0xc0, 0x71, 0xc8, 0x00, 0x40, 0x71,
+ 0xf6, 0x45, 0x00, 0x2c, 0xc0, 0x72, 0x15, 0x44, 0x00, 0x49, 0xc0, 0x72,
+ 0x21, 0x06, 0x40, 0x72, 0x2b, 0xdb, 0x18, 0x6f, 0x01, 0x3f, 0x00, 0xc2,
+ 0x00, 0xbf, 0x01, 0x11, 0x43, 0x00, 0x72, 0x3d, 0xc3, 0x02, 0x9b, 0x01,
+ 0x11, 0x3a, 0x00, 0x72, 0x41, 0xcd, 0x7e, 0xa3, 0x0f, 0xa8, 0x79, 0x4a,
+ 0xa0, 0x1c, 0x40, 0x72, 0x47, 0xc6, 0x02, 0x0e, 0x0f, 0xa4, 0x61, 0xc5,
+ 0xd6, 0x05, 0x0f, 0x9f, 0x48, 0xca, 0x9b, 0x44, 0x0f, 0xcf, 0xa1, 0xc2,
+ 0x11, 0xa5, 0x0f, 0xd5, 0xb8, 0x00, 0xc0, 0x72, 0x53, 0x46, 0x01, 0x4a,
+ 0xc0, 0x72, 0xa2, 0x02, 0x40, 0x72, 0xe9, 0xc7, 0xc8, 0x3f, 0x0f, 0xcb,
+ 0x61, 0xd3, 0x45, 0x01, 0x0f, 0x9a, 0x18, 0xc4, 0x0b, 0x66, 0x0f, 0xa0,
+ 0x30, 0x4b, 0x37, 0x43, 0xc0, 0x73, 0x05, 0xd8, 0x24, 0xe3, 0x01, 0x16,
+ 0xd1, 0x45, 0x00, 0x8c, 0xc0, 0x73, 0x11, 0x11, 0xc0, 0x73, 0x23, 0x03,
+ 0xc0, 0x73, 0x2f, 0xc4, 0x00, 0xba, 0x00, 0x01, 0xe1, 0xcf, 0x69, 0x18,
+ 0x01, 0x55, 0x32, 0x00, 0x73, 0x3b, 0x47, 0x02, 0x0e, 0xc0, 0x73, 0x41,
+ 0x46, 0x09, 0x97, 0xc0, 0x73, 0x99, 0x4c, 0x11, 0xe2, 0xc0, 0x73, 0xbd,
+ 0x15, 0xc0, 0x73, 0xcd, 0x4f, 0x30, 0x90, 0xc0, 0x73, 0xd9, 0x4b, 0x6f,
+ 0xc7, 0x40, 0x73, 0xfb, 0x42, 0x00, 0x2f, 0xc0, 0x74, 0x17, 0xd6, 0x21,
+ 0x9d, 0x0f, 0xb3, 0x90, 0x47, 0x02, 0x0e, 0xc0, 0x74, 0x24, 0x4c, 0x11,
+ 0xe2, 0x40, 0x74, 0x9a, 0x07, 0xc0, 0x74, 0xa6, 0x0d, 0x40, 0x74, 0xb0,
+ 0x43, 0xb6, 0x2f, 0xc0, 0x74, 0xbc, 0xd3, 0x44, 0x1d, 0x01, 0x96, 0x78,
+ 0xc4, 0x1e, 0xf2, 0x0f, 0xa4, 0x20, 0xcf, 0x63, 0xe1, 0x08, 0x49, 0xf9,
+ 0x47, 0x02, 0x0e, 0x40, 0x74, 0xde, 0x83, 0x08, 0x14, 0x03, 0x00, 0x75,
+ 0x40, 0x87, 0x08, 0x14, 0x0b, 0x00, 0x75, 0x44, 0x84, 0x08, 0x14, 0x13,
+ 0x00, 0x75, 0x48, 0x89, 0x08, 0x14, 0x21, 0x86, 0x08, 0x14, 0x29, 0x8b,
+ 0x08, 0x14, 0x31, 0x99, 0x08, 0x14, 0x39, 0x9c, 0x08, 0x14, 0x41, 0x96,
+ 0x08, 0x14, 0xbb, 0x00, 0x75, 0x4c, 0x8c, 0x08, 0x14, 0x51, 0x8d, 0x08,
+ 0x14, 0x5b, 0x00, 0x75, 0x54, 0x93, 0x08, 0x14, 0x61, 0x8e, 0x08, 0x14,
+ 0x69, 0x8f, 0x08, 0x14, 0x73, 0x00, 0x75, 0x58, 0x90, 0x08, 0x14, 0x7b,
+ 0x00, 0x75, 0x5c, 0x97, 0x08, 0x14, 0x91, 0x92, 0x08, 0x14, 0x99, 0x94,
+ 0x08, 0x14, 0xa9, 0x95, 0x08, 0x14, 0xb1, 0x8a, 0x08, 0x14, 0xd9, 0x9a,
+ 0x08, 0x14, 0xe0, 0x42, 0x09, 0x3b, 0xc0, 0x75, 0x60, 0xc6, 0x8f, 0xfc,
+ 0x01, 0x05, 0xf0, 0x15, 0xc0, 0x75, 0x6d, 0x47, 0x02, 0x0e, 0xc0, 0x75,
+ 0x79, 0x05, 0xc0, 0x75, 0xc9, 0x52, 0x48, 0xc5, 0x40, 0x75, 0xd5, 0x00,
+ 0x40, 0x75, 0xeb, 0xc2, 0x05, 0x03, 0x0f, 0x9f, 0xb9, 0xc5, 0xd8, 0x71,
+ 0x0f, 0xcb, 0xe0, 0xc8, 0xbc, 0x7a, 0x0f, 0xa0, 0xf1, 0xc3, 0x01, 0xe5,
+ 0x0f, 0xd4, 0xe0, 0x47, 0x02, 0x0e, 0xc0, 0x75, 0xf7, 0xc8, 0x22, 0x83,
+ 0x00, 0x75, 0x79, 0x4b, 0x6f, 0xc7, 0xc0, 0x76, 0x4e, 0x15, 0xc0, 0x76,
+ 0x7b, 0xc5, 0xdc, 0x54, 0x00, 0x76, 0x31, 0x49, 0xb2, 0x63, 0xc0, 0x76,
+ 0x87, 0xd1, 0x52, 0xaa, 0x00, 0x76, 0x61, 0xc9, 0xae, 0x97, 0x00, 0x76,
+ 0x69, 0x46, 0x09, 0x97, 0xc0, 0x76, 0x97, 0x43, 0x60, 0xe8, 0x40, 0x76,
+ 0xbb, 0x46, 0x00, 0x2c, 0xc0, 0x76, 0xc7, 0x45, 0x00, 0x49, 0xc0, 0x76,
+ 0xef, 0x44, 0x02, 0x9b, 0xc0, 0x77, 0x0b, 0x45, 0x01, 0xce, 0xc0, 0x77,
+ 0x15, 0xce, 0x6b, 0x9c, 0x01, 0x38, 0x09, 0x44, 0x05, 0x14, 0xc0, 0x77,
+ 0x30, 0x16, 0xc0, 0x77, 0x3c, 0xd2, 0x4a, 0x75, 0x0f, 0xdc, 0x21, 0xd3,
+ 0x3f, 0xe2, 0x0f, 0xdc, 0x30, 0x46, 0x01, 0xfc, 0xc0, 0x77, 0x48, 0x16,
+ 0xc0, 0x77, 0x5a, 0x15, 0xc0, 0x77, 0x66, 0xd0, 0x58, 0x62, 0x0f, 0xc1,
+ 0xe9, 0xd1, 0x56, 0xd9, 0x0f, 0xc1, 0xa9, 0x03, 0xc0, 0x77, 0x72, 0xcf,
+ 0x61, 0x4d, 0x01, 0x3f, 0x81, 0x06, 0xc0, 0x77, 0x81, 0xcd, 0x7c, 0xa8,
+ 0x01, 0x0e, 0x41, 0x0a, 0xc0, 0x77, 0x8d, 0xc6, 0xca, 0xa3, 0x0f, 0xb3,
+ 0x69, 0x46, 0x04, 0x8f, 0x40, 0x77, 0x99, 0x46, 0x03, 0x13, 0xc0, 0x77,
+ 0xa5, 0x4e, 0x6c, 0xfa, 0xc0, 0x77, 0xb1, 0xcc, 0x4e, 0x35, 0x0f, 0xa9,
+ 0xd1, 0xd1, 0x56, 0x2f, 0x0f, 0xb7, 0x31, 0xc8, 0x2e, 0x20, 0x0f, 0xb7,
+ 0x38, 0xc4, 0x32, 0xbc, 0x01, 0x15, 0x2b, 0x00, 0x77, 0xbd, 0x45, 0x01,
+ 0xa2, 0xc0, 0x77, 0xc3, 0xd7, 0x27, 0xfe, 0x01, 0x17, 0x81, 0x45, 0x11,
+ 0x17, 0xc0, 0x77, 0xd2, 0xc9, 0xb2, 0xea, 0x01, 0x4b, 0xf1, 0x45, 0x01,
+ 0x5d, 0x40, 0x77, 0xf9, 0xc9, 0xb0, 0xd7, 0x0f, 0xcc, 0x21, 0xd7, 0x1f,
+ 0x33, 0x01, 0x33, 0x91, 0xc2, 0x00, 0x45, 0x01, 0x11, 0x53, 0x00, 0x78,
+ 0x05, 0x16, 0x40, 0x78, 0x09, 0xc8, 0x9c, 0xae, 0x01, 0x1c, 0x61, 0xc5,
+ 0xb9, 0x85, 0x01, 0x01, 0xf8, 0xc9, 0xac, 0x4e, 0x01, 0x37, 0x89, 0xcf,
+ 0x6a, 0x62, 0x01, 0x30, 0xa0, 0x03, 0xc0, 0x78, 0x15, 0xc4, 0x93, 0xa9,
+ 0x08, 0x1c, 0x09, 0x09, 0xc0, 0x78, 0x21, 0x0d, 0xc0, 0x78, 0x2d, 0x06,
+ 0xc0, 0x78, 0x39, 0xc2, 0x01, 0x23, 0x08, 0x1c, 0x2b, 0x00, 0x78, 0x45,
+ 0xc2, 0x02, 0xa0, 0x08, 0x1c, 0x31, 0x1c, 0xc0, 0x78, 0x4b, 0x16, 0xc0,
+ 0x78, 0x55, 0xc3, 0x4a, 0xb9, 0x08, 0x1c, 0x51, 0x15, 0xc0, 0x78, 0x65,
+ 0xc5, 0xdd, 0x99, 0x08, 0x1c, 0x69, 0xc3, 0x00, 0x4e, 0x08, 0x1c, 0x71,
+ 0xc3, 0x20, 0x18, 0x08, 0x1c, 0x81, 0xc2, 0x05, 0x1c, 0x08, 0x1c, 0xa1,
+ 0xc4, 0xe4, 0x97, 0x08, 0x1c, 0xb1, 0xc5, 0xd5, 0xec, 0x08, 0x1c, 0xb9,
+ 0x8b, 0x08, 0x1c, 0xd9, 0x97, 0x08, 0x1c, 0xe0, 0x43, 0x11, 0x3c, 0xc0,
+ 0x78, 0x75, 0x06, 0xc0, 0x78, 0xd1, 0x14, 0x40, 0x78, 0xe0, 0xc7, 0xc9,
+ 0xab, 0x0f, 0xb4, 0x09, 0x0f, 0xc0, 0x78, 0xec, 0xd7, 0x26, 0x8e, 0x01,
+ 0x5f, 0xf8, 0x14, 0xc0, 0x78, 0xf8, 0x0a, 0xc0, 0x79, 0x16, 0x10, 0xc0,
+ 0x79, 0x34, 0x0d, 0xc0, 0x79, 0x58, 0x42, 0x28, 0x5b, 0xc0, 0x79, 0x76,
+ 0x42, 0x01, 0x99, 0xc0, 0x79, 0x82, 0x42, 0x36, 0xa2, 0xc0, 0x79, 0x9a,
+ 0x42, 0x2f, 0xf9, 0xc0, 0x79, 0xae, 0x42, 0x14, 0x7d, 0xc0, 0x79, 0xbe,
+ 0x19, 0xc0, 0x79, 0xd0, 0x1b, 0xc0, 0x79, 0xe8, 0x0f, 0xc0, 0x79, 0xfa,
+ 0x16, 0xc0, 0x7a, 0x18, 0x15, 0x40, 0x7a, 0x36, 0xd7, 0x27, 0x01, 0x01,
+ 0x15, 0xc9, 0x84, 0x0f, 0x99, 0xf8, 0x0e, 0xc0, 0x7a, 0x54, 0x12, 0xc0,
+ 0x7a, 0x60, 0xcc, 0x8a, 0x99, 0x00, 0x2f, 0x79, 0x45, 0x01, 0xc3, 0xc0,
+ 0x7a, 0x6c, 0x47, 0x26, 0x6b, 0x40, 0x7a, 0x7e, 0x16, 0xc0, 0x7a, 0xc8,
+ 0x06, 0xc0, 0x7a, 0xd4, 0xce, 0x6f, 0x00, 0x02, 0x6e, 0x19, 0x19, 0xc0,
+ 0x7a, 0xe8, 0x42, 0x00, 0x99, 0xc0, 0x7a, 0xf4, 0xd0, 0x5a, 0xb2, 0x02,
+ 0x6e, 0x39, 0x15, 0xc0, 0x7a, 0xfe, 0x12, 0xc0, 0x7b, 0x10, 0x08, 0xc0,
+ 0x7b, 0x22, 0x09, 0xc0, 0x7b, 0x2e, 0x42, 0x00, 0xa2, 0xc0, 0x7b, 0x38,
+ 0xca, 0xa3, 0xa0, 0x02, 0x6e, 0x79, 0x03, 0xc0, 0x7b, 0x44, 0x04, 0xc0,
+ 0x7b, 0x56, 0x42, 0x01, 0x19, 0xc0, 0x7b, 0x68, 0x42, 0x00, 0x74, 0xc0,
+ 0x7b, 0x72, 0x11, 0xc0, 0x7b, 0x82, 0xca, 0xa5, 0x6c, 0x02, 0x6f, 0xd8,
+ 0x48, 0x01, 0x6b, 0xc0, 0x7b, 0x8e, 0xc2, 0x00, 0x40, 0x0f, 0xa0, 0x72,
+ 0x00, 0x7b, 0xb4, 0x00, 0xc0, 0x7b, 0xb8, 0xc2, 0x05, 0x03, 0x0f, 0x9f,
+ 0x40, 0xc6, 0xc6, 0xf0, 0x01, 0x18, 0xdb, 0x00, 0x7b, 0xd0, 0xc2, 0x00,
+ 0x40, 0x01, 0x18, 0x12, 0x00, 0x7b, 0xd6, 0xd9, 0x1f, 0x7c, 0x0f, 0xb3,
+ 0x43, 0x00, 0x7b, 0xda, 0x87, 0x0f, 0xab, 0x98, 0xc4, 0x49, 0x2a, 0x0f,
+ 0x9b, 0x79, 0xc3, 0xb2, 0x36, 0x0f, 0xa0, 0xe8, 0x15, 0xc0, 0x7b, 0xe0,
+ 0xc3, 0x2f, 0x1e, 0x0f, 0xa9, 0x43, 0x00, 0x7b, 0xea, 0xc6, 0xcb, 0x15,
+ 0x0f, 0x9a, 0xa0, 0x06, 0xc0, 0x7b, 0xf0, 0x4d, 0x7f, 0xf5, 0xc0, 0x7c,
+ 0x02, 0x45, 0xdb, 0x2d, 0xc0, 0x7c, 0x20, 0x09, 0x40, 0x7c, 0x32, 0xc6,
+ 0x40, 0x87, 0x01, 0x00, 0x51, 0xc3, 0x23, 0x08, 0x0f, 0xa4, 0x38, 0x44,
+ 0xc7, 0xf4, 0xc0, 0x7c, 0x3e, 0xcb, 0x96, 0xb6, 0x0f, 0xa1, 0x18, 0x4c,
+ 0x1c, 0x86, 0xc0, 0x7c, 0x4a, 0x44, 0x00, 0x49, 0xc0, 0x7c, 0x56, 0x45,
+ 0x00, 0x2c, 0xc0, 0x7c, 0x62, 0x48, 0xb5, 0x4a, 0xc0, 0x7c, 0x6e, 0x47,
+ 0xc3, 0x3e, 0xc0, 0x7c, 0x78, 0xd4, 0x3b, 0x24, 0x07, 0xff, 0x41, 0xcd,
+ 0x1b, 0x41, 0x07, 0xff, 0x51, 0xcf, 0x14, 0x22, 0x07, 0xff, 0x61, 0xcc,
+ 0x0d, 0xae, 0x07, 0xff, 0x69, 0xcc, 0x0d, 0x9e, 0x07, 0xff, 0x70, 0x02,
+ 0xc0, 0x7c, 0x84, 0x00, 0x40, 0x7c, 0x93, 0x47, 0x02, 0x0e, 0xc0, 0x7c,
+ 0x9f, 0xce, 0x1c, 0x92, 0x01, 0x84, 0xe9, 0xd5, 0x34, 0xb8, 0x01, 0x84,
+ 0xf1, 0xcc, 0x80, 0xe5, 0x01, 0x84, 0xf8, 0xc3, 0x06, 0x19, 0x01, 0x00,
+ 0x83, 0x00, 0x7c, 0xf7, 0xc9, 0xab, 0x49, 0x01, 0x70, 0x90, 0x42, 0x00,
+ 0x29, 0xc0, 0x7d, 0x07, 0x47, 0xc7, 0x04, 0x40, 0x7d, 0x13, 0x46, 0x0b,
+ 0x11, 0xc0, 0x7d, 0x25, 0xc7, 0x00, 0x91, 0x0f, 0xa9, 0x19, 0xc7, 0xc1,
+ 0x93, 0x0f, 0xa9, 0x10, 0x14, 0xc0, 0x7d, 0x37, 0xc4, 0x1e, 0x43, 0x01,
+ 0x11, 0x5a, 0x00, 0x7d, 0x56, 0xcd, 0x77, 0xef, 0x01, 0x1c, 0x01, 0x4d,
+ 0x7a, 0xe1, 0x40, 0x7d, 0x5a, 0xc5, 0x65, 0x44, 0x01, 0x10, 0xf3, 0x00,
+ 0x7d, 0x66, 0x49, 0x53, 0x89, 0x40, 0x7d, 0x6c, 0x42, 0x01, 0x19, 0xc0,
+ 0x7d, 0x76, 0x42, 0x00, 0x7a, 0x40, 0x7d, 0x82, 0x0b, 0xc0, 0x7d, 0x8e,
+ 0xc2, 0x01, 0x0b, 0x00, 0x04, 0x22, 0x00, 0x7d, 0x9a, 0xd3, 0x46, 0x0b,
+ 0x01, 0x03, 0x61, 0xd2, 0x4d, 0x69, 0x01, 0x03, 0x50, 0xcd, 0x76, 0xeb,
+ 0x0f, 0xd5, 0x51, 0x44, 0x05, 0x89, 0x40, 0x7d, 0xa0, 0x16, 0xc0, 0x7d,
+ 0xaf, 0x42, 0x00, 0x06, 0xc0, 0x7d, 0xbb, 0xc5, 0x40, 0x88, 0x01, 0x80,
+ 0x01, 0x05, 0xc0, 0x7d, 0xc7, 0xc9, 0x11, 0xf6, 0x01, 0x80, 0x11, 0xce,
+ 0x1c, 0x92, 0x01, 0x80, 0x29, 0xcb, 0x97, 0x87, 0x01, 0x80, 0x39, 0xcf,
+ 0x66, 0xa2, 0x01, 0x81, 0x51, 0xd0, 0x5a, 0x32, 0x01, 0x81, 0x59, 0xd2,
+ 0x49, 0x0d, 0x01, 0x81, 0x69, 0xd3, 0x3f, 0xcf, 0x01, 0x81, 0xf1, 0xcf,
+ 0x64, 0x59, 0x01, 0x81, 0xf9, 0x4b, 0x55, 0xe0, 0x40, 0x7d, 0xd3, 0xc4,
+ 0x59, 0x33, 0x0f, 0x9b, 0x41, 0xc3, 0xb3, 0x72, 0x0f, 0xce, 0x50, 0xda,
+ 0x1a, 0x16, 0x01, 0x12, 0x98, 0x4e, 0x70, 0x18, 0x40, 0x7e, 0x09, 0x8f,
+ 0x0f, 0xd5, 0x89, 0x42, 0x00, 0xa9, 0xc0, 0x7e, 0x1b, 0xc6, 0xd0, 0x1f,
+ 0x0f, 0xaf, 0xd1, 0xc9, 0xaa, 0x29, 0x0f, 0xb0, 0xf8, 0xc2, 0x00, 0xd1,
+ 0x0f, 0xa3, 0x4b, 0x00, 0x7e, 0x27, 0xca, 0xa2, 0x38, 0x0f, 0xb5, 0xd0,
+ 0x00, 0xc0, 0x7e, 0x33, 0xdb, 0x14, 0xd9, 0x01, 0x3d, 0x98, 0xcc, 0x8c,
+ 0x25, 0x01, 0x33, 0xf9, 0xca, 0x9d, 0x4c, 0x01, 0x31, 0xc0, 0x46, 0x1a,
+ 0x37, 0xc0, 0x7e, 0x85, 0x46, 0x06, 0x1d, 0xc0, 0x7e, 0x91, 0x4a, 0x03,
+ 0xc8, 0xc0, 0x7e, 0x9d, 0x4b, 0x03, 0x87, 0xc0, 0x7e, 0xbb, 0x4a, 0x01,
+ 0x88, 0xc0, 0x7e, 0xd9, 0x48, 0x09, 0x0d, 0x40, 0x7e, 0xf7, 0x06, 0xc0,
+ 0x7f, 0x15, 0xc7, 0xc2, 0xff, 0x0f, 0x9b, 0xb9, 0xc9, 0xa1, 0x3f, 0x0f,
+ 0xb0, 0x48, 0x42, 0x00, 0x29, 0xc0, 0x7f, 0x1f, 0xc2, 0x11, 0xee, 0x01,
+ 0x18, 0xd0, 0x44, 0xcc, 0x6b, 0xc0, 0x7f, 0x29, 0x44, 0x00, 0x74, 0x40,
+ 0x7f, 0x41, 0x49, 0xb0, 0xfb, 0xc0, 0x7f, 0x4d, 0xc9, 0xae, 0xcd, 0x01,
+ 0x35, 0x00, 0x42, 0x00, 0x36, 0xc0, 0x7f, 0x6b, 0x44, 0x00, 0x74, 0xc0,
+ 0x7f, 0x7b, 0x42, 0x00, 0x5d, 0x40, 0x7f, 0x8d, 0xd3, 0x3f, 0x96, 0x0f,
+ 0x98, 0xa1, 0xd4, 0x39, 0x08, 0x0f, 0x98, 0x90, 0xda, 0x14, 0xa3, 0x01,
+ 0x3d, 0xe1, 0xc4, 0x03, 0x30, 0x0f, 0xa4, 0x90, 0xda, 0x1b, 0x9c, 0x01,
+ 0x08, 0xc1, 0xca, 0x9b, 0x08, 0x0f, 0x9e, 0x58, 0xc4, 0x00, 0x87, 0x0f,
+ 0xb1, 0x49, 0xc8, 0x1d, 0x3c, 0x0f, 0xb2, 0x00, 0xcb, 0x98, 0xc6, 0x01,
+ 0x12, 0x01, 0xc3, 0x1e, 0xcf, 0x0f, 0xa9, 0x39, 0xc6, 0xcf, 0xf5, 0x0f,
+ 0xc9, 0xe0, 0x44, 0x00, 0x74, 0x40, 0x7f, 0x99, 0xc5, 0xda, 0xd8, 0x0f,
+ 0xcd, 0x49, 0x16, 0xc0, 0x7f, 0xab, 0xc9, 0xb1, 0x82, 0x01, 0x37, 0x98,
+ 0xc9, 0x1c, 0xaa, 0x01, 0x3b, 0x31, 0xc3, 0x00, 0x28, 0x01, 0x34, 0xc3,
+ 0x00, 0x7f, 0xbd, 0xc8, 0x31, 0xd1, 0x0f, 0xa5, 0xf0, 0xc9, 0xb1, 0x5e,
+ 0x01, 0x34, 0xe1, 0xca, 0x9b, 0x4e, 0x0f, 0xa5, 0x50, 0x14, 0xc0, 0x7f,
+ 0xc3, 0xc5, 0x03, 0x0a, 0x01, 0x37, 0x90, 0xc3, 0x4c, 0xa1, 0x01, 0x15,
+ 0x49, 0xc4, 0x63, 0xf2, 0x01, 0x10, 0x01, 0x0d, 0xc0, 0x7f, 0xd3, 0xc6,
+ 0xb7, 0xfc, 0x00, 0x00, 0x61, 0xcb, 0x90, 0xd3, 0x0f, 0xcb, 0x00, 0xc6,
+ 0xb9, 0xbc, 0x0f, 0xa3, 0x18, 0xc2, 0x2e, 0x0e, 0x0f, 0x98, 0x08, 0x42,
+ 0x00, 0x5d, 0xc0, 0x7f, 0xe8, 0xcb, 0x8e, 0xad, 0x01, 0x09, 0xd9, 0xc4,
+ 0x89, 0x7c, 0x0f, 0x9f, 0x68, 0xc7, 0x43, 0xb7, 0x0f, 0xa7, 0x01, 0xc4,
+ 0xd7, 0xa5, 0x0f, 0xad, 0xb8, 0x0e, 0xc0, 0x80, 0x0a, 0xc4, 0xe2, 0x0b,
+ 0x0f, 0xce, 0x30, 0xca, 0x90, 0x19, 0x0f, 0xcb, 0xb1, 0x46, 0xce, 0x0f,
+ 0x40, 0x80, 0x16, 0x43, 0x01, 0xe9, 0xc0, 0x80, 0x22, 0xc2, 0x01, 0x48,
+ 0x01, 0x19, 0x13, 0x00, 0x80, 0x2e, 0xc6, 0x21, 0xfd, 0x0f, 0xa1, 0xc0,
+ 0x46, 0x12, 0x41, 0xc0, 0x80, 0x34, 0x48, 0xa3, 0xc6, 0x40, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x52, 0x46, 0x48, 0x65, 0x40, 0x80, 0x6a, 0xc8, 0xba,
+ 0x52, 0x01, 0x35, 0x89, 0xd1, 0x57, 0x50, 0x01, 0x03, 0x08, 0x9b, 0x01,
+ 0x37, 0xa1, 0xc8, 0xb6, 0xd2, 0x0f, 0x9d, 0x08, 0xc8, 0x1b, 0xc8, 0x01,
+ 0x32, 0x01, 0xd7, 0x26, 0x77, 0x00, 0x05, 0x50, 0xc9, 0xa8, 0xa6, 0x0f,
+ 0xb1, 0x41, 0xc4, 0x14, 0xdd, 0x0f, 0xd5, 0xb0, 0x43, 0x14, 0xcf, 0xc0,
+ 0x80, 0xca, 0x87, 0x0f, 0xa9, 0x2a, 0x00, 0x80, 0xdf, 0x8a, 0x0f, 0xa0,
+ 0xfb, 0x00, 0x80, 0xf1, 0xcd, 0x7f, 0x9a, 0x0f, 0xa2, 0x50, 0xcb, 0x05,
+ 0x1c, 0x01, 0x02, 0xc9, 0xc4, 0x01, 0xc3, 0x01, 0x71, 0x68, 0xc4, 0x0e,
+ 0x9a, 0x01, 0x00, 0x91, 0xc5, 0x40, 0x88, 0x01, 0x00, 0x38, 0x42, 0x00,
+ 0x5d, 0xc0, 0x81, 0x03, 0x42, 0x00, 0x47, 0x40, 0x81, 0x15, 0xc5, 0x15,
+ 0x2d, 0x0f, 0xd5, 0x48, 0x46, 0x56, 0x32, 0xc0, 0x81, 0x21, 0xc6, 0x44,
+ 0xfb, 0x01, 0x05, 0x29, 0xc6, 0xd0, 0x67, 0x0f, 0x98, 0x60, 0x47, 0x02,
+ 0x0e, 0xc0, 0x81, 0x2d, 0x45, 0x2b, 0x5f, 0xc0, 0x81, 0x87, 0x4b, 0x6f,
+ 0xc7, 0xc0, 0x81, 0x9f, 0x45, 0x00, 0xba, 0x40, 0x81, 0xe6, 0x00, 0xc0,
+ 0x81, 0xf8, 0x11, 0x40, 0x82, 0x04, 0xd8, 0x22, 0x73, 0x01, 0x17, 0x79,
+ 0x44, 0x04, 0xce, 0x40, 0x82, 0x1c, 0x42, 0x11, 0xa5, 0xc0, 0x82, 0x28,
+ 0x0b, 0xc0, 0x82, 0x32, 0x9b, 0x01, 0x4f, 0xf8, 0xc3, 0x03, 0x2a, 0x0f,
+ 0xcd, 0xf1, 0xc3, 0x36, 0x44, 0x0f, 0xcd, 0xf8, 0x0b, 0xc0, 0x82, 0x44,
+ 0x49, 0xb2, 0xb4, 0x40, 0x82, 0x50, 0x91, 0x0f, 0xb4, 0x39, 0x45, 0x05,
+ 0x88, 0x40, 0x82, 0x70, 0x4b, 0x94, 0xb1, 0xc0, 0x82, 0x8c, 0xd7, 0x28,
+ 0x15, 0x0f, 0xaa, 0x71, 0xc8, 0x2f, 0x03, 0x0f, 0xb5, 0xc8, 0xc4, 0x5d,
+ 0x24, 0x01, 0x31, 0xf9, 0x46, 0xcc, 0xef, 0xc0, 0x82, 0x9e, 0xc6, 0x18,
+ 0x8e, 0x0f, 0xce, 0xe0, 0x46, 0xd3, 0x7f, 0xc0, 0x82, 0xaa, 0xc9, 0xab,
+ 0x2e, 0x0f, 0x9a, 0xb0, 0x46, 0x09, 0x97, 0xc0, 0x82, 0xbf, 0x03, 0xc0,
+ 0x82, 0xe3, 0x18, 0xc0, 0x82, 0xf5, 0x0e, 0xc0, 0x83, 0x01, 0xd4, 0x3d,
+ 0x04, 0x05, 0x57, 0xa1, 0xd8, 0x24, 0x53, 0x05, 0x57, 0x99, 0x46, 0xcc,
+ 0x3b, 0x40, 0x83, 0x0d, 0xc2, 0x00, 0x45, 0x0f, 0x9a, 0x41, 0xc9, 0x85,
+ 0xc8, 0x0f, 0xd7, 0x00, 0x42, 0x00, 0xbf, 0xc0, 0x83, 0x19, 0xcd, 0x73,
+ 0xd1, 0x0f, 0xc9, 0xb0, 0x42, 0x00, 0x84, 0xc0, 0x83, 0x29, 0xc2, 0x00,
+ 0x8e, 0x0f, 0xa2, 0x21, 0xc2, 0x00, 0x40, 0x0f, 0xa0, 0x0a, 0x00, 0x83,
+ 0x38, 0x11, 0xc0, 0x83, 0x3c, 0x47, 0xbf, 0xfd, 0xc0, 0x83, 0x4e, 0x42,
+ 0x17, 0x28, 0xc0, 0x83, 0x9d, 0xc3, 0x19, 0x2a, 0x0f, 0xa0, 0x92, 0x00,
+ 0x83, 0xa7, 0x0b, 0xc0, 0x83, 0xad, 0x07, 0xc0, 0x83, 0xb7, 0xcb, 0x8c,
+ 0xc9, 0x01, 0x50, 0x50, 0xc8, 0xbb, 0x6a, 0x0f, 0xaf, 0x81, 0x42, 0x00,
+ 0xbd, 0x40, 0x83, 0xc3, 0x87, 0x0f, 0xaa, 0x61, 0xc3, 0x57, 0xb3, 0x0f,
+ 0xcc, 0xf8, 0x00, 0x40, 0x83, 0xcf, 0x4a, 0x4c, 0x94, 0xc0, 0x83, 0xdb,
+ 0xc7, 0xc3, 0x06, 0x0f, 0xce, 0x48, 0xc4, 0x26, 0x78, 0x0e, 0x97, 0x4b,
+ 0x00, 0x84, 0x07, 0x07, 0xc0, 0x84, 0x0d, 0x15, 0xc0, 0x84, 0x1c, 0x08,
+ 0xc0, 0x84, 0x2e, 0x16, 0xc0, 0x84, 0x3b, 0xc3, 0x05, 0x14, 0x0e, 0x97,
+ 0x09, 0xc4, 0x15, 0xe7, 0x0e, 0x97, 0x00, 0xce, 0x6f, 0x0e, 0x08, 0xf7,
+ 0xc1, 0xca, 0xa1, 0x16, 0x08, 0xf7, 0xb9, 0x4b, 0x6f, 0xc7, 0xc0, 0x84,
+ 0x49, 0xc5, 0xcd, 0xfd, 0x08, 0xf7, 0x91, 0x47, 0x02, 0x0e, 0x40, 0x84,
+ 0x59, 0x4b, 0x99, 0x13, 0xc0, 0x84, 0xb5, 0xcd, 0x7a, 0xee, 0x0f, 0x8d,
+ 0x69, 0xd8, 0x21, 0xe3, 0x00, 0x05, 0xd1, 0xc6, 0xc3, 0xd9, 0x01, 0x81,
+ 0xe0, 0x45, 0x45, 0x76, 0xc0, 0x84, 0xcf, 0xcc, 0x88, 0x41, 0x01, 0x35,
+ 0x69, 0xd1, 0x55, 0x63, 0x0f, 0xca, 0x58, 0xca, 0x9f, 0x36, 0x01, 0x39,
+ 0x01, 0x42, 0x00, 0x5d, 0xc0, 0x84, 0xeb, 0x47, 0xb3, 0xd6, 0x40, 0x84,
+ 0xfd, 0xd6, 0x2f, 0x04, 0x01, 0x37, 0x79, 0xc7, 0xc1, 0x5b, 0x0f, 0x9a,
+ 0x08, 0xc7, 0x61, 0xfa, 0x01, 0x05, 0xe1, 0x48, 0xbe, 0xba, 0xc0, 0x85,
+ 0x25, 0x00, 0xc0, 0x85, 0x43, 0xce, 0x6e, 0xac, 0x0f, 0xab, 0x81, 0x45,
+ 0xd9, 0x7a, 0xc0, 0x85, 0x5b, 0xc2, 0x0f, 0x7b, 0x0f, 0xcb, 0x69, 0xce,
+ 0x6f, 0xee, 0x0f, 0xcd, 0xe9, 0xc6, 0xcc, 0xad, 0x0f, 0xa2, 0xf0, 0x46,
+ 0xca, 0xdf, 0xc0, 0x85, 0x79, 0x4a, 0x9d, 0xd8, 0x40, 0x85, 0x87, 0x87,
+ 0x0f, 0xce, 0xc9, 0xc3, 0x2b, 0x00, 0x0f, 0xcf, 0x91, 0xc7, 0xc9, 0x34,
+ 0x0f, 0xd4, 0x20, 0x42, 0x00, 0x63, 0xc0, 0x85, 0xcd, 0xc5, 0xd8, 0x5d,
+ 0x0f, 0x9a, 0x20, 0x0b, 0xc0, 0x85, 0xd7, 0x44, 0x91, 0x02, 0x40, 0x85,
+ 0xec, 0xcc, 0x07, 0xc7, 0x01, 0x13, 0x59, 0xc9, 0x00, 0xca, 0x01, 0x13,
+ 0x50, 0xcb, 0x97, 0xf5, 0x0b, 0x53, 0x79, 0xc4, 0x19, 0x53, 0x0b, 0x53,
+ 0x71, 0x45, 0x09, 0x98, 0x40, 0x85, 0xf8, 0x16, 0xc0, 0x86, 0x1c, 0x14,
+ 0xc0, 0x86, 0x2c, 0x42, 0x00, 0xd0, 0xc0, 0x86, 0x34, 0xc2, 0x00, 0xdb,
+ 0x0b, 0x52, 0xdb, 0x00, 0x86, 0x3c, 0x0d, 0xc0, 0x86, 0x40, 0x87, 0x0b,
+ 0x52, 0xc3, 0x00, 0x86, 0x50, 0xc2, 0x01, 0x4a, 0x0b, 0x52, 0xb9, 0xc3,
+ 0x04, 0x2e, 0x0b, 0x52, 0xa1, 0x91, 0x0b, 0x52, 0x93, 0x00, 0x86, 0x54,
+ 0x12, 0xc0, 0x86, 0x5c, 0x10, 0xc0, 0x86, 0x66, 0x0f, 0xc0, 0x86, 0x72,
+ 0xc3, 0x30, 0x59, 0x0b, 0x52, 0x59, 0xc2, 0x0e, 0x9a, 0x0b, 0x52, 0x2b,
+ 0x00, 0x86, 0x7e, 0x83, 0x0b, 0x52, 0x31, 0xc2, 0x01, 0x5d, 0x0b, 0x52,
+ 0x21, 0xc2, 0x42, 0xcd, 0x0b, 0x52, 0x10, 0x44, 0x00, 0xbb, 0xc0, 0x86,
+ 0x82, 0x46, 0x10, 0x79, 0xc0, 0x86, 0xba, 0x4a, 0x9e, 0x82, 0x40, 0x86,
+ 0xd6, 0x46, 0x02, 0x0f, 0xc0, 0x86, 0xfa, 0x4f, 0x62, 0x88, 0x40, 0x87,
+ 0x64, 0xd4, 0x3d, 0xb8, 0x05, 0x53, 0x81, 0xd2, 0x4d, 0x21, 0x05, 0x4f,
+ 0x30, 0x4f, 0x6a, 0x71, 0xc0, 0x87, 0x76, 0x54, 0x39, 0x80, 0x40, 0x87,
+ 0x9a, 0xc7, 0xc6, 0xb7, 0x00, 0x81, 0x59, 0x03, 0xc0, 0x87, 0xa6, 0x8b,
+ 0x00, 0x81, 0x6b, 0x00, 0x87, 0xb1, 0x97, 0x00, 0x81, 0x7b, 0x00, 0x87,
+ 0xb5, 0x87, 0x00, 0x81, 0x8b, 0x00, 0x87, 0xb9, 0x44, 0xb9, 0x62, 0xc0,
+ 0x87, 0xbf, 0x48, 0xb2, 0x2d, 0xc0, 0x87, 0xc9, 0x15, 0xc0, 0x87, 0xd7,
+ 0x52, 0x28, 0x9f, 0xc0, 0x87, 0xe3, 0xcc, 0x89, 0x19, 0x00, 0x83, 0x89,
+ 0x46, 0xce, 0x5d, 0x40, 0x87, 0xef, 0x0f, 0xc0, 0x87, 0xff, 0xce, 0x6e,
+ 0x4a, 0x00, 0x84, 0x10, 0xc4, 0x15, 0xe7, 0x00, 0x82, 0x01, 0xc3, 0x05,
+ 0x14, 0x00, 0x82, 0x09, 0x16, 0xc0, 0x88, 0x0b, 0x08, 0xc0, 0x88, 0x17,
+ 0x15, 0xc0, 0x88, 0x23, 0xc5, 0x06, 0xdb, 0x00, 0x82, 0x41, 0xc4, 0x26,
+ 0x78, 0x00, 0x82, 0x48, 0xc7, 0xc6, 0xfd, 0x0f, 0xa8, 0xf9, 0xc5, 0x5b,
+ 0x0d, 0x01, 0x19, 0x42, 0x00, 0x88, 0x2f, 0x00, 0xc0, 0x88, 0x35, 0x4a,
+ 0x0d, 0xd8, 0x40, 0x88, 0x53, 0xcb, 0x97, 0x9d, 0x08, 0x85, 0xeb, 0x00,
+ 0x88, 0x6b, 0x4b, 0x6f, 0xc7, 0xc0, 0x88, 0x71, 0x06, 0xc0, 0x88, 0x91,
+ 0x15, 0xc0, 0x88, 0x9d, 0xd0, 0x5d, 0x12, 0x08, 0x85, 0xe1, 0xd1, 0x50,
+ 0xce, 0x08, 0x85, 0xd9, 0x47, 0x02, 0x0e, 0x40, 0x88, 0xa9, 0x45, 0x00,
+ 0xba, 0xc0, 0x89, 0x10, 0x45, 0x2b, 0x5f, 0xc0, 0x89, 0x1c, 0x46, 0x34,
+ 0x6f, 0xc0, 0x89, 0x2b, 0x47, 0x02, 0x0e, 0xc0, 0x89, 0x3d, 0x46, 0x09,
+ 0x97, 0x40, 0x89, 0xa3, 0x45, 0xdb, 0xe6, 0xc0, 0x89, 0xc7, 0x09, 0x40,
+ 0x89, 0xe5, 0x4c, 0x8c, 0x49, 0xc0, 0x89, 0xf1, 0xc6, 0x92, 0x0c, 0x0b,
+ 0x7f, 0x20, 0x46, 0x09, 0x97, 0xc0, 0x89, 0xf9, 0x45, 0x00, 0xba, 0xc0,
+ 0x8a, 0x1d, 0x4b, 0x6f, 0xc7, 0xc0, 0x8a, 0x2f, 0x47, 0x02, 0x0e, 0x40,
+ 0x8a, 0x49, 0x15, 0xc0, 0x8a, 0xb0, 0xd1, 0x50, 0xce, 0x08, 0x91, 0xe9,
+ 0x06, 0xc0, 0x8a, 0xbc, 0xce, 0x73, 0x1a, 0x08, 0x91, 0xd1, 0x4b, 0x6f,
+ 0xc7, 0xc0, 0x8a, 0xc8, 0x47, 0x02, 0x0e, 0x40, 0x8a, 0xdf, 0x15, 0xc0,
+ 0x8b, 0x42, 0x46, 0x09, 0x97, 0xc0, 0x8b, 0x4e, 0xd4, 0x3a, 0xd4, 0x00,
+ 0xbe, 0xd9, 0x46, 0x34, 0x6f, 0xc0, 0x8b, 0x72, 0x52, 0x4c, 0xb5, 0xc0,
+ 0x8b, 0x7e, 0x47, 0x02, 0x0e, 0x40, 0x8b, 0x94, 0x4c, 0x11, 0xe2, 0xc0,
+ 0x8b, 0xde, 0xd1, 0x53, 0xa9, 0x08, 0x52, 0x41, 0x47, 0x34, 0x2f, 0xc0,
+ 0x8b, 0xf6, 0x46, 0x09, 0x97, 0xc0, 0x8c, 0x00, 0x18, 0xc0, 0x8c, 0x10,
+ 0x45, 0x00, 0xba, 0xc0, 0x8c, 0x1c, 0x47, 0x02, 0x0e, 0x40, 0x8c, 0x3a,
+ 0xc5, 0x01, 0xc2, 0x0f, 0xa4, 0x59, 0x44, 0x00, 0x74, 0x40, 0x8c, 0x90,
+ 0x16, 0xc0, 0x8c, 0x9f, 0xc3, 0x7c, 0xb4, 0x01, 0x5e, 0xe0, 0x44, 0x03,
+ 0xda, 0xc0, 0x8c, 0xab, 0xc2, 0x00, 0x29, 0x01, 0x35, 0x90, 0xc6, 0x6b,
+ 0xc0, 0x0f, 0xa7, 0x81, 0x42, 0x01, 0x31, 0xc0, 0x8c, 0xb7, 0x00, 0xc0,
+ 0x8c, 0xef, 0x45, 0x02, 0x6d, 0x40, 0x8d, 0x07, 0x44, 0x0d, 0x14, 0xc0,
+ 0x8d, 0x13, 0x4d, 0x7c, 0x5a, 0x40, 0x8d, 0x2b, 0xc9, 0x2a, 0xec, 0x01,
+ 0x5e, 0x48, 0xc4, 0x9b, 0xb8, 0x01, 0x1c, 0xc1, 0xc4, 0x02, 0x6d, 0x00,
+ 0x04, 0x28, 0x03, 0xc0, 0x8d, 0x31, 0x51, 0x54, 0xca, 0xc0, 0x8d, 0x3d,
+ 0x4e, 0x6f, 0x9a, 0x40, 0x8d, 0x49, 0x48, 0xbd, 0x22, 0x40, 0x8d, 0x55,
+ 0xc2, 0x00, 0xdb, 0x01, 0x10, 0x39, 0x47, 0xc4, 0xb8, 0x40, 0x8d, 0x6d,
+ 0xc7, 0x77, 0xc1, 0x01, 0x05, 0x31, 0xc8, 0xb5, 0x62, 0x0f, 0xa4, 0x28,
+ 0xcc, 0x5f, 0x56, 0x01, 0x03, 0x71, 0xc4, 0xa8, 0x2a, 0x0f, 0x9e, 0xf0,
+ 0x02, 0xc0, 0x8d, 0x7f, 0xc7, 0xc6, 0x5c, 0x01, 0x56, 0xe8, 0x42, 0x00,
+ 0x8e, 0xc0, 0x8d, 0x8b, 0xcf, 0x4c, 0x04, 0x01, 0x15, 0x93, 0x00, 0x8d,
+ 0x95, 0xcd, 0x7e, 0x62, 0x01, 0x05, 0xd8, 0x45, 0x84, 0xa8, 0xc0, 0x8d,
+ 0x9b, 0x00, 0xc0, 0x8d, 0xab, 0x87, 0x0f, 0xae, 0x42, 0x00, 0x8d, 0xe4,
+ 0xd9, 0x1f, 0x63, 0x0f, 0xa8, 0xf1, 0xc5, 0x53, 0xf8, 0x01, 0x36, 0xa3,
+ 0x00, 0x8d, 0xf3, 0x12, 0xc0, 0x8d, 0xf9, 0xcd, 0x80, 0x43, 0x0f, 0xa7,
+ 0xa9, 0x04, 0xc0, 0x8e, 0x05, 0xce, 0x71, 0x92, 0x0f, 0xb5, 0x68, 0xd0,
+ 0x5f, 0x52, 0x01, 0x03, 0x79, 0xc8, 0xb8, 0x42, 0x08, 0x0c, 0x70, 0xcc,
+ 0x8a, 0xa5, 0x0f, 0x0a, 0x71, 0x46, 0x02, 0x0f, 0x40, 0x8e, 0x11, 0xc4,
+ 0x26, 0x78, 0x0f, 0x0a, 0x49, 0xc5, 0x06, 0xdb, 0x0f, 0x0a, 0x41, 0x15,
+ 0xc0, 0x8e, 0x93, 0x08, 0xc0, 0x8e, 0x9f, 0x16, 0xc0, 0x8e, 0xab, 0xc3,
+ 0x05, 0x14, 0x0f, 0x0a, 0x09, 0xc4, 0x15, 0xe7, 0x0f, 0x0a, 0x00, 0xd2,
+ 0x4a, 0xcf, 0x0f, 0x09, 0xe9, 0x44, 0x00, 0xbb, 0x40, 0x8e, 0xb7, 0x86,
+ 0x0f, 0x09, 0xb1, 0x89, 0x0f, 0x09, 0xa9, 0x95, 0x0f, 0x09, 0xa1, 0x98,
+ 0x0f, 0x09, 0x99, 0x8c, 0x0f, 0x09, 0x91, 0x8f, 0x0f, 0x09, 0x89, 0x84,
+ 0x0f, 0x09, 0x80, 0x4c, 0x8b, 0xad, 0xc0, 0x8e, 0xc3, 0xce, 0x1c, 0x92,
+ 0x0b, 0x7f, 0x08, 0x44, 0x00, 0x51, 0xc0, 0x8e, 0xcb, 0xc8, 0xab, 0x80,
+ 0x01, 0x08, 0xb0, 0x4f, 0x6a, 0x80, 0x40, 0x8e, 0xe1, 0xc2, 0x00, 0xbf,
+ 0x01, 0x16, 0x09, 0xc3, 0x02, 0x9b, 0x01, 0x16, 0x00, 0xc8, 0x60, 0x55,
+ 0x01, 0x10, 0x89, 0x46, 0x1f, 0x87, 0x40, 0x8e, 0xed, 0xc8, 0x26, 0x58,
+ 0x01, 0x10, 0x81, 0x47, 0x20, 0x7d, 0x40, 0x8e, 0xf9, 0xca, 0x9e, 0x14,
+ 0x00, 0x3f, 0xf1, 0xc9, 0xb1, 0xb8, 0x00, 0x3f, 0xe9, 0x45, 0x09, 0x98,
+ 0x40, 0x8f, 0x0b, 0xc9, 0xb1, 0x55, 0x00, 0x3f, 0xd1, 0xd2, 0x4a, 0x1b,
+ 0x00, 0x3f, 0xa9, 0x46, 0x02, 0x0f, 0x40, 0x8f, 0x2f, 0xc2, 0x01, 0xc3,
+ 0x00, 0x3f, 0xc1, 0x47, 0x1d, 0xd4, 0x40, 0x8f, 0xaf, 0xca, 0x9f, 0x2c,
+ 0x00, 0x3f, 0xb9, 0xc9, 0xac, 0x8d, 0x00, 0x3f, 0xb0, 0xc7, 0xc0, 0xd6,
+ 0x0f, 0xd3, 0x69, 0xc7, 0xc8, 0x77, 0x0f, 0xd3, 0x39, 0xc8, 0xb9, 0xe2,
+ 0x0f, 0xd3, 0x41, 0xc8, 0xbb, 0x52, 0x0f, 0xd3, 0x49, 0xc5, 0xa0, 0x85,
+ 0x0f, 0xd3, 0x51, 0x05, 0x40, 0x8f, 0xc7, 0xc5, 0xa0, 0x85, 0x0f, 0xd3,
+ 0x19, 0xc7, 0xc8, 0x77, 0x0f, 0xd3, 0x01, 0xc8, 0xb9, 0xe2, 0x0f, 0xd3,
+ 0x09, 0xc8, 0xbb, 0x52, 0x0f, 0xd3, 0x11, 0x05, 0xc0, 0x8f, 0xd3, 0xc7,
+ 0xc0, 0xd6, 0x0f, 0xd3, 0x30, 0x4a, 0xa3, 0x46, 0xc0, 0x8f, 0xdf, 0x5a,
+ 0x1a, 0x98, 0x40, 0x8f, 0xf7, 0xcc, 0x88, 0x4d, 0x01, 0x1c, 0x19, 0x43,
+ 0x18, 0x14, 0x40, 0x90, 0x0d, 0xc4, 0x0e, 0x9a, 0x01, 0x00, 0xa1, 0xc5,
+ 0x40, 0x88, 0x01, 0x00, 0x19, 0xc4, 0x02, 0xb9, 0x01, 0x00, 0x08, 0xc2,
+ 0x00, 0x8e, 0x01, 0x32, 0x0b, 0x00, 0x90, 0x29, 0x00, 0x40, 0x90, 0x2f,
+ 0x07, 0xc0, 0x90, 0x3b, 0x04, 0xc0, 0x90, 0x45, 0x11, 0xc0, 0x90, 0x51,
+ 0x0b, 0xc0, 0x90, 0x5b, 0x0a, 0xc0, 0x90, 0x65, 0x18, 0xc0, 0x90, 0x71,
+ 0x03, 0xc0, 0x90, 0x7b, 0x42, 0x00, 0x1c, 0xc0, 0x90, 0x85, 0x43, 0xe5,
+ 0xc6, 0xc0, 0x90, 0x8d, 0x43, 0xe6, 0x4d, 0xc0, 0x90, 0xb0, 0x42, 0xe6,
+ 0xa1, 0xc0, 0x90, 0xd9, 0x42, 0xdd, 0x2f, 0xc0, 0x90, 0xe5, 0x42, 0xde,
+ 0x65, 0xc0, 0x90, 0xf9, 0x42, 0xe4, 0xce, 0xc0, 0x91, 0x09, 0x42, 0xe6,
+ 0x99, 0xc0, 0x91, 0x1d, 0x43, 0xe5, 0x5a, 0xc0, 0x91, 0x29, 0x42, 0xc6,
+ 0x1c, 0xc0, 0x91, 0x45, 0x10, 0xc0, 0x91, 0x4d, 0x42, 0xe4, 0xb6, 0xc0,
+ 0x91, 0x5d, 0x43, 0xe5, 0xd8, 0xc0, 0x91, 0x71, 0x43, 0xe5, 0xf3, 0xc0,
+ 0x91, 0x97, 0x42, 0xd1, 0x32, 0xc0, 0x91, 0xb7, 0x42, 0xe5, 0x0a, 0xc0,
+ 0x91, 0xcf, 0x42, 0xe6, 0x9d, 0xc0, 0x91, 0xe7, 0x42, 0xe6, 0x9b, 0x40,
+ 0x92, 0x03, 0x14, 0xc0, 0x92, 0x0f, 0x59, 0x10, 0x15, 0x40, 0x92, 0x1b,
+ 0xc3, 0x00, 0xcb, 0x01, 0x11, 0xc9, 0x49, 0x0f, 0x0c, 0x40, 0x92, 0x3f,
+ 0x48, 0x14, 0x8a, 0xc0, 0x92, 0x4b, 0x07, 0x40, 0x92, 0x9f, 0x0f, 0xc0,
+ 0x92, 0xab, 0xc3, 0x0d, 0xe5, 0x00, 0x9b, 0x28, 0xcc, 0x88, 0xad, 0x00,
+ 0x9b, 0x31, 0xd2, 0x45, 0xf9, 0x00, 0x9b, 0x40, 0xc3, 0x05, 0x14, 0x00,
+ 0x9b, 0x49, 0x16, 0xc0, 0x92, 0xb7, 0x08, 0xc0, 0x92, 0xc3, 0x15, 0xc0,
+ 0x92, 0xcf, 0xc5, 0x06, 0xdb, 0x00, 0x9b, 0x81, 0xc4, 0x26, 0x78, 0x00,
+ 0x9b, 0x88, 0x16, 0xc0, 0x92, 0xdb, 0x08, 0xc0, 0x92, 0xf0, 0x15, 0xc0,
+ 0x92, 0xfc, 0xc6, 0xcf, 0x9b, 0x00, 0x9b, 0xc9, 0xc6, 0x2a, 0xfe, 0x00,
+ 0x9b, 0xd1, 0xc7, 0x0d, 0x04, 0x00, 0x9b, 0xd8, 0xc5, 0xdc, 0x7c, 0x00,
+ 0x9c, 0x81, 0x06, 0xc0, 0x93, 0x08, 0xc6, 0x80, 0xbb, 0x00, 0x9c, 0x91,
+ 0xcc, 0x80, 0xb5, 0x00, 0x9c, 0x99, 0x0d, 0xc0, 0x93, 0x17, 0xc6, 0xcc,
+ 0x65, 0x00, 0x9c, 0xb1, 0xc5, 0xce, 0xca, 0x00, 0x9c, 0xb8, 0xc7, 0x81,
+ 0x9e, 0x01, 0x10, 0x43, 0x00, 0x93, 0x23, 0x45, 0xda, 0x29, 0xc0, 0x93,
+ 0x27, 0xc5, 0xbb, 0x55, 0x0f, 0xa0, 0xc1, 0xc5, 0xd8, 0x67, 0x0f, 0xb6,
+ 0xb8, 0xd2, 0x4a, 0xab, 0x08, 0x7f, 0xb1, 0x46, 0x02, 0x0f, 0x40, 0x93,
+ 0x31, 0x83, 0x08, 0x28, 0x01, 0xc2, 0x00, 0x51, 0x08, 0x28, 0x09, 0x05,
+ 0xc0, 0x93, 0x94, 0x06, 0xc0, 0x93, 0x9e, 0x10, 0xc0, 0x93, 0xa8, 0x87,
+ 0x08, 0x28, 0x43, 0x00, 0x93, 0xbc, 0xc2, 0x14, 0xda, 0x08, 0x28, 0x49,
+ 0x09, 0xc0, 0x93, 0xc0, 0xc2, 0x01, 0x7f, 0x08, 0x28, 0x61, 0x8b, 0x08,
+ 0x28, 0x69, 0xc2, 0x1c, 0x52, 0x08, 0x28, 0x71, 0x0d, 0xc0, 0x93, 0xce,
+ 0x0e, 0xc0, 0x93, 0xd8, 0xc2, 0x00, 0x4e, 0x08, 0x28, 0x91, 0x91, 0x08,
+ 0x28, 0xb1, 0xc2, 0x00, 0x67, 0x08, 0x28, 0xb9, 0xc2, 0x99, 0xe7, 0x08,
+ 0x28, 0xc1, 0x14, 0xc0, 0x93, 0xe2, 0x15, 0xc0, 0x93, 0xec, 0x16, 0xc0,
+ 0x93, 0xf6, 0x97, 0x08, 0x28, 0xf9, 0xc2, 0x00, 0x5f, 0x08, 0x29, 0x01,
+ 0xc2, 0x24, 0xe2, 0x08, 0x29, 0x09, 0x9b, 0x08, 0x29, 0x11, 0x1c, 0x40,
+ 0x94, 0x00, 0x42, 0x00, 0xac, 0xc0, 0x94, 0x0a, 0x12, 0xc0, 0x94, 0x10,
+ 0xcf, 0x15, 0x36, 0x01, 0x39, 0x98, 0x46, 0x00, 0x8b, 0x40, 0x94, 0x1c,
+ 0x43, 0x00, 0x55, 0xc0, 0x94, 0x28, 0xda, 0x1c, 0xee, 0x0f, 0xa8, 0xd0,
+ 0xc4, 0x0e, 0x9a, 0x01, 0x00, 0x99, 0xc5, 0x40, 0x88, 0x01, 0x00, 0x11,
+ 0xc4, 0x02, 0xb9, 0x01, 0x00, 0x00, 0xc4, 0x00, 0x49, 0x01, 0x19, 0x59,
+ 0xc5, 0x00, 0x2c, 0x01, 0x19, 0x30, 0x46, 0x04, 0x8f, 0xc0, 0x94, 0x4a,
+ 0x46, 0x01, 0xfc, 0x40, 0x94, 0x5c, 0xc3, 0x05, 0x14, 0x01, 0x5f, 0x81,
+ 0xc3, 0x02, 0x9f, 0x01, 0x5f, 0x88, 0x00, 0xc0, 0x94, 0x6e, 0x42, 0x00,
+ 0x97, 0x40, 0x94, 0x7a, 0xca, 0x9f, 0x9a, 0x01, 0x12, 0xd1, 0x47, 0x37,
+ 0x4f, 0x40, 0x94, 0x8f, 0x95, 0x01, 0x12, 0xc9, 0xc8, 0x19, 0x58, 0x01,
+ 0x09, 0x70, 0xc5, 0x00, 0xb9, 0x01, 0x05, 0x61, 0xce, 0x72, 0x48, 0x01,
+ 0x05, 0x01, 0x45, 0xd3, 0xe9, 0x40, 0x94, 0x9b, 0xc6, 0xcd, 0x91, 0x0f,
+ 0xcd, 0x71, 0xc3, 0x0e, 0x6b, 0x0f, 0x9d, 0xc0, 0x46, 0x09, 0x97, 0xc0,
+ 0x94, 0xa7, 0xc2, 0x00, 0x7a, 0x08, 0xec, 0xc1, 0x18, 0xc0, 0x94, 0xcb,
+ 0x45, 0x00, 0xba, 0xc0, 0x94, 0xd7, 0x47, 0x02, 0x0e, 0x40, 0x94, 0xe3,
+ 0xc8, 0x91, 0x02, 0x01, 0x05, 0x91, 0xc5, 0xda, 0xb0, 0x0f, 0xa4, 0x10,
+ 0x45, 0x00, 0xba, 0xc0, 0x95, 0x50, 0x47, 0x02, 0x0e, 0xc0, 0x95, 0x74,
+ 0x4b, 0x6f, 0xc7, 0xc0, 0x95, 0xe9, 0x46, 0x09, 0x97, 0xc0, 0x96, 0x07,
+ 0xc5, 0xd6, 0xb9, 0x00, 0x53, 0x81, 0x03, 0xc0, 0x96, 0x2b, 0xc3, 0x02,
+ 0x30, 0x00, 0x53, 0x91, 0xc3, 0x06, 0x63, 0x00, 0x53, 0x99, 0xc8, 0xbb,
+ 0x5a, 0x00, 0x53, 0xa0, 0x45, 0x00, 0xba, 0xc0, 0x96, 0x37, 0x47, 0x02,
+ 0x0e, 0xc0, 0x96, 0x59, 0x46, 0x34, 0x6f, 0xc0, 0x96, 0xc4, 0xc2, 0x00,
+ 0x7a, 0x00, 0x56, 0x81, 0x46, 0x09, 0x97, 0xc0, 0x96, 0xd0, 0xd1, 0x50,
+ 0xce, 0x00, 0x57, 0x81, 0xca, 0x76, 0x52, 0x00, 0x57, 0x88, 0x96, 0x0f,
+ 0xa0, 0x81, 0xc5, 0xde, 0x61, 0x0f, 0xca, 0x28, 0xc4, 0xe3, 0x4b, 0x08,
+ 0x19, 0x99, 0x03, 0xc0, 0x96, 0xf4, 0xc8, 0xbd, 0x52, 0x08, 0x19, 0xa9,
+ 0x0b, 0xc0, 0x97, 0x00, 0x0a, 0xc0, 0x97, 0x0c, 0x16, 0xc0, 0x97, 0x18,
+ 0xc3, 0x71, 0x13, 0x08, 0x19, 0xc9, 0xc5, 0xdd, 0xd5, 0x08, 0x19, 0xd1,
+ 0xc5, 0xdd, 0x5d, 0x08, 0x19, 0xd9, 0xc5, 0x84, 0xe1, 0x08, 0x19, 0xe1,
+ 0x10, 0xc0, 0x97, 0x24, 0xc3, 0xad, 0x41, 0x08, 0x19, 0xf1, 0xc4, 0xde,
+ 0xd3, 0x08, 0x19, 0xf9, 0xc8, 0xbf, 0x7a, 0x08, 0x1a, 0x01, 0xc5, 0xd5,
+ 0xa1, 0x08, 0x1a, 0x11, 0xc5, 0xda, 0x1f, 0x08, 0x1a, 0x19, 0xc5, 0xd5,
+ 0x6f, 0x08, 0x1a, 0x29, 0xc5, 0xdd, 0x85, 0x08, 0x1a, 0x31, 0xc5, 0xd4,
+ 0x6b, 0x08, 0x1a, 0x49, 0xc7, 0xc1, 0xa1, 0x08, 0x19, 0x89, 0xc4, 0xe0,
+ 0x53, 0x08, 0x19, 0x90, 0x07, 0xc0, 0x97, 0x30, 0x4a, 0x07, 0xca, 0x40,
+ 0x97, 0x3c, 0x45, 0xda, 0x51, 0xc0, 0x97, 0x63, 0xcb, 0x8e, 0x29, 0x0f,
+ 0x9c, 0x99, 0xc3, 0x5f, 0x5f, 0x0f, 0x9a, 0x39, 0xc9, 0x1f, 0x0f, 0x00,
+ 0x03, 0x00, 0x46, 0x96, 0x81, 0xc0, 0x97, 0x81, 0xcb, 0x8d, 0x63, 0x0f,
+ 0xb1, 0x60, 0xca, 0x9b, 0x6c, 0x0f, 0xa4, 0xb9, 0x43, 0x11, 0x49, 0x40,
+ 0x97, 0x90, 0x45, 0x00, 0x8b, 0x40, 0x97, 0x9c, 0xc3, 0x03, 0x0d, 0x01,
+ 0x32, 0x51, 0xc6, 0xaf, 0x06, 0x0f, 0xa4, 0x70, 0x46, 0x4d, 0x6c, 0xc0,
+ 0x97, 0xa8, 0x46, 0x8f, 0x12, 0x40, 0x97, 0xb4, 0x8e, 0x0f, 0xa3, 0x3b,
+ 0x00, 0x97, 0xd2, 0xc9, 0xb3, 0xa7, 0x0f, 0xcc, 0x90, 0xc9, 0xb3, 0x29,
+ 0x0f, 0x98, 0xf9, 0xd1, 0x54, 0xa8, 0x0f, 0x98, 0x81, 0xc3, 0x26, 0x19,
+ 0x0f, 0xcf, 0x20, 0x48, 0x4f, 0x6b, 0xc0, 0x97, 0xd8, 0xca, 0xa6, 0x52,
+ 0x0f, 0xca, 0xd8, 0xc4, 0xdf, 0x0b, 0x0f, 0xcd, 0x39, 0x42, 0x00, 0x5d,
+ 0x40, 0x97, 0xe4, 0xc8, 0x27, 0xbc, 0x01, 0x15, 0xb1, 0x43, 0x38, 0x5f,
+ 0x40, 0x97, 0xf0, 0xd0, 0x1d, 0xec, 0x07, 0xe9, 0xf1, 0xd1, 0x1a, 0x4a,
+ 0x07, 0xe9, 0xf8, 0x4d, 0x53, 0xa9, 0xc0, 0x98, 0x18, 0x47, 0x34, 0x2f,
+ 0xc0, 0x98, 0x24, 0xc8, 0xba, 0x12, 0x0f, 0x69, 0x71, 0x51, 0x4f, 0x03,
+ 0x40, 0x98, 0x4b, 0xc4, 0xdf, 0x13, 0x0f, 0xb4, 0xb1, 0xc3, 0x22, 0xd3,
+ 0x0f, 0xb4, 0x69, 0xca, 0x9f, 0x90, 0x0f, 0xb4, 0xa1, 0xca, 0xa0, 0x4e,
+ 0x0f, 0xb4, 0xc1, 0xcb, 0x91, 0x4c, 0x0f, 0xb7, 0x88, 0x00, 0xc0, 0x98,
+ 0x63, 0xcf, 0x6a, 0xcb, 0x0f, 0xd3, 0x88, 0xe0, 0x02, 0x47, 0x0f, 0xa8,
+ 0xd8, 0x10, 0xc0, 0x98, 0x6f, 0xd5, 0x36, 0x71, 0x00, 0x04, 0xe8, 0xc6,
+ 0xcb, 0xd5, 0x01, 0x19, 0x29, 0xc8, 0xb7, 0xca, 0x0f, 0xa5, 0xfa, 0x00,
+ 0x98, 0x77, 0x00, 0xc0, 0x98, 0x7d, 0x43, 0x00, 0x29, 0x40, 0x98, 0xb3,
+ 0x12, 0xc0, 0x98, 0xc5, 0xc4, 0x14, 0x4c, 0x00, 0xe3, 0xe9, 0xc5, 0xd6,
+ 0xf5, 0x00, 0xe3, 0xd9, 0x42, 0x14, 0x48, 0xc0, 0x98, 0xd1, 0xd0, 0x4f,
+ 0x37, 0x00, 0xe3, 0xc9, 0x47, 0x02, 0x0e, 0xc0, 0x98, 0xdd, 0x46, 0x09,
+ 0x97, 0x40, 0x98, 0xf5, 0x46, 0x0c, 0x51, 0xc0, 0x99, 0x19, 0xc8, 0xb6,
+ 0x1a, 0x0f, 0xa7, 0x20, 0x06, 0xc0, 0x99, 0x31, 0x05, 0xc0, 0x99, 0x3d,
+ 0xcf, 0x6a, 0xf8, 0x01, 0x22, 0x39, 0x04, 0xc0, 0x99, 0x49, 0xcd, 0x7e,
+ 0xca, 0x01, 0x22, 0x19, 0xc4, 0x4a, 0x3f, 0x01, 0x22, 0x11, 0xc4, 0x01,
+ 0x23, 0x01, 0x22, 0x00, 0xc4, 0x7e, 0x7a, 0x0f, 0xa0, 0xc9, 0xcb, 0x99,
+ 0xce, 0x0f, 0xb6, 0x88, 0x4e, 0x6e, 0x3c, 0xc0, 0x99, 0x5b, 0xc6, 0x59,
+ 0x92, 0x01, 0x72, 0xe8, 0xc3, 0x02, 0x6e, 0x01, 0x01, 0xf1, 0xc2, 0x00,
+ 0xb6, 0x0f, 0xae, 0xba, 0x00, 0x99, 0x67, 0xd5, 0x37, 0x97, 0x00, 0xb4,
+ 0xe1, 0xcc, 0x37, 0xa0, 0x00, 0xb4, 0xd9, 0x47, 0x02, 0x0e, 0xc0, 0x99,
+ 0x6d, 0xca, 0xa5, 0x1c, 0x00, 0xb4, 0x00, 0x47, 0x02, 0x0e, 0xc0, 0x99,
+ 0xc7, 0x46, 0x09, 0x97, 0x40, 0x9a, 0x4a, 0x4f, 0x0b, 0x17, 0xc0, 0x9a,
+ 0x6e, 0x4d, 0x29, 0xb9, 0x40, 0x9a, 0xd5, 0x12, 0xc0, 0x9b, 0x3c, 0xc5,
+ 0xdb, 0x73, 0x0e, 0x7e, 0x11, 0x06, 0xc0, 0x9b, 0x4d, 0x11, 0xc0, 0x9b,
+ 0x63, 0x0d, 0xc0, 0x9b, 0x72, 0x15, 0xc0, 0x9b, 0x90, 0xc6, 0xd2, 0xfb,
+ 0x0e, 0x7d, 0x3b, 0x00, 0x9b, 0xa3, 0x1c, 0xc0, 0x9b, 0xa7, 0xc4, 0xe0,
+ 0x1b, 0x0e, 0x7c, 0x19, 0x14, 0xc0, 0x9b, 0xb1, 0x42, 0x11, 0xee, 0xc0,
+ 0x9b, 0xbd, 0x49, 0xb1, 0x79, 0xc0, 0x9b, 0xc9, 0x4a, 0xa0, 0x8a, 0x40,
+ 0x9b, 0xe7, 0xc3, 0x23, 0x6d, 0x0e, 0x7a, 0x31, 0xc5, 0x78, 0xdb, 0x0e,
+ 0x7a, 0x29, 0xce, 0x72, 0xe2, 0x0e, 0x7a, 0x21, 0x46, 0xce, 0xe7, 0x40,
+ 0x9b, 0xfd, 0xdb, 0x18, 0xf6, 0x0e, 0x7a, 0x09, 0x45, 0x01, 0xc3, 0xc0,
+ 0x9c, 0x05, 0xd7, 0x29, 0x40, 0x0e, 0x79, 0xf1, 0x51, 0x54, 0x75, 0x40,
+ 0x9c, 0x57, 0xc8, 0xba, 0xca, 0x08, 0xd2, 0x39, 0x44, 0x00, 0xbb, 0x40,
+ 0x9c, 0x69, 0x46, 0x37, 0xee, 0xc0, 0x9c, 0x7b, 0x46, 0x26, 0xd5, 0x40,
+ 0x9c, 0x87, 0xd6, 0x2d, 0xe6, 0x08, 0xd2, 0x29, 0xc9, 0x15, 0xcc, 0x08,
+ 0xd1, 0xf8, 0xca, 0xa4, 0x36, 0x08, 0xd2, 0x21, 0xcb, 0x99, 0x6b, 0x08,
+ 0xd2, 0x19, 0xc4, 0x01, 0xe2, 0x08, 0xd2, 0x11, 0xc5, 0x32, 0x89, 0x08,
+ 0xd2, 0x08, 0x0d, 0xc0, 0x9c, 0x93, 0xc2, 0x00, 0xd0, 0x08, 0xd1, 0x89,
+ 0x15, 0xc0, 0x9c, 0xa3, 0xc2, 0x02, 0x41, 0x08, 0xd1, 0x69, 0xc2, 0x00,
+ 0xdb, 0x08, 0xd1, 0x61, 0xc2, 0x00, 0x39, 0x08, 0xd1, 0x59, 0xc2, 0x19,
+ 0x2c, 0x08, 0xd1, 0x51, 0xc2, 0x00, 0x02, 0x08, 0xd1, 0x49, 0x1c, 0xc0,
+ 0x9c, 0xb3, 0x06, 0xc0, 0x9c, 0xbd, 0x16, 0xc0, 0x9c, 0xcf, 0xc2, 0x01,
+ 0xc3, 0x08, 0xd1, 0x11, 0x04, 0xc0, 0x9c, 0xe1, 0x12, 0xc0, 0x9c, 0xeb,
+ 0x10, 0xc0, 0x9c, 0xf5, 0xc2, 0x25, 0x3b, 0x08, 0xd0, 0x91, 0x05, 0xc0,
+ 0x9d, 0x0b, 0x09, 0xc0, 0x9d, 0x15, 0x83, 0x08, 0xd0, 0x00, 0xcb, 0x36,
+ 0x51, 0x08, 0xd0, 0x51, 0x45, 0x00, 0xba, 0x40, 0x9d, 0x1f, 0xd5, 0x34,
+ 0xa3, 0x01, 0x51, 0xf1, 0x45, 0x00, 0x2d, 0xc0, 0x9d, 0x3f, 0xd4, 0x3a,
+ 0xc0, 0x01, 0x53, 0x28, 0x46, 0xcc, 0x23, 0xc0, 0x9d, 0x4b, 0xc3, 0x3a,
+ 0x48, 0x01, 0x4c, 0x08, 0xcf, 0x60, 0x30, 0x01, 0x4c, 0x49, 0xcd, 0x7d,
+ 0x6b, 0x01, 0x4c, 0x38, 0xc6, 0x57, 0xec, 0x01, 0x00, 0x69, 0x42, 0x00,
+ 0x10, 0xc0, 0x9d, 0x55, 0xc5, 0x40, 0x88, 0x01, 0x00, 0x58, 0xcb, 0x95,
+ 0x82, 0x01, 0x37, 0xd9, 0xd3, 0x44, 0xdb, 0x0f, 0xa9, 0x81, 0xc6, 0xcf,
+ 0x1d, 0x0f, 0xa3, 0xd1, 0xc4, 0xc9, 0x19, 0x0f, 0xa3, 0xc9, 0xcb, 0x95,
+ 0x6c, 0x0f, 0x9f, 0x19, 0xc5, 0xb0, 0x15, 0x0f, 0x9c, 0x71, 0xc6, 0xcd,
+ 0x2b, 0x0f, 0x9f, 0x79, 0xda, 0x19, 0x46, 0x01, 0x80, 0x20, 0x42, 0x00,
+ 0xb0, 0xc0, 0x9d, 0x61, 0x42, 0x00, 0x49, 0xc0, 0x9d, 0x6d, 0x46, 0x09,
+ 0x97, 0xc0, 0x9d, 0x79, 0xd3, 0x45, 0x99, 0x05, 0x4e, 0x69, 0xcf, 0x60,
+ 0xc6, 0x05, 0x4e, 0x11, 0x4f, 0x30, 0x90, 0xc0, 0x9d, 0x9d, 0x4b, 0x6f,
+ 0xc7, 0xc0, 0x9d, 0xaf, 0x45, 0x00, 0xba, 0x40, 0x9d, 0xd1, 0x44, 0x02,
+ 0xbe, 0xc0, 0x9d, 0xec, 0x45, 0x44, 0xba, 0x40, 0x9d, 0xf8, 0xd0, 0x0f,
+ 0x09, 0x01, 0x02, 0x41, 0xc4, 0x01, 0xc3, 0x00, 0x01, 0xf8, 0x49, 0x14,
+ 0x89, 0xc0, 0x9e, 0x04, 0x48, 0x91, 0xff, 0x40, 0x9e, 0x7d, 0x47, 0x02,
+ 0x0e, 0xc0, 0x9e, 0xcf, 0xd0, 0x59, 0x92, 0x08, 0x75, 0x69, 0x4a, 0x51,
+ 0x89, 0x40, 0x9f, 0x54, 0x8e, 0x00, 0x00, 0xc3, 0x00, 0x9f, 0x60, 0x94,
+ 0x01, 0x32, 0x58, 0x95, 0x00, 0xa8, 0x2b, 0x00, 0x9f, 0x6a, 0x90, 0x00,
+ 0xa6, 0x83, 0x00, 0x9f, 0x95, 0x85, 0x00, 0xa5, 0x0b, 0x00, 0x9f, 0xd2,
+ 0x04, 0xc0, 0x9f, 0xf5, 0x96, 0x00, 0xa3, 0x33, 0x00, 0xa0, 0x07, 0x19,
+ 0xc0, 0xa0, 0x39, 0x94, 0x00, 0xaa, 0x83, 0x00, 0xa0, 0x55, 0x88, 0x00,
+ 0xaa, 0xeb, 0x00, 0xa0, 0x78, 0x87, 0x00, 0xa0, 0x0b, 0x00, 0xa0, 0x9d,
+ 0x91, 0x00, 0xa0, 0x2b, 0x00, 0xa0, 0xa7, 0x9b, 0x00, 0xa9, 0xf3, 0x00,
+ 0xa0, 0xb9, 0x8e, 0x00, 0xa7, 0x53, 0x00, 0xa0, 0xdc, 0x8f, 0x00, 0xa5,
+ 0xdb, 0x00, 0xa1, 0x00, 0x8d, 0x00, 0xa4, 0x1b, 0x00, 0xa1, 0x24, 0x92,
+ 0x00, 0xa2, 0x4b, 0x00, 0xa1, 0x44, 0x83, 0x00, 0xa0, 0x53, 0x00, 0xa1,
+ 0x61, 0x93, 0x00, 0xac, 0x2b, 0x00, 0xa1, 0x7d, 0x0a, 0xc0, 0xa1, 0x92,
+ 0x8b, 0x00, 0xa0, 0x1b, 0x00, 0xa1, 0x9c, 0xcc, 0x23, 0x33, 0x00, 0xa0,
+ 0xf0, 0xc2, 0x00, 0x49, 0x0f, 0xab, 0x79, 0x9b, 0x0f, 0x9b, 0x60, 0xc3,
+ 0x00, 0x54, 0x01, 0x08, 0x29, 0x96, 0x01, 0x01, 0xc2, 0x00, 0xa1, 0xa4,
+ 0xc8, 0xb6, 0x32, 0x0f, 0xae, 0x19, 0xc5, 0x06, 0x82, 0x0f, 0xa6, 0x3a,
+ 0x00, 0xa1, 0xaa, 0xca, 0xa0, 0x80, 0x0f, 0x9d, 0x01, 0x90, 0x00, 0x16,
+ 0x38, 0xc9, 0xaa, 0x4d, 0x0f, 0x9c, 0x79, 0xc9, 0xb2, 0x99, 0x0f, 0xd4,
+ 0xd0, 0xcb, 0x75, 0x5a, 0x00, 0x00, 0x69, 0xc2, 0x01, 0xbb, 0x0f, 0xca,
+ 0xa8, 0x97, 0x08, 0x15, 0x93, 0x00, 0xa1, 0xb0, 0x94, 0x08, 0x15, 0x2b,
+ 0x00, 0xa1, 0xb7, 0x8e, 0x08, 0x15, 0x1b, 0x00, 0xa1, 0xbb, 0x83, 0x08,
+ 0x15, 0x03, 0x00, 0xa1, 0xc2, 0x93, 0x08, 0x15, 0x41, 0x84, 0x08, 0x15,
+ 0x49, 0x8f, 0x08, 0x15, 0x53, 0x00, 0xa1, 0xc6, 0x91, 0x08, 0x15, 0x59,
+ 0x86, 0x08, 0x15, 0x13, 0x00, 0xa1, 0xcd, 0x96, 0x08, 0x15, 0x6b, 0x00,
+ 0xa1, 0xd1, 0x95, 0x08, 0x15, 0x83, 0x00, 0xa1, 0xd8, 0x42, 0x09, 0x8f,
+ 0xc0, 0xa1, 0xea, 0x90, 0x08, 0x15, 0xab, 0x00, 0xa1, 0xf6, 0x9a, 0x08,
+ 0x15, 0xa1, 0x92, 0x08, 0x15, 0xbb, 0x00, 0xa2, 0x02, 0x8b, 0x08, 0x15,
+ 0xcb, 0x00, 0xa2, 0x06, 0x87, 0x08, 0x15, 0xd3, 0x00, 0xa2, 0x0a, 0x8d,
+ 0x08, 0x15, 0xe3, 0x00, 0xa2, 0x0e, 0x89, 0x08, 0x16, 0x02, 0x00, 0xa2,
+ 0x12, 0x47, 0x02, 0x0e, 0xc0, 0xa2, 0x16, 0xcd, 0x79, 0x8f, 0x08, 0x2b,
+ 0x78, 0xcb, 0x8d, 0xbb, 0x0f, 0xa7, 0xc0, 0x46, 0x00, 0x8b, 0x40, 0xa2,
+ 0x8a, 0x26, 0xc0, 0xa2, 0x96, 0x25, 0xc0, 0xa2, 0xd6, 0x03, 0x40, 0xa3,
+ 0x16, 0x03, 0xc0, 0xa3, 0x1e, 0x26, 0x40, 0xa3, 0x56, 0xc5, 0x61, 0xf7,
+ 0x01, 0x74, 0x01, 0x03, 0x40, 0xa3, 0x96, 0x0e, 0xc0, 0xa3, 0xa4, 0xc4,
+ 0xdf, 0xfb, 0x01, 0x74, 0xd9, 0x0b, 0xc0, 0xa3, 0xb0, 0xc2, 0x00, 0x27,
+ 0x01, 0x75, 0x39, 0x4c, 0x8c, 0x6d, 0x40, 0xa3, 0xbc, 0x07, 0xc0, 0xa3,
+ 0xf2, 0x45, 0x03, 0x14, 0xc0, 0xa3, 0xfe, 0x10, 0xc0, 0xa4, 0x0a, 0xc2,
+ 0x05, 0x1d, 0x01, 0x74, 0xe1, 0x0b, 0xc0, 0xa4, 0x16, 0x46, 0xcd, 0xaf,
+ 0xc0, 0xa4, 0x22, 0xc4, 0xdf, 0x97, 0x01, 0x75, 0xb0, 0xc5, 0x18, 0x8f,
+ 0x01, 0x74, 0x29, 0x43, 0x39, 0x8b, 0x40, 0xa4, 0x2e, 0x11, 0xc0, 0xa4,
+ 0x3a, 0xc5, 0xc0, 0xd0, 0x01, 0x75, 0x71, 0x45, 0xdc, 0x09, 0xc0, 0xa4,
+ 0x4a, 0xc3, 0x87, 0x22, 0x01, 0x76, 0xc0, 0xc4, 0x14, 0x8d, 0x01, 0x74,
+ 0x39, 0xc5, 0x8c, 0xf0, 0x01, 0x74, 0x99, 0xc4, 0xe1, 0x3b, 0x01, 0x76,
+ 0x09, 0xc5, 0xd5, 0x65, 0x01, 0x77, 0x88, 0xc3, 0x05, 0x14, 0x01, 0x74,
+ 0x41, 0xc3, 0x02, 0x9f, 0x01, 0x74, 0x48, 0xc9, 0xab, 0xac, 0x01, 0x74,
+ 0x51, 0xc4, 0x04, 0xa6, 0x01, 0x74, 0xf1, 0xc2, 0x13, 0x38, 0x01, 0x75,
+ 0x40, 0x44, 0xb3, 0x85, 0xc0, 0xa4, 0x56, 0x44, 0x08, 0x48, 0x40, 0xa4,
+ 0x66, 0x42, 0x01, 0x9c, 0xc0, 0xa4, 0x72, 0xc3, 0x02, 0x9b, 0x01, 0x74,
+ 0xc1, 0xc3, 0x00, 0xbf, 0x01, 0x76, 0x38, 0x11, 0xc0, 0xa4, 0x7c, 0x07,
+ 0x40, 0xa4, 0x94, 0x03, 0xc0, 0xa4, 0xa0, 0x44, 0x15, 0xa8, 0x40, 0xa4,
+ 0xac, 0xc3, 0x05, 0xba, 0x01, 0x75, 0x19, 0xc3, 0x65, 0xba, 0x01, 0x76,
+ 0x50, 0xc3, 0x01, 0x9d, 0x01, 0x75, 0x49, 0x4c, 0x8c, 0x6d, 0x40, 0xa4,
+ 0xb8, 0xc2, 0x0c, 0x43, 0x01, 0x75, 0x59, 0xc2, 0x00, 0x28, 0x01, 0x75,
+ 0xc1, 0x43, 0x0a, 0x0c, 0x40, 0xa4, 0xc8, 0xc3, 0x05, 0x14, 0x01, 0x75,
+ 0x89, 0x16, 0xc0, 0xa4, 0xd2, 0xc4, 0x09, 0x9d, 0x01, 0x75, 0xa0, 0x45,
+ 0x1b, 0xa0, 0xc0, 0xa4, 0xde, 0xc4, 0xe0, 0x07, 0x01, 0x77, 0x20, 0x90,
+ 0x01, 0x8e, 0xe8, 0x99, 0x01, 0x8e, 0x23, 0x00, 0xa4, 0xe8, 0x9c, 0x01,
+ 0x8e, 0xbb, 0x00, 0xa4, 0xf0, 0x92, 0x01, 0x8e, 0x99, 0x96, 0x01, 0x8e,
+ 0xc9, 0x89, 0x01, 0x8e, 0xd0, 0x9c, 0x01, 0x8e, 0xab, 0x00, 0xa4, 0xfa,
+ 0x92, 0x01, 0x8e, 0x3b, 0x00, 0xa5, 0x10, 0x89, 0x01, 0x8e, 0xb1, 0xc3,
+ 0xe6, 0x56, 0x01, 0x8f, 0x00, 0x86, 0x01, 0x8e, 0xd9, 0x9c, 0x01, 0x8e,
+ 0xe1, 0x89, 0x01, 0x8f, 0x10, 0xc8, 0x78, 0xcc, 0x0f, 0xb3, 0xf3, 0x00,
+ 0xa5, 0x16, 0xc5, 0x01, 0xc2, 0x01, 0x38, 0x98, 0xce, 0x6d, 0xb0, 0x0f,
+ 0xa7, 0x19, 0xc8, 0xbb, 0x4a, 0x0f, 0xce, 0x00, 0x45, 0xde, 0x1b, 0xc0,
+ 0xa5, 0x1c, 0x14, 0x40, 0xa5, 0x28, 0x94, 0x0f, 0xd4, 0x89, 0xc2, 0x05,
+ 0x26, 0x01, 0x36, 0x98, 0x47, 0xc2, 0xf1, 0xc0, 0xa5, 0x34, 0x47, 0x07,
+ 0x93, 0x40, 0xa5, 0x43, 0x47, 0x02, 0x0e, 0xc0, 0xa5, 0x52, 0x18, 0xc0,
+ 0xa5, 0xb4, 0xcd, 0x2c, 0xb2, 0x08, 0x8a, 0x19, 0x06, 0xc0, 0xa5, 0xc0,
+ 0x15, 0xc0, 0xa5, 0xd2, 0xc7, 0xc2, 0xab, 0x08, 0x89, 0xa1, 0xc7, 0xc3,
+ 0xd8, 0x08, 0x89, 0x91, 0xc6, 0xb6, 0x44, 0x08, 0x89, 0x88, 0x4f, 0x30,
+ 0x90, 0xc0, 0xa5, 0xde, 0x4b, 0x6f, 0xc7, 0xc0, 0xa5, 0xfc, 0x47, 0x02,
+ 0x0e, 0xc0, 0xa6, 0x1b, 0x4c, 0x11, 0xe2, 0xc0, 0xa6, 0x84, 0x46, 0x09,
+ 0x97, 0x40, 0xa6, 0x94, 0xcc, 0x88, 0xa1, 0x0f, 0xb5, 0xc0, 0x47, 0x34,
+ 0x2f, 0xc0, 0xa6, 0xb8, 0x47, 0x02, 0x0e, 0x40, 0xa6, 0xcb, 0xc8, 0x1d,
+ 0x3c, 0x0f, 0xb1, 0xf9, 0xc4, 0x00, 0x87, 0x0f, 0xb1, 0x10, 0x00, 0xc0,
+ 0xa7, 0x30, 0xc9, 0xae, 0xe8, 0x01, 0x36, 0x61, 0x43, 0x00, 0xa8, 0x40,
+ 0xa7, 0x40, 0xca, 0x9b, 0xee, 0x0f, 0x9b, 0xc1, 0xc5, 0xc9, 0x75, 0x0f,
+ 0xd5, 0x98, 0x09, 0xc0, 0xa7, 0x52, 0x03, 0xc0, 0xa7, 0x5c, 0x14, 0xc0,
+ 0xa7, 0x72, 0x0e, 0xc0, 0xa7, 0x7a, 0x42, 0x00, 0x8c, 0xc0, 0xa7, 0x90,
+ 0x16, 0xc0, 0xa7, 0x9c, 0x06, 0xc0, 0xa7, 0xb7, 0x07, 0xc0, 0xa7, 0xc8,
+ 0x08, 0xc0, 0xa7, 0xd4, 0x05, 0xc0, 0xa7, 0xe0, 0x15, 0xc0, 0xa8, 0x03,
+ 0x04, 0xc0, 0xa8, 0x25, 0x42, 0x02, 0x2b, 0xc0, 0xa8, 0x2f, 0x17, 0xc0,
+ 0xa8, 0x3b, 0x0b, 0xc0, 0xa8, 0x4b, 0x47, 0x2e, 0x48, 0xc0, 0xa8, 0x55,
+ 0x11, 0xc0, 0xa8, 0x61, 0x0f, 0xc0, 0xa8, 0x7c, 0x12, 0xc0, 0xa8, 0x8b,
+ 0x10, 0xc0, 0xa8, 0x95, 0x1a, 0xc0, 0xa8, 0xa1, 0x42, 0x01, 0x23, 0xc0,
+ 0xa8, 0xab, 0x49, 0x07, 0xbb, 0x40, 0xa8, 0xbd, 0xce, 0x72, 0x80, 0x01,
+ 0x1c, 0x21, 0xc6, 0x81, 0x9c, 0x01, 0x10, 0x09, 0xc7, 0x50, 0x25, 0x0f,
+ 0xae, 0xe1, 0xc3, 0x1b, 0xa1, 0x0f, 0xcf, 0x68, 0x47, 0xb4, 0x64, 0xc0,
+ 0xa8, 0xc9, 0x83, 0x00, 0x01, 0x60, 0x48, 0xb6, 0xaa, 0xc0, 0xa8, 0xd5,
+ 0x42, 0x00, 0x29, 0x40, 0xa8, 0xe1, 0xd7, 0x16, 0xc3, 0x01, 0x1c, 0x09,
+ 0x45, 0xc2, 0x13, 0xc0, 0xa8, 0xed, 0xcc, 0x62, 0xe5, 0x01, 0x11, 0x71,
+ 0x44, 0x7e, 0xe0, 0x40, 0xa8, 0xf9, 0xc6, 0xd3, 0xa9, 0x0f, 0xa3, 0xb9,
+ 0xc4, 0x00, 0xba, 0x0f, 0xb5, 0x38, 0xc9, 0xb0, 0x74, 0x0f, 0x9c, 0x51,
+ 0xcb, 0x98, 0x16, 0x0f, 0xb0, 0xb1, 0xc9, 0x96, 0x1e, 0x0f, 0xb0, 0xa8,
+ 0x00, 0x40, 0xa9, 0x05, 0xc2, 0x00, 0x75, 0x0f, 0x9b, 0x99, 0x87, 0x0f,
+ 0x9b, 0x50, 0xcb, 0x8d, 0x79, 0x0f, 0x89, 0x79, 0xca, 0x9d, 0x6a, 0x00,
+ 0x05, 0x48, 0x15, 0xc0, 0xa9, 0x11, 0x05, 0xc0, 0xa9, 0x1d, 0x46, 0xd1,
+ 0x4b, 0xc0, 0xa9, 0x29, 0x4b, 0x96, 0x06, 0xc0, 0xa9, 0x3b, 0x08, 0xc0,
+ 0xa9, 0x53, 0xd5, 0x36, 0xda, 0x01, 0x67, 0xf8, 0xc7, 0xb4, 0xd2, 0x0f,
+ 0xca, 0x11, 0xc9, 0xb2, 0x36, 0x0f, 0x9b, 0xd8, 0x42, 0x00, 0xa9, 0xc0,
+ 0xa9, 0x5f, 0xc3, 0x02, 0xad, 0x01, 0x02, 0x80, 0x45, 0x05, 0xfd, 0xc0,
+ 0xa9, 0x81, 0x46, 0x11, 0x55, 0x40, 0xa9, 0xa7, 0x46, 0x00, 0x8b, 0x40,
+ 0xa9, 0xc3, 0xce, 0x6c, 0x7c, 0x0f, 0xa2, 0x79, 0xc8, 0x78, 0xcc, 0x0f,
+ 0x9d, 0x60, 0x42, 0x00, 0xa9, 0xc0, 0xa9, 0xdb, 0x00, 0x40, 0xaa, 0x3d,
+ 0xc6, 0xcc, 0x89, 0x0f, 0x9d, 0x51, 0xcf, 0x69, 0x27, 0x01, 0x50, 0x81,
+ 0xcc, 0x08, 0xfb, 0x00, 0x02, 0xf0, 0x1c, 0xc0, 0xaa, 0x49, 0x97, 0x09,
+ 0x18, 0x5b, 0x00, 0xaa, 0x64, 0x16, 0xc0, 0xaa, 0x9f, 0x15, 0xc0, 0xaa,
+ 0xbb, 0x10, 0xc0, 0xaa, 0xd4, 0x0f, 0xc0, 0xaa, 0xf0, 0x0e, 0xc0, 0xab,
+ 0x0c, 0x0d, 0xc0, 0xab, 0x21, 0x0a, 0xc0, 0xab, 0x42, 0x09, 0xc0, 0xab,
+ 0x57, 0x87, 0x09, 0x04, 0x53, 0x00, 0xab, 0x70, 0x06, 0xc0, 0xab, 0xa8,
+ 0x04, 0xc0, 0xab, 0xbd, 0x83, 0x09, 0x00, 0x03, 0x00, 0xab, 0xd2, 0x12,
+ 0xc0, 0xac, 0x16, 0x14, 0xc0, 0xac, 0x2d, 0x8b, 0x09, 0x09, 0xfa, 0x00,
+ 0xac, 0x3c, 0x49, 0x1e, 0x56, 0xc0, 0xac, 0x72, 0xce, 0x74, 0x16, 0x09,
+ 0x23, 0x89, 0xd9, 0x1d, 0xba, 0x09, 0x23, 0x80, 0x42, 0x00, 0xec, 0xc0,
+ 0xac, 0x84, 0x07, 0xc0, 0xac, 0x90, 0x15, 0xc0, 0xac, 0x9c, 0x08, 0xc0,
+ 0xac, 0xae, 0x11, 0xc0, 0xac, 0xba, 0x16, 0x40, 0xac, 0xc6, 0x42, 0x00,
+ 0x36, 0xc0, 0xac, 0xd2, 0xc9, 0xaf, 0x03, 0x0f, 0xca, 0x60, 0x45, 0x3a,
+ 0xd8, 0xc0, 0xac, 0xde, 0xca, 0x9e, 0x96, 0x0f, 0x9a, 0xd8, 0xcf, 0x55,
+ 0xa9, 0x01, 0x37, 0xf1, 0xca, 0x9e, 0xb4, 0x0f, 0xcb, 0x20, 0xcc, 0x87,
+ 0x81, 0x01, 0x08, 0x21, 0x45, 0x02, 0x6d, 0x40, 0xac, 0xea, 0x42, 0xe6,
+ 0x4a, 0xc0, 0xac, 0xf6, 0x1e, 0xc0, 0xac, 0xfe, 0x1d, 0x40, 0xad, 0x06,
+ 0x19, 0xc0, 0xad, 0x2e, 0x1a, 0xc0, 0xad, 0x3e, 0x1c, 0xc0, 0xad, 0x46,
+ 0x83, 0x08, 0x40, 0x01, 0x87, 0x08, 0x40, 0x09, 0x8b, 0x08, 0x40, 0x11,
+ 0x91, 0x08, 0x40, 0x19, 0x97, 0x08, 0x40, 0x21, 0x0c, 0xc0, 0xad, 0x4e,
+ 0x0d, 0xc0, 0xad, 0x56, 0x0e, 0xc0, 0xad, 0x6a, 0x0f, 0xc0, 0xad, 0x7e,
+ 0x10, 0xc0, 0xad, 0x92, 0x12, 0xc0, 0xad, 0xa6, 0x14, 0xc0, 0xad, 0xba,
+ 0x15, 0xc0, 0xad, 0xce, 0x16, 0x40, 0xad, 0xe2, 0xd0, 0x5a, 0xa2, 0x00,
+ 0xe9, 0x59, 0xc8, 0xbe, 0xaa, 0x00, 0x26, 0x01, 0xcd, 0x7f, 0xb4, 0x05,
+ 0x33, 0x70, 0x46, 0x02, 0x0f, 0xc0, 0xad, 0xf6, 0x48, 0x19, 0x9b, 0x40,
+ 0xae, 0x73, 0x46, 0x02, 0x0f, 0xc0, 0xae, 0x85, 0x48, 0x19, 0x9b, 0x40,
+ 0xaf, 0x04, 0xc4, 0x26, 0x78, 0x0f, 0xdf, 0xc9, 0xc4, 0x15, 0xe7, 0x0f,
+ 0xdf, 0x81, 0xc3, 0x05, 0x14, 0x0f, 0xdf, 0x89, 0x16, 0xc0, 0xaf, 0x16,
+ 0x08, 0xc0, 0xaf, 0x22, 0x15, 0xc0, 0xaf, 0x2e, 0xc5, 0x06, 0xdb, 0x0f,
+ 0xdf, 0xc0, 0xe0, 0x07, 0x87, 0x01, 0x51, 0x90, 0xc2, 0x00, 0xbf, 0x01,
+ 0x18, 0xa1, 0xc8, 0x08, 0xe8, 0x00, 0x05, 0x38, 0xe0, 0x00, 0x87, 0x0f,
+ 0xc9, 0x60, 0x47, 0xc1, 0x1c, 0xc0, 0xaf, 0x3a, 0x00, 0x40, 0xaf, 0x42,
+ 0x48, 0x78, 0xbf, 0xc0, 0xaf, 0x5e, 0x45, 0x00, 0xba, 0xc0, 0xaf, 0x6a,
+ 0x0e, 0xc0, 0xaf, 0x7a, 0x4b, 0x6f, 0xc7, 0xc0, 0xaf, 0x86, 0xd6, 0x2d,
+ 0x20, 0x00, 0x6f, 0xa0, 0x14, 0xc0, 0xaf, 0x9c, 0x08, 0xc0, 0xaf, 0xa8,
+ 0xcb, 0x1a, 0x50, 0x0e, 0xd4, 0x59, 0x05, 0xc0, 0xaf, 0xc2, 0x15, 0xc0,
+ 0xaf, 0xcc, 0x0e, 0xc0, 0xaf, 0xea, 0x42, 0x02, 0xae, 0xc0, 0xaf, 0xf4,
+ 0x16, 0xc0, 0xaf, 0xfa, 0xdb, 0x18, 0xa5, 0x0e, 0xd3, 0x79, 0x07, 0xc0,
+ 0xb0, 0x08, 0x0a, 0xc0, 0xb0, 0x1a, 0x10, 0xc0, 0xb0, 0x27, 0x42, 0x00,
+ 0xa2, 0xc0, 0xb0, 0x33, 0x42, 0x00, 0x38, 0xc0, 0xb0, 0x3f, 0x44, 0x8c,
+ 0x27, 0xc0, 0xb0, 0x4b, 0x06, 0xc0, 0xb0, 0x57, 0x46, 0xd3, 0x9d, 0x40,
+ 0xb0, 0x63, 0xe0, 0x04, 0x87, 0x01, 0x39, 0xf1, 0x47, 0x0a, 0xaa, 0x40,
+ 0xb0, 0x75, 0x4b, 0x6f, 0xc7, 0xc0, 0xb0, 0x87, 0x47, 0x02, 0x0e, 0xc0,
+ 0xb0, 0xaa, 0x15, 0xc0, 0xb1, 0x11, 0xd0, 0x5c, 0xe2, 0x08, 0xae, 0x49,
+ 0x50, 0x5d, 0xf2, 0xc0, 0xb1, 0x1b, 0x06, 0x40, 0xb1, 0x27, 0x46, 0x04,
+ 0x8f, 0xc0, 0xb1, 0x33, 0x46, 0x01, 0xfc, 0x40, 0xb1, 0x4b, 0xc9, 0x00,
+ 0xca, 0x01, 0x54, 0xe9, 0xcc, 0x07, 0xc7, 0x01, 0x54, 0xf0, 0xdb, 0x16,
+ 0x38, 0x01, 0x54, 0xf9, 0xde, 0x0e, 0xaa, 0x01, 0x55, 0x00, 0xcb, 0x6c,
+ 0x2b, 0x0f, 0xb4, 0x11, 0xc8, 0xbf, 0xba, 0x0f, 0x9a, 0xe0, 0xc3, 0x00,
+ 0x44, 0x0f, 0xb4, 0x49, 0xcd, 0x80, 0x77, 0x0f, 0xaf, 0xe8, 0x00, 0xc0,
+ 0xb1, 0x63, 0x45, 0x2d, 0xd5, 0x40, 0xb1, 0x79, 0xc6, 0xd1, 0xf3, 0x01,
+ 0x34, 0xd1, 0xcb, 0x99, 0x34, 0x01, 0x34, 0xa8, 0x44, 0x00, 0x2d, 0xc0,
+ 0xb1, 0x95, 0xc6, 0xd3, 0x91, 0x0f, 0x9a, 0x98, 0xd2, 0x4b, 0x71, 0x01,
+ 0x13, 0x19, 0xcd, 0x7b, 0xff, 0x00, 0x04, 0xe0, 0x45, 0x00, 0x8c, 0xc0,
+ 0xb1, 0xa1, 0x48, 0xba, 0x8a, 0x40, 0xb1, 0xad, 0xc7, 0xc4, 0x09, 0x0f,
+ 0xce, 0x11, 0xc3, 0x05, 0xba, 0x01, 0x30, 0x98, 0x45, 0x00, 0xba, 0xc0,
+ 0xb1, 0xb9, 0x4b, 0x6f, 0xc7, 0xc0, 0xb1, 0xcb, 0x47, 0x02, 0x0e, 0xc0,
+ 0xb1, 0xf1, 0xd4, 0x3d, 0x40, 0x05, 0x45, 0xa1, 0x06, 0x40, 0xb2, 0x5c,
+ 0xd4, 0x10, 0xc9, 0x0f, 0xb3, 0xd1, 0x46, 0x11, 0x39, 0x40, 0xb2, 0x6e,
+ 0xc8, 0xbd, 0x9a, 0x0f, 0xa7, 0x08, 0x03, 0xc0, 0xb2, 0x7a, 0x15, 0xc0,
+ 0xb2, 0x90, 0xc4, 0xde, 0x9f, 0x00, 0x41, 0xd9, 0x1c, 0xc0, 0xb2, 0x9c,
+ 0xc5, 0x7a, 0xc2, 0x00, 0x41, 0xc9, 0xcd, 0x7a, 0xba, 0x00, 0x41, 0xb9,
+ 0xc3, 0xe5, 0xa2, 0x00, 0x41, 0x99, 0xc7, 0xc4, 0x33, 0x00, 0x41, 0x80,
+ 0x44, 0x01, 0xc4, 0xc0, 0xb2, 0xa8, 0x4f, 0x0f, 0x5f, 0x40, 0xb2, 0xc9,
+ 0x15, 0xc0, 0xb2, 0xd9, 0x91, 0x00, 0x41, 0x5b, 0x00, 0xb2, 0xe5, 0x8b,
+ 0x00, 0x41, 0x51, 0x45, 0x2c, 0x86, 0xc0, 0xb2, 0xee, 0x97, 0x00, 0x41,
+ 0x39, 0x83, 0x00, 0x41, 0x1b, 0x00, 0xb3, 0x01, 0x87, 0x00, 0x40, 0xe8,
+ 0x16, 0xc0, 0xb3, 0x05, 0x15, 0xc0, 0xb3, 0x17, 0xc4, 0x49, 0x87, 0x00,
+ 0x40, 0x99, 0xc3, 0xe5, 0x6f, 0x00, 0x40, 0x91, 0xc2, 0x02, 0x09, 0x00,
+ 0x40, 0x81, 0x0b, 0xc0, 0xb3, 0x23, 0xc3, 0x20, 0x18, 0x00, 0x40, 0x69,
+ 0xc3, 0x8c, 0x3f, 0x00, 0x40, 0x61, 0xc5, 0xdd, 0x7b, 0x00, 0x40, 0x59,
+ 0xc4, 0xe1, 0x63, 0x00, 0x40, 0x51, 0xc3, 0x70, 0x3f, 0x00, 0x40, 0x49,
+ 0xc3, 0x0a, 0xe2, 0x00, 0x40, 0x31, 0x04, 0xc0, 0xb3, 0x2f, 0xc5, 0x49,
+ 0x80, 0x00, 0x40, 0x19, 0xc5, 0xb5, 0x1e, 0x00, 0x40, 0x11, 0xc4, 0xd8,
+ 0xe5, 0x00, 0x40, 0x00, 0xcf, 0x40, 0x0c, 0x01, 0x31, 0x00, 0x8a, 0x0f,
+ 0xcd, 0x29, 0xc8, 0x43, 0xb6, 0x0f, 0x9d, 0x80, 0x87, 0x01, 0x19, 0x99,
+ 0x4a, 0xa0, 0x30, 0x40, 0xb3, 0x3b, 0x44, 0x00, 0x74, 0xc0, 0xb3, 0x47,
+ 0xc6, 0xca, 0x85, 0x0f, 0xb1, 0x50, 0xcc, 0x82, 0xd1, 0x0f, 0xb2, 0x11,
+ 0xcd, 0x7d, 0x5e, 0x0f, 0xb2, 0x08, 0x4c, 0x24, 0xe3, 0xc0, 0xb3, 0x59,
+ 0x53, 0x41, 0xe3, 0x40, 0xb3, 0x6b, 0x8d, 0x0f, 0xcc, 0x41, 0x44, 0x45,
+ 0xa1, 0x40, 0xb3, 0x77, 0xc6, 0x02, 0xd1, 0x01, 0x3a, 0x69, 0xc4, 0x0e,
+ 0x6a, 0x01, 0x39, 0x81, 0xcb, 0x8e, 0x08, 0x01, 0x38, 0xf0, 0xc6, 0xd3,
+ 0x8b, 0x0f, 0x9b, 0x39, 0x4b, 0x8c, 0x62, 0x40, 0xb3, 0xa7, 0x4c, 0x88,
+ 0x29, 0xc0, 0xb4, 0x27, 0xc4, 0x2a, 0x3e, 0x0f, 0x9b, 0x81, 0x00, 0xc0,
+ 0xb4, 0x3f, 0x95, 0x0f, 0xd3, 0x98, 0xc4, 0xe1, 0xe7, 0x0f, 0xb6, 0x69,
+ 0xc7, 0xc6, 0x7f, 0x0f, 0xb6, 0x90, 0xc2, 0x00, 0x74, 0x00, 0x00, 0x79,
+ 0xc3, 0x00, 0xa3, 0x00, 0x00, 0x70, 0xc2, 0x00, 0x45, 0x0f, 0xcc, 0x11,
+ 0xc2, 0x11, 0xa5, 0x01, 0x32, 0x78, 0x46, 0x03, 0x13, 0xc0, 0xb4, 0x67,
+ 0x48, 0x0b, 0x17, 0xc0, 0xb4, 0x77, 0xd4, 0x19, 0x9a, 0x0f, 0xb3, 0x80,
+ 0xc2, 0x00, 0xc4, 0x0f, 0xad, 0xa9, 0xc7, 0xc4, 0xa3, 0x0f, 0xd4, 0xd8,
+ 0xcd, 0x7b, 0xcb, 0x01, 0x36, 0x20, 0x45, 0x15, 0xa7, 0xc0, 0xb4, 0x9b,
+ 0x45, 0x20, 0x6c, 0x40, 0xb4, 0xcb, 0xd0, 0x0d, 0xaa, 0x0f, 0xb3, 0x58,
+ 0xcd, 0x80, 0x6a, 0x01, 0x4f, 0xb0, 0x9f, 0x08, 0xd5, 0x11, 0x9e, 0x08,
+ 0xd5, 0x08, 0x45, 0x02, 0x9a, 0x40, 0xb4, 0xfb, 0xc5, 0xd7, 0x3b, 0x08,
+ 0xd4, 0xe9, 0xcb, 0x99, 0x6b, 0x08, 0xd4, 0xe1, 0xc4, 0x01, 0xe2, 0x08,
+ 0xd4, 0xd9, 0xc5, 0x32, 0x89, 0x08, 0xd4, 0xd0, 0xc8, 0xba, 0xca, 0x08,
+ 0xd4, 0xc9, 0x44, 0x00, 0xbb, 0x40, 0xb5, 0x07, 0xc2, 0x00, 0x02, 0x08,
+ 0xd4, 0xa9, 0x95, 0x08, 0xd4, 0xa3, 0x00, 0xb5, 0x1f, 0x8e, 0x08, 0xd4,
+ 0x91, 0x94, 0x08, 0xd4, 0x89, 0x8f, 0x08, 0xd4, 0x81, 0x84, 0x08, 0xd4,
+ 0x79, 0x90, 0x08, 0xd4, 0x73, 0x00, 0xb5, 0x23, 0x86, 0x08, 0xd4, 0x69,
+ 0x8d, 0x08, 0xd4, 0x59, 0x89, 0x08, 0xd4, 0x50, 0x15, 0xc0, 0xb5, 0x27,
+ 0xc2, 0x00, 0xdb, 0x08, 0xd4, 0x39, 0xc2, 0x00, 0x39, 0x08, 0xd4, 0x30,
+ 0x0d, 0xc0, 0xb5, 0x31, 0xc2, 0x00, 0xd0, 0x08, 0xd4, 0x11, 0x15, 0xc0,
+ 0xb5, 0x41, 0xc2, 0x02, 0x41, 0x08, 0xd3, 0xf1, 0xc2, 0x00, 0xdb, 0x08,
+ 0xd3, 0xe9, 0xc2, 0x00, 0x39, 0x08, 0xd3, 0xe1, 0xc2, 0x19, 0x2c, 0x08,
+ 0xd3, 0xd9, 0xc2, 0x00, 0x02, 0x08, 0xd3, 0xd1, 0x1c, 0xc0, 0xb5, 0x51,
+ 0x06, 0xc0, 0xb5, 0x5b, 0x16, 0xc0, 0xb5, 0x6f, 0xc2, 0x01, 0xc3, 0x08,
+ 0xd3, 0xa1, 0x04, 0xc0, 0xb5, 0x81, 0x12, 0xc0, 0xb5, 0x8b, 0x10, 0xc0,
+ 0xb5, 0x95, 0x0c, 0xc0, 0xb5, 0xab, 0x05, 0xc0, 0xb5, 0xb5, 0x09, 0xc0,
+ 0xb5, 0xbf, 0x83, 0x08, 0xd2, 0x80, 0xcb, 0x36, 0x51, 0x08, 0xd2, 0xd9,
+ 0x45, 0x00, 0xba, 0x40, 0xb5, 0xc9, 0xd1, 0x31, 0xc8, 0x0f, 0xad, 0x61,
+ 0xc9, 0xa9, 0x12, 0x0f, 0x9b, 0x31, 0xc6, 0x59, 0x92, 0x00, 0x05, 0x68,
+ 0xc4, 0x26, 0x78, 0x08, 0x87, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0x87, 0xc1,
+ 0x15, 0xc0, 0xb5, 0xe9, 0x08, 0xc0, 0xb5, 0xf5, 0x16, 0xc0, 0xb6, 0x01,
+ 0xc3, 0x05, 0x14, 0x08, 0x87, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0x87, 0x80,
+ 0x42, 0x01, 0xc3, 0xc0, 0xb6, 0x0d, 0x07, 0xc0, 0xb6, 0x15, 0xc2, 0x38,
+ 0x2a, 0x08, 0x87, 0x31, 0xc2, 0x53, 0x31, 0x08, 0x87, 0x29, 0xc2, 0x14,
+ 0x77, 0x08, 0x87, 0x21, 0xc2, 0x02, 0x98, 0x08, 0x87, 0x11, 0x10, 0xc0,
+ 0xb6, 0x1f, 0xc3, 0xe5, 0xf9, 0x08, 0x87, 0x01, 0xc3, 0x38, 0x66, 0x08,
+ 0x86, 0xf9, 0xc3, 0x14, 0x4b, 0x08, 0x86, 0xf1, 0xc3, 0x0f, 0xb6, 0x08,
+ 0x86, 0xe9, 0xc3, 0x44, 0x79, 0x08, 0x86, 0xe1, 0xc3, 0x62, 0x26, 0x08,
+ 0x86, 0xd9, 0xc3, 0xc1, 0x9d, 0x08, 0x86, 0xd1, 0xc3, 0x12, 0xae, 0x08,
+ 0x86, 0xc1, 0xc3, 0x40, 0x40, 0x08, 0x86, 0xa9, 0xc3, 0x70, 0xaf, 0x08,
+ 0x86, 0xa1, 0xc3, 0xe5, 0x87, 0x08, 0x86, 0x99, 0xc3, 0x44, 0x19, 0x08,
+ 0x86, 0x91, 0xc3, 0x02, 0x97, 0x08, 0x86, 0x89, 0xc3, 0xc3, 0x6e, 0x08,
+ 0x86, 0x80, 0xd4, 0x38, 0x90, 0x08, 0x7a, 0xc9, 0x44, 0x02, 0x9f, 0xc0,
+ 0xb6, 0x31, 0xcf, 0x38, 0x95, 0x08, 0x7a, 0xb8, 0xc3, 0x05, 0x14, 0x08,
+ 0x7a, 0x8b, 0x00, 0xb6, 0x40, 0x16, 0x40, 0xb6, 0x46, 0xcc, 0x08, 0x5b,
+ 0x08, 0x7a, 0x81, 0xca, 0x9d, 0x38, 0x08, 0x7a, 0x79, 0xcf, 0x66, 0xed,
+ 0x08, 0x7a, 0x71, 0x45, 0x11, 0xba, 0xc0, 0xb6, 0x52, 0x46, 0x0e, 0xd4,
+ 0xc0, 0xb6, 0x5e, 0x49, 0x04, 0xf9, 0xc0, 0xb6, 0x6a, 0x44, 0x05, 0x18,
+ 0x40, 0xb6, 0x76, 0x0e, 0xc0, 0xb6, 0x82, 0xc4, 0xe0, 0x4b, 0x08, 0x7a,
+ 0x19, 0xc3, 0xb5, 0x3e, 0x08, 0x7a, 0x11, 0x15, 0xc0, 0xb6, 0x8e, 0xc9,
+ 0x5d, 0xe2, 0x08, 0x7a, 0x01, 0xc2, 0x00, 0x67, 0x08, 0x79, 0xf1, 0x03,
+ 0xc0, 0xb6, 0x98, 0xc3, 0x20, 0x18, 0x08, 0x79, 0xd9, 0xc3, 0x00, 0x4e,
+ 0x08, 0x79, 0xd1, 0xc4, 0xe0, 0xe7, 0x08, 0x79, 0xc1, 0xc4, 0x4a, 0xb9,
+ 0x08, 0x79, 0xb9, 0xc2, 0x01, 0x7f, 0x08, 0x79, 0x9b, 0x00, 0xb6, 0xa4,
+ 0xc5, 0x4a, 0xb3, 0x08, 0x79, 0xa9, 0xc3, 0x7e, 0x89, 0x08, 0x79, 0xa1,
+ 0xc5, 0x9c, 0xa2, 0x08, 0x79, 0x91, 0xc4, 0xe3, 0x27, 0x08, 0x79, 0x88,
+ 0x00, 0xc0, 0xb6, 0xaa, 0x42, 0x00, 0xa9, 0x40, 0xb7, 0x06, 0xcd, 0x7a,
+ 0xad, 0x0f, 0xaa, 0x29, 0x15, 0xc0, 0xb7, 0x5e, 0x06, 0xc0, 0xb7, 0x85,
+ 0x10, 0xc0, 0xb7, 0x8f, 0xce, 0x6c, 0xec, 0x01, 0x20, 0xf9, 0xd0, 0x5e,
+ 0x82, 0x01, 0x20, 0xf1, 0xcf, 0x64, 0x3b, 0x01, 0x20, 0xe9, 0x08, 0xc0,
+ 0xb7, 0x99, 0x07, 0xc0, 0xb7, 0xa5, 0x42, 0x00, 0x64, 0xc0, 0xb7, 0xaf,
+ 0xd3, 0x42, 0x42, 0x01, 0x20, 0x59, 0xc9, 0x1b, 0x00, 0x01, 0x20, 0x51,
+ 0xd5, 0x33, 0xd1, 0x01, 0x20, 0x49, 0x04, 0xc0, 0xb7, 0xbb, 0xcb, 0x49,
+ 0x4a, 0x01, 0x20, 0x31, 0xd2, 0x48, 0x47, 0x01, 0x5c, 0xb8, 0x47, 0x02,
+ 0x0e, 0xc0, 0xb7, 0xc7, 0x0a, 0xc0, 0xb8, 0x39, 0x4d, 0x76, 0xb7, 0xc0,
+ 0xb8, 0x4b, 0x14, 0xc0, 0xb8, 0x57, 0x47, 0xc0, 0x4a, 0xc0, 0xb8, 0x69,
+ 0x47, 0xbf, 0xda, 0xc0, 0xb8, 0x7b, 0xd1, 0x48, 0x11, 0x00, 0x38, 0x79,
+ 0x42, 0x00, 0x99, 0xc0, 0xb8, 0x8d, 0x42, 0x06, 0x62, 0xc0, 0xb8, 0x99,
+ 0x07, 0xc0, 0xb8, 0xa5, 0xc7, 0xc9, 0x6c, 0x00, 0x3a, 0x51, 0xc5, 0x23,
+ 0x26, 0x00, 0x3a, 0x49, 0xcc, 0x86, 0xf1, 0x00, 0x3a, 0x01, 0xc9, 0xa8,
+ 0xaf, 0x00, 0x3a, 0x09, 0x16, 0xc0, 0xb8, 0xb1, 0x4d, 0x78, 0xb2, 0x40,
+ 0xb8, 0xbd, 0x83, 0x05, 0x40, 0x01, 0x8b, 0x05, 0x40, 0x09, 0x97, 0x05,
+ 0x40, 0x19, 0x87, 0x05, 0x40, 0x21, 0x91, 0x05, 0x40, 0x29, 0x0d, 0xc0,
+ 0xb8, 0xc9, 0x09, 0xc0, 0xb8, 0xd3, 0x05, 0xc0, 0xb8, 0xdd, 0x16, 0xc0,
+ 0xb8, 0xe7, 0x06, 0xc0, 0xb8, 0xf5, 0xc2, 0x01, 0x23, 0x05, 0x41, 0x11,
+ 0x0c, 0xc0, 0xb9, 0x03, 0xc2, 0x00, 0x10, 0x05, 0x40, 0xc1, 0x12, 0xc0,
+ 0xb9, 0x0d, 0x04, 0xc0, 0xb9, 0x17, 0xc2, 0x00, 0xa2, 0x05, 0x40, 0xe9,
+ 0x14, 0xc0, 0xb9, 0x21, 0xc2, 0x01, 0xc8, 0x05, 0x40, 0xf9, 0xc2, 0x00,
+ 0xfb, 0x05, 0x41, 0x08, 0xc8, 0xb9, 0xea, 0x05, 0x40, 0x11, 0xc7, 0x5a,
+ 0xdb, 0x05, 0x40, 0x31, 0x03, 0x40, 0xb9, 0x2b, 0x83, 0x05, 0x41, 0x19,
+ 0x8b, 0x05, 0x41, 0x21, 0x97, 0x05, 0x41, 0x29, 0x87, 0x05, 0x41, 0x31,
+ 0xc2, 0x01, 0x24, 0x05, 0x41, 0x38, 0x9e, 0x05, 0x41, 0x41, 0x9f, 0x05,
+ 0x41, 0x49, 0xa0, 0x05, 0x41, 0x51, 0xa1, 0x05, 0x41, 0x58, 0xca, 0x9d,
+ 0x10, 0x0f, 0xa5, 0x61, 0xc5, 0xdb, 0x64, 0x0f, 0xb5, 0x20, 0xd6, 0x2f,
+ 0xb4, 0x0f, 0xaf, 0x19, 0xc2, 0x00, 0x29, 0x0f, 0xa8, 0x43, 0x00, 0xb9,
+ 0x37, 0xcf, 0x6b, 0x34, 0x0f, 0xb2, 0x50, 0x87, 0x01, 0x3a, 0x3b, 0x00,
+ 0xb9, 0x3d, 0xc9, 0x78, 0x74, 0x0f, 0xa4, 0xb0, 0xc2, 0x02, 0xae, 0x01,
+ 0x4d, 0x09, 0xc4, 0x00, 0x49, 0x01, 0x4d, 0x00, 0xcc, 0x8c, 0x55, 0x0f,
+ 0xae, 0x99, 0xc8, 0xbb, 0xda, 0x0f, 0xae, 0x91, 0xc5, 0x08, 0x91, 0x0f,
+ 0xa0, 0xd0, 0xc4, 0xe4, 0x43, 0x0f, 0xab, 0xc0, 0x90, 0x0f, 0xca, 0x21,
+ 0xcb, 0x8c, 0xea, 0x0f, 0xcf, 0xa8, 0x43, 0x00, 0x3d, 0xc0, 0xb9, 0x41,
+ 0x46, 0x07, 0x2f, 0x40, 0xb9, 0x62, 0xcc, 0x85, 0xf5, 0x01, 0x36, 0x29,
+ 0xc9, 0xb2, 0xa2, 0x0f, 0x98, 0xf0, 0x52, 0x48, 0xe9, 0xc0, 0xb9, 0x9a,
+ 0x47, 0x02, 0x0e, 0xc0, 0xb9, 0xc2, 0xc8, 0x7a, 0x7e, 0x00, 0xdd, 0xd1,
+ 0x46, 0x09, 0x97, 0xc0, 0xba, 0x4c, 0x51, 0x4f, 0x25, 0xc0, 0xba, 0x70,
+ 0x45, 0x00, 0xba, 0xc0, 0xba, 0x82, 0x4d, 0x80, 0x50, 0x40, 0xba, 0x8e,
+ 0xcf, 0x69, 0xae, 0x0f, 0x98, 0x20, 0xd5, 0x37, 0x43, 0x01, 0x17, 0x49,
+ 0xce, 0x74, 0x32, 0x01, 0x15, 0x89, 0x46, 0x23, 0xa0, 0xc0, 0xba, 0x98,
+ 0x46, 0x00, 0xd4, 0x40, 0xba, 0xa4, 0xc2, 0x00, 0x55, 0x01, 0x14, 0x13,
+ 0x00, 0xba, 0xbc, 0x46, 0x00, 0xd4, 0xc0, 0xba, 0xc0, 0x45, 0x00, 0x8c,
+ 0x40, 0xba, 0xcc, 0xd1, 0x1a, 0x4a, 0x01, 0x04, 0x71, 0xd0, 0x1d, 0xec,
+ 0x01, 0x04, 0x69, 0x07, 0xc0, 0xba, 0xde, 0xc5, 0x1d, 0x1d, 0x01, 0x04,
+ 0x59, 0xc9, 0x60, 0xf3, 0x01, 0x04, 0x51, 0xc4, 0x26, 0x78, 0x01, 0x04,
+ 0x49, 0x15, 0xc0, 0xba, 0xea, 0x08, 0xc0, 0xba, 0xf6, 0x16, 0xc0, 0xbb,
+ 0x02, 0xc3, 0x05, 0x14, 0x01, 0x04, 0x09, 0xc4, 0x15, 0xe7, 0x01, 0x04,
+ 0x00, 0x87, 0x01, 0x19, 0x19, 0x44, 0x00, 0x74, 0x40, 0xbb, 0x0e, 0x00,
+ 0xc0, 0xbb, 0x1a, 0xc7, 0xc1, 0xfc, 0x01, 0x55, 0x52, 0x00, 0xbb, 0x7c,
+ 0x46, 0xcf, 0xe3, 0xc0, 0xbb, 0x82, 0xca, 0xa4, 0x40, 0x00, 0x04, 0xf0,
+ 0x16, 0xc0, 0xbb, 0x8a, 0xc2, 0x00, 0x89, 0x0f, 0xc9, 0xa2, 0x00, 0xbb,
+ 0x99, 0xc6, 0x1d, 0xb4, 0x01, 0x11, 0xbb, 0x00, 0xbb, 0x9f, 0xc9, 0xb3,
+ 0xdd, 0x01, 0x0a, 0x50, 0x00, 0x40, 0xbb, 0xa5, 0xcd, 0x7a, 0xc7, 0x01,
+ 0x08, 0xf1, 0x5b, 0x17, 0x2b, 0x40, 0xbb, 0xbd, 0xc5, 0x29, 0xfc, 0x0f,
+ 0xc9, 0x81, 0xc3, 0x12, 0xb8, 0x0f, 0xd6, 0x19, 0xc6, 0x18, 0x8e, 0x0f,
+ 0xd6, 0x20, 0xc3, 0x01, 0x4b, 0x0f, 0xd5, 0x39, 0x45, 0x3c, 0x54, 0x40,
+ 0xbb, 0xf5, 0xcc, 0x8b, 0xdd, 0x01, 0x08, 0x78, 0x49, 0xb0, 0x08, 0xc0,
+ 0xbc, 0x01, 0xcc, 0x87, 0xd5, 0x0f, 0xb6, 0xe8, 0x46, 0x17, 0x33, 0x40,
+ 0xbc, 0x3f, 0xc5, 0x00, 0xb9, 0x00, 0x01, 0x5b, 0x00, 0xbc, 0x47, 0xcb,
+ 0x8d, 0xb0, 0x00, 0x05, 0x88, 0xc8, 0x2a, 0x06, 0x0f, 0xc8, 0x79, 0xca,
+ 0xa1, 0xca, 0x0f, 0xc8, 0x60, 0xcb, 0x95, 0xcf, 0x0f, 0x9c, 0x69, 0xc5,
+ 0xd7, 0x1d, 0x0f, 0x9a, 0x68, 0xc4, 0x12, 0x50, 0x0f, 0xa1, 0xe9, 0xc4,
+ 0x00, 0x87, 0x0f, 0xa1, 0xb8, 0xd0, 0x58, 0x52, 0x01, 0x1c, 0x91, 0xd2,
+ 0x49, 0x67, 0x01, 0x1c, 0x88, 0xc8, 0x19, 0x58, 0x01, 0x5f, 0xe9, 0xc9,
+ 0xa8, 0x5e, 0x0f, 0xb7, 0x98, 0x94, 0x0f, 0xa6, 0xf9, 0x00, 0xc0, 0xbc,
+ 0x4b, 0x95, 0x0f, 0xae, 0x80, 0x43, 0x02, 0x18, 0xc0, 0xbc, 0x57, 0xc8,
+ 0xbd, 0x5a, 0x0f, 0x9c, 0x49, 0xd1, 0x4f, 0xf1, 0x01, 0x81, 0xe9, 0xcc,
+ 0x84, 0x45, 0x01, 0x92, 0x80, 0x46, 0x0b, 0x11, 0xc0, 0xbc, 0x61, 0x47,
+ 0x34, 0x2f, 0xc0, 0xbc, 0x6d, 0x46, 0x09, 0x97, 0xc0, 0xbc, 0x83, 0x47,
+ 0xc3, 0x3e, 0xc0, 0xbc, 0xa1, 0x52, 0x4b, 0xef, 0xc0, 0xbc, 0xe7, 0x4a,
+ 0x9f, 0x86, 0x40, 0xbc, 0xf3, 0x45, 0x6b, 0x87, 0xc0, 0xbd, 0x31, 0x45,
+ 0x00, 0xb4, 0xc0, 0xbd, 0x3d, 0xc5, 0xdc, 0x0e, 0x0f, 0xd4, 0x10, 0x00,
+ 0x40, 0xbd, 0x4f, 0xcf, 0x63, 0x1e, 0x08, 0xd7, 0xa3, 0x00, 0xbd, 0x5b,
+ 0x46, 0x02, 0x0f, 0x40, 0xbd, 0x5f, 0x00, 0x40, 0xbd, 0xcd, 0xc4, 0x28,
+ 0xb1, 0x08, 0xd7, 0x63, 0x00, 0xbd, 0xd9, 0xcc, 0x23, 0x33, 0x08, 0xd7,
+ 0x3a, 0x00, 0xbd, 0xdd, 0x00, 0x40, 0xbd, 0xe3, 0x00, 0xc0, 0xbd, 0xf2,
+ 0x46, 0xd0, 0x4f, 0xc0, 0xbe, 0x0a, 0xcd, 0x79, 0x34, 0x0f, 0xc9, 0x90,
+ 0x49, 0xab, 0x91, 0xc0, 0xbe, 0x1c, 0x49, 0x2b, 0xed, 0x40, 0xbe, 0x4e,
+ 0x44, 0xaa, 0x7f, 0xc0, 0xbe, 0x90, 0x0f, 0xc0, 0xbe, 0xaa, 0xc3, 0x07,
+ 0xa2, 0x0b, 0x5b, 0x81, 0x16, 0xc0, 0xbe, 0xb6, 0xc2, 0x04, 0xad, 0x0b,
+ 0x5b, 0x61, 0x10, 0xc0, 0xbe, 0xc8, 0x1a, 0xc0, 0xbe, 0xd4, 0x0a, 0xc0,
+ 0xbe, 0xe4, 0xc8, 0xbe, 0xd2, 0x0b, 0x5b, 0x39, 0x44, 0xde, 0xeb, 0xc0,
+ 0xbe, 0xf0, 0xc6, 0xce, 0xe1, 0x0b, 0x5a, 0x18, 0x16, 0xc0, 0xbf, 0x0c,
+ 0x47, 0x0d, 0x04, 0xc0, 0xbf, 0x18, 0xc8, 0x33, 0xee, 0x0b, 0x5a, 0xf0,
+ 0xc4, 0x26, 0x78, 0x0b, 0x5a, 0xc9, 0xc5, 0x06, 0xdb, 0x0b, 0x5a, 0xc1,
+ 0x15, 0xc0, 0xbf, 0x22, 0x08, 0xc0, 0xbf, 0x2e, 0x16, 0xc0, 0xbf, 0x3a,
+ 0xc3, 0x05, 0x14, 0x0b, 0x5a, 0x89, 0xc4, 0x15, 0xe7, 0x0b, 0x5a, 0x80,
+ 0x16, 0xc0, 0xbf, 0x46, 0xc3, 0xdf, 0xff, 0x0b, 0x59, 0xa9, 0x15, 0xc0,
+ 0xbf, 0x52, 0x0d, 0x40, 0xbf, 0x5c, 0x03, 0xc0, 0xbf, 0x68, 0x19, 0xc0,
+ 0xbf, 0x80, 0x0b, 0xc0, 0xbf, 0x88, 0x11, 0xc0, 0xbf, 0x94, 0x17, 0xc0,
+ 0xbf, 0xa0, 0x07, 0x40, 0xbf, 0xac, 0xd0, 0x3a, 0x4c, 0x0f, 0xb5, 0x81,
+ 0xc2, 0x00, 0xf1, 0x0f, 0xca, 0xa0, 0xc8, 0x1d, 0x3c, 0x0f, 0xb1, 0xf1,
+ 0xc4, 0x00, 0x87, 0x0f, 0xb1, 0x08, 0xcb, 0x8d, 0xd1, 0x01, 0x1f, 0xf1,
+ 0xc5, 0x00, 0x92, 0x01, 0x1f, 0xd8, 0xc7, 0x00, 0x90, 0x01, 0x1f, 0xe9,
+ 0xcb, 0x8d, 0x6e, 0x01, 0x1f, 0xe0, 0x43, 0x00, 0xe5, 0xc0, 0xbf, 0xb8,
+ 0xc3, 0x32, 0x36, 0x0f, 0xa7, 0x70, 0xc7, 0x00, 0xfa, 0x01, 0x03, 0x49,
+ 0xca, 0xa1, 0x5c, 0x01, 0x01, 0x60, 0xd1, 0x54, 0xfd, 0x0f, 0xb5, 0x40,
+ 0xc7, 0x00, 0x8b, 0x01, 0x57, 0x08, 0x42, 0x00, 0x45, 0xc0, 0xbf, 0xc7,
+ 0xc7, 0xc7, 0x51, 0x01, 0x18, 0x31, 0xcc, 0x8b, 0x1d, 0x0f, 0xb1, 0x18,
+ 0xc4, 0x00, 0xba, 0x01, 0x0a, 0x61, 0xd1, 0x57, 0x1d, 0x01, 0x01, 0x89,
+ 0xca, 0xa8, 0x00, 0x01, 0x01, 0x80, 0xc8, 0x12, 0x85, 0x01, 0x31, 0x71,
+ 0x8a, 0x0f, 0x9a, 0x89, 0xc3, 0x04, 0x20, 0x0f, 0xcc, 0xd0, 0xc4, 0x02,
+ 0xde, 0x08, 0x5d, 0x59, 0x19, 0xc0, 0xbf, 0xd1, 0xc2, 0x00, 0xc4, 0x08,
+ 0x5d, 0x68, 0xc8, 0x0d, 0x03, 0x08, 0x5d, 0x78, 0xc3, 0x11, 0xef, 0x08,
+ 0x5c, 0x81, 0x03, 0x40, 0xbf, 0xdb, 0xc2, 0x00, 0x8e, 0x08, 0x5c, 0x38,
+ 0xce, 0x73, 0x1a, 0x08, 0x48, 0xf9, 0x47, 0x34, 0x2f, 0xc0, 0xbf, 0xe7,
+ 0x47, 0x02, 0x0e, 0x40, 0xbf, 0xf4, 0x47, 0x02, 0x0e, 0xc0, 0xc0, 0x57,
+ 0x15, 0xc0, 0xc0, 0xdd, 0xd0, 0x59, 0x22, 0x05, 0x43, 0xa9, 0x45, 0x01,
+ 0xc3, 0x40, 0xc0, 0xe7, 0x12, 0xc0, 0xc0, 0xf3, 0x16, 0xc0, 0xc1, 0x03,
+ 0x05, 0xc0, 0xc1, 0x15, 0x19, 0xc0, 0xc1, 0x29, 0x0a, 0xc0, 0xc1, 0x35,
+ 0x04, 0xc0, 0xc1, 0x47, 0x15, 0xc0, 0xc1, 0x5a, 0x42, 0x01, 0xc3, 0xc0,
+ 0xc1, 0x78, 0x42, 0x01, 0x0f, 0xc0, 0xc1, 0x84, 0x42, 0x00, 0x58, 0xc0,
+ 0xc1, 0x8e, 0x14, 0xc0, 0xc1, 0x9a, 0xc5, 0xdb, 0xd2, 0x08, 0x0f, 0x71,
+ 0xc4, 0xb4, 0x91, 0x08, 0x0f, 0x99, 0xc7, 0xc9, 0xa4, 0x08, 0x0f, 0xb9,
+ 0x09, 0xc0, 0xc1, 0xa6, 0xc5, 0x01, 0xa2, 0x08, 0x0e, 0xc9, 0xc5, 0xd3,
+ 0xe4, 0x08, 0x0f, 0xc0, 0xc6, 0x5b, 0x02, 0x00, 0x04, 0x81, 0xc4, 0x09,
+ 0x9d, 0x00, 0x00, 0xa1, 0x16, 0xc0, 0xc1, 0xb2, 0xc3, 0x05, 0x14, 0x00,
+ 0x00, 0x88, 0x03, 0xc0, 0xc1, 0xbe, 0x09, 0xc0, 0xc1, 0xca, 0x15, 0xc0,
+ 0xc1, 0xd6, 0xc2, 0x00, 0x7a, 0x00, 0x4a, 0x81, 0x4b, 0x6f, 0xc7, 0xc0,
+ 0xc1, 0xe2, 0x47, 0x02, 0x0e, 0xc0, 0xc2, 0x17, 0xc7, 0xc7, 0xac, 0x05,
+ 0x47, 0xe9, 0xca, 0x9d, 0x56, 0x05, 0x47, 0xd9, 0xc5, 0x95, 0xf0, 0x05,
+ 0x47, 0xd1, 0x06, 0x40, 0xc2, 0x8c, 0xc6, 0xd2, 0x77, 0x0f, 0xae, 0xa1,
+ 0xc8, 0x3f, 0xff, 0x0f, 0xad, 0x28, 0x96, 0x0f, 0x9e, 0xe3, 0x00, 0xc2,
+ 0x9e, 0x43, 0x00, 0x3d, 0x40, 0xc2, 0xa4, 0x44, 0x05, 0xaa, 0xc0, 0xc2,
+ 0xb0, 0xca, 0xa6, 0xac, 0x0f, 0x99, 0x98, 0x44, 0x02, 0x9b, 0xc0, 0xc2,
+ 0xbc, 0x45, 0x00, 0x8c, 0x40, 0xc2, 0xce, 0x46, 0x00, 0x8b, 0x40, 0xc2,
+ 0xda, 0x46, 0x00, 0x8b, 0x40, 0xc2, 0xec, 0xc5, 0x61, 0xc0, 0x0e, 0x98,
+ 0x2b, 0x00, 0xc2, 0xfe, 0x0a, 0xc0, 0xc3, 0x04, 0x49, 0xb1, 0xaf, 0xc0,
+ 0xc3, 0x10, 0x48, 0xbc, 0x1a, 0x40, 0xc3, 0x1c, 0xc4, 0x26, 0x78, 0x00,
+ 0x01, 0xcb, 0x00, 0xc3, 0x28, 0xc5, 0x06, 0xdb, 0x00, 0x01, 0xc3, 0x00,
+ 0xc3, 0x2c, 0x15, 0xc0, 0xc3, 0x30, 0x08, 0xc0, 0xc3, 0x42, 0x16, 0xc0,
+ 0xc3, 0x54, 0xc3, 0x05, 0x14, 0x00, 0x01, 0x8b, 0x00, 0xc3, 0x66, 0xc4,
+ 0x15, 0xe7, 0x00, 0x01, 0x82, 0x00, 0xc3, 0x6a, 0x06, 0xc0, 0xc3, 0x6e,
+ 0xd0, 0x5c, 0xe2, 0x08, 0xca, 0x31, 0xca, 0x93, 0x30, 0x08, 0xca, 0x29,
+ 0x45, 0x00, 0xba, 0xc0, 0xc3, 0x7a, 0x47, 0x30, 0x9f, 0xc0, 0xc3, 0x92,
+ 0xca, 0xa0, 0x3a, 0x08, 0xca, 0x09, 0xd3, 0x44, 0xee, 0x08, 0xc9, 0xf9,
+ 0x18, 0xc0, 0xc3, 0x9e, 0x47, 0x02, 0x0e, 0x40, 0xc3, 0xaa, 0x45, 0x29,
+ 0x90, 0xc0, 0xc4, 0x17, 0xc3, 0x23, 0x1b, 0x01, 0x11, 0x19, 0xc7, 0xc3,
+ 0xfb, 0x0f, 0xc9, 0xf8, 0x4b, 0x43, 0x54, 0xc0, 0xc4, 0x21, 0xca, 0xa3,
+ 0x64, 0x01, 0x3b, 0xf9, 0x46, 0x09, 0x97, 0x40, 0xc4, 0x2d, 0xca, 0xa3,
+ 0x64, 0x01, 0x3c, 0x49, 0x46, 0x09, 0x97, 0x40, 0xc4, 0x4b, 0xc8, 0xbf,
+ 0x32, 0x01, 0x36, 0x69, 0x49, 0xae, 0x85, 0x40, 0xc4, 0x6f, 0xa3, 0x01,
+ 0x34, 0x29, 0xa2, 0x01, 0x34, 0x21, 0xa1, 0x01, 0x34, 0x19, 0xa0, 0x01,
+ 0x34, 0x11, 0x9f, 0x01, 0x34, 0x09, 0x9e, 0x01, 0x34, 0x00, 0xc9, 0xb4,
+ 0x52, 0x01, 0x18, 0x01, 0x44, 0x4a, 0x60, 0x40, 0xc4, 0x7b, 0xc9, 0xab,
+ 0x9a, 0x0f, 0xd3, 0xc1, 0xc3, 0x02, 0x0e, 0x0f, 0xa5, 0x38, 0xc5, 0x11,
+ 0x55, 0x0f, 0xa1, 0x90, 0x48, 0xbf, 0xb2, 0xc0, 0xc4, 0x93, 0x42, 0x00,
+ 0x97, 0x40, 0xc4, 0xa5, 0xc9, 0x03, 0xde, 0x01, 0x18, 0x21, 0xd7, 0x27,
+ 0xfe, 0x01, 0x17, 0x89, 0xc4, 0x32, 0xbc, 0x01, 0x15, 0x23, 0x00, 0xc4,
+ 0xec, 0xc9, 0xb2, 0xea, 0x01, 0x4b, 0xf8, 0xd2, 0x4e, 0x2f, 0x0f, 0xa9,
+ 0xe9, 0xcc, 0x4e, 0x35, 0x0f, 0xa9, 0xd9, 0x4e, 0x6c, 0xfa, 0x40, 0xc4,
+ 0xf2, 0x42, 0x3c, 0xd3, 0xc0, 0xc4, 0xfe, 0xc5, 0x02, 0xfd, 0x0f, 0x81,
+ 0x80, 0xc5, 0x02, 0xfd, 0x0f, 0x83, 0x11, 0x42, 0x3c, 0xd3, 0x40, 0xc5,
+ 0x28, 0x00, 0xc0, 0xc5, 0x52, 0x42, 0x00, 0xa9, 0xc0, 0xc5, 0xa4, 0x02,
+ 0x40, 0xc5, 0xb6, 0x05, 0xc0, 0xc5, 0xc8, 0xc5, 0x8a, 0x10, 0x01, 0x4c,
+ 0xc9, 0x15, 0xc0, 0xc5, 0xd4, 0xc9, 0xad, 0xd1, 0x0f, 0xd7, 0x29, 0xd4,
+ 0x3a, 0xe8, 0x01, 0x70, 0x41, 0xc6, 0xcc, 0x71, 0x01, 0x70, 0x99, 0xd4,
+ 0x3d, 0x90, 0x01, 0x70, 0xb0, 0xc8, 0x18, 0x67, 0x01, 0x16, 0x29, 0xc5,
+ 0x1d, 0x1d, 0x01, 0x11, 0xc1, 0xc4, 0x25, 0xd5, 0x01, 0x10, 0xa1, 0xc5,
+ 0x00, 0xd4, 0x00, 0x16, 0xc8, 0xd1, 0x50, 0xce, 0x08, 0xc1, 0xd9, 0x45,
+ 0x00, 0xba, 0xc0, 0xc5, 0xe0, 0x4b, 0x6f, 0xc7, 0xc0, 0xc5, 0xf2, 0x47,
+ 0x02, 0x0e, 0x40, 0xc6, 0x15, 0xcf, 0x4c, 0x01, 0x01, 0x17, 0x5b, 0x00,
+ 0xc6, 0x7c, 0xc6, 0x00, 0x4e, 0x01, 0x10, 0x60, 0xc9, 0x23, 0x9f, 0x01,
+ 0x17, 0x08, 0xc5, 0x2d, 0x7a, 0x01, 0x14, 0x03, 0x00, 0xc6, 0x82, 0xc3,
+ 0x00, 0x9a, 0x01, 0x15, 0x60, 0xdd, 0x11, 0x6e, 0x01, 0x57, 0x70, 0xc7,
+ 0x87, 0xc2, 0x0f, 0xad, 0xd9, 0xc4, 0x27, 0xe3, 0x0f, 0xad, 0xca, 0x00,
+ 0xc6, 0x88, 0x0e, 0xc0, 0xc6, 0x8e, 0x45, 0x08, 0xcb, 0xc0, 0xc6, 0x9a,
+ 0x49, 0xb2, 0xab, 0xc0, 0xc6, 0xcb, 0x44, 0xaf, 0x82, 0xc0, 0xc6, 0xe9,
+ 0xd7, 0x27, 0x8b, 0x0d, 0xe3, 0x90, 0x99, 0x0d, 0xe1, 0xc3, 0x00, 0xc6,
+ 0xf5, 0x96, 0x0d, 0xe0, 0x1b, 0x00, 0xc7, 0x14, 0x95, 0x0d, 0xe0, 0xe3,
+ 0x00, 0xc7, 0x1c, 0x8c, 0x0d, 0xe0, 0xdb, 0x00, 0xc7, 0x2c, 0x90, 0x0d,
+ 0xe0, 0xd3, 0x00, 0xc7, 0x30, 0x8f, 0x0d, 0xe0, 0xcb, 0x00, 0xc7, 0x3a,
+ 0x94, 0x0d, 0xe0, 0x5b, 0x00, 0xc7, 0x3e, 0x8e, 0x0d, 0xe0, 0x33, 0x00,
+ 0xc7, 0x4e, 0x8a, 0x0d, 0xe0, 0x03, 0x00, 0xc7, 0x58, 0x8d, 0x0d, 0xe0,
+ 0x2b, 0x00, 0xc7, 0x5c, 0x86, 0x0d, 0xe0, 0x43, 0x00, 0xc7, 0x64, 0x88,
+ 0x0d, 0xe0, 0x23, 0x00, 0xc7, 0x6e, 0x92, 0x0d, 0xe0, 0x13, 0x00, 0xc7,
+ 0x74, 0x89, 0x0d, 0xe0, 0x53, 0x00, 0xc7, 0x80, 0x98, 0x0d, 0xe0, 0x4b,
+ 0x00, 0xc7, 0x86, 0x84, 0x0d, 0xe0, 0x39, 0x9a, 0x0d, 0xe0, 0x0b, 0x00,
+ 0xc7, 0x8c, 0x91, 0x0d, 0xe2, 0x23, 0x00, 0xc7, 0x90, 0x97, 0x0d, 0xe2,
+ 0x8b, 0x00, 0xc7, 0xa2, 0x87, 0x0d, 0xe2, 0x3b, 0x00, 0xc7, 0xb0, 0xc2,
+ 0x0c, 0x43, 0x0d, 0xe2, 0x81, 0x8b, 0x0d, 0xe2, 0x33, 0x00, 0xc7, 0xb8,
+ 0x83, 0x0d, 0xe2, 0x0a, 0x00, 0xc7, 0xbc, 0xe0, 0x03, 0xa7, 0x01, 0x3c,
+ 0xf9, 0xc8, 0x7d, 0xa4, 0x07, 0xf2, 0x49, 0xc8, 0x80, 0x2e, 0x07, 0xf2,
+ 0x68, 0xc6, 0x00, 0x91, 0x0f, 0xa5, 0x41, 0xd0, 0x5e, 0xd2, 0x01, 0x72,
+ 0x18, 0xc5, 0xa0, 0xc1, 0x0f, 0xaf, 0x09, 0x45, 0x00, 0x8c, 0x40, 0xc7,
+ 0xc2, 0x00, 0xc0, 0xc7, 0xce, 0x42, 0x00, 0xa9, 0x40, 0xc7, 0xef, 0x51,
+ 0x53, 0xed, 0xc0, 0xc8, 0x38, 0xc3, 0x4e, 0x13, 0x0f, 0xb5, 0xd8, 0xcf,
+ 0x25, 0xc4, 0x01, 0x33, 0xe1, 0x4f, 0x68, 0x28, 0x40, 0xc8, 0x40, 0x9c,
+ 0x0f, 0x8f, 0xf9, 0x9b, 0x0f, 0x8f, 0xf1, 0x9a, 0x0f, 0x8f, 0xe9, 0x99,
+ 0x0f, 0x8f, 0xe1, 0x98, 0x0f, 0x8f, 0xd9, 0x97, 0x0f, 0x8f, 0xd1, 0x96,
+ 0x0f, 0x8f, 0xc9, 0x95, 0x0f, 0x8f, 0xc1, 0x94, 0x0f, 0x8f, 0xb9, 0x93,
+ 0x0f, 0x8f, 0xb1, 0x92, 0x0f, 0x8f, 0xa9, 0x91, 0x0f, 0x8f, 0xa1, 0x90,
+ 0x0f, 0x8f, 0x99, 0x8f, 0x0f, 0x8f, 0x91, 0x8e, 0x0f, 0x8f, 0x89, 0x8d,
+ 0x0f, 0x8f, 0x81, 0x8c, 0x0f, 0x8f, 0x79, 0x8b, 0x0f, 0x8f, 0x71, 0x8a,
+ 0x0f, 0x8f, 0x69, 0x89, 0x0f, 0x8f, 0x61, 0x88, 0x0f, 0x8f, 0x59, 0x87,
+ 0x0f, 0x8f, 0x51, 0x86, 0x0f, 0x8f, 0x49, 0x85, 0x0f, 0x8f, 0x41, 0x84,
+ 0x0f, 0x8f, 0x39, 0x83, 0x0f, 0x8f, 0x30, 0xc5, 0x1e, 0x96, 0x05, 0x4a,
+ 0x99, 0x4a, 0x6f, 0xc8, 0x40, 0xc8, 0x4c, 0x8a, 0x05, 0x4a, 0x91, 0x94,
+ 0x05, 0x4a, 0x89, 0x90, 0x05, 0x4a, 0x82, 0x00, 0xc8, 0x63, 0x83, 0x05,
+ 0x4a, 0x31, 0x10, 0xc0, 0xc8, 0x67, 0x0f, 0xc0, 0xc8, 0x79, 0xc2, 0x00,
+ 0xd0, 0x05, 0x4a, 0x09, 0xc2, 0x01, 0x4a, 0x05, 0x4a, 0x01, 0xc2, 0x19,
+ 0x2c, 0x05, 0x49, 0xf9, 0xc2, 0x00, 0xdb, 0x05, 0x49, 0xf1, 0xc2, 0x00,
+ 0x39, 0x05, 0x49, 0xe9, 0xc2, 0x0d, 0xf6, 0x05, 0x49, 0xe1, 0xc2, 0x25,
+ 0x3b, 0x05, 0x49, 0xd1, 0xc2, 0x00, 0x64, 0x05, 0x49, 0xc9, 0xc2, 0x01,
+ 0x5d, 0x05, 0x49, 0xb9, 0xc2, 0x00, 0xb0, 0x05, 0x49, 0xb1, 0xc2, 0x0e,
+ 0x9a, 0x05, 0x49, 0xa1, 0xc2, 0x01, 0x6f, 0x05, 0x49, 0x99, 0xc2, 0x01,
+ 0x30, 0x05, 0x49, 0x89, 0xc2, 0x02, 0x2b, 0x05, 0x49, 0x80, 0x15, 0xc0,
+ 0xc8, 0x83, 0x03, 0xc0, 0xc8, 0xa6, 0x11, 0xc0, 0xc8, 0xae, 0x42, 0x00,
+ 0xd0, 0xc0, 0xc8, 0xc0, 0x4a, 0x07, 0xbb, 0xc0, 0xc8, 0xcc, 0x05, 0xc0,
+ 0xc8, 0xd8, 0xcb, 0x1a, 0x50, 0x00, 0x01, 0x4b, 0x00, 0xc8, 0xed, 0x08,
+ 0xc0, 0xc8, 0xf1, 0xe0, 0x05, 0xa7, 0x01, 0x16, 0x51, 0x16, 0xc0, 0xc8,
+ 0xfb, 0x42, 0x00, 0x58, 0xc0, 0xc9, 0x0f, 0x19, 0xc0, 0xc9, 0x1b, 0x46,
+ 0x04, 0x8f, 0xc0, 0xc9, 0x27, 0xd7, 0x29, 0x85, 0x01, 0x70, 0x69, 0xd6,
+ 0x2c, 0xf4, 0x01, 0x70, 0xe8, 0x19, 0xc0, 0xc9, 0x33, 0x16, 0xc0, 0xc9,
+ 0x42, 0x15, 0xc0, 0xc9, 0x54, 0x0a, 0xc0, 0xc9, 0x60, 0xd0, 0x58, 0x62,
+ 0x0f, 0xc1, 0xf1, 0xc5, 0x01, 0xa2, 0x01, 0x0c, 0x93, 0x00, 0xc9, 0x6a,
+ 0xd1, 0x55, 0x30, 0x01, 0x0f, 0xf1, 0x06, 0xc0, 0xc9, 0x74, 0xcd, 0x7c,
+ 0xa8, 0x01, 0x0e, 0x49, 0x14, 0xc0, 0xc9, 0x80, 0xcf, 0x61, 0x4d, 0x01,
+ 0x5a, 0x31, 0x04, 0xc0, 0xc9, 0x8c, 0x08, 0xc0, 0xc9, 0x9e, 0xd7, 0x26,
+ 0xbc, 0x0f, 0xc5, 0x38, 0x49, 0x01, 0xaa, 0xc0, 0xc9, 0xaa, 0x15, 0xc0,
+ 0xc9, 0xc2, 0xdb, 0x16, 0x1d, 0x01, 0x37, 0x29, 0x48, 0xbc, 0xba, 0xc0,
+ 0xc9, 0xce, 0x47, 0x55, 0x85, 0x40, 0xc9, 0xe6, 0xc8, 0x07, 0x5f, 0x01,
+ 0x12, 0xb9, 0xcb, 0x90, 0x9c, 0x01, 0x12, 0xb1, 0xc8, 0x18, 0x67, 0x01,
+ 0x10, 0xc1, 0xc5, 0x00, 0xd4, 0x00, 0x16, 0xd1, 0xc4, 0xe3, 0x07, 0x0f,
+ 0xb6, 0xf9, 0xc5, 0x01, 0xaa, 0x01, 0x71, 0x80, 0x45, 0x11, 0x17, 0xc0,
+ 0xc9, 0xfb, 0x43, 0x11, 0x49, 0xc0, 0xca, 0x07, 0x45, 0x00, 0x49, 0xc0,
+ 0xca, 0x13, 0x46, 0x00, 0x2c, 0x40, 0xca, 0x1f, 0xce, 0x6b, 0xb8, 0x0f,
+ 0xae, 0xf1, 0x42, 0x00, 0x2a, 0x40, 0xca, 0x2b, 0xc6, 0xcf, 0xad, 0x0f,
+ 0xbc, 0x59, 0xc7, 0xc1, 0x00, 0x0f, 0xa6, 0x68, 0xc3, 0xe5, 0x99, 0x0f,
+ 0x93, 0x29, 0x42, 0x01, 0xe2, 0xc0, 0xca, 0x37, 0xc2, 0x07, 0x49, 0x0f,
+ 0x93, 0x19, 0xc2, 0x10, 0x37, 0x0f, 0x93, 0x09, 0xc2, 0x11, 0xf6, 0x0f,
+ 0x93, 0x00, 0xc3, 0x05, 0x14, 0x01, 0x0b, 0x03, 0x00, 0xca, 0x43, 0x08,
+ 0xc0, 0xca, 0x47, 0x15, 0xc0, 0xca, 0x51, 0xd4, 0x3f, 0x20, 0x01, 0x0c,
+ 0x19, 0x16, 0xc0, 0xca, 0x60, 0x07, 0xc0, 0xca, 0x73, 0xc4, 0x26, 0x78,
+ 0x01, 0x0b, 0x40, 0x07, 0xc0, 0xca, 0x7f, 0xcb, 0x92, 0xc2, 0x08, 0x0c,
+ 0xa8, 0xd3, 0x45, 0xe5, 0x08, 0x0c, 0xa1, 0xcc, 0x83, 0xfd, 0x08, 0x0c,
+ 0xb1, 0xcd, 0x76, 0xaa, 0x08, 0x0c, 0xc8, 0xc3, 0x63, 0x7e, 0x0f, 0xb4,
+ 0x19, 0xc5, 0xd8, 0x49, 0x0f, 0xb7, 0x20, 0xc4, 0x07, 0x73, 0x01, 0x38,
+ 0x5b, 0x00, 0xca, 0x91, 0xc4, 0xb9, 0x3c, 0x01, 0x38, 0x51, 0x0f, 0xc0,
+ 0xca, 0x97, 0xcc, 0x88, 0xf5, 0x0f, 0xc8, 0xd1, 0xd4, 0x21, 0x3f, 0x01,
+ 0x70, 0x31, 0xc3, 0x02, 0xa3, 0x01, 0x71, 0x9b, 0x00, 0xca, 0xa9, 0xc6,
+ 0x0b, 0x09, 0x01, 0x70, 0x59, 0xc5, 0x0a, 0x8a, 0x01, 0x71, 0xa0, 0xc3,
+ 0x80, 0x5d, 0x0f, 0x98, 0x40, 0xcb, 0x8f, 0x31, 0x01, 0x31, 0x11, 0xc7,
+ 0xc4, 0x95, 0x0f, 0xa8, 0xc0, 0xc3, 0x63, 0x7e, 0x0f, 0x9e, 0x71, 0xca,
+ 0xa5, 0xa8, 0x0f, 0x9e, 0x68, 0xca, 0x9d, 0x2e, 0x08, 0x73, 0xf1, 0x44,
+ 0x05, 0x14, 0x40, 0xca, 0xaf, 0x44, 0x26, 0x78, 0xc0, 0xca, 0xc1, 0x45,
+ 0x06, 0xdb, 0xc0, 0xca, 0xcd, 0x15, 0xc0, 0xca, 0xd7, 0x08, 0xc0, 0xca,
+ 0xe3, 0x16, 0xc0, 0xca, 0xeb, 0xcb, 0x0d, 0x00, 0x08, 0x73, 0x90, 0xc4,
+ 0x26, 0x78, 0x08, 0x73, 0x41, 0xc5, 0x06, 0xdb, 0x08, 0x73, 0x39, 0x15,
+ 0xc0, 0xca, 0xf9, 0x08, 0xc0, 0xcb, 0x05, 0x16, 0xc0, 0xcb, 0x11, 0xc3,
+ 0x05, 0x14, 0x08, 0x73, 0x00, 0x47, 0x02, 0x0e, 0xc0, 0xcb, 0x1d, 0xcf,
+ 0x62, 0x4c, 0x00, 0xb7, 0x81, 0xcf, 0x66, 0x1b, 0x00, 0xb7, 0x79, 0xcd,
+ 0x78, 0x16, 0x00, 0xb7, 0x71, 0xd1, 0x57, 0x61, 0x00, 0xb7, 0x69, 0xd4,
+ 0x3b, 0xec, 0x00, 0xb7, 0x61, 0xd2, 0x4c, 0xa3, 0x00, 0xb7, 0x58, 0xc2,
+ 0x00, 0x29, 0x0f, 0x9e, 0x19, 0xd3, 0x46, 0xc9, 0x0f, 0x9d, 0xe8, 0xa2,
+ 0x07, 0xf0, 0x73, 0x00, 0xcb, 0xad, 0x9e, 0x07, 0xf0, 0x53, 0x00, 0xcb,
+ 0xd5, 0x9d, 0x07, 0xf0, 0x4b, 0x00, 0xcb, 0xfd, 0xa6, 0x70, 0x08, 0x13,
+ 0x00, 0xcc, 0x25, 0xa5, 0x70, 0x08, 0x0b, 0x00, 0xcc, 0x4d, 0xa4, 0x70,
+ 0x08, 0x03, 0x00, 0xcc, 0x75, 0xa3, 0x07, 0xf0, 0x7b, 0x00, 0xcc, 0x9d,
+ 0xa1, 0x07, 0xf0, 0x6b, 0x00, 0xcc, 0xc5, 0xa0, 0x07, 0xf0, 0x63, 0x00,
+ 0xcc, 0xed, 0x9f, 0x07, 0xf0, 0x5a, 0x00, 0xcd, 0x15, 0xa2, 0x70, 0x08,
+ 0x43, 0x00, 0xcd, 0x3d, 0xa1, 0x70, 0x08, 0x3b, 0x00, 0xcd, 0x59, 0xa0,
+ 0x70, 0x08, 0x33, 0x00, 0xcd, 0x81, 0x9f, 0x70, 0x08, 0x2b, 0x00, 0xcd,
+ 0xa9, 0x9e, 0x70, 0x08, 0x23, 0x00, 0xcd, 0xd1, 0x9d, 0x70, 0x08, 0x1b,
+ 0x00, 0xcd, 0xf9, 0xa6, 0x70, 0x08, 0x61, 0xa5, 0x70, 0x08, 0x59, 0xa4,
+ 0x70, 0x08, 0x51, 0xa3, 0x70, 0x08, 0x48, 0xa6, 0x70, 0x0a, 0x91, 0xa5,
+ 0x70, 0x0a, 0x89, 0xa4, 0x70, 0x0a, 0x81, 0xa3, 0x70, 0x0a, 0x79, 0xa2,
+ 0x70, 0x0a, 0x71, 0xa1, 0x70, 0x0a, 0x69, 0xa0, 0x70, 0x0a, 0x61, 0x9f,
+ 0x70, 0x0a, 0x59, 0x9e, 0x70, 0x0a, 0x51, 0x9d, 0x70, 0x0a, 0x48, 0xa6,
+ 0x70, 0x0a, 0x41, 0xa5, 0x70, 0x0a, 0x39, 0xa4, 0x70, 0x0a, 0x31, 0xa3,
+ 0x70, 0x0a, 0x29, 0xa2, 0x70, 0x0a, 0x21, 0xa1, 0x70, 0x0a, 0x19, 0xa0,
+ 0x70, 0x0a, 0x11, 0x9f, 0x70, 0x0a, 0x09, 0x9e, 0x70, 0x0a, 0x01, 0x9d,
+ 0x70, 0x09, 0xf8, 0xa6, 0x70, 0x09, 0xf1, 0xa5, 0x70, 0x09, 0xe9, 0xa4,
+ 0x70, 0x09, 0xe1, 0xa3, 0x70, 0x09, 0xd9, 0xa2, 0x70, 0x09, 0xd1, 0xa1,
+ 0x70, 0x09, 0xc9, 0xa0, 0x70, 0x09, 0xc1, 0x9f, 0x70, 0x09, 0xb9, 0x9e,
+ 0x70, 0x09, 0xb1, 0x9d, 0x70, 0x09, 0xa8, 0xa6, 0x70, 0x09, 0xa1, 0xa5,
+ 0x70, 0x09, 0x99, 0xa4, 0x70, 0x09, 0x91, 0xa3, 0x70, 0x09, 0x89, 0xa2,
+ 0x70, 0x09, 0x81, 0xa1, 0x70, 0x09, 0x79, 0xa0, 0x70, 0x09, 0x71, 0x9f,
+ 0x70, 0x09, 0x69, 0x9e, 0x70, 0x09, 0x61, 0x9d, 0x70, 0x09, 0x58, 0xa6,
+ 0x70, 0x09, 0x51, 0xa5, 0x70, 0x09, 0x49, 0xa4, 0x70, 0x09, 0x41, 0xa3,
+ 0x70, 0x09, 0x39, 0xa2, 0x70, 0x09, 0x31, 0xa1, 0x70, 0x09, 0x29, 0xa0,
+ 0x70, 0x09, 0x21, 0x9f, 0x70, 0x09, 0x19, 0x9e, 0x70, 0x09, 0x11, 0x9d,
+ 0x70, 0x09, 0x08, 0xa6, 0x70, 0x09, 0x01, 0xa5, 0x70, 0x08, 0xf9, 0xa4,
+ 0x70, 0x08, 0xf1, 0xa3, 0x70, 0x08, 0xe9, 0xa2, 0x70, 0x08, 0xe1, 0xa1,
+ 0x70, 0x08, 0xd9, 0xa0, 0x70, 0x08, 0xd1, 0x9f, 0x70, 0x08, 0xc9, 0x9e,
+ 0x70, 0x08, 0xc1, 0x9d, 0x70, 0x08, 0xb8, 0xa6, 0x70, 0x08, 0xb1, 0xa5,
+ 0x70, 0x08, 0xa9, 0xa4, 0x70, 0x08, 0xa1, 0xa3, 0x70, 0x08, 0x99, 0xa2,
+ 0x70, 0x08, 0x91, 0xa1, 0x70, 0x08, 0x89, 0xa0, 0x70, 0x08, 0x81, 0x9f,
+ 0x70, 0x08, 0x79, 0x9e, 0x70, 0x08, 0x71, 0x9d, 0x70, 0x08, 0x68, 0x47,
+ 0x14, 0x8b, 0xc0, 0xce, 0x21, 0x45, 0x10, 0x7a, 0x40, 0xce, 0x90, 0xc4,
+ 0x15, 0xe7, 0x05, 0x31, 0x01, 0xc3, 0x05, 0x14, 0x05, 0x31, 0x09, 0x16,
+ 0xc0, 0xce, 0xb2, 0x08, 0xc0, 0xce, 0xbe, 0x15, 0xc0, 0xce, 0xca, 0xc5,
+ 0x06, 0xdb, 0x05, 0x31, 0x41, 0xc4, 0x26, 0x78, 0x05, 0x31, 0x48, 0x51,
+ 0x54, 0x86, 0xc0, 0xce, 0xd6, 0x44, 0x05, 0x8d, 0xc0, 0xce, 0xee, 0xd5,
+ 0x37, 0x2e, 0x01, 0x35, 0x41, 0xc4, 0x02, 0x6d, 0x00, 0x03, 0xe3, 0x00,
+ 0xcf, 0x06, 0xc8, 0x22, 0x83, 0x01, 0x17, 0x71, 0xc9, 0x3b, 0x79, 0x01,
+ 0x02, 0xf1, 0x16, 0xc0, 0xcf, 0x0a, 0xcb, 0x93, 0xb4, 0x01, 0x4c, 0xd1,
+ 0xc8, 0xb8, 0x92, 0x01, 0x71, 0xe9, 0x4c, 0x8a, 0xe1, 0xc0, 0xcf, 0x1c,
+ 0xda, 0x1c, 0x86, 0x01, 0x81, 0xd8, 0x46, 0x11, 0x39, 0xc0, 0xcf, 0x2e,
+ 0xd0, 0x58, 0xc2, 0x0f, 0xbd, 0x29, 0x45, 0xda, 0xab, 0x40, 0xcf, 0x50,
+ 0xdc, 0x14, 0x31, 0x00, 0xe7, 0xd1, 0x03, 0xc0, 0xcf, 0x5c, 0xcb, 0x93,
+ 0xf6, 0x00, 0xe7, 0xb1, 0xcb, 0x8f, 0xe1, 0x00, 0xe7, 0xa9, 0x14, 0xc0,
+ 0xcf, 0x6e, 0xcd, 0x2e, 0xcb, 0x00, 0xe7, 0x79, 0xd6, 0x2e, 0xc2, 0x00,
+ 0xe7, 0x71, 0xc6, 0xd3, 0x0d, 0x00, 0xe7, 0x69, 0x48, 0x5f, 0x6a, 0xc0,
+ 0xcf, 0x80, 0xda, 0x19, 0x2c, 0x00, 0xe6, 0xa1, 0xc9, 0xae, 0xa9, 0x00,
+ 0xe6, 0x98, 0x42, 0x00, 0x58, 0xc0, 0xcf, 0x98, 0x42, 0x00, 0x2c, 0xc0,
+ 0xcf, 0xa4, 0x47, 0xc7, 0x7b, 0xc0, 0xcf, 0xb0, 0xe0, 0x04, 0xa7, 0x00,
+ 0xe7, 0x09, 0x16, 0xc0, 0xcf, 0xbc, 0x42, 0x02, 0x2b, 0xc0, 0xcf, 0xce,
+ 0x4b, 0x19, 0x2c, 0xc0, 0xcf, 0xda, 0xc7, 0xc9, 0x03, 0x00, 0xe6, 0x91,
+ 0xc5, 0xdb, 0xe1, 0x00, 0xe6, 0x88, 0xc4, 0xe3, 0xa7, 0x0b, 0x7f, 0x89,
+ 0xc2, 0x00, 0x64, 0x0b, 0x7f, 0x80, 0xc6, 0xa0, 0xd4, 0x0f, 0xa7, 0xc9,
+ 0xc4, 0xe0, 0x8b, 0x0f, 0x9d, 0x70, 0x83, 0x08, 0x2b, 0x81, 0x04, 0xc0,
+ 0xcf, 0xef, 0x05, 0xc0, 0xcf, 0xf9, 0x06, 0xc0, 0xd0, 0x03, 0x87, 0x08,
+ 0x2b, 0xc3, 0x00, 0xd0, 0x0d, 0xc2, 0x14, 0xda, 0x08, 0x2b, 0xc9, 0xc2,
+ 0x01, 0x30, 0x08, 0x2b, 0xd1, 0x0a, 0xc0, 0xd0, 0x11, 0x8b, 0x08, 0x2b,
+ 0xf3, 0x00, 0xd0, 0x1b, 0xc2, 0x1c, 0x52, 0x08, 0x2c, 0x01, 0x0e, 0xc0,
+ 0xd0, 0x21, 0xc2, 0x00, 0x4e, 0x08, 0x2c, 0x21, 0x10, 0xc0, 0xd0, 0x2b,
+ 0x91, 0x08, 0x2c, 0x39, 0xc2, 0x00, 0x67, 0x08, 0x2c, 0x41, 0xc2, 0x0f,
+ 0x9a, 0x08, 0x2c, 0x49, 0x15, 0xc0, 0xd0, 0x35, 0x16, 0xc0, 0xd0, 0x3f,
+ 0x97, 0x08, 0x2c, 0x81, 0x9b, 0x08, 0x2c, 0xa1, 0xc2, 0x0a, 0xe2, 0x08,
+ 0x2c, 0xa9, 0xc2, 0x02, 0x2b, 0x08, 0x2c, 0x09, 0xc2, 0x01, 0x19, 0x08,
+ 0x2c, 0x51, 0xc2, 0x00, 0x5f, 0x08, 0x2c, 0x89, 0xc2, 0x24, 0xe2, 0x08,
+ 0x2c, 0x90, 0x83, 0x08, 0x2c, 0xb9, 0x04, 0xc0, 0xd0, 0x49, 0x05, 0xc0,
+ 0xd0, 0x53, 0x06, 0xc0, 0xd0, 0x5d, 0x87, 0x08, 0x2c, 0xfb, 0x00, 0xd0,
+ 0x67, 0xc2, 0x14, 0xda, 0x08, 0x2d, 0x01, 0xc2, 0x01, 0x30, 0x08, 0x2d,
+ 0x09, 0x0a, 0xc0, 0xd0, 0x6b, 0x8b, 0x08, 0x2d, 0x2b, 0x00, 0xd0, 0x75,
+ 0xc2, 0x1c, 0x52, 0x08, 0x2d, 0x39, 0xc2, 0x02, 0x2b, 0x08, 0x2d, 0x41,
+ 0x0e, 0xc0, 0xd0, 0x7b, 0xc2, 0x00, 0x4e, 0x08, 0x2d, 0x59, 0x10, 0xc0,
+ 0xd0, 0x85, 0x91, 0x08, 0x2d, 0x71, 0xc2, 0x00, 0x67, 0x08, 0x2d, 0x79,
+ 0xc2, 0x0f, 0x9a, 0x08, 0x2d, 0x81, 0xc2, 0x01, 0x19, 0x08, 0x2d, 0x89,
+ 0x15, 0xc0, 0xd0, 0x8f, 0x16, 0xc0, 0xd0, 0x99, 0x97, 0x08, 0x2d, 0xb9,
+ 0xc2, 0x00, 0x5f, 0x08, 0x2d, 0xc1, 0xc2, 0x24, 0xe2, 0x08, 0x2d, 0xc9,
+ 0x9b, 0x08, 0x2d, 0xd9, 0xc2, 0x0a, 0xe2, 0x08, 0x2d, 0xe0, 0x44, 0x0d,
+ 0x14, 0xc0, 0xd0, 0xa3, 0xca, 0x9c, 0x02, 0x01, 0x0a, 0xc0, 0x45, 0x02,
+ 0xde, 0xc0, 0xd0, 0xaf, 0x43, 0x02, 0xa0, 0x40, 0xd0, 0xc1, 0xc6, 0x06,
+ 0xdb, 0x01, 0x0a, 0xd9, 0x15, 0xc0, 0xd0, 0xcd, 0xc5, 0x9c, 0x06, 0x01,
+ 0x0a, 0xa9, 0x16, 0xc0, 0xd0, 0xd9, 0xc5, 0xd9, 0x1b, 0x01, 0x0a, 0x89,
+ 0xc7, 0x08, 0x79, 0x00, 0x05, 0xe1, 0xc4, 0x01, 0xce, 0x00, 0x05, 0xe8,
+ 0x42, 0x00, 0xb4, 0xc0, 0xd0, 0xe5, 0x0e, 0xc0, 0xd0, 0xf1, 0x05, 0xc0,
+ 0xd1, 0x01, 0x14, 0xc0, 0xd1, 0x0b, 0x42, 0x00, 0xe3, 0xc0, 0xd1, 0x17,
+ 0x07, 0xc0, 0xd1, 0x23, 0x15, 0xc0, 0xd1, 0x2f, 0x06, 0xc0, 0xd1, 0x41,
+ 0xc9, 0x11, 0xf6, 0x70, 0x01, 0x71, 0xcc, 0x89, 0xcd, 0x70, 0x01, 0x69,
+ 0x12, 0xc0, 0xd1, 0x4d, 0x03, 0xc0, 0xd1, 0x59, 0xc5, 0x1e, 0xc8, 0x70,
+ 0x03, 0xf1, 0xcd, 0x36, 0x86, 0x70, 0x03, 0xe1, 0xcb, 0x97, 0x9d, 0x70,
+ 0x01, 0x18, 0x4b, 0x6f, 0xc7, 0xc0, 0xd1, 0x6b, 0x47, 0x02, 0x0e, 0x40,
+ 0xd1, 0x73, 0x47, 0x02, 0x0e, 0xc0, 0xd1, 0xc5, 0x45, 0x00, 0xba, 0xc0,
+ 0xd2, 0x26, 0x4b, 0x6f, 0xc7, 0x40, 0xd2, 0x32, 0x43, 0x02, 0xab, 0xc0,
+ 0xd2, 0x3a, 0x43, 0x44, 0xc7, 0xc0, 0xd2, 0x46, 0xc5, 0x55, 0xd8, 0x0f,
+ 0x9a, 0x50, 0xd7, 0x27, 0xd0, 0x08, 0xff, 0xf9, 0x15, 0xc0, 0xd2, 0x52,
+ 0xd2, 0x4c, 0xc7, 0x08, 0xff, 0x71, 0x16, 0xc0, 0xd2, 0x6a, 0x03, 0xc0,
+ 0xd2, 0x76, 0x05, 0xc0, 0xd2, 0x88, 0x0e, 0xc0, 0xd2, 0x94, 0x06, 0xc0,
+ 0xd2, 0xa0, 0xd4, 0x39, 0xe4, 0x08, 0xff, 0x21, 0x49, 0x53, 0xa9, 0xc0,
+ 0xd2, 0xb8, 0x4b, 0x6f, 0xc7, 0xc0, 0xd2, 0xca, 0xc2, 0x00, 0x7a, 0x00,
+ 0x5e, 0x81, 0x47, 0x34, 0x2f, 0xc0, 0xd2, 0xea, 0xca, 0xa3, 0xdc, 0x00,
+ 0x5f, 0xa1, 0xc9, 0xab, 0xe2, 0x00, 0x5f, 0xa9, 0xca, 0x76, 0x52, 0x00,
+ 0x5f, 0xc8, 0x46, 0x09, 0x97, 0xc0, 0xd2, 0xfc, 0xd1, 0x50, 0xce, 0x08,
+ 0xb5, 0xc9, 0x47, 0x02, 0x0e, 0xc0, 0xd3, 0x20, 0x45, 0x00, 0xba, 0xc0,
+ 0xd3, 0x87, 0x4b, 0x6f, 0xc7, 0x40, 0xd3, 0x99, 0x45, 0x00, 0xba, 0xc0,
+ 0xd3, 0xb3, 0x4b, 0x92, 0x80, 0xc0, 0xd3, 0xe6, 0x4b, 0x8c, 0xbe, 0xc0,
+ 0xd4, 0x0a, 0x42, 0x00, 0x99, 0xc0, 0xd4, 0x2e, 0x4b, 0x6f, 0xc7, 0xc0,
+ 0xd4, 0x3a, 0x47, 0x02, 0x0e, 0x40, 0xd4, 0x64, 0x16, 0xc0, 0xd4, 0xb2,
+ 0x83, 0x00, 0xcb, 0x1b, 0x00, 0xd4, 0xc6, 0x87, 0x00, 0xcb, 0x5b, 0x00,
+ 0xd4, 0xd0, 0x97, 0x00, 0xcb, 0x3b, 0x00, 0xd4, 0xd8, 0x91, 0x00, 0xcb,
+ 0x4b, 0x00, 0xd4, 0xdc, 0x8b, 0x00, 0xcb, 0x21, 0x10, 0xc0, 0xd4, 0xe0,
+ 0x0d, 0xc0, 0xd4, 0xea, 0xc2, 0x0f, 0x9a, 0x00, 0xca, 0xf9, 0xc2, 0x00,
+ 0xd0, 0x00, 0xca, 0xf1, 0xc2, 0x02, 0x41, 0x00, 0xca, 0xe9, 0xc2, 0x00,
+ 0x87, 0x00, 0xca, 0xe1, 0xc2, 0x01, 0xc3, 0x00, 0xca, 0xd9, 0x12, 0xc0,
+ 0xd4, 0xf4, 0xc2, 0x00, 0xdb, 0x00, 0xca, 0xc1, 0xc2, 0x19, 0x2c, 0x00,
+ 0xca, 0xa9, 0xc2, 0x0d, 0xf6, 0x00, 0xca, 0xa1, 0xc2, 0x8d, 0x8f, 0x00,
+ 0xca, 0x88, 0x47, 0x10, 0x78, 0xc0, 0xd4, 0xfe, 0x49, 0xb2, 0x63, 0xc0,
+ 0xd5, 0x16, 0x46, 0x34, 0x6f, 0xc0, 0xd5, 0x2e, 0x45, 0xdb, 0x96, 0xc0,
+ 0xd5, 0x48, 0x47, 0x02, 0x0e, 0x40, 0xd5, 0x54, 0xc2, 0x17, 0x28, 0x0f,
+ 0xcc, 0x19, 0xcd, 0x77, 0xbb, 0x01, 0x05, 0xd0, 0x46, 0x04, 0x8f, 0xc0,
+ 0xd5, 0x60, 0xd1, 0x50, 0x79, 0x01, 0x36, 0x49, 0x42, 0x00, 0x10, 0xc0,
+ 0xd5, 0x6c, 0x06, 0xc0, 0xd5, 0x78, 0x15, 0xc0, 0xd5, 0x84, 0x03, 0xc0,
+ 0xd5, 0x9c, 0x05, 0xc0, 0xd5, 0xa8, 0xd7, 0x29, 0xb3, 0x01, 0x09, 0x49,
+ 0xcc, 0x8a, 0xd5, 0x0f, 0xac, 0x78, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x11,
+ 0x06, 0xc0, 0xd5, 0xb4, 0x0e, 0xc0, 0xd5, 0xc0, 0x14, 0xc0, 0xd5, 0xcc,
+ 0xce, 0x6f, 0xb6, 0x0f, 0xaf, 0x59, 0xcc, 0x86, 0xfd, 0x0f, 0xad, 0x89,
+ 0xd3, 0x3f, 0xf5, 0x0f, 0xad, 0x39, 0xd8, 0x23, 0x03, 0x01, 0x53, 0xb0,
+ 0x42, 0x00, 0xa9, 0xc0, 0xd5, 0xd8, 0xcc, 0x79, 0x42, 0x01, 0x00, 0x21,
+ 0xc7, 0xbc, 0x33, 0x01, 0x71, 0xd8, 0x00, 0xc0, 0xd5, 0xf0, 0xc9, 0xa1,
+ 0x3f, 0x0f, 0xc8, 0xa0, 0xcf, 0x69, 0xf9, 0x01, 0x36, 0x41, 0xc5, 0xdc,
+ 0x6d, 0x01, 0x30, 0x40, 0xc9, 0xb2, 0xfc, 0x0f, 0xa2, 0x71, 0xc7, 0xc4,
+ 0x6b, 0x0f, 0xa2, 0x68, 0xc4, 0x5e, 0x73, 0x01, 0x11, 0xa1, 0x00, 0x40,
+ 0xd5, 0xfa, 0xc5, 0x9b, 0x3f, 0x0f, 0x99, 0x09, 0xc7, 0xc2, 0x49, 0x01,
+ 0x4f, 0x38, 0x11, 0xc0, 0xd6, 0x06, 0xc7, 0xc0, 0x27, 0x00, 0x3d, 0x51,
+ 0x07, 0xc0, 0xd6, 0x18, 0xc7, 0xc0, 0x5f, 0x00, 0x3d, 0x41, 0x03, 0xc0,
+ 0xd6, 0x2a, 0x47, 0x02, 0x0e, 0xc0, 0xd6, 0x36, 0xc5, 0xdb, 0xfa, 0x00,
+ 0x3d, 0x80, 0x05, 0xc0, 0xd6, 0xa0, 0x46, 0x09, 0x97, 0x40, 0xd6, 0xac,
+ 0x43, 0x01, 0xd0, 0xc0, 0xd6, 0xd0, 0x96, 0x0f, 0x9d, 0x48, 0x05, 0xc0,
+ 0xd6, 0xee, 0xcc, 0x88, 0x65, 0x01, 0x71, 0x18, 0x05, 0xc0, 0xd6, 0xfa,
+ 0xcc, 0x88, 0x65, 0x01, 0x71, 0x10, 0xd3, 0x05, 0xf4, 0x01, 0x49, 0xd3,
+ 0x00, 0xd7, 0x06, 0xda, 0x1d, 0x08, 0x01, 0x49, 0xe0, 0xd0, 0x5e, 0xb2,
+ 0x0f, 0x15, 0x71, 0x47, 0x02, 0x0e, 0x40, 0xd7, 0x0c, 0x42, 0xe6, 0x8f,
+ 0xc0, 0xd7, 0x85, 0x23, 0xc0, 0xd7, 0x91, 0x22, 0xc0, 0xd7, 0xa3, 0x24,
+ 0x40, 0xd7, 0xaf, 0xc5, 0xb4, 0xb0, 0x0f, 0xd5, 0x28, 0xc4, 0x63, 0x7d,
+ 0x0f, 0xb4, 0x58, 0xc5, 0xdd, 0x21, 0x0f, 0xad, 0x91, 0xc3, 0x05, 0xb1,
+ 0x0f, 0xb4, 0xe0, 0xd3, 0x44, 0x56, 0x01, 0x56, 0xd9, 0xc5, 0xd7, 0x36,
+ 0x01, 0x5e, 0xb8, 0x42, 0x00, 0x49, 0xc0, 0xd7, 0xbb, 0x45, 0x05, 0xef,
+ 0x40, 0xd7, 0xc7, 0xc5, 0x61, 0xc0, 0x01, 0x31, 0xb9, 0xc8, 0x2d, 0xb2,
+ 0x01, 0x31, 0xb1, 0x19, 0xc0, 0xd7, 0xd9, 0xc7, 0x71, 0xa7, 0x01, 0x31,
+ 0x99, 0xc4, 0x83, 0x39, 0x01, 0x31, 0x91, 0xc4, 0x2a, 0x95, 0x01, 0x31,
+ 0x89, 0xc6, 0x73, 0xca, 0x01, 0x31, 0x80, 0x4d, 0x18, 0x5a, 0xc0, 0xd7,
+ 0xe5, 0xc5, 0x1e, 0xc8, 0x01, 0x12, 0x59, 0xc8, 0x1e, 0x3f, 0x01, 0x11,
+ 0x69, 0x12, 0xc0, 0xd7, 0xfd, 0x54, 0x3b, 0xb0, 0xc0, 0xd8, 0x09, 0xce,
+ 0x6f, 0xe0, 0x01, 0x57, 0xb1, 0x47, 0xc4, 0x17, 0xc0, 0xd8, 0x15, 0xd7,
+ 0x27, 0x5d, 0x01, 0x57, 0xd9, 0xc6, 0xce, 0x21, 0x01, 0x72, 0x58, 0xd0,
+ 0x59, 0x62, 0x01, 0x5e, 0xf8, 0xc2, 0x38, 0x5e, 0x0f, 0x9e, 0x31, 0x45,
+ 0x05, 0x88, 0x40, 0xd8, 0x21, 0xc5, 0xd4, 0x70, 0x0f, 0xb4, 0x70, 0x11,
+ 0xc0, 0xd8, 0x2d, 0xc6, 0xcb, 0x99, 0x0e, 0x9a, 0x81, 0xc5, 0x07, 0xeb,
+ 0x0e, 0x99, 0xb1, 0x43, 0x11, 0xf7, 0x40, 0xd8, 0x39, 0x03, 0xc0, 0xd8,
+ 0x45, 0xc5, 0xd9, 0x84, 0x0e, 0x99, 0x28, 0x0b, 0xc0, 0xd8, 0x51, 0xc8,
+ 0x35, 0xc9, 0x0e, 0x9a, 0x41, 0x07, 0xc0, 0xd8, 0x61, 0xc4, 0xe4, 0x4b,
+ 0x0e, 0x9a, 0x19, 0xc5, 0xd7, 0x45, 0x0e, 0x99, 0x00, 0xcb, 0x9a, 0x1b,
+ 0x0e, 0x9a, 0x99, 0xc9, 0xae, 0x73, 0x0e, 0x98, 0x68, 0x11, 0xc0, 0xd8,
+ 0x73, 0x43, 0x07, 0xa2, 0xc0, 0xd8, 0x7d, 0xc5, 0xb7, 0x35, 0x0e, 0x99,
+ 0x09, 0xc5, 0x04, 0xe2, 0x0e, 0x98, 0x30, 0xca, 0xa1, 0x8e, 0x0e, 0x9a,
+ 0x89, 0xcb, 0x96, 0xe2, 0x0e, 0x9a, 0x09, 0xc6, 0xd1, 0xe7, 0x0e, 0x98,
+ 0xc9, 0xc5, 0x39, 0x0b, 0x0e, 0x98, 0x60, 0xc7, 0xc3, 0x68, 0x0e, 0x9a,
+ 0x69, 0xcb, 0x4c, 0x26, 0x0e, 0x98, 0xb0, 0x16, 0xc0, 0xd8, 0x87, 0xc8,
+ 0xb7, 0x62, 0x0e, 0x9a, 0x59, 0xc6, 0x83, 0x26, 0x0e, 0x9a, 0x28, 0xc9,
+ 0xa8, 0xb8, 0x0e, 0x9a, 0x51, 0xcc, 0x81, 0x51, 0x0e, 0x9a, 0x11, 0xc7,
+ 0x2d, 0x56, 0x0e, 0x99, 0xd1, 0x10, 0xc0, 0xd8, 0x91, 0xc3, 0x2c, 0xff,
+ 0x0e, 0x98, 0xe0, 0xc3, 0x13, 0x69, 0x0e, 0x9a, 0x31, 0xc6, 0xcc, 0x17,
+ 0x0e, 0x98, 0x90, 0xc3, 0x1c, 0xe6, 0x0e, 0x9a, 0x21, 0xc5, 0x20, 0xd8,
+ 0x0e, 0x98, 0xb8, 0xc6, 0xcb, 0x2d, 0x0e, 0x9a, 0x01, 0xc6, 0x14, 0xc5,
+ 0x0e, 0x99, 0xc9, 0xc4, 0x7c, 0xaa, 0x0e, 0x98, 0x40, 0xc8, 0x55, 0xc9,
+ 0x0e, 0x99, 0x43, 0x00, 0xd8, 0xa3, 0xca, 0xa7, 0xd8, 0x0e, 0x99, 0xf1,
+ 0xc8, 0xbd, 0x6a, 0x0e, 0x99, 0x91, 0xcc, 0x8b, 0x7d, 0x0e, 0x99, 0x78,
+ 0xc5, 0xdc, 0x5e, 0x0e, 0x99, 0xa9, 0x07, 0x40, 0xd8, 0xa9, 0x03, 0xc0,
+ 0xd8, 0xb9, 0xc5, 0xdd, 0x58, 0x0e, 0x99, 0x51, 0xca, 0xa2, 0x06, 0x0e,
+ 0x98, 0x98, 0xc6, 0xcf, 0x53, 0x0e, 0x99, 0x39, 0xcc, 0x84, 0xc9, 0x0e,
+ 0x98, 0x50, 0xce, 0x70, 0x7a, 0x0e, 0x99, 0x19, 0xcc, 0x88, 0x11, 0x0e,
+ 0x98, 0x71, 0xc6, 0x69, 0x74, 0x0e, 0x98, 0x48, 0x45, 0x0a, 0xe9, 0xc0,
+ 0xd8, 0xc5, 0xcd, 0x79, 0x82, 0x0f, 0xa6, 0x30, 0x46, 0x36, 0xb7, 0xc0,
+ 0xd8, 0xd1, 0xc5, 0xbc, 0xed, 0x0f, 0xa9, 0x69, 0xc6, 0x30, 0xf3, 0x0f,
+ 0xa7, 0xd0, 0x45, 0x00, 0xba, 0xc0, 0xd8, 0xe9, 0x42, 0x00, 0x49, 0xc0,
+ 0xd9, 0x09, 0x4b, 0x6f, 0xc7, 0xc0, 0xd9, 0x15, 0xce, 0x74, 0xcc, 0x00,
+ 0x62, 0xb1, 0x46, 0x09, 0x97, 0xc0, 0xd9, 0x3b, 0x4f, 0x63, 0xa5, 0x40,
+ 0xd9, 0x5f, 0xc5, 0x11, 0x55, 0x0f, 0xa1, 0x78, 0xd0, 0x5d, 0x52, 0x01,
+ 0x4e, 0xa9, 0xcf, 0x66, 0x66, 0x01, 0x4e, 0xa0, 0xc8, 0x18, 0x67, 0x01,
+ 0x11, 0xe3, 0x00, 0xd9, 0x6f, 0x45, 0x00, 0x8c, 0x40, 0xd9, 0x73, 0x46,
+ 0x09, 0x97, 0xc0, 0xd9, 0x7f, 0xc2, 0x00, 0x7a, 0x08, 0xa6, 0x39, 0x03,
+ 0xc0, 0xd9, 0xa3, 0xc5, 0xd5, 0xce, 0x08, 0xa6, 0x29, 0x45, 0x00, 0xba,
+ 0xc0, 0xd9, 0xaf, 0x4b, 0x6f, 0xc7, 0xc0, 0xd9, 0xc5, 0x47, 0x02, 0x0e,
+ 0x40, 0xd9, 0xeb, 0xc2, 0x00, 0x3d, 0x01, 0x02, 0x51, 0xca, 0x9e, 0x0a,
+ 0x01, 0x72, 0x90, 0xe0, 0x05, 0x07, 0x08, 0x59, 0xd0, 0x1b, 0xc0, 0xda,
+ 0x52, 0x44, 0x00, 0xbb, 0xc0, 0xda, 0x5e, 0x49, 0x5c, 0xf2, 0x40, 0xda,
+ 0x8a, 0x09, 0xc0, 0xda, 0x96, 0x42, 0x00, 0x74, 0xc0, 0xda, 0xa2, 0x05,
+ 0xc0, 0xda, 0xae, 0xd5, 0x32, 0x81, 0x00, 0x78, 0x39, 0x15, 0xc0, 0xda,
+ 0xc0, 0x04, 0xc0, 0xda, 0xcc, 0xd5, 0x32, 0xff, 0x00, 0x78, 0x61, 0x10,
+ 0xc0, 0xda, 0xd6, 0x16, 0xc0, 0xda, 0xe2, 0x14, 0xc0, 0xda, 0xec, 0x4c,
+ 0x85, 0x65, 0xc0, 0xda, 0xf8, 0xc7, 0xc3, 0xa7, 0x00, 0x7c, 0x21, 0xc6,
+ 0xcb, 0x09, 0x00, 0x7c, 0x29, 0xd6, 0x2d, 0xfc, 0x00, 0x7e, 0x89, 0xd3,
+ 0x3f, 0xbc, 0x00, 0x7e, 0xc8, 0x4d, 0x79, 0x27, 0xc0, 0xdb, 0x04, 0x46,
+ 0x02, 0x0f, 0x40, 0xdb, 0x10, 0x15, 0xc0, 0xdb, 0x70, 0xc9, 0xaf, 0xf6,
+ 0x00, 0x78, 0xc0, 0xc4, 0x15, 0xe7, 0x00, 0x79, 0x01, 0xc3, 0x05, 0x14,
+ 0x00, 0x79, 0x09, 0x16, 0xc0, 0xdb, 0x7c, 0x08, 0xc0, 0xdb, 0x88, 0x15,
+ 0xc0, 0xdb, 0x94, 0xc5, 0x06, 0xdb, 0x00, 0x79, 0x41, 0xc4, 0x26, 0x78,
+ 0x00, 0x79, 0x49, 0x45, 0x01, 0xce, 0x40, 0xdb, 0xa0, 0xc2, 0x04, 0xc6,
+ 0x00, 0x7b, 0x89, 0x8b, 0x00, 0x7b, 0x93, 0x00, 0xdb, 0xc4, 0x97, 0x00,
+ 0x7b, 0xa3, 0x00, 0xdb, 0xc8, 0x48, 0xb2, 0x2d, 0xc0, 0xdb, 0xcc, 0x87,
+ 0x00, 0x7b, 0xd3, 0x00, 0xdb, 0xda, 0x91, 0x00, 0x7b, 0xe3, 0x00, 0xdb,
+ 0xde, 0xca, 0x9d, 0xe2, 0x00, 0x7c, 0x02, 0x00, 0xdb, 0xe2, 0xcd, 0x7c,
+ 0xc2, 0x00, 0x7d, 0xf8, 0xca, 0x9a, 0xf4, 0x00, 0x7e, 0x01, 0xca, 0xa5,
+ 0x9e, 0x00, 0x7e, 0x09, 0xc9, 0xb2, 0x5a, 0x00, 0x7e, 0x11, 0xca, 0xa3,
+ 0x96, 0x00, 0x7e, 0x18, 0x1b, 0xc0, 0xdb, 0xe6, 0x51, 0x54, 0xec, 0xc0,
+ 0xdc, 0x00, 0x16, 0xc0, 0xdc, 0x08, 0x03, 0x40, 0xdc, 0x14, 0xe0, 0x02,
+ 0x07, 0x01, 0x6b, 0x78, 0x43, 0x02, 0xa3, 0xc0, 0xdc, 0x20, 0xdc, 0x13,
+ 0xa5, 0x01, 0x02, 0x89, 0xce, 0x6f, 0xb6, 0x0f, 0xaf, 0x51, 0xcc, 0x86,
+ 0xfd, 0x0f, 0xad, 0x81, 0xc6, 0x78, 0x78, 0x0f, 0xa4, 0xa9, 0x55, 0x33,
+ 0x7d, 0xc0, 0xdc, 0x2a, 0x48, 0x19, 0xb9, 0xc0, 0xdc, 0x36, 0xce, 0x71,
+ 0xd8, 0x01, 0x4e, 0x49, 0xd8, 0x23, 0x03, 0x01, 0x53, 0xa9, 0xd1, 0x40,
+ 0xee, 0x0f, 0xa3, 0x61, 0xd3, 0x40, 0xec, 0x0f, 0xa3, 0x68, 0xd7, 0x26,
+ 0xa5, 0x0f, 0xc5, 0x81, 0x58, 0x21, 0x6b, 0xc0, 0xdc, 0x42, 0x57, 0x2b,
+ 0x23, 0x40, 0xdc, 0x54, 0x0e, 0xc0, 0xdc, 0x60, 0x42, 0x01, 0xc3, 0xc0,
+ 0xdc, 0x70, 0x06, 0xc0, 0xdc, 0x82, 0x14, 0xc0, 0xdc, 0x98, 0xc5, 0x4d,
+ 0x40, 0x00, 0x32, 0x83, 0x00, 0xdc, 0xae, 0x08, 0xc0, 0xdc, 0xbb, 0x15,
+ 0xc0, 0xdc, 0xd6, 0x45, 0x05, 0x75, 0xc0, 0xdd, 0x01, 0x16, 0xc0, 0xdd,
+ 0x13, 0x05, 0xc0, 0xdd, 0x2f, 0x42, 0x00, 0xd0, 0xc0, 0xdd, 0x3b, 0x12,
+ 0xc0, 0xdd, 0x47, 0x18, 0xc0, 0xdd, 0x5d, 0xd2, 0x4d, 0xd5, 0x00, 0x44,
+ 0x39, 0x07, 0xc0, 0xdd, 0x69, 0xd0, 0x5e, 0x02, 0x00, 0x32, 0xf9, 0xc8,
+ 0xbe, 0xf2, 0x00, 0x32, 0xc9, 0xce, 0x72, 0x2c, 0x00, 0x32, 0xb9, 0xcd,
+ 0x2c, 0xb2, 0x00, 0x30, 0xf9, 0x47, 0x34, 0x2f, 0x40, 0xdd, 0x75, 0x46,
+ 0x09, 0x97, 0xc0, 0xdd, 0x81, 0x44, 0x00, 0x67, 0xc0, 0xdd, 0xa5, 0xcb,
+ 0x90, 0x4f, 0x00, 0x30, 0x39, 0xc9, 0xb3, 0x71, 0x00, 0x30, 0x30, 0x48,
+ 0x19, 0x9b, 0xc0, 0xdd, 0xb1, 0x46, 0x02, 0x0f, 0x40, 0xdd, 0xc3, 0xd0,
+ 0x48, 0x12, 0x00, 0x2a, 0xf9, 0xc9, 0x2d, 0x85, 0x00, 0x2a, 0xd0, 0xc4,
+ 0x0a, 0x8b, 0x00, 0x2a, 0xe9, 0x4e, 0x0b, 0x18, 0x40, 0xde, 0x3c, 0xcf,
+ 0x0f, 0x0a, 0x00, 0x2a, 0xe1, 0xcc, 0x81, 0x39, 0x00, 0x2a, 0xd8, 0x4e,
+ 0x0b, 0x18, 0xc0, 0xde, 0xb5, 0xd1, 0x2b, 0xed, 0x0f, 0x4a, 0x40, 0xc4,
+ 0x6b, 0x52, 0x0f, 0x49, 0x11, 0x06, 0xc0, 0xdf, 0x35, 0xc4, 0x76, 0x31,
+ 0x0f, 0x49, 0x21, 0xc4, 0xe4, 0xb3, 0x0f, 0x49, 0x29, 0x04, 0xc0, 0xdf,
+ 0x41, 0x15, 0xc0, 0xdf, 0x4b, 0xc2, 0x00, 0x67, 0x0f, 0x49, 0x41, 0xc2,
+ 0x00, 0x39, 0x0f, 0x49, 0x51, 0x87, 0x0f, 0x49, 0x59, 0xc2, 0x00, 0x87,
+ 0x0f, 0x49, 0x61, 0x8b, 0x0f, 0x49, 0x69, 0x91, 0x0f, 0x49, 0x71, 0x1b,
+ 0xc0, 0xdf, 0x57, 0xc3, 0x7e, 0x89, 0x0f, 0x49, 0x89, 0x10, 0xc0, 0xdf,
+ 0x61, 0x0d, 0xc0, 0xdf, 0x73, 0x97, 0x0f, 0x49, 0xa9, 0xc4, 0xe1, 0x4b,
+ 0x0f, 0x49, 0xb1, 0xc3, 0x11, 0xee, 0x0f, 0x49, 0xb9, 0xc2, 0x00, 0xd0,
+ 0x0f, 0x49, 0xc1, 0xc4, 0xd8, 0x3a, 0x0f, 0x49, 0xc9, 0x09, 0xc0, 0xdf,
+ 0x85, 0xc2, 0x00, 0x16, 0x0f, 0x49, 0xe1, 0xc2, 0x02, 0x41, 0x0f, 0x49,
+ 0xf1, 0xc3, 0xa9, 0xfc, 0x0f, 0x4a, 0x08, 0xc8, 0x01, 0xbf, 0x0f, 0x4a,
+ 0x31, 0xd4, 0x3d, 0x2c, 0x0f, 0x4a, 0x48, 0xc4, 0x33, 0x5e, 0x0f, 0x4a,
+ 0x51, 0xd0, 0x56, 0xc9, 0x0f, 0x4a, 0x58, 0xc4, 0x15, 0xe7, 0x0f, 0x4a,
+ 0x81, 0xc3, 0x05, 0x14, 0x0f, 0x4a, 0x89, 0x16, 0xc0, 0xdf, 0x8f, 0x08,
+ 0xc0, 0xdf, 0x9b, 0x15, 0xc0, 0xdf, 0xa7, 0xc5, 0x06, 0xdb, 0x0f, 0x4a,
+ 0xc1, 0xc4, 0x26, 0x78, 0x0f, 0x4a, 0xc8, 0xd0, 0x0f, 0x09, 0x0f, 0x4a,
+ 0xf1, 0xcd, 0x2c, 0xb2, 0x0f, 0x4a, 0xf8, 0x47, 0xc5, 0x21, 0xc0, 0xdf,
+ 0xb3, 0xc4, 0xe4, 0x63, 0x0f, 0xba, 0x13, 0x00, 0xdf, 0xbf, 0xcb, 0x8c,
+ 0xd4, 0x0f, 0xb8, 0x79, 0xca, 0x9a, 0xfe, 0x0f, 0xb9, 0xf1, 0xc4, 0x1a,
+ 0xa8, 0x0f, 0xba, 0xc8, 0x14, 0xc0, 0xdf, 0xc3, 0xc7, 0xc8, 0xe0, 0x0f,
+ 0xb8, 0x99, 0x46, 0x4c, 0x4a, 0xc0, 0xdf, 0xd2, 0x03, 0x40, 0xdf, 0xde,
+ 0x42, 0x00, 0xfa, 0xc0, 0xdf, 0xf0, 0xc8, 0xbe, 0x7a, 0x0f, 0xbb, 0x80,
+ 0x11, 0xc0, 0xdf, 0xff, 0xd2, 0x4e, 0x1d, 0x0f, 0xb8, 0x71, 0xca, 0xa1,
+ 0x52, 0x0f, 0xba, 0xf9, 0x17, 0x40, 0xe0, 0x0e, 0xc5, 0xd7, 0x13, 0x0f,
+ 0xb9, 0xfb, 0x00, 0xe0, 0x1a, 0x42, 0x00, 0x74, 0xc0, 0xe0, 0x20, 0xc4,
+ 0xdf, 0x17, 0x0f, 0xba, 0x69, 0xc6, 0x7b, 0x50, 0x0f, 0xba, 0x88, 0x07,
+ 0xc0, 0xe0, 0x2c, 0xc8, 0xba, 0xfa, 0x0f, 0xb8, 0xc2, 0x00, 0xe0, 0x44,
+ 0x0b, 0xc0, 0xe0, 0x4a, 0xc8, 0xbb, 0x32, 0x0f, 0xb9, 0x40, 0x17, 0xc0,
+ 0xe0, 0x5c, 0x42, 0x00, 0x65, 0xc0, 0xe0, 0x68, 0xc5, 0xd4, 0x93, 0x0f,
+ 0xb8, 0xd9, 0xc5, 0xac, 0x22, 0x0f, 0xba, 0x39, 0xce, 0x6f, 0x62, 0x0f,
+ 0xba, 0x79, 0x16, 0xc0, 0xe0, 0x75, 0xc3, 0xc9, 0x9a, 0x0f, 0xba, 0xa0,
+ 0xcb, 0x97, 0xb3, 0x0f, 0xb9, 0x59, 0x43, 0x00, 0xe3, 0xc0, 0xe0, 0x84,
+ 0xc2, 0x01, 0x29, 0x0f, 0xb8, 0x09, 0x0e, 0xc0, 0xe0, 0x8e, 0xc6, 0xcd,
+ 0xd3, 0x0f, 0xb9, 0xd1, 0xca, 0x9a, 0xcc, 0x0f, 0xb9, 0xe9, 0xc4, 0x04,
+ 0x65, 0x0f, 0xba, 0xb9, 0xc6, 0xd2, 0x4d, 0x0f, 0xba, 0xd8, 0xc7, 0xc2,
+ 0xb9, 0x0f, 0xb9, 0x51, 0xc8, 0xba, 0xe2, 0x0f, 0xba, 0x98, 0xc3, 0x04,
+ 0xe4, 0x0f, 0xb8, 0xa9, 0xc3, 0x00, 0x2e, 0x0f, 0xbb, 0x78, 0xd0, 0x5d,
+ 0x22, 0x0f, 0xb8, 0x83, 0x00, 0xe0, 0xa3, 0xc8, 0xbe, 0xc2, 0x0f, 0xb9,
+ 0xc1, 0xc4, 0x97, 0x51, 0x0f, 0xbb, 0x88, 0xc3, 0x02, 0x11, 0x0f, 0xb8,
+ 0x21, 0x9a, 0x0f, 0xba, 0x50, 0xc9, 0xaf, 0xed, 0x0f, 0xb8, 0x01, 0xc7,
+ 0xc8, 0x62, 0x0f, 0xba, 0x08, 0xc3, 0x1a, 0x7c, 0x0f, 0xb8, 0xd1, 0xc2,
+ 0x01, 0xdf, 0x0f, 0xba, 0x48, 0xc4, 0x91, 0x3d, 0x0f, 0xb8, 0xe3, 0x00,
+ 0xe0, 0xa7, 0xcb, 0x91, 0x36, 0x0f, 0xb9, 0x08, 0x11, 0xc0, 0xe0, 0xad,
+ 0x44, 0x01, 0xcf, 0x40, 0xe0, 0xb9, 0xd7, 0x08, 0xf0, 0x01, 0x53, 0x78,
+ 0xd3, 0x43, 0xab, 0x0f, 0x9f, 0x39, 0xc5, 0x46, 0x98, 0x0f, 0xb4, 0xb8,
+ 0x1d, 0xc0, 0xe0, 0xc5, 0x1e, 0xc0, 0xe0, 0xed, 0x1f, 0xc0, 0xe1, 0x15,
+ 0x20, 0xc0, 0xe1, 0x3d, 0x21, 0xc0, 0xe1, 0x65, 0x22, 0x40, 0xe1, 0x8d,
+ 0xd3, 0x41, 0x97, 0x01, 0x3f, 0x91, 0x05, 0xc0, 0xe1, 0x9f, 0xd1, 0x05,
+ 0x75, 0x01, 0x0d, 0xd1, 0x16, 0xc0, 0xe1, 0xab, 0x48, 0x03, 0xc8, 0xc0,
+ 0xe1, 0xb7, 0xcb, 0x87, 0x8d, 0x01, 0x50, 0x88, 0x46, 0x00, 0x8b, 0x40,
+ 0xe1, 0xbd, 0xda, 0x19, 0xc8, 0x01, 0x37, 0x11, 0xc3, 0x92, 0x53, 0x01,
+ 0x5e, 0xc8, 0x8d, 0x00, 0x01, 0x53, 0x00, 0xe1, 0xc9, 0x8f, 0x01, 0x02,
+ 0x10, 0xc2, 0x00, 0xdb, 0x08, 0xba, 0x31, 0x83, 0x08, 0xb8, 0x70, 0xc2,
+ 0x00, 0xc1, 0x08, 0xba, 0x29, 0xc2, 0x19, 0x2c, 0x08, 0xb8, 0x81, 0x83,
+ 0x08, 0xb8, 0x19, 0xc2, 0x01, 0x30, 0x08, 0xb8, 0x10, 0x06, 0xc0, 0xe1,
+ 0xcf, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xa1, 0x83, 0x08, 0xb8, 0x98, 0x16,
+ 0xc0, 0xe1, 0xd9, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0x61, 0x83, 0x08, 0xb8,
+ 0x20, 0x83, 0x08, 0xba, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0x58, 0x49,
+ 0x0c, 0x8d, 0x40, 0xe1, 0xe3, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xc9, 0x83,
+ 0x08, 0xb8, 0x50, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xc1, 0x83, 0x08, 0xb8,
+ 0x40, 0xc2, 0x00, 0xd0, 0x08, 0xb8, 0xb9, 0x83, 0x08, 0xb8, 0xa8, 0xc2,
+ 0x00, 0xd0, 0x08, 0xb8, 0x39, 0x83, 0x08, 0xb8, 0x30, 0xc2, 0x00, 0xd0,
+ 0x08, 0xb8, 0x09, 0x83, 0x08, 0xb8, 0x00, 0xc5, 0xdd, 0x08, 0x08, 0xb9,
+ 0xf1, 0x15, 0xc0, 0xe1, 0xf5, 0xc6, 0xd0, 0xeb, 0x08, 0xb9, 0x58, 0xc4,
+ 0x18, 0x10, 0x08, 0xb9, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xb9, 0xb0, 0xc3,
+ 0x0d, 0x14, 0x08, 0xb9, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xb9, 0xa0, 0xc4,
+ 0x02, 0xde, 0x08, 0xb9, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xb9, 0x90, 0x8f,
+ 0x08, 0xb9, 0x51, 0x8b, 0x08, 0xb9, 0x49, 0x99, 0x08, 0xb9, 0x39, 0x83,
+ 0x08, 0xb9, 0x08, 0x97, 0x08, 0xb9, 0x28, 0x8b, 0x08, 0xb9, 0x18, 0xca,
+ 0x9f, 0x04, 0x08, 0xb8, 0xf9, 0x83, 0x08, 0xb8, 0xe8, 0xc2, 0x01, 0x9d,
+ 0x01, 0x1c, 0xab, 0x00, 0xe2, 0x01, 0x44, 0x48, 0xaa, 0x40, 0xe2, 0x05,
+ 0xc9, 0x52, 0x08, 0x01, 0x1b, 0xb0, 0xc9, 0x52, 0x08, 0x01, 0x1b, 0xc8,
+ 0xc3, 0x01, 0xbb, 0x01, 0x1b, 0x9b, 0x00, 0xe2, 0x11, 0xc5, 0xd8, 0xf3,
+ 0x01, 0x19, 0xb0, 0xc2, 0x01, 0x23, 0x01, 0x1b, 0xa1, 0xce, 0x6c, 0xde,
+ 0x01, 0x1a, 0x30, 0x00, 0xc0, 0xe2, 0x17, 0xca, 0x6c, 0xe2, 0x01, 0x1a,
+ 0x78, 0x43, 0x01, 0x47, 0xc0, 0xe2, 0x29, 0x42, 0x05, 0x03, 0xc0, 0xe2,
+ 0x33, 0xcf, 0x67, 0xdd, 0x01, 0x1a, 0xd0, 0xd1, 0x52, 0x00, 0x01, 0x1b,
+ 0x71, 0x16, 0xc0, 0xe2, 0x3d, 0xc8, 0x7d, 0xf2, 0x01, 0x19, 0xf9, 0xca,
+ 0x9a, 0x9a, 0x01, 0x19, 0xb8, 0xc8, 0xb5, 0xea, 0x01, 0x1b, 0x51, 0x46,
+ 0x02, 0xd2, 0x40, 0xe2, 0x49, 0xcb, 0x94, 0xf3, 0x01, 0x1b, 0x39, 0xca,
+ 0x6c, 0xe2, 0x01, 0x1a, 0x28, 0xc9, 0x20, 0xa8, 0x01, 0x1b, 0x21, 0xc8,
+ 0x52, 0x09, 0x01, 0x1a, 0xd8, 0x49, 0x07, 0x49, 0xc0, 0xe2, 0x67, 0xcf,
+ 0x6a, 0x53, 0x01, 0x12, 0x80, 0x0a, 0xc0, 0xe2, 0x73, 0x15, 0xc0, 0xe2,
+ 0x7d, 0xc2, 0x00, 0x5f, 0x08, 0x59, 0x61, 0x1b, 0xc0, 0xe2, 0x8b, 0xc2,
+ 0x00, 0x4e, 0x08, 0x59, 0x41, 0x10, 0xc0, 0xe2, 0x95, 0x06, 0xc0, 0xe2,
+ 0xa9, 0x16, 0xc0, 0xe2, 0xb3, 0xc2, 0x1c, 0x52, 0x08, 0x58, 0xc1, 0xc2,
+ 0x00, 0x89, 0x08, 0x58, 0xb9, 0x09, 0xc0, 0xe2, 0xc3, 0x1a, 0xc0, 0xe2,
+ 0xd3, 0xc2, 0x00, 0x3c, 0x08, 0x58, 0x81, 0x97, 0x08, 0x58, 0x73, 0x00,
+ 0xe2, 0xe3, 0x8b, 0x08, 0x58, 0x63, 0x00, 0xe2, 0xe7, 0x91, 0x08, 0x58,
+ 0x53, 0x00, 0xe2, 0xeb, 0x87, 0x08, 0x58, 0x43, 0x00, 0xe2, 0xef, 0x83,
+ 0x08, 0x58, 0x03, 0x00, 0xe2, 0xf3, 0xc2, 0x00, 0x67, 0x08, 0x58, 0xf1,
+ 0xc2, 0x14, 0xda, 0x08, 0x58, 0xf9, 0x04, 0xc0, 0xe3, 0x09, 0xc2, 0x01,
+ 0x19, 0x08, 0x59, 0x69, 0xc2, 0x00, 0x49, 0x08, 0x59, 0x71, 0x1c, 0x40,
+ 0xe3, 0x13, 0xc3, 0x05, 0x14, 0x08, 0x08, 0x3b, 0x00, 0xe3, 0x1d, 0x16,
+ 0xc0, 0xe3, 0x21, 0x08, 0xc0, 0xe3, 0x32, 0x15, 0xc0, 0xe3, 0x3a, 0xc5,
+ 0x06, 0xdb, 0x08, 0x08, 0x73, 0x00, 0xe3, 0x4c, 0xc4, 0x26, 0x78, 0x08,
+ 0x08, 0x7a, 0x00, 0xe3, 0x57, 0x46, 0x0f, 0x88, 0xc0, 0xe3, 0x64, 0x4e,
+ 0x72, 0x02, 0x40, 0xe3, 0x7a, 0xce, 0x71, 0x22, 0x08, 0x09, 0xf1, 0xcd,
+ 0x7d, 0xb9, 0x08, 0x09, 0xf8, 0x0e, 0xc0, 0xe3, 0x86, 0x46, 0x11, 0x39,
+ 0xc0, 0xe3, 0x92, 0x42, 0x00, 0x58, 0xc0, 0xe3, 0xcb, 0x49, 0x07, 0xbb,
+ 0xc0, 0xe3, 0xd7, 0x43, 0x11, 0x49, 0xc0, 0xe3, 0xef, 0x46, 0x00, 0x2c,
+ 0x40, 0xe4, 0x07, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x81, 0xc6, 0x02, 0xd1,
+ 0x0f, 0xbc, 0x30, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x59, 0xd2, 0x4d, 0x57,
+ 0x0f, 0xbd, 0xb8, 0xd6, 0x08, 0x88, 0x01, 0x1f, 0x09, 0xcd, 0x00, 0x32,
+ 0x01, 0x1e, 0xf9, 0xcb, 0x1a, 0x50, 0x01, 0x1e, 0xe9, 0xce, 0x25, 0xad,
+ 0x01, 0x1d, 0xab, 0x00, 0xe4, 0x1f, 0x45, 0x01, 0xce, 0xc0, 0xe4, 0x25,
+ 0x46, 0x00, 0x2c, 0xc0, 0xe4, 0x3d, 0x45, 0x00, 0x49, 0xc0, 0xe4, 0x47,
+ 0xd7, 0x15, 0x64, 0x01, 0x49, 0xd8, 0x46, 0x00, 0x8b, 0x40, 0xe4, 0x51,
+ 0x00, 0xc0, 0xe4, 0x5d, 0xc3, 0x00, 0x74, 0x0f, 0x9d, 0x98, 0xc4, 0x01,
+ 0xc3, 0x0f, 0xa8, 0xb3, 0x00, 0xe4, 0x69, 0x95, 0x0f, 0xa6, 0xd0, 0x84,
+ 0x01, 0x88, 0x2b, 0x00, 0xe4, 0x6f, 0x92, 0x01, 0x88, 0x31, 0x8f, 0x01,
+ 0x88, 0x39, 0x88, 0x01, 0x88, 0x41, 0x86, 0x01, 0x88, 0x49, 0x96, 0x01,
+ 0x88, 0x51, 0x90, 0x01, 0x88, 0x5b, 0x00, 0xe4, 0x73, 0x8e, 0x01, 0x88,
+ 0x63, 0x00, 0xe4, 0x7e, 0x89, 0x01, 0x88, 0x6b, 0x00, 0xe4, 0x82, 0x8d,
+ 0x01, 0x88, 0x73, 0x00, 0xe4, 0x92, 0x8a, 0x01, 0x88, 0x79, 0x8c, 0x01,
+ 0x88, 0x83, 0x00, 0xe4, 0x96, 0x93, 0x01, 0x88, 0x89, 0x9a, 0x01, 0x88,
+ 0x91, 0x9c, 0x01, 0x88, 0xbb, 0x00, 0xe4, 0x9a, 0x85, 0x01, 0x88, 0xc3,
+ 0x00, 0xe4, 0xa6, 0x95, 0x01, 0x88, 0xcb, 0x00, 0xe4, 0xaa, 0x94, 0x01,
+ 0x88, 0xb1, 0x83, 0x01, 0x88, 0xd3, 0x00, 0xe4, 0xae, 0x91, 0x01, 0x88,
+ 0xdb, 0x00, 0xe4, 0xcb, 0x87, 0x01, 0x88, 0xe3, 0x00, 0xe4, 0xe5, 0x8b,
+ 0x01, 0x89, 0x3b, 0x00, 0xe4, 0xfc, 0x97, 0x01, 0x89, 0x43, 0x00, 0xe5,
+ 0x15, 0x98, 0x01, 0x89, 0x50, 0x92, 0x01, 0x8d, 0xa1, 0x96, 0x01, 0x8d,
+ 0xa9, 0x8d, 0x01, 0x8d, 0xb1, 0x8a, 0x01, 0x8d, 0xb9, 0x89, 0x01, 0x8d,
+ 0xd8, 0x9e, 0x0f, 0xd8, 0x03, 0x00, 0xe5, 0x1b, 0xa0, 0x0f, 0xd8, 0x1b,
+ 0x00, 0xe5, 0x3b, 0x9f, 0x0f, 0xd8, 0x0b, 0x00, 0xe5, 0x4d, 0xa2, 0x0f,
+ 0xd8, 0x7b, 0x00, 0xe5, 0x66, 0xa1, 0x0f, 0xd8, 0x3b, 0x00, 0xe5, 0x6a,
+ 0xa3, 0x0f, 0xd8, 0xf0, 0x00, 0xc0, 0xe5, 0x75, 0x02, 0x40, 0xe5, 0xbf,
+ 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xc1, 0xc5, 0x1c, 0xae, 0x0f, 0xa4, 0xc8,
+ 0x4a, 0xa5, 0x3a, 0x40, 0xe5, 0xcb, 0xc8, 0xb5, 0x7a, 0x0f, 0xd3, 0x81,
+ 0xc8, 0xb8, 0x02, 0x0f, 0xcf, 0xb1, 0x11, 0x40, 0xe5, 0xe3, 0x42, 0x00,
+ 0xb0, 0xc0, 0xe5, 0xf2, 0x4f, 0x2a, 0x5c, 0xc0, 0xe5, 0xff, 0x46, 0xcd,
+ 0x25, 0xc0, 0xe6, 0x15, 0xc5, 0xd5, 0x56, 0x00, 0xda, 0xe1, 0x46, 0x09,
+ 0x97, 0xc0, 0xe6, 0x21, 0x47, 0x02, 0x0e, 0xc0, 0xe6, 0x45, 0xc9, 0xb3,
+ 0x3b, 0x00, 0xda, 0x21, 0x4b, 0x6f, 0xc7, 0xc0, 0xe6, 0xe9, 0x45, 0x00,
+ 0xba, 0x40, 0xe7, 0x1a, 0xcd, 0x7e, 0x6f, 0x0f, 0x9e, 0x00, 0xc9, 0x11,
+ 0xf6, 0x0b, 0x57, 0xa9, 0x4a, 0x51, 0x89, 0xc0, 0xe7, 0x38, 0x47, 0x02,
+ 0x0e, 0x40, 0xe7, 0x4a, 0xc6, 0x00, 0x91, 0x0f, 0xb5, 0xe1, 0xc5, 0xd4,
+ 0x66, 0x0f, 0xa3, 0xe1, 0xc6, 0x50, 0xe2, 0x0f, 0x9b, 0xe1, 0xc5, 0x55,
+ 0x91, 0x0f, 0xa1, 0x20, 0x12, 0xc0, 0xe7, 0xc2, 0x83, 0x05, 0x35, 0x01,
+ 0x0d, 0xc0, 0xe7, 0xd8, 0x97, 0x05, 0x35, 0x11, 0xc2, 0x02, 0xe0, 0x05,
+ 0x35, 0x21, 0x14, 0xc0, 0xe7, 0xfb, 0x16, 0xc0, 0xe8, 0x0d, 0x91, 0x05,
+ 0x35, 0x39, 0x10, 0xc0, 0xe8, 0x19, 0x8b, 0x05, 0x35, 0x49, 0x0e, 0xc0,
+ 0xe8, 0x46, 0x8f, 0x05, 0x35, 0x9b, 0x00, 0xe8, 0x5e, 0x15, 0xc0, 0xe8,
+ 0x76, 0x1b, 0xc0, 0xe8, 0x90, 0x19, 0xc0, 0xe8, 0xa0, 0x08, 0x40, 0xe8,
+ 0xaa, 0x0f, 0xc0, 0xe8, 0xc0, 0xc3, 0x0d, 0xe5, 0x05, 0x37, 0xa0, 0x47,
+ 0x01, 0xeb, 0xc0, 0xe8, 0xcc, 0x00, 0xc0, 0xe8, 0xd2, 0x15, 0x40, 0xe8,
+ 0xde, 0x15, 0xc0, 0xe8, 0xea, 0x43, 0x0c, 0xe0, 0xc0, 0xe8, 0xf6, 0x4f,
+ 0x30, 0x90, 0xc0, 0xe9, 0x02, 0x4b, 0x6f, 0xc7, 0xc0, 0xe9, 0x0c, 0x47,
+ 0x02, 0x0e, 0x40, 0xe9, 0x2e, 0xc3, 0x82, 0x4c, 0x0f, 0xb6, 0x08, 0xc5,
+ 0xb5, 0x75, 0x0f, 0xa6, 0x51, 0xc7, 0xc9, 0x96, 0x0f, 0xcf, 0xe0, 0xcf,
+ 0x67, 0x38, 0x01, 0x33, 0x61, 0xcc, 0x82, 0xdd, 0x01, 0x33, 0x59, 0xd8,
+ 0x23, 0x1b, 0x0f, 0x9c, 0xe9, 0xd7, 0x29, 0xca, 0x0f, 0x9c, 0xe0, 0xc5,
+ 0x11, 0x55, 0x0f, 0xa1, 0xd9, 0xca, 0xa5, 0x76, 0x0f, 0xce, 0xa0, 0xcc,
+ 0x20, 0x76, 0x01, 0x1f, 0x18, 0x47, 0x02, 0x0e, 0xc0, 0xe9, 0x91, 0x15,
+ 0xc0, 0xe9, 0xf4, 0x4b, 0x6f, 0xc7, 0xc0, 0xea, 0x00, 0x03, 0xc0, 0xea,
+ 0x20, 0x46, 0x09, 0x97, 0xc0, 0xea, 0x32, 0x46, 0x76, 0x52, 0xc0, 0xea,
+ 0x56, 0x49, 0x3a, 0xd4, 0xc0, 0xea, 0x62, 0xc6, 0xd2, 0xcb, 0x00, 0x4f,
+ 0xd1, 0xca, 0x9f, 0xae, 0x00, 0x4f, 0xd8, 0xc5, 0xd9, 0xb6, 0x0f, 0x9b,
+ 0x89, 0x49, 0x03, 0x37, 0x40, 0xea, 0x6e, 0xc6, 0x00, 0x91, 0x01, 0x1b,
+ 0xf1, 0xd8, 0x23, 0xc3, 0x0f, 0xa8, 0xa9, 0xc6, 0xcd, 0x19, 0x0f, 0xd6,
+ 0x88, 0xcf, 0x62, 0x6a, 0x0f, 0xa3, 0x29, 0xce, 0x2f, 0xbc, 0x0f, 0xa3,
+ 0x20, 0xc9, 0x18, 0x66, 0x01, 0x10, 0xc8, 0xd1, 0x51, 0xab, 0x0f, 0xab,
+ 0x60, 0xce, 0x6f, 0x0e, 0x00, 0xd0, 0xf9, 0xc7, 0xc9, 0xd5, 0x00, 0xd0,
+ 0xf1, 0x4b, 0x6f, 0xc7, 0xc0, 0xea, 0x74, 0x47, 0x02, 0x0e, 0x40, 0xea,
+ 0x8a, 0x97, 0x00, 0xba, 0x99, 0x8b, 0x00, 0xba, 0x90, 0xc2, 0x00, 0xd0,
+ 0x00, 0xba, 0x89, 0xc2, 0x0d, 0xf6, 0x00, 0xba, 0x81, 0xc2, 0x01, 0x4a,
+ 0x00, 0xba, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xba, 0x71, 0xc2, 0x00, 0x39,
+ 0x00, 0xba, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xba, 0x61, 0xc2, 0x01, 0xc3,
+ 0x00, 0xba, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xba, 0x51, 0xc2, 0x00, 0xb0,
+ 0x00, 0xba, 0x49, 0x10, 0xc0, 0xea, 0xea, 0xc2, 0x0e, 0x9a, 0x00, 0xba,
+ 0x39, 0xc2, 0x01, 0x6f, 0x00, 0xba, 0x31, 0xc2, 0x01, 0x30, 0x00, 0xba,
+ 0x21, 0xc2, 0x02, 0x2b, 0x00, 0xba, 0x19, 0x97, 0x00, 0xba, 0x11, 0x8b,
+ 0x00, 0xba, 0x09, 0x83, 0x00, 0xba, 0x00, 0xcb, 0x8c, 0xa8, 0x0f, 0xa3,
+ 0x81, 0xcb, 0x91, 0xdb, 0x0f, 0x98, 0x48, 0xc4, 0xe3, 0x0f, 0x0f, 0xa5,
+ 0xe1, 0x95, 0x0f, 0xd3, 0x90, 0x4c, 0x83, 0x49, 0xc0, 0xea, 0xf4, 0x90,
+ 0x0f, 0xcf, 0x00, 0x47, 0x34, 0x2f, 0xc0, 0xeb, 0x00, 0x47, 0x02, 0x0e,
+ 0xc0, 0xeb, 0x2d, 0x18, 0xc0, 0xeb, 0x95, 0x45, 0x00, 0xba, 0xc0, 0xeb,
+ 0xa1, 0x06, 0xc0, 0xeb, 0xc5, 0x4c, 0x11, 0xe2, 0x40, 0xeb, 0xd7, 0xdb,
+ 0x15, 0x96, 0x01, 0x1c, 0x59, 0xc5, 0x1c, 0xae, 0x0f, 0xa4, 0xa1, 0xc3,
+ 0x01, 0x5d, 0x00, 0x05, 0x30, 0x86, 0x0f, 0x9a, 0xf1, 0xd0, 0x5b, 0x62,
+ 0x00, 0x04, 0x11, 0xca, 0xa7, 0x10, 0x0f, 0xc9, 0x88, 0x42, 0x00, 0xbf,
+ 0xc0, 0xeb, 0xe7, 0x46, 0xd0, 0xd9, 0xc0, 0xeb, 0xf3, 0xcb, 0x97, 0x50,
+ 0x0e, 0x82, 0x28, 0xc5, 0x87, 0x64, 0x0e, 0x81, 0x23, 0x00, 0xeb, 0xff,
+ 0x46, 0xd1, 0xa5, 0xc0, 0xec, 0x03, 0x11, 0xc0, 0xec, 0x10, 0x14, 0xc0,
+ 0xec, 0x25, 0x42, 0x00, 0xfe, 0xc0, 0xec, 0x31, 0xc6, 0xc8, 0x94, 0x0e,
+ 0x83, 0x08, 0x14, 0xc0, 0xec, 0x3d, 0x12, 0xc0, 0xec, 0x49, 0x45, 0xd8,
+ 0x4e, 0xc0, 0xec, 0x59, 0x10, 0x40, 0xec, 0x71, 0x16, 0xc0, 0xec, 0x7d,
+ 0x48, 0xbc, 0x8a, 0xc0, 0xec, 0x92, 0xc5, 0xd9, 0x02, 0x0e, 0x81, 0x4b,
+ 0x00, 0xec, 0xa4, 0x1b, 0xc0, 0xec, 0xaa, 0xc7, 0xc0, 0x9e, 0x0e, 0x80,
+ 0xe8, 0x0b, 0xc0, 0xec, 0xb7, 0xc2, 0x42, 0xcd, 0x0e, 0x81, 0x79, 0xc5,
+ 0xd7, 0x27, 0x0e, 0x80, 0x08, 0x42, 0x14, 0xda, 0xc0, 0xec, 0xd4, 0x12,
+ 0x40, 0xec, 0xe0, 0x46, 0x3d, 0xd7, 0xc0, 0xec, 0xea, 0xda, 0x19, 0xfc,
+ 0x0e, 0x86, 0x29, 0x49, 0xb5, 0x21, 0x40, 0xed, 0x15, 0x44, 0xdf, 0x57,
+ 0xc0, 0xed, 0x27, 0x47, 0xc8, 0x2a, 0xc0, 0xed, 0x39, 0x44, 0x56, 0x2e,
+ 0x40, 0xed, 0x45, 0x42, 0x02, 0x2f, 0xc0, 0xed, 0x4f, 0x15, 0xc0, 0xed,
+ 0x59, 0xc6, 0xcd, 0xf1, 0x0e, 0x81, 0xf8, 0x10, 0xc0, 0xed, 0x65, 0x46,
+ 0xd1, 0x69, 0xc0, 0xed, 0x71, 0xc7, 0xc7, 0x5f, 0x0e, 0x83, 0x41, 0xc9,
+ 0xac, 0x9f, 0x0e, 0x83, 0x21, 0xc6, 0xd0, 0x9d, 0x0e, 0x82, 0xa9, 0xce,
+ 0x6d, 0x08, 0x0e, 0x80, 0x70, 0x48, 0xbd, 0x2a, 0xc0, 0xed, 0x7d, 0xca,
+ 0x9e, 0x32, 0x0e, 0x82, 0xb8, 0x14, 0xc0, 0xed, 0x9d, 0x07, 0xc0, 0xed,
+ 0xa7, 0x0a, 0xc0, 0xed, 0xb9, 0xc6, 0xd1, 0x51, 0x0e, 0x81, 0x38, 0x07,
+ 0xc0, 0xed, 0xc3, 0xc6, 0xc4, 0xab, 0x0e, 0x82, 0xe8, 0x49, 0xab, 0x64,
+ 0xc0, 0xed, 0xcf, 0xc5, 0xda, 0x92, 0x0e, 0x82, 0xd9, 0x44, 0xdf, 0x27,
+ 0xc0, 0xed, 0xdb, 0x46, 0xce, 0x7b, 0x40, 0xed, 0xe5, 0x42, 0x00, 0xba,
+ 0xc0, 0xed, 0xf1, 0x42, 0x00, 0xb1, 0xc0, 0xed, 0xfb, 0x46, 0xce, 0xf3,
+ 0xc0, 0xee, 0x07, 0x07, 0x40, 0xee, 0x13, 0x44, 0xe4, 0xaf, 0xc0, 0xee,
+ 0x28, 0xc3, 0x4e, 0x10, 0x0e, 0x80, 0xc8, 0xc6, 0xcd, 0x1f, 0x0e, 0x81,
+ 0xe1, 0xc4, 0xc8, 0x2c, 0x0e, 0x81, 0x28, 0xc2, 0x0d, 0x10, 0x08, 0xe3,
+ 0x58, 0x9b, 0x08, 0xe3, 0x50, 0xc4, 0x18, 0x10, 0x08, 0xe3, 0x03, 0x00,
+ 0xee, 0x32, 0xc2, 0x22, 0xcc, 0x08, 0xe2, 0xfa, 0x00, 0xee, 0x38, 0x0b,
+ 0xc0, 0xee, 0x3e, 0x11, 0x40, 0xee, 0x4a, 0x0a, 0xc0, 0xee, 0x56, 0x19,
+ 0xc0, 0xee, 0x62, 0xc2, 0x00, 0xc4, 0x08, 0xe3, 0x18, 0xc4, 0x26, 0x78,
+ 0x08, 0xe2, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xe2, 0xc1, 0x15, 0xc0, 0xee,
+ 0x6c, 0x08, 0xc0, 0xee, 0x78, 0x16, 0xc0, 0xee, 0x84, 0xc3, 0x05, 0x14,
+ 0x08, 0xe2, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xe2, 0x80, 0xc7, 0x7a, 0x7f,
+ 0x08, 0xe2, 0x01, 0xc7, 0x14, 0x39, 0x08, 0xe1, 0xe8, 0xc4, 0x1e, 0x97,
+ 0x08, 0xe1, 0xf9, 0xc5, 0x40, 0xe7, 0x08, 0xe1, 0xf0, 0x97, 0x08, 0xe1,
+ 0xd9, 0x8b, 0x08, 0xe1, 0xc9, 0x83, 0x08, 0xe1, 0x78, 0x8e, 0x08, 0xe1,
+ 0xb1, 0x94, 0x08, 0xe1, 0xa2, 0x00, 0xee, 0x90, 0x97, 0x08, 0xe1, 0x98,
+ 0x8b, 0x08, 0xe1, 0x88, 0x83, 0x08, 0xe1, 0x69, 0xc2, 0x0d, 0xf6, 0x08,
+ 0xe1, 0x61, 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x58, 0x83, 0x08, 0xe1, 0x51,
+ 0x47, 0xb2, 0x2e, 0x40, 0xee, 0x94, 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x29,
+ 0x83, 0x08, 0xe1, 0x20, 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x19, 0x83, 0x08,
+ 0xe1, 0x10, 0x83, 0x08, 0xe1, 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xe0, 0xe1,
+ 0xc2, 0x19, 0x2c, 0x08, 0xe0, 0xb9, 0xc2, 0x01, 0x30, 0x08, 0xe0, 0x90,
+ 0xc2, 0x00, 0xd0, 0x08, 0xe1, 0x01, 0x83, 0x08, 0xe0, 0xf9, 0x06, 0x40,
+ 0xee, 0x9f, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xf1, 0x83, 0x08, 0xe0, 0xe9,
+ 0x16, 0x40, 0xee, 0xa9, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xb1, 0x83, 0x08,
+ 0xe0, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xa1, 0x83, 0x08, 0xe0, 0x98,
+ 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0x89, 0x83, 0x08, 0xe0, 0x80, 0xc2, 0x00,
+ 0xd0, 0x08, 0xe0, 0x79, 0x83, 0x08, 0xe0, 0x70, 0x97, 0x08, 0xe0, 0x69,
+ 0x8b, 0x08, 0xe0, 0x59, 0x83, 0x08, 0xe0, 0x08, 0x97, 0x08, 0xe0, 0x28,
+ 0x8b, 0x08, 0xe0, 0x18, 0x45, 0x00, 0x49, 0xc0, 0xee, 0xb3, 0x46, 0x00,
+ 0x2c, 0xc0, 0xee, 0xd9, 0x16, 0xc0, 0xef, 0x01, 0xce, 0x6b, 0x9c, 0x01,
+ 0x38, 0x19, 0x45, 0x01, 0xce, 0xc0, 0xef, 0x0d, 0xd3, 0x3f, 0xe2, 0x01,
+ 0x2c, 0x39, 0xd2, 0x4a, 0x75, 0x01, 0x2c, 0x29, 0x44, 0x05, 0x14, 0x40,
+ 0xef, 0x25, 0x04, 0xc0, 0xef, 0x31, 0xc8, 0x0a, 0xff, 0x01, 0x02, 0x71,
+ 0xc4, 0x02, 0x6d, 0x00, 0x02, 0xf9, 0xc6, 0x4a, 0x9f, 0x01, 0x72, 0x3b,
+ 0x00, 0xef, 0x3d, 0xdb, 0x18, 0x1e, 0x01, 0x80, 0xf8, 0x46, 0x01, 0x4a,
+ 0xc0, 0xef, 0x43, 0xc5, 0x32, 0xbb, 0x01, 0x3e, 0xe8, 0x46, 0x01, 0x4a,
+ 0xc0, 0xef, 0x5b, 0x00, 0x40, 0xef, 0x73, 0xc7, 0x30, 0xf2, 0x01, 0x3e,
+ 0x61, 0x47, 0xc3, 0x14, 0xc0, 0xef, 0x7f, 0xc3, 0x17, 0x99, 0x0f, 0xd4,
+ 0xc0, 0x00, 0x40, 0xef, 0x85, 0x46, 0x00, 0x8b, 0x40, 0xef, 0x91, 0xc4,
+ 0x15, 0xe7, 0x00, 0x00, 0x79, 0xc3, 0x05, 0x14, 0x00, 0x00, 0x70, 0x03,
+ 0xc0, 0xef, 0xa9, 0x42, 0x00, 0xd0, 0xc0, 0xef, 0xb1, 0x14, 0xc0, 0xef,
+ 0xbd, 0xc8, 0x6e, 0xdc, 0x01, 0x3e, 0xe1, 0x11, 0xc0, 0xef, 0xc9, 0x15,
+ 0xc0, 0xef, 0xd5, 0x05, 0xc0, 0xef, 0xf8, 0x16, 0xc0, 0xf0, 0x13, 0x08,
+ 0xc0, 0xf0, 0x27, 0x4a, 0x07, 0xbb, 0xc0, 0xf0, 0x31, 0xcb, 0x1a, 0x50,
+ 0x00, 0x01, 0x43, 0x00, 0xf0, 0x3d, 0xe0, 0x05, 0xa7, 0x01, 0x16, 0x49,
+ 0x42, 0x00, 0x58, 0xc0, 0xf0, 0x41, 0x19, 0xc0, 0xf0, 0x4d, 0x04, 0xc0,
+ 0xf0, 0x5f, 0x0e, 0x40, 0xf0, 0x6b, 0x19, 0xc0, 0xf0, 0x77, 0x16, 0xc0,
+ 0xf0, 0x86, 0xd0, 0x58, 0x62, 0x0f, 0xc1, 0xe1, 0xc5, 0x01, 0xa2, 0x01,
+ 0x0c, 0x83, 0x00, 0xf0, 0x98, 0x14, 0xc0, 0xf0, 0xa2, 0xd1, 0x55, 0x30,
+ 0x01, 0x0f, 0xe9, 0x06, 0xc0, 0xf0, 0xae, 0x15, 0xc0, 0xf0, 0xba, 0x0a,
+ 0xc0, 0xf0, 0xc6, 0xcd, 0x7c, 0xa8, 0x01, 0x0e, 0x39, 0x04, 0xc0, 0xf0,
+ 0xd0, 0xcf, 0x61, 0x4d, 0x01, 0x5a, 0x29, 0x08, 0xc0, 0xf0, 0xe2, 0xd7,
+ 0x26, 0xbc, 0x0f, 0xc5, 0x20, 0x49, 0x01, 0xaa, 0xc0, 0xf0, 0xee, 0x15,
+ 0xc0, 0xf1, 0x06, 0xdb, 0x16, 0x1d, 0x01, 0x37, 0x31, 0x49, 0x3c, 0xe1,
+ 0xc0, 0xf1, 0x12, 0x47, 0x55, 0x85, 0x40, 0xf1, 0x2a, 0xca, 0x37, 0x4e,
+ 0x01, 0x17, 0x31, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x40, 0xc3, 0x02, 0xa3,
+ 0x01, 0x16, 0xb1, 0xcd, 0x78, 0x30, 0x01, 0x53, 0xc9, 0xd3, 0x43, 0x39,
+ 0x01, 0x53, 0xd8, 0x42, 0x00, 0x2a, 0xc0, 0xf1, 0x3f, 0xcc, 0x88, 0x7d,
+ 0x01, 0x13, 0x30, 0x45, 0x00, 0xd5, 0xc0, 0xf1, 0x5a, 0x43, 0x02, 0x9c,
+ 0x40, 0xf1, 0x70, 0xd4, 0x00, 0xd3, 0x01, 0x55, 0x40, 0x06, 0xc0, 0xf1,
+ 0x7c, 0x16, 0xc0, 0xf1, 0x8c, 0x83, 0x00, 0xe1, 0x19, 0xc2, 0x01, 0x4a,
+ 0x00, 0xe1, 0x11, 0x15, 0xc0, 0xf1, 0x9e, 0xc2, 0x02, 0x41, 0x00, 0xe0,
+ 0xf9, 0x0a, 0xc0, 0xf1, 0xa8, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xe1, 0xc2,
+ 0x00, 0x39, 0x00, 0xe0, 0xd9, 0xc2, 0x19, 0x2c, 0x00, 0xe0, 0xd1, 0x0f,
+ 0xc0, 0xf1, 0xb2, 0x04, 0xc0, 0xf1, 0xbc, 0x08, 0xc0, 0xf1, 0xc6, 0x12,
+ 0xc0, 0xf1, 0xd0, 0x10, 0xc0, 0xf1, 0xe0, 0xc2, 0x25, 0x3b, 0x00, 0xe0,
+ 0x41, 0x05, 0xc0, 0xf1, 0xf0, 0x09, 0xc0, 0xf1, 0xfa, 0x0d, 0x40, 0xf2,
+ 0x04, 0xc4, 0x26, 0x78, 0x00, 0xe2, 0x49, 0xc5, 0x06, 0xdb, 0x00, 0xe2,
+ 0x41, 0x15, 0xc0, 0xf2, 0x14, 0x08, 0xc0, 0xf2, 0x20, 0x16, 0xc0, 0xf2,
+ 0x2c, 0xc3, 0x05, 0x14, 0x00, 0xe2, 0x09, 0xc4, 0x15, 0xe7, 0x00, 0xe2,
+ 0x00, 0x16, 0xc0, 0xf2, 0x38, 0xc6, 0xc0, 0x98, 0x00, 0xe1, 0xe9, 0xd2,
+ 0x4e, 0x0b, 0x00, 0xe1, 0xe0, 0x44, 0x00, 0xbb, 0xc0, 0xf2, 0x47, 0x50,
+ 0x5c, 0xf2, 0x40, 0xf2, 0x53, 0x8d, 0x00, 0xe1, 0x6b, 0x00, 0xf2, 0x5f,
+ 0x90, 0x00, 0xe1, 0x83, 0x00, 0xf2, 0x65, 0x96, 0x00, 0xe1, 0x99, 0x94,
+ 0x00, 0xe1, 0x91, 0x92, 0x00, 0xe1, 0x89, 0x8e, 0x00, 0xe1, 0x79, 0x8f,
+ 0x00, 0xe1, 0x70, 0x87, 0x00, 0xe1, 0x61, 0x97, 0x00, 0xe1, 0x53, 0x00,
+ 0xf2, 0x6b, 0x91, 0x00, 0xe1, 0x43, 0x00, 0xf2, 0x6f, 0x8b, 0x00, 0xe1,
+ 0x39, 0xc2, 0x04, 0xc6, 0x00, 0xe1, 0x30, 0x00, 0xc0, 0xf2, 0x73, 0xc4,
+ 0x03, 0x0e, 0x01, 0x30, 0x3a, 0x00, 0xf2, 0xa7, 0x1b, 0xc0, 0xf2, 0xb0,
+ 0xc2, 0x01, 0x5d, 0x05, 0x26, 0x81, 0x12, 0xc0, 0xf2, 0xba, 0x06, 0xc0,
+ 0xf2, 0xc4, 0x16, 0xc0, 0xf2, 0xce, 0x09, 0xc0, 0xf2, 0xe2, 0x0d, 0xc0,
+ 0xf2, 0xec, 0xc2, 0x25, 0x3b, 0x05, 0x26, 0xc9, 0x05, 0xc0, 0xf2, 0xf6,
+ 0xc2, 0x01, 0xc3, 0x05, 0x26, 0xf9, 0x10, 0xc0, 0xf3, 0x00, 0xc2, 0x00,
+ 0xdb, 0x05, 0x27, 0x09, 0x15, 0xc0, 0xf3, 0x0a, 0x1c, 0xc0, 0xf3, 0x14,
+ 0x0a, 0xc0, 0xf3, 0x1e, 0xc2, 0x8d, 0x8f, 0x05, 0x27, 0x39, 0xc2, 0x00,
+ 0x87, 0x05, 0x27, 0x49, 0xc2, 0x01, 0x4a, 0x05, 0x27, 0x51, 0x83, 0x05,
+ 0x27, 0x73, 0x00, 0xf3, 0x28, 0x87, 0x05, 0x27, 0x83, 0x00, 0xf3, 0x2c,
+ 0x8b, 0x05, 0x27, 0x91, 0x91, 0x05, 0x27, 0x9b, 0x00, 0xf3, 0x30, 0x97,
+ 0x05, 0x27, 0xa2, 0x00, 0xf3, 0x34, 0xc5, 0x0a, 0x8a, 0x05, 0x27, 0xf1,
+ 0xc9, 0x11, 0xf6, 0x05, 0x27, 0xf8, 0x00, 0xc0, 0xf3, 0x3c, 0x43, 0x02,
+ 0xe8, 0x40, 0xf3, 0x57, 0xcd, 0x7b, 0xd8, 0x0f, 0xac, 0x39, 0xc7, 0x00,
+ 0x90, 0x0f, 0xa8, 0xb8, 0x46, 0x09, 0x97, 0xc0, 0xf3, 0x63, 0xcd, 0x2c,
+ 0xb2, 0x00, 0xca, 0x29, 0xd0, 0x0f, 0x09, 0x00, 0xca, 0x21, 0x15, 0xc0,
+ 0xf3, 0x87, 0x45, 0x34, 0x6f, 0xc0, 0xf3, 0x99, 0x47, 0x02, 0x0e, 0x40,
+ 0xf3, 0xa5, 0x85, 0x08, 0x49, 0xc9, 0x90, 0x08, 0x49, 0x5b, 0x00, 0xf3,
+ 0xf4, 0x8e, 0x08, 0x49, 0x4b, 0x00, 0xf3, 0xf8, 0x87, 0x08, 0x49, 0x23,
+ 0x00, 0xf3, 0xfc, 0x83, 0x08, 0x49, 0x03, 0x00, 0xf4, 0x00, 0x96, 0x08,
+ 0x49, 0x7b, 0x00, 0xf4, 0x04, 0x95, 0x08, 0x49, 0x9b, 0x00, 0xf4, 0x08,
+ 0x93, 0x08, 0x49, 0x91, 0x88, 0x08, 0x49, 0x89, 0x97, 0x08, 0x49, 0x81,
+ 0x94, 0x08, 0x49, 0x69, 0x91, 0x08, 0x49, 0x61, 0x8f, 0x08, 0x49, 0x51,
+ 0x8d, 0x08, 0x49, 0x41, 0x9b, 0x08, 0x49, 0x39, 0x8b, 0x08, 0x49, 0x31,
+ 0x98, 0x08, 0x49, 0x29, 0x86, 0x08, 0x49, 0x19, 0x89, 0x08, 0x49, 0x11,
+ 0x84, 0x08, 0x49, 0x08, 0x90, 0x08, 0x14, 0xc8, 0x90, 0x08, 0x14, 0xd0,
+ 0x8a, 0x08, 0x14, 0x18, 0x8a, 0x08, 0x14, 0x49, 0x96, 0x08, 0x14, 0xc0,
+ 0x8d, 0x08, 0x14, 0xa0, 0x8f, 0x08, 0x14, 0x80, 0x90, 0x08, 0x14, 0x88,
+ 0x00, 0xc0, 0xf4, 0x0c, 0xc6, 0xc1, 0xfd, 0x01, 0x55, 0x5a, 0x00, 0xf4,
+ 0x48, 0x45, 0x03, 0x14, 0xc0, 0xf4, 0x4e, 0x56, 0x2c, 0xde, 0x40, 0xf4,
+ 0x58, 0x15, 0xc0, 0xf4, 0x9f, 0xd5, 0x32, 0xd5, 0x00, 0x14, 0xb3, 0x00,
+ 0xf4, 0xb4, 0x42, 0x01, 0x19, 0xc0, 0xf4, 0xba, 0x03, 0xc0, 0xf4, 0xc9,
+ 0xd8, 0x21, 0x0b, 0x00, 0xe9, 0x21, 0xcc, 0x23, 0x33, 0x00, 0x14, 0xa3,
+ 0x00, 0xf4, 0xd5, 0xdb, 0x17, 0xb2, 0x00, 0x14, 0xa9, 0x42, 0x01, 0x2d,
+ 0xc0, 0xf4, 0xdb, 0xc2, 0x1d, 0xc1, 0x00, 0x0d, 0x31, 0xcf, 0x65, 0xfd,
+ 0x00, 0x0d, 0xd9, 0xc4, 0x95, 0x50, 0x00, 0x0d, 0xf9, 0xcc, 0x83, 0xe5,
+ 0x00, 0x0e, 0x01, 0xcd, 0x79, 0x0d, 0x00, 0x0e, 0x08, 0xc4, 0x0d, 0x21,
+ 0x01, 0x38, 0xe9, 0x48, 0x0b, 0x18, 0x40, 0xf4, 0xe7, 0xca, 0xa6, 0xe8,
+ 0x05, 0x3f, 0xb9, 0x49, 0x11, 0x74, 0xc0, 0xf4, 0xf3, 0x0b, 0xc0, 0xf4,
+ 0xfb, 0xc9, 0xa8, 0x9d, 0x05, 0x3f, 0xf8, 0xc9, 0xb2, 0xa2, 0x0f, 0x98,
+ 0xe1, 0xc6, 0x00, 0x91, 0x0f, 0x98, 0xb8, 0x0d, 0xc0, 0xf5, 0x07, 0x12,
+ 0xc0, 0xf5, 0x0f, 0x10, 0xc0, 0xf5, 0x1f, 0xc2, 0x00, 0x99, 0x00, 0x74,
+ 0x41, 0x15, 0xc0, 0xf5, 0x2f, 0xc2, 0x00, 0x58, 0x00, 0x74, 0xa1, 0x16,
+ 0xc0, 0xf5, 0x3b, 0xc2, 0x00, 0x6b, 0x00, 0x74, 0xd1, 0x43, 0xc9, 0xe0,
+ 0xc0, 0xf5, 0x45, 0xc2, 0x00, 0xa2, 0x00, 0x75, 0x09, 0xc2, 0x42, 0xcd,
+ 0x00, 0x75, 0x11, 0xc2, 0x00, 0x79, 0x00, 0x75, 0x19, 0xc2, 0x01, 0xc8,
+ 0x00, 0x75, 0x2b, 0x00, 0xf5, 0x55, 0xc2, 0x02, 0xa0, 0x00, 0x75, 0x39,
+ 0x43, 0x60, 0xe8, 0xc0, 0xf5, 0x5b, 0x91, 0x00, 0x75, 0x68, 0x83, 0x00,
+ 0x75, 0x83, 0x00, 0xf5, 0x67, 0x45, 0xdb, 0x96, 0xc0, 0xf5, 0x77, 0x8b,
+ 0x00, 0x75, 0xa3, 0x00, 0xf5, 0x83, 0x9b, 0x00, 0x75, 0xb3, 0x00, 0xf5,
+ 0x87, 0x97, 0x00, 0x75, 0xc3, 0x00, 0xf5, 0x8b, 0x87, 0x00, 0x76, 0x03,
+ 0x00, 0xf5, 0x8f, 0x91, 0x00, 0x76, 0x10, 0xcf, 0x67, 0xfb, 0x00, 0x75,
+ 0xd1, 0x4e, 0x6f, 0xc4, 0x40, 0xf5, 0x93, 0xc2, 0x13, 0x4c, 0x00, 0x76,
+ 0x41, 0x16, 0xc0, 0xf5, 0x9f, 0xc6, 0xcd, 0x31, 0x00, 0x76, 0x58, 0xc4,
+ 0x15, 0xe7, 0x00, 0x76, 0x81, 0xc3, 0x05, 0x14, 0x00, 0x76, 0x89, 0x16,
+ 0xc0, 0xf5, 0xa9, 0x08, 0xc0, 0xf5, 0xb5, 0x15, 0xc0, 0xf5, 0xc1, 0xc5,
+ 0x06, 0xdb, 0x00, 0x76, 0xc1, 0xc4, 0x26, 0x78, 0x00, 0x76, 0xc8, 0xc2,
+ 0x00, 0x10, 0x00, 0x76, 0xe1, 0xc2, 0x00, 0xa2, 0x00, 0x76, 0xe8, 0x16,
+ 0xc0, 0xf5, 0xcd, 0x4f, 0x60, 0x6c, 0xc0, 0xf5, 0xd9, 0x4f, 0x01, 0xf3,
+ 0xc0, 0xf5, 0xe5, 0xda, 0x1a, 0x7e, 0x01, 0x3a, 0x81, 0xc6, 0xcd, 0x8b,
+ 0x01, 0x38, 0x81, 0xd5, 0x37, 0x6d, 0x01, 0x2e, 0xe9, 0x43, 0x05, 0xb2,
+ 0x40, 0xf5, 0xf1, 0x16, 0xc0, 0xf5, 0xf7, 0x4f, 0x60, 0x6c, 0xc0, 0xf6,
+ 0x03, 0xcf, 0x68, 0x37, 0x01, 0x3e, 0xa1, 0xd5, 0x37, 0x6d, 0x01, 0x2e,
+ 0xe1, 0x44, 0x20, 0xe8, 0x40, 0xf6, 0x0f, 0x0e, 0xc0, 0xf6, 0x15, 0x4f,
+ 0x2c, 0x4a, 0x40, 0xf6, 0x21, 0x48, 0x01, 0xd3, 0xc0, 0xf6, 0x27, 0xc5,
+ 0x06, 0xe2, 0x01, 0x2c, 0x03, 0x00, 0xf6, 0x31, 0xc6, 0x02, 0xd1, 0x01,
+ 0x2f, 0x01, 0xcc, 0x01, 0xdb, 0x0f, 0xdc, 0x70, 0xcc, 0x06, 0xdb, 0x01,
+ 0x2c, 0xa1, 0xcd, 0x15, 0x02, 0x0f, 0xdc, 0x10, 0xdb, 0x14, 0xf4, 0x0f,
+ 0xdb, 0x69, 0x45, 0x02, 0xde, 0x40, 0xf6, 0x37, 0xc5, 0x01, 0xa2, 0x01,
+ 0x0f, 0x3b, 0x00, 0xf6, 0x43, 0xcc, 0x82, 0x35, 0x01, 0x0f, 0x72, 0x00,
+ 0xf6, 0x47, 0x42, 0x00, 0x2c, 0xc0, 0xf6, 0x4d, 0x42, 0x02, 0xa0, 0x40,
+ 0xf6, 0x59, 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x89, 0xcc, 0x88, 0xdd, 0x0f,
+ 0xc1, 0xc8, 0xc4, 0x01, 0xa3, 0x01, 0x0c, 0x8b, 0x00, 0xf6, 0x65, 0xc5,
+ 0xdb, 0x50, 0x01, 0x70, 0xa8, 0xcb, 0x82, 0xba, 0x01, 0x0f, 0x09, 0xcb,
+ 0x82, 0x36, 0x01, 0x0e, 0x88, 0x51, 0x01, 0x51, 0xc0, 0xf6, 0x69, 0x45,
+ 0x11, 0x3a, 0x40, 0xf6, 0x75, 0xc5, 0x01, 0xa2, 0x01, 0x58, 0x31, 0xd3,
+ 0x43, 0xe4, 0x01, 0x5c, 0x48, 0xc8, 0x2e, 0x20, 0x0f, 0xb7, 0x41, 0xcc,
+ 0x4e, 0x35, 0x0f, 0xa9, 0xe0, 0xd0, 0x5d, 0x52, 0x01, 0x2f, 0x71, 0xcf,
+ 0x66, 0x66, 0x01, 0x2f, 0x68, 0xd2, 0x4c, 0xd9, 0x01, 0x3e, 0xf8, 0xc4,
+ 0x01, 0x9b, 0x01, 0x18, 0x1b, 0x00, 0xf6, 0x81, 0xcf, 0x6a, 0xda, 0x01,
+ 0x4d, 0xe8, 0xcb, 0x01, 0xfc, 0x01, 0x0f, 0x99, 0xcc, 0x82, 0x35, 0x01,
+ 0x0e, 0xa9, 0xc5, 0x01, 0xa2, 0x01, 0x0c, 0xab, 0x00, 0xf6, 0x85, 0xcb,
+ 0x94, 0x22, 0x01, 0x58, 0x69, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x29, 0xd0,
+ 0x5b, 0xc2, 0x0f, 0xc2, 0xc8, 0x4f, 0x66, 0x48, 0xc0, 0xf6, 0x8b, 0x50,
+ 0x5c, 0xd2, 0x40, 0xf6, 0x97, 0x00, 0x40, 0xf6, 0xa3, 0xca, 0x1b, 0x09,
+ 0x00, 0x00, 0xf9, 0xc9, 0x6b, 0xaf, 0x01, 0x5f, 0xd0, 0xc3, 0xa1, 0xa2,
+ 0x08, 0x1c, 0x01, 0xc2, 0x00, 0x74, 0x08, 0x1c, 0x98, 0xc4, 0xe2, 0x57,
+ 0x08, 0x1c, 0x11, 0xc4, 0x92, 0x76, 0x08, 0x1c, 0xc8, 0xc2, 0x00, 0xd0,
+ 0x08, 0x1c, 0x19, 0xc2, 0x0f, 0x9b, 0x08, 0x1c, 0x58, 0xc4, 0xdb, 0x4c,
+ 0x08, 0x1c, 0x21, 0xc3, 0x01, 0xce, 0x08, 0x1c, 0x78, 0xc2, 0x01, 0x6f,
+ 0x08, 0x1c, 0x40, 0xc3, 0x04, 0x87, 0x08, 0x1c, 0x39, 0x97, 0x08, 0x1c,
+ 0x88, 0xc2, 0x00, 0x3d, 0x08, 0x1c, 0x49, 0xc5, 0xd6, 0xaf, 0x08, 0x1c,
+ 0xc1, 0x91, 0x08, 0x1c, 0xd0, 0xc3, 0x11, 0xef, 0x08, 0x1c, 0x61, 0x03,
+ 0xc0, 0xf6, 0xb5, 0xc2, 0x06, 0x62, 0x08, 0x1c, 0xe8, 0x0a, 0xc0, 0xf6,
+ 0xc1, 0x07, 0xc0, 0xf6, 0xcd, 0x19, 0xc0, 0xf6, 0xdf, 0x15, 0xc0, 0xf6,
+ 0xf1, 0x46, 0x06, 0x1d, 0xc0, 0xf7, 0x0b, 0x0e, 0xc0, 0xf7, 0x17, 0x16,
+ 0xc0, 0xf7, 0x2d, 0x04, 0xc0, 0xf7, 0x3f, 0x42, 0x02, 0xae, 0xc0, 0xf7,
+ 0x4b, 0x05, 0xc0, 0xf7, 0x57, 0x06, 0xc0, 0xf7, 0x6c, 0x14, 0xc0, 0xf7,
+ 0x7c, 0x0f, 0xc0, 0xf7, 0x88, 0xc9, 0x60, 0xf3, 0x01, 0x3c, 0xa9, 0xcc,
+ 0x07, 0xbb, 0x01, 0x3a, 0xd1, 0x03, 0xc0, 0xf7, 0x94, 0x11, 0xc0, 0xf7,
+ 0xa6, 0x08, 0xc0, 0xf7, 0xb8, 0xcb, 0x58, 0xc7, 0x01, 0x38, 0xd1, 0xd4,
+ 0x10, 0xc9, 0x0f, 0xb3, 0xc8, 0xc5, 0xaf, 0x07, 0x0f, 0xd5, 0x33, 0x00,
+ 0xf7, 0xc4, 0xc5, 0x36, 0xb7, 0x0f, 0x9d, 0x38, 0x42, 0x00, 0x30, 0xc0,
+ 0xf7, 0xca, 0xcf, 0x6b, 0x34, 0x0f, 0xb2, 0x48, 0xd3, 0x43, 0x85, 0x01,
+ 0x36, 0x89, 0xc7, 0x00, 0x90, 0x01, 0x1c, 0x40, 0x42, 0x36, 0xa2, 0xc0,
+ 0xf7, 0xdc, 0x42, 0x2f, 0xf9, 0xc0, 0xf7, 0xf4, 0x42, 0x14, 0x7d, 0xc0,
+ 0xf8, 0x10, 0x42, 0x28, 0x5b, 0xc0, 0xf8, 0x20, 0x42, 0x01, 0x99, 0x40,
+ 0xf8, 0x38, 0x42, 0x28, 0x5b, 0xc0, 0xf8, 0x48, 0x42, 0x01, 0x99, 0xc0,
+ 0xf8, 0x68, 0x42, 0x36, 0xa2, 0xc0, 0xf8, 0x84, 0x42, 0x2f, 0xf9, 0xc0,
+ 0xf8, 0x90, 0x42, 0x14, 0x7d, 0x40, 0xf8, 0xac, 0x42, 0x28, 0x5b, 0xc0,
+ 0xf8, 0xd3, 0x42, 0x01, 0x99, 0xc0, 0xf8, 0xe7, 0x42, 0x36, 0xa2, 0xc0,
+ 0xf9, 0x05, 0x42, 0x2f, 0xf9, 0xc0, 0xf9, 0x11, 0x42, 0x14, 0x7d, 0xc0,
+ 0xf9, 0x33, 0x47, 0xc1, 0x15, 0x40, 0xf9, 0x57, 0x42, 0x28, 0x5b, 0xc0,
+ 0xf9, 0x5f, 0x42, 0x01, 0x99, 0xc0, 0xf9, 0x71, 0x42, 0x36, 0xa2, 0xc0,
+ 0xf9, 0x89, 0x42, 0x2f, 0xf9, 0xc0, 0xf9, 0xa5, 0x42, 0x14, 0x7d, 0x40,
+ 0xf9, 0xc5, 0xa0, 0x0d, 0x80, 0xb1, 0x9f, 0x0d, 0x80, 0xa9, 0x9e, 0x0d,
+ 0x80, 0xa0, 0xa3, 0x0d, 0x80, 0x99, 0xa2, 0x0d, 0x80, 0x91, 0xa1, 0x0d,
+ 0x80, 0x89, 0xa0, 0x0d, 0x80, 0x81, 0x9f, 0x0d, 0x80, 0x79, 0x9e, 0x0d,
+ 0x80, 0x08, 0xa2, 0x0d, 0x80, 0x71, 0xa1, 0x0d, 0x80, 0x69, 0xa0, 0x0d,
+ 0x80, 0x61, 0x9f, 0x0d, 0x80, 0x59, 0x9e, 0x0d, 0x80, 0x50, 0xa1, 0x0d,
+ 0x80, 0x49, 0xa0, 0x0d, 0x80, 0x41, 0x9f, 0x0d, 0x80, 0x39, 0x9e, 0x0d,
+ 0x80, 0x30, 0xc2, 0x02, 0xa0, 0x0d, 0x80, 0x29, 0xa0, 0x0d, 0x80, 0x21,
+ 0x9f, 0x0d, 0x80, 0x19, 0x9e, 0x0d, 0x80, 0x10, 0x42, 0x28, 0x5b, 0xc0,
+ 0xf9, 0xf2, 0x42, 0x01, 0x99, 0xc0, 0xfa, 0x0e, 0x42, 0x2f, 0xf9, 0xc0,
+ 0xfa, 0x1e, 0x42, 0x14, 0x7d, 0x40, 0xfa, 0x32, 0x42, 0x14, 0x7d, 0xc0,
+ 0xfa, 0x46, 0x42, 0x36, 0xa2, 0xc0, 0xfa, 0x60, 0x42, 0x28, 0x5b, 0x40,
+ 0xfa, 0x70, 0x42, 0x28, 0x5b, 0xc0, 0xfa, 0x88, 0x42, 0x01, 0x99, 0xc0,
+ 0xfa, 0xa0, 0x42, 0x36, 0xa2, 0xc0, 0xfa, 0xae, 0x42, 0x2f, 0xf9, 0xc0,
+ 0xfa, 0xbe, 0x42, 0x14, 0x7d, 0x40, 0xfa, 0xda, 0x42, 0x28, 0x5b, 0xc0,
+ 0xfa, 0xf6, 0x42, 0x01, 0x99, 0xc0, 0xfb, 0x14, 0x42, 0x2f, 0xf9, 0xc0,
+ 0xfb, 0x38, 0x42, 0x14, 0x7d, 0xc0, 0xfb, 0x54, 0x42, 0x36, 0xa2, 0x40,
+ 0xfb, 0x64, 0x42, 0x28, 0x5b, 0xc0, 0xfb, 0x7a, 0x42, 0x01, 0x99, 0xc0,
+ 0xfb, 0x96, 0x42, 0x36, 0xa2, 0xc0, 0xfb, 0xaa, 0x42, 0x2f, 0xf9, 0xc0,
+ 0xfb, 0xca, 0x42, 0x14, 0x7d, 0x40, 0xfb, 0xe2, 0x48, 0x19, 0x9b, 0xc0,
+ 0xfc, 0x02, 0x46, 0x02, 0x0f, 0x40, 0xfc, 0x0e, 0x45, 0x12, 0x5c, 0xc0,
+ 0xfc, 0xa4, 0x4b, 0x11, 0xe3, 0x40, 0xfc, 0xd4, 0xc9, 0xaa, 0x3b, 0x00,
+ 0x2e, 0x29, 0xc9, 0xb0, 0xbc, 0x00, 0x2e, 0x21, 0xcd, 0x79, 0x00, 0x00,
+ 0x2d, 0x78, 0x1c, 0xc0, 0xfc, 0xf2, 0x06, 0xc0, 0xfc, 0xfc, 0xc4, 0xe1,
+ 0x1b, 0x00, 0x2d, 0x61, 0xc3, 0x11, 0x14, 0x00, 0x2d, 0x59, 0x42, 0x0c,
+ 0x43, 0xc0, 0xfd, 0x08, 0x16, 0xc0, 0xfd, 0x14, 0x42, 0x0f, 0x9a, 0xc0,
+ 0xfd, 0x1e, 0xcc, 0x89, 0x6d, 0x00, 0x2d, 0x11, 0x42, 0x00, 0xb0, 0xc0,
+ 0xfd, 0x2a, 0xc5, 0x48, 0x14, 0x00, 0x2c, 0xb9, 0x15, 0xc0, 0xfd, 0x36,
+ 0xc7, 0xc9, 0xf1, 0x00, 0x2c, 0x89, 0x43, 0x09, 0x3b, 0xc0, 0xfd, 0x42,
+ 0x0f, 0x40, 0xfd, 0x51, 0x43, 0x01, 0x7f, 0xc0, 0xfd, 0x66, 0xc7, 0x0c,
+ 0x96, 0x02, 0x6e, 0x48, 0x0b, 0xc0, 0xfd, 0x96, 0xc7, 0xc7, 0xe4, 0x02,
+ 0x6e, 0xf9, 0xd5, 0x35, 0xc9, 0x02, 0x6f, 0x19, 0x07, 0x40, 0xfd, 0xa2,
+ 0xc6, 0x78, 0x44, 0x02, 0x6e, 0x21, 0xd2, 0x49, 0xd3, 0x02, 0x6e, 0x88,
+ 0x10, 0xc0, 0xfd, 0xb4, 0xcc, 0x84, 0x39, 0x02, 0x6f, 0x58, 0x45, 0x03,
+ 0x14, 0xc0, 0xfd, 0xc0, 0xc9, 0xaf, 0x54, 0x02, 0x6e, 0x59, 0xce, 0x6e,
+ 0x82, 0x02, 0x6e, 0xb0, 0xc4, 0x12, 0x38, 0x02, 0x6e, 0x51, 0xc7, 0xc9,
+ 0x18, 0x02, 0x6f, 0x11, 0xcd, 0x7e, 0x7c, 0x02, 0x6f, 0x68, 0xc9, 0xb4,
+ 0xfd, 0x02, 0x6e, 0x61, 0xc8, 0xb6, 0x0a, 0x02, 0x6e, 0x80, 0x14, 0xc0,
+ 0xfd, 0xcc, 0xd1, 0x55, 0xc9, 0x02, 0x6f, 0x60, 0xc5, 0xdb, 0x82, 0x02,
+ 0x6e, 0x71, 0xcb, 0x93, 0xbf, 0x02, 0x6e, 0xd0, 0xc7, 0xc9, 0x73, 0x02,
+ 0x6e, 0x91, 0xc8, 0xb6, 0x12, 0x02, 0x6f, 0xb1, 0xcf, 0x63, 0xb4, 0x02,
+ 0x6f, 0xf0, 0xcd, 0x77, 0x12, 0x02, 0x6e, 0xa1, 0xcb, 0x98, 0x79, 0x02,
+ 0x6f, 0x51, 0xd0, 0x5e, 0x72, 0x02, 0x6f, 0xf8, 0x16, 0xc0, 0xfd, 0xd8,
+ 0xc8, 0xba, 0x72, 0x02, 0x6f, 0x80, 0x10, 0xc0, 0xfd, 0xe4, 0xc7, 0xc8,
+ 0x7e, 0x02, 0x6e, 0xf1, 0xc6, 0xcc, 0x17, 0x02, 0x6f, 0x48, 0x42, 0x02,
+ 0xaf, 0xc0, 0xfd, 0xf0, 0xca, 0x9b, 0x30, 0x02, 0x6f, 0x30, 0x51, 0x54,
+ 0x86, 0xc0, 0xfd, 0xfc, 0x04, 0xc0, 0xfe, 0x1a, 0xd5, 0x37, 0x2e, 0x01,
+ 0x35, 0x49, 0x4a, 0xa5, 0x4e, 0xc0, 0xfe, 0x26, 0xce, 0x71, 0x30, 0x01,
+ 0x1d, 0x79, 0xc8, 0x22, 0x83, 0x01, 0x01, 0x31, 0x16, 0x40, 0xfe, 0x36,
+ 0x00, 0x40, 0xfe, 0x42, 0xc7, 0xc1, 0x77, 0x01, 0x33, 0x41, 0xc8, 0xbd,
+ 0xba, 0x01, 0x30, 0xa9, 0xc6, 0xcd, 0x19, 0x0f, 0x99, 0xb1, 0xc3, 0xcd,
+ 0x94, 0x0f, 0x99, 0x68, 0xd2, 0x4a, 0xe1, 0x01, 0x1f, 0x98, 0x00, 0x40,
+ 0xfe, 0x4e, 0xd0, 0x0d, 0xaa, 0x0f, 0xb3, 0x48, 0x83, 0x0f, 0xd5, 0x61,
+ 0xc8, 0xbd, 0xfa, 0x0f, 0xa1, 0xc8, 0x45, 0x02, 0x9a, 0x40, 0xfe, 0x5d,
+ 0x42, 0x01, 0x5d, 0xc0, 0xfe, 0x6f, 0xc5, 0xc4, 0x0a, 0x0f, 0xc8, 0xe9,
+ 0x4c, 0x83, 0x79, 0x40, 0xfe, 0x79, 0x46, 0x09, 0x97, 0xc0, 0xfe, 0x85,
+ 0x45, 0x00, 0xba, 0xc0, 0xfe, 0xa9, 0x45, 0x01, 0xc3, 0xc0, 0xfe, 0xb5,
+ 0x46, 0x34, 0x6f, 0xc0, 0xfe, 0xc1, 0x47, 0x02, 0x0e, 0x40, 0xfe, 0xd5,
+ 0xcd, 0x7a, 0x86, 0x00, 0xb9, 0xa1, 0x4b, 0x6f, 0xc7, 0xc0, 0xff, 0x3f,
+ 0x47, 0x02, 0x0e, 0x40, 0xff, 0x47, 0x43, 0x4e, 0xaf, 0xc0, 0xff, 0xa5,
+ 0x4d, 0x7b, 0xe5, 0x40, 0xff, 0xc7, 0x47, 0x34, 0x2f, 0xc0, 0xff, 0xe5,
+ 0x47, 0x02, 0x0e, 0x40, 0xff, 0xf8, 0xc9, 0x11, 0xf6, 0x07, 0xfb, 0x09,
+ 0xc5, 0x0a, 0x8a, 0x07, 0xfb, 0x20, 0xcf, 0x69, 0x63, 0x07, 0xfb, 0x11,
+ 0xcb, 0x03, 0xbc, 0x07, 0xff, 0x48, 0xcf, 0x69, 0x63, 0x07, 0xfb, 0x19,
+ 0xcb, 0x03, 0xbc, 0x07, 0xff, 0x58, 0x00, 0xc1, 0x00, 0x55, 0xde, 0x0d,
+ 0xd8, 0x07, 0xfb, 0x80, 0xc6, 0x92, 0x0c, 0x07, 0xfd, 0x01, 0x47, 0x02,
+ 0x0e, 0x41, 0x00, 0x6d, 0xcb, 0x90, 0x91, 0x0f, 0xb4, 0x23, 0x01, 0x00,
+ 0xc7, 0xcb, 0x8d, 0xe7, 0x0f, 0xa3, 0x00, 0xcc, 0x80, 0x9d, 0x01, 0x35,
+ 0x09, 0xd1, 0x54, 0xdb, 0x0f, 0xa8, 0x30, 0x83, 0x01, 0x82, 0x13, 0x01,
+ 0x00, 0xcd, 0x15, 0xc1, 0x00, 0xd3, 0x8b, 0x01, 0x82, 0x21, 0x97, 0x01,
+ 0x82, 0x31, 0x87, 0x01, 0x82, 0x41, 0x91, 0x01, 0x82, 0x51, 0x0d, 0xc1,
+ 0x00, 0xed, 0x09, 0xc1, 0x01, 0x01, 0x1c, 0xc1, 0x01, 0x15, 0x16, 0xc1,
+ 0x01, 0x29, 0x06, 0xc1, 0x01, 0x3d, 0x90, 0x01, 0x84, 0x9b, 0x01, 0x01,
+ 0x51, 0x0a, 0xc1, 0x01, 0x65, 0x04, 0xc1, 0x01, 0x79, 0x12, 0xc1, 0x01,
+ 0x8d, 0x0f, 0xc1, 0x01, 0xa1, 0x1b, 0xc1, 0x01, 0xb5, 0x14, 0xc1, 0x01,
+ 0xc1, 0x19, 0xc1, 0x01, 0xd5, 0xc2, 0x5d, 0xb3, 0x01, 0x84, 0xa0, 0x00,
+ 0xc1, 0x01, 0xe5, 0xcb, 0x9a, 0x52, 0x01, 0x01, 0x39, 0xc6, 0x89, 0xd3,
+ 0x00, 0x01, 0x68, 0x43, 0x01, 0xd8, 0xc1, 0x01, 0xf1, 0x44, 0x00, 0xde,
+ 0x41, 0x02, 0x0f, 0xc4, 0x25, 0xd5, 0x01, 0x03, 0x21, 0xc9, 0x1b, 0x0a,
+ 0x01, 0x03, 0x19, 0xc5, 0x03, 0x4d, 0x01, 0x03, 0x10, 0xcf, 0x67, 0x29,
+ 0x0f, 0xa9, 0x01, 0xc7, 0x67, 0x31, 0x0f, 0xa9, 0x21, 0xcd, 0x7d, 0x10,
+ 0x0f, 0xa9, 0x08, 0x0e, 0xc1, 0x02, 0x37, 0xc6, 0xcd, 0x61, 0x01, 0x15,
+ 0xd1, 0xc7, 0x00, 0x40, 0x01, 0x11, 0x4b, 0x01, 0x02, 0x43, 0xc6, 0x10,
+ 0xce, 0x01, 0x01, 0xe9, 0xcb, 0x33, 0x33, 0x01, 0x51, 0xe0, 0x00, 0x41,
+ 0x02, 0x47, 0x46, 0x62, 0x28, 0xc1, 0x02, 0x57, 0x47, 0xc5, 0x98, 0x41,
+ 0x02, 0x63, 0xda, 0x1c, 0x38, 0x01, 0x4e, 0xf0, 0x15, 0xc1, 0x02, 0x6f,
+ 0xcb, 0x99, 0xd9, 0x0f, 0xa4, 0x08, 0xc4, 0x00, 0xc3, 0x01, 0x10, 0x31,
+ 0x43, 0x2c, 0xff, 0x41, 0x02, 0x7b, 0xcc, 0x87, 0x2d, 0x0f, 0xa7, 0x41,
+ 0xce, 0x6e, 0x66, 0x01, 0x4e, 0xe0, 0xcd, 0x76, 0x4f, 0x01, 0x05, 0xc9,
+ 0x48, 0xb7, 0x8a, 0x41, 0x02, 0x87, 0xd7, 0x28, 0x2c, 0x0f, 0xd7, 0xa8,
+ 0xc2, 0x00, 0xf1, 0x01, 0x13, 0x0b, 0x01, 0x02, 0xab, 0xce, 0x33, 0xae,
+ 0x01, 0x53, 0x38, 0x4a, 0xa7, 0x9c, 0xc1, 0x02, 0xb1, 0x49, 0xb4, 0x1c,
+ 0x41, 0x02, 0xbf, 0x54, 0x3b, 0x88, 0xc1, 0x02, 0xcb, 0xd1, 0x2b, 0x57,
+ 0x01, 0x81, 0x60, 0xc4, 0x0a, 0x8b, 0x01, 0x80, 0x09, 0xcb, 0x90, 0xa7,
+ 0x01, 0x80, 0x30, 0xcc, 0x83, 0x25, 0x01, 0x8c, 0x81, 0xcc, 0x88, 0x71,
+ 0x01, 0x8c, 0x89, 0xc8, 0x2b, 0x60, 0x01, 0x8c, 0x91, 0x16, 0xc1, 0x02,
+ 0xe9, 0x08, 0xc1, 0x02, 0xf9, 0x0f, 0xc1, 0x03, 0x05, 0xcb, 0x97, 0x0e,
+ 0x01, 0x8c, 0xc1, 0xcb, 0x93, 0x88, 0x01, 0x8c, 0xd1, 0xcb, 0x8e, 0x1e,
+ 0x01, 0x8c, 0xe9, 0xca, 0xa3, 0x28, 0x01, 0x8c, 0xf0, 0x47, 0x34, 0x2f,
+ 0xc1, 0x03, 0x11, 0xcc, 0x83, 0x19, 0x08, 0x42, 0xb9, 0x47, 0x02, 0x0e,
+ 0x41, 0x03, 0x1e, 0xc6, 0x57, 0xec, 0x01, 0x03, 0x01, 0xd4, 0x3a, 0xfc,
+ 0x01, 0x71, 0x88, 0x42, 0x00, 0x97, 0xc1, 0x03, 0x81, 0xd0, 0x5e, 0xc2,
+ 0x0f, 0xa3, 0x78, 0x05, 0xc1, 0x03, 0x99, 0x0a, 0xc1, 0x03, 0xb7, 0x52,
+ 0x48, 0x59, 0xc1, 0x03, 0xc5, 0x15, 0xc1, 0x03, 0xd1, 0x0e, 0xc1, 0x04,
+ 0x05, 0x06, 0xc1, 0x04, 0x15, 0x16, 0xc1, 0x04, 0x2a, 0xd9, 0x0f, 0x09,
+ 0x01, 0x3a, 0xa9, 0xd6, 0x2c, 0xb2, 0x01, 0x3a, 0xa1, 0x08, 0xc1, 0x04,
+ 0x40, 0xc3, 0xe6, 0x74, 0x01, 0x38, 0x89, 0x14, 0xc1, 0x04, 0x50, 0x42,
+ 0x02, 0xae, 0xc1, 0x04, 0x5c, 0x0f, 0xc1, 0x04, 0x68, 0xc6, 0x1c, 0xb4,
+ 0x01, 0x2f, 0x31, 0x12, 0xc1, 0x04, 0x74, 0x43, 0x00, 0x5f, 0x41, 0x04,
+ 0x80, 0x45, 0x15, 0xa7, 0xc1, 0x04, 0x8c, 0x45, 0x20, 0x6c, 0x41, 0x04,
+ 0xaa, 0x45, 0x20, 0x6c, 0xc1, 0x04, 0xc8, 0x45, 0x15, 0xa7, 0x41, 0x04,
+ 0xe6, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x19, 0xca, 0x35, 0xe9, 0x0f, 0xc3,
+ 0x59, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x19, 0xd1, 0x50, 0x46, 0x0f, 0xc3,
+ 0x99, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xd8, 0xd5, 0x35, 0xde, 0x0f, 0xc4,
+ 0x11, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xd1, 0xd0, 0x5c, 0x32, 0x0f, 0xc3,
+ 0x11, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x51, 0xd1, 0x50, 0x46, 0x0f, 0xc3,
+ 0x90, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x01, 0xd0, 0x5c, 0x32, 0x0f, 0xc3,
+ 0x01, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x41, 0xd1, 0x50, 0x46, 0x0f, 0xc3,
+ 0x81, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xc0, 0xd0, 0x5c, 0x32, 0x0f, 0xc3,
+ 0x09, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x49, 0xd1, 0x50, 0x46, 0x0f, 0xc3,
+ 0x89, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xc9, 0xd5, 0x35, 0xde, 0x0f, 0xc4,
+ 0x08, 0x00, 0xc1, 0x05, 0x04, 0xc2, 0x00, 0x27, 0x0f, 0xd4, 0xf8, 0x00,
+ 0xc1, 0x05, 0x10, 0xc5, 0xda, 0xf6, 0x0f, 0x9a, 0x48, 0xc9, 0xae, 0x4f,
+ 0x0f, 0x17, 0xf9, 0x46, 0x09, 0x97, 0xc1, 0x05, 0x28, 0x45, 0x2b, 0x5f,
+ 0xc1, 0x05, 0x4c, 0x47, 0x02, 0x0e, 0x41, 0x05, 0x5e, 0xd4, 0x39, 0x08,
+ 0x0f, 0x98, 0xc1, 0xd3, 0x3f, 0x96, 0x0f, 0x98, 0xb0, 0xc2, 0x00, 0x7a,
+ 0x08, 0xc7, 0xf9, 0x47, 0x34, 0x2f, 0xc1, 0x05, 0xe5, 0x46, 0x09, 0x97,
+ 0xc1, 0x05, 0xfd, 0x4d, 0x29, 0xb9, 0xc1, 0x06, 0x21, 0x4f, 0x0b, 0x17,
+ 0x41, 0x06, 0x80, 0x0e, 0xc1, 0x06, 0xdf, 0xc8, 0x7d, 0xa4, 0x07, 0xf2,
+ 0x59, 0xc4, 0x0e, 0x9a, 0x01, 0x81, 0x80, 0xca, 0xa7, 0xf6, 0x0f, 0x9f,
+ 0x99, 0xca, 0xa1, 0x7a, 0x0f, 0x9f, 0xa1, 0xc9, 0x42, 0xd1, 0x0f, 0xa2,
+ 0x58, 0x58, 0x21, 0xb3, 0xc1, 0x06, 0xeb, 0xc4, 0x0e, 0x9a, 0x01, 0x80,
+ 0xe0, 0xc8, 0x31, 0x90, 0x0f, 0xac, 0x29, 0xc6, 0xcb, 0xe1, 0x0f, 0xb7,
+ 0xc1, 0xc4, 0x5c, 0x58, 0x0f, 0xca, 0x78, 0xc5, 0x8d, 0xed, 0x0f, 0xcb,
+ 0xf9, 0xc4, 0x1d, 0xa8, 0x01, 0x1f, 0x29, 0xc5, 0x71, 0x71, 0x0f, 0xd6,
+ 0x98, 0x42, 0x00, 0xaf, 0x41, 0x06, 0xf7, 0x00, 0xc1, 0x07, 0x03, 0xc7,
+ 0x90, 0x53, 0x01, 0x10, 0xe1, 0xcd, 0x79, 0x41, 0x01, 0x00, 0x28, 0xca,
+ 0xa0, 0xbc, 0x0f, 0x9b, 0xa3, 0x01, 0x07, 0x25, 0xc3, 0x00, 0x74, 0x01,
+ 0x56, 0xe1, 0xce, 0x4a, 0x43, 0x01, 0x70, 0x80, 0x44, 0x00, 0x8c, 0xc1,
+ 0x07, 0x2b, 0xc4, 0x3a, 0xb4, 0x0f, 0xc9, 0x31, 0xc7, 0xc2, 0x8f, 0x0f,
+ 0xa4, 0x31, 0xcf, 0x64, 0x95, 0x0f, 0xb0, 0xc1, 0x15, 0xc1, 0x07, 0x35,
+ 0xd2, 0x4c, 0x25, 0x0f, 0xcb, 0xc8, 0x4d, 0x27, 0x30, 0xc1, 0x07, 0x41,
+ 0xc7, 0xc1, 0xbd, 0x0f, 0x9a, 0x10, 0xc8, 0xb6, 0x62, 0x01, 0x05, 0x19,
+ 0xc3, 0x91, 0xe8, 0x0f, 0x9a, 0xf8, 0x46, 0x01, 0xec, 0xc1, 0x07, 0x4d,
+ 0xd1, 0x55, 0x85, 0x0f, 0xa1, 0x28, 0xd8, 0x21, 0xfb, 0x0f, 0xb1, 0x30,
+ 0xcd, 0x78, 0x64, 0x01, 0x0a, 0xf9, 0xc5, 0x03, 0x02, 0x01, 0x02, 0x20,
+ 0xc4, 0xe2, 0x5f, 0x0f, 0xad, 0xf1, 0xc5, 0xd6, 0xcd, 0x0f, 0xad, 0xe9,
+ 0xc7, 0x87, 0xc2, 0x0f, 0xad, 0xe0, 0xca, 0x9b, 0x76, 0x01, 0x3e, 0xb9,
+ 0xc5, 0x06, 0xe2, 0x01, 0x2c, 0x41, 0x45, 0x15, 0xdb, 0xc1, 0x07, 0x53,
+ 0xc4, 0x00, 0xf0, 0x00, 0x01, 0x70, 0x10, 0xc1, 0x07, 0x5f, 0x03, 0xc1,
+ 0x07, 0x6b, 0x06, 0xc1, 0x07, 0x7d, 0x05, 0xc1, 0x07, 0x89, 0x15, 0xc1,
+ 0x07, 0x99, 0x0e, 0xc1, 0x07, 0xa5, 0x07, 0xc1, 0x07, 0xb5, 0x42, 0x00,
+ 0xb4, 0xc1, 0x07, 0xc1, 0x42, 0x00, 0xe3, 0xc1, 0x07, 0xcd, 0x14, 0xc1,
+ 0x07, 0xd9, 0xc5, 0x1e, 0xc8, 0x07, 0xfa, 0xf1, 0x12, 0xc1, 0x07, 0xe5,
+ 0xc6, 0x60, 0xb1, 0x07, 0xff, 0x19, 0xca, 0x9b, 0x58, 0x07, 0xff, 0x21,
+ 0xc8, 0x77, 0x99, 0x07, 0xff, 0x29, 0xc8, 0xbe, 0x72, 0x07, 0xff, 0x31,
+ 0xcc, 0x89, 0xcd, 0x07, 0xf8, 0x69, 0xc9, 0x11, 0xf6, 0x07, 0xf8, 0x71,
+ 0xcd, 0x36, 0x86, 0x07, 0xfa, 0xe0, 0xcc, 0x68, 0xfd, 0x01, 0x31, 0xeb,
+ 0x01, 0x07, 0xf7, 0xce, 0x6f, 0x46, 0x01, 0x03, 0x41, 0xcb, 0x62, 0xc8,
+ 0x0f, 0xca, 0x38, 0x44, 0x3f, 0xf8, 0xc1, 0x07, 0xfb, 0x42, 0x00, 0xe1,
+ 0xc1, 0x08, 0x05, 0xc7, 0xc1, 0x0e, 0x0f, 0xcf, 0x40, 0xc3, 0x17, 0x28,
+ 0x01, 0x2e, 0x49, 0xd1, 0x55, 0x74, 0x0f, 0x9d, 0x19, 0xd7, 0x2a, 0x3d,
+ 0x0f, 0x9b, 0x28, 0xc7, 0xc7, 0x89, 0x0f, 0xae, 0x21, 0xc6, 0x9e, 0xf4,
+ 0x0f, 0xa6, 0x09, 0xc9, 0x1b, 0x0a, 0x00, 0x00, 0xe0, 0xc9, 0xae, 0x58,
+ 0x0f, 0xa7, 0xe9, 0xc6, 0xd0, 0x25, 0x0f, 0x9c, 0xf0, 0xc6, 0xb7, 0xec,
+ 0x0f, 0xd4, 0xb1, 0xc5, 0x62, 0xce, 0x0f, 0x9c, 0xb0, 0x14, 0xc1, 0x08,
+ 0x11, 0x16, 0xc1, 0x08, 0x1d, 0x10, 0xc1, 0x08, 0x3b, 0x06, 0xc1, 0x08,
+ 0x54, 0x15, 0xc1, 0x08, 0x68, 0x04, 0xc1, 0x08, 0x7e, 0x0a, 0xc1, 0x08,
+ 0x88, 0x03, 0xc1, 0x08, 0x92, 0xc2, 0x01, 0x4a, 0x0b, 0x7a, 0x11, 0x1c,
+ 0xc1, 0x08, 0x9c, 0x43, 0x70, 0x51, 0xc1, 0x08, 0xae, 0x09, 0xc1, 0x08,
+ 0xca, 0xc2, 0x8d, 0x8f, 0x0b, 0x79, 0x39, 0x13, 0xc1, 0x08, 0xd2, 0xc2,
+ 0x02, 0x2b, 0x0b, 0x78, 0xf1, 0x0e, 0xc1, 0x08, 0xdc, 0x18, 0xc1, 0x08,
+ 0xea, 0xc2, 0x00, 0x87, 0x0b, 0x78, 0x39, 0x0f, 0xc1, 0x08, 0xf4, 0x12,
+ 0x41, 0x08, 0xfe, 0xc5, 0x05, 0x02, 0x0b, 0x7c, 0x91, 0xc5, 0x00, 0xd4,
+ 0x0b, 0x7c, 0x89, 0xc9, 0x63, 0x69, 0x0b, 0x7c, 0x81, 0xc5, 0x00, 0x2c,
+ 0x0b, 0x7c, 0x78, 0x97, 0x0b, 0x7b, 0x53, 0x01, 0x09, 0x08, 0x8b, 0x0b,
+ 0x7b, 0x0b, 0x01, 0x09, 0x29, 0x87, 0x0b, 0x7a, 0xeb, 0x01, 0x09, 0x4d,
+ 0xc2, 0x00, 0x18, 0x0b, 0x7c, 0x19, 0x91, 0x0b, 0x7a, 0xcb, 0x01, 0x09,
+ 0x63, 0x9b, 0x0b, 0x7b, 0x8b, 0x01, 0x09, 0x73, 0x90, 0x0b, 0x7b, 0xeb,
+ 0x01, 0x09, 0x7d, 0x83, 0x0b, 0x7a, 0xa3, 0x01, 0x09, 0x81, 0xca, 0x9d,
+ 0x7e, 0x0b, 0x7b, 0xc3, 0x01, 0x09, 0xa1, 0x99, 0x0b, 0x7a, 0xe2, 0x01,
+ 0x09, 0xa5, 0x49, 0xaa, 0xc2, 0xc1, 0x09, 0xa9, 0xca, 0xa1, 0x84, 0x0b,
+ 0x7a, 0x89, 0xd6, 0x2b, 0xec, 0x0b, 0x7a, 0x78, 0xcb, 0x95, 0xc4, 0x01,
+ 0x22, 0x49, 0xcc, 0x8a, 0xbd, 0x01, 0x22, 0x40, 0xc5, 0xbc, 0xed, 0x0f,
+ 0xa9, 0x61, 0xc5, 0x36, 0xb7, 0x0f, 0x9d, 0x21, 0xc5, 0x00, 0xb9, 0x00,
+ 0x05, 0xa9, 0xc2, 0x00, 0x51, 0x0f, 0xcd, 0x00, 0xc3, 0x02, 0xa3, 0x00,
+ 0x05, 0xb9, 0xe0, 0x06, 0x67, 0x0f, 0xde, 0x10, 0x00, 0xc1, 0x09, 0xb5,
+ 0xcd, 0x79, 0x4e, 0x01, 0x10, 0x98, 0xc4, 0xd1, 0x89, 0x0f, 0xae, 0xa9,
+ 0xc4, 0x5c, 0x58, 0x0f, 0xa5, 0xe9, 0xc3, 0x22, 0xd3, 0x0f, 0xb4, 0x80,
+ 0x43, 0x01, 0xdf, 0xc1, 0x09, 0xc4, 0x45, 0xdc, 0xdb, 0x41, 0x0a, 0x00,
+ 0xce, 0x72, 0x10, 0x0b, 0x74, 0xd1, 0x15, 0xc1, 0x0a, 0x12, 0xc9, 0x11,
+ 0xf6, 0x0b, 0x74, 0xc1, 0x05, 0xc1, 0x0a, 0x1e, 0x46, 0x09, 0x97, 0xc1,
+ 0x0a, 0x2a, 0x47, 0x34, 0x2f, 0x41, 0x0a, 0x51, 0xc9, 0xaf, 0x93, 0x01,
+ 0x1e, 0xc9, 0x16, 0xc1, 0x0a, 0x67, 0x4a, 0xa4, 0x0e, 0xc1, 0x0a, 0x79,
+ 0xcf, 0x67, 0xa1, 0x01, 0x1e, 0x99, 0xc5, 0x1d, 0x88, 0x01, 0x1e, 0x88,
+ 0x4a, 0x9d, 0xf6, 0xc1, 0x0a, 0x85, 0x46, 0x09, 0x97, 0xc1, 0x0a, 0x8d,
+ 0x51, 0x51, 0x89, 0x41, 0x0a, 0xab, 0x48, 0xbc, 0x6a, 0xc1, 0x0a, 0xbb,
+ 0x4d, 0x75, 0xcd, 0x41, 0x0a, 0xcb, 0xc2, 0x07, 0xb8, 0x01, 0x12, 0xf1,
+ 0xc5, 0x01, 0x95, 0x01, 0x11, 0x0b, 0x01, 0x0a, 0xda, 0xd4, 0x3f, 0x34,
+ 0x01, 0x4c, 0xe8, 0xc4, 0x15, 0xe7, 0x05, 0x5f, 0x81, 0xc4, 0x26, 0x78,
+ 0x05, 0x5f, 0xc9, 0xc3, 0x05, 0x14, 0x05, 0x5f, 0x89, 0x16, 0xc1, 0x0a,
+ 0xde, 0x08, 0xc1, 0x0a, 0xea, 0x15, 0xc1, 0x0a, 0xf6, 0xc5, 0x06, 0xdb,
+ 0x05, 0x5f, 0xc0, 0xc8, 0xbf, 0xd2, 0x05, 0x5f, 0x69, 0xc3, 0x7c, 0x50,
+ 0x05, 0x57, 0x91, 0xcb, 0x8e, 0x6b, 0x05, 0x57, 0x88, 0x4a, 0x6f, 0xc8,
+ 0xc1, 0x0b, 0x02, 0xc5, 0x1e, 0x96, 0x05, 0x57, 0xb0, 0x46, 0x02, 0x0f,
+ 0xc1, 0x0b, 0x32, 0xc7, 0xc2, 0xc7, 0x05, 0x5f, 0x60, 0xc2, 0x00, 0xd1,
+ 0x05, 0x57, 0x81, 0xc2, 0x06, 0xdb, 0x05, 0x5f, 0x58, 0x00, 0xc1, 0x0b,
+ 0xa1, 0xc3, 0x1a, 0xd2, 0x0f, 0xb7, 0x19, 0xcf, 0x68, 0xaf, 0x0f, 0xcd,
+ 0xe0, 0xc3, 0x03, 0x0c, 0x01, 0x37, 0x83, 0x01, 0x0b, 0xad, 0xc5, 0xd7,
+ 0x86, 0x0f, 0xaf, 0xd8, 0x00, 0x41, 0x0b, 0xb1, 0x49, 0x89, 0xf4, 0xc1,
+ 0x0b, 0xbd, 0xcd, 0x78, 0x98, 0x01, 0x1c, 0x69, 0xc4, 0x47, 0x02, 0x0f,
+ 0xb4, 0xe8, 0x16, 0xc1, 0x0b, 0xc7, 0x15, 0xc1, 0x0b, 0xd9, 0xce, 0x6c,
+ 0x1a, 0x08, 0xb3, 0x3b, 0x01, 0x0b, 0xe8, 0xcd, 0x76, 0x69, 0x08, 0xb3,
+ 0x0b, 0x01, 0x0b, 0xee, 0xc5, 0x01, 0x2d, 0x00, 0xc0, 0x03, 0x01, 0x0b,
+ 0xf4, 0x06, 0xc1, 0x0b, 0xfa, 0x47, 0x02, 0x0e, 0xc1, 0x0c, 0x06, 0x08,
+ 0xc1, 0x0c, 0x91, 0xcf, 0x69, 0xbd, 0x00, 0xc0, 0x71, 0xc6, 0xcd, 0xc7,
+ 0x00, 0xc0, 0x51, 0x47, 0xc2, 0x57, 0xc1, 0x0c, 0xa3, 0x42, 0x00, 0x99,
+ 0xc1, 0x0c, 0xaf, 0xc8, 0x22, 0x83, 0x00, 0xc0, 0x08, 0x00, 0xc1, 0x0c,
+ 0xbb, 0xcb, 0x5c, 0x17, 0x0f, 0xc8, 0x88, 0xc5, 0x11, 0x55, 0x0f, 0xa1,
+ 0xa8, 0x00, 0xc1, 0x0c, 0xc7, 0x45, 0x02, 0x09, 0x41, 0x0c, 0xe3, 0xc2,
+ 0x00, 0x96, 0x01, 0x15, 0x39, 0xcd, 0x7c, 0xcf, 0x0f, 0xc9, 0xd8, 0xd0,
+ 0x57, 0xf2, 0x0f, 0x9c, 0x89, 0xc4, 0x2a, 0x3e, 0x0f, 0xcb, 0x70, 0xc3,
+ 0x79, 0x83, 0x0f, 0xa7, 0xa1, 0xdd, 0x10, 0x4c, 0x0f, 0xa7, 0x90, 0x47,
+ 0xc7, 0x4a, 0xc1, 0x0c, 0xef, 0x45, 0x58, 0xc2, 0xc1, 0x0d, 0x1d, 0x4a,
+ 0xa3, 0x0a, 0xc1, 0x0d, 0x5b, 0x15, 0xc1, 0x0d, 0x6d, 0x4e, 0x73, 0x52,
+ 0xc1, 0x0d, 0x79, 0x08, 0xc1, 0x0d, 0x8b, 0x42, 0x00, 0x2c, 0xc1, 0x0d,
+ 0x97, 0x45, 0x00, 0x49, 0x41, 0x0d, 0xa3, 0xc4, 0x14, 0x09, 0x0e, 0x97,
+ 0x98, 0xc4, 0x00, 0x2d, 0x0e, 0x97, 0x43, 0x01, 0x0d, 0xbb, 0xc5, 0x66,
+ 0xb1, 0x0e, 0x97, 0x58, 0xc4, 0x18, 0x10, 0x0e, 0x97, 0x3b, 0x01, 0x0d,
+ 0xc1, 0xc2, 0x22, 0xcc, 0x0e, 0x97, 0x32, 0x01, 0x0d, 0xc7, 0x0b, 0xc1,
+ 0x0d, 0xcd, 0xc3, 0x09, 0x9e, 0x0e, 0x97, 0x22, 0x01, 0x0d, 0xd9, 0x0a,
+ 0xc1, 0x0d, 0xdf, 0x19, 0xc1, 0x0d, 0xeb, 0xc2, 0x00, 0xc4, 0x0e, 0x97,
+ 0x50, 0x91, 0x08, 0xf7, 0xb1, 0x87, 0x08, 0xf7, 0xa9, 0x97, 0x08, 0xf7,
+ 0xa1, 0x8b, 0x08, 0xf7, 0x98, 0x83, 0x08, 0xf7, 0x89, 0xc2, 0x0d, 0xf6,
+ 0x08, 0xf7, 0x81, 0xc2, 0x02, 0x41, 0x08, 0xf7, 0x79, 0xc2, 0x00, 0xdb,
+ 0x08, 0xf7, 0x71, 0xc2, 0x00, 0x39, 0x08, 0xf7, 0x69, 0xc2, 0x19, 0x2c,
+ 0x08, 0xf7, 0x61, 0x10, 0xc1, 0x0d, 0xf5, 0xc2, 0x25, 0x3b, 0x08, 0xf7,
+ 0x51, 0xc2, 0x00, 0x64, 0x08, 0xf7, 0x49, 0xc2, 0x0e, 0x9a, 0x08, 0xf7,
+ 0x39, 0xc2, 0x01, 0x6f, 0x08, 0xf7, 0x31, 0xc2, 0x01, 0xc3, 0x08, 0xf7,
+ 0x29, 0xc2, 0x01, 0x5d, 0x08, 0xf7, 0x21, 0xc2, 0x00, 0xb0, 0x08, 0xf7,
+ 0x19, 0xc2, 0x01, 0x30, 0x08, 0xf7, 0x09, 0xc2, 0x02, 0x2b, 0x08, 0xf7,
+ 0x00, 0x46, 0x09, 0x97, 0xc1, 0x0e, 0x05, 0x14, 0xc1, 0x0e, 0x29, 0x18,
+ 0xc1, 0x0e, 0x35, 0x45, 0x00, 0xba, 0xc1, 0x0e, 0x41, 0x47, 0x02, 0x0e,
+ 0x41, 0x0e, 0x5f, 0x15, 0xc1, 0x0e, 0xc6, 0x4b, 0x6f, 0xc7, 0xc1, 0x0e,
+ 0xd2, 0x47, 0x02, 0x0e, 0xc1, 0x0e, 0xe8, 0xc9, 0xaa, 0xa7, 0x08, 0xe3,
+ 0x89, 0xc9, 0x15, 0xcc, 0x08, 0xe3, 0x80, 0x4c, 0x37, 0x33, 0xc1, 0x0f,
+ 0x48, 0xcf, 0x20, 0xfc, 0x01, 0x35, 0x29, 0xc4, 0x00, 0xba, 0x01, 0x32,
+ 0x10, 0x45, 0x00, 0xba, 0xc1, 0x0f, 0x54, 0x47, 0x02, 0x0e, 0xc1, 0x0f,
+ 0x66, 0x4b, 0x6f, 0xc7, 0xc1, 0x0f, 0xcf, 0xce, 0x73, 0x0c, 0x00, 0x6a,
+ 0xb9, 0x49, 0x53, 0xa9, 0xc1, 0x0f, 0xf5, 0x06, 0xc1, 0x10, 0x01, 0x47,
+ 0x34, 0x2f, 0x41, 0x10, 0x0d, 0x4c, 0x11, 0xe2, 0xc1, 0x10, 0x19, 0x47,
+ 0x34, 0x2f, 0xc1, 0x10, 0x37, 0x52, 0x48, 0x11, 0xc1, 0x10, 0x4a, 0x47,
+ 0x02, 0x0e, 0xc1, 0x10, 0x56, 0xc7, 0xc3, 0xae, 0x08, 0x56, 0x40, 0xc7,
+ 0xc3, 0xe6, 0x0f, 0xab, 0xd1, 0x43, 0x03, 0x35, 0xc1, 0x10, 0xbb, 0x45,
+ 0x00, 0x8c, 0xc1, 0x10, 0xc7, 0xd7, 0x29, 0xf8, 0x0f, 0xa3, 0x58, 0xcb,
+ 0x05, 0x1c, 0x00, 0x42, 0xf1, 0xcf, 0x63, 0xff, 0x00, 0x42, 0xd9, 0xd1,
+ 0x4e, 0xbf, 0x00, 0x42, 0xd1, 0xd0, 0x58, 0x32, 0x00, 0x42, 0xc9, 0x47,
+ 0x02, 0x0e, 0x41, 0x10, 0xd3, 0x0e, 0xc1, 0x10, 0xf3, 0x15, 0xc1, 0x10,
+ 0xff, 0xd1, 0x50, 0xce, 0x08, 0x8b, 0xa0, 0xc5, 0x8d, 0x1c, 0x0f, 0x81,
+ 0x51, 0x19, 0xc1, 0x11, 0x0b, 0x07, 0xc1, 0x11, 0x1d, 0x15, 0xc1, 0x11,
+ 0x29, 0x10, 0xc1, 0x11, 0x47, 0xca, 0xa0, 0x9e, 0x0f, 0x80, 0x21, 0xcc,
+ 0x87, 0xe1, 0x0f, 0x80, 0x29, 0x11, 0xc1, 0x11, 0x53, 0x16, 0xc1, 0x11,
+ 0x5f, 0x08, 0xc1, 0x11, 0x6b, 0xc4, 0xe3, 0xc7, 0x0f, 0x81, 0x11, 0xcd,
+ 0x78, 0x8b, 0x0f, 0x81, 0x29, 0x42, 0x01, 0x5d, 0xc1, 0x11, 0x77, 0xc6,
+ 0xce, 0x39, 0x0f, 0x81, 0x40, 0x43, 0x00, 0xe5, 0xc1, 0x11, 0x83, 0x00,
+ 0x41, 0x11, 0x96, 0x42, 0x0b, 0x26, 0xc1, 0x11, 0xa8, 0xc3, 0x64, 0xae,
+ 0x01, 0x15, 0xc1, 0xc3, 0x0e, 0xa7, 0x01, 0x14, 0x62, 0x01, 0x11, 0xb4,
+ 0xcc, 0x45, 0x8d, 0x08, 0x95, 0x49, 0x47, 0x02, 0x0e, 0x41, 0x11, 0xb8,
+ 0xc4, 0x26, 0x78, 0x0b, 0x53, 0x49, 0xc5, 0x06, 0xdb, 0x0b, 0x53, 0x41,
+ 0x15, 0xc1, 0x12, 0x14, 0x08, 0xc1, 0x12, 0x20, 0x16, 0xc1, 0x12, 0x2c,
+ 0xc3, 0x05, 0x14, 0x0b, 0x53, 0x09, 0xc4, 0x15, 0xe7, 0x0b, 0x53, 0x00,
+ 0xc2, 0x13, 0x4c, 0x0b, 0x52, 0xf1, 0xc3, 0x01, 0x9b, 0x0b, 0x52, 0xa9,
+ 0x83, 0x0b, 0x52, 0x00, 0x8b, 0x0b, 0x52, 0xe9, 0x91, 0x0b, 0x52, 0x98,
+ 0x8b, 0x0b, 0x52, 0xe1, 0x91, 0x0b, 0x52, 0x48, 0x90, 0x0b, 0x52, 0xd0,
+ 0x91, 0x0b, 0x52, 0xc9, 0xc4, 0xe2, 0x77, 0x0b, 0x52, 0x61, 0xc3, 0x4d,
+ 0xe7, 0x0b, 0x52, 0x40, 0x83, 0x0b, 0x52, 0xb0, 0x91, 0x0b, 0x52, 0x89,
+ 0x8e, 0x0b, 0x52, 0x68, 0x83, 0x0b, 0x52, 0x81, 0xc2, 0x00, 0x0a, 0x0b,
+ 0x52, 0x38, 0xc2, 0x00, 0x74, 0x0b, 0x52, 0x79, 0xc2, 0x04, 0x2b, 0x0b,
+ 0x52, 0x08, 0xc3, 0x7c, 0x57, 0x0b, 0x52, 0x71, 0xc2, 0x03, 0x4e, 0x0b,
+ 0x52, 0x18, 0x8b, 0x0b, 0x52, 0x50, 0x4f, 0x68, 0x91, 0xc1, 0x12, 0x38,
+ 0xce, 0x6c, 0xc2, 0x05, 0x53, 0xd9, 0x15, 0xc1, 0x12, 0x40, 0x03, 0xc1,
+ 0x12, 0x4c, 0xc9, 0x0e, 0x6e, 0x00, 0x81, 0xb9, 0x42, 0x07, 0xb2, 0xc1,
+ 0x12, 0x58, 0xce, 0x70, 0xb2, 0x00, 0x82, 0x51, 0x57, 0x28, 0x9f, 0xc1,
+ 0x12, 0x64, 0xd4, 0x38, 0x7c, 0x00, 0x84, 0x79, 0x4c, 0x8c, 0x31, 0x41,
+ 0x12, 0x78, 0x03, 0xc1, 0x12, 0x80, 0xc8, 0xbb, 0xd2, 0x00, 0x82, 0x61,
+ 0xc9, 0xb4, 0xe2, 0x00, 0x82, 0x69, 0xc8, 0xbf, 0x5a, 0x00, 0x82, 0x79,
+ 0x45, 0x4d, 0x21, 0x41, 0x12, 0x8c, 0xc4, 0x15, 0xe7, 0x00, 0x84, 0x81,
+ 0xc3, 0x05, 0x14, 0x00, 0x84, 0x89, 0x16, 0xc1, 0x12, 0x98, 0x08, 0xc1,
+ 0x12, 0xa4, 0x15, 0xc1, 0x12, 0xb0, 0xc5, 0x06, 0xdb, 0x00, 0x84, 0xc1,
+ 0xc4, 0x26, 0x78, 0x00, 0x84, 0xc8, 0x83, 0x00, 0x81, 0x0b, 0x01, 0x12,
+ 0xbc, 0x0d, 0xc1, 0x12, 0xc6, 0x16, 0xc1, 0x12, 0xd3, 0x15, 0xc1, 0x12,
+ 0xe4, 0x09, 0xc1, 0x12, 0xf8, 0x10, 0xc1, 0x13, 0x08, 0x05, 0xc1, 0x13,
+ 0x1c, 0x0c, 0xc1, 0x13, 0x26, 0x06, 0xc1, 0x13, 0x30, 0x12, 0xc1, 0x13,
+ 0x3e, 0x04, 0xc1, 0x13, 0x48, 0x0f, 0xc1, 0x13, 0x52, 0xc2, 0x19, 0x2c,
+ 0x00, 0x80, 0xd1, 0x14, 0xc1, 0x13, 0x5c, 0x0e, 0xc1, 0x13, 0x66, 0x19,
+ 0xc1, 0x13, 0x70, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0xf9, 0x8b, 0x00, 0x81,
+ 0x1b, 0x01, 0x13, 0x7a, 0x97, 0x00, 0x81, 0x2b, 0x01, 0x13, 0x7e, 0x87,
+ 0x00, 0x81, 0x3b, 0x01, 0x13, 0x82, 0x91, 0x00, 0x81, 0x49, 0x48, 0xb2,
+ 0x2d, 0x41, 0x13, 0x88, 0xc2, 0x02, 0x2e, 0x05, 0x53, 0xb1, 0xc2, 0xc8,
+ 0xd4, 0x05, 0x53, 0xa9, 0xc3, 0xe6, 0x17, 0x05, 0x53, 0xa0, 0xc4, 0x26,
+ 0x78, 0x05, 0x4f, 0xc9, 0xc5, 0x06, 0xdb, 0x05, 0x4f, 0xc1, 0x15, 0xc1,
+ 0x13, 0x96, 0x08, 0xc1, 0x13, 0xa2, 0x16, 0xc1, 0x13, 0xae, 0xc3, 0x05,
+ 0x14, 0x05, 0x4f, 0x89, 0xc4, 0x15, 0xe7, 0x05, 0x4f, 0x80, 0xc5, 0xd6,
+ 0x73, 0x00, 0x83, 0x19, 0xc6, 0xce, 0x57, 0x00, 0x83, 0x20, 0x83, 0x00,
+ 0x81, 0x61, 0x8b, 0x00, 0x81, 0x92, 0x01, 0x13, 0xba, 0x8b, 0x00, 0x81,
+ 0x70, 0x97, 0x00, 0x81, 0x80, 0xc6, 0x00, 0xd3, 0x00, 0x81, 0xa8, 0xc2,
+ 0x25, 0x9f, 0x00, 0x81, 0x99, 0x91, 0x00, 0x81, 0xa0, 0x94, 0x00, 0x82,
+ 0xb3, 0x01, 0x13, 0xc3, 0x8e, 0x00, 0x82, 0xc2, 0x01, 0x13, 0xc7, 0xcc,
+ 0x85, 0xad, 0x00, 0x83, 0x11, 0x44, 0x00, 0xd0, 0x41, 0x13, 0xcb, 0xc2,
+ 0x2c, 0x43, 0x00, 0x83, 0x39, 0xc2, 0x0f, 0xe1, 0x00, 0x83, 0x40, 0xc2,
+ 0x49, 0x0c, 0x00, 0x83, 0x91, 0x97, 0x00, 0x83, 0x99, 0xc2, 0x02, 0xe0,
+ 0x00, 0x83, 0xa0, 0x46, 0x30, 0xa0, 0xc1, 0x13, 0xde, 0x4a, 0xa6, 0x0c,
+ 0x41, 0x13, 0xf6, 0xc2, 0x02, 0xa0, 0x00, 0x82, 0x11, 0xc4, 0x02, 0xde,
+ 0x00, 0x82, 0x18, 0xc3, 0x09, 0x9e, 0x00, 0x82, 0x21, 0xc3, 0x0d, 0x14,
+ 0x00, 0x82, 0x28, 0xc2, 0x22, 0xcc, 0x00, 0x82, 0x31, 0xc4, 0x18, 0x10,
+ 0x00, 0x82, 0x38, 0xca, 0x9f, 0xfe, 0x0f, 0xad, 0x30, 0x47, 0x02, 0x0e,
+ 0xc1, 0x14, 0x08, 0xca, 0x3b, 0x06, 0x01, 0x87, 0xd9, 0xce, 0x1c, 0x92,
+ 0x01, 0x87, 0xe9, 0xd5, 0x34, 0xb8, 0x01, 0x87, 0xf1, 0xcc, 0x80, 0xfd,
+ 0x01, 0x87, 0xf8, 0xd1, 0x2f, 0xfb, 0x01, 0x84, 0xd9, 0xd6, 0x2f, 0xf6,
+ 0x01, 0x84, 0xe1, 0xcd, 0x77, 0x87, 0x01, 0x85, 0x01, 0xd4, 0x0d, 0xe2,
+ 0x01, 0x87, 0xe0, 0xc6, 0x00, 0xd3, 0x08, 0x86, 0x68, 0xc9, 0xb2, 0x2d,
+ 0x08, 0x86, 0x11, 0x03, 0xc1, 0x14, 0x5e, 0x91, 0x08, 0x85, 0xb9, 0x87,
+ 0x08, 0x85, 0xa9, 0x97, 0x08, 0x85, 0x9b, 0x01, 0x14, 0x6a, 0x8b, 0x08,
+ 0x85, 0x8a, 0x01, 0x14, 0x6e, 0x46, 0x00, 0x59, 0xc1, 0x14, 0x72, 0xc4,
+ 0x19, 0x53, 0x08, 0x86, 0x00, 0xcb, 0x45, 0x8e, 0x08, 0x85, 0xf1, 0x44,
+ 0x00, 0xbb, 0x41, 0x14, 0x7e, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x79, 0x15,
+ 0xc1, 0x14, 0x96, 0xc2, 0x02, 0x41, 0x08, 0x85, 0x59, 0xc2, 0x00, 0xdb,
+ 0x08, 0x85, 0x51, 0x14, 0xc1, 0x14, 0xa6, 0xc2, 0x19, 0x2c, 0x08, 0x85,
+ 0x41, 0xc2, 0x01, 0xc3, 0x08, 0x85, 0x39, 0x04, 0xc1, 0x14, 0xb0, 0x12,
+ 0xc1, 0x14, 0xba, 0x10, 0xc1, 0x14, 0xc4, 0x06, 0xc1, 0x14, 0xda, 0x16,
+ 0xc1, 0x14, 0xe8, 0x0c, 0xc1, 0x14, 0xf6, 0x05, 0xc1, 0x15, 0x00, 0x09,
+ 0xc1, 0x15, 0x0a, 0x0d, 0xc1, 0x15, 0x14, 0x83, 0x08, 0x84, 0x1b, 0x01,
+ 0x15, 0x1e, 0x91, 0x08, 0x84, 0x59, 0x87, 0x08, 0x84, 0x49, 0x97, 0x08,
+ 0x84, 0x3b, 0x01, 0x15, 0x2a, 0x8b, 0x08, 0x84, 0x2a, 0x01, 0x15, 0x2e,
+ 0xc4, 0xde, 0x93, 0x05, 0x49, 0x79, 0xc3, 0xe4, 0xfd, 0x05, 0x49, 0x70,
+ 0xc5, 0xde, 0x02, 0x05, 0x49, 0x63, 0x01, 0x15, 0x32, 0xc6, 0xca, 0x77,
+ 0x05, 0x49, 0x58, 0x91, 0x05, 0x49, 0x51, 0x87, 0x05, 0x49, 0x3b, 0x01,
+ 0x15, 0x38, 0x97, 0x05, 0x49, 0x42, 0x01, 0x15, 0x3c, 0x11, 0xc1, 0x15,
+ 0x40, 0x8b, 0x05, 0x49, 0x21, 0x83, 0x05, 0x49, 0x11, 0xc2, 0x00, 0x64,
+ 0x05, 0x49, 0x09, 0xc2, 0x02, 0x41, 0x05, 0x49, 0x01, 0x0a, 0xc1, 0x15,
+ 0x48, 0x16, 0xc1, 0x15, 0x52, 0xc2, 0x01, 0x4a, 0x05, 0x48, 0xe9, 0xc2,
+ 0x00, 0xdb, 0x05, 0x48, 0xe1, 0xc2, 0x19, 0x2c, 0x05, 0x48, 0xd9, 0xc2,
+ 0x00, 0x39, 0x05, 0x48, 0xd1, 0xc2, 0x01, 0x5d, 0x05, 0x48, 0xc9, 0xc2,
+ 0x0e, 0x9a, 0x05, 0x48, 0xc1, 0xc2, 0x01, 0xc3, 0x05, 0x48, 0xb9, 0x12,
+ 0xc1, 0x15, 0x5c, 0x10, 0xc1, 0x15, 0x66, 0xc2, 0x02, 0x1c, 0x05, 0x48,
+ 0x81, 0x15, 0xc1, 0x15, 0x76, 0xc2, 0x01, 0x30, 0x05, 0x48, 0x61, 0x0d,
+ 0x41, 0x15, 0x80, 0xc4, 0x26, 0x78, 0x05, 0x48, 0x49, 0xc5, 0x06, 0xdb,
+ 0x05, 0x48, 0x41, 0x15, 0xc1, 0x15, 0x8a, 0x08, 0xc1, 0x15, 0x96, 0x16,
+ 0xc1, 0x15, 0xa2, 0xc3, 0x05, 0x14, 0x05, 0x48, 0x09, 0xc4, 0x15, 0xe7,
+ 0x05, 0x48, 0x00, 0x45, 0x00, 0xba, 0xc1, 0x15, 0xae, 0x42, 0x00, 0x49,
+ 0xc1, 0x15, 0xd4, 0x4b, 0x6f, 0xc7, 0xc1, 0x15, 0xe0, 0xce, 0x74, 0xcc,
+ 0x00, 0x66, 0xb1, 0x46, 0x09, 0x97, 0x41, 0x16, 0x06, 0xc4, 0xe1, 0x83,
+ 0x0f, 0xcc, 0xc1, 0x4b, 0x91, 0xfc, 0x41, 0x16, 0x2a, 0x05, 0xc1, 0x16,
+ 0x8e, 0x04, 0x41, 0x16, 0xc6, 0xc4, 0x26, 0x78, 0x08, 0x97, 0xc9, 0x15,
+ 0xc1, 0x17, 0x06, 0x08, 0xc1, 0x17, 0x12, 0x16, 0xc1, 0x17, 0x1e, 0xc3,
+ 0x05, 0x14, 0x08, 0x97, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0x97, 0x81, 0xc5,
+ 0x06, 0xdb, 0x08, 0x97, 0xc0, 0xc6, 0x1e, 0x95, 0x08, 0x97, 0x51, 0xc5,
+ 0x33, 0x5d, 0x08, 0x97, 0x49, 0xc8, 0x14, 0x38, 0x08, 0x96, 0xf8, 0x91,
+ 0x08, 0x97, 0x39, 0x03, 0xc1, 0x17, 0x2a, 0x87, 0x08, 0x97, 0x29, 0x97,
+ 0x08, 0x97, 0x1b, 0x01, 0x17, 0x36, 0x8b, 0x08, 0x97, 0x0a, 0x01, 0x17,
+ 0x3a, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0xf1, 0x15, 0xc1, 0x17, 0x3e, 0xc2,
+ 0x02, 0x41, 0x08, 0x96, 0xd9, 0xc2, 0x00, 0xdb, 0x08, 0x96, 0xd1, 0x14,
+ 0xc1, 0x17, 0x48, 0xc2, 0x19, 0x2c, 0x08, 0x96, 0xc1, 0xc2, 0x01, 0xc3,
+ 0x08, 0x96, 0xb9, 0x04, 0xc1, 0x17, 0x52, 0x12, 0xc1, 0x17, 0x62, 0x10,
+ 0xc1, 0x17, 0x6c, 0x06, 0xc1, 0x17, 0x82, 0x16, 0xc1, 0x17, 0x90, 0x0c,
+ 0xc1, 0x17, 0x9e, 0x05, 0xc1, 0x17, 0xae, 0x09, 0xc1, 0x17, 0xb8, 0x0d,
+ 0xc1, 0x17, 0xc8, 0x83, 0x08, 0x95, 0x83, 0x01, 0x17, 0xd2, 0x91, 0x08,
+ 0x95, 0xc1, 0x87, 0x08, 0x95, 0xb1, 0x97, 0x08, 0x95, 0xa3, 0x01, 0x17,
+ 0xde, 0x8b, 0x08, 0x95, 0x92, 0x01, 0x17, 0xe2, 0x44, 0x00, 0xbb, 0xc1,
+ 0x17, 0xe6, 0xcb, 0x45, 0x8e, 0x08, 0x91, 0xd8, 0x46, 0x00, 0x59, 0xc1,
+ 0x17, 0xfc, 0xc4, 0x19, 0x53, 0x08, 0x91, 0xc0, 0x03, 0xc1, 0x18, 0x08,
+ 0x91, 0x08, 0x91, 0x91, 0x87, 0x08, 0x91, 0x81, 0x97, 0x08, 0x91, 0x79,
+ 0x8b, 0x08, 0x91, 0x6a, 0x01, 0x18, 0x14, 0x0e, 0xc1, 0x18, 0x18, 0xc2,
+ 0x00, 0xd0, 0x08, 0x91, 0x51, 0xc2, 0x0d, 0xf6, 0x08, 0x91, 0x49, 0xc2,
+ 0x02, 0x41, 0x08, 0x91, 0x41, 0xc2, 0x00, 0x39, 0x08, 0x91, 0x31, 0xc2,
+ 0x19, 0x2c, 0x08, 0x91, 0x29, 0xc2, 0x01, 0xc3, 0x08, 0x91, 0x21, 0x04,
+ 0xc1, 0x18, 0x22, 0x12, 0xc1, 0x18, 0x32, 0x10, 0xc1, 0x18, 0x3c, 0x06,
+ 0xc1, 0x18, 0x52, 0x16, 0xc1, 0x18, 0x60, 0x0c, 0xc1, 0x18, 0x6e, 0x05,
+ 0xc1, 0x18, 0x78, 0x09, 0xc1, 0x18, 0x82, 0x0d, 0xc1, 0x18, 0x92, 0x83,
+ 0x08, 0x90, 0x03, 0x01, 0x18, 0x9c, 0x91, 0x08, 0x90, 0x31, 0x87, 0x08,
+ 0x90, 0x21, 0x97, 0x08, 0x90, 0x19, 0x8b, 0x08, 0x90, 0x10, 0x46, 0x10,
+ 0x79, 0xc1, 0x18, 0xa8, 0x44, 0x00, 0xbb, 0x41, 0x18, 0xc8, 0xc4, 0x26,
+ 0x78, 0x00, 0xbf, 0x49, 0xc5, 0x06, 0xdb, 0x00, 0xbf, 0x41, 0x15, 0xc1,
+ 0x19, 0x0a, 0x08, 0xc1, 0x19, 0x16, 0x16, 0xc1, 0x19, 0x22, 0xc3, 0x05,
+ 0x14, 0x00, 0xbf, 0x09, 0xc4, 0x15, 0xe7, 0x00, 0xbf, 0x00, 0x45, 0x00,
+ 0xba, 0xc1, 0x19, 0x2e, 0x4a, 0x9f, 0xf4, 0x41, 0x19, 0x4f, 0x13, 0xc1,
+ 0x19, 0x57, 0xc2, 0x00, 0x35, 0x00, 0xbd, 0x6b, 0x01, 0x19, 0x73, 0xc2,
+ 0x14, 0x98, 0x00, 0xbd, 0x5a, 0x01, 0x19, 0x77, 0xc2, 0x0f, 0x9a, 0x00,
+ 0xbd, 0x11, 0x0e, 0xc1, 0x19, 0x7b, 0xc2, 0x00, 0xd0, 0x00, 0xbd, 0x01,
+ 0x15, 0xc1, 0x19, 0x83, 0xc2, 0x17, 0xbd, 0x00, 0xbc, 0xe1, 0xc2, 0x00,
+ 0x79, 0x00, 0xbc, 0xd1, 0xc2, 0x42, 0xcd, 0x00, 0xbc, 0xc9, 0xc2, 0x00,
+ 0xa2, 0x00, 0xbc, 0xc1, 0x12, 0xc1, 0x19, 0x93, 0xc2, 0x01, 0x5d, 0x00,
+ 0xbc, 0xa1, 0x10, 0xc1, 0x19, 0x9b, 0x16, 0xc1, 0x19, 0xb1, 0x06, 0xc1,
+ 0x19, 0xc3, 0x05, 0xc1, 0x19, 0xcb, 0x0d, 0x41, 0x19, 0xd7, 0x0e, 0xc1,
+ 0x19, 0xe3, 0x06, 0xc1, 0x19, 0xef, 0xc8, 0xb9, 0xf2, 0x08, 0x52, 0xa1,
+ 0x05, 0xc1, 0x19, 0xf9, 0xcc, 0x12, 0x2d, 0x08, 0x52, 0x88, 0x44, 0x05,
+ 0x14, 0xc1, 0x1a, 0x05, 0x16, 0x41, 0x1a, 0x11, 0xc4, 0x09, 0x9d, 0x08,
+ 0x52, 0x19, 0x16, 0xc1, 0x1a, 0x1d, 0xc3, 0x05, 0x14, 0x08, 0x52, 0x00,
+ 0xc5, 0x1e, 0x96, 0x08, 0x51, 0xf9, 0x45, 0x34, 0x70, 0x41, 0x1a, 0x29,
+ 0x42, 0x00, 0x58, 0xc1, 0x1a, 0x35, 0xc5, 0xdc, 0xd1, 0x08, 0x51, 0xc9,
+ 0xc9, 0x31, 0x98, 0x08, 0x51, 0xc1, 0xc7, 0x40, 0xe5, 0x08, 0x50, 0x79,
+ 0xc8, 0x14, 0x38, 0x08, 0x50, 0x70, 0x18, 0xc1, 0x1a, 0x41, 0x16, 0xc1,
+ 0x1a, 0x4b, 0xc2, 0x00, 0xdb, 0x08, 0x51, 0x59, 0xc2, 0x00, 0x39, 0x08,
+ 0x51, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0x51, 0x49, 0xc2, 0x01, 0xc3, 0x08,
+ 0x51, 0x41, 0x04, 0xc1, 0x1a, 0x59, 0x12, 0xc1, 0x1a, 0x63, 0x10, 0xc1,
+ 0x1a, 0x6d, 0x06, 0xc1, 0x1a, 0x7d, 0xc2, 0x25, 0x3b, 0x08, 0x50, 0xb9,
+ 0x05, 0xc1, 0x1a, 0x8b, 0x09, 0xc1, 0x1a, 0x95, 0x0d, 0xc1, 0x1a, 0x9f,
+ 0x83, 0x08, 0x50, 0x01, 0x15, 0xc1, 0x1a, 0xaf, 0xc2, 0x02, 0x1c, 0x08,
+ 0x51, 0x81, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x88, 0xc4, 0x00, 0x87, 0x0f,
+ 0xb0, 0xbb, 0x01, 0x1a, 0xbf, 0xd9, 0x20, 0x8f, 0x0f, 0xb1, 0xe8, 0xc9,
+ 0xb0, 0x11, 0x0f, 0xd4, 0x31, 0xca, 0xa6, 0x3e, 0x0f, 0xd5, 0xd0, 0x46,
+ 0xcc, 0x4d, 0xc1, 0x1a, 0xc5, 0xc4, 0x00, 0x87, 0x0f, 0xb0, 0x80, 0x15,
+ 0xc1, 0x1a, 0xfc, 0x47, 0x02, 0x0e, 0xc1, 0x1b, 0x06, 0xce, 0x6c, 0x52,
+ 0x08, 0xa2, 0xe9, 0xd0, 0x5f, 0x92, 0x08, 0xa2, 0xd9, 0x06, 0xc1, 0x1b,
+ 0x6d, 0xd1, 0x50, 0xce, 0x08, 0xa2, 0x79, 0xca, 0x93, 0x30, 0x08, 0xa2,
+ 0x71, 0xc5, 0x0a, 0x8a, 0x08, 0xa2, 0x69, 0xc2, 0x00, 0x7a, 0x08, 0xa2,
+ 0x49, 0x4b, 0x6f, 0xc7, 0x41, 0x1b, 0x7f, 0xcb, 0x99, 0xe4, 0x01, 0x05,
+ 0x51, 0x48, 0xb6, 0x82, 0xc1, 0x1b, 0x9f, 0x45, 0x15, 0xdb, 0xc1, 0x1b,
+ 0xbe, 0xc4, 0x02, 0x6d, 0x00, 0x00, 0x50, 0xc4, 0x00, 0x49, 0x01, 0x5c,
+ 0x91, 0xc5, 0x00, 0x2c, 0x01, 0x5c, 0x98, 0x48, 0x0b, 0x09, 0xc1, 0x1b,
+ 0xca, 0x48, 0x20, 0x7c, 0xc1, 0x1b, 0xfa, 0xcb, 0x49, 0x4a, 0x00, 0x00,
+ 0xa9, 0x49, 0x1e, 0x56, 0x41, 0x1c, 0x18, 0xe0, 0x05, 0x87, 0x01, 0x15,
+ 0x78, 0x43, 0x07, 0x28, 0xc1, 0x1c, 0x2a, 0x42, 0x02, 0xaf, 0x41, 0x1c,
+ 0x36, 0xc9, 0x00, 0xca, 0x01, 0x13, 0xc9, 0x43, 0x00, 0xe2, 0x41, 0x1c,
+ 0x3c, 0xcc, 0x07, 0xc7, 0x01, 0x13, 0xc1, 0x43, 0x00, 0xe2, 0x41, 0x1c,
+ 0x48, 0x4b, 0x6f, 0xc7, 0xc1, 0x1c, 0x54, 0xca, 0x9d, 0x56, 0x08, 0xcf,
+ 0x19, 0x45, 0x00, 0xba, 0xc1, 0x1c, 0x7d, 0x47, 0x02, 0x0e, 0x41, 0x1c,
+ 0x8d, 0x47, 0x34, 0x2f, 0xc1, 0x1c, 0xf0, 0xd5, 0x34, 0x25, 0x08, 0x45,
+ 0x59, 0x47, 0x02, 0x0e, 0x41, 0x1d, 0x01, 0xd4, 0x3a, 0x48, 0x0f, 0xb5,
+ 0x89, 0xcf, 0x67, 0x83, 0x01, 0x00, 0x88, 0x00, 0xc1, 0x1d, 0x6a, 0xd6,
+ 0x2e, 0x12, 0x0f, 0xb7, 0x50, 0xcc, 0x23, 0x9f, 0x01, 0x15, 0xa0, 0xe0,
+ 0x02, 0xc7, 0x0f, 0xaa, 0x21, 0x0e, 0xc1, 0x1d, 0x7c, 0x4b, 0x2c, 0x44,
+ 0x41, 0x1d, 0x88, 0xca, 0xa7, 0xc4, 0x01, 0x1b, 0xd9, 0xd2, 0x4c, 0x01,
+ 0x01, 0x17, 0x53, 0x01, 0x1d, 0x8e, 0x15, 0xc1, 0x1d, 0x94, 0x16, 0xc1,
+ 0x1d, 0xa0, 0x03, 0xc1, 0x1d, 0xac, 0xcc, 0x07, 0xc7, 0x01, 0x13, 0x79,
+ 0xc9, 0x00, 0xca, 0x01, 0x13, 0x71, 0x43, 0x00, 0xe2, 0xc1, 0x1d, 0xc4,
+ 0xcc, 0x89, 0x0d, 0x01, 0x13, 0x11, 0xcb, 0x6b, 0x83, 0x01, 0x11, 0x30,
+ 0x43, 0x00, 0xaf, 0xc1, 0x1d, 0xd0, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0x9a,
+ 0x01, 0x1d, 0xda, 0xc5, 0x00, 0xb9, 0x0f, 0xb5, 0x58, 0xc5, 0xd5, 0x1a,
+ 0x0f, 0xab, 0x91, 0xca, 0xa2, 0x56, 0x0f, 0xb5, 0xb8, 0xc9, 0xa9, 0xa2,
+ 0x00, 0x04, 0x19, 0xc7, 0xc9, 0x50, 0x0f, 0xb5, 0x98, 0x99, 0x0f, 0x09,
+ 0x61, 0x87, 0x0f, 0x09, 0x53, 0x01, 0x1d, 0xe0, 0x91, 0x0f, 0x09, 0x43,
+ 0x01, 0x1d, 0xe4, 0x97, 0x0f, 0x09, 0x39, 0x8b, 0x0f, 0x09, 0x31, 0x83,
+ 0x0f, 0x09, 0x23, 0x01, 0x1d, 0xe8, 0x14, 0xc1, 0x1d, 0xec, 0xc2, 0x01,
+ 0x30, 0x0f, 0x09, 0x11, 0x12, 0xc1, 0x1d, 0xf6, 0x0f, 0xc1, 0x1e, 0x00,
+ 0xc2, 0x00, 0xd0, 0x0f, 0x08, 0x23, 0x01, 0x1e, 0x0a, 0x10, 0xc1, 0x1e,
+ 0x0e, 0x06, 0xc1, 0x1e, 0x38, 0x1a, 0xc1, 0x1e, 0x42, 0xc2, 0x19, 0x2c,
+ 0x0f, 0x08, 0xc1, 0xc2, 0x0f, 0x9a, 0x0f, 0x08, 0xb9, 0xc2, 0x00, 0x87,
+ 0x0f, 0x08, 0xa9, 0x16, 0xc1, 0x1e, 0x4c, 0xc2, 0x02, 0x41, 0x0f, 0x08,
+ 0x91, 0xc2, 0x02, 0x2b, 0x0f, 0x08, 0x71, 0xc2, 0x02, 0x1c, 0x0f, 0x08,
+ 0x59, 0xc2, 0x0d, 0xf6, 0x0f, 0x08, 0x51, 0xc2, 0x00, 0xdb, 0x0f, 0x08,
+ 0x49, 0xc2, 0x00, 0x64, 0x0f, 0x08, 0x40, 0xc4, 0x18, 0x10, 0x0f, 0x0a,
+ 0x39, 0xc2, 0x22, 0xcc, 0x0f, 0x0a, 0x30, 0xc3, 0x0d, 0x14, 0x0f, 0x0a,
+ 0x29, 0xc3, 0x09, 0x9e, 0x0f, 0x0a, 0x20, 0xc4, 0x02, 0xde, 0x0f, 0x0a,
+ 0x19, 0xc2, 0x02, 0xa0, 0x0f, 0x0a, 0x10, 0xc5, 0xd7, 0xdb, 0x0f, 0x09,
+ 0xe1, 0x44, 0x15, 0xec, 0x41, 0x1e, 0x5c, 0x1f, 0xc1, 0x1e, 0x7a, 0x1e,
+ 0x41, 0x1e, 0xba, 0x16, 0xc1, 0x1e, 0xde, 0xd2, 0x4b, 0x5f, 0x01, 0x24,
+ 0xd1, 0x07, 0xc1, 0x1e, 0xf0, 0x15, 0xc1, 0x1e, 0xfc, 0x08, 0x41, 0x1f,
+ 0x06, 0xc4, 0x25, 0xd5, 0x01, 0x50, 0x21, 0xc3, 0x02, 0xa3, 0x01, 0x50,
+ 0x18, 0xce, 0x6d, 0x24, 0x01, 0x50, 0x31, 0xd5, 0x33, 0x68, 0x01, 0x50,
+ 0x28, 0xce, 0x72, 0xd4, 0x01, 0x50, 0x11, 0xcd, 0x7d, 0x51, 0x01, 0x50,
+ 0x09, 0xcc, 0x83, 0x3d, 0x01, 0x50, 0x00, 0xc4, 0x26, 0x78, 0x00, 0x3e,
+ 0x49, 0xc5, 0x06, 0xdb, 0x00, 0x3e, 0x41, 0x15, 0xc1, 0x1f, 0x12, 0x08,
+ 0xc1, 0x1f, 0x1e, 0x16, 0xc1, 0x1f, 0x2a, 0xc3, 0x05, 0x14, 0x00, 0x3e,
+ 0x09, 0xc4, 0x15, 0xe7, 0x00, 0x3e, 0x00, 0x0c, 0xc1, 0x1f, 0x36, 0x90,
+ 0x00, 0x3e, 0x93, 0x01, 0x1f, 0x40, 0xc2, 0x19, 0x2c, 0x00, 0x3f, 0x31,
+ 0xc2, 0x01, 0x4a, 0x00, 0x3f, 0x29, 0xc2, 0x00, 0xd0, 0x00, 0x3f, 0x21,
+ 0xc2, 0x01, 0xc3, 0x00, 0x3f, 0x09, 0xc2, 0x00, 0xdb, 0x00, 0x3e, 0xf9,
+ 0xc2, 0x02, 0x2b, 0x00, 0x3e, 0xf1, 0xc2, 0x00, 0x87, 0x00, 0x3e, 0xe9,
+ 0xc3, 0x9f, 0x2c, 0x00, 0x3e, 0xe1, 0xc2, 0x0d, 0xf6, 0x00, 0x3e, 0xd9,
+ 0x14, 0xc1, 0x1f, 0x50, 0xc2, 0x0e, 0x9a, 0x00, 0x3e, 0xc3, 0x01, 0x1f,
+ 0x5a, 0xc3, 0x1c, 0x63, 0x00, 0x3e, 0xb9, 0xc2, 0x01, 0x6f, 0x00, 0x3e,
+ 0xa9, 0xc2, 0x00, 0xb0, 0x00, 0x3e, 0xa1, 0xc2, 0x01, 0x5d, 0x00, 0x3e,
+ 0x99, 0x91, 0x00, 0x3e, 0x83, 0x01, 0x1f, 0x60, 0x97, 0x00, 0x3e, 0x71,
+ 0x87, 0x00, 0x3e, 0x6b, 0x01, 0x1f, 0x64, 0x8b, 0x00, 0x3e, 0x61, 0x83,
+ 0x00, 0x3e, 0x50, 0xd0, 0x57, 0xd2, 0x00, 0x3f, 0x99, 0xd1, 0x56, 0xc8,
+ 0x00, 0x3f, 0x91, 0x45, 0x2c, 0x86, 0xc1, 0x1f, 0x68, 0x46, 0x2e, 0xee,
+ 0x41, 0x1f, 0x80, 0xc6, 0x52, 0xa4, 0x0f, 0xd3, 0x59, 0xc5, 0xd8, 0xda,
+ 0x0f, 0xd3, 0x60, 0xc6, 0x52, 0xa4, 0x0f, 0xd3, 0x21, 0xc5, 0xd8, 0xda,
+ 0x0f, 0xd3, 0x28, 0xc8, 0xbd, 0x32, 0x0f, 0xcd, 0x81, 0xca, 0xa5, 0xd0,
+ 0x0f, 0xcd, 0x89, 0xc4, 0xe1, 0xeb, 0x0f, 0xcd, 0x91, 0xca, 0xa6, 0xb6,
+ 0x0f, 0xcd, 0x98, 0xa3, 0x0f, 0x9f, 0xf9, 0xa2, 0x0f, 0x9f, 0xf1, 0xa1,
+ 0x0f, 0x9f, 0xe9, 0xa0, 0x0f, 0x9f, 0xe1, 0xc3, 0xe5, 0xfc, 0x0f, 0x9f,
+ 0xd8, 0xc3, 0x0e, 0xa7, 0x01, 0x10, 0x2b, 0x01, 0x1f, 0x92, 0xc4, 0x9b,
+ 0xb8, 0x0f, 0xae, 0x63, 0x01, 0x1f, 0x98, 0xc8, 0xb9, 0xb2, 0x0f, 0xae,
+ 0x59, 0x10, 0x41, 0x1f, 0x9c, 0x42, 0x09, 0xda, 0x41, 0x1f, 0xab, 0x43,
+ 0x00, 0x55, 0xc1, 0x1f, 0xb7, 0xd0, 0x5e, 0x92, 0x0f, 0xcd, 0xd8, 0xca,
+ 0xa5, 0x58, 0x09, 0xa1, 0xc1, 0x1d, 0x41, 0x1f, 0xc3, 0xcc, 0x82, 0x41,
+ 0x09, 0xa1, 0xb9, 0x42, 0xcf, 0x41, 0x41, 0x1f, 0xd3, 0xcd, 0x76, 0x42,
+ 0x09, 0xa1, 0xb1, 0x1d, 0x41, 0x1f, 0xfa, 0x49, 0xaf, 0xb7, 0xc1, 0x20,
+ 0x12, 0x1d, 0x41, 0x20, 0x1e, 0xd0, 0x59, 0xc2, 0x09, 0xa1, 0x89, 0x42,
+ 0xcf, 0x41, 0x41, 0x20, 0x26, 0xce, 0x70, 0x6c, 0x09, 0xa1, 0x81, 0x1d,
+ 0x41, 0x20, 0x49, 0x42, 0xd1, 0x3e, 0xc1, 0x20, 0x62, 0x1d, 0x41, 0x20,
+ 0x72, 0x1e, 0xc1, 0x20, 0x94, 0x1d, 0x41, 0x20, 0xb6, 0xa5, 0x09, 0x9f,
+ 0x19, 0xa4, 0x09, 0x9f, 0x11, 0xa3, 0x09, 0x9f, 0x09, 0xa2, 0x09, 0x9f,
+ 0x01, 0xa1, 0x09, 0x9e, 0xf9, 0xa0, 0x09, 0x9e, 0xf1, 0x9f, 0x09, 0x9e,
+ 0xe9, 0x9e, 0x09, 0x9e, 0xda, 0x01, 0x20, 0xe6, 0xa5, 0x09, 0x9e, 0xcb,
+ 0x01, 0x20, 0xea, 0xa4, 0x09, 0x9e, 0xc1, 0xa3, 0x09, 0x9e, 0xb3, 0x01,
+ 0x20, 0xee, 0xa2, 0x09, 0x9e, 0xa9, 0xa1, 0x09, 0x9e, 0x93, 0x01, 0x20,
+ 0xf2, 0xa0, 0x09, 0x9e, 0x89, 0x9f, 0x09, 0x9e, 0x81, 0x9e, 0x09, 0x9e,
+ 0x78, 0x1f, 0xc1, 0x20, 0xfa, 0x1e, 0xc1, 0x21, 0x15, 0x1d, 0x41, 0x21,
+ 0x49, 0x21, 0xc1, 0x21, 0x73, 0x20, 0xc1, 0x21, 0x7f, 0x1f, 0xc1, 0x21,
+ 0xaa, 0x1e, 0xc1, 0x21, 0xd8, 0x1d, 0x41, 0x22, 0x00, 0x20, 0xc1, 0x22,
+ 0x27, 0x1f, 0xc1, 0x22, 0x49, 0x1e, 0xc1, 0x22, 0x71, 0x1d, 0x41, 0x22,
+ 0x9f, 0x21, 0xc1, 0x22, 0xcf, 0x20, 0xc1, 0x22, 0xeb, 0x1f, 0xc1, 0x23,
+ 0x16, 0x1e, 0xc1, 0x23, 0x41, 0x1d, 0x41, 0x23, 0x6f, 0x1f, 0xc1, 0x23,
+ 0x99, 0x1e, 0xc1, 0x23, 0xc1, 0x1d, 0x41, 0x23, 0xef, 0xa4, 0x09, 0x95,
+ 0x71, 0xa3, 0x09, 0x95, 0x69, 0xa2, 0x09, 0x95, 0x61, 0xa1, 0x09, 0x95,
+ 0x59, 0xa0, 0x09, 0x95, 0x51, 0x9f, 0x09, 0x95, 0x49, 0x9e, 0x09, 0x95,
+ 0x40, 0x1e, 0xc1, 0x24, 0x19, 0x1d, 0x41, 0x24, 0x21, 0x42, 0xdd, 0x2f,
+ 0xc1, 0x24, 0x4b, 0x42, 0x8c, 0xff, 0xc1, 0x24, 0x57, 0x1d, 0x41, 0x24,
+ 0x65, 0x21, 0xc1, 0x24, 0x79, 0x20, 0xc1, 0x24, 0x90, 0x1f, 0xc1, 0x24,
+ 0xbe, 0x1e, 0xc1, 0x24, 0xef, 0x1d, 0x41, 0x25, 0x26, 0xa5, 0x09, 0x8d,
+ 0x61, 0xa4, 0x09, 0x8d, 0x59, 0xa3, 0x09, 0x8d, 0x4b, 0x01, 0x25, 0x50,
+ 0xa2, 0x09, 0x8d, 0x41, 0xa1, 0x09, 0x8d, 0x39, 0xa0, 0x09, 0x8d, 0x31,
+ 0x9f, 0x09, 0x8d, 0x23, 0x01, 0x25, 0x54, 0x9e, 0x09, 0x8d, 0x18, 0xa5,
+ 0x09, 0x8d, 0x11, 0xa4, 0x09, 0x8d, 0x09, 0xa3, 0x09, 0x8d, 0x01, 0xa2,
+ 0x09, 0x8c, 0xf9, 0xa1, 0x09, 0x8c, 0xf1, 0xa0, 0x09, 0x8c, 0xe9, 0x9f,
+ 0x09, 0x8c, 0xe1, 0x9e, 0x09, 0x8c, 0xd8, 0x22, 0xc1, 0x25, 0x58, 0x21,
+ 0xc1, 0x25, 0x6c, 0x20, 0xc1, 0x25, 0x9a, 0x1f, 0xc1, 0x25, 0xc8, 0x1e,
+ 0xc1, 0x25, 0xf6, 0x1d, 0x41, 0x26, 0x21, 0x22, 0xc1, 0x26, 0x4b, 0x21,
+ 0xc1, 0x26, 0x5e, 0x20, 0xc1, 0x26, 0x8f, 0x1f, 0xc1, 0x26, 0xc0, 0x1e,
+ 0xc1, 0x26, 0xeb, 0x1d, 0x41, 0x27, 0x16, 0x23, 0xc1, 0x27, 0x3d, 0x22,
+ 0xc1, 0x27, 0x60, 0x21, 0xc1, 0x27, 0x91, 0x20, 0xc1, 0x27, 0xbf, 0x1f,
+ 0xc1, 0x27, 0xed, 0x1e, 0xc1, 0x28, 0x18, 0x1d, 0x41, 0x28, 0x40, 0x1f,
+ 0xc1, 0x28, 0x67, 0x1e, 0xc1, 0x28, 0x7b, 0x1d, 0x41, 0x28, 0xa6, 0x4c,
+ 0x84, 0x69, 0xc1, 0x28, 0xcd, 0xd2, 0x48, 0x35, 0x0f, 0xa3, 0xe8, 0xc4,
+ 0x26, 0x78, 0x00, 0x37, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0x37, 0xc1, 0x15,
+ 0xc1, 0x28, 0xe3, 0x08, 0xc1, 0x28, 0xef, 0x16, 0xc1, 0x28, 0xfb, 0xc3,
+ 0x05, 0x14, 0x00, 0x37, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0x37, 0x80, 0xcd,
+ 0x2c, 0xb2, 0x01, 0x02, 0x49, 0xc4, 0x01, 0xc3, 0x00, 0x01, 0x08, 0x09,
+ 0xc1, 0x29, 0x07, 0x0a, 0xc1, 0x29, 0x39, 0x04, 0xc1, 0x29, 0x5a, 0x05,
+ 0xc1, 0x29, 0x7f, 0x06, 0xc1, 0x29, 0xaa, 0x16, 0xc1, 0x29, 0xd5, 0x0e,
+ 0xc1, 0x2a, 0x0a, 0x0f, 0xc1, 0x2a, 0x2d, 0x15, 0xc1, 0x2a, 0x54, 0x14,
+ 0xc1, 0x2a, 0x83, 0x13, 0xc1, 0x2a, 0xac, 0x18, 0xc1, 0x2a, 0xd5, 0x1a,
+ 0xc1, 0x2a, 0xf5, 0x10, 0xc1, 0x2b, 0x1a, 0x0d, 0xc1, 0x2b, 0x41, 0x19,
+ 0xc1, 0x2b, 0x6a, 0x12, 0xc1, 0x2b, 0x87, 0x1c, 0xc1, 0x2b, 0xac, 0x1b,
+ 0xc1, 0x2b, 0xd7, 0x0c, 0xc1, 0x2b, 0xf4, 0x08, 0x41, 0x2c, 0x17, 0xca,
+ 0x45, 0x8f, 0x00, 0x9b, 0x01, 0xc7, 0x52, 0x01, 0x00, 0x9b, 0x20, 0x47,
+ 0x1d, 0xd4, 0xc1, 0x2c, 0x3b, 0xc2, 0x01, 0xc3, 0x00, 0x9b, 0x18, 0xc2,
+ 0x02, 0xa0, 0x00, 0x9b, 0x51, 0xc4, 0x02, 0xde, 0x00, 0x9b, 0x58, 0xc3,
+ 0x09, 0x9e, 0x00, 0x9b, 0x61, 0xc3, 0x0d, 0x14, 0x00, 0x9b, 0x68, 0xc2,
+ 0x22, 0xcc, 0x00, 0x9b, 0x71, 0xc4, 0x18, 0x10, 0x00, 0x9b, 0x78, 0xc2,
+ 0x00, 0xc4, 0x00, 0x9b, 0x93, 0x01, 0x2c, 0x47, 0xc5, 0x28, 0xee, 0x00,
+ 0x9b, 0x99, 0xc5, 0x0d, 0x0d, 0x00, 0x9b, 0xa0, 0xc4, 0x4a, 0x2e, 0x00,
+ 0x9b, 0xa9, 0xc4, 0x45, 0x6a, 0x00, 0x9b, 0xb0, 0xc4, 0xd2, 0x1d, 0x00,
+ 0x9b, 0xb9, 0xc6, 0x18, 0x10, 0x00, 0x9b, 0xc0, 0xc4, 0xb4, 0x50, 0x00,
+ 0x9c, 0x8b, 0x01, 0x2c, 0x4d, 0xc4, 0xe1, 0x33, 0x00, 0x9c, 0xa0, 0xc4,
+ 0x59, 0x96, 0x00, 0x9c, 0xa9, 0xc3, 0x34, 0x38, 0x00, 0x9c, 0xc8, 0x00,
+ 0x41, 0x2c, 0x53, 0xcf, 0x44, 0x5a, 0x01, 0x1f, 0x39, 0x00, 0x41, 0x2c,
+ 0x5f, 0x16, 0xc1, 0x2c, 0x77, 0x15, 0xc1, 0x2c, 0x83, 0xc4, 0x5d, 0xe2,
+ 0x08, 0x7f, 0x99, 0xc4, 0xb9, 0x7e, 0x08, 0x7f, 0x91, 0xc2, 0x00, 0x67,
+ 0x08, 0x7f, 0x81, 0xc3, 0x20, 0x18, 0x08, 0x7f, 0x69, 0xc3, 0x00, 0x4e,
+ 0x08, 0x7f, 0x61, 0xc6, 0xcf, 0xd7, 0x08, 0x7f, 0x59, 0xc4, 0xe0, 0xe7,
+ 0x08, 0x7f, 0x51, 0xc4, 0x4a, 0xb9, 0x08, 0x7f, 0x49, 0xc2, 0x01, 0x7f,
+ 0x08, 0x7f, 0x23, 0x01, 0x2c, 0x8d, 0xc5, 0x4a, 0xb3, 0x08, 0x7f, 0x31,
+ 0xc3, 0x7e, 0x89, 0x08, 0x7f, 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x7f, 0x19,
+ 0xc5, 0x9c, 0xa2, 0x08, 0x7f, 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x7f, 0x09,
+ 0x03, 0x41, 0x2c, 0x93, 0x87, 0x08, 0x28, 0x11, 0xc2, 0x01, 0x7f, 0x08,
+ 0x28, 0x18, 0x87, 0x08, 0x28, 0x21, 0xc2, 0x01, 0x7f, 0x08, 0x28, 0x30,
+ 0xc2, 0x00, 0x06, 0x08, 0x28, 0x29, 0x87, 0x08, 0x28, 0x99, 0x83, 0x08,
+ 0x28, 0xa1, 0xc2, 0x1c, 0x52, 0x08, 0x28, 0xa8, 0x8b, 0x08, 0x28, 0x38,
+ 0x87, 0x08, 0x28, 0x51, 0xc2, 0x1c, 0x52, 0x08, 0x28, 0x59, 0x0a, 0x41,
+ 0x2c, 0x9f, 0x87, 0x08, 0x28, 0x79, 0xc2, 0x01, 0x7f, 0x08, 0x29, 0x38,
+ 0x87, 0x08, 0x28, 0x81, 0xc2, 0x00, 0x49, 0x08, 0x28, 0x88, 0x87, 0x08,
+ 0x28, 0xc9, 0xc2, 0x01, 0x19, 0x08, 0x28, 0xd0, 0x87, 0x08, 0x28, 0xd9,
+ 0xc2, 0x01, 0x7f, 0x08, 0x28, 0xe0, 0x87, 0x08, 0x28, 0xe9, 0xc2, 0x01,
+ 0x7f, 0x08, 0x28, 0xf0, 0x87, 0x08, 0x29, 0x19, 0xc2, 0x01, 0x7f, 0x08,
+ 0x29, 0x20, 0xe0, 0x0a, 0xe7, 0x01, 0x3a, 0x50, 0xdf, 0x0c, 0x46, 0x01,
+ 0x3a, 0x09, 0x47, 0x0a, 0xaa, 0x41, 0x2c, 0xa9, 0xc9, 0xad, 0xe3, 0x0f,
+ 0xac, 0x21, 0xd5, 0x31, 0xd9, 0x0f, 0xa7, 0x48, 0x43, 0x05, 0xc0, 0xc1,
+ 0x2c, 0xbb, 0xc6, 0x01, 0xdb, 0x00, 0x00, 0xc9, 0x16, 0xc1, 0x2c, 0xc7,
+ 0xc4, 0x02, 0x6d, 0x00, 0x00, 0x51, 0xcd, 0x7e, 0x48, 0x00, 0x04, 0x39,
+ 0xcc, 0x87, 0xc9, 0x00, 0x04, 0xb8, 0xc6, 0x02, 0xd1, 0x01, 0x4f, 0x99,
+ 0xc7, 0x3a, 0x19, 0x01, 0x4f, 0x89, 0xc6, 0x0b, 0x09, 0x01, 0x4f, 0x78,
+ 0xc6, 0x02, 0xd1, 0x01, 0x4f, 0x91, 0xc7, 0x3a, 0x19, 0x01, 0x4f, 0x81,
+ 0xc6, 0x0b, 0x09, 0x01, 0x4f, 0x70, 0x43, 0x01, 0x7b, 0xc1, 0x2c, 0xd6,
+ 0xcf, 0x6b, 0x7f, 0x01, 0x16, 0xa8, 0xc5, 0x33, 0x24, 0x01, 0x12, 0xa9,
+ 0xc4, 0x00, 0xba, 0x00, 0x01, 0xeb, 0x01, 0x2c, 0xe2, 0xcd, 0x7c, 0x33,
+ 0x01, 0x53, 0x70, 0xc2, 0x00, 0xf1, 0x01, 0x12, 0x69, 0xd4, 0x3b, 0x74,
+ 0x01, 0x53, 0xc0, 0xcb, 0x95, 0x6c, 0x0f, 0x9f, 0x21, 0xc6, 0xcd, 0x2b,
+ 0x0f, 0x9f, 0x80, 0xc4, 0x26, 0x78, 0x08, 0xed, 0x49, 0xc5, 0x06, 0xdb,
+ 0x08, 0xed, 0x41, 0x15, 0xc1, 0x2c, 0xe6, 0x08, 0xc1, 0x2c, 0xf2, 0x16,
+ 0xc1, 0x2c, 0xfe, 0xc3, 0x05, 0x14, 0x08, 0xed, 0x09, 0xc4, 0x15, 0xe7,
+ 0x08, 0xed, 0x00, 0xc5, 0x1e, 0x96, 0x08, 0xec, 0xb9, 0x4a, 0x6f, 0xc8,
+ 0x41, 0x2d, 0x0a, 0xc7, 0x40, 0xe5, 0x08, 0xec, 0xb1, 0xc8, 0x14, 0x38,
+ 0x08, 0xec, 0xa8, 0xc2, 0x0d, 0xf6, 0x08, 0xec, 0x49, 0xc2, 0x00, 0x39,
+ 0x08, 0xec, 0x41, 0xc2, 0x00, 0xd0, 0x08, 0xec, 0x39, 0x12, 0xc1, 0x2d,
+ 0x28, 0x10, 0xc1, 0x2d, 0x32, 0x06, 0xc1, 0x2d, 0x3c, 0x0c, 0xc1, 0x2d,
+ 0x4a, 0x0e, 0xc1, 0x2d, 0x54, 0x16, 0xc1, 0x2d, 0x5e, 0x05, 0xc1, 0x2d,
+ 0x6c, 0x09, 0xc1, 0x2d, 0x76, 0x0d, 0xc1, 0x2d, 0x80, 0xc2, 0x01, 0xc3,
+ 0x08, 0xeb, 0x81, 0x04, 0xc1, 0x2d, 0x8a, 0xc2, 0x02, 0x41, 0x08, 0xeb,
+ 0x69, 0xc2, 0x19, 0x2c, 0x08, 0xeb, 0x61, 0x83, 0x08, 0xeb, 0x03, 0x01,
+ 0x2d, 0x94, 0xc2, 0x01, 0x24, 0x08, 0xeb, 0x51, 0xc2, 0x02, 0xe0, 0x08,
+ 0xeb, 0x39, 0x97, 0x08, 0xeb, 0x23, 0x01, 0x2d, 0xa0, 0x8b, 0x08, 0xeb,
+ 0x12, 0x01, 0x2d, 0xa4, 0xca, 0xa6, 0x2a, 0x00, 0x50, 0x09, 0xc5, 0x60,
+ 0x30, 0x00, 0x50, 0x11, 0x42, 0x07, 0xb2, 0xc1, 0x2d, 0xa8, 0xc5, 0x33,
+ 0x5d, 0x00, 0x51, 0xe1, 0xc5, 0xd9, 0x5c, 0x00, 0x52, 0x89, 0xc6, 0xd3,
+ 0x85, 0x00, 0x53, 0xa8, 0x83, 0x00, 0x50, 0x2b, 0x01, 0x2d, 0xb4, 0x8b,
+ 0x00, 0x50, 0x3b, 0x01, 0x2d, 0xc0, 0x97, 0x00, 0x50, 0x4b, 0x01, 0x2d,
+ 0xc4, 0xc2, 0x02, 0xe0, 0x00, 0x50, 0x79, 0xc2, 0x01, 0x24, 0x00, 0x50,
+ 0x99, 0x0d, 0xc1, 0x2d, 0xc8, 0x09, 0xc1, 0x2d, 0xd0, 0x10, 0xc1, 0x2d,
+ 0xd8, 0x05, 0xc1, 0x2d, 0xee, 0x0c, 0xc1, 0x2d, 0xf8, 0x16, 0xc1, 0x2e,
+ 0x02, 0x06, 0xc1, 0x2e, 0x10, 0x12, 0xc1, 0x2e, 0x1e, 0x04, 0xc1, 0x2e,
+ 0x28, 0xc2, 0x01, 0xc3, 0x00, 0x51, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x51,
+ 0x79, 0x14, 0xc1, 0x2e, 0x32, 0x0e, 0xc1, 0x2e, 0x3c, 0xc2, 0x02, 0x41,
+ 0x00, 0x51, 0xa9, 0x15, 0xc1, 0x2e, 0x46, 0xc2, 0x00, 0xd0, 0x00, 0x51,
+ 0xc9, 0xc2, 0x02, 0x1c, 0x00, 0x52, 0xd9, 0xc2, 0x00, 0x87, 0x00, 0x52,
+ 0xf0, 0x03, 0xc1, 0x2e, 0x50, 0x8b, 0x00, 0x51, 0xfb, 0x01, 0x2e, 0x5c,
+ 0x97, 0x00, 0x52, 0x0b, 0x01, 0x2e, 0x60, 0xc2, 0x02, 0xe0, 0x00, 0x52,
+ 0x39, 0xc2, 0x01, 0x24, 0x00, 0x52, 0x58, 0xc4, 0x15, 0xe7, 0x00, 0x53,
+ 0x31, 0xc3, 0x05, 0x14, 0x00, 0x53, 0x39, 0x16, 0xc1, 0x2e, 0x64, 0x08,
+ 0xc1, 0x2e, 0x70, 0x15, 0xc1, 0x2e, 0x7c, 0xc5, 0x06, 0xdb, 0x00, 0x53,
+ 0x71, 0xc4, 0x26, 0x78, 0x00, 0x53, 0x78, 0xc4, 0xe3, 0x57, 0x00, 0x53,
+ 0x89, 0xd0, 0x50, 0xcf, 0x00, 0x53, 0xb0, 0x05, 0xc1, 0x2e, 0x88, 0x03,
+ 0xc1, 0x2e, 0x94, 0x42, 0x07, 0xb2, 0xc1, 0x2e, 0xa0, 0xc5, 0x33, 0x5d,
+ 0x00, 0x55, 0xe1, 0x15, 0xc1, 0x2e, 0xac, 0xc6, 0xd2, 0x2f, 0x00, 0x57,
+ 0xe1, 0x16, 0x41, 0x2e, 0xb8, 0x83, 0x00, 0x54, 0x2b, 0x01, 0x2e, 0xc4,
+ 0x8b, 0x00, 0x54, 0x3b, 0x01, 0x2e, 0xd0, 0x97, 0x00, 0x54, 0x4b, 0x01,
+ 0x2e, 0xd4, 0x18, 0xc1, 0x2e, 0xd8, 0x87, 0x00, 0x54, 0x79, 0x91, 0x00,
+ 0x54, 0x99, 0x0d, 0xc1, 0x2e, 0xe2, 0x09, 0xc1, 0x2e, 0xec, 0x10, 0xc1,
+ 0x2e, 0xf6, 0x05, 0xc1, 0x2f, 0x0c, 0x0c, 0xc1, 0x2f, 0x16, 0x16, 0xc1,
+ 0x2f, 0x20, 0x06, 0xc1, 0x2f, 0x2e, 0x12, 0xc1, 0x2f, 0x3c, 0x04, 0xc1,
+ 0x2f, 0x46, 0xc2, 0x01, 0xc3, 0x00, 0x55, 0x71, 0xc2, 0x19, 0x2c, 0x00,
+ 0x55, 0x79, 0xc2, 0x00, 0x39, 0x00, 0x55, 0x81, 0x0e, 0xc1, 0x2f, 0x50,
+ 0x15, 0xc1, 0x2f, 0x5a, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0xc9, 0xc3, 0xb4,
+ 0xa6, 0x00, 0x57, 0xc8, 0x47, 0xc7, 0x7b, 0xc1, 0x2f, 0x6a, 0x45, 0x00,
+ 0xba, 0x41, 0x2f, 0x72, 0xc4, 0x15, 0xe7, 0x00, 0x57, 0x31, 0xc3, 0x05,
+ 0x14, 0x00, 0x57, 0x39, 0x16, 0xc1, 0x2f, 0x98, 0x08, 0xc1, 0x2f, 0xa4,
+ 0x15, 0xc1, 0x2f, 0xb0, 0xc5, 0x06, 0xdb, 0x00, 0x57, 0x71, 0xc4, 0x26,
+ 0x78, 0x00, 0x57, 0x78, 0xc5, 0xd7, 0xc2, 0x08, 0x19, 0xa1, 0xc3, 0x84,
+ 0xf8, 0x08, 0x19, 0x80, 0xc3, 0xb6, 0x96, 0x08, 0x19, 0xb1, 0xc4, 0xe0,
+ 0x9b, 0x08, 0x1a, 0x38, 0xc3, 0xdb, 0xd3, 0x08, 0x19, 0xb9, 0xc4, 0xde,
+ 0xa3, 0x08, 0x1a, 0x40, 0xc5, 0xd5, 0x8d, 0x08, 0x19, 0xc1, 0xc4, 0xe2,
+ 0xf3, 0x08, 0x1a, 0x20, 0xc5, 0xd6, 0x46, 0x08, 0x19, 0xe9, 0x43, 0x02,
+ 0x6e, 0x41, 0x2f, 0xbc, 0x42, 0x01, 0x12, 0xc1, 0x2f, 0xc8, 0x42, 0x00,
+ 0xbd, 0x41, 0x30, 0x32, 0x04, 0xc1, 0x30, 0x4a, 0xd5, 0x34, 0xe2, 0x01,
+ 0x16, 0xd9, 0x45, 0x00, 0x8c, 0xc1, 0x30, 0x56, 0x11, 0xc1, 0x30, 0x68,
+ 0x03, 0xc1, 0x30, 0x74, 0xc4, 0x00, 0xba, 0x00, 0x01, 0xf1, 0xcf, 0x69,
+ 0x18, 0x01, 0x55, 0x3a, 0x01, 0x30, 0x80, 0x4b, 0x6f, 0xc7, 0xc1, 0x30,
+ 0x86, 0x47, 0x02, 0x0e, 0xc1, 0x30, 0xaa, 0x45, 0x00, 0xba, 0xc1, 0x31,
+ 0x13, 0xce, 0x73, 0x0c, 0x08, 0x9a, 0xb9, 0xc2, 0x00, 0x7a, 0x08, 0x9a,
+ 0x80, 0xc4, 0x00, 0x87, 0x0f, 0xb0, 0x03, 0x01, 0x31, 0x2d, 0xda, 0x1d,
+ 0x3c, 0x0f, 0xb1, 0xc0, 0xc9, 0x1b, 0x0a, 0x00, 0x00, 0xe9, 0xc4, 0x01,
+ 0xc3, 0x01, 0x5e, 0x90, 0xc8, 0xbd, 0xd2, 0x01, 0x37, 0x71, 0xc7, 0xc5,
+ 0x9f, 0x01, 0x37, 0x68, 0x48, 0x07, 0x5a, 0xc1, 0x31, 0x33, 0xcb, 0x94,
+ 0x6f, 0x01, 0x11, 0xd0, 0x58, 0x22, 0x13, 0xc1, 0x31, 0x3f, 0x4f, 0x0b,
+ 0x17, 0xc1, 0x31, 0xc5, 0x47, 0x02, 0x0e, 0xc1, 0x32, 0x49, 0xd3, 0x45,
+ 0xf8, 0x00, 0x87, 0xd9, 0x4d, 0x29, 0xb9, 0x41, 0x32, 0xcf, 0xc8, 0x2f,
+ 0x03, 0x0f, 0xb6, 0x50, 0x4f, 0x0b, 0x17, 0xc1, 0x33, 0x53, 0x4d, 0x29,
+ 0xb9, 0x41, 0x33, 0xbc, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xc9, 0xc5, 0x1c,
+ 0xae, 0x0f, 0xcf, 0x08, 0x45, 0x00, 0xba, 0xc1, 0x34, 0x25, 0x47, 0x02,
+ 0x0e, 0xc1, 0x34, 0x41, 0x4b, 0x6f, 0xc7, 0xc1, 0x34, 0xa8, 0x03, 0xc1,
+ 0x34, 0xc8, 0x46, 0x09, 0x97, 0xc1, 0x34, 0xd4, 0xc6, 0xd2, 0xcb, 0x00,
+ 0x5b, 0x81, 0x49, 0x53, 0xa9, 0x41, 0x34, 0xf8, 0xc5, 0xd3, 0x5b, 0x0f,
+ 0x69, 0xe9, 0xc4, 0x01, 0xce, 0x0f, 0x69, 0xe0, 0x16, 0xc1, 0x35, 0x04,
+ 0x08, 0xc1, 0x35, 0x15, 0xc3, 0x05, 0x14, 0x0f, 0x68, 0x0b, 0x01, 0x35,
+ 0x1d, 0x15, 0xc1, 0x35, 0x21, 0xc5, 0x06, 0xdb, 0x0f, 0x68, 0x43, 0x01,
+ 0x35, 0x33, 0xc4, 0x26, 0x78, 0x0f, 0x68, 0x4a, 0x01, 0x35, 0x3e, 0x16,
+ 0xc1, 0x35, 0x4b, 0x08, 0xc1, 0x35, 0x63, 0x15, 0xc1, 0x35, 0x72, 0xc5,
+ 0x06, 0xdb, 0x0f, 0x69, 0xa9, 0xc4, 0x26, 0x78, 0x0f, 0x69, 0xb0, 0x44,
+ 0x05, 0x18, 0xc1, 0x35, 0x81, 0xcc, 0x86, 0xfd, 0x0f, 0xad, 0x78, 0x00,
+ 0xc1, 0x35, 0x8d, 0x02, 0x41, 0x35, 0xb5, 0xc5, 0xd7, 0xa4, 0x0f, 0xad,
+ 0xc0, 0x48, 0xb5, 0xda, 0xc1, 0x35, 0xc1, 0x47, 0xc9, 0x88, 0xc1, 0x35,
+ 0xcd, 0x42, 0x00, 0xfb, 0xc1, 0x35, 0xdf, 0x4a, 0x9d, 0xa6, 0xc1, 0x35,
+ 0xeb, 0x4e, 0x70, 0xf8, 0xc1, 0x35, 0xfd, 0x4e, 0x72, 0x3a, 0xc1, 0x36,
+ 0x09, 0xc3, 0x19, 0x2a, 0x0f, 0xae, 0xe9, 0x43, 0x00, 0x67, 0xc1, 0x36,
+ 0x15, 0x47, 0xc7, 0x4a, 0x41, 0x36, 0x1f, 0xc5, 0x29, 0xfc, 0x0f, 0xa3,
+ 0xa9, 0xc3, 0x12, 0xb8, 0x0f, 0xa3, 0xa1, 0xc5, 0xda, 0xa1, 0x0f, 0xce,
+ 0x98, 0x4b, 0x11, 0xe3, 0xc1, 0x36, 0x2b, 0xc7, 0xc2, 0x42, 0x00, 0xe3,
+ 0xe0, 0xd1, 0x4f, 0x36, 0x00, 0xe3, 0xd1, 0xc8, 0xb9, 0x9a, 0x00, 0xe3,
+ 0xc0, 0x11, 0xc1, 0x36, 0x37, 0x0e, 0xc1, 0x36, 0x49, 0x07, 0xc1, 0x36,
+ 0x60, 0x17, 0xc1, 0x36, 0x74, 0x0b, 0xc1, 0x36, 0x86, 0x03, 0x41, 0x36,
+ 0x98, 0xc4, 0x26, 0x78, 0x00, 0xe2, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0xe2,
+ 0xc1, 0x15, 0xc1, 0x36, 0xae, 0x08, 0xc1, 0x36, 0xba, 0x16, 0xc1, 0x36,
+ 0xc6, 0xc3, 0x05, 0x14, 0x00, 0xe2, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0xe2,
+ 0x80, 0xca, 0x22, 0x51, 0x01, 0x39, 0x69, 0xcb, 0x8e, 0x08, 0x01, 0x38,
+ 0xf9, 0xcb, 0x58, 0xc7, 0x01, 0x38, 0xc9, 0xca, 0x28, 0xc3, 0x01, 0x34,
+ 0xe8, 0xcf, 0x63, 0x0f, 0x01, 0x22, 0x51, 0xc3, 0x02, 0x2c, 0x01, 0x22,
+ 0x40, 0xd6, 0x2f, 0x1a, 0x01, 0x22, 0x49, 0xc4, 0x68, 0xba, 0x01, 0x22,
+ 0x08, 0xd9, 0x1e, 0xcd, 0x01, 0x22, 0x31, 0xc6, 0xcb, 0x8d, 0x01, 0x22,
+ 0x29, 0xca, 0xa5, 0xda, 0x01, 0x22, 0x20, 0xc4, 0x03, 0xc8, 0x01, 0x4d,
+ 0x39, 0xc2, 0x02, 0xae, 0x01, 0x4d, 0x30, 0x45, 0x2a, 0xa0, 0x41, 0x36,
+ 0xd2, 0xc5, 0xd4, 0x84, 0x00, 0xb4, 0xd1, 0x42, 0x01, 0x9c, 0xc1, 0x36,
+ 0xde, 0x0b, 0xc1, 0x36, 0xf0, 0x17, 0xc1, 0x36, 0xfc, 0x11, 0xc1, 0x37,
+ 0x0c, 0xc4, 0xe2, 0x6b, 0x00, 0xb4, 0x81, 0xc4, 0xde, 0x7f, 0x00, 0xb4,
+ 0x79, 0x15, 0xc1, 0x37, 0x16, 0x10, 0xc1, 0x37, 0x22, 0xc4, 0xe0, 0x67,
+ 0x00, 0xb4, 0x61, 0xc4, 0xe4, 0x13, 0x00, 0xb4, 0x59, 0x05, 0xc1, 0x37,
+ 0x2e, 0xc5, 0xd6, 0xb4, 0x00, 0xb4, 0x41, 0xc4, 0xe3, 0x4f, 0x00, 0xb4,
+ 0x39, 0xc5, 0xd3, 0xf3, 0x00, 0xb4, 0x19, 0xc4, 0xe4, 0xcb, 0x00, 0xb4,
+ 0x11, 0xc5, 0xd7, 0x9a, 0x00, 0xb4, 0x08, 0x83, 0x08, 0x24, 0xb3, 0x01,
+ 0x37, 0x3a, 0xc2, 0x01, 0x5d, 0x08, 0x24, 0x09, 0xc2, 0x01, 0x6f, 0x08,
+ 0x24, 0x11, 0xc2, 0x25, 0x3b, 0x08, 0x24, 0x19, 0xc2, 0x8d, 0x8f, 0x08,
+ 0x24, 0x21, 0x0d, 0xc1, 0x37, 0x44, 0x06, 0xc1, 0x37, 0x50, 0xc2, 0x00,
+ 0x39, 0x08, 0x24, 0x39, 0x15, 0xc1, 0x37, 0x5c, 0xc4, 0xe3, 0x13, 0x08,
+ 0x24, 0x59, 0xc2, 0x01, 0x30, 0x08, 0x24, 0x61, 0xc2, 0x00, 0x87, 0x08,
+ 0x24, 0x69, 0xc4, 0xd8, 0x3a, 0x08, 0x24, 0x71, 0xc4, 0xe0, 0xd7, 0x08,
+ 0x24, 0x81, 0xc4, 0xe4, 0xbb, 0x08, 0x24, 0x89, 0xc4, 0xb9, 0x50, 0x08,
+ 0x24, 0x91, 0xc3, 0x7e, 0x89, 0x08, 0x24, 0x99, 0xc2, 0x00, 0xd0, 0x08,
+ 0x24, 0xa1, 0xc2, 0x19, 0x2c, 0x08, 0x24, 0xa9, 0x87, 0x08, 0x24, 0xbb,
+ 0x01, 0x37, 0x66, 0x8b, 0x08, 0x24, 0xc1, 0x91, 0x08, 0x24, 0xcb, 0x01,
+ 0x37, 0x6a, 0x97, 0x08, 0x24, 0xd0, 0xc4, 0x15, 0xe7, 0x08, 0x25, 0x01,
+ 0xc3, 0x05, 0x14, 0x08, 0x25, 0x09, 0x16, 0xc1, 0x37, 0x6e, 0x08, 0xc1,
+ 0x37, 0x7a, 0x15, 0xc1, 0x37, 0x86, 0xc5, 0x06, 0xdb, 0x08, 0x25, 0x41,
+ 0xc4, 0x26, 0x78, 0x08, 0x25, 0x48, 0x83, 0x08, 0x25, 0x83, 0x01, 0x37,
+ 0x92, 0xc3, 0x00, 0x38, 0x08, 0x25, 0xa1, 0xc3, 0x1c, 0x63, 0x08, 0x25,
+ 0xa9, 0x87, 0x08, 0x25, 0xbb, 0x01, 0x37, 0x9d, 0x0a, 0xc1, 0x37, 0xa7,
+ 0x8b, 0x08, 0x25, 0xd9, 0x0d, 0xc1, 0x37, 0xb1, 0xc2, 0x00, 0xdb, 0x08,
+ 0x25, 0xf9, 0xc2, 0x01, 0xc3, 0x08, 0x26, 0x01, 0xc2, 0x00, 0xc1, 0x08,
+ 0x26, 0x09, 0x91, 0x08, 0x26, 0x13, 0x01, 0x37, 0xc1, 0xc2, 0x00, 0xb0,
+ 0x08, 0x26, 0x21, 0x15, 0xc1, 0x37, 0xc7, 0x16, 0xc1, 0x37, 0xd1, 0xc3,
+ 0x40, 0xe2, 0x08, 0x26, 0x69, 0x97, 0x08, 0x26, 0x71, 0xc2, 0x01, 0x4a,
+ 0x08, 0x26, 0x79, 0xc3, 0x91, 0x00, 0x08, 0x26, 0x89, 0x1c, 0x41, 0x37,
+ 0xd9, 0x83, 0x08, 0x26, 0xc3, 0x01, 0x37, 0xe3, 0xc3, 0x00, 0x38, 0x08,
+ 0x26, 0xe1, 0xc3, 0x1c, 0x63, 0x08, 0x26, 0xe9, 0x87, 0x08, 0x26, 0xfb,
+ 0x01, 0x37, 0xee, 0x0a, 0xc1, 0x37, 0xf8, 0x8b, 0x08, 0x27, 0x19, 0x0d,
+ 0xc1, 0x38, 0x02, 0xc2, 0x00, 0xdb, 0x08, 0x27, 0x39, 0xc2, 0x01, 0xc3,
+ 0x08, 0x27, 0x41, 0xc2, 0x00, 0xc1, 0x08, 0x27, 0x49, 0x91, 0x08, 0x27,
+ 0x53, 0x01, 0x38, 0x12, 0xc2, 0x00, 0xb0, 0x08, 0x27, 0x61, 0x15, 0xc1,
+ 0x38, 0x18, 0x16, 0xc1, 0x38, 0x22, 0xc3, 0x40, 0xe2, 0x08, 0x27, 0xa9,
+ 0x97, 0x08, 0x27, 0xb1, 0xc2, 0x01, 0x4a, 0x08, 0x27, 0xb9, 0xc3, 0x91,
+ 0x00, 0x08, 0x27, 0xc9, 0x1c, 0x41, 0x38, 0x2a, 0x03, 0xc1, 0x38, 0x34,
+ 0x11, 0xc1, 0x38, 0x46, 0xc8, 0xbb, 0x2a, 0x0e, 0x7a, 0xc2, 0x01, 0x38,
+ 0x52, 0xc3, 0x74, 0xc6, 0x0e, 0x7e, 0x09, 0x07, 0xc1, 0x38, 0x58, 0xcf,
+ 0x58, 0xe3, 0x0e, 0x7b, 0x59, 0xcb, 0x95, 0x77, 0x0e, 0x7a, 0x98, 0xc5,
+ 0xd5, 0x5b, 0x0e, 0x7e, 0x01, 0xc4, 0xde, 0xf7, 0x0e, 0x7d, 0x7a, 0x01,
+ 0x38, 0x64, 0xc6, 0xad, 0x17, 0x0e, 0x7d, 0xf9, 0xc5, 0xdd, 0x8a, 0x0e,
+ 0x7c, 0x21, 0x42, 0x14, 0x98, 0xc1, 0x38, 0x68, 0xc6, 0xd2, 0xd1, 0x0e,
+ 0x7b, 0x71, 0xc5, 0x5f, 0x8d, 0x0e, 0x7a, 0xa0, 0x16, 0xc1, 0x38, 0x77,
+ 0xc8, 0xb9, 0x2a, 0x0e, 0x7b, 0xeb, 0x01, 0x38, 0x8f, 0x49, 0xad, 0x77,
+ 0x41, 0x38, 0x93, 0x00, 0x41, 0x38, 0xaf, 0xc6, 0xad, 0x79, 0x0e, 0x7c,
+ 0x29, 0x03, 0x41, 0x38, 0xbb, 0xc2, 0x13, 0x38, 0x0e, 0x7c, 0x11, 0xd2,
+ 0x47, 0xff, 0x0e, 0x7b, 0x60, 0xc5, 0xd2, 0xae, 0x0e, 0x7b, 0x79, 0xc8,
+ 0x48, 0x09, 0x0e, 0x7a, 0xd8, 0x4c, 0x8b, 0x35, 0xc1, 0x38, 0xc7, 0xcb,
+ 0x93, 0x7d, 0x0e, 0x7b, 0x31, 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, 0x29, 0xc9,
+ 0xa9, 0x48, 0x0e, 0x7b, 0x21, 0xc8, 0xbf, 0x6a, 0x0e, 0x7b, 0x18, 0x16,
+ 0xc1, 0x38, 0xdf, 0xc6, 0xbf, 0x8c, 0x0e, 0x7b, 0x09, 0xc7, 0xc2, 0x96,
+ 0x0e, 0x7b, 0x01, 0xc5, 0xd4, 0xd4, 0x0e, 0x7a, 0xf0, 0xa0, 0x0e, 0x7a,
+ 0x19, 0x9f, 0x0e, 0x7a, 0x10, 0x0d, 0xc1, 0x38, 0xeb, 0x05, 0xc1, 0x39,
+ 0x00, 0x06, 0xc1, 0x39, 0x0f, 0x16, 0xc1, 0x39, 0x1b, 0x15, 0xc1, 0x39,
+ 0x2d, 0x11, 0xc1, 0x39, 0x45, 0x42, 0x01, 0x53, 0xc1, 0x39, 0x55, 0x1c,
+ 0xc1, 0x39, 0x5f, 0x42, 0x00, 0x39, 0xc1, 0x39, 0x69, 0xc5, 0xd9, 0x43,
+ 0x0e, 0x79, 0x39, 0xc6, 0xcf, 0xb3, 0x0e, 0x79, 0x29, 0xc7, 0xc9, 0x8f,
+ 0x0e, 0x79, 0x21, 0x48, 0xbd, 0xca, 0xc1, 0x39, 0x75, 0x4d, 0x75, 0x8c,
+ 0xc1, 0x39, 0x81, 0x47, 0xc2, 0x7a, 0xc1, 0x39, 0x8b, 0x46, 0xcd, 0xdf,
+ 0x41, 0x39, 0x97, 0xc9, 0xb0, 0x62, 0x0e, 0x79, 0x91, 0xc6, 0xb0, 0x65,
+ 0x0e, 0x79, 0x89, 0xc7, 0x6d, 0xa2, 0x0e, 0x79, 0x80, 0x42, 0x07, 0xb2,
+ 0xc1, 0x39, 0xa3, 0xc8, 0x14, 0x38, 0x08, 0xd1, 0xc1, 0x46, 0x1e, 0x89,
+ 0x41, 0x39, 0xaf, 0xd6, 0x2d, 0xe6, 0x08, 0xd2, 0x31, 0xc9, 0x15, 0xcc,
+ 0x08, 0xd2, 0x00, 0x4d, 0x7f, 0x25, 0xc1, 0x39, 0xbe, 0xd1, 0x56, 0x1e,
+ 0x08, 0xd1, 0xd0, 0xc3, 0x1d, 0x35, 0x08, 0xd1, 0x91, 0xc2, 0x00, 0xd0,
+ 0x08, 0xd0, 0x61, 0x83, 0x08, 0xd0, 0x58, 0x83, 0x08, 0xd1, 0x81, 0xc2,
+ 0x0d, 0xf6, 0x08, 0xd1, 0x79, 0xc2, 0x00, 0xd0, 0x08, 0xd1, 0x70, 0x83,
+ 0x08, 0xd1, 0x41, 0xc2, 0x00, 0xd0, 0x08, 0xd1, 0x38, 0x1c, 0xc1, 0x39,
+ 0xd6, 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0xe1, 0x83, 0x08, 0xd0, 0xd9, 0x06,
+ 0x41, 0x39, 0xe0, 0x15, 0xc1, 0x39, 0xea, 0xc2, 0x00, 0xd0, 0x08, 0xd0,
+ 0xd1, 0x83, 0x08, 0xd0, 0xc9, 0x16, 0x41, 0x39, 0xf4, 0xc2, 0x00, 0xd0,
+ 0x08, 0xd1, 0x09, 0x83, 0x08, 0xd1, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xd0,
+ 0xf9, 0x83, 0x08, 0xd0, 0xf0, 0x83, 0x08, 0xd0, 0xe9, 0xc2, 0x00, 0xc1,
+ 0x08, 0xd0, 0xc1, 0xc2, 0x19, 0x2c, 0x08, 0xd0, 0x99, 0xc2, 0x01, 0x30,
+ 0x08, 0xd0, 0x78, 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0x89, 0x83, 0x08, 0xd0,
+ 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0x71, 0x83, 0x08, 0xd0, 0x68, 0xca,
+ 0x9d, 0xe2, 0x08, 0xd0, 0x49, 0x03, 0xc1, 0x39, 0xfe, 0x91, 0x08, 0xd0,
+ 0x33, 0x01, 0x3a, 0x06, 0x87, 0x08, 0xd0, 0x21, 0x97, 0x08, 0xd0, 0x1b,
+ 0x01, 0x3a, 0x0a, 0x8b, 0x08, 0xd0, 0x08, 0xcf, 0x60, 0x30, 0x01, 0x4c,
+ 0x51, 0xcd, 0x7d, 0x6b, 0x01, 0x4c, 0x40, 0x12, 0xc1, 0x3a, 0x0e, 0xcb,
+ 0x34, 0xad, 0x01, 0x50, 0xf8, 0xc8, 0xb8, 0x8a, 0x01, 0x00, 0x61, 0xcc,
+ 0x40, 0x81, 0x07, 0xf7, 0xf8, 0x43, 0x16, 0x55, 0xc1, 0x3a, 0x1a, 0x42,
+ 0x00, 0x75, 0x41, 0x3a, 0x3e, 0x45, 0x02, 0x10, 0xc1, 0x3a, 0x4a, 0xcc,
+ 0x86, 0x3d, 0x05, 0x4e, 0x08, 0x16, 0xc1, 0x3a, 0xd6, 0xc3, 0x05, 0x14,
+ 0x05, 0x4e, 0x89, 0xc4, 0x15, 0xe7, 0x05, 0x4e, 0x81, 0x08, 0xc1, 0x3a,
+ 0xe2, 0x15, 0xc1, 0x3a, 0xee, 0xc5, 0x06, 0xdb, 0x05, 0x4e, 0xc1, 0xc4,
+ 0x26, 0x78, 0x05, 0x4e, 0xc8, 0xc5, 0xdd, 0x53, 0x05, 0x4d, 0xf9, 0xc7,
+ 0xc6, 0xf6, 0x05, 0x4d, 0xf1, 0xc5, 0xdd, 0x12, 0x05, 0x4d, 0xe8, 0xc5,
+ 0xd8, 0xbc, 0x05, 0x4d, 0xe1, 0xca, 0xa2, 0xf6, 0x05, 0x4d, 0xd9, 0x16,
+ 0xc1, 0x3a, 0xfa, 0xc4, 0xc5, 0x6e, 0x05, 0x4d, 0xc3, 0x01, 0x3b, 0x04,
+ 0xc4, 0xdf, 0x6f, 0x05, 0x4d, 0xb2, 0x01, 0x3b, 0x0a, 0xc5, 0xde, 0x11,
+ 0x05, 0x4c, 0x0b, 0x01, 0x3b, 0x10, 0xc7, 0xc7, 0xd6, 0x05, 0x4c, 0x19,
+ 0xc5, 0xd9, 0xcf, 0x05, 0x4c, 0x11, 0xc9, 0xaf, 0xff, 0x05, 0x4c, 0x00,
+ 0x46, 0x02, 0xae, 0xc1, 0x3b, 0x16, 0x46, 0x01, 0xc8, 0x41, 0x3b, 0x28,
+ 0xc5, 0x18, 0x25, 0x01, 0x02, 0xb9, 0xd1, 0x1e, 0x3f, 0x01, 0x50, 0x60,
+ 0x10, 0xc1, 0x3b, 0x34, 0x0c, 0xc1, 0x3b, 0x73, 0x13, 0xc1, 0x3b, 0x93,
+ 0x14, 0xc1, 0x3b, 0xaf, 0x15, 0xc1, 0x3b, 0xd6, 0x05, 0xc1, 0x3c, 0x08,
+ 0x1c, 0xc1, 0x3c, 0x36, 0x19, 0xc1, 0x3c, 0x68, 0x0a, 0xc1, 0x3c, 0x84,
+ 0x1b, 0xc1, 0x3c, 0xb6, 0x1a, 0xc1, 0x3c, 0xd2, 0x0f, 0xc1, 0x3c, 0xf0,
+ 0x8b, 0x05, 0x00, 0x13, 0x01, 0x3d, 0x1e, 0x83, 0x05, 0x00, 0x53, 0x01,
+ 0x3d, 0x34, 0xc2, 0x01, 0xba, 0x05, 0x00, 0x6b, 0x01, 0x3d, 0x40, 0x91,
+ 0x05, 0x00, 0x8b, 0x01, 0x3d, 0x48, 0x87, 0x05, 0x00, 0xa3, 0x01, 0x3d,
+ 0x54, 0x04, 0xc1, 0x3d, 0x58, 0x12, 0xc1, 0x3d, 0x86, 0x08, 0xc1, 0x3d,
+ 0xa9, 0x18, 0xc1, 0x3d, 0xcc, 0x06, 0xc1, 0x3d, 0xf3, 0x16, 0xc1, 0x3e,
+ 0x1a, 0x0e, 0xc1, 0x3e, 0x3d, 0x09, 0xc1, 0x3e, 0x67, 0x0d, 0x41, 0x3e,
+ 0x8e, 0xc3, 0xe5, 0x75, 0x05, 0x24, 0x81, 0x0e, 0xc1, 0x3e, 0xb1, 0x0d,
+ 0xc1, 0x3e, 0xbe, 0x10, 0xc1, 0x3e, 0xc8, 0x05, 0xc1, 0x3e, 0xd8, 0x15,
+ 0xc1, 0x3e, 0xf1, 0x09, 0xc1, 0x3e, 0xfb, 0x0f, 0xc1, 0x3f, 0x0f, 0x0a,
+ 0xc1, 0x3f, 0x19, 0x04, 0xc1, 0x3f, 0x23, 0x1b, 0xc1, 0x3f, 0x2f, 0x12,
+ 0xc1, 0x3f, 0x39, 0x16, 0xc1, 0x3f, 0x45, 0x1c, 0xc1, 0x3f, 0x4f, 0x06,
+ 0xc1, 0x3f, 0x63, 0xc2, 0x00, 0x11, 0x05, 0x25, 0x49, 0x0c, 0xc1, 0x3f,
+ 0x6d, 0x18, 0xc1, 0x3f, 0x75, 0xc2, 0x02, 0xa0, 0x05, 0x25, 0xc0, 0xc3,
+ 0xe5, 0xb4, 0x08, 0x75, 0x43, 0x01, 0x3f, 0x81, 0xc3, 0x0d, 0xff, 0x08,
+ 0x75, 0x03, 0x01, 0x3f, 0x87, 0x07, 0xc1, 0x3f, 0x8d, 0x0a, 0xc1, 0x3f,
+ 0xa1, 0xc2, 0x00, 0x27, 0x08, 0x75, 0x29, 0xc3, 0x7e, 0x89, 0x08, 0x75,
+ 0x21, 0xc2, 0x01, 0xdf, 0x08, 0x75, 0x19, 0xc3, 0x20, 0x18, 0x08, 0x75,
+ 0x11, 0xc3, 0x8c, 0x3f, 0x08, 0x75, 0x09, 0xc3, 0xb3, 0xa6, 0x08, 0x74,
+ 0xf9, 0x0d, 0xc1, 0x3f, 0xad, 0xc3, 0x0f, 0x9a, 0x08, 0x74, 0xe1, 0xc2,
+ 0x02, 0x41, 0x08, 0x74, 0xd3, 0x01, 0x3f, 0xb9, 0xc2, 0x00, 0x87, 0x08,
+ 0x74, 0xc9, 0x1a, 0xc1, 0x3f, 0xbf, 0x1c, 0xc1, 0x3f, 0xc9, 0x16, 0xc1,
+ 0x3f, 0xd4, 0x42, 0x0e, 0x9a, 0xc1, 0x3f, 0xde, 0x15, 0xc1, 0x3f, 0xe6,
+ 0xc2, 0x25, 0x3b, 0x08, 0x74, 0x81, 0x14, 0xc1, 0x3f, 0xfc, 0x05, 0xc1,
+ 0x40, 0x06, 0x12, 0xc1, 0x40, 0x10, 0xc2, 0x00, 0x51, 0x08, 0x74, 0x08,
+ 0xca, 0xa8, 0x1e, 0x08, 0x75, 0x61, 0xca, 0x9c, 0xd4, 0x08, 0x75, 0x58,
+ 0x00, 0xc1, 0x40, 0x1a, 0xc8, 0xbb, 0x3a, 0x0f, 0xae, 0xc8, 0x12, 0xc1,
+ 0x40, 0x26, 0x83, 0x00, 0xa7, 0xa3, 0x01, 0x40, 0x36, 0x8a, 0x00, 0xa9,
+ 0x2b, 0x01, 0x40, 0x44, 0x91, 0x00, 0xa7, 0x8b, 0x01, 0x40, 0x61, 0x99,
+ 0x00, 0xa8, 0x3b, 0x01, 0x40, 0x6f, 0x87, 0x00, 0xa7, 0x69, 0x8b, 0x00,
+ 0xa7, 0x7a, 0x01, 0x40, 0x88, 0x83, 0x00, 0xa6, 0x3b, 0x01, 0x40, 0x8c,
+ 0x19, 0xc1, 0x40, 0xa3, 0x91, 0x00, 0xa6, 0x23, 0x01, 0x40, 0xbc, 0xc2,
+ 0x00, 0x75, 0x00, 0xac, 0xb3, 0x01, 0x40, 0xc4, 0x89, 0x00, 0xac, 0xab,
+ 0x01, 0x40, 0xd9, 0x44, 0xde, 0xaf, 0xc1, 0x40, 0xee, 0x48, 0xbc, 0x52,
+ 0xc1, 0x40, 0xfd, 0x87, 0x00, 0xa6, 0x01, 0x8b, 0x00, 0xa6, 0x13, 0x01,
+ 0x41, 0x08, 0x8a, 0x00, 0xa6, 0x90, 0x83, 0x00, 0xa4, 0x83, 0x01, 0x41,
+ 0x0c, 0xc7, 0xc4, 0xf7, 0x00, 0xb3, 0x69, 0x19, 0xc1, 0x41, 0x19, 0x91,
+ 0x00, 0xa4, 0x6b, 0x01, 0x41, 0x32, 0x8b, 0x00, 0xa4, 0x5b, 0x01, 0x41,
+ 0x36, 0x87, 0x00, 0xa4, 0x48, 0x4b, 0x92, 0x54, 0xc1, 0x41, 0x3a, 0x49,
+ 0xad, 0x4a, 0xc1, 0x41, 0x42, 0xcb, 0x92, 0x96, 0x00, 0xa9, 0xf8, 0x42,
+ 0x07, 0x26, 0xc1, 0x41, 0x65, 0x16, 0xc1, 0x41, 0x7e, 0x8a, 0x00, 0xab,
+ 0x53, 0x01, 0x41, 0x95, 0x83, 0x00, 0xa2, 0xab, 0x01, 0x41, 0xbb, 0x1b,
+ 0xc1, 0x41, 0xc6, 0x19, 0xc1, 0x41, 0xd6, 0x91, 0x00, 0xa2, 0x83, 0x01,
+ 0x41, 0xef, 0x8b, 0x00, 0xa2, 0x73, 0x01, 0x41, 0xf3, 0x87, 0x00, 0xa2,
+ 0x60, 0x87, 0x00, 0xa0, 0x63, 0x01, 0x41, 0xf7, 0x83, 0x00, 0xa0, 0xbb,
+ 0x01, 0x41, 0xfd, 0x91, 0x00, 0xa0, 0x93, 0x01, 0x42, 0x05, 0x8b, 0x00,
+ 0xa0, 0x72, 0x01, 0x42, 0x0c, 0x47, 0xc0, 0xac, 0xc1, 0x42, 0x10, 0x19,
+ 0xc1, 0x42, 0x1a, 0x83, 0x00, 0xaa, 0x5b, 0x01, 0x42, 0x35, 0x91, 0x00,
+ 0xaa, 0x43, 0x01, 0x42, 0x40, 0x8b, 0x00, 0xaa, 0x33, 0x01, 0x42, 0x44,
+ 0x87, 0x00, 0xaa, 0x10, 0x8b, 0x00, 0xaa, 0xab, 0x01, 0x42, 0x48, 0xc8,
+ 0x11, 0xf7, 0x00, 0xb3, 0x71, 0xc3, 0x14, 0x72, 0x00, 0xaa, 0xd9, 0x83,
+ 0x00, 0xaa, 0xcb, 0x01, 0x42, 0x52, 0x91, 0x00, 0xaa, 0xbb, 0x01, 0x42,
+ 0x59, 0x87, 0x00, 0xaa, 0x98, 0xc8, 0xbc, 0x9a, 0x00, 0xc6, 0xe1, 0x90,
+ 0x00, 0xa1, 0x58, 0x47, 0xc5, 0xb4, 0xc1, 0x42, 0x5d, 0x9b, 0x00, 0xc5,
+ 0x81, 0x91, 0x00, 0xa0, 0x31, 0x90, 0x00, 0xa1, 0x68, 0x83, 0x00, 0xa9,
+ 0x6b, 0x01, 0x42, 0x7f, 0x91, 0x00, 0xa9, 0x53, 0x01, 0x42, 0x8a, 0x19,
+ 0xc1, 0x42, 0x92, 0x46, 0x92, 0x9a, 0xc1, 0x42, 0xab, 0x8b, 0x00, 0xa9,
+ 0x43, 0x01, 0x42, 0xe9, 0x87, 0x00, 0xa9, 0x30, 0x83, 0x00, 0xa6, 0xd3,
+ 0x01, 0x42, 0xed, 0x8a, 0x00, 0xad, 0x33, 0x01, 0x42, 0xf8, 0x87, 0x00,
+ 0xa6, 0x99, 0x8b, 0x00, 0xa6, 0xab, 0x01, 0x43, 0x0d, 0x91, 0x00, 0xa6,
+ 0xbb, 0x01, 0x43, 0x11, 0x19, 0x41, 0x43, 0x15, 0x83, 0x00, 0xa5, 0x53,
+ 0x01, 0x43, 0x2e, 0x87, 0x00, 0xa5, 0x1b, 0x01, 0x43, 0x39, 0x91, 0x00,
+ 0xa5, 0x3b, 0x01, 0x43, 0x3f, 0x8b, 0x00, 0xa5, 0x2b, 0x01, 0x43, 0x46,
+ 0x19, 0xc1, 0x43, 0x4a, 0x8a, 0x00, 0xa5, 0xe8, 0x99, 0x00, 0xa4, 0x23,
+ 0x01, 0x43, 0x63, 0x83, 0x00, 0xa3, 0x93, 0x01, 0x43, 0x7c, 0x87, 0x00,
+ 0xa3, 0x59, 0x8b, 0x00, 0xa3, 0x6b, 0x01, 0x43, 0x87, 0x91, 0x00, 0xa3,
+ 0x7a, 0x01, 0x43, 0x8b, 0x19, 0xc1, 0x43, 0x8f, 0x83, 0x00, 0xa1, 0xc3,
+ 0x01, 0x43, 0xa8, 0x91, 0x00, 0xa1, 0x9b, 0x01, 0x43, 0xb3, 0x87, 0x00,
+ 0xa1, 0x79, 0x8b, 0x00, 0xa1, 0x8a, 0x01, 0x43, 0xbb, 0x83, 0x00, 0xa0,
+ 0x5b, 0x01, 0x43, 0xbf, 0x9b, 0x00, 0xc5, 0x89, 0x8b, 0x00, 0xa0, 0xe3,
+ 0x01, 0x43, 0xc7, 0x4a, 0xa0, 0xa8, 0xc1, 0x43, 0xcd, 0x90, 0x00, 0xa1,
+ 0x70, 0x83, 0x00, 0xac, 0x1b, 0x01, 0x43, 0xd5, 0x91, 0x00, 0xac, 0x0b,
+ 0x01, 0x43, 0xe0, 0x8b, 0x00, 0xab, 0xfa, 0x01, 0x43, 0xe4, 0x8d, 0x00,
+ 0xab, 0xe9, 0xc5, 0x59, 0x93, 0x00, 0xa0, 0x00, 0x8b, 0x00, 0xa0, 0x21,
+ 0x90, 0x00, 0xa1, 0x60, 0xd0, 0x5a, 0x52, 0x01, 0x02, 0x08, 0xc9, 0x36,
+ 0xe7, 0x0f, 0xae, 0x10, 0x97, 0x08, 0x15, 0xfa, 0x01, 0x43, 0xe8, 0x94,
+ 0x08, 0x16, 0x48, 0x86, 0x08, 0x15, 0x32, 0x01, 0x43, 0xef, 0x9f, 0x08,
+ 0x15, 0x38, 0x84, 0x08, 0x16, 0x52, 0x01, 0x43, 0xf3, 0x9f, 0x08, 0x15,
+ 0x60, 0x96, 0x08, 0x16, 0x3a, 0x01, 0x43, 0xff, 0x8a, 0x08, 0x15, 0x73,
+ 0x01, 0x44, 0x03, 0x95, 0x08, 0x15, 0xc1, 0x96, 0x08, 0x16, 0x12, 0x01,
+ 0x44, 0x07, 0xc2, 0x8c, 0x53, 0x08, 0x15, 0x89, 0xc2, 0xe6, 0x81, 0x08,
+ 0x16, 0x30, 0x90, 0x08, 0x15, 0x99, 0x86, 0x08, 0x15, 0xf1, 0x89, 0x08,
+ 0x16, 0x20, 0x9f, 0x08, 0x15, 0x08, 0x8b, 0x08, 0x16, 0x28, 0x9f, 0x08,
+ 0x16, 0x78, 0x9f, 0x08, 0x15, 0xe8, 0x9f, 0x08, 0x16, 0x08, 0x03, 0xc1,
+ 0x44, 0x0b, 0xc3, 0x0b, 0xc8, 0x08, 0x29, 0x89, 0x09, 0xc1, 0x44, 0x17,
+ 0x06, 0xc1, 0x44, 0x23, 0x07, 0xc1, 0x44, 0x33, 0x1c, 0xc1, 0x44, 0x3d,
+ 0x16, 0xc1, 0x44, 0x47, 0x05, 0xc1, 0x44, 0x59, 0x1b, 0xc1, 0x44, 0x67,
+ 0x0b, 0xc1, 0x44, 0x73, 0x15, 0xc1, 0x44, 0x85, 0x0e, 0xc1, 0x44, 0x8f,
+ 0xc4, 0xdf, 0x1f, 0x08, 0x2a, 0x01, 0x0c, 0xc1, 0x44, 0x9b, 0x0d, 0xc1,
+ 0x44, 0xa7, 0xc4, 0xdf, 0xa7, 0x08, 0x2a, 0x31, 0x42, 0x0f, 0x9a, 0xc1,
+ 0x44, 0xb3, 0xc3, 0xda, 0xa6, 0x08, 0x2a, 0x61, 0xc4, 0xe4, 0x53, 0x08,
+ 0x2a, 0x71, 0xc2, 0x00, 0x45, 0x08, 0x2a, 0x91, 0xc3, 0xd2, 0xb3, 0x08,
+ 0x2a, 0xa1, 0x12, 0xc1, 0x44, 0xbb, 0xc3, 0x07, 0x81, 0x08, 0x2a, 0xc9,
+ 0xc4, 0xde, 0x87, 0x08, 0x2a, 0xd8, 0xcc, 0x85, 0x1d, 0x0f, 0xb1, 0xc9,
+ 0xc9, 0xa9, 0x36, 0x0f, 0xb1, 0xe0, 0x07, 0xc1, 0x44, 0xc7, 0x06, 0xc1,
+ 0x45, 0x07, 0x03, 0xc1, 0x45, 0x47, 0x08, 0xc1, 0x45, 0x87, 0x24, 0xc1,
+ 0x45, 0xc7, 0x23, 0xc1, 0x46, 0x07, 0x20, 0xc1, 0x46, 0x47, 0x1f, 0xc1,
+ 0x46, 0x87, 0x1e, 0xc1, 0x46, 0xc7, 0x1d, 0xc1, 0x47, 0x07, 0x05, 0xc1,
+ 0x47, 0x47, 0x04, 0xc1, 0x47, 0x87, 0x26, 0xc1, 0x47, 0xc7, 0x25, 0xc1,
+ 0x48, 0x07, 0x22, 0xc1, 0x48, 0x47, 0x21, 0x41, 0x48, 0x87, 0x24, 0xc1,
+ 0x48, 0xc7, 0x23, 0xc1, 0x49, 0x07, 0x22, 0xc1, 0x49, 0x47, 0x21, 0xc1,
+ 0x49, 0x87, 0x1f, 0xc1, 0x49, 0xc7, 0x1d, 0xc1, 0x4a, 0x07, 0x08, 0xc1,
+ 0x4a, 0x47, 0x04, 0xc1, 0x4a, 0x87, 0x03, 0xc1, 0x4a, 0xc7, 0x26, 0xc1,
+ 0x4b, 0x07, 0x25, 0xc1, 0x4b, 0x47, 0x07, 0xc1, 0x4b, 0x87, 0x06, 0xc1,
+ 0x4b, 0xc7, 0x05, 0xc1, 0x4c, 0x07, 0x20, 0xc1, 0x4c, 0x47, 0x1e, 0x41,
+ 0x4c, 0x87, 0x1e, 0xc1, 0x4c, 0xc7, 0x1d, 0x41, 0x4c, 0xff, 0x06, 0xc1,
+ 0x4d, 0x3f, 0x05, 0xc1, 0x4d, 0x67, 0x04, 0xc1, 0x4d, 0xa7, 0x03, 0xc1,
+ 0x4d, 0xe7, 0x26, 0xc1, 0x4e, 0x27, 0x25, 0xc1, 0x4e, 0x67, 0x24, 0xc1,
+ 0x4e, 0xa7, 0x23, 0xc1, 0x4e, 0xe7, 0x22, 0xc1, 0x4f, 0x1f, 0x21, 0xc1,
+ 0x4f, 0x5f, 0x20, 0xc1, 0x4f, 0x9f, 0x1f, 0xc1, 0x4f, 0xdf, 0x1e, 0xc1,
+ 0x50, 0x1f, 0x1d, 0x41, 0x50, 0x5f, 0x08, 0xc1, 0x50, 0x9f, 0x07, 0xc1,
+ 0x50, 0xdf, 0x06, 0xc1, 0x51, 0x1f, 0x05, 0xc1, 0x51, 0x5f, 0x04, 0xc1,
+ 0x51, 0x9f, 0x03, 0xc1, 0x51, 0xdf, 0x26, 0xc1, 0x52, 0x1f, 0x25, 0xc1,
+ 0x52, 0x5f, 0x24, 0xc1, 0x52, 0x9f, 0x23, 0xc1, 0x52, 0xdf, 0x22, 0xc1,
+ 0x53, 0x1f, 0x21, 0xc1, 0x53, 0x5f, 0x20, 0xc1, 0x53, 0x9f, 0x1f, 0xc1,
+ 0x53, 0xdf, 0x1e, 0xc1, 0x54, 0x1f, 0x1d, 0x41, 0x54, 0x5f, 0x92, 0x01,
+ 0x74, 0xc9, 0x8f, 0x01, 0x75, 0xb9, 0xc2, 0x00, 0x74, 0x01, 0x76, 0xb8,
+ 0xc3, 0x43, 0x08, 0x01, 0x74, 0x09, 0xc5, 0x78, 0xee, 0x01, 0x76, 0x10,
+ 0xc6, 0xca, 0xeb, 0x01, 0x75, 0x01, 0xc2, 0x0d, 0x10, 0x01, 0x76, 0x78,
+ 0x15, 0xc1, 0x54, 0x9f, 0xc4, 0x63, 0x7e, 0x01, 0x76, 0x59, 0x09, 0xc1,
+ 0x54, 0xbd, 0x0e, 0xc1, 0x54, 0xc9, 0x16, 0xc1, 0x54, 0xd5, 0xc4, 0x45,
+ 0x10, 0x01, 0x76, 0xd9, 0x08, 0xc1, 0x54, 0xe7, 0x07, 0xc1, 0x54, 0xf9,
+ 0xc5, 0xa0, 0x85, 0x01, 0x77, 0x11, 0xc4, 0xa3, 0x1a, 0x01, 0x77, 0x31,
+ 0xc6, 0x87, 0xe7, 0x01, 0x77, 0x80, 0x45, 0x71, 0x24, 0xc1, 0x55, 0x05,
+ 0xc2, 0x00, 0x65, 0x01, 0x74, 0x58, 0xc3, 0x05, 0x14, 0x01, 0x74, 0x61,
+ 0xc3, 0x02, 0x9f, 0x01, 0x74, 0x68, 0xc3, 0x21, 0xdf, 0x01, 0x74, 0x91,
+ 0x44, 0x4b, 0x1f, 0x41, 0x55, 0x0f, 0x49, 0x8c, 0x70, 0xc1, 0x55, 0x1b,
+ 0xc2, 0x8c, 0x30, 0x01, 0x75, 0x78, 0xc3, 0x05, 0x14, 0x01, 0x75, 0x61,
+ 0xc3, 0x02, 0x9f, 0x01, 0x75, 0x68, 0xc3, 0x05, 0x14, 0x01, 0x75, 0x21,
+ 0xc3, 0x02, 0x9f, 0x01, 0x75, 0x28, 0x9a, 0x01, 0x74, 0x31, 0xcb, 0x93,
+ 0x67, 0x01, 0x75, 0x51, 0xc2, 0x02, 0x6f, 0x01, 0x77, 0x18, 0xc3, 0x05,
+ 0x14, 0x01, 0x75, 0xd1, 0xc3, 0x02, 0x9f, 0x01, 0x75, 0xd8, 0xc3, 0x05,
+ 0x14, 0x01, 0x74, 0x71, 0x16, 0xc1, 0x55, 0x29, 0xc4, 0x09, 0x9d, 0x01,
+ 0x74, 0x88, 0xc3, 0x05, 0x14, 0x01, 0x76, 0x89, 0xc3, 0x02, 0x9f, 0x01,
+ 0x76, 0x90, 0x43, 0x0f, 0x06, 0xc1, 0x55, 0x35, 0x86, 0x01, 0x77, 0x08,
+ 0xc2, 0x00, 0x45, 0x01, 0x74, 0xe9, 0xc4, 0x14, 0xdd, 0x01, 0x74, 0xf9,
+ 0xc4, 0xd7, 0x14, 0x01, 0x75, 0xe9, 0x44, 0x0d, 0xee, 0x41, 0x55, 0x41,
+ 0xc2, 0x01, 0xe2, 0x01, 0x75, 0xa9, 0xc2, 0x00, 0xfe, 0x01, 0x75, 0xe0,
+ 0x44, 0x02, 0x11, 0xc1, 0x55, 0x4d, 0x43, 0xad, 0x64, 0x41, 0x55, 0x59,
+ 0xc3, 0x05, 0x14, 0x01, 0x76, 0x19, 0xc3, 0x02, 0x9f, 0x01, 0x76, 0x20,
+ 0xc4, 0x18, 0x10, 0x01, 0x77, 0x59, 0x16, 0xc1, 0x55, 0x65, 0xc6, 0x87,
+ 0xe7, 0x01, 0x77, 0x78, 0xc3, 0x05, 0x14, 0x01, 0x76, 0xe9, 0x16, 0x41,
+ 0x55, 0x71, 0xc2, 0x02, 0xa0, 0x01, 0x75, 0x91, 0xc4, 0x02, 0xde, 0x01,
+ 0x75, 0x98, 0xc3, 0x05, 0x14, 0x01, 0x75, 0xf1, 0x16, 0x41, 0x55, 0x7d,
+ 0x9c, 0x01, 0x8e, 0xc1, 0x89, 0x01, 0x8e, 0xf8, 0xc2, 0x47, 0xa4, 0x01,
+ 0x8e, 0x49, 0x9c, 0x01, 0x8e, 0xf0, 0x9c, 0x01, 0x8e, 0x2b, 0x01, 0x55,
+ 0x89, 0x89, 0x01, 0x8e, 0x31, 0x99, 0x01, 0x8e, 0x6b, 0x01, 0x55, 0x94,
+ 0x96, 0x01, 0x8e, 0x50, 0xc2, 0x47, 0xa4, 0x01, 0x8e, 0x60, 0xc5, 0x08,
+ 0xd9, 0x0f, 0xdc, 0xa8, 0x4d, 0x29, 0xb9, 0xc1, 0x55, 0x98, 0x47, 0x02,
+ 0x0e, 0x41, 0x55, 0xe7, 0xc3, 0x91, 0xe8, 0x0f, 0x9a, 0x91, 0xc9, 0xae,
+ 0x3d, 0x0f, 0x99, 0xc0, 0xc2, 0x02, 0x0a, 0x01, 0x02, 0x01, 0xc9, 0x33,
+ 0xdd, 0x00, 0x00, 0x4a, 0x01, 0x56, 0x36, 0xcf, 0x64, 0xfe, 0x0f, 0xa6,
+ 0x49, 0xcd, 0x7b, 0x22, 0x0f, 0xa6, 0x42, 0x01, 0x56, 0x3a, 0xc3, 0xd8,
+ 0xd0, 0x08, 0x8a, 0x39, 0x0e, 0xc1, 0x56, 0x40, 0xc3, 0x39, 0x6e, 0x08,
+ 0x89, 0x31, 0xc3, 0x82, 0xa0, 0x08, 0x89, 0x29, 0xc3, 0x14, 0x72, 0x08,
+ 0x89, 0x21, 0xc3, 0x47, 0xd9, 0x08, 0x89, 0x11, 0x1b, 0xc1, 0x56, 0x4c,
+ 0xc3, 0xc2, 0xab, 0x08, 0x88, 0xf9, 0x04, 0xc1, 0x56, 0x58, 0x12, 0xc1,
+ 0x56, 0x64, 0x10, 0xc1, 0x56, 0x70, 0x06, 0xc1, 0x56, 0x88, 0x16, 0xc1,
+ 0x56, 0x98, 0x0c, 0xc1, 0x56, 0xa8, 0x05, 0xc1, 0x56, 0xb4, 0x09, 0xc1,
+ 0x56, 0xc0, 0x0d, 0xc1, 0x56, 0xcc, 0x87, 0x08, 0x88, 0x31, 0x97, 0x08,
+ 0x88, 0x29, 0x8b, 0x08, 0x88, 0x21, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x18,
+ 0x4a, 0x6f, 0xc8, 0xc1, 0x56, 0xd8, 0xc5, 0x1e, 0x96, 0x08, 0x89, 0x98,
+ 0xcb, 0x97, 0xf5, 0x08, 0x8a, 0x11, 0xc4, 0x19, 0x53, 0x08, 0x8a, 0x09,
+ 0x45, 0x09, 0x98, 0x41, 0x56, 0xfb, 0xcb, 0x45, 0x8e, 0x08, 0x8a, 0x01,
+ 0x44, 0x00, 0xbb, 0x41, 0x57, 0x1f, 0xc2, 0x01, 0x4a, 0x05, 0x51, 0xb1,
+ 0xc2, 0x00, 0xdb, 0x05, 0x51, 0xa9, 0xc2, 0x00, 0x39, 0x05, 0x51, 0xa1,
+ 0xc2, 0x19, 0x2c, 0x05, 0x51, 0x99, 0x46, 0x26, 0xf7, 0x41, 0x57, 0x31,
+ 0x97, 0x05, 0x51, 0x6b, 0x01, 0x57, 0x3f, 0x03, 0xc1, 0x57, 0x43, 0x91,
+ 0x05, 0x51, 0x7b, 0x01, 0x57, 0x4f, 0xc2, 0x06, 0xdb, 0x05, 0x51, 0x61,
+ 0x8b, 0x05, 0x51, 0x52, 0x01, 0x57, 0x53, 0xc2, 0x00, 0xd0, 0x05, 0x51,
+ 0x41, 0x15, 0xc1, 0x57, 0x57, 0x10, 0xc1, 0x57, 0x61, 0x09, 0xc1, 0x57,
+ 0x73, 0x0d, 0xc1, 0x57, 0x7d, 0x91, 0x05, 0x50, 0x29, 0x83, 0x05, 0x50,
+ 0x03, 0x01, 0x57, 0x87, 0x87, 0x05, 0x50, 0x19, 0x46, 0x26, 0xf7, 0xc1,
+ 0x57, 0x8b, 0xc2, 0x02, 0x41, 0x05, 0x51, 0x29, 0xc2, 0x00, 0xdb, 0x05,
+ 0x51, 0x21, 0xc2, 0x00, 0x39, 0x05, 0x51, 0x19, 0xc2, 0x19, 0x2c, 0x05,
+ 0x51, 0x11, 0x04, 0xc1, 0x57, 0xba, 0x0f, 0xc1, 0x57, 0xca, 0x12, 0xc1,
+ 0x57, 0xd4, 0x06, 0xc1, 0x57, 0xe4, 0x16, 0xc1, 0x57, 0xf4, 0x0c, 0xc1,
+ 0x57, 0xfe, 0x42, 0x11, 0xee, 0xc1, 0x58, 0x08, 0x97, 0x05, 0x50, 0x11,
+ 0x8b, 0x05, 0x50, 0x08, 0xcc, 0x86, 0x19, 0x05, 0x52, 0xf9, 0x06, 0xc1,
+ 0x58, 0x12, 0xc6, 0x99, 0x4e, 0x05, 0x52, 0xe0, 0xc4, 0x26, 0x78, 0x05,
+ 0x52, 0xc9, 0xc5, 0x06, 0xdb, 0x05, 0x52, 0xc1, 0x15, 0xc1, 0x58, 0x1e,
+ 0x08, 0xc1, 0x58, 0x2a, 0x16, 0xc1, 0x58, 0x36, 0xc4, 0x15, 0xe7, 0x05,
+ 0x52, 0x81, 0xc3, 0x05, 0x14, 0x05, 0x52, 0x88, 0xc3, 0x05, 0x14, 0x08,
+ 0x7e, 0x2b, 0x01, 0x58, 0x42, 0x16, 0xc1, 0x58, 0x48, 0xc4, 0x09, 0x9d,
+ 0x08, 0x7e, 0x40, 0xc3, 0xb5, 0x3e, 0x08, 0x7e, 0x21, 0x15, 0xc1, 0x58,
+ 0x58, 0xc4, 0xe0, 0xe7, 0x08, 0x7d, 0xd9, 0xc4, 0x4a, 0xb9, 0x08, 0x7d,
+ 0xd1, 0xc2, 0x01, 0x7f, 0x08, 0x7d, 0xab, 0x01, 0x58, 0x6a, 0xc5, 0x4a,
+ 0xb3, 0x08, 0x7d, 0xc1, 0xca, 0xa5, 0x26, 0x08, 0x7d, 0xb9, 0xc3, 0x7e,
+ 0x89, 0x08, 0x7d, 0xb1, 0xc6, 0x40, 0x9a, 0x08, 0x7d, 0xa1, 0xc5, 0x9c,
+ 0xa2, 0x08, 0x7d, 0x99, 0xc4, 0xe3, 0x27, 0x08, 0x7d, 0x91, 0x03, 0xc1,
+ 0x58, 0x70, 0xc6, 0xcf, 0xd7, 0x08, 0x7d, 0xe1, 0xc3, 0x00, 0x4e, 0x08,
+ 0x7d, 0xe9, 0xc3, 0x20, 0x18, 0x08, 0x7d, 0xf1, 0xc2, 0x00, 0x67, 0x08,
+ 0x7e, 0x09, 0xc4, 0x5d, 0xe2, 0x08, 0x7e, 0x10, 0xc4, 0x01, 0xc3, 0x01,
+ 0x3a, 0x61, 0x43, 0x00, 0x55, 0xc1, 0x58, 0x7c, 0x12, 0x41, 0x58, 0x88,
+ 0xc6, 0xd3, 0xc1, 0x01, 0x34, 0xa1, 0xc5, 0xd4, 0x3e, 0x0f, 0x9c, 0x61,
+ 0x47, 0x53, 0xfe, 0x41, 0x58, 0x97, 0x51, 0x4f, 0x69, 0xc1, 0x58, 0x9d,
+ 0x14, 0x41, 0x59, 0x0e, 0x48, 0x5b, 0x32, 0xc1, 0x59, 0x18, 0x10, 0xc1,
+ 0x59, 0x24, 0x4f, 0x66, 0xcf, 0xc1, 0x59, 0x30, 0x44, 0x31, 0xef, 0x41,
+ 0x59, 0x3c, 0x0b, 0xc1, 0x59, 0x44, 0x07, 0x41, 0x59, 0x50, 0x43, 0x00,
+ 0x4a, 0xc1, 0x59, 0x5c, 0x11, 0xc1, 0x59, 0x66, 0x45, 0x0b, 0x12, 0xc1,
+ 0x59, 0x72, 0x42, 0x00, 0x2d, 0x41, 0x59, 0x7e, 0x43, 0x06, 0xa8, 0xc1,
+ 0x59, 0x8a, 0xcf, 0x64, 0x0e, 0x00, 0xd5, 0xb0, 0x46, 0x18, 0x54, 0xc1,
+ 0x59, 0x96, 0xcf, 0x0e, 0x7d, 0x01, 0x06, 0xd9, 0xc4, 0x1e, 0xc9, 0x00,
+ 0x18, 0x1b, 0x01, 0x59, 0xa8, 0xd1, 0x52, 0x55, 0x00, 0x18, 0x90, 0x11,
+ 0xc1, 0x59, 0xac, 0x07, 0xc1, 0x59, 0xbc, 0xc8, 0x20, 0xa9, 0x00, 0x18,
+ 0x42, 0x01, 0x59, 0xc8, 0x49, 0xa8, 0x70, 0xc1, 0x59, 0xd4, 0xd0, 0x5e,
+ 0xf2, 0x00, 0x1a, 0x38, 0xce, 0x3b, 0x7a, 0x01, 0x06, 0xe1, 0xc6, 0xcf,
+ 0xef, 0x00, 0x1a, 0x90, 0x49, 0x05, 0xf9, 0xc1, 0x59, 0xf3, 0x48, 0xba,
+ 0x9a, 0xc1, 0x59, 0xff, 0xd0, 0x08, 0xf7, 0x00, 0x18, 0x13, 0x01, 0x5a,
+ 0x2b, 0x03, 0xc1, 0x5a, 0x31, 0x11, 0xc1, 0x5a, 0x40, 0xc6, 0xbd, 0xf4,
+ 0x00, 0x19, 0x38, 0x45, 0x2e, 0xef, 0xc1, 0x5a, 0x4f, 0xce, 0x6c, 0x98,
+ 0x00, 0xee, 0x19, 0xca, 0xa2, 0x4c, 0x00, 0xee, 0x11, 0x47, 0x25, 0xae,
+ 0xc1, 0x5a, 0x59, 0x16, 0xc1, 0x5a, 0x65, 0xcc, 0x84, 0x81, 0x00, 0x19,
+ 0xe0, 0xca, 0xa0, 0x6c, 0x08, 0x99, 0xd9, 0x14, 0x41, 0x5a, 0x6b, 0x4b,
+ 0x94, 0xe8, 0xc1, 0x5a, 0x7a, 0x50, 0x5c, 0x02, 0x41, 0x5a, 0x86, 0x12,
+ 0xc1, 0x5a, 0x92, 0xc7, 0x04, 0xed, 0x00, 0xee, 0x91, 0xc7, 0x0a, 0x80,
+ 0x00, 0xee, 0x88, 0xc7, 0x05, 0x00, 0x00, 0xee, 0x81, 0x10, 0x41, 0x5a,
+ 0x9e, 0xc5, 0x05, 0x02, 0x00, 0xee, 0x79, 0xc5, 0x00, 0xd4, 0x00, 0x1a,
+ 0xd8, 0xc5, 0xcc, 0x90, 0x00, 0x19, 0x43, 0x01, 0x5a, 0xaa, 0xce, 0x6d,
+ 0xf6, 0x00, 0xd5, 0xb9, 0xc7, 0x7d, 0xa5, 0x00, 0x18, 0x29, 0x51, 0x52,
+ 0x33, 0x41, 0x5a, 0xb0, 0xc5, 0x60, 0xb2, 0x00, 0x18, 0x23, 0x01, 0x5a,
+ 0xce, 0xcf, 0x68, 0x55, 0x00, 0x19, 0x00, 0x49, 0x60, 0xf4, 0xc1, 0x5a,
+ 0xd6, 0x03, 0x41, 0x5a, 0xe2, 0xd0, 0x5d, 0xa2, 0x00, 0xd6, 0x31, 0xce,
+ 0x70, 0xc0, 0x00, 0x1a, 0x50, 0xc8, 0xbb, 0x12, 0x00, 0xd5, 0xa9, 0x00,
+ 0x41, 0x5a, 0xee, 0xc8, 0x9e, 0x5c, 0x00, 0x18, 0x49, 0xc2, 0x00, 0xc0,
+ 0x00, 0x18, 0xd9, 0xce, 0x6b, 0xf0, 0x00, 0x1a, 0x58, 0x45, 0x02, 0x6d,
+ 0xc1, 0x5a, 0xfa, 0xc5, 0x1e, 0xc8, 0x00, 0x19, 0xf0, 0xca, 0x8d, 0xb1,
+ 0x01, 0x02, 0x91, 0xc2, 0x00, 0xfe, 0x00, 0x02, 0x00, 0x4b, 0x93, 0x04,
+ 0xc1, 0x5b, 0x06, 0x4b, 0x99, 0xef, 0x41, 0x5b, 0x24, 0xc4, 0xde, 0xbf,
+ 0x01, 0x19, 0xa9, 0xc4, 0xe3, 0x37, 0x01, 0x19, 0xa0, 0x45, 0x00, 0x8c,
+ 0xc1, 0x5b, 0x42, 0x43, 0x54, 0xfc, 0x41, 0x5b, 0x54, 0xc5, 0xdc, 0x86,
+ 0x0f, 0x9c, 0xd9, 0xd3, 0x42, 0x09, 0x00, 0x04, 0xd8, 0xc6, 0x0e, 0xbd,
+ 0x01, 0x12, 0xa1, 0xc4, 0x00, 0xba, 0x01, 0x05, 0x08, 0x4c, 0x29, 0xba,
+ 0xc1, 0x5b, 0x63, 0x46, 0x10, 0x79, 0x41, 0x5b, 0xd0, 0x4e, 0x0b, 0x18,
+ 0xc1, 0x5b, 0xea, 0x49, 0x29, 0x29, 0x41, 0x5c, 0x57, 0xce, 0x74, 0x4e,
+ 0x08, 0x17, 0x01, 0x46, 0x09, 0x97, 0xc1, 0x5c, 0x63, 0x47, 0x34, 0x2f,
+ 0x41, 0x5c, 0x81, 0xc9, 0x11, 0xf6, 0x01, 0x67, 0xc9, 0xd4, 0x2f, 0xe2,
+ 0x01, 0x67, 0xd1, 0xd6, 0x2f, 0xe0, 0x01, 0x67, 0xd9, 0xcd, 0x4b, 0xac,
+ 0x01, 0x67, 0xe0, 0xd0, 0x53, 0xaa, 0x01, 0x67, 0xe9, 0xc8, 0x11, 0xf7,
+ 0x01, 0x67, 0xf0, 0xcd, 0x80, 0x02, 0x0f, 0xa8, 0x81, 0x4d, 0x7f, 0x32,
+ 0xc1, 0x5c, 0x9f, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xa9, 0x17, 0xc1, 0x5c,
+ 0xab, 0xd8, 0x24, 0xfb, 0x01, 0x52, 0x69, 0x42, 0x06, 0x62, 0x41, 0x5c,
+ 0xba, 0xd3, 0x41, 0x97, 0x01, 0x3f, 0x99, 0x05, 0xc1, 0x5c, 0xcc, 0xc8,
+ 0x1e, 0x3f, 0x01, 0x11, 0x89, 0xd1, 0x05, 0x75, 0x01, 0x0d, 0xd9, 0x16,
+ 0xc1, 0x5c, 0xd8, 0x45, 0x00, 0x2c, 0xc1, 0x5c, 0xe4, 0x48, 0x03, 0xc8,
+ 0x41, 0x5c, 0xf0, 0x16, 0xc1, 0x5c, 0xf6, 0x07, 0xc1, 0x5d, 0x06, 0x44,
+ 0x26, 0x78, 0xc1, 0x5d, 0x12, 0x15, 0xc1, 0x5d, 0x1e, 0x08, 0xc1, 0x5d,
+ 0x2a, 0x43, 0x05, 0x14, 0x41, 0x5d, 0x36, 0xc9, 0xad, 0xe3, 0x0f, 0x99,
+ 0x49, 0xc4, 0x2a, 0x90, 0x0f, 0x99, 0x41, 0xc4, 0x27, 0x54, 0x0f, 0x99,
+ 0x39, 0xc7, 0xc2, 0x34, 0x0f, 0x99, 0x50, 0x05, 0xc1, 0x5d, 0x42, 0x0a,
+ 0xc1, 0x5d, 0x56, 0xde, 0x0f, 0x7c, 0x01, 0x3a, 0x11, 0x19, 0xc1, 0x5d,
+ 0x6e, 0x06, 0xc1, 0x5d, 0x78, 0x0e, 0xc1, 0x5d, 0x86, 0x47, 0x34, 0x2f,
+ 0xc1, 0x5d, 0x92, 0x16, 0xc1, 0x5d, 0xa8, 0xc6, 0x0e, 0xbd, 0x01, 0x14,
+ 0xe1, 0x03, 0xc1, 0x5d, 0xb7, 0x14, 0xc1, 0x5d, 0xc3, 0x0f, 0xc1, 0x5d,
+ 0xcf, 0x12, 0xc1, 0x5d, 0xdb, 0x0b, 0xc1, 0x5d, 0xf3, 0xcc, 0x07, 0xc7,
+ 0x01, 0x4e, 0x09, 0x04, 0xc1, 0x5e, 0x05, 0xcc, 0x07, 0xbb, 0x01, 0x4d,
+ 0xb1, 0x9a, 0x01, 0x5d, 0xf1, 0xcf, 0x69, 0xcc, 0x0f, 0x88, 0x69, 0xc6,
+ 0x0b, 0x09, 0x0f, 0xbe, 0xb9, 0x0d, 0x41, 0x5e, 0x11, 0x45, 0x00, 0x8c,
+ 0xc1, 0x5e, 0x1d, 0x5e, 0x0e, 0xe6, 0x41, 0x5e, 0x47, 0x97, 0x09, 0x1b,
+ 0x53, 0x01, 0x5e, 0x4d, 0x83, 0x09, 0x1a, 0xeb, 0x01, 0x5e, 0x64, 0x8b,
+ 0x09, 0x1b, 0x1b, 0x01, 0x5e, 0x76, 0xc2, 0x8d, 0xc6, 0x09, 0x1b, 0x10,
+ 0x94, 0x09, 0x19, 0x43, 0x01, 0x5e, 0x91, 0x00, 0xc1, 0x5e, 0xae, 0x8f,
+ 0x09, 0x18, 0xeb, 0x01, 0x5e, 0xc1, 0x1c, 0xc1, 0x5e, 0xd6, 0xc4, 0xde,
+ 0x97, 0x09, 0x1a, 0xc9, 0xc2, 0x01, 0xe2, 0x09, 0x1a, 0x8b, 0x01, 0x5e,
+ 0xe1, 0x90, 0x09, 0x19, 0x33, 0x01, 0x5e, 0xf5, 0x86, 0x09, 0x18, 0x9b,
+ 0x01, 0x5e, 0xfb, 0x84, 0x09, 0x18, 0x91, 0x9f, 0x09, 0x18, 0x88, 0x97,
+ 0x09, 0x18, 0x2b, 0x01, 0x5f, 0x05, 0x83, 0x09, 0x17, 0x5b, 0x01, 0x5f,
+ 0x1d, 0x8b, 0x09, 0x17, 0xf3, 0x01, 0x5f, 0x3c, 0x87, 0x09, 0x17, 0xe2,
+ 0x01, 0x5f, 0x51, 0x8b, 0x09, 0x16, 0xdb, 0x01, 0x5f, 0x57, 0x0a, 0xc1,
+ 0x5f, 0x6e, 0x83, 0x09, 0x14, 0x9b, 0x01, 0x5f, 0x87, 0x97, 0x09, 0x17,
+ 0x12, 0x01, 0x5f, 0x9f, 0x8b, 0x09, 0x12, 0x63, 0x01, 0x5f, 0xc0, 0x97,
+ 0x09, 0x13, 0x0b, 0x01, 0x5f, 0xde, 0x83, 0x09, 0x11, 0xf3, 0x01, 0x5f,
+ 0xee, 0x87, 0x09, 0x12, 0x42, 0x01, 0x60, 0x06, 0x97, 0x09, 0x11, 0x63,
+ 0x01, 0x60, 0x0a, 0x8b, 0x09, 0x11, 0x53, 0x01, 0x60, 0x2c, 0x87, 0x09,
+ 0x11, 0x43, 0x01, 0x60, 0x36, 0x83, 0x09, 0x11, 0x02, 0x01, 0x60, 0x3d,
+ 0x97, 0x09, 0x0f, 0xdb, 0x01, 0x60, 0x56, 0x83, 0x09, 0x0d, 0xbb, 0x01,
+ 0x60, 0x7f, 0x8b, 0x09, 0x0f, 0xba, 0x01, 0x60, 0x9f, 0x83, 0x09, 0x0a,
+ 0xbb, 0x01, 0x60, 0xaf, 0xc5, 0xd5, 0xf6, 0x09, 0x0d, 0xb1, 0x97, 0x09,
+ 0x0d, 0x53, 0x01, 0x60, 0xe5, 0x8b, 0x09, 0x0d, 0x03, 0x01, 0x61, 0x12,
+ 0xc4, 0x73, 0x32, 0x09, 0x0c, 0xf8, 0x8b, 0x09, 0x09, 0x6b, 0x01, 0x61,
+ 0x24, 0x83, 0x09, 0x09, 0x4b, 0x01, 0x61, 0x2a, 0x97, 0x09, 0x09, 0xba,
+ 0x01, 0x61, 0x32, 0x97, 0x09, 0x08, 0xb3, 0x01, 0x61, 0x47, 0x8b, 0x09,
+ 0x08, 0x03, 0x01, 0x61, 0x6d, 0x07, 0xc1, 0x61, 0x8a, 0x83, 0x09, 0x05,
+ 0xaa, 0x01, 0x61, 0x99, 0xc3, 0x0a, 0xe2, 0x09, 0x05, 0x0b, 0x01, 0x61,
+ 0xd5, 0xc3, 0x05, 0x4e, 0x09, 0x05, 0x03, 0x01, 0x61, 0xd9, 0x14, 0xc1,
+ 0x61, 0xdf, 0x9f, 0x09, 0x04, 0x6b, 0x01, 0x61, 0xee, 0x90, 0x09, 0x04,
+ 0xbb, 0x01, 0x61, 0xf4, 0x8e, 0x09, 0x04, 0xb1, 0xc3, 0xe0, 0x5f, 0x09,
+ 0x04, 0xa9, 0xc3, 0x03, 0x30, 0x09, 0x04, 0xa1, 0x00, 0x41, 0x61, 0xf8,
+ 0x97, 0x09, 0x03, 0xd3, 0x01, 0x62, 0x04, 0x8b, 0x09, 0x03, 0x93, 0x01,
+ 0x62, 0x27, 0x83, 0x09, 0x02, 0xaa, 0x01, 0x62, 0x42, 0x97, 0x09, 0x02,
+ 0x6b, 0x01, 0x62, 0x5a, 0x83, 0x09, 0x02, 0x03, 0x01, 0x62, 0x6e, 0x8b,
+ 0x09, 0x02, 0x4a, 0x01, 0x62, 0x92, 0x86, 0x09, 0x00, 0xe3, 0x01, 0x62,
+ 0x98, 0x84, 0x09, 0x00, 0x53, 0x01, 0x62, 0x9e, 0xc3, 0x01, 0xc3, 0x09,
+ 0x01, 0x5b, 0x01, 0x62, 0xa9, 0x15, 0xc1, 0x62, 0xaf, 0x14, 0xc1, 0x62,
+ 0xbc, 0xc3, 0x0e, 0x61, 0x09, 0x01, 0x99, 0x90, 0x09, 0x01, 0x6b, 0x01,
+ 0x62, 0xcb, 0x8e, 0x09, 0x01, 0x03, 0x01, 0x62, 0xd5, 0x8d, 0x09, 0x00,
+ 0xeb, 0x01, 0x62, 0xe7, 0x9f, 0x09, 0x00, 0x49, 0x47, 0x03, 0x4c, 0x41,
+ 0x62, 0xed, 0x8b, 0x09, 0x13, 0xfb, 0x01, 0x63, 0x1b, 0xc4, 0x73, 0x32,
+ 0x09, 0x13, 0xf3, 0x01, 0x63, 0x23, 0x83, 0x09, 0x13, 0xd2, 0x01, 0x63,
+ 0x29, 0x97, 0x09, 0x14, 0x91, 0x8b, 0x09, 0x14, 0x89, 0x83, 0x09, 0x14,
+ 0x7a, 0x01, 0x63, 0x35, 0xc2, 0x01, 0xe2, 0x09, 0x0a, 0xb1, 0x94, 0x09,
+ 0x0a, 0xa9, 0x90, 0x09, 0x0a, 0xa1, 0x8f, 0x09, 0x0a, 0x73, 0x01, 0x63,
+ 0x39, 0x8e, 0x09, 0x0a, 0x5b, 0x01, 0x63, 0x43, 0x89, 0x09, 0x0a, 0x2b,
+ 0x01, 0x63, 0x4d, 0xc3, 0x7e, 0x08, 0x09, 0x0a, 0x13, 0x01, 0x63, 0x54,
+ 0x84, 0x09, 0x0a, 0x09, 0xc2, 0x00, 0xd3, 0x09, 0x0a, 0x00, 0xc9, 0xa8,
+ 0xd3, 0x09, 0x23, 0xa1, 0xc8, 0xbd, 0xe2, 0x09, 0x23, 0x99, 0xc5, 0x33,
+ 0x24, 0x09, 0x23, 0x90, 0x43, 0x02, 0x6f, 0xc1, 0x63, 0x5a, 0x44, 0xe0,
+ 0x57, 0x41, 0x63, 0x82, 0x45, 0x00, 0x2d, 0xc1, 0x63, 0x8e, 0x47, 0xc0,
+ 0x43, 0x41, 0x63, 0xb6, 0x45, 0x1b, 0xec, 0xc1, 0x63, 0xc6, 0x43, 0x4d,
+ 0x57, 0xc1, 0x63, 0xeb, 0x54, 0x38, 0x68, 0x41, 0x64, 0x13, 0x44, 0x0d,
+ 0x14, 0xc1, 0x64, 0x1f, 0x44, 0x09, 0x9e, 0x41, 0x64, 0x43, 0x43, 0x02,
+ 0x6f, 0xc1, 0x64, 0x72, 0x50, 0x5b, 0x82, 0x41, 0x64, 0x98, 0x43, 0x02,
+ 0xa0, 0xc1, 0x64, 0xa4, 0x45, 0x02, 0xde, 0x41, 0x64, 0xc9, 0x42, 0x01,
+ 0xc8, 0xc1, 0x64, 0xee, 0xd1, 0x57, 0x2e, 0x01, 0x1d, 0x50, 0xc8, 0xb7,
+ 0x32, 0x0f, 0xa5, 0x89, 0xc4, 0x00, 0xba, 0x00, 0x05, 0x20, 0xc8, 0x7d,
+ 0xa4, 0x07, 0xf2, 0x51, 0xc8, 0x80, 0x2e, 0x07, 0xf2, 0x70, 0x9f, 0x09,
+ 0x7f, 0x91, 0x9e, 0x09, 0x7f, 0x88, 0x1e, 0xc1, 0x64, 0xfa, 0x1d, 0x41,
+ 0x65, 0x06, 0x26, 0xc1, 0x65, 0x2a, 0x25, 0xc1, 0x65, 0x4e, 0x24, 0xc1,
+ 0x65, 0x76, 0x23, 0xc1, 0x65, 0x9d, 0x22, 0xc1, 0x65, 0xc1, 0x21, 0xc1,
+ 0x65, 0xe5, 0x20, 0xc1, 0x65, 0xfd, 0x1f, 0xc1, 0x66, 0x1d, 0x1e, 0xc1,
+ 0x66, 0x3d, 0x1d, 0x41, 0x66, 0x5c, 0x87, 0x08, 0x41, 0x99, 0x8b, 0x08,
+ 0x41, 0xa1, 0x91, 0x08, 0x41, 0xa9, 0x83, 0x08, 0x41, 0x90, 0x83, 0x08,
+ 0x41, 0xb9, 0x87, 0x08, 0x41, 0xc0, 0x83, 0x08, 0x41, 0xe1, 0x91, 0x08,
+ 0x41, 0xf8, 0x83, 0x08, 0x40, 0x29, 0x91, 0x08, 0x40, 0x40, 0x83, 0x08,
+ 0x40, 0x51, 0x87, 0x08, 0x40, 0x59, 0x8b, 0x08, 0x40, 0x61, 0x91, 0x08,
+ 0x40, 0x69, 0x97, 0x08, 0x40, 0x70, 0x83, 0x08, 0x40, 0x79, 0x87, 0x08,
+ 0x40, 0x81, 0x8b, 0x08, 0x40, 0x89, 0x91, 0x08, 0x40, 0x91, 0x97, 0x08,
+ 0x40, 0x98, 0x83, 0x08, 0x40, 0xa1, 0x87, 0x08, 0x40, 0xa9, 0x8b, 0x08,
+ 0x40, 0xb1, 0x91, 0x08, 0x40, 0xb9, 0x97, 0x08, 0x40, 0xc0, 0x83, 0x08,
+ 0x40, 0xc9, 0x87, 0x08, 0x40, 0xd1, 0x8b, 0x08, 0x40, 0xd9, 0x91, 0x08,
+ 0x40, 0xe1, 0x97, 0x08, 0x40, 0xe8, 0x83, 0x08, 0x40, 0xf1, 0x87, 0x08,
+ 0x40, 0xf9, 0x8b, 0x08, 0x41, 0x01, 0x91, 0x08, 0x41, 0x09, 0x97, 0x08,
+ 0x41, 0x10, 0x83, 0x08, 0x41, 0x19, 0x87, 0x08, 0x41, 0x21, 0x8b, 0x08,
+ 0x41, 0x29, 0x91, 0x08, 0x41, 0x31, 0x97, 0x08, 0x41, 0x38, 0x83, 0x08,
+ 0x41, 0x41, 0x87, 0x08, 0x41, 0x49, 0x8b, 0x08, 0x41, 0x51, 0x91, 0x08,
+ 0x41, 0x59, 0x97, 0x08, 0x41, 0x60, 0x83, 0x08, 0x41, 0x69, 0x87, 0x08,
+ 0x41, 0x71, 0x8b, 0x08, 0x41, 0x79, 0x91, 0x08, 0x41, 0x81, 0x97, 0x08,
+ 0x41, 0x88, 0x97, 0x00, 0x22, 0x1b, 0x01, 0x66, 0x7c, 0x16, 0xc1, 0x66,
+ 0x8f, 0x19, 0xc1, 0x66, 0xb2, 0x10, 0xc1, 0x66, 0xbc, 0x0e, 0xc1, 0x66,
+ 0xce, 0x14, 0xc1, 0x66, 0xe6, 0x87, 0x00, 0x22, 0x6b, 0x01, 0x66, 0xf8,
+ 0x06, 0xc1, 0x67, 0x25, 0x15, 0xc1, 0x67, 0x48, 0x12, 0xc1, 0x67, 0x6a,
+ 0x83, 0x00, 0x21, 0x83, 0x01, 0x67, 0x7d, 0xc2, 0x0f, 0x9a, 0x00, 0x28,
+ 0xd9, 0x1b, 0xc1, 0x67, 0x8f, 0x0d, 0xc1, 0x67, 0xab, 0x0a, 0xc1, 0x67,
+ 0xc8, 0x09, 0xc1, 0x67, 0xd5, 0x04, 0xc1, 0x67, 0xe4, 0x91, 0x00, 0x21,
+ 0xf3, 0x01, 0x68, 0x02, 0x8b, 0x00, 0x21, 0xc3, 0x01, 0x68, 0x15, 0x1c,
+ 0xc1, 0x68, 0x32, 0x05, 0xc1, 0x68, 0x3d, 0x44, 0x13, 0x35, 0xc1, 0x68,
+ 0x58, 0xc2, 0x00, 0x5f, 0x00, 0x21, 0x91, 0xc2, 0x1c, 0x52, 0x00, 0x22,
+ 0xc1, 0xc4, 0xe0, 0x1b, 0x00, 0x23, 0x98, 0xc4, 0xe2, 0x37, 0x00, 0x26,
+ 0xa9, 0xc6, 0xcf, 0xe9, 0x00, 0x25, 0xa9, 0xc6, 0xce, 0xb7, 0x00, 0x25,
+ 0x28, 0x87, 0x00, 0x21, 0x6b, 0x01, 0x68, 0x64, 0x06, 0xc1, 0x68, 0x91,
+ 0x15, 0xc1, 0x68, 0xb4, 0x12, 0xc1, 0x68, 0xd6, 0x83, 0x00, 0x20, 0x83,
+ 0x01, 0x68, 0xe3, 0xc2, 0x00, 0x28, 0x00, 0x28, 0xe1, 0xc2, 0x0f, 0x9a,
+ 0x00, 0x28, 0xd1, 0x1b, 0xc1, 0x68, 0xf5, 0x14, 0xc1, 0x69, 0x11, 0x0e,
+ 0xc1, 0x69, 0x23, 0x0d, 0xc1, 0x69, 0x35, 0x0a, 0xc1, 0x69, 0x52, 0x09,
+ 0xc1, 0x69, 0x5f, 0x05, 0xc1, 0x69, 0x6e, 0x97, 0x00, 0x21, 0x1b, 0x01,
+ 0x69, 0x89, 0x04, 0xc1, 0x69, 0x96, 0x91, 0x00, 0x20, 0xf3, 0x01, 0x69,
+ 0xb4, 0x8b, 0x00, 0x20, 0xc3, 0x01, 0x69, 0xc7, 0x1c, 0xc1, 0x69, 0xe4,
+ 0x16, 0xc1, 0x69, 0xef, 0xc2, 0x1c, 0x52, 0x00, 0x20, 0x41, 0x10, 0xc1,
+ 0x6a, 0x06, 0xc2, 0x00, 0x5f, 0x00, 0x20, 0x91, 0x44, 0x13, 0x35, 0xc1,
+ 0x6a, 0x12, 0xc4, 0xe0, 0x1b, 0x00, 0x23, 0x90, 0xc4, 0xe2, 0x37, 0x00,
+ 0x26, 0xa1, 0xc6, 0xcf, 0xe9, 0x00, 0x25, 0xa1, 0xc6, 0xce, 0xb7, 0x00,
+ 0x25, 0x20, 0xc2, 0x02, 0xa0, 0x0f, 0xdf, 0x91, 0xc4, 0x02, 0xde, 0x0f,
+ 0xdf, 0x98, 0xc3, 0x09, 0x9e, 0x0f, 0xdf, 0xa1, 0xc3, 0x0d, 0x14, 0x0f,
+ 0xdf, 0xa8, 0xc2, 0x22, 0xcc, 0x0f, 0xdf, 0xb1, 0xc4, 0x18, 0x10, 0x0f,
+ 0xdf, 0xb8, 0xa0, 0x00, 0x04, 0x79, 0x9f, 0x00, 0x04, 0x70, 0x47, 0xc2,
+ 0x50, 0xc1, 0x6a, 0x1e, 0x43, 0x00, 0x2c, 0xc1, 0x6a, 0x2a, 0x0e, 0xc1,
+ 0x6a, 0x30, 0xde, 0x0f, 0xb8, 0x01, 0x00, 0xd9, 0xd4, 0x3e, 0xd0, 0x00,
+ 0x04, 0xd0, 0x47, 0x34, 0x2f, 0xc1, 0x6a, 0x3a, 0x46, 0x09, 0x97, 0x41,
+ 0x6a, 0x58, 0xcb, 0x1e, 0x89, 0x00, 0x6c, 0x09, 0x03, 0xc1, 0x6a, 0x76,
+ 0xc9, 0xb2, 0x24, 0x00, 0x6c, 0x18, 0x46, 0x02, 0x0f, 0xc1, 0x6a, 0x82,
+ 0x4a, 0x9d, 0xec, 0x41, 0x6a, 0xd0, 0xca, 0x63, 0xc8, 0x00, 0x6e, 0x79,
+ 0x0d, 0xc1, 0x6a, 0xf4, 0x45, 0x63, 0xc3, 0xc1, 0x6b, 0x00, 0x42, 0x01,
+ 0x30, 0x41, 0x6b, 0x1e, 0x47, 0x01, 0xbb, 0xc1, 0x6b, 0x2a, 0x43, 0x46,
+ 0xac, 0x41, 0x6b, 0x34, 0x0b, 0xc1, 0x6b, 0x46, 0xc8, 0x11, 0xf7, 0x0e,
+ 0xd4, 0x41, 0x0e, 0xc1, 0x6b, 0x52, 0x48, 0xb8, 0x0a, 0xc1, 0x6b, 0x5e,
+ 0x5c, 0x12, 0x39, 0x41, 0x6b, 0x70, 0x11, 0xc1, 0x6b, 0x7f, 0x46, 0x94,
+ 0x69, 0x41, 0x6b, 0x8b, 0xc8, 0x52, 0x00, 0x0e, 0xd4, 0x49, 0x48, 0x18,
+ 0xb0, 0xc1, 0x6b, 0x9d, 0x47, 0xc0, 0x12, 0xc1, 0x6b, 0xa9, 0x47, 0xc6,
+ 0xe8, 0xc1, 0x6b, 0xb9, 0x46, 0xd0, 0xb5, 0x41, 0x6b, 0xc5, 0x47, 0x7f,
+ 0x5a, 0xc1, 0x6b, 0xd7, 0x0b, 0x41, 0x6b, 0xdf, 0xe0, 0x00, 0x67, 0x0e,
+ 0xd3, 0xa8, 0x11, 0xc1, 0x6b, 0xe9, 0x07, 0xc1, 0x6b, 0xfb, 0x46, 0xcd,
+ 0x13, 0x41, 0x6c, 0x0a, 0xc9, 0xaa, 0xb9, 0x0e, 0xd3, 0x61, 0xc3, 0x10,
+ 0xa1, 0x0e, 0xd1, 0x81, 0x42, 0x0c, 0x43, 0x41, 0x6c, 0x16, 0x03, 0xc1,
+ 0x6c, 0x32, 0xc3, 0x01, 0x9c, 0x0e, 0xcf, 0xfa, 0x01, 0x6c, 0x3e, 0xc3,
+ 0x6b, 0x04, 0x0e, 0xd3, 0x51, 0x44, 0x12, 0x51, 0x41, 0x6c, 0x42, 0x47,
+ 0xc3, 0xdf, 0xc1, 0x6c, 0x52, 0x44, 0x1a, 0x39, 0x41, 0x6c, 0x6a, 0x45,
+ 0xdb, 0x37, 0xc1, 0x6c, 0x9e, 0x44, 0xdc, 0x0a, 0x41, 0x6c, 0xaa, 0x44,
+ 0xcf, 0x23, 0xc1, 0x6c, 0xbc, 0x44, 0x87, 0x15, 0x41, 0x6c, 0xc8, 0x4f,
+ 0x61, 0xa7, 0xc1, 0x6c, 0xd4, 0x47, 0xc6, 0x55, 0x41, 0x6c, 0xe6, 0xc7,
+ 0x0b, 0xc8, 0x0e, 0xc8, 0x51, 0xc8, 0x3b, 0xec, 0x0e, 0xc8, 0x49, 0xc6,
+ 0x24, 0x3b, 0x0e, 0xc8, 0x40, 0xca, 0x22, 0x51, 0x01, 0x39, 0xb1, 0xd4,
+ 0x3e, 0xbc, 0x0f, 0xa9, 0x79, 0xcd, 0x0e, 0x61, 0x0f, 0xbe, 0x68, 0x03,
+ 0xc1, 0x6d, 0x0e, 0x91, 0x08, 0xad, 0xd1, 0x87, 0x08, 0xad, 0xc1, 0xc9,
+ 0xb2, 0x2d, 0x08, 0xad, 0xa3, 0x01, 0x6d, 0x23, 0x97, 0x08, 0xad, 0x93,
+ 0x01, 0x6d, 0x27, 0x8b, 0x08, 0xad, 0x82, 0x01, 0x6d, 0x2b, 0x83, 0x08,
+ 0xac, 0x03, 0x01, 0x6d, 0x2f, 0x16, 0xc1, 0x6d, 0x41, 0xc2, 0x00, 0xd0,
+ 0x08, 0xad, 0x71, 0x15, 0xc1, 0x6d, 0x56, 0x18, 0xc1, 0x6d, 0x66, 0xc2,
+ 0x00, 0xdb, 0x08, 0xad, 0x49, 0xc2, 0x00, 0x39, 0x08, 0xad, 0x41, 0xc2,
+ 0x19, 0x2c, 0x08, 0xad, 0x39, 0xc2, 0x01, 0xc3, 0x08, 0xad, 0x31, 0x04,
+ 0xc1, 0x6d, 0x70, 0x12, 0xc1, 0x6d, 0x7a, 0x10, 0xc1, 0x6d, 0x84, 0x06,
+ 0xc1, 0x6d, 0x9a, 0x0c, 0xc1, 0x6d, 0xa8, 0x05, 0xc1, 0x6d, 0xb2, 0x09,
+ 0xc1, 0x6d, 0xbc, 0x0d, 0xc1, 0x6d, 0xc6, 0x91, 0x08, 0xac, 0x61, 0x87,
+ 0x08, 0xac, 0x51, 0x97, 0x08, 0xac, 0x23, 0x01, 0x6d, 0xd0, 0x8b, 0x08,
+ 0xac, 0x12, 0x01, 0x6d, 0xd4, 0x07, 0xc1, 0x6d, 0xd8, 0x44, 0x00, 0xbb,
+ 0x41, 0x6d, 0xe4, 0xa0, 0x08, 0xae, 0x41, 0x9f, 0x08, 0xae, 0x39, 0x9e,
+ 0x08, 0xae, 0x30, 0xcb, 0x97, 0xf5, 0x08, 0xae, 0x19, 0xc4, 0x19, 0x53,
+ 0x08, 0xae, 0x10, 0xd3, 0x41, 0x25, 0x0f, 0xad, 0x09, 0xd1, 0x53, 0x10,
+ 0x0f, 0xad, 0x01, 0xd4, 0x06, 0x73, 0x0f, 0xac, 0xd9, 0xd3, 0x43, 0x13,
+ 0x0f, 0xac, 0xd0, 0xd3, 0x41, 0x25, 0x0f, 0xac, 0xf9, 0xd1, 0x53, 0x10,
+ 0x0f, 0xac, 0xf1, 0xd4, 0x06, 0x73, 0x0f, 0xac, 0xc9, 0xd3, 0x43, 0x13,
+ 0x0f, 0xac, 0xc0, 0x11, 0xc1, 0x6e, 0x02, 0xcc, 0x86, 0x85, 0x01, 0x31,
+ 0x51, 0xc6, 0x0e, 0xbd, 0x01, 0x12, 0xd9, 0x45, 0x00, 0x8c, 0x41, 0x6e,
+ 0x0e, 0xc4, 0x27, 0xe3, 0x00, 0x00, 0x11, 0xc7, 0xc3, 0x92, 0x00, 0x00,
+ 0x09, 0x15, 0xc1, 0x6e, 0x1a, 0xce, 0x6d, 0x94, 0x00, 0x04, 0xb1, 0xcc,
+ 0x87, 0xc9, 0x00, 0x04, 0xb0, 0xc4, 0x1d, 0xa8, 0x01, 0x1f, 0x21, 0xc6,
+ 0xcd, 0xcd, 0x0f, 0xa6, 0x78, 0xcb, 0x99, 0x55, 0x0f, 0xde, 0x31, 0xc5,
+ 0x21, 0xd2, 0x0f, 0xde, 0x48, 0xc4, 0x00, 0x49, 0x0f, 0xde, 0x39, 0xc5,
+ 0x00, 0x2c, 0x0f, 0xde, 0x40, 0xcb, 0x1e, 0x89, 0x05, 0x46, 0x29, 0x42,
+ 0x07, 0xb2, 0xc1, 0x6e, 0x26, 0xc8, 0x14, 0x38, 0x05, 0x44, 0x00, 0x03,
+ 0xc1, 0x6e, 0x32, 0x91, 0x05, 0x46, 0x0b, 0x01, 0x6e, 0x3e, 0x87, 0x05,
+ 0x45, 0xf3, 0x01, 0x6e, 0x42, 0x48, 0xb2, 0x2d, 0xc1, 0x6e, 0x46, 0x8b,
+ 0x05, 0x45, 0xb3, 0x01, 0x6e, 0x54, 0x97, 0x05, 0x45, 0xc2, 0x01, 0x6e,
+ 0x58, 0x15, 0xc1, 0x6e, 0x5c, 0xc2, 0x00, 0xd0, 0x05, 0x45, 0x91, 0x0e,
+ 0xc1, 0x6e, 0x6c, 0x83, 0x05, 0x44, 0x13, 0x01, 0x6e, 0x76, 0x8b, 0x05,
+ 0x44, 0x23, 0x01, 0x6e, 0x82, 0x97, 0x05, 0x44, 0x33, 0x01, 0x6e, 0x86,
+ 0x18, 0xc1, 0x6e, 0x8a, 0x87, 0x05, 0x44, 0x63, 0x01, 0x6e, 0x94, 0x91,
+ 0x05, 0x44, 0x7b, 0x01, 0x6e, 0x98, 0x0d, 0xc1, 0x6e, 0x9c, 0x09, 0xc1,
+ 0x6e, 0xa6, 0x10, 0xc1, 0x6e, 0xb0, 0x05, 0xc1, 0x6e, 0xc6, 0x0c, 0xc1,
+ 0x6e, 0xd0, 0x16, 0xc1, 0x6e, 0xda, 0x06, 0xc1, 0x6e, 0xe8, 0x12, 0xc1,
+ 0x6e, 0xf6, 0x04, 0xc1, 0x6f, 0x00, 0xc2, 0x01, 0xc3, 0x05, 0x45, 0x51,
+ 0xc2, 0x19, 0x2c, 0x05, 0x45, 0x59, 0xc2, 0x00, 0x39, 0x05, 0x45, 0x60,
+ 0xc4, 0x19, 0x53, 0x05, 0x46, 0x71, 0xcb, 0x97, 0xf5, 0x05, 0x46, 0x79,
+ 0x45, 0x09, 0x98, 0x41, 0x6f, 0x0a, 0x47, 0x00, 0x58, 0xc1, 0x6f, 0x2e,
+ 0x48, 0xb9, 0x02, 0x41, 0x6f, 0x3a, 0x10, 0xc1, 0x6f, 0x40, 0xc6, 0xcd,
+ 0x6d, 0x00, 0x41, 0xe1, 0xc5, 0xd7, 0x0e, 0x00, 0x41, 0xa1, 0xc5, 0xd3,
+ 0xfd, 0x00, 0x41, 0x88, 0xcb, 0x96, 0x5e, 0x00, 0x41, 0xe9, 0xc9, 0xa9,
+ 0x99, 0x00, 0x41, 0xa8, 0xc3, 0xdd, 0x83, 0x00, 0x41, 0xd1, 0xc4, 0xe1,
+ 0x73, 0x00, 0x41, 0xc0, 0xc7, 0xc4, 0x33, 0x00, 0x41, 0x69, 0xce, 0x70,
+ 0x34, 0x00, 0x40, 0xd9, 0xc6, 0x64, 0xa4, 0x00, 0x40, 0xc9, 0xc9, 0xac,
+ 0x3c, 0x00, 0x40, 0xc1, 0xc2, 0x00, 0x74, 0x00, 0x40, 0xb2, 0x01, 0x6f,
+ 0x4c, 0x8b, 0x00, 0x41, 0x41, 0xc7, 0xc3, 0x4c, 0x00, 0x41, 0x21, 0xce,
+ 0x70, 0x34, 0x00, 0x40, 0xd0, 0xc4, 0xdb, 0xfb, 0x00, 0x41, 0x61, 0xc6,
+ 0xc3, 0x4d, 0x00, 0x41, 0x28, 0xc9, 0xb1, 0x4c, 0x00, 0x41, 0x0a, 0x01,
+ 0x6f, 0x52, 0x8b, 0x00, 0x41, 0x49, 0x97, 0x00, 0x41, 0x31, 0x83, 0x00,
+ 0x41, 0x13, 0x01, 0x6f, 0x56, 0x87, 0x00, 0x40, 0xe0, 0x83, 0x00, 0x41,
+ 0x00, 0xc3, 0xb8, 0xac, 0x00, 0x40, 0xa9, 0xc6, 0xcd, 0x07, 0x00, 0x40,
+ 0x89, 0xc2, 0x00, 0x8d, 0x00, 0x40, 0x40, 0xc3, 0x00, 0xd0, 0x00, 0x40,
+ 0xa1, 0xc6, 0xcf, 0x77, 0x00, 0x40, 0x70, 0x90, 0x00, 0x40, 0x79, 0x96,
+ 0x00, 0x40, 0x39, 0x9b, 0x00, 0x40, 0x20, 0xc2, 0x04, 0xc6, 0x00, 0x40,
+ 0x29, 0xc2, 0x00, 0x8d, 0x00, 0x40, 0x08, 0xc3, 0x02, 0x9b, 0x01, 0x52,
+ 0xc1, 0xc2, 0x00, 0xbf, 0x01, 0x52, 0xb8, 0xc6, 0x00, 0x91, 0x0f, 0xa5,
+ 0x21, 0xc4, 0x00, 0x87, 0x0f, 0xb1, 0xa1, 0xcd, 0x7f, 0x66, 0x0f, 0xb6,
+ 0x60, 0xc9, 0x00, 0xca, 0x01, 0x54, 0xab, 0x01, 0x6f, 0x5a, 0xcc, 0x07,
+ 0xc7, 0x01, 0x54, 0xb2, 0x01, 0x6f, 0x60, 0xc9, 0xab, 0x6d, 0x01, 0x5a,
+ 0xd1, 0xcd, 0x7d, 0x2a, 0x01, 0x5a, 0xe0, 0x15, 0xc1, 0x6f, 0x66, 0xd1,
+ 0x50, 0x68, 0x08, 0x8e, 0xe9, 0xca, 0x9d, 0x56, 0x08, 0x8e, 0xe1, 0x07,
+ 0xc1, 0x6f, 0x7c, 0x06, 0xc1, 0x6f, 0x88, 0x46, 0x34, 0x6f, 0xc1, 0x6f,
+ 0x9a, 0xd1, 0x50, 0xce, 0x08, 0x8e, 0x39, 0xc2, 0x00, 0x7a, 0x08, 0x8e,
+ 0x21, 0x47, 0x02, 0x0e, 0x41, 0x6f, 0xa6, 0xc4, 0xe3, 0x9f, 0x08, 0x22,
+ 0x81, 0x16, 0xc1, 0x70, 0x0b, 0xc4, 0xe0, 0xf7, 0x08, 0x22, 0x91, 0xc3,
+ 0x1b, 0x05, 0x08, 0x22, 0x99, 0x15, 0xc1, 0x70, 0x15, 0xc6, 0xcc, 0x05,
+ 0x08, 0x22, 0xb9, 0x42, 0x0c, 0x43, 0xc1, 0x70, 0x1f, 0x0a, 0xc1, 0x70,
+ 0x27, 0xc3, 0xe5, 0xae, 0x08, 0x22, 0xd1, 0xc4, 0xe3, 0x63, 0x08, 0x22,
+ 0xd9, 0xc3, 0x9e, 0xc8, 0x08, 0x22, 0xe1, 0xc3, 0x34, 0x6f, 0x08, 0x22,
+ 0xe9, 0xc3, 0xe5, 0x39, 0x08, 0x22, 0xf9, 0x0f, 0xc1, 0x70, 0x33, 0xc5,
+ 0xdd, 0x4e, 0x08, 0x23, 0x09, 0x42, 0x02, 0xa0, 0xc1, 0x70, 0x3f, 0xc4,
+ 0xe1, 0x0f, 0x08, 0x23, 0x21, 0x0b, 0xc1, 0x70, 0x49, 0x07, 0xc1, 0x70,
+ 0x59, 0x03, 0xc1, 0x70, 0x69, 0x11, 0xc1, 0x70, 0x8f, 0xc4, 0xdf, 0x73,
+ 0x08, 0x23, 0x71, 0xc3, 0x20, 0x18, 0x08, 0x23, 0x79, 0xc2, 0x02, 0xae,
+ 0x08, 0x23, 0x98, 0xc7, 0xc4, 0x64, 0x0d, 0xe5, 0x19, 0xc9, 0xb3, 0x05,
+ 0x0d, 0xe5, 0x11, 0xd2, 0x4c, 0x7f, 0x0d, 0xe5, 0x09, 0xce, 0x70, 0x42,
+ 0x0d, 0xe5, 0x00, 0x46, 0x03, 0x87, 0xc1, 0x70, 0xaf, 0xc9, 0xaf, 0x30,
+ 0x01, 0x56, 0xf1, 0xc9, 0x32, 0xb7, 0x01, 0x56, 0xfb, 0x01, 0x70, 0xb5,
+ 0xc7, 0xc4, 0x5d, 0x01, 0x57, 0x03, 0x01, 0x70, 0xbb, 0xd3, 0x46, 0xdc,
+ 0x01, 0x5a, 0x71, 0x04, 0x41, 0x70, 0xbf, 0x91, 0x01, 0x09, 0xa1, 0x87,
+ 0x01, 0x09, 0x79, 0x8e, 0x01, 0x08, 0x99, 0x89, 0x01, 0x08, 0x50, 0x8f,
+ 0x01, 0x09, 0x99, 0x88, 0x01, 0x09, 0x89, 0x87, 0x01, 0x09, 0x81, 0x84,
+ 0x01, 0x09, 0x61, 0x94, 0x01, 0x08, 0xd9, 0x92, 0x01, 0x08, 0xc1, 0x8e,
+ 0x01, 0x08, 0x91, 0x8b, 0x01, 0x08, 0x81, 0x8a, 0x01, 0x08, 0x58, 0xd0,
+ 0x5b, 0xc2, 0x0f, 0xc2, 0xb9, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xc9, 0xc5,
+ 0x01, 0xa2, 0x01, 0x0c, 0xcb, 0x01, 0x70, 0xcb, 0x49, 0x01, 0xaa, 0xc1,
+ 0x70, 0xcf, 0xcb, 0x01, 0xfc, 0x01, 0x58, 0x19, 0xcb, 0x94, 0x22, 0x01,
+ 0x58, 0x59, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x4a, 0x01, 0x70, 0xe1, 0xd0,
+ 0x5b, 0xc2, 0x0f, 0xc2, 0xb1, 0xc5, 0x01, 0xa2, 0x01, 0x0c, 0xc3, 0x01,
+ 0x70, 0xe7, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xc1, 0x49, 0x01, 0xaa, 0xc1,
+ 0x70, 0xeb, 0xcb, 0x01, 0xfc, 0x01, 0x58, 0x11, 0xcb, 0x94, 0x22, 0x01,
+ 0x58, 0x51, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x42, 0x01, 0x70, 0xfd, 0xc5,
+ 0x86, 0x2c, 0x08, 0xd4, 0xf9, 0xcc, 0x86, 0x25, 0x08, 0xd4, 0xf0, 0xc7,
+ 0x40, 0xe5, 0x08, 0xd4, 0xb9, 0xc8, 0x14, 0x38, 0x08, 0xd4, 0xb1, 0xcb,
+ 0x93, 0xf6, 0x08, 0xd4, 0x29, 0xcb, 0x8f, 0xe1, 0x08, 0xd4, 0x20, 0x8a,
+ 0x08, 0xd4, 0x98, 0x89, 0x08, 0xd4, 0x60, 0x83, 0x08, 0xd4, 0x49, 0xc2,
+ 0x00, 0xd0, 0x08, 0xd4, 0x40, 0xc3, 0x1d, 0x35, 0x08, 0xd4, 0x19, 0xc2,
+ 0x00, 0xd0, 0x08, 0xd2, 0xe9, 0x83, 0x08, 0xd2, 0xe0, 0x83, 0x08, 0xd4,
+ 0x09, 0xc2, 0x0d, 0xf6, 0x08, 0xd4, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0xd3,
+ 0xf8, 0x83, 0x08, 0xd3, 0xc9, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0xc0, 0xc2,
+ 0x02, 0x1c, 0x08, 0xd3, 0xb9, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0x71, 0x83,
+ 0x08, 0xd3, 0x69, 0x06, 0x41, 0x71, 0x03, 0x15, 0xc1, 0x71, 0x0d, 0xc2,
+ 0x00, 0xd0, 0x08, 0xd3, 0x61, 0x83, 0x08, 0xd3, 0x59, 0x16, 0x41, 0x71,
+ 0x17, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0x99, 0x83, 0x08, 0xd3, 0x90, 0xc2,
+ 0x00, 0xd0, 0x08, 0xd3, 0x89, 0x83, 0x08, 0xd3, 0x80, 0x83, 0x08, 0xd3,
+ 0x79, 0xc2, 0x00, 0xc1, 0x08, 0xd3, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0xd3,
+ 0x29, 0xc2, 0x01, 0x30, 0x08, 0xd3, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xd3,
+ 0x21, 0x83, 0x08, 0xd3, 0x18, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0x11, 0x83,
+ 0x08, 0xd3, 0x08, 0xc2, 0x00, 0xd0, 0x08, 0xd2, 0xf9, 0x83, 0x08, 0xd2,
+ 0xf0, 0x48, 0xb2, 0x2d, 0xc1, 0x71, 0x21, 0x03, 0xc1, 0x71, 0x29, 0x91,
+ 0x08, 0xd2, 0xab, 0x01, 0x71, 0x31, 0x87, 0x08, 0xd2, 0xa1, 0x97, 0x08,
+ 0xd2, 0x9b, 0x01, 0x71, 0x35, 0x8b, 0x08, 0xd2, 0x88, 0xc4, 0x18, 0x10,
+ 0x08, 0x87, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0x87, 0xb0, 0xc3, 0x0d, 0x14,
+ 0x08, 0x87, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0x87, 0xa0, 0xc4, 0x02, 0xde,
+ 0x08, 0x87, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0x87, 0x90, 0x87, 0x08, 0x87,
+ 0x41, 0x8a, 0x08, 0x86, 0xb0, 0x8a, 0x08, 0x87, 0x39, 0xc2, 0x16, 0x1c,
+ 0x08, 0x87, 0x18, 0xc3, 0x44, 0x79, 0x08, 0x87, 0x09, 0xc2, 0x02, 0x98,
+ 0x08, 0x86, 0xc9, 0xc3, 0x40, 0x40, 0x08, 0x86, 0xb8, 0xd1, 0x50, 0x57,
+ 0x08, 0x7a, 0xc1, 0xcd, 0x7a, 0x52, 0x08, 0x7a, 0xaa, 0x01, 0x71, 0x39,
+ 0xc8, 0x0d, 0x03, 0x08, 0x7a, 0xa0, 0xc5, 0x28, 0xee, 0x08, 0x7a, 0x99,
+ 0xc2, 0x00, 0xc4, 0x08, 0x7a, 0x90, 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x69,
+ 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x60, 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x59,
+ 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x50, 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x49,
+ 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x38, 0xc5, 0x00, 0xd4, 0x08, 0x7a, 0x41,
+ 0xc5, 0x05, 0x02, 0x08, 0x7a, 0x30, 0xc3, 0x26, 0x1a, 0x08, 0x7a, 0x21,
+ 0xc5, 0xcf, 0xd8, 0x08, 0x79, 0xc8, 0xc3, 0x11, 0xef, 0x08, 0x7a, 0x09,
+ 0x03, 0x41, 0x71, 0x3f, 0xc3, 0x16, 0x5a, 0x08, 0x79, 0xe9, 0xc4, 0x36,
+ 0xb5, 0x08, 0x79, 0x80, 0xc2, 0x00, 0x8e, 0x08, 0x79, 0xb0, 0x16, 0xc1,
+ 0x71, 0x4b, 0x08, 0xc1, 0x71, 0x5d, 0x19, 0xc1, 0x71, 0x65, 0x0e, 0xc1,
+ 0x71, 0x75, 0x11, 0xc1, 0x71, 0x8b, 0x0b, 0xc1, 0x71, 0xa4, 0x05, 0xc1,
+ 0x71, 0xb8, 0x14, 0xc1, 0x71, 0xde, 0x0a, 0xc1, 0x71, 0xf9, 0x06, 0xc1,
+ 0x72, 0x21, 0x12, 0xc1, 0x72, 0x47, 0x07, 0xc1, 0x72, 0x80, 0x03, 0xc1,
+ 0x72, 0x94, 0xc3, 0xdf, 0x37, 0x01, 0x98, 0x31, 0x0d, 0xc1, 0x72, 0xba,
+ 0x09, 0xc1, 0x73, 0x1b, 0x15, 0xc1, 0x73, 0x40, 0x10, 0xc1, 0x73, 0x58,
+ 0x04, 0xc1, 0x73, 0x79, 0x0f, 0xc1, 0x73, 0x99, 0x1b, 0xc1, 0x73, 0xec,
+ 0xc8, 0xbe, 0xda, 0x01, 0x9e, 0xf0, 0x0e, 0xc1, 0x73, 0xf8, 0x15, 0xc1,
+ 0x74, 0x02, 0x0d, 0xc1, 0x74, 0x32, 0xcc, 0x83, 0x3d, 0x01, 0x15, 0x09,
+ 0x16, 0xc1, 0x74, 0x3e, 0x0f, 0xc1, 0x74, 0x4e, 0x12, 0xc1, 0x74, 0x58,
+ 0x05, 0xc1, 0x74, 0x64, 0x18, 0xc1, 0x74, 0x74, 0x17, 0xc1, 0x74, 0x7e,
+ 0x0a, 0xc1, 0x74, 0x8a, 0x11, 0xc1, 0x74, 0x9e, 0x08, 0xc1, 0x74, 0xa8,
+ 0xc7, 0xc4, 0x56, 0x0f, 0x8c, 0xf9, 0x10, 0xc1, 0x74, 0xc0, 0xc2, 0x02,
+ 0xfb, 0x0f, 0x8c, 0xa1, 0xc8, 0x0a, 0xff, 0x01, 0x4e, 0x31, 0xd5, 0x36,
+ 0xc5, 0x01, 0x4e, 0x21, 0xc2, 0x15, 0x95, 0x0f, 0x8a, 0x78, 0xc9, 0xb0,
+ 0xf2, 0x01, 0x20, 0xd3, 0x01, 0x74, 0xca, 0xc4, 0x40, 0x89, 0x01, 0x21,
+ 0x01, 0xcf, 0x6a, 0x08, 0x01, 0x20, 0xb1, 0x45, 0xa0, 0x21, 0xc1, 0x74,
+ 0xd0, 0x48, 0x46, 0xa3, 0xc1, 0x74, 0xdc, 0xcf, 0x69, 0x45, 0x01, 0x0a,
+ 0x78, 0x07, 0xc1, 0x74, 0xe8, 0xcf, 0x61, 0x02, 0x01, 0x20, 0x80, 0x07,
+ 0xc1, 0x74, 0xf7, 0xc3, 0x11, 0xf7, 0x01, 0x20, 0x00, 0xcd, 0x7d, 0xe0,
+ 0x01, 0x20, 0xe1, 0xc8, 0xb7, 0xfa, 0x01, 0x20, 0x60, 0xc5, 0x61, 0x0c,
+ 0x01, 0x20, 0xd9, 0x10, 0x41, 0x75, 0x03, 0xc4, 0x23, 0xca, 0x01, 0x20,
+ 0xc1, 0xcd, 0x75, 0x58, 0x01, 0x20, 0x68, 0xc8, 0xb8, 0x9a, 0x01, 0x20,
+ 0x41, 0xc3, 0x08, 0x93, 0x01, 0x20, 0x38, 0x0f, 0xc1, 0x75, 0x0f, 0xc2,
+ 0x00, 0x67, 0x00, 0x39, 0x33, 0x01, 0x75, 0x1b, 0x16, 0xc1, 0x75, 0x21,
+ 0x15, 0xc1, 0x75, 0x30, 0x14, 0xc1, 0x75, 0x4e, 0xc4, 0xc0, 0x4b, 0x00,
+ 0x39, 0x49, 0x87, 0x00, 0x39, 0x29, 0xcd, 0x7e, 0x14, 0x00, 0x39, 0x21,
+ 0xc3, 0x20, 0x18, 0x00, 0x39, 0x11, 0xc6, 0xd0, 0xcd, 0x00, 0x39, 0x01,
+ 0xc4, 0xe0, 0xe7, 0x00, 0x38, 0xf9, 0xc4, 0xde, 0xef, 0x00, 0x38, 0xeb,
+ 0x01, 0x75, 0x5a, 0xc2, 0x01, 0x7f, 0x00, 0x38, 0xbb, 0x01, 0x75, 0x60,
+ 0xc4, 0x69, 0x81, 0x00, 0x38, 0xc9, 0xc3, 0x7e, 0x89, 0x00, 0x38, 0xc1,
+ 0x06, 0xc1, 0x75, 0x66, 0xc5, 0xd7, 0x5e, 0x00, 0x38, 0x9b, 0x01, 0x75,
+ 0x72, 0xc4, 0xe3, 0x27, 0x00, 0x38, 0x91, 0xc5, 0x58, 0x4d, 0x00, 0x38,
+ 0x80, 0x44, 0x7c, 0x67, 0xc1, 0x75, 0x78, 0x48, 0xbf, 0x2a, 0xc1, 0x75,
+ 0x82, 0xcf, 0x62, 0xf1, 0x00, 0x38, 0x28, 0xc7, 0x08, 0x6b, 0x00, 0x39,
+ 0xc9, 0xca, 0x01, 0x68, 0x00, 0x39, 0xc0, 0x45, 0xd8, 0x94, 0xc1, 0x75,
+ 0x94, 0xc4, 0xde, 0xa7, 0x00, 0x39, 0xf9, 0xc7, 0xc4, 0x2c, 0x00, 0x3a,
+ 0x10, 0xc6, 0x19, 0x7a, 0x00, 0x39, 0xa9, 0xc5, 0x05, 0x02, 0x00, 0x39,
+ 0xa1, 0xc5, 0x00, 0xd4, 0x00, 0x39, 0x98, 0xc6, 0x19, 0x7a, 0x00, 0x39,
+ 0x91, 0xc5, 0x05, 0x02, 0x00, 0x39, 0x89, 0xc5, 0x00, 0xd4, 0x00, 0x39,
+ 0x80, 0xc9, 0xaf, 0x0c, 0x00, 0x38, 0x51, 0x4b, 0x8f, 0xd6, 0x41, 0x75,
+ 0xa0, 0x48, 0xbf, 0x02, 0xc1, 0x75, 0xac, 0x4a, 0x9f, 0x22, 0x41, 0x75,
+ 0xbb, 0xcf, 0x60, 0x12, 0x00, 0x38, 0x01, 0x45, 0x75, 0x81, 0x41, 0x75,
+ 0xca, 0x51, 0x55, 0x41, 0xc1, 0x75, 0xd6, 0x4a, 0x0e, 0x7d, 0x41, 0x75,
+ 0xe2, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0x39, 0xc5, 0x05, 0x02, 0x00, 0x3a,
+ 0x40, 0x91, 0x05, 0x40, 0x39, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x40, 0x91,
+ 0x05, 0x40, 0x49, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x50, 0x91, 0x05, 0x40,
+ 0x61, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x68, 0x16, 0xc1, 0x75, 0xee, 0x91,
+ 0x05, 0x40, 0xa1, 0xc2, 0x01, 0x23, 0x05, 0x40, 0xa8, 0x06, 0xc1, 0x75,
+ 0xf8, 0x91, 0x05, 0x40, 0xb1, 0xc2, 0x01, 0x23, 0x05, 0x40, 0xb8, 0x91,
+ 0x05, 0x40, 0x71, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x78, 0x91, 0x05, 0x40,
+ 0xc9, 0xc2, 0x01, 0x23, 0x05, 0x40, 0xd0, 0x91, 0x05, 0x40, 0xd9, 0xc2,
+ 0x01, 0x23, 0x05, 0x40, 0xe0, 0x91, 0x05, 0x40, 0xf1, 0xc2, 0x00, 0x79,
+ 0x05, 0x41, 0x00, 0xc7, 0x14, 0x39, 0x05, 0x40, 0x59, 0xd0, 0x5a, 0xd2,
+ 0x05, 0x41, 0x60, 0x46, 0x00, 0x8b, 0x41, 0x76, 0x02, 0x95, 0x01, 0x39,
+ 0x40, 0xd1, 0x4f, 0xe0, 0x01, 0x3e, 0x49, 0xc2, 0x00, 0x55, 0x01, 0x14,
+ 0x1b, 0x01, 0x76, 0x14, 0x46, 0x00, 0xd4, 0xc1, 0x76, 0x18, 0x45, 0x00,
+ 0x8c, 0xc1, 0x76, 0x24, 0x47, 0x13, 0x6d, 0x41, 0x76, 0x36, 0x0e, 0xc1,
+ 0x76, 0x42, 0xd1, 0x1a, 0x4a, 0x01, 0x03, 0xf1, 0x07, 0xc1, 0x76, 0x4e,
+ 0xc5, 0x1d, 0x1d, 0x01, 0x03, 0xd9, 0xc9, 0x60, 0xf3, 0x01, 0x03, 0xd1,
+ 0xc4, 0x26, 0x78, 0x01, 0x03, 0xc9, 0x15, 0xc1, 0x76, 0x5a, 0x08, 0xc1,
+ 0x76, 0x66, 0xc4, 0x15, 0xe7, 0x01, 0x03, 0x81, 0x16, 0xc1, 0x76, 0x72,
+ 0xc3, 0x05, 0x14, 0x00, 0x05, 0xc8, 0xca, 0xa1, 0x98, 0x00, 0xe6, 0x39,
+ 0xca, 0xa4, 0x86, 0x00, 0xe6, 0x31, 0xca, 0x9c, 0x8e, 0x00, 0xe6, 0x29,
+ 0xcb, 0x90, 0x23, 0x00, 0xe6, 0x21, 0xc5, 0xdd, 0x53, 0x00, 0xe6, 0x19,
+ 0x12, 0xc1, 0x76, 0x7e, 0xc5, 0xdd, 0xb7, 0x00, 0xe6, 0x00, 0x08, 0xc1,
+ 0x76, 0x8a, 0x04, 0xc1, 0x76, 0x94, 0x0e, 0xc1, 0x76, 0x9e, 0x14, 0xc1,
+ 0x76, 0xa8, 0x15, 0xc1, 0x76, 0xb2, 0x0d, 0xc1, 0x76, 0xbc, 0xc2, 0x00,
+ 0xd0, 0x00, 0xdd, 0x01, 0xc2, 0x8d, 0x8f, 0x00, 0xdc, 0xf9, 0xc2, 0x01,
+ 0x4a, 0x00, 0xdc, 0xe9, 0xc2, 0x19, 0x2c, 0x00, 0xdc, 0xd1, 0xc2, 0x01,
+ 0xc3, 0x00, 0xdc, 0xc9, 0xc2, 0x02, 0x41, 0x00, 0xdc, 0xb9, 0xc2, 0x00,
+ 0xb0, 0x00, 0xdc, 0xa9, 0x10, 0xc1, 0x76, 0xc6, 0xc2, 0x0e, 0x9a, 0x00,
+ 0xdc, 0x99, 0xc2, 0x01, 0x6f, 0x00, 0xdc, 0x91, 0xc2, 0x02, 0x1c, 0x00,
+ 0xdc, 0x81, 0xc2, 0x25, 0x3b, 0x00, 0xdc, 0x79, 0xc2, 0x00, 0x64, 0x00,
+ 0xdc, 0x71, 0xc2, 0x01, 0x30, 0x00, 0xdc, 0x61, 0xc2, 0x0f, 0x9a, 0x00,
+ 0xdc, 0x59, 0x87, 0x00, 0xdc, 0x43, 0x01, 0x76, 0xd6, 0x91, 0x00, 0xdc,
+ 0x39, 0x83, 0x00, 0xdc, 0x1b, 0x01, 0x76, 0xda, 0x97, 0x00, 0xdc, 0x29,
+ 0x8b, 0x00, 0xdc, 0x20, 0xc4, 0x26, 0x78, 0x00, 0xdd, 0xc9, 0xc5, 0x06,
+ 0xdb, 0x00, 0xdd, 0xc1, 0x15, 0xc1, 0x76, 0xde, 0x08, 0xc1, 0x76, 0xea,
+ 0x16, 0xc1, 0x76, 0xf6, 0xc3, 0x05, 0x14, 0x00, 0xdd, 0x89, 0xc4, 0x15,
+ 0xe7, 0x00, 0xdd, 0x80, 0x47, 0xc1, 0xe7, 0xc1, 0x77, 0x02, 0x42, 0x16,
+ 0x59, 0xc1, 0x77, 0x0e, 0xc7, 0xc3, 0x5a, 0x00, 0xdd, 0x08, 0xc6, 0x1e,
+ 0x95, 0x00, 0xdd, 0x59, 0x42, 0x00, 0xb0, 0x41, 0x77, 0x1a, 0x10, 0xc1,
+ 0x77, 0x24, 0xc5, 0xdb, 0x1e, 0x00, 0xdd, 0x40, 0xca, 0x37, 0x4e, 0x01,
+ 0x13, 0xf9, 0xc5, 0x07, 0x62, 0x01, 0x13, 0xe8, 0x4c, 0x24, 0x3b, 0xc1,
+ 0x77, 0x42, 0xcb, 0x0e, 0xbd, 0x01, 0x55, 0xa1, 0x44, 0x1f, 0xb2, 0xc1,
+ 0x77, 0x4e, 0xcf, 0x6a, 0x8f, 0x01, 0x55, 0xc0, 0x00, 0x41, 0x77, 0x5a,
+ 0xd0, 0x03, 0xb7, 0x01, 0x4b, 0xc9, 0x42, 0x06, 0x62, 0x41, 0x77, 0x6f,
+ 0xc3, 0x02, 0xa3, 0x01, 0x55, 0xe9, 0xcf, 0x60, 0xf3, 0x01, 0x55, 0xf9,
+ 0xd9, 0x1f, 0x18, 0x01, 0x56, 0x08, 0xca, 0x0e, 0xbe, 0x01, 0x04, 0x61,
+ 0xc4, 0x00, 0x2d, 0x01, 0x04, 0x40, 0xc4, 0x18, 0x10, 0x01, 0x04, 0x39,
+ 0xc2, 0x22, 0xcc, 0x01, 0x04, 0x30, 0xc3, 0x0d, 0x14, 0x01, 0x04, 0x29,
+ 0xc3, 0x09, 0x9e, 0x01, 0x04, 0x20, 0xc4, 0x02, 0xde, 0x01, 0x04, 0x19,
+ 0xc2, 0x02, 0xa0, 0x01, 0x04, 0x10, 0x4a, 0x00, 0x87, 0xc1, 0x77, 0x7b,
+ 0x4e, 0x1d, 0x3c, 0x41, 0x77, 0x92, 0x42, 0x00, 0x99, 0xc1, 0x77, 0x9e,
+ 0x07, 0xc1, 0x77, 0xb0, 0x14, 0xc1, 0x77, 0xcb, 0x16, 0xc1, 0x77, 0xdd,
+ 0xcc, 0x87, 0x21, 0x0f, 0xa9, 0xc9, 0xce, 0x71, 0xf4, 0x0f, 0xa9, 0xc1,
+ 0xd1, 0x55, 0x96, 0x01, 0x53, 0x09, 0x03, 0xc1, 0x77, 0xe9, 0xd1, 0x54,
+ 0x0f, 0x07, 0xf2, 0x89, 0xc9, 0x11, 0xf6, 0x07, 0xf2, 0x91, 0xc9, 0xa8,
+ 0x55, 0x07, 0xf2, 0xa1, 0xcd, 0x2c, 0xb2, 0x07, 0xf2, 0xb1, 0x42, 0x00,
+ 0x49, 0xc1, 0x77, 0xfb, 0xcb, 0x97, 0x9d, 0x07, 0xf2, 0xf9, 0x12, 0xc1,
+ 0x78, 0x07, 0xcc, 0x89, 0xcd, 0x07, 0xf3, 0x19, 0xd1, 0x54, 0xb9, 0x07,
+ 0xf3, 0x29, 0xcb, 0x99, 0x60, 0x07, 0xf3, 0x48, 0xcc, 0x23, 0x9f, 0x01,
+ 0x55, 0x60, 0x02, 0xc1, 0x78, 0x13, 0x00, 0x41, 0x78, 0x1b, 0xce, 0x50,
+ 0xaf, 0x01, 0x1c, 0xc9, 0xc2, 0x00, 0x29, 0x0f, 0xad, 0x42, 0x01, 0x78,
+ 0x27, 0xc2, 0x00, 0xcc, 0x0f, 0xa3, 0xc0, 0xc5, 0x07, 0x62, 0x01, 0x10,
+ 0xe8, 0xd5, 0x37, 0x43, 0x01, 0x17, 0x41, 0xce, 0x74, 0x32, 0x01, 0x15,
+ 0x81, 0x46, 0x23, 0xa0, 0xc1, 0x78, 0x2d, 0x46, 0x00, 0xd4, 0x41, 0x78,
+ 0x39, 0x42, 0x00, 0x99, 0xc1, 0x78, 0x51, 0xc9, 0xa8, 0x55, 0x07, 0xf0,
+ 0xa1, 0x07, 0xc1, 0x78, 0x5d, 0xcd, 0x2c, 0xb2, 0x07, 0xf0, 0xb1, 0xd3,
+ 0x22, 0x78, 0x07, 0xf0, 0xc9, 0xce, 0x72, 0x1e, 0x07, 0xf1, 0x81, 0xcd,
+ 0x80, 0x29, 0x07, 0xf1, 0xa1, 0x0e, 0xc1, 0x78, 0x6f, 0x46, 0x00, 0x2c,
+ 0xc1, 0x78, 0x7b, 0x4c, 0x1c, 0x86, 0x41, 0x78, 0xa9, 0xcd, 0x80, 0x1c,
+ 0x01, 0x18, 0xc1, 0xc7, 0xc4, 0x72, 0x0f, 0xb6, 0x80, 0x04, 0xc1, 0x78,
+ 0xb5, 0x47, 0x70, 0xa5, 0xc1, 0x78, 0xc1, 0x16, 0xc1, 0x78, 0xd9, 0x08,
+ 0xc1, 0x78, 0xf1, 0x15, 0xc1, 0x78, 0xfb, 0x49, 0xb2, 0x12, 0xc1, 0x79,
+ 0x07, 0x48, 0xbb, 0x82, 0xc1, 0x79, 0x1f, 0x48, 0xb7, 0x1a, 0xc1, 0x79,
+ 0x37, 0x0d, 0xc1, 0x79, 0x4f, 0x49, 0xa8, 0xf7, 0xc1, 0x79, 0x5b, 0xc9,
+ 0xa9, 0x7e, 0x0f, 0x85, 0xf9, 0xcb, 0x8d, 0x16, 0x0f, 0x86, 0xf8, 0x16,
+ 0xc1, 0x79, 0x73, 0x08, 0x41, 0x79, 0x7f, 0x00, 0x41, 0x79, 0x8b, 0x46,
+ 0x08, 0xf1, 0xc1, 0x79, 0x9d, 0xc9, 0xb0, 0xa1, 0x0f, 0xa6, 0x20, 0x00,
+ 0xc1, 0x79, 0xa9, 0xd8, 0x25, 0xbb, 0x01, 0x33, 0xe8, 0x4d, 0x29, 0xb9,
+ 0xc1, 0x79, 0xb5, 0x4f, 0x0b, 0x17, 0x41, 0x7a, 0x1d, 0x16, 0xc1, 0x7a,
+ 0x85, 0xc8, 0x4b, 0x5f, 0x01, 0x24, 0x31, 0x07, 0xc1, 0x7a, 0x97, 0x15,
+ 0xc1, 0x7a, 0xa3, 0x08, 0x41, 0x7a, 0xaf, 0xc4, 0x26, 0x78, 0x01, 0x23,
+ 0xe1, 0xc5, 0x06, 0xdb, 0x01, 0x23, 0xd9, 0x15, 0xc1, 0x7a, 0xbb, 0x08,
+ 0xc1, 0x7a, 0xc7, 0x16, 0xc1, 0x7a, 0xd3, 0xc3, 0x05, 0x14, 0x01, 0x23,
+ 0xa0, 0x0d, 0xc1, 0x7a, 0xdf, 0xc5, 0xd9, 0x61, 0x01, 0x90, 0x0b, 0x01,
+ 0x7a, 0xf1, 0x16, 0xc1, 0x7a, 0xf7, 0xc5, 0xd6, 0x8c, 0x01, 0x90, 0x1b,
+ 0x01, 0x7b, 0x09, 0xc5, 0xda, 0xe7, 0x01, 0x90, 0x23, 0x01, 0x7b, 0x0f,
+ 0x12, 0xc1, 0x7b, 0x15, 0xc4, 0xad, 0x2b, 0x01, 0x90, 0x33, 0x01, 0x7b,
+ 0x27, 0xc5, 0xb7, 0x9d, 0x01, 0x90, 0x3b, 0x01, 0x7b, 0x2d, 0x05, 0xc1,
+ 0x7b, 0x33, 0xc5, 0x90, 0xe4, 0x01, 0x90, 0x6a, 0x01, 0x7b, 0x45, 0xc4,
+ 0xe1, 0x47, 0x01, 0x90, 0xe9, 0xc3, 0x0d, 0x03, 0x01, 0x90, 0xf0, 0xc3,
+ 0x05, 0x14, 0x01, 0x91, 0x01, 0x16, 0xc1, 0x7b, 0x4b, 0x08, 0xc1, 0x7b,
+ 0x5d, 0x15, 0xc1, 0x7b, 0x6d, 0x07, 0xc1, 0x7b, 0x8b, 0x10, 0xc1, 0x7b,
+ 0x9d, 0x0f, 0xc1, 0x7b, 0xa9, 0x19, 0xc1, 0x7b, 0xb5, 0xc4, 0xdf, 0xbf,
+ 0x01, 0x91, 0x91, 0x05, 0xc1, 0x7b, 0xc1, 0xc5, 0xdd, 0x71, 0x01, 0x91,
+ 0xc1, 0x42, 0x01, 0x19, 0xc1, 0x7b, 0xcd, 0xc8, 0xba, 0x62, 0x01, 0x91,
+ 0xf8, 0xc2, 0x00, 0xf1, 0x01, 0x11, 0x29, 0x45, 0x00, 0x8c, 0x41, 0x7b,
+ 0xdd, 0xca, 0x1b, 0x09, 0x01, 0x01, 0x49, 0xc2, 0x07, 0xa3, 0x01, 0x70,
+ 0x79, 0xc7, 0x62, 0x81, 0x01, 0x72, 0x68, 0xc5, 0x26, 0xf7, 0x08, 0xd7,
+ 0xc1, 0xc7, 0x41, 0x71, 0x08, 0xd7, 0x80, 0x00, 0x41, 0x7b, 0xe9, 0x08,
+ 0xc1, 0x7b, 0xf8, 0x8b, 0x08, 0xd6, 0xbb, 0x01, 0x7c, 0x02, 0x97, 0x08,
+ 0xd6, 0xcb, 0x01, 0x7c, 0x06, 0x91, 0x08, 0xd6, 0xc1, 0x87, 0x08, 0xd6,
+ 0xb1, 0x83, 0x08, 0xd6, 0xa9, 0x05, 0xc1, 0x7c, 0x0a, 0xc2, 0x00, 0x39,
+ 0x08, 0xd6, 0x91, 0x12, 0xc1, 0x7c, 0x14, 0x10, 0xc1, 0x7c, 0x1e, 0x16,
+ 0xc1, 0x7c, 0x28, 0xc2, 0x01, 0x5d, 0x08, 0xd6, 0x61, 0xc2, 0x0d, 0xf6,
+ 0x08, 0xd6, 0x59, 0x0d, 0xc1, 0x7c, 0x32, 0xc2, 0x01, 0x30, 0x08, 0xd6,
+ 0x49, 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0x41, 0xc2, 0x02, 0x41, 0x08, 0xd6,
+ 0x31, 0xc2, 0x02, 0x1c, 0x08, 0xd6, 0x29, 0xc2, 0x0e, 0x9a, 0x08, 0xd6,
+ 0x21, 0xc2, 0x01, 0xc3, 0x08, 0xd6, 0x19, 0xc2, 0x00, 0xdb, 0x08, 0xd6,
+ 0x10, 0xc5, 0x26, 0xf7, 0x08, 0xd7, 0x91, 0xca, 0xa4, 0x04, 0x08, 0xd7,
+ 0x88, 0x00, 0x41, 0x7c, 0x3c, 0xc6, 0x26, 0xf6, 0x08, 0xd7, 0x50, 0xc5,
+ 0x26, 0xf7, 0x08, 0xd7, 0x49, 0xc4, 0x0d, 0xe5, 0x08, 0xd7, 0x2a, 0x01,
+ 0x7c, 0x4b, 0xc4, 0x0a, 0x64, 0x0f, 0x99, 0xa1, 0xc9, 0xb4, 0x01, 0x0f,
+ 0xd7, 0x99, 0xc7, 0xc5, 0x0c, 0x0f, 0xd7, 0xa1, 0xc6, 0x28, 0x24, 0x01,
+ 0x70, 0xc8, 0x47, 0x34, 0x2f, 0xc1, 0x7c, 0x51, 0xd6, 0x2c, 0x9c, 0x08,
+ 0x43, 0xc1, 0x42, 0x00, 0x49, 0x41, 0x7c, 0x5f, 0x18, 0xc1, 0x7c, 0x6b,
+ 0x0d, 0xc1, 0x7c, 0x77, 0x16, 0xc1, 0x7c, 0x89, 0x1b, 0xc1, 0x7c, 0x93,
+ 0xc3, 0xe6, 0x20, 0x0b, 0x5c, 0x59, 0x42, 0x00, 0xd0, 0xc1, 0x7c, 0x9f,
+ 0xc4, 0xe4, 0x03, 0x0b, 0x5c, 0x39, 0xc4, 0xe3, 0xcb, 0x0b, 0x5c, 0x21,
+ 0xc5, 0xd3, 0xdf, 0x0b, 0x5c, 0x09, 0x0e, 0x41, 0x7c, 0xa9, 0x05, 0xc1,
+ 0x7c, 0xb5, 0xc3, 0xe6, 0x3e, 0x0b, 0x59, 0x71, 0xc2, 0x20, 0xec, 0x0b,
+ 0x59, 0x69, 0x10, 0xc1, 0x7c, 0xc1, 0xc5, 0xd7, 0x54, 0x0b, 0x59, 0x51,
+ 0x0a, 0xc1, 0x7c, 0xdd, 0xc3, 0xc4, 0x86, 0x0b, 0x59, 0x31, 0xc3, 0x2d,
+ 0x34, 0x0b, 0x59, 0x21, 0xc4, 0xe4, 0xd7, 0x0b, 0x59, 0x19, 0xc3, 0xbe,
+ 0x32, 0x0b, 0x59, 0x09, 0xc3, 0x20, 0xeb, 0x0b, 0x58, 0xf1, 0xc3, 0xe5,
+ 0x4e, 0x0b, 0x58, 0xe0, 0xc8, 0xbc, 0x0a, 0x0b, 0x5b, 0xb9, 0xc8, 0xbf,
+ 0x72, 0x0b, 0x5b, 0xb1, 0x16, 0xc1, 0x7c, 0xef, 0x05, 0xc1, 0x7c, 0xfe,
+ 0xd2, 0x4d, 0xe7, 0x0b, 0x5b, 0x90, 0xc2, 0x11, 0xa5, 0x0b, 0x5b, 0x89,
+ 0x44, 0x9f, 0x7e, 0x41, 0x7d, 0x0a, 0xc2, 0x20, 0xec, 0x0b, 0x5b, 0x79,
+ 0xca, 0x9f, 0x7c, 0x0b, 0x5b, 0x69, 0xce, 0x73, 0xb4, 0x0b, 0x5b, 0x30,
+ 0xc3, 0xe6, 0x1d, 0x0b, 0x5b, 0x59, 0xc3, 0xe5, 0x60, 0x0b, 0x5b, 0x48,
+ 0xc3, 0x44, 0x23, 0x0b, 0x5b, 0x51, 0x1b, 0xc1, 0x7d, 0x16, 0xc3, 0x26,
+ 0x9a, 0x0b, 0x5a, 0x20, 0xc3, 0x95, 0x80, 0x0b, 0x5b, 0x41, 0xc2, 0x01,
+ 0x0f, 0x0b, 0x5b, 0x28, 0xc3, 0x46, 0x7d, 0x0b, 0x5b, 0x19, 0xc4, 0xe4,
+ 0x47, 0x0b, 0x5a, 0x11, 0xc4, 0xdf, 0x67, 0x0b, 0x5a, 0x01, 0xc4, 0xe0,
+ 0x47, 0x0b, 0x59, 0xd9, 0x16, 0x41, 0x7d, 0x22, 0xc8, 0xbd, 0x12, 0x0b,
+ 0x5b, 0x09, 0x42, 0x00, 0xc4, 0x41, 0x7d, 0x2c, 0xc9, 0x33, 0xed, 0x0b,
+ 0x5a, 0xf9, 0x95, 0x0b, 0x5a, 0xe0, 0xc4, 0x18, 0x10, 0x0b, 0x5a, 0xb9,
+ 0xc2, 0x22, 0xcc, 0x0b, 0x5a, 0xb0, 0xc3, 0x0d, 0x14, 0x0b, 0x5a, 0xa9,
+ 0xc3, 0x09, 0x9e, 0x0b, 0x5a, 0xa0, 0xc4, 0x02, 0xde, 0x0b, 0x5a, 0x99,
+ 0xc2, 0x02, 0xa0, 0x0b, 0x5a, 0x90, 0xc3, 0xe5, 0x30, 0x0b, 0x59, 0xb1,
+ 0xc2, 0x00, 0x5a, 0x0b, 0x59, 0x80, 0xc3, 0xa7, 0x6a, 0x0b, 0x59, 0xa1,
+ 0x91, 0x0b, 0x59, 0x88, 0xc3, 0x40, 0xe3, 0x0b, 0x59, 0x99, 0xc2, 0x00,
+ 0xcb, 0x0b, 0x59, 0x90, 0x03, 0xc1, 0x7d, 0x34, 0x98, 0x0b, 0x58, 0xb9,
+ 0x84, 0x0b, 0x58, 0xb1, 0x19, 0xc1, 0x7d, 0x3c, 0x0b, 0xc1, 0x7d, 0x44,
+ 0x17, 0x41, 0x7d, 0x4c, 0x98, 0x0b, 0x58, 0xc9, 0x84, 0x0b, 0x58, 0xc0,
+ 0x03, 0xc1, 0x7d, 0x54, 0x98, 0x0b, 0x58, 0x19, 0x84, 0x0b, 0x58, 0x10,
+ 0x98, 0x0b, 0x58, 0x99, 0x84, 0x0b, 0x58, 0x91, 0x11, 0x41, 0x7d, 0x5c,
+ 0x03, 0xc1, 0x7d, 0x64, 0x98, 0x0b, 0x58, 0x39, 0x84, 0x0b, 0x58, 0x30,
+ 0x98, 0x0b, 0x58, 0x49, 0x84, 0x0b, 0x58, 0x41, 0x07, 0x41, 0x7d, 0x6c,
+ 0xc4, 0x2a, 0xcc, 0x0f, 0xa7, 0x79, 0xc4, 0x01, 0xc3, 0x01, 0x80, 0x92,
+ 0x01, 0x7d, 0x74, 0x00, 0xc1, 0x7d, 0x7a, 0xcb, 0x7a, 0xa2, 0x0f, 0xa5,
+ 0xd8, 0x91, 0x08, 0x5d, 0x51, 0xc4, 0x18, 0x12, 0x08, 0x5d, 0x70, 0xc3,
+ 0x77, 0x79, 0x08, 0x5c, 0x79, 0xc4, 0xdc, 0x2d, 0x08, 0x5c, 0x68, 0x16,
+ 0xc1, 0x7d, 0xa2, 0xc3, 0x05, 0x14, 0x08, 0x48, 0xb2, 0x01, 0x7d, 0xb2,
+ 0x16, 0xc1, 0x7d, 0xb8, 0x15, 0xc1, 0x7d, 0xc4, 0xc4, 0xa9, 0x57, 0x08,
+ 0x48, 0x99, 0xc3, 0xe5, 0x78, 0x08, 0x48, 0x91, 0xc2, 0x00, 0x67, 0x08,
+ 0x48, 0x81, 0x03, 0xc1, 0x7d, 0xd6, 0xc3, 0x20, 0x18, 0x08, 0x48, 0x69,
+ 0xc3, 0x00, 0x4e, 0x08, 0x48, 0x61, 0xc4, 0xb9, 0xf7, 0x08, 0x48, 0x59,
+ 0xc3, 0xba, 0x37, 0x08, 0x48, 0x51, 0xc3, 0x4a, 0xb9, 0x08, 0x48, 0x49,
+ 0xc2, 0x01, 0x7f, 0x08, 0x48, 0x23, 0x01, 0x7d, 0xe2, 0xc3, 0x69, 0x81,
+ 0x08, 0x48, 0x31, 0xc3, 0xe4, 0xf4, 0x08, 0x48, 0x29, 0xc4, 0xdb, 0x4b,
+ 0x08, 0x48, 0x19, 0xc4, 0xe0, 0x8f, 0x08, 0x48, 0x11, 0xc3, 0x0b, 0xc8,
+ 0x08, 0x48, 0x08, 0x0d, 0xc1, 0x7d, 0xe6, 0x09, 0xc1, 0x7d, 0xf0, 0x10,
+ 0xc1, 0x7d, 0xfa, 0x05, 0xc1, 0x7e, 0x10, 0xc2, 0x25, 0x3b, 0x05, 0x42,
+ 0x31, 0x16, 0xc1, 0x7e, 0x1d, 0x06, 0xc1, 0x7e, 0x2f, 0x12, 0xc1, 0x7e,
+ 0x3f, 0xc2, 0x01, 0x5d, 0x05, 0x42, 0x71, 0xc2, 0x01, 0xc3, 0x05, 0x42,
+ 0x79, 0xc2, 0x01, 0x4a, 0x05, 0x42, 0x99, 0x1c, 0xc1, 0x7e, 0x49, 0x15,
+ 0xc1, 0x7e, 0x53, 0xc2, 0x19, 0x2c, 0x05, 0x42, 0xb9, 0xc2, 0x00, 0x39,
+ 0x05, 0x42, 0xc1, 0xc2, 0x00, 0xdb, 0x05, 0x42, 0xc9, 0xc2, 0x00, 0xd0,
+ 0x05, 0x42, 0xe1, 0x83, 0x05, 0x42, 0xeb, 0x01, 0x7e, 0x63, 0x8b, 0x05,
+ 0x42, 0xf1, 0x97, 0x05, 0x42, 0xf9, 0x87, 0x05, 0x43, 0x03, 0x01, 0x7e,
+ 0x6f, 0x91, 0x05, 0x43, 0x09, 0xc2, 0x0f, 0x9a, 0x05, 0x43, 0x11, 0xc2,
+ 0x8d, 0x8f, 0x05, 0x43, 0x19, 0xc2, 0x00, 0x87, 0x05, 0x43, 0x21, 0x45,
+ 0x17, 0xbd, 0x41, 0x7e, 0x73, 0x17, 0xc1, 0x7e, 0x7f, 0xcf, 0x68, 0x46,
+ 0x05, 0x43, 0xa0, 0xc4, 0x01, 0xe2, 0x05, 0x43, 0xb1, 0xcb, 0x99, 0x6b,
+ 0x05, 0x43, 0xb8, 0xc9, 0xa2, 0x56, 0x08, 0x0e, 0x81, 0x0e, 0xc1, 0x7e,
+ 0x8b, 0xc6, 0xca, 0xd9, 0x08, 0x0f, 0xa0, 0xcc, 0x89, 0x91, 0x08, 0x0e,
+ 0x91, 0xc4, 0xdf, 0xeb, 0x08, 0x0e, 0xc1, 0xc4, 0x5e, 0xc9, 0x08, 0x0f,
+ 0x80, 0x03, 0xc1, 0x7e, 0x97, 0xc4, 0xdf, 0xbb, 0x08, 0x0e, 0xa1, 0xc3,
+ 0x46, 0x7d, 0x08, 0x0e, 0xe1, 0x11, 0x41, 0x7e, 0xa7, 0xc4, 0x29, 0xfd,
+ 0x08, 0x0e, 0xa9, 0xc8, 0xbd, 0xda, 0x08, 0x0f, 0xe0, 0xc5, 0xb7, 0xed,
+ 0x08, 0x0e, 0xb1, 0xc3, 0x00, 0xbf, 0x08, 0x0f, 0x49, 0xc3, 0x06, 0xa7,
+ 0x08, 0x0f, 0x50, 0x11, 0xc1, 0x7e, 0xb6, 0xc2, 0x02, 0xe0, 0x08, 0x0f,
+ 0x8b, 0x01, 0x7e, 0xc0, 0xc8, 0xb8, 0x62, 0x08, 0x0f, 0x58, 0x42, 0x00,
+ 0x0a, 0xc1, 0x7e, 0xc6, 0xc2, 0x39, 0x8b, 0x08, 0x0e, 0xf9, 0xc4, 0x04,
+ 0x15, 0x08, 0x0f, 0x29, 0xc8, 0xb9, 0xca, 0x08, 0x0f, 0xd9, 0xc7, 0xc0,
+ 0xdd, 0x08, 0x0f, 0xd0, 0xc6, 0xca, 0xaf, 0x08, 0x0e, 0xe9, 0xc5, 0xd4,
+ 0xed, 0x08, 0x0e, 0xf0, 0x86, 0x08, 0x0f, 0x01, 0xc2, 0x00, 0x35, 0x08,
+ 0x0f, 0xb0, 0xc4, 0xe1, 0x07, 0x08, 0x0f, 0x19, 0xc2, 0x00, 0x5f, 0x08,
+ 0x0f, 0x78, 0xc2, 0x00, 0xc2, 0x08, 0x0f, 0x69, 0xc6, 0xcd, 0x67, 0x08,
+ 0x0f, 0xa8, 0xc5, 0xd5, 0xe2, 0x08, 0x0f, 0xc9, 0xc7, 0xc4, 0x87, 0x08,
+ 0x0e, 0xb8, 0xc4, 0x02, 0xde, 0x00, 0x00, 0x99, 0xc2, 0x02, 0xa0, 0x00,
+ 0x00, 0x90, 0xcb, 0x83, 0x0e, 0x00, 0x4a, 0xa1, 0xd0, 0x50, 0xcf, 0x00,
+ 0x4b, 0x80, 0xcb, 0x1f, 0x0d, 0x00, 0x4a, 0x99, 0xc9, 0x93, 0x31, 0x05,
+ 0x47, 0xc8, 0x4b, 0x91, 0xc5, 0xc1, 0x7e, 0xd0, 0x44, 0x00, 0xbb, 0x41,
+ 0x7e, 0xdc, 0x03, 0xc1, 0x7f, 0x11, 0xcf, 0x61, 0x11, 0x00, 0x4a, 0x71,
+ 0x91, 0x00, 0x4a, 0x5b, 0x01, 0x7f, 0x25, 0x46, 0x2e, 0xee, 0xc1, 0x7f,
+ 0x2f, 0x47, 0xc7, 0x7b, 0xc1, 0x7f, 0x37, 0x87, 0x00, 0x4a, 0x39, 0x48,
+ 0xb2, 0x2d, 0xc1, 0x7f, 0x45, 0x97, 0x00, 0x4a, 0x0b, 0x01, 0x7f, 0x53,
+ 0x8b, 0x00, 0x49, 0xfa, 0x01, 0x7f, 0x5e, 0x0a, 0xc1, 0x7f, 0x62, 0x15,
+ 0xc1, 0x7f, 0x6c, 0x18, 0xc1, 0x7f, 0x7a, 0x0e, 0xc1, 0x7f, 0x84, 0x14,
+ 0xc1, 0x7f, 0x8c, 0x1b, 0xc1, 0x7f, 0x9c, 0xc2, 0x01, 0xc3, 0x00, 0x49,
+ 0x73, 0x01, 0x7f, 0xa6, 0x04, 0xc1, 0x7f, 0xac, 0x12, 0xc1, 0x7f, 0xbc,
+ 0x10, 0xc1, 0x7f, 0xc6, 0x06, 0xc1, 0x7f, 0xda, 0x16, 0xc1, 0x7f, 0xe8,
+ 0x0c, 0xc1, 0x7f, 0xf6, 0x05, 0xc1, 0x80, 0x06, 0x09, 0xc1, 0x80, 0x13,
+ 0x0d, 0xc1, 0x80, 0x27, 0x83, 0x00, 0x48, 0x2b, 0x01, 0x80, 0x2f, 0x91,
+ 0x00, 0x48, 0x9b, 0x01, 0x80, 0x43, 0x87, 0x00, 0x48, 0x79, 0x97, 0x00,
+ 0x48, 0x4b, 0x01, 0x80, 0x4d, 0x8b, 0x00, 0x48, 0x3b, 0x01, 0x80, 0x58,
+ 0xc2, 0x0f, 0x9a, 0x00, 0x4a, 0xc1, 0x1c, 0xc1, 0x80, 0x5c, 0xc2, 0x00,
+ 0x87, 0x00, 0x4a, 0xf0, 0x45, 0x09, 0x98, 0xc1, 0x80, 0x66, 0xcb, 0x97,
+ 0xf5, 0x00, 0x4b, 0x29, 0xc4, 0x19, 0x53, 0x00, 0x4b, 0x20, 0xc7, 0xc7,
+ 0x19, 0x0f, 0x9e, 0xe8, 0x4f, 0x0b, 0x17, 0xc1, 0x80, 0x8a, 0x4d, 0x29,
+ 0xb9, 0x41, 0x80, 0xec, 0xcf, 0x66, 0x0c, 0x01, 0x1f, 0x41, 0xd4, 0x3b,
+ 0x10, 0x01, 0x1c, 0xb0, 0x47, 0x07, 0x9a, 0xc1, 0x81, 0x4e, 0x44, 0x00,
+ 0xf1, 0xc1, 0x81, 0x5a, 0xc4, 0x51, 0xb7, 0x01, 0x1e, 0x30, 0xc8, 0x01,
+ 0x92, 0x01, 0x1e, 0x19, 0xc6, 0x02, 0xd1, 0x01, 0x1e, 0x00, 0xc4, 0x51,
+ 0xb7, 0x01, 0x1e, 0x41, 0xc8, 0x01, 0x92, 0x01, 0x1e, 0x29, 0xc6, 0x02,
+ 0xd1, 0x01, 0x1e, 0x10, 0xc4, 0x51, 0xb7, 0x01, 0x1e, 0x39, 0xc8, 0x01,
+ 0x92, 0x01, 0x1e, 0x21, 0xc6, 0x02, 0xd1, 0x01, 0x1e, 0x08, 0x44, 0x84,
+ 0x6c, 0x41, 0x81, 0x66, 0xca, 0xa6, 0xde, 0x0e, 0x98, 0x11, 0xcd, 0x7f,
+ 0xce, 0x0e, 0x98, 0x08, 0xc2, 0x00, 0x74, 0x01, 0x34, 0x79, 0xc3, 0x01,
+ 0x95, 0x01, 0x34, 0x60, 0xc3, 0x01, 0x95, 0x01, 0x34, 0x71, 0xc2, 0x00,
+ 0x74, 0x01, 0x34, 0x68, 0x00, 0x41, 0x81, 0x72, 0x00, 0x41, 0x81, 0x7e,
+ 0xc4, 0x18, 0x10, 0x00, 0x01, 0xbb, 0x01, 0x81, 0x8a, 0xc2, 0x22, 0xcc,
+ 0x00, 0x01, 0xb2, 0x01, 0x81, 0x8e, 0xc3, 0x0d, 0x14, 0x00, 0x01, 0xab,
+ 0x01, 0x81, 0x92, 0xc3, 0x09, 0x9e, 0x00, 0x01, 0xa2, 0x01, 0x81, 0x96,
+ 0xc4, 0x02, 0xde, 0x00, 0x01, 0x9b, 0x01, 0x81, 0x9a, 0xc2, 0x02, 0xa0,
+ 0x00, 0x01, 0x92, 0x01, 0x81, 0x9e, 0x00, 0x41, 0x81, 0xa2, 0x00, 0x41,
+ 0x81, 0xae, 0x45, 0x09, 0x98, 0xc1, 0x81, 0xba, 0xcb, 0x97, 0xf5, 0x08,
+ 0xca, 0x20, 0xc5, 0x33, 0x5d, 0x08, 0xca, 0x19, 0xc7, 0xc3, 0xa7, 0x08,
+ 0xc9, 0xe9, 0xcb, 0x1e, 0x89, 0x08, 0xc9, 0xe1, 0xc8, 0x14, 0x38, 0x08,
+ 0xc9, 0xd8, 0xc2, 0x00, 0x39, 0x08, 0xca, 0x11, 0xc2, 0x19, 0x2c, 0x08,
+ 0xca, 0x00, 0xc5, 0x1e, 0x96, 0x08, 0xc9, 0xf1, 0x4a, 0x6f, 0xc8, 0x41,
+ 0x81, 0xde, 0xc2, 0x02, 0x1c, 0x08, 0xc9, 0x79, 0x0e, 0xc1, 0x81, 0xf8,
+ 0xc2, 0x00, 0xd0, 0x08, 0xc9, 0x69, 0x15, 0xc1, 0x82, 0x02, 0xc2, 0x02,
+ 0x41, 0x08, 0xc9, 0x49, 0xc2, 0x00, 0x39, 0x08, 0xc9, 0x39, 0x1b, 0xc1,
+ 0x82, 0x12, 0xc2, 0x01, 0xc3, 0x08, 0xc9, 0x21, 0x04, 0xc1, 0x82, 0x1c,
+ 0x12, 0xc1, 0x82, 0x26, 0x10, 0xc1, 0x82, 0x30, 0x06, 0xc1, 0x82, 0x46,
+ 0x16, 0xc1, 0x82, 0x54, 0xc2, 0x25, 0x3b, 0x08, 0xc8, 0x99, 0x05, 0xc1,
+ 0x82, 0x64, 0x09, 0xc1, 0x82, 0x6e, 0x0d, 0xc1, 0x82, 0x78, 0x91, 0x08,
+ 0xc8, 0x49, 0x87, 0x08, 0xc8, 0x31, 0x97, 0x08, 0xc8, 0x23, 0x01, 0x82,
+ 0x82, 0x8b, 0x08, 0xc8, 0x13, 0x01, 0x82, 0x86, 0x83, 0x08, 0xc8, 0x02,
+ 0x01, 0x82, 0x8a, 0xc5, 0x03, 0x4d, 0x01, 0x16, 0x39, 0x15, 0x41, 0x82,
+ 0x8e, 0xca, 0xa3, 0x64, 0x01, 0x3c, 0x99, 0x46, 0x09, 0x97, 0x41, 0x82,
+ 0x9a, 0xc4, 0x26, 0x78, 0x01, 0x3b, 0xf1, 0xc5, 0x06, 0xdb, 0x01, 0x3b,
+ 0xe9, 0x15, 0xc1, 0x82, 0xbe, 0x08, 0xc1, 0x82, 0xca, 0x16, 0xc1, 0x82,
+ 0xd6, 0xc3, 0x05, 0x14, 0x01, 0x3b, 0xb0, 0xc4, 0x26, 0x78, 0x01, 0x3c,
+ 0x41, 0xc5, 0x06, 0xdb, 0x01, 0x3c, 0x39, 0x15, 0xc1, 0x82, 0xe2, 0x08,
+ 0xc1, 0x82, 0xee, 0x16, 0xc1, 0x82, 0xfa, 0xc3, 0x05, 0x14, 0x01, 0x3c,
+ 0x01, 0xc4, 0x15, 0xe7, 0x0f, 0x88, 0x58, 0xc4, 0x00, 0x87, 0x0f, 0xb0,
+ 0xf1, 0xd1, 0x4f, 0x14, 0x0f, 0xb1, 0x28, 0xc8, 0x18, 0x67, 0x01, 0x16,
+ 0x21, 0xd7, 0x26, 0x1b, 0x0f, 0xa5, 0x01, 0x45, 0x00, 0x8c, 0xc1, 0x83,
+ 0x06, 0xc6, 0xcf, 0xad, 0x0f, 0xbc, 0xe0, 0xc4, 0x01, 0x23, 0x0f, 0xc8,
+ 0x43, 0x01, 0x83, 0x1e, 0xcc, 0x84, 0xa5, 0x0f, 0xc8, 0x4a, 0x01, 0x83,
+ 0x24, 0x16, 0xc1, 0x83, 0x2a, 0x15, 0xc1, 0x83, 0x36, 0x0a, 0xc1, 0x83,
+ 0x42, 0x03, 0xc1, 0x83, 0x4e, 0xcf, 0x61, 0x4d, 0x01, 0x3f, 0x89, 0xcb,
+ 0x01, 0xfc, 0x01, 0x0f, 0x4b, 0x01, 0x83, 0x5d, 0x06, 0xc1, 0x83, 0x63,
+ 0xcd, 0x7c, 0xa8, 0x01, 0x0e, 0x51, 0xcc, 0x2e, 0x48, 0x01, 0x0d, 0x79,
+ 0xc6, 0xca, 0xa3, 0x0f, 0xb3, 0x79, 0x46, 0x04, 0x8f, 0xc1, 0x83, 0x6f,
+ 0xd1, 0x56, 0xd9, 0x0f, 0xc1, 0xb9, 0xd0, 0x58, 0x62, 0x0f, 0xc1, 0xf8,
+ 0xd2, 0x4c, 0xfd, 0x01, 0x57, 0x88, 0xd0, 0x5d, 0x52, 0x01, 0x4f, 0x49,
+ 0xcf, 0x66, 0x66, 0x01, 0x4f, 0x40, 0x43, 0xe5, 0x0c, 0xc1, 0x83, 0x7b,
+ 0x43, 0xe5, 0xff, 0xc1, 0x83, 0x97, 0x43, 0xe5, 0xdb, 0xc1, 0x83, 0xb3,
+ 0x43, 0xe6, 0x6e, 0xc1, 0x83, 0xcf, 0x43, 0xe6, 0x3b, 0xc1, 0x83, 0xeb,
+ 0x43, 0xe5, 0xa8, 0xc1, 0x84, 0x07, 0x43, 0xe5, 0x45, 0x41, 0x84, 0x23,
+ 0x43, 0xe5, 0xdb, 0xc1, 0x84, 0x3f, 0x43, 0xe5, 0xff, 0xc1, 0x84, 0x5b,
+ 0x43, 0xe6, 0x6e, 0xc1, 0x84, 0x77, 0x43, 0xe6, 0x3b, 0xc1, 0x84, 0x93,
+ 0x43, 0xe5, 0x0c, 0xc1, 0x84, 0xaf, 0x43, 0xe5, 0xa8, 0xc1, 0x84, 0xcb,
+ 0x43, 0xe5, 0x45, 0x41, 0x84, 0xe7, 0x05, 0xc1, 0x85, 0x03, 0x49, 0x07,
+ 0xbb, 0xc1, 0x85, 0x15, 0x17, 0xc1, 0x85, 0x24, 0x44, 0x06, 0xbb, 0xc1,
+ 0x85, 0x30, 0x15, 0xc1, 0x85, 0x3c, 0xcd, 0x2c, 0xb2, 0x01, 0x02, 0x39,
+ 0xd0, 0x0f, 0x09, 0x01, 0x01, 0xe1, 0x12, 0xc1, 0x85, 0x50, 0x06, 0xc1,
+ 0x85, 0x5a, 0x0a, 0xc1, 0x85, 0x66, 0x0e, 0xc1, 0x85, 0x72, 0xdb, 0x16,
+ 0x89, 0x01, 0x4c, 0xb1, 0x47, 0xc4, 0x17, 0xc1, 0x85, 0x7c, 0xcc, 0x83,
+ 0x0d, 0x00, 0x16, 0xe9, 0xcd, 0x7d, 0x9f, 0x07, 0xf2, 0x61, 0xce, 0x70,
+ 0x0a, 0x01, 0x70, 0xb8, 0xc9, 0x1b, 0xc7, 0x01, 0x35, 0x19, 0xcb, 0x21,
+ 0x00, 0x01, 0x35, 0x11, 0xc6, 0x00, 0x91, 0x01, 0x5f, 0xe0, 0x47, 0x73,
+ 0x59, 0xc1, 0x85, 0x8b, 0xce, 0x6e, 0xd6, 0x01, 0x4e, 0xf9, 0x45, 0x02,
+ 0x6d, 0x41, 0x85, 0xa3, 0xc5, 0x02, 0xd2, 0x01, 0x2e, 0x61, 0xc4, 0x0d,
+ 0x21, 0x01, 0x02, 0xe0, 0xc5, 0x0b, 0x0a, 0x01, 0x58, 0xd1, 0xc6, 0x27,
+ 0x5e, 0x01, 0x72, 0x50, 0xc5, 0x33, 0x5d, 0x08, 0xc1, 0xd1, 0x42, 0x07,
+ 0xb2, 0xc1, 0x85, 0xaf, 0xc8, 0x14, 0x38, 0x08, 0xc1, 0xb8, 0x03, 0xc1,
+ 0x85, 0xbb, 0x91, 0x08, 0xc1, 0xa9, 0x87, 0x08, 0xc1, 0x99, 0xc9, 0xb2,
+ 0x2d, 0x08, 0xc1, 0x8b, 0x01, 0x85, 0xc7, 0x97, 0x08, 0xc1, 0x7b, 0x01,
+ 0x85, 0xcb, 0x8b, 0x08, 0xc1, 0x6a, 0x01, 0x85, 0xcf, 0x14, 0xc1, 0x85,
+ 0xd3, 0xc2, 0x00, 0xd0, 0x08, 0xc1, 0x51, 0x15, 0xc1, 0x85, 0xdd, 0xc2,
+ 0x02, 0x41, 0x08, 0xc1, 0x31, 0xc2, 0x00, 0xdb, 0x08, 0xc1, 0x29, 0xc2,
+ 0x19, 0x2c, 0x08, 0xc1, 0x19, 0xc2, 0x01, 0xc3, 0x08, 0xc1, 0x11, 0x04,
+ 0xc1, 0x85, 0xed, 0x12, 0xc1, 0x85, 0xf7, 0x10, 0xc1, 0x86, 0x01, 0x06,
+ 0xc1, 0x86, 0x17, 0x16, 0xc1, 0x86, 0x25, 0x0c, 0xc1, 0x86, 0x33, 0x05,
+ 0xc1, 0x86, 0x3d, 0x09, 0xc1, 0x86, 0x47, 0x0d, 0xc1, 0x86, 0x51, 0x83,
+ 0x08, 0xc0, 0x03, 0x01, 0x86, 0x5b, 0x91, 0x08, 0xc0, 0x41, 0x87, 0x08,
+ 0xc0, 0x31, 0x97, 0x08, 0xc0, 0x23, 0x01, 0x86, 0x67, 0x8b, 0x08, 0xc0,
+ 0x12, 0x01, 0x86, 0x6b, 0xc9, 0x23, 0x9f, 0x01, 0x17, 0x68, 0xc9, 0x23,
+ 0x9f, 0x01, 0x17, 0x00, 0xcc, 0x87, 0xbd, 0x0f, 0xad, 0xd0, 0x43, 0x02,
+ 0x5f, 0xc1, 0x86, 0x6f, 0xd5, 0x32, 0x57, 0x0d, 0xe3, 0x80, 0xc8, 0x00,
+ 0x5f, 0x0d, 0xe4, 0x43, 0x01, 0x86, 0x9e, 0xc4, 0x51, 0xb7, 0x0d, 0xe4,
+ 0x39, 0x0e, 0xc1, 0x86, 0xa4, 0xc6, 0x02, 0xd1, 0x0d, 0xe4, 0x29, 0xc3,
+ 0x02, 0xa3, 0x0d, 0xe4, 0x21, 0xc5, 0x1f, 0x0c, 0x0d, 0xe4, 0x11, 0xcb,
+ 0x8f, 0x94, 0x0d, 0xe4, 0x09, 0xc5, 0x31, 0xee, 0x0d, 0xe4, 0x00, 0x42,
+ 0x01, 0x6f, 0xc1, 0x86, 0xb0, 0xc6, 0xce, 0x8d, 0x0d, 0xe3, 0xd9, 0xc6,
+ 0x99, 0xc8, 0x0d, 0xe3, 0xd1, 0xd4, 0x3c, 0xdc, 0x0d, 0xe3, 0xb9, 0xc6,
+ 0x27, 0x9c, 0x0d, 0xe3, 0xb0, 0xcf, 0x61, 0x98, 0x0d, 0xe3, 0xa1, 0xd1,
+ 0x27, 0x91, 0x0d, 0xe3, 0x88, 0xc2, 0x00, 0x2b, 0x0d, 0xe1, 0xd1, 0x8a,
+ 0x0d, 0xe1, 0xc9, 0x91, 0x0d, 0xe2, 0xeb, 0x01, 0x86, 0xbf, 0xc2, 0x06,
+ 0xdb, 0x0d, 0xe2, 0xf9, 0x8b, 0x0d, 0xe2, 0xf1, 0x83, 0x0d, 0xe2, 0xe0,
+ 0x00, 0xc1, 0x86, 0xc3, 0x8a, 0x0d, 0xe0, 0x88, 0x00, 0xc1, 0x86, 0xcd,
+ 0x45, 0xd9, 0x89, 0xc1, 0x86, 0xfe, 0xc6, 0xcf, 0x17, 0x0d, 0xe2, 0x48,
+ 0x00, 0x41, 0x87, 0x1a, 0x00, 0xc1, 0x87, 0x38, 0x45, 0x44, 0xf8, 0x41,
+ 0x87, 0x49, 0x00, 0x41, 0x87, 0x59, 0x8a, 0x0d, 0xe0, 0xc1, 0xc2, 0x00,
+ 0x3f, 0x0d, 0xe0, 0x81, 0x48, 0xb5, 0xfa, 0x41, 0x87, 0x6a, 0x8a, 0x0d,
+ 0xe0, 0xb9, 0x44, 0x08, 0x48, 0x41, 0x87, 0x72, 0x8e, 0x0d, 0xe0, 0xb0,
+ 0x8d, 0x0d, 0xe0, 0xa1, 0x00, 0x41, 0x87, 0x7a, 0x8a, 0x0d, 0xe0, 0x99,
+ 0xc2, 0x00, 0x3f, 0x0d, 0xe0, 0x68, 0xc2, 0x04, 0x4d, 0x0d, 0xe0, 0x70,
+ 0xc2, 0x04, 0x4d, 0x0d, 0xe0, 0x61, 0x47, 0xc0, 0x35, 0x41, 0x87, 0x84,
+ 0xc4, 0xe4, 0x37, 0x0d, 0xe1, 0xf0, 0xc8, 0xbb, 0x02, 0x0d, 0xe3, 0x50,
+ 0x99, 0x0d, 0xe2, 0x98, 0x97, 0x0d, 0xe2, 0xd9, 0x99, 0x0d, 0xe2, 0xd1,
+ 0xc2, 0x38, 0x2a, 0x0d, 0xe2, 0xc9, 0x83, 0x0d, 0xe2, 0x18, 0x8a, 0x0d,
+ 0xe2, 0xb9, 0xc2, 0x04, 0x4d, 0x0d, 0xe2, 0xa1, 0x8b, 0x0d, 0xe2, 0x50,
+ 0x97, 0x0d, 0xe2, 0x91, 0x87, 0x0d, 0xe2, 0x58, 0x87, 0x0d, 0xe2, 0x40,
+ 0xc2, 0x00, 0x59, 0x0d, 0xe2, 0x28, 0xca, 0xa2, 0xc4, 0x01, 0x71, 0xb1,
+ 0xcb, 0x98, 0x9a, 0x01, 0x71, 0xb8, 0xc5, 0x06, 0x82, 0x00, 0x04, 0x69,
+ 0x42, 0x01, 0x0f, 0xc1, 0x87, 0x8c, 0xc7, 0x27, 0x5d, 0x00, 0x02, 0xe3,
+ 0x01, 0x87, 0x98, 0xcd, 0x7b, 0x15, 0x0f, 0xb3, 0xf9, 0x55, 0x33, 0x92,
+ 0x41, 0x87, 0x9c, 0x14, 0xc1, 0x87, 0xa8, 0xc8, 0x68, 0xc5, 0x01, 0x18,
+ 0x81, 0x16, 0xc1, 0x87, 0xba, 0x15, 0xc1, 0x87, 0xcf, 0x12, 0xc1, 0x87,
+ 0xdb, 0x47, 0x00, 0x58, 0xc1, 0x87, 0xe7, 0xe0, 0x09, 0x27, 0x0f, 0xac,
+ 0xa9, 0xcc, 0x89, 0x79, 0x0f, 0xac, 0xa1, 0xc9, 0xb2, 0xf3, 0x01, 0x4d,
+ 0x81, 0xc5, 0x01, 0x95, 0x01, 0x4d, 0x1b, 0x01, 0x87, 0xf6, 0xd2, 0x4a,
+ 0x3f, 0x01, 0x70, 0x89, 0xcd, 0x2c, 0xb2, 0x01, 0x71, 0x71, 0xc5, 0x0a,
+ 0x8a, 0x01, 0x72, 0x08, 0x9f, 0x01, 0x37, 0x09, 0x9e, 0x01, 0x37, 0x00,
+ 0xd1, 0x53, 0x54, 0x01, 0x33, 0xd1, 0x45, 0x1a, 0xad, 0x41, 0x87, 0xfc,
+ 0x87, 0x05, 0x4a, 0x4b, 0x01, 0x88, 0x26, 0x03, 0xc1, 0x88, 0x2e, 0x91,
+ 0x05, 0x4a, 0x59, 0x97, 0x05, 0x4a, 0x41, 0x8b, 0x05, 0x4a, 0x38, 0x89,
+ 0x05, 0x4a, 0x78, 0x1b, 0xc1, 0x88, 0x36, 0xc2, 0x0e, 0x9a, 0x05, 0x4a,
+ 0x21, 0x09, 0xc1, 0x88, 0x40, 0x83, 0x05, 0x49, 0xa8, 0xc2, 0x01, 0x5d,
+ 0x05, 0x4a, 0x11, 0x83, 0x05, 0x49, 0xc0, 0x07, 0xc1, 0x88, 0x4a, 0xd5,
+ 0x32, 0x18, 0x01, 0x3e, 0x31, 0xcd, 0x25, 0xae, 0x00, 0x02, 0xeb, 0x01,
+ 0x88, 0x56, 0x0b, 0xc1, 0x88, 0x5a, 0x42, 0x00, 0x67, 0xc1, 0x88, 0x66,
+ 0xd3, 0x1f, 0xcd, 0x01, 0x70, 0x18, 0x10, 0xc1, 0x88, 0x75, 0x14, 0x41,
+ 0x88, 0x7f, 0xc9, 0x9b, 0x77, 0x01, 0x3e, 0xb1, 0x43, 0x02, 0x6f, 0xc1,
+ 0x88, 0x8b, 0xcf, 0x63, 0x5a, 0x0f, 0xdd, 0xe0, 0x43, 0x01, 0xd0, 0xc1,
+ 0x88, 0x97, 0xd5, 0x36, 0xb0, 0x0f, 0xab, 0xe8, 0xc7, 0xc9, 0xb2, 0x01,
+ 0x1d, 0xc9, 0xcd, 0x77, 0xfc, 0x01, 0x71, 0x08, 0xcc, 0x00, 0x33, 0x00,
+ 0x03, 0xeb, 0x01, 0x88, 0xaf, 0xc6, 0xb7, 0x3b, 0x01, 0x18, 0x49, 0xcd,
+ 0x69, 0x65, 0x01, 0x80, 0x68, 0x00, 0x41, 0x88, 0xb3, 0xc4, 0x20, 0xe6,
+ 0x01, 0x18, 0x59, 0x0b, 0x41, 0x88, 0xc5, 0x14, 0xc1, 0x88, 0xd1, 0xc3,
+ 0x00, 0x3a, 0x01, 0x15, 0x11, 0x0a, 0xc1, 0x88, 0xdd, 0xd5, 0x08, 0x89,
+ 0x01, 0x80, 0xa8, 0x45, 0x00, 0x5a, 0xc1, 0x88, 0xef, 0xd9, 0x1f, 0xc7,
+ 0x01, 0x70, 0x28, 0xcb, 0x8a, 0x0a, 0x01, 0x4e, 0xc9, 0x45, 0x01, 0xfd,
+ 0x41, 0x89, 0x05, 0xd6, 0x08, 0x88, 0x01, 0x4c, 0xc1, 0xd2, 0x21, 0x89,
+ 0x01, 0x80, 0x88, 0xca, 0x01, 0xfd, 0x01, 0x0f, 0x43, 0x01, 0x89, 0x21,
+ 0xc9, 0xb0, 0x6b, 0x01, 0x0c, 0xe8, 0x42, 0x00, 0x2c, 0xc1, 0x89, 0x25,
+ 0x42, 0x02, 0xa0, 0xc1, 0x89, 0x31, 0xd5, 0x37, 0xc1, 0x0f, 0xc5, 0x18,
+ 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x91, 0x42, 0x00, 0xe3, 0x41, 0x89, 0x3d,
+ 0x45, 0x11, 0x3a, 0xc1, 0x89, 0x49, 0x03, 0x41, 0x89, 0x55, 0x00, 0xc1,
+ 0x89, 0x61, 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xd0, 0xcb, 0x82, 0xba, 0x01,
+ 0x0f, 0x11, 0x46, 0x00, 0x59, 0x41, 0x89, 0x7e, 0xc5, 0xca, 0xa4, 0x0f,
+ 0xb3, 0x71, 0xd7, 0x2a, 0x6b, 0x0f, 0xc5, 0x28, 0x45, 0x04, 0x90, 0xc1,
+ 0x89, 0x8d, 0xd8, 0x23, 0xdb, 0x0f, 0xc5, 0x09, 0xdf, 0x0c, 0x65, 0x0f,
+ 0xc5, 0x48, 0xd0, 0x56, 0xda, 0x0f, 0xc1, 0xb1, 0xe0, 0x01, 0xe7, 0x0f,
+ 0xc5, 0x58, 0xd0, 0x5a, 0x22, 0x0f, 0xa8, 0x71, 0xcd, 0x0b, 0x91, 0x01,
+ 0x19, 0x51, 0xd4, 0x3b, 0x9c, 0x01, 0x4f, 0xe9, 0xdb, 0x18, 0x39, 0x00,
+ 0x05, 0xd8, 0xdc, 0x14, 0x4d, 0x01, 0x3d, 0x49, 0xd7, 0x29, 0xe1, 0x01,
+ 0x49, 0xc0, 0xc7, 0x00, 0xfa, 0x01, 0x03, 0x39, 0xc8, 0xb6, 0xca, 0x01,
+ 0x01, 0x71, 0xc9, 0xb3, 0x9e, 0x01, 0x01, 0x59, 0xc4, 0x01, 0xc3, 0x01,
+ 0x00, 0x78, 0xd6, 0x2d, 0x4c, 0x00, 0x2c, 0x69, 0xc4, 0xb9, 0x3c, 0x0f,
+ 0xc8, 0xe1, 0xcb, 0x8f, 0xf7, 0x00, 0x7e, 0xaa, 0x01, 0x89, 0x99, 0xc4,
+ 0x00, 0x49, 0x01, 0x5d, 0x81, 0xc5, 0x00, 0x2c, 0x01, 0x5d, 0x88, 0xc4,
+ 0x00, 0x49, 0x01, 0x5d, 0x91, 0xc5, 0x00, 0x2c, 0x01, 0x5d, 0x98, 0xc2,
+ 0x02, 0xae, 0x01, 0x5d, 0xa1, 0xc4, 0x03, 0xc8, 0x01, 0x5d, 0xb0, 0xc2,
+ 0x02, 0xae, 0x01, 0x5d, 0xa9, 0xc4, 0x03, 0xc8, 0x01, 0x5d, 0xb8, 0xc7,
+ 0xc9, 0x42, 0x0f, 0x9d, 0x11, 0xc5, 0xdb, 0x41, 0x0f, 0xb7, 0xe0, 0xc6,
+ 0xd0, 0x2b, 0x0f, 0x93, 0x21, 0xc2, 0x00, 0x59, 0x0f, 0x93, 0x10, 0x00,
+ 0x41, 0x89, 0x9f, 0x0b, 0xc1, 0x89, 0xb1, 0xc3, 0x09, 0x9e, 0x01, 0x0b,
+ 0x18, 0xc2, 0x22, 0xcc, 0x01, 0x0b, 0x2b, 0x01, 0x89, 0xc3, 0xc4, 0x18,
+ 0x10, 0x01, 0x0b, 0x30, 0xc2, 0x00, 0xc4, 0x01, 0x0b, 0x4b, 0x01, 0x89,
+ 0xc9, 0x19, 0xc1, 0x89, 0xcf, 0xc4, 0x02, 0xde, 0x01, 0x0b, 0x10, 0xc5,
+ 0x66, 0xb1, 0x01, 0x0b, 0x51, 0xc4, 0x00, 0x2d, 0x01, 0x0b, 0x38, 0x42,
+ 0x09, 0x40, 0xc1, 0x89, 0xd9, 0xcb, 0x9a, 0x05, 0x08, 0x0c, 0x91, 0xcd,
+ 0x7a, 0xd4, 0x08, 0x0c, 0xc0, 0x46, 0x00, 0x8b, 0x41, 0x89, 0xe5, 0xc6,
+ 0x02, 0xe9, 0x0f, 0x8b, 0x61, 0xc6, 0x42, 0xd4, 0x0f, 0x8b, 0x59, 0xc6,
+ 0x5c, 0x5b, 0x0f, 0x8b, 0x50, 0xd8, 0x21, 0x3b, 0x01, 0x70, 0x38, 0xc5,
+ 0x06, 0x67, 0x08, 0x73, 0xe9, 0xc7, 0x08, 0x79, 0x08, 0x73, 0xe1, 0xc4,
+ 0x01, 0xce, 0x08, 0x73, 0xd8, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xd1, 0xc2,
+ 0x0d, 0x10, 0x08, 0x73, 0x88, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xc9, 0x9b,
+ 0x08, 0x73, 0x80, 0x44, 0x18, 0x10, 0xc1, 0x89, 0xf1, 0x42, 0x22, 0xcc,
+ 0x41, 0x89, 0xfd, 0x0b, 0xc1, 0x8a, 0x09, 0x11, 0x41, 0x8a, 0x15, 0x0a,
+ 0xc1, 0x8a, 0x21, 0x19, 0xc1, 0x8a, 0x2d, 0xc2, 0x00, 0xc4, 0x08, 0x73,
+ 0x48, 0xc4, 0x18, 0x10, 0x08, 0x73, 0x31, 0xc2, 0x22, 0xcc, 0x08, 0x73,
+ 0x28, 0xc3, 0x0d, 0x14, 0x08, 0x73, 0x21, 0xc3, 0x09, 0x9e, 0x08, 0x73,
+ 0x18, 0xc4, 0x02, 0xde, 0x08, 0x73, 0x11, 0xc2, 0x02, 0xa0, 0x08, 0x73,
+ 0x08, 0x08, 0xc1, 0x8a, 0x39, 0x91, 0x00, 0xb5, 0x73, 0x01, 0x8a, 0x45,
+ 0x15, 0xc1, 0x8a, 0x63, 0x8d, 0x00, 0xb7, 0x8b, 0x01, 0x8a, 0x7c, 0x9a,
+ 0x00, 0xb7, 0x51, 0x93, 0x00, 0xb7, 0x49, 0x0b, 0xc1, 0x8a, 0x82, 0x0e,
+ 0xc1, 0x8a, 0xa3, 0x85, 0x00, 0xb6, 0x6b, 0x01, 0x8a, 0xaf, 0x87, 0x00,
+ 0xb6, 0x13, 0x01, 0x8a, 0xbf, 0x86, 0x00, 0xb6, 0x8b, 0x01, 0x8a, 0xd7,
+ 0xcc, 0x84, 0xe1, 0x00, 0xb6, 0xb9, 0xd8, 0x25, 0x2b, 0x00, 0xb6, 0x91,
+ 0x16, 0xc1, 0x8a, 0xe3, 0x9c, 0x00, 0xb6, 0x71, 0x03, 0xc1, 0x8a, 0xef,
+ 0xcf, 0x60, 0xe4, 0x00, 0xb6, 0x41, 0x89, 0x00, 0xb5, 0xab, 0x01, 0x8b,
+ 0x07, 0xc7, 0xc7, 0xf2, 0x00, 0xb6, 0x19, 0xd1, 0x57, 0x0c, 0x00, 0xb5,
+ 0xf1, 0x42, 0x00, 0xd0, 0xc1, 0x8b, 0x11, 0x99, 0x00, 0xb5, 0x2b, 0x01,
+ 0x8b, 0x1d, 0xd0, 0x5d, 0x82, 0x00, 0xb5, 0x89, 0x9b, 0x00, 0xb5, 0x23,
+ 0x01, 0x8b, 0x23, 0xc9, 0xb4, 0x88, 0x00, 0xb5, 0x11, 0x98, 0x00, 0xb5,
+ 0x08, 0xa1, 0x70, 0x0c, 0x49, 0xa0, 0x70, 0x0c, 0x41, 0xa6, 0x70, 0x0c,
+ 0x71, 0xa5, 0x70, 0x0c, 0x69, 0xa4, 0x70, 0x0c, 0x61, 0xa3, 0x70, 0x0c,
+ 0x59, 0xa2, 0x70, 0x0c, 0x51, 0x9f, 0x70, 0x0c, 0x39, 0x9e, 0x70, 0x0c,
+ 0x31, 0x9d, 0x70, 0x0c, 0x28, 0xa0, 0x70, 0x0b, 0x01, 0x9f, 0x70, 0x0a,
+ 0xf9, 0x9e, 0x70, 0x0a, 0xf1, 0x9d, 0x70, 0x0a, 0xe9, 0xa6, 0x70, 0x0b,
+ 0x31, 0xa5, 0x70, 0x0b, 0x29, 0xa4, 0x70, 0x0b, 0x21, 0xa3, 0x70, 0x0b,
+ 0x19, 0xa2, 0x70, 0x0b, 0x11, 0xa1, 0x70, 0x0b, 0x08, 0xa6, 0x70, 0x0a,
+ 0xe1, 0xa5, 0x70, 0x0a, 0xd9, 0xa4, 0x70, 0x0a, 0xd1, 0xa3, 0x70, 0x0a,
+ 0xc9, 0xa2, 0x70, 0x0a, 0xc1, 0xa1, 0x70, 0x0a, 0xb9, 0xa0, 0x70, 0x0a,
+ 0xb1, 0x9f, 0x70, 0x0a, 0xa9, 0x9e, 0x70, 0x0a, 0xa1, 0x9d, 0x70, 0x0a,
+ 0x98, 0xa6, 0x70, 0x0d, 0xb1, 0xa5, 0x70, 0x0d, 0xa9, 0xa4, 0x70, 0x0d,
+ 0xa1, 0xa3, 0x70, 0x0d, 0x99, 0xa2, 0x70, 0x0d, 0x91, 0xa1, 0x70, 0x0d,
+ 0x89, 0xa0, 0x70, 0x0d, 0x81, 0x9f, 0x70, 0x0d, 0x79, 0x9e, 0x70, 0x0d,
+ 0x71, 0x9d, 0x70, 0x0d, 0x68, 0xa6, 0x70, 0x0d, 0x61, 0xa5, 0x70, 0x0d,
+ 0x59, 0xa4, 0x70, 0x0d, 0x51, 0xa3, 0x70, 0x0d, 0x49, 0xa2, 0x70, 0x0d,
+ 0x41, 0xa1, 0x70, 0x0d, 0x39, 0xa0, 0x70, 0x0d, 0x31, 0x9f, 0x70, 0x0d,
+ 0x29, 0x9e, 0x70, 0x0d, 0x21, 0x9d, 0x70, 0x0d, 0x18, 0xa6, 0x70, 0x0d,
+ 0x11, 0xa5, 0x70, 0x0d, 0x09, 0xa4, 0x70, 0x0d, 0x01, 0xa3, 0x70, 0x0c,
+ 0xf9, 0xa2, 0x70, 0x0c, 0xf1, 0xa1, 0x70, 0x0c, 0xe9, 0xa0, 0x70, 0x0c,
+ 0xe1, 0x9f, 0x70, 0x0c, 0xd9, 0x9e, 0x70, 0x0c, 0xd1, 0x9d, 0x70, 0x0c,
+ 0xc8, 0xa6, 0x70, 0x0c, 0xc1, 0xa5, 0x70, 0x0c, 0xb9, 0xa4, 0x70, 0x0c,
+ 0xb1, 0xa3, 0x70, 0x0c, 0xa9, 0xa2, 0x70, 0x0c, 0xa1, 0xa1, 0x70, 0x0c,
+ 0x99, 0xa0, 0x70, 0x0c, 0x91, 0x9f, 0x70, 0x0c, 0x89, 0x9e, 0x70, 0x0c,
+ 0x81, 0x9d, 0x70, 0x0c, 0x78, 0xa6, 0x70, 0x0c, 0x21, 0xa5, 0x70, 0x0c,
+ 0x19, 0xa4, 0x70, 0x0c, 0x11, 0xa3, 0x70, 0x0c, 0x09, 0xa2, 0x70, 0x0c,
+ 0x01, 0xa1, 0x70, 0x0b, 0xf9, 0xa0, 0x70, 0x0b, 0xf1, 0x9f, 0x70, 0x0b,
+ 0xe9, 0x9e, 0x70, 0x0b, 0xe1, 0x9d, 0x70, 0x0b, 0xd8, 0xa6, 0x70, 0x0b,
+ 0xd1, 0xa5, 0x70, 0x0b, 0xc9, 0xa4, 0x70, 0x0b, 0xc1, 0xa3, 0x70, 0x0b,
+ 0xb9, 0xa2, 0x70, 0x0b, 0xb1, 0xa1, 0x70, 0x0b, 0xa9, 0xa0, 0x70, 0x0b,
+ 0xa1, 0x9f, 0x70, 0x0b, 0x99, 0x9e, 0x70, 0x0b, 0x91, 0x9d, 0x70, 0x0b,
+ 0x88, 0xa6, 0x70, 0x0b, 0x81, 0xa5, 0x70, 0x0b, 0x79, 0xa4, 0x70, 0x0b,
+ 0x71, 0xa3, 0x70, 0x0b, 0x69, 0xa2, 0x70, 0x0b, 0x61, 0xa1, 0x70, 0x0b,
+ 0x59, 0xa0, 0x70, 0x0b, 0x51, 0x9f, 0x70, 0x0b, 0x49, 0x9e, 0x70, 0x0b,
+ 0x41, 0x9d, 0x70, 0x0b, 0x38, 0xa3, 0x70, 0x0f, 0x79, 0xa2, 0x70, 0x0f,
+ 0x71, 0xa1, 0x70, 0x0f, 0x69, 0xa0, 0x70, 0x0f, 0x61, 0x9f, 0x70, 0x0f,
+ 0x59, 0x9e, 0x70, 0x0f, 0x51, 0x9d, 0x70, 0x0f, 0x48, 0xa6, 0x70, 0x0f,
+ 0x41, 0xa5, 0x70, 0x0f, 0x39, 0xa4, 0x70, 0x0f, 0x31, 0xa3, 0x70, 0x0f,
+ 0x29, 0xa2, 0x70, 0x0f, 0x21, 0xa1, 0x70, 0x0f, 0x19, 0xa0, 0x70, 0x0f,
+ 0x11, 0x9f, 0x70, 0x0f, 0x09, 0x9e, 0x70, 0x0f, 0x01, 0x9d, 0x70, 0x0e,
+ 0xf8, 0xa6, 0x70, 0x0e, 0xf1, 0xa5, 0x70, 0x0e, 0xe9, 0xa4, 0x70, 0x0e,
+ 0xe1, 0xa3, 0x70, 0x0e, 0xd9, 0xa2, 0x70, 0x0e, 0xd1, 0xa1, 0x70, 0x0e,
+ 0xc9, 0xa0, 0x70, 0x0e, 0xc1, 0x9f, 0x70, 0x0e, 0xb9, 0x9e, 0x70, 0x0e,
+ 0xb1, 0x9d, 0x70, 0x0e, 0xa8, 0xa6, 0x70, 0x0e, 0xa1, 0xa5, 0x70, 0x0e,
+ 0x99, 0xa4, 0x70, 0x0e, 0x91, 0xa3, 0x70, 0x0e, 0x89, 0xa2, 0x70, 0x0e,
+ 0x81, 0xa1, 0x70, 0x0e, 0x79, 0xa0, 0x70, 0x0e, 0x71, 0x9f, 0x70, 0x0e,
+ 0x69, 0x9e, 0x70, 0x0e, 0x61, 0x9d, 0x70, 0x0e, 0x58, 0xa6, 0x70, 0x0e,
+ 0x51, 0xa5, 0x70, 0x0e, 0x49, 0xa4, 0x70, 0x0e, 0x41, 0xa3, 0x70, 0x0e,
+ 0x39, 0xa2, 0x70, 0x0e, 0x31, 0xa1, 0x70, 0x0e, 0x29, 0xa0, 0x70, 0x0e,
+ 0x21, 0x9f, 0x70, 0x0e, 0x19, 0x9e, 0x70, 0x0e, 0x11, 0x9d, 0x70, 0x0e,
+ 0x08, 0xa6, 0x70, 0x0e, 0x01, 0xa5, 0x70, 0x0d, 0xf9, 0xa4, 0x70, 0x0d,
+ 0xf1, 0xa3, 0x70, 0x0d, 0xe9, 0xa2, 0x70, 0x0d, 0xe1, 0xa1, 0x70, 0x0d,
+ 0xd9, 0xa0, 0x70, 0x0d, 0xd1, 0x9f, 0x70, 0x0d, 0xc9, 0x9e, 0x70, 0x0d,
+ 0xc1, 0x9d, 0x70, 0x0d, 0xb8, 0x87, 0x05, 0x2f, 0x0b, 0x01, 0x8b, 0x27,
+ 0x0a, 0xc1, 0x8b, 0x32, 0x19, 0xc1, 0x8b, 0x55, 0x12, 0xc1, 0x8b, 0x78,
+ 0x04, 0xc1, 0x8b, 0x92, 0x0f, 0xc1, 0x8b, 0xb0, 0x0d, 0xc1, 0x8b, 0xd4,
+ 0x09, 0xc1, 0x8b, 0xf5, 0x08, 0xc1, 0x8c, 0x13, 0x18, 0xc1, 0x8c, 0x2d,
+ 0x16, 0xc1, 0x8c, 0x47, 0x06, 0xc1, 0x8c, 0x65, 0x0e, 0xc1, 0x8c, 0x83,
+ 0x14, 0xc1, 0x8c, 0x9d, 0x10, 0xc1, 0x8c, 0xb7, 0x15, 0xc1, 0x8c, 0xe4,
+ 0x1c, 0xc1, 0x8d, 0x02, 0x05, 0xc1, 0x8d, 0x20, 0x0c, 0xc1, 0x8d, 0x3a,
+ 0x1b, 0xc1, 0x8d, 0x54, 0x8b, 0x05, 0x29, 0x23, 0x01, 0x8d, 0x6e, 0x83,
+ 0x05, 0x2a, 0x4b, 0x01, 0x8d, 0x72, 0x91, 0x05, 0x2d, 0xd3, 0x01, 0x8d,
+ 0x76, 0x97, 0x05, 0x2c, 0xaa, 0x01, 0x8d, 0x81, 0x08, 0xc1, 0x8d, 0x85,
+ 0x0d, 0xc1, 0x8d, 0x91, 0x16, 0xc1, 0x8d, 0x9d, 0xc3, 0xe6, 0x5f, 0x05,
+ 0x30, 0xb1, 0xc4, 0x10, 0xd0, 0x05, 0x30, 0xb9, 0x06, 0xc1, 0x8d, 0xaf,
+ 0xc4, 0x9d, 0xd8, 0x05, 0x30, 0xf8, 0xc2, 0x02, 0xa0, 0x05, 0x31, 0x11,
+ 0xc4, 0x02, 0xde, 0x05, 0x31, 0x18, 0xc3, 0x09, 0x9e, 0x05, 0x31, 0x21,
+ 0xc3, 0x0d, 0x14, 0x05, 0x31, 0x28, 0xc2, 0x22, 0xcc, 0x05, 0x31, 0x31,
+ 0xc4, 0x18, 0x10, 0x05, 0x31, 0x38, 0x9f, 0x0f, 0xdb, 0x81, 0xa0, 0x0f,
+ 0xdb, 0x89, 0xa1, 0x0f, 0xdb, 0x91, 0xa2, 0x0f, 0xdb, 0x99, 0xa3, 0x0f,
+ 0xdb, 0xa1, 0xa4, 0x0f, 0xdb, 0xa8, 0xd6, 0x30, 0x7a, 0x01, 0x3e, 0x51,
+ 0xd5, 0x38, 0x00, 0x01, 0x4e, 0x81, 0xd6, 0x30, 0x38, 0x01, 0x57, 0x11,
+ 0xd5, 0x34, 0xcd, 0x01, 0x57, 0x20, 0x00, 0x41, 0x8d, 0xb9, 0x42, 0x00,
+ 0x03, 0xc1, 0x8d, 0xc5, 0xcc, 0x89, 0x55, 0x0f, 0xb5, 0x31, 0xc4, 0x1e,
+ 0xc9, 0x01, 0x71, 0x78, 0xc4, 0x01, 0xc3, 0x01, 0x81, 0x8b, 0x01, 0x8d,
+ 0xd4, 0xd6, 0x31, 0x2a, 0x01, 0x81, 0x92, 0x01, 0x8d, 0xd8, 0x46, 0x0f,
+ 0x88, 0xc1, 0x8d, 0xde, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x31, 0x46, 0x01,
+ 0xfc, 0xc1, 0x8d, 0xea, 0xcf, 0x61, 0xd4, 0x0f, 0xb3, 0xe9, 0x15, 0xc1,
+ 0x8d, 0xf6, 0xd4, 0x3c, 0xb4, 0x0f, 0xbd, 0x98, 0xcc, 0x07, 0xc7, 0x01,
+ 0x16, 0xc9, 0xc9, 0x00, 0xca, 0x01, 0x16, 0xc0, 0xc7, 0xc2, 0xb2, 0x00,
+ 0xe7, 0xb9, 0xcb, 0x40, 0xe1, 0x00, 0xe7, 0x91, 0x48, 0x14, 0x39, 0x41,
+ 0x8e, 0x08, 0xd3, 0x40, 0xd9, 0x00, 0xe7, 0x99, 0xd3, 0x3f, 0xa9, 0x00,
+ 0xe7, 0x81, 0x50, 0x5f, 0x62, 0x41, 0x8e, 0x23, 0xc8, 0x74, 0xc4, 0x00,
+ 0xe7, 0x2b, 0x01, 0x8e, 0x2f, 0xc6, 0x74, 0xc6, 0x00, 0xe7, 0x1b, 0x01,
+ 0x8e, 0x35, 0xc7, 0x02, 0x40, 0x00, 0xe7, 0x10, 0x45, 0x00, 0x5a, 0xc1,
+ 0x8e, 0x3b, 0xc7, 0x0e, 0x70, 0x00, 0xe6, 0xe8, 0xc8, 0x9e, 0xe8, 0x00,
+ 0xe7, 0xc1, 0x43, 0x61, 0x97, 0x41, 0x8e, 0x47, 0xc5, 0x00, 0xd4, 0x00,
+ 0xe7, 0xa1, 0xc5, 0x05, 0x02, 0x00, 0xe6, 0xc0, 0xcf, 0x67, 0xce, 0x00,
+ 0xe6, 0xf9, 0xcd, 0x04, 0xfa, 0x00, 0xe6, 0xf1, 0xcd, 0x7d, 0x78, 0x00,
+ 0xe6, 0xd8, 0xce, 0x74, 0xbe, 0x00, 0xe6, 0xe1, 0xc6, 0xcd, 0xa9, 0x00,
+ 0xe6, 0x80, 0xdb, 0x17, 0xe8, 0x00, 0xe6, 0xbb, 0x01, 0x8e, 0x4d, 0xd3,
+ 0x02, 0x34, 0x00, 0xe6, 0xb1, 0xde, 0x0f, 0xf4, 0x00, 0xe6, 0xa8, 0xc2,
+ 0x00, 0x51, 0x08, 0x2b, 0x89, 0x87, 0x08, 0x2b, 0x90, 0x87, 0x08, 0x2b,
+ 0x99, 0xc2, 0x01, 0x7f, 0x08, 0x2b, 0xa0, 0x87, 0x08, 0x2b, 0xa9, 0xc2,
+ 0x01, 0x7f, 0x08, 0x2b, 0xb0, 0x8b, 0x08, 0x2b, 0xb8, 0xc2, 0x00, 0xd0,
+ 0x08, 0x2b, 0xe9, 0x83, 0x08, 0x2b, 0xe0, 0xc2, 0x1c, 0x52, 0x08, 0x2b,
+ 0xf8, 0xc2, 0x00, 0xdb, 0x08, 0x2c, 0x19, 0x83, 0x08, 0x2c, 0x10, 0x87,
+ 0x08, 0x2c, 0x29, 0xc2, 0x1c, 0x52, 0x08, 0x2c, 0x30, 0xc2, 0x01, 0x7f,
+ 0x08, 0x2c, 0x69, 0x87, 0x08, 0x2c, 0x60, 0x87, 0x08, 0x2c, 0x71, 0xc2,
+ 0x01, 0x7f, 0x08, 0x2c, 0x78, 0xc2, 0x00, 0x51, 0x08, 0x2c, 0xc1, 0x87,
+ 0x08, 0x2c, 0xc8, 0x87, 0x08, 0x2c, 0xd1, 0xc2, 0x01, 0x7f, 0x08, 0x2c,
+ 0xd8, 0x87, 0x08, 0x2c, 0xe1, 0xc2, 0x01, 0x7f, 0x08, 0x2c, 0xe8, 0x8b,
+ 0x08, 0x2c, 0xf0, 0x83, 0x08, 0x2d, 0x19, 0xc2, 0x00, 0xd0, 0x08, 0x2d,
+ 0x20, 0xc2, 0x1c, 0x52, 0x08, 0x2d, 0x30, 0x83, 0x08, 0x2d, 0x49, 0xc2,
+ 0x00, 0xdb, 0x08, 0x2d, 0x50, 0x87, 0x08, 0x2d, 0x61, 0xc2, 0x1c, 0x52,
+ 0x08, 0x2d, 0x68, 0x87, 0x08, 0x2d, 0x99, 0xc2, 0x01, 0x7f, 0x08, 0x2d,
+ 0xa0, 0x87, 0x08, 0x2d, 0xa9, 0xc2, 0x01, 0x7f, 0x08, 0x2d, 0xb0, 0xc7,
+ 0x3f, 0xe8, 0x01, 0x0a, 0xe9, 0xc6, 0xd3, 0x5b, 0x01, 0x0a, 0xd0, 0xc7,
+ 0x3f, 0xe8, 0x01, 0x0a, 0xe1, 0xc6, 0x9c, 0x06, 0x01, 0x0a, 0xb9, 0xc8,
+ 0x08, 0x79, 0x00, 0x05, 0xf0, 0xc6, 0x9c, 0x06, 0x01, 0x0a, 0xb1, 0xc6,
+ 0x8d, 0x4d, 0x01, 0x0a, 0xa0, 0xc4, 0x9d, 0x74, 0x01, 0x0a, 0xc9, 0xc6,
+ 0xcf, 0x29, 0x01, 0x0a, 0x80, 0xc4, 0x06, 0x68, 0x01, 0x0a, 0x99, 0xc4,
+ 0x0f, 0x1f, 0x01, 0x0a, 0x90, 0xca, 0x1f, 0x0e, 0x70, 0x03, 0x01, 0xcf,
+ 0x54, 0xbb, 0x70, 0x01, 0xf0, 0xc7, 0x80, 0x2f, 0x70, 0x02, 0xf9, 0x07,
+ 0xc1, 0x8e, 0x53, 0x45, 0x0b, 0x12, 0x41, 0x8e, 0x5f, 0xd0, 0x08, 0xf7,
+ 0x70, 0x02, 0xf1, 0x11, 0x41, 0x8e, 0x6b, 0x45, 0x00, 0x2d, 0xc1, 0x8e,
+ 0x77, 0xce, 0x61, 0xd5, 0x70, 0x02, 0xe0, 0xcb, 0x2c, 0xb4, 0x70, 0x01,
+ 0xf9, 0xcc, 0x01, 0xbb, 0x70, 0x01, 0x10, 0xca, 0x0e, 0xbe, 0x70, 0x01,
+ 0xe9, 0xcf, 0x0f, 0x0a, 0x70, 0x01, 0x08, 0xc8, 0x52, 0x00, 0x70, 0x01,
+ 0xd9, 0xc6, 0x27, 0x5e, 0x70, 0x01, 0x79, 0xc4, 0x40, 0x89, 0x70, 0x01,
+ 0x00, 0x45, 0x09, 0x98, 0xc1, 0x8e, 0x89, 0xca, 0x99, 0x61, 0x70, 0x01,
+ 0x20, 0xc8, 0x60, 0xf4, 0x70, 0x01, 0x59, 0xcb, 0x8e, 0x13, 0x70, 0x01,
+ 0x28, 0xc7, 0x0b, 0x00, 0x70, 0x01, 0x51, 0xc9, 0x2d, 0x85, 0x70, 0x01,
+ 0x39, 0xc8, 0x36, 0x21, 0x70, 0x01, 0x30, 0x97, 0x00, 0xbb, 0x99, 0x8b,
+ 0x00, 0xbb, 0x90, 0xc2, 0x0d, 0xf6, 0x00, 0xbb, 0x81, 0xc2, 0x01, 0x4a,
+ 0x00, 0xbb, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xbb, 0x71, 0xc2, 0x19, 0x2c,
+ 0x00, 0xbb, 0x61, 0xc2, 0x01, 0xc3, 0x00, 0xbb, 0x59, 0xc2, 0x01, 0x5d,
+ 0x00, 0xbb, 0x51, 0xc2, 0x00, 0xb0, 0x00, 0xbb, 0x49, 0x10, 0xc1, 0x8e,
+ 0xad, 0xc2, 0x0e, 0x9a, 0x00, 0xbb, 0x39, 0xc2, 0x01, 0x6f, 0x00, 0xbb,
+ 0x31, 0xc2, 0x01, 0x30, 0x00, 0xbb, 0x21, 0xc2, 0x02, 0x2b, 0x00, 0xbb,
+ 0x19, 0x97, 0x00, 0xbb, 0x11, 0x8b, 0x00, 0xbb, 0x09, 0x83, 0x00, 0xbb,
+ 0x00, 0x83, 0x00, 0xb8, 0x03, 0x01, 0x8e, 0xb7, 0xc2, 0x00, 0xd0, 0x00,
+ 0xb8, 0x89, 0xc2, 0x0d, 0xf6, 0x00, 0xb8, 0x81, 0xc2, 0x01, 0x4a, 0x00,
+ 0xb8, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xb8, 0x71, 0xc2, 0x00, 0x39, 0x00,
+ 0xb8, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xb8, 0x61, 0xc2, 0x01, 0xc3, 0x00,
+ 0xb8, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xb8, 0x51, 0xc2, 0x00, 0xb0, 0x00,
+ 0xb8, 0x49, 0x10, 0xc1, 0x8e, 0xbd, 0xc2, 0x0e, 0x9a, 0x00, 0xb8, 0x39,
+ 0xc2, 0x01, 0x6f, 0x00, 0xb8, 0x31, 0xc2, 0x01, 0x30, 0x00, 0xb8, 0x21,
+ 0xc2, 0x02, 0x2b, 0x00, 0xb8, 0x19, 0x97, 0x00, 0xb8, 0x11, 0x8b, 0x00,
+ 0xb8, 0x08, 0xc8, 0x7a, 0x8b, 0x00, 0xb8, 0xa9, 0xc6, 0x1e, 0x95, 0x00,
+ 0xb8, 0xa0, 0x97, 0x00, 0xb8, 0x99, 0x8b, 0x00, 0xb8, 0x90, 0x4a, 0xa3,
+ 0x3c, 0xc1, 0x8e, 0xc7, 0xce, 0x1c, 0x92, 0x0b, 0x7f, 0x00, 0x46, 0x09,
+ 0x97, 0xc1, 0x8e, 0xe7, 0x47, 0x02, 0x0e, 0x41, 0x8f, 0x0b, 0x44, 0x00,
+ 0xbb, 0xc1, 0x8f, 0x77, 0xd1, 0x55, 0xeb, 0x08, 0xff, 0x79, 0xc9, 0xaf,
+ 0x9c, 0x08, 0xff, 0x61, 0xcc, 0x8a, 0x69, 0x08, 0xff, 0x38, 0xc9, 0xab,
+ 0x0a, 0x08, 0xff, 0x69, 0x4b, 0x9a, 0x10, 0x41, 0x8f, 0x9f, 0xcb, 0x94,
+ 0xfe, 0x08, 0xff, 0x59, 0xcd, 0x73, 0x0d, 0x00, 0x5e, 0xb9, 0xcc, 0x8a,
+ 0x51, 0x00, 0x5f, 0xc0, 0xcb, 0x97, 0xea, 0x08, 0xff, 0x51, 0xca, 0x97,
+ 0xa9, 0x00, 0x5f, 0xb8, 0xc8, 0x42, 0xd2, 0x08, 0xff, 0x31, 0x46, 0x02,
+ 0x0f, 0x41, 0x8f, 0xab, 0xd3, 0x43, 0xf7, 0x08, 0xff, 0x29, 0x45, 0x09,
+ 0x98, 0xc1, 0x90, 0x12, 0xc7, 0xbf, 0xf6, 0x00, 0x5f, 0x99, 0xc9, 0xb0,
+ 0x59, 0x00, 0x5f, 0xb0, 0xd8, 0x25, 0x8b, 0x08, 0xfe, 0xa1, 0x46, 0x02,
+ 0xdd, 0xc1, 0x90, 0x36, 0x44, 0x05, 0x14, 0x41, 0x90, 0x4e, 0x03, 0xc1,
+ 0x90, 0x74, 0x8b, 0x00, 0x5d, 0xfb, 0x01, 0x90, 0x80, 0x97, 0x00, 0x5e,
+ 0x0b, 0x01, 0x90, 0x84, 0x87, 0x00, 0x5e, 0x33, 0x01, 0x90, 0x88, 0x91,
+ 0x00, 0x5e, 0x52, 0x01, 0x90, 0x8c, 0xc3, 0x09, 0x41, 0x00, 0x5f, 0x81,
+ 0x44, 0x05, 0x14, 0xc1, 0x90, 0x90, 0xc4, 0x00, 0xba, 0x00, 0x5f, 0xd0,
+ 0xc4, 0x26, 0x78, 0x08, 0xb6, 0x49, 0xc5, 0x06, 0xdb, 0x08, 0xb6, 0x41,
+ 0x15, 0xc1, 0x90, 0x9c, 0x08, 0xc1, 0x90, 0xa8, 0x16, 0xc1, 0x90, 0xb4,
+ 0xc3, 0x05, 0x14, 0x08, 0xb6, 0x09, 0xc4, 0x15, 0xe7, 0x08, 0xb6, 0x00,
+ 0x83, 0x08, 0xb4, 0x03, 0x01, 0x90, 0xc0, 0x14, 0xc1, 0x90, 0xd2, 0xc2,
+ 0x00, 0xd0, 0x08, 0xb5, 0x49, 0x15, 0xc1, 0x90, 0xdc, 0xc2, 0x02, 0x41,
+ 0x08, 0xb5, 0x31, 0xc2, 0x00, 0xdb, 0x08, 0xb5, 0x29, 0xc2, 0x19, 0x2c,
+ 0x08, 0xb5, 0x19, 0xc2, 0x01, 0xc3, 0x08, 0xb5, 0x11, 0x04, 0xc1, 0x90,
+ 0xe6, 0x12, 0xc1, 0x90, 0xf0, 0x10, 0xc1, 0x90, 0xfa, 0x06, 0xc1, 0x91,
+ 0x10, 0x16, 0xc1, 0x91, 0x1e, 0x0c, 0xc1, 0x91, 0x2c, 0x05, 0xc1, 0x91,
+ 0x36, 0x09, 0xc1, 0x91, 0x40, 0x0d, 0xc1, 0x91, 0x4a, 0x91, 0x08, 0xb4,
+ 0x41, 0x87, 0x08, 0xb4, 0x31, 0x97, 0x08, 0xb4, 0x23, 0x01, 0x91, 0x54,
+ 0x8b, 0x08, 0xb4, 0x12, 0x01, 0x91, 0x58, 0xc5, 0x33, 0x5d, 0x08, 0xb5,
+ 0xb9, 0x42, 0x07, 0xb2, 0xc1, 0x91, 0x5c, 0xc8, 0x14, 0x38, 0x08, 0xb5,
+ 0x58, 0x03, 0xc1, 0x91, 0x68, 0x91, 0x08, 0xb5, 0xa1, 0x87, 0x08, 0xb5,
+ 0x91, 0x97, 0x08, 0xb5, 0x83, 0x01, 0x91, 0x74, 0x8b, 0x08, 0xb5, 0x72,
+ 0x01, 0x91, 0x78, 0xc5, 0xde, 0x25, 0x00, 0xd5, 0x69, 0x0a, 0xc1, 0x91,
+ 0x7c, 0x42, 0x0d, 0xf6, 0xc1, 0x91, 0x88, 0x0d, 0xc1, 0x91, 0x9d, 0x44,
+ 0x38, 0x7e, 0xc1, 0x91, 0xb2, 0x14, 0xc1, 0x91, 0xc7, 0xc6, 0xca, 0xc7,
+ 0x00, 0xd5, 0x29, 0xc5, 0xdc, 0xcc, 0x00, 0xd5, 0x03, 0x01, 0x91, 0xd3,
+ 0x45, 0x28, 0xb1, 0x41, 0x91, 0xd9, 0xc4, 0x26, 0x78, 0x00, 0xd4, 0xc9,
+ 0xc5, 0x06, 0xdb, 0x00, 0xd4, 0xc1, 0x15, 0xc1, 0x91, 0xe1, 0x08, 0xc1,
+ 0x91, 0xed, 0x16, 0xc1, 0x91, 0xf9, 0xc3, 0x05, 0x14, 0x00, 0xd4, 0x89,
+ 0xc4, 0x15, 0xe7, 0x00, 0xd4, 0x80, 0xc4, 0x26, 0x78, 0x00, 0xd4, 0x49,
+ 0xc5, 0x06, 0xdb, 0x00, 0xd4, 0x41, 0x15, 0xc1, 0x92, 0x05, 0x08, 0xc1,
+ 0x92, 0x11, 0x16, 0xc1, 0x92, 0x1d, 0xc3, 0x05, 0x14, 0x00, 0xd4, 0x09,
+ 0xc4, 0x15, 0xe7, 0x00, 0xd4, 0x00, 0xd9, 0x1d, 0xd3, 0x00, 0xd3, 0xf9,
+ 0x4d, 0x30, 0x92, 0x41, 0x92, 0x29, 0x91, 0x00, 0xd3, 0x5b, 0x01, 0x92,
+ 0x49, 0x16, 0xc1, 0x92, 0x57, 0x83, 0x00, 0xd3, 0x0b, 0x01, 0x92, 0x63,
+ 0x87, 0x00, 0xd3, 0x71, 0x97, 0x00, 0xd3, 0x4b, 0x01, 0x92, 0x6f, 0x8b,
+ 0x00, 0xd3, 0x2b, 0x01, 0x92, 0x7a, 0xc7, 0xc2, 0xce, 0x00, 0xd3, 0x10,
+ 0xc8, 0xbd, 0xea, 0x00, 0xd2, 0xa1, 0x0e, 0xc1, 0x92, 0x7e, 0xc2, 0x01,
+ 0x24, 0x00, 0xd2, 0x91, 0xc2, 0x02, 0xe0, 0x00, 0xd2, 0x89, 0x97, 0x00,
+ 0xd2, 0x7b, 0x01, 0x92, 0x97, 0x8b, 0x00, 0xd2, 0x6b, 0x01, 0x92, 0x9b,
+ 0x83, 0x00, 0xd2, 0x59, 0x45, 0x08, 0xcb, 0xc1, 0x92, 0x9f, 0xc2, 0x01,
+ 0x4a, 0x00, 0xd2, 0x29, 0x14, 0xc1, 0x92, 0xcb, 0xc2, 0x01, 0xc3, 0x00,
+ 0xd1, 0xf1, 0xc2, 0x01, 0x5d, 0x00, 0xd1, 0xb9, 0x10, 0xc1, 0x92, 0xd8,
+ 0xc2, 0x0e, 0x9a, 0x00, 0xd1, 0x78, 0x44, 0x1a, 0xce, 0xc1, 0x92, 0xe8,
+ 0x15, 0xc1, 0x92, 0xfc, 0xc2, 0x00, 0xd0, 0x00, 0xca, 0xb9, 0x83, 0x00,
+ 0xca, 0xb0, 0x8b, 0x00, 0xcb, 0x69, 0xc2, 0x0f, 0xe1, 0x00, 0xcb, 0x60,
+ 0x8a, 0x00, 0xcb, 0x31, 0x87, 0x00, 0xcb, 0x28, 0x87, 0x00, 0xcb, 0x50,
+ 0x91, 0x00, 0xcb, 0x40, 0x83, 0x00, 0xcb, 0x11, 0xc2, 0x01, 0x30, 0x00,
+ 0xca, 0x90, 0xc2, 0x00, 0xd0, 0x00, 0xcb, 0x01, 0x83, 0x00, 0xca, 0x80,
+ 0xc2, 0x00, 0xd0, 0x00, 0xca, 0xd1, 0x83, 0x00, 0xca, 0xc8, 0x42, 0x00,
+ 0xe8, 0xc1, 0x93, 0x06, 0xc6, 0xd3, 0x49, 0x05, 0x56, 0xf1, 0xc3, 0x71,
+ 0xe5, 0x05, 0x56, 0xe9, 0xc5, 0xda, 0x2e, 0x05, 0x56, 0xe0, 0xc4, 0x7b,
+ 0x07, 0x05, 0x56, 0x11, 0xc3, 0x1c, 0xd6, 0x05, 0x56, 0x09, 0xc5, 0xda,
+ 0x2e, 0x05, 0x56, 0x01, 0xc2, 0x13, 0x4c, 0x05, 0x55, 0xf8, 0x03, 0xc1,
+ 0x93, 0x10, 0x97, 0x05, 0x55, 0xa3, 0x01, 0x93, 0x26, 0x8b, 0x05, 0x55,
+ 0x93, 0x01, 0x93, 0x31, 0x87, 0x05, 0x55, 0xa9, 0x91, 0x05, 0x55, 0xb0,
+ 0xc3, 0x01, 0x95, 0x05, 0x55, 0x81, 0xc3, 0x01, 0xfd, 0x05, 0x55, 0xb8,
+ 0x45, 0x08, 0xcb, 0xc1, 0x93, 0x35, 0x44, 0x05, 0x36, 0x41, 0x93, 0x8f,
+ 0xcb, 0x50, 0x7f, 0x01, 0x36, 0x51, 0xc8, 0xbd, 0x02, 0x01, 0x5e, 0x10,
+ 0xc6, 0x30, 0x98, 0x01, 0x18, 0xc9, 0x44, 0x06, 0x1f, 0x41, 0x93, 0xe9,
+ 0x46, 0x10, 0x29, 0xc1, 0x93, 0xf5, 0xc5, 0xce, 0x22, 0x01, 0x71, 0xc0,
+ 0xc6, 0xd2, 0x71, 0x01, 0x0a, 0x71, 0x52, 0x46, 0xb6, 0xc1, 0x94, 0x01,
+ 0x45, 0x1a, 0x38, 0xc1, 0x94, 0x0d, 0xc8, 0x52, 0x00, 0x01, 0x71, 0xa8,
+ 0xc8, 0x36, 0x21, 0x01, 0x0a, 0x59, 0xc4, 0x01, 0x96, 0x01, 0x4d, 0x10,
+ 0xc8, 0xbd, 0x0a, 0x01, 0x09, 0x91, 0xc4, 0x0a, 0x8b, 0x01, 0x71, 0x90,
+ 0xd0, 0x59, 0xe2, 0x01, 0x3e, 0x01, 0xce, 0x05, 0x19, 0x01, 0x02, 0xb0,
+ 0x50, 0x5a, 0x72, 0xc1, 0x94, 0x19, 0xcf, 0x65, 0x85, 0x01, 0x59, 0x88,
+ 0xd0, 0x27, 0x1f, 0x01, 0x0f, 0xb1, 0x44, 0x39, 0xfd, 0x41, 0x94, 0x25,
+ 0x4c, 0x89, 0x85, 0xc1, 0x94, 0x3d, 0x4b, 0x95, 0x35, 0xc1, 0x94, 0x49,
+ 0x43, 0x07, 0x6e, 0xc1, 0x94, 0x4f, 0x4c, 0x80, 0x91, 0x41, 0x94, 0x55,
+ 0x15, 0xc1, 0x94, 0x5b, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x08, 0xce, 0x73,
+ 0x7c, 0x01, 0x10, 0x21, 0xc6, 0xd3, 0xc7, 0x01, 0x10, 0x18, 0xc8, 0xb8,
+ 0xba, 0x00, 0x3d, 0x79, 0xc6, 0xcb, 0xc3, 0x00, 0x3d, 0x71, 0xc8, 0xbb,
+ 0x1a, 0x00, 0x3d, 0x58, 0xc8, 0xb8, 0x32, 0x00, 0x3d, 0x49, 0xc6, 0xcc,
+ 0x5f, 0x00, 0x3d, 0x61, 0xc8, 0xb6, 0x52, 0x00, 0x3d, 0x68, 0xc8, 0xb8,
+ 0xaa, 0x00, 0x3d, 0x39, 0xc6, 0xcd, 0x01, 0x00, 0x3d, 0x30, 0xc5, 0xda,
+ 0xec, 0x00, 0x3d, 0x29, 0xc5, 0xd8, 0x3a, 0x00, 0x3d, 0x21, 0x09, 0xc1,
+ 0x94, 0x67, 0x16, 0xc1, 0x94, 0x79, 0x06, 0xc1, 0x94, 0x92, 0x15, 0xc1,
+ 0x94, 0x9c, 0x0a, 0xc1, 0x94, 0xac, 0xc9, 0xb4, 0xd9, 0x00, 0x3c, 0xb9,
+ 0xc8, 0xb7, 0x22, 0x00, 0x3c, 0xb1, 0xc8, 0xbd, 0x92, 0x00, 0x3c, 0xa9,
+ 0xc3, 0xa9, 0x9c, 0x00, 0x3c, 0xa1, 0x1c, 0xc1, 0x94, 0xb8, 0x0e, 0xc1,
+ 0x94, 0xc0, 0xc5, 0xde, 0x7a, 0x00, 0x3c, 0x51, 0xc5, 0xdb, 0x00, 0x00,
+ 0x3c, 0x49, 0xc5, 0xd8, 0xd0, 0x00, 0x3c, 0x41, 0x03, 0xc1, 0x94, 0xcc,
+ 0x0d, 0xc1, 0x94, 0xd8, 0xc3, 0x47, 0x81, 0x00, 0x3c, 0x21, 0xc3, 0x47,
+ 0xd9, 0x00, 0x3c, 0x19, 0x10, 0x41, 0x94, 0xe4, 0x49, 0x3b, 0x93, 0xc1,
+ 0x94, 0xf0, 0xd3, 0x44, 0x0a, 0x00, 0x71, 0xf8, 0xc4, 0x15, 0xe7, 0x00,
+ 0x72, 0x81, 0xc3, 0x05, 0x14, 0x00, 0x72, 0x89, 0x16, 0xc1, 0x95, 0x44,
+ 0x08, 0xc1, 0x95, 0x50, 0x15, 0xc1, 0x95, 0x5c, 0xc5, 0x06, 0xdb, 0x00,
+ 0x72, 0xc1, 0xc4, 0x26, 0x78, 0x00, 0x72, 0xc8, 0xc8, 0x1e, 0x3f, 0x01,
+ 0x19, 0x01, 0xcc, 0x85, 0x71, 0x01, 0x5e, 0x51, 0xcc, 0x83, 0x19, 0x01,
+ 0x71, 0xc9, 0xd0, 0x1d, 0xec, 0x01, 0x72, 0xc9, 0xd1, 0x1a, 0x4a, 0x01,
+ 0x72, 0xd0, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xe9, 0xc3, 0x0a, 0xea, 0x01,
+ 0x18, 0x70, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xe1, 0xc3, 0x0a, 0xea, 0x01,
+ 0x18, 0x78, 0xca, 0xa1, 0xb6, 0x01, 0x49, 0xe8, 0x83, 0x0f, 0x15, 0x6b,
+ 0x01, 0x95, 0x68, 0x04, 0xc1, 0x95, 0x6c, 0x91, 0x0f, 0x15, 0x51, 0x87,
+ 0x0f, 0x15, 0x33, 0x01, 0x95, 0x76, 0x97, 0x0f, 0x15, 0x29, 0x8b, 0x0f,
+ 0x15, 0x0b, 0x01, 0x95, 0x7a, 0xc2, 0x00, 0xdb, 0x0f, 0x15, 0x01, 0xc2,
+ 0x00, 0x39, 0x0f, 0x14, 0xf9, 0xc2, 0x00, 0xd0, 0x0f, 0x14, 0xf1, 0xc2,
+ 0x25, 0x3b, 0x0f, 0x14, 0xe9, 0xc2, 0x01, 0x4a, 0x0f, 0x14, 0xe1, 0xc2,
+ 0x19, 0x2c, 0x0f, 0x14, 0xd9, 0xc3, 0x1c, 0x63, 0x0f, 0x14, 0xd1, 0xc2,
+ 0x0d, 0xf6, 0x0f, 0x14, 0xc9, 0x10, 0xc1, 0x95, 0x7e, 0xc2, 0x01, 0xc3,
+ 0x0f, 0x14, 0xb1, 0xc2, 0x01, 0x30, 0x0f, 0x14, 0xa9, 0xc2, 0x02, 0x2b,
+ 0x0f, 0x14, 0xa1, 0xc2, 0x0e, 0x9a, 0x0f, 0x14, 0x99, 0xc2, 0x01, 0x6f,
+ 0x0f, 0x14, 0x91, 0xc2, 0x00, 0xb0, 0x0f, 0x14, 0x80, 0xc2, 0xe6, 0x7d,
+ 0x0f, 0x92, 0x09, 0xc2, 0x8c, 0x54, 0x0f, 0x92, 0x10, 0xc3, 0xe5, 0x81,
+ 0x0f, 0x92, 0x41, 0xc3, 0xe6, 0x59, 0x0f, 0x92, 0x29, 0xc3, 0xe5, 0xa5,
+ 0x0f, 0x92, 0x00, 0xc3, 0xe6, 0x6b, 0x0f, 0x92, 0x39, 0xc3, 0xe5, 0x3f,
+ 0x0f, 0x92, 0x18, 0xc3, 0xe5, 0x54, 0x0f, 0x92, 0x31, 0xc3, 0xe5, 0xe4,
+ 0x0f, 0x92, 0x20, 0xd8, 0x03, 0xaf, 0x01, 0x3c, 0xe9, 0x46, 0x00, 0x8b,
+ 0x41, 0x95, 0x88, 0xc6, 0x1c, 0xb4, 0x01, 0x01, 0x19, 0xc5, 0xcd, 0xce,
+ 0x0f, 0xa6, 0x81, 0xcc, 0x87, 0x69, 0x0f, 0xb5, 0x48, 0xc4, 0x03, 0xd7,
+ 0x01, 0x31, 0xa9, 0xc3, 0x02, 0x34, 0x01, 0x31, 0xa0, 0xcf, 0x05, 0x98,
+ 0x01, 0x15, 0x51, 0xc9, 0x32, 0x24, 0x01, 0x4c, 0x01, 0xcf, 0x27, 0x65,
+ 0x01, 0x57, 0xa1, 0xd6, 0x30, 0x7a, 0x01, 0x57, 0xa8, 0xc4, 0x18, 0x26,
+ 0x01, 0x01, 0xa1, 0xc3, 0x25, 0xd6, 0x01, 0x4f, 0xd8, 0xd6, 0x2d, 0x62,
+ 0x01, 0x53, 0x41, 0xd6, 0x2c, 0x2e, 0x01, 0x53, 0x48, 0xc9, 0x00, 0xca,
+ 0x01, 0x57, 0xb9, 0xcc, 0x07, 0xc7, 0x01, 0x57, 0xc0, 0xc5, 0xc3, 0x08,
+ 0x0f, 0x9b, 0xc9, 0xc4, 0x55, 0x81, 0x0f, 0xa1, 0x00, 0xc7, 0xc8, 0x70,
+ 0x0e, 0x9a, 0xb1, 0xc7, 0xb6, 0x0b, 0x0e, 0x98, 0xc0, 0xc4, 0x1d, 0xa8,
+ 0x0e, 0x99, 0x59, 0xc7, 0x05, 0x79, 0x0e, 0x98, 0x38, 0xc7, 0xca, 0x37,
+ 0x0e, 0x9a, 0xa9, 0xca, 0xa3, 0x32, 0x0e, 0x99, 0x68, 0xca, 0x9b, 0xe4,
+ 0x0e, 0x9a, 0xa1, 0x0f, 0xc1, 0x95, 0xa0, 0xc8, 0xbc, 0xd2, 0x0e, 0x98,
+ 0x80, 0xc7, 0xb1, 0x21, 0x0e, 0x9a, 0x39, 0xca, 0xa6, 0x20, 0x0e, 0x99,
+ 0x11, 0xd9, 0x1d, 0xa1, 0x0e, 0x98, 0x78, 0x43, 0x5e, 0x7a, 0xc1, 0x95,
+ 0xac, 0x10, 0x41, 0x95, 0xb8, 0xc3, 0x14, 0xc8, 0x0e, 0x9a, 0x79, 0x07,
+ 0x41, 0x95, 0xc2, 0x11, 0xc1, 0x95, 0xce, 0xc6, 0xca, 0xd3, 0x0e, 0x99,
+ 0x48, 0xc9, 0xab, 0x5b, 0x0e, 0x99, 0x99, 0xc8, 0xba, 0xba, 0x0e, 0x99,
+ 0x81, 0xc7, 0xc4, 0xc6, 0x0e, 0x98, 0xf8, 0xc3, 0x01, 0xd2, 0x0e, 0x99,
+ 0xf8, 0x15, 0xc1, 0x95, 0xda, 0xc5, 0xd9, 0x93, 0x0e, 0x98, 0xd1, 0xc3,
+ 0x29, 0x43, 0x0e, 0x98, 0xa0, 0xc5, 0x83, 0x4f, 0x0e, 0x99, 0xa1, 0xc5,
+ 0x5b, 0x25, 0x0e, 0x99, 0x20, 0xd7, 0x28, 0xfb, 0x01, 0x3d, 0xd1, 0xcf,
+ 0x15, 0x36, 0x01, 0x39, 0xd8, 0xcd, 0x7f, 0x59, 0x01, 0x38, 0x31, 0x43,
+ 0x05, 0xbb, 0xc1, 0x95, 0xe4, 0xc4, 0x00, 0xba, 0x01, 0x09, 0x09, 0xcf,
+ 0x62, 0x01, 0x0f, 0xac, 0x00, 0x05, 0xc1, 0x95, 0xf3, 0x03, 0xc1, 0x95,
+ 0xff, 0x42, 0x07, 0xb2, 0xc1, 0x96, 0x0b, 0xc5, 0x33, 0x5d, 0x00, 0x61,
+ 0xe1, 0xc7, 0xc3, 0x61, 0x00, 0x63, 0xb9, 0xc5, 0xdc, 0x40, 0x00, 0x63,
+ 0xf8, 0x45, 0x02, 0x10, 0xc1, 0x96, 0x17, 0xc9, 0x36, 0x53, 0x00, 0x62,
+ 0xa8, 0x03, 0xc1, 0x96, 0x80, 0x8b, 0x00, 0x61, 0xfb, 0x01, 0x96, 0x8c,
+ 0x97, 0x00, 0x62, 0x0b, 0x01, 0x96, 0x90, 0x48, 0xb2, 0x2d, 0xc1, 0x96,
+ 0x94, 0x87, 0x00, 0x62, 0x33, 0x01, 0x96, 0xa2, 0x91, 0x00, 0x62, 0x52,
+ 0x01, 0x96, 0xa6, 0xc4, 0x15, 0xe7, 0x00, 0x63, 0x31, 0xc3, 0x05, 0x14,
+ 0x00, 0x63, 0x39, 0x16, 0xc1, 0x96, 0xaa, 0x08, 0xc1, 0x96, 0xb6, 0x15,
+ 0xc1, 0x96, 0xc2, 0xc5, 0x06, 0xdb, 0x00, 0x63, 0x71, 0xc4, 0x26, 0x78,
+ 0x00, 0x63, 0x78, 0xdb, 0x15, 0xe7, 0x00, 0x63, 0xc1, 0x48, 0xb5, 0xca,
+ 0xc1, 0x96, 0xce, 0x16, 0x41, 0x96, 0xda, 0x00, 0x41, 0x96, 0xe6, 0xca,
+ 0x9e, 0xe6, 0x01, 0x70, 0xd9, 0x44, 0x05, 0x18, 0x41, 0x96, 0xf2, 0xc4,
+ 0x26, 0x78, 0x08, 0xa6, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xa6, 0xc1, 0x15,
+ 0xc1, 0x96, 0xfe, 0x08, 0xc1, 0x97, 0x0a, 0x16, 0xc1, 0x97, 0x16, 0xc3,
+ 0x05, 0x14, 0x08, 0xa6, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xa6, 0x80, 0xd0,
+ 0x50, 0xcf, 0x08, 0xa6, 0x31, 0xc3, 0x7c, 0x50, 0x08, 0xa4, 0x00, 0x03,
+ 0xc1, 0x97, 0x22, 0xc5, 0x33, 0x5d, 0x08, 0xa6, 0x19, 0xcb, 0x1e, 0x89,
+ 0x08, 0xa5, 0xf9, 0x42, 0x07, 0xb2, 0x41, 0x97, 0x2e, 0x03, 0xc1, 0x97,
+ 0x3a, 0x46, 0x2e, 0xee, 0xc1, 0x97, 0x46, 0x91, 0x08, 0xa5, 0xe1, 0x87,
+ 0x08, 0xa5, 0xc9, 0x48, 0xb2, 0x2d, 0xc1, 0x97, 0x4e, 0x97, 0x08, 0xa5,
+ 0x9b, 0x01, 0x97, 0x5c, 0x8b, 0x08, 0xa5, 0x8a, 0x01, 0x97, 0x60, 0xc2,
+ 0x00, 0xd0, 0x08, 0xa5, 0x79, 0x15, 0xc1, 0x97, 0x64, 0x18, 0xc1, 0x97,
+ 0x74, 0xc2, 0x00, 0xdb, 0x08, 0xa5, 0x51, 0xc2, 0x00, 0x39, 0x08, 0xa5,
+ 0x49, 0xc2, 0x19, 0x2c, 0x08, 0xa5, 0x41, 0xc2, 0x01, 0xc3, 0x08, 0xa5,
+ 0x39, 0x04, 0xc1, 0x97, 0x7e, 0x12, 0xc1, 0x97, 0x88, 0x10, 0xc1, 0x97,
+ 0x92, 0x06, 0xc1, 0x97, 0xa8, 0x16, 0xc1, 0x97, 0xb6, 0x0c, 0xc1, 0x97,
+ 0xc4, 0x05, 0xc1, 0x97, 0xce, 0x09, 0xc1, 0x97, 0xd8, 0x0d, 0xc1, 0x97,
+ 0xe2, 0x83, 0x08, 0xa4, 0x0b, 0x01, 0x97, 0xec, 0x91, 0x08, 0xa4, 0x69,
+ 0x87, 0x08, 0xa4, 0x59, 0x97, 0x08, 0xa4, 0x2b, 0x01, 0x97, 0xf8, 0x8b,
+ 0x08, 0xa4, 0x1a, 0x01, 0x97, 0xfc, 0xc9, 0xae, 0x7c, 0x00, 0x78, 0x01,
+ 0x45, 0x10, 0x7a, 0x41, 0x98, 0x00, 0x14, 0xc1, 0x98, 0x1c, 0x42, 0x19,
+ 0x2c, 0xc1, 0x98, 0x2e, 0x0f, 0xc1, 0x98, 0x3a, 0xce, 0x70, 0x50, 0x00,
+ 0x7c, 0x11, 0xc8, 0xbb, 0x42, 0x00, 0x7c, 0x19, 0x42, 0x58, 0x61, 0xc1,
+ 0x98, 0x46, 0x44, 0xe0, 0x6f, 0xc1, 0x98, 0x52, 0xd1, 0x4f, 0x9c, 0x00,
+ 0x7c, 0x60, 0x45, 0x00, 0xba, 0xc1, 0x98, 0x5e, 0x47, 0x02, 0x0e, 0x41,
+ 0x98, 0x70, 0x44, 0x02, 0x11, 0xc1, 0x98, 0xd2, 0x4b, 0x8f, 0xec, 0x41,
+ 0x98, 0xde, 0x46, 0x10, 0xb6, 0xc1, 0x98, 0xea, 0xd1, 0x56, 0xfb, 0x00,
+ 0x78, 0x58, 0x47, 0x90, 0xa7, 0xc1, 0x98, 0xf6, 0x45, 0x95, 0xf1, 0xc1,
+ 0x99, 0x02, 0xc6, 0xd3, 0x19, 0x00, 0x79, 0xc0, 0xc9, 0xb4, 0x37, 0x00,
+ 0x78, 0x41, 0xc3, 0x01, 0xe3, 0x00, 0x78, 0x68, 0x15, 0xc1, 0x99, 0x0e,
+ 0x49, 0xad, 0x6e, 0x41, 0x99, 0x18, 0x44, 0x97, 0x1a, 0xc1, 0x99, 0x24,
+ 0x4a, 0x9f, 0xd6, 0x41, 0x99, 0x33, 0x15, 0xc1, 0x99, 0x3f, 0xd3, 0x47,
+ 0x02, 0x00, 0x7e, 0xd0, 0xd3, 0x45, 0x73, 0x00, 0x78, 0x89, 0xcd, 0x76,
+ 0x01, 0x00, 0x78, 0x90, 0xc2, 0x00, 0x45, 0x00, 0x79, 0xe1, 0xc2, 0x02,
+ 0x2c, 0x00, 0x79, 0xe8, 0xca, 0x9c, 0xfc, 0x00, 0x78, 0xa9, 0xca, 0xa4,
+ 0xfe, 0x00, 0x78, 0xb0, 0x0d, 0xc1, 0x99, 0x4b, 0x09, 0xc1, 0x99, 0x61,
+ 0x10, 0xc1, 0x99, 0x6b, 0x05, 0xc1, 0x99, 0x81, 0xc2, 0x25, 0x3b, 0x00,
+ 0x7a, 0x39, 0x16, 0xc1, 0x99, 0x8b, 0x06, 0xc1, 0x99, 0x9d, 0x12, 0xc1,
+ 0x99, 0xaf, 0x04, 0xc1, 0x99, 0xb9, 0xc2, 0x01, 0xc3, 0x00, 0x7a, 0xc1,
+ 0xc2, 0x01, 0x4a, 0x00, 0x7a, 0xe9, 0x1c, 0xc1, 0x99, 0xc3, 0xc2, 0x00,
+ 0x02, 0x00, 0x7b, 0x01, 0xc2, 0x19, 0x2c, 0x00, 0x7b, 0x09, 0x14, 0xc1,
+ 0x99, 0xcd, 0xc2, 0x00, 0xdb, 0x00, 0x7b, 0x19, 0x15, 0xc1, 0x99, 0xd7,
+ 0xc2, 0x00, 0xd0, 0x00, 0x7b, 0x39, 0x83, 0x00, 0x7b, 0x41, 0xcd, 0x7f,
+ 0xe8, 0x00, 0x7b, 0x50, 0xd4, 0x39, 0x1c, 0x00, 0x78, 0xb9, 0xcb, 0x98,
+ 0x63, 0x00, 0x78, 0xc8, 0xc2, 0x02, 0xa0, 0x00, 0x79, 0x11, 0xc4, 0x02,
+ 0xde, 0x00, 0x79, 0x18, 0xc3, 0x09, 0x9e, 0x00, 0x79, 0x21, 0xc3, 0x0d,
+ 0x14, 0x00, 0x79, 0x28, 0xc2, 0x22, 0xcc, 0x00, 0x79, 0x31, 0xc4, 0x18,
+ 0x10, 0x00, 0x79, 0x38, 0xc3, 0x05, 0x14, 0x00, 0x79, 0x51, 0x16, 0xc1,
+ 0x99, 0xe7, 0x08, 0xc1, 0x99, 0xf3, 0x15, 0xc1, 0x99, 0xff, 0xc5, 0x06,
+ 0xdb, 0x00, 0x79, 0x89, 0xc4, 0x26, 0x78, 0x00, 0x79, 0x91, 0xc4, 0x15,
+ 0xe7, 0x00, 0x79, 0x98, 0x8b, 0x00, 0x7b, 0x98, 0x97, 0x00, 0x7b, 0xa8,
+ 0x94, 0x00, 0x7b, 0xb3, 0x01, 0x9a, 0x0b, 0x8e, 0x00, 0x7b, 0xc2, 0x01,
+ 0x9a, 0x0f, 0x87, 0x00, 0x7b, 0xd8, 0x91, 0x00, 0x7b, 0xe8, 0x8b, 0x00,
+ 0x7c, 0x08, 0x83, 0x01, 0x69, 0x83, 0x01, 0x9a, 0x13, 0x87, 0x01, 0x6b,
+ 0x33, 0x01, 0x9a, 0x84, 0x8b, 0x01, 0x6a, 0x49, 0x97, 0x01, 0x6a, 0x99,
+ 0x91, 0x01, 0x6b, 0x38, 0x8c, 0x01, 0x69, 0xa9, 0x8a, 0x01, 0x6a, 0x08,
+ 0x48, 0xba, 0x82, 0xc1, 0x9a, 0x88, 0xcd, 0x7f, 0x0b, 0x01, 0x6b, 0x20,
+ 0xcb, 0x8d, 0xfd, 0x01, 0x6a, 0x59, 0xc8, 0xb6, 0x7a, 0x01, 0x6a, 0xc0,
+ 0x00, 0xc1, 0x9a, 0xa7, 0xda, 0x05, 0x0d, 0x01, 0x71, 0x50, 0xc2, 0x00,
+ 0xbf, 0x01, 0x52, 0xb1, 0xc3, 0x02, 0x9b, 0x01, 0x52, 0xa8, 0xcb, 0x97,
+ 0x03, 0x01, 0x50, 0x41, 0xcc, 0x86, 0x6d, 0x01, 0x50, 0x38, 0xc7, 0x09,
+ 0x0d, 0x01, 0x49, 0xa1, 0xc9, 0x03, 0xc8, 0x01, 0x49, 0xa9, 0xca, 0x3c,
+ 0xa4, 0x0f, 0xc5, 0x88, 0xc9, 0x01, 0x88, 0x01, 0x49, 0xb1, 0xca, 0x03,
+ 0x87, 0x01, 0x49, 0xb8, 0x48, 0x19, 0x9b, 0xc1, 0x9a, 0xb3, 0x07, 0xc1,
+ 0x9b, 0x11, 0x45, 0x17, 0x15, 0x41, 0x9b, 0x1d, 0x43, 0x01, 0xc5, 0xc1,
+ 0x9b, 0x29, 0x43, 0x2d, 0x2f, 0xc1, 0x9b, 0x35, 0x4b, 0x4c, 0x93, 0x41,
+ 0x9b, 0x41, 0x03, 0xc1, 0x9b, 0xad, 0x45, 0x00, 0x59, 0xc1, 0x9b, 0xbc,
+ 0xd3, 0x44, 0x69, 0x00, 0x47, 0x11, 0xd0, 0x5e, 0x52, 0x00, 0x33, 0x58,
+ 0x4f, 0x2f, 0xa0, 0xc1, 0x9b, 0xcb, 0x03, 0xc1, 0x9b, 0xda, 0x43, 0x0d,
+ 0xed, 0xc1, 0x9b, 0xe4, 0xcd, 0x75, 0xb3, 0x00, 0x32, 0xe8, 0x00, 0xc1,
+ 0x9b, 0xea, 0xc3, 0x13, 0x00, 0x00, 0x32, 0x6a, 0x01, 0x9b, 0xfc, 0xc4,
+ 0x04, 0xa7, 0x00, 0x32, 0x73, 0x01, 0x9c, 0x02, 0xc8, 0x11, 0xf7, 0x00,
+ 0x36, 0xa1, 0xd0, 0x5c, 0x72, 0x00, 0x33, 0x69, 0xce, 0x6f, 0x7e, 0x00,
+ 0x30, 0x10, 0x45, 0x03, 0x14, 0xc1, 0x9c, 0x0f, 0x17, 0xc1, 0x9c, 0x39,
+ 0x46, 0x10, 0x79, 0xc1, 0x9c, 0x4e, 0x44, 0x00, 0xbb, 0xc1, 0x9c, 0x70,
+ 0xd3, 0x46, 0xa3, 0x00, 0x36, 0xf1, 0xc5, 0xd7, 0x18, 0x00, 0x32, 0x8b,
+ 0x01, 0x9c, 0x8c, 0xc8, 0x52, 0x00, 0x00, 0x30, 0xd8, 0xc8, 0xb5, 0x52,
+ 0x00, 0x47, 0x91, 0xc8, 0xb8, 0xc2, 0x00, 0x47, 0x89, 0xc8, 0x6e, 0xbf,
+ 0x00, 0x47, 0x80, 0x44, 0x05, 0x14, 0xc1, 0x9c, 0x90, 0xd1, 0x52, 0x44,
+ 0x00, 0x47, 0x19, 0x03, 0xc1, 0x9c, 0xa2, 0xd2, 0x4b, 0x95, 0x00, 0x33,
+ 0x61, 0xda, 0x1b, 0x1a, 0x00, 0x30, 0xf0, 0x45, 0x00, 0x33, 0xc1, 0x9c,
+ 0xb1, 0xc4, 0x0a, 0x8b, 0x00, 0x30, 0x60, 0xd3, 0x41, 0xbd, 0x00, 0x44,
+ 0xf9, 0x44, 0x08, 0x0b, 0x41, 0x9c, 0xcc, 0xd1, 0x53, 0xcb, 0x00, 0x44,
+ 0x89, 0x11, 0xc1, 0x9c, 0xd8, 0xce, 0x70, 0xa4, 0x00, 0x37, 0x49, 0xcb,
+ 0x8e, 0x13, 0x00, 0x33, 0x50, 0xcc, 0x41, 0x19, 0x00, 0x44, 0x71, 0x4a,
+ 0x6f, 0xc8, 0x41, 0x9c, 0xe4, 0x4c, 0x81, 0x09, 0xc1, 0x9c, 0xf6, 0x46,
+ 0x0a, 0x10, 0x41, 0x9d, 0x02, 0xca, 0x43, 0x42, 0x00, 0x30, 0x29, 0xc4,
+ 0x00, 0xba, 0x00, 0x30, 0x00, 0xc4, 0x26, 0x78, 0x00, 0x33, 0x49, 0xc5,
+ 0x06, 0xdb, 0x00, 0x33, 0x41, 0x15, 0xc1, 0x9d, 0x0e, 0x08, 0xc1, 0x9d,
+ 0x1a, 0x16, 0xc1, 0x9d, 0x26, 0xc3, 0x05, 0x14, 0x00, 0x33, 0x09, 0xc4,
+ 0x15, 0xe7, 0x00, 0x33, 0x00, 0xd1, 0x57, 0x1d, 0x00, 0x30, 0x51, 0xca,
+ 0xa8, 0x00, 0x00, 0x30, 0x48, 0x44, 0x40, 0xee, 0xc1, 0x9d, 0x32, 0xc7,
+ 0xc2, 0xdc, 0x07, 0xd8, 0xb1, 0xc8, 0xb8, 0x22, 0x00, 0x2c, 0x38, 0xc2,
+ 0x16, 0x5a, 0x00, 0x2b, 0xab, 0x01, 0x9d, 0x4a, 0xc3, 0xb1, 0x0d, 0x00,
+ 0x2c, 0x31, 0xc2, 0x38, 0x2a, 0x00, 0x2c, 0x29, 0x42, 0x00, 0x3c, 0xc1,
+ 0x9d, 0x56, 0x12, 0xc1, 0x9d, 0x5e, 0x05, 0xc1, 0x9d, 0x6a, 0x14, 0xc1,
+ 0x9d, 0x76, 0x16, 0xc1, 0x9d, 0x80, 0x18, 0xc1, 0x9d, 0x90, 0x15, 0xc1,
+ 0x9d, 0x9a, 0x0c, 0xc1, 0x9d, 0xa6, 0xc3, 0x2a, 0x91, 0x00, 0x2b, 0xb1,
+ 0xc3, 0x00, 0xc3, 0x00, 0x2b, 0xa1, 0x09, 0xc1, 0x9d, 0xb0, 0xc2, 0x01,
+ 0x23, 0x00, 0x2b, 0x81, 0xc3, 0xe6, 0x1a, 0x00, 0x2b, 0x69, 0xc4, 0xe1,
+ 0x0b, 0x00, 0x2b, 0x61, 0xc3, 0x03, 0x0d, 0x00, 0x2b, 0x59, 0x1c, 0xc1,
+ 0x9d, 0xbc, 0x07, 0xc1, 0x9d, 0xc6, 0xc2, 0x0e, 0x9a, 0x00, 0x2b, 0x21,
+ 0xc3, 0x18, 0xf2, 0x00, 0x2b, 0x11, 0xc3, 0x36, 0x99, 0x00, 0x2b, 0x08,
+ 0xc3, 0xb1, 0x0d, 0x00, 0x2a, 0xb1, 0xc2, 0x38, 0x2a, 0x00, 0x2a, 0xa9,
+ 0x42, 0x00, 0x3c, 0xc1, 0x9d, 0xd4, 0x12, 0xc1, 0x9d, 0xdc, 0xc2, 0x16,
+ 0x5a, 0x00, 0x2a, 0x2b, 0x01, 0x9d, 0xe8, 0x05, 0xc1, 0x9d, 0xee, 0x14,
+ 0xc1, 0x9d, 0xfa, 0x16, 0xc1, 0x9e, 0x04, 0x18, 0xc1, 0x9e, 0x0e, 0x15,
+ 0xc1, 0x9e, 0x18, 0x0c, 0xc1, 0x9e, 0x24, 0xc3, 0x2a, 0x91, 0x00, 0x2a,
+ 0x31, 0xc3, 0x00, 0xc3, 0x00, 0x2a, 0x21, 0x09, 0xc1, 0x9e, 0x2e, 0xc2,
+ 0x01, 0x23, 0x00, 0x2a, 0x01, 0xc3, 0xe6, 0x1a, 0x00, 0x29, 0xe9, 0xc4,
+ 0xe1, 0x0b, 0x00, 0x29, 0xe1, 0xc3, 0x03, 0x0d, 0x00, 0x29, 0xd9, 0x1c,
+ 0xc1, 0x9e, 0x3a, 0x07, 0xc1, 0x9e, 0x44, 0xc2, 0x0e, 0x9a, 0x00, 0x29,
+ 0xa1, 0xc3, 0x36, 0x99, 0x00, 0x29, 0x89, 0xc3, 0x18, 0xf2, 0x00, 0x29,
+ 0x90, 0xc4, 0x6b, 0x52, 0x0f, 0x48, 0x01, 0x06, 0xc1, 0x9e, 0x52, 0xc4,
+ 0x76, 0x31, 0x0f, 0x48, 0x11, 0xc4, 0xe4, 0xb3, 0x0f, 0x48, 0x19, 0x04,
+ 0xc1, 0x9e, 0x5e, 0x15, 0xc1, 0x9e, 0x68, 0xc2, 0x00, 0x67, 0x0f, 0x48,
+ 0x31, 0xc2, 0x00, 0x39, 0x0f, 0x48, 0x41, 0x87, 0x0f, 0x48, 0x49, 0xc2,
+ 0x00, 0x87, 0x0f, 0x48, 0x51, 0x8b, 0x0f, 0x48, 0x59, 0x91, 0x0f, 0x48,
+ 0x61, 0x1b, 0xc1, 0x9e, 0x74, 0xc3, 0x7e, 0x89, 0x0f, 0x48, 0x79, 0x10,
+ 0xc1, 0x9e, 0x7e, 0x0d, 0xc1, 0x9e, 0x90, 0x97, 0x0f, 0x48, 0x99, 0xc4,
+ 0xe1, 0x4b, 0x0f, 0x48, 0xa1, 0xc3, 0x11, 0xee, 0x0f, 0x48, 0xa9, 0xc2,
+ 0x00, 0xd0, 0x0f, 0x48, 0xb1, 0xc4, 0xd8, 0x3a, 0x0f, 0x48, 0xb9, 0x09,
+ 0xc1, 0x9e, 0xa2, 0xc2, 0x00, 0x16, 0x0f, 0x48, 0xd1, 0xc2, 0x02, 0x41,
+ 0x0f, 0x48, 0xe1, 0xc3, 0xa9, 0xfc, 0x0f, 0x48, 0xf8, 0xc4, 0x14, 0x74,
+ 0x0f, 0x49, 0x19, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0x78, 0x83, 0x0f, 0x49,
+ 0x31, 0xc2, 0x01, 0x7f, 0x0f, 0x49, 0x48, 0xc9, 0xaf, 0x27, 0x0f, 0x49,
+ 0x39, 0xc2, 0x00, 0xd0, 0x0f, 0x4a, 0x18, 0xc2, 0x01, 0x7f, 0x0f, 0x49,
+ 0x81, 0x83, 0x0f, 0x49, 0xa0, 0xc2, 0x05, 0x1d, 0x0f, 0x49, 0x91, 0xc2,
+ 0x19, 0x2c, 0x0f, 0x49, 0xd9, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0xe8, 0xc2,
+ 0x0f, 0x9b, 0x0f, 0x49, 0x99, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0xf9, 0xc2,
+ 0x01, 0x53, 0x0f, 0x4a, 0x10, 0x83, 0x0f, 0x49, 0xd1, 0xc2, 0x00, 0x51,
+ 0x0f, 0x4a, 0x00, 0xc2, 0x02, 0xa0, 0x0f, 0x4a, 0x91, 0xc4, 0x02, 0xde,
+ 0x0f, 0x4a, 0x98, 0xc3, 0x09, 0x9e, 0x0f, 0x4a, 0xa1, 0xc3, 0x0d, 0x14,
+ 0x0f, 0x4a, 0xa8, 0xc2, 0x22, 0xcc, 0x0f, 0x4a, 0xb1, 0xc4, 0x18, 0x10,
+ 0x0f, 0x4a, 0xb8, 0xc7, 0xc0, 0xeb, 0x0f, 0xbb, 0x61, 0xc4, 0xe4, 0xab,
+ 0x0f, 0xbb, 0x58, 0x02, 0x41, 0x9e, 0xac, 0xc6, 0xcf, 0x8f, 0x0f, 0xbb,
+ 0x2b, 0x01, 0x9e, 0xb4, 0x48, 0xba, 0xf2, 0x41, 0x9e, 0xb8, 0xc3, 0x04,
+ 0xa1, 0x0f, 0xb9, 0x01, 0xcb, 0x4c, 0x50, 0x0f, 0xb9, 0x28, 0xc2, 0x34,
+ 0x63, 0x0f, 0xba, 0x61, 0xcb, 0x95, 0xa3, 0x0f, 0xba, 0x71, 0xc6, 0xd1,
+ 0xed, 0x0f, 0xba, 0x80, 0xc5, 0xd9, 0x25, 0x0f, 0xbb, 0x0b, 0x01, 0x9e,
+ 0xc7, 0xc4, 0x2d, 0xad, 0x0f, 0xbb, 0x00, 0xc4, 0xdf, 0x63, 0x0f, 0xba,
+ 0x5b, 0x01, 0x9e, 0xcd, 0xc7, 0xc7, 0x0b, 0x0f, 0xba, 0xc0, 0xc4, 0xde,
+ 0xcf, 0x0f, 0xbb, 0x19, 0xca, 0x9f, 0x68, 0x0f, 0xbb, 0x20, 0xc2, 0xe5,
+ 0xfd, 0x0f, 0xba, 0x00, 0xc4, 0x91, 0x3d, 0x0f, 0xb9, 0x49, 0xc5, 0x87,
+ 0xc4, 0x0f, 0xba, 0x40, 0xc5, 0xd5, 0xe7, 0x0f, 0xb9, 0x93, 0x01, 0x9e,
+ 0xd3, 0xc5, 0xd9, 0x8e, 0x0f, 0xb9, 0xdb, 0x01, 0x9e, 0xdd, 0xc4, 0x08,
+ 0x88, 0x0f, 0xbb, 0x68, 0xc2, 0xe5, 0xfd, 0x0f, 0xb8, 0xc8, 0xc5, 0xdb,
+ 0x7d, 0x0f, 0xb8, 0x53, 0x01, 0x9e, 0xe3, 0xc5, 0xd7, 0xb8, 0x0f, 0xb8,
+ 0xb2, 0x01, 0x9e, 0xed, 0x46, 0x5d, 0x2b, 0xc1, 0x9e, 0xf3, 0xc4, 0x4e,
+ 0x2b, 0x0f, 0xb8, 0x68, 0x96, 0x0f, 0xb8, 0xa3, 0x01, 0x9e, 0xff, 0xc9,
+ 0xad, 0xec, 0x0f, 0xb9, 0xc8, 0xcd, 0x7b, 0x49, 0x0f, 0xba, 0x91, 0xd3,
+ 0x40, 0xc6, 0x0f, 0xba, 0xe2, 0x01, 0x9f, 0x05, 0x00, 0xc1, 0x9f, 0x0b,
+ 0xc6, 0xd1, 0xab, 0x0f, 0xb8, 0x28, 0xc4, 0xe1, 0x17, 0x0f, 0xb9, 0xb3,
+ 0x01, 0x9f, 0x1d, 0xc2, 0x01, 0xdf, 0x0f, 0xba, 0x29, 0xc5, 0xd9, 0x16,
+ 0x0f, 0xbb, 0x50, 0x02, 0x41, 0x9f, 0x23, 0xc2, 0xe5, 0xfd, 0x0f, 0xb8,
+ 0xe8, 0xc8, 0xb7, 0x42, 0x0f, 0xba, 0xb1, 0xc2, 0x00, 0x33, 0x0f, 0xbb,
+ 0x70, 0xc4, 0xb4, 0xbe, 0x0f, 0xbb, 0x91, 0xc5, 0xd5, 0x60, 0x0f, 0xbb,
+ 0x98, 0x22, 0xc1, 0x9f, 0x2b, 0x21, 0xc1, 0x9f, 0x53, 0x20, 0xc1, 0x9f,
+ 0x84, 0x1f, 0xc1, 0x9f, 0xaf, 0x1e, 0xc1, 0x9f, 0xda, 0x1d, 0xc1, 0xa0,
+ 0x05, 0x23, 0xc1, 0xa0, 0x29, 0x24, 0xc1, 0xa0, 0x54, 0x25, 0xc1, 0xa0,
+ 0x7c, 0x26, 0x41, 0xa0, 0xa4, 0x1d, 0xc1, 0xa0, 0xd2, 0x1e, 0xc1, 0xa1,
+ 0x0c, 0x1f, 0xc1, 0xa1, 0x3a, 0x20, 0xc1, 0xa1, 0x65, 0x21, 0xc1, 0xa1,
+ 0x90, 0x22, 0xc1, 0xa1, 0xb8, 0x23, 0xc1, 0xa1, 0xe0, 0x24, 0xc1, 0xa2,
+ 0x08, 0x25, 0xc1, 0xa2, 0x30, 0x26, 0x41, 0xa2, 0x58, 0x1d, 0xc1, 0xa2,
+ 0x80, 0x1e, 0xc1, 0xa2, 0xb1, 0x1f, 0xc1, 0xa2, 0xdf, 0x20, 0xc1, 0xa3,
+ 0x0a, 0x21, 0xc1, 0xa3, 0x32, 0x22, 0xc1, 0xa3, 0x5a, 0x23, 0xc1, 0xa3,
+ 0x82, 0x24, 0xc1, 0xa3, 0xad, 0x25, 0xc1, 0xa3, 0xd5, 0x26, 0x41, 0xa4,
+ 0x00, 0x1d, 0xc1, 0xa4, 0x2e, 0x1e, 0xc1, 0xa4, 0x59, 0x1f, 0xc1, 0xa4,
+ 0x81, 0x20, 0xc1, 0xa4, 0xac, 0x21, 0xc1, 0xa4, 0xd7, 0x22, 0xc1, 0xa4,
+ 0xff, 0x23, 0xc1, 0xa5, 0x2a, 0x24, 0xc1, 0xa5, 0x58, 0x25, 0xc1, 0xa5,
+ 0x83, 0x26, 0x41, 0xa5, 0xb1, 0x1d, 0xc1, 0xa5, 0xdb, 0x1e, 0xc1, 0xa6,
+ 0x03, 0x1f, 0xc1, 0xa6, 0x2b, 0x20, 0xc1, 0xa6, 0x53, 0x21, 0xc1, 0xa6,
+ 0x7b, 0x22, 0xc1, 0xa6, 0xa3, 0x23, 0xc1, 0xa6, 0xd1, 0x24, 0xc1, 0xa6,
+ 0xf9, 0x25, 0xc1, 0xa7, 0x21, 0x26, 0x41, 0xa7, 0x49, 0x1d, 0xc1, 0xa7,
+ 0x69, 0x1e, 0xc1, 0xa7, 0x8d, 0x1f, 0xc1, 0xa7, 0xb5, 0xc2, 0xe6, 0x4a,
+ 0x0a, 0x32, 0x30, 0xcf, 0x62, 0xe2, 0x01, 0x11, 0x99, 0xd2, 0x4e, 0x77,
+ 0x01, 0x4a, 0x00, 0xd3, 0x44, 0x7c, 0x01, 0x0d, 0xb1, 0x4f, 0x01, 0x93,
+ 0x41, 0xa7, 0xdd, 0xe0, 0x09, 0x07, 0x0f, 0xa8, 0x20, 0xc8, 0x52, 0x09,
+ 0x01, 0x4d, 0x21, 0xc8, 0x4e, 0x9b, 0x01, 0x4c, 0xf0, 0xc9, 0x18, 0x66,
+ 0x01, 0x10, 0xb8, 0xc2, 0x00, 0xd0, 0x08, 0xba, 0x21, 0x83, 0x08, 0xba,
+ 0x18, 0xc2, 0x00, 0xd0, 0x08, 0xba, 0x11, 0x83, 0x08, 0xba, 0x08, 0xc2,
+ 0x01, 0x5d, 0x08, 0xb8, 0xd1, 0xc2, 0x01, 0x30, 0x08, 0xb8, 0xb1, 0xc2,
+ 0x01, 0x6f, 0x08, 0xb8, 0x28, 0xc6, 0x00, 0x41, 0x08, 0xb9, 0xe9, 0xcc,
+ 0x82, 0x65, 0x08, 0xb9, 0xe0, 0x00, 0x41, 0xa7, 0xfb, 0xc4, 0x02, 0xb9,
+ 0x01, 0x1a, 0xf1, 0xc8, 0x52, 0x09, 0x01, 0x1a, 0xc0, 0xc9, 0x52, 0x08,
+ 0x01, 0x1b, 0xc0, 0xcb, 0x95, 0xf0, 0x01, 0x1b, 0x91, 0x45, 0x9a, 0x3d,
+ 0xc1, 0xa8, 0x3f, 0xc8, 0xba, 0x22, 0x01, 0x1a, 0xe8, 0x00, 0xc1, 0xa8,
+ 0x51, 0xca, 0x6c, 0xe2, 0x01, 0x1a, 0xb0, 0x00, 0xc1, 0xa8, 0x63, 0x43,
+ 0x33, 0x60, 0x41, 0xa8, 0x75, 0xc9, 0xae, 0x22, 0x01, 0x1b, 0x69, 0xcc,
+ 0x88, 0x89, 0x01, 0x1b, 0x18, 0xc9, 0x20, 0xa8, 0x01, 0x1b, 0x29, 0x42,
+ 0x00, 0x15, 0xc1, 0xa8, 0x81, 0xc8, 0x52, 0x09, 0x01, 0x1a, 0xe1, 0xc9,
+ 0x02, 0xfe, 0x01, 0x1a, 0x49, 0xc3, 0xba, 0x27, 0x01, 0x19, 0xf0, 0x46,
+ 0x00, 0xe2, 0xc1, 0xa8, 0x8d, 0xd9, 0x1f, 0xae, 0x01, 0x12, 0x30, 0x87,
+ 0x08, 0x59, 0xa9, 0xc2, 0x00, 0x4e, 0x08, 0x59, 0x48, 0xc3, 0x04, 0x65,
+ 0x08, 0x59, 0xa1, 0x0a, 0xc1, 0xa8, 0x9c, 0x87, 0x08, 0x59, 0x78, 0x87,
+ 0x08, 0x59, 0x59, 0xc2, 0x0c, 0x43, 0x08, 0x59, 0x50, 0xc2, 0x02, 0x6f,
+ 0x08, 0x59, 0x39, 0xc2, 0x0c, 0x43, 0x08, 0x59, 0x31, 0x87, 0x08, 0x59,
+ 0x29, 0x09, 0x41, 0xa8, 0xa6, 0xc2, 0x01, 0x7f, 0x08, 0x58, 0xe1, 0x87,
+ 0x08, 0x58, 0xd8, 0xc2, 0x01, 0x7f, 0x08, 0x58, 0xd1, 0x87, 0x08, 0x58,
+ 0xc9, 0xc2, 0x00, 0xac, 0x08, 0x58, 0xe8, 0xc2, 0x01, 0x7f, 0x08, 0x58,
+ 0xb1, 0xc2, 0x09, 0x3b, 0x08, 0x58, 0xa9, 0x87, 0x08, 0x58, 0xa0, 0xc2,
+ 0x00, 0x5f, 0x08, 0x58, 0x99, 0x87, 0x08, 0x58, 0x89, 0xc2, 0x0c, 0x43,
+ 0x08, 0x58, 0x90, 0x97, 0x08, 0x58, 0x78, 0x8b, 0x08, 0x58, 0x68, 0x91,
+ 0x08, 0x58, 0x58, 0x87, 0x08, 0x58, 0x48, 0x87, 0x08, 0x58, 0x33, 0x01,
+ 0xa8, 0xb6, 0x83, 0x08, 0x58, 0x0b, 0x01, 0xa8, 0xba, 0x90, 0x08, 0x58,
+ 0x21, 0x91, 0x08, 0x58, 0x10, 0x87, 0x08, 0x59, 0x01, 0xc2, 0x01, 0x7f,
+ 0x08, 0x59, 0x08, 0x87, 0x08, 0x59, 0x81, 0xc2, 0x01, 0x7f, 0x08, 0x59,
+ 0x90, 0x00, 0x41, 0xa8, 0xc2, 0x0a, 0xc1, 0xa8, 0xce, 0xc2, 0x00, 0xc4,
+ 0x08, 0x08, 0x83, 0x01, 0xa8, 0xe0, 0x19, 0x41, 0xa8, 0xe6, 0x0b, 0xc1,
+ 0xa8, 0xf6, 0x11, 0x41, 0xa9, 0x08, 0xc2, 0x22, 0xcc, 0x08, 0x08, 0x63,
+ 0x01, 0xa9, 0x1a, 0xc4, 0x18, 0x10, 0x08, 0x08, 0x6a, 0x01, 0xa9, 0x27,
+ 0x00, 0xc1, 0xa9, 0x34, 0x9b, 0x08, 0x08, 0xba, 0x01, 0xa9, 0x40, 0x00,
+ 0xc1, 0xa9, 0x46, 0xc2, 0x0d, 0x10, 0x08, 0x08, 0xc2, 0x01, 0xa9, 0x52,
+ 0xc9, 0xb3, 0x20, 0x08, 0x09, 0xb9, 0x08, 0xc1, 0xa9, 0x58, 0xce, 0x71,
+ 0x22, 0x08, 0x09, 0xc9, 0xcd, 0x7d, 0xb9, 0x08, 0x09, 0xd0, 0xc4, 0x02,
+ 0x6d, 0x08, 0x08, 0x01, 0xc3, 0x02, 0xa3, 0x08, 0x08, 0x08, 0x45, 0x00,
+ 0x2d, 0xc1, 0xa9, 0x64, 0x44, 0x00, 0x4a, 0x41, 0xa9, 0xa4, 0xc2, 0x02,
+ 0xae, 0x01, 0x2b, 0xcb, 0x01, 0xa9, 0xbc, 0xc4, 0x00, 0x49, 0x01, 0x2b,
+ 0xc3, 0x01, 0xa9, 0xc2, 0x42, 0x00, 0x58, 0xc1, 0xa9, 0xc8, 0xc5, 0x00,
+ 0x2c, 0x01, 0x2b, 0xd1, 0xc8, 0x00, 0x5f, 0x01, 0x28, 0x1b, 0x01, 0xa9,
+ 0xd7, 0x4f, 0x61, 0x5c, 0xc1, 0xa9, 0xdd, 0x4c, 0x52, 0xbb, 0xc1, 0xa9,
+ 0xe9, 0xca, 0x01, 0x68, 0x01, 0x28, 0x08, 0x45, 0x00, 0x5a, 0xc1, 0xa9,
+ 0xf5, 0x43, 0x11, 0x19, 0x41, 0xaa, 0x10, 0x4b, 0x99, 0xb8, 0xc1, 0xaa,
+ 0x28, 0x4b, 0x8e, 0x76, 0xc1, 0xaa, 0x3a, 0x4a, 0x11, 0x39, 0xc1, 0xaa,
+ 0x4c, 0x4a, 0x5c, 0x42, 0x41, 0xaa, 0x5e, 0x4b, 0x99, 0xb8, 0xc1, 0xaa,
+ 0x70, 0x4b, 0x8e, 0x76, 0xc1, 0xaa, 0x82, 0x4a, 0x5c, 0x42, 0xc1, 0xaa,
+ 0x94, 0x4a, 0x11, 0x39, 0x41, 0xaa, 0xac, 0x4f, 0x66, 0xc0, 0xc1, 0xaa,
+ 0xc4, 0xdc, 0x12, 0xc5, 0x01, 0x2a, 0x31, 0xdc, 0x13, 0xc1, 0x01, 0x2a,
+ 0x21, 0x4f, 0x12, 0xca, 0x41, 0xaa, 0xd6, 0xd8, 0x25, 0xa3, 0x01, 0x1d,
+ 0xb0, 0xc8, 0x1e, 0x3f, 0x01, 0x19, 0x09, 0xcc, 0x85, 0x71, 0x01, 0x5e,
+ 0x59, 0xd0, 0x1d, 0xec, 0x01, 0x72, 0xd9, 0xd1, 0x1a, 0x4a, 0x01, 0x72,
+ 0xe0, 0x05, 0xc1, 0xaa, 0xe8, 0xcc, 0x88, 0x65, 0x01, 0x71, 0x28, 0x05,
+ 0xc1, 0xaa, 0xf4, 0xcc, 0x88, 0x65, 0x01, 0x71, 0x20, 0xd0, 0x5d, 0x52,
+ 0x01, 0x4e, 0x91, 0xcf, 0x66, 0x66, 0x01, 0x4e, 0x88, 0xca, 0xa7, 0xec,
+ 0x0f, 0xaa, 0x79, 0xca, 0x9e, 0x78, 0x0f, 0xcb, 0x18, 0xc5, 0xdb, 0xd7,
+ 0x0f, 0xa6, 0x88, 0x97, 0x01, 0x8d, 0x00, 0x89, 0x01, 0x89, 0x5b, 0x01,
+ 0xab, 0x00, 0x90, 0x01, 0x89, 0x78, 0x8a, 0x01, 0x8d, 0xc8, 0x90, 0x01,
+ 0x89, 0x61, 0x97, 0x01, 0x8d, 0x19, 0x8a, 0x01, 0x8d, 0xc1, 0x99, 0x01,
+ 0x8d, 0xe0, 0x99, 0x01, 0x8d, 0xe8, 0x8b, 0x01, 0x8d, 0x10, 0x8a, 0x01,
+ 0x88, 0x99, 0x8b, 0x01, 0x8d, 0x09, 0x9b, 0x01, 0x8d, 0xd0, 0x8a, 0x01,
+ 0x88, 0xa0, 0x8a, 0x01, 0x88, 0xa8, 0x8b, 0x01, 0x88, 0xf3, 0x01, 0xab,
+ 0x04, 0x97, 0x01, 0x89, 0x03, 0x01, 0xab, 0x0a, 0x90, 0x01, 0x89, 0x13,
+ 0x01, 0xab, 0x10, 0x8f, 0x01, 0x8d, 0x81, 0x8a, 0x01, 0x8d, 0xf8, 0x97,
+ 0x01, 0x89, 0x09, 0xcf, 0x33, 0xad, 0x01, 0x89, 0x71, 0x91, 0x01, 0x8d,
+ 0x31, 0x10, 0xc1, 0xab, 0x18, 0x8f, 0x01, 0x8d, 0x89, 0x87, 0x01, 0x8d,
+ 0xf0, 0x8a, 0x01, 0x88, 0xe9, 0x8b, 0x01, 0x88, 0xf9, 0x90, 0x01, 0x89,
+ 0x1b, 0x01, 0xab, 0x20, 0x94, 0x01, 0x89, 0x31, 0x87, 0x01, 0x8d, 0x20,
+ 0x97, 0x01, 0x89, 0x49, 0x8a, 0x01, 0x89, 0x69, 0x94, 0x01, 0x8d, 0x41,
+ 0xc2, 0x1b, 0x88, 0x01, 0x8d, 0x53, 0x01, 0xab, 0x28, 0x8f, 0x01, 0x8d,
+ 0x60, 0xc2, 0x1b, 0x88, 0x01, 0x8d, 0x58, 0xa1, 0x0f, 0xd8, 0x43, 0x01,
+ 0xab, 0x2c, 0x9f, 0x0f, 0xd8, 0x13, 0x01, 0xab, 0x37, 0xa2, 0x0f, 0xd8,
+ 0x83, 0x01, 0xab, 0x50, 0xa0, 0x0f, 0xd8, 0x23, 0x01, 0xab, 0x54, 0xa3,
+ 0x0f, 0xd8, 0xf8, 0xa2, 0x0f, 0xd8, 0x9b, 0x01, 0xab, 0x65, 0xa1, 0x0f,
+ 0xd8, 0x5b, 0x01, 0xab, 0x69, 0xa3, 0x0f, 0xd9, 0x10, 0xa2, 0x0f, 0xd8,
+ 0x8b, 0x01, 0xab, 0x74, 0xa0, 0x0f, 0xd8, 0x2b, 0x01, 0xab, 0x78, 0xa3,
+ 0x0f, 0xd9, 0x01, 0xa1, 0x0f, 0xd8, 0x4a, 0x01, 0xab, 0x8a, 0xa3, 0x0f,
+ 0xd9, 0x68, 0xa3, 0x0f, 0xd9, 0x31, 0xa2, 0x0f, 0xd8, 0xb2, 0x01, 0xab,
+ 0x91, 0x05, 0xc1, 0xab, 0x95, 0x15, 0xc1, 0xab, 0xbc, 0x16, 0xc1, 0xab,
+ 0xff, 0x06, 0xc1, 0xac, 0x1d, 0x14, 0xc1, 0xac, 0x30, 0x0e, 0xc1, 0xac,
+ 0x42, 0xd6, 0x2c, 0xb2, 0x01, 0x3a, 0x99, 0x08, 0xc1, 0xac, 0x52, 0xc3,
+ 0xe6, 0x74, 0x01, 0x38, 0x91, 0x0f, 0xc1, 0xac, 0x5a, 0x17, 0xc1, 0xac,
+ 0x66, 0x0a, 0xc1, 0xac, 0x70, 0x12, 0xc1, 0xac, 0x7e, 0x43, 0x00, 0x5f,
+ 0xc1, 0xac, 0x90, 0xc6, 0xca, 0x91, 0x01, 0x4e, 0x99, 0xc7, 0xc9, 0x3b,
+ 0x01, 0x5e, 0x20, 0x4a, 0x14, 0xda, 0xc1, 0xac, 0x9c, 0x4f, 0x66, 0x93,
+ 0x41, 0xac, 0xae, 0xca, 0x9f, 0xc2, 0x0f, 0xa5, 0xb9, 0xc9, 0xb3, 0x32,
+ 0x0f, 0xa5, 0xb1, 0xcb, 0x99, 0x60, 0x0f, 0xa5, 0xa9, 0xc8, 0x77, 0x99,
+ 0x0f, 0xa5, 0xa0, 0xc2, 0x00, 0x45, 0x0f, 0x9c, 0x43, 0x01, 0xac, 0xc2,
+ 0x42, 0x00, 0x30, 0x41, 0xac, 0xc8, 0x0f, 0xc1, 0xac, 0xd8, 0xc3, 0x01,
+ 0xad, 0x00, 0xda, 0xd2, 0x01, 0xac, 0xe7, 0x4a, 0xa2, 0x24, 0xc1, 0xac,
+ 0xed, 0x4b, 0x95, 0x40, 0xc1, 0xac, 0xf9, 0x4a, 0x51, 0x89, 0xc1, 0xad,
+ 0x05, 0x06, 0x41, 0xad, 0x29, 0x42, 0x00, 0xb0, 0xc1, 0xad, 0x43, 0xc4,
+ 0xde, 0xcb, 0x00, 0xda, 0xf0, 0xc4, 0x26, 0x78, 0x00, 0xda, 0xc9, 0xc5,
+ 0x06, 0xdb, 0x00, 0xda, 0xc1, 0x15, 0xc1, 0xad, 0x4f, 0x08, 0xc1, 0xad,
+ 0x5b, 0x16, 0xc1, 0xad, 0x67, 0xc3, 0x05, 0x14, 0x00, 0xda, 0x89, 0xc4,
+ 0x15, 0xe7, 0x00, 0xda, 0x80, 0x03, 0xc1, 0xad, 0x73, 0xc9, 0xa9, 0xfc,
+ 0x00, 0xda, 0x51, 0xc8, 0xbe, 0x12, 0x00, 0xda, 0x49, 0x07, 0xc1, 0xad,
+ 0x8e, 0x16, 0xc1, 0xad, 0x9a, 0x0d, 0xc1, 0xad, 0xa7, 0xc2, 0x00, 0xd0,
+ 0x00, 0xd9, 0x99, 0xc2, 0x0d, 0xf6, 0x00, 0xd9, 0x93, 0x01, 0xad, 0xb4,
+ 0xc2, 0x01, 0x4a, 0x00, 0xd9, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xd9, 0x73,
+ 0x01, 0xad, 0xba, 0xc2, 0x00, 0x39, 0x00, 0xd9, 0x6b, 0x01, 0xad, 0xc3,
+ 0xc2, 0x19, 0x2c, 0x00, 0xd9, 0x61, 0xc2, 0x01, 0xc3, 0x00, 0xd9, 0x59,
+ 0xc2, 0x01, 0x5d, 0x00, 0xd9, 0x4b, 0x01, 0xad, 0xcc, 0xc2, 0x00, 0xb0,
+ 0x00, 0xd9, 0x3b, 0x01, 0xad, 0xd2, 0x10, 0xc1, 0xad, 0xd8, 0xc2, 0x0e,
+ 0x9a, 0x00, 0xd9, 0x23, 0x01, 0xad, 0xeb, 0xc2, 0x25, 0x3b, 0x00, 0xd8,
+ 0xd3, 0x01, 0xad, 0xf1, 0xc2, 0x00, 0x64, 0x00, 0xd8, 0xc3, 0x01, 0xad,
+ 0xf7, 0xc2, 0x01, 0x30, 0x00, 0xd8, 0xab, 0x01, 0xad, 0xfd, 0xc5, 0xde,
+ 0x0c, 0x00, 0xd8, 0x8b, 0x01, 0xae, 0x03, 0xc5, 0xdb, 0x5f, 0x00, 0xd8,
+ 0x4b, 0x01, 0xae, 0x09, 0xc5, 0xd7, 0xbd, 0x00, 0xd8, 0x3a, 0x01, 0xae,
+ 0x0f, 0xc5, 0xd8, 0xbc, 0x00, 0xda, 0x13, 0x01, 0xae, 0x15, 0x16, 0xc1,
+ 0xae, 0x1b, 0xc8, 0xb5, 0xaa, 0x00, 0xd9, 0xe3, 0x01, 0xae, 0x2a, 0xc7,
+ 0xc4, 0x79, 0x00, 0xd9, 0xd3, 0x01, 0xae, 0x30, 0xc4, 0xc5, 0x6e, 0x00,
+ 0xd9, 0xc3, 0x01, 0xae, 0x36, 0xc3, 0x96, 0x9c, 0x00, 0xd9, 0xb2, 0x01,
+ 0xae, 0x3c, 0xc7, 0xc3, 0x8b, 0x00, 0xd9, 0xa1, 0xc5, 0xd4, 0x75, 0x00,
+ 0xd8, 0x21, 0xc6, 0xcf, 0x59, 0x00, 0xd8, 0x19, 0xc5, 0xde, 0x48, 0x00,
+ 0xd8, 0x11, 0x44, 0xdf, 0x3f, 0x41, 0xae, 0x42, 0x44, 0x08, 0xcb, 0xc1,
+ 0xae, 0x4e, 0x43, 0x01, 0xc8, 0xc1, 0xae, 0x5a, 0xc8, 0xaf, 0x82, 0x0b,
+ 0x57, 0x90, 0x8b, 0x0b, 0x57, 0x69, 0x87, 0x0b, 0x57, 0x63, 0x01, 0xae,
+ 0x66, 0x97, 0x0b, 0x57, 0x53, 0x01, 0xae, 0x70, 0x91, 0x0b, 0x57, 0x43,
+ 0x01, 0xae, 0x76, 0x83, 0x0b, 0x57, 0x39, 0xc2, 0x01, 0x4a, 0x0b, 0x56,
+ 0xdb, 0x01, 0xae, 0x7a, 0xc2, 0x00, 0xb0, 0x0b, 0x57, 0x29, 0x1b, 0xc1,
+ 0xae, 0x80, 0xc2, 0x5d, 0xb3, 0x0b, 0x57, 0x19, 0xc2, 0x01, 0x5d, 0x0b,
+ 0x57, 0x11, 0xc2, 0x00, 0xf1, 0x0b, 0x57, 0x09, 0xc2, 0x00, 0x89, 0x0b,
+ 0x56, 0xf9, 0x06, 0xc1, 0xae, 0x8c, 0x09, 0xc1, 0xae, 0x96, 0xc2, 0x01,
+ 0x6c, 0x0b, 0x56, 0xe1, 0xc4, 0xdf, 0xdf, 0x0b, 0x56, 0xd1, 0xc2, 0x00,
+ 0x81, 0x0b, 0x56, 0xc9, 0x0d, 0xc1, 0xae, 0xa2, 0xc3, 0x00, 0x50, 0x0b,
+ 0x56, 0xa1, 0xc2, 0x00, 0x87, 0x0b, 0x56, 0x99, 0xc2, 0x00, 0x40, 0x0b,
+ 0x56, 0x90, 0x45, 0xd6, 0x6e, 0xc1, 0xae, 0xac, 0x83, 0x05, 0x35, 0x59,
+ 0x07, 0xc1, 0xae, 0xd0, 0x17, 0xc1, 0xae, 0xda, 0x8b, 0x05, 0x36, 0xe8,
+ 0x83, 0x05, 0x35, 0x09, 0x97, 0x05, 0x35, 0x19, 0xc3, 0x17, 0x29, 0x05,
+ 0x35, 0xd1, 0x07, 0xc1, 0xae, 0xe4, 0x91, 0x05, 0x36, 0xfb, 0x01, 0xae,
+ 0xf2, 0x8b, 0x05, 0x37, 0x29, 0xc2, 0x00, 0xb0, 0x05, 0x37, 0x48, 0x07,
+ 0xc1, 0xae, 0xfe, 0x0b, 0xc1, 0xaf, 0x0c, 0x97, 0x05, 0x36, 0x61, 0xc2,
+ 0x10, 0x11, 0x05, 0x36, 0x88, 0x03, 0xc1, 0xaf, 0x16, 0x8b, 0x05, 0x37,
+ 0x21, 0x07, 0x41, 0xaf, 0x1e, 0xc2, 0x16, 0x5a, 0x05, 0x35, 0x41, 0xc3,
+ 0x4f, 0x43, 0x05, 0x35, 0x89, 0x0c, 0xc1, 0xaf, 0x26, 0x97, 0x05, 0x35,
+ 0xeb, 0x01, 0xaf, 0x38, 0xc3, 0x01, 0xe2, 0x05, 0x36, 0x19, 0x16, 0xc1,
+ 0xaf, 0x3e, 0x8b, 0x05, 0x36, 0x79, 0x09, 0xc1, 0xaf, 0x4a, 0x83, 0x05,
+ 0x36, 0xd8, 0x83, 0x05, 0x35, 0x51, 0xc4, 0xe2, 0x9f, 0x05, 0x35, 0x71,
+ 0x97, 0x05, 0x36, 0x69, 0x8b, 0x05, 0x36, 0xe1, 0xc2, 0x7f, 0xc0, 0x05,
+ 0x36, 0xf0, 0x07, 0xc1, 0xaf, 0x5a, 0x97, 0x05, 0x35, 0xa9, 0x8b, 0x05,
+ 0x36, 0x71, 0x04, 0xc1, 0xaf, 0x64, 0x83, 0x05, 0x37, 0x19, 0x91, 0x05,
+ 0x37, 0x30, 0xc2, 0x5d, 0xa1, 0x05, 0x35, 0xa1, 0x0a, 0xc1, 0xaf, 0x70,
+ 0x8b, 0x05, 0x35, 0xb9, 0xc3, 0xd7, 0xe2, 0x05, 0x35, 0xc9, 0xc4, 0xbf,
+ 0xf1, 0x05, 0x37, 0x60, 0xc2, 0x7f, 0xc0, 0x05, 0x35, 0xf9, 0xc2, 0x92,
+ 0xb5, 0x05, 0x36, 0x09, 0x83, 0x05, 0x36, 0x10, 0xc2, 0x0f, 0xe1, 0x05,
+ 0x36, 0x49, 0x83, 0x05, 0x36, 0xd0, 0xc2, 0x02, 0xe0, 0x05, 0x36, 0x59,
+ 0x97, 0x05, 0x36, 0xc1, 0xc2, 0x00, 0x7a, 0x05, 0x36, 0xc9, 0xc5, 0xd8,
+ 0xe9, 0x05, 0x37, 0x68, 0x4c, 0x85, 0x4d, 0xc1, 0xaf, 0x84, 0xc2, 0x01,
+ 0xc3, 0x05, 0x37, 0xa8, 0xe0, 0x06, 0x87, 0x01, 0x3d, 0x58, 0xcb, 0x96,
+ 0x74, 0x0f, 0xac, 0x11, 0xda, 0x1c, 0xee, 0x0f, 0xa8, 0xc8, 0xc4, 0x40,
+ 0x89, 0x00, 0x00, 0x41, 0x5a, 0x1a, 0x30, 0x41, 0xaf, 0x90, 0x4c, 0x8a,
+ 0xc9, 0xc1, 0xaf, 0x9c, 0xc9, 0xad, 0xc8, 0x00, 0xdf, 0x30, 0xc7, 0xc6,
+ 0xc5, 0x00, 0xdf, 0x99, 0xc5, 0xc8, 0x5d, 0x00, 0xdf, 0x90, 0x8a, 0x00,
+ 0xdf, 0x89, 0xc2, 0x00, 0x75, 0x00, 0xdf, 0x80, 0x97, 0x00, 0xdf, 0x73,
+ 0x01, 0xaf, 0xac, 0x45, 0xc6, 0xd3, 0xc1, 0xaf, 0xb2, 0x91, 0x00, 0xdf,
+ 0x61, 0x8b, 0x00, 0xdf, 0x51, 0x87, 0x00, 0xdf, 0x3b, 0x01, 0xaf, 0xba,
+ 0xc8, 0xbf, 0x0a, 0x00, 0xdf, 0x40, 0x97, 0x00, 0xdf, 0x29, 0x8b, 0x00,
+ 0xdf, 0x21, 0x0f, 0xc1, 0xaf, 0xbe, 0x10, 0xc1, 0xaf, 0xcb, 0xc2, 0x00,
+ 0x64, 0x00, 0xdf, 0x09, 0x15, 0xc1, 0xaf, 0xe7, 0xc2, 0x00, 0xdb, 0x00,
+ 0xde, 0xf1, 0xc2, 0x19, 0x2c, 0x00, 0xde, 0xd9, 0xc2, 0x00, 0x39, 0x00,
+ 0xde, 0x91, 0xc2, 0x0e, 0x9a, 0x00, 0xde, 0x89, 0xc2, 0x25, 0x3b, 0x00,
+ 0xde, 0x81, 0xc2, 0x01, 0x30, 0x00, 0xde, 0x71, 0xc2, 0x00, 0xb0, 0x00,
+ 0xde, 0x3b, 0x01, 0xaf, 0xf7, 0xc2, 0x01, 0x4a, 0x00, 0xde, 0x59, 0xc7,
+ 0xc6, 0xd3, 0x00, 0xde, 0x31, 0xc2, 0x01, 0x5d, 0x00, 0xde, 0x29, 0xc2,
+ 0x00, 0xd0, 0x00, 0xde, 0x11, 0x83, 0x00, 0xde, 0x00, 0x0d, 0xc1, 0xaf,
+ 0xfd, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0xc9, 0x15, 0xc1, 0xb0, 0x0a, 0xc2,
+ 0x00, 0xdb, 0x00, 0x4d, 0x91, 0x14, 0xc1, 0xb0, 0x1a, 0x1b, 0xc1, 0xb0,
+ 0x2d, 0xc2, 0x01, 0xc3, 0x00, 0x4d, 0x71, 0x04, 0xc1, 0xb0, 0x37, 0x12,
+ 0xc1, 0xb0, 0x41, 0x10, 0xc1, 0xb0, 0x4b, 0x06, 0xc1, 0xb0, 0x61, 0x16,
+ 0xc1, 0xb0, 0x6f, 0x0c, 0xc1, 0xb0, 0x7d, 0x05, 0xc1, 0xb0, 0x87, 0x09,
+ 0xc1, 0xb0, 0x91, 0x83, 0x00, 0x4c, 0x2b, 0x01, 0xb0, 0x9b, 0x91, 0x00,
+ 0x4c, 0x99, 0x8b, 0x00, 0x4c, 0x3b, 0x01, 0xb0, 0xa7, 0x97, 0x00, 0x4c,
+ 0x4b, 0x01, 0xb0, 0xab, 0x18, 0xc1, 0xb0, 0xaf, 0x87, 0x00, 0x4c, 0x78,
+ 0x44, 0x00, 0xbb, 0xc1, 0xb0, 0xbb, 0xca, 0xa0, 0x26, 0x00, 0x4f, 0xf0,
+ 0x03, 0xc1, 0xb0, 0xd1, 0x91, 0x00, 0x4e, 0x59, 0x87, 0x00, 0x4e, 0x39,
+ 0x48, 0xb2, 0x2d, 0xc1, 0xb0, 0xdd, 0x97, 0x00, 0x4e, 0x0b, 0x01, 0xb0,
+ 0xeb, 0x8b, 0x00, 0x4d, 0xfa, 0x01, 0xb0, 0xef, 0xcd, 0x73, 0x0d, 0x00,
+ 0x4e, 0xb9, 0xc3, 0x7c, 0x50, 0x00, 0x4c, 0x01, 0xd0, 0x50, 0xcf, 0x00,
+ 0x4f, 0xe8, 0xc4, 0x15, 0xe7, 0x00, 0x4f, 0x31, 0xc3, 0x05, 0x14, 0x00,
+ 0x4f, 0x39, 0x16, 0xc1, 0xb0, 0xf3, 0x08, 0xc1, 0xb0, 0xff, 0x15, 0xc1,
+ 0xb1, 0x0b, 0xc5, 0x06, 0xdb, 0x00, 0x4f, 0x71, 0xc4, 0x26, 0x78, 0x00,
+ 0x4f, 0x78, 0xc4, 0x01, 0xc3, 0x00, 0x4f, 0x91, 0xc4, 0x00, 0xba, 0x00,
+ 0x4f, 0x98, 0x4a, 0x78, 0x64, 0xc1, 0xb1, 0x17, 0xd3, 0x44, 0x8f, 0x00,
+ 0x4f, 0xc8, 0xe0, 0x06, 0x07, 0x01, 0x5a, 0xf0, 0xc2, 0x10, 0x11, 0x00,
+ 0xd0, 0xd9, 0x91, 0x00, 0xd0, 0xd1, 0x87, 0x00, 0xd0, 0xc9, 0x97, 0x00,
+ 0xd0, 0xc1, 0x8b, 0x00, 0xd0, 0xb8, 0xc2, 0x00, 0xd0, 0x00, 0xd0, 0xb1,
+ 0x83, 0x00, 0xd0, 0xa9, 0xc2, 0x0d, 0xf6, 0x00, 0xd0, 0xa1, 0xc2, 0x02,
+ 0x41, 0x00, 0xd0, 0x99, 0xc2, 0x00, 0xdb, 0x00, 0xd0, 0x91, 0xc2, 0x00,
+ 0x39, 0x00, 0xd0, 0x89, 0xc2, 0x19, 0x2c, 0x00, 0xd0, 0x81, 0x10, 0xc1,
+ 0xb1, 0x2a, 0xc2, 0x25, 0x3b, 0x00, 0xd0, 0x69, 0xc2, 0x00, 0x64, 0x00,
+ 0xd0, 0x61, 0xc2, 0x0e, 0x9a, 0x00, 0xd0, 0x49, 0xc2, 0x01, 0x6f, 0x00,
+ 0xd0, 0x41, 0x0f, 0xc1, 0xb1, 0x3c, 0xc2, 0x01, 0x5d, 0x00, 0xd0, 0x29,
+ 0xc2, 0x00, 0xb0, 0x00, 0xd0, 0x21, 0xc2, 0x01, 0x30, 0x00, 0xd0, 0x09,
+ 0xc2, 0x02, 0x2b, 0x00, 0xd0, 0x00, 0x83, 0x00, 0xba, 0x41, 0xc2, 0x01,
+ 0x30, 0x00, 0xba, 0x28, 0x45, 0xda, 0xf1, 0xc1, 0xb1, 0x46, 0xc5, 0xd5,
+ 0x4c, 0x01, 0x40, 0x00, 0xc6, 0x57, 0xec, 0x08, 0x83, 0xf9, 0xc3, 0x05,
+ 0x14, 0x08, 0x82, 0x93, 0x01, 0xb1, 0x7b, 0xc4, 0x26, 0x78, 0x08, 0x82,
+ 0xd3, 0x01, 0xb1, 0x7f, 0xc5, 0x06, 0xdb, 0x08, 0x82, 0xcb, 0x01, 0xb1,
+ 0x85, 0x15, 0xc1, 0xb1, 0x89, 0x08, 0xc1, 0xb1, 0x9b, 0x16, 0x41, 0xb1,
+ 0xa3, 0x91, 0x08, 0x80, 0x8b, 0x01, 0xb1, 0xb1, 0x0e, 0xc1, 0xb1, 0xb7,
+ 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x99, 0xc2, 0x00, 0x39, 0x08, 0x81, 0x69,
+ 0xc2, 0x19, 0x2c, 0x08, 0x81, 0x61, 0xc2, 0x01, 0xc3, 0x08, 0x81, 0x59,
+ 0x04, 0xc1, 0xb1, 0xc1, 0x12, 0xc1, 0xb1, 0xcb, 0x10, 0xc1, 0xb1, 0xd5,
+ 0x06, 0xc1, 0xb1, 0xeb, 0x16, 0xc1, 0xb1, 0xf9, 0x0c, 0xc1, 0xb2, 0x07,
+ 0x05, 0xc1, 0xb2, 0x11, 0x09, 0xc1, 0xb2, 0x1b, 0x0d, 0xc1, 0xb2, 0x25,
+ 0x83, 0x08, 0x80, 0x2b, 0x01, 0xb2, 0x2f, 0x87, 0x08, 0x80, 0x79, 0x18,
+ 0xc1, 0xb2, 0x3b, 0x97, 0x08, 0x80, 0x4b, 0x01, 0xb2, 0x45, 0x8b, 0x08,
+ 0x80, 0x3b, 0x01, 0xb2, 0x49, 0x15, 0x41, 0xb2, 0x4d, 0x4a, 0x6f, 0xc8,
+ 0xc1, 0xb2, 0x5d, 0xc5, 0x1e, 0x96, 0x08, 0x82, 0x30, 0xd0, 0x5c, 0x82,
+ 0x08, 0x83, 0x81, 0xcb, 0x93, 0xf6, 0x08, 0x80, 0x21, 0xcb, 0x8f, 0xe1,
+ 0x08, 0x80, 0x19, 0xcb, 0x1e, 0x89, 0x08, 0x80, 0x01, 0xc8, 0x14, 0x38,
+ 0x08, 0x80, 0x09, 0xc7, 0x40, 0xe5, 0x08, 0x80, 0x10, 0x45, 0x09, 0x98,
+ 0xc1, 0xb2, 0x86, 0xcb, 0x97, 0xf5, 0x08, 0x82, 0x41, 0xc4, 0x19, 0x53,
+ 0x08, 0x82, 0x38, 0x0e, 0xc1, 0xb2, 0xaa, 0xcc, 0x80, 0xa9, 0x08, 0x82,
+ 0x61, 0x42, 0x00, 0x58, 0x41, 0xb2, 0xb6, 0x42, 0x0f, 0x7b, 0xc1, 0xb2,
+ 0xc0, 0x4a, 0x9a, 0xb8, 0x41, 0xb2, 0xcc, 0xc6, 0x2e, 0x82, 0x0e, 0x86,
+ 0xc9, 0xc6, 0xca, 0x9d, 0x0e, 0x86, 0xc0, 0x00, 0x41, 0xb2, 0xd8, 0x00,
+ 0xc1, 0xb2, 0xe4, 0xc2, 0x01, 0x6f, 0x0e, 0x80, 0x82, 0x01, 0xb2, 0xf0,
+ 0xc5, 0x57, 0xbd, 0x0e, 0x84, 0x49, 0xc6, 0xad, 0x17, 0x0e, 0x82, 0x51,
+ 0xc6, 0xcb, 0xf9, 0x0e, 0x81, 0xd2, 0x01, 0xb2, 0xf4, 0x44, 0xe1, 0x8b,
+ 0xc1, 0xb2, 0xfa, 0xc6, 0xcf, 0x11, 0x0e, 0x80, 0x60, 0x43, 0x0f, 0xf8,
+ 0xc1, 0xb3, 0x02, 0xc5, 0xd5, 0x88, 0x0e, 0x80, 0x38, 0x46, 0xd0, 0xc1,
+ 0xc1, 0xb3, 0x0e, 0x42, 0x0f, 0x7b, 0x41, 0xb3, 0x38, 0x11, 0xc1, 0xb3,
+ 0x42, 0xc2, 0x01, 0x0f, 0x0e, 0x84, 0x29, 0x45, 0xdd, 0xa3, 0x41, 0xb3,
+ 0x54, 0x45, 0xd7, 0x81, 0xc1, 0xb3, 0x60, 0x44, 0xcf, 0x3b, 0xc1, 0xb3,
+ 0x6c, 0x42, 0x00, 0x4e, 0xc1, 0xb3, 0x76, 0x43, 0x07, 0xc5, 0x41, 0xb3,
+ 0x82, 0x46, 0xd2, 0x7d, 0xc1, 0xb3, 0x8c, 0xca, 0x9b, 0x9e, 0x0e, 0x81,
+ 0x40, 0xc4, 0x1a, 0x73, 0x0e, 0x87, 0x41, 0xc5, 0xd6, 0x00, 0x0e, 0x83,
+ 0xf3, 0x01, 0xb3, 0x98, 0xca, 0x9a, 0x68, 0x0e, 0x82, 0x20, 0xc6, 0xcb,
+ 0xa5, 0x0e, 0x87, 0x13, 0x01, 0xb3, 0x9e, 0xc7, 0xc0, 0xf9, 0x0e, 0x86,
+ 0xf2, 0x01, 0xb3, 0xa2, 0xc4, 0x77, 0x35, 0x0e, 0x83, 0x48, 0xc3, 0x05,
+ 0xa9, 0x0e, 0x83, 0x33, 0x01, 0xb3, 0xa6, 0x10, 0x41, 0xb3, 0xac, 0xca,
+ 0x9e, 0xd2, 0x0e, 0x87, 0x39, 0x09, 0xc1, 0xb3, 0xb8, 0x03, 0xc1, 0xb3,
+ 0xc7, 0x45, 0x1a, 0x57, 0xc1, 0xb3, 0xd3, 0xc3, 0x1f, 0x1d, 0x0e, 0x84,
+ 0x32, 0x01, 0xb3, 0xe9, 0x44, 0x1a, 0x13, 0xc1, 0xb3, 0xef, 0x42, 0x00,
+ 0xbd, 0x41, 0xb4, 0x07, 0x11, 0xc1, 0xb4, 0x13, 0xc4, 0x7a, 0x04, 0x0e,
+ 0x82, 0x80, 0xd4, 0x39, 0x30, 0x0e, 0x86, 0x61, 0xd6, 0x2e, 0x80, 0x0e,
+ 0x86, 0x59, 0x10, 0xc1, 0xb4, 0x22, 0x48, 0x1a, 0x02, 0xc1, 0xb4, 0x2e,
+ 0x4f, 0x67, 0x47, 0xc1, 0xb4, 0x3a, 0x4a, 0xa3, 0x6e, 0xc1, 0xb4, 0x46,
+ 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0xa2, 0x01, 0xb4, 0x62, 0xc8, 0xba, 0x3a,
+ 0x0e, 0x85, 0x81, 0xca, 0xa2, 0xec, 0x0e, 0x85, 0x79, 0xcb, 0x92, 0x33,
+ 0x0e, 0x85, 0x70, 0xc6, 0xce, 0xd5, 0x0e, 0x86, 0x51, 0xc6, 0xd1, 0x63,
+ 0x0e, 0x86, 0x49, 0xc5, 0xd6, 0x9b, 0x0e, 0x86, 0x40, 0xc3, 0x63, 0x2b,
+ 0x0e, 0x83, 0x39, 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0xd8, 0x8b, 0x0e, 0x82,
+ 0xb1, 0xc2, 0x00, 0x45, 0x0e, 0x80, 0xc0, 0x08, 0xc1, 0xb4, 0x68, 0xc7,
+ 0xc2, 0x9d, 0x0e, 0x84, 0xc0, 0xd5, 0x32, 0xc0, 0x0e, 0x85, 0x61, 0x43,
+ 0x01, 0x55, 0x41, 0xb4, 0x74, 0xd4, 0x3d, 0xcc, 0x0e, 0x85, 0xb1, 0xc7,
+ 0xc3, 0x45, 0x0e, 0x83, 0xd8, 0xcd, 0x79, 0x75, 0x0e, 0x83, 0xa1, 0xcb,
+ 0x94, 0x17, 0x0e, 0x83, 0x00, 0x12, 0xc1, 0xb4, 0x80, 0xcb, 0x94, 0xbc,
+ 0x0e, 0x85, 0x89, 0xcd, 0x7a, 0xfb, 0x0e, 0x85, 0x51, 0x16, 0xc1, 0xb4,
+ 0x8c, 0x45, 0xd9, 0xed, 0xc1, 0xb4, 0x98, 0xce, 0x6d, 0x5c, 0x0e, 0x85,
+ 0x20, 0x0b, 0xc1, 0xb4, 0xa4, 0x45, 0xaa, 0x6b, 0x41, 0xb4, 0xb4, 0xc6,
+ 0xd0, 0xf1, 0x0e, 0x84, 0x41, 0xc5, 0x13, 0x43, 0x0e, 0x81, 0x89, 0xc4,
+ 0xae, 0x15, 0x0e, 0x80, 0x78, 0x07, 0xc1, 0xb4, 0xca, 0xc3, 0x02, 0x44,
+ 0x0e, 0x80, 0xa0, 0x45, 0x7c, 0xbe, 0xc1, 0xb4, 0xd9, 0xc3, 0xbe, 0x04,
+ 0x0e, 0x81, 0x70, 0xc3, 0x63, 0x2b, 0x0e, 0x83, 0xa9, 0xc8, 0x9c, 0xe0,
+ 0x0e, 0x81, 0x60, 0x00, 0xc1, 0xb4, 0xef, 0xca, 0x9c, 0xde, 0x0e, 0x81,
+ 0x00, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0x39, 0xc8, 0x9c, 0xe0, 0x0e, 0x80,
+ 0xa8, 0x45, 0xb9, 0x3c, 0xc1, 0xb5, 0x01, 0x0e, 0x41, 0xb5, 0x1a, 0x42,
+ 0x06, 0x4e, 0xc1, 0xb5, 0x24, 0xc5, 0xd8, 0x85, 0x0e, 0x80, 0xf0, 0xc3,
+ 0x63, 0x2b, 0x0e, 0x82, 0xc9, 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0x30, 0xc6,
+ 0xd0, 0x0d, 0x0e, 0x81, 0xc3, 0x01, 0xb5, 0x33, 0x43, 0x13, 0x4f, 0xc1,
+ 0xb5, 0x39, 0xc9, 0x94, 0x92, 0x0e, 0x80, 0x10, 0x00, 0xc1, 0xb5, 0x43,
+ 0xca, 0x9c, 0xde, 0x0e, 0x81, 0x08, 0xc2, 0x0d, 0x10, 0x08, 0xe3, 0x48,
+ 0xc2, 0x0d, 0x10, 0x08, 0xe3, 0x40, 0xc3, 0x45, 0x6b, 0x08, 0xe3, 0x39,
+ 0xc2, 0x00, 0x5f, 0x08, 0xe2, 0xf0, 0xc3, 0x0d, 0x0f, 0x08, 0xe3, 0x31,
+ 0xc2, 0x00, 0x33, 0x08, 0xe2, 0xe8, 0xc4, 0x0d, 0x0e, 0x08, 0xe3, 0x29,
+ 0xc3, 0x02, 0xdf, 0x08, 0xe2, 0xe0, 0xc4, 0x18, 0x12, 0x08, 0xe3, 0x21,
+ 0x91, 0x08, 0xe2, 0xd8, 0xc4, 0x18, 0x10, 0x08, 0xe2, 0xb9, 0xc2, 0x22,
+ 0xcc, 0x08, 0xe2, 0xb0, 0xc3, 0x0d, 0x14, 0x08, 0xe2, 0xa9, 0xc3, 0x09,
+ 0x9e, 0x08, 0xe2, 0xa0, 0xc4, 0x02, 0xde, 0x08, 0xe2, 0x99, 0xc2, 0x02,
+ 0xa0, 0x08, 0xe2, 0x90, 0x94, 0x08, 0xe1, 0xa8, 0x8e, 0x08, 0xe0, 0x41,
+ 0x94, 0x08, 0xe0, 0x32, 0x01, 0xb5, 0x55, 0xc2, 0x00, 0xd0, 0x08, 0xe0,
+ 0xd9, 0x83, 0x08, 0xe0, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xe0, 0xc9, 0x83,
+ 0x08, 0xe0, 0xc0, 0x46, 0x01, 0x92, 0xc1, 0xb5, 0x59, 0x04, 0xc1, 0xb5,
+ 0x65, 0xd5, 0x37, 0x6d, 0x01, 0x2e, 0xf9, 0xc6, 0xcc, 0x1d, 0x0f, 0xac,
+ 0x69, 0x12, 0xc1, 0xb5, 0x71, 0xcc, 0x85, 0x7d, 0x0f, 0xac, 0x59, 0xe0,
+ 0x05, 0xe7, 0x01, 0x49, 0xf8, 0x46, 0x01, 0x92, 0xc1, 0xb5, 0x7d, 0xcf,
+ 0x68, 0x37, 0x01, 0x3e, 0x99, 0x15, 0xc1, 0xb5, 0x89, 0xda, 0x1a, 0x7e,
+ 0x01, 0x3a, 0x79, 0xc6, 0xcd, 0x8b, 0x01, 0x38, 0x71, 0xd5, 0x37, 0x6d,
+ 0x01, 0x2e, 0xf1, 0x4f, 0x60, 0x6c, 0x41, 0xb5, 0x95, 0xdb, 0x14, 0xf4,
+ 0x0f, 0xdb, 0x79, 0x45, 0x02, 0xde, 0x41, 0xb5, 0xa1, 0xc6, 0x02, 0xd1,
+ 0x01, 0x2f, 0x09, 0xd4, 0x39, 0x94, 0x01, 0x2e, 0xd9, 0xc5, 0x06, 0xe2,
+ 0x01, 0x2c, 0x21, 0xcc, 0x01, 0xdb, 0x0f, 0xdc, 0x78, 0xcd, 0x15, 0x02,
+ 0x01, 0x2c, 0x11, 0xcc, 0x06, 0xdb, 0x01, 0x2c, 0x08, 0xc6, 0xcd, 0x4f,
+ 0x0f, 0xd5, 0x59, 0xd0, 0x54, 0xdc, 0x0f, 0xa8, 0x28, 0xc9, 0x33, 0xad,
+ 0x01, 0x72, 0x40, 0xce, 0x6f, 0xfc, 0x01, 0x3f, 0xf9, 0xcc, 0x82, 0x35,
+ 0x01, 0x3f, 0xcb, 0x01, 0xb5, 0xad, 0xc5, 0x01, 0xa2, 0x01, 0x3f, 0xb2,
+ 0x01, 0xb5, 0xb3, 0xcc, 0x82, 0x35, 0x01, 0x3f, 0xc3, 0x01, 0xb5, 0xb9,
+ 0xc5, 0x01, 0xa2, 0x01, 0x3f, 0xab, 0x01, 0xb5, 0xbf, 0xce, 0x6f, 0xfc,
+ 0x01, 0x59, 0x98, 0x46, 0x00, 0x2c, 0xc1, 0xb5, 0xc5, 0xc4, 0x32, 0xbc,
+ 0x01, 0x3e, 0xf0, 0xe0, 0x00, 0x47, 0x01, 0x57, 0x30, 0x45, 0x00, 0x8c,
+ 0xc1, 0xb5, 0xd1, 0xd7, 0x2a, 0x99, 0x01, 0x52, 0xc8, 0xcf, 0x64, 0xd1,
+ 0x01, 0x52, 0xe1, 0xcb, 0x98, 0x42, 0x01, 0x52, 0xd1, 0x42, 0x00, 0x58,
+ 0xc1, 0xb5, 0xe3, 0xc8, 0x52, 0x09, 0x01, 0x52, 0xf8, 0x10, 0xc1, 0xb5,
+ 0xef, 0x14, 0x41, 0xb5, 0xf9, 0x43, 0x01, 0xd0, 0xc1, 0xb6, 0x05, 0xd5,
+ 0x36, 0xb0, 0x0f, 0xab, 0xd8, 0x45, 0x00, 0x2d, 0xc1, 0xb6, 0x2c, 0xd6,
+ 0x29, 0x86, 0x01, 0x70, 0x60, 0xc9, 0x9b, 0x77, 0x01, 0x3e, 0xa9, 0x43,
+ 0x02, 0x6f, 0x41, 0xb6, 0x5a, 0xd5, 0x32, 0x18, 0x01, 0x3e, 0x29, 0x07,
+ 0xc1, 0xb6, 0x66, 0xcd, 0x25, 0xae, 0x00, 0x02, 0xdb, 0x01, 0xb6, 0x72,
+ 0x0b, 0xc1, 0xb6, 0x76, 0xcc, 0x6f, 0xb7, 0x0f, 0xaf, 0x41, 0xd3, 0x1f,
+ 0xcd, 0x01, 0x70, 0x10, 0xcb, 0x90, 0x86, 0x01, 0x36, 0xe1, 0xcc, 0x00,
+ 0x33, 0x00, 0x03, 0xdb, 0x01, 0xb6, 0x82, 0xc6, 0xb7, 0x3b, 0x01, 0x18,
+ 0x41, 0xcd, 0x69, 0x65, 0x01, 0x80, 0x60, 0x0a, 0xc1, 0xb6, 0x86, 0xc3,
+ 0x00, 0x3a, 0x01, 0x15, 0x19, 0x14, 0xc1, 0xb6, 0x98, 0xd5, 0x08, 0x89,
+ 0x01, 0x80, 0xa0, 0x0b, 0xc1, 0xb6, 0xa4, 0xc4, 0x20, 0xe6, 0x01, 0x18,
+ 0x50, 0xc7, 0xc9, 0xb2, 0x01, 0x1d, 0xc1, 0xcd, 0x77, 0xfc, 0x01, 0x71,
+ 0x00, 0x00, 0x41, 0xb6, 0xb0, 0x45, 0x00, 0x5a, 0xc1, 0xb6, 0xc2, 0xd9,
+ 0x1f, 0xc7, 0x01, 0x70, 0x20, 0xcb, 0x93, 0xd5, 0x0f, 0xac, 0x71, 0xcb,
+ 0x8a, 0x0a, 0x01, 0x4e, 0xc1, 0x45, 0x01, 0xfd, 0x41, 0xb6, 0xda, 0x45,
+ 0x04, 0x90, 0xc1, 0xb6, 0xf6, 0x44, 0x01, 0x5e, 0x41, 0xb7, 0x02, 0xc6,
+ 0xcf, 0x35, 0x0f, 0xb6, 0x29, 0xd5, 0x2c, 0xf5, 0x01, 0x70, 0xe0, 0xca,
+ 0x01, 0xfd, 0x01, 0x0f, 0x33, 0x01, 0xb7, 0x0e, 0xc9, 0xb0, 0x6b, 0x01,
+ 0x0c, 0xe0, 0x42, 0x00, 0x2c, 0xc1, 0xb7, 0x14, 0x42, 0x02, 0xa0, 0xc1,
+ 0xb7, 0x20, 0xd5, 0x37, 0xc1, 0x0f, 0xc5, 0x10, 0x00, 0xc1, 0xb7, 0x2c,
+ 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xc8, 0xc5, 0xca, 0xa4, 0x0f, 0xb3, 0x61,
+ 0xd7, 0x2a, 0x6b, 0x0f, 0xc5, 0x30, 0xcb, 0x82, 0xba, 0x01, 0x0f, 0x01,
+ 0x46, 0x00, 0x59, 0x41, 0xb7, 0x49, 0x42, 0x00, 0xe3, 0xc1, 0xb7, 0x58,
+ 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x80, 0x03, 0xc1, 0xb7, 0x64, 0x45, 0x11,
+ 0x3a, 0x41, 0xb7, 0x70, 0x45, 0x04, 0x90, 0xc1, 0xb7, 0x7c, 0xd8, 0x23,
+ 0xf3, 0x0f, 0xc5, 0x01, 0xdf, 0x0c, 0x65, 0x0f, 0xc5, 0x40, 0xd0, 0x56,
+ 0xda, 0x0f, 0xc1, 0xa1, 0xe0, 0x01, 0xe7, 0x0f, 0xc5, 0x50, 0xd0, 0x5a,
+ 0x22, 0x0f, 0xa8, 0x69, 0xcd, 0x0b, 0x91, 0x01, 0x19, 0x49, 0xd4, 0x3b,
+ 0x9c, 0x01, 0x4f, 0xe1, 0xdb, 0x18, 0x39, 0x00, 0x05, 0x58, 0xdc, 0x14,
+ 0x4d, 0x01, 0x3d, 0x51, 0xdb, 0x15, 0x60, 0x01, 0x49, 0xc8, 0xc7, 0x00,
+ 0xfa, 0x01, 0x03, 0x31, 0xc8, 0xb6, 0xca, 0x01, 0x01, 0x69, 0xc9, 0xb3,
+ 0x9e, 0x01, 0x01, 0x51, 0xc4, 0x01, 0xc3, 0x01, 0x00, 0x70, 0xd6, 0x2d,
+ 0x4c, 0x00, 0x2c, 0x71, 0xc4, 0xb9, 0x3c, 0x0f, 0xc8, 0xd9, 0xcb, 0x8f,
+ 0xf7, 0x00, 0x7e, 0xb2, 0x01, 0xb7, 0x88, 0xcc, 0x07, 0xc7, 0x01, 0x13,
+ 0xb1, 0x43, 0x00, 0xe2, 0xc1, 0xb7, 0x8e, 0xd0, 0x5a, 0x92, 0x01, 0x53,
+ 0xeb, 0x01, 0xb7, 0x9a, 0xcb, 0x1a, 0x1a, 0x01, 0x54, 0x28, 0xcf, 0x09,
+ 0xf8, 0x01, 0x4b, 0xb1, 0x44, 0x00, 0x58, 0xc1, 0xb7, 0xa0, 0x15, 0xc1,
+ 0xb7, 0xa6, 0x44, 0x07, 0xc7, 0x41, 0xb7, 0xb2, 0xd8, 0x24, 0x3b, 0x01,
+ 0x54, 0x39, 0xcf, 0x62, 0xb5, 0x01, 0x54, 0x48, 0xc2, 0x0e, 0x9a, 0x00,
+ 0xe2, 0x79, 0xc2, 0x02, 0x1c, 0x00, 0xe0, 0xc9, 0x83, 0x00, 0xe0, 0x60,
+ 0x16, 0xc1, 0xb7, 0xb8, 0x15, 0xc1, 0xb7, 0xc2, 0xc2, 0x00, 0xd0, 0x00,
+ 0xe0, 0x59, 0x83, 0x00, 0xe0, 0x50, 0xc2, 0x00, 0xd0, 0x00, 0xe1, 0x09,
+ 0x83, 0x00, 0xe1, 0x00, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xf1, 0x83, 0x00,
+ 0xe0, 0xe8, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xb1, 0x83, 0x00, 0xe0, 0xa8,
+ 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0xa1, 0x83, 0x00, 0xe0, 0x98, 0xc2, 0x00,
+ 0xdb, 0x00, 0xe0, 0x91, 0x83, 0x00, 0xe0, 0x88, 0xc2, 0x00, 0xd0, 0x00,
+ 0xe0, 0x81, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0x79, 0x83, 0x00, 0xe0, 0x70,
+ 0x83, 0x00, 0xe0, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xe0, 0x49, 0xc2, 0x01,
+ 0x30, 0x00, 0xe0, 0x28, 0xc2, 0x00, 0xd0, 0x00, 0xe0, 0x39, 0x83, 0x00,
+ 0xe0, 0x30, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0x21, 0x83, 0x00, 0xe0, 0x18,
+ 0xc2, 0x00, 0xd0, 0x00, 0xe0, 0x11, 0xc2, 0x00, 0xdb, 0x00, 0xe0, 0x09,
+ 0x83, 0x00, 0xe0, 0x00, 0xc4, 0x18, 0x10, 0x00, 0xe2, 0x39, 0xc2, 0x22,
+ 0xcc, 0x00, 0xe2, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0xe2, 0x29, 0xc3, 0x09,
+ 0x9e, 0x00, 0xe2, 0x20, 0xc4, 0x02, 0xde, 0x00, 0xe2, 0x19, 0xc2, 0x02,
+ 0xa0, 0x00, 0xe2, 0x10, 0xc5, 0xda, 0x79, 0x00, 0xe1, 0xfb, 0x01, 0xb7,
+ 0xcc, 0xc5, 0x4e, 0x18, 0x00, 0xe1, 0xd8, 0xc5, 0x33, 0x5d, 0x00, 0xe1,
+ 0xb9, 0xc3, 0x00, 0xea, 0x00, 0xe1, 0xb0, 0xc2, 0x00, 0x39, 0x00, 0xe1,
+ 0x29, 0xc2, 0x19, 0x2c, 0x00, 0xe1, 0x20, 0xc3, 0x01, 0x95, 0x00, 0xe1,
+ 0xa8, 0xc6, 0xd3, 0xbb, 0x00, 0xe1, 0xa0, 0x97, 0x00, 0xe1, 0x58, 0x91,
+ 0x00, 0xe1, 0x48, 0x15, 0xc1, 0xb7, 0xd2, 0xcc, 0x1a, 0x8c, 0x0f, 0xbc,
+ 0x71, 0x14, 0xc1, 0xb7, 0xe4, 0x44, 0x00, 0x49, 0xc1, 0xb7, 0xf0, 0xcc,
+ 0x07, 0xbb, 0x01, 0x3a, 0xc1, 0xca, 0xa7, 0xc4, 0x0f, 0xaf, 0xc1, 0x08,
+ 0xc1, 0xb7, 0xf6, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x11, 0xd5, 0x34, 0x8e,
+ 0x0f, 0xbd, 0xd9, 0x16, 0x41, 0xb8, 0x02, 0xc5, 0xd4, 0xe3, 0x0f, 0xaf,
+ 0x92, 0x01, 0xb8, 0x0e, 0xc2, 0x00, 0xd0, 0x08, 0xfd, 0x81, 0x83, 0x05,
+ 0x27, 0x60, 0x83, 0x05, 0x26, 0x89, 0xc2, 0x00, 0xd0, 0x05, 0x26, 0x90,
+ 0x83, 0x05, 0x26, 0x99, 0xc2, 0x02, 0x1c, 0x05, 0x26, 0xe0, 0x83, 0x05,
+ 0x26, 0xa1, 0xc2, 0x00, 0xd0, 0x05, 0x26, 0xa9, 0x15, 0xc1, 0xb8, 0x14,
+ 0x44, 0x05, 0x14, 0x41, 0xb8, 0x1e, 0x83, 0x05, 0x26, 0xb1, 0xc2, 0x00,
+ 0xd0, 0x05, 0x27, 0x68, 0x83, 0x05, 0x26, 0xb9, 0xc2, 0x00, 0xd0, 0x05,
+ 0x26, 0xc0, 0x83, 0x05, 0x26, 0xd1, 0xc2, 0x00, 0xd0, 0x05, 0x26, 0xd8,
+ 0x83, 0x05, 0x27, 0x01, 0xc2, 0x01, 0x30, 0x05, 0x27, 0x28, 0x83, 0x05,
+ 0x27, 0x11, 0xc2, 0x00, 0xd0, 0x05, 0x27, 0x58, 0xc2, 0x00, 0xd0, 0x05,
+ 0x27, 0x19, 0x83, 0x05, 0x27, 0x20, 0x83, 0x05, 0x27, 0x31, 0xc2, 0x00,
+ 0xd0, 0x05, 0x27, 0x40, 0x87, 0x05, 0x27, 0x78, 0x97, 0x05, 0x27, 0x88,
+ 0x87, 0x05, 0x27, 0xb8, 0x87, 0x05, 0x27, 0xa9, 0x8a, 0x05, 0x27, 0xb0,
+ 0xc9, 0x1b, 0x0a, 0x01, 0x01, 0x41, 0xca, 0x33, 0xdc, 0x00, 0x00, 0x5b,
+ 0x01, 0xb8, 0x2a, 0xc4, 0x1b, 0x05, 0x00, 0x00, 0x51, 0x4c, 0x87, 0x8d,
+ 0x41, 0xb8, 0x30, 0x48, 0xba, 0xc2, 0xc1, 0xb8, 0x3c, 0x42, 0x01, 0x60,
+ 0x41, 0xb8, 0x64, 0xc4, 0x26, 0x78, 0x00, 0xca, 0x79, 0xc5, 0x06, 0xdb,
+ 0x00, 0xca, 0x71, 0x15, 0xc1, 0xb8, 0x76, 0x08, 0xc1, 0xb8, 0x82, 0x16,
+ 0xc1, 0xb8, 0x8e, 0xc3, 0x05, 0x14, 0x00, 0xca, 0x39, 0xc4, 0x15, 0xe7,
+ 0x00, 0xca, 0x30, 0x44, 0x00, 0xbb, 0xc1, 0xb8, 0x9a, 0x4c, 0x29, 0xba,
+ 0xc1, 0xb8, 0xb2, 0x50, 0x5c, 0xf2, 0x41, 0xb8, 0xe0, 0x46, 0x00, 0xb9,
+ 0xc1, 0xb8, 0xf2, 0xcf, 0x69, 0x72, 0x00, 0xc8, 0x00, 0x16, 0xc1, 0xb9,
+ 0x0f, 0x09, 0xc1, 0xb9, 0x1f, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0xe1, 0x15,
+ 0xc1, 0xb9, 0x2f, 0xc2, 0x01, 0x4a, 0x00, 0xc8, 0xc1, 0xc2, 0x00, 0xdb,
+ 0x00, 0xc8, 0xb9, 0xc2, 0x00, 0x39, 0x00, 0xc8, 0xb1, 0xc2, 0x19, 0x2c,
+ 0x00, 0xc8, 0xab, 0x01, 0xb9, 0x3f, 0xc2, 0x01, 0xc3, 0x00, 0xc8, 0xa1,
+ 0x04, 0xc1, 0xb9, 0x43, 0x12, 0xc1, 0xb9, 0x4d, 0x10, 0xc1, 0xb9, 0x57,
+ 0x06, 0xc1, 0xb9, 0x61, 0x0c, 0xc1, 0xb9, 0x6b, 0x05, 0xc1, 0xb9, 0x75,
+ 0x0d, 0x41, 0xb9, 0x7f, 0x90, 0x08, 0x49, 0xc0, 0x9b, 0x08, 0x49, 0xb8,
+ 0x90, 0x08, 0x49, 0xb0, 0x90, 0x08, 0x49, 0xa8, 0x96, 0x08, 0x49, 0xa0,
+ 0x95, 0x08, 0x49, 0x70, 0x04, 0xc1, 0xb9, 0x89, 0x44, 0x0b, 0x0d, 0xc1,
+ 0xb9, 0x95, 0x46, 0x76, 0x5f, 0xc1, 0xb9, 0xa1, 0xc9, 0x32, 0xb7, 0x01,
+ 0x3e, 0xc9, 0xc7, 0xc4, 0x5d, 0x01, 0x3e, 0xc1, 0xc6, 0x02, 0xd1, 0x01,
+ 0x2f, 0x79, 0x11, 0xc1, 0xb9, 0xad, 0x16, 0xc1, 0xb9, 0xb9, 0xd6, 0x2f,
+ 0x72, 0x01, 0x50, 0xf1, 0x47, 0xc6, 0x9b, 0xc1, 0xb9, 0xc5, 0x47, 0xc1,
+ 0x69, 0x41, 0xb9, 0xd1, 0xcc, 0x23, 0x9f, 0x01, 0x55, 0x68, 0x0e, 0xc1,
+ 0xb9, 0xdd, 0x4f, 0x0b, 0x17, 0x41, 0xb9, 0xe9, 0x96, 0x01, 0x04, 0xe1,
+ 0x95, 0x01, 0x04, 0xdb, 0x01, 0xb9, 0xf5, 0x92, 0x01, 0x04, 0xd1, 0x90,
+ 0x01, 0x04, 0xc9, 0x8f, 0x01, 0x04, 0xc1, 0x8e, 0x01, 0x04, 0xb9, 0x8d,
+ 0x01, 0x04, 0xb1, 0x8a, 0x01, 0x04, 0xa9, 0x9a, 0x01, 0x04, 0x99, 0x91,
+ 0x01, 0x04, 0x91, 0x87, 0x01, 0x04, 0x89, 0x83, 0x01, 0x04, 0x81, 0x98,
+ 0x00, 0xeb, 0x29, 0x97, 0x00, 0xeb, 0x21, 0x94, 0x00, 0xeb, 0x19, 0x8b,
+ 0x00, 0xeb, 0x11, 0x8c, 0x01, 0x63, 0xe0, 0x4d, 0x37, 0xb4, 0xc1, 0xb9,
+ 0xfb, 0xca, 0x9f, 0xe0, 0x00, 0x14, 0xbb, 0x01, 0xba, 0x7a, 0xce, 0x6b,
+ 0xe2, 0x05, 0x3c, 0x78, 0x46, 0x00, 0x8b, 0x41, 0xba, 0x80, 0xcd, 0x7e,
+ 0xf1, 0x00, 0x0e, 0x1b, 0x01, 0xba, 0x8c, 0x47, 0x10, 0x30, 0x41, 0xba,
+ 0x92, 0xc2, 0x00, 0x74, 0x00, 0xe9, 0x29, 0xcd, 0x7c, 0xdc, 0x00, 0x0e,
+ 0x10, 0xcc, 0x23, 0x3f, 0x00, 0x15, 0x08, 0x47, 0x80, 0x10, 0xc1, 0xba,
+ 0x9e, 0xd1, 0x54, 0x97, 0x00, 0x15, 0x68, 0x46, 0x02, 0x0f, 0xc1, 0xba,
+ 0xaa, 0x48, 0x19, 0x9b, 0x41, 0xbb, 0x60, 0x88, 0x05, 0x3f, 0xd9, 0x92,
+ 0x05, 0x3f, 0xe0, 0xc9, 0x4f, 0x9d, 0x05, 0x3f, 0xe9, 0xc6, 0xcb, 0x3f,
+ 0x05, 0x3f, 0xf0, 0x91, 0x00, 0x74, 0x09, 0x0a, 0x41, 0xbb, 0x6c, 0x44,
+ 0x68, 0x00, 0xc1, 0xbb, 0x78, 0x91, 0x00, 0x74, 0xd9, 0x43, 0x60, 0xe8,
+ 0x41, 0xbb, 0xa4, 0xc2, 0x0f, 0x7b, 0x00, 0x74, 0x39, 0xc2, 0x42, 0xcd,
+ 0x00, 0x74, 0x69, 0x91, 0x00, 0x74, 0xc8, 0x42, 0x01, 0x7c, 0xc1, 0xbb,
+ 0xb0, 0x49, 0xb1, 0xd3, 0x41, 0xbb, 0xbc, 0x91, 0x00, 0x74, 0xa9, 0x43,
+ 0x60, 0xe8, 0x41, 0xbb, 0xc8, 0x08, 0xc1, 0xbb, 0xd4, 0xc3, 0x02, 0x45,
+ 0x00, 0x74, 0xe9, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0xf8, 0x42, 0x00, 0x48,
+ 0x41, 0xbb, 0xe0, 0xc4, 0xdf, 0x43, 0x00, 0x75, 0x59, 0xc3, 0x02, 0x45,
+ 0x00, 0x75, 0x70, 0x83, 0x00, 0x75, 0x91, 0x8f, 0x00, 0x75, 0x99, 0x9b,
+ 0x00, 0x76, 0x19, 0x8b, 0x00, 0x76, 0x20, 0xc2, 0x00, 0xd1, 0x00, 0x75,
+ 0x89, 0xc2, 0x00, 0x45, 0x00, 0x75, 0xd8, 0x8b, 0x00, 0x75, 0xa8, 0x9b,
+ 0x00, 0x75, 0xb8, 0x97, 0x00, 0x75, 0xc8, 0x8b, 0x00, 0x76, 0x08, 0xc2,
+ 0x01, 0xc8, 0x00, 0x75, 0xe1, 0xc3, 0x4d, 0xc3, 0x00, 0x75, 0xe8, 0xc2,
+ 0x01, 0x23, 0x00, 0x76, 0x49, 0x8b, 0x00, 0x76, 0x50, 0xc2, 0x02, 0xa0,
+ 0x00, 0x76, 0x91, 0xc4, 0x02, 0xde, 0x00, 0x76, 0x98, 0xc3, 0x09, 0x9e,
+ 0x00, 0x76, 0xa1, 0xc3, 0x0d, 0x14, 0x00, 0x76, 0xa8, 0xc2, 0x22, 0xcc,
+ 0x00, 0x76, 0xb1, 0xc4, 0x18, 0x10, 0x00, 0x76, 0xb8, 0x45, 0x01, 0x93,
+ 0xc1, 0xbb, 0xec, 0xd1, 0x47, 0x70, 0x0f, 0xdc, 0xc8, 0x46, 0x02, 0xae,
+ 0xc1, 0xbb, 0xf8, 0x5b, 0x18, 0xc0, 0x41, 0xbc, 0x0a, 0xc6, 0x0b, 0x09,
+ 0x01, 0x3a, 0x91, 0xc6, 0x02, 0xd1, 0x0f, 0xa9, 0xf8, 0xe0, 0x03, 0x67,
+ 0x01, 0x1d, 0x88, 0x45, 0x01, 0x93, 0xc1, 0xbc, 0x16, 0xd2, 0x43, 0x27,
+ 0x0f, 0xdc, 0xc0, 0x5b, 0x16, 0xa4, 0xc1, 0xbc, 0x22, 0x46, 0x01, 0xc8,
+ 0x41, 0xbc, 0x2e, 0xe0, 0x00, 0x27, 0x01, 0x1d, 0x80, 0x45, 0x00, 0x27,
+ 0xc1, 0xbc, 0x40, 0x4d, 0x3d, 0x55, 0x41, 0xbc, 0x4c, 0xe0, 0x08, 0x67,
+ 0x0f, 0xdb, 0x40, 0x0f, 0xc1, 0xbc, 0x52, 0xcc, 0x0d, 0x9e, 0x01, 0x2e,
+ 0xd0, 0x44, 0x02, 0x9a, 0x41, 0xbc, 0x58, 0xcd, 0x3f, 0xe8, 0x0f, 0xdc,
+ 0x19, 0xce, 0x08, 0x79, 0x0f, 0xdc, 0x28, 0x00, 0x41, 0xbc, 0x5e, 0xcc,
+ 0x8a, 0x45, 0x01, 0x0f, 0x78, 0x45, 0x01, 0x95, 0xc1, 0xbc, 0x76, 0xc9,
+ 0x61, 0x53, 0x01, 0x48, 0x50, 0xcd, 0x7e, 0x3b, 0x01, 0x0c, 0xf9, 0x4e,
+ 0x6f, 0xa8, 0x41, 0xbc, 0x82, 0x00, 0x41, 0xbc, 0x8e, 0x44, 0x00, 0x49,
+ 0xc1, 0xbc, 0xac, 0x45, 0x00, 0x2c, 0x41, 0xbc, 0xb6, 0xd0, 0x58, 0x62,
+ 0x0f, 0xc2, 0x09, 0xc5, 0x01, 0xa2, 0x0f, 0xc2, 0x28, 0x00, 0x41, 0xbc,
+ 0xc0, 0xca, 0xa8, 0x0a, 0x01, 0x0d, 0x40, 0xcc, 0x81, 0xed, 0x01, 0x4a,
+ 0x89, 0xcd, 0x7e, 0xfe, 0x01, 0x4a, 0x68, 0xcd, 0x7e, 0xfe, 0x01, 0x4a,
+ 0x79, 0xcc, 0x81, 0xed, 0x01, 0x4a, 0x60, 0xdc, 0x13, 0x6d, 0x01, 0x52,
+ 0x51, 0x46, 0x00, 0xd4, 0xc1, 0xbc, 0xcc, 0x45, 0x00, 0x8c, 0x41, 0xbc,
+ 0xd8, 0xc3, 0x7e, 0x1c, 0x08, 0x1c, 0x91, 0xc2, 0x00, 0x06, 0x08, 0x1c,
+ 0xa8, 0xce, 0x64, 0xe1, 0x0f, 0xdc, 0xb9, 0xde, 0x0f, 0x04, 0x01, 0x3b,
+ 0x18, 0x45, 0x00, 0x2d, 0xc1, 0xbc, 0xea, 0x50, 0x0f, 0x0a, 0xc1, 0xbc,
+ 0xfc, 0xca, 0x0e, 0xbe, 0x0f, 0xbf, 0x80, 0x45, 0x01, 0xfd, 0xc1, 0xbd,
+ 0x08, 0xdc, 0x14, 0xa1, 0x01, 0x3d, 0xe9, 0xdb, 0x15, 0x7b, 0x01, 0x3c,
+ 0xa0, 0x03, 0xc1, 0xbd, 0x1a, 0x45, 0x1a, 0x38, 0xc1, 0xbd, 0x26, 0x0b,
+ 0xc1, 0xbd, 0x32, 0xc6, 0xa8, 0x2a, 0x01, 0x3a, 0x41, 0xda, 0x19, 0x94,
+ 0x0f, 0xb3, 0x88, 0x45, 0x20, 0x6c, 0xc1, 0xbd, 0x3e, 0x4e, 0x47, 0x15,
+ 0x41, 0xbd, 0x4a, 0x03, 0xc1, 0xbd, 0x56, 0x42, 0x00, 0x27, 0xc1, 0xbd,
+ 0x62, 0x43, 0x00, 0x4a, 0xc1, 0xbd, 0x6c, 0xd8, 0x21, 0x9b, 0x0f, 0xb3,
+ 0x98, 0x49, 0x0a, 0xe6, 0xc1, 0xbd, 0x78, 0xdf, 0x03, 0xa8, 0x01, 0x3c,
+ 0xf1, 0x4e, 0x22, 0x43, 0x41, 0xbd, 0x84, 0x44, 0x02, 0xc3, 0xc1, 0xbd,
+ 0x90, 0xc7, 0xc0, 0x74, 0x01, 0x38, 0xc0, 0x49, 0x2c, 0x46, 0xc1, 0xbd,
+ 0x9a, 0x51, 0x08, 0xa9, 0x41, 0xbd, 0xa0, 0x45, 0x3a, 0x0c, 0xc1, 0xbd,
+ 0xac, 0x42, 0x01, 0x7f, 0xc1, 0xbd, 0xb2, 0xc5, 0x02, 0xd2, 0x01, 0x5a,
+ 0xc2, 0x01, 0xbd, 0xbe, 0x46, 0x82, 0xba, 0xc1, 0xbd, 0xca, 0xcc, 0x30,
+ 0xf2, 0x01, 0x3c, 0xb9, 0x11, 0x41, 0xbd, 0xd0, 0xdc, 0x12, 0x8d, 0x01,
+ 0x3c, 0xe1, 0x44, 0x00, 0x2d, 0x41, 0xbd, 0xe2, 0xc9, 0x68, 0x55, 0x01,
+ 0x3c, 0xb1, 0xcf, 0x65, 0x58, 0x01, 0x38, 0xb0, 0xc7, 0x0b, 0x00, 0x01,
+ 0x39, 0x89, 0xd1, 0x36, 0x21, 0x0f, 0xb3, 0xa1, 0x51, 0x48, 0x5a, 0x41,
+ 0xbd, 0xf1, 0xd2, 0x4e, 0x65, 0x01, 0x39, 0x71, 0xd0, 0x5a, 0xc2, 0x01,
+ 0x38, 0xe1, 0xd4, 0x38, 0xb8, 0x01, 0x5a, 0xb0, 0xdb, 0x15, 0x2a, 0x01,
+ 0x39, 0x21, 0x44, 0x0d, 0x14, 0x41, 0xbe, 0x00, 0xd1, 0x56, 0x62, 0x01,
+ 0x37, 0xe0, 0xca, 0x95, 0xd0, 0x0f, 0xa4, 0xf9, 0x45, 0x00, 0x8c, 0xc1,
+ 0xbe, 0x0c, 0xc5, 0x07, 0x73, 0x0f, 0xd7, 0xb0, 0xa0, 0x0d, 0x87, 0xd1,
+ 0x9f, 0x0d, 0x87, 0xc9, 0x9e, 0x0d, 0x87, 0xc1, 0xa3, 0x0d, 0x87, 0xe9,
+ 0xa2, 0x0d, 0x87, 0xe1, 0xa1, 0x0d, 0x87, 0xd8, 0xa4, 0x0d, 0x87, 0xb9,
+ 0xa3, 0x0d, 0x87, 0xb1, 0xa2, 0x0d, 0x87, 0xa9, 0xa1, 0x0d, 0x87, 0xa1,
+ 0xa0, 0x0d, 0x87, 0x99, 0x9f, 0x0d, 0x87, 0x91, 0x9e, 0x0d, 0x87, 0x88,
+ 0xa1, 0x0d, 0x87, 0x81, 0xa0, 0x0d, 0x87, 0x79, 0x9f, 0x0d, 0x87, 0x71,
+ 0x9e, 0x0d, 0x87, 0x68, 0xa3, 0x0d, 0x88, 0x39, 0xa2, 0x0d, 0x88, 0x31,
+ 0xa1, 0x0d, 0x88, 0x29, 0xa0, 0x0d, 0x88, 0x21, 0x9f, 0x0d, 0x88, 0x19,
+ 0x9e, 0x0d, 0x88, 0x10, 0xa1, 0x0d, 0x88, 0x09, 0xa0, 0x0d, 0x88, 0x01,
+ 0x9f, 0x0d, 0x87, 0xf9, 0x9e, 0x0d, 0x87, 0xf0, 0x9e, 0x0d, 0x85, 0xd1,
+ 0xa5, 0x0d, 0x86, 0x09, 0xa4, 0x0d, 0x86, 0x01, 0xa3, 0x0d, 0x85, 0xf9,
+ 0xa2, 0x0d, 0x85, 0xf1, 0xa1, 0x0d, 0x85, 0xe9, 0xa0, 0x0d, 0x85, 0xe1,
+ 0x9f, 0x0d, 0x85, 0xd8, 0xa4, 0x0d, 0x85, 0xc9, 0xa3, 0x0d, 0x85, 0xc1,
+ 0xa2, 0x0d, 0x85, 0xb9, 0xa1, 0x0d, 0x85, 0xb1, 0xa0, 0x0d, 0x85, 0xa9,
+ 0x9f, 0x0d, 0x85, 0xa1, 0x9e, 0x0d, 0x85, 0x98, 0xa0, 0x0d, 0x85, 0x91,
+ 0x9f, 0x0d, 0x85, 0x89, 0x9e, 0x0d, 0x85, 0x80, 0xa4, 0x0d, 0x85, 0x79,
+ 0xa3, 0x0d, 0x85, 0x71, 0xa2, 0x0d, 0x85, 0x69, 0xa1, 0x0d, 0x85, 0x61,
+ 0xa0, 0x0d, 0x85, 0x59, 0x9f, 0x0d, 0x85, 0x51, 0x9e, 0x0d, 0x85, 0x48,
+ 0x9e, 0x0d, 0x84, 0xf3, 0x01, 0xbe, 0x1e, 0xa6, 0x0d, 0x85, 0x31, 0xa5,
+ 0x0d, 0x85, 0x29, 0xa4, 0x0d, 0x85, 0x21, 0xa3, 0x0d, 0x85, 0x19, 0xa2,
+ 0x0d, 0x85, 0x11, 0xa1, 0x0d, 0x85, 0x09, 0xa0, 0x0d, 0x85, 0x01, 0x9f,
+ 0x0d, 0x84, 0xf8, 0xa2, 0x0d, 0x84, 0xe9, 0xa1, 0x0d, 0x84, 0xe1, 0xa0,
+ 0x0d, 0x84, 0xd9, 0x9f, 0x0d, 0x84, 0xd1, 0x9e, 0x0d, 0x84, 0xc8, 0xc2,
+ 0x00, 0xe8, 0x0d, 0x84, 0xc1, 0xa3, 0x0d, 0x84, 0xb9, 0xa2, 0x0d, 0x84,
+ 0xb1, 0xa1, 0x0d, 0x84, 0xa9, 0xa0, 0x0d, 0x84, 0xa1, 0x9f, 0x0d, 0x84,
+ 0x99, 0x9e, 0x0d, 0x84, 0x90, 0xa0, 0x0d, 0x84, 0x89, 0x9f, 0x0d, 0x84,
+ 0x81, 0x9e, 0x0d, 0x84, 0x78, 0xc2, 0x00, 0xac, 0x0d, 0x84, 0x71, 0xa4,
+ 0x0d, 0x84, 0x69, 0xa3, 0x0d, 0x84, 0x61, 0xa2, 0x0d, 0x84, 0x59, 0xa1,
+ 0x0d, 0x84, 0x51, 0xa0, 0x0d, 0x84, 0x49, 0x9f, 0x0d, 0x84, 0x41, 0x9e,
+ 0x0d, 0x84, 0x38, 0xa6, 0x0d, 0x84, 0x31, 0xa5, 0x0d, 0x84, 0x29, 0xa4,
+ 0x0d, 0x84, 0x21, 0xa3, 0x0d, 0x84, 0x19, 0xa2, 0x0d, 0x84, 0x11, 0xa1,
+ 0x0d, 0x84, 0x09, 0xa0, 0x0d, 0x84, 0x01, 0x9f, 0x0d, 0x83, 0xf9, 0x9e,
+ 0x0d, 0x83, 0xf0, 0x9f, 0x0d, 0x88, 0xf1, 0x9e, 0x0d, 0x88, 0xe8, 0xa0,
+ 0x0d, 0x81, 0xd1, 0x9f, 0x0d, 0x81, 0xc9, 0x9e, 0x0d, 0x81, 0xc1, 0xc2,
+ 0x06, 0x52, 0x0d, 0x81, 0xd8, 0xa3, 0x0d, 0x81, 0xb9, 0xa2, 0x0d, 0x81,
+ 0xb1, 0xa1, 0x0d, 0x81, 0xa9, 0xa0, 0x0d, 0x81, 0xa1, 0x9f, 0x0d, 0x81,
+ 0x99, 0x9e, 0x0d, 0x81, 0x90, 0xa4, 0x0d, 0x81, 0x89, 0xa3, 0x0d, 0x81,
+ 0x81, 0xa2, 0x0d, 0x81, 0x79, 0xa1, 0x0d, 0x81, 0x71, 0xa0, 0x0d, 0x81,
+ 0x69, 0x9f, 0x0d, 0x81, 0x61, 0x9e, 0x0d, 0x81, 0x58, 0xa5, 0x0d, 0x81,
+ 0x51, 0xa4, 0x0d, 0x81, 0x49, 0xa3, 0x0d, 0x81, 0x41, 0xa2, 0x0d, 0x81,
+ 0x39, 0xa1, 0x0d, 0x81, 0x31, 0xa0, 0x0d, 0x81, 0x29, 0x9f, 0x0d, 0x81,
+ 0x21, 0x9e, 0x0d, 0x81, 0x18, 0xc2, 0x00, 0x3c, 0x0d, 0x81, 0x11, 0x9e,
+ 0x0d, 0x80, 0xbb, 0x01, 0xbe, 0x26, 0xa6, 0x0d, 0x80, 0xf9, 0xa5, 0x0d,
+ 0x80, 0xf1, 0xa4, 0x0d, 0x80, 0xe9, 0xa3, 0x0d, 0x80, 0xe1, 0xa2, 0x0d,
+ 0x80, 0xd9, 0xa1, 0x0d, 0x80, 0xd1, 0xa0, 0x0d, 0x80, 0xc9, 0x9f, 0x0d,
+ 0x80, 0xc0, 0xa1, 0x0d, 0x88, 0xc9, 0xa0, 0x0d, 0x88, 0xc1, 0x9f, 0x0d,
+ 0x88, 0xb9, 0x9e, 0x0d, 0x88, 0xb1, 0xa2, 0x0d, 0x88, 0xd1, 0xa3, 0x0d,
+ 0x88, 0xd9, 0xa4, 0x0d, 0x88, 0xe0, 0xa1, 0x0d, 0x88, 0xa9, 0xa0, 0x0d,
+ 0x88, 0xa1, 0x9f, 0x0d, 0x88, 0x99, 0x9e, 0x0d, 0x88, 0x90, 0xa2, 0x0d,
+ 0x88, 0x89, 0xa1, 0x0d, 0x88, 0x81, 0xa0, 0x0d, 0x88, 0x79, 0x9f, 0x0d,
+ 0x88, 0x71, 0x9e, 0x0d, 0x88, 0x68, 0xa2, 0x0d, 0x88, 0x61, 0xa1, 0x0d,
+ 0x88, 0x59, 0xa0, 0x0d, 0x88, 0x51, 0x9f, 0x0d, 0x88, 0x49, 0x9e, 0x0d,
+ 0x88, 0x40, 0xc2, 0x42, 0xcd, 0x0d, 0x87, 0x11, 0xa2, 0x0d, 0x87, 0x09,
+ 0xa1, 0x0d, 0x87, 0x01, 0xa0, 0x0d, 0x86, 0xf9, 0x9f, 0x0d, 0x86, 0xf1,
+ 0x9e, 0x0d, 0x86, 0xe8, 0x9e, 0x0d, 0x87, 0x19, 0x9f, 0x0d, 0x87, 0x21,
+ 0xa0, 0x0d, 0x87, 0x29, 0xa1, 0x0d, 0x87, 0x30, 0x9e, 0x0d, 0x87, 0x39,
+ 0x9f, 0x0d, 0x87, 0x41, 0xa0, 0x0d, 0x87, 0x49, 0xa1, 0x0d, 0x87, 0x51,
+ 0xa2, 0x0d, 0x87, 0x59, 0xa3, 0x0d, 0x87, 0x60, 0xa2, 0x0d, 0x86, 0xd9,
+ 0xa1, 0x0d, 0x86, 0xd1, 0xa0, 0x0d, 0x86, 0xc9, 0x9f, 0x0d, 0x86, 0xc1,
+ 0x9e, 0x0d, 0x86, 0xb9, 0xa3, 0x0d, 0x86, 0xe0, 0xc2, 0x01, 0xc3, 0x0d,
+ 0x86, 0xb1, 0x9f, 0x0d, 0x86, 0xa9, 0x9e, 0x0d, 0x86, 0xa0, 0xa1, 0x0d,
+ 0x86, 0x99, 0xa0, 0x0d, 0x86, 0x91, 0x9f, 0x0d, 0x86, 0x89, 0x9e, 0x0d,
+ 0x86, 0x80, 0xa4, 0x0d, 0x86, 0x79, 0xa3, 0x0d, 0x86, 0x71, 0xa2, 0x0d,
+ 0x86, 0x69, 0xa1, 0x0d, 0x86, 0x61, 0xa0, 0x0d, 0x86, 0x59, 0x9f, 0x0d,
+ 0x86, 0x51, 0x9e, 0x0d, 0x86, 0x48, 0xa4, 0x0d, 0x86, 0x41, 0xa3, 0x0d,
+ 0x86, 0x39, 0xa2, 0x0d, 0x86, 0x31, 0xa1, 0x0d, 0x86, 0x29, 0xa0, 0x0d,
+ 0x86, 0x21, 0x9f, 0x0d, 0x86, 0x19, 0x9e, 0x0d, 0x86, 0x10, 0xc2, 0x00,
+ 0x39, 0x0d, 0x83, 0xe9, 0xa3, 0x0d, 0x83, 0xe1, 0xa2, 0x0d, 0x83, 0xd9,
+ 0xa1, 0x0d, 0x83, 0xd1, 0xa0, 0x0d, 0x83, 0xc9, 0x9f, 0x0d, 0x83, 0xc1,
+ 0x9e, 0x0d, 0x83, 0xb8, 0xa6, 0x0d, 0x83, 0xb1, 0xa5, 0x0d, 0x83, 0xa9,
+ 0xa4, 0x0d, 0x83, 0xa1, 0xa3, 0x0d, 0x83, 0x99, 0xa2, 0x0d, 0x83, 0x91,
+ 0xa1, 0x0d, 0x83, 0x89, 0xa0, 0x0d, 0x83, 0x81, 0x9f, 0x0d, 0x83, 0x79,
+ 0x9e, 0x0d, 0x83, 0x70, 0x9f, 0x0d, 0x83, 0x19, 0x9e, 0x0d, 0x83, 0x11,
+ 0xa0, 0x0d, 0x83, 0x21, 0xa1, 0x0d, 0x83, 0x29, 0xa2, 0x0d, 0x83, 0x31,
+ 0xa3, 0x0d, 0x83, 0x39, 0xa4, 0x0d, 0x83, 0x40, 0xa1, 0x0d, 0x83, 0x09,
+ 0xa0, 0x0d, 0x83, 0x01, 0x9f, 0x0d, 0x82, 0xf9, 0x9e, 0x0d, 0x82, 0xf0,
+ 0x9e, 0x0d, 0x83, 0x49, 0x9f, 0x0d, 0x83, 0x51, 0xa0, 0x0d, 0x83, 0x59,
+ 0xa1, 0x0d, 0x83, 0x61, 0xc2, 0x00, 0xf1, 0x0d, 0x83, 0x68, 0xa4, 0x0d,
+ 0x82, 0xe9, 0xa3, 0x0d, 0x82, 0xe1, 0xa2, 0x0d, 0x82, 0xd9, 0xa1, 0x0d,
+ 0x82, 0xd1, 0xa0, 0x0d, 0x82, 0xc9, 0x9f, 0x0d, 0x82, 0xc1, 0x9e, 0x0d,
+ 0x82, 0xb8, 0xa2, 0x0d, 0x82, 0xb1, 0xa1, 0x0d, 0x82, 0xa9, 0xa0, 0x0d,
+ 0x82, 0xa1, 0x9f, 0x0d, 0x82, 0x99, 0x9e, 0x0d, 0x82, 0x90, 0xa5, 0x0d,
+ 0x82, 0x89, 0xa4, 0x0d, 0x82, 0x81, 0xa3, 0x0d, 0x82, 0x79, 0xa2, 0x0d,
+ 0x82, 0x71, 0xa1, 0x0d, 0x82, 0x69, 0xa0, 0x0d, 0x82, 0x61, 0x9f, 0x0d,
+ 0x82, 0x59, 0x9e, 0x0d, 0x82, 0x50, 0xa3, 0x0d, 0x82, 0x49, 0xa2, 0x0d,
+ 0x82, 0x41, 0xa1, 0x0d, 0x82, 0x39, 0xa0, 0x0d, 0x82, 0x31, 0x9f, 0x0d,
+ 0x82, 0x29, 0x9e, 0x0d, 0x82, 0x20, 0xa5, 0x0d, 0x82, 0x19, 0xa4, 0x0d,
+ 0x82, 0x11, 0xa3, 0x0d, 0x82, 0x09, 0xa2, 0x0d, 0x82, 0x01, 0xa1, 0x0d,
+ 0x81, 0xf9, 0xa0, 0x0d, 0x81, 0xf1, 0x9f, 0x0d, 0x81, 0xe9, 0x9e, 0x0d,
+ 0x81, 0xe0, 0xca, 0xa2, 0x7e, 0x07, 0xda, 0x79, 0x48, 0xb7, 0xf2, 0x41,
+ 0xbe, 0x2e, 0xc2, 0x00, 0x67, 0x00, 0x2f, 0x23, 0x01, 0xbe, 0x40, 0xc3,
+ 0xba, 0x37, 0x00, 0x2e, 0xdb, 0x01, 0xbe, 0x46, 0xc3, 0x0b, 0xc8, 0x00,
+ 0x2e, 0x8b, 0x01, 0xbe, 0x4c, 0xc3, 0x04, 0xac, 0x00, 0x2e, 0xab, 0x01,
+ 0xbe, 0x52, 0x16, 0xc1, 0xbe, 0x58, 0x15, 0xc1, 0xbe, 0x73, 0xc4, 0x5d,
+ 0xe2, 0x00, 0x2f, 0x43, 0x01, 0xbe, 0x85, 0xc3, 0xe5, 0x78, 0x00, 0x2f,
+ 0x3b, 0x01, 0xbe, 0x8b, 0x46, 0x26, 0xf7, 0xc1, 0xbe, 0x91, 0xc3, 0x20,
+ 0x18, 0x00, 0x2f, 0x03, 0x01, 0xbe, 0xb5, 0xc3, 0x00, 0x4e, 0x00, 0x2e,
+ 0xf3, 0x01, 0xbe, 0xbb, 0xc5, 0xa2, 0x83, 0x00, 0x2e, 0xe3, 0x01, 0xbe,
+ 0xc1, 0xc3, 0x4a, 0xb9, 0x00, 0x2e, 0xcb, 0x01, 0xbe, 0xc7, 0xc5, 0x4a,
+ 0xb3, 0x00, 0x2e, 0xb3, 0x01, 0xbe, 0xcd, 0xc2, 0x01, 0x7f, 0x00, 0x2e,
+ 0xa3, 0x01, 0xbe, 0xd3, 0xc5, 0x40, 0x9a, 0x00, 0x2e, 0x9b, 0x01, 0xbe,
+ 0xdd, 0xc5, 0x9c, 0xa2, 0x00, 0x2e, 0x93, 0x01, 0xbe, 0xe3, 0x03, 0xc1,
+ 0xbe, 0xe9, 0x45, 0x06, 0xa6, 0x41, 0xbe, 0xf3, 0xd4, 0x3d, 0xa4, 0x07,
+ 0xd8, 0xf1, 0x13, 0xc1, 0xbf, 0x23, 0x15, 0xc1, 0xbf, 0x32, 0xc4, 0xe4,
+ 0x8b, 0x00, 0x2d, 0xf9, 0xc5, 0xdb, 0x23, 0x00, 0x2d, 0xe9, 0xcf, 0x64,
+ 0xa4, 0x00, 0x2d, 0xe1, 0x0a, 0xc1, 0xbf, 0x42, 0xc5, 0x79, 0xbe, 0x00,
+ 0x2d, 0xb9, 0xc5, 0xd5, 0x7e, 0x00, 0x2d, 0xa8, 0x43, 0x09, 0x3b, 0xc1,
+ 0xbf, 0x57, 0xcb, 0x97, 0x7c, 0x00, 0x2e, 0x31, 0xc9, 0xae, 0xb2, 0x00,
+ 0x2e, 0x19, 0xc5, 0xd4, 0x16, 0x00, 0x2e, 0x01, 0xc5, 0xda, 0xa6, 0x00,
+ 0x2d, 0xf0, 0xc4, 0xe1, 0x23, 0x00, 0x2d, 0x71, 0x03, 0x41, 0xbf, 0x63,
+ 0xc3, 0x51, 0x3f, 0x00, 0x2d, 0x69, 0xc4, 0x40, 0xe8, 0x00, 0x2d, 0x38,
+ 0xcc, 0x89, 0x9d, 0x00, 0x2d, 0x51, 0xc3, 0x17, 0xc9, 0x00, 0x2c, 0xd0,
+ 0x07, 0xc1, 0xbf, 0x6f, 0xc5, 0xd5, 0x24, 0x00, 0x2c, 0xb0, 0xc3, 0x75,
+ 0x8b, 0x00, 0x2d, 0x41, 0xc9, 0xaf, 0xc9, 0x00, 0x2c, 0xf8, 0xc3, 0x15,
+ 0xe7, 0x00, 0x2d, 0x09, 0xc4, 0x56, 0x4f, 0x00, 0x2c, 0xc8, 0xc9, 0xb3,
+ 0xef, 0x00, 0x2c, 0x99, 0xc4, 0xa0, 0x89, 0x00, 0x2c, 0x90, 0xc3, 0x26,
+ 0x1a, 0x00, 0x2c, 0xe3, 0x01, 0xbf, 0x7b, 0xc6, 0xcb, 0x63, 0x00, 0x2c,
+ 0xf0, 0xc4, 0xde, 0xbb, 0x00, 0x2d, 0x19, 0xc7, 0xc3, 0x6f, 0x00, 0x2d,
+ 0x21, 0xc5, 0xdd, 0x35, 0x00, 0x2d, 0x2a, 0x01, 0xbf, 0x81, 0x05, 0xc1,
+ 0xbf, 0x87, 0xcf, 0x61, 0xb6, 0x02, 0x6e, 0x09, 0x03, 0xc1, 0xbf, 0x99,
+ 0xc6, 0xd2, 0xb3, 0x02, 0x6f, 0x21, 0x19, 0xc1, 0xbf, 0xa3, 0xd6, 0x2d,
+ 0xa4, 0x02, 0x6f, 0x99, 0xcf, 0x67, 0x56, 0x02, 0x6f, 0xa9, 0xcb, 0x92,
+ 0x1d, 0x02, 0x6f, 0xc1, 0xcb, 0x90, 0x39, 0x02, 0x6f, 0xc8, 0xd9, 0x1f,
+ 0x95, 0x02, 0x6e, 0x11, 0xc8, 0xbb, 0xf2, 0x02, 0x6f, 0xd0, 0xc9, 0xae,
+ 0xc4, 0x02, 0x6f, 0x39, 0xc6, 0xcc, 0x17, 0x02, 0x6f, 0x41, 0xc9, 0xb1,
+ 0x94, 0x02, 0x6f, 0xa0, 0xc5, 0xd5, 0x79, 0x02, 0x6e, 0x29, 0xca, 0x9e,
+ 0x50, 0x02, 0x6e, 0x98, 0xc6, 0xd3, 0x37, 0x02, 0x6e, 0x41, 0xcd, 0x7f,
+ 0xdb, 0x02, 0x6f, 0xe8, 0x44, 0x3e, 0x62, 0xc1, 0xbf, 0xaf, 0xc3, 0x00,
+ 0x88, 0x02, 0x6e, 0xa8, 0xc3, 0x05, 0x9f, 0x02, 0x6e, 0xb9, 0xc4, 0x07,
+ 0xc8, 0x02, 0x6f, 0x00, 0xc6, 0xcc, 0xb9, 0x02, 0x6e, 0xc1, 0xc8, 0xba,
+ 0x5a, 0x02, 0x6f, 0xe0, 0xc7, 0x12, 0x48, 0x02, 0x6f, 0x29, 0xc7, 0x50,
+ 0x25, 0x02, 0x6f, 0x70, 0xa1, 0x0f, 0xdb, 0xc1, 0x9f, 0x0f, 0xdb, 0xb1,
+ 0xa0, 0x0f, 0xdb, 0xb9, 0xa2, 0x0f, 0xdb, 0xc9, 0xa3, 0x0f, 0xdb, 0xd1,
+ 0xa4, 0x0f, 0xdb, 0xd9, 0xc4, 0xe1, 0x7b, 0x0f, 0xdc, 0x08, 0x45, 0x04,
+ 0x90, 0xc1, 0xbf, 0xb9, 0xc2, 0x00, 0xb1, 0x01, 0x00, 0xa8, 0xa6, 0x01,
+ 0x1d, 0xe9, 0xa4, 0x01, 0x1d, 0xe1, 0xa0, 0x01, 0x1d, 0xd9, 0x9e, 0x01,
+ 0x1d, 0xd0, 0x42, 0x00, 0x03, 0xc1, 0xbf, 0xc5, 0xcc, 0x89, 0x55, 0x0f,
+ 0xb5, 0x28, 0xc6, 0xce, 0x1b, 0x0f, 0x9e, 0x39, 0xc4, 0x00, 0x87, 0x0f,
+ 0xa1, 0xa0, 0xcb, 0x93, 0x0f, 0x0f, 0x9f, 0x09, 0xc8, 0x37, 0x8f, 0x0f,
+ 0x9f, 0x02, 0x01, 0xbf, 0xd4, 0xc4, 0xce, 0x15, 0x01, 0x34, 0x91, 0xc6,
+ 0xca, 0xb5, 0x01, 0x31, 0x69, 0xc6, 0xcf, 0x6b, 0x0f, 0xb7, 0x00, 0xc2,
+ 0x02, 0xa7, 0x0f, 0xc9, 0xf1, 0x89, 0x0f, 0xa2, 0xe0, 0xda, 0x1a, 0xb2,
+ 0x0f, 0xc8, 0xf1, 0xd8, 0x23, 0x7b, 0x0f, 0xd7, 0x80, 0xc4, 0x26, 0x78,
+ 0x08, 0x69, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0x69, 0xc1, 0x15, 0xc1, 0xbf,
+ 0xd8, 0x08, 0xc1, 0xbf, 0xe4, 0x16, 0xc1, 0xbf, 0xf0, 0xc3, 0x05, 0x14,
+ 0x08, 0x69, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0x69, 0x80, 0x42, 0x01, 0x6f,
+ 0xc1, 0xbf, 0xfc, 0xc8, 0xbe, 0xea, 0x08, 0x69, 0x20, 0xc9, 0xaa, 0xb0,
+ 0x08, 0x69, 0x19, 0xc5, 0xd9, 0xe8, 0x08, 0x69, 0x10, 0x91, 0x08, 0x69,
+ 0x09, 0x87, 0x08, 0x69, 0x01, 0x97, 0x08, 0x68, 0xf9, 0x8b, 0x08, 0x68,
+ 0xf1, 0x83, 0x08, 0x68, 0xe8, 0xc2, 0x02, 0x41, 0x08, 0x68, 0xe1, 0x10,
+ 0xc1, 0xc0, 0x0e, 0x0d, 0xc1, 0xc0, 0x1e, 0xc2, 0x19, 0x2c, 0x08, 0x68,
+ 0xc1, 0xc2, 0x01, 0x4a, 0x08, 0x68, 0xb1, 0xc2, 0x01, 0xc3, 0x08, 0x68,
+ 0xa1, 0xc2, 0x00, 0xdb, 0x08, 0x68, 0x99, 0xc2, 0x01, 0x30, 0x08, 0x68,
+ 0x91, 0x14, 0xc1, 0xc0, 0x2e, 0x06, 0xc1, 0xc0, 0x38, 0xc2, 0x00, 0x87,
+ 0x08, 0x68, 0x49, 0xc2, 0x00, 0xd0, 0x08, 0x68, 0x39, 0xc2, 0x00, 0x64,
+ 0x08, 0x68, 0x31, 0xc2, 0x25, 0x3b, 0x08, 0x68, 0x29, 0x16, 0xc1, 0xc0,
+ 0x42, 0x83, 0x08, 0x68, 0x01, 0xc2, 0x01, 0x5d, 0x08, 0x68, 0x09, 0xc2,
+ 0x00, 0xb0, 0x08, 0x68, 0x11, 0xc2, 0x02, 0x1c, 0x08, 0x68, 0x71, 0x15,
+ 0x41, 0xc0, 0x4c, 0x97, 0x00, 0xb9, 0x99, 0x8b, 0x00, 0xb9, 0x90, 0xc2,
+ 0x00, 0xd0, 0x00, 0xb9, 0x89, 0xc2, 0x0d, 0xf6, 0x00, 0xb9, 0x81, 0xc2,
+ 0x01, 0x4a, 0x00, 0xb9, 0x79, 0xc2, 0x00, 0xdb, 0x00, 0xb9, 0x71, 0xc2,
+ 0x00, 0x39, 0x00, 0xb9, 0x69, 0xc2, 0x19, 0x2c, 0x00, 0xb9, 0x61, 0xc2,
+ 0x01, 0xc3, 0x00, 0xb9, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xb9, 0x51, 0xc2,
+ 0x00, 0xb0, 0x00, 0xb9, 0x49, 0x10, 0xc1, 0xc0, 0x56, 0xc2, 0x0e, 0x9a,
+ 0x00, 0xb9, 0x39, 0xc2, 0x01, 0x6f, 0x00, 0xb9, 0x31, 0xc2, 0x01, 0x30,
+ 0x00, 0xb9, 0x21, 0xc2, 0x02, 0x2b, 0x00, 0xb9, 0x19, 0x97, 0x00, 0xb9,
+ 0x11, 0x8b, 0x00, 0xb9, 0x09, 0x83, 0x00, 0xb9, 0x00, 0x49, 0xb0, 0x7d,
+ 0xc1, 0xc0, 0x60, 0x0c, 0xc1, 0xc0, 0xad, 0xd4, 0x3a, 0x5c, 0x01, 0x81,
+ 0x71, 0xd4, 0x3a, 0x34, 0x01, 0x81, 0x79, 0x47, 0x02, 0x0e, 0xc1, 0xc0,
+ 0xb9, 0xc6, 0x92, 0x0c, 0x01, 0x8b, 0x20, 0xc3, 0x05, 0x14, 0x01, 0x81,
+ 0x09, 0x16, 0xc1, 0xc1, 0x16, 0x08, 0xc1, 0xc1, 0x24, 0x15, 0xc1, 0xc1,
+ 0x30, 0xc5, 0x06, 0xdb, 0x01, 0x81, 0x41, 0xc4, 0x26, 0x78, 0x01, 0x81,
+ 0x48, 0xc3, 0x05, 0x14, 0x08, 0x47, 0xdb, 0x01, 0xc1, 0x3c, 0x16, 0xc1,
+ 0xc1, 0x42, 0xc4, 0x0d, 0x13, 0x08, 0x47, 0xe0, 0x16, 0xc1, 0xc1, 0x4e,
+ 0x15, 0xc1, 0xc1, 0x5a, 0xc4, 0xb9, 0x7e, 0x08, 0x47, 0x91, 0xc2, 0x00,
+ 0x67, 0x08, 0x47, 0x81, 0x03, 0xc1, 0xc1, 0x64, 0xc3, 0x20, 0x18, 0x08,
+ 0x47, 0x69, 0xc3, 0x00, 0x4e, 0x08, 0x47, 0x61, 0xc6, 0xcf, 0xd7, 0x08,
+ 0x47, 0x59, 0xc4, 0xe0, 0xe7, 0x08, 0x47, 0x51, 0xc4, 0x4a, 0xb9, 0x08,
+ 0x47, 0x49, 0xc2, 0x01, 0x7f, 0x08, 0x47, 0x23, 0x01, 0xc1, 0x70, 0xc4,
+ 0xdf, 0x07, 0x08, 0x47, 0x31, 0xc3, 0x7e, 0x89, 0x08, 0x47, 0x29, 0xcb,
+ 0x95, 0x8d, 0x08, 0x47, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x47, 0x11, 0xc4,
+ 0xe3, 0x27, 0x08, 0x47, 0x08, 0xca, 0x3b, 0x06, 0x07, 0xfb, 0x29, 0x47,
+ 0x02, 0x0e, 0xc1, 0xc1, 0x76, 0xd1, 0x2f, 0xfb, 0x07, 0xfc, 0xf1, 0xd6,
+ 0x2f, 0xf6, 0x07, 0xfc, 0xf8, 0x0d, 0xc1, 0xc1, 0xb1, 0x15, 0xc1, 0xc1,
+ 0xc0, 0xc5, 0xd6, 0x8c, 0x07, 0xfd, 0x4b, 0x01, 0xc1, 0xcc, 0xc5, 0xda,
+ 0xe7, 0x07, 0xfd, 0x89, 0x12, 0xc1, 0xc1, 0xd0, 0x8b, 0x07, 0xfe, 0xe3,
+ 0x01, 0xc1, 0xdf, 0x05, 0xc1, 0xc1, 0xe5, 0x16, 0xc1, 0xc1, 0xf1, 0xc5,
+ 0x90, 0xe4, 0x07, 0xfd, 0xf1, 0x83, 0x07, 0xfe, 0x13, 0x01, 0xc1, 0xfd,
+ 0x1b, 0xc1, 0xc2, 0x01, 0x87, 0x07, 0xfe, 0x3b, 0x01, 0xc2, 0x1b, 0x91,
+ 0x07, 0xfe, 0x63, 0x01, 0xc2, 0x23, 0x19, 0xc1, 0xc2, 0x27, 0x97, 0x07,
+ 0xfe, 0x99, 0xc5, 0xd9, 0x61, 0x07, 0xfd, 0x22, 0x01, 0xc2, 0x39, 0xd1,
+ 0x4e, 0xd0, 0x0f, 0xb4, 0x28, 0x47, 0x78, 0xc0, 0x41, 0xc2, 0x3d, 0x45,
+ 0x03, 0x14, 0xc1, 0xc2, 0x49, 0x83, 0x01, 0x82, 0xa9, 0x8b, 0x01, 0x82,
+ 0xb9, 0x97, 0x01, 0x82, 0xc9, 0x87, 0x01, 0x82, 0xd9, 0x91, 0x01, 0x82,
+ 0xe8, 0x83, 0x01, 0x82, 0x59, 0x8b, 0x01, 0x82, 0x69, 0x97, 0x01, 0x82,
+ 0x79, 0x87, 0x01, 0x82, 0x89, 0x91, 0x01, 0x82, 0x98, 0x83, 0x01, 0x82,
+ 0x61, 0x8b, 0x01, 0x82, 0x71, 0x97, 0x01, 0x82, 0x81, 0x87, 0x01, 0x82,
+ 0x91, 0x91, 0x01, 0x82, 0xa0, 0x83, 0x01, 0x82, 0xb1, 0x8b, 0x01, 0x82,
+ 0xc1, 0x97, 0x01, 0x82, 0xd1, 0x87, 0x01, 0x82, 0xe1, 0x91, 0x01, 0x82,
+ 0xf0, 0x83, 0x01, 0x82, 0xf9, 0x8b, 0x01, 0x83, 0x09, 0x97, 0x01, 0x83,
+ 0x21, 0x87, 0x01, 0x83, 0x31, 0x91, 0x01, 0x83, 0x40, 0x83, 0x01, 0x83,
+ 0x01, 0x8b, 0x01, 0x83, 0x11, 0x97, 0x01, 0x83, 0x29, 0x87, 0x01, 0x83,
+ 0x39, 0x91, 0x01, 0x83, 0x48, 0x83, 0x01, 0x83, 0x51, 0x8b, 0x01, 0x83,
+ 0x59, 0x97, 0x01, 0x83, 0x61, 0x87, 0x01, 0x83, 0x69, 0x91, 0x01, 0x83,
+ 0x70, 0x83, 0x01, 0x83, 0x79, 0x8b, 0x01, 0x83, 0x91, 0x97, 0x01, 0x83,
+ 0xa9, 0x87, 0x01, 0x83, 0xc1, 0x91, 0x01, 0x83, 0xd8, 0x83, 0x01, 0x83,
+ 0x81, 0x8b, 0x01, 0x83, 0x99, 0x97, 0x01, 0x83, 0xb1, 0x87, 0x01, 0x83,
+ 0xc9, 0x91, 0x01, 0x83, 0xe0, 0x83, 0x01, 0x83, 0x89, 0x8b, 0x01, 0x83,
+ 0xa1, 0x97, 0x01, 0x83, 0xb9, 0x87, 0x01, 0x83, 0xd1, 0x91, 0x01, 0x83,
+ 0xe8, 0x83, 0x01, 0x83, 0xf1, 0x8b, 0x01, 0x83, 0xf9, 0x97, 0x01, 0x84,
+ 0x01, 0x87, 0x01, 0x84, 0x09, 0x91, 0x01, 0x84, 0x10, 0x83, 0x01, 0x84,
+ 0x21, 0x97, 0x01, 0x84, 0x31, 0x91, 0x01, 0x84, 0x40, 0x83, 0x01, 0x84,
+ 0x49, 0x8b, 0x01, 0x84, 0x51, 0x97, 0x01, 0x84, 0x59, 0x87, 0x01, 0x84,
+ 0x61, 0x91, 0x01, 0x84, 0x68, 0x83, 0x01, 0x84, 0x79, 0x8b, 0x01, 0x84,
+ 0x81, 0x87, 0x01, 0x84, 0x89, 0x91, 0x01, 0x84, 0x90, 0xc6, 0x1c, 0xb4,
+ 0x01, 0x02, 0x19, 0xce, 0x6b, 0x17, 0x01, 0x70, 0xd0, 0x45, 0x6b, 0x02,
+ 0xc1, 0xc2, 0x6f, 0xcc, 0x0d, 0x9e, 0x01, 0x2e, 0xc9, 0xc6, 0x1c, 0xb4,
+ 0x01, 0x2e, 0xc1, 0xcc, 0x01, 0xdb, 0x0f, 0xdc, 0x81, 0x42, 0x00, 0x58,
+ 0x41, 0xc2, 0x7b, 0xc9, 0x16, 0x2f, 0x01, 0x37, 0x39, 0x0e, 0xc1, 0xc2,
+ 0x81, 0xc8, 0xb5, 0x82, 0x01, 0x09, 0x39, 0xc8, 0xb9, 0x82, 0x01, 0x02,
+ 0xa1, 0xd0, 0x0f, 0x09, 0x00, 0x05, 0x09, 0xcd, 0x2c, 0xb2, 0x00, 0x05,
+ 0xf9, 0xcb, 0x10, 0xc9, 0x01, 0x70, 0xc0, 0xda, 0x1b, 0xb6, 0x01, 0x35,
+ 0x21, 0x51, 0x55, 0xda, 0x41, 0xc2, 0x90, 0x00, 0x41, 0xc2, 0xa2, 0xc9,
+ 0x57, 0x36, 0x01, 0x1d, 0x71, 0x45, 0x00, 0x8c, 0xc1, 0xc2, 0xb4, 0x03,
+ 0x41, 0xc2, 0xd8, 0x47, 0x34, 0x2f, 0xc1, 0xc2, 0xe4, 0x47, 0x02, 0x0e,
+ 0x41, 0xc2, 0xf7, 0x47, 0x34, 0x2f, 0xc1, 0xc3, 0x50, 0x47, 0x02, 0x0e,
+ 0x41, 0xc3, 0x63, 0xc5, 0x53, 0x93, 0x01, 0x09, 0xc9, 0x49, 0x1b, 0x0b,
+ 0x41, 0xc3, 0xc6, 0xd1, 0x31, 0xb3, 0x0f, 0xae, 0xd1, 0xc4, 0x05, 0x4b,
+ 0x01, 0x4f, 0x08, 0xd3, 0x41, 0x4b, 0x0f, 0x65, 0xa1, 0x47, 0x34, 0x2f,
+ 0xc1, 0xc3, 0xd6, 0xca, 0xa6, 0xc0, 0x0f, 0x65, 0x81, 0x49, 0x53, 0xa9,
+ 0xc1, 0xc4, 0x1b, 0xcb, 0x5f, 0x92, 0x0f, 0x65, 0x61, 0xc9, 0x41, 0x55,
+ 0x0f, 0x65, 0x00, 0xd5, 0x36, 0x08, 0x01, 0x4f, 0x28, 0x08, 0xc1, 0xc4,
+ 0x27, 0x16, 0xc1, 0xc4, 0x33, 0xc3, 0x05, 0x14, 0x0e, 0x9b, 0x90, 0xda,
+ 0x1b, 0x00, 0x01, 0x81, 0xb9, 0x4b, 0x19, 0xd1, 0x41, 0xc4, 0x3f, 0x48,
+ 0x0a, 0x53, 0xc1, 0xc4, 0x6f, 0x49, 0xb0, 0xb3, 0xc1, 0xc4, 0x7b, 0xcd,
+ 0x7e, 0x2e, 0x01, 0x7f, 0xa1, 0x4e, 0x71, 0xbc, 0xc1, 0xc4, 0x87, 0xc8,
+ 0x02, 0xf5, 0x01, 0x7f, 0xd8, 0xc7, 0xc2, 0x88, 0x01, 0x8c, 0x99, 0x0a,
+ 0xc1, 0xc4, 0x9d, 0xc7, 0xc5, 0xf3, 0x01, 0x8c, 0xb0, 0x43, 0x09, 0x9e,
+ 0xc1, 0xc4, 0xa9, 0xc9, 0xac, 0x2a, 0x01, 0x8c, 0xc8, 0xca, 0x9e, 0xfa,
+ 0x01, 0x8c, 0xb9, 0xc7, 0xc7, 0xa5, 0x01, 0x8c, 0xf8, 0x16, 0xc1, 0xc4,
+ 0xb5, 0xc3, 0x05, 0x14, 0x08, 0x42, 0xc2, 0x01, 0xc4, 0xc8, 0x16, 0xc1,
+ 0xc4, 0xcc, 0x15, 0xc1, 0xc4, 0xd8, 0x03, 0xc1, 0xc4, 0xe2, 0xc3, 0x20,
+ 0x18, 0x08, 0x42, 0x69, 0xc3, 0x00, 0x4e, 0x08, 0x42, 0x61, 0xc6, 0xcf,
+ 0xd7, 0x08, 0x42, 0x59, 0xc4, 0xe0, 0xe7, 0x08, 0x42, 0x51, 0xc4, 0x4a,
+ 0xb9, 0x08, 0x42, 0x49, 0xc2, 0x01, 0x7f, 0x08, 0x42, 0x23, 0x01, 0xc4,
+ 0xee, 0xc5, 0x4a, 0xb3, 0x08, 0x42, 0x31, 0xc3, 0x7e, 0x89, 0x08, 0x42,
+ 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x42, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x42,
+ 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x42, 0x09, 0xc2, 0x00, 0x67, 0x08, 0x42,
+ 0x81, 0xc4, 0xb9, 0x7e, 0x08, 0x42, 0x91, 0xc4, 0x5d, 0xe2, 0x08, 0x42,
+ 0x98, 0xc7, 0xc9, 0x0a, 0x0f, 0xa2, 0xd1, 0xc3, 0x1c, 0xe4, 0x0f, 0xa2,
+ 0x91, 0xc6, 0xa8, 0xc4, 0x0f, 0xa3, 0x09, 0xc5, 0xd4, 0xf7, 0x0f, 0xa3,
+ 0x10, 0x45, 0xa6, 0x50, 0xc1, 0xc4, 0xf4, 0xc5, 0x02, 0xd2, 0x01, 0x2e,
+ 0x5b, 0x01, 0xc5, 0x2b, 0xd4, 0x3a, 0x0c, 0x01, 0x3f, 0x0b, 0x01, 0xc5,
+ 0x2f, 0xc8, 0xb8, 0x3a, 0x01, 0x33, 0x38, 0x07, 0xc1, 0xc5, 0x35, 0xd5,
+ 0x31, 0xc4, 0x0f, 0xad, 0x59, 0x11, 0x41, 0xc5, 0x3f, 0xca, 0x9d, 0x2e,
+ 0x0f, 0xc5, 0x69, 0xc3, 0x05, 0x14, 0x0f, 0xc5, 0x60, 0xc5, 0x0b, 0x0a,
+ 0x01, 0x2d, 0x0b, 0x01, 0xc5, 0x4b, 0xc7, 0x37, 0x27, 0x01, 0x38, 0x21,
+ 0xc9, 0xb0, 0x1a, 0x01, 0x33, 0x21, 0xc2, 0x05, 0x1d, 0x0f, 0x99, 0x1b,
+ 0x01, 0xc5, 0x4f, 0x0f, 0xc1, 0xc5, 0x53, 0xca, 0x50, 0x80, 0x01, 0x30,
+ 0xb1, 0xc3, 0x0e, 0x6b, 0x01, 0x30, 0x31, 0xc9, 0xb3, 0x83, 0x07, 0xf2,
+ 0x30, 0x03, 0xc1, 0xc5, 0x5f, 0x43, 0x00, 0x4a, 0xc1, 0xc5, 0x6b, 0x45,
+ 0x0a, 0xe1, 0x41, 0xc5, 0x75, 0xc6, 0x3a, 0x1a, 0x01, 0x2e, 0x3b, 0x01,
+ 0xc5, 0x7b, 0x48, 0xbe, 0x32, 0xc1, 0xc5, 0x7f, 0x43, 0x01, 0x47, 0x41,
+ 0xc5, 0x8b, 0x14, 0xc1, 0xc5, 0x97, 0xd7, 0x28, 0x5a, 0x01, 0x36, 0xb9,
+ 0xc8, 0x36, 0xb4, 0x01, 0x30, 0x79, 0xd2, 0x49, 0xaf, 0x0f, 0xab, 0xf0,
+ 0x0e, 0xc1, 0xc5, 0xa3, 0x4c, 0x0e, 0x55, 0xc1, 0xc5, 0xb0, 0xcc, 0x7d,
+ 0x5f, 0x01, 0x31, 0xc8, 0x44, 0x00, 0x2d, 0xc1, 0xc5, 0xbc, 0xc8, 0x46,
+ 0x71, 0x01, 0x2d, 0x68, 0x4a, 0x03, 0x3d, 0xc1, 0xc5, 0xc8, 0x4a, 0x01,
+ 0xa9, 0x41, 0xc5, 0xd4, 0x46, 0x01, 0xdc, 0xc1, 0xc5, 0xe9, 0xca, 0x9c,
+ 0x2a, 0x01, 0x5e, 0xe8, 0xcc, 0x88, 0x59, 0x01, 0x2d, 0x89, 0x42, 0x00,
+ 0xc4, 0x41, 0xc5, 0xf9, 0x46, 0x05, 0x87, 0xc1, 0xc6, 0x05, 0xce, 0x51,
+ 0x6a, 0x01, 0x58, 0xf0, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x39, 0xd0, 0x35,
+ 0xe3, 0x0f, 0xc3, 0xf9, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x39, 0xca, 0x35,
+ 0xe9, 0x0f, 0xc3, 0x79, 0xd1, 0x50, 0x46, 0x0f, 0xc3, 0xb8, 0xd5, 0x35,
+ 0xde, 0x0f, 0xc4, 0x31, 0xd1, 0x50, 0x46, 0x0f, 0xc3, 0xb1, 0xca, 0x35,
+ 0xe9, 0x0f, 0xc3, 0x71, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x31, 0xd0, 0x35,
+ 0xe3, 0x0f, 0xc3, 0xf0, 0xd5, 0x35, 0xde, 0x0f, 0xc4, 0x29, 0xd1, 0x50,
+ 0x46, 0x0f, 0xc3, 0xa9, 0xca, 0x35, 0xe9, 0x0f, 0xc3, 0x69, 0xd0, 0x5c,
+ 0x32, 0x0f, 0xc3, 0x29, 0xd0, 0x35, 0xe3, 0x0f, 0xc3, 0xe8, 0xd5, 0x35,
+ 0xde, 0x0f, 0xc4, 0x21, 0xd1, 0x50, 0x46, 0x0f, 0xc3, 0xa1, 0xca, 0x35,
+ 0xe9, 0x0f, 0xc3, 0x61, 0xd0, 0x5c, 0x32, 0x0f, 0xc3, 0x21, 0xd0, 0x35,
+ 0xe3, 0x0f, 0xc3, 0xe0, 0xc5, 0xdc, 0xfe, 0x0f, 0x9c, 0x81, 0xcc, 0x87,
+ 0x15, 0x0f, 0x99, 0x60, 0xc6, 0xcc, 0x83, 0x0f, 0xb5, 0xf1, 0xc4, 0x51,
+ 0xb7, 0x0f, 0x98, 0x51, 0xc7, 0xc5, 0x75, 0x0f, 0xa0, 0x19, 0xc4, 0xe3,
+ 0xcf, 0x0f, 0xc9, 0xe8, 0xc4, 0x26, 0x78, 0x0f, 0x17, 0xc9, 0xc5, 0x06,
+ 0xdb, 0x0f, 0x17, 0xc1, 0x15, 0xc1, 0xc6, 0x17, 0x08, 0xc1, 0xc6, 0x23,
+ 0x16, 0xc1, 0xc6, 0x2f, 0xc3, 0x05, 0x14, 0x0f, 0x17, 0x89, 0xc4, 0x15,
+ 0xe7, 0x0f, 0x17, 0x80, 0xc3, 0xd8, 0x41, 0x0f, 0x17, 0x73, 0x01, 0xc6,
+ 0x3b, 0xc3, 0x12, 0xe0, 0x0f, 0x17, 0x62, 0x01, 0xc6, 0x41, 0x1b, 0xc1,
+ 0xc6, 0x47, 0x97, 0x0f, 0x16, 0xf3, 0x01, 0xc6, 0x51, 0x10, 0xc1, 0xc6,
+ 0x57, 0x83, 0x0f, 0x16, 0x0b, 0x01, 0xc6, 0x67, 0x87, 0x0f, 0x16, 0xdb,
+ 0x01, 0xc6, 0x78, 0x91, 0x0f, 0x16, 0xab, 0x01, 0xc6, 0x7c, 0x8b, 0x0f,
+ 0x16, 0xe3, 0x01, 0xc6, 0x83, 0x16, 0xc1, 0xc6, 0x89, 0x0e, 0xc1, 0xc6,
+ 0x9f, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0xd1, 0x0d, 0xc1, 0xc6, 0xa9, 0xc2,
+ 0x01, 0xc3, 0x0f, 0x16, 0xc1, 0xc2, 0x00, 0x39, 0x0f, 0x16, 0xb9, 0xc2,
+ 0x02, 0x41, 0x0f, 0x16, 0x99, 0xc2, 0x01, 0x4a, 0x0f, 0x16, 0x91, 0xc2,
+ 0x02, 0x1c, 0x0f, 0x16, 0x89, 0xc2, 0x25, 0x3b, 0x0f, 0x16, 0x81, 0x15,
+ 0xc1, 0xc6, 0xb3, 0xc2, 0x00, 0x87, 0x0f, 0x16, 0x69, 0x12, 0xc1, 0xc6,
+ 0xbd, 0xc2, 0x01, 0x30, 0x0f, 0x16, 0x29, 0xc2, 0x0e, 0x9a, 0x0f, 0x16,
+ 0x21, 0xc2, 0x00, 0x64, 0x0f, 0x16, 0x19, 0xc2, 0x01, 0x5d, 0x0f, 0x16,
+ 0x10, 0xc6, 0x2a, 0xfe, 0x08, 0xc7, 0x91, 0xc6, 0xcf, 0x9b, 0x08, 0xc7,
+ 0x89, 0x15, 0xc1, 0xc6, 0xc7, 0x08, 0xc1, 0xc6, 0xd3, 0x16, 0x41, 0xc6,
+ 0xdf, 0xc4, 0x26, 0x78, 0x08, 0xc7, 0x49, 0xc5, 0x06, 0xdb, 0x08, 0xc7,
+ 0x41, 0x15, 0xc1, 0xc6, 0xf1, 0x08, 0xc1, 0xc6, 0xfd, 0x16, 0xc1, 0xc7,
+ 0x09, 0xc3, 0x05, 0x14, 0x08, 0xc7, 0x09, 0xc4, 0x15, 0xe7, 0x08, 0xc7,
+ 0x00, 0xc4, 0xdf, 0x7f, 0x08, 0xc6, 0xf9, 0x15, 0xc1, 0xc7, 0x15, 0x0a,
+ 0xc1, 0xc7, 0x21, 0xc2, 0x05, 0x1c, 0x08, 0xc6, 0xc1, 0xc2, 0x02, 0xaa,
+ 0x08, 0xc6, 0xb9, 0x83, 0x08, 0xc6, 0x0b, 0x01, 0xc7, 0x31, 0xc2, 0x0e,
+ 0x9a, 0x08, 0xc6, 0xa1, 0x10, 0xc1, 0xc7, 0x3f, 0xc3, 0x02, 0x10, 0x08,
+ 0xc6, 0x91, 0x91, 0x08, 0xc6, 0x4b, 0x01, 0xc7, 0x4b, 0x87, 0x08, 0xc6,
+ 0x43, 0x01, 0xc7, 0x51, 0x17, 0xc1, 0xc7, 0x55, 0x1b, 0xc1, 0xc7, 0x5d,
+ 0xc2, 0x00, 0xe8, 0x08, 0xc6, 0x61, 0xc2, 0x01, 0x30, 0x08, 0xc6, 0x59,
+ 0xc2, 0x25, 0x9f, 0x08, 0xc6, 0x31, 0xc2, 0x00, 0x8c, 0x08, 0xc6, 0x10,
+ 0xc4, 0xdf, 0x7f, 0x08, 0xc5, 0xf9, 0x15, 0xc1, 0xc7, 0x6c, 0x0a, 0xc1,
+ 0xc7, 0x78, 0xc2, 0x05, 0x1c, 0x08, 0xc5, 0xc1, 0xc2, 0x02, 0xaa, 0x08,
+ 0xc5, 0xb9, 0x83, 0x08, 0xc5, 0x0b, 0x01, 0xc7, 0x88, 0xc2, 0x0e, 0x9a,
+ 0x08, 0xc5, 0xa1, 0x10, 0xc1, 0xc7, 0x96, 0xc3, 0x02, 0x10, 0x08, 0xc5,
+ 0x91, 0x91, 0x08, 0xc5, 0x4b, 0x01, 0xc7, 0xa2, 0x87, 0x08, 0xc5, 0x43,
+ 0x01, 0xc7, 0xa8, 0x17, 0xc1, 0xc7, 0xac, 0x1b, 0xc1, 0xc7, 0xb4, 0xc2,
+ 0x00, 0xe8, 0x08, 0xc5, 0x61, 0xc2, 0x01, 0x30, 0x08, 0xc5, 0x59, 0xc2,
+ 0x25, 0x9f, 0x08, 0xc5, 0x31, 0xc2, 0x00, 0x8c, 0x08, 0xc5, 0x10, 0xc3,
+ 0x02, 0x6e, 0x01, 0x18, 0x39, 0xc7, 0x80, 0x2f, 0x07, 0xf2, 0x78, 0xc5,
+ 0x00, 0x2c, 0x01, 0x49, 0x99, 0xc4, 0x00, 0x49, 0x01, 0x59, 0xf8, 0xcf,
+ 0x1b, 0x25, 0x01, 0x02, 0xa9, 0xcc, 0x8c, 0x19, 0x0f, 0x9d, 0xa0, 0x05,
+ 0xc1, 0xc7, 0xc3, 0xd7, 0x15, 0x2e, 0x01, 0x39, 0x19, 0xd8, 0x21, 0x23,
+ 0x01, 0x39, 0x11, 0x44, 0x05, 0x18, 0xc1, 0xc7, 0xcf, 0xcb, 0x8d, 0xdc,
+ 0x0f, 0x9a, 0x01, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x30, 0xcb, 0x93, 0x5c,
+ 0x0f, 0x9b, 0xe8, 0x00, 0xc1, 0xc7, 0xdb, 0xc9, 0xab, 0xd9, 0x0f, 0xb1,
+ 0xb0, 0xd7, 0x29, 0x6e, 0x0f, 0xb0, 0x59, 0xd0, 0x59, 0x32, 0x0f, 0xb1,
+ 0x88, 0xdf, 0x0d, 0x9b, 0x01, 0x36, 0xf1, 0x49, 0x0d, 0x20, 0x41, 0xc8,
+ 0x24, 0xe0, 0x06, 0x87, 0x01, 0x3d, 0x60, 0xc9, 0xb2, 0xa2, 0x0f, 0x98,
+ 0xe9, 0xc6, 0x00, 0x91, 0x0f, 0x98, 0xa8, 0xca, 0x5d, 0xa2, 0x07, 0xf8,
+ 0x19, 0xc7, 0x68, 0xc6, 0x07, 0xff, 0x10, 0xc7, 0x0b, 0x00, 0x07, 0xf8,
+ 0x51, 0xc8, 0x36, 0x21, 0x07, 0xf8, 0x31, 0xc9, 0x2d, 0x85, 0x07, 0xf8,
+ 0x38, 0x45, 0x09, 0x98, 0xc1, 0xc8, 0x30, 0xca, 0x99, 0x61, 0x07, 0xf8,
+ 0x20, 0x11, 0xc1, 0xc8, 0x54, 0xd0, 0x08, 0xf7, 0x07, 0xf9, 0xf1, 0xc8,
+ 0x8e, 0x16, 0x07, 0xff, 0x00, 0xc8, 0x52, 0x00, 0x07, 0xf8, 0xd9, 0xc6,
+ 0x27, 0x5e, 0x07, 0xf8, 0x78, 0x07, 0xc1, 0xc8, 0x60, 0x45, 0x0b, 0x12,
+ 0xc1, 0xc8, 0x6c, 0xc7, 0x80, 0x2f, 0x07, 0xf9, 0xf8, 0xca, 0x0e, 0xbe,
+ 0x07, 0xf8, 0xe9, 0xcf, 0x0f, 0x0a, 0x07, 0xf8, 0x08, 0xcf, 0x54, 0xbb,
+ 0x07, 0xf8, 0xf1, 0xca, 0x1f, 0x0e, 0x07, 0xfa, 0x00, 0xcb, 0x2c, 0xb4,
+ 0x07, 0xf8, 0xf9, 0xcc, 0x01, 0xbb, 0x07, 0xf8, 0x10, 0xce, 0x61, 0xd5,
+ 0x07, 0xf9, 0xe1, 0x45, 0x00, 0x2d, 0x41, 0xc8, 0x78, 0xc9, 0x9f, 0xc3,
+ 0x07, 0xff, 0x09, 0xcb, 0x8e, 0x13, 0x07, 0xf8, 0x29, 0xc8, 0x60, 0xf4,
+ 0x07, 0xf8, 0x58, 0x00, 0x41, 0xc8, 0x90, 0xc9, 0xa8, 0x28, 0x0f, 0x9c,
+ 0x39, 0x95, 0x0f, 0x9c, 0x30, 0xc5, 0x91, 0x52, 0x0f, 0xb4, 0x91, 0xcb,
+ 0x92, 0xf9, 0x0f, 0xcf, 0x78, 0x49, 0xb2, 0xcf, 0xc1, 0xc8, 0x9c, 0xc2,
+ 0x00, 0xac, 0x0b, 0x7a, 0x50, 0x44, 0x1a, 0xce, 0xc1, 0xc8, 0xa8, 0x15,
+ 0xc1, 0xc8, 0xc4, 0x87, 0x0b, 0x7a, 0x41, 0x42, 0x07, 0x26, 0xc1, 0xc8,
+ 0xd8, 0xc2, 0x01, 0x6f, 0x0b, 0x78, 0x71, 0x83, 0x0b, 0x78, 0x50, 0x83,
+ 0x0b, 0x78, 0x83, 0x01, 0xc8, 0xe2, 0x1b, 0xc1, 0xc8, 0xe8, 0x09, 0xc1,
+ 0xc8, 0xf2, 0x10, 0xc1, 0xc8, 0xfc, 0xc2, 0x00, 0xd0, 0x0b, 0x78, 0x88,
+ 0x1c, 0xc1, 0xc9, 0x06, 0x42, 0x07, 0x26, 0xc1, 0xc9, 0x1c, 0xc2, 0x0e,
+ 0x9a, 0x0b, 0x78, 0x79, 0x83, 0x0b, 0x78, 0x58, 0xc2, 0x16, 0x5a, 0x0b,
+ 0x7a, 0x31, 0x83, 0x0b, 0x79, 0xd1, 0xc2, 0x0d, 0xf6, 0x0b, 0x79, 0xa1,
+ 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x98, 0xc2, 0x00, 0x2c, 0x0b, 0x7a, 0x29,
+ 0x83, 0x0b, 0x78, 0x08, 0xc2, 0x00, 0xd0, 0x0b, 0x7a, 0x21, 0x83, 0x0b,
+ 0x79, 0x30, 0x8a, 0x0b, 0x7a, 0x19, 0x47, 0x78, 0xc0, 0x41, 0xc9, 0x26,
+ 0x1c, 0xc1, 0xc9, 0x36, 0x15, 0xc1, 0xc9, 0x44, 0x83, 0x0b, 0x79, 0xd9,
+ 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0xa8, 0x16, 0xc1, 0xc9, 0x4e, 0xc4, 0xe2,
+ 0x83, 0x0b, 0x79, 0x89, 0xc2, 0x02, 0x2b, 0x0b, 0x79, 0x01, 0xc3, 0x3a,
+ 0x09, 0x0b, 0x78, 0x91, 0xc2, 0x00, 0xb0, 0x0b, 0x78, 0x10, 0x0a, 0xc1,
+ 0xc9, 0x5c, 0x83, 0x0b, 0x78, 0xf8, 0xc2, 0x01, 0x30, 0x0b, 0x79, 0x11,
+ 0x83, 0x0b, 0x79, 0x08, 0x0a, 0xc1, 0xc9, 0x66, 0xc2, 0x19, 0x2c, 0x0b,
+ 0x78, 0xb9, 0x83, 0x0b, 0x78, 0xb0, 0xc2, 0x00, 0x87, 0x0b, 0x78, 0x49,
+ 0x83, 0x0b, 0x78, 0x40, 0xc2, 0x00, 0xd0, 0x0b, 0x78, 0x29, 0x83, 0x0b,
+ 0x78, 0x20, 0xc2, 0x00, 0xdb, 0x0b, 0x78, 0x19, 0x83, 0x0b, 0x78, 0x00,
+ 0x8b, 0x0b, 0x7c, 0x39, 0xc2, 0x13, 0x38, 0x0b, 0x7b, 0xf9, 0xc2, 0x00,
+ 0x75, 0x0b, 0x7b, 0x81, 0xc2, 0x06, 0xdb, 0x0b, 0x7b, 0x79, 0x97, 0x0b,
+ 0x7b, 0x71, 0x83, 0x0b, 0x7b, 0x5a, 0x01, 0xc9, 0x70, 0x91, 0x0b, 0x7b,
+ 0x2b, 0x01, 0xc9, 0x77, 0x89, 0x0b, 0x7c, 0x21, 0xc2, 0x00, 0x75, 0x0b,
+ 0x7b, 0x49, 0x97, 0x0b, 0x7b, 0x41, 0x8b, 0x0b, 0x7b, 0x39, 0x87, 0x0b,
+ 0x7b, 0x31, 0x83, 0x0b, 0x7b, 0x12, 0x01, 0xc9, 0x7d, 0x83, 0x0b, 0x7c,
+ 0x29, 0x8b, 0x0b, 0x7b, 0xd1, 0x94, 0x0b, 0x7b, 0xbb, 0x01, 0xc9, 0x84,
+ 0x90, 0x0b, 0x7a, 0xf2, 0x01, 0xc9, 0x88, 0x07, 0xc1, 0xc9, 0x8c, 0x89,
+ 0x0b, 0x7c, 0x09, 0x97, 0x0b, 0x7b, 0xe1, 0x91, 0x0b, 0x7a, 0xd0, 0xc2,
+ 0x03, 0xd4, 0x0b, 0x7c, 0x01, 0x8b, 0x0b, 0x7b, 0x90, 0x89, 0x0b, 0x7b,
+ 0xf0, 0x97, 0x0b, 0x7b, 0xd9, 0x8b, 0x0b, 0x7b, 0xc9, 0x87, 0x0b, 0x7b,
+ 0x9b, 0x01, 0xc9, 0x94, 0x90, 0x0b, 0x7a, 0xbb, 0x01, 0xc9, 0x98, 0xc2,
+ 0x61, 0x75, 0x0b, 0x7a, 0xb1, 0x83, 0x0b, 0x7a, 0xa8, 0x94, 0x0b, 0x7b,
+ 0xb0, 0x91, 0x0b, 0x7a, 0xd8, 0xca, 0xa1, 0x84, 0x0b, 0x7a, 0x99, 0xc7,
+ 0xc1, 0x62, 0x0b, 0x7a, 0x90, 0xc5, 0x1e, 0xc8, 0x01, 0x12, 0x11, 0xc4,
+ 0x00, 0xba, 0x01, 0x10, 0x92, 0x01, 0xc9, 0x9c, 0x4e, 0x75, 0x20, 0xc1,
+ 0xc9, 0xa0, 0xcb, 0x58, 0xc7, 0x0f, 0xbd, 0x19, 0x46, 0x01, 0xfc, 0xc1,
+ 0xc9, 0xac, 0x04, 0xc1, 0xc9, 0xb8, 0x45, 0x00, 0x2c, 0xc1, 0xc9, 0xc4,
+ 0x44, 0x00, 0x49, 0xc1, 0xc9, 0xce, 0x08, 0xc1, 0xc9, 0xd8, 0xcc, 0x07,
+ 0xbb, 0x01, 0x3a, 0xc9, 0x15, 0xc1, 0xc9, 0xea, 0xd2, 0x4c, 0x91, 0x01,
+ 0x02, 0xf9, 0x46, 0x0f, 0x88, 0x41, 0xca, 0x02, 0xc5, 0x0a, 0x8a, 0x01,
+ 0x72, 0x61, 0xd0, 0x0f, 0x09, 0x01, 0x72, 0x99, 0xcd, 0x2c, 0xb2, 0x01,
+ 0x72, 0xa0, 0xca, 0x9c, 0x70, 0x0b, 0x74, 0xc9, 0x4c, 0x29, 0xba, 0x41,
+ 0xca, 0x0e, 0xc4, 0x0a, 0x8b, 0x0b, 0x74, 0xb9, 0x4e, 0x0b, 0x18, 0x41,
+ 0xca, 0x88, 0x16, 0xc1, 0xcb, 0x02, 0xc3, 0x05, 0x14, 0x0b, 0x74, 0x0b,
+ 0x01, 0xcb, 0x14, 0xc4, 0x26, 0x78, 0x0b, 0x74, 0x49, 0xc5, 0x06, 0xdb,
+ 0x0b, 0x74, 0x41, 0x15, 0xc1, 0xcb, 0x1a, 0x08, 0xc1, 0xcb, 0x26, 0xc4,
+ 0x15, 0xe7, 0x0b, 0x74, 0x00, 0xc8, 0x4b, 0x5f, 0x0b, 0x74, 0x99, 0x07,
+ 0xc1, 0xcb, 0x32, 0x15, 0xc1, 0xcb, 0x3e, 0x08, 0xc1, 0xcb, 0x4a, 0x16,
+ 0x41, 0xcb, 0x56, 0xc8, 0xb5, 0x5a, 0x01, 0x1e, 0xc1, 0xc6, 0xcd, 0xe5,
+ 0x01, 0x1e, 0xb9, 0x4a, 0x9b, 0x12, 0x41, 0xcb, 0x68, 0xca, 0x9c, 0x16,
+ 0x01, 0x1e, 0xa1, 0xc5, 0x2e, 0xee, 0x01, 0x1e, 0x90, 0x1d, 0xc1, 0xcb,
+ 0x74, 0x1e, 0x41, 0xcb, 0x9c, 0xc3, 0x05, 0x14, 0x0f, 0x46, 0x39, 0x16,
+ 0xc1, 0xcb, 0xc4, 0x08, 0xc1, 0xcb, 0xd0, 0x15, 0xc1, 0xcb, 0xdc, 0xc5,
+ 0x06, 0xdb, 0x0f, 0x46, 0x71, 0xc4, 0x26, 0x78, 0x0f, 0x46, 0x78, 0x16,
+ 0xc1, 0xcb, 0xe8, 0x47, 0x0d, 0x04, 0xc1, 0xcb, 0xf2, 0xc8, 0x33, 0xee,
+ 0x0f, 0x46, 0xb0, 0x49, 0x53, 0xa9, 0xc1, 0xcb, 0xfc, 0x47, 0x34, 0x2f,
+ 0xc1, 0xcc, 0x18, 0x0e, 0x41, 0xcc, 0x3f, 0xcb, 0x91, 0x99, 0x08, 0x4c,
+ 0xf3, 0x01, 0xcc, 0x4b, 0x47, 0x02, 0x0e, 0x41, 0xcc, 0x51, 0x00, 0x41,
+ 0xcc, 0xb3, 0xc2, 0x02, 0xa0, 0x05, 0x5f, 0x91, 0xc4, 0x02, 0xde, 0x05,
+ 0x5f, 0x98, 0xc3, 0x09, 0x9e, 0x05, 0x5f, 0xa1, 0xc3, 0x0d, 0x14, 0x05,
+ 0x5f, 0xa8, 0xc2, 0x22, 0xcc, 0x05, 0x5f, 0xb1, 0xc4, 0x18, 0x10, 0x05,
+ 0x5f, 0xb8, 0xc4, 0xe4, 0x73, 0x05, 0x5f, 0x51, 0xc7, 0xc6, 0x16, 0x05,
+ 0x5f, 0x49, 0xc5, 0xd5, 0x3d, 0x05, 0x5f, 0x31, 0x03, 0xc1, 0xcc, 0xbf,
+ 0x0b, 0xc1, 0xcc, 0xcd, 0xc4, 0xbd, 0x08, 0x05, 0x5f, 0x19, 0xc7, 0x40,
+ 0xe5, 0x05, 0x57, 0xa9, 0x17, 0xc1, 0xcc, 0xd7, 0xc6, 0xce, 0x4b, 0x05,
+ 0x5f, 0x38, 0x8b, 0x05, 0x5e, 0x7b, 0x01, 0xcc, 0xe1, 0x10, 0xc1, 0xcc,
+ 0xe7, 0x16, 0xc1, 0xcd, 0x03, 0x12, 0xc1, 0xcd, 0x16, 0x0d, 0xc1, 0xcd,
+ 0x23, 0x04, 0xc1, 0xcd, 0x32, 0x06, 0xc1, 0xcd, 0x3c, 0x09, 0xc1, 0xcd,
+ 0x4c, 0x15, 0xc1, 0xcd, 0x58, 0x42, 0x11, 0xee, 0xc1, 0xcd, 0x6a, 0x91,
+ 0x05, 0x57, 0x09, 0x87, 0x05, 0x57, 0x01, 0xc3, 0x18, 0x95, 0x05, 0x5e,
+ 0xa1, 0xc5, 0xd5, 0x92, 0x05, 0x5e, 0x89, 0xc2, 0x05, 0x1d, 0x05, 0x5e,
+ 0x71, 0xc3, 0xcc, 0x38, 0x05, 0x5e, 0x69, 0xc4, 0xb0, 0x02, 0x05, 0x5e,
+ 0x61, 0xc3, 0x27, 0x01, 0x05, 0x5e, 0x1b, 0x01, 0xcd, 0x74, 0xc3, 0x02,
+ 0xf9, 0x05, 0x5e, 0x13, 0x01, 0xcd, 0x7a, 0xc3, 0x0c, 0x26, 0x05, 0x5e,
+ 0x59, 0x0c, 0x41, 0xcd, 0x80, 0xc7, 0xc0, 0x82, 0x0f, 0xb7, 0xa9, 0xc4,
+ 0xd0, 0x81, 0x0f, 0xb7, 0x28, 0x00, 0x41, 0xcd, 0x8c, 0xc4, 0x00, 0x87,
+ 0x0f, 0xa1, 0x69, 0xc4, 0xd0, 0xf1, 0x0f, 0xd5, 0x20, 0xc5, 0x61, 0xc0,
+ 0x0e, 0x98, 0x01, 0x1b, 0x41, 0xcd, 0x9e, 0x46, 0x45, 0x87, 0xc1, 0xcd,
+ 0xaa, 0xd9, 0x1e, 0x69, 0x08, 0xb3, 0x19, 0xcf, 0x62, 0x5b, 0x00, 0xc0,
+ 0x30, 0xca, 0x01, 0x28, 0x08, 0xb3, 0x4b, 0x01, 0xcd, 0xb0, 0xdc, 0x14,
+ 0x85, 0x00, 0xc0, 0x38, 0xd5, 0x01, 0x32, 0x08, 0xb3, 0x40, 0x46, 0x00,
+ 0x8b, 0x41, 0xcd, 0xb6, 0x46, 0x00, 0x8b, 0x41, 0xcd, 0xc2, 0xd9, 0x1e,
+ 0x9b, 0x08, 0xb3, 0x11, 0x45, 0x09, 0x98, 0x41, 0xcd, 0xce, 0xc2, 0x01,
+ 0xc3, 0x00, 0xc1, 0x73, 0x01, 0xcd, 0xf2, 0x83, 0x00, 0xc1, 0x03, 0x01,
+ 0xcd, 0xf8, 0x16, 0xc1, 0xce, 0x04, 0x42, 0x11, 0xee, 0xc1, 0xce, 0x14,
+ 0x15, 0xc1, 0xce, 0x1f, 0x1c, 0xc1, 0xce, 0x2f, 0x0e, 0xc1, 0xce, 0x3f,
+ 0xc3, 0x39, 0x6e, 0x00, 0xc1, 0xf1, 0x0d, 0xc1, 0xce, 0x49, 0xc2, 0x00,
+ 0x87, 0x00, 0xc1, 0xc9, 0xc2, 0x01, 0x4a, 0x00, 0xc1, 0xc1, 0xc2, 0x00,
+ 0x39, 0x00, 0xc1, 0xb9, 0xc2, 0x19, 0x2c, 0x00, 0xc1, 0xb1, 0xc2, 0x25,
+ 0x3b, 0x00, 0xc1, 0xa9, 0xc2, 0x0e, 0x9a, 0x00, 0xc1, 0x99, 0xc2, 0x01,
+ 0x30, 0x00, 0xc1, 0x69, 0xc2, 0x0f, 0x9a, 0x00, 0xc1, 0x61, 0xc2, 0x00,
+ 0xb0, 0x00, 0xc1, 0x59, 0xc2, 0x01, 0x5d, 0x00, 0xc1, 0x51, 0xc2, 0x00,
+ 0xc1, 0x00, 0xc1, 0x41, 0x87, 0x00, 0xc1, 0x0b, 0x01, 0xce, 0x53, 0x97,
+ 0x00, 0xc1, 0x23, 0x01, 0xce, 0x57, 0x91, 0x00, 0xc1, 0x1b, 0x01, 0xce,
+ 0x5b, 0x8b, 0x00, 0xc1, 0x10, 0x57, 0x28, 0x43, 0xc1, 0xce, 0x5f, 0xc8,
+ 0x3b, 0x7a, 0x00, 0xc0, 0x29, 0xc8, 0x11, 0xf7, 0x00, 0xc0, 0x18, 0xc9,
+ 0x11, 0xf6, 0x00, 0xc0, 0x49, 0xc5, 0x0a, 0x8a, 0x00, 0xc0, 0x40, 0xc3,
+ 0x0d, 0xe5, 0x00, 0xc0, 0x21, 0xc3, 0x0a, 0x8c, 0x00, 0xc0, 0x10, 0xca,
+ 0xa0, 0xf8, 0x0f, 0xa5, 0xc1, 0xc3, 0x32, 0x20, 0x0f, 0xa5, 0x80, 0x06,
+ 0xc1, 0xce, 0x6f, 0x45, 0x00, 0xba, 0xc1, 0xce, 0x81, 0xd1, 0x50, 0xce,
+ 0x08, 0xb2, 0x19, 0x4b, 0x6f, 0xc7, 0xc1, 0xce, 0x91, 0x47, 0x02, 0x0e,
+ 0x41, 0xce, 0xb1, 0x47, 0x02, 0x0e, 0xc1, 0xcf, 0x16, 0xd9, 0x1d, 0x88,
+ 0x05, 0x5a, 0xd8, 0x48, 0x0b, 0x17, 0xc1, 0xcf, 0x5c, 0x12, 0xc1, 0xcf,
+ 0xfd, 0xca, 0x9c, 0xac, 0x0e, 0xb8, 0xd1, 0xcc, 0x8b, 0x65, 0x0e, 0xb8,
+ 0xc1, 0xcc, 0x89, 0xfd, 0x0e, 0xb8, 0xb9, 0xce, 0x10, 0x3e, 0x0e, 0xb8,
+ 0xb1, 0x46, 0x03, 0x13, 0xc1, 0xd0, 0x0f, 0xc5, 0xdb, 0xf0, 0x0e, 0xb7,
+ 0xd8, 0x15, 0xc1, 0xd0, 0xaf, 0x46, 0x09, 0x97, 0xc1, 0xd0, 0xbb, 0x48,
+ 0x0b, 0x17, 0xc1, 0xd0, 0xdf, 0x47, 0xc7, 0x4a, 0xc1, 0xd1, 0x80, 0x12,
+ 0xc1, 0xd1, 0xae, 0xca, 0x9c, 0xac, 0x0e, 0xb7, 0x01, 0xcc, 0x8b, 0x65,
+ 0x0e, 0xb6, 0xf1, 0xcc, 0x89, 0xfd, 0x0e, 0xb6, 0xe9, 0xce, 0x10, 0x3e,
+ 0x0e, 0xb6, 0xe1, 0xc5, 0xdb, 0xf0, 0x0e, 0xb6, 0x09, 0x48, 0xbd, 0x42,
+ 0x41, 0xd1, 0xc0, 0x46, 0x09, 0x97, 0xc1, 0xd1, 0xcc, 0x46, 0x03, 0x13,
+ 0xc1, 0xd1, 0xf0, 0x48, 0x0b, 0x17, 0x41, 0xd2, 0x58, 0x4a, 0x43, 0x55,
+ 0xc1, 0xd2, 0xc0, 0x46, 0x07, 0x2f, 0x41, 0xd2, 0xde, 0x46, 0x09, 0x97,
+ 0xc1, 0xd2, 0xea, 0x46, 0x03, 0x13, 0xc1, 0xd3, 0x0e, 0x48, 0x0b, 0x17,
+ 0x41, 0xd3, 0x76, 0x47, 0xbd, 0x43, 0xc1, 0xd3, 0xc2, 0xcf, 0x35, 0x0c,
+ 0x01, 0x3e, 0x68, 0x44, 0x00, 0x2e, 0xc1, 0xd3, 0xce, 0xcd, 0x27, 0x2f,
+ 0x01, 0x3e, 0x58, 0xd5, 0x35, 0x36, 0x01, 0x3f, 0x71, 0x46, 0x01, 0xfc,
+ 0xc1, 0xd3, 0xe6, 0xd4, 0x38, 0xf4, 0x01, 0x3f, 0x51, 0xcd, 0x0b, 0x91,
+ 0x01, 0x3f, 0x40, 0xc3, 0x03, 0x26, 0x0e, 0x97, 0x90, 0xc4, 0x14, 0x09,
+ 0x0e, 0x97, 0x88, 0xc4, 0x14, 0x09, 0x0e, 0x97, 0x80, 0xc5, 0x14, 0x08,
+ 0x0e, 0x97, 0x79, 0xc2, 0x00, 0x5f, 0x0e, 0x97, 0x28, 0xc4, 0x14, 0x09,
+ 0x0e, 0x97, 0x70, 0xc6, 0x52, 0xcd, 0x0e, 0x97, 0x69, 0xc3, 0x02, 0xdf,
+ 0x0e, 0x97, 0x18, 0xc4, 0x22, 0x44, 0x0e, 0x97, 0x61, 0x91, 0x0e, 0x97,
+ 0x10, 0xc2, 0x19, 0x2c, 0x08, 0xf7, 0x59, 0x83, 0x08, 0xf7, 0x41, 0xc2,
+ 0x01, 0x30, 0x08, 0xf7, 0x10, 0xc4, 0x26, 0x78, 0x08, 0xea, 0xc9, 0xc5,
+ 0x06, 0xdb, 0x08, 0xea, 0xc1, 0x15, 0xc1, 0xd3, 0xf2, 0x08, 0xc1, 0xd3,
+ 0xfe, 0x16, 0xc1, 0xd4, 0x0a, 0xc3, 0x05, 0x14, 0x08, 0xea, 0x89, 0xc4,
+ 0x15, 0xe7, 0x08, 0xea, 0x80, 0xc6, 0xd1, 0x39, 0x08, 0xea, 0x39, 0xc4,
+ 0xbb, 0x54, 0x08, 0xea, 0x30, 0xc5, 0x1e, 0x96, 0x08, 0xea, 0x29, 0x4a,
+ 0x6f, 0xc8, 0x41, 0xd4, 0x16, 0xc7, 0xc3, 0xa7, 0x08, 0xea, 0x21, 0xc6,
+ 0x1e, 0x89, 0x08, 0xea, 0x19, 0xc5, 0x33, 0x5d, 0x08, 0xea, 0x11, 0xc7,
+ 0x40, 0xe5, 0x08, 0xea, 0x09, 0xc8, 0x14, 0x38, 0x08, 0xea, 0x00, 0x16,
+ 0xc1, 0xd4, 0x36, 0x0c, 0xc1, 0xd4, 0x4a, 0x0d, 0xc1, 0xd4, 0x5a, 0x0e,
+ 0xc1, 0xd4, 0x6a, 0xc2, 0x00, 0xd0, 0x08, 0xe9, 0x61, 0x15, 0xc1, 0xd4,
+ 0x74, 0xc2, 0x02, 0x41, 0x08, 0xe9, 0x41, 0xc2, 0x00, 0x39, 0x08, 0xe9,
+ 0x31, 0xc2, 0x19, 0x2c, 0x08, 0xe9, 0x29, 0xc2, 0x01, 0xc3, 0x08, 0xe9,
+ 0x21, 0x04, 0xc1, 0xd4, 0x84, 0x12, 0xc1, 0xd4, 0x8e, 0x10, 0xc1, 0xd4,
+ 0x98, 0x06, 0xc1, 0xd4, 0xae, 0x05, 0xc1, 0xd4, 0xbc, 0x09, 0xc1, 0xd4,
+ 0xc6, 0x83, 0x08, 0xe8, 0x03, 0x01, 0xd4, 0xd0, 0x91, 0x08, 0xe8, 0x49,
+ 0x87, 0x08, 0xe8, 0x31, 0x97, 0x08, 0xe8, 0x23, 0x01, 0xd4, 0xdc, 0x8b,
+ 0x08, 0xe8, 0x12, 0x01, 0xd4, 0xe0, 0x44, 0x00, 0xbb, 0xc1, 0xd4, 0xe4,
+ 0x50, 0x5c, 0xf2, 0x41, 0xd4, 0xf0, 0x91, 0x08, 0xe5, 0xa1, 0x87, 0x08,
+ 0xe5, 0x99, 0x97, 0x08, 0xe5, 0x91, 0x8b, 0x08, 0xe5, 0x89, 0xc2, 0x04,
+ 0xc6, 0x08, 0xe5, 0x80, 0x83, 0x08, 0xe4, 0x79, 0xc2, 0x00, 0xd0, 0x08,
+ 0xe4, 0x71, 0x15, 0xc1, 0xd5, 0x4a, 0xc2, 0x00, 0xdb, 0x08, 0xe4, 0x59,
+ 0xc2, 0x00, 0x39, 0x08, 0xe4, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0xe4, 0x49,
+ 0xc2, 0x00, 0x02, 0x08, 0xe4, 0x41, 0x1c, 0xc1, 0xd5, 0x54, 0xc2, 0x01,
+ 0x4a, 0x08, 0xe4, 0x29, 0x06, 0xc1, 0xd5, 0x5e, 0x16, 0xc1, 0xd5, 0x68,
+ 0xc2, 0x01, 0xc3, 0x08, 0xe4, 0x09, 0xc2, 0x01, 0x5d, 0x08, 0xe4, 0x01,
+ 0x12, 0xc1, 0xd5, 0x76, 0x10, 0xc1, 0xd5, 0x80, 0xc2, 0x25, 0x3b, 0x08,
+ 0xe3, 0xc1, 0x05, 0xc1, 0xd5, 0x90, 0xc2, 0x01, 0x30, 0x08, 0xe3, 0xa1,
+ 0x0d, 0x41, 0xd5, 0x9a, 0xd8, 0x20, 0xf3, 0x01, 0x35, 0x39, 0xc4, 0x00,
+ 0xba, 0x01, 0x35, 0x30, 0x05, 0xc1, 0xd5, 0xa4, 0x03, 0xc1, 0xd5, 0xb6,
+ 0x18, 0xc1, 0xd5, 0xc2, 0xc4, 0x00, 0xb0, 0x00, 0x6a, 0x78, 0x18, 0xc1,
+ 0xd5, 0xcc, 0x83, 0x00, 0x68, 0x2b, 0x01, 0xd5, 0xdc, 0x8b, 0x00, 0x68,
+ 0x3b, 0x01, 0xd5, 0xee, 0x97, 0x00, 0x68, 0x4b, 0x01, 0xd5, 0xf2, 0x87,
+ 0x00, 0x68, 0x73, 0x01, 0xd5, 0xf6, 0x91, 0x00, 0x68, 0x93, 0x01, 0xd5,
+ 0xfa, 0x0d, 0xc1, 0xd5, 0xfe, 0x09, 0xc1, 0xd6, 0x08, 0x10, 0xc1, 0xd6,
+ 0x12, 0x05, 0xc1, 0xd6, 0x26, 0x0c, 0xc1, 0xd6, 0x2e, 0x16, 0xc1, 0xd6,
+ 0x38, 0x06, 0xc1, 0xd6, 0x46, 0x12, 0xc1, 0xd6, 0x5a, 0x04, 0xc1, 0xd6,
+ 0x64, 0xc2, 0x01, 0xc3, 0x00, 0x69, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x69,
+ 0x79, 0x14, 0xc1, 0xd6, 0x6e, 0x0e, 0xc1, 0xd6, 0x78, 0x15, 0xc1, 0xd6,
+ 0x80, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0xc8, 0x03, 0xc1, 0xd6, 0x90, 0x8b,
+ 0x00, 0x69, 0xfb, 0x01, 0xd6, 0x9c, 0x97, 0x00, 0x6a, 0x0b, 0x01, 0xd6,
+ 0xa0, 0x48, 0xb2, 0x2d, 0xc1, 0xd6, 0xa4, 0x87, 0x00, 0x6a, 0x33, 0x01,
+ 0xd6, 0xb2, 0x91, 0x00, 0x6a, 0x52, 0x01, 0xd6, 0xb6, 0x44, 0x05, 0x14,
+ 0xc1, 0xd6, 0xba, 0x46, 0x02, 0xdd, 0x41, 0xd6, 0xe0, 0x45, 0x09, 0x98,
+ 0xc1, 0xd6, 0xf8, 0xc8, 0xbc, 0xda, 0x00, 0x6b, 0xc8, 0xc3, 0x09, 0x41,
+ 0x00, 0x6b, 0x81, 0x44, 0x05, 0x14, 0x41, 0xd7, 0x1c, 0xcb, 0x92, 0x07,
+ 0x08, 0x57, 0xb1, 0xc8, 0x02, 0x9f, 0x08, 0x57, 0xa9, 0x42, 0x00, 0x58,
+ 0xc1, 0xd7, 0x28, 0xc7, 0x2c, 0xab, 0x08, 0x57, 0x89, 0xc4, 0x0e, 0x6a,
+ 0x08, 0x57, 0x80, 0xc3, 0x05, 0x14, 0x08, 0x57, 0x5b, 0x01, 0xd7, 0x35,
+ 0x16, 0xc1, 0xd7, 0x3b, 0xc4, 0x0d, 0x13, 0x08, 0x57, 0x60, 0xc5, 0x05,
+ 0x02, 0x08, 0x57, 0x31, 0xc5, 0x00, 0xd4, 0x08, 0x57, 0x28, 0x16, 0xc1,
+ 0xd7, 0x47, 0x15, 0xc1, 0xd7, 0x59, 0xc4, 0x5d, 0xe2, 0x08, 0x57, 0x09,
+ 0x13, 0xc1, 0xd7, 0x69, 0x1a, 0xc1, 0xd7, 0x75, 0xc2, 0x14, 0xda, 0x08,
+ 0x56, 0xe1, 0xc2, 0x00, 0x67, 0x08, 0x56, 0xd9, 0x03, 0xc1, 0xd7, 0x81,
+ 0xc3, 0x20, 0x18, 0x08, 0x56, 0xb9, 0xc3, 0x00, 0x4e, 0x08, 0x56, 0xb1,
+ 0x06, 0xc1, 0xd7, 0x93, 0xc6, 0xcf, 0xd7, 0x08, 0x56, 0x99, 0x0d, 0xc1,
+ 0xd7, 0x9f, 0xc4, 0x4a, 0xb9, 0x08, 0x56, 0x79, 0xc2, 0x01, 0x7f, 0x08,
+ 0x56, 0x33, 0x01, 0xd7, 0xab, 0x0c, 0xc1, 0xd7, 0xb1, 0x1c, 0xc1, 0xd7,
+ 0xbd, 0xc3, 0x7e, 0x89, 0x08, 0x56, 0x39, 0x09, 0xc1, 0xd7, 0xc9, 0x04,
+ 0x41, 0xd7, 0xd5, 0xd8, 0x22, 0xd3, 0x0f, 0xab, 0xa1, 0xc6, 0xd1, 0xdb,
+ 0x0f, 0xc9, 0xa8, 0xc6, 0xd2, 0x9b, 0x0f, 0xa3, 0x99, 0xca, 0xa1, 0x66,
+ 0x0f, 0xa3, 0x90, 0x03, 0xc1, 0xd7, 0xe1, 0xc3, 0xa7, 0x52, 0x00, 0x42,
+ 0xb9, 0xc8, 0xb9, 0xc2, 0x00, 0x42, 0xb1, 0x0b, 0xc1, 0xd8, 0x28, 0xc7,
+ 0xb9, 0xc3, 0x00, 0x42, 0x29, 0xc5, 0xd6, 0xc3, 0x00, 0x42, 0x00, 0xcc,
+ 0x85, 0xd1, 0x08, 0x8b, 0xb1, 0x46, 0x02, 0x0f, 0x41, 0xd8, 0x30, 0xcb,
+ 0x45, 0x8e, 0x08, 0x8b, 0xa9, 0xc9, 0xad, 0xb6, 0x08, 0x8b, 0x98, 0xc5,
+ 0x06, 0xbb, 0x0f, 0x81, 0x49, 0xc8, 0xb5, 0xa2, 0x0f, 0x80, 0x11, 0xcb,
+ 0x8f, 0x3c, 0x0f, 0x80, 0x30, 0xc8, 0xbd, 0xa2, 0x0f, 0x80, 0x01, 0x48,
+ 0xae, 0x47, 0x41, 0xd8, 0x8a, 0xc9, 0xab, 0xbe, 0x0f, 0x80, 0x09, 0x46,
+ 0xd1, 0xf9, 0xc1, 0xd8, 0x94, 0x48, 0xb5, 0x32, 0xc1, 0xd8, 0x9e, 0xc5,
+ 0xc1, 0x78, 0x0f, 0x81, 0x31, 0xc5, 0xda, 0x60, 0x0f, 0x81, 0x38, 0xc9,
+ 0xac, 0x06, 0x0f, 0x80, 0x19, 0x47, 0xbb, 0x83, 0x41, 0xd8, 0xa8, 0x46,
+ 0xbb, 0x84, 0xc1, 0xd8, 0xb2, 0xc5, 0xd6, 0xf0, 0x0f, 0x81, 0x18, 0x46,
+ 0xd2, 0xe9, 0xc1, 0xd8, 0xbc, 0x48, 0xbe, 0x4a, 0x41, 0xd8, 0xc6, 0x47,
+ 0xc5, 0x7c, 0xc1, 0xd8, 0xd0, 0x47, 0xc7, 0x2e, 0x41, 0xd8, 0xda, 0xc2,
+ 0x00, 0x3b, 0x0f, 0x81, 0x59, 0xc4, 0x8e, 0x88, 0x0f, 0x81, 0x20, 0x15,
+ 0xc1, 0xd8, 0xe4, 0xc8, 0x87, 0xb5, 0x0f, 0x9d, 0xcb, 0x01, 0xd8, 0xf0,
+ 0xc4, 0x23, 0x2e, 0x0f, 0x9d, 0xa8, 0xca, 0xa2, 0xba, 0x01, 0x33, 0x79,
+ 0xcc, 0x83, 0xf1, 0x01, 0x33, 0x71, 0xc9, 0xb3, 0xb0, 0x01, 0x33, 0x68,
+ 0x48, 0x1f, 0x1f, 0xc1, 0xd8, 0xf6, 0xcf, 0x65, 0x2b, 0x0f, 0x9d, 0xb0,
+ 0x00, 0x41, 0xd9, 0x03, 0x14, 0xc1, 0xd9, 0x0f, 0xc2, 0x00, 0xd0, 0x08,
+ 0x95, 0x31, 0xc2, 0x0d, 0xf6, 0x08, 0x95, 0x29, 0xc2, 0x02, 0x41, 0x08,
+ 0x95, 0x21, 0xc2, 0x00, 0xdb, 0x08, 0x95, 0x19, 0xc2, 0x19, 0x2c, 0x08,
+ 0x95, 0x09, 0xc2, 0x01, 0xc3, 0x08, 0x95, 0x01, 0x04, 0xc1, 0xd9, 0x1f,
+ 0x12, 0xc1, 0xd9, 0x29, 0x10, 0xc1, 0xd9, 0x33, 0x06, 0xc1, 0xd9, 0x43,
+ 0x16, 0xc1, 0xd9, 0x51, 0x0c, 0xc1, 0xd9, 0x5f, 0x05, 0xc1, 0xd9, 0x69,
+ 0x09, 0xc1, 0xd9, 0x73, 0x0d, 0xc1, 0xd9, 0x7d, 0x87, 0x08, 0x94, 0x19,
+ 0x83, 0x08, 0x94, 0x01, 0x8b, 0x08, 0x94, 0x09, 0x97, 0x08, 0x94, 0x10,
+ 0xc4, 0x18, 0x10, 0x0b, 0x53, 0x39, 0xc2, 0x22, 0xcc, 0x0b, 0x53, 0x30,
+ 0xc3, 0x0d, 0x14, 0x0b, 0x53, 0x29, 0xc3, 0x09, 0x9e, 0x0b, 0x53, 0x20,
+ 0xc4, 0x02, 0xde, 0x0b, 0x53, 0x19, 0xc2, 0x02, 0xa0, 0x0b, 0x53, 0x10,
+ 0xa2, 0x05, 0x53, 0xe9, 0x9f, 0x05, 0x53, 0xe0, 0x44, 0x00, 0xd0, 0xc1,
+ 0xd9, 0x87, 0xc6, 0x00, 0x41, 0x00, 0x82, 0x58, 0xc7, 0x14, 0x39, 0x00,
+ 0x81, 0xb1, 0xc3, 0x89, 0x6c, 0x00, 0x81, 0xd0, 0xc5, 0x40, 0xe7, 0x00,
+ 0x81, 0xc1, 0xc4, 0x1e, 0x97, 0x00, 0x81, 0xc8, 0x9e, 0x00, 0x83, 0x49,
+ 0x9f, 0x00, 0x83, 0x51, 0xa0, 0x00, 0x83, 0x59, 0xa1, 0x00, 0x83, 0x61,
+ 0xa2, 0x00, 0x83, 0x68, 0x9e, 0x00, 0x84, 0xd1, 0xa0, 0x00, 0x84, 0xd8,
+ 0x45, 0xc7, 0x97, 0xc1, 0xd9, 0x99, 0xcd, 0x7b, 0xb1, 0x00, 0x82, 0x70,
+ 0xc3, 0x05, 0x14, 0x00, 0x84, 0xf1, 0xcb, 0x0f, 0x09, 0x00, 0x84, 0xf8,
+ 0xc2, 0x02, 0xa0, 0x00, 0x84, 0x91, 0xc4, 0x02, 0xde, 0x00, 0x84, 0x98,
+ 0xc3, 0x09, 0x9e, 0x00, 0x84, 0xa1, 0xc3, 0x0d, 0x14, 0x00, 0x84, 0xa8,
+ 0xc2, 0x22, 0xcc, 0x00, 0x84, 0xb1, 0xc4, 0x18, 0x10, 0x00, 0x84, 0xb8,
+ 0xc7, 0xc7, 0x97, 0x05, 0x53, 0xd1, 0x97, 0x00, 0x81, 0x50, 0xc2, 0x00,
+ 0xd0, 0x00, 0x80, 0x0b, 0x01, 0xd9, 0xab, 0x83, 0x00, 0x80, 0x00, 0x83,
+ 0x00, 0x80, 0x83, 0x01, 0xd9, 0xb1, 0x16, 0xc1, 0xd9, 0xb7, 0xc2, 0x00,
+ 0xd0, 0x00, 0x80, 0x88, 0x0a, 0xc1, 0xd9, 0xc1, 0x83, 0x00, 0x80, 0xf1,
+ 0xc2, 0x0d, 0xf6, 0x00, 0x82, 0x89, 0xcd, 0x7c, 0x19, 0x00, 0x83, 0x08,
+ 0x83, 0x00, 0x80, 0x11, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x19, 0xc7, 0xbd,
+ 0xeb, 0x00, 0x81, 0xf8, 0xc2, 0x01, 0x30, 0x00, 0x80, 0x21, 0xc2, 0x19,
+ 0x2c, 0x00, 0x80, 0x49, 0x10, 0xc1, 0xd9, 0xce, 0x83, 0x00, 0x80, 0xa0,
+ 0x83, 0x00, 0x80, 0x29, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x30, 0x83, 0x00,
+ 0x80, 0x39, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x40, 0x06, 0xc1, 0xd9, 0xd8,
+ 0x83, 0x00, 0x80, 0x91, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x98, 0x83, 0x00,
+ 0x80, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0xb0, 0x83, 0x00, 0x80, 0xb9,
+ 0xc2, 0x00, 0xd0, 0x00, 0x80, 0xc0, 0x83, 0x00, 0x80, 0xc9, 0x43, 0x01,
+ 0x55, 0x41, 0xd9, 0xe2, 0x83, 0x00, 0x80, 0xd9, 0xcf, 0x65, 0x0d, 0x00,
+ 0x84, 0x70, 0x83, 0x00, 0x80, 0xe1, 0xc2, 0x00, 0xdb, 0x00, 0x81, 0x00,
+ 0x83, 0x00, 0x80, 0xe9, 0x51, 0x28, 0xa0, 0x41, 0xd9, 0xf8, 0x8b, 0x00,
+ 0x81, 0x20, 0x97, 0x00, 0x81, 0x30, 0x51, 0x50, 0x02, 0x41, 0xda, 0x04,
+ 0x94, 0x00, 0x82, 0x93, 0x01, 0xda, 0x16, 0x8e, 0x00, 0x82, 0xa2, 0x01,
+ 0xda, 0x1a, 0xc4, 0x18, 0x10, 0x05, 0x4f, 0xb9, 0xc2, 0x22, 0xcc, 0x05,
+ 0x4f, 0xb0, 0xc3, 0x0d, 0x14, 0x05, 0x4f, 0xa9, 0xc3, 0x09, 0x9e, 0x05,
+ 0x4f, 0xa0, 0xc4, 0x02, 0xde, 0x05, 0x4f, 0x99, 0xc2, 0x02, 0xa0, 0x05,
+ 0x4f, 0x90, 0xc5, 0xd5, 0xc9, 0x00, 0x84, 0xe2, 0x01, 0xda, 0x1e, 0x94,
+ 0x00, 0x82, 0xb8, 0x8e, 0x00, 0x82, 0xc8, 0xc2, 0x04, 0xc6, 0x00, 0x84,
+ 0x19, 0x87, 0x00, 0x84, 0x23, 0x01, 0xda, 0x22, 0xc7, 0xca, 0x30, 0x00,
+ 0x84, 0x30, 0xc2, 0x19, 0x2c, 0x00, 0x81, 0xd9, 0xc2, 0x00, 0x39, 0x00,
+ 0x81, 0xe1, 0xc2, 0x01, 0x4a, 0x00, 0x81, 0xe9, 0xc2, 0x00, 0xd0, 0x00,
+ 0x81, 0xf0, 0xc2, 0x00, 0xc1, 0x00, 0x82, 0xf1, 0xc2, 0x01, 0xc3, 0x00,
+ 0x82, 0xf9, 0xc2, 0x00, 0xdb, 0x00, 0x83, 0x00, 0x15, 0xc1, 0xda, 0x28,
+ 0x83, 0x01, 0x85, 0x13, 0x01, 0xda, 0x42, 0x0f, 0xc1, 0xda, 0x48, 0x8b,
+ 0x01, 0x85, 0x21, 0x97, 0x01, 0x85, 0x31, 0x87, 0x01, 0x85, 0x41, 0x91,
+ 0x01, 0x85, 0x51, 0x0d, 0xc1, 0xda, 0x5f, 0x09, 0xc1, 0xda, 0x73, 0x1c,
+ 0xc1, 0xda, 0x87, 0x16, 0xc1, 0xda, 0x9b, 0x06, 0xc1, 0xda, 0xaf, 0x90,
+ 0x01, 0x87, 0x9b, 0x01, 0xda, 0xc3, 0x0a, 0xc1, 0xda, 0xd7, 0x04, 0xc1,
+ 0xda, 0xeb, 0x12, 0xc1, 0xda, 0xff, 0x1b, 0xc1, 0xdb, 0x13, 0x14, 0xc1,
+ 0xdb, 0x1f, 0x19, 0xc1, 0xdb, 0x33, 0x18, 0x41, 0xdb, 0x43, 0x97, 0x08,
+ 0x85, 0xc1, 0x8b, 0x08, 0x85, 0xb1, 0x83, 0x08, 0x85, 0x80, 0x97, 0x08,
+ 0x85, 0xa0, 0x8b, 0x08, 0x85, 0x90, 0xc5, 0x86, 0x20, 0x08, 0x86, 0x09,
+ 0xcc, 0x45, 0x8d, 0x08, 0x85, 0xf8, 0xc5, 0x33, 0x5d, 0x08, 0x85, 0xd1,
+ 0x42, 0x07, 0xb2, 0xc1, 0xdb, 0x57, 0xc8, 0x14, 0x38, 0x08, 0x84, 0x09,
+ 0xcb, 0x1e, 0x89, 0x08, 0x84, 0x00, 0x83, 0x08, 0x85, 0x71, 0xc2, 0x0d,
+ 0xf6, 0x08, 0x85, 0x69, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x60, 0x83, 0x08,
+ 0x85, 0x49, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0xe0, 0xc2, 0x00, 0xd0, 0x08,
+ 0x85, 0x31, 0x83, 0x08, 0x85, 0x28, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x21,
+ 0x83, 0x08, 0x85, 0x18, 0x83, 0x08, 0x85, 0x11, 0xc2, 0x00, 0xc1, 0x08,
+ 0x84, 0xe9, 0xc2, 0x19, 0x2c, 0x08, 0x84, 0xb1, 0xc2, 0x01, 0x30, 0x08,
+ 0x84, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0x85, 0x09, 0x83, 0x08, 0x85, 0x01,
+ 0x06, 0x41, 0xdb, 0x63, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0xf9, 0x83, 0x08,
+ 0x84, 0xf1, 0x16, 0x41, 0xdb, 0x73, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0xa9,
+ 0x83, 0x08, 0x84, 0xa0, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0x99, 0x83, 0x08,
+ 0x84, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x84, 0x81, 0x83, 0x08, 0x84, 0x78,
+ 0xc2, 0x00, 0xd0, 0x08, 0x84, 0x71, 0x83, 0x08, 0x84, 0x68, 0x97, 0x08,
+ 0x84, 0x61, 0x8b, 0x08, 0x84, 0x51, 0x83, 0x08, 0x84, 0x20, 0x97, 0x08,
+ 0x84, 0x40, 0x8b, 0x08, 0x84, 0x30, 0xc7, 0xca, 0x76, 0x05, 0x49, 0x68,
+ 0x87, 0x05, 0x49, 0x48, 0x87, 0x05, 0x49, 0x30, 0x91, 0x05, 0x49, 0x29,
+ 0x87, 0x05, 0x49, 0x18, 0x83, 0x05, 0x48, 0xf9, 0xc2, 0x01, 0x6f, 0x05,
+ 0x48, 0x98, 0xc2, 0x00, 0xd0, 0x05, 0x48, 0xf1, 0x83, 0x05, 0x48, 0x90,
+ 0xc2, 0x00, 0xd0, 0x05, 0x48, 0xb1, 0x83, 0x05, 0x48, 0xa8, 0x83, 0x05,
+ 0x48, 0xa1, 0xc2, 0x19, 0x2c, 0x05, 0x48, 0x89, 0xc2, 0x01, 0x30, 0x05,
+ 0x48, 0x68, 0xc2, 0x00, 0xd0, 0x05, 0x48, 0x79, 0x83, 0x05, 0x48, 0x70,
+ 0xc2, 0x00, 0xd0, 0x05, 0x48, 0x59, 0x83, 0x05, 0x48, 0x50, 0xc4, 0x18,
+ 0x10, 0x05, 0x48, 0x39, 0xc2, 0x22, 0xcc, 0x05, 0x48, 0x30, 0xc3, 0x0d,
+ 0x14, 0x05, 0x48, 0x29, 0xc3, 0x09, 0x9e, 0x05, 0x48, 0x20, 0xc4, 0x02,
+ 0xde, 0x05, 0x48, 0x19, 0xc2, 0x02, 0xa0, 0x05, 0x48, 0x10, 0x15, 0xc1,
+ 0xdb, 0x7d, 0xcb, 0x1e, 0x89, 0x00, 0x64, 0x09, 0x03, 0xc1, 0xdb, 0x89,
+ 0x42, 0x07, 0xb2, 0xc1, 0xdb, 0x95, 0xc5, 0x33, 0x5d, 0x00, 0x65, 0xe1,
+ 0xcb, 0x8f, 0xe1, 0x00, 0x67, 0x89, 0xcb, 0x93, 0xf6, 0x00, 0x67, 0x90,
+ 0x45, 0x02, 0x10, 0xc1, 0xdb, 0xa1, 0xc9, 0x36, 0x53, 0x00, 0x66, 0xa8,
+ 0x03, 0xc1, 0xdc, 0x10, 0x8b, 0x00, 0x65, 0xfb, 0x01, 0xdc, 0x1c, 0x97,
+ 0x00, 0x66, 0x0b, 0x01, 0xdc, 0x20, 0x48, 0xb2, 0x2d, 0xc1, 0xdc, 0x24,
+ 0x87, 0x00, 0x66, 0x33, 0x01, 0xdc, 0x32, 0x91, 0x00, 0x66, 0x52, 0x01,
+ 0xdc, 0x36, 0xc4, 0x15, 0xe7, 0x00, 0x67, 0x31, 0xc3, 0x05, 0x14, 0x00,
+ 0x67, 0x39, 0x16, 0xc1, 0xdc, 0x3a, 0x08, 0xc1, 0xdc, 0x46, 0x15, 0xc1,
+ 0xdc, 0x52, 0xc5, 0x06, 0xdb, 0x00, 0x67, 0x71, 0xc4, 0x26, 0x78, 0x00,
+ 0x67, 0x78, 0x11, 0xc1, 0xdc, 0x5e, 0x0e, 0xc1, 0xdc, 0x71, 0x06, 0xc1,
+ 0xdc, 0x86, 0x15, 0xc1, 0xdc, 0x96, 0x0a, 0xc1, 0xdc, 0xe0, 0x16, 0xc1,
+ 0xdc, 0xf2, 0x0f, 0xc1, 0xdd, 0x17, 0x07, 0xc1, 0xdd, 0x29, 0x05, 0xc1,
+ 0xdd, 0x4c, 0x0b, 0xc1, 0xdd, 0x64, 0xc5, 0xa0, 0xc1, 0x01, 0x78, 0x89,
+ 0x12, 0xc1, 0xdd, 0x6e, 0x19, 0xc1, 0xdd, 0x84, 0x14, 0xc1, 0xdd, 0x9e,
+ 0x03, 0xc1, 0xdd, 0xb8, 0x09, 0xc1, 0xdd, 0xd0, 0x04, 0xc1, 0xdd, 0xe9,
+ 0x10, 0xc1, 0xde, 0x03, 0x08, 0xc1, 0xde, 0x0d, 0x42, 0x25, 0x3b, 0xc1,
+ 0xde, 0x2f, 0xc3, 0x26, 0x9b, 0x01, 0x7b, 0x21, 0x18, 0xc1, 0xde, 0x39,
+ 0xc6, 0xc6, 0x9b, 0x01, 0x7e, 0x40, 0x06, 0xc1, 0xde, 0x45, 0x05, 0xc1,
+ 0xde, 0x5d, 0x04, 0xc1, 0xde, 0x9d, 0x03, 0xc1, 0xde, 0xdd, 0x26, 0xc1,
+ 0xdf, 0x1d, 0x25, 0xc1, 0xdf, 0x5d, 0x24, 0xc1, 0xdf, 0x9d, 0x23, 0xc1,
+ 0xdf, 0xdd, 0x22, 0xc1, 0xe0, 0x1d, 0x21, 0xc1, 0xe0, 0x5d, 0x20, 0xc1,
+ 0xe0, 0x9d, 0x1f, 0xc1, 0xe0, 0xdd, 0x1e, 0xc1, 0xe1, 0x1d, 0x1d, 0x41,
+ 0xe1, 0x5d, 0x08, 0xc1, 0xe1, 0x9d, 0x07, 0xc1, 0xe1, 0xdd, 0x06, 0xc1,
+ 0xe2, 0x1d, 0x05, 0xc1, 0xe2, 0x5d, 0x04, 0xc1, 0xe2, 0x9d, 0x03, 0xc1,
+ 0xe2, 0xdd, 0x26, 0xc1, 0xe3, 0x1d, 0x25, 0xc1, 0xe3, 0x5d, 0x24, 0xc1,
+ 0xe3, 0x9d, 0x23, 0xc1, 0xe3, 0xdd, 0x22, 0xc1, 0xe4, 0x1d, 0x21, 0xc1,
+ 0xe4, 0x5d, 0x20, 0xc1, 0xe4, 0x9d, 0x1f, 0xc1, 0xe4, 0xdd, 0x1e, 0xc1,
+ 0xe5, 0x1d, 0x1d, 0x41, 0xe5, 0x5d, 0xc4, 0x18, 0x10, 0x08, 0x97, 0xb9,
+ 0xc2, 0x22, 0xcc, 0x08, 0x97, 0xb0, 0xc3, 0x0d, 0x14, 0x08, 0x97, 0xa9,
+ 0xc3, 0x09, 0x9e, 0x08, 0x97, 0xa0, 0xc4, 0x02, 0xde, 0x08, 0x97, 0x99,
+ 0xc2, 0x02, 0xa0, 0x08, 0x97, 0x90, 0x8b, 0x08, 0x97, 0x31, 0x83, 0x08,
+ 0x97, 0x01, 0x97, 0x08, 0x97, 0x40, 0x97, 0x08, 0x97, 0x20, 0x8b, 0x08,
+ 0x97, 0x10, 0x83, 0x08, 0x96, 0xe9, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0xe0,
+ 0x83, 0x08, 0x96, 0xc9, 0xc2, 0x00, 0x39, 0x08, 0x96, 0x50, 0xc2, 0x00,
+ 0xd0, 0x08, 0x96, 0xb1, 0xc2, 0x01, 0x5d, 0x08, 0x96, 0xa9, 0x83, 0x08,
+ 0x96, 0xa0, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x99, 0x83, 0x08, 0x96, 0x90,
+ 0x83, 0x08, 0x96, 0x89, 0xc2, 0x00, 0xc1, 0x08, 0x96, 0x61, 0xc2, 0x19,
+ 0x2c, 0x08, 0x96, 0x29, 0xc2, 0x01, 0x30, 0x08, 0x95, 0xf8, 0xc2, 0x00,
+ 0xd0, 0x08, 0x96, 0x81, 0x83, 0x08, 0x96, 0x79, 0x06, 0x41, 0xe5, 0x9d,
+ 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x71, 0x83, 0x08, 0x96, 0x69, 0x16, 0x41,
+ 0xe5, 0xad, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x21, 0xc2, 0x25, 0x3b, 0x08,
+ 0x96, 0x19, 0x83, 0x08, 0x96, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x09,
+ 0x83, 0x08, 0x96, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0x95, 0xf1, 0xc2, 0x01,
+ 0x30, 0x08, 0x95, 0xe9, 0x83, 0x08, 0x95, 0xe0, 0xc2, 0x00, 0xd0, 0x08,
+ 0x95, 0xd9, 0x83, 0x08, 0x95, 0xd0, 0x97, 0x08, 0x95, 0xc9, 0x8b, 0x08,
+ 0x95, 0xb9, 0x83, 0x08, 0x95, 0x88, 0x97, 0x08, 0x95, 0xa8, 0x8b, 0x08,
+ 0x95, 0x98, 0x15, 0xc1, 0xe5, 0xb7, 0xc5, 0x33, 0x5d, 0x08, 0x91, 0xb1,
+ 0xc6, 0x1e, 0x95, 0x08, 0x91, 0xa9, 0xc8, 0x14, 0x38, 0x08, 0x91, 0xa0,
+ 0xcc, 0x45, 0x8d, 0x08, 0x91, 0xe1, 0xc5, 0x86, 0x20, 0x08, 0x91, 0xc8,
+ 0x97, 0x08, 0x91, 0x99, 0x8b, 0x08, 0x91, 0x89, 0x83, 0x08, 0x91, 0x60,
+ 0x8b, 0x08, 0x91, 0x70, 0xc2, 0x00, 0xdb, 0x08, 0x91, 0x59, 0x83, 0x08,
+ 0x91, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0x91, 0x19, 0xc2, 0x01, 0x5d, 0x08,
+ 0x91, 0x11, 0x83, 0x08, 0x91, 0x08, 0xc2, 0x00, 0xd0, 0x08, 0x91, 0x01,
+ 0x83, 0x08, 0x90, 0xf8, 0x83, 0x08, 0x90, 0xf1, 0xc2, 0x00, 0xc1, 0x08,
+ 0x90, 0xc1, 0xc2, 0x19, 0x2c, 0x08, 0x90, 0x99, 0xc2, 0x01, 0x30, 0x08,
+ 0x90, 0x68, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xe9, 0x06, 0xc1, 0xe5, 0xc3,
+ 0x83, 0x08, 0x90, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xd1, 0x83, 0x08,
+ 0x90, 0xc9, 0x16, 0x41, 0xe5, 0xd3, 0xc2, 0x25, 0x3b, 0x08, 0x90, 0x89,
+ 0x83, 0x08, 0x90, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0x79, 0x83, 0x08,
+ 0x90, 0x70, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0x61, 0xc2, 0x01, 0x30, 0x08,
+ 0x90, 0x59, 0x83, 0x08, 0x90, 0x50, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0x49,
+ 0x83, 0x08, 0x90, 0x40, 0x97, 0x08, 0x90, 0x39, 0x8b, 0x08, 0x90, 0x29,
+ 0x83, 0x08, 0x90, 0x08, 0x43, 0x4e, 0xf0, 0xc1, 0xe5, 0xdd, 0x12, 0xc1,
+ 0xe5, 0xe5, 0x04, 0xc1, 0xe5, 0xf7, 0x45, 0xda, 0x97, 0xc1, 0xe6, 0x03,
+ 0xc9, 0xb2, 0x51, 0x00, 0xcf, 0x81, 0x4a, 0xa2, 0x42, 0x41, 0xe6, 0x0f,
+ 0x03, 0xc1, 0xe6, 0x23, 0x0d, 0xc1, 0xe6, 0x35, 0xcb, 0x93, 0x93, 0x00,
+ 0xbe, 0xc9, 0x04, 0xc1, 0xe6, 0x47, 0xc7, 0xc2, 0x1f, 0x00, 0xbe, 0xb9,
+ 0x05, 0xc1, 0xe6, 0x51, 0xc6, 0xcb, 0x69, 0x00, 0xbe, 0x89, 0xcd, 0x78,
+ 0x23, 0x00, 0xbe, 0x81, 0x16, 0xc1, 0xe6, 0x5d, 0x14, 0xc1, 0xe6, 0x69,
+ 0xcb, 0x99, 0xfa, 0x00, 0xbe, 0x49, 0xcd, 0x7d, 0x1d, 0x00, 0xbe, 0x41,
+ 0xc7, 0xc4, 0x41, 0x00, 0xbe, 0x30, 0xc4, 0x18, 0x10, 0x00, 0xbf, 0x39,
+ 0xc2, 0x22, 0xcc, 0x00, 0xbf, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0xbf, 0x29,
+ 0xc3, 0x09, 0x9e, 0x00, 0xbf, 0x20, 0xc4, 0x02, 0xde, 0x00, 0xbf, 0x19,
+ 0xc2, 0x02, 0xa0, 0x00, 0xbf, 0x10, 0x03, 0xc1, 0xe6, 0x75, 0x11, 0xc1,
+ 0xe6, 0x85, 0x87, 0x00, 0xbe, 0x09, 0x8b, 0x00, 0xbd, 0xbb, 0x01, 0xe6,
+ 0x8d, 0x9b, 0x00, 0xbd, 0xcb, 0x01, 0xe6, 0x95, 0x97, 0x00, 0xbd, 0xda,
+ 0x01, 0xe6, 0x9d, 0x83, 0x00, 0xbd, 0xa9, 0x93, 0x00, 0xbd, 0xa0, 0x03,
+ 0xc1, 0xe6, 0xa5, 0x48, 0xb7, 0x6a, 0xc1, 0xe6, 0xb5, 0x87, 0x00, 0xbd,
+ 0x79, 0x97, 0x00, 0xbd, 0x3b, 0x01, 0xe6, 0xc1, 0x8b, 0x00, 0xbd, 0x2a,
+ 0x01, 0xe6, 0xcc, 0x9b, 0x00, 0xbd, 0x70, 0x9b, 0x00, 0xbd, 0x60, 0x83,
+ 0x00, 0xbd, 0x09, 0x91, 0x00, 0xbc, 0xd8, 0x83, 0x00, 0xbc, 0xf9, 0xc2,
+ 0x00, 0xfb, 0x00, 0xbc, 0xf1, 0xc2, 0x00, 0xd0, 0x00, 0xbc, 0xe8, 0x0a,
+ 0xc1, 0xe6, 0xd0, 0x91, 0x00, 0xbc, 0xb0, 0x91, 0x00, 0xbc, 0x99, 0xc2,
+ 0x00, 0x10, 0x00, 0xbc, 0x71, 0xc2, 0x42, 0xcd, 0x00, 0xbc, 0x49, 0xc2,
+ 0x0f, 0x7b, 0x00, 0xbc, 0x20, 0x0a, 0xc1, 0xe6, 0xd8, 0x91, 0x00, 0xbc,
+ 0x89, 0x83, 0x00, 0xbc, 0x79, 0x42, 0x00, 0x8e, 0x41, 0xe6, 0xe0, 0x91,
+ 0x00, 0xbc, 0x61, 0x83, 0x00, 0xbc, 0x50, 0x0a, 0xc1, 0xe6, 0xe8, 0x91,
+ 0x00, 0xbc, 0x39, 0x83, 0x00, 0xbc, 0x28, 0x0a, 0xc1, 0xe6, 0xf0, 0x91,
+ 0x00, 0xbc, 0x11, 0x83, 0x00, 0xbc, 0x00, 0xc4, 0x22, 0xd6, 0x08, 0x52,
+ 0xc1, 0xc4, 0x6e, 0x13, 0x08, 0x52, 0xa8, 0x11, 0xc1, 0xe6, 0xf8, 0xc4,
+ 0x19, 0x53, 0x08, 0x52, 0xb0, 0xcb, 0x80, 0xaa, 0x08, 0x52, 0x99, 0xc5,
+ 0x02, 0xd2, 0x08, 0x52, 0x90, 0xc8, 0x4b, 0x94, 0x08, 0x52, 0x39, 0xc7,
+ 0x0d, 0x04, 0x08, 0x52, 0x30, 0xc5, 0x28, 0xee, 0x08, 0x52, 0x29, 0xc2,
+ 0x00, 0xc4, 0x08, 0x52, 0x20, 0xc4, 0x02, 0xde, 0x08, 0x52, 0x11, 0xc2,
+ 0x02, 0xa0, 0x08, 0x52, 0x08, 0xcb, 0x36, 0x51, 0x08, 0x50, 0x61, 0x45,
+ 0x00, 0xba, 0x41, 0xe7, 0x02, 0xc7, 0x0e, 0x70, 0x08, 0x51, 0xd1, 0xcf,
+ 0x65, 0xa3, 0x08, 0x50, 0x68, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0xa9, 0x83,
+ 0x08, 0x51, 0x60, 0x16, 0xc1, 0xe7, 0x18, 0xc2, 0x00, 0xd0, 0x08, 0x51,
+ 0x01, 0x83, 0x08, 0x50, 0xf8, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x39, 0x83,
+ 0x08, 0x51, 0x30, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x29, 0x83, 0x08, 0x51,
+ 0x20, 0x83, 0x08, 0x51, 0x19, 0xc2, 0x00, 0xc1, 0x08, 0x50, 0xf1, 0xc2,
+ 0x19, 0x2c, 0x08, 0x50, 0xc8, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x11, 0x83,
+ 0x08, 0x51, 0x09, 0x06, 0x41, 0xe7, 0x26, 0xc2, 0x00, 0xd0, 0x08, 0x50,
+ 0xb1, 0x83, 0x08, 0x50, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0x50, 0x99, 0x83,
+ 0x08, 0x50, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x50, 0x89, 0x83, 0x08, 0x50,
+ 0x81, 0xc2, 0x02, 0x2b, 0x08, 0x51, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x51,
+ 0x69, 0xc2, 0x0d, 0xf6, 0x08, 0x51, 0x71, 0x83, 0x08, 0x51, 0x78, 0x46,
+ 0x00, 0x8b, 0x41, 0xe7, 0x30, 0xca, 0xa7, 0x92, 0x0f, 0xd2, 0x53, 0x01,
+ 0xe7, 0x3c, 0xc5, 0xa8, 0xf7, 0x0f, 0xd0, 0x0b, 0x01, 0xe7, 0x42, 0x0d,
+ 0xc1, 0xe7, 0x48, 0xc6, 0xca, 0xfd, 0x0f, 0xd0, 0x1b, 0x01, 0xe7, 0x5a,
+ 0xc4, 0xde, 0x83, 0x0f, 0xd0, 0x13, 0x01, 0xe7, 0x60, 0xc4, 0xe3, 0x93,
+ 0x0f, 0xd0, 0x2b, 0x01, 0xe7, 0x66, 0x47, 0x45, 0x86, 0x41, 0xe7, 0x6c,
+ 0x0b, 0xc1, 0xe7, 0x88, 0xca, 0xa0, 0x26, 0x08, 0xa2, 0xf0, 0x18, 0xc1,
+ 0xe7, 0x94, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0xa1, 0x15, 0xc1, 0xe7, 0xa0,
+ 0x10, 0xc1, 0xe7, 0xb0, 0x06, 0xc1, 0xe7, 0xc8, 0x16, 0xc1, 0xe7, 0xd6,
+ 0x0c, 0xc1, 0xe7, 0xe4, 0x05, 0xc1, 0xe7, 0xee, 0x09, 0xc1, 0xe7, 0xf8,
+ 0x0d, 0xc1, 0xe8, 0x02, 0x83, 0x08, 0xa0, 0x03, 0x01, 0xe8, 0x0c, 0x91,
+ 0x08, 0xa0, 0x61, 0x87, 0x08, 0xa0, 0x51, 0x97, 0x08, 0xa0, 0x23, 0x01,
+ 0xe8, 0x18, 0x8b, 0x08, 0xa0, 0x13, 0x01, 0xe8, 0x1c, 0x12, 0xc1, 0xe8,
+ 0x20, 0x04, 0xc1, 0xe8, 0x2a, 0x0f, 0xc1, 0xe8, 0x34, 0xc2, 0x19, 0x2c,
+ 0x08, 0xa1, 0x59, 0x14, 0xc1, 0xe8, 0x3e, 0x0e, 0xc1, 0xe8, 0x48, 0xc2,
+ 0x01, 0x4a, 0x08, 0xa1, 0x80, 0x46, 0x00, 0x59, 0xc1, 0xe8, 0x52, 0x45,
+ 0x09, 0x98, 0xc1, 0xe8, 0x5e, 0xc4, 0x19, 0x53, 0x08, 0xa2, 0x58, 0x03,
+ 0xc1, 0xe8, 0x82, 0x91, 0x08, 0xa2, 0x01, 0x87, 0x08, 0xa1, 0xf1, 0x48,
+ 0xb2, 0x2d, 0xc1, 0xe8, 0x8e, 0x97, 0x08, 0xa1, 0xc3, 0x01, 0xe8, 0x9c,
+ 0x8b, 0x08, 0xa1, 0xb2, 0x01, 0xe8, 0xa0, 0xc8, 0xb9, 0x72, 0x00, 0xce,
+ 0xf3, 0x01, 0xe8, 0xa4, 0x16, 0xc1, 0xe8, 0xa8, 0x46, 0x09, 0x97, 0xc1,
+ 0xe8, 0xb4, 0x47, 0x02, 0x0e, 0xc1, 0xe8, 0xd8, 0x4b, 0x6f, 0xc7, 0x41,
+ 0xe8, 0xea, 0xc9, 0xb2, 0xa2, 0x0f, 0x98, 0xd1, 0xc6, 0x00, 0x91, 0x0f,
+ 0x98, 0x88, 0xca, 0xa2, 0x88, 0x01, 0x3a, 0x71, 0xc2, 0x15, 0x95, 0x0f,
+ 0x8c, 0x79, 0xc2, 0x00, 0x03, 0x0f, 0x8c, 0x71, 0xc2, 0x0d, 0xf6, 0x0f,
+ 0x8c, 0x69, 0xc2, 0x00, 0xb0, 0x0f, 0x8c, 0x61, 0xc2, 0x00, 0x63, 0x0f,
+ 0x8c, 0x59, 0x55, 0x0b, 0x11, 0xc1, 0xe9, 0x0a, 0xcd, 0x2c, 0xb2, 0x0f,
+ 0xde, 0x20, 0xca, 0xa3, 0xd2, 0x01, 0x27, 0xf9, 0x47, 0x34, 0x2f, 0xc1,
+ 0xe9, 0x72, 0x55, 0x0b, 0x11, 0xc1, 0xe9, 0x88, 0xc8, 0x01, 0x92, 0x0f,
+ 0xbe, 0xb1, 0xc6, 0x0b, 0x09, 0x0f, 0xbe, 0xc0, 0xc5, 0x0d, 0x20, 0x0f,
+ 0xdd, 0xe9, 0xdc, 0x04, 0xcb, 0x0f, 0xdd, 0xf1, 0xc7, 0x3a, 0x19, 0x0f,
+ 0xdd, 0xf8, 0xd6, 0x2d, 0xd0, 0x01, 0x14, 0x49, 0xd4, 0x3a, 0x20, 0x01,
+ 0x14, 0x40, 0xe0, 0x07, 0x47, 0x01, 0x12, 0x38, 0xca, 0x37, 0x4e, 0x01,
+ 0x13, 0xa9, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x88, 0xca, 0x37, 0x4e, 0x01,
+ 0x13, 0xa1, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x80, 0xcf, 0x61, 0x11, 0x08,
+ 0xcf, 0x21, 0x03, 0xc1, 0xe9, 0xf0, 0x91, 0x08, 0xce, 0xe1, 0x87, 0x08,
+ 0xce, 0xd1, 0xc9, 0xb2, 0x2d, 0x08, 0xce, 0xb3, 0x01, 0xe9, 0xfc, 0x97,
+ 0x08, 0xce, 0xa3, 0x01, 0xea, 0x00, 0x8b, 0x08, 0xce, 0x92, 0x01, 0xea,
+ 0x04, 0xc7, 0xc3, 0x61, 0x08, 0xcf, 0x11, 0x03, 0xc1, 0xea, 0x08, 0x42,
+ 0x07, 0xb2, 0x41, 0xea, 0x14, 0x14, 0xc1, 0xea, 0x20, 0x0e, 0xc1, 0xea,
+ 0x2a, 0xc2, 0x00, 0xd0, 0x08, 0xce, 0x71, 0x15, 0xc1, 0xea, 0x34, 0x18,
+ 0xc1, 0xea, 0x44, 0xc2, 0x19, 0x2c, 0x08, 0xce, 0x39, 0xc2, 0x01, 0xc3,
+ 0x08, 0xce, 0x31, 0x04, 0xc1, 0xea, 0x51, 0x12, 0xc1, 0xea, 0x5b, 0x10,
+ 0xc1, 0xea, 0x65, 0x06, 0xc1, 0xea, 0x7b, 0x16, 0xc1, 0xea, 0x89, 0x0c,
+ 0xc1, 0xea, 0x97, 0x05, 0xc1, 0xea, 0xa1, 0x09, 0xc1, 0xea, 0xab, 0x0d,
+ 0xc1, 0xea, 0xb5, 0x83, 0x08, 0xcd, 0x03, 0x01, 0xea, 0xbf, 0x91, 0x08,
+ 0xcd, 0x61, 0x87, 0x08, 0xcd, 0x51, 0x97, 0x08, 0xcd, 0x23, 0x01, 0xea,
+ 0xcb, 0x8b, 0x08, 0xcd, 0x12, 0x01, 0xea, 0xcf, 0xc3, 0x05, 0x14, 0x08,
+ 0x45, 0x3b, 0x01, 0xea, 0xd3, 0x16, 0xc1, 0xea, 0xd9, 0x08, 0x41, 0xea,
+ 0xe9, 0x16, 0xc1, 0xea, 0xf5, 0x15, 0xc1, 0xeb, 0x01, 0x46, 0x26, 0xf7,
+ 0xc1, 0xeb, 0x0b, 0xc4, 0x5d, 0xe2, 0x08, 0x44, 0xd9, 0xc4, 0xb9, 0x7e,
+ 0x08, 0x44, 0xd1, 0xc2, 0x00, 0x67, 0x08, 0x44, 0xc1, 0x03, 0xc1, 0xeb,
+ 0x41, 0xc3, 0x20, 0x18, 0x08, 0x44, 0xa9, 0xc3, 0x00, 0x4e, 0x08, 0x44,
+ 0x99, 0xc6, 0xcf, 0xd7, 0x08, 0x44, 0x89, 0xc4, 0xe0, 0xe7, 0x08, 0x44,
+ 0x79, 0xc4, 0x4a, 0xb9, 0x08, 0x44, 0x69, 0xc2, 0x01, 0x7f, 0x08, 0x44,
+ 0x3b, 0x01, 0xeb, 0x4d, 0xc5, 0x4a, 0xb3, 0x08, 0x44, 0x49, 0xc3, 0x7e,
+ 0x89, 0x08, 0x44, 0x41, 0xc6, 0x40, 0x9a, 0x08, 0x44, 0x29, 0xc5, 0x9c,
+ 0xa2, 0x08, 0x44, 0x21, 0xc4, 0xe3, 0x27, 0x08, 0x44, 0x18, 0x45, 0x20,
+ 0x6c, 0xc1, 0xeb, 0x53, 0x45, 0x15, 0xa7, 0xc1, 0xeb, 0x7e, 0x46, 0x09,
+ 0x91, 0x41, 0xeb, 0xa9, 0xde, 0x0e, 0x32, 0x0f, 0xaa, 0x19, 0x4a, 0x00,
+ 0x27, 0x41, 0xeb, 0xc1, 0xe0, 0x0c, 0x07, 0x01, 0x3d, 0x88, 0xcc, 0x23,
+ 0x9f, 0x01, 0x17, 0x60, 0x46, 0x1f, 0x87, 0xc1, 0xeb, 0xc7, 0xc3, 0x00,
+ 0xbb, 0x00, 0x05, 0x60, 0xc3, 0x33, 0xa8, 0x01, 0x15, 0x69, 0xc4, 0x1e,
+ 0xc9, 0x01, 0x12, 0x08, 0x43, 0x07, 0x28, 0xc1, 0xeb, 0xd3, 0xce, 0x66,
+ 0xcf, 0x01, 0x12, 0x49, 0xd6, 0x2b, 0xc0, 0x01, 0x12, 0x21, 0xcc, 0x81,
+ 0x99, 0x01, 0x10, 0x48, 0xca, 0x37, 0x4e, 0x01, 0x13, 0x69, 0xc5, 0x07,
+ 0x62, 0x01, 0x13, 0x00, 0x86, 0x0f, 0xae, 0x51, 0xc2, 0x09, 0x3b, 0x0f,
+ 0xae, 0x48, 0xd6, 0x2b, 0x68, 0x0f, 0xa6, 0xa0, 0x87, 0x0f, 0x09, 0x58,
+ 0x91, 0x0f, 0x09, 0x48, 0x83, 0x0f, 0x09, 0x28, 0xc2, 0x00, 0x39, 0x0f,
+ 0x09, 0x19, 0x83, 0x0f, 0x08, 0xb0, 0xc2, 0x00, 0xdb, 0x0f, 0x09, 0x09,
+ 0x83, 0x0f, 0x08, 0xd0, 0xc2, 0x00, 0xdb, 0x0f, 0x09, 0x01, 0x83, 0x0f,
+ 0x08, 0x00, 0x8a, 0x0f, 0x08, 0xf8, 0x12, 0xc1, 0xeb, 0xdf, 0xc2, 0x0f,
+ 0x9a, 0x0f, 0x08, 0xc9, 0x16, 0xc1, 0xeb, 0xe9, 0xc2, 0x00, 0x39, 0x0f,
+ 0x08, 0x89, 0xc2, 0x19, 0x2c, 0x0f, 0x08, 0x81, 0xc2, 0x00, 0x64, 0x0f,
+ 0x08, 0x61, 0xc2, 0x02, 0x2b, 0x0f, 0x08, 0x39, 0x83, 0x0f, 0x08, 0x28,
+ 0xc2, 0x00, 0xdb, 0x0f, 0x08, 0xe9, 0x83, 0x0f, 0x08, 0x78, 0xc2, 0x19,
+ 0x2c, 0x0f, 0x08, 0xd9, 0x83, 0x0f, 0x08, 0x30, 0xc2, 0x8d, 0x8f, 0x0f,
+ 0x08, 0xa1, 0x83, 0x0f, 0x08, 0x19, 0xc2, 0x0d, 0xf6, 0x0f, 0x08, 0x08,
+ 0xcc, 0x86, 0x61, 0x0f, 0x09, 0xd9, 0xc6, 0xcc, 0x9b, 0x0f, 0x09, 0xd1,
+ 0xc8, 0x7f, 0x59, 0x0f, 0x09, 0xc9, 0xc5, 0xd8, 0x2b, 0x0f, 0x09, 0xc1,
+ 0xc6, 0x18, 0x8e, 0x0f, 0x09, 0xb8, 0x08, 0xc1, 0xeb, 0xf9, 0x07, 0xc1,
+ 0xec, 0x29, 0x04, 0xc1, 0xec, 0x69, 0x26, 0xc1, 0xec, 0xa9, 0x25, 0xc1,
+ 0xec, 0xe9, 0x24, 0xc1, 0xed, 0x29, 0x23, 0xc1, 0xed, 0x69, 0x22, 0xc1,
+ 0xed, 0xa9, 0x21, 0xc1, 0xed, 0xe9, 0x20, 0xc1, 0xee, 0x29, 0x1f, 0xc1,
+ 0xee, 0x69, 0x1e, 0xc1, 0xee, 0xa9, 0x1d, 0xc1, 0xee, 0xe9, 0x06, 0xc1,
+ 0xef, 0x29, 0x05, 0xc1, 0xef, 0x69, 0x03, 0x41, 0xef, 0xa9, 0x08, 0xc1,
+ 0xef, 0xe9, 0x07, 0xc1, 0xf0, 0x29, 0x06, 0xc1, 0xf0, 0x69, 0x05, 0xc1,
+ 0xf0, 0xa9, 0x04, 0xc1, 0xf0, 0xe9, 0x03, 0xc1, 0xf1, 0x29, 0x26, 0xc1,
+ 0xf1, 0x69, 0x25, 0xc1, 0xf1, 0xa9, 0x24, 0x41, 0xf1, 0xe9, 0x42, 0x00,
+ 0x28, 0xc1, 0xf2, 0x29, 0xd1, 0x52, 0xcc, 0x01, 0x24, 0xa1, 0xcc, 0x48,
+ 0x29, 0x01, 0x24, 0x88, 0xd1, 0x56, 0xa6, 0x01, 0x24, 0xc9, 0xcf, 0x66,
+ 0xb1, 0x01, 0x24, 0x90, 0xd2, 0x48, 0x23, 0x01, 0x24, 0xc1, 0x0b, 0x41,
+ 0xf2, 0x35, 0xd0, 0x59, 0x52, 0x01, 0x24, 0xb1, 0xd1, 0x53, 0x65, 0x01,
+ 0x24, 0xa8, 0xc4, 0x18, 0x10, 0x00, 0x3e, 0x39, 0xc2, 0x22, 0xcc, 0x00,
+ 0x3e, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0x3e, 0x29, 0xc3, 0x09, 0x9e, 0x00,
+ 0x3e, 0x20, 0xc4, 0x02, 0xde, 0x00, 0x3e, 0x19, 0xc2, 0x02, 0xa0, 0x00,
+ 0x3e, 0x10, 0x44, 0xe4, 0x3f, 0xc1, 0xf2, 0x41, 0x83, 0x00, 0x3e, 0xb0,
+ 0xc2, 0x19, 0x2c, 0x00, 0x3f, 0x13, 0x01, 0xf2, 0x53, 0x83, 0x00, 0x3f,
+ 0x1a, 0x01, 0xf2, 0x59, 0xc2, 0x00, 0x39, 0x00, 0x3e, 0xd1, 0x83, 0x00,
+ 0x3e, 0xc8, 0xc8, 0xbc, 0x92, 0x00, 0x3e, 0x88, 0x91, 0x00, 0x3e, 0x78,
+ 0x87, 0x00, 0x3e, 0x58, 0xcb, 0x5a, 0x32, 0x00, 0x3f, 0x89, 0xc8, 0xae,
+ 0xfb, 0x00, 0x3f, 0x81, 0xc9, 0x3d, 0x18, 0x00, 0x3f, 0x79, 0xcf, 0x64,
+ 0x1d, 0x00, 0x3f, 0x70, 0xcb, 0x5a, 0x32, 0x00, 0x3f, 0x69, 0xc8, 0xae,
+ 0xfb, 0x00, 0x3f, 0x61, 0xc9, 0x3d, 0x18, 0x00, 0x3f, 0x58, 0x46, 0x00,
+ 0x8b, 0x41, 0xf2, 0x5f, 0x95, 0x0f, 0xae, 0x68, 0xc3, 0x23, 0x2f, 0x0f,
+ 0xae, 0x2b, 0x01, 0xf2, 0x77, 0xc3, 0x15, 0xa8, 0x0f, 0xd5, 0xc8, 0xc5,
+ 0x11, 0x0d, 0x01, 0x1e, 0xd1, 0x45, 0xd9, 0x2f, 0x41, 0xf2, 0x7d, 0xc4,
+ 0x9e, 0x9c, 0x0f, 0x99, 0xf1, 0xc5, 0xdb, 0x9b, 0x0f, 0x99, 0xe8, 0x20,
+ 0xc1, 0xf2, 0x87, 0x1f, 0xc1, 0xf2, 0xaa, 0x1e, 0xc1, 0xf2, 0xd8, 0x1d,
+ 0x41, 0xf3, 0x06, 0xa6, 0x09, 0x82, 0xc9, 0xa5, 0x09, 0x82, 0xc1, 0xa4,
+ 0x09, 0x82, 0xb9, 0xa3, 0x09, 0x82, 0xb1, 0xa2, 0x09, 0x82, 0xa3, 0x01,
+ 0xf3, 0x30, 0xa1, 0x09, 0x82, 0x99, 0xa0, 0x09, 0x82, 0x91, 0x9f, 0x09,
+ 0x82, 0x89, 0x9e, 0x09, 0x82, 0x80, 0x22, 0xc1, 0xf3, 0x34, 0x21, 0xc1,
+ 0xf3, 0x3f, 0x20, 0xc1, 0xf3, 0x67, 0x1f, 0xc1, 0xf3, 0x98, 0x1e, 0xc1,
+ 0xf3, 0xcc, 0x1d, 0x41, 0xf3, 0xfa, 0x47, 0x07, 0x9a, 0xc1, 0xf4, 0x27,
+ 0x44, 0x00, 0xf1, 0x41, 0xf4, 0x33, 0x1e, 0xc1, 0xf4, 0x3f, 0x1d, 0x41,
+ 0xf4, 0x5d, 0xa5, 0x09, 0x8c, 0x39, 0xa4, 0x09, 0x8c, 0x31, 0xa3, 0x09,
+ 0x8c, 0x23, 0x01, 0xf4, 0x87, 0xa2, 0x09, 0x8c, 0x19, 0xa1, 0x09, 0x8c,
+ 0x11, 0xa0, 0x09, 0x8c, 0x09, 0x9f, 0x09, 0x8c, 0x01, 0x9e, 0x09, 0x8b,
+ 0xf8, 0xc2, 0xe6, 0x77, 0x09, 0x9d, 0x6b, 0x01, 0xf4, 0x8b, 0x20, 0xc1,
+ 0xf4, 0x8f, 0x1f, 0xc1, 0xf4, 0xc3, 0x1e, 0xc1, 0xf4, 0xf7, 0x1d, 0x41,
+ 0xf5, 0x25, 0x20, 0xc1, 0xf5, 0x52, 0x1f, 0xc1, 0xf5, 0x5e, 0x1e, 0xc1,
+ 0xf5, 0x86, 0x1d, 0x41, 0xf5, 0xae, 0xc2, 0xe4, 0xef, 0x09, 0x82, 0x79,
+ 0x23, 0xc1, 0xf5, 0xd5, 0x22, 0xc1, 0xf5, 0xfd, 0x21, 0xc1, 0xf6, 0x25,
+ 0x20, 0xc1, 0xf6, 0x59, 0x1f, 0xc1, 0xf6, 0x84, 0x1e, 0xc1, 0xf6, 0xac,
+ 0x1d, 0x41, 0xf6, 0xda, 0xa3, 0x09, 0xa0, 0x23, 0x01, 0xf7, 0x04, 0xa2,
+ 0x09, 0x9f, 0xd3, 0x01, 0xf7, 0x24, 0xa1, 0x09, 0x9f, 0xc9, 0xa0, 0x09,
+ 0x9f, 0xc1, 0x9f, 0x09, 0x9f, 0xb9, 0x9e, 0x09, 0x9f, 0xb1, 0x9d, 0x09,
+ 0x9f, 0xa8, 0xa6, 0x09, 0x9f, 0xa1, 0xa5, 0x09, 0x9f, 0x99, 0xa4, 0x09,
+ 0x9f, 0x91, 0xa3, 0x09, 0x9f, 0x89, 0xa2, 0x09, 0x9f, 0x7b, 0x01, 0xf7,
+ 0x48, 0xa1, 0x09, 0x9f, 0x6b, 0x01, 0xf7, 0x4c, 0xa0, 0x09, 0x9f, 0x53,
+ 0x01, 0xf7, 0x50, 0x9f, 0x09, 0x9f, 0x2b, 0x01, 0xf7, 0x58, 0x9e, 0x09,
+ 0x9f, 0x20, 0x83, 0x09, 0x9e, 0xe0, 0x83, 0x09, 0x9e, 0xd0, 0x83, 0x09,
+ 0x9e, 0xb8, 0x84, 0x09, 0x9e, 0xa1, 0x83, 0x09, 0x9e, 0x98, 0xa2, 0x09,
+ 0x9e, 0x71, 0xa1, 0x09, 0x9e, 0x63, 0x01, 0xf7, 0x68, 0xa0, 0x09, 0x9e,
+ 0x59, 0x9f, 0x09, 0x9e, 0x51, 0x9e, 0x09, 0x9e, 0x49, 0x9d, 0x09, 0x9e,
+ 0x40, 0xa6, 0x09, 0x9e, 0x39, 0xa5, 0x09, 0x9e, 0x2b, 0x01, 0xf7, 0x6c,
+ 0xa4, 0x09, 0x9e, 0x1b, 0x01, 0xf7, 0x70, 0xa3, 0x09, 0x9e, 0x11, 0xa2,
+ 0x09, 0x9e, 0x09, 0xa1, 0x09, 0x9d, 0xfb, 0x01, 0xf7, 0x74, 0xa0, 0x09,
+ 0x9d, 0xf1, 0x9f, 0x09, 0x9d, 0xe9, 0x9e, 0x09, 0x9d, 0xe1, 0x9d, 0x09,
+ 0x9d, 0xd2, 0x01, 0xf7, 0x78, 0xa6, 0x09, 0x9d, 0xc3, 0x01, 0xf7, 0x7c,
+ 0xa5, 0x09, 0x9d, 0xb9, 0xa4, 0x09, 0x9d, 0xb1, 0xa3, 0x09, 0x9d, 0xa9,
+ 0xa2, 0x09, 0x9d, 0xa1, 0xa1, 0x09, 0x9d, 0x99, 0xa0, 0x09, 0x9d, 0x8b,
+ 0x01, 0xf7, 0x80, 0x9f, 0x09, 0x9d, 0x81, 0x9e, 0x09, 0x9d, 0x78, 0x9f,
+ 0x09, 0x9b, 0x09, 0x9e, 0x09, 0x9b, 0x01, 0x9d, 0x09, 0x9a, 0xf8, 0xa6,
+ 0x09, 0x9a, 0xf1, 0xa5, 0x09, 0x9a, 0xe9, 0xa4, 0x09, 0x9a, 0xe1, 0xa3,
+ 0x09, 0x9a, 0xd9, 0xa2, 0x09, 0x9a, 0xd1, 0xa1, 0x09, 0x9a, 0xc9, 0xa0,
+ 0x09, 0x9a, 0xc1, 0x9f, 0x09, 0x9a, 0xb3, 0x01, 0xf7, 0x84, 0x9e, 0x09,
+ 0x9a, 0xa9, 0x9d, 0x09, 0x9a, 0xa0, 0xa6, 0x09, 0x9a, 0x93, 0x01, 0xf7,
+ 0x88, 0xa5, 0x09, 0x9a, 0x89, 0xa4, 0x09, 0x9a, 0x81, 0xa3, 0x09, 0x9a,
+ 0x79, 0xa2, 0x09, 0x9a, 0x71, 0xa1, 0x09, 0x9a, 0x69, 0xa0, 0x09, 0x9a,
+ 0x5b, 0x01, 0xf7, 0x8c, 0x9f, 0x09, 0x9a, 0x51, 0x9e, 0x09, 0x9a, 0x49,
+ 0x9d, 0x09, 0x9a, 0x40, 0xa6, 0x09, 0x9a, 0x39, 0xa5, 0x09, 0x9a, 0x31,
+ 0xa4, 0x09, 0x9a, 0x29, 0xa3, 0x09, 0x9a, 0x21, 0xa2, 0x09, 0x9a, 0x19,
+ 0xa1, 0x09, 0x9a, 0x11, 0xa0, 0x09, 0x9a, 0x09, 0x9f, 0x09, 0x9a, 0x01,
+ 0x9e, 0x09, 0x99, 0xf9, 0x9d, 0x09, 0x99, 0xf0, 0xa6, 0x09, 0x99, 0xe9,
+ 0xa5, 0x09, 0x99, 0xe1, 0xa4, 0x09, 0x99, 0xd9, 0xa3, 0x09, 0x99, 0xc3,
+ 0x01, 0xf7, 0x90, 0xa2, 0x09, 0x99, 0xb9, 0xa1, 0x09, 0x99, 0xb1, 0xa0,
+ 0x09, 0x99, 0xa9, 0x9f, 0x09, 0x99, 0xa1, 0x9e, 0x09, 0x99, 0x98, 0xa3,
+ 0x09, 0x99, 0x91, 0xa2, 0x09, 0x99, 0x89, 0xa1, 0x09, 0x99, 0x81, 0xa0,
+ 0x09, 0x99, 0x73, 0x01, 0xf7, 0x98, 0x9f, 0x09, 0x99, 0x63, 0x01, 0xf7,
+ 0x9c, 0x9e, 0x09, 0x99, 0x59, 0x9d, 0x09, 0x99, 0x50, 0xa6, 0x09, 0x99,
+ 0x49, 0xa5, 0x09, 0x99, 0x41, 0xa4, 0x09, 0x99, 0x39, 0xa3, 0x09, 0x99,
+ 0x31, 0xa2, 0x09, 0x99, 0x29, 0xa1, 0x09, 0x99, 0x21, 0xa0, 0x09, 0x99,
+ 0x19, 0x9f, 0x09, 0x99, 0x11, 0x9e, 0x09, 0x99, 0x09, 0x9d, 0x09, 0x99,
+ 0x00, 0xa6, 0x09, 0x98, 0xf9, 0xa5, 0x09, 0x98, 0xf1, 0xa4, 0x09, 0x98,
+ 0xe9, 0xa3, 0x09, 0x98, 0xdb, 0x01, 0xf7, 0xa0, 0xa2, 0x09, 0x98, 0xd1,
+ 0xa1, 0x09, 0x98, 0xc9, 0xa0, 0x09, 0x98, 0xc1, 0x9f, 0x09, 0x98, 0xb9,
+ 0x9e, 0x09, 0x98, 0xab, 0x01, 0xf7, 0xa4, 0x9d, 0x09, 0x98, 0xa0, 0xa6,
+ 0x09, 0x98, 0x93, 0x01, 0xf7, 0xa8, 0xa5, 0x09, 0x98, 0x83, 0x01, 0xf7,
+ 0xac, 0xa4, 0x09, 0x98, 0x73, 0x01, 0xf7, 0xb0, 0xa3, 0x09, 0x98, 0x69,
+ 0xa2, 0x09, 0x98, 0x61, 0xa1, 0x09, 0x98, 0x59, 0xa0, 0x09, 0x98, 0x4b,
+ 0x01, 0xf7, 0xb4, 0x9f, 0x09, 0x98, 0x41, 0x9e, 0x09, 0x98, 0x38, 0xa3,
+ 0x09, 0x98, 0x31, 0xa2, 0x09, 0x98, 0x29, 0xa1, 0x09, 0x98, 0x21, 0xa0,
+ 0x09, 0x98, 0x19, 0x9f, 0x09, 0x98, 0x11, 0x9e, 0x09, 0x98, 0x09, 0x9d,
+ 0x09, 0x98, 0x00, 0xa6, 0x09, 0x97, 0xf9, 0xa5, 0x09, 0x97, 0xf1, 0xa4,
+ 0x09, 0x97, 0xe9, 0xa3, 0x09, 0x97, 0xe1, 0xa2, 0x09, 0x97, 0xd3, 0x01,
+ 0xf7, 0xb8, 0xa1, 0x09, 0x97, 0xc9, 0xa0, 0x09, 0x97, 0xc1, 0x9f, 0x09,
+ 0x97, 0xb9, 0x9e, 0x09, 0x97, 0xb1, 0x9d, 0x09, 0x97, 0xa8, 0xa6, 0x09,
+ 0x97, 0xa1, 0xa5, 0x09, 0x97, 0x99, 0xa4, 0x09, 0x97, 0x91, 0xa3, 0x09,
+ 0x97, 0x7b, 0x01, 0xf7, 0xbc, 0xa2, 0x09, 0x97, 0x71, 0xa1, 0x09, 0x97,
+ 0x69, 0xa0, 0x09, 0x97, 0x61, 0x9f, 0x09, 0x97, 0x59, 0x9e, 0x09, 0x97,
+ 0x51, 0x9d, 0x09, 0x97, 0x48, 0xa6, 0x09, 0x97, 0x41, 0xa5, 0x09, 0x97,
+ 0x39, 0xa4, 0x09, 0x97, 0x2b, 0x01, 0xf7, 0xc4, 0xa3, 0x09, 0x97, 0x21,
+ 0xa2, 0x09, 0x97, 0x19, 0xa1, 0x09, 0x97, 0x03, 0x01, 0xf7, 0xc8, 0xa0,
+ 0x09, 0x96, 0xf9, 0x9f, 0x09, 0x96, 0xf1, 0x9e, 0x09, 0x96, 0xe9, 0x9d,
+ 0x09, 0x96, 0xe0, 0xa6, 0x09, 0x96, 0xd9, 0xa5, 0x09, 0x96, 0xd1, 0xa4,
+ 0x09, 0x96, 0xc9, 0xa3, 0x09, 0x96, 0xbb, 0x01, 0xf7, 0xd0, 0xa2, 0x09,
+ 0x96, 0xb1, 0xa1, 0x09, 0x96, 0xa9, 0xa0, 0x09, 0x96, 0xa1, 0x9f, 0x09,
+ 0x96, 0x93, 0x01, 0xf7, 0xd4, 0x9e, 0x09, 0x96, 0x88, 0xa6, 0x09, 0x96,
+ 0x81, 0xa5, 0x09, 0x96, 0x79, 0xa4, 0x09, 0x96, 0x71, 0xa3, 0x09, 0x96,
+ 0x69, 0xa2, 0x09, 0x96, 0x61, 0xa1, 0x09, 0x96, 0x59, 0xa0, 0x09, 0x96,
+ 0x51, 0x9f, 0x09, 0x96, 0x49, 0x9e, 0x09, 0x96, 0x41, 0x9d, 0x09, 0x96,
+ 0x38, 0xa6, 0x09, 0x96, 0x31, 0xa5, 0x09, 0x96, 0x29, 0xa4, 0x09, 0x96,
+ 0x21, 0xa3, 0x09, 0x96, 0x13, 0x01, 0xf7, 0xd8, 0xa2, 0x09, 0x96, 0x09,
+ 0xa1, 0x09, 0x96, 0x01, 0xa0, 0x09, 0x95, 0xf9, 0x9f, 0x09, 0x95, 0xf1,
+ 0x9e, 0x09, 0x95, 0xe9, 0x9d, 0x09, 0x95, 0xda, 0x01, 0xf7, 0xdc, 0xa6,
+ 0x09, 0x95, 0xd1, 0xa5, 0x09, 0x95, 0xc9, 0xa4, 0x09, 0x95, 0xc1, 0xa3,
+ 0x09, 0x95, 0xb9, 0xa2, 0x09, 0x95, 0xb1, 0xa1, 0x09, 0x95, 0xa9, 0xa0,
+ 0x09, 0x95, 0x93, 0x01, 0xf7, 0xe0, 0x9f, 0x09, 0x95, 0x83, 0x01, 0xf7,
+ 0xe8, 0x9e, 0x09, 0x95, 0x78, 0x9e, 0x09, 0x95, 0x39, 0x9d, 0x09, 0x95,
+ 0x30, 0xa6, 0x09, 0x95, 0x29, 0xa5, 0x09, 0x95, 0x21, 0xa4, 0x09, 0x95,
+ 0x19, 0xa3, 0x09, 0x95, 0x11, 0xa2, 0x09, 0x95, 0x09, 0xa1, 0x09, 0x95,
+ 0x01, 0xa0, 0x09, 0x94, 0xf3, 0x01, 0xf7, 0xec, 0x9f, 0x09, 0x94, 0xe9,
+ 0x9e, 0x09, 0x94, 0xda, 0x01, 0xf7, 0xf0, 0x1f, 0xc1, 0xf7, 0xf4, 0x1e,
+ 0xc1, 0xf8, 0x03, 0x1d, 0x41, 0xf8, 0x34, 0xc2, 0xdc, 0x39, 0x09, 0x91,
+ 0xa9, 0x1e, 0xc1, 0xf8, 0x58, 0x1d, 0x41, 0xf8, 0x83, 0x21, 0xc1, 0xf8,
+ 0xaa, 0x20, 0xc1, 0xf8, 0xb6, 0x1f, 0xc1, 0xf8, 0xea, 0x1e, 0xc1, 0xf9,
+ 0x15, 0x1d, 0x41, 0xf9, 0x40, 0xa1, 0x09, 0x8f, 0x71, 0xa0, 0x09, 0x8f,
+ 0x69, 0x9f, 0x09, 0x8f, 0x61, 0x9e, 0x09, 0x8f, 0x59, 0x9d, 0x09, 0x8f,
+ 0x4a, 0x01, 0xf9, 0x64, 0xa6, 0x09, 0x8f, 0x41, 0xa5, 0x09, 0x8f, 0x39,
+ 0xa4, 0x09, 0x8f, 0x31, 0xa3, 0x09, 0x8f, 0x29, 0xa2, 0x09, 0x8f, 0x21,
+ 0xa1, 0x09, 0x8f, 0x19, 0xa0, 0x09, 0x8f, 0x03, 0x01, 0xf9, 0x68, 0x9f,
+ 0x09, 0x8e, 0xf9, 0x9e, 0x09, 0x8e, 0xeb, 0x01, 0xf9, 0x70, 0x9d, 0x09,
+ 0x8e, 0xe0, 0xa6, 0x09, 0x8e, 0xd9, 0xa5, 0x09, 0x8e, 0xcb, 0x01, 0xf9,
+ 0x74, 0xa4, 0x09, 0x8e, 0xc1, 0xa3, 0x09, 0x8e, 0xb9, 0xa2, 0x09, 0x8e,
+ 0xb1, 0xa1, 0x09, 0x8e, 0xa3, 0x01, 0xf9, 0x78, 0xa0, 0x09, 0x8e, 0x99,
+ 0x9f, 0x09, 0x8e, 0x8b, 0x01, 0xf9, 0x7c, 0x9e, 0x09, 0x8e, 0x81, 0x9d,
+ 0x09, 0x8e, 0x78, 0xa6, 0x09, 0x8e, 0x71, 0xa5, 0x09, 0x8e, 0x69, 0xa4,
+ 0x09, 0x8e, 0x5b, 0x01, 0xf9, 0x80, 0xa3, 0x09, 0x8e, 0x4b, 0x01, 0xf9,
+ 0x84, 0xa2, 0x09, 0x8e, 0x3b, 0x01, 0xf9, 0x88, 0xa1, 0x09, 0x8e, 0x31,
+ 0xa0, 0x09, 0x8e, 0x29, 0x9f, 0x09, 0x8d, 0xe3, 0x01, 0xf9, 0x8c, 0x9e,
+ 0x09, 0x8d, 0xd9, 0x9d, 0x09, 0x8d, 0xca, 0x01, 0xf9, 0xac, 0xa6, 0x09,
+ 0x8d, 0xc1, 0xa5, 0x09, 0x8d, 0xb9, 0xa4, 0x09, 0x8d, 0xb1, 0xa3, 0x09,
+ 0x8d, 0xa9, 0xa2, 0x09, 0x8d, 0xa1, 0xa1, 0x09, 0x8d, 0x99, 0xa0, 0x09,
+ 0x8d, 0x8b, 0x01, 0xf9, 0xb0, 0x9f, 0x09, 0x8d, 0x81, 0x9e, 0x09, 0x8d,
+ 0x6a, 0x01, 0xf9, 0xb4, 0x83, 0x09, 0x8d, 0x50, 0x83, 0x09, 0x8d, 0x28,
+ 0xa1, 0x09, 0x8b, 0xf1, 0xa0, 0x09, 0x8b, 0xe9, 0x9f, 0x09, 0x8b, 0xe1,
+ 0x9e, 0x09, 0x8b, 0xd9, 0x9d, 0x09, 0x8b, 0xd0, 0xa6, 0x09, 0x8b, 0xc9,
+ 0xa5, 0x09, 0x8b, 0xc1, 0xa4, 0x09, 0x8b, 0xb9, 0xa3, 0x09, 0x8b, 0xb1,
+ 0xa2, 0x09, 0x8b, 0xa3, 0x01, 0xf9, 0xbc, 0xa1, 0x09, 0x8b, 0x99, 0xa0,
+ 0x09, 0x8b, 0x8b, 0x01, 0xf9, 0xc0, 0x9f, 0x09, 0x8b, 0x81, 0x9e, 0x09,
+ 0x8b, 0x79, 0x9d, 0x09, 0x8b, 0x70, 0xa6, 0x09, 0x8b, 0x69, 0xa5, 0x09,
+ 0x8b, 0x61, 0xa4, 0x09, 0x8b, 0x53, 0x01, 0xf9, 0xc4, 0xa3, 0x09, 0x8b,
+ 0x43, 0x01, 0xf9, 0xc8, 0xa2, 0x09, 0x8b, 0x39, 0xa1, 0x09, 0x8b, 0x31,
+ 0xa0, 0x09, 0x8b, 0x29, 0x9f, 0x09, 0x8b, 0x21, 0x9e, 0x09, 0x8b, 0x19,
+ 0x9d, 0x09, 0x8b, 0x10, 0xa6, 0x09, 0x8b, 0x09, 0xa5, 0x09, 0x8b, 0x01,
+ 0xa4, 0x09, 0x8a, 0xf9, 0xa3, 0x09, 0x8a, 0xeb, 0x01, 0xf9, 0xcc, 0xa2,
+ 0x09, 0x8a, 0xe1, 0xa1, 0x09, 0x8a, 0xd9, 0xa0, 0x09, 0x8a, 0xd1, 0x9f,
+ 0x09, 0x8a, 0xc9, 0x9e, 0x09, 0x8a, 0xc1, 0x9d, 0x09, 0x8a, 0xb2, 0x01,
+ 0xf9, 0xd0, 0xa6, 0x09, 0x8a, 0xa9, 0xa5, 0x09, 0x8a, 0xa1, 0xa4, 0x09,
+ 0x8a, 0x99, 0xa3, 0x09, 0x8a, 0x91, 0xa2, 0x09, 0x8a, 0x89, 0xa1, 0x09,
+ 0x8a, 0x81, 0xa0, 0x09, 0x8a, 0x79, 0x9f, 0x09, 0x8a, 0x71, 0x9e, 0x09,
+ 0x8a, 0x63, 0x01, 0xf9, 0xd4, 0x9d, 0x09, 0x8a, 0x58, 0xa6, 0x09, 0x8a,
+ 0x51, 0xa5, 0x09, 0x8a, 0x49, 0xa4, 0x09, 0x8a, 0x33, 0x01, 0xf9, 0xd8,
+ 0xa3, 0x09, 0x8a, 0x23, 0x01, 0xf9, 0xe0, 0xa2, 0x09, 0x8a, 0x19, 0xa1,
+ 0x09, 0x8a, 0x11, 0xa0, 0x09, 0x8a, 0x09, 0x9f, 0x09, 0x8a, 0x01, 0x9e,
+ 0x09, 0x89, 0xf8, 0xa0, 0x09, 0x89, 0xf1, 0x9f, 0x09, 0x89, 0xe9, 0x9e,
+ 0x09, 0x89, 0xcb, 0x01, 0xf9, 0xe4, 0x9d, 0x09, 0x89, 0xc0, 0xa6, 0x09,
+ 0x89, 0xb9, 0xa5, 0x09, 0x89, 0xb1, 0xa4, 0x09, 0x89, 0xa3, 0x01, 0xf9,
+ 0xf0, 0xa3, 0x09, 0x89, 0x93, 0x01, 0xf9, 0xf4, 0xa2, 0x09, 0x89, 0x83,
+ 0x01, 0xf9, 0xf8, 0xa1, 0x09, 0x89, 0x79, 0xa0, 0x09, 0x89, 0x71, 0x9f,
+ 0x09, 0x89, 0x69, 0x9e, 0x09, 0x89, 0x61, 0x9d, 0x09, 0x89, 0x58, 0xa6,
+ 0x09, 0x89, 0x51, 0xa5, 0x09, 0x89, 0x43, 0x01, 0xf9, 0xfc, 0xa4, 0x09,
+ 0x89, 0x33, 0x01, 0xfa, 0x00, 0xa3, 0x09, 0x89, 0x29, 0xa2, 0x09, 0x89,
+ 0x21, 0xa1, 0x09, 0x89, 0x19, 0xa0, 0x09, 0x89, 0x11, 0x9f, 0x09, 0x89,
+ 0x09, 0x9e, 0x09, 0x88, 0xfb, 0x01, 0xfa, 0x04, 0x9d, 0x09, 0x88, 0xf0,
+ 0xa6, 0x09, 0x88, 0xe9, 0xa5, 0x09, 0x88, 0xe1, 0xa4, 0x09, 0x88, 0xd9,
+ 0xa3, 0x09, 0x88, 0xd1, 0xa2, 0x09, 0x88, 0xc9, 0xa1, 0x09, 0x88, 0xc1,
+ 0xa0, 0x09, 0x88, 0xb9, 0x9f, 0x09, 0x88, 0xb1, 0x9e, 0x09, 0x88, 0xa3,
+ 0x01, 0xfa, 0x08, 0x9d, 0x09, 0x88, 0x98, 0xa6, 0x09, 0x88, 0x91, 0xa5,
+ 0x09, 0x88, 0x89, 0xa4, 0x09, 0x88, 0x81, 0xa3, 0x09, 0x88, 0x79, 0xa2,
+ 0x09, 0x88, 0x71, 0xa1, 0x09, 0x88, 0x69, 0xa0, 0x09, 0x88, 0x5b, 0x01,
+ 0xfa, 0x0c, 0x9f, 0x09, 0x88, 0x51, 0x9e, 0x09, 0x88, 0x49, 0x9d, 0x09,
+ 0x88, 0x40, 0xa6, 0x09, 0x88, 0x39, 0xa5, 0x09, 0x88, 0x31, 0xa4, 0x09,
+ 0x88, 0x29, 0xa3, 0x09, 0x88, 0x21, 0xa2, 0x09, 0x88, 0x19, 0xa1, 0x09,
+ 0x88, 0x11, 0xa0, 0x09, 0x88, 0x09, 0x9f, 0x09, 0x88, 0x01, 0x9e, 0x09,
+ 0x87, 0xf2, 0x01, 0xfa, 0x10, 0xa4, 0x09, 0x86, 0x4b, 0x01, 0xfa, 0x14,
+ 0xa3, 0x09, 0x86, 0x41, 0xa2, 0x09, 0x86, 0x39, 0xa1, 0x09, 0x86, 0x31,
+ 0xa0, 0x09, 0x86, 0x29, 0x9f, 0x09, 0x86, 0x21, 0x9e, 0x09, 0x86, 0x19,
+ 0x9d, 0x09, 0x86, 0x10, 0xa6, 0x09, 0x86, 0x09, 0xa5, 0x09, 0x86, 0x01,
+ 0xa4, 0x09, 0x85, 0xf9, 0xa3, 0x09, 0x85, 0xf1, 0xa2, 0x09, 0x85, 0xe9,
+ 0xa1, 0x09, 0x85, 0xdb, 0x01, 0xfa, 0x34, 0xa0, 0x09, 0x85, 0xd1, 0x9f,
+ 0x09, 0x85, 0xc3, 0x01, 0xfa, 0x38, 0x9e, 0x09, 0x85, 0xb9, 0x9d, 0x09,
+ 0x85, 0x6a, 0x01, 0xfa, 0x3c, 0xa6, 0x09, 0x85, 0x61, 0xa5, 0x09, 0x85,
+ 0x53, 0x01, 0xfa, 0x60, 0xa4, 0x09, 0x85, 0x49, 0xa3, 0x09, 0x85, 0x3b,
+ 0x01, 0xfa, 0x64, 0xa2, 0x09, 0x85, 0x31, 0xa1, 0x09, 0x85, 0x29, 0xa0,
+ 0x09, 0x85, 0x21, 0x9f, 0x09, 0x85, 0x19, 0x9e, 0x09, 0x85, 0x11, 0x9d,
+ 0x09, 0x85, 0x08, 0xa6, 0x09, 0x85, 0x01, 0xa5, 0x09, 0x84, 0xf9, 0xa4,
+ 0x09, 0x84, 0xf1, 0xa3, 0x09, 0x84, 0xe9, 0xa2, 0x09, 0x84, 0xe1, 0xa1,
+ 0x09, 0x84, 0xd3, 0x01, 0xfa, 0x68, 0xa0, 0x09, 0x84, 0xc9, 0x9f, 0x09,
+ 0x84, 0xc1, 0x9e, 0x09, 0x84, 0xb3, 0x01, 0xfa, 0x6c, 0x9d, 0x09, 0x84,
+ 0xa8, 0xa6, 0x09, 0x84, 0xa1, 0xa5, 0x09, 0x84, 0x99, 0xa4, 0x09, 0x84,
+ 0x8b, 0x01, 0xfa, 0x70, 0xa3, 0x09, 0x84, 0x81, 0xa2, 0x09, 0x84, 0x79,
+ 0xa1, 0x09, 0x84, 0x71, 0xa0, 0x09, 0x84, 0x69, 0x9f, 0x09, 0x84, 0x61,
+ 0x9e, 0x09, 0x84, 0x59, 0x9d, 0x09, 0x84, 0x50, 0xa6, 0x09, 0x84, 0x49,
+ 0xa5, 0x09, 0x84, 0x41, 0xa4, 0x09, 0x84, 0x39, 0xa3, 0x09, 0x84, 0x31,
+ 0xa2, 0x09, 0x84, 0x29, 0xa1, 0x09, 0x84, 0x21, 0xa0, 0x09, 0x84, 0x19,
+ 0x9f, 0x09, 0x84, 0x11, 0x9e, 0x09, 0x84, 0x09, 0x9d, 0x09, 0x84, 0x00,
+ 0xa6, 0x09, 0x83, 0xf9, 0xa5, 0x09, 0x83, 0xeb, 0x01, 0xfa, 0x74, 0xa4,
+ 0x09, 0x83, 0xe1, 0xa3, 0x09, 0x83, 0xd9, 0xa2, 0x09, 0x83, 0xd1, 0xa1,
+ 0x09, 0x83, 0xc9, 0xa0, 0x09, 0x83, 0xc1, 0x9f, 0x09, 0x83, 0xb9, 0x9e,
+ 0x09, 0x83, 0xb0, 0xa1, 0x09, 0x83, 0xa9, 0xa0, 0x09, 0x83, 0xa1, 0x9f,
+ 0x09, 0x83, 0x99, 0x9e, 0x09, 0x83, 0x91, 0x9d, 0x09, 0x83, 0x88, 0xa6,
+ 0x09, 0x83, 0x81, 0xa5, 0x09, 0x83, 0x79, 0xa4, 0x09, 0x83, 0x71, 0xa3,
+ 0x09, 0x83, 0x69, 0xa2, 0x09, 0x83, 0x61, 0xa1, 0x09, 0x83, 0x59, 0xa0,
+ 0x09, 0x83, 0x51, 0x9f, 0x09, 0x83, 0x49, 0x9e, 0x09, 0x83, 0x41, 0x9d,
+ 0x09, 0x83, 0x32, 0x01, 0xfa, 0x78, 0xa6, 0x09, 0x83, 0x29, 0xa5, 0x09,
+ 0x83, 0x21, 0xa4, 0x09, 0x83, 0x19, 0xa3, 0x09, 0x83, 0x11, 0xa2, 0x09,
+ 0x83, 0x09, 0xa1, 0x09, 0x83, 0x01, 0xa0, 0x09, 0x82, 0xf9, 0x9f, 0x09,
+ 0x82, 0xdb, 0x01, 0xfa, 0x7c, 0x9e, 0x09, 0x82, 0xd0, 0xcb, 0x58, 0xc7,
+ 0x0f, 0xbd, 0x39, 0x46, 0x01, 0xfc, 0xc1, 0xfa, 0x88, 0x15, 0xc1, 0xfa,
+ 0x94, 0xd4, 0x3c, 0xb4, 0x0f, 0xbd, 0xa0, 0xc4, 0x18, 0x10, 0x00, 0x37,
+ 0xb9, 0xc2, 0x22, 0xcc, 0x00, 0x37, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0x37,
+ 0xa9, 0xc3, 0x09, 0x9e, 0x00, 0x37, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0x37,
+ 0x99, 0xc2, 0x02, 0xa0, 0x00, 0x37, 0x90, 0x97, 0x00, 0x98, 0x4b, 0x01,
+ 0xfa, 0xa0, 0x47, 0x23, 0x34, 0xc1, 0xfa, 0xa6, 0x83, 0x00, 0x98, 0x43,
+ 0x01, 0xfa, 0xc9, 0x8b, 0x00, 0x98, 0x51, 0x87, 0x00, 0x98, 0x6b, 0x01,
+ 0xfa, 0xcd, 0x91, 0x00, 0x98, 0x73, 0x01, 0xfa, 0xd1, 0x19, 0xc1, 0xfa,
+ 0xd5, 0x09, 0xc1, 0xfa, 0xe7, 0x1b, 0x41, 0xfb, 0x05, 0x0a, 0xc1, 0xfb,
+ 0x1f, 0x83, 0x00, 0x90, 0x03, 0x01, 0xfb, 0x41, 0x97, 0x00, 0x90, 0x09,
+ 0x8b, 0x00, 0x90, 0x11, 0x87, 0x00, 0x90, 0x2b, 0x01, 0xfb, 0x45, 0x91,
+ 0x00, 0x90, 0x32, 0x01, 0xfb, 0x49, 0x04, 0xc1, 0xfb, 0x4d, 0x83, 0x00,
+ 0x93, 0x03, 0x01, 0xfb, 0x67, 0x97, 0x00, 0x93, 0x09, 0x8b, 0x00, 0x93,
+ 0x11, 0x87, 0x00, 0x93, 0x2b, 0x01, 0xfb, 0x6b, 0x91, 0x00, 0x93, 0x33,
+ 0x01, 0xfb, 0x6f, 0x19, 0x41, 0xfb, 0x73, 0x05, 0xc1, 0xfb, 0x82, 0x83,
+ 0x00, 0x93, 0xc3, 0x01, 0xfb, 0xa0, 0x97, 0x00, 0x93, 0xc9, 0x8b, 0x00,
+ 0x93, 0xd1, 0x87, 0x00, 0x93, 0xeb, 0x01, 0xfb, 0xa4, 0x91, 0x00, 0x93,
+ 0xf3, 0x01, 0xfb, 0xa8, 0xc2, 0x01, 0x4a, 0x00, 0x93, 0xf9, 0x0a, 0x41,
+ 0xfb, 0xac, 0x1c, 0xc1, 0xfb, 0xcf, 0x06, 0xc1, 0xfb, 0xe8, 0x83, 0x00,
+ 0x97, 0x83, 0x01, 0xfc, 0x0f, 0x97, 0x00, 0x97, 0x89, 0x8b, 0x00, 0x97,
+ 0x91, 0x87, 0x00, 0x97, 0xab, 0x01, 0xfc, 0x13, 0x91, 0x00, 0x97, 0xb3,
+ 0x01, 0xfc, 0x17, 0xc2, 0x01, 0x4a, 0x00, 0x97, 0xb8, 0x42, 0x00, 0x8e,
+ 0xc1, 0xfc, 0x1b, 0x83, 0x00, 0x93, 0x83, 0x01, 0xfc, 0x34, 0x97, 0x00,
+ 0x93, 0x89, 0x8b, 0x00, 0x93, 0x91, 0x87, 0x00, 0x93, 0xab, 0x01, 0xfc,
+ 0x38, 0x91, 0x00, 0x93, 0xb3, 0x01, 0xfc, 0x3c, 0xc2, 0x01, 0x4a, 0x00,
+ 0x93, 0xb9, 0x0a, 0xc1, 0xfc, 0x40, 0x15, 0xc1, 0xfc, 0x63, 0x1c, 0x41,
+ 0xfc, 0x83, 0x83, 0x00, 0x90, 0x43, 0x01, 0xfc, 0xa0, 0x97, 0x00, 0x90,
+ 0x49, 0x8b, 0x00, 0x90, 0x51, 0x87, 0x00, 0x90, 0x6b, 0x01, 0xfc, 0xa4,
+ 0x91, 0x00, 0x90, 0x73, 0x01, 0xfc, 0xa8, 0xc2, 0x01, 0x4a, 0x00, 0x90,
+ 0x78, 0x83, 0x00, 0x90, 0xc3, 0x01, 0xfc, 0xac, 0x97, 0x00, 0x90, 0xc9,
+ 0x8b, 0x00, 0x90, 0xd1, 0x87, 0x00, 0x90, 0xeb, 0x01, 0xfc, 0xb0, 0x91,
+ 0x00, 0x90, 0xf3, 0x01, 0xfc, 0xb4, 0x19, 0xc1, 0xfc, 0xb8, 0xc2, 0x19,
+ 0x2c, 0x00, 0x9a, 0xc8, 0x1c, 0xc1, 0xfc, 0xc7, 0x83, 0x00, 0x91, 0x83,
+ 0x01, 0xfc, 0xe7, 0x97, 0x00, 0x91, 0x89, 0x8b, 0x00, 0x91, 0x91, 0x87,
+ 0x00, 0x91, 0xab, 0x01, 0xfc, 0xeb, 0x91, 0x00, 0x91, 0xb3, 0x01, 0xfc,
+ 0xf5, 0xc2, 0x01, 0x4a, 0x00, 0x91, 0xb9, 0x0a, 0xc1, 0xfc, 0xf9, 0x15,
+ 0x41, 0xfd, 0x1c, 0x83, 0x00, 0x91, 0x43, 0x01, 0xfd, 0x36, 0x97, 0x00,
+ 0x91, 0x49, 0x8b, 0x00, 0x91, 0x51, 0x87, 0x00, 0x91, 0x6b, 0x01, 0xfd,
+ 0x3a, 0x91, 0x00, 0x91, 0x73, 0x01, 0xfd, 0x3e, 0xc2, 0x01, 0x4a, 0x00,
+ 0x91, 0x79, 0xc2, 0x19, 0x2c, 0x00, 0x9a, 0xc0, 0x83, 0x00, 0x92, 0x03,
+ 0x01, 0xfd, 0x42, 0x97, 0x00, 0x92, 0x09, 0x8b, 0x00, 0x92, 0x11, 0x87,
+ 0x00, 0x92, 0x2b, 0x01, 0xfd, 0x46, 0x91, 0x00, 0x92, 0x33, 0x01, 0xfd,
+ 0x4a, 0x19, 0xc1, 0xfd, 0x4e, 0x0a, 0xc1, 0xfd, 0x60, 0x1b, 0x41, 0xfd,
+ 0x7e, 0x83, 0x00, 0x93, 0x43, 0x01, 0xfd, 0x98, 0x97, 0x00, 0x93, 0x49,
+ 0x8b, 0x00, 0x93, 0x51, 0x87, 0x00, 0x93, 0x6b, 0x01, 0xfd, 0x9c, 0x91,
+ 0x00, 0x93, 0x71, 0xc2, 0x01, 0x4a, 0x00, 0x93, 0x78, 0x83, 0x00, 0x94,
+ 0x03, 0x01, 0xfd, 0xa0, 0x97, 0x00, 0x94, 0x09, 0x8b, 0x00, 0x94, 0x11,
+ 0x87, 0x00, 0x94, 0x2b, 0x01, 0xfd, 0xa4, 0x91, 0x00, 0x94, 0x33, 0x01,
+ 0xfd, 0xa8, 0x19, 0xc1, 0xfd, 0xac, 0x1b, 0x41, 0xfd, 0xbe, 0x83, 0x00,
+ 0x94, 0x83, 0x01, 0xfd, 0xd8, 0x97, 0x00, 0x94, 0x89, 0x8b, 0x00, 0x94,
+ 0x91, 0x87, 0x00, 0x94, 0xab, 0x01, 0xfd, 0xdc, 0x91, 0x00, 0x94, 0xb3,
+ 0x01, 0xfd, 0xe0, 0xc2, 0x01, 0x4a, 0x00, 0x94, 0xb9, 0x1b, 0x41, 0xfd,
+ 0xe4, 0x83, 0x00, 0x95, 0x43, 0x01, 0xfe, 0x07, 0x97, 0x00, 0x95, 0x49,
+ 0x8b, 0x00, 0x95, 0x51, 0x87, 0x00, 0x95, 0x6b, 0x01, 0xfe, 0x0b, 0x91,
+ 0x00, 0x95, 0x73, 0x01, 0xfe, 0x0f, 0x19, 0xc1, 0xfe, 0x13, 0x1a, 0xc1,
+ 0xfe, 0x25, 0x1b, 0x41, 0xfe, 0x43, 0x83, 0x00, 0x96, 0x43, 0x01, 0xfe,
+ 0x5d, 0x97, 0x00, 0x96, 0x49, 0x8b, 0x00, 0x96, 0x51, 0x87, 0x00, 0x96,
+ 0x6b, 0x01, 0xfe, 0x61, 0x91, 0x00, 0x96, 0x72, 0x01, 0xfe, 0x65, 0x0a,
+ 0xc1, 0xfe, 0x69, 0x83, 0x00, 0x9a, 0x83, 0x01, 0xfe, 0x8c, 0x97, 0x00,
+ 0x9a, 0x89, 0x8b, 0x00, 0x9a, 0x91, 0x87, 0x00, 0x9a, 0xab, 0x01, 0xfe,
+ 0x90, 0x91, 0x00, 0x9a, 0xb3, 0x01, 0xfe, 0x94, 0x19, 0x41, 0xfe, 0x98,
+ 0x83, 0x00, 0x96, 0xc3, 0x01, 0xfe, 0xa7, 0x97, 0x00, 0x96, 0xc9, 0x8b,
+ 0x00, 0x96, 0xd1, 0x87, 0x00, 0x96, 0xeb, 0x01, 0xfe, 0xab, 0x91, 0x00,
+ 0x96, 0xf3, 0x01, 0xfe, 0xaf, 0xc2, 0x01, 0x4a, 0x00, 0x96, 0xf9, 0x0a,
+ 0xc1, 0xfe, 0xb3, 0x1c, 0x41, 0xfe, 0xd3, 0x83, 0x00, 0x97, 0x43, 0x01,
+ 0xfe, 0xed, 0x97, 0x00, 0x97, 0x49, 0x8b, 0x00, 0x97, 0x51, 0x87, 0x00,
+ 0x97, 0x6b, 0x01, 0xfe, 0xf1, 0x91, 0x00, 0x97, 0x72, 0x01, 0xfe, 0xf5,
+ 0x83, 0x00, 0x98, 0x03, 0x01, 0xfe, 0xf9, 0x97, 0x00, 0x98, 0x09, 0x8b,
+ 0x00, 0x98, 0x11, 0x87, 0x00, 0x98, 0x2b, 0x01, 0xfe, 0xfd, 0x91, 0x00,
+ 0x98, 0x33, 0x01, 0xff, 0x01, 0xc2, 0x01, 0x4a, 0x00, 0x98, 0x38, 0x83,
+ 0x00, 0x9a, 0x43, 0x01, 0xff, 0x05, 0x97, 0x00, 0x9a, 0x49, 0x8b, 0x00,
+ 0x9a, 0x51, 0x87, 0x00, 0x9a, 0x6b, 0x01, 0xff, 0x09, 0x91, 0x00, 0x9a,
+ 0x71, 0x19, 0xc1, 0xff, 0x0d, 0xc2, 0x19, 0x2c, 0x00, 0x9a, 0xd0, 0x4b,
+ 0x63, 0xff, 0xc1, 0xff, 0x1c, 0xd1, 0x36, 0x4b, 0x00, 0x9a, 0xf0, 0xc9,
+ 0x57, 0x20, 0x00, 0x9b, 0xe0, 0xc6, 0xce, 0xc9, 0x00, 0x9c, 0xc0, 0x48,
+ 0x6e, 0x42, 0xc1, 0xff, 0x28, 0x45, 0x00, 0x8c, 0x41, 0xff, 0x34, 0xc5,
+ 0x01, 0xa2, 0x01, 0x18, 0x09, 0xc5, 0xd8, 0x53, 0x0f, 0xa9, 0x31, 0xc4,
+ 0xe3, 0xdb, 0x0f, 0xa8, 0x61, 0xca, 0xa5, 0x94, 0x0f, 0xa5, 0x08, 0xc2,
+ 0x39, 0x8b, 0x08, 0x7f, 0xa9, 0xc3, 0x1e, 0x1b, 0x08, 0x7f, 0x40, 0xc3,
+ 0x11, 0xef, 0x08, 0x7f, 0xa1, 0x03, 0x41, 0xff, 0x58, 0xc2, 0x00, 0x8e,
+ 0x08, 0x7f, 0x38, 0xc4, 0x36, 0xb5, 0x08, 0x7f, 0x01, 0xc3, 0x16, 0x5a,
+ 0x08, 0x7f, 0x78, 0x87, 0x08, 0x29, 0x29, 0xc4, 0x38, 0x2c, 0x08, 0x29,
+ 0x30, 0xd6, 0x2e, 0x6a, 0x01, 0x39, 0xb9, 0xcd, 0x0e, 0x61, 0x01, 0x39,
+ 0xa9, 0xca, 0x22, 0x51, 0x01, 0x39, 0xa0, 0xc2, 0x00, 0x55, 0x01, 0x10,
+ 0x71, 0xcb, 0x6d, 0x97, 0x00, 0x04, 0xb8, 0xcb, 0x98, 0xd1, 0x00, 0x00,
+ 0x23, 0x01, 0xff, 0x64, 0xc3, 0x09, 0x3f, 0x00, 0x00, 0x18, 0x43, 0x05,
+ 0xb2, 0xc1, 0xff, 0x6a, 0xcd, 0x76, 0x76, 0x01, 0x12, 0xe8, 0x00, 0x41,
+ 0xff, 0x82, 0xc4, 0x18, 0x10, 0x08, 0xed, 0x39, 0xc2, 0x22, 0xcc, 0x08,
+ 0xed, 0x30, 0xc3, 0x0d, 0x14, 0x08, 0xed, 0x29, 0xc3, 0x09, 0x9e, 0x08,
+ 0xed, 0x20, 0xc4, 0x02, 0xde, 0x08, 0xed, 0x19, 0xc2, 0x02, 0xa0, 0x08,
+ 0xed, 0x10, 0x03, 0xc1, 0xff, 0x8c, 0xc2, 0x01, 0x24, 0x08, 0xec, 0x99,
+ 0xc2, 0x02, 0xe0, 0x08, 0xec, 0x81, 0x97, 0x08, 0xec, 0x6b, 0x01, 0xff,
+ 0x98, 0x8b, 0x08, 0xec, 0x5a, 0x01, 0xff, 0x9c, 0xc2, 0x00, 0xd0, 0x08,
+ 0xec, 0x31, 0x83, 0x08, 0xec, 0x28, 0xc2, 0x01, 0x30, 0x08, 0xec, 0x21,
+ 0x83, 0x08, 0xeb, 0xd0, 0x06, 0xc1, 0xff, 0xa0, 0xc2, 0x00, 0xd0, 0x08,
+ 0xeb, 0xc9, 0x83, 0x08, 0xeb, 0xc0, 0xc2, 0x00, 0xd0, 0x08, 0xec, 0x09,
+ 0x83, 0x08, 0xec, 0x00, 0xc2, 0x00, 0xdb, 0x08, 0xeb, 0xf9, 0x83, 0x08,
+ 0xeb, 0xa8, 0x16, 0xc1, 0xff, 0xaa, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xa1,
+ 0x83, 0x08, 0xeb, 0x98, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xe1, 0x83, 0x08,
+ 0xeb, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xb9, 0x83, 0x08, 0xeb, 0xb0,
+ 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0x91, 0x83, 0x08, 0xeb, 0x88, 0xc2, 0x00,
+ 0xd0, 0x08, 0xeb, 0x79, 0x83, 0x08, 0xeb, 0x70, 0x97, 0x08, 0xeb, 0x59,
+ 0x8b, 0x08, 0xeb, 0x41, 0x83, 0x08, 0xeb, 0x08, 0x97, 0x08, 0xeb, 0x28,
+ 0x8b, 0x08, 0xeb, 0x18, 0xc5, 0x40, 0xe7, 0x00, 0x50, 0x19, 0xc4, 0x1e,
+ 0x97, 0x00, 0x52, 0x68, 0x83, 0x00, 0x50, 0x31, 0x8b, 0x00, 0x50, 0x81,
+ 0x97, 0x00, 0x50, 0xa0, 0x8b, 0x00, 0x50, 0x40, 0x97, 0x00, 0x50, 0x50,
+ 0x83, 0x00, 0x50, 0xa9, 0x0a, 0x41, 0xff, 0xb4, 0x83, 0x00, 0x50, 0xb9,
+ 0x0a, 0x41, 0xff, 0xbe, 0xc2, 0x01, 0x30, 0x00, 0x50, 0xc9, 0xc2, 0x19,
+ 0x2c, 0x00, 0x50, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x51, 0x19, 0x83, 0x00,
+ 0x51, 0x40, 0x83, 0x00, 0x50, 0xd1, 0xc2, 0x00, 0xd0, 0x00, 0x50, 0xd8,
+ 0x83, 0x00, 0x50, 0xe1, 0xc2, 0x00, 0xd0, 0x00, 0x50, 0xe8, 0x16, 0xc1,
+ 0xff, 0xc8, 0x83, 0x00, 0x51, 0x21, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x28,
+ 0x06, 0xc1, 0xff, 0xd2, 0x83, 0x00, 0x51, 0x31, 0xc2, 0x00, 0xd0, 0x00,
+ 0x51, 0x38, 0x83, 0x00, 0x51, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x58,
+ 0x83, 0x00, 0x51, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x68, 0x83, 0x00,
+ 0x51, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x52, 0xe0, 0x83, 0x00, 0x51, 0x91,
+ 0xc2, 0x00, 0xdb, 0x00, 0x51, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0xb1,
+ 0x83, 0x00, 0x51, 0xc0, 0x83, 0x00, 0x51, 0xf1, 0x8b, 0x00, 0x52, 0x41,
+ 0x97, 0x00, 0x52, 0x60, 0x8b, 0x00, 0x52, 0x00, 0x97, 0x00, 0x52, 0x10,
+ 0xc2, 0x02, 0xa0, 0x00, 0x53, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x53, 0x48,
+ 0xc3, 0x09, 0x9e, 0x00, 0x53, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x53, 0x58,
+ 0xc2, 0x22, 0xcc, 0x00, 0x53, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x53, 0x68,
+ 0xca, 0x1e, 0x8a, 0x00, 0x54, 0x09, 0xd1, 0x33, 0x57, 0x00, 0x57, 0xf0,
+ 0xc7, 0x14, 0x39, 0x00, 0x54, 0x11, 0xc7, 0x7a, 0x7f, 0x00, 0x55, 0xe8,
+ 0xc5, 0x40, 0xe7, 0x00, 0x54, 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x56, 0x68,
+ 0xc4, 0xdb, 0xfb, 0x00, 0x57, 0xd1, 0xc5, 0xd7, 0x18, 0x00, 0x57, 0xd8,
+ 0xd4, 0x3a, 0x84, 0x00, 0x57, 0xe9, 0xd5, 0x33, 0x53, 0x00, 0x57, 0xf8,
+ 0x83, 0x00, 0x54, 0x31, 0x8b, 0x00, 0x54, 0x81, 0x97, 0x00, 0x54, 0xa0,
+ 0x8b, 0x00, 0x54, 0x40, 0x97, 0x00, 0x54, 0x50, 0x47, 0xb2, 0x2e, 0xc1,
+ 0xff, 0xdc, 0x83, 0x00, 0x55, 0xa8, 0x83, 0x00, 0x54, 0xa9, 0xc2, 0x00,
+ 0xd0, 0x00, 0x54, 0xb0, 0x83, 0x00, 0x54, 0xb9, 0xc2, 0x00, 0xd0, 0x00,
+ 0x54, 0xc0, 0xc2, 0x01, 0x30, 0x00, 0x54, 0xc9, 0xc2, 0x19, 0x2c, 0x00,
+ 0x54, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x55, 0x19, 0x83, 0x00, 0x55, 0x40,
+ 0x83, 0x00, 0x54, 0xd1, 0xc2, 0x00, 0xd0, 0x00, 0x54, 0xd8, 0x83, 0x00,
+ 0x54, 0xe1, 0xc2, 0x00, 0xd0, 0x00, 0x54, 0xe8, 0x16, 0xc1, 0xff, 0xea,
+ 0x83, 0x00, 0x55, 0x21, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x28, 0x06, 0xc1,
+ 0xff, 0xf4, 0x83, 0x00, 0x55, 0x31, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x38,
+ 0x83, 0x00, 0x55, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x58, 0x83, 0x00,
+ 0x55, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x68, 0x83, 0x00, 0x55, 0x91,
+ 0xc2, 0x00, 0xdb, 0x00, 0x55, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0xb1,
+ 0xc2, 0x0d, 0xf6, 0x00, 0x55, 0xb9, 0x83, 0x00, 0x55, 0xc0, 0x87, 0x00,
+ 0x54, 0x69, 0x91, 0x00, 0x54, 0x88, 0x03, 0xc1, 0xff, 0xfe, 0x8b, 0x00,
+ 0x55, 0xfb, 0x02, 0x00, 0x0a, 0x97, 0x00, 0x56, 0x0b, 0x02, 0x00, 0x0e,
+ 0x48, 0xb2, 0x2d, 0xc2, 0x00, 0x12, 0x47, 0xc7, 0x7b, 0xc2, 0x00, 0x20,
+ 0x87, 0x00, 0x56, 0x39, 0x91, 0x00, 0x56, 0x58, 0xc2, 0x02, 0xa0, 0x00,
+ 0x57, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x57, 0x48, 0xc3, 0x09, 0x9e, 0x00,
+ 0x57, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x57, 0x58, 0xc2, 0x22, 0xcc, 0x00,
+ 0x57, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x57, 0x68, 0xc2, 0x0d, 0x10, 0x08,
+ 0x1a, 0x09, 0xc8, 0x0d, 0x03, 0x08, 0x1a, 0x50, 0x0f, 0xc2, 0x00, 0x28,
+ 0x42, 0x00, 0x74, 0xc2, 0x00, 0x34, 0x18, 0xc2, 0x00, 0x40, 0x06, 0xc2,
+ 0x00, 0x4c, 0x11, 0xc2, 0x00, 0x61, 0x48, 0x0b, 0x17, 0xc2, 0x00, 0x79,
+ 0x15, 0xc2, 0x00, 0x95, 0x12, 0xc2, 0x00, 0xad, 0x0d, 0xc2, 0x00, 0xce,
+ 0x0e, 0xc2, 0x00, 0xde, 0xcc, 0x56, 0x9a, 0x00, 0x1b, 0xa1, 0x1b, 0xc2,
+ 0x00, 0xf6, 0xcd, 0x2c, 0xb2, 0x00, 0x1b, 0xf1, 0x16, 0xc2, 0x01, 0x02,
+ 0x03, 0xc2, 0x01, 0x1e, 0xcb, 0x93, 0xa9, 0x00, 0x1e, 0x81, 0x14, 0xc2,
+ 0x01, 0x2e, 0x08, 0xc2, 0x01, 0x3a, 0xcb, 0x92, 0x3e, 0x08, 0x0c, 0x29,
+ 0xcb, 0x8c, 0xb3, 0x08, 0x0c, 0x41, 0xc9, 0xab, 0x7f, 0x08, 0x0c, 0x51,
+ 0x4d, 0x78, 0x4a, 0x42, 0x01, 0x46, 0xc4, 0xe3, 0x33, 0x0f, 0xa6, 0xb9,
+ 0xc5, 0x1c, 0xae, 0x0f, 0xa4, 0xd1, 0xc5, 0xd7, 0x1d, 0x0f, 0x9a, 0x79,
+ 0xc5, 0xd9, 0xfc, 0x0f, 0xca, 0xb8, 0x4a, 0x37, 0x44, 0xc2, 0x01, 0x58,
+ 0xcf, 0x65, 0xc1, 0x01, 0x55, 0x28, 0xc3, 0x02, 0xa3, 0x01, 0x16, 0xb9,
+ 0xcd, 0x78, 0x30, 0x01, 0x53, 0xd1, 0xd3, 0x43, 0x39, 0x01, 0x53, 0xe0,
+ 0x42, 0x00, 0x2a, 0xc2, 0x01, 0x64, 0x43, 0x00, 0x5f, 0x42, 0x01, 0x7f,
+ 0x45, 0x00, 0xd5, 0xc2, 0x01, 0x8b, 0x43, 0x02, 0x9c, 0x42, 0x01, 0x9d,
+ 0xd4, 0x00, 0xd3, 0x01, 0x55, 0x48, 0x48, 0xb2, 0x2d, 0xc2, 0x01, 0xa9,
+ 0x03, 0xc2, 0x01, 0xb7, 0xc2, 0x01, 0x24, 0x08, 0x9a, 0x59, 0xc2, 0x02,
+ 0xe0, 0x08, 0x9a, 0x39, 0x97, 0x08, 0x9a, 0x0b, 0x02, 0x01, 0xc3, 0x8b,
+ 0x08, 0x99, 0xfa, 0x02, 0x01, 0xc7, 0x18, 0xc2, 0x01, 0xcb, 0xc2, 0x00,
+ 0xd0, 0x08, 0x99, 0xc9, 0x15, 0xc2, 0x01, 0xdb, 0x0e, 0xc2, 0x01, 0xeb,
+ 0xc2, 0x00, 0x39, 0x08, 0x99, 0x81, 0xc2, 0x19, 0x2c, 0x08, 0x99, 0x79,
+ 0xc2, 0x01, 0xc3, 0x08, 0x99, 0x71, 0x04, 0xc2, 0x01, 0xf5, 0x12, 0xc2,
+ 0x01, 0xff, 0x06, 0xc2, 0x02, 0x09, 0x16, 0xc2, 0x02, 0x17, 0x10, 0xc2,
+ 0x02, 0x25, 0x0c, 0xc2, 0x02, 0x3b, 0x05, 0xc2, 0x02, 0x45, 0x09, 0xc2,
+ 0x02, 0x4f, 0x0d, 0xc2, 0x02, 0x59, 0x83, 0x08, 0x98, 0x2b, 0x02, 0x02,
+ 0x63, 0xc2, 0x01, 0x24, 0x08, 0x98, 0x99, 0x97, 0x08, 0x98, 0x4b, 0x02,
+ 0x02, 0x6f, 0x8b, 0x08, 0x98, 0x3b, 0x02, 0x02, 0x73, 0xc2, 0x02, 0xe0,
+ 0x08, 0x98, 0x78, 0xc5, 0xd7, 0x3b, 0x08, 0x9a, 0xe9, 0x42, 0x07, 0xb2,
+ 0xc2, 0x02, 0x77, 0x03, 0xc2, 0x02, 0x83, 0xc5, 0x33, 0x5d, 0x08, 0x99,
+ 0xe1, 0x05, 0x42, 0x02, 0x8f, 0x46, 0x00, 0x8b, 0x42, 0x02, 0x9b, 0xc5,
+ 0x07, 0x62, 0x01, 0x12, 0x89, 0xca, 0x37, 0x4e, 0x01, 0x12, 0x70, 0x42,
+ 0x00, 0xdb, 0xc2, 0x02, 0xa5, 0x0a, 0xc2, 0x02, 0xaf, 0x03, 0xc2, 0x02,
+ 0xc3, 0x16, 0xc2, 0x02, 0xd3, 0x07, 0xc2, 0x02, 0xdd, 0xc2, 0x17, 0xb6,
+ 0x00, 0xe5, 0xb9, 0xc2, 0x02, 0x09, 0x00, 0xe5, 0xb1, 0xc2, 0x00, 0x28,
+ 0x00, 0xe5, 0x99, 0x0c, 0xc2, 0x02, 0xe7, 0xc3, 0xe6, 0x47, 0x00, 0xe5,
+ 0x71, 0x05, 0xc2, 0x02, 0xf3, 0x15, 0xc2, 0x03, 0x03, 0xc3, 0xe5, 0x69,
+ 0x00, 0xe5, 0x39, 0x09, 0xc2, 0x03, 0x0f, 0x0d, 0xc2, 0x03, 0x1b, 0x12,
+ 0xc2, 0x03, 0x27, 0xc2, 0x05, 0x1d, 0x00, 0xe5, 0x19, 0xc3, 0x82, 0x78,
+ 0x00, 0xe5, 0x01, 0x1c, 0xc2, 0x03, 0x33, 0xc2, 0x00, 0x45, 0x00, 0xe4,
+ 0xe9, 0xc3, 0x09, 0xe6, 0x00, 0xe4, 0xe1, 0xc3, 0x12, 0xb8, 0x00, 0xe4,
+ 0xd9, 0xc2, 0x00, 0x74, 0x00, 0xe4, 0xc1, 0xc3, 0x21, 0x7e, 0x00, 0xe4,
+ 0xa9, 0xc3, 0x62, 0xe1, 0x00, 0xe4, 0x99, 0xc3, 0x10, 0xd0, 0x00, 0xe4,
+ 0x88, 0x03, 0xc2, 0x03, 0x3f, 0xc3, 0x10, 0xd0, 0x00, 0x85, 0x09, 0x09,
+ 0xc2, 0x03, 0x49, 0xc3, 0x62, 0xe1, 0x00, 0x85, 0x19, 0xc2, 0x00, 0xc4,
+ 0x00, 0x85, 0x21, 0xc3, 0x21, 0x7e, 0x00, 0x85, 0x29, 0x1c, 0xc2, 0x03,
+ 0x55, 0x42, 0x01, 0x6f, 0xc2, 0x03, 0x61, 0xc2, 0x00, 0x74, 0x00, 0x85,
+ 0x41, 0x0d, 0xc2, 0x03, 0x69, 0xc3, 0x03, 0x03, 0x00, 0x85, 0x51, 0xc3,
+ 0x12, 0xb8, 0x00, 0x85, 0x59, 0xc3, 0x09, 0xe6, 0x00, 0x85, 0x61, 0xc2,
+ 0x00, 0x45, 0x00, 0x85, 0x69, 0x12, 0xc2, 0x03, 0x75, 0xc3, 0x82, 0x78,
+ 0x00, 0x85, 0x81, 0x15, 0xc2, 0x03, 0x81, 0xc2, 0x05, 0x1d, 0x00, 0x85,
+ 0x99, 0xc3, 0xe5, 0x69, 0x00, 0x85, 0xb9, 0x05, 0xc2, 0x03, 0x8d, 0x0c,
+ 0xc2, 0x03, 0x9d, 0xc3, 0xe6, 0x47, 0x00, 0x85, 0xf1, 0x0a, 0xc2, 0x03,
+ 0xa9, 0xc2, 0x00, 0x28, 0x00, 0x86, 0x19, 0xc2, 0x17, 0xb6, 0x00, 0x86,
+ 0x38, 0x03, 0xc2, 0x03, 0xbd, 0xc3, 0x10, 0xd0, 0x00, 0x86, 0x89, 0x09,
+ 0xc2, 0x03, 0xcd, 0xc3, 0x62, 0xe1, 0x00, 0x86, 0x99, 0x07, 0xc2, 0x03,
+ 0xd9, 0xc3, 0x21, 0x7e, 0x00, 0x86, 0xa9, 0x1c, 0xc2, 0x03, 0xe3, 0x16,
+ 0xc2, 0x03, 0xef, 0xc2, 0x00, 0x74, 0x00, 0x86, 0xc1, 0x0d, 0xc2, 0x03,
+ 0xf9, 0x42, 0x00, 0xdb, 0xc2, 0x04, 0x05, 0xc3, 0x12, 0xb8, 0x00, 0x86,
+ 0xd9, 0xc3, 0x09, 0xe6, 0x00, 0x86, 0xe1, 0xc2, 0x00, 0x45, 0x00, 0x86,
+ 0xe9, 0x12, 0xc2, 0x04, 0x0f, 0xc3, 0x82, 0x78, 0x00, 0x87, 0x01, 0x15,
+ 0xc2, 0x04, 0x1b, 0xc2, 0x05, 0x1d, 0x00, 0x87, 0x19, 0xc3, 0xe5, 0x69,
+ 0x00, 0x87, 0x39, 0x05, 0xc2, 0x04, 0x27, 0x0c, 0xc2, 0x04, 0x37, 0xc3,
+ 0xe6, 0x47, 0x00, 0x87, 0x71, 0x0a, 0xc2, 0x04, 0x43, 0xc2, 0x00, 0x28,
+ 0x00, 0x87, 0x99, 0xc2, 0x02, 0x09, 0x00, 0x87, 0xb1, 0xc2, 0x17, 0xb6,
+ 0x00, 0x87, 0xb8, 0x03, 0xc2, 0x04, 0x57, 0xc3, 0x10, 0xd0, 0x01, 0x68,
+ 0x09, 0x09, 0xc2, 0x04, 0x61, 0xc3, 0x62, 0xe1, 0x01, 0x68, 0x19, 0xc2,
+ 0x00, 0xc4, 0x01, 0x68, 0x21, 0xc3, 0x21, 0x7e, 0x01, 0x68, 0x29, 0x1c,
+ 0xc2, 0x04, 0x6d, 0x42, 0x01, 0x6f, 0xc2, 0x04, 0x79, 0xc2, 0x00, 0x74,
+ 0x01, 0x68, 0x41, 0x0d, 0xc2, 0x04, 0x81, 0xc3, 0x03, 0x03, 0x01, 0x68,
+ 0x51, 0xc3, 0x12, 0xb8, 0x01, 0x68, 0x59, 0xc3, 0x09, 0xe6, 0x01, 0x68,
+ 0x61, 0xc2, 0x00, 0x45, 0x01, 0x68, 0x69, 0x12, 0xc2, 0x04, 0x8d, 0xc3,
+ 0x82, 0x78, 0x01, 0x68, 0x81, 0x15, 0xc2, 0x04, 0x99, 0xc2, 0x05, 0x1d,
+ 0x01, 0x68, 0x99, 0xc3, 0xe5, 0x69, 0x01, 0x68, 0xb9, 0x05, 0xc2, 0x04,
+ 0xa5, 0x0c, 0xc2, 0x04, 0xb5, 0xc3, 0xe6, 0x47, 0x01, 0x68, 0xf1, 0x0a,
+ 0xc2, 0x04, 0xc1, 0xc2, 0x00, 0x28, 0x01, 0x69, 0x19, 0xc2, 0x17, 0xb6,
+ 0x01, 0x69, 0x38, 0xc3, 0xe5, 0x4b, 0x01, 0x60, 0x01, 0x04, 0xc2, 0x04,
+ 0xd5, 0xc4, 0xdf, 0x83, 0x01, 0x60, 0x11, 0xc7, 0xc1, 0xf5, 0x01, 0x60,
+ 0x19, 0x06, 0xc2, 0x04, 0xe1, 0x1b, 0xc2, 0x04, 0xf3, 0x1c, 0xc2, 0x05,
+ 0x05, 0x8b, 0x01, 0x60, 0x5b, 0x02, 0x05, 0x11, 0xc4, 0xe1, 0x6b, 0x01,
+ 0x60, 0x69, 0x0e, 0xc2, 0x05, 0x23, 0xc7, 0x60, 0xdd, 0x01, 0x60, 0x79,
+ 0xc5, 0xdb, 0x78, 0x01, 0x60, 0x81, 0x11, 0xc2, 0x05, 0x2f, 0x12, 0xc2,
+ 0x05, 0x3b, 0xc5, 0xd7, 0xb3, 0x01, 0x60, 0x99, 0x15, 0xc2, 0x05, 0x45,
+ 0x16, 0xc2, 0x05, 0x5e, 0xc3, 0xc5, 0x6f, 0x01, 0x60, 0xb1, 0x08, 0xc2,
+ 0x05, 0x70, 0xc4, 0xdf, 0x9f, 0x01, 0x60, 0xc1, 0x05, 0x42, 0x05, 0x7c,
+ 0xc3, 0xe5, 0x4b, 0x01, 0x61, 0x81, 0x04, 0xc2, 0x05, 0x88, 0xc4, 0xdf,
+ 0x83, 0x01, 0x61, 0x91, 0xc7, 0xc1, 0xf5, 0x01, 0x61, 0x99, 0x06, 0xc2,
+ 0x05, 0x94, 0x1b, 0xc2, 0x05, 0xa6, 0x1c, 0xc2, 0x05, 0xb8, 0x8b, 0x01,
+ 0x61, 0xdb, 0x02, 0x05, 0xc4, 0xc4, 0xe1, 0x6b, 0x01, 0x61, 0xe9, 0x0e,
+ 0xc2, 0x05, 0xd6, 0xc7, 0x60, 0xdd, 0x01, 0x61, 0xf9, 0xc5, 0xdb, 0x78,
+ 0x01, 0x62, 0x01, 0x11, 0xc2, 0x05, 0xe2, 0x12, 0xc2, 0x05, 0xee, 0xc5,
+ 0xd7, 0xb3, 0x01, 0x62, 0x19, 0x15, 0xc2, 0x05, 0xf8, 0x16, 0xc2, 0x06,
+ 0x11, 0xc3, 0xc5, 0x6f, 0x01, 0x62, 0x31, 0x08, 0xc2, 0x06, 0x23, 0xc4,
+ 0xdf, 0x9f, 0x01, 0x62, 0x41, 0x05, 0x42, 0x06, 0x2f, 0xcb, 0x1e, 0x89,
+ 0x00, 0x58, 0x09, 0x03, 0xc2, 0x06, 0x3b, 0x42, 0x07, 0xb2, 0xc2, 0x06,
+ 0x47, 0xc5, 0x33, 0x5d, 0x00, 0x59, 0xe1, 0xc8, 0x7d, 0xa4, 0x00, 0x5a,
+ 0xa8, 0x83, 0x00, 0x58, 0x2b, 0x02, 0x06, 0x53, 0x8b, 0x00, 0x58, 0x3b,
+ 0x02, 0x06, 0x5f, 0x97, 0x00, 0x58, 0x4b, 0x02, 0x06, 0x63, 0x18, 0xc2,
+ 0x06, 0x67, 0x87, 0x00, 0x58, 0x79, 0x91, 0x00, 0x58, 0x99, 0x0d, 0xc2,
+ 0x06, 0x71, 0x09, 0xc2, 0x06, 0x7b, 0x10, 0xc2, 0x06, 0x85, 0x05, 0xc2,
+ 0x06, 0x9b, 0x0c, 0xc2, 0x06, 0xa5, 0x16, 0xc2, 0x06, 0xaf, 0x06, 0xc2,
+ 0x06, 0xbd, 0x12, 0xc2, 0x06, 0xcb, 0x04, 0xc2, 0x06, 0xd5, 0xc2, 0x01,
+ 0xc3, 0x00, 0x59, 0x71, 0x1b, 0xc2, 0x06, 0xdf, 0x14, 0xc2, 0x06, 0xe9,
+ 0x0e, 0xc2, 0x06, 0xf9, 0x15, 0xc2, 0x07, 0x03, 0xc2, 0x00, 0xd0, 0x00,
+ 0x59, 0xc9, 0xc2, 0x01, 0x4a, 0x00, 0x5b, 0x88, 0x03, 0xc2, 0x07, 0x13,
+ 0x8b, 0x00, 0x59, 0xfb, 0x02, 0x07, 0x1f, 0x97, 0x00, 0x5a, 0x0b, 0x02,
+ 0x07, 0x23, 0x48, 0xb2, 0x2d, 0xc2, 0x07, 0x27, 0x87, 0x00, 0x5a, 0x39,
+ 0x91, 0x00, 0x5a, 0x58, 0xcd, 0x74, 0xcd, 0x00, 0x5a, 0xb1, 0xcd, 0x73,
+ 0x0d, 0x00, 0x5a, 0xb8, 0xc4, 0x15, 0xe7, 0x00, 0x5b, 0x31, 0xc3, 0x05,
+ 0x14, 0x00, 0x5b, 0x39, 0x16, 0xc2, 0x07, 0x35, 0x08, 0xc2, 0x07, 0x41,
+ 0x15, 0xc2, 0x07, 0x4d, 0xc5, 0x06, 0xdb, 0x00, 0x5b, 0x71, 0xc4, 0x26,
+ 0x78, 0x00, 0x5b, 0x78, 0x44, 0x05, 0x14, 0xc2, 0x07, 0x59, 0x46, 0x02,
+ 0xdd, 0x42, 0x07, 0x71, 0x0a, 0xc2, 0x07, 0x7d, 0x19, 0xc2, 0x07, 0x8f,
+ 0xc2, 0x00, 0xc4, 0x0f, 0x68, 0x52, 0x02, 0x07, 0x9f, 0x11, 0xc2, 0x07,
+ 0xa5, 0x0b, 0x42, 0x07, 0xb7, 0x00, 0x42, 0x07, 0xc9, 0xc2, 0x22, 0xcc,
+ 0x0f, 0x68, 0x33, 0x02, 0x07, 0xd5, 0xc4, 0x18, 0x10, 0x0f, 0x68, 0x3a,
+ 0x02, 0x07, 0xe2, 0x9b, 0x0f, 0x68, 0x8b, 0x02, 0x07, 0xef, 0x00, 0x42,
+ 0x07, 0xf5, 0xc2, 0x0d, 0x10, 0x0f, 0x68, 0x93, 0x02, 0x08, 0x01, 0x00,
+ 0x42, 0x08, 0x07, 0xc2, 0x02, 0xa0, 0x0f, 0x69, 0x7b, 0x02, 0x08, 0x13,
+ 0xc4, 0x02, 0xde, 0x0f, 0x69, 0x81, 0xc2, 0x00, 0xc4, 0x0f, 0x69, 0xba,
+ 0x02, 0x08, 0x19, 0xc3, 0x09, 0x9e, 0x0f, 0x69, 0x8b, 0x02, 0x08, 0x1f,
+ 0xc3, 0x0d, 0x14, 0x0f, 0x69, 0x90, 0xc2, 0x22, 0xcc, 0x0f, 0x69, 0x9b,
+ 0x02, 0x08, 0x25, 0xc4, 0x18, 0x10, 0x0f, 0x69, 0xa0, 0xc6, 0x72, 0x26,
+ 0x01, 0x01, 0x21, 0xd9, 0x11, 0xc9, 0x01, 0x71, 0x58, 0x42, 0x06, 0x62,
+ 0xc2, 0x08, 0x2b, 0x47, 0x0f, 0x81, 0xc2, 0x08, 0x37, 0x42, 0x00, 0x6b,
+ 0xc2, 0x08, 0x4f, 0x08, 0xc2, 0x08, 0x59, 0xc4, 0x04, 0x1f, 0x0f, 0xa8,
+ 0x99, 0x4d, 0x7f, 0x32, 0xc2, 0x08, 0x65, 0xca, 0x6c, 0x80, 0x0f, 0xa2,
+ 0x80, 0xd9, 0x1d, 0x56, 0x01, 0x3d, 0xf1, 0x4f, 0x66, 0x75, 0x42, 0x08,
+ 0x71, 0xce, 0x1c, 0x92, 0x0b, 0x7f, 0x19, 0xc9, 0xa9, 0xea, 0x0b, 0x7f,
+ 0x10, 0x4c, 0x11, 0xe2, 0xc2, 0x08, 0x7d, 0x4a, 0x51, 0x89, 0xc2, 0x08,
+ 0x8f, 0x47, 0x02, 0x0e, 0x42, 0x08, 0x9b, 0x46, 0xc9, 0x58, 0xc2, 0x08,
+ 0xf1, 0x4c, 0x86, 0x0d, 0x42, 0x09, 0x01, 0x47, 0x34, 0x2f, 0xc2, 0x09,
+ 0x0d, 0x4d, 0x29, 0xb9, 0xc2, 0x09, 0x22, 0x4f, 0x0b, 0x17, 0x42, 0x09,
+ 0x5d, 0x47, 0xc8, 0x07, 0xc2, 0x09, 0x98, 0x48, 0xb6, 0x6a, 0x42, 0x09,
+ 0xb7, 0x47, 0x34, 0x2f, 0xc2, 0x09, 0xd0, 0x47, 0x02, 0x0e, 0x42, 0x09,
+ 0xda, 0x15, 0xc2, 0x0a, 0x3c, 0x4b, 0x52, 0x39, 0x42, 0x0a, 0x48, 0x47,
+ 0x02, 0x0e, 0xc2, 0x0a, 0xbb, 0x48, 0x56, 0x9a, 0x42, 0x0b, 0x18, 0xcd,
+ 0x77, 0x6d, 0x00, 0xe3, 0xf9, 0xc6, 0x77, 0x74, 0x00, 0xe3, 0xf0, 0x8a,
+ 0x00, 0xe3, 0xb9, 0x98, 0x00, 0xe3, 0xb1, 0x84, 0x00, 0xe3, 0xa9, 0xc2,
+ 0x02, 0x10, 0x00, 0xe3, 0xa0, 0x91, 0x00, 0xe3, 0x99, 0x87, 0x00, 0xe3,
+ 0x71, 0x97, 0x00, 0xe3, 0x49, 0x8b, 0x00, 0xe3, 0x21, 0x83, 0x00, 0xe2,
+ 0xd2, 0x02, 0x0b, 0x2a, 0xc2, 0x01, 0xa3, 0x00, 0xe3, 0x91, 0x90, 0x00,
+ 0xe3, 0x89, 0xc2, 0x04, 0xcd, 0x00, 0xe3, 0x81, 0x92, 0x00, 0xe3, 0x78,
+ 0x9b, 0x00, 0xe3, 0x69, 0xc2, 0x1b, 0x88, 0x00, 0xe3, 0x61, 0x86, 0x00,
+ 0xe3, 0x59, 0x85, 0x00, 0xe3, 0x50, 0x94, 0x00, 0xe3, 0x41, 0xc2, 0x16,
+ 0x59, 0x00, 0xe3, 0x39, 0x8a, 0x00, 0xe3, 0x31, 0x95, 0x00, 0xe3, 0x28,
+ 0x03, 0xc2, 0x0b, 0x2e, 0x8e, 0x00, 0xe2, 0xf1, 0xc2, 0x00, 0x75, 0x00,
+ 0xe2, 0xe9, 0x89, 0x00, 0xe2, 0xe1, 0x96, 0x00, 0xe2, 0xd8, 0xc4, 0x18,
+ 0x10, 0x00, 0xe2, 0xb9, 0xc2, 0x22, 0xcc, 0x00, 0xe2, 0xb0, 0xc3, 0x0d,
+ 0x14, 0x00, 0xe2, 0xa9, 0xc3, 0x09, 0x9e, 0x00, 0xe2, 0xa0, 0xc4, 0x02,
+ 0xde, 0x00, 0xe2, 0x99, 0xc2, 0x02, 0xa0, 0x00, 0xe2, 0x90, 0x46, 0x01,
+ 0xfc, 0xc2, 0x0b, 0x3e, 0xcd, 0x56, 0x88, 0x01, 0x5d, 0xe0, 0xc9, 0xaa,
+ 0x56, 0x00, 0xb4, 0xc9, 0xc5, 0xd7, 0xa9, 0x00, 0xb4, 0xa9, 0xc5, 0xcc,
+ 0x96, 0x00, 0xb4, 0x98, 0xc3, 0x09, 0x38, 0x00, 0xb4, 0xc1, 0xc6, 0xcc,
+ 0x95, 0x00, 0xb4, 0xa0, 0xc7, 0xc7, 0x82, 0x00, 0xb4, 0xb9, 0x94, 0x00,
+ 0xb4, 0x91, 0xc3, 0x04, 0xa7, 0x00, 0xb4, 0x30, 0x94, 0x00, 0xb4, 0xb1,
+ 0xc2, 0x1b, 0x88, 0x00, 0xb4, 0x88, 0xc5, 0xd8, 0xad, 0x00, 0xb4, 0x71,
+ 0xc3, 0x14, 0xa7, 0x00, 0xb4, 0x20, 0xc6, 0xd1, 0x15, 0x00, 0xb4, 0x69,
+ 0xc3, 0x00, 0x44, 0x00, 0xb4, 0x28, 0xc4, 0xe2, 0xef, 0x00, 0xb4, 0x51,
+ 0xc3, 0x1f, 0x48, 0x00, 0xb4, 0x48, 0xc3, 0x00, 0x49, 0x08, 0x24, 0x01,
+ 0x83, 0x08, 0x24, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0x24, 0x29, 0xc3, 0xb8,
+ 0xac, 0x08, 0x24, 0x78, 0xc3, 0x0e, 0x66, 0x08, 0x24, 0x31, 0xc2, 0x00,
+ 0xd0, 0x08, 0x24, 0x50, 0x83, 0x08, 0x24, 0x41, 0xc4, 0xdf, 0xb7, 0x08,
+ 0x24, 0x48, 0x87, 0x08, 0x24, 0xe0, 0x91, 0x08, 0x24, 0xe8, 0xc2, 0x02,
+ 0xa0, 0x08, 0x25, 0x11, 0xc4, 0x02, 0xde, 0x08, 0x25, 0x18, 0xc3, 0x09,
+ 0x9e, 0x08, 0x25, 0x21, 0xc3, 0x0d, 0x14, 0x08, 0x25, 0x28, 0xc2, 0x22,
+ 0xcc, 0x08, 0x25, 0x31, 0xc4, 0x18, 0x10, 0x08, 0x25, 0x38, 0x8b, 0x08,
+ 0x25, 0x8b, 0x02, 0x0b, 0x4a, 0x8a, 0x08, 0x25, 0x98, 0x0a, 0xc2, 0x0b,
+ 0x4e, 0xc2, 0x00, 0x74, 0x08, 0x25, 0xc0, 0x83, 0x08, 0x25, 0xc9, 0xc2,
+ 0x19, 0x2c, 0x08, 0x25, 0xd0, 0x83, 0x08, 0x25, 0xe1, 0xc2, 0x19, 0x2c,
+ 0x08, 0x25, 0xf1, 0xc2, 0x00, 0xd0, 0x08, 0x26, 0x80, 0xc2, 0x00, 0x74,
+ 0x08, 0x26, 0x18, 0x83, 0x08, 0x26, 0x31, 0xc2, 0x00, 0xd0, 0x08, 0x26,
+ 0x38, 0x83, 0x08, 0x26, 0x41, 0x15, 0x42, 0x0b, 0x64, 0x83, 0x08, 0x26,
+ 0x91, 0xc2, 0x00, 0xd0, 0x08, 0x26, 0x98, 0x8b, 0x08, 0x26, 0xcb, 0x02,
+ 0x0b, 0x6e, 0x8a, 0x08, 0x26, 0xd8, 0x0a, 0xc2, 0x0b, 0x72, 0xc2, 0x00,
+ 0x74, 0x08, 0x27, 0x00, 0x83, 0x08, 0x27, 0x09, 0xc2, 0x19, 0x2c, 0x08,
+ 0x27, 0x10, 0x83, 0x08, 0x27, 0x21, 0xc2, 0x19, 0x2c, 0x08, 0x27, 0x31,
+ 0xc2, 0x00, 0xd0, 0x08, 0x27, 0xc0, 0xc2, 0x00, 0x74, 0x08, 0x27, 0x58,
+ 0x83, 0x08, 0x27, 0x71, 0xc2, 0x00, 0xd0, 0x08, 0x27, 0x78, 0x83, 0x08,
+ 0x27, 0x81, 0x15, 0x42, 0x0b, 0x88, 0x83, 0x08, 0x27, 0xd1, 0xc2, 0x00,
+ 0xd0, 0x08, 0x27, 0xd8, 0xc2, 0x14, 0x49, 0x0e, 0x7e, 0x19, 0xc3, 0x9c,
+ 0x8d, 0x0e, 0x7a, 0xe1, 0xc6, 0xcd, 0x49, 0x0e, 0x7a, 0x90, 0xc8, 0xbb,
+ 0x92, 0x0e, 0x7c, 0x81, 0xc8, 0x93, 0xed, 0x0e, 0x7b, 0x80, 0xcf, 0x69,
+ 0x9f, 0x0e, 0x7a, 0xc8, 0xd0, 0x5f, 0x82, 0x0e, 0x7b, 0xa9, 0xc6, 0xcd,
+ 0x85, 0x0e, 0x7b, 0x68, 0x00, 0x42, 0x0b, 0x92, 0xc2, 0x25, 0xa1, 0x0e,
+ 0x7c, 0x09, 0xc2, 0x14, 0x49, 0x0e, 0x7a, 0x82, 0x02, 0x0b, 0xa2, 0x45,
+ 0xd6, 0xfa, 0xc2, 0x0b, 0xa8, 0xc4, 0xe1, 0xc7, 0x0e, 0x7c, 0x33, 0x02,
+ 0x0b, 0xcc, 0xc6, 0xce, 0x03, 0x0e, 0x7a, 0xb2, 0x02, 0x0b, 0xd0, 0x00,
+ 0x42, 0x0b, 0xd4, 0x4d, 0x75, 0xe7, 0xc2, 0x0b, 0xe0, 0x47, 0x87, 0x3a,
+ 0xc2, 0x0b, 0xf8, 0x16, 0xc2, 0x0c, 0x04, 0xc8, 0x4e, 0x4b, 0x0e, 0x7b,
+ 0x91, 0xc9, 0xa9, 0x48, 0x0e, 0x7b, 0x88, 0x47, 0x87, 0x3a, 0xc2, 0x0c,
+ 0x10, 0xc7, 0xc8, 0x69, 0x0e, 0x7d, 0x40, 0xc7, 0x2d, 0x19, 0x0e, 0x7a,
+ 0xe9, 0xc6, 0xcb, 0xdb, 0x0e, 0x7a, 0xa8, 0xcb, 0x93, 0x7d, 0x0e, 0x7b,
+ 0x51, 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, 0x49, 0xc9, 0xa9, 0x48, 0x0e, 0x7b,
+ 0x41, 0xc8, 0xbf, 0x6a, 0x0e, 0x7b, 0x38, 0xc8, 0xbf, 0x8a, 0x0e, 0x7b,
+ 0x11, 0xc4, 0xca, 0xab, 0x0e, 0x7a, 0xf8, 0xc4, 0x78, 0xdc, 0x0e, 0x7a,
+ 0x03, 0x02, 0x0c, 0x22, 0xc5, 0xdb, 0xb4, 0x0e, 0x79, 0x49, 0xc6, 0xcd,
+ 0xeb, 0x0e, 0x79, 0x40, 0xca, 0x9b, 0x26, 0x0e, 0x79, 0xf9, 0xc6, 0xd2,
+ 0xad, 0x0e, 0x79, 0xc2, 0x02, 0x0c, 0x28, 0xc9, 0xb3, 0x56, 0x0e, 0x79,
+ 0xe9, 0xd4, 0x3e, 0xf8, 0x0e, 0x79, 0xa0, 0xc5, 0xbe, 0xad, 0x0e, 0x79,
+ 0xe1, 0xc6, 0x6d, 0xaa, 0x0e, 0x79, 0x19, 0x45, 0xda, 0x15, 0x42, 0x0c,
+ 0x2e, 0xce, 0x38, 0x5a, 0x0e, 0x79, 0xd9, 0xc4, 0xe0, 0xab, 0x0e, 0x79,
+ 0x59, 0xd3, 0x42, 0x68, 0x0e, 0x78, 0xd1, 0x49, 0xa9, 0xd8, 0x42, 0x0c,
+ 0x3a, 0xc7, 0xc5, 0xde, 0x0e, 0x79, 0xd1, 0xc7, 0xca, 0x5a, 0x0e, 0x79,
+ 0xa9, 0x90, 0x0e, 0x79, 0x08, 0x06, 0xc2, 0x0c, 0x46, 0x46, 0x75, 0x93,
+ 0x42, 0x0c, 0x55, 0xc8, 0x3f, 0x04, 0x0e, 0x79, 0x99, 0x07, 0x42, 0x0c,
+ 0x5f, 0xc5, 0xd6, 0x50, 0x0e, 0x79, 0x61, 0xc3, 0xe5, 0x72, 0x0e, 0x79,
+ 0x10, 0xc6, 0xc2, 0x7a, 0x0e, 0x78, 0xf9, 0x46, 0xcd, 0xdf, 0x42, 0x0c,
+ 0x6b, 0x15, 0xc2, 0x0c, 0x77, 0x43, 0x01, 0x55, 0x42, 0x0c, 0x83, 0x43,
+ 0x3d, 0xd0, 0xc2, 0x0c, 0x8f, 0x43, 0x01, 0x55, 0x42, 0x0c, 0x9b, 0x43,
+ 0x01, 0x55, 0xc2, 0x0c, 0xa7, 0x4d, 0x78, 0xd9, 0x42, 0x0c, 0xb3, 0xc5,
+ 0x40, 0xe7, 0x08, 0xd1, 0xc9, 0xc4, 0x1e, 0x97, 0x08, 0xd1, 0xa0, 0xce,
+ 0x1e, 0x74, 0x08, 0xd1, 0xb9, 0xc5, 0x1e, 0x8f, 0x08, 0xd1, 0xaa, 0x02,
+ 0x0c, 0xbf, 0xc2, 0x02, 0x41, 0x08, 0xd1, 0xf1, 0xc2, 0x00, 0xdb, 0x08,
+ 0xd1, 0xe9, 0xc2, 0x00, 0x39, 0x08, 0xd1, 0xe1, 0xc2, 0x19, 0x2c, 0x08,
+ 0xd1, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0xd1, 0x31, 0x83, 0x08, 0xd1, 0x28,
+ 0xc2, 0x00, 0xd0, 0x08, 0xd0, 0xb9, 0x83, 0x08, 0xd0, 0xb0, 0xc2, 0x00,
+ 0xd0, 0x08, 0xd1, 0x21, 0x83, 0x08, 0xd1, 0x18, 0xc2, 0x00, 0xd0, 0x08,
+ 0xd0, 0xa9, 0x83, 0x08, 0xd0, 0xa0, 0x97, 0x08, 0xd0, 0x41, 0x8b, 0x08,
+ 0xd0, 0x38, 0x87, 0x08, 0xd0, 0x28, 0x87, 0x08, 0xd0, 0x10, 0xc9, 0xaf,
+ 0x1e, 0x01, 0x51, 0x09, 0xc5, 0xd5, 0x6a, 0x01, 0x51, 0x00, 0x03, 0xc2,
+ 0x0c, 0xc5, 0x12, 0xc2, 0x0c, 0xd4, 0xc5, 0xd5, 0x56, 0x05, 0x4e, 0x31,
+ 0x0e, 0xc2, 0x0c, 0xe0, 0xc5, 0xdb, 0x91, 0x05, 0x4e, 0x21, 0xcd, 0x79,
+ 0x9c, 0x05, 0x4e, 0xf1, 0xc9, 0xaa, 0xe6, 0x05, 0x4e, 0xf8, 0xc7, 0xc5,
+ 0x13, 0x05, 0x4e, 0x79, 0xc3, 0x1f, 0x62, 0x05, 0x4e, 0x00, 0xc2, 0x01,
+ 0x30, 0x05, 0x4c, 0x93, 0x02, 0x0c, 0xea, 0xc2, 0x00, 0xd0, 0x05, 0x4d,
+ 0x91, 0xc2, 0x0d, 0xf6, 0x05, 0x4d, 0x8b, 0x02, 0x0c, 0xf0, 0xc2, 0x01,
+ 0x4a, 0x05, 0x4d, 0x71, 0xc2, 0x00, 0xdb, 0x05, 0x4d, 0x69, 0xc2, 0x00,
+ 0x39, 0x05, 0x4d, 0x5b, 0x02, 0x0c, 0xf6, 0xc2, 0x19, 0x2c, 0x05, 0x4d,
+ 0x51, 0xc2, 0x01, 0xc3, 0x05, 0x4d, 0x49, 0xc2, 0x01, 0x5d, 0x05, 0x4d,
+ 0x3b, 0x02, 0x0c, 0xfc, 0xc2, 0x00, 0xb0, 0x05, 0x4d, 0x2b, 0x02, 0x0d,
+ 0x02, 0x10, 0xc2, 0x0d, 0x06, 0x06, 0xc2, 0x0d, 0x1f, 0x16, 0xc2, 0x0d,
+ 0x2f, 0xc2, 0x25, 0x3b, 0x05, 0x4c, 0xbb, 0x02, 0x0d, 0x3f, 0xc2, 0x00,
+ 0x64, 0x05, 0x4c, 0xab, 0x02, 0x0d, 0x45, 0xc2, 0x02, 0x2b, 0x05, 0x4c,
+ 0x7b, 0x02, 0x0d, 0x4b, 0x91, 0x05, 0x4c, 0x71, 0x83, 0x05, 0x4c, 0x23,
+ 0x02, 0x0d, 0x4f, 0x87, 0x05, 0x4c, 0x61, 0x97, 0x05, 0x4c, 0x41, 0x8b,
+ 0x05, 0x4c, 0x32, 0x02, 0x0d, 0x53, 0xc4, 0x02, 0xde, 0x05, 0x4e, 0x99,
+ 0xc2, 0x02, 0xa0, 0x05, 0x4e, 0x90, 0xc3, 0x09, 0x9e, 0x05, 0x4e, 0xa1,
+ 0xc3, 0x0d, 0x14, 0x05, 0x4e, 0xa8, 0xc2, 0x22, 0xcc, 0x05, 0x4e, 0xb1,
+ 0xc4, 0x18, 0x10, 0x05, 0x4e, 0xb8, 0x03, 0xc2, 0x0d, 0x5d, 0xc5, 0x0d,
+ 0xe4, 0x05, 0x4d, 0xa8, 0xc7, 0xc5, 0x91, 0x05, 0x4d, 0xc8, 0xc6, 0xcb,
+ 0xb1, 0x05, 0x4d, 0xb8, 0xc5, 0xda, 0x8d, 0x05, 0x4d, 0x98, 0xc5, 0x00,
+ 0x2c, 0x01, 0x2c, 0xeb, 0x02, 0x0d, 0x69, 0xc4, 0x00, 0x49, 0x01, 0x2c,
+ 0xc2, 0x02, 0x0d, 0x72, 0xc5, 0x00, 0x2c, 0x01, 0x2c, 0xb9, 0xc4, 0x00,
+ 0x49, 0x01, 0x2c, 0xb0, 0x1b, 0xc2, 0x0d, 0x78, 0x0c, 0xc2, 0x0d, 0x8d,
+ 0x14, 0xc2, 0x0d, 0xa9, 0x09, 0xc2, 0x0d, 0xcc, 0x1c, 0xc2, 0x0d, 0xf3,
+ 0x04, 0xc2, 0x0e, 0x1a, 0x06, 0xc2, 0x0e, 0x3d, 0x8b, 0x05, 0x0b, 0xfb,
+ 0x02, 0x0e, 0x60, 0x83, 0x05, 0x0c, 0x2b, 0x02, 0x0e, 0x73, 0x97, 0x05,
+ 0x0c, 0x9b, 0x02, 0x0e, 0x7b, 0x91, 0x05, 0x0c, 0x63, 0x02, 0x0e, 0x95,
+ 0x87, 0x05, 0x0c, 0x7a, 0x02, 0x0e, 0xa1, 0x0c, 0xc2, 0x0e, 0xa9, 0x9b,
+ 0x05, 0x1f, 0xc3, 0x02, 0x0e, 0xc5, 0x97, 0x05, 0x1f, 0x93, 0x02, 0x0e,
+ 0xd8, 0x91, 0x05, 0x1f, 0x73, 0x02, 0x0e, 0xf2, 0x8b, 0x05, 0x1f, 0x12,
+ 0x02, 0x0e, 0xfe, 0x9b, 0x05, 0x20, 0xa3, 0x02, 0x0f, 0x11, 0x97, 0x05,
+ 0x20, 0x73, 0x02, 0x0f, 0x24, 0x91, 0x05, 0x20, 0x53, 0x02, 0x0f, 0x3e,
+ 0x8b, 0x05, 0x1f, 0xf2, 0x02, 0x0f, 0x4a, 0x9b, 0x05, 0x1e, 0xe3, 0x02,
+ 0x0f, 0x5d, 0x97, 0x05, 0x1e, 0xb3, 0x02, 0x0f, 0x70, 0x87, 0x05, 0x1e,
+ 0x93, 0x02, 0x0f, 0x8a, 0x91, 0x05, 0x1e, 0x7b, 0x02, 0x0f, 0x92, 0x83,
+ 0x05, 0x1e, 0x43, 0x02, 0x0f, 0x9e, 0x14, 0x42, 0x0f, 0xaa, 0x0a, 0xc2,
+ 0x0f, 0xcd, 0x15, 0xc2, 0x0f, 0xf0, 0x8b, 0x05, 0x18, 0x5b, 0x02, 0x10,
+ 0x1a, 0x83, 0x05, 0x18, 0x93, 0x02, 0x10, 0x2d, 0x97, 0x05, 0x19, 0x03,
+ 0x02, 0x10, 0x39, 0x91, 0x05, 0x18, 0xcb, 0x02, 0x10, 0x53, 0x87, 0x05,
+ 0x18, 0xe3, 0x02, 0x10, 0x5f, 0x9b, 0x05, 0x19, 0x32, 0x02, 0x10, 0x67,
+ 0x0a, 0xc2, 0x10, 0x7a, 0x9b, 0x05, 0x16, 0x63, 0x02, 0x10, 0x9d, 0x87,
+ 0x05, 0x16, 0x13, 0x02, 0x10, 0xb0, 0x97, 0x05, 0x16, 0x33, 0x02, 0x10,
+ 0xb8, 0x8b, 0x05, 0x15, 0x83, 0x02, 0x10, 0xd2, 0x83, 0x05, 0x15, 0xc3,
+ 0x02, 0x10, 0xe5, 0x91, 0x05, 0x15, 0xfa, 0x02, 0x10, 0xf1, 0x87, 0x05,
+ 0x15, 0x03, 0x02, 0x10, 0xfd, 0x91, 0x05, 0x14, 0xeb, 0x02, 0x11, 0x05,
+ 0x97, 0x05, 0x15, 0x23, 0x02, 0x11, 0x11, 0x83, 0x05, 0x14, 0xb3, 0x02,
+ 0x11, 0x2b, 0x8b, 0x05, 0x14, 0x7b, 0x02, 0x11, 0x37, 0x1c, 0xc2, 0x11,
+ 0x4a, 0x0a, 0xc2, 0x11, 0x74, 0x9b, 0x05, 0x15, 0x52, 0x02, 0x11, 0x97,
+ 0x87, 0x05, 0x14, 0x5b, 0x02, 0x11, 0xaa, 0x91, 0x05, 0x14, 0x43, 0x02,
+ 0x11, 0xb2, 0x97, 0x05, 0x00, 0xab, 0x02, 0x11, 0xba, 0x83, 0x05, 0x14,
+ 0x12, 0x02, 0x11, 0xc1, 0x87, 0x05, 0x13, 0xf3, 0x02, 0x11, 0xcd, 0x1a,
+ 0xc2, 0x11, 0xd5, 0x0b, 0xc2, 0x11, 0xfa, 0x83, 0x05, 0x13, 0x9b, 0x02,
+ 0x12, 0x05, 0xc2, 0x01, 0xba, 0x05, 0x13, 0xbb, 0x02, 0x12, 0x11, 0x91,
+ 0x05, 0x13, 0xdb, 0x02, 0x12, 0x1d, 0x0f, 0xc2, 0x12, 0x29, 0x10, 0xc2,
+ 0x12, 0x4c, 0x0e, 0x42, 0x12, 0x69, 0x8b, 0x05, 0x23, 0x9b, 0x02, 0x12,
+ 0x93, 0x97, 0x05, 0x24, 0x1b, 0x02, 0x12, 0xa6, 0x91, 0x05, 0x23, 0xfb,
+ 0x02, 0x12, 0xc0, 0x9b, 0x05, 0x24, 0x4a, 0x02, 0x12, 0xcc, 0x9b, 0x05,
+ 0x23, 0x6b, 0x02, 0x12, 0xdf, 0x8b, 0x05, 0x22, 0xfb, 0x02, 0x12, 0xf2,
+ 0x91, 0x05, 0x23, 0x4b, 0x02, 0x13, 0x05, 0xc2, 0x01, 0xba, 0x05, 0x23,
+ 0x32, 0x02, 0x13, 0x11, 0x09, 0xc2, 0x13, 0x15, 0x8b, 0x05, 0x05, 0x83,
+ 0x02, 0x13, 0x3a, 0x83, 0x05, 0x05, 0xbb, 0x02, 0x13, 0x4d, 0x97, 0x05,
+ 0x06, 0x2b, 0x02, 0x13, 0x59, 0x91, 0x05, 0x05, 0xfb, 0x02, 0x13, 0x73,
+ 0x87, 0x05, 0x06, 0x13, 0x02, 0x13, 0x7f, 0x9b, 0x05, 0x06, 0x5a, 0x02,
+ 0x13, 0x83, 0x96, 0x05, 0x00, 0x03, 0x02, 0x13, 0x8f, 0x9a, 0x05, 0x00,
+ 0x09, 0x92, 0x05, 0x00, 0x19, 0x87, 0x05, 0x00, 0x32, 0x02, 0x13, 0x95,
+ 0x96, 0x05, 0x00, 0x41, 0x9a, 0x05, 0x00, 0x49, 0x92, 0x05, 0x00, 0x58,
+ 0x9a, 0x05, 0x00, 0x61, 0x92, 0x05, 0x00, 0x70, 0x96, 0x05, 0x00, 0x79,
+ 0x9a, 0x05, 0x00, 0x81, 0x92, 0x05, 0x00, 0x90, 0x9a, 0x05, 0x00, 0x98,
+ 0x8b, 0x05, 0x00, 0xc3, 0x02, 0x13, 0xa1, 0x83, 0x05, 0x01, 0x03, 0x02,
+ 0x13, 0xb4, 0x97, 0x05, 0x01, 0x73, 0x02, 0x13, 0xc0, 0x91, 0x05, 0x01,
+ 0x3b, 0x02, 0x13, 0xda, 0x87, 0x05, 0x01, 0x53, 0x02, 0x13, 0xe6, 0x9b,
+ 0x05, 0x01, 0xa3, 0x02, 0x13, 0xee, 0x04, 0x42, 0x14, 0x01, 0x8b, 0x05,
+ 0x01, 0xd3, 0x02, 0x14, 0x2b, 0x83, 0x05, 0x02, 0x0b, 0x02, 0x14, 0x3e,
+ 0x97, 0x05, 0x02, 0x63, 0x02, 0x14, 0x4a, 0x91, 0x05, 0x02, 0x43, 0x02,
+ 0x14, 0x64, 0x9b, 0x05, 0x02, 0x92, 0x02, 0x14, 0x70, 0x8b, 0x05, 0x06,
+ 0x7b, 0x02, 0x14, 0x83, 0x83, 0x05, 0x06, 0x9b, 0x02, 0x14, 0x8f, 0x91,
+ 0x05, 0x06, 0xb3, 0x02, 0x14, 0x9b, 0x97, 0x05, 0x06, 0xd3, 0x02, 0x14,
+ 0xa3, 0x9b, 0x05, 0x07, 0x02, 0x02, 0x14, 0xb6, 0x8b, 0x05, 0x07, 0x23,
+ 0x02, 0x14, 0xc2, 0x83, 0x05, 0x07, 0x63, 0x02, 0x14, 0xd5, 0x91, 0x05,
+ 0x07, 0x83, 0x02, 0x14, 0xe1, 0x07, 0xc2, 0x14, 0xed, 0x97, 0x05, 0x07,
+ 0xb3, 0x02, 0x14, 0xf5, 0x9b, 0x05, 0x07, 0xe2, 0x02, 0x15, 0x08, 0x8b,
+ 0x05, 0x08, 0x13, 0x02, 0x15, 0x1b, 0x83, 0x05, 0x08, 0x4b, 0x02, 0x15,
+ 0x2e, 0x97, 0x05, 0x08, 0xb3, 0x02, 0x15, 0x3a, 0x91, 0x05, 0x08, 0x7b,
+ 0x02, 0x15, 0x54, 0x87, 0x05, 0x08, 0x93, 0x02, 0x15, 0x60, 0x06, 0x42,
+ 0x15, 0x68, 0x8b, 0x05, 0x08, 0xe3, 0x02, 0x15, 0x8b, 0x83, 0x05, 0x09,
+ 0x1b, 0x02, 0x15, 0x9e, 0x97, 0x05, 0x09, 0x93, 0x02, 0x15, 0xaa, 0x91,
+ 0x05, 0x09, 0x5b, 0x02, 0x15, 0xc4, 0x87, 0x05, 0x09, 0x72, 0x02, 0x15,
+ 0xd0, 0x8b, 0x05, 0x0d, 0xcb, 0x02, 0x15, 0xd8, 0x83, 0x05, 0x0e, 0x0b,
+ 0x02, 0x15, 0xeb, 0x97, 0x05, 0x0e, 0x83, 0x02, 0x15, 0xf7, 0x91, 0x05,
+ 0x0e, 0x4b, 0x02, 0x16, 0x11, 0x87, 0x05, 0x0e, 0x63, 0x02, 0x16, 0x1d,
+ 0x9b, 0x05, 0x0e, 0xb2, 0x02, 0x16, 0x25, 0x8b, 0x05, 0x0e, 0xe3, 0x02,
+ 0x16, 0x38, 0x83, 0x05, 0x0f, 0x23, 0x02, 0x16, 0x4b, 0x97, 0x05, 0x0f,
+ 0xa3, 0x02, 0x16, 0x57, 0x91, 0x05, 0x0f, 0x63, 0x02, 0x16, 0x71, 0x87,
+ 0x05, 0x0f, 0x83, 0x02, 0x16, 0x7d, 0x09, 0x42, 0x16, 0x89, 0x8b, 0x05,
+ 0x0f, 0xd3, 0x02, 0x16, 0xac, 0x83, 0x05, 0x10, 0x0b, 0x02, 0x16, 0xbf,
+ 0x97, 0x05, 0x10, 0x83, 0x02, 0x16, 0xcb, 0x91, 0x05, 0x10, 0x43, 0x02,
+ 0x16, 0xe5, 0x87, 0x05, 0x10, 0x62, 0x02, 0x16, 0xf1, 0x8b, 0x05, 0x24,
+ 0x8b, 0x02, 0x16, 0xfd, 0xc2, 0x1d, 0xc1, 0x05, 0x24, 0xd0, 0xc2, 0x00,
+ 0x8d, 0x05, 0x24, 0x91, 0x87, 0x05, 0x26, 0x30, 0x1b, 0xc2, 0x17, 0x01,
+ 0xc3, 0xe4, 0xe8, 0x05, 0x25, 0xa1, 0xc3, 0xa9, 0x68, 0x05, 0x26, 0x28,
+ 0x9b, 0x05, 0x25, 0xe3, 0x02, 0x17, 0x0d, 0xc3, 0xe4, 0xe5, 0x05, 0x25,
+ 0xe9, 0xc2, 0x00, 0x7e, 0x05, 0x25, 0xf1, 0xc2, 0x01, 0x7f, 0x05, 0x26,
+ 0x18, 0xc2, 0x00, 0xba, 0x05, 0x24, 0xa9, 0x0a, 0x42, 0x17, 0x15, 0x09,
+ 0xc2, 0x17, 0x2b, 0xc2, 0x02, 0x37, 0x05, 0x24, 0xb9, 0x83, 0x05, 0x25,
+ 0x09, 0xc2, 0x01, 0xbb, 0x05, 0x25, 0xb0, 0x8b, 0x05, 0x24, 0xc1, 0xc2,
+ 0x00, 0x11, 0x05, 0x24, 0xe0, 0x1a, 0xc2, 0x17, 0x37, 0xc2, 0x00, 0xa2,
+ 0x05, 0x25, 0x68, 0xc3, 0x02, 0xaa, 0x05, 0x24, 0xd9, 0xc2, 0x00, 0x33,
+ 0x05, 0x25, 0x28, 0x91, 0x05, 0x24, 0xe9, 0xc2, 0x00, 0x8d, 0x05, 0x25,
+ 0x70, 0xc2, 0x00, 0xa4, 0x05, 0x24, 0xf1, 0xc2, 0x63, 0xd6, 0x05, 0x25,
+ 0x60, 0xc2, 0x00, 0xfe, 0x05, 0x25, 0x01, 0x97, 0x05, 0x25, 0x40, 0x17,
+ 0xc2, 0x17, 0x49, 0xc2, 0x01, 0xbb, 0x05, 0x25, 0x59, 0x83, 0x05, 0x25,
+ 0x91, 0xc4, 0xdf, 0x23, 0x05, 0x26, 0x20, 0xc3, 0x66, 0x20, 0x05, 0x25,
+ 0x21, 0x97, 0x05, 0x25, 0xc8, 0x0c, 0xc2, 0x17, 0x51, 0x91, 0x05, 0x25,
+ 0x98, 0xc2, 0x00, 0x33, 0x05, 0x25, 0x79, 0xc2, 0x02, 0x37, 0x05, 0x25,
+ 0x88, 0xd6, 0x30, 0x64, 0x08, 0x75, 0x88, 0xcf, 0x33, 0xad, 0x08, 0x75,
+ 0x80, 0x96, 0x08, 0x75, 0x49, 0x99, 0x08, 0x75, 0x31, 0xc2, 0x17, 0xb6,
+ 0x08, 0x74, 0xb9, 0xc3, 0x6b, 0x53, 0x08, 0x74, 0x00, 0xc2, 0x0c, 0x42,
+ 0x08, 0x75, 0x39, 0xc2, 0x00, 0xd0, 0x08, 0x74, 0x48, 0xc3, 0x48, 0x60,
+ 0x08, 0x74, 0xf1, 0xc2, 0x0f, 0x9b, 0x08, 0x74, 0xe8, 0xcf, 0x6b, 0x25,
+ 0x08, 0x74, 0xd8, 0xc4, 0xdf, 0xa3, 0x08, 0x74, 0xc1, 0x83, 0x08, 0x74,
+ 0x50, 0x87, 0x08, 0x74, 0xb1, 0x83, 0x08, 0x74, 0x7a, 0x02, 0x17, 0x61,
+ 0x83, 0x08, 0x74, 0xa9, 0xc2, 0x01, 0x7f, 0x08, 0x74, 0x20, 0x86, 0x08,
+ 0x74, 0xa1, 0x8e, 0x08, 0x74, 0x58, 0xc2, 0x01, 0x9d, 0x08, 0x74, 0x99,
+ 0xc3, 0x11, 0xef, 0x08, 0x74, 0x91, 0xc2, 0x00, 0x74, 0x08, 0x74, 0x89,
+ 0x87, 0x08, 0x74, 0x28, 0xc2, 0x00, 0xd0, 0x08, 0x74, 0x71, 0x83, 0x08,
+ 0x74, 0x68, 0x0a, 0xc2, 0x17, 0x65, 0xc2, 0x03, 0x4e, 0x08, 0x74, 0x30,
+ 0xc2, 0x01, 0x7f, 0x08, 0x74, 0x19, 0x87, 0x08, 0x74, 0x10, 0xc9, 0x1c,
+ 0x63, 0x00, 0x04, 0xa1, 0xc3, 0x16, 0x32, 0x70, 0x03, 0xf8, 0x83, 0x08,
+ 0xd5, 0xf9, 0x91, 0x08, 0xd5, 0xf1, 0x8b, 0x08, 0xd5, 0xe9, 0x87, 0x08,
+ 0xd5, 0xe0, 0x9b, 0x00, 0xc5, 0xfb, 0x02, 0x17, 0x71, 0x83, 0x00, 0xa7,
+ 0xaa, 0x02, 0x17, 0x77, 0x19, 0xc2, 0x17, 0x7b, 0x83, 0x00, 0xa8, 0xab,
+ 0x02, 0x17, 0x94, 0x91, 0x00, 0xa8, 0x9b, 0x02, 0x17, 0x9c, 0x8b, 0x00,
+ 0xa8, 0x8b, 0x02, 0x17, 0xa4, 0x87, 0x00, 0xa8, 0x80, 0x9b, 0x00, 0xc5,
+ 0xf1, 0x4c, 0x86, 0x01, 0xc2, 0x17, 0xa8, 0x91, 0x00, 0xa7, 0x90, 0x83,
+ 0x00, 0xa8, 0x03, 0x02, 0x17, 0xc0, 0x87, 0x00, 0xa7, 0xb1, 0x8b, 0x00,
+ 0xa7, 0xc3, 0x02, 0x17, 0xc4, 0x91, 0x00, 0xa7, 0xe2, 0x02, 0x17, 0xc8,
+ 0x8b, 0x00, 0xa7, 0x80, 0x47, 0xc6, 0x8d, 0xc2, 0x17, 0xcc, 0x9b, 0x00,
+ 0xc5, 0xe1, 0x46, 0xd3, 0x4f, 0xc2, 0x17, 0xd6, 0x83, 0x00, 0xa6, 0x42,
+ 0x02, 0x18, 0x02, 0x91, 0x00, 0xc6, 0x53, 0x02, 0x18, 0x06, 0x8b, 0x00,
+ 0xc6, 0x33, 0x02, 0x18, 0x0a, 0x87, 0x00, 0xa6, 0x49, 0x83, 0x00, 0xa6,
+ 0x5a, 0x02, 0x18, 0x0e, 0x9b, 0x00, 0xc5, 0xd9, 0x91, 0x00, 0xa6, 0x28,
+ 0x83, 0x00, 0xb3, 0xab, 0x02, 0x18, 0x12, 0x91, 0x00, 0xb3, 0x9b, 0x02,
+ 0x18, 0x16, 0x8b, 0x00, 0xb3, 0x8a, 0x02, 0x18, 0x1a, 0x83, 0x00, 0xac,
+ 0x9b, 0x02, 0x18, 0x1e, 0x91, 0x00, 0xac, 0x8b, 0x02, 0x18, 0x29, 0x8b,
+ 0x00, 0xac, 0x7a, 0x02, 0x18, 0x2d, 0xc4, 0x4b, 0x20, 0x00, 0xab, 0xe1,
+ 0xc4, 0xe1, 0x1f, 0x00, 0xab, 0xda, 0x02, 0x18, 0x31, 0x8b, 0x00, 0xab,
+ 0x0b, 0x02, 0x18, 0x4a, 0x87, 0x00, 0xaa, 0xf8, 0x8b, 0x00, 0xa6, 0x18,
+ 0x46, 0x69, 0x75, 0xc2, 0x18, 0x4e, 0x83, 0x00, 0xa4, 0x8a, 0x02, 0x18,
+ 0xa6, 0x91, 0x00, 0xa4, 0xc3, 0x02, 0x18, 0xaa, 0x8b, 0x00, 0xa4, 0xa3,
+ 0x02, 0x18, 0xae, 0x87, 0x00, 0xa4, 0x91, 0x83, 0x00, 0xa4, 0xe2, 0x02,
+ 0x18, 0xb2, 0x91, 0x00, 0xa4, 0x70, 0x8b, 0x00, 0xa4, 0x60, 0x94, 0x00,
+ 0xc7, 0xa1, 0x8e, 0x00, 0xc7, 0x98, 0x99, 0x00, 0xb3, 0xfb, 0x02, 0x18,
+ 0xb6, 0x0d, 0xc2, 0x18, 0xc6, 0x10, 0xc2, 0x18, 0xd6, 0x83, 0x00, 0xad,
+ 0x99, 0x91, 0x00, 0xad, 0x91, 0x8b, 0x00, 0xad, 0x89, 0x87, 0x00, 0xad,
+ 0x81, 0x95, 0x00, 0xa8, 0x40, 0x91, 0x00, 0xac, 0x43, 0x02, 0x18, 0xe6,
+ 0xc2, 0x00, 0x28, 0x00, 0xc7, 0x41, 0x83, 0x00, 0xac, 0x49, 0x8b, 0x00,
+ 0xac, 0x39, 0x87, 0x00, 0xac, 0x30, 0x8a, 0x00, 0xab, 0x7b, 0x02, 0x18,
+ 0xea, 0x87, 0x00, 0xa3, 0x39, 0x8b, 0x00, 0xa3, 0x41, 0x91, 0x00, 0xa3,
+ 0x49, 0x83, 0x00, 0xa3, 0x50, 0x19, 0xc2, 0x19, 0x06, 0xc8, 0xbc, 0x52,
+ 0x00, 0xad, 0x73, 0x02, 0x19, 0x11, 0x83, 0x00, 0xab, 0x33, 0x02, 0x19,
+ 0x2a, 0x91, 0x00, 0xab, 0x23, 0x02, 0x19, 0x2e, 0x8b, 0x00, 0xab, 0x03,
+ 0x02, 0x19, 0x32, 0x87, 0x00, 0xaa, 0xf0, 0x9b, 0x00, 0xc5, 0xb9, 0x83,
+ 0x00, 0xa2, 0xb2, 0x02, 0x19, 0x36, 0x83, 0x00, 0xab, 0x99, 0x91, 0x00,
+ 0xab, 0x91, 0x8b, 0x00, 0xab, 0x89, 0x87, 0x00, 0xab, 0x80, 0x91, 0x00,
+ 0xa2, 0xeb, 0x02, 0x19, 0x3a, 0x8b, 0x00, 0xa2, 0xcb, 0x02, 0x19, 0x3e,
+ 0x87, 0x00, 0xa2, 0xb9, 0x83, 0x00, 0xa3, 0x0a, 0x02, 0x19, 0x42, 0x91,
+ 0x00, 0xa2, 0x88, 0x8b, 0x00, 0xa2, 0x78, 0x42, 0x00, 0x15, 0x42, 0x19,
+ 0x46, 0x9b, 0x00, 0xc5, 0x99, 0x83, 0x00, 0xa0, 0xc8, 0x91, 0x00, 0xa0,
+ 0xa2, 0x02, 0x19, 0x52, 0x8b, 0x00, 0xa0, 0x80, 0xc2, 0x00, 0x28, 0x00,
+ 0xc7, 0x01, 0x87, 0x00, 0xaa, 0x18, 0x83, 0x00, 0xc6, 0x9b, 0x02, 0x19,
+ 0x58, 0x91, 0x00, 0xc6, 0x8b, 0x02, 0x19, 0x5c, 0x8b, 0x00, 0xc6, 0x7b,
+ 0x02, 0x19, 0x60, 0xc2, 0x02, 0xe0, 0x00, 0xc6, 0x70, 0x9b, 0x00, 0xc6,
+ 0x29, 0x83, 0x00, 0xaa, 0x62, 0x02, 0x19, 0x64, 0x91, 0x00, 0xaa, 0x48,
+ 0x8b, 0x00, 0xaa, 0x38, 0x44, 0x10, 0x6a, 0xc2, 0x19, 0x68, 0x8b, 0x00,
+ 0xaa, 0xb0, 0x83, 0x00, 0xaa, 0xd2, 0x02, 0x19, 0x9a, 0x91, 0x00, 0xaa,
+ 0xc0, 0x95, 0x00, 0xc6, 0xd3, 0x02, 0x19, 0x9e, 0x90, 0x00, 0xc6, 0xcb,
+ 0x02, 0x19, 0xa2, 0x8f, 0x00, 0xc6, 0xc1, 0x85, 0x00, 0xc6, 0xb9, 0x8d,
+ 0x00, 0xc6, 0xb1, 0x96, 0x00, 0xc6, 0xa9, 0x92, 0x00, 0xc6, 0xa0, 0x9b,
+ 0x00, 0xc6, 0x21, 0x83, 0x00, 0xa9, 0x72, 0x02, 0x19, 0xa6, 0x9b, 0x00,
+ 0xc6, 0x19, 0x91, 0x00, 0xa9, 0x58, 0x83, 0x00, 0xa9, 0xcb, 0x02, 0x19,
+ 0xaa, 0x91, 0x00, 0xa9, 0xab, 0x02, 0x19, 0xae, 0x8b, 0x00, 0xa9, 0x8b,
+ 0x02, 0x19, 0xb2, 0x87, 0x00, 0xa9, 0x78, 0xc3, 0x4d, 0xc4, 0x00, 0xa9,
+ 0x61, 0xc3, 0x2b, 0xd4, 0x00, 0xa2, 0x91, 0x12, 0xc2, 0x19, 0xb6, 0xc3,
+ 0x90, 0xd8, 0x00, 0xa4, 0x79, 0xc2, 0x01, 0x24, 0x00, 0xa0, 0x39, 0x99,
+ 0x00, 0xa0, 0xe9, 0xc3, 0x15, 0xdb, 0x00, 0xa5, 0x49, 0xc3, 0x11, 0xf1,
+ 0x00, 0xa6, 0x31, 0xc3, 0x15, 0x31, 0x00, 0xa6, 0xc9, 0xc3, 0x19, 0xe1,
+ 0x00, 0xa7, 0x99, 0xc3, 0xd5, 0x5e, 0x00, 0xa3, 0x88, 0x8b, 0x00, 0xa9,
+ 0x48, 0x9b, 0x00, 0xc5, 0xe9, 0x83, 0x00, 0xa6, 0xda, 0x02, 0x19, 0xc2,
+ 0x83, 0x00, 0xad, 0x23, 0x02, 0x19, 0xc6, 0x91, 0x00, 0xad, 0x13, 0x02,
+ 0x19, 0xca, 0x8b, 0x00, 0xad, 0x02, 0x02, 0x19, 0xce, 0x8b, 0x00, 0xa6,
+ 0xb0, 0x91, 0x00, 0xa6, 0xc0, 0x87, 0x00, 0xa6, 0xe1, 0x8b, 0x00, 0xa6,
+ 0xf3, 0x02, 0x19, 0xd2, 0x91, 0x00, 0xa7, 0x13, 0x02, 0x19, 0xd6, 0x83,
+ 0x00, 0xa7, 0x32, 0x02, 0x19, 0xda, 0x9b, 0x00, 0xc5, 0xd1, 0x83, 0x00,
+ 0xa5, 0x5a, 0x02, 0x19, 0xde, 0x45, 0x30, 0xa1, 0x42, 0x19, 0xe2, 0x91,
+ 0x00, 0xa5, 0x42, 0x02, 0x19, 0xea, 0x8b, 0x00, 0xa5, 0x30, 0x87, 0x00,
+ 0xa5, 0x61, 0x8b, 0x00, 0xa5, 0x73, 0x02, 0x19, 0xf0, 0x91, 0x00, 0xa5,
+ 0x93, 0x02, 0x19, 0xf4, 0x83, 0x00, 0xa5, 0xb2, 0x02, 0x19, 0xf8, 0x83,
+ 0x00, 0xa3, 0xf3, 0x02, 0x19, 0xfc, 0x87, 0x00, 0xa3, 0xa1, 0x8b, 0x00,
+ 0xa3, 0xb3, 0x02, 0x1a, 0x04, 0x91, 0x00, 0xa3, 0xd2, 0x02, 0x1a, 0x08,
+ 0x9b, 0x00, 0xc5, 0xc1, 0x83, 0x00, 0xa3, 0x9a, 0x02, 0x1a, 0x0c, 0x8b,
+ 0x00, 0xa3, 0x70, 0x91, 0x00, 0xa3, 0x80, 0x91, 0x00, 0xa2, 0x03, 0x02,
+ 0x1a, 0x10, 0x83, 0x00, 0xa2, 0x23, 0x02, 0x1a, 0x18, 0x8b, 0x00, 0xa1,
+ 0xe3, 0x02, 0x1a, 0x1c, 0x87, 0x00, 0xa1, 0xd0, 0x9b, 0x00, 0xc5, 0xa9,
+ 0x83, 0x00, 0xa1, 0xca, 0x02, 0x1a, 0x20, 0x9b, 0x00, 0xc5, 0xa1, 0x91,
+ 0x00, 0xa1, 0xa0, 0x8b, 0x00, 0xa1, 0x90, 0x9b, 0x00, 0xc5, 0x91, 0x8b,
+ 0x00, 0xa0, 0x10, 0xc7, 0xc6, 0x4e, 0x00, 0xad, 0x78, 0x95, 0x00, 0xa8,
+ 0x31, 0x8f, 0x00, 0xa5, 0xf0, 0x8b, 0x00, 0xb3, 0x79, 0x83, 0x00, 0xac,
+ 0x22, 0x02, 0x1a, 0x24, 0x91, 0x00, 0xac, 0x10, 0x8b, 0x00, 0xac, 0x00,
+ 0x97, 0x08, 0x15, 0x22, 0x02, 0x1a, 0x28, 0x9f, 0x08, 0x16, 0x70, 0xa0,
+ 0x08, 0x16, 0x61, 0xa1, 0x08, 0x16, 0x69, 0x9f, 0x08, 0x16, 0x58, 0x9f,
+ 0x08, 0x15, 0xb0, 0x9f, 0x08, 0x15, 0x78, 0x9f, 0x08, 0x16, 0x18, 0xc2,
+ 0x00, 0x72, 0x08, 0x29, 0x81, 0xc2, 0x00, 0xbf, 0x08, 0x2a, 0x40, 0xc2,
+ 0x03, 0x4e, 0x08, 0x29, 0x91, 0xc4, 0xdf, 0x8f, 0x08, 0x2a, 0xc0, 0xc2,
+ 0x00, 0xfe, 0x08, 0x29, 0x99, 0xc3, 0x2e, 0x0f, 0x08, 0x2a, 0x09, 0x1c,
+ 0x42, 0x1a, 0x34, 0x84, 0x08, 0x29, 0xa1, 0xc2, 0x17, 0xb6, 0x08, 0x29,
+ 0xb0, 0xc3, 0x1a, 0xfe, 0x08, 0x29, 0xa9, 0x0a, 0x42, 0x1a, 0x40, 0xc2,
+ 0x02, 0x2c, 0x08, 0x29, 0xc1, 0xc3, 0x4b, 0x13, 0x08, 0x2a, 0x99, 0xc3,
+ 0xe5, 0xc0, 0x08, 0x2a, 0xe0, 0x0a, 0xc2, 0x1a, 0x4a, 0x03, 0xc2, 0x1a,
+ 0x5b, 0x42, 0x19, 0x2c, 0x42, 0x1a, 0x65, 0xc3, 0x02, 0x05, 0x08, 0x29,
+ 0xd1, 0xc3, 0xe5, 0x51, 0x08, 0x2b, 0x08, 0xc2, 0x01, 0x5f, 0x08, 0x29,
+ 0xe1, 0xc3, 0x2d, 0xfd, 0x08, 0x29, 0xf9, 0xc2, 0x01, 0x48, 0x08, 0x2a,
+ 0xf0, 0x0a, 0xc2, 0x1a, 0x6d, 0xc3, 0xe6, 0x44, 0x08, 0x2a, 0xd0, 0xc2,
+ 0x00, 0xd1, 0x08, 0x29, 0xf1, 0xc3, 0xb7, 0xb1, 0x08, 0x2a, 0x28, 0xc3,
+ 0xe5, 0xcf, 0x08, 0x2a, 0x19, 0xc3, 0x53, 0x85, 0x08, 0x2a, 0x88, 0xc2,
+ 0x00, 0xb1, 0x08, 0x2a, 0x21, 0xc2, 0x33, 0x52, 0x08, 0x2b, 0x18, 0x9b,
+ 0x08, 0x2a, 0x39, 0x94, 0x08, 0x2a, 0x68, 0xc2, 0x00, 0xc4, 0x08, 0x2a,
+ 0xb9, 0xc3, 0xe5, 0xc0, 0x08, 0x2b, 0x10, 0x9d, 0x17, 0xcf, 0x01, 0x88,
+ 0x17, 0xcf, 0x79, 0x87, 0x17, 0xcf, 0x71, 0x86, 0x17, 0xcf, 0x69, 0x85,
+ 0x17, 0xcf, 0x61, 0x84, 0x17, 0xcf, 0x59, 0x83, 0x17, 0xcf, 0x51, 0xa6,
+ 0x17, 0xcf, 0x49, 0xa5, 0x17, 0xcf, 0x41, 0xa4, 0x17, 0xcf, 0x39, 0xa3,
+ 0x17, 0xcf, 0x31, 0xa2, 0x17, 0xcf, 0x29, 0xa1, 0x17, 0xcf, 0x21, 0xa0,
+ 0x17, 0xcf, 0x19, 0x9f, 0x17, 0xcf, 0x11, 0x9e, 0x17, 0xcf, 0x08, 0x88,
+ 0x17, 0xce, 0xf9, 0x87, 0x17, 0xce, 0xf1, 0xa6, 0x17, 0xce, 0xc9, 0x86,
+ 0x17, 0xce, 0xe9, 0x85, 0x17, 0xce, 0xe1, 0x84, 0x17, 0xce, 0xd9, 0x83,
+ 0x17, 0xce, 0xd1, 0xa5, 0x17, 0xce, 0xc1, 0xa4, 0x17, 0xce, 0xb9, 0xa3,
+ 0x17, 0xce, 0xb1, 0xa2, 0x17, 0xce, 0xa9, 0xa1, 0x17, 0xce, 0xa1, 0xa0,
+ 0x17, 0xce, 0x99, 0x9f, 0x17, 0xce, 0x91, 0x9e, 0x17, 0xce, 0x89, 0x9d,
+ 0x17, 0xce, 0x80, 0x83, 0x17, 0xcd, 0x51, 0xa6, 0x17, 0xcd, 0x49, 0xa5,
+ 0x17, 0xcd, 0x41, 0xa4, 0x17, 0xcd, 0x39, 0xa3, 0x17, 0xcd, 0x31, 0xa2,
+ 0x17, 0xcd, 0x29, 0xa1, 0x17, 0xcd, 0x21, 0x86, 0x17, 0xcd, 0x69, 0x85,
+ 0x17, 0xcd, 0x61, 0x84, 0x17, 0xcd, 0x59, 0xa0, 0x17, 0xcd, 0x19, 0x9f,
+ 0x17, 0xcd, 0x11, 0x9e, 0x17, 0xcd, 0x09, 0x9d, 0x17, 0xcd, 0x01, 0x87,
+ 0x17, 0xcd, 0x71, 0x88, 0x17, 0xcd, 0x78, 0x88, 0x17, 0xcf, 0xf9, 0x87,
+ 0x17, 0xcf, 0xf1, 0x86, 0x17, 0xcf, 0xe9, 0x85, 0x17, 0xcf, 0xe1, 0x84,
+ 0x17, 0xcf, 0xd9, 0x83, 0x17, 0xcf, 0xd1, 0xa6, 0x17, 0xcf, 0xc9, 0xa5,
+ 0x17, 0xcf, 0xc1, 0xa4, 0x17, 0xcf, 0xb9, 0xa3, 0x17, 0xcf, 0xb1, 0xa2,
+ 0x17, 0xcf, 0xa9, 0xa1, 0x17, 0xcf, 0xa1, 0xa0, 0x17, 0xcf, 0x99, 0x9f,
+ 0x17, 0xcf, 0x91, 0x9e, 0x17, 0xcf, 0x89, 0x9d, 0x17, 0xcf, 0x80, 0x9d,
+ 0x17, 0xcb, 0x81, 0x88, 0x17, 0xcb, 0xf9, 0x87, 0x17, 0xcb, 0xf1, 0x86,
+ 0x17, 0xcb, 0xe9, 0x85, 0x17, 0xcb, 0xe1, 0x84, 0x17, 0xcb, 0xd9, 0x83,
+ 0x17, 0xcb, 0xd1, 0xa6, 0x17, 0xcb, 0xc9, 0xa5, 0x17, 0xcb, 0xc1, 0xa4,
+ 0x17, 0xcb, 0xb9, 0xa3, 0x17, 0xcb, 0xb1, 0xa2, 0x17, 0xcb, 0xa9, 0xa1,
+ 0x17, 0xcb, 0xa1, 0xa0, 0x17, 0xcb, 0x99, 0x9f, 0x17, 0xcb, 0x91, 0x9e,
+ 0x17, 0xcb, 0x88, 0x88, 0x17, 0xcb, 0x79, 0x87, 0x17, 0xcb, 0x71, 0x86,
+ 0x17, 0xcb, 0x69, 0x85, 0x17, 0xcb, 0x61, 0x84, 0x17, 0xcb, 0x59, 0x83,
+ 0x17, 0xcb, 0x51, 0xa6, 0x17, 0xcb, 0x49, 0xa5, 0x17, 0xcb, 0x41, 0xa4,
+ 0x17, 0xcb, 0x39, 0xa3, 0x17, 0xcb, 0x31, 0xa2, 0x17, 0xcb, 0x29, 0xa1,
+ 0x17, 0xcb, 0x21, 0x9d, 0x17, 0xcb, 0x01, 0x9e, 0x17, 0xcb, 0x09, 0x9f,
+ 0x17, 0xcb, 0x11, 0xa0, 0x17, 0xcb, 0x18, 0x9d, 0x17, 0xc9, 0x81, 0x88,
+ 0x17, 0xc9, 0xf9, 0x87, 0x17, 0xc9, 0xf1, 0x86, 0x17, 0xc9, 0xe9, 0x85,
+ 0x17, 0xc9, 0xe1, 0x84, 0x17, 0xc9, 0xd9, 0x83, 0x17, 0xc9, 0xd1, 0xa6,
+ 0x17, 0xc9, 0xc9, 0xa5, 0x17, 0xc9, 0xc1, 0xa4, 0x17, 0xc9, 0xb9, 0xa3,
+ 0x17, 0xc9, 0xb1, 0xa2, 0x17, 0xc9, 0xa9, 0xa1, 0x17, 0xc9, 0xa1, 0xa0,
+ 0x17, 0xc9, 0x99, 0x9f, 0x17, 0xc9, 0x91, 0x9e, 0x17, 0xc9, 0x88, 0x88,
+ 0x17, 0xc9, 0x79, 0x87, 0x17, 0xc9, 0x71, 0x86, 0x17, 0xc9, 0x69, 0x85,
+ 0x17, 0xc9, 0x61, 0x84, 0x17, 0xc9, 0x59, 0x83, 0x17, 0xc9, 0x51, 0xa6,
+ 0x17, 0xc9, 0x49, 0xa5, 0x17, 0xc9, 0x41, 0xa4, 0x17, 0xc9, 0x39, 0xa3,
+ 0x17, 0xc9, 0x31, 0xa2, 0x17, 0xc9, 0x29, 0xa1, 0x17, 0xc9, 0x21, 0xa0,
+ 0x17, 0xc9, 0x19, 0x9f, 0x17, 0xc9, 0x11, 0x9e, 0x17, 0xc9, 0x09, 0x9d,
+ 0x17, 0xc9, 0x00, 0x88, 0x17, 0xc8, 0xf9, 0x87, 0x17, 0xc8, 0xf1, 0x86,
+ 0x17, 0xc8, 0xe9, 0x85, 0x17, 0xc8, 0xe1, 0x84, 0x17, 0xc8, 0xd9, 0x83,
+ 0x17, 0xc8, 0xd1, 0xa6, 0x17, 0xc8, 0xc9, 0xa5, 0x17, 0xc8, 0xc1, 0xa4,
+ 0x17, 0xc8, 0xb9, 0xa3, 0x17, 0xc8, 0xb1, 0xa2, 0x17, 0xc8, 0xa9, 0xa1,
+ 0x17, 0xc8, 0xa1, 0xa0, 0x17, 0xc8, 0x99, 0x9f, 0x17, 0xc8, 0x91, 0x9e,
+ 0x17, 0xc8, 0x89, 0x9d, 0x17, 0xc8, 0x80, 0x88, 0x17, 0xc8, 0x79, 0x87,
+ 0x17, 0xc8, 0x71, 0x86, 0x17, 0xc8, 0x69, 0x85, 0x17, 0xc8, 0x61, 0x84,
+ 0x17, 0xc8, 0x59, 0x83, 0x17, 0xc8, 0x51, 0xa6, 0x17, 0xc8, 0x49, 0xa5,
+ 0x17, 0xc8, 0x41, 0xa4, 0x17, 0xc8, 0x39, 0xa3, 0x17, 0xc8, 0x31, 0xa2,
+ 0x17, 0xc8, 0x29, 0xa1, 0x17, 0xc8, 0x21, 0xa0, 0x17, 0xc8, 0x19, 0x9f,
+ 0x17, 0xc8, 0x11, 0x9e, 0x17, 0xc8, 0x09, 0x9d, 0x17, 0xc8, 0x00, 0x88,
+ 0x17, 0xce, 0x79, 0x87, 0x17, 0xce, 0x71, 0x86, 0x17, 0xce, 0x69, 0x85,
+ 0x17, 0xce, 0x61, 0x84, 0x17, 0xce, 0x59, 0x83, 0x17, 0xce, 0x51, 0xa6,
+ 0x17, 0xce, 0x49, 0xa5, 0x17, 0xce, 0x41, 0xa4, 0x17, 0xce, 0x39, 0xa3,
+ 0x17, 0xce, 0x31, 0xa2, 0x17, 0xce, 0x29, 0xa1, 0x17, 0xce, 0x21, 0xa0,
+ 0x17, 0xce, 0x19, 0x9f, 0x17, 0xce, 0x11, 0x9d, 0x17, 0xce, 0x01, 0x9e,
+ 0x17, 0xce, 0x08, 0x87, 0x17, 0xcd, 0xf1, 0x86, 0x17, 0xcd, 0xe9, 0x85,
+ 0x17, 0xcd, 0xe1, 0x84, 0x17, 0xcd, 0xd9, 0x83, 0x17, 0xcd, 0xd1, 0xa6,
+ 0x17, 0xcd, 0xc9, 0xa5, 0x17, 0xcd, 0xc1, 0xa4, 0x17, 0xcd, 0xb9, 0xa3,
+ 0x17, 0xcd, 0xb1, 0xa2, 0x17, 0xcd, 0xa9, 0xa1, 0x17, 0xcd, 0xa1, 0x9d,
+ 0x17, 0xcd, 0x81, 0x9e, 0x17, 0xcd, 0x89, 0x9f, 0x17, 0xcd, 0x91, 0xa0,
+ 0x17, 0xcd, 0x99, 0x88, 0x17, 0xcd, 0xf8, 0x88, 0x17, 0xcc, 0xf9, 0x87,
+ 0x17, 0xcc, 0xf1, 0x86, 0x17, 0xcc, 0xe9, 0x85, 0x17, 0xcc, 0xe1, 0x84,
+ 0x17, 0xcc, 0xd9, 0x83, 0x17, 0xcc, 0xd1, 0xa6, 0x17, 0xcc, 0xc9, 0xa5,
+ 0x17, 0xcc, 0xc1, 0xa4, 0x17, 0xcc, 0xb9, 0xa3, 0x17, 0xcc, 0xb1, 0xa2,
+ 0x17, 0xcc, 0xa9, 0xa1, 0x17, 0xcc, 0xa1, 0x9d, 0x17, 0xcc, 0x81, 0x9e,
+ 0x17, 0xcc, 0x89, 0x9f, 0x17, 0xcc, 0x91, 0xa0, 0x17, 0xcc, 0x98, 0x88,
+ 0x17, 0xcc, 0x79, 0x87, 0x17, 0xcc, 0x71, 0x86, 0x17, 0xcc, 0x69, 0x85,
+ 0x17, 0xcc, 0x61, 0x84, 0x17, 0xcc, 0x59, 0x83, 0x17, 0xcc, 0x51, 0xa6,
+ 0x17, 0xcc, 0x49, 0xa5, 0x17, 0xcc, 0x41, 0xa4, 0x17, 0xcc, 0x39, 0xa3,
+ 0x17, 0xcc, 0x31, 0xa2, 0x17, 0xcc, 0x29, 0xa1, 0x17, 0xcc, 0x21, 0xa0,
+ 0x17, 0xcc, 0x19, 0x9f, 0x17, 0xcc, 0x11, 0x9e, 0x17, 0xcc, 0x09, 0x9d,
+ 0x17, 0xcc, 0x00, 0xa5, 0x17, 0xca, 0xc1, 0xa4, 0x17, 0xca, 0xb9, 0xa3,
+ 0x17, 0xca, 0xb1, 0xa2, 0x17, 0xca, 0xa9, 0xa1, 0x17, 0xca, 0xa1, 0x9e,
+ 0x17, 0xca, 0x89, 0x9d, 0x17, 0xca, 0x81, 0x9f, 0x17, 0xca, 0x91, 0xa0,
+ 0x17, 0xca, 0x99, 0xa6, 0x17, 0xca, 0xc9, 0x83, 0x17, 0xca, 0xd1, 0x84,
+ 0x17, 0xca, 0xd9, 0x85, 0x17, 0xca, 0xe1, 0x86, 0x17, 0xca, 0xe9, 0x87,
+ 0x17, 0xca, 0xf1, 0x88, 0x17, 0xca, 0xf8, 0x88, 0x17, 0xca, 0x79, 0x87,
+ 0x17, 0xca, 0x71, 0x86, 0x17, 0xca, 0x69, 0x85, 0x17, 0xca, 0x61, 0x84,
+ 0x17, 0xca, 0x59, 0x83, 0x17, 0xca, 0x51, 0xa6, 0x17, 0xca, 0x49, 0xa5,
+ 0x17, 0xca, 0x41, 0xa4, 0x17, 0xca, 0x39, 0xa3, 0x17, 0xca, 0x31, 0xa2,
+ 0x17, 0xca, 0x29, 0xa1, 0x17, 0xca, 0x21, 0xa0, 0x17, 0xca, 0x19, 0x9f,
+ 0x17, 0xca, 0x11, 0x9e, 0x17, 0xca, 0x09, 0x9d, 0x17, 0xca, 0x00, 0xa2,
+ 0x17, 0xc3, 0xa9, 0x9f, 0x17, 0xc3, 0x91, 0x88, 0x17, 0xc3, 0xf9, 0x87,
+ 0x17, 0xc3, 0xf1, 0x86, 0x17, 0xc3, 0xe9, 0x85, 0x17, 0xc3, 0xe1, 0x84,
+ 0x17, 0xc3, 0xd9, 0x83, 0x17, 0xc3, 0xd1, 0xa6, 0x17, 0xc3, 0xc9, 0xa5,
+ 0x17, 0xc3, 0xc1, 0xa4, 0x17, 0xc3, 0xb9, 0xa3, 0x17, 0xc3, 0xb1, 0xa1,
+ 0x17, 0xc3, 0xa1, 0xa0, 0x17, 0xc3, 0x99, 0x9e, 0x17, 0xc3, 0x89, 0x9d,
+ 0x17, 0xc3, 0x80, 0x83, 0x17, 0xc3, 0x51, 0xa2, 0x17, 0xc3, 0x29, 0xa1,
+ 0x17, 0xc3, 0x21, 0xa0, 0x17, 0xc3, 0x19, 0x9f, 0x17, 0xc3, 0x11, 0x9e,
+ 0x17, 0xc3, 0x09, 0x88, 0x17, 0xc3, 0x79, 0x87, 0x17, 0xc3, 0x71, 0x86,
+ 0x17, 0xc3, 0x69, 0x85, 0x17, 0xc3, 0x61, 0x84, 0x17, 0xc3, 0x59, 0xa6,
+ 0x17, 0xc3, 0x49, 0xa5, 0x17, 0xc3, 0x41, 0xa4, 0x17, 0xc3, 0x39, 0xa3,
+ 0x17, 0xc3, 0x31, 0x9d, 0x17, 0xc3, 0x00, 0xa6, 0x17, 0xc2, 0xc9, 0xa5,
+ 0x17, 0xc2, 0xc1, 0xa4, 0x17, 0xc2, 0xb9, 0xa3, 0x17, 0xc2, 0xb1, 0xa2,
+ 0x17, 0xc2, 0xa9, 0xa1, 0x17, 0xc2, 0xa1, 0xa0, 0x17, 0xc2, 0x99, 0x9f,
+ 0x17, 0xc2, 0x91, 0x9e, 0x17, 0xc2, 0x89, 0x9d, 0x17, 0xc2, 0x81, 0x85,
+ 0x17, 0xc2, 0xe1, 0x84, 0x17, 0xc2, 0xd9, 0x83, 0x17, 0xc2, 0xd1, 0x86,
+ 0x17, 0xc2, 0xe9, 0x87, 0x17, 0xc2, 0xf1, 0x88, 0x17, 0xc2, 0xf8, 0x88,
+ 0x17, 0xc2, 0x79, 0x87, 0x17, 0xc2, 0x71, 0xa6, 0x17, 0xc2, 0x49, 0xa5,
+ 0x17, 0xc2, 0x41, 0xa4, 0x17, 0xc2, 0x39, 0xa3, 0x17, 0xc2, 0x31, 0xa2,
+ 0x17, 0xc2, 0x29, 0xa1, 0x17, 0xc2, 0x21, 0xa0, 0x17, 0xc2, 0x19, 0x86,
+ 0x17, 0xc2, 0x69, 0x85, 0x17, 0xc2, 0x61, 0x84, 0x17, 0xc2, 0x59, 0x83,
+ 0x17, 0xc2, 0x51, 0x9f, 0x17, 0xc2, 0x11, 0x9e, 0x17, 0xc2, 0x09, 0x9d,
+ 0x17, 0xc2, 0x00, 0xa5, 0x17, 0xc1, 0x41, 0xa4, 0x17, 0xc1, 0x39, 0xa3,
+ 0x17, 0xc1, 0x31, 0xa2, 0x17, 0xc1, 0x29, 0xa1, 0x17, 0xc1, 0x21, 0x88,
+ 0x17, 0xc1, 0x79, 0x87, 0x17, 0xc1, 0x71, 0x86, 0x17, 0xc1, 0x69, 0x85,
+ 0x17, 0xc1, 0x61, 0x84, 0x17, 0xc1, 0x59, 0x83, 0x17, 0xc1, 0x51, 0xa6,
+ 0x17, 0xc1, 0x49, 0xa0, 0x17, 0xc1, 0x19, 0x9f, 0x17, 0xc1, 0x11, 0x9e,
+ 0x17, 0xc1, 0x09, 0x9d, 0x17, 0xc1, 0x00, 0xa5, 0x17, 0xc0, 0x41, 0xa4,
+ 0x17, 0xc0, 0x39, 0x88, 0x17, 0xc0, 0x79, 0x87, 0x17, 0xc0, 0x71, 0x86,
+ 0x17, 0xc0, 0x69, 0x85, 0x17, 0xc0, 0x61, 0x84, 0x17, 0xc0, 0x59, 0x83,
+ 0x17, 0xc0, 0x51, 0xa6, 0x17, 0xc0, 0x49, 0xa3, 0x17, 0xc0, 0x31, 0xa2,
+ 0x17, 0xc0, 0x29, 0xa1, 0x17, 0xc0, 0x21, 0x9d, 0x17, 0xc0, 0x01, 0x9e,
+ 0x17, 0xc0, 0x09, 0x9f, 0x17, 0xc0, 0x11, 0xa0, 0x17, 0xc0, 0x18, 0x88,
+ 0x17, 0xc7, 0xf9, 0x87, 0x17, 0xc7, 0xf1, 0x86, 0x17, 0xc7, 0xe9, 0x85,
+ 0x17, 0xc7, 0xe1, 0x84, 0x17, 0xc7, 0xd9, 0x83, 0x17, 0xc7, 0xd1, 0xa6,
+ 0x17, 0xc7, 0xc9, 0xa5, 0x17, 0xc7, 0xc1, 0xa4, 0x17, 0xc7, 0xb9, 0xa3,
+ 0x17, 0xc7, 0xb1, 0xa2, 0x17, 0xc7, 0xa9, 0xa1, 0x17, 0xc7, 0xa1, 0xa0,
+ 0x17, 0xc7, 0x99, 0x9f, 0x17, 0xc7, 0x91, 0x9e, 0x17, 0xc7, 0x89, 0x9d,
+ 0x17, 0xc7, 0x80, 0x9d, 0x17, 0xc5, 0x81, 0x88, 0x17, 0xc5, 0xf9, 0x87,
+ 0x17, 0xc5, 0xf1, 0x86, 0x17, 0xc5, 0xe9, 0x85, 0x17, 0xc5, 0xe1, 0x84,
+ 0x17, 0xc5, 0xd9, 0x83, 0x17, 0xc5, 0xd1, 0xa6, 0x17, 0xc5, 0xc9, 0xa5,
+ 0x17, 0xc5, 0xc1, 0xa4, 0x17, 0xc5, 0xb9, 0xa3, 0x17, 0xc5, 0xb1, 0xa2,
+ 0x17, 0xc5, 0xa9, 0xa1, 0x17, 0xc5, 0xa1, 0xa0, 0x17, 0xc5, 0x99, 0x9f,
+ 0x17, 0xc5, 0x91, 0x9e, 0x17, 0xc5, 0x88, 0x88, 0x17, 0xc5, 0x79, 0x87,
+ 0x17, 0xc5, 0x71, 0x86, 0x17, 0xc5, 0x69, 0x85, 0x17, 0xc5, 0x61, 0x84,
+ 0x17, 0xc5, 0x59, 0x83, 0x17, 0xc5, 0x51, 0xa6, 0x17, 0xc5, 0x49, 0xa5,
+ 0x17, 0xc5, 0x41, 0xa4, 0x17, 0xc5, 0x39, 0xa3, 0x17, 0xc5, 0x31, 0xa2,
+ 0x17, 0xc5, 0x29, 0xa1, 0x17, 0xc5, 0x21, 0xa0, 0x17, 0xc5, 0x19, 0x9f,
+ 0x17, 0xc5, 0x11, 0x9e, 0x17, 0xc5, 0x09, 0x9d, 0x17, 0xc5, 0x00, 0x88,
+ 0x17, 0xc4, 0xf9, 0x87, 0x17, 0xc4, 0xf1, 0x86, 0x17, 0xc4, 0xe9, 0x85,
+ 0x17, 0xc4, 0xe1, 0x84, 0x17, 0xc4, 0xd9, 0x83, 0x17, 0xc4, 0xd1, 0xa6,
+ 0x17, 0xc4, 0xc9, 0xa5, 0x17, 0xc4, 0xc1, 0xa4, 0x17, 0xc4, 0xb9, 0xa3,
+ 0x17, 0xc4, 0xb1, 0xa2, 0x17, 0xc4, 0xa9, 0xa1, 0x17, 0xc4, 0xa1, 0xa0,
+ 0x17, 0xc4, 0x99, 0x9f, 0x17, 0xc4, 0x91, 0x9e, 0x17, 0xc4, 0x89, 0x9d,
+ 0x17, 0xc4, 0x80, 0x88, 0x17, 0xc4, 0x79, 0x87, 0x17, 0xc4, 0x71, 0x86,
+ 0x17, 0xc4, 0x69, 0x85, 0x17, 0xc4, 0x61, 0x84, 0x17, 0xc4, 0x59, 0x83,
+ 0x17, 0xc4, 0x51, 0xa6, 0x17, 0xc4, 0x49, 0xa5, 0x17, 0xc4, 0x41, 0xa4,
+ 0x17, 0xc4, 0x39, 0xa3, 0x17, 0xc4, 0x31, 0xa2, 0x17, 0xc4, 0x29, 0xa1,
+ 0x17, 0xc4, 0x21, 0xa0, 0x17, 0xc4, 0x19, 0x9f, 0x17, 0xc4, 0x11, 0x9e,
+ 0x17, 0xc4, 0x09, 0x9d, 0x17, 0xc4, 0x00, 0x88, 0x17, 0xc7, 0x79, 0x87,
+ 0x17, 0xc7, 0x71, 0x86, 0x17, 0xc7, 0x69, 0x85, 0x17, 0xc7, 0x61, 0x84,
+ 0x17, 0xc7, 0x59, 0x83, 0x17, 0xc7, 0x51, 0xa6, 0x17, 0xc7, 0x49, 0xa5,
+ 0x17, 0xc7, 0x41, 0xa4, 0x17, 0xc7, 0x39, 0xa3, 0x17, 0xc7, 0x31, 0xa2,
+ 0x17, 0xc7, 0x29, 0xa1, 0x17, 0xc7, 0x21, 0x9d, 0x17, 0xc7, 0x01, 0x9e,
+ 0x17, 0xc7, 0x09, 0x9f, 0x17, 0xc7, 0x11, 0xa0, 0x17, 0xc7, 0x18, 0xa6,
+ 0x17, 0xc6, 0xc9, 0xa5, 0x17, 0xc6, 0xc1, 0xa4, 0x17, 0xc6, 0xb9, 0xa3,
+ 0x17, 0xc6, 0xb1, 0xa2, 0x17, 0xc6, 0xa9, 0xa1, 0x17, 0xc6, 0xa1, 0xa0,
+ 0x17, 0xc6, 0x99, 0x9f, 0x17, 0xc6, 0x91, 0x9e, 0x17, 0xc6, 0x89, 0x9d,
+ 0x17, 0xc6, 0x81, 0x83, 0x17, 0xc6, 0xd1, 0x84, 0x17, 0xc6, 0xd9, 0x85,
+ 0x17, 0xc6, 0xe1, 0x86, 0x17, 0xc6, 0xe9, 0x87, 0x17, 0xc6, 0xf1, 0x88,
+ 0x17, 0xc6, 0xf8, 0x88, 0x17, 0xc6, 0x79, 0x87, 0x17, 0xc6, 0x71, 0x86,
+ 0x17, 0xc6, 0x69, 0x85, 0x17, 0xc6, 0x61, 0x84, 0x17, 0xc6, 0x59, 0x83,
+ 0x17, 0xc6, 0x51, 0xa6, 0x17, 0xc6, 0x49, 0xa5, 0x17, 0xc6, 0x41, 0xa4,
+ 0x17, 0xc6, 0x39, 0xa3, 0x17, 0xc6, 0x31, 0xa2, 0x17, 0xc6, 0x29, 0xa1,
+ 0x17, 0xc6, 0x21, 0xa0, 0x17, 0xc6, 0x19, 0x9f, 0x17, 0xc6, 0x11, 0x9e,
+ 0x17, 0xc6, 0x09, 0x9d, 0x17, 0xc6, 0x00, 0x88, 0x17, 0xc1, 0xf9, 0x87,
+ 0x17, 0xc1, 0xf1, 0x86, 0x17, 0xc1, 0xe9, 0x85, 0x17, 0xc1, 0xe1, 0x84,
+ 0x17, 0xc1, 0xd9, 0x83, 0x17, 0xc1, 0xd1, 0xa6, 0x17, 0xc1, 0xc9, 0xa5,
+ 0x17, 0xc1, 0xc1, 0xa4, 0x17, 0xc1, 0xb9, 0xa3, 0x17, 0xc1, 0xb1, 0xa2,
+ 0x17, 0xc1, 0xa9, 0xa1, 0x17, 0xc1, 0xa1, 0xa0, 0x17, 0xc1, 0x99, 0x9f,
+ 0x17, 0xc1, 0x91, 0x9e, 0x17, 0xc1, 0x89, 0x9d, 0x17, 0xc1, 0x80, 0x88,
+ 0x17, 0xc0, 0xf9, 0x87, 0x17, 0xc0, 0xf1, 0x86, 0x17, 0xc0, 0xe9, 0x85,
+ 0x17, 0xc0, 0xe1, 0x84, 0x17, 0xc0, 0xd9, 0x83, 0x17, 0xc0, 0xd1, 0xa6,
+ 0x17, 0xc0, 0xc9, 0xa5, 0x17, 0xc0, 0xc1, 0xa4, 0x17, 0xc0, 0xb9, 0xa3,
+ 0x17, 0xc0, 0xb1, 0xa2, 0x17, 0xc0, 0xa9, 0xa1, 0x17, 0xc0, 0xa1, 0xa0,
+ 0x17, 0xc0, 0x99, 0x9f, 0x17, 0xc0, 0x91, 0x9e, 0x17, 0xc0, 0x89, 0x9d,
+ 0x17, 0xc0, 0x80, 0x86, 0x17, 0xd0, 0xe9, 0x85, 0x17, 0xd0, 0xe1, 0x84,
+ 0x17, 0xd0, 0xd9, 0x83, 0x17, 0xd0, 0xd1, 0xa6, 0x17, 0xd0, 0xc9, 0xa5,
+ 0x17, 0xd0, 0xc1, 0xa4, 0x17, 0xd0, 0xb9, 0xa3, 0x17, 0xd0, 0xb1, 0xa2,
+ 0x17, 0xd0, 0xa9, 0xa1, 0x17, 0xd0, 0xa1, 0xa0, 0x17, 0xd0, 0x99, 0x9f,
+ 0x17, 0xd0, 0x91, 0x9e, 0x17, 0xd0, 0x89, 0x9d, 0x17, 0xd0, 0x80, 0x88,
+ 0x17, 0xd0, 0x79, 0x87, 0x17, 0xd0, 0x71, 0x86, 0x17, 0xd0, 0x69, 0x85,
+ 0x17, 0xd0, 0x61, 0x84, 0x17, 0xd0, 0x59, 0x83, 0x17, 0xd0, 0x51, 0xa6,
+ 0x17, 0xd0, 0x49, 0xa5, 0x17, 0xd0, 0x41, 0xa4, 0x17, 0xd0, 0x39, 0xa3,
+ 0x17, 0xd0, 0x31, 0xa2, 0x17, 0xd0, 0x29, 0xa1, 0x17, 0xd0, 0x21, 0xa0,
+ 0x17, 0xd0, 0x19, 0x9f, 0x17, 0xd0, 0x11, 0x9e, 0x17, 0xd0, 0x09, 0x9d,
+ 0x17, 0xd0, 0x00, 0xa6, 0x07, 0xd6, 0xc9, 0xa5, 0x07, 0xd6, 0xc1, 0xa4,
+ 0x07, 0xd6, 0xb9, 0xa3, 0x07, 0xd6, 0xb1, 0xa2, 0x07, 0xd6, 0xa9, 0xa1,
+ 0x07, 0xd6, 0xa1, 0xa0, 0x07, 0xd6, 0x99, 0x9f, 0x07, 0xd6, 0x91, 0x9e,
+ 0x07, 0xd6, 0x89, 0x9d, 0x07, 0xd6, 0x80, 0x88, 0x07, 0xd6, 0x79, 0x87,
+ 0x07, 0xd6, 0x71, 0x86, 0x07, 0xd6, 0x69, 0x85, 0x07, 0xd6, 0x61, 0x84,
+ 0x07, 0xd6, 0x59, 0x83, 0x07, 0xd6, 0x51, 0xa6, 0x07, 0xd6, 0x49, 0xa5,
+ 0x07, 0xd6, 0x41, 0xa4, 0x07, 0xd6, 0x39, 0xa3, 0x07, 0xd6, 0x31, 0xa2,
+ 0x07, 0xd6, 0x29, 0xa1, 0x07, 0xd6, 0x21, 0xa0, 0x07, 0xd6, 0x19, 0x9f,
+ 0x07, 0xd6, 0x11, 0x9e, 0x07, 0xd6, 0x09, 0x9d, 0x07, 0xd6, 0x00, 0x88,
+ 0x07, 0xd5, 0xf9, 0x87, 0x07, 0xd5, 0xf1, 0x86, 0x07, 0xd5, 0xe9, 0x85,
+ 0x07, 0xd5, 0xe1, 0x84, 0x07, 0xd5, 0xd9, 0x83, 0x07, 0xd5, 0xd1, 0xa6,
+ 0x07, 0xd5, 0xc9, 0xa5, 0x07, 0xd5, 0xc1, 0xa4, 0x07, 0xd5, 0xb9, 0xa3,
+ 0x07, 0xd5, 0xb1, 0xa2, 0x07, 0xd5, 0xa9, 0xa1, 0x07, 0xd5, 0xa1, 0xa0,
+ 0x07, 0xd5, 0x99, 0x9f, 0x07, 0xd5, 0x91, 0x9e, 0x07, 0xd5, 0x89, 0x9d,
+ 0x07, 0xd5, 0x80, 0x88, 0x07, 0xd5, 0x79, 0x87, 0x07, 0xd5, 0x71, 0x86,
+ 0x07, 0xd5, 0x69, 0x85, 0x07, 0xd5, 0x61, 0x84, 0x07, 0xd5, 0x59, 0x83,
+ 0x07, 0xd5, 0x51, 0xa6, 0x07, 0xd5, 0x49, 0xa5, 0x07, 0xd5, 0x41, 0xa4,
+ 0x07, 0xd5, 0x39, 0xa3, 0x07, 0xd5, 0x31, 0xa2, 0x07, 0xd5, 0x29, 0xa1,
+ 0x07, 0xd5, 0x21, 0xa0, 0x07, 0xd5, 0x19, 0x9f, 0x07, 0xd5, 0x11, 0x9e,
+ 0x07, 0xd5, 0x09, 0x9d, 0x07, 0xd5, 0x00, 0x88, 0x07, 0xd4, 0xf9, 0x87,
+ 0x07, 0xd4, 0xf1, 0x86, 0x07, 0xd4, 0xe9, 0x85, 0x07, 0xd4, 0xe1, 0x84,
+ 0x07, 0xd4, 0xd9, 0x83, 0x07, 0xd4, 0xd1, 0xa6, 0x07, 0xd4, 0xc9, 0xa5,
+ 0x07, 0xd4, 0xc1, 0xa4, 0x07, 0xd4, 0xb9, 0xa3, 0x07, 0xd4, 0xb1, 0xa2,
+ 0x07, 0xd4, 0xa9, 0xa1, 0x07, 0xd4, 0xa1, 0xa0, 0x07, 0xd4, 0x99, 0x9f,
+ 0x07, 0xd4, 0x91, 0x9e, 0x07, 0xd4, 0x89, 0x9d, 0x07, 0xd4, 0x80, 0x88,
+ 0x07, 0xd4, 0x79, 0x87, 0x07, 0xd4, 0x71, 0x86, 0x07, 0xd4, 0x69, 0x85,
+ 0x07, 0xd4, 0x61, 0x84, 0x07, 0xd4, 0x59, 0x83, 0x07, 0xd4, 0x51, 0xa6,
+ 0x07, 0xd4, 0x49, 0xa5, 0x07, 0xd4, 0x41, 0xa4, 0x07, 0xd4, 0x39, 0xa3,
+ 0x07, 0xd4, 0x31, 0xa2, 0x07, 0xd4, 0x29, 0xa1, 0x07, 0xd4, 0x21, 0xa0,
+ 0x07, 0xd4, 0x19, 0x9f, 0x07, 0xd4, 0x11, 0x9e, 0x07, 0xd4, 0x09, 0x9d,
+ 0x07, 0xd4, 0x00, 0x86, 0x07, 0xd3, 0xe9, 0x85, 0x07, 0xd3, 0xe1, 0x84,
+ 0x07, 0xd3, 0xd9, 0x83, 0x07, 0xd3, 0xd1, 0xa6, 0x07, 0xd3, 0xc9, 0xa5,
+ 0x07, 0xd3, 0xc1, 0xa4, 0x07, 0xd3, 0xb9, 0xa3, 0x07, 0xd3, 0xb1, 0xa2,
+ 0x07, 0xd3, 0xa9, 0xa1, 0x07, 0xd3, 0xa1, 0xa0, 0x07, 0xd3, 0x99, 0x9f,
+ 0x07, 0xd3, 0x91, 0x9e, 0x07, 0xd3, 0x89, 0x9d, 0x07, 0xd3, 0x81, 0x87,
+ 0x07, 0xd3, 0xf1, 0x88, 0x07, 0xd3, 0xf8, 0x86, 0x07, 0xd3, 0x69, 0x85,
+ 0x07, 0xd3, 0x61, 0x84, 0x07, 0xd3, 0x59, 0x83, 0x07, 0xd3, 0x51, 0xa6,
+ 0x07, 0xd3, 0x49, 0xa5, 0x07, 0xd3, 0x41, 0xa4, 0x07, 0xd3, 0x39, 0xa3,
+ 0x07, 0xd3, 0x31, 0xa2, 0x07, 0xd3, 0x29, 0xa1, 0x07, 0xd3, 0x21, 0xa0,
+ 0x07, 0xd3, 0x19, 0x9f, 0x07, 0xd3, 0x11, 0x9e, 0x07, 0xd3, 0x09, 0x9d,
+ 0x07, 0xd3, 0x00, 0x88, 0x07, 0xd2, 0xf9, 0x87, 0x07, 0xd2, 0xf1, 0x86,
+ 0x07, 0xd2, 0xe9, 0x85, 0x07, 0xd2, 0xe1, 0x84, 0x07, 0xd2, 0xd9, 0x83,
+ 0x07, 0xd2, 0xd1, 0xa6, 0x07, 0xd2, 0xc9, 0xa5, 0x07, 0xd2, 0xc1, 0xa4,
+ 0x07, 0xd2, 0xb9, 0xa3, 0x07, 0xd2, 0xb1, 0xa2, 0x07, 0xd2, 0xa9, 0xa1,
+ 0x07, 0xd2, 0xa1, 0xa0, 0x07, 0xd2, 0x99, 0x9f, 0x07, 0xd2, 0x91, 0x9e,
+ 0x07, 0xd2, 0x89, 0x9d, 0x07, 0xd2, 0x80, 0x88, 0x07, 0xd2, 0x79, 0x87,
+ 0x07, 0xd2, 0x71, 0x86, 0x07, 0xd2, 0x69, 0x85, 0x07, 0xd2, 0x61, 0x84,
+ 0x07, 0xd2, 0x59, 0x83, 0x07, 0xd2, 0x51, 0xa6, 0x07, 0xd2, 0x49, 0xa5,
+ 0x07, 0xd2, 0x41, 0xa4, 0x07, 0xd2, 0x39, 0xa3, 0x07, 0xd2, 0x31, 0xa2,
+ 0x07, 0xd2, 0x29, 0xa1, 0x07, 0xd2, 0x21, 0xa0, 0x07, 0xd2, 0x19, 0x9f,
+ 0x07, 0xd2, 0x11, 0x9d, 0x07, 0xd2, 0x01, 0x9e, 0x07, 0xd2, 0x08, 0x88,
+ 0x07, 0xd1, 0xf9, 0x87, 0x07, 0xd1, 0xf1, 0x86, 0x07, 0xd1, 0xe9, 0x85,
+ 0x07, 0xd1, 0xe1, 0x84, 0x07, 0xd1, 0xd9, 0x83, 0x07, 0xd1, 0xd1, 0xa6,
+ 0x07, 0xd1, 0xc9, 0xa5, 0x07, 0xd1, 0xc1, 0xa4, 0x07, 0xd1, 0xb9, 0xa3,
+ 0x07, 0xd1, 0xb1, 0xa2, 0x07, 0xd1, 0xa9, 0xa1, 0x07, 0xd1, 0xa1, 0xa0,
+ 0x07, 0xd1, 0x99, 0x9f, 0x07, 0xd1, 0x91, 0x9e, 0x07, 0xd1, 0x89, 0x9d,
+ 0x07, 0xd1, 0x80, 0x88, 0x07, 0xd1, 0x79, 0x87, 0x07, 0xd1, 0x71, 0x86,
+ 0x07, 0xd1, 0x69, 0x85, 0x07, 0xd1, 0x61, 0x84, 0x07, 0xd1, 0x59, 0x83,
+ 0x07, 0xd1, 0x51, 0xa6, 0x07, 0xd1, 0x49, 0xa5, 0x07, 0xd1, 0x41, 0xa4,
+ 0x07, 0xd1, 0x39, 0xa3, 0x07, 0xd1, 0x31, 0xa2, 0x07, 0xd1, 0x29, 0xa1,
+ 0x07, 0xd1, 0x21, 0xa0, 0x07, 0xd1, 0x19, 0x9f, 0x07, 0xd1, 0x11, 0x9e,
+ 0x07, 0xd1, 0x09, 0x9d, 0x07, 0xd1, 0x00, 0x88, 0x07, 0xd0, 0xf9, 0x87,
+ 0x07, 0xd0, 0xf1, 0x86, 0x07, 0xd0, 0xe9, 0x85, 0x07, 0xd0, 0xe1, 0x84,
+ 0x07, 0xd0, 0xd9, 0x83, 0x07, 0xd0, 0xd1, 0xa6, 0x07, 0xd0, 0xc9, 0xa5,
+ 0x07, 0xd0, 0xc1, 0xa4, 0x07, 0xd0, 0xb9, 0xa3, 0x07, 0xd0, 0xb1, 0xa2,
+ 0x07, 0xd0, 0xa9, 0xa1, 0x07, 0xd0, 0xa1, 0xa0, 0x07, 0xd0, 0x99, 0x9f,
+ 0x07, 0xd0, 0x91, 0x9e, 0x07, 0xd0, 0x89, 0x9d, 0x07, 0xd0, 0x80, 0x88,
+ 0x07, 0xd0, 0x79, 0x87, 0x07, 0xd0, 0x71, 0x86, 0x07, 0xd0, 0x69, 0x85,
+ 0x07, 0xd0, 0x61, 0x84, 0x07, 0xd0, 0x59, 0x83, 0x07, 0xd0, 0x51, 0xa6,
+ 0x07, 0xd0, 0x49, 0xa5, 0x07, 0xd0, 0x41, 0xa4, 0x07, 0xd0, 0x39, 0xa3,
+ 0x07, 0xd0, 0x31, 0xa2, 0x07, 0xd0, 0x29, 0xa1, 0x07, 0xd0, 0x21, 0xa0,
+ 0x07, 0xd0, 0x19, 0x9f, 0x07, 0xd0, 0x11, 0x9e, 0x07, 0xd0, 0x09, 0x9d,
+ 0x07, 0xd0, 0x00, 0x88, 0x07, 0xcf, 0xf9, 0x87, 0x07, 0xcf, 0xf1, 0x86,
+ 0x07, 0xcf, 0xe9, 0x85, 0x07, 0xcf, 0xe1, 0x84, 0x07, 0xcf, 0xd9, 0x83,
+ 0x07, 0xcf, 0xd1, 0xa6, 0x07, 0xcf, 0xc9, 0xa5, 0x07, 0xcf, 0xc1, 0xa4,
+ 0x07, 0xcf, 0xb9, 0xa3, 0x07, 0xcf, 0xb1, 0xa2, 0x07, 0xcf, 0xa9, 0xa1,
+ 0x07, 0xcf, 0xa1, 0xa0, 0x07, 0xcf, 0x99, 0x9f, 0x07, 0xcf, 0x91, 0x9e,
+ 0x07, 0xcf, 0x89, 0x9d, 0x07, 0xcf, 0x80, 0x88, 0x07, 0xcf, 0x79, 0x87,
+ 0x07, 0xcf, 0x71, 0x86, 0x07, 0xcf, 0x69, 0x85, 0x07, 0xcf, 0x61, 0x84,
+ 0x07, 0xcf, 0x59, 0x83, 0x07, 0xcf, 0x51, 0xa6, 0x07, 0xcf, 0x49, 0xa5,
+ 0x07, 0xcf, 0x41, 0xa4, 0x07, 0xcf, 0x39, 0xa3, 0x07, 0xcf, 0x31, 0xa2,
+ 0x07, 0xcf, 0x29, 0xa1, 0x07, 0xcf, 0x21, 0xa0, 0x07, 0xcf, 0x19, 0x9f,
+ 0x07, 0xcf, 0x11, 0x9e, 0x07, 0xcf, 0x09, 0x9d, 0x07, 0xcf, 0x00, 0x88,
+ 0x07, 0xce, 0xf9, 0x87, 0x07, 0xce, 0xf1, 0x86, 0x07, 0xce, 0xe9, 0x85,
+ 0x07, 0xce, 0xe1, 0x84, 0x07, 0xce, 0xd9, 0x83, 0x07, 0xce, 0xd1, 0xa6,
+ 0x07, 0xce, 0xc9, 0xa5, 0x07, 0xce, 0xc1, 0xa4, 0x07, 0xce, 0xb9, 0xa3,
+ 0x07, 0xce, 0xb1, 0xa2, 0x07, 0xce, 0xa9, 0xa1, 0x07, 0xce, 0xa1, 0xa0,
+ 0x07, 0xce, 0x99, 0x9f, 0x07, 0xce, 0x91, 0x9e, 0x07, 0xce, 0x89, 0x9d,
+ 0x07, 0xce, 0x80, 0x88, 0x07, 0xce, 0x79, 0x87, 0x07, 0xce, 0x71, 0x86,
+ 0x07, 0xce, 0x69, 0x85, 0x07, 0xce, 0x61, 0x84, 0x07, 0xce, 0x59, 0x83,
+ 0x07, 0xce, 0x51, 0xa6, 0x07, 0xce, 0x49, 0xa5, 0x07, 0xce, 0x41, 0xa4,
+ 0x07, 0xce, 0x39, 0xa3, 0x07, 0xce, 0x31, 0xa2, 0x07, 0xce, 0x29, 0xa1,
+ 0x07, 0xce, 0x21, 0xa0, 0x07, 0xce, 0x19, 0x9f, 0x07, 0xce, 0x11, 0x9e,
+ 0x07, 0xce, 0x09, 0x9d, 0x07, 0xce, 0x00, 0x88, 0x07, 0xcd, 0xf9, 0x87,
+ 0x07, 0xcd, 0xf1, 0x86, 0x07, 0xcd, 0xe9, 0x85, 0x07, 0xcd, 0xe1, 0x84,
+ 0x07, 0xcd, 0xd9, 0x83, 0x07, 0xcd, 0xd1, 0xa6, 0x07, 0xcd, 0xc9, 0xa5,
+ 0x07, 0xcd, 0xc1, 0xa4, 0x07, 0xcd, 0xb9, 0xa3, 0x07, 0xcd, 0xb1, 0xa2,
+ 0x07, 0xcd, 0xa9, 0xa1, 0x07, 0xcd, 0xa1, 0xa0, 0x07, 0xcd, 0x99, 0x9f,
+ 0x07, 0xcd, 0x91, 0x9e, 0x07, 0xcd, 0x89, 0x9d, 0x07, 0xcd, 0x80, 0x88,
+ 0x07, 0xcd, 0x79, 0x87, 0x07, 0xcd, 0x71, 0x86, 0x07, 0xcd, 0x69, 0x85,
+ 0x07, 0xcd, 0x61, 0x84, 0x07, 0xcd, 0x59, 0x83, 0x07, 0xcd, 0x51, 0xa6,
+ 0x07, 0xcd, 0x49, 0xa5, 0x07, 0xcd, 0x41, 0xa4, 0x07, 0xcd, 0x39, 0xa3,
+ 0x07, 0xcd, 0x31, 0xa2, 0x07, 0xcd, 0x29, 0xa1, 0x07, 0xcd, 0x21, 0xa0,
+ 0x07, 0xcd, 0x19, 0x9f, 0x07, 0xcd, 0x11, 0x9e, 0x07, 0xcd, 0x09, 0x9d,
+ 0x07, 0xcd, 0x00, 0x88, 0x07, 0xcc, 0xf9, 0x87, 0x07, 0xcc, 0xf1, 0x86,
+ 0x07, 0xcc, 0xe9, 0x85, 0x07, 0xcc, 0xe1, 0x84, 0x07, 0xcc, 0xd9, 0x83,
+ 0x07, 0xcc, 0xd1, 0xa6, 0x07, 0xcc, 0xc9, 0xa5, 0x07, 0xcc, 0xc1, 0xa4,
+ 0x07, 0xcc, 0xb9, 0xa3, 0x07, 0xcc, 0xb1, 0xa2, 0x07, 0xcc, 0xa9, 0xa1,
+ 0x07, 0xcc, 0xa1, 0xa0, 0x07, 0xcc, 0x99, 0x9f, 0x07, 0xcc, 0x91, 0x9e,
+ 0x07, 0xcc, 0x89, 0x9d, 0x07, 0xcc, 0x80, 0x88, 0x07, 0xcc, 0x79, 0x87,
+ 0x07, 0xcc, 0x71, 0x86, 0x07, 0xcc, 0x69, 0x85, 0x07, 0xcc, 0x61, 0x84,
+ 0x07, 0xcc, 0x59, 0x83, 0x07, 0xcc, 0x51, 0xa6, 0x07, 0xcc, 0x49, 0xa5,
+ 0x07, 0xcc, 0x41, 0xa4, 0x07, 0xcc, 0x39, 0xa3, 0x07, 0xcc, 0x31, 0xa2,
+ 0x07, 0xcc, 0x29, 0xa1, 0x07, 0xcc, 0x21, 0xa0, 0x07, 0xcc, 0x19, 0x9f,
+ 0x07, 0xcc, 0x11, 0x9e, 0x07, 0xcc, 0x09, 0x9d, 0x07, 0xcc, 0x00, 0x88,
+ 0x07, 0xcb, 0xf9, 0x87, 0x07, 0xcb, 0xf1, 0x86, 0x07, 0xcb, 0xe9, 0x85,
+ 0x07, 0xcb, 0xe1, 0x84, 0x07, 0xcb, 0xd9, 0x83, 0x07, 0xcb, 0xd1, 0xa6,
+ 0x07, 0xcb, 0xc9, 0xa5, 0x07, 0xcb, 0xc1, 0xa4, 0x07, 0xcb, 0xb9, 0xa3,
+ 0x07, 0xcb, 0xb1, 0xa2, 0x07, 0xcb, 0xa9, 0xa1, 0x07, 0xcb, 0xa1, 0xa0,
+ 0x07, 0xcb, 0x99, 0x9f, 0x07, 0xcb, 0x91, 0x9e, 0x07, 0xcb, 0x89, 0x9d,
+ 0x07, 0xcb, 0x80, 0x88, 0x07, 0xcb, 0x79, 0x87, 0x07, 0xcb, 0x71, 0x86,
+ 0x07, 0xcb, 0x69, 0x85, 0x07, 0xcb, 0x61, 0x84, 0x07, 0xcb, 0x59, 0x83,
+ 0x07, 0xcb, 0x51, 0xa6, 0x07, 0xcb, 0x49, 0xa5, 0x07, 0xcb, 0x41, 0xa4,
+ 0x07, 0xcb, 0x39, 0xa3, 0x07, 0xcb, 0x31, 0xa2, 0x07, 0xcb, 0x29, 0xa1,
+ 0x07, 0xcb, 0x21, 0xa0, 0x07, 0xcb, 0x19, 0x9f, 0x07, 0xcb, 0x11, 0x9e,
+ 0x07, 0xcb, 0x09, 0x9d, 0x07, 0xcb, 0x00, 0x88, 0x07, 0xca, 0xf9, 0x87,
+ 0x07, 0xca, 0xf1, 0x86, 0x07, 0xca, 0xe9, 0x85, 0x07, 0xca, 0xe1, 0x84,
+ 0x07, 0xca, 0xd9, 0x83, 0x07, 0xca, 0xd1, 0xa6, 0x07, 0xca, 0xc9, 0xa5,
+ 0x07, 0xca, 0xc1, 0xa4, 0x07, 0xca, 0xb9, 0xa3, 0x07, 0xca, 0xb1, 0xa2,
+ 0x07, 0xca, 0xa9, 0xa1, 0x07, 0xca, 0xa1, 0xa0, 0x07, 0xca, 0x99, 0x9f,
+ 0x07, 0xca, 0x91, 0x9e, 0x07, 0xca, 0x89, 0x9d, 0x07, 0xca, 0x80, 0x88,
+ 0x07, 0xca, 0x79, 0x87, 0x07, 0xca, 0x71, 0x86, 0x07, 0xca, 0x69, 0x85,
+ 0x07, 0xca, 0x61, 0x84, 0x07, 0xca, 0x59, 0x83, 0x07, 0xca, 0x51, 0xa6,
+ 0x07, 0xca, 0x49, 0xa5, 0x07, 0xca, 0x41, 0xa4, 0x07, 0xca, 0x39, 0xa3,
+ 0x07, 0xca, 0x31, 0xa2, 0x07, 0xca, 0x29, 0xa1, 0x07, 0xca, 0x21, 0xa0,
+ 0x07, 0xca, 0x19, 0x9f, 0x07, 0xca, 0x11, 0x9e, 0x07, 0xca, 0x09, 0x9d,
+ 0x07, 0xca, 0x00, 0x88, 0x07, 0xc9, 0xf9, 0x87, 0x07, 0xc9, 0xf1, 0x86,
+ 0x07, 0xc9, 0xe9, 0x85, 0x07, 0xc9, 0xe1, 0x84, 0x07, 0xc9, 0xd9, 0x83,
+ 0x07, 0xc9, 0xd1, 0xa6, 0x07, 0xc9, 0xc9, 0xa5, 0x07, 0xc9, 0xc1, 0xa4,
+ 0x07, 0xc9, 0xb9, 0xa3, 0x07, 0xc9, 0xb1, 0xa2, 0x07, 0xc9, 0xa9, 0xa1,
+ 0x07, 0xc9, 0xa1, 0xa0, 0x07, 0xc9, 0x99, 0x9d, 0x07, 0xc9, 0x81, 0x9e,
+ 0x07, 0xc9, 0x89, 0x9f, 0x07, 0xc9, 0x90, 0xa4, 0x07, 0xc9, 0x39, 0xa3,
+ 0x07, 0xc9, 0x31, 0xa2, 0x07, 0xc9, 0x29, 0xa1, 0x07, 0xc9, 0x21, 0xa0,
+ 0x07, 0xc9, 0x19, 0x9f, 0x07, 0xc9, 0x11, 0x9d, 0x07, 0xc9, 0x01, 0x9e,
+ 0x07, 0xc9, 0x09, 0xa5, 0x07, 0xc9, 0x41, 0xa6, 0x07, 0xc9, 0x49, 0x83,
+ 0x07, 0xc9, 0x51, 0x84, 0x07, 0xc9, 0x59, 0x85, 0x07, 0xc9, 0x61, 0x86,
+ 0x07, 0xc9, 0x69, 0x87, 0x07, 0xc9, 0x71, 0x88, 0x07, 0xc9, 0x78, 0x86,
+ 0x07, 0xc8, 0xe9, 0x85, 0x07, 0xc8, 0xe1, 0x84, 0x07, 0xc8, 0xd9, 0x83,
+ 0x07, 0xc8, 0xd1, 0xa6, 0x07, 0xc8, 0xc9, 0xa5, 0x07, 0xc8, 0xc1, 0xa4,
+ 0x07, 0xc8, 0xb9, 0xa3, 0x07, 0xc8, 0xb1, 0xa2, 0x07, 0xc8, 0xa9, 0xa1,
+ 0x07, 0xc8, 0xa1, 0xa0, 0x07, 0xc8, 0x99, 0x9f, 0x07, 0xc8, 0x91, 0x9e,
+ 0x07, 0xc8, 0x89, 0x9d, 0x07, 0xc8, 0x81, 0x87, 0x07, 0xc8, 0xf1, 0x88,
+ 0x07, 0xc8, 0xf8, 0x88, 0x07, 0xc8, 0x79, 0x87, 0x07, 0xc8, 0x71, 0x86,
+ 0x07, 0xc8, 0x69, 0x85, 0x07, 0xc8, 0x61, 0x84, 0x07, 0xc8, 0x59, 0x83,
+ 0x07, 0xc8, 0x51, 0xa6, 0x07, 0xc8, 0x49, 0xa5, 0x07, 0xc8, 0x41, 0xa4,
+ 0x07, 0xc8, 0x39, 0xa3, 0x07, 0xc8, 0x31, 0xa2, 0x07, 0xc8, 0x29, 0xa1,
+ 0x07, 0xc8, 0x21, 0xa0, 0x07, 0xc8, 0x19, 0x9d, 0x07, 0xc8, 0x01, 0x9e,
+ 0x07, 0xc8, 0x09, 0x9f, 0x07, 0xc8, 0x10, 0xc3, 0xa6, 0x59, 0x01, 0x75,
+ 0x81, 0xc2, 0x02, 0xe0, 0x01, 0x76, 0x29, 0xc5, 0x6f, 0xb7, 0x01, 0x76,
+ 0x41, 0xc4, 0x08, 0x92, 0x01, 0x76, 0x49, 0xc3, 0x07, 0xe5, 0x01, 0x77,
+ 0x38, 0xc3, 0x1a, 0x7c, 0x01, 0x76, 0x81, 0xc3, 0x00, 0xfe, 0x01, 0x76,
+ 0xa0, 0xc3, 0x08, 0x48, 0x01, 0x76, 0x99, 0xc3, 0x47, 0x24, 0x01, 0x76,
+ 0xd0, 0xcd, 0x7f, 0x4c, 0x01, 0x76, 0xc9, 0xc4, 0xe4, 0x33, 0x01, 0x77,
+ 0x71, 0xc5, 0xd5, 0x65, 0x01, 0x77, 0x98, 0xc2, 0x00, 0x35, 0x01, 0x76,
+ 0xe1, 0xc3, 0x04, 0x5a, 0x01, 0x77, 0x29, 0xc3, 0x23, 0x6d, 0x01, 0x77,
+ 0x50, 0xc2, 0x00, 0xfe, 0x01, 0x77, 0x01, 0xc3, 0x18, 0x11, 0x01, 0x77,
+ 0x60, 0xc3, 0x05, 0x14, 0x01, 0x74, 0x11, 0x16, 0x42, 0x1a, 0x7a, 0xc3,
+ 0x05, 0x14, 0x01, 0x74, 0xa1, 0xc3, 0x02, 0x9f, 0x01, 0x74, 0xa8, 0x0a,
+ 0xc2, 0x1a, 0x86, 0x19, 0xc2, 0x1a, 0x92, 0xc6, 0xc6, 0x9b, 0x01, 0x77,
+ 0x48, 0xc2, 0x02, 0xa0, 0x01, 0x74, 0x79, 0xc4, 0x02, 0xde, 0x01, 0x74,
+ 0x80, 0xc3, 0x05, 0x14, 0x01, 0x74, 0xb1, 0xc3, 0x02, 0x9f, 0x01, 0x74,
+ 0xb8, 0xc3, 0x05, 0x14, 0x01, 0x76, 0xa9, 0xc3, 0x02, 0x9f, 0x01, 0x76,
+ 0xb0, 0xc3, 0x05, 0x14, 0x01, 0x75, 0x09, 0xc3, 0x02, 0x9f, 0x01, 0x75,
+ 0x10, 0xc3, 0x05, 0x14, 0x01, 0x76, 0x69, 0xc3, 0x02, 0x9f, 0x01, 0x76,
+ 0x70, 0xc4, 0xe4, 0x33, 0x01, 0x77, 0x69, 0xc5, 0xd5, 0x65, 0x01, 0x77,
+ 0x90, 0xc2, 0x02, 0xa0, 0x01, 0x76, 0xf1, 0xc4, 0x02, 0xde, 0x01, 0x76,
+ 0xf8, 0xc2, 0x02, 0xa0, 0x01, 0x75, 0xf9, 0xc4, 0x02, 0xde, 0x01, 0x76,
+ 0x00, 0x92, 0x01, 0x8e, 0x59, 0x9c, 0x01, 0x8e, 0x72, 0x02, 0x1a, 0x9e,
+ 0x89, 0x01, 0x8e, 0x40, 0x09, 0xc2, 0x1a, 0xa2, 0x98, 0x05, 0x5b, 0xa9,
+ 0x97, 0x05, 0x5b, 0xa1, 0x91, 0x05, 0x5b, 0x99, 0x8b, 0x05, 0x5b, 0x91,
+ 0x87, 0x05, 0x5b, 0x89, 0x83, 0x05, 0x5b, 0x81, 0x1b, 0xc2, 0x1a, 0xba,
+ 0x19, 0xc2, 0x1a, 0xd2, 0x16, 0xc2, 0x1a, 0xea, 0x10, 0xc2, 0x1a, 0xfe,
+ 0x0a, 0xc2, 0x1b, 0x19, 0x0f, 0xc2, 0x1b, 0x37, 0x0e, 0xc2, 0x1b, 0x4f,
+ 0xc2, 0x02, 0x2b, 0x05, 0x5b, 0xb9, 0x42, 0x00, 0xe3, 0xc2, 0x1b, 0x67,
+ 0x95, 0x05, 0x5c, 0xeb, 0x02, 0x1b, 0x7f, 0x06, 0x42, 0x1b, 0x97, 0x83,
+ 0x00, 0x9d, 0x01, 0x87, 0x00, 0x9d, 0x09, 0x8b, 0x00, 0x9d, 0x11, 0x91,
+ 0x00, 0x9d, 0x19, 0x97, 0x00, 0x9d, 0x21, 0x98, 0x00, 0x9d, 0x29, 0x09,
+ 0xc2, 0x1b, 0xb5, 0xc2, 0x02, 0x2b, 0x00, 0x9d, 0x39, 0x0a, 0xc2, 0x1b,
+ 0xcd, 0x0e, 0xc2, 0x1b, 0xeb, 0x0f, 0xc2, 0x1c, 0x03, 0x10, 0xc2, 0x1c,
+ 0x1b, 0x42, 0x00, 0xe3, 0xc2, 0x1c, 0x36, 0x95, 0x00, 0x9e, 0x6b, 0x02,
+ 0x1c, 0x4e, 0x06, 0xc2, 0x1c, 0x66, 0x16, 0xc2, 0x1c, 0x84, 0x19, 0xc2,
+ 0x1c, 0x98, 0x1b, 0x42, 0x1c, 0xb0, 0x00, 0x42, 0x1c, 0xc8, 0xcd, 0x77,
+ 0x94, 0x0f, 0xa5, 0xc8, 0xc3, 0x39, 0x6e, 0x08, 0x8a, 0x21, 0xc2, 0x04,
+ 0xc6, 0x08, 0x89, 0x18, 0xc2, 0x04, 0xc6, 0x08, 0x89, 0x09, 0xc3, 0xa9,
+ 0x9c, 0x08, 0x89, 0x00, 0xc3, 0x39, 0x6e, 0x08, 0x88, 0xf1, 0xc2, 0x04,
+ 0xc6, 0x08, 0x88, 0xe8, 0xc3, 0x39, 0x6e, 0x08, 0x88, 0xe1, 0xc2, 0x04,
+ 0xc6, 0x08, 0x88, 0xd8, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0xd1, 0xc3, 0x3c,
+ 0x8a, 0x08, 0x88, 0xa9, 0xc3, 0xa9, 0x9c, 0x08, 0x88, 0x81, 0xc3, 0x4f,
+ 0x37, 0x08, 0x88, 0x58, 0xc3, 0x39, 0x6e, 0x08, 0x88, 0xc9, 0xc2, 0x04,
+ 0xc6, 0x08, 0x88, 0xc1, 0x06, 0x42, 0x1c, 0xd4, 0xc3, 0x39, 0x6e, 0x08,
+ 0x88, 0xb9, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0xb1, 0x16, 0x42, 0x1c, 0xe0,
+ 0xc3, 0x39, 0x6e, 0x08, 0x88, 0x79, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x70,
+ 0xc3, 0x39, 0x6e, 0x08, 0x88, 0x69, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x60,
+ 0xc3, 0x39, 0x6e, 0x08, 0x88, 0x51, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x48,
+ 0xc3, 0x39, 0x6e, 0x08, 0x88, 0x41, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x38,
+ 0x87, 0x08, 0x89, 0x63, 0x02, 0x1c, 0xec, 0x83, 0x08, 0x89, 0x3b, 0x02,
+ 0x1c, 0xf0, 0x91, 0x08, 0x89, 0x73, 0x02, 0x1c, 0xfc, 0x97, 0x08, 0x89,
+ 0x53, 0x02, 0x1d, 0x00, 0x8b, 0x08, 0x89, 0x42, 0x02, 0x1d, 0x04, 0xc4,
+ 0x26, 0x78, 0x08, 0x89, 0xf9, 0xc5, 0x06, 0xdb, 0x08, 0x89, 0xf1, 0x15,
+ 0xc2, 0x1d, 0x08, 0x08, 0xc2, 0x1d, 0x14, 0x16, 0xc2, 0x1d, 0x20, 0xc3,
+ 0x05, 0x14, 0x08, 0x89, 0xb9, 0xc4, 0x15, 0xe7, 0x08, 0x89, 0xb0, 0xc7,
+ 0x40, 0xe5, 0x08, 0x88, 0x11, 0xc8, 0x14, 0x38, 0x08, 0x88, 0x09, 0xcb,
+ 0x1e, 0x89, 0x08, 0x88, 0x00, 0x8a, 0x05, 0x52, 0x69, 0x8f, 0x05, 0x52,
+ 0x61, 0xc2, 0x00, 0x75, 0x05, 0x52, 0x18, 0x87, 0x05, 0x51, 0x90, 0x97,
+ 0x05, 0x51, 0x89, 0x8b, 0x05, 0x51, 0x81, 0x83, 0x05, 0x51, 0x48, 0x87,
+ 0x05, 0x51, 0x70, 0x8b, 0x05, 0x51, 0x58, 0x83, 0x05, 0x51, 0x39, 0xc2,
+ 0x0d, 0xf6, 0x05, 0x51, 0x30, 0x09, 0xc2, 0x1d, 0x2c, 0x83, 0x05, 0x50,
+ 0xc1, 0xc2, 0x0f, 0xe1, 0x05, 0x50, 0xb9, 0x0a, 0x42, 0x1d, 0x36, 0xc2,
+ 0x00, 0xd0, 0x05, 0x50, 0x49, 0x83, 0x05, 0x50, 0x40, 0xc2, 0x00, 0xd0,
+ 0x05, 0x50, 0x39, 0x83, 0x05, 0x50, 0x30, 0x8b, 0x05, 0x50, 0x20, 0xc2,
+ 0x00, 0xcc, 0x05, 0x52, 0x59, 0x8e, 0x05, 0x52, 0x51, 0x94, 0x05, 0x52,
+ 0x49, 0x9b, 0x05, 0x52, 0x41, 0x92, 0x05, 0x52, 0x39, 0x90, 0x05, 0x52,
+ 0x33, 0x02, 0x1d, 0x46, 0x96, 0x05, 0x52, 0x29, 0xc2, 0x11, 0xee, 0x05,
+ 0x52, 0x21, 0x89, 0x05, 0x52, 0x09, 0x8d, 0x05, 0x52, 0x00, 0xc2, 0x01,
+ 0x5d, 0x05, 0x51, 0x09, 0x83, 0x05, 0x50, 0xe9, 0xc2, 0x00, 0xd0, 0x05,
+ 0x50, 0xf0, 0x83, 0x05, 0x51, 0x01, 0xc2, 0x0f, 0xe1, 0x05, 0x50, 0xf8,
+ 0xc2, 0x00, 0xd0, 0x05, 0x50, 0xe1, 0xc2, 0x00, 0xb0, 0x05, 0x50, 0xd9,
+ 0x83, 0x05, 0x50, 0xd0, 0xc2, 0x0e, 0x9a, 0x05, 0x50, 0xc9, 0xc2, 0x00,
+ 0xd0, 0x05, 0x50, 0xb1, 0x83, 0x05, 0x50, 0xa8, 0xc2, 0x00, 0xd0, 0x05,
+ 0x50, 0xa1, 0x83, 0x05, 0x50, 0x98, 0xc2, 0x00, 0xd0, 0x05, 0x50, 0x79,
+ 0x83, 0x05, 0x50, 0x70, 0xc2, 0x00, 0xd0, 0x05, 0x50, 0x69, 0x83, 0x05,
+ 0x50, 0x60, 0xcb, 0x97, 0xf5, 0x05, 0x52, 0xf1, 0xc4, 0x19, 0x53, 0x05,
+ 0x52, 0xe8, 0xc4, 0x18, 0x10, 0x05, 0x52, 0xb9, 0xc2, 0x22, 0xcc, 0x05,
+ 0x52, 0xb0, 0xc3, 0x0d, 0x14, 0x05, 0x52, 0xa9, 0xc3, 0x09, 0x9e, 0x05,
+ 0x52, 0xa0, 0xc4, 0x02, 0xde, 0x05, 0x52, 0x99, 0xc2, 0x02, 0xa0, 0x05,
+ 0x52, 0x90, 0xc8, 0x0d, 0x03, 0x08, 0x7e, 0x58, 0x19, 0xc2, 0x1d, 0x4a,
+ 0xc2, 0x00, 0xc4, 0x08, 0x7e, 0x49, 0xc4, 0x02, 0xde, 0x08, 0x7e, 0x38,
+ 0xc3, 0x11, 0xef, 0x08, 0x7e, 0x19, 0xca, 0xa5, 0xf8, 0x08, 0x7d, 0x89,
+ 0xc5, 0xdc, 0x2c, 0x08, 0x7d, 0xf8, 0xc2, 0x00, 0x8e, 0x08, 0x7d, 0xc8,
+ 0xc4, 0x36, 0xb5, 0x08, 0x7d, 0x81, 0xc3, 0x16, 0x5a, 0x08, 0x7e, 0x00,
+ 0xc9, 0xad, 0x41, 0x01, 0x31, 0x49, 0xc8, 0xb8, 0xa2, 0x01, 0x31, 0x40,
+ 0xc5, 0xcb, 0xf4, 0x0f, 0xaa, 0x13, 0x02, 0x1d, 0x54, 0x4a, 0x9b, 0xf8,
+ 0x42, 0x1d, 0x5a, 0xe0, 0x0b, 0x07, 0x0f, 0x8c, 0x50, 0x08, 0xc2, 0x1d,
+ 0x66, 0x8b, 0x0f, 0x00, 0x5b, 0x02, 0x1d, 0x72, 0x04, 0xc2, 0x1d, 0x84,
+ 0x1b, 0xc2, 0x1d, 0x90, 0x15, 0xc2, 0x1d, 0xa2, 0xc6, 0x7b, 0xab, 0x0f,
+ 0x00, 0xe9, 0x16, 0xc2, 0x1d, 0xb2, 0xc4, 0xdf, 0x9f, 0x0f, 0x00, 0xc1,
+ 0xc3, 0xc5, 0x6f, 0x0f, 0x00, 0xb1, 0xc5, 0xd7, 0xb3, 0x0f, 0x00, 0x99,
+ 0xc6, 0xcb, 0xed, 0x0f, 0x00, 0x91, 0xc3, 0x06, 0xc5, 0x0f, 0x00, 0x89,
+ 0xc5, 0xdb, 0x78, 0x0f, 0x00, 0x81, 0xc7, 0x60, 0xdd, 0x0f, 0x00, 0x79,
+ 0xc7, 0xc4, 0x48, 0x0f, 0x00, 0x71, 0xc4, 0xe1, 0x6b, 0x0f, 0x00, 0x69,
+ 0x06, 0xc2, 0x1d, 0xbe, 0x1c, 0xc2, 0x1d, 0xca, 0xc7, 0xc1, 0xf5, 0x0f,
+ 0x00, 0x19, 0xc4, 0xdf, 0x83, 0x0f, 0x00, 0x11, 0xc3, 0xe5, 0x4b, 0x0f,
+ 0x00, 0x00, 0x44, 0x29, 0xb5, 0xc2, 0x1d, 0xd6, 0x03, 0x42, 0x1d, 0xf4,
+ 0xc5, 0x00, 0xd4, 0x01, 0x07, 0x81, 0xc5, 0x05, 0x02, 0x00, 0x1a, 0xc8,
+ 0xcc, 0x80, 0xc1, 0x01, 0x07, 0x39, 0x4c, 0x05, 0xf6, 0x42, 0x1e, 0x06,
+ 0xc5, 0x05, 0x02, 0x00, 0xef, 0xe9, 0xc5, 0x00, 0xd4, 0x00, 0x1a, 0x60,
+ 0x02, 0xc2, 0x1e, 0x12, 0x00, 0x42, 0x1e, 0x1e, 0x43, 0x00, 0x2e, 0xc2,
+ 0x1e, 0x2d, 0x43, 0x00, 0x75, 0x42, 0x1e, 0x35, 0x45, 0x01, 0xd5, 0xc2,
+ 0x1e, 0x47, 0xd2, 0x49, 0xf7, 0x00, 0x19, 0x10, 0x00, 0xc2, 0x1e, 0x53,
+ 0x46, 0x01, 0x4a, 0x42, 0x1e, 0x6f, 0x43, 0x00, 0x75, 0xc2, 0x1e, 0x7b,
+ 0xc6, 0x80, 0x30, 0x00, 0x19, 0x90, 0x4d, 0x29, 0xb9, 0xc2, 0x1e, 0x8b,
+ 0x55, 0x37, 0xac, 0x42, 0x1f, 0x0e, 0xde, 0x0f, 0x22, 0x00, 0xd5, 0xc9,
+ 0x46, 0x19, 0x9d, 0x42, 0x1f, 0x22, 0xcc, 0x86, 0x91, 0x01, 0x07, 0x49,
+ 0xd5, 0x32, 0xea, 0x00, 0xef, 0xc8, 0xc8, 0xb9, 0x8a, 0x01, 0x07, 0x41,
+ 0xcc, 0x83, 0x0d, 0x00, 0xd6, 0x59, 0xc3, 0x02, 0xa3, 0x00, 0xd5, 0xa0,
+ 0x00, 0x42, 0x1f, 0x34, 0x44, 0x00, 0x5a, 0xc2, 0x1f, 0x4c, 0x16, 0xc2,
+ 0x1f, 0x56, 0x42, 0x01, 0x48, 0x42, 0x1f, 0x60, 0xcb, 0x8f, 0xc0, 0x00,
+ 0xef, 0xd9, 0x49, 0xb4, 0x7f, 0x42, 0x1f, 0x6c, 0xc5, 0xd9, 0xf7, 0x00,
+ 0xd5, 0x89, 0xc6, 0x05, 0x01, 0x00, 0x19, 0x20, 0xd8, 0x24, 0x23, 0x01,
+ 0x07, 0x21, 0xc6, 0xce, 0xc3, 0x01, 0x07, 0x19, 0x15, 0xc2, 0x1f, 0x7e,
+ 0xc6, 0x02, 0xd1, 0x01, 0x06, 0xeb, 0x02, 0x1f, 0x8a, 0xc7, 0x3a, 0x19,
+ 0x01, 0x06, 0xf8, 0xcc, 0x89, 0x25, 0x01, 0x06, 0xc9, 0xcb, 0x02, 0x5c,
+ 0x01, 0x06, 0xa8, 0xcd, 0x33, 0xee, 0x00, 0x24, 0x49, 0x48, 0x0d, 0x04,
+ 0xc2, 0x1f, 0x90, 0x12, 0xc2, 0x1f, 0x9c, 0xce, 0x6c, 0xa6, 0x00, 0x24,
+ 0x29, 0x16, 0xc2, 0x1f, 0xac, 0x47, 0x02, 0x0e, 0xc2, 0x1f, 0xc1, 0xc5,
+ 0xda, 0x88, 0x05, 0x33, 0x79, 0xc6, 0x4a, 0x9f, 0x05, 0x33, 0xe0, 0xc6,
+ 0x05, 0x01, 0x00, 0x19, 0x68, 0xc3, 0x01, 0xe7, 0x00, 0x18, 0x63, 0x02,
+ 0x20, 0x2f, 0xc9, 0x1e, 0x8b, 0x00, 0x18, 0x80, 0x44, 0x0a, 0x8c, 0xc2,
+ 0x20, 0x35, 0xcf, 0x60, 0xa8, 0x07, 0xf1, 0x32, 0x02, 0x20, 0x44, 0xd5,
+ 0x36, 0x86, 0x01, 0x06, 0x99, 0x15, 0x42, 0x20, 0x4a, 0xcd, 0x7d, 0x92,
+ 0x00, 0xd6, 0x29, 0xc4, 0x05, 0x03, 0x00, 0x19, 0xd8, 0xe0, 0x08, 0x47,
+ 0x00, 0xd5, 0xd0, 0xc3, 0x0f, 0xbe, 0x00, 0x18, 0x33, 0x02, 0x20, 0x56,
+ 0x45, 0x32, 0xf5, 0x42, 0x20, 0x62, 0xc4, 0x00, 0x49, 0x00, 0xef, 0xb9,
+ 0xc5, 0x00, 0x2c, 0x00, 0xef, 0xb0, 0xd1, 0x2f, 0xfb, 0x01, 0x84, 0xc9,
+ 0xd6, 0x2f, 0xf6, 0x01, 0x84, 0xd0, 0x46, 0x9a, 0x3c, 0xc2, 0x20, 0x6e,
+ 0xd1, 0x3c, 0x67, 0x00, 0x1a, 0x70, 0x47, 0x1d, 0x71, 0xc2, 0x20, 0x7a,
+ 0xc6, 0x65, 0x43, 0x00, 0xd5, 0x90, 0xc6, 0x00, 0xd3, 0x00, 0xee, 0x70,
+ 0xc2, 0x00, 0xd1, 0x08, 0x1b, 0xb1, 0xc3, 0x63, 0x78, 0x08, 0x1b, 0xb9,
+ 0xc4, 0xde, 0xdb, 0x08, 0x1b, 0xc1, 0xc5, 0xdb, 0x6e, 0x08, 0x1b, 0xc9,
+ 0xc3, 0xe6, 0x0b, 0x08, 0x1b, 0xd0, 0x02, 0xc2, 0x20, 0x86, 0x00, 0x42,
+ 0x20, 0x98, 0xc5, 0x00, 0xd4, 0x00, 0xd6, 0x41, 0xc5, 0x05, 0x02, 0x00,
+ 0x18, 0xf8, 0x4a, 0x57, 0x93, 0xc2, 0x20, 0xb0, 0xd4, 0x3e, 0x08, 0x00,
+ 0x19, 0x08, 0xc5, 0x00, 0xd4, 0x00, 0x19, 0xe9, 0xc5, 0x05, 0x02, 0x00,
+ 0x1a, 0x98, 0xc5, 0x00, 0xd4, 0x00, 0x18, 0x69, 0xc5, 0x05, 0x02, 0x00,
+ 0x19, 0x48, 0xc4, 0x26, 0x78, 0x0e, 0x9b, 0x89, 0xc5, 0x06, 0xdb, 0x0e,
+ 0x9b, 0x81, 0x15, 0xc2, 0x20, 0xc2, 0x08, 0xc2, 0x20, 0xce, 0x16, 0xc2,
+ 0x20, 0xda, 0xc3, 0x05, 0x14, 0x0e, 0x9b, 0x48, 0xc4, 0x26, 0x78, 0x0e,
+ 0x9b, 0x41, 0xc5, 0x06, 0xdb, 0x0e, 0x9b, 0x39, 0x15, 0xc2, 0x20, 0xe6,
+ 0x08, 0xc2, 0x20, 0xf2, 0x16, 0xc2, 0x20, 0xfe, 0xc3, 0x05, 0x14, 0x0e,
+ 0x9b, 0x00, 0xc7, 0x80, 0x70, 0x01, 0x17, 0xe9, 0x48, 0x00, 0x5f, 0xc2,
+ 0x21, 0x0a, 0xd6, 0x2c, 0x86, 0x01, 0x17, 0xd0, 0xcf, 0x4c, 0x01, 0x01,
+ 0x15, 0x9b, 0x02, 0x21, 0x10, 0xc6, 0x00, 0x4e, 0x01, 0x10, 0x58, 0x0d,
+ 0xc2, 0x21, 0x16, 0x0a, 0xc2, 0x21, 0x26, 0x42, 0x01, 0x30, 0xc2, 0x21,
+ 0x32, 0x15, 0xc2, 0x21, 0x3e, 0x06, 0xc2, 0x21, 0x54, 0x03, 0xc2, 0x21,
+ 0x66, 0xc4, 0xdf, 0x33, 0x01, 0x64, 0x19, 0xc3, 0xd1, 0x8c, 0x01, 0x64,
+ 0x49, 0xc4, 0xde, 0xdb, 0x01, 0x64, 0x69, 0x16, 0xc2, 0x21, 0x72, 0xc5,
+ 0xd8, 0xcb, 0x01, 0x64, 0x99, 0x0e, 0xc2, 0x21, 0x7e, 0xc2, 0x02, 0x2f,
+ 0x01, 0x64, 0xc9, 0xc2, 0x00, 0xec, 0x01, 0x64, 0xd9, 0x91, 0x01, 0x64,
+ 0xfb, 0x02, 0x21, 0x8a, 0x12, 0xc2, 0x21, 0x96, 0xc2, 0x00, 0x79, 0x01,
+ 0x65, 0x19, 0xc2, 0x00, 0xe4, 0x01, 0x65, 0x49, 0x08, 0xc2, 0x21, 0xa0,
+ 0x42, 0x07, 0x2f, 0xc2, 0x21, 0xaa, 0xcd, 0x7c, 0x40, 0x01, 0x67, 0x98,
+ 0x0d, 0xc2, 0x21, 0xb6, 0xc5, 0xda, 0xb5, 0x01, 0x67, 0x29, 0xc5, 0xd8,
+ 0x08, 0x01, 0x67, 0x31, 0x15, 0xc2, 0x21, 0xc2, 0xc6, 0xd1, 0x45, 0x01,
+ 0x67, 0x40, 0x0a, 0xc2, 0x21, 0xce, 0x42, 0x01, 0x30, 0xc2, 0x21, 0xda,
+ 0x15, 0xc2, 0x21, 0xe6, 0x06, 0xc2, 0x21, 0xfc, 0x03, 0xc2, 0x22, 0x0e,
+ 0xc4, 0xdf, 0x33, 0x01, 0x64, 0x11, 0xc3, 0xd1, 0x8c, 0x01, 0x64, 0x41,
+ 0xc4, 0xde, 0xdb, 0x01, 0x64, 0x61, 0x16, 0xc2, 0x22, 0x1a, 0xc5, 0xd8,
+ 0xcb, 0x01, 0x64, 0x91, 0x0d, 0xc2, 0x22, 0x26, 0x0e, 0xc2, 0x22, 0x36,
+ 0xc2, 0x02, 0x2f, 0x01, 0x64, 0xc1, 0xc2, 0x00, 0xec, 0x01, 0x64, 0xd1,
+ 0x91, 0x01, 0x64, 0xf3, 0x02, 0x22, 0x42, 0x12, 0xc2, 0x22, 0x4e, 0xc2,
+ 0x00, 0x79, 0x01, 0x65, 0x11, 0xc2, 0x00, 0xe4, 0x01, 0x65, 0x41, 0x08,
+ 0xc2, 0x22, 0x58, 0x42, 0x07, 0x2f, 0xc2, 0x22, 0x62, 0xcd, 0x7c, 0x40,
+ 0x01, 0x67, 0x90, 0xc8, 0xbb, 0xa2, 0x01, 0x67, 0x79, 0x49, 0xac, 0x21,
+ 0x42, 0x22, 0x6e, 0xc3, 0x05, 0x14, 0x08, 0x17, 0x09, 0x16, 0xc2, 0x22,
+ 0x7a, 0x08, 0xc2, 0x22, 0x86, 0x15, 0xc2, 0x22, 0x92, 0xc5, 0x06, 0xdb,
+ 0x08, 0x17, 0x41, 0xc4, 0x26, 0x78, 0x08, 0x17, 0x48, 0x16, 0xc2, 0x22,
+ 0x9e, 0x08, 0xc2, 0x22, 0xac, 0x15, 0xc2, 0x22, 0xb4, 0x45, 0x06, 0xdb,
+ 0xc2, 0x22, 0xc0, 0x44, 0x26, 0x78, 0xc2, 0x22, 0xca, 0xcb, 0x0d, 0x00,
+ 0x08, 0x17, 0x98, 0xcb, 0x9a, 0x5d, 0x0f, 0xa7, 0x59, 0xcc, 0x81, 0x81,
+ 0x0f, 0xa7, 0x50, 0xc7, 0x57, 0x8b, 0x0f, 0x98, 0x11, 0xd0, 0x59, 0x82,
+ 0x01, 0x52, 0x62, 0x02, 0x22, 0xd6, 0xc4, 0x0e, 0xa6, 0x01, 0x56, 0x7b,
+ 0x02, 0x22, 0xdc, 0xc6, 0x2d, 0xd0, 0x01, 0x56, 0x82, 0x02, 0x22, 0xe2,
+ 0xcf, 0x62, 0xe2, 0x01, 0x11, 0x91, 0xd2, 0x4e, 0x77, 0x01, 0x4a, 0x08,
+ 0xd3, 0x44, 0x7c, 0x01, 0x0d, 0xb9, 0xe0, 0x0b, 0xa7, 0x01, 0x5b, 0x70,
+ 0xdb, 0x16, 0xda, 0x0f, 0xae, 0xc1, 0x46, 0x01, 0x4a, 0x42, 0x22, 0xe8,
+ 0xe0, 0x09, 0x07, 0x0f, 0xa8, 0x18, 0x19, 0xc2, 0x22, 0xf1, 0x42, 0x00,
+ 0xc4, 0xc2, 0x22, 0xfb, 0x44, 0x02, 0xde, 0x42, 0x23, 0x07, 0x45, 0x66,
+ 0xb1, 0xc2, 0x23, 0x13, 0x44, 0x00, 0x2d, 0x42, 0x23, 0x1f, 0xc7, 0xc1,
+ 0xd9, 0x0f, 0xab, 0x21, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xc0, 0x44, 0x18,
+ 0x10, 0xc2, 0x23, 0x2b, 0x42, 0x22, 0xcc, 0x42, 0x23, 0x37, 0x43, 0x0d,
+ 0x14, 0xc2, 0x23, 0x43, 0x43, 0x09, 0x9e, 0x42, 0x23, 0x4f, 0xc7, 0xc1,
+ 0xd9, 0x0f, 0xaa, 0xe1, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0x80, 0x44, 0x0d,
+ 0x21, 0xc2, 0x23, 0x5b, 0xd8, 0x02, 0xef, 0x0f, 0x8b, 0x71, 0x85, 0x0f,
+ 0x8b, 0x69, 0x86, 0x0f, 0x89, 0x68, 0xdb, 0x15, 0x45, 0x01, 0x3d, 0x91,
+ 0xd8, 0x22, 0x8b, 0x01, 0x1c, 0x49, 0xcb, 0x8f, 0x73, 0x0f, 0x8b, 0x79,
+ 0x46, 0xc3, 0x3f, 0x42, 0x23, 0x65, 0x45, 0x01, 0xfd, 0xc2, 0x23, 0xab,
+ 0x9c, 0x0f, 0x89, 0x70, 0x0b, 0xc2, 0x23, 0xb7, 0xc3, 0x02, 0x2c, 0x01,
+ 0x14, 0xe9, 0x11, 0x42, 0x23, 0xc3, 0x45, 0x0b, 0x12, 0xc2, 0x23, 0xcd,
+ 0xc8, 0x00, 0xcb, 0x01, 0x4e, 0x00, 0x16, 0xc2, 0x23, 0xd9, 0xc8, 0x4b,
+ 0x5f, 0x01, 0x23, 0x91, 0x07, 0xc2, 0x23, 0xee, 0x15, 0xc2, 0x23, 0xfa,
+ 0x08, 0x42, 0x24, 0x06, 0xc7, 0x01, 0x93, 0x0f, 0xbe, 0xab, 0x02, 0x24,
+ 0x10, 0xc4, 0x03, 0x4e, 0x01, 0x14, 0xb8, 0xd0, 0x5b, 0x32, 0x01, 0x14,
+ 0xd9, 0x4c, 0x04, 0x1b, 0x42, 0x24, 0x16, 0xcc, 0x87, 0x51, 0x01, 0x14,
+ 0xd1, 0xce, 0x61, 0xd5, 0x01, 0x4d, 0xc0, 0xc4, 0x1d, 0x1e, 0x01, 0x14,
+ 0xb1, 0x49, 0x1f, 0x19, 0x42, 0x24, 0x22, 0xc3, 0x25, 0xd6, 0x01, 0x14,
+ 0xa9, 0xcc, 0x8b, 0x29, 0x01, 0x4d, 0xc9, 0xc7, 0x36, 0x16, 0x01, 0x4d,
+ 0xb9, 0xca, 0x9a, 0xa4, 0x01, 0x81, 0xb0, 0x49, 0x9f, 0x87, 0xc2, 0x24,
+ 0x28, 0x5b, 0x16, 0xf5, 0xc2, 0x24, 0x74, 0xd1, 0x53, 0x87, 0x0f, 0xb6,
+ 0x40, 0xc5, 0x1c, 0xb5, 0x01, 0x4d, 0xf9, 0xc5, 0xdc, 0x4a, 0x01, 0x5d,
+ 0xf8, 0x50, 0x4b, 0xf0, 0xc2, 0x24, 0x7c, 0x48, 0xbc, 0xc2, 0x42, 0x24,
+ 0x88, 0x03, 0xc2, 0x24, 0xc0, 0x46, 0x02, 0xae, 0xc2, 0x24, 0xc6, 0x0e,
+ 0xc2, 0x24, 0xd2, 0xd0, 0x5d, 0x52, 0x01, 0x2e, 0x89, 0xcd, 0x7a, 0x93,
+ 0x01, 0x2e, 0x69, 0x43, 0x02, 0x9f, 0xc2, 0x24, 0xde, 0x15, 0xc2, 0x24,
+ 0xe4, 0xce, 0x0e, 0xf1, 0x01, 0x4d, 0xa8, 0xe0, 0x07, 0xa7, 0x01, 0x4d,
+ 0xd0, 0xa2, 0x09, 0x1b, 0x5b, 0x02, 0x24, 0xf0, 0xd1, 0x54, 0x20, 0x09,
+ 0x2a, 0x11, 0x8f, 0x09, 0x1b, 0x71, 0xc3, 0x2b, 0x88, 0x09, 0x1b, 0x68,
+ 0xa4, 0x09, 0x2a, 0x09, 0xc2, 0xd1, 0x86, 0x09, 0x1b, 0x09, 0x89, 0x09,
+ 0x1b, 0x01, 0x00, 0x42, 0x24, 0xf6, 0xc2, 0xde, 0xe9, 0x09, 0x1b, 0x49,
+ 0x89, 0x09, 0x1b, 0x41, 0x84, 0x09, 0x1b, 0x33, 0x02, 0x25, 0x02, 0xa0,
+ 0x09, 0x1b, 0x29, 0xc8, 0xb5, 0xba, 0x09, 0x1b, 0x20, 0x97, 0x09, 0x19,
+ 0xbb, 0x02, 0x25, 0x08, 0x9f, 0x09, 0x19, 0x5b, 0x02, 0x25, 0x17, 0x8b,
+ 0x09, 0x19, 0xab, 0x02, 0x25, 0x1b, 0xa1, 0x09, 0x19, 0xa1, 0x00, 0x42,
+ 0x25, 0x1f, 0x97, 0x09, 0x1c, 0xcb, 0x02, 0x25, 0x2b, 0x47, 0x1b, 0x73,
+ 0xc2, 0x25, 0x31, 0xc3, 0x6c, 0x49, 0x09, 0x18, 0x60, 0x47, 0x03, 0x4c,
+ 0xc2, 0x25, 0x43, 0xc2, 0x01, 0xdf, 0x09, 0x19, 0x1b, 0x02, 0x25, 0x5c,
+ 0xc3, 0x1d, 0xd4, 0x09, 0x19, 0x10, 0x97, 0x09, 0x1a, 0xe1, 0xa0, 0x09,
+ 0x1a, 0xd2, 0x02, 0x25, 0x62, 0xc3, 0xe5, 0x21, 0x09, 0x1a, 0xc1, 0x9f,
+ 0x09, 0x1a, 0xb9, 0x9a, 0x09, 0x1a, 0xb1, 0x47, 0x03, 0x4c, 0x42, 0x25,
+ 0x68, 0xc5, 0x39, 0xc7, 0x09, 0x19, 0x38, 0xc2, 0x0b, 0x47, 0x09, 0x18,
+ 0xe1, 0x00, 0x42, 0x25, 0x7b, 0x8f, 0x09, 0x18, 0x43, 0x02, 0x25, 0x96,
+ 0x94, 0x09, 0x18, 0x4b, 0x02, 0x25, 0x9c, 0x8d, 0x09, 0x18, 0x39, 0xc2,
+ 0x0b, 0x48, 0x09, 0x18, 0x30, 0xc2, 0x38, 0xb6, 0x09, 0x17, 0xd3, 0x02,
+ 0x25, 0xa2, 0x94, 0x09, 0x17, 0xd9, 0x89, 0x09, 0x17, 0x9b, 0x02, 0x25,
+ 0xa8, 0x84, 0x09, 0x17, 0x83, 0x02, 0x25, 0xae, 0x00, 0x42, 0x25, 0xb2,
+ 0x9f, 0x09, 0x1c, 0xb9, 0x94, 0x09, 0x18, 0x0b, 0x02, 0x25, 0xc4, 0x8e,
+ 0x09, 0x18, 0x01, 0xc5, 0x58, 0xf4, 0x09, 0x17, 0xf8, 0xc5, 0x39, 0xc7,
+ 0x09, 0x17, 0xe8, 0x00, 0xc2, 0x25, 0xc8, 0xc3, 0xd8, 0x33, 0x09, 0x17,
+ 0x09, 0xc2, 0x9c, 0x98, 0x09, 0x17, 0x01, 0x89, 0x09, 0x16, 0xea, 0x02,
+ 0x25, 0xd4, 0x97, 0x09, 0x16, 0xbb, 0x02, 0x25, 0xdb, 0x87, 0x09, 0x15,
+ 0xd3, 0x02, 0x25, 0xee, 0x83, 0x09, 0x15, 0x6b, 0x02, 0x26, 0x05, 0x0b,
+ 0x42, 0x26, 0x1f, 0x89, 0x09, 0x14, 0xab, 0x02, 0x26, 0x40, 0x94, 0x09,
+ 0x15, 0x61, 0xc4, 0xe3, 0xaf, 0x09, 0x15, 0x59, 0x8e, 0x09, 0x15, 0x4a,
+ 0x02, 0x26, 0x44, 0x94, 0x09, 0x17, 0x4b, 0x02, 0x26, 0x4a, 0x8f, 0x09,
+ 0x17, 0x3b, 0x02, 0x26, 0x4e, 0xc3, 0x06, 0x47, 0x09, 0x17, 0x31, 0x86,
+ 0x09, 0x17, 0x23, 0x02, 0x26, 0x54, 0xc8, 0x8b, 0x5c, 0x09, 0x17, 0x18,
+ 0x90, 0x09, 0x1c, 0x7b, 0x02, 0x26, 0x58, 0xc3, 0x78, 0x3e, 0x09, 0x13,
+ 0x01, 0x8f, 0x09, 0x12, 0x7b, 0x02, 0x26, 0x65, 0x9f, 0x09, 0x12, 0x71,
+ 0xc8, 0x98, 0x84, 0x09, 0x12, 0x68, 0xc2, 0x2b, 0x85, 0x09, 0x13, 0x13,
+ 0x02, 0x26, 0x6b, 0x90, 0x09, 0x13, 0x1a, 0x02, 0x26, 0x6f, 0xa1, 0x09,
+ 0x1c, 0x71, 0x8f, 0x09, 0x12, 0x33, 0x02, 0x26, 0x7c, 0xc2, 0x01, 0x30,
+ 0x09, 0x12, 0x03, 0x02, 0x26, 0x86, 0x9f, 0x09, 0x11, 0xf8, 0x00, 0x42,
+ 0x26, 0x8e, 0xc2, 0x01, 0xe2, 0x09, 0x11, 0x93, 0x02, 0x26, 0x9a, 0xc4,
+ 0xe4, 0xc3, 0x09, 0x11, 0x89, 0xc4, 0xe4, 0x7f, 0x09, 0x11, 0x81, 0x89,
+ 0x09, 0x11, 0x73, 0x02, 0x26, 0xa5, 0xc8, 0xb5, 0xd2, 0x09, 0x11, 0x68,
+ 0xc9, 0xab, 0xc7, 0x09, 0x28, 0xf9, 0x90, 0x09, 0x11, 0x58, 0x95, 0x09,
+ 0x11, 0x4a, 0x02, 0x26, 0xab, 0xc2, 0x01, 0xe2, 0x09, 0x11, 0x33, 0x02,
+ 0x26, 0xaf, 0x94, 0x09, 0x11, 0x29, 0x8a, 0x09, 0x11, 0x21, 0x9f, 0x09,
+ 0x11, 0x19, 0x00, 0x42, 0x26, 0xb3, 0x9f, 0x09, 0x0f, 0xeb, 0x02, 0x26,
+ 0xbf, 0x8f, 0x09, 0x10, 0xeb, 0x02, 0x26, 0xc3, 0x8e, 0x09, 0x10, 0xe1,
+ 0x8a, 0x09, 0x10, 0xd9, 0xc3, 0x38, 0x73, 0x09, 0x10, 0xbb, 0x02, 0x26,
+ 0xcc, 0xa0, 0x09, 0x10, 0xb1, 0xca, 0x8d, 0x2d, 0x09, 0x0f, 0xe0, 0x42,
+ 0x0c, 0x67, 0xc2, 0x26, 0xd0, 0x42, 0x01, 0x30, 0xc2, 0x26, 0xf2, 0x8f,
+ 0x09, 0x0f, 0xa3, 0x02, 0x27, 0x00, 0x8e, 0x09, 0x0f, 0x93, 0x02, 0x27,
+ 0x09, 0xc4, 0xdf, 0xdb, 0x09, 0x0f, 0x88, 0xc2, 0x01, 0xe2, 0x09, 0x0f,
+ 0xd1, 0xc4, 0xe3, 0xe7, 0x09, 0x0f, 0xc9, 0x8e, 0x09, 0x0f, 0xc0, 0x47,
+ 0x03, 0x4c, 0xc2, 0x27, 0x0f, 0xc9, 0xae, 0xdf, 0x09, 0x1b, 0x79, 0xc4,
+ 0x45, 0xaf, 0x09, 0x0c, 0xe3, 0x02, 0x27, 0x5b, 0x0f, 0xc2, 0x27, 0x5f,
+ 0x8e, 0x09, 0x0c, 0xbb, 0x02, 0x27, 0x67, 0x8d, 0x09, 0x0c, 0xab, 0x02,
+ 0x27, 0x6b, 0x06, 0xc2, 0x27, 0x71, 0x84, 0x09, 0x0c, 0x79, 0x9f, 0x09,
+ 0x0c, 0x6a, 0x02, 0x27, 0x84, 0xc4, 0x5d, 0xd2, 0x09, 0x0d, 0xa9, 0x94,
+ 0x09, 0x0d, 0x9b, 0x02, 0x27, 0x8a, 0x90, 0x09, 0x0d, 0x91, 0x8e, 0x09,
+ 0x0d, 0x83, 0x02, 0x27, 0x90, 0xa4, 0x09, 0x0d, 0x79, 0xa1, 0x09, 0x0d,
+ 0x6b, 0x02, 0x27, 0x96, 0xa0, 0x09, 0x0d, 0x61, 0x49, 0x05, 0x54, 0x42,
+ 0x27, 0x9c, 0x15, 0xc2, 0x27, 0xa2, 0x90, 0x09, 0x0d, 0x29, 0x86, 0x09,
+ 0x0d, 0x21, 0x47, 0x03, 0x4c, 0x42, 0x27, 0xb5, 0x47, 0x03, 0x4c, 0x42,
+ 0x27, 0xc2, 0x00, 0xc2, 0x27, 0xf3, 0x8e, 0x09, 0x09, 0x60, 0xc2, 0x01,
+ 0xe2, 0x09, 0x1b, 0xe9, 0xc2, 0xaf, 0x5c, 0x09, 0x09, 0xf1, 0xc2, 0x58,
+ 0xf2, 0x09, 0x09, 0xc2, 0x02, 0x28, 0x02, 0x86, 0x09, 0x08, 0xf3, 0x02,
+ 0x28, 0x08, 0x9f, 0x09, 0x08, 0xc3, 0x02, 0x28, 0x0c, 0x94, 0x09, 0x09,
+ 0x2b, 0x02, 0x28, 0x10, 0x8f, 0x09, 0x09, 0x1b, 0x02, 0x28, 0x18, 0x8e,
+ 0x09, 0x09, 0x11, 0xcc, 0x88, 0xe9, 0x09, 0x08, 0xb8, 0x15, 0xc2, 0x28,
+ 0x1e, 0x89, 0x09, 0x1b, 0xe1, 0x14, 0xc2, 0x28, 0x2b, 0xc3, 0x7e, 0x08,
+ 0x09, 0x08, 0x39, 0xa1, 0x09, 0x08, 0x23, 0x02, 0x28, 0x39, 0x00, 0x42,
+ 0x28, 0x3d, 0xc5, 0xda, 0xba, 0x09, 0x07, 0xf3, 0x02, 0x28, 0x49, 0xc2,
+ 0xe1, 0x2e, 0x09, 0x1b, 0xd8, 0xc2, 0x01, 0x5d, 0x09, 0x07, 0x73, 0x02,
+ 0x28, 0x4f, 0x9f, 0x09, 0x05, 0xbb, 0x02, 0x28, 0x53, 0xc4, 0x09, 0x26,
+ 0x09, 0x07, 0xe9, 0x94, 0x09, 0x07, 0xdb, 0x02, 0x28, 0x57, 0x90, 0x09,
+ 0x07, 0xb3, 0x02, 0x28, 0x5b, 0x8f, 0x09, 0x07, 0xa9, 0x8e, 0x09, 0x07,
+ 0x93, 0x02, 0x28, 0x62, 0x86, 0x09, 0x07, 0x83, 0x02, 0x28, 0x6e, 0xc5,
+ 0x39, 0xc7, 0x09, 0x05, 0xb0, 0x00, 0x42, 0x28, 0x74, 0xce, 0x73, 0x28,
+ 0x09, 0x25, 0x60, 0xc3, 0x9e, 0x4d, 0x09, 0x04, 0xfb, 0x02, 0x28, 0x80,
+ 0xc2, 0x00, 0xc4, 0x09, 0x04, 0xf0, 0x47, 0x03, 0x4c, 0x42, 0x28, 0x86,
+ 0x00, 0x42, 0x28, 0xac, 0xd3, 0x42, 0x55, 0x09, 0x04, 0x61, 0xc9, 0xa8,
+ 0xe5, 0x09, 0x04, 0x58, 0x89, 0x09, 0x04, 0x0b, 0x02, 0x28, 0xc4, 0x84,
+ 0x09, 0x03, 0xf3, 0x02, 0x28, 0xd0, 0xc2, 0x38, 0x6a, 0x09, 0x04, 0x49,
+ 0x90, 0x09, 0x04, 0x23, 0x02, 0x28, 0xda, 0x8a, 0x09, 0x04, 0x19, 0x00,
+ 0x42, 0x28, 0xe5, 0x8f, 0x09, 0x03, 0xa3, 0x02, 0x28, 0xf7, 0xc2, 0x01,
+ 0xe2, 0x09, 0x03, 0xcb, 0x02, 0x29, 0x04, 0x90, 0x09, 0x03, 0xbb, 0x02,
+ 0x29, 0x0a, 0x84, 0x09, 0x03, 0x98, 0x89, 0x09, 0x02, 0xb3, 0x02, 0x29,
+ 0x10, 0xcb, 0x38, 0xad, 0x09, 0x24, 0x41, 0x94, 0x09, 0x03, 0x7b, 0x02,
+ 0x29, 0x18, 0x8f, 0x09, 0x03, 0x70, 0x00, 0xc2, 0x29, 0x1c, 0x94, 0x09,
+ 0x02, 0x9b, 0x02, 0x29, 0x28, 0xc3, 0x6c, 0x4d, 0x09, 0x02, 0x8a, 0x02,
+ 0x29, 0x2c, 0xc4, 0x38, 0x68, 0x09, 0x02, 0x1b, 0x02, 0x29, 0x32, 0x86,
+ 0x09, 0x02, 0x0b, 0x02, 0x29, 0x38, 0x94, 0x09, 0x02, 0x3b, 0x02, 0x29,
+ 0x3e, 0x8e, 0x09, 0x02, 0x23, 0x02, 0x29, 0x44, 0xc2, 0xe6, 0xad, 0x09,
+ 0x02, 0x10, 0x47, 0x03, 0x4c, 0x42, 0x29, 0x50, 0xcb, 0x98, 0x84, 0x09,
+ 0x24, 0x10, 0x00, 0xc2, 0x29, 0x60, 0x9f, 0x09, 0x00, 0xb2, 0x02, 0x29,
+ 0x6c, 0x47, 0x03, 0x4c, 0x42, 0x29, 0x72, 0x8a, 0x09, 0x01, 0xc3, 0x02,
+ 0x29, 0x7e, 0xc3, 0xe5, 0x9f, 0x09, 0x01, 0xb8, 0xc3, 0x91, 0xee, 0x09,
+ 0x01, 0xb1, 0xc2, 0x01, 0x9d, 0x09, 0x01, 0xa2, 0x02, 0x29, 0x8c, 0xc3,
+ 0x04, 0x65, 0x09, 0x01, 0x91, 0x00, 0x42, 0x29, 0x92, 0xc3, 0x36, 0xb6,
+ 0x09, 0x01, 0x51, 0xc2, 0x00, 0xd1, 0x09, 0x01, 0x49, 0x47, 0x03, 0x4c,
+ 0x42, 0x29, 0xa4, 0x47, 0x03, 0x4c, 0x42, 0x29, 0xcc, 0xc3, 0x78, 0x3e,
+ 0x09, 0x00, 0x41, 0xc4, 0x7a, 0x34, 0x09, 0x00, 0x39, 0xca, 0x39, 0xc2,
+ 0x09, 0x00, 0x31, 0xc3, 0x04, 0x2a, 0x09, 0x00, 0x29, 0xc2, 0x00, 0xd0,
+ 0x09, 0x00, 0x21, 0xc9, 0x5d, 0x99, 0x09, 0x00, 0x19, 0xc3, 0x62, 0x19,
+ 0x09, 0x00, 0x11, 0x83, 0x09, 0x00, 0x08, 0x14, 0xc2, 0x29, 0xd8, 0x00,
+ 0x42, 0x29, 0xe5, 0xc9, 0x0a, 0xfe, 0x09, 0x1c, 0xa0, 0x92, 0x09, 0x13,
+ 0xe9, 0x90, 0x09, 0x13, 0xe1, 0x86, 0x09, 0x13, 0xd8, 0x84, 0x09, 0x14,
+ 0x80, 0xc2, 0x00, 0x74, 0x09, 0x0a, 0x99, 0x00, 0x42, 0x29, 0xf1, 0x9f,
+ 0x09, 0x0a, 0x69, 0xd0, 0x5d, 0x92, 0x09, 0x0a, 0x60, 0x8b, 0x09, 0x0a,
+ 0x32, 0x02, 0x2a, 0x09, 0x4b, 0x96, 0xa0, 0x42, 0x2a, 0x0d, 0x97, 0x09,
+ 0x20, 0xa3, 0x02, 0x2a, 0x19, 0xd3, 0x42, 0xa1, 0x09, 0x22, 0x33, 0x02,
+ 0x2a, 0x1f, 0xc5, 0xdb, 0xdc, 0x09, 0x21, 0x59, 0xc5, 0xd7, 0xf9, 0x09,
+ 0x20, 0xe9, 0xc4, 0x04, 0x59, 0x09, 0x20, 0x71, 0xc3, 0x02, 0x2c, 0x09,
+ 0x20, 0x38, 0xc3, 0x26, 0x1a, 0x09, 0x22, 0xb9, 0xc3, 0x0f, 0xd6, 0x09,
+ 0x22, 0xb0, 0x97, 0x09, 0x20, 0x9b, 0x02, 0x2a, 0x2d, 0xd1, 0x53, 0x21,
+ 0x09, 0x22, 0x23, 0x02, 0x2a, 0x33, 0xc5, 0xdb, 0xdc, 0x09, 0x21, 0x51,
+ 0xc5, 0xd7, 0xf9, 0x09, 0x20, 0xe1, 0xc4, 0x04, 0x59, 0x09, 0x20, 0x69,
+ 0xc3, 0x02, 0x2c, 0x09, 0x20, 0x30, 0x08, 0xc2, 0x2a, 0x37, 0xca, 0x9d,
+ 0x2e, 0x09, 0x23, 0x31, 0xc9, 0xac, 0xcc, 0x09, 0x23, 0x28, 0x97, 0x09,
+ 0x20, 0x93, 0x02, 0x2a, 0x43, 0x51, 0x52, 0xdd, 0xc2, 0x2a, 0x49, 0xc5,
+ 0xdb, 0xdc, 0x09, 0x21, 0x49, 0xc5, 0xd7, 0xf9, 0x09, 0x20, 0xd9, 0xc4,
+ 0x04, 0x59, 0x09, 0x20, 0x61, 0xc3, 0x02, 0x2c, 0x09, 0x20, 0x28, 0x97,
+ 0x09, 0x20, 0x8b, 0x02, 0x2a, 0x51, 0xc3, 0x02, 0x2c, 0x09, 0x20, 0x23,
+ 0x02, 0x2a, 0x57, 0xd1, 0x54, 0x64, 0x09, 0x22, 0x01, 0xc5, 0xdb, 0xdc,
+ 0x09, 0x21, 0x41, 0xc5, 0xd7, 0xf9, 0x09, 0x20, 0xd1, 0xc4, 0x04, 0x59,
+ 0x09, 0x20, 0x58, 0xc3, 0x0f, 0xd6, 0x09, 0x21, 0x99, 0xc4, 0x04, 0x59,
+ 0x09, 0x21, 0x90, 0x97, 0x09, 0x20, 0x83, 0x02, 0x2a, 0x5d, 0x15, 0xc2,
+ 0x2a, 0x63, 0x04, 0xc2, 0x2a, 0x6f, 0xc3, 0x02, 0x2c, 0x09, 0x20, 0x1b,
+ 0x02, 0x2a, 0x7e, 0x44, 0x64, 0xa6, 0xc2, 0x2a, 0x84, 0xc4, 0x04, 0x59,
+ 0x09, 0x20, 0x50, 0x97, 0x09, 0x20, 0x7b, 0x02, 0x2a, 0x8c, 0x04, 0xc2,
+ 0x2a, 0x92, 0xc3, 0x02, 0x2c, 0x09, 0x20, 0x13, 0x02, 0x2a, 0xa1, 0xd2,
+ 0x49, 0x31, 0x09, 0x21, 0xe3, 0x02, 0x2a, 0xa7, 0x44, 0x7a, 0x36, 0xc2,
+ 0x2a, 0xaf, 0x44, 0x64, 0xa6, 0xc2, 0x2a, 0xb7, 0xc4, 0x04, 0x59, 0x09,
+ 0x20, 0x48, 0xc8, 0xbf, 0x52, 0x09, 0x23, 0x21, 0x48, 0x15, 0x02, 0xc2,
+ 0x2a, 0xbf, 0x07, 0xc2, 0x2a, 0xcb, 0x46, 0x06, 0x67, 0xc2, 0x2a, 0xd7,
+ 0x04, 0xc2, 0x2a, 0xe3, 0xc5, 0xdb, 0x8c, 0x09, 0x21, 0x61, 0x44, 0x64,
+ 0xa6, 0x42, 0x2a, 0xef, 0xc7, 0x08, 0x79, 0x09, 0x23, 0x11, 0xc5, 0xd3,
+ 0x5b, 0x09, 0x23, 0x08, 0x47, 0x8d, 0x4d, 0xc2, 0x2a, 0xf7, 0xc5, 0xdd,
+ 0x62, 0x09, 0x22, 0xc9, 0x04, 0xc2, 0x2b, 0x03, 0xc3, 0x02, 0x2c, 0x09,
+ 0x20, 0x03, 0x02, 0x2b, 0x0f, 0x44, 0x7a, 0x36, 0xc2, 0x2b, 0x15, 0x44,
+ 0x64, 0xa6, 0x42, 0x2b, 0x1d, 0x04, 0xc2, 0x2b, 0x25, 0xc3, 0x02, 0x2c,
+ 0x09, 0x20, 0x0b, 0x02, 0x2b, 0x34, 0x50, 0x57, 0x72, 0xc2, 0x2b, 0x3a,
+ 0x44, 0x7a, 0x36, 0xc2, 0x2b, 0x46, 0x44, 0x64, 0xa6, 0xc2, 0x2b, 0x54,
+ 0xc4, 0x04, 0x59, 0x09, 0x20, 0x40, 0xc2, 0x00, 0x11, 0x01, 0x3d, 0x81,
+ 0x46, 0x19, 0xbb, 0x42, 0x2b, 0x5c, 0xa1, 0x09, 0x7f, 0x81, 0x9f, 0x09,
+ 0x7f, 0x79, 0x9d, 0x09, 0x7f, 0x70, 0xa6, 0x09, 0x7f, 0x69, 0xa5, 0x09,
+ 0x7f, 0x61, 0xa4, 0x09, 0x7f, 0x59, 0xa2, 0x09, 0x7f, 0x51, 0xa1, 0x09,
+ 0x7f, 0x49, 0xa0, 0x09, 0x7f, 0x41, 0x9f, 0x09, 0x7f, 0x39, 0x9e, 0x09,
+ 0x7f, 0x31, 0x9d, 0x09, 0x7f, 0x28, 0xa6, 0x09, 0x7f, 0x21, 0xa5, 0x09,
+ 0x7f, 0x19, 0xa4, 0x09, 0x7f, 0x11, 0xa3, 0x09, 0x7f, 0x09, 0xa2, 0x09,
+ 0x7f, 0x01, 0xa1, 0x09, 0x7e, 0xf9, 0x9f, 0x09, 0x7e, 0xf1, 0x9e, 0x09,
+ 0x7e, 0xe9, 0x9d, 0x09, 0x7e, 0xe0, 0xa6, 0x09, 0x7e, 0xd9, 0xa5, 0x09,
+ 0x7e, 0xd1, 0xa4, 0x09, 0x7e, 0xc9, 0xa3, 0x09, 0x7e, 0xc1, 0xa2, 0x09,
+ 0x7e, 0xb9, 0xa1, 0x09, 0x7e, 0xb1, 0xa0, 0x09, 0x7e, 0xa9, 0x9f, 0x09,
+ 0x7e, 0xa1, 0x9e, 0x09, 0x7e, 0x99, 0x9d, 0x09, 0x7e, 0x90, 0xa6, 0x09,
+ 0x7e, 0x89, 0xa5, 0x09, 0x7e, 0x81, 0xa3, 0x09, 0x7e, 0x79, 0xa2, 0x09,
+ 0x7e, 0x6b, 0x02, 0x2b, 0x68, 0xa1, 0x09, 0x7e, 0x61, 0xa0, 0x09, 0x7e,
+ 0x59, 0x9f, 0x09, 0x7e, 0x51, 0x9e, 0x09, 0x7e, 0x49, 0x9d, 0x09, 0x7e,
+ 0x40, 0xa6, 0x09, 0x7e, 0x39, 0xa5, 0x09, 0x7e, 0x31, 0xa4, 0x09, 0x7e,
+ 0x29, 0xa3, 0x09, 0x7e, 0x21, 0xa1, 0x09, 0x7e, 0x19, 0xa0, 0x09, 0x7e,
+ 0x11, 0x9f, 0x09, 0x7e, 0x09, 0x9e, 0x09, 0x7e, 0x01, 0x9d, 0x09, 0x7d,
+ 0xf8, 0xa6, 0x09, 0x7d, 0xf1, 0xa5, 0x09, 0x7d, 0xe9, 0xa3, 0x09, 0x7d,
+ 0xe1, 0xa2, 0x09, 0x7d, 0xd9, 0xa1, 0x09, 0x7d, 0xd1, 0xa0, 0x09, 0x7d,
+ 0xc9, 0x9f, 0x09, 0x7d, 0xc1, 0x9e, 0x09, 0x7d, 0xb9, 0x9d, 0x09, 0x7d,
+ 0xb0, 0xa6, 0x09, 0x7d, 0xa9, 0xa4, 0x09, 0x7d, 0xa1, 0xa3, 0x09, 0x7d,
+ 0x99, 0xa1, 0x09, 0x7d, 0x91, 0x9e, 0x09, 0x7d, 0x89, 0x9d, 0x09, 0x7d,
+ 0x80, 0xa6, 0x09, 0x7d, 0x79, 0xa5, 0x09, 0x7d, 0x71, 0xa4, 0x09, 0x7d,
+ 0x69, 0xa3, 0x09, 0x7d, 0x61, 0xa2, 0x09, 0x7d, 0x59, 0xa1, 0x09, 0x7d,
+ 0x51, 0xa0, 0x09, 0x7d, 0x49, 0x9d, 0x09, 0x7d, 0x40, 0xa6, 0x09, 0x7d,
+ 0x39, 0xa5, 0x09, 0x7d, 0x31, 0xa4, 0x09, 0x7d, 0x29, 0xa3, 0x09, 0x7d,
+ 0x21, 0xa2, 0x09, 0x7d, 0x19, 0xa1, 0x09, 0x7d, 0x11, 0xa0, 0x09, 0x7d,
+ 0x09, 0x9e, 0x09, 0x7d, 0x00, 0xa6, 0x09, 0x7c, 0xf9, 0xa4, 0x09, 0x7c,
+ 0xf1, 0xa2, 0x09, 0x7c, 0xe9, 0xa0, 0x09, 0x7c, 0xe1, 0x9f, 0x09, 0x7c,
+ 0xd3, 0x02, 0x2b, 0x6c, 0x9e, 0x09, 0x7c, 0xc9, 0x9d, 0x09, 0x7c, 0xc0,
+ 0xa6, 0x09, 0x7c, 0xb9, 0xa5, 0x09, 0x7c, 0xb1, 0xa4, 0x09, 0x7c, 0xa9,
+ 0xa3, 0x09, 0x7c, 0xa1, 0xa2, 0x09, 0x7c, 0x99, 0xa1, 0x09, 0x7c, 0x91,
+ 0x9f, 0x09, 0x7c, 0x89, 0x9e, 0x09, 0x7c, 0x80, 0xcb, 0x95, 0x2a, 0x00,
+ 0xe4, 0x41, 0x46, 0x00, 0x8b, 0xc2, 0x2b, 0x70, 0x8d, 0x00, 0x23, 0xca,
+ 0x02, 0x2b, 0x7a, 0x44, 0x03, 0x15, 0xc2, 0x2b, 0x80, 0xce, 0x73, 0xec,
+ 0x00, 0xe4, 0x29, 0x87, 0x00, 0x22, 0x13, 0x02, 0x2b, 0x92, 0x15, 0xc2,
+ 0x2b, 0x98, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x69, 0xc3, 0x28, 0x28, 0x05,
+ 0x34, 0x98, 0xc6, 0xd0, 0xf7, 0x00, 0xe4, 0x19, 0x87, 0x00, 0x28, 0xe8,
+ 0xc7, 0x5b, 0xcd, 0x00, 0xe4, 0x11, 0xca, 0x9c, 0x34, 0x05, 0x32, 0x79,
+ 0xc2, 0x1c, 0x52, 0x00, 0x22, 0xd0, 0xcd, 0x7b, 0x2f, 0x00, 0xe4, 0x09,
+ 0xc2, 0x00, 0xd0, 0x00, 0x28, 0xa9, 0xc2, 0x1c, 0x52, 0x00, 0x22, 0xc9,
+ 0xc9, 0x51, 0x80, 0x00, 0x23, 0x38, 0x44, 0x0d, 0xed, 0xc2, 0x2b, 0xae,
+ 0xc2, 0x00, 0xd0, 0x00, 0x28, 0xb9, 0x48, 0x10, 0x2f, 0x42, 0x2b, 0xba,
+ 0x8e, 0x00, 0x21, 0xdb, 0x02, 0x2b, 0xd2, 0x90, 0x00, 0x21, 0xeb, 0x02,
+ 0x2b, 0xd8, 0xcf, 0x6b, 0x16, 0x00, 0x27, 0x69, 0x8f, 0x00, 0x21, 0xe3,
+ 0x02, 0x2b, 0xde, 0x95, 0x00, 0x22, 0x0b, 0x02, 0x2b, 0xe4, 0x94, 0x00,
+ 0x22, 0x03, 0x02, 0x2b, 0xea, 0x88, 0x00, 0x22, 0x20, 0xc3, 0x28, 0x28,
+ 0x00, 0x29, 0x69, 0x1c, 0xc2, 0x2b, 0xf0, 0x46, 0x00, 0x59, 0xc2, 0x2c,
+ 0x07, 0xc2, 0x1c, 0x52, 0x00, 0x22, 0x93, 0x02, 0x2c, 0x11, 0x87, 0x00,
+ 0x21, 0xa1, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x08, 0x0a, 0xc2, 0x2c, 0x17,
+ 0xc4, 0x74, 0x82, 0x00, 0x26, 0xcb, 0x02, 0x2c, 0x36, 0xc9, 0xb5, 0x06,
+ 0x00, 0x25, 0x7b, 0x02, 0x2c, 0x3c, 0xcc, 0x84, 0xbd, 0x00, 0x24, 0x69,
+ 0x44, 0x62, 0x60, 0x42, 0x2c, 0x42, 0x87, 0x00, 0x21, 0xfb, 0x02, 0x2c,
+ 0x52, 0xc7, 0xbe, 0xab, 0x00, 0x26, 0x79, 0xc2, 0x00, 0xba, 0x00, 0x23,
+ 0x88, 0xc7, 0xc1, 0xcb, 0x00, 0x28, 0xf9, 0x49, 0xb1, 0x31, 0xc2, 0x2c,
+ 0x58, 0x46, 0x00, 0x8b, 0x42, 0x2c, 0x6d, 0x83, 0x00, 0x22, 0x7b, 0x02,
+ 0x2c, 0x79, 0xc3, 0x21, 0x51, 0x00, 0x22, 0x5b, 0x02, 0x2c, 0x81, 0x90,
+ 0x05, 0x32, 0xf9, 0x97, 0x00, 0x22, 0x71, 0x8b, 0x00, 0x22, 0xb8, 0x11,
+ 0xc2, 0x2c, 0x87, 0xcd, 0x78, 0x09, 0x00, 0x26, 0x61, 0x83, 0x00, 0x21,
+ 0xd3, 0x02, 0x2c, 0x93, 0xc2, 0x1c, 0x52, 0x00, 0x22, 0xe1, 0xc2, 0x00,
+ 0xba, 0x00, 0x23, 0x78, 0x83, 0x00, 0x22, 0x2b, 0x02, 0x2c, 0x99, 0xc2,
+ 0x00, 0x28, 0x05, 0x34, 0xa8, 0xc2, 0x01, 0x7f, 0x00, 0x21, 0x9b, 0x02,
+ 0x2c, 0xa5, 0xc2, 0x1c, 0x52, 0x00, 0x22, 0x98, 0x03, 0xc2, 0x2c, 0xab,
+ 0xca, 0xa0, 0x44, 0x05, 0x32, 0x69, 0x87, 0x00, 0x21, 0x89, 0xca, 0xa2,
+ 0x60, 0x05, 0x32, 0xd9, 0x0b, 0xc2, 0x2c, 0xba, 0xd7, 0x27, 0x46, 0x00,
+ 0x22, 0xb0, 0xcf, 0x6b, 0x16, 0x00, 0x27, 0x39, 0xc4, 0x6d, 0xb5, 0x00,
+ 0x23, 0x0b, 0x02, 0x2c, 0xc6, 0x96, 0x00, 0x23, 0xf8, 0x46, 0x00, 0x8b,
+ 0xc2, 0x2c, 0xcc, 0x87, 0x00, 0x21, 0xab, 0x02, 0x2c, 0xde, 0xc6, 0xcb,
+ 0x81, 0x00, 0x23, 0xab, 0x02, 0x2c, 0xe4, 0x91, 0x00, 0x22, 0x8a, 0x02,
+ 0x2c, 0xea, 0x87, 0x00, 0x21, 0xbb, 0x02, 0x2c, 0xee, 0x0a, 0x42, 0x2c,
+ 0xfa, 0xc2, 0x01, 0x7f, 0x00, 0x22, 0x3b, 0x02, 0x2d, 0x07, 0xc8, 0xb6,
+ 0x5a, 0x05, 0x34, 0xd9, 0xd0, 0x51, 0x79, 0x05, 0x32, 0xc9, 0xc3, 0x28,
+ 0x28, 0x05, 0x34, 0x38, 0xc8, 0x82, 0x09, 0x05, 0x32, 0x59, 0xc7, 0x7f,
+ 0xba, 0x05, 0x33, 0x48, 0x8e, 0x00, 0x20, 0xdb, 0x02, 0x2d, 0x0d, 0x90,
+ 0x00, 0x20, 0xeb, 0x02, 0x2d, 0x13, 0xcf, 0x6b, 0x16, 0x00, 0x27, 0x61,
+ 0x8f, 0x00, 0x20, 0xe3, 0x02, 0x2d, 0x19, 0x95, 0x00, 0x21, 0x0b, 0x02,
+ 0x2d, 0x1f, 0x94, 0x00, 0x21, 0x03, 0x02, 0x2d, 0x25, 0x88, 0x00, 0x21,
+ 0x20, 0xc3, 0x28, 0x28, 0x00, 0x29, 0x61, 0x1c, 0xc2, 0x2d, 0x2b, 0x46,
+ 0x00, 0x59, 0xc2, 0x2d, 0x42, 0xc2, 0x1c, 0x52, 0x00, 0x20, 0x13, 0x02,
+ 0x2d, 0x4c, 0x87, 0x00, 0x20, 0xa1, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x00,
+ 0x0a, 0xc2, 0x2d, 0x52, 0xc4, 0x74, 0x82, 0x00, 0x26, 0xc3, 0x02, 0x2d,
+ 0x71, 0xc9, 0xb5, 0x06, 0x00, 0x25, 0x73, 0x02, 0x2d, 0x77, 0xcc, 0x84,
+ 0xbd, 0x00, 0x24, 0x61, 0x44, 0x62, 0x60, 0x42, 0x2d, 0x7d, 0x87, 0x00,
+ 0x20, 0xfb, 0x02, 0x2d, 0x8d, 0xc2, 0x00, 0xba, 0x00, 0x23, 0x80, 0xc7,
+ 0xc1, 0xcb, 0x00, 0x28, 0xf1, 0x49, 0xb1, 0x31, 0xc2, 0x2d, 0x93, 0x46,
+ 0x00, 0x8b, 0x42, 0x2d, 0xa8, 0x83, 0x00, 0x21, 0x7b, 0x02, 0x2d, 0xb4,
+ 0xc3, 0x21, 0x51, 0x00, 0x21, 0x5b, 0x02, 0x2d, 0xbc, 0x8b, 0x00, 0x20,
+ 0x39, 0x97, 0x00, 0x21, 0x71, 0x90, 0x05, 0x32, 0xf0, 0xc2, 0x00, 0xd0,
+ 0x00, 0x28, 0xb1, 0x48, 0x10, 0x2f, 0xc2, 0x2d, 0xc2, 0xca, 0x9b, 0xd0,
+ 0x00, 0x23, 0xd0, 0xc2, 0x00, 0xd0, 0x00, 0x28, 0xa1, 0xc2, 0x1c, 0x52,
+ 0x00, 0x20, 0x49, 0xc9, 0x51, 0x80, 0x00, 0x23, 0x30, 0x11, 0xc2, 0x2d,
+ 0xda, 0xcd, 0x78, 0x09, 0x00, 0x26, 0x59, 0x83, 0x00, 0x20, 0xd3, 0x02,
+ 0x2d, 0xe6, 0xc2, 0x1c, 0x52, 0x00, 0x20, 0x61, 0xc2, 0x00, 0xba, 0x00,
+ 0x23, 0x70, 0x83, 0x00, 0x21, 0x2b, 0x02, 0x2d, 0xec, 0xc2, 0x00, 0x28,
+ 0x05, 0x34, 0xa0, 0xc2, 0x01, 0x7f, 0x00, 0x20, 0x9b, 0x02, 0x2d, 0xf8,
+ 0xc2, 0x1c, 0x52, 0x00, 0x20, 0x18, 0xc2, 0x01, 0x7f, 0x00, 0x21, 0x3b,
+ 0x02, 0x2d, 0xfe, 0xc8, 0xb6, 0x5a, 0x05, 0x34, 0xd1, 0xd0, 0x51, 0x79,
+ 0x05, 0x32, 0xc1, 0xc3, 0x28, 0x28, 0x05, 0x34, 0x30, 0x46, 0x00, 0x8b,
+ 0xc2, 0x2e, 0x04, 0x8d, 0x00, 0x23, 0xc2, 0x02, 0x2e, 0x0e, 0x03, 0xc2,
+ 0x2e, 0x14, 0xd7, 0x27, 0x46, 0x00, 0x20, 0x31, 0x87, 0x00, 0x20, 0x89,
+ 0xca, 0xa0, 0x44, 0x05, 0x32, 0x61, 0xca, 0xa2, 0x60, 0x05, 0x32, 0xd1,
+ 0x0b, 0x42, 0x2e, 0x23, 0xcf, 0x6b, 0x16, 0x00, 0x27, 0x31, 0xc4, 0x6d,
+ 0xb5, 0x00, 0x23, 0x03, 0x02, 0x2e, 0x2f, 0x96, 0x00, 0x23, 0xf0, 0x46,
+ 0x00, 0x8b, 0xc2, 0x2e, 0x35, 0x87, 0x00, 0x20, 0xab, 0x02, 0x2e, 0x47,
+ 0xc6, 0xcb, 0x81, 0x00, 0x23, 0xa3, 0x02, 0x2e, 0x4d, 0x91, 0x00, 0x20,
+ 0x0a, 0x02, 0x2e, 0x53, 0x87, 0x00, 0x20, 0xbb, 0x02, 0x2e, 0x57, 0x0a,
+ 0x42, 0x2e, 0x63, 0x87, 0x00, 0x21, 0x13, 0x02, 0x2e, 0x70, 0x15, 0xc2,
+ 0x2e, 0x76, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x61, 0xc3, 0x28, 0x28, 0x05,
+ 0x34, 0x90, 0xc2, 0x1c, 0x52, 0x00, 0x20, 0x51, 0xca, 0x9c, 0x34, 0x05,
+ 0x32, 0x70, 0xc8, 0x82, 0x09, 0x05, 0x32, 0x51, 0xc7, 0x7f, 0xba, 0x05,
+ 0x33, 0x40, 0xc4, 0x02, 0xde, 0x00, 0x04, 0x79, 0xc2, 0x02, 0xa0, 0x00,
+ 0x04, 0x70, 0xe0, 0x06, 0x27, 0x01, 0x01, 0xd0, 0x07, 0xc2, 0x2e, 0x8c,
+ 0xd3, 0x3f, 0x70, 0x01, 0x00, 0xd0, 0x44, 0x05, 0x14, 0xc2, 0x2e, 0x92,
+ 0xc6, 0x2a, 0xfe, 0x08, 0x8f, 0x91, 0xc6, 0xcf, 0x9b, 0x08, 0x8f, 0x89,
+ 0x15, 0xc2, 0x2e, 0x9e, 0x08, 0xc2, 0x2e, 0xaa, 0x16, 0x42, 0x2e, 0xb6,
+ 0xc4, 0x26, 0x78, 0x08, 0x8f, 0x49, 0xc5, 0x06, 0xdb, 0x08, 0x8f, 0x41,
+ 0x15, 0xc2, 0x2e, 0xc8, 0x08, 0xc2, 0x2e, 0xd4, 0x16, 0xc2, 0x2e, 0xe0,
+ 0xc3, 0x05, 0x14, 0x08, 0x8f, 0x08, 0xc9, 0xaf, 0x39, 0x00, 0x6c, 0x11,
+ 0xc8, 0xb5, 0x9a, 0x00, 0x6e, 0x50, 0x03, 0xc2, 0x2e, 0xec, 0x0b, 0xc2,
+ 0x2f, 0x14, 0x17, 0xc2, 0x2f, 0x2c, 0x07, 0xc2, 0x2f, 0x38, 0x11, 0xc2,
+ 0x2f, 0x44, 0x0f, 0xc2, 0x2f, 0x50, 0xd2, 0x4b, 0x05, 0x00, 0x6c, 0xf1,
+ 0x48, 0xbb, 0xfa, 0xc2, 0x2f, 0x5a, 0x48, 0xb6, 0x92, 0xc2, 0x2f, 0x6a,
+ 0x48, 0xb8, 0xda, 0xc2, 0x2f, 0x76, 0xc7, 0xca, 0x1b, 0x00, 0x6d, 0xd1,
+ 0xc7, 0xc5, 0x3d, 0x00, 0x6d, 0xd9, 0xc7, 0xc0, 0x04, 0x00, 0x6e, 0x01,
+ 0xc7, 0xc3, 0xc3, 0x00, 0x6e, 0x21, 0xc7, 0xc8, 0x0e, 0x00, 0x6e, 0x30,
+ 0xc4, 0x15, 0xe7, 0x00, 0x6f, 0x31, 0xc3, 0x05, 0x14, 0x00, 0x6f, 0x39,
+ 0x16, 0xc2, 0x2f, 0x88, 0x08, 0xc2, 0x2f, 0x94, 0x15, 0xc2, 0x2f, 0xa0,
+ 0xc5, 0x06, 0xdb, 0x00, 0x6f, 0x71, 0xc4, 0x26, 0x78, 0x00, 0x6f, 0x78,
+ 0x45, 0xb0, 0x74, 0xc2, 0x2f, 0xac, 0x44, 0xc8, 0xbe, 0x42, 0x2f, 0xbe,
+ 0xca, 0xa7, 0x60, 0x00, 0x6e, 0x89, 0xc8, 0xb7, 0x82, 0x00, 0x6e, 0x99,
+ 0xc9, 0xaf, 0x42, 0x00, 0x6e, 0xb1, 0xc7, 0xc8, 0xbd, 0x00, 0x6e, 0xd1,
+ 0x42, 0x01, 0x30, 0x42, 0x2f, 0xcd, 0xca, 0x9c, 0xb6, 0x00, 0x6e, 0xc1,
+ 0xc9, 0x93, 0x53, 0x00, 0x6e, 0xf8, 0x4a, 0x82, 0xf7, 0xc2, 0x2f, 0xd9,
+ 0x02, 0x42, 0x2f, 0xfd, 0xc7, 0x0b, 0xc8, 0x0e, 0xc8, 0x99, 0xc8, 0x3b,
+ 0xec, 0x0e, 0xc8, 0x91, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0x88, 0x4c, 0x82,
+ 0xf5, 0xc2, 0x30, 0x09, 0xc4, 0x09, 0x3a, 0x0e, 0xd3, 0xf0, 0xda, 0x19,
+ 0xe2, 0x0e, 0xd3, 0x81, 0x44, 0x01, 0x10, 0x42, 0x30, 0x1d, 0xc8, 0xbe,
+ 0x0a, 0x0e, 0xd0, 0x99, 0xc7, 0xc1, 0xb6, 0x0e, 0xd0, 0x91, 0xc7, 0x81,
+ 0x92, 0x0e, 0xd0, 0x88, 0xca, 0xa4, 0x5e, 0x0e, 0xd0, 0x43, 0x02, 0x30,
+ 0x27, 0xcf, 0x64, 0x2c, 0x0e, 0xd0, 0x38, 0xc3, 0x0d, 0xe5, 0x0e, 0xd4,
+ 0x51, 0xc3, 0x0a, 0x8c, 0x0e, 0xd4, 0x38, 0xc6, 0xcf, 0x23, 0x0e, 0xd1,
+ 0x61, 0xc7, 0x81, 0x92, 0x0e, 0xd1, 0x59, 0xc6, 0xcc, 0x7d, 0x0e, 0xd1,
+ 0x50, 0xd2, 0x4d, 0x9f, 0x0e, 0xd3, 0x89, 0x44, 0x00, 0x7d, 0x42, 0x30,
+ 0x2d, 0xd1, 0x57, 0x3f, 0x0e, 0xc9, 0x01, 0x15, 0xc2, 0x30, 0x39, 0x46,
+ 0x17, 0x14, 0x42, 0x30, 0x45, 0xc7, 0x0b, 0xc8, 0x0e, 0xc8, 0xa9, 0xc7,
+ 0x00, 0x91, 0x0e, 0xc8, 0xa0, 0xc7, 0x0b, 0xc8, 0x0e, 0xc8, 0x69, 0xc8,
+ 0x3b, 0xec, 0x0e, 0xc8, 0x61, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0x58, 0x00,
+ 0xc2, 0x30, 0x51, 0x02, 0x42, 0x30, 0x6f, 0x43, 0x0e, 0xd1, 0xc2, 0x30,
+ 0x7b, 0x12, 0x42, 0x30, 0x87, 0x44, 0xdf, 0x47, 0xc2, 0x30, 0x91, 0x45,
+ 0xd9, 0x6b, 0xc2, 0x30, 0x9d, 0x44, 0xda, 0xac, 0x42, 0x30, 0xc1, 0xc3,
+ 0x1e, 0x1b, 0x0e, 0xd3, 0x0b, 0x02, 0x30, 0xd3, 0x4b, 0x94, 0x64, 0x42,
+ 0x30, 0xd7, 0x4b, 0x40, 0xb3, 0xc2, 0x30, 0xe9, 0x4a, 0x18, 0xa5, 0x42,
+ 0x30, 0xf5, 0x45, 0xd7, 0x95, 0xc2, 0x31, 0x07, 0x47, 0xc0, 0x90, 0xc2,
+ 0x31, 0x13, 0x00, 0xc2, 0x31, 0x25, 0x42, 0x00, 0x97, 0xc2, 0x31, 0x31,
+ 0x4f, 0x67, 0x74, 0x42, 0x31, 0x4f, 0xc2, 0x01, 0x29, 0x0e, 0xd3, 0x59,
+ 0x43, 0x12, 0x8f, 0x42, 0x31, 0x61, 0x00, 0x42, 0x31, 0x7f, 0x19, 0xc2,
+ 0x31, 0x8b, 0xc7, 0xc1, 0xb6, 0x0e, 0xd1, 0x91, 0xc7, 0x81, 0x92, 0x0e,
+ 0xd1, 0x88, 0x4b, 0x40, 0xb3, 0xc2, 0x31, 0x97, 0x4a, 0x18, 0xa5, 0xc2,
+ 0x31, 0xdd, 0x49, 0x1e, 0x56, 0xc2, 0x32, 0x23, 0x46, 0xd3, 0xd3, 0x42,
+ 0x32, 0x35, 0x49, 0xa9, 0x6c, 0xc2, 0x32, 0x47, 0x05, 0xc2, 0x32, 0x53,
+ 0xc5, 0xaf, 0xb5, 0x0e, 0xd2, 0x83, 0x02, 0x32, 0x5f, 0xc4, 0x64, 0xa0,
+ 0x0e, 0xd2, 0x6b, 0x02, 0x32, 0x63, 0x45, 0x05, 0x75, 0xc2, 0x32, 0x67,
+ 0xc5, 0x7d, 0x5e, 0x0e, 0xd2, 0x0b, 0x02, 0x32, 0x8b, 0xc5, 0xa9, 0x3a,
+ 0x0e, 0xd1, 0xf2, 0x02, 0x32, 0x8f, 0xc6, 0xd3, 0x3d, 0x0e, 0xd1, 0xd1,
+ 0xc6, 0xcf, 0xa7, 0x0e, 0xd1, 0xc8, 0xc7, 0x0b, 0xc8, 0x0e, 0xc8, 0x81,
+ 0xc8, 0x3b, 0xec, 0x0e, 0xc8, 0x79, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0x70,
+ 0xd0, 0x5a, 0x42, 0x0e, 0xd1, 0xc1, 0xc6, 0x03, 0x31, 0x0e, 0xd1, 0xb0,
+ 0xd0, 0x5a, 0x42, 0x0e, 0xd1, 0xb9, 0xc7, 0x5b, 0x27, 0x0e, 0xd1, 0xa8,
+ 0x48, 0xb8, 0xea, 0xc2, 0x32, 0x93, 0xca, 0xa4, 0x68, 0x0e, 0xd0, 0x79,
+ 0xcc, 0x81, 0x8d, 0x0e, 0xd0, 0x70, 0xc7, 0xc0, 0x66, 0x0e, 0xcf, 0xf1,
+ 0xd0, 0x5f, 0x72, 0x0e, 0xcf, 0xe9, 0x15, 0xc2, 0x32, 0x9f, 0xc7, 0x38,
+ 0xd9, 0x0e, 0xcf, 0xd1, 0xc5, 0xaf, 0xb5, 0x0e, 0xcf, 0xc9, 0xc4, 0xe0,
+ 0x43, 0x0e, 0xcf, 0xb9, 0x4a, 0x03, 0xde, 0x42, 0x32, 0xae, 0xca, 0xa3,
+ 0x78, 0x08, 0xae, 0xe3, 0x02, 0x32, 0xba, 0x97, 0x08, 0xad, 0xd9, 0x8b,
+ 0x08, 0xad, 0xc9, 0x83, 0x08, 0xad, 0x78, 0x94, 0x08, 0xad, 0xa8, 0x97,
+ 0x08, 0xad, 0x98, 0x8b, 0x08, 0xad, 0x88, 0xca, 0xa3, 0x78, 0x08, 0xae,
+ 0xd9, 0x97, 0x08, 0xac, 0x69, 0x8b, 0x08, 0xac, 0x59, 0x83, 0x08, 0xac,
+ 0x08, 0xd5, 0x33, 0x3e, 0x08, 0xae, 0xcb, 0x02, 0x32, 0xbe, 0x0a, 0xc2,
+ 0x32, 0xc2, 0x83, 0x08, 0xac, 0xe9, 0x16, 0x42, 0x32, 0xcc, 0x83, 0x08,
+ 0xad, 0x69, 0xc2, 0x0d, 0xf6, 0x08, 0xad, 0x61, 0xc2, 0x00, 0xd0, 0x08,
+ 0xad, 0x58, 0x83, 0x08, 0xad, 0x51, 0x47, 0xb2, 0x2e, 0x42, 0x32, 0xd6,
+ 0xc2, 0x00, 0xd0, 0x08, 0xad, 0x29, 0x83, 0x08, 0xad, 0x20, 0xc2, 0x00,
+ 0xd0, 0x08, 0xad, 0x19, 0x83, 0x08, 0xad, 0x10, 0x83, 0x08, 0xad, 0x09,
+ 0xc2, 0x00, 0xc1, 0x08, 0xac, 0xe1, 0xc2, 0x19, 0x2c, 0x08, 0xac, 0xb9,
+ 0xc2, 0x01, 0x30, 0x08, 0xac, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0xad, 0x01,
+ 0x83, 0x08, 0xac, 0xf9, 0x06, 0x42, 0x32, 0xe4, 0xc2, 0x00, 0xd0, 0x08,
+ 0xac, 0xb1, 0x83, 0x08, 0xac, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0xac, 0xa1,
+ 0x83, 0x08, 0xac, 0x98, 0xc2, 0x00, 0xd0, 0x08, 0xac, 0x89, 0x83, 0x08,
+ 0xac, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xac, 0x79, 0x83, 0x08, 0xac, 0x70,
+ 0x97, 0x08, 0xac, 0x28, 0x8b, 0x08, 0xac, 0x18, 0x4b, 0x94, 0x7a, 0xc2,
+ 0x32, 0xee, 0x48, 0x1b, 0x0c, 0x42, 0x32, 0xfd, 0xc7, 0xc3, 0x61, 0x08,
+ 0xae, 0x09, 0xc5, 0x33, 0x5d, 0x08, 0xae, 0x01, 0x42, 0x07, 0xb2, 0xc2,
+ 0x33, 0x09, 0xc8, 0x14, 0x38, 0x08, 0xad, 0xe9, 0xcb, 0x1e, 0x89, 0x08,
+ 0xad, 0xe0, 0xc7, 0xc7, 0x3c, 0x01, 0x39, 0x09, 0xc7, 0x18, 0x68, 0x01,
+ 0x16, 0x30, 0xcf, 0x66, 0x66, 0x01, 0x5f, 0x51, 0xd0, 0x5d, 0x52, 0x01,
+ 0x5f, 0x58, 0xcc, 0x7e, 0x49, 0x00, 0x04, 0x31, 0xc5, 0x0d, 0x5c, 0x00,
+ 0x04, 0xc0, 0xc4, 0x1e, 0x97, 0x05, 0x46, 0x21, 0xc5, 0x40, 0xe7, 0x05,
+ 0x44, 0x08, 0x97, 0x05, 0x46, 0x19, 0x8b, 0x05, 0x46, 0x01, 0x83, 0x05,
+ 0x45, 0xa8, 0x91, 0x05, 0x46, 0x10, 0x87, 0x05, 0x45, 0xf8, 0x8e, 0x05,
+ 0x45, 0xe3, 0x02, 0x33, 0x15, 0x94, 0x05, 0x45, 0xd2, 0x02, 0x33, 0x19,
+ 0x8b, 0x05, 0x45, 0xb8, 0x97, 0x05, 0x45, 0xc8, 0xc2, 0x0d, 0xf6, 0x05,
+ 0x45, 0x81, 0x83, 0x05, 0x45, 0x89, 0xc2, 0x00, 0xd0, 0x05, 0x45, 0x78,
+ 0xc2, 0x00, 0xdb, 0x05, 0x45, 0x99, 0x83, 0x05, 0x45, 0x68, 0x83, 0x05,
+ 0x44, 0x19, 0x8b, 0x05, 0x44, 0x71, 0x97, 0x05, 0x44, 0x88, 0x8b, 0x05,
+ 0x44, 0x28, 0x97, 0x05, 0x44, 0x38, 0x47, 0xb2, 0x2e, 0xc2, 0x33, 0x1d,
+ 0x83, 0x05, 0x45, 0x70, 0x87, 0x05, 0x44, 0x68, 0x91, 0x05, 0x44, 0x80,
+ 0x83, 0x05, 0x44, 0x91, 0xc2, 0x00, 0xd0, 0x05, 0x44, 0x98, 0x83, 0x05,
+ 0x44, 0xa1, 0xc2, 0x00, 0xd0, 0x05, 0x44, 0xa8, 0xc2, 0x01, 0x30, 0x05,
+ 0x44, 0xb1, 0xc2, 0x19, 0x2c, 0x05, 0x44, 0xd9, 0xc2, 0x00, 0xc1, 0x05,
+ 0x45, 0x01, 0x83, 0x05, 0x45, 0x28, 0x83, 0x05, 0x44, 0xb9, 0xc2, 0x00,
+ 0xd0, 0x05, 0x44, 0xc0, 0x83, 0x05, 0x44, 0xc9, 0xc2, 0x00, 0xd0, 0x05,
+ 0x44, 0xd0, 0x16, 0xc2, 0x33, 0x2b, 0x83, 0x05, 0x45, 0x09, 0xc2, 0x00,
+ 0xd0, 0x05, 0x45, 0x10, 0x06, 0xc2, 0x33, 0x35, 0x83, 0x05, 0x45, 0x19,
+ 0xc2, 0x00, 0xd0, 0x05, 0x45, 0x20, 0x83, 0x05, 0x45, 0x31, 0xc2, 0x00,
+ 0xd0, 0x05, 0x45, 0x38, 0x83, 0x05, 0x45, 0x41, 0xc2, 0x00, 0xd0, 0x05,
+ 0x45, 0x48, 0xc4, 0x15, 0xe7, 0x05, 0x46, 0x81, 0xc3, 0x05, 0x14, 0x05,
+ 0x46, 0x89, 0x16, 0xc2, 0x33, 0x3f, 0x08, 0xc2, 0x33, 0x4b, 0x15, 0xc2,
+ 0x33, 0x57, 0xc5, 0x06, 0xdb, 0x05, 0x46, 0xc1, 0xc4, 0x26, 0x78, 0x05,
+ 0x46, 0xc8, 0xdd, 0x0a, 0x8a, 0x0f, 0xb3, 0xb9, 0x44, 0x05, 0x9e, 0x42,
+ 0x33, 0x63, 0xe0, 0x0b, 0x27, 0x0f, 0xb3, 0xc0, 0xc4, 0xe3, 0xf3, 0x00,
+ 0x41, 0xf1, 0xc3, 0x0d, 0xe8, 0x00, 0x41, 0x90, 0xc5, 0xd8, 0xe4, 0x00,
+ 0x40, 0xb8, 0x83, 0x00, 0x40, 0xf0, 0x83, 0x00, 0x40, 0xf8, 0xd0, 0x5f,
+ 0xc2, 0x01, 0x54, 0xb8, 0xd0, 0x5f, 0xc2, 0x01, 0x54, 0xc0, 0x07, 0xc2,
+ 0x33, 0x69, 0x44, 0x00, 0xbb, 0xc2, 0x33, 0x75, 0xc9, 0xb0, 0x98, 0x08,
+ 0x8e, 0x69, 0xca, 0xa0, 0x26, 0x08, 0x8e, 0x48, 0xc3, 0xad, 0x77, 0x08,
+ 0x8e, 0xd1, 0xd5, 0x34, 0x64, 0x08, 0x8e, 0x60, 0x45, 0x09, 0x98, 0xc2,
+ 0x33, 0xa3, 0xcb, 0x97, 0xf5, 0x08, 0x8e, 0x31, 0xc4, 0x19, 0x53, 0x08,
+ 0x8e, 0x28, 0x45, 0x00, 0xba, 0xc2, 0x33, 0xc7, 0xcd, 0x7b, 0xbe, 0x08,
+ 0x8e, 0x58, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x91, 0x15, 0xc2, 0x33, 0xed,
+ 0x18, 0xc2, 0x33, 0xfd, 0x0e, 0xc2, 0x34, 0x07, 0xc2, 0x00, 0x39, 0x08,
+ 0x8d, 0x59, 0xc2, 0x19, 0x2c, 0x08, 0x8d, 0x51, 0xc2, 0x01, 0xc3, 0x08,
+ 0x8d, 0x49, 0x04, 0xc2, 0x34, 0x11, 0x12, 0xc2, 0x34, 0x1b, 0x10, 0xc2,
+ 0x34, 0x25, 0x06, 0xc2, 0x34, 0x3b, 0x16, 0xc2, 0x34, 0x49, 0x0c, 0xc2,
+ 0x34, 0x57, 0x05, 0xc2, 0x34, 0x61, 0x09, 0xc2, 0x34, 0x6b, 0x0d, 0xc2,
+ 0x34, 0x75, 0x83, 0x08, 0x8c, 0x1b, 0x02, 0x34, 0x7f, 0x91, 0x08, 0x8c,
+ 0x79, 0x87, 0x08, 0x8c, 0x69, 0x97, 0x08, 0x8c, 0x3b, 0x02, 0x34, 0x8b,
+ 0x8b, 0x08, 0x8c, 0x2a, 0x02, 0x34, 0x8f, 0xc2, 0x01, 0xbb, 0x08, 0x22,
+ 0x89, 0x0a, 0x42, 0x34, 0x93, 0x91, 0x08, 0x22, 0xa9, 0xc3, 0x14, 0xc8,
+ 0x08, 0x22, 0xb0, 0x83, 0x08, 0x22, 0xc1, 0x99, 0x08, 0x23, 0xf8, 0xc3,
+ 0x38, 0x86, 0x08, 0x22, 0xc9, 0xc4, 0xe2, 0x3b, 0x08, 0x23, 0x18, 0xc6,
+ 0x14, 0xc5, 0x08, 0x23, 0x01, 0xc3, 0x03, 0x4e, 0x08, 0x23, 0x28, 0x87,
+ 0x08, 0x23, 0x11, 0xc2, 0x00, 0x95, 0x08, 0x23, 0x58, 0x88, 0x08, 0x23,
+ 0x31, 0xc2, 0x00, 0x89, 0x08, 0x23, 0x91, 0xc2, 0x00, 0xd1, 0x08, 0x23,
+ 0xf0, 0xc2, 0x0f, 0xf5, 0x08, 0x23, 0x39, 0x03, 0xc2, 0x34, 0x9f, 0xc2,
+ 0x01, 0xa3, 0x08, 0x23, 0xd8, 0xc2, 0x01, 0xe2, 0x08, 0x23, 0x41, 0xc2,
+ 0x00, 0x58, 0x08, 0x23, 0x49, 0x8a, 0x08, 0x23, 0x69, 0xc2, 0x09, 0x3b,
+ 0x08, 0x23, 0x89, 0xc2, 0x00, 0x28, 0x08, 0x23, 0xb9, 0x14, 0xc2, 0x34,
+ 0xa7, 0xc2, 0x01, 0x29, 0x08, 0x23, 0xd0, 0x90, 0x08, 0x23, 0x51, 0xc2,
+ 0x00, 0xa4, 0x08, 0x23, 0x61, 0xc2, 0x06, 0x4e, 0x08, 0x23, 0xa1, 0xc3,
+ 0x0a, 0xe1, 0x08, 0x23, 0xa9, 0xc2, 0x00, 0x71, 0x08, 0x23, 0xb1, 0x94,
+ 0x08, 0x23, 0xc8, 0xe0, 0x09, 0xe7, 0x01, 0x4a, 0x20, 0xcd, 0x80, 0x6a,
+ 0x01, 0x57, 0x38, 0x00, 0x42, 0x34, 0xb1, 0xd6, 0x2e, 0x54, 0x01, 0x5a,
+ 0x79, 0x4c, 0x81, 0xa5, 0x42, 0x34, 0xbd, 0x00, 0x42, 0x34, 0xc3, 0xc3,
+ 0xe5, 0x8a, 0x0f, 0xb3, 0x09, 0xc9, 0xb4, 0x91, 0x0f, 0xb2, 0xc9, 0xc4,
+ 0x47, 0x23, 0x0f, 0xb2, 0x88, 0xc7, 0x10, 0x9c, 0x01, 0x5b, 0xc8, 0x00,
+ 0x42, 0x34, 0xcf, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x19, 0xc9, 0xb4, 0x91,
+ 0x0f, 0xb2, 0xd9, 0xc4, 0x47, 0x23, 0x0f, 0xb2, 0x98, 0xc7, 0x10, 0x9c,
+ 0x01, 0x5b, 0xc0, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0x49, 0x83, 0x08, 0xd3,
+ 0x40, 0xc2, 0x00, 0xd0, 0x08, 0xd3, 0xb1, 0x83, 0x08, 0xd3, 0xa8, 0xc2,
+ 0x00, 0xd0, 0x08, 0xd3, 0x39, 0x83, 0x08, 0xd3, 0x30, 0x8e, 0x08, 0xd2,
+ 0xd1, 0x94, 0x08, 0xd2, 0xc8, 0x97, 0x08, 0xd2, 0xc1, 0x8b, 0x08, 0xd2,
+ 0xb8, 0x87, 0x08, 0xd2, 0xb0, 0x87, 0x08, 0xd2, 0x90, 0xca, 0x50, 0x5e,
+ 0x08, 0x7a, 0xb0, 0xc3, 0x77, 0x79, 0x08, 0x79, 0xf9, 0xc4, 0xdc, 0x2d,
+ 0x08, 0x79, 0xe0, 0xc5, 0xcf, 0xae, 0x0f, 0xbc, 0xb1, 0xc2, 0x00, 0x45,
+ 0x01, 0x99, 0x39, 0xc2, 0xd4, 0x88, 0x01, 0x9c, 0xa0, 0x11, 0xc2, 0x34,
+ 0xe7, 0x8f, 0x01, 0x9c, 0xc8, 0x44, 0x00, 0x8d, 0xc2, 0x34, 0xf3, 0xc4,
+ 0x89, 0x91, 0x01, 0x9a, 0xb9, 0x84, 0x01, 0x9e, 0xe8, 0x11, 0xc2, 0x35,
+ 0x2d, 0xd5, 0x32, 0x2d, 0x01, 0x56, 0x69, 0x8f, 0x01, 0x9e, 0x81, 0x90,
+ 0x01, 0x9e, 0x89, 0x9a, 0x01, 0x9e, 0x98, 0xca, 0x27, 0xba, 0x01, 0x14,
+ 0x83, 0x02, 0x35, 0x37, 0xc3, 0x67, 0x21, 0x01, 0x98, 0x49, 0xc3, 0x14,
+ 0x47, 0x01, 0x98, 0x51, 0x98, 0x01, 0x9b, 0xa8, 0xc7, 0x3c, 0x51, 0x01,
+ 0x14, 0x7b, 0x02, 0x35, 0x3d, 0x90, 0x01, 0x9e, 0x63, 0x02, 0x35, 0x43,
+ 0x97, 0x01, 0x9b, 0xd0, 0xc2, 0x02, 0xae, 0x01, 0x14, 0xa1, 0x03, 0xc2,
+ 0x35, 0x4f, 0x85, 0x01, 0x9e, 0x21, 0x86, 0x01, 0x9e, 0x29, 0xc8, 0xb5,
+ 0x42, 0x01, 0x9e, 0x31, 0x91, 0x01, 0x9e, 0x3b, 0x02, 0x35, 0x57, 0x8f,
+ 0x01, 0x9c, 0xea, 0x02, 0x35, 0x5d, 0xc3, 0x65, 0xba, 0x01, 0x10, 0xd1,
+ 0x0b, 0xc2, 0x35, 0x61, 0x17, 0xc2, 0x35, 0x73, 0x07, 0xc2, 0x35, 0x7f,
+ 0xc2, 0x01, 0x9d, 0x01, 0x9d, 0x6a, 0x02, 0x35, 0x8b, 0xcc, 0x86, 0xc1,
+ 0x0f, 0x90, 0x01, 0x89, 0x01, 0x96, 0x61, 0x83, 0x01, 0x9e, 0x53, 0x02,
+ 0x35, 0x94, 0x17, 0xc2, 0x35, 0x9a, 0x07, 0xc2, 0x35, 0xac, 0x11, 0xc2,
+ 0x35, 0xb8, 0x92, 0x01, 0x9e, 0x5b, 0x02, 0x35, 0xc0, 0x9c, 0x01, 0x9c,
+ 0x80, 0x8c, 0x0f, 0x8c, 0x81, 0x83, 0x01, 0x9b, 0x93, 0x02, 0x35, 0xc4,
+ 0xc3, 0x13, 0x6e, 0x01, 0x99, 0x29, 0xc3, 0x27, 0x49, 0x01, 0x99, 0x31,
+ 0x84, 0x01, 0x9e, 0x41, 0x8f, 0x01, 0x9b, 0xbb, 0x02, 0x35, 0xca, 0x8e,
+ 0x01, 0x9c, 0xb8, 0x11, 0xc2, 0x35, 0xce, 0x83, 0x01, 0x9d, 0x4b, 0x02,
+ 0x35, 0xe0, 0x0b, 0xc2, 0x35, 0xea, 0x07, 0xc2, 0x35, 0xf4, 0x8a, 0x01,
+ 0x9e, 0xb9, 0x8f, 0x01, 0x9e, 0xc1, 0xc2, 0x4c, 0x90, 0x01, 0x9e, 0xc9,
+ 0x94, 0x01, 0x9e, 0xd1, 0x85, 0x01, 0x9b, 0xb1, 0x88, 0x01, 0x9c, 0x51,
+ 0x95, 0x01, 0x9d, 0x81, 0x98, 0x01, 0x9d, 0xa1, 0x99, 0x01, 0x9d, 0xd0,
+ 0x14, 0xc2, 0x36, 0x04, 0x98, 0x01, 0x96, 0x71, 0xc7, 0xc5, 0x6e, 0x01,
+ 0x98, 0x39, 0xc4, 0x90, 0x43, 0x01, 0x98, 0x40, 0xc5, 0xd4, 0x07, 0x01,
+ 0x98, 0x01, 0xc5, 0xdc, 0xa4, 0x01, 0x98, 0x09, 0xc4, 0xe4, 0x4f, 0x01,
+ 0x98, 0x11, 0xc3, 0x3d, 0x51, 0x01, 0x98, 0x19, 0x97, 0x01, 0x9b, 0x99,
+ 0x8f, 0x01, 0x9e, 0x11, 0xc7, 0x23, 0x58, 0x01, 0x9e, 0xf8, 0x83, 0x01,
+ 0x9c, 0x23, 0x02, 0x36, 0x0e, 0xc5, 0xd9, 0x0c, 0x01, 0x98, 0x91, 0xc3,
+ 0x1a, 0x05, 0x01, 0x98, 0xa3, 0x02, 0x36, 0x18, 0x42, 0x00, 0x33, 0xc2,
+ 0x36, 0x2a, 0xc4, 0x2b, 0x09, 0x01, 0x98, 0xe1, 0x11, 0xc2, 0x36, 0x36,
+ 0x89, 0x01, 0x9c, 0x79, 0x8d, 0x01, 0x9e, 0x69, 0x8f, 0x01, 0x9c, 0xf3,
+ 0x02, 0x36, 0x42, 0x96, 0x01, 0x9e, 0x79, 0x84, 0x01, 0x9c, 0x29, 0xc3,
+ 0x00, 0x64, 0x01, 0x9c, 0x49, 0xc2, 0xd4, 0x88, 0x01, 0x9c, 0x89, 0x8e,
+ 0x01, 0x9c, 0xc1, 0xc2, 0x00, 0xb0, 0x01, 0x9d, 0x51, 0x98, 0x01, 0x9d,
+ 0xc1, 0x99, 0x01, 0x9d, 0xf1, 0xc4, 0xe3, 0xb3, 0x01, 0x9e, 0x00, 0x03,
+ 0xc2, 0x36, 0x46, 0x0b, 0xc2, 0x36, 0x56, 0xc5, 0xd2, 0x2a, 0x01, 0x98,
+ 0xc3, 0x02, 0x36, 0x68, 0x9b, 0x01, 0x9e, 0x49, 0x84, 0x01, 0x9c, 0x39,
+ 0xc2, 0xd4, 0x88, 0x01, 0x9c, 0x99, 0xc2, 0x00, 0xb0, 0x01, 0x9d, 0x60,
+ 0x03, 0xc2, 0x36, 0x6e, 0xc6, 0xd3, 0x31, 0x01, 0x99, 0x09, 0x43, 0x00,
+ 0xc4, 0xc2, 0x36, 0x7a, 0x94, 0x01, 0x9e, 0xd9, 0x98, 0x01, 0x9e, 0xe0,
+ 0x83, 0x01, 0x9c, 0x0b, 0x02, 0x36, 0x82, 0xc4, 0x07, 0x9b, 0x01, 0x99,
+ 0x49, 0x88, 0x01, 0x9c, 0x59, 0x8f, 0x01, 0x9c, 0xd1, 0x95, 0x01, 0x9d,
+ 0x89, 0x98, 0x01, 0x9d, 0xa9, 0x99, 0x01, 0x9d, 0xd8, 0x03, 0xc2, 0x36,
+ 0x88, 0xc3, 0xcd, 0xc8, 0x01, 0x99, 0x89, 0xc7, 0xc6, 0x86, 0x01, 0x99,
+ 0xa1, 0xc4, 0xe2, 0xdb, 0x01, 0x99, 0xe1, 0xc5, 0xde, 0x07, 0x01, 0x99,
+ 0xf1, 0x93, 0x01, 0x9e, 0x18, 0x83, 0x01, 0x9c, 0x1b, 0x02, 0x36, 0x92,
+ 0x0b, 0xc2, 0x36, 0xa8, 0x07, 0xc2, 0x36, 0xbb, 0x42, 0x03, 0x53, 0xc2,
+ 0x36, 0xca, 0x89, 0x01, 0x9c, 0x71, 0x00, 0xc2, 0x36, 0xea, 0x84, 0x01,
+ 0x9c, 0x33, 0x02, 0x36, 0xfa, 0xc2, 0x00, 0x95, 0x01, 0x9e, 0xb1, 0xc2,
+ 0xd4, 0x88, 0x01, 0x9c, 0x91, 0x8e, 0x01, 0x9c, 0xb1, 0x8f, 0x01, 0x9c,
+ 0xe3, 0x02, 0x37, 0x00, 0xc2, 0x00, 0xb0, 0x01, 0x9d, 0x59, 0x95, 0x01,
+ 0x9d, 0x99, 0x98, 0x01, 0x9d, 0xbb, 0x02, 0x37, 0x04, 0x99, 0x01, 0x9d,
+ 0xea, 0x02, 0x37, 0x0a, 0x42, 0x04, 0xc6, 0xc2, 0x37, 0x10, 0xc3, 0x93,
+ 0x9b, 0x01, 0x9a, 0x80, 0x11, 0xc2, 0x37, 0x1c, 0x45, 0x0b, 0x12, 0x42,
+ 0x37, 0x28, 0xc6, 0x13, 0x52, 0x01, 0x36, 0xe9, 0xc2, 0x00, 0xa6, 0x0f,
+ 0x8d, 0x51, 0xc6, 0xd2, 0x1d, 0x0f, 0x8d, 0x19, 0x07, 0xc2, 0x37, 0x34,
+ 0xc2, 0x07, 0xa3, 0x0f, 0x8c, 0xc1, 0xc5, 0x0b, 0x0a, 0x01, 0x4e, 0x41,
+ 0xcb, 0x12, 0x2e, 0x01, 0x4e, 0x39, 0x86, 0x0f, 0x8a, 0x61, 0x95, 0x0f,
+ 0x8a, 0x68, 0xc2, 0x17, 0x28, 0x01, 0x35, 0xf9, 0x48, 0xbc, 0xc2, 0x42,
+ 0x37, 0x40, 0xc4, 0x03, 0x4e, 0x01, 0x15, 0x01, 0x19, 0xc2, 0x37, 0x52,
+ 0xc6, 0x02, 0xde, 0x0f, 0x8c, 0xd8, 0xc4, 0x1d, 0x1e, 0x01, 0x14, 0xf9,
+ 0x98, 0x0f, 0x8a, 0x58, 0xc3, 0x25, 0xd6, 0x01, 0x14, 0xf1, 0xc2, 0x52,
+ 0xdc, 0x0f, 0x8a, 0x70, 0x55, 0x30, 0x23, 0xc2, 0x37, 0x5e, 0xc3, 0x8d,
+ 0x08, 0x0f, 0x8c, 0x91, 0x8e, 0x0f, 0x8c, 0x88, 0xc2, 0x00, 0x6c, 0x0f,
+ 0x8d, 0x61, 0x95, 0x0f, 0x8c, 0xd0, 0xc2, 0x7e, 0x61, 0x0f, 0x8d, 0x59,
+ 0xd7, 0x28, 0xcd, 0x0f, 0x8c, 0xc8, 0xc5, 0xd8, 0x62, 0x0f, 0x8d, 0x41,
+ 0xc2, 0x02, 0xbc, 0x0f, 0x8d, 0x39, 0x98, 0x0f, 0x8a, 0x51, 0x85, 0x0f,
+ 0x8d, 0x30, 0xd3, 0x40, 0x54, 0x0f, 0x8d, 0x21, 0x8d, 0x0f, 0x8c, 0xb8,
+ 0xcd, 0x77, 0x60, 0x0f, 0x8d, 0x01, 0x44, 0x09, 0x9e, 0xc2, 0x37, 0x78,
+ 0xc3, 0x02, 0xdf, 0x0f, 0x8c, 0x99, 0xd5, 0x35, 0x0c, 0x01, 0x4e, 0x28,
+ 0x89, 0x0f, 0x8c, 0xb1, 0xc2, 0x04, 0xe6, 0x0f, 0x8c, 0xa8, 0xc9, 0x2a,
+ 0xec, 0x01, 0x21, 0x30, 0xc2, 0x00, 0x74, 0x01, 0x20, 0x79, 0xc3, 0x00,
+ 0xa3, 0x01, 0x20, 0x70, 0xc4, 0x27, 0xe3, 0x01, 0x20, 0x11, 0xc7, 0xc3,
+ 0x92, 0x01, 0x20, 0x08, 0xc4, 0x6e, 0x67, 0x01, 0x21, 0x0b, 0x02, 0x37,
+ 0x82, 0x4d, 0x7e, 0xd7, 0x42, 0x37, 0x88, 0xc5, 0xd5, 0xc4, 0x01, 0x21,
+ 0x21, 0xd2, 0x49, 0x43, 0x01, 0x20, 0xa8, 0x45, 0x0a, 0x11, 0xc2, 0x37,
+ 0x98, 0xc5, 0xd6, 0x0f, 0x01, 0x20, 0x28, 0x49, 0xb3, 0xd4, 0xc2, 0x37,
+ 0xa2, 0xc2, 0x03, 0x4e, 0x00, 0x39, 0x08, 0x46, 0xcd, 0xbb, 0x42, 0x37,
+ 0xca, 0xc2, 0x39, 0x8b, 0x00, 0x39, 0x61, 0xc3, 0x1e, 0x1b, 0x00, 0x38,
+ 0xda, 0x02, 0x37, 0xdc, 0xc3, 0x11, 0xef, 0x00, 0x39, 0x59, 0xc4, 0x77,
+ 0x78, 0x00, 0x39, 0x41, 0xc6, 0x7e, 0x1b, 0x00, 0x39, 0x19, 0xd0, 0x58,
+ 0x42, 0x00, 0x38, 0x89, 0x47, 0xc9, 0x57, 0x42, 0x37, 0xe2, 0xc3, 0x04,
+ 0x5a, 0x00, 0x39, 0x51, 0xca, 0x9d, 0xc4, 0x00, 0x39, 0x38, 0xc3, 0x11,
+ 0x38, 0x00, 0x38, 0xf0, 0xc2, 0x00, 0x8e, 0x00, 0x38, 0xd0, 0xd2, 0x49,
+ 0x79, 0x00, 0x38, 0xb1, 0xc5, 0x49, 0x81, 0x00, 0x38, 0xa8, 0xc9, 0xad,
+ 0xa4, 0x00, 0x38, 0xa0, 0x00, 0xc2, 0x37, 0xf4, 0xcd, 0x75, 0x7f, 0x00,
+ 0x39, 0xe0, 0xca, 0xa1, 0x02, 0x00, 0x38, 0x69, 0xc9, 0xaa, 0x71, 0x00,
+ 0x38, 0x61, 0xc6, 0xaa, 0x74, 0x00, 0x38, 0x58, 0xc5, 0x05, 0x02, 0x00,
+ 0x39, 0xb9, 0xc5, 0x00, 0xd4, 0x00, 0x39, 0xb0, 0xc5, 0x00, 0x2c, 0x00,
+ 0x38, 0x39, 0xc4, 0x00, 0x49, 0x00, 0x38, 0x30, 0xc5, 0x33, 0x24, 0x00,
+ 0x38, 0x23, 0x02, 0x38, 0x00, 0xc9, 0x11, 0xf6, 0x00, 0x38, 0x10, 0xc5,
+ 0x33, 0x24, 0x00, 0x38, 0x1b, 0x02, 0x38, 0x06, 0xc9, 0x11, 0xf6, 0x00,
+ 0x38, 0x08, 0xc5, 0x00, 0xd4, 0x00, 0x39, 0xe9, 0xc5, 0x05, 0x02, 0x00,
+ 0x39, 0xf0, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0x19, 0xc5, 0x05, 0x02, 0x00,
+ 0x3a, 0x20, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0x29, 0xc5, 0x05, 0x02, 0x00,
+ 0x3a, 0x30, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x89, 0x91, 0x05, 0x40, 0x80,
+ 0x91, 0x05, 0x40, 0x91, 0xc2, 0x01, 0x23, 0x05, 0x40, 0x98, 0xd1, 0x52,
+ 0xff, 0x0f, 0xa8, 0x51, 0xce, 0x6f, 0x1c, 0x0f, 0xa8, 0x49, 0xd3, 0x23,
+ 0xc8, 0x0f, 0xa8, 0x38, 0x00, 0x42, 0x38, 0x0c, 0xcf, 0x09, 0xf8, 0x01,
+ 0x4b, 0xd9, 0x42, 0x06, 0x62, 0x42, 0x38, 0x21, 0xc3, 0x02, 0xa3, 0x01,
+ 0x55, 0xf1, 0xcf, 0x60, 0xf3, 0x01, 0x56, 0x01, 0xd9, 0x1f, 0x18, 0x01,
+ 0x56, 0x10, 0xc6, 0x0e, 0xa4, 0x01, 0x56, 0xb9, 0xde, 0x0e, 0x8c, 0x01,
+ 0x56, 0xc0, 0x52, 0x47, 0xdb, 0xc2, 0x38, 0x2d, 0xcf, 0x1d, 0xed, 0x01,
+ 0x03, 0xe8, 0xca, 0x0e, 0xbe, 0x01, 0x03, 0xe1, 0xc4, 0x00, 0x2d, 0x01,
+ 0x03, 0xc0, 0xc4, 0x18, 0x10, 0x01, 0x03, 0xb9, 0xc2, 0x22, 0xcc, 0x01,
+ 0x03, 0xb0, 0xc3, 0x0d, 0x14, 0x01, 0x03, 0xa9, 0xc3, 0x09, 0x9e, 0x01,
+ 0x03, 0xa0, 0xc2, 0x02, 0xa0, 0x00, 0x05, 0x91, 0xc4, 0x02, 0xde, 0x00,
+ 0x05, 0x98, 0xc6, 0xca, 0xf7, 0x00, 0xe6, 0x11, 0xc7, 0xc6, 0x2b, 0x00,
+ 0xe6, 0x08, 0x45, 0x21, 0xed, 0xc2, 0x38, 0x35, 0x83, 0x00, 0xdc, 0xb0,
+ 0xc2, 0x00, 0xd0, 0x00, 0xdd, 0xe9, 0x83, 0x00, 0xdc, 0xc0, 0xc2, 0x2c,
+ 0x43, 0x00, 0xdd, 0xe1, 0x83, 0x00, 0xdc, 0xe0, 0xc2, 0x2c, 0x43, 0x00,
+ 0xdd, 0xd9, 0x83, 0x00, 0xdc, 0xd8, 0xc2, 0x19, 0x2c, 0x00, 0xdd, 0x79,
+ 0x83, 0x00, 0xdc, 0xf0, 0xc2, 0x00, 0xd0, 0x00, 0xdd, 0x71, 0x83, 0x00,
+ 0xdc, 0x50, 0x83, 0x00, 0xdc, 0xa1, 0xc2, 0x19, 0x2c, 0x00, 0xdc, 0x89,
+ 0xc2, 0x01, 0x30, 0x00, 0xdc, 0x68, 0x97, 0x00, 0xdc, 0x48, 0x87, 0x00,
+ 0xdc, 0x30, 0xc4, 0x18, 0x10, 0x00, 0xdd, 0xb9, 0xc2, 0x22, 0xcc, 0x00,
+ 0xdd, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xdd, 0xa9, 0xc3, 0x09, 0x9e, 0x00,
+ 0xdd, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xdd, 0x99, 0xc2, 0x02, 0xa0, 0x00,
+ 0xdd, 0x90, 0xc2, 0x01, 0x4a, 0x00, 0xdd, 0x69, 0xc2, 0x01, 0xc3, 0x00,
+ 0xdd, 0x60, 0xc3, 0xd7, 0xd6, 0x00, 0xdd, 0x19, 0xc4, 0x89, 0x32, 0x00,
+ 0xdd, 0x10, 0xc5, 0xdb, 0xc3, 0x00, 0xdd, 0x51, 0x10, 0x42, 0x38, 0x3d,
+ 0xc7, 0xc6, 0x08, 0x00, 0xdd, 0x49, 0xc5, 0x0d, 0xe4, 0x00, 0xdd, 0x39,
+ 0xc7, 0xc3, 0xbc, 0x00, 0xdd, 0x31, 0xc4, 0xde, 0xff, 0x00, 0xdd, 0x29,
+ 0xc5, 0xd8, 0x9e, 0x00, 0xdd, 0x20, 0xcb, 0x0e, 0xbd, 0x01, 0x55, 0x81,
+ 0xcc, 0x24, 0x47, 0x01, 0x55, 0x90, 0xc8, 0x07, 0x5f, 0x01, 0x55, 0xb1,
+ 0xcf, 0x6a, 0x8f, 0x01, 0x55, 0xd0, 0xd1, 0x55, 0x52, 0x01, 0x14, 0x51,
+ 0xcb, 0x23, 0xa0, 0x01, 0x14, 0x33, 0x02, 0x38, 0x47, 0x46, 0x00, 0xd4,
+ 0x42, 0x38, 0x4d, 0xc6, 0x2d, 0xd0, 0x01, 0x56, 0x99, 0xc4, 0x0e, 0xa6,
+ 0x01, 0x56, 0xa8, 0xca, 0x22, 0x09, 0x0f, 0xb0, 0x1b, 0x02, 0x38, 0x65,
+ 0x0a, 0xc2, 0x38, 0x6b, 0x15, 0xc2, 0x38, 0x7d, 0xc4, 0x21, 0x23, 0x0f,
+ 0xcb, 0x90, 0xca, 0x22, 0x09, 0x0f, 0xb1, 0xd1, 0xd1, 0x55, 0x0e, 0x0f,
+ 0xb1, 0xd8, 0x47, 0xc2, 0x11, 0xc2, 0x38, 0x8c, 0x42, 0x0a, 0x8c, 0xc2,
+ 0x38, 0x98, 0xc3, 0x0d, 0xe5, 0x07, 0xf2, 0xa8, 0xc9, 0x81, 0x9c, 0x01,
+ 0x10, 0x53, 0x02, 0x38, 0xa2, 0xcf, 0x0f, 0x0a, 0x07, 0xf2, 0xb9, 0xc6,
+ 0xbc, 0x34, 0x07, 0xf2, 0xc1, 0xca, 0x0e, 0xbe, 0x07, 0xf3, 0x30, 0x4d,
+ 0x78, 0x7e, 0xc2, 0x38, 0xa8, 0x45, 0x00, 0x2d, 0xc2, 0x38, 0xc7, 0xce,
+ 0x61, 0xd5, 0x07, 0xf3, 0x40, 0xe0, 0x05, 0x07, 0x08, 0x59, 0xd9, 0xc4,
+ 0x1e, 0xc9, 0x00, 0x16, 0xe0, 0xc7, 0x2e, 0x21, 0x0f, 0xb7, 0x49, 0xc8,
+ 0x36, 0x21, 0x07, 0xf3, 0x01, 0xc7, 0x0b, 0x00, 0x07, 0xf3, 0x08, 0x43,
+ 0x00, 0x4b, 0xc2, 0x38, 0xd9, 0xcc, 0x8b, 0x11, 0x07, 0xf3, 0x20, 0xc8,
+ 0x60, 0xf4, 0x07, 0xf3, 0x11, 0xcb, 0x8e, 0x13, 0x07, 0xf3, 0x50, 0x9f,
+ 0x00, 0x04, 0x91, 0x9e, 0x00, 0x04, 0x88, 0xc3, 0x02, 0x9f, 0x00, 0x04,
+ 0x91, 0xc3, 0x05, 0x14, 0x00, 0x04, 0x88, 0xc5, 0xd7, 0xa4, 0x0f, 0xad,
+ 0xb0, 0xca, 0x37, 0x4e, 0x01, 0x13, 0xf1, 0xc5, 0x07, 0x62, 0x01, 0x13,
+ 0xe0, 0x4c, 0x24, 0x3b, 0xc2, 0x38, 0xeb, 0xcb, 0x0e, 0xbd, 0x01, 0x55,
+ 0x99, 0x44, 0x1f, 0xb2, 0xc2, 0x38, 0xf7, 0xcf, 0x6a, 0x8f, 0x01, 0x55,
+ 0xb8, 0xc3, 0x0d, 0xe5, 0x07, 0xf0, 0x99, 0xc3, 0x0a, 0x8c, 0x07, 0xf0,
+ 0x80, 0xcf, 0x0f, 0x0a, 0x07, 0xf0, 0xa9, 0xc6, 0xbc, 0x34, 0x07, 0xf1,
+ 0x89, 0xc6, 0xcb, 0x5d, 0x07, 0xf1, 0x90, 0x44, 0x00, 0x4a, 0xc2, 0x39,
+ 0x03, 0xc7, 0x80, 0x2f, 0x07, 0xf1, 0x98, 0xcb, 0x1a, 0x50, 0x07, 0xf1,
+ 0xb1, 0x05, 0xc2, 0x39, 0x31, 0xd6, 0x08, 0x88, 0x07, 0xf1, 0xd1, 0xd8,
+ 0x21, 0x83, 0x07, 0xf1, 0xe1, 0xd4, 0x38, 0xf4, 0x07, 0xf1, 0xf1, 0xce,
+ 0x25, 0xad, 0x07, 0xf2, 0x41, 0x46, 0x01, 0xfc, 0xc2, 0x39, 0x3d, 0xcd,
+ 0x0b, 0x91, 0x07, 0xf2, 0x00, 0xc5, 0x0a, 0x8a, 0x07, 0xf0, 0x89, 0xc9,
+ 0x11, 0xf6, 0x07, 0xf0, 0x90, 0xc3, 0x00, 0x3a, 0x0f, 0x85, 0x01, 0xca,
+ 0xa6, 0x98, 0x0f, 0x86, 0x78, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x09, 0xc6,
+ 0x78, 0x78, 0x0f, 0x85, 0x89, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x09, 0xc5,
+ 0xdd, 0x49, 0x0f, 0x86, 0x88, 0x46, 0xd2, 0xe9, 0xc2, 0x39, 0x49, 0x48,
+ 0xbe, 0x4a, 0xc2, 0x39, 0x61, 0x46, 0xa8, 0xfa, 0xc2, 0x39, 0x79, 0x45,
+ 0xdc, 0xf9, 0x42, 0x39, 0x91, 0x11, 0xc2, 0x39, 0xbb, 0x47, 0xc7, 0x2e,
+ 0x42, 0x39, 0xc7, 0x46, 0xd1, 0xf9, 0xc2, 0x39, 0xdf, 0x48, 0xb5, 0x32,
+ 0x42, 0x39, 0xf7, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x41, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xc1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x41, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xc0, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x49, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xc9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x49, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xc8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x59, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xd9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x59, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xd8, 0x49, 0xae, 0x46, 0xc2, 0x3a, 0x0f, 0x47, 0x35, 0xce,
+ 0x42, 0x3a, 0x27, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x69, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xe9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x69, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xe8, 0xc2, 0x02, 0xa0, 0x01, 0x5e, 0x99, 0xc4, 0x02, 0xde,
+ 0x01, 0x5e, 0xa0, 0xc3, 0x09, 0x9e, 0x01, 0x5e, 0xa9, 0xc3, 0x0d, 0x14,
+ 0x01, 0x5e, 0xb0, 0x43, 0x03, 0x35, 0xc2, 0x3a, 0x3f, 0x45, 0x00, 0x8c,
+ 0xc2, 0x3a, 0x51, 0xd1, 0x0e, 0xb7, 0x01, 0x53, 0x90, 0xcb, 0x90, 0xe9,
+ 0x0f, 0xae, 0xf9, 0xc3, 0x00, 0x33, 0x0f, 0xa6, 0x18, 0x45, 0x02, 0x6d,
+ 0xc2, 0x3a, 0x6d, 0xcc, 0x43, 0x07, 0x01, 0x10, 0x10, 0x9c, 0x01, 0x25,
+ 0xa9, 0x9b, 0x01, 0x25, 0xa1, 0x9a, 0x01, 0x25, 0x99, 0x99, 0x01, 0x25,
+ 0x91, 0x98, 0x01, 0x25, 0x89, 0x97, 0x01, 0x25, 0x81, 0x96, 0x01, 0x25,
+ 0x79, 0x95, 0x01, 0x25, 0x71, 0x94, 0x01, 0x25, 0x69, 0x93, 0x01, 0x25,
+ 0x61, 0x92, 0x01, 0x25, 0x59, 0x91, 0x01, 0x25, 0x51, 0x90, 0x01, 0x25,
+ 0x49, 0x8f, 0x01, 0x25, 0x41, 0x8e, 0x01, 0x25, 0x39, 0x8d, 0x01, 0x25,
+ 0x31, 0x8c, 0x01, 0x25, 0x29, 0x8b, 0x01, 0x25, 0x21, 0x8a, 0x01, 0x25,
+ 0x19, 0x89, 0x01, 0x25, 0x11, 0x88, 0x01, 0x25, 0x09, 0x87, 0x01, 0x25,
+ 0x01, 0x86, 0x01, 0x24, 0xf9, 0x85, 0x01, 0x24, 0xf1, 0x84, 0x01, 0x24,
+ 0xe9, 0x83, 0x01, 0x24, 0xe0, 0x99, 0x0f, 0x89, 0x31, 0x9a, 0x0f, 0x89,
+ 0x39, 0x9b, 0x0f, 0x89, 0x41, 0x9c, 0x0f, 0x89, 0x49, 0x83, 0x0f, 0x88,
+ 0x81, 0x84, 0x0f, 0x88, 0x89, 0x85, 0x0f, 0x88, 0x91, 0x86, 0x0f, 0x88,
+ 0x99, 0x87, 0x0f, 0x88, 0xa1, 0x88, 0x0f, 0x88, 0xa9, 0x89, 0x0f, 0x88,
+ 0xb1, 0x8a, 0x0f, 0x88, 0xb9, 0x8b, 0x0f, 0x88, 0xc1, 0x8c, 0x0f, 0x88,
+ 0xc9, 0x8d, 0x0f, 0x88, 0xd1, 0x8e, 0x0f, 0x88, 0xd9, 0x8f, 0x0f, 0x88,
+ 0xe1, 0x90, 0x0f, 0x88, 0xe9, 0x91, 0x0f, 0x88, 0xf1, 0x92, 0x0f, 0x88,
+ 0xf9, 0x93, 0x0f, 0x89, 0x01, 0x94, 0x0f, 0x89, 0x09, 0x95, 0x0f, 0x89,
+ 0x11, 0x96, 0x0f, 0x89, 0x19, 0x97, 0x0f, 0x89, 0x21, 0x98, 0x0f, 0x89,
+ 0x28, 0x42, 0x00, 0x28, 0xc2, 0x3a, 0x85, 0xc7, 0x52, 0xcc, 0x01, 0x24,
+ 0x01, 0xc2, 0x00, 0xc4, 0x01, 0x23, 0xe8, 0xc7, 0x1f, 0x6e, 0x01, 0x24,
+ 0x29, 0xc5, 0x66, 0xb1, 0x01, 0x23, 0xf0, 0xc8, 0x48, 0x23, 0x01, 0x24,
+ 0x21, 0xc6, 0x44, 0x9c, 0x01, 0x24, 0x18, 0xc6, 0x14, 0x07, 0x01, 0x24,
+ 0x11, 0xc7, 0x34, 0x37, 0x01, 0x24, 0x08, 0xc4, 0x18, 0x10, 0x01, 0x23,
+ 0xd1, 0xc2, 0x22, 0xcc, 0x01, 0x23, 0xc8, 0xc3, 0x0d, 0x14, 0x01, 0x23,
+ 0xc1, 0xc3, 0x09, 0x9e, 0x01, 0x23, 0xb8, 0xc4, 0x02, 0xde, 0x01, 0x23,
+ 0xb1, 0xc2, 0x02, 0xa0, 0x01, 0x23, 0xa8, 0xc5, 0x8e, 0xdf, 0x01, 0x90,
+ 0x03, 0x02, 0x3a, 0x91, 0xc6, 0xbb, 0xec, 0x01, 0x90, 0x52, 0x02, 0x3a,
+ 0x97, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x78, 0xc5, 0xc0, 0x7d, 0x01, 0x90,
+ 0x13, 0x02, 0x3a, 0x9d, 0xc6, 0xc1, 0x86, 0x01, 0x90, 0x5a, 0x02, 0x3a,
+ 0xa3, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x88, 0xc2, 0x00, 0xd3, 0x01, 0x90,
+ 0x90, 0xc4, 0x79, 0xf3, 0x01, 0x90, 0x2b, 0x02, 0x3a, 0xa9, 0xc6, 0xba,
+ 0x7c, 0x01, 0x90, 0x62, 0x02, 0x3a, 0xaf, 0xc2, 0x00, 0xd3, 0x01, 0x90,
+ 0xa0, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0xa8, 0xc4, 0xc6, 0x7a, 0x01, 0x90,
+ 0x43, 0x02, 0x3a, 0xb5, 0xc6, 0xc6, 0x79, 0x01, 0x90, 0x4a, 0x02, 0x3a,
+ 0xb9, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0xd8, 0xc2, 0x02, 0xa0, 0x01, 0x91,
+ 0x09, 0xc4, 0x02, 0xde, 0x01, 0x91, 0x11, 0xc2, 0x00, 0xc4, 0x01, 0x91,
+ 0x48, 0xc3, 0x09, 0x9e, 0x01, 0x91, 0x19, 0x0b, 0xc2, 0x3a, 0xbf, 0xc7,
+ 0xc8, 0x9a, 0x01, 0x92, 0x00, 0xc2, 0x22, 0xcc, 0x01, 0x91, 0x29, 0x07,
+ 0xc2, 0x3a, 0xd1, 0x17, 0xc2, 0x3a, 0xdd, 0x16, 0xc2, 0x3a, 0xe7, 0xc6,
+ 0xcc, 0xbf, 0x01, 0x91, 0x99, 0xc6, 0xca, 0xe5, 0x01, 0x91, 0xa8, 0xc4,
+ 0x00, 0x2d, 0x01, 0x91, 0x39, 0xc4, 0x61, 0xc1, 0x01, 0x91, 0x79, 0xc9,
+ 0xaf, 0x8a, 0x01, 0x91, 0xe8, 0xc3, 0x02, 0x6e, 0x01, 0x91, 0x41, 0xc3,
+ 0x00, 0xc2, 0x01, 0x91, 0xa0, 0xc3, 0x01, 0x54, 0x01, 0x91, 0x51, 0xc4,
+ 0x04, 0x87, 0x01, 0x91, 0x70, 0xc4, 0x03, 0xd7, 0x01, 0x91, 0x61, 0xc3,
+ 0x29, 0x82, 0x01, 0x91, 0x68, 0xcd, 0x7b, 0x3c, 0x01, 0x91, 0xb9, 0xc3,
+ 0x03, 0x15, 0x01, 0x91, 0xd0, 0xc7, 0x75, 0x78, 0x01, 0x91, 0xc9, 0x15,
+ 0xc2, 0x3a, 0xf3, 0xc3, 0x29, 0x43, 0x01, 0x92, 0x18, 0xd1, 0x01, 0x68,
+ 0x01, 0x57, 0x91, 0xce, 0x33, 0x92, 0x01, 0x57, 0x98, 0xc5, 0x26, 0xf7,
+ 0x08, 0xd7, 0xb9, 0xc4, 0x0d, 0xe5, 0x08, 0xd7, 0x9a, 0x02, 0x3a, 0xfd,
+ 0x45, 0x21, 0xed, 0xc2, 0x3b, 0x03, 0x83, 0x08, 0xd6, 0x98, 0x83, 0x08,
+ 0xd6, 0xd8, 0x83, 0x08, 0xd6, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0xa1,
+ 0x83, 0x08, 0xd6, 0x68, 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0x89, 0x83, 0x08,
+ 0xd6, 0x00, 0x83, 0x08, 0xd6, 0x81, 0xc2, 0x01, 0x30, 0x08, 0xd6, 0x38,
+ 0xc2, 0x00, 0xd0, 0x08, 0xd6, 0x79, 0x83, 0x08, 0xd6, 0x70, 0xc2, 0x00,
+ 0xd0, 0x08, 0xd6, 0x51, 0x83, 0x08, 0xd6, 0x08, 0xc5, 0x26, 0xf7, 0x08,
+ 0xd7, 0x71, 0xc4, 0x0d, 0xe5, 0x08, 0xd7, 0x5a, 0x02, 0x3b, 0x26, 0xc6,
+ 0x26, 0xf6, 0x08, 0xd7, 0x40, 0x16, 0xc2, 0x3b, 0x2c, 0x08, 0xc2, 0x3b,
+ 0x3c, 0xc3, 0x05, 0x14, 0x08, 0x43, 0xc8, 0xd3, 0x42, 0xb4, 0x08, 0x43,
+ 0xb9, 0x45, 0x02, 0x10, 0x42, 0x3b, 0x48, 0xc2, 0xbe, 0xd3, 0x0b, 0x5c,
+ 0x79, 0xc2, 0x19, 0x2d, 0x0b, 0x5c, 0x50, 0xc2, 0x24, 0x82, 0x0b, 0x5c,
+ 0x71, 0xc3, 0xa4, 0xa3, 0x0b, 0x5c, 0x41, 0xc2, 0x01, 0x24, 0x0b, 0x5c,
+ 0x10, 0x15, 0xc2, 0x3b, 0xb1, 0xc3, 0xe5, 0xcc, 0x0b, 0x5c, 0x28, 0xc2,
+ 0x19, 0x2d, 0x0b, 0x5c, 0x61, 0xc3, 0xe0, 0x95, 0x0b, 0x5b, 0xf0, 0x8f,
+ 0x0b, 0x5c, 0x49, 0xc2, 0xbe, 0xd3, 0x0b, 0x5c, 0x18, 0xc3, 0xe5, 0x33,
+ 0x0b, 0x5c, 0x01, 0xc2, 0x00, 0xfa, 0x0b, 0x5b, 0xf8, 0xc2, 0x20, 0xec,
+ 0x0b, 0x59, 0x79, 0xc3, 0xa6, 0x62, 0x0b, 0x59, 0x38, 0xc2, 0x20, 0xec,
+ 0x0b, 0x59, 0x61, 0x16, 0xc2, 0x3b, 0xc3, 0xc4, 0xe3, 0x17, 0x0b, 0x59,
+ 0x41, 0xc3, 0xdb, 0xb7, 0x0b, 0x59, 0x11, 0xc3, 0x20, 0xeb, 0x0b, 0x59,
+ 0x00, 0xc3, 0x57, 0x0c, 0x0b, 0x59, 0x49, 0xc3, 0x20, 0xeb, 0x0b, 0x59,
+ 0x29, 0xc2, 0x20, 0xec, 0x0b, 0x58, 0xf8, 0xc3, 0xe6, 0x53, 0x0b, 0x5b,
+ 0xa3, 0x02, 0x3b, 0xcf, 0xc7, 0xbf, 0xe1, 0x0b, 0x5a, 0x28, 0xca, 0xa7,
+ 0x6a, 0x0b, 0x5b, 0x99, 0xc4, 0x12, 0xc1, 0x0b, 0x59, 0xc8, 0xc5, 0xd5,
+ 0x51, 0x0b, 0x5b, 0x71, 0xc4, 0xdf, 0x2f, 0x0b, 0x5a, 0x08, 0xc2, 0x01,
+ 0x24, 0x0b, 0x5b, 0x21, 0x44, 0x19, 0x61, 0x42, 0x3b, 0xd5, 0x0a, 0xc2,
+ 0x3b, 0xed, 0xc9, 0xa8, 0x82, 0x0b, 0x59, 0xc0, 0x00, 0xc2, 0x3b, 0xf9,
+ 0x95, 0x0b, 0x5a, 0xd8, 0x98, 0x0b, 0x58, 0xd9, 0x84, 0x0b, 0x58, 0xd0,
+ 0x98, 0x0b, 0x58, 0x79, 0x84, 0x0b, 0x58, 0x70, 0x98, 0x0b, 0x58, 0x59,
+ 0x84, 0x0b, 0x58, 0x50, 0x98, 0x0b, 0x58, 0x29, 0x84, 0x0b, 0x58, 0x20,
+ 0x98, 0x0b, 0x58, 0xa9, 0x84, 0x0b, 0x58, 0xa0, 0x98, 0x0b, 0x58, 0x69,
+ 0x84, 0x0b, 0x58, 0x60, 0x98, 0x0b, 0x58, 0x89, 0x84, 0x0b, 0x58, 0x80,
+ 0x98, 0x0b, 0x58, 0x09, 0x84, 0x0b, 0x58, 0x00, 0xc5, 0x11, 0x55, 0x01,
+ 0x81, 0x00, 0x45, 0x00, 0x8c, 0xc2, 0x3c, 0x05, 0xc8, 0x7d, 0x5e, 0x0f,
+ 0xb2, 0x69, 0x14, 0xc2, 0x3c, 0x21, 0xcd, 0x80, 0x5d, 0x0f, 0xb2, 0x39,
+ 0xcf, 0x63, 0x78, 0x0f, 0xc9, 0xc1, 0x43, 0x03, 0x35, 0xc2, 0x3c, 0x27,
+ 0xc8, 0xb5, 0xb2, 0x0f, 0xce, 0xb8, 0xc4, 0x02, 0xde, 0x08, 0x48, 0xd9,
+ 0x19, 0xc2, 0x3c, 0x33, 0xc2, 0x00, 0xc4, 0x08, 0x48, 0xb8, 0xc8, 0x0d,
+ 0x03, 0x08, 0x48, 0xc8, 0xc2, 0x20, 0xec, 0x08, 0x48, 0xa9, 0xc2, 0x00,
+ 0x3d, 0x08, 0x48, 0x40, 0xc3, 0x11, 0xef, 0x08, 0x48, 0xa1, 0xc3, 0x01,
+ 0x9d, 0x08, 0x48, 0x89, 0xc3, 0x7e, 0x1b, 0x08, 0x48, 0x70, 0xc2, 0x00,
+ 0x74, 0x08, 0x48, 0x79, 0xc2, 0x01, 0xd0, 0x08, 0x48, 0x00, 0x96, 0x08,
+ 0x48, 0x38, 0x83, 0x05, 0x42, 0x01, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0x08,
+ 0x83, 0x05, 0x42, 0x11, 0xc2, 0x01, 0x30, 0x05, 0x43, 0x28, 0xc2, 0x01,
+ 0x30, 0x05, 0x42, 0x19, 0xc2, 0x19, 0x2c, 0x05, 0x42, 0x39, 0x83, 0x05,
+ 0x42, 0x59, 0xc2, 0x00, 0xc1, 0x05, 0x43, 0x60, 0x83, 0x05, 0x42, 0x23,
+ 0x02, 0x3c, 0x3d, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0x28, 0x83, 0x05, 0x42,
+ 0x41, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0x49, 0x15, 0xc2, 0x3c, 0x43, 0x16,
+ 0x42, 0x3c, 0x4d, 0x83, 0x05, 0x42, 0x51, 0xc2, 0x02, 0x1c, 0x05, 0x42,
+ 0x91, 0xc2, 0x0e, 0x9a, 0x05, 0x43, 0x58, 0x83, 0x05, 0x42, 0x61, 0xc2,
+ 0x00, 0xd0, 0x05, 0x42, 0x68, 0xc2, 0x00, 0xd0, 0x05, 0x42, 0xa1, 0x83,
+ 0x05, 0x42, 0xa8, 0xc6, 0x24, 0x9c, 0x05, 0x42, 0xb1, 0xc2, 0x00, 0xd0,
+ 0x05, 0x42, 0xd1, 0x83, 0x05, 0x42, 0xd8, 0xcb, 0x91, 0xf1, 0x05, 0x43,
+ 0x69, 0xcb, 0x8f, 0xaa, 0x05, 0x43, 0x80, 0x87, 0x05, 0x43, 0x30, 0xc8,
+ 0xbc, 0x12, 0x05, 0x43, 0x71, 0xc4, 0x0c, 0x2b, 0x05, 0x43, 0x78, 0x4f,
+ 0x5c, 0xf3, 0xc2, 0x3c, 0x57, 0xd2, 0x47, 0xc9, 0x05, 0x43, 0x90, 0xc9,
+ 0xb4, 0xf4, 0x08, 0x0e, 0x89, 0xc8, 0xbf, 0x22, 0x08, 0x0f, 0x90, 0xc5,
+ 0x61, 0xba, 0x08, 0x0e, 0x99, 0xcd, 0x76, 0x1b, 0x08, 0x0f, 0x11, 0x96,
+ 0x08, 0x0f, 0x60, 0xc2, 0x00, 0x50, 0x08, 0x0f, 0x23, 0x02, 0x3c, 0x69,
+ 0xc4, 0xe4, 0x9f, 0x08, 0x0f, 0x30, 0x99, 0x08, 0x0e, 0xd1, 0xc7, 0xc9,
+ 0xce, 0x08, 0x0f, 0x08, 0xc4, 0xd3, 0x73, 0x08, 0x0f, 0x38, 0xc3, 0x19,
+ 0x78, 0x08, 0x0e, 0xd9, 0x92, 0x08, 0x0f, 0x40, 0xc8, 0x74, 0xc4, 0x00,
+ 0x4a, 0x91, 0xc6, 0x74, 0xc6, 0x00, 0x4a, 0x88, 0x42, 0x07, 0xb2, 0xc2,
+ 0x3c, 0x6f, 0x03, 0xc2, 0x3c, 0x7b, 0xc5, 0x33, 0x5d, 0x00, 0x49, 0xe1,
+ 0xcb, 0x1e, 0x89, 0x00, 0x48, 0x0b, 0x02, 0x3c, 0x87, 0xd4, 0x39, 0xa8,
+ 0x00, 0x48, 0x01, 0x15, 0xc2, 0x3c, 0x8b, 0xc8, 0xbe, 0xca, 0x05, 0x47,
+ 0xc1, 0xd9, 0x1e, 0x82, 0x05, 0x47, 0xa1, 0xd0, 0x5a, 0x12, 0x00, 0x4b,
+ 0x88, 0x99, 0x00, 0x4a, 0x79, 0x97, 0x00, 0x4a, 0x61, 0x8b, 0x00, 0x4a,
+ 0x41, 0x83, 0x00, 0x49, 0xf1, 0x9b, 0x05, 0x47, 0xf8, 0xc2, 0x49, 0x0c,
+ 0x00, 0x49, 0xd9, 0x87, 0x00, 0x49, 0xd0, 0x91, 0x00, 0x4a, 0x51, 0x87,
+ 0x00, 0x4a, 0x30, 0x91, 0x00, 0x4a, 0x49, 0x87, 0x00, 0x4a, 0x29, 0xc6,
+ 0xcf, 0x2f, 0x00, 0x4a, 0xa8, 0x94, 0x00, 0x4a, 0x1b, 0x02, 0x3c, 0x97,
+ 0x8e, 0x00, 0x4b, 0x12, 0x02, 0x3c, 0x9b, 0x97, 0x00, 0x4a, 0x13, 0x02,
+ 0x3c, 0x9f, 0x87, 0x00, 0x4a, 0xb0, 0x8b, 0x00, 0x4a, 0x00, 0x83, 0x00,
+ 0x49, 0xc9, 0xc7, 0xc4, 0xb1, 0x00, 0x4b, 0xd0, 0x83, 0x00, 0x49, 0xc1,
+ 0xc2, 0x0d, 0xf6, 0x00, 0x49, 0xb9, 0x0a, 0x42, 0x3c, 0xa3, 0x83, 0x00,
+ 0x49, 0xa9, 0x47, 0xb2, 0x2e, 0x42, 0x3c, 0xad, 0x0e, 0xc2, 0x3c, 0xbb,
+ 0x83, 0x00, 0x49, 0x90, 0xc2, 0x00, 0x39, 0x00, 0x49, 0x89, 0x83, 0x00,
+ 0x49, 0x81, 0xc2, 0x00, 0xd0, 0x00, 0x4a, 0xe8, 0x83, 0x00, 0x49, 0x79,
+ 0xc2, 0x19, 0x2c, 0x00, 0x4a, 0xf8, 0xc9, 0xad, 0x53, 0x00, 0x4b, 0xc0,
+ 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x69, 0x83, 0x00, 0x49, 0x61, 0xc2, 0x01,
+ 0x5d, 0x00, 0x4b, 0xf8, 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x59, 0x83, 0x00,
+ 0x49, 0x50, 0x10, 0xc2, 0x3c, 0xc5, 0x83, 0x00, 0x49, 0x41, 0xc2, 0x19,
+ 0x2c, 0x00, 0x48, 0xf1, 0xc2, 0x01, 0x30, 0x00, 0x48, 0xc8, 0xc2, 0x00,
+ 0xd0, 0x00, 0x49, 0x39, 0x83, 0x00, 0x49, 0x31, 0x06, 0x42, 0x3c, 0xcf,
+ 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x29, 0x83, 0x00, 0x49, 0x21, 0x16, 0x42,
+ 0x3c, 0xdd, 0xc2, 0x00, 0xd0, 0x00, 0x48, 0xe9, 0x83, 0x00, 0x48, 0xe1,
+ 0xc2, 0x25, 0x3b, 0x00, 0x4b, 0xe0, 0xc2, 0x00, 0xd0, 0x00, 0x48, 0xd9,
+ 0x83, 0x00, 0x48, 0xd2, 0x02, 0x3c, 0xe7, 0x0a, 0xc2, 0x3c, 0xed, 0x83,
+ 0x00, 0x48, 0xb9, 0xc2, 0x01, 0x30, 0x00, 0x4b, 0xd9, 0xcb, 0x23, 0x34,
+ 0x00, 0x4b, 0xe8, 0x0a, 0xc2, 0x3c, 0xf7, 0x83, 0x00, 0x48, 0xa8, 0x97,
+ 0x00, 0x48, 0xa1, 0x8b, 0x00, 0x48, 0x81, 0x83, 0x00, 0x48, 0x31, 0x9b,
+ 0x05, 0x47, 0xf1, 0x99, 0x00, 0x4b, 0xa8, 0x87, 0x00, 0x4b, 0x99, 0xc2,
+ 0x49, 0x0c, 0x00, 0x4b, 0xa0, 0x97, 0x00, 0x48, 0x53, 0x02, 0x3d, 0x01,
+ 0x87, 0x00, 0x4b, 0xb0, 0x8b, 0x00, 0x48, 0x40, 0x83, 0x00, 0x4a, 0xd9,
+ 0xc2, 0x00, 0xd0, 0x00, 0x4b, 0xc8, 0xc4, 0x26, 0x78, 0x00, 0x4b, 0x79,
+ 0xc5, 0x06, 0xdb, 0x00, 0x4b, 0x71, 0x15, 0xc2, 0x3d, 0x05, 0x08, 0xc2,
+ 0x3d, 0x11, 0x16, 0xc2, 0x3d, 0x1d, 0xc3, 0x05, 0x14, 0x00, 0x4b, 0x39,
+ 0xc4, 0x15, 0xe7, 0x00, 0x4b, 0x30, 0x45, 0x2c, 0x86, 0xc2, 0x3d, 0x29,
+ 0x46, 0x2e, 0xee, 0xc2, 0x3d, 0x3f, 0xc2, 0x0c, 0x42, 0x08, 0x20, 0x61,
+ 0x11, 0xc2, 0x3d, 0x55, 0xc2, 0x14, 0x68, 0x08, 0x20, 0x71, 0xc3, 0x17,
+ 0x29, 0x08, 0x20, 0x79, 0x8a, 0x08, 0x20, 0x81, 0xc3, 0x6f, 0xb7, 0x08,
+ 0x20, 0x89, 0xc3, 0xb2, 0x36, 0x08, 0x20, 0x91, 0x16, 0xc2, 0x3d, 0x5d,
+ 0xc3, 0x80, 0x64, 0x08, 0x20, 0xa1, 0xc4, 0x46, 0xfd, 0x08, 0x20, 0xa9,
+ 0xc3, 0x30, 0xc1, 0x08, 0x20, 0xb1, 0xc3, 0x72, 0xc8, 0x08, 0x20, 0xb9,
+ 0xc3, 0x93, 0x51, 0x08, 0x20, 0xc1, 0x07, 0xc2, 0x3d, 0x69, 0xc3, 0x0a,
+ 0x85, 0x08, 0x20, 0xd1, 0x1c, 0x42, 0x3d, 0x91, 0x45, 0x2c, 0x86, 0xc2,
+ 0x3d, 0x9d, 0x46, 0x2e, 0xee, 0xc2, 0x3d, 0xb3, 0xc2, 0x0c, 0x42, 0x08,
+ 0x21, 0xa1, 0x11, 0xc2, 0x3d, 0xc9, 0xc2, 0x14, 0x68, 0x08, 0x21, 0xb1,
+ 0xc3, 0x17, 0x29, 0x08, 0x21, 0xb9, 0x8a, 0x08, 0x21, 0xc1, 0xc3, 0x6f,
+ 0xb7, 0x08, 0x21, 0xc9, 0xc3, 0xb2, 0x36, 0x08, 0x21, 0xd1, 0x16, 0xc2,
+ 0x3d, 0xd1, 0xc3, 0x80, 0x64, 0x08, 0x21, 0xe1, 0xc4, 0x46, 0xfd, 0x08,
+ 0x21, 0xe9, 0xc3, 0x30, 0xc1, 0x08, 0x21, 0xf1, 0xc3, 0x72, 0xc8, 0x08,
+ 0x21, 0xf9, 0xc3, 0x93, 0x51, 0x08, 0x22, 0x01, 0x07, 0xc2, 0x3d, 0xdd,
+ 0xc3, 0x0a, 0x85, 0x08, 0x22, 0x11, 0x1c, 0x42, 0x3e, 0x05, 0xc4, 0x00,
+ 0x49, 0x01, 0x1e, 0x61, 0xc5, 0x00, 0x2c, 0x01, 0x1d, 0xf8, 0xc4, 0x00,
+ 0x49, 0x01, 0x1e, 0x59, 0xc5, 0x00, 0x2c, 0x01, 0x1d, 0xf0, 0xc4, 0x8f,
+ 0x73, 0x0e, 0x98, 0x21, 0xc5, 0x73, 0xcb, 0x0e, 0x98, 0x18, 0xc9, 0x11,
+ 0xf6, 0x01, 0x24, 0x81, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x50, 0xc9, 0x11,
+ 0xf6, 0x01, 0x24, 0x79, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x48, 0x00, 0x42,
+ 0x3e, 0x11, 0x00, 0x42, 0x3e, 0x1d, 0x00, 0x42, 0x3e, 0x29, 0x00, 0x42,
+ 0x3e, 0x35, 0x00, 0x42, 0x3e, 0x41, 0x00, 0x42, 0x3e, 0x4d, 0xc9, 0x11,
+ 0xf6, 0x01, 0x24, 0x41, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x10, 0xc9, 0x11,
+ 0xf6, 0x0f, 0x88, 0x01, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x08, 0xc4, 0x26,
+ 0x78, 0x08, 0xca, 0xc9, 0xc5, 0x06, 0xdb, 0x08, 0xca, 0xc1, 0x15, 0xc2,
+ 0x3e, 0x59, 0x08, 0xc2, 0x3e, 0x65, 0x16, 0xc2, 0x3e, 0x71, 0xc3, 0x05,
+ 0x14, 0x08, 0xca, 0x89, 0xc4, 0x15, 0xe7, 0x08, 0xca, 0x80, 0x91, 0x08,
+ 0xc9, 0xc1, 0x03, 0xc2, 0x3e, 0x7d, 0x87, 0x08, 0xc9, 0xa9, 0x97, 0x08,
+ 0xc9, 0x9b, 0x02, 0x3e, 0x85, 0x8b, 0x08, 0xc9, 0x8a, 0x02, 0x3e, 0x89,
+ 0xc2, 0x00, 0xdb, 0x08, 0xc9, 0x71, 0x83, 0x08, 0xc9, 0x40, 0x83, 0x08,
+ 0xc9, 0x61, 0xc2, 0x0d, 0xf6, 0x08, 0xc9, 0x59, 0xc2, 0x00, 0xd0, 0x08,
+ 0xc9, 0x50, 0xc2, 0x19, 0x2c, 0x08, 0xc9, 0x31, 0x83, 0x08, 0xc9, 0x28,
+ 0xc2, 0x00, 0xd0, 0x08, 0xc9, 0x19, 0x83, 0x08, 0xc9, 0x10, 0xc2, 0x00,
+ 0xd0, 0x08, 0xc9, 0x09, 0x83, 0x08, 0xc9, 0x00, 0x83, 0x08, 0xc8, 0xf9,
+ 0xc2, 0x00, 0xc1, 0x08, 0xc8, 0xd1, 0xc2, 0x19, 0x2c, 0x08, 0xc8, 0xa9,
+ 0xc2, 0x01, 0x30, 0x08, 0xc8, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xc8, 0xf1,
+ 0x83, 0x08, 0xc8, 0xe9, 0x06, 0x42, 0x3e, 0x8d, 0xc2, 0x00, 0xd0, 0x08,
+ 0xc8, 0xe1, 0x83, 0x08, 0xc8, 0xd9, 0xc2, 0x01, 0x6f, 0x08, 0xc8, 0xb0,
+ 0xc2, 0x00, 0xd0, 0x08, 0xc8, 0x91, 0x83, 0x08, 0xc8, 0x88, 0xc2, 0x00,
+ 0xd0, 0x08, 0xc8, 0x79, 0x83, 0x08, 0xc8, 0x70, 0xc2, 0x00, 0xd0, 0x08,
+ 0xc8, 0x69, 0x83, 0x08, 0xc8, 0x60, 0x97, 0x08, 0xc8, 0x28, 0x8b, 0x08,
+ 0xc8, 0x18, 0x83, 0x08, 0xc8, 0x08, 0xc4, 0x03, 0x03, 0x01, 0x10, 0xa9,
+ 0xc3, 0x00, 0xbb, 0x00, 0x07, 0xb8, 0xc4, 0x26, 0x78, 0x01, 0x3c, 0x91,
+ 0xc5, 0x06, 0xdb, 0x01, 0x3c, 0x89, 0x15, 0xc2, 0x3e, 0x97, 0x08, 0xc2,
+ 0x3e, 0xa3, 0x16, 0xc2, 0x3e, 0xaf, 0xc3, 0x05, 0x14, 0x01, 0x3c, 0x51,
+ 0xc4, 0x15, 0xe7, 0x0f, 0x88, 0x60, 0xc4, 0x18, 0x10, 0x01, 0x3b, 0xe1,
+ 0xc2, 0x22, 0xcc, 0x01, 0x3b, 0xd8, 0xc3, 0x0d, 0x14, 0x01, 0x3b, 0xd1,
+ 0xc3, 0x09, 0x9e, 0x01, 0x3b, 0xc8, 0xc4, 0x02, 0xde, 0x01, 0x3b, 0xc1,
+ 0xc2, 0x02, 0xa0, 0x01, 0x3b, 0xb8, 0xc4, 0x18, 0x10, 0x01, 0x3c, 0x31,
+ 0xc2, 0x22, 0xcc, 0x01, 0x3c, 0x28, 0xc3, 0x0d, 0x14, 0x01, 0x3c, 0x21,
+ 0xc3, 0x09, 0x9e, 0x01, 0x3c, 0x18, 0xc4, 0x02, 0xde, 0x01, 0x3c, 0x11,
+ 0xc2, 0x02, 0xa0, 0x01, 0x3c, 0x08, 0xcf, 0x66, 0x66, 0x01, 0x58, 0xb1,
+ 0xd0, 0x5d, 0x52, 0x01, 0x58, 0xb9, 0xce, 0x74, 0xda, 0x01, 0x58, 0xc1,
+ 0xd1, 0x53, 0xba, 0x01, 0x58, 0xc8, 0xc9, 0x33, 0xad, 0x0f, 0xc8, 0x50,
+ 0xc9, 0x33, 0xad, 0x0f, 0xc8, 0x58, 0x42, 0x00, 0x2c, 0xc2, 0x3e, 0xbb,
+ 0x42, 0x02, 0xa0, 0x42, 0x3e, 0xc7, 0xcf, 0x5b, 0xc3, 0x0f, 0xc2, 0x99,
+ 0xcc, 0x88, 0xdd, 0x0f, 0xc1, 0xd8, 0x45, 0x11, 0x3a, 0xc2, 0x3e, 0xd3,
+ 0x51, 0x01, 0x51, 0x42, 0x3e, 0xdf, 0xc4, 0x01, 0xa3, 0x01, 0x0c, 0x9b,
+ 0x02, 0x3e, 0xeb, 0xc5, 0xdb, 0x50, 0x01, 0x70, 0xa0, 0xda, 0x1b, 0xd0,
+ 0x0f, 0xc4, 0xb8, 0xcb, 0x82, 0xba, 0x01, 0x0f, 0x19, 0xcb, 0x82, 0x36,
+ 0x01, 0x0e, 0x98, 0xc5, 0x01, 0xa2, 0x01, 0x58, 0x39, 0xd3, 0x43, 0xe4,
+ 0x01, 0x5c, 0x58, 0xa3, 0x0f, 0x82, 0x99, 0x9d, 0x0f, 0x82, 0x69, 0x9e,
+ 0x0f, 0x82, 0x71, 0x9f, 0x0f, 0x82, 0x79, 0xa0, 0x0f, 0x82, 0x81, 0xa1,
+ 0x0f, 0x82, 0x89, 0xa2, 0x0f, 0x82, 0x90, 0xa3, 0x0f, 0x81, 0xf1, 0xa1,
+ 0x0f, 0x81, 0xe1, 0x9d, 0x0f, 0x81, 0xc1, 0x9e, 0x0f, 0x81, 0xc9, 0x9f,
+ 0x0f, 0x81, 0xd1, 0xa0, 0x0f, 0x81, 0xd9, 0xa2, 0x0f, 0x81, 0xe8, 0xa0,
+ 0x0f, 0x81, 0xa1, 0x9f, 0x0f, 0x81, 0x99, 0x9e, 0x0f, 0x81, 0x91, 0x9d,
+ 0x0f, 0x81, 0x89, 0xa1, 0x0f, 0x81, 0xa9, 0xa2, 0x0f, 0x81, 0xb1, 0xa3,
+ 0x0f, 0x81, 0xb8, 0x9d, 0x0f, 0x81, 0xf9, 0x9e, 0x0f, 0x82, 0x01, 0x9f,
+ 0x0f, 0x82, 0x09, 0xa0, 0x0f, 0x82, 0x11, 0xa1, 0x0f, 0x82, 0x19, 0xa2,
+ 0x0f, 0x82, 0x21, 0xa3, 0x0f, 0x82, 0x28, 0x9d, 0x0f, 0x82, 0x31, 0x9e,
+ 0x0f, 0x82, 0x39, 0x9f, 0x0f, 0x82, 0x41, 0xa0, 0x0f, 0x82, 0x49, 0xa1,
+ 0x0f, 0x82, 0x51, 0xa2, 0x0f, 0x82, 0x59, 0xa3, 0x0f, 0x82, 0x60, 0x9d,
+ 0x0f, 0x82, 0xa1, 0x9e, 0x0f, 0x82, 0xa9, 0x9f, 0x0f, 0x82, 0xb1, 0xa0,
+ 0x0f, 0x82, 0xb9, 0xa1, 0x0f, 0x82, 0xc1, 0xa2, 0x0f, 0x82, 0xc9, 0xa3,
+ 0x0f, 0x82, 0xd0, 0x9d, 0x0f, 0x82, 0xd9, 0x9e, 0x0f, 0x82, 0xe1, 0x9f,
+ 0x0f, 0x82, 0xe9, 0xa0, 0x0f, 0x82, 0xf1, 0xa1, 0x0f, 0x82, 0xf9, 0xa2,
+ 0x0f, 0x83, 0x01, 0xa3, 0x0f, 0x83, 0x08, 0x9d, 0x0f, 0x83, 0x19, 0x9e,
+ 0x0f, 0x83, 0x21, 0x9f, 0x0f, 0x83, 0x29, 0xa0, 0x0f, 0x83, 0x31, 0xa1,
+ 0x0f, 0x83, 0x39, 0xa2, 0x0f, 0x83, 0x41, 0xa3, 0x0f, 0x83, 0x48, 0x9d,
+ 0x0f, 0x83, 0x51, 0x9e, 0x0f, 0x83, 0x59, 0x9f, 0x0f, 0x83, 0x61, 0xa0,
+ 0x0f, 0x83, 0x69, 0xa1, 0x0f, 0x83, 0x71, 0xa2, 0x0f, 0x83, 0x79, 0xa3,
+ 0x0f, 0x83, 0x80, 0x9d, 0x0f, 0x83, 0x89, 0x9e, 0x0f, 0x83, 0x91, 0x9f,
+ 0x0f, 0x83, 0x99, 0xa0, 0x0f, 0x83, 0xa1, 0xa1, 0x0f, 0x83, 0xa9, 0xa2,
+ 0x0f, 0x83, 0xb1, 0xa3, 0x0f, 0x83, 0xb8, 0x9d, 0x0f, 0x83, 0xc1, 0x9e,
+ 0x0f, 0x83, 0xc9, 0x9f, 0x0f, 0x83, 0xd1, 0xa0, 0x0f, 0x83, 0xd9, 0xa1,
+ 0x0f, 0x83, 0xe1, 0xa2, 0x0f, 0x83, 0xe9, 0xa3, 0x0f, 0x83, 0xf0, 0x9d,
+ 0x0f, 0x83, 0xf9, 0x9e, 0x0f, 0x84, 0x01, 0x9f, 0x0f, 0x84, 0x09, 0xa0,
+ 0x0f, 0x84, 0x11, 0xa1, 0x0f, 0x84, 0x19, 0xa2, 0x0f, 0x84, 0x21, 0xa3,
+ 0x0f, 0x84, 0x28, 0x9e, 0x0f, 0x84, 0x39, 0x9f, 0x0f, 0x84, 0x41, 0xa0,
+ 0x0f, 0x84, 0x49, 0xa1, 0x0f, 0x84, 0x51, 0xa2, 0x0f, 0x84, 0x59, 0xa3,
+ 0x0f, 0x84, 0x61, 0x9d, 0x0f, 0x84, 0x30, 0x9d, 0x0f, 0x84, 0x69, 0x9e,
+ 0x0f, 0x84, 0x71, 0x9f, 0x0f, 0x84, 0x79, 0xa0, 0x0f, 0x84, 0x81, 0xa1,
+ 0x0f, 0x84, 0x89, 0xa2, 0x0f, 0x84, 0x91, 0xa3, 0x0f, 0x84, 0x98, 0xc9,
+ 0xb0, 0x86, 0x01, 0x3d, 0xf9, 0x47, 0x20, 0x7d, 0xc2, 0x3e, 0xef, 0xca,
+ 0xa6, 0x8e, 0x01, 0x53, 0xa0, 0xc3, 0x01, 0x5d, 0x01, 0x1f, 0xc3, 0x02,
+ 0x3e, 0xfb, 0xc4, 0x02, 0x6d, 0x01, 0x00, 0xb0, 0xc4, 0x13, 0x85, 0x01,
+ 0x16, 0x99, 0xc6, 0xc4, 0x5e, 0x01, 0x57, 0x58, 0xc8, 0x06, 0xbf, 0x01,
+ 0x16, 0x91, 0xc4, 0x1e, 0x43, 0x01, 0x11, 0x60, 0x17, 0xc2, 0x3e, 0xff,
+ 0x46, 0x1f, 0x87, 0xc2, 0x3f, 0x17, 0x16, 0xc2, 0x3f, 0x23, 0xcf, 0x62,
+ 0xa6, 0x01, 0x57, 0xe8, 0x14, 0xc2, 0x3f, 0x2f, 0xc3, 0x25, 0xd6, 0x01,
+ 0x4f, 0xd0, 0xc5, 0xce, 0x22, 0x01, 0x01, 0x09, 0xc8, 0x32, 0xb8, 0x01,
+ 0x57, 0x50, 0xdd, 0x0f, 0xb9, 0x01, 0x00, 0xf9, 0xc5, 0x59, 0x93, 0x01,
+ 0x72, 0x00, 0x11, 0xc2, 0x3f, 0x3e, 0xdc, 0x13, 0x19, 0x01, 0x4c, 0xa8,
+ 0xc9, 0x00, 0xca, 0x01, 0x55, 0x0b, 0x02, 0x3f, 0x48, 0xcc, 0x07, 0xc7,
+ 0x01, 0x55, 0x10, 0x47, 0xc7, 0x4a, 0xc2, 0x3f, 0x4e, 0xcf, 0x60, 0x4e,
+ 0x01, 0x0a, 0x01, 0x48, 0x0b, 0x17, 0xc2, 0x3f, 0x5a, 0x46, 0x03, 0x13,
+ 0x42, 0x3f, 0x7f, 0x4c, 0x24, 0xe3, 0xc2, 0x3f, 0x8b, 0x48, 0x00, 0xda,
+ 0x42, 0x3f, 0x97, 0xc4, 0x1e, 0x97, 0x08, 0xc1, 0xc9, 0xc5, 0x40, 0xe7,
+ 0x08, 0xc1, 0xc0, 0x97, 0x08, 0xc1, 0xb1, 0x8b, 0x08, 0xc1, 0xa1, 0x83,
+ 0x08, 0xc1, 0x60, 0x94, 0x08, 0xc1, 0x90, 0x97, 0x08, 0xc1, 0x80, 0x8b,
+ 0x08, 0xc1, 0x70, 0xc2, 0x00, 0x39, 0x08, 0xc1, 0x59, 0x83, 0x08, 0xc1,
+ 0x20, 0x83, 0x08, 0xc1, 0x49, 0xc2, 0x0d, 0xf6, 0x08, 0xc1, 0x41, 0xc2,
+ 0x00, 0xd0, 0x08, 0xc1, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0xc1, 0x09, 0x83,
+ 0x08, 0xc1, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0xf9, 0x83, 0x08, 0xc0,
+ 0xf0, 0x83, 0x08, 0xc0, 0xe9, 0xc2, 0x00, 0xc1, 0x08, 0xc0, 0xc1, 0xc2,
+ 0x19, 0x2c, 0x08, 0xc0, 0x99, 0xc2, 0x01, 0x30, 0x08, 0xc0, 0x70, 0xc2,
+ 0x00, 0xd0, 0x08, 0xc0, 0xe1, 0x83, 0x08, 0xc0, 0xd9, 0x06, 0x42, 0x3f,
+ 0xa9, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0xd1, 0x83, 0x08, 0xc0, 0xc9, 0x16,
+ 0x42, 0x3f, 0xb3, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0x91, 0x83, 0x08, 0xc0,
+ 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xc0, 0x81, 0x83, 0x08, 0xc0, 0x78, 0xc2,
+ 0x00, 0xd0, 0x08, 0xc0, 0x69, 0x83, 0x08, 0xc0, 0x60, 0xc2, 0x00, 0xd0,
+ 0x08, 0xc0, 0x59, 0x83, 0x08, 0xc0, 0x50, 0x97, 0x08, 0xc0, 0x49, 0x8b,
+ 0x08, 0xc0, 0x39, 0x83, 0x08, 0xc0, 0x08, 0x97, 0x08, 0xc0, 0x28, 0x8b,
+ 0x08, 0xc0, 0x18, 0x03, 0xc2, 0x3f, 0xbd, 0xc8, 0x00, 0x5f, 0x0d, 0xe4,
+ 0xc3, 0x02, 0x3f, 0xc9, 0xc4, 0x51, 0xb7, 0x0d, 0xe4, 0xb9, 0x0e, 0xc2,
+ 0x3f, 0xcf, 0xc6, 0x02, 0xd1, 0x0d, 0xe4, 0xa9, 0xc3, 0x02, 0xa3, 0x0d,
+ 0xe4, 0xa1, 0xc5, 0x1f, 0x0c, 0x0d, 0xe4, 0x91, 0xcb, 0x8f, 0x94, 0x0d,
+ 0xe4, 0x88, 0xc7, 0x27, 0x9b, 0x0d, 0xe3, 0xa8, 0xc3, 0x02, 0x6e, 0x0d,
+ 0xe4, 0x31, 0xc9, 0xac, 0xf0, 0x0d, 0xe4, 0x18, 0xc5, 0xd9, 0x39, 0x0d,
+ 0xe3, 0xc3, 0x02, 0x3f, 0xdb, 0xc2, 0x00, 0x71, 0x0d, 0xe3, 0xc8, 0x99,
+ 0x0d, 0xe3, 0x00, 0xc3, 0x02, 0xe9, 0x0d, 0xe1, 0xb9, 0x95, 0x0d, 0xe1,
+ 0xb0, 0x92, 0x0d, 0xe1, 0xa3, 0x02, 0x3f, 0xe1, 0x96, 0x0d, 0xe1, 0x93,
+ 0x02, 0x3f, 0xe7, 0x8c, 0x0d, 0xe1, 0x03, 0x02, 0x3f, 0xed, 0x95, 0x0d,
+ 0xe1, 0x51, 0xc8, 0x33, 0xae, 0x0d, 0xe1, 0x2b, 0x02, 0x3f, 0xf3, 0x8d,
+ 0x0d, 0xe1, 0xfb, 0x02, 0x3f, 0xf9, 0x8f, 0x0d, 0xe1, 0xe1, 0x90, 0x0d,
+ 0xe1, 0xd8, 0x8c, 0x0d, 0xe0, 0xa9, 0xc2, 0x08, 0x06, 0x0d, 0xe0, 0x91,
+ 0x11, 0xc2, 0x3f, 0xff, 0xc2, 0x00, 0xd1, 0x0d, 0xe3, 0x41, 0x07, 0xc2,
+ 0x40, 0x07, 0x97, 0x0d, 0xe2, 0xc0, 0x90, 0x0d, 0xe1, 0x83, 0x02, 0x40,
+ 0x13, 0x95, 0x0d, 0xe1, 0x4b, 0x02, 0x40, 0x19, 0x8f, 0x0d, 0xe0, 0xfb,
+ 0x02, 0x40, 0x1f, 0xc8, 0x33, 0xae, 0x0d, 0xe1, 0x1a, 0x02, 0x40, 0x25,
+ 0x8f, 0x0d, 0xe0, 0xf3, 0x02, 0x40, 0x2b, 0x95, 0x0d, 0xe1, 0x41, 0xc8,
+ 0x33, 0xae, 0x0d, 0xe1, 0x10, 0x83, 0x0d, 0xe3, 0x21, 0x8b, 0x0d, 0xe3,
+ 0x19, 0x91, 0x0d, 0xe3, 0x11, 0x97, 0x0d, 0xe3, 0x08, 0x90, 0x0d, 0xe0,
+ 0xeb, 0x02, 0x40, 0x31, 0x95, 0x0d, 0xe1, 0x39, 0xc8, 0x33, 0xae, 0x0d,
+ 0xe1, 0x08, 0x97, 0x0d, 0xe2, 0xb1, 0x8b, 0x0d, 0xe2, 0x68, 0x97, 0x0d,
+ 0xe2, 0xa9, 0x8b, 0x0d, 0xe2, 0x78, 0x8f, 0x0d, 0xe0, 0x79, 0xc3, 0x02,
+ 0xe9, 0x0d, 0xe1, 0xe8, 0x8f, 0x0d, 0xe3, 0x31, 0x90, 0x0d, 0xe3, 0x28,
+ 0xc7, 0x1b, 0x02, 0x00, 0x04, 0x69, 0xde, 0x0e, 0x50, 0x0f, 0xbe, 0x40,
+ 0x00, 0x42, 0x40, 0x37, 0xcf, 0x09, 0xf8, 0x01, 0x5a, 0x09, 0xd0, 0x03,
+ 0xb7, 0x01, 0x5a, 0x38, 0xda, 0x1c, 0xa0, 0x01, 0x30, 0xc9, 0xdf, 0x0c,
+ 0x27, 0x0f, 0xac, 0x89, 0xca, 0x3f, 0x35, 0x01, 0x5f, 0xf0, 0xc4, 0x1e,
+ 0xc9, 0x01, 0x11, 0xeb, 0x02, 0x40, 0x49, 0xcb, 0x94, 0x59, 0x01, 0x01,
+ 0xb9, 0x46, 0xcf, 0x95, 0x42, 0x40, 0x4f, 0xd3, 0x46, 0xb6, 0x01, 0x0a,
+ 0x19, 0xc8, 0x52, 0x00, 0x01, 0x02, 0x78, 0xcb, 0x92, 0xd8, 0x01, 0x02,
+ 0x59, 0xc4, 0x18, 0x26, 0x01, 0x01, 0xa8, 0xc5, 0x18, 0x25, 0x01, 0x01,
+ 0xb3, 0x02, 0x40, 0x5b, 0xcf, 0x68, 0xbe, 0x01, 0x57, 0x68, 0xce, 0x55,
+ 0x99, 0x01, 0x4d, 0x28, 0xca, 0xa1, 0x34, 0x01, 0x33, 0xc9, 0xca, 0x9d,
+ 0xce, 0x01, 0x33, 0xc1, 0xca, 0x9d, 0x42, 0x01, 0x33, 0xb9, 0xca, 0xa1,
+ 0x48, 0x01, 0x33, 0xb1, 0xca, 0x9d, 0x9c, 0x01, 0x33, 0xa9, 0xca, 0xa0,
+ 0x58, 0x01, 0x33, 0xa1, 0xca, 0x9a, 0x7c, 0x01, 0x33, 0x98, 0x83, 0x05,
+ 0x4a, 0x71, 0x97, 0x05, 0x4a, 0x68, 0x97, 0x05, 0x4a, 0x61, 0x8b, 0x05,
+ 0x4a, 0x50, 0xc2, 0x25, 0x3b, 0x05, 0x4a, 0x29, 0x83, 0x05, 0x49, 0xd8,
+ 0xc2, 0x01, 0x30, 0x05, 0x4a, 0x19, 0x83, 0x05, 0x49, 0x90, 0xd1, 0x3f,
+ 0xe4, 0x0f, 0xdc, 0x59, 0xd0, 0x05, 0xb7, 0x01, 0x16, 0x60, 0x00, 0x42,
+ 0x40, 0x61, 0xd3, 0x01, 0xb4, 0x01, 0x00, 0xc9, 0xd0, 0x58, 0xd2, 0x01,
+ 0x71, 0x38, 0xca, 0x6f, 0xb9, 0x0f, 0xaf, 0x49, 0xc4, 0x21, 0xdf, 0x0f,
+ 0xab, 0x42, 0x02, 0x40, 0x79, 0x42, 0x00, 0xa9, 0xc2, 0x40, 0x7f, 0x09,
+ 0x42, 0x40, 0x8b, 0x49, 0x05, 0xcb, 0xc2, 0x40, 0x9a, 0xd6, 0x13, 0x1f,
+ 0x01, 0x4c, 0xa0, 0xcc, 0x06, 0xdb, 0x01, 0x2c, 0xa9, 0xcd, 0x15, 0x02,
+ 0x0f, 0xdc, 0x38, 0x42, 0x00, 0x5b, 0xc2, 0x40, 0xa6, 0xcc, 0x01, 0xdb,
+ 0x0f, 0xdc, 0x69, 0xcb, 0x96, 0x7f, 0x0f, 0xdd, 0x99, 0xc6, 0x9e, 0xf4,
+ 0x0f, 0xdd, 0xd0, 0x00, 0x42, 0x40, 0xb2, 0xca, 0xa2, 0x74, 0x01, 0x1d,
+ 0x01, 0xc9, 0x57, 0x36, 0x01, 0x1c, 0xf9, 0xca, 0xa3, 0x5a, 0x01, 0x1c,
+ 0xf0, 0xc7, 0xb2, 0xec, 0x01, 0x4b, 0xe9, 0xd0, 0x4a, 0x77, 0x0f, 0xdc,
+ 0x48, 0x44, 0x01, 0x94, 0xc2, 0x40, 0xc4, 0xd3, 0x41, 0xf6, 0x01, 0x70,
+ 0x50, 0xcc, 0x86, 0xcd, 0x0f, 0xaf, 0x69, 0x44, 0x02, 0xdf, 0xc2, 0x40,
+ 0xd3, 0xde, 0x06, 0x69, 0x0f, 0xde, 0x18, 0xce, 0x01, 0xb9, 0x01, 0x00,
+ 0xe9, 0xcc, 0x8a, 0x09, 0x01, 0x4e, 0xd9, 0x03, 0xc2, 0x40, 0xdf, 0xcb,
+ 0x1a, 0x50, 0x01, 0x71, 0x48, 0xcb, 0x1a, 0x50, 0x01, 0x4c, 0x31, 0x05,
+ 0xc2, 0x40, 0xeb, 0xd2, 0x21, 0x89, 0x01, 0x80, 0xb9, 0xd6, 0x08, 0x88,
+ 0x01, 0x80, 0xc9, 0xce, 0x25, 0xad, 0x01, 0x80, 0xd8, 0x00, 0x42, 0x40,
+ 0xf7, 0x45, 0x01, 0x95, 0xc2, 0x41, 0x03, 0x44, 0x0b, 0x26, 0x42, 0x41,
+ 0x0f, 0xcd, 0x7e, 0x3b, 0x01, 0x0d, 0x01, 0x48, 0x01, 0x9a, 0x42, 0x41,
+ 0x1b, 0xcb, 0x6f, 0xff, 0x01, 0x0e, 0xe9, 0xca, 0x88, 0xdf, 0x0f, 0xc1,
+ 0xd0, 0xd0, 0x58, 0x62, 0x0f, 0xc2, 0x11, 0xc5, 0x01, 0xa2, 0x0f, 0xc2,
+ 0x30, 0x46, 0x01, 0x52, 0xc2, 0x41, 0x27, 0xc2, 0x02, 0x35, 0x0f, 0xd7,
+ 0x88, 0x45, 0x00, 0x8c, 0xc2, 0x41, 0x33, 0x16, 0xc2, 0x41, 0x6f, 0xd4,
+ 0x3b, 0x38, 0x01, 0x0e, 0x21, 0xc8, 0xae, 0xbc, 0x01, 0x0d, 0x33, 0x02,
+ 0x41, 0x7b, 0x03, 0x42, 0x41, 0x81, 0xc5, 0x01, 0xa2, 0x01, 0x0e, 0x93,
+ 0x02, 0x41, 0x8d, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x68, 0xd3, 0x43, 0xe4,
+ 0x01, 0x5c, 0x51, 0xc5, 0x01, 0xa2, 0x01, 0x5c, 0xa8, 0xca, 0x50, 0x5e,
+ 0x00, 0x7e, 0xb8, 0xc7, 0x0d, 0x04, 0x01, 0x0b, 0x6b, 0x02, 0x41, 0x97,
+ 0xc8, 0x4b, 0x94, 0x01, 0x0b, 0x7a, 0x02, 0x41, 0x9d, 0xc3, 0x45, 0x6b,
+ 0x01, 0x0b, 0x63, 0x02, 0x41, 0xa3, 0xc2, 0x00, 0x5f, 0x01, 0x0b, 0x22,
+ 0x02, 0x41, 0xa7, 0xca, 0xa0, 0xda, 0x01, 0x0c, 0x28, 0xc9, 0x57, 0x20,
+ 0x01, 0x0c, 0x10, 0xc4, 0x22, 0x44, 0x01, 0x0b, 0x59, 0x91, 0x01, 0x0b,
+ 0x08, 0xc8, 0xbd, 0x82, 0x08, 0x0c, 0x81, 0xc8, 0x45, 0xf0, 0x08, 0x0c,
+ 0x98, 0x44, 0x1c, 0x74, 0xc2, 0x41, 0xab, 0xcf, 0x0c, 0x37, 0x0f, 0xac,
+ 0x80, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xc1, 0xc2, 0x0d, 0x10, 0x08, 0x73,
+ 0x78, 0xc8, 0x0d, 0x03, 0x08, 0x73, 0xb9, 0xc2, 0x0d, 0x10, 0x08, 0x73,
+ 0x70, 0xca, 0x37, 0x63, 0x08, 0x73, 0xb1, 0xc3, 0x45, 0x6b, 0x08, 0x73,
+ 0x68, 0xca, 0x9c, 0x5c, 0x08, 0x73, 0xa9, 0xc3, 0x0d, 0x0f, 0x08, 0x73,
+ 0x60, 0xcb, 0x13, 0xfa, 0x08, 0x73, 0xa1, 0xc4, 0x0d, 0x0e, 0x08, 0x73,
+ 0x58, 0xc9, 0x18, 0x05, 0x08, 0x73, 0x99, 0xc4, 0x18, 0x12, 0x08, 0x73,
+ 0x50, 0x4d, 0x7e, 0xbd, 0xc2, 0x41, 0xb1, 0xcd, 0x7e, 0x21, 0x00, 0xb5,
+ 0x00, 0x91, 0x00, 0xb7, 0x99, 0xce, 0x75, 0x12, 0x00, 0xb6, 0xf9, 0xc5,
+ 0xd4, 0xac, 0x00, 0xb6, 0xa9, 0x90, 0x00, 0xb5, 0x81, 0x87, 0x00, 0xb5,
+ 0x79, 0xc3, 0x05, 0x0d, 0x00, 0xb5, 0x48, 0x8a, 0x00, 0xb7, 0x93, 0x02,
+ 0x41, 0xc7, 0xc3, 0x13, 0x00, 0x00, 0xb7, 0x29, 0xd6, 0x2e, 0x28, 0x00,
+ 0xb6, 0x59, 0xc7, 0xc9, 0x5e, 0x00, 0xb6, 0x50, 0x43, 0x38, 0x85, 0x42,
+ 0x41, 0xcd, 0xcb, 0x96, 0xc1, 0x00, 0xb7, 0x41, 0xc2, 0x00, 0xbf, 0x00,
+ 0xb7, 0x09, 0xc2, 0x00, 0x75, 0x00, 0xb6, 0xeb, 0x02, 0x41, 0xd7, 0xc7,
+ 0xc5, 0x2f, 0x00, 0xb6, 0x39, 0xcc, 0x84, 0xf9, 0x00, 0xb6, 0x08, 0x4b,
+ 0x2e, 0x2e, 0xc2, 0x41, 0xdd, 0xd1, 0x55, 0xb8, 0x00, 0xb6, 0xd0, 0x07,
+ 0xc2, 0x41, 0xfb, 0xc3, 0x67, 0x02, 0x00, 0xb7, 0x19, 0xc6, 0xce, 0xf9,
+ 0x00, 0xb7, 0x10, 0xc2, 0x00, 0xb1, 0x00, 0xb7, 0x01, 0xc9, 0xaa, 0x0e,
+ 0x00, 0xb6, 0xb1, 0xc2, 0x00, 0x75, 0x00, 0xb5, 0xb1, 0xc2, 0x00, 0x8e,
+ 0x00, 0xb5, 0x38, 0xcb, 0x99, 0x97, 0x00, 0xb6, 0xf1, 0x46, 0xcb, 0xbd,
+ 0x42, 0x42, 0x05, 0xce, 0x72, 0x56, 0x00, 0xb6, 0x79, 0xd3, 0x42, 0xda,
+ 0x00, 0xb5, 0x30, 0xca, 0xa5, 0x08, 0x00, 0xb6, 0x49, 0xc3, 0x23, 0x1c,
+ 0x00, 0xb5, 0x59, 0xc3, 0x15, 0x66, 0x00, 0xb5, 0x51, 0xc6, 0xcb, 0xc9,
+ 0x00, 0xb5, 0x40, 0x07, 0xc2, 0x42, 0x11, 0xc2, 0x00, 0xb1, 0x00, 0xb5,
+ 0xc0, 0xc5, 0xd9, 0x75, 0x00, 0xb5, 0xd9, 0xc6, 0xcf, 0xa1, 0x00, 0xb5,
+ 0xd0, 0xcb, 0x95, 0x4b, 0x00, 0xb5, 0xc8, 0x94, 0x00, 0xb5, 0x18, 0x87,
+ 0x05, 0x28, 0x03, 0x02, 0x42, 0x1b, 0x90, 0x05, 0x2f, 0x10, 0x87, 0x05,
+ 0x2f, 0x23, 0x02, 0x42, 0x1f, 0x8b, 0x05, 0x29, 0x33, 0x02, 0x42, 0x27,
+ 0x83, 0x05, 0x2a, 0x63, 0x02, 0x42, 0x2b, 0x91, 0x05, 0x2d, 0xeb, 0x02,
+ 0x42, 0x2f, 0x97, 0x05, 0x2c, 0xba, 0x02, 0x42, 0x37, 0x87, 0x05, 0x2f,
+ 0x33, 0x02, 0x42, 0x3b, 0x8b, 0x05, 0x29, 0x43, 0x02, 0x42, 0x46, 0x83,
+ 0x05, 0x2a, 0x73, 0x02, 0x42, 0x4a, 0x91, 0x05, 0x2d, 0xfb, 0x02, 0x42,
+ 0x4e, 0x97, 0x05, 0x2c, 0xca, 0x02, 0x42, 0x59, 0x87, 0x05, 0x2f, 0x43,
+ 0x02, 0x42, 0x5d, 0x8b, 0x05, 0x29, 0x51, 0x83, 0x05, 0x2a, 0x81, 0x91,
+ 0x05, 0x2e, 0x0b, 0x02, 0x42, 0x61, 0x97, 0x05, 0x2c, 0xd8, 0x0a, 0xc2,
+ 0x42, 0x65, 0x87, 0x05, 0x2f, 0x53, 0x02, 0x42, 0x7f, 0x8b, 0x05, 0x29,
+ 0x61, 0x83, 0x05, 0x2a, 0x91, 0x91, 0x05, 0x2e, 0x1b, 0x02, 0x42, 0x83,
+ 0x97, 0x05, 0x2c, 0xe8, 0x04, 0xc2, 0x42, 0x87, 0x42, 0x1f, 0xad, 0xc2,
+ 0x42, 0xa1, 0x87, 0x05, 0x30, 0x43, 0x02, 0x42, 0xbb, 0x8b, 0x05, 0x2a,
+ 0x31, 0x83, 0x05, 0x2b, 0x71, 0x91, 0x05, 0x2e, 0xf3, 0x02, 0x42, 0xbf,
+ 0x97, 0x05, 0x2d, 0xb8, 0x12, 0xc2, 0x42, 0xc3, 0x87, 0x05, 0x30, 0x1b,
+ 0x02, 0x42, 0xe0, 0x8b, 0x05, 0x2a, 0x19, 0x83, 0x05, 0x2b, 0x53, 0x02,
+ 0x42, 0xe4, 0x91, 0x05, 0x2e, 0xdb, 0x02, 0x42, 0xe8, 0x97, 0x05, 0x2d,
+ 0xa0, 0x04, 0xc2, 0x42, 0xec, 0x87, 0x05, 0x30, 0x33, 0x02, 0x43, 0x06,
+ 0x8b, 0x05, 0x2a, 0x29, 0x83, 0x05, 0x2b, 0x69, 0x91, 0x05, 0x2e, 0xeb,
+ 0x02, 0x43, 0x0e, 0x97, 0x05, 0x2d, 0xb0, 0x87, 0x05, 0x2f, 0x8b, 0x02,
+ 0x43, 0x12, 0x8b, 0x05, 0x29, 0x89, 0x83, 0x05, 0x2a, 0xc1, 0x91, 0x05,
+ 0x2e, 0x4b, 0x02, 0x43, 0x16, 0x97, 0x05, 0x2d, 0x10, 0x87, 0x05, 0x2f,
+ 0x93, 0x02, 0x43, 0x1a, 0x8b, 0x05, 0x29, 0x91, 0x83, 0x05, 0x2a, 0xc9,
+ 0x91, 0x05, 0x2e, 0x53, 0x02, 0x43, 0x1e, 0x97, 0x05, 0x2d, 0x18, 0x87,
+ 0x05, 0x2f, 0x9b, 0x02, 0x43, 0x22, 0x0a, 0xc2, 0x43, 0x26, 0x8b, 0x05,
+ 0x29, 0x99, 0x83, 0x05, 0x2a, 0xd1, 0x91, 0x05, 0x2e, 0x5b, 0x02, 0x43,
+ 0x40, 0x97, 0x05, 0x2d, 0x20, 0x0a, 0xc2, 0x43, 0x44, 0x87, 0x05, 0x2f,
+ 0xcb, 0x02, 0x43, 0x62, 0x8b, 0x05, 0x29, 0xc9, 0x83, 0x05, 0x2b, 0x01,
+ 0x91, 0x05, 0x2e, 0x8b, 0x02, 0x43, 0x66, 0x97, 0x05, 0x2d, 0x50, 0x87,
+ 0x05, 0x2f, 0xbb, 0x02, 0x43, 0x6a, 0x8b, 0x05, 0x29, 0xb9, 0x83, 0x05,
+ 0x2a, 0xf1, 0x91, 0x05, 0x2e, 0x7b, 0x02, 0x43, 0x74, 0x97, 0x05, 0x2d,
+ 0x40, 0x87, 0x05, 0x2f, 0xc3, 0x02, 0x43, 0x78, 0x8b, 0x05, 0x29, 0xc1,
+ 0x83, 0x05, 0x2a, 0xf9, 0x91, 0x05, 0x2e, 0x83, 0x02, 0x43, 0x7c, 0x97,
+ 0x05, 0x2d, 0x48, 0x06, 0xc2, 0x43, 0x80, 0x0c, 0xc2, 0x43, 0x9a, 0x89,
+ 0x05, 0x30, 0x5b, 0x02, 0x43, 0xb4, 0x87, 0x05, 0x30, 0x4b, 0x02, 0x43,
+ 0xca, 0x1b, 0xc2, 0x43, 0xce, 0x8b, 0x05, 0x2a, 0x39, 0x83, 0x05, 0x2b,
+ 0x79, 0x91, 0x05, 0x2e, 0xfb, 0x02, 0x43, 0xe8, 0x97, 0x05, 0x2d, 0xc0,
+ 0x87, 0x05, 0x2f, 0xdb, 0x02, 0x43, 0xec, 0x0a, 0xc2, 0x43, 0xf0, 0x8b,
+ 0x05, 0x29, 0xd9, 0x83, 0x05, 0x2b, 0x11, 0x91, 0x05, 0x2e, 0x9b, 0x02,
+ 0x44, 0x0a, 0x97, 0x05, 0x2d, 0x60, 0x87, 0x05, 0x2f, 0xeb, 0x02, 0x44,
+ 0x0e, 0x0a, 0xc2, 0x44, 0x12, 0x8b, 0x05, 0x29, 0xe9, 0x83, 0x05, 0x2b,
+ 0x21, 0x91, 0x05, 0x2e, 0xab, 0x02, 0x44, 0x2c, 0x97, 0x05, 0x2d, 0x70,
+ 0x87, 0x05, 0x2f, 0xfb, 0x02, 0x44, 0x30, 0x8b, 0x05, 0x29, 0xf9, 0x83,
+ 0x05, 0x2b, 0x31, 0x91, 0x05, 0x2e, 0xbb, 0x02, 0x44, 0x34, 0x97, 0x05,
+ 0x2d, 0x80, 0x87, 0x05, 0x30, 0x03, 0x02, 0x44, 0x38, 0x8b, 0x05, 0x2a,
+ 0x01, 0x83, 0x05, 0x2b, 0x39, 0x91, 0x05, 0x2e, 0xc3, 0x02, 0x44, 0x3c,
+ 0x97, 0x05, 0x2d, 0x88, 0x87, 0x05, 0x30, 0x13, 0x02, 0x44, 0x40, 0x8b,
+ 0x05, 0x2a, 0x11, 0x83, 0x05, 0x2b, 0x49, 0x91, 0x05, 0x2e, 0xd3, 0x02,
+ 0x44, 0x44, 0x97, 0x05, 0x2d, 0x98, 0x90, 0x05, 0x29, 0x28, 0x90, 0x05,
+ 0x2a, 0x50, 0x91, 0x05, 0x2b, 0x8b, 0x02, 0x44, 0x48, 0x90, 0x05, 0x2d,
+ 0xd8, 0x90, 0x05, 0x2c, 0xb0, 0xc4, 0xe2, 0xaf, 0x05, 0x30, 0x99, 0xc2,
+ 0x04, 0xc6, 0x05, 0x30, 0xc0, 0xc4, 0xe2, 0xaf, 0x05, 0x30, 0xa1, 0xc3,
+ 0x38, 0x86, 0x05, 0x30, 0xe0, 0xc3, 0x00, 0x74, 0x05, 0x30, 0xa9, 0xc2,
+ 0x04, 0xc6, 0x05, 0x30, 0xc9, 0xc3, 0x08, 0x48, 0x05, 0x30, 0xe8, 0xc3,
+ 0x01, 0x95, 0x05, 0x30, 0xd1, 0x11, 0x42, 0x44, 0x4c, 0xc9, 0x57, 0x36,
+ 0x01, 0x1e, 0x81, 0x45, 0x00, 0x8c, 0x42, 0x44, 0x58, 0xc7, 0x33, 0xdf,
+ 0x00, 0x00, 0x5b, 0x02, 0x44, 0x64, 0xc4, 0x3b, 0x19, 0x01, 0x5b, 0xf8,
+ 0x00, 0x42, 0x44, 0x6a, 0xcb, 0x99, 0x1e, 0x01, 0x81, 0xa0, 0xcf, 0x15,
+ 0x36, 0x0f, 0xbd, 0xf9, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x80, 0xc6, 0x02,
+ 0xd1, 0x0f, 0xbc, 0x41, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x90, 0xc6, 0x27,
+ 0x5e, 0x0f, 0xb3, 0xe1, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x69, 0xd2, 0x4d,
+ 0x57, 0x0f, 0xbd, 0xc8, 0xce, 0x70, 0x5e, 0x00, 0xe7, 0x89, 0xcb, 0x95,
+ 0x98, 0x00, 0xe7, 0x5b, 0x02, 0x44, 0x76, 0xcc, 0x88, 0xc5, 0x00, 0xe7,
+ 0x51, 0xcc, 0x14, 0x41, 0x00, 0xe7, 0x48, 0xc8, 0x74, 0xc4, 0x00, 0xe7,
+ 0x31, 0xc6, 0x74, 0xc6, 0x00, 0xe7, 0x20, 0xca, 0xa5, 0x12, 0x00, 0xe7,
+ 0x40, 0xca, 0xa5, 0x12, 0x00, 0xe7, 0x38, 0xca, 0x9e, 0xe6, 0x00, 0xe7,
+ 0xc9, 0xc7, 0x02, 0x40, 0x00, 0xe6, 0xd0, 0xe0, 0x02, 0x27, 0x00, 0xe7,
+ 0x00, 0xca, 0xa4, 0x90, 0x00, 0xe6, 0xc8, 0x43, 0x00, 0x4b, 0xc2, 0x44,
+ 0x7c, 0xcc, 0x8b, 0x11, 0x70, 0x01, 0xe0, 0x4f, 0x0b, 0x17, 0xc2, 0x44,
+ 0x8e, 0x4d, 0x29, 0xb9, 0x42, 0x44, 0xf6, 0x42, 0x0a, 0x8c, 0xc2, 0x45,
+ 0x5e, 0xc3, 0x0d, 0xe5, 0x70, 0x01, 0xd0, 0xce, 0x25, 0xad, 0x70, 0x02,
+ 0xe9, 0xcb, 0x1a, 0x50, 0x70, 0x01, 0x49, 0xcd, 0x00, 0x32, 0x70, 0x03,
+ 0xe8, 0xc4, 0x26, 0x78, 0x70, 0x01, 0xc9, 0xc5, 0x06, 0xdb, 0x70, 0x01,
+ 0xc1, 0x15, 0xc2, 0x45, 0x68, 0x08, 0xc2, 0x45, 0x74, 0x16, 0xc2, 0x45,
+ 0x80, 0xc3, 0x05, 0x14, 0x70, 0x01, 0x89, 0xc4, 0x15, 0xe7, 0x70, 0x01,
+ 0x80, 0x83, 0x00, 0xbb, 0x41, 0xc2, 0x01, 0x30, 0x00, 0xbb, 0x28, 0xc9,
+ 0xa9, 0xc6, 0x00, 0xb8, 0xf8, 0x83, 0x00, 0xb8, 0x41, 0xc2, 0x01, 0x30,
+ 0x00, 0xb8, 0x28, 0x24, 0xc2, 0x45, 0x8c, 0x23, 0xc2, 0x45, 0xa8, 0x22,
+ 0xc2, 0x45, 0xd0, 0x21, 0xc2, 0x45, 0xf8, 0x20, 0xc2, 0x46, 0x20, 0x1f,
+ 0xc2, 0x46, 0x48, 0x1e, 0xc2, 0x46, 0x70, 0x1d, 0x42, 0x46, 0x98, 0xc4,
+ 0x26, 0x78, 0x0b, 0x56, 0x49, 0xc5, 0x06, 0xdb, 0x0b, 0x56, 0x41, 0x15,
+ 0xc2, 0x46, 0xc0, 0x08, 0xc2, 0x46, 0xcc, 0x16, 0xc2, 0x46, 0xd8, 0xc3,
+ 0x05, 0x14, 0x0b, 0x56, 0x09, 0xc4, 0x15, 0xe7, 0x0b, 0x56, 0x00, 0xc2,
+ 0x02, 0x1c, 0x0b, 0x55, 0xf1, 0x05, 0xc2, 0x46, 0xe4, 0x06, 0xc2, 0x46,
+ 0xee, 0x08, 0xc2, 0x46, 0xf8, 0xc2, 0x8d, 0x8f, 0x0b, 0x55, 0xd1, 0x16,
+ 0xc2, 0x47, 0x02, 0x0a, 0xc2, 0x47, 0x12, 0x09, 0xc2, 0x47, 0x1a, 0x15,
+ 0xc2, 0x47, 0x24, 0x10, 0xc2, 0x47, 0x2c, 0xc2, 0x00, 0x39, 0x0b, 0x55,
+ 0x91, 0x0e, 0xc2, 0x47, 0x42, 0x0f, 0xc2, 0x47, 0x4c, 0xc2, 0x01, 0x5d,
+ 0x0b, 0x55, 0x51, 0x12, 0xc2, 0x47, 0x60, 0xc2, 0x01, 0x4a, 0x0b, 0x55,
+ 0x31, 0xc2, 0x19, 0x2c, 0x0b, 0x55, 0x29, 0x0d, 0xc2, 0x47, 0x6a, 0x17,
+ 0xc2, 0x47, 0x74, 0x03, 0xc2, 0x47, 0x8c, 0x0b, 0xc2, 0x47, 0xa0, 0x07,
+ 0xc2, 0x47, 0xb0, 0x18, 0xc2, 0x47, 0xc0, 0x11, 0x42, 0x47, 0xd0, 0x18,
+ 0xc2, 0x47, 0xe0, 0x42, 0x14, 0x48, 0xc2, 0x47, 0xee, 0x0d, 0xc2, 0x48,
+ 0x00, 0x12, 0xc2, 0x48, 0x0a, 0xc7, 0xb4, 0xa5, 0x08, 0xfe, 0xc1, 0x03,
+ 0xc2, 0x48, 0x14, 0xc6, 0xcd, 0xd9, 0x08, 0xfe, 0xb1, 0xc3, 0x1e, 0xe5,
+ 0x08, 0xfe, 0xa8, 0xcb, 0x97, 0x9d, 0x08, 0xff, 0x49, 0xcb, 0x97, 0xa8,
+ 0x08, 0xff, 0x40, 0x83, 0x00, 0x5c, 0x2b, 0x02, 0x48, 0x20, 0x8b, 0x00,
+ 0x5c, 0x3b, 0x02, 0x48, 0x2c, 0x97, 0x00, 0x5c, 0x4b, 0x02, 0x48, 0x30,
+ 0x87, 0x00, 0x5c, 0x73, 0x02, 0x48, 0x34, 0x91, 0x00, 0x5c, 0x93, 0x02,
+ 0x48, 0x38, 0xc2, 0x02, 0x2b, 0x00, 0x5c, 0xa9, 0x10, 0xc2, 0x48, 0x3c,
+ 0xc2, 0x00, 0x64, 0x00, 0x5c, 0xd1, 0xc2, 0x25, 0x3b, 0x00, 0x5c, 0xe1,
+ 0x16, 0xc2, 0x48, 0x50, 0xc2, 0x00, 0xb0, 0x00, 0x5d, 0x51, 0xc2, 0x01,
+ 0xc3, 0x00, 0x5d, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x5d, 0x79, 0x14, 0xc2,
+ 0x48, 0x5a, 0x0e, 0xc2, 0x48, 0x64, 0xc2, 0x02, 0x41, 0x00, 0x5d, 0xa9,
+ 0x15, 0xc2, 0x48, 0x6c, 0xc2, 0x00, 0xd0, 0x00, 0x5d, 0xc8, 0xc4, 0x15,
+ 0xe7, 0x00, 0x5f, 0x31, 0xc3, 0x05, 0x14, 0x00, 0x5f, 0x39, 0x16, 0xc2,
+ 0x48, 0x7c, 0x08, 0xc2, 0x48, 0x88, 0x15, 0xc2, 0x48, 0x94, 0xc5, 0x06,
+ 0xdb, 0x00, 0x5f, 0x71, 0xc4, 0x26, 0x78, 0x00, 0x5f, 0x78, 0xc8, 0x08,
+ 0x79, 0x08, 0xfe, 0x99, 0x44, 0x22, 0xcb, 0xc2, 0x48, 0xa0, 0xca, 0x1e,
+ 0x15, 0x08, 0xfe, 0x69, 0xca, 0xa3, 0xfa, 0x08, 0xfe, 0x30, 0x45, 0x27,
+ 0x7a, 0xc2, 0x48, 0xac, 0xc7, 0x08, 0x79, 0x08, 0xfe, 0x81, 0x08, 0xc2,
+ 0x48, 0xb4, 0x45, 0x06, 0xdb, 0xc2, 0x48, 0xc0, 0x16, 0xc2, 0x48, 0xca,
+ 0x44, 0x22, 0xcb, 0xc2, 0x48, 0xda, 0xd8, 0x22, 0xbb, 0x08, 0xfe, 0x08,
+ 0x83, 0x00, 0x5d, 0xf1, 0x8b, 0x00, 0x5e, 0x41, 0x97, 0x00, 0x5e, 0x60,
+ 0x8b, 0x00, 0x5e, 0x00, 0x97, 0x00, 0x5e, 0x10, 0x87, 0x00, 0x5e, 0x38,
+ 0x91, 0x00, 0x5e, 0x58, 0xc7, 0x0d, 0x04, 0x00, 0x5f, 0x89, 0xc8, 0x4b,
+ 0x94, 0x00, 0x5f, 0x90, 0xc4, 0x18, 0x10, 0x08, 0xb6, 0x39, 0xc2, 0x22,
+ 0xcc, 0x08, 0xb6, 0x30, 0xc3, 0x0d, 0x14, 0x08, 0xb6, 0x29, 0xc3, 0x09,
+ 0x9e, 0x08, 0xb6, 0x20, 0xc4, 0x02, 0xde, 0x08, 0xb6, 0x19, 0xc2, 0x02,
+ 0xa0, 0x08, 0xb6, 0x10, 0xca, 0x9e, 0xaa, 0x08, 0xb5, 0xc1, 0x97, 0x08,
+ 0xb4, 0x49, 0x8b, 0x08, 0xb4, 0x39, 0x83, 0x08, 0xb4, 0x08, 0xc2, 0x00,
+ 0x39, 0x08, 0xb5, 0x51, 0x83, 0x08, 0xb5, 0x20, 0x83, 0x08, 0xb5, 0x41,
+ 0xc2, 0x00, 0xd0, 0x08, 0xb5, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0xb5, 0x09,
+ 0x83, 0x08, 0xb5, 0x00, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0xf9, 0x83, 0x08,
+ 0xb4, 0xf0, 0x83, 0x08, 0xb4, 0xe9, 0xc2, 0x00, 0xc1, 0x08, 0xb4, 0xc1,
+ 0xc2, 0x19, 0x2c, 0x08, 0xb4, 0x99, 0xc2, 0x01, 0x30, 0x08, 0xb4, 0x70,
+ 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0xe1, 0x83, 0x08, 0xb4, 0xd9, 0x06, 0x42,
+ 0x48, 0xe6, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0xd1, 0x83, 0x08, 0xb4, 0xc9,
+ 0x16, 0x42, 0x48, 0xf0, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0x91, 0x83, 0x08,
+ 0xb4, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0x81, 0x83, 0x08, 0xb4, 0x78,
+ 0xc2, 0x00, 0xd0, 0x08, 0xb4, 0x69, 0x83, 0x08, 0xb4, 0x60, 0xc2, 0x00,
+ 0xd0, 0x08, 0xb4, 0x59, 0x83, 0x08, 0xb4, 0x50, 0x97, 0x08, 0xb4, 0x28,
+ 0x8b, 0x08, 0xb4, 0x18, 0xc4, 0x1e, 0x97, 0x08, 0xb5, 0xb1, 0xc5, 0x40,
+ 0xe7, 0x08, 0xb5, 0x60, 0x97, 0x08, 0xb5, 0xa9, 0x8b, 0x08, 0xb5, 0x99,
+ 0x83, 0x08, 0xb5, 0x68, 0x97, 0x08, 0xb5, 0x88, 0x8b, 0x08, 0xb5, 0x78,
+ 0xc3, 0x01, 0x95, 0x00, 0xd5, 0x61, 0xc2, 0x69, 0xa6, 0x00, 0xd5, 0x20,
+ 0xc5, 0xd7, 0x04, 0x00, 0xd5, 0x53, 0x02, 0x48, 0xfa, 0xc3, 0x29, 0xf7,
+ 0x00, 0xd5, 0x11, 0xc3, 0x1c, 0x9f, 0x00, 0xd3, 0x00, 0xc3, 0x04, 0xc6,
+ 0x00, 0xd5, 0x43, 0x02, 0x49, 0x00, 0xc3, 0x3f, 0x6f, 0x00, 0xd5, 0x19,
+ 0x44, 0xdf, 0xcf, 0x42, 0x49, 0x06, 0xc5, 0xd4, 0x98, 0x00, 0xd5, 0x39,
+ 0xc3, 0x71, 0xe5, 0x00, 0xd3, 0xd9, 0xc4, 0xe0, 0xe3, 0x00, 0xd3, 0xa2,
+ 0x02, 0x49, 0x12, 0xd4, 0x3c, 0x78, 0x00, 0xd5, 0x31, 0xc6, 0xd1, 0x81,
+ 0x00, 0xd3, 0xd0, 0xc4, 0xde, 0xb7, 0x00, 0xd5, 0x08, 0x9f, 0x00, 0xd3,
+ 0xb1, 0x9e, 0x00, 0xd3, 0xa8, 0xc4, 0x18, 0x10, 0x00, 0xd4, 0xb9, 0xc2,
+ 0x22, 0xcc, 0x00, 0xd4, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xd4, 0xa9, 0xc3,
+ 0x09, 0x9e, 0x00, 0xd4, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xd4, 0x99, 0xc2,
+ 0x02, 0xa0, 0x00, 0xd4, 0x90, 0xc4, 0x18, 0x10, 0x00, 0xd4, 0x39, 0xc2,
+ 0x22, 0xcc, 0x00, 0xd4, 0x30, 0xc3, 0x0d, 0x14, 0x00, 0xd4, 0x29, 0xc3,
+ 0x09, 0x9e, 0x00, 0xd4, 0x20, 0xc4, 0x02, 0xde, 0x00, 0xd4, 0x19, 0xc2,
+ 0x02, 0xa0, 0x00, 0xd4, 0x10, 0xc2, 0x0d, 0xf6, 0x00, 0xd2, 0xf1, 0xc2,
+ 0x01, 0x5d, 0x00, 0xd2, 0xe9, 0x0f, 0xc2, 0x49, 0x18, 0xd4, 0x3c, 0xf0,
+ 0x00, 0xd2, 0xd9, 0x0e, 0xc2, 0x49, 0x22, 0xc9, 0xb4, 0x2e, 0x00, 0xd2,
+ 0xc8, 0x42, 0x01, 0x31, 0xc2, 0x49, 0x2e, 0x91, 0x00, 0xd3, 0x81, 0x9b,
+ 0x00, 0xd3, 0x68, 0xc6, 0xd2, 0xbf, 0x00, 0xd3, 0x91, 0xc6, 0xc6, 0xb8,
+ 0x00, 0xd3, 0x20, 0x8b, 0x00, 0xd3, 0x89, 0x87, 0x00, 0xd3, 0x79, 0x83,
+ 0x00, 0xd3, 0x18, 0x97, 0x00, 0xd3, 0x53, 0x02, 0x49, 0x3a, 0x87, 0x00,
+ 0xd3, 0x38, 0x8b, 0x00, 0xd3, 0x30, 0x83, 0x00, 0xd2, 0x1b, 0x02, 0x49,
+ 0x3e, 0x43, 0x02, 0x5f, 0xc2, 0x49, 0x42, 0xc2, 0x00, 0xdb, 0x00, 0xd2,
+ 0x51, 0xc2, 0x0f, 0xe1, 0x00, 0xd2, 0x20, 0x97, 0x00, 0xd2, 0x80, 0x8b,
+ 0x00, 0xd2, 0x70, 0xc2, 0x00, 0xd0, 0x00, 0xd2, 0x49, 0x15, 0xc2, 0x49,
+ 0x70, 0xc2, 0x19, 0x2c, 0x00, 0xd2, 0x01, 0xc2, 0x00, 0x87, 0x00, 0xd1,
+ 0xd1, 0x12, 0xc2, 0x49, 0x80, 0x16, 0xc2, 0x49, 0x8a, 0xc5, 0x3c, 0xf5,
+ 0x00, 0xd1, 0x71, 0x05, 0xc2, 0x49, 0x94, 0x0d, 0x42, 0x49, 0x9e, 0xc2,
+ 0x0f, 0xe1, 0x00, 0xd2, 0x11, 0x83, 0x00, 0xd2, 0x0a, 0x02, 0x49, 0xae,
+ 0x83, 0x00, 0xd1, 0xb1, 0xc2, 0x19, 0x2c, 0x00, 0xd1, 0x61, 0xc2, 0x01,
+ 0x30, 0x00, 0xd1, 0x30, 0xa3, 0x00, 0xcb, 0xa1, 0xa2, 0x00, 0xcb, 0x99,
+ 0xa1, 0x00, 0xcb, 0x91, 0xa0, 0x00, 0xcb, 0x89, 0x9f, 0x00, 0xcb, 0x80,
+ 0xc2, 0x00, 0xd0, 0x00, 0xcb, 0x09, 0x83, 0x00, 0xca, 0x98, 0xc5, 0xd8,
+ 0x3f, 0x05, 0x56, 0xf9, 0x90, 0x05, 0x56, 0xd8, 0x8f, 0x05, 0x55, 0xf1,
+ 0x90, 0x05, 0x55, 0xe9, 0x9b, 0x05, 0x55, 0xe1, 0xc2, 0x0f, 0xe1, 0x05,
+ 0x55, 0xd9, 0x83, 0x05, 0x55, 0x88, 0x83, 0x05, 0x55, 0xd1, 0x87, 0x05,
+ 0x55, 0x9a, 0x02, 0x49, 0xba, 0x83, 0x05, 0x55, 0xc0, 0x91, 0x05, 0x55,
+ 0x79, 0xc2, 0x01, 0x23, 0x05, 0x55, 0x69, 0xc2, 0x17, 0xbd, 0x05, 0x55,
+ 0x59, 0xc2, 0x01, 0xc8, 0x05, 0x55, 0x49, 0xc2, 0x00, 0x79, 0x05, 0x55,
+ 0x39, 0xc2, 0x42, 0xcd, 0x05, 0x55, 0x29, 0xc2, 0x00, 0xa2, 0x05, 0x55,
+ 0x19, 0xc2, 0x01, 0x03, 0x05, 0x55, 0x09, 0x12, 0xc2, 0x49, 0xbe, 0xc2,
+ 0x00, 0x6b, 0x05, 0x54, 0xd9, 0x10, 0xc2, 0x49, 0xc8, 0x16, 0xc2, 0x49,
+ 0xd8, 0xc2, 0x00, 0x58, 0x05, 0x54, 0x99, 0x05, 0xc2, 0x49, 0xe2, 0xc2,
+ 0x0f, 0x7b, 0x05, 0x54, 0x39, 0x0d, 0xc2, 0x49, 0xec, 0xc2, 0x00, 0xfb,
+ 0x05, 0x54, 0x78, 0x91, 0x05, 0x55, 0x71, 0xc2, 0x01, 0x23, 0x05, 0x55,
+ 0x61, 0xc2, 0x17, 0xbd, 0x05, 0x55, 0x51, 0xc2, 0x01, 0xc8, 0x05, 0x55,
+ 0x41, 0xc2, 0x00, 0x79, 0x05, 0x55, 0x31, 0xc2, 0x42, 0xcd, 0x05, 0x55,
+ 0x21, 0xc2, 0x00, 0xa2, 0x05, 0x55, 0x11, 0xc2, 0x01, 0x03, 0x05, 0x55,
+ 0x01, 0x12, 0xc2, 0x49, 0xf4, 0xc2, 0x00, 0x6b, 0x05, 0x54, 0xd1, 0x10,
+ 0xc2, 0x49, 0xfe, 0x16, 0xc2, 0x4a, 0x0e, 0xc2, 0x00, 0x58, 0x05, 0x54,
+ 0x91, 0x05, 0xc2, 0x4a, 0x18, 0xc2, 0x0f, 0x7b, 0x05, 0x54, 0x31, 0x0d,
+ 0xc2, 0x4a, 0x22, 0xc2, 0x00, 0xfb, 0x05, 0x54, 0x70, 0xd2, 0x49, 0xe5,
+ 0x0f, 0xb2, 0xb1, 0xd2, 0x47, 0x15, 0x0f, 0xb2, 0xa0, 0xc4, 0x02, 0xde,
+ 0x01, 0x0c, 0x59, 0xc2, 0x02, 0xa0, 0x01, 0x0c, 0x50, 0x9b, 0x01, 0x0a,
+ 0x21, 0x8e, 0x01, 0x0a, 0x11, 0x89, 0x01, 0x0a, 0x08, 0xd2, 0x49, 0xe5,
+ 0x0f, 0xb2, 0xb9, 0xd2, 0x47, 0x15, 0x0f, 0xb2, 0xa8, 0xc4, 0x00, 0x49,
+ 0x01, 0x34, 0xf9, 0xc5, 0x00, 0x2c, 0x01, 0x34, 0xf0, 0xc5, 0x00, 0x2c,
+ 0x0f, 0xaf, 0x39, 0xc4, 0x00, 0x49, 0x0f, 0xaf, 0x31, 0xc5, 0x05, 0x02,
+ 0x0f, 0xaf, 0x29, 0xc5, 0x00, 0xd4, 0x0f, 0xaf, 0x20, 0x4b, 0x03, 0x87,
+ 0xc2, 0x4a, 0x2a, 0xdf, 0x0d, 0x7c, 0x01, 0x5c, 0xc0, 0xe0, 0x0b, 0xe7,
+ 0x01, 0x5c, 0xc8, 0xe0, 0x07, 0xe7, 0x01, 0x3d, 0x18, 0xe0, 0x03, 0xc7,
+ 0x01, 0x5c, 0xd8, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x41, 0xc4, 0x40, 0x89,
+ 0x01, 0x00, 0x48, 0xc5, 0xd6, 0x91, 0x00, 0x3d, 0x19, 0xc8, 0xb8, 0x1a,
+ 0x00, 0x3c, 0x79, 0xc4, 0xd8, 0x3b, 0x00, 0x3c, 0x70, 0x91, 0x00, 0x3d,
+ 0x01, 0xc7, 0xb4, 0xdb, 0x00, 0x3c, 0x99, 0xc3, 0x39, 0x6e, 0x00, 0x3c,
+ 0x63, 0x02, 0x4a, 0x36, 0xc3, 0x04, 0xc5, 0x00, 0x3c, 0xc0, 0x03, 0xc2,
+ 0x4a, 0x3c, 0xc5, 0xd7, 0x22, 0x00, 0x3c, 0x58, 0xc5, 0xd9, 0x20, 0x00,
+ 0x3c, 0xf1, 0x0a, 0xc2, 0x4a, 0x48, 0xc4, 0xe2, 0xd7, 0x00, 0x3c, 0x80,
+ 0xc3, 0x39, 0x6e, 0x00, 0x3c, 0xc9, 0xc2, 0x04, 0xc6, 0x00, 0x3c, 0x00,
+ 0x03, 0xc2, 0x4a, 0x54, 0x91, 0x00, 0x3d, 0x08, 0xc4, 0xe1, 0xff, 0x00,
+ 0x3c, 0x69, 0xc8, 0xb4, 0xda, 0x00, 0x3c, 0x28, 0xc4, 0xe1, 0x03, 0x00,
+ 0x3c, 0x39, 0xc3, 0x16, 0xc3, 0x00, 0x3d, 0x10, 0xc4, 0xd8, 0x3b, 0x00,
+ 0x3c, 0x31, 0xc3, 0x39, 0x6e, 0x00, 0x3c, 0xd0, 0xc4, 0x2b, 0xa7, 0x00,
+ 0x3c, 0x11, 0xc2, 0x04, 0xc6, 0x00, 0x3d, 0x88, 0x0d, 0xc2, 0x4a, 0x5e,
+ 0x10, 0xc2, 0x4a, 0x6a, 0x46, 0xcc, 0x6b, 0xc2, 0x4a, 0x7c, 0x15, 0xc2,
+ 0x4a, 0x91, 0x1b, 0xc2, 0x4a, 0x9d, 0x43, 0x5d, 0x85, 0xc2, 0x4a, 0xa9,
+ 0x16, 0xc2, 0x4a, 0xb5, 0xc9, 0xb4, 0x0a, 0x00, 0x70, 0xd1, 0x12, 0xc2,
+ 0x4a, 0xbf, 0x42, 0x01, 0x03, 0xc2, 0x4a, 0xcf, 0x0f, 0xc2, 0x4a, 0xde,
+ 0x14, 0xc2, 0x4a, 0xea, 0x0e, 0xc2, 0x4a, 0xf4, 0xc7, 0xc2, 0x5e, 0x00,
+ 0x71, 0x39, 0x43, 0x60, 0xe8, 0xc2, 0x4b, 0x04, 0xc5, 0xd9, 0xd9, 0x00,
+ 0x71, 0x69, 0xca, 0x9e, 0xbe, 0x00, 0x72, 0xd0, 0xc2, 0x02, 0xa0, 0x00,
+ 0x72, 0x91, 0xc4, 0x02, 0xde, 0x00, 0x72, 0x98, 0xc3, 0x09, 0x9e, 0x00,
+ 0x72, 0xa1, 0xc3, 0x0d, 0x14, 0x00, 0x72, 0xa8, 0xc2, 0x22, 0xcc, 0x00,
+ 0x72, 0xb1, 0xc4, 0x18, 0x10, 0x00, 0x72, 0xb8, 0x87, 0x0f, 0x15, 0x58,
+ 0x47, 0xc2, 0xe3, 0xc2, 0x4b, 0x10, 0x83, 0x0f, 0x14, 0x88, 0x91, 0x0f,
+ 0x15, 0x40, 0x97, 0x0f, 0x15, 0x18, 0xc2, 0x01, 0x30, 0x0f, 0x14, 0xc1,
+ 0x83, 0x0f, 0x14, 0xb8, 0xd0, 0x59, 0x72, 0x01, 0x4e, 0x69, 0xc8, 0x52,
+ 0x09, 0x01, 0x4e, 0x59, 0xc9, 0x16, 0x14, 0x01, 0x4e, 0x51, 0xcf, 0x13,
+ 0x5e, 0x0f, 0xb6, 0x30, 0xc4, 0x55, 0x73, 0x0e, 0x9a, 0x49, 0xc9, 0xaf,
+ 0x15, 0x0e, 0x99, 0xe0, 0xc5, 0xba, 0x65, 0x0e, 0x9a, 0x91, 0xc5, 0x08,
+ 0xe6, 0x0e, 0x9a, 0x70, 0xc6, 0xd0, 0x55, 0x0e, 0x99, 0xc1, 0x16, 0x42,
+ 0x4b, 0x24, 0xc7, 0xc0, 0x58, 0x0e, 0x99, 0xe9, 0xc4, 0x1d, 0xa8, 0x0e,
+ 0x99, 0x30, 0xc5, 0xd7, 0x63, 0x0e, 0x9a, 0x61, 0xc2, 0x00, 0x5f, 0x0e,
+ 0x99, 0x88, 0xc5, 0xd7, 0x7c, 0x0e, 0x99, 0x71, 0x0b, 0x42, 0x4b, 0x36,
+ 0xc5, 0x7c, 0xec, 0x01, 0x18, 0xa9, 0xc5, 0x36, 0xc0, 0x0f, 0xa6, 0xf2,
+ 0x02, 0x4b, 0x42, 0x49, 0x29, 0x29, 0xc2, 0x4b, 0x48, 0xca, 0x1e, 0x8a,
+ 0x00, 0x60, 0x08, 0xc7, 0x14, 0x39, 0x00, 0x60, 0x11, 0xc7, 0x7a, 0x7f,
+ 0x00, 0x61, 0xe8, 0xc5, 0x40, 0xe7, 0x00, 0x60, 0x19, 0xc4, 0x1e, 0x97,
+ 0x00, 0x62, 0x68, 0x83, 0x00, 0x60, 0x2b, 0x02, 0x4b, 0x54, 0x8b, 0x00,
+ 0x60, 0x3b, 0x02, 0x4b, 0x60, 0x97, 0x00, 0x60, 0x4b, 0x02, 0x4b, 0x64,
+ 0x18, 0xc2, 0x4b, 0x68, 0x87, 0x00, 0x60, 0x73, 0x02, 0x4b, 0x72, 0x91,
+ 0x00, 0x60, 0x93, 0x02, 0x4b, 0x76, 0x0d, 0xc2, 0x4b, 0x7a, 0x09, 0xc2,
+ 0x4b, 0x84, 0x10, 0xc2, 0x4b, 0x8e, 0x05, 0xc2, 0x4b, 0xa7, 0x0c, 0xc2,
+ 0x4b, 0xb1, 0x16, 0xc2, 0x4b, 0xbb, 0x06, 0xc2, 0x4b, 0xcf, 0x12, 0xc2,
+ 0x4b, 0xe3, 0x04, 0xc2, 0x4b, 0xed, 0xc2, 0x01, 0xc3, 0x00, 0x61, 0x71,
+ 0xc2, 0x19, 0x2c, 0x00, 0x61, 0x79, 0x14, 0xc2, 0x4b, 0xf7, 0x0e, 0xc2,
+ 0x4b, 0xff, 0x15, 0xc2, 0x4c, 0x07, 0xc2, 0x00, 0xd0, 0x00, 0x61, 0xc8,
+ 0x83, 0x00, 0x61, 0xf1, 0x8b, 0x00, 0x62, 0x41, 0x97, 0x00, 0x62, 0x60,
+ 0x8b, 0x00, 0x62, 0x00, 0x97, 0x00, 0x62, 0x10, 0x94, 0x00, 0x62, 0x1b,
+ 0x02, 0x4c, 0x17, 0x8e, 0x00, 0x63, 0x12, 0x02, 0x4c, 0x1b, 0x87, 0x00,
+ 0x62, 0x38, 0x91, 0x00, 0x62, 0x58, 0xc2, 0x02, 0xa0, 0x00, 0x63, 0x41,
+ 0xc4, 0x02, 0xde, 0x00, 0x63, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x63, 0x51,
+ 0xc3, 0x0d, 0x14, 0x00, 0x63, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x63, 0x61,
+ 0xc4, 0x18, 0x10, 0x00, 0x63, 0x68, 0xd2, 0x15, 0xf0, 0x00, 0x63, 0xc9,
+ 0xd3, 0x45, 0xbf, 0x00, 0x63, 0xe0, 0x47, 0xc3, 0x99, 0xc2, 0x4c, 0x1f,
+ 0x49, 0xaa, 0x8c, 0x42, 0x4c, 0x2b, 0x46, 0x00, 0xd4, 0xc2, 0x4c, 0x37,
+ 0x45, 0x00, 0x8c, 0x42, 0x4c, 0x43, 0xc5, 0x00, 0xd4, 0x01, 0x70, 0xf1,
+ 0xc5, 0x05, 0x02, 0x01, 0x70, 0xf8, 0xc4, 0x18, 0x10, 0x08, 0xa6, 0xb9,
+ 0xc2, 0x22, 0xcc, 0x08, 0xa6, 0xb0, 0xc3, 0x0d, 0x14, 0x08, 0xa6, 0xa9,
+ 0xc3, 0x09, 0x9e, 0x08, 0xa6, 0xa0, 0xc4, 0x02, 0xde, 0x08, 0xa6, 0x99,
+ 0xc2, 0x02, 0xa0, 0x08, 0xa6, 0x90, 0xc7, 0x7a, 0x7f, 0x08, 0xa6, 0x21,
+ 0xc7, 0x14, 0x39, 0x08, 0xa6, 0x00, 0xc5, 0x40, 0xe7, 0x08, 0xa6, 0x09,
+ 0xc4, 0x1e, 0x97, 0x08, 0xa6, 0x10, 0x97, 0x08, 0xa5, 0xf1, 0x8b, 0x08,
+ 0xa5, 0xd9, 0x83, 0x08, 0xa5, 0x80, 0x91, 0x08, 0xa5, 0xe9, 0x87, 0x08,
+ 0xa5, 0xd0, 0x8e, 0x08, 0xa5, 0xbb, 0x02, 0x4c, 0x4f, 0x94, 0x08, 0xa5,
+ 0xaa, 0x02, 0x4c, 0x53, 0x97, 0x08, 0xa5, 0xa0, 0x8b, 0x08, 0xa5, 0x90,
+ 0x83, 0x08, 0xa5, 0x71, 0xc2, 0x0d, 0xf6, 0x08, 0xa5, 0x69, 0xc2, 0x00,
+ 0xd0, 0x08, 0xa5, 0x60, 0x83, 0x08, 0xa5, 0x59, 0x47, 0xb2, 0x2e, 0x42,
+ 0x4c, 0x57, 0xc2, 0x00, 0xd0, 0x08, 0xa5, 0x31, 0x83, 0x08, 0xa5, 0x28,
+ 0xc2, 0x00, 0xd0, 0x08, 0xa5, 0x21, 0x83, 0x08, 0xa5, 0x18, 0x83, 0x08,
+ 0xa5, 0x11, 0xc2, 0x00, 0xc1, 0x08, 0xa4, 0xe9, 0xc2, 0x19, 0x2c, 0x08,
+ 0xa4, 0xc1, 0xc2, 0x01, 0x30, 0x08, 0xa4, 0x98, 0xc2, 0x00, 0xd0, 0x08,
+ 0xa5, 0x09, 0x83, 0x08, 0xa5, 0x01, 0x06, 0x42, 0x4c, 0x65, 0xc2, 0x00,
+ 0xd0, 0x08, 0xa4, 0xf9, 0x83, 0x08, 0xa4, 0xf1, 0x16, 0x42, 0x4c, 0x6f,
+ 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0xb9, 0x83, 0x08, 0xa4, 0xb0, 0xc2, 0x00,
+ 0xd0, 0x08, 0xa4, 0xa9, 0x83, 0x08, 0xa4, 0xa0, 0xc2, 0x00, 0xd0, 0x08,
+ 0xa4, 0x91, 0x83, 0x08, 0xa4, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0x81,
+ 0x83, 0x08, 0xa4, 0x78, 0x97, 0x08, 0xa4, 0x71, 0x8b, 0x08, 0xa4, 0x61,
+ 0x83, 0x08, 0xa4, 0x10, 0x97, 0x08, 0xa4, 0x30, 0x8b, 0x08, 0xa4, 0x20,
+ 0xc7, 0xc2, 0xa4, 0x00, 0x7e, 0x21, 0xc7, 0xc4, 0xfe, 0x00, 0x7e, 0x2b,
+ 0x02, 0x4c, 0x79, 0x12, 0xc2, 0x4c, 0x7f, 0xc6, 0xcc, 0x47, 0x00, 0x7e,
+ 0x4a, 0x02, 0x4c, 0x8b, 0x44, 0xa9, 0xbe, 0xc2, 0x4c, 0x8f, 0xcd, 0x75,
+ 0xf4, 0x00, 0x7b, 0xf1, 0xc8, 0x85, 0x06, 0x00, 0x7b, 0xf8, 0xc7, 0xbe,
+ 0xe3, 0x00, 0x79, 0xf1, 0xc8, 0xb8, 0xd2, 0x00, 0x7c, 0x38, 0xc8, 0xbe,
+ 0xe2, 0x00, 0x79, 0xf9, 0xc7, 0x4f, 0xa6, 0x00, 0x7c, 0x48, 0xc7, 0xc1,
+ 0x3f, 0x00, 0x7c, 0x31, 0xc9, 0x8e, 0x8e, 0x00, 0x7c, 0x40, 0xcb, 0x95,
+ 0xda, 0x00, 0x7c, 0x51, 0xcb, 0x99, 0x08, 0x00, 0x7c, 0x58, 0xcb, 0x8e,
+ 0x8c, 0x00, 0x7c, 0x69, 0xc8, 0x4f, 0xa5, 0x00, 0x7c, 0x71, 0xd1, 0x4f,
+ 0x9c, 0x00, 0x7c, 0x78, 0x0d, 0xc2, 0x4c, 0x9b, 0x09, 0xc2, 0x4c, 0xab,
+ 0x10, 0xc2, 0x4c, 0xb5, 0x05, 0xc2, 0x4c, 0xcb, 0xc2, 0x25, 0x3b, 0x00,
+ 0x7c, 0xb9, 0x16, 0xc2, 0x4c, 0xd5, 0x06, 0xc2, 0x4c, 0xe7, 0x12, 0xc2,
+ 0x4c, 0xf9, 0x04, 0xc2, 0x4d, 0x03, 0xc2, 0x01, 0xc3, 0x00, 0x7d, 0x41,
+ 0xc2, 0x01, 0x4a, 0x00, 0x7d, 0x69, 0x1c, 0xc2, 0x4d, 0x0d, 0xc2, 0x00,
+ 0x02, 0x00, 0x7d, 0x81, 0xc2, 0x19, 0x2c, 0x00, 0x7d, 0x89, 0xc2, 0x00,
+ 0x39, 0x00, 0x7d, 0x91, 0xc2, 0x00, 0xdb, 0x00, 0x7d, 0x99, 0x15, 0xc2,
+ 0x4d, 0x17, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0xb9, 0x83, 0x00, 0x7d, 0xc1,
+ 0x4b, 0x7f, 0xe8, 0x42, 0x4d, 0x27, 0x48, 0x16, 0x5f, 0xc2, 0x4d, 0x39,
+ 0xc5, 0x32, 0x89, 0x00, 0x78, 0xa0, 0xc2, 0x00, 0x45, 0x00, 0x79, 0xd1,
+ 0xc2, 0x02, 0x2c, 0x00, 0x79, 0xd8, 0xcf, 0x16, 0x5f, 0x00, 0x78, 0x21,
+ 0xdb, 0x16, 0x53, 0x00, 0x7e, 0x98, 0xcf, 0x16, 0x7a, 0x00, 0x78, 0x29,
+ 0xdb, 0x16, 0x6e, 0x00, 0x7e, 0xa0, 0xd4, 0x3f, 0x48, 0x00, 0x78, 0x31,
+ 0x4c, 0x82, 0xad, 0x42, 0x4d, 0x45, 0x0d, 0xc2, 0x4d, 0x51, 0xc9, 0xb5,
+ 0x0f, 0x00, 0x79, 0xa0, 0xc7, 0x16, 0x5f, 0x00, 0x78, 0x51, 0xcc, 0x2e,
+ 0x06, 0x00, 0x7e, 0x80, 0xc4, 0x01, 0xe2, 0x00, 0x78, 0x71, 0xc5, 0x32,
+ 0x89, 0x00, 0x7e, 0x92, 0x02, 0x4d, 0x5d, 0xc7, 0x70, 0x50, 0x00, 0x79,
+ 0xa9, 0xca, 0xa3, 0xe6, 0x00, 0x79, 0xb8, 0xc8, 0x32, 0x8b, 0x00, 0x78,
+ 0x79, 0xc7, 0xc1, 0x70, 0x00, 0x79, 0xc8, 0x83, 0x00, 0x7a, 0x01, 0xc2,
+ 0x00, 0xd0, 0x00, 0x7a, 0x09, 0xc3, 0x1d, 0x35, 0x00, 0x7b, 0x49, 0xc2,
+ 0x02, 0x2b, 0x00, 0x7b, 0x58, 0x83, 0x00, 0x7a, 0x11, 0xc2, 0x00, 0xd0,
+ 0x00, 0x7a, 0x18, 0xc2, 0x01, 0x30, 0x00, 0x7a, 0x21, 0xc2, 0x19, 0x2c,
+ 0x00, 0x7a, 0x49, 0xc2, 0x00, 0xc1, 0x00, 0x7a, 0x71, 0x83, 0x00, 0x7a,
+ 0x98, 0x83, 0x00, 0x7a, 0x29, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0x30, 0x16,
+ 0xc2, 0x4d, 0x63, 0x83, 0x00, 0x7a, 0x79, 0xc2, 0x00, 0xd0, 0x00, 0x7a,
+ 0x81, 0x15, 0x42, 0x4d, 0x6d, 0x06, 0xc2, 0x4d, 0x77, 0x83, 0x00, 0x7a,
+ 0x89, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0x91, 0x1c, 0x42, 0x4d, 0x81, 0x83,
+ 0x00, 0x7a, 0xa1, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0xa8, 0x83, 0x00, 0x7a,
+ 0xb1, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0xb8, 0xc2, 0x00, 0xd0, 0x00, 0x7a,
+ 0xf1, 0x83, 0x00, 0x7a, 0xf8, 0x83, 0x00, 0x7b, 0x11, 0xc2, 0x00, 0x39,
+ 0x00, 0x7b, 0x60, 0xc2, 0x00, 0xd0, 0x00, 0x7b, 0x21, 0xc2, 0x0d, 0xf6,
+ 0x00, 0x7b, 0x29, 0x83, 0x00, 0x7b, 0x30, 0xc2, 0x02, 0xa0, 0x00, 0x79,
+ 0x59, 0xc4, 0x02, 0xde, 0x00, 0x79, 0x60, 0xc3, 0x09, 0x9e, 0x00, 0x79,
+ 0x69, 0xc3, 0x0d, 0x14, 0x00, 0x79, 0x70, 0xc2, 0x22, 0xcc, 0x00, 0x79,
+ 0x79, 0xc4, 0x18, 0x10, 0x00, 0x79, 0x80, 0x94, 0x00, 0x7b, 0xb8, 0x8e,
+ 0x00, 0x7b, 0xc8, 0x84, 0x01, 0x69, 0x8b, 0x02, 0x4d, 0x8b, 0x89, 0x01,
+ 0x69, 0x9b, 0x02, 0x4d, 0x8f, 0x8c, 0x01, 0x69, 0xb1, 0x86, 0x01, 0x69,
+ 0xbb, 0x02, 0x4d, 0x96, 0x88, 0x01, 0x69, 0xe1, 0x8d, 0x01, 0x69, 0xeb,
+ 0x02, 0x4d, 0xa1, 0x8a, 0x01, 0x6a, 0x03, 0x02, 0x4d, 0xa8, 0x83, 0x01,
+ 0x6a, 0x21, 0x93, 0x01, 0x6a, 0x39, 0x9c, 0x01, 0x6b, 0x1b, 0x02, 0x4d,
+ 0xac, 0x8e, 0x01, 0x6a, 0x69, 0x8f, 0x01, 0x6a, 0x71, 0x90, 0x01, 0x6a,
+ 0x79, 0x92, 0x01, 0x6a, 0x91, 0x94, 0x01, 0x6a, 0xa3, 0x02, 0x4d, 0xb4,
+ 0x95, 0x01, 0x6a, 0xcb, 0x02, 0x4d, 0xb8, 0x96, 0x01, 0x6a, 0xe3, 0x02,
+ 0x4d, 0xc0, 0xc2, 0x11, 0xee, 0x01, 0x6a, 0xf1, 0x98, 0x01, 0x6b, 0x01,
+ 0x99, 0x01, 0x6b, 0x09, 0x9b, 0x01, 0x6b, 0x10, 0x9b, 0x01, 0x69, 0xd8,
+ 0x8d, 0x01, 0x69, 0xf3, 0x02, 0x4d, 0xc8, 0x8a, 0x01, 0x6a, 0x11, 0x93,
+ 0x01, 0x6a, 0x41, 0xc2, 0x25, 0xa1, 0x01, 0x6a, 0x61, 0x09, 0xc2, 0x4d,
+ 0xcc, 0xc2, 0x00, 0x75, 0x01, 0x6a, 0x88, 0xcb, 0x05, 0x1c, 0x01, 0x02,
+ 0xd1, 0xc6, 0x72, 0x26, 0x01, 0x01, 0x28, 0x0c, 0xc2, 0x4d, 0xd4, 0x0a,
+ 0xc2, 0x4d, 0xe0, 0x15, 0xc2, 0x4d, 0xec, 0x4b, 0x92, 0x75, 0xc2, 0x4e,
+ 0x00, 0x03, 0xc2, 0x4e, 0x18, 0x16, 0xc2, 0x4e, 0x2e, 0x49, 0xab, 0xf4,
+ 0xc2, 0x4e, 0x3c, 0x4a, 0x60, 0x7b, 0xc2, 0x4e, 0x70, 0x0d, 0xc2, 0x4e,
+ 0xa4, 0x49, 0x0d, 0xff, 0xc2, 0x4e, 0xb0, 0x13, 0xc2, 0x4e, 0xd2, 0x49,
+ 0xb1, 0x0d, 0xc2, 0x4e, 0xdc, 0x04, 0xc2, 0x4f, 0x00, 0x14, 0xc2, 0x4f,
+ 0x0c, 0x0f, 0xc2, 0x4f, 0x16, 0x4e, 0x74, 0x6a, 0xc2, 0x4f, 0x22, 0x49,
+ 0xb2, 0x00, 0xc2, 0x4f, 0x2c, 0x56, 0x2b, 0xaa, 0xc2, 0x4f, 0x56, 0xd6,
+ 0x30, 0xd2, 0x07, 0xef, 0xc0, 0x4d, 0x7f, 0x8d, 0xc2, 0x4f, 0x5c, 0x45,
+ 0x02, 0x10, 0x42, 0x4f, 0x68, 0x4a, 0x9a, 0xea, 0xc2, 0x4f, 0xe9, 0xcc,
+ 0x27, 0x7f, 0x00, 0x46, 0x88, 0xd4, 0x39, 0xf8, 0x00, 0x47, 0xf9, 0xcb,
+ 0x3a, 0x01, 0x00, 0x32, 0xc0, 0xc7, 0xc3, 0xca, 0x00, 0x44, 0xe1, 0xc7,
+ 0x2b, 0x4a, 0x00, 0x32, 0x98, 0x06, 0xc2, 0x4f, 0xfb, 0x03, 0xc2, 0x50,
+ 0x03, 0xc3, 0x85, 0xf5, 0x0f, 0x70, 0x09, 0xc4, 0x30, 0xc1, 0x0f, 0x70,
+ 0x11, 0xc3, 0x7e, 0x89, 0x0f, 0x70, 0x29, 0x42, 0x02, 0x1c, 0xc2, 0x50,
+ 0x0f, 0xc3, 0x14, 0x4b, 0x0f, 0x70, 0x39, 0x16, 0xc2, 0x50, 0x19, 0xc3,
+ 0x2b, 0xb9, 0x0f, 0x70, 0x49, 0x0d, 0xc2, 0x50, 0x27, 0x0e, 0xc2, 0x50,
+ 0x33, 0xc4, 0x19, 0x60, 0x0f, 0x70, 0x61, 0xc4, 0x3a, 0x01, 0x0f, 0x70,
+ 0x69, 0x15, 0xc2, 0x50, 0x3f, 0xc3, 0x0f, 0x9a, 0x0f, 0x70, 0x91, 0xc3,
+ 0x72, 0xf0, 0x0f, 0x70, 0x99, 0x48, 0x10, 0xb4, 0xc2, 0x50, 0x57, 0x49,
+ 0x18, 0x67, 0xc2, 0x50, 0xa9, 0xc3, 0xb1, 0x0d, 0x0f, 0x70, 0x81, 0xc5,
+ 0x92, 0x75, 0x0f, 0x70, 0xd8, 0xc3, 0x0a, 0x8c, 0x00, 0x32, 0x7b, 0x02,
+ 0x50, 0xb5, 0xcc, 0x85, 0x29, 0x00, 0x30, 0x68, 0xd6, 0x2f, 0x9e, 0x00,
+ 0x47, 0xdb, 0x02, 0x50, 0xc2, 0xc7, 0xc0, 0x51, 0x00, 0x44, 0xf0, 0xc5,
+ 0x00, 0xd4, 0x00, 0x47, 0xc3, 0x02, 0x50, 0xc8, 0xc5, 0x05, 0x02, 0x00,
+ 0x47, 0xd0, 0xce, 0x71, 0x14, 0x00, 0x44, 0x41, 0x9b, 0x00, 0x30, 0x40,
+ 0xe0, 0x08, 0xc7, 0x00, 0x37, 0x60, 0xce, 0x6d, 0xe8, 0x00, 0x47, 0xb1,
+ 0xcd, 0x00, 0xfa, 0x07, 0xf3, 0xd1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xd8,
+ 0xce, 0x00, 0xf9, 0x07, 0xf3, 0xa0, 0x00, 0xc2, 0x50, 0xce, 0xc3, 0x13,
+ 0x00, 0x00, 0x32, 0x5a, 0x02, 0x50, 0xe0, 0x45, 0x08, 0xcb, 0xc2, 0x50,
+ 0xe6, 0x44, 0x05, 0x36, 0xc2, 0x51, 0x3a, 0x42, 0x00, 0x87, 0xc2, 0x51,
+ 0x50, 0xc3, 0x2b, 0xb9, 0x00, 0x37, 0x31, 0xc3, 0x7e, 0x89, 0x00, 0x37,
+ 0x29, 0xc5, 0x4d, 0x40, 0x00, 0x30, 0xd1, 0xc5, 0x52, 0x4a, 0x00, 0x30,
+ 0xc8, 0xc3, 0x2d, 0x2c, 0x00, 0x32, 0x93, 0x02, 0x51, 0x5c, 0xd8, 0x22,
+ 0xeb, 0x00, 0x44, 0xe9, 0xcc, 0x86, 0x9d, 0x00, 0x32, 0xb0, 0x4a, 0xa3,
+ 0xf0, 0xc2, 0x51, 0x60, 0xc4, 0x00, 0x9d, 0x07, 0xdd, 0xf9, 0x16, 0xc2,
+ 0x51, 0x6c, 0x42, 0x00, 0x58, 0xc2, 0x51, 0x78, 0x4a, 0x3b, 0x79, 0xc2,
+ 0x51, 0x84, 0xcb, 0x8f, 0x7e, 0x07, 0xde, 0x10, 0x15, 0xc2, 0x51, 0x90,
+ 0xc9, 0xac, 0x0f, 0x00, 0x30, 0xa1, 0x42, 0x00, 0x39, 0xc2, 0x51, 0x9a,
+ 0xcf, 0x6b, 0x70, 0x00, 0x30, 0x89, 0xc5, 0xda, 0xc4, 0x00, 0x30, 0x78,
+ 0x00, 0x42, 0x51, 0xa6, 0x45, 0xd9, 0x57, 0xc2, 0x51, 0xb2, 0x49, 0x04,
+ 0xf9, 0xc2, 0x51, 0xbe, 0x48, 0x05, 0x14, 0x42, 0x51, 0xca, 0xc5, 0x19,
+ 0x75, 0x00, 0x32, 0x03, 0x02, 0x51, 0xd6, 0xcb, 0x92, 0xee, 0x07, 0xf3,
+ 0x98, 0xc5, 0x4d, 0x40, 0x00, 0x47, 0x33, 0x02, 0x51, 0xdc, 0xc5, 0x52,
+ 0x4a, 0x00, 0x47, 0x2b, 0x02, 0x51, 0xe2, 0xc5, 0x63, 0x73, 0x00, 0x47,
+ 0x22, 0x02, 0x51, 0xe8, 0xc5, 0x00, 0xd4, 0x00, 0x32, 0xa1, 0xc5, 0x05,
+ 0x02, 0x00, 0x32, 0xa8, 0xce, 0x74, 0x5c, 0x00, 0x44, 0x81, 0xcf, 0x65,
+ 0xee, 0x00, 0x30, 0x70, 0xc9, 0x0e, 0x6e, 0x00, 0x32, 0xe1, 0xd6, 0x31,
+ 0x6c, 0x00, 0x32, 0xd9, 0xcd, 0x31, 0x75, 0x00, 0x32, 0xd0, 0xc9, 0x08,
+ 0xcb, 0x00, 0x37, 0x59, 0xc8, 0xb9, 0x12, 0x00, 0x37, 0x50, 0xc4, 0x44,
+ 0x78, 0x00, 0x36, 0xe9, 0xc9, 0x5c, 0xe9, 0x00, 0x30, 0xe8, 0xc4, 0x18,
+ 0x10, 0x00, 0x33, 0x39, 0xc2, 0x22, 0xcc, 0x00, 0x33, 0x30, 0xc3, 0x0d,
+ 0x14, 0x00, 0x33, 0x29, 0xc3, 0x09, 0x9e, 0x00, 0x33, 0x20, 0xc4, 0x02,
+ 0xde, 0x00, 0x33, 0x19, 0xc2, 0x02, 0xa0, 0x00, 0x33, 0x10, 0xc3, 0xe6,
+ 0x1a, 0x07, 0xd8, 0xb9, 0xc3, 0x03, 0x0d, 0x07, 0xd8, 0xa9, 0xc3, 0x5f,
+ 0x44, 0x07, 0xd8, 0xa1, 0xc3, 0x2a, 0x91, 0x07, 0xd8, 0x98, 0xcc, 0x23,
+ 0x3f, 0x00, 0x2c, 0x41, 0xc2, 0x01, 0x48, 0x00, 0x2c, 0x10, 0x8a, 0x00,
+ 0x2c, 0x21, 0x90, 0x00, 0x2b, 0x78, 0xc3, 0xe5, 0xc0, 0x00, 0x2c, 0x19,
+ 0xc2, 0x16, 0x1c, 0x00, 0x2b, 0xd0, 0x91, 0x00, 0x2c, 0x09, 0x0a, 0xc2,
+ 0x51, 0xee, 0x83, 0x00, 0x2b, 0x70, 0xc2, 0x16, 0x1c, 0x00, 0x2c, 0x01,
+ 0x83, 0x00, 0x2b, 0xe0, 0xc3, 0xb8, 0x27, 0x00, 0x2b, 0xf9, 0x91, 0x00,
+ 0x2b, 0x49, 0xc9, 0xb0, 0x47, 0x00, 0x2b, 0x00, 0xc2, 0x04, 0xe6, 0x00,
+ 0x2b, 0xf1, 0x91, 0x00, 0x2b, 0xc0, 0xc2, 0x16, 0x1c, 0x00, 0x2b, 0xe9,
+ 0xc2, 0x00, 0xd0, 0x00, 0x2b, 0xb8, 0xc3, 0x64, 0x77, 0x00, 0x2b, 0xd9,
+ 0x83, 0x00, 0x2b, 0x88, 0xc3, 0x01, 0xe3, 0x00, 0x2b, 0x91, 0xc2, 0x03,
+ 0x4e, 0x00, 0x2b, 0x18, 0xc2, 0x01, 0x7f, 0x00, 0x2b, 0x51, 0x83, 0x00,
+ 0x2b, 0x30, 0x96, 0x00, 0x2b, 0x41, 0x8a, 0x00, 0x2b, 0x39, 0xc2, 0x11,
+ 0xee, 0x00, 0x2b, 0x28, 0x8a, 0x00, 0x2a, 0xa1, 0x90, 0x00, 0x29, 0xf8,
+ 0xc3, 0xe5, 0xc0, 0x00, 0x2a, 0x99, 0xc2, 0x16, 0x1c, 0x00, 0x2a, 0x50,
+ 0xc2, 0x01, 0x48, 0x00, 0x2a, 0x90, 0x91, 0x00, 0x2a, 0x89, 0x0a, 0xc2,
+ 0x51, 0xf8, 0x83, 0x00, 0x29, 0xf0, 0xc2, 0x16, 0x1c, 0x00, 0x2a, 0x81,
+ 0x83, 0x00, 0x2a, 0x60, 0xc3, 0xb8, 0x27, 0x00, 0x2a, 0x79, 0x91, 0x00,
+ 0x29, 0xc8, 0xc2, 0x04, 0xe6, 0x00, 0x2a, 0x71, 0x91, 0x00, 0x2a, 0x40,
+ 0xc2, 0x16, 0x1c, 0x00, 0x2a, 0x69, 0xc2, 0x00, 0xd0, 0x00, 0x2a, 0x38,
+ 0xc3, 0x64, 0x77, 0x00, 0x2a, 0x59, 0x83, 0x00, 0x2a, 0x08, 0xc3, 0x01,
+ 0xe3, 0x00, 0x2a, 0x11, 0xc2, 0x03, 0x4e, 0x00, 0x29, 0x98, 0xc2, 0x01,
+ 0x7f, 0x00, 0x29, 0xd1, 0x83, 0x00, 0x29, 0xb0, 0x96, 0x00, 0x29, 0xc1,
+ 0x8a, 0x00, 0x29, 0xb9, 0xc2, 0x11, 0xee, 0x00, 0x29, 0xa8, 0xc4, 0x14,
+ 0x74, 0x0f, 0x48, 0x09, 0xc2, 0x00, 0xd0, 0x0f, 0x48, 0x68, 0x83, 0x0f,
+ 0x48, 0x21, 0xc2, 0x01, 0x7f, 0x0f, 0x48, 0x38, 0xc9, 0xaf, 0x27, 0x0f,
+ 0x48, 0x29, 0xc2, 0x00, 0xd0, 0x0f, 0x49, 0x08, 0xc2, 0x01, 0x7f, 0x0f,
+ 0x48, 0x71, 0x83, 0x0f, 0x48, 0x90, 0xc2, 0x05, 0x1d, 0x0f, 0x48, 0x81,
+ 0xc2, 0x19, 0x2c, 0x0f, 0x48, 0xc9, 0xc2, 0x00, 0xd0, 0x0f, 0x48, 0xd8,
+ 0xc2, 0x0f, 0x9b, 0x0f, 0x48, 0x89, 0xc2, 0x00, 0xd0, 0x0f, 0x48, 0xe9,
+ 0xc2, 0x01, 0x53, 0x0f, 0x49, 0x00, 0x83, 0x0f, 0x48, 0xc1, 0xc2, 0x00,
+ 0x51, 0x0f, 0x48, 0xf0, 0x9f, 0x0f, 0xba, 0x19, 0xa0, 0x0f, 0xba, 0x20,
+ 0x02, 0x42, 0x52, 0x02, 0xc4, 0x1a, 0x05, 0x0f, 0xb8, 0xf1, 0xc6, 0x4c,
+ 0x49, 0x0f, 0xb9, 0x1a, 0x02, 0x52, 0x12, 0xc2, 0xe5, 0xfd, 0x0f, 0xbb,
+ 0x10, 0xc8, 0xb8, 0x5a, 0x0f, 0xba, 0xd0, 0x02, 0xc2, 0x52, 0x18, 0x44,
+ 0x00, 0x54, 0x42, 0x52, 0x24, 0xc2, 0xe5, 0xfd, 0x0f, 0xb9, 0xe0, 0xcc,
+ 0x8c, 0x85, 0x0f, 0xb9, 0x79, 0x02, 0x42, 0x52, 0x33, 0xc2, 0xe5, 0xfd,
+ 0x0f, 0xb8, 0xb8, 0x45, 0x3c, 0x54, 0xc2, 0x52, 0x3b, 0xc3, 0x00, 0x44,
+ 0x0f, 0xba, 0xf0, 0x44, 0x00, 0x54, 0x42, 0x52, 0x4d, 0xc2, 0xe5, 0xfd,
+ 0x0f, 0xba, 0xe8, 0xc5, 0xdd, 0x80, 0x0f, 0xb8, 0x43, 0x02, 0x52, 0x59,
+ 0xc5, 0xd7, 0x09, 0x0f, 0xb8, 0x32, 0x02, 0x52, 0x5f, 0xc2, 0xe5, 0xfd,
+ 0x0f, 0xb9, 0xb8, 0xa0, 0x0f, 0xb8, 0x91, 0x9f, 0x0f, 0xb8, 0x88, 0x9f,
+ 0x0a, 0x21, 0xd1, 0x9e, 0x0a, 0x21, 0xc9, 0x9d, 0x0a, 0x21, 0xc1, 0xa0,
+ 0x0a, 0x21, 0xd9, 0xa1, 0x0a, 0x21, 0xe1, 0xa2, 0x0a, 0x21, 0xe9, 0xa3,
+ 0x0a, 0x21, 0xf1, 0xa4, 0x0a, 0x21, 0xf9, 0xa5, 0x0a, 0x22, 0x01, 0xa6,
+ 0x0a, 0x22, 0x08, 0xa6, 0x0a, 0x21, 0xb9, 0xa5, 0x0a, 0x21, 0xb1, 0xa4,
+ 0x0a, 0x21, 0xa9, 0xa3, 0x0a, 0x21, 0x93, 0x02, 0x52, 0x65, 0xa2, 0x0a,
+ 0x21, 0x83, 0x02, 0x52, 0x6d, 0xa1, 0x0a, 0x21, 0x79, 0xa0, 0x0a, 0x21,
+ 0x71, 0x9f, 0x0a, 0x21, 0x69, 0x9e, 0x0a, 0x21, 0x5b, 0x02, 0x52, 0x71,
+ 0x9d, 0x0a, 0x21, 0x50, 0xa6, 0x0a, 0x21, 0x43, 0x02, 0x52, 0x75, 0xa5,
+ 0x0a, 0x21, 0x39, 0xa4, 0x0a, 0x21, 0x31, 0xa3, 0x0a, 0x21, 0x29, 0xa2,
+ 0x0a, 0x21, 0x21, 0xa1, 0x0a, 0x21, 0x19, 0xa0, 0x0a, 0x21, 0x11, 0x9f,
+ 0x0a, 0x21, 0x09, 0x9e, 0x0a, 0x21, 0x01, 0x9d, 0x0a, 0x20, 0xf8, 0xa6,
+ 0x0a, 0x20, 0xf1, 0xa5, 0x0a, 0x20, 0xe9, 0xa4, 0x0a, 0x20, 0xe1, 0xa3,
+ 0x0a, 0x20, 0xd3, 0x02, 0x52, 0x79, 0xa2, 0x0a, 0x20, 0xc9, 0xa1, 0x0a,
+ 0x20, 0xc1, 0xa0, 0x0a, 0x20, 0xb9, 0x9f, 0x0a, 0x20, 0xb1, 0x9e, 0x0a,
+ 0x20, 0xa9, 0x9d, 0x0a, 0x20, 0xa0, 0xa6, 0x0a, 0x20, 0x99, 0xa5, 0x0a,
+ 0x20, 0x91, 0xa4, 0x0a, 0x20, 0x89, 0xa3, 0x0a, 0x20, 0x81, 0xa2, 0x0a,
+ 0x20, 0x79, 0xa1, 0x0a, 0x20, 0x71, 0xa0, 0x0a, 0x20, 0x69, 0x9f, 0x0a,
+ 0x20, 0x61, 0x9e, 0x0a, 0x20, 0x59, 0x9d, 0x0a, 0x20, 0x4a, 0x02, 0x52,
+ 0x7d, 0xa6, 0x0a, 0x20, 0x41, 0xa5, 0x0a, 0x20, 0x39, 0xa4, 0x0a, 0x20,
+ 0x31, 0xa3, 0x0a, 0x20, 0x29, 0xa2, 0x0a, 0x20, 0x21, 0xa1, 0x0a, 0x20,
+ 0x19, 0xa0, 0x0a, 0x20, 0x11, 0x9f, 0x0a, 0x20, 0x09, 0x9e, 0x0a, 0x20,
+ 0x00, 0x9d, 0x0a, 0x22, 0x11, 0x9e, 0x0a, 0x22, 0x19, 0x9f, 0x0a, 0x22,
+ 0x21, 0xa0, 0x0a, 0x22, 0x29, 0xa1, 0x0a, 0x22, 0x31, 0xa2, 0x0a, 0x22,
+ 0x39, 0xa3, 0x0a, 0x22, 0x43, 0x02, 0x52, 0x81, 0xa4, 0x0a, 0x22, 0x61,
+ 0xa5, 0x0a, 0x22, 0x69, 0xa6, 0x0a, 0x22, 0x70, 0x9d, 0x0a, 0x22, 0x79,
+ 0x9e, 0x0a, 0x22, 0x81, 0x9f, 0x0a, 0x22, 0x89, 0xa0, 0x0a, 0x22, 0x91,
+ 0xa1, 0x0a, 0x22, 0x99, 0xa2, 0x0a, 0x22, 0xa1, 0xa3, 0x0a, 0x22, 0xa9,
+ 0xa4, 0x0a, 0x22, 0xb1, 0xa5, 0x0a, 0x22, 0xb9, 0xa6, 0x0a, 0x22, 0xc0,
+ 0x9d, 0x0a, 0x22, 0xc9, 0x9e, 0x0a, 0x22, 0xd1, 0x9f, 0x0a, 0x22, 0xd9,
+ 0xa0, 0x0a, 0x22, 0xe1, 0xa1, 0x0a, 0x22, 0xe9, 0xa2, 0x0a, 0x22, 0xf1,
+ 0xa3, 0x0a, 0x22, 0xf9, 0xa4, 0x0a, 0x23, 0x01, 0xa5, 0x0a, 0x23, 0x09,
+ 0xa6, 0x0a, 0x23, 0x10, 0x9d, 0x0a, 0x23, 0x19, 0x9e, 0x0a, 0x23, 0x21,
+ 0x9f, 0x0a, 0x23, 0x29, 0xa0, 0x0a, 0x23, 0x31, 0xa1, 0x0a, 0x23, 0x39,
+ 0xa2, 0x0a, 0x23, 0x41, 0xa3, 0x0a, 0x23, 0x49, 0xa4, 0x0a, 0x23, 0x53,
+ 0x02, 0x52, 0x8d, 0xa5, 0x0a, 0x23, 0x63, 0x02, 0x52, 0x91, 0xa6, 0x0a,
+ 0x23, 0x70, 0x9d, 0x0a, 0x23, 0x7b, 0x02, 0x52, 0x95, 0x9e, 0x0a, 0x23,
+ 0x8b, 0x02, 0x52, 0x99, 0x9f, 0x0a, 0x23, 0x9b, 0x02, 0x52, 0x9d, 0xa0,
+ 0x0a, 0x23, 0xa9, 0xa1, 0x0a, 0x23, 0xb3, 0x02, 0x52, 0xa1, 0xa2, 0x0a,
+ 0x23, 0xd3, 0x02, 0x52, 0xad, 0xa3, 0x0a, 0x23, 0xe9, 0xa4, 0x0a, 0x23,
+ 0xf3, 0x02, 0x52, 0xb5, 0xa5, 0x0a, 0x24, 0x11, 0xa6, 0x0a, 0x24, 0x18,
+ 0x9d, 0x0a, 0x24, 0x23, 0x02, 0x52, 0xc1, 0x9e, 0x0a, 0x24, 0x39, 0x9f,
+ 0x0a, 0x24, 0x41, 0xa0, 0x0a, 0x24, 0x49, 0xa1, 0x0a, 0x24, 0x51, 0xa2,
+ 0x0a, 0x24, 0x5b, 0x02, 0x52, 0xc9, 0xa3, 0x0a, 0x24, 0x69, 0xa4, 0x0a,
+ 0x24, 0x71, 0xa5, 0x0a, 0x24, 0x79, 0xa6, 0x0a, 0x24, 0x80, 0x9d, 0x0a,
+ 0x24, 0x89, 0x9e, 0x0a, 0x24, 0x91, 0x9f, 0x0a, 0x24, 0x99, 0xa0, 0x0a,
+ 0x24, 0xa1, 0xa1, 0x0a, 0x24, 0xa9, 0xa2, 0x0a, 0x24, 0xb3, 0x02, 0x52,
+ 0xcd, 0xa3, 0x0a, 0x24, 0xc1, 0xa4, 0x0a, 0x24, 0xc9, 0xa5, 0x0a, 0x24,
+ 0xd1, 0xa6, 0x0a, 0x24, 0xd8, 0x9d, 0x0a, 0x24, 0xe1, 0x9e, 0x0a, 0x24,
+ 0xe9, 0x9f, 0x0a, 0x24, 0xf1, 0xa0, 0x0a, 0x24, 0xf9, 0xa1, 0x0a, 0x25,
+ 0x01, 0xa2, 0x0a, 0x25, 0x0b, 0x02, 0x52, 0xd1, 0xa3, 0x0a, 0x25, 0x19,
+ 0xa4, 0x0a, 0x25, 0x21, 0xa5, 0x0a, 0x25, 0x29, 0xa6, 0x0a, 0x25, 0x30,
+ 0x9d, 0x0a, 0x25, 0x39, 0x9e, 0x0a, 0x25, 0x41, 0x9f, 0x0a, 0x25, 0x49,
+ 0xa0, 0x0a, 0x25, 0x51, 0xa1, 0x0a, 0x25, 0x59, 0xa2, 0x0a, 0x25, 0x61,
+ 0xa3, 0x0a, 0x25, 0x69, 0xa4, 0x0a, 0x25, 0x71, 0xa5, 0x0a, 0x25, 0x79,
+ 0xa6, 0x0a, 0x25, 0x80, 0x9d, 0x0a, 0x25, 0x89, 0x9e, 0x0a, 0x25, 0x91,
+ 0x9f, 0x0a, 0x25, 0x99, 0xa0, 0x0a, 0x25, 0xa1, 0xa1, 0x0a, 0x25, 0xa9,
+ 0xa2, 0x0a, 0x25, 0xb1, 0xa3, 0x0a, 0x25, 0xb9, 0xa4, 0x0a, 0x25, 0xc1,
+ 0xa5, 0x0a, 0x25, 0xc9, 0xa6, 0x0a, 0x25, 0xd0, 0x9d, 0x0a, 0x25, 0xd9,
+ 0x9e, 0x0a, 0x25, 0xe1, 0x9f, 0x0a, 0x25, 0xe9, 0xa0, 0x0a, 0x25, 0xf1,
+ 0xa1, 0x0a, 0x25, 0xf9, 0xa2, 0x0a, 0x26, 0x01, 0xa3, 0x0a, 0x26, 0x09,
+ 0xa4, 0x0a, 0x26, 0x11, 0xa5, 0x0a, 0x26, 0x19, 0xa6, 0x0a, 0x26, 0x20,
+ 0x9d, 0x0a, 0x26, 0x29, 0x9e, 0x0a, 0x26, 0x31, 0x9f, 0x0a, 0x26, 0x39,
+ 0xa0, 0x0a, 0x26, 0x41, 0xa1, 0x0a, 0x26, 0x49, 0xa2, 0x0a, 0x26, 0x51,
+ 0xa3, 0x0a, 0x26, 0x59, 0xa4, 0x0a, 0x26, 0x61, 0xa5, 0x0a, 0x26, 0x69,
+ 0xa6, 0x0a, 0x26, 0x70, 0x9d, 0x0a, 0x26, 0x79, 0x9e, 0x0a, 0x26, 0x81,
+ 0x9f, 0x0a, 0x26, 0x89, 0xa0, 0x0a, 0x26, 0x91, 0xa1, 0x0a, 0x26, 0x99,
+ 0xa2, 0x0a, 0x26, 0xa1, 0xa3, 0x0a, 0x26, 0xa9, 0xa4, 0x0a, 0x26, 0xb1,
+ 0xa5, 0x0a, 0x26, 0xb9, 0xa6, 0x0a, 0x26, 0xc0, 0x9d, 0x0a, 0x26, 0xc9,
+ 0x9e, 0x0a, 0x26, 0xd1, 0x9f, 0x0a, 0x26, 0xd9, 0xa0, 0x0a, 0x26, 0xe1,
+ 0xa1, 0x0a, 0x26, 0xe9, 0xa2, 0x0a, 0x26, 0xf1, 0xa3, 0x0a, 0x26, 0xf9,
+ 0xa4, 0x0a, 0x27, 0x01, 0xa5, 0x0a, 0x27, 0x09, 0xa6, 0x0a, 0x27, 0x10,
+ 0x9d, 0x0a, 0x27, 0x19, 0x9e, 0x0a, 0x27, 0x21, 0x9f, 0x0a, 0x27, 0x2b,
+ 0x02, 0x52, 0xd5, 0xa0, 0x0a, 0x27, 0x41, 0xa1, 0x0a, 0x27, 0x49, 0xa2,
+ 0x0a, 0x27, 0x51, 0xa3, 0x0a, 0x27, 0x59, 0xa4, 0x0a, 0x27, 0x63, 0x02,
+ 0x52, 0xdd, 0xa5, 0x0a, 0x27, 0x71, 0xa6, 0x0a, 0x27, 0x7a, 0x02, 0x52,
+ 0xe1, 0x9d, 0x0a, 0x27, 0x89, 0x9e, 0x0a, 0x27, 0x91, 0x9f, 0x0a, 0x27,
+ 0x99, 0xa0, 0x0a, 0x27, 0xa1, 0xa1, 0x0a, 0x27, 0xa9, 0xa2, 0x0a, 0x27,
+ 0xb3, 0x02, 0x52, 0xe5, 0xa3, 0x0a, 0x27, 0xc3, 0x02, 0x52, 0xe9, 0xa4,
+ 0x0a, 0x27, 0xd1, 0xa5, 0x0a, 0x27, 0xd9, 0xa6, 0x0a, 0x27, 0xe0, 0x9d,
+ 0x0a, 0x27, 0xe9, 0x9e, 0x0a, 0x27, 0xf1, 0x9f, 0x0a, 0x27, 0xf9, 0xa0,
+ 0x0a, 0x28, 0x01, 0xa1, 0x0a, 0x28, 0x09, 0xa2, 0x0a, 0x28, 0x11, 0xa3,
+ 0x0a, 0x28, 0x19, 0xa4, 0x0a, 0x28, 0x23, 0x02, 0x52, 0xed, 0xa5, 0x0a,
+ 0x28, 0x31, 0xa6, 0x0a, 0x28, 0x38, 0x9d, 0x0a, 0x28, 0x41, 0x9e, 0x0a,
+ 0x28, 0x49, 0x9f, 0x0a, 0x28, 0x51, 0xa0, 0x0a, 0x28, 0x59, 0xa1, 0x0a,
+ 0x28, 0x61, 0xa2, 0x0a, 0x28, 0x69, 0xa3, 0x0a, 0x28, 0x71, 0xa4, 0x0a,
+ 0x28, 0x79, 0xa5, 0x0a, 0x28, 0x81, 0xa6, 0x0a, 0x28, 0x88, 0x9d, 0x0a,
+ 0x28, 0x91, 0x9e, 0x0a, 0x28, 0x99, 0x9f, 0x0a, 0x28, 0xa1, 0xa0, 0x0a,
+ 0x28, 0xa9, 0xa1, 0x0a, 0x28, 0xb1, 0xa2, 0x0a, 0x28, 0xb9, 0xa3, 0x0a,
+ 0x28, 0xc1, 0xa4, 0x0a, 0x28, 0xc9, 0xa5, 0x0a, 0x28, 0xd1, 0xa6, 0x0a,
+ 0x28, 0xd8, 0x9d, 0x0a, 0x28, 0xe1, 0x9e, 0x0a, 0x28, 0xe9, 0x9f, 0x0a,
+ 0x28, 0xf1, 0xa0, 0x0a, 0x28, 0xf9, 0xa1, 0x0a, 0x29, 0x01, 0xa2, 0x0a,
+ 0x29, 0x09, 0xa3, 0x0a, 0x29, 0x11, 0xa4, 0x0a, 0x29, 0x19, 0xa5, 0x0a,
+ 0x29, 0x21, 0xa6, 0x0a, 0x29, 0x28, 0x9d, 0x0a, 0x29, 0x31, 0x9e, 0x0a,
+ 0x29, 0x39, 0x9f, 0x0a, 0x29, 0x41, 0xa0, 0x0a, 0x29, 0x49, 0xa1, 0x0a,
+ 0x29, 0x51, 0xa2, 0x0a, 0x29, 0x59, 0xa3, 0x0a, 0x29, 0x61, 0xa4, 0x0a,
+ 0x29, 0x6b, 0x02, 0x52, 0xf1, 0xa5, 0x0a, 0x29, 0x79, 0xa6, 0x0a, 0x29,
+ 0x80, 0x9d, 0x0a, 0x29, 0x89, 0x9e, 0x0a, 0x29, 0x91, 0x9f, 0x0a, 0x29,
+ 0x99, 0xa0, 0x0a, 0x29, 0xa1, 0xa1, 0x0a, 0x29, 0xa9, 0xa2, 0x0a, 0x29,
+ 0xb1, 0xa3, 0x0a, 0x29, 0xb9, 0xa4, 0x0a, 0x29, 0xc1, 0xa5, 0x0a, 0x29,
+ 0xc9, 0xa6, 0x0a, 0x29, 0xd0, 0x9d, 0x0a, 0x29, 0xd9, 0x9e, 0x0a, 0x29,
+ 0xe1, 0x9f, 0x0a, 0x29, 0xe9, 0xa0, 0x0a, 0x29, 0xf1, 0xa1, 0x0a, 0x29,
+ 0xf9, 0xa2, 0x0a, 0x2a, 0x01, 0xa3, 0x0a, 0x2a, 0x09, 0xa4, 0x0a, 0x2a,
+ 0x11, 0xa5, 0x0a, 0x2a, 0x19, 0xa6, 0x0a, 0x2a, 0x22, 0x02, 0x52, 0xf5,
+ 0x9d, 0x0a, 0x2a, 0x31, 0x9e, 0x0a, 0x2a, 0x39, 0x9f, 0x0a, 0x2a, 0x41,
+ 0xa0, 0x0a, 0x2a, 0x49, 0xa1, 0x0a, 0x2a, 0x53, 0x02, 0x52, 0xf9, 0xa2,
+ 0x0a, 0x2a, 0x61, 0xa3, 0x0a, 0x2a, 0x69, 0xa4, 0x0a, 0x2a, 0x71, 0xa5,
+ 0x0a, 0x2a, 0x79, 0xa6, 0x0a, 0x2a, 0x82, 0x02, 0x52, 0xfd, 0x9d, 0x0a,
+ 0x2a, 0x91, 0x9e, 0x0a, 0x2a, 0x99, 0x9f, 0x0a, 0x2a, 0xa1, 0xa0, 0x0a,
+ 0x2a, 0xa9, 0xa1, 0x0a, 0x2a, 0xb1, 0xa2, 0x0a, 0x2a, 0xb9, 0xa3, 0x0a,
+ 0x2a, 0xc1, 0xa4, 0x0a, 0x2a, 0xc9, 0xa5, 0x0a, 0x2a, 0xd1, 0xa6, 0x0a,
+ 0x2a, 0xda, 0x02, 0x53, 0x01, 0x9d, 0x0a, 0x2a, 0xe9, 0x9e, 0x0a, 0x2a,
+ 0xf1, 0x9f, 0x0a, 0x2a, 0xf9, 0xa0, 0x0a, 0x2b, 0x01, 0xa1, 0x0a, 0x2b,
+ 0x09, 0xa2, 0x0a, 0x2b, 0x11, 0xa3, 0x0a, 0x2b, 0x19, 0xa4, 0x0a, 0x2b,
+ 0x21, 0xa5, 0x0a, 0x2b, 0x29, 0xa6, 0x0a, 0x2b, 0x30, 0x9d, 0x0a, 0x2b,
+ 0x39, 0x9e, 0x0a, 0x2b, 0x41, 0x9f, 0x0a, 0x2b, 0x49, 0xa0, 0x0a, 0x2b,
+ 0x51, 0xa1, 0x0a, 0x2b, 0x59, 0xa2, 0x0a, 0x2b, 0x61, 0xa3, 0x0a, 0x2b,
+ 0x69, 0xa4, 0x0a, 0x2b, 0x71, 0xa5, 0x0a, 0x2b, 0x79, 0xa6, 0x0a, 0x2b,
+ 0x82, 0x02, 0x53, 0x05, 0x9d, 0x0a, 0x2b, 0x91, 0x9e, 0x0a, 0x2b, 0x99,
+ 0x1f, 0xc2, 0x53, 0x09, 0xa0, 0x0a, 0x2b, 0xb9, 0xa1, 0x0a, 0x2b, 0xc1,
+ 0xa2, 0x0a, 0x2b, 0xc9, 0xa3, 0x0a, 0x2b, 0xd3, 0x02, 0x53, 0x15, 0xa4,
+ 0x0a, 0x2b, 0xf1, 0xa5, 0x0a, 0x2b, 0xf9, 0xa6, 0x0a, 0x2c, 0x00, 0x9d,
+ 0x0a, 0x2c, 0x09, 0x9e, 0x0a, 0x2c, 0x11, 0x9f, 0x0a, 0x2c, 0x19, 0xa0,
+ 0x0a, 0x2c, 0x21, 0xa1, 0x0a, 0x2c, 0x29, 0xa2, 0x0a, 0x2c, 0x31, 0xa3,
+ 0x0a, 0x2c, 0x39, 0xa4, 0x0a, 0x2c, 0x41, 0xa5, 0x0a, 0x2c, 0x49, 0xa6,
+ 0x0a, 0x2c, 0x50, 0x9d, 0x0a, 0x2c, 0x59, 0x9e, 0x0a, 0x2c, 0x61, 0x9f,
+ 0x0a, 0x2c, 0x69, 0xa0, 0x0a, 0x2c, 0x71, 0xa1, 0x0a, 0x2c, 0x79, 0xa2,
+ 0x0a, 0x2c, 0x81, 0xa3, 0x0a, 0x2c, 0x89, 0xa4, 0x0a, 0x2c, 0x91, 0xa5,
+ 0x0a, 0x2c, 0x99, 0xa6, 0x0a, 0x2c, 0xa2, 0x02, 0x53, 0x21, 0x9d, 0x0a,
+ 0x2c, 0xb1, 0x9e, 0x0a, 0x2c, 0xb9, 0x9f, 0x0a, 0x2c, 0xc1, 0xa0, 0x0a,
+ 0x2c, 0xc9, 0xa1, 0x0a, 0x2c, 0xd3, 0x02, 0x53, 0x25, 0xa2, 0x0a, 0x2c,
+ 0xe1, 0xa3, 0x0a, 0x2c, 0xe9, 0xa4, 0x0a, 0x2c, 0xf1, 0xa5, 0x0a, 0x2c,
+ 0xfb, 0x02, 0x53, 0x29, 0xa6, 0x0a, 0x2d, 0x08, 0x9d, 0x0a, 0x2d, 0x11,
+ 0x9e, 0x0a, 0x2d, 0x1b, 0x02, 0x53, 0x2d, 0x9f, 0x0a, 0x2d, 0x29, 0xa0,
+ 0x0a, 0x2d, 0x31, 0xa1, 0x0a, 0x2d, 0x39, 0xa2, 0x0a, 0x2d, 0x41, 0xa3,
+ 0x0a, 0x2d, 0x49, 0xa4, 0x0a, 0x2d, 0x51, 0xa5, 0x0a, 0x2d, 0x59, 0xa6,
+ 0x0a, 0x2d, 0x60, 0x9d, 0x0a, 0x2d, 0x69, 0x9e, 0x0a, 0x2d, 0x73, 0x02,
+ 0x53, 0x31, 0x9f, 0x0a, 0x2d, 0x81, 0x20, 0xc2, 0x53, 0x35, 0xa1, 0x0a,
+ 0x2d, 0x99, 0xa2, 0x0a, 0x2d, 0xa1, 0xa3, 0x0a, 0x2d, 0xab, 0x02, 0x53,
+ 0x3f, 0xa4, 0x0a, 0x2d, 0xb9, 0xa5, 0x0a, 0x2d, 0xc1, 0xa6, 0x0a, 0x2d,
+ 0xc8, 0x9d, 0x0a, 0x2d, 0xd1, 0x9e, 0x0a, 0x2d, 0xd9, 0x9f, 0x0a, 0x2d,
+ 0xe1, 0xc7, 0xc6, 0xa9, 0x0a, 0x2d, 0xe9, 0xa1, 0x0a, 0x2d, 0xf1, 0xa2,
+ 0x0a, 0x2d, 0xf9, 0xa3, 0x0a, 0x2e, 0x01, 0xa4, 0x0a, 0x2e, 0x09, 0xa5,
+ 0x0a, 0x2e, 0x11, 0xa6, 0x0a, 0x2e, 0x18, 0x9d, 0x0a, 0x2e, 0x21, 0x9e,
+ 0x0a, 0x2e, 0x29, 0x9f, 0x0a, 0x2e, 0x31, 0xa0, 0x0a, 0x2e, 0x39, 0xa1,
+ 0x0a, 0x2e, 0x41, 0xa2, 0x0a, 0x2e, 0x49, 0xa3, 0x0a, 0x2e, 0x51, 0xa4,
+ 0x0a, 0x2e, 0x59, 0xa5, 0x0a, 0x2e, 0x61, 0xa6, 0x0a, 0x2e, 0x68, 0x1d,
+ 0xc2, 0x53, 0x43, 0x9e, 0x0a, 0x2e, 0x81, 0x9f, 0x0a, 0x2e, 0x89, 0xa0,
+ 0x0a, 0x2e, 0x91, 0xa1, 0x0a, 0x2e, 0x99, 0xa2, 0x0a, 0x2e, 0xa1, 0xa3,
+ 0x0a, 0x2e, 0xa9, 0xa4, 0x0a, 0x2e, 0xb1, 0xa5, 0x0a, 0x2e, 0xb9, 0xa6,
+ 0x0a, 0x2e, 0xc0, 0x9d, 0x0a, 0x2e, 0xc9, 0x9e, 0x0a, 0x2e, 0xd1, 0x9f,
+ 0x0a, 0x2e, 0xd9, 0xa0, 0x0a, 0x2e, 0xe1, 0xa1, 0x0a, 0x2e, 0xe9, 0xa2,
+ 0x0a, 0x2e, 0xf1, 0xa3, 0x0a, 0x2e, 0xf9, 0xa4, 0x0a, 0x2f, 0x01, 0xa5,
+ 0x0a, 0x2f, 0x09, 0xa6, 0x0a, 0x2f, 0x10, 0x9d, 0x0a, 0x2f, 0x19, 0x9e,
+ 0x0a, 0x2f, 0x21, 0x9f, 0x0a, 0x2f, 0x29, 0xa0, 0x0a, 0x2f, 0x31, 0xa1,
+ 0x0a, 0x2f, 0x39, 0xa2, 0x0a, 0x2f, 0x41, 0xa3, 0x0a, 0x2f, 0x49, 0xa4,
+ 0x0a, 0x2f, 0x51, 0xa5, 0x0a, 0x2f, 0x59, 0xa6, 0x0a, 0x2f, 0x60, 0x9d,
+ 0x0a, 0x2f, 0x69, 0x9e, 0x0a, 0x2f, 0x71, 0x9f, 0x0a, 0x2f, 0x79, 0xa0,
+ 0x0a, 0x2f, 0x81, 0xa1, 0x0a, 0x2f, 0x89, 0xa2, 0x0a, 0x2f, 0x91, 0xa3,
+ 0x0a, 0x2f, 0x99, 0xa4, 0x0a, 0x2f, 0xa1, 0xa5, 0x0a, 0x2f, 0xa9, 0xa6,
+ 0x0a, 0x2f, 0xb0, 0x9d, 0x0a, 0x2f, 0xbb, 0x02, 0x53, 0x4f, 0x9e, 0x0a,
+ 0x2f, 0xc9, 0x9f, 0x0a, 0x2f, 0xd1, 0xa0, 0x0a, 0x2f, 0xd9, 0xa1, 0x0a,
+ 0x2f, 0xe1, 0xa2, 0x0a, 0x2f, 0xe9, 0xa3, 0x0a, 0x2f, 0xf1, 0xa4, 0x0a,
+ 0x2f, 0xfb, 0x02, 0x53, 0x53, 0xa5, 0x0a, 0x30, 0x09, 0xa6, 0x0a, 0x30,
+ 0x10, 0x9d, 0x0a, 0x30, 0x19, 0x9e, 0x0a, 0x30, 0x21, 0x9f, 0x0a, 0x30,
+ 0x29, 0xa0, 0x0a, 0x30, 0x31, 0xa1, 0x0a, 0x30, 0x39, 0xa2, 0x0a, 0x30,
+ 0x41, 0xa3, 0x0a, 0x30, 0x49, 0xa4, 0x0a, 0x30, 0x51, 0xa5, 0x0a, 0x30,
+ 0x59, 0xa6, 0x0a, 0x30, 0x60, 0x9d, 0x0a, 0x30, 0x69, 0x9e, 0x0a, 0x30,
+ 0x71, 0x9f, 0x0a, 0x30, 0x79, 0xa0, 0x0a, 0x30, 0x81, 0xa1, 0x0a, 0x30,
+ 0x89, 0xa2, 0x0a, 0x30, 0x91, 0xa3, 0x0a, 0x30, 0x99, 0xa4, 0x0a, 0x30,
+ 0xa1, 0xa5, 0x0a, 0x30, 0xa9, 0xa6, 0x0a, 0x30, 0xb0, 0x9d, 0x0a, 0x30,
+ 0xb9, 0x9e, 0x0a, 0x30, 0xc1, 0x9f, 0x0a, 0x30, 0xc9, 0xa0, 0x0a, 0x30,
+ 0xd1, 0xa1, 0x0a, 0x30, 0xd9, 0xa2, 0x0a, 0x30, 0xe1, 0xa3, 0x0a, 0x30,
+ 0xe9, 0xa4, 0x0a, 0x30, 0xf1, 0xa5, 0x0a, 0x30, 0xf9, 0xa6, 0x0a, 0x31,
+ 0x00, 0x9d, 0x0a, 0x31, 0x09, 0x9e, 0x0a, 0x31, 0x11, 0x9f, 0x0a, 0x31,
+ 0x19, 0xa0, 0x0a, 0x31, 0x21, 0xa1, 0x0a, 0x31, 0x29, 0xa2, 0x0a, 0x31,
+ 0x31, 0xa3, 0x0a, 0x31, 0x39, 0xa4, 0x0a, 0x31, 0x40, 0x9e, 0x0a, 0x31,
+ 0x49, 0x9f, 0x0a, 0x31, 0x51, 0xa0, 0x0a, 0x31, 0x59, 0xa1, 0x0a, 0x31,
+ 0x61, 0xa2, 0x0a, 0x31, 0x69, 0xa3, 0x0a, 0x31, 0x71, 0xa4, 0x0a, 0x31,
+ 0x79, 0xa5, 0x0a, 0x31, 0x81, 0xa6, 0x0a, 0x31, 0x88, 0x9d, 0x0a, 0x31,
+ 0x91, 0x9e, 0x0a, 0x31, 0x99, 0x9f, 0x0a, 0x31, 0xa1, 0xa0, 0x0a, 0x31,
+ 0xa9, 0xa1, 0x0a, 0x31, 0xb1, 0xa2, 0x0a, 0x31, 0xb9, 0xa3, 0x0a, 0x31,
+ 0xc1, 0xa4, 0x0a, 0x31, 0xc9, 0xa5, 0x0a, 0x31, 0xd1, 0xa6, 0x0a, 0x31,
+ 0xd8, 0x9d, 0x0a, 0x31, 0xe1, 0x9e, 0x0a, 0x31, 0xe9, 0x9f, 0x0a, 0x31,
+ 0xf1, 0xa0, 0x0a, 0x31, 0xf9, 0xa1, 0x0a, 0x32, 0x01, 0xa2, 0x0a, 0x32,
+ 0x09, 0xa3, 0x0a, 0x32, 0x11, 0xa4, 0x0a, 0x32, 0x19, 0xa5, 0x0a, 0x32,
+ 0x21, 0xa6, 0x0a, 0x32, 0x28, 0xd1, 0x05, 0x75, 0x01, 0x5b, 0x79, 0xd4,
+ 0x3e, 0x1c, 0x01, 0x5c, 0x61, 0xd5, 0x36, 0x9b, 0x01, 0x5c, 0x69, 0xd3,
+ 0x44, 0xa2, 0x01, 0x5c, 0x71, 0xd2, 0x47, 0x93, 0x01, 0x5c, 0x78, 0xc8,
+ 0x2c, 0xb2, 0x01, 0x1b, 0x81, 0xc9, 0x24, 0x47, 0x01, 0x1b, 0x79, 0x05,
+ 0xc2, 0x53, 0x57, 0x06, 0xc2, 0x53, 0x63, 0x42, 0x02, 0xae, 0xc2, 0x53,
+ 0x76, 0xd0, 0x03, 0xb7, 0x01, 0x1a, 0x41, 0x42, 0x00, 0x49, 0xc2, 0x53,
+ 0x82, 0xcc, 0x07, 0xc7, 0x01, 0x1a, 0x21, 0xc9, 0x02, 0xfe, 0x01, 0x1a,
+ 0x11, 0xc5, 0x03, 0x02, 0x01, 0x1a, 0x09, 0xc3, 0xba, 0x27, 0x01, 0x19,
+ 0xd9, 0xc5, 0x00, 0xe2, 0x01, 0x19, 0xc0, 0xc9, 0x20, 0xa8, 0x01, 0x1b,
+ 0x09, 0xc3, 0xba, 0x27, 0x01, 0x1a, 0xa9, 0xc7, 0x80, 0x70, 0x01, 0x1a,
+ 0x88, 0xcb, 0x95, 0xf0, 0x01, 0x1b, 0x89, 0xca, 0x94, 0xf4, 0x01, 0x1b,
+ 0x31, 0x45, 0x9a, 0x3d, 0x42, 0x53, 0x8e, 0xc5, 0x1e, 0xc8, 0x01, 0x1b,
+ 0x59, 0xc9, 0x20, 0xa8, 0x01, 0x1b, 0x11, 0xc5, 0x05, 0xa2, 0x01, 0x1a,
+ 0x90, 0xc8, 0x52, 0x09, 0x01, 0x1a, 0xc9, 0xc5, 0x05, 0xa2, 0x01, 0x1a,
+ 0x58, 0xc2, 0x00, 0xb1, 0x01, 0x1a, 0xf9, 0xc3, 0x05, 0xa4, 0x01, 0x19,
+ 0xe8, 0xc2, 0x00, 0xf1, 0x01, 0x12, 0x2b, 0x02, 0x53, 0x9a, 0xcb, 0x23,
+ 0xa0, 0x01, 0x53, 0x80, 0xc2, 0x0c, 0x43, 0x08, 0x59, 0x99, 0x87, 0x08,
+ 0x59, 0x88, 0xc2, 0x00, 0x5f, 0x08, 0x59, 0x21, 0xc2, 0x0c, 0x43, 0x08,
+ 0x59, 0x19, 0x87, 0x08, 0x59, 0x10, 0x87, 0x08, 0x58, 0x38, 0x90, 0x08,
+ 0x58, 0x29, 0x91, 0x08, 0x58, 0x18, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xc9,
+ 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x10, 0xc3, 0x02, 0xdf, 0x08, 0x08, 0x4b,
+ 0x02, 0x53, 0xa0, 0xc4, 0x0d, 0x0e, 0x08, 0x08, 0x92, 0x02, 0x53, 0xa4,
+ 0xc9, 0x57, 0x20, 0x08, 0x09, 0x58, 0xc4, 0x18, 0x12, 0x08, 0x08, 0x8b,
+ 0x02, 0x53, 0xaa, 0x91, 0x08, 0x08, 0x42, 0x02, 0x53, 0xb0, 0xc2, 0x00,
+ 0x5f, 0x08, 0x08, 0x5b, 0x02, 0x53, 0xb4, 0xc3, 0x45, 0x6b, 0x08, 0x08,
+ 0xa2, 0x02, 0x53, 0xb8, 0xc2, 0x00, 0x33, 0x08, 0x08, 0x53, 0x02, 0x53,
+ 0xbe, 0xc3, 0x0d, 0x0f, 0x08, 0x08, 0x9a, 0x02, 0x53, 0xc2, 0x00, 0xc2,
+ 0x53, 0xc8, 0xc2, 0x0d, 0x10, 0x08, 0x08, 0xaa, 0x02, 0x53, 0xd4, 0x00,
+ 0xc2, 0x53, 0xda, 0xc2, 0x0d, 0x10, 0x08, 0x08, 0xb2, 0x02, 0x53, 0xe6,
+ 0xc7, 0x0d, 0x04, 0x08, 0x09, 0x01, 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x48,
+ 0xc9, 0x57, 0x20, 0x08, 0x09, 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x09, 0x09,
+ 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x50, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x98,
+ 0xcc, 0x14, 0xcd, 0x08, 0x09, 0xc1, 0xcd, 0x7e, 0xb0, 0x08, 0x09, 0xd8,
+ 0xca, 0x01, 0x68, 0x01, 0x28, 0x03, 0x02, 0x53, 0xec, 0x06, 0xc2, 0x53,
+ 0xf2, 0xc2, 0x02, 0xae, 0x01, 0x2b, 0xab, 0x02, 0x53, 0xfc, 0xc4, 0x00,
+ 0x49, 0x01, 0x2b, 0xa3, 0x02, 0x54, 0x02, 0xc5, 0x00, 0x2c, 0x01, 0x2b,
+ 0xb1, 0x44, 0x13, 0x1d, 0xc2, 0x54, 0x08, 0xc8, 0x00, 0x5f, 0x01, 0x28,
+ 0x13, 0x02, 0x54, 0x14, 0x4f, 0x61, 0x5c, 0xc2, 0x54, 0x1a, 0x4c, 0x52,
+ 0xbb, 0x42, 0x54, 0x26, 0x50, 0x5c, 0x42, 0xc2, 0x54, 0x32, 0xdd, 0x11,
+ 0x34, 0x01, 0x2a, 0x29, 0xdd, 0x11, 0xff, 0x01, 0x2a, 0x19, 0x50, 0x11,
+ 0x39, 0x42, 0x54, 0x44, 0x45, 0x02, 0x9a, 0x42, 0x54, 0x56, 0xd0, 0x5e,
+ 0x62, 0x01, 0x2b, 0xf0, 0xc2, 0x01, 0x48, 0x01, 0x2b, 0xdb, 0x02, 0x54,
+ 0x66, 0x4a, 0xa2, 0xa6, 0x42, 0x54, 0x6c, 0x45, 0x02, 0x9a, 0x42, 0x54,
+ 0x78, 0xc8, 0x00, 0x5f, 0x01, 0x28, 0x59, 0xca, 0x01, 0x68, 0x01, 0x28,
+ 0x48, 0xc8, 0x00, 0x5f, 0x01, 0x28, 0x39, 0xca, 0x01, 0x68, 0x01, 0x28,
+ 0x28, 0xc8, 0x00, 0x5f, 0x01, 0x2a, 0x8b, 0x02, 0x54, 0x8a, 0x47, 0x54,
+ 0x42, 0xc2, 0x54, 0x90, 0x49, 0x45, 0xd2, 0xc2, 0x54, 0xa2, 0xca, 0x01,
+ 0x68, 0x01, 0x2a, 0x80, 0x4b, 0x99, 0xb8, 0xc2, 0x54, 0xb4, 0x4b, 0x8e,
+ 0x76, 0xc2, 0x54, 0xc6, 0x4a, 0x5c, 0x42, 0xc2, 0x54, 0xd8, 0x4a, 0x11,
+ 0x39, 0x42, 0x54, 0xf0, 0xd1, 0x53, 0x43, 0x01, 0x2b, 0x59, 0xcb, 0x8d,
+ 0x84, 0x01, 0x2b, 0x11, 0xcc, 0x89, 0xd9, 0x01, 0x2a, 0xf8, 0xd1, 0x53,
+ 0x32, 0x01, 0x2b, 0x51, 0xcb, 0x8e, 0xce, 0x01, 0x2b, 0x09, 0xcc, 0x87,
+ 0xa5, 0x01, 0x2a, 0xf0, 0xd0, 0x32, 0x47, 0x01, 0x2a, 0x11, 0xca, 0xa2,
+ 0xce, 0x01, 0x29, 0x41, 0xcb, 0x98, 0xe7, 0x01, 0x29, 0x00, 0xd0, 0x32,
+ 0x71, 0x01, 0x29, 0xf9, 0xca, 0xa2, 0xe2, 0x01, 0x29, 0x29, 0xcb, 0x98,
+ 0xdc, 0x01, 0x28, 0xe8, 0xd1, 0x53, 0x43, 0x01, 0x2b, 0x41, 0xcb, 0x8d,
+ 0x84, 0x01, 0x2a, 0xe1, 0xcc, 0x89, 0xd9, 0x01, 0x2a, 0xc8, 0xd1, 0x53,
+ 0x32, 0x01, 0x2b, 0x39, 0xcb, 0x8e, 0xce, 0x01, 0x2a, 0xd9, 0xcc, 0x87,
+ 0xa5, 0x01, 0x2a, 0xc0, 0xd5, 0x32, 0x6c, 0x01, 0x2a, 0x41, 0xd0, 0x32,
+ 0x71, 0x01, 0x29, 0xb9, 0x45, 0x00, 0x49, 0xc2, 0x55, 0x08, 0x46, 0x00,
+ 0x2c, 0x42, 0x55, 0x14, 0xd5, 0x32, 0x42, 0x01, 0x2a, 0x01, 0xd0, 0x32,
+ 0x47, 0x01, 0x29, 0xc1, 0x45, 0x00, 0x49, 0xc2, 0x55, 0x20, 0x46, 0x00,
+ 0x2c, 0x42, 0x55, 0x2c, 0xce, 0x72, 0xaa, 0x01, 0x2a, 0x49, 0xc8, 0x11,
+ 0xff, 0x01, 0x29, 0xc9, 0xca, 0x11, 0x34, 0x01, 0x29, 0x88, 0xce, 0x73,
+ 0x44, 0x01, 0x29, 0xf1, 0xc8, 0x11, 0x49, 0x01, 0x29, 0xb1, 0xca, 0x12,
+ 0x12, 0x01, 0x29, 0x70, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xf9, 0xc3, 0x0a,
+ 0xea, 0x01, 0x18, 0x60, 0xc5, 0x13, 0x67, 0x01, 0x18, 0xf1, 0xc3, 0x0a,
+ 0xea, 0x01, 0x18, 0x68, 0x89, 0x01, 0x8d, 0x68, 0xc2, 0x1b, 0x88, 0x01,
+ 0x8d, 0x70, 0xc2, 0x1b, 0x88, 0x01, 0x8d, 0x78, 0x89, 0x01, 0x89, 0x21,
+ 0x90, 0x01, 0x8d, 0x48, 0x90, 0x01, 0x8d, 0x39, 0x89, 0x01, 0x8d, 0x90,
+ 0x89, 0x01, 0x89, 0x29, 0x90, 0x01, 0x8d, 0x28, 0x90, 0x01, 0x8d, 0x98,
+ 0xa2, 0x0f, 0xd8, 0xbb, 0x02, 0x55, 0x38, 0xa3, 0x0f, 0xd9, 0x38, 0xa0,
+ 0x0f, 0xd8, 0x33, 0x02, 0x55, 0x3c, 0xa2, 0x0f, 0xd8, 0x93, 0x02, 0x55,
+ 0x4e, 0xa1, 0x0f, 0xd8, 0x53, 0x02, 0x55, 0x52, 0xa3, 0x0f, 0xd9, 0x08,
+ 0xa3, 0x0f, 0xd9, 0x70, 0xa1, 0x0f, 0xd8, 0x63, 0x02, 0x55, 0x5d, 0xa3,
+ 0x0f, 0xd9, 0x19, 0xc2, 0x00, 0x22, 0x0f, 0xd9, 0x90, 0xa3, 0x0f, 0xd9,
+ 0x88, 0xa3, 0x0f, 0xd9, 0x49, 0xa2, 0x0f, 0xd8, 0xd2, 0x02, 0x55, 0x68,
+ 0xa3, 0x0f, 0xd9, 0x78, 0xa1, 0x0f, 0xd8, 0x6b, 0x02, 0x55, 0x6c, 0xa3,
+ 0x0f, 0xd9, 0x21, 0xa2, 0x0f, 0xd8, 0xa2, 0x02, 0x55, 0x77, 0xa2, 0x0f,
+ 0xd8, 0xc2, 0x02, 0x55, 0x7b, 0xa3, 0x0f, 0xd9, 0xa8, 0x45, 0xa6, 0x50,
+ 0xc2, 0x55, 0x7f, 0x46, 0x3b, 0x9d, 0xc2, 0x55, 0xb6, 0xd0, 0x5d, 0x02,
+ 0x01, 0x39, 0x61, 0xce, 0x71, 0x4c, 0x01, 0x37, 0x41, 0xc5, 0x02, 0xd2,
+ 0x01, 0x2e, 0x7b, 0x02, 0x55, 0xce, 0xc8, 0xb8, 0x3a, 0x01, 0x33, 0x18,
+ 0x4e, 0x70, 0xce, 0xc2, 0x55, 0xd2, 0xc7, 0x37, 0x27, 0x01, 0x38, 0x11,
+ 0xce, 0x73, 0xa6, 0x01, 0x38, 0x01, 0xc6, 0xcb, 0xcf, 0x01, 0x36, 0x39,
+ 0xc9, 0xb0, 0x1a, 0x01, 0x33, 0x01, 0x0f, 0xc2, 0x55, 0xde, 0xca, 0x50,
+ 0x80, 0x01, 0x30, 0xb9, 0xc3, 0x0e, 0x6b, 0x01, 0x30, 0x29, 0xcc, 0x83,
+ 0x01, 0x01, 0x30, 0x01, 0xc5, 0x0b, 0x0a, 0x01, 0x2d, 0x03, 0x02, 0x55,
+ 0xea, 0xd3, 0x40, 0x08, 0x0f, 0xab, 0x88, 0x44, 0xe1, 0x27, 0xc2, 0x55,
+ 0xee, 0xc4, 0x73, 0x5b, 0x01, 0x36, 0xf9, 0xd7, 0x28, 0x5a, 0x01, 0x36,
+ 0xb1, 0xc8, 0x36, 0xb4, 0x01, 0x30, 0x71, 0xd2, 0x49, 0xaf, 0x0f, 0xab,
+ 0xf8, 0x43, 0x01, 0x47, 0xc2, 0x56, 0x00, 0xc6, 0x3a, 0x1a, 0x01, 0x2e,
+ 0x33, 0x02, 0x56, 0x12, 0x14, 0x42, 0x56, 0x16, 0x44, 0x00, 0x2d, 0xc2,
+ 0x56, 0x22, 0xc8, 0x46, 0x71, 0x01, 0x2d, 0x61, 0xc6, 0xcd, 0x67, 0x0f,
+ 0x9f, 0xb0, 0x43, 0x00, 0x4a, 0xc2, 0x56, 0x34, 0x11, 0xc2, 0x56, 0x44,
+ 0x45, 0x17, 0x15, 0x42, 0x56, 0x50, 0x0e, 0xc2, 0x56, 0x5c, 0x11, 0x42,
+ 0x56, 0x68, 0xca, 0x9c, 0x20, 0x01, 0x35, 0xc1, 0x46, 0x01, 0xdc, 0x42,
+ 0x56, 0x74, 0xd9, 0x1f, 0x31, 0x01, 0x33, 0xd9, 0x12, 0x42, 0x56, 0x92,
+ 0x07, 0xc2, 0x56, 0xaa, 0xd5, 0x31, 0xc4, 0x0f, 0xad, 0x51, 0x11, 0x42,
+ 0x56, 0xb9, 0xcc, 0x88, 0x59, 0x01, 0x2d, 0x81, 0xc6, 0xc1, 0x01, 0x0f,
+ 0xac, 0x41, 0x42, 0x00, 0xc4, 0x42, 0x56, 0xc5, 0x46, 0x05, 0x87, 0xc2,
+ 0x56, 0xd1, 0x48, 0x4a, 0x54, 0x42, 0x56, 0xdd, 0xd0, 0x20, 0x66, 0x01,
+ 0x3d, 0xb1, 0xd0, 0x03, 0xb7, 0x01, 0x3d, 0xa9, 0xd0, 0x3c, 0x90, 0x01,
+ 0x3d, 0xa0, 0x85, 0x01, 0x09, 0x69, 0x9c, 0x01, 0x09, 0x41, 0x94, 0x01,
+ 0x08, 0xe1, 0x8b, 0x01, 0x08, 0x89, 0x8a, 0x01, 0x08, 0x60, 0xd0, 0x15,
+ 0x35, 0x01, 0x3a, 0x48, 0x9a, 0x01, 0x38, 0xb9, 0x42, 0x00, 0x6b, 0xc2,
+ 0x56, 0xef, 0xc8, 0x8e, 0xa5, 0x0f, 0xaf, 0xa0, 0xc3, 0x45, 0xa1, 0x00,
+ 0xda, 0xdb, 0x02, 0x56, 0xfc, 0xc5, 0xda, 0x3d, 0x00, 0xdb, 0x00, 0xc8,
+ 0xb6, 0xb2, 0x00, 0xdb, 0xe8, 0x46, 0xce, 0x2d, 0xc2, 0x57, 0x02, 0x49,
+ 0xb3, 0x7a, 0x42, 0x57, 0x14, 0x48, 0xb5, 0xf2, 0xc2, 0x57, 0x20, 0x46,
+ 0xce, 0x33, 0x42, 0x57, 0x2c, 0xc4, 0x8f, 0x44, 0x00, 0xdb, 0x99, 0xc5,
+ 0xd7, 0x4a, 0x00, 0xdb, 0x91, 0x44, 0xac, 0xc3, 0xc2, 0x57, 0x38, 0xc7,
+ 0x7c, 0x94, 0x00, 0xdb, 0x79, 0xc5, 0xdb, 0xc8, 0x00, 0xdb, 0x61, 0xc5,
+ 0xd7, 0xd1, 0x00, 0xdb, 0x58, 0x03, 0xc2, 0x57, 0x4a, 0x07, 0xc2, 0x57,
+ 0x5f, 0xc3, 0x00, 0x74, 0x00, 0xdb, 0x31, 0xc3, 0x38, 0x86, 0x00, 0xdb,
+ 0x19, 0xc3, 0x08, 0x48, 0x00, 0xdb, 0x08, 0xc5, 0x60, 0xcc, 0x00, 0xda,
+ 0xf9, 0xc7, 0xc2, 0x65, 0x00, 0xda, 0xe8, 0xc4, 0x18, 0x10, 0x00, 0xda,
+ 0xb9, 0xc2, 0x22, 0xcc, 0x00, 0xda, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xda,
+ 0xa9, 0xc3, 0x09, 0x9e, 0x00, 0xda, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xda,
+ 0x99, 0xc2, 0x02, 0xa0, 0x00, 0xda, 0x90, 0xcb, 0x98, 0x37, 0x00, 0xda,
+ 0x61, 0xcb, 0x91, 0x6d, 0x00, 0xda, 0x59, 0xc5, 0xd7, 0xbd, 0x00, 0xd8,
+ 0x81, 0xc4, 0xa2, 0x33, 0x00, 0xd8, 0x2a, 0x02, 0x57, 0x6b, 0xc7, 0xc7,
+ 0x20, 0x00, 0xda, 0x41, 0xc4, 0xa2, 0x33, 0x00, 0xd8, 0x78, 0xc9, 0xae,
+ 0x61, 0x00, 0xda, 0x39, 0x83, 0x00, 0xd9, 0x12, 0x02, 0x57, 0x71, 0xc9,
+ 0xa9, 0x5a, 0x00, 0xda, 0x31, 0x83, 0x00, 0xd8, 0x9a, 0x02, 0x57, 0x75,
+ 0x43, 0x20, 0x27, 0x42, 0x57, 0x81, 0xc6, 0xb5, 0xac, 0x00, 0xd8, 0x6a,
+ 0x02, 0x57, 0x8d, 0xc5, 0xc4, 0x7b, 0x00, 0xd8, 0x5a, 0x02, 0x57, 0x93,
+ 0xc8, 0xbf, 0xaa, 0x00, 0xd9, 0x50, 0xc6, 0xcb, 0x93, 0x00, 0xd9, 0x40,
+ 0x83, 0x00, 0xd9, 0x33, 0x02, 0x57, 0x99, 0xc2, 0x19, 0x2c, 0x00, 0xd8,
+ 0xe1, 0xc2, 0x01, 0x30, 0x00, 0xd8, 0xb8, 0x42, 0x00, 0x4d, 0x42, 0x57,
+ 0x9f, 0xc5, 0xd4, 0xa2, 0x00, 0xd8, 0xd8, 0xc5, 0xd4, 0x48, 0x00, 0xd8,
+ 0xc8, 0xc5, 0xd7, 0x4f, 0x00, 0xd8, 0xb0, 0xc7, 0xc2, 0x6c, 0x00, 0xd8,
+ 0x90, 0xc7, 0xc2, 0x6c, 0x00, 0xd8, 0x50, 0xc7, 0xc2, 0x6c, 0x00, 0xd8,
+ 0x40, 0xc7, 0xc2, 0x6c, 0x00, 0xda, 0x18, 0xc5, 0x25, 0x91, 0x00, 0xd9,
+ 0xf3, 0x02, 0x57, 0xab, 0xc5, 0xc2, 0x6e, 0x00, 0xd9, 0xa8, 0xc7, 0xc2,
+ 0x6c, 0x00, 0xd9, 0xe8, 0xc7, 0xc2, 0x6c, 0x00, 0xd9, 0xd8, 0xc5, 0xd7,
+ 0xcc, 0x00, 0xd9, 0xc8, 0xc5, 0xd9, 0x70, 0x00, 0xd9, 0xb8, 0xc6, 0x1e,
+ 0x89, 0x00, 0xd8, 0x09, 0xc5, 0xd6, 0xaa, 0x00, 0xd8, 0x00, 0xc9, 0xae,
+ 0xfa, 0x0b, 0x57, 0xa1, 0xc5, 0x28, 0xb0, 0x0b, 0x57, 0x80, 0xc9, 0xaf,
+ 0x81, 0x0b, 0x57, 0x99, 0xc5, 0x28, 0xb0, 0x0b, 0x57, 0x88, 0x87, 0x0b,
+ 0x57, 0x59, 0xc3, 0x1b, 0x88, 0x0b, 0x56, 0x80, 0xc2, 0x14, 0x68, 0x0b,
+ 0x57, 0x00, 0x91, 0x0b, 0x57, 0x48, 0xc3, 0x2d, 0x2f, 0x0b, 0x57, 0x30,
+ 0xc3, 0x26, 0x76, 0x0b, 0x57, 0x21, 0xc2, 0x02, 0x0a, 0x0b, 0x56, 0xa8,
+ 0x91, 0x0b, 0x56, 0xf1, 0xc3, 0xdf, 0xb7, 0x0b, 0x56, 0xb8, 0xc2, 0x02,
+ 0xaa, 0x0b, 0x56, 0xe9, 0xc2, 0x02, 0x98, 0x0b, 0x56, 0xb0, 0xc3, 0x62,
+ 0x26, 0x0b, 0x56, 0xc1, 0x83, 0x0b, 0x56, 0x88, 0x42, 0x00, 0x56, 0xc2,
+ 0x57, 0xaf, 0x42, 0x00, 0x5d, 0xc2, 0x57, 0xf0, 0x42, 0x00, 0xa9, 0xc2,
+ 0x58, 0x30, 0x42, 0x00, 0xee, 0xc2, 0x58, 0x65, 0x42, 0x01, 0x60, 0xc2,
+ 0x58, 0xa5, 0x42, 0x01, 0x31, 0x42, 0x58, 0xdd, 0xc2, 0xd0, 0x00, 0x05,
+ 0x36, 0x29, 0x87, 0x05, 0x36, 0x50, 0x87, 0x05, 0x36, 0x41, 0xc2, 0x10,
+ 0x11, 0x05, 0x36, 0xb8, 0x96, 0x05, 0x35, 0xd9, 0xc2, 0xd0, 0x00, 0x05,
+ 0x36, 0x21, 0x90, 0x05, 0x36, 0x90, 0xc3, 0xe5, 0xab, 0x05, 0x37, 0x71,
+ 0xc4, 0xe0, 0xa3, 0x05, 0x37, 0x78, 0x87, 0x05, 0x35, 0x29, 0xc2, 0xd0,
+ 0x00, 0x05, 0x36, 0x81, 0x90, 0x05, 0x37, 0x08, 0x8b, 0x05, 0x35, 0x61,
+ 0xc2, 0x02, 0xe0, 0x05, 0x35, 0x68, 0x87, 0x05, 0x35, 0x31, 0x83, 0x05,
+ 0x35, 0x80, 0x96, 0x05, 0x37, 0x41, 0x90, 0x05, 0x37, 0x50, 0xc3, 0x7c,
+ 0x57, 0x05, 0x35, 0x91, 0xc3, 0x8b, 0xa9, 0x05, 0x35, 0xf1, 0xc2, 0x02,
+ 0xe0, 0x05, 0x36, 0x30, 0xc2, 0x10, 0x11, 0x05, 0x35, 0xe0, 0xc2, 0x02,
+ 0xe0, 0x05, 0x36, 0x39, 0xc2, 0x5d, 0xa1, 0x05, 0x37, 0x58, 0xc5, 0xde,
+ 0x75, 0x05, 0x36, 0x99, 0xc2, 0x01, 0x30, 0x05, 0x36, 0xa1, 0x83, 0x05,
+ 0x36, 0xa8, 0xc3, 0xd0, 0xd7, 0x05, 0x35, 0x79, 0x90, 0x05, 0x37, 0x10,
+ 0xc2, 0x00, 0xc4, 0x05, 0x37, 0x01, 0xc2, 0x04, 0xc6, 0x05, 0x37, 0x38,
+ 0xc2, 0x25, 0x9f, 0x05, 0x35, 0xb1, 0xc3, 0xd7, 0xe2, 0x05, 0x35, 0xc1,
+ 0x97, 0x05, 0x36, 0x01, 0x91, 0x05, 0x36, 0xb0, 0xc7, 0xc8, 0xd2, 0x05,
+ 0x37, 0x81, 0xc9, 0xb1, 0x16, 0x05, 0x37, 0x88, 0xc9, 0xab, 0x88, 0x01,
+ 0x5a, 0xd9, 0xcd, 0x7d, 0x2a, 0x01, 0x5a, 0xe8, 0x12, 0xc2, 0x59, 0x13,
+ 0xc5, 0xdd, 0x67, 0x00, 0xdf, 0xf1, 0xc8, 0xb8, 0x82, 0x00, 0xdf, 0xe0,
+ 0xd2, 0x48, 0x7d, 0x00, 0xdf, 0x78, 0x91, 0x00, 0xdf, 0x69, 0x8b, 0x00,
+ 0xdf, 0x58, 0x87, 0x00, 0xdf, 0x48, 0xc2, 0x01, 0x5d, 0x00, 0xdf, 0x19,
+ 0x83, 0x00, 0xde, 0xa2, 0x02, 0x59, 0x1f, 0xc2, 0x0e, 0x9a, 0x00, 0xdf,
+ 0x11, 0xc2, 0x19, 0x2c, 0x00, 0xdf, 0x01, 0xc2, 0x01, 0x30, 0x00, 0xde,
+ 0xe9, 0xca, 0x9d, 0x60, 0x00, 0xde, 0xb9, 0x83, 0x00, 0xde, 0x48, 0x4a,
+ 0x48, 0x83, 0xc2, 0x59, 0x25, 0x83, 0x00, 0xde, 0xc1, 0xca, 0x9b, 0x94,
+ 0x00, 0xde, 0xb0, 0xc7, 0xc8, 0xaf, 0x00, 0xde, 0x68, 0xc2, 0x00, 0xd0,
+ 0x00, 0x4c, 0xb3, 0x02, 0x59, 0x5f, 0x83, 0x00, 0x4c, 0xa8, 0x83, 0x00,
+ 0x4d, 0xc1, 0xc2, 0x0d, 0xf6, 0x00, 0x4d, 0xb9, 0xc2, 0x00, 0xd0, 0x00,
+ 0x4d, 0xb0, 0x83, 0x00, 0x4d, 0x83, 0x02, 0x59, 0x65, 0xc2, 0x00, 0x39,
+ 0x00, 0x4e, 0xe1, 0xc2, 0x00, 0xd0, 0x00, 0x4e, 0xe8, 0x83, 0x00, 0x4d,
+ 0x79, 0xc2, 0x19, 0x2c, 0x00, 0x4e, 0xf8, 0xc2, 0x00, 0xd0, 0x00, 0x4d,
+ 0x69, 0x83, 0x00, 0x4d, 0x60, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x59, 0x83,
+ 0x00, 0x4d, 0x50, 0x83, 0x00, 0x4d, 0x41, 0xc2, 0x00, 0xc1, 0x00, 0x4d,
+ 0x19, 0xc2, 0x19, 0x2c, 0x00, 0x4c, 0xf1, 0xc2, 0x01, 0x30, 0x00, 0x4c,
+ 0xc8, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x39, 0x83, 0x00, 0x4d, 0x31, 0x06,
+ 0x42, 0x59, 0x6b, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x29, 0x83, 0x00, 0x4d,
+ 0x21, 0x16, 0x42, 0x59, 0x75, 0xc2, 0x00, 0xd0, 0x00, 0x4c, 0xe9, 0x83,
+ 0x00, 0x4c, 0xe0, 0xc2, 0x00, 0xd0, 0x00, 0x4c, 0xd9, 0x83, 0x00, 0x4c,
+ 0xd0, 0xc2, 0x00, 0xd0, 0x00, 0x4c, 0xc1, 0x83, 0x00, 0x4c, 0xb8, 0x97,
+ 0x00, 0x4c, 0xa1, 0x8b, 0x00, 0x4c, 0x81, 0x83, 0x00, 0x4c, 0x30, 0x8b,
+ 0x00, 0x4c, 0x40, 0x97, 0x00, 0x4c, 0x50, 0x47, 0xb2, 0x2e, 0xc2, 0x59,
+ 0x7f, 0xcd, 0x80, 0x36, 0x00, 0x4f, 0xe0, 0x42, 0x07, 0xb2, 0xc2, 0x59,
+ 0x8d, 0x03, 0xc2, 0x59, 0x99, 0xc5, 0x33, 0x5d, 0x00, 0x4d, 0xe1, 0xcb,
+ 0x1e, 0x89, 0x00, 0x4c, 0x08, 0x97, 0x00, 0x4e, 0x61, 0x8b, 0x00, 0x4e,
+ 0x41, 0x83, 0x00, 0x4d, 0xf0, 0x94, 0x00, 0x4e, 0x1b, 0x02, 0x59, 0xa5,
+ 0x8e, 0x00, 0x4f, 0x12, 0x02, 0x59, 0xa9, 0x97, 0x00, 0x4e, 0x10, 0x8b,
+ 0x00, 0x4e, 0x00, 0xc2, 0x02, 0xa0, 0x00, 0x4f, 0x41, 0xc4, 0x02, 0xde,
+ 0x00, 0x4f, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x4f, 0x51, 0xc3, 0x0d, 0x14,
+ 0x00, 0x4f, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x4f, 0x61, 0xc4, 0x18, 0x10,
+ 0x00, 0x4f, 0x68, 0xc3, 0x05, 0x14, 0x00, 0x4f, 0xa3, 0x02, 0x59, 0xad,
+ 0x16, 0xc2, 0x59, 0xb3, 0xc4, 0x09, 0x9d, 0x00, 0x4f, 0xb8, 0x1b, 0xc2,
+ 0x59, 0xbf, 0xc2, 0x00, 0x39, 0x00, 0xd0, 0x59, 0x83, 0x00, 0xd0, 0x51,
+ 0x09, 0x42, 0x59, 0xc9, 0xc2, 0x00, 0xb0, 0x00, 0xd0, 0x39, 0x83, 0x00,
+ 0xd0, 0x30, 0xa4, 0x01, 0x42, 0x03, 0x02, 0x59, 0xd3, 0x9e, 0x01, 0x40,
+ 0x0b, 0x02, 0x59, 0xd7, 0x9f, 0x01, 0x40, 0x13, 0x02, 0x5a, 0x05, 0xa0,
+ 0x01, 0x40, 0x23, 0x02, 0x5a, 0x2c, 0xa1, 0x01, 0x40, 0x43, 0x02, 0x5a,
+ 0x4c, 0xa2, 0x01, 0x40, 0x83, 0x02, 0x5a, 0x65, 0xa3, 0x01, 0x41, 0x03,
+ 0x02, 0x5a, 0x77, 0xa5, 0x01, 0x44, 0x00, 0x00, 0x42, 0x5a, 0x82, 0xc2,
+ 0x0d, 0x10, 0x08, 0x83, 0x18, 0x9b, 0x08, 0x83, 0x10, 0xc4, 0x18, 0x10,
+ 0x08, 0x82, 0xc3, 0x02, 0x5a, 0x8e, 0xc2, 0x22, 0xcc, 0x08, 0x82, 0xba,
+ 0x02, 0x5a, 0x94, 0x0b, 0xc2, 0x5a, 0x9a, 0x11, 0x42, 0x5a, 0xa6, 0x0a,
+ 0xc2, 0x5a, 0xb2, 0x19, 0xc2, 0x5a, 0xbe, 0xc2, 0x00, 0xc4, 0x08, 0x82,
+ 0xd8, 0x49, 0x5c, 0x83, 0x42, 0x5a, 0xc8, 0xc2, 0x00, 0xdb, 0x08, 0x81,
+ 0xa1, 0x83, 0x08, 0x81, 0x70, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x51, 0x83,
+ 0x08, 0x81, 0x48, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x41, 0x83, 0x08, 0x81,
+ 0x38, 0x83, 0x08, 0x81, 0x31, 0xc2, 0x00, 0xc1, 0x08, 0x81, 0x09, 0xc2,
+ 0x19, 0x2c, 0x08, 0x80, 0xe1, 0xc2, 0x01, 0x30, 0x08, 0x80, 0xb8, 0xc2,
+ 0x00, 0xd0, 0x08, 0x81, 0x29, 0x83, 0x08, 0x81, 0x21, 0x06, 0x42, 0x5a,
+ 0xe0, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x19, 0x83, 0x08, 0x81, 0x11, 0x16,
+ 0x42, 0x5a, 0xea, 0xc2, 0x00, 0xd0, 0x08, 0x80, 0xd9, 0x83, 0x08, 0x80,
+ 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0x80, 0xc9, 0x83, 0x08, 0x80, 0xc0, 0xc2,
+ 0x00, 0xd0, 0x08, 0x80, 0xb1, 0x83, 0x08, 0x80, 0xa8, 0xc2, 0x00, 0xd0,
+ 0x08, 0x80, 0xa1, 0x83, 0x08, 0x80, 0x98, 0x97, 0x08, 0x80, 0x91, 0x8b,
+ 0x08, 0x80, 0x81, 0x83, 0x08, 0x80, 0x30, 0x47, 0xb2, 0x2e, 0xc2, 0x5a,
+ 0xf4, 0x83, 0x08, 0x81, 0x78, 0x97, 0x08, 0x80, 0x50, 0x8b, 0x08, 0x80,
+ 0x40, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x81, 0xc2, 0x0d, 0xf6, 0x08, 0x81,
+ 0x89, 0x83, 0x08, 0x81, 0x90, 0x91, 0x08, 0x82, 0x23, 0x02, 0x5b, 0x02,
+ 0x03, 0xc2, 0x5b, 0x08, 0x87, 0x08, 0x82, 0x11, 0x48, 0xb2, 0x2d, 0xc2,
+ 0x5b, 0x14, 0x97, 0x08, 0x81, 0xe3, 0x02, 0x5b, 0x22, 0x8b, 0x08, 0x81,
+ 0xd3, 0x02, 0x5b, 0x26, 0xce, 0x6e, 0x2e, 0x08, 0x81, 0xc8, 0xc4, 0x26,
+ 0x78, 0x08, 0x83, 0x79, 0xc5, 0x06, 0xdb, 0x08, 0x83, 0x71, 0x15, 0xc2,
+ 0x5b, 0x2a, 0x08, 0xc2, 0x5b, 0x36, 0x16, 0xc2, 0x5b, 0x42, 0xc3, 0x05,
+ 0x14, 0x08, 0x83, 0x39, 0xc4, 0x15, 0xe7, 0x08, 0x83, 0x30, 0xc4, 0x6e,
+ 0x13, 0x08, 0x82, 0x69, 0xc3, 0x02, 0x6e, 0x08, 0x82, 0x58, 0xc8, 0x3a,
+ 0x36, 0x08, 0x82, 0x51, 0x96, 0x08, 0x82, 0x48, 0x42, 0x00, 0xbd, 0xc2,
+ 0x5b, 0x4e, 0xc9, 0x79, 0x79, 0x0e, 0x83, 0x90, 0xc7, 0xc3, 0x1b, 0x0e,
+ 0x85, 0xa9, 0xc6, 0xc5, 0x06, 0x0e, 0x85, 0xa0, 0xc4, 0x99, 0xff, 0x0e,
+ 0x87, 0xa1, 0xc3, 0x2e, 0xd7, 0x0e, 0x83, 0xf8, 0x44, 0xe3, 0xbb, 0xc2,
+ 0x5b, 0x60, 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xd8, 0x00, 0x42, 0x5b, 0x72,
+ 0xc5, 0xd6, 0xa5, 0x0e, 0x82, 0x10, 0x03, 0xc2, 0x5b, 0x7e, 0x11, 0x42,
+ 0x5b, 0x88, 0xc3, 0x03, 0x13, 0x0e, 0x83, 0xd1, 0xc9, 0xaa, 0x68, 0x0e,
+ 0x81, 0xb8, 0xc2, 0x00, 0xec, 0x0e, 0x87, 0x79, 0xc2, 0x01, 0x6c, 0x0e,
+ 0x87, 0x71, 0xc2, 0x00, 0x3c, 0x0e, 0x87, 0x69, 0xc2, 0x01, 0xdd, 0x0e,
+ 0x87, 0x61, 0xc2, 0x01, 0x30, 0x0e, 0x87, 0x59, 0xc3, 0x29, 0x6f, 0x0e,
+ 0x87, 0x51, 0xc2, 0x00, 0xb0, 0x0e, 0x87, 0x48, 0x90, 0x0e, 0x84, 0xb9,
+ 0xc9, 0x79, 0x79, 0x0e, 0x83, 0x98, 0x46, 0xce, 0xab, 0xc2, 0x5b, 0x94,
+ 0x46, 0xcb, 0x03, 0xc2, 0x5b, 0xa1, 0xc5, 0x4c, 0x93, 0x0e, 0x81, 0x18,
+ 0xc6, 0xd0, 0x01, 0x0e, 0x81, 0x99, 0xca, 0x6d, 0x0c, 0x0e, 0x80, 0x68,
+ 0xc5, 0xd6, 0x0a, 0x0e, 0x85, 0x09, 0xc4, 0xe1, 0x7f, 0x0e, 0x84, 0xd0,
+ 0xc5, 0xda, 0x42, 0x0e, 0x85, 0x01, 0x8b, 0x0e, 0x84, 0xf8, 0xc2, 0x00,
+ 0xba, 0x0e, 0x84, 0xf1, 0xc4, 0x01, 0x92, 0x0e, 0x84, 0xe8, 0x8b, 0x0e,
+ 0x84, 0xe1, 0xc5, 0xda, 0x42, 0x0e, 0x84, 0xd8, 0xc7, 0xc8, 0x93, 0x0e,
+ 0x83, 0x11, 0xc2, 0x01, 0xc3, 0x0e, 0x82, 0xe0, 0xc9, 0xa9, 0xe1, 0x0e,
+ 0x80, 0xf8, 0x00, 0x42, 0x5b, 0xad, 0x00, 0x42, 0x5b, 0xb7, 0xc4, 0xcf,
+ 0x8b, 0x0e, 0x80, 0x40, 0x45, 0xda, 0xe2, 0xc2, 0x5b, 0xc1, 0xc4, 0xc8,
+ 0x2c, 0x0e, 0x80, 0x98, 0xc8, 0xbe, 0x2a, 0x0e, 0x87, 0x31, 0xc5, 0xcf,
+ 0x3c, 0x0e, 0x84, 0x92, 0x02, 0x5b, 0xd3, 0x46, 0xd0, 0x07, 0xc2, 0x5b,
+ 0xd9, 0xc4, 0xc2, 0xa0, 0x0e, 0x84, 0xc8, 0x16, 0xc2, 0x5b, 0xeb, 0xd5,
+ 0x35, 0x4b, 0x0e, 0x86, 0x91, 0xdc, 0x13, 0x35, 0x0e, 0x86, 0x89, 0xd1,
+ 0x4f, 0x58, 0x0e, 0x86, 0x80, 0xc9, 0x9c, 0xdf, 0x0e, 0x84, 0x00, 0x43,
+ 0x01, 0x92, 0xc2, 0x5b, 0xf7, 0xd5, 0x35, 0x4b, 0x0e, 0x86, 0xb1, 0xdc,
+ 0x13, 0x35, 0x0e, 0x86, 0xa9, 0xd1, 0x4f, 0x58, 0x0e, 0x86, 0xa0, 0xc3,
+ 0x2e, 0xd7, 0x0e, 0x83, 0xe9, 0xc4, 0x99, 0xff, 0x0e, 0x83, 0xe0, 0xc4,
+ 0xde, 0x8f, 0x0e, 0x82, 0x99, 0xc6, 0xd0, 0x19, 0x0e, 0x80, 0x52, 0x02,
+ 0x5c, 0x03, 0xc5, 0xda, 0x1a, 0x0e, 0x86, 0x39, 0xc9, 0xb1, 0x9d, 0x0e,
+ 0x85, 0xe0, 0x47, 0x1a, 0x0a, 0xc2, 0x5c, 0x09, 0xcb, 0x98, 0xbb, 0x0e,
+ 0x85, 0xf0, 0xca, 0xa2, 0xec, 0x0e, 0x86, 0x21, 0xc8, 0xba, 0x3a, 0x0e,
+ 0x86, 0x18, 0x10, 0xc2, 0x5c, 0x15, 0xc2, 0x01, 0x6c, 0x0e, 0x86, 0x01,
+ 0xc2, 0x00, 0x3c, 0x0e, 0x85, 0xf9, 0xc2, 0x01, 0xdd, 0x0e, 0x85, 0xe9,
+ 0xc2, 0x00, 0xb0, 0x0e, 0x85, 0xd0, 0xcf, 0x6b, 0x07, 0x0e, 0x85, 0xc8,
+ 0x44, 0x3b, 0xaf, 0xc2, 0x5c, 0x21, 0xc4, 0x65, 0xea, 0x0e, 0x85, 0xb8,
+ 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0x31, 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xd0,
+ 0x47, 0xc9, 0x11, 0xc2, 0x5c, 0x2b, 0x44, 0x89, 0x3e, 0x42, 0x5c, 0x37,
+ 0x48, 0x6d, 0x79, 0xc2, 0x5c, 0x43, 0x42, 0x00, 0x2c, 0x42, 0x5c, 0x4f,
+ 0xce, 0x6d, 0x5c, 0x0e, 0x85, 0x29, 0xcc, 0x89, 0x3d, 0x0e, 0x85, 0x18,
+ 0xc6, 0xcf, 0x3b, 0x0e, 0x84, 0xb1, 0xc3, 0x1f, 0x1d, 0x0e, 0x84, 0x39,
+ 0x83, 0x0e, 0x81, 0x80, 0xc7, 0xc8, 0x31, 0x0e, 0x83, 0x81, 0x12, 0xc2,
+ 0x5c, 0x5b, 0xc7, 0xc4, 0xaa, 0x0e, 0x83, 0x69, 0x42, 0x00, 0xbd, 0x42,
+ 0x5c, 0x67, 0xcd, 0x78, 0xe6, 0x0e, 0x83, 0xc9, 0xc2, 0x01, 0xc3, 0x0e,
+ 0x81, 0x6a, 0x02, 0x5c, 0x71, 0xcf, 0x68, 0x0a, 0x0e, 0x84, 0x71, 0x16,
+ 0xc2, 0x5c, 0x7d, 0xcb, 0x8f, 0x52, 0x0e, 0x84, 0x59, 0xcc, 0x80, 0xd9,
+ 0x0e, 0x84, 0x50, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0x41, 0xc5, 0xcc, 0xcc,
+ 0x0e, 0x80, 0x21, 0xcb, 0x6d, 0x0b, 0x0e, 0x80, 0x18, 0xc7, 0xc8, 0x31,
+ 0x0e, 0x83, 0x89, 0xcb, 0x94, 0x17, 0x0e, 0x83, 0x79, 0xc7, 0xc4, 0xaa,
+ 0x0e, 0x83, 0x61, 0x90, 0x0e, 0x81, 0xca, 0x02, 0x5c, 0x89, 0xc2, 0x00,
+ 0x45, 0x0e, 0x80, 0xb9, 0x8b, 0x0e, 0x80, 0x00, 0x47, 0xc1, 0xee, 0xc2,
+ 0x5c, 0x8f, 0xc6, 0xcf, 0x89, 0x0e, 0x80, 0x4a, 0x02, 0x5c, 0x9b, 0xc4,
+ 0x77, 0x35, 0x0e, 0x82, 0x68, 0x16, 0xc2, 0x5c, 0x9f, 0xc2, 0x01, 0xc3,
+ 0x0e, 0x82, 0x08, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0xc1, 0xc5, 0xcc, 0xcc,
+ 0x0e, 0x80, 0x31, 0xcb, 0x6d, 0x0b, 0x0e, 0x80, 0x28, 0x94, 0x08, 0xe0,
+ 0x38, 0xd1, 0x51, 0xbc, 0x0f, 0xdc, 0xf9, 0xc2, 0x00, 0x49, 0x01, 0x2f,
+ 0xd0, 0x4e, 0x60, 0x6d, 0xc2, 0x5c, 0xa9, 0xcc, 0x80, 0xf1, 0x0f, 0xac,
+ 0x50, 0xc9, 0xb4, 0xac, 0x0f, 0xac, 0x61, 0xc5, 0xcd, 0x8c, 0x0f, 0xac,
+ 0x48, 0xd1, 0x51, 0xbc, 0x0f, 0xdc, 0xf1, 0xc2, 0x00, 0x49, 0x01, 0x2f,
+ 0xf8, 0x4e, 0x01, 0xf4, 0xc2, 0x5c, 0xb5, 0xdb, 0x17, 0x61, 0x01, 0x49,
+ 0xf0, 0x5b, 0x16, 0xa4, 0xc2, 0x5c, 0xc1, 0x46, 0x01, 0xc8, 0x42, 0x5c,
+ 0xcd, 0xce, 0x08, 0x79, 0x01, 0x2c, 0x31, 0xcd, 0x3f, 0xe8, 0x01, 0x2c,
+ 0x18, 0xc9, 0xae, 0xbb, 0x01, 0x3f, 0xf0, 0xc9, 0xae, 0xbb, 0x01, 0x3f,
+ 0xe0, 0xc9, 0xae, 0xbb, 0x01, 0x3f, 0xe8, 0xc9, 0xae, 0xbb, 0x01, 0x3f,
+ 0xd8, 0xcc, 0x82, 0x35, 0x01, 0x3f, 0xd1, 0xc5, 0x01, 0xa2, 0x01, 0x3f,
+ 0xb8, 0xcf, 0x64, 0xd1, 0x01, 0x52, 0xe9, 0xcb, 0x98, 0x42, 0x01, 0x52,
+ 0xd9, 0x42, 0x00, 0x58, 0x42, 0x5c, 0xdf, 0xc7, 0x16, 0x16, 0x01, 0x52,
+ 0x89, 0x45, 0x00, 0x5a, 0x42, 0x5c, 0xeb, 0x42, 0x00, 0xa9, 0xc2, 0x5c,
+ 0xf7, 0x09, 0x42, 0x5d, 0x09, 0xd3, 0x16, 0x91, 0x01, 0x4c, 0x99, 0x49,
+ 0x05, 0xcb, 0x42, 0x5d, 0x18, 0x49, 0x01, 0xd3, 0xc2, 0x5d, 0x24, 0xcc,
+ 0x01, 0xdb, 0x0f, 0xdc, 0x61, 0xc6, 0x02, 0xd1, 0x0f, 0xc8, 0x3b, 0x02,
+ 0x5d, 0x2a, 0x42, 0x00, 0x5b, 0xc2, 0x5d, 0x30, 0xcb, 0x96, 0x7f, 0x0f,
+ 0xdd, 0x91, 0xc6, 0x9e, 0xf4, 0x0f, 0xdd, 0xc8, 0xd0, 0x5b, 0xc2, 0x0f,
+ 0xc2, 0xc1, 0xd1, 0x55, 0x30, 0x01, 0x0f, 0xf9, 0xc5, 0x01, 0xa2, 0x01,
+ 0x0c, 0xa3, 0x02, 0x5d, 0x3c, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xa3, 0x02,
+ 0x5d, 0x40, 0x19, 0xc2, 0x5d, 0x46, 0xcb, 0x94, 0x22, 0x01, 0x58, 0x61,
+ 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x20, 0xcc, 0x06, 0xdb, 0x01, 0x2c, 0x79,
+ 0xcd, 0x15, 0x02, 0x01, 0x2c, 0x70, 0xd1, 0x3f, 0xe4, 0x01, 0x2c, 0x49,
+ 0xd0, 0x05, 0xb7, 0x01, 0x16, 0x58, 0x00, 0x42, 0x5d, 0x52, 0xd3, 0x01,
+ 0xb4, 0x01, 0x00, 0xc1, 0xd0, 0x58, 0xd2, 0x01, 0x71, 0x30, 0x00, 0x42,
+ 0x5d, 0x6a, 0x44, 0x02, 0xdf, 0xc2, 0x5d, 0x7c, 0xcc, 0x86, 0xcd, 0x0f,
+ 0xaf, 0x61, 0xde, 0x06, 0x69, 0x0f, 0xde, 0x08, 0x44, 0x01, 0x94, 0xc2,
+ 0x5d, 0x88, 0xd3, 0x41, 0xf6, 0x01, 0x70, 0x48, 0xd0, 0x4a, 0x77, 0x01,
+ 0x2c, 0x59, 0xc7, 0xb2, 0xec, 0x01, 0x4b, 0xe0, 0xca, 0xa2, 0x74, 0x01,
+ 0x1c, 0xe9, 0xc9, 0x57, 0x36, 0x01, 0x1c, 0xe1, 0xca, 0xa3, 0x5a, 0x01,
+ 0x1c, 0xd8, 0xce, 0x01, 0xb9, 0x01, 0x00, 0xe1, 0xcc, 0x8a, 0x09, 0x01,
+ 0x4e, 0xd1, 0xcb, 0x1a, 0x50, 0x01, 0x71, 0x41, 0xcd, 0x0b, 0x91, 0x01,
+ 0x80, 0x50, 0xcb, 0x1a, 0x50, 0x01, 0x4c, 0x29, 0x05, 0xc2, 0x5d, 0x94,
+ 0xd2, 0x21, 0x89, 0x01, 0x80, 0xb1, 0xd6, 0x08, 0x88, 0x01, 0x80, 0xc1,
+ 0xce, 0x25, 0xad, 0x01, 0x80, 0xd0, 0xd6, 0x08, 0x88, 0x01, 0x4c, 0xb9,
+ 0xd2, 0x21, 0x89, 0x01, 0x80, 0x80, 0x50, 0x58, 0xb2, 0xc2, 0x5d, 0xa0,
+ 0x4e, 0x6c, 0x36, 0x42, 0x5d, 0xac, 0xda, 0x1b, 0xd0, 0x0f, 0xc4, 0xa0,
+ 0x45, 0x01, 0x95, 0xc2, 0x5d, 0xb8, 0x44, 0x0b, 0x26, 0x42, 0x5d, 0xc4,
+ 0xcd, 0x7e, 0x3b, 0x01, 0x0c, 0xf1, 0x48, 0x01, 0x9a, 0x42, 0x5d, 0xd0,
+ 0x45, 0x00, 0x8c, 0xc2, 0x5d, 0xdc, 0x16, 0xc2, 0x5e, 0x12, 0xd5, 0x10,
+ 0x87, 0x01, 0x0e, 0x31, 0xc8, 0xae, 0xbc, 0x01, 0x0d, 0x23, 0x02, 0x5e,
+ 0x1e, 0x03, 0x42, 0x5e, 0x24, 0xc5, 0x01, 0xa2, 0x01, 0x0e, 0x83, 0x02,
+ 0x5e, 0x30, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x60, 0xcb, 0x6f, 0xff, 0x01,
+ 0x0e, 0xe1, 0xca, 0x88, 0xdf, 0x0f, 0xc1, 0xc0, 0x46, 0x01, 0x52, 0xc2,
+ 0x5e, 0x3a, 0xc2, 0x02, 0x35, 0x0f, 0xd7, 0x90, 0xd0, 0x58, 0x62, 0x0f,
+ 0xc2, 0x01, 0xc5, 0x01, 0xa2, 0x0f, 0xc2, 0x20, 0xc5, 0x01, 0xa2, 0x01,
+ 0x58, 0x29, 0xd3, 0x43, 0xe4, 0x01, 0x5c, 0x40, 0xca, 0x50, 0x5e, 0x00,
+ 0x7e, 0xc0, 0xca, 0x37, 0x4e, 0x01, 0x13, 0x91, 0xc5, 0x07, 0x62, 0x01,
+ 0x13, 0x20, 0x4a, 0x33, 0xad, 0x42, 0x5e, 0x46, 0xe0, 0x09, 0xc7, 0x01,
+ 0x54, 0x58, 0x47, 0xc7, 0x35, 0xc2, 0x5e, 0x55, 0x53, 0x40, 0x1b, 0x42,
+ 0x5e, 0x61, 0xe0, 0x07, 0x07, 0x01, 0x54, 0x88, 0xc2, 0x00, 0xd0, 0x00,
+ 0xe2, 0x71, 0x83, 0x00, 0xe2, 0x68, 0xc2, 0x00, 0xd0, 0x00, 0xe0, 0xc1,
+ 0x83, 0x00, 0xe0, 0xb8, 0xc7, 0xc0, 0x97, 0x00, 0xe1, 0xf0, 0xd2, 0x4d,
+ 0x57, 0x0f, 0xbd, 0xa9, 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x49, 0xc4, 0x01,
+ 0xe3, 0x01, 0x2c, 0x88, 0x44, 0x00, 0x2d, 0xc2, 0x5e, 0x67, 0xc3, 0x14,
+ 0xa7, 0x0f, 0xb4, 0x40, 0xe0, 0x08, 0x87, 0x01, 0x3b, 0x90, 0x52, 0x11,
+ 0x92, 0xc2, 0x5e, 0x6d, 0x44, 0x0d, 0x14, 0x42, 0x5e, 0x79, 0xd7, 0x2a,
+ 0xb0, 0x0f, 0xbe, 0x01, 0xd8, 0x22, 0x43, 0x0f, 0xbe, 0x90, 0xc7, 0x6f,
+ 0xbc, 0x0f, 0xaf, 0x88, 0x83, 0x05, 0x26, 0xe9, 0xc2, 0x00, 0xd0, 0x05,
+ 0x26, 0xf0, 0x44, 0x5d, 0xb5, 0xc2, 0x5e, 0x85, 0xc5, 0xdb, 0x87, 0x05,
+ 0x27, 0xc8, 0xc4, 0xb2, 0xf8, 0x00, 0x04, 0x50, 0xd6, 0x2e, 0xd8, 0x01,
+ 0x50, 0xa1, 0x45, 0x00, 0x8c, 0x42, 0x5e, 0xa3, 0x24, 0xc2, 0x5e, 0xaf,
+ 0x23, 0xc2, 0x5e, 0xc3, 0x42, 0xe5, 0x28, 0xc2, 0x5e, 0xdf, 0x04, 0xc2,
+ 0x5e, 0xff, 0xc4, 0xe4, 0xb7, 0x08, 0x30, 0xd9, 0x1e, 0xc2, 0x5f, 0x07,
+ 0x20, 0xc2, 0x5f, 0x19, 0x21, 0xc2, 0x5f, 0x39, 0x22, 0x42, 0x5f, 0x41,
+ 0x42, 0x00, 0x91, 0xc2, 0x5f, 0x69, 0x49, 0xa8, 0xca, 0xc2, 0x5f, 0x75,
+ 0x4a, 0xa2, 0xd8, 0x42, 0x5f, 0x7f, 0xc4, 0x18, 0x10, 0x00, 0xca, 0x69,
+ 0xc2, 0x22, 0xcc, 0x00, 0xca, 0x60, 0xc3, 0x0d, 0x14, 0x00, 0xca, 0x59,
+ 0xc3, 0x09, 0x9e, 0x00, 0xca, 0x50, 0xc4, 0x02, 0xde, 0x00, 0xca, 0x49,
+ 0xc2, 0x02, 0xa0, 0x00, 0xca, 0x40, 0xc3, 0x15, 0x31, 0x00, 0xca, 0x01,
+ 0xc4, 0xdf, 0x0f, 0x00, 0xc9, 0xd9, 0xc9, 0xac, 0xc3, 0x00, 0xc9, 0xd1,
+ 0xc9, 0xa9, 0x87, 0x00, 0xc9, 0xc8, 0xc2, 0x00, 0xdb, 0x00, 0xc9, 0xc1,
+ 0xc2, 0x00, 0x39, 0x00, 0xc9, 0xb9, 0xc2, 0x01, 0xc3, 0x00, 0xc9, 0xb1,
+ 0xc2, 0x00, 0xb0, 0x00, 0xc9, 0xa9, 0x10, 0xc2, 0x5f, 0x89, 0xc2, 0x01,
+ 0x6f, 0x00, 0xc9, 0x99, 0xc8, 0x14, 0x38, 0x00, 0xc9, 0x91, 0xc2, 0x02,
+ 0x2b, 0x00, 0xc9, 0x80, 0xc2, 0x01, 0x4a, 0x00, 0xc9, 0x59, 0xc2, 0x00,
+ 0x39, 0x00, 0xc9, 0x51, 0xc2, 0x19, 0x2c, 0x00, 0xc9, 0x48, 0x91, 0x00,
+ 0xc9, 0x43, 0x02, 0x5f, 0x93, 0x87, 0x00, 0xc9, 0x3b, 0x02, 0x5f, 0x97,
+ 0x83, 0x00, 0xc9, 0x03, 0x02, 0x5f, 0x9b, 0x97, 0x00, 0xc9, 0x11, 0x8b,
+ 0x00, 0xc9, 0x08, 0xc2, 0x00, 0x39, 0x00, 0xc8, 0xf1, 0xc2, 0x00, 0xd0,
+ 0x00, 0xc8, 0x61, 0x83, 0x00, 0xc8, 0x58, 0xc3, 0x2e, 0x0f, 0x00, 0xc8,
+ 0xe9, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0x21, 0x83, 0x00, 0xc8, 0x18, 0x83,
+ 0x00, 0xc8, 0xd9, 0xc2, 0x0d, 0xf6, 0x00, 0xc8, 0xd1, 0xc2, 0x00, 0xd0,
+ 0x00, 0xc8, 0xc8, 0x90, 0x00, 0xc8, 0x50, 0xc2, 0x00, 0xd0, 0x00, 0xc8,
+ 0x99, 0x83, 0x00, 0xc8, 0x90, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0x89, 0x83,
+ 0x00, 0xc8, 0x80, 0x83, 0x00, 0xc8, 0x79, 0xc2, 0x01, 0x30, 0x00, 0xc8,
+ 0x28, 0xc2, 0x00, 0xd0, 0x00, 0xc8, 0x71, 0x83, 0x00, 0xc8, 0x68, 0xc2,
+ 0x00, 0xd0, 0x00, 0xc8, 0x49, 0x83, 0x00, 0xc8, 0x40, 0xc2, 0x00, 0xd0,
+ 0x00, 0xc8, 0x39, 0x83, 0x00, 0xc8, 0x30, 0xc2, 0x00, 0xd0, 0x00, 0xc8,
+ 0x11, 0x83, 0x00, 0xc8, 0x08, 0x45, 0xdc, 0x72, 0xc2, 0x5f, 0xa3, 0x44,
+ 0x87, 0x22, 0x42, 0x5f, 0xaf, 0xc6, 0x0b, 0x09, 0x0f, 0xbf, 0x29, 0xc6,
+ 0x02, 0xd1, 0x0f, 0xa9, 0xa0, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x11, 0xc6,
+ 0x0b, 0x09, 0x0f, 0xbf, 0x48, 0x43, 0x02, 0x6f, 0xc2, 0x5f, 0xc1, 0x46,
+ 0x19, 0x02, 0x42, 0x5f, 0xcd, 0x43, 0x02, 0xa0, 0xc2, 0x5f, 0xdf, 0xdb,
+ 0x18, 0x54, 0x01, 0x57, 0xe0, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x09, 0xc6,
+ 0x0b, 0x09, 0x0f, 0xbf, 0x40, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x19, 0xc6,
+ 0x0b, 0x09, 0x0f, 0xbf, 0x50, 0x46, 0x02, 0x0f, 0xc2, 0x5f, 0xeb, 0x48,
+ 0x19, 0x9b, 0x42, 0x60, 0xa1, 0xcd, 0x78, 0x57, 0x00, 0xeb, 0xf1, 0xcd,
+ 0x7b, 0x63, 0x00, 0xeb, 0xd8, 0xc4, 0x74, 0x82, 0x01, 0x04, 0xa0, 0x96,
+ 0x00, 0xe8, 0xdb, 0x02, 0x60, 0xbd, 0x8e, 0x00, 0x14, 0xfb, 0x02, 0x60,
+ 0xc3, 0x87, 0x00, 0xe8, 0x3b, 0x02, 0x60, 0xc9, 0x9c, 0x00, 0xe9, 0x11,
+ 0x99, 0x00, 0xe9, 0x09, 0x98, 0x00, 0xe9, 0x01, 0x97, 0x00, 0xe8, 0xe1,
+ 0x94, 0x00, 0x14, 0x03, 0x02, 0x60, 0xd5, 0x92, 0x00, 0xe8, 0xc1, 0x91,
+ 0x00, 0xe8, 0x7b, 0x02, 0x60, 0xe7, 0x8f, 0x00, 0xe8, 0x69, 0x8d, 0x00,
+ 0xe8, 0x59, 0x8c, 0x00, 0xe8, 0x51, 0x86, 0x00, 0xe8, 0x29, 0x85, 0x00,
+ 0xe8, 0x21, 0x84, 0x00, 0x14, 0xcb, 0x02, 0x60, 0xf5, 0x83, 0x00, 0xe8,
+ 0x03, 0x02, 0x60, 0xfb, 0x89, 0x00, 0x13, 0x13, 0x02, 0x60, 0xff, 0x8b,
+ 0x00, 0x13, 0x53, 0x02, 0x61, 0x05, 0x90, 0x00, 0x13, 0xa1, 0x9b, 0x00,
+ 0x14, 0x79, 0x8a, 0x00, 0x14, 0xe1, 0x88, 0x05, 0x39, 0x81, 0x95, 0x05,
+ 0x39, 0x89, 0x93, 0x05, 0x3d, 0x78, 0xca, 0x45, 0x1d, 0x0e, 0xf8, 0x78,
+ 0xc4, 0x00, 0x32, 0x0e, 0xf8, 0x71, 0xc6, 0x01, 0x73, 0x00, 0x0d, 0xf0,
+ 0xd4, 0x01, 0x13, 0x0e, 0xf8, 0x50, 0xd8, 0x23, 0x33, 0x00, 0x15, 0x11,
+ 0xc8, 0xba, 0xda, 0x00, 0x0d, 0x50, 0xc5, 0x01, 0x0e, 0x00, 0x14, 0xc1,
+ 0xca, 0x54, 0x9e, 0x00, 0x15, 0x60, 0x9b, 0x00, 0x02, 0xcb, 0x02, 0x61,
+ 0x0b, 0x8f, 0x00, 0x02, 0x6b, 0x02, 0x61, 0x17, 0x97, 0x00, 0x02, 0xab,
+ 0x02, 0x61, 0x23, 0x91, 0x00, 0x02, 0x7b, 0x02, 0x61, 0x2d, 0x8b, 0x00,
+ 0x02, 0x4b, 0x02, 0x61, 0x51, 0x87, 0x00, 0x02, 0x2b, 0x02, 0x61, 0x67,
+ 0x83, 0x00, 0x02, 0x0b, 0x02, 0x61, 0x8f, 0x95, 0x00, 0x02, 0x9b, 0x02,
+ 0x61, 0xc5, 0x9c, 0x00, 0x02, 0xd3, 0x02, 0x61, 0xe7, 0x9a, 0x00, 0x02,
+ 0xc3, 0x02, 0x61, 0xed, 0x99, 0x00, 0x02, 0xbb, 0x02, 0x61, 0xf3, 0x98,
+ 0x00, 0x02, 0xb3, 0x02, 0x61, 0xff, 0x96, 0x00, 0x02, 0xa3, 0x02, 0x62,
+ 0x1b, 0x94, 0x00, 0x02, 0x93, 0x02, 0x62, 0x40, 0x92, 0x00, 0x02, 0x83,
+ 0x02, 0x62, 0x50, 0x90, 0x00, 0x02, 0x73, 0x02, 0x62, 0x56, 0x8e, 0x00,
+ 0x02, 0x63, 0x02, 0x62, 0x60, 0x8d, 0x00, 0x02, 0x5b, 0x02, 0x62, 0x6a,
+ 0x8a, 0x00, 0x02, 0x43, 0x02, 0x62, 0x70, 0x89, 0x00, 0x02, 0x3b, 0x02,
+ 0x62, 0x88, 0x88, 0x00, 0x02, 0x33, 0x02, 0x62, 0xa0, 0x86, 0x00, 0x02,
+ 0x23, 0x02, 0x62, 0xa6, 0x85, 0x00, 0x02, 0x1b, 0x02, 0x62, 0xb3, 0x84,
+ 0x00, 0x02, 0x13, 0x02, 0x62, 0xd4, 0x8c, 0x00, 0x02, 0x53, 0x02, 0x62,
+ 0xe6, 0x93, 0x00, 0x02, 0x8a, 0x02, 0x62, 0xec, 0xc2, 0x00, 0x0b, 0x00,
+ 0x09, 0x91, 0xc2, 0x49, 0x0c, 0x00, 0x0a, 0x90, 0x42, 0x01, 0x7c, 0xc2,
+ 0x62, 0xf2, 0x43, 0xe5, 0xc3, 0x42, 0x62, 0xfe, 0xc3, 0x91, 0x00, 0x00,
+ 0x74, 0x31, 0xc3, 0x1c, 0x63, 0x00, 0x74, 0x49, 0xc3, 0xe5, 0xf0, 0x00,
+ 0x74, 0x61, 0x10, 0xc2, 0x63, 0x0a, 0x42, 0x02, 0x10, 0xc2, 0x63, 0x16,
+ 0x06, 0xc2, 0x63, 0x20, 0xc3, 0x39, 0x6d, 0x00, 0x75, 0x01, 0xc3, 0x12,
+ 0xad, 0x00, 0x75, 0x60, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0xe1, 0xc3, 0x02,
+ 0x45, 0x00, 0x74, 0xf0, 0xc3, 0x02, 0x45, 0x00, 0x74, 0x51, 0xc4, 0xdf,
+ 0x43, 0x00, 0x75, 0x50, 0xc2, 0x00, 0xd0, 0x00, 0x75, 0x41, 0xc2, 0x0d,
+ 0xf6, 0x00, 0x75, 0x48, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0xb1, 0xc3, 0x02,
+ 0x45, 0x00, 0x74, 0xb8, 0xc2, 0x00, 0x45, 0x00, 0x74, 0xe9, 0xc2, 0x0c,
+ 0x42, 0x00, 0x74, 0xf8, 0xc3, 0x00, 0x74, 0x00, 0x75, 0x19, 0xc3, 0x65,
+ 0xba, 0x00, 0x75, 0x28, 0xd1, 0x51, 0xbc, 0x0f, 0xdc, 0xe9, 0xc2, 0x00,
+ 0x49, 0x01, 0x2f, 0xc8, 0x55, 0x0a, 0x4c, 0xc2, 0x63, 0x2a, 0x48, 0x0a,
+ 0x53, 0xc2, 0x63, 0x3c, 0x4a, 0x13, 0xe3, 0x42, 0x63, 0x48, 0xc6, 0x04,
+ 0xe1, 0x0f, 0xda, 0x91, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0x98, 0xd1, 0x51,
+ 0xbc, 0x0f, 0xdc, 0xe1, 0xc2, 0x00, 0x49, 0x01, 0x2f, 0xc0, 0xc6, 0x04,
+ 0xe1, 0x0f, 0xda, 0xb9, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0xc0, 0x55, 0x16,
+ 0xaa, 0xc2, 0x63, 0x54, 0x48, 0x0a, 0x53, 0xc2, 0x63, 0x66, 0x4a, 0x13,
+ 0xe3, 0x42, 0x63, 0x72, 0xd5, 0x35, 0x60, 0x0f, 0xdc, 0xd1, 0xd0, 0x06,
+ 0xd7, 0x0f, 0xdc, 0x00, 0xe0, 0x08, 0x67, 0x0f, 0xdb, 0x50, 0xe0, 0x0a,
+ 0x27, 0x0f, 0xdc, 0x90, 0xe0, 0x01, 0xc7, 0x0f, 0xdc, 0x88, 0xd9, 0x1b,
+ 0xd1, 0x0f, 0xc4, 0xa9, 0xcb, 0x8a, 0x46, 0x01, 0x0f, 0x5b, 0x02, 0x63,
+ 0x7e, 0xc8, 0xae, 0xbc, 0x01, 0x0f, 0x52, 0x02, 0x63, 0x84, 0xca, 0x03,
+ 0xdd, 0x0f, 0xc4, 0x89, 0x48, 0x01, 0x9a, 0x42, 0x63, 0x8a, 0xd1, 0x53,
+ 0x98, 0x01, 0x4a, 0x49, 0xd8, 0x05, 0xcf, 0x01, 0x5f, 0x68, 0x45, 0x00,
+ 0x8c, 0xc2, 0x63, 0x9f, 0xdc, 0x14, 0x15, 0x01, 0x0e, 0x29, 0xc8, 0xae,
+ 0xbc, 0x01, 0x0d, 0x29, 0xc6, 0x10, 0x9d, 0x01, 0x48, 0x91, 0xda, 0x1c,
+ 0x1e, 0x0f, 0xdd, 0xc0, 0xc5, 0x01, 0x4a, 0x01, 0x0d, 0xf9, 0x00, 0x42,
+ 0x63, 0xcf, 0xc5, 0x01, 0x4a, 0x01, 0x0d, 0xf1, 0x00, 0x42, 0x63, 0xe1,
+ 0xdb, 0x15, 0xb1, 0x01, 0x19, 0x21, 0xd2, 0x46, 0x6b, 0x01, 0x5d, 0xc8,
+ 0xd6, 0x31, 0x98, 0x01, 0x52, 0x41, 0xcc, 0x06, 0xbb, 0x01, 0x52, 0x30,
+ 0xca, 0xa4, 0xcc, 0x01, 0x52, 0x29, 0xc7, 0x80, 0x70, 0x01, 0x52, 0x11,
+ 0xca, 0x8d, 0xb1, 0x01, 0x52, 0x08, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0xf1,
+ 0x42, 0x00, 0xac, 0xc2, 0x63, 0xed, 0x48, 0x0a, 0xa9, 0x42, 0x63, 0xf3,
+ 0xc8, 0x00, 0xbf, 0x01, 0x3b, 0x11, 0xc6, 0x00, 0x91, 0x01, 0x3a, 0xb8,
+ 0xc6, 0x02, 0xd1, 0x0f, 0xbc, 0x39, 0xd6, 0x2e, 0xac, 0x01, 0x36, 0xd9,
+ 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x88, 0xdd, 0x10, 0xc0, 0x0f, 0xb3, 0xd9,
+ 0xc5, 0x13, 0x53, 0x0f, 0xbd, 0x60, 0x4e, 0x47, 0x15, 0xc2, 0x64, 0x05,
+ 0x45, 0x20, 0x6c, 0x42, 0x64, 0x11, 0x45, 0x01, 0xb4, 0xc2, 0x64, 0x1d,
+ 0x42, 0x01, 0x0c, 0x42, 0x64, 0x29, 0x49, 0x01, 0xaa, 0xc2, 0x64, 0x35,
+ 0xc5, 0x01, 0xa2, 0x01, 0x3c, 0xd0, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x21,
+ 0xc9, 0xb4, 0x91, 0x0f, 0xb2, 0xe0, 0xc9, 0x8e, 0x0a, 0x0f, 0xaa, 0x39,
+ 0xca, 0x9c, 0x48, 0x01, 0x5a, 0xa8, 0x48, 0x00, 0x29, 0xc2, 0x64, 0x41,
+ 0x00, 0x42, 0x64, 0x47, 0x50, 0x01, 0xa9, 0xc2, 0x64, 0x53, 0x51, 0x08,
+ 0xa9, 0x42, 0x64, 0x5f, 0xd7, 0x28, 0x88, 0x01, 0x3d, 0xd9, 0x46, 0x0a,
+ 0xef, 0x42, 0x64, 0x6b, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x99, 0xcd, 0x0e,
+ 0x61, 0x0f, 0xbe, 0xa0, 0x4b, 0x14, 0xd9, 0xc2, 0x64, 0x77, 0x00, 0x42,
+ 0x64, 0x89, 0xe0, 0x0c, 0x07, 0x01, 0x3d, 0x70, 0xd5, 0x03, 0xd2, 0x0f,
+ 0xc0, 0xc9, 0xdb, 0x17, 0x46, 0x0f, 0xc0, 0xe8, 0xe0, 0x0a, 0xa7, 0x01,
+ 0x3d, 0x40, 0xce, 0x6c, 0x60, 0x01, 0x3a, 0x31, 0xc7, 0xa7, 0xc7, 0x01,
+ 0x38, 0xa0, 0x46, 0x00, 0x8b, 0xc2, 0x64, 0x95, 0xc9, 0xb2, 0x48, 0x01,
+ 0x5a, 0xc8, 0xe0, 0x03, 0xa7, 0x01, 0x3d, 0x00, 0x45, 0x00, 0x5a, 0xc2,
+ 0x64, 0xa1, 0xc9, 0x99, 0x62, 0x0f, 0xa5, 0x91, 0x53, 0x08, 0xa7, 0x42,
+ 0x64, 0xad, 0xcb, 0x03, 0xbc, 0x01, 0x3c, 0xcb, 0x02, 0x64, 0xb9, 0x50,
+ 0x01, 0xa9, 0x42, 0x64, 0xbf, 0xc3, 0x05, 0x14, 0x0f, 0xc4, 0xe3, 0x02,
+ 0x64, 0xcb, 0xca, 0x9d, 0x2e, 0x0f, 0xc4, 0xe8, 0xcf, 0x15, 0x36, 0x0f,
+ 0xbd, 0x91, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x50, 0xc6, 0x7c, 0x7b, 0x0f,
+ 0xa4, 0xe9, 0xc5, 0x01, 0xa2, 0x0f, 0xa4, 0xc1, 0xcf, 0x64, 0x68, 0x0f,
+ 0x9c, 0xa0, 0x9e, 0x0d, 0x85, 0x41, 0x9d, 0x0d, 0x85, 0x38, 0x9e, 0x0d,
+ 0x81, 0x09, 0x9d, 0x0d, 0x81, 0x00, 0xcd, 0x79, 0xb6, 0x07, 0xd8, 0xf9,
+ 0x47, 0x00, 0x58, 0xc2, 0x64, 0xcf, 0xc7, 0xc1, 0xaf, 0x00, 0x2f, 0x88,
+ 0x46, 0x00, 0x8b, 0x42, 0x64, 0xdb, 0x46, 0x00, 0x8b, 0x42, 0x64, 0xe7,
+ 0x46, 0x00, 0x8b, 0x42, 0x64, 0xf3, 0x46, 0x00, 0x8b, 0x42, 0x64, 0xff,
+ 0xc2, 0x04, 0xad, 0x00, 0x2f, 0x53, 0x02, 0x65, 0x0b, 0xc4, 0xd4, 0xda,
+ 0x00, 0x2f, 0x33, 0x02, 0x65, 0x11, 0xc2, 0x00, 0x3d, 0x00, 0x2e, 0xc2,
+ 0x02, 0x65, 0x17, 0xc3, 0x11, 0xef, 0x00, 0x2f, 0x4b, 0x02, 0x65, 0x1d,
+ 0xc5, 0xdc, 0x2c, 0x00, 0x2f, 0x0a, 0x02, 0x65, 0x23, 0xcc, 0x84, 0x75,
+ 0x07, 0xda, 0x40, 0xcc, 0x84, 0x75, 0x07, 0xda, 0x38, 0xc2, 0x00, 0x67,
+ 0x00, 0x2f, 0x1b, 0x02, 0x65, 0x29, 0xc3, 0xba, 0x37, 0x00, 0x2e, 0xd3,
+ 0x02, 0x65, 0x2f, 0xc5, 0xd4, 0xd9, 0x00, 0x2f, 0x29, 0xc3, 0x20, 0x18,
+ 0x00, 0x2e, 0xf9, 0xc3, 0x00, 0x4e, 0x00, 0x2e, 0xe8, 0xcc, 0x84, 0x75,
+ 0x07, 0xda, 0x00, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0xf0, 0xcc, 0x84, 0x75,
+ 0x07, 0xd9, 0xe0, 0x46, 0x00, 0x8b, 0x42, 0x65, 0x35, 0xcc, 0x84, 0x75,
+ 0x07, 0xd9, 0xb0, 0xcb, 0x91, 0xa4, 0x07, 0xd9, 0xa1, 0x96, 0x00, 0x2e,
+ 0xb8, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0x98, 0xcc, 0x84, 0x75, 0x07, 0xd9,
+ 0x90, 0x0e, 0xc2, 0x65, 0x41, 0xc3, 0x16, 0x5a, 0x00, 0x2f, 0x10, 0xc3,
+ 0x22, 0x14, 0x07, 0xd9, 0x41, 0xc4, 0x5d, 0xe2, 0x07, 0xd9, 0x39, 0xc9,
+ 0xb4, 0xb5, 0x07, 0xd9, 0x31, 0xc5, 0xa2, 0x83, 0x07, 0xd9, 0x29, 0xc3,
+ 0xba, 0x37, 0x07, 0xd9, 0x21, 0xc2, 0x01, 0x7f, 0x07, 0xd9, 0x19, 0xc5,
+ 0x40, 0x9a, 0x07, 0xd9, 0x11, 0xc4, 0x06, 0x5a, 0x07, 0xd9, 0x08, 0xc5,
+ 0xcc, 0xe4, 0x00, 0x2d, 0xc3, 0x02, 0x65, 0x50, 0xc5, 0xd8, 0xfd, 0x00,
+ 0x2d, 0xd8, 0xc6, 0x44, 0x50, 0x00, 0x2e, 0x11, 0x0a, 0xc2, 0x65, 0x56,
+ 0xc4, 0xa0, 0x89, 0x00, 0x2d, 0xb0, 0xc4, 0xd5, 0xa7, 0x00, 0x2d, 0xcb,
+ 0x02, 0x65, 0x62, 0xc4, 0xd5, 0x84, 0x00, 0x2d, 0xa1, 0x45, 0xd5, 0xb5,
+ 0x42, 0x65, 0x68, 0xc6, 0xcb, 0x63, 0x00, 0x2f, 0xa1, 0xc3, 0x26, 0x1a,
+ 0x00, 0x2f, 0x98, 0xc3, 0x0f, 0x99, 0x00, 0x2c, 0xc1, 0x44, 0xe3, 0xeb,
+ 0x42, 0x65, 0x7a, 0x46, 0xcf, 0x7d, 0xc2, 0x65, 0x86, 0xc3, 0x1e, 0x95,
+ 0x00, 0x2c, 0xd8, 0xc7, 0xc5, 0xad, 0x00, 0x2c, 0xe8, 0xc7, 0xc5, 0xfa,
+ 0x00, 0x2d, 0x30, 0xce, 0x73, 0xc2, 0x02, 0x6e, 0x01, 0xcc, 0x83, 0x31,
+ 0x02, 0x6e, 0xe9, 0xc7, 0xc4, 0x8e, 0x02, 0x6f, 0x88, 0x14, 0xc2, 0x65,
+ 0x92, 0xcc, 0x8b, 0xf5, 0x02, 0x6e, 0xe0, 0xc3, 0x08, 0x93, 0x02, 0x6f,
+ 0x79, 0xc7, 0xc8, 0xf5, 0x02, 0x6f, 0xb8, 0x12, 0xc2, 0x65, 0x9e, 0xc6,
+ 0xd3, 0x37, 0x02, 0x6e, 0xc8, 0xc7, 0xc9, 0x3b, 0x01, 0x5e, 0x19, 0xc7,
+ 0xc2, 0xc0, 0x01, 0x59, 0x18, 0xc7, 0x33, 0xdf, 0x00, 0x00, 0x4b, 0x02,
+ 0x65, 0xa8, 0xc4, 0x3b, 0x19, 0x01, 0x5b, 0xf0, 0x95, 0x0f, 0x9e, 0xc0,
+ 0xc4, 0x18, 0x10, 0x08, 0x69, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0x69, 0xb0,
+ 0xc3, 0x0d, 0x14, 0x08, 0x69, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0x69, 0xa0,
+ 0xc4, 0x02, 0xde, 0x08, 0x69, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0x69, 0x90,
+ 0xc3, 0x0d, 0x23, 0x08, 0x69, 0x39, 0xc2, 0x00, 0xc1, 0x08, 0x69, 0x31,
+ 0xc4, 0x75, 0x13, 0x08, 0x69, 0x28, 0xc2, 0x19, 0x2c, 0x08, 0x68, 0xd9,
+ 0xc2, 0x01, 0x30, 0x08, 0x68, 0xd1, 0x83, 0x08, 0x68, 0xa8, 0x45, 0xd4,
+ 0x11, 0xc2, 0x65, 0xac, 0x83, 0x08, 0x68, 0x89, 0xc2, 0x00, 0xd0, 0x08,
+ 0x68, 0x40, 0xc2, 0x00, 0x39, 0x08, 0x68, 0x69, 0x83, 0x08, 0x68, 0x60,
+ 0xc2, 0x0e, 0x9a, 0x08, 0x68, 0x59, 0x83, 0x08, 0x68, 0x50, 0xc2, 0x01,
+ 0x6f, 0x08, 0x68, 0x21, 0x83, 0x08, 0x68, 0x18, 0x83, 0x08, 0x68, 0x79,
+ 0xc2, 0x00, 0xd0, 0x08, 0x68, 0x80, 0x83, 0x00, 0xb9, 0x41, 0xc2, 0x01,
+ 0x30, 0x00, 0xb9, 0x28, 0xc5, 0xd6, 0x8c, 0x00, 0x88, 0x2b, 0x02, 0x65,
+ 0xb8, 0x15, 0xc2, 0x65, 0xbc, 0xc5, 0x90, 0xe4, 0x00, 0x88, 0x93, 0x02,
+ 0x65, 0xcb, 0x12, 0xc2, 0x65, 0xd1, 0xc5, 0xb7, 0x9d, 0x00, 0x88, 0x5b,
+ 0x02, 0x65, 0xe9, 0xc5, 0xda, 0xe7, 0x00, 0x88, 0x33, 0x02, 0x65, 0xed,
+ 0x16, 0xc2, 0x65, 0xf1, 0x0d, 0xc2, 0x66, 0x00, 0xc5, 0xd9, 0x61, 0x00,
+ 0x88, 0x13, 0x02, 0x66, 0x15, 0x05, 0xc2, 0x66, 0x19, 0x42, 0x0c, 0x43,
+ 0xc2, 0x66, 0x2e, 0xc6, 0x92, 0x0c, 0x00, 0x8a, 0xf8, 0x49, 0xb4, 0x76,
+ 0xc2, 0x66, 0x3a, 0x49, 0xad, 0x02, 0x42, 0x66, 0x71, 0x0d, 0xc2, 0x66,
+ 0xb8, 0x15, 0xc2, 0x66, 0xcd, 0xc5, 0xd9, 0x61, 0x01, 0x89, 0xa3, 0x02,
+ 0x66, 0xdc, 0x16, 0xc2, 0x66, 0xe0, 0xc5, 0xd6, 0x8c, 0x01, 0x89, 0xcb,
+ 0x02, 0x66, 0xec, 0xc5, 0xda, 0xe7, 0x01, 0x8a, 0x0b, 0x02, 0x66, 0xf0,
+ 0x12, 0xc2, 0x66, 0xf4, 0x8b, 0x01, 0x8b, 0x1b, 0x02, 0x67, 0x09, 0x05,
+ 0xc2, 0x67, 0x0f, 0xc5, 0x90, 0xe4, 0x01, 0x8a, 0x71, 0x83, 0x01, 0x8a,
+ 0x7b, 0x02, 0x67, 0x1b, 0x1b, 0xc2, 0x67, 0x28, 0x87, 0x01, 0x8a, 0xa3,
+ 0x02, 0x67, 0x48, 0x91, 0x01, 0x8a, 0xbb, 0x02, 0x67, 0x50, 0x19, 0xc2,
+ 0x67, 0x54, 0x97, 0x01, 0x8a, 0xe0, 0x19, 0xc2, 0x67, 0x66, 0x0a, 0xc2,
+ 0x67, 0x70, 0xc2, 0x00, 0xc4, 0x01, 0x81, 0xc0, 0xc3, 0x09, 0x9e, 0x01,
+ 0x81, 0x21, 0xc3, 0x0d, 0x14, 0x01, 0x81, 0x28, 0xc2, 0x22, 0xcc, 0x01,
+ 0x81, 0x31, 0xc4, 0x18, 0x10, 0x01, 0x81, 0x38, 0xc8, 0x0d, 0x03, 0x08,
+ 0x47, 0xf8, 0xc5, 0x28, 0xee, 0x08, 0x47, 0xf1, 0xc2, 0x00, 0xc4, 0x08,
+ 0x47, 0xe8, 0xc2, 0x39, 0x8b, 0x08, 0x47, 0xa9, 0xc3, 0x1e, 0x1b, 0x08,
+ 0x47, 0x40, 0xc3, 0x11, 0xef, 0x08, 0x47, 0xa1, 0x03, 0x42, 0x67, 0x7c,
+ 0xc2, 0x17, 0xb6, 0x08, 0x47, 0x79, 0xc4, 0x36, 0xb5, 0x08, 0x47, 0x00,
+ 0xc2, 0x00, 0x8e, 0x08, 0x47, 0x38, 0x19, 0xc2, 0x67, 0x88, 0x15, 0xc2,
+ 0x67, 0x90, 0x83, 0x07, 0xfb, 0x89, 0x8b, 0x07, 0xfb, 0x91, 0x97, 0x07,
+ 0xfb, 0x99, 0x87, 0x07, 0xfb, 0xa1, 0x91, 0x07, 0xfb, 0xa9, 0x0d, 0xc2,
+ 0x67, 0xaa, 0x16, 0xc2, 0x67, 0xbe, 0x90, 0x07, 0xfc, 0xeb, 0x02, 0x67,
+ 0xd2, 0x0a, 0xc2, 0x67, 0xe6, 0x0f, 0xc2, 0x67, 0xfa, 0x1b, 0xc2, 0x68,
+ 0x0e, 0x14, 0x42, 0x68, 0x1a, 0xc5, 0x8e, 0xdf, 0x07, 0xfd, 0x0b, 0x02,
+ 0x68, 0x2e, 0xc6, 0xbb, 0xec, 0x07, 0xfd, 0xd8, 0x44, 0x3a, 0xbf, 0xc2,
+ 0x68, 0x34, 0xc3, 0x39, 0x37, 0x07, 0xfd, 0xa8, 0x02, 0x42, 0x68, 0x52,
+ 0xc4, 0x79, 0xf3, 0x07, 0xfd, 0x93, 0x02, 0x68, 0x74, 0xc6, 0xba, 0x7c,
+ 0x07, 0xfd, 0xe8, 0xc4, 0xb7, 0x9e, 0x07, 0xfd, 0xb8, 0xc4, 0xc6, 0x7a,
+ 0x07, 0xfd, 0xc1, 0xc6, 0xc6, 0x79, 0x07, 0xfd, 0xd0, 0xc6, 0xc1, 0x86,
+ 0x07, 0xfd, 0xe1, 0xc5, 0xc0, 0x7d, 0x07, 0xfd, 0x38, 0x87, 0x07, 0xfe,
+ 0x18, 0x83, 0x07, 0xfe, 0x23, 0x02, 0x68, 0x7a, 0x87, 0x07, 0xfe, 0x5b,
+ 0x02, 0x68, 0x7e, 0x91, 0x07, 0xfe, 0x91, 0x97, 0x07, 0xfe, 0xb9, 0x8b,
+ 0x07, 0xfe, 0xd8, 0x91, 0x07, 0xfe, 0x31, 0x97, 0x07, 0xfe, 0xd0, 0x87,
+ 0x07, 0xfe, 0x78, 0x83, 0x07, 0xfe, 0x6b, 0x02, 0x68, 0x82, 0x87, 0x07,
+ 0xfe, 0xab, 0x02, 0x68, 0x86, 0x8b, 0x07, 0xfe, 0xb0, 0x02, 0x42, 0x68,
+ 0x8a, 0xc2, 0x0c, 0x43, 0x0d, 0x80, 0x09, 0xc2, 0x14, 0x68, 0x0d, 0x88,
+ 0xf8, 0x19, 0xc2, 0x68, 0x96, 0x83, 0x01, 0x82, 0x09, 0x8b, 0x01, 0x82,
+ 0x19, 0x97, 0x01, 0x82, 0x29, 0x87, 0x01, 0x82, 0x39, 0x91, 0x01, 0x82,
+ 0x49, 0xc2, 0x00, 0x16, 0x01, 0x83, 0x19, 0x1b, 0xc2, 0x68, 0xa6, 0x0d,
+ 0x42, 0x68, 0xb2, 0xcd, 0x78, 0xcc, 0x0f, 0xdc, 0xb1, 0xc5, 0x01, 0xc2,
+ 0x0f, 0xdd, 0x88, 0xe0, 0x08, 0xa7, 0x0f, 0xdd, 0xa0, 0xc5, 0x68, 0x6e,
+ 0x01, 0x11, 0xf1, 0xc9, 0xaf, 0x4b, 0x01, 0x72, 0x2a, 0x02, 0x68, 0xba,
+ 0xc6, 0xca, 0xcd, 0x07, 0xff, 0xc9, 0xc9, 0x1b, 0x0a, 0x07, 0xff, 0xd1,
+ 0xca, 0x7c, 0x02, 0x07, 0xff, 0xd8, 0x43, 0x13, 0x6d, 0xc2, 0x68, 0xc0,
+ 0x46, 0x00, 0xd4, 0xc2, 0x68, 0xc6, 0x45, 0x00, 0x8c, 0x42, 0x68, 0xd2,
+ 0x42, 0x05, 0x1d, 0xc2, 0x68, 0xe4, 0xc7, 0x80, 0x70, 0x01, 0x50, 0xd9,
+ 0xcc, 0x06, 0xbb, 0x01, 0x50, 0xc9, 0xca, 0x9d, 0xb0, 0x01, 0x50, 0xc1,
+ 0xd9, 0x1f, 0x4a, 0x01, 0x50, 0xb9, 0xcd, 0x75, 0xa6, 0x01, 0x50, 0x70,
+ 0xd6, 0x30, 0xa6, 0x01, 0x50, 0xa9, 0xd1, 0x56, 0x40, 0x01, 0x50, 0x78,
+ 0xc3, 0x05, 0x14, 0x08, 0x5b, 0xc3, 0x02, 0x68, 0xf0, 0x16, 0xc2, 0x68,
+ 0xf4, 0xc4, 0x09, 0x9d, 0x08, 0x5b, 0xd8, 0x16, 0xc2, 0x69, 0x04, 0x15,
+ 0xc2, 0x69, 0x10, 0xc2, 0x00, 0x67, 0x08, 0x5b, 0x79, 0xc3, 0x20, 0x18,
+ 0x08, 0x5b, 0x69, 0xc8, 0xb9, 0x7a, 0x08, 0x5b, 0x61, 0xc6, 0xcf, 0xd7,
+ 0x08, 0x5b, 0x59, 0xc4, 0xe0, 0xe7, 0x08, 0x5b, 0x51, 0xc4, 0x4a, 0xb9,
+ 0x08, 0x5b, 0x49, 0xc2, 0x01, 0x7f, 0x08, 0x5b, 0x23, 0x02, 0x69, 0x1a,
+ 0xc5, 0x4a, 0xb3, 0x08, 0x5b, 0x31, 0xcd, 0x7e, 0x89, 0x08, 0x5b, 0x29,
+ 0xc6, 0x40, 0x9a, 0x08, 0x5b, 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x5b, 0x11,
+ 0xc4, 0xe3, 0x27, 0x08, 0x5b, 0x09, 0xc5, 0xa5, 0xfd, 0x08, 0x5b, 0x00,
+ 0xc3, 0x05, 0x14, 0x08, 0x5a, 0xc3, 0x02, 0x69, 0x20, 0x16, 0xc2, 0x69,
+ 0x24, 0xc4, 0x09, 0x9d, 0x08, 0x5a, 0xd8, 0x16, 0xc2, 0x69, 0x34, 0x15,
+ 0xc2, 0x69, 0x40, 0xc4, 0x5d, 0xe2, 0x08, 0x5a, 0x99, 0xc3, 0x00, 0x4e,
+ 0x08, 0x5a, 0x61, 0xc6, 0xcf, 0xd7, 0x08, 0x5a, 0x59, 0xc4, 0xe0, 0xe7,
+ 0x08, 0x5a, 0x51, 0xc4, 0x4a, 0xb9, 0x08, 0x5a, 0x49, 0xc2, 0x01, 0x7f,
+ 0x08, 0x5a, 0x23, 0x02, 0x69, 0x4a, 0xc5, 0x4a, 0xb3, 0x08, 0x5a, 0x31,
+ 0xc3, 0x7e, 0x89, 0x08, 0x5a, 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x5a, 0x19,
+ 0xc5, 0x9c, 0xa2, 0x08, 0x5a, 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x5a, 0x09,
+ 0x03, 0xc2, 0x69, 0x50, 0xc3, 0x20, 0x18, 0x08, 0x5a, 0x69, 0xc2, 0x00,
+ 0x67, 0x08, 0x5a, 0x81, 0xc4, 0xb9, 0x7e, 0x08, 0x5a, 0x90, 0xc3, 0x05,
+ 0x14, 0x00, 0x00, 0xf9, 0x16, 0xc2, 0x69, 0x5c, 0xc4, 0x09, 0x9d, 0x00,
+ 0x00, 0xe0, 0x4a, 0x0c, 0x8c, 0xc2, 0x69, 0x68, 0x49, 0x44, 0xee, 0xc2,
+ 0x69, 0x72, 0xc5, 0xdc, 0xa9, 0x0f, 0x65, 0x0b, 0x02, 0x69, 0x90, 0xc4,
+ 0x41, 0x55, 0x0f, 0x64, 0xf3, 0x02, 0x69, 0x96, 0xc4, 0x26, 0x78, 0x0f,
+ 0x63, 0xcb, 0x02, 0x69, 0x9c, 0xc5, 0x06, 0xdb, 0x0f, 0x63, 0xc3, 0x02,
+ 0x69, 0xa9, 0x15, 0xc2, 0x69, 0xb4, 0x08, 0xc2, 0x69, 0xc6, 0x16, 0xc2,
+ 0x69, 0xce, 0xc3, 0x05, 0x14, 0x0f, 0x63, 0x8a, 0x02, 0x69, 0xdf, 0xce,
+ 0x08, 0x73, 0x0f, 0x65, 0x79, 0x44, 0x05, 0x14, 0x42, 0x69, 0xe3, 0xc3,
+ 0x0d, 0x14, 0x0e, 0x9b, 0xb1, 0xc3, 0x09, 0x9e, 0x0e, 0x9b, 0xa8, 0xc4,
+ 0x02, 0xde, 0x0e, 0x9b, 0xa1, 0xc2, 0x02, 0xa0, 0x0e, 0x9b, 0x98, 0x0c,
+ 0xc2, 0x69, 0xef, 0xc8, 0xb6, 0x8a, 0x01, 0x96, 0x09, 0x42, 0x01, 0xc3,
+ 0xc2, 0x69, 0xf9, 0x03, 0xc2, 0x6a, 0x03, 0xc9, 0xa8, 0xee, 0x01, 0x96,
+ 0x41, 0xc7, 0xc9, 0x2d, 0x01, 0x96, 0x49, 0xc8, 0xbc, 0x22, 0x01, 0x96,
+ 0x51, 0x06, 0xc2, 0x6a, 0x0f, 0x45, 0xd6, 0x19, 0x42, 0x6a, 0x1b, 0xc5,
+ 0x00, 0x2c, 0x01, 0x7f, 0x81, 0xd0, 0x5d, 0x62, 0x01, 0x7f, 0x90, 0xc5,
+ 0x05, 0x02, 0x01, 0x7f, 0x89, 0xd0, 0x5d, 0x72, 0x01, 0x7f, 0x98, 0xc5,
+ 0x00, 0xd4, 0x01, 0x7f, 0xa9, 0xc5, 0x05, 0x02, 0x01, 0x7f, 0xb1, 0x0e,
+ 0xc2, 0x6a, 0x40, 0x46, 0x02, 0xae, 0x42, 0x6a, 0x4c, 0xc8, 0xbd, 0x1a,
+ 0x01, 0x8c, 0xa1, 0xc8, 0xb6, 0x72, 0x01, 0x8c, 0xd8, 0xc5, 0x01, 0xc2,
+ 0x01, 0x8c, 0xa9, 0xc7, 0x36, 0x55, 0x01, 0x8c, 0xe0, 0xc2, 0x00, 0xc4,
+ 0x08, 0x42, 0xdb, 0x02, 0x6a, 0x58, 0x19, 0xc2, 0x6a, 0x5e, 0xc4, 0x02,
+ 0xde, 0x08, 0x42, 0xd0, 0x00, 0x42, 0x6a, 0x68, 0xc2, 0x39, 0x8b, 0x08,
+ 0x42, 0xa9, 0xc3, 0x1e, 0x1b, 0x08, 0x42, 0x40, 0xc3, 0x11, 0xef, 0x08,
+ 0x42, 0xa1, 0x03, 0x42, 0x6a, 0x74, 0xc3, 0x16, 0x5a, 0x08, 0x42, 0x79,
+ 0xc4, 0x36, 0xb5, 0x08, 0x42, 0x00, 0xc2, 0x00, 0x8e, 0x08, 0x42, 0x38,
+ 0xca, 0xa7, 0x92, 0x0f, 0xd2, 0x43, 0x02, 0x6a, 0x80, 0xc4, 0xde, 0x83,
+ 0x01, 0x32, 0xb3, 0x02, 0x6a, 0x86, 0xc4, 0xe3, 0x93, 0x01, 0x32, 0xcb,
+ 0x02, 0x6a, 0x8c, 0x0d, 0xc2, 0x6a, 0x92, 0xc6, 0xca, 0xfd, 0x01, 0x32,
+ 0xbb, 0x02, 0x6a, 0xa4, 0xc5, 0xa8, 0xf7, 0x01, 0x32, 0xab, 0x02, 0x6a,
+ 0xaa, 0x47, 0x45, 0x86, 0x42, 0x6a, 0xb0, 0x00, 0x42, 0x6a, 0xcc, 0x46,
+ 0x00, 0x8b, 0x42, 0x6a, 0xd8, 0x03, 0xc2, 0x6a, 0xe4, 0xc5, 0xc2, 0xc2,
+ 0x01, 0x59, 0x08, 0xc7, 0xc6, 0xef, 0x01, 0x4e, 0xb1, 0xd0, 0x5a, 0x62,
+ 0x01, 0x59, 0x68, 0x00, 0x42, 0x6a, 0xf3, 0x00, 0x42, 0x6b, 0x05, 0xca,
+ 0x82, 0xd3, 0x01, 0x31, 0xd1, 0x44, 0x03, 0x15, 0x42, 0x6b, 0x14, 0xc9,
+ 0x8e, 0x0a, 0x0f, 0xaa, 0x31, 0xca, 0x9d, 0x1a, 0x01, 0x58, 0xe0, 0x00,
+ 0xc2, 0x6b, 0x1e, 0x4a, 0x01, 0xa9, 0x42, 0x6b, 0x2a, 0xe0, 0x0a, 0xc7,
+ 0x0f, 0xbd, 0x00, 0x00, 0x42, 0x6b, 0x3c, 0xc4, 0x5b, 0x26, 0x01, 0x36,
+ 0x09, 0xc3, 0x12, 0xb8, 0x01, 0x36, 0x00, 0x4a, 0x03, 0x3d, 0xc2, 0x6b,
+ 0x54, 0x4a, 0x01, 0xa9, 0x42, 0x6b, 0x66, 0x46, 0x01, 0x94, 0xc2, 0x6b,
+ 0x72, 0xc7, 0xc4, 0x80, 0x01, 0x1f, 0x10, 0x11, 0xc2, 0x6b, 0x78, 0xc2,
+ 0x00, 0xb3, 0x01, 0x34, 0x82, 0x02, 0x6b, 0x84, 0xc4, 0x0e, 0x6a, 0x01,
+ 0x39, 0x39, 0xc4, 0x11, 0xa4, 0x01, 0x5e, 0x70, 0x4a, 0x03, 0x3d, 0xc2,
+ 0x6b, 0x8a, 0x4a, 0x01, 0xa9, 0x42, 0x6b, 0x96, 0xc5, 0x06, 0x82, 0x01,
+ 0x30, 0xe9, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x30, 0xc8, 0x01, 0x92, 0x01,
+ 0x2d, 0x9b, 0x02, 0x6b, 0xa6, 0xce, 0x6c, 0x8a, 0x01, 0x2d, 0xa9, 0xc7,
+ 0xc6, 0x6a, 0x0f, 0xde, 0x50, 0x15, 0xc2, 0x6b, 0xac, 0xc7, 0x3a, 0x19,
+ 0x01, 0x59, 0x31, 0xc7, 0x0a, 0xe0, 0x01, 0x59, 0x40, 0xc4, 0x2b, 0xf1,
+ 0x0f, 0x9f, 0x89, 0xc5, 0xbb, 0xcd, 0x01, 0x59, 0x00, 0xc9, 0x46, 0x70,
+ 0x01, 0x2d, 0x79, 0xc3, 0x01, 0x5d, 0x01, 0x57, 0xf1, 0xc7, 0x5a, 0x6b,
+ 0x01, 0x59, 0x78, 0xc4, 0x18, 0x10, 0x0f, 0x17, 0xb9, 0xc2, 0x22, 0xcc,
+ 0x0f, 0x17, 0xb0, 0xc3, 0x0d, 0x14, 0x0f, 0x17, 0xa9, 0xc3, 0x09, 0x9e,
+ 0x0f, 0x17, 0xa0, 0xc4, 0x02, 0xde, 0x0f, 0x17, 0x99, 0xc2, 0x02, 0xa0,
+ 0x0f, 0x17, 0x90, 0xc2, 0x00, 0xec, 0x0f, 0x17, 0x78, 0xc2, 0x00, 0xec,
+ 0x0f, 0x17, 0x68, 0xc2, 0x14, 0x77, 0x0f, 0x17, 0x59, 0x83, 0x0f, 0x16,
+ 0x30, 0xc2, 0x00, 0xc4, 0x0f, 0x17, 0x50, 0xc2, 0x19, 0x2c, 0x0f, 0x17,
+ 0x49, 0xc2, 0x01, 0x30, 0x0f, 0x16, 0xe9, 0x83, 0x0f, 0x16, 0x48, 0x83,
+ 0x0f, 0x16, 0x03, 0x02, 0x6b, 0xbe, 0xc2, 0x00, 0x75, 0x0f, 0x17, 0x21,
+ 0x97, 0x0f, 0x16, 0xb0, 0x90, 0x0f, 0x17, 0x38, 0x90, 0x0f, 0x17, 0x32,
+ 0x02, 0x6b, 0xc5, 0xc2, 0x00, 0x75, 0x0f, 0x17, 0x28, 0xc2, 0x00, 0x39,
+ 0x0f, 0x17, 0x09, 0xc2, 0x0d, 0xf6, 0x0f, 0x17, 0x01, 0xc2, 0x00, 0xd0,
+ 0x0f, 0x16, 0x61, 0x83, 0x0f, 0x16, 0x58, 0xc3, 0x64, 0x58, 0x0f, 0x16,
+ 0xf9, 0x83, 0x0f, 0x16, 0x40, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0xc9, 0x83,
+ 0x0f, 0x16, 0xa0, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0x79, 0x83, 0x0f, 0x16,
+ 0x70, 0x83, 0x0f, 0x16, 0x51, 0xc2, 0x00, 0xd0, 0x0f, 0x16, 0x38, 0xc6,
+ 0x18, 0x10, 0x08, 0xc7, 0x81, 0xc4, 0xd2, 0x1d, 0x08, 0xc7, 0x78, 0xc4,
+ 0x45, 0x6a, 0x08, 0xc7, 0x71, 0xc4, 0x4a, 0x2e, 0x08, 0xc7, 0x68, 0xc5,
+ 0x0d, 0x0d, 0x08, 0xc7, 0x61, 0xc5, 0x28, 0xee, 0x08, 0xc7, 0x59, 0xc2,
+ 0x00, 0xc4, 0x08, 0xc7, 0x50, 0xc4, 0x18, 0x10, 0x08, 0xc7, 0x39, 0xc2,
+ 0x22, 0xcc, 0x08, 0xc7, 0x30, 0xc3, 0x0d, 0x14, 0x08, 0xc7, 0x29, 0xc3,
+ 0x09, 0x9e, 0x08, 0xc7, 0x20, 0xc4, 0x02, 0xde, 0x08, 0xc7, 0x19, 0xc2,
+ 0x02, 0xa0, 0x08, 0xc7, 0x10, 0xc2, 0x25, 0x9f, 0x08, 0xc6, 0xf1, 0xc3,
+ 0xe5, 0xed, 0x08, 0xc6, 0xe8, 0xc2, 0x00, 0xb1, 0x08, 0xc6, 0xe1, 0x11,
+ 0xc2, 0x6b, 0xc9, 0xc3, 0xbe, 0x83, 0x08, 0xc6, 0xc8, 0x8f, 0x08, 0xc6,
+ 0xb1, 0x96, 0x08, 0xc6, 0xa9, 0xc2, 0x00, 0x75, 0x08, 0xc6, 0x50, 0xc3,
+ 0x38, 0x86, 0x08, 0xc6, 0x99, 0xc3, 0x4f, 0x37, 0x08, 0xc6, 0x00, 0xc2,
+ 0x04, 0xcd, 0x08, 0xc6, 0x88, 0x10, 0x42, 0x6b, 0xd5, 0x85, 0x08, 0xc6,
+ 0x79, 0x97, 0x08, 0xc6, 0x38, 0x97, 0x08, 0xc6, 0x1b, 0x02, 0x6b, 0xdd,
+ 0x91, 0x08, 0xc6, 0x29, 0x83, 0x08, 0xc6, 0x20, 0xc2, 0x25, 0x9f, 0x08,
+ 0xc5, 0xf1, 0xc3, 0xe5, 0xed, 0x08, 0xc5, 0xe8, 0xc2, 0x00, 0xb1, 0x08,
+ 0xc5, 0xe1, 0x11, 0xc2, 0x6b, 0xe1, 0xc3, 0xbe, 0x83, 0x08, 0xc5, 0xc8,
+ 0x8f, 0x08, 0xc5, 0xb1, 0x96, 0x08, 0xc5, 0xa9, 0xc2, 0x00, 0x75, 0x08,
+ 0xc5, 0x50, 0xc3, 0x38, 0x86, 0x08, 0xc5, 0x99, 0xc3, 0x4f, 0x37, 0x08,
+ 0xc5, 0x00, 0xc2, 0x04, 0xcd, 0x08, 0xc5, 0x88, 0x10, 0x42, 0x6b, 0xed,
+ 0x85, 0x08, 0xc5, 0x79, 0x97, 0x08, 0xc5, 0x38, 0x97, 0x08, 0xc5, 0x1b,
+ 0x02, 0x6b, 0xf5, 0x91, 0x08, 0xc5, 0x29, 0x83, 0x08, 0xc5, 0x20, 0xd3,
+ 0x46, 0x7d, 0x01, 0x39, 0x29, 0x43, 0x00, 0xbf, 0x42, 0x6b, 0xf9, 0xc4,
+ 0x01, 0xc3, 0x01, 0x02, 0xd9, 0xcb, 0x05, 0x1c, 0x01, 0x02, 0xc0, 0x12,
+ 0xc2, 0x6b, 0xff, 0xcc, 0x88, 0x1d, 0x0f, 0xc8, 0xa9, 0x16, 0xc2, 0x6c,
+ 0x11, 0x11, 0xc2, 0x6c, 0x1d, 0xcf, 0x60, 0x99, 0x0f, 0xb2, 0x29, 0xcc,
+ 0x87, 0x75, 0x0f, 0xb2, 0x21, 0xd0, 0x5a, 0xf2, 0x0f, 0xb0, 0xdb, 0x02,
+ 0x6c, 0x2f, 0x42, 0x00, 0x99, 0xc2, 0x6c, 0x35, 0xcf, 0x67, 0x0b, 0x0f,
+ 0xb1, 0x21, 0x0f, 0xc2, 0x6c, 0x41, 0xdb, 0x17, 0x7c, 0x0f, 0xc9, 0x59,
+ 0xda, 0x1b, 0xea, 0x0f, 0xcb, 0xa1, 0xce, 0x6d, 0x6a, 0x0f, 0xd7, 0x20,
+ 0xcf, 0x36, 0xc5, 0x01, 0x49, 0x61, 0xd0, 0x20, 0x66, 0x01, 0x49, 0x78,
+ 0xc4, 0x26, 0x78, 0x07, 0xf8, 0xc9, 0xc4, 0x15, 0xe7, 0x07, 0xf8, 0x81,
+ 0xc3, 0x05, 0x14, 0x07, 0xf8, 0x89, 0x16, 0xc2, 0x6c, 0x4d, 0x08, 0xc2,
+ 0x6c, 0x59, 0x15, 0xc2, 0x6c, 0x65, 0xc5, 0x06, 0xdb, 0x07, 0xf8, 0xc0,
+ 0xc3, 0x0d, 0xe5, 0x07, 0xf8, 0xd1, 0x42, 0x0a, 0x8c, 0x42, 0x6c, 0x71,
+ 0xcc, 0x8b, 0x11, 0x07, 0xf8, 0xe1, 0x43, 0x00, 0x4b, 0x42, 0x6c, 0x7b,
+ 0x4f, 0x0b, 0x17, 0xc2, 0x6c, 0x93, 0x4d, 0x29, 0xb9, 0x42, 0x6c, 0xfb,
+ 0xce, 0x25, 0xad, 0x07, 0xf9, 0xe9, 0xcd, 0x00, 0x32, 0x07, 0xfa, 0xe9,
+ 0xd1, 0x4f, 0x7a, 0x07, 0xfb, 0x01, 0xcb, 0x1a, 0x50, 0x07, 0xf8, 0x48,
+ 0xc9, 0xb2, 0xa2, 0x0f, 0x98, 0xd9, 0xc6, 0x00, 0x91, 0x0f, 0x98, 0x98,
+ 0x44, 0x1a, 0xce, 0xc2, 0x6d, 0x63, 0xc3, 0x01, 0xe2, 0x0b, 0x79, 0x90,
+ 0xa5, 0x0b, 0x7c, 0xc9, 0xa4, 0x0b, 0x7c, 0xc1, 0xa3, 0x0b, 0x7c, 0xb9,
+ 0xa2, 0x0b, 0x7c, 0xb1, 0xa1, 0x0b, 0x7c, 0xa9, 0xa0, 0x0b, 0x7c, 0xa1,
+ 0x9f, 0x0b, 0x7c, 0x98, 0x87, 0x0b, 0x7a, 0x49, 0x83, 0x0b, 0x79, 0xb9,
+ 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x71, 0xc2, 0x0d, 0xf6, 0x0b, 0x79, 0x50,
+ 0xc2, 0x19, 0x2c, 0x0b, 0x78, 0xe1, 0x83, 0x0b, 0x78, 0xd0, 0xca, 0x56,
+ 0xca, 0x0b, 0x7a, 0x80, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x69, 0x83, 0x0b,
+ 0x79, 0x60, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x21, 0x83, 0x0b, 0x79, 0x18,
+ 0xc2, 0x00, 0xd0, 0x0b, 0x78, 0xa9, 0x83, 0x0b, 0x78, 0xa0, 0xc2, 0x16,
+ 0x5a, 0x0b, 0x7a, 0x39, 0x83, 0x0b, 0x79, 0xc1, 0xc2, 0x00, 0xd0, 0x0b,
+ 0x79, 0x79, 0xc2, 0x02, 0x1c, 0x0b, 0x79, 0x58, 0xc2, 0x19, 0x2c, 0x0b,
+ 0x78, 0xe9, 0x83, 0x0b, 0x78, 0xd8, 0xc3, 0x90, 0x65, 0x0b, 0x79, 0xf9,
+ 0x10, 0xc2, 0x6d, 0x7b, 0xc2, 0x01, 0xc3, 0x0b, 0x78, 0x30, 0x15, 0xc2,
+ 0x6d, 0x85, 0xc2, 0x19, 0x2c, 0x0b, 0x7a, 0x01, 0x83, 0x0b, 0x79, 0xe8,
+ 0x83, 0x0b, 0x79, 0xe1, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0xb0, 0x15, 0xc2,
+ 0x6d, 0x8f, 0x83, 0x0b, 0x78, 0x69, 0xc2, 0x01, 0x6f, 0x0b, 0x78, 0x60,
+ 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x49, 0x83, 0x0b, 0x79, 0x40, 0xc2, 0x19,
+ 0x2c, 0x0b, 0x78, 0xc9, 0x83, 0x0b, 0x78, 0xc0, 0x90, 0x0b, 0x7b, 0x62,
+ 0x02, 0x6d, 0x99, 0xc2, 0x00, 0x75, 0x0b, 0x7c, 0x30, 0x90, 0x0b, 0x7b,
+ 0x1a, 0x02, 0x6d, 0x9d, 0x94, 0x0b, 0x7b, 0xa8, 0x89, 0x0b, 0x7a, 0xf8,
+ 0x94, 0x0b, 0x7c, 0x11, 0x9b, 0x0b, 0x7b, 0x00, 0x87, 0x0b, 0x7b, 0xa0,
+ 0x89, 0x0b, 0x7a, 0xc0, 0x00, 0x42, 0x6d, 0xa1, 0xcd, 0x0e, 0x61, 0x0f,
+ 0xbe, 0x19, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x08, 0xc6, 0x0b, 0x09, 0x0f,
+ 0xbc, 0x79, 0xc6, 0x02, 0xd1, 0x01, 0x35, 0x50, 0xd0, 0x5c, 0x62, 0x0f,
+ 0xbc, 0x29, 0xcb, 0x85, 0x72, 0x01, 0x35, 0x58, 0x00, 0xc2, 0x6d, 0xad,
+ 0xe0, 0x0b, 0x87, 0x01, 0x3b, 0x68, 0x00, 0xc2, 0x6d, 0xb9, 0xe0, 0x0b,
+ 0x87, 0x01, 0x3b, 0x60, 0x49, 0x35, 0x21, 0xc2, 0x6d, 0xc5, 0xd3, 0x3c,
+ 0xb5, 0x0f, 0xbd, 0x81, 0x4c, 0x0e, 0x55, 0x42, 0x6d, 0xd1, 0xd1, 0x52,
+ 0x11, 0x01, 0x35, 0x61, 0xc4, 0x01, 0xe3, 0x01, 0x2c, 0x91, 0xc6, 0x13,
+ 0x52, 0x0f, 0xbd, 0x51, 0x43, 0x4d, 0x57, 0x42, 0x6d, 0xdd, 0xcf, 0x15,
+ 0x36, 0x0f, 0xbd, 0xe1, 0xd2, 0x22, 0x49, 0x0f, 0xbe, 0x70, 0x9b, 0x0b,
+ 0x73, 0xfb, 0x02, 0x6d, 0xe9, 0x83, 0x0b, 0x73, 0x6b, 0x02, 0x6d, 0xed,
+ 0x91, 0x0b, 0x73, 0xeb, 0x02, 0x6d, 0xf7, 0x94, 0x0b, 0x73, 0xe1, 0x90,
+ 0x0b, 0x73, 0xdb, 0x02, 0x6d, 0xfb, 0x86, 0x0b, 0x73, 0xc9, 0x9a, 0x0b,
+ 0x73, 0xc1, 0x8a, 0x0b, 0x73, 0xb3, 0x02, 0x6e, 0x03, 0x93, 0x0b, 0x73,
+ 0xa9, 0x8e, 0x0b, 0x73, 0xa1, 0x97, 0x0b, 0x73, 0x91, 0x85, 0x0b, 0x73,
+ 0x89, 0x84, 0x0b, 0x73, 0x81, 0x87, 0x0b, 0x73, 0x79, 0x8c, 0x0b, 0x73,
+ 0x71, 0x8d, 0x0b, 0x73, 0x63, 0x02, 0x6e, 0x07, 0x8b, 0x0b, 0x73, 0x59,
+ 0x88, 0x0b, 0x73, 0x51, 0x89, 0x0b, 0x73, 0x49, 0x96, 0x0b, 0x73, 0x41,
+ 0x92, 0x0b, 0x73, 0x39, 0x9c, 0x0b, 0x73, 0x29, 0x99, 0x0b, 0x73, 0x19,
+ 0x98, 0x0b, 0x73, 0x11, 0x95, 0x0b, 0x73, 0x09, 0x8f, 0x0b, 0x73, 0x00,
+ 0x9b, 0x0b, 0x72, 0xfb, 0x02, 0x6e, 0x0b, 0x83, 0x0b, 0x72, 0x6b, 0x02,
+ 0x6e, 0x0f, 0x91, 0x0b, 0x72, 0xeb, 0x02, 0x6e, 0x19, 0x94, 0x0b, 0x72,
+ 0xe1, 0x90, 0x0b, 0x72, 0xdb, 0x02, 0x6e, 0x1d, 0x86, 0x0b, 0x72, 0xc9,
+ 0x9a, 0x0b, 0x72, 0xc1, 0x8a, 0x0b, 0x72, 0xb3, 0x02, 0x6e, 0x25, 0x93,
+ 0x0b, 0x72, 0xa9, 0x8e, 0x0b, 0x72, 0xa1, 0x97, 0x0b, 0x72, 0x91, 0x85,
+ 0x0b, 0x72, 0x89, 0x84, 0x0b, 0x72, 0x81, 0x87, 0x0b, 0x72, 0x79, 0x8c,
+ 0x0b, 0x72, 0x71, 0x8d, 0x0b, 0x72, 0x63, 0x02, 0x6e, 0x29, 0x8b, 0x0b,
+ 0x72, 0x59, 0x88, 0x0b, 0x72, 0x51, 0x89, 0x0b, 0x72, 0x49, 0x96, 0x0b,
+ 0x72, 0x41, 0x92, 0x0b, 0x72, 0x39, 0x9c, 0x0b, 0x72, 0x29, 0x99, 0x0b,
+ 0x72, 0x19, 0x98, 0x0b, 0x72, 0x11, 0x95, 0x0b, 0x72, 0x09, 0x8f, 0x0b,
+ 0x72, 0x00, 0xc4, 0x02, 0xde, 0x0b, 0x74, 0x1b, 0x02, 0x6e, 0x2d, 0xc2,
+ 0x02, 0xa0, 0x0b, 0x74, 0x12, 0x02, 0x6e, 0x33, 0xcf, 0x6b, 0x25, 0x0b,
+ 0x74, 0xa0, 0xc4, 0x18, 0x10, 0x0b, 0x74, 0x39, 0xc2, 0x22, 0xcc, 0x0b,
+ 0x74, 0x30, 0xc3, 0x0d, 0x14, 0x0b, 0x74, 0x29, 0xc3, 0x09, 0x9e, 0x0b,
+ 0x74, 0x20, 0xc7, 0x1f, 0x6e, 0x0b, 0x74, 0x91, 0xc5, 0x66, 0xb1, 0x0b,
+ 0x74, 0x58, 0xc8, 0x48, 0x23, 0x0b, 0x74, 0x89, 0xc6, 0x44, 0x9c, 0x0b,
+ 0x74, 0x80, 0xc6, 0x14, 0x07, 0x0b, 0x74, 0x79, 0xc7, 0x34, 0x37, 0x0b,
+ 0x74, 0x70, 0xc7, 0x52, 0xcc, 0x0b, 0x74, 0x69, 0xc5, 0x22, 0x43, 0x0b,
+ 0x74, 0x61, 0xc2, 0x00, 0xc4, 0x0b, 0x74, 0x50, 0xc6, 0x06, 0xaf, 0x01,
+ 0x1e, 0xb1, 0xc9, 0x67, 0xa7, 0x01, 0x1e, 0xa8, 0x24, 0xc2, 0x6e, 0x39,
+ 0x25, 0xc2, 0x6e, 0x75, 0x1f, 0xc2, 0x6e, 0xb1, 0x1e, 0xc2, 0x6e, 0xed,
+ 0x26, 0xc2, 0x6f, 0x29, 0x22, 0xc2, 0x6f, 0x65, 0x1d, 0xc2, 0x6f, 0xa1,
+ 0x21, 0xc2, 0x6f, 0xd7, 0x23, 0xc2, 0x70, 0x13, 0x20, 0x42, 0x70, 0x4f,
+ 0x26, 0xc2, 0x70, 0x8b, 0x20, 0xc2, 0x70, 0xbb, 0x1e, 0xc2, 0x70, 0xf7,
+ 0x23, 0xc2, 0x71, 0x33, 0x24, 0xc2, 0x71, 0x6f, 0x21, 0xc2, 0x71, 0xab,
+ 0x1d, 0xc2, 0x71, 0xe7, 0x22, 0xc2, 0x72, 0x23, 0x25, 0xc2, 0x72, 0x5f,
+ 0x1f, 0x42, 0x72, 0x9b, 0xc2, 0x02, 0xa0, 0x0f, 0x46, 0x41, 0xc4, 0x02,
+ 0xde, 0x0f, 0x46, 0x48, 0xc3, 0x09, 0x9e, 0x0f, 0x46, 0x51, 0xc3, 0x0d,
+ 0x14, 0x0f, 0x46, 0x58, 0xc2, 0x22, 0xcc, 0x0f, 0x46, 0x61, 0xc4, 0x18,
+ 0x10, 0x0f, 0x46, 0x68, 0x07, 0xc2, 0x72, 0xd7, 0xc8, 0x4b, 0x95, 0x0f,
+ 0x46, 0x98, 0x95, 0x0f, 0x46, 0x91, 0xca, 0xa2, 0x92, 0x0f, 0x46, 0xa8,
+ 0x16, 0xc2, 0x72, 0xe1, 0xcd, 0x76, 0xf8, 0x08, 0x4f, 0xf1, 0x07, 0xc2,
+ 0x72, 0xf3, 0x15, 0xc2, 0x72, 0xff, 0x08, 0xc2, 0x73, 0x0b, 0x44, 0x05,
+ 0x14, 0x42, 0x73, 0x17, 0xc4, 0x26, 0x78, 0x08, 0x4e, 0x43, 0x02, 0x73,
+ 0x23, 0xc5, 0x06, 0xdb, 0x08, 0x4e, 0x3b, 0x02, 0x73, 0x2d, 0x15, 0xc2,
+ 0x73, 0x37, 0x08, 0xc2, 0x73, 0x49, 0x16, 0xc2, 0x73, 0x51, 0xc3, 0x05,
+ 0x14, 0x08, 0x4e, 0x02, 0x02, 0x73, 0x62, 0x48, 0x3f, 0x14, 0xc2, 0x73,
+ 0x66, 0x46, 0x02, 0x0f, 0x42, 0x73, 0x72, 0xc2, 0xe5, 0xfd, 0x08, 0x4c,
+ 0xf8, 0xc2, 0x0e, 0x9a, 0x08, 0x4c, 0xe9, 0x16, 0xc2, 0x73, 0xd1, 0xc2,
+ 0x0f, 0x9a, 0x08, 0x4c, 0xb9, 0x0d, 0xc2, 0x73, 0xe3, 0x15, 0xc2, 0x73,
+ 0xed, 0xc3, 0xe6, 0x71, 0x08, 0x4c, 0x91, 0x83, 0x08, 0x4c, 0x01, 0x87,
+ 0x08, 0x4c, 0x09, 0x8b, 0x08, 0x4c, 0x11, 0x91, 0x08, 0x4c, 0x19, 0xc2,
+ 0x19, 0x2c, 0x08, 0x4c, 0x21, 0xc2, 0x01, 0x4a, 0x08, 0x4c, 0x29, 0xc2,
+ 0x01, 0x5d, 0x08, 0x4c, 0x33, 0x02, 0x73, 0xf8, 0xc2, 0x00, 0xb0, 0x08,
+ 0x4c, 0x41, 0xc2, 0x01, 0xc3, 0x08, 0x4c, 0x49, 0x10, 0xc2, 0x73, 0xfe,
+ 0xc2, 0x00, 0x39, 0x08, 0x4c, 0x73, 0x02, 0x74, 0x0c, 0xc2, 0x00, 0xdb,
+ 0x08, 0x4c, 0x80, 0x47, 0x22, 0x04, 0xc2, 0x74, 0x12, 0xcc, 0x8b, 0x4d,
+ 0x01, 0x4c, 0xd8, 0xc3, 0x7f, 0x18, 0x05, 0x5f, 0x29, 0x03, 0xc2, 0x74,
+ 0x18, 0x97, 0x05, 0x57, 0x70, 0xc3, 0x7f, 0x18, 0x05, 0x5f, 0x21, 0x8b,
+ 0x05, 0x57, 0x58, 0x97, 0x05, 0x57, 0x61, 0xc3, 0x7f, 0x18, 0x05, 0x5f,
+ 0x40, 0xc7, 0xc9, 0xe3, 0x05, 0x5f, 0x10, 0xc3, 0x71, 0x83, 0x05, 0x5e,
+ 0x4b, 0x02, 0x74, 0x20, 0x83, 0x05, 0x5e, 0x2b, 0x02, 0x74, 0x26, 0xc2,
+ 0x00, 0xc1, 0x05, 0x57, 0x41, 0xc2, 0x19, 0x2c, 0x05, 0x57, 0x18, 0xc2,
+ 0x00, 0x71, 0x05, 0x5e, 0x3b, 0x02, 0x74, 0x2c, 0x16, 0xc2, 0x74, 0x32,
+ 0xc3, 0x18, 0xb0, 0x05, 0x5e, 0x50, 0x83, 0x05, 0x5e, 0x23, 0x02, 0x74,
+ 0x3c, 0xc3, 0x08, 0x09, 0x05, 0x5e, 0x80, 0xc2, 0x01, 0x25, 0x05, 0x5e,
+ 0x03, 0x02, 0x74, 0x42, 0xc3, 0x18, 0xb0, 0x05, 0x5e, 0x40, 0xc3, 0x08,
+ 0x09, 0x05, 0x5e, 0xd1, 0x83, 0x05, 0x5e, 0xa8, 0xc3, 0x18, 0xb0, 0x05,
+ 0x5e, 0xc9, 0x06, 0xc2, 0x74, 0x48, 0xc2, 0x00, 0x71, 0x05, 0x5e, 0xb8,
+ 0xc3, 0x18, 0xb0, 0x05, 0x5e, 0xc1, 0xc2, 0x01, 0x25, 0x05, 0x5e, 0x90,
+ 0xc2, 0x0d, 0xf6, 0x05, 0x57, 0x51, 0xc2, 0x00, 0xd0, 0x05, 0x57, 0x49,
+ 0xc2, 0x00, 0xc2, 0x05, 0x5e, 0x08, 0x83, 0x05, 0x57, 0x11, 0xc2, 0x00,
+ 0x71, 0x05, 0x5e, 0x30, 0xc7, 0xc9, 0xe3, 0x05, 0x5e, 0xe8, 0xc7, 0xc9,
+ 0xe3, 0x05, 0x5e, 0xe0, 0xc3, 0x08, 0x09, 0x05, 0x5e, 0x99, 0xc2, 0x00,
+ 0x71, 0x05, 0x5e, 0xb0, 0xc9, 0xb1, 0xc1, 0x0f, 0xb5, 0xa9, 0xc7, 0x61,
+ 0x82, 0x0f, 0xb4, 0xf1, 0xc8, 0xb7, 0xaa, 0x0f, 0xb5, 0x00, 0xc2, 0x00,
+ 0x74, 0x01, 0x34, 0x59, 0xc3, 0x01, 0x95, 0x01, 0x34, 0x50, 0xe0, 0x01,
+ 0x27, 0x08, 0xb3, 0x60, 0x46, 0x00, 0x8b, 0x42, 0x74, 0x52, 0xcf, 0x01,
+ 0x38, 0x08, 0xb3, 0x31, 0xc8, 0x00, 0xbf, 0x08, 0xb3, 0x28, 0xcf, 0x01,
+ 0x38, 0x08, 0xb3, 0x21, 0xc8, 0x00, 0xbf, 0x08, 0xb3, 0x00, 0xc4, 0x26,
+ 0x78, 0x00, 0xc0, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0xc0, 0xc1, 0x15, 0xc2,
+ 0x74, 0x5e, 0x08, 0xc2, 0x74, 0x6a, 0x16, 0xc2, 0x74, 0x76, 0xc3, 0x05,
+ 0x14, 0x00, 0xc0, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0xc0, 0x80, 0x45, 0xc2,
+ 0x59, 0x42, 0x74, 0x82, 0x48, 0xb1, 0x71, 0xc2, 0x74, 0xa4, 0xc2, 0x00,
+ 0x75, 0x00, 0xc1, 0x48, 0x44, 0x62, 0x5b, 0xc2, 0x74, 0xf0, 0xc2, 0x0d,
+ 0xf6, 0x00, 0xc1, 0xe1, 0x83, 0x00, 0xc1, 0x90, 0x83, 0x00, 0xc1, 0xa3,
+ 0x02, 0x75, 0x61, 0x8b, 0x00, 0xc2, 0x10, 0x44, 0x14, 0x85, 0xc2, 0x75,
+ 0x67, 0xc2, 0x00, 0xd0, 0x00, 0xc1, 0x89, 0x83, 0x00, 0xc1, 0x80, 0xc2,
+ 0x00, 0x0a, 0x00, 0xc2, 0x09, 0xc2, 0x00, 0x39, 0x00, 0xc1, 0xf9, 0x83,
+ 0x00, 0xc1, 0xe8, 0xc2, 0x00, 0xd0, 0x00, 0xc2, 0x01, 0x83, 0x00, 0xc1,
+ 0x78, 0xc2, 0x00, 0xd0, 0x00, 0xc1, 0xd9, 0x83, 0x00, 0xc1, 0xd0, 0x87,
+ 0x00, 0xc1, 0x38, 0x87, 0x00, 0xc1, 0x30, 0x87, 0x00, 0xc1, 0x28, 0xc4,
+ 0x09, 0x9d, 0x00, 0xc0, 0x79, 0x16, 0xc2, 0x75, 0xc9, 0xc3, 0x05, 0x14,
+ 0x00, 0xc0, 0x58, 0x45, 0x09, 0x98, 0xc2, 0x75, 0xd5, 0xcb, 0x97, 0xf5,
+ 0x08, 0xb2, 0x11, 0xc4, 0x19, 0x53, 0x08, 0xb2, 0x08, 0xc4, 0xe3, 0x83,
+ 0x08, 0xb2, 0x21, 0x03, 0xc2, 0x75, 0xf9, 0x42, 0x07, 0xb2, 0x42, 0x76,
+ 0x05, 0x03, 0xc2, 0x76, 0x11, 0x91, 0x08, 0xb1, 0xd9, 0x87, 0x08, 0xb1,
+ 0xc9, 0x48, 0xb2, 0x2d, 0xc2, 0x76, 0x1d, 0x97, 0x08, 0xb1, 0x9b, 0x02,
+ 0x76, 0x2b, 0x8b, 0x08, 0xb1, 0x8a, 0x02, 0x76, 0x2f, 0x0e, 0xc2, 0x76,
+ 0x33, 0xc2, 0x00, 0xd0, 0x08, 0xb1, 0x71, 0x15, 0xc2, 0x76, 0x3d, 0x18,
+ 0xc2, 0x76, 0x4d, 0xc2, 0x00, 0x39, 0x08, 0xb1, 0x41, 0xc2, 0x19, 0x2c,
+ 0x08, 0xb1, 0x39, 0xc2, 0x01, 0xc3, 0x08, 0xb1, 0x31, 0x04, 0xc2, 0x76,
+ 0x57, 0x12, 0xc2, 0x76, 0x61, 0x10, 0xc2, 0x76, 0x6b, 0x06, 0xc2, 0x76,
+ 0x81, 0x16, 0xc2, 0x76, 0x8f, 0x0c, 0xc2, 0x76, 0x9d, 0x05, 0xc2, 0x76,
+ 0xa7, 0x09, 0xc2, 0x76, 0xb1, 0x0d, 0xc2, 0x76, 0xbb, 0x83, 0x08, 0xb0,
+ 0x03, 0x02, 0x76, 0xc5, 0x91, 0x08, 0xb0, 0x61, 0x87, 0x08, 0xb0, 0x51,
+ 0x97, 0x08, 0xb0, 0x23, 0x02, 0x76, 0xd1, 0x8b, 0x08, 0xb0, 0x12, 0x02,
+ 0x76, 0xd5, 0x15, 0xc2, 0x76, 0xd9, 0x05, 0xc2, 0x76, 0xef, 0x14, 0xc2,
+ 0x77, 0x19, 0x0e, 0xc2, 0x77, 0x2f, 0x09, 0xc2, 0x77, 0x41, 0x04, 0xc2,
+ 0x77, 0x56, 0x06, 0xc2, 0x77, 0x62, 0x03, 0xc2, 0x77, 0x6c, 0x12, 0xc2,
+ 0x77, 0x7e, 0x16, 0xc2, 0x77, 0x8a, 0x17, 0xc2, 0x77, 0x96, 0x18, 0xc2,
+ 0x77, 0xa6, 0x0f, 0xc2, 0x77, 0xb2, 0x07, 0xc2, 0x77, 0xbc, 0x0a, 0xc2,
+ 0x77, 0xc8, 0x1b, 0xc2, 0x77, 0xd4, 0xca, 0x9c, 0xf2, 0x00, 0x17, 0xf0,
+ 0x89, 0x0e, 0xa1, 0xd3, 0x02, 0x77, 0xe0, 0x88, 0x0e, 0xa1, 0xc9, 0x87,
+ 0x0e, 0xa1, 0xc3, 0x02, 0x77, 0xe6, 0x86, 0x0e, 0xa1, 0xbb, 0x02, 0x77,
+ 0xf2, 0x85, 0x0e, 0xa1, 0xb3, 0x02, 0x77, 0xf8, 0x84, 0x0e, 0xa1, 0xab,
+ 0x02, 0x77, 0xfe, 0x83, 0x0e, 0xa1, 0xa3, 0x02, 0x78, 0x04, 0x91, 0x0e,
+ 0xa2, 0x13, 0x02, 0x78, 0x0a, 0x92, 0x0e, 0xa2, 0x1b, 0x02, 0x78, 0x0e,
+ 0x97, 0x0e, 0xa2, 0x43, 0x02, 0x78, 0x1e, 0x96, 0x0e, 0xa2, 0x3b, 0x02,
+ 0x78, 0x24, 0x95, 0x0e, 0xa2, 0x33, 0x02, 0x78, 0x33, 0x94, 0x0e, 0xa2,
+ 0x2b, 0x02, 0x78, 0x39, 0x9a, 0x0e, 0xa2, 0x5b, 0x02, 0x78, 0x3f, 0x90,
+ 0x0e, 0xa2, 0x0b, 0x02, 0x78, 0x43, 0x8f, 0x0e, 0xa2, 0x03, 0x02, 0x78,
+ 0x47, 0x8e, 0x0e, 0xa1, 0xfb, 0x02, 0x78, 0x4b, 0x8d, 0x0e, 0xa1, 0xf3,
+ 0x02, 0x78, 0x51, 0x8b, 0x0e, 0xa1, 0xe3, 0x02, 0x78, 0x57, 0x9c, 0x0e,
+ 0xa2, 0x6b, 0x02, 0x78, 0x5d, 0x9b, 0x0e, 0xa2, 0x61, 0x99, 0x0e, 0xa2,
+ 0x51, 0x98, 0x0e, 0xa2, 0x49, 0x93, 0x0e, 0xa2, 0x21, 0x8c, 0x0e, 0xa1,
+ 0xe9, 0x8a, 0x0e, 0xa1, 0xd8, 0xc8, 0x9c, 0x0e, 0x0e, 0xb8, 0xd9, 0xc9,
+ 0xaa, 0x9e, 0x0e, 0xb8, 0xc9, 0xd3, 0x43, 0x00, 0x0e, 0xb8, 0xa8, 0x91,
+ 0x0e, 0xa2, 0xe3, 0x02, 0x78, 0x63, 0x92, 0x0e, 0xa2, 0xeb, 0x02, 0x78,
+ 0x67, 0x85, 0x0e, 0xa2, 0x83, 0x02, 0x78, 0x77, 0x97, 0x0e, 0xa3, 0x13,
+ 0x02, 0x78, 0x7d, 0x96, 0x0e, 0xa3, 0x0b, 0x02, 0x78, 0x83, 0x95, 0x0e,
+ 0xa3, 0x03, 0x02, 0x78, 0x8f, 0x88, 0x0e, 0xa2, 0x9b, 0x02, 0x78, 0x95,
+ 0x94, 0x0e, 0xa2, 0xfb, 0x02, 0x78, 0x9b, 0x9a, 0x0e, 0xa3, 0x2b, 0x02,
+ 0x78, 0xa1, 0x90, 0x0e, 0xa2, 0xdb, 0x02, 0x78, 0xa5, 0x8f, 0x0e, 0xa2,
+ 0xd3, 0x02, 0x78, 0xa9, 0x8e, 0x0e, 0xa2, 0xcb, 0x02, 0x78, 0xad, 0x8d,
+ 0x0e, 0xa2, 0xc3, 0x02, 0x78, 0xb3, 0x8b, 0x0e, 0xa2, 0xb3, 0x02, 0x78,
+ 0xb9, 0x87, 0x0e, 0xa2, 0x93, 0x02, 0x78, 0xbf, 0x9c, 0x0e, 0xa3, 0x3b,
+ 0x02, 0x78, 0xcb, 0x86, 0x0e, 0xa2, 0x8b, 0x02, 0x78, 0xd1, 0x89, 0x0e,
+ 0xa2, 0xa3, 0x02, 0x78, 0xdd, 0x84, 0x0e, 0xa2, 0x7b, 0x02, 0x78, 0xe3,
+ 0x83, 0x0e, 0xa2, 0x73, 0x02, 0x78, 0xe9, 0x9b, 0x0e, 0xa3, 0x31, 0x99,
+ 0x0e, 0xa3, 0x21, 0x98, 0x0e, 0xa3, 0x19, 0x93, 0x0e, 0xa2, 0xf1, 0x8c,
+ 0x0e, 0xa2, 0xb8, 0x45, 0x03, 0x14, 0xc2, 0x78, 0xef, 0x46, 0x07, 0x2f,
+ 0x42, 0x79, 0x93, 0xc4, 0x26, 0x78, 0x0e, 0xbe, 0xb9, 0xc5, 0x06, 0xdb,
+ 0x0e, 0xbe, 0xb1, 0x15, 0xc2, 0x79, 0x9f, 0x08, 0xc2, 0x79, 0xab, 0x16,
+ 0xc2, 0x79, 0xb7, 0xc3, 0x05, 0x14, 0x0e, 0xbe, 0x79, 0xc4, 0x15, 0xe7,
+ 0x0e, 0xbe, 0x70, 0x86, 0x0e, 0xa0, 0x1b, 0x02, 0x79, 0xc3, 0x91, 0x0e,
+ 0xa0, 0x73, 0x02, 0x79, 0xcf, 0x92, 0x0e, 0xa0, 0x7b, 0x02, 0x79, 0xd3,
+ 0x85, 0x0e, 0xa0, 0x13, 0x02, 0x79, 0xe3, 0x97, 0x0e, 0xa0, 0xa3, 0x02,
+ 0x79, 0xe9, 0x96, 0x0e, 0xa0, 0x9b, 0x02, 0x79, 0xef, 0x95, 0x0e, 0xa0,
+ 0x93, 0x02, 0x79, 0xfe, 0x94, 0x0e, 0xa0, 0x8b, 0x02, 0x7a, 0x04, 0x9a,
+ 0x0e, 0xa0, 0xbb, 0x02, 0x7a, 0x0a, 0x90, 0x0e, 0xa0, 0x6b, 0x02, 0x7a,
+ 0x0e, 0x8f, 0x0e, 0xa0, 0x63, 0x02, 0x7a, 0x12, 0x8e, 0x0e, 0xa0, 0x5b,
+ 0x02, 0x7a, 0x16, 0x8d, 0x0e, 0xa0, 0x53, 0x02, 0x7a, 0x1c, 0x8b, 0x0e,
+ 0xa0, 0x43, 0x02, 0x7a, 0x22, 0x87, 0x0e, 0xa0, 0x23, 0x02, 0x7a, 0x28,
+ 0x9c, 0x0e, 0xa0, 0xcb, 0x02, 0x7a, 0x34, 0x89, 0x0e, 0xa0, 0x33, 0x02,
+ 0x7a, 0x3a, 0x84, 0x0e, 0xa0, 0x0b, 0x02, 0x7a, 0x40, 0x83, 0x0e, 0xa0,
+ 0x03, 0x02, 0x7a, 0x46, 0x9b, 0x0e, 0xa0, 0xc1, 0x99, 0x0e, 0xa0, 0xb1,
+ 0x98, 0x0e, 0xa0, 0xa9, 0x93, 0x0e, 0xa0, 0x81, 0x8c, 0x0e, 0xa0, 0x49,
+ 0x8a, 0x0e, 0xa0, 0x39, 0x88, 0x0e, 0xa0, 0x28, 0x12, 0xc2, 0x7a, 0x4c,
+ 0xca, 0x9c, 0xac, 0x0e, 0xba, 0xa1, 0xcc, 0x8b, 0x65, 0x0e, 0xba, 0x91,
+ 0xcc, 0x89, 0xfd, 0x0e, 0xba, 0x89, 0xce, 0x10, 0x3e, 0x0e, 0xba, 0x81,
+ 0x46, 0x03, 0x13, 0xc2, 0x7a, 0x5e, 0xc5, 0xdb, 0xf0, 0x0e, 0xb9, 0xa9,
+ 0x48, 0x0b, 0x17, 0x42, 0x7b, 0x02, 0xc8, 0x9c, 0x0e, 0x0e, 0xb7, 0x09,
+ 0xc9, 0xaa, 0x9e, 0x0e, 0xb6, 0xf9, 0xd3, 0x43, 0x00, 0x0e, 0xb6, 0xd8,
+ 0x46, 0x03, 0x13, 0xc2, 0x7b, 0xa3, 0x48, 0x0b, 0x17, 0x42, 0x7c, 0x0b,
+ 0xc4, 0x26, 0x78, 0x0e, 0xbf, 0xf9, 0xc5, 0x06, 0xdb, 0x0e, 0xbf, 0xf1,
+ 0x15, 0xc2, 0x7c, 0x73, 0x08, 0xc2, 0x7c, 0x7f, 0x16, 0xc2, 0x7c, 0x8b,
+ 0xc3, 0x05, 0x14, 0x0e, 0xbf, 0xb9, 0xc4, 0x15, 0xe7, 0x0e, 0xbf, 0xb0,
+ 0x9c, 0x0e, 0xb5, 0x19, 0x9b, 0x0e, 0xb5, 0x11, 0x9a, 0x0e, 0xb5, 0x09,
+ 0x99, 0x0e, 0xb5, 0x01, 0x98, 0x0e, 0xb4, 0xf9, 0x97, 0x0e, 0xb4, 0xf1,
+ 0x96, 0x0e, 0xb4, 0xe9, 0x95, 0x0e, 0xb4, 0xe1, 0x94, 0x0e, 0xb4, 0xd9,
+ 0x93, 0x0e, 0xb4, 0xd1, 0x92, 0x0e, 0xb4, 0xc9, 0x91, 0x0e, 0xb4, 0xc1,
+ 0x90, 0x0e, 0xb4, 0xb9, 0x8f, 0x0e, 0xb4, 0xb1, 0x8e, 0x0e, 0xb4, 0xa9,
+ 0x8d, 0x0e, 0xb4, 0xa1, 0x8c, 0x0e, 0xb4, 0x99, 0x8b, 0x0e, 0xb4, 0x91,
+ 0x8a, 0x0e, 0xb4, 0x89, 0x89, 0x0e, 0xb4, 0x81, 0x88, 0x0e, 0xb4, 0x79,
+ 0x87, 0x0e, 0xb4, 0x71, 0x86, 0x0e, 0xb4, 0x69, 0x85, 0x0e, 0xb4, 0x61,
+ 0x84, 0x0e, 0xb4, 0x59, 0x83, 0x0e, 0xb4, 0x50, 0x9c, 0x0e, 0xb4, 0x49,
+ 0x9b, 0x0e, 0xb4, 0x41, 0x9a, 0x0e, 0xb4, 0x39, 0x99, 0x0e, 0xb4, 0x31,
+ 0x98, 0x0e, 0xb4, 0x29, 0x97, 0x0e, 0xb4, 0x21, 0x96, 0x0e, 0xb4, 0x19,
+ 0x95, 0x0e, 0xb4, 0x11, 0x94, 0x0e, 0xb4, 0x09, 0x93, 0x0e, 0xb4, 0x01,
+ 0x92, 0x0e, 0xb3, 0xf9, 0x91, 0x0e, 0xb3, 0xf1, 0x90, 0x0e, 0xb3, 0xe9,
+ 0x8f, 0x0e, 0xb3, 0xe1, 0x8e, 0x0e, 0xb3, 0xd9, 0x8d, 0x0e, 0xb3, 0xd1,
+ 0x8c, 0x0e, 0xb3, 0xc9, 0x8b, 0x0e, 0xb3, 0xc1, 0x8a, 0x0e, 0xb3, 0xb9,
+ 0x89, 0x0e, 0xb3, 0xb1, 0x88, 0x0e, 0xb3, 0xa9, 0x87, 0x0e, 0xb3, 0xa1,
+ 0x86, 0x0e, 0xb3, 0x99, 0x85, 0x0e, 0xb3, 0x91, 0x84, 0x0e, 0xb3, 0x89,
+ 0x83, 0x0e, 0xb3, 0x80, 0x45, 0x58, 0xc2, 0xc2, 0x7c, 0x97, 0x46, 0x09,
+ 0x97, 0xc2, 0x7c, 0xd1, 0x47, 0xc7, 0x4a, 0xc2, 0x7c, 0xf5, 0x46, 0x03,
+ 0x13, 0xc2, 0x7d, 0x01, 0x48, 0x0b, 0x17, 0x42, 0x7d, 0x69, 0x46, 0x03,
+ 0x13, 0xc2, 0x7d, 0xd1, 0x48, 0x0b, 0x17, 0x42, 0x7e, 0x2d, 0xc4, 0x26,
+ 0x78, 0x0e, 0xbf, 0x09, 0xc5, 0x06, 0xdb, 0x0e, 0xbf, 0x01, 0x15, 0xc2,
+ 0x7e, 0x75, 0x08, 0xc2, 0x7e, 0x81, 0x16, 0xc2, 0x7e, 0x8d, 0xc3, 0x05,
+ 0x14, 0x0e, 0xbe, 0xc9, 0xc4, 0x15, 0xe7, 0x0e, 0xbe, 0xc0, 0x9c, 0x0e,
+ 0xab, 0x59, 0x9b, 0x0e, 0xab, 0x51, 0x9a, 0x0e, 0xab, 0x49, 0x99, 0x0e,
+ 0xab, 0x41, 0x98, 0x0e, 0xab, 0x39, 0x97, 0x0e, 0xab, 0x31, 0x96, 0x0e,
+ 0xab, 0x29, 0x95, 0x0e, 0xab, 0x21, 0x94, 0x0e, 0xab, 0x19, 0x93, 0x0e,
+ 0xab, 0x11, 0x92, 0x0e, 0xab, 0x09, 0x91, 0x0e, 0xab, 0x01, 0x90, 0x0e,
+ 0xaa, 0xf9, 0x8f, 0x0e, 0xaa, 0xf1, 0x8e, 0x0e, 0xaa, 0xe9, 0x8d, 0x0e,
+ 0xaa, 0xe1, 0x8c, 0x0e, 0xaa, 0xd9, 0x8b, 0x0e, 0xaa, 0xd1, 0x8a, 0x0e,
+ 0xaa, 0xc9, 0x89, 0x0e, 0xaa, 0xc1, 0x88, 0x0e, 0xaa, 0xb9, 0x87, 0x0e,
+ 0xaa, 0xb1, 0x86, 0x0e, 0xaa, 0xa9, 0x85, 0x0e, 0xaa, 0xa1, 0x84, 0x0e,
+ 0xaa, 0x99, 0x83, 0x0e, 0xaa, 0x90, 0x9b, 0x0e, 0xaa, 0x81, 0x9a, 0x0e,
+ 0xaa, 0x79, 0x99, 0x0e, 0xaa, 0x71, 0x98, 0x0e, 0xaa, 0x69, 0x97, 0x0e,
+ 0xaa, 0x61, 0x96, 0x0e, 0xaa, 0x59, 0x95, 0x0e, 0xaa, 0x51, 0x91, 0x0e,
+ 0xaa, 0x31, 0x8f, 0x0e, 0xaa, 0x21, 0x8e, 0x0e, 0xaa, 0x19, 0x8d, 0x0e,
+ 0xaa, 0x11, 0x8c, 0x0e, 0xaa, 0x09, 0x8b, 0x0e, 0xaa, 0x01, 0x89, 0x0e,
+ 0xa9, 0xf1, 0x88, 0x0e, 0xa9, 0xe9, 0x87, 0x0e, 0xa9, 0xe1, 0x86, 0x0e,
+ 0xa9, 0xd9, 0x84, 0x0e, 0xa9, 0xc9, 0x83, 0x0e, 0xa9, 0xc0, 0x46, 0x03,
+ 0x13, 0xc2, 0x7e, 0x99, 0x48, 0x0b, 0x17, 0x42, 0x7f, 0x01, 0xd5, 0x35,
+ 0x36, 0x01, 0x3f, 0x79, 0x46, 0x01, 0xfc, 0xc2, 0x7f, 0x55, 0xd4, 0x38,
+ 0xf4, 0x01, 0x3f, 0x59, 0xcd, 0x0b, 0x91, 0x01, 0x3f, 0x48, 0xd6, 0x08,
+ 0x88, 0x01, 0x3f, 0x61, 0xce, 0x25, 0xad, 0x01, 0x3f, 0x30, 0xc4, 0x18,
+ 0x10, 0x08, 0xea, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xea, 0xb0, 0xc3, 0x0d,
+ 0x14, 0x08, 0xea, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xea, 0xa0, 0xc4, 0x02,
+ 0xde, 0x08, 0xea, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xea, 0x90, 0x03, 0xc2,
+ 0x7f, 0x61, 0x91, 0x08, 0xe9, 0xe9, 0x87, 0x08, 0xe9, 0xd1, 0xc9, 0xb2,
+ 0x2d, 0x08, 0xe9, 0xb1, 0x97, 0x08, 0xe9, 0xa3, 0x02, 0x7f, 0x6d, 0x8b,
+ 0x08, 0xe9, 0x92, 0x02, 0x7f, 0x71, 0xc2, 0x00, 0x39, 0x08, 0xe9, 0x81,
+ 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0xe1, 0x83, 0x08, 0xe8, 0xd9, 0x16, 0x42,
+ 0x7f, 0x75, 0xc3, 0x2d, 0xfd, 0x08, 0xe9, 0x79, 0xc2, 0x00, 0xd0, 0x08,
+ 0xe8, 0xa1, 0x83, 0x08, 0xe8, 0x98, 0xc3, 0x1d, 0x35, 0x08, 0xe9, 0x71,
+ 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0x69, 0x83, 0x08, 0xe8, 0x60, 0xc2, 0x00,
+ 0xdb, 0x08, 0xe9, 0x69, 0x83, 0x08, 0xe9, 0x38, 0x83, 0x08, 0xe9, 0x59,
+ 0xc2, 0x0d, 0xf6, 0x08, 0xe9, 0x51, 0xc2, 0x00, 0xd0, 0x08, 0xe9, 0x48,
+ 0xc2, 0x00, 0xd0, 0x08, 0xe9, 0x19, 0x83, 0x08, 0xe9, 0x10, 0xc2, 0x00,
+ 0xd0, 0x08, 0xe9, 0x09, 0x83, 0x08, 0xe9, 0x00, 0x83, 0x08, 0xe8, 0xf9,
+ 0xc2, 0x00, 0xc1, 0x08, 0xe8, 0xd1, 0xc2, 0x19, 0x2c, 0x08, 0xe8, 0xa9,
+ 0xc2, 0x01, 0x30, 0x08, 0xe8, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0xf1,
+ 0x83, 0x08, 0xe8, 0xe9, 0x06, 0x42, 0x7f, 0x7f, 0xc2, 0x00, 0xd0, 0x08,
+ 0xe8, 0x91, 0x83, 0x08, 0xe8, 0x88, 0xc2, 0x00, 0xd0, 0x08, 0xe8, 0x79,
+ 0x83, 0x08, 0xe8, 0x70, 0x97, 0x08, 0xe8, 0x59, 0x8b, 0x08, 0xe8, 0x41,
+ 0x83, 0x08, 0xe8, 0x08, 0x97, 0x08, 0xe8, 0x28, 0x8b, 0x08, 0xe8, 0x18,
+ 0xcb, 0x1e, 0x89, 0x08, 0xe5, 0xb1, 0xc8, 0x14, 0x38, 0x08, 0xe5, 0xa8,
+ 0x83, 0x08, 0xe5, 0x79, 0xc2, 0x00, 0xd0, 0x08, 0xe5, 0x71, 0x15, 0xc2,
+ 0x7f, 0x89, 0xc2, 0x00, 0xdb, 0x08, 0xe5, 0x59, 0xc2, 0x00, 0x39, 0x08,
+ 0xe5, 0x51, 0xc2, 0x19, 0x2c, 0x08, 0xe5, 0x49, 0x1c, 0xc2, 0x7f, 0x93,
+ 0xc2, 0x01, 0x4a, 0x08, 0xe5, 0x29, 0x06, 0xc2, 0x7f, 0x9d, 0x16, 0xc2,
+ 0x7f, 0xa7, 0xc2, 0x01, 0xc3, 0x08, 0xe5, 0x09, 0xc2, 0x01, 0x5d, 0x08,
+ 0xe5, 0x01, 0x12, 0xc2, 0x7f, 0xb5, 0x10, 0xc2, 0x7f, 0xbf, 0xc2, 0x25,
+ 0x3b, 0x08, 0xe4, 0xc1, 0x05, 0xc2, 0x7f, 0xcf, 0xc2, 0x01, 0x30, 0x08,
+ 0xe4, 0xa1, 0x0d, 0x42, 0x7f, 0xd9, 0x83, 0x08, 0xe4, 0x69, 0xc2, 0x00,
+ 0xd0, 0x08, 0xe4, 0x60, 0x83, 0x08, 0xe4, 0x39, 0xc2, 0x00, 0xd0, 0x08,
+ 0xe4, 0x30, 0xc2, 0x02, 0x1c, 0x08, 0xe4, 0x21, 0x83, 0x08, 0xe3, 0xe0,
+ 0x15, 0xc2, 0x7f, 0xe3, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0xd9, 0x83, 0x08,
+ 0xe3, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0xf9, 0x83, 0x08, 0xe3, 0xf0,
+ 0x83, 0x08, 0xe3, 0xe9, 0xc2, 0x19, 0x2c, 0x08, 0xe3, 0xc9, 0xc2, 0x01,
+ 0x30, 0x08, 0xe3, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0xb9, 0x83, 0x08,
+ 0xe3, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xe3, 0x99, 0x83, 0x08, 0xe3, 0x90,
+ 0xd7, 0x29, 0x29, 0x00, 0x68, 0x01, 0xca, 0x1e, 0x8a, 0x00, 0x68, 0x09,
+ 0xce, 0x71, 0x5a, 0x00, 0x69, 0xe0, 0xc7, 0x14, 0x39, 0x00, 0x68, 0x11,
+ 0xc7, 0x7a, 0x7f, 0x00, 0x69, 0xe8, 0x0b, 0xc2, 0x7f, 0xed, 0xd2, 0x48,
+ 0xb3, 0x00, 0x69, 0xd8, 0xcd, 0x80, 0x36, 0x00, 0x68, 0x21, 0x47, 0xb2,
+ 0x2e, 0xc2, 0x7f, 0xf9, 0x83, 0x00, 0x69, 0xa8, 0x83, 0x00, 0x68, 0x31,
+ 0x8b, 0x00, 0x68, 0x81, 0x97, 0x00, 0x68, 0xa1, 0xc9, 0xa9, 0x90, 0x00,
+ 0x6a, 0xf8, 0x8b, 0x00, 0x68, 0x40, 0x97, 0x00, 0x68, 0x50, 0x87, 0x00,
+ 0x68, 0x78, 0x91, 0x00, 0x68, 0x98, 0x83, 0x00, 0x68, 0xa9, 0xc2, 0x00,
+ 0xd0, 0x00, 0x68, 0xb0, 0x83, 0x00, 0x68, 0xb9, 0xc2, 0x00, 0xd0, 0x00,
+ 0x68, 0xc0, 0xc2, 0x01, 0x30, 0x00, 0x68, 0xc9, 0xc2, 0x19, 0x2c, 0x00,
+ 0x68, 0xf1, 0x10, 0xc2, 0x80, 0x07, 0x83, 0x00, 0x69, 0x40, 0x83, 0x00,
+ 0x68, 0xd1, 0x0a, 0x42, 0x80, 0x11, 0x83, 0x00, 0x68, 0xe1, 0xc2, 0x00,
+ 0xd0, 0x00, 0x68, 0xe8, 0x16, 0xc2, 0x80, 0x1b, 0x83, 0x00, 0x69, 0x21,
+ 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x28, 0x06, 0xc2, 0x80, 0x2b, 0x83, 0x00,
+ 0x69, 0x31, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x39, 0xc7, 0xc7, 0x58, 0x00,
+ 0x6a, 0x70, 0x83, 0x00, 0x69, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x58,
+ 0x83, 0x00, 0x69, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x68, 0x83, 0x00,
+ 0x69, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x69, 0x88, 0x83, 0x00, 0x69, 0x91,
+ 0x0e, 0x42, 0x80, 0x35, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0xb1, 0xc2, 0x0d,
+ 0xf6, 0x00, 0x69, 0xb9, 0x83, 0x00, 0x69, 0xc0, 0x83, 0x00, 0x69, 0xf1,
+ 0x8b, 0x00, 0x6a, 0x41, 0x97, 0x00, 0x6a, 0x60, 0x8b, 0x00, 0x6a, 0x00,
+ 0x97, 0x00, 0x6a, 0x10, 0x94, 0x00, 0x6a, 0x1b, 0x02, 0x80, 0x3f, 0x8e,
+ 0x00, 0x6b, 0x12, 0x02, 0x80, 0x43, 0x87, 0x00, 0x6a, 0x38, 0x91, 0x00,
+ 0x6a, 0x58, 0xd8, 0x22, 0xbb, 0x00, 0x6a, 0xc1, 0x08, 0xc2, 0x80, 0x47,
+ 0x16, 0xc2, 0x80, 0x53, 0xc7, 0x08, 0x79, 0x00, 0x6b, 0x99, 0xc4, 0x01,
+ 0xce, 0x00, 0x6b, 0xa1, 0xc9, 0x67, 0x38, 0x00, 0x6b, 0xb1, 0xc6, 0x06,
+ 0xdb, 0x00, 0x6b, 0xb8, 0xca, 0xa3, 0xfa, 0x00, 0x6a, 0xd1, 0xca, 0x1e,
+ 0x15, 0x00, 0x6a, 0xe9, 0xc8, 0x08, 0x79, 0x00, 0x6b, 0xa9, 0xca, 0xa7,
+ 0x88, 0x00, 0x6b, 0xc0, 0xc4, 0x15, 0xe7, 0x00, 0x6b, 0x31, 0xc3, 0x05,
+ 0x14, 0x00, 0x6b, 0x39, 0x16, 0xc2, 0x80, 0x5f, 0x08, 0xc2, 0x80, 0x6b,
+ 0x15, 0xc2, 0x80, 0x77, 0xc5, 0x06, 0xdb, 0x00, 0x6b, 0x71, 0xc4, 0x26,
+ 0x78, 0x00, 0x6b, 0x78, 0xc7, 0x0d, 0x04, 0x00, 0x6b, 0x89, 0xc8, 0x4b,
+ 0x94, 0x00, 0x6b, 0x90, 0x96, 0x08, 0x57, 0xa3, 0x02, 0x80, 0x83, 0xd3,
+ 0x44, 0x43, 0x08, 0x57, 0x90, 0xc8, 0x0d, 0x03, 0x08, 0x57, 0x78, 0xc5,
+ 0x28, 0xee, 0x08, 0x57, 0x71, 0xc2, 0x00, 0xc4, 0x08, 0x57, 0x68, 0xc2,
+ 0x39, 0x8b, 0x08, 0x57, 0x21, 0xc6, 0xd2, 0xc5, 0x08, 0x56, 0xa9, 0xc3,
+ 0x1e, 0x1b, 0x08, 0x56, 0x70, 0xc4, 0x3e, 0x5a, 0x08, 0x57, 0x19, 0xc3,
+ 0x11, 0xef, 0x08, 0x57, 0x11, 0x03, 0x42, 0x80, 0x89, 0xc4, 0xe0, 0x03,
+ 0x08, 0x57, 0x01, 0xc3, 0x2d, 0x8a, 0x08, 0x56, 0xf0, 0xc3, 0x2d, 0x8a,
+ 0x08, 0x56, 0xf9, 0xc3, 0x00, 0xb6, 0x08, 0x56, 0x88, 0xc4, 0x40, 0x95,
+ 0x08, 0x56, 0xd1, 0xc3, 0x16, 0x5a, 0x08, 0x56, 0xc9, 0xc4, 0x36, 0xb5,
+ 0x08, 0x56, 0x00, 0xc6, 0xd2, 0xc5, 0x08, 0x56, 0xa1, 0xc5, 0x40, 0x9b,
+ 0x08, 0x56, 0x28, 0xc4, 0xdc, 0xe6, 0x08, 0x56, 0x91, 0xc3, 0x00, 0xb6,
+ 0x08, 0x56, 0x80, 0xc2, 0x00, 0x8e, 0x08, 0x56, 0x68, 0xc5, 0xd6, 0x78,
+ 0x08, 0x56, 0x61, 0xc4, 0x40, 0x95, 0x08, 0x56, 0x58, 0xc5, 0xd6, 0x78,
+ 0x08, 0x56, 0x51, 0xc4, 0x40, 0x95, 0x08, 0x56, 0x48, 0xc5, 0xd5, 0xdd,
+ 0x08, 0x56, 0x21, 0xc4, 0x9c, 0xa3, 0x08, 0x56, 0x18, 0xc4, 0x9b, 0x90,
+ 0x08, 0x56, 0x11, 0xc3, 0x1e, 0x1b, 0x08, 0x56, 0x08, 0xc2, 0x00, 0x74,
+ 0x00, 0x42, 0xc1, 0x96, 0x00, 0x42, 0xab, 0x02, 0x80, 0x95, 0x95, 0x00,
+ 0x42, 0x73, 0x02, 0x80, 0x99, 0x94, 0x00, 0x42, 0x99, 0x93, 0x00, 0x42,
+ 0x91, 0x92, 0x00, 0x42, 0x81, 0x90, 0x00, 0x42, 0x69, 0x8f, 0x00, 0x42,
+ 0x61, 0x8e, 0x00, 0x42, 0x59, 0x8d, 0x00, 0x42, 0x53, 0x02, 0x80, 0xa1,
+ 0x9c, 0x00, 0x42, 0x31, 0x8a, 0x00, 0x42, 0x21, 0x86, 0x00, 0x42, 0x19,
+ 0x89, 0x00, 0x42, 0x11, 0x84, 0x00, 0x42, 0x08, 0x90, 0x00, 0x42, 0x79,
+ 0x96, 0x00, 0x42, 0x38, 0x14, 0xc2, 0x80, 0xa7, 0xc2, 0x00, 0xd0, 0x08,
+ 0x8b, 0x89, 0xc2, 0x0d, 0xf6, 0x08, 0x8b, 0x81, 0xc2, 0x02, 0x41, 0x08,
+ 0x8b, 0x79, 0xc2, 0x00, 0xdb, 0x08, 0x8b, 0x71, 0xc2, 0x01, 0xc3, 0x08,
+ 0x8b, 0x61, 0x04, 0xc2, 0x80, 0xb1, 0x12, 0xc2, 0x80, 0xbb, 0x10, 0xc2,
+ 0x80, 0xc5, 0x06, 0xc2, 0x80, 0xd5, 0x16, 0xc2, 0x80, 0xe3, 0x0c, 0xc2,
+ 0x80, 0xf1, 0x05, 0xc2, 0x80, 0xfb, 0x09, 0xc2, 0x81, 0x05, 0x0d, 0xc2,
+ 0x81, 0x0f, 0x91, 0x08, 0x8a, 0xa1, 0x87, 0x08, 0x8a, 0x99, 0x97, 0x08,
+ 0x8a, 0x91, 0x8b, 0x08, 0x8a, 0x89, 0x83, 0x08, 0x8a, 0x80, 0x05, 0xc2,
+ 0x81, 0x19, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0xb8, 0x05, 0xc2, 0x81, 0x25,
+ 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0xa8, 0x05, 0xc2, 0x81, 0x31, 0xc7, 0xc0,
+ 0xcf, 0x0f, 0x80, 0xb0, 0x05, 0xc2, 0x81, 0x3d, 0xc7, 0xc0, 0xcf, 0x0f,
+ 0x80, 0xc0, 0x05, 0xc2, 0x81, 0x49, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x80,
+ 0x05, 0xc2, 0x81, 0x55, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x88, 0x05, 0xc2,
+ 0x81, 0x61, 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x90, 0x05, 0xc2, 0x81, 0x6d,
+ 0xc7, 0xc0, 0xcf, 0x0f, 0x80, 0x98, 0x05, 0xc2, 0x81, 0x79, 0xc7, 0xc0,
+ 0xcf, 0x0f, 0x80, 0xa0, 0x46, 0x10, 0x79, 0xc2, 0x81, 0x85, 0xc4, 0xe3,
+ 0x7f, 0x0f, 0x9d, 0xe0, 0xcb, 0x8d, 0x0b, 0x0f, 0x9c, 0xc0, 0x9a, 0x01,
+ 0x38, 0xa9, 0xc4, 0x00, 0xba, 0x00, 0x06, 0xba, 0x02, 0x81, 0xeb, 0xc5,
+ 0x13, 0x84, 0x01, 0x14, 0x71, 0xce, 0x1f, 0x18, 0x01, 0x14, 0x68, 0xc2,
+ 0x00, 0xd0, 0x08, 0x95, 0x41, 0xc2, 0x00, 0x39, 0x08, 0x95, 0x39, 0x83,
+ 0x08, 0x95, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xf9, 0x83, 0x08, 0x94,
+ 0xe8, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xe1, 0x83, 0x08, 0x94, 0xd8, 0x83,
+ 0x08, 0x94, 0xd1, 0xc2, 0x00, 0xc1, 0x08, 0x94, 0xa9, 0xc2, 0x19, 0x2c,
+ 0x08, 0x94, 0x78, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xc9, 0x83, 0x08, 0x94,
+ 0xc1, 0x06, 0x42, 0x81, 0xef, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0xb9, 0x83,
+ 0x08, 0x94, 0xb1, 0x16, 0x42, 0x81, 0xff, 0x83, 0x08, 0x94, 0x61, 0xc2,
+ 0x25, 0x3b, 0x08, 0x94, 0x68, 0x83, 0x08, 0x94, 0x51, 0xc2, 0x00, 0xd0,
+ 0x08, 0x94, 0x58, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0x41, 0x83, 0x08, 0x94,
+ 0x30, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0x29, 0x83, 0x08, 0x94, 0x20, 0xc3,
+ 0x4d, 0x47, 0x05, 0x4f, 0x29, 0x45, 0x28, 0xb1, 0xc2, 0x82, 0x09, 0x48,
+ 0xba, 0xb2, 0x42, 0x82, 0x19, 0xc3, 0x02, 0x9f, 0x05, 0x53, 0xc9, 0xc3,
+ 0x05, 0x14, 0x05, 0x53, 0xc1, 0xcb, 0x0f, 0x09, 0x05, 0x53, 0xb8, 0x44,
+ 0x3d, 0xbb, 0x42, 0x82, 0x25, 0x48, 0x68, 0x93, 0x42, 0x82, 0x69, 0x83,
+ 0x00, 0x80, 0x59, 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x60, 0x83, 0x00, 0x82,
+ 0x83, 0x02, 0x82, 0x89, 0x4b, 0x91, 0x8e, 0x42, 0x82, 0x8f, 0xc2, 0x19,
+ 0x2c, 0x00, 0x80, 0x51, 0x83, 0x00, 0x80, 0x78, 0x83, 0x00, 0x80, 0x69,
+ 0xc2, 0x00, 0xd0, 0x00, 0x80, 0x70, 0x87, 0x00, 0x81, 0x41, 0xc3, 0x20,
+ 0xf1, 0x00, 0x82, 0xd1, 0xc3, 0xe5, 0xf0, 0x00, 0x82, 0xd9, 0x42, 0x3f,
+ 0x98, 0x42, 0x82, 0x9b, 0xc3, 0x00, 0xcf, 0x00, 0x83, 0x29, 0xc3, 0x09,
+ 0x0e, 0x00, 0x83, 0x30, 0xc3, 0x3a, 0x09, 0x00, 0x83, 0x71, 0xc3, 0xdf,
+ 0x5b, 0x00, 0x83, 0x79, 0xc4, 0xaa, 0x0d, 0x00, 0x83, 0x80, 0x94, 0x00,
+ 0x82, 0x98, 0x8e, 0x00, 0x82, 0xa8, 0x8b, 0x00, 0x84, 0xe8, 0xc6, 0x00,
+ 0xd3, 0x00, 0x84, 0x28, 0x45, 0x03, 0x14, 0xc2, 0x82, 0xa3, 0x83, 0x01,
+ 0x85, 0xa9, 0x8b, 0x01, 0x85, 0xb9, 0x97, 0x01, 0x85, 0xc9, 0x87, 0x01,
+ 0x85, 0xd9, 0x91, 0x01, 0x85, 0xe8, 0x47, 0x78, 0xc0, 0x42, 0x82, 0xe0,
+ 0x8b, 0x01, 0x86, 0xfb, 0x02, 0x82, 0xee, 0x83, 0x01, 0x86, 0xf1, 0x97,
+ 0x01, 0x87, 0x01, 0x87, 0x01, 0x87, 0x09, 0x91, 0x01, 0x87, 0x10, 0x83,
+ 0x01, 0x85, 0x59, 0x8b, 0x01, 0x85, 0x69, 0x97, 0x01, 0x85, 0x79, 0x87,
+ 0x01, 0x85, 0x89, 0x91, 0x01, 0x85, 0x98, 0x83, 0x01, 0x85, 0x61, 0x8b,
+ 0x01, 0x85, 0x71, 0x97, 0x01, 0x85, 0x81, 0x87, 0x01, 0x85, 0x91, 0x91,
+ 0x01, 0x85, 0xa0, 0x83, 0x01, 0x85, 0xb1, 0x8b, 0x01, 0x85, 0xc1, 0x97,
+ 0x01, 0x85, 0xd1, 0x87, 0x01, 0x85, 0xe1, 0x91, 0x01, 0x85, 0xf0, 0x83,
+ 0x01, 0x85, 0xf9, 0x8b, 0x01, 0x86, 0x09, 0x97, 0x01, 0x86, 0x21, 0x87,
+ 0x01, 0x86, 0x31, 0x91, 0x01, 0x86, 0x40, 0x83, 0x01, 0x86, 0x01, 0x8b,
+ 0x01, 0x86, 0x11, 0x97, 0x01, 0x86, 0x29, 0x87, 0x01, 0x86, 0x39, 0x91,
+ 0x01, 0x86, 0x48, 0x83, 0x01, 0x86, 0x51, 0x8b, 0x01, 0x86, 0x59, 0x97,
+ 0x01, 0x86, 0x61, 0x87, 0x01, 0x86, 0x69, 0x91, 0x01, 0x86, 0x70, 0x83,
+ 0x01, 0x86, 0x79, 0x8b, 0x01, 0x86, 0x91, 0x97, 0x01, 0x86, 0xa9, 0x87,
+ 0x01, 0x86, 0xc1, 0x91, 0x01, 0x86, 0xd8, 0x83, 0x01, 0x86, 0x81, 0x8b,
+ 0x01, 0x86, 0x99, 0x97, 0x01, 0x86, 0xb1, 0x87, 0x01, 0x86, 0xc9, 0x91,
+ 0x01, 0x86, 0xe0, 0x83, 0x01, 0x86, 0x89, 0x8b, 0x01, 0x86, 0xa1, 0x97,
+ 0x01, 0x86, 0xb9, 0x87, 0x01, 0x86, 0xd1, 0x91, 0x01, 0x86, 0xe8, 0x83,
+ 0x01, 0x87, 0x21, 0x97, 0x01, 0x87, 0x31, 0x91, 0x01, 0x87, 0x40, 0x83,
+ 0x01, 0x87, 0x49, 0x8b, 0x01, 0x87, 0x51, 0x97, 0x01, 0x87, 0x59, 0x87,
+ 0x01, 0x87, 0x61, 0x91, 0x01, 0x87, 0x68, 0x83, 0x01, 0x87, 0x79, 0x8b,
+ 0x01, 0x87, 0x81, 0x87, 0x01, 0x87, 0x89, 0x91, 0x01, 0x87, 0x90, 0x97,
+ 0x01, 0x87, 0xa1, 0x83, 0x01, 0x87, 0xb9, 0x8b, 0x01, 0x87, 0xc1, 0x87,
+ 0x01, 0x87, 0xc9, 0x91, 0x01, 0x87, 0xd0, 0xc4, 0x1e, 0x97, 0x08, 0x85,
+ 0xc9, 0xc5, 0x40, 0xe7, 0x08, 0x84, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0x84,
+ 0xd9, 0xc3, 0x40, 0xe2, 0x08, 0x84, 0xd1, 0x83, 0x08, 0x84, 0xc8, 0xc2,
+ 0x00, 0xd0, 0x08, 0x84, 0xc1, 0x83, 0x08, 0x84, 0xb8, 0xd2, 0x4a, 0x87,
+ 0x00, 0x64, 0x01, 0xc6, 0xc3, 0x62, 0x00, 0x64, 0x20, 0xc7, 0x14, 0x39,
+ 0x00, 0x64, 0x11, 0xc7, 0x7a, 0x7f, 0x00, 0x65, 0xe8, 0xc5, 0x40, 0xe7,
+ 0x00, 0x64, 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x66, 0x68, 0x83, 0x00, 0x64,
+ 0x2b, 0x02, 0x82, 0xf4, 0x8b, 0x00, 0x64, 0x3b, 0x02, 0x83, 0x00, 0x97,
+ 0x00, 0x64, 0x4b, 0x02, 0x83, 0x04, 0x18, 0xc2, 0x83, 0x08, 0x87, 0x00,
+ 0x64, 0x73, 0x02, 0x83, 0x12, 0x91, 0x00, 0x64, 0x93, 0x02, 0x83, 0x16,
+ 0x0d, 0xc2, 0x83, 0x1a, 0x09, 0xc2, 0x83, 0x24, 0x10, 0xc2, 0x83, 0x2e,
+ 0x05, 0xc2, 0x83, 0x47, 0x0c, 0xc2, 0x83, 0x51, 0x16, 0xc2, 0x83, 0x5b,
+ 0x06, 0xc2, 0x83, 0x69, 0x12, 0xc2, 0x83, 0x77, 0x04, 0xc2, 0x83, 0x81,
+ 0xc2, 0x01, 0xc3, 0x00, 0x65, 0x71, 0xc2, 0x19, 0x2c, 0x00, 0x65, 0x79,
+ 0x14, 0xc2, 0x83, 0x8b, 0x0e, 0xc2, 0x83, 0x95, 0x15, 0xc2, 0x83, 0x9d,
+ 0xc2, 0x00, 0xd0, 0x00, 0x65, 0xc9, 0xc2, 0x00, 0x87, 0x00, 0x66, 0xf0,
+ 0x83, 0x00, 0x65, 0xf1, 0x8b, 0x00, 0x66, 0x41, 0x97, 0x00, 0x66, 0x60,
+ 0x8b, 0x00, 0x66, 0x00, 0x97, 0x00, 0x66, 0x10, 0x94, 0x00, 0x66, 0x1b,
+ 0x02, 0x83, 0xad, 0x8e, 0x00, 0x67, 0x12, 0x02, 0x83, 0xb1, 0x87, 0x00,
+ 0x66, 0x38, 0x91, 0x00, 0x66, 0x58, 0xc2, 0x02, 0xa0, 0x00, 0x67, 0x41,
+ 0xc4, 0x02, 0xde, 0x00, 0x67, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x67, 0x51,
+ 0xc3, 0x0d, 0x14, 0x00, 0x67, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x67, 0x61,
+ 0xc4, 0x18, 0x10, 0x00, 0x67, 0x68, 0xc2, 0x02, 0x6f, 0x01, 0x78, 0x03,
+ 0x02, 0x83, 0xb5, 0x12, 0xc2, 0x83, 0xbb, 0xc2, 0x18, 0xb3, 0x01, 0x7b,
+ 0xe0, 0x0b, 0xc2, 0x83, 0xc7, 0x07, 0xc2, 0x83, 0xd7, 0x03, 0xc2, 0x83,
+ 0xe7, 0xc3, 0x08, 0x48, 0x01, 0x7d, 0x3a, 0x02, 0x83, 0xf3, 0x11, 0xc2,
+ 0x83, 0xf9, 0x0b, 0xc2, 0x84, 0x1c, 0x14, 0xc2, 0x84, 0x2c, 0x07, 0x42,
+ 0x84, 0x3c, 0x0e, 0xc2, 0x84, 0x48, 0x07, 0xc2, 0x84, 0x52, 0x12, 0xc2,
+ 0x84, 0x68, 0x05, 0xc2, 0x84, 0x7e, 0xc4, 0x03, 0x14, 0x01, 0x79, 0x49,
+ 0x0a, 0xc2, 0x84, 0x8a, 0xc4, 0xb0, 0xd3, 0x01, 0x79, 0xc9, 0x16, 0xc2,
+ 0x84, 0x92, 0xc5, 0x0b, 0x0a, 0x01, 0x7a, 0x29, 0xc2, 0x05, 0x1d, 0x01,
+ 0x7a, 0x39, 0x03, 0xc2, 0x84, 0xa0, 0xc4, 0x49, 0x26, 0x01, 0x7b, 0x11,
+ 0x0b, 0xc2, 0x84, 0xb0, 0xc3, 0x56, 0x1d, 0x01, 0x7b, 0x51, 0xc4, 0x0d,
+ 0xed, 0x01, 0x7d, 0x98, 0x11, 0xc2, 0x84, 0xbc, 0xcf, 0x67, 0xec, 0x01,
+ 0x78, 0xb1, 0x07, 0xc2, 0x84, 0xc6, 0x03, 0x42, 0x84, 0xd0, 0xc2, 0x02,
+ 0xa0, 0x01, 0x78, 0x33, 0x02, 0x84, 0xe0, 0x03, 0xc2, 0x84, 0xe6, 0xc2,
+ 0x00, 0xc4, 0x01, 0x78, 0xb9, 0x42, 0x00, 0x33, 0xc2, 0x84, 0xf8, 0x14,
+ 0xc2, 0x85, 0x04, 0x0b, 0xc2, 0x85, 0x16, 0x11, 0x42, 0x85, 0x22, 0xc2,
+ 0x00, 0xd1, 0x01, 0x78, 0x41, 0x11, 0xc2, 0x85, 0x2e, 0x07, 0xc2, 0x85,
+ 0x3c, 0x0b, 0x42, 0x85, 0x48, 0x10, 0xc2, 0x85, 0x54, 0xc4, 0x00, 0x2d,
+ 0x01, 0x78, 0x59, 0x03, 0xc2, 0x85, 0x60, 0xc3, 0x18, 0x11, 0x01, 0x7e,
+ 0x8b, 0x02, 0x85, 0x6b, 0xc2, 0x0c, 0x43, 0x01, 0x7b, 0x61, 0xc9, 0xa9,
+ 0xf3, 0x01, 0x7e, 0x58, 0x11, 0xc2, 0x85, 0x71, 0x0e, 0xc2, 0x85, 0x8d,
+ 0xc4, 0xdf, 0xbb, 0x01, 0x79, 0x31, 0x03, 0xc2, 0x85, 0x9d, 0xc3, 0x25,
+ 0x4d, 0x01, 0x7d, 0x10, 0xc2, 0x00, 0x89, 0x01, 0x78, 0x71, 0x10, 0x42,
+ 0x85, 0xaf, 0xc4, 0x00, 0x27, 0x01, 0x78, 0x91, 0x14, 0xc2, 0x85, 0xbb,
+ 0xc3, 0x01, 0xc8, 0x01, 0x7b, 0xf1, 0xc2, 0x00, 0x2d, 0x01, 0x7c, 0xb8,
+ 0x14, 0xc2, 0x85, 0xc7, 0x11, 0xc2, 0x85, 0xd3, 0x07, 0xc2, 0x85, 0xdf,
+ 0x03, 0xc2, 0x85, 0xeb, 0x0a, 0xc2, 0x85, 0xfa, 0x42, 0x00, 0x74, 0x42,
+ 0x86, 0x06, 0x0b, 0xc2, 0x86, 0x0e, 0xc3, 0xbb, 0x1c, 0x01, 0x79, 0x39,
+ 0x03, 0xc2, 0x86, 0x20, 0xc2, 0x00, 0xa8, 0x01, 0x7c, 0xd1, 0xc2, 0x05,
+ 0x1d, 0x01, 0x7c, 0xd8, 0xc4, 0x46, 0xf6, 0x01, 0x78, 0xe1, 0xc2, 0x24,
+ 0xe2, 0x01, 0x7a, 0x21, 0x42, 0x01, 0xa3, 0xc2, 0x86, 0x2e, 0xc2, 0x02,
+ 0x35, 0x01, 0x7b, 0xe8, 0x91, 0x01, 0x79, 0x0b, 0x02, 0x86, 0x3a, 0x42,
+ 0x00, 0x39, 0xc2, 0x86, 0x46, 0xc3, 0x00, 0xfe, 0x01, 0x7d, 0x41, 0xc4,
+ 0xe0, 0x07, 0x01, 0x7e, 0x08, 0x0b, 0xc2, 0x86, 0x52, 0x11, 0xc2, 0x86,
+ 0x62, 0x14, 0xc2, 0x86, 0x7e, 0x03, 0xc2, 0x86, 0x90, 0x0e, 0xc2, 0x86,
+ 0x9c, 0xc3, 0x0e, 0x8b, 0x01, 0x7c, 0xb0, 0x11, 0xc2, 0x86, 0xae, 0xc2,
+ 0x00, 0x3d, 0x01, 0x7b, 0xc8, 0xc2, 0x00, 0x33, 0x01, 0x7a, 0x89, 0x0b,
+ 0xc2, 0x86, 0xb8, 0x03, 0xc2, 0x86, 0xd0, 0xc6, 0x14, 0xdb, 0x01, 0x7b,
+ 0xd9, 0xc3, 0x65, 0xba, 0x01, 0x7c, 0xe1, 0x0e, 0xc2, 0x86, 0xe2, 0x14,
+ 0x42, 0x86, 0xec, 0xc2, 0x00, 0x06, 0x01, 0x7a, 0xf9, 0x94, 0x01, 0x7b,
+ 0xc0, 0xc5, 0xd9, 0xf2, 0x01, 0x7c, 0xa9, 0xc6, 0xd0, 0xaf, 0x01, 0x7d,
+ 0x28, 0xa2, 0x0c, 0x66, 0xa9, 0xa1, 0x0c, 0x66, 0xa1, 0xa0, 0x0c, 0x66,
+ 0x99, 0x9f, 0x0c, 0x66, 0x91, 0x9e, 0x0c, 0x66, 0x89, 0x9d, 0x0c, 0x66,
+ 0x80, 0x88, 0x0c, 0x66, 0x79, 0x87, 0x0c, 0x66, 0x71, 0x86, 0x0c, 0x66,
+ 0x69, 0x85, 0x0c, 0x66, 0x61, 0x84, 0x0c, 0x66, 0x59, 0x83, 0x0c, 0x66,
+ 0x51, 0xa6, 0x0c, 0x66, 0x49, 0xa5, 0x0c, 0x66, 0x41, 0xa4, 0x0c, 0x66,
+ 0x39, 0xa3, 0x0c, 0x66, 0x31, 0xa2, 0x0c, 0x66, 0x29, 0xa1, 0x0c, 0x66,
+ 0x21, 0xa0, 0x0c, 0x66, 0x19, 0x9f, 0x0c, 0x66, 0x11, 0x9e, 0x0c, 0x66,
+ 0x09, 0x9d, 0x0c, 0x66, 0x00, 0x88, 0x0c, 0x65, 0xf9, 0x87, 0x0c, 0x65,
+ 0xf1, 0x86, 0x0c, 0x65, 0xe9, 0x85, 0x0c, 0x65, 0xe1, 0x84, 0x0c, 0x65,
+ 0xd9, 0x83, 0x0c, 0x65, 0xd1, 0xa6, 0x0c, 0x65, 0xc9, 0xa5, 0x0c, 0x65,
+ 0xc1, 0xa4, 0x0c, 0x65, 0xb9, 0xa3, 0x0c, 0x65, 0xb1, 0xa2, 0x0c, 0x65,
+ 0xa9, 0xa1, 0x0c, 0x65, 0xa1, 0xa0, 0x0c, 0x65, 0x99, 0x9f, 0x0c, 0x65,
+ 0x91, 0x9e, 0x0c, 0x65, 0x89, 0x9d, 0x0c, 0x65, 0x80, 0x88, 0x0c, 0x65,
+ 0x79, 0x87, 0x0c, 0x65, 0x71, 0x86, 0x0c, 0x65, 0x69, 0x85, 0x0c, 0x65,
+ 0x61, 0x84, 0x0c, 0x65, 0x59, 0x83, 0x0c, 0x65, 0x51, 0xa6, 0x0c, 0x65,
+ 0x49, 0xa5, 0x0c, 0x65, 0x41, 0xa4, 0x0c, 0x65, 0x39, 0xa3, 0x0c, 0x65,
+ 0x31, 0xa2, 0x0c, 0x65, 0x29, 0xa1, 0x0c, 0x65, 0x21, 0xa0, 0x0c, 0x65,
+ 0x19, 0x9f, 0x0c, 0x65, 0x11, 0x9e, 0x0c, 0x65, 0x09, 0x9d, 0x0c, 0x65,
+ 0x00, 0x88, 0x0c, 0x64, 0xf9, 0x87, 0x0c, 0x64, 0xf1, 0x86, 0x0c, 0x64,
+ 0xe9, 0x85, 0x0c, 0x64, 0xe1, 0x84, 0x0c, 0x64, 0xd9, 0x83, 0x0c, 0x64,
+ 0xd1, 0xa6, 0x0c, 0x64, 0xc9, 0xa5, 0x0c, 0x64, 0xc1, 0xa4, 0x0c, 0x64,
+ 0xb9, 0xa3, 0x0c, 0x64, 0xb1, 0xa2, 0x0c, 0x64, 0xa9, 0xa1, 0x0c, 0x64,
+ 0xa1, 0xa0, 0x0c, 0x64, 0x99, 0x9f, 0x0c, 0x64, 0x91, 0x9e, 0x0c, 0x64,
+ 0x89, 0x9d, 0x0c, 0x64, 0x80, 0x88, 0x0c, 0x64, 0x79, 0x87, 0x0c, 0x64,
+ 0x71, 0x86, 0x0c, 0x64, 0x69, 0x85, 0x0c, 0x64, 0x61, 0x84, 0x0c, 0x64,
+ 0x59, 0x83, 0x0c, 0x64, 0x51, 0xa6, 0x0c, 0x64, 0x49, 0xa5, 0x0c, 0x64,
+ 0x41, 0xa4, 0x0c, 0x64, 0x39, 0xa3, 0x0c, 0x64, 0x31, 0xa2, 0x0c, 0x64,
+ 0x29, 0xa1, 0x0c, 0x64, 0x21, 0xa0, 0x0c, 0x64, 0x19, 0x9f, 0x0c, 0x64,
+ 0x11, 0x9e, 0x0c, 0x64, 0x09, 0x9d, 0x0c, 0x64, 0x00, 0x88, 0x0c, 0x63,
+ 0xf9, 0x87, 0x0c, 0x63, 0xf1, 0x86, 0x0c, 0x63, 0xe9, 0x85, 0x0c, 0x63,
+ 0xe1, 0x84, 0x0c, 0x63, 0xd9, 0x83, 0x0c, 0x63, 0xd1, 0xa6, 0x0c, 0x63,
+ 0xc9, 0xa5, 0x0c, 0x63, 0xc1, 0xa4, 0x0c, 0x63, 0xb9, 0xa3, 0x0c, 0x63,
+ 0xb1, 0xa2, 0x0c, 0x63, 0xa9, 0xa1, 0x0c, 0x63, 0xa1, 0xa0, 0x0c, 0x63,
+ 0x99, 0x9f, 0x0c, 0x63, 0x91, 0x9e, 0x0c, 0x63, 0x89, 0x9d, 0x0c, 0x63,
+ 0x80, 0x88, 0x0c, 0x63, 0x79, 0x87, 0x0c, 0x63, 0x71, 0x86, 0x0c, 0x63,
+ 0x69, 0x85, 0x0c, 0x63, 0x61, 0x84, 0x0c, 0x63, 0x59, 0x83, 0x0c, 0x63,
+ 0x51, 0xa6, 0x0c, 0x63, 0x49, 0xa5, 0x0c, 0x63, 0x41, 0xa4, 0x0c, 0x63,
+ 0x39, 0xa3, 0x0c, 0x63, 0x31, 0xa2, 0x0c, 0x63, 0x29, 0xa1, 0x0c, 0x63,
+ 0x21, 0xa0, 0x0c, 0x63, 0x19, 0x9f, 0x0c, 0x63, 0x11, 0x9e, 0x0c, 0x63,
+ 0x09, 0x9d, 0x0c, 0x63, 0x00, 0x88, 0x0c, 0x62, 0xf9, 0x87, 0x0c, 0x62,
+ 0xf1, 0x86, 0x0c, 0x62, 0xe9, 0x85, 0x0c, 0x62, 0xe1, 0x84, 0x0c, 0x62,
+ 0xd9, 0x83, 0x0c, 0x62, 0xd1, 0xa6, 0x0c, 0x62, 0xc9, 0xa5, 0x0c, 0x62,
+ 0xc1, 0xa4, 0x0c, 0x62, 0xb9, 0xa3, 0x0c, 0x62, 0xb1, 0xa2, 0x0c, 0x62,
+ 0xa9, 0xa1, 0x0c, 0x62, 0xa1, 0xa0, 0x0c, 0x62, 0x99, 0x9f, 0x0c, 0x62,
+ 0x91, 0x9e, 0x0c, 0x62, 0x89, 0x9d, 0x0c, 0x62, 0x80, 0x88, 0x0c, 0x62,
+ 0x79, 0x87, 0x0c, 0x62, 0x71, 0x86, 0x0c, 0x62, 0x69, 0x85, 0x0c, 0x62,
+ 0x61, 0x84, 0x0c, 0x62, 0x59, 0x83, 0x0c, 0x62, 0x51, 0xa6, 0x0c, 0x62,
+ 0x49, 0xa5, 0x0c, 0x62, 0x41, 0xa4, 0x0c, 0x62, 0x39, 0xa3, 0x0c, 0x62,
+ 0x31, 0xa2, 0x0c, 0x62, 0x29, 0xa1, 0x0c, 0x62, 0x21, 0xa0, 0x0c, 0x62,
+ 0x19, 0x9f, 0x0c, 0x62, 0x11, 0x9e, 0x0c, 0x62, 0x09, 0x9d, 0x0c, 0x62,
+ 0x00, 0x88, 0x0c, 0x61, 0xf9, 0x87, 0x0c, 0x61, 0xf1, 0x86, 0x0c, 0x61,
+ 0xe9, 0x85, 0x0c, 0x61, 0xe1, 0x84, 0x0c, 0x61, 0xd9, 0x83, 0x0c, 0x61,
+ 0xd1, 0xa6, 0x0c, 0x61, 0xc9, 0xa5, 0x0c, 0x61, 0xc1, 0xa4, 0x0c, 0x61,
+ 0xb9, 0xa3, 0x0c, 0x61, 0xb1, 0xa2, 0x0c, 0x61, 0xa9, 0xa1, 0x0c, 0x61,
+ 0xa1, 0xa0, 0x0c, 0x61, 0x99, 0x9f, 0x0c, 0x61, 0x91, 0x9e, 0x0c, 0x61,
+ 0x89, 0x9d, 0x0c, 0x61, 0x80, 0x88, 0x0c, 0x61, 0x79, 0x87, 0x0c, 0x61,
+ 0x71, 0x86, 0x0c, 0x61, 0x69, 0x85, 0x0c, 0x61, 0x61, 0x84, 0x0c, 0x61,
+ 0x59, 0x83, 0x0c, 0x61, 0x51, 0xa6, 0x0c, 0x61, 0x49, 0xa5, 0x0c, 0x61,
+ 0x41, 0xa4, 0x0c, 0x61, 0x39, 0xa3, 0x0c, 0x61, 0x31, 0xa2, 0x0c, 0x61,
+ 0x29, 0xa1, 0x0c, 0x61, 0x21, 0xa0, 0x0c, 0x61, 0x19, 0x9f, 0x0c, 0x61,
+ 0x11, 0x9e, 0x0c, 0x61, 0x09, 0x9d, 0x0c, 0x61, 0x00, 0x88, 0x0c, 0x60,
+ 0xf9, 0x87, 0x0c, 0x60, 0xf1, 0x86, 0x0c, 0x60, 0xe9, 0x85, 0x0c, 0x60,
+ 0xe1, 0x84, 0x0c, 0x60, 0xd9, 0x83, 0x0c, 0x60, 0xd1, 0xa6, 0x0c, 0x60,
+ 0xc9, 0xa5, 0x0c, 0x60, 0xc1, 0xa4, 0x0c, 0x60, 0xb9, 0xa3, 0x0c, 0x60,
+ 0xb1, 0xa2, 0x0c, 0x60, 0xa9, 0xa1, 0x0c, 0x60, 0xa1, 0xa0, 0x0c, 0x60,
+ 0x99, 0x9f, 0x0c, 0x60, 0x91, 0x9e, 0x0c, 0x60, 0x89, 0x9d, 0x0c, 0x60,
+ 0x80, 0x88, 0x0c, 0x60, 0x79, 0x87, 0x0c, 0x60, 0x71, 0x86, 0x0c, 0x60,
+ 0x69, 0x85, 0x0c, 0x60, 0x61, 0x84, 0x0c, 0x60, 0x59, 0x83, 0x0c, 0x60,
+ 0x51, 0xa6, 0x0c, 0x60, 0x49, 0xa5, 0x0c, 0x60, 0x41, 0xa4, 0x0c, 0x60,
+ 0x39, 0xa3, 0x0c, 0x60, 0x31, 0xa2, 0x0c, 0x60, 0x29, 0xa1, 0x0c, 0x60,
+ 0x21, 0xa0, 0x0c, 0x60, 0x19, 0x9f, 0x0c, 0x60, 0x11, 0x9e, 0x0c, 0x60,
+ 0x09, 0x9d, 0x0c, 0x60, 0x00, 0x88, 0x0c, 0x5f, 0xf9, 0x87, 0x0c, 0x5f,
+ 0xf1, 0x86, 0x0c, 0x5f, 0xe9, 0x85, 0x0c, 0x5f, 0xe1, 0x84, 0x0c, 0x5f,
+ 0xd9, 0x83, 0x0c, 0x5f, 0xd1, 0xa6, 0x0c, 0x5f, 0xc9, 0xa5, 0x0c, 0x5f,
+ 0xc1, 0xa4, 0x0c, 0x5f, 0xb9, 0xa3, 0x0c, 0x5f, 0xb1, 0xa2, 0x0c, 0x5f,
+ 0xa9, 0xa1, 0x0c, 0x5f, 0xa1, 0xa0, 0x0c, 0x5f, 0x99, 0x9f, 0x0c, 0x5f,
+ 0x91, 0x9e, 0x0c, 0x5f, 0x89, 0x9d, 0x0c, 0x5f, 0x80, 0x88, 0x0c, 0x5f,
+ 0x79, 0x87, 0x0c, 0x5f, 0x71, 0x86, 0x0c, 0x5f, 0x69, 0x85, 0x0c, 0x5f,
+ 0x61, 0x84, 0x0c, 0x5f, 0x59, 0x83, 0x0c, 0x5f, 0x51, 0xa6, 0x0c, 0x5f,
+ 0x49, 0xa5, 0x0c, 0x5f, 0x41, 0xa4, 0x0c, 0x5f, 0x39, 0xa3, 0x0c, 0x5f,
+ 0x31, 0xa2, 0x0c, 0x5f, 0x29, 0xa1, 0x0c, 0x5f, 0x21, 0xa0, 0x0c, 0x5f,
+ 0x19, 0x9f, 0x0c, 0x5f, 0x11, 0x9e, 0x0c, 0x5f, 0x09, 0x9d, 0x0c, 0x5f,
+ 0x00, 0x88, 0x0c, 0x5e, 0xf9, 0x87, 0x0c, 0x5e, 0xf1, 0x86, 0x0c, 0x5e,
+ 0xe9, 0x85, 0x0c, 0x5e, 0xe1, 0x84, 0x0c, 0x5e, 0xd9, 0x83, 0x0c, 0x5e,
+ 0xd1, 0xa6, 0x0c, 0x5e, 0xc9, 0xa5, 0x0c, 0x5e, 0xc1, 0xa4, 0x0c, 0x5e,
+ 0xb9, 0xa3, 0x0c, 0x5e, 0xb1, 0xa2, 0x0c, 0x5e, 0xa9, 0xa1, 0x0c, 0x5e,
+ 0xa1, 0xa0, 0x0c, 0x5e, 0x99, 0x9f, 0x0c, 0x5e, 0x91, 0x9e, 0x0c, 0x5e,
+ 0x89, 0x9d, 0x0c, 0x5e, 0x80, 0x88, 0x0c, 0x5e, 0x79, 0x87, 0x0c, 0x5e,
+ 0x71, 0x86, 0x0c, 0x5e, 0x69, 0x85, 0x0c, 0x5e, 0x61, 0x84, 0x0c, 0x5e,
+ 0x59, 0x83, 0x0c, 0x5e, 0x51, 0xa6, 0x0c, 0x5e, 0x49, 0xa5, 0x0c, 0x5e,
+ 0x41, 0xa4, 0x0c, 0x5e, 0x39, 0xa3, 0x0c, 0x5e, 0x31, 0xa2, 0x0c, 0x5e,
+ 0x29, 0xa1, 0x0c, 0x5e, 0x21, 0xa0, 0x0c, 0x5e, 0x19, 0x9f, 0x0c, 0x5e,
+ 0x11, 0x9e, 0x0c, 0x5e, 0x09, 0x9d, 0x0c, 0x5e, 0x00, 0x88, 0x0c, 0x5d,
+ 0xf9, 0x87, 0x0c, 0x5d, 0xf1, 0x86, 0x0c, 0x5d, 0xe9, 0x85, 0x0c, 0x5d,
+ 0xe1, 0x84, 0x0c, 0x5d, 0xd9, 0x83, 0x0c, 0x5d, 0xd1, 0xa6, 0x0c, 0x5d,
+ 0xc9, 0xa5, 0x0c, 0x5d, 0xc1, 0xa4, 0x0c, 0x5d, 0xb9, 0xa3, 0x0c, 0x5d,
+ 0xb1, 0xa2, 0x0c, 0x5d, 0xa9, 0xa1, 0x0c, 0x5d, 0xa1, 0xa0, 0x0c, 0x5d,
+ 0x99, 0x9f, 0x0c, 0x5d, 0x91, 0x9e, 0x0c, 0x5d, 0x89, 0x9d, 0x0c, 0x5d,
+ 0x80, 0x88, 0x0c, 0x5d, 0x79, 0x87, 0x0c, 0x5d, 0x71, 0x86, 0x0c, 0x5d,
+ 0x69, 0x85, 0x0c, 0x5d, 0x61, 0x84, 0x0c, 0x5d, 0x59, 0x83, 0x0c, 0x5d,
+ 0x51, 0xa6, 0x0c, 0x5d, 0x49, 0xa5, 0x0c, 0x5d, 0x41, 0xa4, 0x0c, 0x5d,
+ 0x39, 0xa3, 0x0c, 0x5d, 0x31, 0xa2, 0x0c, 0x5d, 0x29, 0xa1, 0x0c, 0x5d,
+ 0x21, 0xa0, 0x0c, 0x5d, 0x19, 0x9f, 0x0c, 0x5d, 0x11, 0x9e, 0x0c, 0x5d,
+ 0x09, 0x9d, 0x0c, 0x5d, 0x00, 0x88, 0x0c, 0x5c, 0xf9, 0x87, 0x0c, 0x5c,
+ 0xf1, 0x86, 0x0c, 0x5c, 0xe9, 0x85, 0x0c, 0x5c, 0xe1, 0x84, 0x0c, 0x5c,
+ 0xd9, 0x83, 0x0c, 0x5c, 0xd1, 0xa6, 0x0c, 0x5c, 0xc9, 0xa5, 0x0c, 0x5c,
+ 0xc1, 0xa4, 0x0c, 0x5c, 0xb9, 0xa3, 0x0c, 0x5c, 0xb1, 0xa2, 0x0c, 0x5c,
+ 0xa9, 0xa1, 0x0c, 0x5c, 0xa1, 0xa0, 0x0c, 0x5c, 0x99, 0x9f, 0x0c, 0x5c,
+ 0x91, 0x9e, 0x0c, 0x5c, 0x89, 0x9d, 0x0c, 0x5c, 0x80, 0x88, 0x0c, 0x5c,
+ 0x79, 0x87, 0x0c, 0x5c, 0x71, 0x86, 0x0c, 0x5c, 0x69, 0x85, 0x0c, 0x5c,
+ 0x61, 0x84, 0x0c, 0x5c, 0x59, 0x83, 0x0c, 0x5c, 0x51, 0xa6, 0x0c, 0x5c,
+ 0x49, 0xa5, 0x0c, 0x5c, 0x41, 0xa4, 0x0c, 0x5c, 0x39, 0xa3, 0x0c, 0x5c,
+ 0x31, 0xa2, 0x0c, 0x5c, 0x29, 0xa1, 0x0c, 0x5c, 0x21, 0xa0, 0x0c, 0x5c,
+ 0x19, 0x9f, 0x0c, 0x5c, 0x11, 0x9e, 0x0c, 0x5c, 0x09, 0x9d, 0x0c, 0x5c,
+ 0x00, 0x88, 0x0c, 0x5b, 0xf9, 0x87, 0x0c, 0x5b, 0xf1, 0x86, 0x0c, 0x5b,
+ 0xe9, 0x85, 0x0c, 0x5b, 0xe1, 0x84, 0x0c, 0x5b, 0xd9, 0x83, 0x0c, 0x5b,
+ 0xd1, 0xa6, 0x0c, 0x5b, 0xc9, 0xa5, 0x0c, 0x5b, 0xc1, 0xa4, 0x0c, 0x5b,
+ 0xb9, 0xa3, 0x0c, 0x5b, 0xb1, 0xa2, 0x0c, 0x5b, 0xa9, 0xa1, 0x0c, 0x5b,
+ 0xa1, 0xa0, 0x0c, 0x5b, 0x99, 0x9f, 0x0c, 0x5b, 0x91, 0x9e, 0x0c, 0x5b,
+ 0x89, 0x9d, 0x0c, 0x5b, 0x80, 0x88, 0x0c, 0x5b, 0x79, 0x87, 0x0c, 0x5b,
+ 0x71, 0x86, 0x0c, 0x5b, 0x69, 0x85, 0x0c, 0x5b, 0x61, 0x84, 0x0c, 0x5b,
+ 0x59, 0x83, 0x0c, 0x5b, 0x51, 0xa6, 0x0c, 0x5b, 0x49, 0xa5, 0x0c, 0x5b,
+ 0x41, 0xa4, 0x0c, 0x5b, 0x39, 0xa3, 0x0c, 0x5b, 0x31, 0xa2, 0x0c, 0x5b,
+ 0x29, 0xa1, 0x0c, 0x5b, 0x21, 0xa0, 0x0c, 0x5b, 0x19, 0x9f, 0x0c, 0x5b,
+ 0x11, 0x9e, 0x0c, 0x5b, 0x09, 0x9d, 0x0c, 0x5b, 0x00, 0x88, 0x0c, 0x5a,
+ 0xf9, 0x87, 0x0c, 0x5a, 0xf1, 0x86, 0x0c, 0x5a, 0xe9, 0x85, 0x0c, 0x5a,
+ 0xe1, 0x84, 0x0c, 0x5a, 0xd9, 0x83, 0x0c, 0x5a, 0xd1, 0xa6, 0x0c, 0x5a,
+ 0xc9, 0xa5, 0x0c, 0x5a, 0xc1, 0xa4, 0x0c, 0x5a, 0xb9, 0xa3, 0x0c, 0x5a,
+ 0xb1, 0xa2, 0x0c, 0x5a, 0xa9, 0xa1, 0x0c, 0x5a, 0xa1, 0xa0, 0x0c, 0x5a,
+ 0x99, 0x9f, 0x0c, 0x5a, 0x91, 0x9e, 0x0c, 0x5a, 0x89, 0x9d, 0x0c, 0x5a,
+ 0x80, 0x88, 0x0c, 0x5a, 0x79, 0x87, 0x0c, 0x5a, 0x71, 0x86, 0x0c, 0x5a,
+ 0x69, 0x85, 0x0c, 0x5a, 0x61, 0x84, 0x0c, 0x5a, 0x59, 0x83, 0x0c, 0x5a,
+ 0x51, 0xa6, 0x0c, 0x5a, 0x49, 0xa5, 0x0c, 0x5a, 0x41, 0xa4, 0x0c, 0x5a,
+ 0x39, 0xa3, 0x0c, 0x5a, 0x31, 0xa2, 0x0c, 0x5a, 0x29, 0xa1, 0x0c, 0x5a,
+ 0x21, 0xa0, 0x0c, 0x5a, 0x19, 0x9f, 0x0c, 0x5a, 0x11, 0x9e, 0x0c, 0x5a,
+ 0x09, 0x9d, 0x0c, 0x5a, 0x00, 0x88, 0x0c, 0x59, 0xf9, 0x87, 0x0c, 0x59,
+ 0xf1, 0x86, 0x0c, 0x59, 0xe9, 0x85, 0x0c, 0x59, 0xe1, 0x84, 0x0c, 0x59,
+ 0xd9, 0x83, 0x0c, 0x59, 0xd1, 0xa6, 0x0c, 0x59, 0xc9, 0xa5, 0x0c, 0x59,
+ 0xc1, 0xa4, 0x0c, 0x59, 0xb9, 0xa3, 0x0c, 0x59, 0xb1, 0xa2, 0x0c, 0x59,
+ 0xa9, 0xa1, 0x0c, 0x59, 0xa1, 0xa0, 0x0c, 0x59, 0x99, 0x9f, 0x0c, 0x59,
+ 0x91, 0x9e, 0x0c, 0x59, 0x89, 0x9d, 0x0c, 0x59, 0x80, 0x88, 0x0c, 0x59,
+ 0x79, 0x87, 0x0c, 0x59, 0x71, 0x86, 0x0c, 0x59, 0x69, 0x85, 0x0c, 0x59,
+ 0x61, 0x84, 0x0c, 0x59, 0x59, 0x83, 0x0c, 0x59, 0x51, 0xa6, 0x0c, 0x59,
+ 0x49, 0xa5, 0x0c, 0x59, 0x41, 0xa4, 0x0c, 0x59, 0x39, 0xa3, 0x0c, 0x59,
+ 0x31, 0xa2, 0x0c, 0x59, 0x29, 0xa1, 0x0c, 0x59, 0x21, 0xa0, 0x0c, 0x59,
+ 0x19, 0x9f, 0x0c, 0x59, 0x11, 0x9e, 0x0c, 0x59, 0x09, 0x9d, 0x0c, 0x59,
+ 0x00, 0x88, 0x0c, 0x58, 0xf9, 0x87, 0x0c, 0x58, 0xf1, 0x86, 0x0c, 0x58,
+ 0xe9, 0x85, 0x0c, 0x58, 0xe1, 0x84, 0x0c, 0x58, 0xd9, 0x83, 0x0c, 0x58,
+ 0xd1, 0xa6, 0x0c, 0x58, 0xc9, 0xa5, 0x0c, 0x58, 0xc1, 0xa4, 0x0c, 0x58,
+ 0xb9, 0xa3, 0x0c, 0x58, 0xb1, 0xa2, 0x0c, 0x58, 0xa9, 0xa1, 0x0c, 0x58,
+ 0xa1, 0xa0, 0x0c, 0x58, 0x99, 0x9f, 0x0c, 0x58, 0x91, 0x9e, 0x0c, 0x58,
+ 0x89, 0x9d, 0x0c, 0x58, 0x80, 0x88, 0x0c, 0x58, 0x79, 0x87, 0x0c, 0x58,
+ 0x71, 0x86, 0x0c, 0x58, 0x69, 0x85, 0x0c, 0x58, 0x61, 0x84, 0x0c, 0x58,
+ 0x59, 0x83, 0x0c, 0x58, 0x51, 0xa6, 0x0c, 0x58, 0x49, 0xa5, 0x0c, 0x58,
+ 0x41, 0xa4, 0x0c, 0x58, 0x39, 0xa3, 0x0c, 0x58, 0x31, 0xa2, 0x0c, 0x58,
+ 0x29, 0xa1, 0x0c, 0x58, 0x21, 0xa0, 0x0c, 0x58, 0x19, 0x9f, 0x0c, 0x58,
+ 0x11, 0x9e, 0x0c, 0x58, 0x09, 0x9d, 0x0c, 0x58, 0x00, 0xc2, 0x00, 0xd0,
+ 0x08, 0x96, 0x59, 0xc2, 0x0e, 0x9a, 0x08, 0x96, 0x49, 0x83, 0x08, 0x96,
+ 0x40, 0xc2, 0x00, 0xd0, 0x08, 0x96, 0x39, 0x83, 0x08, 0x96, 0x30, 0xc4,
+ 0xdb, 0xfb, 0x08, 0x91, 0xf1, 0xc5, 0xd7, 0x18, 0x08, 0x91, 0xb8, 0xc2,
+ 0x0e, 0x9a, 0x08, 0x90, 0xe1, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xb9, 0x83,
+ 0x08, 0x90, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0x90, 0xa9, 0x83, 0x08, 0x90,
+ 0xa0, 0x02, 0xc2, 0x86, 0xf8, 0x00, 0x42, 0x87, 0x06, 0x43, 0x13, 0x3a,
+ 0xc2, 0x87, 0x12, 0x43, 0x71, 0xed, 0xc2, 0x87, 0x1a, 0xc9, 0xb0, 0xc5,
+ 0x00, 0xcf, 0x00, 0x44, 0xdf, 0x37, 0xc2, 0x87, 0x26, 0x43, 0x93, 0x74,
+ 0x42, 0x87, 0x32, 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0x89, 0xc4, 0xe0, 0xaf,
+ 0x00, 0xcf, 0x08, 0x12, 0xc2, 0x87, 0x3e, 0x04, 0xc2, 0x87, 0x4d, 0xc4,
+ 0xda, 0x97, 0x00, 0xbf, 0x89, 0xc3, 0x18, 0x91, 0x00, 0xbf, 0x80, 0xc7,
+ 0xc6, 0x24, 0x00, 0xbe, 0xe9, 0xcc, 0x89, 0x31, 0x00, 0xbe, 0xe1, 0xc4,
+ 0xe0, 0x0b, 0x00, 0xbe, 0x78, 0xc6, 0xcd, 0xb5, 0x00, 0xbe, 0xd1, 0xc3,
+ 0x00, 0xd0, 0x00, 0xbe, 0xa1, 0xc6, 0xcd, 0x97, 0x00, 0xbe, 0x70, 0xc5,
+ 0xdc, 0x22, 0x00, 0xbe, 0xc1, 0x03, 0x42, 0x87, 0x59, 0xce, 0x71, 0xe6,
+ 0x00, 0xbe, 0xb1, 0xc4, 0xe4, 0x1f, 0x00, 0xbe, 0x90, 0xca, 0xa3, 0x50,
+ 0x00, 0xbe, 0x69, 0xc6, 0xcc, 0xdd, 0x00, 0xbe, 0x50, 0xc4, 0xe4, 0x17,
+ 0x00, 0xbe, 0x61, 0xc6, 0xd3, 0xd9, 0x00, 0xbe, 0x38, 0x97, 0x00, 0xbe,
+ 0x29, 0x8b, 0x00, 0xbe, 0x19, 0x87, 0x00, 0xbe, 0x11, 0x83, 0x00, 0xbd,
+ 0xb0, 0x91, 0x00, 0xbe, 0x21, 0x87, 0x00, 0xbd, 0xf0, 0x87, 0x00, 0xbe,
+ 0x01, 0x8b, 0x00, 0xbd, 0xc0, 0x83, 0x00, 0xbd, 0xf9, 0x9b, 0x00, 0xbd,
+ 0xd0, 0x83, 0x00, 0xbd, 0xe9, 0x97, 0x00, 0xbd, 0xe0, 0x97, 0x00, 0xbd,
+ 0x99, 0x8b, 0x00, 0xbd, 0x81, 0x83, 0x00, 0xbd, 0x21, 0x93, 0x00, 0xbd,
+ 0x18, 0xc3, 0x02, 0x9f, 0x00, 0xbd, 0x91, 0xc3, 0x05, 0x14, 0x00, 0xbd,
+ 0x88, 0x97, 0x00, 0xbd, 0x4b, 0x02, 0x87, 0x6b, 0x8d, 0x00, 0xbd, 0x40,
+ 0x8b, 0x00, 0xbd, 0x30, 0x91, 0x00, 0xbc, 0xb9, 0x83, 0x00, 0xbc, 0xa8,
+ 0x91, 0x00, 0xbc, 0x91, 0x83, 0x00, 0xbc, 0x80, 0x91, 0x00, 0xbc, 0x69,
+ 0x83, 0x00, 0xbc, 0x58, 0x91, 0x00, 0xbc, 0x41, 0x83, 0x00, 0xbc, 0x30,
+ 0x91, 0x00, 0xbc, 0x19, 0x83, 0x00, 0xbc, 0x08, 0xca, 0x97, 0xf6, 0x08,
+ 0x52, 0xb9, 0x96, 0x08, 0x52, 0x80, 0x91, 0x08, 0x50, 0x31, 0x87, 0x08,
+ 0x50, 0x29, 0xc9, 0xb2, 0x2d, 0x08, 0x50, 0x19, 0x97, 0x08, 0x50, 0x11,
+ 0x8b, 0x08, 0x50, 0x08, 0x16, 0xc2, 0x87, 0x6f, 0xc2, 0x00, 0xd0, 0x08,
+ 0x50, 0xd9, 0x83, 0x08, 0x50, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0x50, 0xe9,
+ 0x83, 0x08, 0x50, 0xe0, 0xcb, 0x20, 0x9d, 0x0f, 0xb0, 0xd1, 0xcc, 0x1d,
+ 0x4a, 0x0f, 0xb0, 0xc8, 0xd7, 0x2a, 0xf5, 0x0f, 0xd2, 0x68, 0x49, 0x2a,
+ 0xf5, 0x42, 0x87, 0x79, 0xc3, 0x00, 0x74, 0x0f, 0xd0, 0x03, 0x02, 0x87,
+ 0x85, 0xc5, 0x56, 0xa5, 0x0f, 0xd0, 0x22, 0x02, 0x87, 0x8b, 0x49, 0x2a,
+ 0xf5, 0x42, 0x87, 0x91, 0x49, 0x2a, 0xf5, 0x42, 0x87, 0x9d, 0x49, 0x2a,
+ 0xf5, 0x42, 0x87, 0xa9, 0x0d, 0xc2, 0x87, 0xb5, 0xc5, 0xa8, 0xf7, 0x0f,
+ 0xd1, 0x59, 0xc4, 0xde, 0x83, 0x0f, 0xd1, 0x61, 0xc6, 0xca, 0xfd, 0x0f,
+ 0xd1, 0x69, 0xc4, 0xe3, 0x93, 0x0f, 0xd1, 0x78, 0x43, 0x00, 0xbc, 0xc2,
+ 0x87, 0xc1, 0xc4, 0xe3, 0x5b, 0x08, 0xa2, 0x50, 0xcd, 0x80, 0x36, 0x08,
+ 0xa2, 0xf9, 0x47, 0xb2, 0x2e, 0x42, 0x87, 0xe9, 0x83, 0x08, 0xa1, 0x99,
+ 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x89, 0xc2, 0x0d, 0xf6, 0x08, 0xa1, 0x90,
+ 0x83, 0x08, 0xa1, 0x19, 0xc2, 0x00, 0xc1, 0x08, 0xa0, 0xf1, 0x1b, 0xc2,
+ 0x87, 0xf7, 0x09, 0xc2, 0x88, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x20,
+ 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x11, 0x83, 0x08, 0xa1, 0x09, 0x06, 0x42,
+ 0x88, 0x0b, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x01, 0x83, 0x08, 0xa0, 0xf9,
+ 0x16, 0x42, 0x88, 0x15, 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0xb9, 0x83, 0x08,
+ 0xa0, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0xa9, 0x83, 0x08, 0xa0, 0xa0,
+ 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0x89, 0x83, 0x08, 0xa0, 0x80, 0xc2, 0x00,
+ 0xd0, 0x08, 0xa0, 0x79, 0x83, 0x08, 0xa0, 0x70, 0x97, 0x08, 0xa0, 0x69,
+ 0x8b, 0x08, 0xa0, 0x59, 0x83, 0x08, 0xa0, 0x08, 0x97, 0x08, 0xa0, 0x28,
+ 0x8b, 0x08, 0xa0, 0x18, 0x83, 0x08, 0xa1, 0x29, 0xc2, 0x00, 0xd0, 0x08,
+ 0xa1, 0x30, 0x83, 0x08, 0xa1, 0x39, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x40,
+ 0x83, 0x08, 0xa1, 0x49, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x50, 0x83, 0x08,
+ 0xa1, 0x61, 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x68, 0x83, 0x08, 0xa1, 0x71,
+ 0xc2, 0x00, 0xd0, 0x08, 0xa1, 0x78, 0xc5, 0x0a, 0x8a, 0x08, 0xa2, 0xd1,
+ 0xc5, 0x86, 0x20, 0x08, 0xa2, 0x60, 0xc4, 0x26, 0x78, 0x08, 0xa2, 0xc9,
+ 0xc5, 0x06, 0xdb, 0x08, 0xa2, 0xc1, 0x15, 0xc2, 0x88, 0x1f, 0x08, 0xc2,
+ 0x88, 0x2b, 0x16, 0xc2, 0x88, 0x37, 0xc3, 0x05, 0x14, 0x08, 0xa2, 0x89,
+ 0xc4, 0x15, 0xe7, 0x08, 0xa2, 0x80, 0x97, 0x08, 0xa2, 0x09, 0x8b, 0x08,
+ 0xa1, 0xf9, 0x83, 0x08, 0xa1, 0xa8, 0x8e, 0x08, 0xa1, 0xe3, 0x02, 0x88,
+ 0x43, 0x94, 0x08, 0xa1, 0xd2, 0x02, 0x88, 0x47, 0x97, 0x08, 0xa1, 0xc8,
+ 0x8b, 0x08, 0xa1, 0xb8, 0x98, 0x00, 0xce, 0xf8, 0xcd, 0x78, 0xf3, 0x00,
+ 0xce, 0xd1, 0x49, 0xac, 0xb1, 0x42, 0x88, 0x4b, 0xc4, 0x26, 0x78, 0x00,
+ 0xce, 0xc9, 0xc5, 0x06, 0xdb, 0x00, 0xce, 0xc1, 0x15, 0xc2, 0x88, 0x53,
+ 0x08, 0xc2, 0x88, 0x5f, 0x16, 0xc2, 0x88, 0x6b, 0xc3, 0x05, 0x14, 0x00,
+ 0xce, 0x89, 0xc4, 0x15, 0xe7, 0x00, 0xce, 0x80, 0x46, 0x26, 0xf7, 0xc2,
+ 0x88, 0x77, 0x44, 0x05, 0x36, 0xc2, 0x88, 0x92, 0x45, 0x08, 0xcb, 0x42,
+ 0x88, 0xe0, 0x0b, 0xc2, 0x89, 0x2e, 0x97, 0x00, 0xcd, 0x9b, 0x02, 0x89,
+ 0x36, 0x91, 0x00, 0xcd, 0xbb, 0x02, 0x89, 0x45, 0x03, 0xc2, 0x89, 0x50,
+ 0x87, 0x00, 0xcd, 0xa9, 0xcf, 0x6a, 0x35, 0x00, 0xcd, 0x80, 0x9c, 0x0f,
+ 0x8c, 0x49, 0x9b, 0x0f, 0x8c, 0x41, 0x9a, 0x0f, 0x8c, 0x39, 0x99, 0x0f,
+ 0x8c, 0x31, 0x98, 0x0f, 0x8c, 0x29, 0x97, 0x0f, 0x8c, 0x21, 0x96, 0x0f,
+ 0x8c, 0x19, 0x95, 0x0f, 0x8c, 0x11, 0x94, 0x0f, 0x8c, 0x09, 0x93, 0x0f,
+ 0x8c, 0x01, 0x92, 0x0f, 0x8b, 0xf9, 0x91, 0x0f, 0x8b, 0xf1, 0x90, 0x0f,
+ 0x8b, 0xe9, 0x8f, 0x0f, 0x8b, 0xe1, 0x8e, 0x0f, 0x8b, 0xd9, 0x8d, 0x0f,
+ 0x8b, 0xd1, 0x8c, 0x0f, 0x8b, 0xc9, 0x8b, 0x0f, 0x8b, 0xc1, 0x8a, 0x0f,
+ 0x8b, 0xb9, 0x89, 0x0f, 0x8b, 0xb1, 0x88, 0x0f, 0x8b, 0xa9, 0x87, 0x0f,
+ 0x8b, 0xa1, 0x86, 0x0f, 0x8b, 0x99, 0x85, 0x0f, 0x8b, 0x91, 0x84, 0x0f,
+ 0x8b, 0x89, 0x83, 0x0f, 0x8b, 0x80, 0x16, 0xc2, 0x89, 0x5f, 0xc8, 0x4b,
+ 0x5f, 0x01, 0x27, 0x99, 0x07, 0xc2, 0x89, 0x6b, 0x15, 0xc2, 0x89, 0x77,
+ 0x08, 0x42, 0x89, 0x83, 0x9c, 0x0f, 0x8b, 0x49, 0x9b, 0x0f, 0x8b, 0x41,
+ 0x9a, 0x0f, 0x8b, 0x39, 0x99, 0x0f, 0x8b, 0x31, 0x98, 0x0f, 0x8b, 0x29,
+ 0x97, 0x0f, 0x8b, 0x21, 0x96, 0x0f, 0x8b, 0x19, 0x95, 0x0f, 0x8b, 0x11,
+ 0x94, 0x0f, 0x8b, 0x09, 0x93, 0x0f, 0x8b, 0x01, 0x92, 0x0f, 0x8a, 0xf9,
+ 0x91, 0x0f, 0x8a, 0xf1, 0x90, 0x0f, 0x8a, 0xe9, 0x8f, 0x0f, 0x8a, 0xe1,
+ 0x8e, 0x0f, 0x8a, 0xd9, 0x8d, 0x0f, 0x8a, 0xd1, 0x8c, 0x0f, 0x8a, 0xc9,
+ 0x8b, 0x0f, 0x8a, 0xc1, 0x8a, 0x0f, 0x8a, 0xb9, 0x89, 0x0f, 0x8a, 0xb1,
+ 0x88, 0x0f, 0x8a, 0xa9, 0x87, 0x0f, 0x8a, 0xa1, 0x86, 0x0f, 0x8a, 0x99,
+ 0x85, 0x0f, 0x8a, 0x91, 0x84, 0x0f, 0x8a, 0x89, 0x83, 0x0f, 0x8a, 0x80,
+ 0x97, 0x08, 0xce, 0xe9, 0x8b, 0x08, 0xce, 0xd9, 0x83, 0x08, 0xce, 0x88,
+ 0x94, 0x08, 0xce, 0xb8, 0x97, 0x08, 0xce, 0xa8, 0x8b, 0x08, 0xce, 0x98,
+ 0xc7, 0x7a, 0x7f, 0x08, 0xcf, 0x09, 0xc7, 0x14, 0x39, 0x08, 0xce, 0xf0,
+ 0xc4, 0x1e, 0x97, 0x08, 0xcf, 0x01, 0xc5, 0x40, 0xe7, 0x08, 0xce, 0xf8,
+ 0xc2, 0x00, 0x39, 0x08, 0xce, 0x81, 0x83, 0x08, 0xce, 0x40, 0xc2, 0x00,
+ 0xdb, 0x08, 0xce, 0x79, 0x83, 0x08, 0xce, 0x48, 0x83, 0x08, 0xce, 0x69,
+ 0xc2, 0x0d, 0xf6, 0x08, 0xce, 0x61, 0xc2, 0x00, 0xd0, 0x08, 0xce, 0x58,
+ 0x83, 0x08, 0xce, 0x51, 0xc8, 0xb2, 0x2e, 0x08, 0xcd, 0x32, 0x02, 0x89,
+ 0x8f, 0xc2, 0x00, 0xd0, 0x08, 0xce, 0x29, 0x83, 0x08, 0xce, 0x20, 0xc2,
+ 0x00, 0xd0, 0x08, 0xce, 0x19, 0x83, 0x08, 0xce, 0x10, 0x83, 0x08, 0xce,
+ 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xcd, 0xe1, 0xc2, 0x19, 0x2c, 0x08, 0xcd,
+ 0xb9, 0xc2, 0x01, 0x30, 0x08, 0xcd, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0xce,
+ 0x01, 0x83, 0x08, 0xcd, 0xf9, 0x06, 0x42, 0x89, 0x93, 0xc2, 0x00, 0xd0,
+ 0x08, 0xcd, 0xf1, 0x83, 0x08, 0xcd, 0xe9, 0x16, 0x42, 0x89, 0x9d, 0xc2,
+ 0x00, 0xd0, 0x08, 0xcd, 0xb1, 0x83, 0x08, 0xcd, 0xa8, 0xc2, 0x00, 0xd0,
+ 0x08, 0xcd, 0xa1, 0x83, 0x08, 0xcd, 0x98, 0xc2, 0x00, 0xd0, 0x08, 0xcd,
+ 0x89, 0x83, 0x08, 0xcd, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xcd, 0x79, 0x83,
+ 0x08, 0xcd, 0x70, 0x97, 0x08, 0xcd, 0x69, 0x8b, 0x08, 0xcd, 0x59, 0x83,
+ 0x08, 0xcd, 0x08, 0x97, 0x08, 0xcd, 0x28, 0x8b, 0x08, 0xcd, 0x18, 0xc8,
+ 0x0d, 0x03, 0x08, 0x45, 0x78, 0x19, 0xc2, 0x89, 0xa7, 0xc2, 0x00, 0xc4,
+ 0x08, 0x45, 0x69, 0xc4, 0x02, 0xde, 0x08, 0x45, 0x48, 0xc3, 0x0d, 0x14,
+ 0x08, 0x45, 0x61, 0xc3, 0x09, 0x9e, 0x08, 0x45, 0x50, 0xc2, 0x39, 0x8b,
+ 0x08, 0x44, 0xf1, 0xc3, 0x1e, 0x1b, 0x08, 0x44, 0x58, 0xc3, 0x11, 0xef,
+ 0x08, 0x44, 0xe9, 0x03, 0x42, 0x89, 0xb1, 0xc4, 0x3e, 0x5a, 0x08, 0x44,
+ 0xe1, 0xc3, 0x20, 0x18, 0x08, 0x44, 0xa1, 0xc3, 0x00, 0x4e, 0x08, 0x44,
+ 0x91, 0xc6, 0xcf, 0xd7, 0x08, 0x44, 0x81, 0xc4, 0xe0, 0xe7, 0x08, 0x44,
+ 0x71, 0xc4, 0x4a, 0xb9, 0x08, 0x44, 0x61, 0xc2, 0x01, 0x7f, 0x08, 0x44,
+ 0x31, 0xc4, 0xe3, 0x27, 0x08, 0x44, 0x11, 0xc5, 0xa5, 0xfd, 0x08, 0x44,
+ 0x00, 0xc3, 0x16, 0x5a, 0x08, 0x44, 0xb9, 0xc4, 0x36, 0xb5, 0x08, 0x44,
+ 0x08, 0xc2, 0x00, 0x8e, 0x08, 0x44, 0x50, 0x49, 0x01, 0xaa, 0xc2, 0x89,
+ 0xbd, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xb9, 0x03, 0xc2, 0x89, 0xcf, 0xcb,
+ 0x01, 0xfc, 0x01, 0x58, 0x01, 0xcb, 0x94, 0x22, 0x01, 0x58, 0x41, 0xd5,
+ 0x01, 0x92, 0x01, 0x5b, 0x3b, 0x02, 0x89, 0xde, 0xd0, 0x5b, 0xc2, 0x0f,
+ 0xc2, 0xa8, 0x03, 0xc2, 0x89, 0xe4, 0xcc, 0x82, 0x35, 0x01, 0x0e, 0xb1,
+ 0x49, 0x01, 0xaa, 0xc2, 0x89, 0xf3, 0xcb, 0x01, 0xfc, 0x01, 0x58, 0x09,
+ 0xcb, 0x94, 0x22, 0x01, 0x58, 0x49, 0xd5, 0x01, 0x92, 0x01, 0x5b, 0x33,
+ 0x02, 0x8a, 0x05, 0xd0, 0x5b, 0xc2, 0x0f, 0xc2, 0xa0, 0x49, 0x53, 0xa9,
+ 0xc2, 0x8a, 0x0b, 0x43, 0x00, 0xe3, 0xc2, 0x8a, 0x17, 0xd0, 0x5f, 0x92,
+ 0x05, 0x41, 0xb9, 0xca, 0xa6, 0xc0, 0x05, 0x41, 0xc0, 0xe0, 0x0c, 0x07,
+ 0x01, 0x3d, 0x78, 0xd7, 0x27, 0xb9, 0x01, 0x17, 0x19, 0xd4, 0x3c, 0x50,
+ 0x01, 0x17, 0x10, 0xc9, 0x2d, 0xd0, 0x01, 0x14, 0x29, 0xc7, 0x3a, 0x20,
+ 0x01, 0x14, 0x20, 0xc2, 0x00, 0xdb, 0x0f, 0x08, 0xf1, 0x83, 0x0f, 0x08,
+ 0xe0, 0xc2, 0x8d, 0x8f, 0x0f, 0x08, 0x99, 0xc2, 0x0d, 0xf6, 0x0f, 0x08,
+ 0x69, 0x83, 0x0f, 0x08, 0x10, 0x84, 0x0d, 0x97, 0xd9, 0x83, 0x0d, 0x97,
+ 0xd1, 0xa6, 0x0d, 0x97, 0xc9, 0xa5, 0x0d, 0x97, 0xc1, 0xa4, 0x0d, 0x97,
+ 0xb9, 0xa3, 0x0d, 0x97, 0xb1, 0xa2, 0x0d, 0x97, 0xa9, 0xa1, 0x0d, 0x97,
+ 0xa1, 0xa0, 0x0d, 0x97, 0x99, 0x9f, 0x0d, 0x97, 0x91, 0x9e, 0x0d, 0x97,
+ 0x89, 0x9d, 0x0d, 0x97, 0x80, 0x88, 0x0d, 0x97, 0x79, 0x87, 0x0d, 0x97,
+ 0x71, 0x86, 0x0d, 0x97, 0x69, 0x83, 0x0d, 0x97, 0x51, 0xa6, 0x0d, 0x97,
+ 0x49, 0xa2, 0x0d, 0x97, 0x29, 0x85, 0x0d, 0x97, 0x61, 0x84, 0x0d, 0x97,
+ 0x59, 0xa5, 0x0d, 0x97, 0x41, 0xa4, 0x0d, 0x97, 0x39, 0xa3, 0x0d, 0x97,
+ 0x31, 0xa1, 0x0d, 0x97, 0x21, 0xa0, 0x0d, 0x97, 0x19, 0x9f, 0x0d, 0x97,
+ 0x11, 0x9e, 0x0d, 0x97, 0x09, 0x9d, 0x0d, 0x97, 0x00, 0x83, 0x0d, 0x95,
+ 0xd1, 0x88, 0x0d, 0x95, 0xf9, 0x87, 0x0d, 0x95, 0xf1, 0xa6, 0x0d, 0x95,
+ 0xc9, 0xa5, 0x0d, 0x95, 0xc1, 0xa4, 0x0d, 0x95, 0xb9, 0xa3, 0x0d, 0x95,
+ 0xb1, 0xa2, 0x0d, 0x95, 0xa9, 0xa1, 0x0d, 0x95, 0xa1, 0xa0, 0x0d, 0x95,
+ 0x99, 0x9f, 0x0d, 0x95, 0x91, 0x9e, 0x0d, 0x95, 0x89, 0x9d, 0x0d, 0x95,
+ 0x81, 0x84, 0x0d, 0x95, 0xd9, 0x85, 0x0d, 0x95, 0xe1, 0x86, 0x0d, 0x95,
+ 0xe8, 0x83, 0x0d, 0x94, 0xd1, 0xa6, 0x0d, 0x94, 0xc9, 0xa5, 0x0d, 0x94,
+ 0xc1, 0xa4, 0x0d, 0x94, 0xb9, 0xa3, 0x0d, 0x94, 0xb1, 0xa2, 0x0d, 0x94,
+ 0xa9, 0xa1, 0x0d, 0x94, 0xa1, 0xa0, 0x0d, 0x94, 0x99, 0x9f, 0x0d, 0x94,
+ 0x91, 0x9e, 0x0d, 0x94, 0x89, 0x9d, 0x0d, 0x94, 0x81, 0x88, 0x0d, 0x94,
+ 0xf9, 0x87, 0x0d, 0x94, 0xf1, 0x86, 0x0d, 0x94, 0xe9, 0x85, 0x0d, 0x94,
+ 0xe1, 0x84, 0x0d, 0x94, 0xd8, 0x88, 0x0d, 0x94, 0x79, 0x87, 0x0d, 0x94,
+ 0x71, 0x86, 0x0d, 0x94, 0x69, 0x85, 0x0d, 0x94, 0x61, 0x84, 0x0d, 0x94,
+ 0x59, 0x83, 0x0d, 0x94, 0x51, 0xa6, 0x0d, 0x94, 0x49, 0xa5, 0x0d, 0x94,
+ 0x41, 0xa4, 0x0d, 0x94, 0x39, 0xa3, 0x0d, 0x94, 0x31, 0xa2, 0x0d, 0x94,
+ 0x29, 0xa1, 0x0d, 0x94, 0x21, 0xa0, 0x0d, 0x94, 0x19, 0x9f, 0x0d, 0x94,
+ 0x11, 0x9e, 0x0d, 0x94, 0x09, 0x9d, 0x0d, 0x94, 0x00, 0x88, 0x0d, 0x93,
+ 0xf9, 0x87, 0x0d, 0x93, 0xf1, 0x86, 0x0d, 0x93, 0xe9, 0x85, 0x0d, 0x93,
+ 0xe1, 0x84, 0x0d, 0x93, 0xd9, 0x83, 0x0d, 0x93, 0xd1, 0xa6, 0x0d, 0x93,
+ 0xc9, 0xa5, 0x0d, 0x93, 0xc1, 0xa4, 0x0d, 0x93, 0xb9, 0xa3, 0x0d, 0x93,
+ 0xb1, 0xa2, 0x0d, 0x93, 0xa9, 0xa1, 0x0d, 0x93, 0xa1, 0xa0, 0x0d, 0x93,
+ 0x99, 0x9f, 0x0d, 0x93, 0x91, 0x9e, 0x0d, 0x93, 0x89, 0x9d, 0x0d, 0x93,
+ 0x80, 0x88, 0x0d, 0x93, 0x79, 0x87, 0x0d, 0x93, 0x71, 0x86, 0x0d, 0x93,
+ 0x69, 0x85, 0x0d, 0x93, 0x61, 0x84, 0x0d, 0x93, 0x59, 0x83, 0x0d, 0x93,
+ 0x51, 0xa6, 0x0d, 0x93, 0x49, 0xa5, 0x0d, 0x93, 0x41, 0xa4, 0x0d, 0x93,
+ 0x39, 0xa3, 0x0d, 0x93, 0x31, 0xa2, 0x0d, 0x93, 0x29, 0xa1, 0x0d, 0x93,
+ 0x21, 0xa0, 0x0d, 0x93, 0x19, 0x9f, 0x0d, 0x93, 0x11, 0x9e, 0x0d, 0x93,
+ 0x09, 0x9d, 0x0d, 0x93, 0x00, 0x88, 0x0d, 0x92, 0xf9, 0x87, 0x0d, 0x92,
+ 0xf1, 0x86, 0x0d, 0x92, 0xe9, 0x85, 0x0d, 0x92, 0xe1, 0x84, 0x0d, 0x92,
+ 0xd9, 0x83, 0x0d, 0x92, 0xd1, 0xa6, 0x0d, 0x92, 0xc9, 0xa5, 0x0d, 0x92,
+ 0xc1, 0xa4, 0x0d, 0x92, 0xb9, 0xa3, 0x0d, 0x92, 0xb1, 0xa2, 0x0d, 0x92,
+ 0xa9, 0xa1, 0x0d, 0x92, 0xa1, 0xa0, 0x0d, 0x92, 0x99, 0x9f, 0x0d, 0x92,
+ 0x91, 0x9e, 0x0d, 0x92, 0x89, 0x9d, 0x0d, 0x92, 0x80, 0x88, 0x0d, 0x92,
+ 0x79, 0x87, 0x0d, 0x92, 0x71, 0x86, 0x0d, 0x92, 0x69, 0x85, 0x0d, 0x92,
+ 0x61, 0x84, 0x0d, 0x92, 0x59, 0x83, 0x0d, 0x92, 0x51, 0xa6, 0x0d, 0x92,
+ 0x49, 0xa5, 0x0d, 0x92, 0x41, 0xa4, 0x0d, 0x92, 0x39, 0xa3, 0x0d, 0x92,
+ 0x31, 0xa2, 0x0d, 0x92, 0x29, 0xa1, 0x0d, 0x92, 0x21, 0xa0, 0x0d, 0x92,
+ 0x19, 0x9f, 0x0d, 0x92, 0x11, 0x9e, 0x0d, 0x92, 0x09, 0x9d, 0x0d, 0x92,
+ 0x00, 0x88, 0x0d, 0x91, 0xf9, 0x87, 0x0d, 0x91, 0xf1, 0x86, 0x0d, 0x91,
+ 0xe9, 0x85, 0x0d, 0x91, 0xe1, 0x84, 0x0d, 0x91, 0xd9, 0x83, 0x0d, 0x91,
+ 0xd1, 0xa6, 0x0d, 0x91, 0xc9, 0xa5, 0x0d, 0x91, 0xc1, 0xa4, 0x0d, 0x91,
+ 0xb9, 0xa3, 0x0d, 0x91, 0xb1, 0xa2, 0x0d, 0x91, 0xa9, 0xa1, 0x0d, 0x91,
+ 0xa1, 0xa0, 0x0d, 0x91, 0x99, 0x9f, 0x0d, 0x91, 0x91, 0x9e, 0x0d, 0x91,
+ 0x89, 0x9d, 0x0d, 0x91, 0x80, 0x88, 0x0d, 0x91, 0x79, 0x87, 0x0d, 0x91,
+ 0x71, 0x86, 0x0d, 0x91, 0x69, 0x85, 0x0d, 0x91, 0x61, 0x84, 0x0d, 0x91,
+ 0x59, 0x83, 0x0d, 0x91, 0x51, 0xa6, 0x0d, 0x91, 0x49, 0xa5, 0x0d, 0x91,
+ 0x41, 0xa4, 0x0d, 0x91, 0x39, 0xa3, 0x0d, 0x91, 0x31, 0xa2, 0x0d, 0x91,
+ 0x29, 0xa1, 0x0d, 0x91, 0x21, 0xa0, 0x0d, 0x91, 0x19, 0x9f, 0x0d, 0x91,
+ 0x11, 0x9e, 0x0d, 0x91, 0x09, 0x9d, 0x0d, 0x91, 0x00, 0x88, 0x0d, 0x90,
+ 0xf9, 0x87, 0x0d, 0x90, 0xf1, 0x86, 0x0d, 0x90, 0xe9, 0x85, 0x0d, 0x90,
+ 0xe1, 0x84, 0x0d, 0x90, 0xd9, 0x83, 0x0d, 0x90, 0xd1, 0xa6, 0x0d, 0x90,
+ 0xc9, 0xa5, 0x0d, 0x90, 0xc1, 0xa4, 0x0d, 0x90, 0xb9, 0xa3, 0x0d, 0x90,
+ 0xb1, 0xa2, 0x0d, 0x90, 0xa9, 0xa1, 0x0d, 0x90, 0xa1, 0xa0, 0x0d, 0x90,
+ 0x99, 0x9f, 0x0d, 0x90, 0x91, 0x9e, 0x0d, 0x90, 0x89, 0x9d, 0x0d, 0x90,
+ 0x80, 0x88, 0x0d, 0x90, 0x79, 0x87, 0x0d, 0x90, 0x71, 0x86, 0x0d, 0x90,
+ 0x69, 0x85, 0x0d, 0x90, 0x61, 0x84, 0x0d, 0x90, 0x59, 0x83, 0x0d, 0x90,
+ 0x51, 0xa6, 0x0d, 0x90, 0x49, 0xa5, 0x0d, 0x90, 0x41, 0xa4, 0x0d, 0x90,
+ 0x39, 0xa3, 0x0d, 0x90, 0x31, 0xa2, 0x0d, 0x90, 0x29, 0xa1, 0x0d, 0x90,
+ 0x21, 0xa0, 0x0d, 0x90, 0x19, 0x9f, 0x0d, 0x90, 0x11, 0x9e, 0x0d, 0x90,
+ 0x09, 0x9d, 0x0d, 0x90, 0x00, 0x88, 0x0d, 0x96, 0xf9, 0x87, 0x0d, 0x96,
+ 0xf1, 0x86, 0x0d, 0x96, 0xe9, 0x85, 0x0d, 0x96, 0xe1, 0x84, 0x0d, 0x96,
+ 0xd9, 0x83, 0x0d, 0x96, 0xd1, 0xa6, 0x0d, 0x96, 0xc9, 0xa5, 0x0d, 0x96,
+ 0xc1, 0xa4, 0x0d, 0x96, 0xb9, 0xa3, 0x0d, 0x96, 0xb1, 0xa2, 0x0d, 0x96,
+ 0xa9, 0xa1, 0x0d, 0x96, 0xa1, 0xa0, 0x0d, 0x96, 0x99, 0x9f, 0x0d, 0x96,
+ 0x91, 0x9e, 0x0d, 0x96, 0x89, 0x9d, 0x0d, 0x96, 0x80, 0x88, 0x0d, 0x96,
+ 0x79, 0x87, 0x0d, 0x96, 0x71, 0x86, 0x0d, 0x96, 0x69, 0x85, 0x0d, 0x96,
+ 0x61, 0x84, 0x0d, 0x96, 0x59, 0x83, 0x0d, 0x96, 0x51, 0xa6, 0x0d, 0x96,
+ 0x49, 0xa5, 0x0d, 0x96, 0x41, 0xa4, 0x0d, 0x96, 0x39, 0xa3, 0x0d, 0x96,
+ 0x31, 0xa2, 0x0d, 0x96, 0x29, 0xa1, 0x0d, 0x96, 0x21, 0xa0, 0x0d, 0x96,
+ 0x19, 0x9f, 0x0d, 0x96, 0x11, 0x9e, 0x0d, 0x96, 0x09, 0x9d, 0x0d, 0x96,
+ 0x00, 0x88, 0x0d, 0x95, 0x79, 0x87, 0x0d, 0x95, 0x71, 0x86, 0x0d, 0x95,
+ 0x69, 0x85, 0x0d, 0x95, 0x61, 0x84, 0x0d, 0x95, 0x59, 0x83, 0x0d, 0x95,
+ 0x51, 0xa6, 0x0d, 0x95, 0x49, 0xa5, 0x0d, 0x95, 0x41, 0xa4, 0x0d, 0x95,
+ 0x39, 0xa3, 0x0d, 0x95, 0x31, 0xa2, 0x0d, 0x95, 0x29, 0xa1, 0x0d, 0x95,
+ 0x21, 0xa0, 0x0d, 0x95, 0x19, 0x9f, 0x0d, 0x95, 0x11, 0x9e, 0x0d, 0x95,
+ 0x09, 0x9d, 0x0d, 0x95, 0x00, 0x88, 0x0d, 0x8f, 0xf9, 0x87, 0x0d, 0x8f,
+ 0xf1, 0x86, 0x0d, 0x8f, 0xe9, 0x85, 0x0d, 0x8f, 0xe1, 0x84, 0x0d, 0x8f,
+ 0xd9, 0x83, 0x0d, 0x8f, 0xd1, 0xa6, 0x0d, 0x8f, 0xc9, 0xa5, 0x0d, 0x8f,
+ 0xc1, 0xa4, 0x0d, 0x8f, 0xb9, 0xa3, 0x0d, 0x8f, 0xb1, 0xa2, 0x0d, 0x8f,
+ 0xa9, 0xa1, 0x0d, 0x8f, 0xa1, 0xa0, 0x0d, 0x8f, 0x99, 0x9f, 0x0d, 0x8f,
+ 0x91, 0x9e, 0x0d, 0x8f, 0x89, 0x9d, 0x0d, 0x8f, 0x80, 0x88, 0x0d, 0x8f,
+ 0x79, 0x87, 0x0d, 0x8f, 0x71, 0x86, 0x0d, 0x8f, 0x69, 0x85, 0x0d, 0x8f,
+ 0x61, 0x84, 0x0d, 0x8f, 0x59, 0x83, 0x0d, 0x8f, 0x51, 0xa6, 0x0d, 0x8f,
+ 0x49, 0xa5, 0x0d, 0x8f, 0x41, 0xa4, 0x0d, 0x8f, 0x39, 0xa3, 0x0d, 0x8f,
+ 0x31, 0xa2, 0x0d, 0x8f, 0x29, 0xa1, 0x0d, 0x8f, 0x21, 0xa0, 0x0d, 0x8f,
+ 0x19, 0x9f, 0x0d, 0x8f, 0x11, 0x9e, 0x0d, 0x8f, 0x09, 0x9d, 0x0d, 0x8f,
+ 0x00, 0x88, 0x0d, 0x8e, 0xf9, 0x87, 0x0d, 0x8e, 0xf1, 0x86, 0x0d, 0x8e,
+ 0xe9, 0x85, 0x0d, 0x8e, 0xe1, 0x84, 0x0d, 0x8e, 0xd9, 0x83, 0x0d, 0x8e,
+ 0xd1, 0xa6, 0x0d, 0x8e, 0xc9, 0xa5, 0x0d, 0x8e, 0xc1, 0xa4, 0x0d, 0x8e,
+ 0xb9, 0xa3, 0x0d, 0x8e, 0xb1, 0xa2, 0x0d, 0x8e, 0xa9, 0xa1, 0x0d, 0x8e,
+ 0xa1, 0xa0, 0x0d, 0x8e, 0x99, 0x9f, 0x0d, 0x8e, 0x91, 0x9e, 0x0d, 0x8e,
+ 0x89, 0x9d, 0x0d, 0x8e, 0x80, 0x88, 0x0d, 0x8e, 0x79, 0x87, 0x0d, 0x8e,
+ 0x71, 0x86, 0x0d, 0x8e, 0x69, 0x85, 0x0d, 0x8e, 0x61, 0x84, 0x0d, 0x8e,
+ 0x59, 0x83, 0x0d, 0x8e, 0x51, 0xa6, 0x0d, 0x8e, 0x49, 0xa5, 0x0d, 0x8e,
+ 0x41, 0xa4, 0x0d, 0x8e, 0x39, 0xa3, 0x0d, 0x8e, 0x31, 0xa2, 0x0d, 0x8e,
+ 0x29, 0xa1, 0x0d, 0x8e, 0x21, 0xa0, 0x0d, 0x8e, 0x19, 0x9f, 0x0d, 0x8e,
+ 0x11, 0x9e, 0x0d, 0x8e, 0x09, 0x9d, 0x0d, 0x8e, 0x00, 0x88, 0x0d, 0x8d,
+ 0xf9, 0x87, 0x0d, 0x8d, 0xf1, 0x86, 0x0d, 0x8d, 0xe9, 0x85, 0x0d, 0x8d,
+ 0xe1, 0x84, 0x0d, 0x8d, 0xd9, 0x83, 0x0d, 0x8d, 0xd1, 0xa6, 0x0d, 0x8d,
+ 0xc9, 0xa5, 0x0d, 0x8d, 0xc1, 0xa4, 0x0d, 0x8d, 0xb9, 0xa3, 0x0d, 0x8d,
+ 0xb1, 0xa2, 0x0d, 0x8d, 0xa9, 0xa1, 0x0d, 0x8d, 0xa1, 0xa0, 0x0d, 0x8d,
+ 0x99, 0x9f, 0x0d, 0x8d, 0x91, 0x9e, 0x0d, 0x8d, 0x89, 0x9d, 0x0d, 0x8d,
+ 0x80, 0x88, 0x0d, 0x8d, 0x79, 0x87, 0x0d, 0x8d, 0x71, 0x86, 0x0d, 0x8d,
+ 0x69, 0x85, 0x0d, 0x8d, 0x61, 0x84, 0x0d, 0x8d, 0x59, 0x83, 0x0d, 0x8d,
+ 0x51, 0xa6, 0x0d, 0x8d, 0x49, 0xa5, 0x0d, 0x8d, 0x41, 0xa4, 0x0d, 0x8d,
+ 0x39, 0xa3, 0x0d, 0x8d, 0x31, 0xa2, 0x0d, 0x8d, 0x29, 0xa1, 0x0d, 0x8d,
+ 0x21, 0xa0, 0x0d, 0x8d, 0x19, 0x9f, 0x0d, 0x8d, 0x11, 0x9e, 0x0d, 0x8d,
+ 0x09, 0x9d, 0x0d, 0x8d, 0x00, 0x88, 0x0d, 0x8c, 0xf9, 0x87, 0x0d, 0x8c,
+ 0xf1, 0x86, 0x0d, 0x8c, 0xe9, 0x85, 0x0d, 0x8c, 0xe1, 0x84, 0x0d, 0x8c,
+ 0xd9, 0x83, 0x0d, 0x8c, 0xd1, 0xa6, 0x0d, 0x8c, 0xc9, 0xa5, 0x0d, 0x8c,
+ 0xc1, 0xa4, 0x0d, 0x8c, 0xb9, 0xa3, 0x0d, 0x8c, 0xb1, 0xa2, 0x0d, 0x8c,
+ 0xa9, 0xa1, 0x0d, 0x8c, 0xa1, 0xa0, 0x0d, 0x8c, 0x99, 0x9f, 0x0d, 0x8c,
+ 0x91, 0x9e, 0x0d, 0x8c, 0x89, 0x9d, 0x0d, 0x8c, 0x80, 0x88, 0x0d, 0x8c,
+ 0x79, 0x87, 0x0d, 0x8c, 0x71, 0x86, 0x0d, 0x8c, 0x69, 0x85, 0x0d, 0x8c,
+ 0x61, 0x84, 0x0d, 0x8c, 0x59, 0x83, 0x0d, 0x8c, 0x51, 0xa6, 0x0d, 0x8c,
+ 0x49, 0xa5, 0x0d, 0x8c, 0x41, 0xa4, 0x0d, 0x8c, 0x39, 0xa3, 0x0d, 0x8c,
+ 0x31, 0xa2, 0x0d, 0x8c, 0x29, 0xa1, 0x0d, 0x8c, 0x21, 0xa0, 0x0d, 0x8c,
+ 0x19, 0x9f, 0x0d, 0x8c, 0x11, 0x9e, 0x0d, 0x8c, 0x09, 0x9d, 0x0d, 0x8c,
+ 0x00, 0x88, 0x0d, 0x8b, 0xf9, 0x87, 0x0d, 0x8b, 0xf1, 0x86, 0x0d, 0x8b,
+ 0xe9, 0x85, 0x0d, 0x8b, 0xe1, 0x84, 0x0d, 0x8b, 0xd9, 0x83, 0x0d, 0x8b,
+ 0xd1, 0xa6, 0x0d, 0x8b, 0xc9, 0xa5, 0x0d, 0x8b, 0xc1, 0xa4, 0x0d, 0x8b,
+ 0xb9, 0xa3, 0x0d, 0x8b, 0xb1, 0xa2, 0x0d, 0x8b, 0xa9, 0xa1, 0x0d, 0x8b,
+ 0xa1, 0xa0, 0x0d, 0x8b, 0x99, 0x9f, 0x0d, 0x8b, 0x91, 0x9e, 0x0d, 0x8b,
+ 0x89, 0x9d, 0x0d, 0x8b, 0x80, 0xcd, 0x79, 0x1a, 0x01, 0x24, 0xd9, 0xcd,
+ 0x7d, 0xac, 0x01, 0x24, 0x98, 0xcf, 0x69, 0x36, 0x01, 0x24, 0xb9, 0xc2,
+ 0x00, 0xbc, 0x00, 0x01, 0x18, 0xc2, 0x00, 0x39, 0x00, 0x3f, 0x51, 0xc3,
+ 0x1c, 0x63, 0x00, 0x3f, 0x49, 0xc2, 0x25, 0x3b, 0x00, 0x3f, 0x40, 0xc7,
+ 0xc3, 0xf4, 0x00, 0x3f, 0x38, 0xc7, 0xc3, 0xf4, 0x00, 0x3f, 0x00, 0xd0,
+ 0x5b, 0xa2, 0x01, 0x4d, 0xa1, 0xd1, 0x02, 0x56, 0x01, 0x4d, 0x99, 0xd2,
+ 0x4b, 0xdd, 0x01, 0x4d, 0x91, 0xc7, 0x80, 0x70, 0x01, 0x4d, 0x88, 0x43,
+ 0x00, 0xaf, 0x42, 0x8a, 0x23, 0x03, 0xc2, 0x8a, 0x2d, 0xcd, 0x79, 0xa9,
+ 0x0f, 0x98, 0x68, 0xa5, 0x09, 0x87, 0xe9, 0xa4, 0x09, 0x87, 0xe1, 0xa3,
+ 0x09, 0x87, 0xd9, 0xa1, 0x09, 0x87, 0xcb, 0x02, 0x8a, 0x39, 0xa0, 0x09,
+ 0x87, 0xc1, 0x9f, 0x09, 0x87, 0xb9, 0x9e, 0x09, 0x87, 0xb1, 0x9d, 0x09,
+ 0x87, 0xa8, 0xa6, 0x09, 0x87, 0xa1, 0xa5, 0x09, 0x87, 0x93, 0x02, 0x8a,
+ 0x3d, 0xa4, 0x09, 0x87, 0x89, 0xa3, 0x09, 0x87, 0x81, 0xa2, 0x09, 0x87,
+ 0x79, 0xa1, 0x09, 0x87, 0x71, 0xa0, 0x09, 0x87, 0x69, 0x9f, 0x09, 0x87,
+ 0x61, 0x9e, 0x09, 0x87, 0x59, 0x9d, 0x09, 0x87, 0x4a, 0x02, 0x8a, 0x41,
+ 0xa6, 0x09, 0x87, 0x41, 0xa5, 0x09, 0x87, 0x39, 0xa4, 0x09, 0x87, 0x2b,
+ 0x02, 0x8a, 0x45, 0xa3, 0x09, 0x87, 0x1b, 0x02, 0x8a, 0x49, 0xa2, 0x09,
+ 0x87, 0x11, 0xa1, 0x09, 0x87, 0x09, 0xa0, 0x09, 0x87, 0x01, 0x9f, 0x09,
+ 0x86, 0xf9, 0x9e, 0x09, 0x86, 0xf1, 0x9d, 0x09, 0x86, 0xe8, 0xa6, 0x09,
+ 0x86, 0xdb, 0x02, 0x8a, 0x4d, 0xa5, 0x09, 0x86, 0xcb, 0x02, 0x8a, 0x51,
+ 0xa4, 0x09, 0x86, 0xc1, 0xa3, 0x09, 0x86, 0xb9, 0xa2, 0x09, 0x86, 0xb1,
+ 0xa1, 0x09, 0x86, 0xa9, 0xa0, 0x09, 0x86, 0xa1, 0x9f, 0x09, 0x86, 0x99,
+ 0x9e, 0x09, 0x86, 0x90, 0x83, 0x09, 0x82, 0xa8, 0x9e, 0x09, 0x94, 0xd1,
+ 0x9d, 0x09, 0x94, 0xba, 0x02, 0x8a, 0x55, 0xa6, 0x09, 0x94, 0xb1, 0xa5,
+ 0x09, 0x94, 0xa9, 0xa4, 0x09, 0x94, 0xa1, 0xa3, 0x09, 0x94, 0x99, 0xa2,
+ 0x09, 0x94, 0x91, 0xa1, 0x09, 0x94, 0x89, 0xa0, 0x09, 0x94, 0x81, 0x9f,
+ 0x09, 0x94, 0x79, 0x9e, 0x09, 0x94, 0x71, 0x9d, 0x09, 0x94, 0x68, 0xa6,
+ 0x09, 0x94, 0x61, 0xa5, 0x09, 0x94, 0x59, 0xa4, 0x09, 0x94, 0x51, 0xa3,
+ 0x09, 0x94, 0x2b, 0x02, 0x8a, 0x5d, 0xa2, 0x09, 0x94, 0x21, 0xa1, 0x09,
+ 0x94, 0x19, 0xa0, 0x09, 0x94, 0x0b, 0x02, 0x8a, 0x6d, 0x9f, 0x09, 0x94,
+ 0x01, 0x9e, 0x09, 0x93, 0xf9, 0x9d, 0x09, 0x93, 0xea, 0x02, 0x8a, 0x71,
+ 0xa6, 0x09, 0x93, 0xdb, 0x02, 0x8a, 0x75, 0xa5, 0x09, 0x93, 0xd1, 0xa4,
+ 0x09, 0x93, 0xc9, 0xa3, 0x09, 0x93, 0xc1, 0xa2, 0x09, 0x93, 0xb3, 0x02,
+ 0x8a, 0x79, 0xa1, 0x09, 0x93, 0xa3, 0x02, 0x8a, 0x7d, 0xa0, 0x09, 0x93,
+ 0x99, 0x9f, 0x09, 0x93, 0x91, 0x9e, 0x09, 0x93, 0x89, 0x9d, 0x09, 0x93,
+ 0x7a, 0x02, 0x8a, 0x81, 0xa6, 0x09, 0x93, 0x6b, 0x02, 0x8a, 0x85, 0xa5,
+ 0x09, 0x93, 0x61, 0xa4, 0x09, 0x93, 0x59, 0xa3, 0x09, 0x93, 0x51, 0xa2,
+ 0x09, 0x93, 0x49, 0xa1, 0x09, 0x93, 0x41, 0xa0, 0x09, 0x93, 0x39, 0x9f,
+ 0x09, 0x93, 0x31, 0x9e, 0x09, 0x93, 0x29, 0x9d, 0x09, 0x93, 0x0a, 0x02,
+ 0x8a, 0x89, 0xa6, 0x09, 0x93, 0x01, 0xa5, 0x09, 0x92, 0xf9, 0xa4, 0x09,
+ 0x92, 0xf1, 0xa3, 0x09, 0x92, 0xbb, 0x02, 0x8a, 0x95, 0xa2, 0x09, 0x92,
+ 0xab, 0x02, 0x8a, 0xad, 0xa1, 0x09, 0x92, 0xa1, 0xa0, 0x09, 0x92, 0x99,
+ 0x9f, 0x09, 0x92, 0x91, 0x9e, 0x09, 0x92, 0x82, 0x02, 0x8a, 0xb1, 0xc3,
+ 0x02, 0x39, 0x09, 0xa1, 0xa9, 0xc5, 0xdd, 0xd0, 0x09, 0xa1, 0x98, 0xc3,
+ 0x02, 0x39, 0x09, 0xa1, 0xa1, 0xc5, 0xdd, 0xd0, 0x09, 0xa1, 0x90, 0xa2,
+ 0x09, 0x8c, 0xd1, 0xa1, 0x09, 0x8c, 0xc9, 0xa0, 0x09, 0x8c, 0xc1, 0x9f,
+ 0x09, 0x8c, 0xb9, 0x9e, 0x09, 0x8c, 0xab, 0x02, 0x8a, 0xb5, 0x9d, 0x09,
+ 0x8c, 0x9a, 0x02, 0x8a, 0xb9, 0xa6, 0x09, 0x8c, 0x8b, 0x02, 0x8a, 0xbd,
+ 0xa5, 0x09, 0x8c, 0x81, 0xa4, 0x09, 0x8c, 0x79, 0xa3, 0x09, 0x8c, 0x71,
+ 0xa2, 0x09, 0x8c, 0x63, 0x02, 0x8a, 0xc1, 0xa1, 0x09, 0x8c, 0x59, 0xa0,
+ 0x09, 0x8c, 0x51, 0x9f, 0x09, 0x8c, 0x49, 0x9e, 0x09, 0x8c, 0x40, 0x83,
+ 0x09, 0x8c, 0x28, 0x83, 0x09, 0x9d, 0x70, 0xa6, 0x09, 0x9d, 0x61, 0xa5,
+ 0x09, 0x9d, 0x59, 0xa4, 0x09, 0x9d, 0x4b, 0x02, 0x8a, 0xc5, 0xa3, 0x09,
+ 0x9d, 0x41, 0xa2, 0x09, 0x9d, 0x39, 0xa1, 0x09, 0x9d, 0x31, 0xa0, 0x09,
+ 0x9d, 0x23, 0x02, 0x8a, 0xc9, 0x9f, 0x09, 0x9d, 0x19, 0x9e, 0x09, 0x9d,
+ 0x0b, 0x02, 0x8a, 0xcd, 0x9d, 0x09, 0x9c, 0xfa, 0x02, 0x8a, 0xd1, 0xa6,
+ 0x09, 0x9c, 0xeb, 0x02, 0x8a, 0xd5, 0xa5, 0x09, 0x9c, 0xdb, 0x02, 0x8a,
+ 0xd9, 0xa4, 0x09, 0x9c, 0xd1, 0xa3, 0x09, 0x9c, 0xc9, 0xa2, 0x09, 0x9c,
+ 0xc1, 0xa1, 0x09, 0x9c, 0xb9, 0xa0, 0x09, 0x9c, 0xab, 0x02, 0x8a, 0xdd,
+ 0x9f, 0x09, 0x9c, 0xa1, 0x9e, 0x09, 0x9c, 0x99, 0x9d, 0x09, 0x9c, 0x32,
+ 0x02, 0x8a, 0xe1, 0xa6, 0x09, 0x9c, 0x29, 0xa5, 0x09, 0x9c, 0x21, 0xa4,
+ 0x09, 0x9c, 0x19, 0xa3, 0x09, 0x9c, 0x11, 0xa2, 0x09, 0x9c, 0x09, 0xa1,
+ 0x09, 0x9c, 0x01, 0xa0, 0x09, 0x9b, 0xf9, 0x9f, 0x09, 0x9b, 0xe3, 0x02,
+ 0x8b, 0x11, 0x9e, 0x09, 0x9b, 0xc3, 0x02, 0x8b, 0x19, 0x9d, 0x09, 0x9b,
+ 0xb8, 0xa6, 0x09, 0x9b, 0xb1, 0xa5, 0x09, 0x9b, 0xa9, 0xa4, 0x09, 0x9b,
+ 0x93, 0x02, 0x8b, 0x25, 0xa3, 0x09, 0x9b, 0x89, 0xa2, 0x09, 0x9b, 0x81,
+ 0xa1, 0x09, 0x9b, 0x79, 0xa0, 0x09, 0x9b, 0x71, 0x9f, 0x09, 0x9b, 0x63,
+ 0x02, 0x8b, 0x2d, 0x9e, 0x09, 0x9b, 0x12, 0x02, 0x8b, 0x31, 0x9f, 0x09,
+ 0xa1, 0x71, 0x9e, 0x09, 0xa1, 0x69, 0x9d, 0x09, 0xa1, 0x60, 0xa6, 0x09,
+ 0xa1, 0x59, 0xa5, 0x09, 0xa1, 0x51, 0xa4, 0x09, 0xa1, 0x49, 0xa3, 0x09,
+ 0xa1, 0x41, 0xa2, 0x09, 0xa1, 0x39, 0xa1, 0x09, 0xa1, 0x31, 0xa0, 0x09,
+ 0xa1, 0x29, 0x9f, 0x09, 0xa1, 0x21, 0x9e, 0x09, 0xa1, 0x19, 0x9d, 0x09,
+ 0xa1, 0x10, 0xa6, 0x09, 0xa1, 0x09, 0xa5, 0x09, 0xa1, 0x01, 0xa4, 0x09,
+ 0xa0, 0xf9, 0xa3, 0x09, 0xa0, 0xf1, 0xa2, 0x09, 0xa0, 0xe9, 0xa1, 0x09,
+ 0xa0, 0xe1, 0xa0, 0x09, 0xa0, 0xd9, 0x9f, 0x09, 0xa0, 0xd1, 0x9e, 0x09,
+ 0xa0, 0xc9, 0x9d, 0x09, 0xa0, 0xc0, 0xa6, 0x09, 0xa0, 0xb9, 0xa5, 0x09,
+ 0xa0, 0xb1, 0xa4, 0x09, 0xa0, 0x9b, 0x02, 0x8b, 0x55, 0xa3, 0x09, 0xa0,
+ 0x91, 0xa2, 0x09, 0xa0, 0x89, 0xa1, 0x09, 0xa0, 0x81, 0xa0, 0x09, 0xa0,
+ 0x79, 0x9f, 0x09, 0xa0, 0x71, 0x9e, 0x09, 0xa0, 0x68, 0xa6, 0x09, 0x82,
+ 0x71, 0xa5, 0x09, 0x82, 0x69, 0xa4, 0x09, 0x82, 0x61, 0xa3, 0x09, 0x82,
+ 0x59, 0xa2, 0x09, 0x82, 0x51, 0xa1, 0x09, 0x82, 0x49, 0xa0, 0x09, 0x82,
+ 0x41, 0x9f, 0x09, 0x82, 0x39, 0x9e, 0x09, 0x82, 0x31, 0x9d, 0x09, 0x82,
+ 0x28, 0xa6, 0x09, 0x82, 0x21, 0xa5, 0x09, 0x82, 0x19, 0xa4, 0x09, 0x82,
+ 0x11, 0xa3, 0x09, 0x82, 0x09, 0xa2, 0x09, 0x82, 0x01, 0xa1, 0x09, 0x81,
+ 0xf9, 0xa0, 0x09, 0x81, 0xf1, 0x9f, 0x09, 0x81, 0xe9, 0x9e, 0x09, 0x81,
+ 0xe1, 0x9d, 0x09, 0x81, 0xd8, 0xa6, 0x09, 0x81, 0xd1, 0xa5, 0x09, 0x81,
+ 0xc9, 0xa4, 0x09, 0x81, 0xc1, 0xa3, 0x09, 0x81, 0xb9, 0xa2, 0x09, 0x81,
+ 0xab, 0x02, 0x8b, 0x5d, 0xa1, 0x09, 0x81, 0xa1, 0xa0, 0x09, 0x81, 0x93,
+ 0x02, 0x8b, 0x61, 0x9f, 0x09, 0x81, 0x83, 0x02, 0x8b, 0x65, 0x9e, 0x09,
+ 0x81, 0x79, 0x9d, 0x09, 0x81, 0x6a, 0x02, 0x8b, 0x69, 0xa6, 0x09, 0x81,
+ 0x61, 0xa5, 0x09, 0x81, 0x59, 0xa4, 0x09, 0x81, 0x51, 0xa3, 0x09, 0x81,
+ 0x49, 0xa2, 0x09, 0x81, 0x41, 0xa1, 0x09, 0x81, 0x39, 0xa0, 0x09, 0x81,
+ 0x31, 0x9f, 0x09, 0x81, 0x23, 0x02, 0x8b, 0x6d, 0x9e, 0x09, 0x81, 0x19,
+ 0x9d, 0x09, 0x81, 0x10, 0xa6, 0x09, 0x81, 0x09, 0xa5, 0x09, 0x81, 0x01,
+ 0xa4, 0x09, 0x80, 0xf9, 0xa3, 0x09, 0x80, 0xf1, 0xa2, 0x09, 0x80, 0xe9,
+ 0xa1, 0x09, 0x80, 0xe1, 0xa0, 0x09, 0x80, 0xd9, 0x9f, 0x09, 0x80, 0xd1,
+ 0x9e, 0x09, 0x80, 0xc9, 0x9d, 0x09, 0x80, 0xc0, 0xa6, 0x09, 0x80, 0xb9,
+ 0xa5, 0x09, 0x80, 0xb1, 0xa4, 0x09, 0x80, 0xa3, 0x02, 0x8b, 0x71, 0xa3,
+ 0x09, 0x80, 0x99, 0xa2, 0x09, 0x80, 0x91, 0xa1, 0x09, 0x80, 0x83, 0x02,
+ 0x8b, 0x75, 0xa0, 0x09, 0x80, 0x79, 0x9f, 0x09, 0x80, 0x71, 0x9e, 0x09,
+ 0x80, 0x69, 0x9d, 0x09, 0x80, 0x60, 0xa6, 0x09, 0x80, 0x59, 0xa5, 0x09,
+ 0x80, 0x51, 0xa4, 0x09, 0x80, 0x49, 0xa3, 0x09, 0x80, 0x33, 0x02, 0x8b,
+ 0x79, 0xa2, 0x09, 0x80, 0x23, 0x02, 0x8b, 0x81, 0xa1, 0x09, 0x80, 0x19,
+ 0xa0, 0x09, 0x80, 0x11, 0x9f, 0x09, 0x80, 0x09, 0x9e, 0x09, 0x80, 0x00,
+ 0x8a, 0x09, 0xa0, 0x61, 0x89, 0x09, 0xa0, 0x59, 0x88, 0x09, 0xa0, 0x51,
+ 0x87, 0x09, 0xa0, 0x49, 0x86, 0x09, 0xa0, 0x41, 0x85, 0x09, 0xa0, 0x39,
+ 0x84, 0x09, 0xa0, 0x31, 0x83, 0x09, 0xa0, 0x28, 0x8b, 0x09, 0xa0, 0x19,
+ 0x8a, 0x09, 0xa0, 0x11, 0x89, 0x09, 0xa0, 0x09, 0x88, 0x09, 0xa0, 0x01,
+ 0x87, 0x09, 0x9f, 0xf9, 0x86, 0x09, 0x9f, 0xf1, 0x85, 0x09, 0x9f, 0xe9,
+ 0x84, 0x09, 0x9f, 0xe1, 0x83, 0x09, 0x9f, 0xd8, 0x83, 0x09, 0x9f, 0x80,
+ 0x83, 0x09, 0x9f, 0x70, 0x84, 0x09, 0x9f, 0x61, 0x83, 0x09, 0x9f, 0x58,
+ 0x86, 0x09, 0x9f, 0x49, 0x85, 0x09, 0x9f, 0x41, 0x84, 0x09, 0x9f, 0x39,
+ 0x83, 0x09, 0x9f, 0x30, 0x83, 0x09, 0x9e, 0x68, 0x83, 0x09, 0x9e, 0x30,
+ 0x83, 0x09, 0x9e, 0x20, 0x83, 0x09, 0x9e, 0x00, 0x83, 0x09, 0x9d, 0xd8,
+ 0x83, 0x09, 0x9d, 0xc8, 0x83, 0x09, 0x9d, 0x90, 0x83, 0x09, 0x9a, 0xb8,
+ 0x83, 0x09, 0x9a, 0x98, 0x83, 0x09, 0x9a, 0x60, 0x84, 0x09, 0x99, 0xd1,
+ 0x83, 0x09, 0x99, 0xc8, 0x83, 0x09, 0x99, 0x78, 0x83, 0x09, 0x99, 0x68,
+ 0x83, 0x09, 0x98, 0xe0, 0x83, 0x09, 0x98, 0xb0, 0x83, 0x09, 0x98, 0x98,
+ 0x83, 0x09, 0x98, 0x88, 0x83, 0x09, 0x98, 0x78, 0x83, 0x09, 0x98, 0x50,
+ 0x83, 0x09, 0x97, 0xd8, 0x84, 0x09, 0x97, 0x89, 0x83, 0x09, 0x97, 0x80,
+ 0x83, 0x09, 0x97, 0x30, 0x84, 0x09, 0x97, 0x11, 0x83, 0x09, 0x97, 0x08,
+ 0x83, 0x09, 0x96, 0xc0, 0x83, 0x09, 0x96, 0x98, 0x83, 0x09, 0x96, 0x18,
+ 0x83, 0x09, 0x95, 0xe0, 0x84, 0x09, 0x95, 0xa1, 0x83, 0x09, 0x95, 0x98,
+ 0x83, 0x09, 0x95, 0x88, 0x83, 0x09, 0x94, 0xf8, 0x83, 0x09, 0x94, 0xe0,
+ 0x9f, 0x09, 0x92, 0x73, 0x02, 0x8b, 0x85, 0x9e, 0x09, 0x92, 0x69, 0x9d,
+ 0x09, 0x92, 0x60, 0xa6, 0x09, 0x92, 0x59, 0xa5, 0x09, 0x92, 0x4b, 0x02,
+ 0x8b, 0x89, 0xa4, 0x09, 0x92, 0x41, 0xa3, 0x09, 0x92, 0x39, 0xa2, 0x09,
+ 0x92, 0x31, 0xa1, 0x09, 0x92, 0x29, 0xa0, 0x09, 0x92, 0x21, 0x9f, 0x09,
+ 0x92, 0x19, 0x9e, 0x09, 0x92, 0x0b, 0x02, 0x8b, 0x8d, 0x9d, 0x09, 0x91,
+ 0xfa, 0x02, 0x8b, 0x91, 0xa6, 0x09, 0x91, 0xf1, 0xa5, 0x09, 0x91, 0xe9,
+ 0xa4, 0x09, 0x91, 0xe1, 0xa3, 0x09, 0x91, 0xd9, 0xa2, 0x09, 0x91, 0xd1,
+ 0xa1, 0x09, 0x91, 0xc9, 0xa0, 0x09, 0x91, 0xc1, 0x9f, 0x09, 0x91, 0xb9,
+ 0x9e, 0x09, 0x91, 0xb0, 0xa6, 0x09, 0x91, 0xa1, 0xa5, 0x09, 0x91, 0x99,
+ 0xa4, 0x09, 0x91, 0x8b, 0x02, 0x8b, 0x95, 0xa3, 0x09, 0x91, 0x81, 0xa2,
+ 0x09, 0x91, 0x79, 0xa1, 0x09, 0x91, 0x71, 0xa0, 0x09, 0x91, 0x69, 0x9f,
+ 0x09, 0x91, 0x61, 0x9e, 0x09, 0x91, 0x59, 0x9d, 0x09, 0x91, 0x50, 0xa6,
+ 0x09, 0x91, 0x49, 0xa5, 0x09, 0x91, 0x41, 0xa4, 0x09, 0x91, 0x39, 0xa3,
+ 0x09, 0x91, 0x31, 0xa2, 0x09, 0x91, 0x23, 0x02, 0x8b, 0x99, 0xa1, 0x09,
+ 0x91, 0x19, 0xa0, 0x09, 0x91, 0x11, 0x9f, 0x09, 0x91, 0x09, 0x9e, 0x09,
+ 0x91, 0x00, 0x9f, 0x09, 0x90, 0xf9, 0x9e, 0x09, 0x90, 0xf1, 0x9d, 0x09,
+ 0x90, 0xe8, 0xa6, 0x09, 0x90, 0xe1, 0xa5, 0x09, 0x90, 0xd9, 0xa4, 0x09,
+ 0x90, 0xcb, 0x02, 0x8b, 0x9d, 0xa3, 0x09, 0x90, 0xc1, 0xa2, 0x09, 0x90,
+ 0xb3, 0x02, 0x8b, 0xa1, 0xa1, 0x09, 0x90, 0xa3, 0x02, 0x8b, 0xa5, 0xa0,
+ 0x09, 0x90, 0x93, 0x02, 0x8b, 0xa9, 0x9f, 0x09, 0x90, 0x89, 0x9e, 0x09,
+ 0x90, 0x81, 0x9d, 0x09, 0x90, 0x78, 0xa6, 0x09, 0x90, 0x71, 0xa5, 0x09,
+ 0x90, 0x69, 0xa4, 0x09, 0x90, 0x61, 0xa3, 0x09, 0x90, 0x59, 0xa2, 0x09,
+ 0x90, 0x4b, 0x02, 0x8b, 0xad, 0xa1, 0x09, 0x90, 0x41, 0xa0, 0x09, 0x90,
+ 0x39, 0x9f, 0x09, 0x90, 0x31, 0x9e, 0x09, 0x90, 0x29, 0x9d, 0x09, 0x90,
+ 0x20, 0xa6, 0x09, 0x90, 0x19, 0xa5, 0x09, 0x90, 0x03, 0x02, 0x8b, 0xb1,
+ 0xa4, 0x09, 0x8f, 0xf9, 0xa3, 0x09, 0x8f, 0xf1, 0xa2, 0x09, 0x8f, 0xe9,
+ 0xa1, 0x09, 0x8f, 0xe1, 0xa0, 0x09, 0x8f, 0xd9, 0x9f, 0x09, 0x8f, 0xd1,
+ 0x9e, 0x09, 0x8f, 0xc9, 0x9d, 0x09, 0x8f, 0xc0, 0xa6, 0x09, 0x8f, 0xb9,
+ 0xa5, 0x09, 0x8f, 0xb1, 0xa4, 0x09, 0x8f, 0xa9, 0xa3, 0x09, 0x8f, 0xa1,
+ 0xa2, 0x09, 0x8f, 0x99, 0xa1, 0x09, 0x8f, 0x91, 0xa0, 0x09, 0x8f, 0x89,
+ 0x9f, 0x09, 0x8f, 0x81, 0x9e, 0x09, 0x8f, 0x78, 0x83, 0x09, 0x8f, 0x50,
+ 0x84, 0x09, 0x8f, 0x11, 0x83, 0x09, 0x8f, 0x08, 0x83, 0x09, 0x8e, 0xf0,
+ 0x83, 0x09, 0x8e, 0xd0, 0x83, 0x09, 0x8e, 0xa8, 0x83, 0x09, 0x8e, 0x90,
+ 0x83, 0x09, 0x8e, 0x60, 0x83, 0x09, 0x8e, 0x50, 0x83, 0x09, 0x8e, 0x40,
+ 0x8a, 0x09, 0x8e, 0x21, 0x89, 0x09, 0x8e, 0x19, 0x88, 0x09, 0x8e, 0x11,
+ 0x87, 0x09, 0x8e, 0x09, 0x86, 0x09, 0x8e, 0x01, 0x85, 0x09, 0x8d, 0xf9,
+ 0x84, 0x09, 0x8d, 0xf1, 0x83, 0x09, 0x8d, 0xe8, 0x83, 0x09, 0x8d, 0xd0,
+ 0x83, 0x09, 0x8d, 0x90, 0x84, 0x09, 0x8d, 0x79, 0x83, 0x09, 0x8d, 0x70,
+ 0x83, 0x09, 0x8b, 0xa8, 0x83, 0x09, 0x8b, 0x90, 0x83, 0x09, 0x8b, 0x58,
+ 0x83, 0x09, 0x8b, 0x48, 0x83, 0x09, 0x8a, 0xf0, 0x83, 0x09, 0x8a, 0xb8,
+ 0x83, 0x09, 0x8a, 0x68, 0x84, 0x09, 0x8a, 0x41, 0x83, 0x09, 0x8a, 0x38,
+ 0x83, 0x09, 0x8a, 0x28, 0x85, 0x09, 0x89, 0xe1, 0x84, 0x09, 0x89, 0xd9,
+ 0x83, 0x09, 0x89, 0xd0, 0x83, 0x09, 0x89, 0xa8, 0x83, 0x09, 0x89, 0x98,
+ 0x83, 0x09, 0x89, 0x88, 0x83, 0x09, 0x89, 0x48, 0x83, 0x09, 0x89, 0x38,
+ 0x83, 0x09, 0x89, 0x00, 0x83, 0x09, 0x88, 0xa8, 0x83, 0x09, 0x88, 0x60,
+ 0x83, 0x09, 0x87, 0xf8, 0x8a, 0x09, 0x86, 0x89, 0x89, 0x09, 0x86, 0x81,
+ 0x88, 0x09, 0x86, 0x79, 0x87, 0x09, 0x86, 0x71, 0x86, 0x09, 0x86, 0x69,
+ 0x85, 0x09, 0x86, 0x61, 0x84, 0x09, 0x86, 0x59, 0x83, 0x09, 0x86, 0x50,
+ 0x83, 0x09, 0x85, 0xe0, 0x83, 0x09, 0x85, 0xc8, 0x8b, 0x09, 0x85, 0xb1,
+ 0x8a, 0x09, 0x85, 0xa9, 0x89, 0x09, 0x85, 0xa1, 0x88, 0x09, 0x85, 0x99,
+ 0x87, 0x09, 0x85, 0x91, 0x86, 0x09, 0x85, 0x89, 0x85, 0x09, 0x85, 0x81,
+ 0x84, 0x09, 0x85, 0x79, 0x83, 0x09, 0x85, 0x70, 0x83, 0x09, 0x85, 0x58,
+ 0x83, 0x09, 0x85, 0x40, 0x83, 0x09, 0x84, 0xd8, 0x83, 0x09, 0x84, 0xb8,
+ 0x83, 0x09, 0x84, 0x90, 0x83, 0x09, 0x83, 0xf0, 0x83, 0x09, 0x83, 0x38,
+ 0x85, 0x09, 0x82, 0xf1, 0x84, 0x09, 0x82, 0xe9, 0x83, 0x09, 0x82, 0xe0,
+ 0xc6, 0x02, 0xd1, 0x0f, 0xbc, 0x49, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x98,
+ 0xc6, 0x13, 0x52, 0x0f, 0xbd, 0x71, 0xd2, 0x4d, 0x57, 0x0f, 0xbd, 0xd0,
+ 0x45, 0x56, 0x42, 0x42, 0x8b, 0xb9, 0x83, 0x00, 0x95, 0x03, 0x02, 0x8b,
+ 0xe9, 0x97, 0x00, 0x95, 0x09, 0x8b, 0x00, 0x95, 0x11, 0x87, 0x00, 0x95,
+ 0x2b, 0x02, 0x8b, 0xed, 0x91, 0x00, 0x95, 0x33, 0x02, 0x8b, 0xf1, 0xc2,
+ 0x01, 0x4a, 0x00, 0x95, 0x38, 0x83, 0x00, 0x98, 0x58, 0x87, 0x00, 0x98,
+ 0x60, 0x83, 0x00, 0x98, 0x78, 0x83, 0x00, 0x98, 0x83, 0x02, 0x8b, 0xf5,
+ 0x8b, 0x00, 0x98, 0x91, 0x87, 0x00, 0x98, 0xaa, 0x02, 0x8b, 0xf9, 0x83,
+ 0x00, 0x98, 0xc3, 0x02, 0x8b, 0xfd, 0x97, 0x00, 0x98, 0xc9, 0x8b, 0x00,
+ 0x98, 0xd1, 0x87, 0x00, 0x98, 0xeb, 0x02, 0x8c, 0x01, 0x91, 0x00, 0x98,
+ 0xf1, 0x19, 0x42, 0x8c, 0x05, 0x83, 0x01, 0x6e, 0xc3, 0x02, 0x8c, 0x17,
+ 0x97, 0x01, 0x6e, 0xc9, 0x8b, 0x01, 0x6e, 0xd1, 0x87, 0x01, 0x6e, 0xeb,
+ 0x02, 0x8c, 0x1b, 0x91, 0x01, 0x6e, 0xf0, 0x19, 0xc2, 0x8c, 0x1f, 0x1b,
+ 0xc2, 0x8c, 0x2e, 0x83, 0x00, 0x90, 0x83, 0x02, 0x8c, 0x48, 0x97, 0x00,
+ 0x90, 0x89, 0x8b, 0x00, 0x90, 0x91, 0x87, 0x00, 0x90, 0xab, 0x02, 0x8c,
+ 0x4c, 0x91, 0x00, 0x90, 0xb0, 0x83, 0x00, 0x90, 0x18, 0x87, 0x00, 0x90,
+ 0x20, 0x83, 0x00, 0x90, 0x38, 0x91, 0x05, 0x59, 0x71, 0x87, 0x05, 0x59,
+ 0x6b, 0x02, 0x8c, 0x50, 0x83, 0x05, 0x59, 0x43, 0x02, 0x8c, 0x54, 0x8b,
+ 0x05, 0x59, 0x51, 0x97, 0x05, 0x59, 0x48, 0x83, 0x00, 0x93, 0x18, 0x87,
+ 0x00, 0x93, 0x20, 0x83, 0x01, 0x6c, 0x28, 0x83, 0x00, 0x93, 0x39, 0x8b,
+ 0x00, 0x9c, 0x29, 0x87, 0x00, 0x9c, 0x3a, 0x02, 0x8c, 0x58, 0x0a, 0xc2,
+ 0x8c, 0x5c, 0x83, 0x01, 0x6d, 0x43, 0x02, 0x8c, 0x7a, 0x97, 0x01, 0x6d,
+ 0x49, 0x8b, 0x01, 0x6d, 0x51, 0x87, 0x01, 0x6d, 0x6b, 0x02, 0x8c, 0x7e,
+ 0x91, 0x01, 0x6d, 0x70, 0x83, 0x00, 0x93, 0xd8, 0x87, 0x00, 0x93, 0xe0,
+ 0x83, 0x01, 0x6c, 0x38, 0x83, 0x00, 0x99, 0x43, 0x02, 0x8c, 0x82, 0x97,
+ 0x00, 0x99, 0x49, 0x8b, 0x00, 0x99, 0x51, 0x87, 0x00, 0x99, 0x6b, 0x02,
+ 0x8c, 0x86, 0x91, 0x00, 0x99, 0x73, 0x02, 0x8c, 0x8a, 0xc2, 0x01, 0x4a,
+ 0x00, 0x99, 0x78, 0x91, 0x05, 0x58, 0xb1, 0x87, 0x05, 0x58, 0xab, 0x02,
+ 0x8c, 0x8e, 0xc2, 0x04, 0xc6, 0x05, 0x58, 0x99, 0x8b, 0x05, 0x58, 0x91,
+ 0x97, 0x05, 0x58, 0x88, 0x0a, 0xc2, 0x8c, 0x92, 0x83, 0x00, 0x97, 0xc3,
+ 0x02, 0x8c, 0xab, 0x97, 0x00, 0x97, 0xc9, 0x8b, 0x00, 0x97, 0xd1, 0x87,
+ 0x00, 0x97, 0xeb, 0x02, 0x8c, 0xaf, 0x91, 0x00, 0x97, 0xf3, 0x02, 0x8c,
+ 0xb3, 0xc2, 0x01, 0x4a, 0x00, 0x97, 0xf8, 0x83, 0x00, 0x97, 0x98, 0x87,
+ 0x00, 0x97, 0xa0, 0x83, 0x01, 0x6c, 0x60, 0x91, 0x05, 0x58, 0x31, 0x87,
+ 0x05, 0x58, 0x2b, 0x02, 0x8c, 0xb7, 0xc2, 0x04, 0xc6, 0x05, 0x58, 0x19,
+ 0x8b, 0x05, 0x58, 0x11, 0x97, 0x05, 0x58, 0x08, 0x83, 0x00, 0x93, 0x98,
+ 0x87, 0x00, 0x93, 0xa0, 0x83, 0x01, 0x6c, 0x30, 0x83, 0x00, 0x99, 0x03,
+ 0x02, 0x8c, 0xbb, 0x97, 0x00, 0x99, 0x09, 0x8b, 0x00, 0x99, 0x11, 0x87,
+ 0x00, 0x99, 0x2b, 0x02, 0x8c, 0xbf, 0x91, 0x00, 0x99, 0x33, 0x02, 0x8c,
+ 0xc3, 0xc2, 0x01, 0x4a, 0x00, 0x99, 0x38, 0x83, 0x00, 0x99, 0xc3, 0x02,
+ 0x8c, 0xc7, 0x97, 0x00, 0x99, 0xc9, 0x8b, 0x00, 0x99, 0xd1, 0x87, 0x00,
+ 0x99, 0xeb, 0x02, 0x8c, 0xcb, 0x91, 0x00, 0x99, 0xf1, 0xc2, 0x01, 0x4a,
+ 0x00, 0x99, 0xf8, 0x83, 0x00, 0x9a, 0x03, 0x02, 0x8c, 0xcf, 0x97, 0x00,
+ 0x9a, 0x09, 0x8b, 0x00, 0x9a, 0x11, 0x87, 0x00, 0x9a, 0x2b, 0x02, 0x8c,
+ 0xd3, 0x91, 0x00, 0x9a, 0x32, 0x02, 0x8c, 0xd7, 0x83, 0x00, 0x90, 0x58,
+ 0x87, 0x00, 0x90, 0x60, 0x83, 0x01, 0x6c, 0x00, 0x83, 0x00, 0x90, 0xd8,
+ 0x87, 0x00, 0x90, 0xe0, 0x83, 0x01, 0x6c, 0x08, 0x83, 0x00, 0x90, 0xf9,
+ 0x8b, 0x00, 0x9c, 0x09, 0x87, 0x00, 0x9c, 0x1a, 0x02, 0x8c, 0xdb, 0x83,
+ 0x00, 0x91, 0x03, 0x02, 0x8c, 0xdf, 0x97, 0x00, 0x91, 0x09, 0x8b, 0x00,
+ 0x91, 0x11, 0x87, 0x00, 0x91, 0x2b, 0x02, 0x8c, 0xe3, 0x91, 0x00, 0x91,
+ 0x31, 0xc2, 0x01, 0x4a, 0x00, 0x91, 0x38, 0x83, 0x00, 0x91, 0x98, 0x87,
+ 0x00, 0x91, 0xa1, 0x48, 0xbd, 0x4a, 0x42, 0x8c, 0xe7, 0x83, 0x01, 0x6c,
+ 0x18, 0x83, 0x00, 0x91, 0xc3, 0x02, 0x8c, 0xff, 0x97, 0x00, 0x91, 0xc9,
+ 0x8b, 0x00, 0x91, 0xd1, 0x87, 0x00, 0x91, 0xeb, 0x02, 0x8d, 0x03, 0x91,
+ 0x00, 0x91, 0xf3, 0x02, 0x8d, 0x07, 0xc2, 0x01, 0x4a, 0x00, 0x91, 0xf8,
+ 0x83, 0x01, 0x6d, 0x03, 0x02, 0x8d, 0x0b, 0x97, 0x01, 0x6d, 0x09, 0x8b,
+ 0x01, 0x6d, 0x11, 0x87, 0x01, 0x6d, 0x2b, 0x02, 0x8d, 0x0f, 0x91, 0x01,
+ 0x6d, 0x30, 0x83, 0x00, 0x91, 0x58, 0x87, 0x00, 0x91, 0x60, 0x83, 0x01,
+ 0x6c, 0x10, 0x83, 0x00, 0x92, 0x18, 0x87, 0x00, 0x92, 0x20, 0x83, 0x00,
+ 0x92, 0x38, 0x83, 0x00, 0x92, 0x43, 0x02, 0x8d, 0x13, 0x8b, 0x00, 0x92,
+ 0x51, 0x87, 0x00, 0x92, 0x6a, 0x02, 0x8d, 0x17, 0x83, 0x00, 0x92, 0x83,
+ 0x02, 0x8d, 0x1b, 0x97, 0x00, 0x92, 0x89, 0x8b, 0x00, 0x92, 0x91, 0x87,
+ 0x00, 0x92, 0xab, 0x02, 0x8d, 0x1f, 0x91, 0x00, 0x92, 0xb1, 0x19, 0x42,
+ 0x8d, 0x23, 0x83, 0x01, 0x6e, 0x03, 0x02, 0x8d, 0x35, 0x97, 0x01, 0x6e,
+ 0x09, 0x8b, 0x01, 0x6e, 0x11, 0x87, 0x01, 0x6e, 0x2b, 0x02, 0x8d, 0x39,
+ 0x91, 0x01, 0x6e, 0x30, 0x83, 0x00, 0x93, 0x58, 0x87, 0x00, 0x93, 0x60,
+ 0x83, 0x00, 0x94, 0x18, 0x87, 0x00, 0x94, 0x20, 0x83, 0x00, 0x94, 0x38,
+ 0x83, 0x00, 0x94, 0x43, 0x02, 0x8d, 0x3d, 0x8b, 0x00, 0x94, 0x51, 0x87,
+ 0x00, 0x94, 0x6a, 0x02, 0x8d, 0x41, 0x83, 0x01, 0x6e, 0x83, 0x02, 0x8d,
+ 0x45, 0x97, 0x01, 0x6e, 0x89, 0x8b, 0x01, 0x6e, 0x91, 0x87, 0x01, 0x6e,
+ 0xab, 0x02, 0x8d, 0x49, 0x91, 0x01, 0x6e, 0xb0, 0x83, 0x00, 0x94, 0x98,
+ 0x87, 0x00, 0x94, 0xa0, 0x83, 0x01, 0x6c, 0x40, 0x83, 0x00, 0x94, 0xc3,
+ 0x02, 0x8d, 0x4d, 0x97, 0x00, 0x94, 0xc9, 0x8b, 0x00, 0x94, 0xd1, 0x87,
+ 0x00, 0x94, 0xeb, 0x02, 0x8d, 0x51, 0x91, 0x00, 0x94, 0xf3, 0x02, 0x8d,
+ 0x55, 0xc2, 0x01, 0x4a, 0x00, 0x94, 0xf8, 0x83, 0x00, 0x95, 0x58, 0x87,
+ 0x00, 0x95, 0x60, 0x83, 0x00, 0x95, 0x78, 0x83, 0x00, 0x95, 0x83, 0x02,
+ 0x8d, 0x59, 0x8b, 0x00, 0x95, 0x91, 0x87, 0x00, 0x95, 0xaa, 0x02, 0x8d,
+ 0x5d, 0x83, 0x00, 0x95, 0xc3, 0x02, 0x8d, 0x61, 0x97, 0x00, 0x95, 0xc9,
+ 0x8b, 0x00, 0x95, 0xd1, 0x87, 0x00, 0x95, 0xeb, 0x02, 0x8d, 0x65, 0x91,
+ 0x00, 0x95, 0xf1, 0x19, 0x42, 0x8d, 0x69, 0x83, 0x01, 0x6e, 0x43, 0x02,
+ 0x8d, 0x7b, 0x97, 0x01, 0x6e, 0x49, 0x8b, 0x01, 0x6e, 0x51, 0x87, 0x01,
+ 0x6e, 0x6b, 0x02, 0x8d, 0x7f, 0x91, 0x01, 0x6e, 0x70, 0x83, 0x00, 0x96,
+ 0x58, 0x87, 0x00, 0x96, 0x60, 0x83, 0x00, 0x96, 0x78, 0x83, 0x00, 0x99,
+ 0x83, 0x02, 0x8d, 0x83, 0x97, 0x00, 0x99, 0x89, 0x8b, 0x00, 0x99, 0x91,
+ 0x87, 0x00, 0x99, 0xab, 0x02, 0x8d, 0x8d, 0x91, 0x00, 0x99, 0xb3, 0x02,
+ 0x8d, 0x91, 0xc2, 0x01, 0x4a, 0x00, 0x99, 0xb8, 0x83, 0x00, 0x9a, 0x98,
+ 0x87, 0x00, 0x9a, 0xa0, 0x83, 0x01, 0x6c, 0x90, 0x83, 0x00, 0x9a, 0xb9,
+ 0x8b, 0x00, 0x9c, 0x69, 0x87, 0x00, 0x9c, 0x7a, 0x02, 0x8d, 0x95, 0x83,
+ 0x00, 0x96, 0xd8, 0x87, 0x00, 0x96, 0xe0, 0x83, 0x01, 0x6c, 0x58, 0x83,
+ 0x00, 0x97, 0x03, 0x02, 0x8d, 0x99, 0x97, 0x00, 0x97, 0x09, 0x8b, 0x00,
+ 0x97, 0x11, 0x87, 0x00, 0x97, 0x2b, 0x02, 0x8d, 0x9d, 0x91, 0x00, 0x97,
+ 0x31, 0xc2, 0x01, 0x4a, 0x00, 0x97, 0x38, 0x83, 0x01, 0x6d, 0x83, 0x02,
+ 0x8d, 0xa1, 0x97, 0x01, 0x6d, 0x89, 0x8b, 0x01, 0x6d, 0x91, 0x87, 0x01,
+ 0x6d, 0xab, 0x02, 0x8d, 0xa5, 0x91, 0x01, 0x6d, 0xb0, 0x83, 0x00, 0x97,
+ 0x58, 0x87, 0x00, 0x97, 0x60, 0x83, 0x00, 0x97, 0x78, 0x83, 0x00, 0x98,
+ 0x18, 0x87, 0x00, 0x98, 0x20, 0x83, 0x01, 0x6c, 0x70, 0x83, 0x00, 0x9a,
+ 0x58, 0x87, 0x00, 0x9a, 0x60, 0x83, 0x00, 0x9a, 0x79, 0x8b, 0x00, 0x9c,
+ 0x49, 0x87, 0x00, 0x9c, 0x5a, 0x02, 0x8d, 0xa9, 0xd5, 0x36, 0x47, 0x00,
+ 0x9a, 0xe9, 0xc4, 0x01, 0xc3, 0x00, 0x9a, 0xf8, 0xc7, 0x09, 0x0d, 0x01,
+ 0x3e, 0x91, 0xc9, 0x03, 0xc8, 0x01, 0x56, 0xc8, 0xd6, 0x2d, 0xba, 0x01,
+ 0x17, 0xc9, 0xc8, 0x52, 0x09, 0x01, 0x17, 0xc1, 0xc7, 0x80, 0x70, 0x01,
+ 0x17, 0xb1, 0xc9, 0x16, 0x14, 0x01, 0x17, 0xa9, 0x48, 0x00, 0x5f, 0xc2,
+ 0x8d, 0xad, 0xd6, 0x2c, 0x86, 0x01, 0x17, 0x90, 0xc3, 0x77, 0x79, 0x08,
+ 0x7f, 0x89, 0xc4, 0xdc, 0x2d, 0x08, 0x7f, 0x70, 0xc6, 0x06, 0xe1, 0x00,
+ 0x00, 0xb8, 0xc8, 0xb7, 0x72, 0x01, 0x16, 0xf9, 0xc8, 0xbf, 0x4a, 0x01,
+ 0x16, 0xf1, 0xcc, 0x07, 0xc7, 0x01, 0x16, 0xe9, 0xc9, 0x00, 0xca, 0x01,
+ 0x16, 0xe0, 0x03, 0xc2, 0x8d, 0xb3, 0x45, 0x00, 0x8c, 0x42, 0x8d, 0xc2,
+ 0x97, 0x08, 0xec, 0xa1, 0x8b, 0x08, 0xec, 0x89, 0x83, 0x08, 0xec, 0x50,
+ 0x97, 0x08, 0xec, 0x70, 0x8b, 0x08, 0xec, 0x60, 0xc2, 0x00, 0xd0, 0x08,
+ 0xec, 0x19, 0x83, 0x08, 0xec, 0x10, 0xc2, 0x00, 0xd0, 0x08, 0xeb, 0xf1,
+ 0x83, 0x08, 0xeb, 0xe8, 0x83, 0x00, 0x50, 0xb1, 0xc2, 0x00, 0xd0, 0x00,
+ 0x52, 0xc8, 0x83, 0x00, 0x50, 0xc1, 0xc2, 0x00, 0xd0, 0x00, 0x52, 0xd0,
+ 0x83, 0x00, 0x50, 0xf9, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x00, 0x83, 0x00,
+ 0x51, 0x09, 0xc2, 0x00, 0xd0, 0x00, 0x51, 0x10, 0x94, 0x00, 0x54, 0x5b,
+ 0x02, 0x8d, 0xd8, 0x8e, 0x00, 0x54, 0x62, 0x02, 0x8d, 0xdc, 0x83, 0x00,
+ 0x54, 0xf9, 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x00, 0x83, 0x00, 0x55, 0x09,
+ 0xc2, 0x00, 0xd0, 0x00, 0x55, 0x10, 0x83, 0x00, 0x55, 0xf1, 0x8b, 0x00,
+ 0x56, 0x41, 0x97, 0x00, 0x56, 0x60, 0x8b, 0x00, 0x56, 0x00, 0x97, 0x00,
+ 0x56, 0x10, 0x94, 0x00, 0x56, 0x1b, 0x02, 0x8d, 0xe0, 0x8e, 0x00, 0x57,
+ 0x12, 0x02, 0x8d, 0xe4, 0x87, 0x00, 0x56, 0x29, 0x91, 0x00, 0x56, 0x48,
+ 0xcd, 0x7c, 0xb5, 0x0e, 0x92, 0x29, 0xcc, 0x8c, 0x0d, 0x08, 0x0c, 0x08,
+ 0x5b, 0x17, 0x97, 0xc2, 0x8d, 0xe8, 0xcc, 0x81, 0x21, 0x08, 0x0c, 0x68,
+ 0x55, 0x37, 0xd6, 0xc2, 0x8e, 0x10, 0xc4, 0x28, 0x48, 0x00, 0xff, 0x78,
+ 0xc4, 0x59, 0x13, 0x00, 0xff, 0xf3, 0x02, 0x8e, 0x3d, 0x49, 0x63, 0xd3,
+ 0xc2, 0x8e, 0x43, 0xcb, 0x9a, 0x26, 0x08, 0x0b, 0xd8, 0xc3, 0x46, 0x46,
+ 0x00, 0xff, 0xe9, 0x43, 0x02, 0x6f, 0xc2, 0x8e, 0x4f, 0xc8, 0xb6, 0xa2,
+ 0x08, 0x0b, 0xe1, 0xca, 0xa4, 0xf4, 0x08, 0x0c, 0x20, 0x0e, 0xc2, 0x8e,
+ 0x5e, 0xca, 0x9c, 0x84, 0x00, 0x1e, 0x79, 0xcc, 0x89, 0xfd, 0x00, 0x1f,
+ 0xa1, 0x49, 0x11, 0x74, 0xc2, 0x8e, 0x6a, 0xda, 0x1a, 0x64, 0x00, 0x1f,
+ 0xf0, 0x45, 0x03, 0x14, 0xc2, 0x8e, 0x76, 0x56, 0x2c, 0xde, 0xc2, 0x8e,
+ 0x88, 0xcc, 0x86, 0x49, 0x08, 0x0c, 0x61, 0xcd, 0x79, 0xdd, 0x08, 0x0d,
+ 0x00, 0xc4, 0x7a, 0x04, 0x00, 0xfd, 0xfb, 0x02, 0x8e, 0xa6, 0xca, 0x94,
+ 0x91, 0x00, 0xfe, 0x01, 0xcd, 0x42, 0x94, 0x00, 0xfd, 0xf1, 0xc8, 0x9c,
+ 0x0e, 0x00, 0x1e, 0xb1, 0xc9, 0xaa, 0x9e, 0x00, 0x1e, 0xa8, 0xc6, 0x57,
+ 0xbc, 0x00, 0xfd, 0xe9, 0x03, 0xc2, 0x8e, 0xac, 0xd0, 0x5b, 0xe2, 0x08,
+ 0x0c, 0x10, 0x46, 0x02, 0x0f, 0xc2, 0x8e, 0xb8, 0xd1, 0x56, 0x95, 0x00,
+ 0x1b, 0xa9, 0x46, 0x10, 0x38, 0xc2, 0x8e, 0xd4, 0xc9, 0xab, 0x76, 0x08,
+ 0x0c, 0x18, 0xcc, 0x4a, 0x69, 0x00, 0x1b, 0xd1, 0xc8, 0xab, 0xe3, 0x08,
+ 0x0b, 0xc8, 0xc4, 0x63, 0xdd, 0x00, 0x1c, 0x21, 0x0a, 0xc2, 0x8e, 0xe0,
+ 0x43, 0x02, 0xa0, 0xc2, 0x8e, 0xec, 0xca, 0xa1, 0xf2, 0x08, 0x0b, 0xd1,
+ 0xd1, 0x54, 0x53, 0x08, 0x0c, 0x48, 0xc9, 0xae, 0x10, 0x00, 0x1c, 0x39,
+ 0x4a, 0xa2, 0x9c, 0xc2, 0x8e, 0xf8, 0x14, 0x42, 0x8f, 0x2a, 0x43, 0x60,
+ 0xe8, 0xc2, 0x8f, 0x36, 0xdd, 0x10, 0x2f, 0x00, 0x1f, 0xb0, 0xce, 0x71,
+ 0x84, 0x08, 0x0b, 0xf9, 0xce, 0x72, 0x64, 0x08, 0x0c, 0x00, 0xcb, 0x20,
+ 0xb6, 0x00, 0x1e, 0x91, 0xd5, 0x31, 0xee, 0x00, 0x1e, 0x99, 0xd9, 0x20,
+ 0xa8, 0x00, 0x1e, 0xa0, 0xca, 0x37, 0x4e, 0x01, 0x17, 0x39, 0xc5, 0x07,
+ 0x62, 0x01, 0x13, 0x48, 0xc9, 0x00, 0xca, 0x01, 0x13, 0xb9, 0x43, 0x00,
+ 0xe2, 0xc2, 0x8f, 0x42, 0xd0, 0x5a, 0x92, 0x01, 0x53, 0xf3, 0x02, 0x8f,
+ 0x4e, 0xcb, 0x1a, 0x1a, 0x01, 0x54, 0x30, 0xc9, 0x07, 0x5e, 0x01, 0x13,
+ 0x39, 0xd1, 0x51, 0x01, 0x01, 0x55, 0x20, 0xd0, 0x03, 0xb7, 0x01, 0x4b,
+ 0xc1, 0x06, 0xc2, 0x8f, 0x54, 0x15, 0xc2, 0x8f, 0x5a, 0x0e, 0x42, 0x8f,
+ 0x66, 0xd8, 0x24, 0x3b, 0x01, 0x54, 0x41, 0xcf, 0x62, 0xb5, 0x01, 0x54,
+ 0x50, 0x8e, 0x08, 0x9b, 0x13, 0x02, 0x8f, 0x6c, 0x94, 0x08, 0x9a, 0x1a,
+ 0x02, 0x8f, 0x70, 0x97, 0x08, 0x9a, 0x61, 0x8b, 0x08, 0x9a, 0x41, 0x83,
+ 0x08, 0x99, 0xf0, 0x97, 0x08, 0x9a, 0x10, 0x8b, 0x08, 0x9a, 0x00, 0x47,
+ 0xb2, 0x2e, 0xc2, 0x8f, 0x74, 0x45, 0x04, 0xaf, 0xc2, 0x8f, 0x82, 0x83,
+ 0x08, 0x99, 0xa8, 0x83, 0x08, 0x99, 0xc1, 0xc2, 0x0d, 0xf6, 0x08, 0x99,
+ 0xb9, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0xb0, 0xc2, 0x00, 0xdb, 0x08, 0x99,
+ 0x99, 0x83, 0x08, 0x99, 0x90, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x69, 0x83,
+ 0x08, 0x99, 0x60, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x59, 0x83, 0x08, 0x99,
+ 0x50, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x39, 0x83, 0x08, 0x99, 0x31, 0x06,
+ 0x42, 0x8f, 0x8e, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x29, 0x16, 0xc2, 0x8f,
+ 0x98, 0x83, 0x08, 0x99, 0x20, 0xc2, 0x19, 0x2c, 0x08, 0x98, 0xf1, 0xc2,
+ 0x01, 0x30, 0x08, 0x98, 0xc9, 0xc2, 0x00, 0xc1, 0x08, 0x99, 0x19, 0x83,
+ 0x08, 0x99, 0x40, 0xc2, 0x00, 0xd0, 0x08, 0x98, 0xe9, 0x83, 0x08, 0x98,
+ 0xe0, 0xc2, 0x00, 0xd0, 0x08, 0x98, 0xd9, 0x83, 0x08, 0x98, 0xd0, 0xc2,
+ 0x00, 0xd0, 0x08, 0x98, 0xc1, 0x83, 0x08, 0x98, 0xb8, 0xc2, 0x00, 0xd0,
+ 0x08, 0x98, 0xb1, 0x83, 0x08, 0x98, 0xa8, 0x97, 0x08, 0x98, 0xa1, 0x8b,
+ 0x08, 0x98, 0x81, 0x83, 0x08, 0x98, 0x30, 0x97, 0x08, 0x98, 0x50, 0x8b,
+ 0x08, 0x98, 0x40, 0xc4, 0x1e, 0x97, 0x08, 0x9a, 0x69, 0xc5, 0x40, 0xe7,
+ 0x08, 0x98, 0x18, 0xc7, 0x7a, 0x7f, 0x08, 0x99, 0xe9, 0xc7, 0x14, 0x39,
+ 0x08, 0x98, 0x10, 0xca, 0x1e, 0x8a, 0x08, 0x98, 0x09, 0xd7, 0x29, 0x29,
+ 0x08, 0x98, 0x00, 0x15, 0xc2, 0x8f, 0xa2, 0xdb, 0x17, 0x10, 0x0f, 0xc9,
+ 0x50, 0xc9, 0xb1, 0xee, 0x00, 0xe5, 0xf9, 0x95, 0x00, 0xe4, 0xd0, 0x03,
+ 0xc2, 0x8f, 0xae, 0xc2, 0x49, 0x0c, 0x00, 0xe5, 0xa9, 0xc2, 0x02, 0x0a,
+ 0x00, 0xe5, 0x91, 0x87, 0x00, 0xe5, 0x88, 0xc2, 0x00, 0xc4, 0x00, 0xe5,
+ 0xe9, 0xc2, 0x00, 0x74, 0x00, 0xe5, 0xd1, 0x90, 0x00, 0xe4, 0x80, 0xc9,
+ 0xb1, 0x04, 0x00, 0xe5, 0xc9, 0x03, 0x42, 0x8f, 0xb9, 0xc4, 0x8c, 0x72,
+ 0x00, 0xe5, 0xc1, 0x90, 0x00, 0xe4, 0xa0, 0xc3, 0x00, 0xd0, 0x00, 0xe5,
+ 0x79, 0xc2, 0x00, 0x71, 0x00, 0xe5, 0x58, 0x0a, 0xc2, 0x8f, 0xc1, 0xc2,
+ 0x00, 0x71, 0x00, 0xe5, 0x61, 0xc2, 0x00, 0xd1, 0x00, 0xe5, 0x50, 0xc3,
+ 0x11, 0xef, 0x00, 0xe5, 0x41, 0xc2, 0x00, 0xd1, 0x00, 0xe5, 0x08, 0xc3,
+ 0x00, 0xd0, 0x00, 0xe5, 0x31, 0xc2, 0x00, 0xd1, 0x00, 0xe4, 0x90, 0xc3,
+ 0x01, 0x50, 0x00, 0xe5, 0x29, 0xc2, 0x00, 0xd1, 0x00, 0xe4, 0xc8, 0xc3,
+ 0x01, 0x50, 0x00, 0xe5, 0x21, 0xc2, 0x00, 0xb1, 0x00, 0xe4, 0xf0, 0xc3,
+ 0x01, 0x50, 0x00, 0xe4, 0xf9, 0xc2, 0x00, 0xc4, 0x00, 0xe4, 0xb0, 0x90,
+ 0x00, 0x85, 0x01, 0xc2, 0x00, 0xc4, 0x00, 0x86, 0x68, 0xc2, 0x00, 0xd1,
+ 0x00, 0x85, 0x11, 0xc3, 0x00, 0xd0, 0x00, 0x85, 0xb0, 0xc2, 0x00, 0xc4,
+ 0x00, 0x85, 0x31, 0xc3, 0x01, 0x50, 0x00, 0x85, 0x78, 0x90, 0x00, 0x85,
+ 0x39, 0x94, 0x00, 0x85, 0x90, 0xc2, 0x00, 0xd1, 0x00, 0x85, 0x49, 0xc3,
+ 0x01, 0x50, 0x00, 0x85, 0xa8, 0xc2, 0x00, 0xb1, 0x00, 0x85, 0x71, 0xc3,
+ 0x01, 0x50, 0x00, 0x85, 0xa0, 0xc2, 0x00, 0xd1, 0x00, 0x85, 0x89, 0xc3,
+ 0x11, 0xef, 0x00, 0x85, 0xc0, 0x0a, 0xc2, 0x8f, 0xcd, 0xc2, 0x00, 0xd1,
+ 0x00, 0x85, 0xd1, 0xc2, 0x00, 0x71, 0x00, 0x85, 0xe0, 0xc2, 0x00, 0x71,
+ 0x00, 0x85, 0xd9, 0xc3, 0x00, 0xd0, 0x00, 0x85, 0xf8, 0x03, 0xc2, 0x8f,
+ 0xd9, 0x87, 0x00, 0x86, 0x09, 0xc2, 0x02, 0x0a, 0x00, 0x86, 0x11, 0xc2,
+ 0x49, 0x0c, 0x00, 0x86, 0x28, 0x90, 0x00, 0x86, 0x81, 0xc2, 0x00, 0x74,
+ 0x00, 0x87, 0xd1, 0xc2, 0x00, 0xc4, 0x00, 0x87, 0xe8, 0xc2, 0x00, 0xd1,
+ 0x00, 0x86, 0x91, 0xc3, 0x00, 0xd0, 0x00, 0x87, 0x30, 0x90, 0x00, 0x86,
+ 0xa1, 0xc4, 0x8c, 0x72, 0x00, 0x87, 0xc0, 0xc2, 0x00, 0xc4, 0x00, 0x86,
+ 0xb1, 0xc3, 0x01, 0x50, 0x00, 0x86, 0xf8, 0x03, 0xc2, 0x8f, 0xe1, 0xc9,
+ 0xb1, 0x04, 0x00, 0x87, 0xc8, 0xc2, 0x00, 0xd1, 0x00, 0x86, 0xc9, 0xc3,
+ 0x01, 0x50, 0x00, 0x87, 0x28, 0x95, 0x00, 0x86, 0xd1, 0xc9, 0xb1, 0xee,
+ 0x00, 0x87, 0xf8, 0xc2, 0x00, 0xb1, 0x00, 0x86, 0xf1, 0xc3, 0x01, 0x50,
+ 0x00, 0x87, 0x20, 0xc2, 0x00, 0xd1, 0x00, 0x87, 0x09, 0xc3, 0x11, 0xef,
+ 0x00, 0x87, 0x40, 0x0a, 0xc2, 0x8f, 0xe9, 0xc2, 0x00, 0xd1, 0x00, 0x87,
+ 0x51, 0xc2, 0x00, 0x71, 0x00, 0x87, 0x60, 0xc2, 0x00, 0x71, 0x00, 0x87,
+ 0x59, 0xc3, 0x00, 0xd0, 0x00, 0x87, 0x78, 0x03, 0xc2, 0x8f, 0xf5, 0x87,
+ 0x00, 0x87, 0x89, 0xc2, 0x02, 0x0a, 0x00, 0x87, 0x91, 0xc2, 0x49, 0x0c,
+ 0x00, 0x87, 0xa8, 0x90, 0x01, 0x68, 0x01, 0xc2, 0x00, 0xc4, 0x01, 0x69,
+ 0x68, 0xc2, 0x00, 0xd1, 0x01, 0x68, 0x11, 0xc3, 0x00, 0xd0, 0x01, 0x68,
+ 0xb0, 0xc2, 0x00, 0xc4, 0x01, 0x68, 0x31, 0xc3, 0x01, 0x50, 0x01, 0x68,
+ 0x78, 0x90, 0x01, 0x68, 0x39, 0x94, 0x01, 0x68, 0x90, 0xc2, 0x00, 0xd1,
+ 0x01, 0x68, 0x49, 0xc3, 0x01, 0x50, 0x01, 0x68, 0xa8, 0xc2, 0x00, 0xb1,
+ 0x01, 0x68, 0x71, 0xc3, 0x01, 0x50, 0x01, 0x68, 0xa0, 0xc2, 0x00, 0xd1,
+ 0x01, 0x68, 0x89, 0xc3, 0x11, 0xef, 0x01, 0x68, 0xc0, 0x0a, 0xc2, 0x90,
+ 0x00, 0xc2, 0x00, 0xd1, 0x01, 0x68, 0xd1, 0xc2, 0x00, 0x71, 0x01, 0x68,
+ 0xe0, 0xc2, 0x00, 0x71, 0x01, 0x68, 0xd9, 0xc3, 0x00, 0xd0, 0x01, 0x68,
+ 0xf8, 0x03, 0xc2, 0x90, 0x0c, 0x87, 0x01, 0x69, 0x09, 0xc2, 0x02, 0x0a,
+ 0x01, 0x69, 0x11, 0xc2, 0x49, 0x0c, 0x01, 0x69, 0x28, 0xc3, 0xc8, 0x92,
+ 0x01, 0x60, 0x09, 0xc6, 0xc8, 0x01, 0x01, 0x61, 0x40, 0xc4, 0xe4, 0x2b,
+ 0x01, 0x60, 0x21, 0xc4, 0xdf, 0x03, 0x01, 0x60, 0x39, 0xc5, 0xdd, 0xb2,
+ 0x01, 0x60, 0x60, 0x07, 0xc2, 0x90, 0x14, 0xc3, 0x01, 0xbd, 0x01, 0x61,
+ 0x09, 0x97, 0x01, 0x61, 0x19, 0x91, 0x01, 0x61, 0x30, 0xc6, 0xd3, 0x73,
+ 0x01, 0x60, 0x31, 0xc5, 0xda, 0x24, 0x01, 0x60, 0x40, 0x42, 0x25, 0xa1,
+ 0xc2, 0x90, 0x1e, 0xcb, 0x98, 0x00, 0x01, 0x60, 0x51, 0x47, 0x1c, 0xa0,
+ 0x42, 0x90, 0x28, 0xc6, 0xc4, 0x49, 0x01, 0x60, 0x71, 0xcf, 0x60, 0xd5,
+ 0x01, 0x61, 0x70, 0xc2, 0x06, 0xc6, 0x01, 0x60, 0x89, 0xc2, 0x00, 0x16,
+ 0x01, 0x60, 0xc8, 0xc5, 0xcb, 0xee, 0x01, 0x60, 0x91, 0x87, 0x01, 0x60,
+ 0xd0, 0xc4, 0xe4, 0xc7, 0x01, 0x60, 0xa1, 0x0a, 0xc2, 0x90, 0x34, 0xc9,
+ 0xae, 0x19, 0x01, 0x61, 0x11, 0xc8, 0xae, 0x6b, 0x01, 0x61, 0x22, 0x02,
+ 0x90, 0x41, 0xc5, 0xd9, 0x52, 0x01, 0x60, 0xa9, 0xc2, 0x00, 0xba, 0x01,
+ 0x60, 0xe1, 0xcb, 0x97, 0x92, 0x01, 0x61, 0x68, 0xc4, 0xac, 0x24, 0x01,
+ 0x60, 0xb9, 0xc3, 0x02, 0x44, 0x01, 0x61, 0x50, 0xc5, 0x7b, 0xac, 0x01,
+ 0x60, 0xe9, 0xcd, 0x7b, 0xa4, 0x01, 0x61, 0x78, 0xc3, 0xc8, 0x92, 0x01,
+ 0x61, 0x89, 0xc6, 0xc8, 0x01, 0x01, 0x62, 0xc0, 0xc4, 0xe4, 0x2b, 0x01,
+ 0x61, 0xa1, 0xc4, 0xdf, 0x03, 0x01, 0x61, 0xb9, 0xc5, 0xdd, 0xb2, 0x01,
+ 0x61, 0xe0, 0x07, 0xc2, 0x90, 0x47, 0xc3, 0x01, 0xbd, 0x01, 0x62, 0x89,
+ 0x97, 0x01, 0x62, 0x99, 0x91, 0x01, 0x62, 0xb0, 0xc6, 0xd3, 0x73, 0x01,
+ 0x61, 0xb1, 0xc5, 0xda, 0x24, 0x01, 0x61, 0xc0, 0x42, 0x25, 0xa1, 0xc2,
+ 0x90, 0x51, 0xcb, 0x98, 0x00, 0x01, 0x61, 0xd1, 0x47, 0x1c, 0xa0, 0x42,
+ 0x90, 0x5b, 0xc6, 0xc4, 0x49, 0x01, 0x61, 0xf1, 0xcf, 0x60, 0xd5, 0x01,
+ 0x62, 0xf0, 0xc2, 0x06, 0xc6, 0x01, 0x62, 0x09, 0xc2, 0x00, 0x16, 0x01,
+ 0x62, 0x48, 0xc5, 0xcb, 0xee, 0x01, 0x62, 0x11, 0x87, 0x01, 0x62, 0x50,
+ 0xc4, 0xe4, 0xc7, 0x01, 0x62, 0x21, 0x0a, 0xc2, 0x90, 0x67, 0xc9, 0xae,
+ 0x19, 0x01, 0x62, 0x91, 0xc8, 0xae, 0x6b, 0x01, 0x62, 0xa2, 0x02, 0x90,
+ 0x74, 0xc5, 0xd9, 0x52, 0x01, 0x62, 0x29, 0xc2, 0x00, 0xba, 0x01, 0x62,
+ 0x61, 0xcb, 0x97, 0x92, 0x01, 0x62, 0xe8, 0xc4, 0xac, 0x24, 0x01, 0x62,
+ 0x39, 0xc3, 0x02, 0x44, 0x01, 0x62, 0xd0, 0xc5, 0x7b, 0xac, 0x01, 0x62,
+ 0x69, 0xcd, 0x7b, 0xa4, 0x01, 0x62, 0xf8, 0xc7, 0x14, 0x39, 0x00, 0x58,
+ 0x11, 0xc7, 0x7a, 0x7f, 0x00, 0x59, 0xe8, 0xc5, 0x40, 0xe7, 0x00, 0x58,
+ 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x5a, 0x68, 0x83, 0x00, 0x58, 0x31, 0x8b,
+ 0x00, 0x58, 0x81, 0x97, 0x00, 0x58, 0xa0, 0x8b, 0x00, 0x58, 0x40, 0x97,
+ 0x00, 0x58, 0x50, 0x47, 0xb2, 0x2e, 0xc2, 0x90, 0x7a, 0x83, 0x00, 0x59,
+ 0xa8, 0x83, 0x00, 0x58, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x58, 0xb0, 0x83,
+ 0x00, 0x58, 0xb9, 0xc2, 0x00, 0xd0, 0x00, 0x58, 0xc0, 0xc2, 0x01, 0x30,
+ 0x00, 0x58, 0xc9, 0xc2, 0x19, 0x2c, 0x00, 0x58, 0xf1, 0xc2, 0x00, 0xc1,
+ 0x00, 0x59, 0x19, 0x83, 0x00, 0x59, 0x40, 0x83, 0x00, 0x58, 0xd1, 0xc2,
+ 0x00, 0xd0, 0x00, 0x58, 0xd8, 0x83, 0x00, 0x58, 0xe1, 0xc2, 0x00, 0xd0,
+ 0x00, 0x58, 0xe8, 0x16, 0xc2, 0x90, 0x88, 0x83, 0x00, 0x59, 0x21, 0xc2,
+ 0x00, 0xd0, 0x00, 0x59, 0x28, 0x06, 0xc2, 0x90, 0x92, 0x83, 0x00, 0x59,
+ 0x31, 0xc2, 0x00, 0xd0, 0x00, 0x59, 0x38, 0x83, 0x00, 0x59, 0x51, 0xc2,
+ 0x00, 0xd0, 0x00, 0x59, 0x58, 0x83, 0x00, 0x59, 0x61, 0xc2, 0x00, 0xd0,
+ 0x00, 0x59, 0x68, 0x83, 0x00, 0x59, 0x79, 0xc2, 0x19, 0x2c, 0x00, 0x5a,
+ 0xf8, 0x83, 0x00, 0x59, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x5a, 0xe1, 0xc2,
+ 0x00, 0xd0, 0x00, 0x5a, 0xe8, 0x83, 0x00, 0x59, 0x91, 0xc2, 0x00, 0xdb,
+ 0x00, 0x59, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0x59, 0xb1, 0xc2, 0x0d, 0xf6,
+ 0x00, 0x59, 0xb9, 0x83, 0x00, 0x59, 0xc0, 0x83, 0x00, 0x59, 0xf1, 0x8b,
+ 0x00, 0x5a, 0x41, 0x97, 0x00, 0x5a, 0x60, 0x8b, 0x00, 0x5a, 0x00, 0x97,
+ 0x00, 0x5a, 0x10, 0x94, 0x00, 0x5a, 0x1b, 0x02, 0x90, 0x9c, 0x8e, 0x00,
+ 0x5b, 0x12, 0x02, 0x90, 0xa0, 0xc2, 0x02, 0xa0, 0x00, 0x5b, 0x41, 0xc4,
+ 0x02, 0xde, 0x00, 0x5b, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x5b, 0x51, 0xc3,
+ 0x0d, 0x14, 0x00, 0x5b, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x5b, 0x61, 0xc4,
+ 0x18, 0x10, 0x00, 0x5b, 0x68, 0xc7, 0x08, 0x79, 0x00, 0x5b, 0x91, 0xc4,
+ 0x01, 0xce, 0x00, 0x5b, 0x99, 0xc9, 0x67, 0x38, 0x00, 0x5b, 0xa9, 0xc6,
+ 0x06, 0xdb, 0x00, 0x5b, 0xb0, 0xc8, 0x08, 0x79, 0x00, 0x5b, 0xa1, 0xca,
+ 0xa7, 0x88, 0x00, 0x5b, 0xb8, 0xc3, 0x02, 0xdf, 0x0f, 0x68, 0x1b, 0x02,
+ 0x90, 0xa4, 0xc4, 0x0d, 0x0e, 0x0f, 0x68, 0x62, 0x02, 0x90, 0xa8, 0x91,
+ 0x0f, 0x68, 0x13, 0x02, 0x90, 0xae, 0xc4, 0x18, 0x12, 0x0f, 0x68, 0x5a,
+ 0x02, 0x90, 0xb2, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x28, 0xc2, 0x00, 0x33,
+ 0x0f, 0x68, 0x23, 0x02, 0x90, 0xb8, 0xc3, 0x0d, 0x0f, 0x0f, 0x68, 0x6a,
+ 0x02, 0x90, 0xbc, 0xc2, 0x00, 0x5f, 0x0f, 0x68, 0x2b, 0x02, 0x90, 0xc2,
+ 0xc3, 0x45, 0x6b, 0x0f, 0x68, 0x72, 0x02, 0x90, 0xc6, 0xc7, 0x0d, 0x04,
+ 0x0f, 0x68, 0x99, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xe0, 0xc2, 0x0d, 0x10,
+ 0x0f, 0x68, 0x7b, 0x02, 0x90, 0xcc, 0x00, 0x42, 0x90, 0xd2, 0xc2, 0x0d,
+ 0x10, 0x0f, 0x68, 0x83, 0x02, 0x90, 0xde, 0x00, 0x42, 0x90, 0xe4, 0xc9,
+ 0x57, 0x20, 0x0f, 0x69, 0x60, 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xd1, 0xc8,
+ 0x4b, 0x94, 0x0f, 0x69, 0x18, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x68, 0xc7,
+ 0x0d, 0x04, 0x0f, 0x68, 0xd9, 0xc8, 0x4b, 0x94, 0x0f, 0x69, 0x20, 0xc9,
+ 0x57, 0x20, 0x0f, 0x69, 0xd0, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0xd8, 0xc8,
+ 0x0d, 0x03, 0x0f, 0x69, 0xc0, 0xc8, 0x0d, 0x03, 0x0f, 0x69, 0xc8, 0xc6,
+ 0x2d, 0xd0, 0x01, 0x3e, 0x21, 0xc4, 0x0e, 0xa6, 0x01, 0x3e, 0x18, 0xd8,
+ 0x21, 0x23, 0x01, 0x39, 0xe1, 0xc8, 0x0a, 0xff, 0x01, 0x39, 0x91, 0xca,
+ 0x22, 0x51, 0x01, 0x39, 0x59, 0xc5, 0x0d, 0x20, 0x01, 0x38, 0xd8, 0x9a,
+ 0x01, 0x21, 0x19, 0xc2, 0x01, 0x25, 0x0f, 0xa6, 0xb0, 0xc5, 0x5f, 0x98,
+ 0x0f, 0xae, 0x09, 0xca, 0x9e, 0xf0, 0x0f, 0xa6, 0x10, 0xcc, 0x81, 0x81,
+ 0x0f, 0xa7, 0x69, 0xcb, 0x9a, 0x5d, 0x0f, 0xa7, 0x60, 0xcd, 0x78, 0xa5,
+ 0x01, 0x1c, 0x81, 0xcd, 0x7a, 0x45, 0x01, 0x1c, 0x78, 0xc9, 0x3b, 0x79,
+ 0x08, 0x7c, 0x49, 0x44, 0x02, 0x9f, 0xc2, 0x90, 0xf0, 0xc3, 0x01, 0x5d,
+ 0x08, 0x7c, 0x30, 0x49, 0x04, 0xf9, 0xc2, 0x90, 0xfc, 0x44, 0x05, 0x18,
+ 0x42, 0x91, 0x08, 0x0e, 0xc2, 0x91, 0x14, 0xc3, 0xb5, 0x3e, 0x08, 0x7c,
+ 0x01, 0xc2, 0x00, 0x67, 0x08, 0x7b, 0xe1, 0x15, 0xc2, 0x91, 0x20, 0xc3,
+ 0x20, 0x18, 0x08, 0x7b, 0xd1, 0xc3, 0x00, 0x4e, 0x08, 0x7b, 0xc9, 0xc4,
+ 0xe0, 0xe7, 0x08, 0x7b, 0xb9, 0xc4, 0x4a, 0xb9, 0x08, 0x7b, 0xb1, 0xca,
+ 0x9b, 0x8a, 0x08, 0x7b, 0xa9, 0xc5, 0x4a, 0xb3, 0x08, 0x7b, 0xa1, 0xc3,
+ 0x7e, 0x89, 0x08, 0x7b, 0x99, 0xca, 0x9c, 0xa2, 0x08, 0x7b, 0x91, 0xc4,
+ 0xe3, 0x27, 0x08, 0x7b, 0x89, 0xc5, 0xa5, 0xfd, 0x08, 0x7b, 0x81, 0xc4,
+ 0x5d, 0xe2, 0x08, 0x7b, 0xf0, 0xd1, 0x53, 0xa9, 0x08, 0x79, 0x31, 0x47,
+ 0x34, 0x2f, 0xc2, 0x91, 0x2a, 0x0e, 0x42, 0x91, 0x3b, 0x43, 0x2f, 0x2a,
+ 0xc2, 0x91, 0x47, 0x47, 0x02, 0x0e, 0x42, 0x91, 0x53, 0xc3, 0x09, 0x41,
+ 0x08, 0x67, 0xe1, 0x42, 0x02, 0x09, 0xc2, 0x91, 0xb0, 0xc3, 0x05, 0x14,
+ 0x08, 0x67, 0xd2, 0x02, 0x91, 0xbc, 0x97, 0x08, 0x67, 0x53, 0x02, 0x91,
+ 0xc0, 0x87, 0x08, 0x66, 0x4b, 0x02, 0x91, 0xce, 0x4a, 0xa7, 0x74, 0xc2,
+ 0x92, 0x2e, 0x4b, 0x95, 0x61, 0xc2, 0x92, 0x3a, 0xc8, 0xb9, 0x6a, 0x08,
+ 0x67, 0x19, 0x91, 0x08, 0x66, 0xdb, 0x02, 0x92, 0x46, 0x83, 0x08, 0x66,
+ 0x03, 0x02, 0x92, 0x50, 0x8b, 0x08, 0x66, 0x83, 0x02, 0x92, 0x64, 0xc7,
+ 0xc9, 0x9d, 0x08, 0x66, 0x50, 0x87, 0x08, 0x64, 0x4b, 0x02, 0x92, 0x68,
+ 0xc8, 0xb9, 0x6a, 0x08, 0x65, 0x19, 0x91, 0x08, 0x64, 0xdb, 0x02, 0x92,
+ 0xc8, 0x4a, 0xa7, 0x74, 0xc2, 0x92, 0xd2, 0x4b, 0x95, 0x61, 0xc2, 0x92,
+ 0xde, 0x97, 0x08, 0x65, 0x53, 0x02, 0x92, 0xea, 0x83, 0x08, 0x64, 0x03,
+ 0x02, 0x92, 0xf8, 0x8b, 0x08, 0x64, 0x83, 0x02, 0x93, 0x0c, 0xc7, 0xc9,
+ 0x9d, 0x08, 0x64, 0x50, 0xc4, 0xe1, 0xaf, 0x08, 0x62, 0x41, 0x91, 0x08,
+ 0x60, 0x33, 0x02, 0x93, 0x10, 0x83, 0x08, 0x60, 0x03, 0x02, 0x93, 0x23,
+ 0x07, 0xc2, 0x93, 0x52, 0x8b, 0x08, 0x60, 0x1a, 0x02, 0x93, 0x72, 0x83,
+ 0x08, 0x60, 0x0b, 0x02, 0x93, 0x7a, 0x87, 0x08, 0x60, 0x2b, 0x02, 0x93,
+ 0xad, 0x11, 0xc2, 0x93, 0xbf, 0x8b, 0x08, 0x60, 0x22, 0x02, 0x93, 0xca,
+ 0x16, 0xc2, 0x93, 0xce, 0xc3, 0x05, 0x14, 0x08, 0x54, 0xe8, 0x42, 0x02,
+ 0x1c, 0xc2, 0x93, 0xda, 0x16, 0xc2, 0x93, 0xe4, 0xc3, 0x2b, 0xb9, 0x08,
+ 0x54, 0xd1, 0x09, 0xc2, 0x93, 0xf4, 0x42, 0x0e, 0x9a, 0xc2, 0x94, 0x00,
+ 0x43, 0xe6, 0x2c, 0xc2, 0x94, 0x08, 0xc3, 0x7e, 0x89, 0x08, 0x54, 0x29,
+ 0xc3, 0x0f, 0x9a, 0x08, 0x54, 0x21, 0xc4, 0x19, 0x60, 0x08, 0x54, 0x19,
+ 0x0a, 0xc2, 0x94, 0x14, 0xc3, 0x0d, 0xff, 0x08, 0x54, 0x09, 0xc3, 0x72,
+ 0xf0, 0x08, 0x54, 0x39, 0xc3, 0x85, 0xf5, 0x08, 0x54, 0x41, 0x0d, 0xc2,
+ 0x94, 0x20, 0xc4, 0x3a, 0x01, 0x08, 0x54, 0x61, 0xc3, 0x0d, 0xf6, 0x08,
+ 0x54, 0x71, 0xc3, 0xb1, 0x0d, 0x08, 0x54, 0x81, 0x03, 0x42, 0x94, 0x2c,
+ 0xcd, 0x7a, 0xa0, 0x0f, 0xad, 0x99, 0x44, 0x19, 0xb0, 0x42, 0x94, 0x38,
+ 0xc2, 0x00, 0xd1, 0x08, 0x1a, 0x81, 0xc3, 0x2b, 0x88, 0x08, 0x1a, 0x89,
+ 0xc3, 0x46, 0xf6, 0x08, 0x1a, 0x91, 0x06, 0xc2, 0x94, 0x4a, 0x87, 0x08,
+ 0x1a, 0xa3, 0x02, 0x94, 0x54, 0x1c, 0xc2, 0x94, 0x58, 0x8b, 0x08, 0x1a,
+ 0xcb, 0x02, 0x94, 0x64, 0xc4, 0xe0, 0xb3, 0x08, 0x1a, 0xd1, 0xc3, 0x39,
+ 0xa6, 0x08, 0x1a, 0xd9, 0xc5, 0xdb, 0x19, 0x08, 0x1a, 0xe1, 0xc5, 0xdb,
+ 0x6e, 0x08, 0x1a, 0xe9, 0x18, 0xc2, 0x94, 0x6c, 0xc4, 0xcf, 0x74, 0x08,
+ 0x1a, 0xf9, 0xc3, 0x26, 0x92, 0x08, 0x1b, 0x01, 0x15, 0xc2, 0x94, 0x78,
+ 0x16, 0xc2, 0x94, 0x82, 0x97, 0x08, 0x1b, 0x19, 0xc5, 0xdd, 0x1c, 0x08,
+ 0x1b, 0x21, 0x1b, 0xc2, 0x94, 0x8e, 0x91, 0x08, 0x1b, 0x4b, 0x02, 0x94,
+ 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0x1b, 0x60, 0xc2, 0x00, 0x51, 0x08, 0x18,
+ 0x09, 0x0d, 0xc2, 0x94, 0xac, 0xc2, 0x00, 0x06, 0x08, 0x18, 0x19, 0x87,
+ 0x08, 0x18, 0x23, 0x02, 0x94, 0xbe, 0xc2, 0x00, 0x5f, 0x08, 0x18, 0x29,
+ 0xc2, 0x0a, 0xe2, 0x08, 0x18, 0x31, 0xc2, 0x01, 0x7f, 0x08, 0x18, 0x39,
+ 0x16, 0xc2, 0x94, 0xe2, 0x8b, 0x08, 0x18, 0x4b, 0x02, 0x94, 0xec, 0x83,
+ 0x08, 0x18, 0x01, 0x91, 0x08, 0x18, 0x79, 0x12, 0xc2, 0x94, 0xf0, 0x15,
+ 0xc2, 0x94, 0xfa, 0x97, 0x08, 0x18, 0xb3, 0x02, 0x95, 0x06, 0xc3, 0x28,
+ 0x28, 0x08, 0x18, 0xe1, 0xc2, 0x0c, 0x43, 0x08, 0x19, 0x69, 0xcc, 0x82,
+ 0xc5, 0x08, 0x19, 0x70, 0xc3, 0x05, 0x14, 0x08, 0x19, 0x01, 0x42, 0x02,
+ 0x09, 0xc2, 0x95, 0x0a, 0xc3, 0x09, 0x41, 0x08, 0x19, 0x10, 0x83, 0x00,
+ 0xe2, 0xf8, 0x99, 0x00, 0xe3, 0x19, 0x8f, 0x00, 0xe3, 0x11, 0x8c, 0x00,
+ 0xe3, 0x09, 0x8d, 0x00, 0xe3, 0x00, 0xc7, 0x56, 0x8e, 0x01, 0x5d, 0xd1,
+ 0xd1, 0x56, 0x84, 0x01, 0x5d, 0xd8, 0x90, 0x08, 0x25, 0x90, 0xc3, 0x1c,
+ 0x63, 0x08, 0x25, 0xb1, 0xc2, 0x02, 0x2b, 0x08, 0x25, 0xe9, 0xc2, 0x00,
+ 0xb0, 0x08, 0x26, 0x29, 0x16, 0x42, 0x95, 0x16, 0x83, 0x08, 0x26, 0x51,
+ 0xc2, 0x00, 0xd0, 0x08, 0x26, 0x60, 0x90, 0x08, 0x26, 0xd0, 0xc3, 0x1c,
+ 0x63, 0x08, 0x26, 0xf1, 0xc2, 0x02, 0x2b, 0x08, 0x27, 0x29, 0xc2, 0x00,
+ 0xb0, 0x08, 0x27, 0x69, 0x16, 0x42, 0x95, 0x20, 0x83, 0x08, 0x27, 0x91,
+ 0xc2, 0x00, 0xd0, 0x08, 0x27, 0xa0, 0x0d, 0xc2, 0x95, 0x2a, 0xcb, 0x93,
+ 0x7d, 0x0e, 0x7d, 0x89, 0xc8, 0x4e, 0x4b, 0x0e, 0x7d, 0x80, 0xc6, 0xca,
+ 0xa9, 0x0e, 0x7a, 0x88, 0x0d, 0xc2, 0x95, 0x36, 0x16, 0xc2, 0x95, 0x42,
+ 0x44, 0xe0, 0x6b, 0xc2, 0x95, 0x4e, 0x49, 0x75, 0xe7, 0xc2, 0x95, 0x5b,
+ 0xce, 0x69, 0xa0, 0x0e, 0x7c, 0xb9, 0x12, 0xc2, 0x95, 0x68, 0xce, 0x6d,
+ 0xa2, 0x0e, 0x7c, 0x98, 0x00, 0x42, 0x95, 0x72, 0x00, 0x42, 0x95, 0x87,
+ 0x42, 0x00, 0x97, 0xc2, 0x95, 0x93, 0xc8, 0xb8, 0x72, 0x0e, 0x7b, 0xf8,
+ 0xcb, 0x87, 0x3a, 0x0e, 0x7b, 0xe1, 0xce, 0x69, 0xa0, 0x0e, 0x7b, 0xd9,
+ 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, 0xd1, 0xc8, 0xbf, 0x6a, 0x0e, 0x7b, 0xc8,
+ 0x45, 0x4e, 0x46, 0xc2, 0x95, 0x9f, 0xce, 0x69, 0xa0, 0x0e, 0x7b, 0xb8,
+ 0xc6, 0x6d, 0xaa, 0x0e, 0x7b, 0xa1, 0xca, 0x93, 0x7e, 0x0e, 0x7b, 0x98,
+ 0xcc, 0x84, 0x51, 0x0e, 0x7d, 0x59, 0xc7, 0xc8, 0x69, 0x0e, 0x7d, 0x51,
+ 0xc3, 0xe5, 0x9c, 0x0e, 0x7d, 0x48, 0xc8, 0xb8, 0x12, 0x0e, 0x79, 0x68,
+ 0xc8, 0xbb, 0xe2, 0x0e, 0x79, 0xc8, 0xc9, 0x78, 0xd9, 0x0e, 0x78, 0xc1,
+ 0x43, 0x01, 0x55, 0x42, 0x95, 0xab, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0x89,
+ 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x28, 0xc7, 0x93, 0xee, 0x0e, 0x79, 0xb3,
+ 0x02, 0x95, 0xb7, 0xc6, 0xcb, 0x33, 0x0e, 0x79, 0x30, 0x15, 0xc2, 0x95,
+ 0xbd, 0x43, 0x01, 0x55, 0x42, 0x95, 0xc9, 0xc3, 0xe5, 0x2d, 0x0e, 0x79,
+ 0x51, 0xc2, 0x01, 0xc8, 0x0e, 0x79, 0x00, 0x43, 0x01, 0x55, 0xc2, 0x95,
+ 0xd5, 0x4d, 0x78, 0xd9, 0x42, 0x95, 0xe1, 0xc6, 0x42, 0x68, 0x0e, 0x78,
+ 0xf1, 0x42, 0x00, 0xe7, 0x42, 0x95, 0xed, 0xc5, 0x00, 0x2c, 0x0e, 0x78,
+ 0x91, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x30, 0xc6, 0x78, 0xdc, 0x0e, 0x78,
+ 0xe9, 0x4b, 0x8e, 0xfa, 0x42, 0x95, 0xf9, 0xc5, 0x00, 0x2c, 0x0e, 0x78,
+ 0xa1, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x40, 0xc5, 0x00, 0x2c, 0x0e, 0x78,
+ 0x81, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x20, 0xc5, 0x00, 0x2c, 0x0e, 0x78,
+ 0x69, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x08, 0xce, 0x1e, 0x74, 0x08, 0xd1,
+ 0xb0, 0xc3, 0x0d, 0x18, 0x05, 0x4e, 0x53, 0x02, 0x96, 0x05, 0xc4, 0xe3,
+ 0x8f, 0x05, 0x4e, 0x18, 0xc6, 0xcd, 0xfd, 0x05, 0x4e, 0x39, 0xc6, 0x45,
+ 0xa6, 0x05, 0x4e, 0x60, 0x17, 0xc2, 0x96, 0x0b, 0xc5, 0x3a, 0xbc, 0x05,
+ 0x4e, 0x40, 0xc6, 0xcb, 0x27, 0x05, 0x4c, 0x98, 0x42, 0x00, 0x4d, 0x42,
+ 0x96, 0x17, 0xc6, 0xcb, 0x21, 0x05, 0x4d, 0x60, 0xc6, 0xcb, 0x27, 0x05,
+ 0x4d, 0x40, 0x00, 0x42, 0x96, 0x23, 0x83, 0x05, 0x4d, 0x23, 0x02, 0x96,
+ 0x2f, 0xc2, 0x19, 0x2c, 0x05, 0x4c, 0xd3, 0x02, 0x96, 0x35, 0xc2, 0x01,
+ 0x30, 0x05, 0x4c, 0xa2, 0x02, 0x96, 0x3b, 0x83, 0x05, 0x4d, 0x13, 0x02,
+ 0x96, 0x44, 0xc2, 0x0e, 0x9a, 0x05, 0x4c, 0xea, 0x02, 0x96, 0x4a, 0x83,
+ 0x05, 0x4d, 0x03, 0x02, 0x96, 0x50, 0xc2, 0x01, 0x6f, 0x05, 0x4c, 0xda,
+ 0x02, 0x96, 0x56, 0xca, 0x60, 0x26, 0x05, 0x4c, 0xc8, 0xc6, 0xcb, 0x27,
+ 0x05, 0x4c, 0xb0, 0x00, 0x42, 0x96, 0x5c, 0x8b, 0x05, 0x4c, 0x68, 0x8b,
+ 0x05, 0x4c, 0x39, 0xc5, 0xd5, 0x2e, 0x05, 0x4c, 0x28, 0xc4, 0x04, 0x15,
+ 0x05, 0x4d, 0xd1, 0xc4, 0xdf, 0x53, 0x05, 0x4d, 0xa0, 0xcf, 0x6a, 0xe9,
+ 0x01, 0x2c, 0xf2, 0x02, 0x96, 0x68, 0x45, 0x02, 0x9a, 0x42, 0x96, 0x6e,
+ 0x97, 0x05, 0x22, 0xdb, 0x02, 0x96, 0x7a, 0x91, 0x05, 0x22, 0xbb, 0x02,
+ 0x96, 0x8d, 0x8b, 0x05, 0x22, 0x62, 0x02, 0x96, 0x99, 0x9b, 0x05, 0x22,
+ 0x33, 0x02, 0x96, 0xac, 0x97, 0x05, 0x22, 0x03, 0x02, 0x96, 0xbf, 0x91,
+ 0x05, 0x21, 0xeb, 0x02, 0x96, 0xd5, 0x8b, 0x05, 0x21, 0x9a, 0x02, 0x96,
+ 0xe1, 0x9b, 0x05, 0x1d, 0x3b, 0x02, 0x96, 0xf4, 0x97, 0x05, 0x1d, 0x0b,
+ 0x02, 0x97, 0x07, 0x87, 0x05, 0x1c, 0xeb, 0x02, 0x97, 0x1a, 0x91, 0x05,
+ 0x1c, 0xcb, 0x02, 0x97, 0x26, 0x83, 0x05, 0x1c, 0xb2, 0x02, 0x97, 0x2e,
+ 0xc2, 0x02, 0x0a, 0x05, 0x12, 0xf3, 0x02, 0x97, 0x3a, 0x83, 0x05, 0x13,
+ 0x13, 0x02, 0x97, 0x42, 0xc2, 0x01, 0xba, 0x05, 0x13, 0x33, 0x02, 0x97,
+ 0x4e, 0x91, 0x05, 0x13, 0x4b, 0x02, 0x97, 0x56, 0x87, 0x05, 0x13, 0x62,
+ 0x02, 0x97, 0x62, 0x8b, 0x05, 0x17, 0x7b, 0x02, 0x97, 0x6a, 0x83, 0x05,
+ 0x17, 0xb3, 0x02, 0x97, 0x7d, 0x97, 0x05, 0x17, 0xfb, 0x02, 0x97, 0x89,
+ 0x11, 0xc2, 0x97, 0x9f, 0x87, 0x05, 0x17, 0xeb, 0x02, 0x97, 0xa7, 0x9b,
+ 0x05, 0x18, 0x2a, 0x02, 0x97, 0xab, 0x8b, 0x05, 0x03, 0xc3, 0x02, 0x97,
+ 0xbe, 0x83, 0x05, 0x03, 0xfb, 0x02, 0x97, 0xd1, 0x91, 0x05, 0x04, 0x1b,
+ 0x02, 0x97, 0xdd, 0x97, 0x05, 0x04, 0x3b, 0x02, 0x97, 0xe9, 0x9b, 0x05,
+ 0x04, 0x6a, 0x02, 0x97, 0xfc, 0x8b, 0x05, 0x0a, 0x9b, 0x02, 0x98, 0x0f,
+ 0x83, 0x05, 0x0a, 0xcb, 0x02, 0x98, 0x22, 0x91, 0x05, 0x0a, 0xeb, 0x02,
+ 0x98, 0x2e, 0x87, 0x05, 0x0b, 0x03, 0x02, 0x98, 0x3a, 0x97, 0x05, 0x0b,
+ 0x22, 0x02, 0x98, 0x42, 0x96, 0x05, 0x0b, 0xe9, 0x9a, 0x05, 0x0b, 0xf1,
+ 0x92, 0x05, 0x0c, 0x01, 0x87, 0x05, 0x0c, 0x12, 0x02, 0x98, 0x55, 0x9a,
+ 0x05, 0x0c, 0x21, 0x92, 0x05, 0x0c, 0x30, 0x91, 0x05, 0x0c, 0x43, 0x02,
+ 0x98, 0x5d, 0x96, 0x05, 0x0c, 0x89, 0x9a, 0x05, 0x0c, 0x91, 0x92, 0x05,
+ 0x0c, 0xa1, 0x94, 0x05, 0x0c, 0xb2, 0x02, 0x98, 0x65, 0x96, 0x05, 0x0c,
+ 0x51, 0x9a, 0x05, 0x0c, 0x59, 0x92, 0x05, 0x0c, 0x68, 0x9a, 0x05, 0x0c,
+ 0x71, 0x92, 0x05, 0x0c, 0x80, 0x9b, 0x05, 0x21, 0x7b, 0x02, 0x98, 0x69,
+ 0x97, 0x05, 0x21, 0x4b, 0x02, 0x98, 0x75, 0x91, 0x05, 0x21, 0x2b, 0x02,
+ 0x98, 0x8f, 0x8b, 0x05, 0x20, 0xd2, 0x02, 0x98, 0x9b, 0x94, 0x05, 0x1f,
+ 0xdb, 0x02, 0x98, 0xae, 0x92, 0x05, 0x1f, 0xc9, 0x9a, 0x05, 0x1f, 0xb9,
+ 0x96, 0x05, 0x1f, 0xb0, 0x94, 0x05, 0x1f, 0xab, 0x02, 0x98, 0xb2, 0x92,
+ 0x05, 0x1f, 0x99, 0x9a, 0x05, 0x1f, 0x89, 0x96, 0x05, 0x1f, 0x81, 0x91,
+ 0x05, 0x1f, 0x52, 0x02, 0x98, 0xb6, 0x92, 0x05, 0x1f, 0x79, 0x9a, 0x05,
+ 0x1f, 0x69, 0x96, 0x05, 0x1f, 0x60, 0x87, 0x05, 0x1f, 0x33, 0x02, 0x98,
+ 0xc2, 0x92, 0x05, 0x1f, 0x19, 0x9a, 0x05, 0x1f, 0x09, 0x96, 0x05, 0x1f,
+ 0x00, 0x94, 0x05, 0x20, 0xbb, 0x02, 0x98, 0xce, 0x92, 0x05, 0x20, 0xa9,
+ 0x9a, 0x05, 0x20, 0x99, 0x96, 0x05, 0x20, 0x90, 0x94, 0x05, 0x20, 0x8b,
+ 0x02, 0x98, 0xd2, 0x92, 0x05, 0x20, 0x79, 0x9a, 0x05, 0x20, 0x69, 0x96,
+ 0x05, 0x20, 0x61, 0x91, 0x05, 0x20, 0x32, 0x02, 0x98, 0xd6, 0x92, 0x05,
+ 0x20, 0x59, 0x9a, 0x05, 0x20, 0x49, 0x96, 0x05, 0x20, 0x40, 0x87, 0x05,
+ 0x20, 0x13, 0x02, 0x98, 0xe2, 0x92, 0x05, 0x1f, 0xf9, 0x9a, 0x05, 0x1f,
+ 0xe9, 0x96, 0x05, 0x1f, 0xe0, 0x94, 0x05, 0x1e, 0xfb, 0x02, 0x98, 0xee,
+ 0x92, 0x05, 0x1e, 0xe9, 0x9a, 0x05, 0x1e, 0xd9, 0x96, 0x05, 0x1e, 0xd0,
+ 0x94, 0x05, 0x1e, 0xcb, 0x02, 0x98, 0xf2, 0x92, 0x05, 0x1e, 0xb9, 0x9a,
+ 0x05, 0x1e, 0xa9, 0x96, 0x05, 0x1e, 0xa1, 0x91, 0x05, 0x1e, 0x5a, 0x02,
+ 0x98, 0xf6, 0x92, 0x05, 0x1e, 0x99, 0x9a, 0x05, 0x1e, 0x88, 0x92, 0x05,
+ 0x1e, 0x81, 0x9a, 0x05, 0x1e, 0x71, 0x96, 0x05, 0x1e, 0x68, 0x92, 0x05,
+ 0x1e, 0x49, 0x9a, 0x05, 0x1e, 0x39, 0x96, 0x05, 0x1e, 0x30, 0x9b, 0x05,
+ 0x1c, 0x83, 0x02, 0x98, 0xfe, 0x97, 0x05, 0x1c, 0x53, 0x02, 0x99, 0x11,
+ 0x87, 0x05, 0x1c, 0x33, 0x02, 0x99, 0x2b, 0x91, 0x05, 0x1c, 0x13, 0x02,
+ 0x99, 0x37, 0x83, 0x05, 0x1b, 0xea, 0x02, 0x99, 0x43, 0x9b, 0x05, 0x1e,
+ 0x13, 0x02, 0x99, 0x47, 0x97, 0x05, 0x1d, 0xe3, 0x02, 0x99, 0x5a, 0x87,
+ 0x05, 0x1d, 0xc3, 0x02, 0x99, 0x74, 0x91, 0x05, 0x1d, 0xa3, 0x02, 0x99,
+ 0x80, 0x83, 0x05, 0x1d, 0x6a, 0x02, 0x99, 0x8c, 0x9b, 0x05, 0x1a, 0x13,
+ 0x02, 0x99, 0x98, 0x8b, 0x05, 0x19, 0x63, 0x02, 0x99, 0xab, 0x83, 0x05,
+ 0x19, 0x9b, 0x02, 0x99, 0xbe, 0x91, 0x05, 0x19, 0xbb, 0x02, 0x99, 0xca,
+ 0x87, 0x05, 0x19, 0xd3, 0x02, 0x99, 0xd6, 0x97, 0x05, 0x19, 0xf2, 0x02,
+ 0x99, 0xde, 0x96, 0x05, 0x18, 0x49, 0x9a, 0x05, 0x18, 0x51, 0x92, 0x05,
+ 0x18, 0x61, 0x87, 0x05, 0x18, 0x72, 0x02, 0x99, 0xea, 0x96, 0x05, 0x18,
+ 0x81, 0x9a, 0x05, 0x18, 0x89, 0x92, 0x05, 0x18, 0x98, 0x91, 0x05, 0x18,
+ 0xab, 0x02, 0x99, 0xf2, 0x96, 0x05, 0x18, 0xf1, 0x9a, 0x05, 0x18, 0xf9,
+ 0x92, 0x05, 0x19, 0x09, 0x94, 0x05, 0x19, 0x1a, 0x02, 0x99, 0xfa, 0x96,
+ 0x05, 0x18, 0xb9, 0x9a, 0x05, 0x18, 0xc1, 0x92, 0x05, 0x18, 0xd0, 0x9a,
+ 0x05, 0x18, 0xd9, 0x92, 0x05, 0x18, 0xe8, 0x96, 0x05, 0x19, 0x21, 0x9a,
+ 0x05, 0x19, 0x29, 0x92, 0x05, 0x19, 0x39, 0x94, 0x05, 0x19, 0x4a, 0x02,
+ 0x99, 0xfe, 0x9b, 0x05, 0x1b, 0xc3, 0x02, 0x9a, 0x02, 0x97, 0x05, 0x1b,
+ 0x93, 0x02, 0x9a, 0x15, 0x87, 0x05, 0x1b, 0x7b, 0x02, 0x9a, 0x2b, 0x91,
+ 0x05, 0x1b, 0x5b, 0x02, 0x9a, 0x37, 0x83, 0x05, 0x1b, 0x1a, 0x02, 0x9a,
+ 0x43, 0x94, 0x05, 0x16, 0x7b, 0x02, 0x9a, 0x4f, 0x96, 0x05, 0x16, 0x51,
+ 0x9a, 0x05, 0x16, 0x59, 0x92, 0x05, 0x16, 0x68, 0x92, 0x05, 0x16, 0x19,
+ 0x9a, 0x05, 0x16, 0x08, 0x96, 0x05, 0x16, 0x21, 0x9a, 0x05, 0x16, 0x29,
+ 0x92, 0x05, 0x16, 0x39, 0x94, 0x05, 0x16, 0x4b, 0x02, 0x9a, 0x53, 0x91,
+ 0x05, 0x15, 0xda, 0x02, 0x9a, 0x57, 0x96, 0x05, 0x15, 0x71, 0x9a, 0x05,
+ 0x15, 0x79, 0x92, 0x05, 0x15, 0x89, 0x87, 0x05, 0x15, 0xa2, 0x02, 0x9a,
+ 0x5f, 0x96, 0x05, 0x15, 0xb1, 0x9a, 0x05, 0x15, 0xb9, 0x92, 0x05, 0x15,
+ 0xc8, 0x96, 0x05, 0x15, 0xe9, 0x9a, 0x05, 0x15, 0xf1, 0x92, 0x05, 0x16,
+ 0x00, 0x9a, 0x05, 0x14, 0xf9, 0x92, 0x05, 0x15, 0x08, 0x92, 0x05, 0x14,
+ 0xf1, 0x9a, 0x05, 0x14, 0xe1, 0x96, 0x05, 0x14, 0xd8, 0x91, 0x05, 0x14,
+ 0xcb, 0x02, 0x9a, 0x6b, 0x96, 0x05, 0x15, 0x11, 0x9a, 0x05, 0x15, 0x19,
+ 0x92, 0x05, 0x15, 0x29, 0x94, 0x05, 0x15, 0x3a, 0x02, 0x9a, 0x73, 0x92,
+ 0x05, 0x14, 0xb9, 0x9a, 0x05, 0x14, 0xa9, 0x96, 0x05, 0x14, 0xa0, 0x87,
+ 0x05, 0x14, 0x93, 0x02, 0x9a, 0x77, 0x92, 0x05, 0x14, 0x81, 0x9a, 0x05,
+ 0x14, 0x71, 0x96, 0x05, 0x14, 0x68, 0x91, 0x05, 0x16, 0xeb, 0x02, 0x9a,
+ 0x7f, 0x83, 0x05, 0x16, 0xd3, 0x02, 0x9a, 0x87, 0x8b, 0x05, 0x16, 0x93,
+ 0x02, 0x9a, 0x93, 0x87, 0x05, 0x17, 0x03, 0x02, 0x9a, 0xa6, 0x97, 0x05,
+ 0x17, 0x1b, 0x02, 0x9a, 0xae, 0x9b, 0x05, 0x17, 0x4a, 0x02, 0x9a, 0xbd,
+ 0x9b, 0x05, 0x1a, 0xeb, 0x02, 0x9a, 0xd0, 0x97, 0x05, 0x1a, 0xbb, 0x02,
+ 0x9a, 0xe3, 0x87, 0x05, 0x1a, 0x9b, 0x02, 0x9a, 0xfd, 0x91, 0x05, 0x1a,
+ 0x7b, 0x02, 0x9b, 0x09, 0x83, 0x05, 0x1a, 0x42, 0x02, 0x9b, 0x15, 0x96,
+ 0x05, 0x15, 0x41, 0x9a, 0x05, 0x15, 0x49, 0x92, 0x05, 0x15, 0x59, 0x94,
+ 0x05, 0x15, 0x6a, 0x02, 0x9b, 0x21, 0x92, 0x05, 0x14, 0x61, 0x9a, 0x05,
+ 0x14, 0x50, 0x92, 0x05, 0x14, 0x49, 0x9a, 0x05, 0x14, 0x38, 0x91, 0x05,
+ 0x14, 0x2a, 0x02, 0x9b, 0x25, 0x92, 0x05, 0x14, 0x19, 0x9a, 0x05, 0x14,
+ 0x09, 0x96, 0x05, 0x14, 0x00, 0x92, 0x05, 0x13, 0xf9, 0x9a, 0x05, 0x13,
+ 0xe8, 0x87, 0x05, 0x12, 0xdb, 0x02, 0x9b, 0x2d, 0x91, 0x05, 0x12, 0xc3,
+ 0x02, 0x9b, 0x35, 0xc2, 0x01, 0xba, 0x05, 0x12, 0xa3, 0x02, 0x9b, 0x41,
+ 0x83, 0x05, 0x12, 0x83, 0x02, 0x9b, 0x4d, 0x8b, 0x05, 0x12, 0x42, 0x02,
+ 0x9b, 0x59, 0x96, 0x05, 0x13, 0x71, 0x87, 0x05, 0x13, 0x82, 0x02, 0x9b,
+ 0x6c, 0x96, 0x05, 0x13, 0x89, 0x9a, 0x05, 0x13, 0x91, 0x92, 0x05, 0x13,
+ 0xa0, 0x96, 0x05, 0x13, 0xa9, 0x9a, 0x05, 0x13, 0xb1, 0x92, 0x05, 0x13,
+ 0xc0, 0x96, 0x05, 0x13, 0xc9, 0x9a, 0x05, 0x13, 0xd1, 0x92, 0x05, 0x13,
+ 0xe0, 0x8b, 0x05, 0x04, 0x9b, 0x02, 0x9b, 0x70, 0x83, 0x05, 0x04, 0xd3,
+ 0x02, 0x9b, 0x83, 0x97, 0x05, 0x05, 0x2b, 0x02, 0x9b, 0x8f, 0x91, 0x05,
+ 0x05, 0x0b, 0x02, 0x9b, 0xa9, 0x9b, 0x05, 0x05, 0x52, 0x02, 0x9b, 0xb5,
+ 0x8b, 0x05, 0x0b, 0x53, 0x02, 0x9b, 0xc4, 0x83, 0x05, 0x0b, 0x93, 0x02,
+ 0x9b, 0xd7, 0x17, 0xc2, 0x9b, 0xe3, 0x11, 0xc2, 0x9b, 0xee, 0x87, 0x05,
+ 0x0b, 0xd2, 0x02, 0x9b, 0xfa, 0x8b, 0x05, 0x0c, 0xcb, 0x02, 0x9c, 0x02,
+ 0x83, 0x05, 0x0d, 0x03, 0x02, 0x9c, 0x15, 0x97, 0x05, 0x0d, 0x6b, 0x02,
+ 0x9c, 0x21, 0x91, 0x05, 0x0d, 0x33, 0x02, 0x9c, 0x3b, 0x87, 0x05, 0x0d,
+ 0x4b, 0x02, 0x9c, 0x43, 0x9b, 0x05, 0x0d, 0x9a, 0x02, 0x9c, 0x4b, 0x87,
+ 0x05, 0x23, 0xbb, 0x02, 0x9c, 0x5e, 0x92, 0x05, 0x23, 0xa1, 0x9a, 0x05,
+ 0x23, 0x91, 0x96, 0x05, 0x23, 0x88, 0x91, 0x05, 0x23, 0xdb, 0x02, 0x9c,
+ 0x6a, 0x96, 0x05, 0x24, 0x09, 0x9a, 0x05, 0x24, 0x11, 0x92, 0x05, 0x24,
+ 0x21, 0x94, 0x05, 0x24, 0x32, 0x02, 0x9c, 0x76, 0x96, 0x05, 0x23, 0xe9,
+ 0x9a, 0x05, 0x23, 0xf1, 0x92, 0x05, 0x24, 0x00, 0x96, 0x05, 0x24, 0x39,
+ 0x9a, 0x05, 0x24, 0x41, 0x92, 0x05, 0x24, 0x51, 0x94, 0x05, 0x24, 0x62,
+ 0x02, 0x9c, 0x7a, 0x94, 0x05, 0x23, 0x83, 0x02, 0x9c, 0x7e, 0x92, 0x05,
+ 0x23, 0x71, 0x9a, 0x05, 0x23, 0x61, 0x96, 0x05, 0x23, 0x58, 0x96, 0x05,
+ 0x22, 0xe9, 0x9a, 0x05, 0x22, 0xf1, 0x92, 0x05, 0x23, 0x01, 0x87, 0x05,
+ 0x23, 0x1a, 0x02, 0x9c, 0x82, 0x9a, 0x05, 0x23, 0x41, 0x92, 0x05, 0x23,
+ 0x51, 0x96, 0x05, 0x23, 0x38, 0x9a, 0x05, 0x23, 0x28, 0x97, 0x05, 0x12,
+ 0x13, 0x02, 0x9c, 0x8e, 0xc2, 0x02, 0x0a, 0x05, 0x11, 0x8b, 0x02, 0x9c,
+ 0xa8, 0x83, 0x05, 0x11, 0xa3, 0x02, 0x9c, 0xac, 0x91, 0x05, 0x11, 0xdb,
+ 0x02, 0x9c, 0xb8, 0x87, 0x05, 0x11, 0xf2, 0x02, 0x9c, 0xc4, 0x96, 0x05,
+ 0x05, 0x71, 0x9a, 0x05, 0x05, 0x79, 0x92, 0x05, 0x05, 0x89, 0x87, 0x05,
+ 0x05, 0x9a, 0x02, 0x9c, 0xcc, 0x96, 0x05, 0x05, 0xa9, 0x9a, 0x05, 0x05,
+ 0xb1, 0x92, 0x05, 0x05, 0xc0, 0x91, 0x05, 0x05, 0xdb, 0x02, 0x9c, 0xd4,
+ 0x96, 0x05, 0x06, 0x19, 0x9a, 0x05, 0x06, 0x21, 0x92, 0x05, 0x06, 0x31,
+ 0x94, 0x05, 0x06, 0x42, 0x02, 0x9c, 0xe0, 0x96, 0x05, 0x05, 0xe9, 0x9a,
+ 0x05, 0x05, 0xf1, 0x92, 0x05, 0x06, 0x00, 0x9a, 0x05, 0x06, 0x08, 0x96,
+ 0x05, 0x06, 0x49, 0x9a, 0x05, 0x06, 0x51, 0x92, 0x05, 0x06, 0x60, 0xcc,
+ 0x1c, 0x94, 0x05, 0x00, 0xa8, 0x96, 0x05, 0x00, 0x21, 0x9a, 0x05, 0x00,
+ 0x29, 0x92, 0x05, 0x00, 0x38, 0x96, 0x05, 0x00, 0xb1, 0x9a, 0x05, 0x00,
+ 0xb9, 0x92, 0x05, 0x00, 0xc9, 0x87, 0x05, 0x00, 0xe2, 0x02, 0x9c, 0xe4,
+ 0x96, 0x05, 0x00, 0xf1, 0x9a, 0x05, 0x00, 0xf9, 0x92, 0x05, 0x01, 0x08,
+ 0x91, 0x05, 0x01, 0x1b, 0x02, 0x9c, 0xf0, 0x96, 0x05, 0x01, 0x61, 0x9a,
+ 0x05, 0x01, 0x69, 0x92, 0x05, 0x01, 0x79, 0x94, 0x05, 0x01, 0x8a, 0x02,
+ 0x9c, 0xf8, 0x96, 0x05, 0x01, 0x29, 0x9a, 0x05, 0x01, 0x31, 0x92, 0x05,
+ 0x01, 0x40, 0x9a, 0x05, 0x01, 0x49, 0x92, 0x05, 0x01, 0x58, 0x96, 0x05,
+ 0x01, 0x91, 0x9a, 0x05, 0x01, 0x99, 0x92, 0x05, 0x01, 0xa9, 0x94, 0x05,
+ 0x01, 0xba, 0x02, 0x9c, 0xfc, 0x8b, 0x05, 0x02, 0xc3, 0x02, 0x9d, 0x00,
+ 0x83, 0x05, 0x03, 0x03, 0x02, 0x9d, 0x13, 0x97, 0x05, 0x03, 0x73, 0x02,
+ 0x9d, 0x1f, 0x91, 0x05, 0x03, 0x3b, 0x02, 0x9d, 0x39, 0x87, 0x05, 0x03,
+ 0x53, 0x02, 0x9d, 0x45, 0x9b, 0x05, 0x03, 0xa2, 0x02, 0x9d, 0x4d, 0x96,
+ 0x05, 0x01, 0xc1, 0x9a, 0x05, 0x01, 0xc9, 0x92, 0x05, 0x01, 0xd9, 0x87,
+ 0x05, 0x01, 0xea, 0x02, 0x9d, 0x59, 0x96, 0x05, 0x01, 0xf9, 0x9a, 0x05,
+ 0x02, 0x01, 0x92, 0x05, 0x02, 0x10, 0x91, 0x05, 0x02, 0x23, 0x02, 0x9d,
+ 0x61, 0x96, 0x05, 0x02, 0x51, 0x9a, 0x05, 0x02, 0x59, 0x92, 0x05, 0x02,
+ 0x69, 0x94, 0x05, 0x02, 0x7a, 0x02, 0x9d, 0x69, 0x96, 0x05, 0x02, 0x31,
+ 0x9a, 0x05, 0x02, 0x39, 0x92, 0x05, 0x02, 0x48, 0x96, 0x05, 0x02, 0x81,
+ 0x9a, 0x05, 0x02, 0x89, 0x92, 0x05, 0x02, 0x99, 0x94, 0x05, 0x02, 0xaa,
+ 0x02, 0x9d, 0x6d, 0x96, 0x05, 0x06, 0x69, 0x9a, 0x05, 0x06, 0x71, 0x92,
+ 0x05, 0x06, 0x80, 0x96, 0x05, 0x06, 0x89, 0x9a, 0x05, 0x06, 0x91, 0x92,
+ 0x05, 0x06, 0xa0, 0x9a, 0x05, 0x06, 0xa9, 0x92, 0x05, 0x06, 0xb8, 0x96,
+ 0x05, 0x06, 0xc1, 0x9a, 0x05, 0x06, 0xc9, 0x92, 0x05, 0x06, 0xd9, 0x94,
+ 0x05, 0x06, 0xea, 0x02, 0x9d, 0x71, 0x96, 0x05, 0x06, 0xf1, 0x9a, 0x05,
+ 0x06, 0xf9, 0x92, 0x05, 0x07, 0x08, 0x96, 0x05, 0x07, 0x11, 0x9a, 0x05,
+ 0x07, 0x19, 0x92, 0x05, 0x07, 0x29, 0x87, 0x05, 0x07, 0x42, 0x02, 0x9d,
+ 0x75, 0x96, 0x05, 0x07, 0x51, 0x9a, 0x05, 0x07, 0x59, 0x92, 0x05, 0x07,
+ 0x68, 0x96, 0x05, 0x07, 0x71, 0x9a, 0x05, 0x07, 0x79, 0x92, 0x05, 0x07,
+ 0x88, 0x9a, 0x05, 0x07, 0x91, 0x92, 0x05, 0x07, 0x98, 0x96, 0x05, 0x07,
+ 0xa1, 0x9a, 0x05, 0x07, 0xa9, 0x92, 0x05, 0x07, 0xb9, 0x94, 0x05, 0x07,
+ 0xca, 0x02, 0x9d, 0x81, 0x96, 0x05, 0x07, 0xd1, 0x9a, 0x05, 0x07, 0xd9,
+ 0x92, 0x05, 0x07, 0xe9, 0x94, 0x05, 0x07, 0xfa, 0x02, 0x9d, 0x85, 0x96,
+ 0x05, 0x08, 0x01, 0x9a, 0x05, 0x08, 0x09, 0x92, 0x05, 0x08, 0x19, 0x87,
+ 0x05, 0x08, 0x2a, 0x02, 0x9d, 0x89, 0x96, 0x05, 0x08, 0x39, 0x9a, 0x05,
+ 0x08, 0x41, 0x92, 0x05, 0x08, 0x50, 0x91, 0x05, 0x08, 0x63, 0x02, 0x9d,
+ 0x91, 0x96, 0x05, 0x08, 0xa1, 0x9a, 0x05, 0x08, 0xa9, 0x92, 0x05, 0x08,
+ 0xb9, 0x94, 0x05, 0x08, 0xca, 0x02, 0x9d, 0x95, 0x96, 0x05, 0x08, 0x69,
+ 0x9a, 0x05, 0x08, 0x71, 0x92, 0x05, 0x08, 0x80, 0x9a, 0x05, 0x08, 0x89,
+ 0x92, 0x05, 0x08, 0x98, 0x8b, 0x05, 0x09, 0xc3, 0x02, 0x9d, 0x99, 0x83,
+ 0x05, 0x09, 0xfb, 0x02, 0x9d, 0xac, 0x97, 0x05, 0x0a, 0x6b, 0x02, 0x9d,
+ 0xb8, 0x91, 0x05, 0x0a, 0x33, 0x02, 0x9d, 0xd2, 0x87, 0x05, 0x0a, 0x4a,
+ 0x02, 0x9d, 0xde, 0x96, 0x05, 0x08, 0xd1, 0x9a, 0x05, 0x08, 0xd9, 0x92,
+ 0x05, 0x08, 0xe9, 0x87, 0x05, 0x08, 0xfa, 0x02, 0x9d, 0xe6, 0x96, 0x05,
+ 0x09, 0x09, 0x9a, 0x05, 0x09, 0x11, 0x92, 0x05, 0x09, 0x20, 0x91, 0x05,
+ 0x09, 0x3b, 0x02, 0x9d, 0xee, 0x96, 0x05, 0x09, 0x81, 0x9a, 0x05, 0x09,
+ 0x89, 0x92, 0x05, 0x09, 0x99, 0x94, 0x05, 0x09, 0xaa, 0x02, 0x9d, 0xfa,
+ 0x96, 0x05, 0x09, 0x49, 0x9a, 0x05, 0x09, 0x51, 0x92, 0x05, 0x09, 0x60,
+ 0x9a, 0x05, 0x09, 0x69, 0x92, 0x05, 0x09, 0x78, 0x96, 0x05, 0x0d, 0xb9,
+ 0x9a, 0x05, 0x0d, 0xc1, 0x92, 0x05, 0x0d, 0xd1, 0x87, 0x05, 0x0d, 0xea,
+ 0x02, 0x9d, 0xfe, 0x96, 0x05, 0x0d, 0xf9, 0x9a, 0x05, 0x0e, 0x01, 0x92,
+ 0x05, 0x0e, 0x10, 0x91, 0x05, 0x0e, 0x2b, 0x02, 0x9e, 0x0a, 0x96, 0x05,
+ 0x0e, 0x71, 0x9a, 0x05, 0x0e, 0x79, 0x92, 0x05, 0x0e, 0x89, 0x94, 0x05,
+ 0x0e, 0x9a, 0x02, 0x9e, 0x16, 0x96, 0x05, 0x0e, 0x39, 0x9a, 0x05, 0x0e,
+ 0x41, 0x92, 0x05, 0x0e, 0x50, 0x9a, 0x05, 0x0e, 0x59, 0x92, 0x05, 0x0e,
+ 0x68, 0x96, 0x05, 0x0e, 0xa1, 0x9a, 0x05, 0x0e, 0xa9, 0x92, 0x05, 0x0e,
+ 0xb9, 0x94, 0x05, 0x0e, 0xca, 0x02, 0x9e, 0x1a, 0x96, 0x05, 0x0e, 0xd1,
+ 0x9a, 0x05, 0x0e, 0xd9, 0x92, 0x05, 0x0e, 0xe9, 0x87, 0x05, 0x0f, 0x02,
+ 0x02, 0x9e, 0x1e, 0x96, 0x05, 0x0f, 0x11, 0x9a, 0x05, 0x0f, 0x19, 0x92,
+ 0x05, 0x0f, 0x28, 0x91, 0x05, 0x0f, 0x43, 0x02, 0x9e, 0x2a, 0x96, 0x05,
+ 0x0f, 0x91, 0x9a, 0x05, 0x0f, 0x99, 0x92, 0x05, 0x0f, 0xa9, 0x94, 0x05,
+ 0x0f, 0xba, 0x02, 0x9e, 0x36, 0x96, 0x05, 0x0f, 0x51, 0x9a, 0x05, 0x0f,
+ 0x59, 0x92, 0x05, 0x0f, 0x68, 0x96, 0x05, 0x0f, 0x71, 0x9a, 0x05, 0x0f,
+ 0x79, 0x92, 0x05, 0x0f, 0x88, 0x8b, 0x05, 0x10, 0xb3, 0x02, 0x9e, 0x3a,
+ 0x83, 0x05, 0x10, 0xe3, 0x02, 0x9e, 0x49, 0x97, 0x05, 0x11, 0x63, 0x02,
+ 0x9e, 0x55, 0x91, 0x05, 0x11, 0x23, 0x02, 0x9e, 0x6f, 0x87, 0x05, 0x11,
+ 0x42, 0x02, 0x9e, 0x7b, 0x96, 0x05, 0x0f, 0xc1, 0x9a, 0x05, 0x0f, 0xc9,
+ 0x92, 0x05, 0x0f, 0xd9, 0x87, 0x05, 0x0f, 0xea, 0x02, 0x9e, 0x87, 0x96,
+ 0x05, 0x0f, 0xf9, 0x9a, 0x05, 0x10, 0x01, 0x92, 0x05, 0x10, 0x10, 0x91,
+ 0x05, 0x10, 0x23, 0x02, 0x9e, 0x8f, 0x96, 0x05, 0x10, 0x71, 0x9a, 0x05,
+ 0x10, 0x79, 0x92, 0x05, 0x10, 0x89, 0x94, 0x05, 0x10, 0x9a, 0x02, 0x9e,
+ 0x97, 0x96, 0x05, 0x10, 0x31, 0x9a, 0x05, 0x10, 0x39, 0x92, 0x05, 0x10,
+ 0x48, 0x96, 0x05, 0x10, 0x51, 0x9a, 0x05, 0x10, 0x59, 0x92, 0x05, 0x10,
+ 0x68, 0x87, 0x05, 0x25, 0xd8, 0xc2, 0x00, 0x7e, 0x05, 0x24, 0x99, 0xc2,
+ 0x00, 0x11, 0x05, 0x25, 0x38, 0x92, 0x05, 0x24, 0xa1, 0x96, 0x05, 0x25,
+ 0x18, 0x9b, 0x05, 0x25, 0x81, 0xc2, 0x00, 0x33, 0x05, 0x25, 0xd1, 0xc2,
+ 0x00, 0xfe, 0x05, 0x26, 0x01, 0xc2, 0x00, 0x11, 0x05, 0x26, 0x10, 0xc2,
+ 0x00, 0x11, 0x05, 0x24, 0xb1, 0xc2, 0x01, 0xba, 0x05, 0x25, 0x30, 0xc2,
+ 0x00, 0x8d, 0x05, 0x24, 0xc9, 0xc2, 0x01, 0xba, 0x05, 0x24, 0xf9, 0xc2,
+ 0x00, 0x11, 0x05, 0x25, 0xf8, 0x92, 0x05, 0x25, 0x11, 0x94, 0x05, 0x26,
+ 0x08, 0xc2, 0x00, 0xa4, 0x05, 0x25, 0x51, 0x9b, 0x05, 0x25, 0xa9, 0xc2,
+ 0x02, 0x0a, 0x05, 0x25, 0xb8, 0x8e, 0x08, 0x74, 0x60, 0xc3, 0x32, 0xce,
+ 0x08, 0x74, 0x41, 0xc2, 0x03, 0x4e, 0x08, 0x74, 0x38, 0x44, 0xe1, 0x77,
+ 0x42, 0x9e, 0x9b, 0x8b, 0x00, 0xa7, 0x70, 0x91, 0x00, 0xa8, 0xeb, 0x02,
+ 0x9e, 0xb9, 0x83, 0x00, 0xa9, 0x0b, 0x02, 0x9e, 0xc1, 0x8b, 0x00, 0xa8,
+ 0xcb, 0x02, 0x9e, 0xc5, 0x87, 0x00, 0xa8, 0xb8, 0x9b, 0x00, 0xc6, 0x09,
+ 0x83, 0x00, 0xa8, 0xb0, 0x9b, 0x00, 0xc6, 0x01, 0x91, 0x00, 0xa8, 0xa0,
+ 0x8b, 0x00, 0xa8, 0x90, 0xc2, 0x16, 0x1c, 0x00, 0xa4, 0x29, 0xc2, 0x14,
+ 0x77, 0x00, 0xa4, 0x31, 0xc2, 0x38, 0x2a, 0x00, 0xa4, 0x39, 0xc2, 0x02,
+ 0x98, 0x00, 0xa4, 0x40, 0x83, 0x00, 0xa8, 0x10, 0x8b, 0x00, 0xa7, 0xd0,
+ 0x91, 0x00, 0xa7, 0xf0, 0x43, 0x67, 0xcd, 0xc2, 0x9e, 0xc9, 0x0a, 0x42,
+ 0x9e, 0xde, 0xc4, 0xdf, 0x5b, 0x00, 0xa9, 0xe9, 0x19, 0xc2, 0x9e, 0xf3,
+ 0x15, 0xc2, 0x9e, 0xff, 0xc4, 0xe0, 0xc3, 0x00, 0xa4, 0x11, 0xc4, 0xe3,
+ 0x43, 0x00, 0xa5, 0x01, 0xc4, 0xda, 0xeb, 0x00, 0xa5, 0xd1, 0xc4, 0xe4,
+ 0x67, 0x00, 0xa6, 0x79, 0xc4, 0xde, 0xb6, 0x00, 0xa3, 0x28, 0x8b, 0x00,
+ 0xa6, 0x08, 0x91, 0x00, 0xc6, 0x60, 0x8b, 0x00, 0xc6, 0x40, 0x83, 0x00,
+ 0xa6, 0x68, 0x83, 0x00, 0xb3, 0xb0, 0x91, 0x00, 0xb3, 0xa0, 0x8b, 0x00,
+ 0xb3, 0x90, 0x8b, 0x00, 0xb3, 0x81, 0x83, 0x00, 0xac, 0xa2, 0x02, 0x9f,
+ 0x26, 0x91, 0x00, 0xac, 0x90, 0x8b, 0x00, 0xac, 0x80, 0x83, 0x00, 0xab,
+ 0xcb, 0x02, 0x9f, 0x2a, 0x91, 0x00, 0xab, 0xbb, 0x02, 0x9f, 0x2e, 0x8b,
+ 0x00, 0xab, 0xab, 0x02, 0x9f, 0x32, 0x87, 0x00, 0xab, 0xa0, 0x8b, 0x00,
+ 0xab, 0x18, 0x06, 0xc2, 0x9f, 0x36, 0x0c, 0xc2, 0x9f, 0x46, 0x09, 0xc2,
+ 0x9f, 0x67, 0x16, 0xc2, 0x9f, 0x89, 0x42, 0x11, 0xee, 0xc2, 0x9f, 0x99,
+ 0x1b, 0xc2, 0x9f, 0xb0, 0x0f, 0xc2, 0x9f, 0xc7, 0x10, 0xc2, 0x9f, 0xde,
+ 0x0d, 0xc2, 0x9f, 0xf9, 0x92, 0x00, 0xaf, 0x73, 0x02, 0xa0, 0x04, 0x8a,
+ 0x00, 0xa2, 0x5b, 0x02, 0xa0, 0x1b, 0x19, 0xc2, 0xa0, 0x29, 0x14, 0xc2,
+ 0xa0, 0x40, 0x0e, 0xc2, 0xa0, 0x57, 0xc2, 0x02, 0xe0, 0x00, 0xa0, 0x41,
+ 0x8b, 0x00, 0xa0, 0x4b, 0x02, 0xa0, 0x72, 0x9c, 0x00, 0xb2, 0x33, 0x02,
+ 0xa0, 0x78, 0x15, 0x42, 0xa0, 0x8f, 0x8b, 0x00, 0xa4, 0x50, 0x91, 0x00,
+ 0xa4, 0xd0, 0x8b, 0x00, 0xa4, 0xb0, 0x83, 0x00, 0xa4, 0xf0, 0x83, 0x00,
+ 0xad, 0xb9, 0x91, 0x00, 0xad, 0xb1, 0x8b, 0x00, 0xad, 0xa9, 0x87, 0x00,
+ 0xad, 0xa0, 0x83, 0x00, 0xad, 0xf9, 0x91, 0x00, 0xad, 0xf1, 0x8b, 0x00,
+ 0xad, 0xe9, 0x87, 0x00, 0xad, 0xe0, 0x83, 0x00, 0xad, 0xd9, 0x91, 0x00,
+ 0xad, 0xd1, 0x8b, 0x00, 0xad, 0xc9, 0x87, 0x00, 0xad, 0xc0, 0x91, 0x00,
+ 0xc7, 0x48, 0x83, 0x00, 0xab, 0x73, 0x02, 0xa0, 0xad, 0x91, 0x00, 0xab,
+ 0x6b, 0x02, 0xa0, 0xb1, 0xc2, 0x00, 0x28, 0x00, 0xc7, 0x29, 0x8b, 0x00,
+ 0xab, 0x61, 0x87, 0x00, 0xab, 0x58, 0x83, 0x00, 0xc7, 0x23, 0x02, 0xa0,
+ 0xb5, 0x87, 0x00, 0xc7, 0x18, 0x83, 0x00, 0xad, 0x63, 0x02, 0xa0, 0xb9,
+ 0x91, 0x00, 0xad, 0x53, 0x02, 0xa0, 0xbd, 0x8b, 0x00, 0xad, 0x43, 0x02,
+ 0xa0, 0xc1, 0x87, 0x00, 0xad, 0x38, 0x83, 0x00, 0xab, 0x38, 0x91, 0x00,
+ 0xab, 0x28, 0x8b, 0x00, 0xab, 0x10, 0x8b, 0x00, 0xa2, 0x68, 0x91, 0x00,
+ 0xa2, 0xf8, 0x8b, 0x00, 0xa2, 0xd8, 0x83, 0x00, 0xa3, 0x18, 0x46, 0x92,
+ 0x9a, 0xc2, 0xa0, 0xc5, 0xc5, 0xbc, 0x9d, 0x00, 0xc6, 0xe8, 0x48, 0xba,
+ 0x1a, 0x42, 0xa1, 0x0c, 0x83, 0x00, 0xaa, 0x70, 0x91, 0x00, 0xc6, 0x90,
+ 0x8b, 0x00, 0xc6, 0x80, 0x8b, 0x00, 0xaa, 0x28, 0x14, 0xc2, 0xa1, 0x1b,
+ 0x15, 0xc2, 0xa1, 0x25, 0xc5, 0x31, 0xee, 0x00, 0xa0, 0xf9, 0xc5, 0x1f,
+ 0x0c, 0x00, 0xa1, 0x01, 0xd0, 0x58, 0x02, 0x00, 0xa1, 0x09, 0xcd, 0x7f,
+ 0x3f, 0x00, 0xa1, 0x11, 0x42, 0x00, 0x58, 0xc2, 0xa1, 0x31, 0xca, 0x3b,
+ 0x06, 0x00, 0xa1, 0x39, 0xc4, 0x25, 0xd5, 0x00, 0xa1, 0x48, 0x8b, 0x00,
+ 0xaa, 0xa0, 0x8a, 0x00, 0xc6, 0xd8, 0x19, 0x42, 0xa1, 0x3d, 0x8b, 0x00,
+ 0xa9, 0x38, 0x83, 0x00, 0xa9, 0xd8, 0x91, 0x00, 0xa9, 0xb8, 0x8b, 0x00,
+ 0xa9, 0x98, 0xc3, 0x14, 0x72, 0x00, 0xa2, 0x41, 0xc2, 0x01, 0x24, 0x00,
+ 0xa1, 0xa8, 0x8b, 0x00, 0xa6, 0xa0, 0x83, 0x00, 0xad, 0x28, 0x91, 0x00,
+ 0xad, 0x18, 0x8b, 0x00, 0xad, 0x08, 0x8b, 0x00, 0xa7, 0x00, 0x91, 0x00,
+ 0xa7, 0x20, 0x83, 0x00, 0xa7, 0x40, 0x8b, 0x00, 0xa5, 0x20, 0x94, 0x00,
+ 0xaa, 0x91, 0x8e, 0x00, 0xa7, 0x60, 0xca, 0xa5, 0x8a, 0x00, 0xa8, 0x48,
+ 0x8b, 0x00, 0xa5, 0x80, 0x91, 0x00, 0xa5, 0xa0, 0x83, 0x00, 0xa5, 0xc0,
+ 0x9b, 0x00, 0xc5, 0xc9, 0x83, 0x00, 0xa4, 0x00, 0x8b, 0x00, 0xa3, 0xc0,
+ 0x91, 0x00, 0xa3, 0xe0, 0x8b, 0x00, 0xa3, 0x60, 0x9b, 0x00, 0xc5, 0xb1,
+ 0x91, 0x00, 0xa2, 0x10, 0x83, 0x00, 0xa2, 0x30, 0x8b, 0x00, 0xa1, 0xf0,
+ 0x8b, 0x00, 0xa1, 0x80, 0x8b, 0x00, 0xab, 0xf0, 0x97, 0x08, 0x15, 0xd9,
+ 0x9f, 0x08, 0x16, 0x41, 0xa0, 0x08, 0x16, 0x80, 0xc3, 0x4b, 0x13, 0x08,
+ 0x2a, 0x79, 0xc2, 0x0c, 0x42, 0x08, 0x2a, 0xa8, 0xc2, 0x00, 0x71, 0x08,
+ 0x29, 0xb9, 0x83, 0x08, 0x29, 0xd8, 0x83, 0x08, 0x29, 0xcb, 0x02, 0xa1,
+ 0x4b, 0xc2, 0x69, 0xa6, 0x08, 0x2a, 0x49, 0x8b, 0x08, 0x2a, 0x50, 0x94,
+ 0x08, 0x2a, 0x11, 0xc2, 0x17, 0xb6, 0x08, 0x2b, 0x00, 0x9b, 0x08, 0x2a,
+ 0x59, 0x99, 0x08, 0x2a, 0xf8, 0x83, 0x08, 0x29, 0xeb, 0x02, 0xa1, 0x4f,
+ 0xc2, 0x69, 0xa6, 0x08, 0x2a, 0xe8, 0xc2, 0x02, 0xa0, 0x01, 0x74, 0x19,
+ 0xc4, 0x02, 0xde, 0x01, 0x74, 0x20, 0xce, 0x70, 0x88, 0x01, 0x75, 0x31,
+ 0xc3, 0x00, 0xbf, 0x01, 0x76, 0x30, 0xc3, 0xac, 0xc1, 0x01, 0x76, 0x61,
+ 0xc4, 0x8e, 0x34, 0x01, 0x77, 0x40, 0x89, 0x01, 0x8f, 0x08, 0x83, 0x05,
+ 0x5b, 0xb1, 0x87, 0x05, 0x5b, 0xc1, 0x8b, 0x05, 0x5b, 0xc9, 0x91, 0x05,
+ 0x5b, 0xd1, 0x97, 0x05, 0x5b, 0xd9, 0x98, 0x05, 0x5b, 0xe0, 0x83, 0x05,
+ 0x5d, 0xf9, 0x87, 0x00, 0x9f, 0xc1, 0x8b, 0x00, 0x9f, 0xc9, 0x91, 0x00,
+ 0x9f, 0xd1, 0x97, 0x00, 0x9f, 0xd9, 0x98, 0x00, 0x9f, 0xe0, 0x98, 0x05,
+ 0x5d, 0xf1, 0x97, 0x05, 0x5d, 0xe9, 0x91, 0x05, 0x5d, 0xe1, 0x8b, 0x05,
+ 0x5d, 0xd9, 0x87, 0x05, 0x5d, 0xd1, 0x83, 0x05, 0x5d, 0xc8, 0x15, 0xc2,
+ 0xa1, 0x53, 0x0e, 0xc2, 0xa1, 0x6b, 0x83, 0x05, 0x5d, 0x21, 0x8b, 0x05,
+ 0x5d, 0x41, 0x87, 0x05, 0x5d, 0x30, 0x91, 0x05, 0x5c, 0x99, 0x8b, 0x05,
+ 0x5c, 0x91, 0x87, 0x05, 0x5c, 0x89, 0x83, 0x05, 0x5c, 0x73, 0x02, 0xa1,
+ 0x83, 0x97, 0x05, 0x5c, 0xa1, 0x98, 0x05, 0x5c, 0xa8, 0xc2, 0x00, 0xc1,
+ 0x05, 0x5c, 0x79, 0x83, 0x05, 0x5b, 0xe9, 0x87, 0x05, 0x5b, 0xf1, 0x8b,
+ 0x05, 0x5b, 0xf9, 0x91, 0x05, 0x5c, 0x01, 0x97, 0x05, 0x5c, 0x09, 0x98,
+ 0x05, 0x5c, 0x10, 0x97, 0x05, 0x5c, 0x69, 0x91, 0x05, 0x5c, 0x61, 0x8b,
+ 0x05, 0x5c, 0x59, 0x87, 0x05, 0x5c, 0x51, 0x83, 0x05, 0x5c, 0x49, 0x98,
+ 0x00, 0x9f, 0xe8, 0x98, 0x05, 0x5c, 0x41, 0x97, 0x05, 0x5c, 0x39, 0x91,
+ 0x05, 0x5c, 0x31, 0x8b, 0x05, 0x5c, 0x29, 0x87, 0x05, 0x5c, 0x21, 0x83,
+ 0x05, 0x5c, 0x18, 0x83, 0x05, 0x5c, 0xb1, 0x87, 0x05, 0x5c, 0xb9, 0x8b,
+ 0x05, 0x5c, 0xc1, 0x91, 0x05, 0x5c, 0xc9, 0x97, 0x05, 0x5c, 0xd1, 0x98,
+ 0x05, 0x5c, 0xd8, 0x83, 0x05, 0x5c, 0xe1, 0x87, 0x05, 0x5c, 0xf1, 0x8b,
+ 0x05, 0x5c, 0xf9, 0x91, 0x05, 0x5d, 0x01, 0x97, 0x05, 0x5d, 0x09, 0x98,
+ 0x05, 0x5d, 0x10, 0x83, 0x05, 0x5d, 0x19, 0x87, 0x05, 0x5d, 0x29, 0x8b,
+ 0x05, 0x5d, 0x39, 0x91, 0x05, 0x5d, 0x49, 0x97, 0x05, 0x5d, 0x51, 0x98,
+ 0x05, 0x5d, 0x59, 0xc2, 0x00, 0xdb, 0x05, 0x5d, 0x60, 0x83, 0x00, 0x9d,
+ 0x31, 0x87, 0x00, 0x9d, 0x41, 0x8b, 0x00, 0x9d, 0x49, 0x91, 0x00, 0x9d,
+ 0x51, 0x97, 0x00, 0x9d, 0x59, 0x98, 0x00, 0x9d, 0x60, 0x83, 0x00, 0x9d,
+ 0x69, 0x87, 0x00, 0x9d, 0x71, 0x8b, 0x00, 0x9d, 0x79, 0x91, 0x00, 0x9d,
+ 0x81, 0x97, 0x00, 0x9d, 0x89, 0x98, 0x00, 0x9d, 0x91, 0xc2, 0x00, 0xc1,
+ 0x00, 0x9d, 0xf8, 0x83, 0x00, 0x9d, 0x99, 0x87, 0x00, 0x9d, 0xa1, 0x8b,
+ 0x00, 0x9d, 0xa9, 0x91, 0x00, 0x9d, 0xb1, 0x97, 0x00, 0x9d, 0xb9, 0x98,
+ 0x00, 0x9d, 0xc0, 0x83, 0x00, 0x9d, 0xc9, 0x87, 0x00, 0x9d, 0xd1, 0x8b,
+ 0x00, 0x9d, 0xd9, 0x91, 0x00, 0x9d, 0xe1, 0x97, 0x00, 0x9d, 0xe9, 0x98,
+ 0x00, 0x9f, 0xa8, 0x83, 0x00, 0x9d, 0xf3, 0x02, 0xa1, 0x87, 0x87, 0x00,
+ 0x9e, 0x09, 0x8b, 0x00, 0x9e, 0x11, 0x91, 0x00, 0x9e, 0x19, 0x97, 0x00,
+ 0x9e, 0x21, 0x98, 0x00, 0x9e, 0x28, 0x83, 0x00, 0x9e, 0x31, 0x87, 0x00,
+ 0x9e, 0x39, 0x8b, 0x00, 0x9e, 0x41, 0x91, 0x00, 0x9e, 0x49, 0x97, 0x00,
+ 0x9e, 0x51, 0x98, 0x00, 0x9e, 0x58, 0x83, 0x00, 0x9e, 0x61, 0x87, 0x00,
+ 0x9e, 0x71, 0x8b, 0x00, 0x9e, 0x79, 0x91, 0x00, 0x9e, 0x81, 0x97, 0x00,
+ 0x9e, 0x89, 0x98, 0x00, 0x9e, 0x90, 0x83, 0x00, 0x9e, 0x99, 0x87, 0x00,
+ 0x9e, 0xa9, 0x8b, 0x00, 0x9e, 0xb9, 0x91, 0x00, 0x9e, 0xc9, 0x97, 0x00,
+ 0x9e, 0xd1, 0x98, 0x00, 0x9e, 0xd9, 0xc2, 0x00, 0xdb, 0x00, 0x9e, 0xe0,
+ 0x83, 0x00, 0x9e, 0xa1, 0x87, 0x00, 0x9e, 0xb1, 0x8b, 0x00, 0x9e, 0xc1,
+ 0x0e, 0xc2, 0xa1, 0x8b, 0x15, 0x42, 0xa1, 0xa3, 0x83, 0x00, 0x9f, 0x49,
+ 0x87, 0x00, 0x9f, 0x51, 0x8b, 0x00, 0x9f, 0x59, 0x91, 0x00, 0x9f, 0x61,
+ 0x97, 0x00, 0x9f, 0x69, 0x98, 0x00, 0x9f, 0x70, 0x83, 0x00, 0x9f, 0x79,
+ 0x87, 0x00, 0x9f, 0x81, 0x8b, 0x00, 0x9f, 0x89, 0x91, 0x00, 0x9f, 0x91,
+ 0x97, 0x00, 0x9f, 0x99, 0x98, 0x00, 0x9f, 0xa0, 0xc3, 0x0e, 0xa7, 0x00,
+ 0x04, 0x41, 0xd2, 0x49, 0x55, 0x00, 0x04, 0x48, 0xc3, 0x39, 0x6e, 0x08,
+ 0x88, 0xa1, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x98, 0xc3, 0x39, 0x6e, 0x08,
+ 0x88, 0x91, 0xc2, 0x04, 0xc6, 0x08, 0x88, 0x88, 0x8b, 0x08, 0x8a, 0x30,
+ 0x83, 0x08, 0x8a, 0x29, 0x97, 0x08, 0x89, 0x79, 0x8b, 0x08, 0x89, 0x68,
+ 0x8b, 0x08, 0x89, 0x80, 0x97, 0x08, 0x89, 0x58, 0x8b, 0x08, 0x89, 0x48,
+ 0xc4, 0x18, 0x10, 0x08, 0x89, 0xe9, 0xc2, 0x22, 0xcc, 0x08, 0x89, 0xe0,
+ 0xc3, 0x0d, 0x14, 0x08, 0x89, 0xd9, 0xc3, 0x09, 0x9e, 0x08, 0x89, 0xd0,
+ 0xc4, 0x02, 0xde, 0x08, 0x89, 0xc9, 0xc2, 0x02, 0xa0, 0x08, 0x89, 0xc0,
+ 0xc2, 0x0f, 0xe1, 0x05, 0x50, 0x51, 0x83, 0x05, 0x50, 0x58, 0xc2, 0x25,
+ 0x3b, 0x05, 0x50, 0x91, 0x83, 0x05, 0x50, 0x89, 0xc2, 0x0f, 0xe1, 0x05,
+ 0x50, 0x80, 0x89, 0x05, 0x52, 0x10, 0xc4, 0x18, 0x12, 0x08, 0x7e, 0x51,
+ 0x91, 0x08, 0x7e, 0x30, 0xd7, 0x27, 0x74, 0x0f, 0xaa, 0x08, 0xce, 0x74,
+ 0x24, 0x01, 0x72, 0x81, 0xcd, 0x79, 0x5b, 0x01, 0x72, 0x88, 0xc3, 0x02,
+ 0x44, 0x0f, 0x01, 0x51, 0xc4, 0xac, 0x24, 0x0f, 0x00, 0xb8, 0x47, 0x1c,
+ 0xa0, 0xc2, 0xa1, 0xbb, 0xcb, 0x98, 0x00, 0x0f, 0x00, 0x51, 0xc3, 0x78,
+ 0xde, 0x0f, 0x00, 0x48, 0xc6, 0xc8, 0x01, 0x0f, 0x01, 0x41, 0xc3, 0xc8,
+ 0x92, 0x0f, 0x00, 0x08, 0x91, 0x0f, 0x01, 0x31, 0x97, 0x0f, 0x01, 0x19,
+ 0xc3, 0x01, 0xbd, 0x0f, 0x01, 0x09, 0x07, 0x42, 0xa1, 0xc7, 0xc8, 0xae,
+ 0x6b, 0x0f, 0x01, 0x21, 0x0a, 0xc2, 0xa1, 0xd1, 0xc4, 0xe4, 0xc7, 0x0f,
+ 0x00, 0xa0, 0xc2, 0x00, 0xba, 0x0f, 0x00, 0xe1, 0xc5, 0xd9, 0x52, 0x0f,
+ 0x00, 0xa8, 0xc5, 0xdd, 0xb2, 0x0f, 0x00, 0x61, 0xc4, 0xe4, 0x2b, 0x0f,
+ 0x00, 0x20, 0xc5, 0xda, 0x24, 0x0f, 0x00, 0x41, 0xc6, 0xd3, 0x73, 0x0f,
+ 0x00, 0x30, 0x48, 0x23, 0x26, 0xc2, 0xa1, 0xdb, 0xcb, 0x94, 0x90, 0x00,
+ 0x1a, 0x11, 0xc7, 0xc8, 0xd9, 0x00, 0x1a, 0x19, 0xcf, 0x63, 0xd2, 0x00,
+ 0x1a, 0x21, 0xcd, 0x4a, 0x68, 0x00, 0x1a, 0x28, 0x45, 0xda, 0x51, 0xc2,
+ 0xa1, 0xe5, 0x42, 0x00, 0x5f, 0xc2, 0xa1, 0xf1, 0xcc, 0x85, 0x59, 0x00,
+ 0x1a, 0x78, 0xcc, 0x89, 0x25, 0x01, 0x06, 0xd1, 0xcb, 0x02, 0x5c, 0x01,
+ 0x06, 0xa0, 0xcb, 0x8e, 0xe4, 0x00, 0xee, 0x49, 0xc6, 0x60, 0xb1, 0x00,
+ 0xee, 0x38, 0xc6, 0x09, 0x01, 0x00, 0x18, 0x0b, 0x02, 0xa1, 0xf9, 0xc9,
+ 0x2b, 0x5f, 0x00, 0x1a, 0x08, 0x00, 0xc2, 0xa1, 0xff, 0x19, 0x42, 0xa2,
+ 0x17, 0xc7, 0x20, 0x88, 0x01, 0x06, 0xc1, 0xc5, 0x00, 0xd4, 0x00, 0x18,
+ 0x51, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x28, 0xd0, 0x2c, 0x60, 0x01, 0x07,
+ 0x29, 0xcd, 0x52, 0x59, 0x00, 0x18, 0xa0, 0x03, 0xc2, 0xa2, 0x1d, 0x4c,
+ 0x02, 0x56, 0xc2, 0xa2, 0x29, 0x42, 0x00, 0xd0, 0xc2, 0xa2, 0x35, 0x4c,
+ 0x1a, 0x50, 0xc2, 0xa2, 0x41, 0xca, 0x9a, 0x3d, 0x00, 0x18, 0xc0, 0xdb,
+ 0x0b, 0x6c, 0x01, 0x07, 0x69, 0xcd, 0x7a, 0x38, 0x01, 0x07, 0x50, 0xd6,
+ 0x2c, 0x5a, 0x01, 0x07, 0x59, 0xd5, 0x36, 0x86, 0x01, 0x06, 0x91, 0x15,
+ 0x42, 0xa2, 0x4d, 0x97, 0x00, 0x1b, 0x3b, 0x02, 0xa2, 0x59, 0x91, 0x00,
+ 0x1b, 0x33, 0x02, 0xa2, 0x5f, 0x83, 0x00, 0x1b, 0x1b, 0x02, 0xa2, 0x65,
+ 0x99, 0x00, 0xef, 0x8b, 0x02, 0xa2, 0x7d, 0x87, 0x00, 0x1b, 0x23, 0x02,
+ 0xa2, 0x83, 0x92, 0x00, 0xef, 0x71, 0x8e, 0x00, 0xee, 0xeb, 0x02, 0xa2,
+ 0x8f, 0x88, 0x00, 0xef, 0x5b, 0x02, 0xa2, 0x9b, 0x95, 0x00, 0xef, 0x23,
+ 0x02, 0xa2, 0xa1, 0x84, 0x00, 0xef, 0x43, 0x02, 0xa2, 0xa7, 0x9c, 0x00,
+ 0xef, 0x31, 0x94, 0x00, 0x1b, 0x63, 0x02, 0xa2, 0xad, 0x90, 0x00, 0xef,
+ 0x01, 0x8d, 0x00, 0xee, 0xe1, 0x89, 0x00, 0xee, 0xd1, 0x8b, 0x00, 0x1b,
+ 0x2b, 0x02, 0xa2, 0xb1, 0x85, 0x00, 0x1b, 0x43, 0x02, 0xa2, 0xb7, 0x96,
+ 0x00, 0x1b, 0x6b, 0x02, 0xa2, 0xbd, 0x86, 0x00, 0x1b, 0x49, 0x8a, 0x00,
+ 0x1b, 0x51, 0x8f, 0x00, 0x1b, 0x59, 0x98, 0x00, 0x1b, 0x71, 0x9a, 0x00,
+ 0x1b, 0x78, 0x94, 0x00, 0xef, 0x11, 0x90, 0x00, 0xef, 0x09, 0x8f, 0x00,
+ 0xee, 0xf9, 0x8e, 0x00, 0xee, 0xf1, 0x89, 0x00, 0xee, 0xd8, 0xc9, 0x0f,
+ 0x6e, 0x07, 0xf1, 0x03, 0x02, 0xa2, 0xc3, 0xca, 0x09, 0xb7, 0x07, 0xf1,
+ 0x0a, 0x02, 0xa2, 0xc9, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x81, 0xc7, 0x20,
+ 0x88, 0x00, 0x19, 0xa1, 0xcf, 0x66, 0x57, 0x07, 0xf1, 0x49, 0xd0, 0x5d,
+ 0x42, 0x07, 0xf1, 0x50, 0x00, 0xc2, 0xa2, 0xcf, 0xd3, 0x41, 0x5e, 0x00,
+ 0xd5, 0x80, 0x00, 0xc2, 0xa3, 0x1f, 0x44, 0x00, 0xde, 0x42, 0xa3, 0x31,
+ 0xcb, 0x03, 0xbc, 0x00, 0xd5, 0x99, 0xcb, 0x9a, 0x3c, 0x00, 0x18, 0xf0,
+ 0xcd, 0x7a, 0x79, 0x05, 0x47, 0x89, 0x47, 0x02, 0x0e, 0xc2, 0xa3, 0x3d,
+ 0x46, 0x09, 0x97, 0x42, 0xa3, 0x63, 0xc5, 0x50, 0xb1, 0x01, 0x07, 0x11,
+ 0xc5, 0x0b, 0x0a, 0x01, 0x06, 0xf0, 0xca, 0x02, 0xfd, 0x01, 0x07, 0x00,
+ 0xce, 0x74, 0xb0, 0x00, 0x24, 0x41, 0xcd, 0x33, 0xee, 0x05, 0x33, 0x88,
+ 0xc7, 0xc8, 0xee, 0x00, 0x24, 0x39, 0xcd, 0x7a, 0x04, 0x00, 0x24, 0x31,
+ 0x03, 0x42, 0xa3, 0x87, 0xc4, 0x90, 0x77, 0x00, 0x24, 0x1b, 0x02, 0xa3,
+ 0x93, 0xd0, 0x5c, 0xc2, 0x05, 0x33, 0x81, 0xd5, 0x33, 0xe6, 0x05, 0x33,
+ 0x90, 0x07, 0xc2, 0xa3, 0x97, 0x8b, 0x05, 0x33, 0xab, 0x02, 0xa3, 0xb2,
+ 0x97, 0x05, 0x33, 0xbb, 0x02, 0xa3, 0xbc, 0x1b, 0xc2, 0xa3, 0xc2, 0xc2,
+ 0x00, 0xd0, 0x01, 0x6f, 0x7b, 0x02, 0xa3, 0xd6, 0x15, 0xc2, 0xa3, 0xdc,
+ 0x91, 0x01, 0x6f, 0x53, 0x02, 0xa3, 0xe6, 0x04, 0xc2, 0xa3, 0xec, 0xc2,
+ 0x00, 0x5f, 0x01, 0x6f, 0x09, 0xc3, 0xc0, 0x19, 0x01, 0x6f, 0x11, 0x06,
+ 0xc2, 0xa3, 0xf6, 0x1c, 0xc2, 0xa4, 0x00, 0xc2, 0x02, 0x2b, 0x01, 0x6f,
+ 0x31, 0xc2, 0x00, 0x67, 0x01, 0x6f, 0x59, 0x16, 0xc2, 0xa4, 0x0a, 0xc3,
+ 0x28, 0x28, 0x01, 0x6f, 0x89, 0xc4, 0xe0, 0x1b, 0x01, 0x6f, 0xa1, 0x83,
+ 0x01, 0x6f, 0xb1, 0xcc, 0x82, 0x05, 0x01, 0x6f, 0xc9, 0xca, 0x51, 0x7f,
+ 0x01, 0x6f, 0xe8, 0xc6, 0x05, 0x01, 0x00, 0x19, 0x60, 0xc5, 0x00, 0xd4,
+ 0x00, 0x18, 0x9b, 0x02, 0xa4, 0x14, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x30,
+ 0xc6, 0x05, 0x01, 0x07, 0xf1, 0x68, 0xcd, 0x42, 0x35, 0x00, 0x19, 0xa9,
+ 0xce, 0x2c, 0x62, 0x00, 0x19, 0xb8, 0xc7, 0xc1, 0x31, 0x00, 0xee, 0x59,
+ 0xc6, 0x05, 0x01, 0x00, 0x19, 0x70, 0xc5, 0x05, 0x02, 0x00, 0x19, 0x51,
+ 0xc5, 0x00, 0xd4, 0x00, 0x1a, 0x30, 0xc5, 0x00, 0xd4, 0x00, 0xef, 0xa9,
+ 0xc5, 0x05, 0x02, 0x00, 0x18, 0xe8, 0x4c, 0x83, 0x6d, 0xc2, 0xa4, 0x1a,
+ 0x42, 0x00, 0x38, 0x42, 0xa4, 0x26, 0xc5, 0x1d, 0x88, 0x00, 0xee, 0x61,
+ 0xc5, 0x1f, 0x0c, 0x00, 0xee, 0x31, 0xc5, 0x31, 0xee, 0x00, 0xee, 0x20,
+ 0xc5, 0x05, 0x02, 0x00, 0x19, 0x89, 0xc9, 0x0f, 0x6e, 0x07, 0xf1, 0x23,
+ 0x02, 0xa4, 0x35, 0xca, 0x09, 0xb7, 0x07, 0xf1, 0x2a, 0x02, 0xa4, 0x3b,
+ 0xc7, 0x20, 0x88, 0x00, 0xd5, 0xf1, 0xc5, 0x05, 0x02, 0x00, 0xd5, 0xe9,
+ 0xc5, 0x00, 0xd4, 0x00, 0xd5, 0xd8, 0xc4, 0x18, 0x10, 0x0e, 0x9b, 0x79,
+ 0xc2, 0x22, 0xcc, 0x0e, 0x9b, 0x70, 0xc3, 0x0d, 0x14, 0x0e, 0x9b, 0x69,
+ 0xc3, 0x09, 0x9e, 0x0e, 0x9b, 0x60, 0xc4, 0x02, 0xde, 0x0e, 0x9b, 0x59,
+ 0xc2, 0x02, 0xa0, 0x0e, 0x9b, 0x50, 0xc4, 0x18, 0x10, 0x0e, 0x9b, 0x31,
+ 0xc2, 0x22, 0xcc, 0x0e, 0x9b, 0x28, 0xc3, 0x0d, 0x14, 0x0e, 0x9b, 0x21,
+ 0xc3, 0x09, 0x9e, 0x0e, 0x9b, 0x18, 0xc4, 0x02, 0xde, 0x0e, 0x9b, 0x11,
+ 0xc2, 0x02, 0xa0, 0x0e, 0x9b, 0x08, 0xe0, 0x0a, 0x07, 0x01, 0x17, 0xd8,
+ 0xcc, 0x23, 0x9f, 0x01, 0x15, 0xa8, 0x0a, 0xc2, 0xa4, 0x41, 0xc3, 0x0b,
+ 0x65, 0x01, 0x64, 0xa9, 0xc2, 0x00, 0xba, 0x01, 0x64, 0xe8, 0xc3, 0x01,
+ 0x69, 0x00, 0x1f, 0x49, 0xc3, 0x00, 0xfe, 0x01, 0x64, 0x78, 0xc4, 0xd0,
+ 0x3f, 0x00, 0x1f, 0x59, 0xc3, 0x0a, 0x8c, 0x01, 0x64, 0x28, 0x0a, 0xc2,
+ 0xa4, 0x4b, 0xc2, 0x00, 0x59, 0x01, 0x64, 0x59, 0xc3, 0x07, 0x4a, 0x01,
+ 0x65, 0x29, 0xc4, 0x87, 0xf5, 0x01, 0x66, 0x08, 0xc2, 0x06, 0xdb, 0x00,
+ 0x1f, 0x79, 0xc4, 0xe2, 0x73, 0x01, 0x64, 0x39, 0x49, 0xa9, 0x00, 0x42,
+ 0xa4, 0x57, 0xc3, 0xe5, 0xe7, 0x01, 0x64, 0x09, 0xcc, 0x8c, 0x3d, 0x01,
+ 0x66, 0x48, 0xc5, 0xd6, 0xd7, 0x01, 0x64, 0x89, 0xc2, 0x20, 0xec, 0x01,
+ 0x65, 0x38, 0xc4, 0xe1, 0xcf, 0x01, 0x64, 0xb9, 0xca, 0xa7, 0x7e, 0x01,
+ 0x66, 0x88, 0xc2, 0x00, 0x59, 0x01, 0x65, 0x89, 0x43, 0x1d, 0xbb, 0x42,
+ 0xa4, 0x6f, 0x8b, 0x01, 0x65, 0x09, 0xc2, 0x00, 0xba, 0x01, 0x65, 0x78,
+ 0x8b, 0x01, 0x65, 0x59, 0xc2, 0x06, 0xdb, 0x00, 0x1f, 0x28, 0x4c, 0x1d,
+ 0xdd, 0xc2, 0xa4, 0x7b, 0xca, 0x9b, 0xa8, 0x01, 0x66, 0x18, 0xc2, 0x02,
+ 0xfa, 0x01, 0x67, 0x21, 0xc5, 0xd6, 0xe1, 0x01, 0x67, 0x48, 0xc6, 0xd1,
+ 0x21, 0x01, 0x67, 0x39, 0xc9, 0xa9, 0x75, 0x01, 0x67, 0x50, 0xc3, 0x01,
+ 0x69, 0x00, 0x1f, 0x41, 0xc3, 0x00, 0xfe, 0x01, 0x64, 0x70, 0xc4, 0xd0,
+ 0x3f, 0x00, 0x1f, 0x51, 0xc3, 0x0a, 0x8c, 0x01, 0x64, 0x20, 0x0a, 0xc2,
+ 0xa4, 0x93, 0xc2, 0x00, 0x59, 0x01, 0x64, 0x51, 0xc3, 0x07, 0x4a, 0x01,
+ 0x65, 0x21, 0xc4, 0x87, 0xf5, 0x01, 0x66, 0x00, 0xc2, 0x06, 0xdb, 0x00,
+ 0x1f, 0x71, 0xc4, 0xe2, 0x73, 0x01, 0x64, 0x31, 0x49, 0xa9, 0x00, 0x42,
+ 0xa4, 0x9f, 0xc3, 0xe5, 0xe7, 0x01, 0x64, 0x01, 0xcc, 0x8c, 0x3d, 0x01,
+ 0x66, 0x40, 0xc5, 0xd6, 0xd7, 0x01, 0x64, 0x81, 0xc2, 0x20, 0xec, 0x01,
+ 0x65, 0x30, 0xc3, 0x0b, 0x65, 0x01, 0x64, 0xa1, 0xc2, 0x00, 0xba, 0x01,
+ 0x64, 0xe1, 0x0a, 0x42, 0xa4, 0xb7, 0xc4, 0xe1, 0xcf, 0x01, 0x64, 0xb1,
+ 0xca, 0xa7, 0x7e, 0x01, 0x66, 0x80, 0xc2, 0x00, 0x59, 0x01, 0x65, 0x81,
+ 0x43, 0x1d, 0xbb, 0x42, 0xa4, 0xc1, 0x8b, 0x01, 0x65, 0x01, 0xc2, 0x00,
+ 0xba, 0x01, 0x65, 0x70, 0x8b, 0x01, 0x65, 0x51, 0xc2, 0x06, 0xdb, 0x00,
+ 0x1f, 0x20, 0x4c, 0x1d, 0xdd, 0xc2, 0xa4, 0xcd, 0xca, 0x9b, 0xa8, 0x01,
+ 0x66, 0x10, 0xc5, 0xd6, 0xc8, 0x01, 0x67, 0x81, 0xc5, 0x3b, 0x5e, 0x01,
+ 0x67, 0x88, 0xc2, 0x02, 0xa0, 0x08, 0x17, 0x11, 0xc4, 0x02, 0xde, 0x08,
+ 0x17, 0x18, 0xc3, 0x09, 0x9e, 0x08, 0x17, 0x21, 0xc3, 0x0d, 0x14, 0x08,
+ 0x17, 0x28, 0xc2, 0x22, 0xcc, 0x08, 0x17, 0x31, 0xc4, 0x18, 0x10, 0x08,
+ 0x17, 0x38, 0xc2, 0x00, 0xc4, 0x08, 0x17, 0x51, 0x19, 0xc2, 0xa4, 0xe5,
+ 0x0a, 0x42, 0xa4, 0xf1, 0x11, 0xc2, 0xa4, 0xfd, 0x0b, 0x42, 0xa5, 0x09,
+ 0x42, 0x22, 0xcc, 0xc2, 0xa5, 0x15, 0x44, 0x18, 0x10, 0x42, 0xa5, 0x21,
+ 0x9b, 0x08, 0x17, 0x89, 0xc8, 0x0d, 0x03, 0x08, 0x17, 0xd0, 0xc2, 0x0d,
+ 0x10, 0x08, 0x17, 0x91, 0xc8, 0x0d, 0x03, 0x08, 0x17, 0xd8, 0xd2, 0x4a,
+ 0x09, 0x01, 0x52, 0x80, 0xcc, 0x23, 0x9f, 0x01, 0x56, 0x88, 0xcc, 0x23,
+ 0x9f, 0x01, 0x56, 0x90, 0xe0, 0x05, 0x67, 0x0f, 0xa8, 0x0a, 0x02, 0xa5,
+ 0x2d, 0x44, 0x22, 0x44, 0xc2, 0xa5, 0x33, 0x11, 0x42, 0xa5, 0x3f, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xab, 0x29, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xc8, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xaa, 0xf1, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0x90, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xab, 0x31, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xd0, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xab, 0x19, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xb8, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xab, 0x11, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xb0, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xab, 0x09, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xa8, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xab, 0x01, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xa0, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xaa, 0xf9, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0x98, 0x00,
+ 0xc2, 0xa5, 0x4b, 0xc9, 0xae, 0xe8, 0x01, 0x36, 0x90, 0x0d, 0xc2, 0xa5,
+ 0x5a, 0xc5, 0xd9, 0x61, 0x01, 0x93, 0x0b, 0x02, 0xa5, 0x6c, 0x16, 0xc2,
+ 0xa5, 0x72, 0xc5, 0xd6, 0x8c, 0x01, 0x93, 0x1b, 0x02, 0xa5, 0x84, 0xc5,
+ 0xda, 0xe7, 0x01, 0x93, 0x23, 0x02, 0xa5, 0x8a, 0x12, 0xc2, 0xa5, 0x90,
+ 0xc4, 0xad, 0x2b, 0x01, 0x93, 0x33, 0x02, 0xa5, 0xa2, 0xc5, 0xb7, 0x9d,
+ 0x01, 0x93, 0x3b, 0x02, 0xa5, 0xa8, 0x05, 0xc2, 0xa5, 0xac, 0xc5, 0x90,
+ 0xe4, 0x01, 0x93, 0x6a, 0x02, 0xa5, 0xbe, 0xc4, 0x0e, 0x6a, 0x01, 0x39,
+ 0x51, 0xc6, 0x1c, 0xb4, 0x01, 0x4d, 0xf0, 0x44, 0x09, 0x99, 0xc2, 0xa5,
+ 0xc4, 0x48, 0x30, 0xf3, 0x42, 0xa5, 0xe8, 0xca, 0x30, 0xb2, 0x01, 0x14,
+ 0xc9, 0x0e, 0x42, 0xa5, 0xf4, 0x4d, 0x29, 0xb9, 0xc2, 0xa5, 0xfa, 0x4f,
+ 0x0b, 0x17, 0x42, 0xa6, 0x62, 0x42, 0x00, 0x28, 0xc2, 0xa6, 0xca, 0x44,
+ 0x0d, 0x0d, 0xc2, 0xa6, 0xd9, 0xc2, 0x00, 0xc4, 0x01, 0x23, 0x4a, 0x02,
+ 0xa6, 0xe6, 0x44, 0x00, 0x2d, 0xc2, 0xa6, 0xec, 0xc5, 0x66, 0xb1, 0x01,
+ 0x23, 0x50, 0x45, 0x18, 0x10, 0xc2, 0xa6, 0xf8, 0x43, 0x22, 0xcc, 0x42,
+ 0xa7, 0x04, 0x43, 0x14, 0x07, 0xc2, 0xa7, 0x10, 0x11, 0x42, 0xa7, 0x1d,
+ 0xc5, 0x03, 0xc7, 0x01, 0x1c, 0x50, 0xd6, 0x30, 0xe8, 0x01, 0x4d, 0xe1,
+ 0xc6, 0x01, 0xa1, 0x0f, 0x88, 0x70, 0xe0, 0x08, 0xe7, 0x01, 0x51, 0xb0,
+ 0x03, 0xc2, 0xa7, 0x2c, 0xc8, 0x2c, 0xb2, 0x01, 0x92, 0x21, 0x0d, 0xc2,
+ 0xa7, 0x44, 0x15, 0xc2, 0xa7, 0x50, 0xc3, 0x05, 0x14, 0x01, 0x94, 0x01,
+ 0x16, 0xc2, 0xa7, 0x74, 0x08, 0xc2, 0xa7, 0x86, 0x07, 0xc2, 0xa7, 0x96,
+ 0x10, 0xc2, 0xa7, 0xae, 0x0f, 0xc2, 0xa7, 0xb8, 0x19, 0xc2, 0xa7, 0xc8,
+ 0x0a, 0xc2, 0xa7, 0xd4, 0x05, 0xc2, 0xa7, 0xe0, 0x0e, 0xc2, 0xa7, 0xea,
+ 0xc5, 0xb9, 0xbc, 0x01, 0x94, 0xf1, 0xc4, 0xaa, 0xbb, 0x01, 0x95, 0x01,
+ 0x14, 0x42, 0xa7, 0xfc, 0x85, 0x0f, 0x89, 0x59, 0x94, 0x0f, 0x89, 0x60,
+ 0xc6, 0xcd, 0xf7, 0x01, 0x93, 0xe1, 0xc5, 0xde, 0x34, 0x01, 0x93, 0xe8,
+ 0x83, 0x01, 0x96, 0x81, 0x8b, 0x01, 0x96, 0x89, 0x97, 0x01, 0x96, 0x91,
+ 0x87, 0x01, 0x96, 0x99, 0x91, 0x01, 0x96, 0xa1, 0x0d, 0xc2, 0xa8, 0x06,
+ 0x15, 0xc2, 0xa8, 0x1a, 0x16, 0xc2, 0xa8, 0x2e, 0x10, 0xc2, 0xa8, 0x42,
+ 0x0a, 0xc2, 0xa8, 0x56, 0x0f, 0xc2, 0xa8, 0x6a, 0x1b, 0xc2, 0xa8, 0x7e,
+ 0x14, 0xc2, 0xa8, 0x8a, 0x19, 0x42, 0xa8, 0x9e, 0xe0, 0x02, 0xa7, 0x01,
+ 0x2e, 0xa8, 0xd4, 0x3d, 0x7c, 0x01, 0x2e, 0xa1, 0xca, 0x1e, 0x5f, 0x01,
+ 0x2e, 0x98, 0xcf, 0x63, 0x2d, 0x01, 0x2e, 0x91, 0xce, 0x66, 0x67, 0x01,
+ 0x2e, 0x80, 0xe0, 0x01, 0x67, 0x01, 0x4e, 0x18, 0xd8, 0x24, 0x83, 0x01,
+ 0x4e, 0x11, 0xcd, 0x76, 0x90, 0x01, 0x4d, 0xd8, 0x47, 0x03, 0x4c, 0x42,
+ 0xa8, 0xae, 0xd1, 0x51, 0xcd, 0x09, 0x1a, 0xf9, 0xc4, 0x58, 0xf5, 0x09,
+ 0x1a, 0xf0, 0xca, 0xa1, 0x20, 0x09, 0x1b, 0x38, 0x47, 0x03, 0x4c, 0xc2,
+ 0xa8, 0xb8, 0xc2, 0x0e, 0x9a, 0x09, 0x1a, 0x7a, 0x02, 0xa8, 0xfb, 0x00,
+ 0x42, 0xa9, 0x01, 0xa0, 0x09, 0x19, 0xb0, 0xc7, 0x6c, 0xd0, 0x09, 0x19,
+ 0x51, 0xcb, 0x94, 0x0c, 0x09, 0x19, 0x48, 0xc2, 0x02, 0xad, 0x09, 0x18,
+ 0x68, 0xda, 0x1b, 0x68, 0x09, 0x18, 0x81, 0xcc, 0x8b, 0x59, 0x09, 0x18,
+ 0x79, 0xd7, 0x29, 0x9c, 0x09, 0x18, 0x70, 0xc2, 0x00, 0x4e, 0x09, 0x1c,
+ 0xc3, 0x02, 0xa9, 0x0d, 0x97, 0x09, 0x19, 0x09, 0xc4, 0x55, 0x25, 0x09,
+ 0x19, 0x01, 0xc5, 0x03, 0x47, 0x09, 0x18, 0xf0, 0x47, 0x03, 0x4c, 0x42,
+ 0xa9, 0x13, 0xcd, 0x80, 0x84, 0x09, 0x1a, 0xd8, 0xc4, 0x38, 0xb4, 0x09,
+ 0x1a, 0xa9, 0xc2, 0x05, 0x52, 0x09, 0x1a, 0x9b, 0x02, 0xa9, 0x1f, 0x83,
+ 0x09, 0x1a, 0x90, 0xc7, 0x6c, 0xd0, 0x09, 0x18, 0xd3, 0x02, 0xa9, 0x23,
+ 0xc4, 0x39, 0xc8, 0x09, 0x18, 0xc9, 0x46, 0x03, 0x4d, 0xc2, 0xa9, 0x29,
+ 0xc6, 0xd0, 0x97, 0x09, 0x18, 0xa0, 0x47, 0x03, 0x4c, 0x42, 0xa9, 0x3e,
+ 0xd4, 0x39, 0x58, 0x09, 0x18, 0x50, 0xc9, 0xac, 0x18, 0x09, 0x29, 0xc8,
+ 0x47, 0x03, 0x4c, 0x42, 0xa9, 0x4a, 0x00, 0x42, 0xa9, 0x68, 0xc4, 0x39,
+ 0xc8, 0x09, 0x17, 0x79, 0x46, 0x03, 0x4d, 0xc2, 0xa9, 0x74, 0xc8, 0x0a,
+ 0xff, 0x09, 0x17, 0x60, 0x00, 0x42, 0xa9, 0x80, 0xca, 0x38, 0xae, 0x09,
+ 0x29, 0xc1, 0xc4, 0x39, 0xc8, 0x09, 0x16, 0xe0, 0xa1, 0x09, 0x16, 0xf2,
+ 0x02, 0xa9, 0x8f, 0x9f, 0x09, 0x16, 0xcb, 0x02, 0xa9, 0x95, 0xc3, 0x2b,
+ 0x88, 0x09, 0x16, 0xd1, 0xd2, 0x47, 0x27, 0x09, 0x16, 0xc0, 0x00, 0xc2,
+ 0xa9, 0x9b, 0xc2, 0x01, 0xe2, 0x09, 0x16, 0x03, 0x02, 0xa9, 0xb0, 0x90,
+ 0x09, 0x15, 0xf9, 0xc2, 0xe6, 0xab, 0x09, 0x15, 0xf0, 0xa3, 0x09, 0x15,
+ 0xbb, 0x02, 0xa9, 0xba, 0xc2, 0x38, 0x6a, 0x09, 0x15, 0xc9, 0xc2, 0xe5,
+ 0x8e, 0x09, 0x15, 0xc1, 0xa0, 0x09, 0x15, 0x72, 0x02, 0xa9, 0xc0, 0xc2,
+ 0x01, 0x6f, 0x09, 0x16, 0xb1, 0x94, 0x09, 0x16, 0x9b, 0x02, 0xa9, 0xc6,
+ 0xc3, 0x56, 0xa5, 0x09, 0x16, 0x91, 0x8f, 0x09, 0x16, 0x33, 0x02, 0xa9,
+ 0xca, 0x86, 0x09, 0x16, 0x1a, 0x02, 0xa9, 0xd0, 0x00, 0x42, 0xa9, 0xd6,
+ 0xd1, 0x56, 0xb7, 0x09, 0x15, 0x50, 0xa6, 0x09, 0x17, 0x50, 0xc3, 0x02,
+ 0x2c, 0x09, 0x17, 0x40, 0x9f, 0x09, 0x17, 0x28, 0xc3, 0xe4, 0xe2, 0x09,
+ 0x12, 0x93, 0x02, 0xa9, 0xf1, 0xa6, 0x09, 0x1c, 0x80, 0x49, 0x38, 0x6c,
+ 0x42, 0xa9, 0xf7, 0x00, 0x42, 0xaa, 0x03, 0xc2, 0x4d, 0x4c, 0x09, 0x13,
+ 0x6b, 0x02, 0xaa, 0x15, 0x00, 0x42, 0xaa, 0x19, 0x9f, 0x09, 0x12, 0x39,
+ 0xc8, 0xb7, 0xa2, 0x09, 0x12, 0x28, 0x94, 0x09, 0x12, 0x21, 0x00, 0x42,
+ 0xaa, 0x34, 0xc7, 0x6c, 0xd0, 0x09, 0x12, 0x59, 0x46, 0x03, 0x4d, 0x42,
+ 0xaa, 0x46, 0x00, 0xc2, 0xaa, 0x50, 0xa0, 0x09, 0x11, 0xca, 0x02, 0xaa,
+ 0x65, 0xc5, 0x39, 0xc7, 0x09, 0x11, 0x78, 0x8a, 0x09, 0x1c, 0x60, 0x9f,
+ 0x09, 0x11, 0x38, 0xc4, 0x39, 0xc8, 0x09, 0x11, 0x11, 0xca, 0x38, 0xae,
+ 0x09, 0x11, 0x08, 0x00, 0x42, 0xaa, 0x69, 0xc9, 0xac, 0xa8, 0x09, 0x10,
+ 0xf2, 0x02, 0xaa, 0x83, 0x00, 0x42, 0xaa, 0x89, 0x24, 0xc2, 0xaa, 0x93,
+ 0x23, 0xc2, 0xaa, 0x9f, 0xc3, 0xe5, 0x7e, 0x09, 0x27, 0xf9, 0x21, 0xc2,
+ 0xaa, 0xbd, 0x20, 0xc2, 0xaa, 0xd5, 0x1f, 0xc2, 0xaa, 0xe3, 0x1e, 0xc2,
+ 0xaa, 0xf5, 0x1d, 0x42, 0xab, 0x01, 0x84, 0x09, 0x0d, 0xc3, 0x02, 0xab,
+ 0x2b, 0x94, 0x09, 0x0f, 0x62, 0x02, 0xab, 0x2f, 0xca, 0x51, 0xd4, 0x09,
+ 0x0f, 0xaa, 0x02, 0xab, 0x33, 0xca, 0x8c, 0xf6, 0x09, 0x0f, 0x98, 0x97,
+ 0x09, 0x0c, 0x3b, 0x02, 0xab, 0x39, 0x0d, 0xc2, 0xab, 0x5a, 0x04, 0xc2,
+ 0xab, 0x68, 0x16, 0xc2, 0xab, 0x74, 0x15, 0xc2, 0xab, 0x7e, 0x12, 0xc2,
+ 0xab, 0x95, 0x0e, 0xc2, 0xab, 0x9d, 0xcd, 0x05, 0x5a, 0x09, 0x1c, 0x11,
+ 0x09, 0xc2, 0xab, 0xa8, 0x83, 0x09, 0x0a, 0xc3, 0x02, 0xab, 0xbd, 0xc2,
+ 0x2e, 0x48, 0x09, 0x0c, 0x61, 0xc2, 0x17, 0x99, 0x09, 0x0b, 0xe9, 0x10,
+ 0xc2, 0xab, 0xd0, 0x0f, 0xc2, 0xab, 0xda, 0x0b, 0xc2, 0xab, 0xe8, 0x07,
+ 0x42, 0xab, 0xf2, 0x00, 0x42, 0xab, 0xfe, 0xa1, 0x09, 0x0c, 0xd9, 0x9f,
+ 0x09, 0x0c, 0xd0, 0x00, 0x42, 0xac, 0x0a, 0xcf, 0x6a, 0x17, 0x09, 0x0c,
+ 0xb0, 0xa2, 0x09, 0x0c, 0x9b, 0x02, 0xac, 0x16, 0xa1, 0x09, 0x0c, 0x91,
+ 0xa0, 0x09, 0x0c, 0x89, 0x9f, 0x09, 0x0c, 0x80, 0xcd, 0x7b, 0x8a, 0x09,
+ 0x0c, 0x70, 0xcd, 0x7a, 0x5f, 0x09, 0x0d, 0xa0, 0xc5, 0x39, 0xc7, 0x09,
+ 0x0d, 0x88, 0xcd, 0x77, 0xe2, 0x09, 0x0d, 0x70, 0xe0, 0x05, 0x47, 0x09,
+ 0x0d, 0x58, 0xc3, 0x68, 0xd0, 0x09, 0x0d, 0x43, 0x02, 0xac, 0x1c, 0x8a,
+ 0x09, 0x0d, 0x39, 0xc2, 0x00, 0x65, 0x09, 0x0d, 0x30, 0x97, 0x09, 0x0d,
+ 0x13, 0x02, 0xac, 0x22, 0xc3, 0x62, 0x19, 0x09, 0x0d, 0x08, 0xc3, 0x02,
+ 0x2c, 0x09, 0x09, 0x73, 0x02, 0xac, 0x26, 0x97, 0x09, 0x09, 0xb1, 0xc3,
+ 0x04, 0x65, 0x09, 0x09, 0xa9, 0xc3, 0x20, 0x18, 0x09, 0x09, 0xa1, 0xc3,
+ 0x56, 0x1d, 0x09, 0x09, 0x99, 0xc3, 0x1a, 0xe7, 0x09, 0x09, 0x91, 0xc4,
+ 0x04, 0x59, 0x09, 0x09, 0x89, 0xc3, 0x62, 0x19, 0x09, 0x09, 0x80, 0xc4,
+ 0x58, 0xf5, 0x09, 0x09, 0x53, 0x02, 0xac, 0x30, 0xc4, 0x39, 0xc8, 0x09,
+ 0x09, 0x58, 0x47, 0x03, 0x4c, 0x42, 0xac, 0x36, 0x00, 0x42, 0xac, 0x54,
+ 0x00, 0x42, 0xac, 0x66, 0x17, 0xc2, 0xac, 0x72, 0xa4, 0x09, 0x09, 0x30,
+ 0xca, 0xa6, 0x48, 0x09, 0x09, 0x20, 0x8a, 0x09, 0x08, 0x8b, 0x02, 0xac,
+ 0x7c, 0xc2, 0x00, 0x65, 0x09, 0x08, 0x80, 0xa0, 0x09, 0x08, 0x53, 0x02,
+ 0xac, 0x80, 0x9f, 0x09, 0x08, 0x42, 0x02, 0xac, 0x86, 0x00, 0x42, 0xac,
+ 0x8c, 0xcb, 0x47, 0xaa, 0x09, 0x08, 0x19, 0x46, 0x03, 0x4d, 0x42, 0xac,
+ 0x98, 0x47, 0x03, 0x4c, 0x42, 0xac, 0xa0, 0x00, 0x42, 0xac, 0xaa, 0x00,
+ 0x42, 0xac, 0xb6, 0xa0, 0x09, 0x07, 0xe0, 0x9f, 0x09, 0x07, 0xba, 0x02,
+ 0xac, 0xc2, 0xc2, 0x00, 0xc2, 0x09, 0x07, 0xa1, 0xda, 0x1a, 0xe6, 0x09,
+ 0x07, 0x98, 0xd6, 0x1a, 0xea, 0x09, 0x07, 0x88, 0x46, 0x03, 0x4d, 0xc2,
+ 0xac, 0xc6, 0x4e, 0x6c, 0xd0, 0x42, 0xad, 0x01, 0xc2, 0x5c, 0x27, 0x09,
+ 0x25, 0x58, 0xc3, 0x0b, 0x64, 0x09, 0x25, 0x51, 0xc3, 0x51, 0xdb, 0x09,
+ 0x25, 0x49, 0x97, 0x09, 0x04, 0x99, 0x15, 0xc2, 0xad, 0x2b, 0xc2, 0x02,
+ 0x2f, 0x09, 0x04, 0x81, 0xc3, 0x1a, 0xf4, 0x09, 0x04, 0x79, 0xd1, 0x4e,
+ 0xe1, 0x09, 0x04, 0x70, 0xc7, 0x0b, 0x09, 0x09, 0x04, 0xe9, 0xcb, 0x96,
+ 0xed, 0x09, 0x04, 0xe1, 0xcb, 0x94, 0x38, 0x09, 0x04, 0xd9, 0x46, 0x03,
+ 0x4d, 0x42, 0xad, 0x37, 0x47, 0x03, 0x4c, 0xc2, 0xad, 0x46, 0xc2, 0x04,
+ 0x3d, 0x09, 0x04, 0x10, 0x47, 0x03, 0x4c, 0xc2, 0xad, 0x7e, 0x9f, 0x09,
+ 0x04, 0x00, 0xa1, 0x09, 0x04, 0x41, 0xa0, 0x09, 0x04, 0x2a, 0x02, 0xad,
+ 0x8a, 0xc7, 0x6c, 0xd0, 0x09, 0x03, 0xe9, 0xc4, 0x39, 0xc8, 0x09, 0x03,
+ 0xe1, 0xc7, 0xc6, 0x47, 0x09, 0x03, 0xd8, 0x9f, 0x09, 0x03, 0xb3, 0x02,
+ 0xad, 0x93, 0x47, 0x03, 0x4c, 0x42, 0xad, 0x99, 0xc9, 0xa3, 0x1e, 0x09,
+ 0x1b, 0xa8, 0xd3, 0x45, 0xac, 0x09, 0x03, 0xc0, 0x00, 0xc2, 0xad, 0xab,
+ 0xa0, 0x09, 0x1b, 0xa0, 0x03, 0x42, 0xad, 0xb7, 0x48, 0xb6, 0x2a, 0xc2,
+ 0xad, 0xbf, 0xcb, 0x94, 0x2d, 0x09, 0x02, 0x80, 0x9f, 0x09, 0x02, 0xa0,
+ 0xcb, 0x96, 0x95, 0x09, 0x02, 0x90, 0x47, 0x03, 0x4c, 0x42, 0xad, 0xd1,
+ 0xd0, 0x5d, 0xc2, 0x09, 0x24, 0x18, 0xc2, 0x7b, 0x95, 0x09, 0x02, 0x40,
+ 0xc2, 0x00, 0xb3, 0x09, 0x02, 0x31, 0xc9, 0xac, 0xba, 0x09, 0x02, 0x28,
+ 0xc8, 0x6a, 0x1e, 0x09, 0x02, 0x61, 0xc3, 0x1a, 0xf4, 0x09, 0x02, 0x59,
+ 0x83, 0x09, 0x02, 0x50, 0x46, 0x03, 0x4d, 0xc2, 0xad, 0xe3, 0xc4, 0x39,
+ 0xc8, 0x09, 0x00, 0xa8, 0x47, 0x03, 0x4c, 0x42, 0xae, 0x1a, 0xc3, 0xd1,
+ 0x2b, 0x09, 0x1b, 0x91, 0xc3, 0x04, 0x65, 0x09, 0x01, 0x60, 0xc3, 0x03,
+ 0x49, 0x09, 0x01, 0xf9, 0x9f, 0x09, 0x01, 0xf1, 0x00, 0x42, 0xae, 0x3c,
+ 0xca, 0x51, 0xd4, 0x09, 0x01, 0xa8, 0x4a, 0x9e, 0x64, 0xc2, 0xae, 0x4e,
+ 0xcb, 0x8f, 0x05, 0x09, 0x01, 0x79, 0xc7, 0xc6, 0x0f, 0x09, 0x01, 0x70,
+ 0xc3, 0x5d, 0xd1, 0x09, 0x01, 0x41, 0xc3, 0x04, 0x65, 0x09, 0x01, 0x39,
+ 0x0d, 0xc2, 0xae, 0x5a, 0xc2, 0x00, 0xd0, 0x09, 0x01, 0x21, 0xc4, 0x38,
+ 0xa9, 0x09, 0x01, 0x19, 0xc4, 0xe2, 0xab, 0x09, 0x01, 0x11, 0xc2, 0x00,
+ 0x65, 0x09, 0x01, 0x08, 0xcf, 0x68, 0x73, 0x09, 0x00, 0xf9, 0xc5, 0x9e,
+ 0x4b, 0x09, 0x00, 0xf0, 0x9f, 0x09, 0x1c, 0xa9, 0xc2, 0x00, 0x2d, 0x09,
+ 0x14, 0x52, 0x02, 0xae, 0x64, 0xcb, 0x94, 0x4e, 0x09, 0x14, 0x49, 0x46,
+ 0x03, 0x4d, 0x42, 0xae, 0x68, 0xc7, 0x0b, 0x09, 0x09, 0x0a, 0x91, 0xcb,
+ 0x96, 0xf8, 0x09, 0x0a, 0x89, 0xcb, 0x94, 0x43, 0x09, 0x0a, 0x81, 0xca,
+ 0x38, 0xae, 0x09, 0x0a, 0x78, 0x00, 0x42, 0xae, 0x85, 0xc7, 0x0b, 0x09,
+ 0x09, 0x0a, 0x21, 0xc3, 0x2b, 0x88, 0x09, 0x0a, 0x18, 0xcd, 0x77, 0xe2,
+ 0x09, 0x23, 0x70, 0xc2, 0x00, 0xd3, 0x09, 0x22, 0x49, 0xa1, 0x09, 0x22,
+ 0x41, 0xa0, 0x09, 0x22, 0x38, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x68, 0xa0,
+ 0x09, 0x22, 0x28, 0xc4, 0x45, 0x6a, 0x09, 0x23, 0x41, 0xc4, 0x4a, 0x2e,
+ 0x09, 0x23, 0x38, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x60, 0x00, 0xc2, 0xae,
+ 0x9d, 0xa0, 0x09, 0x22, 0x08, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x58, 0xc5,
+ 0x58, 0xf4, 0x09, 0x22, 0x70, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x50, 0xca,
+ 0x9d, 0x74, 0x09, 0x22, 0xe1, 0x43, 0x01, 0x50, 0x42, 0xae, 0xa5, 0xc3,
+ 0x5d, 0x9a, 0x09, 0x22, 0xa3, 0x02, 0xae, 0xad, 0xc3, 0x9f, 0x30, 0x09,
+ 0x21, 0xc8, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x68, 0x97, 0x09, 0x21, 0x11,
+ 0x9f, 0x09, 0x20, 0xc8, 0xcd, 0x77, 0xe2, 0x09, 0x23, 0x48, 0xc3, 0x5d,
+ 0x9a, 0x09, 0x22, 0x93, 0x02, 0xae, 0xb3, 0xc3, 0x9f, 0x30, 0x09, 0x21,
+ 0xc0, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x60, 0x00, 0xc2, 0xae, 0xb9, 0xa1,
+ 0x09, 0x21, 0xe8, 0x97, 0x09, 0x21, 0x81, 0x9f, 0x09, 0x21, 0x30, 0x97,
+ 0x09, 0x21, 0x09, 0x9f, 0x09, 0x20, 0xc0, 0xc3, 0x8f, 0x7a, 0x09, 0x23,
+ 0x19, 0xc3, 0x02, 0x2c, 0x09, 0x23, 0x00, 0xc9, 0xad, 0xf5, 0x09, 0x22,
+ 0xf9, 0xc4, 0xdd, 0x63, 0x09, 0x22, 0xc0, 0xce, 0x54, 0x64, 0x09, 0x22,
+ 0xe9, 0xc4, 0x04, 0x59, 0x09, 0x22, 0xd0, 0xc3, 0x5d, 0x9a, 0x09, 0x22,
+ 0x79, 0xc3, 0x9f, 0x30, 0x09, 0x21, 0xa0, 0x97, 0x09, 0x20, 0xf1, 0x9f,
+ 0x09, 0x20, 0xa8, 0xce, 0x54, 0x64, 0x09, 0x22, 0xf1, 0xc4, 0x04, 0x59,
+ 0x09, 0x22, 0xd8, 0xc3, 0x5d, 0x9a, 0x09, 0x22, 0x81, 0xc3, 0x9f, 0x30,
+ 0x09, 0x21, 0xa8, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x50, 0x97, 0x09, 0x21,
+ 0x69, 0x9f, 0x09, 0x21, 0x18, 0x97, 0x09, 0x20, 0xf9, 0x9f, 0x09, 0x20,
+ 0xb0, 0xc3, 0x5d, 0x9a, 0x09, 0x22, 0x89, 0xc3, 0x9f, 0x30, 0x09, 0x21,
+ 0xb2, 0x02, 0xae, 0xc1, 0xc5, 0x58, 0xf4, 0x09, 0x22, 0x58, 0xc2, 0xe5,
+ 0xf7, 0x09, 0x21, 0xd9, 0xc2, 0xe6, 0x89, 0x09, 0x21, 0xd0, 0x97, 0x09,
+ 0x21, 0x73, 0x02, 0xae, 0xc7, 0x9f, 0x09, 0x21, 0x22, 0x02, 0xae, 0xcd,
+ 0x97, 0x09, 0x21, 0x01, 0x9f, 0x09, 0x20, 0xb8, 0xc3, 0x02, 0x9b, 0x01,
+ 0x16, 0x79, 0xc2, 0x00, 0xbf, 0x01, 0x16, 0x70, 0x84, 0x09, 0x7e, 0x70,
+ 0x84, 0x09, 0x7c, 0xd8, 0x06, 0xc2, 0xae, 0xd3, 0xc6, 0x60, 0xb1, 0x00,
+ 0x27, 0x78, 0xca, 0x91, 0xbb, 0x00, 0x22, 0xa0, 0xc3, 0x2d, 0x1a, 0x00,
+ 0xe4, 0x39, 0xc9, 0xa8, 0x3a, 0x00, 0xe4, 0x31, 0xc2, 0x00, 0xac, 0x00,
+ 0xe4, 0x20, 0x46, 0x00, 0x8b, 0x42, 0xae, 0xdf, 0x87, 0x00, 0x22, 0x31,
+ 0xc2, 0x01, 0x7f, 0x00, 0x22, 0xd9, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x79,
+ 0xc2, 0x00, 0x40, 0x05, 0x34, 0x88, 0xc5, 0x13, 0xb4, 0x00, 0xe4, 0x01,
+ 0xc6, 0x9b, 0xd4, 0x00, 0x23, 0xd8, 0xc2, 0x0a, 0xe2, 0x00, 0x28, 0x89,
+ 0xc3, 0xe5, 0x2a, 0x05, 0x32, 0x29, 0xc2, 0x13, 0xc0, 0x05, 0x32, 0xa9,
+ 0xc3, 0x3b, 0x0f, 0x05, 0x33, 0x08, 0x46, 0x00, 0x8b, 0x42, 0xae, 0xeb,
+ 0x46, 0x00, 0x8b, 0x42, 0xaf, 0x03, 0xca, 0xa5, 0x12, 0x00, 0x26, 0x70,
+ 0xcf, 0x69, 0x54, 0x00, 0x25, 0x58, 0xca, 0xa5, 0xb2, 0x00, 0x24, 0x78,
+ 0x1c, 0xc2, 0xaf, 0x21, 0x87, 0x00, 0x22, 0xab, 0x02, 0xaf, 0x2b, 0xc2,
+ 0x01, 0x7f, 0x00, 0x22, 0xf9, 0xc2, 0x00, 0x28, 0x05, 0x34, 0x18, 0x91,
+ 0x05, 0x34, 0xc9, 0xcb, 0x98, 0xa5, 0x05, 0x33, 0x68, 0xc2, 0x04, 0xab,
+ 0x05, 0x32, 0x48, 0xc2, 0x00, 0xd0, 0x00, 0x25, 0xdb, 0x02, 0xaf, 0x31,
+ 0x44, 0x2e, 0xf0, 0xc2, 0xaf, 0x37, 0xc2, 0x00, 0x28, 0x05, 0x34, 0xb9,
+ 0x83, 0x00, 0x22, 0x41, 0xc3, 0x1c, 0x63, 0x00, 0x22, 0x48, 0xcf, 0x6b,
+ 0x16, 0x00, 0x26, 0xd8, 0xcc, 0x23, 0x3f, 0x00, 0x25, 0x88, 0xc2, 0x00,
+ 0x06, 0x05, 0x33, 0x19, 0x07, 0xc2, 0xaf, 0x42, 0xc4, 0x00, 0xba, 0x00,
+ 0x22, 0x60, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0x4a, 0xc3, 0xe5, 0x2a, 0x00,
+ 0x27, 0x09, 0xc3, 0x28, 0x28, 0x00, 0x25, 0xeb, 0x02, 0xaf, 0x56, 0xc2,
+ 0x00, 0xd0, 0x00, 0x25, 0x48, 0xc9, 0x20, 0xa8, 0x00, 0x26, 0x99, 0xc5,
+ 0x1d, 0x88, 0x00, 0x26, 0x88, 0x87, 0x00, 0x28, 0xc9, 0x96, 0x00, 0x23,
+ 0x18, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0x5c, 0x43, 0x5d, 0xc0, 0xc2, 0xaf,
+ 0x68, 0xc3, 0x78, 0xc9, 0x00, 0x24, 0x08, 0x46, 0x00, 0x8b, 0x42, 0xaf,
+ 0x8a, 0x46, 0x00, 0x8b, 0xc2, 0xaf, 0xa2, 0xc7, 0x8a, 0x86, 0x00, 0x22,
+ 0x50, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0xb4, 0xc6, 0xc3, 0x77, 0x00, 0x27,
+ 0x4b, 0x02, 0xaf, 0xcf, 0xc8, 0xba, 0x0a, 0x00, 0x25, 0x08, 0xc9, 0x98,
+ 0xa7, 0x05, 0x33, 0x59, 0xc5, 0xc8, 0x02, 0x00, 0x23, 0x58, 0xcb, 0x90,
+ 0x70, 0x00, 0x23, 0xe8, 0xc9, 0x20, 0xa8, 0x00, 0x27, 0x29, 0xc6, 0x60,
+ 0xb1, 0x00, 0x27, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0x22, 0xe8, 0x46, 0x00,
+ 0x8b, 0x42, 0xaf, 0xd5, 0xd9, 0x1e, 0xff, 0x00, 0x23, 0xb8, 0x16, 0x42,
+ 0xaf, 0xe1, 0x47, 0x01, 0x32, 0xc2, 0xaf, 0xeb, 0xc4, 0xe4, 0xbf, 0x05,
+ 0x32, 0x08, 0x87, 0x00, 0x21, 0xb3, 0x02, 0xaf, 0xf7, 0xc2, 0x00, 0x28,
+ 0x05, 0x34, 0x28, 0x46, 0x00, 0x8b, 0x42, 0xaf, 0xfd, 0x46, 0x00, 0x8b,
+ 0x42, 0xb0, 0x07, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0x1f, 0xca, 0xa5, 0x12,
+ 0x00, 0x26, 0x68, 0xcf, 0x69, 0x54, 0x00, 0x25, 0x50, 0xca, 0xa5, 0xb2,
+ 0x00, 0x24, 0x70, 0x1c, 0xc2, 0xb0, 0x3d, 0x87, 0x00, 0x20, 0x2b, 0x02,
+ 0xb0, 0x47, 0xc2, 0x01, 0x7f, 0x00, 0x20, 0x79, 0xc2, 0x00, 0x28, 0x05,
+ 0x34, 0x10, 0x91, 0x05, 0x34, 0xc1, 0xcb, 0x98, 0xa5, 0x05, 0x33, 0x60,
+ 0xc2, 0x04, 0xab, 0x05, 0x32, 0x40, 0xc2, 0x00, 0xd0, 0x00, 0x25, 0xd3,
+ 0x02, 0xb0, 0x4d, 0x44, 0x2e, 0xf0, 0xc2, 0xb0, 0x53, 0x83, 0x00, 0x21,
+ 0x41, 0xc3, 0x1c, 0x63, 0x00, 0x21, 0x49, 0xc2, 0x00, 0x28, 0x05, 0x34,
+ 0xb0, 0xcf, 0x6b, 0x16, 0x00, 0x26, 0xd0, 0xcc, 0x23, 0x3f, 0x00, 0x25,
+ 0x80, 0xc4, 0x00, 0xba, 0x00, 0x21, 0x61, 0xc2, 0x00, 0x06, 0x05, 0x33,
+ 0x11, 0x07, 0x42, 0xb0, 0x5e, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0x66, 0xc3,
+ 0xe5, 0x2a, 0x00, 0x27, 0x01, 0xc3, 0x28, 0x28, 0x00, 0x25, 0xe3, 0x02,
+ 0xb0, 0x72, 0xc2, 0x00, 0xd0, 0x00, 0x25, 0x40, 0xc9, 0x20, 0xa8, 0x00,
+ 0x26, 0x91, 0xc5, 0x1d, 0x88, 0x00, 0x26, 0x80, 0x87, 0x00, 0x28, 0xc1,
+ 0x96, 0x00, 0x23, 0x10, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0x78, 0xc2, 0x0a,
+ 0xe2, 0x00, 0x28, 0x81, 0xc3, 0xe5, 0x2a, 0x05, 0x32, 0x21, 0xc2, 0x13,
+ 0xc0, 0x05, 0x32, 0xa1, 0xc3, 0x3b, 0x0f, 0x05, 0x33, 0x00, 0x43, 0x5d,
+ 0xc0, 0xc2, 0xb0, 0x84, 0xc3, 0x78, 0xc9, 0x00, 0x24, 0x00, 0x46, 0x00,
+ 0x8b, 0x42, 0xb0, 0xa6, 0x46, 0x00, 0x8b, 0xc2, 0xb0, 0xbe, 0xc7, 0x8a,
+ 0x86, 0x00, 0x21, 0x50, 0x46, 0x00, 0x8b, 0x42, 0xb0, 0xd0, 0x46, 0x00,
+ 0x8b, 0x42, 0xb0, 0xeb, 0x06, 0xc2, 0xb0, 0xf5, 0xc6, 0x60, 0xb1, 0x00,
+ 0x27, 0x70, 0xca, 0x91, 0xbb, 0x00, 0x20, 0x20, 0xc6, 0xc3, 0x77, 0x00,
+ 0x27, 0x43, 0x02, 0xb1, 0x01, 0xc8, 0xba, 0x0a, 0x00, 0x25, 0x00, 0xc9,
+ 0x98, 0xa7, 0x05, 0x33, 0x51, 0xc5, 0xc8, 0x02, 0x00, 0x23, 0x50, 0xcb,
+ 0x90, 0x70, 0x00, 0x23, 0xe0, 0xc9, 0x20, 0xa8, 0x00, 0x27, 0x21, 0xc6,
+ 0x60, 0xb1, 0x00, 0x27, 0x11, 0xc5, 0x1f, 0x0c, 0x00, 0x20, 0x68, 0x46,
+ 0x00, 0x8b, 0x42, 0xb1, 0x07, 0xd9, 0x1e, 0xff, 0x00, 0x23, 0xb0, 0x16,
+ 0x42, 0xb1, 0x13, 0x47, 0x01, 0x32, 0xc2, 0xb1, 0x1d, 0xc4, 0xe4, 0xbf,
+ 0x05, 0x32, 0x00, 0x87, 0x00, 0x20, 0xb3, 0x02, 0xb1, 0x29, 0xc2, 0x00,
+ 0x28, 0x05, 0x34, 0x20, 0x46, 0x00, 0x8b, 0x42, 0xb1, 0x2f, 0xc2, 0x01,
+ 0x7f, 0x00, 0x20, 0x59, 0x87, 0x00, 0x21, 0x31, 0xc2, 0x00, 0x28, 0x05,
+ 0x34, 0x71, 0xc2, 0x00, 0x40, 0x05, 0x34, 0x80, 0xe0, 0x01, 0xa7, 0x01,
+ 0x01, 0xc8, 0xc8, 0x4b, 0x94, 0x08, 0x8f, 0xa1, 0xc7, 0x0d, 0x04, 0x08,
+ 0x8f, 0x98, 0xc6, 0x18, 0x10, 0x08, 0x8f, 0x81, 0xc4, 0xd2, 0x1d, 0x08,
+ 0x8f, 0x78, 0xc4, 0x45, 0x6a, 0x08, 0x8f, 0x71, 0xc4, 0x4a, 0x2e, 0x08,
+ 0x8f, 0x68, 0xc5, 0x0d, 0x0d, 0x08, 0x8f, 0x61, 0xc5, 0x28, 0xee, 0x08,
+ 0x8f, 0x59, 0xc2, 0x00, 0xc4, 0x08, 0x8f, 0x50, 0xc4, 0x18, 0x10, 0x08,
+ 0x8f, 0x39, 0xc2, 0x22, 0xcc, 0x08, 0x8f, 0x30, 0xc3, 0x0d, 0x14, 0x08,
+ 0x8f, 0x29, 0xc3, 0x09, 0x9e, 0x08, 0x8f, 0x20, 0xc4, 0x02, 0xde, 0x08,
+ 0x8f, 0x19, 0xc2, 0x02, 0xa0, 0x08, 0x8f, 0x10, 0xc5, 0x69, 0xa7, 0x00,
+ 0x6c, 0x29, 0xc6, 0x8e, 0x9c, 0x00, 0x6c, 0x31, 0x07, 0xc2, 0xb1, 0x3b,
+ 0xc6, 0xd2, 0x47, 0x00, 0x6c, 0x99, 0xc6, 0xcc, 0xd1, 0x00, 0x6c, 0xb1,
+ 0x4a, 0xa1, 0xa2, 0xc2, 0xb1, 0x47, 0xcb, 0x8e, 0x97, 0x00, 0x6d, 0xc8,
+ 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x49, 0xc6, 0xd2, 0x47, 0x00, 0x6c, 0x51,
+ 0x42, 0x17, 0x99, 0xc2, 0xb1, 0x73, 0x42, 0x10, 0x37, 0x42, 0xb1, 0x7f,
+ 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x59, 0xc6, 0xcc, 0xd1, 0x00, 0x6c, 0x60,
+ 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x89, 0xc6, 0xd2, 0x3b, 0x00, 0x6c, 0x90,
+ 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0xa1, 0xc6, 0x69, 0xa6, 0x00, 0x6c, 0xa8,
+ 0x03, 0xc2, 0xb1, 0x8b, 0x49, 0xb0, 0xe9, 0x42, 0xb1, 0x97, 0xc7, 0xca,
+ 0x29, 0x00, 0x6c, 0xf9, 0xc7, 0xc7, 0xc1, 0x00, 0x6d, 0x31, 0x06, 0x42,
+ 0xb1, 0xa9, 0xca, 0x4b, 0x0d, 0x00, 0x6d, 0x21, 0x42, 0x0d, 0xf6, 0x42,
+ 0xb1, 0xb5, 0xc7, 0xc4, 0xdb, 0x00, 0x6d, 0x89, 0xc7, 0xc2, 0x18, 0x00,
+ 0x6d, 0xe9, 0xc7, 0xc1, 0xa8, 0x00, 0x6e, 0x18, 0xc2, 0x02, 0xa0, 0x00,
+ 0x6f, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x6f, 0x48, 0xc3, 0x09, 0x9e, 0x00,
+ 0x6f, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x6f, 0x58, 0xc2, 0x22, 0xcc, 0x00,
+ 0x6f, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x6f, 0x68, 0xca, 0xa7, 0x60, 0x00,
+ 0x6e, 0x81, 0xc8, 0xb7, 0x82, 0x00, 0x6e, 0x91, 0xc9, 0xaf, 0x42, 0x00,
+ 0x6e, 0xa0, 0xc2, 0x02, 0x41, 0x00, 0x6e, 0xcb, 0x02, 0xb1, 0xc1, 0xc5,
+ 0xd8, 0x21, 0x00, 0x6e, 0xd8, 0xca, 0x9c, 0xb6, 0x00, 0x6f, 0x91, 0xc9,
+ 0x93, 0x53, 0x00, 0x6f, 0x98, 0x1e, 0xc2, 0xb1, 0xc7, 0xa6, 0x0e, 0xd5,
+ 0x41, 0xa5, 0x0e, 0xd5, 0x39, 0xa4, 0x0e, 0xd5, 0x31, 0xa3, 0x0e, 0xd5,
+ 0x29, 0xa2, 0x0e, 0xd5, 0x21, 0xa1, 0x0e, 0xd5, 0x19, 0xa0, 0x0e, 0xd5,
+ 0x11, 0x9f, 0x0e, 0xd5, 0x08, 0x4b, 0x40, 0xb3, 0xc2, 0xb1, 0xe3, 0x4a,
+ 0x18, 0xa5, 0x42, 0xb1, 0xfe, 0xa3, 0x0e, 0xd4, 0xf9, 0xa2, 0x0e, 0xd4,
+ 0xf1, 0xa1, 0x0e, 0xd4, 0xe9, 0xa0, 0x0e, 0xd4, 0xe1, 0x9f, 0x0e, 0xd4,
+ 0xd8, 0x15, 0xc2, 0xb2, 0x16, 0x46, 0x17, 0x14, 0x42, 0xb2, 0x22, 0xc8,
+ 0x00, 0x6f, 0x0e, 0xd0, 0x48, 0xc9, 0x6e, 0x18, 0x0e, 0xd3, 0x71, 0xc5,
+ 0xda, 0x5b, 0x0e, 0xd3, 0x68, 0xc9, 0x65, 0x4f, 0x0e, 0xc8, 0xd1, 0x45,
+ 0x03, 0x14, 0x42, 0xb2, 0x2e, 0xc8, 0x3b, 0xec, 0x0e, 0xc8, 0xc1, 0xc6,
+ 0x24, 0x3b, 0x0e, 0xc8, 0xb0, 0xcc, 0x83, 0x61, 0x0e, 0xd4, 0x31, 0xc5,
+ 0xd8, 0x1c, 0x0e, 0xd4, 0x29, 0x42, 0x01, 0x7f, 0xc2, 0xb2, 0x3a, 0xc5,
+ 0xdb, 0x5a, 0x0e, 0xd4, 0x19, 0xc5, 0x48, 0x65, 0x0e, 0xd4, 0x10, 0xd0,
+ 0x60, 0x02, 0x0e, 0xd4, 0x01, 0xcf, 0x6a, 0xad, 0x0e, 0xd3, 0xf8, 0x47,
+ 0xc2, 0x2d, 0xc2, 0xb2, 0x46, 0xcb, 0x98, 0x0b, 0x0e, 0xd3, 0xb0, 0x00,
+ 0xc2, 0xb2, 0x62, 0xd2, 0x4d, 0xf9, 0x0e, 0xd2, 0x98, 0xd3, 0x40, 0xb3,
+ 0x0e, 0xd3, 0xa1, 0x4a, 0x18, 0xa5, 0x42, 0xb2, 0x6e, 0x47, 0x0f, 0x81,
+ 0xc2, 0xb2, 0x7a, 0xd3, 0x46, 0xef, 0x0e, 0xd2, 0xf1, 0xd4, 0x38, 0xcc,
+ 0x0e, 0xd2, 0xe9, 0x44, 0x08, 0xba, 0xc2, 0xb2, 0x86, 0xcc, 0x82, 0x95,
+ 0x0e, 0xd2, 0xd1, 0xd0, 0x5b, 0x22, 0x0e, 0xd2, 0xc8, 0xc7, 0x0b, 0xc8,
+ 0x0e, 0xc8, 0x39, 0xc8, 0x3b, 0xec, 0x0e, 0xc8, 0x31, 0xc6, 0x24, 0x3b,
+ 0x0e, 0xc8, 0x28, 0x00, 0x42, 0xb2, 0x92, 0xc3, 0x01, 0xc8, 0x0e, 0xd1,
+ 0x79, 0xc6, 0x04, 0xcb, 0x0e, 0xd1, 0x71, 0xc4, 0x08, 0xcb, 0x0e, 0xd1,
+ 0x68, 0xc7, 0xc4, 0xe9, 0x0e, 0xcc, 0x39, 0x49, 0xab, 0x01, 0x42, 0xb2,
+ 0xa4, 0x4b, 0x99, 0x4a, 0xc2, 0xb2, 0xb0, 0xc7, 0xc4, 0xe9, 0x0e, 0xca,
+ 0x89, 0x49, 0xab, 0x01, 0x42, 0xb2, 0xc2, 0x4a, 0x18, 0xa5, 0xc2, 0xb2,
+ 0xce, 0x4b, 0x40, 0xb3, 0x42, 0xb2, 0xdb, 0xca, 0x45, 0x02, 0x0e, 0xd1,
+ 0x01, 0xc4, 0x03, 0xc8, 0x0e, 0xd0, 0xf9, 0xc2, 0x02, 0xae, 0x0e, 0xd0,
+ 0xf0, 0xc4, 0x91, 0x78, 0x0e, 0xd0, 0xe9, 0x46, 0xca, 0xbb, 0x42, 0xb2,
+ 0xea, 0x44, 0x06, 0xa6, 0xc2, 0xb2, 0xf6, 0x45, 0x01, 0xce, 0xc2, 0xb3,
+ 0x02, 0xc6, 0x07, 0xa1, 0x0e, 0xd0, 0xb1, 0xc8, 0xba, 0x92, 0x0e, 0xd0,
+ 0xa9, 0xc4, 0x05, 0x75, 0x0e, 0xd0, 0xa0, 0xc4, 0x03, 0xc8, 0x0e, 0xd0,
+ 0x61, 0xc7, 0x81, 0x92, 0x0e, 0xd0, 0x59, 0xc2, 0x02, 0xae, 0x0e, 0xd0,
+ 0x50, 0x08, 0xc2, 0xb3, 0x0e, 0xc5, 0x01, 0x95, 0x0e, 0xc4, 0x2b, 0x02,
+ 0xb3, 0x20, 0x0a, 0xc2, 0xb3, 0x24, 0x05, 0xc2, 0xb3, 0x36, 0xc4, 0x38,
+ 0xc1, 0x0e, 0xc3, 0xba, 0x02, 0xb3, 0x4c, 0x48, 0x51, 0x1b, 0xc2, 0xb3,
+ 0x50, 0xc3, 0x18, 0x26, 0x0e, 0xd0, 0x00, 0xc6, 0xd0, 0x37, 0x0e, 0xd1,
+ 0xa1, 0xc7, 0xa9, 0x6d, 0x0e, 0xd1, 0x98, 0xc3, 0xe5, 0x35, 0x0e, 0xd3,
+ 0x49, 0x48, 0x17, 0x7c, 0xc2, 0xb3, 0x5a, 0x19, 0xc2, 0xb3, 0x66, 0x58,
+ 0x22, 0x2b, 0xc2, 0xb3, 0x72, 0x15, 0xc2, 0xb3, 0x84, 0x45, 0xd9, 0x57,
+ 0xc2, 0xb3, 0x90, 0x45, 0xd8, 0x76, 0xc2, 0xb3, 0x9c, 0x05, 0xc2, 0xb3,
+ 0xa8, 0x46, 0xcb, 0x0f, 0xc2, 0xb3, 0xc0, 0x47, 0x2e, 0x48, 0xc2, 0xb3,
+ 0xd2, 0x04, 0xc2, 0xb3, 0xe4, 0x47, 0x2c, 0x2e, 0xc2, 0xb3, 0xf0, 0x47,
+ 0x00, 0x58, 0x42, 0xb4, 0x02, 0xc3, 0xe5, 0x35, 0x0e, 0xd3, 0x41, 0x48,
+ 0x17, 0x7c, 0xc2, 0xb4, 0x17, 0x19, 0xc2, 0xb4, 0x23, 0x4b, 0x22, 0x2b,
+ 0xc2, 0xb4, 0x2f, 0x45, 0xd9, 0x57, 0xc2, 0xb4, 0x3b, 0x45, 0xd8, 0x76,
+ 0xc2, 0xb4, 0x56, 0x05, 0xc2, 0xb4, 0x6e, 0x15, 0xc2, 0xb4, 0x86, 0x46,
+ 0xcb, 0x0f, 0xc2, 0xb4, 0x92, 0x47, 0x2e, 0x48, 0xc2, 0xb4, 0xa4, 0x04,
+ 0xc2, 0xb4, 0xb6, 0x47, 0x2c, 0x2e, 0xc2, 0xb4, 0xc2, 0x47, 0x00, 0x58,
+ 0x42, 0xb4, 0xd7, 0x48, 0x0b, 0xc8, 0xc2, 0xb4, 0xec, 0x48, 0xbf, 0xc2,
+ 0xc2, 0xb4, 0xf8, 0x45, 0xd5, 0xf1, 0x42, 0xb5, 0x0d, 0xd5, 0x37, 0x19,
+ 0x0e, 0xc9, 0x39, 0x43, 0x11, 0x49, 0xc2, 0xb5, 0x22, 0xcf, 0x65, 0x49,
+ 0x0e, 0xc9, 0x20, 0xc6, 0x00, 0x58, 0x0e, 0xd2, 0xc1, 0xc6, 0x24, 0x3b,
+ 0x0e, 0xd2, 0xb8, 0xc6, 0x13, 0x67, 0x0e, 0xd2, 0xb1, 0x46, 0x17, 0x8d,
+ 0x42, 0xb5, 0x2e, 0x00, 0x42, 0xb5, 0x40, 0x00, 0x42, 0xb5, 0x4c, 0xc9,
+ 0x46, 0x70, 0x0e, 0xd2, 0x53, 0x02, 0xb5, 0x58, 0xc4, 0x38, 0xc1, 0x0e,
+ 0xd2, 0x3b, 0x02, 0xb5, 0x5c, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x31, 0xc7,
+ 0x27, 0xb2, 0x0e, 0xd2, 0x29, 0xc6, 0x02, 0xd1, 0x0e, 0xd2, 0x20, 0x00,
+ 0x42, 0xb5, 0x60, 0x00, 0x42, 0xb5, 0x6c, 0xc2, 0x02, 0xae, 0x0e, 0xd0,
+ 0x81, 0xc4, 0x03, 0xc8, 0x0e, 0xd0, 0x68, 0xcb, 0x90, 0xbd, 0x0e, 0xcf,
+ 0xdb, 0x02, 0xb5, 0x78, 0xc3, 0x01, 0xc8, 0x0e, 0xcf, 0xc0, 0xc5, 0x17,
+ 0x14, 0x0e, 0xcf, 0xb1, 0xc5, 0x03, 0x13, 0x0e, 0xcf, 0xa8, 0x97, 0x08,
+ 0xae, 0xe8, 0x8b, 0x08, 0xae, 0xd0, 0xd6, 0x2e, 0x96, 0x08, 0xae, 0xc1,
+ 0x83, 0x08, 0xac, 0xf0, 0xc2, 0x00, 0xd0, 0x08, 0xac, 0xc9, 0x83, 0x08,
+ 0xac, 0xc0, 0x8e, 0x08, 0xac, 0x43, 0x02, 0xb5, 0x7e, 0x94, 0x08, 0xac,
+ 0x32, 0x02, 0xb5, 0x82, 0xc2, 0x00, 0xd0, 0x08, 0xac, 0xd9, 0x83, 0x08,
+ 0xac, 0xd0, 0x45, 0x00, 0x8c, 0xc2, 0xb5, 0x86, 0xcb, 0x99, 0x76, 0x08,
+ 0xae, 0x7a, 0x02, 0xb5, 0xaa, 0xc3, 0x01, 0x5d, 0x08, 0xae, 0x29, 0xc3,
+ 0x02, 0xa3, 0x08, 0xae, 0x20, 0xc4, 0x1e, 0x97, 0x08, 0xad, 0xf9, 0xc5,
+ 0x40, 0xe7, 0x08, 0xad, 0xf0, 0x8e, 0x05, 0x45, 0xe8, 0x94, 0x05, 0x45,
+ 0xd8, 0x94, 0x05, 0x44, 0x43, 0x02, 0xb5, 0xb0, 0x8e, 0x05, 0x44, 0x52,
+ 0x02, 0xb5, 0xb4, 0x83, 0x05, 0x44, 0xe1, 0xc2, 0x00, 0xd0, 0x05, 0x44,
+ 0xe8, 0x83, 0x05, 0x44, 0xf1, 0xc2, 0x00, 0xd0, 0x05, 0x44, 0xf8, 0xc2,
+ 0x02, 0xa0, 0x05, 0x46, 0x91, 0xc4, 0x02, 0xde, 0x05, 0x46, 0x98, 0xc3,
+ 0x09, 0x9e, 0x05, 0x46, 0xa1, 0xc3, 0x0d, 0x14, 0x05, 0x46, 0xa8, 0xc2,
+ 0x22, 0xcc, 0x05, 0x46, 0xb1, 0xc4, 0x18, 0x10, 0x05, 0x46, 0xb8, 0xe0,
+ 0x0a, 0x87, 0x0f, 0xb3, 0xb0, 0x4b, 0x94, 0x85, 0xc2, 0xb5, 0xb8, 0xc7,
+ 0x1b, 0x0c, 0x08, 0x8e, 0x40, 0xc7, 0xc3, 0x61, 0x08, 0x8e, 0xd9, 0xd4,
+ 0x39, 0xa8, 0x08, 0x8e, 0x79, 0xc5, 0x33, 0x5d, 0x08, 0x8e, 0x51, 0xcb,
+ 0x93, 0xf6, 0x08, 0x8e, 0x19, 0xcb, 0x8f, 0xe1, 0x08, 0x8e, 0x11, 0x03,
+ 0xc2, 0xb5, 0xc0, 0x42, 0x07, 0xb2, 0xc2, 0xb5, 0xcc, 0xcb, 0x1e, 0x89,
+ 0x08, 0x8c, 0x00, 0xc4, 0x26, 0x78, 0x08, 0x8e, 0xc9, 0xc5, 0x06, 0xdb,
+ 0x08, 0x8e, 0xc1, 0x15, 0xc2, 0xb5, 0xd8, 0x08, 0xc2, 0xb5, 0xe4, 0x16,
+ 0xc2, 0xb5, 0xf0, 0xc3, 0x05, 0x14, 0x08, 0x8e, 0x89, 0xc4, 0x15, 0xe7,
+ 0x08, 0x8e, 0x80, 0xcf, 0x61, 0x11, 0x08, 0x8e, 0x71, 0x03, 0xc2, 0xb5,
+ 0xfc, 0x91, 0x08, 0x8d, 0xf1, 0x87, 0x08, 0x8d, 0xe1, 0x48, 0xb2, 0x2d,
+ 0xc2, 0xb6, 0x08, 0x97, 0x08, 0x8d, 0xb3, 0x02, 0xb6, 0x16, 0x8b, 0x08,
+ 0x8d, 0xa2, 0x02, 0xb6, 0x1a, 0x83, 0x08, 0x8d, 0x89, 0xc2, 0x0d, 0xf6,
+ 0x08, 0x8d, 0x81, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x78, 0x83, 0x08, 0x8d,
+ 0x71, 0x47, 0xb2, 0x2e, 0x42, 0xb6, 0x1e, 0xc2, 0x00, 0xdb, 0x08, 0x8d,
+ 0x69, 0x83, 0x08, 0x8d, 0x60, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x41, 0x83,
+ 0x08, 0x8d, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x31, 0x83, 0x08, 0x8d,
+ 0x28, 0x83, 0x08, 0x8d, 0x21, 0xc2, 0x00, 0xc1, 0x08, 0x8c, 0xf9, 0xc2,
+ 0x19, 0x2c, 0x08, 0x8c, 0xd1, 0xc2, 0x01, 0x30, 0x08, 0x8c, 0xa8, 0xc2,
+ 0x00, 0xd0, 0x08, 0x8d, 0x19, 0x83, 0x08, 0x8d, 0x11, 0x06, 0x42, 0xb6,
+ 0x2c, 0xc2, 0x00, 0xd0, 0x08, 0x8d, 0x09, 0x83, 0x08, 0x8d, 0x01, 0x16,
+ 0x42, 0xb6, 0x36, 0xc2, 0x00, 0xd0, 0x08, 0x8c, 0xc9, 0x83, 0x08, 0x8c,
+ 0xc0, 0xc2, 0x00, 0xd0, 0x08, 0x8c, 0xb9, 0x83, 0x08, 0x8c, 0xb0, 0xc2,
+ 0x00, 0xd0, 0x08, 0x8c, 0xa1, 0x83, 0x08, 0x8c, 0x98, 0xc2, 0x00, 0xd0,
+ 0x08, 0x8c, 0x91, 0x83, 0x08, 0x8c, 0x88, 0x97, 0x08, 0x8c, 0x81, 0x8b,
+ 0x08, 0x8c, 0x71, 0x83, 0x08, 0x8c, 0x20, 0x97, 0x08, 0x8c, 0x40, 0x8b,
+ 0x08, 0x8c, 0x30, 0xc3, 0x00, 0x2d, 0x08, 0x22, 0xa1, 0xc2, 0x17, 0x28,
+ 0x08, 0x22, 0xf0, 0x96, 0x08, 0x23, 0x81, 0x94, 0x08, 0x23, 0xe8, 0x87,
+ 0x08, 0x23, 0xc1, 0xc3, 0x5d, 0x32, 0x08, 0x23, 0xe0, 0xcd, 0x55, 0x9a,
+ 0x01, 0x57, 0x41, 0xd5, 0x32, 0xab, 0x01, 0x57, 0x48, 0xe0, 0x06, 0x07,
+ 0x01, 0x5a, 0xf8, 0xc9, 0x1f, 0x5a, 0x01, 0x49, 0x31, 0xd4, 0x3c, 0x28,
+ 0x01, 0x49, 0x50, 0xc9, 0xb4, 0x5b, 0x01, 0x0f, 0x91, 0xc9, 0x1f, 0x5a,
+ 0x01, 0x49, 0x29, 0xd4, 0x3c, 0xa0, 0x01, 0x49, 0x49, 0xd9, 0x20, 0x5d,
+ 0x01, 0x49, 0x68, 0xca, 0x9d, 0x06, 0x01, 0x37, 0xb1, 0xc2, 0x01, 0xbb,
+ 0x01, 0x1e, 0x68, 0x0e, 0xc2, 0xb6, 0x40, 0x46, 0x02, 0xae, 0xc2, 0xb6,
+ 0x4c, 0xd0, 0x5d, 0x52, 0x01, 0x2f, 0x41, 0xd8, 0x24, 0x0b, 0x01, 0x2d,
+ 0x49, 0xda, 0x1c, 0xd4, 0x01, 0x2d, 0x31, 0xcd, 0x7a, 0x93, 0x01, 0x2d,
+ 0x29, 0xcf, 0x64, 0xe0, 0x01, 0x2d, 0x21, 0xd1, 0x4f, 0xbe, 0x01, 0x4f,
+ 0x01, 0xce, 0x74, 0xda, 0x01, 0x58, 0x91, 0xd1, 0x53, 0xba, 0x01, 0x58,
+ 0x98, 0xc5, 0x0a, 0xe2, 0x01, 0x18, 0x89, 0x89, 0x01, 0x9e, 0x90, 0x44,
+ 0x1a, 0x16, 0x42, 0xb6, 0x58, 0x44, 0x1a, 0x16, 0x42, 0xb6, 0x64, 0xc4,
+ 0x78, 0x47, 0x01, 0x98, 0x21, 0xc2, 0x00, 0x43, 0x01, 0x98, 0x28, 0x92,
+ 0x01, 0x14, 0x99, 0x8e, 0x01, 0x9c, 0x40, 0xc9, 0xad, 0x0b, 0x01, 0x9b,
+ 0xf8, 0x00, 0x42, 0xb6, 0x70, 0xd5, 0x35, 0x8a, 0x01, 0x56, 0x71, 0xc5,
+ 0xd5, 0x06, 0x01, 0x9a, 0x89, 0xc2, 0x00, 0x39, 0x01, 0x9a, 0x90, 0xc3,
+ 0x71, 0xec, 0x01, 0x9a, 0x99, 0xc5, 0xd8, 0xf8, 0x01, 0x9a, 0xa0, 0xc2,
+ 0x14, 0x48, 0x01, 0x9a, 0xa9, 0xc6, 0xcc, 0x0b, 0x01, 0x9a, 0xb0, 0xc7,
+ 0x04, 0x32, 0x01, 0x9d, 0x72, 0x02, 0xb6, 0x7c, 0xc3, 0x19, 0x86, 0x01,
+ 0x99, 0x50, 0xc6, 0xca, 0x8b, 0x01, 0x99, 0x91, 0xc4, 0xe1, 0x3f, 0x01,
+ 0x99, 0x99, 0xc3, 0x00, 0xea, 0x01, 0x99, 0xa8, 0xc7, 0xc8, 0x8c, 0x01,
+ 0x99, 0xb1, 0xc4, 0xde, 0xfb, 0x01, 0x99, 0xc8, 0x90, 0x01, 0x99, 0xf9,
+ 0x11, 0x42, 0xb6, 0x82, 0x83, 0x01, 0x9b, 0x88, 0xc3, 0x14, 0xc6, 0x01,
+ 0x99, 0x20, 0x00, 0x42, 0xb6, 0x8c, 0xd0, 0x5f, 0x32, 0x01, 0x5e, 0x81,
+ 0xc4, 0x0f, 0xd7, 0x01, 0x99, 0xe9, 0xc3, 0x2d, 0x61, 0x01, 0x9a, 0x00,
+ 0x03, 0xc2, 0xb6, 0x98, 0xc5, 0xd6, 0x64, 0x01, 0x9c, 0x00, 0xc7, 0xc4,
+ 0xbf, 0x01, 0x99, 0x71, 0x0d, 0x42, 0xb6, 0xa4, 0xc2, 0x00, 0xfb, 0x01,
+ 0x99, 0xb9, 0x10, 0xc2, 0xb6, 0xae, 0xc3, 0x90, 0x19, 0x01, 0x99, 0xd8,
+ 0x89, 0x01, 0x96, 0x69, 0x47, 0xc0, 0xe4, 0x42, 0xb6, 0xba, 0xc3, 0x02,
+ 0x30, 0x01, 0x98, 0x59, 0x14, 0x42, 0xb6, 0xd8, 0xc6, 0xd2, 0x29, 0x01,
+ 0x98, 0xa9, 0xc7, 0xc3, 0xd1, 0x01, 0x98, 0xb1, 0xc5, 0xdb, 0xaa, 0x01,
+ 0x98, 0xb8, 0xc6, 0xcc, 0xfb, 0x01, 0x98, 0xd1, 0xc4, 0xe4, 0x27, 0x01,
+ 0x98, 0xd8, 0xc4, 0xdf, 0x4f, 0x01, 0x98, 0xe9, 0xc3, 0x79, 0x25, 0x01,
+ 0x98, 0xf0, 0x00, 0x42, 0xb6, 0xe4, 0xc3, 0x01, 0xe7, 0x01, 0x98, 0x71,
+ 0xc3, 0x51, 0xee, 0x01, 0x98, 0x79, 0x8e, 0x01, 0x9f, 0xf8, 0xc2, 0x01,
+ 0x30, 0x01, 0x98, 0x81, 0xc3, 0xe6, 0x5f, 0x01, 0x98, 0x89, 0xc5, 0xdc,
+ 0xae, 0x01, 0x98, 0x98, 0xc3, 0x0f, 0xd9, 0x01, 0x98, 0xc8, 0xc5, 0xd7,
+ 0xd6, 0x01, 0x98, 0xf9, 0xc6, 0xcb, 0xff, 0x01, 0x99, 0x00, 0x8b, 0x01,
+ 0x99, 0x11, 0x91, 0x01, 0x99, 0x18, 0xc2, 0x00, 0x10, 0x01, 0x99, 0x40,
+ 0xc5, 0xd6, 0xbe, 0x01, 0x99, 0x69, 0x94, 0x01, 0x9b, 0xa0, 0x0b, 0xc2,
+ 0xb6, 0xee, 0xc3, 0xe6, 0x71, 0x01, 0x9a, 0x29, 0xc4, 0xdf, 0x7b, 0x01,
+ 0x9a, 0x31, 0xc5, 0xda, 0x06, 0x01, 0x9a, 0x38, 0xc5, 0xdd, 0xdf, 0x01,
+ 0x9a, 0x41, 0xc2, 0x00, 0x2c, 0x01, 0x9a, 0x4b, 0x02, 0xb6, 0xfa, 0x8e,
+ 0x01, 0x9e, 0xa8, 0xc2, 0x01, 0x30, 0x01, 0x9a, 0x5b, 0x02, 0xb7, 0x00,
+ 0xc5, 0xc3, 0xd3, 0x01, 0x9a, 0x68, 0x88, 0x01, 0x9c, 0x61, 0x89, 0x01,
+ 0x9c, 0x69, 0x83, 0x01, 0x9c, 0x11, 0x8e, 0x01, 0x9c, 0xa9, 0x8f, 0x01,
+ 0x9c, 0xd9, 0x95, 0x01, 0x9d, 0x91, 0x98, 0x01, 0x9d, 0xb1, 0x99, 0x01,
+ 0x9d, 0xe0, 0x11, 0xc2, 0xb7, 0x06, 0xc7, 0x0b, 0x09, 0x01, 0x9d, 0x09,
+ 0xc5, 0xd9, 0x11, 0x01, 0x9d, 0x28, 0xc6, 0x03, 0x12, 0x01, 0x9e, 0xa0,
+ 0x00, 0x42, 0xb7, 0x15, 0xc5, 0x6d, 0xb4, 0x01, 0x9d, 0xc8, 0xc5, 0x6d,
+ 0xb4, 0x01, 0x9d, 0xf8, 0xc2, 0x00, 0x58, 0x01, 0x9a, 0x71, 0xc2, 0x17,
+ 0x99, 0x01, 0x9a, 0x78, 0x46, 0x19, 0xbb, 0xc2, 0xb7, 0x21, 0xc6, 0xd0,
+ 0xa3, 0x0f, 0x8d, 0x48, 0xce, 0x6e, 0x20, 0x0f, 0x8d, 0x29, 0x4f, 0x0b,
+ 0x17, 0x42, 0xb7, 0x2d, 0xcd, 0x7b, 0xf2, 0x0f, 0x8d, 0x09, 0xcb, 0x97,
+ 0x66, 0x0f, 0x8c, 0xe0, 0xc2, 0x00, 0x06, 0x0f, 0x90, 0x99, 0xc2, 0x0d,
+ 0xf6, 0x0f, 0x90, 0x11, 0xc4, 0xe0, 0xb7, 0x0f, 0x90, 0x08, 0xd2, 0x48,
+ 0xfb, 0x0f, 0x8d, 0x11, 0xc3, 0x28, 0xa9, 0x0f, 0x8c, 0xe8, 0x26, 0xc2,
+ 0xb7, 0x95, 0x22, 0xc2, 0xb7, 0xa1, 0x24, 0xc2, 0xb7, 0xd5, 0x23, 0xc2,
+ 0xb7, 0xf1, 0x25, 0xc2, 0xb8, 0x15, 0x42, 0xe6, 0x8f, 0x42, 0xb8, 0x27,
+ 0x8d, 0x0f, 0x8c, 0xf1, 0xcf, 0x05, 0x18, 0x01, 0x71, 0x60, 0xc9, 0x2a,
+ 0xec, 0x01, 0x21, 0x28, 0xc4, 0x09, 0x9d, 0x01, 0x20, 0xa1, 0x16, 0xc2,
+ 0xb8, 0x3d, 0xc3, 0x05, 0x14, 0x01, 0x20, 0x88, 0xc6, 0x01, 0xdb, 0x01,
+ 0x20, 0xc9, 0x16, 0x42, 0xb8, 0x49, 0xc3, 0x1d, 0x35, 0x00, 0x43, 0x51,
+ 0x42, 0x02, 0xa7, 0xc2, 0xb8, 0x58, 0xc2, 0x00, 0x39, 0x00, 0x43, 0x39,
+ 0xc3, 0x39, 0x6d, 0x00, 0x43, 0x31, 0x10, 0xc2, 0xb8, 0x62, 0xc3, 0x1f,
+ 0xdf, 0x00, 0x43, 0x19, 0xc2, 0x25, 0x3b, 0x00, 0x43, 0x08, 0xc7, 0xc2,
+ 0xf8, 0x00, 0x39, 0x79, 0xc6, 0xce, 0xdb, 0x00, 0x39, 0x71, 0xc5, 0xd7,
+ 0xae, 0x00, 0x39, 0x68, 0xc9, 0xad, 0xa4, 0x00, 0x38, 0xe0, 0xc2, 0x14,
+ 0xda, 0x00, 0x3a, 0x79, 0xc5, 0xdc, 0xe5, 0x00, 0x3a, 0x71, 0xc5, 0xd4,
+ 0x20, 0x00, 0x3a, 0x68, 0xc5, 0x05, 0x02, 0x00, 0x39, 0xd9, 0xc5, 0x00,
+ 0xd4, 0x00, 0x39, 0xd0, 0x48, 0x84, 0x8d, 0x42, 0xb8, 0x72, 0xcc, 0x84,
+ 0x8d, 0x00, 0x38, 0x40, 0xd1, 0x55, 0x52, 0x01, 0x14, 0x59, 0xcb, 0x23,
+ 0xa0, 0x01, 0x14, 0x3b, 0x02, 0xb8, 0x7e, 0x46, 0x00, 0xd4, 0x42, 0xb8,
+ 0x84, 0xc4, 0x0e, 0xa6, 0x01, 0x56, 0xa1, 0xc6, 0x2d, 0xd0, 0x01, 0x56,
+ 0xb0, 0x90, 0x01, 0x03, 0xf9, 0x8b, 0x01, 0x03, 0x88, 0x8f, 0x00, 0xdd,
+ 0xf9, 0x8d, 0x00, 0xdd, 0xf0, 0x09, 0xc2, 0xb8, 0x9c, 0xc5, 0xd4, 0xc0,
+ 0x00, 0xdc, 0x00, 0xcf, 0x33, 0xad, 0x01, 0x56, 0x18, 0xcb, 0x0e, 0xbd,
+ 0x01, 0x56, 0x29, 0xce, 0x33, 0x92, 0x01, 0x56, 0x39, 0xcf, 0x6a, 0x8f,
+ 0x01, 0x56, 0x49, 0xcc, 0x24, 0x47, 0x01, 0x56, 0x58, 0x45, 0x02, 0x9a,
+ 0x42, 0xb8, 0xa8, 0xc3, 0x3b, 0x36, 0x0f, 0xb0, 0x39, 0xc4, 0x75, 0x6e,
+ 0x0f, 0xb0, 0x41, 0xd0, 0x55, 0x0f, 0x0f, 0xb0, 0x68, 0xcb, 0x1d, 0x4b,
+ 0x0f, 0xb0, 0x53, 0x02, 0xb8, 0xba, 0xc9, 0xb4, 0xd0, 0x0f, 0xb0, 0x70,
+ 0x45, 0x00, 0x8c, 0xc2, 0xb8, 0xc0, 0xc9, 0xb4, 0x49, 0x01, 0x10, 0x68,
+ 0x83, 0x07, 0xf2, 0x81, 0xc9, 0xb4, 0x64, 0x07, 0xf3, 0x58, 0x46, 0x00,
+ 0x8b, 0x42, 0xb8, 0xcc, 0xc3, 0x05, 0x14, 0x01, 0x0b, 0x83, 0x02, 0xb8,
+ 0xd8, 0x08, 0xc2, 0xb8, 0xdc, 0x16, 0xc2, 0xb8, 0xe6, 0x07, 0xc2, 0xb8,
+ 0xf6, 0xc4, 0x26, 0x78, 0x01, 0x0b, 0xc1, 0x15, 0x42, 0xb9, 0x02, 0xcb,
+ 0x1a, 0x50, 0x07, 0xf2, 0xd1, 0xd6, 0x08, 0x88, 0x07, 0xf2, 0xf1, 0xcd,
+ 0x00, 0x32, 0x07, 0xf2, 0xe0, 0xcb, 0x1a, 0x50, 0x07, 0xf2, 0xc9, 0xcd,
+ 0x00, 0x32, 0x07, 0xf2, 0xd9, 0xd6, 0x08, 0x88, 0x07, 0xf2, 0xe8, 0xcb,
+ 0x0e, 0xbd, 0x01, 0x55, 0x79, 0xcc, 0x24, 0x47, 0x01, 0x55, 0x88, 0xc8,
+ 0x07, 0x5f, 0x01, 0x55, 0xa9, 0xcf, 0x6a, 0x8f, 0x01, 0x55, 0xc8, 0xcb,
+ 0x1a, 0x50, 0x07, 0xf1, 0xa9, 0xd6, 0x08, 0x88, 0x07, 0xf1, 0xc9, 0xd8,
+ 0x21, 0x83, 0x07, 0xf1, 0xd9, 0xd4, 0x38, 0xf4, 0x07, 0xf1, 0xe9, 0xcd,
+ 0x0b, 0x91, 0x07, 0xf1, 0xf9, 0x46, 0x01, 0xfc, 0xc2, 0xb9, 0x0e, 0xce,
+ 0x25, 0xad, 0x07, 0xf2, 0x39, 0x05, 0x42, 0xb9, 0x1a, 0xcc, 0x00, 0x33,
+ 0x07, 0xf1, 0xc1, 0xcd, 0x69, 0x65, 0x07, 0xf2, 0x10, 0x4e, 0x21, 0x89,
+ 0xc2, 0xb9, 0x26, 0xce, 0x69, 0x64, 0x07, 0xf2, 0x20, 0xc6, 0xcf, 0x05,
+ 0x0f, 0x85, 0x11, 0xc6, 0x78, 0x78, 0x0f, 0x85, 0x91, 0xc8, 0xba, 0x2a,
+ 0x0f, 0x86, 0x11, 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0x90, 0xc6, 0xcf, 0x05,
+ 0x0f, 0x85, 0x19, 0xc6, 0x78, 0x78, 0x0f, 0x85, 0x99, 0xc8, 0xba, 0x2a,
+ 0x0f, 0x86, 0x19, 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0x98, 0xc6, 0xcf, 0x05,
+ 0x0f, 0x85, 0x51, 0xc6, 0x78, 0x78, 0x0f, 0x85, 0xd1, 0xc8, 0xba, 0x2a,
+ 0x0f, 0x86, 0x51, 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0xd0, 0x9e, 0x0f, 0x87,
+ 0x0b, 0x02, 0xb9, 0x32, 0x9f, 0x0f, 0x87, 0x13, 0x02, 0xb9, 0x5a, 0xa0,
+ 0x0f, 0x87, 0x19, 0xa1, 0x0f, 0x87, 0x21, 0xa2, 0x0f, 0x87, 0x29, 0xa3,
+ 0x0f, 0x87, 0x31, 0xa4, 0x0f, 0x87, 0x39, 0xa5, 0x0f, 0x87, 0x41, 0xa6,
+ 0x0f, 0x87, 0x48, 0x46, 0xc5, 0x7d, 0xc2, 0xb9, 0x62, 0xc2, 0x00, 0x95,
+ 0x0f, 0x87, 0x00, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x29, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xa9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x29, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xa8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x31, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xb1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x31, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xb0, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x39, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xb9, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x39, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xb8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x61, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xe1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x61, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xe0, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x71, 0xc6, 0x78, 0x78,
+ 0x0f, 0x85, 0xf1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x71, 0xc5, 0xdd, 0x49,
+ 0x0f, 0x86, 0xf0, 0xc8, 0x01, 0x92, 0x01, 0x51, 0xc9, 0xd1, 0x51, 0x56,
+ 0x01, 0x51, 0x71, 0xd0, 0x5b, 0x92, 0x01, 0x51, 0x68, 0xce, 0x6b, 0x8e,
+ 0x01, 0x51, 0x41, 0x15, 0xc2, 0xb9, 0x7a, 0x46, 0x33, 0x92, 0xc2, 0xb9,
+ 0x86, 0xc9, 0x0e, 0x6e, 0x01, 0x51, 0x29, 0xd7, 0x26, 0x60, 0x01, 0x51,
+ 0x18, 0xc2, 0x02, 0xae, 0x00, 0x04, 0x61, 0xc8, 0xbd, 0x3a, 0x00, 0x04,
+ 0x61, 0xc4, 0x03, 0xc8, 0x00, 0x04, 0x59, 0xc7, 0x27, 0xb2, 0x00, 0x04,
+ 0x58, 0xc3, 0x18, 0x13, 0x01, 0x24, 0x39, 0xc3, 0x22, 0x45, 0x01, 0x23,
+ 0xf8, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x70, 0xc2, 0x00, 0xd3, 0x01, 0x90,
+ 0xc0, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x80, 0xc2, 0x00, 0xd3, 0x01, 0x90,
+ 0xc8, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0x98, 0xc2, 0x00, 0xd3, 0x01, 0x90,
+ 0xd0, 0x00, 0x42, 0xb9, 0x92, 0xc2, 0x00, 0xd3, 0x01, 0x90, 0xb8, 0xc2,
+ 0x00, 0x5f, 0x01, 0x91, 0x21, 0xc2, 0x01, 0x19, 0x01, 0x91, 0x59, 0xc7,
+ 0xc4, 0xf0, 0x01, 0x91, 0xb0, 0xc3, 0x18, 0x11, 0x01, 0x91, 0x31, 0xc2,
+ 0x01, 0xd0, 0x01, 0x92, 0x10, 0x90, 0x01, 0x91, 0x81, 0xc7, 0xc8, 0x54,
+ 0x01, 0x91, 0xe0, 0xc3, 0x04, 0x20, 0x01, 0x91, 0x89, 0xc3, 0xe5, 0x0f,
+ 0x01, 0x91, 0xd8, 0xc5, 0x53, 0x93, 0x01, 0x91, 0xf1, 0x96, 0x01, 0x92,
+ 0x08, 0xc6, 0x26, 0xf6, 0x08, 0xd7, 0xb0, 0x9b, 0x08, 0xd7, 0x21, 0x90,
+ 0x08, 0xd7, 0x03, 0x02, 0xb9, 0x9a, 0x99, 0x08, 0xd7, 0x11, 0x8e, 0x08,
+ 0xd7, 0x09, 0x8f, 0x08, 0xd6, 0xf9, 0x96, 0x08, 0xd6, 0xf1, 0x8d, 0x08,
+ 0xd6, 0xe9, 0x92, 0x08, 0xd6, 0xe0, 0xc6, 0x26, 0xf6, 0x08, 0xd7, 0x68,
+ 0x19, 0xc2, 0xb9, 0x9e, 0xc2, 0x00, 0xc4, 0x08, 0x43, 0xf1, 0xc4, 0x02,
+ 0xde, 0x08, 0x43, 0xd8, 0xc3, 0x0d, 0x14, 0x08, 0x43, 0xe9, 0xc3, 0x09,
+ 0x9e, 0x08, 0x43, 0xe0, 0x16, 0xc2, 0xb9, 0xa8, 0x15, 0xc2, 0xb9, 0xb4,
+ 0xc4, 0x5d, 0xe2, 0x08, 0x43, 0xa1, 0xc4, 0xb9, 0x7e, 0x08, 0x43, 0x99,
+ 0xc2, 0x00, 0x67, 0x08, 0x43, 0x89, 0x03, 0xc2, 0xb9, 0xbe, 0xc3, 0x20,
+ 0x18, 0x08, 0x43, 0x71, 0xc9, 0xb3, 0x5f, 0x08, 0x43, 0x69, 0xc3, 0x00,
+ 0x4e, 0x08, 0x43, 0x61, 0xc6, 0xcf, 0xd7, 0x08, 0x43, 0x59, 0xc4, 0xe0,
+ 0xe7, 0x08, 0x43, 0x51, 0xc4, 0x4a, 0xb9, 0x08, 0x43, 0x49, 0xc2, 0x01,
+ 0x7f, 0x08, 0x43, 0x23, 0x02, 0xb9, 0xca, 0xc5, 0x4a, 0xb3, 0x08, 0x43,
+ 0x31, 0xc3, 0x7e, 0x89, 0x08, 0x43, 0x29, 0xc6, 0x40, 0x9a, 0x08, 0x43,
+ 0x19, 0xc5, 0x9c, 0xa2, 0x08, 0x43, 0x11, 0xc4, 0xe3, 0x27, 0x08, 0x43,
+ 0x08, 0xc2, 0x15, 0xb0, 0x0b, 0x5c, 0x69, 0xc2, 0x00, 0x03, 0x0b, 0x5c,
+ 0x31, 0xc4, 0x9f, 0x7d, 0x0b, 0x5b, 0xe8, 0xc3, 0xa6, 0x62, 0x0b, 0x59,
+ 0x59, 0xc3, 0x48, 0x8d, 0x0b, 0x58, 0xe8, 0xc5, 0xd6, 0x23, 0x0b, 0x5b,
+ 0xa8, 0xc4, 0xe0, 0x3f, 0x0b, 0x59, 0xf9, 0xc3, 0x49, 0x2f, 0x0b, 0x59,
+ 0xf1, 0xc3, 0x79, 0xe7, 0x0b, 0x59, 0xe9, 0xc5, 0xda, 0x38, 0x0b, 0x59,
+ 0xe0, 0xc3, 0x44, 0x23, 0x0b, 0x59, 0xd1, 0xc2, 0x00, 0x7a, 0x0b, 0x59,
+ 0xb8, 0xc8, 0xbe, 0x3a, 0x0b, 0x5b, 0x01, 0xc9, 0x4b, 0x94, 0x0b, 0x5a,
+ 0xe8, 0x04, 0xc2, 0xb9, 0xd0, 0xcc, 0x87, 0x09, 0x0f, 0xb2, 0x79, 0xcc,
+ 0x85, 0xc5, 0x0f, 0xb2, 0x71, 0xc9, 0xa8, 0x31, 0x0f, 0xce, 0xa9, 0xc5,
+ 0xda, 0x01, 0x0f, 0xd6, 0x28, 0xe0, 0x07, 0x67, 0x0f, 0xb2, 0x60, 0xcb,
+ 0x92, 0x6a, 0x0f, 0xce, 0xb1, 0xce, 0x6e, 0x12, 0x0f, 0xce, 0xc0, 0x91,
+ 0x08, 0x48, 0xd1, 0xc4, 0x18, 0x12, 0x08, 0x48, 0xc0, 0xc9, 0x1e, 0x8b,
+ 0x05, 0x43, 0x98, 0x83, 0x05, 0x42, 0x81, 0xc2, 0x00, 0xd0, 0x05, 0x42,
+ 0x88, 0x83, 0x05, 0x43, 0x49, 0xc2, 0x00, 0xd0, 0x05, 0x43, 0x50, 0xc2,
+ 0x01, 0x4a, 0x05, 0x43, 0x39, 0xc2, 0x19, 0x2c, 0x05, 0x43, 0x41, 0xc2,
+ 0x00, 0x39, 0x05, 0x43, 0x88, 0xd4, 0x38, 0xe0, 0x08, 0x0f, 0xe8, 0xc4,
+ 0x1e, 0x97, 0x00, 0x4a, 0x69, 0xc5, 0x40, 0xe7, 0x00, 0x48, 0x18, 0xc7,
+ 0x7a, 0x7f, 0x00, 0x49, 0xe9, 0xc7, 0x14, 0x39, 0x00, 0x48, 0x10, 0x00,
+ 0x42, 0xb9, 0xdc, 0xc6, 0xc3, 0x62, 0x05, 0x47, 0xe1, 0xd2, 0x4a, 0x87,
+ 0x05, 0x47, 0x90, 0x94, 0x00, 0x4a, 0x20, 0x8e, 0x00, 0x4b, 0x18, 0x87,
+ 0x00, 0x4a, 0xb8, 0x83, 0x00, 0x49, 0xb1, 0x44, 0x2e, 0xf0, 0x42, 0xb9,
+ 0xec, 0x8e, 0x00, 0x48, 0x63, 0x02, 0xb9, 0xf8, 0x94, 0x00, 0x48, 0x5a,
+ 0x02, 0xb9, 0xfc, 0xc2, 0x00, 0xdb, 0x00, 0x49, 0xa1, 0x83, 0x00, 0x49,
+ 0x98, 0xc2, 0x00, 0xc1, 0x00, 0x49, 0x49, 0x83, 0x00, 0x49, 0x18, 0xc2,
+ 0x00, 0xd0, 0x00, 0x49, 0x11, 0x83, 0x00, 0x49, 0x09, 0x06, 0x42, 0xba,
+ 0x00, 0xc2, 0x00, 0xd0, 0x00, 0x49, 0x01, 0x83, 0x00, 0x48, 0xf8, 0x45,
+ 0xc7, 0x7d, 0x42, 0xba, 0x0a, 0x83, 0x00, 0x48, 0xc1, 0xc2, 0x00, 0xd0,
+ 0x00, 0x4a, 0xd0, 0x83, 0x00, 0x48, 0xb1, 0xc2, 0x00, 0xd0, 0x00, 0x4a,
+ 0xc8, 0x87, 0x00, 0x4b, 0xb8, 0xc4, 0x18, 0x10, 0x00, 0x4b, 0x69, 0xc2,
+ 0x22, 0xcc, 0x00, 0x4b, 0x60, 0xc3, 0x0d, 0x14, 0x00, 0x4b, 0x59, 0xc3,
+ 0x09, 0x9e, 0x00, 0x4b, 0x50, 0xc4, 0x02, 0xde, 0x00, 0x4b, 0x49, 0xc2,
+ 0x02, 0xa0, 0x00, 0x4b, 0x40, 0x8b, 0x08, 0x20, 0x01, 0x83, 0x08, 0x20,
+ 0x13, 0x02, 0xba, 0x16, 0x91, 0x08, 0x20, 0x23, 0x02, 0xba, 0x1a, 0x87,
+ 0x08, 0x20, 0x08, 0x8b, 0x08, 0x20, 0x31, 0x87, 0x08, 0x20, 0x39, 0x83,
+ 0x08, 0x20, 0x43, 0x02, 0xba, 0x1e, 0x91, 0x08, 0x20, 0x52, 0x02, 0xba,
+ 0x22, 0x99, 0x08, 0x20, 0x69, 0x8b, 0x08, 0x21, 0x30, 0xc2, 0x02, 0xe0,
+ 0x08, 0x20, 0x99, 0xc3, 0x0e, 0x65, 0x08, 0x20, 0xe0, 0x88, 0x08, 0x20,
+ 0xc9, 0xc2, 0x00, 0x8e, 0x08, 0x20, 0xd9, 0x95, 0x08, 0x20, 0xeb, 0x02,
+ 0xba, 0x26, 0x94, 0x08, 0x21, 0x09, 0x8e, 0x08, 0x21, 0x11, 0x8f, 0x08,
+ 0x21, 0x19, 0x90, 0x08, 0x21, 0x23, 0x02, 0xba, 0x2a, 0x99, 0x08, 0x21,
+ 0x38, 0xc2, 0x02, 0xe0, 0x08, 0x20, 0xf1, 0xc3, 0x0e, 0x65, 0x08, 0x21,
+ 0x00, 0x8b, 0x08, 0x21, 0x41, 0x87, 0x08, 0x21, 0x49, 0x83, 0x08, 0x21,
+ 0x53, 0x02, 0xba, 0x2e, 0x91, 0x08, 0x21, 0x62, 0x02, 0xba, 0x32, 0x8b,
+ 0x08, 0x21, 0x71, 0x87, 0x08, 0x21, 0x79, 0x83, 0x08, 0x21, 0x83, 0x02,
+ 0xba, 0x36, 0x91, 0x08, 0x21, 0x92, 0x02, 0xba, 0x3a, 0x99, 0x08, 0x21,
+ 0xa9, 0x8b, 0x08, 0x22, 0x70, 0xc2, 0x02, 0xe0, 0x08, 0x21, 0xd9, 0xc3,
+ 0x0e, 0x65, 0x08, 0x22, 0x20, 0x88, 0x08, 0x22, 0x09, 0xc2, 0x00, 0x8e,
+ 0x08, 0x22, 0x19, 0x95, 0x08, 0x22, 0x2b, 0x02, 0xba, 0x3e, 0x94, 0x08,
+ 0x22, 0x49, 0x8e, 0x08, 0x22, 0x51, 0x8f, 0x08, 0x22, 0x59, 0x90, 0x08,
+ 0x22, 0x63, 0x02, 0xba, 0x42, 0x99, 0x08, 0x22, 0x78, 0xc2, 0x02, 0xe0,
+ 0x08, 0x22, 0x31, 0xc3, 0x0e, 0x65, 0x08, 0x22, 0x40, 0xc9, 0x11, 0xf6,
+ 0x01, 0x24, 0x71, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x40, 0xc9, 0x11, 0xf6,
+ 0x01, 0x24, 0x69, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x38, 0xc9, 0x11, 0xf6,
+ 0x01, 0x24, 0x61, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x30, 0xc9, 0x11, 0xf6,
+ 0x01, 0x24, 0x59, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x28, 0xc9, 0x11, 0xf6,
+ 0x01, 0x24, 0x51, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x20, 0xc9, 0x11, 0xf6,
+ 0x01, 0x24, 0x49, 0xc5, 0x0a, 0x8a, 0x0f, 0x88, 0x18, 0xc4, 0x18, 0x10,
+ 0x08, 0xca, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xca, 0xb0, 0xc3, 0x0d, 0x14,
+ 0x08, 0xca, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xca, 0xa0, 0xc4, 0x02, 0xde,
+ 0x08, 0xca, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xca, 0x90, 0x8b, 0x08, 0xc9,
+ 0xb9, 0x83, 0x08, 0xc9, 0x80, 0x97, 0x08, 0xc9, 0xa0, 0x8b, 0x08, 0xc9,
+ 0x90, 0xc2, 0x00, 0xd0, 0x08, 0xc8, 0xc9, 0x83, 0x08, 0xc8, 0xc0, 0xc4,
+ 0x18, 0x10, 0x01, 0x3c, 0x81, 0xc2, 0x22, 0xcc, 0x01, 0x3c, 0x78, 0xc3,
+ 0x0d, 0x14, 0x01, 0x3c, 0x71, 0xc3, 0x09, 0x9e, 0x01, 0x3c, 0x68, 0xc4,
+ 0x02, 0xde, 0x01, 0x3c, 0x61, 0xc2, 0x02, 0xa0, 0x01, 0x3c, 0x58, 0x45,
+ 0x01, 0x95, 0xc2, 0xba, 0x46, 0xc9, 0x61, 0x53, 0x01, 0x48, 0x58, 0xcd,
+ 0x7e, 0x3b, 0x01, 0x0d, 0x09, 0x46, 0x01, 0x9a, 0x42, 0xba, 0x52, 0xc5,
+ 0x01, 0xa2, 0x0f, 0xc2, 0x39, 0xd0, 0x58, 0x62, 0x0f, 0xc2, 0x18, 0x44,
+ 0x00, 0x49, 0xc2, 0xba, 0x58, 0x45, 0x00, 0x2c, 0x42, 0xba, 0x62, 0x00,
+ 0x42, 0xba, 0x6c, 0xca, 0xa3, 0x64, 0x01, 0x27, 0xf1, 0x46, 0x09, 0x97,
+ 0x42, 0xba, 0x8a, 0x00, 0x42, 0xba, 0xa8, 0xc6, 0x2d, 0xd0, 0x01, 0x16,
+ 0x89, 0xc4, 0x0e, 0xa6, 0x01, 0x16, 0x81, 0xc6, 0xb7, 0x74, 0x01, 0x55,
+ 0xe1, 0xcd, 0x6c, 0x99, 0x01, 0x72, 0x20, 0xc5, 0x13, 0x84, 0x01, 0x52,
+ 0x79, 0xcc, 0x06, 0xbb, 0x01, 0x52, 0x70, 0xcd, 0x68, 0xc0, 0x01, 0x57,
+ 0x61, 0xcb, 0x8d, 0x42, 0x01, 0x72, 0x48, 0xc3, 0x03, 0x4e, 0x01, 0x01,
+ 0x9b, 0x02, 0xba, 0xb4, 0xc6, 0xbf, 0x4c, 0x01, 0x55, 0xd8, 0x19, 0xc2,
+ 0xba, 0xba, 0x46, 0x19, 0xbb, 0x42, 0xba, 0xc4, 0xce, 0x55, 0x99, 0x01,
+ 0x55, 0x18, 0x46, 0x03, 0x13, 0xc2, 0xba, 0xd0, 0xc9, 0xb2, 0xbd, 0x01,
+ 0x0a, 0x28, 0x92, 0x01, 0x08, 0xcb, 0x02, 0xba, 0xe0, 0xc5, 0x51, 0x51,
+ 0x01, 0x09, 0xf1, 0x9c, 0x01, 0x09, 0x21, 0x94, 0x01, 0x08, 0xe9, 0x93,
+ 0x01, 0x08, 0xd1, 0x90, 0x01, 0x08, 0xa9, 0x8a, 0x01, 0x08, 0x69, 0x85,
+ 0x01, 0x08, 0x10, 0xc5, 0x51, 0x51, 0x01, 0x09, 0xe9, 0xc2, 0x0b, 0x19,
+ 0x01, 0x09, 0xe0, 0xc9, 0x00, 0xca, 0x01, 0x54, 0xc9, 0xcc, 0x07, 0xc7,
+ 0x01, 0x54, 0xd0, 0x4c, 0x24, 0xe3, 0xc2, 0xba, 0xe4, 0xd5, 0x38, 0x3f,
+ 0x01, 0x57, 0xc9, 0xd8, 0x23, 0x93, 0x01, 0x57, 0xd0, 0xc2, 0x00, 0xd0,
+ 0x08, 0xc0, 0xb9, 0x83, 0x08, 0xc0, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xc0,
+ 0xa9, 0x83, 0x08, 0xc0, 0xa0, 0xc4, 0x01, 0xa3, 0x0d, 0xe4, 0xc9, 0xc4,
+ 0x31, 0xef, 0x0d, 0xe4, 0x80, 0xc7, 0x27, 0x9b, 0x0d, 0xe3, 0x98, 0xc3,
+ 0x02, 0x6e, 0x0d, 0xe4, 0xb1, 0xc9, 0xac, 0xf0, 0x0d, 0xe4, 0x98, 0xc5,
+ 0x01, 0x22, 0x0d, 0xe3, 0xe0, 0xc2, 0x00, 0x2b, 0x0d, 0xe1, 0xa8, 0xc2,
+ 0x00, 0x2b, 0x0d, 0xe1, 0x98, 0xc2, 0x00, 0x3f, 0x0d, 0xe1, 0x70, 0xc6,
+ 0x05, 0x01, 0x0d, 0xe1, 0x30, 0xc2, 0x00, 0x2b, 0x0d, 0xe2, 0x00, 0x90,
+ 0x0d, 0xe3, 0x49, 0x99, 0x0d, 0xe2, 0x10, 0x90, 0x0d, 0xe3, 0x39, 0x87,
+ 0x0d, 0xe2, 0x71, 0x8a, 0x0d, 0xe2, 0x60, 0xc2, 0x00, 0x3f, 0x0d, 0xe1,
+ 0x88, 0xc9, 0x33, 0xad, 0x0d, 0xe1, 0x78, 0xc2, 0x00, 0x3f, 0x0d, 0xe1,
+ 0x68, 0xd2, 0x4e, 0x9b, 0x0d, 0xe1, 0x20, 0xc2, 0x00, 0x3f, 0x0d, 0xe1,
+ 0x60, 0xc2, 0x00, 0x3f, 0x0d, 0xe1, 0x58, 0xd0, 0x5e, 0xe2, 0x01, 0x3e,
+ 0x41, 0xd6, 0x30, 0x7a, 0x01, 0x4f, 0xb9, 0xc8, 0x18, 0x67, 0x01, 0x4f,
+ 0xa8, 0xc7, 0x0e, 0xbc, 0x01, 0x16, 0x68, 0xc9, 0xb2, 0xfc, 0x0f, 0xac,
+ 0x99, 0xc7, 0xc4, 0x6b, 0x0f, 0xac, 0x90, 0xcf, 0x01, 0xb8, 0x01, 0x80,
+ 0xe8, 0xcc, 0x84, 0x99, 0x01, 0x1d, 0x31, 0xc9, 0x57, 0x36, 0x01, 0x1d,
+ 0x29, 0xcc, 0x80, 0xcd, 0x01, 0x1d, 0x21, 0x45, 0x00, 0x8c, 0x42, 0xba,
+ 0xf0, 0x46, 0x00, 0x8b, 0x42, 0xbb, 0x0e, 0xd6, 0x06, 0xd1, 0x0f, 0xdb,
+ 0xf9, 0xd6, 0x2d, 0x36, 0x0f, 0xdb, 0xf0, 0xc2, 0x00, 0x49, 0x01, 0x10,
+ 0xfb, 0x02, 0xbb, 0x1a, 0xc9, 0xb2, 0x75, 0x0f, 0xaf, 0x78, 0xcc, 0x8a,
+ 0xed, 0x01, 0x3f, 0xa1, 0xcc, 0x12, 0x2d, 0x01, 0x0f, 0xa0, 0x44, 0x04,
+ 0x91, 0xc2, 0xbb, 0x1e, 0xc3, 0x04, 0x20, 0x01, 0x2c, 0x80, 0xca, 0xa2,
+ 0x74, 0x01, 0x1d, 0x69, 0xcc, 0x82, 0xe9, 0x01, 0x1d, 0x61, 0xca, 0xa3,
+ 0x5a, 0x01, 0x1d, 0x58, 0xc2, 0x00, 0x49, 0x01, 0x15, 0xfb, 0x02, 0xbb,
+ 0x2a, 0xd6, 0x14, 0xf9, 0x0f, 0xdb, 0x70, 0xcd, 0x3f, 0xe8, 0x0f, 0xdc,
+ 0x41, 0xce, 0x08, 0x79, 0x0f, 0xdc, 0x50, 0xd6, 0x30, 0xfe, 0x01, 0x4b,
+ 0x81, 0xcc, 0x0b, 0x92, 0x01, 0x80, 0x58, 0xcc, 0x00, 0x33, 0x01, 0x4c,
+ 0x21, 0xcd, 0x69, 0x65, 0x01, 0x80, 0x78, 0xd9, 0x1b, 0xd1, 0x0f, 0xc4,
+ 0xb1, 0xc9, 0xb0, 0x8f, 0x01, 0x0f, 0x80, 0xca, 0x03, 0xdd, 0x0f, 0xc4,
+ 0x91, 0x48, 0x01, 0x9a, 0x42, 0xbb, 0x30, 0xc5, 0x01, 0xa2, 0x01, 0x0e,
+ 0xd9, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x78, 0x46, 0x02, 0x5c, 0xc2, 0xbb,
+ 0x45, 0xd1, 0x52, 0xbb, 0x01, 0x48, 0x80, 0xd6, 0x2b, 0x94, 0x01, 0x0e,
+ 0x61, 0x4a, 0x01, 0x58, 0x42, 0xbb, 0x51, 0xd5, 0x03, 0xd2, 0x0f, 0xc0,
+ 0xb1, 0x0e, 0xc2, 0xbb, 0x5d, 0x15, 0xc2, 0xbb, 0x69, 0x42, 0x00, 0x58,
+ 0xc2, 0xbb, 0x75, 0xcf, 0x2c, 0x35, 0x01, 0x0f, 0xc1, 0xd0, 0x58, 0x12,
+ 0x01, 0x0d, 0xa1, 0xc4, 0x01, 0x23, 0x01, 0x0d, 0x51, 0x16, 0xc2, 0xbb,
+ 0x81, 0xca, 0x9e, 0x28, 0x01, 0x4a, 0x29, 0xd9, 0x1f, 0xf9, 0x0f, 0xc0,
+ 0x31, 0xcc, 0x84, 0xb1, 0x0f, 0xc4, 0xd0, 0x43, 0x10, 0x9e, 0xc2, 0xbb,
+ 0x90, 0x47, 0x25, 0xf3, 0x42, 0xbb, 0x9c, 0xd1, 0x56, 0x73, 0x01, 0x49,
+ 0x00, 0x45, 0x00, 0xd5, 0xc2, 0xbb, 0xac, 0x43, 0x02, 0x9c, 0x42, 0xbb,
+ 0xc4, 0x00, 0xc2, 0xbb, 0xca, 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xe0, 0xc9,
+ 0x57, 0x20, 0x01, 0x0c, 0x40, 0xc4, 0xe4, 0x87, 0x01, 0x0c, 0x00, 0x00,
+ 0x42, 0xbb, 0xd6, 0x00, 0x42, 0xbb, 0xe2, 0xe0, 0x0b, 0xc7, 0x0f, 0xac,
+ 0xb0, 0x03, 0xc2, 0xbb, 0xee, 0xc2, 0x16, 0x1c, 0x00, 0xb7, 0xb1, 0xc2,
+ 0x00, 0xfa, 0x00, 0xb7, 0xa9, 0xc2, 0x07, 0xa3, 0x00, 0xb7, 0xa0, 0x49,
+ 0xad, 0x1d, 0x42, 0xbb, 0xf8, 0xc2, 0x00, 0xe7, 0x00, 0xb5, 0xa1, 0x83,
+ 0x00, 0xb5, 0x90, 0xc3, 0x72, 0x57, 0x00, 0xb6, 0xe0, 0xc2, 0x1d, 0xc1,
+ 0x00, 0xb7, 0x31, 0xc6, 0xd2, 0x35, 0x00, 0xb6, 0xc1, 0xc5, 0xd6, 0x82,
+ 0x00, 0xb6, 0x29, 0xc8, 0xbf, 0x3a, 0x00, 0xb5, 0xe1, 0xc5, 0x71, 0x4d,
+ 0x00, 0xb5, 0x60, 0xc3, 0x67, 0x02, 0x00, 0xb7, 0x21, 0x90, 0x00, 0xb5,
+ 0x98, 0x8e, 0x00, 0xb6, 0xd9, 0x92, 0x00, 0xb6, 0xa1, 0x90, 0x00, 0xb6,
+ 0x00, 0x94, 0x00, 0xb6, 0x21, 0xc9, 0xb3, 0xe6, 0x00, 0xb5, 0xb8, 0x90,
+ 0x05, 0x28, 0x08, 0x87, 0x05, 0x28, 0x11, 0x90, 0x05, 0x2f, 0x28, 0x90,
+ 0x05, 0x29, 0x38, 0x90, 0x05, 0x2a, 0x68, 0x91, 0x05, 0x2b, 0x99, 0x90,
+ 0x05, 0x2d, 0xf0, 0x90, 0x05, 0x2c, 0xc0, 0x87, 0x05, 0x28, 0x1b, 0x02,
+ 0xbc, 0x2e, 0x90, 0x05, 0x2f, 0x38, 0x90, 0x05, 0x29, 0x48, 0x90, 0x05,
+ 0x2a, 0x78, 0x91, 0x05, 0x2b, 0xa3, 0x02, 0xbc, 0x32, 0x90, 0x05, 0x2e,
+ 0x00, 0x90, 0x05, 0x2c, 0xd0, 0x87, 0x05, 0x28, 0x28, 0x91, 0x05, 0x2b,
+ 0xb0, 0x87, 0x05, 0x2f, 0x4b, 0x02, 0xbc, 0x36, 0x8b, 0x05, 0x29, 0x59,
+ 0x83, 0x05, 0x2a, 0x89, 0x91, 0x05, 0x2e, 0x13, 0x02, 0xbc, 0x3a, 0x97,
+ 0x05, 0x2c, 0xe0, 0x87, 0x05, 0x28, 0x38, 0x91, 0x05, 0x2b, 0xc0, 0x87,
+ 0x05, 0x2f, 0x5b, 0x02, 0xbc, 0x3e, 0x8b, 0x05, 0x29, 0x69, 0x83, 0x05,
+ 0x2a, 0x99, 0x91, 0x05, 0x2e, 0x23, 0x02, 0xbc, 0x42, 0x97, 0x05, 0x2c,
+ 0xf0, 0x87, 0x05, 0x2f, 0x73, 0x02, 0xbc, 0x46, 0x8b, 0x05, 0x29, 0x79,
+ 0x83, 0x05, 0x2a, 0xb1, 0x91, 0x05, 0x2e, 0x33, 0x02, 0xbc, 0x4a, 0x97,
+ 0x05, 0x2d, 0x00, 0x87, 0x05, 0x29, 0x08, 0x91, 0x05, 0x2c, 0x90, 0x87,
+ 0x05, 0x2f, 0x63, 0x02, 0xbc, 0x4e, 0x8b, 0x05, 0x29, 0x71, 0x83, 0x05,
+ 0x2a, 0xa3, 0x02, 0xbc, 0x56, 0x91, 0x05, 0x2e, 0x2b, 0x02, 0xbc, 0x5a,
+ 0x97, 0x05, 0x2c, 0xf8, 0x87, 0x05, 0x28, 0xf0, 0x90, 0x05, 0x2b, 0x58,
+ 0x91, 0x05, 0x2c, 0x78, 0x87, 0x05, 0x2f, 0x7b, 0x02, 0xbc, 0x5e, 0x8b,
+ 0x05, 0x29, 0x81, 0x83, 0x05, 0x2a, 0xb9, 0x91, 0x05, 0x2e, 0x3b, 0x02,
+ 0xbc, 0x66, 0x97, 0x05, 0x2d, 0x08, 0x87, 0x05, 0x29, 0x01, 0x90, 0x05,
+ 0x30, 0x38, 0x91, 0x05, 0x2c, 0x88, 0x87, 0x05, 0x28, 0x60, 0x91, 0x05,
+ 0x2b, 0xe8, 0x87, 0x05, 0x28, 0x68, 0x91, 0x05, 0x2b, 0xf0, 0x87, 0x05,
+ 0x28, 0x70, 0x87, 0x05, 0x2f, 0xa3, 0x02, 0xbc, 0x6e, 0x8b, 0x05, 0x29,
+ 0xa1, 0x83, 0x05, 0x2a, 0xd9, 0x91, 0x05, 0x2e, 0x63, 0x02, 0xbc, 0x72,
+ 0x97, 0x05, 0x2d, 0x28, 0x91, 0x05, 0x2b, 0xf8, 0x87, 0x05, 0x2f, 0xab,
+ 0x02, 0xbc, 0x76, 0x0a, 0xc2, 0xbc, 0x7a, 0x8b, 0x05, 0x29, 0xa9, 0x83,
+ 0x05, 0x2a, 0xe1, 0x91, 0x05, 0x2e, 0x6b, 0x02, 0xbc, 0x94, 0x97, 0x05,
+ 0x2d, 0x30, 0x87, 0x05, 0x28, 0xa0, 0x91, 0x05, 0x2c, 0x28, 0x87, 0x05,
+ 0x28, 0x91, 0xc8, 0x4a, 0xd9, 0x05, 0x30, 0x60, 0x91, 0x05, 0x2c, 0x18,
+ 0x87, 0x05, 0x28, 0x98, 0x91, 0x05, 0x2c, 0x20, 0x87, 0x05, 0x2f, 0xd3,
+ 0x02, 0xbc, 0x98, 0x8b, 0x05, 0x29, 0xd1, 0x83, 0x05, 0x2b, 0x09, 0x91,
+ 0x05, 0x2e, 0x93, 0x02, 0xbc, 0x9c, 0x97, 0x05, 0x2d, 0x58, 0x87, 0x05,
+ 0x30, 0x0b, 0x02, 0xbc, 0xa6, 0x8b, 0x05, 0x2a, 0x09, 0x83, 0x05, 0x2b,
+ 0x41, 0x91, 0x05, 0x2e, 0xcb, 0x02, 0xbc, 0xaa, 0x97, 0x05, 0x2d, 0x90,
+ 0x09, 0xc2, 0xbc, 0xae, 0xc2, 0x00, 0xd1, 0x05, 0x2a, 0x59, 0xc2, 0x00,
+ 0x45, 0x05, 0x2d, 0xe1, 0xc2, 0x00, 0xc4, 0x05, 0x2f, 0x18, 0x87, 0x05,
+ 0x29, 0x10, 0x87, 0x05, 0x30, 0x53, 0x02, 0xbc, 0xc8, 0x8b, 0x05, 0x2a,
+ 0x41, 0x83, 0x05, 0x2b, 0x81, 0x91, 0x05, 0x2f, 0x03, 0x02, 0xbc, 0xcc,
+ 0x97, 0x05, 0x2d, 0xc8, 0x91, 0x05, 0x2c, 0x98, 0x87, 0x05, 0x28, 0xb0,
+ 0x87, 0x05, 0x2f, 0xe3, 0x02, 0xbc, 0xd0, 0x8b, 0x05, 0x29, 0xe1, 0x83,
+ 0x05, 0x2b, 0x19, 0x91, 0x05, 0x2e, 0xa3, 0x02, 0xbc, 0xd4, 0x97, 0x05,
+ 0x2d, 0x68, 0x91, 0x05, 0x2c, 0x38, 0x87, 0x05, 0x28, 0xc0, 0x87, 0x05,
+ 0x2f, 0xf3, 0x02, 0xbc, 0xd8, 0x8b, 0x05, 0x29, 0xf1, 0x83, 0x05, 0x2b,
+ 0x29, 0x91, 0x05, 0x2e, 0xb3, 0x02, 0xbc, 0xdc, 0x97, 0x05, 0x2d, 0x78,
+ 0x91, 0x05, 0x2c, 0x48, 0x87, 0x05, 0x28, 0xd0, 0x91, 0x05, 0x2c, 0x58,
+ 0x87, 0x05, 0x28, 0xd8, 0x91, 0x05, 0x2c, 0x60, 0x87, 0x05, 0x28, 0xe8,
+ 0x91, 0x05, 0x2c, 0x70, 0x90, 0x05, 0x2b, 0x90, 0xc3, 0x08, 0x48, 0x05,
+ 0x30, 0xd9, 0xc2, 0x37, 0xea, 0x05, 0x30, 0xf0, 0xca, 0x3b, 0x06, 0x01,
+ 0x1b, 0xf9, 0x47, 0x02, 0xd1, 0x42, 0xbc, 0xe0, 0xc4, 0xb2, 0xf8, 0x00,
+ 0x04, 0x50, 0xca, 0x99, 0x1f, 0x01, 0x81, 0x99, 0xca, 0x01, 0xc8, 0x01,
+ 0x81, 0xa8, 0xca, 0xa5, 0x12, 0x00, 0xe7, 0x60, 0xce, 0x25, 0xad, 0x70,
+ 0x02, 0xd9, 0xcb, 0x1a, 0x50, 0x70, 0x01, 0x41, 0xcd, 0x00, 0x32, 0x70,
+ 0x03, 0xd8, 0x9c, 0x70, 0x02, 0xd1, 0x9b, 0x70, 0x02, 0xc9, 0x9a, 0x70,
+ 0x02, 0xc1, 0x99, 0x70, 0x02, 0xb9, 0x98, 0x70, 0x02, 0xb1, 0x97, 0x70,
+ 0x02, 0xa9, 0x96, 0x70, 0x02, 0xa1, 0x95, 0x70, 0x02, 0x99, 0x94, 0x70,
+ 0x02, 0x91, 0x93, 0x70, 0x02, 0x89, 0x92, 0x70, 0x02, 0x81, 0x91, 0x70,
+ 0x02, 0x79, 0x90, 0x70, 0x02, 0x71, 0x8f, 0x70, 0x02, 0x69, 0x8e, 0x70,
+ 0x02, 0x61, 0x8d, 0x70, 0x02, 0x59, 0x8c, 0x70, 0x02, 0x51, 0x8b, 0x70,
+ 0x02, 0x49, 0x8a, 0x70, 0x02, 0x41, 0x89, 0x70, 0x02, 0x39, 0x88, 0x70,
+ 0x02, 0x31, 0x87, 0x70, 0x02, 0x29, 0x86, 0x70, 0x02, 0x21, 0x85, 0x70,
+ 0x02, 0x19, 0x84, 0x70, 0x02, 0x11, 0x83, 0x70, 0x02, 0x08, 0x9c, 0x70,
+ 0x03, 0xd1, 0x9b, 0x70, 0x03, 0xc9, 0x9a, 0x70, 0x03, 0xc1, 0x99, 0x70,
+ 0x03, 0xb9, 0x98, 0x70, 0x03, 0xb1, 0x97, 0x70, 0x03, 0xa9, 0x96, 0x70,
+ 0x03, 0xa1, 0x95, 0x70, 0x03, 0x99, 0x94, 0x70, 0x03, 0x91, 0x93, 0x70,
+ 0x03, 0x89, 0x92, 0x70, 0x03, 0x81, 0x91, 0x70, 0x03, 0x79, 0x90, 0x70,
+ 0x03, 0x71, 0x8f, 0x70, 0x03, 0x69, 0x8e, 0x70, 0x03, 0x61, 0x8d, 0x70,
+ 0x03, 0x59, 0x8c, 0x70, 0x03, 0x51, 0x8b, 0x70, 0x03, 0x49, 0x8a, 0x70,
+ 0x03, 0x41, 0x89, 0x70, 0x03, 0x39, 0x88, 0x70, 0x03, 0x31, 0x87, 0x70,
+ 0x03, 0x29, 0x86, 0x70, 0x03, 0x21, 0x85, 0x70, 0x03, 0x19, 0x84, 0x70,
+ 0x03, 0x11, 0x83, 0x70, 0x03, 0x08, 0xc9, 0xb4, 0x64, 0x70, 0x02, 0x01,
+ 0x83, 0x70, 0x01, 0x60, 0xc4, 0x18, 0x10, 0x70, 0x01, 0xb9, 0xc2, 0x22,
+ 0xcc, 0x70, 0x01, 0xb0, 0xc3, 0x0d, 0x14, 0x70, 0x01, 0xa9, 0xc3, 0x09,
+ 0x9e, 0x70, 0x01, 0xa0, 0xc4, 0x02, 0xde, 0x70, 0x01, 0x99, 0xc2, 0x02,
+ 0xa0, 0x70, 0x01, 0x90, 0x23, 0xc2, 0xbc, 0xec, 0x22, 0xc2, 0xbd, 0x10,
+ 0x21, 0xc2, 0xbd, 0x38, 0x20, 0xc2, 0xbd, 0x60, 0x1f, 0xc2, 0xbd, 0x88,
+ 0x1e, 0xc2, 0xbd, 0xb0, 0x1d, 0x42, 0xbd, 0xd8, 0x26, 0xc2, 0xbe, 0x00,
+ 0x25, 0xc2, 0xbe, 0x28, 0x24, 0xc2, 0xbe, 0x50, 0x23, 0xc2, 0xbe, 0x78,
+ 0x22, 0xc2, 0xbe, 0xa0, 0x21, 0xc2, 0xbe, 0xc8, 0x20, 0xc2, 0xbe, 0xf0,
+ 0x1f, 0xc2, 0xbf, 0x18, 0x1e, 0xc2, 0xbf, 0x40, 0x1d, 0x42, 0xbf, 0x68,
+ 0x26, 0xc2, 0xbf, 0x90, 0x25, 0xc2, 0xbf, 0xb8, 0x24, 0xc2, 0xbf, 0xe0,
+ 0x23, 0xc2, 0xc0, 0x08, 0x22, 0xc2, 0xc0, 0x30, 0x21, 0xc2, 0xc0, 0x58,
+ 0x20, 0xc2, 0xc0, 0x80, 0x1f, 0xc2, 0xc0, 0xa8, 0x1e, 0xc2, 0xc0, 0xd0,
+ 0x1d, 0x42, 0xc0, 0xf8, 0x26, 0xc2, 0xc1, 0x20, 0x25, 0xc2, 0xc1, 0x48,
+ 0x24, 0xc2, 0xc1, 0x70, 0x23, 0xc2, 0xc1, 0x98, 0x22, 0xc2, 0xc1, 0xc0,
+ 0x21, 0xc2, 0xc1, 0xe8, 0x20, 0xc2, 0xc2, 0x10, 0x1f, 0xc2, 0xc2, 0x38,
+ 0x1e, 0xc2, 0xc2, 0x60, 0x1d, 0x42, 0xc2, 0x88, 0x26, 0xc2, 0xc2, 0xb0,
+ 0x25, 0xc2, 0xc2, 0xd8, 0x24, 0xc2, 0xc3, 0x00, 0x23, 0xc2, 0xc3, 0x28,
+ 0x22, 0xc2, 0xc3, 0x50, 0x21, 0xc2, 0xc3, 0x78, 0x20, 0xc2, 0xc3, 0xa0,
+ 0x1f, 0xc2, 0xc3, 0xc8, 0x1e, 0xc2, 0xc3, 0xf0, 0x1d, 0x42, 0xc4, 0x18,
+ 0x26, 0xc2, 0xc4, 0x40, 0x25, 0xc2, 0xc4, 0x68, 0x24, 0xc2, 0xc4, 0x90,
+ 0x23, 0xc2, 0xc4, 0xb8, 0x22, 0xc2, 0xc4, 0xe0, 0x21, 0xc2, 0xc5, 0x08,
+ 0x20, 0xc2, 0xc5, 0x30, 0x1f, 0xc2, 0xc5, 0x58, 0x1e, 0xc2, 0xc5, 0x80,
+ 0x1d, 0x42, 0xc5, 0xa8, 0x26, 0xc2, 0xc5, 0xd0, 0x25, 0xc2, 0xc5, 0xf8,
+ 0x24, 0xc2, 0xc6, 0x20, 0x23, 0xc2, 0xc6, 0x48, 0x22, 0xc2, 0xc6, 0x70,
+ 0x21, 0xc2, 0xc6, 0x98, 0x20, 0xc2, 0xc6, 0xc0, 0x1f, 0xc2, 0xc6, 0xe8,
+ 0x1e, 0xc2, 0xc7, 0x10, 0x1d, 0x42, 0xc7, 0x38, 0x26, 0xc2, 0xc7, 0x60,
+ 0x25, 0xc2, 0xc7, 0x88, 0x24, 0xc2, 0xc7, 0xb0, 0x23, 0xc2, 0xc7, 0xd8,
+ 0x22, 0xc2, 0xc8, 0x00, 0x21, 0xc2, 0xc8, 0x28, 0x20, 0xc2, 0xc8, 0x50,
+ 0x1f, 0xc2, 0xc8, 0x78, 0x1e, 0xc2, 0xc8, 0xa0, 0x1d, 0x42, 0xc8, 0xc8,
+ 0xc4, 0x18, 0x10, 0x0b, 0x56, 0x39, 0xc2, 0x22, 0xcc, 0x0b, 0x56, 0x30,
+ 0xc3, 0x0d, 0x14, 0x0b, 0x56, 0x29, 0xc3, 0x09, 0x9e, 0x0b, 0x56, 0x20,
+ 0xc4, 0x02, 0xde, 0x0b, 0x56, 0x19, 0xc2, 0x02, 0xa0, 0x0b, 0x56, 0x10,
+ 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0xe9, 0x83, 0x0b, 0x55, 0xa8, 0xc2, 0x00,
+ 0xd0, 0x0b, 0x55, 0xe1, 0x83, 0x0b, 0x55, 0x88, 0x83, 0x0b, 0x55, 0xd9,
+ 0xc7, 0xb4, 0x2f, 0x0b, 0x54, 0x80, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0xc9,
+ 0xc2, 0x0d, 0xf6, 0x0b, 0x55, 0xb1, 0x83, 0x0b, 0x55, 0x80, 0x16, 0xc2,
+ 0xc8, 0xec, 0x83, 0x0b, 0x55, 0x68, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0xb9,
+ 0x83, 0x0b, 0x55, 0x10, 0x0a, 0xc2, 0xc8, 0xf6, 0x83, 0x0b, 0x55, 0x20,
+ 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0x99, 0x83, 0x0b, 0x55, 0x61, 0xc2, 0x19,
+ 0x2c, 0x0b, 0x55, 0x41, 0xc2, 0x01, 0x30, 0x0b, 0x55, 0x18, 0x83, 0x0b,
+ 0x55, 0x71, 0xc7, 0xc6, 0xda, 0x0b, 0x54, 0x88, 0x83, 0x0b, 0x55, 0x59,
+ 0x9a, 0x0b, 0x54, 0xf9, 0x93, 0x0b, 0x54, 0xf1, 0x85, 0x0b, 0x54, 0xe9,
+ 0x9c, 0x0b, 0x54, 0xe0, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0x49, 0x83, 0x0b,
+ 0x55, 0x38, 0xc2, 0x00, 0xd0, 0x0b, 0x55, 0x09, 0x83, 0x0b, 0x55, 0x00,
+ 0x0b, 0xc2, 0xc9, 0x00, 0x07, 0xc2, 0xc9, 0x14, 0x9a, 0x0b, 0x54, 0x39,
+ 0x93, 0x0b, 0x54, 0x31, 0x85, 0x0b, 0x54, 0x29, 0x9c, 0x0b, 0x54, 0x20,
+ 0x19, 0xc2, 0xc9, 0x24, 0x9a, 0x0b, 0x53, 0xb9, 0x93, 0x0b, 0x53, 0xb1,
+ 0x85, 0x0b, 0x53, 0xa9, 0x9c, 0x0b, 0x53, 0xa0, 0x9a, 0x0b, 0x54, 0x19,
+ 0x93, 0x0b, 0x54, 0x11, 0x85, 0x0b, 0x54, 0x09, 0x9c, 0x0b, 0x54, 0x00,
+ 0x9a, 0x0b, 0x53, 0xf9, 0x93, 0x0b, 0x53, 0xf1, 0x85, 0x0b, 0x53, 0xe9,
+ 0x9c, 0x0b, 0x53, 0xe0, 0x9a, 0x0b, 0x53, 0xd9, 0x93, 0x0b, 0x53, 0xd1,
+ 0x85, 0x0b, 0x53, 0xc9, 0x9c, 0x0b, 0x53, 0xc0, 0x9a, 0x0b, 0x53, 0x99,
+ 0x93, 0x0b, 0x53, 0x91, 0x85, 0x0b, 0x53, 0x89, 0x9c, 0x0b, 0x53, 0x80,
+ 0x03, 0xc2, 0xc9, 0x34, 0xc3, 0x29, 0x78, 0x08, 0xff, 0x19, 0x0b, 0x42,
+ 0xc9, 0x40, 0xc7, 0xc9, 0x1f, 0x08, 0xff, 0x81, 0xc7, 0xc9, 0xea, 0x08,
+ 0xfe, 0xe1, 0xc9, 0xb4, 0xa3, 0x08, 0xfe, 0xc8, 0x17, 0xc2, 0xc9, 0x4c,
+ 0xc4, 0xe2, 0x47, 0x08, 0xfe, 0xe8, 0x03, 0xc2, 0xc9, 0x58, 0xc2, 0x00,
+ 0x45, 0x08, 0xfe, 0xf8, 0xc8, 0xbc, 0xaa, 0x08, 0xfe, 0xb9, 0xc7, 0x14,
+ 0x39, 0x00, 0x5c, 0x10, 0x83, 0x00, 0x5c, 0x31, 0x8b, 0x00, 0x5c, 0x81,
+ 0x97, 0x00, 0x5c, 0xa0, 0x8b, 0x00, 0x5c, 0x40, 0x97, 0x00, 0x5c, 0x50,
+ 0x87, 0x00, 0x5c, 0x78, 0x91, 0x00, 0x5c, 0x98, 0xc2, 0x01, 0x30, 0x00,
+ 0x5c, 0xc9, 0xc2, 0x19, 0x2c, 0x00, 0x5c, 0xf1, 0x10, 0xc2, 0xc9, 0x6a,
+ 0x83, 0x00, 0x5d, 0x40, 0xc2, 0x01, 0x6f, 0x00, 0x5c, 0xf9, 0x83, 0x00,
+ 0x5d, 0x20, 0x83, 0x00, 0x5d, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x5d, 0x88,
+ 0x83, 0x00, 0x5d, 0x91, 0x0e, 0x42, 0xc9, 0x74, 0xc2, 0x00, 0xd0, 0x00,
+ 0x5d, 0xb1, 0xc2, 0x0d, 0xf6, 0x00, 0x5d, 0xb9, 0x83, 0x00, 0x5d, 0xc0,
+ 0xc2, 0x02, 0xa0, 0x00, 0x5f, 0x41, 0xc4, 0x02, 0xde, 0x00, 0x5f, 0x48,
+ 0xc3, 0x09, 0x9e, 0x00, 0x5f, 0x51, 0xc3, 0x0d, 0x14, 0x00, 0x5f, 0x58,
+ 0xc2, 0x22, 0xcc, 0x00, 0x5f, 0x61, 0xc4, 0x18, 0x10, 0x00, 0x5f, 0x68,
+ 0xc6, 0xa7, 0x8c, 0x08, 0xfe, 0x71, 0xc9, 0xaf, 0xdb, 0x08, 0xfe, 0x38,
+ 0x9f, 0x08, 0xfe, 0x91, 0x9e, 0x08, 0xfe, 0x88, 0xc4, 0x9c, 0x07, 0x08,
+ 0xfe, 0x79, 0xc7, 0xc7, 0x74, 0x08, 0xfe, 0x20, 0x8a, 0x08, 0xfe, 0x61,
+ 0xc4, 0x1e, 0x1a, 0x08, 0xfe, 0x10, 0xc4, 0x0f, 0x1f, 0x08, 0xfe, 0x59,
+ 0xc8, 0x1e, 0x16, 0x08, 0xfe, 0x41, 0x0a, 0x42, 0xc9, 0x7e, 0x46, 0xcf,
+ 0x4d, 0xc2, 0xc9, 0x8a, 0xc8, 0xaf, 0xd2, 0x08, 0xfe, 0x18, 0xc2, 0x00,
+ 0xd0, 0x08, 0xb4, 0xb9, 0x83, 0x08, 0xb4, 0xb0, 0xc2, 0x00, 0xd0, 0x08,
+ 0xb4, 0xa9, 0x83, 0x08, 0xb4, 0xa0, 0xc3, 0x71, 0xf0, 0x00, 0xd5, 0x58,
+ 0xc3, 0x71, 0xf0, 0x00, 0xd5, 0x48, 0xca, 0xa2, 0x2e, 0x00, 0xd3, 0xe1,
+ 0x46, 0x28, 0xb0, 0x42, 0xc9, 0x92, 0xc4, 0x68, 0x94, 0x00, 0xd2, 0xc0,
+ 0x83, 0x00, 0xd2, 0xe1, 0x46, 0x30, 0xa0, 0x42, 0xc9, 0x9e, 0xc5, 0x2c,
+ 0xf5, 0x00, 0xd2, 0xd1, 0xca, 0xa1, 0xc0, 0x00, 0xd2, 0xb8, 0xc5, 0x00,
+ 0xd4, 0x00, 0xd3, 0x99, 0xc5, 0x05, 0x02, 0x00, 0xd3, 0x60, 0x87, 0x00,
+ 0xd3, 0x40, 0x87, 0x00, 0xd2, 0x98, 0xc2, 0x00, 0xd0, 0x00, 0xd2, 0x61,
+ 0xc2, 0x19, 0x2c, 0x00, 0xd1, 0xf9, 0x12, 0xc2, 0xc9, 0xaa, 0xc2, 0x00,
+ 0x87, 0x00, 0xd1, 0xe1, 0x16, 0xc2, 0xc9, 0xb4, 0xc5, 0x3c, 0xf5, 0x00,
+ 0xd1, 0x81, 0x05, 0xc2, 0xc9, 0xbe, 0xc2, 0x0d, 0xf6, 0x00, 0xd1, 0x51,
+ 0x0d, 0x42, 0xc9, 0xc8, 0x83, 0x00, 0xd2, 0x41, 0xc2, 0x0d, 0xf6, 0x00,
+ 0xd2, 0x39, 0xc2, 0x00, 0xd0, 0x00, 0xd2, 0x30, 0xc2, 0x00, 0xd0, 0x00,
+ 0xd1, 0xc9, 0x83, 0x00, 0xd1, 0xc0, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x99,
+ 0x83, 0x00, 0xd1, 0x90, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x41, 0x83, 0x00,
+ 0xd1, 0x38, 0xc2, 0x8d, 0x8f, 0x00, 0xd1, 0x11, 0xc2, 0x00, 0xd0, 0x00,
+ 0xd1, 0x09, 0x83, 0x00, 0xd1, 0x00, 0xc2, 0x00, 0xc1, 0x00, 0xd1, 0x89,
+ 0xc2, 0x01, 0x6f, 0x00, 0xd1, 0x68, 0x83, 0x05, 0x55, 0xc8, 0xc2, 0x01,
+ 0x23, 0x05, 0x54, 0xf9, 0x91, 0x05, 0x54, 0xe8, 0x91, 0x05, 0x54, 0xc9,
+ 0xc2, 0x0f, 0x7b, 0x05, 0x54, 0x49, 0xc2, 0x42, 0xcd, 0x05, 0x54, 0x88,
+ 0xc2, 0x01, 0x23, 0x05, 0x54, 0xb9, 0x91, 0x05, 0x54, 0xa8, 0x91, 0x05,
+ 0x54, 0x59, 0xc2, 0x01, 0x23, 0x05, 0x54, 0x68, 0x0a, 0xc2, 0xc9, 0xd8,
+ 0x91, 0x05, 0x54, 0x08, 0xc2, 0x01, 0x23, 0x05, 0x54, 0xf1, 0x91, 0x05,
+ 0x54, 0xe0, 0x91, 0x05, 0x54, 0xc1, 0xc2, 0x0f, 0x7b, 0x05, 0x54, 0x41,
+ 0xc2, 0x42, 0xcd, 0x05, 0x54, 0x80, 0xc2, 0x01, 0x23, 0x05, 0x54, 0xb1,
+ 0x91, 0x05, 0x54, 0xa0, 0xc2, 0x01, 0x23, 0x05, 0x54, 0x61, 0x91, 0x05,
+ 0x54, 0x50, 0x0a, 0xc2, 0xc9, 0xe2, 0x91, 0x05, 0x54, 0x00, 0xd5, 0x03,
+ 0xd2, 0x01, 0x5c, 0xd1, 0xc9, 0x03, 0xde, 0x01, 0x3d, 0x10, 0xc2, 0x10,
+ 0x37, 0x00, 0x3c, 0xd8, 0xc4, 0xd9, 0x21, 0x00, 0x3c, 0xf9, 0xc6, 0xb4,
+ 0xdc, 0x00, 0x3c, 0x88, 0xc4, 0xe2, 0xd7, 0x00, 0x3c, 0xe9, 0xc7, 0xb4,
+ 0xdb, 0x00, 0x3c, 0x08, 0xc6, 0xb4, 0xdc, 0x00, 0x3c, 0x91, 0x83, 0x00,
+ 0x3c, 0xe0, 0xc5, 0xd9, 0xd4, 0x00, 0x70, 0x09, 0x42, 0x01, 0x23, 0x42,
+ 0xc9, 0xec, 0xc6, 0xcf, 0x47, 0x00, 0x70, 0x39, 0x43, 0xcf, 0x48, 0xc2,
+ 0xc9, 0xf6, 0xc7, 0xc8, 0x38, 0x00, 0x72, 0x68, 0xc2, 0x00, 0xd1, 0x00,
+ 0x70, 0x43, 0x02, 0xca, 0x00, 0xc3, 0x00, 0x74, 0x00, 0x70, 0x49, 0xc2,
+ 0x49, 0x0c, 0x00, 0x70, 0x60, 0x42, 0x01, 0x7c, 0xc2, 0xca, 0x04, 0x44,
+ 0x14, 0x3d, 0x42, 0xca, 0x0e, 0x43, 0xe6, 0x14, 0xc2, 0xca, 0x2b, 0xc7,
+ 0xca, 0x68, 0x00, 0x72, 0x70, 0xc5, 0xdc, 0x90, 0x00, 0x70, 0x71, 0xc3,
+ 0x13, 0x4b, 0x00, 0x70, 0xa0, 0x42, 0x01, 0x7c, 0xc2, 0xca, 0x37, 0x0a,
+ 0x42, 0xca, 0x43, 0xc5, 0xd9, 0xc5, 0x00, 0x70, 0xd9, 0x0a, 0xc2, 0xca,
+ 0x4f, 0xc8, 0xb8, 0x7a, 0x00, 0x71, 0x78, 0xc3, 0x05, 0xad, 0x00, 0x70,
+ 0xeb, 0x02, 0xca, 0x5b, 0xc5, 0xd9, 0x7f, 0x00, 0x72, 0x78, 0xc4, 0x42,
+ 0x6d, 0x00, 0x71, 0x09, 0x42, 0x02, 0xfa, 0x42, 0xca, 0x5f, 0xc5, 0xd9,
+ 0xc0, 0x00, 0x71, 0x19, 0x97, 0x00, 0x71, 0x20, 0x42, 0x01, 0x7c, 0xc2,
+ 0xca, 0x6f, 0x97, 0x00, 0x71, 0x31, 0xca, 0xa4, 0xd6, 0x00, 0x72, 0x28,
+ 0xc3, 0x00, 0x7d, 0x00, 0x71, 0x59, 0xc6, 0xcc, 0x35, 0x00, 0x71, 0x70,
+ 0xc2, 0x10, 0x11, 0x0f, 0x15, 0x61, 0x87, 0x0f, 0x15, 0x3b, 0x02, 0xca,
+ 0x7b, 0x8b, 0x0f, 0x15, 0x12, 0x02, 0xca, 0x7f, 0xc6, 0x7b, 0xb6, 0x0e,
+ 0x98, 0xf1, 0xc3, 0x05, 0xaf, 0x0e, 0x98, 0xa9, 0xc7, 0xc5, 0x1a, 0x0e,
+ 0x98, 0x58, 0xc5, 0xdb, 0x55, 0x0e, 0x99, 0x61, 0xc6, 0xd0, 0x5b, 0x0e,
+ 0x98, 0xd8, 0xca, 0xa1, 0xfc, 0x0f, 0xab, 0xe0, 0xd1, 0x50, 0x9b, 0x00,
+ 0x60, 0x01, 0xce, 0x29, 0x32, 0x00, 0x60, 0x20, 0x83, 0x00, 0x60, 0x31,
+ 0x8b, 0x00, 0x60, 0x81, 0x97, 0x00, 0x60, 0xa0, 0x8b, 0x00, 0x60, 0x40,
+ 0x97, 0x00, 0x60, 0x50, 0x47, 0xb2, 0x2e, 0xc2, 0xca, 0x83, 0x83, 0x00,
+ 0x61, 0xa8, 0x87, 0x00, 0x60, 0x78, 0x91, 0x00, 0x60, 0x98, 0x83, 0x00,
+ 0x60, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x60, 0xb0, 0x83, 0x00, 0x60, 0xb9,
+ 0xc2, 0x00, 0xd0, 0x00, 0x60, 0xc0, 0xc2, 0x01, 0x30, 0x00, 0x60, 0xc9,
+ 0xc2, 0x19, 0x2c, 0x00, 0x60, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x61, 0x19,
+ 0x83, 0x00, 0x61, 0x42, 0x02, 0xca, 0x91, 0x83, 0x00, 0x60, 0xd1, 0xc2,
+ 0x00, 0xd0, 0x00, 0x60, 0xd8, 0x83, 0x00, 0x60, 0xe1, 0xc2, 0x00, 0xd0,
+ 0x00, 0x60, 0xe8, 0x16, 0xc2, 0xca, 0x97, 0x83, 0x00, 0x61, 0x21, 0xc2,
+ 0x00, 0xd0, 0x00, 0x61, 0x29, 0xc2, 0x0d, 0xf6, 0x00, 0x62, 0xc0, 0x06,
+ 0xc2, 0xca, 0xa1, 0x83, 0x00, 0x61, 0x31, 0xc2, 0x00, 0xd0, 0x00, 0x61,
+ 0x39, 0xc2, 0x02, 0x1c, 0x00, 0x62, 0xc8, 0x83, 0x00, 0x61, 0x51, 0xc2,
+ 0x00, 0xd0, 0x00, 0x61, 0x58, 0x83, 0x00, 0x61, 0x61, 0xc2, 0x00, 0xd0,
+ 0x00, 0x61, 0x68, 0x83, 0x00, 0x61, 0x81, 0x14, 0x42, 0xca, 0xab, 0x83,
+ 0x00, 0x61, 0x91, 0x0e, 0x42, 0xca, 0xb5, 0xc2, 0x00, 0xd0, 0x00, 0x61,
+ 0xb1, 0xc2, 0x0d, 0xf6, 0x00, 0x61, 0xb9, 0x83, 0x00, 0x61, 0xc0, 0x94,
+ 0x00, 0x62, 0x20, 0x8e, 0x00, 0x63, 0x18, 0xd2, 0x15, 0xf0, 0x00, 0x63,
+ 0xd1, 0xd3, 0x45, 0xbf, 0x00, 0x63, 0xe8, 0xd2, 0x15, 0xf0, 0x00, 0x63,
+ 0xd9, 0xd3, 0x45, 0xbf, 0x00, 0x63, 0xf0, 0xd0, 0x03, 0xb7, 0x01, 0x4b,
+ 0x91, 0xcf, 0x09, 0xf8, 0x01, 0x5a, 0x48, 0xcb, 0x93, 0x9e, 0x01, 0x53,
+ 0x59, 0xc9, 0x16, 0x14, 0x01, 0x53, 0x50, 0x8e, 0x08, 0xa5, 0xc0, 0x94,
+ 0x08, 0xa5, 0xb0, 0x8e, 0x08, 0xa4, 0x4b, 0x02, 0xca, 0xbf, 0x94, 0x08,
+ 0xa4, 0x3a, 0x02, 0xca, 0xc3, 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0xe1, 0x83,
+ 0x08, 0xa4, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0xa4, 0xd1, 0x83, 0x08, 0xa4,
+ 0xc8, 0xca, 0xa5, 0xc6, 0x00, 0x7e, 0x38, 0xc9, 0xb3, 0x17, 0x00, 0x7e,
+ 0x31, 0xc6, 0xcf, 0x83, 0x00, 0x7e, 0x40, 0x00, 0x42, 0xca, 0xc7, 0x45,
+ 0xda, 0xbf, 0xc2, 0xca, 0xd9, 0x44, 0xe3, 0xef, 0x42, 0xca, 0xe3, 0x83,
+ 0x00, 0x7c, 0x81, 0xc2, 0x00, 0xd0, 0x00, 0x7c, 0x89, 0xc3, 0x1d, 0x35,
+ 0x00, 0x7d, 0xc8, 0x83, 0x00, 0x7c, 0x91, 0xc2, 0x00, 0xd0, 0x00, 0x7c,
+ 0x98, 0xc2, 0x01, 0x30, 0x00, 0x7c, 0xa1, 0xc2, 0x19, 0x2c, 0x00, 0x7c,
+ 0xc9, 0xc2, 0x00, 0xc1, 0x00, 0x7c, 0xf1, 0x83, 0x00, 0x7d, 0x18, 0x83,
+ 0x00, 0x7c, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x7c, 0xb0, 0x16, 0xc2, 0xca,
+ 0xed, 0x83, 0x00, 0x7c, 0xf9, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0x01, 0x15,
+ 0x42, 0xca, 0xf7, 0x06, 0xc2, 0xcb, 0x01, 0x83, 0x00, 0x7d, 0x09, 0xc2,
+ 0x00, 0xd0, 0x00, 0x7d, 0x11, 0x1c, 0x42, 0xcb, 0x0b, 0x83, 0x00, 0x7d,
+ 0x21, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0x28, 0x83, 0x00, 0x7d, 0x31, 0xc2,
+ 0x00, 0xd0, 0x00, 0x7d, 0x38, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0x71, 0x83,
+ 0x00, 0x7d, 0x78, 0xc2, 0x00, 0xd0, 0x00, 0x7d, 0xa1, 0xc2, 0x0d, 0xf6,
+ 0x00, 0x7d, 0xa9, 0x83, 0x00, 0x7d, 0xb0, 0xc2, 0x01, 0x4a, 0x00, 0x7d,
+ 0xd1, 0xc2, 0x19, 0x2c, 0x00, 0x7d, 0xd9, 0xc2, 0x00, 0x39, 0x00, 0x7d,
+ 0xe0, 0xcb, 0x90, 0x0d, 0x00, 0x78, 0x09, 0x44, 0xe3, 0xbf, 0x42, 0xcb,
+ 0x15, 0xcb, 0x98, 0xfd, 0x00, 0x78, 0x99, 0xcc, 0x7c, 0xc3, 0x00, 0x79,
+ 0xb0, 0xca, 0x9c, 0x52, 0x00, 0x78, 0x49, 0xd4, 0x39, 0x44, 0x00, 0x7e,
+ 0x80, 0xc5, 0x01, 0xe1, 0x00, 0x78, 0x80, 0x83, 0x00, 0x7a, 0x51, 0xc2,
+ 0x00, 0xd0, 0x00, 0x7a, 0x58, 0x83, 0x00, 0x7a, 0xc9, 0xc2, 0x00, 0xd0,
+ 0x00, 0x7a, 0xd0, 0x83, 0x00, 0x7a, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x7a,
+ 0x68, 0x83, 0x00, 0x7a, 0xd9, 0xc2, 0x00, 0xd0, 0x00, 0x7a, 0xe0, 0x8a,
+ 0x01, 0x69, 0x90, 0x8a, 0x01, 0x6a, 0xb2, 0x02, 0xcb, 0x21, 0x8a, 0x01,
+ 0x69, 0xc1, 0x86, 0x01, 0x69, 0xca, 0x02, 0xcb, 0x25, 0x8a, 0x01, 0x6a,
+ 0x2a, 0x02, 0xcb, 0x29, 0x8a, 0x01, 0x6a, 0x18, 0x8a, 0x01, 0x6a, 0x51,
+ 0x9c, 0x01, 0x6b, 0x28, 0x94, 0x01, 0x6a, 0xa8, 0x95, 0x01, 0x6a, 0xd1,
+ 0x8a, 0x01, 0x6a, 0xd8, 0x8a, 0x01, 0x6a, 0xe9, 0x96, 0x01, 0x6a, 0xf8,
+ 0x8a, 0x01, 0x6a, 0x30, 0x90, 0x01, 0x6a, 0x81, 0x8a, 0x01, 0x6a, 0xb8,
+ 0x49, 0x19, 0x61, 0xc2, 0xcb, 0x2d, 0xce, 0x6f, 0xd2, 0x07, 0xef, 0xd8,
+ 0x48, 0x19, 0x6b, 0xc2, 0xcb, 0x45, 0x48, 0xab, 0xf5, 0x42, 0xcb, 0x5d,
+ 0x0a, 0xc2, 0xcb, 0x7b, 0x49, 0xb2, 0x6c, 0xc2, 0xcb, 0x87, 0x03, 0xc2,
+ 0xcb, 0xaf, 0xd4, 0x39, 0x6c, 0x07, 0xef, 0xf0, 0x44, 0x2b, 0xb9, 0xc2,
+ 0xcb, 0xb9, 0x45, 0x19, 0x60, 0xc2, 0xcb, 0xc5, 0x46, 0x30, 0xc1, 0xc2,
+ 0xcb, 0xcf, 0x4d, 0x06, 0x5a, 0x42, 0xcb, 0xdb, 0x48, 0x92, 0x78, 0xc2,
+ 0xcb, 0xe7, 0x0e, 0xc2, 0xcb, 0xff, 0xd2, 0x4b, 0x29, 0x07, 0xef, 0x99,
+ 0xcb, 0x90, 0x65, 0x07, 0xef, 0xf8, 0x03, 0xc2, 0xcc, 0x11, 0x0a, 0xc2,
+ 0xcc, 0x1d, 0x48, 0xab, 0xf5, 0x42, 0xcc, 0x29, 0x0a, 0xc2, 0xcc, 0x5d,
+ 0x45, 0x19, 0x60, 0xc2, 0xcc, 0x67, 0x44, 0x2b, 0xb9, 0xc2, 0xcc, 0x7d,
+ 0x4d, 0x06, 0x5a, 0xc2, 0xcc, 0x89, 0x46, 0x50, 0xf0, 0xc2, 0xcc, 0x95,
+ 0x45, 0x30, 0xc1, 0xc2, 0xcc, 0xa1, 0xce, 0x72, 0xf0, 0x07, 0xe4, 0x89,
+ 0xcf, 0x69, 0x81, 0x07, 0xe4, 0x91, 0xcf, 0x60, 0x8a, 0x07, 0xe4, 0xa0,
+ 0x0a, 0xc2, 0xcc, 0xab, 0x44, 0x2b, 0xb9, 0xc2, 0xcc, 0xb7, 0x4d, 0x06,
+ 0x5a, 0xc2, 0xcc, 0xc3, 0x45, 0x19, 0x60, 0xc2, 0xcc, 0xcf, 0x46, 0x50,
+ 0xf0, 0xc2, 0xcc, 0xe5, 0x45, 0x30, 0xc1, 0xc2, 0xcc, 0xf1, 0xce, 0x72,
+ 0xf0, 0x07, 0xe4, 0x51, 0xcf, 0x69, 0x81, 0x07, 0xe4, 0x59, 0xcf, 0x60,
+ 0x8a, 0x07, 0xe4, 0x68, 0x48, 0x0f, 0x9b, 0xc2, 0xcc, 0xfb, 0x49, 0x19,
+ 0x6a, 0x42, 0xcd, 0x25, 0x44, 0x2b, 0xb9, 0xc2, 0xcd, 0x43, 0x45, 0x06,
+ 0x5a, 0xc2, 0xcd, 0x4f, 0x45, 0x19, 0x60, 0xc2, 0xcd, 0x67, 0x45, 0x50,
+ 0xf0, 0xc2, 0xcd, 0x7d, 0x0a, 0xc2, 0xcd, 0x87, 0x45, 0x30, 0xc1, 0x42,
+ 0xcd, 0x93, 0x03, 0xc2, 0xcd, 0x9d, 0xcd, 0x7e, 0x55, 0x07, 0xea, 0x58,
+ 0x44, 0x2b, 0xb9, 0xc2, 0xcd, 0xa9, 0x4d, 0x06, 0x5a, 0xc2, 0xcd, 0xb5,
+ 0x45, 0x19, 0x60, 0xc2, 0xcd, 0xc1, 0x45, 0x50, 0xf0, 0xc2, 0xcd, 0xcb,
+ 0x45, 0x50, 0xf1, 0xc2, 0xcd, 0xd5, 0x46, 0x30, 0xc1, 0x42, 0xcd, 0xe1,
+ 0x48, 0xab, 0xf5, 0xc2, 0xcd, 0xed, 0xdc, 0x12, 0xa9, 0x07, 0xef, 0xe8,
+ 0x46, 0x2b, 0xba, 0xc2, 0xce, 0x21, 0x03, 0x42, 0xce, 0x27, 0x49, 0x19,
+ 0x61, 0xc2, 0xce, 0x3c, 0xd5, 0x38, 0x2a, 0x07, 0xef, 0xa0, 0x0b, 0xc2,
+ 0xce, 0x60, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xd8, 0x46, 0x50, 0x13, 0xc2,
+ 0xce, 0x6c, 0x45, 0x50, 0xf0, 0xc2, 0xce, 0x78, 0x44, 0x19, 0x6a, 0xc2,
+ 0xce, 0x82, 0x46, 0x30, 0xc1, 0xc2, 0xce, 0x8c, 0x44, 0x72, 0xf0, 0xc2,
+ 0xce, 0x98, 0x4d, 0x06, 0x5a, 0xc2, 0xce, 0xa4, 0x44, 0x2b, 0xb9, 0x42,
+ 0xce, 0xb0, 0x60, 0x08, 0x07, 0x42, 0xce, 0xbc, 0xc5, 0x05, 0x02, 0x00,
+ 0x47, 0xc9, 0xc5, 0x00, 0xd4, 0x00, 0x47, 0xb8, 0x08, 0xc2, 0xce, 0xc6,
+ 0x09, 0xc2, 0xce, 0xd8, 0x0e, 0xc2, 0xce, 0xf9, 0x42, 0x1c, 0x52, 0xc2,
+ 0xcf, 0x08, 0x03, 0xc2, 0xcf, 0x18, 0x0d, 0xc2, 0xcf, 0x34, 0x16, 0xc2,
+ 0xcf, 0x50, 0xc3, 0xd5, 0x25, 0x00, 0x33, 0xf3, 0x02, 0xcf, 0x78, 0x1b,
+ 0xc2, 0xcf, 0x85, 0x14, 0xc2, 0xcf, 0x95, 0x42, 0x00, 0x51, 0xc2, 0xcf,
+ 0xb6, 0x97, 0x00, 0x36, 0x3b, 0x02, 0xcf, 0xc6, 0xc3, 0x0f, 0x9a, 0x00,
+ 0x32, 0x13, 0x02, 0xcf, 0xd0, 0x87, 0x00, 0x36, 0x83, 0x02, 0xcf, 0xd4,
+ 0x42, 0x02, 0x1c, 0xc2, 0xcf, 0xd8, 0x15, 0xc2, 0xcf, 0xe8, 0x06, 0xc2,
+ 0xd0, 0x15, 0xc2, 0x00, 0x5f, 0x00, 0x36, 0x5b, 0x02, 0xd0, 0x37, 0xc3,
+ 0x7e, 0x89, 0x00, 0x32, 0x43, 0x02, 0xd0, 0x42, 0x0f, 0xc2, 0xd0, 0x46,
+ 0xc2, 0x49, 0x0c, 0x00, 0x36, 0x33, 0x02, 0xd0, 0x55, 0x10, 0xc2, 0xd0,
+ 0x59, 0x0a, 0x42, 0xd0, 0x72, 0xd3, 0x43, 0xd1, 0x00, 0x46, 0x91, 0xc5,
+ 0x05, 0x02, 0x00, 0x46, 0x79, 0xc5, 0x00, 0xd4, 0x00, 0x46, 0x70, 0x11,
+ 0xc2, 0xd0, 0x88, 0x03, 0x42, 0xd0, 0x94, 0xc3, 0x00, 0x49, 0x0f, 0x70,
+ 0x01, 0xc2, 0x00, 0x74, 0x0f, 0x70, 0x78, 0xc2, 0x00, 0x74, 0x0f, 0x70,
+ 0x31, 0x8a, 0x0f, 0x70, 0xd0, 0x03, 0xc2, 0xd0, 0x9c, 0xc2, 0x16, 0x1c,
+ 0x0f, 0x70, 0xa9, 0x0a, 0x42, 0xd0, 0xa6, 0xc2, 0x0f, 0x9b, 0x0f, 0x70,
+ 0x51, 0xc3, 0x14, 0x4b, 0x0f, 0x70, 0xb8, 0xc2, 0x00, 0xc2, 0x0f, 0x70,
+ 0x59, 0x46, 0xce, 0x45, 0x42, 0xd0, 0xb2, 0xc3, 0x03, 0x26, 0x0f, 0x70,
+ 0x71, 0xc4, 0xdf, 0x93, 0x0f, 0x70, 0xa1, 0x49, 0x9f, 0xe0, 0xc2, 0xd1,
+ 0x16, 0xc2, 0x01, 0x9d, 0x0f, 0x70, 0x88, 0xc3, 0x85, 0xf5, 0x0f, 0x71,
+ 0x09, 0xc4, 0x30, 0xc1, 0x0f, 0x71, 0x11, 0x0a, 0xc2, 0xd1, 0x66, 0xc3,
+ 0x2b, 0xb9, 0x0f, 0x71, 0x49, 0x0d, 0xc2, 0xd1, 0x72, 0xc3, 0x0d, 0xff,
+ 0x0f, 0x71, 0x59, 0xc4, 0x19, 0x60, 0x0f, 0x71, 0x61, 0xc4, 0x3a, 0x01,
+ 0x0f, 0x71, 0x69, 0x15, 0xc2, 0xd1, 0x7e, 0xc3, 0x03, 0x0c, 0x0f, 0x71,
+ 0x79, 0xc3, 0xb1, 0x0d, 0x0f, 0x71, 0x81, 0xc3, 0x0f, 0x9a, 0x0f, 0x71,
+ 0x91, 0x16, 0xc2, 0xd1, 0x90, 0xc3, 0xb2, 0x00, 0x0f, 0x71, 0xc9, 0xc5,
+ 0x92, 0x75, 0x0f, 0x71, 0xd8, 0xda, 0x19, 0x60, 0x0f, 0x77, 0x81, 0xcc,
+ 0x88, 0x95, 0x0f, 0x77, 0x88, 0x00, 0xc2, 0xd1, 0x9c, 0xc3, 0x13, 0x00,
+ 0x00, 0x32, 0x62, 0x02, 0xd1, 0xae, 0xc9, 0x33, 0xad, 0x00, 0x47, 0xe0,
+ 0xc9, 0x33, 0xad, 0x00, 0x47, 0xe8, 0x45, 0x00, 0x8c, 0xc2, 0xd1, 0xb4,
+ 0xcd, 0x00, 0xfa, 0x07, 0xf3, 0xb1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xb8,
+ 0xce, 0x00, 0xf9, 0x07, 0xf3, 0x80, 0x19, 0xc2, 0xd1, 0xc6, 0x15, 0xc2,
+ 0xd1, 0xd2, 0x08, 0xc2, 0xd1, 0xe4, 0xc4, 0x3a, 0x01, 0x00, 0x37, 0x43,
+ 0x02, 0xd1, 0xf0, 0xc3, 0x0f, 0x9a, 0x00, 0x46, 0xb9, 0xc3, 0x03, 0x0c,
+ 0x00, 0x46, 0xb1, 0x42, 0x02, 0x1c, 0xc2, 0xd1, 0xf6, 0xc3, 0x2b, 0xb9,
+ 0x00, 0x37, 0x3b, 0x02, 0xd2, 0x00, 0x0f, 0xc2, 0xd2, 0x06, 0xd4, 0x3c,
+ 0x3c, 0x00, 0x37, 0x09, 0xd8, 0x21, 0x53, 0x00, 0x37, 0x01, 0xcc, 0x8c,
+ 0x91, 0x00, 0x36, 0xf9, 0x16, 0xc2, 0xd2, 0x12, 0xc4, 0x30, 0xc1, 0x00,
+ 0x36, 0xd1, 0x0e, 0x42, 0xd2, 0x1e, 0xcf, 0x60, 0x7b, 0x00, 0x46, 0xc9,
+ 0x19, 0xc2, 0xd2, 0x2a, 0xc4, 0x19, 0x60, 0x00, 0x37, 0x69, 0xc4, 0xdf,
+ 0x6b, 0x00, 0x37, 0x18, 0xc7, 0xbf, 0x83, 0x00, 0x46, 0x49, 0xc3, 0x00,
+ 0xcf, 0x00, 0x30, 0xc0, 0x00, 0x42, 0xd2, 0x36, 0xc5, 0x05, 0x02, 0x07,
+ 0xde, 0x09, 0xc5, 0x00, 0xd4, 0x07, 0xde, 0x00, 0x48, 0x04, 0xe7, 0xc2,
+ 0xd2, 0x48, 0x4a, 0x0e, 0x7d, 0x42, 0xd2, 0x5a, 0xd7, 0x2b, 0x0c, 0x07,
+ 0xdd, 0xe1, 0x42, 0x00, 0x30, 0x42, 0xd2, 0x6c, 0xc5, 0x05, 0x02, 0x07,
+ 0xdd, 0xd9, 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xd0, 0x46, 0xd1, 0x0f, 0xc2,
+ 0xd2, 0x78, 0x03, 0x42, 0xd2, 0x84, 0xcf, 0x63, 0xf0, 0x00, 0x30, 0x99,
+ 0xd0, 0x5a, 0x82, 0x00, 0x30, 0x90, 0xcd, 0x00, 0xfa, 0x07, 0xf3, 0xe1,
+ 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xe8, 0xc5, 0x05, 0x02, 0x00, 0x47, 0x79,
+ 0xc5, 0x00, 0xd4, 0x00, 0x47, 0x60, 0xc5, 0x05, 0x02, 0x00, 0x47, 0x71,
+ 0xc5, 0x00, 0xd4, 0x00, 0x47, 0x58, 0xc5, 0x05, 0x02, 0x00, 0x47, 0x69,
+ 0xc5, 0x00, 0xd4, 0x00, 0x47, 0x50, 0x46, 0x00, 0x8b, 0x42, 0xd2, 0x9c,
+ 0xc3, 0x13, 0x00, 0x00, 0x47, 0x48, 0xc3, 0x13, 0x00, 0x00, 0x47, 0x40,
+ 0xc3, 0x13, 0x00, 0x00, 0x47, 0x38, 0x83, 0x00, 0x2b, 0xc9, 0xc2, 0x16,
+ 0x1c, 0x00, 0x2b, 0x98, 0x83, 0x00, 0x2a, 0x49, 0xc2, 0x16, 0x1c, 0x00,
+ 0x2a, 0x18, 0x9f, 0x0f, 0xbb, 0x31, 0xa0, 0x0f, 0xbb, 0x39, 0xa1, 0x0f,
+ 0xbb, 0x41, 0xa2, 0x0f, 0xbb, 0x48, 0xc2, 0xe5, 0xfd, 0x0f, 0xb9, 0x20,
+ 0xa1, 0x0f, 0xb9, 0xa9, 0x9f, 0x0f, 0xb9, 0x99, 0xa0, 0x0f, 0xb9, 0xa0,
+ 0xc8, 0x8c, 0x89, 0x0f, 0xb9, 0x83, 0x02, 0xd2, 0xae, 0xc4, 0x1a, 0x05,
+ 0x0f, 0xb8, 0xf8, 0x9f, 0x0f, 0xb8, 0x59, 0xa0, 0x0f, 0xb8, 0x60, 0x48,
+ 0xba, 0x6a, 0xc2, 0xd2, 0xb4, 0xc8, 0x8c, 0x89, 0x0f, 0xb9, 0x61, 0xc6,
+ 0x4c, 0x49, 0x0f, 0xb9, 0x10, 0xc8, 0x8c, 0x89, 0x0f, 0xb9, 0x69, 0xd2,
+ 0x4c, 0x49, 0x0f, 0xb9, 0x30, 0xc2, 0xe5, 0xfd, 0x0f, 0xb8, 0x48, 0xc2,
+ 0xe5, 0xfd, 0x0f, 0xb8, 0x38, 0x84, 0x0a, 0x21, 0xa1, 0x83, 0x0a, 0x21,
+ 0x98, 0x83, 0x0a, 0x21, 0x88, 0x83, 0x0a, 0x21, 0x60, 0x83, 0x0a, 0x21,
+ 0x48, 0x83, 0x0a, 0x20, 0xd8, 0x83, 0x0a, 0x20, 0x50, 0x83, 0x0a, 0x22,
+ 0x49, 0x84, 0x0a, 0x22, 0x51, 0x85, 0x0a, 0x22, 0x58, 0x83, 0x0a, 0x23,
+ 0x58, 0x83, 0x0a, 0x23, 0x68, 0x83, 0x0a, 0x23, 0x80, 0x83, 0x0a, 0x23,
+ 0x90, 0x83, 0x0a, 0x23, 0xa0, 0x83, 0x0a, 0x23, 0xb9, 0x84, 0x0a, 0x23,
+ 0xc1, 0x85, 0x0a, 0x23, 0xc8, 0x83, 0x0a, 0x23, 0xd9, 0x84, 0x0a, 0x23,
+ 0xe0, 0x83, 0x0a, 0x23, 0xf9, 0x84, 0x0a, 0x24, 0x01, 0x85, 0x0a, 0x24,
+ 0x08, 0x83, 0x0a, 0x24, 0x29, 0x84, 0x0a, 0x24, 0x30, 0x83, 0x0a, 0x24,
+ 0x60, 0x83, 0x0a, 0x24, 0xb8, 0x83, 0x0a, 0x25, 0x10, 0x83, 0x0a, 0x27,
+ 0x31, 0x84, 0x0a, 0x27, 0x38, 0x83, 0x0a, 0x27, 0x68, 0x83, 0x0a, 0x27,
+ 0x80, 0x83, 0x0a, 0x27, 0xb8, 0x83, 0x0a, 0x27, 0xc8, 0x83, 0x0a, 0x28,
+ 0x28, 0x83, 0x0a, 0x29, 0x70, 0x83, 0x0a, 0x2a, 0x28, 0x83, 0x0a, 0x2a,
+ 0x58, 0x83, 0x0a, 0x2a, 0x88, 0x83, 0x0a, 0x2a, 0xe0, 0x83, 0x0a, 0x2b,
+ 0x88, 0x83, 0x0a, 0x2b, 0xa1, 0x84, 0x0a, 0x2b, 0xa9, 0x85, 0x0a, 0x2b,
+ 0xb0, 0x83, 0x0a, 0x2b, 0xd9, 0x84, 0x0a, 0x2b, 0xe1, 0x85, 0x0a, 0x2b,
+ 0xe8, 0x83, 0x0a, 0x2c, 0xa8, 0x83, 0x0a, 0x2c, 0xd8, 0x83, 0x0a, 0x2d,
+ 0x00, 0x83, 0x0a, 0x2d, 0x20, 0x83, 0x0a, 0x2d, 0x78, 0xc9, 0xae, 0xa0,
+ 0x0a, 0x2d, 0x89, 0x83, 0x0a, 0x2d, 0x90, 0x83, 0x0a, 0x2d, 0xb0, 0xd4,
+ 0x3f, 0x0c, 0x0a, 0x2e, 0x71, 0xd3, 0x44, 0xc8, 0x0a, 0x2e, 0x78, 0x83,
+ 0x0a, 0x2f, 0xc0, 0x83, 0x0a, 0x30, 0x00, 0xc4, 0x0d, 0xe4, 0x01, 0x1b,
+ 0x01, 0xc5, 0x02, 0xd2, 0x01, 0x19, 0xe0, 0x43, 0x01, 0x47, 0xc2, 0xd2,
+ 0xc0, 0xc2, 0x05, 0x03, 0x01, 0x1a, 0xa3, 0x02, 0xd2, 0xcc, 0x0b, 0x42,
+ 0xd2, 0xd2, 0xc6, 0xcd, 0xc1, 0x01, 0x1a, 0x99, 0xcb, 0x03, 0xbc, 0x01,
+ 0x1a, 0x80, 0xcd, 0x09, 0xfa, 0x01, 0x1a, 0x39, 0xc7, 0x00, 0xcc, 0x01,
+ 0x1a, 0x18, 0xc3, 0xba, 0x27, 0x01, 0x1a, 0x71, 0xc8, 0x52, 0x09, 0x01,
+ 0x1a, 0x50, 0xd0, 0x5b, 0xd2, 0x01, 0x12, 0x90, 0x00, 0x42, 0xd2, 0xde,
+ 0xc9, 0x57, 0x20, 0x08, 0x09, 0x68, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x60,
+ 0x00, 0x42, 0xd2, 0xea, 0x00, 0x42, 0xd2, 0xf6, 0xc9, 0x57, 0x20, 0x08,
+ 0x09, 0x78, 0x00, 0x42, 0xd3, 0x02, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x70,
+ 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xf1, 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x38,
+ 0xc9, 0x57, 0x20, 0x08, 0x09, 0x80, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xf9,
+ 0xc8, 0x4b, 0x94, 0x08, 0x09, 0x40, 0xc9, 0x57, 0x20, 0x08, 0x09, 0x88,
+ 0xd5, 0x35, 0xf3, 0x0f, 0xdd, 0x78, 0x48, 0x1e, 0x57, 0xc2, 0xd3, 0x0e,
+ 0x11, 0x42, 0xd3, 0x26, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x35, 0xd0, 0x5e,
+ 0x32, 0x01, 0x2b, 0xe0, 0x47, 0x54, 0x42, 0xc2, 0xd3, 0x45, 0x49, 0x45,
+ 0xd2, 0x42, 0xd3, 0x51, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x5d, 0xc8, 0x00,
+ 0x5f, 0x01, 0x28, 0x51, 0xca, 0x01, 0x68, 0x01, 0x28, 0x40, 0xc8, 0x00,
+ 0x5f, 0x01, 0x28, 0x31, 0xca, 0x01, 0x68, 0x01, 0x28, 0x20, 0xce, 0x72,
+ 0xaa, 0x01, 0x2a, 0x51, 0xc8, 0x11, 0xff, 0x01, 0x29, 0xd1, 0xca, 0x11,
+ 0x34, 0x01, 0x29, 0x90, 0xce, 0x73, 0x44, 0x01, 0x29, 0xe9, 0xc8, 0x11,
+ 0x49, 0x01, 0x29, 0xa9, 0xca, 0x12, 0x12, 0x01, 0x29, 0x68, 0x0e, 0xc2,
+ 0xd3, 0x6f, 0xca, 0x01, 0x68, 0x01, 0x29, 0xd9, 0xc5, 0x00, 0x2c, 0x01,
+ 0x28, 0xb8, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x7b, 0xc8, 0x00, 0x5f, 0x01,
+ 0x2a, 0x79, 0xca, 0x01, 0x68, 0x01, 0x2a, 0x68, 0xca, 0x01, 0x68, 0x01,
+ 0x2a, 0x59, 0xc4, 0x00, 0x49, 0x01, 0x29, 0x59, 0xc5, 0x00, 0x2c, 0x01,
+ 0x29, 0x18, 0x45, 0x02, 0x9a, 0x42, 0xd3, 0x8d, 0xca, 0x01, 0x68, 0x01,
+ 0x2b, 0x49, 0xc4, 0x00, 0x49, 0x01, 0x2a, 0xe9, 0xc5, 0x00, 0x2c, 0x01,
+ 0x2a, 0xd0, 0xca, 0x01, 0x68, 0x01, 0x2b, 0x31, 0xc4, 0x00, 0x49, 0x01,
+ 0x2a, 0xb9, 0xc5, 0x00, 0x2c, 0x01, 0x2a, 0xa0, 0xd1, 0x53, 0x43, 0x01,
+ 0x2b, 0x29, 0xcb, 0x8d, 0x84, 0x01, 0x2a, 0xb1, 0xcc, 0x89, 0xd9, 0x01,
+ 0x2a, 0x98, 0xd1, 0x53, 0x32, 0x01, 0x2b, 0x21, 0xcb, 0x8e, 0xce, 0x01,
+ 0x2a, 0xa9, 0xcc, 0x87, 0xa5, 0x01, 0x2a, 0x90, 0xd3, 0x42, 0x7b, 0x01,
+ 0x2a, 0x39, 0xd0, 0x32, 0x71, 0x01, 0x29, 0x79, 0x45, 0x00, 0x49, 0xc2,
+ 0xd3, 0x9f, 0x46, 0x00, 0x2c, 0x42, 0xd3, 0xab, 0xd3, 0x41, 0xaa, 0x01,
+ 0x2a, 0x09, 0xd0, 0x32, 0x47, 0x01, 0x29, 0x81, 0x45, 0x00, 0x49, 0xc2,
+ 0xd3, 0xb7, 0x46, 0x00, 0x2c, 0x42, 0xd3, 0xc3, 0xca, 0x11, 0x34, 0x01,
+ 0x29, 0x51, 0xc5, 0x11, 0x39, 0x01, 0x28, 0xc8, 0xca, 0x11, 0x34, 0x01,
+ 0x29, 0x11, 0xc5, 0x11, 0x39, 0x01, 0x28, 0xa8, 0xca, 0x12, 0x12, 0x01,
+ 0x29, 0x31, 0xc5, 0x07, 0xeb, 0x01, 0x28, 0xd0, 0xca, 0x12, 0x12, 0x01,
+ 0x28, 0xf1, 0xc5, 0x07, 0xeb, 0x01, 0x28, 0xb0, 0xa3, 0x0f, 0xd9, 0xb0,
+ 0xa2, 0x0f, 0xd8, 0xab, 0x02, 0xd3, 0xcf, 0xa1, 0x0f, 0xd8, 0x73, 0x02,
+ 0xd3, 0xd3, 0xa3, 0x0f, 0xd9, 0x28, 0xa3, 0x0f, 0xd9, 0x80, 0xa3, 0x0f,
+ 0xd9, 0x41, 0xa2, 0x0f, 0xd8, 0xca, 0x02, 0xd3, 0xdb, 0xa3, 0x0f, 0xd9,
+ 0x51, 0xa2, 0x0f, 0xd8, 0xda, 0x02, 0xd3, 0xdf, 0xa3, 0x0f, 0xd9, 0xc8,
+ 0xa3, 0x0f, 0xd9, 0x59, 0xa2, 0x0f, 0xd8, 0xe2, 0x02, 0xd3, 0xe3, 0xa3,
+ 0x0f, 0xd9, 0x98, 0xa3, 0x0f, 0xd9, 0xb8, 0xca, 0xa7, 0x92, 0x0f, 0xd2,
+ 0x4b, 0x02, 0xd3, 0xe7, 0x0d, 0xc2, 0xd3, 0xed, 0xc4, 0xe3, 0x93, 0x01,
+ 0x32, 0xfb, 0x02, 0xd3, 0xff, 0xc6, 0xca, 0xfd, 0x01, 0x32, 0xeb, 0x02,
+ 0xd4, 0x05, 0xc4, 0xde, 0x83, 0x01, 0x32, 0xe3, 0x02, 0xd4, 0x0b, 0xc5,
+ 0xa8, 0xf7, 0x01, 0x32, 0xdb, 0x02, 0xd4, 0x11, 0x47, 0x45, 0x86, 0x42,
+ 0xd4, 0x17, 0x4e, 0x6e, 0xe4, 0xc2, 0xd4, 0x33, 0x4e, 0x0e, 0x14, 0xc2,
+ 0xd4, 0x3f, 0x4c, 0x12, 0xe1, 0xc2, 0xd4, 0x4b, 0x4f, 0x61, 0x3e, 0x42,
+ 0xd4, 0x57, 0x00, 0x42, 0xd4, 0x63, 0xc6, 0x0b, 0x09, 0x0f, 0xbc, 0x69,
+ 0xc6, 0x02, 0xd1, 0x0f, 0xbc, 0x20, 0xca, 0x82, 0xd3, 0x01, 0x31, 0xd9,
+ 0x44, 0x03, 0x15, 0x42, 0xd4, 0x6f, 0x00, 0x42, 0xd4, 0x7f, 0xc6, 0x0b,
+ 0x09, 0x0f, 0xbc, 0x61, 0xc7, 0x3a, 0x19, 0x0f, 0xbc, 0xb9, 0xc7, 0x0a,
+ 0xe0, 0x0f, 0xbc, 0xe8, 0x4a, 0x01, 0xa9, 0xc2, 0xd4, 0x91, 0xd8, 0x24,
+ 0xcb, 0x0f, 0xad, 0x19, 0xdb, 0x03, 0xcc, 0x01, 0x5c, 0xf8, 0x00, 0x42,
+ 0xd4, 0xa9, 0x47, 0xbe, 0x33, 0xc2, 0xd4, 0xc7, 0xc5, 0xdd, 0x76, 0x0f,
+ 0x99, 0x10, 0x4a, 0x01, 0xa9, 0xc2, 0xd4, 0xd3, 0x46, 0x01, 0x4a, 0xc2,
+ 0xd4, 0xf5, 0x4a, 0x03, 0x3d, 0x42, 0xd5, 0x0a, 0x4a, 0x01, 0xa9, 0xc2,
+ 0xd5, 0x16, 0x00, 0xc2, 0xd5, 0x37, 0x46, 0x01, 0x4a, 0x42, 0xd5, 0x43,
+ 0x44, 0x00, 0x28, 0xc2, 0xd5, 0x4f, 0xc5, 0x0a, 0xe2, 0x01, 0x4f, 0x58,
+ 0xc6, 0x0b, 0x09, 0x01, 0x58, 0xd9, 0xc6, 0x02, 0xd1, 0x01, 0x59, 0x20,
+ 0xc6, 0x04, 0xa1, 0x01, 0x39, 0xf9, 0xc2, 0x00, 0xb3, 0x01, 0x34, 0x88,
+ 0xcf, 0x66, 0xde, 0x01, 0x39, 0x31, 0xc4, 0x18, 0xb3, 0x0f, 0xad, 0xf8,
+ 0x15, 0xc2, 0xd5, 0x5b, 0x06, 0xc2, 0xd5, 0x67, 0xd4, 0x3c, 0x14, 0x01,
+ 0x1f, 0xb3, 0x02, 0xd5, 0x76, 0xd7, 0x2a, 0x0f, 0x01, 0x1f, 0xab, 0x02,
+ 0xd5, 0x7c, 0x0e, 0x42, 0xd5, 0x82, 0x44, 0x00, 0x67, 0xc2, 0xd5, 0x91,
+ 0x4a, 0x01, 0xa9, 0xc2, 0xd5, 0x9d, 0xd8, 0x24, 0xcb, 0x0f, 0xad, 0x11,
+ 0xdb, 0x03, 0xcc, 0x01, 0x5c, 0xe8, 0xc3, 0x08, 0x7b, 0x0f, 0xad, 0x23,
+ 0x02, 0xd5, 0xb5, 0xc5, 0xc2, 0xc2, 0x01, 0x59, 0x10, 0xc7, 0xc6, 0xef,
+ 0x01, 0x4e, 0xb9, 0xd0, 0x5a, 0x62, 0x01, 0x59, 0x60, 0xc4, 0x2b, 0xf1,
+ 0x0f, 0x9f, 0x91, 0xc5, 0xbb, 0xcd, 0x01, 0x58, 0xf8, 0xc9, 0x46, 0x70,
+ 0x01, 0x2d, 0x71, 0xc7, 0x5a, 0x6b, 0x01, 0x59, 0x70, 0xc6, 0x0b, 0x09,
+ 0x01, 0x58, 0xe9, 0xc7, 0x3a, 0x19, 0x0f, 0xbc, 0xc1, 0xc7, 0x0a, 0xe0,
+ 0x0f, 0xbc, 0xf0, 0x9a, 0x01, 0x30, 0x83, 0x02, 0xd5, 0xbb, 0xcb, 0x8e,
+ 0xa2, 0x0f, 0xaf, 0xb0, 0xc8, 0xb6, 0xb2, 0x00, 0xdb, 0xf0, 0xc3, 0x00,
+ 0x74, 0x00, 0xdb, 0xe1, 0xc3, 0x38, 0x86, 0x00, 0xdb, 0xc9, 0xc3, 0x01,
+ 0x95, 0x00, 0xdb, 0xc0, 0xc2, 0x14, 0x49, 0x00, 0xdb, 0xd9, 0xc2, 0x06,
+ 0x4e, 0x00, 0xdb, 0xd0, 0xc2, 0x06, 0x4e, 0x00, 0xdb, 0xb9, 0xc2, 0x14,
+ 0x49, 0x00, 0xdb, 0xb0, 0xc2, 0x00, 0xb3, 0x00, 0xdb, 0xa9, 0xc2, 0x0b,
+ 0x47, 0x00, 0xdb, 0xa0, 0xc2, 0x01, 0x0f, 0x00, 0xdb, 0x73, 0x02, 0xd5,
+ 0xc1, 0xc2, 0x03, 0x66, 0x00, 0xdb, 0x6a, 0x02, 0xd5, 0xc7, 0xc2, 0x00,
+ 0x75, 0x00, 0xdb, 0x23, 0x02, 0xd5, 0xcd, 0xc3, 0x00, 0x74, 0x00, 0xdb,
+ 0x49, 0xc3, 0x0a, 0xe3, 0x00, 0xdb, 0x38, 0xc3, 0x38, 0x86, 0x00, 0xdb,
+ 0x41, 0xc2, 0x00, 0x75, 0x00, 0xdb, 0x10, 0xc7, 0xc2, 0x6c, 0x00, 0xd8,
+ 0x30, 0x00, 0x42, 0xd5, 0xd1, 0xc7, 0xc7, 0x20, 0x00, 0xda, 0x29, 0xca,
+ 0x60, 0x26, 0x00, 0xd8, 0xa0, 0xc2, 0x00, 0xb0, 0x00, 0xd9, 0x89, 0xc2,
+ 0x01, 0x30, 0x00, 0xd9, 0x80, 0xc7, 0xc2, 0x6c, 0x00, 0xd8, 0x70, 0xc7,
+ 0xc2, 0x6c, 0x00, 0xd8, 0x60, 0xc7, 0xbf, 0xef, 0x00, 0xd9, 0x08, 0xc3,
+ 0x1b, 0xe8, 0x00, 0xd9, 0x29, 0x45, 0x60, 0x22, 0x42, 0xd5, 0xe3, 0x00,
+ 0x42, 0xd5, 0xef, 0x0d, 0xc2, 0xd5, 0xfe, 0x97, 0x0b, 0x50, 0x21, 0xc4,
+ 0xdf, 0x8b, 0x0b, 0x51, 0xc1, 0x15, 0xc2, 0xd6, 0x1a, 0x16, 0xc2, 0xd6,
+ 0x34, 0x8f, 0x0b, 0x50, 0x8b, 0x02, 0xd6, 0x3e, 0x14, 0xc2, 0xd6, 0x50,
+ 0x0e, 0xc2, 0xd6, 0x5c, 0x19, 0xc2, 0xd6, 0x6a, 0xc3, 0xe6, 0x0e, 0x0b,
+ 0x51, 0x59, 0x12, 0xc2, 0xd6, 0x74, 0x10, 0xc2, 0xd6, 0x7e, 0x1b, 0xc2,
+ 0xd6, 0xa9, 0xc2, 0x02, 0xe0, 0x0b, 0x50, 0x30, 0x09, 0xc2, 0xd6, 0xb3,
+ 0x19, 0xc2, 0xd6, 0xbd, 0x0d, 0xc2, 0xd6, 0xc7, 0x10, 0xc2, 0xd6, 0xdd,
+ 0x16, 0xc2, 0xd7, 0x0a, 0x12, 0xc2, 0xd7, 0x1a, 0x14, 0xc2, 0xd7, 0x37,
+ 0x15, 0xc2, 0xd7, 0x47, 0x0e, 0xc2, 0xd7, 0x61, 0x18, 0xc2, 0xd7, 0x73,
+ 0x0f, 0xc2, 0xd7, 0x7d, 0x08, 0xc2, 0xd7, 0xb5, 0x1b, 0xc2, 0xd7, 0xcc,
+ 0x8b, 0x0b, 0x4e, 0xc1, 0x91, 0x0b, 0x4e, 0xb9, 0x83, 0x0b, 0x4e, 0xa8,
+ 0x10, 0xc2, 0xd7, 0xe6, 0x0e, 0xc2, 0xd8, 0x06, 0x8f, 0x0b, 0x4a, 0x8b,
+ 0x02, 0xd8, 0x1c, 0x16, 0xc2, 0xd8, 0x42, 0x0d, 0xc2, 0xd8, 0x5d, 0x15,
+ 0xc2, 0xd8, 0x74, 0x08, 0xc2, 0xd8, 0x8c, 0x1b, 0xc2, 0xd8, 0x98, 0x14,
+ 0xc2, 0xd8, 0xa8, 0x12, 0xc2, 0xd8, 0xba, 0x42, 0x00, 0x09, 0xc2, 0xd8,
+ 0xce, 0x19, 0x42, 0xd8, 0xda, 0x0d, 0xc2, 0xd8, 0xe6, 0x15, 0xc2, 0xd8,
+ 0xfa, 0x16, 0xc2, 0xd9, 0x08, 0x12, 0xc2, 0xd9, 0x18, 0x0e, 0xc2, 0xd9,
+ 0x22, 0x10, 0xc2, 0xd9, 0x30, 0x0f, 0xc2, 0xd9, 0x52, 0x1b, 0xc2, 0xd9,
+ 0x6c, 0x19, 0xc2, 0xd9, 0x7c, 0xc2, 0x17, 0x99, 0x0b, 0x46, 0x19, 0x43,
+ 0x2c, 0xdc, 0xc2, 0xd9, 0x88, 0xc4, 0xe3, 0x03, 0x0b, 0x46, 0x01, 0xc3,
+ 0xe6, 0x29, 0x0b, 0x45, 0xe1, 0x09, 0x42, 0xd9, 0x92, 0x10, 0xc2, 0xd9,
+ 0x9e, 0x0f, 0xc2, 0xd9, 0xb6, 0x12, 0xc2, 0xd9, 0xd1, 0x47, 0xc0, 0x19,
+ 0xc2, 0xd9, 0xe9, 0x0d, 0xc2, 0xd9, 0xf3, 0x0e, 0xc2, 0xda, 0x03, 0x42,
+ 0x14, 0xda, 0xc2, 0xda, 0x13, 0x15, 0xc2, 0xda, 0x1d, 0x16, 0xc2, 0xda,
+ 0x3b, 0xc5, 0xd5, 0xfb, 0x0b, 0x43, 0xb1, 0xc4, 0xa6, 0xdc, 0x0b, 0x43,
+ 0x99, 0x1b, 0x42, 0xda, 0x47, 0xc3, 0x0a, 0x85, 0x0b, 0x42, 0x91, 0x15,
+ 0xc2, 0xda, 0x53, 0x16, 0xc2, 0xda, 0x6d, 0x0d, 0xc2, 0xda, 0x7d, 0x0f,
+ 0xc2, 0xda, 0x91, 0x10, 0xc2, 0xda, 0xb1, 0x0e, 0xc2, 0xda, 0xe7, 0x12,
+ 0xc2, 0xdb, 0x00, 0x17, 0xc2, 0xdb, 0x16, 0xc3, 0x00, 0x79, 0x0b, 0x41,
+ 0xd1, 0xc4, 0xe0, 0x17, 0x0b, 0x41, 0xc9, 0x09, 0x42, 0xdb, 0x22, 0xc7,
+ 0xc8, 0x5b, 0x00, 0xdf, 0xf9, 0xc9, 0xaf, 0xc0, 0x00, 0xdf, 0xe8, 0x49,
+ 0xa9, 0x09, 0x42, 0xdb, 0x2e, 0xc2, 0x00, 0xdb, 0x00, 0xde, 0xf9, 0xc2,
+ 0x19, 0x2c, 0x00, 0xde, 0xe1, 0xc2, 0x0d, 0xf6, 0x00, 0xde, 0xc9, 0xc2,
+ 0x01, 0xc3, 0x00, 0xde, 0xa9, 0xc2, 0x00, 0x39, 0x00, 0xde, 0x99, 0xc2,
+ 0x01, 0x30, 0x00, 0xde, 0x79, 0xc2, 0x01, 0x4a, 0x00, 0xde, 0x61, 0xc2,
+ 0x00, 0xb0, 0x00, 0xde, 0x41, 0xc2, 0x00, 0xd0, 0x00, 0xde, 0x19, 0x83,
+ 0x00, 0xde, 0x08, 0xc6, 0xcd, 0x7f, 0x00, 0x4e, 0x70, 0x46, 0x00, 0x8b,
+ 0x42, 0xdb, 0x40, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x11, 0x83, 0x00, 0x4d,
+ 0x08, 0xc2, 0x00, 0xd0, 0x00, 0x4d, 0x01, 0x83, 0x00, 0x4c, 0xf8, 0x94,
+ 0x00, 0x4c, 0x5b, 0x02, 0xdb, 0x4c, 0x8e, 0x00, 0x4c, 0x62, 0x02, 0xdb,
+ 0x50, 0xc4, 0x1e, 0x97, 0x00, 0x4e, 0x69, 0xc5, 0x40, 0xe7, 0x00, 0x4c,
+ 0x18, 0xc7, 0x7a, 0x7f, 0x00, 0x4d, 0xe9, 0xc7, 0x14, 0x39, 0x00, 0x4c,
+ 0x10, 0x94, 0x00, 0x4e, 0x20, 0x8e, 0x00, 0x4f, 0x18, 0xda, 0x1c, 0x04,
+ 0x00, 0x4f, 0xc0, 0xc2, 0x02, 0xa0, 0x00, 0x4f, 0xa9, 0xc4, 0x02, 0xde,
+ 0x00, 0x4f, 0xb0, 0xc2, 0x00, 0x64, 0x00, 0xd0, 0x79, 0x83, 0x00, 0xd0,
+ 0x70, 0xc2, 0x02, 0x2b, 0x00, 0xd0, 0x19, 0x83, 0x00, 0xd0, 0x10, 0xa5,
+ 0x01, 0x46, 0x00, 0x9f, 0x01, 0x40, 0x1b, 0x02, 0xdb, 0x54, 0xa0, 0x01,
+ 0x40, 0x2b, 0x02, 0xdb, 0x7b, 0xa1, 0x01, 0x40, 0x4b, 0x02, 0xdb, 0x9b,
+ 0xa2, 0x01, 0x40, 0x8b, 0x02, 0xdb, 0xb4, 0xa3, 0x01, 0x41, 0x0b, 0x02,
+ 0xdb, 0xc6, 0xa5, 0x01, 0x44, 0x09, 0xa4, 0x01, 0x42, 0x0a, 0x02, 0xdb,
+ 0xd1, 0xa0, 0x01, 0x40, 0x33, 0x02, 0xdb, 0xd5, 0xa1, 0x01, 0x40, 0x53,
+ 0x02, 0xdb, 0xf5, 0xa2, 0x01, 0x40, 0x93, 0x02, 0xdc, 0x0e, 0xa3, 0x01,
+ 0x41, 0x13, 0x02, 0xdc, 0x20, 0xa5, 0x01, 0x44, 0x11, 0xa4, 0x01, 0x42,
+ 0x12, 0x02, 0xdc, 0x2b, 0xa1, 0x01, 0x40, 0x63, 0x02, 0xdc, 0x2f, 0xa2,
+ 0x01, 0x40, 0xa3, 0x02, 0xdc, 0x48, 0xa3, 0x01, 0x41, 0x23, 0x02, 0xdc,
+ 0x5a, 0xa5, 0x01, 0x44, 0x21, 0xa4, 0x01, 0x42, 0x22, 0x02, 0xdc, 0x65,
+ 0xa2, 0x01, 0x40, 0xc3, 0x02, 0xdc, 0x69, 0xa3, 0x01, 0x41, 0x43, 0x02,
+ 0xdc, 0x7b, 0xa5, 0x01, 0x44, 0x41, 0xa4, 0x01, 0x42, 0x42, 0x02, 0xdc,
+ 0x86, 0xa3, 0x01, 0x41, 0x83, 0x02, 0xdc, 0x8a, 0xa5, 0x01, 0x44, 0x81,
+ 0xa4, 0x01, 0x42, 0x82, 0x02, 0xdc, 0x95, 0xa5, 0x01, 0x45, 0x01, 0xa4,
+ 0x01, 0x43, 0x02, 0x02, 0xdc, 0x99, 0xc8, 0x4b, 0x94, 0x08, 0x83, 0x29,
+ 0xc7, 0x0d, 0x04, 0x08, 0x83, 0x20, 0xc2, 0x0d, 0x10, 0x08, 0x83, 0x08,
+ 0xc2, 0x0d, 0x10, 0x08, 0x83, 0x00, 0xc3, 0x45, 0x6b, 0x08, 0x82, 0xf9,
+ 0xc2, 0x00, 0x5f, 0x08, 0x82, 0xb0, 0xc3, 0x0d, 0x0f, 0x08, 0x82, 0xf1,
+ 0xc2, 0x00, 0x33, 0x08, 0x82, 0xa8, 0xc4, 0x0d, 0x0e, 0x08, 0x82, 0xe9,
+ 0xc3, 0x02, 0xdf, 0x08, 0x82, 0xa0, 0xc4, 0x18, 0x12, 0x08, 0x82, 0xe1,
+ 0x91, 0x08, 0x82, 0x98, 0x42, 0x02, 0xa7, 0xc2, 0xdc, 0x9d, 0x46, 0x2e,
+ 0xee, 0xc2, 0xdc, 0xa7, 0xc4, 0xd8, 0xde, 0x08, 0x81, 0xb9, 0xc3, 0x7e,
+ 0x5e, 0x08, 0x81, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0x81, 0x01, 0x83, 0x08,
+ 0x80, 0xf8, 0xc2, 0x00, 0xd0, 0x08, 0x80, 0xf1, 0x83, 0x08, 0x80, 0xe8,
+ 0x8e, 0x08, 0x80, 0x6b, 0x02, 0xdc, 0xaf, 0x94, 0x08, 0x80, 0x5a, 0x02,
+ 0xdc, 0xb3, 0x4f, 0x66, 0x39, 0x42, 0xdc, 0xb7, 0x97, 0x08, 0x82, 0x29,
+ 0x8b, 0x08, 0x82, 0x19, 0x83, 0x08, 0x81, 0xc0, 0x8e, 0x08, 0x82, 0x03,
+ 0x02, 0xdc, 0xbf, 0x94, 0x08, 0x81, 0xf2, 0x02, 0xdc, 0xc3, 0x97, 0x08,
+ 0x81, 0xe8, 0x8b, 0x08, 0x81, 0xd8, 0xc4, 0x18, 0x10, 0x08, 0x83, 0x69,
+ 0xc2, 0x22, 0xcc, 0x08, 0x83, 0x60, 0xc3, 0x0d, 0x14, 0x08, 0x83, 0x59,
+ 0xc3, 0x09, 0x9e, 0x08, 0x83, 0x50, 0xc4, 0x02, 0xde, 0x08, 0x83, 0x49,
+ 0xc2, 0x02, 0xa0, 0x08, 0x83, 0x40, 0x44, 0xe3, 0xbb, 0xc2, 0xdc, 0xc7,
+ 0x4e, 0x6b, 0x44, 0xc2, 0xdc, 0xd3, 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xb0,
+ 0xc4, 0x99, 0xff, 0x0e, 0x87, 0x99, 0xc4, 0xe4, 0xa7, 0x0e, 0x87, 0x89,
+ 0xc3, 0x2e, 0xd7, 0x0e, 0x82, 0x78, 0x44, 0xe3, 0xbb, 0xc2, 0xdc, 0xdf,
+ 0xc8, 0x9c, 0xe0, 0x0e, 0x80, 0xe0, 0x00, 0xc2, 0xdc, 0xf1, 0xc2, 0x01,
+ 0x6f, 0x0e, 0x81, 0x90, 0xc8, 0xbb, 0x0a, 0x0e, 0x82, 0xa1, 0xc8, 0xad,
+ 0x15, 0x0e, 0x82, 0x60, 0x42, 0x02, 0x32, 0xc2, 0xdc, 0xfb, 0x95, 0x0e,
+ 0x80, 0x8a, 0x02, 0xdd, 0x07, 0xc3, 0x63, 0x2b, 0x0e, 0x84, 0x21, 0xc8,
+ 0x9c, 0xe0, 0x0e, 0x81, 0x10, 0x16, 0xc2, 0xdd, 0x0b, 0xc7, 0xc3, 0x22,
+ 0x0e, 0x87, 0x18, 0x16, 0xc2, 0xdd, 0x17, 0xc7, 0xc3, 0x22, 0x0e, 0x86,
+ 0xf8, 0xc3, 0x63, 0x2b, 0x0e, 0x83, 0x29, 0xcc, 0x84, 0x5d, 0x0e, 0x81,
+ 0x59, 0xc8, 0x9c, 0xe0, 0x0e, 0x81, 0x50, 0x4f, 0x6b, 0x43, 0x42, 0xdd,
+ 0x23, 0xc7, 0xc0, 0xf9, 0x0e, 0x86, 0xe9, 0xc5, 0xcc, 0xcc, 0x0e, 0x86,
+ 0xe1, 0x46, 0xca, 0xf1, 0x42, 0xdd, 0x2f, 0x42, 0x00, 0x2c, 0xc2, 0xdd,
+ 0x3b, 0xcc, 0x2e, 0x8a, 0x0e, 0x86, 0x78, 0xd5, 0x35, 0xb4, 0x0e, 0x86,
+ 0xb9, 0xc8, 0x2e, 0x8e, 0x0e, 0x86, 0x68, 0xc6, 0xcc, 0xcb, 0x0e, 0x80,
+ 0x58, 0xc6, 0xd2, 0x5f, 0x0e, 0x86, 0x31, 0xc5, 0x1a, 0x11, 0x0e, 0x86,
+ 0x28, 0x42, 0x02, 0x32, 0xc2, 0xdd, 0x47, 0xc3, 0x09, 0xe5, 0x0e, 0x85,
+ 0xd8, 0xc2, 0x00, 0x45, 0x0e, 0x85, 0xc1, 0x83, 0x0e, 0x81, 0xa8, 0xce,
+ 0x6d, 0x78, 0x0e, 0x85, 0x99, 0xc5, 0x6d, 0x65, 0x0e, 0x85, 0x58, 0xcb,
+ 0x94, 0xbc, 0x0e, 0x85, 0x91, 0xc7, 0x6d, 0x63, 0x0e, 0x85, 0x10, 0xcd,
+ 0x7a, 0xfb, 0x0e, 0x85, 0x49, 0xc5, 0x6d, 0x65, 0x0e, 0x85, 0x40, 0xc6,
+ 0x92, 0x38, 0x0e, 0x85, 0x39, 0xc9, 0x6d, 0x7d, 0x0e, 0x85, 0x30, 0xca,
+ 0x94, 0x18, 0x0e, 0x83, 0x71, 0xc8, 0xb9, 0x3a, 0x0e, 0x83, 0x58, 0xc3,
+ 0x63, 0x2b, 0x0e, 0x83, 0x19, 0x03, 0x42, 0xdd, 0x53, 0xc7, 0xc5, 0x05,
+ 0x0e, 0x83, 0xc1, 0x48, 0xbf, 0x1a, 0x42, 0xdd, 0x5f, 0xcf, 0x65, 0xdf,
+ 0x0e, 0x84, 0x69, 0xcc, 0x85, 0xb9, 0x0e, 0x84, 0x60, 0xc4, 0x77, 0x35,
+ 0x0e, 0x82, 0xd0, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0xf9, 0xc8, 0x9c, 0xe0,
+ 0x0e, 0x81, 0xe8, 0x00, 0x42, 0xdd, 0x6b, 0xc9, 0xad, 0x14, 0x0e, 0x82,
+ 0x59, 0x8b, 0x0e, 0x82, 0x48, 0x5b, 0x18, 0xc0, 0xc2, 0xdd, 0x77, 0x46,
+ 0x02, 0xae, 0x42, 0xdd, 0x83, 0xc6, 0x0b, 0x09, 0x01, 0x3a, 0x89, 0xc6,
+ 0x02, 0xd1, 0x0f, 0xa9, 0xf0, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0x09, 0xc5,
+ 0x00, 0x2c, 0x0f, 0xda, 0x10, 0x55, 0x16, 0xaa, 0xc2, 0xdd, 0x95, 0x48,
+ 0x0a, 0x53, 0xc2, 0xdd, 0xa7, 0x4a, 0x13, 0xe3, 0x42, 0xdd, 0xb3, 0xc7,
+ 0x16, 0x16, 0x01, 0x52, 0x91, 0x45, 0x00, 0x5a, 0x42, 0xdd, 0xbf, 0xc7,
+ 0x80, 0x70, 0x01, 0x52, 0xf1, 0xc8, 0x52, 0x09, 0x01, 0x53, 0x00, 0x46,
+ 0x00, 0x2c, 0xc2, 0xdd, 0xcb, 0x46, 0x01, 0xc8, 0xc2, 0xdd, 0xd5, 0x46,
+ 0x02, 0xae, 0x42, 0xdd, 0xe1, 0xc9, 0xb2, 0x75, 0x0f, 0xaf, 0x71, 0xca,
+ 0x0b, 0x94, 0x01, 0x80, 0x42, 0x02, 0xdd, 0xed, 0xcc, 0x12, 0x2d, 0x01,
+ 0x59, 0x81, 0xcc, 0x8a, 0xed, 0x01, 0x59, 0x90, 0xe0, 0x09, 0xa7, 0x0f,
+ 0xdc, 0xa0, 0x46, 0x00, 0x8b, 0x42, 0xdd, 0xf3, 0x44, 0x04, 0x91, 0xc2,
+ 0xde, 0x03, 0xc3, 0x04, 0x20, 0x01, 0x2c, 0x60, 0x00, 0x42, 0xde, 0x0f,
+ 0x46, 0x00, 0x8b, 0x42, 0xde, 0x1b, 0xc9, 0xb0, 0x6b, 0x01, 0x0d, 0x69,
+ 0xca, 0x01, 0xfd, 0x01, 0x58, 0x20, 0xcc, 0x84, 0x99, 0x01, 0x1d, 0x19,
+ 0xc9, 0x57, 0x36, 0x01, 0x1d, 0x11, 0xcc, 0x80, 0xcd, 0x01, 0x1d, 0x09,
+ 0x45, 0x00, 0x8c, 0x42, 0xde, 0x27, 0xca, 0xa2, 0x74, 0x01, 0x1d, 0x49,
+ 0xcc, 0x82, 0xe9, 0x01, 0x1d, 0x41, 0xca, 0xa3, 0x5a, 0x01, 0x1d, 0x38,
+ 0xcd, 0x3f, 0xe8, 0x01, 0x2c, 0x69, 0xce, 0x08, 0x79, 0x01, 0x2c, 0x50,
+ 0xd6, 0x31, 0x40, 0x01, 0x4e, 0x79, 0xd6, 0x14, 0xf9, 0x0f, 0xdb, 0x60,
+ 0xcc, 0x00, 0x33, 0x01, 0x4c, 0x19, 0xcd, 0x69, 0x65, 0x01, 0x80, 0x70,
+ 0xcc, 0x84, 0x15, 0x01, 0x4a, 0x81, 0xca, 0xa4, 0x18, 0x01, 0x4a, 0x58,
+ 0xcc, 0x84, 0x15, 0x01, 0x4a, 0x51, 0xca, 0xa4, 0x18, 0x01, 0x4a, 0x70,
+ 0xca, 0x03, 0xdd, 0x0f, 0xc4, 0x81, 0x48, 0x01, 0x9a, 0x42, 0xde, 0x45,
+ 0xc5, 0x01, 0xa2, 0x01, 0x0e, 0xd1, 0xca, 0x52, 0xc2, 0x01, 0x48, 0x70,
+ 0x46, 0x02, 0x5c, 0xc2, 0xde, 0x5a, 0xd1, 0x52, 0xbb, 0x01, 0x59, 0xb8,
+ 0xd9, 0x1f, 0xf9, 0x0f, 0xc0, 0x21, 0x15, 0xc2, 0xde, 0x66, 0x42, 0x00,
+ 0x58, 0xc2, 0xde, 0x72, 0xcf, 0x2c, 0x35, 0x01, 0x0f, 0xb9, 0x0e, 0xc2,
+ 0xde, 0x7e, 0xc4, 0x01, 0x23, 0x01, 0x0d, 0x49, 0x16, 0xc2, 0xde, 0x8a,
+ 0xca, 0x9e, 0x28, 0x01, 0x4a, 0x31, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xa1,
+ 0xcc, 0x84, 0xb1, 0x0f, 0xc4, 0xc0, 0x43, 0x10, 0x9e, 0xc2, 0xde, 0x99,
+ 0x47, 0x25, 0xf3, 0x42, 0xde, 0xa8, 0xd1, 0x56, 0x73, 0x01, 0x48, 0xf8,
+ 0x45, 0x00, 0xd5, 0xc2, 0xde, 0xb8, 0x43, 0x02, 0x9c, 0x42, 0xde, 0xd0,
+ 0x00, 0xc2, 0xde, 0xd6, 0xc5, 0x14, 0xa5, 0x01, 0x48, 0xd8, 0xd7, 0x2a,
+ 0x26, 0x01, 0x0e, 0x59, 0x4a, 0x01, 0x58, 0x42, 0xde, 0xe2, 0xc6, 0x0e,
+ 0xe0, 0x01, 0x53, 0xf9, 0xc5, 0x00, 0xd4, 0x01, 0x54, 0x0a, 0x02, 0xde,
+ 0xee, 0xc8, 0x23, 0xa0, 0x01, 0x54, 0x69, 0xd2, 0x09, 0xd5, 0x01, 0x54,
+ 0x78, 0xe0, 0x07, 0xc7, 0x01, 0x54, 0x98, 0xe0, 0x08, 0x87, 0x01, 0x3b,
+ 0x98, 0xc4, 0x11, 0xa4, 0x01, 0x5e, 0x61, 0xc4, 0x0e, 0x6a, 0x0f, 0xbe,
+ 0x20, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0x79, 0xd2, 0x22, 0x49, 0x0f, 0xbe,
+ 0x48, 0xc2, 0x00, 0x43, 0x05, 0x27, 0xc1, 0xc3, 0xe4, 0xfa, 0x05, 0x27,
+ 0xd1, 0xc2, 0x00, 0x6b, 0x05, 0x27, 0xd9, 0xc2, 0x00, 0xc1, 0x05, 0x27,
+ 0xe1, 0xc3, 0xe6, 0x02, 0x05, 0x27, 0xe8, 0xdd, 0x10, 0xfa, 0x01, 0x50,
+ 0x99, 0xdc, 0x12, 0xfd, 0x01, 0x50, 0x90, 0x1e, 0xc2, 0xde, 0xf4, 0x1d,
+ 0xc2, 0xdf, 0x1e, 0xc7, 0xc8, 0x15, 0x08, 0x3a, 0xa1, 0xc5, 0xd6, 0x1e,
+ 0x08, 0x3a, 0xa8, 0x23, 0xc2, 0xdf, 0x52, 0x1d, 0xc2, 0xdf, 0x66, 0x1e,
+ 0xc2, 0xdf, 0x86, 0x1f, 0xc2, 0xdf, 0xae, 0x20, 0xc2, 0xdf, 0xd2, 0x21,
+ 0xc2, 0xdf, 0xde, 0x22, 0x42, 0xdf, 0xfe, 0x9d, 0x08, 0x3b, 0x01, 0x9e,
+ 0x08, 0x3b, 0x09, 0x9f, 0x08, 0x3b, 0x11, 0xa0, 0x08, 0x3b, 0x19, 0xa1,
+ 0x08, 0x3b, 0x21, 0xa2, 0x08, 0x3b, 0x29, 0xa3, 0x08, 0x3b, 0x31, 0xa4,
+ 0x08, 0x3b, 0x38, 0x1d, 0xc2, 0xe0, 0x22, 0x1e, 0x42, 0xe0, 0x46, 0xc6,
+ 0xcf, 0x41, 0x08, 0x32, 0x39, 0xc3, 0xe6, 0x50, 0x08, 0x32, 0x79, 0xc3,
+ 0xe6, 0x5c, 0x08, 0x32, 0x50, 0x1d, 0xc2, 0xe0, 0x6c, 0x1e, 0xc2, 0xe0,
+ 0x90, 0x1f, 0xc2, 0xe0, 0xb8, 0x20, 0xc2, 0xe0, 0xe0, 0x21, 0xc2, 0xe1,
+ 0x08, 0x22, 0xc2, 0xe1, 0x30, 0x23, 0xc2, 0xe1, 0x58, 0x24, 0x42, 0xe1,
+ 0x80, 0x1d, 0xc2, 0xe1, 0x88, 0x1e, 0x42, 0xe1, 0xc4, 0x1d, 0xc2, 0xe1,
+ 0xfa, 0x1e, 0xc2, 0xe2, 0x1a, 0x1f, 0xc2, 0xe2, 0x32, 0x20, 0xc2, 0xe2,
+ 0x56, 0x21, 0xc2, 0xe2, 0x7a, 0x22, 0xc2, 0xe2, 0x96, 0x23, 0xc2, 0xe2,
+ 0xba, 0x24, 0xc2, 0xe2, 0xd2, 0x25, 0xc2, 0xe2, 0xfa, 0x26, 0x42, 0xe3,
+ 0x22, 0x49, 0xae, 0x8e, 0xc2, 0xe3, 0x3a, 0x47, 0xc3, 0x7d, 0x42, 0xe3,
+ 0x62, 0x04, 0xc2, 0xe3, 0x8a, 0x48, 0xbf, 0x62, 0x42, 0xe3, 0x92, 0x1e,
+ 0xc2, 0xe3, 0xa2, 0xc9, 0xae, 0x2b, 0x08, 0x06, 0x90, 0x83, 0x00, 0xc9,
+ 0xa1, 0xc2, 0x01, 0x30, 0x00, 0xc9, 0x88, 0x91, 0x00, 0xc9, 0x28, 0x87,
+ 0x00, 0xc9, 0x18, 0x97, 0x00, 0xc9, 0x31, 0x8b, 0x00, 0xc9, 0x20, 0xc6,
+ 0x0b, 0x09, 0x0f, 0xbf, 0x59, 0xc6, 0x02, 0xd1, 0x0f, 0xbf, 0x20, 0xc7,
+ 0x3a, 0x19, 0x0f, 0xa9, 0xb9, 0xc6, 0x02, 0xd1, 0x0f, 0xa9, 0xa9, 0xc6,
+ 0x0b, 0x09, 0x0f, 0xbf, 0x30, 0xdf, 0x0d, 0x5d, 0x08, 0x59, 0xf9, 0xdd,
+ 0x11, 0xc5, 0x08, 0x59, 0xe8, 0xc7, 0x3a, 0x19, 0x0f, 0xa9, 0xb1, 0xc6,
+ 0x02, 0xd1, 0x0f, 0xbf, 0x01, 0xc6, 0x0b, 0x09, 0x0f, 0xbf, 0x38, 0xdf,
+ 0x0c, 0xa3, 0x08, 0x59, 0xf1, 0xdd, 0x05, 0x0a, 0x08, 0x59, 0xe0, 0x95,
+ 0x00, 0x03, 0x9b, 0x02, 0xe3, 0xb0, 0x85, 0x00, 0x03, 0x1b, 0x02, 0xe3,
+ 0xd4, 0x96, 0x00, 0x03, 0xa3, 0x02, 0xe3, 0xf8, 0x91, 0x00, 0x03, 0x7b,
+ 0x02, 0xe4, 0x32, 0x8b, 0x00, 0x03, 0x4b, 0x02, 0xe4, 0x56, 0x86, 0x00,
+ 0x03, 0x23, 0x02, 0xe4, 0x6a, 0x87, 0x00, 0x03, 0x2b, 0x02, 0xe4, 0x8b,
+ 0x94, 0x00, 0x03, 0x93, 0x02, 0xe4, 0xb9, 0x8e, 0x00, 0x03, 0x63, 0x02,
+ 0xe4, 0xd2, 0x88, 0x00, 0x03, 0x33, 0x02, 0xe5, 0x01, 0x9b, 0x00, 0x03,
+ 0xcb, 0x02, 0xe5, 0x10, 0x8f, 0x00, 0x03, 0x6b, 0x02, 0xe5, 0x1c, 0x97,
+ 0x00, 0x03, 0xab, 0x02, 0xe5, 0x2e, 0x83, 0x00, 0x03, 0x0b, 0x02, 0xe5,
+ 0x4b, 0x99, 0x00, 0x03, 0xbb, 0x02, 0xe5, 0x7c, 0x8a, 0x00, 0x03, 0x43,
+ 0x02, 0xe5, 0x82, 0x9c, 0x00, 0x03, 0xd3, 0x02, 0xe5, 0x9b, 0x9a, 0x00,
+ 0x03, 0xc3, 0x02, 0xe5, 0xa1, 0x98, 0x00, 0x03, 0xb3, 0x02, 0xe5, 0xa7,
+ 0x92, 0x00, 0x03, 0x83, 0x02, 0xe5, 0xc3, 0x90, 0x00, 0x03, 0x73, 0x02,
+ 0xe5, 0xcf, 0x8d, 0x00, 0x03, 0x5b, 0x02, 0xe5, 0xdd, 0x89, 0x00, 0x03,
+ 0x3b, 0x02, 0xe5, 0xe9, 0x84, 0x00, 0x03, 0x13, 0x02, 0xe6, 0x01, 0x8c,
+ 0x00, 0x03, 0x53, 0x02, 0xe6, 0x23, 0x93, 0x00, 0x03, 0x8a, 0x02, 0xe6,
+ 0x29, 0xc2, 0x00, 0x15, 0x07, 0xd8, 0x31, 0xc8, 0xb8, 0xe2, 0x07, 0xd8,
+ 0x29, 0x08, 0xc2, 0xe6, 0x35, 0xc2, 0x00, 0x0b, 0x00, 0x09, 0x99, 0xc2,
+ 0x49, 0x0c, 0x00, 0x0a, 0x98, 0x46, 0x45, 0x87, 0x42, 0xe6, 0x44, 0x46,
+ 0x00, 0x8b, 0x42, 0xe6, 0x58, 0xc2, 0x25, 0xa1, 0x00, 0xe9, 0x19, 0xc2,
+ 0x00, 0x8e, 0x00, 0xe8, 0x30, 0x48, 0x10, 0x2f, 0xc2, 0xe6, 0x64, 0xcf,
+ 0x6a, 0x26, 0x05, 0x5a, 0x31, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0xb0, 0x97,
+ 0x00, 0xe8, 0xa9, 0xc5, 0xd4, 0x9d, 0x00, 0xe8, 0x81, 0x87, 0x00, 0x13,
+ 0xb0, 0xc7, 0xc3, 0x84, 0x00, 0xe8, 0x18, 0x87, 0x00, 0xe8, 0x08, 0xca,
+ 0x1f, 0x59, 0x00, 0x14, 0xd8, 0xc9, 0xab, 0xb5, 0x00, 0x14, 0x08, 0x46,
+ 0x00, 0x8b, 0xc2, 0xe6, 0x6c, 0xc3, 0x3c, 0x63, 0x00, 0x10, 0xe0, 0x45,
+ 0x04, 0xcc, 0xc2, 0xe6, 0xa3, 0x46, 0x00, 0x8b, 0x42, 0xe6, 0xaf, 0x00,
+ 0xc2, 0xe6, 0xc1, 0xc6, 0x10, 0x3f, 0x00, 0x0d, 0x88, 0x46, 0x00, 0x8b,
+ 0xc2, 0xe6, 0xcd, 0x91, 0x05, 0x3a, 0x71, 0xc4, 0x6d, 0xb5, 0x05, 0x3d,
+ 0xb1, 0xcb, 0x8e, 0xc3, 0x05, 0x3e, 0x01, 0x44, 0x05, 0x76, 0xc2, 0xe7,
+ 0x18, 0x8b, 0x00, 0x0d, 0x11, 0x97, 0x00, 0x11, 0x10, 0x46, 0x00, 0x8b,
+ 0xc2, 0xe7, 0x20, 0x95, 0x05, 0x3b, 0x61, 0x47, 0x67, 0x21, 0xc2, 0xe7,
+ 0x61, 0xc3, 0x01, 0xbb, 0x00, 0x0c, 0xb0, 0x46, 0x00, 0x8b, 0xc2, 0xe7,
+ 0x79, 0x4e, 0x73, 0x36, 0xc2, 0xe7, 0xbd, 0x96, 0x05, 0x3b, 0x53, 0x02,
+ 0xe7, 0xc9, 0xc2, 0x00, 0x75, 0x00, 0x0a, 0x51, 0xc2, 0x01, 0xe2, 0x00,
+ 0x0d, 0x49, 0xc2, 0x25, 0xa1, 0x00, 0x0d, 0xba, 0x02, 0xe7, 0xcd, 0x46,
+ 0x00, 0x8b, 0xc2, 0xe7, 0xd1, 0x87, 0x00, 0x06, 0x33, 0x02, 0xe8, 0x18,
+ 0x83, 0x05, 0x39, 0x91, 0x91, 0x05, 0x39, 0xa1, 0x97, 0x05, 0x39, 0xb1,
+ 0x98, 0x05, 0x39, 0xc3, 0x02, 0xe8, 0x1e, 0x9b, 0x05, 0x39, 0xe1, 0xca,
+ 0xa4, 0x72, 0x05, 0x3e, 0x11, 0xc4, 0xde, 0x3f, 0x01, 0x63, 0x69, 0xc8,
+ 0xbd, 0x8a, 0x00, 0x0c, 0x48, 0xc6, 0xa2, 0xbb, 0x00, 0xf4, 0xf1, 0x46,
+ 0x00, 0x8b, 0xc2, 0xe8, 0x24, 0xc7, 0xc8, 0xfc, 0x05, 0x3c, 0x59, 0x05,
+ 0xc2, 0xe8, 0x47, 0xc8, 0xbe, 0x02, 0x05, 0x3e, 0xc1, 0x45, 0x03, 0x14,
+ 0x42, 0xe8, 0x53, 0x46, 0x00, 0x8b, 0x42, 0xe8, 0x5f, 0x47, 0x01, 0x32,
+ 0x42, 0xe8, 0x83, 0x46, 0x00, 0x8b, 0xc2, 0xe8, 0x8f, 0xc3, 0x95, 0x51,
+ 0x00, 0x0f, 0xb8, 0x46, 0x00, 0x8b, 0xc2, 0xe8, 0xab, 0x9b, 0x05, 0x3b,
+ 0x01, 0xcb, 0x91, 0x15, 0x05, 0x3b, 0x11, 0xc3, 0x02, 0x39, 0x05, 0x3b,
+ 0x41, 0x47, 0xc8, 0xcb, 0x42, 0xe8, 0xbb, 0x46, 0x00, 0x8b, 0xc2, 0xe8,
+ 0xcd, 0x9c, 0x05, 0x39, 0x41, 0xc7, 0xc3, 0xa0, 0x05, 0x39, 0x51, 0xc4,
+ 0x2a, 0xcc, 0x00, 0x06, 0xf3, 0x02, 0xe8, 0xed, 0x46, 0x45, 0x87, 0xc2,
+ 0xe8, 0xf6, 0x44, 0x05, 0x14, 0x42, 0xe9, 0x1b, 0x00, 0xc2, 0xe9, 0x2d,
+ 0x48, 0x10, 0x2f, 0xc2, 0xe9, 0x39, 0xca, 0xa6, 0x66, 0x05, 0x3a, 0xe0,
+ 0x46, 0x00, 0x8b, 0x42, 0xe9, 0x4f, 0x46, 0x00, 0x8b, 0xc2, 0xe9, 0x6b,
+ 0x8c, 0x00, 0x0e, 0x50, 0x46, 0x00, 0x8b, 0xc2, 0xe9, 0x95, 0x8c, 0x00,
+ 0x0e, 0x38, 0x46, 0x00, 0x8b, 0x42, 0xe9, 0xbf, 0x46, 0x00, 0x8b, 0xc2,
+ 0xe9, 0xe8, 0xc4, 0xde, 0xa3, 0x00, 0x0f, 0xb1, 0xc3, 0x0a, 0xe3, 0x05,
+ 0x39, 0x31, 0xc5, 0xd3, 0x2c, 0x01, 0x63, 0xa8, 0x46, 0x00, 0x8b, 0xc2,
+ 0xea, 0x02, 0x47, 0x23, 0x34, 0xc2, 0xea, 0x30, 0xc4, 0x38, 0x2c, 0x00,
+ 0x0c, 0xa1, 0xc2, 0x00, 0xd0, 0x00, 0x0d, 0x10, 0x46, 0x00, 0x8b, 0x42,
+ 0xea, 0x42, 0x46, 0x00, 0x8b, 0xc2, 0xea, 0x54, 0x9c, 0x00, 0x0f, 0x8a,
+ 0x02, 0xea, 0x74, 0x46, 0x00, 0x8b, 0xc2, 0xea, 0x7a, 0xc2, 0x00, 0x0a,
+ 0x05, 0x3d, 0x99, 0xc8, 0xba, 0x4a, 0x05, 0x39, 0x63, 0x02, 0xea, 0xa2,
+ 0xc2, 0x00, 0x45, 0x05, 0x3b, 0x71, 0xcf, 0x67, 0x1a, 0x05, 0x3e, 0x80,
+ 0x46, 0x00, 0x8b, 0xc2, 0xea, 0xa8, 0xc3, 0x04, 0x87, 0x05, 0x3d, 0xa1,
+ 0xc7, 0xc9, 0xb9, 0x05, 0x3a, 0x30, 0x46, 0x00, 0x8b, 0x42, 0xea, 0xcc,
+ 0x46, 0x00, 0x8b, 0x42, 0xea, 0xd6, 0xc4, 0xdf, 0x43, 0x00, 0x74, 0x11,
+ 0xc3, 0x02, 0x45, 0x00, 0x74, 0x20, 0xc2, 0x0f, 0x7b, 0x00, 0x76, 0xf1,
+ 0xc3, 0x4d, 0xc3, 0x00, 0x76, 0xf8, 0xc2, 0x19, 0x2c, 0x00, 0x74, 0x71,
+ 0xc2, 0x00, 0xc1, 0x00, 0x74, 0x98, 0x83, 0x00, 0x74, 0x79, 0xc2, 0x00,
+ 0xd0, 0x00, 0x74, 0x80, 0x06, 0xc2, 0xea, 0xe2, 0xc2, 0x00, 0xd0, 0x00,
+ 0x74, 0xc0, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0xa9, 0xc6, 0x04, 0xe1, 0x0f,
+ 0xda, 0xa1, 0xcc, 0x04, 0xcb, 0x0f, 0xdb, 0x38, 0x46, 0x01, 0xc8, 0xc2,
+ 0xea, 0xec, 0xd2, 0x4b, 0x83, 0x0f, 0xdb, 0x18, 0xd2, 0x4b, 0x83, 0x0f,
+ 0xdb, 0x11, 0x46, 0x01, 0xc8, 0x42, 0xea, 0xf8, 0xc6, 0x04, 0xe1, 0x0f,
+ 0xda, 0xc9, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0xd1, 0xcc, 0x04, 0xcb, 0x0f,
+ 0xda, 0xe0, 0x46, 0x02, 0xae, 0xc2, 0xeb, 0x04, 0xd2, 0x4c, 0x37, 0x0f,
+ 0xda, 0xf0, 0xd2, 0x4c, 0x37, 0x0f, 0xda, 0xe9, 0x46, 0x02, 0xae, 0x42,
+ 0xeb, 0x10, 0x46, 0x00, 0x8b, 0x42, 0xeb, 0x1c, 0xd4, 0x3e, 0x6c, 0x01,
+ 0x5d, 0xc0, 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x0b, 0x02, 0xeb, 0x28, 0xcc,
+ 0x82, 0xb9, 0x01, 0x5b, 0x59, 0xcd, 0x7c, 0xa8, 0x01, 0x5c, 0x28, 0xd5,
+ 0x03, 0xd2, 0x0f, 0xc0, 0xa9, 0xd8, 0x22, 0x5b, 0x0f, 0xc0, 0x49, 0xd9,
+ 0x1f, 0xf9, 0x0f, 0xc0, 0x29, 0x46, 0x03, 0x13, 0xc2, 0xeb, 0x2c, 0xcd,
+ 0x75, 0xa6, 0x01, 0x0e, 0xf1, 0x44, 0x08, 0xba, 0xc2, 0xeb, 0x38, 0xd1,
+ 0x01, 0x68, 0x01, 0x48, 0x49, 0xcc, 0x84, 0xb1, 0x0f, 0xc4, 0xc8, 0x47,
+ 0x13, 0x6d, 0xc2, 0xeb, 0x44, 0xc6, 0x10, 0x9d, 0x01, 0x4a, 0xc1, 0xc8,
+ 0xae, 0xbc, 0x01, 0x4b, 0x00, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xe1, 0xc6,
+ 0x10, 0x9d, 0x01, 0x4a, 0xa0, 0xe0, 0x0a, 0xe7, 0x01, 0x3a, 0x58, 0xd6,
+ 0x2e, 0x6a, 0x01, 0x39, 0xc1, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x79, 0xcd,
+ 0x0e, 0x61, 0x0f, 0xbe, 0x88, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x29, 0xc9,
+ 0xb4, 0x91, 0x0f, 0xb2, 0xe8, 0xc5, 0x01, 0xa2, 0x01, 0x3c, 0xc1, 0x49,
+ 0x01, 0xaa, 0x42, 0xeb, 0x4e, 0xdd, 0x0a, 0x8a, 0x01, 0x3a, 0xe1, 0x44,
+ 0x05, 0x9e, 0x42, 0xeb, 0x5a, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0xc1, 0xd2,
+ 0x22, 0x49, 0x0f, 0xbe, 0x60, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x31, 0xc9,
+ 0xb4, 0x91, 0x0f, 0xb2, 0xf0, 0xe0, 0x0c, 0x07, 0x01, 0x3d, 0x68, 0x44,
+ 0x00, 0x58, 0xc2, 0xeb, 0x60, 0x44, 0x07, 0x69, 0x42, 0xeb, 0x66, 0xd0,
+ 0x08, 0x97, 0x01, 0x3b, 0x81, 0xd7, 0x0a, 0x90, 0x01, 0x3b, 0x70, 0xd5,
+ 0x03, 0xd2, 0x0f, 0xc0, 0xc1, 0xdb, 0x17, 0x46, 0x0f, 0xc0, 0xe0, 0xd1,
+ 0x56, 0x0d, 0x01, 0x3a, 0x19, 0xc8, 0x0a, 0xff, 0x01, 0x39, 0xe8, 0xd0,
+ 0x20, 0x66, 0x01, 0x3d, 0xc9, 0xd0, 0x03, 0xb7, 0x01, 0x3d, 0xc1, 0xd0,
+ 0x3c, 0x90, 0x01, 0x3d, 0xb8, 0x47, 0x3b, 0x9c, 0xc2, 0xeb, 0x6c, 0xc5,
+ 0x1c, 0xae, 0x01, 0x3b, 0x20, 0xd9, 0x1e, 0x1e, 0x01, 0x37, 0x19, 0xcd,
+ 0x78, 0x30, 0x01, 0x5a, 0xb8, 0xdd, 0x0a, 0x8a, 0x01, 0x3a, 0xf1, 0x44,
+ 0x05, 0x9e, 0x42, 0xeb, 0x78, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xd9, 0xdb,
+ 0x17, 0x46, 0x0f, 0xc0, 0xf8, 0x46, 0x00, 0x8b, 0x42, 0xeb, 0x7e, 0xd0,
+ 0x08, 0x97, 0x01, 0x3b, 0x89, 0xd7, 0x0a, 0x90, 0x01, 0x3b, 0x78, 0x00,
+ 0x42, 0xeb, 0x8a, 0xc3, 0x4a, 0xb9, 0x00, 0x2f, 0x91, 0xc3, 0x04, 0xac,
+ 0x00, 0x2f, 0x80, 0xc4, 0xe4, 0x8b, 0x07, 0xda, 0x71, 0xc6, 0x64, 0xa4,
+ 0x07, 0xda, 0x20, 0xc4, 0xe4, 0x8b, 0x07, 0xda, 0x69, 0xc6, 0x64, 0xa4,
+ 0x07, 0xd9, 0xd8, 0xc4, 0xe4, 0x8b, 0x07, 0xda, 0x61, 0xc6, 0x64, 0xa4,
+ 0x07, 0xd9, 0x88, 0xc5, 0xd5, 0xa6, 0x07, 0xda, 0x59, 0xc6, 0x64, 0xa4,
+ 0x07, 0xd9, 0xa8, 0xcc, 0x84, 0x75, 0x07, 0xda, 0x50, 0xcc, 0x84, 0x75,
+ 0x07, 0xda, 0x30, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0xc0, 0x46, 0x00, 0x8b,
+ 0x42, 0xeb, 0x96, 0xcc, 0x84, 0x75, 0x07, 0xda, 0x08, 0xcc, 0x84, 0x75,
+ 0x07, 0xda, 0x18, 0xcc, 0x84, 0x75, 0x07, 0xd9, 0xd0, 0xc6, 0x64, 0xa4,
+ 0x07, 0xd9, 0xc9, 0xc5, 0xd5, 0x83, 0x07, 0xd8, 0xe8, 0xc2, 0x00, 0x07,
+ 0x00, 0x2e, 0x83, 0x02, 0xeb, 0xa3, 0x4a, 0x9f, 0x18, 0x42, 0xeb, 0xa9,
+ 0xc6, 0xcc, 0x59, 0x00, 0x2e, 0x38, 0xc6, 0x44, 0x50, 0x00, 0x2e, 0x09,
+ 0xc3, 0x62, 0x7d, 0x00, 0x2d, 0x80, 0xce, 0x6d, 0xda, 0x00, 0x2d, 0xd0,
+ 0xc6, 0xcc, 0xe3, 0x00, 0x2d, 0x99, 0xc5, 0x79, 0xbe, 0x00, 0x2d, 0x91,
+ 0xc5, 0xa0, 0x88, 0x00, 0x2d, 0x88, 0xc5, 0xd7, 0xfe, 0x00, 0x2c, 0xa9,
+ 0xc5, 0xcc, 0x5a, 0x00, 0x2c, 0xa0, 0xc6, 0xcc, 0xd7, 0x00, 0x2d, 0x49,
+ 0xc6, 0xd0, 0xe5, 0x00, 0x2d, 0x00, 0xc2, 0x4a, 0xce, 0x02, 0x6e, 0x31,
+ 0xce, 0x71, 0xa0, 0x02, 0x6f, 0x90, 0x11, 0xc2, 0xeb, 0xb5, 0xcc, 0x7f,
+ 0xdc, 0x02, 0x6e, 0xd8, 0x00, 0x42, 0xeb, 0xc1, 0xc2, 0x19, 0x2c, 0x08,
+ 0x68, 0xc9, 0xc2, 0x01, 0x4a, 0x08, 0x68, 0xb8, 0x02, 0x42, 0xeb, 0xcd,
+ 0x44, 0x3a, 0xbf, 0xc2, 0xeb, 0xf9, 0xc3, 0x39, 0x37, 0x00, 0x88, 0x4a,
+ 0x02, 0xec, 0x39, 0xc5, 0xd9, 0xca, 0x05, 0x4b, 0xd8, 0xc6, 0xba, 0x7c,
+ 0x00, 0x88, 0x8b, 0x02, 0xec, 0x3d, 0xc4, 0x79, 0xf3, 0x00, 0x88, 0x3b,
+ 0x02, 0xec, 0x41, 0xc6, 0xca, 0x0e, 0x00, 0x8a, 0x00, 0x02, 0x42, 0xec,
+ 0x45, 0x02, 0x42, 0xec, 0x6f, 0xc5, 0xc0, 0x7d, 0x00, 0x88, 0x1b, 0x02,
+ 0xec, 0x87, 0xc6, 0xc1, 0x86, 0x00, 0x88, 0x80, 0xc5, 0x8e, 0xdf, 0x00,
+ 0x88, 0x03, 0x02, 0xec, 0x8b, 0xc6, 0xbb, 0xec, 0x00, 0x88, 0x79, 0x47,
+ 0x79, 0xeb, 0x42, 0xec, 0x91, 0x02, 0x42, 0xec, 0xa7, 0xc4, 0xc6, 0x7a,
+ 0x00, 0x88, 0x63, 0x02, 0xec, 0xcb, 0x42, 0x00, 0x0a, 0xc2, 0xec, 0xd1,
+ 0x4a, 0xa3, 0x00, 0x42, 0xec, 0xe0, 0xc6, 0xb7, 0x9c, 0x00, 0x8a, 0x61,
+ 0xc9, 0x90, 0xe0, 0x00, 0x8a, 0xc8, 0xc6, 0x92, 0x0c, 0x00, 0x8b, 0x01,
+ 0x83, 0x00, 0x8b, 0x0b, 0x02, 0xec, 0xe8, 0x1b, 0xc2, 0xec, 0xf9, 0x87,
+ 0x00, 0x8b, 0x33, 0x02, 0xed, 0x1c, 0x91, 0x00, 0x8b, 0x4b, 0x02, 0xed,
+ 0x2a, 0x19, 0xc2, 0xed, 0x32, 0x97, 0x00, 0x8b, 0x73, 0x02, 0xed, 0x44,
+ 0x8b, 0x00, 0x8b, 0xab, 0x02, 0xed, 0x48, 0xca, 0xa6, 0x02, 0x00, 0x8d,
+ 0x10, 0x0d, 0xc2, 0xed, 0x4c, 0x15, 0xc2, 0xed, 0x61, 0xc5, 0xd9, 0x61,
+ 0x00, 0x8d, 0x5b, 0x02, 0xed, 0x70, 0x16, 0xc2, 0xed, 0x74, 0xc5, 0xd6,
+ 0x8c, 0x00, 0x8d, 0x7b, 0x02, 0xed, 0x83, 0xc5, 0xda, 0xe7, 0x00, 0x8d,
+ 0xbb, 0x02, 0xed, 0x87, 0x12, 0xc2, 0xed, 0x8b, 0xc5, 0xb7, 0x9d, 0x00,
+ 0x8d, 0xe3, 0x02, 0xed, 0xa6, 0x05, 0xc2, 0xed, 0xaa, 0xc5, 0x90, 0xe4,
+ 0x00, 0x8e, 0x13, 0x02, 0xed, 0xb9, 0x42, 0x0c, 0x43, 0x42, 0xed, 0xbd,
+ 0xc5, 0x8e, 0xdf, 0x01, 0x89, 0x8b, 0x02, 0xed, 0xcc, 0xc6, 0xbb, 0xec,
+ 0x01, 0x8a, 0x59, 0x47, 0x79, 0xeb, 0x42, 0xed, 0xd2, 0x44, 0x3a, 0xbf,
+ 0xc2, 0xed, 0xe2, 0xc3, 0x39, 0x37, 0x01, 0x8a, 0x2a, 0x02, 0xee, 0x12,
+ 0x02, 0x42, 0xee, 0x16, 0xc5, 0xc0, 0x7d, 0x01, 0x89, 0xb9, 0xc6, 0xc1,
+ 0x86, 0x01, 0x8a, 0x60, 0x02, 0x42, 0xee, 0x34, 0x02, 0x42, 0xee, 0x5d,
+ 0xc4, 0x79, 0xf3, 0x01, 0x8a, 0x13, 0x02, 0xee, 0x67, 0xc6, 0xba, 0x7c,
+ 0x01, 0x8a, 0x69, 0xc6, 0xca, 0x0e, 0x01, 0x8b, 0xf8, 0xc4, 0xb7, 0x9e,
+ 0x01, 0x8a, 0x38, 0xc4, 0xc6, 0x7a, 0x01, 0x8a, 0x41, 0xc6, 0xc6, 0x79,
+ 0x01, 0x8a, 0x50, 0x87, 0x01, 0x8a, 0x81, 0xc4, 0xa6, 0x08, 0x01, 0x8c,
+ 0x6a, 0x02, 0xee, 0x6b, 0x83, 0x01, 0x8a, 0x8b, 0x02, 0xee, 0x6f, 0x87,
+ 0x01, 0x8a, 0xb3, 0x02, 0xee, 0x73, 0x91, 0x01, 0x8a, 0xdb, 0x02, 0xee,
+ 0x83, 0x97, 0x01, 0x8b, 0x03, 0x02, 0xee, 0x87, 0x8b, 0x01, 0x8b, 0x10,
+ 0x91, 0x01, 0x8a, 0x99, 0x97, 0x01, 0x8b, 0x08, 0x87, 0x01, 0x8a, 0xd0,
+ 0x83, 0x01, 0x8a, 0xc3, 0x02, 0xee, 0x8b, 0x87, 0x01, 0x8a, 0xf3, 0x02,
+ 0xee, 0x8f, 0x8b, 0x01, 0x8a, 0xf8, 0x91, 0x01, 0x81, 0x11, 0xc4, 0x18,
+ 0x12, 0x01, 0x81, 0xc8, 0xc3, 0x02, 0xdf, 0x01, 0x81, 0x19, 0xc4, 0x0d,
+ 0x0e, 0x01, 0x81, 0xd0, 0xc3, 0x77, 0x79, 0x08, 0x47, 0x89, 0xc4, 0xdc,
+ 0x2d, 0x08, 0x47, 0x70, 0x91, 0x07, 0xfb, 0x31, 0x83, 0x07, 0xfc, 0xe0,
+ 0x45, 0x03, 0x14, 0xc2, 0xee, 0x93, 0x83, 0x07, 0xfb, 0xd9, 0x97, 0x07,
+ 0xfb, 0xe9, 0x87, 0x07, 0xfb, 0xf1, 0x91, 0x07, 0xfb, 0xf9, 0x8b, 0x07,
+ 0xfb, 0xe0, 0x83, 0x07, 0xfb, 0xb1, 0x8b, 0x07, 0xfb, 0xb9, 0x87, 0x07,
+ 0xfb, 0xc9, 0x91, 0x07, 0xfb, 0xd1, 0x97, 0x07, 0xfb, 0xc0, 0x83, 0x07,
+ 0xfc, 0x01, 0x8b, 0x07, 0xfc, 0x09, 0x97, 0x07, 0xfc, 0x11, 0x87, 0x07,
+ 0xfc, 0x19, 0x91, 0x07, 0xfc, 0x20, 0x87, 0x07, 0xfc, 0x41, 0x91, 0x07,
+ 0xfc, 0x49, 0x83, 0x07, 0xfc, 0x29, 0x8b, 0x07, 0xfc, 0x31, 0x97, 0x07,
+ 0xfc, 0x38, 0x8b, 0x07, 0xfc, 0x59, 0x97, 0x07, 0xfc, 0x61, 0x87, 0x07,
+ 0xfc, 0x69, 0x83, 0x07, 0xfc, 0x51, 0x91, 0x07, 0xfc, 0x70, 0x8b, 0x07,
+ 0xfc, 0x81, 0x91, 0x07, 0xfc, 0x99, 0x83, 0x07, 0xfc, 0x79, 0x97, 0x07,
+ 0xfc, 0x89, 0x87, 0x07, 0xfc, 0x90, 0x83, 0x07, 0xfc, 0xa1, 0x97, 0x07,
+ 0xfc, 0xa9, 0x91, 0x07, 0xfc, 0xb0, 0x97, 0x07, 0xfc, 0xc9, 0x87, 0x07,
+ 0xfc, 0xd1, 0x91, 0x07, 0xfc, 0xd9, 0x83, 0x07, 0xfc, 0xb9, 0x8b, 0x07,
+ 0xfc, 0xc0, 0xc5, 0xd9, 0xca, 0x07, 0xfd, 0x18, 0xc6, 0x8e, 0xde, 0x07,
+ 0xfd, 0x11, 0xc5, 0x79, 0xf2, 0x07, 0xfd, 0x99, 0xc4, 0xad, 0x2b, 0x07,
+ 0xfd, 0xb1, 0xc5, 0xdb, 0xff, 0x07, 0xfd, 0xc9, 0xc6, 0xc0, 0x7c, 0x07,
+ 0xfd, 0x40, 0xc6, 0x8e, 0xde, 0x07, 0xfd, 0x51, 0xc5, 0xda, 0xe7, 0x07,
+ 0xfd, 0x59, 0x12, 0xc2, 0xee, 0xb1, 0xc4, 0xad, 0x2b, 0x07, 0xfd, 0x69,
+ 0xc7, 0xc1, 0x85, 0x07, 0xfd, 0x71, 0xc5, 0x90, 0xe4, 0x07, 0xfd, 0x80,
+ 0xc5, 0xd9, 0xca, 0x07, 0xfd, 0xa0, 0x87, 0x07, 0xfe, 0x28, 0x91, 0x07,
+ 0xfe, 0x50, 0x87, 0x07, 0xfe, 0x70, 0x91, 0x07, 0xfe, 0xa0, 0xc5, 0xdb,
+ 0xff, 0x07, 0xfd, 0x29, 0xc5, 0x90, 0xe4, 0x07, 0xfd, 0x30, 0x91, 0x0d,
+ 0x8a, 0x91, 0x87, 0x0d, 0x8a, 0x89, 0x8b, 0x0d, 0x8a, 0x81, 0x83, 0x01,
+ 0x84, 0x70, 0x83, 0x01, 0x84, 0x19, 0x97, 0x01, 0x84, 0x29, 0x91, 0x01,
+ 0x84, 0x38, 0x83, 0x01, 0x84, 0xa9, 0x87, 0x01, 0x84, 0xb0, 0xd2, 0x4a,
+ 0x99, 0x01, 0x72, 0x30, 0xe0, 0x06, 0xa7, 0x01, 0x52, 0x58, 0xcf, 0x62,
+ 0x97, 0x01, 0x52, 0x49, 0xc5, 0x13, 0x84, 0x01, 0x52, 0x38, 0xcb, 0x2a,
+ 0xa5, 0x01, 0x52, 0x21, 0xc7, 0x80, 0x70, 0x01, 0x52, 0x19, 0xc3, 0x02,
+ 0xa3, 0x01, 0x52, 0x00, 0xc6, 0x52, 0x0b, 0x01, 0x50, 0xe1, 0xc3, 0x00,
+ 0x44, 0x01, 0x50, 0xd0, 0x00, 0x42, 0xee, 0xbd, 0x19, 0xc2, 0xee, 0xc9,
+ 0xc2, 0x00, 0xc4, 0x08, 0x5b, 0xe1, 0xc4, 0x02, 0xde, 0x08, 0x5b, 0xd0,
+ 0xc2, 0x39, 0x8b, 0x08, 0x5b, 0x91, 0xc3, 0x1e, 0x1b, 0x08, 0x5b, 0x40,
+ 0xc3, 0x11, 0xef, 0x08, 0x5b, 0x89, 0x03, 0x42, 0xee, 0xd3, 0xc2, 0x00,
+ 0x8e, 0x08, 0x5b, 0x38, 0x00, 0x42, 0xee, 0xdf, 0x19, 0xc2, 0xee, 0xeb,
+ 0xc2, 0x00, 0xc4, 0x08, 0x5a, 0xe1, 0xc4, 0x02, 0xde, 0x08, 0x5a, 0xd0,
+ 0xc2, 0x39, 0x8b, 0x08, 0x5a, 0xa9, 0xc3, 0x1e, 0x1b, 0x08, 0x5a, 0x40,
+ 0xc3, 0x11, 0xef, 0x08, 0x5a, 0xa1, 0x03, 0x42, 0xee, 0xf5, 0xc2, 0x00,
+ 0x8e, 0x08, 0x5a, 0x38, 0xc4, 0x36, 0xb5, 0x08, 0x5a, 0x01, 0xc3, 0x16,
+ 0x5a, 0x08, 0x5a, 0x78, 0xc2, 0x02, 0xa0, 0x00, 0x00, 0xf1, 0xc4, 0x02,
+ 0xde, 0x00, 0x00, 0xe8, 0x16, 0xc2, 0xef, 0x01, 0xc3, 0x05, 0x14, 0x0f,
+ 0x65, 0x88, 0xc4, 0x26, 0x78, 0x0f, 0x65, 0x59, 0xc5, 0x06, 0xdb, 0x0f,
+ 0x65, 0x51, 0x15, 0xc2, 0xef, 0x0d, 0x08, 0xc2, 0xef, 0x19, 0x16, 0xc2,
+ 0xef, 0x25, 0xc3, 0x05, 0x14, 0x0f, 0x65, 0x18, 0xc2, 0x00, 0xd1, 0x0f,
+ 0x65, 0x10, 0xc2, 0x00, 0xd1, 0x0f, 0x64, 0xf8, 0xc2, 0x0d, 0x10, 0x0f,
+ 0x64, 0x13, 0x02, 0xef, 0x31, 0x00, 0x42, 0xef, 0x37, 0x9b, 0x0f, 0x64,
+ 0x0b, 0x02, 0xef, 0x43, 0x00, 0x42, 0xef, 0x49, 0xc4, 0x18, 0x10, 0x0f,
+ 0x63, 0xbb, 0x02, 0xef, 0x55, 0xc2, 0x22, 0xcc, 0x0f, 0x63, 0xb2, 0x02,
+ 0xef, 0x62, 0x0b, 0xc2, 0xef, 0x6f, 0x11, 0x42, 0xef, 0x81, 0x0a, 0xc2,
+ 0xef, 0x93, 0x19, 0xc2, 0xef, 0xa5, 0xc2, 0x00, 0xc4, 0x0f, 0x63, 0xd2,
+ 0x02, 0xef, 0xb5, 0x00, 0x42, 0xef, 0xbb, 0xc4, 0x01, 0xce, 0x0f, 0x65,
+ 0x71, 0xc7, 0x08, 0x79, 0x0f, 0x65, 0x68, 0xc6, 0xcc, 0x2f, 0x01, 0x96,
+ 0x01, 0x17, 0x42, 0xef, 0xc7, 0xc3, 0x78, 0xc0, 0x01, 0x96, 0x11, 0x9b,
+ 0x01, 0x96, 0x20, 0xc4, 0xe3, 0xdf, 0x01, 0x96, 0x19, 0xc5, 0xd9, 0x4d,
+ 0x01, 0x96, 0x38, 0xc7, 0xc5, 0x44, 0x01, 0x96, 0x59, 0x43, 0x1a, 0xd3,
+ 0x42, 0xef, 0xd3, 0xc4, 0x15, 0xe7, 0x01, 0x9a, 0xc1, 0xc3, 0x05, 0x14,
+ 0x01, 0x9a, 0xc9, 0x16, 0xc2, 0xef, 0xf2, 0x08, 0xc2, 0xf0, 0x00, 0x15,
+ 0xc2, 0xf0, 0x0d, 0x07, 0xc2, 0xf0, 0x1f, 0xc4, 0x26, 0x78, 0x01, 0x9b,
+ 0x0a, 0x02, 0xf0, 0x2e, 0xc3, 0x00, 0x4a, 0x01, 0x7f, 0xb9, 0xc9, 0x03,
+ 0x68, 0x01, 0x7f, 0xd0, 0xc4, 0x00, 0x49, 0x01, 0x7f, 0xc1, 0xc5, 0x00,
+ 0x2c, 0x01, 0x7f, 0xc8, 0xc9, 0x57, 0x20, 0x08, 0x42, 0xf8, 0xc4, 0x18,
+ 0x12, 0x08, 0x42, 0xe1, 0x91, 0x08, 0x42, 0xc8, 0xc8, 0x4b, 0x94, 0x08,
+ 0x42, 0xf1, 0xc7, 0x0d, 0x04, 0x08, 0x42, 0xe8, 0xc4, 0xdc, 0x2d, 0x08,
+ 0x42, 0x71, 0xc3, 0x77, 0x79, 0x08, 0x42, 0x88, 0xd7, 0x2a, 0xf5, 0x0f,
+ 0xd2, 0x58, 0x49, 0x2a, 0xf5, 0x42, 0xf0, 0x34, 0x49, 0x2a, 0xf5, 0x42,
+ 0xf0, 0x40, 0xc5, 0x56, 0xa5, 0x01, 0x32, 0xc3, 0x02, 0xf0, 0x4c, 0xc3,
+ 0x00, 0x74, 0x01, 0x32, 0xa2, 0x02, 0xf0, 0x56, 0x49, 0x2a, 0xf5, 0x42,
+ 0xf0, 0x5c, 0x49, 0x2a, 0xf5, 0x42, 0xf0, 0x68, 0x0d, 0xc2, 0xf0, 0x74,
+ 0xc5, 0xa8, 0xf7, 0x0f, 0xd0, 0xf9, 0xc4, 0xde, 0x83, 0x0f, 0xd1, 0x01,
+ 0xc6, 0xca, 0xfd, 0x0f, 0xd1, 0x09, 0xc4, 0xe3, 0x93, 0x0f, 0xd1, 0x18,
+ 0xdd, 0x12, 0x1c, 0x0f, 0xbc, 0x51, 0x45, 0x00, 0x8c, 0x42, 0xf0, 0x80,
+ 0xcf, 0x61, 0x2f, 0x01, 0x3f, 0x19, 0xce, 0x6f, 0x2a, 0x01, 0x3f, 0x10,
+ 0xc2, 0x00, 0x61, 0x0f, 0xc8, 0x6b, 0x02, 0xf0, 0x98, 0x43, 0x11, 0x3c,
+ 0x42, 0xf0, 0x9e, 0x51, 0x0a, 0xc9, 0xc2, 0xf0, 0xaa, 0x45, 0x00, 0x8c,
+ 0xc2, 0xf0, 0xbc, 0xc6, 0x86, 0xfd, 0x0f, 0xa9, 0x98, 0x45, 0x00, 0x8c,
+ 0xc2, 0xf0, 0xd6, 0xcc, 0x85, 0xf5, 0x0f, 0x99, 0x2a, 0x02, 0xf0, 0xe2,
+ 0x15, 0xc2, 0xf0, 0xe8, 0xc7, 0x0a, 0xe0, 0x01, 0x59, 0x58, 0xca, 0xa6,
+ 0x34, 0x01, 0x36, 0xc9, 0x49, 0x01, 0xaa, 0x42, 0xf0, 0xf4, 0xc7, 0x46,
+ 0x3d, 0x01, 0x2e, 0x29, 0xce, 0x6c, 0x8a, 0x01, 0x2e, 0x19, 0xc8, 0x01,
+ 0x92, 0x01, 0x2e, 0x08, 0xd0, 0x5e, 0xa2, 0x01, 0x3e, 0x81, 0xc9, 0xaf,
+ 0xa5, 0x01, 0x36, 0x59, 0xc4, 0x22, 0xdc, 0x01, 0x33, 0x11, 0x51, 0x0a,
+ 0xc9, 0x42, 0xf1, 0x00, 0xc5, 0x06, 0x82, 0x01, 0x30, 0xf9, 0xcf, 0x66,
+ 0x84, 0x0f, 0xac, 0xb9, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x38, 0xce, 0x6c,
+ 0x8a, 0x01, 0x2d, 0xf9, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0xe8, 0xe0, 0x03,
+ 0x07, 0x01, 0x3e, 0x08, 0xc5, 0x04, 0xa2, 0x01, 0x3a, 0x01, 0xc3, 0x00,
+ 0x28, 0x0f, 0xa5, 0x70, 0x44, 0x00, 0x8b, 0x42, 0xf1, 0x12, 0xc5, 0x06,
+ 0x82, 0x01, 0x30, 0xf1, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x48, 0x12, 0xc2,
+ 0xf1, 0x18, 0xce, 0x6c, 0x8a, 0x01, 0x2d, 0xc9, 0xc8, 0x01, 0x92, 0x01,
+ 0x2d, 0xb8, 0xc9, 0x33, 0xad, 0x01, 0x2f, 0x60, 0xcb, 0x51, 0x6d, 0x01,
+ 0x2f, 0xe9, 0xc5, 0x0b, 0x0a, 0x01, 0x2f, 0xd9, 0xc3, 0x0e, 0x6b, 0x01,
+ 0x5a, 0x80, 0x90, 0x0f, 0x17, 0x42, 0x02, 0xf1, 0x24, 0x89, 0x0f, 0x17,
+ 0x10, 0xc2, 0x01, 0xa3, 0x08, 0xc6, 0xd9, 0xc2, 0x01, 0xc8, 0x08, 0xc6,
+ 0xd0, 0x90, 0x08, 0xc6, 0x81, 0x9b, 0x08, 0xc6, 0x68, 0x8c, 0x08, 0xc6,
+ 0x70, 0xc2, 0x01, 0xa3, 0x08, 0xc5, 0xd9, 0xc2, 0x01, 0xc8, 0x08, 0xc5,
+ 0xd0, 0x90, 0x08, 0xc5, 0x81, 0x9b, 0x08, 0xc5, 0x68, 0x8c, 0x08, 0xc5,
+ 0x70, 0xe0, 0x04, 0x07, 0x01, 0x5c, 0xa0, 0xcc, 0x81, 0x2d, 0x0f, 0xcb,
+ 0xd1, 0xd7, 0x2a, 0xc7, 0x0f, 0xcb, 0x99, 0xca, 0xa5, 0xbc, 0x0f, 0xd7,
+ 0x18, 0xcb, 0x85, 0x1e, 0x0f, 0xb0, 0x11, 0xca, 0x9b, 0x3a, 0x0f, 0xc8,
+ 0x90, 0xc9, 0xad, 0x65, 0x0f, 0xb2, 0x31, 0x44, 0x05, 0x76, 0xc2, 0xf1,
+ 0x28, 0xd1, 0x55, 0xfc, 0x0f, 0xc9, 0x40, 0x45, 0x02, 0x9a, 0x42, 0xf1,
+ 0x37, 0xc8, 0x6c, 0x12, 0x0f, 0xb0, 0x99, 0xc8, 0xb8, 0xb2, 0x0f, 0xc9,
+ 0x00, 0xcb, 0x92, 0xcd, 0x0f, 0xb1, 0xb9, 0xc6, 0xcc, 0x29, 0x0f, 0xce,
+ 0x80, 0xc2, 0x02, 0xa0, 0x07, 0xf8, 0x91, 0xc4, 0x02, 0xde, 0x07, 0xf8,
+ 0x98, 0xc3, 0x09, 0x9e, 0x07, 0xf8, 0xa1, 0xc3, 0x0d, 0x14, 0x07, 0xf8,
+ 0xa8, 0xc2, 0x22, 0xcc, 0x07, 0xf8, 0xb1, 0xc4, 0x18, 0x10, 0x07, 0xf8,
+ 0xb8, 0xc9, 0xb4, 0x64, 0x07, 0xf9, 0x01, 0x83, 0x07, 0xf8, 0x60, 0xce,
+ 0x25, 0xad, 0x07, 0xf9, 0xd9, 0xcd, 0x00, 0x32, 0x07, 0xfa, 0xd9, 0xd1,
+ 0x4f, 0x7a, 0x07, 0xfa, 0xf9, 0xcb, 0x1a, 0x50, 0x07, 0xf8, 0x40, 0x83,
+ 0x07, 0xf9, 0x09, 0x84, 0x07, 0xf9, 0x11, 0x85, 0x07, 0xf9, 0x19, 0x86,
+ 0x07, 0xf9, 0x21, 0x87, 0x07, 0xf9, 0x29, 0x88, 0x07, 0xf9, 0x31, 0x89,
+ 0x07, 0xf9, 0x39, 0x8a, 0x07, 0xf9, 0x41, 0x8b, 0x07, 0xf9, 0x49, 0x8c,
+ 0x07, 0xf9, 0x51, 0x8d, 0x07, 0xf9, 0x59, 0x8e, 0x07, 0xf9, 0x61, 0x8f,
+ 0x07, 0xf9, 0x69, 0x95, 0x07, 0xf9, 0x99, 0x96, 0x07, 0xf9, 0xa1, 0x97,
+ 0x07, 0xf9, 0xa9, 0x98, 0x07, 0xf9, 0xb1, 0x99, 0x07, 0xf9, 0xb9, 0x9a,
+ 0x07, 0xf9, 0xc1, 0x9b, 0x07, 0xf9, 0xc9, 0x9c, 0x07, 0xf9, 0xd1, 0x90,
+ 0x07, 0xf9, 0x71, 0x91, 0x07, 0xf9, 0x79, 0x92, 0x07, 0xf9, 0x81, 0x93,
+ 0x07, 0xf9, 0x89, 0x94, 0x07, 0xf9, 0x90, 0x83, 0x07, 0xfa, 0x09, 0x84,
+ 0x07, 0xfa, 0x11, 0x85, 0x07, 0xfa, 0x19, 0x87, 0x07, 0xfa, 0x29, 0x88,
+ 0x07, 0xfa, 0x31, 0x89, 0x07, 0xfa, 0x39, 0x8a, 0x07, 0xfa, 0x41, 0x8b,
+ 0x07, 0xfa, 0x49, 0x8c, 0x07, 0xfa, 0x51, 0x8d, 0x07, 0xfa, 0x59, 0x8e,
+ 0x07, 0xfa, 0x61, 0x8f, 0x07, 0xfa, 0x69, 0x90, 0x07, 0xfa, 0x71, 0x91,
+ 0x07, 0xfa, 0x79, 0x92, 0x07, 0xfa, 0x81, 0x93, 0x07, 0xfa, 0x89, 0x94,
+ 0x07, 0xfa, 0x91, 0x95, 0x07, 0xfa, 0x99, 0x96, 0x07, 0xfa, 0xa1, 0x97,
+ 0x07, 0xfa, 0xa9, 0x98, 0x07, 0xfa, 0xb1, 0x99, 0x07, 0xfa, 0xb9, 0x9a,
+ 0x07, 0xfa, 0xc1, 0x9b, 0x07, 0xfa, 0xc9, 0x9c, 0x07, 0xfa, 0xd1, 0x86,
+ 0x07, 0xfa, 0x20, 0xa5, 0x0b, 0x7c, 0xf9, 0xa3, 0x0b, 0x7c, 0xf1, 0xa2,
+ 0x0b, 0x7c, 0xe9, 0xa1, 0x0b, 0x7c, 0xe1, 0x9f, 0x0b, 0x7c, 0xd9, 0x9e,
+ 0x0b, 0x7c, 0xd0, 0xc2, 0x01, 0x30, 0x0b, 0x79, 0x29, 0x83, 0x0b, 0x78,
+ 0x98, 0xc2, 0x19, 0x2c, 0x0b, 0x7a, 0x09, 0x83, 0x0b, 0x79, 0xf0, 0x83,
+ 0x0b, 0x79, 0xc9, 0xc2, 0x00, 0xd0, 0x0b, 0x79, 0x80, 0x89, 0x0b, 0x7b,
+ 0x68, 0x89, 0x0b, 0x7b, 0x20, 0xcb, 0x1b, 0xd5, 0x01, 0x51, 0xd1, 0x45,
+ 0x00, 0x8c, 0x42, 0xf1, 0x43, 0xd6, 0x30, 0x0c, 0x01, 0x3b, 0xa9, 0xd4,
+ 0x1a, 0x50, 0x01, 0x3b, 0x48, 0xd6, 0x30, 0x0c, 0x01, 0x3b, 0xa1, 0xd4,
+ 0x1a, 0x50, 0x01, 0x3b, 0x40, 0xda, 0x1a, 0x4a, 0x01, 0x3b, 0x59, 0xd9,
+ 0x1d, 0xec, 0x01, 0x3b, 0x50, 0xca, 0x22, 0x51, 0x0f, 0xbe, 0x29, 0xcd,
+ 0x0e, 0x61, 0x0f, 0xbe, 0x38, 0xcf, 0x15, 0x36, 0x0f, 0xbd, 0xb1, 0xd2,
+ 0x22, 0x49, 0x0f, 0xbe, 0x58, 0x97, 0x0b, 0x73, 0x98, 0x8b, 0x0b, 0x73,
+ 0xf1, 0xc3, 0x7a, 0xd8, 0x0b, 0x73, 0x20, 0x87, 0x0b, 0x73, 0xd0, 0x89,
+ 0x0b, 0x73, 0xb9, 0x9b, 0x0b, 0x73, 0xb8, 0x92, 0x0b, 0x73, 0xb0, 0x92,
+ 0x0b, 0x73, 0x30, 0x97, 0x0b, 0x72, 0x98, 0x8b, 0x0b, 0x72, 0xf1, 0xc3,
+ 0x7a, 0xd8, 0x0b, 0x72, 0x20, 0x87, 0x0b, 0x72, 0xd0, 0x89, 0x0b, 0x72,
+ 0xb9, 0x9b, 0x0b, 0x72, 0xb8, 0x92, 0x0b, 0x72, 0xb0, 0x92, 0x0b, 0x72,
+ 0x30, 0xcf, 0x6b, 0x25, 0x0b, 0x74, 0xb0, 0xcf, 0x6b, 0x25, 0x0b, 0x74,
+ 0xa8, 0xc4, 0xe0, 0x37, 0x0f, 0x41, 0xd1, 0xc4, 0xe2, 0x23, 0x0f, 0x41,
+ 0xa1, 0xc5, 0xd6, 0x14, 0x0f, 0x40, 0x29, 0xc4, 0xe2, 0x7b, 0x0f, 0x42,
+ 0xf1, 0xc5, 0xd4, 0x8e, 0x0f, 0x42, 0xe9, 0xc5, 0xd4, 0xf2, 0x0f, 0x44,
+ 0xc1, 0xc5, 0xd6, 0x28, 0x0f, 0x45, 0x09, 0xc6, 0xd2, 0x11, 0x0f, 0x45,
+ 0x59, 0xc5, 0xde, 0x43, 0x0f, 0x45, 0x61, 0xc4, 0xe2, 0x4f, 0x0f, 0x45,
+ 0xf8, 0xc5, 0xd6, 0x3c, 0x0f, 0x41, 0xc9, 0xc5, 0xd8, 0x03, 0x0f, 0x43,
+ 0x99, 0xc6, 0xd0, 0x49, 0x0f, 0x43, 0x79, 0xc4, 0xe2, 0x07, 0x0f, 0x43,
+ 0x01, 0xc4, 0xe4, 0x93, 0x0f, 0x42, 0xb9, 0xc5, 0xdc, 0xb3, 0x0f, 0x42,
+ 0x09, 0xc6, 0xce, 0x81, 0x0f, 0x43, 0xc9, 0xcb, 0x8d, 0xa5, 0x0f, 0x44,
+ 0x01, 0xc5, 0xd4, 0x61, 0x0f, 0x44, 0x79, 0xc4, 0xe3, 0x87, 0x0f, 0x45,
+ 0xe8, 0xc4, 0xe1, 0x93, 0x0f, 0x41, 0xc1, 0xc4, 0xe1, 0xbb, 0x0f, 0x41,
+ 0xb9, 0xc4, 0xe2, 0xa7, 0x0f, 0x41, 0xb1, 0xc4, 0xe1, 0x57, 0x0f, 0x41,
+ 0x81, 0xc4, 0xe4, 0x8f, 0x0f, 0x41, 0x79, 0xc4, 0xe2, 0x13, 0x0f, 0x42,
+ 0x61, 0xc4, 0xe1, 0xfb, 0x0f, 0x42, 0x59, 0xc4, 0xe2, 0xf7, 0x0f, 0x42,
+ 0x31, 0xc4, 0xe0, 0x2b, 0x0f, 0x42, 0x29, 0xc4, 0x38, 0x6b, 0x0f, 0x42,
+ 0x20, 0xc4, 0xe2, 0x27, 0x0f, 0x41, 0x71, 0xc3, 0xe4, 0xf7, 0x0f, 0x41,
+ 0x21, 0xc3, 0xd6, 0x5f, 0x0f, 0x41, 0x19, 0xc3, 0xe6, 0x26, 0x0f, 0x41,
+ 0x11, 0xc4, 0xe1, 0x37, 0x0f, 0x40, 0xe9, 0xc4, 0xb7, 0x12, 0x0f, 0x40,
+ 0xe1, 0xc4, 0xe3, 0x97, 0x0f, 0x40, 0xd9, 0xc4, 0xe2, 0x63, 0x0f, 0x42,
+ 0x01, 0xc4, 0xe1, 0xd7, 0x0f, 0x41, 0xf9, 0xc4, 0xe2, 0xff, 0x0f, 0x41,
+ 0xf0, 0xc4, 0xe1, 0xdb, 0x0f, 0x40, 0xf9, 0xc5, 0xd6, 0x2d, 0x0f, 0x40,
+ 0xc1, 0xc4, 0xd6, 0x96, 0x0f, 0x40, 0x21, 0xc4, 0xe3, 0x2f, 0x0f, 0x43,
+ 0x61, 0xc5, 0xd5, 0x0b, 0x0f, 0x42, 0x39, 0xc6, 0xd1, 0x9f, 0x0f, 0x43,
+ 0xb9, 0xc4, 0xe2, 0x33, 0x0f, 0x44, 0x69, 0xc5, 0xd5, 0x42, 0x0f, 0x45,
+ 0x01, 0xc6, 0xd0, 0x43, 0x0f, 0x45, 0x49, 0xc6, 0xd1, 0xb7, 0x0f, 0x46,
+ 0x18, 0xc5, 0xdd, 0x26, 0x0f, 0x40, 0xb9, 0xc5, 0xd4, 0x5c, 0x0f, 0x43,
+ 0xa1, 0xc5, 0xd7, 0x31, 0x0f, 0x43, 0x89, 0xc4, 0xe3, 0x53, 0x0f, 0x42,
+ 0x41, 0xc5, 0xd6, 0x37, 0x0f, 0x41, 0xd9, 0xc6, 0xd0, 0x13, 0x0f, 0x44,
+ 0x51, 0xc4, 0xe3, 0xe3, 0x0f, 0x44, 0x71, 0xc4, 0xd4, 0x61, 0x0f, 0x44,
+ 0x81, 0xc5, 0xd5, 0x9c, 0x0f, 0x45, 0x39, 0xc6, 0xd1, 0xd5, 0x0f, 0x46,
+ 0x08, 0xc5, 0xdc, 0x59, 0x0f, 0x40, 0xb1, 0xc5, 0xdd, 0x3a, 0x0f, 0x40,
+ 0xa9, 0xc5, 0xd4, 0x52, 0x0f, 0x40, 0xa1, 0xc4, 0xe1, 0xdf, 0x0f, 0x40,
+ 0x51, 0xc4, 0xe3, 0x77, 0x0f, 0x40, 0x49, 0xc4, 0xe2, 0xe7, 0x0f, 0x40,
+ 0x41, 0xc4, 0xe0, 0x7f, 0x0f, 0x40, 0x11, 0xc4, 0xe1, 0x2f, 0x0f, 0x40,
+ 0x09, 0xc4, 0xe0, 0xbf, 0x0f, 0x40, 0x00, 0xc5, 0xdd, 0xad, 0x0f, 0x40,
+ 0x91, 0xc4, 0xd2, 0x6b, 0x0f, 0x40, 0x71, 0xc4, 0xe1, 0xf7, 0x0f, 0x40,
+ 0x31, 0xc5, 0xd4, 0xa7, 0x0f, 0x43, 0x69, 0xc5, 0xdd, 0xcb, 0x0f, 0x43,
+ 0x59, 0xc4, 0xe0, 0xcb, 0x0f, 0x43, 0x49, 0xc6, 0xd3, 0xaf, 0x0f, 0x43,
+ 0xb1, 0xc6, 0xce, 0xed, 0x0f, 0x43, 0xc1, 0xc6, 0xd0, 0x61, 0x0f, 0x44,
+ 0xb1, 0xc6, 0xcf, 0x71, 0x0f, 0x45, 0x10, 0xc5, 0xd4, 0x34, 0x0f, 0x40,
+ 0x89, 0xc5, 0xd6, 0x96, 0x0f, 0x40, 0x19, 0xc4, 0xe0, 0x83, 0x0f, 0x42,
+ 0x89, 0xc4, 0xe2, 0x1b, 0x0f, 0x42, 0x51, 0xc4, 0xe1, 0xf3, 0x0f, 0x44,
+ 0x61, 0xc4, 0xe1, 0xb3, 0x0f, 0x44, 0x91, 0xc5, 0xd5, 0x10, 0x0f, 0x44,
+ 0xa1, 0xc6, 0xd0, 0x8b, 0x0f, 0x45, 0x99, 0xc5, 0xd5, 0x97, 0x0f, 0x45,
+ 0xa1, 0xc6, 0xd1, 0x33, 0x0f, 0x46, 0x20, 0xc5, 0xde, 0x52, 0x0f, 0x43,
+ 0x29, 0xc5, 0xdd, 0xf8, 0x0f, 0x43, 0x21, 0xc5, 0xd5, 0x33, 0x0f, 0x43,
+ 0x19, 0xc4, 0xe3, 0x3b, 0x0f, 0x42, 0xe1, 0xc4, 0xe2, 0x7f, 0x0f, 0x42,
+ 0xd9, 0xc4, 0xe2, 0xbf, 0x0f, 0x42, 0xd1, 0xc4, 0xe0, 0x33, 0x0f, 0x42,
+ 0xa9, 0xc4, 0xdf, 0xaf, 0x0f, 0x42, 0xa1, 0xc4, 0xe1, 0x4f, 0x0f, 0x42,
+ 0x99, 0xc4, 0xe3, 0xff, 0x0f, 0x42, 0x68, 0xc5, 0xd8, 0x80, 0x0f, 0x41,
+ 0xa9, 0xc4, 0xe2, 0x93, 0x0f, 0x41, 0x61, 0xc5, 0xd5, 0xb0, 0x0f, 0x40,
+ 0x79, 0xc5, 0xd7, 0x9f, 0x0f, 0x43, 0xa9, 0xc5, 0xd7, 0xef, 0x0f, 0x43,
+ 0x09, 0xc5, 0xde, 0x66, 0x0f, 0x44, 0x31, 0xc6, 0xd2, 0xa1, 0x0f, 0x45,
+ 0x89, 0xc5, 0xd4, 0x7f, 0x0f, 0x45, 0xb0, 0xc5, 0xd8, 0x7b, 0x0f, 0x41,
+ 0x99, 0xc4, 0xe2, 0xb7, 0x0f, 0x41, 0x59, 0xc4, 0xe0, 0xc7, 0x0f, 0x41,
+ 0x51, 0xc4, 0xe1, 0xcb, 0x0f, 0x41, 0x49, 0xc4, 0xe2, 0x67, 0x0f, 0x41,
+ 0x09, 0xc5, 0xdd, 0xa8, 0x0f, 0x40, 0x99, 0xc5, 0xde, 0x6b, 0x0f, 0x43,
+ 0x91, 0xc5, 0xd7, 0x59, 0x0f, 0x42, 0xf9, 0xc5, 0xd5, 0x47, 0x0f, 0x44,
+ 0xf9, 0xc6, 0xd3, 0x61, 0x0f, 0x45, 0xc0, 0xc4, 0xe1, 0xe3, 0x0f, 0x41,
+ 0x91, 0xc5, 0xd5, 0xba, 0x0f, 0x40, 0x69, 0xc4, 0xe2, 0x3f, 0x0f, 0x40,
+ 0x61, 0xc5, 0xd4, 0x4d, 0x0f, 0x43, 0x31, 0xc4, 0xe0, 0x87, 0x0f, 0x42,
+ 0x79, 0xc9, 0xac, 0xe7, 0x0f, 0x41, 0xe9, 0xc7, 0xc3, 0xb5, 0x0f, 0x43,
+ 0xd1, 0xc4, 0xe0, 0xcf, 0x0f, 0x44, 0x21, 0xc6, 0xcf, 0xb9, 0x0f, 0x45,
+ 0x21, 0xc5, 0xde, 0x16, 0x0f, 0x45, 0x90, 0xc5, 0xd4, 0xde, 0x0f, 0x41,
+ 0x89, 0xc4, 0xe3, 0x5f, 0x0f, 0x41, 0x39, 0xc4, 0xe0, 0x93, 0x0f, 0x41,
+ 0x29, 0xc5, 0xde, 0x5c, 0x0f, 0x43, 0x39, 0xc5, 0xdd, 0xc6, 0x0f, 0x42,
+ 0x81, 0xc4, 0xe2, 0x03, 0x0f, 0x44, 0x29, 0xc6, 0xd3, 0xb5, 0x0f, 0x44,
+ 0x39, 0xc6, 0xd0, 0xc7, 0x0f, 0x44, 0x41, 0xca, 0x9a, 0xc2, 0x0f, 0x44,
+ 0xe1, 0xc6, 0xd3, 0xcd, 0x0f, 0x46, 0x00, 0xc4, 0xe2, 0x0f, 0x0f, 0x41,
+ 0x69, 0xc5, 0xdc, 0xc2, 0x0f, 0x40, 0x39, 0xc4, 0xe3, 0x1f, 0x0f, 0x43,
+ 0x41, 0xc9, 0xa9, 0x24, 0x0f, 0x42, 0x91, 0xc7, 0xc5, 0xc2, 0x0f, 0x44,
+ 0x59, 0xc6, 0xce, 0x99, 0x0f, 0x44, 0xc9, 0xc5, 0xd6, 0x32, 0x0f, 0x44,
+ 0xd1, 0xc4, 0xe0, 0xdf, 0x0f, 0x45, 0x69, 0xc5, 0xd8, 0x35, 0x0f, 0x45,
+ 0xe1, 0xc6, 0xd1, 0x3f, 0x0f, 0x46, 0x10, 0xc3, 0xe5, 0x66, 0x0f, 0x41,
+ 0x41, 0xc5, 0xd8, 0x6c, 0x0f, 0x40, 0x81, 0xc4, 0xe3, 0xfb, 0x0f, 0x43,
+ 0x71, 0xc5, 0xd4, 0xe8, 0x0f, 0x42, 0xc1, 0xc6, 0xce, 0x9f, 0x0f, 0x43,
+ 0xd9, 0xc5, 0xd6, 0xa0, 0x0f, 0x44, 0x99, 0xca, 0xa0, 0x12, 0x0f, 0x44,
+ 0xf1, 0xc5, 0xd3, 0xf8, 0x0f, 0x45, 0x41, 0xc6, 0xd0, 0xbb, 0x0f, 0x45,
+ 0xb9, 0xc5, 0xd5, 0xbf, 0x0f, 0x45, 0xf0, 0xc3, 0xe5, 0xd2, 0x0f, 0x41,
+ 0x31, 0xc5, 0xd7, 0xea, 0x0f, 0x41, 0x01, 0xc5, 0xdc, 0x18, 0x0f, 0x43,
+ 0x11, 0xc5, 0xdd, 0xe9, 0x0f, 0x42, 0xb1, 0xc5, 0xd5, 0xab, 0x0f, 0x42,
+ 0x49, 0xcc, 0x89, 0xa9, 0x0f, 0x44, 0x09, 0xc5, 0xd4, 0xb6, 0x0f, 0x44,
+ 0x89, 0xcb, 0x8e, 0x81, 0x0f, 0x44, 0xe9, 0xc5, 0xd3, 0xee, 0x0f, 0x45,
+ 0x19, 0xc5, 0xd7, 0xf4, 0x0f, 0x45, 0x50, 0xc5, 0xdd, 0x94, 0x0f, 0x40,
+ 0xf1, 0xc6, 0xd3, 0xa3, 0x0f, 0x40, 0xc9, 0xc5, 0xd8, 0x0d, 0x0f, 0x42,
+ 0x71, 0xc4, 0x92, 0x28, 0x0f, 0x41, 0xe1, 0xc7, 0xc1, 0x46, 0x0f, 0x43,
+ 0xe1, 0xc7, 0xc8, 0x85, 0x0f, 0x43, 0xf1, 0xc4, 0xe2, 0xbb, 0x0f, 0x44,
+ 0x19, 0xc5, 0xd5, 0x38, 0x0f, 0x45, 0x29, 0xc5, 0xd4, 0x57, 0x0f, 0x45,
+ 0xa9, 0xc4, 0xe1, 0x53, 0x0f, 0x45, 0xd8, 0xc6, 0xce, 0x51, 0x0f, 0x40,
+ 0xd1, 0xc4, 0xd3, 0xaf, 0x0f, 0x43, 0x51, 0xc4, 0xe0, 0xdb, 0x0f, 0x42,
+ 0x19, 0xc5, 0xdd, 0xc1, 0x0f, 0x42, 0x11, 0xcb, 0x92, 0x28, 0x0f, 0x44,
+ 0x11, 0xc6, 0xd3, 0x55, 0x0f, 0x44, 0x49, 0xc6, 0xd2, 0x89, 0x0f, 0x44,
+ 0xb9, 0xc6, 0xd0, 0x85, 0x0f, 0x44, 0xd9, 0xc4, 0xdf, 0xf7, 0x0f, 0x45,
+ 0xc9, 0xc4, 0xe3, 0x3f, 0x0f, 0x45, 0xd0, 0xc5, 0xd7, 0x68, 0x0f, 0x40,
+ 0x59, 0xc6, 0xd3, 0x43, 0x0f, 0x43, 0x81, 0xc4, 0xd4, 0xe8, 0x0f, 0x42,
+ 0xc9, 0xc6, 0xd0, 0xdf, 0x0f, 0x43, 0xe9, 0xc7, 0xc7, 0x90, 0x0f, 0x43,
+ 0xf9, 0xc5, 0xd4, 0xc5, 0x0f, 0x44, 0xa9, 0xc5, 0xd6, 0x4b, 0x0f, 0x45,
+ 0x31, 0xc5, 0xd8, 0xd5, 0x0f, 0x45, 0x71, 0xc5, 0xde, 0x20, 0x0f, 0x45,
+ 0x79, 0xc5, 0xd6, 0x69, 0x0f, 0x45, 0x80, 0xc3, 0x57, 0x39, 0x0f, 0x46,
+ 0x81, 0x10, 0x42, 0xf1, 0x5b, 0xcb, 0x71, 0xb1, 0x08, 0x4f, 0xf9, 0xcd,
+ 0x7c, 0x9b, 0x08, 0x4f, 0xc1, 0xcb, 0x8d, 0xf2, 0x08, 0x4f, 0xb8, 0xcd,
+ 0x7d, 0x85, 0x08, 0x4f, 0xe9, 0xce, 0x71, 0xae, 0x08, 0x4d, 0xe0, 0xcd,
+ 0x71, 0xaf, 0x08, 0x4f, 0xe1, 0xcb, 0x91, 0x83, 0x08, 0x4f, 0xd8, 0xcc,
+ 0x8c, 0x79, 0x08, 0x4f, 0xd1, 0xcc, 0x86, 0xa9, 0x08, 0x4f, 0xc8, 0xc7,
+ 0x71, 0xb4, 0x08, 0x4f, 0xb1, 0xc4, 0x01, 0xce, 0x08, 0x4d, 0xe8, 0x00,
+ 0xc2, 0xf1, 0x65, 0xcb, 0x92, 0xb7, 0x08, 0x4f, 0x60, 0x00, 0xc2, 0xf1,
+ 0x74, 0xca, 0x92, 0xb8, 0x08, 0x4f, 0x58, 0xc4, 0x18, 0x10, 0x08, 0x4e,
+ 0x33, 0x02, 0xf1, 0x83, 0xc2, 0x22, 0xcc, 0x08, 0x4e, 0x2a, 0x02, 0xf1,
+ 0x90, 0x0b, 0xc2, 0xf1, 0x9d, 0x11, 0x42, 0xf1, 0xaf, 0x0a, 0xc2, 0xf1,
+ 0xc1, 0x19, 0xc2, 0xf1, 0xd3, 0xc2, 0x00, 0xc4, 0x08, 0x4e, 0x4a, 0x02,
+ 0xf1, 0xe3, 0x00, 0x42, 0xf1, 0xe9, 0xc3, 0xe5, 0xb1, 0x08, 0x4d, 0xf9,
+ 0xc3, 0x64, 0x84, 0x08, 0x4d, 0xf0, 0xc2, 0x0e, 0x9a, 0x08, 0x4d, 0xb9,
+ 0x16, 0xc2, 0xf1, 0xf8, 0xc2, 0x0f, 0x9a, 0x08, 0x4d, 0x99, 0x0d, 0xc2,
+ 0xf2, 0x04, 0x15, 0xc2, 0xf2, 0x0e, 0x83, 0x08, 0x4d, 0x03, 0x02, 0xf2,
+ 0x16, 0xc3, 0xe6, 0x71, 0x08, 0x4d, 0x71, 0xc2, 0x00, 0xdb, 0x08, 0x4d,
+ 0x61, 0xc2, 0x00, 0x39, 0x08, 0x4d, 0x59, 0x10, 0xc2, 0xf2, 0x1c, 0xc2,
+ 0x01, 0xc3, 0x08, 0x4d, 0x41, 0xc2, 0x00, 0xb0, 0x08, 0x4d, 0x39, 0xc2,
+ 0x01, 0x5d, 0x08, 0x4d, 0x31, 0xc2, 0x01, 0x4a, 0x08, 0x4d, 0x29, 0xc2,
+ 0x19, 0x2c, 0x08, 0x4d, 0x21, 0x91, 0x08, 0x4d, 0x19, 0x8b, 0x08, 0x4d,
+ 0x11, 0x87, 0x08, 0x4d, 0x08, 0x91, 0x08, 0x4c, 0xe1, 0x87, 0x08, 0x4c,
+ 0xd3, 0x02, 0xf2, 0x24, 0x83, 0x08, 0x4c, 0xc2, 0x02, 0xf2, 0x2a, 0x83,
+ 0x08, 0x4c, 0xb1, 0xc2, 0x00, 0xd0, 0x08, 0x4c, 0x88, 0x87, 0x08, 0x4c,
+ 0xa9, 0x83, 0x08, 0x4c, 0x9a, 0x02, 0xf2, 0x30, 0xc2, 0xe5, 0xfd, 0x08,
+ 0x4c, 0x38, 0x83, 0x08, 0x4c, 0x53, 0x02, 0xf2, 0x36, 0x87, 0x08, 0x4c,
+ 0x62, 0x02, 0xf2, 0x3c, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0x78, 0x60, 0x03,
+ 0x27, 0x42, 0xf2, 0x42, 0x97, 0x05, 0x57, 0x79, 0x8b, 0x05, 0x57, 0x68,
+ 0xc7, 0xc9, 0xe3, 0x05, 0x5f, 0x08, 0xc7, 0xc9, 0xe3, 0x05, 0x5e, 0xf8,
+ 0xc7, 0xc9, 0xe3, 0x05, 0x5f, 0x00, 0xc2, 0x00, 0xd0, 0x05, 0x57, 0x29,
+ 0x83, 0x05, 0x57, 0x20, 0xc7, 0xc9, 0xe3, 0x05, 0x5e, 0xf0, 0xc7, 0xc9,
+ 0xe3, 0x05, 0x5e, 0xd8, 0xc2, 0x00, 0xd0, 0x05, 0x57, 0x39, 0x83, 0x05,
+ 0x57, 0x30, 0xcf, 0x01, 0x38, 0x08, 0xb3, 0x59, 0xc8, 0x00, 0xbf, 0x08,
+ 0xb3, 0x50, 0xc4, 0x18, 0x10, 0x00, 0xc0, 0xb9, 0xc2, 0x22, 0xcc, 0x00,
+ 0xc0, 0xb0, 0xc3, 0x0d, 0x14, 0x00, 0xc0, 0xa9, 0xc3, 0x09, 0x9e, 0x00,
+ 0xc0, 0xa0, 0xc4, 0x02, 0xde, 0x00, 0xc0, 0x99, 0xc2, 0x02, 0xa0, 0x00,
+ 0xc0, 0x90, 0x49, 0xb1, 0x70, 0xc2, 0xf2, 0x5a, 0xc3, 0xb4, 0xa6, 0x00,
+ 0xc3, 0xb9, 0xc2, 0x00, 0x87, 0x00, 0xc3, 0xb1, 0xc2, 0x00, 0x39, 0x00,
+ 0xc3, 0xa9, 0xc2, 0x02, 0x2b, 0x00, 0xc3, 0xa1, 0x8b, 0x00, 0xc3, 0x98,
+ 0x06, 0xc2, 0xf2, 0x8e, 0x45, 0x01, 0xce, 0xc2, 0xf2, 0x9b, 0x83, 0x00,
+ 0xc4, 0x3b, 0x02, 0xf2, 0xa5, 0x1c, 0xc2, 0xf2, 0xaf, 0xc3, 0x1d, 0x35,
+ 0x00, 0xc4, 0xa1, 0x12, 0xc2, 0xf2, 0xb9, 0x16, 0xc2, 0xf2, 0xc3, 0x10,
+ 0xc2, 0xf2, 0xd1, 0xc2, 0x00, 0x64, 0x00, 0xc4, 0x59, 0xc2, 0x02, 0x2b,
+ 0x00, 0xc4, 0x49, 0x8b, 0x00, 0xc4, 0x43, 0x02, 0xf2, 0xdd, 0xc6, 0x8c,
+ 0xa2, 0x00, 0xc4, 0x29, 0xc7, 0x62, 0x18, 0x00, 0xc4, 0x19, 0xcb, 0x96,
+ 0x32, 0x00, 0xc4, 0x08, 0x03, 0xc2, 0xf2, 0xe3, 0x06, 0xc2, 0xf2, 0xef,
+ 0xc3, 0x27, 0x57, 0x00, 0xc2, 0xd9, 0x0c, 0xc2, 0xf2, 0xf9, 0xc3, 0x39,
+ 0x6e, 0x00, 0xc2, 0xc9, 0xc2, 0x01, 0x30, 0x00, 0xc2, 0x73, 0x02, 0xf3,
+ 0x03, 0xc2, 0x02, 0x2b, 0x00, 0xc2, 0xb9, 0xc2, 0x01, 0x4a, 0x00, 0xc2,
+ 0xb1, 0xc2, 0x19, 0x2c, 0x00, 0xc2, 0xa9, 0x16, 0xc2, 0xf3, 0x07, 0xc3,
+ 0x1c, 0x63, 0x00, 0xc2, 0x91, 0xc2, 0x01, 0xc3, 0x00, 0xc2, 0x79, 0xc2,
+ 0x0f, 0x9a, 0x00, 0xc2, 0x69, 0xc2, 0x00, 0xb0, 0x00, 0xc2, 0x61, 0xc2,
+ 0x01, 0x5d, 0x00, 0xc2, 0x59, 0x97, 0x00, 0xc2, 0x3b, 0x02, 0xf3, 0x11,
+ 0x91, 0x00, 0xc2, 0x33, 0x02, 0xf3, 0x15, 0x8b, 0x00, 0xc2, 0x29, 0x87,
+ 0x00, 0xc2, 0x21, 0xcf, 0x66, 0x2a, 0x00, 0xc2, 0x18, 0xce, 0x17, 0xd4,
+ 0x00, 0xc3, 0xc0, 0x1c, 0xc2, 0xf3, 0x19, 0xc3, 0x1c, 0x63, 0x00, 0xc3,
+ 0x89, 0xc3, 0x47, 0xd9, 0x00, 0xc3, 0x81, 0x16, 0xc2, 0xf3, 0x23, 0xc2,
+ 0x00, 0xd0, 0x00, 0xc3, 0x2b, 0x02, 0xf3, 0x2d, 0xc2, 0x01, 0x30, 0x00,
+ 0xc3, 0x23, 0x02, 0xf3, 0x31, 0xc2, 0x00, 0x87, 0x00, 0xc3, 0x59, 0xc2,
+ 0x25, 0x3b, 0x00, 0xc3, 0x51, 0xc2, 0x0e, 0x9a, 0x00, 0xc3, 0x49, 0xc3,
+ 0x01, 0xe2, 0x00, 0xc3, 0x39, 0xc2, 0x00, 0xb0, 0x00, 0xc3, 0x31, 0xc2,
+ 0x02, 0x2b, 0x00, 0xc3, 0x19, 0xc3, 0x01, 0x95, 0x00, 0xc3, 0x11, 0x97,
+ 0x00, 0xc3, 0x0b, 0x02, 0xf3, 0x35, 0x8b, 0x00, 0xc2, 0xf3, 0x02, 0xf3,
+ 0x39, 0x87, 0x00, 0xc2, 0xe8, 0xc4, 0x02, 0xde, 0x00, 0xc0, 0x69, 0xc2,
+ 0x02, 0xa0, 0x00, 0xc0, 0x60, 0xc4, 0x26, 0x78, 0x08, 0xb2, 0xc9, 0xc5,
+ 0x06, 0xdb, 0x08, 0xb2, 0xc1, 0x15, 0xc2, 0xf3, 0x3d, 0x08, 0xc2, 0xf3,
+ 0x49, 0x16, 0xc2, 0xf3, 0x55, 0xc3, 0x05, 0x14, 0x08, 0xb2, 0x89, 0xc4,
+ 0x15, 0xe7, 0x08, 0xb2, 0x80, 0xca, 0xa0, 0xee, 0x08, 0xb2, 0x01, 0xc7,
+ 0x14, 0x39, 0x08, 0xb1, 0xe8, 0xc4, 0x1e, 0x97, 0x08, 0xb1, 0xf9, 0xc5,
+ 0x40, 0xe7, 0x08, 0xb1, 0xf0, 0x97, 0x08, 0xb1, 0xe1, 0x8b, 0x08, 0xb1,
+ 0xd1, 0x83, 0x08, 0xb1, 0x80, 0x8e, 0x08, 0xb1, 0xbb, 0x02, 0xf3, 0x61,
+ 0x94, 0x08, 0xb1, 0xaa, 0x02, 0xf3, 0x65, 0x97, 0x08, 0xb1, 0xa0, 0x8b,
+ 0x08, 0xb1, 0x90, 0xc2, 0x00, 0xdb, 0x08, 0xb1, 0x79, 0x83, 0x08, 0xb1,
+ 0x48, 0x83, 0x08, 0xb1, 0x69, 0xc2, 0x0d, 0xf6, 0x08, 0xb1, 0x61, 0xc2,
+ 0x00, 0xd0, 0x08, 0xb1, 0x58, 0x83, 0x08, 0xb1, 0x51, 0x47, 0xb2, 0x2e,
+ 0x42, 0xf3, 0x69, 0xc2, 0x00, 0xd0, 0x08, 0xb1, 0x29, 0x83, 0x08, 0xb1,
+ 0x20, 0xc2, 0x00, 0xd0, 0x08, 0xb1, 0x19, 0x83, 0x08, 0xb1, 0x10, 0x83,
+ 0x08, 0xb1, 0x09, 0xc2, 0x00, 0xc1, 0x08, 0xb0, 0xe1, 0xc2, 0x19, 0x2c,
+ 0x08, 0xb0, 0xb9, 0xc2, 0x01, 0x30, 0x08, 0xb0, 0x90, 0xc2, 0x00, 0xd0,
+ 0x08, 0xb1, 0x01, 0x83, 0x08, 0xb0, 0xf9, 0x06, 0x42, 0xf3, 0x77, 0xc2,
+ 0x00, 0xd0, 0x08, 0xb0, 0xf1, 0x83, 0x08, 0xb0, 0xe9, 0x16, 0x42, 0xf3,
+ 0x81, 0xc2, 0x00, 0xd0, 0x08, 0xb0, 0xb1, 0x83, 0x08, 0xb0, 0xa8, 0xc2,
+ 0x00, 0xd0, 0x08, 0xb0, 0xa1, 0x83, 0x08, 0xb0, 0x98, 0xc2, 0x00, 0xd0,
+ 0x08, 0xb0, 0x89, 0x83, 0x08, 0xb0, 0x80, 0xc2, 0x00, 0xd0, 0x08, 0xb0,
+ 0x79, 0x83, 0x08, 0xb0, 0x70, 0x97, 0x08, 0xb0, 0x69, 0x8b, 0x08, 0xb0,
+ 0x59, 0x83, 0x08, 0xb0, 0x08, 0x97, 0x08, 0xb0, 0x28, 0x8b, 0x08, 0xb0,
+ 0x18, 0x45, 0x03, 0x14, 0xc2, 0xf3, 0x8b, 0x4b, 0x07, 0x2a, 0xc2, 0xf4,
+ 0x2e, 0x4a, 0x9f, 0x5e, 0xc2, 0xf4, 0x3a, 0x0a, 0x42, 0xf4, 0x46, 0x48,
+ 0xba, 0x9a, 0xc2, 0xf4, 0x52, 0x47, 0x0b, 0x18, 0xc2, 0xf4, 0x64, 0x4d,
+ 0x77, 0x1f, 0xc2, 0xf4, 0xcb, 0xd0, 0x08, 0xf7, 0x00, 0x16, 0x31, 0x47,
+ 0x5e, 0xa8, 0xc2, 0xf4, 0xd7, 0xcb, 0x98, 0x21, 0x00, 0x16, 0xf9, 0xc4,
+ 0x0d, 0xe4, 0x05, 0x3c, 0x48, 0x45, 0x00, 0x2d, 0xc2, 0xf4, 0xe3, 0x07,
+ 0xc2, 0xf4, 0xf5, 0xca, 0x9f, 0xb8, 0x00, 0x16, 0xf1, 0x46, 0x0c, 0x27,
+ 0x42, 0xf4, 0xff, 0x44, 0x00, 0x4a, 0xc2, 0xf5, 0x1d, 0xcc, 0x79, 0x0e,
+ 0x08, 0x3d, 0xb9, 0x42, 0x00, 0x27, 0x42, 0xf5, 0x2f, 0xcb, 0x23, 0x34,
+ 0x00, 0x16, 0x03, 0x02, 0xf5, 0x39, 0xcb, 0x1f, 0x0d, 0x00, 0x16, 0x59,
+ 0xcb, 0x8f, 0x10, 0x00, 0x87, 0xe0, 0xcd, 0x80, 0x0f, 0x08, 0x3d, 0xa9,
+ 0x45, 0x3f, 0x0e, 0x42, 0xf5, 0x3f, 0xcb, 0x83, 0xe6, 0x08, 0x3d, 0xb1,
+ 0x11, 0x42, 0xf5, 0x4b, 0xcd, 0x7c, 0xdc, 0x08, 0x3d, 0xc1, 0xc9, 0x2d,
+ 0x85, 0x00, 0x15, 0xe1, 0xcb, 0x83, 0x0e, 0x00, 0x16, 0x50, 0xc4, 0x18,
+ 0x26, 0x00, 0x15, 0xc9, 0xc8, 0x60, 0xf4, 0x00, 0x16, 0xb0, 0xcb, 0x52,
+ 0x55, 0x00, 0x15, 0xd9, 0xcf, 0x33, 0x1a, 0x00, 0x16, 0x80, 0x42, 0x00,
+ 0x7f, 0xc2, 0xf5, 0x5d, 0xca, 0xa4, 0xea, 0x00, 0x17, 0x69, 0x95, 0x05,
+ 0x3b, 0x80, 0xcc, 0x36, 0x87, 0x00, 0x16, 0x41, 0xc6, 0xc1, 0x63, 0x00,
+ 0x17, 0x60, 0xc5, 0x60, 0xb2, 0x00, 0x16, 0x49, 0x0b, 0x42, 0xf5, 0x69,
+ 0x45, 0xd7, 0xc7, 0xc2, 0xf5, 0x73, 0x43, 0x02, 0x9c, 0x42, 0xf5, 0x7f,
+ 0x44, 0x08, 0xcc, 0xc2, 0xf5, 0x8b, 0xd4, 0x33, 0x15, 0x00, 0x16, 0x88,
+ 0xd6, 0x2f, 0x30, 0x00, 0x17, 0x51, 0xd7, 0x2b, 0x51, 0x00, 0x17, 0x58,
+ 0xc4, 0x38, 0x2c, 0x0e, 0xb7, 0x20, 0xc2, 0x01, 0x6f, 0x0e, 0xb7, 0x41,
+ 0xc6, 0x10, 0x3f, 0x0e, 0xb7, 0x30, 0xc4, 0xdb, 0x4c, 0x0e, 0xb7, 0x28,
+ 0xc2, 0x00, 0x0a, 0x0e, 0xb7, 0xc0, 0xc3, 0x04, 0x87, 0x0e, 0xb7, 0x18,
+ 0xc4, 0xde, 0x3f, 0x0e, 0xb7, 0x10, 0x0f, 0x42, 0xf5, 0x9d, 0xc2, 0x00,
+ 0xba, 0x0e, 0xb7, 0xc9, 0xc2, 0x00, 0x0a, 0x0e, 0xb7, 0xb9, 0x8b, 0x0e,
+ 0xb7, 0x88, 0xc6, 0x10, 0x3f, 0x0e, 0xb7, 0xb0, 0xc2, 0x20, 0xec, 0x0e,
+ 0xb7, 0xa9, 0xc4, 0x89, 0xfe, 0x0e, 0xb7, 0x4a, 0x02, 0xf5, 0xa9, 0xc4,
+ 0x1a, 0x73, 0x0e, 0xb7, 0xa0, 0xc2, 0x01, 0x23, 0x0e, 0xb7, 0x90, 0x8b,
+ 0x0e, 0xb7, 0x78, 0x97, 0x0e, 0xb7, 0x70, 0x97, 0x0e, 0xb7, 0x68, 0xc4,
+ 0xdd, 0x9a, 0x0e, 0xb7, 0x60, 0xc4, 0x8b, 0x66, 0x0e, 0xb7, 0x58, 0xc3,
+ 0x01, 0xbb, 0x0e, 0xb7, 0x50, 0xc3, 0x04, 0x87, 0x0e, 0xb7, 0x38, 0x0f,
+ 0x42, 0xf5, 0xaf, 0xc2, 0x00, 0xba, 0x0e, 0xb8, 0x99, 0xc2, 0x00, 0x0a,
+ 0x0e, 0xb8, 0x89, 0x8b, 0x0e, 0xb8, 0x58, 0xc2, 0x00, 0x0a, 0x0e, 0xb8,
+ 0x90, 0xc6, 0x10, 0x3f, 0x0e, 0xb8, 0x80, 0xc2, 0x20, 0xec, 0x0e, 0xb8,
+ 0x79, 0xc4, 0x89, 0xfe, 0x0e, 0xb8, 0x18, 0xc4, 0x1a, 0x73, 0x0e, 0xb8,
+ 0x70, 0xca, 0x91, 0x2c, 0x0e, 0xb8, 0x68, 0xc2, 0x01, 0x23, 0x0e, 0xb8,
+ 0x60, 0x8b, 0x0e, 0xb8, 0x48, 0x97, 0x0e, 0xb8, 0x40, 0x97, 0x0e, 0xb8,
+ 0x38, 0xc4, 0xdd, 0x9a, 0x0e, 0xb8, 0x30, 0xc4, 0x8b, 0x66, 0x0e, 0xb8,
+ 0x28, 0xc3, 0x01, 0xbb, 0x0e, 0xb8, 0x20, 0xc2, 0x01, 0x6f, 0x0e, 0xb8,
+ 0x11, 0xc6, 0x10, 0x3f, 0x0e, 0xb8, 0x00, 0xc3, 0x04, 0x87, 0x0e, 0xb8,
+ 0x08, 0xc4, 0xdb, 0x4c, 0x0e, 0xb7, 0xf9, 0x47, 0x3b, 0xc4, 0x42, 0xf5,
+ 0xbb, 0xc4, 0x38, 0x2c, 0x0e, 0xb7, 0xf0, 0xc3, 0x04, 0x87, 0x0e, 0xb7,
+ 0xe8, 0xc4, 0xde, 0x3f, 0x0e, 0xb7, 0xe0, 0x9c, 0x0e, 0xa1, 0x9b, 0x02,
+ 0xf5, 0xc3, 0x9b, 0x0e, 0xa1, 0x91, 0x9a, 0x0e, 0xa1, 0x8b, 0x02, 0xf5,
+ 0xc9, 0x99, 0x0e, 0xa1, 0x81, 0x98, 0x0e, 0xa1, 0x79, 0x97, 0x0e, 0xa1,
+ 0x73, 0x02, 0xf5, 0xcd, 0x86, 0x0e, 0xa0, 0xeb, 0x02, 0xf5, 0xd3, 0x91,
+ 0x0e, 0xa1, 0x43, 0x02, 0xf5, 0xdf, 0x92, 0x0e, 0xa1, 0x4b, 0x02, 0xf5,
+ 0xe3, 0x85, 0x0e, 0xa0, 0xe3, 0x02, 0xf5, 0xf3, 0x96, 0x0e, 0xa1, 0x6b,
+ 0x02, 0xf5, 0xf9, 0x95, 0x0e, 0xa1, 0x63, 0x02, 0xf6, 0x05, 0x88, 0x0e,
+ 0xa0, 0xfb, 0x02, 0xf6, 0x0b, 0x94, 0x0e, 0xa1, 0x5b, 0x02, 0xf6, 0x11,
+ 0x90, 0x0e, 0xa1, 0x3b, 0x02, 0xf6, 0x17, 0x8f, 0x0e, 0xa1, 0x33, 0x02,
+ 0xf6, 0x1b, 0x8e, 0x0e, 0xa1, 0x2b, 0x02, 0xf6, 0x1f, 0x8d, 0x0e, 0xa1,
+ 0x23, 0x02, 0xf6, 0x25, 0x8b, 0x0e, 0xa1, 0x13, 0x02, 0xf6, 0x2b, 0x87,
+ 0x0e, 0xa0, 0xf3, 0x02, 0xf6, 0x31, 0x89, 0x0e, 0xa1, 0x03, 0x02, 0xf6,
+ 0x3d, 0x84, 0x0e, 0xa0, 0xdb, 0x02, 0xf6, 0x43, 0x83, 0x0e, 0xa0, 0xd3,
+ 0x02, 0xf6, 0x49, 0x93, 0x0e, 0xa1, 0x51, 0x8c, 0x0e, 0xa1, 0x19, 0x8a,
+ 0x0e, 0xa1, 0x08, 0x46, 0x03, 0x13, 0xc2, 0xf6, 0x4f, 0x48, 0x0b, 0x17,
+ 0x42, 0xf6, 0xb7, 0xc4, 0x18, 0x10, 0x0e, 0xbe, 0xa9, 0xc2, 0x22, 0xcc,
+ 0x0e, 0xbe, 0xa0, 0xc3, 0x0d, 0x14, 0x0e, 0xbe, 0x99, 0xc3, 0x09, 0x9e,
+ 0x0e, 0xbe, 0x90, 0xc4, 0x02, 0xde, 0x0e, 0xbe, 0x89, 0xc2, 0x02, 0xa0,
+ 0x0e, 0xbe, 0x80, 0xc6, 0x51, 0x50, 0x0e, 0xbe, 0x51, 0xc4, 0xdb, 0x4c,
+ 0x0e, 0xb5, 0x58, 0x0f, 0x42, 0xf7, 0x1f, 0xc2, 0x00, 0xba, 0x0e, 0xb5,
+ 0xf9, 0xc2, 0x00, 0x0a, 0x0e, 0xb5, 0xe9, 0x8b, 0x0e, 0xb5, 0xb8, 0xc2,
+ 0x00, 0x0a, 0x0e, 0xb5, 0xf0, 0xc6, 0x10, 0x3f, 0x0e, 0xb5, 0xe0, 0xc2,
+ 0x20, 0xec, 0x0e, 0xb5, 0xd9, 0xc4, 0x89, 0xfe, 0x0e, 0xb5, 0x7a, 0x02,
+ 0xf7, 0x2b, 0xc4, 0x1a, 0x73, 0x0e, 0xb5, 0xd0, 0xc2, 0x01, 0x23, 0x0e,
+ 0xb5, 0xc0, 0x8b, 0x0e, 0xb5, 0xa8, 0x97, 0x0e, 0xb5, 0xa0, 0x97, 0x0e,
+ 0xb5, 0x98, 0xc4, 0xdd, 0x9a, 0x0e, 0xb5, 0x90, 0xc4, 0x8b, 0x66, 0x0e,
+ 0xb5, 0x88, 0xc3, 0x01, 0xbb, 0x0e, 0xb5, 0x80, 0xc2, 0x01, 0x6f, 0x0e,
+ 0xb5, 0x71, 0xc6, 0x10, 0x3f, 0x0e, 0xb5, 0x60, 0xc3, 0x04, 0x87, 0x0e,
+ 0xb5, 0x68, 0xc4, 0x38, 0x2c, 0x0e, 0xb5, 0x50, 0xc3, 0x04, 0x87, 0x0e,
+ 0xb5, 0x48, 0xc4, 0xde, 0x3f, 0x0e, 0xb5, 0x40, 0xc8, 0x9c, 0x0e, 0x0e,
+ 0xba, 0xa9, 0xc9, 0xaa, 0x9e, 0x0e, 0xba, 0x99, 0xd3, 0x43, 0x00, 0x0e,
+ 0xba, 0x78, 0x91, 0x0e, 0xa4, 0x83, 0x02, 0xf7, 0x31, 0x92, 0x0e, 0xa4,
+ 0x8b, 0x02, 0xf7, 0x35, 0x85, 0x0e, 0xa4, 0x23, 0x02, 0xf7, 0x45, 0x97,
+ 0x0e, 0xa4, 0xb3, 0x02, 0xf7, 0x4b, 0x96, 0x0e, 0xa4, 0xab, 0x02, 0xf7,
+ 0x51, 0x95, 0x0e, 0xa4, 0xa3, 0x02, 0xf7, 0x5d, 0x88, 0x0e, 0xa4, 0x3b,
+ 0x02, 0xf7, 0x63, 0x94, 0x0e, 0xa4, 0x9b, 0x02, 0xf7, 0x69, 0x9a, 0x0e,
+ 0xa4, 0xcb, 0x02, 0xf7, 0x6f, 0x90, 0x0e, 0xa4, 0x7b, 0x02, 0xf7, 0x73,
+ 0x8f, 0x0e, 0xa4, 0x73, 0x02, 0xf7, 0x77, 0x8e, 0x0e, 0xa4, 0x6b, 0x02,
+ 0xf7, 0x7b, 0x8d, 0x0e, 0xa4, 0x63, 0x02, 0xf7, 0x81, 0x8b, 0x0e, 0xa4,
+ 0x53, 0x02, 0xf7, 0x87, 0x87, 0x0e, 0xa4, 0x33, 0x02, 0xf7, 0x8d, 0x9c,
+ 0x0e, 0xa4, 0xdb, 0x02, 0xf7, 0x99, 0x86, 0x0e, 0xa4, 0x2b, 0x02, 0xf7,
+ 0x9f, 0x89, 0x0e, 0xa4, 0x43, 0x02, 0xf7, 0xa5, 0x84, 0x0e, 0xa4, 0x1b,
+ 0x02, 0xf7, 0xab, 0x83, 0x0e, 0xa4, 0x13, 0x02, 0xf7, 0xb1, 0x9b, 0x0e,
+ 0xa4, 0xd1, 0x99, 0x0e, 0xa4, 0xc1, 0x98, 0x0e, 0xa4, 0xb9, 0x93, 0x0e,
+ 0xa4, 0x91, 0x8c, 0x0e, 0xa4, 0x59, 0x8a, 0x0e, 0xa4, 0x48, 0x91, 0x0e,
+ 0xa3, 0xb3, 0x02, 0xf7, 0xb7, 0x92, 0x0e, 0xa3, 0xbb, 0x02, 0xf7, 0xbb,
+ 0x85, 0x0e, 0xa3, 0x53, 0x02, 0xf7, 0xcb, 0x97, 0x0e, 0xa3, 0xe3, 0x02,
+ 0xf7, 0xd1, 0x96, 0x0e, 0xa3, 0xdb, 0x02, 0xf7, 0xd7, 0x95, 0x0e, 0xa3,
+ 0xd3, 0x02, 0xf7, 0xe6, 0x94, 0x0e, 0xa3, 0xcb, 0x02, 0xf7, 0xec, 0x9a,
+ 0x0e, 0xa3, 0xfb, 0x02, 0xf7, 0xf2, 0x90, 0x0e, 0xa3, 0xab, 0x02, 0xf7,
+ 0xf6, 0x8f, 0x0e, 0xa3, 0xa3, 0x02, 0xf7, 0xfa, 0x8e, 0x0e, 0xa3, 0x9b,
+ 0x02, 0xf7, 0xfe, 0x8d, 0x0e, 0xa3, 0x93, 0x02, 0xf8, 0x04, 0x8b, 0x0e,
+ 0xa3, 0x83, 0x02, 0xf8, 0x0a, 0x87, 0x0e, 0xa3, 0x63, 0x02, 0xf8, 0x10,
+ 0x9c, 0x0e, 0xa4, 0x0b, 0x02, 0xf8, 0x1c, 0x86, 0x0e, 0xa3, 0x5b, 0x02,
+ 0xf8, 0x22, 0x89, 0x0e, 0xa3, 0x73, 0x02, 0xf8, 0x28, 0x84, 0x0e, 0xa3,
+ 0x4b, 0x02, 0xf8, 0x2e, 0x83, 0x0e, 0xa3, 0x43, 0x02, 0xf8, 0x34, 0x9b,
+ 0x0e, 0xa4, 0x01, 0x99, 0x0e, 0xa3, 0xf1, 0x98, 0x0e, 0xa3, 0xe9, 0x93,
+ 0x0e, 0xa3, 0xc1, 0x8c, 0x0e, 0xa3, 0x89, 0x8a, 0x0e, 0xa3, 0x79, 0x88,
+ 0x0e, 0xa3, 0x68, 0x9c, 0x0e, 0xac, 0xf9, 0x9b, 0x0e, 0xac, 0xf1, 0x9a,
+ 0x0e, 0xac, 0xe9, 0x99, 0x0e, 0xac, 0xe1, 0x98, 0x0e, 0xac, 0xd9, 0x97,
+ 0x0e, 0xac, 0xd1, 0x96, 0x0e, 0xac, 0xc9, 0x95, 0x0e, 0xac, 0xc1, 0x94,
+ 0x0e, 0xac, 0xb9, 0x93, 0x0e, 0xac, 0xb1, 0x92, 0x0e, 0xac, 0xa9, 0x91,
+ 0x0e, 0xac, 0xa1, 0x90, 0x0e, 0xac, 0x99, 0x8f, 0x0e, 0xac, 0x91, 0x8e,
+ 0x0e, 0xac, 0x89, 0x8d, 0x0e, 0xac, 0x81, 0x8c, 0x0e, 0xac, 0x79, 0x8b,
+ 0x0e, 0xac, 0x71, 0x8a, 0x0e, 0xac, 0x69, 0x89, 0x0e, 0xac, 0x61, 0x88,
+ 0x0e, 0xac, 0x59, 0x87, 0x0e, 0xac, 0x51, 0x86, 0x0e, 0xac, 0x49, 0x85,
+ 0x0e, 0xac, 0x41, 0x84, 0x0e, 0xac, 0x39, 0x83, 0x0e, 0xac, 0x30, 0x9c,
+ 0x0e, 0xac, 0x29, 0x9b, 0x0e, 0xac, 0x21, 0x9a, 0x0e, 0xac, 0x19, 0x99,
+ 0x0e, 0xac, 0x11, 0x98, 0x0e, 0xac, 0x09, 0x97, 0x0e, 0xac, 0x01, 0x96,
+ 0x0e, 0xab, 0xf9, 0x95, 0x0e, 0xab, 0xf1, 0x94, 0x0e, 0xab, 0xe9, 0x93,
+ 0x0e, 0xab, 0xe1, 0x92, 0x0e, 0xab, 0xd9, 0x91, 0x0e, 0xab, 0xd1, 0x90,
+ 0x0e, 0xab, 0xc9, 0x8f, 0x0e, 0xab, 0xc1, 0x8e, 0x0e, 0xab, 0xb9, 0x8d,
+ 0x0e, 0xab, 0xb1, 0x8c, 0x0e, 0xab, 0xa9, 0x8b, 0x0e, 0xab, 0xa1, 0x8a,
+ 0x0e, 0xab, 0x99, 0x89, 0x0e, 0xab, 0x91, 0x88, 0x0e, 0xab, 0x89, 0x87,
+ 0x0e, 0xab, 0x81, 0x86, 0x0e, 0xab, 0x79, 0x85, 0x0e, 0xab, 0x71, 0x84,
+ 0x0e, 0xab, 0x69, 0x83, 0x0e, 0xab, 0x60, 0xc4, 0x18, 0x10, 0x0e, 0xbf,
+ 0xe9, 0xc2, 0x22, 0xcc, 0x0e, 0xbf, 0xe0, 0xc3, 0x0d, 0x14, 0x0e, 0xbf,
+ 0xd9, 0xc3, 0x09, 0x9e, 0x0e, 0xbf, 0xd0, 0xc4, 0x02, 0xde, 0x0e, 0xbf,
+ 0xc9, 0xc2, 0x02, 0xa0, 0x0e, 0xbf, 0xc0, 0x46, 0x09, 0x97, 0xc2, 0xf8,
+ 0x3a, 0x47, 0xc7, 0x4a, 0xc2, 0xf8, 0x5e, 0x12, 0xc2, 0xf8, 0x8c, 0xca,
+ 0x9c, 0xac, 0x0e, 0xbc, 0x71, 0xcc, 0x8b, 0x65, 0x0e, 0xbc, 0x61, 0xcc,
+ 0x89, 0xfd, 0x0e, 0xbc, 0x59, 0xce, 0x10, 0x3e, 0x0e, 0xbc, 0x51, 0x46,
+ 0x03, 0x13, 0xc2, 0xf8, 0x9e, 0xc5, 0xdb, 0xf0, 0x0e, 0xbb, 0x79, 0x48,
+ 0x0b, 0x17, 0x42, 0xf9, 0x42, 0xc4, 0x26, 0x78, 0x0e, 0xbf, 0x59, 0xc5,
+ 0x06, 0xdb, 0x0e, 0xbf, 0x51, 0x15, 0xc2, 0xf9, 0xe3, 0x08, 0xc2, 0xf9,
+ 0xef, 0x16, 0xc2, 0xf9, 0xfb, 0xc3, 0x05, 0x14, 0x0e, 0xbf, 0x19, 0xc4,
+ 0x15, 0xe7, 0x0e, 0xbf, 0x10, 0x46, 0x03, 0x13, 0xc2, 0xfa, 0x07, 0x48,
+ 0x0b, 0x17, 0x42, 0xfa, 0x6f, 0x9c, 0x0e, 0xae, 0x99, 0x9b, 0x0e, 0xae,
+ 0x91, 0x9a, 0x0e, 0xae, 0x89, 0x99, 0x0e, 0xae, 0x81, 0x98, 0x0e, 0xae,
+ 0x79, 0x97, 0x0e, 0xae, 0x71, 0x96, 0x0e, 0xae, 0x69, 0x95, 0x0e, 0xae,
+ 0x61, 0x94, 0x0e, 0xae, 0x59, 0x93, 0x0e, 0xae, 0x51, 0x92, 0x0e, 0xae,
+ 0x49, 0x91, 0x0e, 0xae, 0x41, 0x90, 0x0e, 0xae, 0x39, 0x8f, 0x0e, 0xae,
+ 0x31, 0x8e, 0x0e, 0xae, 0x29, 0x8d, 0x0e, 0xae, 0x21, 0x8c, 0x0e, 0xae,
+ 0x19, 0x8b, 0x0e, 0xae, 0x11, 0x8a, 0x0e, 0xae, 0x09, 0x89, 0x0e, 0xae,
+ 0x01, 0x88, 0x0e, 0xad, 0xf9, 0x87, 0x0e, 0xad, 0xf1, 0x86, 0x0e, 0xad,
+ 0xe9, 0x85, 0x0e, 0xad, 0xe1, 0x84, 0x0e, 0xad, 0xd9, 0x83, 0x0e, 0xad,
+ 0xd0, 0x9c, 0x0e, 0xad, 0xc9, 0x9b, 0x0e, 0xad, 0xc1, 0x9a, 0x0e, 0xad,
+ 0xb9, 0x99, 0x0e, 0xad, 0xb1, 0x98, 0x0e, 0xad, 0xa9, 0x97, 0x0e, 0xad,
+ 0xa1, 0x96, 0x0e, 0xad, 0x99, 0x95, 0x0e, 0xad, 0x91, 0x94, 0x0e, 0xad,
+ 0x89, 0x93, 0x0e, 0xad, 0x81, 0x92, 0x0e, 0xad, 0x79, 0x91, 0x0e, 0xad,
+ 0x71, 0x90, 0x0e, 0xad, 0x69, 0x8f, 0x0e, 0xad, 0x61, 0x8e, 0x0e, 0xad,
+ 0x59, 0x8d, 0x0e, 0xad, 0x51, 0x8c, 0x0e, 0xad, 0x49, 0x8b, 0x0e, 0xad,
+ 0x41, 0x8a, 0x0e, 0xad, 0x39, 0x89, 0x0e, 0xad, 0x31, 0x88, 0x0e, 0xad,
+ 0x29, 0x87, 0x0e, 0xad, 0x21, 0x86, 0x0e, 0xad, 0x19, 0x85, 0x0e, 0xad,
+ 0x11, 0x84, 0x0e, 0xad, 0x09, 0x83, 0x0e, 0xad, 0x00, 0x9c, 0x0e, 0xa6,
+ 0x79, 0x9b, 0x0e, 0xa6, 0x71, 0x9a, 0x0e, 0xa6, 0x69, 0x99, 0x0e, 0xa6,
+ 0x61, 0x98, 0x0e, 0xa6, 0x59, 0x97, 0x0e, 0xa6, 0x51, 0x96, 0x0e, 0xa6,
+ 0x49, 0x95, 0x0e, 0xa6, 0x41, 0x94, 0x0e, 0xa6, 0x39, 0x93, 0x0e, 0xa6,
+ 0x31, 0x92, 0x0e, 0xa6, 0x29, 0x90, 0x0e, 0xa6, 0x19, 0x8f, 0x0e, 0xa6,
+ 0x11, 0x8e, 0x0e, 0xa6, 0x09, 0x8d, 0x0e, 0xa6, 0x01, 0x8c, 0x0e, 0xa5,
+ 0xf9, 0x8b, 0x0e, 0xa5, 0xf1, 0x8a, 0x0e, 0xa5, 0xe9, 0x88, 0x0e, 0xa5,
+ 0xd9, 0x86, 0x0e, 0xa5, 0xc9, 0x85, 0x0e, 0xa5, 0xc1, 0x84, 0x0e, 0xa5,
+ 0xb9, 0x83, 0x0e, 0xa5, 0xb0, 0x9c, 0x0e, 0xa5, 0xa9, 0x9b, 0x0e, 0xa5,
+ 0xa1, 0x9a, 0x0e, 0xa5, 0x99, 0x99, 0x0e, 0xa5, 0x91, 0x98, 0x0e, 0xa5,
+ 0x89, 0x97, 0x0e, 0xa5, 0x81, 0x96, 0x0e, 0xa5, 0x79, 0x95, 0x0e, 0xa5,
+ 0x71, 0x93, 0x0e, 0xa5, 0x61, 0x92, 0x0e, 0xa5, 0x59, 0x91, 0x0e, 0xa5,
+ 0x51, 0x90, 0x0e, 0xa5, 0x49, 0x8d, 0x0e, 0xa5, 0x31, 0x8c, 0x0e, 0xa5,
+ 0x29, 0x89, 0x0e, 0xa5, 0x11, 0x86, 0x0e, 0xa4, 0xf9, 0x85, 0x0e, 0xa4,
+ 0xf1, 0x83, 0x0e, 0xa4, 0xe0, 0xc4, 0x18, 0x10, 0x0e, 0xbe, 0xf9, 0xc2,
+ 0x22, 0xcc, 0x0e, 0xbe, 0xf0, 0xc3, 0x0d, 0x14, 0x0e, 0xbe, 0xe9, 0xc3,
+ 0x09, 0x9e, 0x0e, 0xbe, 0xe0, 0xc4, 0x02, 0xde, 0x0e, 0xbe, 0xd9, 0xc2,
+ 0x02, 0xa0, 0x0e, 0xbe, 0xd0, 0x9c, 0x0e, 0xa9, 0xb9, 0x9b, 0x0e, 0xa9,
+ 0xb1, 0x9a, 0x0e, 0xa9, 0xa9, 0x99, 0x0e, 0xa9, 0xa1, 0x98, 0x0e, 0xa9,
+ 0x99, 0x97, 0x0e, 0xa9, 0x91, 0x96, 0x0e, 0xa9, 0x89, 0x95, 0x0e, 0xa9,
+ 0x81, 0x94, 0x0e, 0xa9, 0x79, 0x93, 0x0e, 0xa9, 0x71, 0x92, 0x0e, 0xa9,
+ 0x69, 0x91, 0x0e, 0xa9, 0x61, 0x90, 0x0e, 0xa9, 0x59, 0x8f, 0x0e, 0xa9,
+ 0x51, 0x8e, 0x0e, 0xa9, 0x49, 0x8d, 0x0e, 0xa9, 0x41, 0x8c, 0x0e, 0xa9,
+ 0x39, 0x8b, 0x0e, 0xa9, 0x31, 0x8a, 0x0e, 0xa9, 0x29, 0x89, 0x0e, 0xa9,
+ 0x21, 0x88, 0x0e, 0xa9, 0x19, 0x87, 0x0e, 0xa9, 0x11, 0x86, 0x0e, 0xa9,
+ 0x09, 0x85, 0x0e, 0xa9, 0x01, 0x84, 0x0e, 0xa8, 0xf9, 0x83, 0x0e, 0xa8,
+ 0xf0, 0x9b, 0x0e, 0xa8, 0xe1, 0x9a, 0x0e, 0xa8, 0xd9, 0x99, 0x0e, 0xa8,
+ 0xd1, 0x98, 0x0e, 0xa8, 0xc9, 0x97, 0x0e, 0xa8, 0xc1, 0x96, 0x0e, 0xa8,
+ 0xb9, 0x95, 0x0e, 0xa8, 0xb1, 0x93, 0x0e, 0xa8, 0xa1, 0x92, 0x0e, 0xa8,
+ 0x99, 0x91, 0x0e, 0xa8, 0x91, 0x90, 0x0e, 0xa8, 0x89, 0x8f, 0x0e, 0xa8,
+ 0x81, 0x8e, 0x0e, 0xa8, 0x79, 0x8d, 0x0e, 0xa8, 0x71, 0x8c, 0x0e, 0xa8,
+ 0x69, 0x89, 0x0e, 0xa8, 0x51, 0x88, 0x0e, 0xa8, 0x49, 0x87, 0x0e, 0xa8,
+ 0x41, 0x86, 0x0e, 0xa8, 0x39, 0x84, 0x0e, 0xa8, 0x29, 0x83, 0x0e, 0xa8,
+ 0x20, 0xd6, 0x08, 0x88, 0x01, 0x3f, 0x69, 0xce, 0x25, 0xad, 0x01, 0x3f,
+ 0x38, 0x97, 0x08, 0xe9, 0xf9, 0x8b, 0x08, 0xe9, 0xe1, 0x83, 0x08, 0xe9,
+ 0x88, 0x97, 0x08, 0xe9, 0xa8, 0x8b, 0x08, 0xe9, 0x98, 0xc2, 0x00, 0xd0,
+ 0x08, 0xe8, 0xb9, 0x83, 0x08, 0xe8, 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xe8,
+ 0xc9, 0x83, 0x08, 0xe8, 0xc0, 0x83, 0x08, 0xe5, 0x69, 0xc2, 0x00, 0xd0,
+ 0x08, 0xe5, 0x60, 0x83, 0x08, 0xe5, 0x39, 0xc2, 0x00, 0xd0, 0x08, 0xe5,
+ 0x30, 0xc2, 0x02, 0x1c, 0x08, 0xe5, 0x21, 0x83, 0x08, 0xe4, 0xe0, 0x15,
+ 0xc2, 0xfa, 0xd7, 0xc2, 0x00, 0xd0, 0x08, 0xe4, 0xd9, 0x83, 0x08, 0xe4,
+ 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xe4, 0xf9, 0x83, 0x08, 0xe4, 0xf0, 0x83,
+ 0x08, 0xe4, 0xe9, 0xc2, 0x19, 0x2c, 0x08, 0xe4, 0xc9, 0xc2, 0x01, 0x30,
+ 0x08, 0xe4, 0xa8, 0xc2, 0x00, 0xd0, 0x08, 0xe4, 0xb9, 0x83, 0x08, 0xe4,
+ 0xb0, 0xc2, 0x00, 0xd0, 0x08, 0xe4, 0x99, 0x83, 0x08, 0xe4, 0x90, 0xc2,
+ 0x00, 0xd0, 0x08, 0xe4, 0x19, 0x83, 0x08, 0xe4, 0x10, 0xc5, 0x40, 0xe7,
+ 0x00, 0x68, 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x6a, 0x68, 0x94, 0x00, 0x68,
+ 0x5b, 0x02, 0xfa, 0xe1, 0x8e, 0x00, 0x68, 0x62, 0x02, 0xfa, 0xe5, 0x83,
+ 0x00, 0x69, 0x19, 0xc2, 0x00, 0xc1, 0x00, 0x69, 0x48, 0x83, 0x00, 0x68,
+ 0xd9, 0x45, 0xd4, 0x7a, 0x42, 0xfa, 0xe9, 0x83, 0x00, 0x68, 0xf9, 0xc2,
+ 0x00, 0xd0, 0x00, 0x69, 0x01, 0xc2, 0x01, 0x6f, 0x00, 0x69, 0xd0, 0x83,
+ 0x00, 0x69, 0x09, 0xc2, 0x00, 0xd0, 0x00, 0x69, 0x10, 0x83, 0x00, 0x69,
+ 0x99, 0xc2, 0x00, 0xdb, 0x00, 0x69, 0xa0, 0x94, 0x00, 0x6a, 0x20, 0x8e,
+ 0x00, 0x6b, 0x18, 0xc7, 0xc7, 0x74, 0x00, 0x6a, 0xc9, 0xc4, 0x9c, 0x07,
+ 0x00, 0x6a, 0xf0, 0xc8, 0x1e, 0x16, 0x00, 0x6a, 0xd9, 0xc4, 0x0f, 0x1f,
+ 0x00, 0x6a, 0xe0, 0xc2, 0x02, 0xa0, 0x00, 0x6b, 0x41, 0xc4, 0x02, 0xde,
+ 0x00, 0x6b, 0x48, 0xc3, 0x09, 0x9e, 0x00, 0x6b, 0x51, 0xc3, 0x0d, 0x14,
+ 0x00, 0x6b, 0x58, 0xc2, 0x22, 0xcc, 0x00, 0x6b, 0x61, 0xc4, 0x18, 0x10,
+ 0x00, 0x6b, 0x68, 0xcb, 0x44, 0x4b, 0x08, 0x57, 0x98, 0xc3, 0x77, 0x79,
+ 0x08, 0x56, 0xe9, 0xc4, 0xdc, 0x2d, 0x08, 0x56, 0xc0, 0x96, 0x00, 0x42,
+ 0x40, 0x8a, 0x00, 0x42, 0xa1, 0x9c, 0x00, 0x42, 0x88, 0xc2, 0x0d, 0xf6,
+ 0x00, 0x42, 0x48, 0xc2, 0x00, 0x39, 0x08, 0x8b, 0x91, 0x83, 0x08, 0x8b,
+ 0x68, 0xc2, 0x00, 0xd0, 0x08, 0x8b, 0x59, 0x83, 0x08, 0x8b, 0x50, 0xc2,
+ 0x00, 0xd0, 0x08, 0x8b, 0x49, 0x83, 0x08, 0x8b, 0x40, 0x83, 0x08, 0x8b,
+ 0x39, 0xc2, 0x00, 0xc1, 0x08, 0x8b, 0x11, 0xc2, 0x19, 0x2c, 0x08, 0x8a,
+ 0xe8, 0xc2, 0x00, 0xd0, 0x08, 0x8b, 0x31, 0x83, 0x08, 0x8b, 0x29, 0x06,
+ 0x42, 0xfb, 0x09, 0xc2, 0x00, 0xd0, 0x08, 0x8b, 0x21, 0x83, 0x08, 0x8b,
+ 0x19, 0x16, 0x42, 0xfb, 0x13, 0xc2, 0x00, 0xd0, 0x08, 0x8a, 0xe1, 0x83,
+ 0x08, 0x8a, 0xd8, 0xc2, 0x00, 0xd0, 0x08, 0x8a, 0xd1, 0x83, 0x08, 0x8a,
+ 0xc8, 0xc2, 0x00, 0xd0, 0x08, 0x8a, 0xc1, 0x83, 0x08, 0x8a, 0xb8, 0xc2,
+ 0x00, 0xd0, 0x08, 0x8a, 0xb1, 0x83, 0x08, 0x8a, 0xa8, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x71, 0xc6, 0x3a, 0x70, 0x0f, 0x81, 0x00, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x61, 0xc6, 0x3a, 0x70, 0x0f, 0x80, 0xf0, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x69, 0xc6, 0x3a, 0x70, 0x0f, 0x80, 0xf8, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x79, 0xc6, 0x3a, 0x70, 0x0f, 0x81, 0x08, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x39, 0xc6, 0x3a, 0x70, 0x0f, 0x80, 0xc8, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x41, 0xc6, 0x3a, 0x70, 0x0f, 0x80, 0xd0, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x49, 0xc6, 0x3a, 0x70, 0x0f, 0x80, 0xd8, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x51, 0xc6, 0x3a, 0x70, 0x0f, 0x80, 0xe0, 0xc9, 0xa8, 0x4c,
+ 0x0f, 0x80, 0x59, 0xc6, 0x3a, 0x70, 0x0f, 0x80, 0xe8, 0x0d, 0xc2, 0xfb,
+ 0x1d, 0x15, 0xc2, 0xfb, 0x29, 0x12, 0xc2, 0xfb, 0x50, 0x16, 0xc2, 0xfb,
+ 0x6a, 0x05, 0xc2, 0xfb, 0x93, 0x18, 0xc2, 0xfb, 0xb7, 0x09, 0xc2, 0xfb,
+ 0xc3, 0x0f, 0xc2, 0xfb, 0xd6, 0x04, 0xc2, 0xfb, 0xf7, 0x0e, 0xc2, 0xfc,
+ 0x01, 0x08, 0xc2, 0xfc, 0x10, 0x06, 0xc2, 0xfc, 0x36, 0x19, 0xc2, 0xfc,
+ 0x4a, 0x42, 0x00, 0xd0, 0xc2, 0xfc, 0x56, 0x07, 0xc2, 0xfc, 0x62, 0x10,
+ 0xc2, 0xfc, 0x6e, 0x11, 0xc2, 0xfc, 0x86, 0xcd, 0x77, 0x05, 0x0e, 0x8c,
+ 0xc1, 0x9c, 0x0e, 0x8c, 0x71, 0x14, 0xc2, 0xfc, 0x98, 0x4b, 0x99, 0x8c,
+ 0xc2, 0xfc, 0xa0, 0x42, 0x00, 0xe3, 0xc2, 0xfc, 0xac, 0xca, 0x5c, 0x28,
+ 0x0e, 0x8a, 0x18, 0x00, 0x42, 0xfc, 0xb8, 0xc2, 0x00, 0xd0, 0x08, 0x94,
+ 0xa1, 0xc2, 0x0e, 0x9a, 0x08, 0x94, 0x99, 0x83, 0x08, 0x94, 0x90, 0x83,
+ 0x08, 0x94, 0x81, 0xc2, 0x00, 0xd0, 0x08, 0x94, 0x88, 0x9f, 0x00, 0x84,
+ 0x39, 0xa0, 0x00, 0x84, 0x41, 0xa2, 0x00, 0x84, 0x49, 0xa3, 0x00, 0x84,
+ 0x50, 0x45, 0x28, 0xb1, 0xc2, 0xfc, 0xc4, 0xcd, 0x7a, 0x6c, 0x00, 0x84,
+ 0x68, 0xc2, 0x00, 0x39, 0x05, 0x53, 0x99, 0xc2, 0x02, 0x1c, 0x05, 0x53,
+ 0x91, 0xc2, 0x8d, 0x8f, 0x05, 0x53, 0x89, 0xc2, 0x00, 0x87, 0x05, 0x53,
+ 0x79, 0xc3, 0x12, 0xad, 0x05, 0x53, 0x71, 0x0a, 0xc2, 0xfc, 0xcc, 0xc2,
+ 0x0d, 0xf6, 0x05, 0x53, 0x61, 0x10, 0xc2, 0xfc, 0xd6, 0x06, 0xc2, 0xfc,
+ 0xe0, 0x42, 0x02, 0x10, 0xc2, 0xfc, 0xea, 0x0c, 0xc2, 0xfc, 0xf4, 0x05,
+ 0xc2, 0xfc, 0xfe, 0xc2, 0x01, 0x30, 0x05, 0x53, 0x00, 0x04, 0xc2, 0xfd,
+ 0x08, 0x06, 0xc2, 0xfd, 0x12, 0xc3, 0x12, 0xad, 0x05, 0x4f, 0xd1, 0x10,
+ 0xc2, 0xfd, 0x20, 0x0c, 0xc2, 0xfd, 0x2c, 0x09, 0xc2, 0xfd, 0x36, 0xc2,
+ 0x00, 0x87, 0x05, 0x4f, 0x40, 0x42, 0x00, 0xbd, 0x42, 0xfd, 0x40, 0xc3,
+ 0x1c, 0x63, 0x05, 0x53, 0xf1, 0xc3, 0x01, 0xe2, 0x05, 0x53, 0xf8, 0x83,
+ 0x00, 0x82, 0xe1, 0x87, 0x00, 0x82, 0xe8, 0x90, 0x0d, 0x8b, 0x3b, 0x02,
+ 0xfd, 0x82, 0x19, 0xc2, 0xfd, 0x86, 0x83, 0x01, 0x85, 0x09, 0x8b, 0x01,
+ 0x85, 0x19, 0x97, 0x01, 0x85, 0x29, 0x87, 0x01, 0x85, 0x39, 0x91, 0x01,
+ 0x85, 0x49, 0x16, 0xc2, 0xfd, 0x96, 0x1b, 0xc2, 0xfd, 0x9e, 0x0d, 0xc2,
+ 0xfd, 0xaa, 0x15, 0xc2, 0xfd, 0xb6, 0x0a, 0xc2, 0xfd, 0xbe, 0xc2, 0x14,
+ 0x48, 0x01, 0x8f, 0xd1, 0x14, 0x42, 0xfd, 0xd2, 0x87, 0x0d, 0x80, 0x01,
+ 0xc2, 0x14, 0x68, 0x0d, 0x89, 0x11, 0x1b, 0x42, 0xfd, 0xe6, 0x45, 0xd8,
+ 0xdf, 0x42, 0xfd, 0xee, 0x83, 0x00, 0x64, 0x31, 0x8b, 0x00, 0x64, 0x81,
+ 0x97, 0x00, 0x64, 0xa0, 0x8b, 0x00, 0x64, 0x40, 0x97, 0x00, 0x64, 0x50,
+ 0x47, 0xb2, 0x2e, 0xc2, 0xfd, 0xfa, 0x83, 0x00, 0x65, 0xa8, 0x87, 0x00,
+ 0x64, 0x78, 0x91, 0x00, 0x64, 0x98, 0x83, 0x00, 0x64, 0xa9, 0xc2, 0x00,
+ 0xd0, 0x00, 0x64, 0xb0, 0x83, 0x00, 0x64, 0xb9, 0xc2, 0x00, 0xd0, 0x00,
+ 0x64, 0xc0, 0xc2, 0x01, 0x30, 0x00, 0x64, 0xc9, 0xc2, 0x19, 0x2c, 0x00,
+ 0x64, 0xf1, 0xc2, 0x00, 0xc1, 0x00, 0x65, 0x19, 0x83, 0x00, 0x65, 0x42,
+ 0x02, 0xfe, 0x08, 0x83, 0x00, 0x64, 0xd1, 0xc2, 0x00, 0xd0, 0x00, 0x64,
+ 0xd8, 0x83, 0x00, 0x64, 0xe1, 0xc2, 0x00, 0xd0, 0x00, 0x64, 0xe8, 0x16,
+ 0xc2, 0xfe, 0x0e, 0x83, 0x00, 0x65, 0x21, 0xc2, 0x00, 0xd0, 0x00, 0x65,
+ 0x28, 0x06, 0xc2, 0xfe, 0x18, 0x83, 0x00, 0x65, 0x31, 0xc2, 0x00, 0xd0,
+ 0x00, 0x65, 0x38, 0x83, 0x00, 0x65, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0x65,
+ 0x58, 0x83, 0x00, 0x65, 0x61, 0xc2, 0x00, 0xd0, 0x00, 0x65, 0x68, 0x83,
+ 0x00, 0x65, 0x81, 0xc2, 0x00, 0x39, 0x00, 0x65, 0x88, 0x83, 0x00, 0x65,
+ 0x91, 0x0e, 0x42, 0xfe, 0x22, 0xc2, 0x00, 0xd0, 0x00, 0x65, 0xb1, 0xc2,
+ 0x0d, 0xf6, 0x00, 0x65, 0xb9, 0x83, 0x00, 0x65, 0xc0, 0x94, 0x00, 0x66,
+ 0x20, 0x8e, 0x00, 0x67, 0x18, 0xc4, 0xdf, 0x5f, 0x01, 0x79, 0x80, 0xc6,
+ 0x32, 0x33, 0x01, 0x78, 0x81, 0xc4, 0x76, 0x96, 0x01, 0x7c, 0x38, 0xc2,
+ 0x02, 0x6f, 0x01, 0x78, 0x09, 0x86, 0x01, 0x78, 0x39, 0xc2, 0x14, 0xda,
+ 0x01, 0x7b, 0x18, 0xc2, 0x0c, 0xa6, 0x01, 0x78, 0x49, 0x03, 0xc2, 0xfe,
+ 0x2c, 0xc2, 0x13, 0x4c, 0x01, 0x7d, 0x90, 0xc2, 0x00, 0x4e, 0x01, 0x79,
+ 0x51, 0xc2, 0x00, 0x3b, 0x01, 0x7a, 0x58, 0xc7, 0xc4, 0xe2, 0x01, 0x79,
+ 0xa8, 0x96, 0x01, 0x78, 0x13, 0x02, 0xfe, 0x36, 0xc6, 0xd1, 0xc9, 0x01,
+ 0x78, 0x61, 0xc2, 0x00, 0xbf, 0x01, 0x79, 0xf1, 0xc4, 0x17, 0xa1, 0x01,
+ 0x7a, 0x79, 0xc6, 0xce, 0x63, 0x01, 0x7a, 0xc1, 0x89, 0x01, 0x7a, 0xe8,
+ 0xc8, 0xab, 0xad, 0x01, 0x78, 0xc1, 0xc4, 0x02, 0xaf, 0x01, 0x7a, 0x19,
+ 0x15, 0x42, 0xfe, 0x3c, 0x9b, 0x01, 0x79, 0x91, 0xc2, 0x01, 0xdf, 0x01,
+ 0x7e, 0x71, 0xc4, 0x1e, 0x58, 0x01, 0x7e, 0x98, 0xc3, 0x04, 0xa7, 0x01,
+ 0x7a, 0x69, 0xc2, 0x00, 0x29, 0x01, 0x7e, 0x28, 0x03, 0xc2, 0xfe, 0x46,
+ 0xc3, 0x17, 0xbf, 0x01, 0x7a, 0xd0, 0xc4, 0x71, 0x24, 0x01, 0x78, 0x21,
+ 0xc2, 0x00, 0x65, 0x01, 0x78, 0xc9, 0xc2, 0x01, 0xd0, 0x01, 0x7c, 0x19,
+ 0x87, 0x01, 0x7c, 0x90, 0xc3, 0x01, 0x54, 0x01, 0x78, 0xa1, 0xc4, 0xab,
+ 0xbd, 0x01, 0x79, 0x61, 0x07, 0xc2, 0xfe, 0x52, 0xc4, 0xac, 0x23, 0x01,
+ 0x7b, 0x80, 0xc5, 0xd5, 0xd8, 0x01, 0x79, 0x01, 0xc4, 0x07, 0x30, 0x01,
+ 0x7a, 0x10, 0x11, 0xc2, 0xfe, 0x5e, 0x07, 0x42, 0xfe, 0x6a, 0x07, 0xc2,
+ 0xfe, 0x76, 0x11, 0xc2, 0xfe, 0x80, 0xc3, 0x02, 0x9b, 0x01, 0x7b, 0xa0,
+ 0x9b, 0x01, 0x7a, 0x41, 0xce, 0x6e, 0xf2, 0x01, 0x7d, 0xf9, 0xc2, 0x00,
+ 0x72, 0x01, 0x7e, 0x20, 0xc6, 0xcd, 0x0d, 0x01, 0x7b, 0x39, 0xc2, 0x8c,
+ 0x30, 0x01, 0x7b, 0xb8, 0xc2, 0x01, 0x25, 0x01, 0x78, 0x29, 0x14, 0x42,
+ 0xfe, 0x8d, 0x03, 0xc2, 0xfe, 0x97, 0xc2, 0x15, 0x10, 0x01, 0x7e, 0x38,
+ 0x0e, 0xc2, 0xfe, 0xa1, 0xc2, 0x02, 0x35, 0x01, 0x79, 0xf9, 0xc2, 0x01,
+ 0x29, 0x01, 0x7d, 0xe8, 0xc6, 0x07, 0x72, 0x01, 0x79, 0xb0, 0xc3, 0x00,
+ 0x5b, 0x01, 0x78, 0x79, 0xcc, 0x7f, 0x4d, 0x01, 0x7d, 0x89, 0xc2, 0x02,
+ 0xa7, 0x01, 0x7d, 0xe0, 0xc3, 0x10, 0xd0, 0x01, 0x79, 0x89, 0xc3, 0x0f,
+ 0xed, 0x01, 0x7e, 0xa0, 0xc2, 0x02, 0xe0, 0x01, 0x7a, 0x51, 0xc3, 0x00,
+ 0x3a, 0x01, 0x7b, 0x89, 0xc4, 0xe1, 0x6f, 0x01, 0x7e, 0x68, 0xc2, 0x00,
+ 0x49, 0x01, 0x7b, 0x09, 0xc3, 0x09, 0x3b, 0x01, 0x7c, 0x60, 0xc4, 0x5a,
+ 0xfe, 0x01, 0x7c, 0x31, 0xc3, 0x07, 0x6f, 0x01, 0x7e, 0x90, 0x17, 0xc2,
+ 0xfe, 0xad, 0xc2, 0x00, 0x45, 0x01, 0x7a, 0x49, 0x14, 0x42, 0xfe, 0xb7,
+ 0xc3, 0x0d, 0xe5, 0x01, 0x7b, 0x01, 0xc2, 0x00, 0xfe, 0x01, 0x7c, 0x08,
+ 0xc6, 0xcd, 0xa3, 0x01, 0x7c, 0x11, 0xc4, 0x1c, 0xb6, 0x01, 0x7e, 0x48,
+ 0xc3, 0x02, 0x11, 0x01, 0x78, 0x51, 0xc7, 0x63, 0x9d, 0x01, 0x78, 0xf0,
+ 0x94, 0x01, 0x7b, 0xfb, 0x02, 0xfe, 0xc3, 0x96, 0x01, 0x7d, 0xb8, 0xc3,
+ 0x00, 0x74, 0x01, 0x79, 0x18, 0xc3, 0x00, 0x5f, 0x01, 0x78, 0x69, 0xc4,
+ 0xde, 0xc3, 0x01, 0x79, 0x59, 0xc5, 0xda, 0x83, 0x01, 0x7a, 0x81, 0x99,
+ 0x01, 0x7a, 0xe1, 0xc3, 0x04, 0xa0, 0x01, 0x7c, 0x50, 0xc3, 0x43, 0x08,
+ 0x01, 0x78, 0xd1, 0x03, 0xc2, 0xfe, 0xc9, 0xc5, 0x78, 0xee, 0x01, 0x7c,
+ 0x80, 0xc2, 0x00, 0x5f, 0x01, 0x7b, 0x99, 0xc2, 0x00, 0x61, 0x01, 0x7c,
+ 0xf1, 0xc6, 0xc4, 0x8f, 0x01, 0x7e, 0x00, 0xc2, 0x11, 0xee, 0x01, 0x79,
+ 0x41, 0xc4, 0x00, 0x40, 0x01, 0x7c, 0x68, 0xc5, 0xc2, 0xd6, 0x01, 0x78,
+ 0xd9, 0xc6, 0xcc, 0x77, 0x01, 0x7a, 0xf0, 0xc2, 0x00, 0xb6, 0x01, 0x78,
+ 0x99, 0xc3, 0x08, 0x48, 0x01, 0x7d, 0x70, 0xc3, 0x12, 0xb8, 0x01, 0x79,
+ 0x29, 0xc2, 0x01, 0xc5, 0x01, 0x79, 0x78, 0xc4, 0xe1, 0x97, 0x01, 0x7a,
+ 0x71, 0xc2, 0x00, 0x15, 0x01, 0x7c, 0x88, 0xc3, 0x02, 0x11, 0x01, 0x7a,
+ 0xa1, 0xc2, 0x8c, 0x30, 0x01, 0x7d, 0x0a, 0x02, 0xfe, 0xd1, 0xc3, 0x01,
+ 0xfe, 0x01, 0x7b, 0x49, 0xc3, 0x04, 0xa6, 0x01, 0x7e, 0x30, 0x87, 0x01,
+ 0x7d, 0x19, 0x86, 0x01, 0x7d, 0xa8, 0xcc, 0x35, 0x8a, 0x01, 0x78, 0xa9,
+ 0xc3, 0x00, 0x5f, 0x01, 0x79, 0x71, 0xc2, 0x00, 0x89, 0x01, 0x7b, 0xb0,
+ 0x92, 0x01, 0x7a, 0x09, 0xc2, 0x00, 0x74, 0x01, 0x7d, 0x61, 0x96, 0x01,
+ 0x7e, 0x78, 0xc2, 0x00, 0x27, 0x01, 0x7b, 0x71, 0xc3, 0x0d, 0x14, 0x01,
+ 0x7c, 0x20, 0xc7, 0xc5, 0x4b, 0x01, 0x79, 0x11, 0xc2, 0x18, 0xb3, 0x01,
+ 0x7d, 0x30, 0xc2, 0x00, 0x74, 0x01, 0x7b, 0x91, 0xc2, 0x00, 0xcc, 0x01,
+ 0x7c, 0x58, 0x89, 0x01, 0x79, 0x21, 0xc4, 0x02, 0x10, 0x01, 0x7c, 0xf9,
+ 0xc2, 0x01, 0x4c, 0x01, 0x7e, 0x18, 0x99, 0x01, 0x79, 0xc1, 0xcb, 0x93,
+ 0x67, 0x01, 0x7b, 0x31, 0xc2, 0x00, 0xfe, 0x01, 0x7c, 0x41, 0xc2, 0x00,
+ 0x6d, 0x01, 0x7c, 0xe9, 0xc2, 0x02, 0x6f, 0x01, 0x7d, 0xd8, 0xc5, 0xdd,
+ 0xbc, 0x01, 0x79, 0xd1, 0xc4, 0x1e, 0xce, 0x01, 0x7a, 0x01, 0xc3, 0x5d,
+ 0xd1, 0x01, 0x7c, 0x00, 0xc4, 0x8e, 0x88, 0x01, 0x7b, 0xa9, 0xc4, 0xe2,
+ 0xa3, 0x01, 0x7c, 0xc0, 0xc3, 0x29, 0x82, 0x01, 0x7c, 0x71, 0xc2, 0x0f,
+ 0xe1, 0x01, 0x7d, 0x69, 0xc3, 0x00, 0x3a, 0x01, 0x7e, 0x50, 0x96, 0x01,
+ 0x7a, 0x31, 0xc2, 0x00, 0x40, 0x01, 0x7e, 0x80, 0xc2, 0x01, 0x19, 0x01,
+ 0x7a, 0xa9, 0xc3, 0x19, 0x78, 0x01, 0x7b, 0x29, 0xc3, 0x00, 0x2e, 0x01,
+ 0x7d, 0xf1, 0xc2, 0x01, 0xe2, 0x01, 0x7e, 0x10, 0xc4, 0x14, 0xdd, 0x01,
+ 0x7a, 0xb9, 0xc2, 0x00, 0x75, 0x01, 0x7a, 0xd9, 0xc2, 0x00, 0x89, 0x01,
+ 0x7d, 0x78, 0x9b, 0x01, 0x7d, 0xb1, 0xc3, 0x31, 0xf0, 0x01, 0x7e, 0xa8,
+ 0xc6, 0xd2, 0x59, 0x01, 0x7d, 0xc9, 0xc2, 0x13, 0x38, 0x01, 0x7e, 0x60,
+ 0x12, 0xc2, 0xfe, 0xd7, 0x04, 0xc2, 0xfe, 0xe3, 0x45, 0xda, 0x97, 0x42,
+ 0xfe, 0xef, 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xd1, 0xc4, 0xe0, 0xaf, 0x00,
+ 0xcf, 0x50, 0x02, 0xc2, 0xfe, 0xfb, 0x00, 0x42, 0xff, 0x0b, 0xc3, 0x38,
+ 0x5b, 0x00, 0xcf, 0x91, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x10, 0xc3, 0x38,
+ 0x5b, 0x00, 0xcf, 0xa1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x20, 0xc3, 0x38,
+ 0x5b, 0x00, 0xcf, 0x99, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x18, 0xc3, 0x13,
+ 0x3a, 0x00, 0xbf, 0xab, 0x02, 0xff, 0x17, 0xc2, 0x25, 0x9f, 0x00, 0xbf,
+ 0x90, 0xc3, 0xdf, 0x37, 0x00, 0xbf, 0xa1, 0xc2, 0x06, 0xdb, 0x00, 0xbf,
+ 0x98, 0xc8, 0xbe, 0x82, 0x00, 0xbe, 0xa9, 0xc8, 0xb0, 0xc6, 0x00, 0xbe,
+ 0x99, 0xc4, 0xe4, 0x77, 0x00, 0xbe, 0x58, 0x98, 0x00, 0xbd, 0x50, 0x83,
+ 0x08, 0x51, 0xa1, 0xc2, 0x00, 0xd0, 0x08, 0x51, 0x98, 0xce, 0x2a, 0xfe,
+ 0x0f, 0xd0, 0xb1, 0xdb, 0x18, 0x03, 0x0f, 0xd2, 0x00, 0x49, 0x2a, 0xf5,
+ 0x42, 0xff, 0x1b, 0x49, 0x2a, 0xf5, 0x42, 0xff, 0x27, 0xce, 0x2a, 0xfe,
+ 0x0f, 0xd0, 0xc1, 0xdb, 0x18, 0x03, 0x0f, 0xd2, 0x10, 0xce, 0x2a, 0xfe,
+ 0x0f, 0xd0, 0xb9, 0xdb, 0x18, 0x03, 0x0f, 0xd2, 0x08, 0xce, 0x2a, 0xfe,
+ 0x0f, 0xd0, 0xd1, 0xdb, 0x18, 0x03, 0x0f, 0xd2, 0x20, 0xc3, 0x00, 0x74,
+ 0x0f, 0xd1, 0x51, 0xc5, 0x56, 0xa5, 0x0f, 0xd1, 0x70, 0xcb, 0x93, 0xf6,
+ 0x08, 0xa3, 0x09, 0xcb, 0x8f, 0xe1, 0x08, 0xa3, 0x01, 0xce, 0x6b, 0xfe,
+ 0x08, 0xa2, 0x41, 0x03, 0xc2, 0xff, 0x3d, 0xc5, 0x33, 0x5d, 0x08, 0xa2,
+ 0x31, 0x42, 0x07, 0xb2, 0xc2, 0xff, 0x49, 0xcb, 0x1e, 0x89, 0x08, 0xa2,
+ 0x18, 0x8e, 0x08, 0xa0, 0x43, 0x02, 0xff, 0x55, 0x94, 0x08, 0xa0, 0x32,
+ 0x02, 0xff, 0x59, 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0xc9, 0x83, 0x08, 0xa0,
+ 0xc0, 0xc2, 0x00, 0xd0, 0x08, 0xa0, 0x99, 0x83, 0x08, 0xa0, 0x90, 0xc2,
+ 0x00, 0xd0, 0x08, 0xa0, 0xe9, 0x83, 0x08, 0xa0, 0xe0, 0xc2, 0x00, 0xd0,
+ 0x08, 0xa0, 0xd9, 0x83, 0x08, 0xa0, 0xd0, 0xc4, 0x18, 0x10, 0x08, 0xa2,
+ 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xa2, 0xb0, 0xc3, 0x0d, 0x14, 0x08, 0xa2,
+ 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xa2, 0xa0, 0xc4, 0x02, 0xde, 0x08, 0xa2,
+ 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xa2, 0x90, 0x8e, 0x08, 0xa1, 0xe8, 0x94,
+ 0x08, 0xa1, 0xd8, 0x9f, 0x00, 0xce, 0x49, 0x9e, 0x00, 0xce, 0x40, 0xc4,
+ 0x18, 0x10, 0x00, 0xce, 0xb9, 0xc2, 0x22, 0xcc, 0x00, 0xce, 0xb0, 0xc3,
+ 0x0d, 0x14, 0x00, 0xce, 0xa9, 0xc3, 0x09, 0x9e, 0x00, 0xce, 0xa0, 0xc4,
+ 0x02, 0xde, 0x00, 0xce, 0x99, 0xc2, 0x02, 0xa0, 0x00, 0xce, 0x90, 0x84,
+ 0x00, 0xce, 0x39, 0x86, 0x00, 0xce, 0x31, 0x8d, 0x00, 0xce, 0x29, 0x8f,
+ 0x00, 0xce, 0x21, 0x90, 0x00, 0xce, 0x1b, 0x02, 0xff, 0x5d, 0x98, 0x00,
+ 0xce, 0x08, 0x15, 0xc2, 0xff, 0x61, 0x1a, 0xc2, 0xff, 0x6b, 0x0d, 0xc2,
+ 0xff, 0x75, 0xc2, 0x01, 0x5d, 0x00, 0xcd, 0x29, 0xc2, 0x0e, 0x9a, 0x00,
+ 0xcd, 0x21, 0xc2, 0x00, 0xd0, 0x00, 0xcd, 0x19, 0xc2, 0x00, 0xdb, 0x00,
+ 0xcc, 0xf9, 0xc2, 0x02, 0x41, 0x00, 0xcc, 0xf1, 0xc2, 0x00, 0x87, 0x00,
+ 0xcc, 0xe9, 0xc2, 0x01, 0xc3, 0x00, 0xcc, 0xc9, 0x12, 0xc2, 0xff, 0x7f,
+ 0x10, 0xc2, 0xff, 0x89, 0x16, 0xc2, 0xff, 0x93, 0xc2, 0x19, 0x2c, 0x00,
+ 0xcc, 0x69, 0xc2, 0x0f, 0x9a, 0x00, 0xcc, 0x08, 0x15, 0xc2, 0xff, 0xa3,
+ 0x1a, 0xc2, 0xff, 0xad, 0x0d, 0xc2, 0xff, 0xb7, 0xc2, 0x01, 0x5d, 0x00,
+ 0xcd, 0x11, 0xc2, 0x0e, 0x9a, 0x00, 0xcd, 0x09, 0xc2, 0x00, 0xd0, 0x00,
+ 0xcd, 0x01, 0xc2, 0x00, 0xdb, 0x00, 0xcc, 0xe1, 0xc2, 0x02, 0x41, 0x00,
+ 0xcc, 0xd9, 0xc2, 0x00, 0x87, 0x00, 0xcc, 0xd1, 0xc2, 0x01, 0xc3, 0x00,
+ 0xcc, 0xb1, 0x12, 0xc2, 0xff, 0xc1, 0x10, 0xc2, 0xff, 0xcb, 0x16, 0xc2,
+ 0xff, 0xd5, 0xc2, 0x19, 0x2c, 0x00, 0xcc, 0x51, 0xc2, 0x0f, 0x9a, 0x00,
+ 0xcc, 0x00, 0x9b, 0x00, 0xce, 0x01, 0x8b, 0x00, 0xcd, 0x90, 0x87, 0x00,
+ 0xcd, 0xcb, 0x02, 0xff, 0xe5, 0x9b, 0x00, 0xcd, 0xe1, 0x97, 0x00, 0xcd,
+ 0xa0, 0x83, 0x00, 0xcd, 0xc3, 0x02, 0xff, 0xe9, 0x9b, 0x00, 0xcd, 0xe8,
+ 0x83, 0x00, 0xcd, 0x8b, 0x02, 0xff, 0xed, 0x9b, 0x00, 0xcd, 0xd1, 0x87,
+ 0x00, 0xcd, 0xb0, 0x42, 0x00, 0x28, 0xc2, 0xff, 0xf1, 0xc7, 0x52, 0xcc,
+ 0x01, 0x27, 0x68, 0xc7, 0x1f, 0x6e, 0x01, 0x27, 0x91, 0xc5, 0x66, 0xb1,
+ 0x01, 0x27, 0x58, 0xc8, 0x48, 0x23, 0x01, 0x27, 0x89, 0xc6, 0x44, 0x9c,
+ 0x01, 0x27, 0x80, 0xc6, 0x14, 0x07, 0x01, 0x27, 0x79, 0xc7, 0x34, 0x37,
+ 0x01, 0x27, 0x70, 0x94, 0x08, 0xcd, 0x38, 0xc2, 0x00, 0xd0, 0x08, 0xcd,
+ 0xd9, 0x83, 0x08, 0xcd, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xcd, 0xc9, 0x83,
+ 0x08, 0xcd, 0xc0, 0xc4, 0x18, 0x12, 0x08, 0x45, 0x71, 0x91, 0x08, 0x45,
+ 0x40, 0xc3, 0x77, 0x79, 0x08, 0x44, 0xc9, 0xc4, 0xdc, 0x2d, 0x08, 0x44,
+ 0xb0, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x11, 0xc9, 0xb4, 0x91, 0x0f, 0xb2,
+ 0xd1, 0xc4, 0x47, 0x23, 0x0f, 0xb2, 0x90, 0xc4, 0x01, 0xa3, 0x01, 0x0c,
+ 0xbb, 0x02, 0xff, 0xfd, 0xd3, 0x3c, 0xa1, 0x01, 0x49, 0x10, 0xc7, 0x10,
+ 0x9c, 0x01, 0x5b, 0xb8, 0xc4, 0x01, 0xa3, 0x01, 0x0c, 0xb3, 0x03, 0x00,
+ 0x01, 0xd3, 0x3c, 0x8d, 0x01, 0x49, 0x08, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3,
+ 0x01, 0xc9, 0xb4, 0x91, 0x0f, 0xb2, 0xc1, 0xc4, 0x47, 0x23, 0x0f, 0xb2,
+ 0x80, 0xc7, 0x10, 0x9c, 0x01, 0x5b, 0xb0, 0x44, 0x05, 0x14, 0xc3, 0x00,
+ 0x05, 0x46, 0x02, 0xdd, 0x43, 0x00, 0x1d, 0xc9, 0xac, 0x7b, 0x05, 0x41,
+ 0xb1, 0xca, 0xa1, 0xd4, 0x05, 0x41, 0xc8, 0x86, 0x0f, 0xae, 0x39, 0xc2,
+ 0x09, 0x3b, 0x0f, 0xae, 0x30, 0xcd, 0x7c, 0x81, 0x0f, 0x98, 0x79, 0xc7,
+ 0xc3, 0x29, 0x0f, 0x98, 0x70, 0x83, 0x09, 0x87, 0xd0, 0x83, 0x09, 0x87,
+ 0x98, 0x83, 0x09, 0x87, 0x50, 0x83, 0x09, 0x87, 0x30, 0x83, 0x09, 0x87,
+ 0x20, 0x83, 0x09, 0x86, 0xe0, 0x83, 0x09, 0x86, 0xd0, 0x84, 0x09, 0x94,
+ 0xc9, 0x83, 0x09, 0x94, 0xc0, 0x86, 0x09, 0x94, 0x49, 0x85, 0x09, 0x94,
+ 0x41, 0x84, 0x09, 0x94, 0x39, 0x83, 0x09, 0x94, 0x30, 0x83, 0x09, 0x94,
+ 0x10, 0x83, 0x09, 0x93, 0xf0, 0x83, 0x09, 0x93, 0xe0, 0x83, 0x09, 0x93,
+ 0xb8, 0x83, 0x09, 0x93, 0xa8, 0x83, 0x09, 0x93, 0x80, 0x83, 0x09, 0x93,
+ 0x70, 0x85, 0x09, 0x93, 0x21, 0x84, 0x09, 0x93, 0x19, 0x83, 0x09, 0x93,
+ 0x10, 0x88, 0x09, 0x92, 0xe9, 0x87, 0x09, 0x92, 0xe1, 0x86, 0x09, 0x92,
+ 0xd9, 0x85, 0x09, 0x92, 0xd1, 0x84, 0x09, 0x92, 0xc9, 0x83, 0x09, 0x92,
+ 0xc0, 0x83, 0x09, 0x92, 0xb0, 0x83, 0x09, 0x92, 0x88, 0x83, 0x09, 0x8c,
+ 0xb0, 0x83, 0x09, 0x8c, 0xa0, 0x83, 0x09, 0x8c, 0x90, 0x83, 0x09, 0x8c,
+ 0x68, 0x83, 0x09, 0x9d, 0x50, 0x83, 0x09, 0x9d, 0x28, 0x83, 0x09, 0x9d,
+ 0x10, 0x83, 0x09, 0x9d, 0x00, 0x83, 0x09, 0x9c, 0xf0, 0x83, 0x09, 0x9c,
+ 0xe0, 0x83, 0x09, 0x9c, 0xb0, 0x8e, 0x09, 0x9c, 0x91, 0x8d, 0x09, 0x9c,
+ 0x89, 0x8c, 0x09, 0x9c, 0x81, 0x8b, 0x09, 0x9c, 0x79, 0x8a, 0x09, 0x9c,
+ 0x71, 0x89, 0x09, 0x9c, 0x69, 0x88, 0x09, 0x9c, 0x61, 0x87, 0x09, 0x9c,
+ 0x59, 0x86, 0x09, 0x9c, 0x51, 0x85, 0x09, 0x9c, 0x49, 0x84, 0x09, 0x9c,
+ 0x41, 0x83, 0x09, 0x9c, 0x38, 0x84, 0x09, 0x9b, 0xf1, 0x83, 0x09, 0x9b,
+ 0xe8, 0x85, 0x09, 0x9b, 0xd9, 0x84, 0x09, 0x9b, 0xd1, 0x83, 0x09, 0x9b,
+ 0xc8, 0x84, 0x09, 0x9b, 0xa1, 0x83, 0x09, 0x9b, 0x98, 0x83, 0x09, 0x9b,
+ 0x68, 0x8b, 0x09, 0x9b, 0x59, 0x8a, 0x09, 0x9b, 0x51, 0x89, 0x09, 0x9b,
+ 0x49, 0x88, 0x09, 0x9b, 0x41, 0x87, 0x09, 0x9b, 0x39, 0x86, 0x09, 0x9b,
+ 0x31, 0x85, 0x09, 0x9b, 0x29, 0x84, 0x09, 0x9b, 0x21, 0x83, 0x09, 0x9b,
+ 0x18, 0x84, 0x09, 0xa0, 0xa9, 0x83, 0x09, 0xa0, 0xa0, 0x83, 0x09, 0x81,
+ 0xb0, 0x83, 0x09, 0x81, 0x98, 0x83, 0x09, 0x81, 0x88, 0x83, 0x09, 0x81,
+ 0x70, 0x83, 0x09, 0x81, 0x28, 0x83, 0x09, 0x80, 0xa8, 0x83, 0x09, 0x80,
+ 0x88, 0x84, 0x09, 0x80, 0x41, 0x83, 0x09, 0x80, 0x38, 0x83, 0x09, 0x80,
+ 0x28, 0x83, 0x09, 0x92, 0x78, 0x83, 0x09, 0x92, 0x50, 0x83, 0x09, 0x92,
+ 0x10, 0x83, 0x09, 0x92, 0x00, 0x83, 0x09, 0x91, 0x90, 0x83, 0x09, 0x91,
+ 0x28, 0x83, 0x09, 0x90, 0xd0, 0x83, 0x09, 0x90, 0xb8, 0x83, 0x09, 0x90,
+ 0xa8, 0x83, 0x09, 0x90, 0x98, 0x83, 0x09, 0x90, 0x50, 0x84, 0x09, 0x90,
+ 0x11, 0x83, 0x09, 0x90, 0x08, 0x42, 0x09, 0x0e, 0xc3, 0x00, 0x29, 0x42,
+ 0xbc, 0x43, 0xc3, 0x00, 0x33, 0x42, 0xc3, 0x98, 0xc3, 0x00, 0x3d, 0x42,
+ 0x04, 0x22, 0xc3, 0x00, 0x48, 0x42, 0xc5, 0xb6, 0xc3, 0x00, 0x53, 0x42,
+ 0xe6, 0x95, 0xc3, 0x00, 0x5d, 0x42, 0x30, 0xd1, 0xc3, 0x00, 0x68, 0xc4,
+ 0xdf, 0xab, 0x0f, 0x3f, 0x40, 0x83, 0x00, 0x95, 0x18, 0x87, 0x00, 0x95,
+ 0x20, 0x83, 0x01, 0x6c, 0x50, 0x83, 0x00, 0x98, 0x98, 0x87, 0x00, 0x98,
+ 0xa0, 0x83, 0x00, 0x98, 0xd8, 0x87, 0x00, 0x98, 0xe0, 0x83, 0x01, 0x6c,
+ 0x9b, 0x03, 0x00, 0x72, 0x8b, 0x01, 0x6c, 0xa1, 0x87, 0x01, 0x6c, 0xb2,
+ 0x03, 0x00, 0x76, 0x83, 0x01, 0x6e, 0xd8, 0x87, 0x01, 0x6e, 0xe0, 0x87,
+ 0x0f, 0x3f, 0x5b, 0x03, 0x00, 0x7a, 0x8b, 0x0f, 0x3f, 0x49, 0x83, 0x00,
+ 0x90, 0xb8, 0x91, 0x0f, 0x3f, 0x31, 0x87, 0x0f, 0x3f, 0x2b, 0x03, 0x00,
+ 0x7e, 0x83, 0x0f, 0x3f, 0x03, 0x03, 0x00, 0x82, 0x8b, 0x0f, 0x3f, 0x11,
+ 0x97, 0x0f, 0x3f, 0x08, 0x83, 0x00, 0x90, 0x98, 0x87, 0x00, 0x90, 0xa0,
+ 0x87, 0x05, 0x59, 0x60, 0x83, 0x05, 0x59, 0x58, 0x87, 0x00, 0x9c, 0x30,
+ 0x0a, 0xc3, 0x00, 0x86, 0x83, 0x01, 0x6d, 0xc3, 0x03, 0x00, 0xa0, 0x97,
+ 0x01, 0x6d, 0xc9, 0x8b, 0x01, 0x6d, 0xd1, 0x87, 0x01, 0x6d, 0xeb, 0x03,
+ 0x00, 0xa4, 0x91, 0x01, 0x6d, 0xf0, 0x83, 0x01, 0x6d, 0x58, 0x87, 0x01,
+ 0x6d, 0x60, 0x83, 0x00, 0x99, 0x58, 0x87, 0x00, 0x99, 0x60, 0x83, 0x01,
+ 0x6c, 0x80, 0x87, 0x05, 0x58, 0xa0, 0x91, 0x05, 0x58, 0x71, 0x87, 0x05,
+ 0x58, 0x6b, 0x03, 0x00, 0xa8, 0xc2, 0x04, 0xc6, 0x05, 0x58, 0x59, 0x8b,
+ 0x05, 0x58, 0x51, 0x97, 0x05, 0x58, 0x48, 0x83, 0x00, 0x97, 0xd8, 0x87,
+ 0x00, 0x97, 0xe0, 0x83, 0x01, 0x6c, 0x68, 0x87, 0x05, 0x58, 0x20, 0x83,
+ 0x00, 0x99, 0x18, 0x87, 0x00, 0x99, 0x20, 0x83, 0x01, 0x6c, 0x78, 0x83,
+ 0x00, 0x99, 0xd8, 0x87, 0x00, 0x99, 0xe0, 0x83, 0x00, 0x9a, 0x18, 0x87,
+ 0x00, 0x9a, 0x20, 0x83, 0x00, 0x9a, 0x38, 0x87, 0x00, 0x9c, 0x10, 0x83,
+ 0x00, 0x91, 0x18, 0x87, 0x00, 0x91, 0x20, 0xc3, 0x30, 0xd1, 0x00, 0x9c,
+ 0x01, 0xc3, 0xc5, 0xb6, 0x00, 0x9c, 0x21, 0xc3, 0xe0, 0x77, 0x00, 0x9c,
+ 0x41, 0xc3, 0x09, 0x0e, 0x00, 0x9c, 0x60, 0x83, 0x00, 0x91, 0xd8, 0x87,
+ 0x00, 0x91, 0xe0, 0x83, 0x01, 0x6c, 0x20, 0x83, 0x01, 0x6d, 0x18, 0x87,
+ 0x01, 0x6d, 0x20, 0x83, 0x00, 0x92, 0x58, 0x87, 0x00, 0x92, 0x60, 0x83,
+ 0x00, 0x92, 0x98, 0x87, 0x00, 0x92, 0xa0, 0x83, 0x00, 0x92, 0xc3, 0x03,
+ 0x00, 0xac, 0x8b, 0x00, 0x92, 0xd1, 0x87, 0x00, 0x92, 0xea, 0x03, 0x00,
+ 0xb0, 0x83, 0x01, 0x6e, 0x18, 0x87, 0x01, 0x6e, 0x20, 0x83, 0x00, 0x94,
+ 0x58, 0x87, 0x00, 0x94, 0x60, 0x83, 0x01, 0x6e, 0x98, 0x87, 0x01, 0x6e,
+ 0xa0, 0x83, 0x00, 0x94, 0xd8, 0x87, 0x00, 0x94, 0xe0, 0x83, 0x01, 0x6c,
+ 0x48, 0x83, 0x00, 0x95, 0x98, 0x87, 0x00, 0x95, 0xa0, 0x83, 0x00, 0x95,
+ 0xd8, 0x87, 0x00, 0x95, 0xe0, 0x83, 0x00, 0x96, 0x03, 0x03, 0x00, 0xb4,
+ 0x8b, 0x00, 0x96, 0x11, 0x87, 0x00, 0x96, 0x2a, 0x03, 0x00, 0xb8, 0x83,
+ 0x01, 0x6e, 0x58, 0x87, 0x01, 0x6e, 0x60, 0x48, 0x17, 0xb5, 0xc3, 0x00,
+ 0xbc, 0x83, 0x00, 0x99, 0x98, 0x87, 0x00, 0x99, 0xa0, 0x83, 0x01, 0x6c,
+ 0x88, 0x87, 0x00, 0x9c, 0x70, 0x83, 0x00, 0x97, 0x18, 0x87, 0x00, 0x97,
+ 0x20, 0x83, 0x01, 0x6d, 0x98, 0x87, 0x01, 0x6d, 0xa0, 0x87, 0x00, 0x9c,
+ 0x50, 0xe0, 0x0a, 0x07, 0x01, 0x17, 0x98, 0xd3, 0x36, 0x0a, 0x01, 0x4f,
+ 0x1b, 0x03, 0x00, 0xd6, 0x45, 0x00, 0xd5, 0x43, 0x00, 0xdc, 0x16, 0xc3,
+ 0x00, 0xf4, 0xc9, 0x0e, 0x6e, 0x01, 0x53, 0x31, 0xcb, 0x98, 0x2c, 0x01,
+ 0x55, 0x71, 0xce, 0x6c, 0xb4, 0x01, 0x5f, 0xc8, 0x94, 0x00, 0x57, 0x00,
+ 0x8e, 0x00, 0x57, 0x08, 0x94, 0x00, 0x56, 0x20, 0x8e, 0x00, 0x57, 0x18,
+ 0xa2, 0x0e, 0x91, 0x03, 0x03, 0x00, 0xfa, 0xa1, 0x0e, 0x90, 0xfb, 0x03,
+ 0x01, 0x0e, 0x20, 0xc3, 0x01, 0x2a, 0x9f, 0x0e, 0x90, 0xf3, 0x03, 0x01,
+ 0x42, 0x9e, 0x0e, 0x90, 0xeb, 0x03, 0x01, 0x5a, 0xa5, 0x0e, 0x91, 0x11,
+ 0xa4, 0x0e, 0x91, 0x08, 0xa2, 0x0e, 0x90, 0x23, 0x03, 0x01, 0x76, 0x9f,
+ 0x0e, 0x90, 0x0b, 0x03, 0x01, 0x8a, 0x9e, 0x0e, 0x90, 0x03, 0x03, 0x01,
+ 0x9e, 0xa6, 0x0e, 0x90, 0x41, 0xa5, 0x0e, 0x90, 0x39, 0xa4, 0x0e, 0x90,
+ 0x31, 0xa3, 0x0e, 0x90, 0x29, 0xa1, 0x0e, 0x90, 0x19, 0xa0, 0x0e, 0x90,
+ 0x10, 0x45, 0x02, 0x9a, 0x43, 0x01, 0xc6, 0x44, 0x02, 0x9b, 0xc3, 0x01,
+ 0xd8, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x28, 0xc9, 0xb4, 0x13, 0x08, 0x0b,
+ 0xab, 0x03, 0x01, 0xea, 0xcc, 0x88, 0xd1, 0x08, 0x0c, 0x58, 0x46, 0x02,
+ 0x0f, 0xc3, 0x01, 0xf0, 0xd2, 0x1a, 0x6c, 0x00, 0x1f, 0xc8, 0xd3, 0x1a,
+ 0x6b, 0x00, 0x1f, 0xe9, 0xda, 0x1a, 0x64, 0x00, 0x1f, 0xf8, 0x47, 0x02,
+ 0x0e, 0xc3, 0x02, 0x6d, 0x49, 0x11, 0x74, 0xc3, 0x02, 0xe6, 0xda, 0x1a,
+ 0x64, 0x00, 0x1b, 0xe0, 0xc3, 0x11, 0xee, 0x00, 0xeb, 0x51, 0xc3, 0x1c,
+ 0x8d, 0x00, 0xeb, 0x49, 0xc3, 0x79, 0xe7, 0x00, 0xeb, 0x41, 0xc5, 0x51,
+ 0x51, 0x00, 0xeb, 0x39, 0xc4, 0x93, 0xa9, 0x00, 0xeb, 0x30, 0x45, 0x02,
+ 0x9a, 0x43, 0x02, 0xf2, 0xc8, 0x9c, 0x0e, 0x00, 0x1e, 0xb9, 0xca, 0x8b,
+ 0x67, 0x00, 0x1f, 0x80, 0x15, 0xc3, 0x03, 0x04, 0xcd, 0x78, 0xbf, 0x00,
+ 0x1e, 0xc1, 0xc3, 0xe5, 0xb4, 0x00, 0x1f, 0x99, 0xc7, 0x51, 0x4f, 0x00,
+ 0x1e, 0xe1, 0xc5, 0x78, 0xc7, 0x00, 0x1e, 0xf0, 0xcc, 0x1a, 0x72, 0x00,
+ 0x1f, 0x91, 0xce, 0x10, 0x3e, 0x00, 0x1f, 0xa8, 0xca, 0x89, 0xff, 0x00,
+ 0x1e, 0x89, 0x44, 0x02, 0xdf, 0x43, 0x03, 0x16, 0xcb, 0x8d, 0x4d, 0x08,
+ 0x0b, 0xb9, 0xca, 0x71, 0x88, 0x08, 0x0b, 0xe8, 0x46, 0xcd, 0x55, 0xc3,
+ 0x03, 0x22, 0x43, 0x14, 0xde, 0xc3, 0x03, 0x34, 0x16, 0xc3, 0x03, 0x40,
+ 0x4b, 0x8e, 0x4a, 0xc3, 0x03, 0x4c, 0x05, 0xc3, 0x03, 0x5b, 0xcd, 0x75,
+ 0xda, 0x08, 0x0b, 0x19, 0xd1, 0x4f, 0x8b, 0x08, 0x0b, 0x99, 0xd3, 0x45,
+ 0x60, 0x08, 0x0b, 0xa1, 0xd3, 0x46, 0x44, 0x08, 0x0b, 0x80, 0xc9, 0xac,
+ 0x33, 0x08, 0x0c, 0x31, 0xc9, 0xab, 0xfd, 0x08, 0x0c, 0x38, 0xc6, 0x00,
+ 0x91, 0x00, 0x1f, 0x89, 0xd2, 0x47, 0x5d, 0x00, 0x1f, 0xe0, 0xca, 0x37,
+ 0x4e, 0x01, 0x13, 0x99, 0xc5, 0x07, 0x62, 0x01, 0x13, 0x28, 0x4a, 0x33,
+ 0xad, 0x43, 0x03, 0x67, 0xe0, 0x02, 0x67, 0x01, 0x54, 0x60, 0x47, 0xc7,
+ 0x35, 0xc3, 0x03, 0x76, 0x50, 0x40, 0x1b, 0x43, 0x03, 0x82, 0xe0, 0x04,
+ 0x67, 0x01, 0x54, 0x90, 0x8e, 0x08, 0x9b, 0x18, 0x94, 0x08, 0x9a, 0x20,
+ 0x8e, 0x08, 0x98, 0x63, 0x03, 0x03, 0x88, 0x94, 0x08, 0x98, 0x5a, 0x03,
+ 0x03, 0x8c, 0xcf, 0x14, 0x31, 0x08, 0x9a, 0xf9, 0xc8, 0x14, 0x38, 0x08,
+ 0x9a, 0xf0, 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x11, 0x83, 0x08, 0x99, 0x08,
+ 0xc2, 0x00, 0xd0, 0x08, 0x99, 0x01, 0x83, 0x08, 0x98, 0xf8, 0xcb, 0x1d,
+ 0x4b, 0x0f, 0xb0, 0x09, 0xc8, 0xb6, 0xc2, 0x0f, 0xc9, 0x48, 0x94, 0x00,
+ 0xe5, 0xa3, 0x03, 0x03, 0x90, 0x87, 0x00, 0xe5, 0x80, 0x94, 0x00, 0xe5,
+ 0x11, 0x90, 0x00, 0xe4, 0xb8, 0xc2, 0x00, 0xb1, 0x00, 0xe5, 0x69, 0xc2,
+ 0x00, 0x74, 0x00, 0xe5, 0x48, 0xc2, 0x00, 0x74, 0x00, 0x85, 0xc9, 0xc2,
+ 0x00, 0xb1, 0x00, 0x85, 0xe8, 0x87, 0x00, 0x86, 0x01, 0x94, 0x00, 0x86,
+ 0x20, 0x90, 0x00, 0x86, 0xb9, 0x94, 0x00, 0x87, 0x10, 0xc2, 0x00, 0x74,
+ 0x00, 0x87, 0x49, 0xc2, 0x00, 0xb1, 0x00, 0x87, 0x68, 0x87, 0x00, 0x87,
+ 0x81, 0x94, 0x00, 0x87, 0xa2, 0x03, 0x03, 0x96, 0xc2, 0x00, 0x74, 0x01,
+ 0x68, 0xc9, 0xc2, 0x00, 0xb1, 0x01, 0x68, 0xe8, 0x87, 0x01, 0x69, 0x01,
+ 0x94, 0x01, 0x69, 0x20, 0xc3, 0x00, 0x15, 0x01, 0x60, 0x29, 0x14, 0x43,
+ 0x03, 0x9c, 0x87, 0x01, 0x60, 0x49, 0xc4, 0x7a, 0xc3, 0x01, 0x61, 0x58,
+ 0xc9, 0xae, 0x6a, 0x01, 0x61, 0x39, 0xc7, 0xc8, 0x00, 0x01, 0x61, 0x48,
+ 0xc2, 0x01, 0x6f, 0x01, 0x60, 0xdb, 0x03, 0x03, 0xa4, 0x83, 0x01, 0x60,
+ 0xf0, 0xca, 0xa5, 0x12, 0x01, 0x61, 0x28, 0xc3, 0x00, 0x15, 0x01, 0x61,
+ 0xa9, 0x14, 0x43, 0x03, 0xaa, 0x87, 0x01, 0x61, 0xc9, 0xc4, 0x7a, 0xc3,
+ 0x01, 0x62, 0xd8, 0xc9, 0xae, 0x6a, 0x01, 0x62, 0xb9, 0xc7, 0xc8, 0x00,
+ 0x01, 0x62, 0xc8, 0xc2, 0x01, 0x6f, 0x01, 0x62, 0x5b, 0x03, 0x03, 0xb2,
+ 0x83, 0x01, 0x62, 0x70, 0xca, 0xa5, 0x12, 0x01, 0x62, 0xa8, 0x94, 0x00,
+ 0x58, 0x5b, 0x03, 0x03, 0xb8, 0x8e, 0x00, 0x58, 0x62, 0x03, 0x03, 0xbc,
+ 0x83, 0x00, 0x58, 0xf9, 0xc2, 0x00, 0xd0, 0x00, 0x59, 0x00, 0x83, 0x00,
+ 0x59, 0x09, 0xc2, 0x00, 0xd0, 0x00, 0x59, 0x10, 0x94, 0x00, 0x5a, 0x20,
+ 0x8e, 0x00, 0x5b, 0x18, 0x00, 0x43, 0x03, 0xc0, 0xc9, 0x57, 0x20, 0x0f,
+ 0x69, 0x38, 0x00, 0x43, 0x03, 0xcc, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x30,
+ 0x00, 0x43, 0x03, 0xd8, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x40, 0x00, 0x43,
+ 0x03, 0xe4, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x48, 0xc9, 0x57, 0x20, 0x0f,
+ 0x69, 0x50, 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xc1, 0xc8, 0x4b, 0x94, 0x0f,
+ 0x69, 0x08, 0xc9, 0x57, 0x20, 0x0f, 0x69, 0x58, 0xc7, 0x0d, 0x04, 0x0f,
+ 0x68, 0xc9, 0xc8, 0x4b, 0x94, 0x0f, 0x69, 0x10, 0xc4, 0x02, 0xa3, 0x08,
+ 0x7c, 0x41, 0xc4, 0x10, 0xa0, 0x08, 0x7c, 0x38, 0xc5, 0x05, 0x02, 0x08,
+ 0x7c, 0x29, 0xc5, 0x00, 0xd4, 0x08, 0x7c, 0x20, 0xc5, 0x05, 0x02, 0x08,
+ 0x7c, 0x19, 0xc5, 0x00, 0xd4, 0x08, 0x7c, 0x10, 0xc3, 0x26, 0x1a, 0x08,
+ 0x7c, 0x09, 0xc5, 0xcf, 0xd8, 0x08, 0x7b, 0xc0, 0x03, 0xc3, 0x03, 0xf0,
+ 0xc3, 0x11, 0xef, 0x08, 0x7b, 0xf8, 0xc3, 0x05, 0x14, 0x08, 0x78, 0xeb,
+ 0x03, 0x03, 0xfc, 0x16, 0xc3, 0x04, 0x02, 0x08, 0x43, 0x04, 0x10, 0x46,
+ 0x02, 0x0f, 0xc3, 0x04, 0x1c, 0xd3, 0x40, 0x8d, 0x08, 0x79, 0x38, 0xce,
+ 0x70, 0x26, 0x08, 0x53, 0xf9, 0x44, 0x00, 0x51, 0x43, 0x04, 0x7b, 0x16,
+ 0xc3, 0x04, 0x87, 0xc4, 0x4a, 0xb9, 0x08, 0x53, 0xd1, 0x06, 0xc3, 0x04,
+ 0x97, 0xc4, 0xdf, 0x07, 0x08, 0x53, 0xc1, 0x09, 0xc3, 0x04, 0xa3, 0xc4,
+ 0xe3, 0x27, 0x08, 0x53, 0x41, 0xc4, 0x5d, 0xe2, 0x08, 0x53, 0x39, 0x15,
+ 0xc3, 0x04, 0xaf, 0xc3, 0x7e, 0x89, 0x08, 0x53, 0x29, 0xc4, 0xb9, 0x7e,
+ 0x08, 0x53, 0x21, 0xc3, 0x00, 0x4e, 0x08, 0x53, 0x19, 0xc2, 0x01, 0x7f,
+ 0x08, 0x53, 0x03, 0x03, 0x04, 0xb9, 0xc6, 0xcf, 0xd7, 0x08, 0x53, 0x09,
+ 0x0d, 0xc3, 0x04, 0xbf, 0xc3, 0x20, 0x18, 0x08, 0x53, 0x61, 0xc2, 0x14,
+ 0xda, 0x08, 0x53, 0x81, 0x03, 0x43, 0x04, 0xcb, 0xc2, 0x00, 0x5f, 0x08,
+ 0x67, 0xd9, 0xc3, 0x45, 0x6b, 0x08, 0x67, 0xe8, 0x00, 0x43, 0x04, 0xd7,
+ 0x95, 0x08, 0x67, 0x91, 0x97, 0x08, 0x67, 0x59, 0xc2, 0x1e, 0xd5, 0x08,
+ 0x66, 0xa8, 0x90, 0x08, 0x66, 0xcb, 0x03, 0x04, 0xe3, 0x9c, 0x08, 0x67,
+ 0x7b, 0x03, 0x04, 0xf2, 0x98, 0x08, 0x67, 0x71, 0x85, 0x08, 0x66, 0x23,
+ 0x03, 0x04, 0xf6, 0x96, 0x08, 0x67, 0x33, 0x03, 0x04, 0xfe, 0x95, 0x08,
+ 0x67, 0x23, 0x03, 0x05, 0x02, 0x8f, 0x08, 0x66, 0xc3, 0x03, 0x05, 0x06,
+ 0x8e, 0x08, 0x66, 0xb3, 0x03, 0x05, 0x0a, 0x8d, 0x08, 0x66, 0x99, 0x8c,
+ 0x08, 0x66, 0x91, 0x8a, 0x08, 0x66, 0x79, 0x89, 0x08, 0x66, 0x6b, 0x03,
+ 0x05, 0x0e, 0x88, 0x08, 0x66, 0x61, 0x87, 0x08, 0x66, 0x59, 0x86, 0x08,
+ 0x66, 0x39, 0x84, 0x08, 0x66, 0x11, 0x92, 0x08, 0x67, 0x01, 0x94, 0x08,
+ 0x67, 0x10, 0xc2, 0x0f, 0xe1, 0x08, 0x67, 0x69, 0xc2, 0x49, 0x0c, 0x08,
+ 0x66, 0xf0, 0xc2, 0x0f, 0xe1, 0x08, 0x67, 0x61, 0xc2, 0x49, 0x0c, 0x08,
+ 0x66, 0xe8, 0x91, 0x08, 0x66, 0xe1, 0xc2, 0x02, 0xe0, 0x08, 0x66, 0xf8,
+ 0x8d, 0x08, 0x66, 0xa1, 0xc2, 0x02, 0x35, 0x08, 0x66, 0x41, 0xc2, 0x00,
+ 0x50, 0x08, 0x66, 0x19, 0x83, 0x08, 0x66, 0x08, 0x8b, 0x08, 0x66, 0x88,
+ 0x90, 0x08, 0x64, 0xcb, 0x03, 0x05, 0x12, 0x96, 0x08, 0x65, 0x33, 0x03,
+ 0x05, 0x21, 0x95, 0x08, 0x65, 0x23, 0x03, 0x05, 0x25, 0x92, 0x08, 0x65,
+ 0x01, 0x8f, 0x08, 0x64, 0xc3, 0x03, 0x05, 0x29, 0x8e, 0x08, 0x64, 0xb3,
+ 0x03, 0x05, 0x2d, 0x8d, 0x08, 0x64, 0x99, 0x8c, 0x08, 0x64, 0x91, 0x8a,
+ 0x08, 0x64, 0x79, 0x89, 0x08, 0x64, 0x6b, 0x03, 0x05, 0x31, 0x88, 0x08,
+ 0x64, 0x61, 0x87, 0x08, 0x64, 0x59, 0x86, 0x08, 0x64, 0x39, 0x85, 0x08,
+ 0x64, 0x23, 0x03, 0x05, 0x35, 0x84, 0x08, 0x64, 0x11, 0x94, 0x08, 0x65,
+ 0x11, 0x98, 0x08, 0x65, 0x71, 0x9c, 0x08, 0x65, 0x7a, 0x03, 0x05, 0x3d,
+ 0xc2, 0x02, 0xe0, 0x08, 0x64, 0xf9, 0x91, 0x08, 0x64, 0xe0, 0xc2, 0x49,
+ 0x0c, 0x08, 0x64, 0xf1, 0xc2, 0x0f, 0xe1, 0x08, 0x65, 0x68, 0xc2, 0x49,
+ 0x0c, 0x08, 0x64, 0xe9, 0xc2, 0x0f, 0xe1, 0x08, 0x65, 0x60, 0xc2, 0x1e,
+ 0xd5, 0x08, 0x64, 0xa9, 0x97, 0x08, 0x65, 0x59, 0x95, 0x08, 0x65, 0x90,
+ 0x8d, 0x08, 0x64, 0xa1, 0xc2, 0x02, 0x35, 0x08, 0x64, 0x41, 0xc2, 0x00,
+ 0x50, 0x08, 0x64, 0x19, 0x83, 0x08, 0x64, 0x08, 0x8b, 0x08, 0x64, 0x88,
+ 0x96, 0x08, 0x62, 0x39, 0x93, 0x08, 0x61, 0xc1, 0x87, 0x08, 0x60, 0x3b,
+ 0x03, 0x05, 0x41, 0x92, 0x08, 0x61, 0x80, 0x07, 0xc3, 0x05, 0x45, 0x96,
+ 0x08, 0x62, 0x19, 0x95, 0x08, 0x61, 0xeb, 0x03, 0x05, 0x6d, 0x94, 0x08,
+ 0x61, 0xd1, 0x93, 0x08, 0x61, 0xa1, 0x90, 0x08, 0x61, 0x19, 0x8e, 0x08,
+ 0x60, 0xf1, 0x9b, 0x08, 0x60, 0xb1, 0x86, 0x08, 0x60, 0x89, 0x89, 0x08,
+ 0x60, 0x69, 0x84, 0x08, 0x60, 0x48, 0xc2, 0x01, 0xe2, 0x08, 0x62, 0x09,
+ 0x10, 0xc3, 0x05, 0x71, 0x8f, 0x08, 0x61, 0x11, 0xc2, 0x00, 0x72, 0x08,
+ 0x61, 0x09, 0x9c, 0x08, 0x60, 0xa1, 0x92, 0x08, 0x61, 0x79, 0x85, 0x08,
+ 0x61, 0x90, 0x93, 0x08, 0x61, 0xb1, 0x85, 0x08, 0x61, 0x88, 0x87, 0x08,
+ 0x60, 0x13, 0x03, 0x05, 0x81, 0x96, 0x08, 0x62, 0x21, 0xc2, 0x01, 0xe2,
+ 0x08, 0x62, 0x01, 0x94, 0x08, 0x61, 0xd9, 0x93, 0x08, 0x61, 0xa9, 0x8e,
+ 0x08, 0x60, 0xf9, 0x9b, 0x08, 0x60, 0xb9, 0x86, 0x08, 0x60, 0x91, 0x89,
+ 0x08, 0x60, 0x71, 0x84, 0x08, 0x60, 0x51, 0xc2, 0x00, 0x75, 0x08, 0x61,
+ 0x60, 0xc2, 0x01, 0xe2, 0x08, 0x62, 0x11, 0x85, 0x08, 0x61, 0x99, 0x10,
+ 0xc3, 0x05, 0x9c, 0x9c, 0x08, 0x60, 0xa8, 0x93, 0x08, 0x61, 0xc9, 0x87,
+ 0x08, 0x60, 0x42, 0x03, 0x05, 0xa8, 0x93, 0x08, 0x61, 0xb8, 0xc5, 0x28,
+ 0xee, 0x08, 0x54, 0xf9, 0xc2, 0x00, 0xc4, 0x08, 0x54, 0xf0, 0x8a, 0x08,
+ 0x54, 0xe1, 0xc2, 0x00, 0x74, 0x08, 0x54, 0xc0, 0x0a, 0xc3, 0x05, 0xac,
+ 0xc2, 0x02, 0x98, 0x08, 0x54, 0xb9, 0xc2, 0x16, 0x1c, 0x08, 0x54, 0x48,
+ 0xc4, 0x92, 0x76, 0x08, 0x54, 0xb1, 0xc3, 0x12, 0xc2, 0x08, 0x54, 0xa0,
+ 0x8e, 0x08, 0x54, 0xa9, 0x86, 0x08, 0x54, 0x98, 0x9f, 0x08, 0x54, 0x31,
+ 0x9e, 0x08, 0x54, 0x51, 0xa0, 0x08, 0x54, 0x78, 0xc2, 0x02, 0x98, 0x08,
+ 0x54, 0x11, 0xc2, 0x16, 0x1c, 0x08, 0x54, 0x00, 0xc2, 0x0f, 0x9b, 0x08,
+ 0x54, 0x59, 0xc3, 0x14, 0x4b, 0x08, 0x54, 0x68, 0xc3, 0x00, 0x49, 0x08,
+ 0x54, 0x89, 0xc2, 0x00, 0x74, 0x08, 0x54, 0x90, 0x45, 0x00, 0xba, 0xc3,
+ 0x05, 0xb8, 0xcc, 0x1d, 0xc7, 0x08, 0x1e, 0x81, 0x47, 0x34, 0x2f, 0x43,
+ 0x06, 0x21, 0xc2, 0x00, 0x82, 0x08, 0x1a, 0x99, 0x1c, 0x43, 0x06, 0x31,
+ 0x88, 0x08, 0x1b, 0x58, 0xc3, 0xd3, 0x4c, 0x08, 0x1a, 0xa9, 0xc3, 0x13,
+ 0x4e, 0x08, 0x1a, 0xb8, 0x87, 0x08, 0x1b, 0x91, 0x83, 0x08, 0x1b, 0xa8,
+ 0xc3, 0xc1, 0x4b, 0x08, 0x1a, 0xf1, 0xc2, 0x00, 0x29, 0x08, 0x1b, 0x70,
+ 0xc2, 0x25, 0x9f, 0x08, 0x1b, 0x09, 0x0a, 0x43, 0x06, 0x3d, 0xc2, 0x02,
+ 0xfa, 0x08, 0x1b, 0x11, 0xc3, 0xc5, 0xef, 0x08, 0x1b, 0x68, 0xc2, 0x14,
+ 0x98, 0x08, 0x1b, 0x39, 0xc2, 0x00, 0x29, 0x08, 0x1b, 0x7b, 0x03, 0x06,
+ 0x49, 0x83, 0x08, 0x1b, 0xa3, 0x03, 0x06, 0x51, 0x97, 0x08, 0x1b, 0x98,
+ 0x91, 0x08, 0x1b, 0x50, 0x87, 0x08, 0x18, 0x11, 0x83, 0x08, 0x18, 0x51,
+ 0x97, 0x08, 0x18, 0x91, 0xc2, 0x01, 0x7f, 0x08, 0x18, 0xc8, 0x8e, 0x08,
+ 0x18, 0x59, 0x8f, 0x08, 0x18, 0x61, 0x90, 0x08, 0x18, 0x69, 0x95, 0x08,
+ 0x18, 0xa3, 0x03, 0x06, 0x55, 0x94, 0x08, 0x18, 0x9b, 0x03, 0x06, 0x5d,
+ 0xc2, 0x01, 0x26, 0x08, 0x18, 0xb9, 0x88, 0x08, 0x18, 0xd0, 0xc2, 0x01,
+ 0x7f, 0x08, 0x18, 0x41, 0x87, 0x08, 0x18, 0xa8, 0x8b, 0x08, 0x18, 0xe8,
+ 0x87, 0x08, 0x18, 0x81, 0xc2, 0x01, 0x7f, 0x08, 0x18, 0xc0, 0xc2, 0x01,
+ 0x7f, 0x08, 0x18, 0x89, 0xcb, 0x97, 0x3a, 0x08, 0x19, 0x78, 0x97, 0x08,
+ 0x18, 0xf0, 0xc2, 0x00, 0x5f, 0x08, 0x19, 0x09, 0xc3, 0x45, 0x6b, 0x08,
+ 0x19, 0x18, 0x83, 0x08, 0x26, 0x49, 0xc2, 0x0d, 0xf6, 0x08, 0x26, 0x58,
+ 0x83, 0x08, 0x27, 0x89, 0xc2, 0x0d, 0xf6, 0x08, 0x27, 0x98, 0x4b, 0x8b,
+ 0x36, 0xc3, 0x06, 0x61, 0xd2, 0x4e, 0x41, 0x0e, 0x7d, 0x90, 0x42, 0x14,
+ 0x98, 0xc3, 0x06, 0x6d, 0x46, 0x87, 0x3b, 0x43, 0x06, 0x7c, 0x45, 0xdc,
+ 0x3b, 0xc3, 0x06, 0x88, 0xce, 0x6e, 0x9e, 0x0e, 0x7c, 0xd0, 0x11, 0xc3,
+ 0x06, 0x9a, 0xc4, 0x69, 0xaa, 0x0e, 0x7d, 0x12, 0x03, 0x06, 0xac, 0x11,
+ 0xc3, 0x06, 0xb2, 0xc3, 0x2d, 0x1d, 0x0e, 0x7c, 0xda, 0x03, 0x06, 0xc1,
+ 0x11, 0xc3, 0x06, 0xc7, 0xc7, 0xbf, 0x6b, 0x0e, 0x7c, 0x90, 0xce, 0x69,
+ 0xa0, 0x0e, 0x7c, 0x89, 0x42, 0x00, 0x97, 0xc3, 0x06, 0xd3, 0xc9, 0xaa,
+ 0x83, 0x0e, 0x7c, 0x5a, 0x03, 0x06, 0xf1, 0xd4, 0x38, 0x54, 0x0e, 0x7a,
+ 0xd1, 0xc8, 0xbc, 0x2a, 0x0e, 0x7a, 0xb8, 0xc7, 0x78, 0xdb, 0x0e, 0x7c,
+ 0x01, 0xc8, 0x94, 0x9e, 0x0e, 0x7b, 0xf0, 0xc7, 0x6d, 0xa9, 0x0e, 0x7b,
+ 0xc1, 0xc8, 0x4e, 0x4b, 0x0e, 0x7b, 0xb0, 0xc5, 0x00, 0x2c, 0x0e, 0x78,
+ 0x71, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x10, 0xd5, 0x35, 0xf3, 0x0e, 0x79,
+ 0xb8, 0xc6, 0x42, 0x68, 0x0e, 0x78, 0xe1, 0x42, 0x00, 0xe7, 0x43, 0x06,
+ 0xf7, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0x99, 0xc4, 0x00, 0x49, 0x0e, 0x78,
+ 0x38, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0x79, 0xc4, 0x00, 0x49, 0x0e, 0x78,
+ 0x18, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0x61, 0xc4, 0x00, 0x49, 0x0e, 0x78,
+ 0x00, 0xc6, 0x78, 0xdc, 0x0e, 0x78, 0xc9, 0x4b, 0x8e, 0xfa, 0x43, 0x07,
+ 0x03, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0xb9, 0xc4, 0x00, 0x49, 0x0e, 0x78,
+ 0x58, 0xc5, 0xb3, 0x3f, 0x05, 0x4e, 0x58, 0xc4, 0xad, 0x29, 0x05, 0x4e,
+ 0x49, 0xc3, 0x06, 0x47, 0x05, 0x4e, 0x28, 0xc8, 0x60, 0x28, 0x05, 0x4d,
+ 0x81, 0xc4, 0x60, 0x22, 0x05, 0x4d, 0x78, 0xc5, 0x60, 0x21, 0x05, 0x4d,
+ 0x31, 0xc5, 0xdc, 0x68, 0x05, 0x4c, 0x48, 0xc6, 0xcb, 0x27, 0x05, 0x4c,
+ 0xf8, 0xc6, 0xcb, 0x27, 0x05, 0x4c, 0xc0, 0xc6, 0xcb, 0x45, 0x05, 0x4c,
+ 0x52, 0x03, 0x07, 0x0f, 0xca, 0x60, 0x26, 0x05, 0x4d, 0x18, 0xca, 0x60,
+ 0x26, 0x05, 0x4c, 0xf0, 0xc6, 0xcb, 0x27, 0x05, 0x4d, 0x08, 0xca, 0x60,
+ 0x26, 0x05, 0x4c, 0xe0, 0xc5, 0x60, 0x21, 0x05, 0x4c, 0x89, 0xc5, 0x91,
+ 0x73, 0x05, 0x4c, 0x80, 0xd0, 0x3d, 0x58, 0x01, 0x2c, 0xf8, 0x56, 0x2c,
+ 0x44, 0xc3, 0x07, 0x15, 0x46, 0x01, 0xc8, 0x43, 0x07, 0x21, 0x9a, 0x05,
+ 0x22, 0xd1, 0x96, 0x05, 0x22, 0xc9, 0x91, 0x05, 0x22, 0x9b, 0x03, 0x07,
+ 0x2d, 0x92, 0x05, 0x22, 0xe0, 0x92, 0x05, 0x22, 0xc1, 0x9a, 0x05, 0x22,
+ 0xb1, 0x96, 0x05, 0x22, 0xa8, 0x87, 0x05, 0x22, 0x83, 0x03, 0x07, 0x35,
+ 0x92, 0x05, 0x22, 0x69, 0x9a, 0x05, 0x22, 0x59, 0x96, 0x05, 0x22, 0x50,
+ 0x94, 0x05, 0x22, 0x4b, 0x03, 0x07, 0x41, 0x92, 0x05, 0x22, 0x39, 0x9a,
+ 0x05, 0x22, 0x29, 0x96, 0x05, 0x22, 0x20, 0x94, 0x05, 0x22, 0x1b, 0x03,
+ 0x07, 0x45, 0x92, 0x05, 0x22, 0x09, 0x9a, 0x05, 0x21, 0xf9, 0x91, 0x05,
+ 0x21, 0xd2, 0x03, 0x07, 0x49, 0x92, 0x05, 0x21, 0xf1, 0x9a, 0x05, 0x21,
+ 0xe1, 0x96, 0x05, 0x21, 0xd8, 0x87, 0x05, 0x21, 0xbb, 0x03, 0x07, 0x4d,
+ 0x92, 0x05, 0x21, 0xa1, 0x9a, 0x05, 0x21, 0x91, 0x96, 0x05, 0x21, 0x88,
+ 0x94, 0x05, 0x1d, 0x53, 0x03, 0x07, 0x59, 0x92, 0x05, 0x1d, 0x41, 0x9a,
+ 0x05, 0x1d, 0x31, 0x96, 0x05, 0x1d, 0x28, 0x94, 0x05, 0x1d, 0x23, 0x03,
+ 0x07, 0x5d, 0x92, 0x05, 0x1d, 0x11, 0x9a, 0x05, 0x1d, 0x01, 0x96, 0x05,
+ 0x1c, 0xf8, 0x92, 0x05, 0x1c, 0xf1, 0x9a, 0x05, 0x1c, 0xe1, 0x96, 0x05,
+ 0x1c, 0xd8, 0x92, 0x05, 0x1c, 0xd1, 0x9a, 0x05, 0x1c, 0xc0, 0x92, 0x05,
+ 0x1c, 0xb9, 0x9a, 0x05, 0x1c, 0xa9, 0x96, 0x05, 0x1c, 0xa0, 0x9a, 0x05,
+ 0x12, 0xe9, 0x92, 0x05, 0x12, 0xf8, 0x96, 0x05, 0x13, 0x01, 0x9a, 0x05,
+ 0x13, 0x09, 0x92, 0x05, 0x13, 0x18, 0x96, 0x05, 0x13, 0x21, 0x9a, 0x05,
+ 0x13, 0x28, 0x96, 0x05, 0x13, 0x39, 0x9a, 0x05, 0x13, 0x41, 0x92, 0x05,
+ 0x13, 0x50, 0x9a, 0x05, 0x13, 0x59, 0x92, 0x05, 0x13, 0x68, 0x96, 0x05,
+ 0x17, 0x69, 0x9a, 0x05, 0x17, 0x71, 0x92, 0x05, 0x17, 0x81, 0x87, 0x05,
+ 0x17, 0x92, 0x03, 0x07, 0x61, 0x96, 0x05, 0x17, 0xa1, 0x9a, 0x05, 0x17,
+ 0xa9, 0x92, 0x05, 0x17, 0xb8, 0x91, 0x05, 0x17, 0xcb, 0x03, 0x07, 0x69,
+ 0x9a, 0x05, 0x17, 0xf1, 0x92, 0x05, 0x18, 0x01, 0x94, 0x05, 0x18, 0x12,
+ 0x03, 0x07, 0x6d, 0x9a, 0x05, 0x17, 0xd1, 0x92, 0x05, 0x17, 0xd8, 0x9a,
+ 0x05, 0x17, 0xe0, 0x96, 0x05, 0x18, 0x19, 0x9a, 0x05, 0x18, 0x21, 0x92,
+ 0x05, 0x18, 0x31, 0x94, 0x05, 0x18, 0x42, 0x03, 0x07, 0x71, 0x96, 0x05,
+ 0x03, 0xb1, 0x9a, 0x05, 0x03, 0xb9, 0x92, 0x05, 0x03, 0xc9, 0x87, 0x05,
+ 0x03, 0xda, 0x03, 0x07, 0x75, 0x96, 0x05, 0x03, 0xe9, 0x9a, 0x05, 0x03,
+ 0xf1, 0x92, 0x05, 0x04, 0x00, 0x96, 0x05, 0x04, 0x09, 0x9a, 0x05, 0x04,
+ 0x11, 0x92, 0x05, 0x04, 0x20, 0x96, 0x05, 0x04, 0x29, 0x9a, 0x05, 0x04,
+ 0x31, 0x92, 0x05, 0x04, 0x41, 0x94, 0x05, 0x04, 0x52, 0x03, 0x07, 0x7d,
+ 0x96, 0x05, 0x04, 0x59, 0x9a, 0x05, 0x04, 0x61, 0x92, 0x05, 0x04, 0x71,
+ 0x94, 0x05, 0x04, 0x82, 0x03, 0x07, 0x81, 0x96, 0x05, 0x0a, 0x89, 0x9a,
+ 0x05, 0x0a, 0x91, 0x92, 0x05, 0x0a, 0xa1, 0x87, 0x05, 0x0a, 0xb2, 0x03,
+ 0x07, 0x85, 0x96, 0x05, 0x0a, 0xb9, 0x9a, 0x05, 0x0a, 0xc1, 0x92, 0x05,
+ 0x0a, 0xd0, 0x96, 0x05, 0x0a, 0xd9, 0x9a, 0x05, 0x0a, 0xe1, 0x92, 0x05,
+ 0x0a, 0xf0, 0x9a, 0x05, 0x0a, 0xf9, 0x92, 0x05, 0x0b, 0x08, 0x96, 0x05,
+ 0x0b, 0x11, 0x9a, 0x05, 0x0b, 0x19, 0x92, 0x05, 0x0b, 0x29, 0x94, 0x05,
+ 0x0b, 0x3a, 0x03, 0x07, 0x89, 0x9a, 0x05, 0x0c, 0x09, 0x92, 0x05, 0x0c,
+ 0x18, 0x9a, 0x05, 0x0c, 0x39, 0x92, 0x05, 0x0c, 0x48, 0x9a, 0x05, 0x0c,
+ 0xa8, 0x92, 0x05, 0x21, 0x81, 0x9a, 0x05, 0x21, 0x71, 0x96, 0x05, 0x21,
+ 0x68, 0x94, 0x05, 0x21, 0x63, 0x03, 0x07, 0x8d, 0x92, 0x05, 0x21, 0x51,
+ 0x9a, 0x05, 0x21, 0x41, 0x96, 0x05, 0x21, 0x39, 0x91, 0x05, 0x21, 0x0a,
+ 0x03, 0x07, 0x91, 0x92, 0x05, 0x21, 0x31, 0x9a, 0x05, 0x21, 0x21, 0x96,
+ 0x05, 0x21, 0x18, 0x87, 0x05, 0x20, 0xf3, 0x03, 0x07, 0x99, 0x92, 0x05,
+ 0x20, 0xd9, 0x9a, 0x05, 0x20, 0xc9, 0x96, 0x05, 0x20, 0xc0, 0x9a, 0x05,
+ 0x1f, 0xd0, 0x9a, 0x05, 0x1f, 0xa0, 0x92, 0x05, 0x1f, 0x59, 0x9a, 0x05,
+ 0x1f, 0x49, 0x96, 0x05, 0x1f, 0x40, 0x92, 0x05, 0x1f, 0x39, 0x9a, 0x05,
+ 0x1f, 0x29, 0x96, 0x05, 0x1f, 0x20, 0x9a, 0x05, 0x20, 0xb0, 0x9a, 0x05,
+ 0x20, 0x80, 0x92, 0x05, 0x20, 0x39, 0x9a, 0x05, 0x20, 0x29, 0x96, 0x05,
+ 0x20, 0x20, 0x92, 0x05, 0x20, 0x19, 0x9a, 0x05, 0x20, 0x09, 0x96, 0x05,
+ 0x20, 0x00, 0x9a, 0x05, 0x1e, 0xf0, 0x9a, 0x05, 0x1e, 0xc0, 0x92, 0x05,
+ 0x1e, 0x61, 0x9a, 0x05, 0x1e, 0x50, 0x94, 0x05, 0x1c, 0x9b, 0x03, 0x07,
+ 0xa5, 0x92, 0x05, 0x1c, 0x89, 0x9a, 0x05, 0x1c, 0x79, 0x96, 0x05, 0x1c,
+ 0x70, 0x94, 0x05, 0x1c, 0x6b, 0x03, 0x07, 0xa9, 0x92, 0x05, 0x1c, 0x59,
+ 0x9a, 0x05, 0x1c, 0x49, 0x96, 0x05, 0x1c, 0x41, 0x91, 0x05, 0x1b, 0xfa,
+ 0x03, 0x07, 0xad, 0x92, 0x05, 0x1c, 0x39, 0x9a, 0x05, 0x1c, 0x29, 0x96,
+ 0x05, 0x1c, 0x20, 0x92, 0x05, 0x1c, 0x19, 0x9a, 0x05, 0x1c, 0x09, 0x96,
+ 0x05, 0x1c, 0x00, 0x9a, 0x05, 0x1b, 0xe0, 0x94, 0x05, 0x1e, 0x2b, 0x03,
+ 0x07, 0xb1, 0x92, 0x05, 0x1e, 0x19, 0x9a, 0x05, 0x1e, 0x09, 0x96, 0x05,
+ 0x1e, 0x00, 0x94, 0x05, 0x1d, 0xfb, 0x03, 0x07, 0xb5, 0x92, 0x05, 0x1d,
+ 0xe9, 0x9a, 0x05, 0x1d, 0xd9, 0x96, 0x05, 0x1d, 0xd1, 0x91, 0x05, 0x1d,
+ 0x82, 0x03, 0x07, 0xb9, 0x92, 0x05, 0x1d, 0xc9, 0x9a, 0x05, 0x1d, 0xb9,
+ 0x96, 0x05, 0x1d, 0xb0, 0x92, 0x05, 0x1d, 0xa9, 0x9a, 0x05, 0x1d, 0x99,
+ 0x96, 0x05, 0x1d, 0x90, 0x92, 0x05, 0x1d, 0x71, 0x9a, 0x05, 0x1d, 0x61,
+ 0x96, 0x05, 0x1d, 0x58, 0x92, 0x05, 0x1a, 0x19, 0x94, 0x05, 0x1a, 0x2b,
+ 0x03, 0x07, 0xc1, 0x96, 0x05, 0x1a, 0x01, 0x9a, 0x05, 0x1a, 0x08, 0x96,
+ 0x05, 0x19, 0x51, 0x9a, 0x05, 0x19, 0x59, 0x92, 0x05, 0x19, 0x69, 0x87,
+ 0x05, 0x19, 0x7a, 0x03, 0x07, 0xc5, 0x96, 0x05, 0x19, 0x89, 0x9a, 0x05,
+ 0x19, 0x91, 0x92, 0x05, 0x19, 0xa0, 0x96, 0x05, 0x19, 0xa9, 0x9a, 0x05,
+ 0x19, 0xb1, 0x92, 0x05, 0x19, 0xc0, 0x9a, 0x05, 0x19, 0xc9, 0x92, 0x05,
+ 0x19, 0xd8, 0x96, 0x05, 0x19, 0xe1, 0x9a, 0x05, 0x19, 0xe9, 0x92, 0x05,
+ 0x19, 0xf8, 0x9a, 0x05, 0x18, 0x69, 0x92, 0x05, 0x18, 0x78, 0x9a, 0x05,
+ 0x18, 0xa1, 0x92, 0x05, 0x18, 0xb0, 0x9a, 0x05, 0x19, 0x10, 0x9a, 0x05,
+ 0x19, 0x40, 0x94, 0x05, 0x1b, 0xdb, 0x03, 0x07, 0xcd, 0x92, 0x05, 0x1b,
+ 0xc9, 0x9a, 0x05, 0x1b, 0xb9, 0x96, 0x05, 0x1b, 0xb0, 0x94, 0x05, 0x1b,
+ 0xab, 0x03, 0x07, 0xd1, 0x92, 0x05, 0x1b, 0x99, 0x9a, 0x05, 0x1b, 0x89,
+ 0x91, 0x05, 0x1b, 0x3a, 0x03, 0x07, 0xd5, 0x92, 0x05, 0x1b, 0x81, 0x9a,
+ 0x05, 0x1b, 0x71, 0x96, 0x05, 0x1b, 0x68, 0x92, 0x05, 0x1b, 0x61, 0x9a,
+ 0x05, 0x1b, 0x51, 0x96, 0x05, 0x1b, 0x48, 0x92, 0x05, 0x1b, 0x21, 0x96,
+ 0x05, 0x1b, 0x09, 0x9a, 0x05, 0x1b, 0x10, 0x9a, 0x05, 0x16, 0x70, 0x9a,
+ 0x05, 0x16, 0x40, 0x9a, 0x05, 0x15, 0xd1, 0x92, 0x05, 0x15, 0xe0, 0x96,
+ 0x05, 0x15, 0x91, 0x9a, 0x05, 0x15, 0x99, 0x92, 0x05, 0x15, 0xa8, 0x92,
+ 0x05, 0x14, 0xd1, 0x9a, 0x05, 0x14, 0xc0, 0x9a, 0x05, 0x15, 0x30, 0x92,
+ 0x05, 0x14, 0x99, 0x9a, 0x05, 0x14, 0x88, 0x9a, 0x05, 0x16, 0xe1, 0x92,
+ 0x05, 0x16, 0xf0, 0x92, 0x05, 0x16, 0xd9, 0x9a, 0x05, 0x16, 0xc9, 0x96,
+ 0x05, 0x16, 0xc0, 0x87, 0x05, 0x16, 0xb3, 0x03, 0x07, 0xe1, 0x92, 0x05,
+ 0x16, 0x99, 0x9a, 0x05, 0x16, 0x89, 0x96, 0x05, 0x16, 0x80, 0x9a, 0x05,
+ 0x16, 0xf9, 0x92, 0x05, 0x17, 0x08, 0x9a, 0x05, 0x17, 0x11, 0x92, 0x05,
+ 0x17, 0x21, 0x94, 0x05, 0x17, 0x32, 0x03, 0x07, 0xed, 0x96, 0x05, 0x17,
+ 0x39, 0x9a, 0x05, 0x17, 0x41, 0x92, 0x05, 0x17, 0x51, 0x94, 0x05, 0x17,
+ 0x62, 0x03, 0x07, 0xf1, 0x94, 0x05, 0x1b, 0x03, 0x03, 0x07, 0xf5, 0x92,
+ 0x05, 0x1a, 0xf1, 0x9a, 0x05, 0x1a, 0xe1, 0x96, 0x05, 0x1a, 0xd8, 0x94,
+ 0x05, 0x1a, 0xd3, 0x03, 0x07, 0xf9, 0x92, 0x05, 0x1a, 0xc1, 0x9a, 0x05,
+ 0x1a, 0xb1, 0x96, 0x05, 0x1a, 0xa9, 0x91, 0x05, 0x1a, 0x5a, 0x03, 0x07,
+ 0xfd, 0x92, 0x05, 0x1a, 0xa1, 0x9a, 0x05, 0x1a, 0x91, 0x96, 0x05, 0x1a,
+ 0x88, 0x92, 0x05, 0x1a, 0x81, 0x96, 0x05, 0x1a, 0x69, 0x9a, 0x05, 0x1a,
+ 0x70, 0x96, 0x05, 0x1a, 0x31, 0x9a, 0x05, 0x1a, 0x39, 0x92, 0x05, 0x1a,
+ 0x48, 0x9a, 0x05, 0x15, 0x60, 0x92, 0x05, 0x14, 0x31, 0x9a, 0x05, 0x14,
+ 0x20, 0x92, 0x05, 0x12, 0xe1, 0x9a, 0x05, 0x12, 0xd0, 0x92, 0x05, 0x12,
+ 0xc9, 0x9a, 0x05, 0x12, 0xb9, 0x96, 0x05, 0x12, 0xb0, 0x92, 0x05, 0x12,
+ 0xa9, 0x9a, 0x05, 0x12, 0x99, 0x96, 0x05, 0x12, 0x90, 0x92, 0x05, 0x12,
+ 0x89, 0x9a, 0x05, 0x12, 0x79, 0x96, 0x05, 0x12, 0x70, 0x96, 0x05, 0x12,
+ 0x31, 0x9a, 0x05, 0x12, 0x39, 0x92, 0x05, 0x12, 0x49, 0x87, 0x05, 0x12,
+ 0x62, 0x03, 0x08, 0x05, 0x9a, 0x05, 0x13, 0x78, 0x96, 0x05, 0x04, 0x89,
+ 0x9a, 0x05, 0x04, 0x91, 0x92, 0x05, 0x04, 0xa1, 0x87, 0x05, 0x04, 0xb2,
+ 0x03, 0x08, 0x11, 0x96, 0x05, 0x04, 0xc1, 0x9a, 0x05, 0x04, 0xc9, 0x92,
+ 0x05, 0x04, 0xd8, 0x91, 0x05, 0x04, 0xeb, 0x03, 0x08, 0x19, 0x96, 0x05,
+ 0x05, 0x19, 0x9a, 0x05, 0x05, 0x21, 0x92, 0x05, 0x05, 0x31, 0x94, 0x05,
+ 0x05, 0x42, 0x03, 0x08, 0x21, 0x96, 0x05, 0x04, 0xf9, 0x9a, 0x05, 0x05,
+ 0x01, 0x92, 0x05, 0x05, 0x10, 0x9a, 0x05, 0x05, 0x49, 0x92, 0x05, 0x05,
+ 0x59, 0x94, 0x05, 0x05, 0x6a, 0x03, 0x08, 0x25, 0x96, 0x05, 0x0b, 0x41,
+ 0x9a, 0x05, 0x0b, 0x49, 0x92, 0x05, 0x0b, 0x59, 0x87, 0x05, 0x0b, 0x72,
+ 0x03, 0x08, 0x29, 0x96, 0x05, 0x0b, 0x81, 0x9a, 0x05, 0x0b, 0x89, 0x92,
+ 0x05, 0x0b, 0x98, 0x91, 0x05, 0x0b, 0xab, 0x03, 0x08, 0x35, 0x96, 0x05,
+ 0x0b, 0xe0, 0x96, 0x05, 0x0b, 0xb1, 0x9a, 0x05, 0x0b, 0xb9, 0x92, 0x05,
+ 0x0b, 0xc0, 0x9a, 0x05, 0x0b, 0xc9, 0x92, 0x05, 0x0b, 0xd8, 0x96, 0x05,
+ 0x0c, 0xb9, 0x9a, 0x05, 0x0c, 0xc1, 0x92, 0x05, 0x0c, 0xd1, 0x87, 0x05,
+ 0x0c, 0xe2, 0x03, 0x08, 0x39, 0x96, 0x05, 0x0c, 0xf1, 0x9a, 0x05, 0x0c,
+ 0xf9, 0x92, 0x05, 0x0d, 0x08, 0x91, 0x05, 0x0d, 0x1b, 0x03, 0x08, 0x41,
+ 0x96, 0x05, 0x0d, 0x59, 0x9a, 0x05, 0x0d, 0x61, 0x92, 0x05, 0x0d, 0x71,
+ 0x94, 0x05, 0x0d, 0x82, 0x03, 0x08, 0x49, 0x9a, 0x05, 0x0d, 0x29, 0x92,
+ 0x05, 0x0d, 0x38, 0x9a, 0x05, 0x0d, 0x41, 0x92, 0x05, 0x0d, 0x50, 0x96,
+ 0x05, 0x0d, 0x89, 0x9a, 0x05, 0x0d, 0x91, 0x92, 0x05, 0x0d, 0xa1, 0x94,
+ 0x05, 0x0d, 0xb2, 0x03, 0x08, 0x4d, 0x9a, 0x05, 0x23, 0xb1, 0x96, 0x05,
+ 0x23, 0xa9, 0x92, 0x05, 0x23, 0xc0, 0x96, 0x05, 0x23, 0xc9, 0x9a, 0x05,
+ 0x23, 0xd1, 0x92, 0x05, 0x23, 0xe0, 0x9a, 0x05, 0x24, 0x28, 0x9a, 0x05,
+ 0x24, 0x58, 0x9a, 0x05, 0x23, 0x78, 0x96, 0x05, 0x23, 0x09, 0x9a, 0x05,
+ 0x23, 0x11, 0x92, 0x05, 0x23, 0x20, 0x92, 0x05, 0x12, 0x19, 0x94, 0x05,
+ 0x12, 0x2b, 0x03, 0x08, 0x51, 0x91, 0x05, 0x11, 0xbb, 0x03, 0x08, 0x55,
+ 0x96, 0x05, 0x12, 0x01, 0x9a, 0x05, 0x12, 0x08, 0x9a, 0x05, 0x11, 0x80,
+ 0x96, 0x05, 0x11, 0x91, 0x9a, 0x05, 0x11, 0x99, 0x92, 0x05, 0x11, 0xa8,
+ 0x96, 0x05, 0x11, 0xc9, 0x9a, 0x05, 0x11, 0xd1, 0x92, 0x05, 0x11, 0xe0,
+ 0x9a, 0x05, 0x11, 0xe9, 0x92, 0x05, 0x11, 0xf8, 0x9a, 0x05, 0x05, 0x91,
+ 0x92, 0x05, 0x05, 0xa0, 0x96, 0x05, 0x05, 0xc9, 0x9a, 0x05, 0x05, 0xd1,
+ 0x92, 0x05, 0x05, 0xe0, 0x9a, 0x05, 0x06, 0x38, 0x96, 0x05, 0x00, 0xd1,
+ 0x9a, 0x05, 0x00, 0xd9, 0x92, 0x05, 0x00, 0xe8, 0x9a, 0x05, 0x01, 0x11,
+ 0x92, 0x05, 0x01, 0x20, 0x9a, 0x05, 0x01, 0x80, 0x9a, 0x05, 0x01, 0xb0,
+ 0x96, 0x05, 0x02, 0xb1, 0x9a, 0x05, 0x02, 0xb9, 0x92, 0x05, 0x02, 0xc9,
+ 0x87, 0x05, 0x02, 0xe2, 0x03, 0x08, 0x5d, 0x96, 0x05, 0x02, 0xf1, 0x9a,
+ 0x05, 0x02, 0xf9, 0x92, 0x05, 0x03, 0x08, 0x91, 0x05, 0x03, 0x1b, 0x03,
+ 0x08, 0x69, 0x96, 0x05, 0x03, 0x61, 0x9a, 0x05, 0x03, 0x69, 0x92, 0x05,
+ 0x03, 0x79, 0x94, 0x05, 0x03, 0x8a, 0x03, 0x08, 0x71, 0x96, 0x05, 0x03,
+ 0x29, 0x9a, 0x05, 0x03, 0x31, 0x92, 0x05, 0x03, 0x40, 0x9a, 0x05, 0x03,
+ 0x49, 0x92, 0x05, 0x03, 0x58, 0x96, 0x05, 0x03, 0x91, 0x9a, 0x05, 0x03,
+ 0x99, 0x92, 0x05, 0x03, 0xa8, 0x9a, 0x05, 0x01, 0xe1, 0x92, 0x05, 0x01,
+ 0xf0, 0x9a, 0x05, 0x02, 0x19, 0x92, 0x05, 0x02, 0x28, 0x9a, 0x05, 0x02,
+ 0x70, 0x9a, 0x05, 0x02, 0xa0, 0x9a, 0x05, 0x06, 0xe0, 0x96, 0x05, 0x07,
+ 0x31, 0x9a, 0x05, 0x07, 0x39, 0x92, 0x05, 0x07, 0x48, 0x9a, 0x05, 0x07,
+ 0xc0, 0x9a, 0x05, 0x07, 0xf0, 0x9a, 0x05, 0x08, 0x21, 0x92, 0x05, 0x08,
+ 0x30, 0x9a, 0x05, 0x08, 0x58, 0x9a, 0x05, 0x08, 0xc0, 0x96, 0x05, 0x09,
+ 0xb1, 0x9a, 0x05, 0x09, 0xb9, 0x92, 0x05, 0x09, 0xc9, 0x87, 0x05, 0x09,
+ 0xda, 0x03, 0x08, 0x75, 0x96, 0x05, 0x09, 0xe9, 0x9a, 0x05, 0x09, 0xf1,
+ 0x92, 0x05, 0x0a, 0x00, 0x91, 0x05, 0x0a, 0x13, 0x03, 0x08, 0x7d, 0x96,
+ 0x05, 0x0a, 0x59, 0x9a, 0x05, 0x0a, 0x61, 0x92, 0x05, 0x0a, 0x71, 0x94,
+ 0x05, 0x0a, 0x82, 0x03, 0x08, 0x85, 0x96, 0x05, 0x0a, 0x21, 0x9a, 0x05,
+ 0x0a, 0x29, 0x92, 0x05, 0x0a, 0x38, 0x9a, 0x05, 0x0a, 0x41, 0x92, 0x05,
+ 0x0a, 0x50, 0x9a, 0x05, 0x08, 0xf1, 0x92, 0x05, 0x09, 0x00, 0x96, 0x05,
+ 0x09, 0x29, 0x9a, 0x05, 0x09, 0x31, 0x92, 0x05, 0x09, 0x40, 0x9a, 0x05,
+ 0x09, 0xa0, 0x96, 0x05, 0x0d, 0xd9, 0x9a, 0x05, 0x0d, 0xe1, 0x92, 0x05,
+ 0x0d, 0xf0, 0x96, 0x05, 0x0e, 0x19, 0x9a, 0x05, 0x0e, 0x21, 0x92, 0x05,
+ 0x0e, 0x30, 0x9a, 0x05, 0x0e, 0x90, 0x9a, 0x05, 0x0e, 0xc0, 0x96, 0x05,
+ 0x0e, 0xf1, 0x9a, 0x05, 0x0e, 0xf9, 0x92, 0x05, 0x0f, 0x08, 0x96, 0x05,
+ 0x0f, 0x31, 0x9a, 0x05, 0x0f, 0x39, 0x92, 0x05, 0x0f, 0x48, 0x9a, 0x05,
+ 0x0f, 0xb0, 0x96, 0x05, 0x10, 0xa1, 0x9a, 0x05, 0x10, 0xa9, 0x87, 0x05,
+ 0x10, 0xc2, 0x03, 0x08, 0x89, 0x96, 0x05, 0x10, 0xd1, 0x9a, 0x05, 0x10,
+ 0xd9, 0x92, 0x05, 0x10, 0xe8, 0x91, 0x05, 0x11, 0x03, 0x03, 0x08, 0x91,
+ 0x96, 0x05, 0x11, 0x51, 0x9a, 0x05, 0x11, 0x59, 0x92, 0x05, 0x11, 0x69,
+ 0x94, 0x05, 0x11, 0x7a, 0x03, 0x08, 0x9d, 0x96, 0x05, 0x11, 0x11, 0x9a,
+ 0x05, 0x11, 0x19, 0x92, 0x05, 0x11, 0x28, 0x96, 0x05, 0x11, 0x31, 0x9a,
+ 0x05, 0x11, 0x39, 0x92, 0x05, 0x11, 0x48, 0x9a, 0x05, 0x0f, 0xe1, 0x92,
+ 0x05, 0x0f, 0xf0, 0x9a, 0x05, 0x10, 0x19, 0x92, 0x05, 0x10, 0x28, 0x9a,
+ 0x05, 0x10, 0x90, 0x0c, 0xc3, 0x08, 0xa1, 0x0a, 0xc3, 0x08, 0xac, 0x42,
+ 0x01, 0xe2, 0xc3, 0x08, 0xbf, 0xc2, 0x16, 0x5a, 0x00, 0xaa, 0x09, 0xc2,
+ 0x00, 0x8e, 0x00, 0xa5, 0x11, 0x8f, 0x00, 0xa5, 0xf8, 0x9b, 0x00, 0xc6,
+ 0x11, 0x91, 0x00, 0xa8, 0xf8, 0x83, 0x00, 0xa9, 0x18, 0x8b, 0x00, 0xa8,
+ 0xd8, 0x83, 0x08, 0xd5, 0xd3, 0x03, 0x08, 0xd8, 0x91, 0x08, 0xd5, 0xc3,
+ 0x03, 0x08, 0xdc, 0x8b, 0x08, 0xd5, 0xb2, 0x03, 0x08, 0xe0, 0x83, 0x08,
+ 0xd5, 0xa3, 0x03, 0x08, 0xe4, 0x91, 0x08, 0xd5, 0x93, 0x03, 0x08, 0xe8,
+ 0x8b, 0x08, 0xd5, 0x82, 0x03, 0x08, 0xec, 0xc2, 0x04, 0xc6, 0x00, 0xa0,
+ 0xd9, 0xc2, 0x01, 0x24, 0x00, 0xa0, 0xb0, 0xc3, 0xe3, 0x43, 0x00, 0xa8,
+ 0x79, 0xc2, 0x04, 0x22, 0x00, 0xa8, 0x53, 0x03, 0x08, 0xf0, 0xc3, 0x01,
+ 0x8b, 0x00, 0xa8, 0x69, 0xc3, 0x14, 0x72, 0x00, 0xa8, 0x21, 0xc2, 0x1a,
+ 0xd1, 0x00, 0xa8, 0x59, 0xc3, 0x09, 0x0e, 0x00, 0xa8, 0x60, 0x8b, 0x00,
+ 0xac, 0x70, 0x83, 0x00, 0xab, 0xd0, 0x91, 0x00, 0xab, 0xc0, 0x8b, 0x00,
+ 0xab, 0xb0, 0x07, 0xc3, 0x08, 0xf4, 0x8b, 0x00, 0xa2, 0xa1, 0x0e, 0xc3,
+ 0x08, 0xfc, 0x1c, 0x43, 0x09, 0x13, 0xc2, 0x01, 0x4a, 0x00, 0xc7, 0x91,
+ 0x83, 0x00, 0xb0, 0xd9, 0x8b, 0x00, 0xb0, 0xc9, 0x87, 0x00, 0xb0, 0xbb,
+ 0x03, 0x09, 0x2a, 0x91, 0x00, 0xb0, 0xb1, 0x97, 0x00, 0xb0, 0xa1, 0x0c,
+ 0x43, 0x09, 0x2e, 0x19, 0xc3, 0x09, 0x45, 0x83, 0x00, 0xaf, 0xa3, 0x03,
+ 0x09, 0x4d, 0x8b, 0x00, 0xaf, 0x99, 0x87, 0x00, 0xaf, 0x8b, 0x03, 0x09,
+ 0x51, 0x91, 0x00, 0xaf, 0x81, 0x97, 0x00, 0xaf, 0x79, 0x0a, 0x43, 0x09,
+ 0x55, 0x16, 0xc3, 0x09, 0x6c, 0x15, 0xc3, 0x09, 0x87, 0x0a, 0xc3, 0x09,
+ 0x9e, 0x0e, 0x43, 0x09, 0xb5, 0x83, 0x00, 0xb3, 0x31, 0x8b, 0x00, 0xb3,
+ 0x29, 0x87, 0x00, 0xb3, 0x1b, 0x03, 0x09, 0xd0, 0x91, 0x00, 0xb3, 0x11,
+ 0x97, 0x00, 0xb3, 0x08, 0x83, 0x00, 0xb0, 0x99, 0x8b, 0x00, 0xb0, 0x91,
+ 0x87, 0x00, 0xb0, 0x83, 0x03, 0x09, 0xd4, 0x91, 0x00, 0xb0, 0x79, 0x97,
+ 0x00, 0xb0, 0x70, 0x83, 0x00, 0xb0, 0x69, 0x8b, 0x00, 0xb0, 0x61, 0x87,
+ 0x00, 0xb0, 0x53, 0x03, 0x09, 0xd8, 0x91, 0x00, 0xb0, 0x49, 0x97, 0x00,
+ 0xb0, 0x40, 0x83, 0x00, 0xb0, 0x39, 0x8b, 0x00, 0xb0, 0x31, 0x87, 0x00,
+ 0xb0, 0x23, 0x03, 0x09, 0xdc, 0x91, 0x00, 0xb0, 0x19, 0x97, 0x00, 0xb0,
+ 0x11, 0x89, 0x00, 0xa6, 0x88, 0x8d, 0x00, 0xb0, 0x0b, 0x03, 0x09, 0xe0,
+ 0x0a, 0x43, 0x09, 0xf7, 0x83, 0x00, 0xaf, 0x69, 0x8b, 0x00, 0xaf, 0x61,
+ 0x87, 0x00, 0xaf, 0x53, 0x03, 0x0a, 0x0e, 0x91, 0x00, 0xaf, 0x49, 0x97,
+ 0x00, 0xaf, 0x40, 0x19, 0xc3, 0x0a, 0x12, 0xc2, 0x02, 0xe0, 0x00, 0xa1,
+ 0xb1, 0x8b, 0x00, 0xa1, 0xb8, 0x83, 0x00, 0xae, 0xa9, 0x8b, 0x00, 0xae,
+ 0xa1, 0x87, 0x00, 0xae, 0x93, 0x03, 0x0a, 0x29, 0x91, 0x00, 0xae, 0x89,
+ 0x97, 0x00, 0xae, 0x80, 0x83, 0x00, 0xae, 0x79, 0x8b, 0x00, 0xae, 0x71,
+ 0x87, 0x00, 0xae, 0x63, 0x03, 0x0a, 0x2d, 0x91, 0x00, 0xae, 0x59, 0x97,
+ 0x00, 0xae, 0x50, 0x0a, 0xc3, 0x0a, 0x31, 0x97, 0x00, 0xb1, 0x11, 0x91,
+ 0x00, 0xb1, 0x19, 0x87, 0x00, 0xb1, 0x23, 0x03, 0x0a, 0x48, 0x8b, 0x00,
+ 0xb1, 0x31, 0x83, 0x00, 0xb1, 0x38, 0xc8, 0xbb, 0x62, 0x00, 0xb2, 0x38,
+ 0x97, 0x00, 0xb2, 0x01, 0x91, 0x00, 0xb2, 0x09, 0x87, 0x00, 0xb2, 0x13,
+ 0x03, 0x0a, 0x4c, 0x8b, 0x00, 0xb2, 0x21, 0x83, 0x00, 0xb2, 0x28, 0x97,
+ 0x00, 0xb2, 0x71, 0x91, 0x00, 0xb2, 0x79, 0x87, 0x00, 0xb2, 0x83, 0x03,
+ 0x0a, 0x50, 0x8b, 0x00, 0xb2, 0x91, 0x83, 0x00, 0xb2, 0x99, 0x8a, 0x00,
+ 0xb2, 0xd2, 0x03, 0x0a, 0x54, 0x83, 0x00, 0xc7, 0x38, 0x91, 0x00, 0xc7,
+ 0x30, 0x83, 0x00, 0xab, 0x40, 0x83, 0x00, 0xad, 0x68, 0x91, 0x00, 0xad,
+ 0x58, 0x8b, 0x00, 0xad, 0x48, 0x8e, 0x00, 0xa7, 0x5b, 0x03, 0x0a, 0x6b,
+ 0x94, 0x00, 0xaa, 0x8b, 0x03, 0x0a, 0x81, 0x16, 0xc3, 0x0a, 0x97, 0xc4,
+ 0xe0, 0x77, 0x00, 0xaa, 0xe1, 0x9b, 0x00, 0xaa, 0x03, 0x03, 0x0a, 0xa1,
+ 0x15, 0xc3, 0x0a, 0xa5, 0x92, 0x00, 0xa2, 0x53, 0x03, 0x0a, 0xaf, 0x42,
+ 0x28, 0x70, 0xc3, 0x0a, 0xb3, 0x19, 0xc3, 0x0a, 0xcc, 0x42, 0x04, 0x22,
+ 0xc3, 0x0a, 0xe5, 0x8f, 0x00, 0xa5, 0xe3, 0x03, 0x0a, 0xfe, 0x42, 0x01,
+ 0x49, 0x43, 0x0b, 0x02, 0xc8, 0xb7, 0x02, 0x00, 0xb3, 0xf1, 0xc2, 0x00,
+ 0x8e, 0x00, 0xac, 0xfa, 0x03, 0x0b, 0x0d, 0xc9, 0xb4, 0x25, 0x00, 0xc6,
+ 0xf9, 0x0b, 0x43, 0x0b, 0x21, 0xc9, 0xaa, 0x05, 0x00, 0xc6, 0xf1, 0xd6,
+ 0x2f, 0xca, 0x00, 0xa1, 0x40, 0x45, 0x00, 0x5a, 0xc3, 0x0b, 0x2d, 0xc7,
+ 0x32, 0xb9, 0x00, 0xa1, 0x50, 0x91, 0x00, 0xc6, 0x5b, 0x03, 0x0b, 0x39,
+ 0x8b, 0x00, 0xc6, 0x3a, 0x03, 0x0b, 0x3d, 0x96, 0x08, 0x2a, 0xb0, 0x8d,
+ 0x08, 0x2a, 0x80, 0x98, 0x05, 0x5d, 0xc1, 0x97, 0x05, 0x5d, 0xb9, 0x91,
+ 0x05, 0x5d, 0xb1, 0x8b, 0x05, 0x5d, 0xa9, 0x83, 0x05, 0x5d, 0x99, 0x87,
+ 0x05, 0x5d, 0xa0, 0x98, 0x05, 0x5d, 0x91, 0x83, 0x05, 0x5d, 0x69, 0x87,
+ 0x05, 0x5d, 0x71, 0x97, 0x05, 0x5d, 0x89, 0x8b, 0x05, 0x5d, 0x79, 0x91,
+ 0x05, 0x5d, 0x80, 0x8a, 0x05, 0x5c, 0x80, 0x8a, 0x00, 0x9e, 0x00, 0x83,
+ 0x00, 0x9e, 0xe9, 0x87, 0x00, 0x9e, 0xf1, 0x8b, 0x00, 0x9e, 0xf9, 0x91,
+ 0x00, 0x9f, 0x01, 0x97, 0x00, 0x9f, 0x09, 0x98, 0x00, 0x9f, 0x10, 0x83,
+ 0x00, 0x9f, 0x19, 0x87, 0x00, 0x9f, 0x21, 0x8b, 0x00, 0x9f, 0x29, 0x91,
+ 0x00, 0x9f, 0x31, 0x97, 0x00, 0x9f, 0x39, 0x98, 0x00, 0x9f, 0x40, 0xc7,
+ 0xc8, 0x00, 0x0f, 0x01, 0x49, 0xc9, 0xae, 0x6a, 0x0f, 0x01, 0x38, 0x14,
+ 0xc3, 0x0b, 0x41, 0xc3, 0x00, 0x15, 0x0f, 0x00, 0x28, 0x83, 0x0f, 0x00,
+ 0xf1, 0xc2, 0x01, 0x6f, 0x0f, 0x00, 0xd8, 0xc9, 0xaf, 0x93, 0x0e, 0x92,
+ 0x21, 0x16, 0x43, 0x0b, 0x49, 0x47, 0x02, 0x0e, 0xc3, 0x0b, 0x55, 0x46,
+ 0x09, 0x97, 0x43, 0x0b, 0x71, 0x02, 0xc3, 0x0b, 0x8b, 0x00, 0x43, 0x0b,
+ 0x97, 0xc6, 0x05, 0x01, 0x00, 0x18, 0xb8, 0x45, 0x01, 0xa2, 0xc3, 0x0b,
+ 0xa6, 0x42, 0x00, 0xd0, 0xc3, 0x0b, 0xb0, 0x4c, 0x1a, 0x50, 0xc3, 0x0b,
+ 0xbc, 0xca, 0x9a, 0x3d, 0x00, 0x18, 0xc8, 0xe0, 0x0b, 0x67, 0x01, 0x07,
+ 0x60, 0x44, 0x01, 0xa3, 0xc3, 0x0b, 0xc8, 0x45, 0x01, 0xb4, 0x43, 0x0b,
+ 0xd2, 0xc5, 0x00, 0xd4, 0x01, 0x07, 0x09, 0xc5, 0x05, 0x02, 0x00, 0x1a,
+ 0x68, 0xcb, 0x95, 0xe5, 0x01, 0x06, 0x81, 0x48, 0xbc, 0x3a, 0x43, 0x0b,
+ 0xde, 0xca, 0x9e, 0x6e, 0x00, 0xd6, 0x19, 0xca, 0x09, 0xf2, 0x00, 0xd6,
+ 0x08, 0xcd, 0x42, 0x35, 0x00, 0x19, 0xb1, 0xce, 0x2c, 0x62, 0x00, 0x19,
+ 0xc0, 0x46, 0x00, 0x8b, 0x43, 0x0b, 0xea, 0x46, 0x00, 0x8b, 0x43, 0x0b,
+ 0xf6, 0xcf, 0x6b, 0x16, 0x00, 0xef, 0x91, 0xc4, 0xde, 0x3f, 0x00, 0xef,
+ 0x39, 0x98, 0x00, 0xee, 0xb1, 0x91, 0x00, 0xee, 0xa9, 0x87, 0x00, 0xee,
+ 0xa0, 0xc6, 0x05, 0x01, 0x00, 0xd5, 0xf8, 0xc2, 0x01, 0xe2, 0x00, 0xef,
+ 0x79, 0xc2, 0x00, 0x8e, 0x00, 0xee, 0xc8, 0xd9, 0x1e, 0xb4, 0x00, 0xef,
+ 0x61, 0xc5, 0xb8, 0xe3, 0x00, 0xef, 0x28, 0xd5, 0x35, 0x21, 0x00, 0xee,
+ 0x98, 0xc4, 0x74, 0x82, 0x00, 0xef, 0x50, 0xc3, 0x04, 0x87, 0x00, 0xef,
+ 0x48, 0x00, 0x43, 0x0c, 0x02, 0x47, 0x67, 0x21, 0x43, 0x0c, 0x0e, 0xc8,
+ 0xbd, 0xf2, 0x00, 0xee, 0xb8, 0xcd, 0x76, 0x83, 0x00, 0xd6, 0x00, 0xc6,
+ 0x05, 0x01, 0x07, 0xf1, 0x38, 0xc6, 0x05, 0x01, 0x07, 0xf1, 0x40, 0x49,
+ 0x07, 0xbb, 0xc3, 0x0c, 0x1e, 0xce, 0x1d, 0x93, 0x00, 0x1b, 0x0b, 0x03,
+ 0x0c, 0x2a, 0xd0, 0x30, 0x6a, 0x00, 0xee, 0x69, 0x12, 0xc3, 0x0c, 0x30,
+ 0x11, 0xc3, 0x0c, 0x3c, 0xcc, 0x83, 0x0d, 0x00, 0x18, 0x59, 0xcc, 0x1f,
+ 0x0c, 0x00, 0x18, 0x79, 0xc8, 0x80, 0x2e, 0x00, 0x19, 0x99, 0x42, 0x00,
+ 0x2c, 0xc3, 0x0c, 0x48, 0xc5, 0x1d, 0x88, 0x00, 0x1a, 0xeb, 0x03, 0x0c,
+ 0x54, 0xc6, 0x60, 0xb1, 0x00, 0x1a, 0xf3, 0x03, 0x0c, 0x5a, 0xc5, 0x1e,
+ 0xc8, 0x00, 0x1b, 0x02, 0x03, 0x0c, 0x60, 0xc5, 0x05, 0x02, 0x00, 0x19,
+ 0x1b, 0x03, 0x0c, 0x64, 0xc5, 0x00, 0xd4, 0x00, 0x18, 0x3a, 0x03, 0x0c,
+ 0x6a, 0xcc, 0x83, 0x0d, 0x00, 0xee, 0x09, 0xcc, 0x1f, 0x0c, 0x00, 0xee,
+ 0x00, 0xc2, 0x07, 0xb2, 0x05, 0x47, 0x81, 0xc2, 0x00, 0x39, 0x05, 0x47,
+ 0x79, 0xc2, 0x00, 0xb0, 0x05, 0x47, 0x71, 0xc2, 0x00, 0xc1, 0x05, 0x47,
+ 0x69, 0xc2, 0x02, 0x2b, 0x05, 0x47, 0x61, 0x97, 0x05, 0x47, 0x59, 0x83,
+ 0x05, 0x47, 0x50, 0xc4, 0x26, 0x78, 0x05, 0x47, 0x49, 0xc5, 0x06, 0xdb,
+ 0x05, 0x47, 0x41, 0x15, 0xc3, 0x0c, 0x6e, 0x08, 0xc3, 0x0c, 0x7a, 0x16,
+ 0xc3, 0x0c, 0x86, 0xc3, 0x05, 0x14, 0x05, 0x47, 0x09, 0xc4, 0x15, 0xe7,
+ 0x05, 0x47, 0x00, 0xcc, 0x8a, 0x8d, 0x00, 0x24, 0x21, 0xc5, 0xc7, 0x29,
+ 0x05, 0x33, 0xe8, 0x00, 0x43, 0x0c, 0x92, 0x88, 0x05, 0x34, 0xf1, 0x8e,
+ 0x01, 0x6f, 0x39, 0x8f, 0x01, 0x6f, 0x41, 0x90, 0x01, 0x6f, 0x49, 0x94,
+ 0x01, 0x6f, 0x61, 0x95, 0x01, 0x6f, 0x6a, 0x03, 0x0c, 0x9e, 0x48, 0xb7,
+ 0x92, 0xc3, 0x0c, 0xa4, 0x87, 0x01, 0x6f, 0xb8, 0xcb, 0x91, 0xba, 0x05,
+ 0x33, 0xa0, 0x8b, 0x05, 0x33, 0xb1, 0xc3, 0x21, 0x51, 0x05, 0x33, 0xc9,
+ 0xc2, 0x00, 0xfe, 0x01, 0x6f, 0xd1, 0x97, 0x01, 0x6f, 0xd8, 0xc7, 0x8a,
+ 0x86, 0x05, 0x33, 0xc0, 0xc8, 0x84, 0xc1, 0x05, 0x33, 0xd1, 0x0a, 0x43,
+ 0x0c, 0xb2, 0xc4, 0x6d, 0xb5, 0x05, 0x33, 0xd8, 0x87, 0x01, 0x6f, 0x01,
+ 0xc6, 0xc8, 0x01, 0x01, 0x6f, 0xf0, 0x87, 0x01, 0x6f, 0x19, 0xc4, 0xdd,
+ 0xb2, 0x01, 0x6f, 0xc0, 0xc2, 0x01, 0x7f, 0x01, 0x6f, 0x21, 0x87, 0x01,
+ 0x6f, 0x28, 0x87, 0x01, 0x6f, 0x71, 0xc2, 0x00, 0x40, 0x01, 0x6f, 0x80,
+ 0xc6, 0x00, 0x2b, 0x00, 0x18, 0xa8, 0xc5, 0x00, 0xd4, 0x00, 0xd6, 0x39,
+ 0xc5, 0x05, 0x02, 0x00, 0x19, 0x58, 0xc3, 0x0f, 0xbe, 0x00, 0x18, 0x8b,
+ 0x03, 0x0c, 0xbc, 0xca, 0x32, 0xf5, 0x00, 0x19, 0xd0, 0xc6, 0x05, 0x01,
+ 0x07, 0xf1, 0x58, 0xc6, 0x05, 0x01, 0x07, 0xf1, 0x60, 0xc2, 0x06, 0xdb,
+ 0x00, 0x1f, 0x39, 0x8b, 0x01, 0x65, 0x68, 0xc3, 0x07, 0x4a, 0x00, 0x1f,
+ 0x69, 0xc2, 0x06, 0xdb, 0x00, 0x1f, 0x18, 0xc4, 0x06, 0x5a, 0x01, 0x65,
+ 0x99, 0xc4, 0xca, 0x0b, 0x01, 0x65, 0xc9, 0xc2, 0x00, 0xec, 0x01, 0x65,
+ 0xd9, 0xc4, 0x01, 0x68, 0x01, 0x66, 0x58, 0x47, 0xc1, 0x7e, 0xc3, 0x0c,
+ 0xc2, 0x47, 0x96, 0x0a, 0x43, 0x0c, 0xea, 0xc3, 0xd1, 0x8c, 0x01, 0x65,
+ 0xb9, 0xc2, 0x00, 0xec, 0x01, 0x65, 0xe9, 0xc4, 0x9b, 0xae, 0x01, 0x67,
+ 0x61, 0xc6, 0xd0, 0x3d, 0x01, 0x67, 0x70, 0xc3, 0x07, 0x4a, 0x00, 0x1f,
+ 0x61, 0xc2, 0x06, 0xdb, 0x00, 0x1f, 0x10, 0xc4, 0x06, 0x5a, 0x01, 0x65,
+ 0x91, 0xc4, 0xca, 0x0b, 0x01, 0x65, 0xc1, 0xc2, 0x00, 0xec, 0x01, 0x65,
+ 0xd1, 0xc4, 0x01, 0x68, 0x01, 0x66, 0x50, 0x8b, 0x01, 0x65, 0x61, 0xc2,
+ 0x06, 0xdb, 0x00, 0x1f, 0x30, 0x47, 0xc1, 0x7e, 0xc3, 0x0c, 0xfa, 0x47,
+ 0x96, 0x0a, 0x43, 0x0d, 0x22, 0xc3, 0xd1, 0x8c, 0x01, 0x65, 0xb1, 0xc2,
+ 0x00, 0xec, 0x01, 0x65, 0xe1, 0xc4, 0x9b, 0xae, 0x01, 0x67, 0x59, 0xc6,
+ 0xd0, 0x3d, 0x01, 0x67, 0x68, 0xc4, 0x18, 0x12, 0x08, 0x17, 0x59, 0xc9,
+ 0x18, 0x05, 0x08, 0x17, 0xa0, 0xc4, 0x0d, 0x0e, 0x08, 0x17, 0x61, 0xcb,
+ 0x13, 0xfa, 0x08, 0x17, 0xa8, 0xc3, 0x0d, 0x0f, 0x08, 0x17, 0x69, 0xca,
+ 0x9c, 0x5c, 0x08, 0x17, 0xb0, 0xc3, 0x45, 0x6b, 0x08, 0x17, 0x71, 0xca,
+ 0x37, 0x63, 0x08, 0x17, 0xb8, 0xc2, 0x0d, 0x10, 0x08, 0x17, 0x79, 0xc8,
+ 0x0d, 0x03, 0x08, 0x17, 0xc0, 0xc8, 0x0d, 0x03, 0x08, 0x17, 0xc9, 0xc2,
+ 0x0d, 0x10, 0x08, 0x17, 0x80, 0xd9, 0x20, 0x76, 0x0f, 0xa8, 0x10, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xab, 0x39, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0xd8, 0xc7,
+ 0xc1, 0xd9, 0x0f, 0xaa, 0xe9, 0xc7, 0xc7, 0x6d, 0x0f, 0xaa, 0x88, 0xc6,
+ 0xd0, 0xfd, 0x0f, 0xc8, 0x13, 0x03, 0x0d, 0x32, 0xc6, 0xcb, 0xf3, 0x0f,
+ 0xaa, 0x00, 0xc5, 0x8e, 0xdf, 0x01, 0x93, 0x03, 0x03, 0x0d, 0x38, 0xc6,
+ 0xbb, 0xec, 0x01, 0x93, 0x52, 0x03, 0x0d, 0x3e, 0xc2, 0x00, 0xd3, 0x01,
+ 0x93, 0x78, 0xc5, 0xc0, 0x7d, 0x01, 0x93, 0x13, 0x03, 0x0d, 0x44, 0xc6,
+ 0xc1, 0x86, 0x01, 0x93, 0x5a, 0x03, 0x0d, 0x4a, 0xc2, 0x00, 0xd3, 0x01,
+ 0x93, 0x88, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x90, 0xc4, 0x79, 0xf3, 0x01,
+ 0x93, 0x2b, 0x03, 0x0d, 0x50, 0xc6, 0xba, 0x7c, 0x01, 0x93, 0x62, 0x03,
+ 0x0d, 0x56, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xa0, 0x00, 0x43, 0x0d, 0x5c,
+ 0xc4, 0xc6, 0x7a, 0x01, 0x93, 0x43, 0x03, 0x0d, 0x64, 0xc6, 0xc6, 0x79,
+ 0x01, 0x93, 0x4a, 0x03, 0x0d, 0x6a, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xd8,
+ 0xc4, 0x15, 0xe7, 0x01, 0x27, 0x51, 0xc4, 0x26, 0x78, 0x01, 0x23, 0x41,
+ 0xc5, 0x06, 0xdb, 0x01, 0x23, 0x39, 0x15, 0xc3, 0x0d, 0x70, 0x08, 0xc3,
+ 0x0d, 0x7c, 0x16, 0xc3, 0x0d, 0x88, 0xc3, 0x05, 0x14, 0x01, 0x23, 0x00,
+ 0xc4, 0x03, 0x03, 0x01, 0x14, 0xc1, 0xc3, 0x00, 0xbb, 0x01, 0x51, 0xc0,
+ 0xe0, 0x02, 0xe7, 0x0f, 0x88, 0x78, 0x9c, 0x01, 0x27, 0x49, 0x9b, 0x01,
+ 0x27, 0x41, 0x9a, 0x01, 0x27, 0x39, 0x99, 0x01, 0x27, 0x31, 0x98, 0x01,
+ 0x27, 0x29, 0x97, 0x01, 0x27, 0x21, 0x96, 0x01, 0x27, 0x19, 0x95, 0x01,
+ 0x27, 0x11, 0x94, 0x01, 0x27, 0x09, 0x93, 0x01, 0x27, 0x01, 0x92, 0x01,
+ 0x26, 0xf9, 0x91, 0x01, 0x26, 0xf1, 0x90, 0x01, 0x26, 0xe9, 0x8f, 0x01,
+ 0x26, 0xe1, 0x8e, 0x01, 0x26, 0xd9, 0x8d, 0x01, 0x26, 0xd1, 0x8c, 0x01,
+ 0x26, 0xc9, 0x8b, 0x01, 0x26, 0xc1, 0x8a, 0x01, 0x26, 0xb9, 0x89, 0x01,
+ 0x26, 0xb1, 0x88, 0x01, 0x26, 0xa9, 0x87, 0x01, 0x26, 0xa1, 0x86, 0x01,
+ 0x26, 0x99, 0x85, 0x01, 0x26, 0x91, 0x84, 0x01, 0x26, 0x89, 0x83, 0x01,
+ 0x26, 0x80, 0x9c, 0x01, 0x26, 0x79, 0x9b, 0x01, 0x26, 0x71, 0x9a, 0x01,
+ 0x26, 0x69, 0x99, 0x01, 0x26, 0x61, 0x98, 0x01, 0x26, 0x59, 0x97, 0x01,
+ 0x26, 0x51, 0x96, 0x01, 0x26, 0x49, 0x95, 0x01, 0x26, 0x41, 0x94, 0x01,
+ 0x26, 0x39, 0x93, 0x01, 0x26, 0x31, 0x92, 0x01, 0x26, 0x29, 0x91, 0x01,
+ 0x26, 0x21, 0x90, 0x01, 0x26, 0x19, 0x8f, 0x01, 0x26, 0x11, 0x8e, 0x01,
+ 0x26, 0x09, 0x8d, 0x01, 0x26, 0x01, 0x8c, 0x01, 0x25, 0xf9, 0x8b, 0x01,
+ 0x25, 0xf1, 0x8a, 0x01, 0x25, 0xe9, 0x89, 0x01, 0x25, 0xe1, 0x88, 0x01,
+ 0x25, 0xd9, 0x87, 0x01, 0x25, 0xd1, 0x86, 0x01, 0x25, 0xc9, 0x85, 0x01,
+ 0x25, 0xc1, 0x84, 0x01, 0x25, 0xb9, 0x83, 0x01, 0x25, 0xb0, 0xc3, 0x18,
+ 0x13, 0x01, 0x23, 0x9b, 0x03, 0x0d, 0x94, 0xc3, 0x22, 0x45, 0x01, 0x23,
+ 0x58, 0xc3, 0x03, 0x26, 0x01, 0x23, 0x61, 0x9b, 0x01, 0x92, 0xd2, 0x03,
+ 0x0d, 0x98, 0xd0, 0x55, 0xa8, 0x01, 0x92, 0x40, 0xc3, 0x03, 0x26, 0x01,
+ 0x23, 0x89, 0xd1, 0x55, 0xa7, 0x01, 0x92, 0x78, 0xc3, 0x03, 0x26, 0x01,
+ 0x23, 0x81, 0xd1, 0x55, 0xa7, 0x01, 0x92, 0x70, 0xc3, 0x03, 0x26, 0x01,
+ 0x23, 0x79, 0xd1, 0x55, 0xa7, 0x01, 0x92, 0x68, 0xc3, 0x03, 0x26, 0x01,
+ 0x23, 0x71, 0x9b, 0x01, 0x95, 0xfa, 0x03, 0x0d, 0x9c, 0xc6, 0x34, 0x38,
+ 0x01, 0x23, 0x69, 0xc3, 0x0d, 0x0f, 0x01, 0x95, 0xaa, 0x03, 0x0d, 0xa2,
+ 0xc5, 0xdc, 0x13, 0x0f, 0x92, 0x89, 0xc8, 0xb9, 0xfa, 0x0f, 0x92, 0x81,
+ 0xc8, 0xb6, 0xda, 0x01, 0x94, 0xf9, 0xc7, 0xba, 0x63, 0x01, 0x95, 0x78,
+ 0xcb, 0x90, 0x2e, 0x01, 0x92, 0x29, 0xc3, 0x81, 0x06, 0x01, 0x92, 0x38,
+ 0xc5, 0xdc, 0xef, 0x01, 0x92, 0x31, 0xc2, 0x22, 0xcc, 0x01, 0x94, 0x29,
+ 0x07, 0xc3, 0x0d, 0xa6, 0x17, 0xc3, 0x0d, 0xb2, 0x16, 0xc3, 0x0d, 0xc2,
+ 0xc6, 0xcc, 0xbf, 0x01, 0x94, 0x99, 0xc6, 0xca, 0xe5, 0x01, 0x94, 0xa8,
+ 0xc2, 0x02, 0xa0, 0x01, 0x94, 0x09, 0xc4, 0x02, 0xde, 0x01, 0x94, 0x11,
+ 0xc2, 0x00, 0xc4, 0x01, 0x94, 0x48, 0xc3, 0x09, 0x9e, 0x01, 0x94, 0x19,
+ 0x0b, 0xc3, 0x0d, 0xce, 0xc5, 0x1b, 0xbd, 0x01, 0x94, 0xd8, 0xc4, 0x00,
+ 0x2d, 0x01, 0x94, 0x39, 0xc4, 0x61, 0xc1, 0x01, 0x94, 0x79, 0xc8, 0xbc,
+ 0xca, 0x01, 0x94, 0xe9, 0xc9, 0xaf, 0x8a, 0x01, 0x95, 0x68, 0x0b, 0xc3,
+ 0x0d, 0xe0, 0xc3, 0x00, 0xc2, 0x01, 0x94, 0xa0, 0xc3, 0x01, 0x54, 0x01,
+ 0x94, 0x51, 0x07, 0xc3, 0x0d, 0xec, 0xc3, 0x04, 0x85, 0x01, 0x94, 0xd0,
+ 0xc4, 0x03, 0xd7, 0x01, 0x94, 0x61, 0xc3, 0x29, 0x82, 0x01, 0x94, 0x68,
+ 0xc3, 0x04, 0xad, 0x01, 0x94, 0x91, 0xc3, 0x00, 0x2d, 0x01, 0x95, 0x20,
+ 0x11, 0xc3, 0x0d, 0xf8, 0xc5, 0x04, 0xe2, 0x01, 0x95, 0x28, 0xc4, 0xdd,
+ 0x72, 0x01, 0x94, 0xc1, 0xc2, 0x00, 0x27, 0x01, 0x95, 0x31, 0xc3, 0x00,
+ 0x4a, 0x01, 0x95, 0x38, 0x07, 0xc3, 0x0e, 0x0a, 0xc4, 0x00, 0x2d, 0x01,
+ 0x95, 0x40, 0x83, 0x01, 0x96, 0xa9, 0x8b, 0x01, 0x96, 0xb1, 0x97, 0x01,
+ 0x96, 0xb9, 0x87, 0x01, 0x96, 0xc1, 0x91, 0x01, 0x96, 0xc8, 0x83, 0x01,
+ 0x96, 0xd1, 0x8b, 0x01, 0x96, 0xd9, 0x97, 0x01, 0x96, 0xe1, 0x87, 0x01,
+ 0x96, 0xe9, 0x91, 0x01, 0x96, 0xf0, 0x83, 0x01, 0x96, 0xf9, 0x8b, 0x01,
+ 0x97, 0x01, 0x97, 0x01, 0x97, 0x09, 0x87, 0x01, 0x97, 0x11, 0x91, 0x01,
+ 0x97, 0x18, 0x83, 0x01, 0x97, 0x21, 0x8b, 0x01, 0x97, 0x29, 0x97, 0x01,
+ 0x97, 0x31, 0x87, 0x01, 0x97, 0x39, 0x91, 0x01, 0x97, 0x40, 0x83, 0x01,
+ 0x97, 0x49, 0x8b, 0x01, 0x97, 0x51, 0x97, 0x01, 0x97, 0x59, 0x87, 0x01,
+ 0x97, 0x61, 0x91, 0x01, 0x97, 0x68, 0x83, 0x01, 0x97, 0x71, 0x8b, 0x01,
+ 0x97, 0x79, 0x97, 0x01, 0x97, 0x81, 0x87, 0x01, 0x97, 0x89, 0x91, 0x01,
+ 0x97, 0x90, 0x83, 0x01, 0x97, 0x99, 0x97, 0x01, 0x97, 0xa1, 0x91, 0x01,
+ 0x97, 0xa8, 0x83, 0x01, 0x97, 0xb1, 0x8b, 0x01, 0x97, 0xb9, 0x97, 0x01,
+ 0x97, 0xc1, 0x87, 0x01, 0x97, 0xc9, 0x91, 0x01, 0x97, 0xd0, 0x83, 0x01,
+ 0x97, 0xd9, 0x8b, 0x01, 0x97, 0xe1, 0x87, 0x01, 0x97, 0xe9, 0x91, 0x01,
+ 0x97, 0xf0, 0xcf, 0x64, 0xc2, 0x09, 0x2a, 0x19, 0x83, 0x09, 0x1b, 0x60,
+ 0x0e, 0xc3, 0x0e, 0x14, 0x06, 0xc3, 0x0e, 0x1e, 0x17, 0xc3, 0x0e, 0x2a,
+ 0xc2, 0x00, 0x16, 0x09, 0x1a, 0x59, 0x15, 0xc3, 0x0e, 0x3a, 0xc2, 0x00,
+ 0xb0, 0x09, 0x1a, 0x41, 0xc3, 0x0f, 0xd6, 0x09, 0x1a, 0x39, 0xc2, 0x06,
+ 0x52, 0x09, 0x1a, 0x29, 0x0b, 0xc3, 0x0e, 0x46, 0xc2, 0x00, 0xd0, 0x09,
+ 0x1a, 0x09, 0x09, 0xc3, 0x0e, 0x56, 0xc3, 0x01, 0x5d, 0x09, 0x19, 0xd1,
+ 0x83, 0x09, 0x19, 0xc2, 0x03, 0x0e, 0x61, 0xc8, 0x03, 0x4c, 0x09, 0x1a,
+ 0x80, 0x46, 0x03, 0x4d, 0xc3, 0x0e, 0x67, 0xc8, 0x1d, 0x6f, 0x09, 0x29,
+ 0xe0, 0xc8, 0x4e, 0xea, 0x09, 0x18, 0xf8, 0xc2, 0x00, 0xb0, 0x09, 0x19,
+ 0x29, 0xc6, 0x45, 0xad, 0x09, 0x19, 0x20, 0x94, 0x09, 0x1a, 0xa0, 0xca,
+ 0x8d, 0x2d, 0x09, 0x18, 0xd8, 0xcf, 0x65, 0xd0, 0x09, 0x18, 0xbb, 0x03,
+ 0x0e, 0x7b, 0xc2, 0x02, 0x2f, 0x09, 0x18, 0xb1, 0xc3, 0x62, 0x19, 0x09,
+ 0x18, 0xa8, 0xca, 0x64, 0xc2, 0x09, 0x29, 0xd9, 0xc9, 0x5d, 0x99, 0x09,
+ 0x29, 0xd0, 0xc2, 0x04, 0x3d, 0x09, 0x17, 0xc9, 0xc4, 0x0b, 0x46, 0x09,
+ 0x17, 0xc1, 0x42, 0x01, 0xe2, 0xc3, 0x0e, 0x81, 0xc3, 0x6c, 0x49, 0x09,
+ 0x17, 0xa9, 0xc2, 0x01, 0x2d, 0x09, 0x17, 0xa0, 0xc7, 0x0b, 0x09, 0x09,
+ 0x17, 0x91, 0x42, 0x00, 0x9a, 0x43, 0x0e, 0x89, 0xc2, 0x02, 0x2f, 0x09,
+ 0x17, 0x71, 0xc2, 0x00, 0x0a, 0x09, 0x17, 0x68, 0xc8, 0xb6, 0xe2, 0x09,
+ 0x18, 0x1b, 0x03, 0x0e, 0x8f, 0xca, 0x38, 0xae, 0x09, 0x18, 0x10, 0xcf,
+ 0x69, 0x90, 0x09, 0x16, 0xf8, 0x46, 0x25, 0xd4, 0x43, 0x0e, 0x95, 0x45,
+ 0x25, 0xd5, 0xc3, 0x0e, 0xa1, 0xc8, 0xb6, 0xea, 0x09, 0x29, 0x93, 0x03,
+ 0x0e, 0xb3, 0xc2, 0x06, 0x47, 0x09, 0x15, 0xd8, 0xc3, 0x0d, 0xff, 0x09,
+ 0x16, 0x11, 0x9f, 0x09, 0x16, 0x08, 0xc5, 0x58, 0xf4, 0x09, 0x29, 0x88,
+ 0x47, 0x03, 0x4c, 0x43, 0x0e, 0xb7, 0x00, 0x43, 0x0e, 0xe0, 0x47, 0x03,
+ 0x4c, 0x43, 0x0e, 0xec, 0x47, 0x03, 0x4c, 0x43, 0x0f, 0x21, 0x46, 0x03,
+ 0x4d, 0xc3, 0x0f, 0x2b, 0xc4, 0x39, 0xc8, 0x09, 0x15, 0x43, 0x03, 0x0f,
+ 0x6e, 0xc8, 0xb6, 0xf2, 0x09, 0x15, 0x39, 0xc7, 0xb7, 0xa3, 0x09, 0x14,
+ 0xa0, 0x47, 0x03, 0x4c, 0x43, 0x0f, 0x74, 0xd0, 0x5f, 0xa2, 0x09, 0x12,
+ 0x89, 0xc7, 0x5d, 0x9b, 0x09, 0x12, 0x80, 0xd6, 0x2a, 0xf6, 0x09, 0x1c,
+ 0x99, 0xd6, 0x2b, 0x7e, 0x09, 0x16, 0xa9, 0xc4, 0x58, 0xf5, 0x09, 0x16,
+ 0xa0, 0x00, 0x43, 0x0f, 0xb8, 0xcc, 0x81, 0xf9, 0x09, 0x13, 0x5b, 0x03,
+ 0x0f, 0xc7, 0xc8, 0x20, 0x13, 0x09, 0x13, 0x51, 0xc4, 0x58, 0xf5, 0x09,
+ 0x13, 0x49, 0x4c, 0x20, 0x1c, 0x43, 0x0f, 0xcd, 0xcd, 0x76, 0x0e, 0x09,
+ 0x12, 0x19, 0xce, 0x75, 0x2e, 0x09, 0x12, 0x11, 0xc8, 0x1d, 0x6f, 0x09,
+ 0x12, 0x08, 0xc2, 0x04, 0x3d, 0x09, 0x12, 0x51, 0x83, 0x09, 0x12, 0x48,
+ 0xc9, 0xaf, 0x66, 0x09, 0x11, 0xb3, 0x03, 0x0f, 0xe8, 0xcd, 0x7a, 0x2b,
+ 0x09, 0x11, 0xc1, 0x46, 0x03, 0x4d, 0x43, 0x0f, 0xee, 0x00, 0x43, 0x0f,
+ 0xfe, 0x16, 0xc3, 0x10, 0x0a, 0xce, 0x73, 0x98, 0x09, 0x28, 0xc9, 0x15,
+ 0xc3, 0x10, 0x16, 0xcc, 0x8a, 0x15, 0x09, 0x10, 0x99, 0xcc, 0x83, 0xc1,
+ 0x09, 0x10, 0x90, 0xcd, 0x1a, 0xf3, 0x09, 0x10, 0xf8, 0xc7, 0x6c, 0xd0,
+ 0x09, 0x10, 0xd1, 0x11, 0x43, 0x10, 0x25, 0xc2, 0xe6, 0x8b, 0x09, 0x28,
+ 0xc1, 0xc2, 0xae, 0x2b, 0x09, 0x28, 0xb8, 0xc2, 0xe6, 0x79, 0x09, 0x28,
+ 0x6b, 0x03, 0x10, 0x31, 0xc2, 0xe1, 0xa2, 0x09, 0x28, 0x61, 0xc2, 0xe6,
+ 0x87, 0x09, 0x28, 0x0b, 0x03, 0x10, 0x37, 0xc2, 0x71, 0x49, 0x09, 0x28,
+ 0x00, 0x26, 0xc3, 0x10, 0x3d, 0xc2, 0xe6, 0x7b, 0x09, 0x27, 0xd1, 0xc2,
+ 0xe4, 0xef, 0x09, 0x27, 0xc9, 0x22, 0xc3, 0x10, 0x4d, 0x21, 0x43, 0x10,
+ 0x55, 0xc2, 0xe6, 0xa7, 0x09, 0x27, 0x79, 0x25, 0xc3, 0x10, 0x60, 0x21,
+ 0x43, 0x10, 0x68, 0x23, 0xc3, 0x10, 0x74, 0xc2, 0xe6, 0x83, 0x09, 0x27,
+ 0x39, 0x1f, 0xc3, 0x10, 0x7c, 0x1e, 0x43, 0x10, 0x88, 0xc2, 0xe4, 0xf2,
+ 0x09, 0x27, 0x09, 0xc2, 0xe6, 0x4a, 0x09, 0x27, 0x00, 0xc2, 0xe6, 0xa9,
+ 0x09, 0x26, 0xf9, 0x25, 0xc3, 0x10, 0x90, 0xd4, 0x3c, 0xc8, 0x09, 0x26,
+ 0xe1, 0xc2, 0xe5, 0x48, 0x09, 0x26, 0xd9, 0x22, 0xc3, 0x10, 0x9a, 0xc2,
+ 0xe6, 0x4a, 0x09, 0x26, 0xc1, 0x1f, 0xc3, 0x10, 0xa2, 0xc2, 0xe6, 0x4f,
+ 0x09, 0x26, 0xa8, 0x00, 0x43, 0x10, 0xaa, 0x00, 0x43, 0x10, 0xb6, 0xc8,
+ 0x38, 0x76, 0x09, 0x0f, 0xb0, 0x94, 0x09, 0x26, 0x9b, 0x03, 0x10, 0xc8,
+ 0xc4, 0xdd, 0x2c, 0x09, 0x26, 0x91, 0xc2, 0x01, 0xe2, 0x09, 0x0c, 0x59,
+ 0xcc, 0x82, 0x11, 0x09, 0x0c, 0x51, 0x86, 0x09, 0x0c, 0x49, 0x9f, 0x09,
+ 0x0c, 0x40, 0x83, 0x09, 0x26, 0x8b, 0x03, 0x10, 0xcc, 0x8b, 0x09, 0x0b,
+ 0x82, 0x03, 0x10, 0xd0, 0x97, 0x09, 0x26, 0x81, 0x8b, 0x09, 0x0a, 0xf9,
+ 0x03, 0x43, 0x10, 0xd4, 0x97, 0x09, 0x1c, 0x31, 0xc2, 0x00, 0xb1, 0x09,
+ 0x0c, 0x30, 0x0a, 0xc3, 0x10, 0xe2, 0xc4, 0xdf, 0x77, 0x09, 0x0c, 0x29,
+ 0xc2, 0x00, 0x2d, 0x09, 0x0c, 0x21, 0x83, 0x09, 0x0b, 0xf2, 0x03, 0x10,
+ 0xf7, 0x83, 0x09, 0x1c, 0x21, 0x8b, 0x09, 0x0b, 0xe0, 0x97, 0x09, 0x0b,
+ 0x9b, 0x03, 0x10, 0xfb, 0x8b, 0x09, 0x0b, 0x90, 0x97, 0x09, 0x0b, 0x5b,
+ 0x03, 0x10, 0xff, 0x8b, 0x09, 0x0b, 0x3b, 0x03, 0x11, 0x09, 0x83, 0x09,
+ 0x0b, 0x12, 0x03, 0x11, 0x18, 0x42, 0x01, 0xe2, 0xc3, 0x11, 0x29, 0xc4,
+ 0x99, 0xe3, 0x09, 0x1b, 0xf1, 0x86, 0x09, 0x0a, 0xca, 0x03, 0x11, 0x31,
+ 0xc2, 0x05, 0x1d, 0x09, 0x0b, 0xd9, 0x87, 0x09, 0x0b, 0xd0, 0x8b, 0x09,
+ 0x0b, 0xc3, 0x03, 0x11, 0x37, 0x87, 0x09, 0x0b, 0xa2, 0x03, 0x11, 0x3d,
+ 0x8f, 0x09, 0x0b, 0x71, 0xc2, 0x04, 0x2b, 0x09, 0x0b, 0x68, 0xc3, 0x05,
+ 0x4e, 0x09, 0x0b, 0x09, 0xc4, 0x9e, 0x4c, 0x09, 0x0b, 0x00, 0x4c, 0x87,
+ 0x99, 0xc3, 0x11, 0x43, 0xe0, 0x03, 0x47, 0x09, 0x0c, 0xe8, 0xcc, 0x83,
+ 0xcd, 0x09, 0x0c, 0xc9, 0xc9, 0x8d, 0x2e, 0x09, 0x0c, 0xc0, 0xca, 0xa7,
+ 0x4c, 0x09, 0x0c, 0xa0, 0xcc, 0x8a, 0x21, 0x09, 0x0d, 0x48, 0x86, 0x09,
+ 0x0d, 0x18, 0xd2, 0x05, 0x54, 0x09, 0x26, 0x79, 0x9f, 0x09, 0x09, 0x78,
+ 0xc5, 0x39, 0xc7, 0x09, 0x26, 0x70, 0xc2, 0x04, 0x3d, 0x09, 0x09, 0xe9,
+ 0xc4, 0x81, 0x55, 0x09, 0x09, 0xe1, 0xc6, 0x45, 0xad, 0x09, 0x09, 0xd9,
+ 0xc3, 0x01, 0xce, 0x09, 0x09, 0xd1, 0xc2, 0x00, 0xd1, 0x09, 0x09, 0xc8,
+ 0xd4, 0x38, 0xa4, 0x09, 0x26, 0x69, 0xce, 0x6c, 0x44, 0x09, 0x09, 0x09,
+ 0x46, 0x03, 0x4d, 0x43, 0x11, 0x49, 0x46, 0x03, 0x4d, 0xc3, 0x11, 0x55,
+ 0xc4, 0x39, 0xc8, 0x09, 0x08, 0xe8, 0xc2, 0x01, 0xe2, 0x09, 0x09, 0x41,
+ 0x90, 0x09, 0x09, 0x38, 0x00, 0x43, 0x11, 0x70, 0x47, 0x03, 0x4c, 0x43,
+ 0x11, 0x7a, 0xc5, 0x39, 0xc7, 0x09, 0x08, 0x48, 0xcc, 0x83, 0xd9, 0x09,
+ 0x08, 0x31, 0xc8, 0xb6, 0xfa, 0x09, 0x08, 0x28, 0x97, 0x09, 0x08, 0x11,
+ 0x87, 0x09, 0x08, 0x08, 0x97, 0x09, 0x26, 0x51, 0xc3, 0x51, 0xdb, 0x09,
+ 0x07, 0xf8, 0xd6, 0x2a, 0xf6, 0x09, 0x26, 0x49, 0xcd, 0x7a, 0x11, 0x09,
+ 0x07, 0x78, 0x46, 0x03, 0x4d, 0xc3, 0x11, 0x98, 0xc8, 0xb6, 0x22, 0x09,
+ 0x07, 0x68, 0x00, 0x43, 0x11, 0xe1, 0x15, 0xc3, 0x11, 0xf3, 0xc3, 0x6c,
+ 0x49, 0x09, 0x1b, 0xb9, 0x17, 0xc3, 0x11, 0xfd, 0x0e, 0xc3, 0x12, 0x05,
+ 0x0d, 0xc3, 0x12, 0x14, 0xc8, 0x6a, 0x1e, 0x09, 0x05, 0x59, 0xc2, 0x00,
+ 0xd0, 0x09, 0x05, 0x4b, 0x03, 0x12, 0x23, 0xc9, 0x75, 0x04, 0x09, 0x05,
+ 0x3b, 0x03, 0x12, 0x29, 0xc3, 0x62, 0x19, 0x09, 0x05, 0x31, 0x83, 0x09,
+ 0x05, 0x12, 0x03, 0x12, 0x2f, 0xc2, 0x06, 0x62, 0x09, 0x25, 0xa1, 0xc2,
+ 0x00, 0x4e, 0x09, 0x25, 0x93, 0x03, 0x12, 0x3c, 0xc2, 0x00, 0xdb, 0x09,
+ 0x25, 0x83, 0x03, 0x12, 0x40, 0xc8, 0x6a, 0x1e, 0x09, 0x25, 0x79, 0xc2,
+ 0x00, 0x0a, 0x09, 0x25, 0x71, 0xc3, 0x02, 0x2c, 0x09, 0x25, 0x68, 0xc2,
+ 0x01, 0x7f, 0x09, 0x04, 0x91, 0xc2, 0x00, 0x65, 0x09, 0x04, 0x88, 0xc2,
+ 0x00, 0x4e, 0x09, 0x04, 0xd1, 0xc4, 0x5d, 0x99, 0x09, 0x04, 0xc2, 0x03,
+ 0x12, 0x44, 0x15, 0xc3, 0x12, 0x4a, 0xc2, 0x0b, 0x19, 0x09, 0x25, 0x31,
+ 0xc2, 0x00, 0xec, 0x09, 0x25, 0x29, 0x0f, 0xc3, 0x12, 0x56, 0x0e, 0xc3,
+ 0x12, 0x66, 0x0d, 0xc3, 0x12, 0x70, 0xc8, 0x6a, 0x1e, 0x09, 0x24, 0xc9,
+ 0x0a, 0xc3, 0x12, 0x7a, 0x09, 0xc3, 0x12, 0x82, 0xc5, 0x9e, 0x4b, 0x09,
+ 0x24, 0x91, 0x06, 0xc3, 0x12, 0x8d, 0x03, 0x43, 0x12, 0x99, 0xc3, 0x04,
+ 0x65, 0x09, 0x1b, 0xb1, 0xc4, 0x73, 0x32, 0x09, 0x03, 0xf8, 0xc5, 0x39,
+ 0xc7, 0x09, 0x04, 0x32, 0x03, 0x12, 0xa8, 0xc9, 0xaa, 0xdd, 0x09, 0x24,
+ 0x60, 0xc5, 0xdd, 0x2b, 0x09, 0x24, 0x59, 0xc3, 0x04, 0x2a, 0x09, 0x24,
+ 0x51, 0xc3, 0x04, 0x65, 0x09, 0x03, 0xa8, 0xc9, 0x51, 0xd5, 0x09, 0x24,
+ 0x49, 0x4d, 0x68, 0xcd, 0x43, 0x12, 0xae, 0xa1, 0x09, 0x03, 0x89, 0xa0,
+ 0x09, 0x03, 0x80, 0xc9, 0xaa, 0x20, 0x09, 0x24, 0x39, 0xc2, 0x05, 0x1d,
+ 0x09, 0x02, 0x79, 0xc2, 0x00, 0x03, 0x09, 0x02, 0x70, 0xc2, 0x02, 0x1c,
+ 0x09, 0x24, 0x31, 0xc2, 0x00, 0xec, 0x09, 0x24, 0x29, 0xc3, 0x58, 0xf1,
+ 0x09, 0x24, 0x20, 0x42, 0x01, 0xe2, 0xc3, 0x12, 0xef, 0xc3, 0x20, 0x18,
+ 0x09, 0x1b, 0x83, 0x03, 0x12, 0xfb, 0xcf, 0x65, 0xd0, 0x09, 0x00, 0xa1,
+ 0xc5, 0x03, 0x47, 0x09, 0x00, 0x91, 0x0b, 0xc3, 0x13, 0x01, 0xc2, 0x00,
+ 0xd0, 0x09, 0x00, 0x79, 0x42, 0x01, 0x30, 0xc3, 0x13, 0x0d, 0xc9, 0x75,
+ 0x04, 0x09, 0x00, 0x61, 0xc4, 0x05, 0x4d, 0x09, 0x00, 0x58, 0x83, 0x09,
+ 0x1b, 0x89, 0xc4, 0x38, 0xb4, 0x09, 0x00, 0xd9, 0xc4, 0x55, 0x25, 0x09,
+ 0x00, 0xd1, 0xca, 0xa7, 0xb0, 0x09, 0x00, 0xc9, 0xc9, 0x5d, 0x99, 0x09,
+ 0x00, 0xc1, 0xc5, 0xd8, 0xa8, 0x09, 0x00, 0xb8, 0x49, 0x0d, 0x2d, 0xc3,
+ 0x13, 0x17, 0xc9, 0xa1, 0x21, 0x09, 0x01, 0xd1, 0xc9, 0x83, 0xac, 0x09,
+ 0x01, 0xc8, 0xc7, 0x0b, 0x09, 0x09, 0x01, 0x89, 0xd5, 0x37, 0xeb, 0x09,
+ 0x01, 0x80, 0x8b, 0x09, 0x01, 0x31, 0xc3, 0xe1, 0x68, 0x09, 0x01, 0x28,
+ 0x00, 0x43, 0x13, 0x24, 0x97, 0x09, 0x14, 0x3b, 0x03, 0x13, 0x30, 0x8b,
+ 0x09, 0x14, 0x2b, 0x03, 0x13, 0x34, 0x87, 0x09, 0x14, 0x21, 0x04, 0xc3,
+ 0x13, 0x38, 0x83, 0x09, 0x14, 0x02, 0x03, 0x13, 0x40, 0xc4, 0x39, 0xc8,
+ 0x09, 0x0a, 0x51, 0x42, 0x00, 0x9a, 0xc3, 0x13, 0x44, 0xc2, 0x00, 0x2c,
+ 0x09, 0x0a, 0x41, 0xc3, 0xe3, 0x01, 0x09, 0x0a, 0x38, 0x84, 0x09, 0x22,
+ 0x19, 0x83, 0x09, 0x22, 0x10, 0x97, 0x09, 0x21, 0x89, 0x9f, 0x09, 0x21,
+ 0x38, 0xcd, 0x77, 0xe2, 0x09, 0x22, 0xa8, 0xcd, 0x77, 0xe2, 0x09, 0x22,
+ 0x98, 0x84, 0x09, 0x21, 0xf9, 0x83, 0x09, 0x21, 0xf0, 0xcd, 0x77, 0xe2,
+ 0x09, 0x21, 0xb8, 0xcd, 0x77, 0xe2, 0x09, 0x21, 0x78, 0xcd, 0x77, 0xe2,
+ 0x09, 0x21, 0x28, 0xcb, 0x97, 0xc9, 0x00, 0x27, 0x99, 0xc8, 0x20, 0xa9,
+ 0x00, 0x27, 0x88, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x69, 0xcb, 0x99, 0xc3,
+ 0x05, 0x34, 0x58, 0xc9, 0x25, 0xfa, 0x00, 0x29, 0x79, 0xcb, 0x99, 0xc3,
+ 0x00, 0x29, 0x09, 0xc4, 0x01, 0x23, 0x00, 0x28, 0x99, 0xc4, 0x14, 0xa6,
+ 0x00, 0x26, 0x30, 0xc9, 0x6d, 0x45, 0x00, 0x29, 0x49, 0xcb, 0x99, 0xc3,
+ 0x00, 0x29, 0x19, 0xc4, 0x14, 0xa6, 0x00, 0x26, 0x51, 0xc4, 0x01, 0x23,
+ 0x00, 0x26, 0x41, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x18, 0xc2, 0x01, 0x7f,
+ 0x00, 0x29, 0x59, 0x87, 0x05, 0x34, 0x48, 0xc2, 0x01, 0xc8, 0x05, 0x32,
+ 0x18, 0xcf, 0x69, 0x54, 0x00, 0x29, 0x38, 0x8b, 0x00, 0x21, 0xcb, 0x03,
+ 0x13, 0x4a, 0x97, 0x00, 0x22, 0xf0, 0x8e, 0x05, 0x33, 0x29, 0x8f, 0x05,
+ 0x33, 0x38, 0xc9, 0x25, 0xfa, 0x00, 0x29, 0x29, 0xcb, 0x99, 0xc3, 0x00,
+ 0x25, 0x38, 0xcf, 0x69, 0x54, 0x00, 0x25, 0xf8, 0xc9, 0x20, 0xa8, 0x00,
+ 0x27, 0xc9, 0xc8, 0xbd, 0x7a, 0x05, 0x32, 0x88, 0xc3, 0xe6, 0x68, 0x00,
+ 0x28, 0x79, 0xc3, 0xc7, 0xce, 0x00, 0x28, 0x69, 0xc3, 0xd0, 0xbd, 0x00,
+ 0x28, 0x59, 0xc3, 0xe5, 0xde, 0x00, 0x28, 0x49, 0x06, 0xc3, 0x13, 0x50,
+ 0xc3, 0xe5, 0x3c, 0x00, 0x28, 0x28, 0xc4, 0x01, 0x23, 0x00, 0x26, 0x21,
+ 0xc6, 0x01, 0x73, 0x00, 0x24, 0xf9, 0xc9, 0x25, 0xfa, 0x00, 0x24, 0xd9,
+ 0xcf, 0x2c, 0x35, 0x00, 0x24, 0xe8, 0xc6, 0x01, 0x73, 0x00, 0x27, 0xf9,
+ 0xc4, 0x01, 0x23, 0x00, 0x27, 0xe9, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x98,
+ 0xc6, 0x01, 0x73, 0x00, 0x24, 0x9b, 0x03, 0x13, 0x60, 0xc9, 0x25, 0xfa,
+ 0x00, 0x27, 0xb9, 0xc6, 0x5e, 0xdc, 0x00, 0x24, 0x89, 0xcb, 0x99, 0xc3,
+ 0x00, 0x24, 0xa8, 0xcf, 0x6b, 0x16, 0x00, 0x27, 0x58, 0xc5, 0x1d, 0x88,
+ 0x00, 0x26, 0xb9, 0xc5, 0x1f, 0x0c, 0x00, 0x22, 0x80, 0x83, 0x05, 0x32,
+ 0x39, 0x46, 0x30, 0x28, 0x43, 0x13, 0x66, 0xc8, 0x20, 0xa9, 0x00, 0x26,
+ 0xf9, 0xc8, 0x25, 0xfb, 0x00, 0x24, 0xc8, 0x46, 0x00, 0x8b, 0x43, 0x13,
+ 0x86, 0xcf, 0x2c, 0x35, 0x00, 0x25, 0xc9, 0x06, 0x43, 0x13, 0x90, 0xc9,
+ 0x25, 0xfa, 0x00, 0x29, 0x71, 0xcb, 0x99, 0xc3, 0x00, 0x29, 0x01, 0xc4,
+ 0x01, 0x23, 0x00, 0x28, 0x91, 0xc4, 0x14, 0xa6, 0x00, 0x26, 0x28, 0xc9,
+ 0x6d, 0x45, 0x00, 0x29, 0x41, 0xcb, 0x99, 0xc3, 0x00, 0x29, 0x11, 0xc4,
+ 0x14, 0xa6, 0x00, 0x26, 0x49, 0xc4, 0x01, 0x23, 0x00, 0x26, 0x39, 0xc9,
+ 0x25, 0xfa, 0x00, 0x25, 0x10, 0xc2, 0x01, 0x7f, 0x00, 0x29, 0x51, 0x87,
+ 0x05, 0x34, 0x40, 0xc2, 0x01, 0xc8, 0x05, 0x32, 0x10, 0xcf, 0x69, 0x54,
+ 0x00, 0x29, 0x30, 0x8b, 0x00, 0x20, 0xcb, 0x03, 0x13, 0x9c, 0x97, 0x00,
+ 0x20, 0x70, 0x8e, 0x05, 0x33, 0x21, 0x8f, 0x05, 0x33, 0x30, 0xc9, 0x25,
+ 0xfa, 0x00, 0x29, 0x21, 0xcb, 0x99, 0xc3, 0x00, 0x25, 0x30, 0xcf, 0x69,
+ 0x54, 0x00, 0x25, 0xf0, 0xc9, 0x20, 0xa8, 0x00, 0x27, 0xc1, 0xc8, 0xbd,
+ 0x7a, 0x05, 0x32, 0x80, 0xc3, 0xe6, 0x68, 0x00, 0x28, 0x71, 0xc3, 0xc7,
+ 0xce, 0x00, 0x28, 0x61, 0xc3, 0xd0, 0xbd, 0x00, 0x28, 0x51, 0xc3, 0xe5,
+ 0xde, 0x00, 0x28, 0x41, 0x06, 0xc3, 0x13, 0xa2, 0xc3, 0xe5, 0x3c, 0x00,
+ 0x28, 0x20, 0xc4, 0x01, 0x23, 0x00, 0x26, 0x19, 0xc9, 0x25, 0xfa, 0x00,
+ 0x24, 0xd1, 0xcf, 0x2c, 0x35, 0x00, 0x24, 0xe1, 0xc6, 0x01, 0x73, 0x00,
+ 0x24, 0xf0, 0xc6, 0x01, 0x73, 0x00, 0x27, 0xf1, 0xc4, 0x01, 0x23, 0x00,
+ 0x27, 0xe1, 0xc9, 0x25, 0xfa, 0x00, 0x25, 0x90, 0xc6, 0x01, 0x73, 0x00,
+ 0x24, 0x93, 0x03, 0x13, 0xb2, 0xc9, 0x25, 0xfa, 0x00, 0x27, 0xb1, 0xc6,
+ 0x5e, 0xdc, 0x00, 0x24, 0x81, 0xcb, 0x99, 0xc3, 0x00, 0x24, 0xa0, 0x06,
+ 0xc3, 0x13, 0xb8, 0xcf, 0x2c, 0x35, 0x00, 0x25, 0xc0, 0xcb, 0x97, 0xc9,
+ 0x00, 0x27, 0x91, 0xc8, 0x20, 0xa9, 0x00, 0x27, 0x80, 0xcf, 0x6b, 0x16,
+ 0x00, 0x27, 0x50, 0xc5, 0x1d, 0x88, 0x00, 0x26, 0xb1, 0xc5, 0x1f, 0x0c,
+ 0x00, 0x20, 0x00, 0x83, 0x05, 0x32, 0x31, 0x46, 0x30, 0x28, 0x43, 0x13,
+ 0xc4, 0xc8, 0x20, 0xa9, 0x00, 0x26, 0xf1, 0xc8, 0x25, 0xfb, 0x00, 0x24,
+ 0xc0, 0x46, 0x00, 0x8b, 0x43, 0x13, 0xe4, 0xc9, 0x25, 0xfa, 0x00, 0x25,
+ 0x61, 0xcb, 0x99, 0xc3, 0x05, 0x34, 0x50, 0xc5, 0x69, 0xa7, 0x00, 0x6c,
+ 0x39, 0xc6, 0xd2, 0x3b, 0x00, 0x6c, 0x40, 0xc7, 0xc6, 0x32, 0x00, 0x6c,
+ 0xd1, 0xc7, 0xca, 0x29, 0x00, 0x6c, 0xe1, 0xc7, 0xc7, 0xdd, 0x00, 0x6d,
+ 0x01, 0xc7, 0xc7, 0xc1, 0x00, 0x6d, 0x11, 0x16, 0xc3, 0x13, 0xee, 0x06,
+ 0xc3, 0x13, 0xfa, 0xc7, 0xc8, 0x1c, 0x00, 0x6d, 0xa1, 0xc7, 0x8e, 0x9b,
+ 0x00, 0x6d, 0xb0, 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x69, 0xc6, 0xcc, 0xd1,
+ 0x00, 0x6c, 0x70, 0xc5, 0x69, 0xa7, 0x00, 0x6c, 0x79, 0xc6, 0xcc, 0xd1,
+ 0x00, 0x6c, 0x80, 0x4a, 0x9b, 0x62, 0xc3, 0x14, 0x06, 0xc5, 0x69, 0xa7,
+ 0x00, 0x6d, 0xc0, 0xc7, 0xc4, 0xdb, 0x00, 0x6d, 0x59, 0xc7, 0xc1, 0xa8,
+ 0x00, 0x6e, 0x11, 0xc7, 0xc2, 0x18, 0x00, 0x6e, 0x28, 0xc7, 0xc4, 0x25,
+ 0x00, 0x6d, 0x61, 0xc6, 0x8e, 0x9c, 0x00, 0x6d, 0x98, 0xd2, 0x4d, 0xc3,
+ 0x00, 0x6d, 0x29, 0xc5, 0x69, 0xa7, 0x00, 0x6e, 0x08, 0x45, 0xd7, 0x40,
+ 0x43, 0x14, 0x32, 0xa3, 0x0e, 0xd5, 0x79, 0xa2, 0x0e, 0xd5, 0x71, 0xa1,
+ 0x0e, 0xd5, 0x69, 0xa0, 0x0e, 0xd5, 0x61, 0x9f, 0x0e, 0xd5, 0x59, 0x9e,
+ 0x0e, 0xd5, 0x51, 0x9d, 0x0e, 0xd5, 0x48, 0xcb, 0x57, 0x45, 0x0e, 0xcf,
+ 0x0b, 0x03, 0x14, 0x44, 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x03, 0x03, 0x14,
+ 0x4a, 0xc6, 0x24, 0x3b, 0x0e, 0xce, 0xfa, 0x03, 0x14, 0x50, 0x48, 0x0c,
+ 0x8c, 0xc3, 0x14, 0x56, 0xc6, 0x00, 0x58, 0x0e, 0xcd, 0x1b, 0x03, 0x14,
+ 0x60, 0xc6, 0x24, 0x3b, 0x0e, 0xcd, 0x12, 0x03, 0x14, 0x66, 0xc9, 0x65,
+ 0x4f, 0x0e, 0xc8, 0xf9, 0x45, 0x03, 0x14, 0x43, 0x14, 0x6c, 0xc8, 0x3b,
+ 0xec, 0x0e, 0xc8, 0xe9, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0xd8, 0xc8, 0x3b,
+ 0xec, 0x0e, 0xc8, 0xc9, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0xb8, 0xc7, 0xc3,
+ 0x0d, 0x0e, 0xd4, 0x21, 0xc4, 0x00, 0x2d, 0x0e, 0xd4, 0x08, 0xa4, 0x0e,
+ 0xd3, 0xe9, 0xa3, 0x0e, 0xd3, 0xe1, 0xa2, 0x0e, 0xd3, 0xd9, 0xa1, 0x0e,
+ 0xd3, 0xd1, 0xa0, 0x0e, 0xd3, 0xc9, 0x9f, 0x0e, 0xd3, 0xc1, 0x9e, 0x0e,
+ 0xd3, 0xb8, 0xd0, 0x58, 0x82, 0x0e, 0xd2, 0xa9, 0xd0, 0x5a, 0xe2, 0x0e,
+ 0xd2, 0xa0, 0xcb, 0x93, 0xca, 0x0e, 0xd3, 0x99, 0xd0, 0x5b, 0x12, 0x0e,
+ 0xd3, 0x90, 0xcc, 0x35, 0xa8, 0x0e, 0xd3, 0x01, 0xcc, 0x5b, 0x22, 0x0e,
+ 0xd2, 0xf8, 0xd5, 0x35, 0x9f, 0x0e, 0xd2, 0xe1, 0xcc, 0x86, 0x31, 0x0e,
+ 0xd2, 0xd8, 0xc9, 0xb0, 0x50, 0x0e, 0xd3, 0x39, 0x43, 0x01, 0x55, 0xc3,
+ 0x14, 0x78, 0xc8, 0x51, 0x1b, 0x0e, 0xd3, 0x10, 0x4a, 0x18, 0xa5, 0xc3,
+ 0x14, 0x8a, 0x4b, 0x40, 0xb3, 0x43, 0x14, 0x9c, 0xc6, 0x2c, 0x2e, 0x0e,
+ 0xca, 0xa1, 0xc6, 0x00, 0x58, 0x0e, 0xca, 0x99, 0xc6, 0x24, 0x3b, 0x0e,
+ 0xca, 0x90, 0x4b, 0x40, 0xb3, 0xc3, 0x14, 0xae, 0x4a, 0x18, 0xa5, 0x43,
+ 0x14, 0xc0, 0x05, 0xc3, 0x14, 0xd2, 0xc8, 0x45, 0x27, 0x0e, 0xd1, 0x0a,
+ 0x03, 0x14, 0xde, 0xc6, 0x3b, 0x9c, 0x0e, 0xd1, 0x41, 0xc8, 0x45, 0x27,
+ 0x0e, 0xd1, 0x22, 0x03, 0x14, 0xe2, 0xc8, 0x3b, 0xec, 0x0e, 0xd0, 0xc1,
+ 0xc6, 0x24, 0x3b, 0x0e, 0xd0, 0xb8, 0xcd, 0x76, 0xd1, 0x0e, 0xd0, 0xe1,
+ 0xc5, 0x05, 0x74, 0x0e, 0xd0, 0xd0, 0xc6, 0x07, 0xa1, 0x0e, 0xd0, 0xd9,
+ 0xc4, 0x05, 0x75, 0x0e, 0xd0, 0xc8, 0xc3, 0x1d, 0xb1, 0x0e, 0xc8, 0x1b,
+ 0x03, 0x14, 0xe6, 0xc3, 0x00, 0xfd, 0x0e, 0xc2, 0xd2, 0x03, 0x14, 0xea,
+ 0x00, 0x43, 0x14, 0xee, 0xc4, 0x09, 0x39, 0x0e, 0xc3, 0xeb, 0x03, 0x15,
+ 0x0c, 0xc3, 0x01, 0x24, 0x0e, 0xc3, 0x5a, 0x03, 0x15, 0x10, 0x17, 0xc3,
+ 0x15, 0x14, 0xc3, 0xc9, 0xd8, 0x0e, 0xc3, 0x33, 0x03, 0x15, 0x24, 0xc5,
+ 0x02, 0xd2, 0x0e, 0xc3, 0xb2, 0x03, 0x15, 0x28, 0x00, 0x43, 0x15, 0x2c,
+ 0xc7, 0x05, 0x79, 0x0e, 0xd0, 0x31, 0x02, 0x43, 0x15, 0x50, 0x54, 0x3a,
+ 0x70, 0xc3, 0x15, 0x5c, 0xc6, 0xc1, 0xb7, 0x0e, 0xc9, 0x48, 0x59, 0x20,
+ 0x2b, 0xc3, 0x15, 0x68, 0x44, 0x1f, 0x0e, 0x43, 0x15, 0x74, 0x46, 0x17,
+ 0x14, 0xc3, 0x15, 0x84, 0x47, 0x01, 0xdb, 0xc3, 0x15, 0x90, 0x46, 0x03,
+ 0x13, 0x43, 0x15, 0x9c, 0xcf, 0x64, 0x4a, 0x0e, 0xcf, 0x11, 0x46, 0x2d,
+ 0x11, 0x43, 0x15, 0xa8, 0xc5, 0x03, 0x13, 0x0e, 0xce, 0xd9, 0x48, 0x20,
+ 0x37, 0x43, 0x15, 0xb4, 0xc5, 0x03, 0x13, 0x0e, 0xce, 0xd1, 0x48, 0x20,
+ 0x37, 0x43, 0x15, 0xc0, 0x45, 0x0e, 0xd5, 0xc3, 0x15, 0xcc, 0xc4, 0x6b,
+ 0x03, 0x0e, 0xcb, 0xb9, 0x46, 0x35, 0x01, 0xc3, 0x15, 0xed, 0xc4, 0x0d,
+ 0x21, 0x0e, 0xcb, 0x70, 0xc5, 0x17, 0x14, 0x0e, 0xcc, 0x01, 0xc6, 0x01,
+ 0xdb, 0x0e, 0xcb, 0xf9, 0xc5, 0x03, 0x13, 0x0e, 0xcb, 0xf0, 0xc5, 0x17,
+ 0x14, 0x0e, 0xcb, 0xe9, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0xe1, 0xc5, 0x03,
+ 0x13, 0x0e, 0xcb, 0xd8, 0x43, 0x32, 0x37, 0xc3, 0x15, 0xff, 0xc3, 0x02,
+ 0x39, 0x0e, 0xcb, 0x98, 0x4c, 0x8b, 0xc5, 0xc3, 0x16, 0x11, 0xca, 0x91,
+ 0x42, 0x0e, 0xcb, 0x81, 0xd1, 0x51, 0x12, 0x0e, 0xcb, 0x78, 0xcb, 0x57,
+ 0x45, 0x0e, 0xcb, 0x63, 0x03, 0x16, 0x1d, 0xca, 0x91, 0x42, 0x0e, 0xcb,
+ 0x59, 0xc8, 0x45, 0x27, 0x0e, 0xcb, 0x50, 0x47, 0x3a, 0x70, 0xc3, 0x16,
+ 0x23, 0xc6, 0xc1, 0xb7, 0x0e, 0xc9, 0x40, 0x52, 0x47, 0xed, 0xc3, 0x16,
+ 0x2f, 0x44, 0x1f, 0x0e, 0x43, 0x16, 0x3b, 0x47, 0x01, 0xdb, 0xc3, 0x16,
+ 0x4d, 0x46, 0x03, 0x13, 0x43, 0x16, 0x59, 0x48, 0x20, 0x37, 0xc3, 0x16,
+ 0x65, 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0xab, 0x03, 0x16, 0x71, 0xc5, 0x17,
+ 0x14, 0x0e, 0xcc, 0xb9, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0xb0, 0x48, 0x20,
+ 0x37, 0xc3, 0x16, 0x77, 0xc5, 0x17, 0x14, 0x0e, 0xcc, 0xa1, 0xc6, 0x01,
+ 0xdb, 0x0e, 0xcc, 0x99, 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0x90, 0x44, 0x0e,
+ 0xd5, 0xc3, 0x16, 0x83, 0x45, 0x6b, 0x03, 0xc3, 0x16, 0x8d, 0x46, 0x35,
+ 0x01, 0xc3, 0x16, 0x9f, 0xc4, 0x0d, 0x21, 0x0e, 0xc9, 0x98, 0xc6, 0x64,
+ 0x4a, 0x0e, 0xcd, 0x29, 0x46, 0x2d, 0x11, 0x43, 0x16, 0xb7, 0xc5, 0x17,
+ 0x14, 0x0e, 0xca, 0x51, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0x49, 0xc5, 0x03,
+ 0x13, 0x0e, 0xca, 0x40, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0x39, 0xc6, 0x01,
+ 0xdb, 0x0e, 0xca, 0x31, 0xc5, 0x03, 0x13, 0x0e, 0xca, 0x28, 0x43, 0x32,
+ 0x37, 0xc3, 0x16, 0xc3, 0x44, 0x0a, 0x0f, 0x43, 0x16, 0xd5, 0xcb, 0x57,
+ 0x45, 0x0e, 0xc9, 0xb3, 0x03, 0x16, 0xe7, 0xca, 0x91, 0x42, 0x0e, 0xc9,
+ 0xa9, 0xd1, 0x51, 0x12, 0x0e, 0xc9, 0xa0, 0xcb, 0x57, 0x45, 0x0e, 0xc9,
+ 0x8b, 0x03, 0x16, 0xed, 0xca, 0x91, 0x42, 0x0e, 0xc9, 0x81, 0xc8, 0x45,
+ 0x27, 0x0e, 0xc9, 0x78, 0x48, 0xbf, 0xc2, 0xc3, 0x16, 0xf3, 0x45, 0xd5,
+ 0xf1, 0x43, 0x17, 0x08, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0xdb, 0x03, 0x17,
+ 0x1d, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0xd1, 0xc5, 0x03, 0x13, 0x0e, 0xca,
+ 0xc8, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0xbb, 0x03, 0x17, 0x23, 0xc6, 0x01,
+ 0xdb, 0x0e, 0xca, 0xb1, 0xc5, 0x03, 0x13, 0x0e, 0xca, 0xa8, 0x45, 0x11,
+ 0x17, 0xc3, 0x17, 0x29, 0xca, 0x65, 0x4e, 0x0e, 0xc9, 0x18, 0xc7, 0xc1,
+ 0xb6, 0x0e, 0xd1, 0xe9, 0xc7, 0x27, 0xb2, 0x0e, 0xd1, 0xe1, 0xc7, 0x81,
+ 0x92, 0x0e, 0xd1, 0xd8, 0xc6, 0xcf, 0x23, 0x0e, 0xd2, 0x91, 0xc7, 0x27,
+ 0xb2, 0x0e, 0xd2, 0x88, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x79, 0xc7, 0x27,
+ 0xb2, 0x0e, 0xd2, 0x70, 0x00, 0x43, 0x17, 0x3b, 0x00, 0x43, 0x17, 0x47,
+ 0xc4, 0x05, 0x75, 0x0e, 0xd2, 0x19, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x10,
+ 0xc4, 0x05, 0x75, 0x0e, 0xd2, 0x01, 0xc8, 0xbe, 0x0a, 0x0e, 0xd1, 0xf8,
+ 0xcc, 0x57, 0x44, 0x0e, 0xcf, 0xe0, 0x8e, 0x08, 0xac, 0x48, 0x94, 0x08,
+ 0xac, 0x38, 0x4c, 0x8b, 0x71, 0xc3, 0x17, 0x53, 0xd2, 0x4b, 0x3b, 0x08,
+ 0xae, 0xa1, 0xd3, 0x44, 0xb5, 0x08, 0xae, 0x99, 0x43, 0x01, 0x92, 0xc3,
+ 0x17, 0x65, 0xd0, 0x58, 0x22, 0x08, 0xae, 0x89, 0x50, 0x5d, 0x32, 0x43,
+ 0x17, 0x71, 0xca, 0x83, 0x03, 0x08, 0xae, 0x80, 0x94, 0x05, 0x44, 0x48,
+ 0x8e, 0x05, 0x44, 0x58, 0x9f, 0x08, 0x8e, 0xf9, 0x9e, 0x08, 0x8e, 0xf0,
+ 0xc7, 0x7a, 0x7f, 0x08, 0x8e, 0x09, 0xc7, 0x14, 0x39, 0x08, 0x8c, 0x08,
+ 0xc4, 0x1e, 0x97, 0x08, 0x8e, 0x01, 0xc5, 0x40, 0xe7, 0x08, 0x8c, 0x10,
+ 0xc4, 0x18, 0x10, 0x08, 0x8e, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0x8e, 0xb0,
+ 0xc3, 0x0d, 0x14, 0x08, 0x8e, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0x8e, 0xa0,
+ 0xc4, 0x02, 0xde, 0x08, 0x8e, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0x8e, 0x90,
+ 0x97, 0x08, 0x8d, 0xf9, 0x8b, 0x08, 0x8d, 0xe9, 0x83, 0x08, 0x8d, 0x98,
+ 0x8e, 0x08, 0x8d, 0xd3, 0x03, 0x17, 0x83, 0x94, 0x08, 0x8d, 0xc2, 0x03,
+ 0x17, 0x87, 0x97, 0x08, 0x8d, 0xb8, 0x8b, 0x08, 0x8d, 0xa8, 0x8e, 0x08,
+ 0x8c, 0x5b, 0x03, 0x17, 0x8b, 0x94, 0x08, 0x8c, 0x4a, 0x03, 0x17, 0x8f,
+ 0xc2, 0x00, 0xd0, 0x08, 0x8c, 0xf1, 0x83, 0x08, 0x8c, 0xe8, 0xc2, 0x00,
+ 0xd0, 0x08, 0x8c, 0xe1, 0x83, 0x08, 0x8c, 0xd8, 0x45, 0x00, 0x27, 0xc3,
+ 0x17, 0x93, 0xce, 0x66, 0x67, 0x01, 0x2f, 0x38, 0x45, 0x00, 0x49, 0xc3,
+ 0x17, 0x9f, 0x46, 0x00, 0x2c, 0x43, 0x17, 0xab, 0xcc, 0x24, 0x47, 0x01,
+ 0x17, 0x29, 0xc8, 0x07, 0x5f, 0x01, 0x14, 0x90, 0xcc, 0x24, 0x47, 0x01,
+ 0x17, 0x21, 0xc8, 0x07, 0x5f, 0x01, 0x14, 0x88, 0xc7, 0x0b, 0x09, 0x01,
+ 0x9d, 0x01, 0xc5, 0xd9, 0x11, 0x01, 0x9d, 0x20, 0xc8, 0x0b, 0x08, 0x01,
+ 0x9d, 0x78, 0xc2, 0x17, 0x99, 0x01, 0x9a, 0x09, 0x90, 0x01, 0x9a, 0x10,
+ 0xc7, 0x0b, 0x09, 0x01, 0x9b, 0xc1, 0xc5, 0xd9, 0x11, 0x01, 0x9b, 0xc8,
+ 0xc5, 0xd9, 0x34, 0x01, 0x99, 0x59, 0xc2, 0x00, 0x16, 0x01, 0x99, 0x60,
+ 0xc3, 0x9f, 0x30, 0x01, 0x99, 0x79, 0x91, 0x01, 0x99, 0x80, 0xc3, 0xa9,
+ 0x98, 0x01, 0x99, 0xc1, 0xc2, 0x06, 0x62, 0x01, 0x99, 0xd0, 0xc5, 0xd6,
+ 0xff, 0x01, 0x97, 0xf9, 0xc6, 0xd1, 0xff, 0x01, 0x9b, 0xd9, 0xc6, 0xd1,
+ 0xcf, 0x01, 0x9b, 0xe1, 0xc7, 0xc5, 0x52, 0x01, 0x9b, 0xe9, 0xc5, 0xdb,
+ 0x05, 0x01, 0x9b, 0xf0, 0xc4, 0x89, 0x91, 0x01, 0x98, 0x61, 0xc4, 0xe4,
+ 0x6f, 0x01, 0x98, 0x68, 0x05, 0xc3, 0x17, 0xb7, 0xc7, 0x0b, 0x09, 0x01,
+ 0x9d, 0x10, 0xc4, 0xdd, 0xdf, 0x01, 0x9a, 0x19, 0xc2, 0x17, 0x99, 0x01,
+ 0x9a, 0x20, 0xc5, 0xd8, 0x26, 0x01, 0x9a, 0x50, 0xc3, 0x0f, 0xd9, 0x01,
+ 0x9a, 0x60, 0xc2, 0x02, 0x2e, 0x01, 0x9e, 0x09, 0xc5, 0x04, 0x34, 0x01,
+ 0x9d, 0x3a, 0x03, 0x17, 0xc3, 0xc7, 0x0b, 0x09, 0x01, 0x9c, 0xf9, 0xc5,
+ 0xd9, 0x11, 0x01, 0x9d, 0x18, 0xc2, 0x00, 0xbf, 0x01, 0x3e, 0x79, 0xc3,
+ 0x02, 0x9b, 0x01, 0x3e, 0x70, 0x95, 0x0f, 0x8a, 0x11, 0x94, 0x0f, 0x8a,
+ 0x09, 0x93, 0x0f, 0x8a, 0x01, 0x92, 0x0f, 0x89, 0xf9, 0x91, 0x0f, 0x89,
+ 0xf1, 0x90, 0x0f, 0x89, 0xe9, 0x8f, 0x0f, 0x89, 0xe1, 0x8e, 0x0f, 0x89,
+ 0xd9, 0x8d, 0x0f, 0x89, 0xd1, 0x8c, 0x0f, 0x89, 0xc9, 0x8b, 0x0f, 0x89,
+ 0xc1, 0x8a, 0x0f, 0x89, 0xb9, 0x89, 0x0f, 0x89, 0xb1, 0x88, 0x0f, 0x89,
+ 0xa9, 0x87, 0x0f, 0x89, 0xa1, 0x86, 0x0f, 0x89, 0x99, 0x83, 0x0f, 0x89,
+ 0x81, 0x84, 0x0f, 0x89, 0x89, 0x85, 0x0f, 0x89, 0x91, 0x96, 0x0f, 0x8a,
+ 0x19, 0x97, 0x0f, 0x8a, 0x21, 0x98, 0x0f, 0x8a, 0x29, 0x99, 0x0f, 0x8a,
+ 0x31, 0x9a, 0x0f, 0x8a, 0x39, 0x9b, 0x0f, 0x8a, 0x41, 0x9c, 0x0f, 0x8a,
+ 0x48, 0xc3, 0xe5, 0xbd, 0x0f, 0x91, 0xd9, 0xc3, 0xe5, 0xe1, 0x0f, 0x91,
+ 0x58, 0xc3, 0xe5, 0x8d, 0x0f, 0x91, 0xd1, 0x1f, 0xc3, 0x17, 0xc9, 0x21,
+ 0xc3, 0x17, 0xdb, 0x20, 0xc3, 0x17, 0xe7, 0xc3, 0xe4, 0xdf, 0x0f, 0x91,
+ 0x61, 0xc3, 0xe5, 0x09, 0x0f, 0x91, 0x21, 0xc3, 0xe5, 0xb7, 0x0f, 0x90,
+ 0xf1, 0xc3, 0xe6, 0x35, 0x0f, 0x90, 0xe9, 0x26, 0xc3, 0x17, 0xf3, 0xc3,
+ 0xe5, 0x42, 0x0f, 0x90, 0x88, 0x22, 0xc3, 0x17, 0xff, 0xc3, 0xe5, 0x12,
+ 0x0f, 0x91, 0x99, 0xc3, 0xe5, 0x1b, 0x0f, 0x91, 0x91, 0xc3, 0xe4, 0xf1,
+ 0x0f, 0x91, 0x09, 0xc3, 0xe5, 0xf6, 0x0f, 0x90, 0xd0, 0x42, 0xe4, 0xef,
+ 0xc3, 0x18, 0x0b, 0xc3, 0xe5, 0x27, 0x0f, 0x91, 0xa9, 0x1f, 0xc3, 0x18,
+ 0x13, 0x20, 0xc3, 0x18, 0x25, 0xc3, 0xe6, 0x38, 0x0f, 0x91, 0x31, 0x22,
+ 0xc3, 0x18, 0x31, 0xc3, 0xe5, 0x48, 0x0f, 0x90, 0xc8, 0xc3, 0xe4, 0xee,
+ 0x0f, 0x91, 0x81, 0xc3, 0xe5, 0x7b, 0x0f, 0x91, 0x19, 0xc3, 0xe5, 0x1e,
+ 0x0f, 0x90, 0xb0, 0xc2, 0x81, 0x20, 0x0f, 0x91, 0x69, 0x1d, 0xc3, 0x18,
+ 0x3d, 0xc2, 0xd5, 0x96, 0x0f, 0x90, 0xc1, 0xc2, 0x8c, 0x54, 0x0f, 0x90,
+ 0xa0, 0xc4, 0x02, 0xde, 0x01, 0x20, 0x99, 0xc2, 0x02, 0xa0, 0x01, 0x20,
+ 0x90, 0xcb, 0x98, 0xd1, 0x01, 0x20, 0x23, 0x03, 0x18, 0x45, 0xc3, 0x09,
+ 0x3f, 0x01, 0x20, 0x18, 0xc2, 0x00, 0xdb, 0x00, 0x43, 0x49, 0x83, 0x00,
+ 0x43, 0x40, 0x10, 0xc3, 0x18, 0x4b, 0xc2, 0x19, 0x2c, 0x00, 0x43, 0x11,
+ 0xc2, 0x01, 0x30, 0x00, 0x43, 0x00, 0xc4, 0x00, 0x49, 0x00, 0x38, 0x49,
+ 0xc5, 0x00, 0x2c, 0x00, 0x38, 0x48, 0xcf, 0x33, 0xad, 0x01, 0x56, 0x20,
+ 0xcb, 0x0e, 0xbd, 0x01, 0x56, 0x31, 0xce, 0x33, 0x92, 0x01, 0x56, 0x41,
+ 0xcf, 0x6a, 0x8f, 0x01, 0x56, 0x51, 0xcc, 0x24, 0x47, 0x01, 0x56, 0x60,
+ 0xc5, 0xd4, 0xcf, 0x00, 0xdc, 0x11, 0xc5, 0xd9, 0xcf, 0x00, 0xdc, 0x08,
+ 0xca, 0x6c, 0x10, 0x0f, 0xb0, 0x29, 0xcc, 0x1d, 0x4a, 0x0f, 0xb0, 0x21,
+ 0xd3, 0x41, 0x38, 0x0f, 0xb0, 0x30, 0x45, 0x02, 0x9a, 0x43, 0x18, 0x55,
+ 0xc7, 0x80, 0x70, 0x01, 0x17, 0xf1, 0x48, 0x00, 0x5f, 0x43, 0x18, 0x61,
+ 0xc7, 0x80, 0x70, 0x01, 0x17, 0xb9, 0x48, 0x00, 0x5f, 0x43, 0x18, 0x67,
+ 0x00, 0x43, 0x18, 0x6d, 0x0b, 0xc3, 0x18, 0x79, 0xc3, 0x09, 0x9e, 0x01,
+ 0x0b, 0x98, 0x19, 0xc3, 0x18, 0x88, 0xc2, 0x00, 0xc4, 0x01, 0x0b, 0xc9,
+ 0xc4, 0x02, 0xde, 0x01, 0x0b, 0x90, 0xc5, 0x66, 0xb1, 0x01, 0x0b, 0xd1,
+ 0xc4, 0x00, 0x2d, 0x01, 0x0b, 0xb8, 0xc4, 0x18, 0x10, 0x01, 0x0b, 0xb1,
+ 0xc2, 0x22, 0xcc, 0x01, 0x0b, 0xa8, 0xce, 0x69, 0x64, 0x07, 0xf2, 0x19,
+ 0xd2, 0x21, 0x89, 0x07, 0xf0, 0xb8, 0xcc, 0x00, 0x33, 0x07, 0xf1, 0xb9,
+ 0xcd, 0x69, 0x65, 0x07, 0xf2, 0x08, 0xc4, 0x00, 0x3b, 0x07, 0xf0, 0xc1,
+ 0xc4, 0xe0, 0xeb, 0x07, 0xf0, 0xc0, 0x9d, 0x0f, 0x87, 0x51, 0x9e, 0x0f,
+ 0x87, 0x59, 0x9f, 0x0f, 0x87, 0x61, 0xa0, 0x0f, 0x87, 0x69, 0xa1, 0x0f,
+ 0x87, 0x71, 0xa2, 0x0f, 0x87, 0x79, 0xa3, 0x0f, 0x87, 0x81, 0xa4, 0x0f,
+ 0x87, 0x89, 0xa5, 0x0f, 0x87, 0x91, 0xa6, 0x0f, 0x87, 0x98, 0x9d, 0x0f,
+ 0x87, 0xa1, 0x9e, 0x0f, 0x87, 0xa8, 0xc6, 0xcf, 0x05, 0x0f, 0x85, 0x21,
+ 0xc6, 0x78, 0x78, 0x0f, 0x85, 0xa1, 0xc8, 0xba, 0x2a, 0x0f, 0x86, 0x21,
+ 0xc5, 0xdd, 0x49, 0x0f, 0x86, 0xa0, 0xcc, 0x82, 0x4d, 0x01, 0x51, 0x39,
+ 0xd1, 0x4b, 0xde, 0x01, 0x51, 0x10, 0xc5, 0x05, 0x02, 0x01, 0x51, 0x31,
+ 0xc5, 0x00, 0xd4, 0x01, 0x51, 0x20, 0x83, 0x01, 0x90, 0xb1, 0x97, 0x01,
+ 0x90, 0xe0, 0x89, 0x08, 0xd7, 0x18, 0xc4, 0x18, 0x12, 0x08, 0x43, 0xf9,
+ 0x91, 0x08, 0x43, 0xd0, 0xc2, 0x39, 0x8b, 0x08, 0x43, 0xb1, 0xc3, 0x1e,
+ 0x1b, 0x08, 0x43, 0x40, 0xc3, 0x11, 0xef, 0x08, 0x43, 0xa9, 0x03, 0x43,
+ 0x18, 0x92, 0xc3, 0x16, 0x5a, 0x08, 0x43, 0x81, 0xc4, 0x36, 0xb5, 0x08,
+ 0x43, 0x00, 0xc2, 0x00, 0x8e, 0x08, 0x43, 0x38, 0xc3, 0x03, 0x15, 0x01,
+ 0x37, 0xc9, 0xc9, 0xa8, 0x8b, 0x0f, 0xa3, 0x88, 0xc8, 0x7a, 0x7e, 0x05,
+ 0x47, 0xb9, 0x16, 0xc3, 0x18, 0x9e, 0xc6, 0x1e, 0x95, 0x05, 0x47, 0x98,
+ 0x91, 0x00, 0x48, 0x91, 0x87, 0x00, 0x48, 0x71, 0x83, 0x00, 0x48, 0x20,
+ 0x8e, 0x00, 0x4b, 0x08, 0x94, 0x00, 0x4b, 0x00, 0xc2, 0x00, 0xd0, 0x00,
+ 0x4a, 0xe1, 0x83, 0x00, 0x4b, 0xf0, 0x91, 0x00, 0x48, 0x89, 0x87, 0x00,
+ 0x48, 0x69, 0x83, 0x00, 0x4b, 0x90, 0x8a, 0x08, 0x20, 0x18, 0x91, 0x08,
+ 0x20, 0x28, 0x8a, 0x08, 0x20, 0x48, 0x91, 0x08, 0x20, 0x58, 0x8a, 0x08,
+ 0x20, 0xf8, 0x89, 0x08, 0x21, 0x28, 0x8a, 0x08, 0x21, 0x58, 0x91, 0x08,
+ 0x21, 0x68, 0x8a, 0x08, 0x21, 0x88, 0x91, 0x08, 0x21, 0x98, 0x8a, 0x08,
+ 0x22, 0x38, 0x89, 0x08, 0x22, 0x68, 0xca, 0x03, 0xdd, 0x0f, 0xc4, 0x99,
+ 0x48, 0x01, 0x9a, 0x43, 0x18, 0xaa, 0xe0, 0x05, 0xc7, 0x01, 0x5f, 0x78,
+ 0xc5, 0x01, 0x4a, 0x01, 0x0e, 0x19, 0x00, 0x43, 0x18, 0xc5, 0xc5, 0x01,
+ 0x4a, 0x01, 0x0e, 0x11, 0x00, 0x43, 0x18, 0xd7, 0x45, 0x00, 0x8c, 0xc3,
+ 0x18, 0xe3, 0xda, 0x1b, 0x34, 0x01, 0x0f, 0xa9, 0xc8, 0xae, 0xbc, 0x01,
+ 0x0d, 0x39, 0xc6, 0x10, 0x9d, 0x01, 0x48, 0x99, 0xda, 0x1c, 0x1e, 0x0f,
+ 0xdd, 0xb8, 0xc4, 0x26, 0x78, 0x01, 0x27, 0xe9, 0xc5, 0x06, 0xdb, 0x01,
+ 0x27, 0xe1, 0x15, 0xc3, 0x19, 0x19, 0x08, 0xc3, 0x19, 0x25, 0x16, 0xc3,
+ 0x19, 0x31, 0xc3, 0x05, 0x14, 0x01, 0x27, 0xa8, 0x47, 0x00, 0x58, 0xc3,
+ 0x19, 0x3d, 0xce, 0x34, 0xd4, 0x01, 0x57, 0x18, 0xcf, 0x01, 0xb8, 0x01,
+ 0x80, 0xf0, 0x02, 0xc3, 0x19, 0x49, 0xc5, 0x27, 0xf9, 0x01, 0x00, 0xb8,
+ 0xc2, 0x00, 0xbf, 0x01, 0x52, 0xa1, 0xc3, 0x02, 0x9b, 0x01, 0x52, 0x98,
+ 0x8c, 0x01, 0x0a, 0x49, 0x8b, 0x01, 0x0a, 0x41, 0x87, 0x01, 0x0a, 0x39,
+ 0x86, 0x01, 0x0a, 0x30, 0x8b, 0x01, 0x09, 0xf8, 0xc9, 0x00, 0xca, 0x01,
+ 0x54, 0xd9, 0xcc, 0x07, 0xc7, 0x01, 0x54, 0xe0, 0xc5, 0x78, 0x04, 0x01,
+ 0x02, 0x31, 0x48, 0xbc, 0xfa, 0xc3, 0x19, 0x55, 0xc8, 0x52, 0x09, 0x01,
+ 0x4c, 0x61, 0xc6, 0x01, 0x73, 0x01, 0x72, 0xb1, 0xcd, 0x75, 0xa6, 0x01,
+ 0x72, 0xc0, 0xd1, 0x52, 0xff, 0x0f, 0xab, 0x51, 0xce, 0x6f, 0x1c, 0x0f,
+ 0xab, 0x48, 0x00, 0x43, 0x19, 0x61, 0xc6, 0x02, 0xd1, 0x01, 0x2e, 0xb9,
+ 0xc4, 0x0e, 0x6a, 0x01, 0x5f, 0x48, 0xd4, 0x3f, 0x5c, 0x01, 0x4e, 0x70,
+ 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x13, 0x03, 0x19, 0x82, 0xcc, 0x82, 0xb9,
+ 0x01, 0x5b, 0x61, 0xcd, 0x7c, 0xa8, 0x01, 0x5c, 0x30, 0x45, 0x00, 0x8c,
+ 0xc3, 0x19, 0x86, 0xc8, 0xae, 0xbc, 0x01, 0x48, 0x28, 0x44, 0x03, 0xc8,
+ 0xc3, 0x19, 0x96, 0x42, 0x02, 0xae, 0x43, 0x19, 0xa0, 0xd7, 0x22, 0x5c,
+ 0x0f, 0xc0, 0x51, 0xc3, 0x7e, 0x79, 0x01, 0x0d, 0x60, 0x45, 0x03, 0x14,
+ 0xc3, 0x19, 0xaa, 0xc5, 0x01, 0x74, 0x01, 0x0c, 0xd8, 0xd4, 0x2d, 0x64,
+ 0x01, 0x0f, 0xd9, 0xc9, 0xb3, 0xf8, 0x01, 0x48, 0x88, 0xc3, 0x14, 0xa7,
+ 0x01, 0x0d, 0x1b, 0x03, 0x19, 0xb6, 0x43, 0x00, 0x7e, 0x43, 0x19, 0xbc,
+ 0xc2, 0x00, 0xb1, 0x01, 0x0f, 0x29, 0xcc, 0x56, 0x78, 0x01, 0x48, 0xf0,
+ 0x9a, 0x01, 0x4a, 0x39, 0xcc, 0x07, 0xc7, 0x01, 0x5a, 0x19, 0xc8, 0xb7,
+ 0x52, 0x01, 0x5a, 0x20, 0xcf, 0x6a, 0x8f, 0x01, 0x4b, 0xa9, 0xce, 0x33,
+ 0x92, 0x01, 0x4b, 0xa1, 0xd5, 0x36, 0xef, 0x01, 0x4a, 0x11, 0x48, 0x61,
+ 0xd4, 0x43, 0x19, 0xc8, 0xe0, 0x06, 0xc7, 0x0f, 0xdd, 0xb0, 0x45, 0x00,
+ 0x8c, 0xc3, 0x19, 0xd4, 0xc8, 0xae, 0xbc, 0x01, 0x48, 0x38, 0xc8, 0x4b,
+ 0x94, 0x01, 0x0c, 0x39, 0xca, 0xa7, 0xce, 0x01, 0x0c, 0x30, 0xc8, 0x4b,
+ 0x94, 0x01, 0x0c, 0x09, 0xc7, 0x0d, 0x04, 0x01, 0x0b, 0x70, 0xc3, 0x23,
+ 0x1c, 0x00, 0xb7, 0xc1, 0x85, 0x00, 0xb7, 0xb8, 0xc2, 0x1d, 0xc1, 0x00,
+ 0xb7, 0x39, 0xc6, 0xd2, 0x35, 0x00, 0xb6, 0xc9, 0xc9, 0x25, 0x3a, 0x00,
+ 0xb6, 0x99, 0xc5, 0x72, 0x5f, 0x00, 0xb6, 0x81, 0xc5, 0x2e, 0x39, 0x00,
+ 0xb6, 0x61, 0xc4, 0x05, 0xf1, 0x00, 0xb6, 0x31, 0xc6, 0x57, 0x17, 0x00,
+ 0xb5, 0xf9, 0xc8, 0xbf, 0x3a, 0x00, 0xb5, 0xe9, 0xc5, 0x71, 0x4d, 0x00,
+ 0xb5, 0x68, 0x90, 0x05, 0x28, 0x20, 0x90, 0x05, 0x2b, 0xa8, 0x87, 0x05,
+ 0x28, 0x30, 0x91, 0x05, 0x2b, 0xb8, 0x87, 0x05, 0x28, 0x40, 0x91, 0x05,
+ 0x2b, 0xc8, 0x87, 0x05, 0x28, 0x50, 0x91, 0x05, 0x2b, 0xd8, 0x87, 0x05,
+ 0x28, 0x49, 0x90, 0x05, 0x2f, 0x68, 0x90, 0x05, 0x2a, 0xa8, 0x91, 0x05,
+ 0x2b, 0xd0, 0x87, 0x05, 0x28, 0x59, 0x90, 0x05, 0x2f, 0x80, 0x91, 0x05,
+ 0x2b, 0xe1, 0x90, 0x05, 0x2e, 0x40, 0x87, 0x05, 0x28, 0x78, 0x91, 0x05,
+ 0x2c, 0x00, 0x87, 0x05, 0x28, 0x80, 0x87, 0x05, 0x2f, 0xb3, 0x03, 0x19,
+ 0xe0, 0x8b, 0x05, 0x29, 0xb1, 0x83, 0x05, 0x2a, 0xe9, 0x91, 0x05, 0x2e,
+ 0x73, 0x03, 0x19, 0xe4, 0x97, 0x05, 0x2d, 0x38, 0x91, 0x05, 0x2c, 0x08,
+ 0x87, 0x05, 0x28, 0xa8, 0x91, 0x05, 0x2c, 0x31, 0x43, 0x00, 0x5c, 0x43,
+ 0x19, 0xe8, 0x87, 0x05, 0x28, 0xe0, 0x91, 0x05, 0x2c, 0x68, 0x87, 0x05,
+ 0x30, 0x23, 0x03, 0x1a, 0x06, 0x8b, 0x05, 0x2a, 0x21, 0x83, 0x05, 0x2b,
+ 0x61, 0x91, 0x05, 0x2e, 0xe3, 0x03, 0x1a, 0x0e, 0x97, 0x05, 0x2d, 0xa8,
+ 0x87, 0x05, 0x29, 0x18, 0x91, 0x05, 0x2c, 0xa0, 0x87, 0x05, 0x28, 0xb8,
+ 0x91, 0x05, 0x2c, 0x40, 0x87, 0x05, 0x28, 0xc8, 0x91, 0x05, 0x2c, 0x50,
+ 0xc5, 0x00, 0xd4, 0x01, 0x57, 0x79, 0xc5, 0x05, 0x02, 0x01, 0x57, 0x80,
+ 0xa5, 0x0c, 0x57, 0xf9, 0xa4, 0x0c, 0x57, 0xf1, 0xa3, 0x0c, 0x57, 0xe9,
+ 0xa2, 0x0c, 0x57, 0xe1, 0xa1, 0x0c, 0x57, 0xd9, 0xa0, 0x0c, 0x57, 0xd1,
+ 0x9f, 0x0c, 0x57, 0xc9, 0x9e, 0x0c, 0x57, 0xc1, 0x9d, 0x0c, 0x57, 0xb8,
+ 0xa6, 0x0c, 0x57, 0xb1, 0xa5, 0x0c, 0x57, 0xa9, 0xa4, 0x0c, 0x57, 0xa1,
+ 0xa3, 0x0c, 0x57, 0x99, 0xa2, 0x0c, 0x57, 0x91, 0xa1, 0x0c, 0x57, 0x89,
+ 0xa0, 0x0c, 0x57, 0x81, 0x9f, 0x0c, 0x57, 0x79, 0x9e, 0x0c, 0x57, 0x71,
+ 0x9d, 0x0c, 0x57, 0x68, 0xa6, 0x0c, 0x57, 0x61, 0xa5, 0x0c, 0x57, 0x59,
+ 0xa4, 0x0c, 0x57, 0x51, 0xa3, 0x0c, 0x57, 0x49, 0xa2, 0x0c, 0x57, 0x41,
+ 0xa1, 0x0c, 0x57, 0x39, 0xa0, 0x0c, 0x57, 0x31, 0x9f, 0x0c, 0x57, 0x29,
+ 0x9e, 0x0c, 0x57, 0x21, 0x9d, 0x0c, 0x57, 0x18, 0xa6, 0x0c, 0x57, 0x11,
+ 0xa5, 0x0c, 0x57, 0x09, 0xa4, 0x0c, 0x57, 0x01, 0xa3, 0x0c, 0x56, 0xf9,
+ 0xa2, 0x0c, 0x56, 0xf1, 0xa1, 0x0c, 0x56, 0xe9, 0xa0, 0x0c, 0x56, 0xe1,
+ 0x9f, 0x0c, 0x56, 0xd9, 0x9e, 0x0c, 0x56, 0xd1, 0x9d, 0x0c, 0x56, 0xc8,
+ 0xa6, 0x0c, 0x56, 0xc1, 0xa5, 0x0c, 0x56, 0xb9, 0xa4, 0x0c, 0x56, 0xb1,
+ 0xa3, 0x0c, 0x56, 0xa9, 0xa2, 0x0c, 0x56, 0xa1, 0xa1, 0x0c, 0x56, 0x99,
+ 0xa0, 0x0c, 0x56, 0x91, 0x9f, 0x0c, 0x56, 0x89, 0x9e, 0x0c, 0x56, 0x81,
+ 0x9d, 0x0c, 0x56, 0x78, 0xa6, 0x0c, 0x56, 0x71, 0xa5, 0x0c, 0x56, 0x69,
+ 0xa4, 0x0c, 0x56, 0x61, 0xa3, 0x0c, 0x56, 0x59, 0xa2, 0x0c, 0x56, 0x51,
+ 0xa1, 0x0c, 0x56, 0x49, 0xa0, 0x0c, 0x56, 0x41, 0x9f, 0x0c, 0x56, 0x39,
+ 0x9e, 0x0c, 0x56, 0x31, 0x9d, 0x0c, 0x56, 0x28, 0xa6, 0x0c, 0x56, 0x21,
+ 0xa5, 0x0c, 0x56, 0x19, 0xa4, 0x0c, 0x56, 0x11, 0xa3, 0x0c, 0x56, 0x09,
+ 0xa2, 0x0c, 0x56, 0x01, 0xa1, 0x0c, 0x55, 0xf9, 0xa0, 0x0c, 0x55, 0xf1,
+ 0x9f, 0x0c, 0x55, 0xe9, 0x9e, 0x0c, 0x55, 0xe1, 0x9d, 0x0c, 0x55, 0xd8,
+ 0xa6, 0x0c, 0x55, 0xd1, 0xa5, 0x0c, 0x55, 0xc9, 0xa4, 0x0c, 0x55, 0xc1,
+ 0xa3, 0x0c, 0x55, 0xb9, 0xa2, 0x0c, 0x55, 0xb1, 0xa1, 0x0c, 0x55, 0xa9,
+ 0xa0, 0x0c, 0x55, 0xa1, 0x9f, 0x0c, 0x55, 0x99, 0x9e, 0x0c, 0x55, 0x91,
+ 0x9d, 0x0c, 0x55, 0x88, 0xa6, 0x0c, 0x55, 0x81, 0xa5, 0x0c, 0x55, 0x79,
+ 0xa4, 0x0c, 0x55, 0x71, 0xa3, 0x0c, 0x55, 0x69, 0xa2, 0x0c, 0x55, 0x61,
+ 0xa1, 0x0c, 0x55, 0x59, 0xa0, 0x0c, 0x55, 0x51, 0x9f, 0x0c, 0x55, 0x49,
+ 0x9e, 0x0c, 0x55, 0x41, 0x9d, 0x0c, 0x55, 0x38, 0xa6, 0x0c, 0x55, 0x31,
+ 0xa5, 0x0c, 0x55, 0x29, 0xa4, 0x0c, 0x55, 0x21, 0xa3, 0x0c, 0x55, 0x19,
+ 0xa2, 0x0c, 0x55, 0x11, 0xa1, 0x0c, 0x55, 0x09, 0xa0, 0x0c, 0x55, 0x01,
+ 0x9f, 0x0c, 0x54, 0xf9, 0x9e, 0x0c, 0x54, 0xf1, 0x9d, 0x0c, 0x54, 0xe8,
+ 0xa6, 0x0c, 0x54, 0xe1, 0xa5, 0x0c, 0x54, 0xd9, 0xa4, 0x0c, 0x54, 0xd1,
+ 0xa3, 0x0c, 0x54, 0xc9, 0xa2, 0x0c, 0x54, 0xc1, 0xa1, 0x0c, 0x54, 0xb9,
+ 0xa0, 0x0c, 0x54, 0xb1, 0x9f, 0x0c, 0x54, 0xa9, 0x9e, 0x0c, 0x54, 0xa1,
+ 0x9d, 0x0c, 0x54, 0x98, 0xa6, 0x0c, 0x54, 0x91, 0xa5, 0x0c, 0x54, 0x89,
+ 0xa4, 0x0c, 0x54, 0x81, 0xa3, 0x0c, 0x54, 0x79, 0xa2, 0x0c, 0x54, 0x71,
+ 0xa1, 0x0c, 0x54, 0x69, 0xa0, 0x0c, 0x54, 0x61, 0x9f, 0x0c, 0x54, 0x59,
+ 0x9e, 0x0c, 0x54, 0x51, 0x9d, 0x0c, 0x54, 0x48, 0xa6, 0x0c, 0x54, 0x41,
+ 0xa5, 0x0c, 0x54, 0x39, 0xa4, 0x0c, 0x54, 0x31, 0xa3, 0x0c, 0x54, 0x29,
+ 0xa2, 0x0c, 0x54, 0x21, 0xa1, 0x0c, 0x54, 0x19, 0xa0, 0x0c, 0x54, 0x11,
+ 0x9f, 0x0c, 0x54, 0x09, 0x9e, 0x0c, 0x54, 0x01, 0x9d, 0x0c, 0x53, 0xf8,
+ 0xa6, 0x0c, 0x53, 0xf1, 0xa5, 0x0c, 0x53, 0xe9, 0xa4, 0x0c, 0x53, 0xe1,
+ 0xa3, 0x0c, 0x53, 0xd9, 0xa2, 0x0c, 0x53, 0xd1, 0xa1, 0x0c, 0x53, 0xc9,
+ 0xa0, 0x0c, 0x53, 0xc1, 0x9f, 0x0c, 0x53, 0xb9, 0x9e, 0x0c, 0x53, 0xb1,
+ 0x9d, 0x0c, 0x53, 0xa8, 0xa6, 0x0c, 0x53, 0xa1, 0xa5, 0x0c, 0x53, 0x99,
+ 0xa4, 0x0c, 0x53, 0x91, 0xa3, 0x0c, 0x53, 0x89, 0xa2, 0x0c, 0x53, 0x81,
+ 0xa1, 0x0c, 0x53, 0x79, 0xa0, 0x0c, 0x53, 0x71, 0x9f, 0x0c, 0x53, 0x69,
+ 0x9e, 0x0c, 0x53, 0x61, 0x9d, 0x0c, 0x53, 0x58, 0xa6, 0x0c, 0x53, 0x51,
+ 0xa5, 0x0c, 0x53, 0x49, 0xa4, 0x0c, 0x53, 0x41, 0xa3, 0x0c, 0x53, 0x39,
+ 0xa2, 0x0c, 0x53, 0x31, 0xa1, 0x0c, 0x53, 0x29, 0xa0, 0x0c, 0x53, 0x21,
+ 0x9f, 0x0c, 0x53, 0x19, 0x9e, 0x0c, 0x53, 0x11, 0x9d, 0x0c, 0x53, 0x08,
+ 0xa6, 0x0c, 0x53, 0x01, 0xa5, 0x0c, 0x52, 0xf9, 0xa4, 0x0c, 0x52, 0xf1,
+ 0xa3, 0x0c, 0x52, 0xe9, 0xa2, 0x0c, 0x52, 0xe1, 0xa1, 0x0c, 0x52, 0xd9,
+ 0xa0, 0x0c, 0x52, 0xd1, 0x9f, 0x0c, 0x52, 0xc9, 0x9e, 0x0c, 0x52, 0xc1,
+ 0x9d, 0x0c, 0x52, 0xb8, 0xa6, 0x0c, 0x52, 0xb1, 0xa5, 0x0c, 0x52, 0xa9,
+ 0xa4, 0x0c, 0x52, 0xa1, 0xa3, 0x0c, 0x52, 0x99, 0xa2, 0x0c, 0x52, 0x91,
+ 0xa1, 0x0c, 0x52, 0x89, 0xa0, 0x0c, 0x52, 0x81, 0x9f, 0x0c, 0x52, 0x79,
+ 0x9e, 0x0c, 0x52, 0x71, 0x9d, 0x0c, 0x52, 0x68, 0xa6, 0x0c, 0x52, 0x61,
+ 0xa5, 0x0c, 0x52, 0x59, 0xa4, 0x0c, 0x52, 0x51, 0xa3, 0x0c, 0x52, 0x49,
+ 0xa2, 0x0c, 0x52, 0x41, 0xa1, 0x0c, 0x52, 0x39, 0xa0, 0x0c, 0x52, 0x31,
+ 0x9f, 0x0c, 0x52, 0x29, 0x9e, 0x0c, 0x52, 0x21, 0x9d, 0x0c, 0x52, 0x18,
+ 0xa6, 0x0c, 0x52, 0x11, 0xa5, 0x0c, 0x52, 0x09, 0xa4, 0x0c, 0x52, 0x01,
+ 0xa3, 0x0c, 0x51, 0xf9, 0xa2, 0x0c, 0x51, 0xf1, 0xa1, 0x0c, 0x51, 0xe9,
+ 0xa0, 0x0c, 0x51, 0xe1, 0x9f, 0x0c, 0x51, 0xd9, 0x9e, 0x0c, 0x51, 0xd1,
+ 0x9d, 0x0c, 0x51, 0xc8, 0xa6, 0x0c, 0x51, 0xc1, 0xa5, 0x0c, 0x51, 0xb9,
+ 0xa4, 0x0c, 0x51, 0xb1, 0xa3, 0x0c, 0x51, 0xa9, 0xa2, 0x0c, 0x51, 0xa1,
+ 0xa1, 0x0c, 0x51, 0x99, 0xa0, 0x0c, 0x51, 0x91, 0x9f, 0x0c, 0x51, 0x89,
+ 0x9e, 0x0c, 0x51, 0x81, 0x9d, 0x0c, 0x51, 0x78, 0xa6, 0x0c, 0x51, 0x71,
+ 0xa5, 0x0c, 0x51, 0x69, 0xa4, 0x0c, 0x51, 0x61, 0xa3, 0x0c, 0x51, 0x59,
+ 0xa2, 0x0c, 0x51, 0x51, 0xa1, 0x0c, 0x51, 0x49, 0xa0, 0x0c, 0x51, 0x41,
+ 0x9f, 0x0c, 0x51, 0x39, 0x9e, 0x0c, 0x51, 0x31, 0x9d, 0x0c, 0x51, 0x28,
+ 0xa6, 0x0c, 0x51, 0x21, 0xa5, 0x0c, 0x51, 0x19, 0xa4, 0x0c, 0x51, 0x11,
+ 0xa3, 0x0c, 0x51, 0x09, 0xa2, 0x0c, 0x51, 0x01, 0xa1, 0x0c, 0x50, 0xf9,
+ 0xa0, 0x0c, 0x50, 0xf1, 0x9f, 0x0c, 0x50, 0xe9, 0x9e, 0x0c, 0x50, 0xe1,
+ 0x9d, 0x0c, 0x50, 0xd8, 0xa6, 0x0c, 0x50, 0xd1, 0xa5, 0x0c, 0x50, 0xc9,
+ 0xa4, 0x0c, 0x50, 0xc1, 0xa3, 0x0c, 0x50, 0xb9, 0xa2, 0x0c, 0x50, 0xb1,
+ 0xa1, 0x0c, 0x50, 0xa9, 0xa0, 0x0c, 0x50, 0xa1, 0x9f, 0x0c, 0x50, 0x99,
+ 0x9e, 0x0c, 0x50, 0x91, 0x9d, 0x0c, 0x50, 0x88, 0xa6, 0x0c, 0x50, 0x81,
+ 0xa5, 0x0c, 0x50, 0x79, 0xa4, 0x0c, 0x50, 0x71, 0xa3, 0x0c, 0x50, 0x69,
+ 0xa2, 0x0c, 0x50, 0x61, 0xa1, 0x0c, 0x50, 0x59, 0xa0, 0x0c, 0x50, 0x51,
+ 0x9f, 0x0c, 0x50, 0x49, 0x9e, 0x0c, 0x50, 0x41, 0x9d, 0x0c, 0x50, 0x38,
+ 0xa6, 0x0c, 0x50, 0x31, 0xa5, 0x0c, 0x50, 0x29, 0xa4, 0x0c, 0x50, 0x21,
+ 0xa3, 0x0c, 0x50, 0x19, 0xa2, 0x0c, 0x50, 0x11, 0xa1, 0x0c, 0x50, 0x09,
+ 0xa0, 0x0c, 0x50, 0x01, 0x9f, 0x0c, 0x4f, 0xf9, 0x9e, 0x0c, 0x4f, 0xf1,
+ 0x9d, 0x0c, 0x4f, 0xe8, 0xa6, 0x0c, 0x4f, 0xe1, 0xa5, 0x0c, 0x4f, 0xd9,
+ 0xa4, 0x0c, 0x4f, 0xd1, 0xa3, 0x0c, 0x4f, 0xc9, 0xa2, 0x0c, 0x4f, 0xc1,
+ 0xa1, 0x0c, 0x4f, 0xb9, 0xa0, 0x0c, 0x4f, 0xb1, 0x9f, 0x0c, 0x4f, 0xa9,
+ 0x9e, 0x0c, 0x4f, 0xa1, 0x9d, 0x0c, 0x4f, 0x98, 0xa6, 0x0c, 0x4f, 0x91,
+ 0xa5, 0x0c, 0x4f, 0x89, 0xa4, 0x0c, 0x4f, 0x81, 0xa3, 0x0c, 0x4f, 0x79,
+ 0xa2, 0x0c, 0x4f, 0x71, 0xa1, 0x0c, 0x4f, 0x69, 0xa0, 0x0c, 0x4f, 0x61,
+ 0x9f, 0x0c, 0x4f, 0x59, 0x9e, 0x0c, 0x4f, 0x51, 0x9d, 0x0c, 0x4f, 0x48,
+ 0xa6, 0x0c, 0x4f, 0x41, 0xa5, 0x0c, 0x4f, 0x39, 0xa4, 0x0c, 0x4f, 0x31,
+ 0xa3, 0x0c, 0x4f, 0x29, 0xa2, 0x0c, 0x4f, 0x21, 0xa1, 0x0c, 0x4f, 0x19,
+ 0xa0, 0x0c, 0x4f, 0x11, 0x9f, 0x0c, 0x4f, 0x09, 0x9e, 0x0c, 0x4f, 0x01,
+ 0x9d, 0x0c, 0x4e, 0xf8, 0xa6, 0x0c, 0x4e, 0xf1, 0xa5, 0x0c, 0x4e, 0xe9,
+ 0xa4, 0x0c, 0x4e, 0xe1, 0xa3, 0x0c, 0x4e, 0xd9, 0xa2, 0x0c, 0x4e, 0xd1,
+ 0xa1, 0x0c, 0x4e, 0xc9, 0xa0, 0x0c, 0x4e, 0xc1, 0x9f, 0x0c, 0x4e, 0xb9,
+ 0x9e, 0x0c, 0x4e, 0xb1, 0x9d, 0x0c, 0x4e, 0xa8, 0xa6, 0x0c, 0x4e, 0xa1,
+ 0xa5, 0x0c, 0x4e, 0x99, 0xa4, 0x0c, 0x4e, 0x91, 0xa3, 0x0c, 0x4e, 0x89,
+ 0xa2, 0x0c, 0x4e, 0x81, 0xa1, 0x0c, 0x4e, 0x79, 0xa0, 0x0c, 0x4e, 0x71,
+ 0x9f, 0x0c, 0x4e, 0x69, 0x9e, 0x0c, 0x4e, 0x61, 0x9d, 0x0c, 0x4e, 0x58,
+ 0xa6, 0x0c, 0x4e, 0x51, 0xa5, 0x0c, 0x4e, 0x49, 0xa4, 0x0c, 0x4e, 0x41,
+ 0xa3, 0x0c, 0x4e, 0x39, 0xa2, 0x0c, 0x4e, 0x31, 0xa1, 0x0c, 0x4e, 0x29,
+ 0xa0, 0x0c, 0x4e, 0x21, 0x9f, 0x0c, 0x4e, 0x19, 0x9e, 0x0c, 0x4e, 0x11,
+ 0x9d, 0x0c, 0x4e, 0x08, 0xa6, 0x0c, 0x4e, 0x01, 0xa5, 0x0c, 0x4d, 0xf9,
+ 0xa4, 0x0c, 0x4d, 0xf1, 0xa3, 0x0c, 0x4d, 0xe9, 0xa2, 0x0c, 0x4d, 0xe1,
+ 0xa1, 0x0c, 0x4d, 0xd9, 0xa0, 0x0c, 0x4d, 0xd1, 0x9f, 0x0c, 0x4d, 0xc9,
+ 0x9e, 0x0c, 0x4d, 0xc1, 0x9d, 0x0c, 0x4d, 0xb8, 0xa6, 0x0c, 0x4d, 0xb1,
+ 0xa5, 0x0c, 0x4d, 0xa9, 0xa4, 0x0c, 0x4d, 0xa1, 0xa3, 0x0c, 0x4d, 0x99,
+ 0xa2, 0x0c, 0x4d, 0x91, 0xa1, 0x0c, 0x4d, 0x89, 0xa0, 0x0c, 0x4d, 0x81,
+ 0x9f, 0x0c, 0x4d, 0x79, 0x9e, 0x0c, 0x4d, 0x71, 0x9d, 0x0c, 0x4d, 0x68,
+ 0xa6, 0x0c, 0x4d, 0x61, 0xa5, 0x0c, 0x4d, 0x59, 0xa4, 0x0c, 0x4d, 0x51,
+ 0xa3, 0x0c, 0x4d, 0x49, 0xa2, 0x0c, 0x4d, 0x41, 0xa1, 0x0c, 0x4d, 0x39,
+ 0xa0, 0x0c, 0x4d, 0x31, 0x9f, 0x0c, 0x4d, 0x29, 0x9e, 0x0c, 0x4d, 0x21,
+ 0x9d, 0x0c, 0x4d, 0x18, 0xa6, 0x0c, 0x4d, 0x11, 0xa5, 0x0c, 0x4d, 0x09,
+ 0xa4, 0x0c, 0x4d, 0x01, 0xa3, 0x0c, 0x4c, 0xf9, 0xa2, 0x0c, 0x4c, 0xf1,
+ 0xa1, 0x0c, 0x4c, 0xe9, 0xa0, 0x0c, 0x4c, 0xe1, 0x9f, 0x0c, 0x4c, 0xd9,
+ 0x9e, 0x0c, 0x4c, 0xd1, 0x9d, 0x0c, 0x4c, 0xc8, 0xa6, 0x0c, 0x4c, 0xc1,
+ 0xa5, 0x0c, 0x4c, 0xb9, 0xa4, 0x0c, 0x4c, 0xb1, 0xa3, 0x0c, 0x4c, 0xa9,
+ 0xa2, 0x0c, 0x4c, 0xa1, 0xa1, 0x0c, 0x4c, 0x99, 0xa0, 0x0c, 0x4c, 0x91,
+ 0x9f, 0x0c, 0x4c, 0x89, 0x9e, 0x0c, 0x4c, 0x81, 0x9d, 0x0c, 0x4c, 0x78,
+ 0xa6, 0x0c, 0x4c, 0x71, 0xa5, 0x0c, 0x4c, 0x69, 0xa4, 0x0c, 0x4c, 0x61,
+ 0xa3, 0x0c, 0x4c, 0x59, 0xa2, 0x0c, 0x4c, 0x51, 0xa1, 0x0c, 0x4c, 0x49,
+ 0xa0, 0x0c, 0x4c, 0x41, 0x9f, 0x0c, 0x4c, 0x39, 0x9e, 0x0c, 0x4c, 0x31,
+ 0x9d, 0x0c, 0x4c, 0x28, 0xa6, 0x0c, 0x4c, 0x21, 0xa5, 0x0c, 0x4c, 0x19,
+ 0xa4, 0x0c, 0x4c, 0x11, 0xa3, 0x0c, 0x4c, 0x09, 0xa2, 0x0c, 0x4c, 0x01,
+ 0xa1, 0x0c, 0x4b, 0xf9, 0xa0, 0x0c, 0x4b, 0xf1, 0x9f, 0x0c, 0x4b, 0xe9,
+ 0x9e, 0x0c, 0x4b, 0xe1, 0x9d, 0x0c, 0x4b, 0xd8, 0xa6, 0x0c, 0x4b, 0xd1,
+ 0xa5, 0x0c, 0x4b, 0xc9, 0xa4, 0x0c, 0x4b, 0xc1, 0xa3, 0x0c, 0x4b, 0xb9,
+ 0xa2, 0x0c, 0x4b, 0xb1, 0xa1, 0x0c, 0x4b, 0xa9, 0xa0, 0x0c, 0x4b, 0xa1,
+ 0x9f, 0x0c, 0x4b, 0x99, 0x9e, 0x0c, 0x4b, 0x91, 0x9d, 0x0c, 0x4b, 0x88,
+ 0xa6, 0x0c, 0x4b, 0x81, 0xa5, 0x0c, 0x4b, 0x79, 0xa4, 0x0c, 0x4b, 0x71,
+ 0xa3, 0x0c, 0x4b, 0x69, 0xa2, 0x0c, 0x4b, 0x61, 0xa1, 0x0c, 0x4b, 0x59,
+ 0xa0, 0x0c, 0x4b, 0x51, 0x9f, 0x0c, 0x4b, 0x49, 0x9e, 0x0c, 0x4b, 0x41,
+ 0x9d, 0x0c, 0x4b, 0x38, 0xa6, 0x0c, 0x4b, 0x31, 0xa5, 0x0c, 0x4b, 0x29,
+ 0xa4, 0x0c, 0x4b, 0x21, 0xa3, 0x0c, 0x4b, 0x19, 0xa2, 0x0c, 0x4b, 0x11,
+ 0xa1, 0x0c, 0x4b, 0x09, 0xa0, 0x0c, 0x4b, 0x01, 0x9f, 0x0c, 0x4a, 0xf9,
+ 0x9e, 0x0c, 0x4a, 0xf1, 0x9d, 0x0c, 0x4a, 0xe8, 0xa6, 0x0c, 0x4a, 0xe1,
+ 0xa5, 0x0c, 0x4a, 0xd9, 0xa4, 0x0c, 0x4a, 0xd1, 0xa3, 0x0c, 0x4a, 0xc9,
+ 0xa2, 0x0c, 0x4a, 0xc1, 0xa1, 0x0c, 0x4a, 0xb9, 0xa0, 0x0c, 0x4a, 0xb1,
+ 0x9f, 0x0c, 0x4a, 0xa9, 0x9e, 0x0c, 0x4a, 0xa1, 0x9d, 0x0c, 0x4a, 0x98,
+ 0xa6, 0x0c, 0x4a, 0x91, 0xa5, 0x0c, 0x4a, 0x89, 0xa4, 0x0c, 0x4a, 0x81,
+ 0xa3, 0x0c, 0x4a, 0x79, 0xa2, 0x0c, 0x4a, 0x71, 0xa1, 0x0c, 0x4a, 0x69,
+ 0xa0, 0x0c, 0x4a, 0x61, 0x9f, 0x0c, 0x4a, 0x59, 0x9e, 0x0c, 0x4a, 0x51,
+ 0x9d, 0x0c, 0x4a, 0x48, 0xa6, 0x0c, 0x4a, 0x41, 0xa5, 0x0c, 0x4a, 0x39,
+ 0xa4, 0x0c, 0x4a, 0x31, 0xa3, 0x0c, 0x4a, 0x29, 0xa2, 0x0c, 0x4a, 0x21,
+ 0xa1, 0x0c, 0x4a, 0x19, 0xa0, 0x0c, 0x4a, 0x11, 0x9f, 0x0c, 0x4a, 0x09,
+ 0x9e, 0x0c, 0x4a, 0x01, 0x9d, 0x0c, 0x49, 0xf8, 0xa6, 0x0c, 0x49, 0xf1,
+ 0xa5, 0x0c, 0x49, 0xe9, 0xa4, 0x0c, 0x49, 0xe1, 0xa3, 0x0c, 0x49, 0xd9,
+ 0xa2, 0x0c, 0x49, 0xd1, 0xa1, 0x0c, 0x49, 0xc9, 0xa0, 0x0c, 0x49, 0xc1,
+ 0x9f, 0x0c, 0x49, 0xb9, 0x9e, 0x0c, 0x49, 0xb1, 0x9d, 0x0c, 0x49, 0xa8,
+ 0xa6, 0x0c, 0x49, 0xa1, 0xa5, 0x0c, 0x49, 0x99, 0xa4, 0x0c, 0x49, 0x91,
+ 0xa3, 0x0c, 0x49, 0x89, 0xa2, 0x0c, 0x49, 0x81, 0xa1, 0x0c, 0x49, 0x79,
+ 0xa0, 0x0c, 0x49, 0x71, 0x9f, 0x0c, 0x49, 0x69, 0x9e, 0x0c, 0x49, 0x61,
+ 0x9d, 0x0c, 0x49, 0x58, 0xa6, 0x0c, 0x49, 0x51, 0xa5, 0x0c, 0x49, 0x49,
+ 0xa4, 0x0c, 0x49, 0x41, 0xa3, 0x0c, 0x49, 0x39, 0xa2, 0x0c, 0x49, 0x31,
+ 0xa1, 0x0c, 0x49, 0x29, 0xa0, 0x0c, 0x49, 0x21, 0x9f, 0x0c, 0x49, 0x19,
+ 0x9e, 0x0c, 0x49, 0x11, 0x9d, 0x0c, 0x49, 0x08, 0xa6, 0x0c, 0x49, 0x01,
+ 0xa5, 0x0c, 0x48, 0xf9, 0xa4, 0x0c, 0x48, 0xf1, 0xa3, 0x0c, 0x48, 0xe9,
+ 0xa2, 0x0c, 0x48, 0xe1, 0xa1, 0x0c, 0x48, 0xd9, 0xa0, 0x0c, 0x48, 0xd1,
+ 0x9f, 0x0c, 0x48, 0xc9, 0x9e, 0x0c, 0x48, 0xc1, 0x9d, 0x0c, 0x48, 0xb8,
+ 0xa6, 0x0c, 0x48, 0xb1, 0xa5, 0x0c, 0x48, 0xa9, 0xa4, 0x0c, 0x48, 0xa1,
+ 0xa3, 0x0c, 0x48, 0x99, 0xa2, 0x0c, 0x48, 0x91, 0xa1, 0x0c, 0x48, 0x89,
+ 0xa0, 0x0c, 0x48, 0x81, 0x9f, 0x0c, 0x48, 0x79, 0x9e, 0x0c, 0x48, 0x71,
+ 0x9d, 0x0c, 0x48, 0x68, 0xa6, 0x0c, 0x48, 0x61, 0xa5, 0x0c, 0x48, 0x59,
+ 0xa4, 0x0c, 0x48, 0x51, 0xa3, 0x0c, 0x48, 0x49, 0xa2, 0x0c, 0x48, 0x41,
+ 0xa1, 0x0c, 0x48, 0x39, 0xa0, 0x0c, 0x48, 0x31, 0x9f, 0x0c, 0x48, 0x29,
+ 0x9e, 0x0c, 0x48, 0x21, 0x9d, 0x0c, 0x48, 0x18, 0xa6, 0x0c, 0x48, 0x11,
+ 0xa5, 0x0c, 0x48, 0x09, 0xa4, 0x0c, 0x48, 0x01, 0xa3, 0x0c, 0x47, 0xf9,
+ 0xa2, 0x0c, 0x47, 0xf1, 0xa1, 0x0c, 0x47, 0xe9, 0xa0, 0x0c, 0x47, 0xe1,
+ 0x9f, 0x0c, 0x47, 0xd9, 0x9e, 0x0c, 0x47, 0xd1, 0x9d, 0x0c, 0x47, 0xc8,
+ 0xa6, 0x0c, 0x47, 0xc1, 0xa5, 0x0c, 0x47, 0xb9, 0xa4, 0x0c, 0x47, 0xb1,
+ 0xa3, 0x0c, 0x47, 0xa9, 0xa2, 0x0c, 0x47, 0xa1, 0xa1, 0x0c, 0x47, 0x99,
+ 0xa0, 0x0c, 0x47, 0x91, 0x9f, 0x0c, 0x47, 0x89, 0x9e, 0x0c, 0x47, 0x81,
+ 0x9d, 0x0c, 0x47, 0x78, 0xa6, 0x0c, 0x47, 0x71, 0xa5, 0x0c, 0x47, 0x69,
+ 0xa4, 0x0c, 0x47, 0x61, 0xa3, 0x0c, 0x47, 0x59, 0xa2, 0x0c, 0x47, 0x51,
+ 0xa1, 0x0c, 0x47, 0x49, 0xa0, 0x0c, 0x47, 0x41, 0x9f, 0x0c, 0x47, 0x39,
+ 0x9e, 0x0c, 0x47, 0x31, 0x9d, 0x0c, 0x47, 0x28, 0xa6, 0x0c, 0x47, 0x21,
+ 0xa5, 0x0c, 0x47, 0x19, 0xa4, 0x0c, 0x47, 0x11, 0xa3, 0x0c, 0x47, 0x09,
+ 0xa2, 0x0c, 0x47, 0x01, 0xa1, 0x0c, 0x46, 0xf9, 0xa0, 0x0c, 0x46, 0xf1,
+ 0x9f, 0x0c, 0x46, 0xe9, 0x9e, 0x0c, 0x46, 0xe1, 0x9d, 0x0c, 0x46, 0xd8,
+ 0xa6, 0x0c, 0x46, 0xd1, 0xa5, 0x0c, 0x46, 0xc9, 0xa4, 0x0c, 0x46, 0xc1,
+ 0xa3, 0x0c, 0x46, 0xb9, 0xa2, 0x0c, 0x46, 0xb1, 0xa1, 0x0c, 0x46, 0xa9,
+ 0xa0, 0x0c, 0x46, 0xa1, 0x9f, 0x0c, 0x46, 0x99, 0x9e, 0x0c, 0x46, 0x91,
+ 0x9d, 0x0c, 0x46, 0x88, 0xa6, 0x0c, 0x46, 0x81, 0xa5, 0x0c, 0x46, 0x79,
+ 0xa4, 0x0c, 0x46, 0x71, 0xa3, 0x0c, 0x46, 0x69, 0xa2, 0x0c, 0x46, 0x61,
+ 0xa1, 0x0c, 0x46, 0x59, 0xa0, 0x0c, 0x46, 0x51, 0x9f, 0x0c, 0x46, 0x49,
+ 0x9e, 0x0c, 0x46, 0x41, 0x9d, 0x0c, 0x46, 0x38, 0xa6, 0x0c, 0x46, 0x31,
+ 0xa5, 0x0c, 0x46, 0x29, 0xa4, 0x0c, 0x46, 0x21, 0xa3, 0x0c, 0x46, 0x19,
+ 0xa2, 0x0c, 0x46, 0x11, 0xa1, 0x0c, 0x46, 0x09, 0xa0, 0x0c, 0x46, 0x01,
+ 0x9f, 0x0c, 0x45, 0xf9, 0x9e, 0x0c, 0x45, 0xf1, 0x9d, 0x0c, 0x45, 0xe8,
+ 0xa6, 0x0c, 0x45, 0xe1, 0xa5, 0x0c, 0x45, 0xd9, 0xa4, 0x0c, 0x45, 0xd1,
+ 0xa3, 0x0c, 0x45, 0xc9, 0xa2, 0x0c, 0x45, 0xc1, 0xa1, 0x0c, 0x45, 0xb9,
+ 0xa0, 0x0c, 0x45, 0xb1, 0x9f, 0x0c, 0x45, 0xa9, 0x9e, 0x0c, 0x45, 0xa1,
+ 0x9d, 0x0c, 0x45, 0x98, 0xa6, 0x0c, 0x45, 0x91, 0xa5, 0x0c, 0x45, 0x89,
+ 0xa4, 0x0c, 0x45, 0x81, 0xa3, 0x0c, 0x45, 0x79, 0xa2, 0x0c, 0x45, 0x71,
+ 0xa1, 0x0c, 0x45, 0x69, 0xa0, 0x0c, 0x45, 0x61, 0x9f, 0x0c, 0x45, 0x59,
+ 0x9e, 0x0c, 0x45, 0x51, 0x9d, 0x0c, 0x45, 0x48, 0xa6, 0x0c, 0x45, 0x41,
+ 0xa5, 0x0c, 0x45, 0x39, 0xa4, 0x0c, 0x45, 0x31, 0xa3, 0x0c, 0x45, 0x29,
+ 0xa2, 0x0c, 0x45, 0x21, 0xa1, 0x0c, 0x45, 0x19, 0xa0, 0x0c, 0x45, 0x11,
+ 0x9f, 0x0c, 0x45, 0x09, 0x9e, 0x0c, 0x45, 0x01, 0x9d, 0x0c, 0x44, 0xf8,
+ 0xa6, 0x0c, 0x44, 0xf1, 0xa5, 0x0c, 0x44, 0xe9, 0xa4, 0x0c, 0x44, 0xe1,
+ 0xa3, 0x0c, 0x44, 0xd9, 0xa2, 0x0c, 0x44, 0xd1, 0xa1, 0x0c, 0x44, 0xc9,
+ 0xa0, 0x0c, 0x44, 0xc1, 0x9f, 0x0c, 0x44, 0xb9, 0x9e, 0x0c, 0x44, 0xb1,
+ 0x9d, 0x0c, 0x44, 0xa8, 0xa6, 0x0c, 0x44, 0xa1, 0xa5, 0x0c, 0x44, 0x99,
+ 0xa4, 0x0c, 0x44, 0x91, 0xa3, 0x0c, 0x44, 0x89, 0xa2, 0x0c, 0x44, 0x81,
+ 0xa1, 0x0c, 0x44, 0x79, 0xa0, 0x0c, 0x44, 0x71, 0x9f, 0x0c, 0x44, 0x69,
+ 0x9e, 0x0c, 0x44, 0x61, 0x9d, 0x0c, 0x44, 0x58, 0xa6, 0x0c, 0x44, 0x51,
+ 0xa5, 0x0c, 0x44, 0x49, 0xa4, 0x0c, 0x44, 0x41, 0xa3, 0x0c, 0x44, 0x39,
+ 0xa2, 0x0c, 0x44, 0x31, 0xa1, 0x0c, 0x44, 0x29, 0xa0, 0x0c, 0x44, 0x21,
+ 0x9f, 0x0c, 0x44, 0x19, 0x9e, 0x0c, 0x44, 0x11, 0x9d, 0x0c, 0x44, 0x08,
+ 0xa6, 0x0c, 0x44, 0x01, 0xa5, 0x0c, 0x43, 0xf9, 0xa4, 0x0c, 0x43, 0xf1,
+ 0xa3, 0x0c, 0x43, 0xe9, 0xa2, 0x0c, 0x43, 0xe1, 0xa1, 0x0c, 0x43, 0xd9,
+ 0xa0, 0x0c, 0x43, 0xd1, 0x9f, 0x0c, 0x43, 0xc9, 0x9e, 0x0c, 0x43, 0xc1,
+ 0x9d, 0x0c, 0x43, 0xb8, 0xa6, 0x0c, 0x43, 0xb1, 0xa5, 0x0c, 0x43, 0xa9,
+ 0xa4, 0x0c, 0x43, 0xa1, 0xa3, 0x0c, 0x43, 0x99, 0xa2, 0x0c, 0x43, 0x91,
+ 0xa1, 0x0c, 0x43, 0x89, 0xa0, 0x0c, 0x43, 0x81, 0x9f, 0x0c, 0x43, 0x79,
+ 0x9e, 0x0c, 0x43, 0x71, 0x9d, 0x0c, 0x43, 0x68, 0xa6, 0x0c, 0x43, 0x61,
+ 0xa5, 0x0c, 0x43, 0x59, 0xa4, 0x0c, 0x43, 0x51, 0xa3, 0x0c, 0x43, 0x49,
+ 0xa2, 0x0c, 0x43, 0x41, 0xa1, 0x0c, 0x43, 0x39, 0xa0, 0x0c, 0x43, 0x31,
+ 0x9f, 0x0c, 0x43, 0x29, 0x9e, 0x0c, 0x43, 0x21, 0x9d, 0x0c, 0x43, 0x18,
+ 0xa6, 0x0c, 0x43, 0x11, 0xa5, 0x0c, 0x43, 0x09, 0xa4, 0x0c, 0x43, 0x01,
+ 0xa3, 0x0c, 0x42, 0xf9, 0xa2, 0x0c, 0x42, 0xf1, 0xa1, 0x0c, 0x42, 0xe9,
+ 0xa0, 0x0c, 0x42, 0xe1, 0x9f, 0x0c, 0x42, 0xd9, 0x9e, 0x0c, 0x42, 0xd1,
+ 0x9d, 0x0c, 0x42, 0xc8, 0xa6, 0x0c, 0x42, 0xc1, 0xa5, 0x0c, 0x42, 0xb9,
+ 0xa4, 0x0c, 0x42, 0xb1, 0xa3, 0x0c, 0x42, 0xa9, 0xa2, 0x0c, 0x42, 0xa1,
+ 0xa1, 0x0c, 0x42, 0x99, 0xa0, 0x0c, 0x42, 0x91, 0x9f, 0x0c, 0x42, 0x89,
+ 0x9e, 0x0c, 0x42, 0x81, 0x9d, 0x0c, 0x42, 0x78, 0xa6, 0x0c, 0x42, 0x71,
+ 0xa5, 0x0c, 0x42, 0x69, 0xa4, 0x0c, 0x42, 0x61, 0xa3, 0x0c, 0x42, 0x59,
+ 0xa2, 0x0c, 0x42, 0x51, 0xa1, 0x0c, 0x42, 0x49, 0xa0, 0x0c, 0x42, 0x41,
+ 0x9f, 0x0c, 0x42, 0x39, 0x9e, 0x0c, 0x42, 0x31, 0x9d, 0x0c, 0x42, 0x28,
+ 0xa6, 0x0c, 0x42, 0x21, 0xa5, 0x0c, 0x42, 0x19, 0xa4, 0x0c, 0x42, 0x11,
+ 0xa3, 0x0c, 0x42, 0x09, 0xa2, 0x0c, 0x42, 0x01, 0xa1, 0x0c, 0x41, 0xf9,
+ 0xa0, 0x0c, 0x41, 0xf1, 0x9f, 0x0c, 0x41, 0xe9, 0x9e, 0x0c, 0x41, 0xe1,
+ 0x9d, 0x0c, 0x41, 0xd8, 0xa6, 0x0c, 0x41, 0xd1, 0xa5, 0x0c, 0x41, 0xc9,
+ 0xa4, 0x0c, 0x41, 0xc1, 0xa3, 0x0c, 0x41, 0xb9, 0xa2, 0x0c, 0x41, 0xb1,
+ 0xa1, 0x0c, 0x41, 0xa9, 0xa0, 0x0c, 0x41, 0xa1, 0x9f, 0x0c, 0x41, 0x99,
+ 0x9e, 0x0c, 0x41, 0x91, 0x9d, 0x0c, 0x41, 0x88, 0xa6, 0x0c, 0x41, 0x81,
+ 0xa5, 0x0c, 0x41, 0x79, 0xa4, 0x0c, 0x41, 0x71, 0xa3, 0x0c, 0x41, 0x69,
+ 0xa2, 0x0c, 0x41, 0x61, 0xa1, 0x0c, 0x41, 0x59, 0xa0, 0x0c, 0x41, 0x51,
+ 0x9f, 0x0c, 0x41, 0x49, 0x9e, 0x0c, 0x41, 0x41, 0x9d, 0x0c, 0x41, 0x38,
+ 0xa6, 0x0c, 0x41, 0x31, 0xa5, 0x0c, 0x41, 0x29, 0xa4, 0x0c, 0x41, 0x21,
+ 0xa3, 0x0c, 0x41, 0x19, 0xa2, 0x0c, 0x41, 0x11, 0xa1, 0x0c, 0x41, 0x09,
+ 0xa0, 0x0c, 0x41, 0x01, 0x9f, 0x0c, 0x40, 0xf9, 0x9e, 0x0c, 0x40, 0xf1,
+ 0x9d, 0x0c, 0x40, 0xe8, 0xa6, 0x0c, 0x40, 0xe1, 0xa5, 0x0c, 0x40, 0xd9,
+ 0xa4, 0x0c, 0x40, 0xd1, 0xa3, 0x0c, 0x40, 0xc9, 0xa2, 0x0c, 0x40, 0xc1,
+ 0xa1, 0x0c, 0x40, 0xb9, 0xa0, 0x0c, 0x40, 0xb1, 0x9f, 0x0c, 0x40, 0xa9,
+ 0x9e, 0x0c, 0x40, 0xa1, 0x9d, 0x0c, 0x40, 0x98, 0xa6, 0x0c, 0x40, 0x91,
+ 0xa5, 0x0c, 0x40, 0x89, 0xa4, 0x0c, 0x40, 0x81, 0xa3, 0x0c, 0x40, 0x79,
+ 0xa2, 0x0c, 0x40, 0x71, 0xa1, 0x0c, 0x40, 0x69, 0xa0, 0x0c, 0x40, 0x61,
+ 0x9f, 0x0c, 0x40, 0x59, 0x9e, 0x0c, 0x40, 0x51, 0x9d, 0x0c, 0x40, 0x48,
+ 0xa6, 0x0c, 0x40, 0x41, 0xa5, 0x0c, 0x40, 0x39, 0xa4, 0x0c, 0x40, 0x31,
+ 0xa3, 0x0c, 0x40, 0x29, 0xa2, 0x0c, 0x40, 0x21, 0xa1, 0x0c, 0x40, 0x19,
+ 0xa0, 0x0c, 0x40, 0x11, 0x9f, 0x0c, 0x40, 0x09, 0x9e, 0x0c, 0x40, 0x00,
+ 0xc2, 0x01, 0x6f, 0x0b, 0x55, 0xc1, 0x83, 0x0b, 0x55, 0x78, 0x83, 0x0b,
+ 0x55, 0xa1, 0x44, 0x2e, 0xf0, 0x43, 0x1a, 0x12, 0x17, 0xc3, 0x1a, 0x1e,
+ 0x9a, 0x0b, 0x54, 0x79, 0x93, 0x0b, 0x54, 0x71, 0x85, 0x0b, 0x54, 0x69,
+ 0x9c, 0x0b, 0x54, 0x60, 0x9a, 0x0b, 0x54, 0xb9, 0x93, 0x0b, 0x54, 0xb1,
+ 0x9c, 0x0b, 0x54, 0xa9, 0x85, 0x0b, 0x54, 0xa0, 0x9a, 0x0b, 0x54, 0x59,
+ 0x93, 0x0b, 0x54, 0x51, 0x85, 0x0b, 0x54, 0x49, 0x9c, 0x0b, 0x54, 0x40,
+ 0xc8, 0xb5, 0x2a, 0x08, 0xff, 0x89, 0xc6, 0xce, 0x27, 0x08, 0xff, 0x00,
+ 0xc5, 0x40, 0xe7, 0x00, 0x5c, 0x19, 0xc4, 0x1e, 0x97, 0x00, 0x5e, 0x68,
+ 0xc3, 0x7c, 0xc4, 0x08, 0xff, 0x11, 0xc4, 0xc9, 0xed, 0x08, 0xfe, 0xd0,
+ 0xc4, 0x70, 0x1e, 0x08, 0xff, 0x09, 0xc3, 0x00, 0xc1, 0x08, 0xfe, 0xf1,
+ 0xc6, 0xd1, 0x27, 0x08, 0xfe, 0xd8, 0x83, 0x00, 0x5d, 0x19, 0xc2, 0x00,
+ 0xc1, 0x00, 0x5d, 0x48, 0x83, 0x00, 0x5d, 0x99, 0xc2, 0x00, 0xdb, 0x00,
+ 0x5d, 0xa0, 0xcb, 0x8b, 0xe9, 0x08, 0xfe, 0x29, 0xd9, 0x1e, 0x05, 0x08,
+ 0xfe, 0x00, 0x9f, 0x08, 0xfe, 0x51, 0x9e, 0x08, 0xfe, 0x48, 0xa2, 0x00,
+ 0xd3, 0xc9, 0xa1, 0x00, 0xd3, 0xc1, 0xa0, 0x00, 0xd3, 0xb8, 0xc2, 0x00,
+ 0xdb, 0x00, 0xd2, 0xb1, 0xc2, 0x00, 0x39, 0x00, 0xd2, 0xa8, 0xc2, 0x00,
+ 0xd0, 0x00, 0xd1, 0xe9, 0x83, 0x00, 0xd1, 0xd8, 0xc2, 0x00, 0xd0, 0x00,
+ 0xd1, 0xa9, 0x83, 0x00, 0xd1, 0xa0, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x59,
+ 0x83, 0x00, 0xd1, 0x48, 0xc2, 0x00, 0xd0, 0x00, 0xd1, 0x29, 0xc2, 0x8d,
+ 0x8f, 0x00, 0xd1, 0x21, 0x83, 0x00, 0xd1, 0x18, 0xc2, 0x01, 0x23, 0x05,
+ 0x54, 0x29, 0x91, 0x05, 0x54, 0x18, 0xc2, 0x01, 0x23, 0x05, 0x54, 0x21,
+ 0x91, 0x05, 0x54, 0x10, 0x00, 0xc3, 0x1a, 0x2e, 0xc3, 0x9b, 0x00, 0x00,
+ 0x72, 0xd8, 0xc2, 0x00, 0xc4, 0x00, 0x70, 0x99, 0x97, 0x00, 0x70, 0xc8,
+ 0x89, 0x00, 0x70, 0x50, 0x15, 0xc3, 0x1a, 0x3a, 0xc4, 0xde, 0xf3, 0x00,
+ 0x71, 0x48, 0x83, 0x00, 0x71, 0x83, 0x03, 0x1a, 0x4a, 0x8b, 0x00, 0x71,
+ 0xa3, 0x03, 0x1a, 0x5c, 0x97, 0x00, 0x71, 0xc3, 0x03, 0x1a, 0x60, 0x87,
+ 0x00, 0x72, 0x01, 0x91, 0x00, 0x72, 0x10, 0xc3, 0x00, 0x74, 0x00, 0x70,
+ 0x69, 0xc2, 0x06, 0x4e, 0x00, 0x71, 0x10, 0xc5, 0xd4, 0x25, 0x00, 0x70,
+ 0x79, 0xc3, 0x97, 0x59, 0x00, 0x70, 0xa8, 0x42, 0x01, 0x7c, 0xc3, 0x1a,
+ 0x6b, 0xc9, 0xb1, 0x3a, 0x00, 0x72, 0x60, 0x42, 0x01, 0x7c, 0xc3, 0x1a,
+ 0x7d, 0xc5, 0xd4, 0x2f, 0x00, 0x71, 0xd0, 0x90, 0x00, 0x70, 0xf8, 0x00,
+ 0xc3, 0x1a, 0x89, 0xc5, 0xd4, 0x98, 0x00, 0x72, 0x31, 0xc6, 0xd3, 0x07,
+ 0x00, 0x72, 0x38, 0xc4, 0x04, 0x15, 0x00, 0x71, 0x29, 0xc5, 0xdb, 0xf5,
+ 0x00, 0x71, 0x60, 0x91, 0x0f, 0x15, 0x48, 0x97, 0x0f, 0x15, 0x20, 0x94,
+ 0x00, 0x60, 0x5b, 0x03, 0x1a, 0x9f, 0x8e, 0x00, 0x60, 0x62, 0x03, 0x1a,
+ 0xa3, 0xcb, 0x90, 0x44, 0x00, 0x62, 0xe8, 0x83, 0x00, 0x60, 0xf9, 0xc2,
+ 0x00, 0xd0, 0x00, 0x61, 0x00, 0x83, 0x00, 0x61, 0x09, 0xc2, 0x00, 0xd0,
+ 0x00, 0x61, 0x10, 0x83, 0x00, 0x61, 0x89, 0xc2, 0x00, 0x39, 0x00, 0x62,
+ 0xd0, 0x83, 0x00, 0x61, 0x99, 0xc2, 0x00, 0xdb, 0x00, 0x61, 0xa0, 0x8e,
+ 0x08, 0xa4, 0x50, 0x94, 0x08, 0xa4, 0x40, 0xcb, 0x97, 0x19, 0x00, 0x7e,
+ 0x51, 0xcb, 0x8f, 0x47, 0x00, 0x7e, 0x59, 0xcb, 0x97, 0xd4, 0x00, 0x7e,
+ 0x60, 0x09, 0xc3, 0x1a, 0xa7, 0xc8, 0xbc, 0x82, 0x00, 0x78, 0xf8, 0x09,
+ 0xc3, 0x1a, 0xb9, 0xc9, 0xa9, 0xbd, 0x00, 0x7e, 0x70, 0x83, 0x00, 0x7c,
+ 0xd1, 0xc2, 0x00, 0xd0, 0x00, 0x7c, 0xd8, 0x83, 0x00, 0x7d, 0x49, 0xc2,
+ 0x00, 0xd0, 0x00, 0x7d, 0x50, 0x83, 0x00, 0x7c, 0xe1, 0xc2, 0x00, 0xd0,
+ 0x00, 0x7c, 0xe8, 0x83, 0x00, 0x7d, 0x59, 0xc2, 0x00, 0xd0, 0x00, 0x7d,
+ 0x60, 0xcc, 0x85, 0x05, 0x00, 0x78, 0x11, 0xcd, 0x75, 0x99, 0x00, 0x78,
+ 0x18, 0x8a, 0x01, 0x69, 0xa0, 0x8a, 0x01, 0x69, 0xd0, 0x8a, 0x01, 0x69,
+ 0xf8, 0x4d, 0x06, 0x5a, 0xc3, 0x1a, 0xcb, 0x45, 0x19, 0x60, 0xc3, 0x1a,
+ 0xd7, 0x44, 0x19, 0x6a, 0xc3, 0x1a, 0xe1, 0x44, 0x2b, 0xb9, 0x43, 0x1a,
+ 0xeb, 0x44, 0x2b, 0xb9, 0xc3, 0x1a, 0xf7, 0x4d, 0x06, 0x5a, 0xc3, 0x1b,
+ 0x03, 0x45, 0x19, 0x60, 0xc3, 0x1b, 0x0f, 0x45, 0x30, 0xc1, 0x43, 0x1b,
+ 0x19, 0xd1, 0x4f, 0xad, 0x07, 0xe2, 0xa1, 0xda, 0x1c, 0xba, 0x07, 0xe2,
+ 0x99, 0x45, 0x19, 0x60, 0xc3, 0x1b, 0x23, 0x46, 0x30, 0xc1, 0xc3, 0x1b,
+ 0x2d, 0xdd, 0x10, 0xa3, 0x07, 0xe6, 0xc8, 0x49, 0xb2, 0x6c, 0xc3, 0x1b,
+ 0x39, 0x4a, 0xa7, 0xe2, 0x43, 0x1b, 0x61, 0x4d, 0x06, 0x5a, 0xc3, 0x1b,
+ 0x79, 0x45, 0x19, 0x60, 0xc3, 0x1b, 0x85, 0x45, 0x50, 0xf0, 0xc3, 0x1b,
+ 0x95, 0x0a, 0xc3, 0x1b, 0xa5, 0x45, 0x30, 0xc1, 0xc3, 0x1b, 0xb1, 0x44,
+ 0x72, 0xf0, 0xc3, 0x1b, 0xc1, 0x44, 0x2b, 0xb9, 0x43, 0x1b, 0xcd, 0x47,
+ 0x06, 0xb4, 0xc3, 0x1b, 0xd9, 0x0e, 0x43, 0x1b, 0xfd, 0xcd, 0x00, 0xfa,
+ 0x07, 0xe7, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xb0, 0x0b, 0xc3, 0x1c,
+ 0x07, 0x45, 0x00, 0x8c, 0x43, 0x1c, 0x13, 0xcc, 0x00, 0xfb, 0x07, 0xe1,
+ 0x59, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xe0, 0xca, 0x26, 0xf7, 0x07, 0xe8,
+ 0xa9, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xc8, 0x4d, 0x06, 0x5a, 0xc3, 0x1c,
+ 0x25, 0x45, 0x19, 0x60, 0xc3, 0x1c, 0x31, 0x45, 0x30, 0xc1, 0xc3, 0x1c,
+ 0x3b, 0x44, 0x2b, 0xb9, 0x43, 0x1c, 0x45, 0x43, 0x06, 0x5c, 0xc3, 0x1c,
+ 0x51, 0x43, 0x14, 0x6d, 0xc3, 0x1c, 0x5d, 0xd1, 0x51, 0x9a, 0x07, 0xef,
+ 0x90, 0x47, 0x0e, 0x9d, 0xc3, 0x1c, 0x6d, 0xd2, 0x47, 0x81, 0x07, 0xea,
+ 0x70, 0x48, 0xab, 0xf5, 0xc3, 0x1c, 0x85, 0x46, 0x38, 0xb9, 0x43, 0x1c,
+ 0xb5, 0x44, 0x2b, 0xb9, 0xc3, 0x1c, 0xbb, 0x4d, 0x06, 0x5a, 0xc3, 0x1c,
+ 0xc7, 0xcf, 0x60, 0x8a, 0x07, 0xe3, 0x99, 0x45, 0x19, 0x60, 0xc3, 0x1c,
+ 0xd3, 0xcf, 0x69, 0x81, 0x07, 0xe3, 0x89, 0xce, 0x72, 0xf0, 0x07, 0xe3,
+ 0x81, 0x45, 0x50, 0xf0, 0xc3, 0x1c, 0xe9, 0x0a, 0xc3, 0x1c, 0xf3, 0x45,
+ 0x30, 0xc1, 0x43, 0x1c, 0xff, 0x43, 0x2b, 0xba, 0xc3, 0x1d, 0x09, 0x03,
+ 0x43, 0x1d, 0x15, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x81, 0x0b, 0xc3, 0x1d,
+ 0x21, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x99, 0x45, 0x00, 0x8c, 0x43, 0x1d,
+ 0x2d, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xe4,
+ 0xb0, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0xc9, 0xca, 0x26, 0xf7, 0x07, 0xe4,
+ 0xa8, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0xb9, 0xcb, 0x10, 0xb5, 0x07, 0xe6,
+ 0xe0, 0x0b, 0xc3, 0x1d, 0x39, 0xd3, 0x43, 0x72, 0x07, 0xed, 0x78, 0x43,
+ 0x2b, 0xba, 0xc3, 0x1d, 0x45, 0x43, 0x02, 0x98, 0x43, 0x1d, 0x51, 0xcd,
+ 0x00, 0xfa, 0x07, 0xe2, 0x81, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x78, 0xcd,
+ 0x00, 0xfa, 0x07, 0xe2, 0x79, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x70, 0x0b,
+ 0xc3, 0x1d, 0x5b, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x61, 0x45, 0x00, 0x8c,
+ 0xc3, 0x1d, 0x67, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x70, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe2, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0xa0, 0x0b, 0xc3, 0x1d,
+ 0x73, 0x45, 0x00, 0x8c, 0x43, 0x1d, 0x7f, 0x45, 0x19, 0x60, 0xc3, 0x1d,
+ 0x97, 0x44, 0x0d, 0xff, 0xc3, 0x1d, 0xad, 0x44, 0x2b, 0xb9, 0xc3, 0x1d,
+ 0xbd, 0x45, 0x06, 0x5a, 0xc3, 0x1d, 0xc9, 0x46, 0x50, 0xf0, 0xc3, 0x1d,
+ 0xdb, 0x45, 0x50, 0xf1, 0xc3, 0x1d, 0xe7, 0x46, 0x30, 0xc1, 0x43, 0x1d,
+ 0xf3, 0x46, 0x50, 0x13, 0xc3, 0x1d, 0xff, 0xd1, 0x54, 0x31, 0x07, 0xe0,
+ 0xd1, 0x46, 0x30, 0xc1, 0xc3, 0x1e, 0x0b, 0x4d, 0x06, 0x5a, 0xc3, 0x1e,
+ 0x17, 0x44, 0x2b, 0xb9, 0x43, 0x1e, 0x23, 0xca, 0x26, 0xf7, 0x07, 0xe4,
+ 0x39, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0x20, 0x48, 0x06, 0x5f, 0xc3, 0x1e,
+ 0x2f, 0x45, 0x00, 0x8c, 0xc3, 0x1e, 0x3b, 0xcd, 0x00, 0xfa, 0x07, 0xf7,
+ 0xd9, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0xe0, 0xca, 0x26, 0xf7, 0x07, 0xe4,
+ 0x29, 0x0b, 0xc3, 0x1e, 0x47, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x69, 0x45,
+ 0x00, 0x8c, 0x43, 0x1e, 0x53, 0x0b, 0xc3, 0x1e, 0x5f, 0x4a, 0x74, 0x6e,
+ 0x43, 0x1e, 0x6b, 0x43, 0x02, 0x98, 0xc3, 0x1e, 0x77, 0xcf, 0x64, 0xef,
+ 0x07, 0xe6, 0x68, 0x0b, 0xc3, 0x1e, 0x81, 0x45, 0x00, 0x8c, 0x43, 0x1e,
+ 0x8d, 0x47, 0x0f, 0x9c, 0xc3, 0x1e, 0x9f, 0x4a, 0xa6, 0xca, 0x43, 0x1e,
+ 0xb7, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xe9, 0xcd, 0x00, 0xfa, 0x07, 0xe1,
+ 0x90, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xe1, 0xcd, 0x00, 0xfa, 0x07, 0xe1,
+ 0x88, 0x0b, 0xc3, 0x1e, 0xbd, 0xd3, 0x43, 0x72, 0x07, 0xee, 0x08, 0x0b,
+ 0xc3, 0x1e, 0xc9, 0x4a, 0x74, 0x6e, 0x43, 0x1e, 0xd5, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe1, 0x71, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xf8, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe1, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xf0, 0x44, 0x2b, 0xb9,
+ 0xc3, 0x1e, 0xe1, 0x4d, 0x06, 0x5a, 0xc3, 0x1e, 0xed, 0xcf, 0x60, 0x8a,
+ 0x07, 0xe3, 0x69, 0x45, 0x19, 0x60, 0xc3, 0x1e, 0xf9, 0xcf, 0x69, 0x81,
+ 0x07, 0xe3, 0x59, 0xce, 0x72, 0xf0, 0x07, 0xe3, 0x51, 0x45, 0x50, 0xf0,
+ 0xc3, 0x1f, 0x09, 0x0a, 0xc3, 0x1f, 0x13, 0x46, 0x30, 0xc1, 0x43, 0x1f,
+ 0x1f, 0xe0, 0x07, 0x27, 0x07, 0xe2, 0xe0, 0xce, 0x6d, 0x32, 0x07, 0xea,
+ 0x0b, 0x03, 0x1f, 0x2b, 0x46, 0xd2, 0x23, 0xc3, 0x1f, 0x35, 0xd2, 0x4e,
+ 0xad, 0x07, 0xef, 0xb0, 0xd1, 0x4f, 0xad, 0x07, 0xe2, 0x51, 0x45, 0x06,
+ 0x5a, 0xc3, 0x1f, 0x41, 0x45, 0x19, 0x60, 0xc3, 0x1f, 0x4d, 0x45, 0x50,
+ 0xf0, 0xc3, 0x1f, 0x5d, 0x44, 0x19, 0x6a, 0xc3, 0x1f, 0x67, 0x45, 0x30,
+ 0xc1, 0x43, 0x1f, 0x71, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x41, 0xcb, 0x10,
+ 0xb5, 0x07, 0xe5, 0xc8, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x29, 0xcb, 0x10,
+ 0xb5, 0x07, 0xe5, 0xb8, 0x0b, 0xc3, 0x1f, 0x7b, 0x4a, 0x74, 0x6e, 0x43,
+ 0x1f, 0x87, 0x0b, 0xc3, 0x1f, 0x93, 0x45, 0x00, 0x8c, 0x43, 0x1f, 0x9f,
+ 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x11, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xa0,
+ 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x81, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x60,
+ 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x38,
+ 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x21, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x40,
+ 0x0b, 0xc3, 0x1f, 0xab, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xd0, 0xc8, 0xbf,
+ 0x82, 0x00, 0x36, 0x63, 0x03, 0x1f, 0xb7, 0xc2, 0x16, 0x1c, 0x00, 0x32,
+ 0x0a, 0x03, 0x1f, 0xbb, 0xc3, 0x1a, 0xe0, 0x00, 0x46, 0x41, 0xc4, 0x92,
+ 0x76, 0x00, 0x31, 0xd3, 0x03, 0x1f, 0xbf, 0xc2, 0x0f, 0x9b, 0x00, 0x35,
+ 0x7b, 0x03, 0x1f, 0xc3, 0xc3, 0xe5, 0x03, 0x00, 0x35, 0x9a, 0x03, 0x1f,
+ 0xc7, 0xc2, 0x00, 0xc2, 0x00, 0x32, 0x23, 0x03, 0x1f, 0xcb, 0xc7, 0xca,
+ 0x6f, 0x00, 0x45, 0x68, 0xc2, 0x00, 0x4f, 0x00, 0x31, 0x63, 0x03, 0x1f,
+ 0xcf, 0x8a, 0x00, 0x34, 0xc2, 0x03, 0x1f, 0xd3, 0x47, 0xbd, 0x8a, 0xc3,
+ 0x1f, 0xd7, 0xc2, 0x00, 0x74, 0x00, 0x31, 0xcb, 0x03, 0x1f, 0xec, 0xc3,
+ 0x00, 0x49, 0x00, 0x31, 0x3b, 0x03, 0x1f, 0xf0, 0x87, 0x00, 0x36, 0xa8,
+ 0xc4, 0xe2, 0xe3, 0x00, 0x35, 0x4b, 0x03, 0x1f, 0xf4, 0x03, 0xc3, 0x1f,
+ 0xf8, 0x47, 0x06, 0x53, 0xc3, 0x20, 0x05, 0xc3, 0x14, 0x4b, 0x00, 0x31,
+ 0x72, 0x03, 0x20, 0x17, 0xc4, 0xe3, 0x1b, 0x00, 0x34, 0x33, 0x03, 0x20,
+ 0x1b, 0xc3, 0x2f, 0xc8, 0x00, 0x33, 0xcb, 0x03, 0x20, 0x28, 0xc2, 0x16,
+ 0x1c, 0x00, 0x31, 0x53, 0x03, 0x20, 0x35, 0xc2, 0x02, 0x98, 0x00, 0x31,
+ 0xbb, 0x03, 0x20, 0x42, 0x0a, 0x43, 0x20, 0x46, 0x00, 0xc3, 0x20, 0x5e,
+ 0xc2, 0x16, 0x1c, 0x00, 0x35, 0x32, 0x03, 0x20, 0x74, 0xc2, 0x16, 0x1c,
+ 0x00, 0x32, 0x53, 0x03, 0x20, 0x78, 0x97, 0x00, 0x36, 0x42, 0x03, 0x20,
+ 0x7c, 0xc2, 0x16, 0x1c, 0x00, 0x31, 0x8b, 0x03, 0x20, 0x80, 0xcb, 0x96,
+ 0x53, 0x00, 0x45, 0x61, 0xc4, 0x3a, 0x01, 0x00, 0x35, 0xdb, 0x03, 0x20,
+ 0x84, 0xc3, 0x72, 0xf0, 0x00, 0x34, 0x8a, 0x03, 0x20, 0x88, 0x8a, 0x00,
+ 0x31, 0x43, 0x03, 0x20, 0x8c, 0xc2, 0x16, 0x1c, 0x00, 0x33, 0xda, 0x03,
+ 0x20, 0x99, 0x42, 0x00, 0x2d, 0xc3, 0x20, 0x9d, 0x00, 0x43, 0x20, 0xa3,
+ 0x00, 0x43, 0x20, 0xb8, 0x00, 0x43, 0x20, 0xce, 0xc2, 0x00, 0x74, 0x00,
+ 0x31, 0x93, 0x03, 0x20, 0xde, 0x8a, 0x00, 0x31, 0xc2, 0x03, 0x20, 0xe2,
+ 0xcb, 0x8c, 0xdf, 0x00, 0x45, 0x89, 0xc2, 0x01, 0x9d, 0x00, 0x31, 0xab,
+ 0x03, 0x20, 0xe6, 0xc4, 0xdf, 0x93, 0x00, 0x31, 0xa3, 0x03, 0x20, 0xea,
+ 0xc8, 0xba, 0x32, 0x00, 0x35, 0x51, 0xc3, 0x03, 0x26, 0x00, 0x31, 0x9b,
+ 0x03, 0x20, 0xee, 0xcf, 0x07, 0x2a, 0x00, 0x33, 0x80, 0x03, 0xc3, 0x20,
+ 0xf2, 0x42, 0x0e, 0x9a, 0xc3, 0x21, 0x09, 0xc2, 0x03, 0x66, 0x00, 0x34,
+ 0x73, 0x03, 0x21, 0x19, 0xc3, 0x2b, 0xb9, 0x00, 0x34, 0x23, 0x03, 0x21,
+ 0x1d, 0x47, 0x3b, 0xc4, 0x43, 0x21, 0x21, 0x00, 0xc3, 0x21, 0x33, 0x8a,
+ 0x00, 0x35, 0x22, 0x03, 0x21, 0x3f, 0x00, 0x43, 0x21, 0x43, 0xc3, 0x12,
+ 0xc2, 0x00, 0x32, 0x2b, 0x03, 0x21, 0x55, 0xc3, 0x01, 0xc4, 0x00, 0x30,
+ 0xe0, 0x00, 0x43, 0x21, 0x59, 0x89, 0x00, 0x35, 0x6b, 0x03, 0x21, 0x65,
+ 0xc3, 0x01, 0x54, 0x00, 0x32, 0x33, 0x03, 0x21, 0x72, 0xc3, 0x2b, 0xb9,
+ 0x00, 0x34, 0x1a, 0x03, 0x21, 0x76, 0x03, 0xc3, 0x21, 0x7a, 0xc2, 0x16,
+ 0x1c, 0x00, 0x32, 0x3b, 0x03, 0x21, 0x8a, 0xc9, 0xae, 0xd6, 0x00, 0x33,
+ 0xa2, 0x03, 0x21, 0x8e, 0x4c, 0x73, 0x54, 0xc3, 0x21, 0x92, 0x46, 0x3b,
+ 0xc5, 0x43, 0x21, 0xfa, 0x8e, 0x0f, 0x70, 0x19, 0x86, 0x0f, 0x70, 0xc8,
+ 0x8a, 0x0f, 0x70, 0x41, 0x45, 0x14, 0xa8, 0x43, 0x22, 0x12, 0xc2, 0x16,
+ 0x1c, 0x0f, 0x70, 0xb1, 0xc2, 0x00, 0x65, 0x0f, 0x70, 0xc0, 0x03, 0xc3,
+ 0x22, 0x50, 0xc3, 0x85, 0xf5, 0x0f, 0x74, 0x09, 0xc4, 0x30, 0xc1, 0x0f,
+ 0x74, 0x11, 0x42, 0x0e, 0x9a, 0xc3, 0x22, 0x5c, 0x0a, 0xc3, 0x22, 0x64,
+ 0xc3, 0x7e, 0x89, 0x0f, 0x74, 0x29, 0x42, 0x02, 0x1c, 0xc3, 0x22, 0x70,
+ 0x16, 0xc3, 0x22, 0x7a, 0xc3, 0x2b, 0xb9, 0x0f, 0x74, 0x49, 0xc3, 0x0d,
+ 0xff, 0x0f, 0x74, 0x59, 0xc4, 0x19, 0x60, 0x0f, 0x74, 0x61, 0xc4, 0x3a,
+ 0x01, 0x0f, 0x74, 0x69, 0x15, 0xc3, 0x22, 0x8a, 0xc3, 0xb1, 0x0d, 0x0f,
+ 0x74, 0x81, 0xc3, 0x0f, 0x9a, 0x0f, 0x74, 0x91, 0xc3, 0x72, 0xf0, 0x0f,
+ 0x74, 0x99, 0xc4, 0x14, 0x4a, 0x0f, 0x74, 0xb9, 0xc5, 0x92, 0x75, 0x0f,
+ 0x74, 0xd8, 0xc3, 0x85, 0xf5, 0x0f, 0x73, 0x09, 0xc4, 0x30, 0xc1, 0x0f,
+ 0x73, 0x11, 0x0a, 0xc3, 0x22, 0x9c, 0x16, 0xc3, 0x22, 0xa8, 0xc3, 0x2b,
+ 0xb9, 0x0f, 0x73, 0x49, 0x0d, 0xc3, 0x22, 0xba, 0xc4, 0x19, 0x60, 0x0f,
+ 0x73, 0x61, 0xc4, 0x3a, 0x01, 0x0f, 0x73, 0x69, 0x15, 0xc3, 0x22, 0xc6,
+ 0xc3, 0x03, 0x0c, 0x0f, 0x73, 0x79, 0xc3, 0xb1, 0x0d, 0x0f, 0x73, 0x81,
+ 0xc3, 0x0f, 0x9a, 0x0f, 0x73, 0x91, 0x06, 0xc3, 0x22, 0xd8, 0xc3, 0x74,
+ 0x6a, 0x0f, 0x73, 0xd1, 0xc5, 0x92, 0x75, 0x0f, 0x73, 0xd8, 0xc2, 0x16,
+ 0x1c, 0x0f, 0x71, 0x21, 0xc2, 0x02, 0x98, 0x0f, 0x71, 0x38, 0xc2, 0x0f,
+ 0x9b, 0x0f, 0x71, 0x51, 0xc3, 0x14, 0x4b, 0x0f, 0x71, 0xb8, 0xc3, 0x03,
+ 0x26, 0x0f, 0x71, 0x71, 0xc2, 0x01, 0x9d, 0x0f, 0x71, 0x89, 0xc4, 0xdf,
+ 0x93, 0x0f, 0x71, 0xa0, 0xc2, 0x16, 0x1c, 0x0f, 0x71, 0xa9, 0xc3, 0x64,
+ 0x77, 0x0f, 0x71, 0xb0, 0xc8, 0x33, 0xae, 0x00, 0x47, 0xf1, 0xcd, 0x00,
+ 0xfa, 0x07, 0xf3, 0xc1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xc8, 0xce, 0x00,
+ 0xf9, 0x07, 0xf3, 0x90, 0xc9, 0x16, 0x14, 0x00, 0x47, 0xa9, 0xc4, 0x00,
+ 0x9d, 0x00, 0x47, 0xa1, 0xc8, 0x02, 0x9f, 0x00, 0x32, 0xf0, 0xc2, 0x39,
+ 0x8b, 0x00, 0x47, 0x99, 0x44, 0x1d, 0xc8, 0x43, 0x22, 0xe4, 0xc9, 0xad,
+ 0xbf, 0x00, 0x47, 0x09, 0xc2, 0x01, 0x9d, 0x00, 0x46, 0xa9, 0xc3, 0x03,
+ 0x26, 0x00, 0x36, 0xe0, 0xce, 0x6f, 0x7e, 0x00, 0x47, 0x01, 0xc8, 0xbf,
+ 0x82, 0x00, 0x46, 0x50, 0xcb, 0x60, 0x7f, 0x00, 0x46, 0xc0, 0x8a, 0x00,
+ 0x46, 0x69, 0xc2, 0x00, 0x74, 0x00, 0x30, 0xb8, 0xdb, 0x17, 0xcd, 0x00,
+ 0x46, 0x58, 0xc4, 0x41, 0xc1, 0x00, 0x37, 0x21, 0x45, 0x30, 0xc2, 0x43,
+ 0x22, 0xfa, 0xc9, 0x02, 0xde, 0x00, 0x36, 0xd9, 0xc2, 0x02, 0x98, 0x00,
+ 0x30, 0xa8, 0xc7, 0xca, 0x7d, 0x00, 0x36, 0xc9, 0x48, 0x19, 0x9b, 0x43,
+ 0x23, 0x06, 0xc2, 0x39, 0x8b, 0x00, 0x46, 0x99, 0x44, 0x1d, 0xc8, 0x43,
+ 0x23, 0x18, 0xc5, 0x05, 0x02, 0x00, 0x46, 0x81, 0xcd, 0x00, 0xfa, 0x07,
+ 0xf3, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf3, 0xf8, 0x4b, 0x05, 0x29, 0xc3,
+ 0x23, 0x22, 0xc5, 0x05, 0x02, 0x07, 0xdd, 0xa9, 0xc5, 0x00, 0xd4, 0x07,
+ 0xdd, 0xa0, 0x53, 0x26, 0x03, 0xc3, 0x23, 0x2e, 0xc5, 0x05, 0x02, 0x07,
+ 0xdd, 0xb9, 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xb0, 0xc5, 0x05, 0x02, 0x07,
+ 0xdd, 0x99, 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0x90, 0xd0, 0x5f, 0xd2, 0x00,
+ 0x37, 0xf1, 0xc9, 0x36, 0x20, 0x00, 0x37, 0xe8, 0xda, 0x1d, 0x22, 0x00,
+ 0x30, 0x81, 0xc4, 0xe4, 0x0f, 0x00, 0x30, 0x21, 0xc3, 0xa8, 0x39, 0x00,
+ 0x30, 0x19, 0xc3, 0x39, 0x71, 0x00, 0x30, 0x08, 0xce, 0x04, 0xf9, 0x00,
+ 0x44, 0x29, 0x4b, 0x97, 0x5b, 0xc3, 0x23, 0x3a, 0xce, 0x71, 0x06, 0x07,
+ 0xf3, 0x88, 0xc2, 0xe5, 0xfd, 0x0f, 0xb9, 0x88, 0xc8, 0x8c, 0x89, 0x0f,
+ 0xb9, 0x71, 0xc6, 0x4c, 0x49, 0x0f, 0xb9, 0x38, 0xcb, 0x03, 0xbc, 0x01,
+ 0x1a, 0xb9, 0xc6, 0xcd, 0xc1, 0x01, 0x1a, 0x60, 0xc2, 0x01, 0x6f, 0x01,
+ 0x1a, 0x68, 0xc5, 0x3a, 0x1b, 0x01, 0x19, 0xd1, 0xc4, 0x07, 0xb2, 0x01,
+ 0x19, 0xc8, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xd9, 0xc8, 0x4b, 0x94, 0x08,
+ 0x09, 0x20, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xd1, 0xc8, 0x4b, 0x94, 0x08,
+ 0x09, 0x18, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xe9, 0xc8, 0x4b, 0x94, 0x08,
+ 0x09, 0x30, 0xc7, 0x0d, 0x04, 0x08, 0x08, 0xe1, 0xc8, 0x4b, 0x94, 0x08,
+ 0x09, 0x28, 0xc7, 0x3a, 0x19, 0x0f, 0xdd, 0x71, 0x47, 0x04, 0xcb, 0xc3,
+ 0x23, 0x46, 0x46, 0x02, 0xae, 0xc3, 0x23, 0x52, 0xc5, 0x0d, 0x20, 0x01,
+ 0x2b, 0x98, 0xc2, 0x01, 0x48, 0x01, 0x2b, 0xbb, 0x03, 0x23, 0x64, 0x4a,
+ 0xa2, 0xa6, 0x43, 0x23, 0x6a, 0x0a, 0xc3, 0x23, 0x76, 0xc4, 0x00, 0x49,
+ 0x01, 0x28, 0xc1, 0xc5, 0x00, 0x2c, 0x01, 0x28, 0xa0, 0xc5, 0x00, 0x2c,
+ 0x01, 0x2b, 0x81, 0xc4, 0x00, 0x49, 0x01, 0x2b, 0x78, 0xc4, 0x00, 0x49,
+ 0x01, 0x2b, 0x71, 0xc5, 0x00, 0x2c, 0x01, 0x2b, 0x68, 0xca, 0x01, 0x68,
+ 0x01, 0x29, 0xe1, 0xc4, 0x00, 0x49, 0x01, 0x29, 0x21, 0xc5, 0x00, 0x2c,
+ 0x01, 0x28, 0xe0, 0xc9, 0x12, 0x0d, 0x01, 0x2b, 0xf9, 0xc3, 0x00, 0x4a,
+ 0x01, 0x28, 0xd8, 0xca, 0x01, 0x68, 0x01, 0x29, 0x99, 0xc4, 0x00, 0x49,
+ 0x01, 0x28, 0x99, 0xc5, 0x00, 0x2c, 0x01, 0x28, 0x78, 0xca, 0x01, 0x68,
+ 0x01, 0x2b, 0x61, 0xc4, 0x00, 0x49, 0x01, 0x2b, 0x19, 0xc5, 0x00, 0x2c,
+ 0x01, 0x2b, 0x00, 0xc8, 0x11, 0xff, 0x01, 0x29, 0x49, 0xc5, 0x11, 0x39,
+ 0x01, 0x28, 0x88, 0xc8, 0x11, 0xff, 0x01, 0x29, 0x09, 0xc5, 0x11, 0x39,
+ 0x01, 0x28, 0x68, 0xc8, 0x11, 0x49, 0x01, 0x29, 0x39, 0xc5, 0x07, 0xeb,
+ 0x01, 0x28, 0x90, 0xc8, 0x11, 0x49, 0x01, 0x28, 0xf9, 0xc5, 0x07, 0xeb,
+ 0x01, 0x28, 0x70, 0xa3, 0x0f, 0xd9, 0xa0, 0xa3, 0x0f, 0xd9, 0x61, 0xa2,
+ 0x0f, 0xd8, 0xe8, 0xa3, 0x0f, 0xd9, 0xc0, 0xa3, 0x0f, 0xd9, 0xd0, 0xa3,
+ 0x0f, 0xd9, 0xd8, 0xd7, 0x2a, 0xf5, 0x0f, 0xd2, 0x60, 0xc5, 0x56, 0xa5,
+ 0x01, 0x32, 0xf3, 0x03, 0x23, 0x82, 0xc3, 0x00, 0x74, 0x01, 0x32, 0xd2,
+ 0x03, 0x23, 0x8c, 0x49, 0x2a, 0xf5, 0x43, 0x23, 0x92, 0x49, 0x2a, 0xf5,
+ 0x43, 0x23, 0x9e, 0x49, 0x2a, 0xf5, 0x43, 0x23, 0xaa, 0x49, 0x2a, 0xf5,
+ 0x43, 0x23, 0xb6, 0x0d, 0xc3, 0x23, 0xc2, 0xc5, 0xa8, 0xf7, 0x0f, 0xd1,
+ 0x29, 0xc4, 0xde, 0x83, 0x0f, 0xd1, 0x31, 0xc6, 0xca, 0xfd, 0x0f, 0xd1,
+ 0x39, 0xc4, 0xe3, 0x93, 0x0f, 0xd1, 0x48, 0xcf, 0x14, 0x22, 0x01, 0x5d,
+ 0x71, 0xcd, 0x1b, 0x41, 0x01, 0x5d, 0x60, 0xcf, 0x09, 0xf8, 0x01, 0x5d,
+ 0x41, 0xd0, 0x03, 0xb7, 0x01, 0x5d, 0x48, 0xcf, 0x09, 0xf8, 0x01, 0x5d,
+ 0x51, 0xd0, 0x03, 0xb7, 0x01, 0x5d, 0x58, 0xcd, 0x1b, 0x41, 0x01, 0x5d,
+ 0x69, 0xcf, 0x14, 0x22, 0x01, 0x5d, 0x78, 0x45, 0x00, 0x8c, 0xc3, 0x23,
+ 0xce, 0xca, 0xa0, 0x62, 0x01, 0x1f, 0xd0, 0x15, 0xc3, 0x23, 0xe0, 0xc7,
+ 0x3a, 0x19, 0x01, 0x59, 0x49, 0xc7, 0x0a, 0xe0, 0x01, 0x59, 0x50, 0xc8,
+ 0xbe, 0xa2, 0x01, 0x1f, 0xc9, 0xc6, 0x86, 0xfd, 0x0f, 0xa9, 0x91, 0xc7,
+ 0x5e, 0xa7, 0x01, 0x5e, 0x00, 0xd8, 0x23, 0xab, 0x0f, 0xbc, 0x19, 0xce,
+ 0x6c, 0x8a, 0x01, 0x2d, 0xf1, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0xe1, 0xcf,
+ 0x65, 0x94, 0x01, 0x1f, 0x60, 0xcd, 0x7d, 0xc6, 0x01, 0x3a, 0xb1, 0xc4,
+ 0x22, 0xdc, 0x01, 0x33, 0x31, 0xcf, 0x6a, 0x44, 0x01, 0x4f, 0x51, 0xc7,
+ 0x5e, 0xa7, 0x01, 0x5e, 0x09, 0xc8, 0xb5, 0xe2, 0x01, 0x5e, 0xf0, 0xc4,
+ 0x5b, 0x26, 0x01, 0x36, 0x19, 0xc3, 0x12, 0xb8, 0x01, 0x36, 0x10, 0xd8,
+ 0x23, 0xab, 0x0f, 0xbc, 0x11, 0x12, 0xc3, 0x23, 0xec, 0xce, 0x6c, 0x8a,
+ 0x01, 0x2d, 0xc1, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0xb3, 0x03, 0x23, 0xf8,
+ 0xcf, 0x65, 0x94, 0x01, 0x1f, 0x4a, 0x03, 0x23, 0xfe, 0xc5, 0x01, 0xa2,
+ 0x01, 0x3d, 0x0b, 0x03, 0x24, 0x04, 0xc6, 0x1c, 0xb4, 0x01, 0x02, 0x69,
+ 0xd5, 0x03, 0xd2, 0x01, 0x5c, 0xf0, 0xc5, 0x06, 0x82, 0x01, 0x30, 0xd9,
+ 0xce, 0x24, 0xd5, 0x0f, 0xac, 0xe8, 0xd8, 0x23, 0xab, 0x0f, 0xbc, 0x01,
+ 0xc7, 0x46, 0x3d, 0x01, 0x2e, 0x21, 0xce, 0x6c, 0x8a, 0x01, 0x2e, 0x11,
+ 0xc8, 0x01, 0x92, 0x01, 0x2e, 0x01, 0xcf, 0x65, 0x94, 0x01, 0x1f, 0x52,
+ 0x03, 0x24, 0x0a, 0xca, 0xa6, 0x34, 0x01, 0x36, 0xc1, 0x49, 0x01, 0xaa,
+ 0x43, 0x24, 0x10, 0xc6, 0x1c, 0xb4, 0x01, 0x02, 0x61, 0xd5, 0x03, 0xd2,
+ 0x01, 0x5c, 0xe0, 0xcd, 0x2f, 0x72, 0x01, 0x2f, 0x19, 0xce, 0x23, 0xb5,
+ 0x01, 0x2f, 0x10, 0x45, 0x03, 0x14, 0xc3, 0x24, 0x1c, 0xc5, 0x0b, 0x0a,
+ 0x01, 0x2f, 0xe0, 0xd5, 0x2e, 0xad, 0x01, 0x1f, 0xbb, 0x03, 0x24, 0x2e,
+ 0xc6, 0x3a, 0x1a, 0x01, 0x59, 0x28, 0xc8, 0x5e, 0xa6, 0x01, 0x5e, 0x28,
+ 0xc8, 0x5e, 0xa6, 0x01, 0x5e, 0x40, 0xd5, 0x32, 0x03, 0x01, 0x1f, 0xa3,
+ 0x03, 0x24, 0x34, 0xc6, 0x0a, 0xe1, 0x01, 0x59, 0x38, 0xce, 0x23, 0xb5,
+ 0x01, 0x2f, 0x29, 0xcd, 0x2f, 0x72, 0x01, 0x2f, 0x20, 0xce, 0x6c, 0x8a,
+ 0x01, 0x2d, 0xa1, 0xc8, 0x01, 0x92, 0x01, 0x2d, 0x91, 0xcf, 0x65, 0x94,
+ 0x01, 0x1f, 0x59, 0xd8, 0x23, 0xab, 0x0f, 0xbc, 0x08, 0xc5, 0x22, 0xdb,
+ 0x01, 0x33, 0x28, 0x46, 0x00, 0x8b, 0x43, 0x24, 0x3a, 0xcd, 0x7c, 0x8e,
+ 0x00, 0xdb, 0x88, 0xcd, 0x7c, 0x8e, 0x00, 0xdb, 0x80, 0x00, 0x43, 0x24,
+ 0x54, 0xc4, 0xb5, 0x3e, 0x00, 0xd9, 0x19, 0xcf, 0x60, 0x21, 0x00, 0xd8,
+ 0xf1, 0xc5, 0xdc, 0x9f, 0x00, 0xd8, 0xe8, 0xc9, 0x60, 0x27, 0x00, 0xd9,
+ 0x01, 0xc9, 0xb3, 0x8c, 0x00, 0xd8, 0xf8, 0xc4, 0xa1, 0x14, 0x00, 0xd9,
+ 0xfb, 0x03, 0x24, 0x60, 0xc6, 0xc2, 0x6d, 0x00, 0xda, 0x00, 0x97, 0x0b,
+ 0x50, 0x29, 0x83, 0x0b, 0x50, 0x19, 0xc2, 0x00, 0xb0, 0x0b, 0x51, 0xb1,
+ 0x91, 0x0b, 0x51, 0x79, 0x07, 0xc3, 0x24, 0x66, 0xc3, 0x17, 0x29, 0x0b,
+ 0x50, 0xb0, 0xc4, 0xbf, 0xf1, 0x0b, 0x51, 0xb9, 0x0a, 0xc3, 0x24, 0x6e,
+ 0xc3, 0xd7, 0xe2, 0x0b, 0x50, 0xa9, 0x8b, 0x0b, 0x50, 0xa1, 0xc2, 0x5d,
+ 0xa1, 0x0b, 0x50, 0x90, 0xc2, 0x00, 0x3d, 0x0b, 0x51, 0xa9, 0x03, 0x43,
+ 0x24, 0x7c, 0x04, 0xc3, 0x24, 0x84, 0x91, 0x0b, 0x51, 0x99, 0x83, 0x0b,
+ 0x51, 0x91, 0xc4, 0xe2, 0x9f, 0x0b, 0x50, 0x68, 0x07, 0xc3, 0x24, 0x90,
+ 0x97, 0x0b, 0x51, 0x19, 0x0b, 0x43, 0x24, 0x9e, 0xc2, 0x7f, 0xc0, 0x0b,
+ 0x51, 0x71, 0x8b, 0x0b, 0x51, 0x69, 0x83, 0x0b, 0x50, 0x50, 0x83, 0x0b,
+ 0x51, 0x61, 0xc2, 0x0f, 0xe1, 0x0b, 0x51, 0x08, 0xc3, 0x8b, 0xa9, 0x0b,
+ 0x51, 0x51, 0x07, 0x43, 0x24, 0xa8, 0x09, 0xc3, 0x24, 0xb2, 0x8b, 0x0b,
+ 0x51, 0x21, 0xc3, 0x14, 0x09, 0x0b, 0x51, 0x01, 0xc3, 0x01, 0xe2, 0x0b,
+ 0x50, 0xf1, 0x0c, 0xc3, 0x24, 0xbe, 0x97, 0x0b, 0x50, 0xcb, 0x03, 0x24,
+ 0xca, 0xc3, 0x4f, 0x43, 0x0b, 0x50, 0x79, 0xc2, 0x16, 0x5a, 0x0b, 0x50,
+ 0x48, 0x83, 0x0b, 0x50, 0xe9, 0xc2, 0x7f, 0xc0, 0x0b, 0x50, 0xd8, 0x0a,
+ 0xc3, 0x24, 0xd0, 0x42, 0x00, 0x51, 0x43, 0x24, 0xe0, 0x17, 0xc3, 0x24,
+ 0xea, 0xc3, 0xd7, 0xe2, 0x0b, 0x4c, 0xf0, 0xc4, 0xe3, 0xf7, 0x0b, 0x4b,
+ 0xa1, 0x8b, 0x0b, 0x4f, 0xf1, 0x91, 0x0b, 0x4f, 0xc9, 0x07, 0xc3, 0x24,
+ 0xf2, 0x17, 0x43, 0x24, 0xfa, 0x09, 0xc3, 0x25, 0x0a, 0x06, 0xc3, 0x25,
+ 0x29, 0x42, 0x01, 0xe2, 0xc3, 0x25, 0x37, 0x83, 0x0b, 0x4f, 0xb3, 0x03,
+ 0x25, 0x41, 0x0c, 0xc3, 0x25, 0x45, 0x16, 0xc3, 0x25, 0x4f, 0x1c, 0xc3,
+ 0x25, 0x5b, 0x43, 0x70, 0x51, 0xc3, 0x25, 0x67, 0xc3, 0xbc, 0x2f, 0x0b,
+ 0x4d, 0x40, 0x03, 0xc3, 0x25, 0x73, 0x11, 0xc3, 0x25, 0x88, 0x07, 0xc3,
+ 0x25, 0x93, 0x17, 0x43, 0x25, 0x9e, 0x97, 0x0b, 0x4d, 0x03, 0x03, 0x25,
+ 0xab, 0x03, 0xc3, 0x25, 0xb7, 0x8b, 0x0b, 0x4f, 0xbb, 0x03, 0x25, 0xc4,
+ 0x07, 0xc3, 0x25, 0xc8, 0x91, 0x0b, 0x4c, 0xc2, 0x03, 0x25, 0xd2, 0x03,
+ 0xc3, 0x25, 0xd8, 0xc3, 0xd7, 0xe2, 0x0b, 0x4f, 0x79, 0xc5, 0xd4, 0x39,
+ 0x0b, 0x4c, 0x10, 0xc2, 0x00, 0x7a, 0x0b, 0x4b, 0x69, 0x0a, 0xc3, 0x25,
+ 0xe0, 0xc4, 0xb5, 0x1a, 0x0b, 0x4c, 0xd9, 0x07, 0xc3, 0x25, 0xf3, 0xc2,
+ 0x04, 0xc6, 0x0b, 0x4c, 0x28, 0x11, 0xc3, 0x25, 0xfb, 0x03, 0xc3, 0x26,
+ 0x07, 0x97, 0x0b, 0x4f, 0x69, 0xc5, 0xdc, 0xea, 0x0b, 0x4d, 0x98, 0xc2,
+ 0x00, 0x7a, 0x0b, 0x4b, 0x51, 0x07, 0x43, 0x26, 0x15, 0x42, 0x00, 0x51,
+ 0xc3, 0x26, 0x1f, 0xc2, 0x00, 0x45, 0x0b, 0x4f, 0xf9, 0x83, 0x0b, 0x4f,
+ 0xdb, 0x03, 0x26, 0x29, 0xc2, 0x00, 0xc4, 0x0b, 0x4f, 0xd1, 0x8b, 0x0b,
+ 0x4f, 0x73, 0x03, 0x26, 0x38, 0xc2, 0x07, 0xb2, 0x0b, 0x4e, 0x49, 0xc3,
+ 0x8b, 0xa9, 0x0b, 0x4e, 0x31, 0xc4, 0xe0, 0x4f, 0x0b, 0x4d, 0x79, 0x42,
+ 0x1f, 0xad, 0x43, 0x26, 0x3e, 0x83, 0x0b, 0x4d, 0xdb, 0x03, 0x26, 0x48,
+ 0x17, 0xc3, 0x26, 0x4c, 0xc2, 0x02, 0xe0, 0x0b, 0x4f, 0x59, 0xc2, 0x00,
+ 0x7a, 0x0b, 0x4e, 0x98, 0x17, 0xc3, 0x26, 0x57, 0x43, 0x8a, 0x2d, 0xc3,
+ 0x26, 0x6b, 0x42, 0x2c, 0x43, 0xc3, 0x26, 0x77, 0x0b, 0xc3, 0x26, 0x88,
+ 0xc2, 0x00, 0xb6, 0x0b, 0x4d, 0x60, 0x09, 0xc3, 0x26, 0x92, 0x15, 0xc3,
+ 0x26, 0x9a, 0x16, 0xc3, 0x26, 0xaa, 0x06, 0xc3, 0x26, 0xb4, 0x8b, 0x0b,
+ 0x4a, 0xd9, 0x97, 0x0b, 0x4a, 0xb9, 0x1b, 0xc3, 0x26, 0xc4, 0x0c, 0x43,
+ 0x26, 0xda, 0x07, 0xc3, 0x26, 0xf3, 0xc2, 0x7f, 0xc0, 0x0b, 0x4a, 0xf9,
+ 0xc2, 0x01, 0xdf, 0x0b, 0x48, 0xf1, 0xc3, 0x8f, 0x8a, 0x0b, 0x47, 0xb0,
+ 0x03, 0xc3, 0x27, 0x01, 0x07, 0xc3, 0x27, 0x0d, 0x04, 0xc3, 0x27, 0x17,
+ 0xc3, 0x9c, 0xc7, 0x0b, 0x4a, 0xf1, 0x97, 0x0b, 0x4a, 0x99, 0x08, 0xc3,
+ 0x27, 0x26, 0x42, 0x1f, 0xad, 0xc3, 0x27, 0x39, 0xc3, 0x07, 0x85, 0x0b,
+ 0x48, 0xc8, 0x07, 0xc3, 0x27, 0x4b, 0x97, 0x0b, 0x48, 0x8b, 0x03, 0x27,
+ 0x55, 0x8b, 0x0b, 0x4b, 0x09, 0xc2, 0x7f, 0xc0, 0x0b, 0x4a, 0x61, 0xc2,
+ 0x10, 0x11, 0x0b, 0x4a, 0x58, 0x97, 0x0b, 0x4a, 0x4b, 0x03, 0x27, 0x5b,
+ 0xc3, 0x17, 0x29, 0x0b, 0x4a, 0xb1, 0x07, 0xc3, 0x27, 0x69, 0xc4, 0xde,
+ 0xb3, 0x0b, 0x49, 0x08, 0x17, 0xc3, 0x27, 0x71, 0x03, 0xc3, 0x27, 0x7f,
+ 0x0a, 0xc3, 0x27, 0x87, 0xc2, 0x01, 0xbb, 0x0b, 0x49, 0x21, 0xc5, 0x8b,
+ 0xa8, 0x0b, 0x48, 0x60, 0xc8, 0xb5, 0xc2, 0x0b, 0x48, 0xa1, 0xc2, 0x04,
+ 0xc6, 0x0b, 0x4b, 0x28, 0xc6, 0xcb, 0x1b, 0x0b, 0x48, 0x29, 0x17, 0xc3,
+ 0x27, 0x9b, 0xc2, 0x00, 0xc4, 0x0b, 0x48, 0x68, 0x43, 0x03, 0x27, 0xc3,
+ 0x27, 0xa5, 0xc2, 0x25, 0x9f, 0x0b, 0x4a, 0x71, 0xc3, 0x7c, 0x57, 0x0b,
+ 0x49, 0x38, 0x17, 0xc3, 0x27, 0xb1, 0x07, 0xc3, 0x27, 0xbb, 0xc2, 0x00,
+ 0xb6, 0x0b, 0x49, 0xa9, 0xc2, 0x00, 0x7e, 0x0b, 0x49, 0x68, 0xc4, 0x8b,
+ 0xa8, 0x0b, 0x4a, 0x41, 0xc2, 0x04, 0xc6, 0x0b, 0x48, 0x90, 0xc4, 0xb5,
+ 0x1a, 0x0b, 0x47, 0xd9, 0xc2, 0x00, 0xb6, 0x0b, 0x47, 0x90, 0x07, 0xc3,
+ 0x27, 0xc5, 0x17, 0xc3, 0x27, 0xd3, 0xc2, 0x04, 0xc6, 0x0b, 0x45, 0x49,
+ 0xc5, 0x5c, 0x98, 0x0b, 0x45, 0x40, 0x0a, 0xc3, 0x27, 0xdd, 0x07, 0xc3,
+ 0x27, 0xe9, 0xc4, 0xa1, 0xee, 0x0b, 0x45, 0x78, 0x07, 0xc3, 0x27, 0xf5,
+ 0x42, 0x00, 0x8d, 0xc3, 0x27, 0xff, 0xc6, 0xcf, 0xc5, 0x0b, 0x45, 0x60,
+ 0xc2, 0x00, 0xc4, 0x0b, 0x47, 0x79, 0x0b, 0x43, 0x28, 0x0b, 0xc2, 0x14,
+ 0xbe, 0x0b, 0x47, 0x69, 0x97, 0x0b, 0x46, 0x69, 0x03, 0x43, 0x28, 0x15,
+ 0x03, 0xc3, 0x28, 0x1d, 0x09, 0xc3, 0x28, 0x27, 0x0c, 0xc3, 0x28, 0x3b,
+ 0x06, 0xc3, 0x28, 0x49, 0x15, 0xc3, 0x28, 0x5f, 0x16, 0xc3, 0x28, 0x79,
+ 0x1c, 0xc3, 0x28, 0x89, 0xd0, 0x5c, 0x92, 0x0b, 0x44, 0xc8, 0xc3, 0x8b,
+ 0xa9, 0x0b, 0x47, 0x39, 0xc3, 0x8f, 0x8a, 0x0b, 0x47, 0x31, 0x04, 0xc3,
+ 0x28, 0x93, 0x03, 0xc3, 0x28, 0xa6, 0xc6, 0xd1, 0x93, 0x0b, 0x45, 0xc0,
+ 0x17, 0xc3, 0x28, 0xae, 0xc2, 0x04, 0xc6, 0x0b, 0x46, 0xc9, 0xc3, 0x92,
+ 0xb4, 0x0b, 0x45, 0x38, 0xc2, 0x02, 0xae, 0x0b, 0x46, 0x89, 0xc7, 0xc5,
+ 0xbb, 0x0b, 0x44, 0x90, 0xc5, 0xdb, 0x0a, 0x0b, 0x46, 0x09, 0x9a, 0x0b,
+ 0x45, 0x88, 0x42, 0x00, 0xd0, 0xc3, 0x28, 0xbe, 0xc4, 0xe1, 0xb7, 0x0b,
+ 0x44, 0xc0, 0x09, 0xc3, 0x28, 0xc8, 0x15, 0xc3, 0x28, 0xd8, 0x1b, 0xc3,
+ 0x28, 0xe4, 0xc7, 0xc2, 0x81, 0x0b, 0x43, 0x29, 0xcb, 0x8f, 0x89, 0x0b,
+ 0x43, 0x20, 0x08, 0xc3, 0x28, 0xf0, 0x83, 0x0b, 0x44, 0x63, 0x03, 0x28,
+ 0xfc, 0x04, 0xc3, 0x29, 0x02, 0x42, 0x2c, 0x43, 0xc3, 0x29, 0x18, 0xc7,
+ 0xc5, 0xd7, 0x0b, 0x43, 0xf8, 0xc2, 0x00, 0x8d, 0x0b, 0x43, 0x39, 0xc6,
+ 0xcb, 0x4b, 0x0b, 0x44, 0x09, 0xc4, 0xdb, 0x8e, 0x0b, 0x43, 0x91, 0xc5,
+ 0xd9, 0x48, 0x0b, 0x43, 0x08, 0xc4, 0xdc, 0xeb, 0x0b, 0x43, 0x31, 0x90,
+ 0x0b, 0x43, 0x78, 0x0b, 0xc3, 0x29, 0x22, 0x42, 0x2c, 0x43, 0xc3, 0x29,
+ 0x2c, 0xc2, 0x00, 0xc2, 0x0b, 0x43, 0x00, 0xc2, 0x00, 0x3d, 0x0b, 0x44,
+ 0x49, 0x03, 0xc3, 0x29, 0x3e, 0xc8, 0xb6, 0x02, 0x0b, 0x42, 0xd8, 0x87,
+ 0x0b, 0x44, 0x29, 0xc2, 0xd0, 0x00, 0x0b, 0x44, 0x18, 0xc2, 0x0f, 0xe1,
+ 0x0b, 0x43, 0xe9, 0xc6, 0xcd, 0x3d, 0x0b, 0x43, 0xb9, 0x42, 0x01, 0x7f,
+ 0xc3, 0x29, 0x4a, 0xc5, 0xdd, 0xee, 0x0b, 0x42, 0xd1, 0xc3, 0x8f, 0x8a,
+ 0x0b, 0x42, 0xc8, 0xc3, 0x76, 0x32, 0x0b, 0x43, 0xc1, 0x42, 0x03, 0x53,
+ 0x43, 0x29, 0x56, 0xcc, 0x82, 0x71, 0x0b, 0x43, 0x11, 0xc5, 0xdc, 0x9a,
+ 0x0b, 0x42, 0xf0, 0x11, 0xc3, 0x29, 0x62, 0x0a, 0xc3, 0x29, 0x70, 0xc3,
+ 0x40, 0xe6, 0x0b, 0x41, 0x19, 0xc2, 0x5d, 0xa1, 0x0b, 0x40, 0xa9, 0xc6,
+ 0xce, 0x69, 0x0b, 0x40, 0x88, 0x42, 0x2c, 0x43, 0xc3, 0x29, 0x7e, 0x17,
+ 0xc3, 0x29, 0x8a, 0xc8, 0xb7, 0xb2, 0x0b, 0x40, 0x30, 0xc3, 0xe5, 0x6c,
+ 0x0b, 0x41, 0xd9, 0x03, 0xc3, 0x29, 0x96, 0xc3, 0x8f, 0x91, 0x0b, 0x41,
+ 0xa9, 0x07, 0x43, 0x29, 0xa0, 0x03, 0xc3, 0x29, 0xaa, 0x42, 0x01, 0x5d,
+ 0xc3, 0x29, 0xba, 0x11, 0xc3, 0x29, 0xc4, 0xcb, 0x92, 0xac, 0x0b, 0x41,
+ 0x29, 0xc5, 0xd1, 0x93, 0x0b, 0x41, 0x21, 0xc9, 0xb5, 0x18, 0x0b, 0x40,
+ 0x80, 0x03, 0xc3, 0x29, 0xd0, 0xc2, 0x00, 0xc4, 0x0b, 0x42, 0xa1, 0x42,
+ 0x01, 0xe2, 0xc3, 0x29, 0xda, 0x1b, 0xc3, 0x29, 0xe4, 0xc3, 0xe4, 0x60,
+ 0x0b, 0x42, 0x39, 0x09, 0xc3, 0x29, 0xf1, 0x0d, 0xc3, 0x2a, 0x03, 0x16,
+ 0xc3, 0x2a, 0x0f, 0x42, 0x0e, 0x9a, 0xc3, 0x2a, 0x1e, 0xc3, 0x3d, 0xb5,
+ 0x0b, 0x41, 0x61, 0x1c, 0x43, 0x2a, 0x2a, 0x97, 0x0b, 0x42, 0x9b, 0x03,
+ 0x2a, 0x36, 0xc5, 0x8e, 0x46, 0x0b, 0x41, 0xc1, 0xc6, 0xd0, 0xa9, 0x0b,
+ 0x40, 0xc1, 0xc4, 0xe1, 0x8f, 0x0b, 0x40, 0xb8, 0x03, 0xc3, 0x2a, 0x3c,
+ 0xc2, 0x02, 0xae, 0x0b, 0x41, 0x69, 0xc2, 0x00, 0x3d, 0x0b, 0x41, 0x51,
+ 0x43, 0x01, 0x55, 0x43, 0x2a, 0x52, 0xc6, 0xcc, 0xe9, 0x0b, 0x42, 0x21,
+ 0xc8, 0xbb, 0xaa, 0x0b, 0x41, 0x00, 0x45, 0xcf, 0x0c, 0xc3, 0x2a, 0x5e,
+ 0xc8, 0xbe, 0x62, 0x0b, 0x40, 0x08, 0xc2, 0x0d, 0xf6, 0x00, 0xde, 0xd1,
+ 0xc2, 0x00, 0xc1, 0x00, 0xde, 0x51, 0xc2, 0x00, 0xd0, 0x00, 0xde, 0x20,
+ 0xcf, 0x67, 0x92, 0x00, 0x4f, 0x81, 0xce, 0x6e, 0x74, 0x00, 0x4f, 0x88,
+ 0x94, 0x00, 0x4f, 0x00, 0x8e, 0x00, 0x4f, 0x08, 0xa0, 0x01, 0x40, 0x3b,
+ 0x03, 0x2a, 0x6a, 0xa1, 0x01, 0x40, 0x5b, 0x03, 0x2a, 0x8a, 0xa2, 0x01,
+ 0x40, 0x9b, 0x03, 0x2a, 0xa3, 0xa3, 0x01, 0x41, 0x1b, 0x03, 0x2a, 0xb5,
+ 0xa5, 0x01, 0x44, 0x19, 0xa4, 0x01, 0x42, 0x1a, 0x03, 0x2a, 0xc0, 0xa1,
+ 0x01, 0x40, 0x6b, 0x03, 0x2a, 0xc4, 0xa2, 0x01, 0x40, 0xab, 0x03, 0x2a,
+ 0xdd, 0xa3, 0x01, 0x41, 0x2b, 0x03, 0x2a, 0xef, 0xa5, 0x01, 0x44, 0x29,
+ 0xa4, 0x01, 0x42, 0x2a, 0x03, 0x2a, 0xfa, 0xa2, 0x01, 0x40, 0xcb, 0x03,
+ 0x2a, 0xfe, 0xa3, 0x01, 0x41, 0x4b, 0x03, 0x2b, 0x10, 0xa5, 0x01, 0x44,
+ 0x49, 0xa4, 0x01, 0x42, 0x4a, 0x03, 0x2b, 0x1b, 0xa3, 0x01, 0x41, 0x8b,
+ 0x03, 0x2b, 0x1f, 0xa5, 0x01, 0x44, 0x89, 0xa4, 0x01, 0x42, 0x8a, 0x03,
+ 0x2b, 0x2a, 0xa5, 0x01, 0x45, 0x09, 0xa4, 0x01, 0x43, 0x0a, 0x03, 0x2b,
+ 0x2e, 0xa5, 0x01, 0x46, 0x08, 0xa1, 0x01, 0x40, 0x73, 0x03, 0x2b, 0x32,
+ 0xa2, 0x01, 0x40, 0xb3, 0x03, 0x2b, 0x4b, 0xa3, 0x01, 0x41, 0x33, 0x03,
+ 0x2b, 0x5d, 0xa5, 0x01, 0x44, 0x31, 0xa4, 0x01, 0x42, 0x32, 0x03, 0x2b,
+ 0x68, 0xa2, 0x01, 0x40, 0xd3, 0x03, 0x2b, 0x6c, 0xa3, 0x01, 0x41, 0x53,
+ 0x03, 0x2b, 0x7e, 0xa5, 0x01, 0x44, 0x51, 0xa4, 0x01, 0x42, 0x52, 0x03,
+ 0x2b, 0x89, 0xa3, 0x01, 0x41, 0x93, 0x03, 0x2b, 0x8d, 0xa5, 0x01, 0x44,
+ 0x91, 0xa4, 0x01, 0x42, 0x92, 0x03, 0x2b, 0x98, 0xa5, 0x01, 0x45, 0x11,
+ 0xa4, 0x01, 0x43, 0x12, 0x03, 0x2b, 0x9c, 0xa5, 0x01, 0x46, 0x10, 0xa2,
+ 0x01, 0x40, 0xe3, 0x03, 0x2b, 0xa0, 0xa3, 0x01, 0x41, 0x63, 0x03, 0x2b,
+ 0xb2, 0xa5, 0x01, 0x44, 0x61, 0xa4, 0x01, 0x42, 0x62, 0x03, 0x2b, 0xbd,
+ 0xa3, 0x01, 0x41, 0xa3, 0x03, 0x2b, 0xc1, 0xa5, 0x01, 0x44, 0xa1, 0xa4,
+ 0x01, 0x42, 0xa2, 0x03, 0x2b, 0xcc, 0xa5, 0x01, 0x45, 0x21, 0xa4, 0x01,
+ 0x43, 0x22, 0x03, 0x2b, 0xd0, 0xa5, 0x01, 0x46, 0x20, 0xa3, 0x01, 0x41,
+ 0xc3, 0x03, 0x2b, 0xd4, 0xa5, 0x01, 0x44, 0xc1, 0xa4, 0x01, 0x42, 0xc2,
+ 0x03, 0x2b, 0xdf, 0xa5, 0x01, 0x45, 0x41, 0xa4, 0x01, 0x43, 0x42, 0x03,
+ 0x2b, 0xe3, 0xa5, 0x01, 0x46, 0x40, 0xa5, 0x01, 0x45, 0x81, 0xa4, 0x01,
+ 0x43, 0x82, 0x03, 0x2b, 0xe7, 0xa5, 0x01, 0x46, 0x80, 0xa5, 0x01, 0x47,
+ 0x00, 0x83, 0x08, 0x83, 0xa9, 0xc2, 0x00, 0xdb, 0x08, 0x81, 0xa8, 0x91,
+ 0x08, 0x83, 0x91, 0x87, 0x08, 0x83, 0x88, 0x8e, 0x08, 0x80, 0x70, 0x94,
+ 0x08, 0x80, 0x60, 0x91, 0x08, 0x83, 0xa1, 0x87, 0x08, 0x83, 0x98, 0x8e,
+ 0x08, 0x82, 0x08, 0x94, 0x08, 0x81, 0xf8, 0xc4, 0x99, 0xff, 0x0e, 0x87,
+ 0xa9, 0xc3, 0x2e, 0xd7, 0x0e, 0x84, 0x78, 0xc5, 0xa9, 0xe5, 0x0e, 0x84,
+ 0x89, 0xc8, 0xb2, 0xd8, 0x0e, 0x84, 0x80, 0xc4, 0x99, 0xff, 0x0e, 0x87,
+ 0x91, 0xc4, 0xe4, 0xa7, 0x0e, 0x87, 0x81, 0xc3, 0x2e, 0xd7, 0x0e, 0x82,
+ 0x70, 0xc3, 0x63, 0x2b, 0x0e, 0x84, 0x19, 0x03, 0x43, 0x2b, 0xeb, 0xd0,
+ 0x32, 0xc5, 0x0e, 0x85, 0x69, 0xcd, 0x77, 0x2c, 0x0e, 0x82, 0x90, 0x00,
+ 0x43, 0x2b, 0xf7, 0xc9, 0xb0, 0x35, 0x0e, 0x87, 0x29, 0xc7, 0xc5, 0x83,
+ 0x0e, 0x87, 0x20, 0xc9, 0xb0, 0x35, 0x0e, 0x87, 0x09, 0xc7, 0xc5, 0x83,
+ 0x0e, 0x87, 0x00, 0xc5, 0xa9, 0xe5, 0x0e, 0x84, 0xa9, 0x49, 0xb2, 0xd8,
+ 0x43, 0x2c, 0x03, 0xc5, 0xd9, 0x3e, 0x0e, 0x86, 0xd9, 0xc4, 0x80, 0xbc,
+ 0x0e, 0x86, 0xd0, 0xd5, 0x35, 0xb4, 0x0e, 0x86, 0x99, 0xc8, 0x2e, 0x8e,
+ 0x0e, 0x86, 0x70, 0xc3, 0x2e, 0xd7, 0x0e, 0x86, 0x11, 0xc4, 0x99, 0xff,
+ 0x0e, 0x86, 0x08, 0xc3, 0x15, 0x30, 0x0e, 0x82, 0x19, 0xc7, 0x9c, 0xe1,
+ 0x0e, 0x81, 0xb0, 0xc2, 0x6d, 0x08, 0x0e, 0x83, 0xb9, 0xc2, 0x00, 0xfb,
+ 0x0e, 0x83, 0xb0, 0xc3, 0x63, 0x2b, 0x0e, 0x82, 0xf1, 0xc8, 0x9c, 0xe0,
+ 0x0e, 0x81, 0xf0, 0xc6, 0x04, 0xe1, 0x0f, 0xd9, 0xe1, 0xc5, 0x00, 0x2c,
+ 0x0f, 0xd9, 0xe8, 0x55, 0x0a, 0x4c, 0xc3, 0x2c, 0x0f, 0x48, 0x0a, 0x53,
+ 0xc3, 0x2c, 0x21, 0x4a, 0x13, 0xe3, 0x43, 0x2c, 0x2d, 0xc6, 0x04, 0xe1,
+ 0x0f, 0xda, 0x19, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0x21, 0xcc, 0x04, 0xcb,
+ 0x0f, 0xda, 0x30, 0x46, 0x02, 0xae, 0xc3, 0x2c, 0x39, 0xd2, 0x4c, 0x37,
+ 0x0f, 0xda, 0x40, 0xd2, 0x4c, 0x37, 0x0f, 0xda, 0x39, 0x46, 0x02, 0xae,
+ 0x43, 0x2c, 0x45, 0xc7, 0x80, 0x70, 0x01, 0x53, 0x11, 0xc8, 0x52, 0x09,
+ 0x01, 0x53, 0x18, 0x16, 0xc3, 0x2c, 0x51, 0xd0, 0x57, 0xa2, 0x01, 0x3e,
+ 0xd0, 0x49, 0x09, 0xb3, 0xc3, 0x2c, 0x5d, 0xd0, 0x06, 0xd7, 0x0f, 0xdb,
+ 0xe0, 0x49, 0x09, 0xb3, 0xc3, 0x2c, 0x63, 0xd0, 0x06, 0xd7, 0x0f, 0xdb,
+ 0xe8, 0xc9, 0x33, 0xad, 0x01, 0x4c, 0x88, 0x16, 0xc3, 0x2c, 0x69, 0xc9,
+ 0x3b, 0x79, 0x0f, 0xc8, 0x19, 0xc3, 0x02, 0xa3, 0x0f, 0xc8, 0x30, 0xc6,
+ 0x02, 0xd1, 0x01, 0x2e, 0xb1, 0xc4, 0x0e, 0x6a, 0x01, 0x5f, 0x40, 0x45,
+ 0x00, 0x8c, 0xc3, 0x2c, 0x75, 0xd4, 0x3b, 0x4c, 0x01, 0x4a, 0x40, 0xc6,
+ 0x01, 0x73, 0x01, 0x0e, 0x71, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x20, 0xc5,
+ 0x78, 0x04, 0x01, 0x02, 0x29, 0x48, 0xbc, 0xfa, 0xc3, 0x2c, 0x87, 0xc8,
+ 0x52, 0x09, 0x01, 0x4c, 0x59, 0xc6, 0x01, 0x73, 0x01, 0x72, 0xa9, 0xcd,
+ 0x75, 0xa6, 0x01, 0x72, 0xb8, 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x03, 0x03,
+ 0x2c, 0x93, 0xcc, 0x82, 0xb9, 0x01, 0x5b, 0x51, 0xcd, 0x7c, 0xa8, 0x01,
+ 0x5c, 0x20, 0x45, 0x00, 0x8c, 0xc3, 0x2c, 0x97, 0xc8, 0xae, 0xbc, 0x01,
+ 0x59, 0xb0, 0x45, 0x03, 0x14, 0xc3, 0x2c, 0xa7, 0xc5, 0x01, 0x74, 0x01,
+ 0x0c, 0xd0, 0xd4, 0x2d, 0x64, 0x01, 0x0f, 0xd1, 0xc9, 0xb3, 0xf8, 0x01,
+ 0x59, 0xc0, 0xc3, 0x7e, 0x79, 0x01, 0x0d, 0x59, 0xd7, 0x22, 0x5c, 0x0f,
+ 0xc0, 0x40, 0xc3, 0x14, 0xa7, 0x01, 0x0d, 0x13, 0x03, 0x2c, 0xb3, 0x43,
+ 0x00, 0x7e, 0x43, 0x2c, 0xb9, 0xc2, 0x00, 0xb1, 0x01, 0x0f, 0x23, 0x03,
+ 0x2c, 0xc5, 0xcc, 0x56, 0x78, 0x01, 0x48, 0xe8, 0xc6, 0x0e, 0xa4, 0x01,
+ 0x4b, 0xd1, 0xc9, 0x00, 0xca, 0x01, 0x4b, 0xb9, 0x9a, 0x01, 0x59, 0xf0,
+ 0xce, 0x33, 0x92, 0x01, 0x4b, 0x99, 0xd6, 0x2f, 0x5c, 0x01, 0x4a, 0x19,
+ 0x48, 0x61, 0xd4, 0xc3, 0x2c, 0xcb, 0xcf, 0x6a, 0x8f, 0x01, 0x5a, 0x50,
+ 0xe0, 0x06, 0xc7, 0x0f, 0xdd, 0xa8, 0x45, 0x00, 0x8c, 0xc3, 0x2c, 0xd7,
+ 0xc8, 0xae, 0xbc, 0x01, 0x48, 0x30, 0x44, 0x03, 0xc8, 0xc3, 0x2c, 0xe3,
+ 0x42, 0x02, 0xae, 0x43, 0x2c, 0xed, 0xc6, 0x00, 0x2b, 0x01, 0x54, 0x18,
+ 0xc3, 0xe5, 0xea, 0x08, 0x3a, 0x71, 0xc3, 0x52, 0x99, 0x08, 0x3a, 0x69,
+ 0xc3, 0xdf, 0xaf, 0x08, 0x3a, 0x79, 0xc7, 0xc0, 0xc8, 0x08, 0x3a, 0x81,
+ 0xc5, 0xd6, 0x5f, 0x08, 0x3a, 0x89, 0xc4, 0xe2, 0x8b, 0x08, 0x3a, 0x91,
+ 0xc4, 0xe1, 0xd3, 0x08, 0x3a, 0x98, 0x26, 0xc3, 0x2c, 0xf7, 0xc3, 0xb6,
+ 0x4a, 0x08, 0x3a, 0x39, 0xc3, 0xd8, 0x0d, 0x08, 0x3a, 0x31, 0xc3, 0xd3,
+ 0xaf, 0x08, 0x3a, 0x29, 0xc3, 0xe2, 0x7b, 0x08, 0x3a, 0x21, 0xc3, 0xe6,
+ 0x32, 0x08, 0x3a, 0x19, 0xc3, 0xe6, 0x65, 0x08, 0x3a, 0x11, 0xc3, 0xe1,
+ 0x37, 0x08, 0x3a, 0x09, 0xc3, 0xc7, 0x9e, 0x08, 0x3a, 0x00, 0x9e, 0x08,
+ 0x39, 0x99, 0x9f, 0x08, 0x39, 0xa1, 0xa0, 0x08, 0x39, 0xa9, 0xa1, 0x08,
+ 0x39, 0xb1, 0x9d, 0x08, 0x39, 0x90, 0x9d, 0x08, 0x38, 0x19, 0x9e, 0x08,
+ 0x38, 0x21, 0x9f, 0x08, 0x38, 0x29, 0xa0, 0x08, 0x38, 0x31, 0xa1, 0x08,
+ 0x38, 0x39, 0xa3, 0x08, 0x38, 0x41, 0xa5, 0x08, 0x38, 0x49, 0xa6, 0x08,
+ 0x38, 0x50, 0x9d, 0x08, 0x38, 0x59, 0x9e, 0x08, 0x38, 0x61, 0x9f, 0x08,
+ 0x38, 0x69, 0xa0, 0x08, 0x38, 0x71, 0xa1, 0x08, 0x38, 0x79, 0xa2, 0x08,
+ 0x38, 0x81, 0xa3, 0x08, 0x38, 0x89, 0xa4, 0x08, 0x38, 0x91, 0xa5, 0x08,
+ 0x38, 0x99, 0xa6, 0x08, 0x38, 0xa0, 0x9d, 0x08, 0x38, 0xa9, 0x9e, 0x08,
+ 0x38, 0xb1, 0x9f, 0x08, 0x38, 0xb9, 0xa0, 0x08, 0x38, 0xc1, 0xa1, 0x08,
+ 0x38, 0xc9, 0xa3, 0x08, 0x38, 0xd1, 0xa4, 0x08, 0x38, 0xd9, 0xa5, 0x08,
+ 0x38, 0xe1, 0xa6, 0x08, 0x38, 0xe8, 0xa1, 0x08, 0x38, 0xf1, 0xa4, 0x08,
+ 0x38, 0xf9, 0xa5, 0x08, 0x39, 0x00, 0x9d, 0x08, 0x39, 0x09, 0x9f, 0x08,
+ 0x39, 0x11, 0xa0, 0x08, 0x39, 0x19, 0xa1, 0x08, 0x39, 0x21, 0xa2, 0x08,
+ 0x39, 0x29, 0xa3, 0x08, 0x39, 0x31, 0xa5, 0x08, 0x39, 0x39, 0xa6, 0x08,
+ 0x39, 0x40, 0xa0, 0x08, 0x39, 0x59, 0xa1, 0x08, 0x39, 0x61, 0xa2, 0x08,
+ 0x39, 0x69, 0xa3, 0x08, 0x39, 0x71, 0xa4, 0x08, 0x39, 0x79, 0xa5, 0x08,
+ 0x39, 0x81, 0x9e, 0x08, 0x39, 0x49, 0x9f, 0x08, 0x39, 0x51, 0xa6, 0x08,
+ 0x39, 0x88, 0x1d, 0xc3, 0x2d, 0x01, 0x1e, 0xc3, 0x2d, 0x25, 0x1f, 0xc3,
+ 0x2d, 0x39, 0x20, 0xc3, 0x2d, 0x66, 0x21, 0xc3, 0x2d, 0x7e, 0x22, 0xc3,
+ 0x2d, 0x9e, 0x23, 0xc3, 0x2d, 0xc2, 0x24, 0xc3, 0x2d, 0xda, 0x25, 0x43,
+ 0x2d, 0xf6, 0xc2, 0x8c, 0x53, 0x08, 0x32, 0x41, 0x1f, 0xc3, 0x2e, 0x0e,
+ 0x42, 0xd5, 0xf8, 0xc3, 0x2e, 0x1a, 0xc2, 0xe6, 0x8a, 0x08, 0x32, 0x81,
+ 0xc2, 0xe6, 0x7f, 0x08, 0x32, 0x89, 0x25, 0xc3, 0x2e, 0x22, 0xc2, 0xe6,
+ 0x86, 0x08, 0x32, 0xa0, 0x9e, 0x08, 0x32, 0xa9, 0x9f, 0x08, 0x32, 0xb1,
+ 0xa0, 0x08, 0x32, 0xb9, 0xa1, 0x08, 0x32, 0xc1, 0xa2, 0x08, 0x32, 0xc9,
+ 0xa3, 0x08, 0x32, 0xd1, 0xa4, 0x08, 0x32, 0xd9, 0xa5, 0x08, 0x32, 0xe1,
+ 0x26, 0x43, 0x2e, 0x2a, 0x9d, 0x08, 0x33, 0x01, 0x9e, 0x08, 0x33, 0x09,
+ 0x9f, 0x08, 0x33, 0x11, 0x20, 0xc3, 0x2e, 0x36, 0xa1, 0x08, 0x33, 0x31,
+ 0xa2, 0x08, 0x33, 0x39, 0xa3, 0x08, 0x33, 0x41, 0xa4, 0x08, 0x33, 0x49,
+ 0xa5, 0x08, 0x33, 0x51, 0xa6, 0x08, 0x33, 0x58, 0x9d, 0x08, 0x33, 0x61,
+ 0x9e, 0x08, 0x33, 0x69, 0x9f, 0x08, 0x33, 0x71, 0xa0, 0x08, 0x33, 0x79,
+ 0xa1, 0x08, 0x33, 0x81, 0xa2, 0x08, 0x33, 0x89, 0xa3, 0x08, 0x33, 0x91,
+ 0xa4, 0x08, 0x33, 0x99, 0xa5, 0x08, 0x33, 0xa1, 0xa6, 0x08, 0x33, 0xa8,
+ 0x9d, 0x08, 0x33, 0xb1, 0x9e, 0x08, 0x33, 0xb9, 0x9f, 0x08, 0x33, 0xc1,
+ 0xa0, 0x08, 0x33, 0xc9, 0xa1, 0x08, 0x33, 0xd1, 0xa2, 0x08, 0x33, 0xd9,
+ 0xa3, 0x08, 0x33, 0xe1, 0xa4, 0x08, 0x33, 0xe9, 0xa5, 0x08, 0x33, 0xf1,
+ 0xa6, 0x08, 0x33, 0xf8, 0x9d, 0x08, 0x34, 0x01, 0x9e, 0x08, 0x34, 0x09,
+ 0x9f, 0x08, 0x34, 0x11, 0xa0, 0x08, 0x34, 0x19, 0xa1, 0x08, 0x34, 0x21,
+ 0xa2, 0x08, 0x34, 0x29, 0xa3, 0x08, 0x34, 0x31, 0xa4, 0x08, 0x34, 0x39,
+ 0xa5, 0x08, 0x34, 0x41, 0xa6, 0x08, 0x34, 0x48, 0x9d, 0x08, 0x34, 0x51,
+ 0x9e, 0x08, 0x34, 0x59, 0x9f, 0x08, 0x34, 0x61, 0xa0, 0x08, 0x34, 0x69,
+ 0xa3, 0x08, 0x34, 0x81, 0xa4, 0x08, 0x34, 0x89, 0xa5, 0x08, 0x34, 0x91,
+ 0xa6, 0x08, 0x34, 0x99, 0xa1, 0x08, 0x34, 0x71, 0xa2, 0x08, 0x34, 0x78,
+ 0x9d, 0x08, 0x34, 0xa1, 0x9e, 0x08, 0x34, 0xa9, 0x9f, 0x08, 0x34, 0xb1,
+ 0xa0, 0x08, 0x34, 0xb9, 0xa1, 0x08, 0x34, 0xc1, 0xa2, 0x08, 0x34, 0xc9,
+ 0xa3, 0x08, 0x34, 0xd1, 0xa4, 0x08, 0x34, 0xd9, 0xa5, 0x08, 0x34, 0xe1,
+ 0xa6, 0x08, 0x34, 0xe8, 0x9d, 0x08, 0x34, 0xf1, 0x9e, 0x08, 0x34, 0xf8,
+ 0xc5, 0xdc, 0xb8, 0x08, 0x35, 0x01, 0xc5, 0xd5, 0x15, 0x08, 0x35, 0x09,
+ 0xc5, 0xd4, 0x1b, 0x08, 0x35, 0x11, 0xc5, 0xd8, 0x58, 0x08, 0x35, 0x19,
+ 0xc5, 0xd6, 0xd2, 0x08, 0x35, 0x21, 0xc5, 0xd6, 0xeb, 0x08, 0x35, 0x29,
+ 0xc5, 0xd7, 0x77, 0x08, 0x35, 0x31, 0xc5, 0xd5, 0x74, 0x08, 0x35, 0x39,
+ 0xc5, 0xdd, 0x9e, 0x08, 0x35, 0x41, 0xc5, 0xd9, 0xbb, 0x08, 0x35, 0x48,
+ 0xc5, 0xdc, 0xb8, 0x08, 0x35, 0x51, 0xc5, 0xd5, 0x15, 0x08, 0x35, 0x59,
+ 0xc5, 0xd4, 0x1b, 0x08, 0x35, 0x61, 0xc5, 0xd8, 0x58, 0x08, 0x35, 0x69,
+ 0xc5, 0xd6, 0xd2, 0x08, 0x35, 0x71, 0xc5, 0xd6, 0xeb, 0x08, 0x35, 0x79,
+ 0xc5, 0xd7, 0x77, 0x08, 0x35, 0x81, 0xc5, 0xd5, 0x74, 0x08, 0x35, 0x89,
+ 0xc5, 0xdd, 0x9e, 0x08, 0x35, 0x90, 0x9e, 0x08, 0x35, 0x99, 0x9f, 0x08,
+ 0x35, 0xa1, 0xa0, 0x08, 0x35, 0xa9, 0xa1, 0x08, 0x35, 0xb1, 0xa2, 0x08,
+ 0x35, 0xb9, 0xa3, 0x08, 0x35, 0xc1, 0xa5, 0x08, 0x35, 0xc9, 0xa6, 0x08,
+ 0x35, 0xd0, 0x9d, 0x08, 0x35, 0xd9, 0x9e, 0x08, 0x35, 0xe1, 0x9f, 0x08,
+ 0x35, 0xe9, 0xa0, 0x08, 0x35, 0xf1, 0xa2, 0x08, 0x35, 0xf9, 0xa3, 0x08,
+ 0x36, 0x00, 0x9d, 0x08, 0x36, 0x09, 0x9e, 0x08, 0x36, 0x11, 0xa0, 0x08,
+ 0x36, 0x19, 0xa1, 0x08, 0x36, 0x21, 0xa2, 0x08, 0x36, 0x29, 0xa3, 0x08,
+ 0x36, 0x31, 0xa4, 0x08, 0x36, 0x39, 0xa5, 0x08, 0x36, 0x41, 0xa6, 0x08,
+ 0x36, 0x48, 0x9d, 0x08, 0x36, 0x51, 0x9e, 0x08, 0x36, 0x59, 0x9f, 0x08,
+ 0x36, 0x61, 0xa1, 0x08, 0x36, 0x69, 0xa2, 0x08, 0x36, 0x71, 0xa3, 0x08,
+ 0x36, 0x79, 0xa4, 0x08, 0x36, 0x81, 0xa5, 0x08, 0x36, 0x89, 0xa6, 0x08,
+ 0x36, 0x90, 0x9d, 0x08, 0x36, 0x99, 0x9e, 0x08, 0x36, 0xa1, 0x9f, 0x08,
+ 0x36, 0xa9, 0xa2, 0x08, 0x36, 0xb1, 0xa4, 0x08, 0x36, 0xb9, 0xa5, 0x08,
+ 0x36, 0xc1, 0xa6, 0x08, 0x36, 0xc8, 0x9d, 0x08, 0x36, 0xd1, 0x9e, 0x08,
+ 0x36, 0xd9, 0x9f, 0x08, 0x36, 0xe1, 0xa0, 0x08, 0x36, 0xe9, 0xa1, 0x08,
+ 0x36, 0xf1, 0xa2, 0x08, 0x36, 0xf9, 0xa3, 0x08, 0x37, 0x01, 0xa4, 0x08,
+ 0x37, 0x09, 0xa6, 0x08, 0x37, 0x10, 0xa0, 0x08, 0x37, 0x19, 0xa1, 0x08,
+ 0x37, 0x21, 0xa2, 0x08, 0x37, 0x29, 0xa3, 0x08, 0x37, 0x31, 0xa5, 0x08,
+ 0x37, 0x39, 0xa6, 0x08, 0x37, 0x40, 0x9d, 0x08, 0x37, 0x49, 0x9e, 0x08,
+ 0x37, 0x51, 0x9f, 0x08, 0x37, 0x59, 0xa0, 0x08, 0x37, 0x61, 0xa1, 0x08,
+ 0x37, 0x69, 0xa2, 0x08, 0x37, 0x71, 0xa3, 0x08, 0x37, 0x79, 0xa4, 0x08,
+ 0x37, 0x81, 0xa5, 0x08, 0x37, 0x89, 0xa6, 0x08, 0x37, 0x90, 0x9d, 0x08,
+ 0x37, 0x99, 0x9e, 0x08, 0x37, 0xa1, 0x9f, 0x08, 0x37, 0xa9, 0xa0, 0x08,
+ 0x37, 0xb1, 0xa1, 0x08, 0x37, 0xb9, 0xa2, 0x08, 0x37, 0xc1, 0xa3, 0x08,
+ 0x37, 0xc9, 0xa4, 0x08, 0x37, 0xd1, 0xa5, 0x08, 0x37, 0xd9, 0xa6, 0x08,
+ 0x37, 0xe0, 0x9e, 0x08, 0x37, 0xe9, 0x9f, 0x08, 0x37, 0xf1, 0xa1, 0x08,
+ 0x37, 0xf9, 0xa2, 0x08, 0x38, 0x01, 0xa3, 0x08, 0x38, 0x09, 0xa5, 0x08,
+ 0x38, 0x10, 0x1d, 0xc3, 0x2e, 0x42, 0x1e, 0xc3, 0x2e, 0x78, 0x22, 0xc3,
+ 0x2e, 0xa8, 0x21, 0xc3, 0x2e, 0xde, 0x23, 0xc3, 0x2f, 0x0e, 0x25, 0xc3,
+ 0x2f, 0x3e, 0x24, 0xc3, 0x2f, 0x56, 0x1f, 0xc3, 0x2f, 0x8c, 0x20, 0xc3,
+ 0x2f, 0xc2, 0x26, 0x43, 0x2f, 0xf2, 0x1e, 0xc3, 0x2f, 0xfe, 0xc2, 0xe1,
+ 0x2e, 0x08, 0x02, 0x91, 0xc2, 0x00, 0x20, 0x08, 0x02, 0x99, 0x21, 0xc3,
+ 0x30, 0x06, 0xc2, 0x00, 0x22, 0x08, 0x02, 0xb1, 0x23, 0xc3, 0x30, 0x0e,
+ 0xc2, 0x3c, 0xc8, 0x08, 0x02, 0xc9, 0x25, 0x43, 0x30, 0x16, 0x1e, 0xc3,
+ 0x30, 0x26, 0x1f, 0x43, 0x30, 0x4a, 0xc3, 0xe5, 0xba, 0x08, 0x06, 0xf1,
+ 0x1f, 0xc3, 0x30, 0x5a, 0xc3, 0xe6, 0x4a, 0x08, 0x07, 0xd0, 0x1f, 0xc3,
+ 0x30, 0x6c, 0x20, 0xc3, 0x30, 0x78, 0xc8, 0xbe, 0x92, 0x08, 0x05, 0x20,
+ 0x46, 0x00, 0x8b, 0xc3, 0x30, 0x84, 0x05, 0xc3, 0x30, 0xb3, 0x0b, 0xc3,
+ 0x30, 0xc2, 0x03, 0xc3, 0x30, 0xce, 0xc8, 0xbf, 0x12, 0x05, 0x5a, 0x29,
+ 0xd1, 0x52, 0x66, 0x00, 0x14, 0x29, 0xc6, 0xa2, 0xbb, 0x00, 0x06, 0xf8,
+ 0x46, 0x00, 0x8b, 0xc3, 0x30, 0xda, 0xc2, 0x00, 0x0a, 0x05, 0x5a, 0x9b,
+ 0x03, 0x31, 0x08, 0x46, 0x17, 0x8d, 0xc3, 0x31, 0x0e, 0xc8, 0xba, 0x4a,
+ 0x05, 0x39, 0x6b, 0x03, 0x31, 0x1e, 0xc2, 0x00, 0x45, 0x05, 0x3b, 0x78,
+ 0xcb, 0x8d, 0x37, 0x00, 0x15, 0x3b, 0x03, 0x31, 0x24, 0x17, 0xc3, 0x31,
+ 0x2a, 0x46, 0x00, 0x8b, 0xc3, 0x31, 0x34, 0x0a, 0xc3, 0x31, 0x63, 0x11,
+ 0xc3, 0x31, 0x72, 0xc9, 0xab, 0x40, 0x00, 0x15, 0x33, 0x03, 0x31, 0x7e,
+ 0xd3, 0x45, 0x14, 0x00, 0x15, 0x41, 0x9c, 0x05, 0x39, 0x49, 0xc7, 0xc3,
+ 0xa0, 0x05, 0x39, 0x59, 0xcb, 0x98, 0x8f, 0x01, 0x63, 0xb8, 0x46, 0x00,
+ 0x8b, 0xc3, 0x31, 0x84, 0x44, 0x05, 0x76, 0xc3, 0x31, 0xda, 0x91, 0x05,
+ 0x3a, 0x79, 0xc4, 0x6d, 0xb5, 0x05, 0x3d, 0xb9, 0xcb, 0x8e, 0xc3, 0x05,
+ 0x3e, 0x09, 0x8b, 0x00, 0x0d, 0x19, 0x97, 0x00, 0x11, 0x18, 0x46, 0x00,
+ 0x8b, 0xc3, 0x31, 0xe8, 0x42, 0x01, 0xbb, 0xc3, 0x32, 0x32, 0x10, 0xc3,
+ 0x32, 0x3f, 0x95, 0x05, 0x3b, 0x68, 0x07, 0xc3, 0x32, 0x4b, 0x46, 0x00,
+ 0x8b, 0xc3, 0x32, 0x5a, 0x9c, 0x00, 0x0f, 0x9b, 0x03, 0x32, 0x87, 0x11,
+ 0xc3, 0x32, 0x8b, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0x89, 0xc9, 0xb2, 0x09,
+ 0x00, 0x11, 0xc0, 0xc2, 0x25, 0xa1, 0x00, 0x14, 0x93, 0x03, 0x32, 0x97,
+ 0xc2, 0x00, 0x75, 0x00, 0x0a, 0x5b, 0x03, 0x32, 0x9b, 0xc2, 0x01, 0xe2,
+ 0x00, 0x14, 0x1b, 0x03, 0x32, 0xa1, 0x46, 0x00, 0x8b, 0xc3, 0x32, 0xa7,
+ 0x4e, 0x73, 0x36, 0xc3, 0x32, 0xfd, 0x96, 0x05, 0x3b, 0x5a, 0x03, 0x33,
+ 0x09, 0x00, 0xc3, 0x33, 0x0d, 0x48, 0x10, 0x2f, 0xc3, 0x33, 0x19, 0xc8,
+ 0xb7, 0xda, 0x00, 0x13, 0x21, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0xaa, 0x03,
+ 0x33, 0x46, 0x46, 0x00, 0x8b, 0xc3, 0x33, 0x4c, 0x07, 0xc3, 0x33, 0x93,
+ 0xc5, 0xb8, 0xe3, 0x00, 0x0b, 0xfb, 0x03, 0x33, 0xa2, 0xc9, 0xab, 0x40,
+ 0x00, 0x15, 0x51, 0xc9, 0xa8, 0x67, 0x00, 0x15, 0x59, 0xc2, 0x01, 0xdf,
+ 0x05, 0x3b, 0x91, 0xd1, 0x4f, 0x47, 0x00, 0x0c, 0xd9, 0x8c, 0x00, 0x0e,
+ 0x48, 0xcb, 0x92, 0x5f, 0x00, 0x15, 0x4b, 0x03, 0x33, 0xa8, 0x46, 0x00,
+ 0x8b, 0x43, 0x33, 0xae, 0x46, 0x00, 0x8b, 0xc3, 0x33, 0xcc, 0xc3, 0x3c,
+ 0x63, 0x00, 0x10, 0xe8, 0x45, 0x04, 0xcc, 0xc3, 0x34, 0x07, 0x46, 0x00,
+ 0x8b, 0xc3, 0x34, 0x13, 0xc2, 0x01, 0xdf, 0x05, 0x3b, 0x98, 0x00, 0xc3,
+ 0x34, 0x37, 0xc6, 0x10, 0x3f, 0x00, 0x14, 0x53, 0x03, 0x34, 0x46, 0x87,
+ 0x00, 0xeb, 0x59, 0x91, 0x05, 0x5b, 0x19, 0x8b, 0x05, 0x5a, 0x81, 0x8f,
+ 0x05, 0x3b, 0xc0, 0x00, 0xc3, 0x34, 0x4c, 0xc4, 0xde, 0x3f, 0x00, 0x12,
+ 0x8b, 0x03, 0x34, 0x58, 0x87, 0x00, 0x07, 0x33, 0x03, 0x34, 0x5e, 0x83,
+ 0x05, 0x39, 0x99, 0x91, 0x05, 0x39, 0xa9, 0x97, 0x05, 0x39, 0xb9, 0x98,
+ 0x05, 0x39, 0xcb, 0x03, 0x34, 0x64, 0x9b, 0x05, 0x39, 0xe9, 0xca, 0xa4,
+ 0x72, 0x05, 0x3e, 0x18, 0x46, 0x00, 0x8b, 0x43, 0x34, 0x6a, 0x46, 0x00,
+ 0x8b, 0xc3, 0x34, 0x8c, 0xc3, 0x0a, 0xe3, 0x05, 0x39, 0x3b, 0x03, 0x34,
+ 0xb2, 0x98, 0x00, 0x0c, 0xa9, 0xc5, 0xd3, 0x2c, 0x01, 0x63, 0xb0, 0x46,
+ 0x00, 0x8b, 0x43, 0x34, 0xb8, 0x46, 0x00, 0x8b, 0x43, 0x34, 0xe8, 0x46,
+ 0x00, 0x8b, 0xc3, 0x34, 0xf8, 0x9b, 0x05, 0x3b, 0x09, 0xcb, 0x91, 0x15,
+ 0x05, 0x3b, 0x19, 0xc3, 0x02, 0x39, 0x05, 0x3b, 0x49, 0x47, 0xc8, 0xcb,
+ 0x43, 0x35, 0x1a, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0x2c, 0xc2, 0x00, 0x0a,
+ 0x00, 0x13, 0xc0, 0x00, 0xc3, 0x35, 0x54, 0xc2, 0x01, 0xdf, 0x05, 0x3b,
+ 0xa1, 0x8c, 0x00, 0x0e, 0x60, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0x60, 0xc2,
+ 0x00, 0x39, 0x00, 0x09, 0xc0, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0x8f, 0x47,
+ 0x23, 0x34, 0xc3, 0x35, 0xc3, 0xc4, 0x38, 0x2c, 0x00, 0x13, 0x19, 0xc2,
+ 0x00, 0xd0, 0x00, 0x0d, 0x18, 0x46, 0x00, 0x8b, 0xc3, 0x35, 0xd5, 0xcc,
+ 0x8b, 0x95, 0x00, 0xe8, 0xb9, 0x03, 0xc3, 0x36, 0x05, 0x4b, 0x8d, 0x58,
+ 0xc3, 0x36, 0x11, 0xc7, 0xc9, 0xb9, 0x05, 0x3a, 0x39, 0xc3, 0x04, 0x87,
+ 0x05, 0x3d, 0xa8, 0x46, 0x00, 0x8b, 0x43, 0x36, 0x1c, 0x46, 0x00, 0x8b,
+ 0xc3, 0x36, 0x26, 0xc9, 0xae, 0xf1, 0x00, 0x11, 0xc8, 0x88, 0x07, 0xd8,
+ 0x03, 0x03, 0x36, 0x3b, 0x8e, 0x07, 0xd8, 0x11, 0x8b, 0x07, 0xd8, 0x08,
+ 0x8d, 0x0e, 0xf8, 0x81, 0x89, 0x0e, 0xf8, 0x11, 0x94, 0x00, 0xe8, 0xd1,
+ 0x8f, 0x05, 0x3f, 0xd1, 0x87, 0x01, 0x63, 0xd8, 0xc4, 0xa8, 0x1a, 0x0e,
+ 0xf8, 0x21, 0xc6, 0x01, 0x73, 0x00, 0xe8, 0x60, 0x94, 0x00, 0xe8, 0xc9,
+ 0x90, 0x00, 0xe8, 0x70, 0xc4, 0xb0, 0x8b, 0x00, 0xf7, 0xf1, 0xc5, 0x1e,
+ 0xc8, 0x00, 0xf7, 0xc1, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x9b, 0x03, 0x36,
+ 0x43, 0x06, 0xc3, 0x36, 0x49, 0xc5, 0x1f, 0x0c, 0x00, 0xf7, 0x91, 0xc5,
+ 0x31, 0xee, 0x00, 0x06, 0xe9, 0xca, 0x08, 0xf6, 0x00, 0x0b, 0xb1, 0xc6,
+ 0x60, 0xb1, 0x00, 0x11, 0x91, 0xc6, 0x01, 0x73, 0x00, 0x12, 0x70, 0x47,
+ 0xc0, 0x2e, 0xc3, 0x36, 0x55, 0xc8, 0xba, 0x02, 0x05, 0x3e, 0xb0, 0x44,
+ 0x05, 0x18, 0xc3, 0x36, 0x5f, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0xf1, 0xc4,
+ 0x01, 0x23, 0x01, 0x63, 0x70, 0x45, 0x00, 0x8c, 0xc3, 0x36, 0x6b, 0xc3,
+ 0x01, 0x5d, 0x00, 0x12, 0x20, 0x42, 0x01, 0x23, 0xc3, 0x36, 0xb5, 0x05,
+ 0xc3, 0x36, 0xc4, 0x06, 0xc3, 0x36, 0xd3, 0x0f, 0xc3, 0x36, 0xe0, 0xc5,
+ 0x1e, 0xc8, 0x00, 0x06, 0xab, 0x03, 0x36, 0xef, 0xc6, 0x01, 0x73, 0x00,
+ 0x06, 0xc3, 0x03, 0x36, 0xf5, 0xc5, 0x1f, 0x0c, 0x00, 0x06, 0x91, 0xc5,
+ 0x31, 0xee, 0x00, 0x06, 0x99, 0x42, 0x01, 0xc8, 0xc3, 0x36, 0xfb, 0xc5,
+ 0x1d, 0x88, 0x00, 0x0a, 0x71, 0xc6, 0xcc, 0x8f, 0x00, 0x0f, 0x53, 0x03,
+ 0x37, 0x07, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x70, 0x91, 0x00, 0x0c, 0x31,
+ 0x87, 0x00, 0x0c, 0x80, 0x06, 0xc3, 0x37, 0x0d, 0xca, 0x9e, 0x5a, 0x00,
+ 0xf6, 0x41, 0xc5, 0x1e, 0xc8, 0x00, 0x09, 0x43, 0x03, 0x37, 0x1a, 0xc5,
+ 0x1f, 0x0c, 0x00, 0x06, 0x61, 0xc5, 0x31, 0xee, 0x00, 0x06, 0x69, 0x05,
+ 0xc3, 0x37, 0x20, 0xc6, 0x60, 0xb1, 0x00, 0x09, 0x51, 0xc5, 0x1d, 0x88,
+ 0x00, 0x09, 0x61, 0xc6, 0xcc, 0x8f, 0x00, 0x09, 0x71, 0xc6, 0x01, 0x73,
+ 0x00, 0x0c, 0xb9, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x50, 0x88, 0x05, 0x3b,
+ 0xd9, 0x89, 0x05, 0x3b, 0xe9, 0x94, 0x05, 0x3c, 0x11, 0x95, 0x05, 0x3c,
+ 0x21, 0x96, 0x05, 0x3c, 0x31, 0x86, 0x05, 0x3b, 0xc8, 0x05, 0xc3, 0x37,
+ 0x2c, 0xc5, 0x1e, 0xc8, 0x00, 0xf5, 0xe3, 0x03, 0x37, 0x44, 0xca, 0x9e,
+ 0x5a, 0x00, 0xf5, 0xd1, 0x06, 0xc3, 0x37, 0x4a, 0xc6, 0x60, 0xb1, 0x00,
+ 0x08, 0x93, 0x03, 0x37, 0x54, 0xc5, 0x1f, 0x0c, 0x00, 0x06, 0x41, 0xc5,
+ 0x31, 0xee, 0x00, 0x06, 0x49, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0xa1, 0xc6,
+ 0xcc, 0x8f, 0x00, 0x08, 0xc1, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x31, 0xc6,
+ 0x01, 0x73, 0x00, 0x12, 0x30, 0xc3, 0x00, 0x49, 0x05, 0x39, 0x11, 0xc2,
+ 0x00, 0x74, 0x05, 0x39, 0x20, 0x8a, 0x00, 0x06, 0x80, 0x00, 0x43, 0x37,
+ 0x5a, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0x13, 0x03, 0x37, 0x66, 0x05, 0xc3,
+ 0x37, 0x6c, 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x11, 0x06, 0xc3, 0x37, 0x7b,
+ 0x45, 0x00, 0x9d, 0xc3, 0x37, 0x88, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x11,
+ 0xc5, 0x1f, 0x0c, 0x00, 0x06, 0x01, 0xc5, 0x31, 0xee, 0x00, 0x06, 0x09,
+ 0xc5, 0x1e, 0xc8, 0x00, 0x06, 0x19, 0xc6, 0x60, 0xb1, 0x00, 0x08, 0x01,
+ 0xc6, 0xcc, 0x8f, 0x00, 0x08, 0x21, 0xc6, 0x01, 0x73, 0x00, 0x11, 0xd0,
+ 0x46, 0x00, 0x8b, 0x43, 0x37, 0x97, 0xd4, 0x3e, 0x6c, 0x05, 0x39, 0xd0,
+ 0x44, 0x05, 0x18, 0xc3, 0x37, 0xa3, 0x05, 0xc3, 0x37, 0xb2, 0xc5, 0x31,
+ 0xee, 0x00, 0x0a, 0xd3, 0x03, 0x37, 0xcd, 0xce, 0x38, 0xe6, 0x05, 0x3d,
+ 0x41, 0xc4, 0x01, 0x23, 0x05, 0x3e, 0x29, 0x15, 0x43, 0x37, 0xd3, 0xc6,
+ 0xbb, 0x8c, 0x05, 0x3d, 0x61, 0xc3, 0x74, 0x83, 0x00, 0x0c, 0x78, 0xd0,
+ 0x5f, 0x12, 0x00, 0x12, 0x51, 0xc9, 0xb1, 0xca, 0x05, 0x3d, 0x70, 0xca,
+ 0x64, 0x13, 0x00, 0xf4, 0xa1, 0x06, 0xc3, 0x37, 0xdf, 0x05, 0xc3, 0x37,
+ 0xeb, 0xcc, 0x51, 0x28, 0x05, 0x3e, 0x31, 0xc5, 0x31, 0xee, 0x00, 0x0b,
+ 0xc9, 0x15, 0xc3, 0x37, 0xf7, 0xc4, 0x01, 0x23, 0x00, 0x11, 0x20, 0xc8,
+ 0x20, 0xa9, 0x00, 0xf4, 0x61, 0xc8, 0x16, 0x15, 0x00, 0xf4, 0x50, 0x06,
+ 0xc3, 0x38, 0x03, 0xc5, 0x31, 0xee, 0x00, 0xf4, 0x11, 0xc5, 0x1f, 0x0c,
+ 0x00, 0xf4, 0x01, 0xc4, 0x01, 0x23, 0x01, 0x63, 0x91, 0xca, 0x08, 0xf6,
+ 0x00, 0x0b, 0xa0, 0x06, 0xc3, 0x38, 0x0f, 0xc5, 0x1e, 0xc8, 0x00, 0xf3,
+ 0xe1, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x90, 0xc2, 0x10, 0x11, 0x05, 0x3c,
+ 0xd1, 0xc2, 0x49, 0x0c, 0x05, 0x3c, 0xe1, 0xc2, 0x0f, 0xe1, 0x05, 0x3c,
+ 0xf0, 0x05, 0xc3, 0x38, 0x1b, 0xca, 0x64, 0x13, 0x00, 0xf3, 0x71, 0x06,
+ 0xc3, 0x38, 0x33, 0xc6, 0x01, 0x73, 0x00, 0x0b, 0x31, 0xc4, 0x01, 0x23,
+ 0x00, 0x0d, 0x61, 0xce, 0x01, 0x19, 0x00, 0x0d, 0x70, 0xcc, 0x23, 0x3f,
+ 0x05, 0x3b, 0x22, 0x03, 0x38, 0x3f, 0xc9, 0x67, 0x20, 0x05, 0x3b, 0xf1,
+ 0x8e, 0x05, 0x3c, 0x01, 0x8a, 0x05, 0x3c, 0x69, 0x8d, 0x05, 0x3d, 0x81,
+ 0x96, 0x05, 0x3d, 0x89, 0x8f, 0x00, 0x0c, 0xe1, 0x98, 0x00, 0x12, 0x29,
+ 0x83, 0x01, 0x63, 0x7a, 0x03, 0x38, 0x45, 0xc3, 0x22, 0xcb, 0x00, 0x0c,
+ 0x21, 0xc3, 0x02, 0x9f, 0x00, 0x0d, 0x39, 0xc4, 0x0d, 0x13, 0x00, 0x0d,
+ 0xe0, 0x45, 0x00, 0x8c, 0xc3, 0x38, 0x4b, 0xc7, 0xa6, 0x69, 0x05, 0x3a,
+ 0xd0, 0xca, 0x9a, 0xe0, 0x05, 0x39, 0xf1, 0xc6, 0x21, 0xa3, 0x05, 0x3d,
+ 0x59, 0x87, 0x00, 0x0c, 0x71, 0xc6, 0xd3, 0x2b, 0x05, 0x3f, 0xa8, 0xc9,
+ 0x16, 0x14, 0x00, 0xf2, 0xb1, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0xa1, 0x15,
+ 0xc3, 0x38, 0x79, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x21, 0xc8, 0xbe, 0x9a,
+ 0x05, 0x3a, 0x90, 0x05, 0xc3, 0x38, 0x88, 0x0e, 0xc3, 0x38, 0x9a, 0x06,
+ 0xc3, 0x38, 0xac, 0xc5, 0x1f, 0x0c, 0x00, 0x0f, 0xc1, 0xc5, 0x1e, 0xc8,
+ 0x00, 0x06, 0x89, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0x19, 0xce, 0x38, 0xe6,
+ 0x05, 0x3d, 0x21, 0xce, 0x6e, 0x04, 0x00, 0x0e, 0x58, 0x05, 0xc3, 0x38,
+ 0xb8, 0xca, 0x64, 0x13, 0x00, 0xf1, 0xd1, 0x42, 0x00, 0x58, 0xc3, 0x38,
+ 0xca, 0xcb, 0x8f, 0xb5, 0x05, 0x3a, 0x41, 0xc5, 0x31, 0xee, 0x00, 0x09,
+ 0xc9, 0x47, 0x04, 0xcb, 0xc3, 0x38, 0xd9, 0x15, 0xc3, 0x38, 0xe5, 0x04,
+ 0x43, 0x38, 0xf1, 0xca, 0x64, 0x13, 0x00, 0xf1, 0xa1, 0x06, 0xc3, 0x38,
+ 0xfd, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0x81, 0xc6, 0x01, 0x73, 0x05, 0x3a,
+ 0x03, 0x03, 0x39, 0x0f, 0x05, 0xc3, 0x39, 0x15, 0xce, 0x38, 0xe6, 0x05,
+ 0x3d, 0x11, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0xc0, 0xcb, 0x97, 0x2f, 0x00,
+ 0xf1, 0x51, 0x05, 0xc3, 0x39, 0x21, 0x06, 0xc3, 0x39, 0x33, 0xc6, 0x01,
+ 0x73, 0x00, 0x09, 0x31, 0xc4, 0x01, 0x23, 0x05, 0x3d, 0x50, 0xc6, 0x60,
+ 0xb1, 0x00, 0xf1, 0x01, 0xc5, 0x31, 0xee, 0x00, 0x0f, 0xa1, 0x05, 0xc3,
+ 0x39, 0x45, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0xf1, 0xc9, 0x16, 0x14, 0x00,
+ 0x09, 0x01, 0xce, 0x38, 0xe6, 0x05, 0x3d, 0x01, 0xc4, 0x01, 0x23, 0x00,
+ 0x0c, 0x99, 0xc6, 0x01, 0x73, 0x00, 0x0f, 0x20, 0x97, 0x05, 0x3d, 0xf1,
+ 0x8b, 0x05, 0x3d, 0xe1, 0x83, 0x05, 0x3d, 0xd1, 0xc4, 0x00, 0xf0, 0x00,
+ 0x12, 0x08, 0xc9, 0x16, 0x14, 0x00, 0xf0, 0xf1, 0xc6, 0x01, 0x73, 0x05,
+ 0x3c, 0xc1, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x88, 0x05, 0xc3, 0x39, 0x57,
+ 0xca, 0x64, 0x13, 0x00, 0xf0, 0x71, 0x44, 0x05, 0x18, 0xc3, 0x39, 0x69,
+ 0x15, 0xc3, 0x39, 0x75, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x51, 0xc6, 0xcf,
+ 0xcb, 0x00, 0x0c, 0x58, 0xcb, 0x8e, 0x60, 0x00, 0x0e, 0x20, 0x05, 0xc3,
+ 0x39, 0x8a, 0xc5, 0x31, 0xee, 0x00, 0x08, 0x31, 0xc9, 0x16, 0x14, 0x00,
+ 0x08, 0x51, 0xc3, 0x01, 0x5d, 0x05, 0x3c, 0x91, 0xcc, 0x51, 0x28, 0x05,
+ 0x3e, 0x21, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x39, 0xc6, 0x01, 0x73, 0x00,
+ 0x11, 0xd8, 0xcb, 0x8e, 0x3f, 0x05, 0x39, 0x70, 0xca, 0x64, 0x13, 0x00,
+ 0xf0, 0x31, 0x44, 0x05, 0x18, 0xc3, 0x39, 0x9f, 0xc8, 0xbe, 0x9a, 0x05,
+ 0x3c, 0xb1, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x09, 0xc6, 0xcf, 0xcb, 0x00,
+ 0x0c, 0x11, 0xc6, 0x01, 0x73, 0x00, 0x12, 0x18, 0x05, 0xc3, 0x39, 0xab,
+ 0xc6, 0x01, 0x73, 0x00, 0x12, 0x40, 0xd8, 0x25, 0xeb, 0x05, 0x3a, 0xb1,
+ 0xcf, 0x3e, 0xad, 0x05, 0x3a, 0xc0, 0x83, 0x00, 0x74, 0x89, 0xc2, 0x00,
+ 0xd0, 0x00, 0x74, 0x90, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0xb1, 0xcc, 0x04,
+ 0xcb, 0x0f, 0xdb, 0x28, 0xcc, 0x04, 0xcb, 0x0f, 0xdb, 0x21, 0xc5, 0x00,
+ 0x2c, 0x0f, 0xdb, 0x30, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0xd9, 0xcc, 0x04,
+ 0xcb, 0x0f, 0xdb, 0x00, 0xcc, 0x04, 0xcb, 0x0f, 0xda, 0xf9, 0xc5, 0x00,
+ 0x2c, 0x0f, 0xdb, 0x08, 0xcc, 0x07, 0xbb, 0x01, 0x0f, 0x69, 0xce, 0x0e,
+ 0xf1, 0x01, 0x0f, 0x60, 0x00, 0x43, 0x39, 0xb7, 0xd2, 0x05, 0xd4, 0x0f,
+ 0xc0, 0x09, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x88, 0xca, 0x03, 0x87, 0x01,
+ 0x0d, 0x89, 0xc9, 0x01, 0x88, 0x01, 0x0d, 0x80, 0x06, 0xc3, 0x39, 0xc9,
+ 0xdf, 0x0d, 0x3e, 0x01, 0x4b, 0x18, 0xc3, 0xe5, 0x8a, 0x0f, 0xb3, 0x39,
+ 0xc9, 0xb4, 0x91, 0x0f, 0xb2, 0xf8, 0xe0, 0x0a, 0x87, 0x01, 0x3a, 0xd8,
+ 0xe0, 0x0b, 0x27, 0x01, 0x3b, 0x00, 0xe0, 0x0b, 0x27, 0x01, 0x3a, 0xf8,
+ 0xdc, 0x12, 0xe1, 0x01, 0x3d, 0x31, 0xde, 0x0e, 0x14, 0x01, 0x3d, 0x28,
+ 0xe0, 0x0a, 0x87, 0x01, 0x3a, 0xe8, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xd1,
+ 0xdb, 0x17, 0x46, 0x0f, 0xc0, 0xf0, 0xc4, 0x01, 0xce, 0x0f, 0xc4, 0xf1,
+ 0xc5, 0x06, 0x67, 0x0f, 0xc4, 0xf8, 0xc6, 0x64, 0xa4, 0x07, 0xda, 0x4b,
+ 0x03, 0x39, 0xcf, 0x15, 0x43, 0x39, 0xd5, 0x46, 0x00, 0x8b, 0x43, 0x39,
+ 0xe1, 0xc9, 0x60, 0xf3, 0x07, 0xd9, 0x49, 0xc4, 0x40, 0x95, 0x07, 0xd9,
+ 0x00, 0xc8, 0x4c, 0xcc, 0x02, 0x6e, 0x69, 0xc3, 0x00, 0x28, 0x02, 0x6f,
+ 0x08, 0xc3, 0x0e, 0xa7, 0x00, 0x04, 0x41, 0xd2, 0x49, 0x55, 0x00, 0x04,
+ 0x48, 0x0d, 0xc3, 0x39, 0xf3, 0x15, 0xc3, 0x3a, 0x05, 0xc5, 0x79, 0xf2,
+ 0x05, 0x4b, 0x49, 0xc5, 0xda, 0xe7, 0x05, 0x4b, 0x41, 0xc6, 0xc0, 0x7c,
+ 0x05, 0x4b, 0x31, 0xc5, 0xd9, 0x61, 0x00, 0x88, 0xc1, 0xc5, 0x90, 0xe4,
+ 0x00, 0x88, 0xd1, 0xc5, 0xdb, 0xff, 0x05, 0x4b, 0x68, 0xcb, 0x90, 0xde,
+ 0x05, 0x4b, 0xe1, 0x16, 0xc3, 0x3a, 0x11, 0xc5, 0xdb, 0xff, 0x00, 0x88,
+ 0x6b, 0x03, 0x3a, 0x1d, 0xc4, 0xad, 0x2b, 0x00, 0x88, 0x53, 0x03, 0x3a,
+ 0x23, 0xc6, 0x8e, 0xde, 0x00, 0x88, 0x09, 0xc5, 0x79, 0xf2, 0x00, 0x88,
+ 0x41, 0xc5, 0xd9, 0x61, 0x00, 0x88, 0xa1, 0xc5, 0xd6, 0x8c, 0x00, 0x88,
+ 0xc9, 0xc5, 0xb7, 0x9d, 0x00, 0x8a, 0x39, 0xc5, 0x90, 0xe4, 0x00, 0x8a,
+ 0xc0, 0x02, 0x43, 0x3a, 0x29, 0x02, 0x43, 0x3a, 0x5d, 0x02, 0x43, 0x3a,
+ 0x69, 0xc5, 0x90, 0xe4, 0x05, 0x4b, 0xb9, 0xc5, 0xd6, 0x8c, 0x05, 0x4b,
+ 0xb1, 0xc6, 0x8e, 0xde, 0x00, 0x8a, 0x09, 0x16, 0xc3, 0x3a, 0x8b, 0xc5,
+ 0xda, 0xe7, 0x00, 0x8a, 0x19, 0x12, 0xc3, 0x3a, 0x97, 0xc4, 0xad, 0x2b,
+ 0x00, 0x8a, 0x29, 0x05, 0x43, 0x3a, 0xa9, 0xc4, 0xad, 0x2b, 0x05, 0x4b,
+ 0x89, 0xc6, 0xc0, 0x7c, 0x05, 0x4b, 0x81, 0xc6, 0x8e, 0xde, 0x05, 0x4b,
+ 0x79, 0xc5, 0x79, 0xf2, 0x00, 0x88, 0xe0, 0x02, 0x43, 0x3a, 0xb5, 0xc7,
+ 0xc0, 0x7b, 0x00, 0x8a, 0xd0, 0xc5, 0xd6, 0x8c, 0x00, 0x88, 0xd9, 0xc5,
+ 0xda, 0xe7, 0x00, 0x88, 0xe9, 0x12, 0xc3, 0x3a, 0xd9, 0xca, 0xa7, 0x2e,
+ 0x00, 0x89, 0x60, 0xc6, 0x8e, 0xde, 0x00, 0x88, 0x99, 0xc6, 0xc0, 0x7c,
+ 0x00, 0x88, 0xa9, 0xc5, 0x79, 0xf2, 0x00, 0x88, 0xb1, 0xc4, 0xad, 0x2b,
+ 0x00, 0x8a, 0xd9, 0xc5, 0xdb, 0xff, 0x00, 0x8a, 0xe1, 0xc5, 0x90, 0xe4,
+ 0x00, 0x8a, 0xe8, 0xc6, 0xd1, 0x03, 0x00, 0x8a, 0x68, 0xc4, 0xc6, 0x7b,
+ 0x00, 0x88, 0x73, 0x03, 0x3a, 0xe5, 0x45, 0xd5, 0x1f, 0x43, 0x3a, 0xe9,
+ 0x15, 0xc3, 0x3a, 0xf1, 0x05, 0x43, 0x3a, 0xfd, 0x87, 0x00, 0x8b, 0x11,
+ 0x02, 0xc3, 0x3b, 0x09, 0xc4, 0xa6, 0x08, 0x00, 0x8c, 0xf2, 0x03, 0x3b,
+ 0x17, 0x83, 0x00, 0x8b, 0x1b, 0x03, 0x3b, 0x1b, 0x87, 0x00, 0x8b, 0x43,
+ 0x03, 0x3b, 0x23, 0x91, 0x00, 0x8b, 0x6b, 0x03, 0x3b, 0x2a, 0x97, 0x00,
+ 0x8b, 0x93, 0x03, 0x3b, 0x2e, 0x8b, 0x00, 0x8b, 0xa2, 0x03, 0x3b, 0x32,
+ 0x91, 0x00, 0x8b, 0x2b, 0x03, 0x3b, 0x38, 0x97, 0x00, 0x8b, 0x9a, 0x03,
+ 0x3b, 0x3c, 0x87, 0x00, 0x8b, 0x61, 0x02, 0x43, 0x3b, 0x40, 0x83, 0x00,
+ 0x8b, 0x53, 0x03, 0x3b, 0x56, 0x87, 0x00, 0x8b, 0x83, 0x03, 0x3b, 0x5a,
+ 0x8b, 0x00, 0x8b, 0x88, 0x02, 0x43, 0x3b, 0x5e, 0x02, 0x43, 0x3b, 0x7e,
+ 0xc5, 0x8e, 0xdf, 0x00, 0x8d, 0x43, 0x03, 0x3b, 0x9e, 0xc6, 0xbb, 0xec,
+ 0x00, 0x8d, 0xf9, 0x47, 0x79, 0xeb, 0x43, 0x3b, 0xa2, 0x44, 0x3a, 0xbf,
+ 0xc3, 0x3b, 0xb2, 0xc3, 0x39, 0x37, 0x00, 0x8d, 0xd2, 0x03, 0x3b, 0xf7,
+ 0x02, 0x43, 0x3b, 0xfb, 0xc5, 0xc0, 0x7d, 0x00, 0x8d, 0x73, 0x03, 0x3c,
+ 0x21, 0xc6, 0xc1, 0x86, 0x00, 0x8e, 0x00, 0x02, 0x43, 0x3c, 0x25, 0x02,
+ 0x43, 0x3c, 0x50, 0xc4, 0x79, 0xf3, 0x00, 0x8d, 0xc3, 0x03, 0x3c, 0x74,
+ 0xc6, 0xba, 0x7c, 0x00, 0x8e, 0x0b, 0x03, 0x3c, 0x78, 0xc6, 0xca, 0x0e,
+ 0x00, 0x8f, 0x5a, 0x03, 0x3c, 0x7c, 0x02, 0x43, 0x3c, 0x80, 0xc4, 0xc6,
+ 0x7a, 0x00, 0x8d, 0xeb, 0x03, 0x3c, 0x8a, 0xc6, 0xc6, 0x79, 0x00, 0x8d,
+ 0xf0, 0x02, 0x43, 0x3c, 0x8e, 0xc6, 0xb7, 0x9c, 0x00, 0x8f, 0x83, 0x03,
+ 0x3c, 0xa6, 0xc9, 0x90, 0xe0, 0x00, 0x8f, 0xc8, 0xc5, 0xd9, 0xca, 0x01,
+ 0x89, 0x98, 0xc5, 0xda, 0xe7, 0x01, 0x8b, 0x89, 0x12, 0xc3, 0x3c, 0xaa,
+ 0xca, 0xa7, 0x2e, 0x01, 0x8b, 0xc8, 0xc6, 0x8e, 0xde, 0x01, 0x89, 0x91,
+ 0xc6, 0xc0, 0x7c, 0x01, 0x89, 0xc1, 0xc5, 0x79, 0xf2, 0x01, 0x8a, 0x19,
+ 0xc4, 0xad, 0x2b, 0x01, 0x8a, 0x31, 0xc5, 0xdb, 0xff, 0x01, 0x8a, 0x49,
+ 0xc5, 0xd9, 0x61, 0x01, 0x8b, 0x29, 0xc5, 0xb7, 0x9d, 0x01, 0x8c, 0x01,
+ 0xc5, 0x90, 0xe4, 0x01, 0x8c, 0x28, 0x02, 0x43, 0x3c, 0xb6, 0xc5, 0xdb,
+ 0xff, 0x01, 0x89, 0xa9, 0xc5, 0x90, 0xe4, 0x01, 0x89, 0xb1, 0xc6, 0xc0,
+ 0x7c, 0x01, 0x8b, 0x31, 0xc4, 0xad, 0x2b, 0x01, 0x8b, 0x39, 0xc7, 0xca,
+ 0x0d, 0x01, 0x8b, 0x40, 0xc6, 0x8e, 0xde, 0x01, 0x89, 0xd3, 0x03, 0x3c,
+ 0xd4, 0xc5, 0xda, 0xe7, 0x01, 0x89, 0xd9, 0x12, 0xc3, 0x3c, 0xda, 0xc4,
+ 0xad, 0x2b, 0x01, 0x89, 0xe9, 0x16, 0xc3, 0x3c, 0xef, 0xc5, 0x90, 0xe4,
+ 0x01, 0x8a, 0x01, 0xcb, 0x90, 0xde, 0x01, 0x8b, 0x68, 0x12, 0xc3, 0x3c,
+ 0xfb, 0xc4, 0xad, 0x2b, 0x01, 0x8b, 0x78, 0x02, 0x43, 0x3d, 0x07, 0x87,
+ 0x01, 0x8c, 0x70, 0x87, 0x01, 0x8a, 0x90, 0x91, 0x01, 0x8a, 0xab, 0x03,
+ 0x3d, 0x20, 0xc6, 0xb7, 0x9c, 0x01, 0x8c, 0x0a, 0x03, 0x3d, 0x26, 0x02,
+ 0x43, 0x3d, 0x2a, 0x02, 0x43, 0x3d, 0x37, 0x87, 0x01, 0x8a, 0xc8, 0x91,
+ 0x01, 0x8a, 0xe8, 0x83, 0x07, 0xfb, 0x39, 0x8b, 0x07, 0xfb, 0x41, 0x97,
+ 0x07, 0xfb, 0x49, 0x87, 0x07, 0xfb, 0x51, 0x91, 0x07, 0xfb, 0x59, 0x1b,
+ 0xc3, 0x3d, 0x44, 0xc2, 0x00, 0x16, 0x07, 0xfb, 0x78, 0xc4, 0x79, 0xf3,
+ 0x07, 0xfd, 0x61, 0xc6, 0xba, 0x7c, 0x07, 0xfd, 0x78, 0xc8, 0x4b, 0x94,
+ 0x08, 0x5b, 0xf9, 0xc7, 0x0d, 0x04, 0x08, 0x5b, 0xf0, 0xc4, 0x18, 0x12,
+ 0x08, 0x5b, 0xe9, 0x91, 0x08, 0x5b, 0xc8, 0xc3, 0x77, 0x79, 0x08, 0x5b,
+ 0x81, 0xc4, 0xdc, 0x2d, 0x08, 0x5b, 0x70, 0xc8, 0x4b, 0x94, 0x08, 0x5a,
+ 0xf9, 0xc7, 0x0d, 0x04, 0x08, 0x5a, 0xf0, 0xc4, 0x18, 0x12, 0x08, 0x5a,
+ 0xe9, 0x91, 0x08, 0x5a, 0xc8, 0xc4, 0xdc, 0x2d, 0x08, 0x5a, 0x71, 0xc3,
+ 0x77, 0x79, 0x08, 0x5a, 0x88, 0xcb, 0x57, 0x1e, 0x0f, 0x65, 0x99, 0xc2,
+ 0x02, 0xa0, 0x0f, 0x65, 0x90, 0xc4, 0x18, 0x10, 0x0f, 0x65, 0x49, 0xc2,
+ 0x22, 0xcc, 0x0f, 0x65, 0x40, 0xc3, 0x0d, 0x14, 0x0f, 0x65, 0x39, 0xc3,
+ 0x09, 0x9e, 0x0f, 0x65, 0x30, 0xc4, 0x02, 0xde, 0x0f, 0x65, 0x29, 0xc2,
+ 0x02, 0xa0, 0x0f, 0x65, 0x20, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xe8, 0xc8,
+ 0x4b, 0x94, 0x0f, 0x64, 0xa1, 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x58, 0xc9,
+ 0x57, 0x20, 0x0f, 0x64, 0xe0, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x99, 0xc7,
+ 0x0d, 0x04, 0x0f, 0x64, 0x50, 0xc2, 0x0d, 0x10, 0x0f, 0x64, 0x03, 0x03,
+ 0x3d, 0x50, 0x00, 0x43, 0x3d, 0x56, 0xc2, 0x0d, 0x10, 0x0f, 0x63, 0xfb,
+ 0x03, 0x3d, 0x62, 0x00, 0x43, 0x3d, 0x68, 0xc3, 0x45, 0x6b, 0x0f, 0x63,
+ 0xf3, 0x03, 0x3d, 0x74, 0xc2, 0x00, 0x5f, 0x0f, 0x63, 0xaa, 0x03, 0x3d,
+ 0x7a, 0xc3, 0x0d, 0x0f, 0x0f, 0x63, 0xeb, 0x03, 0x3d, 0x7e, 0xc2, 0x00,
+ 0x33, 0x0f, 0x63, 0xa2, 0x03, 0x3d, 0x84, 0xc4, 0x0d, 0x0e, 0x0f, 0x63,
+ 0xe3, 0x03, 0x3d, 0x88, 0xc3, 0x02, 0xdf, 0x0f, 0x63, 0x9a, 0x03, 0x3d,
+ 0x8e, 0xc4, 0x18, 0x12, 0x0f, 0x63, 0xdb, 0x03, 0x3d, 0x92, 0x91, 0x0f,
+ 0x63, 0x92, 0x03, 0x3d, 0x98, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xa8, 0xc8,
+ 0x4b, 0x94, 0x0f, 0x64, 0x61, 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x18, 0xc2,
+ 0x02, 0x6f, 0x01, 0x96, 0x29, 0xc2, 0x00, 0x35, 0x01, 0x96, 0x30, 0xc3,
+ 0x05, 0x14, 0x01, 0x9f, 0x01, 0x16, 0xc3, 0x3d, 0x9c, 0x08, 0xc3, 0x3d,
+ 0xaa, 0x15, 0xc3, 0x3d, 0xb7, 0x07, 0xc3, 0x3d, 0xc9, 0xc4, 0x26, 0x78,
+ 0x01, 0x9f, 0x42, 0x03, 0x3d, 0xd8, 0x19, 0xc3, 0x3d, 0xde, 0x0a, 0xc3,
+ 0x3d, 0xe6, 0xc2, 0x00, 0xc4, 0x01, 0x9b, 0x10, 0xc3, 0x09, 0x9e, 0x01,
+ 0x9a, 0xe3, 0x03, 0x3d, 0xf2, 0x0b, 0x43, 0x3d, 0xf8, 0xc2, 0x22, 0xcc,
+ 0x01, 0x9a, 0xf3, 0x03, 0x3e, 0x04, 0xc4, 0x18, 0x10, 0x01, 0x9a, 0xfa,
+ 0x03, 0x3e, 0x0a, 0xc4, 0x00, 0x2d, 0x01, 0x9b, 0x03, 0x03, 0x3e, 0x10,
+ 0xc5, 0x66, 0xb1, 0x01, 0x9b, 0x18, 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x58,
+ 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xa9, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x58,
+ 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x71, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xc0,
+ 0x49, 0x2a, 0xf5, 0xc3, 0x3e, 0x16, 0x02, 0x43, 0x3e, 0x2c, 0x49, 0x2a,
+ 0xf5, 0x43, 0x3e, 0x3e, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x61, 0xdb, 0x18,
+ 0x03, 0x0f, 0xd1, 0xb0, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x51, 0xdb, 0x18,
+ 0x03, 0x0f, 0xd1, 0xa0, 0xc3, 0x00, 0x74, 0x0f, 0xd0, 0xf1, 0xc5, 0x56,
+ 0xa5, 0x0f, 0xd1, 0x10, 0xc8, 0x02, 0x9f, 0x01, 0x34, 0x39, 0x42, 0x00,
+ 0x58, 0xc3, 0x3e, 0x4a, 0x46, 0x02, 0xae, 0xc3, 0x3e, 0x56, 0x46, 0x01,
+ 0xc8, 0x43, 0x3e, 0x62, 0xc5, 0x22, 0xdb, 0x01, 0x33, 0x08, 0xca, 0xa7,
+ 0xc4, 0x01, 0x38, 0x29, 0xdc, 0x13, 0x51, 0x0f, 0xde, 0x00, 0xcd, 0x77,
+ 0xd5, 0x0f, 0xbc, 0xa9, 0xcc, 0x51, 0x6c, 0x01, 0x2d, 0x19, 0xd1, 0x51,
+ 0x67, 0x0f, 0xbc, 0xa0, 0x14, 0xc3, 0x3e, 0x6e, 0x0e, 0xc3, 0x3e, 0x7a,
+ 0x46, 0x02, 0xae, 0xc3, 0x3e, 0x86, 0xd7, 0x27, 0xe7, 0x01, 0x2f, 0x59,
+ 0xd4, 0x3d, 0x68, 0x01, 0x1c, 0x28, 0xc4, 0x5d, 0x32, 0x01, 0x31, 0xe1,
+ 0xcb, 0x93, 0x3b, 0x0f, 0x99, 0x20, 0xca, 0xa1, 0xac, 0x0f, 0x99, 0x30,
+ 0xc5, 0x0b, 0x0a, 0x01, 0x2d, 0x59, 0xc3, 0x0e, 0x6b, 0x01, 0x5a, 0x90,
+ 0xc5, 0x06, 0x82, 0x01, 0x30, 0xe1, 0xce, 0x24, 0xd5, 0x0f, 0xa2, 0x40,
+ 0xcd, 0x4a, 0x56, 0x01, 0x2e, 0x41, 0xd2, 0x4a, 0x51, 0x0f, 0xbc, 0xd1,
+ 0xce, 0x74, 0xa2, 0x0f, 0xbc, 0xd8, 0xe0, 0x08, 0x27, 0x01, 0x37, 0xf8,
+ 0xc6, 0x46, 0x3e, 0x01, 0x2d, 0xd9, 0xc7, 0xbb, 0xcb, 0x01, 0x5a, 0xa0,
+ 0x89, 0x0f, 0x17, 0x18, 0xc5, 0x00, 0xa2, 0x0f, 0xb1, 0x73, 0x03, 0x3e,
+ 0x92, 0xd8, 0x23, 0x4b, 0x0f, 0xd7, 0x10, 0xd3, 0x41, 0x38, 0x0f, 0xb0,
+ 0xe9, 0xcb, 0x91, 0x78, 0x0f, 0xb0, 0xe0, 0xcb, 0x93, 0x9e, 0x01, 0x51,
+ 0x61, 0xcc, 0x8b, 0xd1, 0x01, 0x51, 0x59, 0xc9, 0x0e, 0x6e, 0x01, 0x51,
+ 0x51, 0xcb, 0x52, 0x5b, 0x01, 0x51, 0x48, 0x95, 0x0f, 0x46, 0x89, 0xca,
+ 0xa2, 0x92, 0x0f, 0x46, 0xa0, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xd3, 0x03,
+ 0x3e, 0x96, 0xc8, 0x4b, 0x94, 0x08, 0x4f, 0x18, 0xc7, 0x0d, 0x04, 0x08,
+ 0x4e, 0xcb, 0x03, 0x3e, 0x9c, 0xc8, 0x4b, 0x94, 0x08, 0x4f, 0x10, 0x00,
+ 0xc3, 0x3e, 0xa2, 0xc2, 0x0d, 0x10, 0x08, 0x4e, 0x7a, 0x03, 0x3e, 0xb1,
+ 0x00, 0xc3, 0x3e, 0xb7, 0xc2, 0x0d, 0x10, 0x08, 0x4e, 0x72, 0x03, 0x3e,
+ 0xc6, 0xc2, 0x00, 0x5f, 0x08, 0x4e, 0x23, 0x03, 0x3e, 0xcc, 0xc3, 0x45,
+ 0x6b, 0x08, 0x4e, 0x6a, 0x03, 0x3e, 0xd0, 0xc2, 0x00, 0x33, 0x08, 0x4e,
+ 0x1b, 0x03, 0x3e, 0xd6, 0xc3, 0x0d, 0x0f, 0x08, 0x4e, 0x62, 0x03, 0x3e,
+ 0xda, 0xc3, 0x02, 0xdf, 0x08, 0x4e, 0x13, 0x03, 0x3e, 0xe0, 0xc4, 0x0d,
+ 0x0e, 0x08, 0x4e, 0x5a, 0x03, 0x3e, 0xe4, 0x91, 0x08, 0x4e, 0x0b, 0x03,
+ 0x3e, 0xea, 0xc4, 0x18, 0x12, 0x08, 0x4e, 0x52, 0x03, 0x3e, 0xee, 0xc9,
+ 0x57, 0x20, 0x08, 0x4f, 0x20, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0x93, 0x03,
+ 0x3e, 0xf4, 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xd8, 0x91, 0x08, 0x4d, 0xb1,
+ 0x87, 0x08, 0x4d, 0xa9, 0x83, 0x08, 0x4d, 0xa0, 0x83, 0x08, 0x4d, 0x91,
+ 0xc2, 0x00, 0xd0, 0x08, 0x4d, 0x68, 0x87, 0x08, 0x4d, 0x89, 0x83, 0x08,
+ 0x4d, 0x78, 0xc9, 0x87, 0xed, 0x08, 0x4d, 0x80, 0x87, 0x08, 0x4d, 0x51,
+ 0x83, 0x08, 0x4d, 0x48, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0xd8, 0xc2, 0xe5,
+ 0xfd, 0x08, 0x4c, 0xc8, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0xa0, 0xc2, 0xe5,
+ 0xfd, 0x08, 0x4c, 0x58, 0xc2, 0xe5, 0xfd, 0x08, 0x4c, 0x68, 0x49, 0x3d,
+ 0x54, 0xc3, 0x3e, 0xfa, 0x4a, 0x2c, 0x4a, 0xc3, 0x3f, 0x06, 0x49, 0x45,
+ 0xd2, 0xc3, 0x3f, 0x12, 0x47, 0x54, 0x42, 0x43, 0x3f, 0x1e, 0xc3, 0x64,
+ 0x58, 0x00, 0xc5, 0x51, 0xc3, 0x39, 0x6d, 0x00, 0xc5, 0x41, 0x1c, 0xc3,
+ 0x3f, 0x2a, 0x05, 0xc3, 0x3f, 0x34, 0xc3, 0x1d, 0x35, 0x00, 0xc5, 0x11,
+ 0x06, 0xc3, 0x3f, 0x3e, 0x16, 0xc3, 0x3f, 0x4a, 0xc3, 0xe5, 0xf0, 0x00,
+ 0xc4, 0xe9, 0xc3, 0x20, 0xf1, 0x00, 0xc4, 0xd9, 0xc3, 0x91, 0x00, 0x00,
+ 0xc4, 0xd0, 0x83, 0x00, 0xc4, 0x8b, 0x03, 0x3f, 0x54, 0xc2, 0x0e, 0x9a,
+ 0x00, 0xc4, 0x70, 0xc2, 0x19, 0x2c, 0x00, 0xc5, 0x39, 0x97, 0x00, 0xc5,
+ 0x30, 0x8a, 0x00, 0xc4, 0xb9, 0xcb, 0x97, 0x71, 0x00, 0xc4, 0x00, 0x83,
+ 0x00, 0xc4, 0xb1, 0xc2, 0x00, 0xd0, 0x00, 0xc4, 0xa8, 0xc2, 0x00, 0xd0,
+ 0x00, 0xc4, 0x99, 0x83, 0x00, 0xc4, 0x90, 0x83, 0x00, 0xc4, 0x81, 0x16,
+ 0xc3, 0x3f, 0x60, 0xcb, 0x8c, 0x9d, 0x00, 0xc4, 0x30, 0xc2, 0x00, 0xc1,
+ 0x00, 0xc4, 0x79, 0xc2, 0x01, 0x30, 0x00, 0xc4, 0x50, 0xcf, 0x62, 0x10,
+ 0x00, 0xc4, 0x20, 0x48, 0xb1, 0x71, 0xc3, 0x3f, 0x6a, 0xc2, 0x00, 0x75,
+ 0x00, 0xc2, 0x50, 0xc2, 0x02, 0x1c, 0x00, 0xc2, 0xe1, 0x83, 0x00, 0xc2,
+ 0x88, 0xc2, 0x01, 0x94, 0x00, 0xc2, 0xd1, 0x83, 0x00, 0xc2, 0x98, 0x83,
+ 0x00, 0xc2, 0xc0, 0xc2, 0x0d, 0xf6, 0x00, 0xc2, 0xa1, 0x83, 0x00, 0xc2,
+ 0x80, 0x87, 0x00, 0xc2, 0x48, 0x87, 0x00, 0xc2, 0x40, 0xc2, 0x00, 0xd0,
+ 0x00, 0xc3, 0x91, 0x83, 0x00, 0xc3, 0x78, 0xc2, 0x0d, 0xf6, 0x00, 0xc3,
+ 0x71, 0x83, 0x00, 0xc3, 0x40, 0x83, 0x00, 0xc3, 0x68, 0x83, 0x00, 0xc3,
+ 0x60, 0x87, 0x00, 0xc3, 0x00, 0x9b, 0x00, 0xc2, 0xf8, 0xc4, 0x18, 0x10,
+ 0x08, 0xb2, 0xb9, 0xc2, 0x22, 0xcc, 0x08, 0xb2, 0xb0, 0xc3, 0x0d, 0x14,
+ 0x08, 0xb2, 0xa9, 0xc3, 0x09, 0x9e, 0x08, 0xb2, 0xa0, 0xc4, 0x02, 0xde,
+ 0x08, 0xb2, 0x99, 0xc2, 0x02, 0xa0, 0x08, 0xb2, 0x90, 0x8e, 0x08, 0xb1,
+ 0xc0, 0x94, 0x08, 0xb1, 0xb0, 0x8e, 0x08, 0xb0, 0x43, 0x03, 0x3f, 0x76,
+ 0x94, 0x08, 0xb0, 0x32, 0x03, 0x3f, 0x7a, 0xc2, 0x00, 0xd0, 0x08, 0xb0,
+ 0xd9, 0x83, 0x08, 0xb0, 0xd0, 0xc2, 0x00, 0xd0, 0x08, 0xb0, 0xc9, 0x83,
+ 0x08, 0xb0, 0xc0, 0x96, 0x00, 0xea, 0xbb, 0x03, 0x3f, 0x7e, 0x87, 0x00,
+ 0xea, 0x4b, 0x03, 0x3f, 0xab, 0x9c, 0x00, 0xed, 0xdb, 0x03, 0x3f, 0xc3,
+ 0x98, 0x00, 0xea, 0xdb, 0x03, 0x3f, 0xc9, 0x85, 0x00, 0xec, 0xe3, 0x03,
+ 0x3f, 0xcf, 0x97, 0x00, 0xea, 0xc3, 0x03, 0x3f, 0xe7, 0x95, 0x00, 0x17,
+ 0x13, 0x03, 0x3f, 0xf1, 0x92, 0x00, 0xea, 0xb3, 0x03, 0x40, 0x01, 0x84,
+ 0x00, 0xea, 0x3b, 0x03, 0x40, 0x07, 0x47, 0x01, 0x56, 0xc3, 0x40, 0x1f,
+ 0x8f, 0x00, 0xea, 0x83, 0x03, 0x40, 0x2b, 0x8e, 0x00, 0x17, 0x0b, 0x03,
+ 0x40, 0x31, 0x8c, 0x00, 0x15, 0x93, 0x03, 0x40, 0x52, 0x0b, 0xc3, 0x40,
+ 0x58, 0x86, 0x00, 0xea, 0x43, 0x03, 0x40, 0x64, 0x88, 0x00, 0xed, 0x03,
+ 0x03, 0x40, 0x80, 0x94, 0x00, 0x15, 0x9b, 0x03, 0x40, 0x86, 0x89, 0x00,
+ 0xea, 0x6b, 0x03, 0x40, 0x98, 0x83, 0x00, 0xea, 0x1b, 0x03, 0x40, 0xaa,
+ 0x91, 0x00, 0xea, 0x93, 0x03, 0x40, 0xba, 0x8d, 0x00, 0xea, 0x79, 0x8a,
+ 0x00, 0x15, 0x83, 0x03, 0x40, 0xc6, 0x99, 0x00, 0x15, 0xb9, 0x9b, 0x00,
+ 0x15, 0xc1, 0x9a, 0x00, 0x17, 0x19, 0x93, 0x08, 0x3d, 0x28, 0xd5, 0x33,
+ 0x14, 0x08, 0x3c, 0x11, 0xd0, 0x33, 0x19, 0x08, 0x3c, 0x08, 0xc9, 0x3d,
+ 0x18, 0x05, 0x39, 0x01, 0xc8, 0xae, 0xfb, 0x05, 0x39, 0x08, 0xc3, 0x63,
+ 0x85, 0x00, 0x17, 0xe9, 0xcf, 0x63, 0x00, 0x05, 0x3c, 0x50, 0xc2, 0x00,
+ 0xc4, 0x00, 0xeb, 0xc1, 0xc9, 0xa8, 0x3a, 0x05, 0x34, 0xe1, 0xc9, 0x84,
+ 0xc0, 0x05, 0x34, 0xe8, 0x99, 0x00, 0xea, 0x11, 0x97, 0x00, 0xea, 0x09,
+ 0x96, 0x00, 0xea, 0x01, 0x94, 0x00, 0xe9, 0xfb, 0x03, 0x40, 0xd5, 0x92,
+ 0x00, 0xe9, 0xf1, 0x91, 0x00, 0xe9, 0xe3, 0x03, 0x40, 0xdb, 0x90, 0x00,
+ 0xe9, 0xd1, 0x8f, 0x00, 0xe9, 0xc9, 0x8e, 0x00, 0xe9, 0xc1, 0x8d, 0x00,
+ 0xe9, 0xb9, 0x8c, 0x00, 0xe9, 0xb1, 0x8b, 0x00, 0xe9, 0xa9, 0x8a, 0x00,
+ 0xe9, 0xa3, 0x03, 0x40, 0xdf, 0x89, 0x00, 0xe9, 0x99, 0x87, 0x00, 0xe9,
+ 0x89, 0x86, 0x00, 0xe9, 0x81, 0x84, 0x00, 0xe9, 0x73, 0x03, 0x40, 0xe5,
+ 0x83, 0x00, 0xe9, 0x63, 0x03, 0x40, 0xeb, 0x85, 0x05, 0x3f, 0x91, 0x88,
+ 0x05, 0x3f, 0x99, 0x93, 0x05, 0x3f, 0xa1, 0x98, 0x01, 0x63, 0xe8, 0x43,
+ 0x03, 0x35, 0xc3, 0x40, 0xef, 0x44, 0x10, 0xd1, 0x43, 0x41, 0x07, 0xcf,
+ 0x61, 0x89, 0x00, 0x16, 0x91, 0xce, 0x0f, 0x6e, 0x00, 0x16, 0x98, 0xc4,
+ 0x32, 0xbc, 0x05, 0x5b, 0x59, 0xc9, 0x0f, 0x73, 0x00, 0x15, 0xf1, 0xc9,
+ 0x03, 0xde, 0x00, 0x16, 0x18, 0x47, 0x10, 0x30, 0xc3, 0x41, 0x1f, 0x16,
+ 0x43, 0x41, 0x2e, 0xc8, 0x4d, 0x8d, 0x05, 0x38, 0xd9, 0xca, 0x3e, 0xe4,
+ 0x05, 0x38, 0xe1, 0xd0, 0x0f, 0x09, 0x05, 0x38, 0xe9, 0xd9, 0x1d, 0x6f,
+ 0x05, 0x38, 0xf1, 0xc5, 0x33, 0x24, 0x00, 0x17, 0xc0, 0xc4, 0x32, 0xbc,
+ 0x05, 0x5b, 0x51, 0xc9, 0x0f, 0x73, 0x00, 0x15, 0xf9, 0xc9, 0x03, 0xde,
+ 0x00, 0x16, 0x10, 0x00, 0xc3, 0x41, 0x34, 0xd5, 0x34, 0xf7, 0x05, 0x38,
+ 0xd0, 0xcc, 0x23, 0x3f, 0x08, 0x3d, 0x98, 0xc9, 0x3d, 0x18, 0x00, 0x17,
+ 0xc9, 0xc8, 0xae, 0xfb, 0x00, 0x17, 0xd8, 0x45, 0x00, 0x5a, 0xc3, 0x41,
+ 0x74, 0x43, 0x11, 0x19, 0xc3, 0x41, 0x80, 0x42, 0x00, 0x30, 0x43, 0x41,
+ 0x8c, 0xc9, 0x03, 0xde, 0x00, 0x16, 0x21, 0xc4, 0x32, 0xbc, 0x00, 0x16,
+ 0xa0, 0x06, 0xc3, 0x41, 0x9e, 0xc8, 0x68, 0x56, 0x00, 0x16, 0xb8, 0x45,
+ 0x08, 0xcb, 0xc3, 0x41, 0xa8, 0x44, 0x05, 0x36, 0x43, 0x41, 0xba, 0xc9,
+ 0x3d, 0x18, 0x00, 0x17, 0xd1, 0xc8, 0xae, 0xfb, 0x00, 0x17, 0xe0, 0x47,
+ 0x19, 0x7a, 0xc3, 0x41, 0xcc, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x99, 0xc8,
+ 0x4e, 0x93, 0x00, 0x17, 0x30, 0xc3, 0x11, 0x7e, 0x0e, 0xb7, 0xd1, 0xc5,
+ 0xd8, 0x8f, 0x0e, 0xb7, 0x80, 0xc7, 0x00, 0x90, 0x0e, 0xb7, 0x98, 0xc3,
+ 0x11, 0x7e, 0x0e, 0xb8, 0xa1, 0xc5, 0xd8, 0x8f, 0x0e, 0xb8, 0x50, 0x8c,
+ 0x0e, 0xb5, 0x29, 0x8b, 0x0e, 0xb5, 0x20, 0xc3, 0x04, 0x87, 0x0e, 0xb6,
+ 0x38, 0x8b, 0x0e, 0xb6, 0x78, 0xc6, 0x10, 0x3f, 0x0e, 0xb6, 0xb0, 0xc6,
+ 0x51, 0x50, 0x0e, 0xbe, 0x59, 0xc4, 0xdb, 0x4c, 0x0e, 0xb6, 0x28, 0x0f,
+ 0x43, 0x41, 0xd8, 0xc2, 0x00, 0xba, 0x0e, 0xb6, 0xc9, 0xc2, 0x00, 0x0a,
+ 0x0e, 0xb6, 0xb9, 0x8b, 0x0e, 0xb6, 0x88, 0xc2, 0x00, 0x0a, 0x0e, 0xb6,
+ 0xc0, 0xc2, 0x20, 0xec, 0x0e, 0xb6, 0xa9, 0xc4, 0x89, 0xfe, 0x0e, 0xb6,
+ 0x48, 0xc4, 0x1a, 0x73, 0x0e, 0xb6, 0xa0, 0xca, 0x91, 0x2c, 0x0e, 0xb6,
+ 0x98, 0xc2, 0x01, 0x23, 0x0e, 0xb6, 0x90, 0x97, 0x0e, 0xb6, 0x70, 0x97,
+ 0x0e, 0xb6, 0x68, 0xc4, 0xdd, 0x9a, 0x0e, 0xb6, 0x60, 0xc4, 0x8b, 0x66,
+ 0x0e, 0xb6, 0x58, 0xc3, 0x01, 0xbb, 0x0e, 0xb6, 0x50, 0xc2, 0x01, 0x6f,
+ 0x0e, 0xb6, 0x41, 0xc6, 0x10, 0x3f, 0x0e, 0xb6, 0x30, 0xc4, 0x38, 0x2c,
+ 0x0e, 0xb6, 0x20, 0xc3, 0x04, 0x87, 0x0e, 0xb6, 0x18, 0xc4, 0xde, 0x3f,
+ 0x0e, 0xb6, 0x10, 0x9c, 0x0e, 0xa8, 0x19, 0x9b, 0x0e, 0xa8, 0x11, 0x9a,
+ 0x0e, 0xa8, 0x09, 0x99, 0x0e, 0xa8, 0x01, 0x98, 0x0e, 0xa7, 0xf9, 0x97,
+ 0x0e, 0xa7, 0xf1, 0x96, 0x0e, 0xa7, 0xe9, 0x95, 0x0e, 0xa7, 0xe1, 0x94,
+ 0x0e, 0xa7, 0xd9, 0x93, 0x0e, 0xa7, 0xd1, 0x92, 0x0e, 0xa7, 0xc9, 0x91,
+ 0x0e, 0xa7, 0xc1, 0x90, 0x0e, 0xa7, 0xb9, 0x8f, 0x0e, 0xa7, 0xb1, 0x8e,
+ 0x0e, 0xa7, 0xa9, 0x8d, 0x0e, 0xa7, 0xa1, 0x8c, 0x0e, 0xa7, 0x99, 0x8b,
+ 0x0e, 0xa7, 0x91, 0x8a, 0x0e, 0xa7, 0x89, 0x89, 0x0e, 0xa7, 0x81, 0x88,
+ 0x0e, 0xa7, 0x79, 0x87, 0x0e, 0xa7, 0x71, 0x86, 0x0e, 0xa7, 0x69, 0x85,
+ 0x0e, 0xa7, 0x61, 0x84, 0x0e, 0xa7, 0x59, 0x83, 0x0e, 0xa7, 0x50, 0x9c,
+ 0x0e, 0xa7, 0x49, 0x9b, 0x0e, 0xa7, 0x41, 0x9a, 0x0e, 0xa7, 0x39, 0x99,
+ 0x0e, 0xa7, 0x31, 0x98, 0x0e, 0xa7, 0x29, 0x97, 0x0e, 0xa7, 0x21, 0x96,
+ 0x0e, 0xa7, 0x19, 0x95, 0x0e, 0xa7, 0x11, 0x94, 0x0e, 0xa7, 0x09, 0x93,
+ 0x0e, 0xa7, 0x01, 0x92, 0x0e, 0xa6, 0xf9, 0x91, 0x0e, 0xa6, 0xf1, 0x90,
+ 0x0e, 0xa6, 0xe9, 0x8f, 0x0e, 0xa6, 0xe1, 0x8e, 0x0e, 0xa6, 0xd9, 0x8d,
+ 0x0e, 0xa6, 0xd1, 0x8c, 0x0e, 0xa6, 0xc9, 0x8b, 0x0e, 0xa6, 0xc1, 0x8a,
+ 0x0e, 0xa6, 0xb9, 0x89, 0x0e, 0xa6, 0xb1, 0x88, 0x0e, 0xa6, 0xa9, 0x87,
+ 0x0e, 0xa6, 0xa1, 0x86, 0x0e, 0xa6, 0x99, 0x85, 0x0e, 0xa6, 0x91, 0x84,
+ 0x0e, 0xa6, 0x89, 0x83, 0x0e, 0xa6, 0x80, 0xc3, 0x11, 0x7e, 0x0e, 0xb6,
+ 0x01, 0xc5, 0xd8, 0x8f, 0x0e, 0xb5, 0xb0, 0xc7, 0x00, 0x90, 0x0e, 0xb5,
+ 0xc8, 0x0f, 0x43, 0x41, 0xe4, 0xc2, 0x00, 0xba, 0x0e, 0xba, 0x69, 0xc2,
+ 0x00, 0x0a, 0x0e, 0xba, 0x59, 0x8b, 0x0e, 0xba, 0x28, 0xc2, 0x00, 0x0a,
+ 0x0e, 0xba, 0x60, 0xc6, 0x10, 0x3f, 0x0e, 0xba, 0x50, 0xc2, 0x20, 0xec,
+ 0x0e, 0xba, 0x49, 0xc4, 0x89, 0xfe, 0x0e, 0xb9, 0xe8, 0xc4, 0x1a, 0x73,
+ 0x0e, 0xba, 0x40, 0xca, 0x91, 0x2c, 0x0e, 0xba, 0x38, 0xc2, 0x01, 0x23,
+ 0x0e, 0xba, 0x30, 0x8b, 0x0e, 0xba, 0x18, 0x97, 0x0e, 0xba, 0x10, 0x97,
+ 0x0e, 0xba, 0x08, 0xc4, 0xdd, 0x9a, 0x0e, 0xba, 0x00, 0xc4, 0x8b, 0x66,
+ 0x0e, 0xb9, 0xf8, 0xc3, 0x01, 0xbb, 0x0e, 0xb9, 0xf0, 0xc2, 0x01, 0x6f,
+ 0x0e, 0xb9, 0xe1, 0xc6, 0x10, 0x3f, 0x0e, 0xb9, 0xd0, 0xc3, 0x04, 0x87,
+ 0x0e, 0xb9, 0xd8, 0xc4, 0xdb, 0x4c, 0x0e, 0xb9, 0xc8, 0xc4, 0x38, 0x2c,
+ 0x0e, 0xb9, 0xc0, 0xc3, 0x04, 0x87, 0x0e, 0xb9, 0xb8, 0xc4, 0xde, 0x3f,
+ 0x0e, 0xb9, 0xb0, 0x0f, 0x43, 0x41, 0xf0, 0xc2, 0x00, 0xba, 0x0e, 0xb9,
+ 0x99, 0xc2, 0x00, 0x0a, 0x0e, 0xb9, 0x89, 0x8b, 0x0e, 0xb9, 0x58, 0xc2,
+ 0x00, 0x0a, 0x0e, 0xb9, 0x90, 0xc6, 0x10, 0x3f, 0x0e, 0xb9, 0x80, 0xc2,
+ 0x20, 0xec, 0x0e, 0xb9, 0x79, 0xc4, 0x89, 0xfe, 0x0e, 0xb9, 0x1a, 0x03,
+ 0x41, 0xfc, 0xc4, 0x1a, 0x73, 0x0e, 0xb9, 0x70, 0xc2, 0x01, 0x23, 0x0e,
+ 0xb9, 0x60, 0x8b, 0x0e, 0xb9, 0x48, 0x97, 0x0e, 0xb9, 0x40, 0x97, 0x0e,
+ 0xb9, 0x38, 0xc4, 0xdd, 0x9a, 0x0e, 0xb9, 0x30, 0xc4, 0x8b, 0x66, 0x0e,
+ 0xb9, 0x28, 0xc3, 0x01, 0xbb, 0x0e, 0xb9, 0x20, 0xc2, 0x01, 0x6f, 0x0e,
+ 0xb9, 0x11, 0xc6, 0x10, 0x3f, 0x0e, 0xb9, 0x00, 0xc3, 0x04, 0x87, 0x0e,
+ 0xb9, 0x08, 0xc4, 0xdb, 0x4c, 0x0e, 0xb8, 0xf8, 0xc4, 0x38, 0x2c, 0x0e,
+ 0xb8, 0xf0, 0xc3, 0x04, 0x87, 0x0e, 0xb8, 0xe8, 0xc4, 0xde, 0x3f, 0x0e,
+ 0xb8, 0xe0, 0xc4, 0x26, 0x78, 0x0e, 0xbf, 0xa9, 0xc5, 0x06, 0xdb, 0x0e,
+ 0xbf, 0xa1, 0x15, 0xc3, 0x42, 0x02, 0x08, 0xc3, 0x42, 0x0e, 0x16, 0xc3,
+ 0x42, 0x1a, 0xc3, 0x05, 0x14, 0x0e, 0xbf, 0x69, 0xc4, 0x15, 0xe7, 0x0e,
+ 0xbf, 0x60, 0x12, 0xc3, 0x42, 0x26, 0xca, 0x9c, 0xac, 0x0e, 0xbe, 0x41,
+ 0xcc, 0x8b, 0x65, 0x0e, 0xbe, 0x31, 0xcc, 0x89, 0xfd, 0x0e, 0xbe, 0x29,
+ 0xce, 0x10, 0x3e, 0x0e, 0xbe, 0x21, 0x46, 0x03, 0x13, 0xc3, 0x42, 0x38,
+ 0xc5, 0xdb, 0xf0, 0x0e, 0xbd, 0x49, 0x48, 0x0b, 0x17, 0x43, 0x42, 0xdc,
+ 0xc8, 0x9c, 0x0e, 0x0e, 0xbc, 0x79, 0xc9, 0xaa, 0x9e, 0x0e, 0xbc, 0x69,
+ 0xd3, 0x43, 0x00, 0x0e, 0xbc, 0x48, 0x91, 0x0e, 0xaf, 0xe3, 0x03, 0x43,
+ 0x7d, 0x92, 0x0e, 0xaf, 0xeb, 0x03, 0x43, 0x81, 0x85, 0x0e, 0xaf, 0x83,
+ 0x03, 0x43, 0x91, 0x97, 0x0e, 0xb0, 0x13, 0x03, 0x43, 0x97, 0x96, 0x0e,
+ 0xb0, 0x0b, 0x03, 0x43, 0x9d, 0x95, 0x0e, 0xb0, 0x03, 0x03, 0x43, 0xa9,
+ 0x88, 0x0e, 0xaf, 0x9b, 0x03, 0x43, 0xaf, 0x94, 0x0e, 0xaf, 0xfb, 0x03,
+ 0x43, 0xb5, 0x9a, 0x0e, 0xb0, 0x2b, 0x03, 0x43, 0xbb, 0x90, 0x0e, 0xaf,
+ 0xdb, 0x03, 0x43, 0xbf, 0x8f, 0x0e, 0xaf, 0xd3, 0x03, 0x43, 0xc3, 0x8e,
+ 0x0e, 0xaf, 0xcb, 0x03, 0x43, 0xc7, 0x8d, 0x0e, 0xaf, 0xc3, 0x03, 0x43,
+ 0xcd, 0x8b, 0x0e, 0xaf, 0xb3, 0x03, 0x43, 0xd3, 0x87, 0x0e, 0xaf, 0x93,
+ 0x03, 0x43, 0xd9, 0x9c, 0x0e, 0xb0, 0x3b, 0x03, 0x43, 0xe5, 0x86, 0x0e,
+ 0xaf, 0x8b, 0x03, 0x43, 0xeb, 0x89, 0x0e, 0xaf, 0xa3, 0x03, 0x43, 0xf1,
+ 0x84, 0x0e, 0xaf, 0x7b, 0x03, 0x43, 0xf7, 0x83, 0x0e, 0xaf, 0x73, 0x03,
+ 0x43, 0xfd, 0x9b, 0x0e, 0xb0, 0x31, 0x99, 0x0e, 0xb0, 0x21, 0x98, 0x0e,
+ 0xb0, 0x19, 0x93, 0x0e, 0xaf, 0xf1, 0x8c, 0x0e, 0xaf, 0xb9, 0x8a, 0x0e,
+ 0xaf, 0xa8, 0x91, 0x0e, 0xaf, 0x13, 0x03, 0x44, 0x03, 0x92, 0x0e, 0xaf,
+ 0x1b, 0x03, 0x44, 0x07, 0x85, 0x0e, 0xae, 0xb3, 0x03, 0x44, 0x17, 0x97,
+ 0x0e, 0xaf, 0x43, 0x03, 0x44, 0x1d, 0x96, 0x0e, 0xaf, 0x3b, 0x03, 0x44,
+ 0x23, 0x95, 0x0e, 0xaf, 0x33, 0x03, 0x44, 0x32, 0x94, 0x0e, 0xaf, 0x2b,
+ 0x03, 0x44, 0x38, 0x9a, 0x0e, 0xaf, 0x5b, 0x03, 0x44, 0x3e, 0x90, 0x0e,
+ 0xaf, 0x0b, 0x03, 0x44, 0x42, 0x8f, 0x0e, 0xaf, 0x03, 0x03, 0x44, 0x46,
+ 0x8e, 0x0e, 0xae, 0xfb, 0x03, 0x44, 0x4a, 0x8d, 0x0e, 0xae, 0xf3, 0x03,
+ 0x44, 0x50, 0x8b, 0x0e, 0xae, 0xe3, 0x03, 0x44, 0x56, 0x87, 0x0e, 0xae,
+ 0xc3, 0x03, 0x44, 0x5c, 0x9c, 0x0e, 0xaf, 0x6b, 0x03, 0x44, 0x68, 0x86,
+ 0x0e, 0xae, 0xbb, 0x03, 0x44, 0x6e, 0x89, 0x0e, 0xae, 0xd3, 0x03, 0x44,
+ 0x74, 0x84, 0x0e, 0xae, 0xab, 0x03, 0x44, 0x7a, 0x83, 0x0e, 0xae, 0xa3,
+ 0x03, 0x44, 0x80, 0x9b, 0x0e, 0xaf, 0x61, 0x99, 0x0e, 0xaf, 0x51, 0x98,
+ 0x0e, 0xaf, 0x49, 0x93, 0x0e, 0xaf, 0x21, 0x8c, 0x0e, 0xae, 0xe9, 0x8a,
+ 0x0e, 0xae, 0xd9, 0x88, 0x0e, 0xae, 0xc8, 0xc4, 0x18, 0x10, 0x0e, 0xbf,
+ 0x49, 0xc2, 0x22, 0xcc, 0x0e, 0xbf, 0x40, 0xc3, 0x0d, 0x14, 0x0e, 0xbf,
+ 0x39, 0xc3, 0x09, 0x9e, 0x0e, 0xbf, 0x30, 0xc4, 0x02, 0xde, 0x0e, 0xbf,
+ 0x29, 0xc2, 0x02, 0xa0, 0x0e, 0xbf, 0x20, 0x9c, 0x0e, 0xb1, 0xd9, 0x9b,
+ 0x0e, 0xb1, 0xd1, 0x9a, 0x0e, 0xb1, 0xc9, 0x99, 0x0e, 0xb1, 0xc1, 0x98,
+ 0x0e, 0xb1, 0xb9, 0x97, 0x0e, 0xb1, 0xb1, 0x96, 0x0e, 0xb1, 0xa9, 0x95,
+ 0x0e, 0xb1, 0xa1, 0x94, 0x0e, 0xb1, 0x99, 0x93, 0x0e, 0xb1, 0x91, 0x92,
+ 0x0e, 0xb1, 0x89, 0x91, 0x0e, 0xb1, 0x81, 0x90, 0x0e, 0xb1, 0x79, 0x8f,
+ 0x0e, 0xb1, 0x71, 0x8e, 0x0e, 0xb1, 0x69, 0x8d, 0x0e, 0xb1, 0x61, 0x8c,
+ 0x0e, 0xb1, 0x59, 0x8b, 0x0e, 0xb1, 0x51, 0x8a, 0x0e, 0xb1, 0x49, 0x89,
+ 0x0e, 0xb1, 0x41, 0x88, 0x0e, 0xb1, 0x39, 0x87, 0x0e, 0xb1, 0x31, 0x86,
+ 0x0e, 0xb1, 0x29, 0x85, 0x0e, 0xb1, 0x21, 0x84, 0x0e, 0xb1, 0x19, 0x83,
+ 0x0e, 0xb1, 0x10, 0x9c, 0x0e, 0xb1, 0x09, 0x9b, 0x0e, 0xb1, 0x01, 0x9a,
+ 0x0e, 0xb0, 0xf9, 0x99, 0x0e, 0xb0, 0xf1, 0x98, 0x0e, 0xb0, 0xe9, 0x97,
+ 0x0e, 0xb0, 0xe1, 0x96, 0x0e, 0xb0, 0xd9, 0x95, 0x0e, 0xb0, 0xd1, 0x94,
+ 0x0e, 0xb0, 0xc9, 0x93, 0x0e, 0xb0, 0xc1, 0x92, 0x0e, 0xb0, 0xb9, 0x91,
+ 0x0e, 0xb0, 0xb1, 0x90, 0x0e, 0xb0, 0xa9, 0x8f, 0x0e, 0xb0, 0xa1, 0x8e,
+ 0x0e, 0xb0, 0x99, 0x8d, 0x0e, 0xb0, 0x91, 0x8c, 0x0e, 0xb0, 0x89, 0x8b,
+ 0x0e, 0xb0, 0x81, 0x8a, 0x0e, 0xb0, 0x79, 0x89, 0x0e, 0xb0, 0x71, 0x88,
+ 0x0e, 0xb0, 0x69, 0x87, 0x0e, 0xb0, 0x61, 0x86, 0x0e, 0xb0, 0x59, 0x85,
+ 0x0e, 0xb0, 0x51, 0x84, 0x0e, 0xb0, 0x49, 0x83, 0x0e, 0xb0, 0x40, 0xc2,
+ 0x00, 0xd0, 0x08, 0xe5, 0x19, 0x83, 0x08, 0xe5, 0x10, 0x94, 0x00, 0x6b,
+ 0x00, 0x8e, 0x00, 0x6b, 0x08, 0x8f, 0x00, 0x6a, 0xa1, 0x9b, 0x00, 0x6a,
+ 0xa9, 0x8e, 0x00, 0x6b, 0xeb, 0x03, 0x44, 0x86, 0x90, 0x00, 0x6b, 0xdb,
+ 0x03, 0x44, 0x8d, 0xc2, 0x01, 0xa3, 0x00, 0x6b, 0xe1, 0x8d, 0x00, 0x6b,
+ 0xf8, 0xc2, 0x00, 0xd0, 0x08, 0x8b, 0x09, 0x83, 0x08, 0x8b, 0x00, 0xc2,
+ 0x00, 0xd0, 0x08, 0x8a, 0xf9, 0x83, 0x08, 0x8a, 0xf0, 0xc4, 0x57, 0xbc,
+ 0x0e, 0x8f, 0x51, 0x46, 0xd1, 0x8d, 0x43, 0x44, 0x91, 0xc3, 0x01, 0x69,
+ 0x0e, 0x8f, 0x49, 0xc8, 0xb7, 0x7a, 0x0e, 0x8e, 0xb3, 0x03, 0x44, 0xb7,
+ 0x46, 0x1f, 0x87, 0xc3, 0x44, 0xbd, 0x07, 0xc3, 0x44, 0xc7, 0xc5, 0xd9,
+ 0xb1, 0x0e, 0x8c, 0x69, 0x0b, 0xc3, 0x44, 0xd3, 0x0a, 0x43, 0x44, 0xdd,
+ 0x07, 0xc3, 0x44, 0xe9, 0x11, 0xc3, 0x44, 0xf5, 0xc4, 0xdf, 0xe7, 0x0e,
+ 0x8c, 0x79, 0xd3, 0x42, 0x1c, 0x0e, 0x8a, 0xb1, 0xcc, 0x81, 0x75, 0x0e,
+ 0x8a, 0x20, 0xc7, 0xc8, 0x46, 0x0e, 0x8e, 0xc3, 0x03, 0x45, 0x04, 0x46,
+ 0xce, 0xcf, 0xc3, 0x45, 0x0a, 0xc3, 0x05, 0x9f, 0x0e, 0x8c, 0xbb, 0x03,
+ 0x45, 0x16, 0x94, 0x0e, 0x8c, 0xb3, 0x03, 0x45, 0x1a, 0x0a, 0xc3, 0x45,
+ 0x20, 0xcd, 0x79, 0xd0, 0x0e, 0x88, 0xb8, 0x0e, 0xc3, 0x45, 0x2c, 0x14,
+ 0xc3, 0x45, 0x36, 0x11, 0xc3, 0x45, 0x42, 0xd0, 0x5c, 0x22, 0x0e, 0x8a,
+ 0x29, 0xc7, 0xc8, 0x4d, 0x0e, 0x89, 0xa9, 0xc5, 0xac, 0x87, 0x0e, 0x89,
+ 0x09, 0xc6, 0xd3, 0x1f, 0x0e, 0x88, 0x98, 0xc4, 0x01, 0x2e, 0x0e, 0x8e,
+ 0x99, 0xcc, 0x8b, 0x89, 0x0e, 0x8a, 0xb8, 0x14, 0xc3, 0x45, 0x4c, 0x49,
+ 0xad, 0xad, 0xc3, 0x45, 0x58, 0xc5, 0xac, 0x87, 0x0e, 0x88, 0xf2, 0x03,
+ 0x45, 0x64, 0xc5, 0xc3, 0x54, 0x0e, 0x8d, 0xdb, 0x03, 0x45, 0x6a, 0xc5,
+ 0xc0, 0x9e, 0x0e, 0x8d, 0xb1, 0xc4, 0xe0, 0x2f, 0x0e, 0x8c, 0x81, 0x4d,
+ 0x7a, 0x1e, 0xc3, 0x45, 0x6e, 0x44, 0x1f, 0x19, 0x43, 0x45, 0x7a, 0x14,
+ 0xc3, 0x45, 0x86, 0x45, 0x3f, 0x0e, 0x43, 0x45, 0x90, 0xc4, 0xcb, 0x41,
+ 0x0e, 0x8d, 0xbb, 0x03, 0x45, 0xa8, 0xcf, 0x65, 0x76, 0x0e, 0x88, 0x30,
+ 0x44, 0xa1, 0xbe, 0xc3, 0x45, 0xac, 0x11, 0xc3, 0x45, 0xb8, 0x0b, 0xc3,
+ 0x45, 0xc4, 0x44, 0xb3, 0xb1, 0xc3, 0x45, 0xce, 0xc5, 0xac, 0x87, 0x0e,
+ 0x89, 0x13, 0x03, 0x45, 0xda, 0xc6, 0xcf, 0xef, 0x0e, 0x88, 0x82, 0x03,
+ 0x45, 0xe0, 0x03, 0xc3, 0x45, 0xe6, 0x07, 0xc3, 0x46, 0x01, 0x46, 0x00,
+ 0x59, 0xc3, 0x46, 0x0d, 0x49, 0xac, 0x84, 0x43, 0x46, 0x1f, 0xcf, 0x68,
+ 0xa0, 0x0e, 0x8d, 0x99, 0x45, 0xa6, 0x7b, 0x43, 0x46, 0x27, 0x43, 0x01,
+ 0xd0, 0xc3, 0x46, 0x33, 0xc9, 0xb4, 0x9a, 0x0e, 0x8d, 0x30, 0x43, 0x02,
+ 0x9c, 0xc3, 0x46, 0x45, 0x46, 0x06, 0xdc, 0x43, 0x46, 0x63, 0xca, 0xa3,
+ 0xbe, 0x0e, 0x8d, 0x39, 0xcc, 0x81, 0xb1, 0x0e, 0x8a, 0xc9, 0xcd, 0x77,
+ 0xae, 0x0e, 0x8a, 0xc1, 0x47, 0x83, 0xf2, 0x43, 0x46, 0x6f, 0x4f, 0x63,
+ 0x3c, 0xc3, 0x46, 0x7b, 0x42, 0x02, 0x6f, 0xc3, 0x46, 0xa2, 0x46, 0xb7,
+ 0xd4, 0x43, 0x46, 0xae, 0x0b, 0xc3, 0x46, 0xba, 0x07, 0x43, 0x46, 0xc6,
+ 0xc4, 0x03, 0xc8, 0x0e, 0x8c, 0x21, 0xc2, 0x02, 0xae, 0x0e, 0x8c, 0x18,
+ 0x46, 0x15, 0x04, 0xc3, 0x46, 0xd2, 0x4b, 0x90, 0x02, 0x43, 0x46, 0xe4,
+ 0x43, 0x03, 0x35, 0xc3, 0x46, 0xf0, 0x45, 0x00, 0x8c, 0x43, 0x47, 0x08,
+ 0x9f, 0x00, 0x84, 0x59, 0xa0, 0x00, 0x84, 0x60, 0xc2, 0x00, 0xd0, 0x05,
+ 0x53, 0x71, 0x83, 0x05, 0x53, 0x68, 0x83, 0x05, 0x53, 0x59, 0xc2, 0x19,
+ 0x2c, 0x05, 0x53, 0x28, 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x51, 0x06, 0x43,
+ 0x47, 0x14, 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x39, 0x83, 0x05, 0x53, 0x30,
+ 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x21, 0x83, 0x05, 0x53, 0x18, 0xc2, 0x00,
+ 0xd0, 0x05, 0x53, 0x11, 0x83, 0x05, 0x53, 0x08, 0xc2, 0x00, 0xd0, 0x05,
+ 0x4f, 0xf1, 0x83, 0x05, 0x4f, 0xe8, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0xe1,
+ 0x83, 0x05, 0x4f, 0xd9, 0x06, 0x43, 0x47, 0x1e, 0xc2, 0x00, 0xc1, 0x05,
+ 0x4f, 0x79, 0xc2, 0x19, 0x2c, 0x05, 0x4f, 0x38, 0xc2, 0x00, 0xd0, 0x05,
+ 0x4f, 0x61, 0x83, 0x05, 0x4f, 0x58, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x51,
+ 0x83, 0x05, 0x4f, 0x48, 0x04, 0xc3, 0x47, 0x28, 0x10, 0xc3, 0x47, 0x32,
+ 0xc3, 0xe5, 0xf0, 0x05, 0x4f, 0x11, 0x83, 0x00, 0x81, 0x11, 0x0d, 0xc3,
+ 0x47, 0x42, 0x09, 0xc3, 0x47, 0x4c, 0x05, 0xc3, 0x47, 0x56, 0xc2, 0x02,
+ 0x1c, 0x00, 0x83, 0xc9, 0xc2, 0x0e, 0x9a, 0x00, 0x83, 0xd9, 0xc3, 0x17,
+ 0xb2, 0x00, 0x83, 0xe9, 0xc2, 0x00, 0x87, 0x00, 0x83, 0xf1, 0xc3, 0x00,
+ 0xcf, 0x00, 0x84, 0x01, 0xc2, 0x00, 0xd0, 0x00, 0x84, 0x08, 0x97, 0x01,
+ 0x8f, 0xa0, 0x91, 0x0d, 0x8b, 0x31, 0x87, 0x0d, 0x8b, 0x29, 0x8b, 0x0d,
+ 0x8b, 0x21, 0x83, 0x01, 0x87, 0x70, 0x97, 0x01, 0x86, 0x19, 0x91, 0x01,
+ 0x8f, 0x98, 0x83, 0x01, 0x87, 0x19, 0x97, 0x01, 0x87, 0x29, 0x91, 0x01,
+ 0x87, 0x38, 0x83, 0x01, 0x87, 0xa9, 0x87, 0x01, 0x87, 0xb1, 0x97, 0x01,
+ 0x8f, 0x80, 0x8b, 0x01, 0x8f, 0x89, 0x97, 0x01, 0x8f, 0x90, 0x83, 0x01,
+ 0x8f, 0xa9, 0x8b, 0x01, 0x8f, 0xb1, 0x97, 0x01, 0x8f, 0xb9, 0x87, 0x01,
+ 0x8f, 0xc1, 0x91, 0x01, 0x8f, 0xc8, 0x83, 0x01, 0x8f, 0xd9, 0x8b, 0x01,
+ 0x8f, 0xe1, 0x97, 0x01, 0x8f, 0xe9, 0x87, 0x01, 0x8f, 0xf1, 0x91, 0x01,
+ 0x8f, 0xf8, 0x87, 0x0d, 0x89, 0x09, 0x8b, 0x0d, 0x89, 0x00, 0x4f, 0x60,
+ 0x3f, 0xc3, 0x47, 0x60, 0x45, 0x28, 0xb1, 0x43, 0x47, 0x7c, 0x94, 0x00,
+ 0x64, 0x5b, 0x03, 0x47, 0x94, 0x8e, 0x00, 0x64, 0x62, 0x03, 0x47, 0x98,
+ 0xcb, 0x90, 0x44, 0x00, 0x66, 0xe8, 0x83, 0x00, 0x64, 0xf9, 0xc2, 0x00,
+ 0xd0, 0x00, 0x65, 0x00, 0x83, 0x00, 0x65, 0x09, 0xc2, 0x00, 0xd0, 0x00,
+ 0x65, 0x10, 0x83, 0x00, 0x65, 0x99, 0xc2, 0x00, 0xdb, 0x00, 0x66, 0xf0,
+ 0xc4, 0x14, 0xdd, 0x01, 0x7d, 0x81, 0x88, 0x01, 0x7d, 0xa0, 0x44, 0x00,
+ 0xde, 0x43, 0x47, 0x9c, 0x8a, 0x01, 0x7b, 0x59, 0xc8, 0x92, 0xfa, 0x01,
+ 0x7d, 0x20, 0xc2, 0x01, 0xe2, 0x01, 0x78, 0x19, 0xc2, 0x00, 0x5f, 0x01,
+ 0x7d, 0x50, 0xc2, 0x00, 0xb1, 0x01, 0x7b, 0x69, 0xc3, 0x5f, 0x44, 0x01,
+ 0x7c, 0xa0, 0x44, 0xdf, 0x4b, 0xc3, 0x47, 0xa8, 0xc2, 0x01, 0xbb, 0x01,
+ 0x79, 0xb8, 0xc2, 0x02, 0x37, 0x01, 0x7b, 0xd1, 0xc2, 0x02, 0xa7, 0x01,
+ 0x7c, 0xc8, 0x92, 0x01, 0x79, 0xd9, 0xc2, 0x00, 0xc2, 0x01, 0x7a, 0x98,
+ 0x92, 0x01, 0x7a, 0x63, 0x03, 0x47, 0xb4, 0xc2, 0x02, 0x6f, 0x01, 0x7b,
+ 0x78, 0x90, 0x01, 0x7c, 0x99, 0xc2, 0x00, 0x40, 0x01, 0x7d, 0xd0, 0xc2,
+ 0x00, 0x61, 0x01, 0x79, 0xe1, 0x86, 0x01, 0x7d, 0xc0, 0xc4, 0xe3, 0x23,
+ 0x01, 0x79, 0xe9, 0xcc, 0x70, 0x8a, 0x01, 0x7a, 0xc8, 0xc2, 0x00, 0x8e,
+ 0x01, 0x78, 0xe9, 0x10, 0x43, 0x47, 0xba, 0xc3, 0x0e, 0x6b, 0x01, 0x7c,
+ 0x29, 0xc4, 0x03, 0x0e, 0x01, 0x7d, 0x00, 0xc2, 0x00, 0x8e, 0x01, 0x78,
+ 0xf8, 0x90, 0x01, 0x7a, 0x91, 0x99, 0x01, 0x7a, 0xb0, 0xca, 0x63, 0x9a,
+ 0x01, 0x7c, 0x78, 0x44, 0x23, 0x70, 0xc3, 0x47, 0xc4, 0x43, 0x71, 0xed,
+ 0x43, 0x47, 0xd0, 0x44, 0xdf, 0x37, 0xc3, 0x47, 0xdc, 0x43, 0x93, 0x74,
+ 0x43, 0x47, 0xe8, 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xd9, 0xc4, 0xe0, 0xaf,
+ 0x00, 0xcf, 0x58, 0x04, 0xc3, 0x47, 0xf4, 0x44, 0x71, 0xec, 0xc3, 0x48,
+ 0x00, 0x45, 0xda, 0x97, 0x43, 0x48, 0x0c, 0xc3, 0x38, 0x5b, 0x00, 0xcf,
+ 0xa9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x28, 0x02, 0x43, 0x48, 0x18, 0xce,
+ 0x2a, 0xfe, 0x0f, 0xd0, 0xa9, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xf8, 0xd2,
+ 0x4a, 0x2d, 0x0f, 0xd0, 0x41, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0xc9, 0xdf,
+ 0x0d, 0x00, 0x0f, 0xd0, 0xe9, 0x16, 0x43, 0x48, 0x28, 0xc7, 0x7a, 0x7f,
+ 0x08, 0xa2, 0x39, 0xc7, 0x14, 0x39, 0x08, 0xa2, 0x20, 0xc5, 0x40, 0xe7,
+ 0x08, 0xa2, 0x29, 0xc4, 0x1e, 0x97, 0x08, 0xa2, 0x10, 0x8e, 0x08, 0xa0,
+ 0x48, 0x94, 0x08, 0xa0, 0x38, 0x89, 0x00, 0xce, 0x10, 0xc2, 0x00, 0xe4,
+ 0x00, 0xcd, 0x59, 0x83, 0x00, 0xcc, 0x60, 0xc2, 0x02, 0x41, 0x00, 0xcd,
+ 0x49, 0x83, 0x00, 0xcc, 0x30, 0xc2, 0x02, 0x41, 0x00, 0xcd, 0x41, 0x83,
+ 0x00, 0xcc, 0x28, 0xc2, 0x00, 0xd0, 0x00, 0xcc, 0xc1, 0x83, 0x00, 0xcc,
+ 0xb8, 0x83, 0x00, 0xcc, 0x99, 0xc2, 0x01, 0x30, 0x00, 0xcc, 0x38, 0xc2,
+ 0x00, 0xd0, 0x00, 0xcc, 0x91, 0x83, 0x00, 0xcc, 0x89, 0xc2, 0x0d, 0xf6,
+ 0x00, 0xcc, 0x58, 0xc2, 0x00, 0xe4, 0x00, 0xcd, 0x51, 0x83, 0x00, 0xcc,
+ 0x48, 0xc2, 0x02, 0x41, 0x00, 0xcd, 0x39, 0x83, 0x00, 0xcc, 0x18, 0xc2,
+ 0x02, 0x41, 0x00, 0xcd, 0x31, 0x83, 0x00, 0xcc, 0x10, 0xc2, 0x00, 0xd0,
+ 0x00, 0xcc, 0xa9, 0x83, 0x00, 0xcc, 0xa0, 0x83, 0x00, 0xcc, 0x81, 0xc2,
+ 0x01, 0x30, 0x00, 0xcc, 0x20, 0xc2, 0x00, 0xd0, 0x00, 0xcc, 0x79, 0x83,
+ 0x00, 0xcc, 0x71, 0xc2, 0x0d, 0xf6, 0x00, 0xcc, 0x40, 0x9b, 0x00, 0xcd,
+ 0xf8, 0x9b, 0x00, 0xcd, 0xf0, 0x9b, 0x00, 0xcd, 0xd8, 0xc3, 0x18, 0x13,
+ 0x01, 0x27, 0xa1, 0xc3, 0x22, 0x45, 0x01, 0x27, 0x60, 0x00, 0x43, 0x48,
+ 0x34, 0x00, 0x43, 0x48, 0x46, 0xc7, 0x08, 0x79, 0x05, 0x41, 0x81, 0xc4,
+ 0x01, 0xce, 0x05, 0x41, 0x89, 0xc9, 0x67, 0x38, 0x05, 0x41, 0x99, 0xc6,
+ 0x06, 0xdb, 0x05, 0x41, 0xa0, 0xc8, 0x08, 0x79, 0x05, 0x41, 0x91, 0xca,
+ 0xa7, 0x88, 0x05, 0x41, 0xa8, 0xc2, 0x02, 0xe0, 0x0f, 0x3f, 0xf1, 0x8b,
+ 0x0f, 0x3f, 0xe8, 0xc2, 0x02, 0xe0, 0x0f, 0x3f, 0xe1, 0x8b, 0x0f, 0x3f,
+ 0xd8, 0x87, 0x0f, 0x3f, 0xd3, 0x03, 0x48, 0x5e, 0x8b, 0x0f, 0x3f, 0xc0,
+ 0x87, 0x0f, 0x3f, 0xbb, 0x03, 0x48, 0x62, 0x8b, 0x0f, 0x3f, 0xa8, 0xc2,
+ 0x02, 0xe0, 0x0f, 0x3f, 0xa1, 0x8b, 0x0f, 0x3f, 0x98, 0x87, 0x0f, 0x3f,
+ 0x93, 0x03, 0x48, 0x66, 0x8b, 0x0f, 0x3f, 0x80, 0xc2, 0x02, 0xe0, 0x0f,
+ 0x3f, 0x71, 0x8b, 0x0f, 0x3f, 0x68, 0x83, 0x00, 0x98, 0xf8, 0x87, 0x01,
+ 0x6c, 0xa8, 0x87, 0x0f, 0x3f, 0x50, 0x87, 0x0f, 0x3f, 0x20, 0x83, 0x0f,
+ 0x3f, 0x18, 0x91, 0x05, 0x59, 0x31, 0x87, 0x05, 0x59, 0x2b, 0x03, 0x48,
+ 0x6a, 0x83, 0x05, 0x59, 0x03, 0x03, 0x48, 0x6e, 0x8b, 0x05, 0x59, 0x11,
+ 0x97, 0x05, 0x59, 0x08, 0x83, 0x01, 0x6d, 0xd8, 0x87, 0x01, 0x6d, 0xe0,
+ 0x87, 0x05, 0x58, 0x60, 0x83, 0x00, 0x92, 0xd8, 0x87, 0x00, 0x92, 0xe0,
+ 0x83, 0x00, 0x96, 0x18, 0x87, 0x00, 0x96, 0x20, 0x83, 0x00, 0x96, 0x83,
+ 0x03, 0x48, 0x72, 0x97, 0x00, 0x96, 0x89, 0x8b, 0x00, 0x96, 0x91, 0x87,
+ 0x00, 0x96, 0xab, 0x03, 0x48, 0x76, 0x91, 0x00, 0x96, 0xb0, 0xd1, 0x50,
+ 0xbd, 0x01, 0x4f, 0x20, 0xd0, 0x03, 0xb7, 0x01, 0x4b, 0x89, 0xce, 0x33,
+ 0x92, 0x01, 0x53, 0x99, 0xc9, 0x60, 0xf3, 0x01, 0x53, 0x89, 0xcf, 0x09,
+ 0xf8, 0x01, 0x5a, 0x00, 0xe0, 0x04, 0xe7, 0x01, 0x53, 0xb8, 0xa1, 0x0e,
+ 0x92, 0x09, 0xa0, 0x0e, 0x92, 0x01, 0x9f, 0x0e, 0x91, 0xf9, 0x9e, 0x0e,
+ 0x91, 0xf1, 0x9d, 0x0e, 0x91, 0xe8, 0xa6, 0x0e, 0x91, 0xe1, 0xa5, 0x0e,
+ 0x91, 0xd9, 0xa4, 0x0e, 0x91, 0xd1, 0xa2, 0x0e, 0x91, 0xc9, 0xa0, 0x0e,
+ 0x91, 0xc1, 0x9f, 0x0e, 0x91, 0xb9, 0x9d, 0x0e, 0x91, 0xb0, 0xa6, 0x0e,
+ 0x91, 0xa9, 0xa5, 0x0e, 0x91, 0xa1, 0xa4, 0x0e, 0x91, 0x99, 0xa3, 0x0e,
+ 0x91, 0x91, 0x9f, 0x0e, 0x91, 0x89, 0x9d, 0x0e, 0x91, 0x80, 0xa6, 0x0e,
+ 0x91, 0x79, 0xa4, 0x0e, 0x91, 0x71, 0xa3, 0x0e, 0x91, 0x69, 0xa2, 0x0e,
+ 0x91, 0x61, 0xa1, 0x0e, 0x91, 0x59, 0xa0, 0x0e, 0x91, 0x50, 0xa6, 0x0e,
+ 0x91, 0x49, 0xa5, 0x0e, 0x91, 0x41, 0xa4, 0x0e, 0x91, 0x39, 0xa1, 0x0e,
+ 0x91, 0x31, 0xa0, 0x0e, 0x91, 0x29, 0x9f, 0x0e, 0x91, 0x21, 0x9e, 0x0e,
+ 0x91, 0x18, 0xa1, 0x0e, 0x90, 0xe1, 0xa0, 0x0e, 0x90, 0xd9, 0x9f, 0x0e,
+ 0x90, 0xd1, 0x9e, 0x0e, 0x90, 0xc9, 0x9d, 0x0e, 0x90, 0xc0, 0xa1, 0x0e,
+ 0x90, 0xb9, 0xa0, 0x0e, 0x90, 0xb1, 0x9f, 0x0e, 0x90, 0xa9, 0x9e, 0x0e,
+ 0x90, 0xa1, 0x9d, 0x0e, 0x90, 0x98, 0xa6, 0x0e, 0x90, 0x91, 0xa5, 0x0e,
+ 0x90, 0x89, 0xa4, 0x0e, 0x90, 0x81, 0xa3, 0x0e, 0x90, 0x79, 0xa2, 0x0e,
+ 0x90, 0x71, 0xa1, 0x0e, 0x90, 0x69, 0xa0, 0x0e, 0x90, 0x61, 0x9f, 0x0e,
+ 0x90, 0x59, 0x9e, 0x0e, 0x90, 0x51, 0x9d, 0x0e, 0x90, 0x48, 0xcb, 0x94,
+ 0x90, 0x00, 0xfe, 0xf9, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0xf1, 0xc5, 0x28,
+ 0x47, 0x00, 0xfe, 0xe8, 0xc4, 0xe3, 0xab, 0x00, 0xff, 0x71, 0xc5, 0x28,
+ 0x47, 0x00, 0xff, 0x69, 0xcb, 0x94, 0x90, 0x00, 0xfe, 0x08, 0xcf, 0x6b,
+ 0x25, 0x08, 0x0b, 0xb0, 0x42, 0x00, 0x7a, 0xc3, 0x48, 0x7a, 0xc3, 0x79,
+ 0xe7, 0x00, 0x1d, 0x0b, 0x03, 0x48, 0x8c, 0xc7, 0x78, 0x4a, 0x00, 0x1d,
+ 0x2b, 0x03, 0x48, 0x92, 0xc4, 0x29, 0xc6, 0x00, 0x1c, 0xcb, 0x03, 0x48,
+ 0x98, 0x07, 0xc3, 0x48, 0x9e, 0x03, 0xc3, 0x48, 0xb0, 0xc4, 0x89, 0xfe,
+ 0x00, 0x1b, 0x81, 0x12, 0xc3, 0x48, 0xbf, 0xc3, 0xe5, 0xb4, 0x00, 0x1b,
+ 0xf9, 0xc4, 0x93, 0xa9, 0x00, 0x1c, 0x91, 0xc5, 0x51, 0x51, 0x00, 0x1c,
+ 0x99, 0xc5, 0xdb, 0x4b, 0x00, 0x1c, 0xa1, 0xc4, 0xde, 0x9b, 0x00, 0x1c,
+ 0xb1, 0x16, 0xc3, 0x48, 0xd5, 0xc5, 0x8b, 0x65, 0x00, 0x1c, 0xd1, 0xc5,
+ 0xdd, 0x99, 0x00, 0x1c, 0xd9, 0xc2, 0x14, 0x48, 0x00, 0x1c, 0xe1, 0xc2,
+ 0x06, 0xc6, 0x00, 0x1c, 0xe9, 0xc2, 0x07, 0x49, 0x00, 0x1c, 0xf1, 0x15,
+ 0xc3, 0x48, 0xe1, 0xc3, 0x11, 0xee, 0x00, 0x1d, 0x38, 0x42, 0x00, 0x7a,
+ 0xc3, 0x48, 0xf3, 0xc7, 0x78, 0x4a, 0x00, 0x1e, 0x2b, 0x03, 0x49, 0x05,
+ 0xc3, 0x79, 0xe7, 0x00, 0x1e, 0x0b, 0x03, 0x49, 0x0b, 0xc4, 0x29, 0xc6,
+ 0x00, 0x1d, 0xcb, 0x03, 0x49, 0x11, 0x07, 0xc3, 0x49, 0x17, 0x03, 0xc3,
+ 0x49, 0x29, 0xc4, 0x89, 0xfe, 0x00, 0x1b, 0x89, 0xc4, 0x93, 0xa9, 0x00,
+ 0x1d, 0x91, 0xc5, 0x51, 0x51, 0x00, 0x1d, 0x99, 0x06, 0xc3, 0x49, 0x38,
+ 0xc4, 0xde, 0x9b, 0x00, 0x1d, 0xb1, 0x16, 0xc3, 0x49, 0x44, 0x0d, 0xc3,
+ 0x49, 0x50, 0xc5, 0xdd, 0x99, 0x00, 0x1d, 0xd9, 0xc2, 0x14, 0x48, 0x00,
+ 0x1d, 0xe1, 0xc2, 0x06, 0xc6, 0x00, 0x1d, 0xe9, 0xc2, 0x07, 0x49, 0x00,
+ 0x1d, 0xf1, 0x12, 0xc3, 0x49, 0x5c, 0xcb, 0x91, 0x2b, 0x00, 0x1e, 0x11,
+ 0x15, 0xc3, 0x49, 0x72, 0xc3, 0x11, 0xee, 0x00, 0x1e, 0x38, 0xd3, 0x1a,
+ 0x6b, 0x00, 0x1b, 0xd9, 0xda, 0x1a, 0x64, 0x00, 0x1b, 0xe8, 0xcb, 0x94,
+ 0x90, 0x00, 0xfe, 0x79, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0x71, 0xc5, 0x28,
+ 0x47, 0x00, 0xfe, 0x68, 0x4d, 0x37, 0xb4, 0xc3, 0x49, 0x88, 0xc5, 0xd6,
+ 0xe6, 0x00, 0x1e, 0xd1, 0xc4, 0x87, 0xf5, 0x00, 0x1f, 0x00, 0xcd, 0x7f,
+ 0xc1, 0x08, 0x0b, 0xc1, 0xca, 0x71, 0x88, 0x08, 0x0b, 0xf0, 0x44, 0x05,
+ 0x14, 0xc3, 0x49, 0xa4, 0x42, 0x02, 0x09, 0xc3, 0x49, 0xba, 0x44, 0x57,
+ 0x1d, 0x43, 0x49, 0xcc, 0xd1, 0x52, 0x88, 0x08, 0x0a, 0xc1, 0x48, 0xb9,
+ 0xaa, 0x43, 0x49, 0xdc, 0x48, 0xbd, 0x62, 0xc3, 0x49, 0xee, 0x4a, 0x9f,
+ 0xea, 0x43, 0x4a, 0x01, 0xc3, 0x02, 0x9f, 0x08, 0x0a, 0xdb, 0x03, 0x4a,
+ 0x10, 0xcc, 0x37, 0x61, 0x08, 0x0b, 0x60, 0xd4, 0x3d, 0xf4, 0x08, 0x0a,
+ 0xe9, 0xd5, 0x37, 0x58, 0x08, 0x0b, 0x78, 0xc6, 0x0e, 0xe0, 0x01, 0x54,
+ 0x01, 0xc5, 0x00, 0xd4, 0x01, 0x54, 0x12, 0x03, 0x4a, 0x16, 0xc8, 0x23,
+ 0xa0, 0x01, 0x54, 0x71, 0xcf, 0x02, 0x78, 0x01, 0x54, 0x80, 0xe0, 0x00,
+ 0xc7, 0x01, 0x54, 0xa0, 0x8e, 0x08, 0x9b, 0x08, 0x94, 0x08, 0x9b, 0x00,
+ 0xc6, 0x42, 0xd4, 0x00, 0xe5, 0xf0, 0xc6, 0x42, 0xd4, 0x00, 0x87, 0xf0,
+ 0x97, 0x01, 0x60, 0xf9, 0x8b, 0x01, 0x61, 0x00, 0xc3, 0x87, 0xc2, 0x01,
+ 0x61, 0x60, 0x97, 0x01, 0x62, 0x79, 0x8b, 0x01, 0x62, 0x80, 0xc3, 0x87,
+ 0xc2, 0x01, 0x62, 0xe0, 0x94, 0x00, 0x5b, 0x00, 0x8e, 0x00, 0x5b, 0x08,
+ 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xa9, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xf0,
+ 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xa1, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xe8,
+ 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xb1, 0xc8, 0x4b, 0x94, 0x0f, 0x68, 0xf8,
+ 0xc7, 0x0d, 0x04, 0x0f, 0x68, 0xb9, 0xc8, 0x4b, 0x94, 0x0f, 0x69, 0x00,
+ 0xc4, 0xdc, 0x2d, 0x08, 0x7b, 0xd9, 0xc3, 0x77, 0x79, 0x08, 0x7b, 0xe8,
+ 0xc8, 0x0d, 0x03, 0x08, 0x79, 0x28, 0x0a, 0xc3, 0x4a, 0x1c, 0x19, 0xc3,
+ 0x4a, 0x28, 0xc2, 0x00, 0xc4, 0x08, 0x79, 0x10, 0xc3, 0x0d, 0x14, 0x08,
+ 0x79, 0x09, 0xc3, 0x09, 0x9e, 0x08, 0x79, 0x00, 0x46, 0x26, 0xf7, 0xc3,
+ 0x4a, 0x32, 0xc3, 0xb5, 0x3e, 0x08, 0x78, 0xd1, 0x15, 0xc3, 0x4a, 0x5f,
+ 0xd0, 0x5d, 0xe2, 0x08, 0x78, 0xc1, 0xc2, 0x00, 0x67, 0x08, 0x78, 0xa1,
+ 0x03, 0xc3, 0x4a, 0x69, 0xc3, 0x20, 0x18, 0x08, 0x78, 0x71, 0xc3, 0x00,
+ 0x4e, 0x08, 0x78, 0x69, 0xc6, 0xcf, 0xd7, 0x08, 0x78, 0x61, 0xc4, 0xe0,
+ 0xe7, 0x08, 0x78, 0x59, 0xc4, 0x4a, 0xb9, 0x08, 0x78, 0x51, 0xc2, 0x01,
+ 0x7f, 0x08, 0x78, 0x2b, 0x03, 0x4a, 0x73, 0xc5, 0x4a, 0xb3, 0x08, 0x78,
+ 0x41, 0xc3, 0x7e, 0x89, 0x08, 0x78, 0x39, 0xc5, 0x9c, 0xa2, 0x08, 0x78,
+ 0x21, 0xc4, 0xe3, 0x27, 0x08, 0x78, 0x10, 0xc5, 0x45, 0x69, 0x08, 0x53,
+ 0xf1, 0xc3, 0x05, 0x14, 0x08, 0x53, 0xe8, 0x0a, 0xc3, 0x4a, 0x79, 0xc3,
+ 0x1e, 0x1b, 0x08, 0x53, 0xb9, 0xc2, 0x39, 0x8b, 0x08, 0x53, 0x48, 0x42,
+ 0x00, 0xd0, 0xc3, 0x4a, 0x85, 0xc5, 0x40, 0x9b, 0x08, 0x53, 0xa8, 0xc4,
+ 0xdf, 0xc3, 0x08, 0x53, 0xb1, 0xc4, 0x9c, 0xa3, 0x08, 0x53, 0xa0, 0xc3,
+ 0x11, 0xef, 0x08, 0x53, 0x31, 0x03, 0x43, 0x4a, 0x91, 0xc2, 0x00, 0x8e,
+ 0x08, 0x53, 0x10, 0xc3, 0x00, 0xb6, 0x08, 0x53, 0x59, 0xc4, 0x9b, 0x90,
+ 0x08, 0x53, 0x68, 0xc3, 0x00, 0x49, 0x08, 0x53, 0x89, 0xc2, 0x17, 0xb6,
+ 0x08, 0x53, 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x67, 0xf1, 0xc8, 0x4b, 0x94,
+ 0x08, 0x67, 0xf8, 0x96, 0x08, 0x67, 0x3b, 0x03, 0x4a, 0xa1, 0x9b, 0x08,
+ 0x66, 0xd1, 0x85, 0x08, 0x66, 0x28, 0x95, 0x08, 0x67, 0x80, 0x8a, 0x08,
+ 0x67, 0x49, 0x95, 0x08, 0x66, 0x30, 0x9b, 0x08, 0x67, 0x40, 0x9c, 0x08,
+ 0x67, 0x28, 0x92, 0x08, 0x67, 0x08, 0x9b, 0x08, 0x66, 0xb8, 0x9b, 0x08,
+ 0x66, 0x70, 0x96, 0x08, 0x65, 0x3b, 0x03, 0x4a, 0xa7, 0x9b, 0x08, 0x64,
+ 0xd1, 0x85, 0x08, 0x64, 0x28, 0x9b, 0x08, 0x65, 0x40, 0x9c, 0x08, 0x65,
+ 0x28, 0x92, 0x08, 0x65, 0x08, 0x9b, 0x08, 0x64, 0xb8, 0x9b, 0x08, 0x64,
+ 0x70, 0x95, 0x08, 0x64, 0x31, 0x8a, 0x08, 0x65, 0x48, 0x95, 0x08, 0x65,
+ 0x80, 0x8d, 0x08, 0x60, 0xe0, 0x96, 0x08, 0x62, 0x29, 0x95, 0x08, 0x61,
+ 0xf1, 0x94, 0x08, 0x61, 0xe1, 0x90, 0x08, 0x61, 0x21, 0x8e, 0x08, 0x61,
+ 0x01, 0x8d, 0x08, 0x60, 0xd1, 0x9b, 0x08, 0x60, 0xc1, 0x86, 0x08, 0x60,
+ 0x99, 0x89, 0x08, 0x60, 0x79, 0x84, 0x08, 0x60, 0x58, 0x8a, 0x08, 0x61,
+ 0xf8, 0x85, 0x08, 0x61, 0x41, 0x96, 0x08, 0x61, 0x31, 0x9b, 0x08, 0x61,
+ 0x51, 0x89, 0x08, 0x61, 0x68, 0x96, 0x08, 0x62, 0x31, 0x90, 0x08, 0x61,
+ 0x2b, 0x03, 0x4a, 0xad, 0x8d, 0x08, 0x60, 0xd9, 0x9b, 0x08, 0x60, 0xc9,
+ 0x89, 0x08, 0x60, 0x81, 0x84, 0x08, 0x60, 0x60, 0x96, 0x08, 0x61, 0x39,
+ 0x85, 0x08, 0x61, 0x49, 0x9b, 0x08, 0x61, 0x58, 0x8d, 0x08, 0x60, 0xe8,
+ 0xc2, 0x16, 0x1c, 0x08, 0x54, 0xd9, 0xc2, 0x00, 0x65, 0x08, 0x54, 0xc8,
+ 0x83, 0x08, 0x1d, 0x03, 0x03, 0x4a, 0xb1, 0x8b, 0x08, 0x1d, 0x09, 0x97,
+ 0x08, 0x1d, 0x11, 0x0d, 0xc3, 0x4a, 0xba, 0x09, 0xc3, 0x4a, 0xc2, 0x1a,
+ 0xc3, 0x4a, 0xca, 0xc2, 0x00, 0x64, 0x08, 0x1d, 0x41, 0x0c, 0xc3, 0x4a,
+ 0xd4, 0x16, 0xc3, 0x4a, 0xdc, 0x06, 0xc3, 0x4a, 0xea, 0xc2, 0x00, 0xb0,
+ 0x08, 0x1d, 0x89, 0x04, 0xc3, 0x4a, 0xf9, 0xc2, 0x00, 0x87, 0x08, 0x1d,
+ 0x99, 0x10, 0xc3, 0x4b, 0x06, 0x0f, 0xc3, 0x4b, 0x0e, 0xc2, 0x19, 0x2c,
+ 0x08, 0x1d, 0xc9, 0x18, 0xc3, 0x4b, 0x1a, 0x14, 0xc3, 0x4b, 0x22, 0xc2,
+ 0x00, 0xdb, 0x08, 0x1d, 0xf1, 0x15, 0xc3, 0x4b, 0x2a, 0xc2, 0x02, 0x1c,
+ 0x08, 0x1e, 0x01, 0xc2, 0x00, 0xd0, 0x08, 0x1e, 0x18, 0xc3, 0x05, 0x14,
+ 0x08, 0x1e, 0x89, 0x16, 0xc3, 0x4b, 0x3a, 0xc7, 0x0d, 0x04, 0x08, 0x1e,
+ 0xa8, 0xc3, 0xd3, 0x4c, 0x08, 0x1a, 0xb1, 0xc3, 0x02, 0x44, 0x08, 0x1a,
+ 0xc0, 0xc3, 0xc1, 0x4b, 0x08, 0x1b, 0x29, 0xc5, 0xdc, 0xf4, 0x08, 0x1b,
+ 0x30, 0x97, 0x08, 0x1b, 0x41, 0x8b, 0x08, 0x1b, 0x80, 0x96, 0x08, 0x1b,
+ 0x88, 0x8a, 0x08, 0x18, 0x71, 0x95, 0x08, 0x18, 0xf8, 0x95, 0x08, 0x18,
+ 0xd8, 0xce, 0x69, 0xa0, 0x0e, 0x7d, 0xa1, 0xc8, 0x4e, 0x4b, 0x0e, 0x7d,
+ 0x98, 0xc7, 0x4e, 0x43, 0x0e, 0x7d, 0xab, 0x03, 0x4b, 0x44, 0xc7, 0xa6,
+ 0x73, 0x0e, 0x7c, 0xa0, 0xce, 0x69, 0xa0, 0x0e, 0x7c, 0xc9, 0xc9, 0x92,
+ 0x8d, 0x0e, 0x7c, 0xc0, 0xc9, 0xac, 0xd5, 0x0e, 0x7d, 0x71, 0xc9, 0x92,
+ 0x8d, 0x0e, 0x7d, 0x69, 0xc8, 0xbc, 0xa2, 0x0e, 0x7d, 0x60, 0xca, 0xa6,
+ 0x70, 0x0e, 0x7d, 0x2b, 0x03, 0x4b, 0x48, 0xc9, 0x92, 0x8d, 0x0e, 0x7d,
+ 0x1a, 0x03, 0x4b, 0x4e, 0xd6, 0x2d, 0x0a, 0x0e, 0x7d, 0x00, 0xc9, 0x92,
+ 0x8d, 0x0e, 0x7c, 0xeb, 0x03, 0x4b, 0x54, 0xca, 0xa6, 0x70, 0x0e, 0x7c,
+ 0xe0, 0xcc, 0x87, 0x39, 0x0e, 0x7c, 0xf0, 0xc7, 0x92, 0x8f, 0x0e, 0x7c,
+ 0xb1, 0xcb, 0x92, 0x8b, 0x0e, 0x7c, 0xa8, 0xc8, 0x94, 0x9e, 0x0e, 0x7c,
+ 0x3b, 0x03, 0x4b, 0x5a, 0xd0, 0x5d, 0xb2, 0x0e, 0x7c, 0x71, 0xc5, 0xd4,
+ 0xca, 0x0e, 0x7c, 0x69, 0xc7, 0x78, 0xdb, 0x0e, 0x7c, 0x42, 0x03, 0x4b,
+ 0x60, 0xcb, 0x95, 0x56, 0x0e, 0x7c, 0x60, 0xc6, 0x78, 0xdc, 0x0e, 0x78,
+ 0xd9, 0x4b, 0x8e, 0xfa, 0x43, 0x4b, 0x66, 0xc5, 0x00, 0x2c, 0x0e, 0x78,
+ 0xa9, 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x48, 0xc8, 0xbc, 0x4a, 0x05, 0x4c,
+ 0x58, 0xc5, 0x00, 0x2c, 0x01, 0x2c, 0xe1, 0xc4, 0x00, 0x49, 0x01, 0x2c,
+ 0xd8, 0xc5, 0x00, 0x2c, 0x01, 0x2c, 0xd1, 0xd4, 0x3d, 0x54, 0x01, 0x2c,
+ 0xc8, 0x92, 0x05, 0x22, 0xa1, 0x9a, 0x05, 0x22, 0x90, 0x92, 0x05, 0x22,
+ 0x89, 0x9a, 0x05, 0x22, 0x79, 0x96, 0x05, 0x22, 0x70, 0x9a, 0x05, 0x22,
+ 0x40, 0x9a, 0x05, 0x22, 0x10, 0x9a, 0x05, 0x21, 0xc8, 0x92, 0x05, 0x21,
+ 0xc1, 0x9a, 0x05, 0x21, 0xb1, 0x96, 0x05, 0x21, 0xa8, 0x9a, 0x05, 0x1d,
+ 0x48, 0x9a, 0x05, 0x1d, 0x18, 0x9a, 0x05, 0x17, 0x89, 0x92, 0x05, 0x17,
+ 0x98, 0x9a, 0x05, 0x17, 0xc0, 0x9a, 0x05, 0x18, 0x08, 0x9a, 0x05, 0x18,
+ 0x38, 0x9a, 0x05, 0x03, 0xd1, 0x92, 0x05, 0x03, 0xe0, 0x9a, 0x05, 0x04,
+ 0x48, 0x9a, 0x05, 0x04, 0x78, 0x9a, 0x05, 0x0a, 0xa8, 0x9a, 0x05, 0x0b,
+ 0x30, 0x9a, 0x05, 0x21, 0x58, 0x92, 0x05, 0x21, 0x11, 0x9a, 0x05, 0x21,
+ 0x00, 0x92, 0x05, 0x20, 0xf9, 0x9a, 0x05, 0x20, 0xe9, 0x96, 0x05, 0x20,
+ 0xe0, 0x9a, 0x05, 0x1c, 0x90, 0x9a, 0x05, 0x1c, 0x60, 0x9a, 0x05, 0x1b,
+ 0xf0, 0x9a, 0x05, 0x1e, 0x20, 0x9a, 0x05, 0x1d, 0xf0, 0x92, 0x05, 0x1d,
+ 0x89, 0x9a, 0x05, 0x1d, 0x78, 0x9a, 0x05, 0x1a, 0x20, 0x9a, 0x05, 0x19,
+ 0x71, 0x92, 0x05, 0x19, 0x80, 0x9a, 0x05, 0x1b, 0xd0, 0x9a, 0x05, 0x1b,
+ 0xa0, 0x92, 0x05, 0x1b, 0x41, 0x9a, 0x05, 0x1b, 0x31, 0x96, 0x05, 0x1b,
+ 0x28, 0x92, 0x05, 0x16, 0xb9, 0x9a, 0x05, 0x16, 0xa9, 0x96, 0x05, 0x16,
+ 0xa0, 0x9a, 0x05, 0x17, 0x28, 0x9a, 0x05, 0x17, 0x58, 0x9a, 0x05, 0x1a,
+ 0xf8, 0x9a, 0x05, 0x1a, 0xc8, 0x9a, 0x05, 0x1a, 0x51, 0x92, 0x05, 0x1a,
+ 0x60, 0x96, 0x05, 0x12, 0x51, 0x9a, 0x05, 0x12, 0x59, 0x92, 0x05, 0x12,
+ 0x68, 0x9a, 0x05, 0x04, 0xa9, 0x92, 0x05, 0x04, 0xb8, 0x9a, 0x05, 0x04,
+ 0xe1, 0x92, 0x05, 0x04, 0xf0, 0x9a, 0x05, 0x05, 0x38, 0x9a, 0x05, 0x05,
+ 0x60, 0x96, 0x05, 0x0b, 0x61, 0x9a, 0x05, 0x0b, 0x69, 0x92, 0x05, 0x0b,
+ 0x78, 0x9a, 0x05, 0x0b, 0xa0, 0x9a, 0x05, 0x0c, 0xd9, 0x92, 0x05, 0x0c,
+ 0xe8, 0x9a, 0x05, 0x0d, 0x11, 0x92, 0x05, 0x0d, 0x20, 0x9a, 0x05, 0x0d,
+ 0x78, 0x9a, 0x05, 0x0d, 0xa8, 0x9a, 0x05, 0x12, 0x20, 0x9a, 0x05, 0x11,
+ 0xb1, 0x92, 0x05, 0x11, 0xc0, 0x96, 0x05, 0x02, 0xd1, 0x9a, 0x05, 0x02,
+ 0xd9, 0x92, 0x05, 0x02, 0xe8, 0x9a, 0x05, 0x03, 0x11, 0x92, 0x05, 0x03,
+ 0x20, 0x9a, 0x05, 0x03, 0x80, 0x9a, 0x05, 0x09, 0xd1, 0x92, 0x05, 0x09,
+ 0xe0, 0x9a, 0x05, 0x0a, 0x09, 0x92, 0x05, 0x0a, 0x18, 0x9a, 0x05, 0x0a,
+ 0x78, 0x9a, 0x05, 0x10, 0xb9, 0x92, 0x05, 0x10, 0xc8, 0x96, 0x05, 0x10,
+ 0xf1, 0x9a, 0x05, 0x10, 0xf9, 0x92, 0x05, 0x11, 0x08, 0x9a, 0x05, 0x11,
+ 0x70, 0x97, 0x00, 0xb0, 0xab, 0x03, 0x4b, 0x72, 0x8b, 0x00, 0xb0, 0xd0,
+ 0x91, 0x00, 0xae, 0x13, 0x03, 0x4b, 0x76, 0x83, 0x00, 0xae, 0x19, 0x8b,
+ 0x00, 0xae, 0x09, 0x87, 0x00, 0xae, 0x00, 0x91, 0x00, 0xac, 0xcb, 0x03,
+ 0x4b, 0x7a, 0xc2, 0x00, 0x28, 0x00, 0xc7, 0x51, 0x83, 0x00, 0xac, 0xd1,
+ 0x8b, 0x00, 0xac, 0xc1, 0x87, 0x00, 0xac, 0xb8, 0x83, 0x08, 0xd5, 0xd8,
+ 0x91, 0x08, 0xd5, 0xc8, 0x8b, 0x08, 0xd5, 0xb8, 0x83, 0x08, 0xd5, 0xa8,
+ 0x91, 0x08, 0xd5, 0x98, 0x8b, 0x08, 0xd5, 0x88, 0x83, 0x00, 0xa8, 0x70,
+ 0x10, 0xc3, 0x4b, 0x7e, 0x87, 0x00, 0xa2, 0x98, 0x83, 0x00, 0xb1, 0x69,
+ 0x8b, 0x00, 0xb1, 0x61, 0x87, 0x00, 0xb1, 0x53, 0x03, 0x4b, 0x8a, 0x91,
+ 0x00, 0xb1, 0x49, 0x97, 0x00, 0xb1, 0x40, 0x97, 0x00, 0xb2, 0x41, 0x91,
+ 0x00, 0xb2, 0x49, 0x87, 0x00, 0xb2, 0x53, 0x03, 0x4b, 0x8e, 0x8b, 0x00,
+ 0xb2, 0x61, 0x83, 0x00, 0xb2, 0x68, 0x87, 0x00, 0xb0, 0xc0, 0x97, 0x00,
+ 0xb0, 0xe1, 0x91, 0x00, 0xb0, 0xe9, 0x87, 0x00, 0xb0, 0xf3, 0x03, 0x4b,
+ 0x92, 0x8b, 0x00, 0xb1, 0x01, 0x83, 0x00, 0xb1, 0x08, 0x83, 0x00, 0xc7,
+ 0x81, 0x97, 0x00, 0xc7, 0x68, 0x83, 0x00, 0xc7, 0x78, 0x87, 0x00, 0xaf,
+ 0x90, 0x83, 0x00, 0xae, 0x49, 0x8b, 0x00, 0xae, 0x41, 0x87, 0x00, 0xae,
+ 0x33, 0x03, 0x4b, 0x96, 0x91, 0x00, 0xae, 0x29, 0x97, 0x00, 0xae, 0x20,
+ 0x15, 0xc3, 0x4b, 0x9a, 0x83, 0x00, 0xaf, 0x39, 0x8b, 0x00, 0xaf, 0x31,
+ 0x87, 0x00, 0xaf, 0x23, 0x03, 0x4b, 0xb1, 0x91, 0x00, 0xaf, 0x19, 0x97,
+ 0x00, 0xaf, 0x10, 0x83, 0x00, 0xb3, 0x01, 0x8b, 0x00, 0xb2, 0xf9, 0x87,
+ 0x00, 0xb2, 0xeb, 0x03, 0x4b, 0xb5, 0x97, 0x00, 0xb2, 0xd9, 0x91, 0x00,
+ 0xb2, 0xe0, 0x83, 0x00, 0xaf, 0x09, 0x8b, 0x00, 0xaf, 0x01, 0x87, 0x00,
+ 0xae, 0xf3, 0x03, 0x4b, 0xb9, 0x91, 0x00, 0xae, 0xe9, 0x97, 0x00, 0xae,
+ 0xe0, 0x0a, 0xc3, 0x4b, 0xbd, 0x97, 0x00, 0xb1, 0xd1, 0x91, 0x00, 0xb1,
+ 0xd9, 0x87, 0x00, 0xb1, 0xe3, 0x03, 0x4b, 0xd4, 0x8b, 0x00, 0xb1, 0xf1,
+ 0x83, 0x00, 0xb1, 0xf8, 0x87, 0x00, 0xb3, 0x20, 0x87, 0x00, 0xb0, 0x88,
+ 0x87, 0x00, 0xb0, 0x58, 0x87, 0x00, 0xb0, 0x28, 0x83, 0x00, 0xb0, 0x01,
+ 0x8b, 0x00, 0xaf, 0xf9, 0x87, 0x00, 0xaf, 0xeb, 0x03, 0x4b, 0xd8, 0x91,
+ 0x00, 0xaf, 0xe1, 0x97, 0x00, 0xaf, 0xd8, 0x83, 0x00, 0xaf, 0xd1, 0x8b,
+ 0x00, 0xaf, 0xc9, 0x87, 0x00, 0xaf, 0xbb, 0x03, 0x4b, 0xdc, 0x91, 0x00,
+ 0xaf, 0xb1, 0x97, 0x00, 0xaf, 0xa8, 0x87, 0x00, 0xaf, 0x58, 0x83, 0x00,
+ 0xae, 0xd9, 0x8b, 0x00, 0xae, 0xd1, 0x87, 0x00, 0xae, 0xc3, 0x03, 0x4b,
+ 0xe0, 0x91, 0x00, 0xae, 0xb9, 0x97, 0x00, 0xae, 0xb0, 0x87, 0x00, 0xae,
+ 0x98, 0x87, 0x00, 0xae, 0x68, 0x83, 0x00, 0xb1, 0x99, 0x8b, 0x00, 0xb1,
+ 0x91, 0x87, 0x00, 0xb1, 0x83, 0x03, 0x4b, 0xe4, 0x91, 0x00, 0xb1, 0x79,
+ 0x97, 0x00, 0xb1, 0x70, 0x87, 0x00, 0xb1, 0x28, 0x87, 0x00, 0xb2, 0x18,
+ 0x87, 0x00, 0xb2, 0x88, 0x97, 0x00, 0xb2, 0xa1, 0x91, 0x00, 0xb2, 0xa9,
+ 0x87, 0x00, 0xb2, 0xb3, 0x03, 0x4b, 0xe8, 0x8b, 0x00, 0xb2, 0xc1, 0x83,
+ 0x00, 0xb2, 0xc8, 0x83, 0x00, 0xaa, 0x6b, 0x03, 0x4b, 0xec, 0x91, 0x00,
+ 0xaa, 0x53, 0x03, 0x4b, 0xf0, 0x87, 0x00, 0xaa, 0x21, 0x19, 0x43, 0x4b,
+ 0xf4, 0x83, 0x00, 0xac, 0x69, 0x91, 0x00, 0xac, 0x61, 0x8b, 0x00, 0xac,
+ 0x59, 0x87, 0x00, 0xac, 0x51, 0xc3, 0x14, 0x72, 0x00, 0xaa, 0x78, 0xc4,
+ 0xdf, 0xc7, 0x00, 0xab, 0x49, 0x19, 0x43, 0x4c, 0x0d, 0x19, 0x43, 0x4c,
+ 0x26, 0x42, 0x15, 0xa6, 0xc3, 0x4c, 0x3f, 0x19, 0x43, 0x4c, 0x58, 0x19,
+ 0x43, 0x4c, 0x71, 0x91, 0x00, 0xa4, 0xcb, 0x03, 0x4c, 0x8a, 0x8b, 0x00,
+ 0xa4, 0xab, 0x03, 0x4c, 0x8e, 0x87, 0x00, 0xa4, 0x99, 0x83, 0x00, 0xa4,
+ 0xea, 0x03, 0x4c, 0x92, 0x83, 0x00, 0xa0, 0xc3, 0x03, 0x4c, 0x96, 0x91,
+ 0x00, 0xa0, 0x9b, 0x03, 0x4c, 0x9a, 0x8b, 0x00, 0xa0, 0x7b, 0x03, 0x4c,
+ 0x9e, 0x87, 0x00, 0xa0, 0x68, 0x83, 0x00, 0xa3, 0xfb, 0x03, 0x4c, 0xa2,
+ 0x87, 0x00, 0xa3, 0xa9, 0x8b, 0x00, 0xa3, 0xbb, 0x03, 0x4c, 0xa6, 0x91,
+ 0x00, 0xa3, 0xda, 0x03, 0x4c, 0xaa, 0x19, 0x43, 0x4c, 0xae, 0x87, 0x00,
+ 0xa6, 0x51, 0x83, 0x00, 0xa6, 0x62, 0x03, 0x4c, 0xc7, 0x19, 0xc3, 0x4c,
+ 0xcb, 0x83, 0x00, 0xac, 0xf1, 0x91, 0x00, 0xac, 0xe9, 0x8b, 0x00, 0xac,
+ 0xe1, 0x87, 0x00, 0xac, 0xd8, 0xcd, 0x61, 0x8b, 0x00, 0xa1, 0x19, 0xc2,
+ 0x00, 0x75, 0x00, 0xa1, 0x20, 0xc5, 0x31, 0xee, 0x00, 0xa1, 0x29, 0xd6,
+ 0x2e, 0xee, 0x00, 0xa1, 0x30, 0x91, 0x00, 0xc6, 0x68, 0x8b, 0x00, 0xc6,
+ 0x48, 0x8b, 0x0f, 0x01, 0x01, 0x97, 0x0f, 0x00, 0xf8, 0xc8, 0xb5, 0x5a,
+ 0x0e, 0x92, 0x19, 0xc6, 0xcd, 0xe5, 0x0e, 0x92, 0x10, 0xc2, 0x00, 0xb0,
+ 0x08, 0x9b, 0xa1, 0xc2, 0x07, 0xb2, 0x08, 0x9b, 0x99, 0xc2, 0x00, 0xc1,
+ 0x08, 0x9b, 0x91, 0xc2, 0x02, 0x2b, 0x08, 0x9b, 0x89, 0x83, 0x08, 0x9b,
+ 0x80, 0xc3, 0x22, 0xcb, 0x08, 0x9b, 0x61, 0x08, 0xc3, 0x4c, 0xe6, 0x16,
+ 0xc3, 0x4c, 0xf2, 0xc3, 0x05, 0x14, 0x08, 0x9b, 0x39, 0xc4, 0x15, 0xe7,
+ 0x08, 0x9b, 0x30, 0xcb, 0x8e, 0xef, 0x00, 0xee, 0x41, 0xc6, 0x60, 0xb1,
+ 0x00, 0xee, 0x28, 0xc6, 0x09, 0x01, 0x00, 0x18, 0x03, 0x03, 0x4c, 0xfe,
+ 0xc9, 0x2b, 0x5f, 0x00, 0x1a, 0x00, 0x00, 0xc3, 0x4d, 0x04, 0x45, 0x03,
+ 0xe3, 0x43, 0x4d, 0x10, 0xcb, 0x95, 0xe5, 0x01, 0x06, 0x89, 0x48, 0xbc,
+ 0x3a, 0x43, 0x4d, 0x1a, 0xcb, 0x93, 0xe0, 0x00, 0xd6, 0x21, 0xcb, 0x92,
+ 0xe3, 0x00, 0xd6, 0x10, 0x00, 0xc3, 0x4d, 0x26, 0x45, 0x03, 0xe3, 0x43,
+ 0x4d, 0x32, 0xc5, 0x00, 0xd4, 0x00, 0x18, 0xd1, 0xc5, 0x05, 0x02, 0x00,
+ 0x1a, 0x48, 0xc5, 0x05, 0x02, 0x00, 0x18, 0xe1, 0xc5, 0x00, 0xd4, 0x00,
+ 0x1a, 0x88, 0xc9, 0x20, 0xa8, 0x00, 0xef, 0xa1, 0xdb, 0x19, 0x11, 0x00,
+ 0xef, 0x80, 0xc9, 0x20, 0xa8, 0x00, 0xef, 0x99, 0xdb, 0x19, 0x11, 0x00,
+ 0xef, 0x68, 0xc7, 0xa6, 0x69, 0x00, 0xef, 0x19, 0xc5, 0x05, 0x02, 0x00,
+ 0xee, 0x50, 0x86, 0x00, 0xee, 0xc1, 0x96, 0x00, 0xd6, 0x71, 0x94, 0x00,
+ 0xd6, 0x69, 0x89, 0x00, 0xd6, 0x60, 0xce, 0x42, 0x34, 0x01, 0x07, 0x31,
+ 0x45, 0x02, 0x6d, 0x43, 0x4d, 0x3e, 0xc6, 0x05, 0x01, 0x00, 0xef, 0xe0,
+ 0x49, 0x60, 0xf4, 0xc3, 0x4d, 0x4a, 0xd0, 0x57, 0x92, 0x00, 0xd5, 0xe0,
+ 0xce, 0x6d, 0xf6, 0x00, 0xd5, 0xc1, 0xc7, 0x7d, 0xa5, 0x00, 0x19, 0xf8,
+ 0xc8, 0x65, 0xaa, 0x00, 0x1a, 0xd1, 0xd4, 0x3c, 0x64, 0x00, 0x1b, 0x10,
+ 0xc6, 0x05, 0x01, 0x00, 0x1a, 0xe0, 0xc6, 0x05, 0x01, 0x00, 0x1a, 0xf8,
+ 0x00, 0x43, 0x4d, 0x56, 0xc5, 0x00, 0x48, 0x00, 0xef, 0xd0, 0x00, 0x43,
+ 0x4d, 0x62, 0xc4, 0x18, 0x10, 0x05, 0x47, 0x39, 0xc2, 0x22, 0xcc, 0x05,
+ 0x47, 0x30, 0xc3, 0x0d, 0x14, 0x05, 0x47, 0x29, 0xc3, 0x09, 0x9e, 0x05,
+ 0x47, 0x20, 0xc4, 0x02, 0xde, 0x05, 0x47, 0x19, 0xc2, 0x02, 0xa0, 0x05,
+ 0x47, 0x10, 0xc9, 0x0f, 0x6e, 0x07, 0xf1, 0x71, 0xca, 0x09, 0xb7, 0x07,
+ 0xf1, 0x78, 0xc3, 0xe6, 0x62, 0x01, 0x6f, 0xa8, 0x87, 0x05, 0x34, 0xf9,
+ 0x83, 0x01, 0x6f, 0xe1, 0xc7, 0xc8, 0x00, 0x01, 0x6f, 0xf8, 0x83, 0x01,
+ 0x6f, 0x91, 0xc3, 0x1c, 0x63, 0x01, 0x6f, 0x98, 0xc6, 0x05, 0x01, 0x00,
+ 0x19, 0x78, 0xc3, 0x03, 0x0c, 0x01, 0x65, 0xa9, 0xc3, 0xb8, 0xf8, 0x01,
+ 0x65, 0xf9, 0x42, 0x01, 0xe2, 0xc3, 0x4d, 0x6e, 0xc3, 0x26, 0x1a, 0x01,
+ 0x66, 0x39, 0x0a, 0xc3, 0x4d, 0x7a, 0xc6, 0xd0, 0x3d, 0x01, 0x66, 0xb9,
+ 0xc3, 0xe5, 0x24, 0x01, 0x66, 0xc8, 0xc5, 0xda, 0x9c, 0x01, 0x66, 0xe9,
+ 0x10, 0xc3, 0x4d, 0x8d, 0xc3, 0xe4, 0xf4, 0x01, 0x67, 0x18, 0xc3, 0x03,
+ 0x0c, 0x01, 0x65, 0xa1, 0xc3, 0xb8, 0xf8, 0x01, 0x65, 0xf1, 0x42, 0x01,
+ 0xe2, 0xc3, 0x4d, 0x99, 0xc3, 0x26, 0x1a, 0x01, 0x66, 0x31, 0x0a, 0xc3,
+ 0x4d, 0xa5, 0xc6, 0xd0, 0x3d, 0x01, 0x66, 0xb1, 0xc3, 0xe5, 0x24, 0x01,
+ 0x66, 0xc0, 0xc5, 0xda, 0x9c, 0x01, 0x66, 0xe1, 0x10, 0xc3, 0x4d, 0xb8,
+ 0xc3, 0xe4, 0xf4, 0x01, 0x67, 0x10, 0x46, 0x00, 0x8b, 0x43, 0x4d, 0xc4,
+ 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x70, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xc0,
+ 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x80, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xc8,
+ 0xc2, 0x00, 0xd3, 0x01, 0x93, 0x98, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xd0,
+ 0x83, 0x01, 0x93, 0xa9, 0x97, 0x01, 0x93, 0xf0, 0xc2, 0x00, 0xd3, 0x01,
+ 0x93, 0xb0, 0xc2, 0x00, 0xd3, 0x01, 0x93, 0xb8, 0xc4, 0x18, 0x10, 0x01,
+ 0x23, 0x31, 0xc2, 0x22, 0xcc, 0x01, 0x23, 0x28, 0xc3, 0x0d, 0x14, 0x01,
+ 0x23, 0x21, 0xc3, 0x09, 0x9e, 0x01, 0x23, 0x18, 0xc4, 0x02, 0xde, 0x01,
+ 0x23, 0x11, 0xc2, 0x02, 0xa0, 0x01, 0x23, 0x08, 0x00, 0x43, 0x4d, 0xd0,
+ 0x00, 0x43, 0x4d, 0xee, 0xd0, 0x55, 0xa8, 0x01, 0x92, 0x60, 0x00, 0x43,
+ 0x4e, 0x0c, 0xc3, 0x18, 0x11, 0x01, 0x94, 0x31, 0xc4, 0xe3, 0x8b, 0x01,
+ 0x94, 0xc8, 0x90, 0x01, 0x94, 0x81, 0xc6, 0xd2, 0x8f, 0x01, 0x94, 0xe1,
+ 0xc7, 0xc8, 0x54, 0x01, 0x95, 0x60, 0xc3, 0x04, 0x20, 0x01, 0x94, 0x89,
+ 0xc3, 0xe5, 0x0f, 0x01, 0x95, 0x58, 0xc2, 0x00, 0x5f, 0x01, 0x94, 0x21,
+ 0xc2, 0x01, 0x19, 0x01, 0x94, 0x59, 0xc7, 0xc4, 0xf0, 0x01, 0x94, 0xb0,
+ 0xc2, 0x02, 0x6f, 0x01, 0x94, 0x41, 0xc3, 0x00, 0x2e, 0x01, 0x95, 0x80,
+ 0xc3, 0x01, 0x6f, 0x01, 0x94, 0x71, 0xc6, 0xca, 0xc1, 0x01, 0x95, 0x48,
+ 0xcc, 0x7b, 0x3d, 0x01, 0x94, 0xb9, 0xc2, 0x18, 0x8b, 0x01, 0x95, 0x11,
+ 0xc5, 0xc7, 0xc8, 0x01, 0x95, 0x18, 0x15, 0xc3, 0x4e, 0x2a, 0xc6, 0xce,
+ 0x75, 0x01, 0x95, 0x50, 0x17, 0xc3, 0x4e, 0x34, 0xc6, 0xcd, 0x79, 0x09,
+ 0x29, 0xf8, 0xc4, 0xe1, 0x9f, 0x09, 0x29, 0xf1, 0xc2, 0x05, 0x1d, 0x09,
+ 0x19, 0xd8, 0xc4, 0xdc, 0xae, 0x09, 0x1a, 0x71, 0x86, 0x09, 0x1a, 0x69,
+ 0xc9, 0xab, 0x25, 0x09, 0x1a, 0x60, 0xc3, 0x69, 0x97, 0x09, 0x1a, 0x51,
+ 0xc2, 0x01, 0x7f, 0x09, 0x1a, 0x48, 0xc2, 0x01, 0xe2, 0x09, 0x1a, 0x21,
+ 0x8f, 0x09, 0x1a, 0x19, 0xc2, 0x04, 0x2b, 0x09, 0x1a, 0x10, 0x97, 0x09,
+ 0x1a, 0x01, 0x83, 0x09, 0x19, 0xe2, 0x03, 0x4e, 0x3c, 0xc5, 0xcb, 0x88,
+ 0x09, 0x19, 0xc8, 0x17, 0xc3, 0x4e, 0x4a, 0xc3, 0x20, 0x18, 0x09, 0x19,
+ 0x81, 0xc2, 0x00, 0xd0, 0x09, 0x19, 0x79, 0x03, 0x43, 0x4e, 0x55, 0xc5,
+ 0x39, 0xc7, 0x09, 0x18, 0xc0, 0x97, 0x09, 0x17, 0xb9, 0x87, 0x09, 0x17,
+ 0xb0, 0xe0, 0x04, 0x47, 0x09, 0x17, 0x88, 0xda, 0x1a, 0xe6, 0x09, 0x18,
+ 0x20, 0xcb, 0x8d, 0xc6, 0x09, 0x29, 0xb9, 0xcc, 0x84, 0x21, 0x09, 0x29,
+ 0xb0, 0xc3, 0x40, 0xe7, 0x09, 0x29, 0xa9, 0xc4, 0xe3, 0xa3, 0x09, 0x29,
+ 0xa1, 0xc4, 0xc5, 0xa3, 0x09, 0x29, 0x98, 0x00, 0x43, 0x4e, 0x5f, 0x97,
+ 0x09, 0x15, 0xab, 0x03, 0x4e, 0x6b, 0xc3, 0x05, 0x9e, 0x09, 0x15, 0xa1,
+ 0xc4, 0x5d, 0xd2, 0x09, 0x15, 0x99, 0xc2, 0x02, 0x6f, 0x09, 0x15, 0x91,
+ 0xc4, 0x38, 0xa9, 0x09, 0x15, 0x89, 0xc3, 0x62, 0x19, 0x09, 0x15, 0x81,
+ 0x83, 0x09, 0x15, 0x78, 0xd6, 0x2b, 0xd6, 0x09, 0x16, 0xa9, 0xc4, 0x58,
+ 0xf5, 0x09, 0x16, 0xa0, 0xc3, 0x13, 0x51, 0x09, 0x16, 0x89, 0xc3, 0x49,
+ 0x41, 0x09, 0x16, 0x81, 0xc3, 0x65, 0x57, 0x09, 0x16, 0x79, 0xc6, 0xd0,
+ 0x97, 0x09, 0x16, 0x71, 0xc3, 0x04, 0x2a, 0x09, 0x16, 0x63, 0x03, 0x4e,
+ 0x71, 0xc3, 0x1a, 0xf4, 0x09, 0x16, 0x59, 0xc3, 0x03, 0x30, 0x09, 0x16,
+ 0x51, 0x04, 0xc3, 0x4e, 0x77, 0x83, 0x09, 0x16, 0x38, 0xc2, 0x03, 0x4e,
+ 0x09, 0x16, 0x29, 0x83, 0x09, 0x16, 0x20, 0x42, 0x01, 0x6f, 0xc3, 0x4e,
+ 0x83, 0x15, 0xc3, 0x4e, 0x8d, 0xc2, 0x00, 0xc4, 0x09, 0x29, 0x71, 0xc8,
+ 0x6a, 0x1e, 0x09, 0x1c, 0xb1, 0x17, 0xc3, 0x4e, 0x97, 0xc3, 0x20, 0x18,
+ 0x09, 0x14, 0xf1, 0xc2, 0x02, 0x2f, 0x09, 0x14, 0xe9, 0xc3, 0x81, 0xc8,
+ 0x09, 0x14, 0xe1, 0x0d, 0xc3, 0x4e, 0xad, 0xc2, 0x00, 0xd0, 0x09, 0x14,
+ 0xc9, 0xc2, 0x05, 0xc3, 0x09, 0x14, 0xbb, 0x03, 0x4e, 0xb9, 0x83, 0x09,
+ 0x14, 0xb0, 0xc9, 0xa9, 0xab, 0x09, 0x29, 0x68, 0x97, 0x09, 0x29, 0x53,
+ 0x03, 0x4e, 0xbd, 0xcc, 0x36, 0x5c, 0x09, 0x29, 0x49, 0x0f, 0xc3, 0x4e,
+ 0xd5, 0xc7, 0xc9, 0x26, 0x09, 0x29, 0x39, 0xc5, 0xdd, 0x0d, 0x09, 0x29,
+ 0x31, 0xc2, 0x00, 0x0a, 0x09, 0x29, 0x29, 0x09, 0xc3, 0x4e, 0xe1, 0xc8,
+ 0xb9, 0x4a, 0x09, 0x29, 0x11, 0xc3, 0x15, 0x2e, 0x09, 0x1c, 0x89, 0xc3,
+ 0x04, 0x65, 0x09, 0x12, 0xd3, 0x03, 0x4e, 0xec, 0x10, 0xc3, 0x4e, 0xf2,
+ 0x03, 0x43, 0x4e, 0xfc, 0xcf, 0x68, 0xcd, 0x09, 0x13, 0xc3, 0x03, 0x4f,
+ 0x09, 0x4a, 0xa4, 0xa4, 0x43, 0x4f, 0x0f, 0xd1, 0x56, 0xea, 0x09, 0x13,
+ 0x60, 0xc3, 0x5d, 0xd1, 0x09, 0x13, 0x41, 0xc3, 0x13, 0x51, 0x09, 0x13,
+ 0x33, 0x03, 0x4f, 0x4b, 0xc4, 0x4a, 0x0f, 0x09, 0x13, 0x29, 0xc3, 0x1a,
+ 0xf4, 0x09, 0x13, 0x20, 0x47, 0x03, 0x4c, 0x43, 0x4f, 0x51, 0xc2, 0x02,
+ 0x1c, 0x09, 0x11, 0xa9, 0xc3, 0x51, 0xdb, 0x09, 0x11, 0xa1, 0x83, 0x09,
+ 0x11, 0x98, 0x46, 0x03, 0x4d, 0xc3, 0x4f, 0x63, 0xc4, 0x39, 0xc8, 0x09,
+ 0x11, 0xe8, 0x45, 0x03, 0x4e, 0xc3, 0x4f, 0x76, 0xc3, 0x58, 0xf6, 0x09,
+ 0x10, 0x88, 0xc6, 0x6c, 0xd1, 0x09, 0x10, 0xab, 0x03, 0x4f, 0xc6, 0xc6,
+ 0x0b, 0x0a, 0x09, 0x10, 0xa0, 0xcd, 0x7c, 0x0c, 0x09, 0x10, 0xc9, 0xc9,
+ 0xb2, 0x3f, 0x09, 0x10, 0xc0, 0x47, 0x03, 0x4c, 0x43, 0x4f, 0xcc, 0x47,
+ 0x03, 0x4c, 0x43, 0x4f, 0xf7, 0xa2, 0x09, 0x27, 0xf1, 0xa0, 0x09, 0x27,
+ 0xe9, 0x9f, 0x09, 0x27, 0xe1, 0x9d, 0x09, 0x27, 0xd8, 0xa4, 0x09, 0x27,
+ 0xc1, 0x9d, 0x09, 0x27, 0xb8, 0xa6, 0x09, 0x27, 0x8b, 0x03, 0x50, 0x1d,
+ 0x9e, 0x09, 0x27, 0x80, 0xa1, 0x09, 0x27, 0x71, 0xa0, 0x09, 0x27, 0x68,
+ 0xa5, 0x09, 0x27, 0x61, 0xa4, 0x09, 0x27, 0x59, 0xa0, 0x09, 0x27, 0x50,
+ 0xa3, 0x09, 0x27, 0x49, 0xa2, 0x09, 0x27, 0x40, 0xa5, 0x09, 0x27, 0x31,
+ 0xa2, 0x09, 0x27, 0x29, 0x9d, 0x09, 0x27, 0x20, 0xa6, 0x09, 0x27, 0x19,
+ 0x9d, 0x09, 0x27, 0x10, 0xce, 0x71, 0x3e, 0x09, 0x26, 0xf1, 0x9d, 0x09,
+ 0x26, 0xe8, 0x9e, 0x09, 0x26, 0xd1, 0x9d, 0x09, 0x26, 0xc8, 0xa2, 0x09,
+ 0x26, 0xb9, 0x9e, 0x09, 0x26, 0xb0, 0x46, 0x03, 0x4d, 0xc3, 0x50, 0x23,
+ 0xc7, 0x0b, 0x09, 0x09, 0x0f, 0x58, 0xc4, 0x39, 0xc8, 0x09, 0x0f, 0x7b,
+ 0x03, 0x50, 0x6d, 0xc9, 0xa6, 0x49, 0x09, 0x0f, 0x6a, 0x03, 0x50, 0x73,
+ 0x9f, 0x09, 0x1c, 0x38, 0x8d, 0x09, 0x0b, 0x78, 0x86, 0x09, 0x0b, 0x88,
+ 0x94, 0x09, 0x0a, 0xf1, 0xc3, 0x03, 0x47, 0x09, 0x0a, 0xe9, 0x86, 0x09,
+ 0x0a, 0xe0, 0x97, 0x09, 0x0c, 0x1b, 0x03, 0x50, 0x79, 0xc2, 0x02, 0xfb,
+ 0x09, 0x0c, 0x11, 0x87, 0x09, 0x0c, 0x09, 0x83, 0x09, 0x0c, 0x00, 0x94,
+ 0x09, 0x0b, 0xf8, 0x8f, 0x09, 0x1c, 0x18, 0x86, 0x09, 0x1c, 0x09, 0xc2,
+ 0xe6, 0x97, 0x09, 0x0b, 0x60, 0xc2, 0x01, 0xe2, 0x09, 0x1c, 0x03, 0x03,
+ 0x50, 0x7d, 0xc2, 0x38, 0x6a, 0x09, 0x0b, 0x40, 0x94, 0x09, 0x0b, 0x2b,
+ 0x03, 0x50, 0x81, 0xc7, 0x5d, 0x9b, 0x09, 0x0b, 0x21, 0x8e, 0x09, 0x0b,
+ 0x18, 0xa0, 0x09, 0x1b, 0xf9, 0x9f, 0x09, 0x0a, 0xd8, 0xc9, 0xaa, 0xf8,
+ 0x09, 0x0a, 0xd0, 0xcb, 0x97, 0xdf, 0x09, 0x0b, 0xc8, 0x46, 0x25, 0xd4,
+ 0x43, 0x50, 0x87, 0xe0, 0x03, 0x47, 0x09, 0x0c, 0xf0, 0xc3, 0x51, 0xdb,
+ 0x09, 0x09, 0x01, 0xca, 0xa3, 0x82, 0x09, 0x08, 0xf8, 0xc8, 0x6a, 0x1e,
+ 0x09, 0x26, 0x61, 0xcd, 0x79, 0x68, 0x09, 0x08, 0xe1, 0xc3, 0x20, 0x18,
+ 0x09, 0x08, 0xd9, 0xc3, 0x32, 0xbf, 0x09, 0x08, 0xca, 0x03, 0x50, 0x99,
+ 0x16, 0xc3, 0x50, 0x9f, 0xcd, 0x47, 0xaa, 0x09, 0x08, 0x90, 0xc2, 0x00,
+ 0xb0, 0x09, 0x08, 0x79, 0xcb, 0x92, 0x12, 0x09, 0x08, 0x71, 0xc3, 0x04,
+ 0x2a, 0x09, 0x08, 0x69, 0xc9, 0x5d, 0x99, 0x09, 0x08, 0x61, 0xca, 0xa3,
+ 0xb4, 0x09, 0x08, 0x58, 0xc4, 0xde, 0xe7, 0x09, 0x26, 0x41, 0x15, 0xc3,
+ 0x50, 0xab, 0x10, 0xc3, 0x50, 0xb9, 0x0f, 0xc3, 0x50, 0xc9, 0x0e, 0xc3,
+ 0x50, 0xd9, 0x0d, 0xc3, 0x50, 0xe6, 0x0a, 0xc3, 0x50, 0xf7, 0x09, 0xc3,
+ 0x51, 0x07, 0x07, 0xc3, 0x51, 0x15, 0x06, 0xc3, 0x51, 0x29, 0x04, 0xc3,
+ 0x51, 0x38, 0x03, 0xc3, 0x51, 0x45, 0x97, 0x09, 0x07, 0x53, 0x03, 0x51,
+ 0x61, 0xc4, 0x38, 0xb4, 0x09, 0x07, 0x49, 0xc2, 0x00, 0xb0, 0x09, 0x07,
+ 0x11, 0x0b, 0x43, 0x51, 0x68, 0xcd, 0x79, 0xc3, 0x09, 0x07, 0xd1, 0xc9,
+ 0xaf, 0x78, 0x09, 0x07, 0xc9, 0xc4, 0x58, 0xf5, 0x09, 0x07, 0xc0, 0x97,
+ 0x09, 0x25, 0xa9, 0xc2, 0x01, 0x7f, 0x09, 0x1b, 0xc0, 0x86, 0x09, 0x05,
+ 0xa1, 0x9f, 0x09, 0x05, 0x98, 0x97, 0x09, 0x05, 0x91, 0x8b, 0x09, 0x05,
+ 0x89, 0x83, 0x09, 0x05, 0x7a, 0x03, 0x51, 0x74, 0xc2, 0x36, 0x6f, 0x09,
+ 0x05, 0x71, 0xc5, 0x45, 0xae, 0x09, 0x05, 0x62, 0x03, 0x51, 0x7a, 0xc5,
+ 0x39, 0xc7, 0x09, 0x05, 0x50, 0xc5, 0x39, 0xc7, 0x09, 0x05, 0x40, 0x90,
+ 0x09, 0x05, 0x29, 0xc9, 0xaa, 0xef, 0x09, 0x05, 0x1a, 0x03, 0x51, 0x80,
+ 0x95, 0x09, 0x25, 0x98, 0x8e, 0x09, 0x25, 0x88, 0xc5, 0x58, 0xf4, 0x09,
+ 0x04, 0xc8, 0xc6, 0x6a, 0x20, 0x09, 0x25, 0x41, 0xc2, 0x01, 0x7f, 0x09,
+ 0x25, 0x38, 0x8b, 0x09, 0x25, 0x21, 0xc2, 0x00, 0xcb, 0x09, 0x25, 0x19,
+ 0xc3, 0x02, 0x2c, 0x09, 0x25, 0x10, 0xcc, 0x84, 0xed, 0x09, 0x25, 0x09,
+ 0x03, 0x43, 0x51, 0x86, 0x17, 0xc3, 0x51, 0x93, 0xc5, 0x45, 0xae, 0x09,
+ 0x24, 0xd0, 0x8b, 0x09, 0x24, 0xc1, 0x83, 0x09, 0x24, 0xb8, 0x8b, 0x09,
+ 0x24, 0xa3, 0x03, 0x51, 0xa0, 0x83, 0x09, 0x24, 0x98, 0xc2, 0x05, 0x1d,
+ 0x09, 0x24, 0x89, 0xc2, 0x00, 0x74, 0x09, 0x24, 0x80, 0xc2, 0x01, 0xe2,
+ 0x09, 0x24, 0x73, 0x03, 0x51, 0xac, 0xc4, 0x99, 0xe3, 0x09, 0x24, 0x68,
+ 0xc5, 0x39, 0xc7, 0x09, 0x04, 0x38, 0x17, 0xc3, 0x51, 0xb2, 0xc4, 0x38,
+ 0xb4, 0x09, 0x03, 0x59, 0xc2, 0x00, 0xba, 0x09, 0x03, 0x51, 0xcc, 0x36,
+ 0x5c, 0x09, 0x03, 0x49, 0xc2, 0x02, 0x6f, 0x09, 0x03, 0x41, 0x0e, 0xc3,
+ 0x51, 0xbe, 0xc3, 0x32, 0xbf, 0x09, 0x03, 0x19, 0xc2, 0x01, 0x29, 0x09,
+ 0x03, 0x0b, 0x03, 0x51, 0xc9, 0xc2, 0x00, 0xd0, 0x09, 0x03, 0x01, 0x09,
+ 0xc3, 0x51, 0xcf, 0x04, 0xc3, 0x51, 0xe3, 0x03, 0x43, 0x51, 0xed, 0xc2,
+ 0x5d, 0xd4, 0x09, 0x24, 0x09, 0xc3, 0x26, 0x1a, 0x09, 0x00, 0x98, 0xc5,
+ 0x58, 0xf4, 0x09, 0x24, 0x00, 0xc3, 0x0f, 0xd6, 0x09, 0x00, 0x89, 0xc7,
+ 0x6a, 0x1f, 0x09, 0x00, 0x80, 0xc7, 0x5d, 0x9b, 0x09, 0x00, 0x71, 0x8e,
+ 0x09, 0x00, 0x68, 0xc8, 0x0d, 0x2d, 0x09, 0x01, 0xe3, 0x03, 0x51, 0xf9,
+ 0x16, 0x43, 0x51, 0xff, 0xce, 0x71, 0xca, 0x09, 0x14, 0x71, 0x46, 0x03,
+ 0x4d, 0x43, 0x52, 0x05, 0x9f, 0x09, 0x14, 0x40, 0x84, 0x09, 0x14, 0x30,
+ 0x97, 0x09, 0x14, 0x19, 0x8b, 0x09, 0x14, 0x10, 0x84, 0x09, 0x14, 0x08,
+ 0xe0, 0x04, 0x27, 0x09, 0x0a, 0x48, 0xca, 0xa5, 0x12, 0x00, 0x24, 0x58,
+ 0xc3, 0xe5, 0x3c, 0x00, 0x28, 0x39, 0xc2, 0x1c, 0x52, 0x00, 0x28, 0x19,
+ 0x87, 0x00, 0x28, 0x08, 0xc9, 0x20, 0xb1, 0x00, 0x27, 0xd8, 0xc3, 0x2d,
+ 0x1a, 0x05, 0x32, 0x99, 0x83, 0x05, 0x32, 0xb9, 0xd1, 0x51, 0x78, 0x05,
+ 0x32, 0xe9, 0x87, 0x00, 0x23, 0x29, 0xca, 0x51, 0x7f, 0x00, 0x23, 0x49,
+ 0xc7, 0xc8, 0x00, 0x00, 0x23, 0x68, 0x06, 0xc3, 0x52, 0x17, 0xc5, 0x1d,
+ 0x88, 0x00, 0x26, 0x10, 0xc8, 0x25, 0xfb, 0x00, 0x25, 0xb9, 0xc8, 0x20,
+ 0xa9, 0x00, 0x27, 0xa8, 0xca, 0xa5, 0x12, 0x00, 0x24, 0x50, 0xc3, 0xe5,
+ 0x3c, 0x00, 0x28, 0x31, 0xc2, 0x1c, 0x52, 0x00, 0x28, 0x11, 0x87, 0x00,
+ 0x28, 0x00, 0xc9, 0x20, 0xb1, 0x00, 0x27, 0xd0, 0xc8, 0x20, 0xa9, 0x00,
+ 0x27, 0xa1, 0xc8, 0x25, 0xfb, 0x00, 0x25, 0xb0, 0xc3, 0x2d, 0x1a, 0x05,
+ 0x32, 0x91, 0x83, 0x05, 0x32, 0xb1, 0xd1, 0x51, 0x78, 0x05, 0x32, 0xe1,
+ 0x87, 0x00, 0x23, 0x21, 0xca, 0x51, 0x7f, 0x00, 0x23, 0x41, 0xc7, 0xc8,
+ 0x00, 0x00, 0x23, 0x60, 0x06, 0xc3, 0x52, 0x23, 0xc5, 0x1d, 0x88, 0x00,
+ 0x26, 0x08, 0xc7, 0xc7, 0xeb, 0x00, 0x6d, 0x39, 0xc6, 0x8e, 0x9c, 0x00,
+ 0x6d, 0x68, 0xc7, 0xc4, 0x25, 0x00, 0x6d, 0x49, 0xc6, 0x8e, 0x9c, 0x00,
+ 0x6d, 0x78, 0xc7, 0xc6, 0x32, 0x00, 0x6c, 0xd9, 0xc7, 0xca, 0x29, 0x00,
+ 0x6c, 0xe9, 0xc7, 0xc7, 0xdd, 0x00, 0x6d, 0x09, 0xc7, 0xc7, 0xc1, 0x00,
+ 0x6d, 0x19, 0x16, 0xc3, 0x52, 0x2f, 0x06, 0xc3, 0x52, 0x3b, 0xc7, 0xc8,
+ 0x1c, 0x00, 0x6d, 0xa9, 0xc7, 0x8e, 0x9b, 0x00, 0x6d, 0xb8, 0xca, 0x63,
+ 0xc8, 0x00, 0x6e, 0xe1, 0xcf, 0x63, 0xc3, 0x00, 0x6e, 0xe9, 0xcb, 0x93,
+ 0x51, 0x00, 0x6e, 0xf0, 0x49, 0x20, 0x36, 0x43, 0x52, 0x47, 0x49, 0x20,
+ 0x36, 0x43, 0x52, 0x53, 0x49, 0x20, 0x36, 0x43, 0x52, 0x5f, 0x4c, 0x87,
+ 0x45, 0xc3, 0x52, 0x6b, 0x87, 0x0e, 0xcd, 0x20, 0x49, 0x20, 0x36, 0x43,
+ 0x52, 0x77, 0x49, 0x20, 0x36, 0x43, 0x52, 0x83, 0xc8, 0x3b, 0xec, 0x0e,
+ 0xc8, 0xf1, 0xc6, 0x24, 0x3b, 0x0e, 0xc8, 0xe0, 0xc4, 0x17, 0x93, 0x0e,
+ 0xd3, 0x2b, 0x03, 0x52, 0x8f, 0xc6, 0x5a, 0xfc, 0x0e, 0xd3, 0x1a, 0x03,
+ 0x52, 0x95, 0xcb, 0x57, 0x45, 0x0e, 0xcc, 0x31, 0xc6, 0x00, 0x58, 0x0e,
+ 0xcc, 0x29, 0xc6, 0x24, 0x3b, 0x0e, 0xcc, 0x20, 0xcb, 0x57, 0x45, 0x0e,
+ 0xcc, 0x19, 0xc6, 0x00, 0x58, 0x0e, 0xcc, 0x11, 0xc6, 0x24, 0x3b, 0x0e,
+ 0xcc, 0x08, 0xcb, 0x57, 0x45, 0x0e, 0xca, 0x81, 0xc6, 0x00, 0x58, 0x0e,
+ 0xca, 0x79, 0xc6, 0x24, 0x3b, 0x0e, 0xca, 0x70, 0xcb, 0x57, 0x45, 0x0e,
+ 0xca, 0x69, 0xc6, 0x00, 0x58, 0x0e, 0xca, 0x61, 0xc6, 0x24, 0x3b, 0x0e,
+ 0xca, 0x58, 0xc7, 0x04, 0x12, 0x0e, 0xd1, 0x49, 0xc5, 0x19, 0x2f, 0x0e,
+ 0xd1, 0x38, 0x00, 0x43, 0x52, 0x9b, 0x00, 0x43, 0x52, 0xa7, 0x00, 0x43,
+ 0x52, 0xb3, 0x00, 0x43, 0x52, 0xe3, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x2b,
+ 0x03, 0x53, 0x02, 0xd2, 0x13, 0x89, 0x0e, 0xc6, 0xa3, 0x03, 0x53, 0x06,
+ 0x45, 0x00, 0x9d, 0xc3, 0x53, 0x0a, 0x47, 0x13, 0x95, 0x43, 0x53, 0x16,
+ 0x00, 0x43, 0x53, 0x25, 0x00, 0x43, 0x53, 0x68, 0x92, 0x0e, 0xc3, 0x6b,
+ 0x03, 0x53, 0x80, 0xc6, 0xbc, 0x5c, 0x0e, 0xc3, 0xaa, 0x03, 0x53, 0x84,
+ 0x00, 0x43, 0x53, 0x88, 0x00, 0x43, 0x53, 0xa9, 0xcb, 0x13, 0x90, 0x0e,
+ 0xc5, 0x91, 0xc9, 0xad, 0x9b, 0x0e, 0xc4, 0xa9, 0x46, 0x0e, 0xce, 0xc3,
+ 0x53, 0xc4, 0xc8, 0xbc, 0x62, 0x0e, 0xc3, 0xc9, 0xd3, 0x46, 0x57, 0x0e,
+ 0xc2, 0xb1, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x18, 0x4b, 0x40, 0xb3, 0xc3,
+ 0x53, 0xd0, 0x4a, 0x18, 0xa5, 0x43, 0x53, 0xdc, 0xc6, 0x00, 0x58, 0x0e,
+ 0xcf, 0xa1, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x98, 0xc6, 0x00, 0x58, 0x0e,
+ 0xcf, 0x81, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x78, 0xc5, 0x17, 0x14, 0x0e,
+ 0xce, 0xf1, 0x15, 0xc3, 0x53, 0xee, 0x48, 0x20, 0x37, 0x43, 0x53, 0xfa,
+ 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x61, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x48,
+ 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x59, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x40,
+ 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x51, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x38,
+ 0xca, 0x91, 0x42, 0x0e, 0xcb, 0x49, 0x49, 0x45, 0x27, 0x43, 0x54, 0x06,
+ 0x46, 0x20, 0xe5, 0xc3, 0x54, 0x1b, 0x48, 0xb7, 0x3a, 0x43, 0x54, 0x27,
+ 0x46, 0x20, 0xe5, 0xc3, 0x54, 0x33, 0x48, 0xb7, 0x3a, 0x43, 0x54, 0x45,
+ 0xc8, 0xbb, 0x22, 0x0e, 0xce, 0xc9, 0xc5, 0x17, 0x14, 0x0e, 0xce, 0xbb,
+ 0x03, 0x54, 0x51, 0xc6, 0x01, 0xdb, 0x0e, 0xce, 0xb1, 0xc5, 0x03, 0x13,
+ 0x0e, 0xce, 0xa9, 0x48, 0x20, 0x37, 0x43, 0x54, 0x57, 0xc5, 0x17, 0x14,
+ 0x0e, 0xcb, 0xb1, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0xa9, 0xc5, 0x03, 0x13,
+ 0x0e, 0xcb, 0xa0, 0xc5, 0x17, 0x14, 0x0e, 0xcb, 0xd1, 0xc6, 0x01, 0xdb,
+ 0x0e, 0xcb, 0xc9, 0xc5, 0x03, 0x13, 0x0e, 0xcb, 0xc0, 0xca, 0x91, 0x42,
+ 0x0e, 0xcb, 0x91, 0xc8, 0x51, 0x1b, 0x0e, 0xcb, 0x88, 0xcb, 0x91, 0x41,
+ 0x0e, 0xcb, 0x68, 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x91, 0xc6, 0x24, 0x3b,
+ 0x0e, 0xcf, 0x88, 0xc6, 0x00, 0x58, 0x0e, 0xcf, 0x71, 0xc6, 0x24, 0x3b,
+ 0x0e, 0xcf, 0x68, 0x4e, 0x6d, 0x86, 0xc3, 0x54, 0x63, 0x48, 0x20, 0x37,
+ 0xc3, 0x54, 0x75, 0x46, 0x0e, 0xd4, 0x43, 0x54, 0x81, 0xc6, 0x00, 0x58,
+ 0x0e, 0xcf, 0x31, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x20, 0xc6, 0x00, 0x58,
+ 0x0e, 0xcf, 0x29, 0xc6, 0x24, 0x3b, 0x0e, 0xcf, 0x18, 0xc5, 0xdd, 0x17,
+ 0x0e, 0xcd, 0x79, 0xca, 0x9e, 0x8c, 0x0e, 0xcd, 0x40, 0xc7, 0x00, 0x57,
+ 0x0e, 0xcc, 0xc0, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x71, 0xca, 0x9e, 0x8c,
+ 0x0e, 0xcd, 0x38, 0x00, 0xc3, 0x54, 0x8d, 0x48, 0xbb, 0x7a, 0x43, 0x54,
+ 0x9d, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0x09, 0xc6, 0x01, 0xdb, 0x0e, 0xca,
+ 0x01, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0xf8, 0xc8, 0x5a, 0x49, 0x0e, 0xc9,
+ 0xf1, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0xe9, 0xc6, 0x01, 0xdb, 0x0e, 0xc9,
+ 0xe1, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0xd8, 0xca, 0x91, 0x42, 0x0e, 0xc9,
+ 0x71, 0x49, 0x45, 0x27, 0x43, 0x54, 0xa9, 0xc5, 0x17, 0x14, 0x0e, 0xca,
+ 0x21, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0x19, 0xc5, 0x03, 0x13, 0x0e, 0xca,
+ 0x10, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0xd1, 0xc6, 0x01, 0xdb, 0x0e, 0xc9,
+ 0xc9, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0xc0, 0xcb, 0x91, 0x41, 0x0e, 0xc9,
+ 0xb8, 0xcb, 0x91, 0x41, 0x0e, 0xc9, 0x90, 0xc5, 0x17, 0x14, 0x0e, 0xcb,
+ 0x1b, 0x03, 0x54, 0xbe, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0x11, 0xc5, 0x03,
+ 0x13, 0x0e, 0xcb, 0x08, 0xc5, 0x17, 0x14, 0x0e, 0xca, 0xfb, 0x03, 0x54,
+ 0xc4, 0xc6, 0x01, 0xdb, 0x0e, 0xca, 0xf1, 0xc5, 0x03, 0x13, 0x0e, 0xca,
+ 0xe8, 0xc2, 0x00, 0x15, 0x0e, 0xca, 0xe0, 0xc2, 0x00, 0x15, 0x0e, 0xca,
+ 0xc0, 0x4c, 0x8b, 0xc5, 0xc3, 0x54, 0xca, 0xc5, 0x03, 0x13, 0x0e, 0xc9,
+ 0x11, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0x08, 0xc4, 0x94, 0xa5, 0x0e, 0xd2,
+ 0x61, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x58, 0xc4, 0x94, 0xa5, 0x0e, 0xd2,
+ 0x49, 0xc8, 0xbe, 0x0a, 0x0e, 0xd2, 0x40, 0xcf, 0x63, 0x96, 0x08, 0xae,
+ 0xb9, 0xce, 0x6f, 0x8c, 0x08, 0xae, 0xb1, 0xc4, 0x5d, 0x32, 0x08, 0xae,
+ 0xa8, 0xcd, 0x44, 0xbb, 0x08, 0xae, 0x91, 0x49, 0xb1, 0x43, 0x43, 0x54,
+ 0xd6, 0xd0, 0x5f, 0x22, 0x08, 0xae, 0x71, 0xd0, 0x58, 0x22, 0x08, 0xae,
+ 0x69, 0xc9, 0x44, 0xbf, 0x08, 0xae, 0x60, 0x8e, 0x08, 0x8d, 0xd8, 0x94,
+ 0x08, 0x8d, 0xc8, 0x8e, 0x08, 0x8c, 0x60, 0x94, 0x08, 0x8c, 0x50, 0xd9,
+ 0x1e, 0x50, 0x01, 0x2f, 0x51, 0xd8, 0x25, 0x5b, 0x01, 0x58, 0xa8, 0xd3,
+ 0x1e, 0x56, 0x01, 0x2f, 0x49, 0xd3, 0x43, 0x26, 0x01, 0x2d, 0x38, 0xd2,
+ 0x47, 0x6f, 0x01, 0x2d, 0x41, 0xd3, 0x1e, 0x56, 0x01, 0x58, 0xa0, 0xc6,
+ 0x0b, 0x18, 0x01, 0x9e, 0x71, 0xc4, 0xd9, 0x12, 0x01, 0x9d, 0x30, 0xc8,
+ 0x0b, 0x08, 0x01, 0x9d, 0x40, 0xc2, 0xe5, 0xa5, 0x0f, 0x91, 0xc9, 0xc2,
+ 0xe6, 0x91, 0x0f, 0x91, 0x01, 0xc2, 0x83, 0xe4, 0x0f, 0x90, 0xe0, 0xc2,
+ 0x71, 0x49, 0x0f, 0x91, 0xa1, 0xc2, 0xe6, 0x81, 0x0f, 0x91, 0x28, 0xc2,
+ 0xe6, 0xa5, 0x0f, 0x91, 0x71, 0xc2, 0x09, 0x02, 0x0f, 0x90, 0x90, 0xc2,
+ 0xe6, 0x85, 0x0f, 0x90, 0xb9, 0xc2, 0xe6, 0x93, 0x0f, 0x90, 0xa8, 0xc2,
+ 0xe0, 0x7e, 0x0f, 0x91, 0xc1, 0xc2, 0x7e, 0x13, 0x0f, 0x91, 0x10, 0xa5,
+ 0x0f, 0x91, 0xb9, 0xa6, 0x0f, 0x91, 0xb0, 0xc2, 0xe6, 0x5a, 0x0f, 0x91,
+ 0x89, 0xc2, 0xe5, 0x7d, 0x0f, 0x91, 0x39, 0xc2, 0xe6, 0x8d, 0x0f, 0x90,
+ 0x80, 0xc2, 0x3c, 0xd4, 0x0f, 0x91, 0x79, 0xc2, 0xe5, 0x7e, 0x0f, 0x91,
+ 0x40, 0xc2, 0xae, 0x95, 0x0f, 0x90, 0xf9, 0xc2, 0xe6, 0xa3, 0x0f, 0x90,
+ 0xd8, 0xa6, 0x0f, 0x91, 0x51, 0x9d, 0x0f, 0x91, 0x48, 0xc6, 0x06, 0xe1,
+ 0x01, 0x20, 0xb8, 0xc2, 0x00, 0xc1, 0x00, 0x43, 0x29, 0x83, 0x00, 0x43,
+ 0x20, 0xd3, 0x43, 0x5f, 0x0f, 0xc9, 0x69, 0xcc, 0x87, 0xf9, 0x0f, 0xcb,
+ 0x80, 0xe0, 0x0a, 0x07, 0x01, 0x17, 0xe0, 0xe0, 0x0a, 0x07, 0x01, 0x17,
+ 0xa0, 0xc8, 0x4b, 0x94, 0x01, 0x0b, 0xf9, 0xc7, 0x0d, 0x04, 0x01, 0x0b,
+ 0xe8, 0xc2, 0x00, 0x5f, 0x01, 0x0b, 0xa3, 0x03, 0x54, 0xe2, 0xc3, 0x45,
+ 0x6b, 0x01, 0x0b, 0xe0, 0xc4, 0x22, 0x44, 0x01, 0x0b, 0xd9, 0x91, 0x01,
+ 0x0b, 0x88, 0xc3, 0x77, 0x79, 0x08, 0x43, 0x91, 0xc4, 0xdc, 0x2d, 0x08,
+ 0x43, 0x78, 0xc4, 0x02, 0xde, 0x05, 0x47, 0xb1, 0xc2, 0x02, 0xa0, 0x05,
+ 0x47, 0xa8, 0xc5, 0x01, 0xa2, 0x01, 0x5b, 0x1b, 0x03, 0x54, 0xe8, 0xcc,
+ 0x2e, 0x48, 0x01, 0x5a, 0x69, 0xcc, 0x82, 0xb9, 0x01, 0x5b, 0x69, 0xcd,
+ 0x7c, 0xa8, 0x01, 0x5c, 0x38, 0x47, 0x13, 0x6d, 0xc3, 0x54, 0xec, 0xc6,
+ 0x10, 0x9d, 0x01, 0x4a, 0xc9, 0xc8, 0xae, 0xbc, 0x01, 0x4b, 0x08, 0xc8,
+ 0xae, 0xbc, 0x01, 0x4a, 0xe9, 0xc6, 0x10, 0x9d, 0x01, 0x4a, 0xa8, 0xd8,
+ 0x22, 0x5b, 0x0f, 0xc0, 0x59, 0x46, 0x03, 0x13, 0xc3, 0x54, 0xf6, 0xcd,
+ 0x75, 0xa6, 0x01, 0x0e, 0xf9, 0xd0, 0x59, 0x42, 0x01, 0x0d, 0xa9, 0x44,
+ 0x08, 0xba, 0xc3, 0x55, 0x02, 0xd1, 0x01, 0x68, 0x01, 0x48, 0x41, 0xd9,
+ 0x1f, 0xf9, 0x0f, 0xc0, 0x39, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0xb9, 0xcc,
+ 0x84, 0xb1, 0x0f, 0xc4, 0xd8, 0xc4, 0x18, 0x10, 0x01, 0x27, 0xd9, 0xc2,
+ 0x22, 0xcc, 0x01, 0x27, 0xd0, 0xc3, 0x0d, 0x14, 0x01, 0x27, 0xc9, 0xc3,
+ 0x09, 0x9e, 0x01, 0x27, 0xc0, 0xc4, 0x02, 0xde, 0x01, 0x27, 0xb9, 0xc2,
+ 0x02, 0xa0, 0x01, 0x27, 0xb0, 0xcf, 0x05, 0x98, 0x01, 0x15, 0x59, 0xce,
+ 0x34, 0xd4, 0x01, 0x57, 0x28, 0xd0, 0x0f, 0xc6, 0x01, 0x00, 0xf1, 0xd9,
+ 0x0f, 0xbd, 0x01, 0x72, 0x10, 0xca, 0x9f, 0xa4, 0x01, 0x4c, 0x81, 0xcd,
+ 0x7f, 0x80, 0x01, 0x4c, 0x70, 0x45, 0x00, 0x8c, 0xc3, 0x55, 0x0e, 0xd3,
+ 0x41, 0x71, 0x01, 0x4c, 0xe1, 0xc7, 0x00, 0x38, 0x01, 0x80, 0x4b, 0x03,
+ 0x55, 0x1a, 0xd3, 0x19, 0x81, 0x01, 0x70, 0x01, 0xda, 0x19, 0x7a, 0x01,
+ 0x70, 0x08, 0x00, 0x43, 0x55, 0x20, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x01,
+ 0xd6, 0x2d, 0x62, 0x01, 0x48, 0x09, 0x16, 0x43, 0x55, 0x32, 0xc5, 0x01,
+ 0x4a, 0x01, 0x0e, 0x09, 0x00, 0x43, 0x55, 0x41, 0xc5, 0x01, 0x4a, 0x01,
+ 0x0e, 0x01, 0x00, 0x43, 0x55, 0x59, 0xd2, 0x05, 0xd4, 0x0f, 0xc0, 0x11,
+ 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x90, 0x46, 0x00, 0x8b, 0x43, 0x55, 0x6b,
+ 0xc9, 0x03, 0xc8, 0x01, 0x58, 0x71, 0xc7, 0x09, 0x0d, 0x01, 0x58, 0x78,
+ 0xcf, 0x6a, 0x8f, 0x01, 0x5a, 0x41, 0xce, 0x33, 0x92, 0x01, 0x5a, 0x60,
+ 0xc6, 0x01, 0x73, 0x01, 0x0e, 0x79, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x18,
+ 0x87, 0x05, 0x28, 0x88, 0x91, 0x05, 0x2c, 0x10, 0xc2, 0x00, 0x87, 0x05,
+ 0x30, 0x81, 0xc2, 0x02, 0x2b, 0x05, 0x30, 0x89, 0xc3, 0x19, 0xe1, 0x05,
+ 0x30, 0x91, 0xc2, 0x01, 0xc3, 0x05, 0x31, 0x51, 0xc2, 0x00, 0x58, 0x05,
+ 0x31, 0x58, 0x87, 0x05, 0x28, 0xf9, 0x90, 0x05, 0x30, 0x28, 0x91, 0x05,
+ 0x2c, 0x80, 0xc3, 0xe5, 0x36, 0x0b, 0x54, 0x99, 0xc3, 0xe5, 0x06, 0x0b,
+ 0x54, 0x90, 0x9a, 0x0b, 0x54, 0xd9, 0x93, 0x0b, 0x54, 0xd1, 0x85, 0x0b,
+ 0x54, 0xc9, 0x9c, 0x0b, 0x54, 0xc0, 0x42, 0x06, 0x46, 0xc3, 0x55, 0x77,
+ 0xc7, 0xc4, 0x02, 0x00, 0x70, 0x30, 0x91, 0x00, 0x70, 0x59, 0xc3, 0x14,
+ 0x6b, 0x00, 0x71, 0x41, 0xc2, 0x00, 0xe4, 0x00, 0x71, 0x50, 0x83, 0x00,
+ 0x71, 0x91, 0x8f, 0x00, 0x71, 0x99, 0x87, 0x00, 0x72, 0x09, 0x46, 0xce,
+ 0x87, 0x43, 0x55, 0x8f, 0x8b, 0x00, 0x71, 0xa8, 0x87, 0x00, 0x71, 0xb3,
+ 0x03, 0x55, 0x9b, 0x97, 0x00, 0x71, 0xc8, 0x42, 0x00, 0x8e, 0xc3, 0x55,
+ 0x9f, 0xca, 0xa5, 0x30, 0x00, 0x70, 0x89, 0xc7, 0xc6, 0xbe, 0x00, 0x70,
+ 0x90, 0x42, 0x00, 0xb7, 0xc3, 0x55, 0xaf, 0xc7, 0xc1, 0x54, 0x00, 0x71,
+ 0x00, 0xc8, 0xb9, 0x42, 0x00, 0x71, 0x89, 0xc2, 0x13, 0x4c, 0x00, 0x72,
+ 0x41, 0x16, 0xc3, 0x55, 0xbb, 0xc8, 0xb5, 0x3a, 0x00, 0x72, 0x58, 0x94,
+ 0x00, 0x63, 0x00, 0x8e, 0x00, 0x63, 0x08, 0xc3, 0xad, 0xf4, 0x00, 0x78,
+ 0xd1, 0xc4, 0x97, 0x19, 0x00, 0x78, 0xd9, 0xc3, 0x60, 0x54, 0x00, 0x78,
+ 0xe0, 0xc3, 0xad, 0xf4, 0x00, 0x78, 0xe9, 0xc4, 0x97, 0x19, 0x00, 0x78,
+ 0xf1, 0xc3, 0x60, 0x54, 0x00, 0x7e, 0x78, 0xcd, 0x00, 0xfa, 0x07, 0xe8,
+ 0x09, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xe8, 0x0b, 0xc3, 0x55, 0xc7, 0x45,
+ 0x00, 0x8c, 0x43, 0x55, 0xd3, 0x0b, 0xc3, 0x55, 0xe5, 0x45, 0x00, 0x8c,
+ 0x43, 0x55, 0xf1, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xf1, 0xcd, 0x00, 0xfa,
+ 0x07, 0xe8, 0x10, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x01, 0xca, 0x26, 0xf7,
+ 0x07, 0xe8, 0xe0, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xf9, 0xca, 0x26, 0xf7,
+ 0x07, 0xe8, 0xd8, 0x0b, 0xc3, 0x55, 0xfd, 0x45, 0x00, 0x8c, 0x43, 0x56,
+ 0x09, 0x0b, 0xc3, 0x56, 0x15, 0xd3, 0x43, 0x72, 0x07, 0xed, 0xf8, 0x0b,
+ 0xc3, 0x56, 0x21, 0x45, 0x00, 0x8c, 0x43, 0x56, 0x2d, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe2, 0x89, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0xb8, 0x44, 0x2b, 0xb9,
+ 0xc3, 0x56, 0x39, 0x0a, 0xc3, 0x56, 0x45, 0x45, 0x19, 0x60, 0xc3, 0x56,
+ 0x51, 0x4d, 0x06, 0x5a, 0xc3, 0x56, 0x67, 0x45, 0x30, 0xc1, 0xc3, 0x56,
+ 0x73, 0x45, 0x50, 0xf0, 0xc3, 0x56, 0x89, 0x44, 0x72, 0xf0, 0x43, 0x56,
+ 0x99, 0x45, 0x4d, 0x40, 0xc3, 0x56, 0xa5, 0x45, 0x52, 0x4a, 0xc3, 0x56,
+ 0xaf, 0x46, 0xd2, 0xa7, 0xc3, 0x56, 0xb9, 0xde, 0x07, 0x29, 0x07, 0xe3,
+ 0x18, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xd9, 0xca, 0x26, 0xf7, 0x07, 0xe8,
+ 0xb8, 0x0b, 0xc3, 0x56, 0xc5, 0x45, 0x00, 0x8c, 0xc3, 0x56, 0xd1, 0xcb,
+ 0x64, 0x7b, 0x07, 0xe7, 0x38, 0x0b, 0xc3, 0x56, 0xe3, 0xcb, 0x64, 0x7b,
+ 0x07, 0xe9, 0xb1, 0x45, 0x00, 0x8c, 0x43, 0x56, 0xef, 0x43, 0x02, 0x98,
+ 0xc3, 0x56, 0xfb, 0x43, 0x2b, 0xba, 0x43, 0x57, 0x0b, 0x0b, 0xc3, 0x57,
+ 0x17, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xa1, 0x45, 0x00, 0x8c, 0x43, 0x57,
+ 0x23, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x51, 0xcd, 0x00, 0xfa, 0x07, 0xe8,
+ 0x70, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xe1, 0xca, 0x26, 0xf7, 0x07, 0xe8,
+ 0xc0, 0x45, 0x19, 0x60, 0xc3, 0x57, 0x2f, 0x44, 0x19, 0x6a, 0xc3, 0x57,
+ 0x39, 0x44, 0x72, 0xf0, 0xc3, 0x57, 0x43, 0xd1, 0x50, 0xf0, 0x07, 0xe5,
+ 0x91, 0x4d, 0x06, 0x5a, 0xc3, 0x57, 0x4f, 0x44, 0x2b, 0xb9, 0x43, 0x57,
+ 0x5b, 0x42, 0x00, 0xdb, 0xc3, 0x57, 0x67, 0x03, 0x43, 0x57, 0x71, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe1, 0x61, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xe8, 0xce,
+ 0x43, 0x77, 0x07, 0xeb, 0xd1, 0xd7, 0x26, 0xea, 0x07, 0xeb, 0xd9, 0xcf,
+ 0x67, 0x65, 0x07, 0xeb, 0xc8, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xb9, 0xca,
+ 0x26, 0xf7, 0x07, 0xe8, 0x98, 0x0b, 0xc3, 0x57, 0x7d, 0x45, 0x00, 0x8c,
+ 0x43, 0x57, 0x89, 0x0b, 0xc3, 0x57, 0x9b, 0x4a, 0x74, 0x6e, 0x43, 0x57,
+ 0xa7, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xa1, 0xcd, 0x00, 0xfa, 0x07, 0xe7,
+ 0xc0, 0x5e, 0x0d, 0xba, 0xc3, 0x57, 0xb3, 0x4e, 0x6e, 0xba, 0x43, 0x57,
+ 0xbf, 0x0b, 0xc3, 0x57, 0xcb, 0xcc, 0x82, 0xa1, 0x07, 0xea, 0x69, 0xcf,
+ 0x65, 0x1c, 0x07, 0xef, 0xb8, 0x44, 0x2b, 0xb9, 0xc3, 0x57, 0xd5, 0x4d,
+ 0x06, 0x5a, 0xc3, 0x57, 0xe1, 0x45, 0x19, 0x60, 0xc3, 0x57, 0xed, 0x45,
+ 0x50, 0xf1, 0x43, 0x57, 0xfd, 0x44, 0x2b, 0xb9, 0xc3, 0x58, 0x09, 0x4d,
+ 0x06, 0x5a, 0xc3, 0x58, 0x15, 0xcf, 0x60, 0x8a, 0x07, 0xe3, 0xc9, 0x45,
+ 0x19, 0x60, 0xc3, 0x58, 0x21, 0xcf, 0x69, 0x81, 0x07, 0xe3, 0xb9, 0xce,
+ 0x72, 0xf0, 0x07, 0xe3, 0xb1, 0xd2, 0x4a, 0xbd, 0x07, 0xe0, 0x89, 0xcf,
+ 0x64, 0x77, 0x07, 0xe7, 0x30, 0xe0, 0x07, 0x27, 0x07, 0xe2, 0xd8, 0xca,
+ 0x26, 0xf7, 0x07, 0xe3, 0xa9, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x80, 0xca,
+ 0x26, 0xf7, 0x07, 0xe3, 0xa1, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x78, 0xca,
+ 0x26, 0xf7, 0x07, 0xe3, 0x91, 0x0b, 0xc3, 0x58, 0x31, 0xcb, 0x64, 0x7b,
+ 0x07, 0xe7, 0x19, 0x45, 0x00, 0x8c, 0x43, 0x58, 0x3d, 0x0b, 0xc3, 0x58,
+ 0x5b, 0x45, 0x00, 0x8c, 0x43, 0x58, 0x67, 0x43, 0x02, 0x98, 0xc3, 0x58,
+ 0x79, 0x43, 0x2b, 0xba, 0x43, 0x58, 0x83, 0x0b, 0xc3, 0x58, 0x8f, 0x45,
+ 0x00, 0x8c, 0x43, 0x58, 0x9b, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x89, 0xcc,
+ 0x10, 0xb4, 0x07, 0xe6, 0xf0, 0x4f, 0x08, 0x0b, 0xc3, 0x58, 0xad, 0x42,
+ 0x00, 0x8f, 0x43, 0x58, 0xf5, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0xc1, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe6, 0xe8, 0x45, 0x19, 0x60, 0xc3, 0x58, 0xff, 0xce,
+ 0x43, 0x77, 0x07, 0xed, 0x80, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0xa9, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe6, 0xd0, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x79, 0xcc,
+ 0x10, 0xb4, 0x07, 0xe6, 0xb0, 0x0b, 0xc3, 0x59, 0x0b, 0x45, 0x00, 0x8c,
+ 0x43, 0x59, 0x17, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x71, 0xcb, 0x10, 0xb5,
+ 0x07, 0xe6, 0xa8, 0xce, 0x43, 0x77, 0x07, 0xec, 0xd1, 0xd7, 0x26, 0xea,
+ 0x07, 0xec, 0xd8, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x59, 0xcb, 0x10, 0xb5,
+ 0x07, 0xe6, 0x90, 0xd7, 0x26, 0xea, 0x07, 0xec, 0xc9, 0x44, 0x19, 0x6a,
+ 0xc3, 0x59, 0x29, 0xce, 0x43, 0x77, 0x07, 0xee, 0x39, 0x45, 0x19, 0x60,
+ 0x43, 0x59, 0x35, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x61, 0xca, 0x26, 0xf7,
+ 0x07, 0xe4, 0x11, 0x0b, 0xc3, 0x59, 0x41, 0x45, 0x00, 0x8c, 0x43, 0x59,
+ 0x4d, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x59, 0xca, 0x26, 0xf7, 0x07, 0xe4,
+ 0x09, 0x0b, 0x43, 0x59, 0x59, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x21, 0xcd,
+ 0x00, 0xfa, 0x07, 0xe1, 0xf0, 0x48, 0x06, 0x5f, 0xc3, 0x59, 0x65, 0xca,
+ 0x26, 0xf7, 0x07, 0xe4, 0x01, 0xcd, 0x00, 0xfa, 0x07, 0xe1, 0xb8, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe1, 0xd1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x30, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe1, 0xc9, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x28, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe1, 0xc1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x20, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe0, 0xd9, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x60, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe0, 0xc9, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x58, 0xca,
+ 0x26, 0xf7, 0x07, 0xe8, 0xf9, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x18, 0xca,
+ 0x26, 0xf7, 0x07, 0xe9, 0x01, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x20, 0xca,
+ 0x26, 0xf7, 0x07, 0xe4, 0x31, 0xcd, 0x00, 0xfa, 0x07, 0xe2, 0x18, 0x4c,
+ 0x82, 0x59, 0xc3, 0x59, 0x71, 0x46, 0x08, 0x09, 0x43, 0x59, 0x7d, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe2, 0x11, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x60, 0x44,
+ 0x19, 0x6a, 0xc3, 0x59, 0x89, 0xce, 0x43, 0x77, 0x07, 0xed, 0x68, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe2, 0x09, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x58, 0xca,
+ 0x26, 0xf7, 0x07, 0xec, 0x29, 0xcc, 0x10, 0xb4, 0x07, 0xec, 0x30, 0x0b,
+ 0xc3, 0x59, 0x95, 0x45, 0x00, 0x8c, 0x43, 0x59, 0xa1, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe1, 0xf9, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x48, 0x45, 0x30, 0xc1,
+ 0xc3, 0x59, 0xb3, 0x45, 0x19, 0x60, 0xc3, 0x59, 0xbf, 0xce, 0x43, 0x77,
+ 0x07, 0xed, 0x60, 0x44, 0x2b, 0xb9, 0xc3, 0x59, 0xcb, 0x4d, 0x06, 0x5a,
+ 0xc3, 0x59, 0xd7, 0x45, 0x19, 0x60, 0xc3, 0x59, 0xe3, 0x45, 0x50, 0xf1,
+ 0x43, 0x59, 0xed, 0xe0, 0x00, 0xe7, 0x07, 0xef, 0x88, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe1, 0x81, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x08, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe1, 0x79, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x00, 0xca, 0x26, 0xf7,
+ 0x07, 0xeb, 0xe1, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0xe8, 0xca, 0x26, 0xf7,
+ 0x07, 0xe3, 0x79, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x50, 0xca, 0x26, 0xf7,
+ 0x07, 0xe3, 0x71, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x48, 0xca, 0x26, 0xf7,
+ 0x07, 0xe3, 0x61, 0x0b, 0xc3, 0x59, 0xf9, 0xcb, 0x64, 0x7b, 0x07, 0xe7,
+ 0x08, 0x0b, 0xc3, 0x5a, 0x05, 0xd3, 0x43, 0x72, 0x07, 0xec, 0xf0, 0x43,
+ 0x02, 0x98, 0xc3, 0x5a, 0x11, 0x43, 0x2b, 0xba, 0x43, 0x5a, 0x1b, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe0, 0x29, 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xe0, 0xc2,
+ 0x04, 0xc6, 0x07, 0xea, 0x11, 0x17, 0x43, 0x5a, 0x27, 0xc8, 0xb8, 0x52,
+ 0x07, 0xea, 0x79, 0xc7, 0x6d, 0x34, 0x07, 0xea, 0x00, 0xd5, 0x1c, 0xbf,
+ 0x07, 0xe2, 0x49, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x40, 0x0b, 0xc3, 0x5a,
+ 0x34, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x49, 0xd3, 0x43, 0x72, 0x07, 0xed,
+ 0x88, 0x0b, 0xc3, 0x5a, 0x40, 0x45, 0x00, 0x8c, 0x43, 0x5a, 0x4c, 0x0b,
+ 0xc3, 0x5a, 0x5e, 0x45, 0x00, 0x8c, 0x43, 0x5a, 0x6a, 0x0b, 0xc3, 0x5a,
+ 0x7c, 0x45, 0x00, 0x8c, 0x43, 0x5a, 0x88, 0xcc, 0x00, 0xfb, 0x07, 0xe1,
+ 0x21, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xb0, 0xca, 0x26, 0xf7, 0x07, 0xeb,
+ 0x79, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x80, 0xcc, 0x00, 0xfb, 0x07, 0xe1,
+ 0x19, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xa8, 0xd7, 0x26, 0xea, 0x07, 0xeb,
+ 0x71, 0xce, 0x43, 0x77, 0x07, 0xed, 0x58, 0xcb, 0x10, 0xb5, 0x07, 0xdf,
+ 0xd9, 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0xc8, 0x00, 0x43, 0x5a, 0xa0, 0x00,
+ 0x43, 0x5a, 0xb6, 0x00, 0x43, 0x5a, 0xcc, 0x00, 0x43, 0x5a, 0xe2, 0x00,
+ 0x43, 0x5a, 0xf8, 0x00, 0x43, 0x5b, 0x08, 0x00, 0x43, 0x5b, 0x1e, 0x00,
+ 0x43, 0x5b, 0x34, 0xc3, 0x0f, 0x9a, 0x00, 0x45, 0xe3, 0x03, 0x5b, 0x40,
+ 0xc4, 0x3a, 0x01, 0x00, 0x45, 0xe9, 0xc3, 0xb1, 0x0d, 0x00, 0x45, 0xd8,
+ 0x00, 0x43, 0x5b, 0x46, 0x00, 0x43, 0x5b, 0x5c, 0x00, 0x43, 0x5b, 0x75,
+ 0x88, 0x00, 0x32, 0x1b, 0x03, 0x5b, 0x8b, 0xca, 0xa2, 0x1a, 0x00, 0x31,
+ 0x00, 0xc2, 0x13, 0xc0, 0x00, 0x36, 0x4b, 0x03, 0x5b, 0x8f, 0xc2, 0x49,
+ 0x0c, 0x00, 0x36, 0x2a, 0x03, 0x5b, 0x93, 0x00, 0x43, 0x5b, 0x97, 0x00,
+ 0xc3, 0x5b, 0xa7, 0xc2, 0x16, 0x1c, 0x00, 0x34, 0x3a, 0x03, 0x5b, 0xbd,
+ 0x00, 0xc3, 0x5b, 0xc1, 0xc2, 0x16, 0x1c, 0x00, 0x33, 0xd2, 0x03, 0x5b,
+ 0xd7, 0x00, 0xc3, 0x5b, 0xdb, 0xc2, 0x16, 0x1c, 0x00, 0x33, 0xfa, 0x03,
+ 0x5b, 0xef, 0x00, 0x43, 0x5b, 0xf3, 0xc6, 0xd0, 0x91, 0x00, 0x44, 0x31,
+ 0xc2, 0x00, 0x65, 0x00, 0x31, 0x83, 0x03, 0x5c, 0x09, 0xc2, 0x16, 0x1c,
+ 0x00, 0x31, 0x5a, 0x03, 0x5c, 0x0d, 0x4b, 0x88, 0x05, 0xc3, 0x5c, 0x11,
+ 0xcb, 0x64, 0x7b, 0x07, 0xda, 0xc9, 0x0b, 0xc3, 0x5c, 0x1b, 0xca, 0x26,
+ 0xf7, 0x07, 0xda, 0xb8, 0x00, 0x43, 0x5c, 0x27, 0x00, 0x43, 0x5c, 0x37,
+ 0x00, 0x43, 0x5c, 0x56, 0x00, 0x43, 0x5c, 0x62, 0x00, 0x43, 0x5c, 0x74,
+ 0x00, 0x43, 0x5c, 0x84, 0x00, 0xc3, 0x5c, 0x90, 0xc2, 0x16, 0x1c, 0x00,
+ 0x34, 0x02, 0x03, 0x5c, 0xa6, 0x00, 0x43, 0x5c, 0xaa, 0x60, 0x06, 0x47,
+ 0x43, 0x5c, 0xba, 0xd0, 0x5f, 0x02, 0x00, 0x33, 0xbb, 0x03, 0x5c, 0xc6,
+ 0xca, 0x26, 0xf7, 0x07, 0xde, 0xc1, 0xcd, 0x00, 0xfa, 0x07, 0xde, 0xb8,
+ 0x45, 0x00, 0x8c, 0xc3, 0x5c, 0xcc, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0xb1,
+ 0x0b, 0xc3, 0x5c, 0xd8, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0xc0, 0xcb, 0x64,
+ 0x7b, 0x07, 0xdf, 0x39, 0x0b, 0xc3, 0x5c, 0xe4, 0xca, 0x26, 0xf7, 0x07,
+ 0xdf, 0x28, 0x00, 0x43, 0x5c, 0xf0, 0x00, 0x43, 0x5d, 0x02, 0x00, 0x43,
+ 0x5d, 0x12, 0x00, 0x43, 0x5d, 0x28, 0x00, 0x43, 0x5d, 0x3e, 0x8e, 0x00,
+ 0x31, 0x7b, 0x03, 0x5d, 0x54, 0xc3, 0x01, 0xce, 0x00, 0x34, 0x63, 0x03,
+ 0x5d, 0x58, 0x86, 0x00, 0x31, 0xb2, 0x03, 0x5d, 0x5c, 0x8e, 0x00, 0x34,
+ 0x43, 0x03, 0x5d, 0x60, 0xc3, 0x01, 0xce, 0x00, 0x34, 0x6a, 0x03, 0x5d,
+ 0x64, 0x00, 0x43, 0x5d, 0x68, 0x00, 0x43, 0x5d, 0x74, 0xc3, 0xb1, 0x0d,
+ 0x00, 0x35, 0x09, 0xc3, 0x0f, 0x9a, 0x00, 0x33, 0x79, 0xc3, 0x85, 0xf5,
+ 0x00, 0x33, 0x70, 0xca, 0x26, 0xf7, 0x07, 0xde, 0xf9, 0xcd, 0x00, 0xfa,
+ 0x07, 0xde, 0xf0, 0x00, 0x43, 0x5d, 0x84, 0x45, 0x00, 0x8c, 0xc3, 0x5d,
+ 0x94, 0xcd, 0x00, 0xfa, 0x07, 0xf7, 0x69, 0xca, 0x26, 0xf7, 0x07, 0xf7,
+ 0x70, 0x00, 0x43, 0x5d, 0xb5, 0xca, 0x26, 0xf7, 0x07, 0xde, 0xd1, 0xcd,
+ 0x00, 0xfa, 0x07, 0xde, 0xc8, 0x00, 0xc3, 0x5d, 0xcb, 0xc3, 0xe6, 0x23,
+ 0x00, 0x35, 0x8a, 0x03, 0x5d, 0xdb, 0x00, 0x43, 0x5d, 0xdf, 0x00, 0x43,
+ 0x5d, 0xfe, 0x8a, 0x00, 0x31, 0x6b, 0x03, 0x5e, 0x0e, 0xc3, 0x08, 0x0b,
+ 0x00, 0x31, 0x0a, 0x03, 0x5e, 0x12, 0x00, 0x43, 0x5e, 0x18, 0x00, 0x43,
+ 0x5e, 0x40, 0x16, 0xc3, 0x5e, 0x52, 0x15, 0xc3, 0x5e, 0x62, 0xc3, 0x72,
+ 0xf0, 0x0f, 0x75, 0x99, 0xc3, 0x0f, 0x9a, 0x0f, 0x75, 0x91, 0xc3, 0xb1,
+ 0x0d, 0x0f, 0x75, 0x81, 0xc3, 0x03, 0x0c, 0x0f, 0x75, 0x79, 0xc4, 0x3a,
+ 0x01, 0x0f, 0x75, 0x69, 0xc4, 0x19, 0x60, 0x0f, 0x75, 0x61, 0xc3, 0x0d,
+ 0xff, 0x0f, 0x75, 0x59, 0xc3, 0x2b, 0xb9, 0x0f, 0x75, 0x49, 0xc3, 0x14,
+ 0x4b, 0x0f, 0x75, 0x39, 0x42, 0x02, 0x1c, 0xc3, 0x5e, 0x74, 0xc3, 0x7e,
+ 0x89, 0x0f, 0x75, 0x29, 0x42, 0x0e, 0x9a, 0xc3, 0x5e, 0x7e, 0xc4, 0x30,
+ 0xc1, 0x0f, 0x75, 0x11, 0xc3, 0x85, 0xf5, 0x0f, 0x75, 0x09, 0xc4, 0x14,
+ 0x4a, 0x0f, 0x75, 0xb9, 0xc5, 0x92, 0x75, 0x0f, 0x75, 0xd8, 0xc3, 0x85,
+ 0xf5, 0x0f, 0x70, 0xe1, 0xc4, 0x3a, 0x01, 0x0f, 0x70, 0xe9, 0xc3, 0xb1,
+ 0x0d, 0x0f, 0x70, 0xf1, 0xc3, 0x0f, 0x9a, 0x0f, 0x70, 0xf8, 0xc4, 0x30,
+ 0xc1, 0x0f, 0x72, 0x11, 0xc3, 0x14, 0x4b, 0x0f, 0x72, 0x39, 0xc3, 0x2b,
+ 0xb9, 0x0f, 0x72, 0x49, 0xc3, 0x0d, 0xff, 0x0f, 0x72, 0x59, 0xc4, 0x3a,
+ 0x01, 0x0f, 0x72, 0x69, 0x15, 0xc3, 0x5e, 0x86, 0xc3, 0x03, 0x0c, 0x0f,
+ 0x72, 0x79, 0xc3, 0x0f, 0x9a, 0x0f, 0x72, 0x91, 0xc4, 0x14, 0x4a, 0x0f,
+ 0x72, 0xb9, 0x06, 0xc3, 0x5e, 0x98, 0xc5, 0x92, 0x75, 0x0f, 0x72, 0xd8,
+ 0xc3, 0x00, 0x49, 0x0f, 0x74, 0x01, 0xc2, 0x00, 0x74, 0x0f, 0x74, 0x78,
+ 0x8e, 0x0f, 0x74, 0x19, 0x86, 0x0f, 0x74, 0xc8, 0xc2, 0x16, 0x1c, 0x0f,
+ 0x74, 0x21, 0xc2, 0x02, 0x98, 0x0f, 0x74, 0x38, 0xc2, 0x00, 0x74, 0x0f,
+ 0x74, 0x31, 0x8a, 0x0f, 0x74, 0xd0, 0xc2, 0x02, 0x98, 0x0f, 0x74, 0x41,
+ 0xc2, 0x16, 0x1c, 0x0f, 0x74, 0xa9, 0x0a, 0x43, 0x5e, 0xa4, 0xc3, 0x03,
+ 0x26, 0x0f, 0x74, 0x71, 0xc2, 0x01, 0x9d, 0x0f, 0x74, 0x89, 0xc4, 0xdf,
+ 0x93, 0x0f, 0x74, 0xa0, 0xc2, 0x16, 0x1c, 0x0f, 0x73, 0x21, 0xc2, 0x02,
+ 0x98, 0x0f, 0x73, 0x38, 0xc2, 0x02, 0x98, 0x0f, 0x73, 0x41, 0xc2, 0x16,
+ 0x1c, 0x0f, 0x73, 0xa9, 0xc3, 0x64, 0x77, 0x0f, 0x73, 0xb0, 0xc2, 0x0f,
+ 0x9b, 0x0f, 0x73, 0x51, 0xc3, 0x14, 0x4b, 0x0f, 0x73, 0xb8, 0xc3, 0x03,
+ 0x26, 0x0f, 0x73, 0x71, 0xc2, 0x01, 0x9d, 0x0f, 0x73, 0x89, 0xc4, 0xdf,
+ 0x93, 0x0f, 0x73, 0xa0, 0xc2, 0x01, 0x9d, 0x0f, 0x73, 0xc9, 0x47, 0x3b,
+ 0xc4, 0x43, 0x5e, 0xb0, 0xc5, 0xda, 0xd3, 0x00, 0x46, 0xf9, 0xc3, 0xe5,
+ 0x63, 0x00, 0x46, 0xf1, 0x42, 0x0d, 0xf6, 0xc3, 0x5e, 0xbc, 0x03, 0x43,
+ 0x5e, 0xc6, 0xcc, 0x00, 0xfb, 0x00, 0x37, 0x11, 0xcb, 0x10, 0xb5, 0x00,
+ 0x36, 0xc0, 0xde, 0x0f, 0x9a, 0x00, 0x36, 0xb9, 0xde, 0x0d, 0xf6, 0x00,
+ 0x36, 0xb1, 0xd6, 0x2f, 0x88, 0x00, 0x30, 0xb0, 0xc7, 0xc9, 0xf8, 0x00,
+ 0x44, 0xd9, 0x0b, 0x43, 0x5e, 0xe4, 0xc5, 0x05, 0x02, 0x07, 0xdd, 0xf1,
+ 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xe8, 0xc5, 0x05, 0x02, 0x07, 0xdd, 0xc9,
+ 0xc5, 0x00, 0xd4, 0x07, 0xdd, 0xc0, 0xc3, 0x7e, 0x89, 0x00, 0x44, 0x21,
+ 0xc5, 0x08, 0x09, 0x00, 0x44, 0x18, 0x49, 0x04, 0xd2, 0xc3, 0x5e, 0xf0,
+ 0x48, 0x0a, 0x53, 0x43, 0x5e, 0xfc, 0x51, 0x13, 0xe3, 0xc3, 0x5f, 0x0e,
+ 0xd3, 0x43, 0x98, 0x01, 0x2b, 0x91, 0xd3, 0x43, 0xbe, 0x01, 0x2b, 0x88,
+ 0x45, 0x02, 0x9a, 0x43, 0x5f, 0x20, 0xc8, 0x00, 0x5f, 0x01, 0x2a, 0x71,
+ 0xca, 0x01, 0x68, 0x01, 0x2a, 0x60, 0xc9, 0xb0, 0x3e, 0x01, 0x2b, 0xe9,
+ 0xc9, 0x01, 0x69, 0x01, 0x29, 0xa0, 0x49, 0x2a, 0xf5, 0xc3, 0x5f, 0x32,
+ 0x02, 0x43, 0x5f, 0x48, 0x49, 0x2a, 0xf5, 0x43, 0x5f, 0x5a, 0xce, 0x2a,
+ 0xfe, 0x0f, 0xd0, 0xa1, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xf0, 0xce, 0x2a,
+ 0xfe, 0x0f, 0xd0, 0x91, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xe0, 0xce, 0x2a,
+ 0xfe, 0x0f, 0xd0, 0x89, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xd8, 0xce, 0x2a,
+ 0xfe, 0x0f, 0xd0, 0x81, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0xd0, 0xc3, 0x00,
+ 0x74, 0x0f, 0xd1, 0x21, 0xc5, 0x56, 0xa5, 0x0f, 0xd1, 0x40, 0xce, 0x6f,
+ 0x38, 0x01, 0x34, 0x49, 0xcf, 0x6a, 0x9e, 0x01, 0x34, 0x41, 0xca, 0x3e,
+ 0xe4, 0x01, 0x4f, 0x68, 0xc5, 0x0b, 0x0a, 0x01, 0x2d, 0x51, 0xc3, 0x0e,
+ 0x6b, 0x01, 0x5a, 0x88, 0xc6, 0x46, 0x3e, 0x01, 0x2d, 0xd1, 0xc7, 0xbb,
+ 0xcb, 0x01, 0x5a, 0x98, 0xd9, 0x20, 0x44, 0x01, 0x1f, 0x78, 0xd2, 0x1c,
+ 0x40, 0x01, 0x1f, 0x68, 0xc4, 0x01, 0x9b, 0x01, 0x3d, 0x20, 0xd2, 0x1c,
+ 0x40, 0x01, 0x1f, 0x70, 0xc5, 0x06, 0x82, 0x01, 0x30, 0xd1, 0xce, 0x24,
+ 0xd5, 0x0f, 0xac, 0xe0, 0xc6, 0x0b, 0x09, 0x01, 0x2f, 0xf1, 0xc7, 0x3a,
+ 0x19, 0x0f, 0xbc, 0xc9, 0xc7, 0x0a, 0xe0, 0x0f, 0xbc, 0xf8, 0xc8, 0x5e,
+ 0xa6, 0x01, 0x5e, 0x30, 0xc8, 0x5e, 0xa6, 0x01, 0x5e, 0x38, 0x9a, 0x01,
+ 0x30, 0x91, 0xc5, 0x6b, 0x02, 0x01, 0x30, 0x89, 0x04, 0xc3, 0x5f, 0x66,
+ 0xc8, 0x8e, 0xa5, 0x0f, 0xaf, 0xa9, 0xc7, 0xc0, 0xba, 0x01, 0x5d, 0xe8,
+ 0xc4, 0xe0, 0x97, 0x00, 0xdb, 0x51, 0xc6, 0xcf, 0x59, 0x00, 0xdb, 0x28,
+ 0xc7, 0xc2, 0x6c, 0x00, 0xda, 0x08, 0x90, 0x0b, 0x51, 0x31, 0x96, 0x0b,
+ 0x50, 0xb8, 0x91, 0x0b, 0x51, 0x49, 0x97, 0x0b, 0x50, 0xe1, 0xc2, 0x25,
+ 0x9f, 0x0b, 0x50, 0x98, 0x83, 0x0b, 0x50, 0x71, 0x87, 0x0b, 0x50, 0x40,
+ 0xc2, 0x04, 0xc6, 0x0b, 0x51, 0xa1, 0xc2, 0x00, 0xc4, 0x0b, 0x51, 0x80,
+ 0x90, 0x0b, 0x51, 0x89, 0xc2, 0xd0, 0x00, 0x0b, 0x51, 0x29, 0x87, 0x0b,
+ 0x50, 0x38, 0xc2, 0x02, 0xe0, 0x0b, 0x50, 0x61, 0x8b, 0x0b, 0x50, 0x58,
+ 0x87, 0x0b, 0x51, 0x11, 0xc2, 0xd0, 0x00, 0x0b, 0x50, 0xf8, 0xc2, 0x01,
+ 0x30, 0x0b, 0x51, 0x41, 0xc5, 0xde, 0x75, 0x0b, 0x51, 0x38, 0xc3, 0x8b,
+ 0xa9, 0x0b, 0x50, 0xd1, 0xc3, 0x7c, 0x57, 0x0b, 0x50, 0x80, 0xc2, 0x10,
+ 0x11, 0x0b, 0x50, 0xc0, 0xc2, 0x00, 0x7a, 0x0b, 0x50, 0x11, 0x07, 0xc3,
+ 0x5f, 0x72, 0xc5, 0xd8, 0xe9, 0x0b, 0x4d, 0x10, 0xc2, 0xd0, 0x00, 0x0b,
+ 0x4d, 0xa9, 0x96, 0x0b, 0x4d, 0x48, 0x91, 0x0b, 0x4b, 0xa9, 0x87, 0x0b,
+ 0x4f, 0x50, 0x17, 0xc3, 0x5f, 0x7a, 0x96, 0x0b, 0x4d, 0xb8, 0x96, 0x0b,
+ 0x4e, 0x61, 0xc2, 0x00, 0x3d, 0x0b, 0x4d, 0x59, 0xc2, 0x00, 0x11, 0x0b,
+ 0x4b, 0xd0, 0x0d, 0xc3, 0x5f, 0x84, 0x83, 0x0b, 0x4f, 0x91, 0xc3, 0x8b,
+ 0xa9, 0x0b, 0x4f, 0x03, 0x03, 0x5f, 0x95, 0x09, 0xc3, 0x5f, 0x99, 0xc6,
+ 0xce, 0xa5, 0x0b, 0x4d, 0x19, 0x11, 0x43, 0x5f, 0xa1, 0xc2, 0x05, 0x1d,
+ 0x0b, 0x4b, 0x81, 0x03, 0xc3, 0x5f, 0xa9, 0x0b, 0x43, 0x5f, 0xb3, 0x17,
+ 0xc3, 0x5f, 0xbd, 0xc3, 0x8f, 0x8a, 0x0b, 0x4b, 0xe0, 0x87, 0x0b, 0x4e,
+ 0x28, 0x07, 0xc3, 0x5f, 0xc7, 0xc5, 0xc0, 0x3e, 0x0b, 0x4c, 0x50, 0xc2,
+ 0x00, 0xb6, 0x0b, 0x4e, 0x71, 0xc2, 0x01, 0xdf, 0x0b, 0x4d, 0xe0, 0xc2,
+ 0x92, 0xb5, 0x0b, 0x4e, 0x09, 0xc2, 0x5c, 0x9b, 0x0b, 0x4d, 0x38, 0xc7,
+ 0x0b, 0xc8, 0x0b, 0x4e, 0x01, 0xc7, 0xc8, 0xa1, 0x0b, 0x4d, 0x68, 0x8f,
+ 0x0b, 0x4b, 0x91, 0x93, 0x0b, 0x4e, 0xe1, 0x83, 0x0b, 0x4e, 0xdb, 0x03,
+ 0x5f, 0xd4, 0xc8, 0xbc, 0x02, 0x0b, 0x4c, 0x78, 0x91, 0x0b, 0x4b, 0xcb,
+ 0x03, 0x5f, 0xd8, 0x93, 0x0b, 0x4e, 0xb0, 0x90, 0x0b, 0x50, 0x01, 0x97,
+ 0x0b, 0x4f, 0xea, 0x03, 0x5f, 0xdc, 0x8f, 0x0b, 0x4d, 0x53, 0x03, 0x5f,
+ 0xe2, 0xc2, 0x10, 0x11, 0x0b, 0x4c, 0xb0, 0x03, 0xc3, 0x5f, 0xe8, 0x87,
+ 0x0b, 0x4f, 0x49, 0x8f, 0x0b, 0x4c, 0x88, 0x83, 0x0b, 0x4b, 0x63, 0x03,
+ 0x5f, 0xf0, 0x42, 0x00, 0xaf, 0x43, 0x5f, 0xf4, 0x07, 0x43, 0x60, 0x00,
+ 0x17, 0xc3, 0x60, 0x0a, 0xc2, 0x00, 0x4f, 0x0b, 0x4c, 0x20, 0xc2, 0x00,
+ 0x45, 0x0b, 0x4e, 0x10, 0x93, 0x0b, 0x4b, 0x71, 0x87, 0x0b, 0x4f, 0x80,
+ 0x91, 0x0b, 0x4f, 0x9b, 0x03, 0x60, 0x12, 0xc2, 0x14, 0xbe, 0x0b, 0x4e,
+ 0xf1, 0xc5, 0x8b, 0xa8, 0x0b, 0x4d, 0x20, 0x96, 0x0b, 0x4c, 0x81, 0x87,
+ 0x0b, 0x4b, 0xb0, 0x11, 0xc3, 0x60, 0x16, 0x93, 0x0b, 0x4f, 0xc1, 0x8f,
+ 0x0b, 0x4b, 0xd8, 0x92, 0x0b, 0x4b, 0x49, 0x93, 0x0b, 0x4e, 0xc9, 0xc2,
+ 0x00, 0xc2, 0x0b, 0x4c, 0xf8, 0x87, 0x0b, 0x4f, 0x61, 0xc3, 0x8b, 0xa9,
+ 0x0b, 0x4c, 0xe8, 0xc2, 0x01, 0xdf, 0x0b, 0x4b, 0x41, 0x87, 0x0b, 0x4d,
+ 0x30, 0x93, 0x0b, 0x4f, 0xe1, 0x87, 0x0b, 0x4d, 0xc3, 0x03, 0x60, 0x1e,
+ 0x92, 0x0b, 0x4c, 0x58, 0xc2, 0x02, 0xe0, 0x0b, 0x4e, 0x18, 0xc2, 0x00,
+ 0xc4, 0x0b, 0x4d, 0x29, 0x83, 0x0b, 0x4c, 0x38, 0x93, 0x0b, 0x50, 0x08,
+ 0x00, 0xc3, 0x60, 0x22, 0x87, 0x0b, 0x4d, 0xa2, 0x03, 0x60, 0x32, 0x90,
+ 0x0b, 0x4f, 0x29, 0x93, 0x0b, 0x4f, 0x21, 0xc3, 0xb5, 0x1b, 0x0b, 0x4f,
+ 0x09, 0xc2, 0x00, 0xe2, 0x0b, 0x4d, 0x90, 0xc5, 0x00, 0x99, 0x0b, 0x4f,
+ 0x19, 0xc8, 0xb7, 0x2a, 0x0b, 0x4f, 0x10, 0x9a, 0x0b, 0x4e, 0xf9, 0xc2,
+ 0x10, 0x11, 0x0b, 0x4c, 0xbb, 0x03, 0x60, 0x36, 0x8f, 0x0b, 0x4d, 0xf0,
+ 0x96, 0x0b, 0x4d, 0x71, 0xc2, 0x02, 0xe0, 0x0b, 0x4c, 0xa0, 0x09, 0xc3,
+ 0x60, 0x3a, 0x0d, 0x43, 0x60, 0x50, 0xc2, 0x01, 0xdf, 0x0b, 0x4a, 0x01,
+ 0x0a, 0xc3, 0x60, 0x6e, 0x43, 0x8f, 0x8a, 0x43, 0x60, 0x7a, 0x07, 0xc3,
+ 0x60, 0x82, 0xc2, 0x5d, 0xa1, 0x0b, 0x4b, 0x10, 0xc2, 0x00, 0xc2, 0x0b,
+ 0x49, 0xb9, 0x07, 0xc3, 0x60, 0x8c, 0xc2, 0x00, 0x45, 0x0b, 0x48, 0xc0,
+ 0x8b, 0x0b, 0x4a, 0x69, 0xc2, 0x0f, 0xe1, 0x0b, 0x49, 0x79, 0xc2, 0x00,
+ 0x3d, 0x0b, 0x49, 0x11, 0xc2, 0x00, 0xc2, 0x0b, 0x47, 0xd0, 0xc3, 0xdf,
+ 0x8c, 0x0b, 0x4a, 0x39, 0x42, 0x2c, 0x43, 0xc3, 0x60, 0x96, 0xc2, 0x00,
+ 0xb6, 0x0b, 0x48, 0x11, 0x8b, 0x0b, 0x47, 0x9a, 0x03, 0x60, 0xa0, 0x17,
+ 0xc3, 0x60, 0xa6, 0xc3, 0xd0, 0xd7, 0x0b, 0x4a, 0x79, 0x96, 0x0b, 0x49,
+ 0x80, 0xc5, 0xda, 0x33, 0x0b, 0x4a, 0x11, 0xc5, 0xd9, 0x9d, 0x0b, 0x48,
+ 0x50, 0x17, 0xc3, 0x60, 0xb0, 0xc3, 0xd0, 0xd7, 0x0b, 0x4a, 0x80, 0xc2,
+ 0x04, 0xc6, 0x0b, 0x49, 0x03, 0x03, 0x60, 0xb8, 0xc2, 0x01, 0xba, 0x0b,
+ 0x47, 0x88, 0xc3, 0x8f, 0x8a, 0x0b, 0x49, 0x91, 0x42, 0x2c, 0x43, 0xc3,
+ 0x60, 0xbe, 0x91, 0x0b, 0x48, 0xea, 0x03, 0x60, 0xc8, 0xc3, 0x8f, 0x8a,
+ 0x0b, 0x48, 0xe1, 0xc3, 0x5c, 0x9f, 0x0b, 0x48, 0xd1, 0xc4, 0xe4, 0x1b,
+ 0x0b, 0x48, 0xb0, 0x17, 0xc3, 0x60, 0xcc, 0xc3, 0xd0, 0xd7, 0x0b, 0x49,
+ 0x40, 0xc2, 0x01, 0xbb, 0x0b, 0x49, 0xe8, 0x93, 0x0b, 0x49, 0xf9, 0x90,
+ 0x0b, 0x49, 0xd1, 0xc2, 0x00, 0x7a, 0x0b, 0x48, 0x30, 0x17, 0xc3, 0x60,
+ 0xda, 0x96, 0x0b, 0x48, 0x20, 0xc2, 0x10, 0x11, 0x0b, 0x49, 0xc9, 0x97,
+ 0x0b, 0x4a, 0x91, 0x87, 0x0b, 0x48, 0x18, 0x93, 0x0b, 0x4b, 0x21, 0x92,
+ 0x0b, 0x48, 0x38, 0xc2, 0x7f, 0xc0, 0x0b, 0x4a, 0xe1, 0x97, 0x0b, 0x4a,
+ 0xc1, 0x07, 0xc3, 0x60, 0xee, 0xc2, 0x25, 0x9f, 0x0b, 0x4a, 0xa0, 0x11,
+ 0xc3, 0x60, 0xf6, 0xc3, 0xe5, 0x00, 0x0b, 0x49, 0x28, 0xc4, 0xb5, 0xd8,
+ 0x0b, 0x4b, 0x01, 0xc3, 0x1a, 0x7c, 0x0b, 0x4a, 0x50, 0x93, 0x0b, 0x4a,
+ 0xe9, 0xc2, 0x00, 0xa4, 0x0b, 0x48, 0xd8, 0x87, 0x0b, 0x4a, 0xd1, 0xc4,
+ 0xc3, 0x35, 0x0b, 0x49, 0x70, 0x42, 0x00, 0xbd, 0xc3, 0x60, 0xfe, 0x17,
+ 0xc3, 0x61, 0x0a, 0x96, 0x0b, 0x46, 0x48, 0xca, 0x9c, 0x7a, 0x0b, 0x46,
+ 0xa9, 0x96, 0x0b, 0x46, 0x70, 0xc2, 0x14, 0xbe, 0x0b, 0x47, 0x41, 0xc3,
+ 0xdf, 0x8c, 0x0b, 0x46, 0xd8, 0xc4, 0xdf, 0x1b, 0x0b, 0x46, 0xe1, 0xc2,
+ 0xd0, 0x00, 0x0b, 0x45, 0x50, 0x96, 0x0b, 0x47, 0x81, 0xc5, 0xd7, 0xe0,
+ 0x0b, 0x45, 0xd0, 0xc4, 0xd2, 0x85, 0x0b, 0x46, 0x31, 0xc5, 0xda, 0x56,
+ 0x0b, 0x45, 0x70, 0x90, 0x0b, 0x47, 0x71, 0xc5, 0xd6, 0x87, 0x0b, 0x44,
+ 0xe0, 0x8f, 0x0b, 0x46, 0x29, 0x92, 0x0b, 0x45, 0xb0, 0x93, 0x0b, 0x47,
+ 0x61, 0xc6, 0xcb, 0xe7, 0x0b, 0x45, 0x90, 0xc2, 0x5c, 0x9b, 0x0b, 0x47,
+ 0x59, 0x09, 0xc3, 0x61, 0x18, 0xc2, 0x00, 0x7a, 0x0b, 0x46, 0x81, 0x0d,
+ 0x43, 0x61, 0x25, 0x07, 0xc3, 0x61, 0x31, 0x03, 0xc3, 0x61, 0x3d, 0xc3,
+ 0xdf, 0x8c, 0x0b, 0x45, 0x68, 0x03, 0xc3, 0x61, 0x47, 0x42, 0x2c, 0x43,
+ 0xc3, 0x61, 0x4f, 0xc3, 0x83, 0xad, 0x0b, 0x45, 0x59, 0xc4, 0xc8, 0xbe,
+ 0x0b, 0x44, 0xe8, 0x17, 0xc3, 0x61, 0x59, 0xc2, 0x00, 0x7a, 0x0b, 0x46,
+ 0x99, 0xc3, 0x88, 0xcf, 0x0b, 0x45, 0xf9, 0x83, 0x0b, 0x45, 0xf1, 0xc5,
+ 0xb5, 0x19, 0x0b, 0x45, 0x28, 0x07, 0xc3, 0x61, 0x63, 0xc2, 0x04, 0xc6,
+ 0x0b, 0x45, 0xa1, 0xc6, 0xd0, 0x79, 0x0b, 0x44, 0xd0, 0xc3, 0x47, 0x4a,
+ 0x0b, 0x45, 0x19, 0x83, 0x0b, 0x44, 0x80, 0x03, 0xc3, 0x61, 0x6d, 0x07,
+ 0xc3, 0x61, 0x79, 0x8b, 0x0b, 0x46, 0xeb, 0x03, 0x61, 0x89, 0x17, 0x43,
+ 0x61, 0x93, 0x07, 0xc3, 0x61, 0x9d, 0x00, 0x43, 0x61, 0xa9, 0xc3, 0xe5,
+ 0x00, 0x0b, 0x47, 0x21, 0xc7, 0xc5, 0xd0, 0x0b, 0x45, 0x11, 0x8f, 0x0b,
+ 0x44, 0x88, 0x92, 0x0b, 0x45, 0x01, 0xc3, 0x82, 0x78, 0x0b, 0x44, 0xb0,
+ 0x09, 0xc3, 0x61, 0xb5, 0xc2, 0x00, 0x7a, 0x0b, 0x44, 0x71, 0xca, 0x9c,
+ 0xc0, 0x0b, 0x43, 0xa0, 0xc2, 0x00, 0xc4, 0x0b, 0x44, 0x59, 0xc4, 0xc1,
+ 0x3b, 0x0b, 0x42, 0xb8, 0xc5, 0xda, 0xfb, 0x0b, 0x44, 0x01, 0xc7, 0xc1,
+ 0x4d, 0x0b, 0x43, 0x68, 0xc9, 0xac, 0x69, 0x0b, 0x43, 0x59, 0xc4, 0x96,
+ 0xdd, 0x0b, 0x43, 0xe0, 0x43, 0x7c, 0x4f, 0x43, 0x61, 0xca, 0xc3, 0x8f,
+ 0x91, 0x0b, 0x44, 0x21, 0xc4, 0x85, 0xb7, 0x0b, 0x43, 0xf1, 0xca, 0x9a,
+ 0xd6, 0x0b, 0x43, 0x61, 0x03, 0x43, 0x61, 0xd6, 0xc8, 0xb7, 0xe2, 0x0b,
+ 0x44, 0x11, 0x93, 0x0b, 0x43, 0xc8, 0x93, 0x0b, 0x44, 0x69, 0xc3, 0x12,
+ 0xc2, 0x0b, 0x42, 0xe8, 0xc3, 0x7c, 0x57, 0x0b, 0x44, 0x31, 0xc4, 0xde,
+ 0xab, 0x0b, 0x43, 0x81, 0xc3, 0xe5, 0x5d, 0x0b, 0x43, 0x70, 0xc4, 0xb3,
+ 0x92, 0x0b, 0x43, 0x89, 0xcc, 0x83, 0x91, 0x0b, 0x43, 0x18, 0xc6, 0xcf,
+ 0xfb, 0x0b, 0x43, 0x51, 0xc6, 0xd3, 0x25, 0x0b, 0x43, 0x48, 0xc5, 0xda,
+ 0x10, 0x0b, 0x43, 0x41, 0xc9, 0xa9, 0x63, 0x0b, 0x42, 0xc0, 0x96, 0x0b,
+ 0x42, 0x59, 0x93, 0x0b, 0x41, 0xe1, 0xc4, 0xe4, 0x5f, 0x0b, 0x41, 0x80,
+ 0xcc, 0x8a, 0x2d, 0x0b, 0x42, 0x01, 0x0b, 0xc3, 0x61, 0xe2, 0x17, 0x43,
+ 0x61, 0xee, 0xc3, 0xb5, 0x1b, 0x0b, 0x42, 0x51, 0xc6, 0xd1, 0x99, 0x0b,
+ 0x41, 0x88, 0xc3, 0x48, 0xc4, 0x0b, 0x41, 0x71, 0xc7, 0xb1, 0xde, 0x0b,
+ 0x40, 0x60, 0x93, 0x0b, 0x42, 0x81, 0xc2, 0x00, 0x87, 0x0b, 0x41, 0x38,
+ 0x96, 0x0b, 0x41, 0x99, 0xc8, 0xb8, 0xfa, 0x0b, 0x40, 0x98, 0x07, 0xc3,
+ 0x61, 0xf8, 0xc7, 0xc9, 0x49, 0x0b, 0x41, 0xe9, 0xc5, 0xda, 0x0b, 0x0b,
+ 0x40, 0x78, 0x93, 0x0b, 0x42, 0xb1, 0xc3, 0x16, 0x59, 0x0b, 0x42, 0x40,
+ 0x42, 0x00, 0x7a, 0xc3, 0x62, 0x11, 0xca, 0xa1, 0xe8, 0x0b, 0x40, 0xf0,
+ 0x93, 0x0b, 0x42, 0xa9, 0xc6, 0xb7, 0xb4, 0x0b, 0x40, 0x20, 0x83, 0x0b,
+ 0x42, 0x89, 0xc3, 0x8f, 0x8a, 0x0b, 0x42, 0x68, 0x8b, 0x0b, 0x42, 0x7b,
+ 0x03, 0x62, 0x1d, 0xc2, 0x00, 0x3d, 0x0b, 0x42, 0x48, 0xc3, 0x53, 0x54,
+ 0x0b, 0x42, 0x29, 0x43, 0xe6, 0x05, 0xc3, 0x62, 0x23, 0xc4, 0x08, 0x6b,
+ 0x0b, 0x40, 0x68, 0xc5, 0x9c, 0x7f, 0x0b, 0x42, 0x19, 0xc4, 0x09, 0x91,
+ 0x0b, 0x40, 0xa0, 0xc2, 0x00, 0xb6, 0x0b, 0x41, 0xfb, 0x03, 0x62, 0x2f,
+ 0xc5, 0xdc, 0xbd, 0x0b, 0x40, 0x90, 0xc9, 0xb2, 0xc6, 0x0b, 0x41, 0xa1,
+ 0xc9, 0x82, 0x74, 0x0b, 0x41, 0x48, 0xc7, 0xc3, 0x30, 0x0b, 0x40, 0xf9,
+ 0xc6, 0xb7, 0xb4, 0x0b, 0x40, 0x38, 0xc3, 0x48, 0xc4, 0x0b, 0x41, 0x78,
+ 0x03, 0xc3, 0x62, 0x33, 0xc9, 0x82, 0x74, 0x0b, 0x41, 0x41, 0xc5, 0xda,
+ 0x65, 0x0b, 0x40, 0xe9, 0xc4, 0x99, 0x41, 0x0b, 0x40, 0xd8, 0x4d, 0x7c,
+ 0x4d, 0xc3, 0x62, 0x3d, 0x4b, 0x98, 0xf2, 0x43, 0x62, 0x49, 0xc6, 0xcf,
+ 0x0b, 0x0b, 0x41, 0x09, 0xc3, 0x82, 0x78, 0x0b, 0x40, 0xe0, 0xa1, 0x01,
+ 0x40, 0x7b, 0x03, 0x62, 0x55, 0xa2, 0x01, 0x40, 0xbb, 0x03, 0x62, 0x6e,
+ 0xa3, 0x01, 0x41, 0x3b, 0x03, 0x62, 0x80, 0xa5, 0x01, 0x44, 0x39, 0xa4,
+ 0x01, 0x42, 0x3a, 0x03, 0x62, 0x8b, 0xa2, 0x01, 0x40, 0xdb, 0x03, 0x62,
+ 0x8f, 0xa3, 0x01, 0x41, 0x5b, 0x03, 0x62, 0xa1, 0xa5, 0x01, 0x44, 0x59,
+ 0xa4, 0x01, 0x42, 0x5a, 0x03, 0x62, 0xac, 0xa3, 0x01, 0x41, 0x9b, 0x03,
+ 0x62, 0xb0, 0xa5, 0x01, 0x44, 0x99, 0xa4, 0x01, 0x42, 0x9a, 0x03, 0x62,
+ 0xbb, 0xa5, 0x01, 0x45, 0x19, 0xa4, 0x01, 0x43, 0x1a, 0x03, 0x62, 0xbf,
+ 0xa5, 0x01, 0x46, 0x18, 0xa2, 0x01, 0x40, 0xeb, 0x03, 0x62, 0xc3, 0xa3,
+ 0x01, 0x41, 0x6b, 0x03, 0x62, 0xd5, 0xa5, 0x01, 0x44, 0x69, 0xa4, 0x01,
+ 0x42, 0x6a, 0x03, 0x62, 0xe0, 0xa3, 0x01, 0x41, 0xab, 0x03, 0x62, 0xe4,
+ 0xa5, 0x01, 0x44, 0xa9, 0xa4, 0x01, 0x42, 0xaa, 0x03, 0x62, 0xef, 0xa5,
+ 0x01, 0x45, 0x29, 0xa4, 0x01, 0x43, 0x2a, 0x03, 0x62, 0xf3, 0xa5, 0x01,
+ 0x46, 0x28, 0xa3, 0x01, 0x41, 0xcb, 0x03, 0x62, 0xf7, 0xa5, 0x01, 0x44,
+ 0xc9, 0xa4, 0x01, 0x42, 0xca, 0x03, 0x63, 0x02, 0xa5, 0x01, 0x45, 0x49,
+ 0xa4, 0x01, 0x43, 0x4a, 0x03, 0x63, 0x06, 0xa5, 0x01, 0x46, 0x48, 0xa5,
+ 0x01, 0x45, 0x89, 0xa4, 0x01, 0x43, 0x8a, 0x03, 0x63, 0x0a, 0xa5, 0x01,
+ 0x46, 0x88, 0xa5, 0x01, 0x47, 0x08, 0xa2, 0x01, 0x40, 0xf3, 0x03, 0x63,
+ 0x0e, 0xa3, 0x01, 0x41, 0x73, 0x03, 0x63, 0x20, 0xa5, 0x01, 0x44, 0x71,
+ 0xa4, 0x01, 0x42, 0x72, 0x03, 0x63, 0x2b, 0xa3, 0x01, 0x41, 0xb3, 0x03,
+ 0x63, 0x2f, 0xa5, 0x01, 0x44, 0xb1, 0xa4, 0x01, 0x42, 0xb2, 0x03, 0x63,
+ 0x3a, 0xa5, 0x01, 0x45, 0x31, 0xa4, 0x01, 0x43, 0x32, 0x03, 0x63, 0x3e,
+ 0xa5, 0x01, 0x46, 0x30, 0xa3, 0x01, 0x41, 0xd3, 0x03, 0x63, 0x42, 0xa5,
+ 0x01, 0x44, 0xd1, 0xa4, 0x01, 0x42, 0xd2, 0x03, 0x63, 0x4d, 0xa5, 0x01,
+ 0x45, 0x51, 0xa4, 0x01, 0x43, 0x52, 0x03, 0x63, 0x51, 0xa5, 0x01, 0x46,
+ 0x50, 0xa5, 0x01, 0x45, 0x91, 0xa4, 0x01, 0x43, 0x92, 0x03, 0x63, 0x55,
+ 0xa5, 0x01, 0x46, 0x90, 0xa5, 0x01, 0x47, 0x10, 0xa3, 0x01, 0x41, 0xe3,
+ 0x03, 0x63, 0x59, 0xa5, 0x01, 0x44, 0xe1, 0xa4, 0x01, 0x42, 0xe2, 0x03,
+ 0x63, 0x64, 0xa5, 0x01, 0x45, 0x61, 0xa4, 0x01, 0x43, 0x62, 0x03, 0x63,
+ 0x68, 0xa5, 0x01, 0x46, 0x60, 0xa5, 0x01, 0x45, 0xa1, 0xa4, 0x01, 0x43,
+ 0xa2, 0x03, 0x63, 0x6c, 0xa5, 0x01, 0x46, 0xa0, 0xa5, 0x01, 0x47, 0x20,
+ 0xa5, 0x01, 0x45, 0xc1, 0xa4, 0x01, 0x43, 0xc2, 0x03, 0x63, 0x70, 0xa5,
+ 0x01, 0x46, 0xc0, 0xa5, 0x01, 0x47, 0x40, 0xa5, 0x01, 0x47, 0x80, 0xc3,
+ 0x15, 0x30, 0x0e, 0x84, 0x11, 0xc7, 0x9c, 0xe1, 0x0e, 0x84, 0x08, 0xc3,
+ 0x63, 0x2b, 0x0e, 0x82, 0x89, 0xc5, 0xcc, 0xcc, 0x0e, 0x80, 0x90, 0xc3,
+ 0x2e, 0xd7, 0x0e, 0x84, 0xa1, 0xc4, 0x99, 0xff, 0x0e, 0x84, 0x98, 0xc6,
+ 0x04, 0xe1, 0x0f, 0xd9, 0xf1, 0xc5, 0x00, 0x2c, 0x0f, 0xd9, 0xf9, 0xcc,
+ 0x04, 0xcb, 0x0f, 0xda, 0x88, 0x46, 0x01, 0xc8, 0xc3, 0x63, 0x74, 0xd2,
+ 0x4b, 0x83, 0x0f, 0xda, 0x68, 0xd2, 0x4b, 0x83, 0x0f, 0xda, 0x61, 0x46,
+ 0x01, 0xc8, 0x43, 0x63, 0x80, 0xc6, 0x04, 0xe1, 0x0f, 0xda, 0x29, 0xcc,
+ 0x04, 0xcb, 0x0f, 0xda, 0x50, 0xcc, 0x04, 0xcb, 0x0f, 0xda, 0x49, 0xc5,
+ 0x00, 0x2c, 0x0f, 0xda, 0x58, 0xd4, 0x35, 0x61, 0x0f, 0xdc, 0xd9, 0xc3,
+ 0x00, 0x3a, 0x01, 0x3e, 0xd8, 0xe0, 0x08, 0x67, 0x0f, 0xdb, 0x48, 0xe0,
+ 0x08, 0x67, 0x0f, 0xdb, 0x58, 0xc7, 0x02, 0xa0, 0x0f, 0xc8, 0x29, 0xc9,
+ 0x02, 0xde, 0x0f, 0xc8, 0x20, 0xd6, 0x2d, 0x62, 0x01, 0x0f, 0xe1, 0xcf,
+ 0x2c, 0x35, 0x01, 0x0f, 0xc9, 0xc6, 0x01, 0x73, 0x01, 0x0d, 0x70, 0xcd,
+ 0x7f, 0x80, 0x01, 0x4c, 0x79, 0xca, 0x9f, 0xa4, 0x01, 0x4c, 0x68, 0x00,
+ 0x43, 0x63, 0x8c, 0xcf, 0x2c, 0x35, 0x01, 0x59, 0xa1, 0xd6, 0x2d, 0x62,
+ 0x01, 0x59, 0xa9, 0x16, 0x43, 0x63, 0x9e, 0xd2, 0x05, 0xd4, 0x0f, 0xc0,
+ 0x01, 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x80, 0x46, 0x00, 0x8b, 0x43, 0x63,
+ 0xad, 0xc9, 0x03, 0xc8, 0x01, 0x58, 0x81, 0xc7, 0x09, 0x0d, 0x01, 0x58,
+ 0x88, 0xdd, 0x10, 0x86, 0x01, 0x0d, 0xc8, 0xcf, 0x6a, 0x8f, 0x01, 0x5a,
+ 0x11, 0xce, 0x33, 0x92, 0x01, 0x5a, 0x58, 0xc6, 0x01, 0x73, 0x01, 0x0e,
+ 0x69, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0x10, 0xc5, 0x01, 0x4a, 0x01, 0x0d,
+ 0xe9, 0x00, 0x43, 0x63, 0xb9, 0xc5, 0x01, 0x4a, 0x01, 0x0d, 0xe1, 0x00,
+ 0x43, 0x63, 0xd1, 0x02, 0xc3, 0x63, 0xe3, 0xc2, 0x00, 0x48, 0x08, 0x3a,
+ 0x40, 0x9e, 0x08, 0x30, 0x01, 0x9f, 0x08, 0x30, 0x09, 0xa0, 0x08, 0x30,
+ 0x11, 0xa1, 0x08, 0x30, 0x19, 0xa2, 0x08, 0x30, 0x21, 0xa3, 0x08, 0x30,
+ 0x29, 0xa4, 0x08, 0x30, 0x31, 0xa5, 0x08, 0x30, 0x39, 0xa6, 0x08, 0x30,
+ 0x40, 0x9d, 0x08, 0x30, 0x49, 0xa0, 0x08, 0x30, 0x59, 0xa3, 0x08, 0x30,
+ 0x61, 0xa4, 0x08, 0x30, 0x69, 0x9e, 0x08, 0x30, 0x50, 0x9d, 0x08, 0x30,
+ 0x71, 0x9e, 0x08, 0x30, 0x7b, 0x03, 0x63, 0xfb, 0x9f, 0x08, 0x30, 0x93,
+ 0x03, 0x64, 0x03, 0xa0, 0x08, 0x30, 0xab, 0x03, 0x64, 0x0b, 0xa1, 0x08,
+ 0x30, 0xb9, 0xa3, 0x08, 0x30, 0xc1, 0xa4, 0x08, 0x30, 0xc9, 0xa5, 0x08,
+ 0x30, 0xd1, 0xa6, 0x08, 0x30, 0xe0, 0x9d, 0x08, 0x30, 0xe9, 0x9e, 0x08,
+ 0x30, 0xf1, 0xa1, 0x08, 0x30, 0xf9, 0xa4, 0x08, 0x31, 0x01, 0xa5, 0x08,
+ 0x31, 0x09, 0xa6, 0x08, 0x31, 0x10, 0x9d, 0x08, 0x31, 0x19, 0x9e, 0x08,
+ 0x31, 0x21, 0xa1, 0x08, 0x31, 0x29, 0xa2, 0x08, 0x31, 0x31, 0xa3, 0x08,
+ 0x31, 0x39, 0xa4, 0x08, 0x31, 0x41, 0xa5, 0x08, 0x31, 0x49, 0xa6, 0x08,
+ 0x31, 0x50, 0x9d, 0x08, 0x31, 0x59, 0x9e, 0x08, 0x31, 0x61, 0xa0, 0x08,
+ 0x31, 0x69, 0xa1, 0x08, 0x31, 0x71, 0xa2, 0x08, 0x31, 0x79, 0xa3, 0x08,
+ 0x31, 0x81, 0xa4, 0x08, 0x31, 0x89, 0xa5, 0x08, 0x31, 0x91, 0xa6, 0x08,
+ 0x31, 0x98, 0x9d, 0x08, 0x31, 0xa1, 0x9e, 0x08, 0x31, 0xa9, 0xa2, 0x08,
+ 0x31, 0xb1, 0xa3, 0x08, 0x31, 0xb9, 0xa4, 0x08, 0x31, 0xc1, 0xa6, 0x08,
+ 0x31, 0xc8, 0x9d, 0x08, 0x31, 0xd1, 0xa0, 0x08, 0x31, 0xd9, 0xa1, 0x08,
+ 0x31, 0xe1, 0xa3, 0x08, 0x31, 0xe9, 0xa4, 0x08, 0x31, 0xf1, 0xa5, 0x08,
+ 0x31, 0xf9, 0xa6, 0x08, 0x32, 0x00, 0x9d, 0x08, 0x32, 0x09, 0x9e, 0x08,
+ 0x32, 0x11, 0x9f, 0x08, 0x32, 0x19, 0xa3, 0x08, 0x32, 0x29, 0xa4, 0x08,
+ 0x32, 0x31, 0xa2, 0x08, 0x32, 0x20, 0x9f, 0x08, 0x32, 0x59, 0xa0, 0x08,
+ 0x32, 0x61, 0x9d, 0x08, 0x32, 0x48, 0x83, 0x08, 0x32, 0x69, 0x84, 0x08,
+ 0x32, 0x70, 0x9d, 0x08, 0x32, 0x91, 0xa5, 0x08, 0x32, 0x98, 0x83, 0x08,
+ 0x32, 0xe9, 0x84, 0x08, 0x32, 0xf1, 0x85, 0x08, 0x32, 0xf8, 0x83, 0x08,
+ 0x33, 0x19, 0x84, 0x08, 0x33, 0x21, 0x85, 0x08, 0x33, 0x28, 0xc3, 0xe5,
+ 0x90, 0x08, 0x00, 0x01, 0xc4, 0xe1, 0xa3, 0x08, 0x00, 0xc9, 0xc4, 0xe0,
+ 0x1f, 0x08, 0x00, 0xf1, 0xc4, 0xe2, 0xfb, 0x08, 0x01, 0x99, 0xc4, 0xe3,
+ 0x47, 0x08, 0x01, 0xa9, 0xc4, 0xe1, 0x13, 0x08, 0x00, 0x29, 0xc4, 0xae,
+ 0x2d, 0x08, 0x00, 0x39, 0xc4, 0xdf, 0xd3, 0x08, 0x01, 0x59, 0xc4, 0xe2,
+ 0x2b, 0x08, 0x01, 0x70, 0xc4, 0xe2, 0xcf, 0x08, 0x00, 0x41, 0xc4, 0xe0,
+ 0x27, 0x08, 0x00, 0xa9, 0xc4, 0xe0, 0xf3, 0x08, 0x01, 0x09, 0xc4, 0xe2,
+ 0x6f, 0x08, 0x01, 0xe1, 0xc3, 0xe5, 0xd5, 0x08, 0x00, 0x21, 0xc4, 0xe3,
+ 0x6b, 0x08, 0x00, 0xb9, 0xc4, 0xe1, 0xab, 0x08, 0x01, 0x19, 0xc4, 0xdf,
+ 0xcb, 0x08, 0x01, 0x80, 0xc4, 0xe0, 0xd3, 0x08, 0x00, 0x49, 0xc4, 0xdf,
+ 0xef, 0x08, 0x00, 0xe1, 0xc4, 0xe3, 0x67, 0x08, 0x00, 0xe9, 0xc4, 0xe3,
+ 0xe3, 0x08, 0x01, 0x11, 0xc4, 0xe2, 0xd3, 0x08, 0x01, 0xb9, 0xc4, 0xe1,
+ 0x87, 0x08, 0x00, 0x51, 0xc4, 0xe0, 0x3b, 0x08, 0x01, 0x51, 0xc4, 0xe2,
+ 0x43, 0x08, 0x01, 0x89, 0xc4, 0xe2, 0x17, 0x08, 0x01, 0x90, 0xc4, 0xe2,
+ 0xcb, 0x08, 0x00, 0x81, 0xc4, 0xe4, 0xcf, 0x08, 0x01, 0xc9, 0xc4, 0xc5,
+ 0xa6, 0x08, 0x01, 0xd1, 0xc4, 0xe0, 0x9f, 0x08, 0x02, 0x09, 0xc5, 0xdc,
+ 0x31, 0x08, 0x02, 0x29, 0xc4, 0xe2, 0x87, 0x08, 0x00, 0x31, 0xc4, 0xe3,
+ 0x2b, 0x08, 0x00, 0x59, 0xc4, 0xe1, 0x5b, 0x08, 0x01, 0x78, 0xc4, 0xe1,
+ 0x9b, 0x08, 0x00, 0x89, 0xc4, 0xe2, 0x1f, 0x08, 0x01, 0xb1, 0xc5, 0xd4,
+ 0xbb, 0x08, 0x02, 0x39, 0xc5, 0xdc, 0xd6, 0x08, 0x02, 0x51, 0xc5, 0xd8,
+ 0x8a, 0x08, 0x02, 0x59, 0xc3, 0x71, 0x3e, 0x08, 0x00, 0x19, 0xc4, 0xe2,
+ 0xb3, 0x08, 0x00, 0x71, 0xc4, 0xe4, 0xdb, 0x08, 0x01, 0x40, 0xc4, 0xe0,
+ 0xbb, 0x08, 0x00, 0x99, 0xc4, 0xdc, 0x27, 0x08, 0x00, 0xa1, 0xc4, 0xe2,
+ 0x8f, 0x08, 0x02, 0x11, 0xc5, 0xd5, 0x29, 0x08, 0x02, 0x60, 0xc4, 0xe0,
+ 0x23, 0x08, 0x00, 0xb1, 0xc4, 0xdf, 0xe3, 0x08, 0x00, 0xf9, 0xc4, 0xe1,
+ 0xef, 0x08, 0x01, 0x21, 0xc4, 0xe3, 0x73, 0x08, 0x01, 0xc1, 0xc4, 0xe2,
+ 0xdf, 0x08, 0x01, 0xe9, 0xc5, 0xdc, 0xe0, 0x08, 0x02, 0x19, 0xc5, 0xd8,
+ 0xa3, 0x08, 0x02, 0x41, 0xc4, 0xd0, 0x73, 0x08, 0x00, 0x79, 0xc4, 0xe4,
+ 0x0b, 0x08, 0x00, 0x90, 0xc4, 0xe2, 0xc3, 0x08, 0x00, 0xd1, 0xc4, 0xe0,
+ 0xef, 0x08, 0x01, 0x29, 0xc4, 0xe4, 0x83, 0x08, 0x01, 0xf9, 0xc5, 0xde,
+ 0x2f, 0x08, 0x02, 0x31, 0xc3, 0xe4, 0xeb, 0x08, 0x00, 0x11, 0xc4, 0xe0,
+ 0x0f, 0x08, 0x00, 0xc1, 0xc4, 0xe2, 0x5b, 0x08, 0x01, 0x49, 0xc4, 0xe1,
+ 0xa7, 0x08, 0x01, 0x61, 0xc4, 0xe2, 0x97, 0x08, 0x02, 0x00, 0xc4, 0xe3,
+ 0xd7, 0x08, 0x00, 0xd9, 0xc4, 0xe2, 0x2f, 0x08, 0x01, 0x01, 0xc4, 0xe2,
+ 0x53, 0x08, 0x01, 0xa1, 0xc5, 0xd8, 0x12, 0x08, 0x02, 0x49, 0xc3, 0xe2,
+ 0x0f, 0x08, 0x00, 0x09, 0xc4, 0xe1, 0xc3, 0x08, 0x00, 0x69, 0xc4, 0xdf,
+ 0xd7, 0x08, 0x01, 0x31, 0xc4, 0xe1, 0x5f, 0x08, 0x01, 0x68, 0xc5, 0xd4,
+ 0xb1, 0x08, 0x02, 0x69, 0xc5, 0xdd, 0x30, 0x08, 0x02, 0x20, 0xa5, 0x08,
+ 0x02, 0x81, 0xa6, 0x08, 0x02, 0x88, 0xa4, 0x08, 0x02, 0xa1, 0xa6, 0x08,
+ 0x02, 0xa8, 0xa0, 0x08, 0x02, 0xb9, 0xa1, 0x08, 0x02, 0xc0, 0x9f, 0x08,
+ 0x02, 0xd1, 0xa0, 0x08, 0x02, 0xd9, 0xa3, 0x08, 0x02, 0xe1, 0xa6, 0x08,
+ 0x02, 0xe8, 0x1d, 0xc3, 0x64, 0x0f, 0x1f, 0xc3, 0x64, 0x35, 0x20, 0xc3,
+ 0x64, 0x53, 0x21, 0xc3, 0x64, 0x63, 0x22, 0xc3, 0x64, 0x7d, 0x23, 0xc3,
+ 0x64, 0xa1, 0x24, 0xc3, 0x64, 0xcd, 0x25, 0xc3, 0x64, 0xf5, 0x26, 0x43,
+ 0x65, 0x11, 0x1f, 0xc3, 0x65, 0x1b, 0x20, 0xc3, 0x65, 0x27, 0x21, 0xc3,
+ 0x65, 0x45, 0x22, 0x43, 0x65, 0x6d, 0x1d, 0xc3, 0x65, 0x93, 0x1e, 0xc3,
+ 0x65, 0xbb, 0x1f, 0xc3, 0x65, 0xe3, 0xc2, 0xc9, 0x2b, 0x08, 0x07, 0xc8,
+ 0xc6, 0xd0, 0x73, 0x08, 0x04, 0x99, 0xc8, 0xb6, 0x4a, 0x08, 0x04, 0xa0,
+ 0xc6, 0xd2, 0xe3, 0x08, 0x04, 0xc9, 0xc7, 0xc5, 0x59, 0x08, 0x04, 0xc0,
+ 0x05, 0xc3, 0x65, 0xfb, 0x44, 0x05, 0x18, 0xc3, 0x66, 0x1c, 0xc5, 0x31,
+ 0xee, 0x00, 0x0a, 0xdb, 0x03, 0x66, 0x2b, 0xcc, 0x51, 0x28, 0x00, 0xec,
+ 0x51, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0xa1, 0xc4, 0x01, 0x23, 0x00, 0x14,
+ 0x11, 0xce, 0x38, 0xe6, 0x05, 0x3d, 0x49, 0x15, 0x43, 0x66, 0x31, 0xc3,
+ 0x74, 0x83, 0x00, 0x12, 0xcb, 0x03, 0x66, 0x3d, 0x45, 0x07, 0x30, 0x43,
+ 0x66, 0x43, 0x47, 0x39, 0xfa, 0xc3, 0x66, 0x51, 0xc7, 0xbe, 0x03, 0x05,
+ 0x3e, 0xc8, 0xc7, 0xca, 0x22, 0x05, 0x5b, 0x01, 0xc6, 0xc8, 0xfd, 0x05,
+ 0x3c, 0x60, 0xce, 0x01, 0x19, 0x0e, 0xf8, 0xe9, 0x05, 0xc3, 0x66, 0x68,
+ 0xc5, 0x31, 0xee, 0x00, 0x08, 0x39, 0xc9, 0x16, 0x14, 0x00, 0x08, 0x59,
+ 0xc3, 0x01, 0x5d, 0x05, 0x3c, 0x99, 0xcc, 0x51, 0x28, 0x05, 0x3c, 0xa1,
+ 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x41, 0xc6, 0x01, 0x73, 0x00, 0x11, 0xe0,
+ 0x4a, 0xa3, 0xc8, 0x43, 0x66, 0x83, 0xcf, 0x61, 0xe3, 0x00, 0x12, 0xf1,
+ 0x11, 0xc3, 0x66, 0x8f, 0xc9, 0x67, 0x20, 0x05, 0x3e, 0x88, 0xcb, 0x8e,
+ 0x3f, 0x05, 0x39, 0x78, 0x46, 0x00, 0x8b, 0x43, 0x66, 0x9b, 0x45, 0x45,
+ 0x88, 0xc3, 0x66, 0xa7, 0x8f, 0x05, 0x3b, 0xb8, 0xc4, 0x01, 0x23, 0x00,
+ 0x0d, 0x6b, 0x03, 0x67, 0x00, 0x06, 0xc3, 0x67, 0x06, 0x05, 0xc3, 0x67,
+ 0x12, 0xca, 0x64, 0x13, 0x00, 0xf3, 0x79, 0xcc, 0x1e, 0xc1, 0x00, 0xeb,
+ 0xa9, 0xce, 0x01, 0x19, 0x00, 0x14, 0x41, 0xcc, 0x51, 0x28, 0x00, 0x0d,
+ 0x59, 0xc6, 0x01, 0x73, 0x00, 0x0b, 0x38, 0xd3, 0x3f, 0x83, 0x00, 0xeb,
+ 0xd1, 0xc3, 0x00, 0xbf, 0x00, 0x07, 0xf2, 0x03, 0x67, 0x30, 0xc8, 0xad,
+ 0x81, 0x00, 0xe8, 0xb1, 0x43, 0x02, 0x6f, 0x43, 0x67, 0x39, 0xd4, 0x01,
+ 0x13, 0x05, 0x5b, 0x38, 0xce, 0x01, 0x19, 0x0e, 0xf8, 0xd9, 0x42, 0x01,
+ 0x23, 0xc3, 0x67, 0x4b, 0x05, 0xc3, 0x67, 0x5a, 0x06, 0xc3, 0x67, 0x69,
+ 0xc6, 0x60, 0xb1, 0x00, 0x0a, 0x6b, 0x03, 0x67, 0x76, 0xc5, 0x1e, 0xc8,
+ 0x00, 0x07, 0xab, 0x03, 0x67, 0x7c, 0xc6, 0x01, 0x73, 0x00, 0x07, 0xc3,
+ 0x03, 0x67, 0x82, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x91, 0xc5, 0x31, 0xee,
+ 0x00, 0x07, 0x99, 0x42, 0x01, 0xc8, 0xc3, 0x67, 0x88, 0xc5, 0x1d, 0x88,
+ 0x00, 0x0a, 0x79, 0xc6, 0xcc, 0x8f, 0x00, 0x0f, 0x5b, 0x03, 0x67, 0x9a,
+ 0xce, 0x1d, 0x93, 0x00, 0x10, 0x78, 0x91, 0x00, 0x12, 0xa3, 0x03, 0x67,
+ 0xa0, 0x87, 0x00, 0x12, 0xda, 0x03, 0x67, 0xaa, 0xc6, 0x01, 0x73, 0x00,
+ 0x13, 0x43, 0x03, 0x67, 0xb0, 0x06, 0xc3, 0x67, 0xb6, 0xca, 0x9e, 0x5a,
+ 0x00, 0xf6, 0x49, 0xc5, 0x1e, 0xc8, 0x00, 0x09, 0x4b, 0x03, 0x67, 0xc3,
+ 0xce, 0x01, 0x19, 0x00, 0xec, 0xb1, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x61,
+ 0xc5, 0x31, 0xee, 0x00, 0x07, 0x69, 0x05, 0xc3, 0x67, 0xc9, 0xc6, 0x60,
+ 0xb1, 0x00, 0x09, 0x59, 0xc5, 0x1d, 0x88, 0x00, 0x09, 0x69, 0xc6, 0xcc,
+ 0x8f, 0x00, 0x09, 0x79, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x58, 0x83, 0x00,
+ 0x13, 0x4b, 0x03, 0x67, 0xd5, 0xc7, 0xca, 0x53, 0x05, 0x5b, 0x08, 0x46,
+ 0x51, 0xbb, 0xc3, 0x67, 0xdb, 0x47, 0x1d, 0x71, 0x43, 0x67, 0xf3, 0xca,
+ 0x9a, 0x86, 0x00, 0x15, 0x23, 0x03, 0x67, 0xff, 0xc3, 0x80, 0x9f, 0x00,
+ 0xf4, 0xf8, 0x05, 0xc3, 0x68, 0x05, 0xca, 0x64, 0x13, 0x00, 0xf0, 0x79,
+ 0x44, 0x05, 0x18, 0xc3, 0x68, 0x1d, 0xc4, 0x01, 0x23, 0x00, 0x12, 0xbb,
+ 0x03, 0x68, 0x29, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x09, 0xcc, 0x1e, 0xc1,
+ 0x00, 0xeb, 0x69, 0x15, 0xc3, 0x68, 0x2f, 0x16, 0x43, 0x68, 0x3b, 0x00,
+ 0x43, 0x68, 0x47, 0x45, 0x00, 0x5a, 0xc3, 0x68, 0x56, 0x46, 0x3b, 0xc5,
+ 0x43, 0x68, 0x69, 0x00, 0x43, 0x68, 0x74, 0x46, 0x00, 0x8b, 0x43, 0x68,
+ 0x80, 0x46, 0x00, 0x8b, 0x43, 0x68, 0x8c, 0x05, 0xc3, 0x68, 0xa7, 0xc5,
+ 0x1e, 0xc8, 0x00, 0xf5, 0xeb, 0x03, 0x68, 0xbf, 0xca, 0x9e, 0x5a, 0x00,
+ 0xf5, 0xd9, 0x06, 0xc3, 0x68, 0xc5, 0xc6, 0x60, 0xb1, 0x00, 0x08, 0x9b,
+ 0x03, 0x68, 0xcf, 0xce, 0x01, 0x19, 0x00, 0xec, 0x91, 0xc8, 0xbe, 0x9a,
+ 0x05, 0x59, 0xa1, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x41, 0xc5, 0x31, 0xee,
+ 0x00, 0x07, 0x49, 0xc5, 0x1d, 0x88, 0x00, 0x08, 0xa9, 0xc6, 0xcc, 0x8f,
+ 0x00, 0x08, 0xc9, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x39, 0xc6, 0x01, 0x73,
+ 0x00, 0x12, 0x39, 0xc5, 0x22, 0x9e, 0x01, 0x63, 0xc0, 0xc3, 0x00, 0x49,
+ 0x05, 0x39, 0x19, 0xc2, 0x00, 0x74, 0x05, 0x39, 0x28, 0x8a, 0x00, 0x07,
+ 0x80, 0x44, 0x00, 0x8c, 0xc3, 0x68, 0xd5, 0xc7, 0xa6, 0x69, 0x05, 0x3a,
+ 0xd8, 0x87, 0x00, 0x12, 0xc3, 0x03, 0x68, 0xdf, 0x8d, 0x0e, 0xf8, 0x19,
+ 0xc8, 0xbb, 0x8a, 0x0e, 0xf8, 0x09, 0x85, 0x01, 0x0c, 0x23, 0x03, 0x68,
+ 0xe5, 0xc6, 0x21, 0xa3, 0x00, 0x12, 0xe3, 0x03, 0x68, 0xeb, 0xcf, 0x61,
+ 0x6b, 0x00, 0x13, 0xf9, 0xc6, 0xd3, 0x2b, 0x05, 0x3f, 0xb0, 0xc8, 0xa6,
+ 0x68, 0x05, 0x3a, 0xe8, 0x04, 0xc3, 0x68, 0xf1, 0xc8, 0x61, 0x72, 0x0e,
+ 0xf8, 0x89, 0x05, 0xc3, 0x69, 0x00, 0xca, 0x64, 0x13, 0x00, 0xf1, 0xd9,
+ 0x42, 0x00, 0x58, 0xc3, 0x69, 0x18, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x29,
+ 0x47, 0x04, 0xcb, 0xc3, 0x69, 0x27, 0xcf, 0x68, 0x64, 0x05, 0x59, 0xb9,
+ 0xce, 0x01, 0x19, 0x00, 0x13, 0x6b, 0x03, 0x69, 0x39, 0xcb, 0x8f, 0xb5,
+ 0x05, 0x3a, 0x49, 0xc5, 0x31, 0xee, 0x00, 0x09, 0xd1, 0xc6, 0x01, 0x73,
+ 0x00, 0x0a, 0x10, 0xc2, 0x25, 0xa1, 0x00, 0x13, 0x73, 0x03, 0x69, 0x3f,
+ 0xc5, 0xd9, 0x07, 0x05, 0x59, 0xa8, 0x46, 0x00, 0x8b, 0x43, 0x69, 0x45,
+ 0xcb, 0x90, 0x5a, 0x0e, 0xf8, 0x00, 0xc9, 0x16, 0x14, 0x00, 0xf0, 0xf9,
+ 0xcc, 0x51, 0x28, 0x00, 0xec, 0x11, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x71,
+ 0xc6, 0x01, 0x73, 0x05, 0x3c, 0xc9, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0x90,
+ 0xc4, 0xb0, 0x8b, 0x00, 0xf7, 0xf9, 0xc5, 0x1e, 0xc8, 0x00, 0xf7, 0xc9,
+ 0xc4, 0x01, 0x23, 0x00, 0x0d, 0xa3, 0x03, 0x69, 0x4f, 0x06, 0xc3, 0x69,
+ 0x55, 0xc5, 0x1f, 0x0c, 0x00, 0xf7, 0x99, 0xca, 0x9e, 0xe6, 0x00, 0xf4,
+ 0xc9, 0x15, 0xc3, 0x69, 0x61, 0xc5, 0x31, 0xee, 0x00, 0x07, 0xe9, 0xca,
+ 0x08, 0xf6, 0x00, 0x0b, 0xb9, 0xc6, 0x60, 0xb1, 0x00, 0x11, 0x98, 0x47,
+ 0xc0, 0x2e, 0xc3, 0x69, 0x6d, 0xc8, 0xba, 0x02, 0x05, 0x3e, 0xb8, 0x44,
+ 0x05, 0x18, 0xc3, 0x69, 0x77, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0xf9, 0xcc,
+ 0x51, 0x28, 0x00, 0xec, 0x31, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x79, 0xcc,
+ 0x4d, 0x15, 0x05, 0x59, 0xd1, 0xc4, 0x01, 0x23, 0x00, 0x13, 0x88, 0x45,
+ 0x00, 0x8c, 0xc3, 0x69, 0x83, 0xc3, 0x01, 0x5d, 0x00, 0x14, 0x4a, 0x03,
+ 0x69, 0xcf, 0xcc, 0x23, 0x3f, 0x00, 0xeb, 0xf8, 0x45, 0x00, 0x8c, 0xc3,
+ 0x69, 0xd5, 0xce, 0x74, 0x78, 0x05, 0x59, 0x88, 0xd4, 0x01, 0x13, 0x00,
+ 0xec, 0x80, 0x46, 0x00, 0x8b, 0x43, 0x6a, 0x1a, 0xd4, 0x3e, 0x6c, 0x05,
+ 0x39, 0xd8, 0xca, 0x9e, 0xe6, 0x00, 0xf4, 0xc1, 0x06, 0xc3, 0x6a, 0x26,
+ 0xc5, 0x31, 0xee, 0x00, 0xf4, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0xf4, 0x09,
+ 0xca, 0x08, 0xf6, 0x00, 0x0b, 0xa9, 0xc4, 0x01, 0x23, 0x01, 0x63, 0x98,
+ 0xca, 0x64, 0x13, 0x00, 0xf4, 0xb1, 0xcb, 0x97, 0x2f, 0x00, 0xf1, 0x59,
+ 0x05, 0xc3, 0x6a, 0x32, 0x06, 0xc3, 0x6a, 0x44, 0xc4, 0x01, 0x23, 0x00,
+ 0x13, 0x31, 0xc6, 0x01, 0x73, 0x00, 0x09, 0x39, 0xcc, 0x51, 0x28, 0x05,
+ 0x3c, 0xa8, 0xca, 0x1f, 0x59, 0x00, 0x13, 0x38, 0xca, 0x64, 0x13, 0x00,
+ 0xf4, 0xa9, 0x06, 0xc3, 0x6a, 0x56, 0x05, 0xc3, 0x6a, 0x62, 0xcc, 0x51,
+ 0x28, 0x00, 0xec, 0x71, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0xb1, 0xce, 0x01,
+ 0x19, 0x00, 0x14, 0x81, 0xc5, 0x31, 0xee, 0x00, 0x0b, 0xd1, 0x15, 0xc3,
+ 0x6a, 0x74, 0xc4, 0x01, 0x23, 0x00, 0x11, 0x28, 0x06, 0xc3, 0x6a, 0x80,
+ 0xcc, 0x51, 0x28, 0x00, 0xec, 0x69, 0x42, 0x01, 0xc8, 0x43, 0x6a, 0x8c,
+ 0x06, 0xc3, 0x6a, 0x9b, 0xc5, 0x1e, 0xc8, 0x00, 0xf3, 0xe9, 0xcc, 0x51,
+ 0x28, 0x00, 0xec, 0x61, 0xc4, 0x01, 0x23, 0x00, 0x14, 0x59, 0xca, 0x9f,
+ 0x4a, 0x01, 0x63, 0x89, 0xc4, 0x00, 0x32, 0x01, 0x63, 0xa0, 0xc2, 0x10,
+ 0x11, 0x05, 0x3c, 0xd9, 0xc2, 0x49, 0x0c, 0x05, 0x3c, 0xe9, 0xc2, 0x0f,
+ 0xe1, 0x05, 0x3c, 0xf8, 0xc9, 0x16, 0x14, 0x00, 0xf2, 0xb9, 0xc5, 0x31,
+ 0xee, 0x00, 0xf2, 0xa9, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x41, 0x15, 0xc3,
+ 0x6a, 0xa7, 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x89, 0xc8, 0xbe, 0x9a, 0x05,
+ 0x3a, 0x99, 0xc4, 0x01, 0x23, 0x00, 0x0d, 0x28, 0x45, 0x00, 0x8c, 0xc3,
+ 0x6a, 0xb6, 0xd6, 0x2d, 0x78, 0x00, 0x0a, 0x48, 0xca, 0x64, 0x13, 0x00,
+ 0xf1, 0xa9, 0x06, 0xc3, 0x6a, 0xec, 0xc5, 0x31, 0xee, 0x00, 0xf1, 0x89,
+ 0xcc, 0x51, 0x28, 0x00, 0xec, 0x21, 0xc6, 0x01, 0x73, 0x05, 0x3a, 0x0b,
+ 0x03, 0x6a, 0xfe, 0x05, 0xc3, 0x6b, 0x04, 0xce, 0x38, 0xe6, 0x05, 0x3d,
+ 0x19, 0xc4, 0x01, 0x23, 0x00, 0x0c, 0xc8, 0xc6, 0x60, 0xb1, 0x00, 0xf1,
+ 0x09, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x19, 0xc5, 0x31, 0xee, 0x00, 0x0f,
+ 0xa9, 0xc4, 0x01, 0x23, 0x00, 0x13, 0x01, 0x05, 0xc3, 0x6b, 0x10, 0xc5,
+ 0x1d, 0x88, 0x00, 0x08, 0xf9, 0xc9, 0x16, 0x14, 0x00, 0x09, 0x09, 0xce,
+ 0x38, 0xe6, 0x05, 0x3d, 0x09, 0xc6, 0x01, 0x73, 0x00, 0x0f, 0x28, 0x8b,
+ 0x05, 0x3d, 0xe9, 0x83, 0x05, 0x3d, 0xd9, 0x97, 0x05, 0x3d, 0xf9, 0xc4,
+ 0x00, 0xf0, 0x00, 0x12, 0x10, 0xca, 0x64, 0x13, 0x00, 0xf0, 0x39, 0x44,
+ 0x05, 0x18, 0xc3, 0x6b, 0x22, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x01, 0xcc,
+ 0x1e, 0xc1, 0x00, 0xeb, 0x61, 0xc8, 0xbe, 0x9a, 0x05, 0x3c, 0xb9, 0xc6,
+ 0x01, 0x73, 0x00, 0x0c, 0x01, 0xc6, 0xcf, 0xcb, 0x00, 0x0c, 0x19, 0xc4,
+ 0x01, 0x23, 0x00, 0x12, 0x98, 0xca, 0xa4, 0x9a, 0x05, 0x5a, 0x69, 0x45,
+ 0x7b, 0x4a, 0x43, 0x6b, 0x2e, 0x91, 0x05, 0x59, 0xeb, 0x03, 0x6b, 0x3c,
+ 0x87, 0x05, 0x59, 0x90, 0x05, 0xc3, 0x6b, 0x42, 0xc6, 0x01, 0x73, 0x00,
+ 0x12, 0x48, 0xc4, 0x01, 0x23, 0x00, 0x15, 0x03, 0x03, 0x6b, 0x54, 0xd8,
+ 0x25, 0xeb, 0x05, 0x3a, 0xb9, 0xcf, 0x3e, 0xad, 0x05, 0x3a, 0xc8, 0x8e,
+ 0x07, 0xd8, 0x21, 0x8b, 0x07, 0xd8, 0x18, 0xc6, 0x00, 0xd3, 0x00, 0xf7,
+ 0xb0, 0x43, 0x05, 0x19, 0xc3, 0x6b, 0x5a, 0xc8, 0x20, 0xa9, 0x00, 0x0b,
+ 0xc0, 0x98, 0x00, 0xf7, 0xe1, 0xc2, 0x02, 0xa7, 0x00, 0xf7, 0xd0, 0xc5,
+ 0x05, 0x02, 0x00, 0xf2, 0x11, 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0x00, 0x42,
+ 0x01, 0x23, 0xc3, 0x6b, 0x66, 0x06, 0xc3, 0x6b, 0x75, 0xc6, 0x60, 0xb1,
+ 0x00, 0x0b, 0x53, 0x03, 0x6b, 0x82, 0xc5, 0x1e, 0xc8, 0x00, 0x0b, 0x43,
+ 0x03, 0x6b, 0x88, 0x05, 0xc3, 0x6b, 0x8c, 0xc5, 0x1f, 0x0c, 0x00, 0x06,
+ 0xc9, 0xc5, 0x31, 0xee, 0x00, 0x06, 0xd1, 0xc6, 0x01, 0x73, 0x05, 0x3d,
+ 0xc1, 0xc5, 0x1d, 0x88, 0x00, 0x0b, 0x61, 0xca, 0x9e, 0xe6, 0x00, 0x0b,
+ 0x71, 0xce, 0x1d, 0x93, 0x00, 0x10, 0xb1, 0xc6, 0xcc, 0x8f, 0x00, 0x0b,
+ 0x90, 0xc2, 0x00, 0xc0, 0x00, 0x0d, 0x03, 0x03, 0x6b, 0x9b, 0xc8, 0x9e,
+ 0x5c, 0x00, 0xf6, 0x70, 0xc9, 0x08, 0xf7, 0x00, 0x06, 0xa3, 0x03, 0x6b,
+ 0xa1, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x88, 0x11, 0xc3, 0x6b, 0xa7, 0xc8,
+ 0x20, 0xa9, 0x00, 0x06, 0xb2, 0x03, 0x6b, 0xb3, 0xc5, 0x60, 0xb2, 0x00,
+ 0x0a, 0x63, 0x03, 0x6b, 0xb9, 0xcb, 0x1e, 0xc2, 0x00, 0x0c, 0xf8, 0x45,
+ 0x02, 0x9a, 0x43, 0x6b, 0xbf, 0xca, 0x9b, 0xda, 0x00, 0x0f, 0xf0, 0xd1,
+ 0x53, 0x76, 0x05, 0x3a, 0x51, 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x60, 0xcb,
+ 0x98, 0x58, 0x00, 0x0f, 0x60, 0x11, 0xc3, 0x6b, 0xd1, 0xc8, 0x20, 0xa9,
+ 0x00, 0x06, 0x7a, 0x03, 0x6b, 0xdd, 0xc6, 0x05, 0x01, 0x00, 0xf1, 0x60,
+ 0xc9, 0x08, 0xf7, 0x00, 0x06, 0x71, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x78,
+ 0xc9, 0x08, 0xf7, 0x00, 0x06, 0x53, 0x03, 0x6b, 0xe3, 0xc6, 0xbd, 0xf4,
+ 0x00, 0x11, 0x43, 0x03, 0x6b, 0xe7, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0xd0,
+ 0xc6, 0x05, 0x01, 0x00, 0xf0, 0xd0, 0x11, 0xc3, 0x6b, 0xed, 0xc8, 0x20,
+ 0xa9, 0x00, 0x06, 0x58, 0x45, 0x02, 0x9a, 0x43, 0x6b, 0xf9, 0xc8, 0x0f,
+ 0xbd, 0x00, 0x0d, 0xc1, 0xca, 0x8e, 0x61, 0x00, 0x0f, 0x70, 0x45, 0x02,
+ 0x9a, 0x43, 0x6c, 0x05, 0xc9, 0x08, 0xf7, 0x00, 0x06, 0x13, 0x03, 0x6c,
+ 0x23, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x68, 0x11, 0xc3, 0x6c, 0x29, 0xc8,
+ 0x20, 0xa9, 0x00, 0x06, 0x22, 0x03, 0x6c, 0x35, 0xc5, 0x05, 0x02, 0x00,
+ 0xf0, 0x01, 0xc5, 0x00, 0xd4, 0x00, 0x06, 0x2a, 0x03, 0x6c, 0x3b, 0xc5,
+ 0x31, 0xee, 0x00, 0x0f, 0xe1, 0xc6, 0x60, 0xb1, 0x00, 0x0f, 0x10, 0xc5,
+ 0x05, 0x02, 0x00, 0xf3, 0x13, 0x03, 0x6c, 0x41, 0xc5, 0x00, 0xd4, 0x00,
+ 0xf3, 0x00, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0x03, 0x03, 0x6c, 0x47, 0xc9,
+ 0x08, 0xf7, 0x00, 0x0a, 0xe1, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0xf1, 0xca,
+ 0xa7, 0x1a, 0x00, 0x10, 0xc0, 0xce, 0x16, 0x0f, 0x00, 0xf3, 0x20, 0xd3,
+ 0x42, 0x2f, 0x05, 0x3e, 0x49, 0xc9, 0xb4, 0xeb, 0x01, 0x63, 0xf0, 0x43,
+ 0x05, 0x19, 0xc3, 0x6c, 0x4d, 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x58, 0xc9,
+ 0x08, 0xf7, 0x00, 0xf4, 0x81, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0xe8, 0xc5,
+ 0x01, 0x74, 0x00, 0x0d, 0xa9, 0xc9, 0xb4, 0xeb, 0x01, 0x63, 0xf8, 0x43,
+ 0x05, 0x19, 0xc3, 0x6c, 0x59, 0xc8, 0x20, 0xa9, 0x00, 0xf4, 0x20, 0xc8,
+ 0x0e, 0x6f, 0x00, 0xf3, 0xf1, 0xce, 0x3e, 0xae, 0x05, 0x3a, 0xf0, 0xcf,
+ 0x68, 0x82, 0x00, 0xf3, 0x81, 0xc6, 0xbd, 0xf4, 0x00, 0x0b, 0x11, 0xc4,
+ 0x65, 0xe2, 0x00, 0x0b, 0x21, 0xca, 0xa7, 0x1a, 0x00, 0x10, 0xd0, 0x43,
+ 0x05, 0x19, 0xc3, 0x6c, 0x65, 0xce, 0x3e, 0xae, 0x00, 0x11, 0xf0, 0xd2,
+ 0x25, 0xf1, 0x05, 0x3b, 0x30, 0xc4, 0xde, 0x3f, 0x01, 0x63, 0x80, 0xca,
+ 0x64, 0x13, 0x00, 0xf2, 0xf1, 0x42, 0x00, 0x58, 0xc3, 0x6c, 0x71, 0xce,
+ 0x38, 0xe6, 0x05, 0x3d, 0x31, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0xa1, 0x05,
+ 0xc3, 0x6c, 0x7d, 0xce, 0x1d, 0x93, 0x00, 0x10, 0x91, 0xc6, 0x01, 0x73,
+ 0x00, 0x12, 0x61, 0xc4, 0x14, 0xa6, 0x01, 0x63, 0x20, 0xc5, 0x01, 0x74,
+ 0x01, 0x63, 0x1b, 0x03, 0x6c, 0x89, 0xcc, 0x89, 0x01, 0x05, 0x3a, 0xa0,
+ 0xcf, 0x68, 0x82, 0x00, 0xf2, 0x51, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0x29,
+ 0xc4, 0x65, 0xe2, 0x00, 0x0a, 0x38, 0xc9, 0x64, 0x14, 0x00, 0xf2, 0x41,
+ 0xc8, 0x6d, 0x46, 0x00, 0x0c, 0xe9, 0xcd, 0x7b, 0x08, 0x00, 0x11, 0x00,
+ 0x43, 0x05, 0x19, 0xc3, 0x6c, 0x8f, 0xc8, 0x25, 0xfb, 0x05, 0x3c, 0x80,
+ 0xcf, 0x68, 0x82, 0x00, 0xf1, 0xe1, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0xd9,
+ 0xc4, 0x65, 0xe2, 0x00, 0x09, 0xe8, 0xc7, 0x0e, 0x70, 0x00, 0xf1, 0xb3,
+ 0x03, 0x6c, 0x9b, 0xc8, 0xa7, 0x26, 0x01, 0x63, 0x00, 0xc3, 0x02, 0xa3,
+ 0x00, 0x09, 0xf9, 0xc5, 0x1e, 0xc8, 0x01, 0x63, 0x10, 0xc5, 0x01, 0x74,
+ 0x00, 0x0a, 0x09, 0xcd, 0x6e, 0x05, 0x00, 0x0e, 0x40, 0xc2, 0x00, 0xb1,
+ 0x00, 0x11, 0xe9, 0xc3, 0x3a, 0xe6, 0x05, 0x3d, 0x68, 0xc8, 0x0e, 0x6f,
+ 0x00, 0xf1, 0x91, 0xce, 0x3e, 0xae, 0x05, 0x3a, 0x11, 0xc8, 0x25, 0xfb,
+ 0x01, 0x63, 0x48, 0xd4, 0x3e, 0xa8, 0x05, 0x3a, 0x20, 0xc6, 0xbd, 0xf4,
+ 0x00, 0x09, 0xb1, 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x40, 0xc6, 0xbd, 0xf4,
+ 0x00, 0xf1, 0x41, 0xc9, 0x08, 0xf7, 0x00, 0x09, 0x21, 0xc4, 0x65, 0xe2,
+ 0x00, 0x10, 0xf0, 0xc8, 0x20, 0xa9, 0x00, 0xf1, 0x31, 0x43, 0x05, 0x19,
+ 0xc3, 0x6c, 0xa1, 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x38, 0xc9, 0x08, 0xf7,
+ 0x00, 0x08, 0xe1, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0x11, 0xc4, 0x65, 0xe2,
+ 0x00, 0x0f, 0x30, 0xcf, 0x68, 0x82, 0x00, 0xf0, 0x91, 0xc6, 0xbd, 0xf4,
+ 0x00, 0xf0, 0x81, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0x70, 0xc5, 0x05, 0x02,
+ 0x00, 0xf0, 0x61, 0xc5, 0x00, 0xd4, 0x00, 0xf0, 0x50, 0xcd, 0x77, 0xa1,
+ 0x00, 0x0f, 0x93, 0x03, 0x6c, 0xad, 0xc5, 0x01, 0x74, 0x00, 0x08, 0x81,
+ 0xd3, 0x42, 0x2f, 0x05, 0x3e, 0x38, 0xc6, 0xbd, 0xf4, 0x00, 0x06, 0x3b,
+ 0x03, 0x6c, 0xb3, 0xc9, 0x08, 0xf7, 0x00, 0x08, 0x41, 0xc4, 0x65, 0xe2,
+ 0x00, 0x08, 0x60, 0xc5, 0x05, 0x02, 0x00, 0xf0, 0x21, 0xc5, 0x00, 0xd4,
+ 0x00, 0xf0, 0x10, 0xc9, 0x08, 0xf7, 0x00, 0x09, 0xa1, 0xcb, 0x4d, 0x16,
+ 0x05, 0x3d, 0x90, 0x45, 0x00, 0x8c, 0xc3, 0x6c, 0xb9, 0xc6, 0x10, 0x9d,
+ 0x01, 0x5b, 0x89, 0x4c, 0x14, 0x15, 0x43, 0x6c, 0xe3, 0xe0, 0x01, 0x47,
+ 0x01, 0x4b, 0x70, 0x46, 0x05, 0x39, 0x43, 0x6c, 0xe9, 0xc6, 0x44, 0x50,
+ 0x07, 0xd9, 0x59, 0xc7, 0x44, 0x4f, 0x07, 0xd9, 0x50, 0xc5, 0x64, 0xae,
+ 0x07, 0xd9, 0x81, 0xc5, 0x79, 0xbe, 0x07, 0xd9, 0x71, 0xc6, 0xcc, 0xe3,
+ 0x07, 0xd9, 0x78, 0xcc, 0x79, 0xeb, 0x05, 0x4b, 0x59, 0xc5, 0x8e, 0xdf,
+ 0x05, 0x4b, 0x21, 0xc6, 0xbb, 0xec, 0x05, 0x4b, 0x70, 0xc3, 0x39, 0x37,
+ 0x05, 0x4b, 0x61, 0x44, 0x3a, 0xbf, 0x43, 0x6c, 0xf5, 0xc6, 0xc1, 0x86,
+ 0x05, 0x4b, 0xc9, 0xc5, 0xc0, 0x7d, 0x00, 0x88, 0x20, 0xc6, 0xce, 0xb1,
+ 0x05, 0x4b, 0xc0, 0xc6, 0xd1, 0x57, 0x05, 0x4b, 0xa8, 0x0d, 0xc3, 0x6d,
+ 0x07, 0xc5, 0xd9, 0x61, 0x00, 0x89, 0x71, 0x16, 0xc3, 0x6d, 0x13, 0xc5,
+ 0xd6, 0x8c, 0x00, 0x89, 0x81, 0xc5, 0xda, 0xe7, 0x00, 0x89, 0x89, 0x12,
+ 0xc3, 0x6d, 0x1f, 0xc9, 0xad, 0x26, 0x00, 0x89, 0xa1, 0xc5, 0xb7, 0x9d,
+ 0x00, 0x89, 0xa9, 0x05, 0xc3, 0x6d, 0x2e, 0xc5, 0x90, 0xe4, 0x00, 0x89,
+ 0xd8, 0xc5, 0x90, 0xe4, 0x05, 0x4b, 0xd1, 0xc5, 0x79, 0xf2, 0x00, 0x8a,
+ 0xb0, 0xc5, 0x90, 0xe4, 0x05, 0x4b, 0xa1, 0x0d, 0xc3, 0x6d, 0x3a, 0x15,
+ 0xc3, 0x6d, 0x46, 0xc5, 0xd9, 0x61, 0x00, 0x88, 0xf9, 0x16, 0xc3, 0x6d,
+ 0x55, 0x05, 0xc3, 0x6d, 0x61, 0xc7, 0xba, 0x7b, 0x00, 0x89, 0x50, 0xc5,
+ 0xc0, 0x7d, 0x00, 0x8a, 0x11, 0xc6, 0xc1, 0x86, 0x00, 0x8a, 0x50, 0xc4,
+ 0x79, 0xf3, 0x00, 0x8a, 0x21, 0xc6, 0xca, 0x0e, 0x00, 0x8a, 0x31, 0xc6,
+ 0xba, 0x7c, 0x00, 0x8a, 0x58, 0xc4, 0xc6, 0x7a, 0x00, 0x8a, 0x41, 0xc6,
+ 0xc6, 0x79, 0x00, 0x8a, 0x48, 0xc5, 0xdb, 0xff, 0x05, 0x4b, 0x19, 0xc4,
+ 0xad, 0x2b, 0x05, 0x4b, 0x11, 0xc5, 0x79, 0xf2, 0x05, 0x4b, 0x09, 0xc5,
+ 0xda, 0xe7, 0x05, 0x4b, 0x01, 0xc6, 0x8e, 0xde, 0x00, 0x88, 0xb9, 0xc5,
+ 0xd6, 0x8c, 0x00, 0x8a, 0xf0, 0xc4, 0x79, 0xf3, 0x00, 0x89, 0x59, 0xc6,
+ 0xba, 0x7c, 0x00, 0x8a, 0xb8, 0x02, 0x43, 0x6d, 0x6d, 0x15, 0xc3, 0x6d,
+ 0x79, 0x05, 0x43, 0x6d, 0x85, 0xc3, 0x39, 0x37, 0x00, 0x89, 0xf1, 0x44,
+ 0x3a, 0xbf, 0x43, 0x6d, 0x91, 0xc4, 0xc6, 0x7a, 0x00, 0x8a, 0x81, 0xc6,
+ 0xc6, 0x79, 0x00, 0x8a, 0xa8, 0x91, 0x00, 0x8b, 0xb1, 0x97, 0x00, 0x8b,
+ 0xb9, 0xc2, 0x2c, 0x43, 0x00, 0x8d, 0x18, 0x02, 0x43, 0x6d, 0x9d, 0x87,
+ 0x00, 0x8b, 0x21, 0x02, 0x43, 0x6d, 0xb0, 0x91, 0x00, 0x8b, 0x3a, 0x03,
+ 0x6d, 0xbe, 0x02, 0x43, 0x6d, 0xc2, 0x02, 0x43, 0x6d, 0xdb, 0xc2, 0x27,
+ 0x51, 0x00, 0x8c, 0xb8, 0x02, 0x43, 0x6d, 0xfe, 0x02, 0x43, 0x6e, 0x0c,
+ 0x87, 0x00, 0x8c, 0x03, 0x03, 0x6e, 0x1f, 0x1b, 0xc3, 0x6e, 0x23, 0x91,
+ 0x00, 0x8c, 0x13, 0x03, 0x6e, 0x31, 0x97, 0x00, 0x8c, 0x18, 0x87, 0x00,
+ 0x8b, 0x58, 0x91, 0x00, 0x8b, 0x78, 0x83, 0x00, 0x8c, 0x4b, 0x03, 0x6e,
+ 0x37, 0xc5, 0xd9, 0xa7, 0x00, 0x8c, 0x59, 0xc2, 0x0c, 0x43, 0x00, 0x8c,
+ 0x63, 0x03, 0x6e, 0x3b, 0x97, 0x00, 0x8c, 0x69, 0xc3, 0xe5, 0xc9, 0x06,
+ 0xbd, 0xb0, 0x83, 0x00, 0x8c, 0xc3, 0x03, 0x6e, 0x3f, 0x1b, 0xc3, 0x6e,
+ 0x45, 0x91, 0x00, 0x8c, 0xd3, 0x03, 0x6e, 0x5b, 0x97, 0x00, 0x8c, 0xd9,
+ 0xc2, 0x2c, 0x43, 0x00, 0x8c, 0xe1, 0x8b, 0x06, 0xbe, 0x20, 0x02, 0x43,
+ 0x6e, 0x61, 0xc5, 0xda, 0xe7, 0x00, 0x8f, 0x11, 0x12, 0xc3, 0x6e, 0x8e,
+ 0xc5, 0xd6, 0x8c, 0x06, 0xbe, 0xe8, 0xc6, 0x8e, 0xde, 0x00, 0x8d, 0x49,
+ 0xc4, 0xad, 0x2b, 0x00, 0x8d, 0xdb, 0x03, 0x6e, 0x9a, 0xc5, 0xd6, 0x8c,
+ 0x00, 0x8e, 0x83, 0x03, 0x6e, 0x9e, 0xc8, 0xb7, 0x9a, 0x00, 0x8f, 0x71,
+ 0xc5, 0xb7, 0x9d, 0x00, 0x8f, 0x71, 0xc5, 0xd9, 0x61, 0x00, 0x8f, 0xf9,
+ 0xc6, 0xc0, 0x7c, 0x06, 0xbe, 0x6b, 0x03, 0x6e, 0xa4, 0xc5, 0xda, 0xe7,
+ 0x06, 0xbf, 0x01, 0xc5, 0x79, 0xf2, 0x06, 0xbf, 0x31, 0xc5, 0xdb, 0xff,
+ 0x06, 0xbf, 0xc8, 0x02, 0x43, 0x6e, 0xaa, 0x05, 0xc3, 0x6e, 0xcc, 0xc5,
+ 0x90, 0xe4, 0x00, 0x8d, 0x69, 0xc6, 0x8e, 0xde, 0x00, 0x8e, 0x29, 0x16,
+ 0xc3, 0x6e, 0xd8, 0xc4, 0xad, 0x2b, 0x00, 0x8e, 0x39, 0xc7, 0xca, 0x0d,
+ 0x00, 0x8e, 0x41, 0xc5, 0xd6, 0x8c, 0x06, 0xbe, 0x58, 0x02, 0x43, 0x6e,
+ 0xe4, 0x0d, 0xc3, 0x6f, 0x09, 0xc5, 0xda, 0xe7, 0x00, 0x8d, 0x8b, 0x03,
+ 0x6f, 0x1e, 0x12, 0xc3, 0x6f, 0x22, 0x15, 0xc3, 0x6f, 0x37, 0x16, 0xc3,
+ 0x6f, 0x43, 0xc5, 0x90, 0xe4, 0x00, 0x8d, 0xb1, 0xc5, 0xd9, 0x61, 0x00,
+ 0x8e, 0x69, 0x42, 0x0c, 0x43, 0x43, 0x6f, 0x52, 0xc6, 0x8e, 0xde, 0x00,
+ 0x8e, 0xd1, 0xc5, 0xd6, 0x8c, 0x00, 0x8e, 0xd9, 0x12, 0xc3, 0x6f, 0x61,
+ 0x15, 0xc3, 0x6f, 0x70, 0x05, 0xc3, 0x6f, 0x7c, 0xc5, 0x90, 0xe4, 0x00,
+ 0x8f, 0x09, 0xc5, 0xd9, 0x61, 0x06, 0xbe, 0xf0, 0x02, 0x43, 0x6f, 0x88,
+ 0x02, 0x43, 0x6f, 0xb6, 0x02, 0x43, 0x6f, 0xc8, 0x0d, 0xc3, 0x6f, 0xd4,
+ 0xcb, 0x8e, 0xd9, 0x00, 0x8f, 0x68, 0x02, 0x43, 0x6f, 0xe0, 0xc5, 0xd9,
+ 0x61, 0x00, 0x8f, 0xa9, 0xc5, 0xd6, 0x8c, 0x00, 0x8f, 0xb1, 0xc5, 0xda,
+ 0xe7, 0x00, 0x8f, 0xb9, 0xc5, 0x79, 0xf2, 0x00, 0x8f, 0xc0, 0x02, 0x43,
+ 0x6f, 0xec, 0xc4, 0x79, 0xf3, 0x01, 0x8b, 0xc1, 0xc6, 0xba, 0x7c, 0x01,
+ 0x8c, 0x20, 0xc6, 0x8e, 0xde, 0x01, 0x8b, 0xd1, 0xc5, 0xd9, 0x61, 0x01,
+ 0x8b, 0xd9, 0xc6, 0xc0, 0x7c, 0x01, 0x8b, 0xe1, 0xc5, 0x79, 0xf2, 0x01,
+ 0x8b, 0xe9, 0xc5, 0xdb, 0xff, 0x01, 0x8b, 0xf0, 0xc5, 0xd9, 0xca, 0x01,
+ 0x8b, 0x48, 0xc4, 0x79, 0xf3, 0x01, 0x89, 0xe3, 0x03, 0x70, 0x06, 0xc6,
+ 0xba, 0x7c, 0x01, 0x89, 0xf9, 0xc6, 0xca, 0x0e, 0x01, 0x8b, 0x60, 0xc6,
+ 0xc1, 0x86, 0x01, 0x89, 0xf1, 0xc5, 0xc0, 0x7d, 0x01, 0x8b, 0x50, 0xc4,
+ 0x79, 0xf3, 0x01, 0x8b, 0x71, 0xc6, 0xca, 0x0e, 0x01, 0x8b, 0x80, 0xc4,
+ 0xad, 0x2b, 0x01, 0x8a, 0x23, 0x03, 0x70, 0x0c, 0xc6, 0x8e, 0xde, 0x01,
+ 0x8b, 0x91, 0x16, 0xc3, 0x70, 0x10, 0xc5, 0xdb, 0xff, 0x01, 0x8b, 0xb0,
+ 0xc8, 0x90, 0xe1, 0x01, 0x8c, 0x30, 0x02, 0x43, 0x70, 0x1c, 0xc2, 0x19,
+ 0x2c, 0x01, 0x8c, 0x3b, 0x03, 0x70, 0x28, 0x8b, 0x01, 0x8c, 0x48, 0xc2,
+ 0x0c, 0x43, 0x01, 0x8c, 0x5b, 0x03, 0x70, 0x2c, 0x8b, 0x01, 0x8c, 0x60,
+ 0x83, 0x07, 0xfb, 0x61, 0x97, 0x07, 0xfb, 0x69, 0x91, 0x07, 0xfb, 0x70,
+ 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xd8, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x91,
+ 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x48, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xd0,
+ 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x89, 0xc7, 0x0d, 0x04, 0x0f, 0x64, 0x40,
+ 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xc8, 0x00, 0x43, 0x70, 0x30, 0xc9, 0x57,
+ 0x20, 0x0f, 0x64, 0xc0, 0x00, 0x43, 0x70, 0x3c, 0xc9, 0x57, 0x20, 0x0f,
+ 0x64, 0xb8, 0x00, 0x43, 0x70, 0x48, 0xc9, 0x57, 0x20, 0x0f, 0x64, 0xb0,
+ 0x00, 0x43, 0x70, 0x54, 0x19, 0xc3, 0x70, 0x60, 0x0a, 0xc3, 0x70, 0x68,
+ 0xc2, 0x00, 0xc4, 0x01, 0x9f, 0x48, 0xc3, 0x09, 0x9e, 0x01, 0x9f, 0x1b,
+ 0x03, 0x70, 0x74, 0x0b, 0x43, 0x70, 0x7a, 0xc2, 0x22, 0xcc, 0x01, 0x9f,
+ 0x2b, 0x03, 0x70, 0x86, 0xc4, 0x18, 0x10, 0x01, 0x9f, 0x32, 0x03, 0x70,
+ 0x8c, 0xc4, 0x00, 0x2d, 0x01, 0x9f, 0x3b, 0x03, 0x70, 0x92, 0xc5, 0x66,
+ 0xb1, 0x01, 0x9f, 0x50, 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x90, 0x91, 0x01,
+ 0x9a, 0xd1, 0x07, 0x43, 0x70, 0x98, 0xc3, 0x02, 0xdf, 0x01, 0x9a, 0xd9,
+ 0xc6, 0x52, 0xcd, 0x01, 0x9b, 0x28, 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x30,
+ 0xc2, 0x00, 0x5f, 0x01, 0x9a, 0xe9, 0xc5, 0x14, 0x08, 0x01, 0x9b, 0x38,
+ 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x40, 0xc4, 0x14, 0x09, 0x01, 0x9b, 0x48,
+ 0xc3, 0x03, 0x26, 0x01, 0x9b, 0x50, 0xd2, 0x4a, 0x2d, 0x0f, 0xd0, 0x31,
+ 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x69, 0xdf, 0x0d, 0x00, 0x0f, 0xd0, 0xd9,
+ 0x16, 0x43, 0x70, 0xa7, 0xc5, 0xa8, 0xf7, 0x0f, 0xd2, 0x71, 0xc4, 0xde,
+ 0x83, 0x0f, 0xd2, 0x79, 0xc6, 0xca, 0xfd, 0x0f, 0xd2, 0x80, 0xce, 0x2a,
+ 0xfe, 0x0f, 0xd0, 0x49, 0xdb, 0x18, 0x03, 0x0f, 0xd1, 0x98, 0xc7, 0x02,
+ 0x54, 0x01, 0x34, 0x31, 0xc8, 0x3e, 0xe6, 0x01, 0x4f, 0x60, 0xce, 0x3d,
+ 0x7c, 0x01, 0x2f, 0xb9, 0xcd, 0x02, 0xb4, 0x01, 0x2f, 0xa0, 0xce, 0x3d,
+ 0x7c, 0x01, 0x2f, 0xb1, 0xcd, 0x02, 0xb4, 0x01, 0x2f, 0xa8, 0xce, 0x61,
+ 0x30, 0x01, 0x3f, 0x29, 0xce, 0x13, 0x5f, 0x01, 0x2d, 0x10, 0xcd, 0x6f,
+ 0x2b, 0x01, 0x3f, 0x21, 0x45, 0x00, 0x27, 0x43, 0x70, 0xb3, 0xce, 0x3d,
+ 0x7c, 0x01, 0x2f, 0x99, 0xcd, 0x02, 0xb4, 0x01, 0x2f, 0x80, 0x00, 0x43,
+ 0x70, 0xbf, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0xa8, 0xc9, 0x57, 0x20, 0x08,
+ 0x4f, 0xa0, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xc3, 0x03, 0x70, 0xcb, 0xc8,
+ 0x4b, 0x94, 0x08, 0x4f, 0x08, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x50, 0xc7,
+ 0x0d, 0x04, 0x08, 0x4e, 0xbb, 0x03, 0x70, 0xd1, 0xc8, 0x4b, 0x94, 0x08,
+ 0x4f, 0x00, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x48, 0x00, 0x43, 0x70, 0xd7,
+ 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x40, 0x00, 0x43, 0x70, 0xe6, 0xc9, 0x57,
+ 0x20, 0x08, 0x4f, 0x38, 0x00, 0x43, 0x70, 0xf5, 0xc9, 0x57, 0x20, 0x08,
+ 0x4f, 0x30, 0x00, 0x43, 0x71, 0x04, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x28,
+ 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x68, 0xc4, 0x03, 0xc8, 0x01, 0x4d, 0x79,
+ 0xc2, 0x02, 0xae, 0x01, 0x4d, 0x68, 0xc4, 0x03, 0xc8, 0x01, 0x4d, 0x71,
+ 0xc2, 0x02, 0xae, 0x01, 0x4d, 0x60, 0xc4, 0x00, 0x49, 0x01, 0x4d, 0x59,
+ 0xc5, 0x00, 0x2c, 0x01, 0x4d, 0x50, 0xc4, 0x00, 0x49, 0x01, 0x4d, 0x49,
+ 0xc5, 0x00, 0x2c, 0x01, 0x4d, 0x40, 0x83, 0x00, 0xc5, 0x29, 0xc2, 0x00,
+ 0xd0, 0x00, 0xc5, 0x20, 0xc2, 0x19, 0x2c, 0x00, 0xc5, 0x19, 0x83, 0x00,
+ 0xc4, 0xe0, 0xc2, 0x00, 0xd0, 0x00, 0xc5, 0x09, 0xc3, 0x40, 0xe2, 0x00,
+ 0xc4, 0xf8, 0x83, 0x00, 0xc5, 0x01, 0xc2, 0x01, 0x6f, 0x00, 0xc4, 0xf0,
+ 0xc5, 0x7c, 0x16, 0x00, 0xc5, 0x49, 0xc4, 0xe4, 0xa3, 0x00, 0xc4, 0x10,
+ 0xc2, 0x00, 0xd0, 0x00, 0xc4, 0x69, 0x83, 0x00, 0xc4, 0x60, 0xc3, 0xb4,
+ 0xa6, 0x00, 0xc4, 0xc9, 0xc2, 0x01, 0x6f, 0x00, 0xc4, 0xc0, 0x8e, 0x08,
+ 0xb0, 0x48, 0x94, 0x08, 0xb0, 0x38, 0xc4, 0x89, 0xfe, 0x00, 0xed, 0xf9,
+ 0x46, 0x45, 0x87, 0xc3, 0x71, 0x13, 0x46, 0x00, 0x8b, 0xc3, 0x71, 0x45,
+ 0xc9, 0xad, 0x80, 0x00, 0xea, 0xa1, 0xd3, 0x45, 0x14, 0x08, 0x3d, 0x59,
+ 0xc9, 0xab, 0x40, 0x08, 0x3d, 0x63, 0x03, 0x71, 0x51, 0xcb, 0x8d, 0x37,
+ 0x08, 0x3d, 0x70, 0xc2, 0x25, 0xa1, 0x00, 0xed, 0xf1, 0xc2, 0x01, 0xe2,
+ 0x00, 0xed, 0xa1, 0xc2, 0x00, 0x8e, 0x00, 0xec, 0xf1, 0xc2, 0x00, 0x75,
+ 0x00, 0xea, 0x88, 0x46, 0x00, 0x8b, 0x43, 0x71, 0x57, 0x46, 0x00, 0x8b,
+ 0x43, 0x71, 0x63, 0x47, 0x0b, 0x18, 0xc3, 0x71, 0x6f, 0xca, 0x45, 0x1d,
+ 0x00, 0xec, 0xe9, 0xc2, 0x00, 0x0a, 0x00, 0xeb, 0x09, 0x46, 0x17, 0x8d,
+ 0x43, 0x71, 0xa8, 0xc6, 0x10, 0x3f, 0x00, 0xed, 0xb9, 0x00, 0x43, 0x71,
+ 0xb4, 0x46, 0x00, 0x8b, 0xc3, 0x71, 0xc0, 0x05, 0xc3, 0x71, 0xcc, 0xc9,
+ 0xa8, 0x94, 0x00, 0xea, 0xc8, 0xc2, 0x00, 0x0a, 0x00, 0xed, 0x90, 0xc7,
+ 0xc3, 0x76, 0x00, 0xed, 0x89, 0xc3, 0x04, 0x87, 0x00, 0xea, 0xe9, 0xcc,
+ 0x8b, 0x95, 0x00, 0xea, 0xa9, 0xca, 0x1f, 0x59, 0x08, 0x3c, 0x28, 0xce,
+ 0x01, 0x19, 0x00, 0xed, 0x79, 0xc9, 0x6d, 0x45, 0x00, 0xed, 0x70, 0xca,
+ 0x1f, 0x59, 0x00, 0xed, 0x60, 0x46, 0x00, 0x8b, 0xc3, 0x71, 0xd8, 0xca,
+ 0x9f, 0xcc, 0x05, 0x3f, 0xc9, 0xc9, 0xab, 0x40, 0x08, 0x3c, 0xc9, 0xc9,
+ 0xa8, 0x67, 0x08, 0x3c, 0xd1, 0xc3, 0xe6, 0x41, 0x08, 0x3c, 0xf2, 0x03,
+ 0x71, 0xf9, 0xd2, 0x4d, 0x0f, 0x00, 0xed, 0x40, 0xc3, 0x01, 0xbb, 0x00,
+ 0xed, 0x29, 0xcc, 0x23, 0x3f, 0x00, 0xed, 0x20, 0xd4, 0x3b, 0xc4, 0x00,
+ 0xed, 0x0b, 0x03, 0x71, 0xff, 0x07, 0xc3, 0x72, 0x05, 0x46, 0x00, 0x8b,
+ 0xc3, 0x72, 0x11, 0xc9, 0xa8, 0x67, 0x08, 0x3c, 0x3a, 0x03, 0x72, 0x20,
+ 0xcb, 0x92, 0x5f, 0x08, 0x3c, 0x80, 0x48, 0x10, 0x2f, 0xc3, 0x72, 0x26,
+ 0xc8, 0xb7, 0xda, 0x08, 0x3c, 0x89, 0x46, 0x00, 0x8b, 0x43, 0x72, 0x36,
+ 0x45, 0x29, 0xb4, 0xc3, 0x72, 0x42, 0xc4, 0x38, 0x2c, 0x00, 0x17, 0x01,
+ 0xca, 0x1f, 0x59, 0x08, 0x3c, 0x98, 0xc2, 0x00, 0x74, 0x00, 0xea, 0xe1,
+ 0xc4, 0xde, 0x3f, 0x00, 0xea, 0x29, 0x87, 0x08, 0x3c, 0x18, 0x44, 0x05,
+ 0x76, 0xc3, 0x72, 0x4e, 0xcc, 0x23, 0x3f, 0x08, 0x3d, 0x10, 0xc3, 0x0a,
+ 0xe3, 0x05, 0x5a, 0xe3, 0x03, 0x72, 0x56, 0x46, 0x00, 0x8b, 0x43, 0x72,
+ 0x5c, 0x48, 0x10, 0x2f, 0x43, 0x72, 0x68, 0x97, 0x00, 0xe9, 0xe8, 0xcc,
+ 0x23, 0x3f, 0x05, 0x3f, 0xc0, 0xc7, 0xc3, 0x84, 0x00, 0xe9, 0x78, 0x87,
+ 0x00, 0xe9, 0x68, 0xc4, 0x2a, 0xa0, 0x05, 0x38, 0x01, 0xc5, 0xdb, 0xcd,
+ 0x05, 0x38, 0x11, 0xc2, 0x00, 0xe3, 0x05, 0x38, 0x21, 0xc2, 0x17, 0x99,
+ 0x05, 0x38, 0x30, 0xc4, 0x2a, 0xa0, 0x05, 0x38, 0x09, 0xc5, 0xdb, 0xcd,
+ 0x05, 0x38, 0x19, 0xc2, 0x00, 0xe3, 0x05, 0x38, 0x29, 0xc2, 0x17, 0x99,
+ 0x05, 0x38, 0x38, 0xcc, 0x23, 0x33, 0x00, 0x16, 0x0b, 0x03, 0x72, 0x70,
+ 0xc5, 0x0a, 0x8a, 0x00, 0x15, 0xe8, 0xe0, 0x01, 0x07, 0x08, 0x3d, 0xc8,
+ 0xcd, 0x36, 0x86, 0x00, 0x16, 0x61, 0xc6, 0x60, 0xb1, 0x00, 0x16, 0x69,
+ 0xcc, 0x1f, 0x0c, 0x00, 0x16, 0x71, 0xcc, 0x83, 0x0d, 0x00, 0x16, 0x79,
+ 0x42, 0x00, 0x58, 0xc3, 0x72, 0x76, 0x44, 0x00, 0x49, 0xc3, 0x72, 0x82,
+ 0xd9, 0x1d, 0x6f, 0x05, 0x38, 0xf9, 0x16, 0xc3, 0x72, 0x91, 0xcc, 0x4d,
+ 0x8d, 0x00, 0x17, 0x81, 0x42, 0x00, 0x2c, 0xc3, 0x72, 0x9d, 0xd1, 0x08,
+ 0xf6, 0x05, 0x3c, 0x40, 0xc5, 0x18, 0x25, 0x00, 0x15, 0xd1, 0xca, 0x2d,
+ 0x84, 0x00, 0x17, 0x70, 0xc9, 0x03, 0xde, 0x00, 0x16, 0x29, 0xc4, 0x32,
+ 0xbc, 0x00, 0x16, 0xa8, 0xcc, 0x07, 0xbb, 0x05, 0x38, 0xb9, 0xc5, 0x03,
+ 0x02, 0x05, 0x38, 0xc1, 0xce, 0x0e, 0xf1, 0x05, 0x38, 0xc8, 0x00, 0xc3,
+ 0x72, 0xa9, 0x44, 0x04, 0xce, 0x43, 0x72, 0xbb, 0x47, 0x19, 0x7a, 0xc3,
+ 0x72, 0xc7, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x91, 0xc8, 0x4e, 0x93, 0x00,
+ 0x17, 0x28, 0x47, 0x19, 0x7a, 0xc3, 0x72, 0xd3, 0xd2, 0x4e, 0x89, 0x05,
+ 0x38, 0xb1, 0xc8, 0x4e, 0x93, 0x00, 0x17, 0x48, 0xc8, 0x4e, 0x93, 0x05,
+ 0x38, 0x49, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x70, 0xc3, 0x11, 0x7e, 0x0e,
+ 0xb6, 0xd1, 0xc5, 0xd8, 0x8f, 0x0e, 0xb6, 0x80, 0xc3, 0x11, 0x7e, 0x0e,
+ 0xba, 0x71, 0xc5, 0xd8, 0x8f, 0x0e, 0xba, 0x20, 0xc3, 0x11, 0x7e, 0x0e,
+ 0xb9, 0xa1, 0xc5, 0xd8, 0x8f, 0x0e, 0xb9, 0x50, 0xc7, 0x00, 0x90, 0x0e,
+ 0xb9, 0x68, 0xc4, 0x18, 0x10, 0x0e, 0xbf, 0x99, 0xc2, 0x22, 0xcc, 0x0e,
+ 0xbf, 0x90, 0xc3, 0x0d, 0x14, 0x0e, 0xbf, 0x89, 0xc3, 0x09, 0x9e, 0x0e,
+ 0xbf, 0x80, 0xc4, 0x02, 0xde, 0x0e, 0xbf, 0x79, 0xc2, 0x02, 0xa0, 0x0e,
+ 0xbf, 0x70, 0xc8, 0x9c, 0x0e, 0x0e, 0xbe, 0x49, 0xc9, 0xaa, 0x9e, 0x0e,
+ 0xbe, 0x39, 0xd3, 0x43, 0x00, 0x0e, 0xbe, 0x18, 0x91, 0x0e, 0xb3, 0x23,
+ 0x03, 0x72, 0xdf, 0x92, 0x0e, 0xb3, 0x2b, 0x03, 0x72, 0xe3, 0x85, 0x0e,
+ 0xb2, 0xc3, 0x03, 0x72, 0xf3, 0x97, 0x0e, 0xb3, 0x53, 0x03, 0x72, 0xf9,
+ 0x96, 0x0e, 0xb3, 0x4b, 0x03, 0x72, 0xff, 0x95, 0x0e, 0xb3, 0x43, 0x03,
+ 0x73, 0x0b, 0x88, 0x0e, 0xb2, 0xdb, 0x03, 0x73, 0x11, 0x94, 0x0e, 0xb3,
+ 0x3b, 0x03, 0x73, 0x17, 0x9a, 0x0e, 0xb3, 0x6b, 0x03, 0x73, 0x1d, 0x90,
+ 0x0e, 0xb3, 0x1b, 0x03, 0x73, 0x21, 0x8f, 0x0e, 0xb3, 0x13, 0x03, 0x73,
+ 0x25, 0x8e, 0x0e, 0xb3, 0x0b, 0x03, 0x73, 0x29, 0x8d, 0x0e, 0xb3, 0x03,
+ 0x03, 0x73, 0x2f, 0x8b, 0x0e, 0xb2, 0xf3, 0x03, 0x73, 0x35, 0x87, 0x0e,
+ 0xb2, 0xd3, 0x03, 0x73, 0x3b, 0x9c, 0x0e, 0xb3, 0x7b, 0x03, 0x73, 0x47,
+ 0x86, 0x0e, 0xb2, 0xcb, 0x03, 0x73, 0x4d, 0x89, 0x0e, 0xb2, 0xe3, 0x03,
+ 0x73, 0x53, 0x84, 0x0e, 0xb2, 0xbb, 0x03, 0x73, 0x59, 0x83, 0x0e, 0xb2,
+ 0xb3, 0x03, 0x73, 0x5f, 0x9b, 0x0e, 0xb3, 0x71, 0x99, 0x0e, 0xb3, 0x61,
+ 0x98, 0x0e, 0xb3, 0x59, 0x93, 0x0e, 0xb3, 0x31, 0x8c, 0x0e, 0xb2, 0xf9,
+ 0x8a, 0x0e, 0xb2, 0xe8, 0x91, 0x0e, 0xb2, 0x53, 0x03, 0x73, 0x65, 0x92,
+ 0x0e, 0xb2, 0x5b, 0x03, 0x73, 0x69, 0x85, 0x0e, 0xb1, 0xf3, 0x03, 0x73,
+ 0x79, 0x97, 0x0e, 0xb2, 0x83, 0x03, 0x73, 0x7f, 0x96, 0x0e, 0xb2, 0x7b,
+ 0x03, 0x73, 0x85, 0x95, 0x0e, 0xb2, 0x73, 0x03, 0x73, 0x94, 0x94, 0x0e,
+ 0xb2, 0x6b, 0x03, 0x73, 0x9a, 0x9a, 0x0e, 0xb2, 0x9b, 0x03, 0x73, 0xa0,
+ 0x90, 0x0e, 0xb2, 0x4b, 0x03, 0x73, 0xa4, 0x8f, 0x0e, 0xb2, 0x43, 0x03,
+ 0x73, 0xa8, 0x8e, 0x0e, 0xb2, 0x3b, 0x03, 0x73, 0xac, 0x8d, 0x0e, 0xb2,
+ 0x33, 0x03, 0x73, 0xb2, 0x8b, 0x0e, 0xb2, 0x23, 0x03, 0x73, 0xb8, 0x87,
+ 0x0e, 0xb2, 0x03, 0x03, 0x73, 0xbe, 0x9c, 0x0e, 0xb2, 0xab, 0x03, 0x73,
+ 0xca, 0x86, 0x0e, 0xb1, 0xfb, 0x03, 0x73, 0xd0, 0x89, 0x0e, 0xb2, 0x13,
+ 0x03, 0x73, 0xd6, 0x84, 0x0e, 0xb1, 0xeb, 0x03, 0x73, 0xdc, 0x83, 0x0e,
+ 0xb1, 0xe3, 0x03, 0x73, 0xe2, 0x9b, 0x0e, 0xb2, 0xa1, 0x99, 0x0e, 0xb2,
+ 0x91, 0x98, 0x0e, 0xb2, 0x89, 0x93, 0x0e, 0xb2, 0x61, 0x8c, 0x0e, 0xb2,
+ 0x29, 0x8a, 0x0e, 0xb2, 0x19, 0x88, 0x0e, 0xb2, 0x08, 0x0f, 0x43, 0x73,
+ 0xe8, 0xc2, 0x00, 0xba, 0x0e, 0xbc, 0x39, 0xc2, 0x00, 0x0a, 0x0e, 0xbc,
+ 0x29, 0x8b, 0x0e, 0xbb, 0xf8, 0xc2, 0x00, 0x0a, 0x0e, 0xbc, 0x30, 0xc6,
+ 0x10, 0x3f, 0x0e, 0xbc, 0x20, 0xc2, 0x20, 0xec, 0x0e, 0xbc, 0x19, 0xc4,
+ 0x89, 0xfe, 0x0e, 0xbb, 0xb8, 0xc4, 0x1a, 0x73, 0x0e, 0xbc, 0x10, 0xca,
+ 0x91, 0x2c, 0x0e, 0xbc, 0x08, 0xc2, 0x01, 0x23, 0x0e, 0xbc, 0x00, 0x8b,
+ 0x0e, 0xbb, 0xe8, 0x97, 0x0e, 0xbb, 0xe0, 0x97, 0x0e, 0xbb, 0xd8, 0xc4,
+ 0xdd, 0x9a, 0x0e, 0xbb, 0xd0, 0xc4, 0x8b, 0x66, 0x0e, 0xbb, 0xc8, 0xc3,
+ 0x01, 0xbb, 0x0e, 0xbb, 0xc0, 0xc2, 0x01, 0x6f, 0x0e, 0xbb, 0xb1, 0xc6,
+ 0x10, 0x3f, 0x0e, 0xbb, 0xa0, 0xc3, 0x04, 0x87, 0x0e, 0xbb, 0xa8, 0xc4,
+ 0xdb, 0x4c, 0x0e, 0xbb, 0x98, 0xc4, 0x38, 0x2c, 0x0e, 0xbb, 0x90, 0xc3,
+ 0x04, 0x87, 0x0e, 0xbb, 0x88, 0xc4, 0xde, 0x3f, 0x0e, 0xbb, 0x80, 0x0f,
+ 0x43, 0x73, 0xf4, 0xc2, 0x00, 0xba, 0x0e, 0xbb, 0x69, 0xc2, 0x00, 0x0a,
+ 0x0e, 0xbb, 0x59, 0x8b, 0x0e, 0xbb, 0x28, 0xc2, 0x00, 0x0a, 0x0e, 0xbb,
+ 0x60, 0xc6, 0x10, 0x3f, 0x0e, 0xbb, 0x50, 0xc2, 0x20, 0xec, 0x0e, 0xbb,
+ 0x49, 0xc4, 0x89, 0xfe, 0x0e, 0xba, 0xea, 0x03, 0x74, 0x00, 0xc4, 0x1a,
+ 0x73, 0x0e, 0xbb, 0x40, 0xc2, 0x01, 0x23, 0x0e, 0xbb, 0x30, 0x8b, 0x0e,
+ 0xbb, 0x18, 0x97, 0x0e, 0xbb, 0x10, 0x97, 0x0e, 0xbb, 0x08, 0xc4, 0xdd,
+ 0x9a, 0x0e, 0xbb, 0x00, 0xc4, 0x8b, 0x66, 0x0e, 0xba, 0xf8, 0xc3, 0x01,
+ 0xbb, 0x0e, 0xba, 0xf0, 0xc2, 0x01, 0x6f, 0x0e, 0xba, 0xe1, 0xc6, 0x10,
+ 0x3f, 0x0e, 0xba, 0xd0, 0xc3, 0x04, 0x87, 0x0e, 0xba, 0xd8, 0xc4, 0xdb,
+ 0x4c, 0x0e, 0xba, 0xc8, 0xc4, 0x38, 0x2c, 0x0e, 0xba, 0xc0, 0xc3, 0x04,
+ 0x87, 0x0e, 0xba, 0xb8, 0xc4, 0xde, 0x3f, 0x0e, 0xba, 0xb0, 0x8e, 0x00,
+ 0x6b, 0xf2, 0x03, 0x74, 0x06, 0x90, 0x00, 0x6b, 0xd0, 0x08, 0xc3, 0x74,
+ 0x0a, 0x07, 0xc3, 0x74, 0x16, 0x52, 0x48, 0xa1, 0xc3, 0x74, 0x22, 0xc9,
+ 0xb2, 0xe1, 0x0e, 0x8f, 0x19, 0xca, 0xa6, 0x7a, 0x0e, 0x8f, 0x11, 0xcf,
+ 0x61, 0xc5, 0x0e, 0x8f, 0x09, 0xc6, 0xcb, 0x39, 0x0e, 0x8e, 0xf0, 0xc7,
+ 0xc8, 0xe7, 0x0e, 0x8e, 0xd8, 0x84, 0x0e, 0x8e, 0x91, 0x49, 0x32, 0x9d,
+ 0x43, 0x74, 0x2e, 0x42, 0x02, 0x2f, 0xc3, 0x74, 0x3a, 0xc3, 0x61, 0xff,
+ 0x0e, 0x88, 0x58, 0x1a, 0xc3, 0x74, 0x46, 0xcc, 0x82, 0x29, 0x0e, 0x88,
+ 0x00, 0x44, 0x28, 0xcb, 0xc3, 0x74, 0x4e, 0xcb, 0x96, 0x11, 0x0e, 0x88,
+ 0x28, 0xcc, 0x81, 0x69, 0x0e, 0x8e, 0xe9, 0x44, 0xa1, 0x2c, 0x43, 0x74,
+ 0x5a, 0xc7, 0xc7, 0xc8, 0x0e, 0x8e, 0xcb, 0x03, 0x74, 0x66, 0xc5, 0xda,
+ 0x4c, 0x0e, 0x8e, 0xa0, 0xca, 0xa5, 0xe4, 0x0e, 0x8e, 0xe0, 0x5b, 0x15,
+ 0x0f, 0xc3, 0x74, 0x6c, 0x59, 0x15, 0x11, 0x43, 0x74, 0x7b, 0x00, 0x43,
+ 0x74, 0x8a, 0x46, 0x01, 0x94, 0x43, 0x74, 0x96, 0x4c, 0x8b, 0xe9, 0xc3,
+ 0x74, 0xa2, 0xce, 0x74, 0x94, 0x0e, 0x88, 0xc0, 0x0b, 0xc3, 0x74, 0xae,
+ 0x4f, 0x60, 0x5d, 0x43, 0x74, 0xba, 0xc3, 0xe6, 0x2f, 0x0e, 0x8e, 0x79,
+ 0xc7, 0xb2, 0x1d, 0x0e, 0x8c, 0x90, 0x0f, 0xc3, 0x74, 0xc6, 0xc2, 0x0e,
+ 0x9a, 0x0e, 0x88, 0x60, 0x48, 0xbb, 0xc2, 0xc3, 0x74, 0xd2, 0x49, 0xb1,
+ 0x67, 0x43, 0x74, 0xde, 0xc4, 0x03, 0xc8, 0x0e, 0x8d, 0x91, 0xc2, 0x02,
+ 0xae, 0x0e, 0x8d, 0x88, 0x48, 0xb7, 0xd2, 0x43, 0x74, 0xea, 0x00, 0x43,
+ 0x74, 0xf6, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0x99, 0xc5, 0x01, 0xfc, 0x0e,
+ 0x8a, 0x90, 0xc5, 0x5e, 0x2d, 0x0e, 0x89, 0xd1, 0xd0, 0x5e, 0x22, 0x0e,
+ 0x89, 0x48, 0x07, 0xc3, 0x75, 0x02, 0x42, 0x00, 0x3a, 0x43, 0x75, 0x0c,
+ 0xc6, 0x2c, 0xfc, 0x0e, 0x8b, 0xc9, 0xc4, 0xdf, 0x3b, 0x0e, 0x8b, 0xb9,
+ 0xc3, 0x1e, 0x19, 0x0e, 0x8b, 0xa9, 0xc4, 0xd8, 0xf4, 0x0e, 0x8b, 0x98,
+ 0x00, 0x43, 0x75, 0x16, 0xc5, 0x02, 0xc2, 0x0e, 0x8e, 0x01, 0xc5, 0x01,
+ 0xfc, 0x0e, 0x8d, 0xf8, 0xc3, 0x08, 0x7c, 0x0e, 0x8c, 0x89, 0x43, 0xb1,
+ 0x5e, 0x43, 0x75, 0x22, 0x10, 0xc3, 0x75, 0x2e, 0xcd, 0x7d, 0x03, 0x0e,
+ 0x88, 0xd0, 0xc4, 0x03, 0xc8, 0x0e, 0x89, 0x69, 0xc2, 0x02, 0xae, 0x0e,
+ 0x89, 0x60, 0x48, 0xb7, 0xd2, 0x43, 0x75, 0x3a, 0xc6, 0x05, 0x01, 0x0e,
+ 0x88, 0x88, 0xc2, 0x15, 0x10, 0x0e, 0x8d, 0xa3, 0x03, 0x75, 0x46, 0xc5,
+ 0xd6, 0x5a, 0x0e, 0x88, 0x51, 0xc7, 0xc9, 0x65, 0x0e, 0x88, 0x49, 0xcc,
+ 0x81, 0xd5, 0x0e, 0x88, 0x20, 0xca, 0x9e, 0xa0, 0x0e, 0x8d, 0x49, 0xc9,
+ 0xb2, 0x1b, 0x0e, 0x8c, 0x98, 0xc4, 0x35, 0x36, 0x0e, 0x89, 0x59, 0xc5,
+ 0xa2, 0xba, 0x0e, 0x89, 0x51, 0xc7, 0x44, 0x3c, 0x0e, 0x88, 0x08, 0x9f,
+ 0x0e, 0x89, 0x31, 0x9e, 0x0e, 0x89, 0x28, 0xc4, 0x23, 0x2e, 0x0e, 0x8a,
+ 0xe9, 0xc4, 0x2c, 0x0d, 0x0e, 0x89, 0xd8, 0xca, 0xa1, 0x2a, 0x0e, 0x8d,
+ 0x81, 0xc4, 0x23, 0x2e, 0x0e, 0x8a, 0xf1, 0xc4, 0x2c, 0x0d, 0x0e, 0x89,
+ 0xe0, 0xc9, 0xab, 0x13, 0x0e, 0x8d, 0x41, 0xc6, 0x2c, 0xfc, 0x0e, 0x8b,
+ 0xd1, 0xc4, 0xdf, 0x3b, 0x0e, 0x8b, 0xc1, 0xc3, 0x1e, 0x19, 0x0e, 0x8b,
+ 0xb1, 0xc4, 0xd8, 0xf4, 0x0e, 0x8b, 0xa0, 0xc4, 0x23, 0x2e, 0x0e, 0x8b,
+ 0x01, 0xc4, 0x2c, 0x0d, 0x0e, 0x89, 0xf0, 0xc4, 0x03, 0xc8, 0x0e, 0x89,
+ 0x79, 0xc2, 0x02, 0xae, 0x0e, 0x89, 0x70, 0x9e, 0x0e, 0x8c, 0xdb, 0x03,
+ 0x75, 0x4c, 0xa6, 0x0e, 0x8d, 0x19, 0xa5, 0x0e, 0x8d, 0x11, 0xa4, 0x0e,
+ 0x8d, 0x09, 0xa3, 0x0e, 0x8d, 0x01, 0xa2, 0x0e, 0x8c, 0xf9, 0xa1, 0x0e,
+ 0x8c, 0xf1, 0xa0, 0x0e, 0x8c, 0xe9, 0x9f, 0x0e, 0x8c, 0xe0, 0x57, 0x28,
+ 0xe4, 0xc3, 0x75, 0x54, 0xcb, 0x74, 0x97, 0x0e, 0x88, 0xb0, 0xc5, 0xd7,
+ 0x6d, 0x0e, 0x89, 0xb9, 0xc4, 0xe2, 0x4b, 0x0e, 0x89, 0xb0, 0xc9, 0xa8,
+ 0x79, 0x0e, 0x8c, 0x61, 0xcf, 0x61, 0xf2, 0x0e, 0x88, 0x38, 0x44, 0x61,
+ 0xf8, 0xc3, 0x75, 0x60, 0xd3, 0x44, 0x30, 0x0e, 0x88, 0x18, 0xc4, 0x23,
+ 0x2e, 0x0e, 0x8a, 0xf9, 0xc4, 0x2c, 0x0d, 0x0e, 0x89, 0xe9, 0x45, 0x2b,
+ 0x5f, 0x43, 0x75, 0x6c, 0xc5, 0xd7, 0x6d, 0x0e, 0x89, 0xc9, 0xc4, 0xe2,
+ 0x4b, 0x0e, 0x89, 0xc0, 0xc8, 0x01, 0x92, 0x01, 0x51, 0xd9, 0xcd, 0x76,
+ 0x35, 0x01, 0x51, 0xb9, 0xd1, 0x51, 0x56, 0x01, 0x51, 0xa9, 0xd0, 0x5b,
+ 0x92, 0x01, 0x51, 0xa0, 0xc8, 0x52, 0x09, 0x01, 0x51, 0x89, 0xc9, 0x16,
+ 0x14, 0x01, 0x51, 0x80, 0xc2, 0x00, 0xd0, 0x05, 0x53, 0x49, 0x83, 0x05,
+ 0x53, 0x40, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x71, 0x83, 0x05, 0x4f, 0x68,
+ 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x21, 0x83, 0x00, 0x83, 0xf8, 0xc2, 0x00,
+ 0xc1, 0x05, 0x4f, 0x19, 0xc2, 0x19, 0x2c, 0x00, 0x83, 0xd1, 0x83, 0x00,
+ 0x83, 0xe0, 0x83, 0x00, 0x83, 0xa9, 0xc2, 0x00, 0xd0, 0x00, 0x83, 0xb0,
+ 0x83, 0x00, 0x83, 0xb9, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x00, 0x83, 0x00,
+ 0x83, 0xc1, 0xc2, 0x00, 0xd0, 0x05, 0x4f, 0x08, 0xa5, 0x0d, 0x7f, 0xf1,
+ 0xa4, 0x0d, 0x7f, 0xe9, 0xa2, 0x0d, 0x7f, 0xd9, 0xa1, 0x0d, 0x7f, 0xd1,
+ 0xa0, 0x0d, 0x7f, 0xc9, 0x9f, 0x0d, 0x7f, 0xc1, 0x9e, 0x0d, 0x7f, 0xb8,
+ 0xa5, 0x0d, 0x7f, 0xb1, 0xa4, 0x0d, 0x7f, 0xa9, 0xa2, 0x0d, 0x7f, 0x99,
+ 0xa1, 0x0d, 0x7f, 0x91, 0xa0, 0x0d, 0x7f, 0x89, 0x9f, 0x0d, 0x7f, 0x80,
+ 0x94, 0x00, 0x67, 0x00, 0x8e, 0x00, 0x67, 0x08, 0xc5, 0xde, 0x4d, 0x01,
+ 0x79, 0xa1, 0xc4, 0xb6, 0xdb, 0x01, 0x7b, 0x40, 0xc5, 0x8c, 0xf0, 0x01,
+ 0x79, 0x99, 0xca, 0xa3, 0x14, 0x01, 0x7d, 0x58, 0xc4, 0x2a, 0xa0, 0x01,
+ 0x7c, 0x48, 0xc4, 0x03, 0x0b, 0x01, 0x79, 0x69, 0x86, 0x01, 0x7d, 0x48,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xf9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x78,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xe1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x60,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xf1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x70,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xe9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x68,
+ 0x44, 0xdf, 0x37, 0xc3, 0x75, 0x78, 0x43, 0x93, 0x74, 0x43, 0x75, 0x84,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xb9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x38,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xb1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x30,
+ 0x04, 0xc3, 0x75, 0x90, 0xc3, 0x71, 0xec, 0x00, 0xbf, 0xb9, 0xc4, 0xda,
+ 0x97, 0x00, 0xbf, 0xb0, 0x4b, 0x18, 0x04, 0xc3, 0x75, 0x9c, 0xdc, 0x13,
+ 0xf9, 0x0f, 0xd2, 0x38, 0xc9, 0x1f, 0x5a, 0x01, 0x49, 0x21, 0xd4, 0x3c,
+ 0x8c, 0x01, 0x49, 0x41, 0x49, 0x0d, 0x20, 0x43, 0x75, 0xa8, 0x43, 0x01,
+ 0x7b, 0xc3, 0x75, 0xb4, 0xc9, 0x1f, 0x5a, 0x01, 0x49, 0x19, 0xd4, 0x39,
+ 0xd0, 0x01, 0x49, 0x39, 0xd9, 0x20, 0x5d, 0x01, 0x49, 0x90, 0x87, 0x0f,
+ 0x3f, 0xc8, 0x87, 0x0f, 0x3f, 0xb0, 0x87, 0x0f, 0x3f, 0x88, 0x87, 0x05,
+ 0x59, 0x20, 0x83, 0x05, 0x59, 0x18, 0x83, 0x00, 0x96, 0x98, 0x87, 0x00,
+ 0x96, 0xa0, 0xc3, 0x11, 0x7e, 0x00, 0x1d, 0x4b, 0x03, 0x75, 0xc0, 0xc5,
+ 0xd8, 0x8f, 0x00, 0x1c, 0xfa, 0x03, 0x75, 0xc6, 0xcb, 0x8f, 0x9f, 0x00,
+ 0xff, 0x60, 0x46, 0x00, 0x8b, 0x43, 0x75, 0xcc, 0x46, 0x00, 0x8b, 0x43,
+ 0x75, 0xe6, 0xc2, 0x01, 0x6f, 0x00, 0x1c, 0xbb, 0x03, 0x76, 0x09, 0xc6,
+ 0x10, 0x3f, 0x00, 0x1c, 0xaa, 0x03, 0x76, 0x0f, 0xc4, 0xde, 0x3f, 0x00,
+ 0x1c, 0x8b, 0x03, 0x76, 0x15, 0xcc, 0x87, 0xed, 0x00, 0x1b, 0x90, 0xd1,
+ 0x51, 0x45, 0x00, 0x1b, 0xb1, 0x8b, 0x00, 0x1d, 0x01, 0xc2, 0x00, 0x0a,
+ 0x00, 0x1d, 0x31, 0xc2, 0x00, 0xba, 0x00, 0x1d, 0x40, 0xc4, 0x89, 0xfe,
+ 0x00, 0x1c, 0xc1, 0xc2, 0x20, 0xec, 0x00, 0x1d, 0x20, 0xc4, 0x1a, 0x73,
+ 0x00, 0x1d, 0x19, 0xc2, 0x01, 0x23, 0x00, 0x1f, 0xb9, 0xc2, 0x00, 0xd1,
+ 0x00, 0x1f, 0xd0, 0xc3, 0x11, 0x7e, 0x00, 0x1e, 0x4b, 0x03, 0x76, 0x1b,
+ 0xc5, 0xd8, 0x8f, 0x00, 0x1d, 0xfa, 0x03, 0x76, 0x21, 0x46, 0x00, 0x8b,
+ 0x43, 0x76, 0x27, 0x46, 0x00, 0x8b, 0x43, 0x76, 0x45, 0x46, 0x00, 0x8b,
+ 0x43, 0x76, 0x51, 0xc2, 0x01, 0x6f, 0x00, 0x1d, 0xbb, 0x03, 0x76, 0x6f,
+ 0xc6, 0x10, 0x3f, 0x00, 0x1d, 0xaa, 0x03, 0x76, 0x75, 0xc4, 0xde, 0x3f,
+ 0x00, 0x1d, 0x8b, 0x03, 0x76, 0x7b, 0x47, 0x78, 0xc0, 0x43, 0x76, 0x81,
+ 0xc4, 0xdb, 0x4c, 0x00, 0x1d, 0xa1, 0xc6, 0x51, 0x50, 0x00, 0x1e, 0xe8,
+ 0xc4, 0x89, 0xfe, 0x00, 0x1d, 0xc1, 0xc2, 0x20, 0xec, 0x00, 0x1e, 0x20,
+ 0xc4, 0x8b, 0x66, 0x00, 0x1d, 0xd1, 0xc4, 0x78, 0xc8, 0x00, 0x1e, 0xf8,
+ 0x8b, 0x00, 0x1e, 0x01, 0xc2, 0x00, 0x0a, 0x00, 0x1e, 0x31, 0xc2, 0x00,
+ 0xba, 0x00, 0x1e, 0x41, 0xd1, 0x51, 0x45, 0x00, 0x1b, 0xb8, 0xc4, 0x1a,
+ 0x73, 0x00, 0x1e, 0x19, 0xc5, 0xd6, 0xe6, 0x00, 0x1e, 0xd9, 0xc2, 0x01,
+ 0x23, 0x00, 0x1f, 0xc1, 0x03, 0x43, 0x76, 0x8d, 0x12, 0xc3, 0x76, 0x97,
+ 0xc3, 0x79, 0xe7, 0x00, 0xe9, 0x49, 0xc5, 0xdd, 0x99, 0x00, 0xe9, 0x39,
+ 0xc5, 0x51, 0x51, 0x00, 0xe9, 0x31, 0xc5, 0x9b, 0xd5, 0x05, 0x5b, 0x28,
+ 0xc7, 0x08, 0x79, 0x08, 0x0a, 0x01, 0x0a, 0xc3, 0x76, 0xa1, 0xc7, 0x3e,
+ 0x00, 0x08, 0x0a, 0x11, 0x49, 0x57, 0x21, 0x43, 0x76, 0xad, 0xc2, 0x00,
+ 0x5f, 0x08, 0x0a, 0x1b, 0x03, 0x76, 0xb9, 0xc3, 0x45, 0x6b, 0x08, 0x0a,
+ 0x22, 0x03, 0x76, 0xbd, 0x16, 0xc3, 0x76, 0xc1, 0xc7, 0x67, 0xc7, 0x08,
+ 0x0a, 0x81, 0xc4, 0x45, 0x6f, 0x08, 0x0a, 0xb8, 0xc3, 0x05, 0x14, 0x08,
+ 0x0a, 0xd1, 0xc3, 0x09, 0x41, 0x08, 0x0b, 0x11, 0xc5, 0x45, 0x69, 0x08,
+ 0x0b, 0x40, 0xc3, 0x05, 0x14, 0x08, 0x0a, 0xcb, 0x03, 0x76, 0xcd, 0x16,
+ 0xc3, 0x76, 0xd1, 0x42, 0x02, 0x09, 0x43, 0x76, 0xe1, 0x42, 0x02, 0x09,
+ 0xc3, 0x76, 0xed, 0xc3, 0x09, 0x41, 0x08, 0x0b, 0x02, 0x03, 0x76, 0xff,
+ 0xc9, 0x3d, 0xff, 0x08, 0x0a, 0xf0, 0xc5, 0x00, 0x48, 0x01, 0x54, 0x20,
+ 0xc4, 0x0d, 0x0e, 0x08, 0x79, 0x21, 0xc3, 0x02, 0xdf, 0x08, 0x78, 0xf8,
+ 0xc4, 0x18, 0x12, 0x08, 0x79, 0x19, 0x91, 0x08, 0x78, 0xf0, 0xc3, 0xb5,
+ 0x3e, 0x08, 0x78, 0xdb, 0x03, 0x77, 0x05, 0xc5, 0xd9, 0xde, 0x08, 0x78,
+ 0xb3, 0x03, 0x77, 0x0b, 0xc3, 0x20, 0x18, 0x08, 0x78, 0x7b, 0x03, 0x77,
+ 0x11, 0xc2, 0x01, 0x7f, 0x08, 0x78, 0x31, 0xc4, 0xe3, 0x27, 0x08, 0x78,
+ 0x19, 0xc5, 0xa5, 0xfd, 0x08, 0x78, 0x08, 0xc3, 0x11, 0xef, 0x08, 0x78,
+ 0xc9, 0x03, 0x43, 0x77, 0x17, 0x0e, 0xc3, 0x77, 0x23, 0xc3, 0x16, 0x5a,
+ 0x08, 0x78, 0x90, 0xc2, 0x00, 0x8e, 0x08, 0x78, 0x48, 0xc3, 0x1e, 0x1b,
+ 0x08, 0x53, 0xe1, 0xc2, 0x39, 0x8b, 0x08, 0x53, 0xd8, 0xc4, 0x40, 0x9c,
+ 0x08, 0x53, 0xc9, 0xc3, 0x77, 0x79, 0x08, 0x53, 0x98, 0x96, 0x08, 0x53,
+ 0x51, 0xc3, 0x77, 0x79, 0x08, 0x53, 0x71, 0xc4, 0xdc, 0x2d, 0x08, 0x53,
+ 0x78, 0xcc, 0x89, 0xb5, 0x08, 0x67, 0x88, 0xcc, 0x89, 0xb5, 0x08, 0x65,
+ 0x88, 0x89, 0x08, 0x61, 0x70, 0xc9, 0xb1, 0x28, 0x08, 0x1e, 0x42, 0x03,
+ 0x77, 0x2f, 0x83, 0x08, 0x1d, 0x19, 0x97, 0x08, 0x1d, 0x20, 0x83, 0x08,
+ 0x1d, 0x29, 0x97, 0x08, 0x1d, 0x30, 0x83, 0x08, 0x1d, 0x39, 0xcb, 0x95,
+ 0x09, 0x08, 0x1e, 0x58, 0x83, 0x08, 0x1d, 0x49, 0x8b, 0x08, 0x1d, 0x50,
+ 0x83, 0x08, 0x1d, 0x59, 0x97, 0x08, 0x1d, 0x61, 0xc2, 0x00, 0xd0, 0x08,
+ 0x1d, 0x80, 0x83, 0x08, 0x1d, 0x6b, 0x03, 0x77, 0x3b, 0x8b, 0x08, 0x1d,
+ 0x71, 0x97, 0x08, 0x1d, 0x78, 0x83, 0x08, 0x1d, 0x93, 0x03, 0x77, 0x44,
+ 0xc6, 0xcc, 0x11, 0x08, 0x1e, 0x78, 0x83, 0x08, 0x1d, 0xa1, 0x97, 0x08,
+ 0x1d, 0xa8, 0x83, 0x08, 0x1d, 0xb1, 0x8b, 0x08, 0x1d, 0xb9, 0x97, 0x08,
+ 0x1d, 0xc0, 0x83, 0x08, 0x1d, 0xd1, 0x8b, 0x08, 0x1d, 0xd8, 0x83, 0x08,
+ 0x1d, 0xe1, 0x97, 0x08, 0x1d, 0xe8, 0x83, 0x08, 0x1d, 0xf9, 0xc2, 0x00,
+ 0xd0, 0x08, 0x1e, 0x09, 0xc2, 0x0d, 0xf6, 0x08, 0x1e, 0x10, 0x19, 0xc3,
+ 0x77, 0x4a, 0xc2, 0x00, 0xc4, 0x08, 0x1e, 0x98, 0x00, 0x43, 0x77, 0x54,
+ 0xca, 0xa2, 0x6a, 0x0e, 0x7d, 0x30, 0x46, 0x00, 0x8b, 0x43, 0x77, 0x66,
+ 0xcc, 0x87, 0x39, 0x0e, 0x7c, 0xf8, 0x43, 0x94, 0x9b, 0x43, 0x77, 0x72,
+ 0xcb, 0x94, 0x9b, 0x0e, 0x7c, 0x50, 0xc5, 0x00, 0x2c, 0x0e, 0x78, 0xb1,
+ 0xc4, 0x00, 0x49, 0x0e, 0x78, 0x50, 0x97, 0x00, 0xc7, 0x88, 0x91, 0x00,
+ 0xc7, 0x60, 0x91, 0x00, 0xc7, 0x58, 0xc5, 0x01, 0x6f, 0x00, 0xc7, 0xa9,
+ 0xc5, 0xdb, 0xa5, 0x00, 0xc7, 0x70, 0x87, 0x00, 0xb1, 0x58, 0x87, 0x00,
+ 0xb2, 0x58, 0x87, 0x00, 0xb0, 0xf8, 0x87, 0x00, 0xae, 0x38, 0x83, 0x00,
+ 0xb3, 0x61, 0x8b, 0x00, 0xb3, 0x59, 0x87, 0x00, 0xb3, 0x4b, 0x03, 0x77,
+ 0x7e, 0x91, 0x00, 0xb3, 0x41, 0x97, 0x00, 0xb3, 0x38, 0x87, 0x00, 0xaf,
+ 0x28, 0x87, 0x00, 0xb2, 0xf0, 0x87, 0x00, 0xae, 0xf8, 0x8b, 0x00, 0xb1,
+ 0xc1, 0x87, 0x00, 0xb1, 0xb3, 0x03, 0x77, 0x82, 0x91, 0x00, 0xb1, 0xa9,
+ 0x97, 0x00, 0xb1, 0xa1, 0x83, 0x00, 0xb1, 0xc8, 0x87, 0x00, 0xb1, 0xe8,
+ 0x87, 0x00, 0xaf, 0xf0, 0x87, 0x00, 0xaf, 0xc0, 0x87, 0x00, 0xae, 0xc8,
+ 0x87, 0x00, 0xb1, 0x88, 0x87, 0x00, 0xb2, 0xb8, 0x83, 0x00, 0xc7, 0x10,
+ 0x91, 0x00, 0xc7, 0x08, 0x87, 0x00, 0xa6, 0xe9, 0x8b, 0x00, 0xa6, 0xfb,
+ 0x03, 0x77, 0x86, 0x91, 0x00, 0xa7, 0x1b, 0x03, 0x77, 0x8a, 0x83, 0x00,
+ 0xa7, 0x3a, 0x03, 0x77, 0x8e, 0x8b, 0x00, 0xa2, 0xd3, 0x03, 0x77, 0x92,
+ 0x87, 0x00, 0xa2, 0xc1, 0x91, 0x00, 0xa2, 0xf3, 0x03, 0x77, 0x96, 0x83,
+ 0x00, 0xa3, 0x12, 0x03, 0x77, 0x9a, 0x83, 0x00, 0xa9, 0xd3, 0x03, 0x77,
+ 0x9e, 0x91, 0x00, 0xa9, 0xb3, 0x03, 0x77, 0xa2, 0x8b, 0x00, 0xa9, 0x93,
+ 0x03, 0x77, 0xa6, 0x87, 0x00, 0xa9, 0x80, 0x83, 0x00, 0xa9, 0x13, 0x03,
+ 0x77, 0xaa, 0x8b, 0x00, 0xa8, 0xd3, 0x03, 0x77, 0xae, 0x87, 0x00, 0xa8,
+ 0xc1, 0x91, 0x00, 0xa8, 0xf2, 0x03, 0x77, 0xb2, 0x83, 0x00, 0xa8, 0x0b,
+ 0x03, 0x77, 0xb6, 0x87, 0x00, 0xa7, 0xb9, 0x8b, 0x00, 0xa7, 0xcb, 0x03,
+ 0x77, 0xba, 0x91, 0x00, 0xa7, 0xea, 0x03, 0x77, 0xbe, 0x83, 0x00, 0xa2,
+ 0x2b, 0x03, 0x77, 0xc2, 0x91, 0x00, 0xa2, 0x0b, 0x03, 0x77, 0xc6, 0x8b,
+ 0x00, 0xa1, 0xeb, 0x03, 0x77, 0xca, 0x87, 0x00, 0xa1, 0xd8, 0x91, 0x00,
+ 0xa4, 0xd8, 0x8b, 0x00, 0xa4, 0xb8, 0x83, 0x00, 0xa4, 0xf8, 0x83, 0x00,
+ 0xa0, 0xd0, 0x91, 0x00, 0xa0, 0xa8, 0x8b, 0x00, 0xa0, 0x88, 0x83, 0x00,
+ 0xa4, 0x08, 0x8b, 0x00, 0xa3, 0xc8, 0x91, 0x00, 0xa3, 0xe8, 0x87, 0x00,
+ 0xa5, 0x69, 0x8b, 0x00, 0xa5, 0x7b, 0x03, 0x77, 0xce, 0x91, 0x00, 0xa5,
+ 0x9b, 0x03, 0x77, 0xd2, 0x83, 0x00, 0xa5, 0xba, 0x03, 0x77, 0xd6, 0x83,
+ 0x00, 0xa6, 0x70, 0x83, 0x00, 0xb3, 0xe3, 0x03, 0x77, 0xda, 0x91, 0x00,
+ 0xb3, 0xd3, 0x03, 0x77, 0xde, 0x8b, 0x00, 0xb3, 0xc3, 0x03, 0x77, 0xe2,
+ 0xc2, 0x02, 0xe0, 0x00, 0xb3, 0xb8, 0xc3, 0x0d, 0x14, 0x08, 0x9b, 0x59,
+ 0xc3, 0x09, 0x9e, 0x08, 0x9b, 0x50, 0xc4, 0x02, 0xde, 0x08, 0x9b, 0x49,
+ 0xc2, 0x02, 0xa0, 0x08, 0x9b, 0x40, 0xc6, 0x05, 0x01, 0x00, 0x18, 0xb0,
+ 0xc5, 0x05, 0x02, 0x01, 0x07, 0x79, 0xc5, 0x00, 0xd4, 0x01, 0x06, 0xb8,
+ 0x03, 0xc3, 0x77, 0xe6, 0xc5, 0x05, 0x02, 0x00, 0x1a, 0xa8, 0xc5, 0x05,
+ 0x02, 0x00, 0x19, 0xc9, 0xc5, 0x00, 0xd4, 0x00, 0x1a, 0xb8, 0xc5, 0x05,
+ 0x02, 0x01, 0x07, 0x71, 0xc5, 0x00, 0xd4, 0x01, 0x06, 0xb0, 0xc5, 0x00,
+ 0xd4, 0x00, 0xef, 0xf1, 0xc5, 0x05, 0x02, 0x00, 0x1a, 0xa0, 0xc5, 0x00,
+ 0xd4, 0x00, 0x18, 0x71, 0xc5, 0x05, 0x02, 0x00, 0x1a, 0x40, 0xc5, 0x05,
+ 0x02, 0x00, 0xd6, 0x51, 0xc5, 0x00, 0xd4, 0x00, 0xd6, 0x48, 0xc9, 0x0f,
+ 0x6e, 0x07, 0xf1, 0x11, 0xca, 0x09, 0xb7, 0x07, 0xf1, 0x18, 0xc4, 0x00,
+ 0x49, 0x00, 0xef, 0xc1, 0xc5, 0x00, 0x2c, 0x00, 0x1a, 0xc0, 0xc2, 0x06,
+ 0xdb, 0x01, 0x66, 0x29, 0xc3, 0x07, 0x4a, 0x01, 0x66, 0xd8, 0xc3, 0x01,
+ 0x69, 0x01, 0x66, 0x69, 0x83, 0x01, 0x66, 0x7b, 0x03, 0x77, 0xf2, 0xc2,
+ 0x06, 0xdb, 0x01, 0x66, 0x98, 0xc2, 0x04, 0x2b, 0x01, 0x66, 0xf9, 0xc2,
+ 0x16, 0x5a, 0x01, 0x67, 0x08, 0xc2, 0x06, 0xdb, 0x01, 0x66, 0x21, 0xc3,
+ 0x07, 0x4a, 0x01, 0x66, 0xd0, 0xc3, 0x01, 0x69, 0x01, 0x66, 0x61, 0x83,
+ 0x01, 0x66, 0x73, 0x03, 0x77, 0xf6, 0xc2, 0x06, 0xdb, 0x01, 0x66, 0x90,
+ 0xc2, 0x04, 0x2b, 0x01, 0x66, 0xf1, 0xc2, 0x16, 0x5a, 0x01, 0x67, 0x00,
+ 0xc8, 0x02, 0x9f, 0x0f, 0xc8, 0x09, 0xc9, 0x3b, 0x79, 0x0f, 0xc8, 0x00,
+ 0x42, 0x00, 0x45, 0xc3, 0x77, 0xfa, 0x16, 0xc3, 0x78, 0x04, 0x08, 0xc3,
+ 0x78, 0x10, 0x15, 0xc3, 0x78, 0x1c, 0xc5, 0x06, 0xdb, 0x01, 0x92, 0xc1,
+ 0xc4, 0x26, 0x78, 0x01, 0x92, 0xc8, 0x42, 0x00, 0x45, 0xc3, 0x78, 0x28,
+ 0x16, 0xc3, 0x78, 0x32, 0x08, 0xc3, 0x78, 0x3e, 0x15, 0xc3, 0x78, 0x4a,
+ 0xc5, 0x06, 0xdb, 0x01, 0x95, 0x99, 0xc4, 0x26, 0x78, 0x01, 0x95, 0xa0,
+ 0x42, 0x00, 0x45, 0xc3, 0x78, 0x56, 0x16, 0xc3, 0x78, 0x60, 0x08, 0xc3,
+ 0x78, 0x6c, 0x15, 0xc3, 0x78, 0x78, 0xc5, 0x06, 0xdb, 0x01, 0x95, 0xe9,
+ 0xc4, 0x26, 0x78, 0x01, 0x95, 0xf0, 0x96, 0x01, 0x95, 0x09, 0xc5, 0x53,
+ 0x93, 0x01, 0x95, 0x70, 0xa0, 0x09, 0x2a, 0x01, 0x8f, 0x09, 0x1a, 0x30,
+ 0x94, 0x09, 0x19, 0xf9, 0xc7, 0x5d, 0x9b, 0x09, 0x19, 0xf1, 0x8e, 0x09,
+ 0x19, 0xe8, 0x86, 0x09, 0x29, 0xe9, 0x9f, 0x09, 0x19, 0x8a, 0x03, 0x78,
+ 0x84, 0x8e, 0x09, 0x19, 0x71, 0x46, 0x25, 0xd4, 0x43, 0x78, 0x8a, 0xd9,
+ 0x1f, 0xe0, 0x09, 0x15, 0xe9, 0xd9, 0x1a, 0xe7, 0x09, 0x15, 0xe0, 0xc7,
+ 0x25, 0xd4, 0x09, 0x15, 0xb0, 0xc5, 0x39, 0xc7, 0x09, 0x16, 0x68, 0xc4,
+ 0x96, 0x9c, 0x09, 0x16, 0x49, 0xc2, 0x00, 0x65, 0x09, 0x16, 0x40, 0xc2,
+ 0x38, 0xb6, 0x09, 0x29, 0x81, 0x84, 0x09, 0x15, 0x08, 0x0a, 0xc3, 0x78,
+ 0x96, 0xc2, 0x00, 0x65, 0x09, 0x14, 0xf8, 0xc2, 0x01, 0xe2, 0x09, 0x15,
+ 0x31, 0x94, 0x09, 0x15, 0x29, 0x8f, 0x09, 0x15, 0x21, 0x84, 0x09, 0x15,
+ 0x19, 0x9f, 0x09, 0x15, 0x10, 0xc2, 0x00, 0x33, 0x09, 0x14, 0xd9, 0xc2,
+ 0x06, 0x4e, 0x09, 0x14, 0xd0, 0x84, 0x09, 0x14, 0xc0, 0xc4, 0xdc, 0xae,
+ 0x09, 0x29, 0x61, 0xc7, 0x65, 0xd1, 0x09, 0x29, 0x59, 0xc2, 0x01, 0xe2,
+ 0x09, 0x12, 0xf9, 0xca, 0xa0, 0xb2, 0x09, 0x12, 0xf0, 0xc3, 0x02, 0x2c,
+ 0x09, 0x29, 0x41, 0xd0, 0x5e, 0x12, 0x09, 0x12, 0xb8, 0x17, 0xc3, 0x78,
+ 0xa2, 0x8b, 0x09, 0x1c, 0x92, 0x03, 0x78, 0xaa, 0x47, 0x25, 0xd4, 0x43,
+ 0x78, 0xb0, 0xc2, 0x05, 0x1d, 0x09, 0x12, 0xc9, 0x87, 0x09, 0x12, 0xc0,
+ 0xc2, 0x01, 0xe2, 0x09, 0x12, 0xa3, 0x03, 0x78, 0xbf, 0x90, 0x09, 0x12,
+ 0x98, 0xc2, 0x02, 0xad, 0x09, 0x13, 0xc8, 0xc2, 0x5d, 0xd4, 0x09, 0x13,
+ 0xb9, 0xc5, 0xda, 0x7e, 0x09, 0x13, 0xb1, 0xc2, 0x02, 0x6f, 0x09, 0x13,
+ 0xa9, 0xc2, 0x00, 0xdb, 0x09, 0x13, 0xa1, 0xc4, 0xe1, 0x67, 0x09, 0x13,
+ 0x99, 0xc8, 0x6a, 0x1e, 0x09, 0x13, 0x91, 0xc3, 0x6c, 0x49, 0x09, 0x13,
+ 0x89, 0xc3, 0x84, 0x21, 0x09, 0x13, 0x81, 0xc2, 0x01, 0x2d, 0x09, 0x13,
+ 0x79, 0xc6, 0xcb, 0x87, 0x09, 0x13, 0x70, 0xd9, 0x20, 0x12, 0x09, 0x13,
+ 0x38, 0xc3, 0x32, 0xbf, 0x09, 0x29, 0x09, 0xc2, 0x01, 0x30, 0x09, 0x29,
+ 0x01, 0xc9, 0xb1, 0x8b, 0x09, 0x11, 0xb8, 0xc2, 0x02, 0x1c, 0x09, 0x1c,
+ 0x69, 0xc2, 0x01, 0xdd, 0x09, 0x11, 0xe1, 0x83, 0x09, 0x11, 0xd2, 0x03,
+ 0x78, 0xc5, 0x16, 0xc3, 0x78, 0xcb, 0xc3, 0x0b, 0x64, 0x09, 0x28, 0xe3,
+ 0x03, 0x78, 0xd7, 0x0a, 0xc3, 0x78, 0xdd, 0xc4, 0x04, 0x59, 0x09, 0x28,
+ 0xd1, 0x15, 0xc3, 0x78, 0xe9, 0xc4, 0x73, 0x32, 0x09, 0x10, 0x03, 0x03,
+ 0x78, 0xf3, 0x10, 0xc3, 0x78, 0xf7, 0xca, 0xa7, 0xb0, 0x09, 0x10, 0x59,
+ 0x42, 0x00, 0xdb, 0xc3, 0x78, 0xff, 0x0d, 0xc3, 0x79, 0x0b, 0xc2, 0x03,
+ 0x4e, 0x09, 0x10, 0x21, 0xc9, 0x5d, 0x99, 0x09, 0x10, 0x11, 0xc3, 0x62,
+ 0x19, 0x09, 0x0f, 0xf9, 0xc2, 0x00, 0x65, 0x09, 0x0f, 0xf0, 0xca, 0x8d,
+ 0x2d, 0x09, 0x1c, 0x48, 0x17, 0xc3, 0x79, 0x15, 0xcd, 0x7b, 0x56, 0x09,
+ 0x28, 0xa1, 0xd5, 0x36, 0x5c, 0x09, 0x28, 0x99, 0xc2, 0x00, 0xec, 0x09,
+ 0x28, 0x91, 0xc3, 0x04, 0x2a, 0x09, 0x28, 0x83, 0x03, 0x79, 0x1f, 0xc2,
+ 0x01, 0x30, 0x09, 0x28, 0x79, 0xc3, 0xd5, 0x59, 0x09, 0x28, 0x70, 0x17,
+ 0xc3, 0x79, 0x25, 0x16, 0xc3, 0x79, 0x33, 0xc2, 0x00, 0xdb, 0x09, 0x28,
+ 0x31, 0xc3, 0xaa, 0xfe, 0x09, 0x28, 0x29, 0xce, 0x75, 0x04, 0x09, 0x28,
+ 0x21, 0xc3, 0x62, 0x19, 0x09, 0x28, 0x19, 0xc3, 0x02, 0x2c, 0x09, 0x28,
+ 0x10, 0x47, 0x03, 0x4c, 0x43, 0x79, 0x3d, 0xca, 0x9e, 0x00, 0x09, 0x26,
+ 0xa1, 0x09, 0xc3, 0x79, 0x55, 0x97, 0x09, 0x0f, 0x2b, 0x03, 0x79, 0x69,
+ 0x16, 0xc3, 0x79, 0x7f, 0x15, 0xc3, 0x79, 0x89, 0xc2, 0x02, 0x6f, 0x09,
+ 0x0e, 0xd9, 0x0f, 0xc3, 0x79, 0x93, 0x0e, 0xc3, 0x79, 0xa0, 0x0d, 0xc3,
+ 0x79, 0xb3, 0x0b, 0xc3, 0x79, 0xbe, 0x0a, 0xc3, 0x79, 0xcb, 0xc2, 0x00,
+ 0xc4, 0x09, 0x0e, 0x19, 0xc3, 0x14, 0x96, 0x09, 0x0e, 0x11, 0x04, 0xc3,
+ 0x79, 0xd8, 0x83, 0x09, 0x0d, 0xca, 0x03, 0x79, 0xe2, 0xd4, 0x39, 0xbc,
+ 0x09, 0x0f, 0x80, 0xc9, 0xa6, 0x17, 0x09, 0x0f, 0x70, 0x8e, 0x09, 0x1c,
+ 0x28, 0x00, 0x43, 0x79, 0xf6, 0xd1, 0x55, 0x1f, 0x09, 0x0b, 0x30, 0xc2,
+ 0x00, 0xac, 0x09, 0x0b, 0xb9, 0xc2, 0x04, 0x2b, 0x09, 0x0b, 0xb1, 0xc2,
+ 0x05, 0xc3, 0x09, 0x0b, 0xa8, 0xcf, 0x6a, 0x17, 0x09, 0x08, 0xd0, 0x45,
+ 0x03, 0x4e, 0xc3, 0x7a, 0x02, 0xc3, 0x58, 0xf6, 0x09, 0x08, 0xa8, 0x0a,
+ 0xc3, 0x7a, 0x14, 0xc2, 0x01, 0xdf, 0x09, 0x07, 0x41, 0x03, 0x43, 0x7a,
+ 0x1f, 0x87, 0x09, 0x26, 0x23, 0x03, 0x7a, 0x27, 0xc2, 0x05, 0x1d, 0x09,
+ 0x07, 0x02, 0x03, 0x7a, 0x2d, 0xc3, 0x5d, 0xd1, 0x09, 0x26, 0x19, 0x8b,
+ 0x09, 0x06, 0xf9, 0xc9, 0xa7, 0xb1, 0x09, 0x06, 0xf0, 0xc2, 0x53, 0x31,
+ 0x09, 0x26, 0x11, 0x83, 0x09, 0x06, 0xea, 0x03, 0x7a, 0x33, 0x17, 0xc3,
+ 0x7a, 0x3a, 0xc2, 0x02, 0xfb, 0x09, 0x06, 0xd3, 0x03, 0x7a, 0x46, 0x03,
+ 0x43, 0x7a, 0x4c, 0x03, 0xc3, 0x7a, 0x56, 0xc3, 0xc5, 0xa4, 0x09, 0x06,
+ 0xa9, 0xc9, 0xaa, 0x44, 0x09, 0x06, 0xa0, 0x83, 0x09, 0x25, 0xdb, 0x03,
+ 0x7a, 0x63, 0x8b, 0x09, 0x06, 0x6a, 0x03, 0x7a, 0x70, 0xc3, 0x1a, 0x52,
+ 0x09, 0x25, 0xd1, 0x90, 0x09, 0x06, 0x4b, 0x03, 0x7a, 0x7d, 0x8e, 0x09,
+ 0x06, 0x3a, 0x03, 0x7a, 0x83, 0x17, 0xc3, 0x7a, 0x89, 0x8b, 0x09, 0x06,
+ 0x23, 0x03, 0x7a, 0x93, 0x83, 0x09, 0x06, 0x18, 0x03, 0xc3, 0x7a, 0x99,
+ 0xc2, 0x00, 0x33, 0x09, 0x06, 0x0a, 0x03, 0x7a, 0xa9, 0xc2, 0x01, 0xe2,
+ 0x09, 0x05, 0xeb, 0x03, 0x7a, 0xaf, 0x90, 0x09, 0x05, 0xe3, 0x03, 0x7a,
+ 0xb6, 0xd0, 0x58, 0xf2, 0x09, 0x05, 0xd9, 0x46, 0x25, 0xd4, 0x43, 0x7a,
+ 0xbc, 0x86, 0x09, 0x07, 0x5a, 0x03, 0x7a, 0xce, 0xd3, 0x40, 0xa0, 0x09,
+ 0x06, 0xb9, 0xc7, 0x6a, 0x1f, 0x09, 0x06, 0xb0, 0xcb, 0x8c, 0xf5, 0x09,
+ 0x05, 0x80, 0xc8, 0x0b, 0x08, 0x09, 0x05, 0x68, 0xca, 0x8c, 0xf6, 0x09,
+ 0x05, 0x20, 0x8f, 0x09, 0x24, 0xfb, 0x03, 0x7a, 0xd4, 0xc5, 0xdc, 0x36,
+ 0x09, 0x24, 0xf0, 0xc4, 0x5d, 0xd2, 0x09, 0x24, 0xe3, 0x03, 0x7a, 0xda,
+ 0x94, 0x09, 0x24, 0xd8, 0xc2, 0x01, 0xe2, 0x09, 0x24, 0xb1, 0xc7, 0xc4,
+ 0x4f, 0x09, 0x24, 0xa8, 0xc8, 0x10, 0x61, 0x09, 0x24, 0x78, 0x47, 0x5d,
+ 0xd5, 0xc3, 0x7a, 0xe0, 0xc2, 0x01, 0xe2, 0x09, 0x03, 0x68, 0x97, 0x09,
+ 0x03, 0x2b, 0x03, 0x7a, 0xec, 0x83, 0x09, 0x03, 0x20, 0xc8, 0x36, 0x68,
+ 0x09, 0x03, 0x10, 0xc2, 0x04, 0x3d, 0x09, 0x02, 0xf9, 0x8b, 0x09, 0x02,
+ 0xeb, 0x03, 0x7a, 0xf6, 0x83, 0x09, 0x02, 0xda, 0x03, 0x7a, 0xfc, 0x8b,
+ 0x09, 0x02, 0xd1, 0xc4, 0x4f, 0x68, 0x09, 0x02, 0xc8, 0xc3, 0x01, 0xc3,
+ 0x09, 0x02, 0xc1, 0xca, 0x97, 0xbe, 0x09, 0x02, 0xb8, 0xdf, 0x0d, 0x1f,
+ 0x09, 0x01, 0xe8, 0xe0, 0x0b, 0x47, 0x09, 0x01, 0xd8, 0xc2, 0x02, 0x1c,
+ 0x09, 0x14, 0x69, 0xc2, 0x04, 0x3d, 0x09, 0x14, 0x61, 0xc3, 0x45, 0xb0,
+ 0x09, 0x14, 0x58, 0xc8, 0x20, 0xa9, 0x00, 0x26, 0xe9, 0xc8, 0x25, 0xfb,
+ 0x00, 0x24, 0xb8, 0xc8, 0x20, 0xa9, 0x00, 0x26, 0xe1, 0xc8, 0x25, 0xfb,
+ 0x00, 0x24, 0xb0, 0xc7, 0xc7, 0xeb, 0x00, 0x6d, 0x41, 0xc6, 0x8e, 0x9c,
+ 0x00, 0x6d, 0x70, 0xc7, 0xc4, 0x25, 0x00, 0x6d, 0x51, 0xc6, 0x8e, 0x9c,
+ 0x00, 0x6d, 0x80, 0xc5, 0x20, 0xe5, 0x0e, 0xce, 0xa1, 0xc7, 0xb7, 0x3a,
+ 0x0e, 0xce, 0x28, 0xc5, 0x20, 0xe5, 0x0e, 0xce, 0x99, 0xc7, 0xb7, 0x3a,
+ 0x0e, 0xce, 0x20, 0xc5, 0x20, 0xe5, 0x0e, 0xce, 0x91, 0xc7, 0xb7, 0x3a,
+ 0x0e, 0xce, 0x18, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x99, 0xca, 0x9e, 0x8c,
+ 0x0e, 0xcd, 0x60, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x91, 0xca, 0x9e, 0x8c,
+ 0x0e, 0xcd, 0x58, 0xc5, 0xdd, 0x17, 0x0e, 0xcd, 0x89, 0xca, 0x9e, 0x8c,
+ 0x0e, 0xcd, 0x50, 0xc9, 0x51, 0x1a, 0x0e, 0xd3, 0x30, 0xc9, 0x51, 0x1a,
+ 0x0e, 0xd3, 0x20, 0xcb, 0x57, 0x45, 0x0e, 0xd1, 0x19, 0xc6, 0x00, 0x58,
+ 0x0e, 0xd1, 0x10, 0xcb, 0x57, 0x45, 0x0e, 0xd1, 0x31, 0xc6, 0x00, 0x58,
+ 0x0e, 0xd1, 0x28, 0xc4, 0x0e, 0x65, 0x0e, 0xc8, 0x21, 0xc5, 0x0e, 0xce,
+ 0x0e, 0xc7, 0xab, 0x03, 0x7b, 0x02, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x03,
+ 0x03, 0x7b, 0x06, 0x47, 0x04, 0xcb, 0xc3, 0x7b, 0x0a, 0x45, 0x00, 0x9d,
+ 0xc3, 0x7b, 0x2f, 0x47, 0x13, 0x95, 0xc3, 0x7b, 0x5c, 0xdb, 0x18, 0xdb,
+ 0x0e, 0xc2, 0x50, 0x46, 0xd1, 0x5d, 0xc3, 0x7b, 0x84, 0x46, 0x0e, 0xce,
+ 0xc3, 0x7b, 0x99, 0xc4, 0x0e, 0x65, 0x0e, 0xc2, 0xe3, 0x03, 0x7b, 0xab,
+ 0xd4, 0x3a, 0xac, 0x0e, 0xc2, 0xd9, 0x08, 0x43, 0x7b, 0xaf, 0x00, 0x43,
+ 0x7b, 0xc1, 0x00, 0x43, 0x7b, 0xd9, 0xc6, 0x13, 0x95, 0x0e, 0xc5, 0x99,
+ 0xdd, 0x11, 0x17, 0x0e, 0xc5, 0x68, 0xc5, 0x06, 0x82, 0x0e, 0xc5, 0x1b,
+ 0x03, 0x7b, 0xe5, 0xc2, 0x02, 0xae, 0x0e, 0xc4, 0xb0, 0xc5, 0x06, 0x82,
+ 0x0e, 0xc0, 0x23, 0x03, 0x7b, 0xee, 0xc6, 0x04, 0xcb, 0x0e, 0xc6, 0x2b,
+ 0x03, 0x7b, 0xf2, 0xc4, 0x00, 0x9d, 0x0e, 0xc5, 0x3b, 0x03, 0x7b, 0xf8,
+ 0xc6, 0x13, 0x95, 0x0e, 0xc4, 0x53, 0x03, 0x7b, 0xfe, 0x46, 0x0e, 0xce,
+ 0xc3, 0x7c, 0x02, 0xc8, 0xbc, 0x62, 0x0e, 0xc4, 0x11, 0xc4, 0x05, 0x75,
+ 0x0e, 0xc3, 0xdb, 0x03, 0x7c, 0x11, 0xc5, 0x03, 0x13, 0x0e, 0xc3, 0xf1,
+ 0x08, 0x43, 0x7c, 0x15, 0x47, 0x04, 0xcb, 0xc3, 0x7c, 0x21, 0x52, 0x3c,
+ 0x00, 0xc3, 0x7c, 0x30, 0xca, 0x4c, 0x69, 0x0e, 0xc5, 0xc9, 0xc8, 0xbc,
+ 0x5a, 0x0e, 0xc3, 0x50, 0x00, 0x43, 0x7c, 0x42, 0x00, 0x43, 0x7c, 0x6f,
+ 0xde, 0x0e, 0xc8, 0x0e, 0xc7, 0x49, 0xdc, 0x13, 0x89, 0x0e, 0xc6, 0xb3,
+ 0x03, 0x7c, 0x81, 0x46, 0x0e, 0xce, 0xc3, 0x7c, 0x87, 0xc8, 0xbc, 0x62,
+ 0x0e, 0xc3, 0x41, 0xd6, 0x18, 0xdb, 0x0e, 0xc2, 0x48, 0x47, 0x04, 0xcb,
+ 0xc3, 0x7c, 0x93, 0xc5, 0x06, 0x82, 0x0e, 0xc0, 0x0b, 0x03, 0x7c, 0xa2,
+ 0xcb, 0x13, 0x90, 0x0e, 0xc5, 0x89, 0x47, 0x13, 0x95, 0x43, 0x7c, 0xa6,
+ 0xc7, 0x27, 0xb2, 0x0e, 0xc3, 0xd1, 0xc4, 0x0e, 0xe2, 0x0e, 0xc3, 0xc0,
+ 0xc5, 0x0e, 0xd4, 0x0e, 0xd0, 0x29, 0xc8, 0x45, 0x27, 0x0e, 0xd0, 0x18,
+ 0xc5, 0x0e, 0xd4, 0x0e, 0xd0, 0x21, 0xc4, 0x00, 0x70, 0x0e, 0xd0, 0x11,
+ 0xc8, 0x45, 0x27, 0x0e, 0xd0, 0x08, 0xc4, 0x03, 0x14, 0x0e, 0xce, 0xe9,
+ 0xc4, 0xa2, 0x4c, 0x0e, 0xce, 0xe0, 0x46, 0x20, 0xe5, 0xc3, 0x7c, 0xb2,
+ 0x48, 0xb7, 0x3a, 0x43, 0x7c, 0xbe, 0xc5, 0x17, 0x14, 0x0e, 0xcb, 0x3b,
+ 0x03, 0x7c, 0xca, 0xc6, 0x01, 0xdb, 0x0e, 0xcb, 0x31, 0xc5, 0x03, 0x13,
+ 0x0e, 0xcb, 0x28, 0x46, 0x17, 0x14, 0xc3, 0x7c, 0xd0, 0x46, 0x03, 0x13,
+ 0x43, 0x7c, 0xdc, 0x46, 0x17, 0x14, 0xc3, 0x7c, 0xe8, 0x46, 0x03, 0x13,
+ 0x43, 0x7c, 0xf4, 0x47, 0x2c, 0x2e, 0xc3, 0x7d, 0x00, 0xcc, 0x8a, 0x39,
+ 0x0e, 0xce, 0x49, 0xcc, 0x81, 0xe1, 0x0e, 0xce, 0x40, 0x46, 0x17, 0x14,
+ 0xc3, 0x7d, 0x0c, 0x46, 0x03, 0x13, 0x43, 0x7d, 0x18, 0xc2, 0x00, 0x15,
+ 0x0e, 0xce, 0xc0, 0x46, 0x20, 0xe5, 0xc3, 0x7d, 0x24, 0x48, 0xb7, 0x3a,
+ 0x43, 0x7d, 0x30, 0xc5, 0x17, 0x14, 0x0e, 0xcd, 0xb1, 0xc6, 0x01, 0xdb,
+ 0x0e, 0xcd, 0xa9, 0xc5, 0x03, 0x13, 0x0e, 0xcd, 0xa0, 0xc5, 0xdd, 0x17,
+ 0x0e, 0xcd, 0x81, 0xca, 0x9e, 0x8c, 0x0e, 0xcd, 0x48, 0x47, 0x2c, 0x2e,
+ 0xc3, 0x7d, 0x3c, 0x47, 0x00, 0x58, 0x43, 0x7d, 0x4e, 0x0a, 0xc3, 0x7d,
+ 0x60, 0x42, 0x00, 0x8e, 0xc3, 0x7d, 0x6c, 0x48, 0x15, 0x02, 0x43, 0x7d,
+ 0x78, 0xc6, 0x01, 0xdb, 0x0e, 0xcd, 0x09, 0xc5, 0x03, 0x13, 0x0e, 0xcd,
+ 0x00, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0x63, 0x03, 0x7d, 0x8d, 0xc6, 0x01,
+ 0xdb, 0x0e, 0xc9, 0x59, 0xc5, 0x03, 0x13, 0x0e, 0xc9, 0x50, 0xc2, 0x00,
+ 0x15, 0x0e, 0xcb, 0x20, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x00, 0xc5, 0x03,
+ 0x13, 0x0e, 0xc9, 0x31, 0xc5, 0x17, 0x14, 0x0e, 0xc9, 0x28, 0xd0, 0x59,
+ 0x02, 0x08, 0xae, 0x59, 0xd2, 0x48, 0x8f, 0x08, 0xae, 0x50, 0xc8, 0x0d,
+ 0x03, 0x01, 0x0b, 0xf0, 0x00, 0x43, 0x7d, 0x93, 0xdf, 0x0d, 0x3e, 0x01,
+ 0x4b, 0x79, 0x06, 0x43, 0x7d, 0xa5, 0xd2, 0x05, 0xd4, 0x0f, 0xc0, 0x19,
+ 0xd5, 0x03, 0xd2, 0x0f, 0xc0, 0x98, 0xca, 0x03, 0x87, 0x01, 0x0d, 0x99,
+ 0xc9, 0x01, 0x88, 0x01, 0x0d, 0x90, 0xd6, 0x2e, 0x3e, 0x01, 0x1b, 0xe1,
+ 0xc3, 0x13, 0x1d, 0x01, 0x15, 0xf0, 0xc9, 0x33, 0xad, 0x01, 0x4c, 0x90,
+ 0x45, 0x00, 0x8c, 0xc3, 0x7d, 0xab, 0xc6, 0x10, 0x9d, 0x01, 0x5b, 0x91,
+ 0x44, 0x00, 0x9a, 0x43, 0x7d, 0xd5, 0xc3, 0x14, 0xa7, 0x01, 0x48, 0xb3,
+ 0x03, 0x7d, 0xdb, 0xd2, 0x05, 0xd5, 0x01, 0x5f, 0x70, 0xcf, 0x62, 0x3d,
+ 0x01, 0x4b, 0x69, 0x46, 0x00, 0xd4, 0xc3, 0x7d, 0xe1, 0xc6, 0x10, 0x9d,
+ 0x01, 0x4a, 0xb9, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xf8, 0x46, 0x00, 0xd4,
+ 0xc3, 0x7d, 0xe7, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xd9, 0xc6, 0x10, 0x9d,
+ 0x01, 0x4a, 0x98, 0xcf, 0x2c, 0x35, 0x01, 0x48, 0xa1, 0xd6, 0x2d, 0x62,
+ 0x01, 0x48, 0xa8, 0xc2, 0x02, 0xfa, 0x00, 0x70, 0x11, 0xc3, 0x05, 0x21,
+ 0x00, 0x70, 0x19, 0xc3, 0x0c, 0x26, 0x00, 0x70, 0x21, 0xc2, 0x00, 0x45,
+ 0x00, 0x70, 0x28, 0xc3, 0x93, 0x9b, 0x00, 0x72, 0x19, 0xc4, 0xcb, 0x97,
+ 0x00, 0x72, 0x20, 0x87, 0x00, 0x71, 0xb8, 0x03, 0xc3, 0x7d, 0xef, 0xc3,
+ 0x38, 0x86, 0x00, 0x70, 0xb1, 0xc3, 0x08, 0x48, 0x00, 0x70, 0xc0, 0xc3,
+ 0x38, 0x86, 0x00, 0x70, 0xe1, 0xc2, 0x00, 0xd1, 0x00, 0x70, 0xf0, 0xc2,
+ 0x01, 0x23, 0x00, 0x72, 0x49, 0xc2, 0x00, 0x2c, 0x00, 0x72, 0x50, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe0, 0xb1, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x40, 0x44,
+ 0x19, 0x6a, 0xc3, 0x7d, 0xf9, 0xce, 0x43, 0x77, 0x07, 0xed, 0x29, 0xd7,
+ 0x26, 0xea, 0x07, 0xed, 0x38, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xa9, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe5, 0x38, 0xd7, 0x26, 0xea, 0x07, 0xed, 0x31, 0xce,
+ 0x43, 0x77, 0x07, 0xed, 0xf0, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xc1, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe5, 0x50, 0xce, 0x43, 0x77, 0x07, 0xea, 0xd1, 0xd7,
+ 0x26, 0xea, 0x07, 0xea, 0xd8, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xb9, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe5, 0x48, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x91, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe6, 0xc0, 0xd1, 0x30, 0xc1, 0x07, 0xec, 0x99, 0xd1,
+ 0x50, 0x13, 0x07, 0xec, 0xa0, 0xcd, 0x00, 0xfa, 0x07, 0xe7, 0xf1, 0xca,
+ 0x26, 0xf7, 0x07, 0xe8, 0xd0, 0x43, 0x2b, 0xba, 0xc3, 0x7e, 0x05, 0x43,
+ 0x02, 0x98, 0x43, 0x7e, 0x11, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x49, 0xca,
+ 0x26, 0xf7, 0x07, 0xe9, 0x41, 0x0b, 0xc3, 0x7e, 0x27, 0x45, 0x00, 0x8c,
+ 0x43, 0x7e, 0x33, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0xc9, 0xcd, 0x00, 0xfa,
+ 0x07, 0xe7, 0xe8, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x29, 0x0b, 0xc3, 0x7e,
+ 0x3f, 0xd3, 0x43, 0x72, 0x07, 0xeb, 0x49, 0xcb, 0x64, 0x7b, 0x07, 0xe9,
+ 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x39, 0x0b, 0xc3, 0x7e, 0x4b, 0xcb,
+ 0x64, 0x7b, 0x07, 0xe9, 0xc8, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x49, 0xcd,
+ 0x00, 0xfa, 0x07, 0xe8, 0x68, 0x00, 0xc3, 0x7e, 0x57, 0xd1, 0x56, 0x51,
+ 0x07, 0xe2, 0xf8, 0x00, 0xc3, 0x7e, 0x63, 0xd1, 0x56, 0x51, 0x07, 0xe2,
+ 0xf0, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x91, 0xcd, 0x00, 0xfa, 0x07, 0xe3,
+ 0x00, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xf9, 0xcb, 0x10, 0xb5, 0x07, 0xe5,
+ 0x80, 0x44, 0x19, 0x6a, 0xc3, 0x7e, 0x6f, 0xd1, 0x30, 0xc1, 0x07, 0xeb,
+ 0x09, 0x45, 0x19, 0x60, 0x43, 0x7e, 0x7b, 0xcc, 0x00, 0xfb, 0x07, 0xe0,
+ 0xf1, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x78, 0xd7, 0x26, 0xea, 0x07, 0xed,
+ 0x41, 0xce, 0x43, 0x77, 0x07, 0xee, 0x30, 0x0b, 0xc3, 0x7e, 0x87, 0xcb,
+ 0x64, 0x7b, 0x07, 0xe9, 0xa9, 0xd6, 0x30, 0xbc, 0x07, 0xea, 0xe0, 0xcc,
+ 0x10, 0xb4, 0x07, 0xe9, 0x89, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x40, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe0, 0xe1, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x68, 0xd0,
+ 0x50, 0xf1, 0x07, 0xea, 0xe9, 0xd7, 0x26, 0xea, 0x07, 0xea, 0xf0, 0x0b,
+ 0xc3, 0x7e, 0x93, 0x4a, 0x74, 0x6e, 0x43, 0x7e, 0x9f, 0x0b, 0xc3, 0x7e,
+ 0xab, 0x45, 0x00, 0x8c, 0x43, 0x7e, 0xb7, 0xcd, 0x00, 0xfa, 0x07, 0xe8,
+ 0x79, 0xca, 0x26, 0xf7, 0x07, 0xe9, 0x58, 0xca, 0x26, 0xf7, 0x07, 0xe9,
+ 0x09, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x28, 0xca, 0x26, 0xf7, 0x07, 0xe9,
+ 0x11, 0xcd, 0x00, 0xfa, 0x07, 0xe8, 0x30, 0x43, 0x12, 0xad, 0xc3, 0x7e,
+ 0xc3, 0x00, 0x43, 0x7e, 0xcd, 0xcd, 0x77, 0x53, 0x07, 0xee, 0x79, 0xcf,
+ 0x30, 0xd9, 0x07, 0xef, 0xa8, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x51, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe5, 0xd8, 0xce, 0x43, 0x77, 0x07, 0xed, 0xb1, 0x45,
+ 0x19, 0x60, 0xc3, 0x7e, 0xd9, 0xd7, 0x26, 0xea, 0x07, 0xeb, 0xc0, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe1, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0xd0, 0xca,
+ 0x26, 0xf7, 0x07, 0xeb, 0xa9, 0xcc, 0x10, 0xb4, 0x07, 0xee, 0x20, 0xcd,
+ 0x00, 0xfa, 0x07, 0xe2, 0xe9, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x80, 0xca,
+ 0x26, 0xf7, 0x07, 0xe9, 0xe1, 0xcd, 0x00, 0xfa, 0x07, 0xe9, 0xe8, 0x49,
+ 0x82, 0xa3, 0xc3, 0x7e, 0xe5, 0x0f, 0x43, 0x7e, 0xef, 0xcd, 0x00, 0xfa,
+ 0x07, 0xe7, 0xb1, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0x90, 0xcd, 0x00, 0xfa,
+ 0x07, 0xe7, 0xa9, 0xca, 0x26, 0xf7, 0x07, 0xe8, 0x88, 0x0b, 0xc3, 0x7e,
+ 0xfb, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xd1, 0x45, 0x00, 0x8c, 0x43, 0x7f,
+ 0x07, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x31, 0xcb, 0x10, 0xb5, 0x07, 0xe5,
+ 0xc0, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xd9, 0xcd, 0x00, 0xfa, 0x07, 0xe0,
+ 0xa0, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xd1, 0xcd, 0x00, 0xfa, 0x07, 0xe0,
+ 0x98, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0xc1, 0x0b, 0xc3, 0x7f, 0x19, 0xcb,
+ 0x64, 0x7b, 0x07, 0xe7, 0x28, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x71, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe5, 0x20, 0xd1, 0x30, 0xc1, 0x07, 0xea, 0xa9, 0xd0,
+ 0x50, 0xf1, 0x07, 0xea, 0xb1, 0xd1, 0x50, 0xf0, 0x07, 0xea, 0xb9, 0xce,
+ 0x43, 0x77, 0x07, 0xed, 0x19, 0xd7, 0x26, 0xea, 0x07, 0xed, 0x20, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe0, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xe5, 0x18, 0xd1,
+ 0x50, 0x13, 0x07, 0xea, 0xa1, 0xce, 0x43, 0x77, 0x07, 0xed, 0x09, 0xd7,
+ 0x26, 0xea, 0x07, 0xed, 0x10, 0x0b, 0xc3, 0x7f, 0x25, 0x45, 0x00, 0x8c,
+ 0x43, 0x7f, 0x31, 0xcc, 0x10, 0xb4, 0x07, 0xe5, 0x29, 0xcb, 0x64, 0x7b,
+ 0x07, 0xe7, 0x20, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x59, 0xcb, 0x10, 0xb5,
+ 0x07, 0xe5, 0x08, 0xd1, 0x50, 0x13, 0x07, 0xea, 0x81, 0xce, 0x43, 0x77,
+ 0x07, 0xec, 0xf9, 0xd7, 0x26, 0xea, 0x07, 0xed, 0x00, 0x1b, 0xc3, 0x7f,
+ 0x3d, 0x03, 0xc3, 0x7f, 0x49, 0xcf, 0x60, 0x8a, 0x07, 0xe3, 0x39, 0x45,
+ 0x19, 0x60, 0xc3, 0x7f, 0x55, 0xcf, 0x69, 0x81, 0x07, 0xe3, 0x29, 0xce,
+ 0x72, 0xf0, 0x07, 0xe3, 0x21, 0x0a, 0xc3, 0x7f, 0x65, 0x46, 0x30, 0xc1,
+ 0xc3, 0x7f, 0x71, 0x42, 0x00, 0x5d, 0xc3, 0x7f, 0x7d, 0x43, 0x94, 0xf6,
+ 0xc3, 0x7f, 0x87, 0x42, 0x03, 0x53, 0xc3, 0x7f, 0x93, 0x44, 0xdf, 0x2b,
+ 0xc3, 0x7f, 0x9f, 0xd1, 0x50, 0xf0, 0x07, 0xe4, 0xc8, 0x0b, 0xc3, 0x7f,
+ 0xab, 0xd3, 0x43, 0x72, 0x07, 0xed, 0x70, 0xca, 0x26, 0xf7, 0x07, 0xec,
+ 0xe1, 0xcc, 0x10, 0xb4, 0x07, 0xec, 0xe8, 0xcc, 0x00, 0xfb, 0x07, 0xe2,
+ 0x61, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x98, 0xd1, 0x50, 0x13, 0x07, 0xec,
+ 0xa9, 0xd7, 0x26, 0xea, 0x07, 0xec, 0xb1, 0xce, 0x43, 0x77, 0x07, 0xed,
+ 0x98, 0xcc, 0x10, 0xb4, 0x07, 0xed, 0xc1, 0xca, 0x26, 0xf7, 0x07, 0xed,
+ 0xe8, 0xca, 0x26, 0xf7, 0x07, 0xec, 0xb9, 0xcc, 0x10, 0xb4, 0x07, 0xec,
+ 0xc0, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0xe1, 0xcb, 0x10, 0xb5, 0x07, 0xe6,
+ 0x40, 0x45, 0x19, 0x60, 0xc3, 0x7f, 0xb7, 0xce, 0x43, 0x77, 0x07, 0xed,
+ 0xb8, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0xd9, 0xcb, 0x10, 0xb5, 0x07, 0xe6,
+ 0x38, 0xca, 0x26, 0xf7, 0x07, 0xe4, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xe1,
+ 0xe8, 0xcd, 0x00, 0xfa, 0x07, 0xf7, 0xa9, 0xca, 0x26, 0xf7, 0x07, 0xf7,
+ 0xb0, 0x46, 0x05, 0x34, 0xc3, 0x7f, 0xc3, 0x46, 0x00, 0xd4, 0x43, 0x7f,
+ 0xcf, 0xca, 0x26, 0xf7, 0x07, 0xec, 0x39, 0xcc, 0x10, 0xb4, 0x07, 0xec,
+ 0x40, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x01, 0xcb, 0x10, 0xb5, 0x07, 0xe6,
+ 0x50, 0x45, 0x19, 0x60, 0xc3, 0x7f, 0xdb, 0xce, 0x43, 0x77, 0x07, 0xec,
+ 0x09, 0xd7, 0x26, 0xea, 0x07, 0xec, 0x10, 0xca, 0x26, 0xf7, 0x07, 0xec,
+ 0x21, 0xcc, 0x10, 0xb4, 0x07, 0xec, 0x18, 0xcc, 0x10, 0xb4, 0x07, 0xed,
+ 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xed, 0xe0, 0xca, 0x26, 0xf7, 0x07, 0xe3,
+ 0xf9, 0xcd, 0x00, 0xfa, 0x07, 0xe1, 0xb0, 0xca, 0x26, 0xf7, 0x07, 0xe3,
+ 0xf1, 0xcd, 0x00, 0xfa, 0x07, 0xe1, 0xa8, 0x0b, 0xc3, 0x7f, 0xe7, 0x45,
+ 0x00, 0x8c, 0x43, 0x7f, 0xf3, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x99, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe6, 0x10, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x41, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe4, 0xf8, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x39, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe4, 0xf0, 0x0b, 0xc3, 0x80, 0x05, 0xd3, 0x43, 0x72,
+ 0x07, 0xee, 0x10, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x11, 0xcc, 0x10, 0xb4,
+ 0x07, 0xe5, 0x00, 0x8f, 0x07, 0xea, 0x1b, 0x03, 0x80, 0x11, 0xc3, 0x3a,
+ 0x09, 0x07, 0xea, 0x28, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x41, 0xcb, 0x10,
+ 0xb5, 0x07, 0xe6, 0x88, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x39, 0xcb, 0x10,
+ 0xb5, 0x07, 0xe6, 0x80, 0xd1, 0x30, 0xc1, 0x07, 0xec, 0x71, 0xd1, 0x50,
+ 0x13, 0x07, 0xec, 0x79, 0xce, 0x43, 0x77, 0x07, 0xed, 0xc8, 0xcc, 0x00,
+ 0xfb, 0x07, 0xe2, 0x31, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x78, 0xd1, 0x30,
+ 0xc1, 0x07, 0xec, 0x49, 0xd1, 0x50, 0x13, 0x07, 0xec, 0x51, 0xce, 0x43,
+ 0x77, 0x07, 0xec, 0x58, 0xcc, 0x00, 0xfb, 0x07, 0xe2, 0x29, 0xcb, 0x10,
+ 0xb5, 0x07, 0xe6, 0x70, 0xd0, 0x50, 0xf1, 0x07, 0xec, 0x61, 0xd1, 0x50,
+ 0x13, 0x07, 0xec, 0x69, 0xce, 0x43, 0x77, 0x07, 0xee, 0x01, 0xd1, 0x50,
+ 0xf0, 0x07, 0xec, 0x90, 0xcb, 0x64, 0x7b, 0x07, 0xdf, 0xf9, 0x0b, 0xc3,
+ 0x80, 0x17, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xe9, 0x45, 0x00, 0x8c, 0x43,
+ 0x80, 0x23, 0x45, 0x00, 0x8c, 0xc3, 0x80, 0x33, 0x0b, 0xc3, 0x80, 0x3d,
+ 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x91, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0xa0,
+ 0x45, 0x00, 0x8c, 0xc3, 0x80, 0x49, 0x0b, 0xc3, 0x80, 0x55, 0xca, 0x26,
+ 0xf7, 0x07, 0xf6, 0x71, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0x80, 0x45, 0x00,
+ 0x8c, 0xc3, 0x80, 0x61, 0xcb, 0x64, 0x7b, 0x07, 0xdc, 0xa9, 0x0b, 0xc3,
+ 0x80, 0x71, 0xca, 0x26, 0xf7, 0x07, 0xdc, 0x98, 0xcb, 0x64, 0x7b, 0x07,
+ 0xdc, 0xc9, 0x0b, 0xc3, 0x80, 0x7d, 0xca, 0x26, 0xf7, 0x07, 0xdc, 0xb8,
+ 0x45, 0x00, 0x8c, 0xc3, 0x80, 0x89, 0x0b, 0xc3, 0x80, 0xa1, 0xca, 0x26,
+ 0xf7, 0x07, 0xf6, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0x00, 0x46, 0x02,
+ 0xd8, 0xc3, 0x80, 0xad, 0x0b, 0xc3, 0x80, 0xb9, 0xca, 0x26, 0xf7, 0x07,
+ 0xf4, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf5, 0x00, 0xca, 0x26, 0xf7, 0x07,
+ 0xdc, 0x59, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x50, 0xd6, 0x2c, 0xc8, 0x00,
+ 0x46, 0x20, 0x46, 0x02, 0xd8, 0xc3, 0x80, 0xc5, 0xcb, 0x64, 0x7b, 0x07,
+ 0xf6, 0x61, 0x0b, 0xc3, 0x80, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x50,
+ 0x19, 0xc3, 0x80, 0xdd, 0xc7, 0x06, 0x5f, 0x00, 0x32, 0x4b, 0x03, 0x80,
+ 0xec, 0xcd, 0x00, 0xfa, 0x07, 0xf4, 0x69, 0xca, 0x26, 0xf7, 0x07, 0xf4,
+ 0x70, 0x45, 0x00, 0x8c, 0xc3, 0x80, 0xf0, 0xcb, 0x64, 0x7b, 0x07, 0xdc,
+ 0x89, 0x0b, 0xc3, 0x81, 0x00, 0xca, 0x26, 0xf7, 0x07, 0xdc, 0x78, 0x00,
+ 0x43, 0x81, 0x0c, 0x00, 0x43, 0x81, 0x22, 0x00, 0x43, 0x81, 0x2e, 0x0b,
+ 0xc3, 0x81, 0x3a, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x31, 0xcb, 0x64, 0x7b,
+ 0x07, 0xf5, 0x40, 0x45, 0x00, 0x8c, 0xc3, 0x81, 0x46, 0xcb, 0x64, 0x7b,
+ 0x07, 0xdb, 0xe9, 0x0b, 0xc3, 0x81, 0x52, 0xca, 0x26, 0xf7, 0x07, 0xdb,
+ 0xd8, 0x00, 0x43, 0x81, 0x5e, 0xcc, 0x88, 0x05, 0x00, 0x46, 0x01, 0xcb,
+ 0x64, 0x7b, 0x07, 0xdb, 0x49, 0x0b, 0xc3, 0x81, 0x6e, 0xca, 0x26, 0xf7,
+ 0x07, 0xdb, 0x38, 0x00, 0x43, 0x81, 0x7a, 0x45, 0x00, 0x8c, 0xc3, 0x81,
+ 0x8a, 0x0f, 0xc3, 0x81, 0x9c, 0x0b, 0xc3, 0x81, 0xab, 0xca, 0x26, 0xf7,
+ 0x07, 0xf4, 0xb0, 0x00, 0x43, 0x81, 0xb7, 0x45, 0x00, 0x8c, 0xc3, 0x81,
+ 0xc7, 0x0b, 0xc3, 0x81, 0xd1, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x11, 0xcb,
+ 0x64, 0x7b, 0x07, 0xf6, 0x20, 0x00, 0x43, 0x81, 0xdd, 0x00, 0x43, 0x81,
+ 0xe9, 0x98, 0x00, 0x45, 0xf1, 0xca, 0xa6, 0xd4, 0x00, 0x45, 0xb8, 0xcb,
+ 0x10, 0xb5, 0x07, 0xda, 0xc1, 0xcc, 0x00, 0xfb, 0x07, 0xda, 0xb0, 0xcb,
+ 0x64, 0x7b, 0x07, 0xdb, 0x89, 0x0b, 0xc3, 0x81, 0xf9, 0xca, 0x26, 0xf7,
+ 0x07, 0xdb, 0x78, 0x45, 0x00, 0x8c, 0xc3, 0x82, 0x05, 0xc6, 0x17, 0xce,
+ 0x00, 0x36, 0x93, 0x03, 0x82, 0x18, 0x0b, 0xc3, 0x82, 0x1c, 0xca, 0x26,
+ 0xf7, 0x07, 0xf7, 0x91, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0xa0, 0xca, 0x26,
+ 0xf7, 0x07, 0xde, 0xe1, 0xcd, 0x00, 0xfa, 0x07, 0xde, 0xd8, 0x45, 0x00,
+ 0x8c, 0xc3, 0x82, 0x28, 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x69, 0xca, 0x26,
+ 0xf7, 0x07, 0xf5, 0x70, 0xcb, 0x64, 0x7b, 0x07, 0xdd, 0x19, 0x0b, 0xc3,
+ 0x82, 0x59, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x08, 0xca, 0x26, 0xf7, 0x07,
+ 0xdc, 0x69, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x60, 0x45, 0x00, 0x8c, 0xc3,
+ 0x82, 0x65, 0x0b, 0xc3, 0x82, 0x81, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x81,
+ 0xcb, 0x64, 0x7b, 0x07, 0xf4, 0x90, 0x00, 0x43, 0x82, 0x8d, 0xcb, 0x64,
+ 0x7b, 0x07, 0xda, 0xa9, 0x0b, 0xc3, 0x82, 0x9d, 0xca, 0x26, 0xf7, 0x07,
+ 0xda, 0x98, 0xcb, 0x64, 0x7b, 0x07, 0xdf, 0x49, 0xcc, 0x10, 0xb4, 0x07,
+ 0xdf, 0x40, 0xce, 0x00, 0xf9, 0x07, 0xde, 0xe8, 0x44, 0x05, 0x18, 0xc3,
+ 0x82, 0xa9, 0xd0, 0x0e, 0x7c, 0x00, 0x35, 0x40, 0xcb, 0x10, 0xb5, 0x07,
+ 0xf6, 0xb9, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0xa8, 0xcb, 0x10, 0xb5, 0x07,
+ 0xdf, 0x31, 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0x20, 0xd5, 0x35, 0x75, 0x00,
+ 0x45, 0x91, 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x79, 0xca, 0x26, 0xf7, 0x07,
+ 0xf5, 0x80, 0x0b, 0xc3, 0x82, 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0x31,
+ 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0x40, 0x46, 0x02, 0xd8, 0xc3, 0x82, 0xc4,
+ 0x0b, 0xc3, 0x82, 0xd0, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0xd1, 0xcb, 0x64,
+ 0x7b, 0x07, 0xf5, 0xe0, 0xce, 0x6d, 0xe8, 0x00, 0x37, 0xd1, 0x0b, 0xc3,
+ 0x82, 0xdc, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0xb1, 0xcb, 0x64, 0x7b, 0x07,
+ 0xf5, 0xc0, 0x45, 0x00, 0x8c, 0xc3, 0x82, 0xe8, 0x0b, 0xc3, 0x83, 0x0a,
+ 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x91, 0xcb, 0x64, 0x7b, 0x07, 0xf5, 0xa0,
+ 0x00, 0x43, 0x83, 0x16, 0x00, 0x43, 0x83, 0x28, 0x00, 0x43, 0x83, 0x34,
+ 0x00, 0x43, 0x83, 0x4a, 0x00, 0x43, 0x83, 0x56, 0xca, 0x26, 0xf7, 0x07,
+ 0xdc, 0x39, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x30, 0xcb, 0x64, 0x7b, 0x07,
+ 0xdb, 0xa9, 0x0b, 0xc3, 0x83, 0x62, 0xca, 0x26, 0xf7, 0x07, 0xdb, 0x98,
+ 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0x69, 0x0b, 0xc3, 0x83, 0x6e, 0xca, 0x26,
+ 0xf7, 0x07, 0xdb, 0x58, 0x44, 0x05, 0x18, 0xc3, 0x83, 0x7a, 0xce, 0x1e,
+ 0x29, 0x00, 0x36, 0x51, 0xc4, 0x00, 0x9d, 0x00, 0x36, 0x21, 0xcb, 0x08,
+ 0x09, 0x00, 0x31, 0x23, 0x03, 0x83, 0x86, 0x5d, 0x10, 0x12, 0x43, 0x83,
+ 0x8a, 0x45, 0x00, 0x8c, 0xc3, 0x83, 0x96, 0x0b, 0xc3, 0x83, 0xa2, 0xca,
+ 0x26, 0xf7, 0x07, 0xf7, 0x11, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0x20, 0xcb,
+ 0x64, 0x7b, 0x07, 0xde, 0xb1, 0x0b, 0xc3, 0x83, 0xae, 0xca, 0x26, 0xf7,
+ 0x07, 0xde, 0xa0, 0x00, 0x43, 0x83, 0xba, 0x45, 0x00, 0x8c, 0xc3, 0x83,
+ 0xca, 0xc6, 0x3a, 0x06, 0x00, 0x35, 0xd3, 0x03, 0x83, 0xe6, 0x0b, 0xc3,
+ 0x83, 0xea, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0x31, 0xcb, 0x64, 0x7b, 0x07,
+ 0xf7, 0x40, 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0xc9, 0x0b, 0xc3, 0x83, 0xf6,
+ 0xca, 0x26, 0xf7, 0x07, 0xdb, 0xb8, 0x00, 0x43, 0x84, 0x02, 0xce, 0x00,
+ 0xf9, 0x07, 0xf4, 0x00, 0xcb, 0x98, 0x6e, 0x00, 0x35, 0xf3, 0x03, 0x84,
+ 0x18, 0xc4, 0xe0, 0x63, 0x00, 0x36, 0x0b, 0x03, 0x84, 0x1c, 0x45, 0x00,
+ 0x8c, 0xc3, 0x84, 0x20, 0x0b, 0xc3, 0x84, 0x2f, 0xca, 0x26, 0xf7, 0x07,
+ 0xf7, 0x51, 0xcb, 0x64, 0x7b, 0x07, 0xf7, 0x60, 0xc3, 0x2b, 0xb9, 0x00,
+ 0x33, 0xc1, 0xc4, 0x06, 0x5a, 0x00, 0x33, 0xa9, 0xc3, 0x7e, 0x89, 0x00,
+ 0x33, 0xb0, 0xc2, 0x16, 0x1c, 0x0f, 0x75, 0xa9, 0xc2, 0x02, 0x98, 0x0f,
+ 0x75, 0x41, 0x0a, 0x43, 0x84, 0x3b, 0xc4, 0xdf, 0x93, 0x0f, 0x75, 0xa1,
+ 0xc2, 0x01, 0x9d, 0x0f, 0x75, 0x89, 0xc3, 0x03, 0x26, 0x0f, 0x75, 0x70,
+ 0xc2, 0x00, 0x74, 0x0f, 0x75, 0x31, 0x8a, 0x0f, 0x75, 0xd0, 0x8e, 0x0f,
+ 0x75, 0x19, 0x86, 0x0f, 0x75, 0xc8, 0xc3, 0x03, 0x26, 0x0f, 0x72, 0x71,
+ 0xc2, 0x01, 0x9d, 0x0f, 0x72, 0x89, 0xc4, 0xdf, 0x93, 0x0f, 0x72, 0xa0,
+ 0xc2, 0x01, 0x9d, 0x0f, 0x72, 0xc9, 0x47, 0x3b, 0xc4, 0x43, 0x84, 0x47,
+ 0xc2, 0x16, 0x1c, 0x0f, 0x74, 0xb1, 0xc2, 0x00, 0x65, 0x0f, 0x74, 0xc0,
+ 0xc3, 0x85, 0xf5, 0x0f, 0x73, 0xe1, 0xc3, 0xb1, 0x0d, 0x0f, 0x73, 0xf0,
+ 0xc3, 0x33, 0x5f, 0x00, 0x46, 0xe9, 0x8a, 0x00, 0x46, 0x60, 0xc6, 0xcb,
+ 0x9f, 0x00, 0x46, 0xe1, 0xc7, 0xc1, 0xc4, 0x00, 0x46, 0xd9, 0xcb, 0x92,
+ 0x49, 0x00, 0x46, 0xd1, 0xc5, 0xd6, 0x55, 0x00, 0x46, 0xa1, 0xc5, 0xde,
+ 0x57, 0x00, 0x44, 0xc0, 0xc5, 0xdb, 0xaf, 0x00, 0x44, 0xd1, 0xc6, 0xcb,
+ 0x6f, 0x00, 0x44, 0xc8, 0x4b, 0x13, 0xdd, 0xc3, 0x84, 0x53, 0xcc, 0x04,
+ 0xdb, 0x0f, 0xdd, 0x18, 0xdc, 0x13, 0xdd, 0x0f, 0xdd, 0x3b, 0x03, 0x84,
+ 0x59, 0xcc, 0x04, 0xdb, 0x0f, 0xdd, 0x12, 0x03, 0x84, 0x5f, 0xc4, 0x00,
+ 0x49, 0x0f, 0xdd, 0x03, 0x03, 0x84, 0x65, 0xc5, 0x00, 0x2c, 0x0f, 0xdd,
+ 0x0a, 0x03, 0x84, 0x69, 0xca, 0x01, 0x68, 0x01, 0x29, 0x61, 0xc4, 0x00,
+ 0x49, 0x01, 0x28, 0x81, 0xc5, 0x00, 0x2c, 0x01, 0x28, 0x60, 0x16, 0xc3,
+ 0x84, 0x6d, 0xd2, 0x4a, 0x2d, 0x0f, 0xd0, 0x39, 0xce, 0x2a, 0xfe, 0x0f,
+ 0xd0, 0x99, 0xdf, 0x0d, 0x00, 0x0f, 0xd0, 0xe0, 0xc5, 0xa8, 0xf7, 0x0f,
+ 0xd2, 0x89, 0xc4, 0xde, 0x83, 0x0f, 0xd2, 0x91, 0xc6, 0xca, 0xfd, 0x0f,
+ 0xd2, 0x98, 0xce, 0x2a, 0xfe, 0x0f, 0xd0, 0x79, 0xdb, 0x18, 0x03, 0x0f,
+ 0xd1, 0xc8, 0x44, 0x1d, 0xba, 0xc3, 0x84, 0x79, 0xc5, 0xc0, 0x74, 0x0f,
+ 0xaf, 0x98, 0x17, 0xc3, 0x84, 0x85, 0x96, 0x0b, 0x4d, 0xd0, 0x9a, 0x0b,
+ 0x4f, 0x31, 0xc2, 0x10, 0x11, 0x0b, 0x4c, 0xd0, 0x83, 0x0b, 0x4b, 0x9b,
+ 0x03, 0x84, 0x93, 0x17, 0xc3, 0x84, 0x99, 0x42, 0x2c, 0x43, 0x43, 0x84,
+ 0xa1, 0x96, 0x0b, 0x4f, 0x88, 0x17, 0xc3, 0x84, 0xab, 0x07, 0x43, 0x84,
+ 0xbb, 0x93, 0x0b, 0x4c, 0x01, 0x92, 0x0b, 0x4b, 0xe8, 0x42, 0x01, 0x31,
+ 0xc3, 0x84, 0xca, 0x92, 0x0b, 0x4b, 0x30, 0xc2, 0x5c, 0x9b, 0x0b, 0x4d,
+ 0x81, 0x93, 0x0b, 0x4c, 0x70, 0xc2, 0x00, 0x11, 0x0b, 0x4b, 0x79, 0x87,
+ 0x0b, 0x4c, 0x08, 0x87, 0x0b, 0x4e, 0xa3, 0x03, 0x84, 0xd6, 0xc2, 0xd0,
+ 0x00, 0x0b, 0x4c, 0x18, 0x93, 0x0b, 0x4d, 0x08, 0x90, 0x0b, 0x4b, 0x38,
+ 0xc3, 0x8b, 0xaa, 0x0b, 0x4c, 0xe0, 0xc2, 0x10, 0x11, 0x0b, 0x4c, 0xc8,
+ 0x87, 0x0b, 0x4b, 0x89, 0x93, 0x0b, 0x4e, 0x50, 0x8f, 0x0b, 0x4b, 0xc0,
+ 0xc5, 0xdb, 0x32, 0x0b, 0x4e, 0xd1, 0xc5, 0xd9, 0xa2, 0x0b, 0x4e, 0x88,
+ 0x96, 0x0b, 0x4e, 0x69, 0xc2, 0x00, 0xe2, 0x0b, 0x4d, 0x88, 0x9a, 0x0b,
+ 0x4f, 0x39, 0x96, 0x0b, 0x4d, 0xe8, 0x93, 0x0b, 0x4f, 0xa0, 0x90, 0x0b,
+ 0x4b, 0x59, 0x96, 0x0b, 0x4c, 0x60, 0x8f, 0x0b, 0x4b, 0xf0, 0xc6, 0xcb,
+ 0x7b, 0x0b, 0x4f, 0xa9, 0xc4, 0x05, 0x2e, 0x0b, 0x4e, 0x91, 0x8b, 0x0b,
+ 0x4e, 0x40, 0x96, 0x0b, 0x4e, 0x20, 0x96, 0x0b, 0x4e, 0x78, 0xc3, 0xc5,
+ 0xd2, 0x0b, 0x4a, 0x29, 0x03, 0xc3, 0x84, 0xdc, 0xc3, 0xd7, 0xe2, 0x0b,
+ 0x49, 0xd9, 0xc4, 0xc2, 0x61, 0x0b, 0x49, 0x98, 0xc3, 0x8f, 0x8a, 0x0b,
+ 0x49, 0xe1, 0xc3, 0x17, 0x29, 0x0b, 0x48, 0x99, 0x42, 0x2c, 0x43, 0xc3,
+ 0x84, 0xe9, 0xc2, 0x00, 0xb6, 0x0b, 0x47, 0xf1, 0xc2, 0x05, 0x1d, 0x0b,
+ 0x47, 0xe0, 0xc2, 0x00, 0xa4, 0x0b, 0x4a, 0x31, 0xc2, 0x02, 0xe0, 0x0b,
+ 0x47, 0xc0, 0x96, 0x0b, 0x49, 0x59, 0x92, 0x0b, 0x48, 0xf8, 0xc2, 0x01,
+ 0xdf, 0x0b, 0x49, 0xc1, 0x87, 0x0b, 0x4a, 0xc8, 0x87, 0x0b, 0x48, 0xa9,
+ 0xc2, 0xd0, 0x00, 0x0b, 0x48, 0x48, 0xc3, 0x7c, 0x57, 0x0b, 0x48, 0x71,
+ 0x96, 0x0b, 0x47, 0xb8, 0xc2, 0x02, 0xe0, 0x0b, 0x47, 0xa8, 0x8f, 0x0b,
+ 0x4a, 0x21, 0xc3, 0x48, 0xc4, 0x0b, 0x48, 0xb8, 0x90, 0x0b, 0x49, 0xf1,
+ 0x96, 0x0b, 0x48, 0x58, 0xc6, 0x18, 0x0e, 0x0b, 0x4b, 0x18, 0xc2, 0x10,
+ 0x11, 0x0b, 0x49, 0x51, 0x96, 0x0b, 0x48, 0x40, 0x90, 0x0b, 0x47, 0xa0,
+ 0x90, 0x0b, 0x4a, 0x09, 0xc3, 0xb5, 0x1b, 0x0b, 0x49, 0x19, 0x96, 0x0b,
+ 0x48, 0x00, 0x92, 0x0b, 0x49, 0x61, 0x8f, 0x0b, 0x49, 0x31, 0xc8, 0xb7,
+ 0xba, 0x0b, 0x48, 0x79, 0xc7, 0xc3, 0x37, 0x0b, 0x47, 0xf8, 0x17, 0xc3,
+ 0x84, 0xf5, 0x87, 0x0b, 0x47, 0xe8, 0x92, 0x0b, 0x49, 0xb1, 0x8f, 0x0b,
+ 0x49, 0xa0, 0xc3, 0xc9, 0xd8, 0x0b, 0x47, 0x49, 0xc7, 0xc7, 0x66, 0x0b,
+ 0x47, 0x50, 0x8f, 0x0b, 0x47, 0x11, 0x15, 0xc3, 0x84, 0xff, 0xc3, 0xe6,
+ 0x08, 0x0b, 0x45, 0x08, 0x97, 0x0b, 0x46, 0x53, 0x03, 0x85, 0x0b, 0xc2,
+ 0x00, 0xc4, 0x0b, 0x44, 0x98, 0xc2, 0x5c, 0x9b, 0x0b, 0x44, 0xa9, 0xc9,
+ 0xb1, 0xdc, 0x0b, 0x44, 0x78, 0xc2, 0xd0, 0x00, 0x0b, 0x47, 0x29, 0xc3,
+ 0xd0, 0xd7, 0x0b, 0x46, 0x40, 0x8f, 0x0b, 0x46, 0x79, 0xc2, 0x00, 0x4f,
+ 0x0b, 0x46, 0x20, 0x92, 0x0b, 0x46, 0xd1, 0x8f, 0x0b, 0x46, 0xb8, 0x96,
+ 0x0b, 0x45, 0xe9, 0xc5, 0xdb, 0x14, 0x0b, 0x44, 0xa0, 0x90, 0x0b, 0x46,
+ 0xb1, 0xc7, 0xc7, 0x43, 0x0b, 0x46, 0x38, 0x90, 0x0b, 0x46, 0xa1, 0xc5,
+ 0xda, 0x6f, 0x0b, 0x45, 0xc8, 0x42, 0x01, 0x31, 0xc3, 0x85, 0x21, 0xc3,
+ 0x16, 0x59, 0x0b, 0x46, 0xf8, 0x17, 0xc3, 0x85, 0x2d, 0xc3, 0x82, 0x78,
+ 0x0b, 0x46, 0x11, 0xc5, 0xd8, 0xee, 0x0b, 0x44, 0xb8, 0xc5, 0xd6, 0x87,
+ 0x0b, 0x45, 0xb9, 0x96, 0x0b, 0x45, 0x30, 0xc3, 0x7c, 0x57, 0x0b, 0x46,
+ 0x61, 0x87, 0x0b, 0x45, 0x20, 0xc3, 0x8e, 0x97, 0x0b, 0x46, 0xf1, 0xc2,
+ 0x00, 0xba, 0x0b, 0x46, 0x58, 0xc5, 0xda, 0x10, 0x0b, 0x46, 0xc1, 0xc7,
+ 0xc6, 0x71, 0x0b, 0x45, 0x98, 0xc6, 0xd1, 0x1b, 0x0b, 0x43, 0xa9, 0xc3,
+ 0x76, 0x32, 0x0b, 0x44, 0x51, 0xc3, 0x8f, 0x91, 0x0b, 0x43, 0xd2, 0x03,
+ 0x85, 0x35, 0xc3, 0xe5, 0x93, 0x0b, 0x44, 0x41, 0xc6, 0xce, 0xbd, 0x0b,
+ 0x44, 0x38, 0xc4, 0x9c, 0x80, 0x0b, 0x42, 0xf9, 0xc7, 0xca, 0x4c, 0x0b,
+ 0x42, 0xe0, 0xc3, 0x82, 0x78, 0x0b, 0x41, 0xf1, 0xca, 0xa5, 0x80, 0x0b,
+ 0x40, 0x40, 0x8f, 0x0b, 0x41, 0xb9, 0xc7, 0xc1, 0x38, 0x0b, 0x40, 0x28,
+ 0x8f, 0x0b, 0x42, 0x73, 0x03, 0x85, 0x3b, 0xc2, 0x00, 0xba, 0x0b, 0x42,
+ 0x31, 0xc3, 0x16, 0x59, 0x0b, 0x41, 0x91, 0xc4, 0x2c, 0x42, 0x0b, 0x40,
+ 0xd0, 0xc3, 0x4e, 0x64, 0x0b, 0x41, 0xb1, 0xc3, 0xe5, 0x5d, 0x0b, 0x41,
+ 0x30, 0xcc, 0x8b, 0xa1, 0x0b, 0x42, 0x08, 0xc5, 0xd9, 0xe3, 0x0b, 0x40,
+ 0xb1, 0xc5, 0xb7, 0xb5, 0x0b, 0x40, 0x00, 0x00, 0x43, 0x85, 0x4d, 0x8f,
+ 0x0b, 0x42, 0x61, 0xc3, 0x82, 0x78, 0x0b, 0x42, 0x10, 0xc2, 0x01, 0x5d,
+ 0x0b, 0x40, 0x51, 0xc5, 0xa9, 0x67, 0x0b, 0x40, 0x48, 0xc2, 0x01, 0x5d,
+ 0x0b, 0x40, 0x19, 0xc5, 0xa9, 0x67, 0x0b, 0x40, 0x10, 0xa2, 0x01, 0x40,
+ 0xfb, 0x03, 0x85, 0x59, 0xa3, 0x01, 0x41, 0x7b, 0x03, 0x85, 0x6b, 0xa5,
+ 0x01, 0x44, 0x79, 0xa4, 0x01, 0x42, 0x7a, 0x03, 0x85, 0x76, 0xa3, 0x01,
+ 0x41, 0xbb, 0x03, 0x85, 0x7a, 0xa5, 0x01, 0x44, 0xb9, 0xa4, 0x01, 0x42,
+ 0xba, 0x03, 0x85, 0x85, 0xa5, 0x01, 0x45, 0x39, 0xa4, 0x01, 0x43, 0x3a,
+ 0x03, 0x85, 0x89, 0xa5, 0x01, 0x46, 0x38, 0xa3, 0x01, 0x41, 0xdb, 0x03,
+ 0x85, 0x8d, 0xa5, 0x01, 0x44, 0xd9, 0xa4, 0x01, 0x42, 0xda, 0x03, 0x85,
+ 0x98, 0xa5, 0x01, 0x45, 0x59, 0xa4, 0x01, 0x43, 0x5a, 0x03, 0x85, 0x9c,
+ 0xa5, 0x01, 0x46, 0x58, 0xa5, 0x01, 0x45, 0x99, 0xa4, 0x01, 0x43, 0x9a,
+ 0x03, 0x85, 0xa0, 0xa5, 0x01, 0x46, 0x98, 0xa5, 0x01, 0x47, 0x18, 0xa3,
+ 0x01, 0x41, 0xeb, 0x03, 0x85, 0xa4, 0xa5, 0x01, 0x44, 0xe9, 0xa4, 0x01,
+ 0x42, 0xea, 0x03, 0x85, 0xaf, 0xa5, 0x01, 0x45, 0x69, 0xa4, 0x01, 0x43,
+ 0x6a, 0x03, 0x85, 0xb3, 0xa5, 0x01, 0x46, 0x68, 0xa5, 0x01, 0x45, 0xa9,
+ 0xa4, 0x01, 0x43, 0xaa, 0x03, 0x85, 0xb7, 0xa5, 0x01, 0x46, 0xa8, 0xa5,
+ 0x01, 0x47, 0x28, 0xa5, 0x01, 0x45, 0xc9, 0xa4, 0x01, 0x43, 0xca, 0x03,
+ 0x85, 0xbb, 0xa5, 0x01, 0x46, 0xc8, 0xa5, 0x01, 0x47, 0x48, 0xa5, 0x01,
+ 0x47, 0x88, 0xa3, 0x01, 0x41, 0xf3, 0x03, 0x85, 0xbf, 0xa5, 0x01, 0x44,
+ 0xf1, 0xa4, 0x01, 0x42, 0xf2, 0x03, 0x85, 0xca, 0xa5, 0x01, 0x45, 0x71,
+ 0xa4, 0x01, 0x43, 0x72, 0x03, 0x85, 0xce, 0xa5, 0x01, 0x46, 0x70, 0xa5,
+ 0x01, 0x45, 0xb1, 0xa4, 0x01, 0x43, 0xb2, 0x03, 0x85, 0xd2, 0xa5, 0x01,
+ 0x46, 0xb0, 0xa5, 0x01, 0x47, 0x30, 0xa5, 0x01, 0x45, 0xd1, 0xa4, 0x01,
+ 0x43, 0xd2, 0x03, 0x85, 0xd6, 0xa5, 0x01, 0x46, 0xd0, 0xa5, 0x01, 0x47,
+ 0x50, 0xa5, 0x01, 0x47, 0x90, 0xa5, 0x01, 0x45, 0xe1, 0xa4, 0x01, 0x43,
+ 0xe2, 0x03, 0x85, 0xda, 0xa5, 0x01, 0x46, 0xe0, 0xa5, 0x01, 0x47, 0x60,
+ 0xa5, 0x01, 0x47, 0xa0, 0xa5, 0x01, 0x47, 0xc0, 0xc6, 0x04, 0xe1, 0x0f,
+ 0xda, 0x01, 0xcc, 0x04, 0xcb, 0x0f, 0xda, 0x78, 0xcc, 0x04, 0xcb, 0x0f,
+ 0xda, 0x71, 0xc5, 0x00, 0x2c, 0x0f, 0xda, 0x80, 0x45, 0x00, 0x8c, 0xc3,
+ 0x85, 0xde, 0xc6, 0x10, 0x9d, 0x01, 0x5b, 0x81, 0x45, 0x03, 0x55, 0x43,
+ 0x86, 0x08, 0xc3, 0x14, 0xa7, 0x01, 0x59, 0xdb, 0x03, 0x86, 0x0e, 0xd2,
+ 0x05, 0xd5, 0x01, 0x5f, 0x60, 0xcf, 0x2c, 0x35, 0x01, 0x59, 0xc9, 0xd6,
+ 0x2d, 0x62, 0x01, 0x59, 0xd0, 0xcf, 0x62, 0x3d, 0x01, 0x4b, 0x59, 0x47,
+ 0x92, 0xe3, 0xc3, 0x86, 0x14, 0xc8, 0xae, 0xbc, 0x01, 0x4a, 0xf1, 0xc6,
+ 0x10, 0x9d, 0x01, 0x4a, 0xb0, 0x46, 0x00, 0xd4, 0xc3, 0x86, 0x1a, 0xc8,
+ 0xae, 0xbc, 0x01, 0x4a, 0xd1, 0xc6, 0x10, 0x9d, 0x01, 0x4a, 0x90, 0xc4,
+ 0xe1, 0xbf, 0x08, 0x3a, 0x61, 0xc4, 0xe2, 0xc7, 0x08, 0x3a, 0x59, 0xc4,
+ 0xe0, 0x7b, 0x08, 0x3a, 0x51, 0xc4, 0xe1, 0x2b, 0x08, 0x3a, 0x48, 0x88,
+ 0x08, 0x30, 0x81, 0x8f, 0x08, 0x30, 0x88, 0x88, 0x08, 0x30, 0x99, 0x8f,
+ 0x08, 0x30, 0xa0, 0x8f, 0x08, 0x30, 0xb0, 0xc5, 0xdc, 0x27, 0x08, 0x04,
+ 0x01, 0xc7, 0xc5, 0xa6, 0x08, 0x04, 0x09, 0xc6, 0xcf, 0x65, 0x08, 0x04,
+ 0x11, 0x23, 0xc3, 0x86, 0x24, 0x24, 0xc3, 0x86, 0x30, 0x25, 0xc3, 0x86,
+ 0x3c, 0x26, 0xc3, 0x86, 0x48, 0x22, 0x43, 0x86, 0x54, 0xc7, 0xc6, 0x1d,
+ 0x08, 0x04, 0x71, 0xc8, 0xb7, 0x12, 0x08, 0x04, 0x79, 0xc7, 0xc9, 0xc0,
+ 0x08, 0x04, 0x81, 0xc7, 0xc1, 0x23, 0x08, 0x04, 0x89, 0xc9, 0xa9, 0x1b,
+ 0x08, 0x04, 0x90, 0xc5, 0xdd, 0x03, 0x08, 0x04, 0xa9, 0xc6, 0xd1, 0xb1,
+ 0x08, 0x04, 0xb1, 0x9f, 0x08, 0x04, 0xb8, 0xc8, 0xba, 0xea, 0x08, 0x04,
+ 0xd1, 0xc6, 0xd2, 0x17, 0x08, 0x04, 0xd9, 0x9f, 0x08, 0x04, 0xe1, 0xc6,
+ 0xd2, 0x6b, 0x08, 0x04, 0xe9, 0xa3, 0x08, 0x04, 0xf0, 0x9d, 0x08, 0x04,
+ 0xf9, 0xc6, 0xd3, 0x01, 0x08, 0x05, 0x01, 0x9f, 0x08, 0x05, 0x09, 0xa0,
+ 0x08, 0x05, 0x11, 0xa1, 0x08, 0x05, 0x19, 0xa4, 0x08, 0x05, 0x29, 0xa5,
+ 0x08, 0x05, 0x31, 0xc7, 0xc5, 0x8a, 0x08, 0x05, 0x38, 0x9d, 0x08, 0x05,
+ 0x41, 0x9e, 0x08, 0x05, 0x49, 0xc9, 0xaf, 0x5d, 0x08, 0x05, 0x51, 0xc8,
+ 0xbe, 0x1a, 0x08, 0x05, 0x59, 0xa1, 0x08, 0x05, 0x61, 0xa2, 0x08, 0x05,
+ 0x69, 0xa3, 0x08, 0x05, 0x71, 0xa4, 0x08, 0x05, 0x79, 0xa5, 0x08, 0x05,
+ 0x81, 0xa6, 0x08, 0x05, 0x88, 0x9d, 0x08, 0x05, 0x91, 0x9f, 0x08, 0x05,
+ 0xa1, 0xc7, 0xc8, 0xa8, 0x08, 0x05, 0xa9, 0xa1, 0x08, 0x05, 0xb1, 0xa4,
+ 0x08, 0x05, 0xc1, 0xa5, 0x08, 0x05, 0xc9, 0xa6, 0x08, 0x05, 0xd1, 0x9e,
+ 0x08, 0x05, 0x99, 0xc6, 0xd0, 0xd3, 0x08, 0x05, 0xb8, 0x9d, 0x08, 0x05,
+ 0xd9, 0x9e, 0x08, 0x05, 0xe1, 0x9f, 0x08, 0x05, 0xe9, 0xa0, 0x08, 0x05,
+ 0xf1, 0xa1, 0x08, 0x05, 0xf9, 0xa2, 0x08, 0x06, 0x01, 0xa6, 0x08, 0x06,
+ 0x08, 0x9d, 0x08, 0x06, 0x11, 0xc8, 0xb7, 0xea, 0x08, 0x06, 0x18, 0xcb,
+ 0x8d, 0x00, 0x08, 0x06, 0x21, 0xc9, 0xaa, 0x32, 0x08, 0x06, 0x28, 0xc7,
+ 0xc6, 0x40, 0x08, 0x06, 0x31, 0xc7, 0xc7, 0x9e, 0x08, 0x06, 0x39, 0x9f,
+ 0x08, 0x06, 0x41, 0xc7, 0xc1, 0x2a, 0x08, 0x06, 0x49, 0xa1, 0x08, 0x06,
+ 0x51, 0xa3, 0x08, 0x06, 0x58, 0xc9, 0xad, 0x2f, 0x08, 0x06, 0x69, 0xcf,
+ 0x6b, 0x61, 0x08, 0x06, 0x71, 0xc7, 0xc2, 0x26, 0x08, 0x06, 0x79, 0xa2,
+ 0x08, 0x06, 0x81, 0xa3, 0x08, 0x06, 0x89, 0xa5, 0x08, 0x06, 0x99, 0xa6,
+ 0x08, 0x06, 0xa1, 0xd1, 0x52, 0x99, 0x08, 0x06, 0x60, 0x9e, 0x08, 0x06,
+ 0xa9, 0x9f, 0x08, 0x06, 0xb1, 0xa0, 0x08, 0x06, 0xb9, 0xc6, 0xcf, 0x5f,
+ 0x08, 0x06, 0xc1, 0xa2, 0x08, 0x06, 0xc9, 0xa3, 0x08, 0x06, 0xd1, 0xa4,
+ 0x08, 0x06, 0xd9, 0xa5, 0x08, 0x06, 0xe1, 0xa6, 0x08, 0x06, 0xe8, 0x9d,
+ 0x08, 0x06, 0xf9, 0x9e, 0x08, 0x07, 0x01, 0x9f, 0x08, 0x07, 0x09, 0xa0,
+ 0x08, 0x07, 0x11, 0xa1, 0x08, 0x07, 0x19, 0xa2, 0x08, 0x07, 0x21, 0xa4,
+ 0x08, 0x07, 0x31, 0xa5, 0x08, 0x07, 0x39, 0xa6, 0x08, 0x07, 0x41, 0xa3,
+ 0x08, 0x07, 0x28, 0x9d, 0x08, 0x07, 0x49, 0x9e, 0x08, 0x07, 0x51, 0x9f,
+ 0x08, 0x07, 0x59, 0xa0, 0x08, 0x07, 0x61, 0xa1, 0x08, 0x07, 0x69, 0xa2,
+ 0x08, 0x07, 0x71, 0xa4, 0x08, 0x07, 0x81, 0xa3, 0x08, 0x07, 0x79, 0xa5,
+ 0x08, 0x07, 0x89, 0xa6, 0x08, 0x07, 0x90, 0x9e, 0x08, 0x07, 0x99, 0x9f,
+ 0x08, 0x07, 0xa1, 0xa3, 0x08, 0x07, 0xa9, 0xa4, 0x08, 0x07, 0xb1, 0xa5,
+ 0x08, 0x07, 0xb9, 0xa6, 0x08, 0x07, 0xc0, 0xc3, 0x00, 0x33, 0x0e, 0xf8,
+ 0xf1, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0x0b, 0x03, 0x86, 0x66, 0xc9, 0x08,
+ 0xf7, 0x00, 0x0a, 0xe9, 0xca, 0xa7, 0x1a, 0x00, 0x10, 0xc9, 0xc6, 0xbd,
+ 0xf4, 0x00, 0x0a, 0xf8, 0xc5, 0x05, 0x02, 0x00, 0xf3, 0x1b, 0x03, 0x86,
+ 0x6c, 0xc5, 0x00, 0xd4, 0x00, 0xf3, 0x08, 0xce, 0x16, 0x0f, 0x00, 0xf3,
+ 0x28, 0xd3, 0x42, 0x2f, 0x05, 0x3e, 0x51, 0xc9, 0xb4, 0xeb, 0x00, 0x11,
+ 0xf8, 0x46, 0x00, 0x8b, 0x43, 0x86, 0x72, 0x94, 0x05, 0x5a, 0x5b, 0x03,
+ 0x86, 0x7e, 0x89, 0x00, 0x13, 0x0a, 0x03, 0x86, 0x84, 0xc8, 0xb7, 0xc2,
+ 0x00, 0xe8, 0xf9, 0xcd, 0x7c, 0x26, 0x00, 0xe8, 0xf1, 0x97, 0x00, 0xe8,
+ 0xe9, 0x91, 0x00, 0xe8, 0x8a, 0x03, 0x86, 0x8a, 0xc6, 0xbd, 0xf4, 0x00,
+ 0x07, 0x3b, 0x03, 0x86, 0x96, 0xc9, 0x08, 0xf7, 0x00, 0x08, 0x49, 0xc4,
+ 0x65, 0xe2, 0x00, 0x08, 0x69, 0xc3, 0x00, 0x33, 0x00, 0x12, 0xa8, 0xca,
+ 0xa3, 0xaa, 0x05, 0x5a, 0xa9, 0xca, 0x4c, 0x63, 0x05, 0x5a, 0xa0, 0xc4,
+ 0x6d, 0xb5, 0x00, 0x13, 0xb9, 0xc5, 0x21, 0xa4, 0x00, 0x14, 0xd0, 0xce,
+ 0x01, 0x19, 0x0e, 0xf8, 0xe1, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xb8, 0x94,
+ 0x00, 0x13, 0xcb, 0x03, 0x86, 0x9c, 0x96, 0x00, 0x14, 0x3b, 0x03, 0x86,
+ 0xa2, 0x9b, 0x00, 0x14, 0x73, 0x03, 0x86, 0xa8, 0x89, 0x00, 0xeb, 0xb9,
+ 0x11, 0xc3, 0x86, 0xae, 0x8b, 0x00, 0xe8, 0x4b, 0x03, 0x86, 0xc4, 0x83,
+ 0x00, 0x12, 0x83, 0x03, 0x86, 0xca, 0xc2, 0x03, 0xd4, 0x05, 0x5a, 0x89,
+ 0x8a, 0x00, 0x13, 0x2b, 0x03, 0x86, 0xd4, 0x8f, 0x00, 0x13, 0x7b, 0x03,
+ 0x86, 0xdd, 0x98, 0x00, 0x14, 0x61, 0x99, 0x00, 0x14, 0x69, 0x8d, 0x00,
+ 0x14, 0xf1, 0x8e, 0x05, 0x3c, 0x09, 0xc5, 0xdb, 0x4b, 0x00, 0x0c, 0x69,
+ 0x87, 0x00, 0x0e, 0xe8, 0xd3, 0x42, 0xed, 0x0e, 0xf8, 0x48, 0x42, 0x01,
+ 0x94, 0xc3, 0x86, 0xe3, 0x43, 0x05, 0x19, 0x43, 0x86, 0xef, 0xcf, 0x68,
+ 0x82, 0x00, 0xf3, 0x89, 0xc6, 0xbd, 0xf4, 0x00, 0x0b, 0x19, 0xc4, 0x65,
+ 0xe2, 0x00, 0x0b, 0x29, 0xca, 0xa7, 0x1a, 0x00, 0x10, 0xd9, 0xc3, 0x00,
+ 0x33, 0x00, 0x11, 0xb0, 0xcc, 0x23, 0x3f, 0x05, 0x3b, 0x2a, 0x03, 0x86,
+ 0xfb, 0xc3, 0x22, 0xcb, 0x00, 0x0c, 0x29, 0xc3, 0x02, 0x9f, 0x00, 0x0d,
+ 0x41, 0xc4, 0x0d, 0x13, 0x00, 0x0d, 0xe8, 0xc2, 0x00, 0xc0, 0x00, 0x0d,
+ 0x0b, 0x03, 0x87, 0x01, 0xc8, 0x9e, 0x5c, 0x00, 0xf6, 0x78, 0xc9, 0x08,
+ 0xf7, 0x00, 0x07, 0xa3, 0x03, 0x87, 0x07, 0xc4, 0x65, 0xe2, 0x00, 0x0e,
+ 0x90, 0x11, 0xc3, 0x87, 0x0d, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0xb2, 0x03,
+ 0x87, 0x19, 0x45, 0x02, 0x9a, 0x43, 0x87, 0x1f, 0x45, 0x02, 0x9a, 0x43,
+ 0x87, 0x2b, 0xca, 0x9b, 0xda, 0x00, 0x0f, 0xf8, 0xd1, 0x53, 0x76, 0x05,
+ 0x3a, 0x59, 0xc2, 0x00, 0x11, 0x05, 0x3a, 0x69, 0xcd, 0x7d, 0xfa, 0x01,
+ 0x63, 0xd0, 0xcb, 0x98, 0x58, 0x00, 0x0f, 0x68, 0x46, 0x00, 0x8b, 0xc3,
+ 0x87, 0x3d, 0x87, 0x05, 0x5b, 0x10, 0xd4, 0x01, 0x13, 0x00, 0xec, 0x98,
+ 0xd3, 0x42, 0xed, 0x0e, 0xf8, 0xd0, 0x11, 0xc3, 0x87, 0x49, 0xc8, 0x20,
+ 0xa9, 0x00, 0x07, 0x7a, 0x03, 0x87, 0x55, 0xc6, 0x05, 0x01, 0x00, 0xf1,
+ 0x68, 0xc9, 0x08, 0xf7, 0x00, 0x07, 0x71, 0xc4, 0x65, 0xe2, 0x00, 0x0e,
+ 0x80, 0xcc, 0x23, 0x3f, 0x00, 0xeb, 0xe0, 0x89, 0x00, 0xeb, 0xc9, 0x88,
+ 0x05, 0x3b, 0xe1, 0x94, 0x05, 0x3c, 0x19, 0x95, 0x05, 0x3c, 0x29, 0x96,
+ 0x05, 0x3c, 0x39, 0x86, 0x05, 0x3b, 0xd0, 0xc5, 0xde, 0x3e, 0x05, 0x5b,
+ 0x21, 0xc2, 0x49, 0x0c, 0x05, 0x5a, 0x00, 0x46, 0x00, 0x8b, 0x43, 0x87,
+ 0x5b, 0xcf, 0x68, 0x82, 0x00, 0xf0, 0x99, 0xc6, 0xbd, 0xf4, 0x00, 0xf0,
+ 0x89, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0x79, 0xc3, 0x00, 0x33, 0x00, 0x11,
+ 0x08, 0xc5, 0x05, 0x02, 0x00, 0xf0, 0x69, 0xc5, 0x00, 0xd4, 0x00, 0xf0,
+ 0x58, 0xc9, 0xa9, 0x2d, 0x00, 0xec, 0x88, 0xd3, 0x42, 0x2f, 0x05, 0x3e,
+ 0x41, 0xc5, 0x01, 0x74, 0x00, 0x08, 0x88, 0xc5, 0xcf, 0xcc, 0x00, 0x0c,
+ 0x61, 0xc3, 0x14, 0xa7, 0x00, 0x12, 0xb0, 0xc7, 0x45, 0x16, 0x00, 0x15,
+ 0x1b, 0x03, 0x87, 0x67, 0xca, 0x8e, 0x61, 0x00, 0x0e, 0x30, 0x94, 0x05,
+ 0x5a, 0x43, 0x03, 0x87, 0x6d, 0xc5, 0x42, 0xe8, 0x05, 0x3e, 0x99, 0xc4,
+ 0x95, 0x50, 0x05, 0x3e, 0xa8, 0x8c, 0x00, 0x11, 0xbb, 0x03, 0x87, 0x73,
+ 0x8b, 0x00, 0x09, 0x88, 0x45, 0x00, 0x8c, 0xc3, 0x87, 0x7c, 0xc8, 0x0f,
+ 0xbd, 0x00, 0x0d, 0xc8, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xa1, 0xcc, 0x4d,
+ 0x15, 0x05, 0x59, 0xe0, 0xca, 0xa7, 0x24, 0x0e, 0xf8, 0x5b, 0x03, 0x87,
+ 0x92, 0xce, 0x01, 0x19, 0x00, 0xec, 0xc1, 0xcc, 0x51, 0x28, 0x00, 0xec,
+ 0x59, 0xc4, 0x00, 0x32, 0x00, 0x14, 0x30, 0xc9, 0x08, 0xf7, 0x00, 0x07,
+ 0x53, 0x03, 0x87, 0x98, 0xc6, 0xbd, 0xf4, 0x00, 0x11, 0x4b, 0x03, 0x87,
+ 0x9c, 0xc4, 0x65, 0xe2, 0x00, 0x08, 0xd8, 0xc6, 0x05, 0x01, 0x00, 0xf0,
+ 0xd8, 0x11, 0xc3, 0x87, 0xa2, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0x58, 0x45,
+ 0x02, 0x9a, 0x43, 0x87, 0xae, 0x00, 0xc3, 0x87, 0xba, 0xca, 0x4b, 0x1f,
+ 0x05, 0x5a, 0x38, 0xc2, 0x00, 0x75, 0x0e, 0xf8, 0x38, 0xc9, 0x33, 0xad,
+ 0x05, 0x39, 0xf8, 0x46, 0x00, 0x8b, 0x43, 0x87, 0xfb, 0xc3, 0x3a, 0xe6,
+ 0x00, 0x13, 0x63, 0x03, 0x88, 0x07, 0xc2, 0x00, 0xb1, 0x00, 0x0c, 0xd0,
+ 0xcf, 0x68, 0x82, 0x00, 0xf1, 0xe9, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0xe1,
+ 0xc4, 0x65, 0xe2, 0x00, 0x09, 0xf1, 0xc3, 0x00, 0x33, 0x00, 0x11, 0xa0,
+ 0xc7, 0x0e, 0x70, 0x00, 0xf1, 0xbb, 0x03, 0x88, 0x0d, 0x45, 0x00, 0x5a,
+ 0x43, 0x88, 0x13, 0xc4, 0x00, 0x9d, 0x05, 0x59, 0xc9, 0xc5, 0x1e, 0xc8,
+ 0x00, 0x13, 0x59, 0xc3, 0x02, 0xa3, 0x00, 0x0a, 0x00, 0xc9, 0xaa, 0xcb,
+ 0x05, 0x3c, 0x70, 0xd4, 0x01, 0x13, 0x0e, 0xf8, 0x28, 0xcb, 0x8f, 0xb5,
+ 0x00, 0xf4, 0xe9, 0x06, 0x43, 0x88, 0x1f, 0xc6, 0x00, 0xd3, 0x00, 0xf7,
+ 0xb8, 0x43, 0x05, 0x19, 0xc3, 0x88, 0x2b, 0xc8, 0x20, 0xa9, 0x00, 0x07,
+ 0xf8, 0xce, 0x36, 0x39, 0x05, 0x5a, 0xd1, 0xc5, 0x01, 0x74, 0x00, 0x12,
+ 0x78, 0x98, 0x00, 0xf7, 0xe9, 0xc2, 0x02, 0xa7, 0x00, 0xf7, 0xd8, 0xc5,
+ 0x05, 0x02, 0x00, 0xf2, 0x19, 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0x08, 0x42,
+ 0x01, 0x23, 0xc3, 0x88, 0x37, 0x06, 0xc3, 0x88, 0x46, 0xc6, 0x60, 0xb1,
+ 0x00, 0x0b, 0x5b, 0x03, 0x88, 0x53, 0xc5, 0x1e, 0xc8, 0x00, 0x0b, 0x4b,
+ 0x03, 0x88, 0x59, 0x05, 0xc3, 0x88, 0x5d, 0x14, 0xc3, 0x88, 0x6c, 0xc9,
+ 0x6d, 0x45, 0x05, 0x5a, 0x91, 0x15, 0xc3, 0x88, 0x78, 0xc5, 0x1f, 0x0c,
+ 0x00, 0x07, 0xc9, 0xc5, 0x31, 0xee, 0x00, 0x07, 0xd1, 0xc5, 0x1d, 0x88,
+ 0x00, 0x0b, 0x69, 0xc6, 0xcc, 0x8f, 0x00, 0x0b, 0x99, 0xce, 0x1d, 0x93,
+ 0x00, 0x10, 0xb8, 0xd5, 0x36, 0x32, 0x05, 0x5a, 0x78, 0xc5, 0x1d, 0x88,
+ 0x00, 0x08, 0x1b, 0x03, 0x88, 0x84, 0x05, 0xc3, 0x88, 0x8a, 0xca, 0x9e,
+ 0x5a, 0x00, 0xf5, 0x19, 0x06, 0xc3, 0x88, 0x99, 0x14, 0xc3, 0x88, 0xa6,
+ 0xce, 0x1d, 0x93, 0x00, 0x10, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0x07, 0x01,
+ 0xc5, 0x31, 0xee, 0x00, 0x07, 0x09, 0xc5, 0x1e, 0xc8, 0x00, 0x07, 0x19,
+ 0xc6, 0x60, 0xb1, 0x00, 0x08, 0x09, 0xc6, 0xcc, 0x8f, 0x00, 0x08, 0x29,
+ 0xc6, 0x01, 0x73, 0x01, 0x63, 0x28, 0xc5, 0x31, 0xee, 0x00, 0x0f, 0xe9,
+ 0xc6, 0x60, 0xb1, 0x00, 0x0f, 0x18, 0x43, 0x05, 0x19, 0xc3, 0x88, 0xb0,
+ 0xc8, 0x20, 0xa9, 0x00, 0xf4, 0x28, 0xc6, 0xbd, 0xf4, 0x00, 0xf1, 0x49,
+ 0xc9, 0x08, 0xf7, 0x00, 0x09, 0x29, 0xc4, 0x65, 0xe2, 0x00, 0x10, 0xf8,
+ 0xc8, 0x20, 0xa9, 0x00, 0xf1, 0x39, 0x43, 0x05, 0x19, 0xc3, 0x88, 0xbc,
+ 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x40, 0x43, 0x05, 0x19, 0xc3, 0x88, 0xc8,
+ 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x60, 0xc9, 0x08, 0xf7, 0x00, 0xf4, 0x89,
+ 0xc3, 0x00, 0x33, 0x00, 0x14, 0x89, 0xc4, 0x65, 0xe2, 0x00, 0x0b, 0xf0,
+ 0xc5, 0x01, 0x74, 0x00, 0x0d, 0xb1, 0xc9, 0xb4, 0xeb, 0x00, 0x12, 0x00,
+ 0xc8, 0x20, 0xa9, 0x00, 0xf4, 0x69, 0xc8, 0x16, 0x15, 0x00, 0xf4, 0x58,
+ 0xcb, 0x95, 0xae, 0x05, 0x5a, 0xbb, 0x03, 0x88, 0xd4, 0xcc, 0x4c, 0x61,
+ 0x05, 0x5a, 0xb0, 0xc8, 0x0e, 0x6f, 0x00, 0xf3, 0xf9, 0xce, 0x3e, 0xae,
+ 0x05, 0x3a, 0xf8, 0xc5, 0x01, 0x74, 0x00, 0xeb, 0xeb, 0x03, 0x88, 0xd8,
+ 0xcc, 0x89, 0x01, 0x05, 0x3a, 0xa8, 0x05, 0xc3, 0x88, 0xde, 0x0e, 0xc3,
+ 0x88, 0xfc, 0x06, 0xc3, 0x89, 0x0e, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x39,
+ 0xcc, 0x1e, 0xc1, 0x00, 0xeb, 0x81, 0xc5, 0x1f, 0x0c, 0x00, 0x0f, 0xc9,
+ 0xce, 0x01, 0x19, 0x00, 0x13, 0x99, 0xc5, 0x1e, 0xc8, 0x00, 0x07, 0x89,
+ 0xc5, 0x31, 0xee, 0x00, 0x0a, 0x21, 0xce, 0x38, 0xe6, 0x05, 0x3d, 0x28,
+ 0xc8, 0x0e, 0x6f, 0x00, 0xf1, 0x99, 0xce, 0x3e, 0xae, 0x05, 0x3a, 0x19,
+ 0xc8, 0x25, 0xfb, 0x01, 0x63, 0x50, 0xd4, 0x3e, 0xa8, 0x05, 0x3a, 0x28,
+ 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0xb9, 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x48,
+ 0xc9, 0x08, 0xf7, 0x00, 0x08, 0xe9, 0xc6, 0xbd, 0xf4, 0x00, 0x09, 0x19,
+ 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x38, 0xc5, 0x05, 0x02, 0x00, 0xf0, 0x29,
+ 0xc5, 0x00, 0xd4, 0x00, 0xf0, 0x18, 0x87, 0x05, 0x59, 0x99, 0xc5, 0xde,
+ 0x3e, 0x05, 0x59, 0x81, 0x91, 0x00, 0x13, 0xa8, 0xcc, 0x23, 0x3f, 0x05,
+ 0x59, 0xf0, 0xcb, 0x4d, 0x16, 0x00, 0x14, 0xe9, 0xc9, 0x08, 0xf7, 0x00,
+ 0x09, 0xa9, 0xc4, 0x65, 0xe2, 0x00, 0x0f, 0x80, 0xc5, 0x41, 0x20, 0x00,
+ 0x12, 0x58, 0xc5, 0x05, 0x02, 0x00, 0xf7, 0xa1, 0xc5, 0x00, 0xd4, 0x00,
+ 0xf4, 0x70, 0xc2, 0x00, 0xc0, 0x00, 0x0d, 0x7b, 0x03, 0x89, 0x1a, 0xc8,
+ 0x9e, 0x5c, 0x00, 0xf7, 0x30, 0x11, 0xc3, 0x89, 0x20, 0xc8, 0x20, 0xa9,
+ 0x00, 0x06, 0xe2, 0x03, 0x89, 0x2c, 0xce, 0x74, 0xe8, 0x00, 0xf3, 0xd0,
+ 0x00, 0x43, 0x89, 0x30, 0xc9, 0x08, 0xf7, 0x00, 0x06, 0xdb, 0x03, 0x89,
+ 0x3c, 0xc4, 0x65, 0xe2, 0x00, 0x0e, 0x98, 0x45, 0x02, 0x9a, 0x43, 0x89,
+ 0x42, 0x45, 0x02, 0x9a, 0x43, 0x89, 0x60, 0x42, 0x00, 0x30, 0xc3, 0x89,
+ 0x7e, 0x45, 0x00, 0x5a, 0x43, 0x89, 0x8d, 0xcb, 0x98, 0x58, 0x00, 0x11,
+ 0x50, 0x45, 0x02, 0x9a, 0x43, 0x89, 0x99, 0xc9, 0x20, 0xa8, 0x00, 0xf2,
+ 0x71, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0x61, 0xc6, 0x60, 0xb1, 0x00, 0x11,
+ 0x60, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xa5, 0xca, 0x1f, 0x07, 0x00, 0x10,
+ 0x40, 0xca, 0x9b, 0xda, 0x00, 0xf1, 0x70, 0x00, 0x43, 0x89, 0xb1, 0xca,
+ 0x9b, 0x80, 0x00, 0xf0, 0xe0, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xbd, 0xca,
+ 0x1f, 0x07, 0x00, 0x10, 0x20, 0xc5, 0x31, 0xee, 0x00, 0xf0, 0xb1, 0xc5,
+ 0x1f, 0x0c, 0x00, 0xf0, 0xa0, 0xc9, 0x0e, 0x6e, 0x00, 0xf5, 0xb1, 0xc5,
+ 0x1e, 0xc8, 0x00, 0xf5, 0xa1, 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x91, 0xc5,
+ 0x1f, 0x0c, 0x00, 0xf5, 0x81, 0xc5, 0x31, 0xee, 0x00, 0xf5, 0x70, 0x45,
+ 0x02, 0x9a, 0x43, 0x89, 0xc9, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xe7, 0xca,
+ 0x1f, 0x07, 0x00, 0x10, 0x00, 0xcb, 0x98, 0x58, 0x00, 0x0e, 0xf0, 0xca,
+ 0x9b, 0xda, 0x00, 0x0f, 0xd0, 0xce, 0x16, 0x0f, 0x00, 0xf3, 0x40, 0xce,
+ 0x16, 0x0f, 0x00, 0xf3, 0x30, 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x91, 0xc5,
+ 0x00, 0xd4, 0x00, 0x0b, 0xd8, 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x41, 0xc5,
+ 0x00, 0xd4, 0x00, 0xf4, 0x30, 0xc5, 0x05, 0x02, 0x00, 0xf3, 0x61, 0xc5,
+ 0x00, 0xd4, 0x00, 0xf3, 0x50, 0x42, 0x00, 0x30, 0xc3, 0x89, 0xf6, 0xca,
+ 0x1f, 0x07, 0x00, 0x10, 0x80, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0xb1, 0xc4,
+ 0x65, 0xe2, 0x00, 0x0a, 0xc0, 0xd2, 0x25, 0xf1, 0x05, 0x3a, 0x80, 0xc5,
+ 0x05, 0x02, 0x00, 0xf2, 0x31, 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0x20, 0xcb,
+ 0x98, 0x58, 0x00, 0xf1, 0xc0, 0xc5, 0x05, 0x02, 0x00, 0xf1, 0x21, 0xc5,
+ 0x00, 0xd4, 0x00, 0xf1, 0x10, 0xcb, 0x8e, 0x60, 0x00, 0x0e, 0x28, 0xca,
+ 0x9b, 0xda, 0x00, 0xf0, 0x40, 0xd0, 0x57, 0xc2, 0x0f, 0xc1, 0x89, 0xcb,
+ 0x57, 0xc7, 0x0f, 0xc1, 0x69, 0xca, 0xa0, 0x08, 0x0f, 0xc1, 0x49, 0x49,
+ 0xa8, 0xdc, 0xc3, 0x8a, 0x05, 0xd8, 0x24, 0xb3, 0x01, 0x5b, 0xd9, 0xcc,
+ 0x84, 0x09, 0x0f, 0xc1, 0x09, 0xcc, 0x82, 0x1d, 0x0f, 0xc1, 0x28, 0xe0,
+ 0x03, 0xe7, 0x01, 0x5c, 0x08, 0xc6, 0x44, 0x50, 0x07, 0xd9, 0x69, 0xc7,
+ 0x44, 0x4f, 0x07, 0xd9, 0x60, 0xc5, 0x79, 0xf2, 0x05, 0x4b, 0x51, 0xc6,
+ 0xc0, 0x7c, 0x05, 0x4b, 0x39, 0xc6, 0x8e, 0xde, 0x05, 0x4b, 0x28, 0xc5,
+ 0x8e, 0xdf, 0x00, 0x89, 0x69, 0xc6, 0xbb, 0xec, 0x00, 0x89, 0xc0, 0xc5,
+ 0xc0, 0x7d, 0x00, 0x89, 0x79, 0xc6, 0xc1, 0x86, 0x00, 0x89, 0xc8, 0xc4,
+ 0x79, 0xf3, 0x00, 0x89, 0x93, 0x03, 0x8a, 0x11, 0xc6, 0xba, 0x7c, 0x00,
+ 0x89, 0xd0, 0xc4, 0xc6, 0x7a, 0x00, 0x89, 0xb1, 0xc6, 0xc6, 0x79, 0x00,
+ 0x89, 0xb8, 0xc6, 0xbb, 0xec, 0x05, 0x4b, 0x99, 0xc5, 0x8e, 0xdf, 0x00,
+ 0x88, 0xf0, 0xc3, 0x39, 0x37, 0x00, 0x89, 0x0b, 0x03, 0x8a, 0x17, 0xc8,
+ 0xad, 0x27, 0x00, 0x89, 0x28, 0xc5, 0xc0, 0x7d, 0x00, 0x89, 0x01, 0xc6,
+ 0xc1, 0x86, 0x00, 0x89, 0x48, 0xc4, 0xc6, 0x7a, 0x00, 0x89, 0x39, 0xc6,
+ 0xc6, 0x79, 0x00, 0x89, 0x40, 0xc7, 0xbb, 0xeb, 0x00, 0x8a, 0x91, 0xc5,
+ 0x90, 0xe4, 0x00, 0x8a, 0x98, 0xc3, 0x39, 0x37, 0x00, 0x89, 0xe1, 0x44,
+ 0x3a, 0xbf, 0x43, 0x8a, 0x1b, 0xc4, 0xc6, 0x7a, 0x00, 0x8a, 0x71, 0xc6,
+ 0xc6, 0x79, 0x00, 0x8a, 0xa0, 0xc4, 0xad, 0x2b, 0x00, 0x89, 0xf9, 0xc5,
+ 0xdb, 0xff, 0x00, 0x8a, 0x88, 0x87, 0x06, 0xbe, 0x33, 0x03, 0x8a, 0x27,
+ 0x97, 0x00, 0x8d, 0x01, 0x8b, 0x00, 0x8d, 0x09, 0x83, 0x06, 0xbe, 0x28,
+ 0x91, 0x00, 0x8b, 0xc1, 0xc2, 0x42, 0xcd, 0x00, 0x8b, 0xc9, 0x97, 0x00,
+ 0x8d, 0x20, 0x02, 0x43, 0x8a, 0x2b, 0x1b, 0xc3, 0x8a, 0x39, 0x91, 0x00,
+ 0x8c, 0x39, 0x8b, 0x00, 0x8c, 0x41, 0x83, 0x06, 0xbd, 0x93, 0x03, 0x8a,
+ 0x46, 0xc2, 0x02, 0x66, 0x06, 0xbd, 0xa0, 0x83, 0x00, 0x8c, 0x73, 0x03,
+ 0x8a, 0x4a, 0x87, 0x00, 0x8c, 0x83, 0x03, 0x8a, 0x4e, 0xc2, 0x0c, 0x43,
+ 0x00, 0x8c, 0x93, 0x03, 0x8a, 0x52, 0x97, 0x00, 0x8c, 0x99, 0x8b, 0x00,
+ 0x8c, 0xa1, 0x91, 0x06, 0xbd, 0xc0, 0x91, 0x00, 0x8b, 0xd1, 0x97, 0x00,
+ 0x8b, 0xd9, 0xc2, 0x2c, 0x43, 0x00, 0x8b, 0xe0, 0x97, 0x00, 0x8c, 0xa9,
+ 0x87, 0x06, 0xbd, 0xdb, 0x03, 0x8a, 0x56, 0x83, 0x06, 0xbd, 0xc9, 0x91,
+ 0x06, 0xbd, 0xe0, 0x91, 0x00, 0x8b, 0xf8, 0x87, 0x00, 0x8c, 0x0b, 0x03,
+ 0x8a, 0x5e, 0x83, 0x00, 0x8d, 0x32, 0x03, 0x8a, 0x62, 0xc2, 0x09, 0x90,
+ 0x06, 0xbd, 0x88, 0x87, 0x00, 0x8c, 0x50, 0x91, 0x06, 0xbd, 0xa8, 0xc4,
+ 0xa6, 0x08, 0x00, 0x8c, 0xe8, 0x83, 0x00, 0x8c, 0xcb, 0x03, 0x8a, 0x66,
+ 0x87, 0x06, 0xbe, 0x03, 0x03, 0x8a, 0x70, 0x91, 0x06, 0xbe, 0x11, 0x97,
+ 0x06, 0xbe, 0x18, 0xc2, 0x09, 0x90, 0x06, 0xbe, 0x08, 0xc4, 0xad, 0x2b,
+ 0x00, 0x8d, 0x53, 0x03, 0x8a, 0x74, 0xc5, 0xd6, 0x8c, 0x00, 0x8e, 0x19,
+ 0xc5, 0xd9, 0x61, 0x00, 0x8f, 0xd1, 0xc5, 0x79, 0xf2, 0x00, 0x8f, 0xd9,
+ 0xc7, 0xc6, 0x78, 0x00, 0x8f, 0xe1, 0xc7, 0xbb, 0xeb, 0x00, 0x8f, 0xe9,
+ 0xc5, 0x90, 0xe4, 0x00, 0x8f, 0xf0, 0xc4, 0x79, 0xf3, 0x00, 0x8f, 0x31,
+ 0xc6, 0xba, 0x7c, 0x00, 0x8f, 0xa0, 0x02, 0x43, 0x8a, 0x7a, 0xc8, 0xbb,
+ 0xea, 0x06, 0xbe, 0xb8, 0xc6, 0xd1, 0x57, 0x06, 0xbe, 0x70, 0x0d, 0xc3,
+ 0x8a, 0x86, 0x16, 0xc3, 0x8a, 0x92, 0xc5, 0xd6, 0x8c, 0x00, 0x8f, 0x49,
+ 0x12, 0xc3, 0x8a, 0x9e, 0xc5, 0xda, 0xe7, 0x06, 0xbf, 0x51, 0x05, 0xc3,
+ 0x8a, 0xaa, 0xc5, 0x90, 0xe4, 0x06, 0xbf, 0x90, 0xc4, 0xc6, 0x7a, 0x00,
+ 0x8d, 0x61, 0xc6, 0xc6, 0x79, 0x06, 0xbe, 0x60, 0xc5, 0xc0, 0x7d, 0x00,
+ 0x8e, 0x31, 0xc6, 0xc1, 0x86, 0x00, 0x8e, 0x48, 0xc6, 0x8e, 0xde, 0x00,
+ 0x8e, 0x51, 0xc5, 0xd6, 0x8c, 0x00, 0x8e, 0x59, 0xc5, 0x79, 0xf2, 0x06,
+ 0xbe, 0x79, 0xc4, 0xad, 0x2b, 0x06, 0xbe, 0x83, 0x03, 0x8a, 0xb6, 0x05,
+ 0xc3, 0x8a, 0xbc, 0xc7, 0xc1, 0x85, 0x06, 0xbe, 0xa0, 0xc5, 0x8e, 0xdf,
+ 0x00, 0x8d, 0x83, 0x03, 0x8a, 0xc8, 0xcc, 0x79, 0xeb, 0x00, 0x8e, 0xa9,
+ 0xc6, 0xbb, 0xec, 0x00, 0x8e, 0xc0, 0x02, 0x43, 0x8a, 0xcc, 0xc4, 0x79,
+ 0xf3, 0x00, 0x8d, 0x93, 0x03, 0x8a, 0xde, 0xc6, 0xba, 0x7c, 0x00, 0x8d,
+ 0xa9, 0xc6, 0xca, 0x0e, 0x00, 0x8e, 0xb8, 0xc3, 0x39, 0x37, 0x00, 0x8d,
+ 0x99, 0x44, 0x3a, 0xbf, 0x43, 0x8a, 0xe2, 0xc6, 0xc1, 0x86, 0x00, 0x8d,
+ 0xa1, 0xc5, 0xc0, 0x7d, 0x00, 0x8e, 0x72, 0x03, 0x8a, 0xee, 0xc9, 0x90,
+ 0xe0, 0x00, 0x8e, 0xcb, 0x03, 0x8a, 0xf4, 0xc6, 0xb7, 0x9c, 0x06, 0xbe,
+ 0xd8, 0xc4, 0x79, 0xf3, 0x00, 0x8e, 0xe3, 0x03, 0x8a, 0xfa, 0xc6, 0xca,
+ 0x0e, 0x00, 0x8e, 0xf8, 0xc3, 0x39, 0x37, 0x00, 0x8e, 0xe9, 0x44, 0x3a,
+ 0xbf, 0x43, 0x8b, 0x00, 0xc6, 0xc6, 0x79, 0x00, 0x8f, 0x01, 0xc4, 0xc6,
+ 0x7a, 0x06, 0xbf, 0x10, 0xc4, 0xad, 0x2b, 0x00, 0x8d, 0xcb, 0x03, 0x8b,
+ 0x0c, 0xc5, 0xd6, 0x8c, 0x00, 0x8f, 0x1b, 0x03, 0x8b, 0x12, 0xc7, 0xba,
+ 0x7b, 0x00, 0x8f, 0x21, 0xc5, 0x90, 0xe4, 0x00, 0x8f, 0x29, 0xc6, 0xc0,
+ 0x7c, 0x06, 0xbf, 0x19, 0xc5, 0xda, 0xe7, 0x06, 0xbf, 0x29, 0x05, 0x43,
+ 0x8b, 0x18, 0xc5, 0x79, 0xf2, 0x00, 0x8f, 0x99, 0xc4, 0xad, 0x2b, 0x06,
+ 0xbf, 0xd1, 0xc7, 0xc1, 0x85, 0x06, 0xbf, 0xd8, 0xc5, 0x79, 0xf2, 0x06,
+ 0xbf, 0x99, 0xcd, 0x79, 0xea, 0x06, 0xbf, 0xa0, 0xc5, 0x8e, 0xdf, 0x00,
+ 0x8f, 0x61, 0xc6, 0xbb, 0xec, 0x00, 0x8f, 0x78, 0xc5, 0x79, 0xf2, 0x06,
+ 0xbf, 0xb9, 0xca, 0xa7, 0x2e, 0x06, 0xbf, 0xc0, 0x0d, 0xc3, 0x8b, 0x24,
+ 0x15, 0xc3, 0x8b, 0x30, 0xc7, 0xca, 0x0d, 0x00, 0x8f, 0x91, 0xc5, 0xda,
+ 0xe7, 0x06, 0xbf, 0xa9, 0xc5, 0x90, 0xe4, 0x06, 0xbf, 0xb0, 0xc5, 0xd9,
+ 0xca, 0x01, 0x8b, 0x58, 0x02, 0x43, 0x8b, 0x3c, 0xc5, 0xc0, 0x7d, 0x01,
+ 0x8b, 0x99, 0xc6, 0xc1, 0x86, 0x01, 0x8b, 0xb8, 0xc4, 0xad, 0x2b, 0x01,
+ 0x8c, 0x11, 0xc7, 0xca, 0x0d, 0x01, 0x8c, 0x18, 0x87, 0x01, 0x8c, 0x40,
+ 0x91, 0x01, 0x8c, 0x50, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x81, 0xc7, 0x0d,
+ 0x04, 0x0f, 0x64, 0x38, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x79, 0xc7, 0x0d,
+ 0x04, 0x0f, 0x64, 0x30, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x71, 0xc7, 0x0d,
+ 0x04, 0x0f, 0x64, 0x28, 0xc8, 0x4b, 0x94, 0x0f, 0x64, 0x69, 0xc7, 0x0d,
+ 0x04, 0x0f, 0x64, 0x20, 0x91, 0x01, 0x9f, 0x09, 0x07, 0x43, 0x8b, 0x48,
+ 0xc3, 0x02, 0xdf, 0x01, 0x9f, 0x11, 0x43, 0x0d, 0x0e, 0x43, 0x8b, 0x57,
+ 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x68, 0xc2, 0x00, 0x5f, 0x01, 0x9f, 0x21,
+ 0xc5, 0x14, 0x08, 0x01, 0x9f, 0x70, 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x78,
+ 0xc4, 0x14, 0x09, 0x01, 0x9f, 0x80, 0xc3, 0x03, 0x26, 0x01, 0x9f, 0x88,
+ 0xc3, 0x22, 0x45, 0x01, 0x9b, 0x21, 0xc3, 0x18, 0x13, 0x01, 0x9b, 0x62,
+ 0x03, 0x8b, 0x64, 0x4b, 0x18, 0x04, 0xc3, 0x8b, 0x68, 0xdc, 0x13, 0xf9,
+ 0x0f, 0xd2, 0x28, 0xce, 0x3d, 0x7c, 0x01, 0x2f, 0x91, 0xcd, 0x02, 0xb4,
+ 0x01, 0x2f, 0x88, 0xce, 0x6c, 0x0c, 0x0f, 0xb1, 0x81, 0xc8, 0xba, 0xaa,
+ 0x0f, 0xc9, 0x70, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x98, 0xc9, 0x57, 0x20,
+ 0x08, 0x4f, 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xb3, 0x03, 0x8b, 0x74,
+ 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xf8, 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0xab,
+ 0x03, 0x8b, 0x7a, 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xf0, 0xc7, 0x0d, 0x04,
+ 0x08, 0x4e, 0xa3, 0x03, 0x8b, 0x80, 0xc8, 0x4b, 0x94, 0x08, 0x4e, 0xe8,
+ 0xc7, 0x0d, 0x04, 0x08, 0x4e, 0x9b, 0x03, 0x8b, 0x86, 0xc8, 0x4b, 0x94,
+ 0x08, 0x4e, 0xe0, 0x98, 0x00, 0xed, 0xd1, 0x8f, 0x00, 0xea, 0xd3, 0x03,
+ 0x8b, 0x8c, 0x8a, 0x00, 0xed, 0x19, 0x83, 0x00, 0xea, 0x23, 0x03, 0x8b,
+ 0x92, 0x8b, 0x00, 0xea, 0x71, 0xc6, 0x21, 0xa3, 0x00, 0xea, 0x61, 0x99,
+ 0x05, 0x5b, 0x49, 0x94, 0x00, 0x15, 0xa3, 0x03, 0x8b, 0x9c, 0x9b, 0x08,
+ 0x3d, 0x02, 0x03, 0x8b, 0xa2, 0xcc, 0x51, 0x28, 0x00, 0xed, 0xa9, 0xce,
+ 0x01, 0x19, 0x08, 0x3d, 0x78, 0xd4, 0x01, 0x13, 0x08, 0x3d, 0x68, 0xc4,
+ 0x00, 0x32, 0x00, 0xed, 0xe9, 0xce, 0x01, 0x19, 0x00, 0xed, 0xe0, 0xc4,
+ 0x01, 0x23, 0x00, 0xed, 0xc9, 0xca, 0x9f, 0x4a, 0x08, 0x3d, 0x80, 0x97,
+ 0x00, 0xed, 0xc1, 0x90, 0x00, 0xed, 0x81, 0x8e, 0x00, 0xed, 0x5b, 0x03,
+ 0x8b, 0xa8, 0x8b, 0x00, 0xed, 0x33, 0x03, 0x8b, 0xae, 0x84, 0x08, 0x3c,
+ 0x21, 0xc2, 0x04, 0xc6, 0x08, 0x3c, 0x01, 0x9b, 0x08, 0x3d, 0x91, 0x89,
+ 0x08, 0x3c, 0x93, 0x03, 0x8b, 0xba, 0x8a, 0x08, 0x3c, 0xb1, 0xc2, 0x49,
+ 0x0c, 0x08, 0x3d, 0x19, 0x94, 0x08, 0x3d, 0x50, 0xcf, 0x61, 0xe3, 0x08,
+ 0x3c, 0x79, 0xc5, 0x9b, 0xd5, 0x08, 0x3d, 0x20, 0xc3, 0x01, 0x5d, 0x00,
+ 0xed, 0xb1, 0xce, 0x6d, 0x40, 0x05, 0x5a, 0xf8, 0xc4, 0x01, 0x23, 0x00,
+ 0xed, 0x99, 0xc4, 0x00, 0x32, 0x08, 0x3d, 0xd0, 0xc6, 0xbb, 0x8c, 0x00,
+ 0xed, 0x11, 0xc3, 0x74, 0x83, 0x00, 0xea, 0x50, 0xcc, 0x51, 0x28, 0x00,
+ 0xed, 0x51, 0xce, 0x01, 0x19, 0x00, 0xed, 0x4b, 0x03, 0x8b, 0xc0, 0xcc,
+ 0x1e, 0xc1, 0x05, 0x5a, 0xf1, 0xcf, 0x68, 0x64, 0x05, 0x5a, 0xe9, 0xc4,
+ 0xa8, 0x1a, 0x08, 0x3c, 0xd8, 0xd4, 0x01, 0x13, 0x08, 0x3c, 0xf8, 0xc9,
+ 0x20, 0xb1, 0x08, 0x3c, 0xc0, 0xc3, 0x80, 0x9f, 0x00, 0xea, 0xf9, 0xca,
+ 0x9a, 0x86, 0x08, 0x3c, 0x50, 0xc4, 0x01, 0x23, 0x08, 0x3c, 0x63, 0x03,
+ 0x8b, 0xc6, 0xc4, 0x14, 0xa6, 0x08, 0x3c, 0x58, 0x46, 0x00, 0x8b, 0x43,
+ 0x8b, 0xcc, 0xc6, 0x21, 0xa3, 0x00, 0xec, 0xf9, 0x87, 0x08, 0x3c, 0x71,
+ 0xcc, 0x23, 0x33, 0x00, 0x17, 0x20, 0xc4, 0x14, 0xa6, 0x08, 0x3d, 0x41,
+ 0xc8, 0x61, 0x72, 0x08, 0x3d, 0x48, 0xc3, 0x1c, 0x8d, 0x00, 0xeb, 0x01,
+ 0xc5, 0x51, 0x51, 0x00, 0xea, 0xf0, 0x91, 0x00, 0xea, 0x99, 0x87, 0x00,
+ 0xea, 0x58, 0xca, 0x1f, 0x59, 0x08, 0x3c, 0xb8, 0xc4, 0x01, 0x23, 0x00,
+ 0x15, 0x89, 0xc6, 0x01, 0x73, 0x08, 0x3c, 0xa8, 0x90, 0x00, 0xe9, 0xd9,
+ 0x87, 0x00, 0xe9, 0x90, 0xcc, 0x23, 0x3f, 0x08, 0x3d, 0xa0, 0x45, 0x19,
+ 0x7c, 0xc3, 0x8b, 0xd8, 0xcc, 0x3e, 0xe6, 0x00, 0x17, 0x78, 0xce, 0x4e,
+ 0x8d, 0x05, 0x38, 0xa9, 0xc6, 0x01, 0xa1, 0x00, 0x17, 0xfa, 0x03, 0x8b,
+ 0xe4, 0xc7, 0x4e, 0x94, 0x00, 0x17, 0x41, 0xc4, 0x1e, 0xc9, 0x00, 0x17,
+ 0xb8, 0xcd, 0x2f, 0xa1, 0x00, 0x17, 0x91, 0xc2, 0x00, 0x75, 0x00, 0x17,
+ 0x98, 0x47, 0x19, 0x7a, 0xc3, 0x8b, 0xea, 0xd2, 0x4e, 0x89, 0x05, 0x38,
+ 0xa1, 0xc8, 0x4e, 0x93, 0x00, 0x17, 0x38, 0xcc, 0x1f, 0x0c, 0x00, 0x17,
+ 0xa1, 0x47, 0x00, 0x58, 0x43, 0x8b, 0xf6, 0xc8, 0x4e, 0x93, 0x05, 0x38,
+ 0x41, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x68, 0xc8, 0x4e, 0x93, 0x05, 0x38,
+ 0x61, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x88, 0x0f, 0x43, 0x8c, 0x02, 0xc2,
+ 0x00, 0xba, 0x0e, 0xbe, 0x09, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0xf9, 0x8b,
+ 0x0e, 0xbd, 0xc8, 0xc2, 0x00, 0x0a, 0x0e, 0xbe, 0x00, 0xc6, 0x10, 0x3f,
+ 0x0e, 0xbd, 0xf0, 0xc2, 0x20, 0xec, 0x0e, 0xbd, 0xe9, 0xc4, 0x89, 0xfe,
+ 0x0e, 0xbd, 0x88, 0xc4, 0x1a, 0x73, 0x0e, 0xbd, 0xe0, 0xca, 0x91, 0x2c,
+ 0x0e, 0xbd, 0xd8, 0xc2, 0x01, 0x23, 0x0e, 0xbd, 0xd0, 0x8b, 0x0e, 0xbd,
+ 0xb8, 0x97, 0x0e, 0xbd, 0xb0, 0x97, 0x0e, 0xbd, 0xa8, 0xc4, 0xdd, 0x9a,
+ 0x0e, 0xbd, 0xa0, 0xc4, 0x8b, 0x66, 0x0e, 0xbd, 0x98, 0xc3, 0x01, 0xbb,
+ 0x0e, 0xbd, 0x90, 0xc2, 0x01, 0x6f, 0x0e, 0xbd, 0x81, 0xc6, 0x10, 0x3f,
+ 0x0e, 0xbd, 0x70, 0xc3, 0x04, 0x87, 0x0e, 0xbd, 0x78, 0xc4, 0xdb, 0x4c,
+ 0x0e, 0xbd, 0x68, 0xc4, 0x38, 0x2c, 0x0e, 0xbd, 0x60, 0xc3, 0x04, 0x87,
+ 0x0e, 0xbd, 0x58, 0xc4, 0xde, 0x3f, 0x0e, 0xbd, 0x50, 0x0f, 0x43, 0x8c,
+ 0x0e, 0xc2, 0x00, 0xba, 0x0e, 0xbd, 0x39, 0xc2, 0x00, 0x0a, 0x0e, 0xbd,
+ 0x29, 0x8b, 0x0e, 0xbc, 0xf8, 0xc2, 0x00, 0x0a, 0x0e, 0xbd, 0x30, 0xc6,
+ 0x10, 0x3f, 0x0e, 0xbd, 0x20, 0xc2, 0x20, 0xec, 0x0e, 0xbd, 0x19, 0xc4,
+ 0x89, 0xfe, 0x0e, 0xbc, 0xba, 0x03, 0x8c, 0x1a, 0xc4, 0x1a, 0x73, 0x0e,
+ 0xbd, 0x10, 0xc2, 0x01, 0x23, 0x0e, 0xbd, 0x00, 0x8b, 0x0e, 0xbc, 0xe8,
+ 0x97, 0x0e, 0xbc, 0xe0, 0x97, 0x0e, 0xbc, 0xd8, 0xc4, 0xdd, 0x9a, 0x0e,
+ 0xbc, 0xd0, 0xc4, 0x8b, 0x66, 0x0e, 0xbc, 0xc8, 0xc3, 0x01, 0xbb, 0x0e,
+ 0xbc, 0xc0, 0xc2, 0x01, 0x6f, 0x0e, 0xbc, 0xb1, 0xc6, 0x10, 0x3f, 0x0e,
+ 0xbc, 0xa0, 0xc3, 0x04, 0x87, 0x0e, 0xbc, 0xa8, 0xc4, 0xdb, 0x4c, 0x0e,
+ 0xbc, 0x98, 0xc4, 0x38, 0x2c, 0x0e, 0xbc, 0x90, 0xc3, 0x04, 0x87, 0x0e,
+ 0xbc, 0x88, 0xc4, 0xde, 0x3f, 0x0e, 0xbc, 0x80, 0xc3, 0x11, 0x7e, 0x0e,
+ 0xbc, 0x41, 0xc5, 0xd8, 0x8f, 0x0e, 0xbb, 0xf0, 0xc3, 0x11, 0x7e, 0x0e,
+ 0xbb, 0x71, 0xc5, 0xd8, 0x8f, 0x0e, 0xbb, 0x20, 0xc7, 0x00, 0x90, 0x0e,
+ 0xbb, 0x38, 0x8e, 0x00, 0x6a, 0xb0, 0xc8, 0xb3, 0xb1, 0x0e, 0x8f, 0x41,
+ 0xc9, 0xaf, 0xae, 0x0e, 0x8f, 0x00, 0x50, 0x59, 0xd2, 0xc3, 0x8c, 0x20,
+ 0xcb, 0x94, 0xdd, 0x0e, 0x8e, 0xf8, 0xc2, 0x02, 0xae, 0x0e, 0x8f, 0x29,
+ 0xc4, 0x03, 0xc8, 0x0e, 0x8f, 0x20, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0x39,
+ 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x30, 0x47, 0xc3, 0x53, 0xc3, 0x8c, 0x2c,
+ 0x47, 0xc6, 0x94, 0x43, 0x8c, 0x3e, 0x16, 0xc3, 0x8c, 0x50, 0x02, 0x43,
+ 0x8c, 0x5c, 0xc4, 0x03, 0xc8, 0x0e, 0x89, 0x89, 0xc2, 0x02, 0xae, 0x0e,
+ 0x89, 0x80, 0xc7, 0xc4, 0x9c, 0x0e, 0x8d, 0x79, 0xc4, 0x01, 0xc3, 0x0e,
+ 0x8d, 0x70, 0xc7, 0xc8, 0xe7, 0x0e, 0x8e, 0xd0, 0xca, 0x68, 0x19, 0x0e,
+ 0x8e, 0x5b, 0x03, 0x8c, 0x68, 0xc8, 0x68, 0x1b, 0x0e, 0x8e, 0x50, 0xc8,
+ 0x68, 0x1b, 0x0e, 0x8e, 0x3b, 0x03, 0x8c, 0x6e, 0xca, 0x68, 0x19, 0x0e,
+ 0x8e, 0x40, 0xc2, 0x02, 0xae, 0x0e, 0x8c, 0xd1, 0xc5, 0x03, 0x02, 0x0e,
+ 0x8c, 0xc8, 0x55, 0x32, 0x96, 0xc3, 0x8c, 0x74, 0x4a, 0x32, 0x9c, 0x43,
+ 0x8c, 0x80, 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x11, 0xc4, 0x2c, 0x0d, 0x0e,
+ 0x8a, 0x00, 0xc5, 0xdb, 0xeb, 0x0e, 0x8e, 0xb9, 0xc3, 0x30, 0xf3, 0x0e,
+ 0x8e, 0xa8, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0xd9, 0xc5, 0x01, 0xfc, 0x0e,
+ 0x8a, 0xd0, 0x47, 0x1d, 0xd4, 0xc3, 0x8c, 0x98, 0xc8, 0xb9, 0x62, 0x0e,
+ 0x89, 0xa0, 0xc6, 0xd1, 0xe1, 0x0e, 0x8e, 0x89, 0xc6, 0xcb, 0x39, 0x0e,
+ 0x8e, 0x80, 0xc8, 0xbc, 0x72, 0x0e, 0x8c, 0xa9, 0xc5, 0x03, 0x02, 0x0e,
+ 0x8c, 0xa0, 0xc5, 0xd7, 0x6d, 0x0e, 0x89, 0x01, 0xc4, 0xe2, 0x4b, 0x0e,
+ 0x88, 0xf8, 0xc4, 0x2c, 0x0d, 0x0e, 0x8e, 0x29, 0xc5, 0x02, 0xc2, 0x0e,
+ 0x8d, 0xe0, 0x18, 0xc3, 0x8c, 0xd7, 0xc8, 0xbe, 0x42, 0x0e, 0x88, 0x90,
+ 0xc3, 0x00, 0x3c, 0x0e, 0x88, 0xa9, 0x87, 0x0e, 0x88, 0xa0, 0xcf, 0x68,
+ 0x19, 0x0e, 0x8e, 0x11, 0xcd, 0x68, 0x1b, 0x0e, 0x8e, 0x08, 0xd0, 0x5b,
+ 0x02, 0x0e, 0x88, 0xe9, 0xca, 0x74, 0x98, 0x0e, 0x88, 0xc8, 0x4e, 0x6d,
+ 0xbe, 0xc3, 0x8c, 0xe4, 0xca, 0x44, 0x39, 0x0e, 0x88, 0x10, 0xc5, 0xd7,
+ 0x6d, 0x0e, 0x89, 0x21, 0xc4, 0xe2, 0x4b, 0x0e, 0x89, 0x18, 0xc4, 0x63,
+ 0xf2, 0x0e, 0x8d, 0xa8, 0x9e, 0x0e, 0x8d, 0x29, 0x9d, 0x0e, 0x8d, 0x20,
+ 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x21, 0xc4, 0x2c, 0x0d, 0x0e, 0x8a, 0x10,
+ 0x4a, 0xa1, 0xde, 0xc3, 0x8c, 0xf0, 0xc5, 0x02, 0xa2, 0x0e, 0x88, 0x40,
+ 0xc4, 0x35, 0x36, 0x0e, 0x89, 0x99, 0xc5, 0xa2, 0xba, 0x0e, 0x89, 0x90,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xc9, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x48,
+ 0xc3, 0x38, 0x5b, 0x00, 0xcf, 0xc1, 0xc4, 0xe0, 0xaf, 0x00, 0xcf, 0x40,
+ 0xc3, 0xdf, 0x37, 0x00, 0xbf, 0xc9, 0xc2, 0x06, 0xdb, 0x00, 0xbf, 0xc0,
+ 0xd3, 0x45, 0x4d, 0x0f, 0xd1, 0x91, 0xcf, 0x18, 0x0f, 0x0f, 0xd2, 0x18,
+ 0xd0, 0x3c, 0x90, 0x01, 0x49, 0x71, 0xd0, 0x3c, 0x2c, 0x01, 0x49, 0x88,
+ 0xc6, 0x13, 0x66, 0x01, 0x0f, 0x89, 0xc8, 0xb8, 0xca, 0x01, 0x0d, 0xc0,
+ 0x46, 0x00, 0x8b, 0x43, 0x8c, 0xfc, 0x46, 0x00, 0x8b, 0x43, 0x8d, 0x1b,
+ 0xc4, 0xe3, 0xab, 0x00, 0xff, 0x59, 0x18, 0xc3, 0x8d, 0x3f, 0xc6, 0x60,
+ 0xb1, 0x00, 0xff, 0x49, 0x06, 0xc3, 0x8d, 0x4b, 0xc5, 0x63, 0xdc, 0x00,
+ 0x1c, 0x70, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0xd9, 0x18, 0xc3, 0x8d, 0x5a,
+ 0xc6, 0x60, 0xb1, 0x00, 0xfe, 0xc9, 0x06, 0xc3, 0x8d, 0x66, 0xc5, 0xd8,
+ 0xc1, 0x00, 0xf9, 0xc3, 0x03, 0x8d, 0x75, 0xc5, 0x63, 0xdc, 0x00, 0x1c,
+ 0x50, 0x46, 0x00, 0x8b, 0x43, 0x8d, 0x7b, 0x46, 0x00, 0x8b, 0x43, 0x8d,
+ 0x9a, 0x46, 0x00, 0x8b, 0x43, 0x8d, 0xbe, 0x46, 0x00, 0x8b, 0x43, 0x8d,
+ 0xe1, 0x46, 0x00, 0x8b, 0x43, 0x8e, 0x0c, 0x06, 0xc3, 0x8e, 0x30, 0x12,
+ 0xc3, 0x8e, 0x42, 0xc6, 0x60, 0xb1, 0x00, 0xff, 0x09, 0x18, 0xc3, 0x8e,
+ 0x51, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xd9, 0xc5, 0x63, 0xdc, 0x00, 0x1e,
+ 0x68, 0xc5, 0x6c, 0xa6, 0x00, 0xff, 0x29, 0xc5, 0xd8, 0xc1, 0x00, 0xff,
+ 0x20, 0x06, 0xc3, 0x8e, 0x5d, 0x12, 0xc3, 0x8e, 0x6f, 0xc6, 0x60, 0xb1,
+ 0x00, 0xfe, 0x89, 0x18, 0xc3, 0x8e, 0x7e, 0xc4, 0xe3, 0xab, 0x00, 0xfb,
+ 0xb9, 0xc5, 0x63, 0xdc, 0x00, 0x1d, 0x78, 0x46, 0x00, 0x8b, 0x43, 0x8e,
+ 0x8a, 0x46, 0x00, 0x8b, 0x43, 0x8e, 0xb5, 0x46, 0x00, 0x8b, 0x43, 0x8e,
+ 0xd9, 0xc5, 0x78, 0xc7, 0x00, 0x1e, 0xc9, 0xc5, 0x87, 0xf4, 0x00, 0x1b,
+ 0x98, 0x90, 0x00, 0x1f, 0xd9, 0xc3, 0x87, 0xf6, 0x00, 0x1f, 0x08, 0xc2,
+ 0x00, 0xba, 0x00, 0xe9, 0x51, 0x8b, 0x00, 0xe9, 0x40, 0xc3, 0x01, 0xcf,
+ 0x08, 0x0a, 0x09, 0x47, 0x0d, 0x05, 0x43, 0x8f, 0x05, 0xc7, 0xb9, 0xdb,
+ 0x08, 0x0a, 0x69, 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0xa0, 0x00, 0x43, 0x8f,
+ 0x11, 0x00, 0x43, 0x8f, 0x24, 0xc6, 0xb9, 0xdc, 0x08, 0x0a, 0x49, 0xcf,
+ 0x67, 0xbf, 0x08, 0x0a, 0xa8, 0x00, 0x43, 0x8f, 0x2e, 0xc2, 0x02, 0xa0,
+ 0x08, 0x0a, 0xe1, 0xc2, 0x00, 0xc4, 0x08, 0x0b, 0x21, 0x0a, 0x43, 0x8f,
+ 0x3a, 0xc3, 0x45, 0x6b, 0x08, 0x0b, 0x49, 0x43, 0x00, 0xc7, 0x43, 0x8f,
+ 0x46, 0xc2, 0x00, 0x5f, 0x08, 0x0a, 0xfb, 0x03, 0x8f, 0x52, 0xc3, 0x45,
+ 0x6b, 0x08, 0x0b, 0x32, 0x03, 0x8f, 0x58, 0xcf, 0x6b, 0x25, 0x08, 0x0b,
+ 0x08, 0xd3, 0x41, 0x12, 0x08, 0x78, 0xe0, 0xd3, 0x41, 0x12, 0x08, 0x78,
+ 0xb8, 0xd3, 0x41, 0x12, 0x08, 0x78, 0x80, 0xc3, 0x77, 0x79, 0x08, 0x78,
+ 0xa9, 0xc4, 0xdc, 0x2d, 0x08, 0x78, 0x88, 0xcc, 0x85, 0xdd, 0x08, 0x78,
+ 0x99, 0xc3, 0x36, 0xb6, 0x08, 0x78, 0x00, 0xc2, 0xe5, 0xfd, 0x08, 0x1e,
+ 0x49, 0xc2, 0x00, 0xd0, 0x08, 0x1e, 0x50, 0xc7, 0xc1, 0x8c, 0x08, 0x1e,
+ 0x62, 0x03, 0x8f, 0x5e, 0xc2, 0x01, 0x30, 0x08, 0x1e, 0x70, 0x91, 0x08,
+ 0x1e, 0x91, 0xc4, 0x18, 0x12, 0x08, 0x1e, 0xa0, 0xc7, 0xca, 0x06, 0x0e,
+ 0x7d, 0xf1, 0x44, 0xe0, 0x6b, 0xc3, 0x8f, 0x64, 0xc9, 0x92, 0x8d, 0x0e,
+ 0x7d, 0xb0, 0xd0, 0x58, 0xe2, 0x0e, 0x7d, 0x21, 0xd0, 0x2d, 0x10, 0x0e,
+ 0x7d, 0x08, 0xcb, 0x93, 0xeb, 0x0e, 0x7c, 0x79, 0xc7, 0x78, 0xdb, 0x0e,
+ 0x7c, 0x48, 0x87, 0x00, 0xb3, 0x50, 0x87, 0x00, 0xb1, 0xb8, 0x8b, 0x00,
+ 0xa7, 0x08, 0x91, 0x00, 0xa7, 0x28, 0x83, 0x00, 0xa7, 0x48, 0x8b, 0x00,
+ 0xa2, 0xe0, 0x91, 0x00, 0xa3, 0x00, 0x83, 0x00, 0xa3, 0x20, 0x83, 0x00,
+ 0xa9, 0xe0, 0x91, 0x00, 0xa9, 0xc0, 0x8b, 0x00, 0xa9, 0xa0, 0x83, 0x00,
+ 0xa9, 0x20, 0x8b, 0x00, 0xa8, 0xe0, 0x91, 0x00, 0xa9, 0x00, 0x83, 0x00,
+ 0xa8, 0x18, 0x8b, 0x00, 0xa7, 0xd8, 0x91, 0x00, 0xa7, 0xf8, 0x83, 0x00,
+ 0xa2, 0x38, 0x91, 0x00, 0xa2, 0x18, 0x8b, 0x00, 0xa1, 0xf8, 0x8b, 0x00,
+ 0xa5, 0x88, 0x91, 0x00, 0xa5, 0xa8, 0x83, 0x00, 0xa5, 0xc8, 0x83, 0x00,
+ 0xb3, 0xe8, 0x91, 0x00, 0xb3, 0xd8, 0x8b, 0x00, 0xb3, 0xc8, 0x43, 0x02,
+ 0x9c, 0xc3, 0x8f, 0x71, 0xc4, 0x00, 0xd5, 0x00, 0x1a, 0x80, 0x96, 0x01,
+ 0x66, 0xa8, 0x96, 0x01, 0x66, 0xa0, 0xcd, 0x0d, 0xad, 0x01, 0x92, 0x49,
+ 0x87, 0x01, 0x92, 0x88, 0xc2, 0x02, 0xa0, 0x01, 0x92, 0x91, 0xc4, 0x02,
+ 0xde, 0x01, 0x92, 0x98, 0xc3, 0x09, 0x9e, 0x01, 0x92, 0xa1, 0xc3, 0x0d,
+ 0x14, 0x01, 0x92, 0xa8, 0xc2, 0x22, 0xcc, 0x01, 0x92, 0xb1, 0xc4, 0x18,
+ 0x10, 0x01, 0x92, 0xb8, 0xcd, 0x0d, 0xad, 0x01, 0x92, 0x51, 0x87, 0x01,
+ 0x92, 0xd8, 0xc2, 0x02, 0xa0, 0x01, 0x92, 0xe1, 0xc4, 0x02, 0xde, 0x01,
+ 0x92, 0xe8, 0xc3, 0x09, 0x9e, 0x01, 0x92, 0xf1, 0xc3, 0x0d, 0x14, 0x01,
+ 0x92, 0xf8, 0xc2, 0x22, 0xcc, 0x01, 0x95, 0x89, 0xc4, 0x18, 0x10, 0x01,
+ 0x95, 0x90, 0xcd, 0x0d, 0xad, 0x01, 0x92, 0x59, 0x87, 0x01, 0x95, 0xb0,
+ 0xc2, 0x02, 0xa0, 0x01, 0x95, 0xb9, 0xc4, 0x02, 0xde, 0x01, 0x95, 0xc0,
+ 0xc3, 0x09, 0x9e, 0x01, 0x95, 0xc9, 0xc3, 0x0d, 0x14, 0x01, 0x95, 0xd0,
+ 0xc2, 0x22, 0xcc, 0x01, 0x95, 0xd9, 0xc4, 0x18, 0x10, 0x01, 0x95, 0xe0,
+ 0x46, 0x25, 0xd4, 0x43, 0x8f, 0x7d, 0xc2, 0x00, 0xc1, 0x09, 0x19, 0x69,
+ 0xc2, 0x00, 0xd0, 0x09, 0x19, 0x60, 0xc9, 0xb4, 0x6d, 0x09, 0x29, 0x79,
+ 0xc2, 0x02, 0xfb, 0x09, 0x15, 0x00, 0x8e, 0x09, 0x29, 0x21, 0x86, 0x09,
+ 0x12, 0xb0, 0xc2, 0x01, 0xe2, 0x09, 0x29, 0x18, 0xc2, 0x01, 0xe2, 0x09,
+ 0x12, 0xe3, 0x03, 0x8f, 0x89, 0xc3, 0x01, 0xb2, 0x09, 0x12, 0xd8, 0xc9,
+ 0x40, 0xaa, 0x09, 0x12, 0xa8, 0xc8, 0xb5, 0x8a, 0x09, 0x11, 0xd8, 0xc3,
+ 0x38, 0xb5, 0x09, 0x28, 0xf1, 0xc3, 0x0b, 0x47, 0x09, 0x10, 0x80, 0xd2,
+ 0x36, 0x5f, 0x09, 0x28, 0xe8, 0xc2, 0x00, 0x65, 0x09, 0x28, 0xd9, 0xcb,
+ 0x8d, 0x2c, 0x09, 0x10, 0x18, 0xc2, 0x06, 0x47, 0x09, 0x1c, 0x59, 0x0b,
+ 0x43, 0x8f, 0x8f, 0x00, 0x43, 0x8f, 0x9b, 0x97, 0x09, 0x10, 0x69, 0x87,
+ 0x09, 0x10, 0x60, 0xc3, 0x03, 0x49, 0x09, 0x10, 0x51, 0xc9, 0x40, 0xaa,
+ 0x09, 0x10, 0x48, 0x8b, 0x09, 0x10, 0x41, 0x42, 0x01, 0x9d, 0x43, 0x8f,
+ 0xa7, 0xc4, 0xdc, 0xae, 0x09, 0x28, 0xb1, 0x86, 0x09, 0x28, 0xa8, 0xc5,
+ 0x39, 0xc7, 0x09, 0x28, 0x88, 0xc4, 0xdc, 0xae, 0x09, 0x28, 0x59, 0x86,
+ 0x09, 0x28, 0x51, 0x9f, 0x09, 0x28, 0x48, 0x87, 0x09, 0x28, 0x41, 0xc2,
+ 0x00, 0xb1, 0x09, 0x28, 0x38, 0xca, 0xa6, 0xfc, 0x09, 0x27, 0xb1, 0x49,
+ 0x36, 0x5c, 0xc3, 0x8f, 0xb2, 0xc3, 0x04, 0x2a, 0x09, 0x27, 0x99, 0xc2,
+ 0x08, 0x6d, 0x09, 0x27, 0x90, 0x8b, 0x09, 0x1c, 0x41, 0xc2, 0x04, 0x3d,
+ 0x09, 0x0e, 0x33, 0x03, 0x8f, 0xbe, 0x83, 0x09, 0x0e, 0x22, 0x03, 0x8f,
+ 0xc4, 0xc2, 0x01, 0xe2, 0x09, 0x0f, 0x51, 0x86, 0x09, 0x0f, 0x49, 0xca,
+ 0xa0, 0xb2, 0x09, 0x0f, 0x41, 0x46, 0x25, 0xd4, 0x43, 0x8f, 0xc8, 0xd8,
+ 0x25, 0xd3, 0x09, 0x0f, 0x21, 0x03, 0x43, 0x8f, 0xd2, 0xc2, 0x01, 0xdf,
+ 0x09, 0x0f, 0x09, 0x0a, 0x43, 0x8f, 0xdc, 0xc3, 0x5d, 0xd1, 0x09, 0x0e,
+ 0xd1, 0x87, 0x09, 0x0e, 0xc2, 0x03, 0x8f, 0xf1, 0x97, 0x09, 0x0e, 0xb3,
+ 0x03, 0x8f, 0xf7, 0xc3, 0x04, 0x5a, 0x09, 0x0e, 0xa9, 0xc4, 0x03, 0x48,
+ 0x09, 0x0e, 0xa0, 0x17, 0xc3, 0x8f, 0xfb, 0x8b, 0x09, 0x0e, 0x7a, 0x03,
+ 0x90, 0x06, 0x8f, 0x09, 0x0e, 0x63, 0x03, 0x90, 0x0a, 0xc7, 0x6a, 0x1f,
+ 0x09, 0x0e, 0x58, 0xcb, 0x8d, 0x21, 0x09, 0x0e, 0x51, 0x83, 0x09, 0x0e,
+ 0x42, 0x03, 0x90, 0x10, 0x8b, 0x09, 0x0e, 0x09, 0xc2, 0x01, 0x9d, 0x09,
+ 0x0e, 0x00, 0xcc, 0x83, 0xa9, 0x09, 0x0d, 0xf9, 0x90, 0x09, 0x0d, 0xf1,
+ 0x8e, 0x09, 0x0d, 0xe9, 0x46, 0x25, 0xd4, 0x43, 0x90, 0x14, 0xcd, 0x47,
+ 0xaa, 0x09, 0x0b, 0x51, 0xc8, 0x54, 0x29, 0x09, 0x0b, 0x48, 0xd2, 0x47,
+ 0xa5, 0x09, 0x26, 0x59, 0xc4, 0x38, 0xb4, 0x09, 0x08, 0xa1, 0xc3, 0x62,
+ 0x19, 0x09, 0x08, 0x98, 0x0b, 0xc3, 0x90, 0x26, 0x87, 0x09, 0x07, 0x2a,
+ 0x03, 0x90, 0x2e, 0x94, 0x09, 0x07, 0x21, 0x8e, 0x09, 0x07, 0x18, 0x46,
+ 0x25, 0xd4, 0x43, 0x90, 0x34, 0xc9, 0x20, 0x12, 0x09, 0x07, 0x08, 0x8f,
+ 0x09, 0x26, 0x02, 0x03, 0x90, 0x40, 0xd0, 0x5d, 0xd2, 0x09, 0x25, 0xf9,
+ 0xc9, 0xaa, 0x17, 0x09, 0x06, 0xe0, 0xc9, 0xaa, 0xef, 0x09, 0x06, 0xd8,
+ 0xc4, 0x45, 0xaf, 0x09, 0x06, 0xc9, 0x8d, 0x09, 0x06, 0xc0, 0x46, 0x25,
+ 0xd4, 0xc3, 0x90, 0x46, 0x8e, 0x09, 0x06, 0x92, 0x03, 0x90, 0x50, 0x94,
+ 0x09, 0x06, 0x63, 0x03, 0x90, 0x56, 0xc7, 0x5d, 0x9b, 0x09, 0x06, 0x58,
+ 0xca, 0x9c, 0x66, 0x09, 0x06, 0x81, 0xa1, 0x09, 0x06, 0x72, 0x03, 0x90,
+ 0x5c, 0xd0, 0x5d, 0x92, 0x09, 0x06, 0x50, 0xc8, 0xaa, 0xef, 0x09, 0x06,
+ 0x40, 0x48, 0x6c, 0xd6, 0xc3, 0x90, 0x62, 0x84, 0x09, 0x06, 0x30, 0x42,
+ 0x00, 0x47, 0x43, 0x90, 0x6e, 0xc4, 0x38, 0x68, 0x09, 0x25, 0xb1, 0xc9,
+ 0xaa, 0x5f, 0x09, 0x06, 0x01, 0x86, 0x09, 0x05, 0xf8, 0xc8, 0xaa, 0x60,
+ 0x09, 0x06, 0x10, 0x9f, 0x09, 0x1b, 0xd2, 0x03, 0x90, 0x7a, 0xd0, 0x5b,
+ 0x42, 0x09, 0x1b, 0xc8, 0xc3, 0x04, 0x2a, 0x09, 0x05, 0xd1, 0xc2, 0x00,
+ 0xd0, 0x09, 0x05, 0xc9, 0xca, 0xa4, 0x4a, 0x09, 0x05, 0xc0, 0xc8, 0xb5,
+ 0x92, 0x09, 0x07, 0x60, 0xca, 0x51, 0xd4, 0x09, 0x25, 0x00, 0xcc, 0x5d,
+ 0xd6, 0x09, 0x24, 0xe8, 0xc4, 0x4a, 0x0f, 0x09, 0x1b, 0x99, 0xc4, 0xe0,
+ 0x5f, 0x09, 0x03, 0x60, 0x8f, 0x09, 0x03, 0x39, 0xcb, 0x97, 0xbe, 0x09,
+ 0x03, 0x30, 0xc2, 0x38, 0x6a, 0x09, 0x02, 0xf0, 0xca, 0x97, 0xbe, 0x09,
+ 0x02, 0xe0, 0x00, 0x43, 0x90, 0x80, 0x00, 0x43, 0x90, 0xa4, 0x14, 0xc3,
+ 0x90, 0xd8, 0xc6, 0x13, 0x95, 0x0e, 0xc6, 0x61, 0x46, 0x0e, 0xce, 0xc3,
+ 0x90, 0xe4, 0xc2, 0x02, 0xae, 0x0e, 0xc6, 0x33, 0x03, 0x90, 0xfa, 0xc4,
+ 0x03, 0xc8, 0x0e, 0xc6, 0x21, 0xcf, 0x62, 0x2e, 0x0e, 0xc0, 0xe0, 0xc5,
+ 0x0e, 0xce, 0x0e, 0xc5, 0xc1, 0xc5, 0x06, 0x82, 0x0e, 0xc5, 0xb9, 0xc6,
+ 0x04, 0xcb, 0x0e, 0xc5, 0xa3, 0x03, 0x91, 0x00, 0xc6, 0x13, 0x95, 0x0e,
+ 0xc5, 0x81, 0xce, 0x3a, 0x9d, 0x0e, 0xc5, 0x79, 0xc2, 0x02, 0xae, 0x0e,
+ 0xc5, 0x71, 0xc4, 0x03, 0xc8, 0x0e, 0xc5, 0x58, 0xc5, 0x06, 0x82, 0x0e,
+ 0xc5, 0x03, 0x03, 0x91, 0x04, 0x16, 0xc3, 0x91, 0x0a, 0xc4, 0x18, 0xf2,
+ 0x0e, 0xc4, 0xc1, 0xce, 0x3a, 0x9d, 0x0e, 0xc4, 0xb9, 0xc2, 0x02, 0xae,
+ 0x0e, 0xc4, 0x91, 0xc4, 0x03, 0xc8, 0x0e, 0xc4, 0x72, 0x03, 0x91, 0x16,
+ 0xc6, 0x13, 0x95, 0x0e, 0xc3, 0x29, 0xc6, 0x04, 0xe1, 0x0e, 0xc3, 0x13,
+ 0x03, 0x91, 0x1a, 0xd0, 0x5a, 0x02, 0x0e, 0xc3, 0x08, 0xc7, 0x27, 0xb2,
+ 0x0e, 0xc3, 0x01, 0xc4, 0x18, 0xf2, 0x0e, 0xc2, 0xf9, 0xc4, 0x0e, 0xe2,
+ 0x0e, 0xc2, 0xe8, 0x00, 0x43, 0x91, 0x23, 0xd2, 0x26, 0x32, 0x0e, 0xc2,
+ 0x63, 0x03, 0x91, 0x32, 0xcb, 0x18, 0xdc, 0x0e, 0xc2, 0x22, 0x03, 0x91,
+ 0x36, 0xc5, 0x0e, 0xce, 0x0e, 0xc7, 0xa3, 0x03, 0x91, 0x3a, 0xcb, 0x13,
+ 0x90, 0x0e, 0xc6, 0x1b, 0x03, 0x91, 0x3e, 0x47, 0x04, 0xcb, 0x43, 0x91,
+ 0x44, 0xc2, 0x00, 0x74, 0x0e, 0xc6, 0x99, 0xc3, 0x00, 0xa3, 0x0e, 0xc6,
+ 0x90, 0xd2, 0x4c, 0x6d, 0x0e, 0xc4, 0xfa, 0x03, 0x91, 0x50, 0x00, 0x43,
+ 0x91, 0x56, 0xcc, 0x13, 0x8f, 0x0e, 0xc6, 0x88, 0xdd, 0x11, 0xa8, 0x0e,
+ 0xc5, 0x60, 0x00, 0x43, 0x91, 0x71, 0xd3, 0x40, 0xff, 0x0e, 0xc4, 0x21,
+ 0xc4, 0x0e, 0xe2, 0x0e, 0xc4, 0x02, 0x03, 0x91, 0x80, 0x00, 0x43, 0x91,
+ 0x86, 0xd7, 0x26, 0x32, 0x0e, 0xc2, 0xa9, 0xd5, 0x18, 0xdc, 0x0e, 0xc2,
+ 0x58, 0xd5, 0x13, 0x90, 0x0e, 0xc6, 0xd3, 0x03, 0x91, 0x92, 0xc5, 0x0e,
+ 0xce, 0x0e, 0xc6, 0x50, 0xc5, 0x16, 0xca, 0x0e, 0xc5, 0xf9, 0xc2, 0x00,
+ 0x74, 0x0e, 0xc5, 0xf1, 0xc3, 0x00, 0xa3, 0x0e, 0xc5, 0xe8, 0xc5, 0x06,
+ 0x82, 0x0e, 0xc0, 0x13, 0x03, 0x91, 0x96, 0xd2, 0x13, 0x89, 0x0e, 0xc6,
+ 0x81, 0x46, 0x0e, 0xce, 0xc3, 0x91, 0x9a, 0xc4, 0x05, 0x75, 0x0e, 0xc3,
+ 0x63, 0x03, 0x91, 0xa6, 0xc8, 0xbc, 0x62, 0x0e, 0xc3, 0x89, 0xd3, 0x46,
+ 0x57, 0x0e, 0xc2, 0x9a, 0x03, 0x91, 0xaa, 0xd5, 0x37, 0x04, 0x0e, 0xc6,
+ 0x79, 0xd4, 0x3c, 0x00, 0x0e, 0xc5, 0xe1, 0xc4, 0x05, 0x75, 0x0e, 0xc3,
+ 0xa0, 0xc5, 0x37, 0x20, 0x0e, 0xc6, 0xb8, 0xc7, 0x27, 0xb2, 0x0e, 0xc3,
+ 0x49, 0xc4, 0x0e, 0xe2, 0x0e, 0xc3, 0x38, 0xcb, 0x13, 0x90, 0x0e, 0xc6,
+ 0x73, 0x03, 0x91, 0xb0, 0xc2, 0x02, 0xae, 0x0e, 0xc6, 0x38, 0x00, 0x43,
+ 0x91, 0xb6, 0xc5, 0x06, 0x82, 0x0e, 0xc5, 0x09, 0xc2, 0x02, 0xae, 0x0e,
+ 0xc4, 0xa0, 0xc5, 0x17, 0x14, 0x0e, 0xce, 0x89, 0xc5, 0x03, 0x13, 0x0e,
+ 0xce, 0x80, 0xc5, 0x17, 0x14, 0x0e, 0xce, 0x11, 0xc5, 0x03, 0x13, 0x0e,
+ 0xce, 0x08, 0xc2, 0x00, 0x15, 0x0e, 0xcb, 0x40, 0xc6, 0x00, 0x58, 0x0e,
+ 0xce, 0x79, 0xc6, 0x24, 0x3b, 0x0e, 0xce, 0x68, 0xc6, 0x00, 0x58, 0x0e,
+ 0xce, 0x71, 0xc6, 0x24, 0x3b, 0x0e, 0xce, 0x60, 0xc6, 0x00, 0x58, 0x0e,
+ 0xce, 0x01, 0xc6, 0x24, 0x3b, 0x0e, 0xcd, 0xf0, 0xc6, 0x00, 0x58, 0x0e,
+ 0xcd, 0xf9, 0xc6, 0x24, 0x3b, 0x0e, 0xcd, 0xe8, 0xcc, 0x8a, 0xf9, 0x0e,
+ 0xce, 0x59, 0xcc, 0x82, 0x89, 0x0e, 0xce, 0x50, 0xc6, 0x2c, 0x2e, 0x0e,
+ 0xcd, 0xe1, 0xc6, 0x00, 0x58, 0x0e, 0xcd, 0xd0, 0xc6, 0x2c, 0x2e, 0x0e,
+ 0xcd, 0xd9, 0xc6, 0x00, 0x58, 0x0e, 0xcd, 0xc8, 0xc5, 0x17, 0x14, 0x0e,
+ 0xce, 0x39, 0xc5, 0x03, 0x13, 0x0e, 0xce, 0x30, 0xc5, 0x17, 0x14, 0x0e,
+ 0xcd, 0xc1, 0xc5, 0x03, 0x13, 0x0e, 0xcd, 0xb8, 0xc5, 0x17, 0x14, 0x0e,
+ 0xcc, 0xf1, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0xe9, 0xc5, 0x03, 0x13, 0x0e,
+ 0xcc, 0xe0, 0xc5, 0x17, 0x14, 0x0e, 0xcc, 0xd9, 0xc6, 0x01, 0xdb, 0x0e,
+ 0xcc, 0xd1, 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0xc8, 0x47, 0x20, 0x38, 0xc3,
+ 0x91, 0xd1, 0x4b, 0x27, 0x7b, 0x43, 0x91, 0xdd, 0xcb, 0x93, 0x1a, 0x0e,
+ 0xcc, 0xf9, 0x53, 0x41, 0xd0, 0x43, 0x91, 0xf2, 0xc5, 0x17, 0x14, 0x0e,
+ 0xcc, 0x53, 0x03, 0x91, 0xfe, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0x49, 0xc5,
+ 0x03, 0x13, 0x0e, 0xcc, 0x40, 0xc2, 0x00, 0x15, 0x0e, 0xc9, 0x68, 0x45,
+ 0x00, 0x8c, 0xc3, 0x92, 0x04, 0xc6, 0x10, 0x9d, 0x01, 0x5b, 0x99, 0x4a,
+ 0x01, 0x88, 0x43, 0x92, 0x2e, 0xe0, 0x01, 0x47, 0x01, 0x4b, 0x28, 0xd0,
+ 0x57, 0xc2, 0x0f, 0xc1, 0x91, 0xcb, 0x57, 0xc7, 0x0f, 0xc1, 0x71, 0xca,
+ 0xa0, 0x08, 0x0f, 0xc1, 0x51, 0x47, 0x00, 0x58, 0xc3, 0x92, 0x34, 0x49,
+ 0xa8, 0xdc, 0xc3, 0x92, 0x40, 0xcc, 0x84, 0x09, 0x0f, 0xc1, 0x11, 0xcc,
+ 0x82, 0x1d, 0x0f, 0xc1, 0x30, 0xe0, 0x01, 0x87, 0x01, 0x5c, 0x10, 0x46,
+ 0x00, 0x8b, 0x43, 0x92, 0x4c, 0xe0, 0x09, 0x67, 0x01, 0x4b, 0x48, 0x0e,
+ 0xc3, 0x92, 0x58, 0x14, 0x43, 0x92, 0x64, 0x90, 0x00, 0x70, 0x81, 0xc3,
+ 0x00, 0xd0, 0x00, 0x70, 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xea, 0xc1, 0xcc,
+ 0x10, 0xb4, 0x07, 0xea, 0xc8, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x51, 0xcc,
+ 0x10, 0xb4, 0x07, 0xe9, 0x90, 0x0b, 0xc3, 0x92, 0x6a, 0xca, 0x26, 0xf7,
+ 0x07, 0xe9, 0x31, 0xcb, 0x64, 0x7b, 0x07, 0xe9, 0xc1, 0x45, 0x00, 0x8c,
+ 0x43, 0x92, 0x76, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x81, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe8, 0x60, 0x45, 0x50, 0xf0, 0xc3, 0x92, 0x82, 0x45, 0x19, 0x60,
+ 0x43, 0x92, 0x8e, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x69, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe8, 0x48, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x79, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe8, 0x58, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0xa1, 0xcd, 0x00, 0xfa,
+ 0x07, 0xe3, 0x10, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x99, 0xcd, 0x00, 0xfa,
+ 0x07, 0xe3, 0x08, 0xca, 0x26, 0xf7, 0x07, 0xea, 0xf9, 0xcc, 0x10, 0xb4,
+ 0x07, 0xeb, 0x00, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x11, 0xcc, 0x10, 0xb4,
+ 0x07, 0xeb, 0x18, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0xe9, 0xcb, 0x10, 0xb5,
+ 0x07, 0xe5, 0x70, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x09, 0xcb, 0x10, 0xb5,
+ 0x07, 0xe5, 0x98, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x31, 0xcc, 0x10, 0xb4,
+ 0x07, 0xee, 0x28, 0xcc, 0x00, 0xfb, 0x07, 0xe1, 0x01, 0xcb, 0x10, 0xb5,
+ 0x07, 0xe5, 0x88, 0x44, 0x19, 0x6a, 0xc3, 0x92, 0x9a, 0xce, 0x43, 0x77,
+ 0x07, 0xed, 0x48, 0xd3, 0x40, 0x41, 0x07, 0xea, 0x31, 0x0a, 0x43, 0x92,
+ 0xa6, 0x47, 0xa6, 0xcd, 0xc3, 0x92, 0xb2, 0xcd, 0x00, 0xfa, 0x07, 0xef,
+ 0xc8, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0xb1, 0xcc, 0x10, 0xb4, 0x07, 0xeb,
+ 0xb8, 0x8f, 0x07, 0xea, 0x39, 0xcd, 0x76, 0x28, 0x07, 0xea, 0x50, 0xca,
+ 0x82, 0xa3, 0x07, 0xea, 0x41, 0xcc, 0x82, 0xa1, 0x07, 0xea, 0x48, 0xcc,
+ 0x00, 0xfb, 0x07, 0xe1, 0x39, 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x98, 0x44,
+ 0x19, 0x6a, 0xc3, 0x92, 0xb8, 0xd1, 0x50, 0x13, 0x07, 0xeb, 0x99, 0xce,
+ 0x43, 0x77, 0x07, 0xeb, 0xa0, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x91, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe5, 0x30, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x61, 0xcb,
+ 0x10, 0xb5, 0x07, 0xe5, 0x10, 0x45, 0x30, 0xc1, 0xc3, 0x92, 0xc4, 0xd1,
+ 0x50, 0x13, 0x07, 0xea, 0x98, 0x43, 0x2b, 0xba, 0xc3, 0x92, 0xd0, 0x42,
+ 0x03, 0x53, 0x43, 0x92, 0xdc, 0x44, 0x06, 0x5b, 0xc3, 0x92, 0xe8, 0x42,
+ 0x00, 0x5d, 0x43, 0x92, 0xfa, 0xca, 0x26, 0xf7, 0x07, 0xe3, 0x31, 0x0b,
+ 0xc3, 0x93, 0x06, 0xcb, 0x64, 0x7b, 0x07, 0xe6, 0xf8, 0x44, 0x50, 0xf2,
+ 0xc3, 0x93, 0x12, 0x43, 0x2b, 0xba, 0x43, 0x93, 0x1e, 0xcc, 0x00, 0xfb,
+ 0x07, 0xe0, 0x01, 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xb8, 0x0b, 0xc3, 0x93,
+ 0x2a, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xb8, 0xca, 0x26, 0xf7, 0x07, 0xdf,
+ 0x99, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x90, 0xca, 0x26, 0xf7, 0x07, 0xdf,
+ 0x89, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x80, 0xca, 0x26, 0xf7, 0x07, 0xdf,
+ 0x79, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x70, 0xcc, 0x00, 0xfb, 0x07, 0xe2,
+ 0xb1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0xd8, 0xca, 0x26, 0xf7, 0x07, 0xed,
+ 0xd9, 0xcc, 0x10, 0xb4, 0x07, 0xee, 0x18, 0xcd, 0x00, 0xfa, 0x07, 0xf7,
+ 0xc9, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0xd0, 0xcd, 0x00, 0xfa, 0x07, 0xf7,
+ 0xb9, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0xc0, 0xca, 0x26, 0xf7, 0x07, 0xec,
+ 0x01, 0xcc, 0x10, 0xb4, 0x07, 0xed, 0xa8, 0xcc, 0x00, 0xfb, 0x07, 0xe1,
+ 0xa1, 0xcb, 0x10, 0xb5, 0x07, 0xe6, 0x18, 0x44, 0x19, 0x6a, 0xc3, 0x93,
+ 0x36, 0xcf, 0x67, 0x65, 0x07, 0xeb, 0xf9, 0xce, 0x43, 0x77, 0x07, 0xed,
+ 0x90, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x31, 0xcb, 0x10, 0xb5, 0x07, 0xe4,
+ 0xe8, 0xc2, 0x04, 0xc6, 0x07, 0xea, 0x20, 0xcb, 0x10, 0xb5, 0x07, 0xdf,
+ 0xf1, 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0xe0, 0x16, 0xc3, 0x93, 0x42, 0xca,
+ 0x35, 0x7a, 0x00, 0x31, 0xe9, 0x5c, 0x10, 0x12, 0x43, 0x93, 0x4e, 0x44,
+ 0x05, 0x18, 0xc3, 0x93, 0x58, 0x16, 0x43, 0x93, 0x67, 0xcc, 0x00, 0xfb,
+ 0x07, 0xf6, 0x89, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0x98, 0xd0, 0x0e, 0x7c,
+ 0x00, 0x46, 0x19, 0xc9, 0x0e, 0x6e, 0x00, 0x37, 0xe0, 0xcc, 0x00, 0xfb,
+ 0x07, 0xf6, 0x69, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0x78, 0xcf, 0x67, 0xb0,
+ 0x00, 0x45, 0x81, 0x16, 0xc3, 0x93, 0x73, 0xc4, 0x00, 0x9d, 0x00, 0x35,
+ 0x80, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0xa1, 0xcc, 0x00, 0xfb, 0x07, 0xdc,
+ 0x90, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0xc1, 0xcc, 0x00, 0xfb, 0x07, 0xdc,
+ 0xb0, 0x46, 0x03, 0x13, 0xc3, 0x93, 0x7f, 0x42, 0x00, 0x58, 0xc3, 0x93,
+ 0x89, 0x4b, 0x0e, 0x7c, 0xc3, 0x93, 0x95, 0xc3, 0x01, 0x5d, 0x00, 0x3b,
+ 0x50, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0xe9, 0xcb, 0x10, 0xb5, 0x07, 0xf6,
+ 0xf8, 0x4a, 0x0e, 0x7d, 0xc3, 0x93, 0xa1, 0xcd, 0x04, 0xe7, 0x00, 0x45,
+ 0x10, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0xe9, 0xcb, 0x10, 0xb5, 0x07, 0xf4,
+ 0xf8, 0x4a, 0x0e, 0x7d, 0xc3, 0x93, 0xad, 0x48, 0x04, 0xe7, 0x43, 0x93,
+ 0xbf, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xf6,
+ 0x58, 0x44, 0x00, 0x8d, 0xc3, 0x93, 0xcb, 0xc4, 0x3e, 0x06, 0x00, 0x33,
+ 0x8a, 0x03, 0x94, 0x01, 0x00, 0x43, 0x94, 0x05, 0xc7, 0x31, 0x5f, 0x00,
+ 0x46, 0x11, 0x16, 0xc3, 0x94, 0x11, 0xc9, 0x16, 0x14, 0x00, 0x3b, 0x10,
+ 0xcc, 0x00, 0xfb, 0x07, 0xdc, 0x71, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0x80,
+ 0x45, 0x00, 0x8c, 0xc3, 0x94, 0x1d, 0x0b, 0xc3, 0x94, 0x2d, 0xcb, 0x64,
+ 0x7b, 0x07, 0xf6, 0xe1, 0xca, 0x26, 0xf7, 0x07, 0xf6, 0xd0, 0xca, 0x26,
+ 0xf7, 0x07, 0xdf, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x10, 0xca, 0x26,
+ 0xf7, 0x07, 0xdf, 0x09, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x00, 0xcc, 0x00,
+ 0xfb, 0x07, 0xf5, 0x29, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0x38, 0xc7, 0x31,
+ 0x5f, 0x00, 0x46, 0x09, 0xc9, 0x16, 0x14, 0x00, 0x35, 0xf8, 0xcb, 0x10,
+ 0xb5, 0x07, 0xdb, 0xe1, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0xd0, 0xcb, 0x64,
+ 0x7b, 0x07, 0xdc, 0x09, 0x0b, 0xc3, 0x94, 0x39, 0xca, 0x26, 0xf7, 0x07,
+ 0xdb, 0xf8, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0x41, 0xcc, 0x00, 0xfb, 0x07,
+ 0xdb, 0x30, 0x0b, 0xc3, 0x94, 0x45, 0xca, 0x26, 0xf7, 0x07, 0xda, 0xf9,
+ 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0x08, 0x46, 0x03, 0x13, 0xc3, 0x94, 0x51,
+ 0xc4, 0x00, 0x9d, 0x00, 0x33, 0xe1, 0xda, 0x1b, 0x4e, 0x00, 0x33, 0xe8,
+ 0xc6, 0xcb, 0x51, 0x00, 0x31, 0x4b, 0x03, 0x94, 0x5b, 0xca, 0x64, 0x7c,
+ 0x07, 0xf4, 0xc0, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0xa9, 0xcb, 0x10, 0xb5,
+ 0x07, 0xf4, 0xb8, 0xcb, 0x64, 0x7b, 0x07, 0xdb, 0x29, 0x0b, 0xc3, 0x94,
+ 0x5f, 0xca, 0x26, 0xf7, 0x07, 0xdb, 0x18, 0x16, 0xc3, 0x94, 0x6b, 0xc9,
+ 0x0e, 0x6e, 0x00, 0x44, 0x58, 0xcc, 0x00, 0xfb, 0x07, 0xf6, 0x09, 0xcb,
+ 0x10, 0xb5, 0x07, 0xf6, 0x18, 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x59, 0xca,
+ 0x26, 0xf7, 0x07, 0xf5, 0x60, 0x0b, 0xc3, 0x94, 0x77, 0xca, 0x26, 0xf7,
+ 0x07, 0xf4, 0xd1, 0xcb, 0x64, 0x7b, 0x07, 0xf4, 0xe0, 0xcb, 0x10, 0xb5,
+ 0x07, 0xdb, 0x81, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0x70, 0x16, 0xc3, 0x94,
+ 0x83, 0xc7, 0x31, 0x5f, 0x00, 0x36, 0x71, 0xcb, 0x08, 0x09, 0x00, 0x31,
+ 0x32, 0x03, 0x94, 0x95, 0x00, 0x43, 0x94, 0x99, 0xcc, 0x00, 0xfb, 0x07,
+ 0xf7, 0x89, 0xcb, 0x10, 0xb5, 0x07, 0xf7, 0x98, 0x15, 0xc3, 0x94, 0xab,
+ 0xc4, 0xb0, 0x8b, 0x00, 0x45, 0x51, 0xca, 0x35, 0x7a, 0x00, 0x37, 0x79,
+ 0xcf, 0x3b, 0x79, 0x00, 0x34, 0xc9, 0x49, 0x04, 0xf9, 0xc3, 0x94, 0xb7,
+ 0xc9, 0x0e, 0x6e, 0x00, 0x34, 0xa3, 0x03, 0x94, 0xc3, 0xc4, 0x00, 0x9d,
+ 0x00, 0x34, 0x99, 0xcb, 0x08, 0x09, 0x00, 0x3b, 0x60, 0xcc, 0x00, 0xfb,
+ 0x07, 0xdd, 0x01, 0xcb, 0x10, 0xb5, 0x07, 0xdd, 0x10, 0x46, 0x03, 0x13,
+ 0xc3, 0x94, 0xc9, 0xcb, 0x08, 0x09, 0x00, 0x45, 0x09, 0xd6, 0x31, 0x56,
+ 0x00, 0x3a, 0xa9, 0x16, 0xc3, 0x94, 0xd6, 0xde, 0x0e, 0x6e, 0x00, 0x3a,
+ 0x88, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0x79, 0xcb, 0x10, 0xb5, 0x07, 0xf4,
+ 0x88, 0xcb, 0x64, 0x7b, 0x07, 0xda, 0xe9, 0x0b, 0xc3, 0x94, 0xe2, 0xca,
+ 0x26, 0xf7, 0x07, 0xda, 0xd8, 0xcb, 0x10, 0xb5, 0x07, 0xda, 0xa1, 0xcc,
+ 0x00, 0xfb, 0x07, 0xda, 0x90, 0xc5, 0x05, 0x02, 0x00, 0x45, 0x2b, 0x03,
+ 0x94, 0xee, 0xc5, 0x00, 0xd4, 0x00, 0x35, 0x38, 0xcc, 0x00, 0xfb, 0x07,
+ 0xf6, 0x29, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0x38, 0x4a, 0x0e, 0x7d, 0xc3,
+ 0x94, 0xf4, 0xcd, 0x04, 0xfa, 0x00, 0x34, 0xe8, 0xcc, 0x00, 0xfb, 0x07,
+ 0xf5, 0xc9, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0xd8, 0xcc, 0x00, 0xfb, 0x07,
+ 0xf5, 0xa9, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0xb8, 0x16, 0xc3, 0x95, 0x00,
+ 0xd7, 0x29, 0x57, 0x00, 0x34, 0xd1, 0xca, 0x35, 0x7a, 0x00, 0x3b, 0xf1,
+ 0x46, 0x09, 0x3f, 0xc3, 0x95, 0x0f, 0xcf, 0x3b, 0x79, 0x00, 0x3a, 0xe1,
+ 0x44, 0x03, 0x13, 0x43, 0x95, 0x15, 0xcc, 0x00, 0xfb, 0x07, 0xf5, 0x89,
+ 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0x98, 0x45, 0x00, 0x8c, 0xc3, 0x95, 0x1b,
+ 0xcd, 0x00, 0xfa, 0x07, 0xf5, 0x49, 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x50,
+ 0xca, 0x26, 0xf7, 0x07, 0xdc, 0x29, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x20,
+ 0xce, 0x6d, 0xe8, 0x00, 0x37, 0xd9, 0x0b, 0xc3, 0x95, 0x3a, 0xca, 0x26,
+ 0xf7, 0x07, 0xf5, 0xf1, 0xcb, 0x64, 0x7b, 0x07, 0xf6, 0x00, 0xca, 0x26,
+ 0xf7, 0x07, 0xdc, 0x49, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x40, 0xca, 0x26,
+ 0xf7, 0x07, 0xdc, 0x19, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0x10, 0xcb, 0x10,
+ 0xb5, 0x07, 0xdb, 0xa1, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0x90, 0xcb, 0x10,
+ 0xb5, 0x07, 0xdb, 0x61, 0xcc, 0x00, 0xfb, 0x07, 0xdb, 0x50, 0xc6, 0x1b,
+ 0xd1, 0x00, 0x45, 0x59, 0xc5, 0x00, 0xd4, 0x00, 0x36, 0x78, 0x00, 0x43,
+ 0x95, 0x46, 0xc8, 0xbf, 0x42, 0x00, 0x3b, 0xc1, 0xca, 0x9f, 0x72, 0x00,
+ 0x3b, 0xc8, 0xd0, 0x0e, 0x7c, 0x00, 0x45, 0x39, 0x44, 0x05, 0x18, 0x43,
+ 0x95, 0x52, 0xcc, 0x00, 0xfb, 0x07, 0xf7, 0x09, 0xcb, 0x10, 0xb5, 0x07,
+ 0xf7, 0x18, 0xcb, 0x10, 0xb5, 0x07, 0xde, 0xa9, 0xcc, 0x00, 0xfb, 0x07,
+ 0xde, 0x98, 0xcb, 0x64, 0x7b, 0x07, 0xdc, 0xe9, 0x0b, 0xc3, 0x95, 0x5e,
+ 0xca, 0x26, 0xf7, 0x07, 0xdc, 0xd8, 0xd0, 0x31, 0x56, 0x00, 0x44, 0x49,
+ 0x16, 0xc3, 0x95, 0x6a, 0xc4, 0x00, 0x9d, 0x00, 0x35, 0xe1, 0xc9, 0x0e,
+ 0x6e, 0x00, 0x35, 0xc9, 0x46, 0x03, 0x13, 0x43, 0x95, 0x76, 0x00, 0x43,
+ 0x95, 0x80, 0xcc, 0x00, 0xfb, 0x07, 0xf7, 0x29, 0xcb, 0x10, 0xb5, 0x07,
+ 0xf7, 0x38, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0xc1, 0xcc, 0x00, 0xfb, 0x07,
+ 0xdb, 0xb0, 0x45, 0x00, 0x8c, 0xc3, 0x95, 0x8c, 0x0b, 0xc3, 0x95, 0xa8,
+ 0xca, 0x26, 0xf7, 0x07, 0xf5, 0x11, 0xcb, 0x64, 0x7b, 0x07, 0xf5, 0x20,
+ 0x00, 0x43, 0x95, 0xb4, 0x00, 0x43, 0x95, 0xc4, 0xc9, 0xab, 0xeb, 0x00,
+ 0x36, 0x03, 0x03, 0x95, 0xda, 0xca, 0x35, 0x7a, 0x00, 0x37, 0xf8, 0xcc,
+ 0x00, 0xfb, 0x07, 0xf7, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xf7, 0x58, 0xc2,
+ 0x16, 0x1c, 0x0f, 0x75, 0xb1, 0xc2, 0x00, 0x65, 0x0f, 0x75, 0xc0, 0xc4,
+ 0x3a, 0x01, 0x0f, 0x72, 0xe9, 0xc3, 0x0f, 0x9a, 0x0f, 0x72, 0xf8, 0xe0,
+ 0x0a, 0x47, 0x0f, 0xdd, 0x68, 0xd0, 0x04, 0xd7, 0x0f, 0xdd, 0x60, 0xd0,
+ 0x13, 0xe9, 0x0f, 0xdd, 0x30, 0x00, 0x43, 0x95, 0xde, 0x00, 0x43, 0x95,
+ 0xed, 0x4b, 0x18, 0x04, 0xc3, 0x95, 0xfc, 0xdc, 0x13, 0xf9, 0x0f, 0xd2,
+ 0x30, 0xc5, 0x6b, 0x02, 0x0f, 0xaf, 0xc9, 0xc8, 0x8e, 0xa5, 0x0f, 0xaf,
+ 0xb8, 0xc2, 0x10, 0x11, 0x0b, 0x4e, 0x39, 0x90, 0x0b, 0x4c, 0xa9, 0x9a,
+ 0x0b, 0x4c, 0x40, 0xc3, 0x14, 0x83, 0x0b, 0x4d, 0xc8, 0x8f, 0x0b, 0x4e,
+ 0x59, 0x92, 0x0b, 0x4d, 0xb0, 0xc3, 0x7c, 0x57, 0x0b, 0x4c, 0x49, 0x9a,
+ 0x0b, 0x4b, 0xf8, 0x92, 0x0b, 0x4e, 0x81, 0xcb, 0x99, 0x3f, 0x0b, 0x4c,
+ 0x99, 0xc3, 0x82, 0x78, 0x0b, 0x4c, 0x30, 0xc3, 0x8b, 0xa9, 0x0b, 0x4d,
+ 0xfb, 0x03, 0x96, 0x08, 0xc3, 0xd0, 0xd7, 0x0b, 0x4c, 0x68, 0xc8, 0xb9,
+ 0xd2, 0x0b, 0x4e, 0xe9, 0xc8, 0xbb, 0x72, 0x0b, 0x4c, 0x90, 0xc6, 0xcc,
+ 0xa7, 0x0b, 0x4f, 0x40, 0x92, 0x0b, 0x4a, 0x19, 0xc2, 0x00, 0xc2, 0x0b,
+ 0x49, 0x8a, 0x03, 0x96, 0x0c, 0xc3, 0x8b, 0xaa, 0x0b, 0x49, 0x49, 0xc2,
+ 0x00, 0x2c, 0x0b, 0x48, 0x80, 0x9a, 0x0b, 0x4a, 0xa9, 0xc2, 0x10, 0x11,
+ 0x0b, 0x48, 0x08, 0xc3, 0xd7, 0xe2, 0x0b, 0x47, 0x01, 0xc6, 0xd2, 0x83,
+ 0x0b, 0x44, 0xf8, 0xc3, 0x49, 0x27, 0x0b, 0x46, 0x91, 0x8f, 0x0b, 0x45,
+ 0xd9, 0xc2, 0x00, 0x45, 0x0b, 0x45, 0xa9, 0xc8, 0xb9, 0x5a, 0x0b, 0x45,
+ 0x80, 0xc6, 0xce, 0x15, 0x0b, 0x47, 0x19, 0xcc, 0x8b, 0xb9, 0x0b, 0x44,
+ 0xf0, 0x9a, 0x0b, 0x47, 0x09, 0x8f, 0x0b, 0x44, 0xd8, 0xc6, 0x17, 0x13,
+ 0x0b, 0x43, 0xd8, 0xc4, 0x61, 0x79, 0x0b, 0x41, 0x59, 0xc4, 0xde, 0xc7,
+ 0x0b, 0x40, 0x71, 0xc6, 0xcd, 0x43, 0x0b, 0x40, 0x58, 0xc4, 0xe4, 0x7b,
+ 0x0b, 0x41, 0x11, 0xc4, 0xe4, 0x9b, 0x0b, 0x40, 0xc8, 0xa3, 0x01, 0x41,
+ 0xfb, 0x03, 0x96, 0x12, 0xa5, 0x01, 0x44, 0xf9, 0xa4, 0x01, 0x42, 0xfa,
+ 0x03, 0x96, 0x1d, 0xa5, 0x01, 0x45, 0x79, 0xa4, 0x01, 0x43, 0x7a, 0x03,
+ 0x96, 0x21, 0xa5, 0x01, 0x46, 0x78, 0xa5, 0x01, 0x45, 0xb9, 0xa4, 0x01,
+ 0x43, 0xba, 0x03, 0x96, 0x25, 0xa5, 0x01, 0x46, 0xb8, 0xa5, 0x01, 0x47,
+ 0x38, 0xa5, 0x01, 0x45, 0xd9, 0xa4, 0x01, 0x43, 0xda, 0x03, 0x96, 0x29,
+ 0xa5, 0x01, 0x46, 0xd8, 0xa5, 0x01, 0x47, 0x58, 0xa5, 0x01, 0x47, 0x98,
+ 0xa5, 0x01, 0x45, 0xe9, 0xa4, 0x01, 0x43, 0xea, 0x03, 0x96, 0x2d, 0xa5,
+ 0x01, 0x46, 0xe8, 0xa5, 0x01, 0x47, 0x68, 0xa5, 0x01, 0x47, 0xa8, 0xa5,
+ 0x01, 0x47, 0xc8, 0xa5, 0x01, 0x45, 0xf1, 0xa4, 0x01, 0x43, 0xf2, 0x03,
+ 0x96, 0x31, 0xa5, 0x01, 0x46, 0xf0, 0xa5, 0x01, 0x47, 0x70, 0xa5, 0x01,
+ 0x47, 0xb0, 0xa5, 0x01, 0x47, 0xd0, 0xa5, 0x01, 0x47, 0xe0, 0xd0, 0x57,
+ 0xc2, 0x0f, 0xc1, 0x81, 0xcb, 0x57, 0xc7, 0x0f, 0xc1, 0x61, 0x49, 0xa8,
+ 0xdc, 0xc3, 0x96, 0x35, 0x47, 0x00, 0x58, 0xc3, 0x96, 0x41, 0xcc, 0x84,
+ 0x09, 0x0f, 0xc1, 0x01, 0xcc, 0x82, 0x1d, 0x0f, 0xc1, 0x21, 0xca, 0xa0,
+ 0x08, 0x0f, 0xc1, 0x40, 0xe0, 0x03, 0x87, 0x01, 0x5c, 0x00, 0x46, 0x00,
+ 0x8b, 0x43, 0x96, 0x4d, 0xe0, 0x06, 0xe7, 0x01, 0x4b, 0x38, 0x0e, 0xc3,
+ 0x96, 0x59, 0xdf, 0x0c, 0xc2, 0x01, 0x4b, 0x30, 0xc5, 0xdb, 0xbe, 0x08,
+ 0x04, 0x39, 0xc5, 0xdc, 0x81, 0x08, 0x04, 0x30, 0xca, 0x9d, 0x88, 0x08,
+ 0x04, 0x41, 0xc9, 0xa9, 0xb4, 0x08, 0x04, 0x48, 0xc5, 0xdc, 0x77, 0x08,
+ 0x04, 0x51, 0xc6, 0xd3, 0x67, 0x08, 0x04, 0x58, 0xc5, 0xdc, 0x04, 0x08,
+ 0x04, 0x61, 0xc6, 0xd3, 0x6d, 0x08, 0x04, 0x68, 0xc6, 0xcc, 0xb3, 0x08,
+ 0x04, 0x19, 0xc6, 0xd2, 0x0b, 0x08, 0x04, 0x21, 0xca, 0xa7, 0x38, 0x08,
+ 0x04, 0x28, 0xce, 0x16, 0x0f, 0x00, 0xf3, 0x38, 0xce, 0x16, 0x0f, 0x00,
+ 0xf3, 0x48, 0xce, 0x01, 0x19, 0x00, 0xec, 0xa9, 0xc4, 0x01, 0x23, 0x00,
+ 0x12, 0xd0, 0xca, 0xa2, 0xb0, 0x05, 0x5a, 0x60, 0xd2, 0x4d, 0x0f, 0x05,
+ 0x59, 0xb0, 0xcc, 0x23, 0x3f, 0x00, 0xe8, 0x99, 0xc5, 0xd4, 0x9d, 0x00,
+ 0xe8, 0x90, 0xca, 0x9b, 0xda, 0x00, 0xf0, 0x48, 0x46, 0x00, 0x8b, 0x43,
+ 0x96, 0x65, 0xca, 0x45, 0x1d, 0x0e, 0xf8, 0x68, 0xca, 0xa8, 0x14, 0x0e,
+ 0xf8, 0x30, 0x87, 0x00, 0xe8, 0xa3, 0x03, 0x96, 0x86, 0xc5, 0x21, 0xa4,
+ 0x00, 0xe8, 0x41, 0xc7, 0xc5, 0xc9, 0x05, 0x5a, 0x1a, 0x03, 0x96, 0x8c,
+ 0xc8, 0x67, 0x21, 0x05, 0x3b, 0xf8, 0x87, 0x00, 0xe8, 0x11, 0xc4, 0xde,
+ 0x3f, 0x00, 0x12, 0x90, 0xce, 0x61, 0x6c, 0x00, 0x15, 0x72, 0x03, 0x96,
+ 0x92, 0xce, 0x74, 0x86, 0x00, 0x13, 0x80, 0xc6, 0x20, 0xab, 0x00, 0xf4,
+ 0xb9, 0xcc, 0x3e, 0xb0, 0x01, 0x63, 0x30, 0xc5, 0x05, 0x02, 0x00, 0xf3,
+ 0x69, 0xc5, 0x00, 0xd4, 0x00, 0xf3, 0x58, 0xd2, 0x25, 0xf1, 0x05, 0x3b,
+ 0x38, 0x45, 0x02, 0x9a, 0x43, 0x96, 0x98, 0x45, 0x02, 0x9a, 0x43, 0x96,
+ 0xb6, 0x42, 0x00, 0x30, 0xc3, 0x96, 0xd4, 0x45, 0x00, 0x5a, 0x43, 0x96,
+ 0xe3, 0xcb, 0x98, 0x58, 0x00, 0x11, 0x58, 0xc5, 0x31, 0xee, 0x00, 0xf2,
+ 0x99, 0xc5, 0x1f, 0x0c, 0x00, 0xf2, 0x88, 0xc9, 0x20, 0xa8, 0x00, 0xf2,
+ 0x79, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0x69, 0xc6, 0x60, 0xb1, 0x00, 0x11,
+ 0x68, 0xce, 0x01, 0x19, 0x00, 0xec, 0xb9, 0xc6, 0x01, 0x73, 0x05, 0x59,
+ 0xf8, 0xc7, 0x0e, 0x70, 0x00, 0xf6, 0x59, 0xca, 0x1f, 0x07, 0x00, 0x10,
+ 0x48, 0xca, 0x9b, 0xda, 0x00, 0xf1, 0x78, 0xce, 0x01, 0x19, 0x0e, 0xf8,
+ 0xc9, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0x90, 0x46, 0x00, 0x8b, 0x43, 0x96,
+ 0xef, 0xd2, 0x4d, 0x0f, 0x05, 0x5a, 0x50, 0xcc, 0x23, 0x3f, 0x00, 0x12,
+ 0xfa, 0x03, 0x96, 0xfb, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xc1, 0xce, 0x01,
+ 0x19, 0x00, 0xec, 0xd1, 0x05, 0xc3, 0x97, 0x01, 0xc4, 0x14, 0xa6, 0x00,
+ 0x0d, 0xd0, 0xc9, 0xaa, 0x95, 0x0e, 0xf8, 0x60, 0x00, 0x43, 0x97, 0x0d,
+ 0xca, 0x9b, 0x80, 0x00, 0xf0, 0xe8, 0x42, 0x00, 0x30, 0xc3, 0x97, 0x19,
+ 0xca, 0x1f, 0x07, 0x00, 0x10, 0x28, 0xc5, 0x31, 0xee, 0x00, 0xf0, 0xb9,
+ 0xc5, 0x1f, 0x0c, 0x00, 0xf0, 0xa8, 0xc8, 0x61, 0x72, 0x00, 0x13, 0xf3,
+ 0x03, 0x97, 0x25, 0x0e, 0xc3, 0x97, 0x2b, 0x42, 0x00, 0x58, 0xc3, 0x97,
+ 0x37, 0xcc, 0x51, 0x28, 0x00, 0xec, 0x49, 0xcc, 0x1e, 0xc1, 0x00, 0xeb,
+ 0x91, 0x05, 0xc3, 0x97, 0x43, 0xc4, 0x14, 0xa6, 0x00, 0x13, 0xe9, 0xce,
+ 0x38, 0xe6, 0x05, 0x3d, 0x39, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0xa9, 0xce,
+ 0x1d, 0x93, 0x00, 0x10, 0x99, 0xc6, 0x01, 0x73, 0x00, 0x12, 0x68, 0xce,
+ 0x01, 0x19, 0x00, 0xec, 0xa1, 0xc4, 0x01, 0x23, 0x00, 0x12, 0xe8, 0xd1,
+ 0x51, 0x23, 0x0e, 0xf8, 0x98, 0xcb, 0x98, 0x58, 0x00, 0xf1, 0xc8, 0xcc,
+ 0x1e, 0xc1, 0x05, 0x59, 0xc1, 0xc3, 0x01, 0x5d, 0x01, 0x63, 0x08, 0xce,
+ 0x3e, 0xae, 0x00, 0xf4, 0xe1, 0xc8, 0x16, 0x15, 0x00, 0xf4, 0xd8, 0xc5,
+ 0x05, 0x02, 0x00, 0xf7, 0xa9, 0xc5, 0x00, 0xd4, 0x00, 0xf4, 0x78, 0xc2,
+ 0x00, 0xc0, 0x00, 0x0d, 0x83, 0x03, 0x97, 0x55, 0xc8, 0x9e, 0x5c, 0x00,
+ 0xf7, 0x38, 0x11, 0xc3, 0x97, 0x5b, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0xe2,
+ 0x03, 0x97, 0x67, 0xce, 0x74, 0xe8, 0x00, 0xf3, 0xd8, 0x00, 0x43, 0x97,
+ 0x6b, 0xc9, 0x08, 0xf7, 0x00, 0x07, 0xdb, 0x03, 0x97, 0x77, 0xc4, 0x65,
+ 0xe2, 0x00, 0x0e, 0xa0, 0xcd, 0x01, 0x1a, 0x00, 0xec, 0xc9, 0xc9, 0x9e,
+ 0xe7, 0x00, 0x0b, 0x78, 0xce, 0x36, 0x39, 0x05, 0x5a, 0x71, 0xc5, 0x01,
+ 0x74, 0x05, 0x3d, 0xc8, 0x45, 0x02, 0x9a, 0x43, 0x97, 0x7d, 0xc9, 0x08,
+ 0xf7, 0x00, 0x07, 0x13, 0x03, 0x97, 0x9b, 0xc4, 0x65, 0xe2, 0x00, 0x0e,
+ 0x70, 0x11, 0xc3, 0x97, 0xa1, 0xc8, 0x20, 0xa9, 0x00, 0x07, 0x22, 0x03,
+ 0x97, 0xad, 0x0b, 0xc3, 0x97, 0xb3, 0xcd, 0x01, 0x1a, 0x00, 0xec, 0x78,
+ 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x49, 0xc5, 0x00, 0xd4, 0x00, 0xf4, 0x38,
+ 0xc5, 0x05, 0x02, 0x00, 0xf1, 0x29, 0xc5, 0x00, 0xd4, 0x00, 0xf1, 0x18,
+ 0xc5, 0x05, 0x02, 0x00, 0xf4, 0x99, 0xc5, 0x00, 0xd4, 0x00, 0x0b, 0xe0,
+ 0x00, 0x43, 0x97, 0xbf, 0xd2, 0x25, 0xf1, 0x05, 0x3a, 0x88, 0xcf, 0x68,
+ 0x82, 0x00, 0xf2, 0x59, 0xcb, 0x4d, 0x16, 0x05, 0x59, 0xd9, 0xc6, 0xbd,
+ 0xf4, 0x00, 0x0a, 0x31, 0xc4, 0x65, 0xe2, 0x00, 0x0a, 0x41, 0xc3, 0x00,
+ 0x33, 0x00, 0x11, 0xa8, 0xc9, 0x64, 0x14, 0x00, 0xf2, 0x49, 0xc8, 0x6d,
+ 0x46, 0x00, 0x13, 0x91, 0xcd, 0x7b, 0x08, 0x00, 0x0c, 0xf0, 0x43, 0x05,
+ 0x19, 0xc3, 0x97, 0xcb, 0xc8, 0x25, 0xfb, 0x05, 0x3c, 0x88, 0x45, 0x02,
+ 0x9a, 0x43, 0x97, 0xd7, 0xc7, 0x0e, 0x70, 0x00, 0xf7, 0x21, 0x45, 0x00,
+ 0x5a, 0x43, 0x97, 0xf5, 0x00, 0x43, 0x98, 0x01, 0xc9, 0x9b, 0xdb, 0x00,
+ 0xf3, 0xc1, 0xc5, 0x05, 0x02, 0x00, 0xf3, 0xa0, 0xc6, 0x05, 0x01, 0x00,
+ 0xf3, 0xb0, 0xc9, 0x0e, 0x6e, 0x00, 0xf7, 0x11, 0xc5, 0x1e, 0xc8, 0x00,
+ 0xf7, 0x01, 0xca, 0x9e, 0x5a, 0x00, 0xf6, 0xf1, 0xc5, 0x1f, 0x0c, 0x00,
+ 0xf6, 0xe1, 0xc5, 0x31, 0xee, 0x00, 0xf6, 0xd0, 0xc9, 0x0e, 0x6e, 0x00,
+ 0xf6, 0xc1, 0xc5, 0x1e, 0xc8, 0x00, 0xf6, 0xb1, 0xca, 0x9e, 0x5a, 0x00,
+ 0xf6, 0xa1, 0xc5, 0x1f, 0x0c, 0x00, 0xf6, 0x91, 0xc5, 0x31, 0xee, 0x00,
+ 0xf6, 0x80, 0xc5, 0x05, 0x02, 0x00, 0xf6, 0x61, 0xc5, 0x00, 0xd4, 0x00,
+ 0x11, 0x72, 0x03, 0x98, 0x0d, 0xc5, 0x31, 0xee, 0x00, 0x0a, 0x81, 0xc5,
+ 0x1f, 0x0c, 0x00, 0x10, 0x60, 0xc5, 0x31, 0xee, 0x00, 0xf2, 0x91, 0xc5,
+ 0x1f, 0x0c, 0x00, 0xf2, 0x80, 0xc5, 0x05, 0x02, 0x00, 0xf6, 0x51, 0xc5,
+ 0x00, 0xd4, 0x00, 0x09, 0x80, 0x44, 0x02, 0x9b, 0xc3, 0x98, 0x13, 0xc5,
+ 0x05, 0x02, 0x00, 0xf0, 0xc0, 0xc5, 0x05, 0x02, 0x00, 0xf5, 0xc1, 0xc5,
+ 0x00, 0xd4, 0x00, 0x08, 0xb0, 0xc9, 0x0e, 0x6e, 0x00, 0xf5, 0x61, 0xc5,
+ 0x1e, 0xc8, 0x00, 0xf5, 0x51, 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x41, 0xc5,
+ 0x1f, 0x0c, 0x00, 0xf5, 0x31, 0xc5, 0x31, 0xee, 0x00, 0xf5, 0x20, 0xc5,
+ 0x05, 0x02, 0x00, 0xf5, 0x01, 0xc5, 0x00, 0xd4, 0x00, 0x11, 0x32, 0x03,
+ 0x98, 0x31, 0xc5, 0x05, 0x02, 0x00, 0xf2, 0xd3, 0x03, 0x98, 0x37, 0xc5,
+ 0x00, 0xd4, 0x00, 0xf2, 0xc0, 0xca, 0x03, 0x87, 0x01, 0x5d, 0x19, 0xc9,
+ 0x01, 0x88, 0x01, 0x5d, 0x10, 0xc7, 0xc2, 0x03, 0x00, 0x89, 0x98, 0x02,
+ 0x43, 0x98, 0x3d, 0xc4, 0xad, 0x2b, 0x00, 0x89, 0xe9, 0xc5, 0xdb, 0xff,
+ 0x00, 0x8a, 0x78, 0x91, 0x00, 0x8c, 0xf8, 0x91, 0x00, 0x8b, 0xe9, 0x97,
+ 0x00, 0x8b, 0xf1, 0xc2, 0x19, 0x2c, 0x00, 0x8d, 0x28, 0x83, 0x00, 0x8c,
+ 0x23, 0x03, 0x98, 0x53, 0xc2, 0x02, 0x66, 0x00, 0x8c, 0x30, 0x87, 0x06,
+ 0xbd, 0x98, 0x87, 0x06, 0xbd, 0xb8, 0x91, 0x00, 0x8c, 0x78, 0x91, 0x00,
+ 0x8c, 0x88, 0x97, 0x00, 0x8c, 0xb1, 0x91, 0x06, 0xbd, 0xd0, 0x91, 0x06,
+ 0xbd, 0x80, 0x87, 0x00, 0x8d, 0x38, 0xc2, 0x37, 0xea, 0x06, 0xbd, 0xe9,
+ 0x87, 0x06, 0xbd, 0xf0, 0x91, 0x06, 0xbd, 0xf8, 0xc7, 0xc2, 0x03, 0x00,
+ 0x8e, 0x20, 0xc6, 0x8e, 0xde, 0x06, 0xbf, 0x61, 0xc6, 0xc0, 0x7c, 0x06,
+ 0xbf, 0x68, 0xc5, 0x8e, 0xdf, 0x00, 0x8f, 0x39, 0xcc, 0x79, 0xeb, 0x06,
+ 0xbf, 0x58, 0xc5, 0xc0, 0x7d, 0x00, 0x8f, 0x41, 0xc6, 0xc1, 0x86, 0x06,
+ 0xbf, 0x88, 0xc4, 0x79, 0xf3, 0x00, 0x8f, 0x51, 0xc6, 0xca, 0x0e, 0x06,
+ 0xbf, 0x70, 0xc4, 0xc6, 0x7a, 0x06, 0xbf, 0x79, 0xc6, 0xc6, 0x79, 0x06,
+ 0xbf, 0x80, 0xc7, 0xc2, 0x03, 0x06, 0xbe, 0x88, 0xc4, 0xc6, 0x7a, 0x06,
+ 0xbe, 0x91, 0xc6, 0xc6, 0x79, 0x06, 0xbe, 0x98, 0x02, 0x43, 0x98, 0x57,
+ 0xc6, 0x8e, 0xde, 0x00, 0x8e, 0x89, 0xc4, 0xad, 0x2b, 0x00, 0x8e, 0x91,
+ 0xc5, 0x90, 0xe4, 0x06, 0xbe, 0xc0, 0x02, 0x43, 0x98, 0x63, 0xc4, 0xad,
+ 0x2b, 0x00, 0x8e, 0xb1, 0xc6, 0x8e, 0xde, 0x06, 0xbe, 0xa8, 0xc6, 0xce,
+ 0xb1, 0x00, 0x8e, 0x78, 0xc6, 0xce, 0xb1, 0x06, 0xbe, 0xe0, 0xc5, 0xd9,
+ 0xca, 0x06, 0xbf, 0x08, 0xc4, 0xad, 0x2b, 0x00, 0x8e, 0xf1, 0xc5, 0xd9,
+ 0x61, 0x06, 0xbe, 0xf8, 0xc7, 0xc0, 0x7b, 0x06, 0xbf, 0x38, 0xc8, 0xba,
+ 0x7a, 0x06, 0xbf, 0x20, 0xc4, 0xc6, 0x7a, 0x06, 0xbf, 0x41, 0xc6, 0xc6,
+ 0x79, 0x06, 0xbf, 0x48, 0xc5, 0x8e, 0xdf, 0x00, 0x8f, 0x61, 0xc6, 0xbb,
+ 0xec, 0x00, 0x8f, 0x78, 0xca, 0x8e, 0xda, 0x00, 0x8f, 0x69, 0xc3, 0x39,
+ 0x37, 0x00, 0x8f, 0x88, 0xc6, 0x8e, 0xde, 0x01, 0x8b, 0xa1, 0xc6, 0xc0,
+ 0x7c, 0x01, 0x8b, 0xa8, 0xc3, 0x22, 0x45, 0x01, 0x9f, 0x59, 0xc3, 0x18,
+ 0x13, 0x01, 0x9f, 0x9a, 0x03, 0x98, 0x7b, 0xc3, 0x03, 0x26, 0x01, 0x9f,
+ 0x61, 0x9b, 0x01, 0x9f, 0xea, 0x03, 0x98, 0x7f, 0x02, 0x43, 0x98, 0x85,
+ 0xd3, 0x45, 0x4d, 0x0f, 0xd1, 0x81, 0xcf, 0x18, 0x0f, 0x0f, 0xd1, 0xb8,
+ 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x88, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x80,
+ 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x78, 0xc9, 0x57, 0x20, 0x08, 0x4f, 0x70,
+ 0xce, 0x74, 0x86, 0x00, 0xed, 0x68, 0xc4, 0xde, 0x3f, 0x00, 0xec, 0xd9,
+ 0x87, 0x00, 0xea, 0x30, 0x46, 0x00, 0x8b, 0x43, 0x98, 0x95, 0xca, 0xa8,
+ 0x14, 0x08, 0x3d, 0x08, 0xca, 0xa8, 0x14, 0x08, 0x3c, 0xe0, 0xcc, 0x23,
+ 0x3f, 0x00, 0xed, 0x39, 0xc9, 0xab, 0xb5, 0x00, 0x15, 0xb0, 0xca, 0x1f,
+ 0x59, 0x08, 0x3c, 0xa0, 0xc9, 0xaa, 0xcb, 0x08, 0x3c, 0xe8, 0xc9, 0xa9,
+ 0x2d, 0x08, 0x3c, 0x68, 0xc4, 0x00, 0x32, 0x08, 0x3c, 0x49, 0xce, 0x01,
+ 0x19, 0x08, 0x3c, 0x40, 0xc8, 0x4e, 0x93, 0x05, 0x38, 0x59, 0xd2, 0x4e,
+ 0x89, 0x05, 0x38, 0x80, 0xc4, 0x01, 0x9b, 0x00, 0x17, 0x88, 0xc8, 0x4e,
+ 0x93, 0x05, 0x38, 0x51, 0xd2, 0x4e, 0x89, 0x05, 0x38, 0x78, 0xcc, 0x1f,
+ 0x0c, 0x00, 0x17, 0xa9, 0xcc, 0x83, 0x0d, 0x00, 0x17, 0xb0, 0xc3, 0x11,
+ 0x7e, 0x0e, 0xbe, 0x11, 0xc5, 0xd8, 0x8f, 0x0e, 0xbd, 0xc0, 0xc3, 0x11,
+ 0x7e, 0x0e, 0xbd, 0x41, 0xc5, 0xd8, 0x8f, 0x0e, 0xbc, 0xf0, 0xc7, 0x00,
+ 0x90, 0x0e, 0xbd, 0x08, 0xc2, 0x02, 0xae, 0x0e, 0x8f, 0x39, 0xc4, 0x03,
+ 0xc8, 0x0e, 0x8f, 0x30, 0xc4, 0x2c, 0x0d, 0x0e, 0x8e, 0x31, 0xc5, 0x02,
+ 0xc2, 0x0e, 0x8d, 0xf1, 0xc5, 0x01, 0xfc, 0x0e, 0x8d, 0xe8, 0xc4, 0x2c,
+ 0x0d, 0x0e, 0x8e, 0x21, 0xc5, 0x02, 0xc2, 0x0e, 0x8d, 0xd1, 0xc5, 0x01,
+ 0xfc, 0x0e, 0x8d, 0xc8, 0x49, 0xaf, 0xd2, 0xc3, 0x98, 0xa4, 0x46, 0x67,
+ 0x3c, 0x43, 0x98, 0xb0, 0xd0, 0x5b, 0x02, 0x0e, 0x88, 0xe1, 0xca, 0x74,
+ 0x98, 0x0e, 0x88, 0xd8, 0x4c, 0x7e, 0x07, 0x43, 0x98, 0xbc, 0xcd, 0x7e,
+ 0x07, 0x0e, 0x8e, 0x48, 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0xa9, 0xc5, 0x01,
+ 0xfc, 0x0e, 0x8a, 0xa0, 0x43, 0x11, 0x49, 0xc3, 0x98, 0xc8, 0x45, 0x11,
+ 0x17, 0xc3, 0x98, 0xda, 0x46, 0x00, 0x2c, 0xc3, 0x98, 0xe6, 0x45, 0x00,
+ 0x49, 0x43, 0x98, 0xf2, 0x15, 0xc3, 0x98, 0xfe, 0xc8, 0xbe, 0xfa, 0x0e,
+ 0x8d, 0x61, 0xc6, 0xcd, 0x9d, 0x0e, 0x8d, 0x59, 0x42, 0x00, 0x58, 0xc3,
+ 0x99, 0x14, 0x16, 0xc3, 0x99, 0x26, 0xc4, 0x93, 0xd1, 0x0e, 0x8c, 0x49,
+ 0x42, 0x01, 0x09, 0xc3, 0x99, 0x30, 0xc3, 0x07, 0x30, 0x0e, 0x8c, 0x31,
+ 0xc5, 0xdb, 0x69, 0x0e, 0x8c, 0x11, 0x03, 0xc3, 0x99, 0x3c, 0xc7, 0xc2,
+ 0x73, 0x0e, 0x8b, 0xfa, 0x03, 0x99, 0x4b, 0xc2, 0x00, 0xfa, 0x0e, 0x8d,
+ 0xc3, 0x03, 0x99, 0x51, 0x87, 0x0e, 0x8a, 0xe0, 0xa0, 0x0e, 0x8b, 0x61,
+ 0x9f, 0x0e, 0x8b, 0x59, 0x9e, 0x0e, 0x8b, 0x50, 0xa0, 0x0e, 0x88, 0x79,
+ 0x9f, 0x0e, 0x88, 0x71, 0x9e, 0x0e, 0x88, 0x68, 0x12, 0xc3, 0x99, 0x57,
+ 0xc4, 0xe3, 0xab, 0x00, 0xff, 0xd9, 0xc5, 0x28, 0x47, 0x00, 0xff, 0xd1,
+ 0xc5, 0x6c, 0xa6, 0x00, 0xfb, 0x4b, 0x03, 0x99, 0x66, 0xc5, 0x63, 0xdc,
+ 0x00, 0x1c, 0x78, 0xc4, 0xe3, 0xab, 0x00, 0xff, 0xc9, 0xc5, 0x28, 0x47,
+ 0x00, 0xff, 0xc1, 0xc5, 0x6c, 0xa6, 0x00, 0xfa, 0x4b, 0x03, 0x99, 0x6c,
+ 0xc5, 0xd8, 0xc1, 0x00, 0xfa, 0x43, 0x03, 0x99, 0x72, 0xc5, 0x63, 0xdc,
+ 0x00, 0x1c, 0x60, 0xc4, 0x28, 0x48, 0x00, 0xff, 0x51, 0xc5, 0xd6, 0x41,
+ 0x00, 0xff, 0x40, 0xc4, 0x59, 0x13, 0x00, 0xfa, 0xcb, 0x03, 0x99, 0x78,
+ 0xc8, 0x63, 0xd3, 0x00, 0x1d, 0x58, 0xc4, 0x28, 0x48, 0x00, 0xfe, 0xd1,
+ 0xc5, 0xd6, 0x41, 0x00, 0xfe, 0xc0, 0xc4, 0x59, 0x13, 0x00, 0xf9, 0xcb,
+ 0x03, 0x99, 0x7e, 0xc8, 0x63, 0xd3, 0x00, 0x1d, 0x50, 0x45, 0x02, 0x9a,
+ 0x43, 0x99, 0x84, 0x12, 0xc3, 0x99, 0x96, 0xc4, 0xe3, 0xab, 0x00, 0xfe,
+ 0x59, 0xc5, 0x28, 0x47, 0x00, 0xfe, 0x51, 0xc5, 0x6c, 0xa6, 0x00, 0xf9,
+ 0x4b, 0x03, 0x99, 0xa5, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x48, 0xc4, 0xe3,
+ 0xab, 0x00, 0xfe, 0x49, 0xc5, 0x28, 0x47, 0x00, 0xfe, 0x41, 0xc5, 0x6c,
+ 0xa6, 0x00, 0xf8, 0xcb, 0x03, 0x99, 0xab, 0xc5, 0xd8, 0xc1, 0x00, 0xf8,
+ 0xc3, 0x03, 0x99, 0xb1, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x40, 0x12, 0xc3,
+ 0x99, 0xb7, 0xc4, 0xe3, 0xab, 0x00, 0xfd, 0xd9, 0x18, 0xc3, 0x99, 0xc6,
+ 0xc6, 0x60, 0xb1, 0x00, 0xfd, 0xc9, 0xc5, 0x6c, 0xa6, 0x00, 0xf8, 0x4b,
+ 0x03, 0x99, 0xd2, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x30, 0x12, 0xc3, 0x99,
+ 0xd8, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xeb, 0x03, 0x99, 0xea, 0xcd, 0x4a,
+ 0x68, 0x00, 0xff, 0x99, 0xc5, 0x28, 0x47, 0x00, 0xfb, 0xe3, 0x03, 0x99,
+ 0xf0, 0xc5, 0x6c, 0xa6, 0x00, 0xfb, 0x0b, 0x03, 0x99, 0xf6, 0xc5, 0x63,
+ 0xdc, 0x00, 0x1e, 0x70, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xc9, 0xc5, 0x28,
+ 0x47, 0x00, 0xfb, 0xc1, 0xc5, 0x6c, 0xa6, 0x00, 0xfa, 0x0b, 0x03, 0x99,
+ 0xfc, 0xc5, 0xd8, 0xc1, 0x00, 0xfa, 0x03, 0x03, 0x9a, 0x02, 0xc5, 0x63,
+ 0xdc, 0x00, 0x1e, 0x60, 0xc8, 0x63, 0xd3, 0x00, 0x1e, 0x5b, 0x03, 0x9a,
+ 0x08, 0xc4, 0x59, 0x13, 0x00, 0xfa, 0x8a, 0x03, 0x9a, 0x0e, 0xca, 0x94,
+ 0x91, 0x00, 0xff, 0x31, 0xc4, 0x7a, 0x04, 0x00, 0xfa, 0x82, 0x03, 0x9a,
+ 0x14, 0xc5, 0xd6, 0x41, 0x00, 0xff, 0x01, 0xc4, 0x28, 0x48, 0x00, 0xfb,
+ 0xd0, 0xc8, 0x63, 0xd3, 0x00, 0x1e, 0x53, 0x03, 0x9a, 0x1a, 0xc4, 0x59,
+ 0x13, 0x00, 0xf9, 0x8a, 0x03, 0x9a, 0x20, 0xca, 0x94, 0x91, 0x00, 0xfe,
+ 0xb1, 0xc4, 0x7a, 0x04, 0x00, 0xf9, 0x82, 0x03, 0x9a, 0x26, 0xc5, 0xd6,
+ 0x41, 0x00, 0xfe, 0x81, 0xc4, 0x28, 0x48, 0x00, 0xfb, 0xb0, 0x12, 0xc3,
+ 0x9a, 0x2c, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0xab, 0x03, 0x9a, 0x3e, 0xcd,
+ 0x4a, 0x68, 0x00, 0xfe, 0x19, 0xc5, 0x28, 0x47, 0x00, 0xfb, 0xa3, 0x03,
+ 0x9a, 0x44, 0xc5, 0x6c, 0xa6, 0x00, 0xf9, 0x0b, 0x03, 0x9a, 0x4a, 0xc5,
+ 0x63, 0xdc, 0x00, 0x1d, 0x70, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x99, 0xc5,
+ 0x28, 0x47, 0x00, 0xfb, 0x91, 0xc5, 0x6c, 0xa6, 0x00, 0xf8, 0x8b, 0x03,
+ 0x9a, 0x50, 0xc5, 0xd8, 0xc1, 0x00, 0xf8, 0x83, 0x03, 0x9a, 0x56, 0xc5,
+ 0x63, 0xdc, 0x00, 0x1d, 0x68, 0x12, 0xc3, 0x9a, 0x5c, 0xc4, 0xe3, 0xab,
+ 0x00, 0xfb, 0x8b, 0x03, 0x9a, 0x6e, 0xcd, 0x4a, 0x68, 0x00, 0xfd, 0x99,
+ 0x18, 0xc3, 0x9a, 0x74, 0xc6, 0x60, 0xb1, 0x00, 0xfd, 0x89, 0xc5, 0x6c,
+ 0xa6, 0x00, 0xf8, 0x0b, 0x03, 0x9a, 0x83, 0xc5, 0x63, 0xdc, 0x00, 0x1d,
+ 0x60, 0xc7, 0xb9, 0xdb, 0x08, 0x0a, 0x59, 0xc7, 0x67, 0xc7, 0x08, 0x0a,
+ 0x90, 0xc7, 0x0d, 0x04, 0x08, 0x0a, 0x2b, 0x03, 0x9a, 0x89, 0x16, 0xc3,
+ 0x9a, 0x8d, 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0x78, 0x16, 0xc3, 0x9a, 0x9c,
+ 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0x88, 0xc7, 0x0d, 0x04, 0x08, 0x0b, 0x51,
+ 0xc8, 0x4b, 0x94, 0x08, 0x0b, 0x88, 0xc4, 0x0d, 0x0e, 0x08, 0x0b, 0x29,
+ 0xcb, 0x13, 0xfa, 0x08, 0x0b, 0x58, 0xc8, 0x4b, 0x94, 0x08, 0x0b, 0x91,
+ 0xc7, 0x0d, 0x04, 0x08, 0x0b, 0x70, 0xc8, 0x0d, 0x03, 0x08, 0x0b, 0x68,
+ 0xcf, 0x6b, 0x25, 0x08, 0x0b, 0x38, 0xc2, 0xe5, 0xfd, 0x08, 0x1e, 0x68,
+ 0x11, 0xc3, 0x9a, 0xab, 0xc4, 0x69, 0xaa, 0x0e, 0x7d, 0xca, 0x03, 0x9a,
+ 0xbd, 0xd4, 0x3e, 0xe4, 0x00, 0xef, 0xf9, 0xd2, 0x4d, 0x8d, 0x00, 0x1a,
+ 0xb0, 0xc2, 0x01, 0x2d, 0x09, 0x19, 0x99, 0xc3, 0x02, 0x2c, 0x09, 0x19,
+ 0x90, 0xc9, 0x40, 0xaa, 0x09, 0x12, 0xe8, 0xca, 0x9c, 0x98, 0x09, 0x10,
+ 0x79, 0xc9, 0x40, 0xaa, 0x09, 0x10, 0x70, 0xc8, 0xaa, 0xf0, 0x09, 0x1c,
+ 0x51, 0xc4, 0x58, 0xf5, 0x09, 0x10, 0x08, 0xa0, 0x09, 0x10, 0x33, 0x03,
+ 0x9a, 0xc3, 0x9f, 0x09, 0x10, 0x28, 0xcc, 0x36, 0x65, 0x09, 0x27, 0xa9,
+ 0xc3, 0x36, 0x6e, 0x09, 0x27, 0xa0, 0xc9, 0xab, 0x25, 0x09, 0x0e, 0x38,
+ 0x94, 0x09, 0x0e, 0x28, 0xc8, 0x65, 0xd0, 0x09, 0x0f, 0x39, 0x83, 0x09,
+ 0x0f, 0x30, 0xc2, 0x38, 0xb6, 0x09, 0x0f, 0x19, 0x89, 0x09, 0x0f, 0x10,
+ 0xc2, 0x5d, 0xd4, 0x09, 0x0e, 0xfb, 0x03, 0x9a, 0xc9, 0x4e, 0x72, 0x8e,
+ 0xc3, 0x9a, 0xcf, 0xca, 0xa6, 0x16, 0x09, 0x0e, 0xe0, 0xc8, 0xa7, 0xb2,
+ 0x09, 0x0e, 0xc8, 0x8e, 0x09, 0x0e, 0xb8, 0x8e, 0x09, 0x0e, 0x93, 0x03,
+ 0x9a, 0xdb, 0xa0, 0x09, 0x0e, 0x88, 0x90, 0x09, 0x0e, 0x80, 0x46, 0x25,
+ 0xd4, 0x43, 0x9a, 0xe1, 0x8e, 0x09, 0x0e, 0x48, 0xc3, 0x1d, 0x23, 0x09,
+ 0x0d, 0xe1, 0xc3, 0x1a, 0xf4, 0x09, 0x0d, 0xd9, 0xca, 0xa4, 0x4a, 0x09,
+ 0x0d, 0xd0, 0x8f, 0x09, 0x26, 0x39, 0x86, 0x09, 0x07, 0x38, 0xc9, 0xab,
+ 0xd0, 0x09, 0x07, 0x30, 0xc2, 0x04, 0x2b, 0x09, 0x26, 0x31, 0xc2, 0x8d,
+ 0xc6, 0x09, 0x26, 0x28, 0xca, 0x51, 0xd4, 0x09, 0x26, 0x08, 0x83, 0x09,
+ 0x25, 0xf1, 0xcc, 0x81, 0x15, 0x09, 0x06, 0x88, 0xc8, 0xaa, 0xef, 0x09,
+ 0x06, 0x98, 0x46, 0x25, 0xd4, 0x43, 0x9a, 0xed, 0xc7, 0x25, 0xd4, 0x09,
+ 0x06, 0x78, 0xc6, 0x45, 0xad, 0x09, 0x25, 0xc9, 0xc8, 0x6a, 0x1e, 0x09,
+ 0x25, 0xc0, 0xc4, 0x39, 0xc8, 0x09, 0x25, 0xb9, 0xc9, 0xa6, 0x49, 0x09,
+ 0x06, 0x28, 0xc9, 0xab, 0x37, 0x09, 0x05, 0xf0, 0x45, 0x03, 0x55, 0xc3,
+ 0x9a, 0xf9, 0x46, 0x1f, 0x67, 0xc3, 0x9b, 0x05, 0x48, 0x0b, 0xc8, 0xc3,
+ 0x9b, 0x1b, 0xc7, 0x27, 0xb2, 0x0e, 0xc7, 0xd1, 0x45, 0x13, 0x6f, 0xc3,
+ 0x9b, 0x30, 0xc4, 0x0e, 0x65, 0x0e, 0xc7, 0xb0, 0x46, 0x0e, 0xce, 0xc3,
+ 0x9b, 0x42, 0x14, 0xc3, 0x9b, 0x64, 0xc6, 0x04, 0xcb, 0x0e, 0xc0, 0x73,
+ 0x03, 0x9b, 0x70, 0xc6, 0x58, 0xac, 0x0e, 0xc0, 0x5b, 0x03, 0x9b, 0x74,
+ 0xd0, 0x58, 0xa2, 0x0e, 0xc0, 0x9b, 0x03, 0x9b, 0x78, 0xc4, 0x18, 0xf2,
+ 0x0e, 0xc0, 0x33, 0x03, 0x9b, 0x7e, 0xc6, 0xcc, 0x41, 0x0e, 0xc0, 0x50,
+ 0xca, 0x13, 0x91, 0x0e, 0xc6, 0x69, 0xcd, 0x3a, 0x9e, 0x0e, 0xc6, 0x40,
+ 0xc6, 0x13, 0x95, 0x0e, 0xc6, 0x59, 0x47, 0xc6, 0xcc, 0xc3, 0x9b, 0x84,
+ 0x05, 0xc3, 0x9b, 0x90, 0xcf, 0x64, 0xb3, 0x0e, 0xc1, 0x80, 0xcb, 0x4d,
+ 0x82, 0x0e, 0xc6, 0x48, 0x00, 0x43, 0x9b, 0x9c, 0xc6, 0x0e, 0xcd, 0x0e,
+ 0xc4, 0xe0, 0xc4, 0x0e, 0xcf, 0x0e, 0xc4, 0xd1, 0xcc, 0x86, 0xe5, 0x0e,
+ 0xc4, 0xc8, 0x00, 0x43, 0x9b, 0xa8, 0xcb, 0x4d, 0x82, 0x0e, 0xc3, 0x1a,
+ 0x03, 0x9b, 0xb4, 0xca, 0x4d, 0x83, 0x0e, 0xc2, 0xf1, 0xd3, 0x46, 0x57,
+ 0x0e, 0xc2, 0x6a, 0x03, 0x9b, 0xba, 0x00, 0x43, 0x9b, 0xbe, 0x00, 0x43,
+ 0x9b, 0xd9, 0x00, 0x43, 0x9b, 0xee, 0xc4, 0x0c, 0x4d, 0x0e, 0xc6, 0x10,
+ 0xc6, 0x13, 0x95, 0x0e, 0xc5, 0x41, 0xc4, 0x00, 0x9d, 0x0e, 0xc4, 0x48,
+ 0xc4, 0x0c, 0x4d, 0x0e, 0xc4, 0xf0, 0xc5, 0x0e, 0xce, 0x0e, 0xc7, 0x83,
+ 0x03, 0x9b, 0xfa, 0xc6, 0x58, 0xac, 0x0e, 0xc6, 0xd9, 0xcb, 0x13, 0x90,
+ 0x0e, 0xc6, 0x09, 0x47, 0x04, 0xcb, 0x43, 0x9b, 0xfe, 0xc5, 0x06, 0x82,
+ 0x0e, 0xc5, 0x13, 0x03, 0x9c, 0x0d, 0xc5, 0x0e, 0xce, 0x0e, 0xc4, 0xd8,
+ 0xcf, 0x69, 0xdb, 0x0e, 0xc4, 0x18, 0xc8, 0xbc, 0x62, 0x0e, 0xc4, 0x09,
+ 0x46, 0x0e, 0xce, 0x43, 0x9c, 0x13, 0x00, 0x43, 0x9c, 0x1f, 0x00, 0x43,
+ 0x9c, 0x2b, 0xc7, 0x27, 0xb2, 0x0e, 0xc3, 0x99, 0xc4, 0x0e, 0xe2, 0x0e,
+ 0xc3, 0x78, 0x00, 0x43, 0x9c, 0x3a, 0xc5, 0x05, 0x74, 0x0e, 0xc2, 0xa0,
+ 0xc5, 0x18, 0xf1, 0x0e, 0xc6, 0xa8, 0xcb, 0x13, 0x90, 0x0e, 0xc5, 0xd9,
+ 0xc6, 0x04, 0xcb, 0x0e, 0xc0, 0x7b, 0x03, 0x9c, 0x46, 0xc5, 0x58, 0xac,
+ 0x0e, 0xc0, 0x69, 0xc4, 0x18, 0xf2, 0x0e, 0xc0, 0x38, 0xc5, 0xdd, 0x17,
+ 0x0e, 0xcd, 0x69, 0xca, 0x9e, 0x8c, 0x0e, 0xcd, 0x30, 0xc5, 0x17, 0x14,
+ 0x0e, 0xcc, 0x73, 0x03, 0x9c, 0x4a, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0x69,
+ 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0x60, 0xc6, 0x01, 0xdb, 0x0e, 0xcc, 0x89,
+ 0xc5, 0x03, 0x13, 0x0e, 0xcc, 0x80, 0xc2, 0x00, 0x15, 0x0e, 0xcc, 0x58,
+ 0xcb, 0x57, 0xc7, 0x0f, 0xc1, 0x79, 0xca, 0xa0, 0x08, 0x0f, 0xc1, 0x59,
+ 0x49, 0xa8, 0xdc, 0xc3, 0x9c, 0x50, 0xd8, 0x24, 0xb3, 0x01, 0x5b, 0xe9,
+ 0xcc, 0x84, 0x09, 0x0f, 0xc1, 0x19, 0xcc, 0x82, 0x1d, 0x0f, 0xc1, 0x39,
+ 0xd0, 0x57, 0xc2, 0x0f, 0xc1, 0x98, 0xe0, 0x09, 0x47, 0x01, 0x5c, 0x18,
+ 0xcf, 0x2c, 0x35, 0x01, 0x5b, 0xe1, 0xd1, 0x01, 0x68, 0x01, 0x5b, 0xe0,
+ 0xc7, 0x09, 0x0d, 0x01, 0x5d, 0x29, 0xc9, 0x03, 0xc8, 0x01, 0x5d, 0x38,
+ 0xcf, 0x2c, 0x35, 0x01, 0x48, 0xb9, 0xd6, 0x2d, 0x62, 0x01, 0x48, 0xc0,
+ 0xc8, 0x62, 0x44, 0x01, 0x4b, 0x61, 0xdd, 0x10, 0xdd, 0x01, 0x4b, 0x40,
+ 0xe0, 0x06, 0xe7, 0x01, 0x4b, 0x20, 0xcc, 0x00, 0xfb, 0x07, 0xe8, 0x51,
+ 0xcb, 0x10, 0xb5, 0x07, 0xe9, 0x70, 0x45, 0x19, 0x60, 0xc3, 0x9c, 0x5c,
+ 0xce, 0x43, 0x77, 0x07, 0xed, 0x50, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x59,
+ 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x50, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x61,
+ 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x68, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x29,
+ 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x20, 0xdc, 0x14, 0x69, 0x07, 0xea, 0x61,
+ 0xd2, 0x49, 0x9d, 0x07, 0xef, 0xd0, 0xe0, 0x00, 0xe7, 0x07, 0xef, 0x80,
+ 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x89, 0xcc, 0x10, 0xb4, 0x07, 0xeb, 0x90,
+ 0xca, 0x26, 0xf7, 0x07, 0xea, 0x89, 0xcc, 0x10, 0xb4, 0x07, 0xea, 0x90,
+ 0xca, 0x26, 0xf7, 0x07, 0xe3, 0x49, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x20,
+ 0xca, 0x26, 0xf7, 0x07, 0xdf, 0xa9, 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0xa0,
+ 0x48, 0x06, 0x5f, 0xc3, 0x9c, 0x68, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0x59,
+ 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x50, 0xca, 0x26, 0xf7, 0x07, 0xdf, 0x69,
+ 0xcd, 0x00, 0xfa, 0x07, 0xdf, 0x60, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x11,
+ 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xd0, 0xcc, 0x00, 0xfb, 0x07, 0xe0, 0x09,
+ 0xcb, 0x10, 0xb5, 0x07, 0xe4, 0xc0, 0xcb, 0x64, 0x7b, 0x07, 0xe7, 0x01,
+ 0xcc, 0x10, 0xb4, 0x07, 0xe4, 0xd8, 0xcb, 0x10, 0xb5, 0x07, 0xdf, 0xc1,
+ 0xcc, 0x00, 0xfb, 0x07, 0xdf, 0xb0, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0xf1,
+ 0xcc, 0x10, 0xb4, 0x07, 0xed, 0xa0, 0xcf, 0x0e, 0x7d, 0x00, 0x31, 0xf9,
+ 0xcd, 0x04, 0xe7, 0x00, 0x31, 0xf0, 0xca, 0x09, 0x9d, 0x00, 0x3b, 0xb9,
+ 0x16, 0x43, 0x9c, 0x74, 0xc5, 0x05, 0x02, 0x00, 0x35, 0x1b, 0x03, 0x9c,
+ 0x80, 0xcb, 0x98, 0xb0, 0x00, 0x35, 0x10, 0x4a, 0x0e, 0x7d, 0xc3, 0x9c,
+ 0x86, 0xcd, 0x04, 0xfa, 0x00, 0x3b, 0x00, 0xcf, 0x0e, 0x7d, 0x00, 0x35,
+ 0xa1, 0xcd, 0x04, 0xfa, 0x00, 0x35, 0x90, 0xd7, 0x2b, 0x3a, 0x00, 0x46,
+ 0x39, 0x98, 0x00, 0x35, 0xa8, 0xc8, 0xa7, 0x26, 0x00, 0x45, 0x31, 0xc7,
+ 0x16, 0x16, 0x00, 0x35, 0xb0, 0xc5, 0x05, 0x02, 0x00, 0x35, 0xc1, 0xc5,
+ 0x00, 0xd4, 0x00, 0x35, 0xb8, 0xc5, 0x05, 0x02, 0x00, 0x46, 0x31, 0xc5,
+ 0x00, 0xd4, 0x00, 0x46, 0x28, 0xc5, 0x05, 0x02, 0x00, 0x45, 0x99, 0xc5,
+ 0x00, 0xd4, 0x00, 0x35, 0x01, 0xd8, 0x26, 0x03, 0x00, 0x3a, 0xf0, 0xc5,
+ 0x00, 0xd4, 0x00, 0x3a, 0xe9, 0xd0, 0x25, 0x7b, 0x00, 0x3a, 0xf8, 0x49,
+ 0xb2, 0xab, 0xc3, 0x9c, 0x92, 0xd3, 0x45, 0x3a, 0x00, 0x43, 0x93, 0x03,
+ 0x9c, 0xba, 0xc9, 0x16, 0x14, 0x00, 0x43, 0xd1, 0xd2, 0x4e, 0x53, 0x00,
+ 0x43, 0x99, 0x4b, 0x5e, 0x02, 0xc3, 0x9c, 0xc0, 0x46, 0x08, 0x09, 0xc3,
+ 0x9c, 0xcc, 0xcb, 0x82, 0x59, 0x00, 0x31, 0x13, 0x03, 0x9c, 0xde, 0x5d,
+ 0x10, 0x12, 0x43, 0x9c, 0xe2, 0x00, 0x43, 0x9c, 0xee, 0xcd, 0x00, 0xfa,
+ 0x07, 0xf7, 0x79, 0xca, 0x26, 0xf7, 0x07, 0xf7, 0x80, 0x48, 0x04, 0xe7,
+ 0xc3, 0x9c, 0xfa, 0x4a, 0x0e, 0x7d, 0x43, 0x9d, 0x06, 0x44, 0x05, 0x18,
+ 0xc3, 0x9d, 0x18, 0x16, 0xc3, 0x9d, 0x24, 0xc4, 0x00, 0x9d, 0x00, 0x35,
+ 0x58, 0xcb, 0x10, 0xb5, 0x07, 0xf6, 0xd9, 0xcc, 0x00, 0xfb, 0x07, 0xf6,
+ 0xc8, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0x01, 0xcc, 0x00, 0xfb, 0x07, 0xdb,
+ 0xf0, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0x01, 0xcc, 0x00, 0xfb, 0x07, 0xda,
+ 0xf0, 0x98, 0x00, 0x45, 0xf9, 0xc9, 0xad, 0xda, 0x00, 0x45, 0xc0, 0x00,
+ 0x43, 0x9d, 0x30, 0xcb, 0x10, 0xb5, 0x07, 0xdb, 0x21, 0xcc, 0x00, 0xfb,
+ 0x07, 0xdb, 0x10, 0xcd, 0x04, 0xe7, 0x00, 0x45, 0x19, 0x4a, 0x0e, 0x7d,
+ 0x43, 0x9d, 0x42, 0xcc, 0x00, 0xfb, 0x07, 0xf4, 0xc9, 0xcb, 0x10, 0xb5,
+ 0x07, 0xf4, 0xd8, 0x52, 0x16, 0x02, 0xc3, 0x9d, 0x4e, 0xcf, 0x67, 0xce,
+ 0x00, 0x36, 0x89, 0xc3, 0x14, 0xa7, 0x00, 0x36, 0x68, 0x00, 0x43, 0x9d,
+ 0x60, 0x45, 0x00, 0x8c, 0xc3, 0x9d, 0x70, 0xca, 0x26, 0xf7, 0x07, 0xdd,
+ 0x79, 0xcd, 0x00, 0xfa, 0x07, 0xdd, 0x70, 0x45, 0x03, 0x14, 0xc3, 0x9d,
+ 0x7f, 0xc5, 0x01, 0x74, 0x00, 0x3a, 0xd8, 0xc5, 0x00, 0xd4, 0x00, 0x34,
+ 0xb9, 0xd0, 0x25, 0x7b, 0x00, 0x3b, 0x58, 0xce, 0x16, 0x0f, 0x00, 0x34,
+ 0xb0, 0xca, 0xa6, 0xd4, 0x00, 0x45, 0xb1, 0x98, 0x00, 0x3a, 0xb2, 0x03,
+ 0x9d, 0x92, 0xdb, 0x16, 0x02, 0x00, 0x3a, 0xa1, 0x4a, 0x0e, 0x7d, 0x43,
+ 0x9d, 0x98, 0xcb, 0x10, 0xb5, 0x07, 0xda, 0xe1, 0xcc, 0x00, 0xfb, 0x07,
+ 0xda, 0xd0, 0xd2, 0x49, 0xc1, 0x00, 0x45, 0xa8, 0xc5, 0x05, 0x02, 0x00,
+ 0x45, 0x79, 0xc5, 0x00, 0xd4, 0x00, 0x34, 0xf0, 0xcf, 0x67, 0xce, 0x00,
+ 0x34, 0xdb, 0x03, 0x9d, 0xa7, 0xd8, 0x25, 0x73, 0x00, 0x3b, 0x68, 0xe0,
+ 0x0a, 0x67, 0x00, 0x3b, 0xe8, 0xe0, 0x02, 0x87, 0x00, 0x3b, 0x80, 0x16,
+ 0xc3, 0x9d, 0xad, 0x49, 0x1d, 0x6f, 0xc3, 0x9d, 0xb9, 0xcf, 0x3b, 0x79,
+ 0x00, 0x34, 0x81, 0xc9, 0x0e, 0x6e, 0x00, 0x34, 0x53, 0x03, 0x9d, 0xc3,
+ 0xc4, 0x00, 0x9d, 0x00, 0x34, 0x48, 0xcc, 0x00, 0xfb, 0x07, 0xf5, 0xe9,
+ 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0xf8, 0xcd, 0x00, 0xfa, 0x07, 0xf4, 0x29,
+ 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x30, 0xc5, 0x00, 0xd4, 0x00, 0x3b, 0x29,
+ 0xc5, 0x05, 0x02, 0x00, 0x3b, 0x30, 0xcb, 0x10, 0xb5, 0x07, 0xdc, 0xe1,
+ 0xcc, 0x00, 0xfb, 0x07, 0xdc, 0xd0, 0xcf, 0x0e, 0x7d, 0x00, 0x35, 0xe9,
+ 0xcd, 0x04, 0xfa, 0x00, 0x3b, 0x38, 0xc3, 0x02, 0x97, 0x00, 0x3b, 0x41,
+ 0x98, 0x00, 0x3b, 0x48, 0xcd, 0x00, 0xfa, 0x07, 0xdc, 0xf1, 0xca, 0x26,
+ 0xf7, 0x07, 0xdc, 0xf8, 0xd6, 0x31, 0x56, 0x00, 0x44, 0x51, 0x16, 0xc3,
+ 0x9d, 0xc9, 0xcb, 0x08, 0x09, 0x00, 0x34, 0x09, 0x46, 0x09, 0x3f, 0xc3,
+ 0x9d, 0xd5, 0x58, 0x24, 0x9b, 0x43, 0x9d, 0xdb, 0xcc, 0x00, 0xfb, 0x07,
+ 0xf5, 0x09, 0xcb, 0x10, 0xb5, 0x07, 0xf5, 0x18, 0xcb, 0x64, 0x7b, 0x07,
+ 0xdd, 0x69, 0x0b, 0xc3, 0x9d, 0xe5, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x58,
+ 0xcb, 0x64, 0x7b, 0x07, 0xdd, 0x49, 0x0b, 0xc3, 0x9d, 0xf1, 0xca, 0x26,
+ 0xf7, 0x07, 0xdd, 0x39, 0xd0, 0x5f, 0x02, 0x00, 0x36, 0x10, 0x00, 0x43,
+ 0x9d, 0xfd, 0xcf, 0x04, 0xd8, 0x0f, 0xdd, 0x23, 0x03, 0x9e, 0x09, 0xe0,
+ 0x04, 0xc7, 0x0f, 0xdd, 0x40, 0xcf, 0x04, 0xd8, 0x0f, 0xdd, 0x2b, 0x03,
+ 0x9e, 0x0f, 0xdf, 0x0c, 0xe1, 0x0f, 0xdd, 0x48, 0xd3, 0x45, 0x4d, 0x0f,
+ 0xd1, 0x89, 0xcf, 0x18, 0x0f, 0x0f, 0xd1, 0xe8, 0x96, 0x0b, 0x4b, 0xb8,
+ 0xc2, 0x10, 0x11, 0x0b, 0x47, 0xc8, 0xa5, 0x01, 0x45, 0xf9, 0xa4, 0x01,
+ 0x43, 0xfa, 0x03, 0x9e, 0x15, 0xa5, 0x01, 0x46, 0xf8, 0xa5, 0x01, 0x47,
+ 0x78, 0xa5, 0x01, 0x47, 0xb8, 0xa5, 0x01, 0x47, 0xd8, 0xa5, 0x01, 0x47,
+ 0xe8, 0xa5, 0x01, 0x47, 0xf0, 0xc7, 0x09, 0x0d, 0x01, 0x5d, 0x21, 0xc9,
+ 0x03, 0xc8, 0x01, 0x5d, 0x30, 0xcf, 0x2c, 0x35, 0x01, 0x5b, 0xd1, 0xd1,
+ 0x01, 0x68, 0x01, 0x5b, 0xd0, 0xcf, 0x2c, 0x35, 0x01, 0x59, 0xe1, 0xd6,
+ 0x2d, 0x62, 0x01, 0x59, 0xe8, 0xc8, 0x62, 0x44, 0x01, 0x4b, 0x51, 0xdf,
+ 0x09, 0x68, 0x01, 0x4b, 0x10, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xa9, 0xc8,
+ 0x74, 0x8c, 0x00, 0x13, 0xd3, 0x03, 0x9e, 0x19, 0xcc, 0x1e, 0xc1, 0x05,
+ 0x5b, 0x41, 0xc4, 0x01, 0x23, 0x00, 0x13, 0xd9, 0xc4, 0x14, 0xa6, 0x01,
+ 0x63, 0xc8, 0x46, 0x00, 0x8b, 0x43, 0x9e, 0x1f, 0xcc, 0x23, 0x3f, 0x05,
+ 0x5a, 0x20, 0xc9, 0xa9, 0x2d, 0x00, 0x15, 0x78, 0xc9, 0x0e, 0x6e, 0x00,
+ 0xf7, 0x19, 0xc5, 0x1e, 0xc8, 0x00, 0xf7, 0x09, 0xca, 0x9e, 0x5a, 0x00,
+ 0xf6, 0xf9, 0xc5, 0x1f, 0x0c, 0x00, 0xf6, 0xe9, 0xc5, 0x31, 0xee, 0x00,
+ 0xf6, 0xd8, 0xc9, 0x0e, 0x6e, 0x00, 0xf6, 0xc9, 0xc5, 0x1e, 0xc8, 0x00,
+ 0xf6, 0xb9, 0xca, 0x9e, 0x5a, 0x00, 0xf6, 0xa9, 0xc5, 0x1f, 0x0c, 0x00,
+ 0xf6, 0x99, 0xc5, 0x31, 0xee, 0x00, 0xf6, 0x88, 0xc5, 0x05, 0x02, 0x00,
+ 0xf6, 0x69, 0xc5, 0x00, 0xd4, 0x00, 0x11, 0x7a, 0x03, 0x9e, 0x2b, 0xc5,
+ 0x31, 0xee, 0x00, 0x0a, 0x89, 0xc5, 0x1f, 0x0c, 0x00, 0x10, 0x68, 0xce,
+ 0x01, 0x19, 0x05, 0x5b, 0x31, 0xc4, 0x00, 0x32, 0x00, 0x15, 0x28, 0xc9,
+ 0x20, 0xb1, 0x00, 0x14, 0x20, 0xc3, 0x00, 0x33, 0x00, 0x14, 0x99, 0xc4,
+ 0x65, 0xe2, 0x00, 0x0f, 0x78, 0x44, 0x02, 0x9b, 0xc3, 0x9e, 0x31, 0xc5,
+ 0x05, 0x02, 0x00, 0xf0, 0xc8, 0xc5, 0x05, 0x02, 0x00, 0xf5, 0xc9, 0xc5,
+ 0x00, 0xd4, 0x00, 0x08, 0xb8, 0x45, 0x02, 0x9a, 0x43, 0x9e, 0x4f, 0xc9,
+ 0x64, 0x14, 0x00, 0xf2, 0xf9, 0xc7, 0x74, 0x8d, 0x00, 0x13, 0xe0, 0x42,
+ 0x00, 0x30, 0xc3, 0x9e, 0x5b, 0xca, 0x1f, 0x07, 0x00, 0x10, 0x88, 0xcb,
+ 0x4d, 0x16, 0x05, 0x5a, 0x49, 0xc6, 0xbd, 0xf4, 0x00, 0x0a, 0xb9, 0xc4,
+ 0x65, 0xe2, 0x00, 0x0a, 0xc8, 0x45, 0x02, 0x9a, 0x43, 0x9e, 0x6a, 0xc7,
+ 0x0e, 0x70, 0x00, 0xf7, 0x29, 0x45, 0x00, 0x5a, 0x43, 0x9e, 0x88, 0x00,
+ 0x43, 0x9e, 0x94, 0xc9, 0x9b, 0xdb, 0x00, 0xf3, 0xc9, 0xc5, 0x05, 0x02,
+ 0x00, 0xf3, 0xa8, 0xc6, 0x05, 0x01, 0x00, 0xf3, 0xb8, 0xc9, 0x0e, 0x6e,
+ 0x00, 0xf5, 0xb9, 0xc5, 0x1e, 0xc8, 0x00, 0xf5, 0xa9, 0xca, 0x9e, 0x5a,
+ 0x00, 0xf5, 0x99, 0xc5, 0x1f, 0x0c, 0x00, 0xf5, 0x89, 0xc5, 0x31, 0xee,
+ 0x00, 0xf5, 0x78, 0x45, 0x02, 0x9a, 0x43, 0x9e, 0xa0, 0x42, 0x00, 0x30,
+ 0xc3, 0x9e, 0xbe, 0xca, 0x1f, 0x07, 0x00, 0x10, 0x08, 0xcb, 0x98, 0x58,
+ 0x00, 0x0e, 0xf8, 0xcd, 0x61, 0x8b, 0x00, 0xf4, 0xd1, 0x43, 0x00, 0x75,
+ 0x43, 0x9e, 0xcd, 0xca, 0x25, 0x08, 0x05, 0x5a, 0xc9, 0xd2, 0x4c, 0x5b,
+ 0x05, 0x5a, 0xc0, 0xc5, 0x05, 0x02, 0x00, 0xf2, 0x39, 0xc5, 0x00, 0xd4,
+ 0x00, 0xf2, 0x28, 0xc9, 0x0e, 0x6e, 0x00, 0xf7, 0x81, 0xc5, 0x1e, 0xc8,
+ 0x00, 0xf7, 0x71, 0xca, 0x9e, 0x5a, 0x00, 0xf7, 0x61, 0xc5, 0x1f, 0x0c,
+ 0x00, 0xf7, 0x51, 0xc5, 0x31, 0xee, 0x00, 0xf7, 0x40, 0xc5, 0x31, 0xee,
+ 0x00, 0x0b, 0x81, 0xc5, 0x1f, 0x0c, 0x00, 0x10, 0xa0, 0xc5, 0x05, 0x02,
+ 0x00, 0xf3, 0x91, 0x44, 0x02, 0x9b, 0x43, 0x9e, 0xdc, 0xcb, 0x98, 0x58,
+ 0x00, 0x11, 0x80, 0xc9, 0x0e, 0x6e, 0x00, 0xf6, 0x31, 0xc5, 0x1e, 0xc8,
+ 0x00, 0xf6, 0x21, 0xca, 0x9e, 0x5a, 0x00, 0xf6, 0x11, 0xc5, 0x1f, 0x0c,
+ 0x00, 0xf6, 0x01, 0xc5, 0x31, 0xee, 0x00, 0xf5, 0xf0, 0xcb, 0x98, 0x58,
+ 0x00, 0x0f, 0x00, 0xcb, 0x98, 0x58, 0x00, 0xf2, 0xe0, 0x16, 0xc3, 0x9e,
+ 0xf4, 0xc6, 0x8e, 0xde, 0x00, 0x89, 0x11, 0xc5, 0x79, 0xf2, 0x00, 0x89,
+ 0x21, 0xc5, 0xdb, 0xff, 0x00, 0x89, 0x30, 0x87, 0x00, 0x8c, 0x28, 0xc4,
+ 0xad, 0x2b, 0x00, 0x8e, 0x61, 0xc5, 0x90, 0xe4, 0x06, 0xbe, 0xb0, 0xc4,
+ 0xad, 0x2b, 0x00, 0x8e, 0x99, 0xc5, 0x90, 0xe4, 0x00, 0x8e, 0xa1, 0xc6,
+ 0xc0, 0x7c, 0x06, 0xbe, 0xc9, 0xc7, 0xba, 0x7b, 0x06, 0xbe, 0xd0, 0x02,
+ 0x43, 0x9f, 0x00, 0xc4, 0xe4, 0x57, 0x01, 0x9f, 0xf0, 0xc3, 0x05, 0x14,
+ 0x01, 0x9b, 0x69, 0x16, 0xc3, 0x9f, 0x1e, 0xc4, 0x09, 0x9d, 0x01, 0x9b,
+ 0x80, 0xc4, 0x01, 0x23, 0x00, 0x15, 0xa9, 0xc8, 0x74, 0x8c, 0x08, 0x3d,
+ 0x32, 0x03, 0x9f, 0x2a, 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x19, 0xc4, 0x2c,
+ 0x0d, 0x0e, 0x8a, 0x08, 0xc4, 0x23, 0x2e, 0x0e, 0x8b, 0x09, 0xc4, 0x2c,
+ 0x0d, 0x0e, 0x89, 0xf8, 0xa0, 0x0e, 0x8e, 0x71, 0x9f, 0x0e, 0x8e, 0x69,
+ 0x9e, 0x0e, 0x8e, 0x60, 0x46, 0x00, 0x2c, 0xc3, 0x9f, 0x30, 0xc5, 0x02,
+ 0xc2, 0x0e, 0x8a, 0x49, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x40, 0xc5, 0x02,
+ 0xc2, 0x0e, 0x8a, 0x79, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x70, 0xc5, 0x02,
+ 0xc2, 0x0e, 0x8a, 0x69, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x60, 0xc5, 0x02,
+ 0xc2, 0x0e, 0x8a, 0x59, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x50, 0xcd, 0x7f,
+ 0x18, 0x0e, 0x8d, 0x69, 0xc4, 0xe4, 0xd3, 0x0e, 0x8c, 0x41, 0x16, 0xc3,
+ 0x9f, 0x3c, 0xd0, 0x5f, 0x42, 0x0e, 0x8b, 0x30, 0xc6, 0xd1, 0xc3, 0x0e,
+ 0x8d, 0x51, 0xcb, 0x91, 0x62, 0x0e, 0x8c, 0x51, 0xc2, 0x00, 0x8d, 0x0e,
+ 0x8c, 0x28, 0x14, 0xc3, 0x9f, 0x48, 0xc5, 0xd9, 0xac, 0x0e, 0x8b, 0xe8,
+ 0xc2, 0x00, 0x7e, 0x0e, 0x8c, 0x39, 0x43, 0xe5, 0x96, 0x43, 0x9f, 0x54,
+ 0xc5, 0x09, 0x02, 0x0e, 0x8b, 0xdb, 0x03, 0x9f, 0x68, 0xcf, 0x65, 0x67,
+ 0x0e, 0x8b, 0x68, 0xc9, 0xab, 0x1c, 0x0e, 0x8c, 0x00, 0xc5, 0x5e, 0x2d,
+ 0x0e, 0x8e, 0x18, 0xcd, 0x42, 0x94, 0x00, 0xff, 0xe1, 0xc4, 0x7a, 0x04,
+ 0x00, 0xfb, 0x42, 0x03, 0x9f, 0x6e, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0x74,
+ 0x45, 0x02, 0x9a, 0x43, 0x9f, 0x8a, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0x96,
+ 0x45, 0x02, 0x9a, 0x43, 0x9f, 0xa2, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0xb4,
+ 0xcb, 0x94, 0x90, 0x00, 0xf9, 0xf1, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0xe1,
+ 0xc5, 0x28, 0x47, 0x00, 0xf9, 0xd0, 0xcd, 0x42, 0x94, 0x00, 0xfe, 0x61,
+ 0xc4, 0x7a, 0x04, 0x00, 0xf9, 0x42, 0x03, 0x9f, 0xc6, 0x45, 0x02, 0x9a,
+ 0x43, 0x9f, 0xcc, 0x45, 0x02, 0x9a, 0x43, 0x9f, 0xe2, 0x45, 0x02, 0x9a,
+ 0x43, 0x9f, 0xee, 0xcd, 0x42, 0x94, 0x00, 0xfd, 0xe1, 0xc4, 0x7a, 0x04,
+ 0x00, 0xf8, 0x42, 0x03, 0x9f, 0xfa, 0xc4, 0x28, 0x48, 0x00, 0xfd, 0xd1,
+ 0xc5, 0xd6, 0x41, 0x00, 0xfd, 0xc0, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x00,
+ 0xca, 0x94, 0x91, 0x00, 0xff, 0xb3, 0x03, 0xa0, 0x16, 0xc4, 0x7a, 0x04,
+ 0x00, 0xfb, 0x02, 0x03, 0xa0, 0x1c, 0xd2, 0x4a, 0x63, 0x00, 0xff, 0xa0,
+ 0xd2, 0x4a, 0x63, 0x00, 0xff, 0x90, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x22,
+ 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x43, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x4f,
+ 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x5b, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x73,
+ 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x85, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0x97,
+ 0x45, 0x02, 0x9a, 0x43, 0xa0, 0xaf, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0xc1,
+ 0xca, 0x94, 0x91, 0x00, 0xfe, 0x33, 0x03, 0xa0, 0xd3, 0xc4, 0x7a, 0x04,
+ 0x00, 0xf9, 0x02, 0x03, 0xa0, 0xd9, 0xd2, 0x4a, 0x63, 0x00, 0xfe, 0x20,
+ 0xd2, 0x4a, 0x63, 0x00, 0xfe, 0x10, 0x45, 0x02, 0x9a, 0x43, 0xa0, 0xdf,
+ 0x45, 0x02, 0x9a, 0x43, 0xa1, 0x00, 0x45, 0x02, 0x9a, 0x43, 0xa1, 0x0c,
+ 0xca, 0x94, 0x91, 0x00, 0xfd, 0xb3, 0x03, 0xa1, 0x18, 0xc4, 0x7a, 0x04,
+ 0x00, 0xf8, 0x02, 0x03, 0xa1, 0x1e, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0xa0,
+ 0xc4, 0x28, 0x48, 0x00, 0xfb, 0x83, 0x03, 0xa1, 0x24, 0xc5, 0xd6, 0x41,
+ 0x00, 0xfd, 0x80, 0x45, 0x02, 0x9a, 0x43, 0xa1, 0x2a, 0x00, 0x43, 0xa1,
+ 0x4b, 0xc7, 0x33, 0xe6, 0x08, 0x0a, 0x33, 0x03, 0xa1, 0x57, 0xc6, 0xb9,
+ 0xdc, 0x08, 0x0a, 0x40, 0xc7, 0x33, 0xe6, 0x08, 0x0a, 0x3b, 0x03, 0xa1,
+ 0x5d, 0xc6, 0xb9, 0xdc, 0x08, 0x0a, 0x50, 0xca, 0xa6, 0x70, 0x0e, 0x7d,
+ 0xe3, 0x03, 0xa1, 0x63, 0xc9, 0x92, 0x8d, 0x0e, 0x7d, 0xd2, 0x03, 0xa1,
+ 0x69, 0xd6, 0x2d, 0x0a, 0x0e, 0x7d, 0xb8, 0xc9, 0x40, 0xaa, 0x09, 0x10,
+ 0x38, 0xca, 0xa3, 0x1e, 0x09, 0x0f, 0x00, 0xc4, 0x58, 0xf5, 0x09, 0x0e,
+ 0xf1, 0xca, 0x9e, 0x46, 0x09, 0x0e, 0xe8, 0xcf, 0x6a, 0xbc, 0x09, 0x0e,
+ 0x98, 0xc2, 0x10, 0x37, 0x09, 0x0e, 0x71, 0xc2, 0x00, 0xd0, 0x09, 0x0e,
+ 0x68, 0xc2, 0x02, 0x6f, 0x09, 0x25, 0xe9, 0xc2, 0x01, 0xdd, 0x09, 0x25,
+ 0xe0, 0xd4, 0x3a, 0x98, 0x0e, 0xc8, 0x11, 0xcb, 0x92, 0xa1, 0x0e, 0xc7,
+ 0xf8, 0xcc, 0x18, 0xdb, 0x0e, 0xc8, 0x09, 0x16, 0xc3, 0xa1, 0x6f, 0xc9,
+ 0xad, 0x9b, 0x0e, 0xc4, 0x99, 0xca, 0xa1, 0x70, 0x0e, 0xc0, 0x40, 0xcb,
+ 0x13, 0x90, 0x0e, 0xc7, 0xe9, 0xcb, 0x13, 0x89, 0x0e, 0xc7, 0xe1, 0xcc,
+ 0x85, 0x95, 0x0e, 0xc7, 0xda, 0x03, 0xa1, 0x7b, 0xc4, 0x18, 0xf2, 0x0e,
+ 0xc7, 0xc9, 0xc9, 0x13, 0x9c, 0x0e, 0xc7, 0xc1, 0xc8, 0x1e, 0x56, 0x0e,
+ 0xc7, 0xb8, 0x05, 0xc3, 0xa1, 0x81, 0xc4, 0x01, 0x23, 0x0e, 0xc7, 0x33,
+ 0x03, 0xa1, 0x8e, 0x4e, 0x6b, 0xd4, 0xc3, 0xa1, 0x94, 0xc4, 0x0e, 0xe2,
+ 0x0e, 0xc6, 0xe3, 0x03, 0xa1, 0xa0, 0x47, 0xc6, 0xcc, 0x43, 0xa1, 0xa4,
+ 0xca, 0x13, 0x91, 0x0e, 0xc5, 0xd1, 0xcd, 0x3a, 0x9e, 0x0e, 0xc0, 0x48,
+ 0x00, 0x43, 0xa1, 0xb0, 0x00, 0x43, 0xa1, 0xe5, 0x47, 0x0e, 0xcd, 0x43,
+ 0xa1, 0xf4, 0xcc, 0x8a, 0x5d, 0x0e, 0xc0, 0xe8, 0xc8, 0x64, 0xba, 0x0e,
+ 0xc2, 0x11, 0x4a, 0x9b, 0x1c, 0x43, 0xa2, 0x00, 0x4d, 0x76, 0xc4, 0xc3,
+ 0xa2, 0x0c, 0xce, 0x70, 0xdc, 0x0e, 0xc1, 0xb0, 0xcf, 0x3a, 0x9d, 0x0e,
+ 0xc5, 0xb1, 0xc9, 0x13, 0x9c, 0x0e, 0xc5, 0xa8, 0xce, 0x70, 0xea, 0x0e,
+ 0xc4, 0x89, 0x47, 0xc6, 0x63, 0x43, 0xa2, 0x18, 0xc5, 0x18, 0xf1, 0x0e,
+ 0xc3, 0x20, 0x00, 0x43, 0xa2, 0x24, 0xc6, 0x58, 0xac, 0x0e, 0xc2, 0xbb,
+ 0x03, 0xa2, 0x30, 0xcd, 0x27, 0xac, 0x0e, 0xc2, 0x91, 0xc4, 0x18, 0xf2,
+ 0x0e, 0xc2, 0x81, 0xc9, 0xb4, 0x40, 0x0e, 0xc2, 0x70, 0xc9, 0x13, 0x9c,
+ 0x0e, 0xc2, 0x3b, 0x03, 0xa2, 0x34, 0xc6, 0x58, 0xac, 0x0e, 0xc2, 0x31,
+ 0xc4, 0x18, 0xf2, 0x0e, 0xc2, 0x28, 0xc2, 0x00, 0x74, 0x0e, 0xc7, 0x99,
+ 0xc3, 0x00, 0xa3, 0x0e, 0xc7, 0x90, 0x00, 0x43, 0xa2, 0x3a, 0xc6, 0x13,
+ 0x95, 0x0e, 0xc5, 0x31, 0xc4, 0x00, 0x9d, 0x0e, 0xc4, 0x42, 0x03, 0xa2,
+ 0x4a, 0xc6, 0x0e, 0xcd, 0x0e, 0xc4, 0xe8, 0xc4, 0x0e, 0xe2, 0x0e, 0xc3,
+ 0xf9, 0xc7, 0x27, 0xb2, 0x0e, 0xc3, 0xe0, 0xc2, 0x00, 0x74, 0x0e, 0xc6,
+ 0xc9, 0xc3, 0x00, 0xa3, 0x0e, 0xc6, 0xc0, 0xc5, 0x0e, 0xce, 0x0e, 0xc7,
+ 0x63, 0x03, 0xa2, 0x50, 0xcb, 0x13, 0x90, 0x0e, 0xc6, 0x00, 0x46, 0x0e,
+ 0xce, 0xc3, 0xa2, 0x56, 0xc8, 0xbc, 0x62, 0x0e, 0xc3, 0x80, 0x00, 0x43,
+ 0xa2, 0x62, 0xc2, 0x00, 0x15, 0x0e, 0xcc, 0x78, 0xca, 0x03, 0x87, 0x01,
+ 0x5d, 0x09, 0xc9, 0x01, 0x88, 0x01, 0x5d, 0x00, 0xcc, 0x10, 0xb4, 0x07,
+ 0xeb, 0x41, 0xca, 0x26, 0xf7, 0x07, 0xeb, 0x38, 0xca, 0x26, 0xf7, 0x07,
+ 0xe3, 0x41, 0xcd, 0x00, 0xfa, 0x07, 0xe0, 0x18, 0xca, 0x9f, 0x72, 0x00,
+ 0x3b, 0xb1, 0xc8, 0xbf, 0x42, 0x00, 0x3b, 0xa8, 0xd5, 0x0e, 0x77, 0x00,
+ 0x45, 0x20, 0xc5, 0x05, 0x02, 0x00, 0x35, 0x29, 0xd6, 0x2d, 0x8e, 0x00,
+ 0x3b, 0x08, 0x45, 0x00, 0x49, 0xc3, 0xa2, 0x7a, 0x14, 0xc3, 0xa2, 0x86,
+ 0xd2, 0x4d, 0x33, 0x00, 0x43, 0xab, 0x03, 0xa2, 0x92, 0xcf, 0x63, 0x69,
+ 0x00, 0x43, 0x8b, 0x03, 0xa2, 0x98, 0xc5, 0x4d, 0x40, 0x00, 0x43, 0xa1,
+ 0xc5, 0x63, 0x73, 0x00, 0x43, 0x80, 0x45, 0x02, 0x9a, 0x43, 0xa2, 0x9e,
+ 0xc5, 0x05, 0x02, 0x00, 0x33, 0x99, 0xc5, 0x00, 0xd4, 0x00, 0x33, 0x90,
+ 0xc5, 0x05, 0x02, 0x00, 0x31, 0x2b, 0x03, 0xa2, 0xaa, 0xc5, 0x00, 0xd4,
+ 0x00, 0x31, 0x1a, 0x03, 0xa2, 0xae, 0x00, 0x43, 0xa2, 0xb2, 0xc8, 0xbf,
+ 0x42, 0x00, 0x3b, 0x99, 0xca, 0x9f, 0x72, 0x00, 0x3b, 0xa0, 0xca, 0x26,
+ 0xf7, 0x07, 0xda, 0x89, 0xcd, 0x00, 0xfa, 0x07, 0xda, 0x80, 0xd0, 0x05,
+ 0x29, 0x00, 0x44, 0x69, 0xc5, 0x00, 0xd4, 0x00, 0x31, 0xd8, 0xc5, 0x05,
+ 0x02, 0x00, 0x31, 0xe1, 0xc5, 0x00, 0xd4, 0x00, 0x3b, 0x19, 0xd6, 0x2d,
+ 0x8e, 0x00, 0x3b, 0x20, 0xc5, 0x05, 0x02, 0x00, 0x45, 0xa1, 0xc5, 0x00,
+ 0xd4, 0x00, 0x35, 0x60, 0xcf, 0x67, 0xce, 0x00, 0x35, 0x71, 0xcd, 0x04,
+ 0xe7, 0x00, 0x3b, 0xf8, 0xc4, 0xe0, 0x63, 0x00, 0x36, 0x19, 0xcd, 0x00,
+ 0xfa, 0x07, 0xf4, 0x99, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0xa0, 0xc5, 0x05,
+ 0x02, 0x00, 0x44, 0x61, 0xc5, 0x00, 0xd4, 0x00, 0x34, 0xf8, 0xd0, 0x59,
+ 0xa2, 0x00, 0x45, 0xd1, 0xc9, 0x16, 0x14, 0x00, 0x45, 0x49, 0xcb, 0x08,
+ 0x09, 0x00, 0x45, 0x40, 0x0b, 0xc3, 0xa2, 0xbe, 0xca, 0x26, 0xf7, 0x07,
+ 0xf4, 0x51, 0xcb, 0x64, 0x7b, 0x07, 0xf4, 0x60, 0xcb, 0x08, 0x09, 0x00,
+ 0x36, 0x9b, 0x03, 0xa2, 0xca, 0x5d, 0x10, 0x12, 0x43, 0xa2, 0xce, 0xca,
+ 0x59, 0xa8, 0x00, 0x45, 0xc9, 0x98, 0x00, 0x34, 0x93, 0x03, 0xa2, 0xda,
+ 0xde, 0x02, 0x89, 0x00, 0x3b, 0x88, 0xc6, 0x05, 0x01, 0x00, 0x45, 0x00,
+ 0xd6, 0x2d, 0x8e, 0x00, 0x3a, 0x93, 0x03, 0xa2, 0xe0, 0xd2, 0x4b, 0xcb,
+ 0x00, 0x3a, 0x80, 0xd5, 0x0e, 0x77, 0x00, 0x34, 0xe0, 0x4a, 0x0e, 0x7d,
+ 0xc3, 0xa2, 0xe6, 0x46, 0x02, 0xa0, 0x43, 0xa2, 0xf2, 0x98, 0x00, 0x37,
+ 0x71, 0xcd, 0x31, 0x5f, 0x00, 0x3a, 0xd0, 0xce, 0x05, 0x39, 0x00, 0x34,
+ 0x58, 0x4a, 0x0e, 0x7d, 0xc3, 0xa2, 0xf8, 0x48, 0x04, 0xe7, 0x43, 0xa3,
+ 0x04, 0xe0, 0x09, 0x87, 0x00, 0x3b, 0xe0, 0xc5, 0x05, 0x02, 0x00, 0x3b,
+ 0x71, 0x03, 0x43, 0xa3, 0x10, 0xcb, 0x10, 0xb5, 0x07, 0xdd, 0x61, 0xcc,
+ 0x00, 0xfb, 0x07, 0xdd, 0x50, 0xcb, 0x10, 0xb5, 0x07, 0xdd, 0x41, 0xcc,
+ 0x00, 0xfb, 0x07, 0xdd, 0x30, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x29, 0xcd,
+ 0x00, 0xfa, 0x07, 0xdd, 0x20, 0xd0, 0x13, 0xe9, 0x0f, 0xdd, 0x58, 0xcf,
+ 0x0a, 0x48, 0x0f, 0xdd, 0x50, 0xa5, 0x01, 0x47, 0xf8, 0xd3, 0x42, 0xed,
+ 0x0e, 0xf8, 0x40, 0xd1, 0x01, 0x68, 0x05, 0x5a, 0x11, 0xc6, 0x01, 0x73,
+ 0x05, 0x5a, 0x08, 0xcb, 0x98, 0x58, 0x00, 0x11, 0x88, 0xc9, 0x0e, 0x6e,
+ 0x00, 0xf6, 0x39, 0xc5, 0x1e, 0xc8, 0x00, 0xf6, 0x29, 0xca, 0x9e, 0x5a,
+ 0x00, 0xf6, 0x19, 0xc5, 0x1f, 0x0c, 0x00, 0xf6, 0x09, 0xc5, 0x31, 0xee,
+ 0x00, 0xf5, 0xf8, 0xcc, 0x51, 0x28, 0x0e, 0xf8, 0xb1, 0xcc, 0x1e, 0xc1,
+ 0x00, 0xeb, 0x98, 0xc5, 0x05, 0x02, 0x00, 0xf2, 0xdb, 0x03, 0xa3, 0x1c,
+ 0xc5, 0x00, 0xd4, 0x00, 0xf2, 0xc8, 0xc9, 0x0e, 0x6e, 0x00, 0xf7, 0x89,
+ 0xc5, 0x1e, 0xc8, 0x00, 0xf7, 0x79, 0xca, 0x9e, 0x5a, 0x00, 0xf7, 0x69,
+ 0xc5, 0x1f, 0x0c, 0x00, 0xf7, 0x59, 0xc5, 0x31, 0xee, 0x00, 0xf7, 0x48,
+ 0xc5, 0x31, 0xee, 0x00, 0x0b, 0x89, 0xc5, 0x1f, 0x0c, 0x00, 0x10, 0xa8,
+ 0xc5, 0x05, 0x02, 0x00, 0xf3, 0x99, 0x44, 0x02, 0x9b, 0x43, 0xa3, 0x22,
+ 0xc9, 0x0e, 0x6e, 0x00, 0xf5, 0x69, 0xc5, 0x1e, 0xc8, 0x00, 0xf5, 0x59,
+ 0xca, 0x9e, 0x5a, 0x00, 0xf5, 0x49, 0xc5, 0x1f, 0x0c, 0x00, 0xf5, 0x39,
+ 0xc5, 0x31, 0xee, 0x00, 0xf5, 0x28, 0xc5, 0x05, 0x02, 0x00, 0xf5, 0x09,
+ 0xc5, 0x00, 0xd4, 0x00, 0x11, 0x3a, 0x03, 0xa3, 0x3a, 0xc5, 0x05, 0x02,
+ 0x00, 0xf0, 0x09, 0xc5, 0x00, 0xd4, 0x00, 0x07, 0x2a, 0x03, 0xa3, 0x40,
+ 0xc6, 0x60, 0xb1, 0x00, 0x0e, 0xa9, 0xc5, 0x31, 0xee, 0x00, 0x0e, 0xb9,
+ 0xc5, 0x8e, 0x66, 0x00, 0x0e, 0xc9, 0xc5, 0x1f, 0x0c, 0x00, 0x0e, 0xd8,
+ 0xc6, 0xc1, 0x86, 0x05, 0x4b, 0x91, 0xc5, 0xc0, 0x7d, 0x00, 0x89, 0x18,
+ 0xc3, 0x05, 0x14, 0x01, 0x9f, 0xa1, 0x16, 0xc3, 0xa3, 0x46, 0x08, 0xc3,
+ 0xa3, 0x52, 0x15, 0xc3, 0xa3, 0x5e, 0xc5, 0x06, 0xdb, 0x01, 0x9f, 0xd9,
+ 0xc4, 0x26, 0x78, 0x01, 0x9f, 0xe0, 0xc2, 0x02, 0xa0, 0x01, 0x9b, 0x71,
+ 0xc4, 0x02, 0xde, 0x01, 0x9b, 0x78, 0xd3, 0x42, 0xed, 0x08, 0x3d, 0x38,
+ 0xc5, 0x02, 0xc2, 0x0e, 0x8a, 0x89, 0xc5, 0x01, 0xfc, 0x0e, 0x8a, 0x80,
+ 0x45, 0xab, 0x1f, 0xc3, 0xa3, 0x6a, 0xc2, 0x00, 0x4f, 0x0e, 0x8b, 0x28,
+ 0xcb, 0x90, 0xf4, 0x0e, 0x8c, 0x59, 0x46, 0x6d, 0xc6, 0x43, 0xa3, 0x74,
+ 0xa2, 0x0e, 0x8b, 0x91, 0xa1, 0x0e, 0x8b, 0x89, 0xa0, 0x0e, 0x8b, 0x81,
+ 0x9f, 0x0e, 0x8b, 0x79, 0x9e, 0x0e, 0x8b, 0x70, 0xc9, 0xab, 0x1c, 0x0e,
+ 0x8c, 0x08, 0x45, 0x02, 0x9a, 0x43, 0xa3, 0x80, 0x12, 0xc3, 0xa3, 0x96,
+ 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x6b, 0x03, 0xa3, 0xa5, 0xc5, 0x28, 0x47,
+ 0x00, 0xfb, 0x5a, 0x03, 0xa3, 0xab, 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0x69,
+ 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x58, 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0x61,
+ 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x50, 0xcb, 0x94, 0x90, 0x00, 0xfa, 0xf9,
+ 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0xe9, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0xd8,
+ 0xcb, 0x94, 0x90, 0x00, 0xf9, 0xf9, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0xe9,
+ 0xc5, 0x28, 0x47, 0x00, 0xf9, 0xd8, 0x45, 0x02, 0x9a, 0x43, 0xa3, 0xb1,
+ 0x12, 0xc3, 0xa3, 0xc7, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0x6b, 0x03, 0xa3,
+ 0xd6, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x5a, 0x03, 0xa3, 0xdc, 0xc4, 0xe3,
+ 0xab, 0x00, 0xf8, 0xe9, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0xd8, 0xc4, 0xe3,
+ 0xab, 0x00, 0xf8, 0xe1, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0xd0, 0x45, 0x02,
+ 0x9a, 0x43, 0xa3, 0xe2, 0x12, 0xc3, 0xa3, 0xf8, 0xc4, 0xe3, 0xab, 0x00,
+ 0xf8, 0x6b, 0x03, 0xa4, 0x07, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x5a, 0x03,
+ 0xa4, 0x0d, 0xd2, 0x4a, 0x63, 0x00, 0xff, 0xb8, 0x45, 0x02, 0x9a, 0x43,
+ 0xa4, 0x13, 0xcb, 0x94, 0x90, 0x00, 0xfb, 0x3b, 0x03, 0xa4, 0x34, 0xc4,
+ 0xe3, 0xab, 0x00, 0xfb, 0x2b, 0x03, 0xa4, 0x3a, 0xc5, 0x28, 0x47, 0x00,
+ 0xfb, 0x1b, 0x03, 0xa4, 0x40, 0xcd, 0x4a, 0x68, 0x00, 0xfd, 0x08, 0xc4,
+ 0xe3, 0xab, 0x00, 0xfa, 0x29, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x18, 0xc4,
+ 0xe3, 0xab, 0x00, 0xfa, 0x21, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x10, 0xcb,
+ 0x94, 0x90, 0x00, 0xff, 0x39, 0xc4, 0xe3, 0xab, 0x00, 0xff, 0x19, 0xc5,
+ 0x28, 0x47, 0x00, 0xff, 0x11, 0xc5, 0x63, 0xdc, 0x00, 0x1d, 0x80, 0xcb,
+ 0x94, 0x90, 0x00, 0xfa, 0xb9, 0xc4, 0xe3, 0xab, 0x00, 0xfa, 0xa9, 0xc5,
+ 0x28, 0x47, 0x00, 0xfa, 0x98, 0xcb, 0x94, 0x90, 0x00, 0xfa, 0xb1, 0xc4,
+ 0xe3, 0xab, 0x00, 0xfa, 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xfa, 0x90, 0xcb,
+ 0x94, 0x90, 0x00, 0xfe, 0xb9, 0xc4, 0xe3, 0xab, 0x00, 0xfe, 0x99, 0xc5,
+ 0x28, 0x47, 0x00, 0xfe, 0x91, 0xc5, 0x63, 0xdc, 0x00, 0x1c, 0x80, 0xcb,
+ 0x94, 0x90, 0x00, 0xf9, 0xb9, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0xa9, 0xc5,
+ 0x28, 0x47, 0x00, 0xf9, 0x98, 0xcb, 0x94, 0x90, 0x00, 0xf9, 0xb1, 0xc4,
+ 0xe3, 0xab, 0x00, 0xf9, 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x90, 0xd2,
+ 0x4a, 0x63, 0x00, 0xfe, 0x38, 0x45, 0x02, 0x9a, 0x43, 0xa4, 0x46, 0xcb,
+ 0x94, 0x90, 0x00, 0xf9, 0x3b, 0x03, 0xa4, 0x67, 0xc4, 0xe3, 0xab, 0x00,
+ 0xf9, 0x2b, 0x03, 0xa4, 0x6d, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x1b, 0x03,
+ 0xa4, 0x73, 0xcd, 0x4a, 0x68, 0x00, 0xfc, 0x88, 0xc4, 0xe3, 0xab, 0x00,
+ 0xf8, 0xa9, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x98, 0xc4, 0xe3, 0xab, 0x00,
+ 0xf8, 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x90, 0xd2, 0x4a, 0x63, 0x00,
+ 0xfd, 0xb8, 0x45, 0x02, 0x9a, 0x43, 0xa4, 0x79, 0xd2, 0x4a, 0x63, 0x00,
+ 0xfd, 0x90, 0xcb, 0x94, 0x90, 0x00, 0xf8, 0x3b, 0x03, 0xa4, 0x9a, 0xc4,
+ 0xe3, 0xab, 0x00, 0xf8, 0x2b, 0x03, 0xa4, 0xa0, 0xc5, 0x28, 0x47, 0x00,
+ 0xf8, 0x1b, 0x03, 0xa4, 0xa6, 0xcd, 0x4a, 0x68, 0x00, 0xfc, 0x08, 0xc7,
+ 0xb9, 0xdb, 0x08, 0x0a, 0x61, 0xc7, 0x67, 0xc7, 0x08, 0x0a, 0x98, 0xc8,
+ 0xb9, 0xda, 0x08, 0x0a, 0x70, 0xc8, 0x67, 0xc6, 0x08, 0x0a, 0xb0, 0xca,
+ 0xa2, 0x6a, 0x0e, 0x7d, 0xe8, 0x46, 0x00, 0x8b, 0x43, 0xa4, 0xac, 0xcc,
+ 0x89, 0x61, 0x0e, 0xc8, 0x01, 0xca, 0x92, 0xa2, 0x0e, 0xc7, 0xf0, 0xc9,
+ 0x67, 0x79, 0x0e, 0xc1, 0x60, 0xc5, 0x02, 0xd2, 0x0e, 0xc7, 0x5b, 0x03,
+ 0xa4, 0xb8, 0x17, 0x43, 0xa4, 0xbe, 0x4a, 0x6d, 0x50, 0x43, 0xa4, 0xc8,
+ 0xc4, 0x18, 0xf2, 0x0e, 0xc7, 0x29, 0xc8, 0x45, 0x27, 0x0e, 0xc7, 0x20,
+ 0x00, 0x43, 0xa4, 0xd4, 0xcc, 0x85, 0x41, 0x0e, 0xc1, 0xd9, 0xcd, 0x7e,
+ 0x96, 0x0e, 0xc1, 0xd0, 0x05, 0xc3, 0xa4, 0xe6, 0xc6, 0x13, 0x95, 0x0e,
+ 0xc5, 0x21, 0x14, 0xc3, 0xa4, 0xf5, 0xc5, 0x0e, 0xce, 0x0e, 0xc0, 0xf3,
+ 0x03, 0xa5, 0x04, 0xd7, 0x27, 0xa2, 0x0e, 0xc1, 0x39, 0xc6, 0x58, 0xac,
+ 0x0e, 0xc0, 0x93, 0x03, 0xa5, 0x08, 0xc4, 0x18, 0xf2, 0x0e, 0xc0, 0x83,
+ 0x03, 0xa5, 0x0e, 0xd3, 0x45, 0x27, 0x0e, 0xc1, 0x00, 0xc9, 0x6d, 0x53,
+ 0x0e, 0xc0, 0xa3, 0x03, 0xa5, 0x14, 0xc3, 0x01, 0xc8, 0x0e, 0xc0, 0x60,
+ 0xc9, 0x13, 0x9c, 0x0e, 0xc1, 0x29, 0xc4, 0x0e, 0xe2, 0x0e, 0xc1, 0x20,
+ 0xc7, 0x1a, 0xc5, 0x0e, 0xc2, 0x09, 0xc2, 0x02, 0xae, 0x0e, 0xc2, 0x00,
+ 0xc6, 0x58, 0xac, 0x0e, 0xc1, 0xc9, 0xc2, 0x02, 0xae, 0x0e, 0xc1, 0xc0,
+ 0xc6, 0x3b, 0x9c, 0x0e, 0xc4, 0x81, 0xc8, 0x45, 0x27, 0x0e, 0xc4, 0x78,
+ 0xc4, 0x18, 0xf2, 0x0e, 0xc2, 0x89, 0xc9, 0xb4, 0x40, 0x0e, 0xc2, 0x78,
+ 0x00, 0x43, 0xa5, 0x1a, 0xc6, 0xcd, 0x73, 0x0e, 0xc2, 0x40, 0x15, 0xc3,
+ 0xa5, 0x26, 0xc5, 0x17, 0x14, 0x0e, 0xc7, 0x79, 0xc4, 0x05, 0x75, 0x0e,
+ 0xc7, 0x70, 0xca, 0x13, 0x9b, 0x0e, 0xc4, 0x68, 0xc5, 0x05, 0x74, 0x0e,
+ 0xc7, 0x68, 0xc7, 0x27, 0xb2, 0x0e, 0xc3, 0x91, 0xc4, 0x0e, 0xe2, 0x0e,
+ 0xc3, 0x70, 0x45, 0x0d, 0x20, 0xc3, 0xa5, 0x32, 0xc6, 0x13, 0x95, 0x0e,
+ 0xc5, 0x29, 0xc4, 0x00, 0x9d, 0x0e, 0xc4, 0x39, 0xc5, 0x0e, 0xce, 0x0e,
+ 0xc0, 0xf8, 0xc5, 0x08, 0x09, 0x00, 0x44, 0x11, 0xc9, 0x4d, 0xde, 0x00,
+ 0x43, 0xc0, 0x45, 0x00, 0x2d, 0xc3, 0xa5, 0x3e, 0x49, 0x9a, 0xeb, 0x43,
+ 0xa5, 0x4a, 0x45, 0x02, 0x9a, 0x43, 0xa5, 0x56, 0x45, 0x02, 0x9a, 0x43,
+ 0xa5, 0x62, 0xc9, 0xaf, 0x6f, 0x00, 0x43, 0xf9, 0xc9, 0x16, 0x14, 0x00,
+ 0x43, 0xe0, 0x00, 0x43, 0xa5, 0x6e, 0x00, 0x43, 0xa5, 0x7a, 0xcd, 0x00,
+ 0xfa, 0x07, 0xf4, 0x09, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x10, 0xcc, 0x00,
+ 0xfb, 0x07, 0xf4, 0x49, 0xcb, 0x10, 0xb5, 0x07, 0xf4, 0x58, 0x00, 0x43,
+ 0xa5, 0x86, 0xca, 0x9f, 0x72, 0x00, 0x3b, 0xd9, 0xc8, 0xbf, 0x42, 0x00,
+ 0x3b, 0xd0, 0xc6, 0x05, 0x01, 0x00, 0x34, 0xa8, 0xd3, 0x1e, 0x24, 0x00,
+ 0x3a, 0x98, 0xc5, 0x05, 0x02, 0x00, 0x45, 0x71, 0xcf, 0x1b, 0x59, 0x00,
+ 0x34, 0x78, 0xe0, 0x05, 0x27, 0x00, 0x3a, 0xc8, 0xc5, 0x00, 0xd4, 0x00,
+ 0x34, 0x29, 0xd6, 0x2d, 0x8e, 0x00, 0x3a, 0xc0, 0xce, 0x73, 0x6e, 0x00,
+ 0x34, 0x11, 0xc5, 0x00, 0xd4, 0x00, 0x3a, 0xb8, 0xcb, 0x02, 0x9c, 0x00,
+ 0x3b, 0x79, 0xc4, 0x00, 0xd5, 0x00, 0x3b, 0x90, 0xcb, 0x98, 0x58, 0x00,
+ 0xf2, 0xe8, 0xc6, 0x60, 0xb1, 0x00, 0x0e, 0xb1, 0xc5, 0x31, 0xee, 0x00,
+ 0x0e, 0xc1, 0xc5, 0x8e, 0x66, 0x00, 0x0e, 0xd1, 0xc5, 0x1f, 0x0c, 0x00,
+ 0x0e, 0xe0, 0xcb, 0x98, 0x58, 0x00, 0x0f, 0x08, 0xca, 0x9b, 0xda, 0x00,
+ 0x0f, 0xd8, 0xc2, 0x02, 0xa0, 0x01, 0x9f, 0xa9, 0xc4, 0x02, 0xde, 0x01,
+ 0x9f, 0xb0, 0xc3, 0x09, 0x9e, 0x01, 0x9f, 0xb9, 0xc3, 0x0d, 0x14, 0x01,
+ 0x9f, 0xc0, 0xc2, 0x22, 0xcc, 0x01, 0x9f, 0xc9, 0xc4, 0x18, 0x10, 0x01,
+ 0x9f, 0xd0, 0xc6, 0xd2, 0xb9, 0x0e, 0x8b, 0xf1, 0x91, 0x0e, 0x8b, 0xe0,
+ 0xa0, 0x0e, 0x8b, 0x49, 0x9f, 0x0e, 0x8b, 0x41, 0x9e, 0x0e, 0x8b, 0x38,
+ 0x12, 0xc3, 0xa5, 0x92, 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x63, 0x03, 0xa5,
+ 0xa1, 0xc5, 0x28, 0x47, 0x00, 0xfb, 0x52, 0x03, 0xa5, 0xa7, 0xca, 0x94,
+ 0x91, 0x00, 0xfb, 0x7b, 0x03, 0xa5, 0xad, 0xcd, 0x42, 0x94, 0x00, 0xfd,
+ 0x48, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x68, 0xd3, 0x42, 0x8e, 0x00, 0xfd,
+ 0x58, 0x12, 0xc3, 0xa5, 0xb3, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0x63, 0x03,
+ 0xa5, 0xc2, 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x52, 0x03, 0xa5, 0xc8, 0xca,
+ 0x94, 0x91, 0x00, 0xf9, 0x7b, 0x03, 0xa5, 0xce, 0xcd, 0x42, 0x94, 0x00,
+ 0xfc, 0xc8, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0xe8, 0xd3, 0x42, 0x8e, 0x00,
+ 0xfc, 0xd8, 0x12, 0xc3, 0xa5, 0xd4, 0xc4, 0xe3, 0xab, 0x00, 0xf8, 0x63,
+ 0x03, 0xa5, 0xe3, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x52, 0x03, 0xa5, 0xe9,
+ 0xca, 0x94, 0x91, 0x00, 0xf8, 0x7b, 0x03, 0xa5, 0xef, 0xcd, 0x42, 0x94,
+ 0x00, 0xfc, 0x48, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0x68, 0xd3, 0x42, 0x8e,
+ 0x00, 0xfc, 0x58, 0xcb, 0x94, 0x90, 0x00, 0xfb, 0x33, 0x03, 0xa5, 0xf5,
+ 0xc4, 0xe3, 0xab, 0x00, 0xfb, 0x23, 0x03, 0xa5, 0xfb, 0xc5, 0x28, 0x47,
+ 0x00, 0xfb, 0x13, 0x03, 0xa6, 0x01, 0xcd, 0x4a, 0x68, 0x00, 0xfd, 0x00,
+ 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x38, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x28,
+ 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x18, 0xcb, 0x94, 0x90, 0x00, 0xf9, 0x33,
+ 0x03, 0xa6, 0x07, 0xc4, 0xe3, 0xab, 0x00, 0xf9, 0x23, 0x03, 0xa6, 0x0d,
+ 0xc5, 0x28, 0x47, 0x00, 0xf9, 0x13, 0x03, 0xa6, 0x13, 0xcd, 0x4a, 0x68,
+ 0x00, 0xfc, 0x80, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0xb8, 0xd2, 0x4a, 0x63,
+ 0x00, 0xfc, 0xa8, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x98, 0xcb, 0x94, 0x90,
+ 0x00, 0xf8, 0x33, 0x03, 0xa6, 0x19, 0xc4, 0xe3, 0xab, 0x00, 0xf8, 0x23,
+ 0x03, 0xa6, 0x1f, 0xc5, 0x28, 0x47, 0x00, 0xf8, 0x13, 0x03, 0xa6, 0x25,
+ 0xcd, 0x4a, 0x68, 0x00, 0xfc, 0x00, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x38,
+ 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x28, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x18,
+ 0xd0, 0x58, 0xe2, 0x0e, 0x7d, 0xd9, 0xd0, 0x2d, 0x10, 0x0e, 0x7d, 0xc0,
+ 0xcb, 0x6d, 0x51, 0x0e, 0xc1, 0xe0, 0x14, 0xc3, 0xa6, 0x2b, 0xce, 0x6d,
+ 0x4e, 0x0e, 0xc1, 0xb8, 0xc6, 0x58, 0xac, 0x0e, 0xc2, 0x19, 0xc2, 0x02,
+ 0xae, 0x0e, 0xc1, 0x88, 0x46, 0x06, 0x82, 0xc3, 0xa6, 0x37, 0xc9, 0xb3,
+ 0xc2, 0x0e, 0xc7, 0x11, 0x46, 0x0e, 0xce, 0x43, 0xa6, 0x43, 0x44, 0x0d,
+ 0x21, 0xc3, 0xa6, 0x55, 0xc8, 0x13, 0x9d, 0x0e, 0xc0, 0xaa, 0x03, 0xa6,
+ 0x64, 0xc3, 0x00, 0x74, 0x0e, 0xc4, 0x33, 0x03, 0xa6, 0x68, 0xce, 0x3a,
+ 0x9e, 0x0e, 0xc0, 0x88, 0x00, 0x43, 0xa6, 0x6c, 0xd2, 0x4d, 0x7b, 0x0e,
+ 0xc1, 0x18, 0xcf, 0x69, 0xea, 0x0e, 0xc1, 0x08, 0xcb, 0x4d, 0x82, 0x0e,
+ 0xc1, 0x30, 0xc8, 0xbc, 0x62, 0x0e, 0xc2, 0xc9, 0xca, 0x4d, 0x83, 0x0e,
+ 0xc2, 0xc0, 0xc4, 0x03, 0x14, 0x0e, 0xc7, 0x89, 0xc3, 0x06, 0xa7, 0x0e,
+ 0xc6, 0xe8, 0xc7, 0x13, 0x94, 0x0e, 0xc5, 0x51, 0xc2, 0x00, 0xa8, 0x0e,
+ 0xc0, 0xd8, 0xc5, 0x08, 0x09, 0x00, 0x44, 0x09, 0xc9, 0x4d, 0xde, 0x00,
+ 0x43, 0xb8, 0xc5, 0x05, 0x02, 0x00, 0x43, 0xc9, 0xc5, 0x00, 0xd4, 0x00,
+ 0x43, 0xb0, 0xc9, 0xaf, 0x6f, 0x00, 0x44, 0x01, 0xc9, 0x16, 0x14, 0x00,
+ 0x43, 0xe8, 0xc9, 0xaf, 0x6f, 0x00, 0x43, 0xf1, 0xc9, 0x16, 0x14, 0x00,
+ 0x43, 0xd8, 0xca, 0x26, 0xf7, 0x07, 0xf4, 0x41, 0xcd, 0x00, 0xfa, 0x07,
+ 0xf4, 0x38, 0xcd, 0x00, 0xfa, 0x07, 0xf4, 0x19, 0xca, 0x26, 0xf7, 0x07,
+ 0xf4, 0x20, 0xca, 0x26, 0xf7, 0x07, 0xdd, 0x89, 0xcd, 0x00, 0xfa, 0x07,
+ 0xdd, 0x80, 0xca, 0x94, 0x91, 0x00, 0xfb, 0x73, 0x03, 0xa6, 0x83, 0xcd,
+ 0x42, 0x94, 0x00, 0xfd, 0x40, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x60, 0xd3,
+ 0x42, 0x8e, 0x00, 0xfd, 0x50, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x78, 0xca,
+ 0x94, 0x91, 0x00, 0xf9, 0x73, 0x03, 0xa6, 0x89, 0xcd, 0x42, 0x94, 0x00,
+ 0xfc, 0xc0, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0xe0, 0xd3, 0x42, 0x8e, 0x00,
+ 0xfc, 0xd0, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0xf8, 0xca, 0x94, 0x91, 0x00,
+ 0xf8, 0x73, 0x03, 0xa6, 0x8f, 0xcd, 0x42, 0x94, 0x00, 0xfc, 0x40, 0xd3,
+ 0x42, 0x8e, 0x00, 0xfc, 0x60, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0x50, 0xd3,
+ 0x42, 0x8e, 0x00, 0xfc, 0x78, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x30, 0xd2,
+ 0x4a, 0x63, 0x00, 0xfd, 0x20, 0xd2, 0x4a, 0x63, 0x00, 0xfd, 0x10, 0xd2,
+ 0x4a, 0x63, 0x00, 0xfc, 0xb0, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0xa0, 0xd2,
+ 0x4a, 0x63, 0x00, 0xfc, 0x90, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x30, 0xd2,
+ 0x4a, 0x63, 0x00, 0xfc, 0x20, 0xd2, 0x4a, 0x63, 0x00, 0xfc, 0x10, 0x49,
+ 0x0e, 0xd7, 0xc3, 0xa6, 0x95, 0xc5, 0xbc, 0x5d, 0x0e, 0xc7, 0x38, 0xc5,
+ 0x58, 0xac, 0x0e, 0xc7, 0x19, 0xc4, 0x18, 0xf2, 0x0e, 0xc7, 0x08, 0xc4,
+ 0x18, 0xf2, 0x0e, 0xc7, 0x01, 0xc9, 0x13, 0x9c, 0x0e, 0xc6, 0xf9, 0xc8,
+ 0x1e, 0x56, 0x0e, 0xc6, 0xf0, 0xc7, 0x13, 0x94, 0x0e, 0xc5, 0x49, 0xc2,
+ 0x00, 0xa8, 0x0e, 0xc0, 0xd2, 0x03, 0xa6, 0xa1, 0x00, 0x43, 0xa6, 0xa7,
+ 0x00, 0x43, 0xa6, 0xcb, 0xc6, 0x77, 0x82, 0x0e, 0xc1, 0xfb, 0x03, 0xa6,
+ 0xd7, 0x05, 0xc3, 0xa6, 0xdd, 0x0a, 0xc3, 0xa6, 0xef, 0xc4, 0x18, 0xf2,
+ 0x0e, 0xc1, 0x10, 0xd3, 0x42, 0x8e, 0x00, 0xfd, 0x70, 0xd3, 0x42, 0x8e,
+ 0x00, 0xfc, 0xf0, 0xd3, 0x42, 0x8e, 0x00, 0xfc, 0x70, 0xc5, 0x16, 0xca,
+ 0x0e, 0xc7, 0x51, 0xc6, 0x0e, 0xe0, 0x0e, 0xc7, 0x40, 0xcb, 0x4d, 0x82,
+ 0x0e, 0xc1, 0x98, 0xc6, 0xcc, 0x41, 0x0e, 0xc0, 0xc3, 0x03, 0xa6, 0xfb,
+ 0x46, 0x0e, 0xce, 0xc3, 0xa7, 0x01, 0xc6, 0x58, 0xac, 0x0e, 0xc0, 0xcb,
+ 0x03, 0xa7, 0x10, 0xcb, 0x99, 0xad, 0x0e, 0xc0, 0xb9, 0xca, 0xa1, 0x70,
+ 0x0e, 0xc0, 0xb0, 0xc9, 0x13, 0x9c, 0x0e, 0xc4, 0x61, 0xc4, 0x18, 0xf2,
+ 0x0e, 0xc4, 0x58, 0xc4, 0x0c, 0x4d, 0x0e, 0xc1, 0xf0, 0xcf, 0x62, 0xd3,
+ 0x0e, 0xc1, 0xe9, 0xc6, 0x20, 0x7d, 0x0e, 0xc1, 0x49, 0xc5, 0x70, 0xdc,
+ 0x0e, 0xc1, 0x40, 0xc5, 0x58, 0xad, 0x0e, 0xc1, 0x59, 0xc5, 0x64, 0xb4,
+ 0x0e, 0xc1, 0x50, 0xce, 0x27, 0xab, 0x0e, 0xc1, 0xa8, 0xc7, 0x27, 0xb2,
+ 0x0e, 0xc1, 0xa1, 0xc4, 0x0e, 0xe2, 0x0e, 0xc1, 0x6a, 0x03, 0xa7, 0x16,
+ 0xcb, 0x4d, 0x82, 0x0e, 0xc1, 0x90, 0x00, 0x43, 0xa7, 0x1a, 0xc4, 0x18,
+ 0xf2, 0x0e, 0xc1, 0x79, 0xc9, 0x13, 0x9c, 0x0e, 0xc1, 0x70, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0};
+const uint8_t *UnicodeNameToCodepointIndex = UnicodeNameToCodepointIndex_;
+const std::size_t UnicodeNameToCodepointIndexSize = 239405;
+const std::size_t UnicodeNameToCodepointLargestNameSize = 74;
+
+} // namespace unicode
+} // namespace sys
+} // namespace llvm
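
For orientation, the generated trie above is not meant to be read directly; it is consumed through the Unicode name-lookup helpers. A minimal sketch, assuming the nameToCodepointStrict entry point declared in llvm/Support/Unicode.h:

    // Minimal sketch, assuming llvm::sys::unicode::nameToCodepointStrict
    // from llvm/Support/Unicode.h; the table above is only reached through
    // that API, never indexed by hand.
    #include "llvm/ADT/Optional.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/Unicode.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      if (llvm::Optional<char32_t> CP =
              llvm::sys::unicode::nameToCodepointStrict("LATIN SMALL LETTER A"))
        llvm::outs() << "U+" << llvm::format_hex_no_prefix(*CP, 4, true) << "\n";
      return 0;
    }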
diff --git a/llvm/lib/Support/Unix/COM.inc b/llvm/lib/Support/Unix/COM.inc
index 03a690ac3766..d97b59ac02cf 100644
--- a/llvm/lib/Support/Unix/COM.inc
+++ b/llvm/lib/Support/Unix/COM.inc
@@ -21,6 +21,6 @@ namespace sys {
InitializeCOMRAII::InitializeCOMRAII(COMThreadingMode Threading,
bool SpeedOverMemory) {}
-InitializeCOMRAII::~InitializeCOMRAII() {}
+InitializeCOMRAII::~InitializeCOMRAII() = default;
}
}
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index b83477e0e4cc..5e008069dd98 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -16,6 +16,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/Valgrind.h"
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 788460d657fe..2ae7c6dc47e0 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -194,7 +194,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
-std::string getMainExecutable(const char *argv0, void *MainAddr) {
+std::string getMainExecutableImpl(const char *argv0, void *MainAddr) {
#if defined(__APPLE__)
// On OS X the executable path is saved to the stack by dyld. Reading it
// from there is much faster than calling dladdr, especially for large
@@ -874,12 +874,14 @@ void mapped_file_region::unmapImpl() {
void mapped_file_region::dontNeedImpl() {
assert(Mode == mapped_file_region::readonly);
+ if (!Mapping)
+ return;
#if defined(__MVS__) || defined(_AIX)
// If we don't have madvise, or it isn't beneficial, treat this as a no-op.
- return;
+#elif defined(POSIX_MADV_DONTNEED)
+ ::posix_madvise(Mapping, Size, POSIX_MADV_DONTNEED);
#else
- if (Mapping)
- ::madvise(Mapping, Size, MADV_DONTNEED);
+ ::madvise(Mapping, Size, MADV_DONTNEED);
#endif
}
@@ -948,7 +950,15 @@ ErrorOr<basic_file_status> directory_entry::status() const {
return s;
}
-#if !defined(F_GETPATH)
+//
+// FreeBSD optionally provides /proc/self/fd, but its semantics are
+// incompatible with Linux's; use realpath() instead.
+//
+#if !defined(__FreeBSD__)
+#define TRY_PROC_SELF_FD
+#endif
+
+#if !defined(F_GETPATH) && defined(TRY_PROC_SELF_FD)
static bool hasProcSelfFD() {
// If we have a /proc filesystem mounted, we can quickly establish the
// real name of the file with readlink
@@ -1135,6 +1145,7 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
RealPath->append(Buffer, Buffer + strlen(Buffer));
#else
char Buffer[PATH_MAX];
+#if defined(TRY_PROC_SELF_FD)
if (hasProcSelfFD()) {
char ProcPath[64];
snprintf(ProcPath, sizeof(ProcPath), "/proc/self/fd/%d", ResultFD);
@@ -1142,14 +1153,17 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
if (CharCount > 0)
RealPath->append(Buffer, Buffer + CharCount);
} else {
+#endif
SmallString<128> Storage;
StringRef P = Name.toNullTerminatedStringRef(Storage);
// Use ::realpath to get the real path name
if (::realpath(P.begin(), Buffer) != nullptr)
RealPath->append(Buffer, Buffer + strlen(Buffer));
+#if defined(TRY_PROC_SELF_FD)
}
#endif
+#endif
return std::error_code();
}
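
A minimal sketch of the caller-visible effect: the RealPath out-parameter of openFileForRead, declared in llvm/Support/FileSystem.h, which on FreeBSD now always resolves through realpath() rather than /proc/self/fd:

    // Sketch: exercising the RealPath out-parameter affected by the
    // TRY_PROC_SELF_FD change above.
    #include <unistd.h>
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"

    static void resolveExample() {
      int FD;
      llvm::SmallString<256> Real;
      if (!llvm::sys::fs::openFileForRead("/etc/hosts", FD,
                                          llvm::sys::fs::OF_None, &Real)) {
        // Real now holds the fully resolved path of the opened file.
        ::close(FD);
      }
    }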
diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc
index d3d9fb7d7187..3c2d118977c5 100644
--- a/llvm/lib/Support/Unix/Process.inc
+++ b/llvm/lib/Support/Unix/Process.inc
@@ -331,6 +331,23 @@ extern "C" int tigetnum(char *capname);
static ManagedStatic<std::mutex> TermColorMutex;
#endif
+bool checkTerminalEnvironmentForColors() {
+ if (const char *TermStr = std::getenv("TERM")) {
+ return StringSwitch<bool>(TermStr)
+ .Case("ansi", true)
+ .Case("cygwin", true)
+ .Case("linux", true)
+ .StartsWith("screen", true)
+ .StartsWith("xterm", true)
+ .StartsWith("vt100", true)
+ .StartsWith("rxvt", true)
+ .EndsWith("color", true)
+ .Default(false);
+ }
+
+ return false;
+}
+
static bool terminalHasColors(int fd) {
#ifdef LLVM_ENABLE_TERMINFO
// First, acquire a global lock because these C routines are thread hostile.
@@ -356,7 +373,8 @@ static bool terminalHasColors(int fd) {
//
// The 'tigetnum' routine returns -2 or -1 on errors, and might return 0 if
// the terminfo says that no colors are supported.
- bool HasColors = tigetnum(const_cast<char *>("colors")) > 0;
+ int colors_ti = tigetnum(const_cast<char *>("colors"));
+ bool HasColors = colors_ti >= 0 ? colors_ti : checkTerminalEnvironmentForColors();
// Now extract the structure allocated by setupterm and free its memory
// through a really silly dance.
@@ -364,27 +382,12 @@ static bool terminalHasColors(int fd) {
(void)del_curterm(termp); // Drop any errors here.
 // Return true if we found color capabilities for the current terminal.
- if (HasColors)
- return true;
+ return HasColors;
#else
// When the terminfo database is not available, check if the current terminal
// is one of terminals that are known to support ANSI color escape codes.
- if (const char *TermStr = std::getenv("TERM")) {
- return StringSwitch<bool>(TermStr)
- .Case("ansi", true)
- .Case("cygwin", true)
- .Case("linux", true)
- .StartsWith("screen", true)
- .StartsWith("xterm", true)
- .StartsWith("vt100", true)
- .StartsWith("rxvt", true)
- .EndsWith("color", true)
- .Default(false);
- }
+ return checkTerminalEnvironmentForColors();
#endif
-
- // Otherwise, be conservative.
- return false;
}
bool Process::FileDescriptorHasColors(int fd) {
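
The refactoring keeps the public surface unchanged; a short sketch of the caller side, using the FileDescriptorHasColors wrapper from llvm/Support/Process.h that eventually reaches terminalHasColors:

    // Sketch of the consumer path; both the terminfo branch and the new
    // checkTerminalEnvironmentForColors fallback sit behind this call.
    #include <unistd.h>
    #include "llvm/Support/Process.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      bool Colors = llvm::sys::Process::FileDescriptorHasColors(STDERR_FILENO);
      llvm::errs() << (Colors ? "stderr supports color\n"
                              : "stderr has no color support\n");
      return 0;
    }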
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 575e2aab1eab..bf145bffe8bf 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -79,8 +79,8 @@
using namespace llvm;
-static RETSIGTYPE SignalHandler(int Sig); // defined below.
-static RETSIGTYPE InfoSignalHandler(int Sig); // defined below.
+static void SignalHandler(int Sig); // defined below.
+static void InfoSignalHandler(int Sig); // defined below.
using SignalHandlerFunctionType = void (*)();
/// The function to call if ctrl-c is pressed.
@@ -362,7 +362,7 @@ void sys::CleanupOnSignal(uintptr_t Context) {
}
// The signal handler that runs.
-static RETSIGTYPE SignalHandler(int Sig) {
+static void SignalHandler(int Sig) {
// Restore the signal behavior to default, so that the program actually
// crashes when we return and the signal reissues. This also ensures that if
// we crash in our signal handler that the program will terminate immediately
@@ -406,7 +406,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
#endif
}
-static RETSIGTYPE InfoSignalHandler(int Sig) {
+static void InfoSignalHandler(int Sig) {
SaveAndRestore<int> SaveErrnoDuringASignalHandler(errno);
if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction)
CurrentInfoFunction();
@@ -432,6 +432,10 @@ void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
}
void llvm::sys::DefaultOneShotPipeSignalHandler() {
+ // UNIX03 conformance requires a non-zero exit code and an error message
+ // to stderr when writing to a closed stdout fails.
+ errs() << "error: write on a pipe with no reader\n";
+
// Send a special return code that drivers can check for, from sysexits.h.
exit(EX_IOERR);
}
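
A minimal sketch of a driver opting into this handler via SetOneShotPipeSignalFunction (shown in the context above), so that `tool | head -1` exits with EX_IOERR and the new stderr message instead of dying silently on SIGPIPE:

    // Sketch: installing the one-shot SIGPIPE handler defined above.
    #include "llvm/Support/Signals.h"

    int main(int argc, char **argv) {
      llvm::sys::SetOneShotPipeSignalFunction(
          llvm::sys::DefaultOneShotPipeSignalHandler);
      // ... normal tool logic writing to stdout ...
      return 0;
    }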
diff --git a/llvm/lib/Support/Unix/ThreadLocal.inc b/llvm/lib/Support/Unix/ThreadLocal.inc
index a402ae980424..0a958a2b952f 100644
--- a/llvm/lib/Support/Unix/ThreadLocal.inc
+++ b/llvm/lib/Support/Unix/ThreadLocal.inc
@@ -17,8 +17,6 @@
#include "llvm/Config/config.h"
-#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
-
#include <cassert>
#include <pthread.h>
#include <stdlib.h>
@@ -58,13 +56,3 @@ void ThreadLocalImpl::removeInstance() {
}
}
-#else
-namespace llvm {
-using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() : data() { }
-ThreadLocalImpl::~ThreadLocalImpl() { }
-void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
-void *ThreadLocalImpl::getInstance() { return data; }
-void ThreadLocalImpl::removeInstance() { setInstance(0); }
-}
-#endif
diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc
index 5de1cf071ba9..99f64b4f553d 100644
--- a/llvm/lib/Support/Unix/Threading.inc
+++ b/llvm/lib/Support/Unix/Threading.inc
@@ -18,6 +18,7 @@
#if defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
+#include <pthread/qos.h>
#endif
#include <pthread.h>
@@ -258,27 +259,29 @@ SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
// SCHED_OTHER the standard round-robin time-sharing policy;
return !pthread_setschedparam(
pthread_self(),
- Priority == ThreadPriority::Background ? SCHED_IDLE : SCHED_OTHER,
+ // FIXME: consider SCHED_BATCH for Low
+ Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
&priority)
? SetThreadPriorityResult::SUCCESS
: SetThreadPriorityResult::FAILURE;
#elif defined(__APPLE__)
- // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/getpriority.2.html
- // When setting a thread into background state the scheduling priority is set
- // to lowest value, disk and network IO are throttled. Network IO will be
- // throttled for any sockets the thread opens after going into background
- // state. Any previously opened sockets are not affected.
-
- // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/getiopolicy_np.3.html
- // I/Os with THROTTLE policy are called THROTTLE I/Os. If a THROTTLE I/O
- // request occurs within a small time window (usually a fraction of a second)
- // of another NORMAL I/O request, the thread that issues the THROTTLE I/O is
- // forced to sleep for a certain interval. This slows down the thread that
- // issues the THROTTLE I/O so that NORMAL I/Os can utilize most of the disk
- // I/O bandwidth.
- return !setpriority(PRIO_DARWIN_THREAD, 0,
- Priority == ThreadPriority::Background ? PRIO_DARWIN_BG
- : 0)
+ // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
+ //
+ // Background - Applies to work that isn’t visible to the user and may take significant
+ // time to complete. Examples include indexing, backing up, or synchronizing data. This
+ // class emphasizes energy efficiency.
+ //
+ // Utility - Applies to work that takes anywhere from a few seconds to a few minutes to
+ // complete. Examples include downloading a document or importing data. This class
+ // offers a balance between responsiveness, performance, and energy efficiency.
+ const auto qosClass = [&](){
+ switch (Priority) {
+ case ThreadPriority::Background: return QOS_CLASS_BACKGROUND;
+ case ThreadPriority::Low: return QOS_CLASS_UTILITY;
+ case ThreadPriority::Default: return QOS_CLASS_DEFAULT;
+ }
+ }();
+ return !pthread_set_qos_class_self_np(qosClass, 0)
? SetThreadPriorityResult::SUCCESS
: SetThreadPriorityResult::FAILURE;
#endif
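
A short sketch of the caller side, assuming the three-level ThreadPriority enum (Background, Low, Default) that this hunk switches over; set_thread_priority is declared in llvm/Support/Threading.h:

    // Sketch: a worker demoting itself under the mapping above
    // (Background -> QOS_CLASS_BACKGROUND on Darwin, SCHED_IDLE on Linux).
    #include "llvm/Support/Threading.h"

    static void backgroundWork() {
      if (llvm::set_thread_priority(llvm::ThreadPriority::Background) ==
          llvm::SetThreadPriorityResult::FAILURE) {
        // Demotion is best effort; continue at the current priority.
      }
      // ... long-running, user-invisible work ...
    }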
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index f15e301874c4..21f0c39bfd6e 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -151,6 +151,10 @@ bool FileSystem::exists(const Twine &Path) {
return Status && Status->exists();
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void FileSystem::dump() const { print(dbgs(), PrintType::RecursiveContents); }
+#endif
+
#ifndef NDEBUG
static bool isTraversalComponent(StringRef Component) {
return Component.equals("..") || Component.equals(".");
@@ -273,6 +277,10 @@ public:
std::error_code getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) const override;
+protected:
+ void printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const override;
+
private:
// If this FS has its own working dir, use it to make Path absolute.
// The returned twine is safe to use as long as both Storage and Path live.
@@ -354,6 +362,17 @@ RealFileSystem::getRealPath(const Twine &Path,
return llvm::sys::fs::real_path(adjustPath(Path, Storage), Output);
}
+void RealFileSystem::printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const {
+ printIndent(OS, IndentLevel);
+ OS << "RealFileSystem using ";
+ if (WD)
+ OS << "own";
+ else
+ OS << "process";
+ OS << " CWD\n";
+}
+
IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
static IntrusiveRefCntPtr<FileSystem> FS(new RealFileSystem(true));
return FS;
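
A minimal sketch of the new debugging surface, assuming the public print entry point and the PrintType spelling used by FileSystem::dump above:

    // Sketch: dumping a VFS stack with the print/dump plumbing added above.
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/VirtualFileSystem.h"

    static void dumpStack(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> Base) {
      auto Overlay = llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
          llvm::vfs::getRealFileSystem());
      Overlay->pushOverlay(Base);
      Overlay->print(llvm::dbgs(),
                     llvm::vfs::FileSystem::PrintType::RecursiveContents);
    }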
@@ -459,6 +478,19 @@ OverlayFileSystem::getRealPath(const Twine &Path,
return errc::no_such_file_or_directory;
}
+void OverlayFileSystem::printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const {
+ printIndent(OS, IndentLevel);
+ OS << "OverlayFileSystem\n";
+ if (Type == PrintType::Summary)
+ return;
+
+ if (Type == PrintType::Contents)
+ Type = PrintType::Summary;
+ for (auto FS : overlays_range())
+ FS->print(OS, Type, IndentLevel + 1);
+}
+
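For context, a small usage sketch of the printing hooks added above (an editorial example; the PrintType values come from the FileSystem interface):

#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"

// Hedged sketch: Summary emits one line per filesystem, while
// RecursiveContents also descends into nested filesystems such as overlays.
static void describeVFS(llvm::vfs::FileSystem &FS) {
  FS.print(llvm::errs(), llvm::vfs::FileSystem::PrintType::RecursiveContents);
  // In builds with asserts or LLVM_ENABLE_DUMP, FS.dump() is equivalent to
  // printing RecursiveContents to dbgs().
}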
llvm::vfs::detail::DirIterImpl::~DirIterImpl() = default;
namespace {
@@ -467,28 +499,25 @@ namespace {
class CombiningDirIterImpl : public llvm::vfs::detail::DirIterImpl {
using FileSystemPtr = llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>;
- /// File systems to check for entries in. Processed in reverse order.
- SmallVector<FileSystemPtr, 8> FSList;
- /// The directory iterator for the current filesystem.
+ /// Iterators to combine, processed in reverse order.
+ SmallVector<directory_iterator, 8> IterList;
+ /// The iterator currently being traversed.
directory_iterator CurrentDirIter;
- /// The path of the directory to iterate the entries of.
- std::string DirPath;
/// The set of names already returned as entries.
llvm::StringSet<> SeenNames;
- /// Sets \c CurrentDirIter to an iterator of \c DirPath in the next file
- /// system in the list, or leaves it as is (at its end position) if we've
- /// already gone through them all.
- std::error_code incrementFS() {
- while (!FSList.empty()) {
- std::error_code EC;
- CurrentDirIter = FSList.back()->dir_begin(DirPath, EC);
- FSList.pop_back();
- if (EC && EC != errc::no_such_file_or_directory)
- return EC;
+ /// Sets \c CurrentDirIter to the next iterator in the list, or leaves it as
+ /// is (at its end position) if we've already gone through them all.
+ std::error_code incrementIter(bool IsFirstTime) {
+ while (!IterList.empty()) {
+ CurrentDirIter = IterList.back();
+ IterList.pop_back();
if (CurrentDirIter != directory_iterator())
break; // found
}
+
+ if (IsFirstTime && CurrentDirIter == directory_iterator())
+ return errc::no_such_file_or_directory;
return {};
}
@@ -499,7 +528,7 @@ class CombiningDirIterImpl : public llvm::vfs::detail::DirIterImpl {
if (!IsFirstTime)
CurrentDirIter.increment(EC);
if (!EC && CurrentDirIter == directory_iterator())
- EC = incrementFS();
+ EC = incrementIter(IsFirstTime);
return EC;
}
@@ -520,23 +549,24 @@ class CombiningDirIterImpl : public llvm::vfs::detail::DirIterImpl {
public:
CombiningDirIterImpl(ArrayRef<FileSystemPtr> FileSystems, std::string Dir,
- std::error_code &EC)
- : FSList(FileSystems.begin(), FileSystems.end()),
- DirPath(std::move(Dir)) {
- if (!FSList.empty()) {
- CurrentDirIter = FSList.back()->dir_begin(DirPath, EC);
- FSList.pop_back();
- if (!EC || EC == errc::no_such_file_or_directory)
- EC = incrementImpl(true);
+ std::error_code &EC) {
+ for (auto FS : FileSystems) {
+ std::error_code FEC;
+ directory_iterator Iter = FS->dir_begin(Dir, FEC);
+ if (FEC && FEC != errc::no_such_file_or_directory) {
+ EC = FEC;
+ return;
+ }
+ if (!FEC)
+ IterList.push_back(Iter);
}
+ EC = incrementImpl(true);
}
- CombiningDirIterImpl(directory_iterator FirstIter, FileSystemPtr Fallback,
- std::string FallbackDir, std::error_code &EC)
- : FSList({Fallback}), CurrentDirIter(FirstIter),
- DirPath(std::move(FallbackDir)) {
- if (!EC || EC == errc::no_such_file_or_directory)
- EC = incrementImpl(true);
+ CombiningDirIterImpl(ArrayRef<directory_iterator> DirIters,
+ std::error_code &EC)
+ : IterList(DirIters.begin(), DirIters.end()) {
+ EC = incrementImpl(true);
}
std::error_code increment() override { return incrementImpl(false); }
@@ -546,8 +576,11 @@ public:
directory_iterator OverlayFileSystem::dir_begin(const Twine &Dir,
std::error_code &EC) {
- return directory_iterator(
+ directory_iterator Combined = directory_iterator(
std::make_shared<CombiningDirIterImpl>(FSList, Dir.str(), EC));
+ if (EC)
+ return {};
+ return Combined;
}
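The reworked combining iterator has a visible effect for overlay clients, sketched here (an editorial example, not from the patch): duplicate names across layers are reported once, and a directory missing from some layers no longer aborts iteration as long as one layer provides it.

#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"

static void listMergedDir(llvm::vfs::OverlayFileSystem &FS) {
  std::error_code EC;
  llvm::vfs::directory_iterator I = FS.dir_begin("/dir", EC), E;
  for (; !EC && I != E; I.increment(EC))
    llvm::errs() << I->path() << "\n"; // each name appears once
}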
void ProxyFileSystem::anchor() {}
@@ -557,10 +590,15 @@ namespace vfs {
namespace detail {
-enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink };
+enum InMemoryNodeKind {
+ IME_File,
+ IME_Directory,
+ IME_HardLink,
+ IME_SymbolicLink,
+};
/// The in memory file system is a tree of Nodes. Every node can either be a
-/// file , hardlink or a directory.
+/// file, symlink, hardlink or a directory.
class InMemoryNode {
InMemoryNodeKind Kind;
std::string FileName;
@@ -629,6 +667,30 @@ public:
}
};
+class InMemorySymbolicLink : public InMemoryNode {
+ std::string TargetPath;
+ Status Stat;
+
+public:
+ InMemorySymbolicLink(StringRef Path, StringRef TargetPath, Status Stat)
+ : InMemoryNode(Path, IME_SymbolicLink), TargetPath(std::move(TargetPath)),
+ Stat(Stat) {}
+
+ std::string toString(unsigned Indent) const override {
+ return std::string(Indent, ' ') + "SymbolicLink to -> " + TargetPath;
+ }
+
+ Status getStatus(const Twine &RequestedName) const override {
+ return Status::copyWithNewName(Stat, RequestedName);
+ }
+
+ StringRef getTargetPath() const { return TargetPath; }
+
+ static bool classof(const InMemoryNode *N) {
+ return N->getKind() == IME_SymbolicLink;
+ }
+};
+
/// Adapt an InMemoryFile for VFS' File interface. The goal is to make
/// \p InMemoryFileAdaptor mimic as much as possible the behavior of
/// \p RealFile.
@@ -677,7 +739,7 @@ public:
UniqueID getUniqueID() const { return Stat.getUniqueID(); }
- InMemoryNode *getChild(StringRef Name) {
+ InMemoryNode *getChild(StringRef Name) const {
auto I = Entries.find(Name);
if (I != Entries.end())
return I->second.get();
@@ -773,10 +835,10 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
detail::InMemoryDirectory *Dir = Root.get();
auto I = llvm::sys::path::begin(Path), E = sys::path::end(Path);
- const auto ResolvedUser = User.getValueOr(0);
- const auto ResolvedGroup = Group.getValueOr(0);
- const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file);
- const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all);
+ const auto ResolvedUser = User.value_or(0);
+ const auto ResolvedGroup = Group.value_or(0);
+ const auto ResolvedType = Type.value_or(sys::fs::file_type::regular_file);
+ const auto ResolvedPerms = Perms.value_or(sys::fs::all_all);
// Any intermediate directories we create should be accessible by
// the owner, even if Perms says otherwise for the final path.
const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
@@ -864,22 +926,23 @@ bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
});
}
-static ErrorOr<const detail::InMemoryNode *>
-lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir,
- const Twine &P) {
+detail::NamedNodeOrError
+InMemoryFileSystem::lookupNode(const Twine &P, bool FollowFinalSymlink,
+ size_t SymlinkDepth) const {
SmallString<128> Path;
P.toVector(Path);
// Fix up relative paths. This just prepends the current working directory.
- std::error_code EC = FS.makeAbsolute(Path);
+ std::error_code EC = makeAbsolute(Path);
assert(!EC);
(void)EC;
- if (FS.useNormalizedPaths())
+ if (useNormalizedPaths())
llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ const detail::InMemoryDirectory *Dir = Root.get();
if (Path.empty())
- return Dir;
+ return detail::NamedNodeOrError(Path, Dir);
auto I = llvm::sys::path::begin(Path), E = llvm::sys::path::end(Path);
while (true) {
@@ -888,43 +951,99 @@ lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir,
if (!Node)
return errc::no_such_file_or_directory;
+ if (auto Symlink = dyn_cast<detail::InMemorySymbolicLink>(Node)) {
+ // If we're at the end of the path, and we're not following through
+ // terminal symlinks, then we're done.
+ if (I == E && !FollowFinalSymlink)
+ return detail::NamedNodeOrError(Path, Symlink);
+
+ if (SymlinkDepth > InMemoryFileSystem::MaxSymlinkDepth)
+ return errc::no_such_file_or_directory;
+
+ SmallString<128> TargetPath = Symlink->getTargetPath();
+ if (std::error_code EC = makeAbsolute(TargetPath))
+ return EC;
+
+ // Keep going with the target. We always want to follow symlinks here
+ // because we're either at the end of a path that we want to follow, or
+ // not at the end of a path, in which case we need to follow the symlink
+ // regardless.
+ auto Target =
+ lookupNode(TargetPath, /*FollowFinalSymlink=*/true, SymlinkDepth + 1);
+ if (!Target || I == E)
+ return Target;
+
+ if (!isa<detail::InMemoryDirectory>(*Target))
+ return errc::no_such_file_or_directory;
+
+ // Otherwise, continue on the search in the symlinked directory.
+ Dir = cast<detail::InMemoryDirectory>(*Target);
+ continue;
+ }
+
// Return the file if it's at the end of the path.
if (auto File = dyn_cast<detail::InMemoryFile>(Node)) {
if (I == E)
- return File;
+ return detail::NamedNodeOrError(Path, File);
return errc::no_such_file_or_directory;
}
// If Node is HardLink then return the resolved file.
if (auto File = dyn_cast<detail::InMemoryHardLink>(Node)) {
if (I == E)
- return &File->getResolvedFile();
+ return detail::NamedNodeOrError(Path, &File->getResolvedFile());
return errc::no_such_file_or_directory;
}
// Traverse directories.
Dir = cast<detail::InMemoryDirectory>(Node);
if (I == E)
- return Dir;
+ return detail::NamedNodeOrError(Path, Dir);
}
}
-bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
- const Twine &ToPath) {
- auto FromNode = lookupInMemoryNode(*this, Root.get(), FromPath);
- auto ToNode = lookupInMemoryNode(*this, Root.get(), ToPath);
+bool InMemoryFileSystem::addHardLink(const Twine &NewLink,
+ const Twine &Target) {
+ auto NewLinkNode = lookupNode(NewLink, /*FollowFinalSymlink=*/false);
+ // Whether symlinks in the hardlink target are followed is
+ // implementation-defined in POSIX.
+ // We're following symlinks here to be consistent with macOS.
+ auto TargetNode = lookupNode(Target, /*FollowFinalSymlink=*/true);
// NewLink must not have been added before. Target must have been added
// before. The resolved Target must be a File.
- if (!ToNode || FromNode || !isa<detail::InMemoryFile>(*ToNode))
+ if (!TargetNode || NewLinkNode || !isa<detail::InMemoryFile>(*TargetNode))
return false;
- return addFile(FromPath, 0, nullptr, None, None, None, None,
+ return addFile(NewLink, 0, nullptr, None, None, None, None,
[&](detail::NewInMemoryNodeInfo NNI) {
return std::make_unique<detail::InMemoryHardLink>(
- NNI.Path.str(), *cast<detail::InMemoryFile>(*ToNode));
+ NNI.Path.str(),
+ *cast<detail::InMemoryFile>(*TargetNode));
+ });
+}
+
+bool InMemoryFileSystem::addSymbolicLink(const Twine &NewLink,
+ const Twine &Target,
+ time_t ModificationTime,
+ Optional<uint32_t> User,
+ Optional<uint32_t> Group,
+ Optional<llvm::sys::fs::perms> Perms) {
+ auto NewLinkNode = lookupNode(NewLink, /*FollowFinalSymlink=*/false);
+ if (NewLinkNode)
+ return false;
+
+ SmallString<128> NewLinkStr, TargetStr;
+ NewLink.toVector(NewLinkStr);
+ Target.toVector(TargetStr);
+
+ return addFile(NewLinkStr, ModificationTime, nullptr, User, Group,
+ sys::fs::file_type::symlink_file, Perms,
+ [&](detail::NewInMemoryNodeInfo NNI) {
+ return std::make_unique<detail::InMemorySymbolicLink>(
+ NewLinkStr, TargetStr, NNI.makeStatus());
});
}
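The new symlink support in action, as a hedged editorial sketch (the explicit llvm::None arguments just avoid relying on parameter defaults):

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"

static void demoInMemorySymlink() {
  llvm::vfs::InMemoryFileSystem FS;
  FS.addFile("/data/file.txt", /*ModificationTime=*/0,
             llvm::MemoryBuffer::getMemBuffer("hello"));
  // Arguments: NewLink, Target, ModificationTime, then User/Group/Perms.
  FS.addSymbolicLink("/data/link.txt", "/data/file.txt",
                     /*ModificationTime=*/0, llvm::None, llvm::None,
                     llvm::None);
  // status() follows the terminal symlink, so this reports the target file's
  // status; lookups are capped at MaxSymlinkDepth to guard against cycles.
  llvm::ErrorOr<llvm::vfs::Status> St = FS.status("/data/link.txt");
  (void)St;
}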
llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
- auto Node = lookupInMemoryNode(*this, Root.get(), Path);
+ auto Node = lookupNode(Path, /*FollowFinalSymlink=*/true);
if (Node)
return (*Node)->getStatus(Path);
return Node.getError();
@@ -932,7 +1051,7 @@ llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
llvm::ErrorOr<std::unique_ptr<File>>
InMemoryFileSystem::openFileForRead(const Twine &Path) {
- auto Node = lookupInMemoryNode(*this, Root.get(), Path);
+ auto Node = lookupNode(Path, /*FollowFinalSymlink=*/true);
if (!Node)
return Node.getError();
@@ -946,10 +1065,9 @@ InMemoryFileSystem::openFileForRead(const Twine &Path) {
return make_error_code(llvm::errc::invalid_argument);
}
-namespace {
-
/// Adaptor from InMemoryDir::iterator to directory_iterator.
-class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl {
+class InMemoryFileSystem::DirIterator : public llvm::vfs::detail::DirIterImpl {
+ const InMemoryFileSystem *FS;
detail::InMemoryDirectory::const_iterator I;
detail::InMemoryDirectory::const_iterator E;
std::string RequestedDirName;
@@ -967,6 +1085,13 @@ class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl {
case detail::IME_Directory:
Type = sys::fs::file_type::directory_file;
break;
+ case detail::IME_SymbolicLink:
+ if (auto SymlinkTarget =
+ FS->lookupNode(Path, /*FollowFinalSymlink=*/true)) {
+ Path = SymlinkTarget.getName();
+ Type = (*SymlinkTarget)->getStatus(Path).getType();
+ }
+ break;
}
CurrentEntry = directory_entry(std::string(Path.str()), Type);
} else {
@@ -977,11 +1102,12 @@ class InMemoryDirIterator : public llvm::vfs::detail::DirIterImpl {
}
public:
- InMemoryDirIterator() = default;
+ DirIterator() = default;
- explicit InMemoryDirIterator(const detail::InMemoryDirectory &Dir,
- std::string RequestedDirName)
- : I(Dir.begin()), E(Dir.end()),
+ DirIterator(const InMemoryFileSystem *FS,
+ const detail::InMemoryDirectory &Dir,
+ std::string RequestedDirName)
+ : FS(FS), I(Dir.begin()), E(Dir.end()),
RequestedDirName(std::move(RequestedDirName)) {
setCurrentEntry();
}
@@ -993,22 +1119,20 @@ public:
}
};
-} // namespace
-
directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir,
std::error_code &EC) {
- auto Node = lookupInMemoryNode(*this, Root.get(), Dir);
+ auto Node = lookupNode(Dir, /*FollowFinalSymlink=*/true);
if (!Node) {
EC = Node.getError();
- return directory_iterator(std::make_shared<InMemoryDirIterator>());
+ return directory_iterator(std::make_shared<DirIterator>());
}
if (auto *DirNode = dyn_cast<detail::InMemoryDirectory>(*Node))
return directory_iterator(
- std::make_shared<InMemoryDirIterator>(*DirNode, Dir.str()));
+ std::make_shared<DirIterator>(this, *DirNode, Dir.str()));
EC = make_error_code(llvm::errc::not_a_directory);
- return directory_iterator(std::make_shared<InMemoryDirIterator>());
+ return directory_iterator(std::make_shared<DirIterator>());
}
std::error_code InMemoryFileSystem::setCurrentWorkingDirectory(const Twine &P) {
@@ -1046,6 +1170,12 @@ std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) {
return {};
}
+void InMemoryFileSystem::printImpl(raw_ostream &OS, PrintType PrintContents,
+ unsigned IndentLevel) const {
+ printIndent(OS, IndentLevel);
+ OS << "InMemoryFileSystem\n";
+}
+
} // namespace vfs
} // namespace llvm
@@ -1079,6 +1209,14 @@ static llvm::SmallString<256> canonicalize(llvm::StringRef Path) {
return result;
}
+/// Whether the error and entry specify a file/directory that was not found.
+static bool isFileNotFound(std::error_code EC,
+ RedirectingFileSystem::Entry *E = nullptr) {
+ if (E && !isa<RedirectingFileSystem::DirectoryRemapEntry>(E))
+ return false;
+ return EC == llvm::errc::no_such_file_or_directory;
+}
+
} // anonymous namespace
@@ -1255,49 +1393,93 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
if (!Result) {
- EC = Result.getError();
- if (shouldFallBackToExternalFS(EC))
+ if (Redirection != RedirectKind::RedirectOnly &&
+ isFileNotFound(Result.getError()))
return ExternalFS->dir_begin(Path, EC);
+
+ EC = Result.getError();
return {};
}
// Use status to make sure the path exists and refers to a directory.
ErrorOr<Status> S = status(Path, Dir, *Result);
if (!S) {
- if (shouldFallBackToExternalFS(S.getError(), Result->E))
+ if (Redirection != RedirectKind::RedirectOnly &&
+ isFileNotFound(S.getError(), Result->E))
return ExternalFS->dir_begin(Dir, EC);
+
EC = S.getError();
return {};
}
+
if (!S->isDirectory()) {
- EC = std::error_code(static_cast<int>(errc::not_a_directory),
- std::system_category());
+ EC = errc::not_a_directory;
return {};
}
// Create the appropriate directory iterator based on whether we found a
// DirectoryRemapEntry or DirectoryEntry.
- directory_iterator DirIter;
+ directory_iterator RedirectIter;
+ std::error_code RedirectEC;
if (auto ExtRedirect = Result->getExternalRedirect()) {
auto RE = cast<RedirectingFileSystem::RemapEntry>(Result->E);
- DirIter = ExternalFS->dir_begin(*ExtRedirect, EC);
+ RedirectIter = ExternalFS->dir_begin(*ExtRedirect, RedirectEC);
if (!RE->useExternalName(UseExternalNames)) {
// Update the paths in the results to use the virtual directory's path.
- DirIter =
+ RedirectIter =
directory_iterator(std::make_shared<RedirectingFSDirRemapIterImpl>(
- std::string(Path), DirIter));
+ std::string(Path), RedirectIter));
}
} else {
auto DE = cast<DirectoryEntry>(Result->E);
- DirIter = directory_iterator(std::make_shared<RedirectingFSDirIterImpl>(
- Path, DE->contents_begin(), DE->contents_end(), EC));
+ RedirectIter =
+ directory_iterator(std::make_shared<RedirectingFSDirIterImpl>(
+ Path, DE->contents_begin(), DE->contents_end(), RedirectEC));
+ }
+
+ if (RedirectEC) {
+ if (RedirectEC != errc::no_such_file_or_directory) {
+ EC = RedirectEC;
+ return {};
+ }
+ RedirectIter = {};
}
- if (!shouldUseExternalFS())
- return DirIter;
- return directory_iterator(std::make_shared<CombiningDirIterImpl>(
- DirIter, ExternalFS, std::string(Path), EC));
+ if (Redirection == RedirectKind::RedirectOnly) {
+ EC = RedirectEC;
+ return RedirectIter;
+ }
+
+ std::error_code ExternalEC;
+ directory_iterator ExternalIter = ExternalFS->dir_begin(Path, ExternalEC);
+ if (ExternalEC) {
+ if (ExternalEC != errc::no_such_file_or_directory) {
+ EC = ExternalEC;
+ return {};
+ }
+ ExternalIter = {};
+ }
+
+ SmallVector<directory_iterator, 2> Iters;
+ switch (Redirection) {
+ case RedirectKind::Fallthrough:
+ Iters.push_back(ExternalIter);
+ Iters.push_back(RedirectIter);
+ break;
+ case RedirectKind::Fallback:
+ Iters.push_back(RedirectIter);
+ Iters.push_back(ExternalIter);
+ break;
+ default:
+ llvm_unreachable("unhandled RedirectKind");
+ }
+
+ directory_iterator Combined{
+ std::make_shared<CombiningDirIterImpl>(Iters, EC)};
+ if (EC)
+ return {};
+ return Combined;
}
void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) {
@@ -1309,7 +1491,16 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const {
}
void RedirectingFileSystem::setFallthrough(bool Fallthrough) {
- IsFallthrough = Fallthrough;
+ if (Fallthrough) {
+ Redirection = RedirectingFileSystem::RedirectKind::Fallthrough;
+ } else {
+ Redirection = RedirectingFileSystem::RedirectKind::RedirectOnly;
+ }
+}
+
+void RedirectingFileSystem::setRedirection(
+ RedirectingFileSystem::RedirectKind Kind) {
+ Redirection = Kind;
}
std::vector<StringRef> RedirectingFileSystem::getRoots() const {
@@ -1319,34 +1510,59 @@ std::vector<StringRef> RedirectingFileSystem::getRoots() const {
return R;
}
-void RedirectingFileSystem::dump(raw_ostream &OS) const {
+void RedirectingFileSystem::printImpl(raw_ostream &OS, PrintType Type,
+ unsigned IndentLevel) const {
+ printIndent(OS, IndentLevel);
+ OS << "RedirectingFileSystem (UseExternalNames: "
+ << (UseExternalNames ? "true" : "false") << ")\n";
+ if (Type == PrintType::Summary)
+ return;
+
for (const auto &Root : Roots)
- dumpEntry(OS, Root.get());
+ printEntry(OS, Root.get(), IndentLevel);
+
+ printIndent(OS, IndentLevel);
+ OS << "ExternalFS:\n";
+ ExternalFS->print(OS, Type == PrintType::Contents ? PrintType::Summary : Type,
+ IndentLevel + 1);
}
-void RedirectingFileSystem::dumpEntry(raw_ostream &OS,
- RedirectingFileSystem::Entry *E,
- int NumSpaces) const {
- StringRef Name = E->getName();
- for (int i = 0, e = NumSpaces; i < e; ++i)
- OS << " ";
- OS << "'" << Name.str().c_str() << "'"
- << "\n";
+void RedirectingFileSystem::printEntry(raw_ostream &OS,
+ RedirectingFileSystem::Entry *E,
+ unsigned IndentLevel) const {
+ printIndent(OS, IndentLevel);
+ OS << "'" << E->getName() << "'";
- if (E->getKind() == RedirectingFileSystem::EK_Directory) {
- auto *DE = dyn_cast<RedirectingFileSystem::DirectoryEntry>(E);
- assert(DE && "Should be a directory");
+ switch (E->getKind()) {
+ case EK_Directory: {
+ auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(E);
+ OS << "\n";
for (std::unique_ptr<Entry> &SubEntry :
llvm::make_range(DE->contents_begin(), DE->contents_end()))
- dumpEntry(OS, SubEntry.get(), NumSpaces + 2);
+ printEntry(OS, SubEntry.get(), IndentLevel + 1);
+ break;
+ }
+ case EK_DirectoryRemap:
+ case EK_File: {
+ auto *RE = cast<RedirectingFileSystem::RemapEntry>(E);
+ OS << " -> '" << RE->getExternalContentsPath() << "'";
+ switch (RE->getUseName()) {
+ case NK_NotSet:
+ break;
+ case NK_External:
+ OS << " (UseExternalName: true)";
+ break;
+ case NK_Virtual:
+ OS << " (UseExternalName: false)";
+ break;
+ }
+ OS << "\n";
+ break;
+ }
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { dump(dbgs()); }
-#endif
-
/// A helper class to hold the common YAML parsing state.
class llvm::vfs::RedirectingFileSystemParser {
yaml::Stream &Stream;
@@ -1388,6 +1604,23 @@ class llvm::vfs::RedirectingFileSystemParser {
return false;
}
+ Optional<RedirectingFileSystem::RedirectKind>
+ parseRedirectKind(yaml::Node *N) {
+ SmallString<12> Storage;
+ StringRef Value;
+ if (!parseScalarString(N, Value, Storage))
+ return None;
+
+ if (Value.equals_insensitive("fallthrough")) {
+ return RedirectingFileSystem::RedirectKind::Fallthrough;
+ } else if (Value.equals_insensitive("fallback")) {
+ return RedirectingFileSystem::RedirectKind::Fallback;
+ } else if (Value.equals_insensitive("redirect-only")) {
+ return RedirectingFileSystem::RedirectKind::RedirectOnly;
+ }
+ return None;
+ }
+
struct KeyStatus {
bool Required;
bool Seen = false;
@@ -1731,6 +1964,7 @@ public:
KeyStatusPair("use-external-names", false),
KeyStatusPair("overlay-relative", false),
KeyStatusPair("fallthrough", false),
+ KeyStatusPair("redirecting-with", false),
KeyStatusPair("roots", true),
};
@@ -1789,8 +2023,34 @@ public:
if (!parseScalarBool(I.getValue(), FS->UseExternalNames))
return false;
} else if (Key == "fallthrough") {
- if (!parseScalarBool(I.getValue(), FS->IsFallthrough))
+ if (Keys["redirecting-with"].Seen) {
+ error(I.getValue(),
+ "'fallthrough' and 'redirecting-with' are mutually exclusive");
+ return false;
+ }
+
+ bool ShouldFallthrough = false;
+ if (!parseScalarBool(I.getValue(), ShouldFallthrough))
+ return false;
+
+ if (ShouldFallthrough) {
+ FS->Redirection = RedirectingFileSystem::RedirectKind::Fallthrough;
+ } else {
+ FS->Redirection = RedirectingFileSystem::RedirectKind::RedirectOnly;
+ }
+ } else if (Key == "redirecting-with") {
+ if (Keys["fallthrough"].Seen) {
+ error(I.getValue(),
+ "'fallthrough' and 'redirecting-with' are mutually exclusive");
+ return false;
+ }
+
+ if (auto Kind = parseRedirectKind(I.getValue())) {
+ FS->Redirection = *Kind;
+ } else {
+ error(I.getValue(), "expected valid redirect kind");
return false;
+ }
} else {
llvm_unreachable("key missing from Keys");
}
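To show the new key end to end, a hedged editorial sketch (the overlay contents and paths are invented) that builds a redirecting VFS with fallback semantics straight from YAML:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"

static std::unique_ptr<llvm::vfs::FileSystem> makeFallbackVFS() {
  // 'redirecting-with: fallback' tries the original path first and consults
  // the mapping only when that fails; 'fallthrough' and 'redirect-only' are
  // the other accepted spellings.
  static const char Overlay[] =
      "{ 'version': 0,\n"
      "  'redirecting-with': 'fallback',\n"
      "  'roots': [{ 'name': '/virtual/file.h', 'type': 'file',\n"
      "              'external-contents': '/real/file.h' }]\n"
      "}";
  return llvm::vfs::getVFSFromYAML(
      llvm::MemoryBuffer::getMemBuffer(Overlay, "overlay.yaml"),
      /*DiagHandler=*/nullptr, /*YAMLFilePath=*/"overlay.yaml");
}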
@@ -1923,13 +2183,6 @@ RedirectingFileSystem::LookupResult::LookupResult(
}
}
-bool RedirectingFileSystem::shouldFallBackToExternalFS(
- std::error_code EC, RedirectingFileSystem::Entry *E) const {
- if (E && !isa<RedirectingFileSystem::DirectoryRemapEntry>(E))
- return false;
- return shouldUseExternalFS() && EC == llvm::errc::no_such_file_or_directory;
-}
-
std::error_code
RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const {
if (std::error_code EC = makeAbsolute(Path))
@@ -2001,9 +2254,16 @@ RedirectingFileSystem::lookupPathImpl(
static Status getRedirectedFileStatus(const Twine &OriginalPath,
bool UseExternalNames,
Status ExternalStatus) {
+ // The path has been mapped by some nested VFS and exposes an external path;
+ // don't override it with the original path.
+ if (ExternalStatus.ExposesExternalVFSPath)
+ return ExternalStatus;
+
Status S = ExternalStatus;
if (!UseExternalNames)
S = Status::copyWithNewName(S, OriginalPath);
+ else
+ S.ExposesExternalVFSPath = true;
S.IsVFSMapped = true;
return S;
}
@@ -2032,11 +2292,13 @@ ErrorOr<Status> RedirectingFileSystem::status(
ErrorOr<Status>
RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath,
const Twine &OriginalPath) const {
- if (auto Result = ExternalFS->status(CanonicalPath)) {
- return Result.get().copyWithNewName(Result.get(), OriginalPath);
- } else {
- return Result.getError();
- }
+ auto Result = ExternalFS->status(CanonicalPath);
+
+ // The path has been mapped by some nested VFS; don't override it with the
+ // original path.
+ if (!Result || Result->ExposesExternalVFSPath)
+ return Result;
+ return Status::copyWithNewName(Result.get(), OriginalPath);
}
ErrorOr<Status> RedirectingFileSystem::status(const Twine &OriginalPath) {
@@ -2046,17 +2308,31 @@ ErrorOr<Status> RedirectingFileSystem::status(const Twine &OriginalPath) {
if (std::error_code EC = makeCanonical(CanonicalPath))
return EC;
+ if (Redirection == RedirectKind::Fallback) {
+ // Attempt to find the original file first, only falling back to the
+ // mapped file if that fails.
+ ErrorOr<Status> S = getExternalStatus(CanonicalPath, OriginalPath);
+ if (S)
+ return S;
+ }
+
ErrorOr<RedirectingFileSystem::LookupResult> Result =
lookupPath(CanonicalPath);
if (!Result) {
- if (shouldFallBackToExternalFS(Result.getError())) {
+ // Was not able to map the file; fall through to using the original path if
+ // that was the specified redirection type.
+ if (Redirection == RedirectKind::Fallthrough &&
+ isFileNotFound(Result.getError()))
return getExternalStatus(CanonicalPath, OriginalPath);
- }
return Result.getError();
}
ErrorOr<Status> S = status(CanonicalPath, OriginalPath, *Result);
- if (!S && shouldFallBackToExternalFS(S.getError(), Result->E)) {
+ if (!S && Redirection == RedirectKind::Fallthrough &&
+ isFileNotFound(S.getError(), Result->E)) {
+ // Mapped the file but it wasn't found in the underlying filesystem; fall
+ // through to using the original path if that was the specified
+ // redirection type.
return getExternalStatus(CanonicalPath, OriginalPath);
}
@@ -2092,7 +2368,9 @@ public:
ErrorOr<std::unique_ptr<File>>
File::getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P) {
- if (!Result)
+ // See \c getRedirectedFileStatus - don't update path if it's exposing an
+ // external path.
+ if (!Result || (*Result)->status()->ExposesExternalVFSPath)
return Result;
ErrorOr<std::unique_ptr<File>> F = std::move(*Result);
@@ -2110,13 +2388,24 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
if (std::error_code EC = makeCanonical(CanonicalPath))
return EC;
+ if (Redirection == RedirectKind::Fallback) {
+ // Attempt to find the original file first, only falling back to the
+ // mapped file if that fails.
+ auto F = File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
+ OriginalPath);
+ if (F)
+ return F;
+ }
+
ErrorOr<RedirectingFileSystem::LookupResult> Result =
lookupPath(CanonicalPath);
if (!Result) {
- if (shouldFallBackToExternalFS(Result.getError()))
+ // Was not able to map the file; fall through to using the original path if
+ // that was the specified redirection type.
+ if (Redirection == RedirectKind::Fallthrough &&
+ isFileNotFound(Result.getError()))
return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
OriginalPath);
-
return Result.getError();
}
@@ -2133,9 +2422,14 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
auto ExternalFile = File::getWithPath(
ExternalFS->openFileForRead(CanonicalRemappedPath), ExtRedirect);
if (!ExternalFile) {
- if (shouldFallBackToExternalFS(ExternalFile.getError(), Result->E))
+ if (Redirection == RedirectKind::Fallthrough &&
+ isFileNotFound(ExternalFile.getError(), Result->E)) {
+ // Mapped the file but it wasn't found in the underlying filesystem; fall
+ // through to using the original path if that was the specified
+ // redirection type.
return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
OriginalPath);
+ }
return ExternalFile;
}
@@ -2143,7 +2437,8 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
if (!ExternalStatus)
return ExternalStatus.getError();
- // FIXME: Update the status with the name and VFSMapped.
+ // Otherwise, the file was successfully remapped. Mark it as such. Also
+ // replace the underlying path if the external name is being used.
Status S = getRedirectedFileStatus(
OriginalPath, RE->useExternalName(UseExternalNames), *ExternalStatus);
return std::unique_ptr<File>(
@@ -2151,18 +2446,30 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
}
std::error_code
-RedirectingFileSystem::getRealPath(const Twine &Path_,
+RedirectingFileSystem::getRealPath(const Twine &OriginalPath,
SmallVectorImpl<char> &Output) const {
- SmallString<256> Path;
- Path_.toVector(Path);
+ SmallString<256> CanonicalPath;
+ OriginalPath.toVector(CanonicalPath);
- if (std::error_code EC = makeCanonical(Path))
+ if (std::error_code EC = makeCanonical(CanonicalPath))
return EC;
- ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
+ if (Redirection == RedirectKind::Fallback) {
+ // Attempt to find the original file first, only falling back to the
+ // mapped file if that fails.
+ std::error_code EC = ExternalFS->getRealPath(CanonicalPath, Output);
+ if (!EC)
+ return EC;
+ }
+
+ ErrorOr<RedirectingFileSystem::LookupResult> Result =
+ lookupPath(CanonicalPath);
if (!Result) {
- if (shouldFallBackToExternalFS(Result.getError()))
- return ExternalFS->getRealPath(Path, Output);
+ // Was not able to map the file; fall through to using the original path if
+ // that was the specified redirection type.
+ if (Redirection == RedirectKind::Fallthrough &&
+ isFileNotFound(Result.getError()))
+ return ExternalFS->getRealPath(CanonicalPath, Output);
return Result.getError();
}
@@ -2170,16 +2477,21 @@ RedirectingFileSystem::getRealPath(const Twine &Path_,
// path in the external file system.
if (auto ExtRedirect = Result->getExternalRedirect()) {
auto P = ExternalFS->getRealPath(*ExtRedirect, Output);
- if (!P && shouldFallBackToExternalFS(P, Result->E)) {
- return ExternalFS->getRealPath(Path, Output);
+ if (P && Redirection == RedirectKind::Fallthrough &&
+ isFileNotFound(P, Result->E)) {
+ // Mapped the file but it wasn't found in the underlying filesystem; fall
+ // through to using the original path if that was the specified
+ // redirection type.
+ return ExternalFS->getRealPath(CanonicalPath, Output);
}
return P;
}
- // If we found a DirectoryEntry, still fall back to ExternalFS if allowed,
- // because directories don't have a single external contents path.
- return shouldUseExternalFS() ? ExternalFS->getRealPath(Path, Output)
- : llvm::errc::invalid_argument;
+ // If we found a DirectoryEntry, still fall through to the original path if
+ // allowed, because directories don't have a single external contents path.
+ if (Redirection == RedirectKind::Fallthrough)
+ return ExternalFS->getRealPath(CanonicalPath, Output);
+ return llvm::errc::invalid_argument;
}
std::unique_ptr<FileSystem>
@@ -2355,14 +2667,14 @@ void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries,
OS << "{\n"
" 'version': 0,\n";
- if (IsCaseSensitive.hasValue())
+ if (IsCaseSensitive)
OS << " 'case-sensitive': '"
<< (IsCaseSensitive.getValue() ? "true" : "false") << "',\n";
- if (UseExternalNames.hasValue())
+ if (UseExternalNames)
OS << " 'use-external-names': '"
<< (UseExternalNames.getValue() ? "true" : "false") << "',\n";
bool UseOverlayRelative = false;
- if (IsOverlayRelative.hasValue()) {
+ if (IsOverlayRelative) {
UseOverlayRelative = IsOverlayRelative.getValue();
OS << " 'overlay-relative': '" << (UseOverlayRelative ? "true" : "false")
<< "',\n";
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index 5f1a364ea1a8..433c62900a3f 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -130,7 +130,7 @@ namespace fs {
const file_t kInvalidFile = INVALID_HANDLE_VALUE;
-std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
+std::string getMainExecutableImpl(const char *argv0, void *MainExecAddr) {
SmallVector<wchar_t, MAX_PATH> PathName;
PathName.resize_for_overwrite(PathName.capacity());
DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.size());
diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc
index dfaab1613de1..b0c55a77bc93 100644
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -156,9 +156,10 @@ static std::error_code WildcardExpand(StringRef Arg,
// Don't expand Arg if it does not contain any wildcard characters. This is
// the common case. Also don't wildcard expand /?. Always treat it as an
- // option.
+ // option. Paths that start with \\?\ are absolute paths, and aren't
+ // expected to be used with wildcard expressions.
if (Arg.find_first_of("*?") == StringRef::npos || Arg == "/?" ||
- Arg == "-?") {
+ Arg == "-?" || Arg.startswith("\\\\?\\")) {
Args.push_back(Arg.data());
return EC;
}
@@ -247,7 +248,7 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
SmallVector<const char *, 20> TmpArgs;
StringSaver Saver(Alloc);
- cl::TokenizeWindowsCommandLine(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
+ cl::TokenizeWindowsCommandLineFull(Cmd, Saver, TmpArgs, /*MarkEOLs=*/false);
for (const char *Arg : TmpArgs) {
EC = WildcardExpand(Arg, Args, Saver);
@@ -255,6 +256,9 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
return EC;
}
+ if (Args.size() == 0)
+ return std::make_error_code(std::errc::invalid_argument);
+
SmallVector<char, MAX_PATH> Arg0(Args[0], Args[0] + strlen(Args[0]));
SmallVector<char, MAX_PATH> Filename;
sys::path::remove_filename(Arg0);
diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index ee633411584f..58de140a60d1 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Errc.h"
@@ -18,12 +19,12 @@
#include "llvm/Support/Windows/WindowsSupport.h"
#include "llvm/Support/WindowsError.h"
#include "llvm/Support/raw_ostream.h"
-#include <psapi.h>
#include <cstdio>
#include <fcntl.h>
#include <io.h>
#include <malloc.h>
#include <numeric>
+#include <psapi.h>
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only Win32 specific code
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 32186bbe5160..32477de5184b 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -159,6 +159,10 @@ static fpSymInitialize fSymInitialize;
typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID);
static fpEnumerateLoadedModules fEnumerateLoadedModules;
+static bool isDebugHelpInitialized() {
+ return fStackWalk64 && fSymInitialize && fSymSetOptions && fMiniDumpWriteDump;
+}
+
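The guard mainly protects crashes that happen before the handlers are installed. A hedged sketch (editorial; this is the usual setup rather than something this patch adds) of loading DbgHelp up front:

#include "llvm/Support/InitLLVM.h"

int main(int argc, char **argv) {
  // InitLLVM installs the crash handlers; on Windows that loads DbgHelp.dll,
  // so crashes before this point now skip the stack trace instead of failing.
  llvm::InitLLVM X(argc, argv);
  // ... tool logic ...
  return 0;
}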
static bool load64BitDebugHelp(void) {
HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
if (hLib) {
@@ -181,7 +185,7 @@ static bool load64BitDebugHelp(void) {
fEnumerateLoadedModules = (fpEnumerateLoadedModules)
::GetProcAddress(hLib, "EnumerateLoadedModules64");
}
- return fStackWalk64 && fSymInitialize && fSymSetOptions && fMiniDumpWriteDump;
+ return isDebugHelpInitialized();
}
using namespace llvm;
@@ -296,6 +300,12 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
HANDLE hThread, STACKFRAME64 &StackFrame,
CONTEXT *Context) {
+ // It's possible that DbgHelp.dll hasn't been loaded yet (e.g. if this
+ // function is called before the main program called `llvm::InitLLVM`).
+ // In this case just return; no stack trace will be printed.
+ if (!isDebugHelpInitialized())
+ return;
+
// Initialize the symbol handler.
fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES);
fSymInitialize(hProcess, NULL, TRUE);
@@ -327,24 +337,24 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
OS << format("0x%08lX", static_cast<DWORD>(PC));
#endif
-// Print the parameters. Assume there are four.
-#if defined(_M_X64) || defined(_M_ARM64)
- OS << format(" (0x%016llX 0x%016llX 0x%016llX 0x%016llX)",
- StackFrame.Params[0], StackFrame.Params[1], StackFrame.Params[2],
- StackFrame.Params[3]);
-#elif defined(_M_IX86) || defined(_M_ARM)
- OS << format(" (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
- static_cast<DWORD>(StackFrame.Params[0]),
- static_cast<DWORD>(StackFrame.Params[1]),
- static_cast<DWORD>(StackFrame.Params[2]),
- static_cast<DWORD>(StackFrame.Params[3]));
-#endif
// Verify the PC belongs to a module in this process.
if (!fSymGetModuleBase64(hProcess, PC)) {
OS << " <unknown module>\n";
continue;
}
+ IMAGEHLP_MODULE64 M;
+ memset(&M, 0, sizeof(IMAGEHLP_MODULE64));
+ M.SizeOfStruct = sizeof(IMAGEHLP_MODULE64);
+ if (fSymGetModuleInfo64(hProcess, fSymGetModuleBase64(hProcess, PC), &M)) {
+ DWORD64 const disp = PC - M.BaseOfImage;
+ OS << format(", %s(0x%016llX) + 0x%llX byte(s)",
+ static_cast<char *>(M.ImageName), M.BaseOfImage,
+ static_cast<long long>(disp));
+ } else {
+ OS << ", <unknown module>";
+ }
+
// Print the symbol name.
char buffer[512];
IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer);
@@ -359,20 +369,16 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
}
buffer[511] = 0;
- if (dwDisp > 0)
- OS << format(", %s() + 0x%llX bytes(s)", (const char*)symbol->Name,
- dwDisp);
- else
- OS << format(", %s", (const char*)symbol->Name);
+ OS << format(", %s() + 0x%llX byte(s)", static_cast<char *>(symbol->Name),
+ static_cast<long long>(dwDisp));
// Print the source file and line number information.
IMAGEHLP_LINE64 line = {};
DWORD dwLineDisp;
line.SizeOfStruct = sizeof(line);
if (fSymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
- OS << format(", %s, line %lu", line.FileName, line.LineNumber);
- if (dwLineDisp > 0)
- OS << format(" + 0x%lX byte(s)", dwLineDisp);
+ OS << format(", %s, line %lu + 0x%lX byte(s)", line.FileName,
+ line.LineNumber, dwLineDisp);
}
OS << '\n';
@@ -811,6 +817,12 @@ void sys::CleanupOnSignal(uintptr_t Context) {
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
Cleanup(true);
+ // Write out the exception code.
+ if (ep && ep->ExceptionRecord)
+ llvm::errs() << format("Exception Code: 0x%08X",
+ ep->ExceptionRecord->ExceptionCode)
+ << "\n";
+
// We'll automatically write a Minidump file here to help diagnose
// the nasty sorts of crashes that aren't 100% reproducible from a set of
// inputs (or in the event that the user is unable or unwilling to provide a
diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc
index 7b48ca8fb1fb..11f34817dbbf 100644
--- a/llvm/lib/Support/Windows/Threading.inc
+++ b/llvm/lib/Support/Windows/Threading.inc
@@ -27,8 +27,8 @@ namespace llvm {
HANDLE
llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
llvm::Optional<unsigned> StackSizeInBytes) {
- HANDLE hThread = (HANDLE)::_beginthreadex(
- NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL);
+ HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
+ ThreadFunc, Arg, 0, NULL);
if (!hThread) {
ReportLastErrorFatal("_beginthreadex failed");
@@ -120,8 +120,10 @@ SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
// End background processing mode. The system restores the resource scheduling
// priorities of the thread as they were before the thread entered background
// processing mode.
+ //
+ // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
return SetThreadPriority(GetCurrentThread(),
- Priority == ThreadPriority::Background
+ Priority != ThreadPriority::Default
? THREAD_MODE_BACKGROUND_BEGIN
: THREAD_MODE_BACKGROUND_END)
? SetThreadPriorityResult::SUCCESS
diff --git a/llvm/lib/Support/WithColor.cpp b/llvm/lib/Support/WithColor.cpp
index b1aa709862d8..abc9fb3e5d60 100644
--- a/llvm/lib/Support/WithColor.cpp
+++ b/llvm/lib/Support/WithColor.cpp
@@ -33,6 +33,14 @@ struct CreateUseColor {
static ManagedStatic<cl::opt<cl::boolOrDefault>, CreateUseColor> UseColor;
void llvm::initWithColorOptions() { *UseColor; }
+static bool DefaultAutoDetectFunction(const raw_ostream &OS) {
+ return *UseColor == cl::BOU_UNSET ? OS.has_colors()
+ : *UseColor == cl::BOU_TRUE;
+}
+
+WithColor::AutoDetectFunctionType WithColor::AutoDetectFunction =
+ DefaultAutoDetectFunction;
+
WithColor::WithColor(raw_ostream &OS, HighlightColor Color, ColorMode Mode)
: OS(OS), Mode(Mode) {
// Detect color from terminal type unless the user passed the --color option.
@@ -127,8 +135,7 @@ bool WithColor::colorsEnabled() {
case ColorMode::Disable:
return false;
case ColorMode::Auto:
- return *UseColor == cl::BOU_UNSET ? OS.has_colors()
- : *UseColor == cl::BOU_TRUE;
+ return AutoDetectFunction(OS);
}
llvm_unreachable("All cases handled above.");
}
@@ -159,3 +166,12 @@ void WithColor::defaultWarningHandler(Error Warning) {
WithColor::warning() << Info.message() << '\n';
});
}
+
+WithColor::AutoDetectFunctionType WithColor::defaultAutoDetectFunction() {
+ return DefaultAutoDetectFunction;
+}
+
+void WithColor::setAutoDetectFunction(
+ AutoDetectFunctionType NewAutoDetectFunction) {
+ AutoDetectFunction = NewAutoDetectFunction;
+}
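A hedged usage sketch of the new hook (an editorial example; a capture-less lambda converts to the AutoDetectFunctionType function pointer):

#include "llvm/Support/WithColor.h"

static void forceColorsOn() {
  llvm::WithColor::setAutoDetectFunction(
      [](const llvm::raw_ostream &) { return true; });
  llvm::WithColor::error() << "colored even when piped\n";
  // Restore the stock terminal detection afterwards.
  llvm::WithColor::setAutoDetectFunction(
      llvm::WithColor::defaultAutoDetectFunction());
}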
diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index 200261d3ed5c..578ce228079b 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -392,6 +392,9 @@ private:
/// Pos is whitespace or a new line
bool isBlankOrBreak(StringRef::iterator Position);
+ /// Return true if the line consists only of blanks and line breaks.
+ bool isLineEmpty(StringRef Line);
+
/// Consume a single b-break[28] if it's present at the current position.
///
/// Return false if the code unit at the current position isn't a line break.
@@ -470,6 +473,18 @@ private:
/// Scan a block scalar starting with | or >.
bool scanBlockScalar(bool IsLiteral);
+ /// Scan a block scalar style indicator and header.
+ ///
+ /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
+ /// YAML does not consider the style indicator to be a part of the header.
+ ///
+ /// Return false if an error occurred.
+ bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
+ unsigned &IndentIndicator, bool &IsDone);
+
+ /// Scan a style indicator in a block scalar header.
+ char scanBlockStyleIndicator();
+
/// Scan a chomping indicator in a block scalar header.
char scanBlockChompingIndicator();
@@ -1034,6 +1049,13 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
*Position == '\n';
}
+bool Scanner::isLineEmpty(StringRef Line) {
+ for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
+ if (!isBlankOrBreak(Position))
+ return false;
+ return true;
+}
+
bool Scanner::consumeLineBreakIfPresent() {
auto Next = skip_b_break(Current);
if (Next == Current)
@@ -1516,6 +1538,25 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
return true;
}
+bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
+ char &ChompingIndicator,
+ unsigned &IndentIndicator,
+ bool &IsDone) {
+ StyleIndicator = scanBlockStyleIndicator();
+ if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
+ return false;
+ return true;
+}
+
+char Scanner::scanBlockStyleIndicator() {
+ char Indicator = ' ';
+ if (Current != End && (*Current == '>' || *Current == '|')) {
+ Indicator = *Current;
+ skip(1);
+ }
+ return Indicator;
+}
+
char Scanner::scanBlockChompingIndicator() {
char Indicator = ' ';
if (Current != End && (*Current == '+' || *Current == '-')) {
@@ -1654,19 +1695,19 @@ bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
}
bool Scanner::scanBlockScalar(bool IsLiteral) {
- // Eat '|' or '>'
assert(*Current == '|' || *Current == '>');
- skip(1);
-
+ char StyleIndicator;
char ChompingIndicator;
unsigned BlockIndent;
bool IsDone = false;
- if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
+ if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
+ IsDone))
return false;
if (IsDone)
return true;
+ bool IsFolded = StyleIndicator == '>';
- auto Start = Current;
+ const auto *Start = Current;
unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
unsigned LineBreaks = 0;
if (BlockIndent == 0) {
@@ -1687,6 +1728,22 @@ bool Scanner::scanBlockScalar(bool IsLiteral) {
auto LineStart = Current;
advanceWhile(&Scanner::skip_nb_char);
if (LineStart != Current) {
+ if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
+ // The folded style "folds" any single line break between content into a
+ // single space, except when that content is "empty" (only contains
+ // whitespace) in which case the line break is left as-is.
+ if (LineBreaks == 1) {
+ Str.append(LineBreaks,
+ isLineEmpty(StringRef(LineStart, Current - LineStart))
+ ? '\n'
+ : ' ');
+ }
+ // If we saw a single line break, we are completely replacing it and so
+ // want `LineBreaks == 0`. Otherwise this decrement accounts for the
+ // fact that the first line break is "trimmed", only being used to
+ // signal a sequence of line breaks which should not be folded.
+ LineBreaks--;
+ }
Str.append(LineBreaks, '\n');
Str.append(StringRef(LineStart, Current - LineStart));
LineBreaks = 0;
@@ -1840,11 +1897,11 @@ bool Scanner::fetchMoreTokens() {
Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
std::error_code *EC)
- : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
+ : scanner(new Scanner(Input, SM, ShowColors, EC)) {}
Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
std::error_code *EC)
- : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
+ : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}
Stream::~Stream() = default;
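A hedged editorial sketch of the folding behavior the scanner now implements for '>'-style scalars; the expected output is my reading of the rule in the comments above:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"

static void demoFoldedScalar() {
  llvm::SourceMgr SM;
  llvm::yaml::Stream Stream("key: >\n  one\n  two\n", SM);
  llvm::yaml::Document &Doc = *Stream.begin();
  auto *Map = llvm::dyn_cast<llvm::yaml::MappingNode>(Doc.getRoot());
  for (llvm::yaml::KeyValueNode &KV : *Map) {
    auto *Value = llvm::dyn_cast<llvm::yaml::ScalarNode>(KV.getValue());
    llvm::SmallString<32> Storage;
    // The single break between "one" and "two" should fold into a space,
    // yielding "one two" plus the clipped trailing newline.
    llvm::outs() << Value->getValue(Storage) << "\n";
  }
}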
diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp
index 9485536d1312..b49d8d2afbb3 100644
--- a/llvm/lib/Support/Z3Solver.cpp
+++ b/llvm/lib/Support/Z3Solver.cpp
@@ -6,16 +6,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include "llvm/Support/SMTAPI.h"
-#include <set>
using namespace llvm;
#if LLVM_WITH_Z3
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+
+#include <set>
+
#include <z3.h>
namespace {
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 69d4fe96bee8..98ceea3c3c7a 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -408,7 +408,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) {
const size_t Size = Bytes.size();
HexPrintStyle HPS = FB.Upper ? HexPrintStyle::Upper : HexPrintStyle::Lower;
uint64_t OffsetWidth = 0;
- if (FB.FirstByteOffset.hasValue()) {
+ if (FB.FirstByteOffset) {
// Figure out how many nibbles are needed to print the largest offset
// represented by this data set, so that we can align the offset field
// to the right width.
@@ -428,7 +428,7 @@ raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) {
while (!Bytes.empty()) {
indent(FB.IndentLevel);
- if (FB.FirstByteOffset.hasValue()) {
+ if (FB.FirstByteOffset) {
uint64_t Offset = FB.FirstByteOffset.getValue();
llvm::write_hex(*this, Offset + LineIndex, HPS, OffsetWidth);
*this << ": ";
diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c
index ee2a1d87a267..24d01121820b 100644
--- a/llvm/lib/Support/regcomp.c
+++ b/llvm/lib/Support/regcomp.c
@@ -249,10 +249,10 @@ static char nuls[10]; /* place to point scanner in event of error */
*/
#define PEEK() (*p->next)
#define PEEK2() (*(p->next+1))
-#define MORE() (p->next < p->end)
-#define MORE2() (p->next+1 < p->end)
+#define MORE() (p->end - p->next > 0)
+#define MORE2() (p->end - p->next > 1)
#define SEE(c) (MORE() && PEEK() == (c))
-#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define SEETWO(a, b) (MORE2() && PEEK() == (a) && PEEK2() == (b))
#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
#define NEXT() (p->next++)
@@ -800,15 +800,17 @@ p_bracket(struct parse *p)
int invert = 0;
/* Dept of Truly Sickening Special-Case Kludges */
- if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
- EMIT(OBOW, 0);
- NEXTn(6);
- return;
- }
- if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
- EMIT(OEOW, 0);
- NEXTn(6);
- return;
+ if (p->end - p->next > 5) {
+ if (strncmp(p->next, "[:<:]]", 6) == 0) {
+ EMIT(OBOW, 0);
+ NEXTn(6);
+ return;
+ }
+ if (strncmp(p->next, "[:>:]]", 6) == 0) {
+ EMIT(OEOW, 0);
+ NEXTn(6);
+ return;
+ }
}
if ((cs = allocset(p)) == NULL) {
diff --git a/llvm/lib/Support/regengine.inc b/llvm/lib/Support/regengine.inc
index 41787aff1242..02680e23ddb8 100644
--- a/llvm/lib/Support/regengine.inc
+++ b/llvm/lib/Support/regengine.inc
@@ -53,6 +53,7 @@
#define at sat
#define match smat
#define nope snope
+#define step_back sstep_back
#endif
#ifdef LNAMES
#define matcher lmatcher
@@ -65,6 +66,7 @@
#define at lat
#define match lmat
#define nope lnope
+#define step_back lstep_back
#endif
/* another structure passed up and down to avoid zillions of parameters */
@@ -288,6 +290,38 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
return(0);
}
+/* Step back from "stop" to a position where the strip startst..stopst might
+ * match. This can always conservatively return "stop - 1", but may return an
+ * earlier position if matches at later positions are impossible. */
+static const char *
+step_back(struct re_guts *g, const char *start, const char *stop, sopno startst,
+ sopno stopst)
+{
+ /* Always step back at least one character. */
+ assert(stop > start);
+ const char *res = stop - 1;
+
+ /* Check whether the strip startst..stopst starts with a fixed character,
+ * ignoring any closing parentheses. If not, return a conservative result. */
+ for (;;) {
+ if (startst >= stopst)
+ return res;
+ if (OP(g->strip[startst]) != ORPAREN)
+ break;
+ startst++;
+ }
+ if (OP(g->strip[startst]) != OCHAR)
+ return res;
+
+ /* Find the character that starts the following match. */
+ char ch = OPND(g->strip[startst]);
+ for (; res != start; --res) {
+ if (*res == ch)
+ break;
+ }
+ return res;
+}
+
/*
- dissect - figure out what matched what, no back references
*/
@@ -358,7 +392,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
if (tail == stop)
break; /* yes! */
/* no -- try a shorter match for this one */
- stp = rest - 1;
+ stp = step_back(m->g, sp, rest, es, stopst);
assert(stp >= sp); /* it did work */
}
ssub = ss + 1;
@@ -383,7 +417,7 @@ dissect(struct match *m, const char *start, const char *stop, sopno startst,
if (tail == stop)
break; /* yes! */
/* no -- try a shorter match for this one */
- stp = rest - 1;
+ stp = step_back(m->g, sp, rest, es, stopst);
assert(stp >= sp); /* it did work */
}
ssub = ss + 1;
@@ -1032,3 +1066,4 @@ pchar(int ch)
#undef at
#undef match
#undef nope
+#undef step_back
diff --git a/llvm/lib/Support/xxhash.cpp b/llvm/lib/Support/xxhash.cpp
index e9dceed2c4ae..9a3f5faa336b 100644
--- a/llvm/lib/Support/xxhash.cpp
+++ b/llvm/lib/Support/xxhash.cpp
@@ -39,7 +39,6 @@
#include "llvm/Support/Endian.h"
#include <stdlib.h>
-#include <string.h>
using namespace llvm;
using namespace support;
diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp
index 6104573b4b25..ebe9129ebaeb 100644
--- a/llvm/lib/TableGen/Error.cpp
+++ b/llvm/lib/TableGen/Error.cpp
@@ -157,8 +157,8 @@ void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
// Check an assertion: Obtain the condition value and be sure it is true.
// If not, print a nonfatal error along with the message.
void CheckAssert(SMLoc Loc, Init *Condition, Init *Message) {
- auto *CondValue = dyn_cast_or_null<IntInit>(
- Condition->convertInitializerTo(IntRecTy::get()));
+ auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo(
+ IntRecTy::get(Condition->getRecordKeeper())));
if (!CondValue)
PrintError(Loc, "assert condition must of type bit, bits, or int.");
else if (!CondValue->getValue()) {
diff --git a/llvm/lib/TableGen/Parser.cpp b/llvm/lib/TableGen/Parser.cpp
new file mode 100644
index 000000000000..818ded19432b
--- /dev/null
+++ b/llvm/lib/TableGen/Parser.cpp
@@ -0,0 +1,39 @@
+//===- Parser.cpp - Top-Level TableGen Parser implementation --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Parser.h"
+#include "TGParser.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TableGen/Record.h"
+
+using namespace llvm;
+
+bool llvm::TableGenParseFile(SourceMgr &InputSrcMgr, RecordKeeper &Records) {
+ // Initialize the global TableGen source manager by temporarily taking control
+ // of the input buffers in `InputSrcMgr`. This is kind of a hack, but allows for
+ // preserving TableGen's current awkward diagnostic behavior. If we can remove
+ // this reliance, we could drop all of this.
+ SrcMgr = SourceMgr();
+ SrcMgr.takeSourceBuffersFrom(InputSrcMgr);
+ SrcMgr.setIncludeDirs(InputSrcMgr.getIncludeDirs());
+ SrcMgr.setDiagHandler(InputSrcMgr.getDiagHandler(),
+ InputSrcMgr.getDiagContext());
+
+ // Setup the record keeper and try to parse the file.
+ auto *MainFileBuffer = SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID());
+ Records.saveInputFilename(MainFileBuffer->getBufferIdentifier().str());
+
+ TGParser Parser(SrcMgr, /*Macros=*/None, Records);
+ bool ParseResult = Parser.ParseFile();
+
+ // After parsing, reclaim the source manager buffers from TableGen's global
+ // manager.
+ InputSrcMgr.takeSourceBuffersFrom(SrcMgr);
+ SrcMgr = SourceMgr();
+ return ParseResult;
+}
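A hedged editorial sketch (the buffer name and helper are invented) of driving the new entry point directly:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Parser.h"
#include "llvm/TableGen/Record.h"

static bool parseTableGenString(llvm::StringRef Source,
                                llvm::RecordKeeper &Records) {
  llvm::SourceMgr SrcMgr;
  SrcMgr.AddNewSourceBuffer(
      llvm::MemoryBuffer::getMemBuffer(Source, "input.td"), llvm::SMLoc());
  // Returns true on error, mirroring TGParser::ParseFile.
  return llvm::TableGenParseFile(SrcMgr, Records);
}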
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 58d8c9936896..6c205104d569 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
@@ -46,14 +45,17 @@ using namespace llvm;
namespace llvm {
namespace detail {
-/// This class contains all of the contextual static state of the Record
-/// classes. This allows for better lifetime management and control of the used
-/// static data.
-struct RecordContext {
- RecordContext()
- : AnyRecord(0), TrueBitInit(true, &SharedBitRecTy),
+/// This class represents the internal implementation of the RecordKeeper.
+/// It contains all of the contextual static state of the Record classes. It is
+/// kept out-of-line to simplify dependencies, and also make it easier for
+/// internal classes to access the uniquer state of the keeper.
+struct RecordKeeperImpl {
+ RecordKeeperImpl(RecordKeeper &RK)
+ : SharedBitRecTy(RK), SharedIntRecTy(RK), SharedStringRecTy(RK),
+ SharedDagRecTy(RK), AnyRecord(RK, 0), TheUnsetInit(RK),
+ TrueBitInit(true, &SharedBitRecTy),
FalseBitInit(false, &SharedBitRecTy), StringInitStringPool(Allocator),
- StringInitCodePool(Allocator), LastRecordID(0) {}
+ StringInitCodePool(Allocator), AnonCounter(0), LastRecordID(0) {}
BumpPtrAllocator Allocator;
std::vector<BitsRecTy *> SharedBitsRecTys;
@@ -77,6 +79,7 @@ struct RecordContext {
FoldingSet<TernOpInit> TheTernOpInitPool;
FoldingSet<FoldOpInit> TheFoldOpInitPool;
FoldingSet<IsAOpInit> TheIsAOpInitPool;
+ FoldingSet<ExistsOpInit> TheExistsOpInitPool;
DenseMap<std::pair<RecTy *, Init *>, VarInit *> TheVarInitPool;
DenseMap<std::pair<TypedInit *, unsigned>, VarBitInit *> TheVarBitInitPool;
DenseMap<std::pair<TypedInit *, unsigned>, VarListElementInit *>
@@ -85,14 +88,14 @@ struct RecordContext {
DenseMap<std::pair<Init *, StringInit *>, FieldInit *> TheFieldInitPool;
FoldingSet<CondOpInit> TheCondOpInitPool;
FoldingSet<DagInit> TheDagInitPool;
+ FoldingSet<RecordRecTy> RecordTypePool;
+ unsigned AnonCounter;
unsigned LastRecordID;
};
} // namespace detail
} // namespace llvm
-ManagedStatic<detail::RecordContext> Context;
-
//===----------------------------------------------------------------------===//
// Type implementations
//===----------------------------------------------------------------------===//
@@ -103,7 +106,7 @@ LLVM_DUMP_METHOD void RecTy::dump() const { print(errs()); }
ListRecTy *RecTy::getListTy() {
if (!ListTy)
- ListTy = new(Context->Allocator) ListRecTy(this);
+ ListTy = new (RK.getImpl().Allocator) ListRecTy(this);
return ListTy;
}
@@ -114,7 +117,9 @@ bool RecTy::typeIsConvertibleTo(const RecTy *RHS) const {
bool RecTy::typeIsA(const RecTy *RHS) const { return this == RHS; }
-BitRecTy *BitRecTy::get() { return &Context->SharedBitRecTy; }
+BitRecTy *BitRecTy::get(RecordKeeper &RK) {
+ return &RK.getImpl().SharedBitRecTy;
+}
bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
if (RecTy::typeIsConvertibleTo(RHS) || RHS->getRecTyKind() == IntRecTyKind)
@@ -124,12 +129,13 @@ bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
return false;
}
-BitsRecTy *BitsRecTy::get(unsigned Sz) {
- if (Sz >= Context->SharedBitsRecTys.size())
- Context->SharedBitsRecTys.resize(Sz + 1);
- BitsRecTy *&Ty = Context->SharedBitsRecTys[Sz];
+BitsRecTy *BitsRecTy::get(RecordKeeper &RK, unsigned Sz) {
+ detail::RecordKeeperImpl &RKImpl = RK.getImpl();
+ if (Sz >= RKImpl.SharedBitsRecTys.size())
+ RKImpl.SharedBitsRecTys.resize(Sz + 1);
+ BitsRecTy *&Ty = RKImpl.SharedBitsRecTys[Sz];
if (!Ty)
- Ty = new (Context->Allocator) BitsRecTy(Sz);
+ Ty = new (RKImpl.Allocator) BitsRecTy(RK, Sz);
return Ty;
}
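
BitsRecTy values are now uniqued per keeper through a size-indexed cache that grows on demand. The same idiom in isolation, with an illustrative type:

    #include "llvm/Support/Allocator.h"
    #include <vector>

    struct SizedTy {
      explicit SizedTy(unsigned Sz) : Size(Sz) {}
      unsigned Size;
    };

    struct TypeUniquer {
      llvm::BumpPtrAllocator Alloc;
      std::vector<SizedTy *> BySize; // index == size; grown lazily

      SizedTy *get(unsigned Sz) {
        if (Sz >= BySize.size())
          BySize.resize(Sz + 1); // new slots default to nullptr
        SizedTy *&Ty = BySize[Sz];
        if (!Ty)
          Ty = new (Alloc) SizedTy(Sz); // lives as long as the uniquer
        return Ty;
      }
    };
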
@@ -150,14 +156,18 @@ bool BitsRecTy::typeIsA(const RecTy *RHS) const {
return false;
}
-IntRecTy *IntRecTy::get() { return &Context->SharedIntRecTy; }
+IntRecTy *IntRecTy::get(RecordKeeper &RK) {
+ return &RK.getImpl().SharedIntRecTy;
+}
bool IntRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
RecTyKind kind = RHS->getRecTyKind();
return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
}
-StringRecTy *StringRecTy::get() { return &Context->SharedStringRecTy; }
+StringRecTy *StringRecTy::get(RecordKeeper &RK) {
+ return &RK.getImpl().SharedStringRecTy;
+}
std::string StringRecTy::getAsString() const {
return "string";
@@ -184,7 +194,9 @@ bool ListRecTy::typeIsA(const RecTy *RHS) const {
return false;
}
-DagRecTy *DagRecTy::get() { return &Context->SharedDagRecTy; }
+DagRecTy *DagRecTy::get(RecordKeeper &RK) {
+ return &RK.getImpl().SharedDagRecTy;
+}
std::string DagRecTy::getAsString() const {
return "dag";
@@ -197,12 +209,13 @@ static void ProfileRecordRecTy(FoldingSetNodeID &ID,
ID.AddPointer(R);
}
-RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
+RecordRecTy *RecordRecTy::get(RecordKeeper &RK,
+ ArrayRef<Record *> UnsortedClasses) {
+ detail::RecordKeeperImpl &RKImpl = RK.getImpl();
if (UnsortedClasses.empty())
- return &Context->AnyRecord;
+ return &RKImpl.AnyRecord;
- FoldingSet<RecordRecTy> &ThePool =
- UnsortedClasses[0]->getRecords().RecordTypePool;
+ FoldingSet<RecordRecTy> &ThePool = RKImpl.RecordTypePool;
SmallVector<Record *, 4> Classes(UnsortedClasses.begin(),
UnsortedClasses.end());
@@ -227,14 +240,18 @@ RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
}
#endif
- void *Mem = Context->Allocator.Allocate(
+ void *Mem = RKImpl.Allocator.Allocate(
totalSizeToAlloc<Record *>(Classes.size()), alignof(RecordRecTy));
- RecordRecTy *Ty = new(Mem) RecordRecTy(Classes.size());
+ RecordRecTy *Ty = new (Mem) RecordRecTy(RK, Classes.size());
std::uninitialized_copy(Classes.begin(), Classes.end(),
Ty->getTrailingObjects<Record *>());
ThePool.InsertNode(Ty, IP);
return Ty;
}
+RecordRecTy *RecordRecTy::get(Record *Class) {
+ assert(Class && "unexpected null class");
+ return get(Class->getRecords(), Class);
+}
void RecordRecTy::Profile(FoldingSetNodeID &ID) const {
ProfileRecordRecTy(ID, getClasses());
@@ -294,7 +311,7 @@ static RecordRecTy *resolveRecordTypes(RecordRecTy *T1, RecordRecTy *T2) {
}
}
- return RecordRecTy::get(CommonSuperClasses);
+ return RecordRecTy::get(T1->getRecordKeeper(), CommonSuperClasses);
}
RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
@@ -333,7 +350,15 @@ void Init::anchor() {}
LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); }
#endif
-UnsetInit *UnsetInit::get() { return &Context->TheUnsetInit; }
+RecordKeeper &Init::getRecordKeeper() const {
+ if (auto *TyInit = dyn_cast<TypedInit>(this))
+ return TyInit->getType()->getRecordKeeper();
+ return cast<UnsetInit>(this)->getRecordKeeper();
+}
+
+UnsetInit *UnsetInit::get(RecordKeeper &RK) {
+ return &RK.getImpl().TheUnsetInit;
+}
Init *UnsetInit::getCastTo(RecTy *Ty) const {
return const_cast<UnsetInit *>(this);
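
Init::getRecordKeeper above leans on LLVM-style RTTI: try the TypedInit path with dyn_cast, and otherwise the init must be an UnsetInit, which cast asserts. A self-contained model of that dispatch, with an invented hierarchy:

    #include "llvm/Support/Casting.h"

    struct Node {
      enum Kind { K_Typed, K_Unset };
      explicit Node(Kind K) : TheKind(K) {}
      Kind getKind() const { return TheKind; }
      Kind TheKind;
    };

    struct TypedNode : Node {
      TypedNode() : Node(K_Typed) {}
      static bool classof(const Node *N) { return N->getKind() == K_Typed; }
      int typedPath() const { return 1; }
    };

    struct UnsetNode : Node {
      UnsetNode() : Node(K_Unset) {}
      static bool classof(const Node *N) { return N->getKind() == K_Unset; }
      int unsetPath() const { return 2; }
    };

    static int dispatch(const Node *N) {
      // Try the common case first; otherwise the node must be the other kind.
      if (const auto *T = llvm::dyn_cast<TypedNode>(N))
        return T->typedPath();
      return llvm::cast<UnsetNode>(N)->unsetPath(); // asserts on a mismatch
    }
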
@@ -343,8 +368,8 @@ Init *UnsetInit::convertInitializerTo(RecTy *Ty) const {
return const_cast<UnsetInit *>(this);
}
-BitInit *BitInit::get(bool V) {
- return V ? &Context->TrueBitInit : &Context->FalseBitInit;
+BitInit *BitInit::get(RecordKeeper &RK, bool V) {
+ return V ? &RK.getImpl().TrueBitInit : &RK.getImpl().FalseBitInit;
}
Init *BitInit::convertInitializerTo(RecTy *Ty) const {
@@ -352,12 +377,12 @@ Init *BitInit::convertInitializerTo(RecTy *Ty) const {
return const_cast<BitInit *>(this);
if (isa<IntRecTy>(Ty))
- return IntInit::get(getValue());
+ return IntInit::get(getRecordKeeper(), getValue());
if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) {
// Can only convert single bit.
if (BRT->getNumBits() == 1)
- return BitsInit::get(const_cast<BitInit *>(this));
+ return BitsInit::get(getRecordKeeper(), const_cast<BitInit *>(this));
}
return nullptr;
@@ -371,20 +396,21 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
ID.AddPointer(I);
}
-BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
+BitsInit *BitsInit::get(RecordKeeper &RK, ArrayRef<Init *> Range) {
FoldingSetNodeID ID;
ProfileBitsInit(ID, Range);
+ detail::RecordKeeperImpl &RKImpl = RK.getImpl();
void *IP = nullptr;
- if (BitsInit *I = Context->TheBitsInitPool.FindNodeOrInsertPos(ID, IP))
+ if (BitsInit *I = RKImpl.TheBitsInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Context->Allocator.Allocate(
- totalSizeToAlloc<Init *>(Range.size()), alignof(BitsInit));
- BitsInit *I = new(Mem) BitsInit(Range.size());
+ void *Mem = RKImpl.Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
+ alignof(BitsInit));
+ BitsInit *I = new (Mem) BitsInit(RK, Range.size());
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
- Context->TheBitsInitPool.InsertNode(I, IP);
+ RKImpl.TheBitsInitPool.InsertNode(I, IP);
return I;
}
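
BitsInit and the other pooled inits all follow LLVM's two-step FoldingSet idiom: profile the contents, probe with FindNodeOrInsertPos, and on a miss allocate on the keeper's BumpPtrAllocator and InsertNode at the remembered position. Stripped to essentials, with an invented payload:

    #include "llvm/ADT/FoldingSet.h"
    #include "llvm/Support/Allocator.h"

    struct PairNode : llvm::FoldingSetNode {
      int A, B;
      PairNode(int A, int B) : A(A), B(B) {}
      void Profile(llvm::FoldingSetNodeID &ID) const {
        ID.AddInteger(A);
        ID.AddInteger(B);
      }
    };

    static PairNode *getPair(llvm::FoldingSet<PairNode> &Pool,
                             llvm::BumpPtrAllocator &Alloc, int A, int B) {
      llvm::FoldingSetNodeID ID;
      ID.AddInteger(A);
      ID.AddInteger(B);
      void *IP = nullptr;
      if (PairNode *N = Pool.FindNodeOrInsertPos(ID, IP))
        return N; // a structurally equal node is already pooled
      PairNode *N = new (Alloc) PairNode(A, B);
      Pool.InsertNode(N, IP); // reuses the probe position from the lookup
      return N;
    }

The IP cookie carries the probe position from the lookup into the insertion, so the set is only searched once per get().
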
@@ -412,7 +438,7 @@ Init *BitsInit::convertInitializerTo(RecTy *Ty) const {
Result |= static_cast<int64_t>(Bit->getValue()) << i;
else
return nullptr;
- return IntInit::get(Result);
+ return IntInit::get(getRecordKeeper(), Result);
}
return nullptr;
@@ -427,7 +453,7 @@ BitsInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
return nullptr;
NewBits[i] = getBit(Bits[i]);
}
- return BitsInit::get(NewBits);
+ return BitsInit::get(getRecordKeeper(), NewBits);
}
bool BitsInit::isConcrete() const {
@@ -482,15 +508,15 @@ Init *BitsInit::resolveReferences(Resolver &R) const {
}
if (Changed)
- return BitsInit::get(NewBits);
+ return BitsInit::get(getRecordKeeper(), NewBits);
return const_cast<BitsInit *>(this);
}
-IntInit *IntInit::get(int64_t V) {
- IntInit *&I = Context->TheIntInitPool[V];
+IntInit *IntInit::get(RecordKeeper &RK, int64_t V) {
+ IntInit *&I = RK.getImpl().TheIntInitPool[V];
if (!I)
- I = new (Context->Allocator) IntInit(V);
+ I = new (RK.getImpl().Allocator) IntInit(RK, V);
return I;
}
@@ -511,7 +537,7 @@ Init *IntInit::convertInitializerTo(RecTy *Ty) const {
if (isa<BitRecTy>(Ty)) {
int64_t Val = getValue();
if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit!
- return BitInit::get(Val != 0);
+ return BitInit::get(getRecordKeeper(), Val != 0);
}
if (auto *BRT = dyn_cast<BitsRecTy>(Ty)) {
@@ -522,9 +548,10 @@ Init *IntInit::convertInitializerTo(RecTy *Ty) const {
SmallVector<Init *, 16> NewBits(BRT->getNumBits());
for (unsigned i = 0; i != BRT->getNumBits(); ++i)
- NewBits[i] = BitInit::get(Value & ((i < 64) ? (1LL << i) : 0));
+ NewBits[i] =
+ BitInit::get(getRecordKeeper(), Value & ((i < 64) ? (1LL << i) : 0));
- return BitsInit::get(NewBits);
+ return BitsInit::get(getRecordKeeper(), NewBits);
}
return nullptr;
@@ -538,17 +565,18 @@ IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
if (Bits[i] >= 64)
return nullptr;
- NewBits[i] = BitInit::get(Value & (INT64_C(1) << Bits[i]));
+ NewBits[i] =
+ BitInit::get(getRecordKeeper(), Value & (INT64_C(1) << Bits[i]));
}
- return BitsInit::get(NewBits);
+ return BitsInit::get(getRecordKeeper(), NewBits);
}
-AnonymousNameInit *AnonymousNameInit::get(unsigned V) {
- return new (Context->Allocator) AnonymousNameInit(V);
+AnonymousNameInit *AnonymousNameInit::get(RecordKeeper &RK, unsigned V) {
+ return new (RK.getImpl().Allocator) AnonymousNameInit(RK, V);
}
StringInit *AnonymousNameInit::getNameInit() const {
- return StringInit::get(getAsString());
+ return StringInit::get(getRecordKeeper(), getAsString());
}
std::string AnonymousNameInit::getAsString() const {
@@ -565,12 +593,13 @@ Init *AnonymousNameInit::resolveReferences(Resolver &R) const {
return New;
}
-StringInit *StringInit::get(StringRef V, StringFormat Fmt) {
- auto &InitMap = Fmt == SF_String ? Context->StringInitStringPool
- : Context->StringInitCodePool;
+StringInit *StringInit::get(RecordKeeper &RK, StringRef V, StringFormat Fmt) {
+ detail::RecordKeeperImpl &RKImpl = RK.getImpl();
+ auto &InitMap = Fmt == SF_String ? RKImpl.StringInitStringPool
+ : RKImpl.StringInitCodePool;
auto &Entry = *InitMap.insert(std::make_pair(V, nullptr)).first;
if (!Entry.second)
- Entry.second = new (Context->Allocator) StringInit(Entry.getKey(), Fmt);
+ Entry.second = new (RKImpl.Allocator) StringInit(RK, Entry.getKey(), Fmt);
return Entry.second;
}
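
StringInit::get interns through a StringMap and points each init at the map's own key storage via Entry.getKey(), so every distinct string is stored exactly once. The bare pattern, with an illustrative value type:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Allocator.h"

    struct Interned {
      explicit Interned(llvm::StringRef V) : Value(V) {}
      llvm::StringRef Value; // points into the StringMap's key storage
    };

    static Interned *intern(llvm::StringMap<Interned *> &Pool,
                            llvm::BumpPtrAllocator &Alloc, llvm::StringRef V) {
      auto &Entry = *Pool.insert(std::make_pair(V, nullptr)).first;
      if (!Entry.second)
        Entry.second = new (Alloc) Interned(Entry.getKey());
      return Entry.second;
    }
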
@@ -595,19 +624,20 @@ ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
FoldingSetNodeID ID;
ProfileListInit(ID, Range, EltTy);
+ detail::RecordKeeperImpl &RK = EltTy->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (ListInit *I = Context->TheListInitPool.FindNodeOrInsertPos(ID, IP))
+ if (ListInit *I = RK.TheListInitPool.FindNodeOrInsertPos(ID, IP))
return I;
assert(Range.empty() || !isa<TypedInit>(Range[0]) ||
cast<TypedInit>(Range[0])->getType()->typeIsConvertibleTo(EltTy));
- void *Mem = Context->Allocator.Allocate(
- totalSizeToAlloc<Init *>(Range.size()), alignof(ListInit));
+ void *Mem = RK.Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
+ alignof(ListInit));
ListInit *I = new (Mem) ListInit(Range.size(), EltTy);
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
- Context->TheListInitPool.InsertNode(I, IP);
+ RK.TheListInitPool.InsertNode(I, IP);
return I;
}
@@ -714,7 +744,7 @@ std::string ListInit::getAsString() const {
}
Init *OpInit::getBit(unsigned Bit) const {
- if (getType() == BitRecTy::get())
+ if (getType() == BitRecTy::get(getRecordKeeper()))
return const_cast<OpInit*>(this);
return VarBitInit::get(const_cast<OpInit*>(this), Bit);
}
@@ -730,12 +760,13 @@ UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) {
FoldingSetNodeID ID;
ProfileUnOpInit(ID, Opc, LHS, Type);
+ detail::RecordKeeperImpl &RK = Type->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (UnOpInit *I = Context->TheUnOpInitPool.FindNodeOrInsertPos(ID, IP))
+ if (UnOpInit *I = RK.TheUnOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- UnOpInit *I = new (Context->Allocator) UnOpInit(Opc, LHS, Type);
- Context->TheUnOpInitPool.InsertNode(I, IP);
+ UnOpInit *I = new (RK.Allocator) UnOpInit(Opc, LHS, Type);
+ RK.TheUnOpInitPool.InsertNode(I, IP);
return I;
}
@@ -744,6 +775,7 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const {
}
Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
+ RecordKeeper &RK = getRecordKeeper();
switch (getOpcode()) {
case CAST:
if (isa<StringRecTy>(getType())) {
@@ -751,11 +783,11 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
return LHSs;
if (DefInit *LHSd = dyn_cast<DefInit>(LHS))
- return StringInit::get(LHSd->getAsString());
+ return StringInit::get(RK, LHSd->getAsString());
- if (IntInit *LHSi =
- dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get())))
- return StringInit::get(LHSi->getAsString());
+ if (IntInit *LHSi = dyn_cast_or_null<IntInit>(
+ LHS->convertInitializerTo(IntRecTy::get(RK))))
+ return StringInit::get(RK, LHSi->getAsString());
} else if (isa<RecordRecTy>(getType())) {
if (StringInit *Name = dyn_cast<StringInit>(LHS)) {
@@ -800,9 +832,9 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
break;
case NOT:
- if (IntInit *LHSi =
- dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get())))
- return IntInit::get(LHSi->getValue() ? 0 : 1);
+ if (IntInit *LHSi = dyn_cast_or_null<IntInit>(
+ LHS->convertInitializerTo(IntRecTy::get(RK))))
+ return IntInit::get(RK, LHSi->getValue() ? 0 : 1);
break;
case HEAD:
@@ -823,20 +855,20 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
case SIZE:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS))
- return IntInit::get(LHSl->size());
+ return IntInit::get(RK, LHSl->size());
if (DagInit *LHSd = dyn_cast<DagInit>(LHS))
- return IntInit::get(LHSd->arg_size());
+ return IntInit::get(RK, LHSd->arg_size());
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
- return IntInit::get(LHSs->getValue().size());
+ return IntInit::get(RK, LHSs->getValue().size());
break;
case EMPTY:
if (ListInit *LHSl = dyn_cast<ListInit>(LHS))
- return IntInit::get(LHSl->empty());
+ return IntInit::get(RK, LHSl->empty());
if (DagInit *LHSd = dyn_cast<DagInit>(LHS))
- return IntInit::get(LHSd->arg_empty());
+ return IntInit::get(RK, LHSd->arg_empty());
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
- return IntInit::get(LHSs->getValue().empty());
+ return IntInit::get(RK, LHSs->getValue().empty());
break;
case GETDAGOP:
@@ -893,12 +925,13 @@ BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, Init *RHS, RecTy *Type) {
FoldingSetNodeID ID;
ProfileBinOpInit(ID, Opc, LHS, RHS, Type);
+ detail::RecordKeeperImpl &RK = LHS->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (BinOpInit *I = Context->TheBinOpInitPool.FindNodeOrInsertPos(ID, IP))
+ if (BinOpInit *I = RK.TheBinOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- BinOpInit *I = new (Context->Allocator) BinOpInit(Opc, LHS, RHS, Type);
- Context->TheBinOpInitPool.InsertNode(I, IP);
+ BinOpInit *I = new (RK.Allocator) BinOpInit(Opc, LHS, RHS, Type);
+ RK.TheBinOpInitPool.InsertNode(I, IP);
return I;
}
@@ -910,15 +943,15 @@ static StringInit *ConcatStringInits(const StringInit *I0,
const StringInit *I1) {
SmallString<80> Concat(I0->getValue());
Concat.append(I1->getValue());
- return StringInit::get(Concat,
- StringInit::determineFormat(I0->getFormat(),
- I1->getFormat()));
+ return StringInit::get(
+ I0->getRecordKeeper(), Concat,
+ StringInit::determineFormat(I0->getFormat(), I1->getFormat()));
}
static StringInit *interleaveStringList(const ListInit *List,
const StringInit *Delim) {
if (List->size() == 0)
- return StringInit::get("");
+ return StringInit::get(List->getRecordKeeper(), "");
StringInit *Element = dyn_cast<StringInit>(List->getElement(0));
if (!Element)
return nullptr;
@@ -933,30 +966,29 @@ static StringInit *interleaveStringList(const ListInit *List,
Result.append(Element->getValue());
Fmt = StringInit::determineFormat(Fmt, Element->getFormat());
}
- return StringInit::get(Result, Fmt);
+ return StringInit::get(List->getRecordKeeper(), Result, Fmt);
}
static StringInit *interleaveIntList(const ListInit *List,
const StringInit *Delim) {
+ RecordKeeper &RK = List->getRecordKeeper();
if (List->size() == 0)
- return StringInit::get("");
- IntInit *Element =
- dyn_cast_or_null<IntInit>(List->getElement(0)
- ->convertInitializerTo(IntRecTy::get()));
+ return StringInit::get(RK, "");
+ IntInit *Element = dyn_cast_or_null<IntInit>(
+ List->getElement(0)->convertInitializerTo(IntRecTy::get(RK)));
if (!Element)
return nullptr;
SmallString<80> Result(Element->getAsString());
for (unsigned I = 1, E = List->size(); I < E; ++I) {
Result.append(Delim->getValue());
- IntInit *Element =
- dyn_cast_or_null<IntInit>(List->getElement(I)
- ->convertInitializerTo(IntRecTy::get()));
+ IntInit *Element = dyn_cast_or_null<IntInit>(
+ List->getElement(I)->convertInitializerTo(IntRecTy::get(RK)));
if (!Element)
return nullptr;
Result.append(Element->getAsString());
}
- return StringInit::get(Result);
+ return StringInit::get(RK, Result);
}
Init *BinOpInit::getStrConcat(Init *I0, Init *I1) {
@@ -964,7 +996,8 @@ Init *BinOpInit::getStrConcat(Init *I0, Init *I1) {
if (const StringInit *I0s = dyn_cast<StringInit>(I0))
if (const StringInit *I1s = dyn_cast<StringInit>(I1))
return ConcatStringInits(I0s, I1s);
- return BinOpInit::get(BinOpInit::STRCONCAT, I0, I1, StringRecTy::get());
+ return BinOpInit::get(BinOpInit::STRCONCAT, I0, I1,
+ StringRecTy::get(I0->getRecordKeeper()));
}
static ListInit *ConcatListInits(const ListInit *LHS,
@@ -1003,7 +1036,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
}
Init *Op = LOp ? LOp : ROp;
if (!Op)
- Op = UnsetInit::get();
+ Op = UnsetInit::get(getRecordKeeper());
SmallVector<Init*, 8> Args;
SmallVector<StringInit*, 8> ArgNames;
@@ -1067,10 +1100,10 @@ Init *BinOpInit::Fold(Record *CurRec) const {
case GE:
case GT: {
// First see if we have two bit, bits, or int.
- IntInit *LHSi =
- dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get()));
- IntInit *RHSi =
- dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get()));
+ IntInit *LHSi = dyn_cast_or_null<IntInit>(
+ LHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
+ IntInit *RHSi = dyn_cast_or_null<IntInit>(
+ RHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
if (LHSi && RHSi) {
bool Result;
@@ -1083,7 +1116,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
case GT: Result = LHSi->getValue() > RHSi->getValue(); break;
default: llvm_unreachable("unhandled comparison");
}
- return BitInit::get(Result);
+ return BitInit::get(getRecordKeeper(), Result);
}
// Next try strings.
@@ -1101,7 +1134,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
case GT: Result = LHSs->getValue() > RHSs->getValue(); break;
default: llvm_unreachable("unhandled comparison");
}
- return BitInit::get(Result);
+ return BitInit::get(getRecordKeeper(), Result);
}
// Finally, !eq and !ne can be used with records.
@@ -1109,8 +1142,8 @@ Init *BinOpInit::Fold(Record *CurRec) const {
DefInit *LHSd = dyn_cast<DefInit>(LHS);
DefInit *RHSd = dyn_cast<DefInit>(RHS);
if (LHSd && RHSd)
- return BitInit::get((getOpcode() == EQ) ? LHSd == RHSd
- : LHSd != RHSd);
+ return BitInit::get(getRecordKeeper(),
+ (getOpcode() == EQ) ? LHSd == RHSd : LHSd != RHSd);
}
break;
@@ -1138,10 +1171,10 @@ Init *BinOpInit::Fold(Record *CurRec) const {
case SHL:
case SRA:
case SRL: {
- IntInit *LHSi =
- dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get()));
- IntInit *RHSi =
- dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get()));
+ IntInit *LHSi = dyn_cast_or_null<IntInit>(
+ LHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
+ IntInit *RHSi = dyn_cast_or_null<IntInit>(
+ RHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
if (LHSi && RHSi) {
int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
int64_t Result;
@@ -1157,7 +1190,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
case SRA: Result = LHSv >> RHSv; break;
case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
}
- return IntInit::get(Result);
+ return IntInit::get(getRecordKeeper(), Result);
}
break;
}
@@ -1218,12 +1251,13 @@ TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS,
FoldingSetNodeID ID;
ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type);
+ detail::RecordKeeperImpl &RK = LHS->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (TernOpInit *I = Context->TheTernOpInitPool.FindNodeOrInsertPos(ID, IP))
+ if (TernOpInit *I = RK.TheTernOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- TernOpInit *I = new (Context->Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
- Context->TheTernOpInitPool.InsertNode(I, IP);
+ TernOpInit *I = new (RK.Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
+ RK.TheTernOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1296,8 +1330,9 @@ static Init *FilterHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Init *Include = ItemApply(LHS, Item, RHS, CurRec);
if (!Include)
return nullptr;
- if (IntInit *IncludeInt = dyn_cast_or_null<IntInit>(
- Include->convertInitializerTo(IntRecTy::get()))) {
+ if (IntInit *IncludeInt =
+ dyn_cast_or_null<IntInit>(Include->convertInitializerTo(
+ IntRecTy::get(LHS->getRecordKeeper())))) {
if (IncludeInt->getValue())
NewList.push_back(Item);
} else {
@@ -1311,6 +1346,7 @@ static Init *FilterHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
}
Init *TernOpInit::Fold(Record *CurRec) const {
+ RecordKeeper &RK = getRecordKeeper();
switch (getOpcode()) {
case SUBST: {
DefInit *LHSd = dyn_cast<DefInit>(LHS);
@@ -1351,7 +1387,7 @@ Init *TernOpInit::Fold(Record *CurRec) const {
idx = found + MHSs->getValue().size();
}
- return StringInit::get(Val);
+ return StringInit::get(RK, Val);
}
break;
}
@@ -1370,7 +1406,7 @@ Init *TernOpInit::Fold(Record *CurRec) const {
case IF: {
if (IntInit *LHSi = dyn_cast_or_null<IntInit>(
- LHS->convertInitializerTo(IntRecTy::get()))) {
+ LHS->convertInitializerTo(IntRecTy::get(RK)))) {
if (LHSi->getValue())
return MHS;
return RHS;
@@ -1391,8 +1427,8 @@ Init *TernOpInit::Fold(Record *CurRec) const {
SmallVector<std::pair<Init *, StringInit *>, 8> Children;
unsigned Size = MHSl ? MHSl->size() : RHSl->size();
for (unsigned i = 0; i != Size; ++i) {
- Init *Node = MHSl ? MHSl->getElement(i) : UnsetInit::get();
- Init *Name = RHSl ? RHSl->getElement(i) : UnsetInit::get();
+ Init *Node = MHSl ? MHSl->getElement(i) : UnsetInit::get(RK);
+ Init *Name = RHSl ? RHSl->getElement(i) : UnsetInit::get(RK);
if (!isa<StringInit>(Name) && !isa<UnsetInit>(Name))
return const_cast<TernOpInit *>(this);
Children.emplace_back(Node, dyn_cast<StringInit>(Name));
@@ -1417,7 +1453,7 @@ Init *TernOpInit::Fold(Record *CurRec) const {
std::to_string(Start));
if (Length < 0)
PrintError(CurRec->getLoc(), "!substr length must be nonnegative");
- return StringInit::get(LHSs->getValue().substr(Start, Length),
+ return StringInit::get(RK, LHSs->getValue().substr(Start, Length),
LHSs->getFormat());
}
break;
@@ -1437,8 +1473,8 @@ Init *TernOpInit::Fold(Record *CurRec) const {
std::to_string(Start));
auto I = LHSs->getValue().find(MHSs->getValue(), Start);
if (I == std::string::npos)
- return IntInit::get(-1);
- return IntInit::get(I);
+ return IntInit::get(RK, -1);
+ return IntInit::get(RK, I);
}
break;
}
@@ -1452,7 +1488,7 @@ Init *TernOpInit::resolveReferences(Resolver &R) const {
if (getOpcode() == IF && lhs != LHS) {
if (IntInit *Value = dyn_cast_or_null<IntInit>(
- lhs->convertInitializerTo(IntRecTy::get()))) {
+ lhs->convertInitializerTo(IntRecTy::get(getRecordKeeper())))) {
// Short-circuit
if (Value->getValue())
return MHS->resolveReferences(R);
@@ -1506,17 +1542,16 @@ static void ProfileFoldOpInit(FoldingSetNodeID &ID, Init *Start, Init *List,
FoldOpInit *FoldOpInit::get(Init *Start, Init *List, Init *A, Init *B,
Init *Expr, RecTy *Type) {
-
FoldingSetNodeID ID;
ProfileFoldOpInit(ID, Start, List, A, B, Expr, Type);
+ detail::RecordKeeperImpl &RK = Start->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (FoldOpInit *I = Context->TheFoldOpInitPool.FindNodeOrInsertPos(ID, IP))
+ if (FoldOpInit *I = RK.TheFoldOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- FoldOpInit *I =
- new (Context->Allocator) FoldOpInit(Start, List, A, B, Expr, Type);
- Context->TheFoldOpInitPool.InsertNode(I, IP);
+ FoldOpInit *I = new (RK.Allocator) FoldOpInit(Start, List, A, B, Expr, Type);
+ RK.TheFoldOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1575,12 +1610,13 @@ IsAOpInit *IsAOpInit::get(RecTy *CheckType, Init *Expr) {
FoldingSetNodeID ID;
ProfileIsAOpInit(ID, CheckType, Expr);
+ detail::RecordKeeperImpl &RK = Expr->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (IsAOpInit *I = Context->TheIsAOpInitPool.FindNodeOrInsertPos(ID, IP))
+ if (IsAOpInit *I = RK.TheIsAOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- IsAOpInit *I = new (Context->Allocator) IsAOpInit(CheckType, Expr);
- Context->TheIsAOpInitPool.InsertNode(I, IP);
+ IsAOpInit *I = new (RK.Allocator) IsAOpInit(CheckType, Expr);
+ RK.TheIsAOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1592,17 +1628,17 @@ Init *IsAOpInit::Fold() const {
if (TypedInit *TI = dyn_cast<TypedInit>(Expr)) {
// Is the expression type known to be (a subclass of) the desired type?
if (TI->getType()->typeIsConvertibleTo(CheckType))
- return IntInit::get(1);
+ return IntInit::get(getRecordKeeper(), 1);
if (isa<RecordRecTy>(CheckType)) {
// If the target type is not a subclass of the expression type, or if
// the expression has fully resolved to a record, we know that it can't
// be of the required type.
if (!CheckType->typeIsConvertibleTo(TI->getType()) || isa<DefInit>(Expr))
- return IntInit::get(0);
+ return IntInit::get(getRecordKeeper(), 0);
} else {
// We treat non-record types as not castable.
- return IntInit::get(0);
+ return IntInit::get(getRecordKeeper(), 0);
}
}
return const_cast<IsAOpInit *>(this);
@@ -1625,6 +1661,81 @@ std::string IsAOpInit::getAsString() const {
.str();
}
+static void ProfileExistsOpInit(FoldingSetNodeID &ID, RecTy *CheckType,
+ Init *Expr) {
+ ID.AddPointer(CheckType);
+ ID.AddPointer(Expr);
+}
+
+ExistsOpInit *ExistsOpInit::get(RecTy *CheckType, Init *Expr) {
+ FoldingSetNodeID ID;
+ ProfileExistsOpInit(ID, CheckType, Expr);
+
+ detail::RecordKeeperImpl &RK = Expr->getRecordKeeper().getImpl();
+ void *IP = nullptr;
+ if (ExistsOpInit *I = RK.TheExistsOpInitPool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ ExistsOpInit *I = new (RK.Allocator) ExistsOpInit(CheckType, Expr);
+ RK.TheExistsOpInitPool.InsertNode(I, IP);
+ return I;
+}
+
+void ExistsOpInit::Profile(FoldingSetNodeID &ID) const {
+ ProfileExistsOpInit(ID, CheckType, Expr);
+}
+
+Init *ExistsOpInit::Fold(Record *CurRec, bool IsFinal) const {
+ if (StringInit *Name = dyn_cast<StringInit>(Expr)) {
+ if (!CurRec && !IsFinal)
+ return const_cast<ExistsOpInit *>(this);
+
+ // Self-references are allowed, but their resolution is delayed until
+ // the final resolve to ensure that we get the correct type for them.
+ auto *Anonymous = dyn_cast<AnonymousNameInit>(CurRec->getNameInit());
+ if (Name == CurRec->getNameInit() ||
+ (Anonymous && Name == Anonymous->getNameInit())) {
+ if (!IsFinal)
+ return const_cast<ExistsOpInit *>(this);
+
+      // A record with this name certainly exists, so check whether the
+      // types are compatible.
+ return IntInit::get(getRecordKeeper(),
+ CurRec->getType()->typeIsA(CheckType));
+ }
+
+ // Look up all defined records to see if we can find one.
+ Record *D = CheckType->getRecordKeeper().getDef(Name->getValue());
+ if (!D) {
+ if (IsFinal)
+ return IntInit::get(getRecordKeeper(), 0);
+ return const_cast<ExistsOpInit *>(this);
+ }
+
+    // Check whether the types are compatible.
+ return IntInit::get(getRecordKeeper(),
+ DefInit::get(D)->getType()->typeIsA(CheckType));
+ }
+ return const_cast<ExistsOpInit *>(this);
+}
+
+Init *ExistsOpInit::resolveReferences(Resolver &R) const {
+ Init *NewExpr = Expr->resolveReferences(R);
+ if (Expr != NewExpr || R.isFinal())
+ return get(CheckType, NewExpr)->Fold(R.getCurrentRecord(), R.isFinal());
+ return const_cast<ExistsOpInit *>(this);
+}
+
+Init *ExistsOpInit::getBit(unsigned Bit) const {
+ return VarBitInit::get(const_cast<ExistsOpInit *>(this), Bit);
+}
+
+std::string ExistsOpInit::getAsString() const {
+ return (Twine("!exists<") + CheckType->getAsString() + ">(" +
+ Expr->getAsString() + ")")
+ .str();
+}
+
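
At final resolution, the fold above amounts to a keeper lookup plus a type check. A hedged sketch of that case against the RecordKeeper API used in the hunk (the helper name is invented; diagnostics elided):

    #include "llvm/TableGen/Record.h"
    using namespace llvm;

    // Mirrors the IsFinal case of ExistsOpInit::Fold: does a def named Name
    // exist, and is its type compatible with CheckType?
    static bool defExists(RecordKeeper &RK, StringRef Name, RecTy *CheckType) {
      Record *D = RK.getDef(Name);
      if (!D)
        return false; // at the final resolve, a missing def folds to 0
      return DefInit::get(D)->getType()->typeIsA(CheckType);
    }
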
RecTy *TypedInit::getFieldType(StringInit *FieldName) const {
if (RecordRecTy *RecordType = dyn_cast<RecordRecTy>(getType())) {
for (Record *Rec : RecordType->getClasses()) {
@@ -1642,7 +1753,7 @@ TypedInit::convertInitializerTo(RecTy *Ty) const {
if (isa<BitRecTy>(getType()) && isa<BitsRecTy>(Ty) &&
cast<BitsRecTy>(Ty)->getNumBits() == 1)
- return BitsInit::get({const_cast<TypedInit *>(this)});
+ return BitsInit::get(getRecordKeeper(), {const_cast<TypedInit *>(this)});
return nullptr;
}
@@ -1660,7 +1771,7 @@ Init *TypedInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
NewBits.push_back(VarBitInit::get(const_cast<TypedInit *>(this), Bit));
}
- return BitsInit::get(NewBits);
+ return BitsInit::get(getRecordKeeper(), NewBits);
}
Init *TypedInit::getCastTo(RecTy *Ty) const {
@@ -1698,14 +1809,15 @@ Init *TypedInit::convertInitListSlice(ArrayRef<unsigned> Elements) const {
VarInit *VarInit::get(StringRef VN, RecTy *T) {
- Init *Value = StringInit::get(VN);
+ Init *Value = StringInit::get(T->getRecordKeeper(), VN);
return VarInit::get(Value, T);
}
VarInit *VarInit::get(Init *VN, RecTy *T) {
- VarInit *&I = Context->TheVarInitPool[std::make_pair(T, VN)];
+ detail::RecordKeeperImpl &RK = T->getRecordKeeper().getImpl();
+ VarInit *&I = RK.TheVarInitPool[std::make_pair(T, VN)];
if (!I)
- I = new (Context->Allocator) VarInit(VN, T);
+ I = new (RK.Allocator) VarInit(VN, T);
return I;
}
@@ -1715,7 +1827,7 @@ StringRef VarInit::getName() const {
}
Init *VarInit::getBit(unsigned Bit) const {
- if (getType() == BitRecTy::get())
+ if (getType() == BitRecTy::get(getRecordKeeper()))
return const_cast<VarInit*>(this);
return VarBitInit::get(const_cast<VarInit*>(this), Bit);
}
@@ -1727,9 +1839,10 @@ Init *VarInit::resolveReferences(Resolver &R) const {
}
VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
- VarBitInit *&I = Context->TheVarBitInitPool[std::make_pair(T, B)];
+ detail::RecordKeeperImpl &RK = T->getRecordKeeper().getImpl();
+ VarBitInit *&I = RK.TheVarBitInitPool[std::make_pair(T, B)];
if (!I)
- I = new(Context->Allocator) VarBitInit(T, B);
+ I = new (RK.Allocator) VarBitInit(T, B);
return I;
}
@@ -1746,10 +1859,10 @@ Init *VarBitInit::resolveReferences(Resolver &R) const {
}
VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) {
- VarListElementInit *&I =
- Context->TheVarListElementInitPool[std::make_pair(T, E)];
+ detail::RecordKeeperImpl &RK = T->getRecordKeeper().getImpl();
+ VarListElementInit *&I = RK.TheVarListElementInitPool[std::make_pair(T, E)];
if (!I)
- I = new (Context->Allocator) VarListElementInit(T, E);
+ I = new (RK.Allocator) VarListElementInit(T, E);
return I;
}
@@ -1771,7 +1884,7 @@ Init *VarListElementInit::resolveReferences(Resolver &R) const {
}
Init *VarListElementInit::getBit(unsigned Bit) const {
- if (getType() == BitRecTy::get())
+ if (getType() == BitRecTy::get(getRecordKeeper()))
return const_cast<VarListElementInit*>(this);
return VarBitInit::get(const_cast<VarListElementInit*>(this), Bit);
}
@@ -1808,20 +1921,25 @@ static void ProfileVarDefInit(FoldingSetNodeID &ID,
ID.AddPointer(I);
}
+VarDefInit::VarDefInit(Record *Class, unsigned N)
+ : TypedInit(IK_VarDefInit, RecordRecTy::get(Class)), Class(Class),
+ NumArgs(N) {}
+
VarDefInit *VarDefInit::get(Record *Class, ArrayRef<Init *> Args) {
FoldingSetNodeID ID;
ProfileVarDefInit(ID, Class, Args);
+ detail::RecordKeeperImpl &RK = Class->getRecords().getImpl();
void *IP = nullptr;
- if (VarDefInit *I = Context->TheVarDefInitPool.FindNodeOrInsertPos(ID, IP))
+ if (VarDefInit *I = RK.TheVarDefInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Context->Allocator.Allocate(totalSizeToAlloc<Init *>(Args.size()),
- alignof(VarDefInit));
+ void *Mem = RK.Allocator.Allocate(totalSizeToAlloc<Init *>(Args.size()),
+ alignof(VarDefInit));
VarDefInit *I = new (Mem) VarDefInit(Class, Args.size());
std::uninitialized_copy(Args.begin(), Args.end(),
I->getTrailingObjects<Init *>());
- Context->TheVarDefInitPool.InsertNode(I, IP);
+ RK.TheVarDefInitPool.InsertNode(I, IP);
return I;
}
@@ -1927,14 +2045,15 @@ std::string VarDefInit::getAsString() const {
}
FieldInit *FieldInit::get(Init *R, StringInit *FN) {
- FieldInit *&I = Context->TheFieldInitPool[std::make_pair(R, FN)];
+ detail::RecordKeeperImpl &RK = R->getRecordKeeper().getImpl();
+ FieldInit *&I = RK.TheFieldInitPool[std::make_pair(R, FN)];
if (!I)
- I = new (Context->Allocator) FieldInit(R, FN);
+ I = new (RK.Allocator) FieldInit(R, FN);
return I;
}
Init *FieldInit::getBit(unsigned Bit) const {
- if (getType() == BitRecTy::get())
+ if (getType() == BitRecTy::get(getRecordKeeper()))
return const_cast<FieldInit*>(this);
return VarBitInit::get(const_cast<FieldInit*>(this), Bit);
}
@@ -1992,20 +2111,20 @@ void CondOpInit::Profile(FoldingSetNodeID &ID) const {
ValType);
}
-CondOpInit *
-CondOpInit::get(ArrayRef<Init *> CondRange,
- ArrayRef<Init *> ValRange, RecTy *Ty) {
+CondOpInit *CondOpInit::get(ArrayRef<Init *> CondRange,
+ ArrayRef<Init *> ValRange, RecTy *Ty) {
assert(CondRange.size() == ValRange.size() &&
"Number of conditions and values must match!");
FoldingSetNodeID ID;
ProfileCondOpInit(ID, CondRange, ValRange, Ty);
+ detail::RecordKeeperImpl &RK = Ty->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (CondOpInit *I = Context->TheCondOpInitPool.FindNodeOrInsertPos(ID, IP))
+ if (CondOpInit *I = RK.TheCondOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Context->Allocator.Allocate(
+ void *Mem = RK.Allocator.Allocate(
totalSizeToAlloc<Init *>(2 * CondRange.size()), alignof(BitsInit));
CondOpInit *I = new(Mem) CondOpInit(CondRange.size(), Ty);
@@ -2013,7 +2132,7 @@ CondOpInit::get(ArrayRef<Init *> CondRange,
I->getTrailingObjects<Init *>());
std::uninitialized_copy(ValRange.begin(), ValRange.end(),
I->getTrailingObjects<Init *>()+CondRange.size());
- Context->TheCondOpInitPool.InsertNode(I, IP);
+ RK.TheCondOpInitPool.InsertNode(I, IP);
return I;
}
@@ -2041,16 +2160,18 @@ Init *CondOpInit::resolveReferences(Resolver &R) const {
}
Init *CondOpInit::Fold(Record *CurRec) const {
+ RecordKeeper &RK = getRecordKeeper();
for ( unsigned i = 0; i < NumConds; ++i) {
Init *Cond = getCond(i);
Init *Val = getVal(i);
if (IntInit *CondI = dyn_cast_or_null<IntInit>(
- Cond->convertInitializerTo(IntRecTy::get()))) {
+ Cond->convertInitializerTo(IntRecTy::get(RK)))) {
if (CondI->getValue())
return Val->convertInitializerTo(getValType());
- } else
- return const_cast<CondOpInit *>(this);
+ } else {
+ return const_cast<CondOpInit *>(this);
+ }
}
PrintFatalError(CurRec->getLoc(),
@@ -2120,11 +2241,12 @@ DagInit *DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
FoldingSetNodeID ID;
ProfileDagInit(ID, V, VN, ArgRange, NameRange);
+ detail::RecordKeeperImpl &RK = V->getRecordKeeper().getImpl();
void *IP = nullptr;
- if (DagInit *I = Context->TheDagInitPool.FindNodeOrInsertPos(ID, IP))
+ if (DagInit *I = RK.TheDagInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Context->Allocator.Allocate(
+ void *Mem = RK.Allocator.Allocate(
totalSizeToAlloc<Init *, StringInit *>(ArgRange.size(), NameRange.size()),
alignof(BitsInit));
DagInit *I = new (Mem) DagInit(V, VN, ArgRange.size(), NameRange.size());
@@ -2132,7 +2254,7 @@ DagInit *DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
I->getTrailingObjects<Init *>());
std::uninitialized_copy(NameRange.begin(), NameRange.end(),
I->getTrailingObjects<StringInit *>());
- Context->TheDagInitPool.InsertNode(I, IP);
+ RK.TheDagInitPool.InsertNode(I, IP);
return I;
}
@@ -2209,7 +2331,7 @@ std::string DagInit::getAsString() const {
RecordVal::RecordVal(Init *N, RecTy *T, FieldKind K)
: Name(N), TyAndKind(T, K) {
- setValue(UnsetInit::get());
+ setValue(UnsetInit::get(N->getRecordKeeper()));
assert(Value && "Cannot create unset value for current type!");
}
@@ -2217,7 +2339,7 @@ RecordVal::RecordVal(Init *N, RecTy *T, FieldKind K)
// a source location.
RecordVal::RecordVal(Init *N, SMLoc Loc, RecTy *T, FieldKind K)
: Name(N), Loc(Loc), TyAndKind(T, K) {
- setValue(UnsetInit::get());
+ setValue(UnsetInit::get(N->getRecordKeeper()));
assert(Value && "Cannot create unset value for current type!");
}
@@ -2226,7 +2348,7 @@ StringRef RecordVal::getName() const {
}
std::string RecordVal::getPrintType() const {
- if (getType() == StringRecTy::get()) {
+ if (getType() == StringRecTy::get(getRecordKeeper())) {
if (auto *StrInit = dyn_cast<StringInit>(Value)) {
if (StrInit->hasCodeFormat())
return "code";
@@ -2252,7 +2374,7 @@ bool RecordVal::setValue(Init *V) {
Bits.reserve(BTy->getNumBits());
for (unsigned I = 0, E = BTy->getNumBits(); I < E; ++I)
Bits.push_back(Value->getBit(I));
- Value = BitsInit::get(Bits);
+ Value = BitsInit::get(V->getRecordKeeper(), Bits);
}
}
}
@@ -2277,7 +2399,7 @@ bool RecordVal::setValue(Init *V, SMLoc NewLoc) {
Bits.reserve(BTy->getNumBits());
for (unsigned I = 0, E = BTy->getNumBits(); I < E; ++I)
Bits.push_back(Value->getBit(I));
- Value = BitsInit::get(Bits);
+ Value = BitsInit::get(getRecordKeeper(), Bits);
}
}
}
@@ -2313,16 +2435,20 @@ void Record::checkName() {
RecordRecTy *Record::getType() {
SmallVector<Record *, 4> DirectSCs;
getDirectSuperClasses(DirectSCs);
- return RecordRecTy::get(DirectSCs);
+ return RecordRecTy::get(TrackedRecords, DirectSCs);
}
DefInit *Record::getDefInit() {
- if (!CorrespondingDefInit)
- CorrespondingDefInit = new (Context->Allocator) DefInit(this);
+ if (!CorrespondingDefInit) {
+ CorrespondingDefInit =
+ new (TrackedRecords.getImpl().Allocator) DefInit(this);
+ }
return CorrespondingDefInit;
}
-unsigned Record::getNewUID() { return Context->LastRecordID++; }
+unsigned Record::getNewUID(RecordKeeper &RK) {
+ return RK.getImpl().LastRecordID++;
+}
void Record::setName(Init *NewName) {
Name = NewName;
@@ -2472,7 +2598,7 @@ Init *Record::getValueInit(StringRef FieldName) const {
StringRef Record::getValueAsString(StringRef FieldName) const {
llvm::Optional<StringRef> S = getValueAsOptionalString(FieldName);
- if (!S.hasValue())
+ if (!S)
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
return S.getValue();
@@ -2671,6 +2797,10 @@ void Record::checkUnusedTemplateArgs() {
}
}
+RecordKeeper::RecordKeeper()
+ : Impl(std::make_unique<detail::RecordKeeperImpl>(*this)) {}
+RecordKeeper::~RecordKeeper() = default;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecordKeeper::dump() const { errs() << *this; }
#endif
@@ -2689,7 +2819,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
/// GetNewAnonymousName - Generate a unique anonymous name that can be used as
/// an identifier.
Init *RecordKeeper::getNewAnonymousName() {
- return AnonymousNameInit::get(AnonCounter++);
+ return AnonymousNameInit::get(*this, getImpl().AnonCounter++);
}
// These functions implement the phase timing facility. Starting a timer
@@ -2733,11 +2863,10 @@ void RecordKeeper::stopBackendTimer() {
}
}
-// We cache the record vectors for single classes. Many backends request
-// the same vectors multiple times.
-std::vector<Record *> RecordKeeper::getAllDerivedDefinitions(
- StringRef ClassName) const {
-
+std::vector<Record *>
+RecordKeeper::getAllDerivedDefinitions(StringRef ClassName) const {
+ // We cache the record vectors for single classes. Many backends request
+ // the same vectors multiple times.
auto Pair = ClassRecordsMap.try_emplace(ClassName);
if (Pair.second)
Pair.first->second = getAllDerivedDefinitions(makeArrayRef(ClassName));
@@ -2768,6 +2897,12 @@ std::vector<Record *> RecordKeeper::getAllDerivedDefinitions(
return Defs;
}
+std::vector<Record *>
+RecordKeeper::getAllDerivedDefinitionsIfDefined(StringRef ClassName) const {
+ return getClass(ClassName) ? getAllDerivedDefinitions(ClassName)
+ : std::vector<Record *>();
+}
+
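
The caching in getAllDerivedDefinitions relies on try_emplace: the first lookup for a class name inserts an empty slot and fills it, and later lookups return the stored vector. Reduced to the bare pattern (the compute step is a stand-in):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    #include <vector>

    static const std::vector<int> &
    getCached(llvm::StringMap<std::vector<int>> &Cache, llvm::StringRef Key) {
      auto Pair = Cache.try_emplace(Key);
      if (Pair.second)                  // first request for this key:
        Pair.first->second = {1, 2, 3}; // compute and store (stand-in)
      return Pair.first->second;
    }
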
Init *MapResolver::resolve(Init *VarName) {
auto It = Map.find(VarName);
if (It == Map.end())
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 25079fe33edb..2a4ee4473b56 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -55,10 +55,8 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
std::make_unique<std::vector<PreprocessorControlDesc>>());
// Put all macros defined in the command line into the DefinedMacros set.
- std::for_each(Macros.begin(), Macros.end(),
- [this](const std::string &MacroName) {
- DefinedMacros.insert(MacroName);
- });
+ for (const std::string &MacroName : Macros)
+ DefinedMacros.insert(MacroName);
}
SMLoc TGLexer::getLoc() const {
@@ -586,6 +584,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("find", tgtok::XFind)
.Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
.Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
+ .Case("exists", tgtok::XExists)
.Default(tgtok::Error);
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
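
The lexer wires the new "exists" keyword into its existing StringSwitch chain. That dispatch idiom on its own, with an invented token enum:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    enum class Op { Exists, SetDagOp, Error };

    static Op classify(llvm::StringRef Word) {
      return llvm::StringSwitch<Op>(Word)
          .Case("exists", Op::Exists)
          .Cases("setdagop", "setop", Op::SetDagOp) // "setop" kept as an alias
          .Default(Op::Error);
    }
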
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 857ba09782e8..459ba0f4af64 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -56,6 +56,7 @@ namespace tgtok {
XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XFind, XCast,
XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
+ XExists,
// Boolean literals.
TrueVal, FalseVal,
@@ -337,7 +338,7 @@ private:
//
// The method returns true upon reaching the first non-whitespace symbol
// or EOF, CurPtr is set to point to this symbol. The method returns false,
- // if an error occured during skipping of a C-style comment.
+  // if an error occurred while skipping a C-style comment.
bool prepSkipLineBegin();
// Skip any whitespaces or comments after a preprocessing directive.
@@ -345,7 +346,7 @@ private:
// or end of the file. If there is a multiline C-style comment
// after the preprocessing directive, the method skips
// the comment, so the final CurPtr may point to one of the next lines.
- // The method returns false, if an error occured during skipping
+  // The method returns false if an error occurred while skipping a
// C- or C++-style comment, or a non-whitespace symbol appears
// after the preprocessing directive.
//
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 90646a0c642d..acf93dc3d792 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -112,14 +112,15 @@ static void checkConcrete(Record &R) {
/// Return an Init with a qualifier prefix referring
/// to CurRec's name.
-static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
- Init *Name, StringRef Scoper) {
- Init *NewName =
- BinOpInit::getStrConcat(CurRec.getNameInit(), StringInit::get(Scoper));
+static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name,
+ StringRef Scoper) {
+ RecordKeeper &RK = CurRec.getRecords();
+ Init *NewName = BinOpInit::getStrConcat(CurRec.getNameInit(),
+ StringInit::get(RK, Scoper));
NewName = BinOpInit::getStrConcat(NewName, Name);
if (CurMultiClass && Scoper != "::") {
Init *Prefix = BinOpInit::getStrConcat(CurMultiClass->Rec.getNameInit(),
- StringInit::get("::"));
+ StringInit::get(RK, "::"));
NewName = BinOpInit::getStrConcat(Prefix, NewName);
}
@@ -131,7 +132,8 @@ static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
/// Return the qualified version of the implicit 'NAME' template argument.
static Init *QualifiedNameOfImplicitName(Record &Rec,
MultiClass *MC = nullptr) {
- return QualifyName(Rec, MC, StringInit::get("NAME"), MC ? "::" : ":");
+ return QualifyName(Rec, MC, StringInit::get(Rec.getRecords(), "NAME"),
+ MC ? "::" : ":");
}
static Init *QualifiedNameOfImplicitName(MultiClass *MC) {
@@ -187,7 +189,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
"' is not a bits type");
// Convert the incoming value to a bits type of the appropriate size...
- Init *BI = V->getCastTo(BitsRecTy::get(BitList.size()));
+ Init *BI = V->getCastTo(BitsRecTy::get(Records, BitList.size()));
if (!BI)
return Error(Loc, "Initializer is not compatible with bit range");
@@ -206,7 +208,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
if (!NewBits[i])
NewBits[i] = CurVal->getBit(i);
- V = BitsInit::get(NewBits);
+ V = BitsInit::get(Records, NewBits);
}
if (RV->setValue(V, Loc)) {
@@ -262,8 +264,8 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
Init *Name;
if (CurRec->isClass())
- Name =
- VarInit::get(QualifiedNameOfImplicitName(*CurRec), StringRecTy::get());
+ Name = VarInit::get(QualifiedNameOfImplicitName(*CurRec),
+ StringRecTy::get(Records));
else
Name = CurRec->getNameInit();
R.set(QualifiedNameOfImplicitName(*SC), Name);
@@ -333,9 +335,9 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
}
}
- TemplateArgs.emplace_back(
- QualifiedNameOfImplicitName(SMC),
- VarInit::get(QualifiedNameOfImplicitName(CurMC), StringRecTy::get()));
+ TemplateArgs.emplace_back(QualifiedNameOfImplicitName(SMC),
+ VarInit::get(QualifiedNameOfImplicitName(CurMC),
+ StringRecTy::get(Records)));
// Add all of the defs in the subclass into the current multiclass.
return resolve(SMC->Entries, TemplateArgs, false, &CurMC->Entries);
@@ -540,7 +542,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
// These are all of the tokens that can begin an object body.
// Some of these can also begin values but we disallow those cases
// because they are unlikely to be useful.
- return UnsetInit::get();
+ return UnsetInit::get(Records);
default:
break;
}
@@ -549,7 +551,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
if (CurMultiClass)
CurRec = &CurMultiClass->Rec;
- Init *Name = ParseValue(CurRec, StringRecTy::get(), ParseNameMode);
+ Init *Name = ParseValue(CurRec, StringRecTy::get(Records), ParseNameMode);
if (!Name)
return nullptr;
@@ -558,8 +560,8 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
HasReferenceResolver R(NameStr);
Name->resolveReferences(R);
if (!R.found())
- Name = BinOpInit::getStrConcat(VarInit::get(NameStr, StringRecTy::get()),
- Name);
+ Name = BinOpInit::getStrConcat(
+ VarInit::get(NameStr, StringRecTy::get(Records)), Name);
}
return Name;
@@ -812,12 +814,21 @@ RecTy *TGParser::ParseType() {
switch (Lex.getCode()) {
default: TokError("Unknown token when expecting a type"); return nullptr;
case tgtok::String:
- case tgtok::Code: Lex.Lex(); return StringRecTy::get();
- case tgtok::Bit: Lex.Lex(); return BitRecTy::get();
- case tgtok::Int: Lex.Lex(); return IntRecTy::get();
- case tgtok::Dag: Lex.Lex(); return DagRecTy::get();
+ case tgtok::Code:
+ Lex.Lex();
+ return StringRecTy::get(Records);
+ case tgtok::Bit:
+ Lex.Lex();
+ return BitRecTy::get(Records);
+ case tgtok::Int:
+ Lex.Lex();
+ return IntRecTy::get(Records);
+ case tgtok::Dag:
+ Lex.Lex();
+ return DagRecTy::get(Records);
case tgtok::Id:
- if (Record *R = ParseClassID()) return RecordRecTy::get(R);
+ if (Record *R = ParseClassID())
+ return RecordRecTy::get(R);
TokError("unknown class name");
return nullptr;
case tgtok::Bits: {
@@ -835,7 +846,7 @@ RecTy *TGParser::ParseType() {
return nullptr;
}
Lex.Lex(); // Eat '>'
- return BitsRecTy::get(Val);
+ return BitsRecTy::get(Records, Val);
}
case tgtok::List: {
if (Lex.Lex() != tgtok::less) { // Eat 'bits'
@@ -878,7 +889,7 @@ Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
RV->setUsed(true);
return VarInit::get(TemplateArgName, RV->getType());
} else if (Name->getValue() == "NAME") {
- return VarInit::get(TemplateArgName, StringRecTy::get());
+ return VarInit::get(TemplateArgName, StringRecTy::get(Records));
}
}
@@ -947,7 +958,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XNOT:
Lex.Lex(); // eat the operation
Code = UnOpInit::NOT;
- Type = IntRecTy::get();
+ Type = IntRecTy::get(Records);
break;
case tgtok::XHead:
Lex.Lex(); // eat the operation
@@ -960,12 +971,12 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XSize:
Lex.Lex();
Code = UnOpInit::SIZE;
- Type = IntRecTy::get();
+ Type = IntRecTy::get(Records);
break;
case tgtok::XEmpty:
Lex.Lex(); // eat the operation
Code = UnOpInit::EMPTY;
- Type = IntRecTy::get();
+ Type = IntRecTy::get(Records);
break;
case tgtok::XGetDagOp:
Lex.Lex(); // eat the operation
@@ -985,7 +996,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
// but keep parsing, to consume the operand
}
} else {
- Type = RecordRecTy::get({});
+ Type = RecordRecTy::get(Records, {});
}
Code = UnOpInit::GETDAGOP;
break;
@@ -1085,6 +1096,52 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return (IsAOpInit::get(Type, LHS))->Fold();
}
+ case tgtok::XExists: {
+ // Value ::= !exists '<' Type '>' '(' Value ')'
+ Lex.Lex(); // eat the operation
+
+ RecTy *Type = ParseOperatorType();
+ if (!Type)
+ return nullptr;
+
+ if (!consume(tgtok::l_paren)) {
+ TokError("expected '(' after type of !exists");
+ return nullptr;
+ }
+
+ SMLoc ExprLoc = Lex.getLoc();
+ Init *Expr = ParseValue(CurRec);
+ if (!Expr)
+ return nullptr;
+
+ TypedInit *ExprType = dyn_cast<TypedInit>(Expr);
+ if (!ExprType) {
+ Error(ExprLoc, "expected string type argument in !exists operator");
+ return nullptr;
+ }
+
+ RecordRecTy *RecType = dyn_cast<RecordRecTy>(ExprType->getType());
+ if (RecType) {
+ Error(ExprLoc,
+ "expected string type argument in !exists operator, please "
+ "use !isa instead");
+ return nullptr;
+ }
+
+ StringRecTy *SType = dyn_cast<StringRecTy>(ExprType->getType());
+ if (!SType) {
+ Error(ExprLoc, "expected string type argument in !exists operator");
+ return nullptr;
+ }
+
+ if (!consume(tgtok::r_paren)) {
+ TokError("expected ')' in !exists");
+ return nullptr;
+ }
+
+ return (ExistsOpInit::get(Type, Expr))->Fold(CurRec);
+ }
+
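
The new parse case follows this file's consume-or-error discipline: consume eats an expected token and returns false otherwise, so each step can bail with a message. Its skeleton, detached from TableGen (token kinds and lexer invented):

    #include <cstdio>

    enum Tok { LParen, RParen, Value, End };

    struct Lexer {
      const Tok *Cur; // stream of pre-lexed tokens, terminated by End
      Tok peek() const { return *Cur; }
      void lex() { ++Cur; }
    };

    // Eat K if it is next; report whether we did, so callers can branch.
    static bool consume(Lexer &L, Tok K) {
      if (L.peek() != K)
        return false;
      L.lex();
      return true;
    }

    static bool parseCall(Lexer &L) {
      if (!consume(L, LParen)) {
        std::fprintf(stderr, "expected '('\n");
        return false;
      }
      if (!consume(L, Value)) {
        std::fprintf(stderr, "expected value\n");
        return false;
      }
      if (!consume(L, RParen)) {
        std::fprintf(stderr, "expected ')'\n");
        return false;
      }
      return true;
    }
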
case tgtok::XConcat:
case tgtok::XADD:
case tgtok::XSUB:
@@ -1143,8 +1200,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
llvm_unreachable("Unhandled code!");
case tgtok::XConcat:
case tgtok::XSetDagOp:
- Type = DagRecTy::get();
- ArgType = DagRecTy::get();
+ Type = DagRecTy::get(Records);
+ ArgType = DagRecTy::get(Records);
break;
case tgtok::XAND:
case tgtok::XOR:
@@ -1155,8 +1212,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XADD:
case tgtok::XSUB:
case tgtok::XMUL:
- Type = IntRecTy::get();
- ArgType = IntRecTy::get();
+ Type = IntRecTy::get(Records);
+ ArgType = IntRecTy::get(Records);
break;
case tgtok::XEq:
case tgtok::XNe:
@@ -1164,7 +1221,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XLt:
case tgtok::XGe:
case tgtok::XGt:
- Type = BitRecTy::get();
+ Type = BitRecTy::get(Records);
// ArgType for the comparison operators is not yet known.
break;
case tgtok::XListConcat:
@@ -1175,11 +1232,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
// Can't do any typechecking until we parse the first argument.
break;
case tgtok::XStrConcat:
- Type = StringRecTy::get();
- ArgType = StringRecTy::get();
+ Type = StringRecTy::get(Records);
+ ArgType = StringRecTy::get(Records);
break;
case tgtok::XInterleave:
- Type = StringRecTy::get();
+ Type = StringRecTy::get(Records);
// The first argument type is not yet known.
}
@@ -1253,9 +1310,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
break;
case BinOpInit::EQ:
case BinOpInit::NE:
- if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) &&
- !ArgType->typeIsConvertibleTo(StringRecTy::get()) &&
- !ArgType->typeIsConvertibleTo(RecordRecTy::get({}))) {
+ if (!ArgType->typeIsConvertibleTo(IntRecTy::get(Records)) &&
+ !ArgType->typeIsConvertibleTo(StringRecTy::get(Records)) &&
+ !ArgType->typeIsConvertibleTo(RecordRecTy::get(Records, {}))) {
Error(InitLoc, Twine("expected bit, bits, int, string, or record; "
"got value of type '") + ArgType->getAsString() +
"'");
@@ -1266,8 +1323,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case BinOpInit::LT:
case BinOpInit::GE:
case BinOpInit::GT:
- if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) &&
- !ArgType->typeIsConvertibleTo(StringRecTy::get())) {
+ if (!ArgType->typeIsConvertibleTo(IntRecTy::get(Records)) &&
+ !ArgType->typeIsConvertibleTo(StringRecTy::get(Records))) {
Error(InitLoc, Twine("expected bit, bits, int, or string; "
"got value of type '") + ArgType->getAsString() +
"'");
@@ -1277,8 +1334,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case BinOpInit::INTERLEAVE:
switch (InitList.size()) {
case 1: // First argument must be a list of strings or integers.
- if (ArgType != StringRecTy::get()->getListTy() &&
- !ArgType->typeIsConvertibleTo(IntRecTy::get()->getListTy())) {
+ if (ArgType != StringRecTy::get(Records)->getListTy() &&
+ !ArgType->typeIsConvertibleTo(
+ IntRecTy::get(Records)->getListTy())) {
Error(InitLoc, Twine("expected list of string, int, bits, or bit; "
"got value of type '") +
ArgType->getAsString() + "'");
@@ -1323,7 +1381,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case BinOpInit::SETDAGOP:
// After parsing the first dag argument, switch to expecting
// a record, with no restriction on its superclasses.
- ArgType = RecordRecTy::get({});
+ ArgType = RecordRecTy::get(Records, {});
break;
default:
break;
@@ -1383,7 +1441,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
default: llvm_unreachable("Unhandled code!");
case tgtok::XDag:
Code = TernOpInit::DAG;
- Type = DagRecTy::get();
+ Type = DagRecTy::get(Records);
ItemType = nullptr;
break;
case tgtok::XIf:
@@ -1445,7 +1503,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Error(RHSLoc, "could not determine type of the name list in !dag");
return nullptr;
}
- if (RHSt && StringRecTy::get()->getListTy() != RHSt->getType()) {
+ if (RHSt && StringRecTy::get(Records)->getListTy() != RHSt->getType()) {
Error(RHSLoc, Twine("expected list<string>, got type '") +
RHSt->getType()->getAsString() + "'");
return nullptr;
@@ -1465,16 +1523,16 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
if (TypedInit *MHSt = dyn_cast<TypedInit>(MHS))
MHSTy = MHSt->getType();
if (BitsInit *MHSbits = dyn_cast<BitsInit>(MHS))
- MHSTy = BitsRecTy::get(MHSbits->getNumBits());
+ MHSTy = BitsRecTy::get(Records, MHSbits->getNumBits());
if (isa<BitInit>(MHS))
- MHSTy = BitRecTy::get();
+ MHSTy = BitRecTy::get(Records);
if (TypedInit *RHSt = dyn_cast<TypedInit>(RHS))
RHSTy = RHSt->getType();
if (BitsInit *RHSbits = dyn_cast<BitsInit>(RHS))
- RHSTy = BitsRecTy::get(RHSbits->getNumBits());
+ RHSTy = BitsRecTy::get(Records, RHSbits->getNumBits());
if (isa<BitInit>(RHS))
- RHSTy = BitRecTy::get();
+ RHSTy = BitRecTy::get(Records);
// For UnsetInit, it's typed from the other hand.
if (isa<UnsetInit>(MHS))
@@ -1569,7 +1627,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
- Init *A = StringInit::get(Lex.getCurStrVal());
+ Init *A = StringInit::get(Records, Lex.getCurStrVal());
if (CurRec && CurRec->getValue(A)) {
TokError((Twine("left !foldl variable '") + A->getAsString() +
"' already defined")
@@ -1587,7 +1645,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
- Init *B = StringInit::get(Lex.getCurStrVal());
+ Init *B = StringInit::get(Records, Lex.getCurStrVal());
if (CurRec && CurRec->getValue(B)) {
TokError((Twine("right !foldl variable '") + B->getAsString() +
"' already defined")
@@ -1679,7 +1737,7 @@ RecTy *TGParser::ParseOperatorType() {
/// Substr ::= !substr(string, start-int [, length-int]) => string
Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
TernOpInit::TernaryOp Code = TernOpInit::SUBSTR;
- RecTy *Type = StringRecTy::get();
+ RecTy *Type = StringRecTy::get(Records);
Lex.Lex(); // eat the operation
@@ -1710,7 +1768,7 @@ Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
if (!RHS)
return nullptr;
} else {
- RHS = IntInit::get(std::numeric_limits<int64_t>::max());
+ RHS = IntInit::get(Records, std::numeric_limits<int64_t>::max());
}
if (!consume(tgtok::r_paren)) {
@@ -1767,7 +1825,7 @@ Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
/// Substr ::= !find(string, string [, start-int]) => int
Init *TGParser::ParseOperationFind(Record *CurRec, RecTy *ItemType) {
TernOpInit::TernaryOp Code = TernOpInit::FIND;
- RecTy *Type = IntRecTy::get();
+ RecTy *Type = IntRecTy::get(Records);
Lex.Lex(); // eat the operation
@@ -1798,7 +1856,7 @@ Init *TGParser::ParseOperationFind(Record *CurRec, RecTy *ItemType) {
if (!RHS)
return nullptr;
} else {
- RHS = IntInit::get(0);
+ RHS = IntInit::get(Records, 0);
}
if (!consume(tgtok::r_paren)) {
@@ -1868,7 +1926,7 @@ Init *TGParser::ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
- Init *LHS = StringInit::get(Lex.getCurStrVal());
+ Init *LHS = StringInit::get(Records, Lex.getCurStrVal());
Lex.Lex(); // eat the ID.
if (CurRec && CurRec->getValue(LHS)) {
@@ -1908,7 +1966,7 @@ Init *TGParser::ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType) {
if (ListRecTy *OutListTy = dyn_cast<ListRecTy>(ItemType)) {
ExprEltType = (Operation == tgtok::XForEach)
? OutListTy->getElementType()
- : IntRecTy::get();
+ : IntRecTy::get(Records);
} else {
Error(OpLoc,
"expected value of type '" +
@@ -2028,9 +2086,9 @@ Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
if (TypedInit *Vt = dyn_cast<TypedInit>(V))
VTy = Vt->getType();
if (BitsInit *Vbits = dyn_cast<BitsInit>(V))
- VTy = BitsRecTy::get(Vbits->getNumBits());
+ VTy = BitsRecTy::get(Records, Vbits->getNumBits());
if (isa<BitInit>(V))
- VTy = BitRecTy::get();
+ VTy = BitRecTy::get(Records);
if (Type == nullptr) {
if (!isa<UnsetInit>(V))
@@ -2084,23 +2142,23 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
default: TokError("Unknown or reserved token when parsing a value"); break;
case tgtok::TrueVal:
- R = IntInit::get(1);
+ R = IntInit::get(Records, 1);
Lex.Lex();
break;
case tgtok::FalseVal:
- R = IntInit::get(0);
+ R = IntInit::get(Records, 0);
Lex.Lex();
break;
case tgtok::IntVal:
- R = IntInit::get(Lex.getCurIntVal());
+ R = IntInit::get(Records, Lex.getCurIntVal());
Lex.Lex();
break;
case tgtok::BinaryIntVal: {
auto BinaryVal = Lex.getCurBinaryIntVal();
SmallVector<Init*, 16> Bits(BinaryVal.second);
for (unsigned i = 0, e = BinaryVal.second; i != e; ++i)
- Bits[i] = BitInit::get(BinaryVal.first & (1LL << i));
- R = BitsInit::get(Bits);
+ Bits[i] = BitInit::get(Records, BinaryVal.first & (1LL << i));
+ R = BitsInit::get(Records, Bits);
Lex.Lex();
break;
}
@@ -2114,20 +2172,20 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
Lex.Lex();
}
- R = StringInit::get(Val);
+ R = StringInit::get(Records, Val);
break;
}
case tgtok::CodeFragment:
- R = StringInit::get(Lex.getCurStrVal(), StringInit::SF_Code);
+ R = StringInit::get(Records, Lex.getCurStrVal(), StringInit::SF_Code);
Lex.Lex();
break;
case tgtok::question:
- R = UnsetInit::get();
+ R = UnsetInit::get(Records);
Lex.Lex();
break;
case tgtok::Id: {
SMLoc NameLoc = Lex.getLoc();
- StringInit *Name = StringInit::get(Lex.getCurStrVal());
+ StringInit *Name = StringInit::get(Records, Lex.getCurStrVal());
if (Lex.Lex() != tgtok::less) // consume the Id.
return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue
@@ -2202,7 +2260,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
// Fallthrough to try convert this to a bit.
}
// All other values must be convertible to just a single bit.
- Init *Bit = Vals[i]->getCastTo(BitRecTy::get());
+ Init *Bit = Vals[i]->getCastTo(BitRecTy::get(Records));
if (!Bit) {
Error(BraceLoc, "Element #" + Twine(i) + " (" + Vals[i]->getAsString() +
") is not convertable to a bit");
@@ -2211,7 +2269,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
NewBits.push_back(Bit);
}
std::reverse(NewBits.begin(), NewBits.end());
- return BitsInit::get(NewBits);
+ return BitsInit::get(Records, NewBits);
}
case tgtok::l_square: { // Value ::= '[' ValueList ']'
Lex.Lex(); // eat the '['
@@ -2322,7 +2380,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
TokError("expected variable name in dag operator");
return nullptr;
}
- OperatorName = StringInit::get(Lex.getCurStrVal());
+ OperatorName = StringInit::get(Records, Lex.getCurStrVal());
Lex.Lex(); // eat the VarName.
}
@@ -2346,6 +2404,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XEmpty:
case tgtok::XCast:
case tgtok::XGetDagOp: // Value ::= !unop '(' Value ')'
+ case tgtok::XExists:
case tgtok::XIsA:
case tgtok::XConcat:
case tgtok::XDag:
@@ -2451,7 +2510,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
TokError("expected field identifier after '.'");
return nullptr;
}
- StringInit *FieldName = StringInit::get(Lex.getCurStrVal());
+ StringInit *FieldName = StringInit::get(Records, Lex.getCurStrVal());
if (!Result->getFieldType(FieldName)) {
TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" +
Result->getAsString() + "'");
@@ -2494,9 +2553,9 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
// Create a !strconcat() operation, first casting each operand to
// a string if necessary.
- if (LHS->getType() != StringRecTy::get()) {
+ if (LHS->getType() != StringRecTy::get(Records)) {
auto CastLHS = dyn_cast<TypedInit>(
- UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get())
+ UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get(Records))
->Fold(CurRec));
if (!CastLHS) {
Error(PasteLoc,
@@ -2518,7 +2577,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
// because they are unlikely to be useful.
// Trailing paste, concat with an empty string.
- RHS = StringInit::get("");
+ RHS = StringInit::get(Records, "");
break;
default:
@@ -2531,9 +2590,9 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
return nullptr;
}
- if (RHS->getType() != StringRecTy::get()) {
+ if (RHS->getType() != StringRecTy::get(Records)) {
auto CastRHS = dyn_cast<TypedInit>(
- UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get())
+ UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get(Records))
->Fold(CurRec));
if (!CastRHS) {
Error(PasteLoc,
@@ -2566,8 +2625,8 @@ void TGParser::ParseDagArgList(
// DagArg ::= VARNAME
if (Lex.getCode() == tgtok::VarName) {
// A missing value is treated like '?'.
- StringInit *VarName = StringInit::get(Lex.getCurStrVal());
- Result.emplace_back(UnsetInit::get(), VarName);
+ StringInit *VarName = StringInit::get(Records, Lex.getCurStrVal());
+ Result.emplace_back(UnsetInit::get(Records), VarName);
Lex.Lex();
} else {
// DagArg ::= Value (':' VARNAME)?
@@ -2585,7 +2644,7 @@ void TGParser::ParseDagArgList(
Result.clear();
return;
}
- VarName = StringInit::get(Lex.getCurStrVal());
+ VarName = StringInit::get(Records, Lex.getCurStrVal());
Lex.Lex(); // eat the VarName.
}
@@ -2692,7 +2751,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
}
SMLoc IdLoc = Lex.getLoc();
- Init *DeclName = StringInit::get(Str);
+ Init *DeclName = StringInit::get(Records, Str);
Lex.Lex();
bool BadField;
@@ -2745,7 +2804,7 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
return nullptr;
}
- Init *DeclName = StringInit::get(Lex.getCurStrVal());
+ Init *DeclName = StringInit::get(Records, Lex.getCurStrVal());
Lex.Lex();
// If a value is present, parse it.
@@ -2799,10 +2858,10 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
if (!Ranges.empty()) {
assert(!IterType && "Type already initialized?");
- IterType = IntRecTy::get();
+ IterType = IntRecTy::get(Records);
std::vector<Init *> Values;
for (unsigned R : Ranges)
- Values.push_back(IntInit::get(R));
+ Values.push_back(IntInit::get(Records, R));
ForeachListValue = ListInit::get(Values, IterType);
}
@@ -2879,7 +2938,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
return TokError("expected field identifier after let");
SMLoc IdLoc = Lex.getLoc();
- StringInit *FieldName = StringInit::get(Lex.getCurStrVal());
+ StringInit *FieldName = StringInit::get(Records, Lex.getCurStrVal());
Lex.Lex(); // eat the field name.
SmallVector<unsigned, 16> BitList;
@@ -2898,7 +2957,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
if (!BitList.empty() && isa<BitsRecTy>(Type)) {
// When assigning to a subset of a 'bits' object, expect the RHS to have
// the type of that subset instead of the type of the whole object.
- Type = BitsRecTy::get(BitList.size());
+ Type = BitsRecTy::get(Records, BitList.size());
}
Init *Val = ParseValue(CurRec, Type);
@@ -3056,7 +3115,7 @@ bool TGParser::ParseDefset() {
if (Lex.getCode() != tgtok::Id)
return TokError("expected identifier");
- StringInit *DeclName = StringInit::get(Lex.getCurStrVal());
+ StringInit *DeclName = StringInit::get(Records, Lex.getCurStrVal());
if (Records.getGlobal(DeclName->getValue()))
return TokError("def or global variable of this name already exists");
@@ -3093,7 +3152,7 @@ bool TGParser::ParseDefvar() {
if (Lex.getCode() != tgtok::Id)
return TokError("expected identifier");
- StringInit *DeclName = StringInit::get(Lex.getCurStrVal());
+ StringInit *DeclName = StringInit::get(Records, Lex.getCurStrVal());
if (CurLocalScope) {
if (CurLocalScope->varAlreadyDefined(DeclName->getValue()))
return TokError("local variable of this name already exists");
@@ -3201,10 +3260,10 @@ bool TGParser::ParseIf(MultiClass *CurMultiClass) {
// loop, over a list of length 0 or 1 depending on the condition, and with no
// iteration variable being assigned.
- ListInit *EmptyList = ListInit::get({}, BitRecTy::get());
+ ListInit *EmptyList = ListInit::get({}, BitRecTy::get(Records));
ListInit *SingletonList =
- ListInit::get({BitInit::get(true)}, BitRecTy::get());
- RecTy *BitListTy = ListRecTy::get(BitRecTy::get());
+ ListInit::get({BitInit::get(Records, true)}, BitRecTy::get(Records));
+ RecTy *BitListTy = ListRecTy::get(BitRecTy::get(Records));
// The foreach containing the then-clause selects SingletonList if
// the condition is true.
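  // Roughly (an illustrative sketch, not the parser's literal output):
  //   if cond then { A } else { B }
  // is handled as
  //   foreach <anon> = !if(cond, [true], []) in { A }
  //   foreach <anon> = !if(cond, [], [true]) in { B }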
@@ -3369,7 +3428,7 @@ void TGParser::ParseLetList(SmallVectorImpl<LetRecord> &Result) {
return;
}
- StringInit *Name = StringInit::get(Lex.getCurStrVal());
+ StringInit *Name = StringInit::get(Records, Lex.getCurStrVal());
SMLoc NameLoc = Lex.getLoc();
Lex.Lex(); // Eat the identifier.
@@ -3570,7 +3629,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
if (CurMultiClass)
DefmName = BinOpInit::getStrConcat(
VarInit::get(QualifiedNameOfImplicitName(CurMultiClass),
- StringRecTy::get()),
+ StringRecTy::get(Records)),
DefmName);
}
diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h
index 00883c858d58..d4b928c62fd7 100644
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@@ -45,7 +45,7 @@ namespace llvm {
void dump() const;
- RecordsEntry() {}
+ RecordsEntry() = default;
RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {}
RecordsEntry(std::unique_ptr<ForeachLoop> Loop)
: Loop(std::move(Loop)) {}
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 4d1464901777..a6065d4ed9ec 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -16,6 +16,8 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
@@ -71,6 +73,7 @@ void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
void initializeAArch64BranchTargetsPass(PassRegistry&);
+void initializeAArch64CFIFixupPass(PassRegistry&);
void initializeAArch64CollectLOHPass(PassRegistry&);
void initializeAArch64CondBrTuningPass(PassRegistry &);
void initializeAArch64CompressJumpTablesPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 9a04b28a8b8f..f092c039b58e 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -64,6 +64,10 @@ def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
"Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">;
+def FeatureLDAPR : SubtargetFeature<"ldapr", "HasLDAPR", "true",
+ "Use LDAPR to lower atomic loads; experimental until we "
+ "have more testing/a formal correctness proof">;
+
def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
"Enable out of line atomics to support LSE instructions">;
@@ -154,6 +158,10 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
"Has zero-cycle zeroing instructions for generic registers">;
+// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0",
+// as movi is more efficient across all cores. Newer cores can eliminate
+// fmovs early, so there is no difference from movi there, but this is not
+// true for all implementations.
def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
"Has no zero-cycle zeroing instructions for FP registers">;
@@ -168,7 +176,7 @@ def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
"The zero-cycle floating-point zeroing instruction has a bug">;
def FeatureStrictAlign : SubtargetFeature<"strict-align",
- "StrictAlign", "true",
+ "RequiresStrictAlign", "true",
"Disallow all unaligned memory "
"access">;
@@ -190,11 +198,11 @@ def FeaturePredictableSelectIsExpensive : SubtargetFeature<
"Prefer likely predicted branches over selects">;
def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
- "CustomAsCheapAsMove", "true",
+ "HasCustomCheapAsMoveHandling", "true",
"Use custom handling of cheap instructions">;
def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
- "ExynosAsCheapAsMove", "true",
+ "HasExynosCheapAsMoveHandling", "true",
"Use Exynos specific handling of cheap instructions",
[FeatureCustomCheapAsMoveHandling]>;
@@ -202,12 +210,16 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
- "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
+ "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">;
def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
- "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
+ "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;
+
+def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address",
+ "IsStoreAddressAscend", "false",
+ "Schedule vector stores by ascending address">;
-def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow",
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
"true", "STR of Q register with register offset is slow">;
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
@@ -246,6 +258,10 @@ def FeatureFuseCryptoEOR : SubtargetFeature<
"fuse-crypto-eor", "HasFuseCryptoEOR", "true",
"CPU fuses AES/PMULL and EOR operations">;
+def FeatureFuseAdrpAdd : SubtargetFeature<
+ "fuse-adrp-add", "HasFuseAdrpAdd", "true",
+ "CPU fuses adrp+add operations">;
+
def FeatureFuseLiterals : SubtargetFeature<
"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
@@ -438,13 +454,8 @@ def FeatureEnhancedCounterVirtualization :
def FeatureRME : SubtargetFeature<"rme", "HasRME",
"true", "Enable Realm Management Extension">;
-// A subset of SVE(2) instructions are legal in Streaming SVE execution mode
-// defined by SME.
-def FeatureStreamingSVE : SubtargetFeature<"streaming-sve",
- "HasStreamingSVE", "true",
- "Enable subset of SVE(2) instructions for Streaming SVE execution mode">;
def FeatureSME : SubtargetFeature<"sme", "HasSME", "true",
- "Enable Scalable Matrix Extension (SME)", [FeatureStreamingSVE, FeatureBF16]>;
+ "Enable Scalable Matrix Extension (SME)", [FeatureBF16, FeatureUseScalarIncVL]>;
def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true",
"Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>;
@@ -464,6 +475,11 @@ def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769",
"FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">;
+def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
+ "NoBTIAtReturnTwice", "true",
+ "Don't place a BTI instruction "
+ "after a return-twice">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -534,7 +550,18 @@ def HasV8_0rOps : SubtargetFeature<
FeaturePAuth, FeatureRCPC,
//v8.4
FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI,
- FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO]>;
+ FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
+ // Not mandatory in v8.0-R, but included here on the grounds that it
+ // only enables names of system registers
+ FeatureSpecRestrict
+ ]>;
+
+// Only intended to be used by disassemblers.
+def FeatureAll
+ : SubtargetFeature<"all", "IsAll", "true", "Enable all instructions", []>;
+
+class AssemblerPredicateWithAll<dag cond, string name="">
+ : AssemblerPredicate<(any_of FeatureAll, cond), name>;
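+// With any_of, the predicate is satisfied either by the real feature bits
+// or by FeatureAll alone, so a disassembler constructed with "+all" can
+// decode instructions from every extension.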
//===----------------------------------------------------------------------===//
// Register File Description
@@ -552,6 +579,7 @@ include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
include "AArch64SchedPredicates.td"
include "AArch64SchedPredExynos.td"
+include "AArch64SchedPredAmpere.td"
include "AArch64Combine.td"
def AArch64InstrInfo : InstrInfo;
@@ -596,7 +624,7 @@ class AArch64Unsupported { list<Predicate> F; }
def SVEUnsupported : AArch64Unsupported {
let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
- HasSVE2BitPerm, HasSVEorStreamingSVE, HasSVE2orStreamingSVE];
+ HasSVE2BitPerm, HasSVEorSME, HasSVE2orSME];
}
def PAUnsupported : AArch64Unsupported {
@@ -621,6 +649,7 @@ include "AArch64SchedThunderX2T99.td"
include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
include "AArch64SchedTSV110.td"
+include "AArch64SchedAmpere1.td"
def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors">;
@@ -649,6 +678,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureFuseAES,
FeatureBalanceFPOps,
FeatureCustomCheapAsMoveHandling,
+ FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive]>;
@@ -657,11 +687,13 @@ def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
"Cortex-A65 ARM processors", [
FeatureFuseAES,
FeatureFuseAddress,
+ FeatureFuseAdrpAdd,
FeatureFuseLiterals]>;
def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
"Cortex-A72 ARM processors", [
FeatureFuseAES,
+ FeatureFuseAdrpAdd,
FeatureFuseLiterals]>;
def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
@@ -802,6 +834,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
+ FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeatureZCRegMove,
FeatureZCZeroing]>;
@@ -813,13 +846,15 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseCCSelect,
+ FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeatureLSLFast,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive]>;
-def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
- "Samsung Exynos-M3 processors",
+// Re-uses some scheduling and tunings from the ExynosM3 proc family.
+def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
+ "Samsung Exynos-M4 processors",
[FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
FeatureExynosCheapAsMoveHandling,
@@ -828,6 +863,7 @@ def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureFuseAES,
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
+ FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeatureLSLFast,
FeaturePostRAScheduler,
@@ -934,6 +970,16 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
FeatureFuseAES,
FeaturePostRAScheduler]>;
+def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
+ "Ampere Computing Ampere-1 processors", [
+ FeaturePostRAScheduler,
+ FeatureFuseAES,
+ FeatureLSLFast,
+ FeatureAggressiveFMA,
+ FeatureArithmeticBccFusion,
+ FeatureCmpBccFusion,
+ FeatureFuseAddress,
+ FeatureFuseLiterals]>;
def ProcessorFeatures {
list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
@@ -947,13 +993,14 @@ def ProcessorFeatures {
FeatureFP16FML];
list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
- FeatureRCPC, FeatureSSBS, FeatureRAS];
+ FeatureRCPC, FeatureSSBS, FeatureRAS,
+ FeaturePerfMon];
list<SubtargetFeature> A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
- FeatureRCPC, FeatureSSBS];
+ FeatureRCPC, FeatureSSBS, FeaturePerfMon];
list<SubtargetFeature> A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
- FeatureRCPC];
+ FeatureRCPC, FeaturePerfMon, FeatureSSBS];
list<SubtargetFeature> A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
FeatureRCPC, FeaturePerfMon, FeatureSPE,
@@ -968,14 +1015,15 @@ def ProcessorFeatures {
FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8];
list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
FeatureFP16FML, FeatureSSBS, FeaturePredRes,
- FeatureSB, FeatureSpecRestrict];
+ FeatureSB];
list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureRCPC, FeaturePerfMon,
- FeatureSPE, FeatureFullFP16, FeatureDotProd];
+ FeatureSPE, FeatureFullFP16, FeatureDotProd,
+ FeatureSSBS];
list<SubtargetFeature> X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureRCPC, FeaturePerfMon,
FeatureSPE, FeatureFullFP16, FeatureDotProd,
- FeaturePAuth];
+ FeaturePAuth, FeatureSSBS];
list<SubtargetFeature> X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
@@ -1012,13 +1060,15 @@ def ProcessorFeatures {
FeatureRDM];
list<SubtargetFeature> NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
- FeatureRCPC, FeatureSSBS];
+ FeatureRCPC, FeatureSSBS, FeaturePerfMon];
list<SubtargetFeature> NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
- FeatureRCPC, FeatureSPE, FeatureSSBS];
+ FeatureRCPC, FeatureSPE, FeatureSSBS,
+ FeaturePerfMon];
list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE,
FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
- FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto];
+ FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto,
+ FeaturePerfMon];
list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
@@ -1041,17 +1091,20 @@ def ProcessorFeatures {
list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSPE,
FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
+ list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
+ FeatureMTE, FeatureSSBS];
// ETE and TRBE are future architecture extensions. We temporarily enable them
// by default for users targeting generic AArch64. The extensions do not
// affect code generated by the compiler and can be used only by explicitly
// mentioning the new system register names in assembly.
- list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureETE];
+ list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE];
}
-
+// FeatureFuseAdrpAdd is enabled under Generic to keep adrp+add pairs
+// adjacent, so that the linker can apply its merging/relaxation
+// optimizations to them.
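+// For example, keeping the pair
+//   adrp x0, sym
+//   add  x0, x0, :lo12:sym
+// adjacent lets the linker relax it to a single pc-relative instruction
+// when sym turns out to be within range.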
def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,
- [FeatureFuseAES, FeaturePostRAScheduler]>;
+ [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler]>;
def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,
[TuneA35]>;
def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53,
@@ -1178,6 +1231,10 @@ def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel,
[TuneCarmel]>;
+// Ampere Computing
+def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1,
+ [TuneAmpere1]>;
+
//===----------------------------------------------------------------------===//
// Assembly parser
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 4cdf5f144437..37a65b64a885 100644
--- a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -223,6 +223,7 @@ AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) {
if (isFirstInstructionInSequence(PrevInstr) &&
isSecondInstructionInSequence(CurrInstr)) {
LLVM_DEBUG(dbgs() << " ** pattern found at Idx " << Idx << "!\n");
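+      // Reference Idx explicitly: in release builds LLVM_DEBUG expands to
+      // nothing, which would otherwise leave Idx unused and cause a warning.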
+ (void) Idx;
Sequences.push_back(CurrInstr);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b54a0eaba7d1..ef4860979dd3 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -132,7 +132,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
AArch64FI = MF.getInfo<AArch64FunctionInfo>();
- STI = static_cast<const AArch64Subtarget*>(&MF.getSubtarget());
+ STI = &MF.getSubtarget<AArch64Subtarget>();
SetupMachineFunction(MF);
@@ -143,10 +143,10 @@ public:
int Type =
COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer->EmitCOFFSymbolStorageClass(Scl);
- OutStreamer->EmitCOFFSymbolType(Type);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->beginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->emitCOFFSymbolStorageClass(Scl);
+ OutStreamer->emitCOFFSymbolType(Type);
+ OutStreamer->endCOFFSymbolDef();
}
// Emit the rest of the function body.
@@ -204,10 +204,10 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
- OutStreamer->BeginCOFFSymbolDef(S);
- OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->beginCOFFSymbolDef(S);
+ OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer->endCOFFSymbolDef();
int64_t Feat00Flags = 0;
if (M.getModuleFlag("cfguard")) {
@@ -251,7 +251,7 @@ void AArch64AsmPrinter::emitFunctionHeaderComment() {
const AArch64FunctionInfo *FI = MF->getInfo<AArch64FunctionInfo>();
Optional<std::string> OutlinerString = FI->getOutliningStyle();
if (OutlinerString != None)
- OutStreamer->GetCommentOS() << ' ' << OutlinerString;
+ OutStreamer->getCommentOS() << ' ' << OutlinerString;
}
void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
@@ -378,10 +378,10 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) {
bool CompileKernel =
(AccessInfo >> HWASanAccessInfo::CompileKernelShift) & 1;
- OutStreamer->SwitchSection(OutContext.getELFSection(
+ OutStreamer->switchSection(OutContext.getELFSection(
".text.hot", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
- Sym->getName(), /*IsComdat=*/true));
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Sym->getName(),
+ /*IsComdat=*/true));
OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
OutStreamer->emitSymbolAttribute(Sym, MCSA_Weak);
@@ -827,7 +827,7 @@ void AArch64AsmPrinter::emitJumpTableInfo() {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
- OutStreamer->SwitchSection(ReadOnlySec);
+ OutStreamer->switchSection(ReadOnlySec);
auto AFI = MF->getInfo<AArch64FunctionInfo>();
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
@@ -865,7 +865,7 @@ void AArch64AsmPrinter::emitFunctionEntryLabel() {
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall ||
MF->getFunction().getCallingConv() ==
CallingConv::AArch64_SVE_VectorCall ||
- STI->getRegisterInfo()->hasSVEArgsOrReturn(MF)) {
+ MF->getInfo<AArch64FunctionInfo>()->isSVECC()) {
auto *TS =
static_cast<AArch64TargetStreamer *>(OutStreamer->getTargetStreamer());
TS->emitDirectiveVariantPCS(CurrentFnSym);
@@ -1129,7 +1129,8 @@ void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
- if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
+ if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround() &&
+ STI->hasNEON()) {
// Convert H/S register to corresponding D register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
DestReg = AArch64::D0 + (DestReg - AArch64::H0);
@@ -1262,7 +1263,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
break;
case AArch64::DBG_VALUE:
- case AArch64::DBG_VALUE_LIST: {
+ case AArch64::DBG_VALUE_LIST:
if (isVerbose() && OutStreamer->hasRawTextSupport()) {
SmallString<128> TmpStr;
raw_svector_ostream OS(TmpStr);
@@ -1282,8 +1283,18 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
OutStreamer->emitCFIBKeyFrame();
return;
- }
- }
+ }
+
+ case AArch64::EMITMTETAGGED: {
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
+ if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+ ExceptionHandlingType != ExceptionHandling::ARM)
+ return;
+
+ if (getFunctionCFISectionType(*MF) != CFISection::None)
+ OutStreamer->emitCFIMTETaggedFrame();
+ return;
+ }
// Tail calls use pseudo instructions so they have the proper code-gen
// attributes (isCall, isReturn, etc.). We lower them to the real
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index f26151536a58..c0da242a26de 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -82,9 +82,9 @@ def CC_AArch64_AAPCS : CallingConv<[
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCPassIndirect<i64>>,
- CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
CCAssignToReg<[P0, P1, P2, P3]>>,
- CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
CCPassIndirect<i64>>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
@@ -149,7 +149,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
- CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
CCAssignToReg<[P0, P1, P2, P3]>>
]>;
diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index ac243347b24d..d12689970dc5 100644
--- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -528,10 +528,8 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
// count as MultiUser or block optimization. This is especially important on
// arm64_32, where any memory operation is likely to be an explicit use of
// xN and an implicit use of wN (the base address register).
- if (!UsesSeen.count(Idx)) {
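+  // insert() reports through .second whether the index was newly added,
+  // folding the previous count()-then-insert() pair into one hash lookup.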
+ if (UsesSeen.insert(Idx).second)
handleUse(MI, MO, LOHInfos[Idx]);
- UsesSeen.insert(Idx);
- }
}
}
@@ -559,7 +557,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
// Walk the basic block backwards and update the per register state machine
// in the process.
for (const MachineInstr &MI :
- instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
+ instructionsWithoutDebug(MBB.instr_rbegin(), MBB.instr_rend())) {
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
case AArch64::ADDXri:
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1994e0eb7fb9..18c111255e53 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -217,7 +217,7 @@ def AArch64PostLegalizerLoweringHelper
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombinerHelper
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
- [copy_prop, erase_undef_store, combines_for_extload,
+ [copy_prop, combines_for_extload,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
@@ -228,6 +228,6 @@ def AArch64PostLegalizerCombinerHelper
select_combines, fold_merge_to_zext,
constant_fold, identity_combines,
ptr_add_immed_chain, overlapping_and,
- split_store_zero_128]> {
+ split_store_zero_128, undef_combines]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 82e8df3b73f9..343f888b7552 100644
--- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -247,8 +247,8 @@ void SSACCmpConv::updateTailPHIs() {
for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
// PHI operands are (Reg, MBB) at (oi-2, oi-1).
if (I.getOperand(oi - 1).getMBB() == CmpBB) {
- I.RemoveOperand(oi - 1);
- I.RemoveOperand(oi - 2);
+ I.removeOperand(oi - 1);
+ I.removeOperand(oi - 2);
}
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b0f739cc26e6..910f8cdede75 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -86,6 +86,7 @@ private:
unsigned N);
bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
};
@@ -759,6 +760,37 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
return true;
}
+bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ // Expand CALL_BTI pseudo to:
+ // - a branch to the call target
+ // - a BTI instruction
+ // Mark the sequence as a bundle, to avoid passes moving other code in
+ // between.
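+  //
+  // For example, a call to a returns_twice function such as setjmp becomes:
+  //   bl setjmp
+  //   hint #36    // BTI J, the landing pad for the branch back from the
+  //               // second (longjmp-initiated) return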
+
+ MachineInstr &MI = *MBBI;
+ MachineOperand &CallTarget = MI.getOperand(0);
+ assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+ "invalid operand for regular call");
+ unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+ MachineInstr *Call =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
+ Call->addOperand(CallTarget);
+
+ MachineInstr *BTI =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
+      // BTI J so that setjmp can BR back to this point.
+ .addImm(36)
+ .getInstr();
+
+ if (MI.shouldUpdateCallSiteInfo())
+ MBB.getParent()->moveCallSiteInfo(&MI, Call);
+
+ MI.eraseFromParent();
+ finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
+ return true;
+}
+
bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
Register CtxReg = MBBI->getOperand(0).getReg();
@@ -1238,6 +1270,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
case AArch64::BLR_RVMARKER:
return expandCALL_RVMARKER(MBB, MBBI);
+ case AArch64::BLR_BTI:
+ return expandCALL_BTI(MBB, MBBI);
case AArch64::StoreSwiftAsyncContext:
return expandStoreSwiftAsyncContext(MBB, MBBI);
}
diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 793663ef97d7..6de374125466 100644
--- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -813,7 +813,7 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
}
bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
- auto &ST = static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+ auto &ST = Fn.getSubtarget<AArch64Subtarget>();
if (ST.getProcFamily() != AArch64Subtarget::Falkor)
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index c67fa62c7a92..49fffa01a974 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,6 +14,7 @@
#include "AArch64.h"
#include "AArch64CallingConvention.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -282,8 +283,7 @@ public:
explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
- Subtarget =
- &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
+ Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
Context = &FuncInfo.Fn->getContext();
}
@@ -3127,6 +3127,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!Callee && !Symbol)
return false;
+ // Allow SelectionDAG isel to handle calls to functions like setjmp that need
+ // a bti instruction following the call.
+ if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget->noBTIAtReturnTwice() &&
+ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+ return false;
+
// Allow SelectionDAG isel to handle tail calls.
if (IsTailCall)
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a4d20735e2b1..78babdf9f1f0 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -117,6 +117,72 @@
//
// FIXME: also explain the redzone concept.
//
+// An example of the prologue:
+//
+// .globl __foo
+// .align 2
+// __foo:
+// Ltmp0:
+// .cfi_startproc
+// .cfi_personality 155, ___gxx_personality_v0
+// Leh_func_begin:
+// .cfi_lsda 16, Lexception33
+//
+//     stp xa, xb, [sp, #-offset]!
+// ...
+// stp x28, x27, [sp, #offset-32]
+// stp fp, lr, [sp, #offset-16]
+// add fp, sp, #offset - 16
+// sub sp, sp, #1360
+//
+// The Stack:
+// +-------------------------------------------+
+// 10000 | ........ | ........ | ........ | ........ |
+// 10004 | ........ | ........ | ........ | ........ |
+// +-------------------------------------------+
+// 10008 | ........ | ........ | ........ | ........ |
+// 1000c | ........ | ........ | ........ | ........ |
+// +===========================================+
+// 10010 | X28 Register |
+// 10014 | X28 Register |
+// +-------------------------------------------+
+// 10018 | X27 Register |
+// 1001c | X27 Register |
+// +===========================================+
+// 10020 | Frame Pointer |
+// 10024 | Frame Pointer |
+// +-------------------------------------------+
+// 10028 | Link Register |
+// 1002c | Link Register |
+// +===========================================+
+// 10030 | ........ | ........ | ........ | ........ |
+// 10034 | ........ | ........ | ........ | ........ |
+// +-------------------------------------------+
+// 10038 | ........ | ........ | ........ | ........ |
+// 1003c | ........ | ........ | ........ | ........ |
+// +-------------------------------------------+
+//
+// [sp] = 10030 :: >>initial value<<
+// sp = 10020 :: stp fp, lr, [sp, #-16]!
+// fp = sp == 10020 :: mov fp, sp
+// [sp] == 10020 :: stp x28, x27, [sp, #-16]!
+// sp == 10010 :: >>final value<<
+//
+// The frame pointer (w29) points to address 10020. If we use an offset of
+// '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
+// for w27, and -32 for w28:
+//
+// Ltmp1:
+// .cfi_def_cfa w29, 16
+// Ltmp2:
+// .cfi_offset w30, -8
+// Ltmp3:
+// .cfi_offset w29, -16
+// Ltmp4:
+// .cfi_offset w27, -24
+// Ltmp5:
+// .cfi_offset w28, -32
+//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
@@ -126,6 +192,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -154,7 +221,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -187,7 +253,7 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::init(true), cl::Hidden);
cl::opt<bool> EnableHomogeneousPrologEpilog(
- "homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden,
+ "homogeneous-prolog-epilog", cl::Hidden,
cl::desc("Emit homogeneous prologue and epilogue for the size "
"optimization (default = off)"));
@@ -233,6 +299,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
+static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -440,137 +507,309 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
-// Convenience function to create a DWARF expression for
-// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
-static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
- int NumBytes, int NumVGScaledBytes, unsigned VG,
- llvm::raw_string_ostream &Comment) {
- uint8_t buffer[16];
+void AArch64FrameLowering::emitCalleeSavedGPRLocations(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- if (NumBytes) {
- Expr.push_back(dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
- Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
- }
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (CSI.empty())
+ return;
- if (NumVGScaledBytes) {
- Expr.push_back((uint8_t)dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
- Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
- Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
- Expr.push_back(0);
+ for (const auto &Info : CSI) {
+ if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)
+ continue;
- Expr.push_back((uint8_t)dwarf::DW_OP_mul);
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+ assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+ unsigned DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
- Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
- << std::abs(NumVGScaledBytes) << " * VG";
+ int64_t Offset =
+ MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
}
}
-// Creates an MCCFIInstruction:
-// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
-MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
- const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
- int64_t NumBytes, NumVGScaledBytes;
- AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes,
- NumVGScaledBytes);
+void AArch64FrameLowering::emitCalleeSavedSVELocations(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ if (CSI.empty())
+ return;
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+
+ for (const auto &Info : CSI) {
+ if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
+ continue;
+
+ // Not all unwinders may know about SVE registers, so assume the lowest
+    // common denominator.
+ assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+ unsigned Reg = Info.getReg();
+ if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+ continue;
+
+ StackOffset Offset =
+ StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
- std::string CommentBuffer = "sp";
- llvm::raw_string_ostream Comment(CommentBuffer);
+ unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+}
- // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG)
- SmallString<64> Expr;
- Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31));
- Expr.push_back(0);
- appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
- TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+void AArch64FrameLowering::emitCalleeSavedFrameMoves(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ emitCalleeSavedGPRLocations(MBB, MBBI);
+ emitCalleeSavedSVELocations(MBB, MBBI);
+}
- // Wrap this into DW_CFA_def_cfa.
- SmallString<64> DefCfaExpr;
- DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
- uint8_t buffer[16];
- DefCfaExpr.append(buffer,
- buffer + encodeULEB128(Expr.size(), buffer));
- DefCfaExpr.append(Expr.str());
- return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
- Comment.str());
+static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt,
+ unsigned DwarfReg) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg));
+ BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex);
}
-MCCFIInstruction AArch64FrameLowering::createCfaOffset(
- const TargetRegisterInfo &TRI, unsigned Reg,
- const StackOffset &OffsetFromDefCFA) const {
- int64_t NumBytes, NumVGScaledBytes;
- AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
- OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
+void AArch64FrameLowering::resetCFIToInitialState(
+ MachineBasicBlock &MBB) const {
- unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+ MachineFunction &MF = *MBB.getParent();
+ const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const auto &TRI =
+ static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo());
+ const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
- // Non-scalable offsets can use DW_CFA_offset directly.
- if (!NumVGScaledBytes)
- return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
+ const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
+ DebugLoc DL;
- std::string CommentBuffer;
- llvm::raw_string_ostream Comment(CommentBuffer);
- Comment << printReg(Reg, &TRI) << " @ cfa";
+ // Reset the CFA to `SP + 0`.
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
+ BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
- // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
- SmallString<64> OffsetExpr;
- appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
- TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+ // Flip the RA sign state.
+ if (MFI.shouldSignReturnAddress()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
+ }
- // Wrap this into DW_CFA_expression
- SmallString<64> CfaExpr;
- CfaExpr.push_back(dwarf::DW_CFA_expression);
- uint8_t buffer[16];
- CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
- CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
- CfaExpr.append(OffsetExpr.str());
+ // Shadow call stack uses X18, reset it.
+ if (needsShadowCallStackPrologueEpilogue(MF))
+ insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
+ TRI.getDwarfRegNum(AArch64::X18, true));
- return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
+ // Emit .cfi_same_value for callee-saved registers.
+ const std::vector<CalleeSavedInfo> &CSI =
+ MF.getFrameInfo().getCalleeSavedInfo();
+ for (const auto &Info : CSI) {
+ unsigned Reg = Info.getReg();
+ if (!TRI.regNeedsCFI(Reg, Reg))
+ continue;
+ insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
+ TRI.getDwarfRegNum(Reg, true));
+ }
}
-void AArch64FrameLowering::emitCalleeSavedFrameMoves(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool SVE) {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- const TargetInstrInfo *TII = STI.getInstrInfo();
- DebugLoc DL = MBB.findDebugLoc(MBBI);
- // Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
for (const auto &Info : CSI) {
- Register Reg = Info.getReg();
+ if (SVE !=
+ (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
+ continue;
- // Not all unwinders may know about SVE registers, so assume the lowest
- // common demoninator.
- unsigned NewReg;
- if (static_cast<const AArch64RegisterInfo *>(TRI)->regNeedsCFI(Reg, NewReg))
- Reg = NewReg;
- else
+ unsigned Reg = Info.getReg();
+ if (SVE &&
+ !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
continue;
- StackOffset Offset;
- if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
- StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
- } else {
- Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) -
- getOffsetOfLocalArea());
- }
- unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
+ nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ .setMIFlags(MachineInstr::FrameDestroy);
+ }
+}
+
+void AArch64FrameLowering::emitCalleeSavedGPRRestores(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ emitCalleeSavedRestores(MBB, MBBI, false);
+}
+
+void AArch64FrameLowering::emitCalleeSavedSVERestores(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
+ emitCalleeSavedRestores(MBB, MBBI, true);
+}
+
+static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
+ switch (Reg.id()) {
+ default:
+    // The called routine is expected to preserve r19-r28;
+    // r29 and r30 are used as the frame pointer and link register, respectively.
+ return 0;
+
+ // GPRs
+#define CASE(n) \
+ case AArch64::W##n: \
+ case AArch64::X##n: \
+ return AArch64::X##n
+ CASE(0);
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ CASE(16);
+ CASE(17);
+ CASE(18);
+#undef CASE
+
+ // FPRs
+#define CASE(n) \
+ case AArch64::B##n: \
+ case AArch64::H##n: \
+ case AArch64::S##n: \
+ case AArch64::D##n: \
+ case AArch64::Q##n: \
+ return HasSVE ? AArch64::Z##n : AArch64::Q##n
+ CASE(0);
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ CASE(16);
+ CASE(17);
+ CASE(18);
+ CASE(19);
+ CASE(20);
+ CASE(21);
+ CASE(22);
+ CASE(23);
+ CASE(24);
+ CASE(25);
+ CASE(26);
+ CASE(27);
+ CASE(28);
+ CASE(29);
+ CASE(30);
+ CASE(31);
+#undef CASE
+ }
+}
+
+void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
+ MachineBasicBlock &MBB) const {
+ // Insertion point.
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+
+ // Fake a debug loc.
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ const MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
+
+ BitVector GPRsToZero(TRI.getNumRegs());
+ BitVector FPRsToZero(TRI.getNumRegs());
+ bool HasSVE = STI.hasSVE();
+ for (MCRegister Reg : RegsToZero.set_bits()) {
+ if (TRI.isGeneralPurposeRegister(MF, Reg)) {
+ // For GPRs, we only care to clear out the 64-bit register.
+ if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
+ GPRsToZero.set(XReg);
+ } else if (AArch64::FPR128RegClass.contains(Reg) ||
+ AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR32RegClass.contains(Reg) ||
+ AArch64::FPR16RegClass.contains(Reg) ||
+ AArch64::FPR8RegClass.contains(Reg)) {
+      // For FPRs, zero the widest aliasing register (Q, or Z when SVE is
+      // available), which clears all narrower views.
+ if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
+ FPRsToZero.set(XReg);
+ }
+ }
+
+ const AArch64InstrInfo &TII = *STI.getInstrInfo();
+
+ // Zero out GPRs.
+ for (MCRegister Reg : GPRsToZero.set_bits())
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), Reg).addImm(0);
+
+ // Zero out FP/vector registers.
+ for (MCRegister Reg : FPRsToZero.set_bits())
+ if (HasSVE)
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::DUP_ZI_D), Reg)
+ .addImm(0)
+ .addImm(0);
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVIv2d_ns), Reg).addImm(0);
+
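+  // Zero out SVE predicate registers. There is no immediate move for P
+  // registers; PFALSE clears every lane instead.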
+ if (HasSVE) {
+ for (MCRegister PReg :
+ {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
+ AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
+ AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
+ AArch64::P15}) {
+ if (RegsToZero[PReg])
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
+ }
}
}
@@ -881,16 +1120,9 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
- bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
- // Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions and associated CFI instruction.
- while (MBBI->getOpcode() == AArch64::STRXpost ||
- MBBI->getOpcode() == AArch64::LDRXpre ||
- MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
- if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
- assert(MBBI->getOperand(0).getReg() != AArch64::SP);
- ++MBBI;
- }
+ bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
+ MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
+ int CFAOffset = 0) {
unsigned NewOpc;
switch (MBBI->getOpcode()) {
default:
@@ -949,12 +1181,14 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// If the first store isn't right where we want SP then we can't fold the
// update in so create a normal arithmetic instruction instead.
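  // For example (illustrative values), `stp x29, x30, [sp]` with a pending
  // SP decrement of 16 folds into `stp x29, x30, [sp, #-16]!`, but a first
  // store at a nonzero offset, or an increment outside the pre/post-index
  // range, cannot be folded.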
+ MachineFunction &MF = *MBB.getParent();
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(CSStackSizeInc), TII,
- InProlog ? MachineInstr::FrameSetup
- : MachineInstr::FrameDestroy);
+ StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
+ false, false, nullptr, EmitCFI,
+ StackOffset::getFixed(CFAOffset));
+
return std::prev(MBBI);
}
@@ -981,8 +1215,15 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// Generate a new SEH code that corresponds to the new instruction.
if (NeedsWinCFI) {
*HasWinCFI = true;
- InsertSEH(*MIB, *TII,
- InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
+ InsertSEH(*MIB, *TII, FrameFlag);
+ }
+
+ if (EmitCFI) {
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(FrameFlag);
}
return std::prev(MBB.erase(MBBI));
@@ -998,16 +1239,6 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
return;
unsigned Opc = MI.getOpcode();
-
- // Ignore instructions that do not operate on SP, i.e. shadow call stack
- // instructions and associated CFI instruction.
- if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
- Opc == AArch64::CFI_INSTRUCTION) {
- if (Opc != AArch64::CFI_INSTRUCTION)
- assert(MI.getOperand(0).getReg() != AArch64::SP);
- return;
- }
-
unsigned Scale;
switch (Opc) {
case AArch64::STPXi:
@@ -1049,38 +1280,6 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
}
}
-static void adaptForLdStOpt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator FirstSPPopI,
- MachineBasicBlock::iterator LastPopI) {
- // Sometimes (when we restore in the same order as we save), we can end up
- // with code like this:
- //
- // ldp x26, x25, [sp]
- // ldp x24, x23, [sp, #16]
- // ldp x22, x21, [sp, #32]
- // ldp x20, x19, [sp, #48]
- // add sp, sp, #64
- //
- // In this case, it is always better to put the first ldp at the end, so
- // that the load-store optimizer can run and merge the ldp and the add into
- // a post-index ldp.
- // If we managed to grab the first pop instruction, move it to the end.
- if (ReverseCSRRestoreSeq)
- MBB.splice(FirstSPPopI, &MBB, LastPopI);
- // We should end up with something like this now:
- //
- // ldp x24, x23, [sp, #16]
- // ldp x22, x21, [sp, #32]
- // ldp x20, x19, [sp, #48]
- // ldp x26, x25, [sp]
- // add sp, sp, #64
- //
- // and the load-store optimizer can merge the last two instructions into:
- //
- // ldp x26, x25, [sp], #64
- //
-}
-
static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
@@ -1099,6 +1298,80 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
+static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF) {
+ if (!(llvm::any_of(
+ MF.getFrameInfo().getCalleeSavedInfo(),
+ [](const auto &Info) { return Info.getReg() == AArch64::LR; }) &&
+ MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)))
+ return false;
+
+ if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
+ report_fatal_error("Must reserve x18 to use shadow call stack");
+
+ return true;
+}
+
+static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool NeedsWinCFI,
+ bool NeedsUnwindInfo) {
+ // Shadow call stack prolog: str x30, [x18], #8
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
+ .addReg(AArch64::X18, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::X18)
+ .addImm(8)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // This instruction also makes x18 live-in to the entry block.
+ MBB.addLiveIn(AArch64::X18);
+
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (NeedsUnwindInfo) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
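+ // Decoding the escape (assuming standard DWARF encodings): this is
+ // DW_CFA_val_expression, register 18, 2-byte expression
+ // { DW_OP_breg18, SLEB128(-8) }, where SLEB128(-8) is the single
+ // byte 0x78, i.e. (-8) & 0x7f.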
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
+ nullptr, StringRef(CFIInst, sizeof(CFIInst))));
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+
+static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) {
+ // Shadow call stack epilog: ldr x30, [x18, #-8]!
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
+ .addReg(AArch64::X18, RegState::Define)
+ .addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::X18)
+ .addImm(-8)
+ .setMIFlag(MachineInstr::FrameDestroy);
+
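+ // (The ".cfi_restore" emitted below reverts register 18's (x18's)
+ // unwind rule to its initial CIE rule, undoing the
+ // DW_CFA_val_expression installed by the prologue.)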
+ if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo()) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
+ }
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1109,8 +1382,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- bool needsFrameMoves =
- MF.needsFrameMoves() && !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool EmitCFI = AFI->needsDwarfUnwindInfo();
bool HasFP = hasFP(MF);
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
@@ -1128,8 +1400,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
DebugLoc DL;
const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
- if (MFnI.shouldSignReturnAddress()) {
+ if (needsShadowCallStackPrologueEpilogue(MF))
+ emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
+ MFnI.needsDwarfUnwindInfo());
+ if (MFnI.shouldSignReturnAddress()) {
unsigned PACI;
if (MFnI.shouldSignWithBKey()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
@@ -1145,12 +1420,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::LR)
.addReg(AArch64::SP, RegState::InternalRead);
MI.setMIFlag(MachineInstr::FrameSetup);
-
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ if (EmitCFI) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+ if (EmitCFI && MFnI.isMTETagged()) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
+ .setMIFlag(MachineInstr::FrameSetup);
}
// We signal the presence of a Swift extended frame to external tools by
@@ -1227,7 +1507,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (needsFrameMoves) {
+ if (EmitCFI) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
@@ -1261,14 +1541,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+ EmitCFI);
NumBytes = 0;
} else if (HomPrologEpilog) {
// Stack has been already adjusted.
NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
+ MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
+ EmitCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -1322,8 +1604,27 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
StackOffset::getFixed(FPOffset), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
}
+ if (EmitCFI) {
+ // Define the current CFA rule to use the provided FP.
+ const int OffsetToFirstCalleeSaveFromFP =
+ AFI->getCalleeSaveBaseToFrameRecordOffset() -
+ AFI->getCalleeSavedStackSize();
+ Register FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
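+ // With the standard frame record (FP, LR at the top of the callee-save
+ // area) this typically becomes ".cfi_def_cfa w29, 16"; the exact offset
+ // depends on FixedObject and the callee-save layout.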
}
+ // Now emit the moves for whatever callee-saved regs we have (including FP,
+ // LR if those are saved). Frame instructions for SVE registers are emitted
+ // later, after the instructions which actually save the SVE regs.
+ if (EmitCFI)
+ emitCalleeSavedGPRLocations(MBB, MBBI);
+
if (windowsRequiresStackProbe(MF, NumBytes)) {
uint64_t NumWords = NumBytes >> 4;
if (NeedsWinCFI) {
@@ -1436,14 +1737,21 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
// Allocate space for the callee saves (if any).
- emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
- -AllocateBefore, TII,
- MachineInstr::FrameSetup);
+ emitFrameOffset(
+ MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitCFI && !HasFP && AllocateBefore,
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+
+ if (EmitCFI)
+ emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
// Finally allocate remaining SVE stack space.
emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
- -AllocateAfter, TII,
- MachineInstr::FrameSetup);
+ -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
+ nullptr, EmitCFI && !HasFP && AllocateAfter,
+ AllocateBefore + StackOffset::getFixed(
+ (int64_t)MFI.getStackSize() - NumBytes));
// Allocate space for the rest of the frame.
if (NumBytes) {
@@ -1458,14 +1766,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
// If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
+ if (!canUseRedZone(MF)) {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
- emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
-
+ emitFrameOffset(
+ MBB, MBBI, DL, scratchSPReg, AArch64::SP,
+ StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ SVEStackSize +
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+ }
if (NeedsRealignment) {
const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
assert(NrBitsToZero > 1);
@@ -1532,109 +1843,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MBB.addLiveIn(AArch64::X1);
}
}
-
- if (needsFrameMoves) {
- // An example of the prologue:
- //
- // .globl __foo
- // .align 2
- // __foo:
- // Ltmp0:
- // .cfi_startproc
- // .cfi_personality 155, ___gxx_personality_v0
- // Leh_func_begin:
- // .cfi_lsda 16, Lexception33
- //
- // stp xa,bx, [sp, -#offset]!
- // ...
- // stp x28, x27, [sp, #offset-32]
- // stp fp, lr, [sp, #offset-16]
- // add fp, sp, #offset - 16
- // sub sp, sp, #1360
- //
- // The Stack:
- // +-------------------------------------------+
- // 10000 | ........ | ........ | ........ | ........ |
- // 10004 | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- // 10008 | ........ | ........ | ........ | ........ |
- // 1000c | ........ | ........ | ........ | ........ |
- // +===========================================+
- // 10010 | X28 Register |
- // 10014 | X28 Register |
- // +-------------------------------------------+
- // 10018 | X27 Register |
- // 1001c | X27 Register |
- // +===========================================+
- // 10020 | Frame Pointer |
- // 10024 | Frame Pointer |
- // +-------------------------------------------+
- // 10028 | Link Register |
- // 1002c | Link Register |
- // +===========================================+
- // 10030 | ........ | ........ | ........ | ........ |
- // 10034 | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- // 10038 | ........ | ........ | ........ | ........ |
- // 1003c | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- //
- // [sp] = 10030 :: >>initial value<<
- // sp = 10020 :: stp fp, lr, [sp, #-16]!
- // fp = sp == 10020 :: mov fp, sp
- // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
- // sp == 10010 :: >>final value<<
- //
- // The frame pointer (w29) points to address 10020. If we use an offset of
- // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
- // for w27, and -32 for w28:
- //
- // Ltmp1:
- // .cfi_def_cfa w29, 16
- // Ltmp2:
- // .cfi_offset w30, -8
- // Ltmp3:
- // .cfi_offset w29, -16
- // Ltmp4:
- // .cfi_offset w27, -24
- // Ltmp5:
- // .cfi_offset w28, -32
-
- if (HasFP) {
- const int OffsetToFirstCalleeSaveFromFP =
- AFI->getCalleeSaveBaseToFrameRecordOffset() -
- AFI->getCalleeSavedStackSize();
- Register FramePtr = RegInfo->getFrameRegister(MF);
-
- // Define the current CFA rule to use the provided FP.
- unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- } else {
- unsigned CFIIndex;
- if (SVEStackSize) {
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
- CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
- } else {
- // Encode the stack size of the leaf function.
- CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
- }
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- }
-
- // Now emit the moves for whatever callee saved regs we have (including FP,
- // LR if those are saved).
- emitCalleeSavedFrameMoves(MBB, MBBI);
- }
}
static void InsertReturnAddressAuth(MachineFunction &MF,
@@ -1653,7 +1861,8 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
// The AUTIASP instruction assembles to a hint instruction before v8.3a so
// this instruction can be safely used for any v8a architecture.
// From v8.3a onwards there are optimised authenticate LR and return
- // instructions, namely RETA{A,B}, that can be used instead.
+ // instructions, namely RETA{A,B}, that can be used instead. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
MBBI->getOpcode() == AArch64::RET_ReallyLR) {
BuildMI(MBB, MBBI, DL,
@@ -1665,6 +1874,12 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
MBB, MBBI, DL,
TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
+
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
}
@@ -1686,6 +1901,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool NeedsWinCFI = needsWinCFI(MF);
+ bool EmitCFI = MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo();
bool HasWinCFI = false;
bool IsFunclet = false;
auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
@@ -1695,6 +1911,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
IsFunclet = isFuncletReturnInstr(*MBBI);
}
+ auto FinishingTouches = make_scope_exit([&]() {
+ InsertReturnAddressAuth(MF, MBB);
+ if (needsShadowCallStackPrologueEpilogue(MF))
+ emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
+ if (EmitCFI)
+ emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
+ });
+
int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
: MFI.getStackSize();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -1707,36 +1931,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// How much of the stack used by incoming arguments this function is expected
// to restore in this particular epilogue.
int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
-
- // The stack frame should be like below,
- //
- // ---------------------- ---
- // | | |
- // | BytesInStackArgArea| CalleeArgStackSize
- // | (NumReusableBytes) | (of tail call)
- // | | ---
- // | | |
- // ---------------------| --- |
- // | | | |
- // | CalleeSavedReg | | |
- // | (CalleeSavedStackSize)| | |
- // | | | |
- // ---------------------| | NumBytes
- // | | StackSize (StackAdjustUp)
- // | LocalStackSize | | |
- // | (covering callee | | |
- // | args) | | |
- // | | | |
- // ---------------------- --- ---
- //
- // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
- // = StackSize + ArgumentPopSize
- //
- // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
- // it as the 2nd argument of AArch64ISD::TC_RETURN.
-
- auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
-
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
@@ -1771,9 +1965,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
+ bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
- while (AArch64InstrInfo::isSEHInstruction(*Pop))
+ while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
+ AArch64InstrInfo::isSEHInstruction(*Pop))
Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
@@ -1781,15 +1977,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// If the offset is 0 and the AfterCSR pop is not actually trying to
// allocate more stack for arguments (in space that an untimely interrupt
// may clobber), convert it to a post-index ldp.
- if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0)
+ if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
- else {
+ MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
+ MachineInstr::FrameDestroy, PrologueSaveSize);
+ } else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
// pops.
AfterCSRPopSize += PrologueSaveSize;
+ CombineAfterCSRBump = true;
}
}
@@ -1822,15 +2020,27 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
- // We need to reset FP to its untagged state on return. Bit 60 is currently
- // used to show the presence of an extended frame.
-
- // BIC x29, x29, #0x1000_0000_0000_0000
- BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
- AArch64::FP)
- .addUse(AArch64::FP)
- .addImm(0x10fe)
- .setMIFlag(MachineInstr::FrameDestroy);
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ // Avoid the reload as it is GOT relative, and instead fall back to the
+ // hardcoded value below. This allows a mismatch between the OS and
+ // application without immediately terminating on the difference.
+ LLVM_FALLTHROUGH;
+ case SwiftAsyncFramePointerMode::Always:
+ // We need to reset FP to its untagged state on return. Bit 60 is
+ // currently used to show the presence of an extended frame.
+
+ // BIC x29, x29, #0x1000_0000_0000_0000
+ BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
+ AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x10fe)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break;
+ }
}
const StackOffset &SVEStackSize = getSVEStackSize(MF);
@@ -1838,10 +2048,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+
+ // When we are about to restore the CSRs, the CFA register is SP again.
+ if (EmitCFI && hasFP(MF)) {
+ const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+ unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
+ BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
+ }
+
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI);
+ &HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes));
if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
@@ -1873,30 +2095,44 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Deallocate the SVE area.
if (SVEStackSize) {
- if (AFI->isStackRealigned()) {
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize())
+ // If we have stack realignment or variable sized objects on the stack,
+ // restore the stack pointer from the frame pointer prior to SVE CSR
+ // restoration.
+ if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
+ if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
// Set SP to the start of the SVE callee-save area from which they can
// be reloaded. The code below will deallocate the stack space
// by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
StackOffset::getScalable(-CalleeSavedSize), TII,
MachineInstr::FrameDestroy);
+ }
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(NumBytes), TII,
- MachineInstr::FrameDestroy);
+ emitFrameOffset(
+ MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
+ false, false, nullptr, EmitCFI && !hasFP(MF),
+ SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
NumBytes = 0;
}
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- DeallocateBefore, TII, MachineInstr::FrameDestroy);
+ DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !hasFP(MF),
+ SVEStackSize +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- DeallocateAfter, TII, MachineInstr::FrameDestroy);
+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !hasFP(MF),
+ DeallocateAfter +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
}
+ if (EmitCFI)
+ emitCalleeSavedSVERestores(MBB, RestoreEnd);
}
if (!hasFP(MF)) {
@@ -1906,23 +2142,24 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (RedZone && AfterCSRPopSize == 0)
return;
+ // Pop the local variables off the stack. If there are no callee-saved
+ // registers, it means we are actually positioned at the terminator and can
+ // combine the stack increment for the locals with the stack increment for
+ // the callee-popped arguments into (possibly) a single instruction and be done.
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += AfterCSRPopSize;
+ emitFrameOffset(
+ MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(StackRestoreBytes), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
+ StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
+
// If we were able to combine the local stack pop with the argument pop,
// then we're done.
- bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
-
- // If we're done after this, make sure to help the load store optimizer.
- if (Done)
- adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
-
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(StackRestoreBytes), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
- if (Done) {
+ if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
@@ -1948,29 +2185,29 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset::getFixed(NumBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ // When we are about to restore the CSRs, the CFA register is SP again.
+ if (EmitCFI && hasFP(MF)) {
+ const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+ unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize));
+ BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
+ }
+
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save
// spill code in the prologue.
if (AfterCSRPopSize) {
assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
"interrupt may have clobbered");
- // Find an insertion point for the first ldp so that it goes before the
- // shadow call stack epilog instruction. This ensures that the restore of
- // lr from x18 is placed after the restore from sp.
- auto FirstSPPopI = MBB.getFirstTerminator();
- while (FirstSPPopI != Begin) {
- auto Prev = std::prev(FirstSPPopI);
- if (Prev->getOpcode() != AArch64::LDRXpre ||
- Prev->getOperand(0).getReg() == AArch64::SP)
- break;
- FirstSPPopI = Prev;
- }
- adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
-
- emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(AfterCSRPopSize), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
+ emitFrameOffset(
+ MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI,
+ StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
}
if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
@@ -2061,8 +2298,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
// right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame() && !isSVE) {
- // We shouldn't prefer using the FP when there is an SVE area
- // in between the FP and the non-SVE locals/spills.
+ // We shouldn't prefer using the FP to access fixed-sized stack objects when
+ // there are scalable (SVE) objects in between the FP and the fixed-sized
+ // objects.
PreferFP &= !SVEStackSize;
// Note: Keeping the following as multiple 'if' statements rather than
@@ -2083,7 +2321,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
// offsets is smaller than for positive ones. If an offset is available
// via the FP and the SP, use whichever is closest.
bool FPOffsetFits = !ForSimm || FPOffset >= -256;
- PreferFP |= Offset > -FPOffset;
+ PreferFP |= Offset > -FPOffset && !SVEStackSize;
if (MFI.hasVarSizedObjects()) {
// If we have variable sized objects, we can use either FP or BP, as the
@@ -2270,7 +2508,7 @@ struct RegPairInfo {
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
- bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
+ bool NeedsFrameRecord) {
if (CSI.empty())
return;
@@ -2349,15 +2587,6 @@ static void computeCalleeSaveRegisterPairs(
}
}
- // If either of the registers to be saved is the lr register, it means that
- // we also need to save lr in the shadow call stack.
- if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
- MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
- if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
- report_fatal_error("Must reserve x18 to use shadow call stack");
- NeedShadowCallStackProlog = true;
- }
-
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
// pair instructions directly. Assert if we see anything otherwise.
@@ -2476,43 +2705,9 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
- bool NeedShadowCallStackProlog = false;
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
- NeedShadowCallStackProlog, hasFP(MF));
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- if (NeedShadowCallStackProlog) {
- // Shadow call stack prolog: str x30, [x18], #8
- BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
- .addReg(AArch64::X18, RegState::Define)
- .addReg(AArch64::LR)
- .addReg(AArch64::X18)
- .addImm(8)
- .setMIFlag(MachineInstr::FrameSetup);
-
- if (NeedsWinCFI)
- BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
- .setMIFlag(MachineInstr::FrameSetup);
-
- // Emit a CFI instruction that causes 8 to be subtracted from the value of
- // x18 when unwinding past this frame.
- static const char CFIInst[] = {
- dwarf::DW_CFA_val_expression,
- 18, // register
- 2, // length
- static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
- static_cast<char>(-8) & 0x7f, // addend (sleb128)
- };
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
- nullptr, StringRef(CFIInst, sizeof(CFIInst))));
- BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // This instruction also makes x18 live-in to the entry block.
- MBB.addLiveIn(AArch64::X18);
- }
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
if (homogeneousPrologEpilog(MF)) {
auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
.setMIFlag(MachineInstr::FrameSetup);
@@ -2622,7 +2817,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
}
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -2630,14 +2825,12 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
SmallVector<RegPairInfo, 8> RegPairs;
bool NeedsWinCFI = needsWinCFI(MF);
- if (MI != MBB.end())
- DL = MI->getDebugLoc();
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
- bool NeedShadowCallStackProlog = false;
- computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
- NeedShadowCallStackProlog, hasFP(MF));
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
- auto EmitMI = [&](const RegPairInfo &RPI) {
+ auto EmitMI = [&](const RegPairInfo &RPI) -> MachineBasicBlock::iterator {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
@@ -2694,7 +2887,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
@@ -2711,6 +2904,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+
+ return MIB->getIterator();
};
// SVE objects are always restored in reverse order.
@@ -2718,31 +2913,33 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
if (RPI.isScalable())
EmitMI(RPI);
- if (ReverseCSRRestoreSeq) {
- for (const RegPairInfo &RPI : reverse(RegPairs))
- if (!RPI.isScalable())
- EmitMI(RPI);
- } else if (homogeneousPrologEpilog(MF, &MBB)) {
- auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Epilog))
+ if (homogeneousPrologEpilog(MF, &MBB)) {
+ auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
.setMIFlag(MachineInstr::FrameDestroy);
for (auto &RPI : RegPairs) {
MIB.addReg(RPI.Reg1, RegState::Define);
MIB.addReg(RPI.Reg2, RegState::Define);
}
return true;
- } else
- for (const RegPairInfo &RPI : RegPairs)
- if (!RPI.isScalable())
- EmitMI(RPI);
-
- if (NeedShadowCallStackProlog) {
- // Shadow call stack epilog: ldr x30, [x18, #-8]!
- BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
- .addReg(AArch64::X18, RegState::Define)
- .addReg(AArch64::LR, RegState::Define)
- .addReg(AArch64::X18)
- .addImm(-8)
- .setMIFlag(MachineInstr::FrameDestroy);
+ }
+
+ if (ReverseCSRRestoreSeq) {
+ MachineBasicBlock::iterator First = MBB.end();
+ for (const RegPairInfo &RPI : reverse(RegPairs)) {
+ if (RPI.isScalable())
+ continue;
+ MachineBasicBlock::iterator It = EmitMI(RPI);
+ if (First == MBB.end())
+ First = It;
+ }
+ if (First != MBB.end())
+ MBB.splice(MBBI, &MBB, First);
+ } else {
+ for (const RegPairInfo &RPI : RegPairs) {
+ if (RPI.isScalable())
+ continue;
+ (void)EmitMI(RPI);
+ }
}
return true;
@@ -2941,6 +3138,15 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
// stack slots for them.
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ bool UsesWinAAPCS = isTargetWindows(MF);
+ if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
+ int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
+ AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ }
+
for (auto &CS : CSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
@@ -2954,7 +3160,8 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
// Grab 8 bytes below FP for the extended asynchronous frame info.
- if (hasFP(MF) && AFI->hasSwiftAsyncContext() && Reg == AArch64::FP) {
+ if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
+ Reg == AArch64::FP) {
FrameIdx = MFI.CreateStackObject(8, Alignment, true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
@@ -3190,7 +3397,7 @@ public:
// instructions. May skip if the replacement is not profitable. May invalidate
// the input iterator and replace it with a valid one.
void emitCode(MachineBasicBlock::iterator &InsertI,
- const AArch64FrameLowering *TFI, bool IsLast);
+ const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
};
void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
@@ -3329,7 +3536,8 @@ void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
}
void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
- const AArch64FrameLowering *TFI, bool IsLast) {
+ const AArch64FrameLowering *TFI,
+ bool TryMergeSPUpdate) {
if (TagStores.empty())
return;
TagStoreInstr &FirstTagStore = TagStores[0];
@@ -3359,8 +3567,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
emitUnrolled(InsertI);
} else {
MachineInstr *UpdateInstr = nullptr;
- int64_t TotalOffset;
- if (IsLast) {
+ int64_t TotalOffset = 0;
+ if (TryMergeSPUpdate) {
// See if we can merge base register update into the STGloop.
// This is done in AArch64LoadStoreOptimizer for "normal" stores,
// but STGloop is way too unusual for that, and also it only
@@ -3505,7 +3713,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
for (auto &Instr : Instrs) {
if (EndOffset && *EndOffset != Instr.Offset) {
// Found a gap.
- TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
+ TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
TSE.clear();
}
@@ -3513,7 +3721,11 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
EndOffset = Instr.Offset + Instr.Size;
}
- TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
+ // Multiple FP/SP updates in a loop cannot be described by CFI instructions.
+ TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */
+ !MBB->getParent()
+ ->getInfo<AArch64FunctionInfo>()
+ ->needsAsyncDwarfUnwindInfo());
return InsertI;
}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 31f57cbc49f2..f59860a24d9b 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -29,6 +29,8 @@ public:
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
+ void resetCFIToInitialState(MachineBasicBlock &MBB) const override;
+
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
@@ -141,13 +143,20 @@ private:
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
- MCCFIInstruction
- createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI,
- const StackOffset &OffsetFromSP) const;
- MCCFIInstruction createCfaOffset(const TargetRegisterInfo &MRI, unsigned DwarfReg,
- const StackOffset &OffsetFromDefCFA) const;
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
unsigned StackBumpBytes) const;
+ void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+ void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+ void emitCalleeSavedGPRRestores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+ void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ /// Emit code to zero the call-used registers requested via the
+ /// "zero-call-used-regs" function attribute.
+ void emitZeroCallUsedRegs(BitVector RegsToZero,
+ MachineBasicBlock &MBB) const override;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 899f069abdd4..82fe5772c99d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -159,6 +159,22 @@ public:
return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
}
+ bool SelectExtractHigh(SDValue N, SDValue &Res) {
+ if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+ if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ !isa<ConstantSDNode>(N->getOperand(1)))
+ return false;
+ EVT VT = N->getValueType(0);
+ EVT LVT = N->getOperand(0).getValueType();
+ unsigned Index = N->getConstantOperandVal(1);
+ if (!VT.is64BitVector() || !LVT.is128BitVector() ||
+ Index != VT.getVectorNumElements())
+ return false;
+ Res = N->getOperand(0);
+ return true;
+ }
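+
+ // For example, (v2i32 (extract_subvector (v4i32 X), (i64 2))) matches:
+ // the result is the high 64-bit half of the 128-bit source, so Res is
+ // set to X (an illustrative instance of the pattern above).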
+
bool SelectDupZeroOrUndef(SDValue N) {
switch(N->getOpcode()) {
case ISD::UNDEF:
@@ -204,6 +220,11 @@ public:
return SelectSVEAddSubImm(N, VT, Imm, Shift);
}
+ template <MVT::SimpleValueType VT>
+ bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
+ return SelectSVECpyDupImm(N, VT, Imm, Shift);
+ }
+
template <MVT::SimpleValueType VT, bool Invert = false>
bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
return SelectSVELogicalImm(N, VT, Imm, Invert);
@@ -219,6 +240,16 @@ public:
return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
}
+ bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
+ if (N->getOpcode() != ISD::SPLAT_VECTOR)
+ return false;
+
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
+ /* High */ EltVT.getFixedSizeInBits(),
+ /* AllowSaturation */ true, Imm);
+ }
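+
+ // E.g. for an nxv4i32 splat this accepts shift amounts in [1, 32],
+ // the encodable range for an SVE shift-right immediate (a reading of
+ // the Low/High bounds passed above).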
+
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Min, signed Max, signed Scale, bool Shift>
bool SelectCntImm(SDValue N, SDValue &Imm) {
@@ -257,6 +288,15 @@ public:
return false;
}
+ template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
+ if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
+ uint64_t C = CI->getZExtValue();
+ Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
+ return true;
+ }
+ return false;
+ }
+
/// Form sequences of consecutive 64/128-bit registers for use in NEON
/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
/// between 1 and 4 elements. If it contains a single element that is returned
@@ -300,6 +340,11 @@ public:
return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
}
+ template <unsigned Scale>
+ bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
+ return SelectSMETileSlice(N, Scale, Vector, Offset);
+ }
+
void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
@@ -357,10 +402,8 @@ private:
bool SelectCMP_SWAP(SDNode *N);
- bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
-
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
-
+ bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
@@ -370,6 +413,8 @@ private:
bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
SDValue &Offset);
+ bool SelectSMETileSlice(SDValue N, unsigned Scale, SDValue &Vector,
+ SDValue &Offset);
bool SelectAllActivePredicate(SDValue N);
};
@@ -822,9 +867,17 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
Reg = N.getOperand(0);
- // Don't match if free 32-bit -> 64-bit zext can be used instead.
- if (Ext == AArch64_AM::UXTW &&
- Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
+ // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
+ // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
+ auto isDef32 = [](SDValue N) {
+ unsigned Opc = N.getOpcode();
+ return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
+ Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
+ Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
+ Opc != ISD::FREEZE;
+ };
+ if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
+ isDef32(Reg))
return false;
}
@@ -1852,6 +1905,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
VT = Opd0->getValueType(0);
} else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
Opd0 = Op0->getOperand(0);
+ ClampMSB = (VT == MVT::i32);
} else if (BiggerPattern) {
// Let's pretend a 0 shift right has been performed.
// The resulting code will be at least as good as the original one
@@ -2710,8 +2764,16 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
// shift the needed bits into place.
SDLoc DL(N);
unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
+ uint64_t LsrImm = LSB;
+ if (Src->hasOneUse() &&
+ isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
+ (LsrImm + LSB) < BitWidth) {
+ Src = Src->getOperand(0);
+ LsrImm += LSB;
+ }
+
SDNode *LSR = CurDAG->getMachineNode(
- ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
+ ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
// BFXIL is an alias of BFM, so translate to BFM operands.
@@ -2827,15 +2889,15 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
SDValue Add1 = ShiftAmt->getOperand(1);
uint64_t Add0Imm;
uint64_t Add1Imm;
- // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
- // to avoid the ADD/SUB.
- if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
+ if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
+ // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+ // to avoid the ADD/SUB.
NewShiftAmt = Add0;
- // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
- // generate a NEG instead of a SUB of a constant.
- else if (ShiftAmt->getOpcode() == ISD::SUB &&
- isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
- (Add0Imm % Size == 0)) {
+ } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+ isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
+ (Add0Imm % Size == 0)) {
+ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
+ // to generate a NEG instead of a SUB from a constant.
unsigned NegOpc;
unsigned ZeroReg;
EVT SubVT = ShiftAmt->getValueType(0);
@@ -2852,6 +2914,26 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
MachineSDNode *Neg =
CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
NewShiftAmt = SDValue(Neg, 0);
+ } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+ isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
+ // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
+ // to generate a NOT instead of a SUB from a constant.
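+ // (Only the low log2(Size) bits of the shift amount matter, and
+ // ~X == -X - 1, so (Size - 1) - X is congruent to ~X modulo Size.)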
+ unsigned NotOpc;
+ unsigned ZeroReg;
+ EVT SubVT = ShiftAmt->getValueType(0);
+ if (SubVT == MVT::i32) {
+ NotOpc = AArch64::ORNWrr;
+ ZeroReg = AArch64::WZR;
+ } else {
+ assert(SubVT == MVT::i64);
+ NotOpc = AArch64::ORNXrr;
+ ZeroReg = AArch64::XZR;
+ }
+ SDValue Zero =
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
+ MachineSDNode *Not =
+ CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
+ NewShiftAmt = SDValue(Not, 0);
} else
return false;
} else {
@@ -3108,72 +3190,81 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
return true;
}
-bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
- SDValue &Offset) {
- auto C = dyn_cast<ConstantSDNode>(N);
- if (!C)
+bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
+ SDValue &Shift) {
+ if (!isa<ConstantSDNode>(N))
return false;
- auto Ty = N->getValueType(0);
-
- int64_t Imm = C->getSExtValue();
SDLoc DL(N);
-
- if ((Imm >= -128) && (Imm <= 127)) {
- Base = CurDAG->getTargetConstant(Imm, DL, Ty);
- Offset = CurDAG->getTargetConstant(0, DL, Ty);
- return true;
- }
-
- if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
- Base = CurDAG->getTargetConstant(Imm/256, DL, Ty);
- Offset = CurDAG->getTargetConstant(8, DL, Ty);
+ uint64_t Val = cast<ConstantSDNode>(N)
+ ->getAPIntValue()
+ .trunc(VT.getFixedSizeInBits())
+ .getZExtValue();
+
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ // All immediates are supported.
+ Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
return true;
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ // Support 8-bit unsigned immediates.
+ if (Val <= 255) {
+ Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+ return true;
+ }
+ // Support 16-bit unsigned immediates that are a multiple of 256.
+ if (Val <= 65280 && Val % 256 == 0) {
+ Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
+ return true;
+ }
+ break;
+ default:
+ break;
}
return false;
}
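// For example, with VT == MVT::i32 a constant 768 (0x300) selects
// Imm = 3 with Shift = 8, while 257 is rejected since it is neither
// <= 255 nor a multiple of 256 (illustrative values only).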
-bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
- if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- const int64_t ImmVal = CNode->getSExtValue();
- SDLoc DL(N);
+bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
+ SDValue &Shift) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
- switch (VT.SimpleTy) {
- case MVT::i8:
- // Can always select i8s, no shift, mask the immediate value to
- // deal with sign-extended value from lowering.
+ SDLoc DL(N);
+ int64_t Val = cast<ConstantSDNode>(N)
+ ->getAPIntValue()
+ .trunc(VT.getFixedSizeInBits())
+ .getSExtValue();
+
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ // All immediates are supported.
+ Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
+ return true;
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ // Support 8-bit signed immediates.
+ if (Val >= -128 && Val <= 127) {
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
+ return true;
+ }
+ // Support 16-bit signed immediates that are a multiple of 256.
+ if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
+ Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
return true;
- case MVT::i16:
- // i16 values get sign-extended to 32-bits during lowering.
- if ((ImmVal & 0xFF) == ImmVal) {
- Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
- return true;
- } else if ((ImmVal & 0xFF) == 0) {
- Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32);
- return true;
- }
- break;
- case MVT::i32:
- case MVT::i64:
- // Range of immediate won't trigger signedness problems for 32/64b.
- if ((ImmVal & 0xFF) == ImmVal) {
- Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
- return true;
- } else if ((ImmVal & 0xFF00) == ImmVal) {
- Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32);
- return true;
- }
- break;
- default:
- break;
}
+ break;
+ default:
+ break;
}
return false;
@@ -3901,7 +3992,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
true);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
true);
return;
@@ -3922,7 +4013,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
true);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
true);
return;
@@ -3943,7 +4034,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
true);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
true);
return;
@@ -4267,7 +4358,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
@@ -4284,7 +4375,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
@@ -4301,7 +4392,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
@@ -4911,7 +5002,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
@@ -4928,7 +5019,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
@@ -4945,7 +5036,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
- (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ VT == MVT::nxv8bf16) {
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
@@ -5033,6 +5124,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
const unsigned IntNo =
cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::aarch64_sme_ldr ||
+ IntNo == Intrinsic::aarch64_sme_str)
+ return MVT::nxv16i8;
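+ // (SME LDR/STR move one ZA array vector, i.e. SVL bytes, hence the
+ // scalable nxv16i8 memory type; this reading of the ZA model is an
+ // assumption.)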
+
if (IntNo != Intrinsic::aarch64_sve_prf)
return EVT();
@@ -5051,12 +5146,19 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
SDValue &OffImm) {
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
const DataLayout &DL = CurDAG->getDataLayout();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
- return true;
+ // We can only encode VL scaled offsets, so only fold in frame indexes
+ // referencing SVE objects.
+ if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+ return true;
+ }
+
+ return false;
}
if (MemVT == EVT())
@@ -5083,7 +5185,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ // We can only encode VL scaled offsets, so only fold in frame indexes
+ // referencing SVE objects.
+ if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
@@ -5149,3 +5254,30 @@ bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
return TLI->isAllActivePredicate(*CurDAG, N);
}
+
+bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &Offset) {
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+ return true;
+ }
+
+ // Process an ADD node.
+ const SDValue LHS = N.getOperand(0);
+ const SDValue RHS = N.getOperand(1);
+
+ if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
+ int64_t ImmOff = C->getSExtValue();
+ unsigned MaxSize = (1 << Scale) - 1;
+
+ if (ImmOff < 0 || ImmOff > MaxSize)
+ return false;
+
+ Base = LHS;
+ Offset = CurDAG->getTargetConstant(ImmOff, SDLoc(N), MVT::i64);
+ return true;
+ }
+
+ return false;
+}
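+
+// E.g. with Scale == 4, (add x0, #7) splits into Base = x0, Offset = 7,
+// while an immediate of 16 exceeds (1 << 4) - 1 and the fold is rejected
+// (illustrative; Scale comes from the instantiating pattern).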
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c539c8617d99..abfe2d507111 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -208,6 +208,7 @@ static bool isMergePassthruOpcode(unsigned Opc) {
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
case AArch64ISD::REVH_MERGE_PASSTHRU:
case AArch64ISD::REVW_MERGE_PASSTHRU:
+ case AArch64ISD::REVD_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
@@ -289,8 +290,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v8bf16);
}
- if (Subtarget->hasSVE()) {
+ if (Subtarget->hasSVE() || Subtarget->hasSME()) {
// Add legal sve predicate types
+ addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
@@ -324,50 +326,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
}
-
- for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
- setOperationAction(ISD::SADDSAT, VT, Legal);
- setOperationAction(ISD::UADDSAT, VT, Legal);
- setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationAction(ISD::USUBSAT, VT, Legal);
- setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::UDIVREM, VT, Expand);
- }
-
- for (auto VT :
- { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
- MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
-
- for (auto VT :
- { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
- MVT::nxv2f64 }) {
- setCondCodeAction(ISD::SETO, VT, Expand);
- setCondCodeAction(ISD::SETOLT, VT, Expand);
- setCondCodeAction(ISD::SETLT, VT, Expand);
- setCondCodeAction(ISD::SETOLE, VT, Expand);
- setCondCodeAction(ISD::SETLE, VT, Expand);
- setCondCodeAction(ISD::SETULT, VT, Expand);
- setCondCodeAction(ISD::SETULE, VT, Expand);
- setCondCodeAction(ISD::SETUGE, VT, Expand);
- setCondCodeAction(ISD::SETUGT, VT, Expand);
- setCondCodeAction(ISD::SETUEQ, VT, Expand);
- setCondCodeAction(ISD::SETUNE, VT, Expand);
-
- setOperationAction(ISD::FREM, VT, Expand);
- setOperationAction(ISD::FPOW, VT, Expand);
- setOperationAction(ISD::FPOWI, VT, Expand);
- setOperationAction(ISD::FCOS, VT, Expand);
- setOperationAction(ISD::FSIN, VT, Expand);
- setOperationAction(ISD::FSINCOS, VT, Expand);
- setOperationAction(ISD::FEXP, VT, Expand);
- setOperationAction(ISD::FEXP2, VT, Expand);
- setOperationAction(ISD::FLOG, VT, Expand);
- setOperationAction(ISD::FLOG2, VT, Expand);
- setOperationAction(ISD::FLOG10, VT, Expand);
- }
}
// Compute derived properties from the register classes
@@ -389,7 +347,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
@@ -448,6 +406,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ // FIXME: f128 FMINIMUM and FMAXIMUM (including STRICT versions) currently
+ // aren't handled.
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
@@ -508,16 +468,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
- // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
- setOperationAction(ISD::ADDC, MVT::i32, Custom);
- setOperationAction(ISD::ADDE, MVT::i32, Custom);
- setOperationAction(ISD::SUBC, MVT::i32, Custom);
- setOperationAction(ISD::SUBE, MVT::i32, Custom);
- setOperationAction(ISD::ADDC, MVT::i64, Custom);
- setOperationAction(ISD::ADDE, MVT::i64, Custom);
- setOperationAction(ISD::SUBC, MVT::i64, Custom);
- setOperationAction(ISD::SUBE, MVT::i64, Custom);
-
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -568,6 +518,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
+ setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
+ setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
+ setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
+ setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
+ setOperationAction(ISD::SADDO_CARRY, MVT::i32, Custom);
+ setOperationAction(ISD::SADDO_CARRY, MVT::i64, Custom);
+ setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO_CARRY, MVT::i64, Custom);
+
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
@@ -581,64 +540,41 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::v4f16, Expand);
- setOperationAction(ISD::FREM, MVT::v8f16, Expand);
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
- setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
- setOperationAction(ISD::FPOWI, MVT::f16, Promote);
- setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
- setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
- setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
- setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FEXP, ISD::FEXP2, ISD::FLOG,
+ ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
+ ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
+ ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
+ ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
+ setOperationAction(Op, MVT::f16, Promote);
+ setOperationAction(Op, MVT::v4f16, Expand);
+ setOperationAction(Op, MVT::v8f16, Expand);
+ }
if (!Subtarget->hasFullFP16()) {
- setOperationAction(ISD::SELECT, MVT::f16, Promote);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
- setOperationAction(ISD::SETCC, MVT::f16, Promote);
- setOperationAction(ISD::BR_CC, MVT::f16, Promote);
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMA, MVT::f16, Promote);
- setOperationAction(ISD::FNEG, MVT::f16, Promote);
- setOperationAction(ISD::FABS, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FSQRT, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
- setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
+ for (auto Op :
+ {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
+ ISD::BR_CC, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FDIV, ISD::FMA,
+ ISD::FNEG, ISD::FABS, ISD::FCEIL,
+ ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
+ ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
+ ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
+ ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
+ ISD::STRICT_FMAXIMUM})
+ setOperationAction(Op, MVT::f16, Promote);
+
+ // Round-to-integer operations need custom lowering for fp16, as Promote
+ // doesn't work because the result type is integer.
+ for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
+ ISD::STRICT_LLRINT})
+ setOperationAction(Op, MVT::f16, Custom);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
@@ -691,37 +627,35 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- for (MVT Ty : {MVT::f32, MVT::f64}) {
- setOperationAction(ISD::FFLOOR, Ty, Legal);
- setOperationAction(ISD::FNEARBYINT, Ty, Legal);
- setOperationAction(ISD::FCEIL, Ty, Legal);
- setOperationAction(ISD::FRINT, Ty, Legal);
- setOperationAction(ISD::FTRUNC, Ty, Legal);
- setOperationAction(ISD::FROUND, Ty, Legal);
- setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
- setOperationAction(ISD::FMINNUM, Ty, Legal);
- setOperationAction(ISD::FMAXNUM, Ty, Legal);
- setOperationAction(ISD::FMINIMUM, Ty, Legal);
- setOperationAction(ISD::FMAXIMUM, Ty, Legal);
- setOperationAction(ISD::LROUND, Ty, Legal);
- setOperationAction(ISD::LLROUND, Ty, Legal);
- setOperationAction(ISD::LRINT, Ty, Legal);
- setOperationAction(ISD::LLRINT, Ty, Legal);
- }
-
- if (Subtarget->hasFullFP16()) {
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
- setOperationAction(ISD::FCEIL, MVT::f16, Legal);
- setOperationAction(ISD::FRINT, MVT::f16, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
- setOperationAction(ISD::FROUND, MVT::f16, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
- }
+ for (auto Op :
+ {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
+ ISD::FRINT, ISD::FTRUNC, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND,
+ ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
+ ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT,
+ ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
+ ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND,
+ ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) {
+ for (MVT Ty : {MVT::f32, MVT::f64})
+ setOperationAction(Op, Ty, Legal);
+ if (Subtarget->hasFullFP16())
+ setOperationAction(Op, MVT::f16, Legal);
+ }
+
+ // Basic strict FP operations are legal
+ for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) {
+ for (MVT Ty : {MVT::f32, MVT::f64})
+ setOperationAction(Op, Ty, Legal);
+ if (Subtarget->hasFullFP16())
+ setOperationAction(Op, MVT::f16, Legal);
+ }
+
+ // Strict conversion to a larger type is legal
+ for (auto VT : {MVT::f32, MVT::f64})
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -891,47 +825,33 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ABS);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::XOR);
- setTargetDAGCombine(ISD::SINT_TO_FP);
- setTargetDAGCombine(ISD::UINT_TO_FP);
-
- setTargetDAGCombine(ISD::FP_TO_SINT);
- setTargetDAGCombine(ISD::FP_TO_UINT);
- setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
- setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
- setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
+ ISD::UINT_TO_FP});
+
+ setTargetDAGCombine({ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
+ ISD::FP_TO_UINT_SAT, ISD::FDIV});
// Try and combine setcc with csel
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
- setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::VECTOR_SPLICE);
- setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::TRUNCATE);
- setTargetDAGCombine(ISD::CONCAT_VECTORS);
- setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
- setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
+ ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
+ ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
+ ISD::INSERT_SUBVECTOR, ISD::STORE});
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine({ISD::SELECT, ISD::VSELECT});
+
+ setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
+ ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+ ISD::VECREDUCE_ADD, ISD::STEP_VECTOR});
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::VECREDUCE_ADD);
- setTargetDAGCombine(ISD::STEP_VECTOR);
+ setTargetDAGCombine({ISD::MGATHER, ISD::MSCATTER});
setTargetDAGCombine(ISD::FP_EXTEND);
@@ -980,43 +900,29 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
- setOperationAction(ISD::FABS, MVT::v1f64, Expand);
- setOperationAction(ISD::FADD, MVT::v1f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
- setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
- setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
- setOperationAction(ISD::FMA, MVT::v1f64, Expand);
- setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
- setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
- setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
- setOperationAction(ISD::FREM, MVT::v1f64, Expand);
- setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
- setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
- setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
- setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
- setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
- setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
- setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
-
- setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
- setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
-
- setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+ for (auto Op :
+ {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
+ ISD::BR_CC, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FDIV, ISD::FMA,
+ ISD::FNEG, ISD::FABS, ISD::FCEIL,
+ ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
+ ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
+ ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
+ ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
+ ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
+ ISD::STRICT_FMAXIMUM})
+ setOperationAction(Op, MVT::v1f64, Expand);
+
+ for (auto Op :
+ {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
+ ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
+ ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT,
+ ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND})
+ setOperationAction(Op, MVT::v1i64, Expand);
// AArch64 doesn't have direct vector ->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
@@ -1024,14 +930,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
// Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
// conversion happens in two steps: v4i32 -> v4f32 -> v4f16
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+ for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
+ ISD::STRICT_UINT_TO_FP})
+ for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
+ setOperationAction(Op, VT, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
@@ -1088,6 +992,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
MVT::v4i32}) {
+ setOperationAction(ISD::AVGFLOORS, VT, Legal);
+ setOperationAction(ISD::AVGFLOORU, VT, Legal);
+ setOperationAction(ISD::AVGCEILS, VT, Legal);
+ setOperationAction(ISD::AVGCEILU, VT, Legal);
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
}
@@ -1141,31 +1049,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
- setOperationAction(ISD::FFLOOR, Ty, Legal);
- setOperationAction(ISD::FNEARBYINT, Ty, Legal);
- setOperationAction(ISD::FCEIL, Ty, Legal);
- setOperationAction(ISD::FRINT, Ty, Legal);
- setOperationAction(ISD::FTRUNC, Ty, Legal);
- setOperationAction(ISD::FROUND, Ty, Legal);
- setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
- }
-
- if (Subtarget->hasFullFP16()) {
- for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
- setOperationAction(ISD::FFLOOR, Ty, Legal);
- setOperationAction(ISD::FNEARBYINT, Ty, Legal);
- setOperationAction(ISD::FCEIL, Ty, Legal);
- setOperationAction(ISD::FRINT, Ty, Legal);
- setOperationAction(ISD::FTRUNC, Ty, Legal);
- setOperationAction(ISD::FROUND, Ty, Legal);
- setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
- }
+ for (auto Op :
+ {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
+ ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR,
+ ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT,
+ ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) {
+ for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
+ setOperationAction(Op, Ty, Legal);
+ if (Subtarget->hasFullFP16())
+ for (MVT Ty : {MVT::v4f16, MVT::v8f16})
+ setOperationAction(Op, Ty, Legal);
}
- if (Subtarget->hasSVE())
- setOperationAction(ISD::VSCALE, MVT::i32, Custom);
-
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
@@ -1174,6 +1069,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+
+ // ADDP custom lowering
+ for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
+ setOperationAction(ISD::ADD, VT, Custom);
+ // FADDP custom lowering
+ for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
+ setOperationAction(ISD::FADD, VT, Custom);
+ }
+
+ if (Subtarget->hasSME()) {
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
}
if (Subtarget->hasSVE()) {
@@ -1194,7 +1100,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1224,6 +1130,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
+
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
}
// Illegal unpacked integer vector types.
@@ -1234,10 +1149,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Legalize unpacked bitcasts to REINTERPRET_CAST.
for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
- MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
+ MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
setOperationAction(ISD::BITCAST, VT, Custom);
- for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
+ for (auto VT :
+ { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
+ MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
+
+ for (auto VT :
+ {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1269,18 +1190,33 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MSCATTER, VT, Custom);
}
- for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
- for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
- // Avoid marking truncating FP stores as legal to prevent the
- // DAGCombiner from creating unsupported truncating stores.
+ // First, exclude all scalable vector extending loads and truncating
+ // stores, covering both integer and floating-point scalable vectors.
+ for (MVT VT : MVT::scalable_vector_valuetypes()) {
+ for (MVT InnerVT : MVT::scalable_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
- // SVE does not have floating-point extending loads.
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ // Then, selectively enable those which we directly support.
+ setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i8, Legal);
+ setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i16, Legal);
+ setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i32, Legal);
+ setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i8, Legal);
+ setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i16, Legal);
+ setTruncStoreAction(MVT::nxv8i16, MVT::nxv8i8, Legal);
+ for (auto Op : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
+ setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i8, Legal);
+ setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i16, Legal);
+ setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i32, Legal);
+ setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i8, Legal);
+ setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i16, Legal);
+ setLoadExtAction(Op, MVT::nxv8i16, MVT::nxv8i8, Legal);
+ }
+
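
This two-phase pattern, expand every (value type, memory type) pair and then re-legalize the supported handful, avoids enumerating the illegal cross product by hand. A minimal sketch of the same default-deny/allow-list idiom with stand-in types (illustrative, not the LLVM API):

#include <map>
#include <utility>
#include <vector>

enum class Action { Expand, Legal };
using Key = std::pair<int, int>; // (result type, memory type) as opaque ids

std::map<Key, Action>
buildExtLoadActions(const std::vector<Key> &allPairs,
                    const std::vector<Key> &supported) {
  std::map<Key, Action> actions;
  for (const Key &k : allPairs)
    actions[k] = Action::Expand; // phase 1: default-deny everything
  for (const Key &k : supported)
    actions[k] = Action::Legal;  // phase 2: allow-list hardware support
  return actions;
}

int main() {
  auto a = buildExtLoadActions({{0, 0}, {0, 1}, {1, 0}}, {{0, 1}});
  return a.at({0, 1}) == Action::Legal && a.at({1, 0}) == Action::Expand ? 0
                                                                         : 1;
}
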
// SVE supports truncating stores of 64- and 128-bit vectors
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
@@ -1295,7 +1231,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
@@ -1326,6 +1262,29 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+
+ setCondCodeAction(ISD::SETO, VT, Expand);
+ setCondCodeAction(ISD::SETOLT, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETOLE, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
+ setCondCodeAction(ISD::SETUGT, VT, Expand);
+ setCondCodeAction(ISD::SETUEQ, VT, Expand);
+ setCondCodeAction(ISD::SETONE, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
@@ -1334,13 +1293,23 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
}
- setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
-
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+ // NEON doesn't support integer divides, but SVE does
+ for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+ MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ }
+
+ // NEON doesn't support 64-bit vector integer muls, but SVE does.
+ setOperationAction(ISD::MUL, MVT::v1i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1367,32 +1336,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
- setOperationAction(ISD::MUL, MVT::v1i64, Custom);
- setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
- setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
- setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
- setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
- setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
- setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
- setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
- setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
- setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
- setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
- setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
- setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
- setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
- setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
@@ -1426,6 +1377,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
+
+ setOperationAction(ISD::VSCALE, MVT::i32, Custom);
}
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
@@ -1434,6 +1387,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
+
+ IsStrictFPEnabled = true;
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -1490,10 +1445,10 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
+ for (unsigned Opcode :
+ {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
+ ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
+ setOperationAction(Opcode, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
@@ -1503,14 +1458,39 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
- // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
+ // F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP
+ // NEON types.
if (VT.isFloatingPoint() &&
VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
- {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
+ {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM,
+ ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
+ ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
+ ISD::STRICT_FSQRT})
setOperationAction(Opcode, VT, Legal);
+ // Strict fp extend and trunc are legal
+ if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16)
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
+ if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64)
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
+
+ // FIXME: We could potentially make use of the vector comparison instructions
+ // for STRICT_FSETCC and STRICT_FSETCCS, but there are a number of
+ // complications:
+ // * FCMPEQ/NE are quiet comparisons while the rest are signalling ones,
+ // so we would need to expand when the condition code doesn't match the
+ // kind of comparison.
+ // * Some kinds of comparison require more than one FCMXY instruction so
+ // would need to be expanded instead.
+ // * The lowering of the non-strict versions involves target-specific ISD
+ // nodes so we would likely need to add strict versions of all of them and
+ // handle them appropriately.
+ setOperationAction(ISD::STRICT_FSETCC, VT, Expand);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Expand);
+
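
For background on the quiet/signalling distinction the FIXME leans on: per IEEE 754, an equality compare of a quiet NaN is quiet, while ordered relational compares signal invalid. A small host-side demonstration in standard C++ (independent of this backend, and assuming the compiler honors the FP environment, e.g. Clang with -ffp-model=strict):

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  volatile double x = std::nan("");
  std::feclearexcept(FE_ALL_EXCEPT);
  volatile bool q = (x == 1.0); // quiet compare: no exception on quiet NaN
  int afterQuiet = std::fetestexcept(FE_INVALID);
  std::feclearexcept(FE_ALL_EXCEPT);
  volatile bool s = (x < 1.0);  // ordered compare: signals invalid on NaN
  int afterOrdered = std::fetestexcept(FE_INVALID);
  std::printf("quiet: %s, ordered: %s\n",
              afterQuiet ? "FE_INVALID" : "clear",
              afterOrdered ? "FE_INVALID" : "clear");
  (void)q;
  (void)s;
  return 0;
}
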
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -1526,9 +1506,11 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
if (!Subtarget->hasSVE())
return true;
- // We can only support legal predicate result types.
+ // We can only support legal predicate result types. We can use the SVE
+ // whilelo instruction for generating fixed-width predicates too.
if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
- ResVT != MVT::nxv16i1)
+ ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
+ ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
return true;
// The whilelo instruction only works with i32 or i64 scalar inputs.
@@ -1559,7 +1541,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
- setCondCodeAction(ISD::SETUNE, VT, Expand);
+ setCondCodeAction(ISD::SETONE, VT, Expand);
}
// Mark integer truncating stores/extending loads as having custom lowering
@@ -1830,11 +1812,21 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
- const SDValue Op, KnownBits &Known,
- const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
+ const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+ const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
+ case AArch64ISD::DUP: {
+ SDValue SrcOp = Op.getOperand(0);
+ Known = DAG.computeKnownBits(SrcOp, Depth + 1);
+ if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
+ assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
+ "Expected DUP implicit truncation");
+ Known = Known.trunc(Op.getScalarValueSizeInBits());
+ }
+ break;
+ }
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
@@ -2006,7 +1998,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
- MAKE_CASE(AArch64ISD::ADD_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
@@ -2016,7 +2007,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::SMIN_PRED)
MAKE_CASE(AArch64ISD::SRA_PRED)
MAKE_CASE(AArch64ISD::SRL_PRED)
- MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
@@ -2061,6 +2051,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::DUPLANE16)
MAKE_CASE(AArch64ISD::DUPLANE32)
MAKE_CASE(AArch64ISD::DUPLANE64)
+ MAKE_CASE(AArch64ISD::DUPLANE128)
MAKE_CASE(AArch64ISD::MOVI)
MAKE_CASE(AArch64ISD::MOVIshift)
MAKE_CASE(AArch64ISD::MOVIedit)
@@ -2108,10 +2099,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
- MAKE_CASE(AArch64ISD::SRHADD)
- MAKE_CASE(AArch64ISD::URHADD)
- MAKE_CASE(AArch64ISD::SHADD)
- MAKE_CASE(AArch64ISD::UHADD)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
@@ -2150,6 +2137,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
+ MAKE_CASE(AArch64ISD::RDSVL)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
@@ -2267,10 +2255,13 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
+ MAKE_CASE(AArch64ISD::ADDP)
+ MAKE_CASE(AArch64ISD::SADDLP)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
@@ -2278,6 +2269,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
+ MAKE_CASE(AArch64ISD::CALL_BTI)
}
#undef MAKE_CASE
return nullptr;
@@ -2351,6 +2343,92 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
return BB;
}
+MachineBasicBlock *
+AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+ MIB.add(MI.getOperand(1)); // slice index register
+ MIB.add(MI.getOperand(2)); // slice index offset
+ MIB.add(MI.getOperand(3)); // pg
+ MIB.add(MI.getOperand(4)); // base
+ MIB.add(MI.getOperand(5)); // offset
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
+
+ MIB.addReg(AArch64::ZA, RegState::Define);
+ MIB.add(MI.getOperand(0)); // Vector select register
+ MIB.add(MI.getOperand(1)); // Vector select offset
+ MIB.add(MI.getOperand(2)); // Base
+ MIB.add(MI.getOperand(1)); // Offset, same as vector select offset
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI, MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+ MIB.add(MI.getOperand(1)); // pn
+ MIB.add(MI.getOperand(2)); // pm
+ MIB.add(MI.getOperand(3)); // zn
+ MIB.add(MI.getOperand(4)); // zm
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+ MIB.add(MI.getOperand(1)); // Slice index register
+ MIB.add(MI.getOperand(2)); // Slice index offset
+ MIB.add(MI.getOperand(3)); // pg
+ MIB.add(MI.getOperand(4)); // zn
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
+ MIB.add(MI.getOperand(0)); // Mask
+
+ unsigned Mask = MI.getOperand(0).getImm();
+ for (unsigned I = 0; I < 8; I++) {
+ if (Mask & (1 << I))
+ MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
+ }
+
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
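
A worked example of the mask expansion in EmitZero: each set bit I of the 8-bit immediate marks 64-bit tile ZAD0+I as implicitly defined. The standalone loop below reproduces just that bit scan (illustrative; the register numbering is hypothetical):

#include <cstdio>

int main() {
  const unsigned ZAD0 = 0;    // hypothetical base register number
  unsigned mask = 0b00000101; // e.g. ZERO {za0.d, za2.d}
  for (unsigned i = 0; i < 8; ++i)
    if (mask & (1u << i))
      std::printf("implicit-def ZAD%u\n", ZAD0 + i); // prints ZAD0, ZAD2
  return 0;
}
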
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
@@ -2366,9 +2444,14 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case TargetOpcode::STATEPOINT:
// STATEPOINT is a pseudo instruction which has no implicit defs/uses
// while bl call instruction (where statepoint will be lowered at the end)
- // has implicit def. Add this implicit dead def here as a workaround.
- MI.addOperand(*MI.getMF(), MachineOperand::CreateReg(AArch64::LR, true,
- true, false, true));
+ // has an implicit def. This def is early-clobber, as it is written at
+ // the moment of the call, before any use is read.
+ // Add this implicit dead def here as a workaround.
+ MI.addOperand(*MI.getMF(),
+ MachineOperand::CreateReg(
+ AArch64::LR, /*isDef*/ true,
+ /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
+ /*isUndef*/ false, /*isEarlyClobber*/ true));
LLVM_FALLTHROUGH;
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
@@ -2376,6 +2459,108 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
+ case AArch64::LD1_MXIPXX_H_PSEUDO_B:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
+ case AArch64::LD1_MXIPXX_H_PSEUDO_H:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
+ case AArch64::LD1_MXIPXX_H_PSEUDO_S:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
+ case AArch64::LD1_MXIPXX_H_PSEUDO_D:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
+ case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
+ case AArch64::LD1_MXIPXX_V_PSEUDO_B:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
+ case AArch64::LD1_MXIPXX_V_PSEUDO_H:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
+ case AArch64::LD1_MXIPXX_V_PSEUDO_S:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
+ case AArch64::LD1_MXIPXX_V_PSEUDO_D:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
+ case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
+ return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
+ case AArch64::LDR_ZA_PSEUDO:
+ return EmitFill(MI, BB);
+ case AArch64::BFMOPA_MPPZZ_PSEUDO:
+ return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB);
+ case AArch64::BFMOPS_MPPZZ_PSEUDO:
+ return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB);
+ case AArch64::FMOPAL_MPPZZ_PSEUDO:
+ return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB);
+ case AArch64::FMOPSL_MPPZZ_PSEUDO:
+ return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB);
+ case AArch64::FMOPA_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::FMOPS_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::FMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::FMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::SMOPA_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::SMOPS_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::UMOPA_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::UMOPS_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::SUMOPA_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::SUMOPS_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::USMOPA_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::USMOPS_MPPZZ_S_PSEUDO:
+ return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+ case AArch64::SMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::SMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::UMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::UMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::SUMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::SUMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::USMOPA_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::USMOPS_MPPZZ_D_PSEUDO:
+ return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_H:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_H, AArch64::ZAH0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_S:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_S, AArch64::ZAS0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_D:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_D, AArch64::ZAD0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_H_PSEUDO_Q:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_Q, AArch64::ZAQ0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_B:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_B, AArch64::ZAB0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_H:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_H, AArch64::ZAH0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_S:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_S, AArch64::ZAS0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_D:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_D, AArch64::ZAD0, MI,
+ BB);
+ case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
+ return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI,
+ BB);
+ case AArch64::ZERO_M_PSEUDO:
+ return EmitZero(MI, BB);
}
}
@@ -2596,7 +2781,17 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
- assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
+
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
+
+ if (VT == MVT::f16 && !FullFP16) {
+ LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {Chain, LHS});
+ RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {LHS.getValue(1), RHS});
+ Chain = RHS.getValue(1);
+ VT = MVT::f32;
+ }
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
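
The promotion above is lossless: every f16 value widens to f32 exactly, so comparing the widened operands gives the same answer a native f16 compare would. A host-side check of the exactness claim, assuming a compiler with _Float16 support (recent Clang or GCC; NaN payloads are skipped since conversion may quiet a signalling NaN):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  unsigned mismatches = 0;
  for (uint32_t bits = 0; bits < 0x10000; ++bits) {
    uint16_t b16 = static_cast<uint16_t>(bits);
    // Skip NaN patterns: the quiet bit may be set during conversion.
    if ((b16 & 0x7C00) == 0x7C00 && (b16 & 0x03FF))
      continue;
    _Float16 h;
    std::memcpy(&h, &b16, sizeof h);
    float widened = static_cast<float>(h); // exact: f32 is a superset of f16
    _Float16 back = static_cast<_Float16>(widened);
    uint16_t b2;
    std::memcpy(&b2, &back, sizeof back);
    if (b2 != b16)
      ++mismatches;
  }
  std::printf("non-round-tripping non-NaN patterns: %u\n", mismatches); // 0
  return 0;
}
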
@@ -2605,8 +2800,7 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
- const bool FullFP16 =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
@@ -2714,8 +2908,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
- const bool FullFP16 =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
@@ -3282,40 +3475,68 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+// If Invert is false, sets 'C' bit of NZCV to 0 if value is 0, else sets 'C'
+// bit to 1. If Invert is true, sets 'C' bit of NZCV to 1 if value is 0, else
+// sets 'C' bit to 0.
+static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
+ SDLoc DL(Value);
+ EVT VT = Value.getValueType();
+ SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
+ SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
+ SDValue Cmp =
+ DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
+ return Cmp.getValue(1);
+}
- // Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+// If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
+// If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
+static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG,
+ bool Invert) {
+ assert(Flag.getResNo() == 1);
+ SDLoc DL(Flag);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
+ SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
+}
+
+// Value is 1 if 'V' bit of NZCV is 1, else 0
+static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) {
+ assert(Flag.getResNo() == 1);
+ SDLoc DL(Flag);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32);
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
+}
+
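
To make the helpers above concrete: AArch64's SUBS a, b sets C when the unsigned subtraction does not borrow, i.e. when a >= b, and SBCS consumes the borrow-inverted carry, which is what the Invert paths account for. A standalone model of the round trip (a sketch of the flag semantics, not the DAG code):

#include <cassert>
#include <cstdint>

// C flag produced by SUBS a, b: set when the subtraction does not borrow.
bool subsCarry(uint64_t a, uint64_t b) { return a >= b; }

// valueToCarryFlag: Invert=false emits SUBS value, 1 -> C == (value != 0);
//                   Invert=true  emits SUBS 0, value -> C == (value == 0).
bool valueToCarry(uint64_t value, bool invert) {
  return invert ? subsCarry(0, value) : subsCarry(value, 1);
}

// carryFlagToValue: CSEL 1, 0 on HS (C set), or on LO (C clear) if inverted.
uint64_t carryToValue(bool c, bool invert) {
  return (invert ? !c : c) ? 1 : 0;
}

int main() {
  // Encoding then decoding with matching Invert recovers the boolean value.
  for (uint64_t v : {uint64_t{0}, uint64_t{1}, uint64_t{42}})
    for (bool inv : {false, true})
      assert(carryToValue(valueToCarry(v, inv), inv) == uint64_t{v != 0});
  return 0;
}
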
+// This lowering is inefficient, but it will get cleaned up by
+// `foldOverflowCheck`
+static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
+ bool IsSigned) {
+ EVT VT0 = Op.getValue(0).getValueType();
+ EVT VT1 = Op.getValue(1).getValueType();
+
+ if (VT0 != MVT::i32 && VT0 != MVT::i64)
return SDValue();
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ bool InvertCarry = Opcode == AArch64ISD::SBCS;
+ SDValue OpLHS = Op.getOperand(0);
+ SDValue OpRHS = Op.getOperand(1);
+ SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
- unsigned Opc;
- bool ExtraOp = false;
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Invalid code");
- case ISD::ADDC:
- Opc = AArch64ISD::ADDS;
- break;
- case ISD::SUBC:
- Opc = AArch64ISD::SUBS;
- break;
- case ISD::ADDE:
- Opc = AArch64ISD::ADCS;
- ExtraOp = true;
- break;
- case ISD::SUBE:
- Opc = AArch64ISD::SBCS;
- ExtraOp = true;
- break;
- }
+ SDLoc DL(Op);
+ SDVTList VTs = DAG.getVTList(VT0, VT1);
+
+ SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
+ OpRHS, OpCarryIn);
- if (!ExtraOp)
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
- Op.getOperand(2));
+ SDValue OutFlag =
+ IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
+ : carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
}
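
For reference, ISD::ADDCARRY produces {sum, carry-out} from a + b + carry-in, which is how wide additions are split into legal pieces. A standalone model of the semantics, chaining two 64-bit limbs into a 128-bit add (illustrative only):

#include <cassert>
#include <cstdint>
#include <utility>

std::pair<uint64_t, bool> addCarry(uint64_t a, uint64_t b, bool cin) {
  uint64_t sum = a + b + (cin ? 1 : 0);
  bool cout = sum < a || (cin && sum == a); // unsigned overflow check
  return {sum, cout};
}

int main() {
  // 128-bit add built from two 64-bit ADDCARRYs.
  auto lo = addCarry(~0ull, 1, false); // wraps: {0, carry = 1}
  auto hi = addCarry(0, 0, lo.second); // propagates the carry
  assert(lo.first == 0 && hi.first == 1);
  return 0;
}
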
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
@@ -3417,7 +3638,8 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
- EVT InVT = Op.getOperand(0).getValueType();
+ bool IsStrict = Op->isStrictFPOpcode();
+ EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
@@ -3437,6 +3659,12 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
!Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
+ if (IsStrict) {
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
@@ -3446,6 +3674,13 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
SDLoc dl(Op);
+ if (IsStrict) {
+ InVT = InVT.changeVectorElementTypeToInteger();
+ SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
+ return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl);
+ }
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
@@ -3457,10 +3692,30 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
+ if (IsStrict) {
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
+ // Use a scalar operation for conversions between single-element vectors of
+ // the same size.
+ if (NumElts == 1) {
+ SDLoc dl(Op);
+ SDValue Extract = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
+ Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
+ EVT ScalarVT = VT.getScalarType();
+ if (IsStrict)
+ return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
+ {Op.getOperand(0), Extract});
+ return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
+ }
+
// Type changing conversions are illegal.
return Op;
}
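
One detail worth calling out in the strict paths above: for strict FP nodes operand 0 is the chain and the FP value follows it, hence the recurring Op.getOperand(IsStrict ? 1 : 0). A trivial stand-in illustrating the indexing convention (not the SDNode API):

#include <cassert>
#include <vector>

struct Node {          // stand-in for an SDNode's operand list
  bool strict;
  std::vector<int> ops; // ops[0] is the chain when strict
};

int fpOperand(const Node &n) { return n.ops[n.strict ? 1 : 0]; }

int main() {
  Node plain{false, {/*value*/ 7}};
  Node strict{true, {/*chain*/ 0, /*value*/ 7}};
  assert(fpOperand(plain) == 7 && fpOperand(strict) == 7);
  return 0;
}
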
@@ -3475,8 +3730,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
- assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
+ if (IsStrict) {
+ SDValue Ext =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), SrcVal});
+ return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
@@ -3507,7 +3768,7 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
"Saturation width cannot exceed result width");
// TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
- // Currently, the `llvm.fpto[su]i.sat.*` instrinsics don't accept scalable
+ // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
// types, so this is hard to reach.
if (DstVT.isScalableVector())
return SDValue();
@@ -3545,17 +3806,14 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
- APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
- IntVT);
+ APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
- APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
- IntVT);
+ APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
- APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
- IntVT);
+ APInt::getAllOnesValue(SatWidth).zext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
}
@@ -3604,14 +3862,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
- APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
+ APInt::getSignedMaxValue(SatWidth).sext(DstWidth), DL, DstVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
- APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
+ APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
- APInt::getAllOnesValue(SatWidth).zextOrSelf(DstWidth), DL, DstVT);
+ APInt::getAllOnesValue(SatWidth).zext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
}
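
// A hedged, scalarised sketch of the clamp sequence above: the saturation
// bounds for SatWidth bits are sign-extended to the working width and
// applied as smin/smax. Illustrative only; assumes SatWidth < 64.
#include <algorithm>
#include <cstdint>
#include <cstdio>

static int64_t saturate(int64_t NativeCvt, unsigned SatWidth) {
  int64_t Max = (INT64_C(1) << (SatWidth - 1)) - 1; // signed max, sext'd
  int64_t Min = -(INT64_C(1) << (SatWidth - 1));    // signed min, sext'd
  return std::max(Min, std::min(NativeCvt, Max));   // SMIN then SMAX
}

int main() {
  printf("%lld\n", (long long)saturate(100000, 16)); // prints 32767
}
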
@@ -3623,9 +3881,10 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
+ bool IsStrict = Op->isStrictFPOpcode();
EVT VT = Op.getValueType();
SDLoc dl(Op);
- SDValue In = Op.getOperand(0);
+ SDValue In = Op.getOperand(IsStrict ? 1 : 0);
EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
@@ -3653,6 +3912,13 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
+ if (IsStrict) {
+ In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
+ {Op.getOperand(0), In});
+ return DAG.getNode(
+ ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
+ {In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)});
+ }
In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
@@ -3661,9 +3927,24 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
+ if (IsStrict)
+ return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
return DAG.getNode(Opc, dl, VT, In);
}
+ // Use a scalar operation for conversions between single-element vectors of
+ // the same size.
+ if (VT.getVectorNumElements() == 1) {
+ SDValue Extract = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
+ In, DAG.getConstant(0, dl, MVT::i64));
+ EVT ScalarVT = VT.getScalarType();
+ if (IsStrict)
+ return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
+ {Op.getOperand(0), Extract});
+ return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
+ }
+
return Op;
}
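
// Scalarised sketch of the widening path above: integer lanes narrower than
// the FP lanes are sign- or zero-extended to the FP lane width before the
// conversion. The helper name is assumed, not from the patch.
#include <cstdint>
#include <cstdio>

static double int_to_fp_widened(int16_t V, bool IsSigned) {
  int64_t Wide = IsSigned ? (int64_t)V : (int64_t)(uint16_t)V; // CastOpc
  return static_cast<double>(Wide);
}

int main() {
  printf("%.1f %.1f\n", int_to_fp_widened(-1, true),
         int_to_fp_widened(-1, false)); // prints -1.0 65535.0
}
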
@@ -3676,10 +3957,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
// f16 conversions are promoted to f32 when full fp16 is not supported.
- if (Op.getValueType() == MVT::f16 &&
- !Subtarget->hasFullFP16()) {
- assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
+ if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
+ if (IsStrict) {
+ SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), SrcVal});
+ return DAG.getNode(
+ ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other},
+ {Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
+ }
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
@@ -3742,6 +4028,14 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
return LowerFixedLengthBitcastToSVE(Op, DAG);
if (OpVT.isScalableVector()) {
+ // Bitcasting between unpacked vector types of different element counts is
+ // not a NOP because the live elements are laid out differently.
+ // 01234567
+ // e.g. nxv2i32 = XX??XX??
+ // nxv4f16 = X?X?X?X?
+ if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount())
+ return SDValue();
+
if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
"Expected int->fp bitcast!");
@@ -3964,7 +4258,7 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
@@ -4059,10 +4353,26 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
case AArch64ISD::SETCC_MERGE_ZERO:
return Reinterpret;
case ISD::INTRINSIC_WO_CHAIN:
- if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
+ switch (InOp.getConstantOperandVal(0)) {
+ case Intrinsic::aarch64_sve_ptrue:
+ case Intrinsic::aarch64_sve_cmpeq_wide:
+ case Intrinsic::aarch64_sve_cmpne_wide:
+ case Intrinsic::aarch64_sve_cmpge_wide:
+ case Intrinsic::aarch64_sve_cmpgt_wide:
+ case Intrinsic::aarch64_sve_cmplt_wide:
+ case Intrinsic::aarch64_sve_cmple_wide:
+ case Intrinsic::aarch64_sve_cmphs_wide:
+ case Intrinsic::aarch64_sve_cmphi_wide:
+ case Intrinsic::aarch64_sve_cmplo_wide:
+ case Intrinsic::aarch64_sve_cmpls_wide:
return Reinterpret;
+ }
}
+  // Splat vectors of one (an all-true predicate) will generate ptrue
+  // instructions.

+ if (ISD::isConstantSplatVectorAllOnes(InOp.getNode()))
+ return Reinterpret;
+
// Otherwise, zero the newly introduced lanes.
SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
SDValue MaskReinterpret =
@@ -4073,12 +4383,12 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
+ SDLoc DL(Op);
switch (IntNo) {
default:
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::aarch64_mops_memset_tag: {
auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
- SDLoc DL(Op);
SDValue Chain = Node->getChain();
SDValue Dst = Op.getOperand(2);
SDValue Val = Op.getOperand(3);
@@ -4100,6 +4410,15 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
// changed.
return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
}
+ case Intrinsic::aarch64_sme_get_pstatesm: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue MRS = DAG.getNode(
+ AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
+ Chain, DAG.getConstant(AArch64SysReg::SVCR, DL, MVT::i64));
+ SDValue Mask = DAG.getConstant(/* PSTATE.SM */ 1, DL, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, MRS, Mask);
+ return DAG.getMergeValues({And, Chain}, DL);
+ }
}
}
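
// Sketch of the get_pstatesm lowering above: read SVCR (here a stand-in
// value rather than a real MRS) and mask bit 0, which holds PSTATE.SM.
#include <cstdint>
#include <cstdio>

static uint64_t get_pstate_sm(uint64_t Svcr) {
  return Svcr & 1; // PSTATE.SM is bit 0 of SVCR
}

int main() {
  printf("%llu\n", (unsigned long long)get_pstate_sm(0x3)); // prints 1
}
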
@@ -4196,6 +4515,26 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sme_cntsb:
+ return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
+ DAG.getConstant(1, dl, MVT::i32));
+ case Intrinsic::aarch64_sme_cntsh: {
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
+ SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One);
+ return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One);
+ }
+ case Intrinsic::aarch64_sme_cntsw: {
+ SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
+ DAG.getConstant(1, dl, MVT::i32));
+ return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
+ DAG.getConstant(2, dl, MVT::i32));
+ }
+ case Intrinsic::aarch64_sme_cntsd: {
+ SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
+ DAG.getConstant(1, dl, MVT::i32));
+ return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
+ DAG.getConstant(3, dl, MVT::i32));
+ }
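
// Sketch of the cntsb/cntsh/cntsw/cntsd cases above: RDSVL #1 yields the
// streaming vector length in bytes, and the remaining counts are right
// shifts of that byte count. SvlBytes stands in for the RDSVL result.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t SvlBytes = 64; // e.g. a 512-bit streaming vector length
  printf("b=%llu h=%llu w=%llu d=%llu\n",
         (unsigned long long)SvlBytes,         // cntsb
         (unsigned long long)(SvlBytes >> 1),  // cntsh
         (unsigned long long)(SvlBytes >> 2),  // cntsw
         (unsigned long long)(SvlBytes >> 3)); // cntsd
}
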
case Intrinsic::aarch64_sve_cnt: {
SDValue Data = Op.getOperand(3);
// CTPOP only supports integer operands.
@@ -4300,6 +4639,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_revw:
return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_revd:
+ return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
@@ -4336,7 +4678,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
-
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
@@ -4382,9 +4723,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
IntNo == Intrinsic::aarch64_neon_shadd);
bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_urhadd);
- unsigned Opcode =
- IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
- : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
+ unsigned Opcode = IsSignedAdd
+ ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
+ : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
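
// Sketch of what the AVGFLOOR/AVGCEIL nodes above compute for the
// [s|u][r]hadd intrinsics: a halving add that cannot overflow an
// intermediate. Standard bit identities, shown here for unsigned bytes.
#include <cstdint>
#include <cstdio>

static uint8_t avg_floor_u8(uint8_t A, uint8_t B) {
  return (A & B) + ((A ^ B) >> 1); // truncating halving add
}
static uint8_t avg_ceil_u8(uint8_t A, uint8_t B) {
  return (A | B) - ((A ^ B) >> 1); // rounding halving add
}

int main() {
  printf("%u %u\n", avg_floor_u8(255, 254), avg_ceil_u8(255, 254)); // 254 255
}
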
@@ -4395,8 +4736,11 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
+ case Intrinsic::aarch64_neon_saddlp:
case Intrinsic::aarch64_neon_uaddlp: {
- unsigned Opcode = AArch64ISD::UADDLP;
+ unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
+ ? AArch64ISD::UADDLP
+ : AArch64ISD::SADDLP;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
}
case Intrinsic::aarch64_neon_sdot:
@@ -4428,19 +4772,26 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
return false;
}
-bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
- if (VT.getVectorElementType() == MVT::i32 &&
- VT.getVectorElementCount().getKnownMinValue() >= 4 &&
- !VT.isFixedLengthVector())
- return true;
+bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+ EVT DataVT) const {
+ // SVE only supports implicit extension of 32-bit indices.
+ if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
+ return false;
- return false;
+ // Indices cannot be smaller than the main data type.
+ if (IndexVT.getScalarSizeInBits() < DataVT.getScalarSizeInBits())
+ return false;
+
+ // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
+ // element container type, which would violate the previous clause.
+ return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector() ||
- useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
- /*OverrideNEON=*/true);
+ useSVEForFixedLengthVectorVT(
+ ExtVal.getValueType(),
+ /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors());
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
@@ -4466,29 +4817,6 @@ unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
return AddrModes.find(Key)->second;
}
-unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
- std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
- AArch64ISD::SST1_PRED},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
- AArch64ISD::SST1_UXTW_PRED},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
- AArch64ISD::SST1_PRED},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
- AArch64ISD::SST1_SXTW_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
- AArch64ISD::SST1_SCALED_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
- AArch64ISD::SST1_UXTW_SCALED_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
- AArch64ISD::SST1_SCALED_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
- AArch64ISD::SST1_SXTW_SCALED_PRED},
- };
- auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
- return AddrModes.find(Key)->second;
-}
-
unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
switch (Opcode) {
default:
@@ -4511,267 +4839,184 @@ unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
}
}
-bool getGatherScatterIndexIsExtended(SDValue Index) {
- unsigned Opcode = Index.getOpcode();
- if (Opcode == ISD::SIGN_EXTEND_INREG)
- return true;
-
- if (Opcode == ISD::AND) {
- SDValue Splat = Index.getOperand(1);
- if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
- return false;
- ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
- if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
- return false;
- return true;
- }
-
- return false;
-}
-
-// If the base pointer of a masked gather or scatter is null, we
-// may be able to swap BasePtr & Index and use the vector + register
-// or vector + immediate addressing mode, e.g.
-// VECTOR + REGISTER:
-// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
-// -> getelementptr %offset, <vscale x N x T> %indices
-// VECTOR + IMMEDIATE:
-// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
-// -> getelementptr #x, <vscale x N x T> %indices
-void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
- unsigned &Opcode, bool IsGather,
- SelectionDAG &DAG) {
- if (!isNullConstant(BasePtr))
- return;
-
- // FIXME: This will not match for fixed vector type codegen as the nodes in
- // question will have fixed<->scalable conversions around them. This should be
- // moved to a DAG combine or complex pattern so that is executes after all of
- // the fixed vector insert and extracts have been removed. This deficiency
- // will result in a sub-optimal addressing mode being used, i.e. an ADD not
- // being folded into the scatter/gather.
- ConstantSDNode *Offset = nullptr;
- if (Index.getOpcode() == ISD::ADD)
- if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
- if (isa<ConstantSDNode>(SplatVal))
- Offset = cast<ConstantSDNode>(SplatVal);
- else {
- BasePtr = SplatVal;
- Index = Index->getOperand(0);
- return;
- }
- }
-
- unsigned NewOp =
- IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
-
- if (!Offset) {
- std::swap(BasePtr, Index);
- Opcode = NewOp;
- return;
- }
-
- uint64_t OffsetVal = Offset->getZExtValue();
- unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
- auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
-
- if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
- // Index is out of range for the immediate addressing mode
- BasePtr = ConstOffset;
- Index = Index->getOperand(0);
- return;
- }
-
- // Immediate is in range
- Opcode = NewOp;
- BasePtr = Index->getOperand(0);
- Index = ConstOffset;
-}
-
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc DL(Op);
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
- assert(MGT && "Can only custom lower gather load nodes");
-
- bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
- SDValue Index = MGT->getIndex();
+ SDLoc DL(Op);
SDValue Chain = MGT->getChain();
SDValue PassThru = MGT->getPassThru();
SDValue Mask = MGT->getMask();
SDValue BasePtr = MGT->getBasePtr();
- ISD::LoadExtType ExtTy = MGT->getExtensionType();
-
- ISD::MemIndexType IndexType = MGT->getIndexType();
- bool IsScaled =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
- bool IsSigned =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
- bool IdxNeedsExtend =
- getGatherScatterIndexIsExtended(Index) ||
- Index.getSimpleValueType().getVectorElementType() == MVT::i32;
- bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
-
- EVT VT = PassThru.getSimpleValueType();
- EVT IndexVT = Index.getSimpleValueType();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ EVT VT = Op.getValueType();
EVT MemVT = MGT->getMemoryVT();
- SDValue InputVT = DAG.getValueType(MemVT);
-
- if (VT.getVectorElementType() == MVT::bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
+ ISD::LoadExtType ExtType = MGT->getExtensionType();
+ ISD::MemIndexType IndexType = MGT->getIndexType();
- if (IsFixedLength) {
+  // SVE supports zero (and so undef) passthrough values only; everything else
+  // must be handled manually by an explicit select on the load's output.
+ if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
+ SDValue Ops[] = {Chain, DAG.getUNDEF(VT), Mask, BasePtr, Index, Scale};
+ SDValue Load =
+ DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
+ MGT->getMemOperand(), IndexType, ExtType);
+ SDValue Select = DAG.getSelect(DL, VT, Mask, Load, PassThru);
+ return DAG.getMergeValues({Select, Load.getValue(1)}, DL);
+ }
+
+ bool IsScaled = MGT->isIndexScaled();
+ bool IsSigned = MGT->isIndexSigned();
+
+  // SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
+  // must be calculated beforehand.
+ uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
+ if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
+ assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
+ EVT IndexVT = Index.getValueType();
+ Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
+ DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
+ Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
+
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
+ MGT->getMemOperand(), IndexType, ExtType);
+ }
+
+ // Lower fixed length gather to a scalable equivalent.
+ if (VT.isFixedLengthVector()) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
- "Cannot lower when not using SVE for fixed vectors");
- if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
- } else {
- MemVT = getContainerForFixedLengthVector(DAG, MemVT);
- IndexVT = MemVT.changeTypeToInteger();
- }
- InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
- Mask = DAG.getNode(
- ISD::SIGN_EXTEND, DL,
- VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
- }
-
- if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
- PassThru = SDValue();
-
- if (VT.isFloatingPoint() && !IsFixedLength) {
- // Handle FP data by using an integer gather and casting the result.
- if (PassThru) {
- EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
- PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
- }
- InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
- }
-
- SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
-
- if (getGatherScatterIndexIsExtended(Index))
- Index = Index.getOperand(0);
-
- unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
- selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
- /*isGather=*/true, DAG);
-
- if (ResNeedsSignExtend)
- Opcode = getSignExtendedGatherOpcode(Opcode);
-
- if (IsFixedLength) {
- if (Index.getSimpleValueType().isFixedLengthVector())
- Index = convertToScalableVector(DAG, IndexVT, Index);
- if (BasePtr.getSimpleValueType().isFixedLengthVector())
- BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
+ "Cannot lower when not using SVE for fixed vectors!");
+
+ // NOTE: Handle floating-point as if integer then bitcast the result.
+ EVT DataVT = VT.changeVectorElementTypeToInteger();
+ MemVT = MemVT.changeVectorElementTypeToInteger();
+
+ // Find the smallest integer fixed length vector we can use for the gather.
+ EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
+ if (DataVT.getVectorElementType() == MVT::i64 ||
+ Index.getValueType().getVectorElementType() == MVT::i64 ||
+ Mask.getValueType().getVectorElementType() == MVT::i64)
+ PromotedVT = VT.changeVectorElementType(MVT::i64);
+
+ // Promote vector operands except for passthrough, which we know is either
+ // undef or zero, and thus best constructed directly.
+ unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
+
+ // A promoted result type forces the need for an extending load.
+ if (PromotedVT != DataVT && ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
+
+ // Convert fixed length vector operands to scalable.
+ MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
+ Index = convertToScalableVector(DAG, ContainerVT, Index);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
- }
-
- SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
- SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
- Chain = Result.getValue(1);
-
- if (IsFixedLength) {
- Result = convertFromScalableVector(
- DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
- Result);
- Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
- Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
-
- if (PassThru)
- Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
- } else {
- if (PassThru)
- Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
-
+ PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
+ : DAG.getConstant(0, DL, ContainerVT);
+
+ // Emit equivalent scalable vector gather.
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ SDValue Load =
+ DAG.getMaskedGather(DAG.getVTList(ContainerVT, MVT::Other), MemVT, DL,
+ Ops, MGT->getMemOperand(), IndexType, ExtType);
+
+ // Extract fixed length data then convert to the required result type.
+ SDValue Result = convertFromScalableVector(DAG, PromotedVT, Load);
+ Result = DAG.getNode(ISD::TRUNCATE, DL, DataVT, Result);
if (VT.isFloatingPoint())
- Result = getSVESafeBitCast(VT, Result, DAG);
+ Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
+
+ return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
- return DAG.getMergeValues({Result, Chain}, DL);
+ // Everything else is legal.
+ return Op;
}
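
// Sketch of the re-scaling step above: when the scale is not the element
// store size, it is folded into the indices as a left shift by log2(scale)
// and the operation continues unscaled (scale = 1). Illustrative values.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Index = 5;
  uint64_t ScaleVal = 8;                    // must be a power of two
  unsigned Log2Scale = 3;                   // Log2_32(ScaleVal)
  uint64_t ByteOffset = Index << Log2Scale; // pre-scaled index
  printf("%llu == %llu\n", (unsigned long long)ByteOffset,
         (unsigned long long)(Index * ScaleVal)); // both print 40
}
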
SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc DL(Op);
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
- assert(MSC && "Can only custom lower scatter store nodes");
- bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
-
- SDValue Index = MSC->getIndex();
+ SDLoc DL(Op);
SDValue Chain = MSC->getChain();
SDValue StoreVal = MSC->getValue();
SDValue Mask = MSC->getMask();
SDValue BasePtr = MSC->getBasePtr();
-
- ISD::MemIndexType IndexType = MSC->getIndexType();
- bool IsScaled =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
- bool IsSigned =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
- bool NeedsExtend =
- getGatherScatterIndexIsExtended(Index) ||
- Index.getSimpleValueType().getVectorElementType() == MVT::i32;
-
- EVT VT = StoreVal.getSimpleValueType();
- EVT IndexVT = Index.getSimpleValueType();
- SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ EVT VT = StoreVal.getValueType();
EVT MemVT = MSC->getMemoryVT();
- SDValue InputVT = DAG.getValueType(MemVT);
+ ISD::MemIndexType IndexType = MSC->getIndexType();
+ bool Truncating = MSC->isTruncatingStore();
- if (VT.getVectorElementType() == MVT::bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
+ bool IsScaled = MSC->isIndexScaled();
+ bool IsSigned = MSC->isIndexSigned();
+
+  // SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
+  // must be calculated beforehand.
+ uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
+ if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
+ assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
+ EVT IndexVT = Index.getValueType();
+ Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
+ DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
+ Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
- if (IsFixedLength) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
+ MSC->getMemOperand(), IndexType, Truncating);
+ }
+
+ // Lower fixed length scatter to a scalable equivalent.
+ if (VT.isFixedLengthVector()) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
- "Cannot lower when not using SVE for fixed vectors");
- if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
- } else {
- MemVT = getContainerForFixedLengthVector(DAG, MemVT);
- IndexVT = MemVT.changeTypeToInteger();
+ "Cannot lower when not using SVE for fixed vectors!");
+
+ // Once bitcast we treat floating-point scatters as if integer.
+ if (VT.isFloatingPoint()) {
+ VT = VT.changeVectorElementTypeToInteger();
+ MemVT = MemVT.changeVectorElementTypeToInteger();
+ StoreVal = DAG.getNode(ISD::BITCAST, DL, VT, StoreVal);
}
- InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
-
- StoreVal =
- DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
- StoreVal = DAG.getNode(
- ISD::ANY_EXTEND, DL,
- VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
- StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
- Mask = DAG.getNode(
- ISD::SIGN_EXTEND, DL,
- VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
- } else if (VT.isFloatingPoint()) {
- // Handle FP data by casting the data so an integer scatter can be used.
- EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
- StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
- InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
- }
-
- if (getGatherScatterIndexIsExtended(Index))
- Index = Index.getOperand(0);
-
- unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
- selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
- /*isGather=*/false, DAG);
-
- if (IsFixedLength) {
- if (Index.getSimpleValueType().isFixedLengthVector())
- Index = convertToScalableVector(DAG, IndexVT, Index);
- if (BasePtr.getSimpleValueType().isFixedLengthVector())
- BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
+
+ // Find the smallest integer fixed length vector we can use for the scatter.
+ EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
+ if (VT.getVectorElementType() == MVT::i64 ||
+ Index.getValueType().getVectorElementType() == MVT::i64 ||
+ Mask.getValueType().getVectorElementType() == MVT::i64)
+ PromotedVT = VT.changeVectorElementType(MVT::i64);
+
+ // Promote vector operands.
+ unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
+ StoreVal = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, StoreVal);
+
+ // A promoted value type forces the need for a truncating store.
+ if (PromotedVT != VT)
+ Truncating = true;
+
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
+
+ // Convert fixed length vector operands to scalable.
+ MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
+ Index = convertToScalableVector(DAG, ContainerVT, Index);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
+ StoreVal = convertToScalableVector(DAG, ContainerVT, StoreVal);
+
+ // Emit equivalent scalable vector scatter.
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
+ MSC->getMemOperand(), IndexType, Truncating);
}
- SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
- return DAG.getNode(Opcode, DL, VTs, Ops);
+ // Everything else is legal.
+ return Op;
}
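
// Sketch of the promotion rule shared by the fixed length gather and
// scatter paths above: widen to i32 lanes unless any participating vector
// already has i64 lanes. A toy enum stands in for the MVT element types.
#include <cstdio>

enum class Elt { I8, I16, I32, I64 };

static Elt promotedEltType(Elt Data, Elt Index, Elt Mask) {
  if (Data == Elt::I64 || Index == Elt::I64 || Mask == Elt::I64)
    return Elt::I64; // the widest participant wins
  return Elt::I32;   // otherwise the smallest legal gather/scatter element
}

int main() {
  printf("%d\n", (int)promotedEltType(Elt::I16, Elt::I32, Elt::I8)); // 2, I32
}
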
SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
@@ -4780,7 +5025,9 @@ SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
- if (useSVEForFixedLengthVectorVT(VT, true))
+ if (useSVEForFixedLengthVectorVT(
+ VT,
+ /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
@@ -4847,7 +5094,9 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
EVT MemVT = StoreNode->getMemoryVT();
if (VT.isVector()) {
- if (useSVEForFixedLengthVectorVT(VT, true))
+ if (useSVEForFixedLengthVectorVT(
+ VT,
+ /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
unsigned AS = StoreNode->getAddressSpace();
@@ -5007,6 +5256,22 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
Cmp.getValue(1));
}
+static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+
+ AArch64CC::CondCode CC;
+ if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) {
+ SDLoc dl(Op);
+ SDValue CCVal = DAG.getConstant(CC, dl, MVT::i32);
+ return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Cmp);
+ }
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -5026,6 +5291,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
return LowerSETCC(Op, DAG);
+ case ISD::BRCOND:
+ return LowerBRCOND(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT:
@@ -5046,11 +5313,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::SUBC:
- case ISD::SUBE:
- return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ADDCARRY:
+ return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/);
+ case ISD::SUBCARRY:
+ return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, false /*unsigned*/);
+ case ISD::SADDO_CARRY:
+ return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/);
+ case ISD::SSUBO_CARRY:
+ return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -5165,11 +5435,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
case ISD::MULHU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
@@ -5234,11 +5502,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
case ISD::AND:
- return LowerToScalableOp(Op, DAG);
case ISD::SUB:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
+ return LowerToScalableOp(Op, DAG);
case ISD::FMAXIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
case ISD::FMAXNUM:
@@ -5260,12 +5526,23 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::BSWAP:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
case ISD::CTLZ:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
case ISD::CTTZ:
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT: {
+ assert(Op.getOperand(1).getValueType() == MVT::f16 &&
+ "Expected custom lowering of rounding operations only for f16");
+ SDLoc DL(Op);
+ SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1)});
+ return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
+ {Ext.getValue(1), Ext.getValue(0)});
+ }
}
}
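
// Sketch of the carry-chain semantics behind the new ADDCARRY/SADDO_CARRY
// lowerings above, which map onto the flag-setting ADCS instruction: each
// limb add consumes and produces a carry. Assumes a compiler with __int128.
#include <cstdint>
#include <cstdio>

static uint64_t adcs(uint64_t A, uint64_t B, bool CarryIn, bool &CarryOut) {
  unsigned __int128 Wide = (unsigned __int128)A + B + (CarryIn ? 1 : 0);
  CarryOut = (Wide >> 64) != 0;
  return (uint64_t)Wide;
}

int main() {
  bool C = false;
  uint64_t Lo = adcs(~0ULL, 1, false, C); // low limb wraps to 0, sets carry
  uint64_t Hi = adcs(0, 0, C, C);         // carry propagates into high limb
  printf("lo=%llu hi=%llu\n", (unsigned long long)Lo,
         (unsigned long long)Hi); // prints lo=0 hi=1
}
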
@@ -5275,10 +5552,7 @@ bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
- if (!Subtarget->useSVEForFixedLengthVectors())
- return false;
-
- if (!VT.isFixedLengthVector())
+ if (!VT.isFixedLengthVector() || !VT.isSimple())
return false;
// Don't use SVE for vectors we cannot scalarize if required.
@@ -5300,12 +5574,16 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
// All SVE implementations support NEON sized vectors.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
- return true;
+ return Subtarget->hasSVE();
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
return false;
+ // Ensure wider than NEON code generation is enabled.
+ if (!Subtarget->useSVEForFixedLengthVectors())
+ return false;
+
// Don't use SVE for types that don't fit.
if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
@@ -5322,6 +5600,36 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
+static unsigned getIntrinsicID(const SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ switch (Opcode) {
+ default:
+ return Intrinsic::not_intrinsic;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return IID;
+ return Intrinsic::not_intrinsic;
+ }
+ }
+}
+
+bool AArch64TargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+ SDValue N1) const {
+ if (!N0.hasOneUse())
+ return false;
+
+ unsigned IID = getIntrinsicID(N1.getNode());
+ // Avoid reassociating expressions that can be lowered to smlal/umlal.
+ if (IID == Intrinsic::aarch64_neon_umull ||
+ N1.getOpcode() == AArch64ISD::UMULL ||
+ IID == Intrinsic::aarch64_neon_smull ||
+ N1.getOpcode() == AArch64ISD::SMULL)
+ return N0.getOpcode() != ISD::ADD;
+
+ return true;
+}
+
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
@@ -5368,8 +5676,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
+ bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs,
+ DAG.getTargetLoweringInfo(), MF.getDataLayout());
+  if (any_of(Outs,
+             [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); }))
+ FuncInfo->setIsSVECC(true);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -5383,7 +5699,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
- Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
+ Function::const_arg_iterator CurOrigArg = F.arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
@@ -5454,11 +5770,13 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
else if (RegVT == MVT::f128 || RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
- RegVT.getVectorElementType() == MVT::i1)
+ RegVT.getVectorElementType() == MVT::i1) {
+ FuncInfo->setIsSVECC(true);
RC = &AArch64::PPRRegClass;
- else if (RegVT.isScalableVector())
+ } else if (RegVT.isScalableVector()) {
+ FuncInfo->setIsSVECC(true);
RC = &AArch64::ZPRRegClass;
- else
+ } else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
@@ -5580,7 +5898,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// i1 arguments are zero-extended to i8 by the caller. Emit a
// hint to reflect this.
if (Ins[i].isOrigArg()) {
- Argument *OrigArg = MF.getFunction().getArg(Ins[i].getOrigArgIndex());
+ Argument *OrigArg = F.getArg(Ins[i].getOrigArgIndex());
if (OrigArg->getType()->isIntegerTy(1)) {
if (!Ins[i].Flags.isZExt()) {
ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
@@ -5595,7 +5913,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
- AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
if (isVarArg) {
if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
@@ -5843,14 +6160,62 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
}
}
+static void analyzeCallOperands(const AArch64TargetLowering &TLI,
+ const AArch64Subtarget *Subtarget,
+ const TargetLowering::CallLoweringInfo &CLI,
+ CCState &CCInfo) {
+ const SelectionDAG &DAG = CLI.DAG;
+ CallingConv::ID CalleeCC = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+ const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
+
+ unsigned NumArgs = Outs.size();
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+
+ bool UseVarArgCC = false;
+ if (IsVarArg) {
+ // On Windows, the fixed arguments in a vararg call are passed in GPRs
+ // too, so use the vararg CC to force them to integer registers.
+ if (IsCalleeWin64) {
+ UseVarArgCC = true;
+ } else {
+ UseVarArgCC = !Outs[i].IsFixed;
+ }
+ } else {
+ // Get type of the original argument.
+ EVT ActualVT =
+ TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
+ /*AllowUnknown*/ true);
+ MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
+ // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+ ArgVT = MVT::i8;
+ else if (ActualMVT == MVT::i16)
+ ArgVT = MVT::i16;
+ }
+
+ CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC);
+ bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ assert(!Res && "Call operand has unhandled type");
+ (void)Res;
+ }
+}
+
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+ const CallLoweringInfo &CLI) const {
+ CallingConv::ID CalleeCC = CLI.CallConv;
if (!mayTailCallThisCC(CalleeCC))
return false;
+ SDValue Callee = CLI.Callee;
+ bool IsVarArg = CLI.IsVarArg;
+ const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ const SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ const SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ const SelectionDAG &DAG = CLI.DAG;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
@@ -5860,7 +6225,7 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// The check for matching callee-saved regs will determine whether it is
// eligible for TCO.
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
- AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
+ MF.getInfo<AArch64FunctionInfo>()->isSVECC())
CallerCC = CallingConv::AArch64_SVE_VectorCall;
bool CCMatch = CallerCC == CalleeCC;
@@ -5915,30 +6280,14 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
- assert((!isVarArg || CalleeCC == CallingConv::C) &&
+ assert((!IsVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
LLVMContext &C = *DAG.getContext();
- if (isVarArg && !Outs.empty()) {
- // At least two cases here: if caller is fastcc then we can't have any
- // memory arguments (we'd be expected to clean up the stack afterwards). If
- // caller is C then we could potentially use its argument area.
-
- // FIXME: for now we take the most conservative of these in both cases:
- // disallow all variadic memory operands.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
-
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
- for (const CCValAssign &ArgLoc : ArgLocs)
- if (!ArgLoc.isRegLoc())
- return false;
- }
-
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
- CCAssignFnForCall(CalleeCC, isVarArg),
- CCAssignFnForCall(CallerCC, isVarArg)))
+ CCAssignFnForCall(CalleeCC, IsVarArg),
+ CCAssignFnForCall(CallerCC, IsVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -5958,9 +6307,22 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return true;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
+ CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);
+
+ analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
+ if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
+    // When we are musttail, additional checks have been done and we can
+    // safely ignore this check.
+ // At least two cases here: if caller is fastcc then we can't have any
+ // memory arguments (we'd be expected to clean up the stack afterwards). If
+ // caller is C then we could potentially use its argument area.
+
+ // FIXME: for now we take the most conservative of these in both cases:
+ // disallow all variadic memory operands.
+ for (const CCValAssign &ArgLoc : ArgLocs)
+ if (!ArgLoc.isRegLoc())
+ return false;
+ }
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
@@ -6051,7 +6413,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
+ CallingConv::ID &CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
@@ -6061,7 +6423,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
- bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
+ bool GuardWithBTI = false;
+
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget->noBTIAtReturnTwice()) {
+ GuardWithBTI = FuncInfo->branchTargetEnforcement();
+ }
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
@@ -6079,8 +6446,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall) {
// Check if it's really possible to do a tail call.
- IsTailCall = isEligibleForTailCallOptimization(
- Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
+ IsTailCall = isEligibleForTailCallOptimization(CLI);
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
@@ -6101,56 +6467,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (IsVarArg) {
- // Handle fixed and variable vector arguments differently.
- // Variable vector arguments always go into memory.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- MVT ArgVT = Outs[i].VT;
- if (!Outs[i].IsFixed && ArgVT.isScalableVector())
+ if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
-
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- bool UseVarArgCC = !Outs[i].IsFixed;
- // On Windows, the fixed arguments in a vararg call are passed in GPRs
- // too, so use the vararg CC to force them to integer registers.
- if (IsCalleeWin64)
- UseVarArgCC = true;
- CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
- bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- assert(!Res && "Call operand has unhandled type");
- (void)Res;
- }
- } else {
- // At this point, Outs[].VT may already be promoted to i32. To correctly
- // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
- // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
- // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
- // we use a special version of AnalyzeCallOperands to pass in ValVT and
- // LocVT.
- unsigned NumArgs = Outs.size();
- for (unsigned i = 0; i != NumArgs; ++i) {
- MVT ValVT = Outs[i].VT;
- // Get type of the original argument.
- EVT ActualVT = getValueType(DAG.getDataLayout(),
- CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
- /*AllowUnknown*/ true);
- MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
- if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
- ValVT = MVT::i8;
- else if (ActualMVT == MVT::i16)
- ValVT = MVT::i16;
-
- CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
- bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
- assert(!Res && "Call operand has unhandled type");
- (void)Res;
}
}
+ analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -6536,7 +6863,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
Ops.insert(Ops.begin() + 1, GA);
- }
+ } else if (GuardWithBTI)
+ CallOpc = AArch64ISD::CALL_BTI;
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
@@ -7313,103 +7641,88 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
SDLoc DL(Op);
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
- if (VT.isScalableVector()) {
- if (VT != SrcVT)
- return SDValue();
+ if (SrcVT.bitsLT(VT))
+ In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
+ else if (SrcVT.bitsGT(VT))
+ In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
- // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
- //
- // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
- // maybe useful for copysign operations with mismatched VTs.
- //
- // IntVT here is chosen so it's a legal type with the same element width
- // as the input.
- EVT IntVT =
+ if (VT.isScalableVector())
+ IntVT =
getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
- unsigned NumBits = VT.getScalarSizeInBits();
- SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
- SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
- SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
- getSVESafeBitCast(IntVT, In2, DAG));
- SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
- getSVESafeBitCast(IntVT, In1, DAG));
- SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
- return getSVESafeBitCast(VT, IntResult, DAG);
- }
- if (!Subtarget->hasNEON())
+ if (VT != In2.getValueType())
return SDValue();
- if (SrcVT.bitsLT(VT))
- In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
- else if (SrcVT.bitsGT(VT))
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
+ auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
+ if (VT.isScalableVector())
+ return getSVESafeBitCast(VT, Op, DAG);
- EVT VecVT;
- uint64_t EltMask;
- SDValue VecVal1, VecVal2;
+ return DAG.getBitcast(VT, Op);
+ };
- auto setVecVal = [&] (int Idx) {
+ SDValue VecVal1, VecVal2;
+ EVT VecVT;
+ auto SetVecVal = [&](int Idx = -1) {
if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
+ VecVal1 =
+ DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In1);
+ VecVal2 =
+ DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In2);
} else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
+ VecVal1 = BitCast(VecVT, In1, DAG);
+ VecVal2 = BitCast(VecVT, In2, DAG);
}
};
-
- if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
- VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
- EltMask = 0x80000000ULL;
- setVecVal(AArch64::ssub);
- } else if (VT == MVT::f64 || VT == MVT::v2f64) {
+ if (VT.isVector()) {
+ VecVT = IntVT;
+ SetVecVal();
+ } else if (VT == MVT::f64) {
VecVT = MVT::v2i64;
-
- // We want to materialize a mask with the high bit set, but the AdvSIMD
- // immediate moves cannot materialize that in a single instruction for
- // 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = 0;
-
- setVecVal(AArch64::dsub);
- } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
- VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
- EltMask = 0x8000ULL;
- setVecVal(AArch64::hsub);
+ SetVecVal(AArch64::dsub);
+ } else if (VT == MVT::f32) {
+ VecVT = MVT::v4i32;
+ SetVecVal(AArch64::ssub);
+ } else if (VT == MVT::f16) {
+ VecVT = MVT::v8i16;
+ SetVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
- SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
+ unsigned BitWidth = In1.getScalarValueSizeInBits();
+ SDValue SignMaskV = DAG.getConstant(~APInt::getSignMask(BitWidth), DL, VecVT);
- // If we couldn't materialize the mask above, then the mask vector will be
- // the zero vector, and we need to negate it here.
+ // We want to materialize a mask with every bit but the high bit set, but the
+ // AdvSIMD immediate moves cannot materialize that in a single instruction for
+ // 64-bit elements. Instead, materialize all bits set and then negate that.
if (VT == MVT::f64 || VT == MVT::v2f64) {
- BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
- BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
- BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
+ SignMaskV = DAG.getConstant(APInt::getAllOnes(BitWidth), DL, VecVT);
+ SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, SignMaskV);
+ SignMaskV = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, SignMaskV);
+ SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, SignMaskV);
}
- SDValue Sel =
- DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
-
+ SDValue BSP =
+ DAG.getNode(AArch64ISD::BSP, DL, VecVT, SignMaskV, VecVal1, VecVal2);
if (VT == MVT::f16)
- return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
+ return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, BSP);
if (VT == MVT::f32)
- return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
- else if (VT == MVT::f64)
- return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
- else
- return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
+ return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, BSP);
+ if (VT == MVT::f64)
+ return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, BSP);
+
+ return BitCast(VT, BSP, DAG);
}
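
// Scalar sketch of the BSP-based copysign above: with a mask of every bit
// except the sign bit, a bitwise select keeps the magnitude of the first
// operand and the sign of the second. f64 shown; names are illustrative.
#include <cstdint>
#include <cstdio>
#include <cstring>

static double copysign_bsp(double Mag, double Sign) {
  uint64_t M, S, Mask = ~(UINT64_C(1) << 63); // all bits but the sign bit
  std::memcpy(&M, &Mag, sizeof(M));
  std::memcpy(&S, &Sign, sizeof(S));
  uint64_t R = (M & Mask) | (S & ~Mask); // BSP(Mask, M, S)
  double Out;
  std::memcpy(&Out, &R, sizeof(Out));
  return Out;
}

int main() {
  printf("%f\n", copysign_bsp(3.5, -0.0)); // prints -3.500000
}
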
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
@@ -7485,7 +7798,8 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
+ useSVEForFixedLengthVectorVT(
+ VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));
SDLoc DL(Op);
SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
@@ -7517,22 +7831,19 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
}
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
+ useSVEForFixedLengthVectorVT(
+ VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
case ISD::SMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
case ISD::SMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
case ISD::UMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
case ISD::UMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
}
}
@@ -7547,9 +7858,9 @@ SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
EVT VT = Op.getValueType();
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
- true);
+ useSVEForFixedLengthVectorVT(
+ VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
SDLoc DL(Op);
SDValue REVB;
@@ -8990,12 +9301,13 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(V.getOperand(1))) {
+ !isa<ConstantSDNode>(V.getOperand(1)) ||
+ V.getOperand(0).getValueType().isScalableVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: "
"a shuffle can only come from building a vector from "
- "various elements of other vectors, provided their "
- "indices are constant\n");
+ "various elements of other fixed-width vectors, provided "
+ "their indices are constant\n");
return SDValue();
}
@@ -9011,10 +9323,72 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
+ // If we have 3 or 4 sources, try to generate a TBL, which will at least be
+ // better than moving to/from gpr registers for larger vectors.
+ if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
+ // Construct a mask for the tbl. We may need to adjust the index for types
+ // larger than i8.
+ SmallVector<unsigned, 16> Mask;
+ unsigned OutputFactor = VT.getScalarSizeInBits() / 8;
+ for (unsigned I = 0; I < NumElts; ++I) {
+ SDValue V = Op.getOperand(I);
+ if (V.isUndef()) {
+ for (unsigned OF = 0; OF < OutputFactor; OF++)
+ Mask.push_back(-1);
+ continue;
+ }
+ // Set the Mask lanes adjusted for the size of the input and output
+ // lanes. The Mask is always i8, so it will set OutputFactor lanes per
+ // output element, adjusted in their positions per input and output types.
+ unsigned Lane = V.getConstantOperandVal(1);
+ for (unsigned S = 0; S < Sources.size(); S++) {
+ if (V.getOperand(0) == Sources[S].Vec) {
+ unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
+ unsigned InputBase = 16 * S + Lane * InputSize / 8;
+ for (unsigned OF = 0; OF < OutputFactor; OF++)
+ Mask.push_back(InputBase + OF);
+ break;
+ }
+ }
+ }
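+  // Illustrative walk-through (not from the original change): for a v8i16
+  // result (OutputFactor == 2) whose element comes from lane 3 of the second
+  // v8i16 source (S == 1), InputBase == 16 * 1 + 3 * 16 / 8 == 22, so bytes
+  // 22 and 23 are appended to the mask.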
+
+ // Construct the tbl3/tbl4 out of an intrinsic, the sources converted to
+ // v16i8, and the TBLMask
+ SmallVector<SDValue, 16> TBLOperands;
+ TBLOperands.push_back(DAG.getConstant(Sources.size() == 3
+ ? Intrinsic::aarch64_neon_tbl3
+ : Intrinsic::aarch64_neon_tbl4,
+ dl, MVT::i32));
+ for (unsigned i = 0; i < Sources.size(); i++) {
+ SDValue Src = Sources[i].Vec;
+ EVT SrcVT = Src.getValueType();
+ Src = DAG.getBitcast(SrcVT.is64BitVector() ? MVT::v8i8 : MVT::v16i8, Src);
+ assert((SrcVT.is64BitVector() || SrcVT.is128BitVector()) &&
+ "Expected a legally typed vector");
+ if (SrcVT.is64BitVector())
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, Src,
+ DAG.getUNDEF(MVT::v8i8));
+ TBLOperands.push_back(Src);
+ }
+
+ SmallVector<SDValue, 16> TBLMask;
+ for (unsigned i = 0; i < Mask.size(); i++)
+ TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32));
+ assert((Mask.size() == 8 || Mask.size() == 16) &&
+ "Expected a v8i8 or v16i8 Mask");
+  TBLOperands.push_back(DAG.getBuildVector(
+      Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
+
+ SDValue Shuffle =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
+ Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
+ return DAG.getBitcast(VT, Shuffle);
+ }
+
if (Sources.size() > 2) {
- LLVM_DEBUG(
- dbgs() << "Reshuffle failed: currently only do something sane when at "
- "most two source vectors are involved\n");
+ LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
+ << "sensible when at most two source vectors are "
+ << "involved\n");
return SDValue();
}
@@ -9039,8 +9413,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
- uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
- if (SrcVTSize == VTSize)
+ TypeSize SrcVTSize = SrcVT.getSizeInBits();
+ if (SrcVTSize == TypeSize::Fixed(VTSize))
continue;
// This stage of the search produces a source with the same element type as
@@ -9049,7 +9423,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- if (SrcVTSize < VTSize) {
+ if (SrcVTSize.getFixedValue() < VTSize) {
assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
@@ -9059,7 +9433,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
continue;
}
- if (SrcVTSize != 2 * VTSize) {
+ if (SrcVTSize.getFixedValue() != 2 * VTSize) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: result vector too small to extract\n");
return SDValue();
@@ -9205,6 +9579,56 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
return true;
}
+// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
+// v4i32s. This is really a truncate, which we can construct out of (legal)
+// concats and truncate nodes.
+static SDValue ReconstructTruncateFromBuildVector(SDValue V,
+                                                  SelectionDAG &DAG) {
+ if (V.getValueType() != MVT::v16i8)
+ return SDValue();
+ assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
+
+ for (unsigned X = 0; X < 4; X++) {
+ // Check the first item in each group is an extract from lane 0 of a v4i32
+ // or v4i16.
+ SDValue BaseExt = V.getOperand(X * 4);
+ if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ (BaseExt.getOperand(0).getValueType() != MVT::v4i16 &&
+ BaseExt.getOperand(0).getValueType() != MVT::v4i32) ||
+ !isa<ConstantSDNode>(BaseExt.getOperand(1)) ||
+ BaseExt.getConstantOperandVal(1) != 0)
+ return SDValue();
+ SDValue Base = BaseExt.getOperand(0);
+ // And check the other items are extracts from the same vector.
+ for (unsigned Y = 1; Y < 4; Y++) {
+ SDValue Ext = V.getOperand(X * 4 + Y);
+ if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Ext.getOperand(0) != Base ||
+ !isa<ConstantSDNode>(Ext.getOperand(1)) ||
+ Ext.getConstantOperandVal(1) != Y)
+ return SDValue();
+ }
+ }
+
+  // Turn the buildvector into a series of truncates and concats, which will
+  // become uzp1s. Any v4i32s we found get truncated to v4i16, which are
+  // concatenated together to produce two v8i16s. These are both truncated
+  // and concatenated together.
+ SDLoc DL(V);
+ SDValue Trunc[4] = {
+ V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
+ V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
+ for (int I = 0; I < 4; I++)
+ if (Trunc[I].getValueType() == MVT::v4i32)
+ Trunc[I] = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i16, Trunc[I]);
+ SDValue Concat0 =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[0], Trunc[1]);
+ SDValue Concat1 =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[2], Trunc[3]);
+ SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat0);
+ SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Trunc0, Trunc1);
+}
+
/// Check if a vector shuffle corresponds to a DUP instructions with a larger
/// element width than the vector lane type. If that is the case the function
/// returns true and writes the value of the DUP instruction lane operand into
@@ -9534,8 +9958,12 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
-/// the specified operations to build the shuffle.
-static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+/// the specified operations to build the shuffle. ID is the perfect-shuffle
+/// ID, V1 and V2 are the original shuffle inputs. PFEntry is the Perfect
+/// shuffle table entry and LHS/RHS are the immediate inputs for this stage of
+/// the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
+ SDValue V2, unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
@@ -9552,12 +9980,13 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
- OP_VUZPL, // VUZP, left result
- OP_VUZPR, // VUZP, right result
- OP_VZIPL, // VZIP, left result
- OP_VZIPR, // VZIP, right result
- OP_VTRNL, // VTRN, left result
- OP_VTRNR // VTRN, right result
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR, // VTRN, right result
+ OP_MOVLANE // Move lane. RHSID is the lane to move into
};
if (OpNum == OP_COPY) {
@@ -9567,9 +9996,71 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
return RHS;
}
+ if (OpNum == OP_MOVLANE) {
+ // Decompose a PerfectShuffle ID to get the Mask for lane Elt
+ auto getPFIDLane = [](unsigned ID, int Elt) -> int {
+ assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
+ Elt = 3 - Elt;
+ while (Elt > 0) {
+ ID /= 9;
+ Elt--;
+ }
+ return (ID % 9 == 8) ? -1 : ID % 9;
+ };
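+    // The PFID packs the four result lanes as base-9 digits, lane 0 in the
+    // most significant position, with digit 8 (returned as -1 above) meaning
+    // undef. Illustrative check: the mask <0,1,4,5> encodes as
+    // ID == 1*81 + 4*9 + 5 == 122, and getPFIDLane(122, 2) == (122/9) % 9 == 4.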
+
+    // For OP_MOVLANE shuffles, the RHSID represents the lane to move into,
+    // and the PFID gives the lane to move from, which is always taken from
+    // one of the original vectors (V1 or V2).
+ SDValue OpLHS = GeneratePerfectShuffle(
+ LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+ assert(RHSID < 8 && "Expected a lane index for RHSID!");
+ unsigned ExtLane = 0;
+ SDValue Input;
+
+    // An OP_MOVLANE is either a D mov (if bit 0x4 is set) or an S mov. D movs
+    // operate on a wider element type, so bitcast to it first.
+ if (RHSID & 0x4) {
+ int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
+ if (MaskElt == -1)
+ MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
+ assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
+ ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
+ Input = MaskElt < 2 ? V1 : V2;
+ if (VT.getScalarSizeInBits() == 16) {
+ Input = DAG.getBitcast(MVT::v2f32, Input);
+ OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
+ } else {
+ assert(VT.getScalarSizeInBits() == 32 &&
+ "Expected 16 or 32 bit shuffle elemements");
+ Input = DAG.getBitcast(MVT::v2f64, Input);
+ OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
+ }
+ } else {
+ int MaskElt = getPFIDLane(ID, RHSID);
+ assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
+ ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
+ Input = MaskElt < 4 ? V1 : V2;
+ // Be careful about creating illegal types. Use f16 instead of i16.
+ if (VT == MVT::v4i16) {
+ Input = DAG.getBitcast(MVT::v4f16, Input);
+ OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
+ }
+ }
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ Input.getValueType().getVectorElementType(),
+ Input, DAG.getVectorIdxConstant(ExtLane, dl));
+ SDValue Ins =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
+ Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl));
+ return DAG.getBitcast(VT, Ins);
+ }
+
SDValue OpLHS, OpRHS;
- OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
- OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
+ RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
+ RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
@@ -9648,14 +10139,16 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
EVT EltVT = Op.getValueType().getVectorElementType();
unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
- SmallVector<SDValue, 8> TBLMask;
- for (int Val : ShuffleMask) {
- for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
- unsigned Offset = Byte + Val * BytesPerElt;
- TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
- }
+ bool Swap = false;
+ if (V1.isUndef() || isZerosVector(V1.getNode())) {
+ std::swap(V1, V2);
+ Swap = true;
}
+  // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will
+  // fill out-of-range values with 0s. We do need to make sure that any
+  // out-of-range values are really out-of-range for a v16i8 vector.
+ bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
MVT IndexVT = MVT::v8i8;
unsigned IndexLen = 8;
if (Op.getValueSizeInBits() == 128) {
@@ -9663,11 +10156,23 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
IndexLen = 16;
}
+ SmallVector<SDValue, 8> TBLMask;
+ for (int Val : ShuffleMask) {
+ for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
+ unsigned Offset = Byte + Val * BytesPerElt;
+ if (Swap)
+ Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
+ if (IsUndefOrZero && Offset >= IndexLen)
+ Offset = 255;
+ TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
+ }
+ }
+
SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
SDValue Shuffle;
- if (V2.getNode()->isUndef()) {
+ if (IsUndefOrZero) {
if (IndexLen == 8)
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
@@ -9732,6 +10237,10 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
if (ExtIdxInBits % CastedEltBitWidth != 0)
return false;
+  // Can't handle cases where the vector size is not 128 bits.
+ if (!Extract.getOperand(0).getValueType().is128BitVector())
+ return false;
+
// Update the lane value by offsetting with the scaled extract index.
LaneC += ExtIdxInBits / CastedEltBitWidth;
@@ -10014,10 +10523,8 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
- unsigned Cost = (PFEntry >> 30);
-
- if (Cost <= 4)
- return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
+ dl);
}
return GenerateTBL(Op, ShuffleMask, DAG);
@@ -10025,56 +10532,33 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc dl(Op);
EVT VT = Op.getValueType();
- EVT ElemVT = VT.getScalarType();
- SDValue SplatVal = Op.getOperand(0);
if (useSVEForFixedLengthVectorVT(VT))
return LowerToScalableOp(Op, DAG);
- // Extend input splat value where needed to fit into a GPR (32b or 64b only)
- // FPRs don't have this restriction.
- switch (ElemVT.getSimpleVT().SimpleTy) {
- case MVT::i1: {
- // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
- // lowering code.
- if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
-      // We can handle the zero case during isel.
- if (ConstVal->isZero())
- return Op;
- if (ConstVal->isOne())
- return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
- }
- // The general case of i1. There isn't any natural way to do this,
- // so we use some trickery with whilelo.
- SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
- SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
- DAG.getValueType(MVT::i1));
- SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
- MVT::i64);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
- DAG.getConstant(0, dl, MVT::i64), SplatVal);
- }
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
- break;
- case MVT::i64:
- SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
- break;
- case MVT::f16:
- case MVT::bf16:
- case MVT::f32:
- case MVT::f64:
- // Fine as is
- break;
- default:
- report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
- }
+ assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 &&
+ "Unexpected vector type!");
+
+ // We can handle the constant cases during isel.
+ if (isa<ConstantSDNode>(Op.getOperand(0)))
+ return Op;
- return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
+ // There isn't a natural way to handle the general i1 case, so we use some
+ // trickery with whilelo.
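+  // After the sign-extension below the splat value is either 0 or all ones,
+  // so the unsigned whilelo(0, val) comparison produces an all-false or an
+  // all-true predicate respectively.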
+ SDLoc DL(Op);
+ SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);
+ SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,
+ DAG.getValueType(MVT::i1));
+ SDValue ID =
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ if (VT == MVT::nxv1i1)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::nxv1i1,
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv2i1, ID,
+ Zero, SplatVal),
+ Zero);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID, Zero, SplatVal);
}
SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
@@ -10090,18 +10574,17 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
return SDValue();
   // The DUPQ operation is independent of element type so normalise to i64s.
- SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
SDValue Idx128 = Op.getOperand(2);
// DUPQ can be used when idx is in range.
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
if (CIdx && (CIdx->getZExtValue() <= 3)) {
SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
- SDNode *DUPQ =
- DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
- return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
+ return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
}
+ SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
+
// The ACLE says this must produce the same result as:
// svtbl(data, svadd_x(svptrue_b64(),
// svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
@@ -10358,20 +10841,6 @@ static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
return true;
}
-static unsigned getIntrinsicID(const SDNode *N) {
- unsigned Opcode = N->getOpcode();
- switch (Opcode) {
- default:
- return Intrinsic::not_intrinsic;
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- if (IID < Intrinsic::num_intrinsics)
- return IID;
- return Intrinsic::not_intrinsic;
- }
- }
-}
-
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTORs with constant element C1, C2 is a constant, and:
@@ -10822,6 +11291,12 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
+ // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
+ // v4i32s. This is really a truncate, which we can construct out of (legal)
+ // concats and truncate nodes.
+ if (SDValue M = ReconstructTruncateFromBuildVector(Op, DAG))
+ return M;
+
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
if (SDValue shuffle = ReconstructShuffle(Op, DAG))
@@ -11121,29 +11596,36 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
- EVT WideVT;
- SDValue ExtVec;
+  // Here narrow and wide refer to the vector element types. After "casting"
+ // both vectors must have the same bit length and so because the subvector
+ // has fewer elements, those elements need to be bigger.
+ EVT NarrowVT = getPackedSVEVectorVT(VT.getVectorElementCount());
+ EVT WideVT = getPackedSVEVectorVT(InVT.getVectorElementCount());
+ // NOP cast operands to the largest legal vector of the same element count.
if (VT.isFloatingPoint()) {
- // The InVT type should be legal. We can safely cast the unpacked
- // subvector from InVT -> VT.
- WideVT = VT;
- ExtVec = getSVESafeBitCast(VT, Vec1, DAG);
+ Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
+ Vec1 = getSVESafeBitCast(WideVT, Vec1, DAG);
} else {
- // Extend elements of smaller vector...
- WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
- ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+    // Legal integer vectors are already their largest, so Vec0 is fine as-is.
+ Vec1 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
}
+ // To replace the top/bottom half of vector V with vector SubV we widen the
+ // preserved half of V, concatenate this to SubV (the order depending on the
+ // half being replaced) and then narrow the result.
+ SDValue Narrow;
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
- return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
- } else if (Idx == InVT.getVectorMinNumElements()) {
+ Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, Vec1, HiVec0);
+ } else {
+ assert(Idx == InVT.getVectorMinNumElements() &&
+ "Invalid subvector index!");
SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
- return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
+ Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, LoVec0, Vec1);
}
- return SDValue();
+ return getSVESafeBitCast(VT, Narrow, DAG);
}
if (Idx == 0 && isPackedVectorType(VT, DAG)) {
@@ -11249,21 +11731,8 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
- unsigned PFIndexes[4];
- for (unsigned i = 0; i != 4; ++i) {
- if (M[i] < 0)
- PFIndexes[i] = 8;
- else
- PFIndexes[i] = M[i];
- }
-
- // Compute the index in the perfect shuffle table.
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
- PFIndexes[2] * 9 + PFIndexes[3];
- unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
- unsigned Cost = (PFEntry >> 30);
-
- if (Cost <= 4)
+ unsigned Cost = getPerfectShuffleCost(M);
+ if (Cost <= 1)
return true;
}
@@ -11360,9 +11829,6 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
unsigned EltSize = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
- default:
- llvm_unreachable("unexpected shift opcode");
-
case ISD::SHL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
@@ -11405,7 +11871,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
return NegShiftLeft;
}
- return SDValue();
+ llvm_unreachable("unexpected shift opcode");
}
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
@@ -11525,8 +11991,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
- const bool FullFP16 =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
// Make v4f16 (only) fcmp operations utilise vector instructions
   // v8f16 support will be a little more complicated
@@ -11594,7 +12059,8 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
if (SrcVT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
+ useSVEForFixedLengthVectorVT(
+ SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
@@ -11659,7 +12125,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
- auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+ auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
@@ -11676,7 +12142,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
- auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+ auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
@@ -11772,8 +12238,8 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SDLoc DL(Op);
APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
- return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
- DL, VT);
+ return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL,
+ VT);
}
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
@@ -11867,23 +12333,23 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Type *ValTy = I.getParamElementType(0);
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Type *ValTy = I.getParamElementType(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -11906,22 +12372,23 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_sve_ldnt1: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Type *ElTy = cast<VectorType>(I.getType())->getElementType();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ElTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
+ Type *ElTy =
+ cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ElTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
@@ -12007,8 +12474,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
Instruction *User = I->user_back();
- if (User &&
- !(User->getOpcode() == Instruction::FSub ||
+ if (!(User->getOpcode() == Instruction::FSub ||
User->getOpcode() == Instruction::FAdd))
return true;
@@ -12194,9 +12660,6 @@ static bool isSplatShuffle(Value *V) {
/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
bool AArch64TargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
- if (!I->getType()->isVectorTy())
- return false;
-
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::aarch64_neon_smull:
@@ -12208,6 +12671,12 @@ bool AArch64TargetLowering::shouldSinkOperands(
}
LLVM_FALLTHROUGH;
+ case Intrinsic::fma:
+ if (isa<VectorType>(I->getType()) &&
+ cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
+ !Subtarget->hasFullFP16())
+ return false;
+ LLVM_FALLTHROUGH;
case Intrinsic::aarch64_neon_sqdmull:
case Intrinsic::aarch64_neon_sqdmulh:
case Intrinsic::aarch64_neon_sqrdmulh:
@@ -12217,7 +12686,52 @@ bool AArch64TargetLowering::shouldSinkOperands(
if (isSplatShuffle(II->getOperand(1)))
Ops.push_back(&II->getOperandUse(1));
return !Ops.empty();
-
+ case Intrinsic::aarch64_sme_write_horiz:
+ case Intrinsic::aarch64_sme_write_vert:
+ case Intrinsic::aarch64_sme_writeq_horiz:
+ case Intrinsic::aarch64_sme_writeq_vert: {
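+    // Sink the index add next to its user; this (presumably) lets instruction
+    // selection fold a constant term into the tile-slice immediate offset.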
+ auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
+ if (!Idx || Idx->getOpcode() != Instruction::Add)
+ return false;
+ Ops.push_back(&II->getOperandUse(1));
+ return true;
+ }
+ case Intrinsic::aarch64_sme_read_horiz:
+ case Intrinsic::aarch64_sme_read_vert:
+ case Intrinsic::aarch64_sme_readq_horiz:
+ case Intrinsic::aarch64_sme_readq_vert:
+ case Intrinsic::aarch64_sme_ld1b_vert:
+ case Intrinsic::aarch64_sme_ld1h_vert:
+ case Intrinsic::aarch64_sme_ld1w_vert:
+ case Intrinsic::aarch64_sme_ld1d_vert:
+ case Intrinsic::aarch64_sme_ld1q_vert:
+ case Intrinsic::aarch64_sme_st1b_vert:
+ case Intrinsic::aarch64_sme_st1h_vert:
+ case Intrinsic::aarch64_sme_st1w_vert:
+ case Intrinsic::aarch64_sme_st1d_vert:
+ case Intrinsic::aarch64_sme_st1q_vert:
+ case Intrinsic::aarch64_sme_ld1b_horiz:
+ case Intrinsic::aarch64_sme_ld1h_horiz:
+ case Intrinsic::aarch64_sme_ld1w_horiz:
+ case Intrinsic::aarch64_sme_ld1d_horiz:
+ case Intrinsic::aarch64_sme_ld1q_horiz:
+ case Intrinsic::aarch64_sme_st1b_horiz:
+ case Intrinsic::aarch64_sme_st1h_horiz:
+ case Intrinsic::aarch64_sme_st1w_horiz:
+ case Intrinsic::aarch64_sme_st1d_horiz:
+ case Intrinsic::aarch64_sme_st1q_horiz: {
+ auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
+ if (!Idx || Idx->getOpcode() != Instruction::Add)
+ return false;
+ Ops.push_back(&II->getOperandUse(3));
+ return true;
+ }
+ case Intrinsic::aarch64_neon_pmull:
+ if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
+ return false;
+ Ops.push_back(&II->getOperandUse(0));
+ Ops.push_back(&II->getOperandUse(1));
+ return true;
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
II->getArgOperand(1)))
@@ -12225,12 +12739,14 @@ bool AArch64TargetLowering::shouldSinkOperands(
Ops.push_back(&II->getArgOperandUse(0));
Ops.push_back(&II->getArgOperandUse(1));
return true;
-
default:
return false;
}
}
+ if (!I->getType()->isVectorTy())
+ return false;
+
switch (I->getOpcode()) {
case Instruction::Sub:
case Instruction::Add: {
@@ -12745,12 +13261,15 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
assert(VT.isScalableVector() && "Can only lower scalable vectors");
unsigned N, Opcode;
- static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
- {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
- {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
- {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
-
- std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
+ static const std::pair<unsigned, std::pair<unsigned, unsigned>>
+ IntrinsicMap[] = {
+ {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
+ {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
+ {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
+
+ std::tie(N, Opcode) = llvm::find_if(IntrinsicMap, [&](auto P) {
+ return P.first == Intrinsic;
+ })->second;
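+  // (A static array searched with find_if keeps the table constant-initialised,
+  // unlike the function-local std::map it replaces.)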
assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
@@ -12850,7 +13369,7 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
// if the folding leads to worse code.
bool AArch64TargetLowering::isMulAddWithConstProfitable(
- const SDValue &AddNode, const SDValue &ConstNode) const {
+ SDValue AddNode, SDValue ConstNode) const {
// Let the DAGCombiner decide for vector types and large types.
const EVT VT = AddNode.getValueType();
if (VT.isVector() || VT.getScalarSizeInBits() > 64)
@@ -13025,6 +13544,28 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
return true;
}
+bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
+ const SDNode *N, CombineLevel Level) const {
+ assert(((N->getOpcode() == ISD::SHL &&
+ N->getOperand(0).getOpcode() == ISD::SRL) ||
+ (N->getOpcode() == ISD::SRL &&
+ N->getOperand(0).getOpcode() == ISD::SHL)) &&
+ "Expected shift-shift mask");
+ // Don't allow multiuse shift folding with the same shift amount.
+ if (!N->getOperand(0)->hasOneUse())
+ return false;
+
+ // Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
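+  // Illustratively: with C1 < C2, (srl (shl x, C1), C2) maps to a single
+  // "ubfx x, C2-C1, Width-C2", whereas the shift-plus-mask form would take
+  // two instructions, so the fold is rejected in that case.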
+ EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
+ auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
+ auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
+ }
+
+ return true;
+}
+
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
@@ -13221,6 +13762,61 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
}
+// Given an (integer) vecreduce, we know the order of the inputs does not
+// matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
+// into UADDV(UADDLP(x)). This can also happen through an extra add, where we
+// transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
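+// Note that UADDLP pairs adjacent lanes rather than lo/hi halves, but under
+// the reduction the total is unchanged: illustratively, for a v16i8 x both
+// forms widen every i8 lane exactly once before all lanes are summed.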
+static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+ auto DetectAddExtract = [&](SDValue A) {
+ // Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
+ // UADDLP(x) if found.
+ if (A.getOpcode() != ISD::ADD)
+ return SDValue();
+ EVT VT = A.getValueType();
+ SDValue Op0 = A.getOperand(0);
+ SDValue Op1 = A.getOperand(1);
+    if (Op0.getOpcode() != Op1.getOpcode() ||
+ (Op0.getOpcode() != ISD::ZERO_EXTEND &&
+ Op0.getOpcode() != ISD::SIGN_EXTEND))
+ return SDValue();
+ SDValue Ext0 = Op0.getOperand(0);
+ SDValue Ext1 = Op1.getOperand(0);
+ if (Ext0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Ext1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Ext0.getOperand(0) != Ext1.getOperand(0))
+ return SDValue();
+ // Check that the type is twice the add types, and the extract are from
+ // upper/lower parts of the same source.
+ if (Ext0.getOperand(0).getValueType().getVectorNumElements() !=
+ VT.getVectorNumElements() * 2)
+ return SDValue();
+ if ((Ext0.getConstantOperandVal(1) != 0 &&
+ Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) &&
+ (Ext1.getConstantOperandVal(1) != 0 &&
+ Ext0.getConstantOperandVal(1) != VT.getVectorNumElements()))
+ return SDValue();
+ unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP
+ : AArch64ISD::SADDLP;
+ return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0));
+ };
+
+ SDValue A = N->getOperand(0);
+ if (SDValue R = DetectAddExtract(A))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
+ if (A.getOpcode() == ISD::ADD) {
+ if (SDValue R = DetectAddExtract(A.getOperand(0)))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+ A.getOperand(1)));
+ if (SDValue R = DetectAddExtract(A.getOperand(1)))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+ A.getOperand(0)));
+ }
+ return SDValue();
+}
+
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -13279,6 +13875,60 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
+SDValue
+AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SREM as SREM
+
+ EVT VT = N->getValueType(0);
+
+  // For scalable and fixed types, mark them as cheap so we can handle them
+  // much later. This allows us to handle larger than legal types.
+ if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
+ return SDValue(N, 0);
+
+ // fold (srem X, pow2)
+ if ((VT != MVT::i32 && VT != MVT::i64) ||
+ !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
+ return SDValue();
+
+ unsigned Lg2 = Divisor.countTrailingZeros();
+ if (Lg2 == 0)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue CCVal, CSNeg;
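+  // Sketch of the lowering: for non-negative X, X srem 2^k == X & (2^k - 1);
+  // for negative X it is -((-X) & (2^k - 1)), e.g. -7 srem 4 == -(7 & 3) == -3.
+  // CSNEG selects between the two forms from the flags of a compare with zero.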
+ if (Lg2 == 1) {
+ SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETGE, CCVal, DAG, DL);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
+ CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(And.getNode());
+ } else {
+ SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0);
+ SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
+ SDValue AndNeg = DAG.getNode(ISD::AND, DL, VT, Negs, Pow2MinusOne);
+ CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
+ Negs.getValue(1));
+
+ Created.push_back(Negs.getNode());
+ Created.push_back(AndPos.getNode());
+ Created.push_back(AndNeg.getNode());
+ }
+
+ return CSNeg;
+}
+
static bool IsSVECntIntrinsic(SDValue S) {
switch(getIntrinsicID(S.getNode())) {
default:
@@ -13300,11 +13950,10 @@ static bool IsSVECntIntrinsic(SDValue S) {
/// operations need a bit more inspection to get this information.
///
/// \param Extend The SDNode from the DAG that represents the extend operation
-/// \param DAG The SelectionDAG hosting the \p Extend node
///
/// \returns The type representing the \p Extend source type, or \p MVT::Other
/// if no valid type can be determined
-static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
+static EVT calculatePreExtendType(SDValue Extend) {
switch (Extend.getOpcode()) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -13337,102 +13986,90 @@ static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
default:
return MVT::Other;
}
-
- llvm_unreachable("Code path unhandled in calculatePreExtendType!");
}
-/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
-/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
-static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
- SelectionDAG &DAG) {
-
- ShuffleVectorSDNode *ShuffleNode =
- dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
- if (!ShuffleNode)
- return SDValue();
-
- // Ensuring the mask is zero before continuing
- if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
- return SDValue();
-
- SDValue InsertVectorElt = VectorShuffle.getOperand(0);
-
- if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
- return SDValue();
-
- SDValue InsertLane = InsertVectorElt.getOperand(2);
- ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
- // Ensures the insert is inserting into lane 0
- if (!Constant || Constant->getZExtValue() != 0)
+/// Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern
+/// into sext/zext(buildvector) or sext/zext(shuffle) making use of the vector
+/// SExt/ZExt rather than the scalar SExt/ZExt.
+static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG) {
+ EVT VT = BV.getValueType();
+ if (BV.getOpcode() != ISD::BUILD_VECTOR &&
+ BV.getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
- SDValue Extend = InsertVectorElt.getOperand(1);
+ // Use the first item in the buildvector/shuffle to get the size of the
+ // extend, and make sure it looks valid.
+ SDValue Extend = BV->getOperand(0);
unsigned ExtendOpcode = Extend.getOpcode();
-
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
ExtendOpcode == ISD::AssertSext;
if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
return SDValue();
-
- EVT TargetType = VectorShuffle.getValueType();
- EVT PreExtendType = calculatePreExtendType(Extend, DAG);
-
- if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
- TargetType != MVT::v2i64) ||
- (PreExtendType == MVT::Other))
+  // Shuffle inputs are vectors; limit to SIGN_EXTEND and ZERO_EXTEND to ensure
+ // calculatePreExtendType will work without issue.
+ if (BV.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ ExtendOpcode != ISD::SIGN_EXTEND && ExtendOpcode != ISD::ZERO_EXTEND)
return SDValue();
// Restrict valid pre-extend data type
- if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
- PreExtendType != MVT::i32)
- return SDValue();
-
- EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
-
- if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
- return SDValue();
-
- if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
+ EVT PreExtendType = calculatePreExtendType(Extend);
+ if (PreExtendType == MVT::Other ||
+ PreExtendType.getScalarSizeInBits() != VT.getScalarSizeInBits() / 2)
return SDValue();
- SDLoc DL(VectorShuffle);
-
- SDValue InsertVectorNode = DAG.getNode(
- InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
- DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
- DAG.getConstant(0, DL, MVT::i64));
-
- std::vector<int> ShuffleMask(TargetType.getVectorNumElements());
-
- SDValue VectorShuffleNode =
- DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
- DAG.getUNDEF(PreExtendVT), ShuffleMask);
-
- SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- DL, TargetType, VectorShuffleNode);
+ // Make sure all other operands are equally extended
+ for (SDValue Op : drop_begin(BV->ops())) {
+ if (Op.isUndef())
+ continue;
+ unsigned Opc = Op.getOpcode();
+ bool OpcIsSExt = Opc == ISD::SIGN_EXTEND || Opc == ISD::SIGN_EXTEND_INREG ||
+ Opc == ISD::AssertSext;
+ if (OpcIsSExt != IsSExt || calculatePreExtendType(Op) != PreExtendType)
+ return SDValue();
+ }
- return ExtendNode;
+ SDValue NBV;
+ SDLoc DL(BV);
+ if (BV.getOpcode() == ISD::BUILD_VECTOR) {
+ EVT PreExtendVT = VT.changeVectorElementType(PreExtendType);
+ EVT PreExtendLegalType =
+ PreExtendType.getScalarSizeInBits() < 32 ? MVT::i32 : PreExtendType;
+ SmallVector<SDValue, 8> NewOps;
+ for (SDValue Op : BV->ops())
+ NewOps.push_back(Op.isUndef() ? DAG.getUNDEF(PreExtendLegalType)
+ : DAG.getAnyExtOrTrunc(Op.getOperand(0), DL,
+ PreExtendLegalType));
+ NBV = DAG.getNode(ISD::BUILD_VECTOR, DL, PreExtendVT, NewOps);
+ } else { // BV.getOpcode() == ISD::VECTOR_SHUFFLE
+ EVT PreExtendVT = VT.changeVectorElementType(PreExtendType.getScalarType());
+ NBV = DAG.getVectorShuffle(PreExtendVT, DL, BV.getOperand(0).getOperand(0),
+ BV.getOperand(1).isUndef()
+ ? DAG.getUNDEF(PreExtendVT)
+ : BV.getOperand(1).getOperand(0),
+ cast<ShuffleVectorSDNode>(BV)->getMask());
+ }
+ return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, NBV);
}
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
// If the value type isn't a vector, none of the operands are going to be dups
- if (!Mul->getValueType(0).isVector())
+ EVT VT = Mul->getValueType(0);
+ if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
return SDValue();
- SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
- SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
+ SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG);
+ SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG);
// Neither operands have been changed, don't make any further changes
if (!Op0 && !Op1)
return SDValue();
SDLoc DL(Mul);
- return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
- Op0 ? Op0 : Mul->getOperand(0),
+ return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
Op1 ? Op1 : Mul->getOperand(1));
}
@@ -13649,7 +14286,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), LN0->getAlignment(),
+ LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags());
// Make sure successors of the original load stay after it by updating them
@@ -13676,8 +14313,10 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue Op = N->getOperand(0);
- if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
- Op.getOpcode() != ISD::FMUL)
+ if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
return SDValue();
SDValue ConstVec = Op->getOperand(1);
@@ -13713,7 +14352,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
N->getOpcode() == ISD::FP_TO_UINT_SAT) {
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- if (SatVT.getScalarSizeInBits() != IntBits)
+ if (SatVT.getScalarSizeInBits() != IntBits || IntBits != FloatBits)
return SDValue();
}
@@ -13956,15 +14595,85 @@ static SDValue tryCombineToBSL(SDNode *N,
return SDValue();
}
+// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
+// convert to csel(ccmp(.., cc0)), depending on cc1:
+
+// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
+// =>
+// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
+//
+// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
+// =>
+// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
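+//
+// In the forms above, "CCMP x1 y1 nzcv cond" performs the second compare when
+// `cond` holds on the incoming flags and otherwise sets the flags directly to
+// the literal `nzcv`, chosen so the final CSET yields the short-circuited
+// result.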
+static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue CSel0 = N->getOperand(0);
+ SDValue CSel1 = N->getOperand(1);
+
+ if (CSel0.getOpcode() != AArch64ISD::CSEL ||
+ CSel1.getOpcode() != AArch64ISD::CSEL)
+ return SDValue();
+
+ if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
+ return SDValue();
+
+ if (!isNullConstant(CSel0.getOperand(0)) ||
+ !isOneConstant(CSel0.getOperand(1)) ||
+ !isNullConstant(CSel1.getOperand(0)) ||
+ !isOneConstant(CSel1.getOperand(1)))
+ return SDValue();
+
+ SDValue Cmp0 = CSel0.getOperand(3);
+ SDValue Cmp1 = CSel1.getOperand(3);
+ AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
+ AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
+ if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
+ return SDValue();
+ if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
+ Cmp0.getOpcode() == AArch64ISD::SUBS) {
+ std::swap(Cmp0, Cmp1);
+ std::swap(CC0, CC1);
+ }
+
+ if (Cmp1.getOpcode() != AArch64ISD::SUBS)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue CCmp;
+
+ if (N->getOpcode() == ISD::AND) {
+ AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
+ SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
+ Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
+ } else {
+ AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
+ SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
+ Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
+ }
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
+ CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
+ CCmp);
+}
+
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
- // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
+ if (SDValue R = performANDORCSELCombine(N, DAG))
+ return R;
+
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
+ // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
@@ -14015,7 +14724,7 @@ static SDValue performSVEAndCombine(SDNode *N,
SDValue UnpkOp = Src->getOperand(0);
SDValue Dup = N->getOperand(1);
- if (Dup.getOpcode() != AArch64ISD::DUP)
+ if (Dup.getOpcode() != ISD::SPLAT_VECTOR)
return SDValue();
SDLoc DL(N);
@@ -14038,8 +14747,7 @@ static SDValue performSVEAndCombine(SDNode *N,
// Otherwise, make sure we propagate the AND to the operand
// of the unpack
- Dup = DAG.getNode(AArch64ISD::DUP, DL,
- UnpkOp->getValueType(0),
+ Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
SDValue And = DAG.getNode(ISD::AND, DL,
@@ -14097,20 +14805,34 @@ static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+
+ if (SDValue R = performANDORCSELCombine(N, DAG))
+ return R;
+
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
+ // Although NEON has no EORV instruction, when only the least significant bit
+ // is required the operation is synonymous with ADDV.
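+  // (Bit 0 of a sum equals the XOR of the addends' bit 0, since no carry
+  // flows into the least significant bit.)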
+ if (LHS.getOpcode() == ISD::VECREDUCE_XOR && isOneConstant(RHS) &&
+ LHS.getOperand(0).getValueType().isFixedLengthVector() &&
+ LHS.hasOneUse()) {
+ SDLoc DL(N);
+ SDValue ADDV = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, LHS.getOperand(0));
+ return DAG.getNode(ISD::AND, DL, VT, ADDV, RHS);
+ }
+
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
// The combining code below works only for NEON vectors. In particular, it
// does not work for SVE when dealing with vectors wider than 128 bits.
- if (!(VT.is64BitVector() || VT.is128BitVector()))
+ if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
- BuildVectorSDNode *BVN =
- dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
if (!BVN)
return SDValue();
@@ -14141,107 +14863,125 @@ static SDValue performANDCombine(SDNode *N,
return SDValue();
}
-// Attempt to form urhadd(OpA, OpB) from
-// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
-// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
-// The original form of the first expression is
-// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
-// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
-// Before this function is called the srl will have been lowered to
-// AArch64ISD::VLSHR.
-// This pass can also recognize signed variants of the patterns that use sign
-// extension instead of zero extension and form a srhadd(OpA, OpB) or a
-// shadd(OpA, OpB) from them.
-static SDValue
-performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
+static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
+ switch (Opcode) {
+ case ISD::STRICT_FADD:
+ case ISD::FADD:
+ return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
+ case ISD::ADD:
+ return VT == MVT::i64;
+ default:
+ return false;
+ }
+}
- // Since we are looking for a right shift by a constant value of 1 and we are
- // operating on types at least 16 bits in length (sign/zero extended OpA and
- // OpB, which are at least 8 bits), it follows that the truncate will always
- // discard the shifted-in bit and therefore the right shift will be logical
- // regardless of the signedness of OpA and OpB.
- SDValue Shift = N->getOperand(0);
- if (Shift.getOpcode() != AArch64ISD::VLSHR)
+static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
+ AArch64CC::CondCode Cond);
+
+static bool isPredicateCCSettingOp(SDValue N) {
+ if ((N.getOpcode() == ISD::SETCC) ||
+ (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
+ N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
+ // get_active_lane_mask is lowered to a whilelo instruction.
+ N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
+ return true;
+
+ return false;
+}
+
+// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
+// ... into: "ptrue p, all" + PTEST
+static SDValue
+performFirstTrueTestVectorCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ // Make sure PTEST can be legalised with illegal types.
+ if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
return SDValue();
- // Is the right shift using an immediate value of 1?
- uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
- if (ShiftAmount != 1)
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+
+ if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1 ||
+ !isNullConstant(N->getOperand(1)))
return SDValue();
- SDValue ExtendOpA, ExtendOpB;
- SDValue ShiftOp0 = Shift.getOperand(0);
- unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
- if (ShiftOp0Opc == ISD::SUB) {
+  // Restrict the DAG combine to only cases where we're extracting from a
+ // flag-setting operation.
+ if (!isPredicateCCSettingOp(N0))
+ return SDValue();
- SDValue Xor = ShiftOp0.getOperand(1);
- if (Xor.getOpcode() != ISD::XOR)
- return SDValue();
+ // Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Pg = getPTrue(DAG, SDLoc(N), VT, AArch64SVEPredPattern::all);
+ return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
+}
- // Is the XOR using a constant amount of all ones in the right hand side?
- uint64_t C;
- if (!isAllConstantBuildVector(Xor.getOperand(1), C))
- return SDValue();
+// Materialize : Idx = (add (mul vscale, NumEls), -1)
+// i1 = extract_vector_elt t37, Constant:i64<Idx>
+// ... into: "ptrue p, all" + PTEST
+static SDValue
+performLastTrueTestVectorCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+  // Make sure PTEST can be legalised with illegal types.
+ if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
+ return SDValue();
- unsigned ElemSizeInBits = VT.getScalarSizeInBits();
- APInt CAsAPInt(ElemSizeInBits, C);
- if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits))
- return SDValue();
+ SDValue N0 = N->getOperand(0);
+ EVT OpVT = N0.getValueType();
- ExtendOpA = Xor.getOperand(0);
- ExtendOpB = ShiftOp0.getOperand(0);
- } else if (ShiftOp0Opc == ISD::ADD) {
- ExtendOpA = ShiftOp0.getOperand(0);
- ExtendOpB = ShiftOp0.getOperand(1);
- } else
+ if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
- unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
- unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
- if (!(ExtendOpAOpc == ExtendOpBOpc &&
- (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
+ // Idx == (add (mul vscale, NumEls), -1)
+ SDValue Idx = N->getOperand(1);
+ if (Idx.getOpcode() != ISD::ADD || !isAllOnesConstant(Idx.getOperand(1)))
return SDValue();
- // Is the result of the right shift being truncated to the same value type as
- // the original operands, OpA and OpB?
- SDValue OpA = ExtendOpA.getOperand(0);
- SDValue OpB = ExtendOpB.getOperand(0);
- EVT OpAVT = OpA.getValueType();
- assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
- if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
+ SDValue VS = Idx.getOperand(0);
+ if (VS.getOpcode() != ISD::VSCALE)
return SDValue();
- SDLoc DL(N);
- bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
- bool IsRHADD = ShiftOp0Opc == ISD::SUB;
- unsigned HADDOpc = IsSignExtend
- ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
- : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
- SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
+ unsigned NumEls = OpVT.getVectorElementCount().getKnownMinValue();
+ if (VS.getConstantOperandVal(0) != NumEls)
+ return SDValue();
- return ResultHADD;
+ // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Pg = getPTrue(DAG, SDLoc(N), OpVT, AArch64SVEPredPattern::all);
+ return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
}
-static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
- switch (Opcode) {
- case ISD::FADD:
- return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
- case ISD::ADD:
- return VT == MVT::i64;
- default:
- return false;
- }
-}
+static SDValue
+performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
+ return Res;
+ if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget))
+ return Res;
-static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
+ SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
- const bool FullFP16 =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+ const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
+ bool IsStrict = N0->isStrictFPOpcode();
+
+ // extract(dup x) -> x
+ if (N0.getOpcode() == AArch64ISD::DUP)
+ return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
@@ -14250,11 +14990,14 @@ static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
// ->
// (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
// (extract_vector_elt (vXf32 Other) 1))
+ // For strict_fadd we need to make sure the old strict_fadd can be deleted,
+ // so we can only do this when the extract_vector_elt is its only user.
if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
- hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
+ hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
+ (!IsStrict || N0.hasOneUse())) {
SDLoc DL(N0);
- SDValue N00 = N0->getOperand(0);
- SDValue N01 = N0->getOperand(1);
+ SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
+ SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
SDValue Other = N00;
@@ -14267,11 +15010,23 @@ static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
Other == Shuffle->getOperand(0)) {
- return DAG.getNode(N0->getOpcode(), DL, VT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
- DAG.getConstant(0, DL, MVT::i64)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
- DAG.getConstant(1, DL, MVT::i64)));
+ SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue Extract2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
+ DAG.getConstant(1, DL, MVT::i64));
+ if (!IsStrict)
+ return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
+
+ // For strict_fadd we need uses of the final extract_vector to be
+ // replaced with the strict_fadd. We also need uses of the chain output
+ // of the original strict_fadd to use the chain output of the new
+ // strict_fadd, as otherwise it may not be deleted.
+ SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
+ {VT, MVT::Other},
+ {N0->getOperand(0), Extract1, Extract2});
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1));
+ return SDValue(N, 0);
}
}
@@ -14321,25 +15076,61 @@ static SDValue performConcatVectorsCombine(SDNode *N,
}
}
+ if (N->getOperand(0).getValueType() == MVT::v4i8) {
+ // If we have a concat of v4i8 loads, convert them to a buildvector of f32
+ // loads to prevent having to go through the v4i8 load legalization that
+ // needs to extend each element into a larger type.
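+ // e.g. (v8i8 concat(v4i8 load(p), v4i8 load(q))) becomes, roughly,
+ //      (v8i8 bitcast(v2f32 buildvector(f32 load(p), f32 load(q))))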
+ if (N->getNumOperands() % 2 == 0 && all_of(N->op_values(), [](SDValue V) {
+ if (V.getValueType() != MVT::v4i8)
+ return false;
+ if (V.isUndef())
+ return true;
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
+ return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
+ LD->getExtensionType() == ISD::NON_EXTLOAD;
+ })) {
+ EVT NVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::f32, N->getNumOperands());
+ SmallVector<SDValue> Ops;
+
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ SDValue V = N->getOperand(i);
+ if (V.isUndef())
+ Ops.push_back(DAG.getUNDEF(MVT::f32));
+ else {
+ LoadSDNode *LD = cast<LoadSDNode>(V);
+ SDValue NewLoad =
+ DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
+ Ops.push_back(NewLoad);
+ }
+ }
+ return DAG.getBitcast(N->getValueType(0),
+ DAG.getBuildVector(NVT, dl, Ops));
+ }
+ }
+
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
- // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
- // subvectors from the same original vectors. Combine these into a single
- // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
- // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
- // extract_subvector (v16i8 OpB,
- // <0>))),
- // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
- // extract_subvector (v16i8 OpB,
- // <8>)))))
+ // Optimise concat_vectors of two [us]avgceils or [us]avgfloors that use
+ // extracted subvectors from the same original vectors. Combine these into a
+ // single avg that operates on the two original vectors.
+ // avgceil is the target-independent name for rhadd; avgfloor is hadd.
+ // Example:
+ // (concat_vectors (v8i8 (avgceils (extract_subvector (v16i8 OpA, <0>),
+ // extract_subvector (v16i8 OpB, <0>))),
+ // (v8i8 (avgceils (extract_subvector (v16i8 OpA, <8>),
+ // extract_subvector (v16i8 OpB, <8>)))))
// ->
- // (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
+ // (v16i8(avgceils(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
- (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
- N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
+ (N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS ||
+ N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
@@ -14411,6 +15202,29 @@ static SDValue performConcatVectorsCombine(SDNode *N,
}
static SDValue
+performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ SDValue V = N->getOperand(0);
+
+ // NOTE: This combine exists in DAGCombiner, but that version's legality check
+ // blocks this combine because the non-const case requires custom lowering.
+ //
+ // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
+ if (V.getOpcode() == ISD::SPLAT_VECTOR)
+ if (isa<ConstantSDNode>(V.getOperand(0)))
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));
+
+ return SDValue();
+}
+
+static SDValue
performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
@@ -14470,33 +15284,34 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
// Check the operand and see if it originates from a lane extract.
SDValue Op1 = N->getOperand(1);
- if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- // Yep, no additional predication needed. Perform the transform.
- SDValue IID = N->getOperand(0);
- SDValue Shift = N->getOperand(2);
- SDValue Vec = Op1.getOperand(0);
- SDValue Lane = Op1.getOperand(1);
- EVT ResTy = N->getValueType(0);
- EVT VecResTy;
- SDLoc DL(N);
+ if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
- // The vector width should be 128 bits by the time we get here, even
- // if it started as 64 bits (the extract_vector handling will have
- // done so).
- assert(Vec.getValueSizeInBits() == 128 &&
- "unexpected vector size on extract_vector_elt!");
- if (Vec.getValueType() == MVT::v4i32)
- VecResTy = MVT::v4f32;
- else if (Vec.getValueType() == MVT::v2i64)
- VecResTy = MVT::v2f64;
- else
- llvm_unreachable("unexpected vector type!");
+ // Yep, no additional predication needed. Perform the transform.
+ SDValue IID = N->getOperand(0);
+ SDValue Shift = N->getOperand(2);
+ SDValue Vec = Op1.getOperand(0);
+ SDValue Lane = Op1.getOperand(1);
+ EVT ResTy = N->getValueType(0);
+ EVT VecResTy;
+ SDLoc DL(N);
- SDValue Convert =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
- }
- return SDValue();
+ // The vector width should be 128 bits by the time we get here, even
+ // if it started as 64 bits (the extract_vector handling will have
+ // done so). Bail if it is not.
+ if (Vec.getValueSizeInBits() != 128)
+ return SDValue();
+
+ if (Vec.getValueType() == MVT::v4i32)
+ VecResTy = MVT::v4f32;
+ else if (Vec.getValueType() == MVT::v2i64)
+ VecResTy = MVT::v2f64;
+ else
+ return SDValue();
+
+ SDValue Convert =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
}
// AArch64 high-vector "long" operations are formed by performing the non-high
@@ -14515,6 +15330,11 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
// similarly here.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
+ MVT VT = N.getSimpleValueType();
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N.getConstantOperandVal(1) == 0)
+ N = N.getOperand(0);
+
switch (N.getOpcode()) {
case AArch64ISD::DUP:
case AArch64ISD::DUPLANE8:
@@ -14535,18 +15355,19 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
return SDValue();
}
- MVT NarrowTy = N.getSimpleValueType();
- if (!NarrowTy.is64BitVector())
+ if (!VT.is64BitVector())
return SDValue();
- MVT ElementTy = NarrowTy.getVectorElementType();
- unsigned NumElems = NarrowTy.getVectorNumElements();
- MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
+ SDLoc DL(N);
+ unsigned NumElems = VT.getVectorNumElements();
+ if (N.getValueType().is64BitVector()) {
+ MVT ElementTy = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
+ N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
+ }
- SDLoc dl(N);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
- DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
- DAG.getConstant(NumElems, dl, MVT::i64));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N,
+ DAG.getConstant(NumElems, DL, MVT::i64));
}
static bool isEssentiallyExtractHighSubvector(SDValue N) {
@@ -14696,7 +15517,7 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
}
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
-static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Only scalar integer and vector types.
if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
@@ -14732,6 +15553,81 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, DL, MVT::i64));
}
+/// Perform the scalar expression combine in the form of:
+/// CSEL(c, 1, cc) + b => CSINC(b+c, b, cc)
+/// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
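+///
+/// e.g. with c = 10: (cc ? 10 : 1) + b == (cc ? b + 10 : b + 1)
+///                                     == CSINC(b + 10, b, cc),
+/// since CSINC(x, y, cc) yields x when cc holds and y + 1 otherwise.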
+static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Handle commutativity.
+ if (LHS.getOpcode() != AArch64ISD::CSEL &&
+ LHS.getOpcode() != AArch64ISD::CSNEG) {
+ std::swap(LHS, RHS);
+ if (LHS.getOpcode() != AArch64ISD::CSEL &&
+ LHS.getOpcode() != AArch64ISD::CSNEG) {
+ return SDValue();
+ }
+ }
+
+ if (!LHS.hasOneUse())
+ return SDValue();
+
+ AArch64CC::CondCode AArch64CC =
+ static_cast<AArch64CC::CondCode>(LHS.getConstantOperandVal(2));
+
+ // The CSEL should include a constant one operand, and the CSNEG should
+ // include a one or negative-one operand.
+ ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(LHS.getOperand(0));
+ ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ if (!CTVal || !CFVal)
+ return SDValue();
+
+ if (!(LHS.getOpcode() == AArch64ISD::CSEL &&
+ (CTVal->isOne() || CFVal->isOne())) &&
+ !(LHS.getOpcode() == AArch64ISD::CSNEG &&
+ (CTVal->isOne() || CFVal->isAllOnes())))
+ return SDValue();
+
+ // Switch CSEL(1, c, cc) to CSEL(c, 1, !cc)
+ if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
+ !CFVal->isOne()) {
+ std::swap(CTVal, CFVal);
+ AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+ }
+
+ SDLoc DL(N);
+ // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
+ if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
+ !CFVal->isAllOnes()) {
+ APInt C = -1 * CFVal->getAPIntValue();
+ CTVal = cast<ConstantSDNode>(DAG.getConstant(C, DL, VT));
+ CFVal = cast<ConstantSDNode>(DAG.getAllOnesConstant(DL, VT));
+ AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+ }
+
+ // It might be neutral for larger constants, as the immediate needs to be
+ // materialized in a register.
+ APInt ADDC = CTVal->getAPIntValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue()))
+ return SDValue();
+
+ assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
+ (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
+ "Unexpected constant value");
+
+ SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0));
+ SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32);
+ SDValue Cmp = LHS.getOperand(3);
+
+ return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
+}
+
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
@@ -14755,6 +15651,49 @@ static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
Dot.getOperand(2));
}
+static bool isNegatedInteger(SDValue Op) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
+}
+
+static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, Zero, Op);
+}
+
+// Try to fold
+//
+// (neg (csel X, Y)) -> (csel (neg X), (neg Y))
+//
+// The folding helps csel be matched with csneg without generating a
+// redundant neg instruction; this includes negating the csel expansion of
+// the abs node lowered by lowerABS.
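+//
+// e.g. (neg (csel (neg a), b)) becomes (csel a, (neg b)), which can then be
+// selected as a single csneg.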
+static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
+ if (!isNegatedInteger(SDValue(N, 0)))
+ return SDValue();
+
+ SDValue CSel = N->getOperand(1);
+ if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
+ return SDValue();
+
+ SDValue N0 = CSel.getOperand(0);
+ SDValue N1 = CSel.getOperand(1);
+
+ // If neither operand is a negation, the fold isn't worthwhile: it would
+ // introduce two additional negations while removing only one.
+ if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
+ return SDValue();
+
+ SDValue N0N = getNegatedInteger(N0, DAG);
+ SDValue N1N = getNegatedInteger(N1, DAG);
+
+ SDLoc DL(N);
+ EVT VT = CSel.getValueType();
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
+ CSel.getOperand(3));
+}
+
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
@@ -14808,14 +15747,120 @@ static SDValue performAddSubLongCombine(SDNode *N,
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
+static bool isCMP(SDValue Op) {
+ return Op.getOpcode() == AArch64ISD::SUBS &&
+ !Op.getNode()->hasAnyUseOfValue(0);
+}
+
+// (CSEL 1 0 CC Cond) => CC
+// (CSEL 0 1 CC Cond) => !CC
+static Optional<AArch64CC::CondCode> getCSETCondCode(SDValue Op) {
+ if (Op.getOpcode() != AArch64ISD::CSEL)
+ return None;
+ auto CC = static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2));
+ if (CC == AArch64CC::AL || CC == AArch64CC::NV)
+ return None;
+ SDValue OpLHS = Op.getOperand(0);
+ SDValue OpRHS = Op.getOperand(1);
+ if (isOneConstant(OpLHS) && isNullConstant(OpRHS))
+ return CC;
+ if (isNullConstant(OpLHS) && isOneConstant(OpRHS))
+ return getInvertedCondCode(CC);
+
+ return None;
+}
+
+// (ADC{S} l r (CMP (CSET HS carry) 1)) => (ADC{S} l r carry)
+// (SBC{S} l r (CMP 0 (CSET LO carry))) => (SBC{S} l r carry)
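+//
+// Such chains typically arise when a wider-than-legal addition is
+// legalised: the carry out of the low half is materialised with CSET only
+// to be turned back into a flag with CMP before feeding the high-half ADC.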
+static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd) {
+ SDValue CmpOp = Op->getOperand(2);
+ if (!isCMP(CmpOp))
+ return SDValue();
+
+ if (IsAdd) {
+ if (!isOneConstant(CmpOp.getOperand(1)))
+ return SDValue();
+ } else {
+ if (!isNullConstant(CmpOp.getOperand(0)))
+ return SDValue();
+ }
+
+ SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
+ auto CC = getCSETCondCode(CsetOp);
+ if (CC != (IsAdd ? AArch64CC::HS : AArch64CC::LO))
+ return SDValue();
+
+ return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
+ Op->getOperand(0), Op->getOperand(1),
+ CsetOp.getOperand(3));
+}
+
+// (ADC x 0 cond) => (CINC x HS cond)
+static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Cond = N->getOperand(2);
+
+ if (!isNullConstant(RHS))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // (CINC x cc cond) <=> (CSINC x x !cc cond)
+ SDValue CC = DAG.getConstant(AArch64CC::LO, DL, MVT::i32);
+ return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond);
+}
+
+// Transform vector add(zext i8 to i32, zext i8 to i32)
+// into sext(add(zext(i8 to i16), zext(i8 to i16)) to i32)
+// This allows more uses of saddl/uaddl at the lower vector widths, and fewer
+// extends.
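+//
+// e.g. v16i32 add(zext v16i8 a, zext v16i8 b)
+//      -> sext(v16i16 add(zext v16i8 a, zext v16i8 b)) to v16i32
+// The narrower add cannot overflow i16 here, since 255 + 255 < 2^15.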
+static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isFixedLengthVector() || VT.getSizeInBits() <= 128 ||
+ (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
+ (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
+ N->getOperand(0).getOperand(0).getValueType() !=
+ N->getOperand(1).getOperand(0).getValueType())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0).getOperand(0);
+ SDValue N1 = N->getOperand(1).getOperand(0);
+ EVT InVT = N0.getValueType();
+
+ EVT S1 = InVT.getScalarType();
+ EVT S2 = VT.getScalarType();
+ if ((S2 == MVT::i32 && S1 == MVT::i8) ||
+ (S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
+ SDLoc DL(N);
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
+ S2.getHalfSizedIntegerVT(*DAG.getContext()),
+ VT.getVectorElementCount());
+ SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
+ SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
+ SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewOp);
+ }
+ return SDValue();
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Try to change sum of two reductions.
- if (SDValue Val = performUADDVCombine(N, DAG))
+ if (SDValue Val = performAddUADDVCombine(N, DAG))
return Val;
if (SDValue Val = performAddDotCombine(N, DAG))
return Val;
+ if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
+ return Val;
+ if (SDValue Val = performNegCSelCombine(N, DAG))
+ return Val;
+ if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
+ return Val;
return performAddSubLongCombine(N, DCI, DAG);
}
@@ -15176,6 +16221,9 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
return false;
}
+ if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
+ return true;
+
// "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
// or smaller than the implicit element type represented by N.
// NOTE: A larger element count implies a smaller element type.
@@ -15186,8 +16234,7 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
// If we're compiling for a specific vector-length, we can check if the
// pattern's VL equals that of the scalable vector at runtime.
if (N.getOpcode() == AArch64ISD::PTRUE) {
- const auto &Subtarget =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+ const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize) {
@@ -15233,6 +16280,39 @@ static SDValue performIntrinsicCombine(SDNode *N,
switch (IID) {
default:
break;
+ case Intrinsic::get_active_lane_mask: {
+ SDValue Res = SDValue();
+ EVT VT = N->getValueType(0);
+ if (VT.isFixedLengthVector()) {
+ // We can use the SVE whilelo instruction to lower this intrinsic by
+ // creating the appropriate sequence of scalable vector operations and
+ // then extracting a fixed-width subvector from the scalable vector.
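+ // e.g. for v4i1 get_active_lane_mask(base, n), roughly:
+ //   nxv4i1  whilelo(base, n)
+ //   nxv4i32 sign_extend(...)
+ //   v4i32   extract_subvector(..., 0)
+ //   v4i1    truncate(...)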
+
+ SDLoc DL(N);
+ SDValue ID =
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
+
+ EVT WhileVT = EVT::getVectorVT(
+ *DAG.getContext(), MVT::i1,
+ ElementCount::getScalable(VT.getVectorNumElements()));
+
+ // Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
+ EVT PromVT = getPromotedVTForPredicate(WhileVT);
+
+ // Get the fixed-width equivalent of PromVT for extraction.
+ EVT ExtVT =
+ EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
+ VT.getVectorElementCount());
+
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
+ N->getOperand(1), N->getOperand(2));
+ Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
+ DAG.getConstant(0, DL, MVT::i64));
+ Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
+ }
+ return Res;
+ }
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
@@ -15261,7 +16341,11 @@ static SDValue performIntrinsicCombine(SDNode *N,
return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
+ return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_umull:
+ return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
@@ -15350,6 +16434,10 @@ static SDValue performIntrinsicCombine(SDNode *N,
return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
case Intrinsic::aarch64_sve_orr:
return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
+ case Intrinsic::aarch64_sve_sabd:
+ return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true);
+ case Intrinsic::aarch64_sve_uabd:
+ return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true);
case Intrinsic::aarch64_sve_sqadd:
return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
case Intrinsic::aarch64_sve_sqsub:
@@ -15538,7 +16626,7 @@ static SDValue performExtendCombine(SDNode *N,
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
assert(!St.isTruncatingStore() && "cannot split truncating vector store");
- unsigned OrigAlignment = St.getAlignment();
+ Align OrigAlignment = St.getAlign();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
// Create scalar stores. This is at least as good as the code sequence for a
@@ -15563,7 +16651,7 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
unsigned Offset = EltOffset;
while (--NumVecElts) {
- unsigned Alignment = MinAlign(OrigAlignment, Offset);
+ Align Alignment = commonAlignment(OrigAlignment, Offset);
SDValue OffsetPtr =
DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
@@ -15636,10 +16724,6 @@ static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
EVT PtrTy = N->getOperand(3).getValueType();
- if (VT == MVT::nxv8bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
-
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
@@ -15667,9 +16751,6 @@ static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
- if (VT == MVT::nxv8bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
@@ -15692,10 +16773,6 @@ static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
EVT HwSrcVt = getSVEContainerType(DataVT);
SDValue InputVT = DAG.getValueType(DataVT);
- if (DataVT == MVT::nxv8bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
-
if (DataVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
@@ -15722,10 +16799,6 @@ static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
EVT DataVT = Data.getValueType();
EVT PtrTy = N->getOperand(4).getValueType();
- if (DataVT == MVT::nxv8bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
-
if (DataVT.isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
@@ -15912,8 +16985,8 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// extensions can use this to mark that it does not want splitting to happen
// (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
// eliminating alignment hazards is only 1 in 8 for alignment of 2.
- if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
- S->getAlignment() <= 2)
+ if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
+ S->getAlign() <= Align(2))
return SDValue();
// If we get a splat of a scalar convert this vector store to a store of
@@ -15934,11 +17007,11 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
- S->getAlignment(), S->getMemOperand()->getFlags());
+ S->getAlign(), S->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
- S->getPointerInfo(), S->getAlignment(),
+ S->getPointerInfo(), S->getAlign(),
S->getMemOperand()->getFlags());
}
@@ -15970,6 +17043,33 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op1 = N->getOperand(1);
EVT ResVT = N->getValueType(0);
+ // uzp1(x, undef) -> concat(truncate(x), undef)
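+ // e.g. v8i16 uzp1(x, undef)
+ //      -> v8i16 concat(v4i16 truncate(v4i32 bitcast(x)), v4i16 undef)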
+ if (Op1.getOpcode() == ISD::UNDEF) {
+ EVT BCVT = MVT::Other, HalfVT = MVT::Other;
+ switch (ResVT.getSimpleVT().SimpleTy) {
+ default:
+ break;
+ case MVT::v16i8:
+ BCVT = MVT::v8i16;
+ HalfVT = MVT::v8i8;
+ break;
+ case MVT::v8i16:
+ BCVT = MVT::v4i32;
+ HalfVT = MVT::v4i16;
+ break;
+ case MVT::v4i32:
+ BCVT = MVT::v2i64;
+ HalfVT = MVT::v2i32;
+ break;
+ }
+ if (BCVT != MVT::Other) {
+ SDValue BC = DAG.getBitcast(BCVT, Op0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
+ DAG.getUNDEF(HalfVT));
+ }
+ }
+
// uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
@@ -16267,6 +17367,152 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
+/// \return true if part of the index was folded into the Base.
+static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale,
+ SDLoc DL, SelectionDAG &DAG) {
+ // This function assumes a vector of i64 indices.
+ EVT IndexVT = Index.getValueType();
+ if (!IndexVT.isVector() || IndexVT.getVectorElementType() != MVT::i64)
+ return false;
+
+ // Simplify:
+ // BasePtr = Ptr
+ // Index = X + splat(Offset)
+ // ->
+ // BasePtr = Ptr + Offset * scale.
+ // Index = X
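+ //
+ // e.g. with Scale = 8 and Offset = 16, the base pointer advances by 128
+ // bytes and the index simplifies to X alone.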
+ if (Index.getOpcode() == ISD::ADD) {
+ if (auto Offset = DAG.getSplatValue(Index.getOperand(1))) {
+ Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
+ BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
+ Index = Index.getOperand(0);
+ return true;
+ }
+ }
+
+ // Simplify:
+ // BasePtr = Ptr
+ // Index = (X + splat(Offset)) << splat(Shift)
+ // ->
+ // BasePtr = Ptr + (Offset << Shift) * Scale
+ // Index = X << splat(Shift)
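+ //
+ // e.g. Index = (X + splat(2)) << splat(3) with Scale = 4 gives
+ //      BasePtr = Ptr + (2 << 3) * 4 = Ptr + 64 and Index = X << splat(3).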
+ if (Index.getOpcode() == ISD::SHL &&
+ Index.getOperand(0).getOpcode() == ISD::ADD) {
+ SDValue Add = Index.getOperand(0);
+ SDValue ShiftOp = Index.getOperand(1);
+ SDValue OffsetOp = Add.getOperand(1);
+ if (auto Shift = DAG.getSplatValue(ShiftOp))
+ if (auto Offset = DAG.getSplatValue(OffsetOp)) {
+ Offset = DAG.getNode(ISD::SHL, DL, MVT::i64, Offset, Shift);
+ Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
+ BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
+ Index = DAG.getNode(ISD::SHL, DL, Index.getValueType(),
+ Add.getOperand(0), ShiftOp);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Analyse the specified address, returning true if a more optimal addressing
+// mode is available. When returning true, all parameters are updated to
+// reflect their recommended values.
+static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N,
+ SDValue &BasePtr, SDValue &Index,
+ SelectionDAG &DAG) {
+ // Try to iteratively fold parts of the index into the base pointer to
+ // simplify the index as much as possible.
+ bool Changed = false;
+ while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
+ Changed = true;
+
+ // Only consider element types that are pointer sized, as smaller types can
+ // be easily promoted.
+ EVT IndexVT = Index.getValueType();
+ if (IndexVT.getVectorElementType() != MVT::i64 || IndexVT == MVT::nxv2i64)
+ return Changed;
+
+ // Match:
+ // Index = step(const)
+ int64_t Stride = 0;
+ if (Index.getOpcode() == ISD::STEP_VECTOR)
+ Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
+
+ // Match:
+ // Index = step(const) << shift(const)
+ else if (Index.getOpcode() == ISD::SHL &&
+ Index.getOperand(0).getOpcode() == ISD::STEP_VECTOR) {
+ SDValue RHS = Index.getOperand(1);
+ if (auto *Shift =
+ dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(RHS))) {
+ int64_t Step = (int64_t)Index.getOperand(0).getConstantOperandVal(1);
+ Stride = Step << Shift->getZExtValue();
+ }
+ }
+
+ // Return early if no supported pattern was found.
+ if (Stride == 0)
+ return Changed;
+
+ if (Stride < std::numeric_limits<int32_t>::min() ||
+ Stride > std::numeric_limits<int32_t>::max())
+ return Changed;
+
+ const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ unsigned MaxVScale =
+ Subtarget.getMaxSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
+ int64_t LastElementOffset =
+ IndexVT.getVectorMinNumElements() * Stride * MaxVScale;
+
+ if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
+ LastElementOffset > std::numeric_limits<int32_t>::max())
+ return Changed;
+
+ EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
+ // Stride is not scaled by 'Scale' explicitly here, because that scaling is
+ // applied by the gather/scatter addressing mode.
+ Index = DAG.getNode(ISD::STEP_VECTOR, SDLoc(N), NewIndexVT,
+ DAG.getTargetConstant(Stride, SDLoc(N), MVT::i32));
+ return true;
+}
+
+static SDValue performMaskedGatherScatterCombine(
+ SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) {
+ MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
+ assert(MGS && "Can only combine gather load or scatter store nodes");
+
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ SDLoc DL(MGS);
+ SDValue Chain = MGS->getChain();
+ SDValue Scale = MGS->getScale();
+ SDValue Index = MGS->getIndex();
+ SDValue Mask = MGS->getMask();
+ SDValue BasePtr = MGS->getBasePtr();
+ ISD::MemIndexType IndexType = MGS->getIndexType();
+
+ if (!findMoreOptimalIndexType(MGS, BasePtr, Index, DAG))
+ return SDValue();
+
+ // Here we catch such cases early and change MGATHER's IndexType to allow
+ // the use of an Index that's more legalisation friendly.
+ if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
+ SDValue PassThru = MGT->getPassThru();
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
+ }
+ auto *MSC = cast<MaskedScatterSDNode>(MGS);
+ SDValue Data = MSC->getValue();
+ SDValue Ops[] = {Chain, Data, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
+ Ops, MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
+}
+
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -16723,6 +17969,47 @@ static SDValue performBRCONDCombine(SDNode *N,
return SDValue();
}
+static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) {
+ unsigned CC = N->getConstantOperandVal(2);
+ SDValue SUBS = N->getOperand(3);
+ SDValue Zero, CTTZ;
+
+ if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) {
+ Zero = N->getOperand(0);
+ CTTZ = N->getOperand(1);
+ } else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) {
+ Zero = N->getOperand(1);
+ CTTZ = N->getOperand(0);
+ } else
+ return SDValue();
+
+ if ((CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::TRUNCATE) ||
+ (CTTZ.getOpcode() == ISD::TRUNCATE &&
+ CTTZ.getOperand(0).getOpcode() != ISD::CTTZ))
+ return SDValue();
+
+ assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
+ "Illegal type in CTTZ folding");
+
+ if (!isNullConstant(Zero) || !isNullConstant(SUBS.getOperand(1)))
+ return SDValue();
+
+ SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE
+ ? CTTZ.getOperand(0).getOperand(0)
+ : CTTZ.getOperand(0);
+
+ if (X != SUBS.getOperand(0))
+ return SDValue();
+
+ unsigned BitWidth = CTTZ.getOpcode() == ISD::TRUNCATE
+ ? CTTZ.getOperand(0).getValueSizeInBits()
+ : CTTZ.getValueSizeInBits();
+ SDValue BitWidthMinusOne =
+ DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
+ return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ,
+ BitWidthMinusOne);
+}
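+
+// e.g. for i32, (x == 0 ? 0 : cttz(x)) == (cttz(x) & 31): AArch64 lowers
+// CTTZ as rbit + clz, so a zero input yields 32, and 32 & 31 == 0.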
+
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -16731,6 +18018,11 @@ static SDValue performCSELCombine(SDNode *N,
if (N->getOperand(0) == N->getOperand(1))
return N->getOperand(0);
+ // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
+ // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
+ if (SDValue Folded = foldCSELofCTTZ(N, DAG))
+ return Folded;
+
return performCONDCombine(N, DCI, DAG, 2, 3);
}
@@ -16739,14 +18031,14 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
- SDLoc DL(N);
-
// Invert CSEL's condition.
auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
@@ -16757,9 +18049,48 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
LHS.getOperand(3));
- return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
+ return DAG.getZExtOrTrunc(CSEL, DL, VT);
}
+ // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
+ if (Cond == ISD::SETNE && isNullConstant(RHS) &&
+ LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
+ LHS->hasOneUse()) {
+ EVT TstVT = LHS->getValueType(0);
+ if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64) {
+ // This form is optimised better by emitComparison.
+ uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
+ SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
+ DAG.getConstant(TstImm, DL, TstVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
+ }
+ }
+
+ return SDValue();
+}
+
+// Replace a flag-setting operator (e.g. ANDS) with the generic version
+// (e.g. AND) if the flag is unused.
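+//
+// e.g. if the NZCV result of (ANDS x, y) has no uses, the node becomes
+// (AND x, y) plus a dead zero flag value; conversely, an identical
+// pre-existing (AND x, y) node is redirected to reuse this node's result.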
+static SDValue performFlagSettingCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ unsigned GenericOpcode) {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // If the flag result isn't used, convert back to a generic opcode.
+ if (!N->hasAnyUseOfValue(1)) {
+ SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
+ return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
+ DL);
+ }
+
+ // Combine identical generic nodes into this node, re-using the result.
+ if (SDNode *Generic = DCI.DAG.getNodeIfExists(
+ GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
+ DCI.CombineTo(Generic, SDValue(N, 0));
+
return SDValue();
}
@@ -16801,27 +18132,46 @@ static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue
+performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
"Unexpected opcode!");
+ SelectionDAG &DAG = DCI.DAG;
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
- // setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
- // => inner setcc_merge_zero
- if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
- LHS->getOpcode() == ISD::SIGN_EXTEND &&
- LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
- LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
- LHS->getOperand(0)->getOperand(0) == Pred)
- return LHS->getOperand(0);
-
if (SDValue V = performSetCCPunpkCombine(N, DAG))
return V;
+ if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
+ LHS->getOpcode() == ISD::SIGN_EXTEND &&
+ LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
+ // setcc_merge_zero(
+ // pred, extend(setcc_merge_zero(pred, ...)), != splat(0))
+ // => setcc_merge_zero(pred, ...)
+ if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
+ LHS->getOperand(0)->getOperand(0) == Pred)
+ return LHS->getOperand(0);
+
+ // setcc_merge_zero(
+ // all_active, extend(nxvNi1 ...), != splat(0))
+ // -> nxvNi1 ...
+ if (isAllActivePredicate(DAG, Pred))
+ return LHS->getOperand(0);
+
+ // setcc_merge_zero(
+ // pred, extend(nxvNi1 ...), != splat(0))
+ // -> nxvNi1 and(pred, ...)
+ if (DCI.isAfterLegalizeDAG())
+ // Do this after legalization to allow more folds on setcc_merge_zero
+ // to be recognized.
+ return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
+ LHS->getOperand(0), Pred);
+ }
+
return SDValue();
}
@@ -16928,12 +18278,53 @@ static SDValue performTBZCombine(SDNode *N,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
+// Swap vselect operands when doing so may allow a predicated operation to
+// implement the `sel`.
+//
+// (vselect (setcc ( condcode) (_) (_)) (a) (op (a) (b)))
+// => (vselect (setcc (!condcode) (_) (_)) (op (a) (b)) (a))
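+//
+// e.g. (vselect (setcc a, b, lt), a, (fadd a, c))
+//      -> (vselect (setcc a, b, ge), (fadd a, c), a)
+// so the fadd can become a merging predicated FADD whose inactive lanes
+// already hold a.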
+static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG) {
+ auto SelectA = N->getOperand(1);
+ auto SelectB = N->getOperand(2);
+ auto NTy = N->getValueType(0);
+
+ if (!NTy.isScalableVector())
+ return SDValue();
+ SDValue SetCC = N->getOperand(0);
+ if (SetCC.getOpcode() != ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ switch (SelectB.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::FMUL:
+ case ISD::FSUB:
+ case ISD::FADD:
+ break;
+ }
+ if (SelectA != SelectB.getOperand(0))
+ return SDValue();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ ISD::CondCode InverseCC =
+ ISD::getSetCCInverse(CC, SetCC.getOperand(0).getValueType());
+ auto InverseSetCC =
+ DAG.getSetCC(SDLoc(SetCC), SetCC.getValueType(), SetCC.getOperand(0),
+ SetCC.getOperand(1), InverseCC);
+
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), NTy,
+ {InverseSetCC, SelectB, SelectA});
+}
+
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
// such VSELECT.
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+ if (auto SwapResult = trySwapVSelectOperands(N, DAG))
+ return SwapResult;
+
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
@@ -17064,6 +18455,24 @@ static SDValue performSelectCombine(SDNode *N,
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
+static SDValue performDUPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ // If "v2i32 DUP(x)" and "v4i32 DUP(x)" both exist, use an extract from the
+ // 128-bit vector version.
+ if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) {
+ EVT LVT = VT.getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
+ if (SDNode *LN = DCI.DAG.getNodeIfExists(
+ N->getOpcode(), DCI.DAG.getVTList(LVT), {N->getOperand(0)})) {
+ SDLoc DL(N);
+ return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0),
+ DCI.DAG.getConstant(0, DL, MVT::i64));
+ }
+ }
+
+ return performPostLD1Combine(N, DCI, false);
+}
+
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
@@ -17104,13 +18513,14 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
// Check whether folding this offset is legal. It must not go out of bounds of
// the referenced object to avoid violating the code model, and must be
- // smaller than 2^21 because this is the largest offset expressible in all
- // object formats.
+ // smaller than 2^20 because this is the largest offset expressible in all
+ // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
+ // stores an immediate signed 21-bit offset.)
//
// This check also prevents us from folding negative offsets, which will end
// up being treated in the same way as large positive ones. They could also
// cause code model violations, and aren't really common enough to matter.
- if (Offset >= (1 << 21))
+ if (Offset >= (1 << 20))
return SDValue();
const GlobalValue *GV = GN->getGlobal();
@@ -17621,7 +19031,7 @@ performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return performPostLD1Combine(N, DCI, true);
}
-SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
if (Ty.isInteger())
return SDValue();
@@ -17643,9 +19053,9 @@ SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(Ty, Trunc);
}
-SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const AArch64Subtarget *Subtarget) {
+static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -17675,6 +19085,31 @@ SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget,
+ bool fixedSVEVectorVT) {
+ EVT VT = N->getValueType(0);
+
+ // Don't expand when SVE2 (or SME) is available.
+ if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
+ return SDValue();
+
+ // Don't expand for NEON
+ if (VT.isFixedLengthVector() && !fixedSVEVectorVT)
+ return SDValue();
+
+ SDLoc DL(N);
+
+ SDValue Mask = N->getOperand(0);
+ SDValue In1 = N->getOperand(1);
+ SDValue In2 = N->getOperand(2);
+
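+ // Expand bsp(mask, in1, in2) as (in1 & mask) | (in2 & ~mask); on SVE2 a
+ // single BSL instruction would perform this select, hence the early exit
+ // above.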
+ SDValue InvMask = DAG.getNOT(DL, Mask, VT);
+ SDValue Sel = DAG.getNode(ISD::AND, DL, VT, Mask, In1);
+ SDValue SelInv = DAG.getNode(ISD::AND, DL, VT, InvMask, In2);
+ return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -17685,6 +19120,22 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
+ case AArch64ISD::ANDS:
+ return performFlagSettingCombine(N, DCI, ISD::AND);
+ case AArch64ISD::ADC:
+ if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
+ return R;
+ return foldADCToCINC(N, DAG);
+ case AArch64ISD::SBC:
+ return foldOverflowCheck(N, DAG, /* IsAdd */ false);
+ case AArch64ISD::ADCS:
+ if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
+ return R;
+ return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
+ case AArch64ISD::SBCS:
+ if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
+ return R;
+ return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
@@ -17711,10 +19162,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
- case ISD::TRUNCATE:
- return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
+ case ISD::EXTRACT_SUBVECTOR:
+ return performExtractSubvectorCombine(N, DCI, DAG);
case ISD::INSERT_SUBVECTOR:
return performInsertSubvectorCombine(N, DCI, DAG);
case ISD::SELECT:
@@ -17729,6 +19180,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
+ case ISD::MGATHER:
+ case ISD::MSCATTER:
+ return performMaskedGatherScatterCombine(N, DCI, DAG);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
case ISD::FP_EXTEND:
@@ -17741,7 +19195,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
- return performPostLD1Combine(N, DCI, false);
+ return performDUPCombine(N, DCI);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
@@ -17752,7 +19206,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
- return performSetccMergeZeroCombine(N, DAG);
+ return performSetccMergeZeroCombine(N, DCI);
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
@@ -17773,12 +19227,20 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performVectorShiftCombine(N, *this, DCI);
case AArch64ISD::SUNPKLO:
return performSunpkloCombine(N, DAG);
+ case AArch64ISD::BSP:
+ return performBSPExpandForSVE(
+ N, DAG, Subtarget, useSVEForFixedLengthVectorVT(N->getValueType(0)));
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
- return performExtractVectorEltCombine(N, DAG);
+ return performExtractVectorEltCombine(N, DCI, Subtarget);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
+ case AArch64ISD::UADDV:
+ return performUADDVCombine(N, DAG);
+ case AArch64ISD::SMULL:
+ case AArch64ISD::UMULL:
+ return tryCombineLongOpWithDup(Intrinsic::not_intrinsic, N, DCI, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -18152,6 +19614,15 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
+
+ // Bitcasting between unpacked vector types of different element counts is
+ // not a NOP because the live elements are laid out differently.
+ // 01234567
+ // e.g. nxv2i32 = XX??XX??
+ // nxv4f16 = X?X?X?X?
+ if (VT.getVectorElementCount() != SrcVT.getVectorElementCount())
+ return;
+
SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
return;
@@ -18169,6 +19640,53 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
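+// Replace a 256-bit (f)add of x and shuffle(x, <1,0,3,2,...>), i.e. a sum of
+// adjacent lane pairs, with a pairwise ADDP on the two 128-bit halves of x;
+// e.g. for v8f32 the pairwise sums are computed on the v4f32 halves and the
+// results shuffled back so each sum appears in both lanes of its pair.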
+static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.is256BitVector() ||
+ (VT.getScalarType().isFloatingPoint() &&
+ !N->getFlags().hasAllowReassociation()) ||
+ (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()))
+ return;
+
+ SDValue X = N->getOperand(0);
+ auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
+ if (!Shuf) {
+ Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
+ X = N->getOperand(1);
+ if (!Shuf)
+ return;
+ }
+
+ if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
+ return;
+
+ // Check the mask is 1,0,3,2,5,4,...
+ ArrayRef<int> Mask = Shuf->getMask();
+ for (int I = 0, E = Mask.size(); I < E; I++)
+ if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
+ return;
+
+ SDLoc DL(N);
+ auto LoHi = DAG.SplitVector(X, DL);
+ assert(LoHi.first.getValueType() == LoHi.second.getValueType());
+ SDValue Addp = DAG.getNode(AArch64ISD::ADDP, N, LoHi.first.getValueType(),
+ LoHi.first, LoHi.second);
+
+ // Shuffle the elements back into order.
+ SmallVector<int> NMask;
+ for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I < E; I++) {
+ NMask.push_back(I);
+ NMask.push_back(I);
+ }
+ Results.push_back(
+ DAG.getVectorShuffle(VT, DL,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Addp,
+ DAG.getUNDEF(LoHi.first.getValueType())),
+ DAG.getUNDEF(VT), NMask));
+}
+
static void ReplaceReductionResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned InterOp,
@@ -18346,6 +19864,10 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::VECREDUCE_UMIN:
Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
return;
+ case ISD::ADD:
+ case ISD::FADD:
+ ReplaceAddWithADDP(N, Results, DAG, Subtarget);
+ return;
case ISD::CTPOP:
if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
@@ -18406,8 +19928,10 @@ void AArch64TargetLowering::ReplaceNodeResults(
ReplaceExtractSubVectorResults(N, Results, DAG);
return;
case ISD::INSERT_SUBVECTOR:
- // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
- // to common code for result type legalisation
+ case ISD::CONCAT_VECTORS:
+ // Custom lowering has been requested for INSERT_SUBVECTOR and
+ // CONCAT_VECTORS -- but delegate to common code for result type
+ // legalisation
return;
case ISD::INTRINSIC_WO_CHAIN: {
EVT VT = N->getValueType(0);
@@ -18485,11 +20009,11 @@ bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
if (auto LI = dyn_cast<LoadInst>(I))
return LI->getType()->getPrimitiveSizeInBits() == 128 &&
- LI->getAlignment() >= 16;
+ LI->getAlign() >= Align(16);
if (auto SI = dyn_cast<StoreInst>(I))
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
- SI->getAlignment() >= 16;
+ SI->getAlign() >= Align(16);
return false;
}
@@ -18502,12 +20026,12 @@ bool AArch64TargetLowering::shouldInsertFencesForAtomic(
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
-bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+TargetLoweringBase::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- if (Size != 128)
- return false;
-
- return !isOpSuitableForLDPSTP(SI);
+ if (Size != 128 || isOpSuitableForLDPSTP(SI))
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::Expand;
}
// Loads and stores less than 128-bits are already atomic; ones above that
@@ -18627,7 +20151,10 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
- Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
+ CallInst *CI = Builder.CreateCall(Ldxr, Addr);
+ CI->addParamAttr(
+ 0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
+ Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
return Builder.CreateBitCast(Trunc, ValueTy);
}
@@ -18668,10 +20195,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
- return Builder.CreateCall(Stxr,
- {Builder.CreateZExtOrBitCast(
- Val, Stxr->getFunctionType()->getParamType(0)),
- Addr});
+ CallInst *CI = Builder.CreateCall(
+ Stxr, {Builder.CreateZExtOrBitCast(
+ Val, Stxr->getFunctionType()->getParamType(0)),
+ Addr});
+ CI->addParamAttr(1, Attribute::get(Builder.getContext(),
+ Attribute::ElementType, Val->getType()));
+ return CI;
}
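+
+// With opaque pointers, the ldxr/stxr calls emitted above now carry an
+// elementtype attribute on the pointer argument, e.g. (schematic IR):
+//   %r = call i32 @llvm.aarch64.stxr.p0(i64 %v, ptr elementtype(i64) %addr)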
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
@@ -18993,8 +20523,7 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
// For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
// AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
- const auto &Subtarget =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+ const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize &&
@@ -19080,22 +20609,23 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
MemVT = MemVT.changeTypeToInteger();
}
- auto NewLoad = DAG.getMaskedLoad(
+ SDValue NewLoad = DAG.getMaskedLoad(
LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
+ SDValue Result = NewLoad;
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
EVT ExtendVT = ContainerVT.changeVectorElementType(
Load->getMemoryVT().getVectorElementType());
- NewLoad = getSVESafeBitCast(ExtendVT, NewLoad, DAG);
- NewLoad = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
- Pg, NewLoad, DAG.getUNDEF(ContainerVT));
+ Result = getSVESafeBitCast(ExtendVT, Result, DAG);
+ Result = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
+ Pg, Result, DAG.getUNDEF(ContainerVT));
}
- auto Result = convertFromScalableVector(DAG, VT, NewLoad);
- SDValue MergedValues[2] = {Result, Load->getChain()};
+ Result = convertFromScalableVector(DAG, VT, Result);
+ SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
return DAG.getMergeValues(MergedValues, DL);
}
@@ -19143,19 +20673,20 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
IsPassThruZeroOrUndef = true;
}
- auto NewLoad = DAG.getMaskedLoad(
+ SDValue NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
+ SDValue Result = NewLoad;
if (!IsPassThruZeroOrUndef) {
SDValue OldPassThru =
convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
- NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
+ Result = DAG.getSelect(DL, ContainerVT, Mask, Result, OldPassThru);
}
- auto Result = convertFromScalableVector(DAG, VT, NewLoad);
- SDValue MergedValues[2] = {Result, Load->getChain()};
+ Result = convertFromScalableVector(DAG, VT, Result);
+ SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
return DAG.getMergeValues(MergedValues, DL);
}
@@ -19232,7 +20763,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
- return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, PredOpcode);
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
@@ -19387,13 +20918,13 @@ SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
- unsigned NewOp,
- bool OverrideNEON) const {
+ unsigned NewOp) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
- if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
+ if (VT.isFixedLengthVector()) {
+ assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
@@ -19411,8 +20942,8 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
continue;
}
- assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
- "Only fixed length vectors are supported!");
+ assert(isTypeLegal(V.getValueType()) &&
+ "Expected only legal fixed-width types");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
@@ -19543,7 +21074,9 @@ SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
- if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
+ if (useSVEForFixedLengthVectorVT(
+ SrcVT,
+ /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
@@ -19950,6 +21483,17 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
+ // Safe bitcasting between unpacked vector types of different element counts
+ // is currently unsupported because the following is missing the necessary
+ // work to ensure the result's elements live where they're supposed to within
+ // an SVE register.
+ // 01234567
+ // e.g. nxv2i32 = XX??XX??
+ // nxv4f16 = X?X?X?X?
+ assert((VT.getVectorElementCount() == InVT.getVectorElementCount() ||
+ VT == PackedVT || InVT == PackedInVT) &&
+ "Unexpected bitcast!");
+
// Pack input if required.
if (InVT != PackedInVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
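The new assert encodes a simple legality predicate: the cast is safe when the element counts match, or when one side is already the packed form of its element type (PackedVT/PackedInVT computed just above). A standalone restatement, assuming LLVM's EVT helpers; the function itself is illustrative:

    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    static bool isSafeSVEBitCast(EVT VT, EVT InVT, EVT PackedVT,
                                 EVT PackedInVT) {
      // Unpacked-to-unpacked casts with differing element counts would move
      // elements within the SVE register, which this path cannot yet do.
      return VT.getVectorElementCount() == InVT.getVectorElementCount() ||
             VT == PackedVT || InVT == PackedInVT;
    }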
@@ -20016,6 +21560,13 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
+bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
+ return Op.getOpcode() == AArch64ISD::DUP ||
+ (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
+ TargetLowering::isTargetCanonicalConstantNode(Op);
+}
+
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 2138c0ffe70a..06ea918ea32e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -55,6 +55,8 @@ enum NodeType : unsigned {
// x29, x29` marker instruction.
CALL_RVMARKER,
+ CALL_BTI, // Function call followed by a BTI instruction.
+
// Produces the full sequence of instructions for getting the thread pointer
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
@@ -79,7 +81,6 @@ enum NodeType : unsigned {
// Predicated instructions where inactive lanes produce undefined results.
ABDS_PRED,
ABDU_PRED,
- ADD_PRED,
FADD_PRED,
FDIV_PRED,
FMA_PRED,
@@ -98,7 +99,6 @@ enum NodeType : unsigned {
SMIN_PRED,
SRA_PRED,
SRL_PRED,
- SUB_PRED,
UDIV_PRED,
UMAX_PRED,
UMIN_PRED,
@@ -158,6 +158,7 @@ enum NodeType : unsigned {
DUPLANE16,
DUPLANE32,
DUPLANE64,
+ DUPLANE128,
// Vector immediate moves
MOVI,
@@ -232,15 +233,10 @@ enum NodeType : unsigned {
SADDV,
UADDV,
- // Vector halving addition
- SHADD,
- UHADD,
-
- // Vector rounding halving addition
- SRHADD,
- URHADD,
-
- // Unsigned Add Long Pairwise
+ // Add Pairwise of two vectors
+ ADDP,
+ // Add Long Pairwise
+ SADDLP,
UADDLP,
// udot/sdot instructions
@@ -411,6 +407,10 @@ enum NodeType : unsigned {
SSTNT1_PRED,
SSTNT1_INDEX_PRED,
+ // SME
+ RDSVL,
+ REVD_MERGE_PASSTHRU,
+
// Asserts that a function argument (i32) is zero-extended to i8 by
// the caller
ASSERT_ZEXT_BOOL,
@@ -462,23 +462,6 @@ enum NodeType : unsigned {
} // end namespace AArch64ISD
-namespace {
-
-// Any instruction that defines a 32-bit result zeros out the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
-// 32 bits, they're probably just qualifying a CopyFromReg.
-static inline bool isDef32(const SDNode &N) {
- unsigned Opc = N.getOpcode();
- return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
- Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
- Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
- Opc != ISD::FREEZE;
-}
-
-} // end anonymous namespace
-
namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
@@ -501,6 +484,11 @@ public:
explicit AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI);
+ /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
+ /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
+ bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+ SDValue N1) const override;
+
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
@@ -573,6 +561,17 @@ public:
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
+ MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -610,8 +609,8 @@ public:
bool isLegalAddImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
- bool isMulAddWithConstProfitable(const SDValue &AddNode,
- const SDValue &ConstNode) const override;
+ bool isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const override;
bool shouldConsiderGEPOffsetSplit() const override;
@@ -651,6 +650,10 @@ public:
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
+
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -680,7 +683,8 @@ public:
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
- bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
@@ -898,11 +902,8 @@ private:
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- bool isEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
+ bool
+ isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
/// Finds the incoming stack arguments which overlap the given fixed stack
/// object and incorporates their load into the current chain. This prevents
@@ -980,8 +981,8 @@ private:
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
- bool OverrideNEON = false) const;
+ SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
+ unsigned NewOp) const;
SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -1052,6 +1053,8 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
+ SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps, bool &UseOneConst,
bool Reciprocal) const override;
@@ -1093,7 +1096,7 @@ private:
}
bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
- bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
+ bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -1129,6 +1132,8 @@ private:
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ bool isTargetCanonicalConstantNode(SDValue Op) const override;
+
// Normally SVE is only used for byte size vectors that do not fit within a
// NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64-bit and 128-bit vectors as well.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index b220929514f9..c477a44b13b2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -27,22 +27,43 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
// supported, but when they're relaxed and anything can be used, all the
// standard modes would be valid and may give efficiency gains.
+// An atomic load operation that does not need either acquire or release
+// semantics.
+class relaxed_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingAcquireOrStronger = 0;
+}
+
// An atomic load operation that actually needs acquire semantics.
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
- let IsAtomicOrderingAcquireOrStronger = 1;
+ let IsAtomicOrderingAcquire = 1;
}
-// An atomic load operation that does not need either acquire or release
-// semantics.
-class relaxed_load<PatFrag base>
+// An atomic load operation that needs sequential consistency.
+class seq_cst_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
let IsAtomic = 1;
- let IsAtomicOrderingAcquireOrStronger = 0;
+ let IsAtomicOrderingSequentiallyConsistent = 1;
+}
+
+// RCPC extension, currently opt-in under a separate feature.
+let Predicates = [HasLDAPR] in {
+ // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
+ // 8-bit loads
+ def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDAPRB GPR64sp:$ptr)>;
+ // 16-bit loads
+ def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDAPRH GPR64sp:$ptr)>;
+ // 32-bit loads
+ def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDAPRW GPR64sp:$ptr)>;
+ // 64-bit loads
+ def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDAPRX GPR64sp:$ptr)>;
}
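The three PatFrags select instructions by atomic ordering: plain LDR for relaxed loads, LDAPR (when available) for acquire, and LDAR for seq_cst. A hedged C++ restatement of the predicates the TableGen flags expand to, assuming llvm/Support/AtomicOrdering.h; this is an illustration, not the generated matcher:

    #include "llvm/Support/AtomicOrdering.h"
    using namespace llvm;

    static bool isRelaxedLoad(AtomicOrdering AO) {
      return !isAcquireOrStronger(AO); // monotonic/unordered: plain LDR
    }
    static bool isAcquiringLoad(AtomicOrdering AO) {
      return AO == AtomicOrdering::Acquire; // RCpc LDAPR is sufficient
    }
    static bool isSeqCstLoad(AtomicOrdering AO) {
      return AO == AtomicOrdering::SequentiallyConsistent; // needs LDAR
    }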
// 8-bit loads
+def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend8:$offset)),
@@ -58,6 +79,7 @@ def : Pat<(relaxed_load<atomic_load_8>
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
// 16-bit loads
+def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)),
@@ -73,6 +95,7 @@ def : Pat<(relaxed_load<atomic_load_16>
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bit loads
+def : Pat<(seq_cst_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend32:$extend)),
@@ -88,6 +111,7 @@ def : Pat<(relaxed_load<atomic_load_32>
(LDURWi GPR64sp:$Rn, simm9:$offset)>;
// 64-bit loads
+def : Pat<(seq_cst_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend)),
@@ -490,7 +514,8 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
mayLoad = 1, mayStore = 1 in {
-class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
+class cmp_swap_128 : Pseudo<(outs GPR64common:$RdLo, GPR64common:$RdHi,
+ GPR32common:$scratch),
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
GPR64:$newLo, GPR64:$newHi), []>,
Sched<[WriteAtomic]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4c1e41b7efee..78bc1b8c6f02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -109,15 +109,19 @@ class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>;
-// Helper fragment for an extract of the high portion of a 128-bit vector.
+// Helper fragment for an extract of the high portion of a 128-bit vector. The
+// ComplexPattern matches both extract_subvector and bitcast(extract_subvector(..)).
def extract_high_v16i8 :
- UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
+ ComplexPattern<v8i8, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v8i16 :
- UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
+ ComplexPattern<v4i16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4i32 :
- UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
-def extract_high_v2i64 :
- UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>;
+ ComplexPattern<v2i32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+
+def extract_high_dup_v8i16 :
+ BinOpFrag<(extract_subvector (v8i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS)), (i64 4))>;
+def extract_high_dup_v4i32 :
+ BinOpFrag<(extract_subvector (v4i32 (AArch64duplane32 (v4i32 node:$LHS), node:$RHS)), (i64 2))>;
//===----------------------------------------------------------------------===//
// Asm Operand Classes.
@@ -1178,6 +1182,13 @@ def fpimm32XForm : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
}]>;
+def fpimm32SIMDModImmType4XForm : SDNodeXForm<fpimm, [{
+ uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType4(N->getValueAPF()
+ .bitcastToAPInt()
+ .getZExtValue());
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>;
+
def fpimm64XForm : SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = AArch64_AM::getFP64Imm(InVal);
@@ -1199,6 +1210,13 @@ def fpimm32 : Operand<f32>,
let ParserMatchClass = FPImmOperand;
let PrintMethod = "printFPImmOperand";
}
+
+def fpimm32SIMDModImmType4 : FPImmLeaf<f32, [{
+ uint64_t Enc = Imm.bitcastToAPInt().getZExtValue();
+ return Enc != 0 && AArch64_AM::isAdvSIMDModImmType4(Enc << 32 | Enc);
+ }], fpimm32SIMDModImmType4XForm> {
+}
+
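The FPImmLeaf above feeds a 32-bit float's bit pattern into a 64-bit modified-immediate check by replicating it into both halves, since isAdvSIMDModImmType4 is defined over 64 bits. The arithmetic, as a hedged sketch:

    #include <cstdint>

    // Illustrative only: mirrors `Enc << 32 | Enc` from the predicate above.
    static uint64_t replicate32(uint32_t Enc) {
      return (static_cast<uint64_t>(Enc) << 32) | Enc;
    }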
def fpimm64 : Operand<f64>,
FPImmLeaf<f64, [{
return AArch64_AM::getFP64Imm(Imm) != -1;
@@ -1234,6 +1252,9 @@ def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">,
GISDNodeXFormEquiv<fpimm32XForm>;
def gi_fpimm64 : GICustomOperandRenderer<"renderFPImm64">,
GISDNodeXFormEquiv<fpimm64XForm>;
+def gi_fpimm32SIMDModImmType4 :
+ GICustomOperandRenderer<"renderFPImm32SIMDModImmType4">,
+ GISDNodeXFormEquiv<fpimm32SIMDModImmType4XForm>;
// Vector lane operands
class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
@@ -1261,8 +1282,12 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>;
def VectorIndexSOperand : AsmVectorIndex<0, 3>;
def VectorIndexDOperand : AsmVectorIndex<0, 1>;
-defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
+let OperandNamespace = "AArch64" in {
+ let OperandType = "OPERAND_IMPLICIT_IMM_0" in {
+ defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
[{ return ((uint64_t)Imm) == 0; }]>;
+ }
+}
defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
[{ return ((uint64_t)Imm) == 1; }]>;
defm VectorIndexB : VectorIndex<i64, VectorIndexBOperand,
@@ -1312,6 +1337,8 @@ def sme_elm_idx0_0 : Operand<i64>, ImmLeaf<i64, [{
}]> {
let ParserMatchClass = Imm0_0Operand;
let PrintMethod = "printMatrixIndex";
+ let OperandNamespace = "AArch64";
+ let OperandType = "OPERAND_IMPLICIT_IMM_0";
}
def sme_elm_idx0_1 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) <= 1;
@@ -4512,8 +4539,9 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
- : I<(outs), (ins timm32_0_65535:$imm), asm, "\t$imm", "", []>,
+class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm,
+ list<dag> pattern = []>
+ : I<(outs), (ins timm32_0_65535:$imm), asm, "\t$imm", "", pattern>,
Sched<[WriteSys]> {
bits<16> imm;
let Inst{31-24} = 0b11010100;
@@ -4542,6 +4570,7 @@ let Predicates = [HasFPARMv8] in {
// Floating point to integer conversion
//---
+let mayRaiseFPException = 1 in
class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode,
RegisterClass srcType, RegisterClass dstType,
string asm, list<dag> pattern>
@@ -4561,7 +4590,7 @@ class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode,
let Inst{4-0} = Rd;
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode,
RegisterClass srcType, RegisterClass dstType,
Operand immType, string asm, list<dag> pattern>
@@ -4683,7 +4712,7 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
// Integer to floating point conversion
//---
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseIntegerToFP<bit isUnsigned,
RegisterClass srcType, RegisterClass dstType,
Operand immType, string asm, list<dag> pattern>
@@ -4701,6 +4730,7 @@ class BaseIntegerToFP<bit isUnsigned,
let Inst{4-0} = Rd;
}
+let mayRaiseFPException = 1 in
class BaseIntegerToFPUnscaled<bit isUnsigned,
RegisterClass srcType, RegisterClass dstType,
ValueType dvt, string asm, SDPatternOperator node>
@@ -4937,6 +4967,7 @@ multiclass UnscaledConversion<string asm> {
// Floating point conversion
//---
+let mayRaiseFPException = 1 in
class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType,
RegisterClass srcType, string asm, list<dag> pattern>
: I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>,
@@ -4963,15 +4994,15 @@ multiclass FPConversion<string asm> {
// Half-precision to Double-precision
def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm,
- [(set FPR64:$Rd, (fpextend (f16 FPR16:$Rn)))]>;
+ [(set FPR64:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>;
// Half-precision to Single-precision
def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm,
- [(set FPR32:$Rd, (fpextend (f16 FPR16:$Rn)))]>;
+ [(set FPR32:$Rd, (any_fpextend (f16 FPR16:$Rn)))]>;
// Single-precision to Double-precision
def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm,
- [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>;
+ [(set FPR64:$Rd, (any_fpextend FPR32:$Rn))]>;
// Single-precision to Half-precision
def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm,
@@ -4999,8 +5030,9 @@ class BaseSingleOperandFPData<bits<6> opcode, RegisterClass regtype,
}
multiclass SingleOperandFPData<bits<4> opcode, string asm,
- SDPatternOperator node = null_frag> {
-
+ SDPatternOperator node = null_frag,
+ int fpexceptions = 1> {
+ let mayRaiseFPException = fpexceptions in {
def Hr : BaseSingleOperandFPData<{0b00,opcode}, FPR16, f16, asm, node> {
let Inst{23-22} = 0b11; // 16-bit size flag
let Predicates = [HasFullFP16];
@@ -5013,8 +5045,14 @@ multiclass SingleOperandFPData<bits<4> opcode, string asm,
def Dr : BaseSingleOperandFPData<{0b00,opcode}, FPR64, f64, asm, node> {
let Inst{23-22} = 0b01; // 64-bit size flag
}
+ }
}
+multiclass SingleOperandFPDataNoException<bits<4> opcode, string asm,
+ SDPatternOperator node = null_frag>
+ : SingleOperandFPData<opcode, asm, node, 0>;
+
+let mayRaiseFPException = 1 in
multiclass SingleOperandFPNo16<bits<6> opcode, string asm,
SDPatternOperator node = null_frag>{
@@ -5035,7 +5073,7 @@ multiclass FRIntNNT<bits<2> opcode, string asm, SDPatternOperator node = null_fr
// Two operand floating point data processing
//---
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
string asm, list<dag> pat>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
@@ -5075,7 +5113,8 @@ multiclass TwoOperandFPData<bits<4> opcode, string asm,
}
}
-multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
+multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm,
+ SDPatternOperator node> {
def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
[(set (f16 FPR16:$Rd), (fneg (node (f16 FPR16:$Rn), (f16 FPR16:$Rm))))]> {
let Inst{23-22} = 0b11; // 16-bit size flag
@@ -5098,6 +5137,7 @@ multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
// Three operand floating point data processing
//---
+let mayRaiseFPException = 1 in
class BaseThreeOperandFPData<bit isNegated, bit isSub,
RegisterClass regtype, string asm, list<dag> pat>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra),
@@ -5142,7 +5182,7 @@ multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm,
// Floating point data comparisons
//---
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseOneOperandFPComparison<bit signalAllNans,
RegisterClass regtype, string asm,
list<dag> pat>
@@ -5161,7 +5201,7 @@ class BaseOneOperandFPComparison<bit signalAllNans,
let PostEncoderMethod = "fixOneOperandFPComparison";
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
string asm, list<dag> pat>
: I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>,
@@ -5218,7 +5258,7 @@ multiclass FPComparison<bit signalAllNans, string asm,
// Floating point conditional comparisons
//---
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
string mnemonic, list<dag> pat>
: I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
@@ -5544,6 +5584,7 @@ multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
}
// As above, but only floating point elements supported.
+let mayRaiseFPException = 1 in
multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
string asm, SDPatternOperator OpNode> {
let Predicates = [HasNEON, HasFullFP16] in {
@@ -5565,6 +5606,7 @@ multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
}
+let mayRaiseFPException = 1 in
multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
string asm,
SDPatternOperator OpNode> {
@@ -5587,6 +5629,7 @@ multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
[(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
}
+let mayRaiseFPException = 1 in
multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<3> opc,
string asm, SDPatternOperator OpNode> {
let Predicates = [HasNEON, HasFullFP16] in {
@@ -5614,6 +5657,7 @@ multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<3> opc,
}
// As above, but D and B sized elements unsupported.
+let mayRaiseFPException = 1 in
multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
@@ -5718,6 +5762,7 @@ multiclass SIMDThreeSameVectorDot<bit U, bit Mixed, string asm, SDPatternOperato
// ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions
// select inputs from 4H vectors and accumulate outputs to a 2S vector (or from
// 8H to 4S, when Q=1).
+let mayRaiseFPException = 1 in
class BaseSIMDThreeSameVectorFML<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1,
string kind2, RegisterOperand RegType,
ValueType AccumType, ValueType InputType,
@@ -5986,7 +6031,9 @@ multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
// Supports H, S and D element sizes, uses high bit of the size field
// as an extra opcode bit.
multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
+ SDPatternOperator OpNode,
+ int fpexceptions = 1> {
+ let mayRaiseFPException = fpexceptions in {
let Predicates = [HasNEON, HasFullFP16] in {
def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
asm, ".4h", ".4h",
@@ -6004,9 +6051,15 @@ multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+ }
}
+multiclass SIMDTwoVectorFPNoException<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode>
+ : SIMDTwoVectorFP<U, S, opc, asm, OpNode, 0>;
+
// Supports only S and D element sizes
+let mayRaiseFPException = 1 in
multiclass SIMDTwoVectorSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
@@ -6036,7 +6089,7 @@ multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
}
-
+let mayRaiseFPException = 1 in
multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
let Predicates = [HasNEON, HasFullFP16] in {
@@ -6058,6 +6111,7 @@ multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
[(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
+let mayRaiseFPException = 1 in
multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
let Predicates = [HasNEON, HasFullFP16] in {
@@ -6209,6 +6263,7 @@ multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
string asm, SDNode OpNode> {
+ let mayRaiseFPException = 1 in {
let Predicates = [HasNEON, HasFullFP16] in {
def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64,
asm, ".4h", "0.0",
@@ -6226,6 +6281,7 @@ multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128,
asm, ".2d", "0.0",
v2i64, v2f64, OpNode>;
+ }
let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<asm # "\t$Vd.4h, $Vn.4h, #0",
@@ -6253,7 +6309,7 @@ multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
(!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand outtype, RegisterOperand intype,
string asm, string VdTy, string VnTy,
@@ -6275,7 +6331,7 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand outtype, RegisterOperand intype,
string asm, string VdTy, string VnTy,
@@ -6457,8 +6513,8 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
asm#"2", ".1q", ".2d", ".2d", []>;
}
- def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)),
- (v8i8 (extract_high_v16i8 V128:$Rm)))),
+ def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
+ (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))),
(!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
}
@@ -6471,8 +6527,8 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6480,8 +6536,8 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
@@ -6495,8 +6551,8 @@ multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))))]>;
+ (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6506,8 +6562,8 @@ multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
+ (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6517,8 +6573,8 @@ multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
+ (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))))]>;
}
multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
@@ -6535,8 +6591,8 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$dst),
(add (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))))))]>;
+ (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm)))))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6548,8 +6604,8 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(add (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))))))]>;
+ (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm)))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6561,8 +6617,8 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(add (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))))))]>;
+ (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm)))))))]>;
}
multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
@@ -6574,8 +6630,8 @@ multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
V128, V128, V128,
asm#"2", ".8h", ".16b", ".16b",
- [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6583,8 +6639,8 @@ multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6592,8 +6648,8 @@ multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
@@ -6609,8 +6665,8 @@ multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$dst),
(OpNode (v8i16 V128:$Rd),
- (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6621,8 +6677,8 @@ multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6633,8 +6689,8 @@ multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
@@ -6651,8 +6707,8 @@ multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(Accum (v4i32 V128:$Rd),
- (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
+ (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6665,8 +6721,8 @@ multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(Accum (v2i64 V128:$Rd),
- (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
+ (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))))]>;
}
multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
@@ -6679,7 +6735,7 @@ multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".8h", ".8h", ".16b",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V128, V64,
asm, ".4s", ".4s", ".4h",
@@ -6688,7 +6744,7 @@ multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".4s", ".4s", ".8h",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V128, V64,
asm, ".2d", ".2d", ".2s",
@@ -6697,7 +6753,7 @@ multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".2d", ".2d", ".4s",
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
//----------------------------------------------------------------------------
@@ -6876,7 +6932,7 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm> {
multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag,
Predicate pred = HasNEON> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in {
let Predicates = [pred] in {
def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
@@ -6895,7 +6951,7 @@ multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in {
def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
def NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
@@ -7025,6 +7081,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayRaiseFPException = 1 in
class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
: I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
[(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
@@ -7048,11 +7105,13 @@ multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
multiclass SIMDFPCmpTwoScalar<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
+ let mayRaiseFPException = 1 in {
def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b00, opc, FPR64, asm, "0.0">;
def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, 0b00, opc, FPR32, asm, "0.0">;
let Predicates = [HasNEON, HasFullFP16] in {
def v1i16rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b11, opc, FPR16, asm, "0.0">;
}
+ }
def : InstAlias<asm # "\t$Rd, $Rn, #0",
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>;
@@ -7076,6 +7135,7 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
(!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
}
+let mayRaiseFPException = 1 in
multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
Predicate pred = HasNEON> {
let Predicates = [pred] in {
@@ -7087,6 +7147,7 @@ multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
}
}
+let mayRaiseFPException = 1 in
multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
@@ -7169,6 +7230,7 @@ multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> {
asm, ".2d">;
}
+let mayRaiseFPException = 1 in
multiclass SIMDFPPairwiseScalar<bit S, bits<5> opc, string asm> {
let Predicates = [HasNEON, HasFullFP16] in {
def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64,
@@ -7232,6 +7294,7 @@ multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
asm, ".4s", []>;
}
+let mayRaiseFPException = 1 in
multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
Intrinsic intOp> {
let Predicates = [HasNEON, HasFullFP16] in {
@@ -7351,7 +7414,7 @@ class SIMDMovAlias<string asm, string size, Instruction inst,
multiclass SMov {
// SMOV with vector index of 0 is legal in Scalable Matrix Extension (SME)
// streaming mode.
- let Predicates = [HasNEONorStreamingSVE] in {
+ let Predicates = [HasNEONorSME] in {
def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
let Inst{20-16} = 0b00001;
}
@@ -7398,7 +7461,7 @@ multiclass SMov {
multiclass UMov {
// UMOV with vector index of 0 is legal in Scalable Matrix Extension (SME)
// streaming mode.
- let Predicates = [HasNEONorStreamingSVE] in {
+ let Predicates = [HasNEONorSME] in {
def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
let Inst{20-16} = 0b00001;
}
@@ -8048,6 +8111,7 @@ multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> {
".2h", V128, v4f32, v8bf16>;
}
+let mayRaiseFPException = 1 in
class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode>
: BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
@@ -8056,6 +8120,7 @@ class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode>
let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
}
+let mayRaiseFPException = 1 in
class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
: I<(outs V128:$dst),
(ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm,
@@ -8095,18 +8160,21 @@ class SIMDThreeSameVectorBF16MatrixMul<string asm>
", $Rm", ".8h", "}");
}
+let mayRaiseFPException = 1 in
class SIMD_BFCVTN
: BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128,
"bfcvtn", ".4h", ".4s",
[(set (v8bf16 V128:$Rd),
(int_aarch64_neon_bfcvtn (v4f32 V128:$Rn)))]>;
+let mayRaiseFPException = 1 in
class SIMD_BFCVTN2
: BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128,
"bfcvtn2", ".8h", ".4s",
[(set (v8bf16 V128:$dst),
(int_aarch64_neon_bfcvtn2 (v8bf16 V128:$Rd), (v4f32 V128:$Rn)))]>;
+let mayRaiseFPException = 1 in
class BF16ToSinglePrecision<string asm>
: I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "",
[(set (bf16 FPR16:$Rd), (int_aarch64_neon_bfcvt (f32 FPR32:$Rn)))]>,
@@ -8160,6 +8228,7 @@ multiclass SIMDThreeSameVectorDotIndex<bit U, bit Mixed, bits<2> size, string as
}
// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed)
+let mayRaiseFPException = 1 in
class BaseSIMDThreeSameVectorFMLIndex<bit Q, bit U, bits<4> opc, string asm,
string dst_kind, string lhs_kind,
string rhs_kind, RegisterOperand RegType,
@@ -8187,6 +8256,7 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, v4f32, v8f16, OpNode>;
}
+let mayRaiseFPException = 1 in
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode> {
let Predicates = [HasNEON, HasFullFP16] in {
@@ -8369,6 +8439,7 @@ multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
V128:$Rm, VectorIndexD:$idx)>;
}
+let mayRaiseFPException = 1 in
multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
let Predicates = [HasNEON, HasFullFP16] in {
def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64,
@@ -8701,9 +8772,8 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
@@ -8728,9 +8798,8 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8793,10 +8862,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
[(set (v4i32 V128:$dst),
(Accum (v4i32 V128:$Rd),
(v4i32 (int_aarch64_neon_sqdmull
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16
- (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8825,10 +8892,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
[(set (v2i64 V128:$dst),
(Accum (v2i64 V128:$Rd),
(v2i64 (int_aarch64_neon_sqdmull
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32
- (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8881,9 +8946,8 @@ multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
@@ -8908,9 +8972,8 @@ multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8940,9 +9003,8 @@ multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8967,9 +9029,8 @@ multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -9654,7 +9715,7 @@ multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
V128, V128, vecshiftL8,
asm#"2", ".8h", ".16b",
[(set (v8i16 V128:$Rd),
- (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> {
+ (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)), vecshiftL8:$imm))]> {
bits<3> imm;
let Inst{18-16} = imm;
}
@@ -9670,7 +9731,7 @@ multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
V128, V128, vecshiftL16,
asm#"2", ".4s", ".8h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), vecshiftL16:$imm))]> {
bits<4> imm;
let Inst{19-16} = imm;
@@ -9687,7 +9748,7 @@ multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
V128, V128, vecshiftL32,
asm#"2", ".2d", ".4s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), vecshiftL32:$imm))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10671,7 +10732,7 @@ def complexrotateopodd : Operand<i32>, TImmLeaf<i32, [{ return Imm >= 0 && Imm <
let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
let PrintMethod = "printComplexRotationOp<180, 90>";
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
RegisterOperand regtype, Operand rottype,
string asm, string kind, list<dag> pattern>
@@ -10742,7 +10803,7 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
}
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
bits<3> opcode,
RegisterOperand regtype,
@@ -10814,7 +10875,7 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
}
}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
bit opc1, bit opc2, RegisterOperand dst_reg,
RegisterOperand lhs_reg,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a9191924129c..835a7b6cc81d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -1094,7 +1095,10 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
return true;
default:;
}
- return isSEHInstruction(MI);
+ if (isSEHInstruction(MI))
+ return true;
+ auto Next = std::next(MI.getIterator());
+ return Next != MBB->end() && Next->isCFIInstruction();
}
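The extended boundary check keeps the scheduler from moving an instruction away from a CFI directive that immediately follows it. A standalone restatement, assuming the MachineInstr/MachineBasicBlock API; the helper name is illustrative:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include <iterator>
    using namespace llvm;

    static bool precedesCFIInstruction(const MachineInstr &MI,
                                       const MachineBasicBlock &MBB) {
      auto Next = std::next(MI.getIterator());
      return Next != MBB.end() && Next->isCFIInstruction();
    }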
/// analyzeCompare - For a comparison instruction, return the source registers
@@ -1435,7 +1439,7 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return false;
const MCInstrDesc &MCID = get(NewOpc);
CmpInstr.setDesc(MCID);
- CmpInstr.RemoveOperand(DeadNZCVIdx);
+ CmpInstr.removeOperand(DeadNZCVIdx);
bool succeeded = UpdateOperandRegClass(CmpInstr);
(void)succeeded;
assert(succeeded && "Some operands reg class are incompatible!");
@@ -1547,27 +1551,6 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
}
}
-namespace {
-
-struct UsedNZCV {
- bool N = false;
- bool Z = false;
- bool C = false;
- bool V = false;
-
- UsedNZCV() = default;
-
- UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
- this->N |= UsedFlags.N;
- this->Z |= UsedFlags.Z;
- this->C |= UsedFlags.C;
- this->V |= UsedFlags.V;
- return *this;
- }
-};
-
-} // end anonymous namespace
-
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
@@ -1622,15 +1605,15 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
return UsedFlags;
}
-/// \returns Conditions flags used after \p CmpInstr in its MachineBB if they
-/// are not containing C or V flags and NZCV flags are not alive in successors
-/// of the same \p CmpInstr and \p MI parent. \returns None otherwise.
+/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
+/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
+/// \returns None otherwise.
///
/// Collect instructions using those flags in \p CCUseInstrs if provided.
-static Optional<UsedNZCV>
-examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
- const TargetRegisterInfo &TRI,
- SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
+Optional<UsedNZCV>
+llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
+ const TargetRegisterInfo &TRI,
+ SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
MachineBasicBlock *CmpParent = CmpInstr.getParent();
if (MI.getParent() != CmpParent)
return None;
@@ -1652,8 +1635,6 @@ examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
break;
}
- if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
- return None;
return NZCVUsedAfterCmp;
}
@@ -1684,7 +1665,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
return false;
- if (!examineCFlagsUse(MI, CmpInstr, TRI))
+ Optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
+ if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
return false;
AccessKind AccessToCheck = AK_Write;
@@ -1773,7 +1755,7 @@ static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
// Condition flags are not used in CmpInstr basic block successors, and only
// Z or N flags are allowed to be used after CmpInstr within its basic block
- if (!NZCVUsedAfterCmp)
+ if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
return false;
// Z or N flag used after CmpInstr must correspond to the flag used in MI
if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
@@ -2270,6 +2252,19 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LD1SW_D_IMM:
case AArch64::LD1D_IMM:
+ case AArch64::LD2B_IMM:
+ case AArch64::LD2H_IMM:
+ case AArch64::LD2W_IMM:
+ case AArch64::LD2D_IMM:
+ case AArch64::LD3B_IMM:
+ case AArch64::LD3H_IMM:
+ case AArch64::LD3W_IMM:
+ case AArch64::LD3D_IMM:
+ case AArch64::LD4B_IMM:
+ case AArch64::LD4H_IMM:
+ case AArch64::LD4W_IMM:
+ case AArch64::LD4D_IMM:
+
case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1B_S_IMM:
@@ -2281,6 +2276,19 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::ST1W_D_IMM:
case AArch64::ST1D_IMM:
+ case AArch64::ST2B_IMM:
+ case AArch64::ST2H_IMM:
+ case AArch64::ST2W_IMM:
+ case AArch64::ST2D_IMM:
+ case AArch64::ST3B_IMM:
+ case AArch64::ST3H_IMM:
+ case AArch64::ST3W_IMM:
+ case AArch64::ST3D_IMM:
+ case AArch64::ST4B_IMM:
+ case AArch64::ST4H_IMM:
+ case AArch64::ST4W_IMM:
+ case AArch64::ST4D_IMM:
+
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
@@ -2897,6 +2905,45 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -8;
MaxOffset = 7;
break;
+ case AArch64::LD2B_IMM:
+ case AArch64::LD2H_IMM:
+ case AArch64::LD2W_IMM:
+ case AArch64::LD2D_IMM:
+ case AArch64::ST2B_IMM:
+ case AArch64::ST2H_IMM:
+ case AArch64::ST2W_IMM:
+ case AArch64::ST2D_IMM:
+ Scale = TypeSize::Scalable(32);
+ Width = SVEMaxBytesPerVector * 2;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD3B_IMM:
+ case AArch64::LD3H_IMM:
+ case AArch64::LD3W_IMM:
+ case AArch64::LD3D_IMM:
+ case AArch64::ST3B_IMM:
+ case AArch64::ST3H_IMM:
+ case AArch64::ST3W_IMM:
+ case AArch64::ST3D_IMM:
+ Scale = TypeSize::Scalable(48);
+ Width = SVEMaxBytesPerVector * 3;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD4B_IMM:
+ case AArch64::LD4H_IMM:
+ case AArch64::LD4W_IMM:
+ case AArch64::LD4D_IMM:
+ case AArch64::ST4B_IMM:
+ case AArch64::ST4H_IMM:
+ case AArch64::ST4W_IMM:
+ case AArch64::ST4D_IMM:
+ Scale = TypeSize::Scalable(64);
+ Width = SVEMaxBytesPerVector * 4;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
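+ // Worked example for the scaling above (vector length assumed): with
+ // vscale = 2 (256-bit vectors), an LD2D_IMM immediate of 3 addresses
+ // base + 3 * Scale * vscale = base + 3 * 32 * 2 = base + 192 bytes.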
case AArch64::LD1B_H_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1H_S_IMM:
@@ -3105,6 +3152,86 @@ bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
return isPreLd(MI) || isPreSt(MI);
}
+bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ case AArch64::STGPi:
+ return true;
+ }
+}
+
+const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
+ unsigned Idx =
+ AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
+ : 1;
+ return MI.getOperand(Idx);
+}
+
+const MachineOperand &
+AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
+ unsigned Idx =
+ AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
+ : 2;
+ return MI.getOperand(Idx);
+}
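+// Operand layouts assumed by the two getters above (sketch):
+//   LDRXui  dst, base, imm          base at index 1, offset at index 2
+//   LDPXi   dst1, dst2, base, imm   base at index 2, offset at index 3
+//   LDRXpre wb, dst, base, imm      base at index 2, offset at index 3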
+
+static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
+ Register Reg) {
+ if (MI.getParent() == nullptr)
+ return nullptr;
+ const MachineFunction *MF = MI.getParent()->getParent();
+ return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
+}
+
+bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
+ auto IsQFPR = [&](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return AArch64::FPR128RegClass.contains(Reg);
+ const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+ return TRC == &AArch64::FPR128RegClass ||
+ TRC == &AArch64::FPR128_loRegClass;
+ };
+ return llvm::any_of(MI.operands(), IsQFPR);
+}
+
+bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
+ auto IsFPR = [&](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return AArch64::FPR128RegClass.contains(Reg) ||
+ AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR32RegClass.contains(Reg) ||
+ AArch64::FPR16RegClass.contains(Reg) ||
+ AArch64::FPR8RegClass.contains(Reg);
+
+ const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+ return TRC == &AArch64::FPR128RegClass ||
+ TRC == &AArch64::FPR128_loRegClass ||
+ TRC == &AArch64::FPR64RegClass ||
+ TRC == &AArch64::FPR64_loRegClass ||
+ TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
+ TRC == &AArch64::FPR8RegClass;
+ };
+ return llvm::any_of(MI.operands(), IsFPR);
+}
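+// Hypothetical use (sketch): target heuristics can key off these queries,
+// e.g. penalising back-to-back 128-bit operations on narrow FP pipes:
+//   if (AArch64InstrInfo::isQForm(MI) && AArch64InstrInfo::isQForm(Prev))
+//     ++QFormPairPenalty; // QFormPairPenalty is an assumed counter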
+
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
@@ -3370,7 +3497,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Predicate register by ORRing with itself.
if (AArch64::PPRRegClass.contains(DestReg) &&
AArch64::PPRRegClass.contains(SrcReg)) {
- assert(Subtarget.hasSVE() && "Unexpected SVE register.");
+ assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
+ "Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
.addReg(SrcReg) // Pg
.addReg(SrcReg)
@@ -3381,7 +3509,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register by ORRing with itself.
if (AArch64::ZPRRegClass.contains(DestReg) &&
AArch64::ZPRRegClass.contains(SrcReg)) {
- assert(Subtarget.hasSVE() && "Unexpected SVE register.");
+ assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
+ "Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -3391,6 +3520,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register pair by copying the individual sub-registers.
if (AArch64::ZPR2RegClass.contains(DestReg) &&
AArch64::ZPR2RegClass.contains(SrcReg)) {
+ assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
+ "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
@@ -3400,6 +3531,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register triple by copying the individual sub-registers.
if (AArch64::ZPR3RegClass.contains(DestReg) &&
AArch64::ZPR3RegClass.contains(SrcReg)) {
+ assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
+ "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -3410,6 +3543,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register quad by copying the individual sub-registers.
if (AArch64::ZPR4RegClass.contains(DestReg) &&
AArch64::ZPR4RegClass.contains(SrcReg)) {
+ assert((Subtarget.hasSVE() || Subtarget.hasSME()) &&
+ "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2, AArch64::zsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -3979,6 +4114,119 @@ void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
}
}
+// Convenience function to create a DWARF expression for
+// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
+static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
+ int NumVGScaledBytes, unsigned VG,
+ llvm::raw_string_ostream &Comment) {
+ uint8_t buffer[16];
+
+ if (NumBytes) {
+ Expr.push_back(dwarf::DW_OP_consts);
+ Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+ Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+ }
+
+ if (NumVGScaledBytes) {
+ Expr.push_back((uint8_t)dwarf::DW_OP_consts);
+ Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
+ Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
+ Expr.push_back(0);
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_mul);
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+ Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
+ << std::abs(NumVGScaledBytes) << " * VG";
+ }
+}
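+// Example of the bytes appended above (VG's DWARF number assumed to be 46):
+// for NumBytes = 16 and NumVGScaledBytes = 8, Expr gains
+//   DW_OP_consts 16, DW_OP_plus,
+//   DW_OP_consts 8, DW_OP_bregx 46 0, DW_OP_mul, DW_OP_plus
+// and Comment reads "... + 16 + 8 * VG".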
+
+// Creates an MCCFIInstruction:
+// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
+static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
+ unsigned Reg,
+ const StackOffset &Offset) {
+ int64_t NumBytes, NumVGScaledBytes;
+ AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
+ NumVGScaledBytes);
+ std::string CommentBuffer;
+ llvm::raw_string_ostream Comment(CommentBuffer);
+
+ if (Reg == AArch64::SP)
+ Comment << "sp";
+ else if (Reg == AArch64::FP)
+ Comment << "fp";
+ else
+ Comment << printReg(Reg, &TRI);
+
+ // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
+ SmallString<64> Expr;
+ unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+ Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
+ Expr.push_back(0);
+ appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
+ TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+ // Wrap this into DW_CFA_def_cfa.
+ SmallString<64> DefCfaExpr;
+ DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+ uint8_t buffer[16];
+ DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
+ DefCfaExpr.append(Expr.str());
+ return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
+ Comment.str());
+}
+
+MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
+ unsigned FrameReg, unsigned Reg,
+ const StackOffset &Offset,
+ bool LastAdjustmentWasScalable) {
+ if (Offset.getScalable())
+ return createDefCFAExpression(TRI, Reg, Offset);
+
+ if (FrameReg == Reg && !LastAdjustmentWasScalable)
+ return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
+
+ unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+ return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
+}
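+// Summary of the three cases above: any scalable component forces the
+// DW_CFA_def_cfa_expression escape; otherwise a CFA that stays in the same
+// register only needs DW_CFA_def_cfa_offset, and a register change needs the
+// full DW_CFA_def_cfa.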
+
+MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
+ unsigned Reg,
+ const StackOffset &OffsetFromDefCFA) {
+ int64_t NumBytes, NumVGScaledBytes;
+ AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+ OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
+
+ unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+
+ // Non-scalable offsets can use DW_CFA_offset directly.
+ if (!NumVGScaledBytes)
+ return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
+
+ std::string CommentBuffer;
+ llvm::raw_string_ostream Comment(CommentBuffer);
+ Comment << printReg(Reg, &TRI) << " @ cfa";
+
+ // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
+ SmallString<64> OffsetExpr;
+ appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
+ TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+ // Wrap this into DW_CFA_expression
+ SmallString<64> CfaExpr;
+ CfaExpr.push_back(dwarf::DW_CFA_expression);
+ uint8_t buffer[16];
+ CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+ CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
+ CfaExpr.append(OffsetExpr.str());
+
+ return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
+}
+
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
@@ -3988,7 +4236,8 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
unsigned SrcReg, int64_t Offset, unsigned Opc,
const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool NeedsWinCFI,
- bool *HasWinCFI) {
+ bool *HasWinCFI, bool EmitCFAOffset,
+ StackOffset CFAOffset, unsigned FrameReg) {
int Sign = 1;
unsigned MaxEncoding, ShiftSize;
switch (Opc) {
@@ -4013,6 +4262,13 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
llvm_unreachable("Unsupported opcode");
}
+ // `Offset` can be in bytes or in "scalable bytes".
+ int VScale = 1;
+ if (Opc == AArch64::ADDVL_XXI)
+ VScale = 16;
+ else if (Opc == AArch64::ADDPL_XXI)
+ VScale = 2;
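+ // i.e. ADDVL adds Offset * VL (VL = 16 * vscale bytes) and ADDPL adds
+ // Offset * PL (PL = 2 * vscale bytes), hence the factors above.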
+
// FIXME: If the offset won't fit in 24-bits, compute the offset into a
// scratch register. If DestReg is a virtual register, use it as the
// scratch register; otherwise, create a new virtual register (to be
@@ -4050,6 +4306,26 @@ static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
MBI = MBI.setMIFlag(Flag);
+ auto Change =
+ VScale == 1
+ ? StackOffset::getFixed(ThisVal << LocalShiftSize)
+ : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
+ if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
+ CFAOffset += Change;
+ else
+ CFAOffset -= Change;
+ if (EmitCFAOffset && DestReg == TmpReg) {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+
+ unsigned CFIIndex = MF.addFrameInst(
+ createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(Flag);
+ }
+
if (NeedsWinCFI) {
assert(Sign == 1 && "SEH directives should always have a positive sign");
int Imm = (int)(ThisVal << LocalShiftSize);
@@ -4086,7 +4362,9 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
StackOffset Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV,
- bool NeedsWinCFI, bool *HasWinCFI) {
+ bool NeedsWinCFI, bool *HasWinCFI,
+ bool EmitCFAOffset, StackOffset CFAOffset,
+ unsigned FrameReg) {
int64_t Bytes, NumPredicateVectors, NumDataVectors;
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
Offset, Bytes, NumPredicateVectors, NumDataVectors);
@@ -4101,8 +4379,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
}
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
- NeedsWinCFI, HasWinCFI);
+ NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
+ FrameReg);
+ CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
+ ? StackOffset::getFixed(-Bytes)
+ : StackOffset::getFixed(Bytes);
SrcReg = DestReg;
+ FrameReg = DestReg;
}
assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
@@ -4112,14 +4395,17 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
if (NumDataVectors) {
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
- AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
+ AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr,
+ EmitCFAOffset, CFAOffset, FrameReg);
+ CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
SrcReg = DestReg;
}
if (NumPredicateVectors) {
assert(DestReg != AArch64::SP && "Unaligned access to SP");
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
- AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
+ AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr,
+ EmitCFAOffset, CFAOffset, FrameReg);
}
}
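+// Usage sketch (surrounding values assumed): allocate 32 fixed bytes plus two
+// SVE data vectors below SP and emit CFA updates as we go:
+//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+//                   StackOffset::get(-32, -2 * 16), TII,
+//                   MachineInstr::FrameSetup, /*SetNZCV=*/false,
+//                   /*NeedsWinCFI=*/false, /*HasWinCFI=*/nullptr,
+//                   /*EmitCFAOffset=*/true);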
@@ -4151,6 +4437,9 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
return nullptr;
}
+ // Nothing can be folded with a copy from/to NZCV.
+ if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
+ return nullptr;
}
// Handle the case where a copy is being spilled or filled but the source
@@ -4577,6 +4866,10 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
return false;
}
+ if (isCombineInstrSettingFlag(CombineOpc) &&
+ MI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
+ return false;
+
return true;
}
@@ -4919,6 +5212,10 @@ static bool getFMULPatterns(MachineInstr &Root,
MachineInstr *MI = nullptr;
if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
MI = MRI.getUniqueVRegDef(MO.getReg());
+ // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
+ if (MI && MI->getOpcode() == TargetOpcode::COPY &&
+ MI->getOperand(1).getReg().isVirtual())
+ MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
if (MI && MI->getOpcode() == Opcode) {
Patterns.push_back(Pattern);
return true;
@@ -5073,6 +5370,42 @@ bool AArch64InstrInfo::isThroughputPattern(
} // end switch (Pattern)
return false;
}
+
+/// Find other MI combine patterns.
+static bool getMiscPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ // A - (B + C) ==> (A - B) - C or (A - C) - B
+ unsigned Opc = Root.getOpcode();
+ MachineBasicBlock &MBB = *Root.getParent();
+
+ switch (Opc) {
+ case AArch64::SUBWrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBXrr:
+ case AArch64::SUBSXrr:
+ // Found candidate root.
+ break;
+ default:
+ return false;
+ }
+
+ if (isCombineInstrSettingFlag(Opc) &&
+ Root.findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
+ return false;
+
+ if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
+ canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
+ canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
+ canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
+ Patterns.push_back(MachineCombinerPattern::SUBADD_OP1);
+ Patterns.push_back(MachineCombinerPattern::SUBADD_OP2);
+ return true;
+ }
+
+ return false;
+}
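+// The rewrite preserves the value for either operand choice, e.g.
+//   static_assert(10 - (3 + 4) == (10 - 3) - 4, "SUBADD_OP1");
+//   static_assert(10 - (3 + 4) == (10 - 4) - 3, "SUBADD_OP2");
+// The machine combiner later keeps whichever form shortens the critical path.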
+
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
@@ -5090,6 +5423,10 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getFMAPatterns(Root, Patterns))
return true;
+ // Other patterns
+ if (getMiscPatterns(Root, Patterns))
+ return true;
+
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
@@ -5190,6 +5527,9 @@ genIndexedMultiply(MachineInstr &Root,
MachineInstr *Dup =
MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
+ if (Dup->getOpcode() == TargetOpcode::COPY)
+ Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
+
Register DupSrcReg = Dup->getOperand(1).getReg();
MRI.clearKillFlags(DupSrcReg);
MRI.constrainRegClass(DupSrcReg, RC);
@@ -5337,6 +5677,53 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
+/// Do one of the following transformations:
+/// A - (B + C) ==> (A - B) - C
+/// A - (B + C) ==> (A - C) - B
+static void
+genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ unsigned IdxOpd1,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+ assert(IdxOpd1 == 1 || IdxOpd1 == 2);
+ unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
+ MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+
+ Register ResultReg = Root.getOperand(0).getReg();
+ Register RegA = Root.getOperand(1).getReg();
+ bool RegAIsKill = Root.getOperand(1).isKill();
+ Register RegB = AddMI->getOperand(IdxOpd1).getReg();
+ bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
+ Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
+ bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
+ Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
+
+ unsigned Opcode = Root.getOpcode();
+ if (Opcode == AArch64::SUBSWrr)
+ Opcode = AArch64::SUBWrr;
+ else if (Opcode == AArch64::SUBSXrr)
+ Opcode = AArch64::SUBXrr;
+ else
+ assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
+ "Unexpected instruction opcode.");
+
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegA, getKillRegState(RegAIsKill))
+ .addReg(RegB, getKillRegState(RegBIsKill));
+ MachineInstrBuilder MIB2 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(Opcode), ResultReg)
+ .addReg(NewVR, getKillRegState(true))
+ .addReg(RegC, getKillRegState(RegCIsKill));
+
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ InsInstrs.push_back(MIB1);
+ InsInstrs.push_back(MIB2);
+ DelInstrs.push_back(AddMI);
+}
+
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
@@ -5359,6 +5746,18 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);
return;
+ case MachineCombinerPattern::SUBADD_OP1:
+ // A - (B + C)
+ // ==> (A - B) - C
+ genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
+ InstrIdxForVirtReg);
+ break;
+ case MachineCombinerPattern::SUBADD_OP2:
+ // A - (B + C)
+ // ==> (A - C) - B
+ genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
+ InstrIdxForVirtReg);
+ break;
case MachineCombinerPattern::MULADDW_OP1:
case MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
@@ -6214,6 +6613,14 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
if (MUL)
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
+
+ // Set the flags on the inserted instructions to be the merged flags of the
+ // instructions that we have combined.
+ uint16_t Flags = Root.getFlags();
+ if (MUL)
+ Flags = Root.mergeFlagsWith(*MUL);
+ for (auto *MI : InsInstrs)
+ MI->setFlags(Flags);
}
/// Replace csincr-branch sequence by simple conditional branch
@@ -6526,13 +6933,12 @@ enum MachineOutlinerMBBFlags {
UnsafeRegsDead = 0x8
};
-unsigned
-AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
- assert(C.LRUWasSet && "LRU wasn't set?");
+Register
+AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
MachineFunction *MF = C.getMF();
- const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
- MF->getSubtarget().getRegisterInfo());
-
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ const AArch64RegisterInfo *ARI =
+ static_cast<const AArch64RegisterInfo *>(&TRI);
// Check if there is an available register across the sequence that we can
// use.
for (unsigned Reg : AArch64::GPR64RegClass) {
@@ -6540,12 +6946,11 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
Reg != AArch64::LR && // LR is not reserved, but don't use it.
Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
Reg != AArch64::X17 && // Ditto for X17.
- C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
+ C.isAvailableInsideSeq(Reg, TRI))
return Reg;
}
-
- // No suitable register. Return 0.
- return 0u;
+ return Register();
}
static bool
@@ -6691,10 +7096,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
unsigned FlagsSetInAll = 0xF;
// Compute liveness information for each candidate, and set FlagsSetInAll.
- std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [&FlagsSetInAll](outliner::Candidate &C) {
- FlagsSetInAll &= C.Flags;
- });
+ for (outliner::Candidate &C : RepeatedSequenceLocs)
+ FlagsSetInAll &= C.Flags;
// According to the AArch64 Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
@@ -6712,10 +7115,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// to compute liveness here.
if (C.Flags & UnsafeRegsDead)
return false;
- C.initLRU(TRI);
- LiveRegUnits LRU = C.LRU;
- return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
- !LRU.available(AArch64::NZCV));
+ return C.isAnyUnavailableAcrossOrOutOfSeq(
+ {AArch64::W16, AArch64::W17, AArch64::NZCV}, TRI);
};
// Are there any candidates where those registers are live?
@@ -6752,12 +7153,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// We check to see if CFI Instructions are present, and if they are
// we find the number of CFI Instructions in the candidates.
unsigned CFICount = 0;
- MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
- for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
- Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- if (MBBI->isCFIInstruction())
+ for (auto &I : make_range(RepeatedSequenceLocs[0].front(),
+ std::next(RepeatedSequenceLocs[0].back()))) {
+ if (I.isCFIInstruction())
CFICount++;
- MBBI++;
}
// We compare the number of found CFI Instructions to the number of CFI
@@ -6860,8 +7259,6 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// Check if we have to save LR.
for (outliner::Candidate &C : RepeatedSequenceLocs) {
- C.initLRU(TRI);
-
// If we have a noreturn caller, then we're going to be conservative and
// say that we have to save LR. If we don't have a ret at the end of the
// block, then we can't reason about liveness accurately.
@@ -6872,7 +7269,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
// Is LR available? If so, we don't need a save.
- if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
+ if (C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) && !IsNoReturn) {
NumBytesNoStackCalls += 4;
C.setCallInfo(MachineOutlinerNoLRSave, 4);
CandidatesWithoutStackFixups.push_back(C);
@@ -6888,7 +7285,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// Is SP used in the sequence at all? If not, we don't have to modify
// the stack, so we are guaranteed to get the same frame.
- else if (C.UsedInSequence.available(AArch64::SP)) {
+ else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
NumBytesNoStackCalls += 12;
C.setCallInfo(MachineOutlinerDefault, 12);
CandidatesWithoutStackFixups.push_back(C);
@@ -6957,11 +7354,12 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// LR to (ie one extra stack save/restore).
//
if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
- erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
+ erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
return (std::any_of(
C.front(), std::next(C.back()),
[](const MachineInstr &MI) { return MI.isCall(); })) &&
- (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
+ (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
+ !findRegisterToSaveLRTo(C));
});
}
}
@@ -7032,7 +7430,7 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
// modify the stack. Check if hasRedZone is true or unknown; if yes, don't
// outline from it.
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- if (!AFI || AFI->hasRedZone().getValueOr(true))
+ if (!AFI || AFI->hasRedZone().value_or(true))
return false;
// FIXME: Teach the outliner to generate/handle Windows unwind info.
@@ -7053,8 +7451,8 @@ bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
"Suitable Machine Function for outlining must track liveness");
LiveRegUnits LRU(getRegisterInfo());
- std::for_each(MBB.rbegin(), MBB.rend(),
- [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+ for (MachineInstr &MI : llvm::reverse(MBB))
+ LRU.accumulate(MI);
// Check if each of the unsafe registers are available...
bool W16AvailableInBlock = LRU.available(AArch64::W16);
@@ -7333,14 +7731,17 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
.addReg(AArch64::SP, RegState::InternalRead);
MI.setMIFlag(MachineInstr::FrameSetup);
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo()) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
// If v8.3a features are available we can replace a RET instruction by
- // RETAA or RETAB and omit the AUT instructions
+ // RETAA or RETAB and omit the AUT instructions. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
@@ -7353,6 +7754,11 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
: AArch64::AUTIBSP))
.setMIFlag(MachineInstr::FrameDestroy);
+ unsigned CFIIndexAuth =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndexAuth)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
}
}
@@ -7424,24 +7830,26 @@ void AArch64InstrInfo::buildOutlinedFrame(
.addImm(-16);
It = MBB.insert(It, STRXpre);
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const MCRegisterInfo *MRI = STI.getRegisterInfo();
- unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
-
- // Add a CFI saying the stack was moved 16 B down.
- int64_t StackPosEntry =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
- BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
- .addCFIIndex(StackPosEntry)
- .setMIFlags(MachineInstr::FrameSetup);
-
- // Add a CFI saying that the LR that we want to find is now 16 B higher than
- // before.
- int64_t LRPosEntry =
- MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
- BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
- .addCFIIndex(LRPosEntry)
- .setMIFlags(MachineInstr::FrameSetup);
+ if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo()) {
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const MCRegisterInfo *MRI = STI.getRegisterInfo();
+ unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
+
+ // Add a CFI saying the stack was moved 16 B down.
+ int64_t StackPosEntry =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
+ BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(StackPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // Add a CFI saying that the LR that we want to find is now 16 B higher
+ // than before.
+ int64_t LRPosEntry = MF.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
+ BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(LRPosEntry)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
// Insert a restore before the terminator for the function.
MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
@@ -7495,7 +7903,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
- MachineFunction &MF, const outliner::Candidate &C) const {
+ MachineFunction &MF, outliner::Candidate &C) const {
// Are we tail calling?
if (C.CallConstructionID == MachineOutlinerTailCall) {
@@ -7526,8 +7934,8 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
if (C.CallConstructionID == MachineOutlinerRegSave) {
// FIXME: This logic should be sunk into a target-specific interface so that
// we don't have to recompute the register.
- unsigned Reg = findRegisterToSaveLRTo(C);
- assert(Reg != 0 && "No callee-saved register available?");
+ Register Reg = findRegisterToSaveLRTo(C);
+ assert(Reg && "No callee-saved register available?");
// LR has to be a live in so that we can save it.
if (!MBB.isLiveIn(AArch64::LR))
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 1054bea40e68..b7a6ac301cdc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -103,6 +103,21 @@ public:
/// Returns whether the instruction is a pre-indexed load/store.
static bool isPreLdSt(const MachineInstr &MI);
+ /// Returns whether the instruction is a paired load/store.
+ static bool isPairedLdSt(const MachineInstr &MI);
+
+ /// Returns the base register operand of a load/store.
+ static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);
+
+ /// Returns the immediate offset operand of a load/store.
+ static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
+
+ /// Returns whether the instruction is FP or NEON.
+ static bool isFpOrNEON(const MachineInstr &MI);
+
+ /// Returns whether the instruction is in Q form (128-bit operands).
+ static bool isQForm(const MachineInstr &MI);
+
/// Returns the index for the immediate for a given instruction.
static unsigned getLoadStoreImmIdx(unsigned Opc);
@@ -283,7 +298,7 @@ public:
MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
- const outliner::Candidate &C) const override;
+ outliner::Candidate &C) const override;
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns the vector element size (B, H, S or D) of an SVE opcode.
uint64_t getElementSizeForOpcode(unsigned Opc) const;
@@ -347,7 +362,7 @@ private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
- unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+ Register findRegisterToSaveLRTo(outliner::Candidate &C) const;
/// Remove a ptest of a predicate-generating operation that already sets, or
/// can be made to set, the condition codes in an identical manner
@@ -356,12 +371,45 @@ private:
const MachineRegisterInfo *MRI) const;
};
+struct UsedNZCV {
+ bool N = false;
+ bool Z = false;
+ bool C = false;
+ bool V = false;
+
+ UsedNZCV() = default;
+
+ UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
+ this->N |= UsedFlags.N;
+ this->Z |= UsedFlags.Z;
+ this->C |= UsedFlags.C;
+ this->V |= UsedFlags.V;
+ return *this;
+ }
+};
+
+/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
+/// flags are not alive in successors of the common parent block of \p CmpInstr
+/// and \p MI.
+/// \returns None otherwise.
+///
+/// Collect instructions using those flags in \p CCUseInstrs if provided.
+Optional<UsedNZCV>
+examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
+ const TargetRegisterInfo &TRI,
+ SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);
+
/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
/// which either reads or clobbers NZCV.
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
const MachineInstr &UseMI,
const TargetRegisterInfo *TRI);
+MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg,
+ unsigned Reg, const StackOffset &Offset,
+ bool LastAdjustmentWasScalable = true);
+MCCFIInstruction createCFAOffset(const TargetRegisterInfo &TRI, unsigned Reg,
+ const StackOffset &OffsetFromDefCFA);
+
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
/// plus Offset. This is intended to be used from within the prolog/epilog
/// insertion (PEI) pass, where a virtual scratch register may be allocated
@@ -371,7 +419,9 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
StackOffset Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
bool SetNZCV = false, bool NeedsWinCFI = false,
- bool *HasWinCFI = nullptr);
+ bool *HasWinCFI = nullptr, bool EmitCFAOffset = false,
+ StackOffset InitialOffset = {},
+ unsigned FrameReg = AArch64::SP);
/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 83bf89ff97c5..3802a45ad6c1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,196 +14,196 @@
// ARM Instruction Predicate Definitions.
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
- AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">;
+ AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
- AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">;
+ AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
- AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">;
+ AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
- AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
+ AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
- AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
+ AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
- AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
+ AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
- AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
+ AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
- AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">;
+ AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
- AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
+ AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
- AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
+ AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
- AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">;
+ AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
- AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">;
+ AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
- AssemblerPredicate<(all_of FeatureEL2VMSA), "el2vmsa">;
+ AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;
def HasEL3 : Predicate<"Subtarget->hasEL3()">,
- AssemblerPredicate<(all_of FeatureEL3), "el3">;
+ AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
def HasVH : Predicate<"Subtarget->hasVH()">,
- AssemblerPredicate<(all_of FeatureVH), "vh">;
+ AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
- AssemblerPredicate<(all_of FeatureLOR), "lor">;
+ AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;
def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
- AssemblerPredicate<(all_of FeaturePAuth), "pauth">;
+ AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
def HasJS : Predicate<"Subtarget->hasJS()">,
- AssemblerPredicate<(all_of FeatureJS), "jsconv">;
+ AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
- AssemblerPredicate<(all_of FeatureCCIDX), "ccidx">;
+ AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;
def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
- AssemblerPredicate<(all_of FeatureComplxNum), "complxnum">;
+ AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;
def HasNV : Predicate<"Subtarget->hasNV()">,
- AssemblerPredicate<(all_of FeatureNV), "nv">;
+ AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
- AssemblerPredicate<(all_of FeatureMPAM), "mpam">;
+ AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;
def HasDIT : Predicate<"Subtarget->hasDIT()">,
- AssemblerPredicate<(all_of FeatureDIT), "dit">;
+ AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;
def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
- AssemblerPredicate<(all_of FeatureTRACEV8_4), "tracev8.4">;
+ AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;
def HasAM : Predicate<"Subtarget->hasAM()">,
- AssemblerPredicate<(all_of FeatureAM), "am">;
+ AssemblerPredicateWithAll<(all_of FeatureAM), "am">;
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
- AssemblerPredicate<(all_of FeatureSEL2), "sel2">;
+ AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
- AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
+ AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
- AssemblerPredicate<(all_of FeatureFlagM), "flagm">;
+ AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
- AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
+ AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
- AssemblerPredicate<(all_of FeatureFPARMv8), "fp-armv8">;
+ AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
- AssemblerPredicate<(all_of FeatureNEON), "neon">;
+ AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
- AssemblerPredicate<(all_of FeatureCrypto), "crypto">;
+ AssemblerPredicateWithAll<(all_of FeatureCrypto), "crypto">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
- AssemblerPredicate<(all_of FeatureSM4), "sm4">;
+ AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
- AssemblerPredicate<(all_of FeatureSHA3), "sha3">;
+ AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
- AssemblerPredicate<(all_of FeatureSHA2), "sha2">;
+ AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
- AssemblerPredicate<(all_of FeatureAES), "aes">;
+ AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
- AssemblerPredicate<(all_of FeatureDotProd), "dotprod">;
+ AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
- AssemblerPredicate<(all_of FeatureCRC), "crc">;
+ AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
- AssemblerPredicate<(all_of FeatureLSE), "lse">;
+ AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
- AssemblerPredicate<(all_of FeatureRAS), "ras">;
+ AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
- AssemblerPredicate<(all_of FeatureRDM), "rdm">;
+ AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
- AssemblerPredicate<(all_of FeatureFullFP16), "fullfp16">;
+ AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
- AssemblerPredicate<(all_of FeatureFP16FML), "fp16fml">;
+ AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
- AssemblerPredicate<(all_of FeatureSPE), "spe">;
+ AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
- AssemblerPredicate<(all_of FeatureFuseAES),
+ AssemblerPredicateWithAll<(all_of FeatureFuseAES),
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
- AssemblerPredicate<(all_of FeatureSVE), "sve">;
+ AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
- AssemblerPredicate<(all_of FeatureSVE2), "sve2">;
+ AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
- AssemblerPredicate<(all_of FeatureSVE2AES), "sve2-aes">;
+ AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
- AssemblerPredicate<(all_of FeatureSVE2SM4), "sve2-sm4">;
+ AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
- AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">;
+ AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
- AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
+ AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasSME : Predicate<"Subtarget->hasSME()">,
- AssemblerPredicate<(all_of FeatureSME), "sme">;
+ AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">,
- AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">;
+ AssemblerPredicateWithAll<(all_of FeatureSMEF64), "sme-f64">;
def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">,
- AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">;
-def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">,
- AssemblerPredicate<(all_of FeatureStreamingSVE), "streaming-sve">;
+ AssemblerPredicateWithAll<(all_of FeatureSMEI64), "sme-i64">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// so they should be enabled if either has been specified.
-def HasSVEorStreamingSVE
- : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">,
- AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE),
- "streaming-sve or sve">;
-def HasSVE2orStreamingSVE
- : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">,
- AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE),
- "streaming-sve or sve2">;
+def HasSVEorSME
+ : Predicate<"Subtarget->hasSVE() || Subtarget->hasSME()">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
+ "sve or sme">;
+def HasSVE2orSME
+ : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
+ "sve2 or sme">;
// A subset of NEON instructions are legal in Streaming SVE execution mode,
// so they should be enabled if either has been specified.
-def HasNEONorStreamingSVE
- : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">,
- AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE),
- "streaming-sve or neon">;
+def HasNEONorSME
+ : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
+ AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
+ "neon or sme">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
- AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
+ AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
+def HasLDAPR : Predicate<"Subtarget->hasLDAPR()">,
+ AssemblerPredicateWithAll<(all_of FeatureLDAPR), "ldapr">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
- AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">;
+ AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
- AssemblerPredicate<(all_of FeatureFRInt3264), "frint3264">;
+ AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
- AssemblerPredicate<(all_of FeatureSB), "sb">;
+ AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
- AssemblerPredicate<(all_of FeaturePredRes), "predres">;
+ AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
- AssemblerPredicate<(all_of FeatureCacheDeepPersist), "ccdp">;
+ AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
- AssemblerPredicate<(all_of FeatureBranchTargetId), "bti">;
+ AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
- AssemblerPredicate<(all_of FeatureMTE), "mte">;
+ AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
- AssemblerPredicate<(all_of FeatureTME), "tme">;
+ AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
- AssemblerPredicate<(all_of FeatureETE), "ete">;
+ AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
- AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
+ AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
- AssemblerPredicate<(all_of FeatureBF16), "bf16">;
+ AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
- AssemblerPredicate<(all_of FeatureMatMulInt8), "i8mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
- AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
- AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">;
+ AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
- AssemblerPredicate<(all_of FeatureXS), "xs">;
+ AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
- AssemblerPredicate<(all_of FeatureWFxT), "wfxt">;
+ AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
- AssemblerPredicate<(all_of FeatureLS64), "ls64">;
+ AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
- AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
+ AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
- AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
+ AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
- AssemblerPredicate<(all_of FeatureHBC), "hbc">;
+ AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
- AssemblerPredicate<(all_of FeatureMOPS), "mops">;
+ AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -350,49 +350,49 @@ def nonext_masked_load :
cast<MaskedLoadSDNode>(N)->isUnindexed() &&
!cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
-// sign extending masked load fragments.
-def asext_masked_load :
+// Any/Zero extending masked load fragments.
+def azext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(masked_ld node:$ptr, undef, node:$pred, node:$def),[{
return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
- cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) &&
+ cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
-def asext_masked_load_i8 :
+def azext_masked_load_i8 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (asext_masked_load node:$ptr, node:$pred, node:$def), [{
+ (azext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
-def asext_masked_load_i16 :
+def azext_masked_load_i16 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (asext_masked_load node:$ptr, node:$pred, node:$def), [{
+ (azext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
-def asext_masked_load_i32 :
+def azext_masked_load_i32 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (asext_masked_load node:$ptr, node:$pred, node:$def), [{
+ (azext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
-// zero extending masked load fragments.
-def zext_masked_load :
+// Sign extending masked load fragments.
+def sext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
- return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD &&
+ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
-def zext_masked_load_i8 :
+def sext_masked_load_i8 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (zext_masked_load node:$ptr, node:$pred, node:$def), [{
+ (sext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
-def zext_masked_load_i16 :
+def sext_masked_load_i16 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (zext_masked_load node:$ptr, node:$pred, node:$def), [{
+ (sext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
-def zext_masked_load_i32 :
+def sext_masked_load_i32 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
- (zext_masked_load node:$ptr, node:$pred, node:$def), [{
+ (sext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
@@ -443,6 +443,58 @@ def non_temporal_store :
cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
+multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
+ // offsets = (signed)Index << sizeof(elt)
+ def NAME#_signed_scaled :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
+ auto MGS = cast<MaskedGatherScatterSDNode>(N);
+ bool Signed = MGS->isIndexSigned() ||
+ MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
+ return Signed && MGS->isIndexScaled();
+ }]>;
+ // offsets = (signed)Index
+ def NAME#_signed_unscaled :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
+ auto MGS = cast<MaskedGatherScatterSDNode>(N);
+ bool Signed = MGS->isIndexSigned() ||
+ MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
+ return Signed && !MGS->isIndexScaled();
+ }]>;
+ // offsets = (unsigned)Index << sizeof(elt)
+ def NAME#_unsigned_scaled :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
+ auto MGS = cast<MaskedGatherScatterSDNode>(N);
+ bool Signed = MGS->isIndexSigned() ||
+ MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
+ return !Signed && MGS->isIndexScaled();
+ }]>;
+ // offsets = (unsigned)Index
+ def NAME#_unsigned_unscaled :
+ PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
+ (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
+ auto MGS = cast<MaskedGatherScatterSDNode>(N);
+ bool Signed = MGS->isIndexSigned() ||
+ MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
+ return !Signed && !MGS->isIndexScaled();
+ }]>;
+}
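+// For example, the first defm below creates nonext_masked_gather_signed_scaled,
+// nonext_masked_gather_signed_unscaled, nonext_masked_gather_unsigned_scaled
+// and nonext_masked_gather_unsigned_unscaled.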
+
+defm nonext_masked_gather : masked_gather_scatter<nonext_masked_gather>;
+defm azext_masked_gather_i8 : masked_gather_scatter<azext_masked_gather_i8>;
+defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
+defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
+defm sext_masked_gather_i8 : masked_gather_scatter<sext_masked_gather_i8>;
+defm sext_masked_gather_i16 : masked_gather_scatter<sext_masked_gather_i16>;
+defm sext_masked_gather_i32 : masked_gather_scatter<sext_masked_gather_i32>;
+
+defm nontrunc_masked_scatter : masked_gather_scatter<nontrunc_masked_scatter>;
+defm trunc_masked_scatter_i8 : masked_gather_scatter<trunc_masked_scatter_i8>;
+defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
+defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;
+
// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
return SDValue(N,0)->getValueType(0) == MVT::i32 &&
@@ -473,6 +525,11 @@ def AArch64call : SDNode<"AArch64ISD::CALL",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
@@ -526,6 +583,7 @@ def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
+def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;
def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
@@ -612,8 +670,10 @@ def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
-def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
-def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
+ [SDNPCommutative]>;
+def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
+ [SDNPCommutative]>;
def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
@@ -630,11 +690,6 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
-def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
-def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
-def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
-def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;
-
def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abdu node:$lhs, node:$rhs),
(int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
@@ -642,10 +697,21 @@ def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abds node:$lhs, node:$rhs),
(int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
+def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
+def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
+def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm),
+ [(AArch64addp_n node:$Rn, node:$Rm),
+ (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp : PatFrags<(ops node:$src),
[(AArch64uaddlp_n node:$src),
(int_aarch64_neon_uaddlp node:$src)]>;
+def AArch64saddlp : PatFrags<(ops node:$src),
+ [(AArch64saddlp_n node:$src),
+ (int_aarch64_neon_saddlp node:$src)]>;
+def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm),
+ [(AArch64addp_n node:$Rn, node:$Rm),
+ (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -669,6 +735,22 @@ def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
[SDNPHasChain, SDNPOutGlue]>;
+
+// Match an 'add' node, and also treat an 'or' node as an 'add' if the or'ed operands
+// have no common bits.
+def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
+ [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
+ if (N->getOpcode() == ISD::ADD)
+ return true;
+ return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
+}]> {
+ let GISelPredicateCode = [{
+    // Only handle G_ADD for now. FIXME: build the capability to compute
+    // whether the operands of a G_OR have common bits set or not.
+ return MI.getOpcode() == TargetOpcode::G_ADD;
+ }];
+}
+
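The no-common-bits rule above rests on a simple fact: when two operands share no set bits, no bit position can generate a carry, so OR and ADD coincide. A minimal standalone C++ sketch of the identity (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Hi = 0xAB00u; // bits 8..15 only
      uint32_t Lo = 0x00CDu; // bits 0..7 only
      assert((Hi & Lo) == 0);       // disjoint bits: no carries possible
      assert((Hi | Lo) == Hi + Lo); // hence 'or' can be selected as 'add'
      return 0;
    }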
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -939,7 +1021,7 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot
VectorIndexS:$idx)>;
}
-let Predicates = [HasNEONorStreamingSVE, HasBF16] in {
+let Predicates = [HasNEONorSME, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}
@@ -1025,6 +1107,15 @@ def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;
+class BCAX_pattern<ValueType VecTy>
+ : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
+ (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
+
+def : BCAX_pattern<v16i8>;
+def : BCAX_pattern<v8i16>;
+def : BCAX_pattern<v4i32>;
+def : BCAX_pattern<v2i64>;
+
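BCAX ("bit clear and exclusive-OR") computes Rn ^ (Rm & ~Ra) per lane, which is exactly the dag the BCAX_pattern class above matches. A minimal standalone C++ sketch of the scalar identity (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t N = 0xF0F0, M = 0x0FF0, A = 0x00FF;
      // 0x0FF0 & ~0x00FF == 0x0F00; 0xF0F0 ^ 0x0F00 == 0xFFF0
      assert((N ^ (M & ~A)) == 0xFFF0);
      return 0;
    }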
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
@@ -2073,6 +2164,10 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
+def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
+ (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
+ (REV16Xr GPR64:$Rn)>;
+
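The masked shift/or idiom in the pattern just above swaps the two bytes of every 16-bit halfword in a 64-bit value, which is precisely REV16's behavior. A standalone C++ sketch of the equivalence (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static uint64_t Rev16(uint64_t X) {
      // The dag matched above: ((x >> 8) & 0x00ff00ff00ff00ff)
      //                      | ((x << 8) & 0xff00ff00ff00ff00)
      return ((X >> 8) & 0x00ff00ff00ff00ffULL) |
             ((X << 8) & 0xff00ff00ff00ff00ULL);
    }

    int main() {
      // Each 16-bit halfword has its bytes swapped.
      assert(Rev16(0x1122334455667788ULL) == 0x2211443366558877ULL);
      return 0;
    }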
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
@@ -2320,6 +2415,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
PseudoInstExpansion<(BLR GPR64:$Rn)>;
def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
Sched<[WriteBrReg]>;
+ def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
+ Sched<[WriteBrReg]>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
@@ -2333,6 +2430,10 @@ def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
(BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
+def : Pat<(AArch64call_bti GPR64:$Rn),
+ (BLR_BTI GPR64:$Rn)>,
+ Requires<[NoSLSBLRMitigation]>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch
@@ -2359,6 +2460,10 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
+// Pseudo instruction to tell the streamer to emit a 'G' character into the
+// augmentation string.
+def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}
+
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes
@@ -2409,7 +2514,8 @@ def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
-def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
+def BRK : ExceptionGeneration<0b001, 0b00, "brk",
+ [(int_aarch64_break timm32_0_65535:$imm)]>;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
@@ -3891,24 +3997,24 @@ defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (lround f16:$Rn)),
+ def : Pat<(i32 (any_lround f16:$Rn)),
(!cast<Instruction>(FCVTASUWHr) f16:$Rn)>;
- def : Pat<(i64 (lround f16:$Rn)),
+ def : Pat<(i64 (any_lround f16:$Rn)),
(!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
- def : Pat<(i64 (llround f16:$Rn)),
+ def : Pat<(i64 (any_llround f16:$Rn)),
(!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
}
-def : Pat<(i32 (lround f32:$Rn)),
+def : Pat<(i32 (any_lround f32:$Rn)),
(!cast<Instruction>(FCVTASUWSr) f32:$Rn)>;
-def : Pat<(i32 (lround f64:$Rn)),
+def : Pat<(i32 (any_lround f64:$Rn)),
(!cast<Instruction>(FCVTASUWDr) f64:$Rn)>;
-def : Pat<(i64 (lround f32:$Rn)),
+def : Pat<(i64 (any_lround f32:$Rn)),
(!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
-def : Pat<(i64 (lround f64:$Rn)),
+def : Pat<(i64 (any_lround f64:$Rn)),
(!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
-def : Pat<(i64 (llround f32:$Rn)),
+def : Pat<(i64 (any_llround f32:$Rn)),
(!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
-def : Pat<(i64 (llround f64:$Rn)),
+def : Pat<(i64 (any_llround f64:$Rn)),
(!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
//===----------------------------------------------------------------------===//
@@ -3949,20 +4055,20 @@ defm FCVT : FPConversion<"fcvt">;
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
-defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
-defm FMOV : SingleOperandFPData<0b0000, "fmov">;
-defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
-defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
-defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
-defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
-defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
-defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
+defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
+defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">;
+defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
+defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
+defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
+defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
+defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
+defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;
-defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
-defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
+defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
+defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;
let SchedRW = [WriteFDiv] in {
-defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
+defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
}
let Predicates = [HasFRInt3264] in {
@@ -3972,44 +4078,48 @@ let Predicates = [HasFRInt3264] in {
defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264
+// Emitting strict_lrint as two instructions is valid because any exception
+// can only be raised by exactly one of the two instructions (e.g. if the
+// input is not an integer, the inexact exception is raised by the FRINTX but
+// not by the FCVTZS, since the output of FRINTX is already an integer).
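In C library terms, lrint(x) behaves like converting rint(x) to an integer, which is why the two-instruction FRINTX+FCVTZS expansion used by the patterns below is faithful. A standalone C++ sketch (illustrative only; assumes the default round-to-nearest, ties-to-even mode):

    #include <cassert>
    #include <cmath>

    int main() {
      // Ties round to even, so 2.5 -> 2 and 3.5 -> 4 in both forms.
      assert(std::lrint(2.5) == static_cast<long>(std::rint(2.5)));
      assert(std::lrint(3.5) == static_cast<long>(std::rint(3.5)));
      return 0;
    }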
let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (lrint f16:$Rn)),
+ def : Pat<(i32 (any_lrint f16:$Rn)),
(FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
- def : Pat<(i64 (lrint f16:$Rn)),
+ def : Pat<(i64 (any_lrint f16:$Rn)),
(FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
- def : Pat<(i64 (llrint f16:$Rn)),
+ def : Pat<(i64 (any_llrint f16:$Rn)),
(FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
}
-def : Pat<(i32 (lrint f32:$Rn)),
+def : Pat<(i32 (any_lrint f32:$Rn)),
(FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
-def : Pat<(i32 (lrint f64:$Rn)),
+def : Pat<(i32 (any_lrint f64:$Rn)),
(FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
-def : Pat<(i64 (lrint f32:$Rn)),
+def : Pat<(i64 (any_lrint f32:$Rn)),
(FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
-def : Pat<(i64 (lrint f64:$Rn)),
+def : Pat<(i64 (any_lrint f64:$Rn)),
(FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
-def : Pat<(i64 (llrint f32:$Rn)),
+def : Pat<(i64 (any_llrint f32:$Rn)),
(FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
-def : Pat<(i64 (llrint f64:$Rn)),
+def : Pat<(i64 (any_llrint f64:$Rn)),
(FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//
-defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
+defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>;
let SchedRW = [WriteFDiv] in {
-defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
+defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
}
-defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>;
-defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>;
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
+defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
let SchedRW = [WriteFMul] in {
-defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
-defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
+defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>;
+defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}
-defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
+defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>;
def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
@@ -4024,13 +4134,13 @@ def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//
-defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;
+defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
- TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
+ TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
- TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
+ TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
- TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
+ TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.
@@ -4159,25 +4269,25 @@ def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(zext (v8i8 V64:$opB))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
-def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
- (zext (extract_high_v16i8 V128:$opB))))),
+def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
+ (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
- (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
- (zext (extract_high_v16i8 V128:$opB))),
+ (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
+ (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
(zext (v4i16 V64:$opB))))),
(UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
-def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
- (zext (extract_high_v8i16 V128:$opB))))),
+def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
+ (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
(UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
(zext (v2i32 V64:$opB))))),
(UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
-def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
- (zext (extract_high_v4i32 V128:$opB))))),
+def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
+ (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
(UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
@@ -4189,7 +4299,7 @@ defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
-defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
+defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
(CMLTv8i8rz V64:$Rn)>;
@@ -4219,9 +4329,9 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
(i64 4)))),
(FCVTLv8i16 V128:$Rn)>;
-def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
+def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
@@ -4233,16 +4343,16 @@ def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
def : Pat<(concat_vectors V64:$Rd,
(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
(FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
-def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))),
+def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
(FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
int_aarch64_neon_fcvtxn>;
-defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
-defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
+defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
@@ -4272,15 +4382,15 @@ def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
-defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
+defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
-defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
-defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
-defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
-defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
-defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
-defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
-defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
+defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
+defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
+defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
+defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
+defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
+defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
+defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
let Predicates = [HasFRInt3264] in {
defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
@@ -4290,7 +4400,7 @@ let Predicates = [HasFRInt3264] in {
} // HasFRInt3264
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
-defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
+defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
@@ -4312,9 +4422,9 @@ defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
- BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
-defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
-defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
+ BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
+defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
+defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
defm SHLL : SIMDVectorLShiftLongBySizeBHS;
defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
@@ -4324,7 +4434,7 @@ defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
-defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
+defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
@@ -4348,15 +4458,15 @@ def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
(SHLLv8i8 V64:$Rn)>;
- def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
+ def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
(SHLLv16i8 V128:$Rn)>;
def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
(SHLLv4i16 V64:$Rn)>;
- def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
+ def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
(SHLLv8i16 V128:$Rn)>;
def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
(SHLLv2i32 V64:$Rn)>;
- def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
+ def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
(SHLLv4i32 V128:$Rn)>;
}
@@ -4426,7 +4536,7 @@ def : Pat<(v8i16 (concat_vectors
//===----------------------------------------------------------------------===//
defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
-defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
+defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
@@ -4447,33 +4557,33 @@ def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, V
}
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
-defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
+defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
-defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
+defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
-defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
+defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
-defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
+defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+ TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
- TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+ TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
-defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
+defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
-defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
+defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;
// MLA and MLS are generated in MachineCombine
defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
@@ -4484,7 +4594,7 @@ defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
-defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>;
+defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
@@ -4496,14 +4606,14 @@ defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrd
defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
-defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>;
+defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
-defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
+defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
@@ -4513,7 +4623,7 @@ defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
-defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
+defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
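The avgfloors/avgceils/avgflooru/avgceilu nodes now selected for SHADD/SRHADD/UHADD/URHADD denote halving adds: an average computed as if in a wider type, so the intermediate sum cannot wrap. A standalone C++ sketch of the signed flavors (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static int8_t AvgFloorS(int8_t A, int8_t B) {
      return static_cast<int8_t>((int16_t(A) + int16_t(B)) >> 1);
    }
    static int8_t AvgCeilS(int8_t A, int8_t B) {
      return static_cast<int8_t>((int16_t(A) + int16_t(B) + 1) >> 1);
    }

    int main() {
      assert(AvgFloorS(127, 126) == 126); // no wrap despite the 8-bit range
      assert(AvgCeilS(127, 126) == 127);
      return 0;
    }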
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
@@ -4753,11 +4863,13 @@ defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FABD64 FPR64:$Rn, FPR64:$Rm)>;
-let Predicates = [HasFullFP16] in {
+let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
+let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
+}
defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
int_aarch64_neon_facge>;
defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
@@ -4765,9 +4877,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
-defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
-defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
-defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
+defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
+defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
+defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -4862,9 +4974,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
-defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
-defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
+defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>;
+defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>;
+defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
@@ -4980,23 +5092,21 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
-def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
+def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
-def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
+def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
(SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
-def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
+def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
(UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
-def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
+def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
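These folds keep the int<->fp round trip entirely in FP registers. Semantically, sint_to_fp(fp_to_sint(x)) is "truncate toward zero, then convert back", as a standalone C++ sketch shows (illustrative only, not part of the patch):

    #include <cassert>

    int main() {
      double X = -2.75;
      // fp_to_sint truncates toward zero; sint_to_fp converts back.
      double RoundTrip = static_cast<double>(static_cast<long long>(X));
      assert(RoundTrip == -2.0);
      return 0;
    }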
let Predicates = [HasFullFP16] in {
-def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
+def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
-def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
+def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
(UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
-}
-
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
@@ -5083,6 +5193,7 @@ def : Pat <(f64 (uint_to_fp (i32
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
+} // let Predicates = [HasNEON]
//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
@@ -5102,10 +5213,10 @@ defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+ TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
- TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
-defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
+ TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
+defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
@@ -5123,10 +5234,10 @@ defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+ TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
- TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
-defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
+ TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
+defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
@@ -5161,74 +5272,15 @@ multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperat
V64:$Rn, V64:$Rm)), dsub)>;
}
-defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
+defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
-defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
+defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
-defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
+defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
-defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
+defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
-// Additional patterns for SMULL and UMULL
-multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
- (INST8B V64:$Rn, V64:$Rm)>;
- def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
- (INST4H V64:$Rn, V64:$Rm)>;
- def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
- (INST2S V64:$Rn, V64:$Rm)>;
-}
-
-defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
- SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
-defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
- UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
-
-// Patterns for smull2/umull2.
-multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))),
- (INST8B V128:$Rn, V128:$Rm)>;
- def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))),
- (INST4H V128:$Rn, V128:$Rm)>;
- def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))),
- (INST2S V128:$Rn, V128:$Rm)>;
-}
-
-defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
- SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
-defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
- UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
-
-// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
-multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
- (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
- def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
- (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
- def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
- (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
-}
-
-defm : Neon_mulacc_widen_patterns<
- TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
- SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
-defm : Neon_mulacc_widen_patterns<
- TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
- UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
-defm : Neon_mulacc_widen_patterns<
- TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
- SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
-defm : Neon_mulacc_widen_patterns<
- TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
- UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
-
// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
@@ -5392,19 +5444,22 @@ defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
+// Only the lower half of the result of the inner FADDP is used in the
+// patterns below, so the second operand does not matter. Re-using the first
+// input operand avoids introducing any additional dependencies.
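Concretely, the vecreduce_fadd patterns below build a pairwise-add tree and consume only lane 0 of the final step. A standalone C++ sketch of the v4f32 case (illustrative only, not part of the patch):

    #include <cassert>

    int main() {
      float V[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      // FADDPv4f32 V, V yields lanes {1+2, 3+4, 1+2, 3+4}; only the low
      // two lanes are read, so reusing V as the second operand is as good
      // as an undef there.
      float P[2] = {V[0] + V[1], V[2] + V[3]};
      // FADDPv2i32p: scalar pairwise add of the low two lanes.
      float Sum = P[0] + P[1];
      assert(Sum == 10.0f);
      return 0;
    }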
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
(FADDPv2i16p
(EXTRACT_SUBREG
- (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
+ (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
- (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
+ (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
(FADDPv2i32p
(EXTRACT_SUBREG
- (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
+ (FADDPv4f32 V128:$Rn, V128:$Rn),
dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
@@ -5856,24 +5911,28 @@ defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-// Patterns for uaddv(uaddlp(x)) ==> uaddlv
-def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
- (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
- (i64 0))), (i64 0))),
- (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
- (UADDLVv8i8v V64:$op), hsub), ssub)>;
-def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
- (v16i8 V128:$op))))), (i64 0))),
- (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
- (UADDLVv16i8v V128:$op), hsub), ssub)>;
-def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>;
-
-// Patterns for addp(uaddlp(x))) ==> uaddlv
-def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))),
- (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>;
-def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))),
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>;
+multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
+ // Patterns for addv(addlp(x)) ==> addlv
+ def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
+ (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
+ (i64 0))), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
+ def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
+ def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
+
+  // Patterns for addp(addlp(x)) ==> addlv
+ def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
+ def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
+}
+
+defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
+defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
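The fold implemented by this multiclass relies on the fact that summing pairwise-widened elements (ADDLP, then an across-lane add) equals a single long across-lane add (ADDLV). A standalone C++ sketch for the unsigned v8i8 case (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t V[8] = {250, 10, 3, 4, 5, 6, 7, 8};
      // UADDLP: widen to u16 and add adjacent pairs.
      uint16_t P[4] = {uint16_t(V[0] + V[1]), uint16_t(V[2] + V[3]),
                       uint16_t(V[4] + V[5]), uint16_t(V[6] + V[7])};
      uint32_t AddV = P[0] + P[1] + P[2] + P[3]; // across-lane add of the pairs
      uint32_t AddLV = 0;                        // UADDLV: one widening sum
      for (uint8_t E : V)
        AddLV += E;
      assert(AddV == AddLV); // both 293
      return 0;
    }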
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
@@ -6185,6 +6244,14 @@ def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
+let Predicates = [HasNEON] in {
+ // Using the MOVI to materialize fp constants.
+ def : Pat<(f32 fpimm32SIMDModImmType4:$in),
+ (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
+ (i32 24)),
+ ssub)>;
+}
+
def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
@@ -6273,18 +6340,18 @@ let hasSideEffects = 0 in {
// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
+ TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
- TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
+ TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+ TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
+ TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
+ TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
+ TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
// 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
@@ -6363,22 +6430,22 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
}
defm : FMLSIndexedAfterNegPatterns<
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+ TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
- TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
+ TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;
defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
-defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;
+defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;
-def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv2i32_indexed V64:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
(i64 0))>;
-def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv4i32_indexed V128:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
(i64 0))>;
-def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
+def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
(FMULv2i64_indexed V128:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
(i64 0))>;
@@ -6397,11 +6464,10 @@ defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+ TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
- TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
-defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
- int_aarch64_neon_smull>;
+ TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
+defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
@@ -6412,11 +6478,10 @@ defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+ TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
- TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
-defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
- int_aarch64_neon_umull>;
+ TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
+defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;
// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
@@ -6425,22 +6490,6 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
VectorIndexS:$idx)),
(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
-// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands
-// have no common bits.
-def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
- [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
- if (N->getOpcode() == ISD::ADD)
- return true;
- return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
-}]> {
- let GISelPredicateCode = [{
- // Only handle G_ADD for now. FIXME. build capability to compute whether
- // operands of G_OR have common bits set or not.
- return MI.getOpcode() == TargetOpcode::G_ADD;
- }];
-}
-
-
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
@@ -6480,7 +6529,7 @@ def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
(SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
-// Patterns for FP16 Instrinsics - requires reg copy to/from as i16s not supported.
+// Patterns for FP16 Intrinsics - requires a reg copy to/from, as i16s are not supported.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
@@ -6787,7 +6836,7 @@ class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
dsub)),
0),
ssub)))>,
- Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -6807,7 +6856,8 @@ class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
INST,
hsub),
0),
- ssub)))>, Requires<[NotForCodeSize]>;
+ ssub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
@@ -6841,7 +6891,7 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
dsub)),
0),
dsub)))>,
- Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
@@ -6860,7 +6910,8 @@ class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
INST,
ssub),
0),
- dsub)))>, Requires<[NotForCodeSize]>;
+ dsub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
(LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
@@ -7216,14 +7267,6 @@ def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.
-def def32 : PatLeaf<(i32 GPR32:$src), [{
- return isDef32(*N);
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
-
// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
@@ -7387,99 +7430,16 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
//
// Natural vector casts (64 bit)
-def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
-def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
-def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
-
-def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
-def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
-def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
-
-def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
-def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
-def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
-
-def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
-def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
-def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
-
-def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
+foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
+ foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
+ def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
+ (VT FPR64:$src)>;
// Natural vector casts (128 bit)
-def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
-def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
-def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
-def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
-def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
-def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
-def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
-def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
-def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
-def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
-def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
-def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
-def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
-def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
+foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
+ foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
+ def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
+ (VT FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
@@ -8093,17 +8053,17 @@ defm : InsertSubvectorUndef<i64>;
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
(vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
(i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
-def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
- (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
+def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
+ (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
(f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
-def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
- (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
+def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
+ (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
(f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
-def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
- (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
+def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
+ (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
(f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
// Scalar 64-bit shifts in FPR64 registers.
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 6aefc1fdb599..eaf39fc0dbb1 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -9,6 +9,12 @@
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
+// The pass runs after the PrologEpilogInserter where we emit the CFI
+// instructions. In order to preserve the correctness of the unwind
+// information, the pass must not change the order of any two instructions if
+// one of them has the FrameSetup/FrameDestroy flag, or else it must apply an
+// ad-hoc fix to the unwind information.
+//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
@@ -31,6 +37,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -549,26 +556,6 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
}
}
-static bool isPairedLdSt(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- return false;
- case AArch64::LDPSi:
- case AArch64::LDPSWi:
- case AArch64::LDPDi:
- case AArch64::LDPQi:
- case AArch64::LDPWi:
- case AArch64::LDPXi:
- case AArch64::STPSi:
- case AArch64::STPDi:
- case AArch64::STPQi:
- case AArch64::STPWi:
- case AArch64::STPXi:
- case AArch64::STGPi:
- return true;
- }
-}
-
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
unsigned OpcA = FirstMI.getOpcode();
@@ -603,7 +590,7 @@ static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
int &MinOffset, int &MaxOffset) {
- bool IsPaired = isPairedLdSt(MI);
+ bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
bool IsTagStore = isTagStore(MI);
// ST*G and all paired ldst have the same scale in pre/post-indexed variants
// as in the "unsigned offset" variant.
@@ -625,17 +612,8 @@ static MachineOperand &getLdStRegOp(MachineInstr &MI,
bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
if (IsPreLdSt)
PairedRegOp += 1;
- unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
- return MI.getOperand(Idx);
-}
-
-static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
- unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1;
- return MI.getOperand(Idx);
-}
-
-static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
- unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
+ unsigned Idx =
+ AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
return MI.getOperand(Idx);
}
@@ -645,12 +623,14 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
int LoadSize = TII->getMemScale(LoadInst);
int StoreSize = TII->getMemScale(StoreInst);
- int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
- ? getLdStOffsetOp(StoreInst).getImm()
- : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
- int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
- ? getLdStOffsetOp(LoadInst).getImm()
- : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
+ int UnscaledStOffset =
+ TII->hasUnscaledLdStOffset(StoreInst)
+ ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
+ int UnscaledLdOffset =
+ TII->hasUnscaledLdStOffset(LoadInst)
+ ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
return (UnscaledStOffset <= UnscaledLdOffset) &&
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
@@ -729,7 +709,7 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
case AArch64::STPWi:
case AArch64::STPXi:
// Make sure this is a reg+imm (as opposed to an address reloc).
- if (!getLdStOffsetOp(MI).isImm())
+ if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;
return true;
@@ -763,17 +743,18 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =
- MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
+ MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
+ : AArch64InstrInfo::getLdStBaseOp(*I);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI;
- if (getLdStOffsetOp(*I).getImm() ==
- getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
+ if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
+ AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
RtMI = &*MergeMI;
else
RtMI = &*I;
- int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
+ int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Change the scaled offset from small to large type.
if (IsScaled) {
assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
@@ -923,6 +904,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
assert(all_of(MI.operands(),
[this, &RenameReg](const MachineOperand &MOP) {
return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
+ MOP.isUndef() ||
!TRI->regsOverlap(MOP.getReg(), *RenameReg);
}) &&
"Rename register used between paired instruction, trashing the "
@@ -936,10 +918,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =
- MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);
+ MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
+ : AArch64InstrInfo::getLdStBaseOp(*I);
- int Offset = getLdStOffsetOp(*I).getImm();
- int PairedOffset = getLdStOffsetOp(*Paired).getImm();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
+ int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
if (IsUnscaled != PairedIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled. If
@@ -974,7 +957,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
RtMI = &*I;
Rt2MI = &*Paired;
}
- int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
+ int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Scale the immediate offset, if necessary.
if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
@@ -1132,12 +1115,14 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
"Unsupported ld/st match");
assert(LoadSize <= StoreSize && "Invalid load size");
- int UnscaledLdOffset = IsUnscaled
- ? getLdStOffsetOp(*LoadI).getImm()
- : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
- int UnscaledStOffset = IsUnscaled
- ? getLdStOffsetOp(*StoreI).getImm()
- : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
+ int UnscaledLdOffset =
+ IsUnscaled
+ ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
+ int UnscaledStOffset =
+ IsUnscaled
+ ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;
Register DestReg =
IsStoreXReg ? Register(TRI->getMatchingSuperReg(
@@ -1235,7 +1220,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator MBBI = I;
MachineInstr &LoadMI = *I;
- Register BaseReg = getLdStBaseOp(LoadMI).getReg();
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();
// If the load is the first instruction in the block, there's obviously
// not any matching store.
@@ -1264,7 +1249,8 @@ bool AArch64LoadStoreOpt::findMatchingStore(
// Also we can't handle stores without an immediate offset operand,
// while the operand might be the address for a global variable.
if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
- BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
+ BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
+ AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
StoreI = MBBI;
@@ -1467,18 +1453,19 @@ canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
return true;
}
-// Check if we can find a physical register for renaming. This register must:
-// * not be defined up to FirstMI (checking DefinedInBB)
-// * not used between the MI and the defining instruction of the register to
-// rename (checked using UsedInBetween).
+// Check if we can find a physical register for renaming \p Reg. This register
+// must:
+// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
+// defined registers up to the point where the renamed register will be used,
+// * not be used in \p UsedInBetween; UsedInBetween must contain all the
+// registers accessed in the range over which the rename register will be
+// used,
// * is available in all used register classes (checked using RequiredClasses).
static Optional<MCPhysReg> tryToFindRegisterToRename(
- MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
+ const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
LiveRegUnits &UsedInBetween,
SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
const TargetRegisterInfo *TRI) {
- auto &MF = *FirstMI.getParent()->getParent();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const MachineRegisterInfo &RegInfo = MF.getRegInfo();
// Checks if any sub- or super-register of PR is callee saved.
auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
@@ -1499,7 +1486,7 @@ static Optional<MCPhysReg> tryToFindRegisterToRename(
});
};
- auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
+ auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
for (const MCPhysReg &PR : *RegClass) {
if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
!RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
@@ -1530,8 +1517,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
Register Reg = getLdStRegOp(FirstMI).getReg();
- Register BaseReg = getLdStBaseOp(FirstMI).getReg();
- int Offset = getLdStOffsetOp(FirstMI).getImm();
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
@@ -1566,7 +1553,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
Flags.setSExtIdx(-1);
if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
- getLdStOffsetOp(MI).isImm()) {
+ AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
assert(MI.mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
@@ -1574,8 +1561,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// check for +1/-1. Make sure to check the new instruction offset is
// actually an immediate and not a symbolic reference destined for
// a relocation.
- Register MIBaseReg = getLdStBaseOp(MI).getReg();
- int MIOffset = getLdStOffsetOp(MI).getImm();
+ Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
+ int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
if (IsUnscaled != MIIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled.
@@ -1606,15 +1593,16 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// can't be paired: bail and keep looking.
if (IsPreLdSt) {
bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
- bool IsBaseRegUsed =
- !UsedRegUnits.available(getLdStBaseOp(MI).getReg());
- bool IsBaseRegModified =
- !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
+ bool IsBaseRegUsed = !UsedRegUnits.available(
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg());
+ bool IsBaseRegModified = !ModifiedRegUnits.available(
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg());
// If the stored value and the address of the second instruction are
// the same, it needs to be using the updated register and therefore
// it must not be folded.
- bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
- getLdStBaseOp(MI).getReg());
+ bool IsMIRegTheSame =
+ TRI->regsOverlap(getLdStRegOp(MI).getReg(),
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg());
if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
IsMIRegTheSame) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
@@ -1722,8 +1710,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
if (*MaybeCanRename) {
Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
- FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
- TRI);
+ *FirstMI.getParent()->getParent(), Reg, DefinedInBB,
+ UsedInBetween, RequiredClasses, TRI);
if (MaybeRenameReg) {
Flags.setRenameReg(*MaybeRenameReg);
Flags.setMergeForward(true);
@@ -1760,6 +1748,28 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
return E;
}
+static MachineBasicBlock::iterator
+maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
+ auto End = MI.getParent()->end();
+ if (MaybeCFI == End ||
+ MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
+ !(MI.getFlag(MachineInstr::FrameSetup) ||
+ MI.getFlag(MachineInstr::FrameDestroy)) ||
+ AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
+ return End;
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
+ const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpDefCfa:
+ case MCCFIInstruction::OpDefCfaOffset:
+ return MaybeCFI;
+ default:
+ return End;
+ }
+}
+
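For intuition, the transformation maybeMoveCFI enables can be pictured as follows; this is an illustration only (register and offset invented), shown as assembly inside comments:

```cpp
// Pre-index merge of an SP update into a neighbouring store:
//
//   Before:                          After mergeUpdateInsn + splice:
//     sub  sp, sp, #16                 str  x0, [sp, #-16]!
//     .cfi_def_cfa_offset 16           .cfi_def_cfa_offset 16
//     str  x0, [sp]
//
// The CFI directive is spliced to follow the merged instruction, so the CFA
// offset it states remains correct at every instruction boundary.
```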
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update,
@@ -1769,6 +1779,12 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+
+ // If we are updating the SP and the following instruction is a CFA-offset
+ // related CFI instruction, move it after the merged instruction.
+ MachineBasicBlock::iterator CFI =
+ IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
+
// Return the instruction following the merged instruction, which is
// the instruction following our unmerged load. Unless that's the add/sub
// instruction we're merging, in which case it's the one after that.
@@ -1786,12 +1802,12 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineInstrBuilder MIB;
int Scale, MinOffset, MaxOffset;
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
- if (!isPairedLdSt(*I)) {
+ if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I))
- .add(getLdStBaseOp(*I))
+ .add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
@@ -1801,12 +1817,15 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))
- .add(getLdStBaseOp(*I))
+ .add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
}
- (void)MIB;
+ if (CFI != E) {
+ MachineBasicBlock *MBB = I->getParent();
+ MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
+ }
if (IsPreIdx) {
++NumPreFolded;
@@ -1888,8 +1907,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
- Register BaseReg = getLdStBaseOp(MemMI).getReg();
- int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
+ int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
+ TII->getMemScale(MemMI);
// Scan forward looking for post-index opportunities. Updating instructions
// can't be formed if the memory instruction doesn't have the offset we're
@@ -1904,7 +1924,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// behavior in this case unlike normal stores, and always performs writeback
// after reading the source register value.
if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
- bool IsPairedInsn = isPairedLdSt(MemMI);
+ bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
@@ -1965,8 +1985,8 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineBasicBlock::iterator MBBI = I;
MachineFunction &MF = *MemMI.getMF();
- Register BaseReg = getLdStBaseOp(MemMI).getReg();
- int Offset = getLdStOffsetOp(MemMI).getImm();
+ Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
@@ -1975,7 +1995,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
// If the base register overlaps a destination register, we can't
// merge the update.
if (!isTagStore(MemMI)) {
- bool IsPairedInsn = isPairedLdSt(MemMI);
+ bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
@@ -2045,7 +2065,7 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
// Make sure this is a reg+imm.
// FIXME: It is possible to extend it to handle reg+reg cases.
- if (!getLdStOffsetOp(MI).isImm())
+ if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;
// Look backward up to LdStLimit instructions.
@@ -2099,7 +2119,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1)
bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
- int Offset = getLdStOffsetOp(MI).getImm();
+ int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
// Allow one more for offset.
if (Offset > 0)
@@ -2166,7 +2186,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// The immediate in the load/store is scaled by the size of the memory
// operation. The immediate in the add we're looking for,
// however, is not, so adjust here.
- int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
+ int UnscaledOffset =
+ AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
// Look forward to try to find a pre-index instruction. For example,
// ldr x1, [x0, #64]
@@ -2268,7 +2289,7 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
- Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+ Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
TRI = Subtarget->getRegisterInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
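The mechanical part of this file's change replaces file-local helpers with the static members now exposed by AArch64InstrInfo. A minimal sketch of the resulting call shape (a hypothetical predicate, assuming the in-tree headers):

```cpp
#include "AArch64InstrInfo.h"

using namespace llvm;

// Hypothetical predicate: a non-paired load/store addressing the stack
// pointer with an immediate offset, queried via the shared statics.
static bool isSPRelativeImmLdSt(const MachineInstr &MI) {
  return AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP &&
         AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
         !AArch64InstrInfo::isPairedLdSt(MI);
}
```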
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 1fc5617b49f6..5c7fb0deecd0 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -60,12 +60,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;
+ using OpcodePair = std::pair<unsigned, unsigned>;
template <typename T>
using SplitAndOpcFunc =
- std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
+ std::function<Optional<OpcodePair>(T, unsigned, T &, T &)>;
using BuildMIFunc =
- std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
- Register, Register)>;
+ std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
+ Register, Register, Register)>;
/// For instructions where an immediate operand could be split into two
/// separate immediate instructions, use splitTwoPartImm to handle the
@@ -83,20 +84,19 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
/// %dst = <Instr>ri %tmp (encode half IMM) [...]
template <typename T>
bool splitTwoPartImm(MachineInstr &MI,
- SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
MachineInstr *&SubregToRegMI);
template <typename T>
- bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
- SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+ bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
template <typename T>
- bool visitAND(unsigned Opc, MachineInstr &MI,
- SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
- bool visitORR(MachineInstr &MI,
- SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+ bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
+
+ template <typename T>
+ bool visitAND(unsigned Opc, MachineInstr &MI);
+ bool visitORR(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -157,8 +157,7 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
- unsigned Opc, MachineInstr &MI,
- SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ unsigned Opc, MachineInstr &MI) {
// Try below transformation.
//
// MOVi32imm + ANDWrr ==> ANDWri + ANDWri
@@ -170,28 +169,27 @@ bool AArch64MIPeepholeOpt::visitAND(
// mov + and instructions.
return splitTwoPartImm<T>(
- MI, ToBeRemoved,
- [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
+ MI,
+ [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
- return Opc;
+ return std::make_pair(Opc, Opc);
return None;
},
- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0);
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1);
});
}
-bool AArch64MIPeepholeOpt::visitORR(
- MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
// Check this ORR comes from below zero-extend pattern.
//
// def : Pat<(i64 (zext GPR32:$src)),
@@ -216,19 +214,38 @@ bool AArch64MIPeepholeOpt::visitORR(
// zero-extend, we do not need the zero-extend. Check that the MI's opcode is
// a real AArch64 instruction; if it is not, conservatively do not process it.
- if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
+ if (SrcMI->getOpcode() == TargetOpcode::COPY &&
+ SrcMI->getOperand(1).getReg().isVirtual()) {
+ const TargetRegisterClass *RC =
+ MRI->getRegClass(SrcMI->getOperand(1).getReg());
+
+ // A COPY from an FPR will become an FMOVSWr, so emit it now so that we know
+ // that the upper bits are zero.
+ if (RC != &AArch64::FPR32RegClass &&
+ ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
+ SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
+ return false;
+ Register CpySrc = SrcMI->getOperand(1).getReg();
+ if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
+ CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
+ BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), CpySrc)
+ .add(SrcMI->getOperand(1));
+ }
+ BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
+ TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
+ .addReg(CpySrc);
+ SrcMI->eraseFromParent();
+ } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
return false;
Register DefReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(2).getReg();
MRI->replaceRegWith(DefReg, SrcReg);
MRI->clearKillFlags(SrcReg);
- // replaceRegWith changes MI's definition register. Keep it for SSA form until
- // deleting MI.
- MI.getOperand(0).setReg(DefReg);
- ToBeRemoved.insert(&MI);
-
LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
+ MI.eraseFromParent();
return true;
}
@@ -255,8 +272,7 @@ static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
- unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
- SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
// Try below transformation.
//
// MOVi32imm + ADDWrr ==> ADDWri + ADDWri
@@ -271,25 +287,65 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
// multiple `mov` + `and/sub` instructions.
return splitTwoPartImm<T>(
- MI, ToBeRemoved,
+ MI,
[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
- T &Imm1) -> Optional<unsigned> {
+ T &Imm1) -> Optional<OpcodePair> {
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
- return PosOpc;
+ return std::make_pair(PosOpc, PosOpc);
if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
- return NegOpc;
+ return std::make_pair(NegOpc, NegOpc);
return None;
},
- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0)
.addImm(12);
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
+ .addReg(NewTmpReg)
+ .addImm(Imm1)
+ .addImm(0);
+ });
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::visitADDSSUBS(
+ OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
+ // Try the same transformation as ADDSUB, but with the additional requirement
+ // that the condition code is only used for Equal and Not Equal.
+ return splitTwoPartImm<T>(
+ MI,
+ [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
+ T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
+ OpcodePair OP;
+ if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
+ OP = PosOpcs;
+ else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
+ OP = NegOpcs;
+ else
+ return None;
+ // Check the condition code uses last, since scanning the subsequent
+ // instructions is expensive.
+ MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
+ Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
+ if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
+ return None;
+ return OP;
+ },
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
+ unsigned Imm1, Register SrcReg, Register NewTmpReg,
+ Register NewDstReg) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
+ .addReg(SrcReg)
+ .addImm(Imm0)
+ .addImm(12);
+ BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1)
.addImm(0);
@@ -338,7 +394,7 @@ bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
- MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+ MachineInstr &MI,
SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
unsigned RegSize = sizeof(T) * 8;
assert((RegSize == 32 || RegSize == 64) &&
@@ -357,39 +413,63 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
// number since it was sign extended when we assign to the 64-bit Imm.
if (SubregToRegMI)
Imm &= 0xFFFFFFFF;
- unsigned Opcode;
+ OpcodePair Opcode;
if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
- Opcode = R.getValue();
+ Opcode = *R;
else
return false;
- // Create new ADD/SUB MIs.
+ // Create new MIs using the first and second opcodes. The opcodes might
+ // differ for flag-setting operations that should set flags only on the
+ // second instruction.
+ // NewTmpReg = Opcode.first SrcReg Imm0
+ // NewDstReg = Opcode.second NewTmpReg Imm1
+
+ // Determine register classes for destinations and register operands
MachineFunction *MF = MI.getMF();
- const TargetRegisterClass *RC =
- TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
- const TargetRegisterClass *ORC =
- TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
+ const TargetRegisterClass *FirstInstrDstRC =
+ TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
+ const TargetRegisterClass *FirstInstrOperandRC =
+ TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
+ const TargetRegisterClass *SecondInstrDstRC =
+ (Opcode.first == Opcode.second)
+ ? FirstInstrDstRC
+ : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
+ const TargetRegisterClass *SecondInstrOperandRC =
+ (Opcode.first == Opcode.second)
+ ? FirstInstrOperandRC
+ : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
+
+ // Get the old and new destination registers
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- Register NewTmpReg = MRI->createVirtualRegister(RC);
- Register NewDstReg = MRI->createVirtualRegister(RC);
-
- MRI->constrainRegClass(SrcReg, RC);
- MRI->constrainRegClass(NewTmpReg, ORC);
- MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
-
+ Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
+ // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
+ // reuse that same destination register.
+ Register NewDstReg = DstReg.isVirtual()
+ ? MRI->createVirtualRegister(SecondInstrDstRC)
+ : DstReg;
+
+ // Constrain registers based on their new uses
+ MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
+ MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
+ if (DstReg != NewDstReg)
+ MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+
+ // Call the delegating operation to build the instruction
BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
- MRI->replaceRegWith(DstReg, NewDstReg);
// replaceRegWith changes MI's definition register. Keep it for SSA form until
- // deleting MI.
- MI.getOperand(0).setReg(DstReg);
+ // deleting MI. Only if we made a new destination register.
+ if (DstReg != NewDstReg) {
+ MRI->replaceRegWith(DstReg, NewDstReg);
+ MI.getOperand(0).setReg(DstReg);
+ }
// Record the MIs that need to be removed.
- ToBeRemoved.insert(&MI);
+ MI.eraseFromParent();
if (SubregToRegMI)
- ToBeRemoved.insert(SubregToRegMI);
- ToBeRemoved.insert(MovMI);
+ SubregToRegMI->eraseFromParent();
+ MovMI->eraseFromParent();
return true;
}
@@ -407,45 +487,57 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
assert(MRI->isSSA() && "Expected to be run on SSA form!");
bool Changed = false;
- SmallSetVector<MachineInstr *, 8> ToBeRemoved;
for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
+ for (MachineInstr &MI : make_early_inc_range(MBB)) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::ANDWrr:
- Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved);
+ Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
break;
case AArch64::ANDXrr:
- Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved);
+ Changed = visitAND<uint64_t>(AArch64::ANDXri, MI);
break;
case AArch64::ORRWrs:
- Changed = visitORR(MI, ToBeRemoved);
+ Changed = visitORR(MI);
break;
case AArch64::ADDWrr:
- Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI,
- ToBeRemoved);
+ Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
break;
case AArch64::SUBWrr:
- Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI,
- ToBeRemoved);
+ Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
break;
case AArch64::ADDXrr:
- Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI,
- ToBeRemoved);
+ Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
break;
case AArch64::SUBXrr:
- Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
- ToBeRemoved);
+ Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
+ break;
+ case AArch64::ADDSWrr:
+ Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
+ {AArch64::SUBWri, AArch64::SUBSWri},
+ MI);
+ break;
+ case AArch64::SUBSWrr:
+ Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
+ {AArch64::ADDWri, AArch64::ADDSWri},
+ MI);
+ break;
+ case AArch64::ADDSXrr:
+ Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
+ {AArch64::SUBXri, AArch64::SUBSXri},
+ MI);
+ break;
+ case AArch64::SUBSXrr:
+ Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
+ {AArch64::ADDXri, AArch64::ADDSXri},
+ MI);
break;
}
}
}
- for (MachineInstr *MI : ToBeRemoved)
- MI->eraseFromParent();
-
return Changed;
}
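The split logic these visitors rely on can be modelled standalone. The sketch below covers the arithmetic only (the in-tree splitAddSubImm handles more cases): an AArch64 ADD/SUB immediate encodes 12 bits, optionally shifted left by 12, so a constant of up to 24 bits can be materialized by two chained immediate instructions.

```cpp
#include <cstdint>

// Sketch: split Imm so that Imm == (Imm0 << 12) + Imm1, with each part
// fitting an AArch64 ADD/SUB 12-bit immediate field.
static bool splitAddImm(uint64_t Imm, uint64_t &Imm0, uint64_t &Imm1) {
  if (Imm == 0 || Imm >= (1ULL << 24))
    return false;                // not representable as two 12-bit chunks
  Imm0 = (Imm >> 12) & 0xfff;    // chunk used with the "lsl #12" form
  Imm1 = Imm & 0xfff;            // unshifted chunk
  return Imm0 != 0 && Imm1 != 0; // if either is zero, a single ADD suffices
}
```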
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index 6950675c5d53..a2ab2b855d80 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -15,8 +15,11 @@
#include "AArch64MachineFunctionInfo.h"
#include "AArch64InstrInfo.h"
-#include <llvm/IR/Metadata.h>
-#include <llvm/IR/Module.h>
+#include "AArch64Subtarget.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
@@ -30,7 +33,7 @@ void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
void AArch64FunctionInfo::initializeBaseYamlFields(
const yaml::AArch64FunctionInfo &YamlMFI) {
- if (YamlMFI.HasRedZone.hasValue())
+ if (YamlMFI.HasRedZone)
HasRedZone = YamlMFI.HasRedZone;
}
@@ -77,15 +80,17 @@ static bool ShouldSignWithBKey(const Function &F) {
return Key.equals_insensitive("b_key");
}
-AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) {
+AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF_) : MF(&MF_) {
// If we already know that the function doesn't have a redzone, set
// HasRedZone here.
- if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
+ if (MF->getFunction().hasFnAttribute(Attribute::NoRedZone))
HasRedZone = false;
- const Function &F = MF.getFunction();
+ const Function &F = MF->getFunction();
std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F);
SignWithBKey = ShouldSignWithBKey(F);
+ // TODO: skip functions that have no instrumented allocas for optimization
+ IsMTETagged = F.hasFnAttribute(Attribute::SanitizeMemTag);
if (!F.hasFnAttribute("branch-target-enforcement")) {
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
@@ -101,6 +106,15 @@ AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) {
BranchTargetEnforcement = BTIEnable.equals_insensitive("true");
}
+MachineFunctionInfo *AArch64FunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ AArch64FunctionInfo *InfoClone = DestMF.cloneInfo<AArch64FunctionInfo>(*this);
+ InfoClone->MF = &DestMF;
+ return InfoClone;
+}
+
bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const {
if (!SignReturnAddress)
return false;
@@ -111,6 +125,27 @@ bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const {
bool AArch64FunctionInfo::shouldSignReturnAddress() const {
return shouldSignReturnAddress(llvm::any_of(
- MF.getFrameInfo().getCalleeSavedInfo(),
+ MF->getFrameInfo().getCalleeSavedInfo(),
[](const auto &Info) { return Info.getReg() == AArch64::LR; }));
}
+
+bool AArch64FunctionInfo::needsDwarfUnwindInfo() const {
+ if (!NeedsDwarfUnwindInfo)
+ NeedsDwarfUnwindInfo = MF->needsFrameMoves() &&
+ !MF->getTarget().getMCAsmInfo()->usesWindowsCFI();
+
+ return *NeedsDwarfUnwindInfo;
+}
+
+bool AArch64FunctionInfo::needsAsyncDwarfUnwindInfo() const {
+ if (!NeedsAsyncDwarfUnwindInfo) {
+ const Function &F = MF->getFunction();
+ // The check for "minsize" is there because epilogue unwind info is not
+ // emitted (yet) for homogeneous epilogues, outlined functions, and the
+ // functions they were outlined from.
+ NeedsAsyncDwarfUnwindInfo = needsDwarfUnwindInfo() &&
+ F.getUWTableKind() == UWTableKind::Async &&
+ !F.hasMinSize();
+ }
+ return *NeedsAsyncDwarfUnwindInfo;
+}
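The two accessors above share a small memoization pattern: a const query cached in a mutable Optional. A standalone model of that pattern (using std::optional in place of llvm::Optional, with an invented placeholder predicate):

```cpp
#include <optional>

class FunctionInfoModel {
public:
  bool needsDwarfUnwindInfo() const {
    if (!NeedsDwarfUnwindInfo)                     // first call: compute
      NeedsDwarfUnwindInfo = computeNeedsUnwind();
    return *NeedsDwarfUnwindInfo;                  // later calls: cached
  }

private:
  bool computeNeedsUnwind() const { return true; } // stand-in predicate
  mutable std::optional<bool> NeedsDwarfUnwindInfo;
};
```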
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e5e08e6c00d6..f070f989a5b7 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
@@ -36,7 +37,7 @@ class MachineInstr;
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Backreference to the machine function.
- MachineFunction &MF;
+ MachineFunction *MF;
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
@@ -115,7 +116,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// SRetReturnReg - sret lowering includes returning the value of the
/// returned struct in a register. This field holds the virtual register into
/// which the sret argument is passed.
- unsigned SRetReturnReg = 0;
+ Register SRetReturnReg;
+
/// SVE stack size (for predicates and data vectors) are maintained here
/// rather than in FrameInfo, as the placement and Stack IDs are target
/// specific.
@@ -173,9 +175,29 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// The stack slot where the Swift asynchronous context is stored.
int SwiftAsyncContextFrameIdx = std::numeric_limits<int>::max();
+ bool IsMTETagged = false;
+
+ /// True if the function has a Scalable Vector or Scalable Predicate
+ /// register argument or return type.
+ bool IsSVECC = false;
+
+ /// True if the function needs DWARF unwind information.
+ mutable Optional<bool> NeedsDwarfUnwindInfo;
+
+ /// True if the function needs asynchronous DWARF unwind information.
+ mutable Optional<bool> NeedsAsyncDwarfUnwindInfo;
+
public:
explicit AArch64FunctionInfo(MachineFunction &MF);
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
+ bool isSVECC() const { return IsSVECC; }
+ void setIsSVECC(bool s) { IsSVECC = s; }
+
void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
@@ -395,6 +417,7 @@ public:
bool shouldSignReturnAddress(bool SpillsLR) const;
bool shouldSignWithBKey() const { return SignWithBKey; }
+ bool isMTETagged() const { return IsMTETagged; }
bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
@@ -408,6 +431,9 @@ public:
}
int getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; }
+ bool needsDwarfUnwindInfo() const;
+ bool needsAsyncDwarfUnwindInfo() const;
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
new file mode 100644
index 000000000000..6c8845ee8598
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
@@ -0,0 +1,82 @@
+//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MachineScheduler.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+
+using namespace llvm;
+
+static bool needReorderStoreMI(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STURQi:
+ case AArch64::STRQui:
+ if (MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend())
+ return false;
+ LLVM_FALLTHROUGH;
+ case AArch64::STPQi:
+ return AArch64InstrInfo::getLdStOffsetOp(*MI).isImm();
+ }
+
+ return false;
+}
+
+// Return true if two stores with the same base address may have overlapping
+// writes
+static bool mayOverlapWrite(const MachineInstr &MI0, const MachineInstr &MI1,
+ int64_t &Off0, int64_t &Off1) {
+ const MachineOperand &Base0 = AArch64InstrInfo::getLdStBaseOp(MI0);
+ const MachineOperand &Base1 = AArch64InstrInfo::getLdStBaseOp(MI1);
+
+ // Conservatively assume the writes may overlap if the two stores do not
+ // share the same base
+ if (!Base0.isIdenticalTo(Base1))
+ return true;
+
+ int StoreSize0 = AArch64InstrInfo::getMemScale(MI0);
+ int StoreSize1 = AArch64InstrInfo::getMemScale(MI1);
+ Off0 = AArch64InstrInfo::hasUnscaledLdStOffset(MI0.getOpcode())
+ ? AArch64InstrInfo::getLdStOffsetOp(MI0).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(MI0).getImm() * StoreSize0;
+ Off1 = AArch64InstrInfo::hasUnscaledLdStOffset(MI1.getOpcode())
+ ? AArch64InstrInfo::getLdStOffsetOp(MI1).getImm()
+ : AArch64InstrInfo::getLdStOffsetOp(MI1).getImm() * StoreSize1;
+
+ const MachineInstr &MI = (Off0 < Off1) ? MI0 : MI1;
+ int Multiples = AArch64InstrInfo::isPairedLdSt(MI) ? 2 : 1;
+ int StoreSize = AArch64InstrInfo::getMemScale(MI) * Multiples;
+
+ return llabs(Off0 - Off1) < StoreSize;
+}
+
+bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) {
+ bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);
+
+ if (Cand.isValid()) {
+ MachineInstr *Instr0 = TryCand.SU->getInstr();
+ MachineInstr *Instr1 = Cand.SU->getInstr();
+
+ if (!needReorderStoreMI(Instr0) || !needReorderStoreMI(Instr1))
+ return OriginalResult;
+
+ int64_t Off0, Off1;
+ // With the same base address and non-overlapping writes.
+ if (!mayOverlapWrite(*Instr0, *Instr1, Off0, Off1)) {
+ TryCand.Reason = NodeOrder;
+ // Order them by ascending offsets.
+ return Off0 < Off1;
+ }
+ }
+
+ return OriginalResult;
+}
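The overlap test at the heart of mayOverlapWrite reduces to simple interval arithmetic once both offsets are unscaled to bytes; a standalone restatement (simplified, sizes in bytes):

```cpp
#include <cstdint>
#include <cstdlib>

// Two stores off the same base overlap exactly when the distance between
// their byte offsets is smaller than the width of the store that starts at
// the lower offset.
static bool storesOverlap(int64_t Off0, int64_t Size0,
                          int64_t Off1, int64_t Size1) {
  int64_t LowerStoreSize = (Off0 < Off1) ? Size0 : Size1;
  return std::llabs(Off0 - Off1) < LowerStoreSize;
}
```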
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.h b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
new file mode 100644
index 000000000000..23df015986d1
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
@@ -0,0 +1,33 @@
+//===- AArch64MachineScheduler.h - Custom AArch64 MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom AArch64 MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for AArch64 post RA scheduling.
+class AArch64PostRASchedStrategy : public PostGenericScheduler {
+public:
+ AArch64PostRASchedStrategy(const MachineSchedContext *C) :
+ PostGenericScheduler(C) {}
+
+protected:
+ bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index e8217eaf6ed5..c7657f37d16d 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -157,16 +157,19 @@ static bool isCryptoEORPair(const MachineInstr *FirstMI,
return false;
}
-/// Literal generation.
-static bool isLiteralsPair(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
+static bool isAdrpAddPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
// Assume the 1st instr to be a wildcard if it is unspecified.
-
- // PC relative address.
if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
SecondMI.getOpcode() == AArch64::ADDXri)
return true;
+ return false;
+}
+
+/// Literal generation.
+static bool isLiteralsPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // Assume the 1st instr to be a wildcard if it is unspecified.
// 32 bit immediate.
if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
(SecondMI.getOpcode() == AArch64::MOVKWi &&
@@ -397,6 +400,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
return true;
if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseAdrpAdd() && isAdrpAddPair(FirstMI, SecondMI))
+ return true;
if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
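The new FuseAdrpAdd feature keeps the canonical PC-relative address materialization pair adjacent so cores that support it can fuse the two; an illustration (symbol name invented), shown as assembly inside comments:

```cpp
// The pair the scheduler now keeps together:
//
//   adrp x0, some_symbol              // page address of the symbol
//   add  x0, x0, :lo12:some_symbol    // low 12 bits: offset within the page
```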
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index f443cd03935c..4555f1a3ebb0 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -14,6577 +14,6608 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
+#include "llvm/ADT/ArrayRef.h"
+
// 31 entries have cost 0
-// 242 entries have cost 1
-// 1447 entries have cost 2
-// 3602 entries have cost 3
-// 1237 entries have cost 4
-// 2 entries have cost 5
+// 756 entries have cost 1
+// 3690 entries have cost 2
+// 2084 entries have cost 3
// This table is 6561*4 = 26244 bytes in size.
-static const unsigned PerfectShuffleTable[6561+1] = {
- 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
- 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
- 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
- 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
- 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
- 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
- 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
- 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
- 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
- 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
- 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
- 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
- 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
- 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
- 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
- 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
- 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
- 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
- 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
- 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
- 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
- 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
- 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
- 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
- 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
- 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
- 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
- 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
- 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
- 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
- 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
- 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
- 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
- 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
- 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
- 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
- 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
- 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
- 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
- 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
- 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
- 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
- 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
- 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
- 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
- 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
- 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
- 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
- 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
- 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
- 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
- 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
- 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
- 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
- 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
- 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
- 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
- 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
- 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
- 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
- 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
- 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
- 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
- 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
- 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
- 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
- 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
- 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
- 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
- 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
- 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
- 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
- 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
- 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
- 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
- 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
- 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
- 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
- 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
- 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
- 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
- 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
- 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
- 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
- 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
- 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
- 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
- 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
- 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
- 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
- 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
- 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
- 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
- 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
- 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
- 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
- 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
- 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
- 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
- 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
- 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
- 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
- 835584U, // <0,1,2,3>: Cost 0 copy LHS
- 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
- 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
- 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
- 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
- 835584U, // <0,1,2,u>: Cost 0 copy LHS
- 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
- 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
- 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
- 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
- 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
- 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
- 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
- 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
- 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
- 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
- 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
- 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
- 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
- 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
- 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
- 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
- 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
- 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
- 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
- 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
- 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
- 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
- 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
- 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
- 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
- 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
- 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
- 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
- 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
- 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
- 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
- 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
- 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
- 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
- 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
- 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
- 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
- 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
- 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
- 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
- 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
- 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
- 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
- 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
- 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
- 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
- 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
- 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
- 835584U, // <0,1,u,3>: Cost 0 copy LHS
- 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
- 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
- 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
- 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
- 835584U, // <0,1,u,u>: Cost 0 copy LHS
- 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
- 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
- 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
- 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
- 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
- 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
- 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
- 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
- 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
- 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
- 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
- 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
- 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
- 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
- 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
- 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
- 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
- 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
- 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
- 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
- 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
- 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
- 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
- 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
- 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
- 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
- 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
- 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
- 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
- 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
- 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
- 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
- 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
- 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
- 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
- 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
- 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
- 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
- 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
- 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
- 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
- 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
- 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
- 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
- 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
- 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
- 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
- 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
- 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
- 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
- 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
- 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
- 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
- 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
- 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
- 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
- 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
- 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
- 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
- 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
- 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
- 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
- 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
- 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
- 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
- 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
- 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
- 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
- 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
- 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
- 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
- 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
- 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
- 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
- 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
- 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
- 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
- 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
- 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
- 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
- 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
- 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
- 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
- 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
- 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
- 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
- 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
- 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
- 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
- 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
- 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
- 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
- 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
- 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
- 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
- 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
- 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
- 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
- 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
- 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
- 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
- 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
- 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
- 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
- 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
- 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
- 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
- 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
- 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
- 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
- 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
- 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
- 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
- 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
- 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
- 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
- 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
- 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
- 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
- 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
- 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
- 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
- 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
- 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
- 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
- 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
- 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
- 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
- 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
- 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
- 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
- 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
- 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
- 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
- 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
- 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
- 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
- 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
- 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
- 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
- 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
- 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
- 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
- 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
- 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
- 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
- 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
- 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
- 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
- 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
- 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
- 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
- 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
- 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
- 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
- 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
- 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
- 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
- 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
- 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
- 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
- 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
- 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
- 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
- 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
- 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
- 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
- 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
- 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
- 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
- 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
- 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
- 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
- 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
- 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
- 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
- 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
- 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
- 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
- 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
- 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
- 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
- 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
- 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
- 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
- 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
- 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
- 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
- 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
- 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
- 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
- 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
- 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
- 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
- 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
- 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
- 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
- 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
- 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
- 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
- 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
- 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
- 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
- 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
- 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
- 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
- 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
- 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
- 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
- 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
- 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
- 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
- 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
- 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
- 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
- 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
- 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
- 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
- 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
- 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
- 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
- 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
- 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
- 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
- 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
- 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
- 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
- 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
- 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
- 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
- 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
- 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
- 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
- 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
- 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
- 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
- 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
- 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
- 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
- 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
- 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
- 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
- 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
- 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
- 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
- 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
- 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
- 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
- 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
- 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
- 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
- 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
- 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
- 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
- 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
- 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
- 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
- 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
- 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
- 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
- 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
- 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
- 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
- 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
- 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
- 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
- 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
- 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
- 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
- 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
- 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
- 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
- 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
- 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
- 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
- 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
- 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
- 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
- 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
- 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
- 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
- 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
- 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
- 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
- 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
- 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
- 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
- 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
- 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
- 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
- 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
- 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
- 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
- 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
- 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
- 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
- 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
- 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
- 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
- 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
- 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
- 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
- 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
- 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
- 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
- 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
- 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
- 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
- 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
- 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
- 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
- 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
- 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
- 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
- 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
- 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
- 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
- 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
- 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
- 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
- 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
- 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
- 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
- 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
- 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
- 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
- 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
- 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
- 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
- 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
- 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
- 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
- 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
- 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
- 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
- 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
- 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
- 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
- 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
- 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
- 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
- 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
- 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
- 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
- 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
- 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
- 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
- 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
- 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
- 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
- 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
- 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
- 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
- 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
- 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
- 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
- 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
- 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
- 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
- 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
- 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
- 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
- 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
- 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
- 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
- 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
- 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
- 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
- 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
- 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
- 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
- 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
- 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
- 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
- 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
- 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
- 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
- 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
- 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
- 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
- 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
- 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
- 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
- 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
- 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
- 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
- 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
- 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
- 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
- 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
- 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
- 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
- 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
- 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
- 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
- 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
- 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
- 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
- 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
- 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
- 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
- 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
- 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
- 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
- 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
- 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
- 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
- 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
- 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
- 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
- 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
- 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
- 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
- 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
- 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
- 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
- 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
- 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
- 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
- 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
- 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
- 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
- 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
- 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
- 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
- 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
- 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
- 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
- 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
- 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
- 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
- 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
- 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
- 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
- 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
- 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
- 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
- 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
- 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
- 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
- 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
- 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
- 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
- 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
- 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
- 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
- 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
- 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
- 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
- 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
- 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
- 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
- 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
- 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
- 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
- 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
- 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
- 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
- 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
- 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
- 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
- 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
- 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
- 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
- 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
- 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
- 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
- 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
- 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
- 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
- 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
- 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
- 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
- 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
- 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
- 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
- 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
- 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
- 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
- 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
- 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
- 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
- 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
- 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
- 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
- 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
- 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
- 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
- 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
- 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
- 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
- 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
- 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
- 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
- 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
- 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
- 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
- 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
- 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
- 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
- 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
- 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
- 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
- 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
- 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
- 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
- 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
- 835584U, // <0,u,2,3>: Cost 0 copy LHS
- 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
- 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
- 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
- 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
- 835584U, // <0,u,2,u>: Cost 0 copy LHS
- 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
- 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
- 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
- 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
- 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
- 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
- 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
- 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
- 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
- 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
- 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
- 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
- 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
- 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
- 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
- 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
- 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
- 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
- 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
- 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
- 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
- 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
- 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
- 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
- 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
- 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
- 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
- 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
- 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
- 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
- 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
- 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
- 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
- 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
- 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
- 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
- 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
- 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
- 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
- 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
- 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
- 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
- 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
- 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
- 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
- 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
- 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
- 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
- 835584U, // <0,u,u,3>: Cost 0 copy LHS
- 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
- 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
- 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
- 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
- 835584U, // <0,u,u,u>: Cost 0 copy LHS
- 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
- 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
- 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
- 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
- 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
- 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
- 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
- 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
- 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
- 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
- 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
- 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
- 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
- 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
- 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
- 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
- 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
- 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
- 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
- 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
- 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
- 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
- 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
- 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
- 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
- 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
- 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
- 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
- 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
- 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
- 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
- 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
- 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
- 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
- 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
- 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
- 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
- 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
- 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
- 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
- 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
- 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
- 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
- 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
- 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
- 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
- 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
- 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
- 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
- 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
- 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
- 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
- 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
- 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
- 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
- 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
- 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
- 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
- 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
- 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
- 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
- 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
- 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
- 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
- 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
- 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
- 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
- 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
- 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
- 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
- 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
- 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
- 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
- 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
- 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
- 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
- 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
- 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
- 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
- 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
- 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
- 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
- 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
- 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
- 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
- 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
- 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
- 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
- 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
- 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
- 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
- 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
- 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
- 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
- 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
- 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
- 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
- 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
- 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
- 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
- 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
- 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
- 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
- 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
- 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
- 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
- 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
- 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
- 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
- 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
- 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
- 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
- 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
- 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
- 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
- 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
- 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
- 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
- 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
- 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
- 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
- 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
- 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
- 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
- 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
- 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
- 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
- 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
- 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
- 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
- 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
- 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
- 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
- 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
- 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
- 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
- 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
- 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
- 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
- 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
- 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
- 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
- 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
- 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
- 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
- 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
- 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
- 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
- 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
- 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
- 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
- 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
- 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
- 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
- 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
- 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
- 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
- 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
- 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
- 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
- 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
- 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
- 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
- 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
- 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
- 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
- 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
- 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
- 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
- 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
- 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
- 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
- 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
- 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
- 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
- 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
- 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
- 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
- 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
- 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
- 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
- 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
- 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
- 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
- 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
- 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
- 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
- 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
- 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
- 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
- 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
- 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
- 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
- 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
- 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
- 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
- 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
- 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
- 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
- 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
- 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
- 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
- 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
- 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
- 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
- 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
- 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
- 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
- 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
- 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
- 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
- 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
- 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
- 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
- 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
- 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
- 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
- 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
- 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
- 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
- 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
- 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
- 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
- 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
- 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
- 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
- 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
- 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
- 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
- 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
- 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
- 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
- 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
- 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
- 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
- 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
- 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
- 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
- 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
- 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
- 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
- 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
- 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
- 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
- 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
- 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
- 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
- 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
- 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
- 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
- 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
- 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
- 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
- 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
- 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
- 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
- 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
- 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
- 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
- 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
- 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
- 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
- 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
- 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
- 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
- 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
- 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
- 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
- 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
- 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
- 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
- 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
- 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
- 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
- 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
- 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
- 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
- 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
- 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
- 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
- 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
- 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
- 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
- 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
- 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
- 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
- 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
- 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
- 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
- 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
- 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
- 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
- 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
- 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
- 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
- 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
- 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
- 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
- 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
- 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
- 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
- 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
- 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
- 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
- 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
- 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
- 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
- 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
- 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
- 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
- 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
- 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
- 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
- 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
- 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
- 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
- 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
- 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
- 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
- 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
- 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
- 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
- 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
- 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
- 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
- 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
- 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
- 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
- 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
- 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
- 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
- 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
- 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
- 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
- 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
- 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
- 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
- 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
- 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
- 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
- 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
- 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
- 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
- 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
- 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
- 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
- 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
- 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
- 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
- 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
- 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
- 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
- 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
- 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
- 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
- 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
- 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
- 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
- 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
- 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
- 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
- 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
- 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
- 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
- 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
- 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
- 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
- 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
- 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
- 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
- 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
- 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
- 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
- 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
- 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
- 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
- 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
- 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
- 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
- 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
- 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
- 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
- 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
- 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
- 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
- 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
- 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
- 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
- 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
- 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
- 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
- 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
- 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
- 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
- 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
- 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
- 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
- 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
- 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
- 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
- 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
- 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
- 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
- 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
- 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
- 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
- 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
- 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
- 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
- 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
- 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
- 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
- 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
- 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
- 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
- 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
- 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
- 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
- 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
- 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
- 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
- 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
- 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
- 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
- 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
- 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
- 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
- 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
- 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
- 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
- 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
- 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
- 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
- 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
- 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
- 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
- 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
- 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
- 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
- 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
- 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
- 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
- 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
- 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
- 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
- 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
- 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
- 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
- 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
- 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
- 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
- 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
- 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
- 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
- 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
- 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
- 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
- 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
- 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
- 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
- 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
- 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
- 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
- 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
- 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
- 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
- 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
- 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
- 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
- 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
- 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
- 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
- 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
- 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
- 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
- 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
- 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
- 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
- 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
- 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
- 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
- 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
- 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
- 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
- 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
- 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
- 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
- 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
- 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
- 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
- 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
- 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
- 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
- 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
- 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
- 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
- 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
- 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
- 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
- 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
- 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
- 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
- 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
- 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
- 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
- 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
- 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
- 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
- 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
- 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
- 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
- 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
- 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
- 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
- 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
- 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
- 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
- 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
- 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
- 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
- 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
- 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
- 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
- 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
- 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
- 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
- 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
- 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
- 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
- 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
- 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
- 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
- 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
- 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
- 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
- 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
- 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
- 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
- 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
- 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
- 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
- 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
- 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
- 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
- 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
- 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
- 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
- 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
- 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
- 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
- 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
- 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
- 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
- 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
- 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
- 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
- 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
- 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
- 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
- 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
- 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
- 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
- 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
- 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
- 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
- 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
- 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
- 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
- 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
- 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
- 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
- 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
- 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
- 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
- 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
- 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
- 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
- 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
- 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
- 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
- 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
- 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
- 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
- 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
- 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
- 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
- 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
- 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
- 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
- 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
- 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
- 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
- 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
- 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
- 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
- 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
- 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
- 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
- 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
- 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
- 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
- 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
- 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
- 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
- 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
- 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
- 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
- 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
- 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
- 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
- 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
- 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
- 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
- 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
- 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
- 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
- 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
- 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
- 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
- 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
- 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
- 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
- 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
- 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
- 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
- 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
- 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
- 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
- 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
- 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
- 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
- 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
- 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
- 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
- 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
- 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
- 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
- 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
- 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
- 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
- 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
- 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
- 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
- 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
- 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
- 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
- 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
- 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
- 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
- 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
- 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
- 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
- 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
- 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
- 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
- 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
- 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
- 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
- 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
- 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
- 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
- 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
- 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
- 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
- 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
- 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
- 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
- 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
- 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
- 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
- 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
- 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
- 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
- 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
- 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
- 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
- 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
- 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
- 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
- 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
- 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
- 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
- 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
- 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
- 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
- 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
- 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
- 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
- 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
- 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
- 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
- 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
- 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
- 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
- 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
- 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
- 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
- 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
- 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
- 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
- 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
- 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
- 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
- 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
- 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
- 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
- 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
- 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
- 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
- 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
- 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
- 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
- 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
- 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
- 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
- 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
- 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
- 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
- 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
- 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
- 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
- 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
- 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
- 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
- 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
- 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
- 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
- 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
- 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
- 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
- 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
- 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
- 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
- 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
- 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
- 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
- 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
- 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
- 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
- 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
- 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
- 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
- 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
- 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
- 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
- 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
- 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
- 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
- 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
- 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
- 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
- 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
- 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
- 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
- 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
- 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
- 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
- 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
- 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
- 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
- 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
- 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
- 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
- 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
- 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
- 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
- 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
- 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
- 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
- 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
- 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
- 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
- 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
- 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
- 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
- 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
- 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
- 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
- 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
- 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
- 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
- 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
- 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
- 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
- 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
- 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
- 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
- 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
- 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
- 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
- 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
- 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
- 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
- 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
- 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
- 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
- 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
- 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
- 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
- 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
- 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
- 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
- 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
- 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
- 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
- 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
- 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
- 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
- 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
- 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
- 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
- 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
- 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
- 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
- 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
- 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
- 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
- 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
- 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
- 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
- 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
- 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
- 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
- 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
- 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
- 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
- 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
- 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
- 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
- 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
- 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
- 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
- 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
- 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
- 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
- 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
- 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
- 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
- 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
- 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
- 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
- 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
- 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
- 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
- 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
- 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
- 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
- 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
- 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
- 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
- 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
- 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
- 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
- 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
- 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
- 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
- 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
- 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
- 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
- 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
- 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
- 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
- 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
- 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
- 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
- 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
- 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
- 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
- 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
- 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
- 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
- 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
- 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
- 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
- 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
- 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
- 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
- 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
- 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
- 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
- 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
- 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
- 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
- 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
- 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
- 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
- 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
- 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
- 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
- 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
- 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
- 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
- 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
- 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
- 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
- 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
- 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
- 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
- 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
- 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
- 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
- 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
- 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
- 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
- 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
- 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
- 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
- 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
- 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
- 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
- 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
- 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
- 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
- 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
- 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
- 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
- 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
- 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
- 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
- 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
- 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
- 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
- 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
- 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
- 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
- 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
- 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
- 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
- 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
- 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
- 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
- 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
- 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
- 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
- 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
- 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
- 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
- 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
- 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
- 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
- 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
- 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
- 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
- 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
- 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
- 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
- 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
- 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
- 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
- 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
- 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
- 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
- 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
- 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
- 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
- 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
- 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
- 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
- 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
- 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
- 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
- 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
- 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
- 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
- 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
- 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
- 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
- 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
- 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
- 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
- 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
- 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
- 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
- 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
- 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
- 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
- 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
- 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
- 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
- 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
- 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
- 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
- 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
- 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
- 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
- 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
- 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
- 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
- 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
- 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
- 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
- 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
- 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
- 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
- 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
- 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
- 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
- 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
- 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
- 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
- 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
- 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
- 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
- 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
- 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
- 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
- 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
- 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
- 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
- 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
- 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
- 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
- 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
- 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
- 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
- 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
- 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
- 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
- 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
- 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
- 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
- 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
- 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
- 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
- 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
- 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
- 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
- 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
- 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
- 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
- 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
- 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
- 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
- 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
- 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
- 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
- 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
- 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
- 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
- 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
- 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
- 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
- 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
- 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
- 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
- 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
- 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
- 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
- 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
- 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
- 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
- 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
- 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
- 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
- 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
- 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
- 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
- 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
- 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
- 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
- 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
- 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
- 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
- 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
- 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
- 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
- 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
- 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
- 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
- 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
- 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
- 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
- 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
- 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
- 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
- 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
- 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
- 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
- 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
- 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
- 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
- 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
- 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
- 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
- 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
- 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
- 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
- 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
- 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
- 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
- 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
- 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
- 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
- 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
- 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
- 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
- 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
- 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
- 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
- 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
- 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
- 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
- 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
- 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
- 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
- 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
- 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
- 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
- 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
- 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
- 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
- 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
- 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
- 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
- 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
- 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
- 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
- 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
- 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
- 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
- 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
- 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
- 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
- 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
- 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
- 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
- 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
- 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
- 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
- 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
- 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
- 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
- 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
- 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
- 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
- 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
- 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
- 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
- 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
- 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
- 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
- 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
- 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
- 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
- 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
- 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
- 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
- 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
- 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
- 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
- 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
- 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
- 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
- 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
- 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
- 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
- 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
- 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
- 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
- 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
- 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
- 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
- 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
- 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
- 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
- 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
- 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
- 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
- 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
- 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
- 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
- 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
- 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
- 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
- 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
- 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
- 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
- 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
- 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
- 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
- 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
- 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
- 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
- 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
- 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
- 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
- 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
- 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
- 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
- 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
- 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
- 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
- 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
- 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
- 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
- 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
- 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
- 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
- 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
- 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
- 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
- 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
- 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
- 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
- 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
- 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
- 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
- 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
- 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
- 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
- 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
- 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
- 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
- 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
- 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
- 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
- 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
- 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
- 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
- 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
- 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
- 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
- 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
- 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
- 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
- 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
- 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
- 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
- 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
- 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
- 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
- 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
- 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
- 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
- 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
- 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
- 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
- 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
- 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
- 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
- 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
- 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
- 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
- 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
- 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
- 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
- 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
- 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
- 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
- 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
- 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
- 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
- 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
- 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
- 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
- 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
- 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
- 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
- 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
- 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
- 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
- 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
- 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
- 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
- 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
- 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
- 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
- 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
- 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
- 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
- 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
- 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
- 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
- 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
- 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
- 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
- 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
- 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
- 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
- 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
- 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
- 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
- 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
- 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
- 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
- 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
- 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
- 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
- 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
- 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
- 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
- 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
- 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
- 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
- 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
- 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
- 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
- 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
- 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
- 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
- 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
- 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
- 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
- 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
- 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
- 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
- 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
- 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
- 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
- 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
- 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
- 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
- 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
- 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
- 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
- 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
- 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
- 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
- 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
- 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
- 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
- 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
- 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
- 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
- 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
- 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
- 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
- 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
- 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
- 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
- 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
- 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
- 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
- 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
- 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
- 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
- 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
- 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
- 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
- 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
- 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
- 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
- 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
- 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
- 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
- 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
- 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
- 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
- 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
- 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
- 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
- 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
- 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
- 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
- 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
- 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
- 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
- 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
- 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
- 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
- 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
- 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
- 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
- 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
- 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
- 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
- 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
- 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
- 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
- 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
- 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
- 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
- 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
- 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
- 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
- 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
- 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
- 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
- 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
- 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
- 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
- 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
- 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
- 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
- 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
- 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
- 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
- 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
- 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
- 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
- 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
- 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
- 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
- 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
- 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
- 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
- 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
- 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
- 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
- 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
- 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
- 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
- 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
- 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
- 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
- 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
- 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
- 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
- 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
- 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
- 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
- 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
- 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
- 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
- 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
- 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
- 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
- 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
- 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
- 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
- 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
- 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
- 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
- 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
- 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
- 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
- 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
- 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
- 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
- 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
- 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
- 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
- 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
- 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
- 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
- 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
- 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
- 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
- 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
- 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
- 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
- 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
- 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
- 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
- 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
- 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
- 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
- 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
- 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
- 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
- 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
- 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
- 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
- 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
- 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
- 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
- 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
- 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
- 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
- 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
- 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
- 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
- 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
- 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
- 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
- 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
- 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
- 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
- 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
- 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
- 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
- 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
- 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
- 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
- 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
- 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
- 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
- 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
- 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
- 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
- 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
- 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
- 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
- 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
- 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
- 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
- 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
- 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
- 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
- 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
- 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
- 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
- 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
- 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
- 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
- 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
- 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
- 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
- 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
- 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
- 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
- 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
- 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
- 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
- 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
- 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
- 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
- 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
- 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
- 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
- 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
- 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
- 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
- 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
- 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
- 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
- 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
- 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
- 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
- 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
- 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
- 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
- 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
- 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
- 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
- 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
- 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
- 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
- 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
- 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
- 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
- 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
- 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
- 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
- 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
- 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
- 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
- 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
- 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
- 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
- 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
- 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
- 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
- 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
- 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
- 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
- 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
- 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
- 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
- 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
- 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
- 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
- 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
- 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
- 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
- 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
- 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
- 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
- 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
- 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
- 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
- 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
- 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
- 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
- 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
- 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
- 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
- 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
- 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
- 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
- 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
- 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
- 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
- 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
- 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
- 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
- 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
- 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
- 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
- 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
- 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
- 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
- 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
- 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
- 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
- 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
- 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
- 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
- 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
- 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
- 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
- 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
- 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
- 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
- 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
- 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
- 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
- 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
- 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
- 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
- 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
- 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
- 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
- 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
- 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
- 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
- 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
- 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
- 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
- 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
- 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
- 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
- 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
- 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
- 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
- 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
- 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
- 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
- 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
- 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
- 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
- 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
- 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
- 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
- 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
- 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
- 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
- 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
- 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
- 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
- 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
- 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
- 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
- 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
- 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
- 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
- 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
- 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
- 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
- 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
- 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
- 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
- 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
- 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
- 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
- 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
- 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
- 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
- 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
- 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
- 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
- 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
- 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
- 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
- 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
- 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
- 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
- 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
- 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
- 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
- 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
- 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
- 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
- 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
- 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
- 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
- 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
- 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
- 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
- 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
- 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
- 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
- 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
- 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
- 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
- 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
- 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
- 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
- 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
- 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
- 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
- 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
- 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
- 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
- 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
- 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
- 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
- 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
- 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
- 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
- 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
- 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
- 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
- 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
- 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
- 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
- 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
- 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
- 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
- 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
- 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
- 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
- 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
- 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
- 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
- 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
- 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
- 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
- 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
- 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
- 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
- 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
- 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
- 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
- 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
- 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
- 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
- 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
- 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
- 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
- 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
- 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
- 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
- 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
- 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
- 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
- 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
- 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
- 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
- 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
- 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
- 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
- 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
- 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
- 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
- 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
- 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
- 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
- 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
- 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
- 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
- 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
- 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
- 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
- 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
- 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
- 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
- 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
- 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
- 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
- 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
- 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
- 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
- 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
- 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
- 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
- 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
- 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
- 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
- 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
- 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
- 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
- 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
- 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
- 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
- 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
- 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
- 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
- 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
- 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
- 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
- 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
- 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
- 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
- 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
- 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
- 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
- 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
- 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
- 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
- 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
- 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
- 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
- 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
- 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
- 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
- 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
- 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
- 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
- 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
- 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
- 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
- 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
- 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
- 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
- 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
- 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
- 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
- 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
- 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
- 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
- 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
- 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
- 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
- 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
- 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
- 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
- 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
- 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
- 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
- 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
- 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
- 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
- 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
- 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
- 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
- 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
- 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
- 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
- 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
- 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
- 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
- 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
- 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
- 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
- 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
- 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
- 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
- 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
- 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
- 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
- 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
- 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
- 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
- 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
- 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
- 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
- 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
- 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
- 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
- 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
- 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
- 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
- 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
- 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
- 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
- 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
- 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
- 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
- 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
- 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
- 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
- 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
- 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
- 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
- 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
- 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
- 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
- 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
- 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
- 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
- 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
- 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
- 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
- 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
- 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
- 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
- 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
- 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
- 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
- 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
- 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
- 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
- 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
- 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
- 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
- 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
- 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
- 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
- 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
- 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
- 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
- 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
- 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
- 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
- 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
- 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
- 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
- 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
- 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
- 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
- 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
- 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
- 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
- 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
- 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
- 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
- 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
- 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
- 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
- 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
- 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
- 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
- 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
- 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
- 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
- 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
- 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
- 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
- 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
- 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
- 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
- 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
- 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
- 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
- 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
- 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
- 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
- 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
- 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
- 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
- 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
- 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
- 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
- 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
- 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
- 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
- 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
- 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
- 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
- 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
- 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
- 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
- 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
- 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
- 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
- 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
- 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
- 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
- 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
- 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
- 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
- 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
- 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
- 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
- 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
- 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
- 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
- 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
- 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
- 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
- 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
- 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
- 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
- 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
- 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
- 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
- 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
- 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
- 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
- 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
- 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
- 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
- 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
- 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
- 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
- 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
- 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
- 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
- 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
- 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
- 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
- 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
- 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
- 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
- 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
- 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
- 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
- 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
- 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
- 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
- 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
- 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
- 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
- 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
- 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
- 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
- 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
- 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
- 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
- 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
- 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
- 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
- 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
- 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
- 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
- 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
- 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
- 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
- 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
- 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
- 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
- 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
- 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
- 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
- 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
- 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
- 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
- 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
- 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
- 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
- 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
- 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
- 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
- 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
- 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
- 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
- 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
- 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
- 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
- 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
- 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
- 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
- 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
- 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
- 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
- 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
- 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
- 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
- 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
- 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
- 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
- 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
- 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
- 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
- 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
- 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
- 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
- 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
- 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
- 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
- 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
- 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
- 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
- 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
- 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
- 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
- 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
- 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
- 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
- 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
- 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
- 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
- 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
- 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
- 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
- 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
- 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
- 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
- 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
- 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
- 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
- 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
- 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
- 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
- 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
- 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
- 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
- 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
- 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
- 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
- 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
- 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
- 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
- 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
- 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
- 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
- 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
- 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
- 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
- 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
- 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
- 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
- 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
- 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
- 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
- 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
- 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
- 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
- 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
- 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
- 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
- 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
- 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
- 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
- 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
- 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
- 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
- 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
- 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
- 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
- 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
- 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
- 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
- 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
- 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
- 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
- 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
- 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
- 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
- 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
- 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
- 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
- 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
- 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
- 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
- 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
- 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
- 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
- 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
- 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
- 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
- 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
- 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
- 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
- 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
- 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
- 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
- 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
- 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
- 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
- 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
- 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
- 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
- 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
- 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
- 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
- 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
- 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
- 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
- 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
- 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
- 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
- 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
- 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
- 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
- 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
- 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
- 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
- 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
- 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
- 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
- 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
- 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
- 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
- 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
- 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
- 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
- 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
- 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
- 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
- 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
- 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
- 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
- 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
- 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
- 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
- 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
- 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
- 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
- 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
- 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
- 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
- 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
- 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
- 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
- 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
- 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
- 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
- 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
- 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
- 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
- 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
- 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
- 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
- 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
- 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
- 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
- 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
- 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
- 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
- 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
- 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
- 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
- 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
- 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
- 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
- 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
- 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
- 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
- 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
- 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
- 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
- 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
- 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
- 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
- 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
- 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
- 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
- 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
- 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
- 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
- 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
- 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
- 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
- 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
- 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
- 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
- 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
- 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
- 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
- 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
- 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
- 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
- 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
- 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
- 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
- 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
- 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
- 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
- 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
- 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
- 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
- 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
- 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
- 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
- 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
- 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
- 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
- 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
- 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
- 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
- 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
- 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
- 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
- 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
- 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
- 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
- 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
- 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
- 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
- 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
- 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
- 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
- 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
- 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
- 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
- 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
- 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
- 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
- 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
- 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
- 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
- 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
- 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
- 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
- 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
- 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
- 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
- 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
- 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
- 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
- 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
- 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
- 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
- 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
- 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
- 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
- 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
- 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
- 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
- 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
- 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
- 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
- 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
- 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
- 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
- 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
- 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
- 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
- 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
- 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
- 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
- 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
- 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
- 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
- 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
- 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
- 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
- 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
- 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
- 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
- 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
- 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
- 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
- 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
- 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
- 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
- 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
- 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
- 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
- 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
- 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
- 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
- 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
- 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
- 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
- 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
- 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
- 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
- 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
- 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
- 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
- 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
- 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
- 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
- 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
- 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
- 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
- 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
- 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
- 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
- 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
- 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
- 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
- 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
- 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
- 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
- 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
- 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
- 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
- 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
- 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
- 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
- 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
- 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
- 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
- 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
- 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
- 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
- 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
- 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
- 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
- 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
- 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
- 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
- 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
- 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
- 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
- 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
- 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
- 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
- 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
- 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
- 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
- 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
- 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
- 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
- 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
- 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
- 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
- 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
- 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
- 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
- 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
- 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
- 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
- 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
- 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
- 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
- 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
- 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
- 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
- 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
- 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
- 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
- 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
- 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
- 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
- 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
- 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
- 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
- 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
- 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
- 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
- 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
- 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
- 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
- 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
- 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
- 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
- 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
- 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
- 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
- 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
- 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
- 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
- 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
- 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
- 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
- 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
- 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
- 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
- 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
- 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
- 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
- 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
- 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
- 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
- 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
- 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
- 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
- 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
- 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
- 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
- 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
- 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
- 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
- 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
- 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
- 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
- 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
- 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
- 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
- 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
- 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
- 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
- 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
- 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
- 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
- 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
- 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
- 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
- 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
- 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
- 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
- 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
- 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
- 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
- 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
- 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
- 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
- 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
- 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
- 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
- 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
- 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
- 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
- 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
- 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
- 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
- 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
- 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
- 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
- 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
- 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
- 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
- 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
- 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
- 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
- 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
- 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
- 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
- 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
- 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
- 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
- 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
- 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
- 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
- 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
- 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
- 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
- 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
- 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
- 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
- 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
- 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
- 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
- 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
- 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
- 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
- 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
- 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
- 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
- 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
- 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
- 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
- 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
- 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
- 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
- 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
- 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
- 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
- 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
- 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
- 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
- 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
- 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
- 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
- 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
- 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
- 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
- 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
- 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
- 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
- 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
- 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
- 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
- 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
- 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
- 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
- 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
- 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
- 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
- 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
- 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
- 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
- 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
- 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
- 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
- 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
- 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
- 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
- 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
- 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
- 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
- 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
- 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
- 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
- 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
- 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
- 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
- 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
- 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
- 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
- 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
- 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
- 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
- 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
- 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
- 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
- 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
- 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
- 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
- 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
- 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
- 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
- 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
- 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
- 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
- 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
- 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
- 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
- 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
- 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
- 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
- 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
- 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
- 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
- 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
- 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
- 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
- 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
- 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
- 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
- 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
- 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
- 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
- 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
- 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
- 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
- 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
- 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
- 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
- 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
- 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
- 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
- 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
- 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
- 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
- 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
- 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
- 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
- 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
- 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
- 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
- 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
- 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
- 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
- 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
- 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
- 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
- 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
- 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
- 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
- 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
- 27705344U, // <4,5,6,7>: Cost 0 copy RHS
- 27705344U, // <4,5,6,u>: Cost 0 copy RHS
- 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
- 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
- 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
- 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
- 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
- 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
- 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
- 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
- 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
- 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
- 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
- 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
- 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
- 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
- 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
- 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
- 27705344U, // <4,5,u,7>: Cost 0 copy RHS
- 27705344U, // <4,5,u,u>: Cost 0 copy RHS
- 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
- 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
- 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
- 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
- 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
- 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
- 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
- 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
- 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
- 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
- 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
- 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
- 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
- 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
- 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
- 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
- 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
- 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
- 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
- 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
- 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
- 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
- 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
- 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
- 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
- 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
- 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
- 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
- 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
- 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
- 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
- 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
- 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
- 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
- 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
- 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
- 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
- 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
- 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
- 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
- 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
- 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
- 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
- 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
- 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
- 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
- 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
- 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
- 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
- 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
- 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
- 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
- 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
- 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
- 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
- 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
- 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
- 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
- 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
- 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
- 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
- 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
- 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
- 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
- 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
- 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
- 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
- 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
- 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
- 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
- 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
- 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
- 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
- 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
- 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
- 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
- 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
- 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
- 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
- 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
- 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
- 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
- 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
- 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
- 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
- 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
- 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
- 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
- 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
- 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
- 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
- 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
- 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
- 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
- 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
- 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
- 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
- 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
- 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
- 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
- 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
- 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
- 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
- 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
- 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
- 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
- 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
- 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
- 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
- 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
- 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
- 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
- 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
- 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
- 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
- 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
- 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
- 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
- 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
- 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
- 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
- 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
- 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
- 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
- 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
- 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
- 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
- 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
- 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
- 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
- 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
- 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
- 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
- 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
- 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
- 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
- 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
- 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
- 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
- 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
- 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
- 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
- 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
- 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
- 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
- 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
- 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
- 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
- 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
- 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
- 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
- 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
- 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
- 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
- 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
- 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
- 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
- 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
- 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
- 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
- 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
- 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
- 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
- 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
- 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
- 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
- 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
- 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
- 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
- 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
- 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
- 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
- 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
- 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
- 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
- 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
- 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
- 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
- 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
- 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
- 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
- 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
- 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
- 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
- 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
- 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
- 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
- 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
- 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
- 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
- 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
- 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
- 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
- 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
- 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
- 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
- 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
- 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
- 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
- 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
- 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
- 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
- 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
- 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
- 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
- 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
- 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
- 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
- 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
- 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
- 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
- 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
- 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
- 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
- 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
- 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
- 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
- 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
- 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
- 27705344U, // <4,u,6,7>: Cost 0 copy RHS
- 27705344U, // <4,u,6,u>: Cost 0 copy RHS
- 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
- 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
- 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
- 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
- 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
- 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
- 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
- 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
- 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
- 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
- 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
- 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
- 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
- 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
- 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 27705344U, // <4,u,u,7>: Cost 0 copy RHS
- 27705344U, // <4,u,u,u>: Cost 0 copy RHS
- 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
- 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
- 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
- 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
- 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
- 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
- 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
- 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
- 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
- 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
- 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
- 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
- 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
- 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
- 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
- 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
- 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
- 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
- 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
- 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
- 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
- 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
- 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
- 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
- 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
- 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
- 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
- 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
- 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
- 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
- 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
- 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
- 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
- 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
- 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
- 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
- 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
- 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
- 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
- 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
- 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
- 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
- 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
- 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
- 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
- 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
- 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
- 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
- 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
- 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
- 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
- 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
- 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
- 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
- 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
- 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
- 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
- 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
- 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
- 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
- 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
- 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
- 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
- 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
- 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
- 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
- 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
- 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
- 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
- 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
- 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
- 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
- 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
- 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
- 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
- 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
- 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
- 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
- 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
- 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
- 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
- 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
- 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
- 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
- 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
- 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
- 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
- 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
- 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
- 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
- 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
- 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
- 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
- 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
- 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
- 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
- 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
- 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
- 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
- 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
- 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
- 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
- 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
- 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
- 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
- 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
- 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
- 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
- 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
- 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
- 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
- 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
- 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
- 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
- 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
- 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
- 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
- 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
- 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
- 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
- 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
- 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
- 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
- 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
- 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
- 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
- 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
- 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
- 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
- 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
- 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
- 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
- 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
- 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
- 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
- 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
- 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
- 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
- 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
- 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
- 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
- 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
- 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
- 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
- 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
- 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
- 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
- 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
- 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
- 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
- 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
- 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
- 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
- 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
- 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
- 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
- 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
- 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
- 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
- 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
- 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
- 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
- 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
- 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
- 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
- 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
- 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
- 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
- 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
- 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
- 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
- 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
- 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
- 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
- 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
- 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
- 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
- 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
- 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
- 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
- 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
- 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
- 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
- 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
- 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
- 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
- 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
- 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
- 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
- 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
- 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
- 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
- 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
- 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
- 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
- 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
- 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
- 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
- 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
- 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
- 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
- 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
- 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
- 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
- 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
- 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
- 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
- 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
- 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
- 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
- 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
- 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
- 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
- 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
- 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
- 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
- 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
- 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
- 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
- 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
- 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
- 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
- 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
- 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
- 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
- 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
- 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
- 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
- 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
- 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
- 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
- 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
- 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
- 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
- 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
- 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
- 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
- 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
- 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
- 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
- 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
- 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
- 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
- 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
- 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
- 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
- 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
- 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
- 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
- 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
- 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
- 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
- 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
- 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
- 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
- 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
- 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
- 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
- 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
- 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
- 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
- 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
- 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
- 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
- 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
- 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
- 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
- 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
- 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
- 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
- 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
- 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
- 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
- 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
- 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
- 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
- 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
- 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
- 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
- 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
- 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
- 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
- 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
- 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
- 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
- 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
- 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
- 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
- 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
- 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
- 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
- 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
- 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
- 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
- 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
- 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
- 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
- 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
- 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
- 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
- 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
- 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
- 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
- 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
- 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
- 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
- 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
- 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
- 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
- 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
- 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
- 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
- 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
- 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
- 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
- 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
- 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
- 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
- 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
- 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
- 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
- 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
- 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
- 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
- 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
- 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
- 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
- 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
- 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
- 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
- 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
- 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
- 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
- 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
- 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
- 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
- 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
- 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
- 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
- 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
- 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
- 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
- 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
- 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
- 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
- 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
- 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
- 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
- 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
- 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
- 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
- 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
- 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
- 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
- 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
- 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
- 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
- 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
- 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
- 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
- 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
- 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
- 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
- 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
- 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
- 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
- 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
- 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
- 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
- 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
- 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
- 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
- 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
- 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
- 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
- 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
- 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
- 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
- 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
- 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
- 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
- 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
- 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
- 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
- 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
- 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
- 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
- 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
- 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
- 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
- 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
- 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
- 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
- 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
- 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
- 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
- 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
- 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
- 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
- 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
- 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
- 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
- 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
- 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
- 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
- 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
- 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
- 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
- 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
- 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
- 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
- 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
- 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
- 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
- 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
- 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
- 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
- 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
- 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
- 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
- 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
- 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
- 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
- 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
- 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
- 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
- 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
- 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
- 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
- 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
- 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
- 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
- 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
- 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
- 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
- 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
- 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
- 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
- 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
- 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
- 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
- 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
- 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
- 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
- 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
- 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
- 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
- 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
- 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
- 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
- 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
- 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
- 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
- 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
- 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
- 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
- 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
- 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
- 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
- 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
- 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
- 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
- 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
- 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
- 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
- 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
- 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
- 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
- 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
- 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
- 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
- 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
- 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
- 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
- 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
- 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
- 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
- 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
- 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
- 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
- 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
- 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
- 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
- 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
- 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
- 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
- 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
- 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
- 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
- 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
- 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
- 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
- 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
- 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
- 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
- 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
- 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
- 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
- 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
- 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
- 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
- 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
- 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
- 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
- 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
- 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
- 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
- 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
- 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
- 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
- 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
- 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
- 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
- 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
- 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
- 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
- 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
- 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
- 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
- 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
- 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
- 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
- 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
- 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
- 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
- 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
- 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
- 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
- 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
- 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
- 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
- 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
- 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
- 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
- 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
- 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
- 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
- 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
- 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
- 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
- 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
- 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
- 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
- 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
- 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
- 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
- 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
- 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
- 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
- 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
- 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
- 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
- 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
- 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
- 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
- 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
- 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
- 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
- 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
- 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
- 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
- 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
- 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
- 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
- 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
- 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
- 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
- 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
- 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
- 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
- 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
- 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
- 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
- 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
- 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
- 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
- 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
- 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
- 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
- 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
- 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
- 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
- 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
- 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
- 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
- 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
- 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
- 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
- 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
- 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
- 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
- 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
- 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
- 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
- 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
- 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
- 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
- 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
- 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
- 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
- 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
- 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
- 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
- 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
- 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
- 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
- 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
- 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
- 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
- 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
- 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
- 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
- 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
- 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
- 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
- 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
- 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
- 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
- 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
- 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
- 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
- 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
- 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
- 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
- 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
- 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
- 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
- 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
- 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
- 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
- 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
- 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
- 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
- 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
- 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
- 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
- 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
- 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
- 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
- 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
- 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
- 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
- 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
- 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
- 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
- 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
- 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
- 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
- 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
- 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
- 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
- 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
- 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
- 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
- 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
- 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
- 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
- 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
- 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
- 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
- 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
- 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
- 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
- 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
- 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
- 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
- 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
- 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
- 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
- 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
- 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
- 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
- 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
- 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
- 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
- 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
- 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
- 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
- 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
- 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
- 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
- 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
- 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
- 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
- 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
- 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
- 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
- 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
- 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
- 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
- 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
- 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
- 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
- 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
- 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
- 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
- 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
- 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
- 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
- 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
- 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
- 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
- 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
- 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
- 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
- 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
- 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
- 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
- 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
- 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
- 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
- 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
- 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
- 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
- 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
- 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
- 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
- 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
- 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
- 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
- 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
- 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
- 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
- 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
- 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
- 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
- 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
- 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
- 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
- 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
- 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
- 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
- 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
- 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
- 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
- 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
- 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
- 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
- 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
- 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
- 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
- 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
- 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
- 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
- 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
- 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
- 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
- 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
- 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
- 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
- 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
- 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
- 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
- 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
- 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
- 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
- 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
- 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
- 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
- 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
- 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
- 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
- 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
- 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
- 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
- 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
- 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
- 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
- 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
- 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
- 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
- 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
- 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
- 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
- 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
- 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
- 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
- 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
- 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
- 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
- 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
- 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
- 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
- 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
- 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
- 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
- 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
- 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
- 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
- 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
- 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
- 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
- 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
- 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
- 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
- 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
- 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
- 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
- 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
- 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
- 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
- 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
- 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
- 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
- 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
- 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
- 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
- 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
- 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
- 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
- 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
- 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
- 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
- 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
- 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
- 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
- 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
- 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
- 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
- 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
- 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
- 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
- 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
- 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
- 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
- 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
- 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
- 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
- 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
- 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
- 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
- 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
- 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
- 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
- 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
- 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
- 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
- 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
- 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
- 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
- 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
- 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
- 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
- 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
- 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
- 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
- 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
- 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
- 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
- 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
- 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
- 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
- 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
- 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
- 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
- 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
- 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
- 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
- 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
- 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
- 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
- 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
- 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
- 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
- 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
- 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
- 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
- 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
- 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
- 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
- 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
- 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
- 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
- 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
- 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
- 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
- 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
- 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
- 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
- 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
- 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
- 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
- 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
- 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
- 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
- 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
- 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
- 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
- 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
- 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
- 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
- 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
- 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
- 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
- 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
- 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
- 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
- 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
- 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
- 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
- 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
- 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
- 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
- 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
- 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
- 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
- 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
- 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
- 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
- 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
- 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
- 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
- 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
- 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
- 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
- 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
- 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
- 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
- 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
- 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
- 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
- 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
- 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
- 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
- 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
- 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
- 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
- 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
- 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
- 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
- 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
- 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
- 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
- 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
- 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
- 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
- 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
- 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
- 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
- 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
- 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
- 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
- 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
- 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
- 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
- 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
- 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
- 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
- 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
- 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
- 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
- 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
- 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
- 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
- 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
- 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
- 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
- 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
- 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
- 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
- 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
- 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
- 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
- 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
- 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
- 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
- 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
- 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
- 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
- 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
- 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
- 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
- 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
- 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
- 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
- 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
- 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
- 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
- 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
- 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
- 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
- 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
- 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
- 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
- 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
- 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
- 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
- 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
- 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
- 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
- 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
- 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
- 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
- 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
- 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
- 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
- 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
- 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
- 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
- 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
- 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
- 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
- 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
- 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
- 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
- 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
- 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
- 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
- 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
- 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
- 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
- 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
- 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
- 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
- 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
- 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
- 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
- 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
- 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
- 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
- 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
- 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
- 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
- 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
- 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
- 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
- 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
- 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
- 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
- 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
- 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
- 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
- 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
- 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
- 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
- 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
- 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
- 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
- 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
- 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
- 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
- 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
- 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
- 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
- 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
- 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
- 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
- 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
- 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
- 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
- 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
- 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
- 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
- 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
- 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
- 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
- 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
- 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
- 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
- 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
- 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
- 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
- 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
- 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
- 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
- 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
- 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
- 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
- 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
- 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
- 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
- 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
- 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
- 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
- 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
- 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
- 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
- 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
- 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
- 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
- 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
- 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
- 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
- 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
- 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
- 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
- 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
- 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
- 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
- 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
- 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
- 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
- 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
- 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
- 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
- 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
- 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
- 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
- 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
- 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
- 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
- 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
- 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
- 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
- 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
- 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
- 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
- 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
- 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
- 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
- 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
- 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
- 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
- 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
- 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
- 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
- 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
- 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
- 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
- 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
- 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
- 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
- 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
- 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
- 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
- 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
- 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
- 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
- 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
- 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
- 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
- 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
- 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
- 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
- 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
- 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
- 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
- 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
- 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
- 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
- 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
- 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
- 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
- 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
- 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
- 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
- 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
- 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
- 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
- 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
- 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
- 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
- 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
- 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
- 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
- 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
- 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
- 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
- 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
- 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
- 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
- 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
- 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
- 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
- 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
- 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
- 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
- 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
- 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
- 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
- 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
- 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
- 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
- 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
- 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
- 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
- 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
- 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
- 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
- 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
- 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
- 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
- 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
- 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
- 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
- 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
- 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
- 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
- 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
- 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
- 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
- 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
- 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
- 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
- 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
- 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
- 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
- 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
- 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
- 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
- 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
- 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
- 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
- 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
- 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
- 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
- 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
- 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
- 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
- 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
- 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
- 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
- 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
- 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
- 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
- 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
- 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
- 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
- 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
- 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
- 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
- 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
- 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
- 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
- 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
- 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
- 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
- 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
- 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
- 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
- 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
- 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
- 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
- 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
- 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
- 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
- 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
- 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
- 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
- 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
- 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
- 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
- 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
- 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
- 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
- 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
- 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
- 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
- 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
- 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
- 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
- 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
- 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
- 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
- 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
- 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
- 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
- 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
- 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
- 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
- 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
- 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
- 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
- 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
- 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
- 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
- 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
- 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
- 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
- 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
- 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
- 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
- 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
- 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
- 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
- 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
- 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
- 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
- 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
- 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
- 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
- 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
- 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
- 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
- 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
- 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
- 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
- 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
- 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
- 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
- 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
- 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
- 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
- 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
- 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
- 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
- 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
- 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
- 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
- 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
- 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
- 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
- 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
- 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
- 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
- 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
- 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
- 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
- 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
- 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
- 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
- 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
- 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
- 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
- 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
- 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
- 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
- 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
- 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
- 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
- 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
- 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
- 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
- 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
- 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
- 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
- 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
- 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
- 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
- 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
- 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
- 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
- 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
- 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
- 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
- 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
- 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
- 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
- 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
- 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
- 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
- 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
- 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
- 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
- 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
- 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
- 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
- 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
- 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
- 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
- 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
- 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
- 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
- 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
- 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
- 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
- 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
- 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
- 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
- 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
- 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
- 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
- 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
- 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
- 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
- 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
- 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
- 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
- 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
- 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
- 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
- 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
- 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
- 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
- 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
- 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
- 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
- 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
- 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
- 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
- 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
- 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
- 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
- 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
- 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
- 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
- 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
- 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
- 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
- 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
- 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
- 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
- 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
- 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
- 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
- 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
- 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
- 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
- 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
- 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
- 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
- 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
- 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
- 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
- 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
- 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
- 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
- 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
- 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
- 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
- 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
- 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
- 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
- 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
- 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
- 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
- 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
- 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
- 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
- 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
- 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
- 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
- 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
- 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
- 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
- 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
- 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
- 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
- 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
- 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
- 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
- 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
- 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
- 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
- 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
- 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
- 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
- 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
- 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
- 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
- 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
- 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
- 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
- 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
- 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
- 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
- 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
- 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
- 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
- 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
- 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
- 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
- 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
- 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
- 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
- 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
- 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
- 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
- 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
- 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
- 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
- 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
- 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
- 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
- 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
- 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
- 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
- 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
- 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
- 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
- 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
- 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
- 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
- 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
- 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
- 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
- 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
- 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
- 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
- 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
- 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
- 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
- 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
- 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
- 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
- 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
- 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
- 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
- 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
- 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
- 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
- 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
- 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
- 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
- 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
- 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
- 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
- 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
- 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
- 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
- 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
- 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
- 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
- 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
- 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
- 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
- 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
- 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
- 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
- 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
- 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
- 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
- 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
- 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
- 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
- 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
- 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
- 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
- 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
- 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
- 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
- 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
- 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
- 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
- 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
- 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
- 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
- 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
- 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
- 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
- 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
- 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
- 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
- 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
- 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
- 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
- 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
- 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
- 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
- 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
- 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
- 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
- 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
- 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
- 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
- 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
- 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
- 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
- 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
- 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
- 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
- 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
- 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
- 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
- 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
- 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
- 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
- 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
- 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
- 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
- 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
- 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
- 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
- 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
- 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
- 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
- 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
- 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
- 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
- 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
- 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
- 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
- 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
- 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
- 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
- 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
- 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
- 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
- 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
- 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
- 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
- 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
- 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
- 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
- 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
- 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
- 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
- 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
- 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
- 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
- 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
- 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
- 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
- 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
- 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
- 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
- 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
- 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
- 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
- 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
- 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
- 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
- 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
- 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
- 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
- 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
- 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
- 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
- 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
- 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
- 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
- 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
- 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
- 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
- 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
- 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
- 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
- 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
- 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
- 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
- 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
- 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
- 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
- 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
- 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
- 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
- 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
- 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
- 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
- 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
- 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
- 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
- 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
- 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
- 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
- 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
- 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
- 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
- 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
- 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
- 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
- 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
- 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
- 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
- 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
- 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
- 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
- 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
- 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
- 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
- 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
- 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
- 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
- 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
- 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
- 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
- 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
- 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
- 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
- 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
- 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
- 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
- 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
- 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
- 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
- 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
- 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
- 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
- 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
- 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
- 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
- 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
- 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
- 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
- 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
- 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
- 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
- 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
- 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
- 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
- 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
- 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
- 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
- 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
- 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
- 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
- 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
- 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
- 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
- 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
- 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
- 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
- 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
- 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
- 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
- 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
- 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
- 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
- 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
- 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
- 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
- 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
- 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
- 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
- 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
- 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
- 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
- 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
- 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
- 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
- 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
- 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
- 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
- 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
- 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
- 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
- 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
- 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
- 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
- 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
- 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
- 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
- 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
- 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
- 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
- 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
- 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
- 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
- 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
- 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
- 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
- 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
- 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
- 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
- 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
- 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
- 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
- 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
- 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
- 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
- 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
- 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
- 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
- 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
- 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
- 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
- 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
- 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
- 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
- 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
- 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
- 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
- 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
- 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
- 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
- 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
- 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
- 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
- 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
- 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
- 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
- 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
- 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
- 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
- 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
- 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
- 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
- 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
- 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
- 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
- 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
- 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
- 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
- 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
- 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
- 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
- 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
- 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
- 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
- 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
- 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
- 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
- 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
- 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
- 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
- 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
- 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
- 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
- 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
- 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
- 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
- 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
- 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
- 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
- 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
- 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
- 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
- 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
- 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
- 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
- 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
- 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
- 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
- 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
- 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
- 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
- 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
- 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
- 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
- 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
- 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
- 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
- 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
- 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
- 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
- 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
- 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
- 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
- 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
- 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
- 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
- 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
- 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
- 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
- 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
- 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
- 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
- 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
- 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
- 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
- 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
- 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
- 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
- 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
- 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
- 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
- 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
- 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
- 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
- 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
- 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
- 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
- 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
- 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
- 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
- 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
- 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
- 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
- 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
- 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
- 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
- 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
- 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
- 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
- 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
- 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
- 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
- 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
- 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
- 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
- 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
- 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
- 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
- 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
- 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
- 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
- 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
- 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
- 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
- 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
- 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
- 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
- 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
- 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
- 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
- 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
- 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
- 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
- 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
- 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
- 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
- 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
- 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
- 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
- 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
- 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
- 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
- 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
- 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
- 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
- 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
- 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
- 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
- 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
- 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
- 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
- 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
- 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
- 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
- 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
- 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
- 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
- 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
- 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
- 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
- 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
- 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
- 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
- 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
- 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
- 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
- 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
- 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
- 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
- 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
- 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
- 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
- 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
- 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
- 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
- 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
- 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
- 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
- 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
- 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
- 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
- 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
- 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
- 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
- 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
- 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
- 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
- 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
- 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
- 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
- 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
- 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
- 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
- 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
- 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
- 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
- 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
- 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
- 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
- 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
- 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
- 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
- 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
- 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
- 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
- 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
- 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
- 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
- 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
- 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
- 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
- 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
- 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
- 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
- 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
- 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
- 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
- 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
- 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
- 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
- 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
- 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
- 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
- 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
- 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
- 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
- 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
- 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
- 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
- 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
- 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
- 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
- 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
- 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
- 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
- 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
- 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
- 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
- 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
- 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
- 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
- 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
- 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
- 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
- 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
- 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
- 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
- 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
- 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
- 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
- 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
- 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
- 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
- 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
- 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
- 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
- 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
- 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
- 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
- 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
- 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
- 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
- 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
- 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
- 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
- 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
- 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
- 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
- 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
- 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
- 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
- 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
- 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
- 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
- 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
- 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
- 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
- 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
- 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
- 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
- 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
- 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
- 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
- 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
- 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
- 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
- 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
- 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
- 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
- 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
- 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
- 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
- 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
- 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
- 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
- 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
- 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
- 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
- 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
- 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
- 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
- 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
- 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
- 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
- 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
- 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
- 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
- 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
- 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
- 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
- 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
- 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
- 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
- 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
- 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
- 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
- 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
- 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
- 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
- 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
- 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
- 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
- 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
- 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
- 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
- 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
- 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
- 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
- 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
- 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
- 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
- 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
- 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
- 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
- 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
- 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
- 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
- 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
- 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
- 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
- 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
- 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
- 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
- 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
- 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
- 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
- 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
- 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
- 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
- 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
- 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
- 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
- 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
- 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
- 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
- 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
- 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
- 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
- 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
- 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
- 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
- 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
- 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
- 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
- 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
- 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
- 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
- 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
- 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
- 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
- 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
- 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
- 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
- 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
- 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
- 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
- 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
- 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
- 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
- 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
- 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
- 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
- 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
- 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
- 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
- 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
- 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
- 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
- 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
- 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
- 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
- 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
- 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
- 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
- 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
- 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
- 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
- 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
- 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
- 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
- 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
- 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
- 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
- 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
- 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
- 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
- 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
- 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
- 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
- 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
- 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
- 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
- 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
- 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
- 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
- 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
- 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
- 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
- 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
- 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
- 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
- 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
- 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
- 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
- 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
- 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
- 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
- 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
- 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
- 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
- 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
- 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
- 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
- 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
- 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
- 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
- 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
- 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
- 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
- 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
- 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
- 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
- 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
- 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
- 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
- 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
- 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
- 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
- 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
- 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
- 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
- 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
- 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
- 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
- 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
- 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
- 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
- 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
- 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
- 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
- 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
- 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
- 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
- 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
- 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
- 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
- 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
- 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
- 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
- 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
- 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
- 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
- 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
- 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
- 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
- 835584U, // <u,1,2,3>: Cost 0 copy LHS
- 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
- 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
- 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
- 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
- 835584U, // <u,1,2,u>: Cost 0 copy LHS
- 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
- 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
- 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
- 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
- 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
- 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
- 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
- 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
- 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
- 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
- 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
- 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
- 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
- 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
- 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
- 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
- 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
- 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
- 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
- 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
- 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
- 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
- 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
- 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
- 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
- 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
- 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
- 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
- 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
- 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
- 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
- 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
- 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
- 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
- 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
- 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
- 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
- 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
- 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
- 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
- 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
- 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
- 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
- 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
- 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
- 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
- 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
- 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
- 835584U, // <u,1,u,3>: Cost 0 copy LHS
- 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
- 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
- 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
- 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
- 835584U, // <u,1,u,u>: Cost 0 copy LHS
- 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
- 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
- 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
- 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
- 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
- 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
- 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
- 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
- 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
- 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
- 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
- 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
- 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
- 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
- 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
- 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
- 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
- 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
- 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
- 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
- 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
- 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
- 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
- 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
- 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
- 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
- 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
- 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
- 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
- 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
- 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
- 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
- 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
- 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
- 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
- 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
- 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
- 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
- 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
- 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
- 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
- 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
- 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
- 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
- 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
- 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
- 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
- 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
- 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
- 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
- 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
- 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
- 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
- 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
- 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
- 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
- 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
- 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
- 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
- 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
- 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
- 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
- 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
- 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
- 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
- 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
- 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
- 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
- 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
- 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
- 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
- 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
- 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
- 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
- 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
- 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
- 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
- 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
- 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
- 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
- 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
- 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
- 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
- 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
- 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
- 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
- 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
- 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
- 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
- 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
- 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
- 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
- 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
- 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
- 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
- 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
- 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
- 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
- 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
- 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
- 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
- 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
- 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
- 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
- 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
- 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
- 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
- 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
- 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
- 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
- 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
- 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
- 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
- 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
- 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
- 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
- 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
- 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
- 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
- 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
- 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
- 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
- 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
- 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
- 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
- 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
- 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
- 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
- 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
- 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
- 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
- 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
- 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
- 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
- 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
- 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
- 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
- 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
- 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
- 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
- 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
- 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
- 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
- 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
- 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
- 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
- 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
- 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
- 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
- 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
- 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
- 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
- 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
- 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
- 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
- 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
- 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
- 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
- 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
- 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
- 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
- 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
- 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
- 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
- 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
- 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
- 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
- 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
- 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
- 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
- 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
- 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
- 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
- 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
- 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
- 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
- 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
- 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
- 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
- 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
- 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
- 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
- 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
- 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
- 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
- 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
- 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
- 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
- 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
- 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
- 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
- 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
- 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
- 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
- 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
- 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
- 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
- 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
- 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
- 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
- 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
- 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
- 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
- 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
- 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
- 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
- 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
- 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
- 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
- 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
- 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
- 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
- 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
- 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
- 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
- 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
- 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
- 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
- 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
- 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
- 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
- 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
- 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
- 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
- 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
- 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
- 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
- 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
- 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
- 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
- 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
- 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
- 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
- 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
- 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
- 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
- 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
- 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
- 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
- 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
- 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
- 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
- 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
- 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
- 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
- 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
- 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
- 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
- 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
- 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
- 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
- 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
- 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
- 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
- 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
- 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
- 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
- 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
- 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
- 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
- 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
- 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
- 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
- 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
- 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
- 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
- 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
- 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
- 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
- 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
- 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
- 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
- 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
- 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
- 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
- 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
- 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
- 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
- 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
- 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
- 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
- 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
- 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
- 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
- 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
- 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
- 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
- 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
- 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
- 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
- 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
- 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
- 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
- 27705344U, // <u,5,6,7>: Cost 0 copy RHS
- 27705344U, // <u,5,6,u>: Cost 0 copy RHS
- 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
- 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
- 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
- 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
- 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
- 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
- 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
- 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
- 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
- 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
- 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
- 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
- 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
- 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
- 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
- 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 27705344U, // <u,5,u,7>: Cost 0 copy RHS
- 27705344U, // <u,5,u,u>: Cost 0 copy RHS
- 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
- 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
- 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
- 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
- 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
- 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
- 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
- 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
- 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
- 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
- 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
- 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
- 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
- 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
- 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
- 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
- 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
- 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
- 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
- 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
- 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
- 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
- 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
- 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
- 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
- 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
- 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
- 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
- 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
- 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
- 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
- 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
- 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
- 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
- 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
- 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
- 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
- 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
- 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
- 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
- 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
- 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
- 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
- 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
- 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
- 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
- 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
- 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
- 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
- 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
- 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
- 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
- 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
- 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
- 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
- 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
- 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
- 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
- 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
- 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
- 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
- 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
- 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
- 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
- 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
- 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
- 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
- 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
- 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
- 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
- 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
- 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
- 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
- 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
- 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
- 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
- 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
- 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
- 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
- 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
- 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
- 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
- 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
- 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
- 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
- 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
- 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
- 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
- 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
- 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
- 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
- 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
- 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
- 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
- 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
- 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
- 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
- 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
- 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
- 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
- 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
- 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
- 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
- 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
- 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
- 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
- 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
- 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
- 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
- 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
- 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
- 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
- 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
- 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
- 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
- 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
- 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
- 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
- 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
- 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
- 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
- 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
- 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
- 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
- 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
- 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
- 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
- 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
- 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
- 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
- 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
- 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
- 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
- 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
- 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
- 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
- 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
- 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
- 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
- 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
- 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
- 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
- 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
- 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
- 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
- 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
- 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
- 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
- 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
- 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
- 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
- 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
- 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
- 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
- 835584U, // <u,u,2,3>: Cost 0 copy LHS
- 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
- 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
- 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
- 835584U, // <u,u,2,u>: Cost 0 copy LHS
- 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
- 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
- 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
- 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
- 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
- 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
- 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
- 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
- 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
- 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
- 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
- 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
- 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
- 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
- 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
- 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
- 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
- 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
- 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
- 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
- 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
- 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
- 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
- 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
- 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
- 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
- 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
- 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
- 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
- 27705344U, // <u,u,6,7>: Cost 0 copy RHS
- 27705344U, // <u,u,6,u>: Cost 0 copy RHS
- 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
- 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
- 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
- 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
- 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
- 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
- 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
- 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
- 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
- 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
- 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
- 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
- 835584U, // <u,u,u,3>: Cost 0 copy LHS
- 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
- 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
- 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
- 27705344U, // <u,u,u,7>: Cost 0 copy RHS
- 835584U, // <u,u,u,u>: Cost 0 copy LHS
- 0
-};
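
For context on the encoding shared by the removed table above and the regenerated table below: the table is indexed by a 4-element shuffle mask written in base 9 (lane values 0-7, with 8 standing for the undef element printed as 'u' in the comments, hence 9^4 = 6561 entries plus a trailing sentinel), and each 32-bit entry packs an operation kind, two 13-bit operand ids, and a biased cost. The following minimal standalone C++ sketch decodes one sample entry from the table above, following the field layout used by GeneratePerfectShuffle() in the ARM/AArch64 backends; perfectShuffleIndex is an illustrative helper, not an LLVM API, and the cost bias (stored as Cost-1, with nop copies special-cased to cost 0) is an assumption based on the regenerated getPerfectShuffleCost() helper.

#include <cassert>
#include <cstdio>

// Map a 4-element shuffle mask (negative = undef, the 'u' in the table
// comments) to its base-9 table index: lanes 0-7 select from the two
// input vectors, 8 means undef.
static unsigned perfectShuffleIndex(const int M[4]) {
  unsigned Idx[4];
  for (int i = 0; i != 4; ++i)
    Idx[i] = M[i] < 0 ? 8u : (unsigned)M[i];
  return Idx[0] * 9 * 9 * 9 + Idx[1] * 9 * 9 + Idx[2] * 9 + Idx[3];
}

int main() {
  // Sample entry from the table above: <u,1,2,3> is "Cost 0 copy LHS".
  const unsigned PFEntry = 835584U;

  // Field layout used by GeneratePerfectShuffle():
  unsigned OpNum = (PFEntry >> 26) & 0x0F;            // operation kind; OP_COPY is 0
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); // id of the LHS sub-shuffle
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);  // id of the RHS sub-shuffle
  unsigned CostBits = PFEntry >> 30;                  // cost, stored biased as Cost-1;
                                                      // nop copies like this entry are
                                                      // special-cased to cost 0 first

  // LHSID 102 == (1*9+2)*9+3, the identity <0,1,2,3> copy of the LHS.
  assert(OpNum == 0 && LHSID == (1 * 9 + 2) * 9 + 3 && RHSID == 0);

  const int M[4] = {-1, 1, 2, 3}; // the <u,1,2,3> mask itself
  std::printf("index=%u op=%u lhs=%u rhs=%u costbits=%u\n",
              perfectShuffleIndex(M), OpNum, LHSID, RHSID, CostBits);
}

Since 6561 fits in 13 bits, each of the two operand ids can name any table entry, which is how a single entry recursively describes the sub-shuffles that build its operands.
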
+static const unsigned PerfectShuffleTable[6561 + 1] = {
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 2080972802U, // <0,0,0,1>: Cost 2 ins <0,0,u,1>, lane 2
+ 1679065190U, // <0,0,0,2>: Cost 2 vuzpl <0,2,0,2>, LHS
+ 2085707777U, // <0,0,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2080440323U, // <0,0,0,5>: Cost 2 ins <0,0,0,u>, lane 3
+ 2080440323U, // <0,0,0,6>: Cost 2 ins <0,0,0,u>, lane 3
+ 2080440323U, // <0,0,0,7>: Cost 2 ins <0,0,0,u>, lane 3
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 1812774912U, // <0,0,1,0>: Cost 2 vzipl LHS, <0,0,0,0>
+ 739033190U, // <0,0,1,1>: Cost 1 vzipl LHS, LHS
+ 1812775076U, // <0,0,1,2>: Cost 2 vzipl LHS, <0,2,0,2>
+ 2080514051U, // <0,0,1,3>: Cost 2 ins <0,0,1,u>, lane 3
+ 1812816210U, // <0,0,1,4>: Cost 2 vzipl LHS, <0,4,1,5>
+ 2085797889U, // <0,0,1,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 2080514051U, // <0,0,1,6>: Cost 2 ins <0,0,1,u>, lane 3
+ 2080514051U, // <0,0,1,7>: Cost 2 ins <0,0,1,u>, lane 3
+ 739033757U, // <0,0,1,u>: Cost 1 vzipl LHS, LHS
+ 1946992640U, // <0,0,2,0>: Cost 2 vtrnl LHS, <0,0,0,0>
+ 1946992650U, // <0,0,2,1>: Cost 2 vtrnl LHS, <0,0,1,1>
+ 873250918U, // <0,0,2,2>: Cost 1 vtrnl LHS, LHS
+ 1012113409U, // <0,0,2,3>: Cost 1 ins LHS, lane 1
+ 1946992844U, // <0,0,2,4>: Cost 2 vtrnl LHS, <0,2,4,6>
+ 2080587779U, // <0,0,2,5>: Cost 2 ins <0,0,2,u>, lane 3
+ 2085879809U, // <0,0,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2080587779U, // <0,0,2,7>: Cost 2 ins <0,0,2,u>, lane 3
+ 873250972U, // <0,0,2,u>: Cost 1 vtrnl LHS, LHS
+ 2080964610U, // <0,0,3,0>: Cost 2 ins <0,0,u,0>, lane 2
+ 2080972802U, // <0,0,3,1>: Cost 2 ins <0,0,u,1>, lane 2
+ 2128388096U, // <0,0,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 2013437973U, // <0,0,3,3>: Cost 2 vtrnr <0,0,2,3>, <0,0,2,3>
+ 3154739202U, // <0,0,3,4>: Cost 3 ins <0,0,u,4>, lane 2
+ 2752809474U, // <0,0,3,5>: Cost 3 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3154755586U, // <0,0,3,6>: Cost 3 ins <0,0,u,6>, lane 2
+ 2818573312U, // <0,0,3,7>: Cost 3 vuzpr <0,0,0,0>, <1,3,5,7>
+ 2080972802U, // <0,0,3,u>: Cost 2 ins <0,0,u,1>, lane 2
+ 2080964610U, // <0,0,4,0>: Cost 2 ins <0,0,u,0>, lane 2
+ 1814708326U, // <0,0,4,1>: Cost 2 vzipl <0,4,1,5>, LHS
+ 1947828326U, // <0,0,4,2>: Cost 2 vtrnl <0,2,4,6>, LHS
+ 2086002689U, // <0,0,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 1947828428U, // <0,0,4,4>: Cost 2 vtrnl <0,2,4,6>, <0,2,4,6>
+ 2081030149U, // <0,0,4,5>: Cost 2 ins <0,0,u,u>, lane 5
+ 1679068470U, // <0,0,4,6>: Cost 2 vuzpl <0,2,0,2>, RHS
+ 3154477059U, // <0,0,4,7>: Cost 3 ins <0,0,4,u>, lane 3
+ 1679068488U, // <0,0,4,u>: Cost 2 vuzpl <0,2,0,2>, RHS
+ 2080964610U, // <0,0,5,0>: Cost 2 ins <0,0,u,0>, lane 2
+ 2128527360U, // <0,0,5,1>: Cost 2 ins <u,0,5,1>, lane 0
+ 2080980994U, // <0,0,5,2>: Cost 2 ins <0,0,u,2>, lane 2
+ 2086076417U, // <0,0,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 3202293760U, // <0,0,5,4>: Cost 3 ins <u,0,5,4>, lane 0
+ 1947213953U, // <0,0,5,5>: Cost 2 vtrnl <0,1,5,3>, <0,1,5,3>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 1744833846U, // <0,0,5,7>: Cost 2 vuzpr <0,0,0,0>, RHS
+ 2128527360U, // <0,0,5,u>: Cost 2 ins <u,0,5,1>, lane 0
+ 2080964610U, // <0,0,6,0>: Cost 2 ins <0,0,u,0>, lane 2
+ 2080972802U, // <0,0,6,1>: Cost 2 ins <0,0,u,1>, lane 2
+ 2128609280U, // <0,0,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 2086150145U, // <0,0,6,3>: Cost 2 ins <0,u,6,3>, lane 1
+ 3202367488U, // <0,0,6,4>: Cost 3 ins <u,0,6,4>, lane 0
+ 2617250536U, // <0,0,6,5>: Cost 3 vext2 <0,0,0,0>, <6,5,6,7>
+ 1947287690U, // <0,0,6,6>: Cost 2 vtrnl <0,1,6,3>, <0,1,6,3>
+ 2081030149U, // <0,0,6,7>: Cost 2 ins <0,0,u,u>, lane 5
+ 2080972802U, // <0,0,6,u>: Cost 2 ins <0,0,u,1>, lane 2
+ 2080964610U, // <0,0,7,0>: Cost 2 ins <0,0,u,0>, lane 2
+ 2080972802U, // <0,0,7,1>: Cost 2 ins <0,0,u,1>, lane 2
+ 2080980994U, // <0,0,7,2>: Cost 2 ins <0,0,u,2>, lane 2
+ 2086223873U, // <0,0,7,3>: Cost 2 ins <0,u,7,3>, lane 1
+ 3154739202U, // <0,0,7,4>: Cost 3 ins <0,0,u,4>, lane 2
+ 2617251265U, // <0,0,7,5>: Cost 3 vext2 <0,0,0,0>, <7,5,6,7>
+ 3154755586U, // <0,0,7,6>: Cost 3 ins <0,0,u,6>, lane 2
+ 1947361427U, // <0,0,7,7>: Cost 2 vtrnl <0,1,7,3>, <0,1,7,3>
+ 2080972802U, // <0,0,7,u>: Cost 2 ins <0,0,u,1>, lane 2
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 743678054U, // <0,0,u,1>: Cost 1 vzipl LHS, LHS
+ 873693286U, // <0,0,u,2>: Cost 1 vtrnl LHS, LHS
+ 1012113409U, // <0,0,u,3>: Cost 1 ins LHS, lane 1
+ 1947435212U, // <0,0,u,4>: Cost 2 vtrnl LHS, <0,2,4,6>
+ 2085797889U, // <0,0,u,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 1679071386U, // <0,0,u,6>: Cost 2 vuzpl <0,2,0,2>, RHS
+ 2080514051U, // <0,0,u,7>: Cost 2 ins <0,0,1,u>, lane 3
+ 873693340U, // <0,0,u,u>: Cost 1 vtrnl LHS, LHS
+ 2085683201U, // <0,1,0,0>: Cost 2 ins <0,u,0,0>, lane 1
+ 1007951877U, // <0,1,0,1>: Cost 1 ins LHS, lane 5
+ 1680490598U, // <0,1,0,2>: Cost 2 vuzpl <0,4,1,5>, LHS
+ 1007910914U, // <0,1,0,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <0,1,0,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 2081669122U, // <0,1,0,5>: Cost 2 ins <0,1,u,5>, lane 2
+ 2081677314U, // <0,1,0,6>: Cost 2 ins <0,1,u,6>, lane 2
+ 2081685506U, // <0,1,0,7>: Cost 2 ins <0,1,u,7>, lane 2
+ 1007951877U, // <0,1,0,u>: Cost 1 ins LHS, lane 5
+ 1812775670U, // <0,1,1,0>: Cost 2 vzipl LHS, <1,0,3,2>
+ 1812775732U, // <0,1,1,1>: Cost 2 vzipl LHS, <1,1,1,1>
+ 1812775830U, // <0,1,1,2>: Cost 2 vzipl LHS, <1,2,3,0>
+ 1007910914U, // <0,1,1,3>: Cost 1 ins LHS, lane 2
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 1812817040U, // <0,1,1,5>: Cost 2 vzipl LHS, <1,5,3,7>
+ 2081677314U, // <0,1,1,6>: Cost 2 ins <0,1,u,6>, lane 2
+ 2081685506U, // <0,1,1,7>: Cost 2 ins <0,1,u,7>, lane 2
+ 1007910914U, // <0,1,1,u>: Cost 1 ins LHS, lane 2
+ 1007509507U, // <0,1,2,0>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,2,1>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,2,2>: Cost 1 ins LHS, lane 3
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1007509507U, // <0,1,2,4>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,2,5>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,2,6>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,2,7>: Cost 1 ins LHS, lane 3
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2133680132U, // <0,1,3,0>: Cost 2 ins <u,u,3,0>, lane 4
+ 2081636354U, // <0,1,3,1>: Cost 2 ins <0,1,u,1>, lane 2
+ 2133696516U, // <0,1,3,2>: Cost 2 ins <u,u,3,2>, lane 4
+ 1007910914U, // <0,1,3,3>: Cost 1 ins LHS, lane 2
+ 2133712900U, // <0,1,3,4>: Cost 2 ins <u,u,3,4>, lane 4
+ 2081669122U, // <0,1,3,5>: Cost 2 ins <0,1,u,5>, lane 2
+ 2081677314U, // <0,1,3,6>: Cost 2 ins <0,1,u,6>, lane 2
+ 2133737476U, // <0,1,3,7>: Cost 2 ins <u,u,3,7>, lane 4
+ 1007910914U, // <0,1,3,u>: Cost 1 ins LHS, lane 2
+ 2081628162U, // <0,1,4,0>: Cost 2 ins <0,1,u,0>, lane 2
+ 2081636354U, // <0,1,4,1>: Cost 2 ins <0,1,u,1>, lane 2
+ 2081644546U, // <0,1,4,2>: Cost 2 ins <0,1,u,2>, lane 2
+ 1007910914U, // <0,1,4,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <0,1,4,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 1007951877U, // <0,1,4,5>: Cost 1 ins LHS, lane 5
+ 1680493878U, // <0,1,4,6>: Cost 2 vuzpl <0,4,1,5>, RHS
+ 2081685506U, // <0,1,4,7>: Cost 2 ins <0,1,u,7>, lane 2
+ 1007910914U, // <0,1,4,u>: Cost 1 ins LHS, lane 2
+ 2081628162U, // <0,1,5,0>: Cost 2 ins <0,1,u,0>, lane 2
+ 2133835780U, // <0,1,5,1>: Cost 2 ins <u,u,5,1>, lane 4
+ 2081644546U, // <0,1,5,2>: Cost 2 ins <0,1,u,2>, lane 2
+ 1007910914U, // <0,1,5,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <0,1,5,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 2133868548U, // <0,1,5,5>: Cost 2 ins <u,u,5,5>, lane 4
+ 2133876740U, // <0,1,5,6>: Cost 2 ins <u,u,5,6>, lane 4
+ 2133884932U, // <0,1,5,7>: Cost 2 ins <u,u,5,7>, lane 4
+ 1007910914U, // <0,1,5,u>: Cost 1 ins LHS, lane 2
+ 2081628162U, // <0,1,6,0>: Cost 2 ins <0,1,u,0>, lane 2
+ 2081636354U, // <0,1,6,1>: Cost 2 ins <0,1,u,1>, lane 2
+ 2133917700U, // <0,1,6,2>: Cost 2 ins <u,u,6,2>, lane 4
+ 1007910914U, // <0,1,6,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <0,1,6,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 2081669122U, // <0,1,6,5>: Cost 2 ins <0,1,u,5>, lane 2
+ 2133950468U, // <0,1,6,6>: Cost 2 ins <u,u,6,6>, lane 4
+ 1060216836U, // <0,1,6,7>: Cost 1 ins RHS, lane 4
+ 1007910914U, // <0,1,6,u>: Cost 1 ins LHS, lane 2
+ 2133975044U, // <0,1,7,0>: Cost 2 ins <u,u,7,0>, lane 4
+ 2081636354U, // <0,1,7,1>: Cost 2 ins <0,1,u,1>, lane 2
+ 2081644546U, // <0,1,7,2>: Cost 2 ins <0,1,u,2>, lane 2
+ 1007910914U, // <0,1,7,3>: Cost 1 ins LHS, lane 2
+ 2134007812U, // <0,1,7,4>: Cost 2 ins <u,u,7,4>, lane 4
+ 2081669122U, // <0,1,7,5>: Cost 2 ins <0,1,u,5>, lane 2
+ 2134024196U, // <0,1,7,6>: Cost 2 ins <u,u,7,6>, lane 4
+ 2134032388U, // <0,1,7,7>: Cost 2 ins <u,u,7,7>, lane 4
+ 1007910914U, // <0,1,7,u>: Cost 1 ins LHS, lane 2
+ 1007509507U, // <0,1,u,0>: Cost 1 ins LHS, lane 3
+ 1007951877U, // <0,1,u,1>: Cost 1 ins LHS, lane 5
+ 1007509507U, // <0,1,u,2>: Cost 1 ins LHS, lane 3
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1007509507U, // <0,1,u,4>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,u,5>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,u,6>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,1,u,7>: Cost 1 ins LHS, lane 3
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 1678557184U, // <0,2,0,0>: Cost 2 vuzpl LHS, <0,0,0,0>
+ 1678598154U, // <0,2,0,1>: Cost 2 vuzpl LHS, <0,0,1,1>
+ 604815462U, // <0,2,0,2>: Cost 1 vuzpl LHS, LHS
+ 2081767427U, // <0,2,0,3>: Cost 2 ins <0,2,0,u>, lane 3
+ 1678598348U, // <0,2,0,4>: Cost 2 vuzpl LHS, <0,2,4,6>
+ 2081767427U, // <0,2,0,5>: Cost 2 ins <0,2,0,u>, lane 3
+ 2082340866U, // <0,2,0,6>: Cost 2 ins <0,2,u,6>, lane 2
+ 2081767427U, // <0,2,0,7>: Cost 2 ins <0,2,0,u>, lane 3
+ 604815516U, // <0,2,0,u>: Cost 1 vuzpl LHS, LHS
+ 2752340940U, // <0,2,1,0>: Cost 3 vuzpl LHS, <1,3,0,0>
+ 1678558004U, // <0,2,1,1>: Cost 2 vuzpl LHS, <1,1,1,1>
+ 1812776552U, // <0,2,1,2>: Cost 2 vzipl LHS, <2,2,2,2>
+ 1678557942U, // <0,2,1,3>: Cost 2 vuzpl LHS, <1,0,3,2>
+ 2752340982U, // <0,2,1,4>: Cost 3 vuzpl LHS, <1,3,4,6>
+ 1678599168U, // <0,2,1,5>: Cost 2 vuzpl LHS, <1,3,5,7>
+ 1812817850U, // <0,2,1,6>: Cost 2 vzipl LHS, <2,6,3,7>
+ 2860466282U, // <0,2,1,7>: Cost 3 vuzpr <7,0,1,2>, <0,1,2,7>
+ 1678598947U, // <0,2,1,u>: Cost 2 vuzpl LHS, <1,0,u,2>
+ 1678558886U, // <0,2,2,0>: Cost 2 vuzpl LHS, <2,3,0,1>
+ 2085838849U, // <0,2,2,1>: Cost 2 ins <0,u,2,1>, lane 1
+ 1678558824U, // <0,2,2,2>: Cost 2 vuzpl LHS, <2,2,2,2>
+ 1012113409U, // <0,2,2,3>: Cost 1 ins LHS, lane 1
+ 1678558926U, // <0,2,2,4>: Cost 2 vuzpl LHS, <2,3,4,5>
+ 2085871617U, // <0,2,2,5>: Cost 2 ins <0,u,2,5>, lane 1
+ 2085879809U, // <0,2,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2085888001U, // <0,2,2,7>: Cost 2 ins <0,u,2,7>, lane 1
+ 1012113409U, // <0,2,2,u>: Cost 1 ins LHS, lane 1
+ 2129698816U, // <0,2,3,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 1678559382U, // <0,2,3,1>: Cost 2 vuzpl LHS, <3,0,1,2>
+ 2082308098U, // <0,2,3,2>: Cost 2 ins <0,2,u,2>, lane 2
+ 1678559644U, // <0,2,3,3>: Cost 2 vuzpl LHS, <3,3,3,3>
+ 2129731584U, // <0,2,3,4>: Cost 2 ins <u,2,3,4>, lane 0
+ 1678559746U, // <0,2,3,5>: Cost 2 vuzpl LHS, <3,4,5,6>
+ 2082340866U, // <0,2,3,6>: Cost 2 ins <0,2,u,6>, lane 2
+ 2824782848U, // <0,2,3,7>: Cost 3 vuzpr <1,0,3,2>, <1,3,5,7>
+ 1678559445U, // <0,2,3,u>: Cost 2 vuzpl LHS, <3,0,u,2>
+ 2082062339U, // <0,2,4,0>: Cost 2 ins <0,2,4,u>, lane 3
+ 2082062339U, // <0,2,4,1>: Cost 2 ins <0,2,4,u>, lane 3
+ 2082308098U, // <0,2,4,2>: Cost 2 ins <0,2,u,2>, lane 2
+ 2082062339U, // <0,2,4,3>: Cost 2 ins <0,2,4,u>, lane 3
+ 2082062339U, // <0,2,4,4>: Cost 2 ins <0,2,4,u>, lane 3
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 604818742U, // <0,2,4,6>: Cost 1 vuzpl LHS, RHS
+ 2082062339U, // <0,2,4,7>: Cost 2 ins <0,2,4,u>, lane 3
+ 604818760U, // <0,2,4,u>: Cost 1 vuzpl LHS, RHS
+ 3105260438U, // <0,2,5,0>: Cost 3 vtrnr <3,0,4,5>, <1,2,3,0>
+ 1678561408U, // <0,2,5,1>: Cost 2 vuzpl LHS, <5,7,1,3>
+ 2082308098U, // <0,2,5,2>: Cost 2 ins <0,2,u,2>, lane 2
+ 2086076417U, // <0,2,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 2756947554U, // <0,2,5,4>: Cost 3 vuzpl LHS, <5,0,4,1>
+ 1678561284U, // <0,2,5,5>: Cost 2 vuzpl LHS, <5,5,5,5>
+ 2082340866U, // <0,2,5,6>: Cost 2 ins <0,2,u,6>, lane 2
+ 1751043382U, // <0,2,5,7>: Cost 2 vuzpr <1,0,3,2>, RHS
+ 1751043383U, // <0,2,5,u>: Cost 2 vuzpr <1,0,3,2>, RHS
+ 1678562126U, // <0,2,6,0>: Cost 2 vuzpl LHS, <6,7,0,1>
+ 2756948257U, // <0,2,6,1>: Cost 3 vuzpl LHS, <6,0,1,2>
+ 2082308098U, // <0,2,6,2>: Cost 2 ins <0,2,u,2>, lane 2
+ 2086150145U, // <0,2,6,3>: Cost 2 ins <0,u,6,3>, lane 1
+ 1678562166U, // <0,2,6,4>: Cost 2 vuzpl LHS, <6,7,4,5>
+ 2756948621U, // <0,2,6,5>: Cost 3 vuzpl LHS, <6,4,5,6>
+ 2082340866U, // <0,2,6,6>: Cost 2 ins <0,2,u,6>, lane 2
+ 2082357253U, // <0,2,6,7>: Cost 2 ins <0,2,u,u>, lane 5
+ 2082308098U, // <0,2,6,u>: Cost 2 ins <0,2,u,2>, lane 2
+ 3099378582U, // <0,2,7,0>: Cost 3 vtrnr <2,0,5,7>, <1,2,3,0>
+ 1678562298U, // <0,2,7,1>: Cost 2 vuzpl LHS, <7,0,1,2>
+ 2082308098U, // <0,2,7,2>: Cost 2 ins <0,2,u,2>, lane 2
+ 2130018304U, // <0,2,7,3>: Cost 2 ins <u,2,7,3>, lane 0
+ 2645136742U, // <0,2,7,4>: Cost 3 vext2 <4,6,0,2>, <7,4,5,6>
+ 1678562662U, // <0,2,7,5>: Cost 2 vuzpl LHS, <7,4,5,6>
+ 2082340866U, // <0,2,7,6>: Cost 2 ins <0,2,u,6>, lane 2
+ 1678562924U, // <0,2,7,7>: Cost 2 vuzpl LHS, <7,7,7,7>
+ 2082308098U, // <0,2,7,u>: Cost 2 ins <0,2,u,2>, lane 2
+ 1947436710U, // <0,2,u,0>: Cost 2 vtrnl LHS, <2,3,0,1>
+ 1678603987U, // <0,2,u,1>: Cost 2 vuzpl LHS, <u,0,1,2>
+ 604821294U, // <0,2,u,2>: Cost 1 vuzpl LHS, LHS
+ 1012113409U, // <0,2,u,3>: Cost 1 ins LHS, lane 1
+ 1947436750U, // <0,2,u,4>: Cost 2 vtrnl LHS, <2,3,4,5>
+ 1678604351U, // <0,2,u,5>: Cost 2 vuzpl LHS, <u,4,5,6>
+ 604821658U, // <0,2,u,6>: Cost 1 vuzpl LHS, RHS
+ 1751043625U, // <0,2,u,7>: Cost 2 vuzpr <1,0,3,2>, RHS
+ 604821348U, // <0,2,u,u>: Cost 1 vuzpl LHS, LHS
+ 2085683201U, // <0,3,0,0>: Cost 2 ins <0,u,0,0>, lane 1
+ 2130149376U, // <0,3,0,1>: Cost 2 ins <u,3,0,1>, lane 0
+ 2085699585U, // <0,3,0,2>: Cost 2 ins <0,u,0,2>, lane 1
+ 1745002517U, // <0,3,0,3>: Cost 2 vuzpr <0,0,2,3>, <0,0,2,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 3021244930U, // <0,3,0,5>: Cost 3 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3159474177U, // <0,3,0,6>: Cost 3 ins <0,u,0,6>, lane 1
+ 2952791184U, // <0,3,0,7>: Cost 3 vzipr <0,0,0,0>, <1,5,3,7>
+ 2130149376U, // <0,3,0,u>: Cost 2 ins <u,3,0,1>, lane 0
+ 1812777110U, // <0,3,1,0>: Cost 2 vzipl LHS, <3,0,1,2>
+ 2085765121U, // <0,3,1,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 2886519105U, // <0,3,1,2>: Cost 3 vzipl LHS, <3,2,2,2>
+ 1812777372U, // <0,3,1,3>: Cost 2 vzipl LHS, <3,3,3,3>
+ 1812777474U, // <0,3,1,4>: Cost 2 vzipl LHS, <3,4,5,6>
+ 2085797889U, // <0,3,1,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 3159547905U, // <0,3,1,6>: Cost 3 ins <0,u,1,6>, lane 1
+ 2966733968U, // <0,3,1,7>: Cost 3 vzipr <2,3,0,1>, <1,5,3,7>
+ 1812777758U, // <0,3,1,u>: Cost 2 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1946994838U, // <0,3,2,1>: Cost 2 vtrnl LHS, <3,0,1,2>
+ 2085847041U, // <0,3,2,2>: Cost 2 ins <0,u,2,2>, lane 1
+ 1012113409U, // <0,3,2,3>: Cost 1 ins LHS, lane 1
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 1946995202U, // <0,3,2,5>: Cost 2 vtrnl LHS, <3,4,5,6>
+ 2085879809U, // <0,3,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2085888001U, // <0,3,2,7>: Cost 2 ins <0,u,2,7>, lane 1
+ 1012113409U, // <0,3,2,u>: Cost 1 ins LHS, lane 1
+ 2887747734U, // <0,3,3,0>: Cost 3 vzipl <0,3,1,0>, <3,0,1,2>
+ 2753022102U, // <0,3,3,1>: Cost 3 vuzpl <0,2,3,1>, <3,0,1,2>
+ 2965422838U, // <0,3,3,2>: Cost 3 vzipr <2,1,0,3>, <1,0,3,2>
+ 2130386944U, // <0,3,3,3>: Cost 2 ins <u,3,3,3>, lane 0
+ 2887780866U, // <0,3,3,4>: Cost 3 vzipl <0,3,1,4>, <3,4,5,6>
+ 2753055234U, // <0,3,3,5>: Cost 3 vuzpl <0,2,3,5>, <3,4,5,6>
+ 2752375389U, // <0,3,3,6>: Cost 3 vuzpl <0,1,3,3>, <3,5,6,7>
+ 3204161536U, // <0,3,3,7>: Cost 3 ins <u,3,3,7>, lane 0
+ 2130386944U, // <0,3,3,u>: Cost 2 ins <u,3,3,3>, lane 0
+ 2888452246U, // <0,3,4,0>: Cost 3 vzipl <0,4,1,5>, <3,0,1,2>
+ 3021572246U, // <0,3,4,1>: Cost 3 vtrnl <0,2,4,6>, <3,0,1,2>
+ 3021572257U, // <0,3,4,2>: Cost 3 vtrnl <0,2,4,6>, <3,0,2,4>
+ 2086002689U, // <0,3,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 2888452610U, // <0,3,4,4>: Cost 3 vzipl <0,4,1,5>, <3,4,5,6>
+ 2130477056U, // <0,3,4,5>: Cost 2 ins <u,3,4,5>, lane 0
+ 2086027265U, // <0,3,4,6>: Cost 2 ins <0,u,4,6>, lane 1
+ 2818747621U, // <0,3,4,7>: Cost 3 vuzpr <0,0,2,3>, <4,4,6,7>
+ 2130477056U, // <0,3,4,u>: Cost 2 ins <u,3,4,5>, lane 0
+ 3204251648U, // <0,3,5,0>: Cost 3 ins <u,3,5,0>, lane 0
+ 3204259840U, // <0,3,5,1>: Cost 3 ins <u,3,5,1>, lane 0
+ 2961457910U, // <0,3,5,2>: Cost 3 vzipr <1,4,0,5>, <1,0,3,2>
+ 2086076417U, // <0,3,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3204292608U, // <0,3,5,5>: Cost 3 ins <u,3,5,5>, lane 0
+ 2653769826U, // <0,3,5,6>: Cost 3 vext2 <6,1,0,3>, <5,6,7,0>
+ 2130567168U, // <0,3,5,7>: Cost 2 ins <u,3,5,7>, lane 0
+ 2130567168U, // <0,3,5,u>: Cost 2 ins <u,3,5,7>, lane 0
+ 2854506594U, // <0,3,6,0>: Cost 3 vuzpr <6,0,1,3>, <5,6,7,0>
+ 2653770090U, // <0,3,6,1>: Cost 3 vext2 <6,1,0,3>, <6,1,0,3>
+ 3204341760U, // <0,3,6,2>: Cost 3 ins <u,3,6,2>, lane 0
+ 2086150145U, // <0,3,6,3>: Cost 2 ins <0,u,6,3>, lane 1
+ 3204358144U, // <0,3,6,4>: Cost 3 ins <u,3,6,4>, lane 0
+ 3204366336U, // <0,3,6,5>: Cost 3 ins <u,3,6,5>, lane 0
+ 3204374528U, // <0,3,6,6>: Cost 3 ins <u,3,6,6>, lane 0
+ 2130640896U, // <0,3,6,7>: Cost 2 ins <u,3,6,7>, lane 0
+ 2086150145U, // <0,3,6,u>: Cost 2 ins <0,u,6,3>, lane 1
+ 2968109974U, // <0,3,7,0>: Cost 3 vzipr <2,5,0,7>, <1,2,3,0>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 2660406420U, // <0,3,7,2>: Cost 3 vext2 <7,2,0,3>, <7,2,0,3>
+ 2086223873U, // <0,3,7,3>: Cost 2 ins <0,u,7,3>, lane 1
+ 3204431872U, // <0,3,7,4>: Cost 3 ins <u,3,7,4>, lane 0
+ 3204440064U, // <0,3,7,5>: Cost 3 ins <u,3,7,5>, lane 0
+ 2752378305U, // <0,3,7,6>: Cost 3 vuzpl <0,1,3,3>, <7,5,6,7>
+ 3204456448U, // <0,3,7,7>: Cost 3 ins <u,3,7,7>, lane 0
+ 2086223873U, // <0,3,7,u>: Cost 2 ins <0,u,7,3>, lane 1
+ 1817421974U, // <0,3,u,0>: Cost 2 vzipl LHS, <3,0,1,2>
+ 1947437206U, // <0,3,u,1>: Cost 2 vtrnl LHS, <3,0,1,2>
+ 2085699585U, // <0,3,u,2>: Cost 2 ins <0,u,0,2>, lane 1
+ 1012113409U, // <0,3,u,3>: Cost 1 ins LHS, lane 1
+ 1817422338U, // <0,3,u,4>: Cost 2 vzipl LHS, <3,4,5,6>
+ 1947437570U, // <0,3,u,5>: Cost 2 vtrnl LHS, <3,4,5,6>
+ 2085879809U, // <0,3,u,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2130567168U, // <0,3,u,7>: Cost 2 ins <u,3,5,7>, lane 0
+ 1012113409U, // <0,3,u,u>: Cost 1 ins LHS, lane 1
+ 2085683201U, // <0,4,0,0>: Cost 2 ins <0,u,0,0>, lane 1
+ 2083684357U, // <0,4,0,1>: Cost 2 ins <0,4,u,u>, lane 5
+ 1679392870U, // <0,4,0,2>: Cost 2 vuzpl <0,2,4,6>, LHS
+ 2085707777U, // <0,4,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 1679392972U, // <0,4,0,4>: Cost 2 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2083659778U, // <0,4,0,5>: Cost 2 ins <0,4,u,5>, lane 2
+ 1947503926U, // <0,4,0,6>: Cost 2 vtrnl <0,2,0,2>, RHS
+ 3156836355U, // <0,4,0,7>: Cost 3 ins <0,4,0,u>, lane 3
+ 1947503944U, // <0,4,0,u>: Cost 2 vtrnl <0,2,0,2>, RHS
+ 2083168259U, // <0,4,1,0>: Cost 2 ins <0,4,1,u>, lane 3
+ 2085765121U, // <0,4,1,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 2083168259U, // <0,4,1,2>: Cost 2 ins <0,4,1,u>, lane 3
+ 2083168259U, // <0,4,1,3>: Cost 2 ins <0,4,1,u>, lane 3
+ 2083168259U, // <0,4,1,4>: Cost 2 ins <0,4,1,u>, lane 3
+ 739036470U, // <0,4,1,5>: Cost 1 vzipl LHS, RHS
+ 1948929334U, // <0,4,1,6>: Cost 2 vtrnl <0,4,1,5>, RHS
+ 2083168259U, // <0,4,1,7>: Cost 2 ins <0,4,1,u>, lane 3
+ 739036713U, // <0,4,1,u>: Cost 1 vzipl LHS, RHS
+ 2083241987U, // <0,4,2,0>: Cost 2 ins <0,4,2,u>, lane 3
+ 2083241987U, // <0,4,2,1>: Cost 2 ins <0,4,2,u>, lane 3
+ 2085847041U, // <0,4,2,2>: Cost 2 ins <0,u,2,2>, lane 1
+ 1012113409U, // <0,4,2,3>: Cost 1 ins LHS, lane 1
+ 2083241987U, // <0,4,2,4>: Cost 2 ins <0,4,2,u>, lane 3
+ 1813286198U, // <0,4,2,5>: Cost 2 vzipl <0,2,0,2>, RHS
+ 873254198U, // <0,4,2,6>: Cost 1 vtrnl LHS, RHS
+ 2083241987U, // <0,4,2,7>: Cost 2 ins <0,4,2,u>, lane 3
+ 873254216U, // <0,4,2,u>: Cost 1 vtrnl LHS, RHS
+ 3020811514U, // <0,4,3,0>: Cost 3 vtrnl <0,1,3,3>, <4,5,0,1>
+ 2753136790U, // <0,4,3,1>: Cost 3 vuzpl <0,2,4,6>, <3,0,1,2>
+ 2753136801U, // <0,4,3,2>: Cost 3 vuzpl <0,2,4,6>, <3,0,2,4>
+ 2085928961U, // <0,4,3,3>: Cost 2 ins <0,u,3,3>, lane 1
+ 3204800512U, // <0,4,3,4>: Cost 3 ins <u,4,3,4>, lane 0
+ 2083659778U, // <0,4,3,5>: Cost 2 ins <0,4,u,5>, lane 2
+ 2083667970U, // <0,4,3,6>: Cost 2 ins <0,4,u,6>, lane 2
+ 3087183077U, // <0,4,3,7>: Cost 3 vtrnr <0,0,2,3>, <4,4,6,7>
+ 2083659778U, // <0,4,3,u>: Cost 2 ins <0,4,u,5>, lane 2
+ 2753137995U, // <0,4,4,0>: Cost 3 vuzpl <0,2,4,6>, <4,6,0,1>
+ 2888453090U, // <0,4,4,1>: Cost 3 vzipl <0,4,1,5>, <4,1,5,0>
+ 2888535100U, // <0,4,4,2>: Cost 3 vzipl <0,4,2,6>, <4,2,6,0>
+ 2086002689U, // <0,4,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 2131132416U, // <0,4,4,4>: Cost 2 ins <u,4,4,4>, lane 0
+ 1814711606U, // <0,4,4,5>: Cost 2 vzipl <0,4,1,5>, RHS
+ 1679396150U, // <0,4,4,6>: Cost 2 vuzpl <0,2,4,6>, RHS
+ 3157131267U, // <0,4,4,7>: Cost 3 ins <0,4,4,u>, lane 3
+ 1679396168U, // <0,4,4,u>: Cost 2 vuzpl <0,2,4,6>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3204931584U, // <0,4,5,2>: Cost 3 ins <u,4,5,2>, lane 0
+ 2086076417U, // <0,4,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2131214336U, // <0,4,5,5>: Cost 2 ins <u,4,5,5>, lane 0
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2830699830U, // <0,4,5,7>: Cost 3 vuzpr <2,0,2,4>, RHS
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2712227146U, // <0,4,6,0>: Cost 3 vext3 <4,6,0,0>, <4,6,0,0>
+ 2753138977U, // <0,4,6,1>: Cost 3 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2753138988U, // <0,4,6,2>: Cost 3 vuzpl <0,2,4,6>, <6,0,2,4>
+ 2086150145U, // <0,4,6,3>: Cost 2 ins <0,u,6,3>, lane 1
+ 2712522094U, // <0,4,6,4>: Cost 3 vext3 <4,6,4,0>, <4,6,4,0>
+ 2083659778U, // <0,4,6,5>: Cost 2 ins <0,4,u,5>, lane 2
+ 2131296256U, // <0,4,6,6>: Cost 2 ins <u,4,6,6>, lane 0
+ 2083684357U, // <0,4,6,7>: Cost 2 ins <0,4,u,u>, lane 5
+ 2083659778U, // <0,4,6,u>: Cost 2 ins <0,4,u,5>, lane 2
+ 3021106426U, // <0,4,7,0>: Cost 3 vtrnl <0,1,7,3>, <4,5,0,1>
+ 2860487502U, // <0,4,7,1>: Cost 3 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3157377026U, // <0,4,7,2>: Cost 3 ins <0,4,u,2>, lane 2
+ 2086223873U, // <0,4,7,3>: Cost 2 ins <0,u,7,3>, lane 1
+ 3205095424U, // <0,4,7,4>: Cost 3 ins <u,4,7,4>, lane 0
+ 2083659778U, // <0,4,7,5>: Cost 2 ins <0,4,u,5>, lane 2
+ 2131369984U, // <0,4,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 2752452204U, // <0,4,7,7>: Cost 3 vuzpl <0,1,4,3>, <7,7,7,7>
+ 2083659778U, // <0,4,7,u>: Cost 2 ins <0,4,u,5>, lane 2
+ 2083168259U, // <0,4,u,0>: Cost 2 ins <0,4,1,u>, lane 3
+ 2083684357U, // <0,4,u,1>: Cost 2 ins <0,4,u,u>, lane 5
+ 1679398702U, // <0,4,u,2>: Cost 2 vuzpl <0,2,4,6>, LHS
+ 1012113409U, // <0,4,u,3>: Cost 1 ins LHS, lane 1
+ 1679392972U, // <0,4,u,4>: Cost 2 vuzpl <0,2,4,6>, <0,2,4,6>
+ 743681334U, // <0,4,u,5>: Cost 1 vzipl LHS, RHS
+ 873696566U, // <0,4,u,6>: Cost 1 vtrnl LHS, RHS
+ 2083168259U, // <0,4,u,7>: Cost 2 ins <0,4,1,u>, lane 3
+ 873696584U, // <0,4,u,u>: Cost 1 vtrnl LHS, RHS
+ 2085683201U, // <0,5,0,0>: Cost 2 ins <0,u,0,0>, lane 1
+ 2131476480U, // <0,5,0,1>: Cost 2 ins <u,5,0,1>, lane 0
+ 2085699585U, // <0,5,0,2>: Cost 2 ins <0,u,0,2>, lane 1
+ 2085707777U, // <0,5,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 3159457793U, // <0,5,0,4>: Cost 3 ins <0,u,0,4>, lane 1
+ 1678778497U, // <0,5,0,5>: Cost 2 vuzpl <0,1,5,3>, <0,1,5,3>
+ 3159474177U, // <0,5,0,6>: Cost 3 ins <0,u,0,6>, lane 1
+ 2013269302U, // <0,5,0,7>: Cost 2 vtrnr <0,0,0,0>, RHS
+ 2085699585U, // <0,5,0,u>: Cost 2 ins <0,u,0,2>, lane 1
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2085765121U, // <0,5,1,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 3159515137U, // <0,5,1,2>: Cost 3 ins <0,u,1,2>, lane 1
+ 2085781505U, // <0,5,1,3>: Cost 2 ins <0,u,1,3>, lane 1
+ 1812778950U, // <0,5,1,4>: Cost 2 vzipl LHS, <5,4,7,6>
+ 2085797889U, // <0,5,1,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 1812779106U, // <0,5,1,6>: Cost 2 vzipl LHS, <5,6,7,0>
+ 2013351222U, // <0,5,1,7>: Cost 2 vtrnr <0,0,1,1>, RHS
+ 2085765121U, // <0,5,1,u>: Cost 2 ins <0,u,1,1>, lane 1
+ 2085830657U, // <0,5,2,0>: Cost 2 ins <0,u,2,0>, lane 1
+ 1946996864U, // <0,5,2,1>: Cost 2 vtrnl LHS, <5,7,1,3>
+ 2085847041U, // <0,5,2,2>: Cost 2 ins <0,u,2,2>, lane 1
+ 1012113409U, // <0,5,2,3>: Cost 1 ins LHS, lane 1
+ 2085863425U, // <0,5,2,4>: Cost 2 ins <0,u,2,4>, lane 1
+ 1946996740U, // <0,5,2,5>: Cost 2 vtrnl LHS, <5,5,5,5>
+ 2085879809U, // <0,5,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2019478838U, // <0,5,2,7>: Cost 2 vtrnr <1,0,3,2>, RHS
+ 1012113409U, // <0,5,2,u>: Cost 1 ins LHS, lane 1
+ 2637858966U, // <0,5,3,0>: Cost 3 vext2 <3,4,0,5>, <3,0,1,2>
+ 3205439488U, // <0,5,3,1>: Cost 3 ins <u,5,3,1>, lane 0
+ 3087183153U, // <0,5,3,2>: Cost 3 vtrnr <0,0,2,3>, <4,5,6,2>
+ 2085928961U, // <0,5,3,3>: Cost 2 ins <0,u,3,3>, lane 1
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3205472256U, // <0,5,3,5>: Cost 3 ins <u,5,3,5>, lane 0
+ 3205480448U, // <0,5,3,6>: Cost 3 ins <u,5,3,6>, lane 0
+ 2131746816U, // <0,5,3,7>: Cost 2 ins <u,5,3,7>, lane 0
+ 2131746816U, // <0,5,3,u>: Cost 2 ins <u,5,3,7>, lane 0
+ 2888453704U, // <0,5,4,0>: Cost 3 vzipl <0,4,1,5>, <5,0,1,2>
+ 3159728129U, // <0,5,4,1>: Cost 3 ins <0,u,4,1>, lane 1
+ 3159736321U, // <0,5,4,2>: Cost 3 ins <0,u,4,2>, lane 1
+ 2086002689U, // <0,5,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 2888454068U, // <0,5,4,4>: Cost 3 vzipl <0,4,1,5>, <5,4,5,6>
+ 2131804160U, // <0,5,4,5>: Cost 2 ins <u,5,4,5>, lane 0
+ 2086027265U, // <0,5,4,6>: Cost 2 ins <0,u,4,6>, lane 1
+ 2131820544U, // <0,5,4,7>: Cost 2 ins <u,5,4,7>, lane 0
+ 2086027265U, // <0,5,4,u>: Cost 2 ins <0,u,4,6>, lane 1
+ 3205578752U, // <0,5,5,0>: Cost 3 ins <u,5,5,0>, lane 0
+ 2997291922U, // <0,5,5,1>: Cost 3 vzipr <7,4,0,5>, <4,0,5,1>
+ 2752523939U, // <0,5,5,2>: Cost 3 vuzpl <0,1,5,3>, <5,1,2,3>
+ 2086076417U, // <0,5,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 3205611520U, // <0,5,5,4>: Cost 3 ins <u,5,5,4>, lane 0
+ 2131877888U, // <0,5,5,5>: Cost 2 ins <u,5,5,5>, lane 0
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2131894272U, // <0,5,5,7>: Cost 2 ins <u,5,5,7>, lane 0
+ 2086076417U, // <0,5,5,u>: Cost 2 ins <0,u,5,3>, lane 1
+ 2131910656U, // <0,5,6,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 2131918848U, // <0,5,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2131927040U, // <0,5,6,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2131935232U, // <0,5,6,3>: Cost 2 ins <u,5,6,3>, lane 0
+ 2131943424U, // <0,5,6,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 2131951616U, // <0,5,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 2131959808U, // <0,5,6,6>: Cost 2 ins <u,5,6,6>, lane 0
+ 1058226176U, // <0,5,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <0,5,6,u>: Cost 1 ins RHS, lane 0
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 2086223873U, // <0,5,7,3>: Cost 2 ins <0,u,7,3>, lane 1
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2132041728U, // <0,5,7,7>: Cost 2 ins <u,5,7,7>, lane 0
+ 2132041728U, // <0,5,7,u>: Cost 2 ins <u,5,7,7>, lane 0
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2085765121U, // <0,5,u,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 2085699585U, // <0,5,u,2>: Cost 2 ins <0,u,0,2>, lane 1
+ 1012113409U, // <0,5,u,3>: Cost 1 ins LHS, lane 1
+ 1817423814U, // <0,5,u,4>: Cost 2 vzipl LHS, <5,4,7,6>
+ 2085797889U, // <0,5,u,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 2085879809U, // <0,5,u,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 1058226176U, // <0,5,u,7>: Cost 1 ins RHS, lane 0
+ 1012113409U, // <0,5,u,u>: Cost 1 ins LHS, lane 1
+ 2085683201U, // <0,6,0,0>: Cost 2 ins <0,u,0,0>, lane 1
+ 2085691393U, // <0,6,0,1>: Cost 2 ins <0,u,0,1>, lane 1
+ 2132148224U, // <0,6,0,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 2085707777U, // <0,6,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 1678852234U, // <0,6,0,6>: Cost 2 vuzpl <0,1,6,3>, <0,1,6,3>
+ 1879051574U, // <0,6,0,7>: Cost 2 vzipr <0,0,0,0>, RHS
+ 2132148224U, // <0,6,0,u>: Cost 2 ins <u,6,0,2>, lane 0
+ 2993278336U, // <0,6,1,0>: Cost 3 vzipr <6,7,0,1>, <4,6,6,0>
+ 2085765121U, // <0,6,1,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 1812779514U, // <0,6,1,2>: Cost 2 vzipl LHS, <6,2,7,3>
+ 2085781505U, // <0,6,1,3>: Cost 2 ins <0,u,1,3>, lane 1
+ 3159531521U, // <0,6,1,4>: Cost 3 ins <0,u,1,4>, lane 1
+ 2085797889U, // <0,6,1,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 1812779832U, // <0,6,1,6>: Cost 2 vzipl LHS, <6,6,6,6>
+ 1892994358U, // <0,6,1,7>: Cost 2 vzipr <2,3,0,1>, RHS
+ 1892994359U, // <0,6,1,u>: Cost 2 vzipr <2,3,0,1>, RHS
+ 1946997582U, // <0,6,2,0>: Cost 2 vtrnl LHS, <6,7,0,1>
+ 2085838849U, // <0,6,2,1>: Cost 2 ins <0,u,2,1>, lane 1
+ 2085847041U, // <0,6,2,2>: Cost 2 ins <0,u,2,2>, lane 1
+ 1012113409U, // <0,6,2,3>: Cost 1 ins LHS, lane 1
+ 1946997622U, // <0,6,2,4>: Cost 2 vtrnl LHS, <6,7,4,5>
+ 2085871617U, // <0,6,2,5>: Cost 2 ins <0,u,2,5>, lane 1
+ 2085879809U, // <0,6,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 1880395062U, // <0,6,2,7>: Cost 2 vzipr <0,2,0,2>, RHS
+ 1012113409U, // <0,6,2,u>: Cost 1 ins LHS, lane 1
+ 3122942050U, // <0,6,3,0>: Cost 3 vtrnr <6,0,1,3>, <5,6,7,0>
+ 2250527010U, // <0,6,3,1>: Cost 3 vrev <6,0,1,3>
+ 3206111232U, // <0,6,3,2>: Cost 3 ins <u,6,3,2>, lane 0
+ 2085928961U, // <0,6,3,3>: Cost 2 ins <0,u,3,3>, lane 1
+ 3206127616U, // <0,6,3,4>: Cost 3 ins <u,6,3,4>, lane 0
+ 3206135808U, // <0,6,3,5>: Cost 3 ins <u,6,3,5>, lane 0
+ 3206144000U, // <0,6,3,6>: Cost 3 ins <u,6,3,6>, lane 0
+ 2132410368U, // <0,6,3,7>: Cost 2 ins <u,6,3,7>, lane 0
+ 2132410368U, // <0,6,3,u>: Cost 2 ins <u,6,3,7>, lane 0
+ 2888536380U, // <0,6,4,0>: Cost 3 vzipl <0,4,2,6>, <6,0,4,2>
+ 3021574433U, // <0,6,4,1>: Cost 3 vtrnl <0,2,4,6>, <6,0,1,2>
+ 3021574444U, // <0,6,4,2>: Cost 3 vtrnl <0,2,4,6>, <6,0,2,4>
+ 2086002689U, // <0,6,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2086019073U, // <0,6,4,5>: Cost 2 ins <0,u,4,5>, lane 1
+ 2132475904U, // <0,6,4,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 2954153270U, // <0,6,4,7>: Cost 3 vzipr <0,2,0,4>, RHS
+ 2132475904U, // <0,6,4,u>: Cost 2 ins <u,6,4,6>, lane 0
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3206250496U, // <0,6,5,1>: Cost 3 ins <u,6,5,1>, lane 0
+ 3206258688U, // <0,6,5,2>: Cost 3 ins <u,6,5,2>, lane 0
+ 2086076417U, // <0,6,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 3206275072U, // <0,6,5,4>: Cost 3 ins <u,6,5,4>, lane 0
+ 3206283264U, // <0,6,5,5>: Cost 3 ins <u,6,5,5>, lane 0
+ 3206291456U, // <0,6,5,6>: Cost 3 ins <u,6,5,6>, lane 0
+ 2961460534U, // <0,6,5,7>: Cost 3 vzipr <1,4,0,5>, RHS
+ 2086076417U, // <0,6,5,u>: Cost 2 ins <0,u,5,3>, lane 1
+ 2724172540U, // <0,6,6,0>: Cost 3 vext3 <6,6,0,0>, <6,6,0,0>
+ 2889838972U, // <0,6,6,1>: Cost 3 vzipl <0,6,2,3>, <6,1,2,3>
+ 2997300124U, // <0,6,6,2>: Cost 3 vzipr <7,4,0,6>, <4,0,6,2>
+ 2086150145U, // <0,6,6,3>: Cost 2 ins <0,u,6,3>, lane 1
+ 3206348800U, // <0,6,6,4>: Cost 3 ins <u,6,6,4>, lane 0
+ 2889839336U, // <0,6,6,5>: Cost 3 vzipl <0,6,2,3>, <6,5,6,7>
+ 2132623360U, // <0,6,6,6>: Cost 2 ins <u,6,6,6>, lane 0
+ 2132631552U, // <0,6,6,7>: Cost 2 ins <u,6,6,7>, lane 0
+ 2086150145U, // <0,6,6,u>: Cost 2 ins <0,u,6,3>, lane 1
+ 2132647936U, // <0,6,7,0>: Cost 2 ins <u,6,7,0>, lane 0
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3206406144U, // <0,6,7,2>: Cost 3 ins <u,6,7,2>, lane 0
+ 2086223873U, // <0,6,7,3>: Cost 2 ins <0,u,7,3>, lane 1
+ 2132680704U, // <0,6,7,4>: Cost 2 ins <u,6,7,4>, lane 0
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3206438912U, // <0,6,7,6>: Cost 3 ins <u,6,7,6>, lane 0
+ 2132705280U, // <0,6,7,7>: Cost 2 ins <u,6,7,7>, lane 0
+ 2132647936U, // <0,6,7,u>: Cost 2 ins <u,6,7,0>, lane 0
+ 2132647936U, // <0,6,u,0>: Cost 2 ins <u,6,7,0>, lane 0
+ 2085765121U, // <0,6,u,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 2132148224U, // <0,6,u,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 1012113409U, // <0,6,u,3>: Cost 1 ins LHS, lane 1
+ 2132680704U, // <0,6,u,4>: Cost 2 ins <u,6,7,4>, lane 0
+ 2085797889U, // <0,6,u,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 2085879809U, // <0,6,u,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 1880444214U, // <0,6,u,7>: Cost 2 vzipr <0,2,0,u>, RHS
+ 1012113409U, // <0,6,u,u>: Cost 1 ins LHS, lane 1
+ 2085683201U, // <0,7,0,0>: Cost 2 ins <0,u,0,0>, lane 1
+ 2132803584U, // <0,7,0,1>: Cost 2 ins <u,7,0,1>, lane 0
+ 2085699585U, // <0,7,0,2>: Cost 2 ins <0,u,0,2>, lane 1
+ 2085707777U, // <0,7,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 2580516150U, // <0,7,0,4>: Cost 3 vext1 <5,0,7,0>, RHS
+ 2580516476U, // <0,7,0,5>: Cost 3 vext1 <5,0,7,0>, <5,0,7,0>
+ 2586489173U, // <0,7,0,6>: Cost 3 vext1 <6,0,7,0>, <6,0,7,0>
+ 1678925971U, // <0,7,0,7>: Cost 2 vuzpl <0,1,7,3>, <0,1,7,3>
+ 2132803584U, // <0,7,0,u>: Cost 2 ins <u,7,0,1>, lane 0
+ 1812780026U, // <0,7,1,0>: Cost 2 vzipl LHS, <7,0,1,2>
+ 2085765121U, // <0,7,1,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 2132893696U, // <0,7,1,3>: Cost 2 ins <u,7,1,3>, lane 0
+ 1812780390U, // <0,7,1,4>: Cost 2 vzipl LHS, <7,4,5,6>
+ 2085797889U, // <0,7,1,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 2586497366U, // <0,7,1,6>: Cost 3 vext1 <6,0,7,1>, <6,0,7,1>
+ 1812780652U, // <0,7,1,7>: Cost 2 vzipl LHS, <7,7,7,7>
+ 2085765121U, // <0,7,1,u>: Cost 2 ins <0,u,1,1>, lane 1
+ 2085830657U, // <0,7,2,0>: Cost 2 ins <0,u,2,0>, lane 1
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 2085847041U, // <0,7,2,2>: Cost 2 ins <0,u,2,2>, lane 1
+ 1012113409U, // <0,7,2,3>: Cost 1 ins LHS, lane 1
+ 2085863425U, // <0,7,2,4>: Cost 2 ins <0,u,2,4>, lane 1
+ 1946998118U, // <0,7,2,5>: Cost 2 vtrnl LHS, <7,4,5,6>
+ 2085879809U, // <0,7,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 1946998380U, // <0,7,2,7>: Cost 2 vtrnl LHS, <7,7,7,7>
+ 1012113409U, // <0,7,2,u>: Cost 1 ins LHS, lane 1
+ 2989314146U, // <0,7,3,0>: Cost 3 vzipr <6,1,0,3>, <5,6,7,0>
+ 3206766592U, // <0,7,3,1>: Cost 3 ins <u,7,3,1>, lane 0
+ 3020813397U, // <0,7,3,2>: Cost 3 vtrnl <0,1,3,3>, <7,1,2,3>
+ 2085928961U, // <0,7,3,3>: Cost 2 ins <0,u,3,3>, lane 1
+ 3206791168U, // <0,7,3,4>: Cost 3 ins <u,7,3,4>, lane 0
+ 3206799360U, // <0,7,3,5>: Cost 3 ins <u,7,3,5>, lane 0
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3206815744U, // <0,7,3,7>: Cost 3 ins <u,7,3,7>, lane 0
+ 2085928961U, // <0,7,3,u>: Cost 2 ins <0,u,3,3>, lane 1
+ 3206832128U, // <0,7,4,0>: Cost 3 ins <u,7,4,0>, lane 0
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 2086002689U, // <0,7,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 3206864896U, // <0,7,4,4>: Cost 3 ins <u,7,4,4>, lane 0
+ 2133131264U, // <0,7,4,5>: Cost 2 ins <u,7,4,5>, lane 0
+ 2086027265U, // <0,7,4,6>: Cost 2 ins <0,u,4,6>, lane 1
+ 3020887660U, // <0,7,4,7>: Cost 3 vtrnl <0,1,4,3>, <7,7,7,7>
+ 2133131264U, // <0,7,4,u>: Cost 2 ins <u,7,4,5>, lane 0
+ 2993311842U, // <0,7,5,0>: Cost 3 vzipr <6,7,0,5>, <5,6,7,0>
+ 3206914048U, // <0,7,5,1>: Cost 3 ins <u,7,5,1>, lane 0
+ 3020960853U, // <0,7,5,2>: Cost 3 vtrnl <0,1,5,3>, <7,1,2,3>
+ 2086076417U, // <0,7,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3206946816U, // <0,7,5,5>: Cost 3 ins <u,7,5,5>, lane 0
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2133221376U, // <0,7,5,7>: Cost 2 ins <u,7,5,7>, lane 0
+ 2133221376U, // <0,7,5,u>: Cost 2 ins <u,7,5,7>, lane 0
+ 2854834274U, // <0,7,6,0>: Cost 3 vuzpr <6,0,5,7>, <5,6,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3206995968U, // <0,7,6,2>: Cost 3 ins <u,7,6,2>, lane 0
+ 2086150145U, // <0,7,6,3>: Cost 2 ins <0,u,6,3>, lane 1
+ 3207012352U, // <0,7,6,4>: Cost 3 ins <u,7,6,4>, lane 0
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3207028736U, // <0,7,6,6>: Cost 3 ins <u,7,6,6>, lane 0
+ 2133295104U, // <0,7,6,7>: Cost 2 ins <u,7,6,7>, lane 0
+ 2086150145U, // <0,7,6,u>: Cost 2 ins <0,u,6,3>, lane 1
+ 2992001122U, // <0,7,7,0>: Cost 3 vzipr <6,5,0,7>, <5,6,7,0>
+ 3207061504U, // <0,7,7,1>: Cost 3 ins <u,7,7,1>, lane 0
+ 2752672853U, // <0,7,7,2>: Cost 3 vuzpl <0,1,7,3>, <7,1,2,3>
+ 2086223873U, // <0,7,7,3>: Cost 2 ins <0,u,7,3>, lane 1
+ 3207086080U, // <0,7,7,4>: Cost 3 ins <u,7,7,4>, lane 0
+ 3207094272U, // <0,7,7,5>: Cost 3 ins <u,7,7,5>, lane 0
+ 2663093724U, // <0,7,7,6>: Cost 3 vext2 <7,6,0,7>, <7,6,0,7>
+ 2133368832U, // <0,7,7,7>: Cost 2 ins <u,7,7,7>, lane 0
+ 2086223873U, // <0,7,7,u>: Cost 2 ins <0,u,7,3>, lane 1
+ 1817424890U, // <0,7,u,0>: Cost 2 vzipl LHS, <7,0,1,2>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2085699585U, // <0,7,u,2>: Cost 2 ins <0,u,0,2>, lane 1
+ 1012113409U, // <0,7,u,3>: Cost 1 ins LHS, lane 1
+ 1817425254U, // <0,7,u,4>: Cost 2 vzipl LHS, <7,4,5,6>
+ 2085797889U, // <0,7,u,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 2085879809U, // <0,7,u,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2133221376U, // <0,7,u,7>: Cost 2 ins <u,7,5,7>, lane 0
+ 1012113409U, // <0,7,u,u>: Cost 1 ins LHS, lane 1
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1007951877U, // <0,u,0,1>: Cost 1 ins LHS, lane 5
+ 605257830U, // <0,u,0,2>: Cost 1 vuzpl LHS, LHS
+ 1007910914U, // <0,u,0,3>: Cost 1 ins LHS, lane 2
+ 1678999756U, // <0,u,0,4>: Cost 2 vuzpl LHS, <0,2,4,6>
+ 2081767427U, // <0,u,0,5>: Cost 2 ins <0,2,0,u>, lane 3
+ 1947506842U, // <0,u,0,6>: Cost 2 vtrnl <0,2,0,2>, RHS
+ 2081767427U, // <0,u,0,7>: Cost 2 ins <0,2,0,u>, lane 3
+ 605257884U, // <0,u,0,u>: Cost 1 vuzpl LHS, LHS
+ 1812821715U, // <0,u,1,0>: Cost 2 vzipl LHS, <u,0,1,2>
+ 739039022U, // <0,u,1,1>: Cost 1 vzipl LHS, LHS
+ 1813264264U, // <0,u,1,2>: Cost 2 vzipl LHS, <u,2,3,3>
+ 1007910914U, // <0,u,1,3>: Cost 1 ins LHS, lane 2
+ 1812822079U, // <0,u,1,4>: Cost 2 vzipl LHS, <u,4,5,6>
+ 739039386U, // <0,u,1,5>: Cost 1 vzipl LHS, RHS
+ 1813264592U, // <0,u,1,6>: Cost 2 vzipl LHS, <u,6,3,7>
+ 1892994376U, // <0,u,1,7>: Cost 2 vzipr <2,3,0,1>, RHS
+ 739039589U, // <0,u,1,u>: Cost 1 vzipl LHS, LHS
+ 1007509507U, // <0,u,2,0>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,u,2,1>: Cost 1 ins LHS, lane 3
+ 873256750U, // <0,u,2,2>: Cost 1 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1007509507U, // <0,u,2,4>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <0,u,2,5>: Cost 1 ins LHS, lane 3
+ 873257114U, // <0,u,2,6>: Cost 1 vtrnl LHS, RHS
+ 1007509507U, // <0,u,2,7>: Cost 1 ins LHS, lane 3
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2133680132U, // <0,u,3,0>: Cost 2 ins <u,u,3,0>, lane 4
+ 1679001750U, // <0,u,3,1>: Cost 2 vuzpl LHS, <3,0,1,2>
+ 2128388096U, // <0,u,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 1007910914U, // <0,u,3,3>: Cost 1 ins LHS, lane 2
+ 2133712900U, // <0,u,3,4>: Cost 2 ins <u,u,3,4>, lane 4
+ 1679002114U, // <0,u,3,5>: Cost 2 vuzpl LHS, <3,4,5,6>
+ 2082340866U, // <0,u,3,6>: Cost 2 ins <0,2,u,6>, lane 2
+ 2133737476U, // <0,u,3,7>: Cost 2 ins <u,u,3,7>, lane 4
+ 1007910914U, // <0,u,3,u>: Cost 1 ins LHS, lane 2
+ 2082062339U, // <0,u,4,0>: Cost 2 ins <0,2,4,u>, lane 3
+ 1814714158U, // <0,u,4,1>: Cost 2 vzipl <0,4,1,5>, LHS
+ 1947834158U, // <0,u,4,2>: Cost 2 vtrnl <0,2,4,6>, LHS
+ 1007910914U, // <0,u,4,3>: Cost 1 ins LHS, lane 2
+ 1947828428U, // <0,u,4,4>: Cost 2 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1007951877U, // <0,u,4,5>: Cost 1 ins LHS, lane 5
+ 605261110U, // <0,u,4,6>: Cost 1 vuzpl LHS, RHS
+ 2082062339U, // <0,u,4,7>: Cost 2 ins <0,2,4,u>, lane 3
+ 605261128U, // <0,u,4,u>: Cost 1 vuzpl LHS, RHS
+ 2080964610U, // <0,u,5,0>: Cost 2 ins <0,0,u,0>, lane 2
+ 2128527360U, // <0,u,5,1>: Cost 2 ins <u,0,5,1>, lane 0
+ 2080980994U, // <0,u,5,2>: Cost 2 ins <0,0,u,2>, lane 2
+ 1007910914U, // <0,u,5,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <0,u,5,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 2133868548U, // <0,u,5,5>: Cost 2 ins <u,u,5,5>, lane 4
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 1751092534U, // <0,u,5,7>: Cost 2 vuzpr <1,0,3,u>, RHS
+ 1007910914U, // <0,u,5,u>: Cost 1 ins LHS, lane 2
+ 1679004494U, // <0,u,6,0>: Cost 2 vuzpl LHS, <6,7,0,1>
+ 2080972802U, // <0,u,6,1>: Cost 2 ins <0,0,u,1>, lane 2
+ 2128609280U, // <0,u,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 1007910914U, // <0,u,6,3>: Cost 1 ins LHS, lane 2
+ 1679004534U, // <0,u,6,4>: Cost 2 vuzpl LHS, <6,7,4,5>
+ 2083659778U, // <0,u,6,5>: Cost 2 ins <0,4,u,5>, lane 2
+ 2133950468U, // <0,u,6,6>: Cost 2 ins <u,u,6,6>, lane 4
+ 1060216836U, // <0,u,6,7>: Cost 1 ins RHS, lane 4
+ 1007910914U, // <0,u,6,u>: Cost 1 ins LHS, lane 2
+ 2133975044U, // <0,u,7,0>: Cost 2 ins <u,u,7,0>, lane 4
+ 2080972802U, // <0,u,7,1>: Cost 2 ins <0,0,u,1>, lane 2
+ 2080980994U, // <0,u,7,2>: Cost 2 ins <0,0,u,2>, lane 2
+ 1007910914U, // <0,u,7,3>: Cost 1 ins LHS, lane 2
+ 2134007812U, // <0,u,7,4>: Cost 2 ins <u,u,7,4>, lane 4
+ 2083659778U, // <0,u,7,5>: Cost 2 ins <0,4,u,5>, lane 2
+ 2134024196U, // <0,u,7,6>: Cost 2 ins <u,u,7,6>, lane 4
+ 2134032388U, // <0,u,7,7>: Cost 2 ins <u,u,7,7>, lane 4
+ 1007910914U, // <0,u,7,u>: Cost 1 ins LHS, lane 2
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 743683886U, // <0,u,u,1>: Cost 1 vzipl LHS, LHS
+ 605263662U, // <0,u,u,2>: Cost 1 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1007509507U, // <0,u,u,4>: Cost 1 ins LHS, lane 3
+ 743684250U, // <0,u,u,5>: Cost 1 vzipl LHS, RHS
+ 605264026U, // <0,u,u,6>: Cost 1 vuzpl LHS, RHS
+ 1007509507U, // <0,u,u,7>: Cost 1 ins LHS, lane 3
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2128150528U, // <1,0,0,0>: Cost 2 ins <u,0,0,0>, lane 0
+ 1818148966U, // <1,0,0,1>: Cost 2 vzipl <1,0,3,2>, LHS
+ 2086952962U, // <1,0,0,2>: Cost 2 ins <1,0,u,2>, lane 2
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2891891026U, // <1,0,0,4>: Cost 3 vzipl <1,0,3,2>, <0,4,1,5>
+ 3165437953U, // <1,0,0,5>: Cost 3 ins <1,u,0,5>, lane 1
+ 3160154115U, // <1,0,0,6>: Cost 3 ins <1,0,0,u>, lane 3
+ 3160154115U, // <1,0,0,7>: Cost 3 ins <1,0,0,u>, lane 3
+ 1818149533U, // <1,0,0,u>: Cost 2 vzipl <1,0,3,2>, LHS
+ 1141522514U, // <1,0,1,0>: Cost 2 vrev <0,1,0,1>
+ 1818656870U, // <1,0,1,1>: Cost 2 vzipl <1,1,1,1>, LHS
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2091753473U, // <1,0,1,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 1477070134U, // <1,0,1,4>: Cost 2 vext1 <0,1,0,1>, RHS
+ 2760770560U, // <1,0,1,5>: Cost 3 vuzpl <1,5,0,2>, <1,3,5,7>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3165528065U, // <1,0,1,7>: Cost 3 ins <1,u,1,7>, lane 1
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 1819459686U, // <1,0,2,1>: Cost 2 vzipl <1,2,3,0>, LHS
+ 2128314368U, // <1,0,2,2>: Cost 2 ins <u,0,2,2>, lane 0
+ 2087002117U, // <1,0,2,3>: Cost 2 ins <1,0,u,u>, lane 5
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 2970798548U, // <1,0,2,5>: Cost 3 vzipr <3,0,1,2>, <3,4,0,5>
+ 3165593601U, // <1,0,2,6>: Cost 3 ins <1,u,2,6>, lane 1
+ 2592625730U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,1,0,2>
+ 1819460253U, // <1,0,2,u>: Cost 2 vzipl <1,2,3,0>, LHS
+ 2014101504U, // <1,0,3,0>: Cost 2 vtrnr LHS, <0,0,0,0>
+ 2014101514U, // <1,0,3,1>: Cost 2 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2091900929U, // <1,0,3,3>: Cost 2 ins <1,u,3,3>, lane 1
+ 2091909121U, // <1,0,3,4>: Cost 2 ins <1,u,3,4>, lane 1
+ 2086633475U, // <1,0,3,5>: Cost 2 ins <1,0,3,u>, lane 3
+ 2086633475U, // <1,0,3,6>: Cost 2 ins <1,0,3,u>, lane 3
+ 2091933697U, // <1,0,3,7>: Cost 2 ins <1,u,3,7>, lane 1
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2667752338U, // <1,0,4,0>: Cost 3 vext2 <u,4,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2086952962U, // <1,0,4,2>: Cost 2 ins <1,0,u,2>, lane 2
+ 2819383641U, // <1,0,4,3>: Cost 3 vuzpr <0,1,2,0>, <0,4,2,3>
+ 2894569810U, // <1,0,4,4>: Cost 3 vzipl <1,4,3,5>, <0,4,1,5>
+ 2087002117U, // <1,0,4,5>: Cost 2 ins <1,0,u,u>, lane 5
+ 2758102326U, // <1,0,4,6>: Cost 3 vuzpl <1,1,0,0>, RHS
+ 2819386597U, // <1,0,4,7>: Cost 3 vuzpr <0,1,2,0>, <4,4,6,7>
+ 2086952962U, // <1,0,4,u>: Cost 2 ins <1,0,u,2>, lane 2
+ 2955558912U, // <1,0,5,0>: Cost 3 vzipr <0,4,1,5>, <0,0,0,0>
+ 1821507686U, // <1,0,5,1>: Cost 2 vzipl <1,5,3,7>, LHS
+ 1954545766U, // <1,0,5,2>: Cost 2 vtrnl <1,3,5,7>, LHS
+ 3165790209U, // <1,0,5,3>: Cost 3 ins <1,u,5,3>, lane 1
+ 1141850234U, // <1,0,5,4>: Cost 2 vrev <0,1,4,5>
+ 3165806593U, // <1,0,5,5>: Cost 3 ins <1,u,5,5>, lane 1
+ 3202310144U, // <1,0,5,6>: Cost 3 ins <u,0,5,6>, lane 0
+ 2092081153U, // <1,0,5,7>: Cost 2 ins <1,u,5,7>, lane 1
+ 1954545820U, // <1,0,5,u>: Cost 2 vtrnl <1,3,5,7>, LHS
+ 3202334720U, // <1,0,6,0>: Cost 3 ins <u,0,6,0>, lane 0
+ 2895765606U, // <1,0,6,1>: Cost 3 vzipl <1,6,1,7>, LHS
+ 2128609280U, // <1,0,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 2819383803U, // <1,0,6,3>: Cost 3 vuzpr <0,1,2,0>, <0,6,2,3>
+ 2896060754U, // <1,0,6,4>: Cost 3 vzipl <1,6,5,7>, <0,4,1,5>
+ 2215673988U, // <1,0,6,5>: Cost 3 vrev <0,1,5,6>
+ 3165888513U, // <1,0,6,6>: Cost 3 ins <1,u,6,6>, lane 1
+ 2087002117U, // <1,0,6,7>: Cost 2 ins <1,0,u,u>, lane 5
+ 2128609280U, // <1,0,6,u>: Cost 2 ins <u,0,6,2>, lane 0
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 2974156454U, // <1,0,7,1>: Cost 3 vzipr <3,5,1,7>, <2,3,0,1>
+ 2086952962U, // <1,0,7,2>: Cost 2 ins <1,0,u,2>, lane 2
+ 2861265024U, // <1,0,7,3>: Cost 3 vuzpr <7,1,3,0>, <5,7,1,3>
+ 3202441216U, // <1,0,7,4>: Cost 3 ins <u,0,7,4>, lane 0
+ 3165954049U, // <1,0,7,5>: Cost 3 ins <1,u,7,5>, lane 1
+ 1142014094U, // <1,0,7,6>: Cost 2 vrev <0,1,6,7>
+ 3165970433U, // <1,0,7,7>: Cost 3 ins <1,u,7,7>, lane 1
+ 2086952962U, // <1,0,7,u>: Cost 2 ins <1,0,u,2>, lane 2
+ 2014142464U, // <1,0,u,0>: Cost 2 vtrnr LHS, <0,0,0,0>
+ 2014142474U, // <1,0,u,1>: Cost 2 vtrnr LHS, <0,0,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2091753473U, // <1,0,u,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 2091909121U, // <1,0,u,4>: Cost 2 ins <1,u,3,4>, lane 1
+ 2086633475U, // <1,0,u,5>: Cost 2 ins <1,0,3,u>, lane 3
+ 2086633475U, // <1,0,u,6>: Cost 2 ins <1,0,3,u>, lane 3
+ 2091933697U, // <1,0,u,7>: Cost 2 ins <1,u,3,7>, lane 1
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1818149622U, // <1,1,0,0>: Cost 2 vzipl <1,0,3,2>, <1,0,3,2>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 1684439142U, // <1,1,0,2>: Cost 2 vuzpl <1,1,1,1>, LHS
+ 2087624706U, // <1,1,0,3>: Cost 2 ins <1,1,u,3>, lane 2
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2891891856U, // <1,1,0,5>: Cost 3 vzipl <1,0,3,2>, <1,5,3,7>
+ 3161391106U, // <1,1,0,6>: Cost 3 ins <1,1,u,6>, lane 2
+ 3161399298U, // <1,1,0,7>: Cost 3 ins <1,1,u,7>, lane 2
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2087149571U, // <1,1,1,2>: Cost 2 ins <1,1,1,u>, lane 3
+ 1751548006U, // <1,1,1,3>: Cost 2 vuzpr <1,1,1,1>, LHS
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2087149571U, // <1,1,1,5>: Cost 2 ins <1,1,1,u>, lane 3
+ 2087149571U, // <1,1,1,6>: Cost 2 ins <1,1,1,u>, lane 3
+ 2087149571U, // <1,1,1,7>: Cost 2 ins <1,1,1,u>, lane 3
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2128961536U, // <1,1,2,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 2128969728U, // <1,1,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 1819460502U, // <1,1,2,2>: Cost 2 vzipl <1,2,3,0>, <1,2,3,0>
+ 1055244288U, // <1,1,2,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <1,1,2,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 2129002496U, // <1,1,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2129010688U, // <1,1,2,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2129018880U, // <1,1,2,7>: Cost 2 ins <u,1,2,7>, lane 0
+ 1055244288U, // <1,1,2,u>: Cost 1 ins LHS, lane 0
+ 2091876353U, // <1,1,3,0>: Cost 2 ins <1,u,3,0>, lane 1
+ 2014102324U, // <1,1,3,1>: Cost 2 vtrnr LHS, <1,1,1,1>
+ 2091892737U, // <1,1,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 940359782U, // <1,1,3,3>: Cost 1 vtrnr LHS, LHS
+ 2091909121U, // <1,1,3,4>: Cost 2 ins <1,u,3,4>, lane 1
+ 2087297027U, // <1,1,3,5>: Cost 2 ins <1,1,3,u>, lane 3
+ 2087297027U, // <1,1,3,6>: Cost 2 ins <1,1,3,u>, lane 3
+ 2091933697U, // <1,1,3,7>: Cost 2 ins <1,u,3,7>, lane 1
+ 940359787U, // <1,1,3,u>: Cost 1 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2087608322U, // <1,1,4,1>: Cost 2 ins <1,1,u,1>, lane 2
+ 2894496662U, // <1,1,4,2>: Cost 3 vzipl <1,4,2,5>, <1,2,3,0>
+ 2087624706U, // <1,1,4,3>: Cost 2 ins <1,1,u,3>, lane 2
+ 2014109799U, // <1,1,4,4>: Cost 2 vtrnr <0,1,2,4>, <0,1,2,4>
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 1684442422U, // <1,1,4,6>: Cost 2 vuzpl <1,1,1,1>, RHS
+ 3161399298U, // <1,1,4,7>: Cost 3 ins <1,1,u,7>, lane 2
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 3028288624U, // <1,1,5,0>: Cost 3 vtrnl <1,3,5,7>, <1,5,0,2>
+ 2087608322U, // <1,1,5,1>: Cost 2 ins <1,1,u,1>, lane 2
+ 2955561110U, // <1,1,5,2>: Cost 3 vzipr <0,4,1,5>, <3,0,1,2>
+ 2087624706U, // <1,1,5,3>: Cost 2 ins <1,1,u,3>, lane 2
+ 2955558925U, // <1,1,5,4>: Cost 3 vzipr <0,4,1,5>, <0,0,1,4>
+ 1881817426U, // <1,1,5,5>: Cost 2 vzipr <0,4,1,5>, <0,4,1,5>
+ 2670415970U, // <1,1,5,6>: Cost 3 vext2 <u,u,1,1>, <5,6,7,0>
+ 1751551286U, // <1,1,5,7>: Cost 2 vuzpr <1,1,1,1>, RHS
+ 1751551287U, // <1,1,5,u>: Cost 2 vuzpr <1,1,1,1>, RHS
+ 3165839361U, // <1,1,6,0>: Cost 3 ins <1,u,6,0>, lane 1
+ 2087608322U, // <1,1,6,1>: Cost 2 ins <1,1,u,1>, lane 2
+ 2973485206U, // <1,1,6,2>: Cost 3 vzipr <3,4,1,6>, <3,0,1,2>
+ 2087624706U, // <1,1,6,3>: Cost 2 ins <1,1,u,3>, lane 2
+ 2221572948U, // <1,1,6,4>: Cost 3 vrev <1,1,4,6>
+ 2955567442U, // <1,1,6,5>: Cost 3 vzipr <0,4,1,6>, <0,4,1,5>
+ 2014126185U, // <1,1,6,6>: Cost 2 vtrnr <0,1,2,6>, <0,1,2,6>
+ 2087665669U, // <1,1,6,7>: Cost 2 ins <1,1,u,u>, lane 5
+ 2087624706U, // <1,1,6,u>: Cost 2 ins <1,1,u,3>, lane 2
+ 2670416890U, // <1,1,7,0>: Cost 3 vext2 <u,u,1,1>, <7,0,1,2>
+ 2087608322U, // <1,1,7,1>: Cost 2 ins <1,1,u,1>, lane 2
+ 3203088384U, // <1,1,7,2>: Cost 3 ins <u,1,7,2>, lane 0
+ 2129354752U, // <1,1,7,3>: Cost 2 ins <u,1,7,3>, lane 0
+ 2670417254U, // <1,1,7,4>: Cost 3 vext2 <u,u,1,1>, <7,4,5,6>
+ 2221654878U, // <1,1,7,5>: Cost 3 vrev <1,1,5,7>
+ 3161391106U, // <1,1,7,6>: Cost 3 ins <1,1,u,6>, lane 2
+ 2014134378U, // <1,1,7,7>: Cost 2 vtrnr <0,1,2,7>, <0,1,2,7>
+ 2129354752U, // <1,1,7,u>: Cost 2 ins <u,1,7,3>, lane 0
+ 1818149622U, // <1,1,u,0>: Cost 2 vzipl <1,0,3,2>, <1,0,3,2>
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 1684444974U, // <1,1,u,2>: Cost 2 vuzpl <1,1,1,1>, LHS
+ 940400742U, // <1,1,u,3>: Cost 1 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 1684445338U, // <1,1,u,6>: Cost 2 vuzpl <1,1,1,1>, RHS
+ 1751551529U, // <1,1,u,7>: Cost 2 vuzpr <1,1,1,1>, RHS
+ 940400747U, // <1,1,u,u>: Cost 1 vtrnr LHS, LHS
+ 2088263682U, // <1,2,0,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2129494016U, // <1,2,0,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 2954854502U, // <1,2,0,3>: Cost 3 vzipr <0,3,1,0>, LHS
+ 2088296450U, // <1,2,0,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 3165437953U, // <1,2,0,5>: Cost 3 ins <1,u,0,5>, lane 1
+ 2891892666U, // <1,2,0,6>: Cost 3 vzipl <1,0,3,2>, <2,6,3,7>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2088263682U, // <1,2,1,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 2091737089U, // <1,2,1,1>: Cost 2 ins <1,u,1,1>, lane 1
+ 1745657957U, // <1,2,1,2>: Cost 2 vuzpr <0,1,2,2>, <0,1,2,2>
+ 1884438630U, // <1,2,1,3>: Cost 2 vzipr <0,u,1,1>, LHS
+ 2088296450U, // <1,2,1,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2958180700U, // <1,2,1,6>: Cost 3 vzipr <0,u,1,1>, <0,4,2,6>
+ 3165528065U, // <1,2,1,7>: Cost 3 ins <1,u,1,7>, lane 1
+ 1884438635U, // <1,2,1,u>: Cost 2 vzipr <0,u,1,1>, LHS
+ 2088263682U, // <1,2,2,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 2893235754U, // <1,2,2,1>: Cost 3 vzipl <1,2,3,4>, <2,1,4,3>
+ 2129641472U, // <1,2,2,2>: Cost 2 ins <u,2,2,2>, lane 0
+ 1897054310U, // <1,2,2,3>: Cost 2 vzipr <3,0,1,2>, LHS
+ 2088296450U, // <1,2,2,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 3165585409U, // <1,2,2,5>: Cost 3 ins <1,u,2,5>, lane 1
+ 2893203386U, // <1,2,2,6>: Cost 3 vzipl <1,2,3,0>, <2,6,3,7>
+ 2994684010U, // <1,2,2,7>: Cost 3 vzipr <7,0,1,2>, <0,1,2,7>
+ 1897054315U, // <1,2,2,u>: Cost 2 vzipr <3,0,1,2>, LHS
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 2014101708U, // <1,2,3,6>: Cost 2 vtrnr LHS, <0,2,4,6>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2088263682U, // <1,2,4,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 3162013698U, // <1,2,4,1>: Cost 3 ins <1,2,u,1>, lane 2
+ 3162021890U, // <1,2,4,2>: Cost 3 ins <1,2,u,2>, lane 2
+ 2954887270U, // <1,2,4,3>: Cost 3 vzipr <0,3,1,4>, LHS
+ 2088296450U, // <1,2,4,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2129821696U, // <1,2,4,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2088263682U, // <1,2,5,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 2955558932U, // <1,2,5,2>: Cost 3 vzipr <0,4,1,5>, <0,0,2,2>
+ 1881817190U, // <1,2,5,3>: Cost 2 vzipr <0,4,1,5>, LHS
+ 2088296450U, // <1,2,5,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2955559260U, // <1,2,5,6>: Cost 3 vzipr <0,4,1,5>, <0,4,2,6>
+ 2092081153U, // <1,2,5,7>: Cost 2 ins <1,u,5,7>, lane 1
+ 1881817195U, // <1,2,5,u>: Cost 2 vzipr <0,4,1,5>, LHS
+ 2088263682U, // <1,2,6,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 3162013698U, // <1,2,6,1>: Cost 3 ins <1,2,u,1>, lane 2
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2954240102U, // <1,2,6,3>: Cost 3 vzipr <0,2,1,6>, LHS
+ 2088296450U, // <1,2,6,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 3162046466U, // <1,2,6,5>: Cost 3 ins <1,2,u,5>, lane 2
+ 2895914938U, // <1,2,6,6>: Cost 3 vzipl <1,6,3,7>, <2,6,3,7>
+ 2088329221U, // <1,2,6,7>: Cost 2 ins <1,2,u,u>, lane 5
+ 2088263682U, // <1,2,6,u>: Cost 2 ins <1,2,u,0>, lane 2
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 3203743744U, // <1,2,7,1>: Cost 3 ins <u,2,7,1>, lane 0
+ 3203751936U, // <1,2,7,2>: Cost 3 ins <u,2,7,2>, lane 0
+ 2130018304U, // <1,2,7,3>: Cost 2 ins <u,2,7,3>, lane 0
+ 2088296450U, // <1,2,7,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 3203776512U, // <1,2,7,5>: Cost 3 ins <u,2,7,5>, lane 0
+ 3203784704U, // <1,2,7,6>: Cost 3 ins <u,2,7,6>, lane 0
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2014142668U, // <1,2,u,6>: Cost 2 vtrnr LHS, <0,2,4,6>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1745666048U, // <1,3,0,0>: Cost 2 vuzpr LHS, <0,0,0,0>
+ 1746108426U, // <1,3,0,1>: Cost 2 vuzpr LHS, <0,0,1,1>
+ 1745666806U, // <1,3,0,2>: Cost 2 vuzpr LHS, <1,0,3,2>
+ 2088951810U, // <1,3,0,3>: Cost 2 ins <1,3,u,3>, lane 2
+ 2819850253U, // <1,3,0,4>: Cost 3 vuzpr LHS, <0,0,1,4>
+ 2758984055U, // <1,3,0,5>: Cost 3 vuzpl <1,2,3,0>, <0,4,5,6>
+ 2867183658U, // <1,3,0,6>: Cost 3 vuzpr LHS, <0,0,4,6>
+ 2088984578U, // <1,3,0,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 1745668252U, // <1,3,0,u>: Cost 2 vuzpr LHS, <3,0,1,u>
+ 2088476675U, // <1,3,1,0>: Cost 2 ins <1,3,1,u>, lane 3
+ 1745666868U, // <1,3,1,1>: Cost 2 vuzpr LHS, <1,1,1,1>
+ 2088476675U, // <1,3,1,2>: Cost 2 ins <1,3,1,u>, lane 3
+ 671924326U, // <1,3,1,3>: Cost 1 vuzpr LHS, LHS
+ 2088476675U, // <1,3,1,4>: Cost 2 ins <1,3,1,u>, lane 3
+ 2088476675U, // <1,3,1,5>: Cost 2 ins <1,3,1,u>, lane 3
+ 2088476675U, // <1,3,1,6>: Cost 2 ins <1,3,1,u>, lane 3
+ 2088984578U, // <1,3,1,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 671924331U, // <1,3,1,u>: Cost 1 vuzpr LHS, LHS
+ 1745666966U, // <1,3,2,0>: Cost 2 vuzpr LHS, <1,2,3,0>
+ 2819408044U, // <1,3,2,1>: Cost 3 vuzpr LHS, <0,2,1,1>
+ 1745666212U, // <1,3,2,2>: Cost 2 vuzpr LHS, <0,2,0,2>
+ 1746110066U, // <1,3,2,3>: Cost 2 vuzpr LHS, <2,2,3,3>
+ 1745666970U, // <1,3,2,4>: Cost 2 vuzpr LHS, <1,2,3,4>
+ 2819408066U, // <1,3,2,5>: Cost 3 vuzpr LHS, <0,2,3,5>
+ 1745666252U, // <1,3,2,6>: Cost 2 vuzpr LHS, <0,2,4,6>
+ 2088984578U, // <1,3,2,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 1745666218U, // <1,3,2,u>: Cost 2 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1745667750U, // <1,3,3,1>: Cost 2 vuzpr LHS, <2,3,0,1>
+ 2091892737U, // <1,3,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 1745667032U, // <1,3,3,3>: Cost 2 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 1745667790U, // <1,3,3,5>: Cost 2 vuzpr LHS, <2,3,4,5>
+ 2819408868U, // <1,3,3,6>: Cost 3 vuzpr LHS, <1,3,2,6>
+ 2014102528U, // <1,3,3,7>: Cost 2 vtrnr LHS, <1,3,5,7>
+ 1745667037U, // <1,3,3,u>: Cost 2 vuzpr LHS, <1,3,1,u>
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2759019375U, // <1,3,4,1>: Cost 3 vuzpl <1,2,3,4>, <4,0,1,2>
+ 2759019466U, // <1,3,4,2>: Cost 3 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2088951810U, // <1,3,4,3>: Cost 2 ins <1,3,u,3>, lane 2
+ 1793445072U, // <1,3,4,4>: Cost 2 vuzpr LHS, <4,4,4,4>
+ 1746108754U, // <1,3,4,5>: Cost 2 vuzpr LHS, <0,4,1,5>
+ 1745668610U, // <1,3,4,6>: Cost 2 vuzpr LHS, <3,4,5,6>
+ 2088984578U, // <1,3,4,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 1745668612U, // <1,3,4,u>: Cost 2 vuzpr LHS, <3,4,5,u>
+ 2088771587U, // <1,3,5,0>: Cost 2 ins <1,3,5,u>, lane 3
+ 2088771587U, // <1,3,5,1>: Cost 2 ins <1,3,5,u>, lane 3
+ 2088771587U, // <1,3,5,2>: Cost 2 ins <1,3,5,u>, lane 3
+ 2088951810U, // <1,3,5,3>: Cost 2 ins <1,3,u,3>, lane 2
+ 2088771587U, // <1,3,5,4>: Cost 2 ins <1,3,5,u>, lane 3
+ 1793445892U, // <1,3,5,5>: Cost 2 vuzpr LHS, <5,5,5,5>
+ 2088771587U, // <1,3,5,6>: Cost 2 ins <1,3,5,u>, lane 3
+ 671927606U, // <1,3,5,7>: Cost 1 vuzpr LHS, RHS
+ 671927607U, // <1,3,5,u>: Cost 1 vuzpr LHS, RHS
+ 1793445986U, // <1,3,6,0>: Cost 2 vuzpr LHS, <5,6,7,0>
+ 2867185561U, // <1,3,6,1>: Cost 3 vuzpr LHS, <2,6,0,1>
+ 1793445196U, // <1,3,6,2>: Cost 2 vuzpr LHS, <4,6,0,2>
+ 2088951810U, // <1,3,6,3>: Cost 2 ins <1,3,u,3>, lane 2
+ 1793445990U, // <1,3,6,4>: Cost 2 vuzpr LHS, <5,6,7,4>
+ 2849642738U, // <1,3,6,5>: Cost 3 vuzpr <5,1,7,3>, <u,6,7,5>
+ 1793445236U, // <1,3,6,6>: Cost 2 vuzpr LHS, <4,6,4,6>
+ 1746110394U, // <1,3,6,7>: Cost 2 vuzpr LHS, <2,6,3,7>
+ 1746110395U, // <1,3,6,u>: Cost 2 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 1793446734U, // <1,3,7,1>: Cost 2 vuzpr LHS, <6,7,0,1>
+ 2867187830U, // <1,3,7,2>: Cost 3 vuzpr LHS, <5,7,0,2>
+ 1793446016U, // <1,3,7,3>: Cost 2 vuzpr LHS, <5,7,1,3>
+ 2849637679U, // <1,3,7,4>: Cost 3 vuzpr <5,1,7,3>, <1,7,3,4>
+ 1793446774U, // <1,3,7,5>: Cost 2 vuzpr LHS, <6,7,4,5>
+ 2867185674U, // <1,3,7,6>: Cost 3 vuzpr LHS, <2,7,3,6>
+ 1793446056U, // <1,3,7,7>: Cost 2 vuzpr LHS, <5,7,5,7>
+ 1793446021U, // <1,3,7,u>: Cost 2 vuzpr LHS, <5,7,1,u>
+ 1746109820U, // <1,3,u,0>: Cost 2 vuzpr LHS, <1,u,3,0>
+ 2014144166U, // <1,3,u,1>: Cost 2 vtrnr LHS, <2,3,0,1>
+ 1745668894U, // <1,3,u,2>: Cost 2 vuzpr LHS, <3,u,1,2>
+ 671924893U, // <1,3,u,3>: Cost 1 vuzpr LHS, LHS
+ 1746109824U, // <1,3,u,4>: Cost 2 vuzpr LHS, <1,u,3,4>
+ 2014144206U, // <1,3,u,5>: Cost 2 vtrnr LHS, <2,3,4,5>
+ 1745668934U, // <1,3,u,6>: Cost 2 vuzpr LHS, <3,u,5,6>
+ 671927849U, // <1,3,u,7>: Cost 1 vuzpr LHS, RHS
+ 671924898U, // <1,3,u,u>: Cost 1 vuzpr LHS, LHS
+ 3165396993U, // <1,4,0,0>: Cost 3 ins <1,u,0,0>, lane 1
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2758434918U, // <1,4,0,2>: Cost 3 vuzpl <1,1,4,5>, LHS
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 3165429761U, // <1,4,0,4>: Cost 3 ins <1,u,0,4>, lane 1
+ 1818152246U, // <1,4,0,5>: Cost 2 vzipl <1,0,3,2>, RHS
+ 3026537782U, // <1,4,0,6>: Cost 3 vtrnl <1,1,0,0>, RHS
+ 3162808323U, // <1,4,0,7>: Cost 3 ins <1,4,0,u>, lane 3
+ 1818152489U, // <1,4,0,u>: Cost 2 vzipl <1,0,3,2>, RHS
+ 3204620288U, // <1,4,1,0>: Cost 3 ins <u,4,1,0>, lane 0
+ 2091737089U, // <1,4,1,1>: Cost 2 ins <1,u,1,1>, lane 1
+ 3204636672U, // <1,4,1,2>: Cost 3 ins <u,4,1,2>, lane 0
+ 2091753473U, // <1,4,1,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 1745674343U, // <1,4,1,4>: Cost 2 vuzpr <0,1,2,4>, <0,1,2,4>
+ 1818660150U, // <1,4,1,5>: Cost 2 vzipl <1,1,1,1>, RHS
+ 1952877878U, // <1,4,1,6>: Cost 2 vtrnl <1,1,1,1>, RHS
+ 3165528065U, // <1,4,1,7>: Cost 3 ins <1,u,1,7>, lane 1
+ 1818660393U, // <1,4,1,u>: Cost 2 vzipl <1,1,1,1>, RHS
+ 2893237103U, // <1,4,2,0>: Cost 3 vzipl <1,2,3,4>, <4,0,1,2>
+ 2893237194U, // <1,4,2,1>: Cost 3 vzipl <1,2,3,4>, <4,1,2,3>
+ 3165560833U, // <1,4,2,2>: Cost 3 ins <1,u,2,2>, lane 1
+ 2130976768U, // <1,4,2,3>: Cost 2 ins <u,4,2,3>, lane 0
+ 2893237467U, // <1,4,2,4>: Cost 3 vzipl <1,2,3,4>, <4,4,5,6>
+ 1819462966U, // <1,4,2,5>: Cost 2 vzipl <1,2,3,0>, RHS
+ 2131001344U, // <1,4,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 3165601793U, // <1,4,2,7>: Cost 3 ins <1,u,2,7>, lane 1
+ 1819463209U, // <1,4,2,u>: Cost 2 vzipl <1,2,3,0>, RHS
+ 2091876353U, // <1,4,3,0>: Cost 2 ins <1,u,3,0>, lane 1
+ 3027454831U, // <1,4,3,1>: Cost 3 vtrnl <1,2,3,4>, <4,0,1,2>
+ 2091892737U, // <1,4,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 2091900929U, // <1,4,3,3>: Cost 2 ins <1,u,3,3>, lane 1
+ 2061880528U, // <1,4,3,4>: Cost 2 vtrnr LHS, <4,4,4,4>
+ 2014101842U, // <1,4,3,5>: Cost 2 vtrnr LHS, <0,4,1,5>
+ 2014101852U, // <1,4,3,6>: Cost 2 vtrnr LHS, <0,4,2,6>
+ 2091933697U, // <1,4,3,7>: Cost 2 ins <1,u,3,7>, lane 1
+ 2014101845U, // <1,4,3,u>: Cost 2 vtrnr LHS, <0,4,1,u>
+ 2557100134U, // <1,4,4,0>: Cost 3 vext1 <1,1,4,4>, LHS
+ 2557100882U, // <1,4,4,1>: Cost 3 vext1 <1,1,4,4>, <1,1,4,4>
+ 3165708289U, // <1,4,4,2>: Cost 3 ins <1,u,4,2>, lane 1
+ 2819416409U, // <1,4,4,3>: Cost 3 vuzpr <0,1,2,4>, <0,4,2,3>
+ 2131132416U, // <1,4,4,4>: Cost 2 ins <u,4,4,4>, lane 0
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2758438198U, // <1,4,4,6>: Cost 3 vuzpl <1,1,4,5>, RHS
+ 2819419365U, // <1,4,4,7>: Cost 3 vuzpr <0,1,2,4>, <4,4,6,7>
+ 2131132416U, // <1,4,4,u>: Cost 2 ins <u,4,4,4>, lane 0
+ 1477394554U, // <1,4,5,0>: Cost 2 vext1 <0,1,4,5>, <0,1,4,5>
+ 2955558949U, // <1,4,5,1>: Cost 3 vzipr <0,4,1,5>, <0,0,4,1>
+ 3204931584U, // <1,4,5,2>: Cost 3 ins <u,4,5,2>, lane 0
+ 3165790209U, // <1,4,5,3>: Cost 3 ins <1,u,5,3>, lane 1
+ 1477397814U, // <1,4,5,4>: Cost 2 vext1 <0,1,4,5>, RHS
+ 1821510966U, // <1,4,5,5>: Cost 2 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2092081153U, // <1,4,5,7>: Cost 2 ins <1,u,5,7>, lane 1
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117268U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,4,6>
+ 3165855745U, // <1,4,6,2>: Cost 3 ins <1,u,6,2>, lane 1
+ 2569062662U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,1,4,6>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 2895768886U, // <1,4,6,5>: Cost 3 vzipl <1,6,1,7>, RHS
+ 2131296256U, // <1,4,6,6>: Cost 2 ins <u,4,6,6>, lane 0
+ 2131304448U, // <1,4,6,7>: Cost 2 ins <u,4,6,7>, lane 0
+ 2131296256U, // <1,4,6,u>: Cost 2 ins <u,4,6,6>, lane 0
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3165921281U, // <1,4,7,1>: Cost 3 ins <1,u,7,1>, lane 1
+ 3205079040U, // <1,4,7,2>: Cost 3 ins <u,4,7,2>, lane 0
+ 2861297792U, // <1,4,7,3>: Cost 3 vuzpr <7,1,3,4>, <5,7,1,3>
+ 2669778278U, // <1,4,7,4>: Cost 3 vext2 <u,7,1,4>, <7,4,5,6>
+ 3205103616U, // <1,4,7,5>: Cost 3 ins <u,4,7,5>, lane 0
+ 2131369984U, // <1,4,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 3165970433U, // <1,4,7,7>: Cost 3 ins <1,u,7,7>, lane 1
+ 2131369984U, // <1,4,7,u>: Cost 2 ins <u,4,7,6>, lane 0
+ 2091876353U, // <1,4,u,0>: Cost 2 ins <1,u,3,0>, lane 1
+ 2091737089U, // <1,4,u,1>: Cost 2 ins <1,u,1,1>, lane 1
+ 2091892737U, // <1,4,u,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 2091753473U, // <1,4,u,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 2061921488U, // <1,4,u,4>: Cost 2 vtrnr LHS, <4,4,4,4>
+ 2014142802U, // <1,4,u,5>: Cost 2 vtrnr LHS, <0,4,1,5>
+ 2014142812U, // <1,4,u,6>: Cost 2 vtrnr LHS, <0,4,2,6>
+ 2091933697U, // <1,4,u,7>: Cost 2 ins <1,u,3,7>, lane 1
+ 2014142805U, // <1,4,u,u>: Cost 2 vtrnr LHS, <0,4,1,u>
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 1686110310U, // <1,5,0,2>: Cost 2 vuzpl <1,3,5,7>, LHS
+ 3163471875U, // <1,5,0,3>: Cost 3 ins <1,5,0,u>, lane 3
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 3165437953U, // <1,5,0,5>: Cost 3 ins <1,u,0,5>, lane 1
+ 3164045314U, // <1,5,0,6>: Cost 3 ins <1,5,u,6>, lane 2
+ 2090311682U, // <1,5,0,7>: Cost 2 ins <1,5,u,7>, lane 2
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2091737089U, // <1,5,1,1>: Cost 2 ins <1,u,1,1>, lane 1
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2091753473U, // <1,5,1,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 1686111232U, // <1,5,1,5>: Cost 2 vuzpl <1,3,5,7>, <1,3,5,7>
+ 2958181456U, // <1,5,1,6>: Cost 3 vzipr <0,u,1,1>, <1,4,5,6>
+ 2019986742U, // <1,5,1,7>: Cost 2 vtrnr <1,1,1,1>, RHS
+ 2019986743U, // <1,5,1,u>: Cost 2 vtrnr <1,1,1,1>, RHS
+ 2759853734U, // <1,5,2,0>: Cost 3 vuzpl <1,3,5,7>, <2,3,0,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2090319877U, // <1,5,2,3>: Cost 2 ins <1,5,u,u>, lane 5
+ 2759853774U, // <1,5,2,4>: Cost 3 vuzpl <1,3,5,7>, <2,3,4,5>
+ 2994687194U, // <1,5,2,5>: Cost 3 vzipr <7,0,1,2>, <4,4,5,5>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 2090311682U, // <1,5,2,7>: Cost 2 ins <1,5,u,7>, lane 2
+ 2090319877U, // <1,5,2,u>: Cost 2 ins <1,5,u,u>, lane 5
+ 2091876353U, // <1,5,3,0>: Cost 2 ins <1,u,3,0>, lane 1
+ 2089951235U, // <1,5,3,1>: Cost 2 ins <1,5,3,u>, lane 3
+ 2091892737U, // <1,5,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 2091900929U, // <1,5,3,3>: Cost 2 ins <1,u,3,3>, lane 1
+ 2091909121U, // <1,5,3,4>: Cost 2 ins <1,u,3,4>, lane 1
+ 2061881348U, // <1,5,3,5>: Cost 2 vtrnr LHS, <5,5,5,5>
+ 2089951235U, // <1,5,3,6>: Cost 2 ins <1,5,3,u>, lane 3
+ 940363062U, // <1,5,3,7>: Cost 1 vtrnr LHS, RHS
+ 940363063U, // <1,5,3,u>: Cost 1 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3164012546U, // <1,5,4,2>: Cost 3 ins <1,5,u,2>, lane 2
+ 3163766787U, // <1,5,4,3>: Cost 3 ins <1,5,4,u>, lane 3
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 1686113590U, // <1,5,4,6>: Cost 2 vuzpl <1,3,5,7>, RHS
+ 2090311682U, // <1,5,4,7>: Cost 2 ins <1,5,u,7>, lane 2
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2955561954U, // <1,5,5,0>: Cost 3 vzipr <0,4,1,5>, <4,1,5,0>
+ 2955561874U, // <1,5,5,1>: Cost 3 vzipr <0,4,1,5>, <4,0,5,1>
+ 3165782017U, // <1,5,5,2>: Cost 3 ins <1,u,5,2>, lane 1
+ 2955559851U, // <1,5,5,3>: Cost 3 vzipr <0,4,1,5>, <1,2,5,3>
+ 2955561958U, // <1,5,5,4>: Cost 3 vzipr <0,4,1,5>, <4,1,5,4>
+ 2131877888U, // <1,5,5,5>: Cost 2 ins <u,5,5,5>, lane 0
+ 2955561474U, // <1,5,5,6>: Cost 3 vzipr <0,4,1,5>, <3,4,5,6>
+ 2092081153U, // <1,5,5,7>: Cost 2 ins <1,u,5,7>, lane 1
+ 2092081153U, // <1,5,5,u>: Cost 2 ins <1,u,5,7>, lane 1
+ 2131910656U, // <1,5,6,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 2131918848U, // <1,5,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2131927040U, // <1,5,6,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2131935232U, // <1,5,6,3>: Cost 2 ins <u,5,6,3>, lane 0
+ 2131943424U, // <1,5,6,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 2131951616U, // <1,5,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 2131959808U, // <1,5,6,6>: Cost 2 ins <u,5,6,6>, lane 0
+ 1058226176U, // <1,5,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <1,5,6,u>: Cost 1 ins RHS, lane 0
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199198U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,5,7>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2759857248U, // <1,5,7,3>: Cost 3 vuzpl <1,3,5,7>, <7,1,3,5>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2759857510U, // <1,5,7,5>: Cost 3 vuzpl <1,3,5,7>, <7,4,5,6>
+ 2593035086U, // <1,5,7,6>: Cost 3 vext1 <7,1,5,7>, <6,7,0,1>
+ 2132041728U, // <1,5,7,7>: Cost 2 ins <u,5,7,7>, lane 0
+ 2132041728U, // <1,5,7,u>: Cost 2 ins <u,5,7,7>, lane 0
+ 2091876353U, // <1,5,u,0>: Cost 2 ins <1,u,3,0>, lane 1
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 1686116142U, // <1,5,u,2>: Cost 2 vuzpl <1,3,5,7>, LHS
+ 2091753473U, // <1,5,u,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 1686116506U, // <1,5,u,6>: Cost 2 vuzpl <1,3,5,7>, RHS
+ 940404022U, // <1,5,u,7>: Cost 1 vtrnr LHS, RHS
+ 940404023U, // <1,5,u,u>: Cost 1 vtrnr LHS, RHS
+ 3205873664U, // <1,6,0,0>: Cost 3 ins <u,6,0,0>, lane 0
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2132148224U, // <1,6,0,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 3087819259U, // <1,6,0,3>: Cost 3 vtrnr <0,1,2,0>, <0,6,2,3>
+ 2620023123U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,6>
+ 3165437953U, // <1,6,0,5>: Cost 3 ins <1,u,0,5>, lane 1
+ 3164708866U, // <1,6,0,6>: Cost 3 ins <1,6,u,6>, lane 2
+ 2954857782U, // <1,6,0,7>: Cost 3 vzipr <0,3,1,0>, RHS
+ 2132148224U, // <1,6,0,u>: Cost 2 ins <u,6,0,2>, lane 0
+ 3205947392U, // <1,6,1,0>: Cost 3 ins <u,6,1,0>, lane 0
+ 2091737089U, // <1,6,1,1>: Cost 2 ins <1,u,1,1>, lane 1
+ 3005959068U, // <1,6,1,2>: Cost 3 vzipr <u,u,1,1>, <4,0,6,2>
+ 2091753473U, // <1,6,1,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3205988352U, // <1,6,1,5>: Cost 3 ins <u,6,1,5>, lane 0
+ 1745690729U, // <1,6,1,6>: Cost 2 vuzpr <0,1,2,6>, <0,1,2,6>
+ 1884441910U, // <1,6,1,7>: Cost 2 vzipr <0,u,1,1>, RHS
+ 1884441911U, // <1,6,1,u>: Cost 2 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 2994687442U, // <1,6,2,1>: Cost 3 vzipr <7,0,1,2>, <4,7,6,1>
+ 2994686876U, // <1,6,2,2>: Cost 3 vzipr <7,0,1,2>, <4,0,6,2>
+ 2132303872U, // <1,6,2,3>: Cost 2 ins <u,6,2,3>, lane 0
+ 3206053888U, // <1,6,2,4>: Cost 3 ins <u,6,2,4>, lane 0
+ 3165585409U, // <1,6,2,5>: Cost 3 ins <1,u,2,5>, lane 1
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 1897057590U, // <1,6,2,7>: Cost 2 vzipr <3,0,1,2>, RHS
+ 1897057591U, // <1,6,2,u>: Cost 2 vzipr <3,0,1,2>, RHS
+ 2061881442U, // <1,6,3,0>: Cost 2 vtrnr LHS, <5,6,7,0>
+ 2987396400U, // <1,6,3,1>: Cost 3 vzipr <5,7,1,3>, <4,5,6,1>
+ 2061880652U, // <1,6,3,2>: Cost 2 vtrnr LHS, <4,6,0,2>
+ 2091900929U, // <1,6,3,3>: Cost 2 ins <1,u,3,3>, lane 1
+ 2061881446U, // <1,6,3,4>: Cost 2 vtrnr LHS, <5,6,7,4>
+ 3118078194U, // <1,6,3,5>: Cost 3 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2061880692U, // <1,6,3,6>: Cost 2 vtrnr LHS, <4,6,4,6>
+ 2014103482U, // <1,6,3,7>: Cost 2 vtrnr LHS, <2,6,3,7>
+ 2014103483U, // <1,6,3,u>: Cost 2 vtrnr LHS, <2,6,3,u>
+ 3206168576U, // <1,6,4,0>: Cost 3 ins <u,6,4,0>, lane 0
+ 2761256201U, // <1,6,4,1>: Cost 3 vuzpl <1,5,6,7>, <4,5,1,7>
+ 3164676098U, // <1,6,4,2>: Cost 3 ins <1,6,u,2>, lane 2
+ 3087852027U, // <1,6,4,3>: Cost 3 vtrnr <0,1,2,4>, <0,6,2,3>
+ 3206201344U, // <1,6,4,4>: Cost 3 ins <u,6,4,4>, lane 0
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2132475904U, // <1,6,4,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 2954890550U, // <1,6,4,7>: Cost 3 vzipr <0,3,1,4>, RHS
+ 2132475904U, // <1,6,4,u>: Cost 2 ins <u,6,4,6>, lane 0
+ 3164659714U, // <1,6,5,0>: Cost 3 ins <1,6,u,0>, lane 2
+ 3206250496U, // <1,6,5,1>: Cost 3 ins <u,6,5,1>, lane 0
+ 3003337628U, // <1,6,5,2>: Cost 3 vzipr <u,4,1,5>, <4,0,6,2>
+ 3165790209U, // <1,6,5,3>: Cost 3 ins <1,u,5,3>, lane 1
+ 3206275072U, // <1,6,5,4>: Cost 3 ins <u,6,5,4>, lane 0
+ 3206283264U, // <1,6,5,5>: Cost 3 ins <u,6,5,5>, lane 0
+ 3003337956U, // <1,6,5,6>: Cost 3 vzipr <u,4,1,5>, <4,4,6,6>
+ 1881820470U, // <1,6,5,7>: Cost 2 vzipr <0,4,1,5>, RHS
+ 1881820471U, // <1,6,5,u>: Cost 2 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 2557264742U, // <1,6,6,1>: Cost 3 vext1 <1,1,6,6>, <1,1,6,6>
+ 3165855745U, // <1,6,6,2>: Cost 3 ins <1,u,6,2>, lane 1
+ 2819432955U, // <1,6,6,3>: Cost 3 vuzpr <0,1,2,6>, <0,6,2,3>
+ 3206348800U, // <1,6,6,4>: Cost 3 ins <u,6,6,4>, lane 0
+ 3206356992U, // <1,6,6,5>: Cost 3 ins <u,6,6,5>, lane 0
+ 2132623360U, // <1,6,6,6>: Cost 2 ins <u,6,6,6>, lane 0
+ 2132631552U, // <1,6,6,7>: Cost 2 ins <u,6,6,7>, lane 0
+ 2132623360U, // <1,6,6,u>: Cost 2 ins <u,6,6,6>, lane 0
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 3206406144U, // <1,6,7,2>: Cost 3 ins <u,6,7,2>, lane 0
+ 3206414336U, // <1,6,7,3>: Cost 3 ins <u,6,7,3>, lane 0
+ 2132680704U, // <1,6,7,4>: Cost 2 ins <u,6,7,4>, lane 0
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2725507979U, // <1,6,7,6>: Cost 3 vext3 <6,u,0,1>, <6,7,6,u>
+ 2132705280U, // <1,6,7,7>: Cost 2 ins <u,6,7,7>, lane 0
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2091737089U, // <1,6,u,1>: Cost 2 ins <1,u,1,1>, lane 1
+ 2061921612U, // <1,6,u,2>: Cost 2 vtrnr LHS, <4,6,0,2>
+ 2091753473U, // <1,6,u,3>: Cost 2 ins <1,u,1,3>, lane 1
+ 2061922406U, // <1,6,u,4>: Cost 2 vtrnr LHS, <5,6,7,4>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2061921652U, // <1,6,u,6>: Cost 2 vtrnr LHS, <4,6,4,6>
+ 2014144442U, // <1,6,u,7>: Cost 2 vtrnr LHS, <2,6,3,7>
+ 2014144443U, // <1,6,u,u>: Cost 2 vtrnr LHS, <2,6,3,u>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2132803584U, // <1,7,0,1>: Cost 2 ins <u,7,0,1>, lane 0
+ 3206553600U, // <1,7,0,2>: Cost 3 ins <u,7,0,2>, lane 0
+ 2257286235U, // <1,7,0,3>: Cost 3 vrev <7,1,3,0>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3206578176U, // <1,7,0,5>: Cost 3 ins <u,7,0,5>, lane 0
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 3165380610U, // <1,7,0,7>: Cost 3 ins <1,7,u,7>, lane 2
+ 2132803584U, // <1,7,0,u>: Cost 2 ins <u,7,0,1>, lane 0
+ 2581184614U, // <1,7,1,0>: Cost 3 vext1 <5,1,7,1>, LHS
+ 2091737089U, // <1,7,1,1>: Cost 2 ins <1,u,1,1>, lane 1
+ 3206627328U, // <1,7,1,2>: Cost 3 ins <u,7,1,2>, lane 0
+ 2132893696U, // <1,7,1,3>: Cost 2 ins <u,7,1,3>, lane 0
+ 2581187894U, // <1,7,1,4>: Cost 3 vext1 <5,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 1745698922U, // <1,7,1,7>: Cost 2 vuzpr <0,1,2,7>, <0,1,2,7>
+ 2132893696U, // <1,7,1,u>: Cost 2 ins <u,7,1,3>, lane 0
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 2994687370U, // <1,7,2,1>: Cost 3 vzipr <7,0,1,2>, <4,6,7,1>
+ 3206701056U, // <1,7,2,2>: Cost 3 ins <u,7,2,2>, lane 0
+ 2132967424U, // <1,7,2,3>: Cost 2 ins <u,7,2,3>, lane 0
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3206725632U, // <1,7,2,5>: Cost 3 ins <u,7,2,5>, lane 0
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 2994688024U, // <1,7,2,7>: Cost 3 vzipr <7,0,1,2>, <5,5,7,7>
+ 2132967424U, // <1,7,2,u>: Cost 2 ins <u,7,2,3>, lane 0
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2061882190U, // <1,7,3,1>: Cost 2 vtrnr LHS, <6,7,0,1>
+ 2091892737U, // <1,7,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 2061881472U, // <1,7,3,3>: Cost 2 vtrnr LHS, <5,7,1,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2061881512U, // <1,7,3,7>: Cost 2 vtrnr LHS, <5,7,5,7>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3165331458U, // <1,7,4,1>: Cost 3 ins <1,7,u,1>, lane 2
+ 2644585539U, // <1,7,4,2>: Cost 3 vext2 <4,5,1,7>, <4,2,6,7>
+ 2257319007U, // <1,7,4,3>: Cost 3 vrev <7,1,3,4>
+ 3206864896U, // <1,7,4,4>: Cost 3 ins <u,7,4,4>, lane 0
+ 2133131264U, // <1,7,4,5>: Cost 2 ins <u,7,4,5>, lane 0
+ 3206881280U, // <1,7,4,6>: Cost 3 ins <u,7,4,6>, lane 0
+ 3165380610U, // <1,7,4,7>: Cost 3 ins <1,7,u,7>, lane 2
+ 2133131264U, // <1,7,4,u>: Cost 2 ins <u,7,4,5>, lane 0
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 3028292602U, // <1,7,5,1>: Cost 3 vtrnl <1,3,5,7>, <7,0,1,2>
+ 3165782017U, // <1,7,5,2>: Cost 3 ins <1,u,5,2>, lane 1
+ 3028292704U, // <1,7,5,3>: Cost 3 vtrnl <1,3,5,7>, <7,1,3,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 3028292966U, // <1,7,5,5>: Cost 3 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 2133221376U, // <1,7,5,7>: Cost 2 ins <u,7,5,7>, lane 0
+ 2133221376U, // <1,7,5,u>: Cost 2 ins <u,7,5,7>, lane 0
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3206995968U, // <1,7,6,2>: Cost 3 ins <u,7,6,2>, lane 0
+ 3165347842U, // <1,7,6,3>: Cost 3 ins <1,7,u,3>, lane 2
+ 2257409130U, // <1,7,6,4>: Cost 3 vrev <7,1,4,6>
+ 3207020544U, // <1,7,6,5>: Cost 3 ins <u,7,6,5>, lane 0
+ 3207028736U, // <1,7,6,6>: Cost 3 ins <u,7,6,6>, lane 0
+ 2133295104U, // <1,7,6,7>: Cost 2 ins <u,7,6,7>, lane 0
+ 2133295104U, // <1,7,6,u>: Cost 2 ins <u,7,6,7>, lane 0
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 2861470542U, // <1,7,7,1>: Cost 3 vuzpr <7,1,5,7>, <6,7,0,1>
+ 3165929473U, // <1,7,7,2>: Cost 3 ins <1,u,7,2>, lane 1
+ 2998046416U, // <1,7,7,3>: Cost 3 vzipr <7,5,1,7>, <5,1,7,3>
+ 3207086080U, // <1,7,7,4>: Cost 3 ins <u,7,7,4>, lane 0
+ 2257491060U, // <1,7,7,5>: Cost 3 vrev <7,1,5,7>
+ 3207102464U, // <1,7,7,6>: Cost 3 ins <u,7,7,6>, lane 0
+ 2133368832U, // <1,7,7,7>: Cost 2 ins <u,7,7,7>, lane 0
+ 2133368832U, // <1,7,7,u>: Cost 2 ins <u,7,7,7>, lane 0
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2061923150U, // <1,7,u,1>: Cost 2 vtrnr LHS, <6,7,0,1>
+ 2091892737U, // <1,7,u,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 2061922432U, // <1,7,u,3>: Cost 2 vtrnr LHS, <5,7,1,3>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2061922472U, // <1,7,u,7>: Cost 2 vtrnr LHS, <5,7,5,7>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1745707008U, // <1,u,0,0>: Cost 2 vuzpr LHS, <0,0,0,0>
+ 1745707018U, // <1,u,0,1>: Cost 2 vuzpr LHS, <0,0,1,1>
+ 1745707028U, // <1,u,0,2>: Cost 2 vuzpr LHS, <0,0,2,2>
+ 2087624706U, // <1,u,0,3>: Cost 2 ins <1,1,u,3>, lane 2
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1818155162U, // <1,u,0,5>: Cost 2 vzipl <1,0,3,2>, RHS
+ 2891897040U, // <1,u,0,6>: Cost 3 vzipl <1,0,3,2>, <u,6,3,7>
+ 2088984578U, // <1,u,0,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 1745707025U, // <1,u,0,u>: Cost 2 vuzpr LHS, <0,0,1,u>
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 671965286U, // <1,u,1,3>: Cost 1 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1818663066U, // <1,u,1,5>: Cost 2 vzipl <1,1,1,1>, RHS
+ 1952880794U, // <1,u,1,6>: Cost 2 vtrnl <1,1,1,1>, RHS
+ 1884441928U, // <1,u,1,7>: Cost 2 vzipr <0,u,1,1>, RHS
+ 671965291U, // <1,u,1,u>: Cost 1 vuzpr LHS, LHS
+ 1745707926U, // <1,u,2,0>: Cost 2 vuzpr LHS, <1,2,3,0>
+ 1819465518U, // <1,u,2,1>: Cost 2 vzipl <1,2,3,0>, LHS
+ 1745707172U, // <1,u,2,2>: Cost 2 vuzpr LHS, <0,2,0,2>
+ 1055244288U, // <1,u,2,3>: Cost 1 ins LHS, lane 0
+ 1745707930U, // <1,u,2,4>: Cost 2 vuzpr LHS, <1,2,3,4>
+ 1819465882U, // <1,u,2,5>: Cost 2 vzipl <1,2,3,0>, RHS
+ 1745707212U, // <1,u,2,6>: Cost 2 vuzpr LHS, <0,2,4,6>
+ 1897057608U, // <1,u,2,7>: Cost 2 vzipr <3,0,1,2>, RHS
+ 1055244288U, // <1,u,2,u>: Cost 1 ins LHS, lane 0
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 2014102162U, // <1,u,3,1>: Cost 2 vtrnr LHS, <0,u,1,1>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 940360349U, // <1,u,3,3>: Cost 1 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 2014102166U, // <1,u,3,5>: Cost 2 vtrnr LHS, <0,u,1,5>
+ 2014102176U, // <1,u,3,6>: Cost 2 vtrnr LHS, <0,u,2,6>
+ 940363305U, // <1,u,3,7>: Cost 1 vtrnr LHS, RHS
+ 940360354U, // <1,u,3,u>: Cost 1 vtrnr LHS, LHS
+ 2088263682U, // <1,u,4,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 2087608322U, // <1,u,4,1>: Cost 2 ins <1,1,u,1>, lane 2
+ 2086952962U, // <1,u,4,2>: Cost 2 ins <1,0,u,2>, lane 2
+ 2087624706U, // <1,u,4,3>: Cost 2 ins <1,1,u,3>, lane 2
+ 1793486032U, // <1,u,4,4>: Cost 2 vuzpr LHS, <4,4,4,4>
+ 1745707346U, // <1,u,4,5>: Cost 2 vuzpr LHS, <0,4,1,5>
+ 1745707356U, // <1,u,4,6>: Cost 2 vuzpr LHS, <0,4,2,6>
+ 2088984578U, // <1,u,4,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 1745707349U, // <1,u,4,u>: Cost 2 vuzpr LHS, <0,4,1,u>
+ 2088263682U, // <1,u,5,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 1821513518U, // <1,u,5,1>: Cost 2 vzipl <1,5,3,7>, LHS
+ 1954551598U, // <1,u,5,2>: Cost 2 vtrnl <1,3,5,7>, LHS
+ 1881817244U, // <1,u,5,3>: Cost 2 vzipr <0,4,1,5>, LHS
+ 2088296450U, // <1,u,5,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 1821513882U, // <1,u,5,5>: Cost 2 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 671968566U, // <1,u,5,7>: Cost 1 vuzpr LHS, RHS
+ 671968567U, // <1,u,5,u>: Cost 1 vuzpr LHS, RHS
+ 1793486946U, // <1,u,6,0>: Cost 2 vuzpr LHS, <5,6,7,0>
+ 2087608322U, // <1,u,6,1>: Cost 2 ins <1,1,u,1>, lane 2
+ 1793486156U, // <1,u,6,2>: Cost 2 vuzpr LHS, <4,6,0,2>
+ 2087624706U, // <1,u,6,3>: Cost 2 ins <1,1,u,3>, lane 2
+ 1793486950U, // <1,u,6,4>: Cost 2 vuzpr LHS, <5,6,7,4>
+ 2131951616U, // <1,u,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 1793486196U, // <1,u,6,6>: Cost 2 vuzpr LHS, <4,6,4,6>
+ 1058226176U, // <1,u,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <1,u,6,u>: Cost 1 ins RHS, lane 0
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 1793487694U, // <1,u,7,1>: Cost 2 vuzpr LHS, <6,7,0,1>
+ 2086952962U, // <1,u,7,2>: Cost 2 ins <1,0,u,2>, lane 2
+ 1793486976U, // <1,u,7,3>: Cost 2 vuzpr LHS, <5,7,1,3>
+ 2088296450U, // <1,u,7,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 1793487734U, // <1,u,7,5>: Cost 2 vuzpr LHS, <6,7,4,5>
+ 2131369984U, // <1,u,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 1793487016U, // <1,u,7,7>: Cost 2 vuzpr LHS, <5,7,5,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 671965853U, // <1,u,u,3>: Cost 1 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1745707670U, // <1,u,u,5>: Cost 2 vuzpr LHS, <0,u,1,5>
+ 1745707680U, // <1,u,u,6>: Cost 2 vuzpr LHS, <0,u,2,6>
+ 671968809U, // <1,u,u,7>: Cost 1 vuzpr LHS, RHS
+ 671965858U, // <1,u,u,u>: Cost 1 vuzpr LHS, LHS
+ 2128150528U, // <2,0,0,0>: Cost 2 ins <u,0,0,0>, lane 0
+ 2097635329U, // <2,0,0,1>: Cost 2 ins <2,u,0,1>, lane 1
+ 1691664486U, // <2,0,0,2>: Cost 2 vuzpl <2,3,0,1>, LHS
+ 2826094014U, // <2,0,0,3>: Cost 3 vuzpr <1,2,3,0>, <2,0,1,3>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 2826094772U, // <2,0,0,5>: Cost 3 vuzpr <1,2,3,0>, <3,0,4,5>
+ 3171418113U, // <2,0,0,6>: Cost 3 ins <2,u,0,6>, lane 1
+ 3094529510U, // <2,0,0,7>: Cost 3 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1691664540U, // <2,0,0,u>: Cost 2 vuzpl <2,3,0,1>, LHS
+ 2215927971U, // <2,0,1,0>: Cost 3 vrev <0,2,0,1>
+ 2128232448U, // <2,0,1,1>: Cost 2 ins <u,0,1,1>, lane 0
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1752350822U, // <2,0,1,3>: Cost 2 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 2765407232U, // <2,0,1,5>: Cost 3 vuzpl <2,3,0,1>, <1,3,5,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3166707714U, // <2,0,1,7>: Cost 3 ins <2,0,u,7>, lane 2
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1142194340U, // <2,0,2,0>: Cost 2 vrev <0,2,0,2>
+ 1825374310U, // <2,0,2,1>: Cost 2 vzipl <2,2,2,2>, LHS
+ 1959592038U, // <2,0,2,2>: Cost 2 vtrnl <2,2,2,2>, LHS
+ 2128322560U, // <2,0,2,3>: Cost 2 ins <u,0,2,3>, lane 0
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2599259856U, // <2,0,2,5>: Cost 3 vext1 <u,2,0,2>, <5,1,7,3>
+ 3088351274U, // <2,0,2,6>: Cost 3 vtrnr <0,2,0,2>, <0,0,4,6>
+ 2599261178U, // <2,0,2,7>: Cost 3 vext1 <u,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 1879883776U, // <2,0,3,0>: Cost 2 vzipr LHS, <0,0,0,0>
+ 1879885478U, // <2,0,3,1>: Cost 2 vzipr LHS, <2,3,0,1>
+ 1879883940U, // <2,0,3,2>: Cost 2 vzipr LHS, <0,2,0,2>
+ 2097872897U, // <2,0,3,3>: Cost 2 ins <2,u,3,3>, lane 1
+ 2958270630U, // <2,0,3,4>: Cost 3 vzipr LHS, <0,2,0,4>
+ 2826094286U, // <2,0,3,5>: Cost 3 vuzpr <1,2,3,0>, <2,3,4,5>
+ 2958270794U, // <2,0,3,6>: Cost 3 vzipr LHS, <0,4,0,6>
+ 2097905665U, // <2,0,3,7>: Cost 2 ins <2,u,3,7>, lane 1
+ 1879883946U, // <2,0,3,u>: Cost 2 vzipr LHS, <0,2,0,u>
+ 2215952550U, // <2,0,4,0>: Cost 3 vrev <0,2,0,4>
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 1960427622U, // <2,0,4,2>: Cost 2 vtrnl <2,3,4,5>, LHS
+ 3171688449U, // <2,0,4,3>: Cost 3 ins <2,u,4,3>, lane 1
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2097963009U, // <2,0,4,5>: Cost 2 ins <2,u,4,5>, lane 1
+ 1691667766U, // <2,0,4,6>: Cost 2 vuzpl <2,3,0,1>, RHS
+ 3171721217U, // <2,0,4,7>: Cost 3 ins <2,u,4,7>, lane 1
+ 1691667784U, // <2,0,4,u>: Cost 2 vuzpl <2,3,0,1>, RHS
+ 3033596068U, // <2,0,5,0>: Cost 3 vtrnl <2,2,5,7>, <0,2,0,2>
+ 2128527360U, // <2,0,5,1>: Cost 2 ins <u,0,5,1>, lane 0
+ 2955632804U, // <2,0,5,2>: Cost 3 vzipr <0,4,2,5>, <0,2,0,2>
+ 2216181954U, // <2,0,5,3>: Cost 3 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 2867900420U, // <2,0,5,5>: Cost 3 vuzpr <u,2,3,0>, <5,5,5,5>
+ 3202310144U, // <2,0,5,6>: Cost 3 ins <u,0,5,6>, lane 0
+ 1752354102U, // <2,0,5,7>: Cost 2 vuzpr <1,2,3,0>, RHS
+ 1752354103U, // <2,0,5,u>: Cost 2 vuzpr <1,2,3,0>, RHS
+ 3088678912U, // <2,0,6,0>: Cost 3 vtrnr <0,2,4,6>, <0,0,0,0>
+ 1828143206U, // <2,0,6,1>: Cost 2 vzipl <2,6,3,7>, LHS
+ 2128609280U, // <2,0,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 3171835905U, // <2,0,6,3>: Cost 3 ins <2,u,6,3>, lane 1
+ 1142522060U, // <2,0,6,4>: Cost 2 vrev <0,2,4,6>
+ 3171852289U, // <2,0,6,5>: Cost 3 ins <2,u,6,5>, lane 1
+ 2867899764U, // <2,0,6,6>: Cost 3 vuzpr <u,2,3,0>, <4,6,4,6>
+ 2128650240U, // <2,0,6,7>: Cost 2 ins <u,0,6,7>, lane 0
+ 1142817008U, // <2,0,6,u>: Cost 2 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 2867901262U, // <2,0,7,1>: Cost 3 vuzpr <u,2,3,0>, <6,7,0,1>
+ 2956976292U, // <2,0,7,2>: Cost 3 vzipr <0,6,2,7>, <0,2,0,2>
+ 2867900544U, // <2,0,7,3>: Cost 3 vuzpr <u,2,3,0>, <5,7,1,3>
+ 3171917825U, // <2,0,7,4>: Cost 3 ins <2,u,7,4>, lane 1
+ 2867901302U, // <2,0,7,5>: Cost 3 vuzpr <u,2,3,0>, <6,7,4,5>
+ 3166699522U, // <2,0,7,6>: Cost 3 ins <2,0,u,6>, lane 2
+ 2867900584U, // <2,0,7,7>: Cost 3 vuzpr <u,2,3,0>, <5,7,5,7>
+ 2867900549U, // <2,0,7,u>: Cost 3 vuzpr <u,2,3,0>, <5,7,1,u>
+ 1879924736U, // <2,0,u,0>: Cost 2 vzipr LHS, <0,0,0,0>
+ 1879926438U, // <2,0,u,1>: Cost 2 vzipr LHS, <2,3,0,1>
+ 1879924900U, // <2,0,u,2>: Cost 2 vzipr LHS, <0,2,0,2>
+ 1752351389U, // <2,0,u,3>: Cost 2 vuzpr <1,2,3,0>, LHS
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2097963009U, // <2,0,u,5>: Cost 2 ins <2,u,4,5>, lane 1
+ 1691670682U, // <2,0,u,6>: Cost 2 vuzpl <2,3,0,1>, RHS
+ 1752354345U, // <2,0,u,7>: Cost 2 vuzpr <1,2,3,0>, RHS
+ 1879924906U, // <2,0,u,u>: Cost 2 vzipr LHS, <0,2,0,u>
+ 2763497636U, // <2,1,0,0>: Cost 3 vuzpl <2,0,1,2>, <0,2,0,2>
+ 2097635329U, // <2,1,0,1>: Cost 2 ins <2,u,0,1>, lane 1
+ 2820130966U, // <2,1,0,2>: Cost 3 vuzpr <0,2,3,1>, <3,0,1,2>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2767487180U, // <2,1,0,4>: Cost 3 vuzpl <2,6,1,3>, <0,2,4,6>
+ 3033842688U, // <2,1,0,5>: Cost 3 vtrnl <2,3,0,1>, <1,3,5,7>
+ 3171418113U, // <2,1,0,6>: Cost 3 ins <2,u,0,6>, lane 1
+ 3171426305U, // <2,1,0,7>: Cost 3 ins <2,u,0,7>, lane 1
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551546028U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, <0,2,1,1>
+ 2128896000U, // <2,1,1,1>: Cost 2 ins <u,1,1,1>, lane 0
+ 2954938518U, // <2,1,1,2>: Cost 3 vzipr <0,3,2,1>, <3,0,1,2>
+ 2128912384U, // <2,1,1,3>: Cost 2 ins <u,1,1,3>, lane 0
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3202670592U, // <2,1,1,5>: Cost 3 ins <u,1,1,5>, lane 0
+ 3202678784U, // <2,1,1,6>: Cost 3 ins <u,1,1,6>, lane 0
+ 2953612553U, // <2,1,1,7>: Cost 3 vzipr <0,1,2,1>, <4,5,1,7>
+ 2128896000U, // <2,1,1,u>: Cost 2 ins <u,1,1,1>, lane 0
+ 2128961536U, // <2,1,2,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 2128969728U, // <2,1,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 2128977920U, // <2,1,2,2>: Cost 2 ins <u,1,2,2>, lane 0
+ 1055244288U, // <2,1,2,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <2,1,2,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 2129002496U, // <2,1,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2129010688U, // <2,1,2,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2129018880U, // <2,1,2,7>: Cost 2 ins <u,1,2,7>, lane 0
+ 1055244288U, // <2,1,2,u>: Cost 1 ins LHS, lane 0
+ 2953625609U, // <2,1,3,0>: Cost 3 vzipr LHS, <0,0,1,0>
+ 1879883786U, // <2,1,3,1>: Cost 2 vzipr LHS, <0,0,1,1>
+ 1879885974U, // <2,1,3,2>: Cost 2 vzipr LHS, <3,0,1,2>
+ 1879884760U, // <2,1,3,3>: Cost 2 vzipr LHS, <1,3,1,3>
+ 2953625856U, // <2,1,3,4>: Cost 3 vzipr LHS, <0,3,1,4>
+ 1879884114U, // <2,1,3,5>: Cost 2 vzipr LHS, <0,4,1,5>
+ 2958270641U, // <2,1,3,6>: Cost 3 vzipr LHS, <0,2,1,6>
+ 2097905665U, // <2,1,3,7>: Cost 2 ins <2,u,3,7>, lane 1
+ 1879883793U, // <2,1,3,u>: Cost 2 vzipr LHS, <0,0,1,u>
+ 3171663873U, // <2,1,4,0>: Cost 3 ins <2,u,4,0>, lane 1
+ 3094561588U, // <2,1,4,1>: Cost 3 vtrnr <1,2,3,4>, <1,1,1,1>
+ 2900378522U, // <2,1,4,2>: Cost 3 vzipl <2,4,1,3>, <1,2,3,4>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3171696641U, // <2,1,4,4>: Cost 3 ins <2,u,4,4>, lane 1
+ 2097963009U, // <2,1,4,5>: Cost 2 ins <2,u,4,5>, lane 1
+ 2763500854U, // <2,1,4,6>: Cost 3 vuzpl <2,0,1,2>, RHS
+ 3171721217U, // <2,1,4,7>: Cost 3 ins <2,u,4,7>, lane 1
+ 2020819051U, // <2,1,4,u>: Cost 2 vtrnr <1,2,3,4>, LHS
+ 2551578800U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, <0,2,1,5>
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 2901001110U, // <2,1,5,2>: Cost 3 vzipl <2,5,0,7>, <1,2,3,0>
+ 2129207296U, // <2,1,5,3>: Cost 2 ins <u,1,5,3>, lane 0
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3202965504U, // <2,1,5,5>: Cost 3 ins <u,1,5,5>, lane 0
+ 3171786753U, // <2,1,5,6>: Cost 3 ins <2,u,5,6>, lane 1
+ 2819910966U, // <2,1,5,7>: Cost 3 vuzpr <0,2,0,1>, RHS
+ 2129207296U, // <2,1,5,u>: Cost 2 ins <u,1,5,3>, lane 0
+ 2551586993U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, <0,2,1,6>
+ 3088679732U, // <2,1,6,1>: Cost 3 vtrnr <0,2,4,6>, <1,1,1,1>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 2014937190U, // <2,1,6,3>: Cost 2 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 2955641170U, // <2,1,6,5>: Cost 3 vzipr <0,4,2,6>, <0,4,1,5>
+ 2901886177U, // <2,1,6,6>: Cost 3 vzipl <2,6,3,7>, <1,6,3,7>
+ 2129313792U, // <2,1,6,7>: Cost 2 ins <u,1,6,7>, lane 0
+ 2014937195U, // <2,1,6,u>: Cost 2 vtrnr <0,2,4,6>, LHS
+ 3171885057U, // <2,1,7,0>: Cost 3 ins <2,u,7,0>, lane 1
+ 3203080192U, // <2,1,7,1>: Cost 3 ins <u,1,7,1>, lane 0
+ 3001439874U, // <2,1,7,2>: Cost 3 vzipr <u,1,2,7>, <7,u,1,2>
+ 2129354752U, // <2,1,7,3>: Cost 2 ins <u,1,7,3>, lane 0
+ 3171917825U, // <2,1,7,4>: Cost 3 ins <2,u,7,4>, lane 1
+ 3203112960U, // <2,1,7,5>: Cost 3 ins <u,1,7,5>, lane 0
+ 2222392248U, // <2,1,7,6>: Cost 3 vrev <1,2,6,7>
+ 3171942401U, // <2,1,7,7>: Cost 3 ins <2,u,7,7>, lane 1
+ 2129354752U, // <2,1,7,u>: Cost 2 ins <u,1,7,3>, lane 0
+ 2128961536U, // <2,1,u,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 1879924746U, // <2,1,u,1>: Cost 2 vzipr LHS, <0,0,1,1>
+ 1879926934U, // <2,1,u,2>: Cost 2 vzipr LHS, <3,0,1,2>
+ 1055244288U, // <2,1,u,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <2,1,u,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 1879925074U, // <2,1,u,5>: Cost 2 vzipr LHS, <0,4,1,5>
+ 2129010688U, // <2,1,u,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2097905665U, // <2,1,u,7>: Cost 2 ins <2,u,3,7>, lane 1
+ 1055244288U, // <2,1,u,u>: Cost 1 ins LHS, lane 0
+ 2020787094U, // <2,2,0,0>: Cost 2 vtrnr <1,2,3,0>, <1,2,3,0>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 1691156582U, // <2,2,0,2>: Cost 2 vuzpl <2,2,2,2>, LHS
+ 2094260226U, // <2,2,0,3>: Cost 2 ins <2,2,u,3>, lane 2
+ 2819917256U, // <2,2,0,4>: Cost 3 vuzpr <0,2,0,2>, <2,0,2,4>
+ 3168018434U, // <2,2,0,5>: Cost 3 ins <2,2,u,5>, lane 2
+ 2819915818U, // <2,2,0,6>: Cost 3 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3171426305U, // <2,2,0,7>: Cost 3 ins <2,u,0,7>, lane 1
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 1879867492U, // <2,2,1,1>: Cost 2 vzipr <0,1,2,1>, <0,1,2,1>
+ 2094252034U, // <2,2,1,2>: Cost 2 ins <2,2,u,2>, lane 2
+ 1746174054U, // <2,2,1,3>: Cost 2 vuzpr <0,2,0,2>, LHS
+ 3167526915U, // <2,2,1,4>: Cost 3 ins <2,2,1,u>, lane 3
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3203342336U, // <2,2,1,6>: Cost 3 ins <u,2,1,6>, lane 0
+ 3168034818U, // <2,2,1,7>: Cost 3 ins <2,2,u,7>, lane 2
+ 1746174059U, // <2,2,1,u>: Cost 2 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2093858819U, // <2,2,2,1>: Cost 2 ins <2,2,2,u>, lane 3
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 1884520550U, // <2,2,2,3>: Cost 2 vzipr <0,u,2,2>, LHS
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2093858819U, // <2,2,2,5>: Cost 2 ins <2,2,2,u>, lane 3
+ 2093858819U, // <2,2,2,6>: Cost 2 ins <2,2,2,u>, lane 3
+ 2093858819U, // <2,2,2,7>: Cost 2 ins <2,2,2,u>, lane 3
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2129698816U, // <2,2,3,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 2093932547U, // <2,2,3,1>: Cost 2 ins <2,2,3,u>, lane 3
+ 1879885416U, // <2,2,3,2>: Cost 2 vzipr LHS, <2,2,2,2>
+ 806142054U, // <2,2,3,3>: Cost 1 vzipr LHS, LHS
+ 2129731584U, // <2,2,3,4>: Cost 2 ins <u,2,3,4>, lane 0
+ 2093932547U, // <2,2,3,5>: Cost 2 ins <2,2,3,u>, lane 3
+ 1884528988U, // <2,2,3,6>: Cost 2 vzipr LHS, <0,4,2,6>
+ 2097905665U, // <2,2,3,7>: Cost 2 ins <2,u,3,7>, lane 1
+ 806142059U, // <2,2,3,u>: Cost 1 vzipr LHS, LHS
+ 2551644344U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, <0,2,2,4>
+ 3171672065U, // <2,2,4,1>: Cost 3 ins <2,u,4,1>, lane 1
+ 2094252034U, // <2,2,4,2>: Cost 2 ins <2,2,u,2>, lane 2
+ 2094260226U, // <2,2,4,3>: Cost 2 ins <2,2,u,3>, lane 2
+ 2020819866U, // <2,2,4,4>: Cost 2 vtrnr <1,2,3,4>, <1,2,3,4>
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 1691159862U, // <2,2,4,6>: Cost 2 vuzpl <2,2,2,2>, RHS
+ 3171721217U, // <2,2,4,7>: Cost 3 ins <2,u,4,7>, lane 1
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3167821827U, // <2,2,5,0>: Cost 3 ins <2,2,5,u>, lane 3
+ 2670497488U, // <2,2,5,1>: Cost 3 vext2 <u,u,2,2>, <5,1,7,3>
+ 2094252034U, // <2,2,5,2>: Cost 2 ins <2,2,u,2>, lane 2
+ 2094260226U, // <2,2,5,3>: Cost 2 ins <2,2,u,3>, lane 2
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 1879900264U, // <2,2,5,5>: Cost 2 vzipr <0,1,2,5>, <0,1,2,5>
+ 2670497890U, // <2,2,5,6>: Cost 3 vext2 <u,u,2,2>, <5,6,7,0>
+ 1746177334U, // <2,2,5,7>: Cost 2 vuzpr <0,2,0,2>, RHS
+ 1746177335U, // <2,2,5,u>: Cost 2 vuzpr <0,2,0,2>, RHS
+ 3088679830U, // <2,2,6,0>: Cost 3 vtrnr <0,2,4,6>, <1,2,3,0>
+ 3171819521U, // <2,2,6,1>: Cost 3 ins <2,u,6,1>, lane 1
+ 2094252034U, // <2,2,6,2>: Cost 2 ins <2,2,u,2>, lane 2
+ 1881899110U, // <2,2,6,3>: Cost 2 vzipr <0,4,2,6>, LHS
+ 3088679078U, // <2,2,6,4>: Cost 3 vtrnr <0,2,4,6>, <0,2,0,4>
+ 3171852289U, // <2,2,6,5>: Cost 3 ins <2,u,6,5>, lane 1
+ 2014937292U, // <2,2,6,6>: Cost 2 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2094301189U, // <2,2,6,7>: Cost 2 ins <2,2,u,u>, lane 5
+ 1881899115U, // <2,2,6,u>: Cost 2 vzipr <0,4,2,6>, LHS
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 2867696462U, // <2,2,7,1>: Cost 3 vuzpr <u,2,0,2>, <6,7,0,1>
+ 2094252034U, // <2,2,7,2>: Cost 2 ins <2,2,u,2>, lane 2
+ 2130018304U, // <2,2,7,3>: Cost 2 ins <u,2,7,3>, lane 0
+ 2670499174U, // <2,2,7,4>: Cost 3 vext2 <u,u,2,2>, <7,4,5,6>
+ 2228291208U, // <2,2,7,5>: Cost 3 vrev <2,2,5,7>
+ 3203784704U, // <2,2,7,6>: Cost 3 ins <u,2,7,6>, lane 0
+ 1879916650U, // <2,2,7,7>: Cost 2 vzipr <0,1,2,7>, <0,1,2,7>
+ 2130018304U, // <2,2,7,u>: Cost 2 ins <u,2,7,3>, lane 0
+ 2020787094U, // <2,2,u,0>: Cost 2 vtrnr <1,2,3,0>, <1,2,3,0>
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 806183014U, // <2,2,u,3>: Cost 1 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 1879925084U, // <2,2,u,6>: Cost 2 vzipr LHS, <0,4,2,6>
+ 1746177577U, // <2,2,u,7>: Cost 2 vuzpr <0,2,0,2>, RHS
+ 806183019U, // <2,2,u,u>: Cost 1 vzipr LHS, LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2094374915U, // <2,3,0,3>: Cost 2 ins <2,3,0,u>, lane 3
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2094940162U, // <2,3,0,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 2094374915U, // <2,3,0,6>: Cost 2 ins <2,3,0,u>, lane 3
+ 2094374915U, // <2,3,0,7>: Cost 2 ins <2,3,0,u>, lane 3
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2094956546U, // <2,3,1,7>: Cost 2 ins <2,3,u,7>, lane 2
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2094522371U, // <2,3,2,0>: Cost 2 ins <2,3,2,u>, lane 3
+ 2094907394U, // <2,3,2,1>: Cost 2 ins <2,3,u,1>, lane 2
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1059889156U, // <2,3,2,3>: Cost 1 ins LHS, lane 4
+ 2094522371U, // <2,3,2,4>: Cost 2 ins <2,3,2,u>, lane 3
+ 2094940162U, // <2,3,2,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2094956546U, // <2,3,2,7>: Cost 2 ins <2,3,u,7>, lane 2
+ 1059889156U, // <2,3,2,u>: Cost 1 ins LHS, lane 4
+ 1879884694U, // <2,3,3,0>: Cost 2 vzipr LHS, <1,2,3,0>
+ 2094907394U, // <2,3,3,1>: Cost 2 ins <2,3,u,1>, lane 2
+ 1879884534U, // <2,3,3,2>: Cost 2 vzipr LHS, <1,0,3,2>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1879884698U, // <2,3,3,4>: Cost 2 vzipr LHS, <1,2,3,4>
+ 2094940162U, // <2,3,3,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 2953627415U, // <2,3,3,6>: Cost 3 vzipr LHS, <2,4,3,6>
+ 1884529808U, // <2,3,3,7>: Cost 2 vzipr LHS, <1,5,3,7>
+ 1879884702U, // <2,3,3,u>: Cost 2 vzipr LHS, <1,2,3,u>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2094669827U, // <2,3,4,2>: Cost 2 ins <2,3,4,u>, lane 3
+ 2094669827U, // <2,3,4,3>: Cost 2 ins <2,3,4,u>, lane 3
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1691241782U, // <2,3,4,6>: Cost 2 vuzpl <2,2,3,3>, RHS
+ 2094669827U, // <2,3,4,7>: Cost 2 ins <2,3,4,u>, lane 3
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726274U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, <0,2,3,5>
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860843U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,1,3>
+ 2094923778U, // <2,3,5,3>: Cost 2 ins <2,3,u,3>, lane 2
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1758350646U, // <2,3,5,7>: Cost 2 vuzpr <2,2,3,3>, RHS
+ 1758350647U, // <2,3,5,u>: Cost 2 vuzpr <2,2,3,3>, RHS
+ 2094817283U, // <2,3,6,0>: Cost 2 ins <2,3,6,u>, lane 3
+ 2094907394U, // <2,3,6,1>: Cost 2 ins <2,3,u,1>, lane 2
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2094923778U, // <2,3,6,3>: Cost 2 ins <2,3,u,3>, lane 2
+ 2094817283U, // <2,3,6,4>: Cost 2 ins <2,3,6,u>, lane 3
+ 2094940162U, // <2,3,6,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1060216836U, // <2,3,6,7>: Cost 1 ins RHS, lane 4
+ 1060216836U, // <2,3,6,u>: Cost 1 ins RHS, lane 4
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2094907394U, // <2,3,7,1>: Cost 2 ins <2,3,u,1>, lane 2
+ 2974892790U, // <2,3,7,2>: Cost 3 vzipr <3,6,2,7>, <1,0,3,2>
+ 2133999620U, // <2,3,7,3>: Cost 2 ins <u,u,7,3>, lane 4
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2094940162U, // <2,3,7,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 2134024196U, // <2,3,7,6>: Cost 2 ins <u,u,7,6>, lane 4
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1879925654U, // <2,3,u,0>: Cost 2 vzipr LHS, <1,2,3,0>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1879925494U, // <2,3,u,2>: Cost 2 vzipr LHS, <1,0,3,2>
+ 1059889156U, // <2,3,u,3>: Cost 1 ins LHS, lane 4
+ 1879925658U, // <2,3,u,4>: Cost 2 vzipr LHS, <1,2,3,4>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1060216836U, // <2,3,u,7>: Cost 1 ins RHS, lane 4
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2826125312U, // <2,4,0,0>: Cost 3 vuzpr <1,2,3,4>, <0,0,0,0>
+ 2097635329U, // <2,4,0,1>: Cost 2 ins <2,u,0,1>, lane 1
+ 1691992166U, // <2,4,0,2>: Cost 2 vuzpl <2,3,4,5>, LHS
+ 3171393537U, // <2,4,0,3>: Cost 3 ins <2,u,0,3>, lane 1
+ 2765734092U, // <2,4,0,4>: Cost 3 vuzpl <2,3,4,5>, <0,2,4,6>
+ 3094528338U, // <2,4,0,5>: Cost 3 vtrnr <1,2,3,0>, <0,4,1,5>
+ 1960103222U, // <2,4,0,6>: Cost 2 vtrnl <2,3,0,1>, RHS
+ 3171426305U, // <2,4,0,7>: Cost 3 ins <2,u,0,7>, lane 1
+ 1960103240U, // <2,4,0,u>: Cost 2 vtrnl <2,3,0,1>, RHS
+ 3204620288U, // <2,4,1,0>: Cost 3 ins <u,4,1,0>, lane 0
+ 2826126132U, // <2,4,1,1>: Cost 3 vuzpr <1,2,3,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 1752383590U, // <2,4,1,3>: Cost 2 vuzpr <1,2,3,4>, LHS
+ 3204653056U, // <2,4,1,4>: Cost 3 ins <u,4,1,4>, lane 0
+ 2130919424U, // <2,4,1,5>: Cost 2 ins <u,4,1,5>, lane 0
+ 3031936310U, // <2,4,1,6>: Cost 3 vtrnl <2,0,1,2>, RHS
+ 3169361922U, // <2,4,1,7>: Cost 3 ins <2,4,u,7>, lane 2
+ 1752383595U, // <2,4,1,u>: Cost 2 vuzpr <1,2,3,4>, LHS
+ 2826126230U, // <2,4,2,0>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,0>
+ 3171524609U, // <2,4,2,1>: Cost 3 ins <2,u,2,1>, lane 1
+ 2097790977U, // <2,4,2,2>: Cost 2 ins <2,u,2,2>, lane 1
+ 2130976768U, // <2,4,2,3>: Cost 2 ins <u,4,2,3>, lane 0
+ 1752384410U, // <2,4,2,4>: Cost 2 vuzpr <1,2,3,4>, <1,2,3,4>
+ 1825377590U, // <2,4,2,5>: Cost 2 vzipl <2,2,2,2>, RHS
+ 1959595318U, // <2,4,2,6>: Cost 2 vtrnl <2,2,2,2>, RHS
+ 3171573761U, // <2,4,2,7>: Cost 3 ins <2,u,2,7>, lane 1
+ 1825377833U, // <2,4,2,u>: Cost 2 vzipl <2,2,2,2>, RHS
+ 2826127049U, // <2,4,3,0>: Cost 3 vuzpr <1,2,3,4>, <2,3,4,0>
+ 2958270501U, // <2,4,3,1>: Cost 3 vzipr LHS, <0,0,4,1>
+ 2958270502U, // <2,4,3,2>: Cost 3 vzipr LHS, <0,0,4,2>
+ 2097872897U, // <2,4,3,3>: Cost 2 ins <2,u,3,3>, lane 1
+ 1927662800U, // <2,4,3,4>: Cost 2 vzipr LHS, <4,4,4,4>
+ 1879885518U, // <2,4,3,5>: Cost 2 vzipr LHS, <2,3,4,5>
+ 1879883980U, // <2,4,3,6>: Cost 2 vzipr LHS, <0,2,4,6>
+ 2097905665U, // <2,4,3,7>: Cost 2 ins <2,u,3,7>, lane 1
+ 1879883982U, // <2,4,3,u>: Cost 2 vzipr LHS, <0,2,4,u>
+ 2563735654U, // <2,4,4,0>: Cost 3 vext1 <2,2,4,4>, LHS
+ 2826127824U, // <2,4,4,1>: Cost 3 vuzpr <1,2,3,4>, <3,4,0,1>
+ 2826127834U, // <2,4,4,2>: Cost 3 vuzpr <1,2,3,4>, <3,4,1,2>
+ 2826127106U, // <2,4,4,3>: Cost 3 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2131132416U, // <2,4,4,4>: Cost 2 ins <u,4,4,4>, lane 0
+ 2097963009U, // <2,4,4,5>: Cost 2 ins <2,u,4,5>, lane 1
+ 1691995446U, // <2,4,4,6>: Cost 2 vuzpl <2,3,4,5>, RHS
+ 3094562602U, // <2,4,4,7>: Cost 3 vtrnr <1,2,3,4>, <2,4,5,7>
+ 1691995464U, // <2,4,4,u>: Cost 2 vuzpl <2,3,4,5>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2765737726U, // <2,4,5,3>: Cost 3 vuzpl <2,3,4,5>, <5,2,3,4>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 2131214336U, // <2,4,5,5>: Cost 2 ins <u,4,5,5>, lane 0
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1752386870U, // <2,4,5,7>: Cost 2 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066380U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, <0,2,4,6>
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 1828146486U, // <2,4,6,5>: Cost 2 vzipl <2,6,3,7>, RHS
+ 2131296256U, // <2,4,6,6>: Cost 2 ins <u,4,6,6>, lane 0
+ 2131304448U, // <2,4,6,7>: Cost 2 ins <u,4,6,7>, lane 0
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 2867934030U, // <2,4,7,1>: Cost 3 vuzpr <u,2,3,4>, <6,7,0,1>
+ 3169320962U, // <2,4,7,2>: Cost 3 ins <2,4,u,2>, lane 2
+ 2867933312U, // <2,4,7,3>: Cost 3 vuzpr <u,2,3,4>, <5,7,1,3>
+ 3205095424U, // <2,4,7,4>: Cost 3 ins <u,4,7,4>, lane 0
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2131369984U, // <2,4,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 2867933352U, // <2,4,7,7>: Cost 3 vuzpr <u,2,3,4>, <5,7,5,7>
+ 2131369984U, // <2,4,7,u>: Cost 2 ins <u,4,7,6>, lane 0
+ 1478082766U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, <0,2,4,u>
+ 2097635329U, // <2,4,u,1>: Cost 2 ins <2,u,0,1>, lane 1
+ 1691997998U, // <2,4,u,2>: Cost 2 vuzpl <2,3,4,5>, LHS
+ 1752384157U, // <2,4,u,3>: Cost 2 vuzpr <1,2,3,4>, LHS
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 1879926478U, // <2,4,u,5>: Cost 2 vzipr LHS, <2,3,4,5>
+ 1879924940U, // <2,4,u,6>: Cost 2 vzipr LHS, <0,2,4,6>
+ 1752387113U, // <2,4,u,7>: Cost 2 vuzpr <1,2,3,4>, RHS
+ 1879924942U, // <2,4,u,u>: Cost 2 vzipr LHS, <0,2,4,u>
+ 2765160612U, // <2,5,0,0>: Cost 3 vuzpl <2,2,5,7>, <0,2,0,2>
+ 2097635329U, // <2,5,0,1>: Cost 2 ins <2,u,0,1>, lane 1
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 3136335876U, // <2,5,0,5>: Cost 3 vtrnr <u,2,3,0>, <5,5,5,5>
+ 3171418113U, // <2,5,0,6>: Cost 3 ins <2,u,0,6>, lane 1
+ 2020789558U, // <2,5,0,7>: Cost 2 vtrnr <1,2,3,0>, RHS
+ 2020789559U, // <2,5,0,u>: Cost 2 vtrnr <1,2,3,0>, RHS
+ 2599616614U, // <2,5,1,0>: Cost 3 vext1 <u,2,5,1>, LHS
+ 3205292032U, // <2,5,1,1>: Cost 3 ins <u,5,1,1>, lane 0
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 2599620736U, // <2,5,1,5>: Cost 3 vext1 <u,2,5,1>, <5,7,1,3>
+ 3205332992U, // <2,5,1,6>: Cost 3 ins <u,5,1,6>, lane 0
+ 2131599360U, // <2,5,1,7>: Cost 2 ins <u,5,1,7>, lane 0
+ 2131599360U, // <2,5,1,u>: Cost 2 ins <u,5,1,7>, lane 0
+ 3171516417U, // <2,5,2,0>: Cost 3 ins <2,u,2,0>, lane 1
+ 3006040978U, // <2,5,2,1>: Cost 3 vzipr <u,u,2,2>, <4,0,5,1>
+ 2097790977U, // <2,5,2,2>: Cost 2 ins <2,u,2,2>, lane 1
+ 2131640320U, // <2,5,2,3>: Cost 2 ins <u,5,2,3>, lane 0
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 2820014256U, // <2,5,2,5>: Cost 3 vuzpr <0,2,1,5>, <0,2,1,5>
+ 2958264834U, // <2,5,2,6>: Cost 3 vzipr <0,u,2,2>, <3,4,5,6>
+ 2014612790U, // <2,5,2,7>: Cost 2 vtrnr <0,2,0,2>, RHS
+ 2014612791U, // <2,5,2,u>: Cost 2 vtrnr <0,2,0,2>, RHS
+ 2958273506U, // <2,5,3,0>: Cost 3 vzipr LHS, <4,1,5,0>
+ 1927662482U, // <2,5,3,1>: Cost 2 vzipr LHS, <4,0,5,1>
+ 2899955454U, // <2,5,3,2>: Cost 3 vzipl <2,3,4,5>, <5,2,3,4>
+ 2097872897U, // <2,5,3,3>: Cost 2 ins <2,u,3,3>, lane 1
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 1927662810U, // <2,5,3,5>: Cost 2 vzipr LHS, <4,4,5,5>
+ 1879886338U, // <2,5,3,6>: Cost 2 vzipr LHS, <3,4,5,6>
+ 1879884800U, // <2,5,3,7>: Cost 2 vzipr LHS, <1,3,5,7>
+ 1879884801U, // <2,5,3,u>: Cost 2 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3171672065U, // <2,5,4,1>: Cost 3 ins <2,u,4,1>, lane 1
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 3034173182U, // <2,5,4,3>: Cost 3 vtrnl <2,3,4,5>, <5,2,3,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2097963009U, // <2,5,4,5>: Cost 2 ins <2,u,4,5>, lane 1
+ 2820164098U, // <2,5,4,6>: Cost 3 vuzpr <0,2,3,5>, <3,4,5,6>
+ 2020822326U, // <2,5,4,7>: Cost 2 vtrnr <1,2,3,4>, RHS
+ 2020822327U, // <2,5,4,u>: Cost 2 vtrnr <1,2,3,4>, RHS
+ 2599649382U, // <2,5,5,0>: Cost 3 vext1 <u,2,5,5>, LHS
+ 3003411346U, // <2,5,5,1>: Cost 3 vzipr <u,4,2,5>, <4,0,5,1>
+ 2563819142U, // <2,5,5,2>: Cost 3 vext1 <2,2,5,5>, <2,2,5,5>
+ 2953642113U, // <2,5,5,3>: Cost 3 vzipr <0,1,2,5>, <0,1,5,3>
+ 2599652662U, // <2,5,5,4>: Cost 3 vext1 <u,2,5,5>, RHS
+ 2131877888U, // <2,5,5,5>: Cost 2 ins <u,5,5,5>, lane 0
+ 2954971650U, // <2,5,5,6>: Cost 3 vzipr <0,3,2,5>, <3,4,5,6>
+ 2131894272U, // <2,5,5,7>: Cost 2 ins <u,5,5,7>, lane 0
+ 2131877888U, // <2,5,5,u>: Cost 2 ins <u,5,5,5>, lane 0
+ 2131910656U, // <2,5,6,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 2131918848U, // <2,5,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2131927040U, // <2,5,6,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2131935232U, // <2,5,6,3>: Cost 2 ins <u,5,6,3>, lane 0
+ 2131943424U, // <2,5,6,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 2131951616U, // <2,5,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 2131959808U, // <2,5,6,6>: Cost 2 ins <u,5,6,6>, lane 0
+ 1058226176U, // <2,5,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <2,5,6,u>: Cost 1 ins RHS, lane 0
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2712244352U, // <2,5,7,1>: Cost 3 vext3 <4,6,0,2>, <5,7,1,3>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 2953658497U, // <2,5,7,3>: Cost 3 vzipr <0,1,2,7>, <0,1,5,3>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712244392U, // <2,5,7,5>: Cost 3 vext3 <4,6,0,2>, <5,7,5,7>
+ 2712244396U, // <2,5,7,6>: Cost 3 vext3 <4,6,0,2>, <5,7,6,2>
+ 2132041728U, // <2,5,7,7>: Cost 2 ins <u,5,7,7>, lane 0
+ 2132041728U, // <2,5,7,u>: Cost 2 ins <u,5,7,7>, lane 0
+ 2131910656U, // <2,5,u,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 1927703442U, // <2,5,u,1>: Cost 2 vzipr LHS, <4,0,5,1>
+ 2097790977U, // <2,5,u,2>: Cost 2 ins <2,u,2,2>, lane 1
+ 2097872897U, // <2,5,u,3>: Cost 2 ins <2,u,3,3>, lane 1
+ 2131943424U, // <2,5,u,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 1927703770U, // <2,5,u,5>: Cost 2 vzipr LHS, <4,4,5,5>
+ 1879927298U, // <2,5,u,6>: Cost 2 vzipr LHS, <3,4,5,6>
+ 1058226176U, // <2,5,u,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <2,5,u,u>: Cost 1 ins RHS, lane 0
+ 2820243456U, // <2,6,0,0>: Cost 3 vuzpr <0,2,4,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2132148224U, // <2,6,0,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 3171393537U, // <2,6,0,3>: Cost 3 ins <2,u,0,3>, lane 1
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3170672642U, // <2,6,0,5>: Cost 3 ins <2,6,u,5>, lane 2
+ 3136335220U, // <2,6,0,6>: Cost 3 vtrnr <u,2,3,0>, <4,6,4,6>
+ 2096947202U, // <2,6,0,7>: Cost 2 ins <2,6,u,7>, lane 2
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2820244276U, // <2,6,1,1>: Cost 3 vuzpr <0,2,4,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 1746501734U, // <2,6,1,3>: Cost 2 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3205996544U, // <2,6,1,6>: Cost 3 ins <u,6,1,6>, lane 0
+ 2096947202U, // <2,6,1,7>: Cost 2 ins <2,6,u,7>, lane 2
+ 1746501739U, // <2,6,1,u>: Cost 2 vuzpr <0,2,4,6>, LHS
+ 2820244374U, // <2,6,2,0>: Cost 3 vuzpr <0,2,4,6>, <1,2,3,0>
+ 3171524609U, // <2,6,2,1>: Cost 3 ins <2,u,2,1>, lane 1
+ 2097790977U, // <2,6,2,2>: Cost 2 ins <2,u,2,2>, lane 1
+ 2096955397U, // <2,6,2,3>: Cost 2 ins <2,6,u,u>, lane 5
+ 2820243622U, // <2,6,2,4>: Cost 3 vuzpr <0,2,4,6>, <0,2,0,4>
+ 3171557377U, // <2,6,2,5>: Cost 3 ins <2,u,2,5>, lane 1
+ 1746501836U, // <2,6,2,6>: Cost 2 vuzpr <0,2,4,6>, <0,2,4,6>
+ 1884523830U, // <2,6,2,7>: Cost 2 vzipr <0,u,2,2>, RHS
+ 1884523831U, // <2,6,2,u>: Cost 2 vzipr <0,u,2,2>, RHS
+ 2096586755U, // <2,6,3,0>: Cost 2 ins <2,6,3,u>, lane 3
+ 2096586755U, // <2,6,3,1>: Cost 2 ins <2,6,3,u>, lane 3
+ 1927662492U, // <2,6,3,2>: Cost 2 vzipr LHS, <4,0,6,2>
+ 2097872897U, // <2,6,3,3>: Cost 2 ins <2,u,3,3>, lane 1
+ 2096586755U, // <2,6,3,4>: Cost 2 ins <2,6,3,u>, lane 3
+ 2096586755U, // <2,6,3,5>: Cost 2 ins <2,6,3,u>, lane 3
+ 1927662820U, // <2,6,3,6>: Cost 2 vzipr LHS, <4,4,6,6>
+ 806145334U, // <2,6,3,7>: Cost 1 vzipr LHS, RHS
+ 806145335U, // <2,6,3,u>: Cost 1 vzipr LHS, RHS
+ 2820245292U, // <2,6,4,0>: Cost 3 vuzpr <0,2,4,6>, <2,4,6,0>
+ 3171672065U, // <2,6,4,1>: Cost 3 ins <2,u,4,1>, lane 1
+ 2820243782U, // <2,6,4,2>: Cost 3 vuzpr <0,2,4,6>, <0,4,0,2>
+ 3171688449U, // <2,6,4,3>: Cost 3 ins <2,u,4,3>, lane 1
+ 2820243784U, // <2,6,4,4>: Cost 3 vuzpr <0,2,4,6>, <0,4,0,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2132475904U, // <2,6,4,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 2096947202U, // <2,6,4,7>: Cost 2 ins <2,6,u,7>, lane 2
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3170476035U, // <2,6,5,0>: Cost 3 ins <2,6,5,u>, lane 3
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 3206258688U, // <2,6,5,2>: Cost 3 ins <u,6,5,2>, lane 0
+ 3170656258U, // <2,6,5,3>: Cost 3 ins <2,6,u,3>, lane 2
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2868023300U, // <2,6,5,5>: Cost 3 vuzpr <u,2,4,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 1746505014U, // <2,6,5,7>: Cost 2 vuzpr <0,2,4,6>, RHS
+ 1746505015U, // <2,6,5,u>: Cost 2 vuzpr <0,2,4,6>, RHS
+ 2955643964U, // <2,6,6,0>: Cost 3 vzipr <0,4,2,6>, <4,2,6,0>
+ 2820246859U, // <2,6,6,1>: Cost 3 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2820246860U, // <2,6,6,2>: Cost 3 vuzpr <0,2,4,6>, <4,6,0,2>
+ 2820245412U, // <2,6,6,3>: Cost 3 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2955643968U, // <2,6,6,4>: Cost 3 vzipr <0,4,2,6>, <4,2,6,4>
+ 2820246899U, // <2,6,6,5>: Cost 3 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2132623360U, // <2,6,6,6>: Cost 2 ins <u,6,6,6>, lane 0
+ 1881902390U, // <2,6,6,7>: Cost 2 vzipr <0,4,2,6>, RHS
+ 1881902391U, // <2,6,6,u>: Cost 2 vzipr <0,4,2,6>, RHS
+ 2132647936U, // <2,6,7,0>: Cost 2 ins <u,6,7,0>, lane 0
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 3124596044U, // <2,6,7,2>: Cost 3 vtrnr <6,2,5,7>, <4,6,0,2>
+ 2868023424U, // <2,6,7,3>: Cost 3 vuzpr <u,2,4,6>, <5,7,1,3>
+ 2132680704U, // <2,6,7,4>: Cost 2 ins <u,6,7,4>, lane 0
+ 2252181996U, // <2,6,7,5>: Cost 3 vrev <6,2,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2132705280U, // <2,6,7,7>: Cost 2 ins <u,6,7,7>, lane 0
+ 2132647936U, // <2,6,7,u>: Cost 2 ins <u,6,7,0>, lane 0
+ 2096586755U, // <2,6,u,0>: Cost 2 ins <2,6,3,u>, lane 3
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 1927703452U, // <2,6,u,2>: Cost 2 vzipr LHS, <4,0,6,2>
+ 1746502301U, // <2,6,u,3>: Cost 2 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 1927703780U, // <2,6,u,6>: Cost 2 vzipr LHS, <4,4,6,6>
+ 806186294U, // <2,6,u,7>: Cost 1 vzipr LHS, RHS
+ 806186295U, // <2,6,u,u>: Cost 1 vzipr LHS, RHS
+ 2581839974U, // <2,7,0,0>: Cost 3 vext1 <5,2,7,0>, LHS
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2581843254U, // <2,7,0,4>: Cost 3 vext1 <5,2,7,0>, RHS
+ 2581843742U, // <2,7,0,5>: Cost 3 vext1 <5,2,7,0>, <5,2,7,0>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 3136336040U, // <2,7,0,7>: Cost 3 vtrnr <u,2,3,0>, <5,7,5,7>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3206619136U, // <2,7,1,1>: Cost 3 ins <u,7,1,1>, lane 0
+ 3206627328U, // <2,7,1,2>: Cost 3 ins <u,7,1,2>, lane 0
+ 2132893696U, // <2,7,1,3>: Cost 2 ins <u,7,1,3>, lane 0
+ 2599767350U, // <2,7,1,4>: Cost 3 vext1 <u,2,7,1>, RHS
+ 3206651904U, // <2,7,1,5>: Cost 3 ins <u,7,1,5>, lane 0
+ 3171344386U, // <2,7,1,6>: Cost 3 ins <2,7,u,6>, lane 2
+ 2599769082U, // <2,7,1,7>: Cost 3 vext1 <u,2,7,1>, <7,0,1,2>
+ 2132893696U, // <2,7,1,u>: Cost 2 ins <u,7,1,3>, lane 0
+ 2581856358U, // <2,7,2,0>: Cost 3 vext1 <5,2,7,2>, LHS
+ 3136131918U, // <2,7,2,1>: Cost 3 vtrnr <u,2,0,2>, <6,7,0,1>
+ 2097790977U, // <2,7,2,2>: Cost 2 ins <2,u,2,2>, lane 1
+ 2132967424U, // <2,7,2,3>: Cost 2 ins <u,7,2,3>, lane 0
+ 2581859638U, // <2,7,2,4>: Cost 3 vext1 <5,2,7,2>, RHS
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 1770548291U, // <2,7,2,7>: Cost 2 vuzpr <4,2,6,7>, <4,2,6,7>
+ 2097790977U, // <2,7,2,u>: Cost 2 ins <2,u,2,2>, lane 1
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 1927663312U, // <2,7,3,3>: Cost 2 vzipr LHS, <5,1,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 1927663640U, // <2,7,3,7>: Cost 2 vzipr LHS, <5,5,7,7>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2581872742U, // <2,7,4,0>: Cost 3 vext1 <5,2,7,4>, LHS
+ 2581873562U, // <2,7,4,1>: Cost 3 vext1 <5,2,7,4>, <1,2,3,4>
+ 3171680257U, // <2,7,4,2>: Cost 3 ins <2,u,4,2>, lane 1
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 2581876022U, // <2,7,4,4>: Cost 3 vext1 <5,2,7,4>, RHS
+ 2133131264U, // <2,7,4,5>: Cost 2 ins <u,7,4,5>, lane 0
+ 2712245609U, // <2,7,4,6>: Cost 3 vext3 <4,6,0,2>, <7,4,6,0>
+ 3136368808U, // <2,7,4,7>: Cost 3 vtrnr <u,2,3,4>, <5,7,5,7>
+ 2133131264U, // <2,7,4,u>: Cost 2 ins <u,7,4,5>, lane 0
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3206914048U, // <2,7,5,1>: Cost 3 ins <u,7,5,1>, lane 0
+ 2844290353U, // <2,7,5,2>: Cost 3 vuzpr <4,2,6,7>, <4,5,6,2>
+ 2991469050U, // <2,7,5,3>: Cost 3 vzipr <6,4,2,5>, <6,2,7,3>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3206946816U, // <2,7,5,5>: Cost 3 ins <u,7,5,5>, lane 0
+ 3206955008U, // <2,7,5,6>: Cost 3 ins <u,7,5,6>, lane 0
+ 2133221376U, // <2,7,5,7>: Cost 2 ins <u,7,5,7>, lane 0
+ 2133221376U, // <2,7,5,u>: Cost 2 ins <u,7,5,7>, lane 0
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3136459598U, // <2,7,6,1>: Cost 3 vtrnr <u,2,4,6>, <6,7,0,1>
+ 2901890250U, // <2,7,6,2>: Cost 3 vzipl <2,6,3,7>, <7,2,6,3>
+ 3136458880U, // <2,7,6,3>: Cost 3 vtrnr <u,2,4,6>, <5,7,1,3>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 2133295104U, // <2,7,6,7>: Cost 2 ins <u,7,6,7>, lane 0
+ 2133295104U, // <2,7,6,u>: Cost 2 ins <u,7,6,7>, lane 0
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3207061504U, // <2,7,7,1>: Cost 3 ins <u,7,7,1>, lane 0
+ 2563983002U, // <2,7,7,2>: Cost 3 vext1 <2,2,7,7>, <2,2,7,7>
+ 2998784506U, // <2,7,7,3>: Cost 3 vzipr <7,6,2,7>, <6,2,7,3>
+ 2599816502U, // <2,7,7,4>: Cost 3 vext1 <u,2,7,7>, RHS
+ 3207094272U, // <2,7,7,5>: Cost 3 ins <u,7,7,5>, lane 0
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2133368832U, // <2,7,7,7>: Cost 2 ins <u,7,7,7>, lane 0
+ 2133368832U, // <2,7,7,u>: Cost 2 ins <u,7,7,7>, lane 0
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2097790977U, // <2,7,u,2>: Cost 2 ins <2,u,2,2>, lane 1
+ 1927704272U, // <2,7,u,3>: Cost 2 vzipr LHS, <5,1,7,3>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2133131264U, // <2,7,u,5>: Cost 2 ins <u,7,4,5>, lane 0
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 1927704600U, // <2,7,u,7>: Cost 2 vzipr LHS, <5,5,7,7>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2020786845U, // <2,u,0,3>: Cost 2 vtrnr <1,2,3,0>, LHS
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2094940162U, // <2,u,0,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 1960106138U, // <2,u,0,6>: Cost 2 vtrnl <2,3,0,1>, RHS
+ 2020789801U, // <2,u,0,7>: Cost 2 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2096947202U, // <2,u,1,7>: Cost 2 ins <2,6,u,7>, lane 2
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328556U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, <0,2,u,2>
+ 1825380142U, // <2,u,2,1>: Cost 2 vzipl <2,2,2,2>, LHS
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1055244288U, // <2,u,2,3>: Cost 1 ins LHS, lane 0
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 1825380506U, // <2,u,2,5>: Cost 2 vzipl <2,2,2,2>, RHS
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2014613033U, // <2,u,2,7>: Cost 2 vtrnr <0,2,0,2>, RHS
+ 1055244288U, // <2,u,2,u>: Cost 1 ins LHS, lane 0
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1879885550U, // <2,u,3,1>: Cost 2 vzipr LHS, <2,3,u,1>
+ 1879884012U, // <2,u,3,2>: Cost 2 vzipr LHS, <0,2,u,2>
+ 806142108U, // <2,u,3,3>: Cost 1 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 1879885554U, // <2,u,3,5>: Cost 2 vzipr LHS, <2,3,u,5>
+ 1879884016U, // <2,u,3,6>: Cost 2 vzipr LHS, <0,2,u,6>
+ 806145352U, // <2,u,3,7>: Cost 1 vzipr LHS, RHS
+ 806142113U, // <2,u,3,u>: Cost 1 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 1960433454U, // <2,u,4,2>: Cost 2 vtrnl <2,3,4,5>, LHS
+ 2020819613U, // <2,u,4,3>: Cost 2 vtrnr <1,2,3,4>, LHS
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1691610422U, // <2,u,4,6>: Cost 2 vuzpl <2,2,u,3>, RHS
+ 2020822569U, // <2,u,4,7>: Cost 2 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2094252034U, // <2,u,5,2>: Cost 2 ins <2,2,u,2>, lane 2
+ 2094260226U, // <2,u,5,3>: Cost 2 ins <2,2,u,3>, lane 2
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1746226486U, // <2,u,5,7>: Cost 2 vuzpr <0,2,0,u>, RHS
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361328U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, <0,2,u,6>
+ 1828149038U, // <2,u,6,1>: Cost 2 vzipl <2,6,3,7>, LHS
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2014937757U, // <2,u,6,3>: Cost 2 vtrnr <0,2,4,6>, LHS
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 1828149402U, // <2,u,6,5>: Cost 2 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1060216836U, // <2,u,6,7>: Cost 1 ins RHS, lane 4
+ 1060216836U, // <2,u,6,u>: Cost 1 ins RHS, lane 4
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2094907394U, // <2,u,7,1>: Cost 2 ins <2,3,u,1>, lane 2
+ 2094252034U, // <2,u,7,2>: Cost 2 ins <2,2,u,2>, lane 2
+ 2129354752U, // <2,u,7,3>: Cost 2 ins <u,1,7,3>, lane 0
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2094940162U, // <2,u,7,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 2134024196U, // <2,u,7,6>: Cost 2 ins <u,u,7,6>, lane 4
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1879925699U, // <2,u,u,0>: Cost 2 vzipr LHS, <1,2,u,0>
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 806183068U, // <2,u,u,3>: Cost 1 vzipr LHS, LHS
+ 1879925703U, // <2,u,u,4>: Cost 2 vzipr LHS, <1,2,u,4>
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1879924976U, // <2,u,u,6>: Cost 2 vzipr LHS, <0,2,u,6>
+ 806186312U, // <2,u,u,7>: Cost 1 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2960312624U, // <3,0,0,3>: Cost 3 vzipr <1,2,3,0>, <3,2,0,3>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3177381889U, // <3,0,0,5>: Cost 3 ins <3,u,0,5>, lane 1
+ 3177390081U, // <3,0,0,6>: Cost 3 ins <3,u,0,6>, lane 1
+ 3177398273U, // <3,0,0,7>: Cost 3 ins <3,u,0,7>, lane 1
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2128232448U, // <3,0,1,1>: Cost 2 ins <u,0,1,1>, lane 0
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2098429955U, // <3,0,1,3>: Cost 2 ins <3,0,1,u>, lane 3
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2098429955U, // <3,0,1,5>: Cost 2 ins <3,0,1,u>, lane 3
+ 2098429955U, // <3,0,1,6>: Cost 2 ins <3,0,1,u>, lane 3
+ 2098429955U, // <3,0,1,7>: Cost 2 ins <3,0,1,u>, lane 3
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2128314368U, // <3,0,2,2>: Cost 2 ins <u,0,2,2>, lane 0
+ 2098946053U, // <3,0,2,3>: Cost 2 ins <3,0,u,u>, lane 5
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2959000610U, // <3,0,2,5>: Cost 3 vzipr <1,0,3,2>, <1,4,0,5>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 3177545729U, // <3,0,2,7>: Cost 3 ins <3,u,2,7>, lane 1
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2820636924U, // <3,0,3,0>: Cost 3 vuzpr <0,3,1,0>, <0,3,1,0>
+ 1832091750U, // <3,0,3,1>: Cost 2 vzipl <3,3,3,3>, LHS
+ 1966309478U, // <3,0,3,2>: Cost 2 vtrnl <3,3,3,3>, LHS
+ 2103844865U, // <3,0,3,3>: Cost 2 ins <3,u,3,3>, lane 1
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 2772716034U, // <3,0,3,5>: Cost 3 vuzpl <3,5,0,2>, <3,4,5,6>
+ 3177611265U, // <3,0,3,6>: Cost 3 ins <3,u,3,6>, lane 1
+ 3177619457U, // <3,0,3,7>: Cost 3 ins <3,u,3,7>, lane 1
+ 1832092317U, // <3,0,3,u>: Cost 2 vzipl <3,3,3,3>, LHS
+ 2689835334U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,2>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 2906669312U, // <3,0,4,3>: Cost 3 vzipl <3,4,5,6>, <0,3,1,4>
+ 2689835373U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,5>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2769382710U, // <3,0,4,6>: Cost 3 vuzpl <3,0,0,0>, RHS
+ 3177693185U, // <3,0,4,7>: Cost 3 ins <3,u,4,7>, lane 1
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 3101278208U, // <3,0,5,0>: Cost 3 vtrnr <2,3,4,5>, <0,0,0,0>
+ 2128527360U, // <3,0,5,1>: Cost 2 ins <u,0,5,1>, lane 0
+ 1967145062U, // <3,0,5,2>: Cost 2 vtrnl <3,4,5,6>, LHS
+ 3040886978U, // <3,0,5,3>: Cost 3 vtrnl <3,4,5,6>, <0,2,3,5>
+ 3040886988U, // <3,0,5,4>: Cost 3 vtrnl <3,4,5,6>, <0,2,4,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2104016897U, // <3,0,5,6>: Cost 2 ins <3,u,5,6>, lane 1
+ 2820640054U, // <3,0,5,7>: Cost 3 vuzpr <0,3,1,0>, RHS
+ 1967145116U, // <3,0,5,u>: Cost 2 vtrnl <3,4,5,6>, LHS
+ 3202334720U, // <3,0,6,0>: Cost 3 ins <u,0,6,0>, lane 0
+ 2907635814U, // <3,0,6,1>: Cost 3 vzipl <3,6,0,7>, LHS
+ 2128609280U, // <3,0,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 3177807873U, // <3,0,6,3>: Cost 3 ins <3,u,6,3>, lane 1
+ 3202367488U, // <3,0,6,4>: Cost 3 ins <u,0,6,4>, lane 0
+ 3172663298U, // <3,0,6,5>: Cost 3 ins <3,0,u,5>, lane 2
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2098946053U, // <3,0,6,7>: Cost 2 ins <3,0,u,u>, lane 5
+ 2128609280U, // <3,0,6,u>: Cost 2 ins <u,0,6,2>, lane 0
+ 3095396352U, // <3,0,7,0>: Cost 3 vtrnr <1,3,5,7>, <0,0,0,0>
+ 3095396362U, // <3,0,7,1>: Cost 3 vtrnr <1,3,5,7>, <0,0,1,1>
+ 2098896898U, // <3,0,7,2>: Cost 2 ins <3,0,u,2>, lane 2
+ 3177881601U, // <3,0,7,3>: Cost 3 ins <3,u,7,3>, lane 1
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3177897985U, // <3,0,7,5>: Cost 3 ins <3,u,7,5>, lane 1
+ 3202457600U, // <3,0,7,6>: Cost 3 ins <u,0,7,6>, lane 0
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2098896898U, // <3,0,7,u>: Cost 2 ins <3,0,u,2>, lane 2
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2098429955U, // <3,0,u,3>: Cost 2 ins <3,0,1,u>, lane 3
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2098429955U, // <3,0,u,6>: Cost 2 ins <3,0,1,u>, lane 3
+ 2098429955U, // <3,0,u,7>: Cost 2 ins <3,0,1,u>, lane 3
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201468U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, <0,3,1,0>
+ 2128822272U, // <3,1,0,1>: Cost 2 ins <u,1,0,1>, lane 0
+ 1695727718U, // <3,1,0,2>: Cost 2 vuzpl <3,0,1,2>, LHS
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2960310610U, // <3,1,0,5>: Cost 3 vzipr <1,2,3,0>, <0,4,1,5>
+ 2832516572U, // <3,1,0,6>: Cost 3 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3177398273U, // <3,1,0,7>: Cost 3 ins <3,u,0,7>, lane 1
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2103689217U, // <3,1,1,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3177463809U, // <3,1,1,6>: Cost 3 ins <3,u,1,6>, lane 1
+ 3100952848U, // <3,1,1,7>: Cost 3 vtrnr <2,3,0,1>, <3,1,5,7>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2128961536U, // <3,1,2,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 2128969728U, // <3,1,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 2128977920U, // <3,1,2,2>: Cost 2 ins <u,1,2,2>, lane 0
+ 1055244288U, // <3,1,2,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <3,1,2,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 2129002496U, // <3,1,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2129010688U, // <3,1,2,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2129018880U, // <3,1,2,7>: Cost 2 ins <u,1,2,7>, lane 0
+ 1055244288U, // <3,1,2,u>: Cost 1 ins LHS, lane 0
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2021326950U, // <3,1,3,3>: Cost 2 vtrnr <1,3,1,3>, LHS
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2832516096U, // <3,1,3,7>: Cost 3 vuzpr <2,3,0,1>, <1,3,5,7>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234240U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, <0,3,1,4>
+ 2960343050U, // <3,1,4,1>: Cost 3 vzipr <1,2,3,4>, <0,0,1,1>
+ 2960345238U, // <3,1,4,2>: Cost 3 vzipr <1,2,3,4>, <3,0,1,2>
+ 2129133568U, // <3,1,4,3>: Cost 2 ins <u,1,4,3>, lane 0
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2129149952U, // <3,1,4,5>: Cost 2 ins <u,1,4,5>, lane 0
+ 1695730998U, // <3,1,4,6>: Cost 2 vuzpl <3,0,1,2>, RHS
+ 3177693185U, // <3,1,4,7>: Cost 3 ins <3,u,4,7>, lane 1
+ 1695731016U, // <3,1,4,u>: Cost 2 vuzpl <3,0,1,2>, RHS
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 2961678674U, // <3,1,5,5>: Cost 3 vzipr <1,4,3,5>, <0,4,1,5>
+ 2104016897U, // <3,1,5,6>: Cost 2 ins <3,u,5,6>, lane 1
+ 1758776630U, // <3,1,5,7>: Cost 2 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2907783926U, // <3,1,6,0>: Cost 3 vzipl <3,6,2,7>, <1,0,3,2>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2222752740U, // <3,1,6,2>: Cost 3 vrev <1,3,2,6>
+ 2129281024U, // <3,1,6,3>: Cost 2 ins <u,1,6,3>, lane 0
+ 2222900214U, // <3,1,6,4>: Cost 3 vrev <1,3,4,6>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2868350324U, // <3,1,6,6>: Cost 3 vuzpr <u,3,0,1>, <4,6,4,6>
+ 2129313792U, // <3,1,6,7>: Cost 2 ins <u,1,6,7>, lane 0
+ 2129281024U, // <3,1,6,u>: Cost 2 ins <u,1,6,3>, lane 0
+ 3177857025U, // <3,1,7,0>: Cost 3 ins <3,u,7,0>, lane 1
+ 3095397172U, // <3,1,7,1>: Cost 3 vtrnr <1,3,5,7>, <1,1,1,1>
+ 2962360470U, // <3,1,7,2>: Cost 3 vzipr <1,5,3,7>, <3,0,1,2>
+ 2021654630U, // <3,1,7,3>: Cost 2 vtrnr <1,3,5,7>, LHS
+ 3177889793U, // <3,1,7,4>: Cost 3 ins <3,u,7,4>, lane 1
+ 1149240320U, // <3,1,7,5>: Cost 2 vrev <1,3,5,7>
+ 2223055881U, // <3,1,7,6>: Cost 3 vrev <1,3,6,7>
+ 2868351144U, // <3,1,7,7>: Cost 3 vuzpr <u,3,0,1>, <5,7,5,7>
+ 2021654635U, // <3,1,7,u>: Cost 2 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 1695733550U, // <3,1,u,2>: Cost 2 vuzpl <3,0,1,2>, LHS
+ 1055244288U, // <3,1,u,3>: Cost 1 ins LHS, lane 0
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 1695733914U, // <3,1,u,6>: Cost 2 vuzpl <3,0,1,2>, RHS
+ 1758776873U, // <3,1,u,7>: Cost 2 vuzpr <2,3,0,1>, RHS
+ 1055244288U, // <3,1,u,u>: Cost 1 ins LHS, lane 0
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2129494016U, // <3,2,0,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 1886568550U, // <3,2,0,3>: Cost 2 vzipr <1,2,3,0>, LHS
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2960311348U, // <3,2,0,5>: Cost 3 vzipr <1,2,3,0>, <1,4,2,5>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 3177398273U, // <3,2,0,7>: Cost 3 ins <3,u,0,7>, lane 1
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2103689217U, // <3,2,1,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3177472001U, // <3,2,1,7>: Cost 3 ins <3,u,1,7>, lane 1
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836685U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,3>
+ 3177545729U, // <3,2,2,7>: Cost 3 ins <3,u,2,7>, lane 1
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 1611450042U, // <3,2,3,2>: Cost 2 vext3 LHS, <2,3,2,3>
+ 1885929574U, // <3,2,3,3>: Cost 2 vzipr <1,1,3,3>, LHS
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 1611450082U, // <3,2,3,6>: Cost 2 vext3 LHS, <2,3,6,7>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280674U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,3,2,4>
+ 2960343060U, // <3,2,4,2>: Cost 3 vzipr <1,2,3,4>, <0,0,2,2>
+ 1886601318U, // <3,2,4,3>: Cost 2 vzipr <1,2,3,4>, LHS
+ 2960344034U, // <3,2,4,4>: Cost 3 vzipr <1,2,3,4>, <1,3,2,4>
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2129821696U, // <3,2,4,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 3177693185U, // <3,2,4,7>: Cost 3 ins <3,u,4,7>, lane 1
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316170U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, <0,3,2,5>
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2104016897U, // <3,2,5,6>: Cost 2 ins <3,u,5,6>, lane 1
+ 2826554678U, // <3,2,5,7>: Cost 3 vuzpr <1,3,0,2>, RHS
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2129977344U, // <3,2,6,7>: Cost 2 ins <u,2,6,7>, lane 0
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 3095397270U, // <3,2,7,0>: Cost 3 vtrnr <1,3,5,7>, <1,2,3,0>
+ 3203743744U, // <3,2,7,1>: Cost 3 ins <u,2,7,1>, lane 0
+ 3095396516U, // <3,2,7,2>: Cost 3 vtrnr <1,3,5,7>, <0,2,0,2>
+ 1888616550U, // <3,2,7,3>: Cost 2 vzipr <1,5,3,7>, LHS
+ 3095397274U, // <3,2,7,4>: Cost 3 vtrnr <1,3,5,7>, <1,2,3,4>
+ 3095396528U, // <3,2,7,5>: Cost 3 vtrnr <1,3,5,7>, <0,2,1,5>
+ 1155286754U, // <3,2,7,6>: Cost 2 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 1888616555U, // <3,2,7,u>: Cost 2 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2129494016U, // <3,2,u,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2129821696U, // <3,2,u,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 2129977344U, // <3,2,u,7>: Cost 2 ins <u,2,6,7>, lane 0
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 1886569366U, // <3,3,0,0>: Cost 2 vzipr <1,2,3,0>, <1,2,3,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 1697874022U, // <3,3,0,2>: Cost 2 vuzpl <3,3,3,3>, LHS
+ 2100895746U, // <3,3,0,3>: Cost 2 ins <3,3,u,3>, lane 2
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 3041151490U, // <3,3,0,5>: Cost 3 vtrnl <3,5,0,2>, <3,4,5,6>
+ 3177390081U, // <3,3,0,6>: Cost 3 ins <3,u,0,6>, lane 1
+ 2960311440U, // <3,3,0,7>: Cost 3 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2103689217U, // <3,3,1,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 1752891494U, // <3,3,1,3>: Cost 2 vuzpr <1,3,1,3>, LHS
+ 2826635515U, // <3,3,1,4>: Cost 3 vuzpr <1,3,1,3>, <3,1,3,4>
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3177463809U, // <3,3,1,6>: Cost 3 ins <3,u,1,6>, lane 1
+ 3100951552U, // <3,3,1,7>: Cost 3 vtrnr <2,3,0,1>, <1,3,5,7>
+ 1752891499U, // <3,3,1,u>: Cost 2 vuzpr <1,3,1,3>, LHS
+ 2959000470U, // <3,3,2,0>: Cost 3 vzipr <1,0,3,2>, <1,2,3,0>
+ 2959000471U, // <3,3,2,1>: Cost 3 vzipr <1,0,3,2>, <1,2,3,1>
+ 1885258486U, // <3,3,2,2>: Cost 2 vzipr <1,0,3,2>, <1,0,3,2>
+ 2130313216U, // <3,3,2,3>: Cost 2 ins <u,3,2,3>, lane 0
+ 2959000474U, // <3,3,2,4>: Cost 3 vzipr <1,0,3,2>, <1,2,3,4>
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2959000720U, // <3,3,2,7>: Cost 3 vzipr <1,0,3,2>, <1,5,3,7>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2100568067U, // <3,3,3,1>: Cost 2 ins <3,3,3,u>, lane 3
+ 2100568067U, // <3,3,3,2>: Cost 2 ins <3,3,3,u>, lane 3
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2100568067U, // <3,3,3,5>: Cost 2 ins <3,3,3,u>, lane 3
+ 2100568067U, // <3,3,3,6>: Cost 2 ins <3,3,3,u>, lane 3
+ 2100568067U, // <3,3,3,7>: Cost 2 ins <3,3,3,u>, lane 3
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2960343958U, // <3,3,4,0>: Cost 3 vzipr <1,2,3,4>, <1,2,3,0>
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2960343798U, // <3,3,4,2>: Cost 3 vzipr <1,2,3,4>, <1,0,3,2>
+ 2100895746U, // <3,3,4,3>: Cost 2 ins <3,3,u,3>, lane 2
+ 1886602138U, // <3,3,4,4>: Cost 2 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 1697877302U, // <3,3,4,6>: Cost 2 vuzpl <3,3,3,3>, RHS
+ 2960344208U, // <3,3,4,7>: Cost 3 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2100895746U, // <3,3,5,3>: Cost 2 ins <3,3,u,3>, lane 2
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 2027538126U, // <3,3,5,5>: Cost 2 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2104016897U, // <3,3,5,6>: Cost 2 ins <3,u,5,6>, lane 1
+ 1752894774U, // <3,3,5,7>: Cost 2 vuzpr <1,3,1,3>, RHS
+ 1752894775U, // <3,3,5,u>: Cost 2 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3204333568U, // <3,3,6,1>: Cost 3 ins <u,3,6,1>, lane 0
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2100895746U, // <3,3,6,3>: Cost 2 ins <3,3,u,3>, lane 2
+ 2234845608U, // <3,3,6,4>: Cost 3 vrev <3,3,4,6>
+ 3204366336U, // <3,3,6,5>: Cost 3 ins <u,3,6,5>, lane 0
+ 1967893085U, // <3,3,6,6>: Cost 2 vtrnl <3,5,6,7>, <3,5,6,7>
+ 2130640896U, // <3,3,6,7>: Cost 2 ins <u,3,6,7>, lane 0
+ 2100895746U, // <3,3,6,u>: Cost 2 ins <3,3,u,3>, lane 2
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2962359030U, // <3,3,7,2>: Cost 3 vzipr <1,5,3,7>, <1,0,3,2>
+ 2100895746U, // <3,3,7,3>: Cost 2 ins <3,3,u,3>, lane 2
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 3095398094U, // <3,3,7,5>: Cost 3 vtrnr <1,3,5,7>, <2,3,4,5>
+ 3174662146U, // <3,3,7,6>: Cost 3 ins <3,3,u,6>, lane 2
+ 2021655552U, // <3,3,7,7>: Cost 2 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2021655552U, // <3,3,7,u>: Cost 2 vtrnr <1,3,5,7>, <1,3,5,7>
+ 1886569366U, // <3,3,u,0>: Cost 2 vzipr <1,2,3,0>, <1,2,3,0>
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1697879854U, // <3,3,u,2>: Cost 2 vuzpl <3,3,3,3>, LHS
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 1697880218U, // <3,3,u,6>: Cost 2 vuzpl <3,3,3,3>, RHS
+ 1752895017U, // <3,3,u,7>: Cost 2 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3177365505U, // <3,4,0,3>: Cost 3 ins <3,u,0,3>, lane 1
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1829948726U, // <3,4,0,5>: Cost 2 vzipl <3,0,1,2>, RHS
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3177398273U, // <3,4,0,7>: Cost 3 ins <3,u,0,7>, lane 1
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2820669542U, // <3,4,1,3>: Cost 3 vuzpr <0,3,1,4>, LHS
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2130919424U, // <3,4,1,5>: Cost 2 ins <u,4,1,5>, lane 0
+ 1964166454U, // <3,4,1,6>: Cost 2 vtrnl <3,0,1,2>, RHS
+ 3177472001U, // <3,4,1,7>: Cost 3 ins <3,u,1,7>, lane 1
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3204694016U, // <3,4,2,0>: Cost 3 ins <u,4,2,0>, lane 0
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2101600261U, // <3,4,2,3>: Cost 2 ins <3,4,u,u>, lane 5
+ 2826716058U, // <3,4,2,4>: Cost 3 vuzpr <1,3,2,4>, <1,2,3,4>
+ 2959001294U, // <3,4,2,5>: Cost 3 vzipr <1,0,3,2>, <2,3,4,5>
+ 2131001344U, // <3,4,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 3177545729U, // <3,4,2,7>: Cost 3 ins <3,u,2,7>, lane 1
+ 2101600261U, // <3,4,2,u>: Cost 2 ins <3,4,u,u>, lane 5
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2103844865U, // <3,4,3,3>: Cost 2 ins <3,u,3,3>, lane 1
+ 2820669696U, // <3,4,3,4>: Cost 3 vuzpr <0,3,1,4>, <0,3,1,4>
+ 1832095030U, // <3,4,3,5>: Cost 2 vzipl <3,3,3,3>, RHS
+ 1966312758U, // <3,4,3,6>: Cost 2 vtrnl <3,3,3,3>, RHS
+ 3177619457U, // <3,4,3,7>: Cost 3 ins <3,u,3,7>, lane 1
+ 1832095273U, // <3,4,3,u>: Cost 2 vzipl <3,3,3,3>, RHS
+ 2960344777U, // <3,4,4,0>: Cost 3 vzipr <1,2,3,4>, <2,3,4,0>
+ 2960344778U, // <3,4,4,1>: Cost 3 vzipr <1,2,3,4>, <2,3,4,1>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2960344618U, // <3,4,4,3>: Cost 3 vzipr <1,2,3,4>, <2,1,4,3>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3177693185U, // <3,4,4,7>: Cost 3 ins <3,u,4,7>, lane 1
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2101379075U, // <3,4,5,1>: Cost 2 ins <3,4,5,u>, lane 3
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2101379075U, // <3,4,5,3>: Cost 2 ins <3,4,5,u>, lane 3
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2131214336U, // <3,4,5,5>: Cost 2 ins <u,4,5,5>, lane 0
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2101379075U, // <3,4,5,7>: Cost 2 ins <3,4,5,u>, lane 3
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1659227468U, // <3,4,6,0>: Cost 2 vext3 LHS, <4,6,0,2>
+ 2689838422U, // <3,4,6,1>: Cost 3 vext3 LHS, <4,6,1,3>
+ 2564417231U, // <3,4,6,2>: Cost 3 vext1 <2,3,4,6>, <2,3,4,6>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2131296256U, // <3,4,6,6>: Cost 2 ins <u,4,6,6>, lane 0
+ 2101600261U, // <3,4,6,7>: Cost 2 ins <3,4,u,u>, lane 5
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 2659972191U, // <3,4,7,1>: Cost 3 vext2 <7,1,3,4>, <7,1,3,4>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3177881601U, // <3,4,7,3>: Cost 3 ins <3,u,7,3>, lane 1
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 3095396690U, // <3,4,7,5>: Cost 3 vtrnr <1,3,5,7>, <0,4,1,5>
+ 2131369984U, // <3,4,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2131369984U, // <3,4,7,u>: Cost 2 ins <u,4,7,6>, lane 0
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2101600261U, // <3,4,u,3>: Cost 2 ins <3,4,u,u>, lane 5
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2101379075U, // <3,4,u,7>: Cost 2 ins <3,4,5,u>, lane 3
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2832842752U, // <3,5,0,0>: Cost 3 vuzpr <2,3,4,5>, <0,0,0,0>
+ 2131476480U, // <3,5,0,1>: Cost 2 ins <u,5,0,1>, lane 0
+ 1698709606U, // <3,5,0,2>: Cost 2 vuzpl <3,4,5,6>, LHS
+ 2772451522U, // <3,5,0,3>: Cost 3 vuzpl <3,4,5,6>, <0,2,3,5>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2960310647U, // <3,5,0,6>: Cost 3 vzipr <1,2,3,0>, <0,4,5,6>
+ 2131525632U, // <3,5,0,7>: Cost 2 ins <u,5,0,7>, lane 0
+ 1698709660U, // <3,5,0,u>: Cost 2 vuzpl <3,4,5,6>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 2832843572U, // <3,5,1,1>: Cost 3 vuzpr <2,3,4,5>, <1,1,1,1>
+ 2103689217U, // <3,5,1,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 1759101030U, // <3,5,1,3>: Cost 2 vuzpr <2,3,4,5>, LHS
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2772452352U, // <3,5,1,5>: Cost 3 vuzpl <3,4,5,6>, <1,3,5,7>
+ 3205332992U, // <3,5,1,6>: Cost 3 ins <u,5,1,6>, lane 0
+ 2027212086U, // <3,5,1,7>: Cost 2 vtrnr <2,3,0,1>, RHS
+ 2027212087U, // <3,5,1,u>: Cost 2 vtrnr <2,3,0,1>, RHS
+ 2832843670U, // <3,5,2,0>: Cost 3 vuzpr <2,3,4,5>, <1,2,3,0>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 2832842916U, // <3,5,2,2>: Cost 3 vuzpr <2,3,4,5>, <0,2,0,2>
+ 2131640320U, // <3,5,2,3>: Cost 2 ins <u,5,2,3>, lane 0
+ 2832842936U, // <3,5,2,4>: Cost 3 vuzpr <2,3,4,5>, <0,2,2,4>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 2959002114U, // <3,5,2,6>: Cost 3 vzipr <1,0,3,2>, <3,4,5,6>
+ 2131673088U, // <3,5,2,7>: Cost 2 ins <u,5,2,7>, lane 0
+ 2131640320U, // <3,5,2,u>: Cost 2 ins <u,5,2,3>, lane 0
+ 2772453922U, // <3,5,3,0>: Cost 3 vuzpl <3,4,5,6>, <3,5,0,2>
+ 2832844454U, // <3,5,3,1>: Cost 3 vuzpr <2,3,4,5>, <2,3,0,1>
+ 3177578497U, // <3,5,3,2>: Cost 3 ins <3,u,3,2>, lane 1
+ 2103844865U, // <3,5,3,3>: Cost 2 ins <3,u,3,3>, lane 1
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 1759102670U, // <3,5,3,5>: Cost 2 vuzpr <2,3,4,5>, <2,3,4,5>
+ 2959673858U, // <3,5,3,6>: Cost 3 vzipr <1,1,3,3>, <3,4,5,6>
+ 2021330230U, // <3,5,3,7>: Cost 2 vtrnr <1,3,1,3>, RHS
+ 2021330231U, // <3,5,3,u>: Cost 2 vtrnr <1,3,1,3>, RHS
+ 2832845308U, // <3,5,4,0>: Cost 3 vuzpr <2,3,4,5>, <3,4,5,0>
+ 2732969871U, // <3,5,4,1>: Cost 3 vext3 LHS, <5,4,1,5>
+ 2832844536U, // <3,5,4,2>: Cost 3 vuzpr <2,3,4,5>, <2,4,0,2>
+ 3177660417U, // <3,5,4,3>: Cost 3 ins <3,u,4,3>, lane 1
+ 2832845312U, // <3,5,4,4>: Cost 3 vuzpr <2,3,4,5>, <3,4,5,4>
+ 2131804160U, // <3,5,4,5>: Cost 2 ins <u,5,4,5>, lane 0
+ 1698712886U, // <3,5,4,6>: Cost 2 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1698712904U, // <3,5,4,u>: Cost 2 vuzpl <3,4,5,6>, RHS
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2832846074U, // <3,5,5,1>: Cost 3 vuzpr <2,3,4,5>, <4,5,0,1>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2832845356U, // <3,5,5,3>: Cost 3 vuzpr <2,3,4,5>, <3,5,1,3>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2104016897U, // <3,5,5,6>: Cost 2 ins <3,u,5,6>, lane 1
+ 1759104310U, // <3,5,5,7>: Cost 2 vuzpr <2,3,4,5>, RHS
+ 1759104311U, // <3,5,5,u>: Cost 2 vuzpr <2,3,4,5>, RHS
+ 2131910656U, // <3,5,6,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 2131918848U, // <3,5,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2131927040U, // <3,5,6,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2131935232U, // <3,5,6,3>: Cost 2 ins <u,5,6,3>, lane 0
+ 2131943424U, // <3,5,6,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 2131951616U, // <3,5,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 2131959808U, // <3,5,6,6>: Cost 2 ins <u,5,6,6>, lane 0
+ 1058226176U, // <3,5,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <3,5,6,u>: Cost 1 ins RHS, lane 0
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 3095397528U, // <3,5,7,6>: Cost 3 vtrnr <1,3,5,7>, <1,5,4,6>
+ 2021657910U, // <3,5,7,7>: Cost 2 vtrnr <1,3,5,7>, RHS
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 1698715438U, // <3,5,u,2>: Cost 2 vuzpl <3,4,5,6>, LHS
+ 1759101597U, // <3,5,u,3>: Cost 2 vuzpr <2,3,4,5>, LHS
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 1698715802U, // <3,5,u,6>: Cost 2 vuzpl <3,4,5,6>, RHS
+ 1058226176U, // <3,5,u,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <3,5,u,u>: Cost 1 ins RHS, lane 0
+ 2732970264U, // <3,6,0,0>: Cost 3 vext3 LHS, <6,0,0,2>
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2132148224U, // <3,6,0,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 3177365505U, // <3,6,0,3>: Cost 3 ins <3,u,0,3>, lane 1
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 1886571830U, // <3,6,0,7>: Cost 2 vzipr <1,2,3,0>, RHS
+ 1886571831U, // <3,6,0,u>: Cost 2 vzipr <1,2,3,0>, RHS
+ 2720878954U, // <3,6,1,0>: Cost 3 vext3 <6,1,0,3>, <6,1,0,3>
+ 3205955584U, // <3,6,1,1>: Cost 3 ins <u,6,1,1>, lane 0
+ 2103689217U, // <3,6,1,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 2826731622U, // <3,6,1,3>: Cost 3 vuzpr <1,3,2,6>, LHS
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3205988352U, // <3,6,1,5>: Cost 3 ins <u,6,1,5>, lane 0
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2954349878U, // <3,6,1,7>: Cost 3 vzipr <0,2,3,1>, RHS
+ 2103689217U, // <3,6,1,u>: Cost 2 ins <3,u,1,2>, lane 1
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 2132303872U, // <3,6,2,3>: Cost 2 ins <u,6,2,3>, lane 0
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2826731724U, // <3,6,2,6>: Cost 3 vuzpr <1,3,2,6>, <0,2,4,6>
+ 1885261110U, // <3,6,2,7>: Cost 2 vzipr <1,0,3,2>, RHS
+ 1885261111U, // <3,6,2,u>: Cost 2 vzipr <1,0,3,2>, RHS
+ 3136876642U, // <3,6,3,0>: Cost 3 vtrnr <u,3,1,3>, <5,6,7,0>
+ 3206103040U, // <3,6,3,1>: Cost 3 ins <u,6,3,1>, lane 0
+ 3001478044U, // <3,6,3,2>: Cost 3 vzipr <u,1,3,3>, <4,0,6,2>
+ 2103844865U, // <3,6,3,3>: Cost 2 ins <3,u,3,3>, lane 1
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3206135808U, // <3,6,3,5>: Cost 3 ins <u,6,3,5>, lane 0
+ 1699457629U, // <3,6,3,6>: Cost 2 vuzpl <3,5,6,7>, <3,5,6,7>
+ 1885932854U, // <3,6,3,7>: Cost 2 vzipr <1,1,3,3>, RHS
+ 1885932855U, // <3,6,3,u>: Cost 2 vzipr <1,1,3,3>, RHS
+ 2732970588U, // <3,6,4,0>: Cost 3 vext3 LHS, <6,4,0,2>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970604U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,0>
+ 2906673714U, // <3,6,4,3>: Cost 3 vzipl <3,4,5,6>, <6,3,4,5>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2132475904U, // <3,6,4,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 1886604598U, // <3,6,4,7>: Cost 2 vzipr <1,2,3,4>, RHS
+ 1886604599U, // <3,6,4,u>: Cost 2 vzipr <1,2,3,4>, RHS
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3206250496U, // <3,6,5,1>: Cost 3 ins <u,6,5,1>, lane 0
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 3040891442U, // <3,6,5,4>: Cost 3 vtrnl <3,4,5,6>, <6,3,4,5>
+ 3206283264U, // <3,6,5,5>: Cost 3 ins <u,6,5,5>, lane 0
+ 2104016897U, // <3,6,5,6>: Cost 2 ins <3,u,5,6>, lane 1
+ 2954382646U, // <3,6,5,7>: Cost 3 vzipr <0,2,3,5>, RHS
+ 2104016897U, // <3,6,5,u>: Cost 2 ins <3,u,5,6>, lane 1
+ 2732970748U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,0>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2732970768U, // <3,6,6,2>: Cost 3 vext3 LHS, <6,6,2,2>
+ 3177807873U, // <3,6,6,3>: Cost 3 ins <3,u,6,3>, lane 1
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 1611453282U, // <3,6,7,2>: Cost 2 vext3 LHS, <6,7,2,3>
+ 2968996198U, // <3,6,7,3>: Cost 3 vzipr <2,6,3,7>, <3,2,6,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2968995633U, // <3,6,7,5>: Cost 3 vzipr <2,6,3,7>, <2,4,6,5>
+ 1611453322U, // <3,6,7,6>: Cost 2 vext3 LHS, <6,7,6,7>
+ 1888619830U, // <3,6,7,7>: Cost 2 vzipr <1,5,3,7>, RHS
+ 1888619831U, // <3,6,7,u>: Cost 2 vzipr <1,5,3,7>, RHS
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2132148224U, // <3,6,u,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 2132303872U, // <3,6,u,3>: Cost 2 ins <u,6,2,3>, lane 0
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 2132475904U, // <3,6,u,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 1885310262U, // <3,6,u,7>: Cost 2 vzipr <1,0,3,u>, RHS
+ 1885310263U, // <3,6,u,u>: Cost 2 vzipr <1,0,3,u>, RHS
+ 2826960896U, // <3,7,0,0>: Cost 3 vuzpr <1,3,5,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2826960916U, // <3,7,0,2>: Cost 3 vuzpr <1,3,5,7>, <0,0,2,2>
+ 3002117840U, // <3,7,0,3>: Cost 3 vzipr <u,2,3,0>, <5,1,7,3>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2826961716U, // <3,7,1,1>: Cost 3 vuzpr <1,3,5,7>, <1,1,1,1>
+ 2103689217U, // <3,7,1,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 1753219174U, // <3,7,1,3>: Cost 2 vuzpr <1,3,5,7>, LHS
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1753219179U, // <3,7,1,u>: Cost 2 vuzpr <1,3,5,7>, LHS
+ 2826961814U, // <3,7,2,0>: Cost 3 vuzpr <1,3,5,7>, <1,2,3,0>
+ 3206692864U, // <3,7,2,1>: Cost 3 ins <u,7,2,1>, lane 0
+ 2826961060U, // <3,7,2,2>: Cost 3 vuzpr <1,3,5,7>, <0,2,0,2>
+ 2132967424U, // <3,7,2,3>: Cost 2 ins <u,7,2,3>, lane 0
+ 2826961818U, // <3,7,2,4>: Cost 3 vuzpr <1,3,5,7>, <1,2,3,4>
+ 2826961072U, // <3,7,2,5>: Cost 3 vuzpr <1,3,5,7>, <0,2,1,5>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2826962598U, // <3,7,3,1>: Cost 3 vuzpr <1,3,5,7>, <2,3,0,1>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2103844865U, // <3,7,3,3>: Cost 2 ins <3,u,3,3>, lane 1
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2826962638U, // <3,7,3,5>: Cost 3 vuzpr <1,3,5,7>, <2,3,4,5>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 1753220096U, // <3,7,3,7>: Cost 2 vuzpr <1,3,5,7>, <1,3,5,7>
+ 1753220096U, // <3,7,3,u>: Cost 2 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3002150608U, // <3,7,4,3>: Cost 3 vzipr <u,2,3,4>, <5,1,7,3>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2826961244U, // <3,7,4,6>: Cost 3 vuzpr <1,3,5,7>, <0,4,2,6>
+ 2732971383U, // <3,7,4,7>: Cost 3 vext3 LHS, <7,4,7,5>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2826963551U, // <3,7,5,0>: Cost 3 vuzpr <1,3,5,7>, <3,5,7,0>
+ 2826963552U, // <3,7,5,1>: Cost 3 vuzpr <1,3,5,7>, <3,5,7,1>
+ 2826962032U, // <3,7,5,2>: Cost 3 vuzpr <1,3,5,7>, <1,5,0,2>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2826963555U, // <3,7,5,4>: Cost 3 vuzpr <1,3,5,7>, <3,5,7,4>
+ 2826962044U, // <3,7,5,5>: Cost 3 vuzpr <1,3,5,7>, <1,5,1,5>
+ 2104016897U, // <3,7,5,6>: Cost 2 ins <3,u,5,6>, lane 1
+ 1753222454U, // <3,7,5,7>: Cost 2 vuzpr <1,3,5,7>, RHS
+ 1753222455U, // <3,7,5,u>: Cost 2 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2133295104U, // <3,7,6,7>: Cost 2 ins <u,7,6,7>, lane 0
+ 2133295104U, // <3,7,6,u>: Cost 2 ins <u,7,6,7>, lane 0
+ 2962362223U, // <3,7,7,0>: Cost 3 vzipr <1,5,3,7>, <5,3,7,0>
+ 2826965109U, // <3,7,7,1>: Cost 3 vuzpr <1,3,5,7>, <5,7,0,1>
+ 2968998474U, // <3,7,7,2>: Cost 3 vzipr <2,6,3,7>, <6,3,7,2>
+ 2826963662U, // <3,7,7,3>: Cost 3 vuzpr <1,3,5,7>, <3,7,1,3>
+ 2962362227U, // <3,7,7,4>: Cost 3 vzipr <1,5,3,7>, <5,3,7,4>
+ 2826965149U, // <3,7,7,5>: Cost 3 vuzpr <1,3,5,7>, <5,7,4,5>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2826962300U, // <3,7,u,0>: Cost 3 vuzpr <1,3,5,7>, <1,u,3,0>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2103689217U, // <3,7,u,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 1753219741U, // <3,7,u,3>: Cost 2 vuzpr <1,3,5,7>, LHS
+ 2826962304U, // <3,7,u,4>: Cost 3 vuzpr <1,3,5,7>, <1,u,3,4>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1753222697U, // <3,7,u,7>: Cost 2 vuzpr <1,3,5,7>, RHS
+ 1753219746U, // <3,7,u,u>: Cost 2 vuzpr <1,3,5,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1696243814U, // <3,u,0,2>: Cost 2 vuzpl <3,0,u,2>, LHS
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1829951642U, // <3,u,0,5>: Cost 2 vzipl <3,0,1,2>, RHS
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 1886571848U, // <3,u,0,7>: Cost 2 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 1964169370U, // <3,u,1,6>: Cost 2 vtrnl <3,0,1,2>, RHS
+ 2027212329U, // <3,u,1,7>: Cost 2 vtrnr <2,3,0,1>, RHS
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1659672428U, // <3,u,2,0>: Cost 2 vext3 LHS, <u,2,0,2>
+ 2128969728U, // <3,u,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1055244288U, // <3,u,2,3>: Cost 1 ins LHS, lane 0
+ 1659672468U, // <3,u,2,4>: Cost 2 vext3 LHS, <u,2,4,6>
+ 2129002496U, // <3,u,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1885261128U, // <3,u,2,7>: Cost 2 vzipr <1,0,3,2>, RHS
+ 1055244288U, // <3,u,2,u>: Cost 1 ins LHS, lane 0
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1616541639U, // <3,u,3,1>: Cost 2 vext3 LHS, <u,3,1,3>
+ 1966315310U, // <3,u,3,2>: Cost 2 vtrnl <3,3,3,3>, LHS
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1966315674U, // <3,u,3,6>: Cost 2 vtrnl <3,3,3,3>, RHS
+ 1885932872U, // <3,u,3,7>: Cost 2 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2960344003U, // <3,u,4,0>: Cost 3 vzipr <1,2,3,4>, <1,2,u,0>
+ 1832933166U, // <3,u,4,1>: Cost 2 vzipl <3,4,5,6>, LHS
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1886601372U, // <3,u,4,3>: Cost 2 vzipr <1,2,3,4>, LHS
+ 1886602138U, // <3,u,4,4>: Cost 2 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1696247094U, // <3,u,4,6>: Cost 2 vuzpl <3,0,u,2>, RHS
+ 1886604616U, // <3,u,4,7>: Cost 2 vzipr <1,2,3,4>, RHS
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2128527360U, // <3,u,5,1>: Cost 2 ins <u,0,5,1>, lane 0
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 2027538126U, // <3,u,5,5>: Cost 2 vtrnr <2,3,4,5>, <2,3,4,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1752935734U, // <3,u,5,7>: Cost 2 vuzpr <1,3,1,u>, RHS
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1663875248U, // <3,u,6,0>: Cost 2 vext3 LHS, <u,6,0,2>
+ 2131918848U, // <3,u,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2128609280U, // <3,u,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1663875288U, // <3,u,6,4>: Cost 2 vext3 LHS, <u,6,4,6>
+ 2131951616U, // <3,u,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 2131296256U, // <3,u,6,6>: Cost 2 ins <u,4,6,6>, lane 0
+ 1058226176U, // <3,u,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <3,u,6,u>: Cost 1 ins RHS, lane 0
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2098896898U, // <3,u,7,2>: Cost 2 ins <3,0,u,2>, lane 2
+ 2021655197U, // <3,u,7,3>: Cost 2 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659230515U, // <3,u,7,5>: Cost 2 vext3 LHS, <u,7,5,7>
+ 2131369984U, // <3,u,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 2021658153U, // <3,u,7,7>: Cost 2 vtrnr <1,3,5,7>, RHS
+ 2021655202U, // <3,u,7,u>: Cost 2 vtrnr <1,3,5,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 1055244288U, // <3,u,u,3>: Cost 1 ins LHS, lane 0
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1058226176U, // <3,u,u,7>: Cost 1 ins RHS, lane 0
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2128150528U, // <4,0,0,0>: Cost 2 ins <u,0,0,0>, lane 0
+ 2104860674U, // <4,0,0,1>: Cost 2 ins <4,0,u,1>, lane 2
+ 1705607270U, // <4,0,0,2>: Cost 2 vuzpl <4,6,0,2>, LHS
+ 3178070019U, // <4,0,0,3>: Cost 3 ins <4,0,0,u>, lane 3
+ 2909946194U, // <4,0,0,4>: Cost 3 vzipl <4,0,5,1>, <0,4,1,5>
+ 3178070019U, // <4,0,0,5>: Cost 3 ins <4,0,0,u>, lane 3
+ 3183362049U, // <4,0,0,6>: Cost 3 ins <4,u,0,6>, lane 1
+ 2109628417U, // <4,0,0,7>: Cost 2 ins <4,u,0,7>, lane 1
+ 1705607324U, // <4,0,0,u>: Cost 2 vuzpl <4,6,0,2>, LHS
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2128232448U, // <4,0,1,1>: Cost 2 ins <u,0,1,1>, lane 0
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2833612902U, // <4,0,1,3>: Cost 3 vuzpr <2,4,6,0>, LHS
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2779350016U, // <4,0,1,5>: Cost 3 vuzpl <4,6,0,2>, <1,3,5,7>
+ 3202015232U, // <4,0,1,6>: Cost 3 ins <u,0,1,6>, lane 0
+ 2109702145U, // <4,0,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 2104860674U, // <4,0,2,1>: Cost 2 ins <4,0,u,1>, lane 2
+ 2128314368U, // <4,0,2,2>: Cost 2 ins <u,0,2,2>, lane 0
+ 2104918021U, // <4,0,2,3>: Cost 2 ins <4,0,u,u>, lane 5
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3044622465U, // <4,0,2,5>: Cost 3 vtrnl <4,1,2,3>, <0,1,5,3>
+ 2833613004U, // <4,0,2,6>: Cost 3 vuzpr <2,4,6,0>, <0,2,4,6>
+ 2109775873U, // <4,0,2,7>: Cost 2 ins <4,u,2,7>, lane 1
+ 2104860674U, // <4,0,2,u>: Cost 2 ins <4,0,u,1>, lane 2
+ 3202113536U, // <4,0,3,0>: Cost 3 ins <u,0,3,0>, lane 0
+ 2104860674U, // <4,0,3,1>: Cost 2 ins <4,0,u,1>, lane 2
+ 2128388096U, // <4,0,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 2779351452U, // <4,0,3,3>: Cost 3 vuzpl <4,6,0,2>, <3,3,3,3>
+ 3178627074U, // <4,0,3,4>: Cost 3 ins <4,0,u,4>, lane 2
+ 2839512782U, // <4,0,3,5>: Cost 3 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3178643458U, // <4,0,3,6>: Cost 3 ins <4,0,u,6>, lane 2
+ 2109849601U, // <4,0,3,7>: Cost 2 ins <4,u,3,7>, lane 1
+ 2104860674U, // <4,0,3,u>: Cost 2 ins <4,0,u,1>, lane 2
+ 1705610572U, // <4,0,4,0>: Cost 2 vuzpl <4,6,0,2>, <4,6,0,2>
+ 2104860674U, // <4,0,4,1>: Cost 2 ins <4,0,u,1>, lane 2
+ 1974370406U, // <4,0,4,2>: Cost 2 vtrnl <4,6,4,6>, LHS
+ 3178364931U, // <4,0,4,3>: Cost 3 ins <4,0,4,u>, lane 3
+ 2109898753U, // <4,0,4,4>: Cost 2 ins <4,u,4,4>, lane 1
+ 2104918021U, // <4,0,4,5>: Cost 2 ins <4,0,u,u>, lane 5
+ 1705610550U, // <4,0,4,6>: Cost 2 vuzpl <4,6,0,2>, RHS
+ 2109923329U, // <4,0,4,7>: Cost 2 ins <4,u,4,7>, lane 1
+ 1705610568U, // <4,0,4,u>: Cost 2 vuzpl <4,6,0,2>, RHS
+ 1839644672U, // <4,0,5,0>: Cost 2 vzipl RHS, <0,0,0,0>
+ 765902950U, // <4,0,5,1>: Cost 1 vzipl RHS, LHS
+ 1839644836U, // <4,0,5,2>: Cost 2 vzipl RHS, <0,2,0,2>
+ 2104696835U, // <4,0,5,3>: Cost 2 ins <4,0,5,u>, lane 3
+ 1839645010U, // <4,0,5,4>: Cost 2 vzipl RHS, <0,4,1,5>
+ 2109980673U, // <4,0,5,5>: Cost 2 ins <4,u,5,5>, lane 1
+ 2104696835U, // <4,0,5,6>: Cost 2 ins <4,0,5,u>, lane 3
+ 2104696835U, // <4,0,5,7>: Cost 2 ins <4,0,5,u>, lane 3
+ 765903517U, // <4,0,5,u>: Cost 1 vzipl RHS, LHS
+ 1973862400U, // <4,0,6,0>: Cost 2 vtrnl RHS, <0,0,0,0>
+ 1973862410U, // <4,0,6,1>: Cost 2 vtrnl RHS, <0,0,1,1>
+ 900120678U, // <4,0,6,2>: Cost 1 vtrnl RHS, LHS
+ 2104770563U, // <4,0,6,3>: Cost 2 ins <4,0,6,u>, lane 3
+ 1973862604U, // <4,0,6,4>: Cost 2 vtrnl RHS, <0,2,4,6>
+ 2104770563U, // <4,0,6,5>: Cost 2 ins <4,0,6,u>, lane 3
+ 2110062593U, // <4,0,6,6>: Cost 2 ins <4,u,6,6>, lane 1
+ 1036328961U, // <4,0,6,7>: Cost 1 ins RHS, lane 1
+ 900120732U, // <4,0,6,u>: Cost 1 vtrnl RHS, LHS
+ 3202408448U, // <4,0,7,0>: Cost 3 ins <u,0,7,0>, lane 0
+ 2104860674U, // <4,0,7,1>: Cost 2 ins <4,0,u,1>, lane 2
+ 2104868866U, // <4,0,7,2>: Cost 2 ins <4,0,u,2>, lane 2
+ 3114049557U, // <4,0,7,3>: Cost 3 vtrnr <4,4,6,7>, <0,0,2,3>
+ 3178627074U, // <4,0,7,4>: Cost 3 ins <4,0,u,4>, lane 2
+ 2779354470U, // <4,0,7,5>: Cost 3 vuzpl <4,6,0,2>, <7,4,5,6>
+ 2779354473U, // <4,0,7,6>: Cost 3 vuzpl <4,6,0,2>, <7,4,6,0>
+ 2110144513U, // <4,0,7,7>: Cost 2 ins <4,u,7,7>, lane 1
+ 2104860674U, // <4,0,7,u>: Cost 2 ins <4,0,u,1>, lane 2
+ 1974009856U, // <4,0,u,0>: Cost 2 vtrnl RHS, <0,0,0,0>
+ 767893606U, // <4,0,u,1>: Cost 1 vzipl RHS, LHS
+ 900268134U, // <4,0,u,2>: Cost 1 vtrnl RHS, LHS
+ 2104918021U, // <4,0,u,3>: Cost 2 ins <4,0,u,u>, lane 5
+ 1974010060U, // <4,0,u,4>: Cost 2 vtrnl RHS, <0,2,4,6>
+ 2104918021U, // <4,0,u,5>: Cost 2 ins <4,0,u,u>, lane 5
+ 1705613466U, // <4,0,u,6>: Cost 2 vuzpl <4,6,0,2>, RHS
+ 1036328961U, // <4,0,u,7>: Cost 1 ins RHS, lane 1
+ 900268188U, // <4,0,u,u>: Cost 1 vtrnl RHS, LHS
+ 2600640614U, // <4,1,0,0>: Cost 3 vext1 <u,4,1,0>, LHS
+ 2128822272U, // <4,1,0,1>: Cost 2 ins <u,1,0,1>, lane 0
+ 2109587457U, // <4,1,0,2>: Cost 2 ins <4,u,0,2>, lane 1
+ 2128838656U, // <4,1,0,3>: Cost 2 ins <u,1,0,3>, lane 0
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3047785472U, // <4,1,0,5>: Cost 3 vtrnl <4,6,0,2>, <1,3,5,7>
+ 3183362049U, // <4,1,0,6>: Cost 3 ins <4,u,0,6>, lane 1
+ 2109628417U, // <4,1,0,7>: Cost 2 ins <4,u,0,7>, lane 1
+ 2109587457U, // <4,1,0,u>: Cost 2 ins <4,u,0,2>, lane 1
+ 3202629632U, // <4,1,1,0>: Cost 3 ins <u,1,1,0>, lane 0
+ 2128896000U, // <4,1,1,1>: Cost 2 ins <u,1,1,1>, lane 0
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2128912384U, // <4,1,1,3>: Cost 2 ins <u,1,1,3>, lane 0
+ 3202662400U, // <4,1,1,4>: Cost 3 ins <u,1,1,4>, lane 0
+ 2958401874U, // <4,1,1,5>: Cost 3 vzipr <0,u,4,1>, <0,4,1,5>
+ 2778801323U, // <4,1,1,6>: Cost 3 vuzpl <4,5,1,7>, <1,5,6,7>
+ 2109702145U, // <4,1,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 2128896000U, // <4,1,1,u>: Cost 2 ins <u,1,1,1>, lane 0
+ 2128961536U, // <4,1,2,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 2128969728U, // <4,1,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 2128977920U, // <4,1,2,2>: Cost 2 ins <u,1,2,2>, lane 0
+ 1055244288U, // <4,1,2,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <4,1,2,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 2129002496U, // <4,1,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2129010688U, // <4,1,2,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2129018880U, // <4,1,2,7>: Cost 2 ins <u,1,2,7>, lane 0
+ 1055244288U, // <4,1,2,u>: Cost 1 ins LHS, lane 0
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2129059840U, // <4,1,3,3>: Cost 2 ins <u,1,3,3>, lane 0
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2109849601U, // <4,1,3,7>: Cost 2 ins <4,u,3,7>, lane 1
+ 2129059840U, // <4,1,3,u>: Cost 2 ins <u,1,3,3>, lane 0
+ 2600673382U, // <4,1,4,0>: Cost 3 vext1 <u,4,1,4>, LHS
+ 1705061641U, // <4,1,4,1>: Cost 2 vuzpl <4,5,1,7>, <4,5,1,7>
+ 2912641946U, // <4,1,4,2>: Cost 3 vzipl <4,4,5,6>, <1,2,3,4>
+ 2040135782U, // <4,1,4,3>: Cost 2 vtrnr <4,4,4,4>, LHS
+ 2109898753U, // <4,1,4,4>: Cost 2 ins <4,u,4,4>, lane 1
+ 2129149952U, // <4,1,4,5>: Cost 2 ins <u,1,4,5>, lane 0
+ 2109915137U, // <4,1,4,6>: Cost 2 ins <4,u,4,6>, lane 1
+ 2109923329U, // <4,1,4,7>: Cost 2 ins <4,u,4,7>, lane 1
+ 2109915137U, // <4,1,4,u>: Cost 2 ins <4,u,4,6>, lane 1
+ 1479164242U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, <0,4,1,5>
+ 1839645492U, // <4,1,5,1>: Cost 2 vzipl RHS, <1,1,1,1>
+ 1839645590U, // <4,1,5,2>: Cost 2 vzipl RHS, <1,2,3,0>
+ 2016034918U, // <4,1,5,3>: Cost 2 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839645840U, // <4,1,5,5>: Cost 2 vzipl RHS, <1,5,3,7>
+ 3089776763U, // <4,1,5,6>: Cost 3 vtrnr <0,4,1,5>, <0,1,4,6>
+ 2109997057U, // <4,1,5,7>: Cost 2 ins <4,u,5,7>, lane 1
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2110013441U, // <4,1,6,0>: Cost 2 ins <4,u,6,0>, lane 1
+ 1973863220U, // <4,1,6,1>: Cost 2 vtrnl RHS, <1,1,1,1>
+ 2110029825U, // <4,1,6,2>: Cost 2 ins <4,u,6,2>, lane 1
+ 2016116838U, // <4,1,6,3>: Cost 2 vtrnr <0,4,2,6>, LHS
+ 2110046209U, // <4,1,6,4>: Cost 2 ins <4,u,6,4>, lane 1
+ 1973863424U, // <4,1,6,5>: Cost 2 vtrnl RHS, <1,3,5,7>
+ 2110062593U, // <4,1,6,6>: Cost 2 ins <4,u,6,6>, lane 1
+ 1036328961U, // <4,1,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,1,6,u>: Cost 1 ins RHS, lane 1
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3203080192U, // <4,1,7,1>: Cost 3 ins <u,1,7,1>, lane 0
+ 3203088384U, // <4,1,7,2>: Cost 3 ins <u,1,7,2>, lane 0
+ 2129354752U, // <4,1,7,3>: Cost 2 ins <u,1,7,3>, lane 0
+ 2664666470U, // <4,1,7,4>: Cost 3 vext2 <7,u,4,1>, <7,4,5,6>
+ 3203112960U, // <4,1,7,5>: Cost 3 ins <u,1,7,5>, lane 0
+ 3114049641U, // <4,1,7,6>: Cost 3 vtrnr <4,4,6,7>, <0,1,2,6>
+ 2110144513U, // <4,1,7,7>: Cost 2 ins <4,u,7,7>, lane 1
+ 2129354752U, // <4,1,7,u>: Cost 2 ins <u,1,7,3>, lane 0
+ 1479188821U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, <0,4,1,u>
+ 1974010676U, // <4,1,u,1>: Cost 2 vtrnl RHS, <1,1,1,1>
+ 1841636246U, // <4,1,u,2>: Cost 2 vzipl RHS, <1,2,3,0>
+ 1055244288U, // <4,1,u,3>: Cost 1 ins LHS, lane 0
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 1974010880U, // <4,1,u,5>: Cost 2 vtrnl RHS, <1,3,5,7>
+ 2109915137U, // <4,1,u,6>: Cost 2 ins <4,u,4,6>, lane 1
+ 1036328961U, // <4,1,u,7>: Cost 1 ins RHS, lane 1
+ 1055244288U, // <4,1,u,u>: Cost 1 ins LHS, lane 0
+ 3047786150U, // <4,2,0,0>: Cost 3 vtrnl <4,6,0,2>, <2,3,0,1>
+ 2109579265U, // <4,2,0,1>: Cost 2 ins <4,u,0,1>, lane 1
+ 2129494016U, // <4,2,0,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 2967019622U, // <4,2,0,3>: Cost 3 vzipr <2,3,4,0>, LHS
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 2909947747U, // <4,2,0,5>: Cost 3 vzipl <4,0,5,1>, <2,5,3,1>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 2109628417U, // <4,2,0,7>: Cost 2 ins <4,u,0,7>, lane 1
+ 2129494016U, // <4,2,0,u>: Cost 2 ins <u,2,0,2>, lane 0
+ 3203293184U, // <4,2,1,0>: Cost 3 ins <u,2,1,0>, lane 0
+ 3203301376U, // <4,2,1,1>: Cost 3 ins <u,2,1,1>, lane 0
+ 3203309568U, // <4,2,1,2>: Cost 3 ins <u,2,1,2>, lane 0
+ 2821242982U, // <4,2,1,3>: Cost 3 vuzpr <0,4,0,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3203334144U, // <4,2,1,5>: Cost 3 ins <u,2,1,5>, lane 0
+ 3203342336U, // <4,2,1,6>: Cost 3 ins <u,2,1,6>, lane 0
+ 2109702145U, // <4,2,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 2109702145U, // <4,2,1,u>: Cost 2 ins <4,u,1,7>, lane 1
+ 2229208824U, // <4,2,2,0>: Cost 3 vrev <2,4,0,2>
+ 2911397400U, // <4,2,2,1>: Cost 3 vzipl <4,2,6,7>, <2,1,2,3>
+ 2129641472U, // <4,2,2,2>: Cost 2 ins <u,2,2,2>, lane 0
+ 2129649664U, // <4,2,2,3>: Cost 2 ins <u,2,2,3>, lane 0
+ 2697954940U, // <4,2,2,4>: Cost 3 vext3 <2,2,4,4>, <2,2,4,4>
+ 2911397764U, // <4,2,2,5>: Cost 3 vzipl <4,2,6,7>, <2,5,6,7>
+ 2821243084U, // <4,2,2,6>: Cost 3 vuzpr <0,4,0,2>, <0,2,4,6>
+ 2109775873U, // <4,2,2,7>: Cost 2 ins <4,u,2,7>, lane 1
+ 2129641472U, // <4,2,2,u>: Cost 2 ins <u,2,2,2>, lane 0
+ 2129698816U, // <4,2,3,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 2229290754U, // <4,2,3,1>: Cost 3 vrev <2,4,1,3>
+ 3203457024U, // <4,2,3,2>: Cost 3 ins <u,2,3,2>, lane 0
+ 2129723392U, // <4,2,3,3>: Cost 2 ins <u,2,3,3>, lane 0
+ 2129731584U, // <4,2,3,4>: Cost 2 ins <u,2,3,4>, lane 0
+ 2833188558U, // <4,2,3,5>: Cost 3 vuzpr <2,4,0,2>, <2,3,4,5>
+ 3203489792U, // <4,2,3,6>: Cost 3 ins <u,2,3,6>, lane 0
+ 2109849601U, // <4,2,3,7>: Cost 2 ins <4,u,3,7>, lane 1
+ 2129698816U, // <4,2,3,u>: Cost 2 ins <u,2,3,0>, lane 0
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 1702448074U, // <4,2,4,2>: Cost 2 vuzpl <4,1,2,3>, <4,1,2,3>
+ 1905918054U, // <4,2,4,3>: Cost 2 vzipr <4,4,4,4>, LHS
+ 2109898753U, // <4,2,4,4>: Cost 2 ins <4,u,4,4>, lane 1
+ 2109906945U, // <4,2,4,5>: Cost 2 ins <4,u,4,5>, lane 1
+ 2129821696U, // <4,2,4,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 2109923329U, // <4,2,4,7>: Cost 2 ins <4,u,4,7>, lane 1
+ 2129821696U, // <4,2,4,u>: Cost 2 ins <u,2,4,6>, lane 0
+ 3089777558U, // <4,2,5,0>: Cost 3 vtrnr <0,4,1,5>, <1,2,3,0>
+ 2109947905U, // <4,2,5,1>: Cost 2 ins <4,u,5,1>, lane 1
+ 1839646312U, // <4,2,5,2>: Cost 2 vzipl RHS, <2,2,2,2>
+ 1893318758U, // <4,2,5,3>: Cost 2 vzipr <2,3,4,5>, LHS
+ 3089777562U, // <4,2,5,4>: Cost 3 vtrnr <0,4,1,5>, <1,2,3,4>
+ 2109980673U, // <4,2,5,5>: Cost 2 ins <4,u,5,5>, lane 1
+ 1839646650U, // <4,2,5,6>: Cost 2 vzipl RHS, <2,6,3,7>
+ 2109997057U, // <4,2,5,7>: Cost 2 ins <4,u,5,7>, lane 1
+ 1893318763U, // <4,2,5,u>: Cost 2 vzipr <2,3,4,5>, LHS
+ 1479246172U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, <0,4,2,6>
+ 2110021633U, // <4,2,6,1>: Cost 2 ins <4,u,6,1>, lane 1
+ 1973864040U, // <4,2,6,2>: Cost 2 vtrnl RHS, <2,2,2,2>
+ 1880719462U, // <4,2,6,3>: Cost 2 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2110054401U, // <4,2,6,5>: Cost 2 ins <4,u,6,5>, lane 1
+ 2110062593U, // <4,2,6,6>: Cost 2 ins <4,u,6,6>, lane 1
+ 1036328961U, // <4,2,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,2,6,u>: Cost 1 ins RHS, lane 1
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3203743744U, // <4,2,7,1>: Cost 3 ins <u,2,7,1>, lane 0
+ 3203751936U, // <4,2,7,2>: Cost 3 ins <u,2,7,2>, lane 0
+ 2130018304U, // <4,2,7,3>: Cost 2 ins <u,2,7,3>, lane 0
+ 3102032794U, // <4,2,7,4>: Cost 3 vtrnr <2,4,5,7>, <1,2,3,4>
+ 2229618474U, // <4,2,7,5>: Cost 3 vrev <2,4,5,7>
+ 3203784704U, // <4,2,7,6>: Cost 3 ins <u,2,7,6>, lane 0
+ 2110144513U, // <4,2,7,7>: Cost 2 ins <4,u,7,7>, lane 1
+ 2130018304U, // <4,2,7,u>: Cost 2 ins <u,2,7,3>, lane 0
+ 1479262558U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, <0,4,2,u>
+ 2109947905U, // <4,2,u,1>: Cost 2 ins <4,u,5,1>, lane 1
+ 1974011496U, // <4,2,u,2>: Cost 2 vtrnl RHS, <2,2,2,2>
+ 1880735846U, // <4,2,u,3>: Cost 2 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2109980673U, // <4,2,u,5>: Cost 2 ins <4,u,5,5>, lane 1
+ 1841637306U, // <4,2,u,6>: Cost 2 vzipl RHS, <2,6,3,7>
+ 1036328961U, // <4,2,u,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,2,u,u>: Cost 1 ins RHS, lane 1
+ 3203883008U, // <4,3,0,0>: Cost 3 ins <u,3,0,0>, lane 0
+ 2130149376U, // <4,3,0,1>: Cost 2 ins <u,3,0,1>, lane 0
+ 2109587457U, // <4,3,0,2>: Cost 2 ins <4,u,0,2>, lane 1
+ 3047786908U, // <4,3,0,3>: Cost 3 vtrnl <4,6,0,2>, <3,3,3,3>
+ 2967020442U, // <4,3,0,4>: Cost 3 vzipr <2,3,4,0>, <1,2,3,4>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3183362049U, // <4,3,0,6>: Cost 3 ins <4,u,0,6>, lane 1
+ 2109628417U, // <4,3,0,7>: Cost 2 ins <4,u,0,7>, lane 1
+ 2130149376U, // <4,3,0,u>: Cost 2 ins <u,3,0,1>, lane 0
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3203964928U, // <4,3,1,1>: Cost 3 ins <u,3,1,1>, lane 0
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2130239488U, // <4,3,1,3>: Cost 2 ins <u,3,1,3>, lane 0
+ 2967028634U, // <4,3,1,4>: Cost 3 vzipr <2,3,4,1>, <1,2,3,4>
+ 3203997696U, // <4,3,1,5>: Cost 3 ins <u,3,1,5>, lane 0
+ 2821398633U, // <4,3,1,6>: Cost 3 vuzpr <0,4,2,3>, <0,1,2,6>
+ 2109702145U, // <4,3,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 2130239488U, // <4,3,1,u>: Cost 2 ins <u,3,1,3>, lane 0
+ 3204030464U, // <4,3,2,0>: Cost 3 ins <u,3,2,0>, lane 0
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3204046848U, // <4,3,2,2>: Cost 3 ins <u,3,2,2>, lane 0
+ 2130313216U, // <4,3,2,3>: Cost 2 ins <u,3,2,3>, lane 0
+ 2833269658U, // <4,3,2,4>: Cost 3 vuzpr <2,4,1,3>, <1,2,3,4>
+ 3101624014U, // <4,3,2,5>: Cost 3 vtrnr <2,4,0,2>, <2,3,4,5>
+ 3204079616U, // <4,3,2,6>: Cost 3 ins <u,3,2,6>, lane 0
+ 2109775873U, // <4,3,2,7>: Cost 2 ins <4,u,2,7>, lane 1
+ 2130313216U, // <4,3,2,u>: Cost 2 ins <u,3,2,3>, lane 0
+ 3204104192U, // <4,3,3,0>: Cost 3 ins <u,3,3,0>, lane 0
+ 2779564182U, // <4,3,3,1>: Cost 3 vuzpl <4,6,3,1>, <3,0,1,2>
+ 2636810580U, // <4,3,3,2>: Cost 3 vext2 <3,2,4,3>, <3,2,4,3>
+ 2130386944U, // <4,3,3,3>: Cost 2 ins <u,3,3,3>, lane 0
+ 2965717914U, // <4,3,3,4>: Cost 3 vzipr <2,1,4,3>, <1,2,3,4>
+ 2779597314U, // <4,3,3,5>: Cost 3 vuzpl <4,6,3,5>, <3,4,5,6>
+ 2778950237U, // <4,3,3,6>: Cost 3 vuzpl <4,5,3,7>, <3,5,6,7>
+ 2109849601U, // <4,3,3,7>: Cost 2 ins <4,u,3,7>, lane 1
+ 2130386944U, // <4,3,3,u>: Cost 2 ins <u,3,3,3>, lane 0
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 3183624193U, // <4,3,4,2>: Cost 3 ins <4,u,4,2>, lane 1
+ 1747657049U, // <4,3,4,3>: Cost 2 vuzpr <0,4,2,3>, <0,4,2,3>
+ 2109898753U, // <4,3,4,4>: Cost 2 ins <4,u,4,4>, lane 1
+ 2130477056U, // <4,3,4,5>: Cost 2 ins <u,3,4,5>, lane 0
+ 2109915137U, // <4,3,4,6>: Cost 2 ins <4,u,4,6>, lane 1
+ 2109923329U, // <4,3,4,7>: Cost 2 ins <4,u,4,7>, lane 1
+ 2130477056U, // <4,3,4,u>: Cost 2 ins <u,3,4,5>, lane 0
+ 1839646870U, // <4,3,5,0>: Cost 2 vzipl RHS, <3,0,1,2>
+ 2109947905U, // <4,3,5,1>: Cost 2 ins <4,u,5,1>, lane 1
+ 2967061238U, // <4,3,5,2>: Cost 3 vzipr <2,3,4,5>, <1,0,3,2>
+ 1839647132U, // <4,3,5,3>: Cost 2 vzipl RHS, <3,3,3,3>
+ 1839647234U, // <4,3,5,4>: Cost 2 vzipl RHS, <3,4,5,6>
+ 2109980673U, // <4,3,5,5>: Cost 2 ins <4,u,5,5>, lane 1
+ 2913389176U, // <4,3,5,6>: Cost 3 vzipl RHS, <3,6,0,7>
+ 2130567168U, // <4,3,5,7>: Cost 2 ins <u,3,5,7>, lane 0
+ 1839647518U, // <4,3,5,u>: Cost 2 vzipl RHS, <3,u,1,2>
+ 2110013441U, // <4,3,6,0>: Cost 2 ins <4,u,6,0>, lane 1
+ 1973864598U, // <4,3,6,1>: Cost 2 vtrnl RHS, <3,0,1,2>
+ 2110029825U, // <4,3,6,2>: Cost 2 ins <4,u,6,2>, lane 1
+ 1973864860U, // <4,3,6,3>: Cost 2 vtrnl RHS, <3,3,3,3>
+ 2110046209U, // <4,3,6,4>: Cost 2 ins <4,u,6,4>, lane 1
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 2110062593U, // <4,3,6,6>: Cost 2 ins <4,u,6,6>, lane 1
+ 1036328961U, // <4,3,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,3,6,u>: Cost 1 ins RHS, lane 1
+ 3204399104U, // <4,3,7,0>: Cost 3 ins <u,3,7,0>, lane 0
+ 3204407296U, // <4,3,7,1>: Cost 3 ins <u,3,7,1>, lane 0
+ 2660701368U, // <4,3,7,2>: Cost 3 vext2 <7,2,4,3>, <7,2,4,3>
+ 3204423680U, // <4,3,7,3>: Cost 3 ins <u,3,7,3>, lane 0
+ 2968404890U, // <4,3,7,4>: Cost 3 vzipr <2,5,4,7>, <1,2,3,4>
+ 3204440064U, // <4,3,7,5>: Cost 3 ins <u,3,7,5>, lane 0
+ 2235664908U, // <4,3,7,6>: Cost 3 vrev <3,4,6,7>
+ 2110144513U, // <4,3,7,7>: Cost 2 ins <4,u,7,7>, lane 1
+ 2110144513U, // <4,3,7,u>: Cost 2 ins <4,u,7,7>, lane 1
+ 1841637526U, // <4,3,u,0>: Cost 2 vzipl RHS, <3,0,1,2>
+ 1974012054U, // <4,3,u,1>: Cost 2 vtrnl RHS, <3,0,1,2>
+ 2109587457U, // <4,3,u,2>: Cost 2 ins <4,u,0,2>, lane 1
+ 1974012316U, // <4,3,u,3>: Cost 2 vtrnl RHS, <3,3,3,3>
+ 1841637890U, // <4,3,u,4>: Cost 2 vzipl RHS, <3,4,5,6>
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2109915137U, // <4,3,u,6>: Cost 2 ins <4,u,4,6>, lane 1
+ 1036328961U, // <4,3,u,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,3,u,u>: Cost 1 ins RHS, lane 1
+ 1974046028U, // <4,4,0,0>: Cost 2 vtrnl <4,6,0,2>, <4,6,0,2>
+ 2107572229U, // <4,4,0,1>: Cost 2 ins <4,4,u,u>, lane 5
+ 1705934950U, // <4,4,0,2>: Cost 2 vuzpl <4,6,4,6>, LHS
+ 3180724227U, // <4,4,0,3>: Cost 3 ins <4,4,0,u>, lane 3
+ 2107539458U, // <4,4,0,4>: Cost 2 ins <4,4,u,4>, lane 2
+ 2107547650U, // <4,4,0,5>: Cost 2 ins <4,4,u,5>, lane 2
+ 1974046006U, // <4,4,0,6>: Cost 2 vtrnl <4,6,0,2>, RHS
+ 2109628417U, // <4,4,0,7>: Cost 2 ins <4,u,0,7>, lane 1
+ 1974046024U, // <4,4,0,u>: Cost 2 vtrnl <4,6,0,2>, RHS
+ 3204620288U, // <4,4,1,0>: Cost 3 ins <u,4,1,0>, lane 0
+ 1836665802U, // <4,4,1,1>: Cost 2 vzipl <4,1,2,3>, <4,1,2,3>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 1771700326U, // <4,4,1,3>: Cost 2 vuzpr <4,4,4,4>, LHS
+ 2107539458U, // <4,4,1,4>: Cost 2 ins <4,4,u,4>, lane 2
+ 2130919424U, // <4,4,1,5>: Cost 2 ins <u,4,1,5>, lane 0
+ 2107555842U, // <4,4,1,6>: Cost 2 ins <4,4,u,6>, lane 2
+ 2109702145U, // <4,4,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 2130919424U, // <4,4,1,u>: Cost 2 ins <u,4,1,5>, lane 0
+ 2779678374U, // <4,4,2,0>: Cost 3 vuzpl <4,6,4,6>, <2,3,0,1>
+ 3044625673U, // <4,4,2,1>: Cost 3 vtrnl <4,1,2,3>, <4,5,1,7>
+ 1970883530U, // <4,4,2,2>: Cost 2 vtrnl <4,1,2,3>, <4,1,2,3>
+ 2107572229U, // <4,4,2,3>: Cost 2 ins <4,4,u,u>, lane 5
+ 2107539458U, // <4,4,2,4>: Cost 2 ins <4,4,u,4>, lane 2
+ 2107547650U, // <4,4,2,5>: Cost 2 ins <4,4,u,5>, lane 2
+ 2131001344U, // <4,4,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 2109775873U, // <4,4,2,7>: Cost 2 ins <4,u,2,7>, lane 1
+ 2107572229U, // <4,4,2,u>: Cost 2 ins <4,4,u,u>, lane 5
+ 3181248514U, // <4,4,3,0>: Cost 3 ins <4,4,u,0>, lane 2
+ 2779678870U, // <4,4,3,1>: Cost 3 vuzpl <4,6,4,6>, <3,0,1,2>
+ 3181264898U, // <4,4,3,2>: Cost 3 ins <4,4,u,2>, lane 2
+ 1880031352U, // <4,4,3,3>: Cost 2 vzipr <0,1,4,3>, <0,1,4,3>
+ 2107539458U, // <4,4,3,4>: Cost 2 ins <4,4,u,4>, lane 2
+ 2107547650U, // <4,4,3,5>: Cost 2 ins <4,4,u,5>, lane 2
+ 2107555842U, // <4,4,3,6>: Cost 2 ins <4,4,u,6>, lane 2
+ 2109849601U, // <4,4,3,7>: Cost 2 ins <4,u,3,7>, lane 1
+ 2107547650U, // <4,4,3,u>: Cost 2 ins <4,4,u,5>, lane 2
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2107277315U, // <4,4,4,1>: Cost 2 ins <4,4,4,u>, lane 3
+ 2107277315U, // <4,4,4,2>: Cost 2 ins <4,4,4,u>, lane 3
+ 2107277315U, // <4,4,4,3>: Cost 2 ins <4,4,4,u>, lane 3
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 2107547650U, // <4,4,4,5>: Cost 2 ins <4,4,u,5>, lane 2
+ 1705938230U, // <4,4,4,6>: Cost 2 vuzpl <4,6,4,6>, RHS
+ 2109923329U, // <4,4,4,7>: Cost 2 ins <4,u,4,7>, lane 1
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 1839647634U, // <4,4,5,0>: Cost 2 vzipl RHS, <4,0,5,1>
+ 2109947905U, // <4,4,5,1>: Cost 2 ins <4,u,5,1>, lane 1
+ 2107351043U, // <4,4,5,2>: Cost 2 ins <4,4,5,u>, lane 3
+ 2107351043U, // <4,4,5,3>: Cost 2 ins <4,4,5,u>, lane 3
+ 1839647952U, // <4,4,5,4>: Cost 2 vzipl RHS, <4,4,4,4>
+ 765906230U, // <4,4,5,5>: Cost 1 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2107351043U, // <4,4,5,7>: Cost 2 ins <4,4,5,u>, lane 3
+ 765906473U, // <4,4,5,u>: Cost 1 vzipl RHS, RHS
+ 1973865804U, // <4,4,6,0>: Cost 2 vtrnl RHS, <4,6,0,2>
+ 2107424771U, // <4,4,6,1>: Cost 2 ins <4,4,6,u>, lane 3
+ 2110029825U, // <4,4,6,2>: Cost 2 ins <4,u,6,2>, lane 1
+ 2107424771U, // <4,4,6,3>: Cost 2 ins <4,4,6,u>, lane 3
+ 1973865680U, // <4,4,6,4>: Cost 2 vtrnl RHS, <4,4,4,4>
+ 1973865362U, // <4,4,6,5>: Cost 2 vtrnl RHS, <4,0,5,1>
+ 900123958U, // <4,4,6,6>: Cost 1 vtrnl RHS, RHS
+ 1036328961U, // <4,4,6,7>: Cost 1 ins RHS, lane 1
+ 900123976U, // <4,4,6,u>: Cost 1 vtrnl RHS, RHS
+ 3181248514U, // <4,4,7,0>: Cost 3 ins <4,4,u,0>, lane 2
+ 2779681786U, // <4,4,7,1>: Cost 3 vuzpl <4,6,4,6>, <7,0,1,2>
+ 3181264898U, // <4,4,7,2>: Cost 3 ins <4,4,u,2>, lane 2
+ 2845442636U, // <4,4,7,3>: Cost 3 vuzpr <4,4,4,4>, <0,7,2,3>
+ 2107539458U, // <4,4,7,4>: Cost 2 ins <4,4,u,4>, lane 2
+ 2107547650U, // <4,4,7,5>: Cost 2 ins <4,4,u,5>, lane 2
+ 2131369984U, // <4,4,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 2040311013U, // <4,4,7,7>: Cost 2 vtrnr <4,4,6,7>, <4,4,6,7>
+ 2107547650U, // <4,4,7,u>: Cost 2 ins <4,4,u,5>, lane 2
+ 1974013260U, // <4,4,u,0>: Cost 2 vtrnl RHS, <4,6,0,2>
+ 2107572229U, // <4,4,u,1>: Cost 2 ins <4,4,u,u>, lane 5
+ 1705940782U, // <4,4,u,2>: Cost 2 vuzpl <4,6,4,6>, LHS
+ 2107572229U, // <4,4,u,3>: Cost 2 ins <4,4,u,u>, lane 5
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 767896886U, // <4,4,u,5>: Cost 1 vzipl RHS, RHS
+ 900271414U, // <4,4,u,6>: Cost 1 vtrnl RHS, RHS
+ 1036328961U, // <4,4,u,7>: Cost 1 ins RHS, lane 1
+ 900271432U, // <4,4,u,u>: Cost 1 vtrnl RHS, RHS
+ 2108170242U, // <4,5,0,0>: Cost 2 ins <4,5,u,0>, lane 2
+ 1034493957U, // <4,5,0,1>: Cost 1 ins RHS, lane 5
+ 1707294822U, // <4,5,0,2>: Cost 2 vuzpl <4,u,5,1>, LHS
+ 2108194818U, // <4,5,0,3>: Cost 2 ins <4,5,u,3>, lane 2
+ 2108203010U, // <4,5,0,4>: Cost 2 ins <4,5,u,4>, lane 2
+ 2108211202U, // <4,5,0,5>: Cost 2 ins <4,5,u,5>, lane 2
+ 2108219394U, // <4,5,0,6>: Cost 2 ins <4,5,u,6>, lane 2
+ 1034485762U, // <4,5,0,7>: Cost 1 ins RHS, lane 2
+ 1034493957U, // <4,5,0,u>: Cost 1 ins RHS, lane 5
+ 2108170242U, // <4,5,1,0>: Cost 2 ins <4,5,u,0>, lane 2
+ 2133540868U, // <4,5,1,1>: Cost 2 ins <u,u,1,1>, lane 4
+ 2133549060U, // <4,5,1,2>: Cost 2 ins <u,u,1,2>, lane 4
+ 1747599462U, // <4,5,1,3>: Cost 2 vuzpr <0,4,1,5>, LHS
+ 2108203010U, // <4,5,1,4>: Cost 2 ins <4,5,u,4>, lane 2
+ 2133573636U, // <4,5,1,5>: Cost 2 ins <u,u,1,5>, lane 4
+ 2108219394U, // <4,5,1,6>: Cost 2 ins <4,5,u,6>, lane 2
+ 1034485762U, // <4,5,1,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <4,5,1,u>: Cost 1 ins RHS, lane 2
+ 2108170242U, // <4,5,2,0>: Cost 2 ins <4,5,u,0>, lane 2
+ 2108178434U, // <4,5,2,1>: Cost 2 ins <4,5,u,1>, lane 2
+ 2133622788U, // <4,5,2,2>: Cost 2 ins <u,u,2,2>, lane 4
+ 1059889156U, // <4,5,2,3>: Cost 1 ins LHS, lane 4
+ 2108203010U, // <4,5,2,4>: Cost 2 ins <4,5,u,4>, lane 2
+ 2108211202U, // <4,5,2,5>: Cost 2 ins <4,5,u,5>, lane 2
+ 2133655556U, // <4,5,2,6>: Cost 2 ins <u,u,2,6>, lane 4
+ 1034485762U, // <4,5,2,7>: Cost 1 ins RHS, lane 2
+ 1059889156U, // <4,5,2,u>: Cost 1 ins LHS, lane 4
+ 2133680132U, // <4,5,3,0>: Cost 2 ins <u,u,3,0>, lane 4
+ 2108178434U, // <4,5,3,1>: Cost 2 ins <4,5,u,1>, lane 2
+ 2133696516U, // <4,5,3,2>: Cost 2 ins <u,u,3,2>, lane 4
+ 2133704708U, // <4,5,3,3>: Cost 2 ins <u,u,3,3>, lane 4
+ 2133712900U, // <4,5,3,4>: Cost 2 ins <u,u,3,4>, lane 4
+ 2108211202U, // <4,5,3,5>: Cost 2 ins <4,5,u,5>, lane 2
+ 2108219394U, // <4,5,3,6>: Cost 2 ins <4,5,u,6>, lane 2
+ 1034485762U, // <4,5,3,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <4,5,3,u>: Cost 1 ins RHS, lane 2
+ 2108170242U, // <4,5,4,0>: Cost 2 ins <4,5,u,0>, lane 2
+ 2108178434U, // <4,5,4,1>: Cost 2 ins <4,5,u,1>, lane 2
+ 2108186626U, // <4,5,4,2>: Cost 2 ins <4,5,u,2>, lane 2
+ 2108194818U, // <4,5,4,3>: Cost 2 ins <4,5,u,3>, lane 2
+ 2109898753U, // <4,5,4,4>: Cost 2 ins <4,u,4,4>, lane 1
+ 1034493957U, // <4,5,4,5>: Cost 1 ins RHS, lane 5
+ 1707298102U, // <4,5,4,6>: Cost 2 vuzpl <4,u,5,1>, RHS
+ 1034485762U, // <4,5,4,7>: Cost 1 ins RHS, lane 2
+ 1034493957U, // <4,5,4,u>: Cost 1 ins RHS, lane 5
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1839656656U, // <4,5,5,1>: Cost 2 vzipl RHS, <5,1,7,3>
+ 2108186626U, // <4,5,5,2>: Cost 2 ins <4,5,u,2>, lane 2
+ 2108194818U, // <4,5,5,3>: Cost 2 ins <4,5,u,3>, lane 2
+ 1839648710U, // <4,5,5,4>: Cost 2 vzipl RHS, <5,4,7,6>
+ 1839648772U, // <4,5,5,5>: Cost 2 vzipl RHS, <5,5,5,5>
+ 1839648866U, // <4,5,5,6>: Cost 2 vzipl RHS, <5,6,7,0>
+ 1034485762U, // <4,5,5,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <4,5,5,u>: Cost 1 ins RHS, lane 2
+ 1034346499U, // <4,5,6,0>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,5,6,1>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,5,6,2>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,5,6,3>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,5,6,4>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,5,6,5>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,5,6,6>: Cost 1 ins RHS, lane 3
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2133975044U, // <4,5,7,0>: Cost 2 ins <u,u,7,0>, lane 4
+ 2108178434U, // <4,5,7,1>: Cost 2 ins <4,5,u,1>, lane 2
+ 2108186626U, // <4,5,7,2>: Cost 2 ins <4,5,u,2>, lane 2
+ 2133999620U, // <4,5,7,3>: Cost 2 ins <u,u,7,3>, lane 4
+ 2134007812U, // <4,5,7,4>: Cost 2 ins <u,u,7,4>, lane 4
+ 2108211202U, // <4,5,7,5>: Cost 2 ins <4,5,u,5>, lane 2
+ 2134024196U, // <4,5,7,6>: Cost 2 ins <u,u,7,6>, lane 4
+ 1034485762U, // <4,5,7,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <4,5,7,u>: Cost 1 ins RHS, lane 2
+ 1034346499U, // <4,5,u,0>: Cost 1 ins RHS, lane 3
+ 1034493957U, // <4,5,u,1>: Cost 1 ins RHS, lane 5
+ 1034346499U, // <4,5,u,2>: Cost 1 ins RHS, lane 3
+ 1059889156U, // <4,5,u,3>: Cost 1 ins LHS, lane 4
+ 1034346499U, // <4,5,u,4>: Cost 1 ins RHS, lane 3
+ 1034493957U, // <4,5,u,5>: Cost 1 ins RHS, lane 5
+ 1034346499U, // <4,5,u,6>: Cost 1 ins RHS, lane 3
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 1705426944U, // <4,6,0,0>: Cost 2 vuzpl RHS, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 631685222U, // <4,6,0,2>: Cost 1 vuzpl RHS, LHS
+ 2108309507U, // <4,6,0,3>: Cost 2 ins <4,6,0,u>, lane 3
+ 1705427148U, // <4,6,0,4>: Cost 2 vuzpl RHS, <0,2,4,6>
+ 2108309507U, // <4,6,0,5>: Cost 2 ins <4,6,0,u>, lane 3
+ 2108882946U, // <4,6,0,6>: Cost 2 ins <4,6,u,6>, lane 2
+ 2108309507U, // <4,6,0,7>: Cost 2 ins <4,6,0,u>, lane 3
+ 631685276U, // <4,6,0,u>: Cost 1 vuzpl RHS, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 1705427764U, // <4,6,1,1>: Cost 2 vuzpl RHS, <1,1,1,1>
+ 2108850178U, // <4,6,1,2>: Cost 2 ins <4,6,u,2>, lane 2
+ 1747681382U, // <4,6,1,3>: Cost 2 vuzpr <0,4,2,6>, LHS
+ 2779169619U, // <4,6,1,4>: Cost 3 vuzpl RHS, <1,1,4,5>
+ 1705427968U, // <4,6,1,5>: Cost 2 vuzpl RHS, <1,3,5,7>
+ 2108882946U, // <4,6,1,6>: Cost 2 ins <4,6,u,6>, lane 2
+ 2109702145U, // <4,6,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 1747681387U, // <4,6,1,u>: Cost 2 vuzpr <0,4,2,6>, LHS
+ 1705428646U, // <4,6,2,0>: Cost 2 vuzpl RHS, <2,3,0,1>
+ 2779170237U, // <4,6,2,1>: Cost 3 vuzpl RHS, <2,0,1,2>
+ 1705428584U, // <4,6,2,2>: Cost 2 vuzpl RHS, <2,2,2,2>
+ 1705428594U, // <4,6,2,3>: Cost 2 vuzpl RHS, <2,2,3,3>
+ 1705428686U, // <4,6,2,4>: Cost 2 vuzpl RHS, <2,3,4,5>
+ 2839560386U, // <4,6,2,5>: Cost 3 vuzpr <3,4,5,6>, <0,2,3,5>
+ 2108882946U, // <4,6,2,6>: Cost 2 ins <4,6,u,6>, lane 2
+ 2109775873U, // <4,6,2,7>: Cost 2 ins <4,u,2,7>, lane 1
+ 1705428639U, // <4,6,2,u>: Cost 2 vuzpl RHS, <2,2,u,3>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 1705429142U, // <4,6,3,1>: Cost 2 vuzpl RHS, <3,0,1,2>
+ 2108850178U, // <4,6,3,2>: Cost 2 ins <4,6,u,2>, lane 2
+ 1705429404U, // <4,6,3,3>: Cost 2 vuzpl RHS, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 1705429506U, // <4,6,3,5>: Cost 2 vuzpl RHS, <3,4,5,6>
+ 2108882946U, // <4,6,3,6>: Cost 2 ins <4,6,u,6>, lane 2
+ 2132410368U, // <4,6,3,7>: Cost 2 ins <u,6,3,7>, lane 0
+ 1705429205U, // <4,6,3,u>: Cost 2 vuzpl RHS, <3,0,u,2>
+ 1705430348U, // <4,6,4,0>: Cost 2 vuzpl RHS, <4,6,0,2>
+ 2108604419U, // <4,6,4,1>: Cost 2 ins <4,6,4,u>, lane 3
+ 2108850178U, // <4,6,4,2>: Cost 2 ins <4,6,u,2>, lane 2
+ 2108604419U, // <4,6,4,3>: Cost 2 ins <4,6,4,u>, lane 3
+ 1705430224U, // <4,6,4,4>: Cost 2 vuzpl RHS, <4,4,4,4>
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 631688502U, // <4,6,4,6>: Cost 1 vuzpl RHS, RHS
+ 2108604419U, // <4,6,4,7>: Cost 2 ins <4,6,4,u>, lane 3
+ 631688520U, // <4,6,4,u>: Cost 1 vuzpl RHS, RHS
+ 2839563567U, // <4,6,5,0>: Cost 3 vuzpr <3,4,5,6>, <4,5,6,0>
+ 1705439360U, // <4,6,5,1>: Cost 2 vuzpl RHS, <5,7,1,3>
+ 1839657466U, // <4,6,5,2>: Cost 2 vzipl RHS, <6,2,7,3>
+ 2839563570U, // <4,6,5,3>: Cost 3 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2839563571U, // <4,6,5,4>: Cost 3 vuzpr <3,4,5,6>, <4,5,6,4>
+ 1705431044U, // <4,6,5,5>: Cost 2 vuzpl RHS, <5,5,5,5>
+ 1839649592U, // <4,6,5,6>: Cost 2 vzipl RHS, <6,6,6,6>
+ 1747684662U, // <4,6,5,7>: Cost 2 vuzpr <0,4,2,6>, RHS
+ 1747684663U, // <4,6,5,u>: Cost 2 vuzpr <0,4,2,6>, RHS
+ 1705431886U, // <4,6,6,0>: Cost 2 vuzpl RHS, <6,7,0,1>
+ 2110021633U, // <4,6,6,1>: Cost 2 ins <4,u,6,1>, lane 1
+ 2110029825U, // <4,6,6,2>: Cost 2 ins <4,u,6,2>, lane 1
+ 2110038017U, // <4,6,6,3>: Cost 2 ins <4,u,6,3>, lane 1
+ 1705431926U, // <4,6,6,4>: Cost 2 vuzpl RHS, <6,7,4,5>
+ 2110054401U, // <4,6,6,5>: Cost 2 ins <4,u,6,5>, lane 1
+ 1705431864U, // <4,6,6,6>: Cost 2 vuzpl RHS, <6,6,6,6>
+ 1036328961U, // <4,6,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,6,6,u>: Cost 1 ins RHS, lane 1
+ 2132647936U, // <4,6,7,0>: Cost 2 ins <u,6,7,0>, lane 0
+ 1705432058U, // <4,6,7,1>: Cost 2 vuzpl RHS, <7,0,1,2>
+ 2108850178U, // <4,6,7,2>: Cost 2 ins <4,6,u,2>, lane 2
+ 2779173980U, // <4,6,7,3>: Cost 3 vuzpl RHS, <7,1,3,1>
+ 2132680704U, // <4,6,7,4>: Cost 2 ins <u,6,7,4>, lane 0
+ 1705432422U, // <4,6,7,5>: Cost 2 vuzpl RHS, <7,4,5,6>
+ 2108882946U, // <4,6,7,6>: Cost 2 ins <4,6,u,6>, lane 2
+ 1705432684U, // <4,6,7,7>: Cost 2 vuzpl RHS, <7,7,7,7>
+ 1705432121U, // <4,6,7,u>: Cost 2 vuzpl RHS, <7,0,u,2>
+ 1705433020U, // <4,6,u,0>: Cost 2 vuzpl RHS, <u,3,0,1>
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 631691054U, // <4,6,u,2>: Cost 1 vuzpl RHS, LHS
+ 1747681949U, // <4,6,u,3>: Cost 2 vuzpr <0,4,2,6>, LHS
+ 1705433060U, // <4,6,u,4>: Cost 2 vuzpl RHS, <u,3,4,5>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 631691418U, // <4,6,u,6>: Cost 1 vuzpl RHS, RHS
+ 1036328961U, // <4,6,u,7>: Cost 1 ins RHS, lane 1
+ 631691108U, // <4,6,u,u>: Cost 1 vuzpl RHS, LHS
+ 3206537216U, // <4,7,0,0>: Cost 3 ins <u,7,0,0>, lane 0
+ 2132803584U, // <4,7,0,1>: Cost 2 ins <u,7,0,1>, lane 0
+ 2109587457U, // <4,7,0,2>: Cost 2 ins <4,u,0,2>, lane 1
+ 2845614101U, // <4,7,0,3>: Cost 3 vuzpr <4,4,6,7>, <0,0,2,3>
+ 3206569984U, // <4,7,0,4>: Cost 3 ins <u,7,0,4>, lane 0
+ 3047789926U, // <4,7,0,5>: Cost 3 vtrnl <4,6,0,2>, <7,4,5,6>
+ 3047789929U, // <4,7,0,6>: Cost 3 vtrnl <4,6,0,2>, <7,4,6,0>
+ 2109628417U, // <4,7,0,7>: Cost 2 ins <4,u,0,7>, lane 1
+ 2132803584U, // <4,7,0,u>: Cost 2 ins <u,7,0,1>, lane 0
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3206619136U, // <4,7,1,1>: Cost 3 ins <u,7,1,1>, lane 0
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 2132893696U, // <4,7,1,3>: Cost 2 ins <u,7,1,3>, lane 0
+ 3206643712U, // <4,7,1,4>: Cost 3 ins <u,7,1,4>, lane 0
+ 3206651904U, // <4,7,1,5>: Cost 3 ins <u,7,1,5>, lane 0
+ 2988265414U, // <4,7,1,6>: Cost 3 vzipr <5,u,4,1>, <5,4,7,6>
+ 2109702145U, // <4,7,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 2132893696U, // <4,7,1,u>: Cost 2 ins <u,7,1,3>, lane 0
+ 3206684672U, // <4,7,2,0>: Cost 3 ins <u,7,2,0>, lane 0
+ 3206692864U, // <4,7,2,1>: Cost 3 ins <u,7,2,1>, lane 0
+ 3206701056U, // <4,7,2,2>: Cost 3 ins <u,7,2,2>, lane 0
+ 2132967424U, // <4,7,2,3>: Cost 2 ins <u,7,2,3>, lane 0
+ 2833597338U, // <4,7,2,4>: Cost 3 vuzpr <2,4,5,7>, <1,2,3,4>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3206733824U, // <4,7,2,6>: Cost 3 ins <u,7,2,6>, lane 0
+ 2109775873U, // <4,7,2,7>: Cost 2 ins <4,u,2,7>, lane 1
+ 2132967424U, // <4,7,2,u>: Cost 2 ins <u,7,2,3>, lane 0
+ 3206758400U, // <4,7,3,0>: Cost 3 ins <u,7,3,0>, lane 0
+ 3206766592U, // <4,7,3,1>: Cost 3 ins <u,7,3,1>, lane 0
+ 3047388245U, // <4,7,3,2>: Cost 3 vtrnl <4,5,3,7>, <7,1,2,3>
+ 3206782976U, // <4,7,3,3>: Cost 3 ins <u,7,3,3>, lane 0
+ 2989609062U, // <4,7,3,4>: Cost 3 vzipr <6,1,4,3>, <5,6,7,4>
+ 3206799360U, // <4,7,3,5>: Cost 3 ins <u,7,3,5>, lane 0
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 2109849601U, // <4,7,3,7>: Cost 2 ins <4,u,3,7>, lane 1
+ 2109849601U, // <4,7,3,u>: Cost 2 ins <4,u,3,7>, lane 1
+ 2583199846U, // <4,7,4,0>: Cost 3 vext1 <5,4,7,4>, LHS
+ 3048117242U, // <4,7,4,1>: Cost 3 vtrnl <4,6,4,6>, <7,0,1,2>
+ 3183624193U, // <4,7,4,2>: Cost 3 ins <4,u,4,2>, lane 1
+ 2979659923U, // <4,7,4,3>: Cost 3 vzipr <4,4,4,4>, <0,1,7,3>
+ 2109898753U, // <4,7,4,4>: Cost 2 ins <4,u,4,4>, lane 1
+ 2133131264U, // <4,7,4,5>: Cost 2 ins <u,7,4,5>, lane 0
+ 2109915137U, // <4,7,4,6>: Cost 2 ins <4,u,4,6>, lane 1
+ 1771875557U, // <4,7,4,7>: Cost 2 vuzpr <4,4,6,7>, <4,4,6,7>
+ 2133131264U, // <4,7,4,u>: Cost 2 ins <u,7,4,5>, lane 0
+ 1839649786U, // <4,7,5,0>: Cost 2 vzipl RHS, <7,0,1,2>
+ 2109947905U, // <4,7,5,1>: Cost 2 ins <4,u,5,1>, lane 1
+ 2913391781U, // <4,7,5,2>: Cost 3 vzipl RHS, <7,2,2,2>
+ 2913391843U, // <4,7,5,3>: Cost 3 vzipl RHS, <7,3,0,1>
+ 1839650150U, // <4,7,5,4>: Cost 2 vzipl RHS, <7,4,5,6>
+ 2109980673U, // <4,7,5,5>: Cost 2 ins <4,u,5,5>, lane 1
+ 2913392145U, // <4,7,5,6>: Cost 3 vzipl RHS, <7,6,6,6>
+ 1839650412U, // <4,7,5,7>: Cost 2 vzipl RHS, <7,7,7,7>
+ 1839650434U, // <4,7,5,u>: Cost 2 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 1973867514U, // <4,7,6,1>: Cost 2 vtrnl RHS, <7,0,1,2>
+ 2110029825U, // <4,7,6,2>: Cost 2 ins <4,u,6,2>, lane 1
+ 2110038017U, // <4,7,6,3>: Cost 2 ins <4,u,6,3>, lane 1
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1973867878U, // <4,7,6,5>: Cost 2 vtrnl RHS, <7,4,5,6>
+ 2110062593U, // <4,7,6,6>: Cost 2 ins <4,u,6,6>, lane 1
+ 1036328961U, // <4,7,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,7,6,u>: Cost 1 ins RHS, lane 1
+ 2914587642U, // <4,7,7,0>: Cost 3 vzipl <4,7,5,0>, <7,0,1,2>
+ 2779862010U, // <4,7,7,1>: Cost 3 vuzpl <4,6,7,1>, <7,0,1,2>
+ 2779247701U, // <4,7,7,2>: Cost 3 vuzpl <4,5,7,7>, <7,1,2,3>
+ 3207077888U, // <4,7,7,3>: Cost 3 ins <u,7,7,3>, lane 0
+ 2914620774U, // <4,7,7,4>: Cost 3 vzipl <4,7,5,4>, <7,4,5,6>
+ 2779895142U, // <4,7,7,5>: Cost 3 vuzpl <4,6,7,5>, <7,4,5,6>
+ 2992295878U, // <4,7,7,6>: Cost 3 vzipr <6,5,4,7>, <5,4,7,6>
+ 2133368832U, // <4,7,7,7>: Cost 2 ins <u,7,7,7>, lane 0
+ 2133368832U, // <4,7,7,u>: Cost 2 ins <u,7,7,7>, lane 0
+ 1841640442U, // <4,7,u,0>: Cost 2 vzipl RHS, <7,0,1,2>
+ 1974014970U, // <4,7,u,1>: Cost 2 vtrnl RHS, <7,0,1,2>
+ 2109587457U, // <4,7,u,2>: Cost 2 ins <4,u,0,2>, lane 1
+ 2132893696U, // <4,7,u,3>: Cost 2 ins <u,7,1,3>, lane 0
+ 1841640806U, // <4,7,u,4>: Cost 2 vzipl RHS, <7,4,5,6>
+ 1974015334U, // <4,7,u,5>: Cost 2 vtrnl RHS, <7,4,5,6>
+ 2109915137U, // <4,7,u,6>: Cost 2 ins <4,u,4,6>, lane 1
+ 1036328961U, // <4,7,u,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <4,7,u,u>: Cost 1 ins RHS, lane 1
+ 1705574400U, // <4,u,0,0>: Cost 2 vuzpl RHS, <0,0,0,0>
+ 1034493957U, // <4,u,0,1>: Cost 1 ins RHS, lane 5
+ 631832678U, // <4,u,0,2>: Cost 1 vuzpl RHS, LHS
+ 2108309507U, // <4,u,0,3>: Cost 2 ins <4,6,0,u>, lane 3
+ 1705574604U, // <4,u,0,4>: Cost 2 vuzpl RHS, <0,2,4,6>
+ 2107547650U, // <4,u,0,5>: Cost 2 ins <4,4,u,5>, lane 2
+ 1974048922U, // <4,u,0,6>: Cost 2 vtrnl <4,6,0,2>, RHS
+ 1034485762U, // <4,u,0,7>: Cost 1 ins RHS, lane 2
+ 631832732U, // <4,u,0,u>: Cost 1 vuzpl RHS, LHS
+ 2108170242U, // <4,u,1,0>: Cost 2 ins <4,5,u,0>, lane 2
+ 1705575220U, // <4,u,1,1>: Cost 2 vuzpl RHS, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1747624038U, // <4,u,1,3>: Cost 2 vuzpr <0,4,1,u>, LHS
+ 2107539458U, // <4,u,1,4>: Cost 2 ins <4,4,u,4>, lane 2
+ 1705575424U, // <4,u,1,5>: Cost 2 vuzpl RHS, <1,3,5,7>
+ 2107555842U, // <4,u,1,6>: Cost 2 ins <4,4,u,6>, lane 2
+ 1034485762U, // <4,u,1,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <4,u,1,u>: Cost 1 ins RHS, lane 2
+ 1705576102U, // <4,u,2,0>: Cost 2 vuzpl RHS, <2,3,0,1>
+ 2104860674U, // <4,u,2,1>: Cost 2 ins <4,0,u,1>, lane 2
+ 1705576040U, // <4,u,2,2>: Cost 2 vuzpl RHS, <2,2,2,2>
+ 1055244288U, // <4,u,2,3>: Cost 1 ins LHS, lane 0
+ 1705576142U, // <4,u,2,4>: Cost 2 vuzpl RHS, <2,3,4,5>
+ 2107547650U, // <4,u,2,5>: Cost 2 ins <4,4,u,5>, lane 2
+ 2131001344U, // <4,u,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 1034485762U, // <4,u,2,7>: Cost 1 ins RHS, lane 2
+ 1055244288U, // <4,u,2,u>: Cost 1 ins LHS, lane 0
+ 2129698816U, // <4,u,3,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 1705576598U, // <4,u,3,1>: Cost 2 vuzpl RHS, <3,0,1,2>
+ 2128388096U, // <4,u,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 1705576860U, // <4,u,3,3>: Cost 2 vuzpl RHS, <3,3,3,3>
+ 2129731584U, // <4,u,3,4>: Cost 2 ins <u,2,3,4>, lane 0
+ 1705576962U, // <4,u,3,5>: Cost 2 vuzpl RHS, <3,4,5,6>
+ 2107555842U, // <4,u,3,6>: Cost 2 ins <4,4,u,6>, lane 2
+ 1034485762U, // <4,u,3,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <4,u,3,u>: Cost 1 ins RHS, lane 2
+ 1705577804U, // <4,u,4,0>: Cost 2 vuzpl RHS, <4,6,0,2>
+ 2104860674U, // <4,u,4,1>: Cost 2 ins <4,0,u,1>, lane 2
+ 1974376238U, // <4,u,4,2>: Cost 2 vtrnl <4,6,4,6>, LHS
+ 2108604419U, // <4,u,4,3>: Cost 2 ins <4,6,4,u>, lane 3
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1034493957U, // <4,u,4,5>: Cost 1 ins RHS, lane 5
+ 631835958U, // <4,u,4,6>: Cost 1 vuzpl RHS, RHS
+ 1034485762U, // <4,u,4,7>: Cost 1 ins RHS, lane 2
+ 631835976U, // <4,u,4,u>: Cost 1 vuzpl RHS, RHS
+ 1839650515U, // <4,u,5,0>: Cost 2 vzipl RHS, <u,0,1,2>
+ 765908782U, // <4,u,5,1>: Cost 1 vzipl RHS, LHS
+ 1839650693U, // <4,u,5,2>: Cost 2 vzipl RHS, <u,2,3,0>
+ 2016035485U, // <4,u,5,3>: Cost 2 vtrnr <0,4,1,5>, LHS
+ 1839650879U, // <4,u,5,4>: Cost 2 vzipl RHS, <u,4,5,6>
+ 765909146U, // <4,u,5,5>: Cost 1 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1034485762U, // <4,u,5,7>: Cost 1 ins RHS, lane 2
+ 765909349U, // <4,u,5,u>: Cost 1 vzipl RHS, LHS
+ 1034346499U, // <4,u,6,0>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,u,6,1>: Cost 1 ins RHS, lane 3
+ 900126510U, // <4,u,6,2>: Cost 1 vtrnl RHS, LHS
+ 1034346499U, // <4,u,6,3>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,u,6,4>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <4,u,6,5>: Cost 1 ins RHS, lane 3
+ 900126874U, // <4,u,6,6>: Cost 1 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2133975044U, // <4,u,7,0>: Cost 2 ins <u,u,7,0>, lane 4
+ 1705579514U, // <4,u,7,1>: Cost 2 vuzpl RHS, <7,0,1,2>
+ 2104868866U, // <4,u,7,2>: Cost 2 ins <4,0,u,2>, lane 2
+ 2129354752U, // <4,u,7,3>: Cost 2 ins <u,1,7,3>, lane 0
+ 2134007812U, // <4,u,7,4>: Cost 2 ins <u,u,7,4>, lane 4
+ 1705579878U, // <4,u,7,5>: Cost 2 vuzpl RHS, <7,4,5,6>
+ 2131369984U, // <4,u,7,6>: Cost 2 ins <u,4,7,6>, lane 0
+ 1034485762U, // <4,u,7,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <4,u,7,u>: Cost 1 ins RHS, lane 2
+ 1034346499U, // <4,u,u,0>: Cost 1 ins RHS, lane 3
+ 767899438U, // <4,u,u,1>: Cost 1 vzipl RHS, LHS
+ 631838510U, // <4,u,u,2>: Cost 1 vuzpl RHS, LHS
+ 1055244288U, // <4,u,u,3>: Cost 1 ins LHS, lane 0
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 767899802U, // <4,u,u,5>: Cost 1 vzipl RHS, RHS
+ 631838874U, // <4,u,u,6>: Cost 1 vuzpl RHS, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2128150528U, // <5,0,0,0>: Cost 2 ins <u,0,0,0>, lane 0
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 2846220309U, // <5,0,0,3>: Cost 3 vuzpr <4,5,6,0>, <0,0,2,3>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 2583318482U, // <5,0,0,5>: Cost 3 vext1 <5,5,0,0>, <5,5,0,0>
+ 3189334017U, // <5,0,0,6>: Cost 3 ins <5,u,0,6>, lane 1
+ 2846223265U, // <5,0,0,7>: Cost 3 vuzpr <4,5,6,0>, <4,0,6,7>
+ 2128150528U, // <5,0,0,u>: Cost 2 ins <u,0,0,0>, lane 0
+ 1503608934U, // <5,0,1,0>: Cost 2 vext1 <4,5,0,1>, LHS
+ 1843003494U, // <5,0,1,1>: Cost 2 vzipl <5,1,7,3>, LHS
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2115641345U, // <5,0,1,3>: Cost 2 ins <5,u,1,3>, lane 1
+ 1611612282U, // <5,0,1,4>: Cost 2 vext3 <0,1,4,5>, <0,1,4,5>
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3202015232U, // <5,0,1,6>: Cost 3 ins <u,0,1,6>, lane 0
+ 3189415937U, // <5,0,1,7>: Cost 3 ins <5,u,1,7>, lane 1
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2128314368U, // <5,0,2,2>: Cost 2 ins <u,0,2,2>, lane 0
+ 2128322560U, // <5,0,2,3>: Cost 2 ins <u,0,2,3>, lane 0
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 3189481473U, // <5,0,2,6>: Cost 3 ins <5,u,2,6>, lane 1
+ 2595280262U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,5,0,2>
+ 2128314368U, // <5,0,2,u>: Cost 2 ins <u,0,2,2>, lane 0
+ 3202113536U, // <5,0,3,0>: Cost 3 ins <u,0,3,0>, lane 0
+ 2918047846U, // <5,0,3,1>: Cost 3 vzipl <5,3,7,0>, LHS
+ 2128388096U, // <5,0,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 3189530625U, // <5,0,3,3>: Cost 3 ins <5,u,3,3>, lane 1
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 2785315330U, // <5,0,3,5>: Cost 3 vuzpl <5,6,0,1>, <3,4,5,6>
+ 3202162688U, // <5,0,3,6>: Cost 3 ins <u,0,3,6>, lane 0
+ 2840323072U, // <5,0,3,7>: Cost 3 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2128388096U, // <5,0,3,u>: Cost 2 ins <u,0,3,2>, lane 0
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3184336899U, // <5,0,4,3>: Cost 3 ins <5,0,4,u>, lane 3
+ 2687345005U, // <5,0,4,4>: Cost 3 vext3 <0,4,4,5>, <0,4,4,5>
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2846222850U, // <5,0,4,6>: Cost 3 vuzpr <4,5,6,0>, <3,4,5,6>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1845019293U, // <5,0,4,u>: Cost 2 vzipl <5,4,7,6>, LHS
+ 1772481839U, // <5,0,5,0>: Cost 2 vuzpr <4,5,6,0>, <4,5,6,0>
+ 1845526630U, // <5,0,5,1>: Cost 2 vzipl <5,5,5,5>, LHS
+ 1979744358U, // <5,0,5,2>: Cost 2 vtrnl <5,5,5,5>, LHS
+ 3189678081U, // <5,0,5,3>: Cost 3 ins <5,u,5,3>, lane 1
+ 2919268690U, // <5,0,5,4>: Cost 3 vzipl <5,5,5,5>, <0,4,1,5>
+ 2115952641U, // <5,0,5,5>: Cost 2 ins <5,u,5,5>, lane 1
+ 3202310144U, // <5,0,5,6>: Cost 3 ins <u,0,5,6>, lane 0
+ 2115969025U, // <5,0,5,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 1845527197U, // <5,0,5,u>: Cost 2 vzipl <5,5,5,5>, LHS
+ 2973777920U, // <5,0,6,0>: Cost 3 vzipr <3,4,5,6>, <0,0,0,0>
+ 1846296678U, // <5,0,6,1>: Cost 2 vzipl <5,6,7,0>, LHS
+ 2128609280U, // <5,0,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 3189751809U, // <5,0,6,3>: Cost 3 ins <5,u,6,3>, lane 1
+ 2920038738U, // <5,0,6,4>: Cost 3 vzipl <5,6,7,0>, <0,4,1,5>
+ 2920038866U, // <5,0,6,5>: Cost 3 vzipl <5,6,7,0>, <0,5,6,7>
+ 3189776385U, // <5,0,6,6>: Cost 3 ins <5,u,6,6>, lane 1
+ 2128650240U, // <5,0,6,7>: Cost 2 ins <u,0,6,7>, lane 0
+ 1846297245U, // <5,0,6,u>: Cost 2 vzipl <5,6,7,0>, LHS
+ 2040971264U, // <5,0,7,0>: Cost 2 vtrnr RHS, <0,0,0,0>
+ 2040971274U, // <5,0,7,1>: Cost 2 vtrnr RHS, <0,0,1,1>
+ 2040971284U, // <5,0,7,2>: Cost 2 vtrnr RHS, <0,0,2,2>
+ 2116083713U, // <5,0,7,3>: Cost 2 ins <5,u,7,3>, lane 1
+ 2116091905U, // <5,0,7,4>: Cost 2 ins <5,u,7,4>, lane 1
+ 3114715316U, // <5,0,7,5>: Cost 3 vtrnr RHS, <3,0,4,5>
+ 2116108289U, // <5,0,7,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 2116116481U, // <5,0,7,7>: Cost 2 ins <5,u,7,7>, lane 1
+ 2040971281U, // <5,0,7,u>: Cost 2 vtrnr RHS, <0,0,1,u>
+ 2040979456U, // <5,0,u,0>: Cost 2 vtrnr RHS, <0,0,0,0>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2115641345U, // <5,0,u,3>: Cost 2 ins <5,u,1,3>, lane 1
+ 2116091905U, // <5,0,u,4>: Cost 2 ins <5,u,7,4>, lane 1
+ 2115952641U, // <5,0,u,5>: Cost 2 ins <5,u,5,5>, lane 1
+ 2116108289U, // <5,0,u,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 2115969025U, // <5,0,u,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 1712324710U, // <5,1,0,2>: Cost 2 vuzpl <5,7,1,3>, LHS
+ 2111512578U, // <5,1,0,3>: Cost 2 ins <5,1,u,3>, lane 2
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2977710418U, // <5,1,0,5>: Cost 3 vzipr <4,1,5,0>, <0,4,1,5>
+ 3185278978U, // <5,1,0,6>: Cost 3 ins <5,1,u,6>, lane 2
+ 3184705539U, // <5,1,0,7>: Cost 3 ins <5,1,0,u>, lane 3
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2128896000U, // <5,1,1,1>: Cost 2 ins <u,1,1,1>, lane 0
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2115641345U, // <5,1,1,3>: Cost 2 ins <5,u,1,3>, lane 1
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3189407745U, // <5,1,1,6>: Cost 3 ins <5,u,1,6>, lane 1
+ 2982367283U, // <5,1,1,7>: Cost 3 vzipr <4,u,5,1>, <5,6,1,7>
+ 2115641345U, // <5,1,1,u>: Cost 2 ins <5,u,1,3>, lane 1
+ 2128961536U, // <5,1,2,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 2128969728U, // <5,1,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 2128977920U, // <5,1,2,2>: Cost 2 ins <u,1,2,2>, lane 0
+ 1055244288U, // <5,1,2,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <5,1,2,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 2129002496U, // <5,1,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2129010688U, // <5,1,2,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2129018880U, // <5,1,2,7>: Cost 2 ins <u,1,2,7>, lane 0
+ 1055244288U, // <5,1,2,u>: Cost 1 ins LHS, lane 0
+ 2571468902U, // <5,1,3,0>: Cost 3 vext1 <3,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 2571470542U, // <5,1,3,2>: Cost 3 vext1 <3,5,1,3>, <2,3,4,5>
+ 2129059840U, // <5,1,3,3>: Cost 2 ins <u,1,3,3>, lane 0
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 2595361654U, // <5,1,3,6>: Cost 3 vext1 <7,5,1,3>, <6,7,4,5>
+ 2840331264U, // <5,1,3,7>: Cost 3 vuzpr <3,5,7,1>, <1,3,5,7>
+ 2129059840U, // <5,1,3,u>: Cost 2 ins <u,1,3,3>, lane 0
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2111512578U, // <5,1,4,3>: Cost 2 ins <5,1,u,3>, lane 2
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 1712327990U, // <5,1,4,6>: Cost 2 vuzpl <5,7,1,3>, RHS
+ 3185000451U, // <5,1,4,7>: Cost 3 ins <5,1,4,u>, lane 3
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 1712328832U, // <5,1,5,1>: Cost 2 vuzpl <5,7,1,3>, <5,7,1,3>
+ 2982398102U, // <5,1,5,2>: Cost 3 vzipr <4,u,5,5>, <3,0,1,2>
+ 2046853222U, // <5,1,5,3>: Cost 2 vtrnr <5,5,5,5>, LHS
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2115952641U, // <5,1,5,5>: Cost 2 ins <5,u,5,5>, lane 1
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2115969025U, // <5,1,5,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 2046853227U, // <5,1,5,u>: Cost 2 vtrnr <5,5,5,5>, LHS
+ 2920039158U, // <5,1,6,0>: Cost 3 vzipl <5,6,7,0>, <1,0,3,2>
+ 2961834642U, // <5,1,6,1>: Cost 3 vzipr <1,4,5,6>, <0,u,1,1>
+ 2973780118U, // <5,1,6,2>: Cost 3 vzipr <3,4,5,6>, <3,0,1,2>
+ 2111512578U, // <5,1,6,3>: Cost 2 ins <5,1,u,3>, lane 2
+ 2224227480U, // <5,1,6,4>: Cost 3 vrev <1,5,4,6>
+ 2973778258U, // <5,1,6,5>: Cost 3 vzipr <3,4,5,6>, <0,4,1,5>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2111553541U, // <5,1,6,7>: Cost 2 ins <5,1,u,u>, lane 5
+ 2111512578U, // <5,1,6,u>: Cost 2 ins <5,1,u,3>, lane 2
+ 2116059137U, // <5,1,7,0>: Cost 2 ins <5,u,7,0>, lane 1
+ 2040972084U, // <5,1,7,1>: Cost 2 vtrnr RHS, <1,1,1,1>
+ 2111479811U, // <5,1,7,2>: Cost 2 ins <5,1,7,u>, lane 3
+ 967229542U, // <5,1,7,3>: Cost 1 vtrnr RHS, LHS
+ 2116091905U, // <5,1,7,4>: Cost 2 ins <5,u,7,4>, lane 1
+ 2111479811U, // <5,1,7,5>: Cost 2 ins <5,1,7,u>, lane 3
+ 2116108289U, // <5,1,7,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 2116116481U, // <5,1,7,7>: Cost 2 ins <5,u,7,7>, lane 1
+ 967229547U, // <5,1,7,u>: Cost 1 vtrnr RHS, LHS
+ 2116059137U, // <5,1,u,0>: Cost 2 ins <5,u,7,0>, lane 1
+ 2040980276U, // <5,1,u,1>: Cost 2 vtrnr RHS, <1,1,1,1>
+ 1712330542U, // <5,1,u,2>: Cost 2 vuzpl <5,7,1,3>, LHS
+ 967237734U, // <5,1,u,3>: Cost 1 vtrnr RHS, LHS
+ 2116091905U, // <5,1,u,4>: Cost 2 ins <5,u,7,4>, lane 1
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 1712330906U, // <5,1,u,6>: Cost 2 vuzpl <5,7,1,3>, RHS
+ 2115969025U, // <5,1,u,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 967237739U, // <5,1,u,u>: Cost 1 vtrnr RHS, LHS
+ 2786132132U, // <5,2,0,0>: Cost 3 vuzpl <5,7,2,2>, <0,2,0,2>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2129494016U, // <5,2,0,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 2973728870U, // <5,2,0,3>: Cost 3 vzipr <3,4,5,0>, LHS
+ 2786164940U, // <5,2,0,4>: Cost 3 vuzpl <5,7,2,6>, <0,2,4,6>
+ 2782158977U, // <5,2,0,5>: Cost 3 vuzpl <5,1,2,3>, <0,1,5,3>
+ 3185942530U, // <5,2,0,6>: Cost 3 ins <5,2,u,6>, lane 2
+ 3114658883U, // <5,2,0,7>: Cost 3 vtrnr <4,5,6,0>, <4,2,6,7>
+ 2129494016U, // <5,2,0,u>: Cost 2 ins <u,2,0,2>, lane 0
+ 3054503590U, // <5,2,1,0>: Cost 3 vtrnl <5,7,1,3>, <2,3,0,1>
+ 3203301376U, // <5,2,1,1>: Cost 3 ins <u,2,1,1>, lane 0
+ 2982363156U, // <5,2,1,2>: Cost 3 vzipr <4,u,5,1>, <0,0,2,2>
+ 1908621414U, // <5,2,1,3>: Cost 2 vzipr <4,u,5,1>, LHS
+ 3054503630U, // <5,2,1,4>: Cost 3 vtrnl <5,7,1,3>, <2,3,4,5>
+ 2601390208U, // <5,2,1,5>: Cost 3 vext1 <u,5,2,1>, <5,7,1,3>
+ 2982363484U, // <5,2,1,6>: Cost 3 vzipr <4,u,5,1>, <0,4,2,6>
+ 3189415937U, // <5,2,1,7>: Cost 3 ins <5,u,1,7>, lane 1
+ 1908621419U, // <5,2,1,u>: Cost 2 vzipr <4,u,5,1>, LHS
+ 3203366912U, // <5,2,2,0>: Cost 3 ins <u,2,2,0>, lane 0
+ 3203375104U, // <5,2,2,1>: Cost 3 ins <u,2,2,1>, lane 0
+ 2129641472U, // <5,2,2,2>: Cost 2 ins <u,2,2,2>, lane 0
+ 2129649664U, // <5,2,2,3>: Cost 2 ins <u,2,2,3>, lane 0
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 2698036870U, // <5,2,2,5>: Cost 3 vext3 <2,2,5,5>, <2,2,5,5>
+ 3189481473U, // <5,2,2,6>: Cost 3 ins <5,u,2,6>, lane 1
+ 2846239811U, // <5,2,2,7>: Cost 3 vuzpr <4,5,6,2>, <4,2,6,7>
+ 2129641472U, // <5,2,2,u>: Cost 2 ins <u,2,2,2>, lane 0
+ 2129698816U, // <5,2,3,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 2129723392U, // <5,2,3,3>: Cost 2 ins <u,2,3,3>, lane 0
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2717943511U, // <5,2,3,5>: Cost 3 vext3 <5,5,5,5>, <2,3,5,5>
+ 3203489792U, // <5,2,3,6>: Cost 3 ins <u,2,3,6>, lane 0
+ 2827879424U, // <5,2,3,7>: Cost 3 vuzpr <1,5,0,2>, <1,3,5,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 3203514368U, // <5,2,4,0>: Cost 3 ins <u,2,4,0>, lane 0
+ 3189587969U, // <5,2,4,1>: Cost 3 ins <5,u,4,1>, lane 1
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 3203547136U, // <5,2,4,4>: Cost 3 ins <u,2,4,4>, lane 0
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2129821696U, // <5,2,4,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 2846239973U, // <5,2,4,7>: Cost 3 vuzpr <4,5,6,2>, <4,4,6,7>
+ 2129821696U, // <5,2,4,u>: Cost 2 ins <u,2,4,6>, lane 0
+ 3053487782U, // <5,2,5,0>: Cost 3 vtrnl <5,5,5,5>, <2,3,0,1>
+ 3203596288U, // <5,2,5,1>: Cost 3 ins <u,2,5,1>, lane 0
+ 1772498225U, // <5,2,5,2>: Cost 2 vuzpr <4,5,6,2>, <4,5,6,2>
+ 1908654182U, // <5,2,5,3>: Cost 2 vzipr <4,u,5,5>, LHS
+ 3053487822U, // <5,2,5,4>: Cost 3 vtrnl <5,5,5,5>, <2,3,4,5>
+ 2115952641U, // <5,2,5,5>: Cost 2 ins <5,u,5,5>, lane 1
+ 2982396252U, // <5,2,5,6>: Cost 3 vzipr <4,u,5,5>, <0,4,2,6>
+ 2115969025U, // <5,2,5,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 1908654187U, // <5,2,5,u>: Cost 2 vzipr <4,u,5,5>, LHS
+ 3203661824U, // <5,2,6,0>: Cost 3 ins <u,2,6,0>, lane 0
+ 3189735425U, // <5,2,6,1>: Cost 3 ins <5,u,6,1>, lane 1
+ 2973777940U, // <5,2,6,2>: Cost 3 vzipr <3,4,5,6>, <0,0,2,2>
+ 1900036198U, // <5,2,6,3>: Cost 2 vzipr <3,4,5,6>, LHS
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 2973778186U, // <5,2,6,5>: Cost 3 vzipr <3,4,5,6>, <0,3,2,5>
+ 2973778268U, // <5,2,6,6>: Cost 3 vzipr <3,4,5,6>, <0,4,2,6>
+ 2129977344U, // <5,2,6,7>: Cost 2 ins <u,2,6,7>, lane 0
+ 1900036203U, // <5,2,6,u>: Cost 2 vzipr <3,4,5,6>, LHS
+ 2040972182U, // <5,2,7,0>: Cost 2 vtrnr RHS, <1,2,3,0>
+ 3114713251U, // <5,2,7,1>: Cost 3 vtrnr RHS, <0,2,0,1>
+ 2040971428U, // <5,2,7,2>: Cost 2 vtrnr RHS, <0,2,0,2>
+ 1887436902U, // <5,2,7,3>: Cost 2 vzipr <1,3,5,7>, LHS
+ 2040972186U, // <5,2,7,4>: Cost 2 vtrnr RHS, <1,2,3,4>
+ 2961178728U, // <5,2,7,5>: Cost 3 vzipr <1,3,5,7>, <0,1,2,5>
+ 2040971468U, // <5,2,7,6>: Cost 2 vtrnr RHS, <0,2,4,6>
+ 2116116481U, // <5,2,7,7>: Cost 2 ins <5,u,7,7>, lane 1
+ 1887436907U, // <5,2,7,u>: Cost 2 vzipr <1,3,5,7>, LHS
+ 2040980374U, // <5,2,u,0>: Cost 2 vtrnr RHS, <1,2,3,0>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2040979620U, // <5,2,u,2>: Cost 2 vtrnr RHS, <0,2,0,2>
+ 1887445094U, // <5,2,u,3>: Cost 2 vzipr <1,3,5,u>, LHS
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2115952641U, // <5,2,u,5>: Cost 2 ins <5,u,5,5>, lane 1
+ 2040979660U, // <5,2,u,6>: Cost 2 vtrnr RHS, <0,2,4,6>
+ 2115969025U, // <5,2,u,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 1887445099U, // <5,2,u,u>: Cost 2 vzipr <1,3,5,u>, LHS
+ 3203883008U, // <5,3,0,0>: Cost 3 ins <u,3,0,0>, lane 0
+ 2130149376U, // <5,3,0,1>: Cost 2 ins <u,3,0,1>, lane 0
+ 2782904422U, // <5,3,0,2>: Cost 3 vuzpl <5,2,3,4>, LHS
+ 3186581506U, // <5,3,0,3>: Cost 3 ins <5,3,u,3>, lane 2
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3053750786U, // <5,3,0,5>: Cost 3 vtrnl <5,6,0,1>, <3,4,5,6>
+ 2618302971U, // <5,3,0,6>: Cost 3 vext2 <0,1,5,3>, <0,6,2,3>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2130149376U, // <5,3,0,u>: Cost 2 ins <u,3,0,1>, lane 0
+ 2982364054U, // <5,3,1,0>: Cost 3 vzipr <4,u,5,1>, <1,2,3,0>
+ 3054504086U, // <5,3,1,1>: Cost 3 vtrnl <5,7,1,3>, <3,0,1,2>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2130239488U, // <5,3,1,3>: Cost 2 ins <u,3,1,3>, lane 0
+ 2982364058U, // <5,3,1,4>: Cost 3 vzipr <4,u,5,1>, <1,2,3,4>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3189407745U, // <5,3,1,6>: Cost 3 ins <5,u,1,6>, lane 1
+ 2964448400U, // <5,3,1,7>: Cost 3 vzipr <1,u,5,1>, <1,5,3,7>
+ 2130239488U, // <5,3,1,u>: Cost 2 ins <u,3,1,3>, lane 0
+ 2235845154U, // <5,3,2,0>: Cost 3 vrev <3,5,0,2>
+ 3204038656U, // <5,3,2,1>: Cost 3 ins <u,3,2,1>, lane 0
+ 3204046848U, // <5,3,2,2>: Cost 3 ins <u,3,2,2>, lane 0
+ 2130313216U, // <5,3,2,3>: Cost 2 ins <u,3,2,3>, lane 0
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3204079616U, // <5,3,2,6>: Cost 3 ins <u,3,2,6>, lane 0
+ 3096314880U, // <5,3,2,7>: Cost 3 vtrnr <1,5,0,2>, <1,3,5,7>
+ 2130313216U, // <5,3,2,u>: Cost 2 ins <u,3,2,3>, lane 0
+ 3204104192U, // <5,3,3,0>: Cost 3 ins <u,3,3,0>, lane 0
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3204120576U, // <5,3,3,2>: Cost 3 ins <u,3,3,2>, lane 0
+ 2130386944U, // <5,3,3,3>: Cost 2 ins <u,3,3,3>, lane 0
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3189555201U, // <5,3,3,6>: Cost 3 ins <5,u,3,6>, lane 1
+ 2971763856U, // <5,3,3,7>: Cost 3 vzipr <3,1,5,3>, <1,5,3,7>
+ 2130386944U, // <5,3,3,u>: Cost 2 ins <u,3,3,3>, lane 0
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 2642193381U, // <5,3,4,1>: Cost 3 vext2 <4,1,5,3>, <4,1,5,3>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2130477056U, // <5,3,4,5>: Cost 2 ins <u,3,4,5>, lane 0
+ 2846247426U, // <5,3,4,6>: Cost 3 vuzpr <4,5,6,3>, <3,4,5,6>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2130477056U, // <5,3,4,u>: Cost 2 ins <u,3,4,5>, lane 0
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 3053488278U, // <5,3,5,1>: Cost 3 vtrnl <5,5,5,5>, <3,0,1,2>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 1748320682U, // <5,3,5,3>: Cost 2 vuzpr <0,5,2,3>, <0,5,2,3>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2115952641U, // <5,3,5,5>: Cost 2 ins <5,u,5,5>, lane 1
+ 3204300800U, // <5,3,5,6>: Cost 3 ins <u,3,5,6>, lane 0
+ 2130567168U, // <5,3,5,7>: Cost 2 ins <u,3,5,7>, lane 0
+ 2130567168U, // <5,3,5,u>: Cost 2 ins <u,3,5,7>, lane 0
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3204333568U, // <5,3,6,1>: Cost 3 ins <u,3,6,1>, lane 0
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 2973778114U, // <5,3,6,5>: Cost 3 vzipr <3,4,5,6>, <0,2,3,5>
+ 2973779816U, // <5,3,6,6>: Cost 3 vzipr <3,4,5,6>, <2,5,3,6>
+ 2130640896U, // <5,3,6,7>: Cost 2 ins <u,3,6,7>, lane 0
+ 2130640896U, // <5,3,6,u>: Cost 2 ins <u,3,6,7>, lane 0
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2961179382U, // <5,3,7,2>: Cost 3 vzipr <1,3,5,7>, <1,0,3,2>
+ 2040972248U, // <5,3,7,3>: Cost 2 vtrnr RHS, <1,3,1,3>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2040973006U, // <5,3,7,5>: Cost 2 vtrnr RHS, <2,3,4,5>
+ 2116108289U, // <5,3,7,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 2040972288U, // <5,3,7,7>: Cost 2 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2961187574U, // <5,3,u,2>: Cost 3 vzipr <1,3,5,u>, <1,0,3,2>
+ 2040980440U, // <5,3,u,3>: Cost 2 vtrnr RHS, <1,3,1,3>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2040981198U, // <5,3,u,5>: Cost 2 vtrnr RHS, <2,3,4,5>
+ 2116108289U, // <5,3,u,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 2040980480U, // <5,3,u,7>: Cost 2 vtrnr RHS, <1,3,5,7>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3189284865U, // <5,4,0,0>: Cost 3 ins <5,u,0,0>, lane 1
+ 2113544197U, // <5,4,0,1>: Cost 2 ins <5,4,u,u>, lane 5
+ 2781626470U, // <5,4,0,2>: Cost 3 vuzpl <5,0,4,1>, LHS
+ 2242022676U, // <5,4,0,3>: Cost 3 vrev <4,5,3,0>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2113527810U, // <5,4,0,6>: Cost 2 ins <5,4,u,6>, lane 2
+ 3114659045U, // <5,4,0,7>: Cost 3 vtrnr <4,5,6,0>, <4,4,6,7>
+ 2113544197U, // <5,4,0,u>: Cost 2 ins <5,4,u,u>, lane 5
+ 1168067834U, // <5,4,1,0>: Cost 2 vrev <4,5,0,1>
+ 3189366785U, // <5,4,1,1>: Cost 3 ins <5,u,1,1>, lane 1
+ 3204636672U, // <5,4,1,2>: Cost 3 ins <u,4,1,2>, lane 0
+ 2115641345U, // <5,4,1,3>: Cost 2 ins <5,u,1,3>, lane 1
+ 2982366416U, // <5,4,1,4>: Cost 3 vzipr <4,u,5,1>, <4,4,4,4>
+ 1843006774U, // <5,4,1,5>: Cost 2 vzipl <5,1,7,3>, RHS
+ 1980763446U, // <5,4,1,6>: Cost 2 vtrnl <5,7,1,3>, RHS
+ 3189415937U, // <5,4,1,7>: Cost 3 ins <5,u,1,7>, lane 1
+ 1843007017U, // <5,4,1,u>: Cost 2 vzipl <5,1,7,3>, RHS
+ 3204694016U, // <5,4,2,0>: Cost 3 ins <u,4,2,0>, lane 0
+ 2241891588U, // <5,4,2,1>: Cost 3 vrev <4,5,1,2>
+ 3189448705U, // <5,4,2,2>: Cost 3 ins <5,u,2,2>, lane 1
+ 2113544197U, // <5,4,2,3>: Cost 2 ins <5,4,u,u>, lane 5
+ 3204726784U, // <5,4,2,4>: Cost 3 ins <u,4,2,4>, lane 0
+ 2973746894U, // <5,4,2,5>: Cost 3 vzipr <3,4,5,2>, <2,3,4,5>
+ 2131001344U, // <5,4,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 3114675429U, // <5,4,2,7>: Cost 3 vtrnr <4,5,6,2>, <4,4,6,7>
+ 2113544197U, // <5,4,2,u>: Cost 2 ins <5,4,u,u>, lane 5
+ 3204767744U, // <5,4,3,0>: Cost 3 ins <u,4,3,0>, lane 0
+ 2241899781U, // <5,4,3,1>: Cost 3 vrev <4,5,1,3>
+ 1168231694U, // <5,4,3,2>: Cost 2 vrev <4,5,2,3>
+ 3189530625U, // <5,4,3,3>: Cost 3 ins <5,u,3,3>, lane 1
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 2978399950U, // <5,4,3,5>: Cost 3 vzipr <4,2,5,3>, <2,3,4,5>
+ 2113527810U, // <5,4,3,6>: Cost 2 ins <5,4,u,6>, lane 2
+ 2840355840U, // <5,4,3,7>: Cost 3 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2113527810U, // <5,4,3,u>: Cost 2 ins <5,4,u,6>, lane 2
+ 2918763410U, // <5,4,4,0>: Cost 3 vzipl <5,4,7,6>, <4,0,5,1>
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3186991107U, // <5,4,4,2>: Cost 3 ins <5,4,4,u>, lane 3
+ 3186991107U, // <5,4,4,3>: Cost 3 ins <5,4,4,u>, lane 3
+ 2131132416U, // <5,4,4,4>: Cost 2 ins <u,4,4,4>, lane 0
+ 1845022006U, // <5,4,4,5>: Cost 2 vzipl <5,4,7,6>, RHS
+ 2113527810U, // <5,4,4,6>: Cost 2 ins <5,4,u,6>, lane 2
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1845022249U, // <5,4,4,u>: Cost 2 vzipl <5,4,7,6>, RHS
+ 1503936614U, // <5,4,5,0>: Cost 2 vext1 <4,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3189678081U, // <5,4,5,3>: Cost 3 ins <5,u,5,3>, lane 1
+ 1168395554U, // <5,4,5,4>: Cost 2 vrev <4,5,4,5>
+ 1845529910U, // <5,4,5,5>: Cost 2 vzipl <5,5,5,5>, RHS
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2115969025U, // <5,4,5,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771800U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,5,4,6>
+ 3189743617U, // <5,4,6,2>: Cost 3 ins <5,u,6,2>, lane 1
+ 2571717194U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,5,4,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 1846299958U, // <5,4,6,5>: Cost 2 vzipl <5,6,7,0>, RHS
+ 2131296256U, // <5,4,6,6>: Cost 2 ins <u,4,6,6>, lane 0
+ 2113544197U, // <5,4,6,7>: Cost 2 ins <5,4,u,u>, lane 5
+ 1846300201U, // <5,4,6,u>: Cost 2 vzipl <5,6,7,0>, RHS
+ 2116059137U, // <5,4,7,0>: Cost 2 ins <5,u,7,0>, lane 1
+ 2113470467U, // <5,4,7,1>: Cost 2 ins <5,4,7,u>, lane 3
+ 2113470467U, // <5,4,7,2>: Cost 2 ins <5,4,7,u>, lane 3
+ 2116083713U, // <5,4,7,3>: Cost 2 ins <5,u,7,3>, lane 1
+ 2040974544U, // <5,4,7,4>: Cost 2 vtrnr RHS, <4,4,4,4>
+ 2040971602U, // <5,4,7,5>: Cost 2 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2116116481U, // <5,4,7,7>: Cost 2 ins <5,u,7,7>, lane 1
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2116059137U, // <5,4,u,0>: Cost 2 ins <5,u,7,0>, lane 1
+ 2113544197U, // <5,4,u,1>: Cost 2 ins <5,4,u,u>, lane 5
+ 2113470467U, // <5,4,u,2>: Cost 2 ins <5,4,7,u>, lane 3
+ 2115641345U, // <5,4,u,3>: Cost 2 ins <5,u,1,3>, lane 1
+ 2040982736U, // <5,4,u,4>: Cost 2 vtrnr RHS, <4,4,4,4>
+ 2040979794U, // <5,4,u,5>: Cost 2 vtrnr RHS, <0,4,1,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2115969025U, // <5,4,u,7>: Cost 2 ins <5,u,5,7>, lane 1
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2040917295U, // <5,5,0,0>: Cost 2 vtrnr <4,5,6,0>, <4,5,6,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 1711308902U, // <5,5,0,2>: Cost 2 vuzpl <5,5,5,5>, LHS
+ 3187908610U, // <5,5,0,3>: Cost 3 ins <5,5,u,3>, lane 2
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2114183170U, // <5,5,0,5>: Cost 2 ins <5,5,u,5>, lane 2
+ 3187933186U, // <5,5,0,6>: Cost 3 ins <5,5,u,6>, lane 2
+ 2114199554U, // <5,5,0,7>: Cost 2 ins <5,5,u,7>, lane 2
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 1908624922U, // <5,5,1,1>: Cost 2 vzipr <4,u,5,1>, <4,u,5,1>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 1778417766U, // <5,5,1,3>: Cost 2 vuzpr <5,5,5,5>, LHS
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2114183170U, // <5,5,1,5>: Cost 2 ins <5,5,u,5>, lane 2
+ 2982365698U, // <5,5,1,6>: Cost 3 vzipr <4,u,5,1>, <3,4,5,6>
+ 2114199554U, // <5,5,1,7>: Cost 2 ins <5,5,u,7>, lane 2
+ 1778417771U, // <5,5,1,u>: Cost 2 vuzpr <5,5,5,5>, LHS
+ 2785052326U, // <5,5,2,0>: Cost 3 vuzpl <5,5,5,5>, <2,3,0,1>
+ 3205365760U, // <5,5,2,1>: Cost 3 ins <u,5,2,1>, lane 0
+ 2040933681U, // <5,5,2,2>: Cost 2 vtrnr <4,5,6,2>, <4,5,6,2>
+ 2114207749U, // <5,5,2,3>: Cost 2 ins <5,5,u,u>, lane 5
+ 2785052366U, // <5,5,2,4>: Cost 3 vuzpl <5,5,5,5>, <2,3,4,5>
+ 2114183170U, // <5,5,2,5>: Cost 2 ins <5,5,u,5>, lane 2
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 2114199554U, // <5,5,2,7>: Cost 2 ins <5,5,u,7>, lane 2
+ 2114207749U, // <5,5,2,u>: Cost 2 ins <5,5,u,u>, lane 5
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 2785052822U, // <5,5,3,1>: Cost 3 vuzpl <5,5,5,5>, <3,0,1,2>
+ 3187900418U, // <5,5,3,2>: Cost 3 ins <5,5,u,2>, lane 2
+ 1880105089U, // <5,5,3,3>: Cost 2 vzipr <0,1,5,3>, <0,1,5,3>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2114183170U, // <5,5,3,5>: Cost 2 ins <5,5,u,5>, lane 2
+ 3205480448U, // <5,5,3,6>: Cost 3 ins <u,5,3,6>, lane 0
+ 2131746816U, // <5,5,3,7>: Cost 2 ins <u,5,3,7>, lane 0
+ 2131746816U, // <5,5,3,u>: Cost 2 ins <u,5,3,7>, lane 0
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2716987279U, // <5,5,4,1>: Cost 3 vext3 <5,4,1,5>, <5,4,1,5>
+ 3187900418U, // <5,5,4,2>: Cost 3 ins <5,5,u,2>, lane 2
+ 3187908610U, // <5,5,4,3>: Cost 3 ins <5,5,u,3>, lane 2
+ 1845022662U, // <5,5,4,4>: Cost 2 vzipl <5,4,7,6>, <5,4,7,6>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 1711312182U, // <5,5,4,6>: Cost 2 vuzpl <5,5,5,5>, RHS
+ 2114199554U, // <5,5,4,7>: Cost 2 ins <5,5,u,7>, lane 2
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2113986563U, // <5,5,5,1>: Cost 2 ins <5,5,5,u>, lane 3
+ 2113986563U, // <5,5,5,2>: Cost 2 ins <5,5,5,u>, lane 3
+ 2113986563U, // <5,5,5,3>: Cost 2 ins <5,5,5,u>, lane 3
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2113986563U, // <5,5,5,6>: Cost 2 ins <5,5,5,u>, lane 3
+ 1778421046U, // <5,5,5,7>: Cost 2 vuzpr <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2131910656U, // <5,5,6,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 2131918848U, // <5,5,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2131927040U, // <5,5,6,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2131935232U, // <5,5,6,3>: Cost 2 ins <u,5,6,3>, lane 0
+ 2131943424U, // <5,5,6,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 2131951616U, // <5,5,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 1900038658U, // <5,5,6,6>: Cost 2 vzipr <3,4,5,6>, <3,4,5,6>
+ 1058226176U, // <5,5,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <5,5,6,u>: Cost 1 ins RHS, lane 0
+ 2116059137U, // <5,5,7,0>: Cost 2 ins <5,u,7,0>, lane 1
+ 2114134019U, // <5,5,7,1>: Cost 2 ins <5,5,7,u>, lane 3
+ 2114134019U, // <5,5,7,2>: Cost 2 ins <5,5,7,u>, lane 3
+ 2116083713U, // <5,5,7,3>: Cost 2 ins <5,u,7,3>, lane 1
+ 2116091905U, // <5,5,7,4>: Cost 2 ins <5,u,7,4>, lane 1
+ 2040975364U, // <5,5,7,5>: Cost 2 vtrnr RHS, <5,5,5,5>
+ 2116108289U, // <5,5,7,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 967232822U, // <5,5,7,7>: Cost 1 vtrnr RHS, RHS
+ 967232823U, // <5,5,7,u>: Cost 1 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 1711314734U, // <5,5,u,2>: Cost 2 vuzpl <5,5,5,5>, LHS
+ 1778418333U, // <5,5,u,3>: Cost 2 vuzpr <5,5,5,5>, LHS
+ 1845022662U, // <5,5,u,4>: Cost 2 vzipl <5,4,7,6>, <5,4,7,6>
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 1711315098U, // <5,5,u,6>: Cost 2 vuzpl <5,5,5,5>, RHS
+ 967241014U, // <5,5,u,7>: Cost 1 vtrnr RHS, RHS
+ 967241015U, // <5,5,u,u>: Cost 1 vtrnr RHS, RHS
+ 2114805762U, // <5,6,0,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2132148224U, // <5,6,0,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2114838530U, // <5,6,0,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 3188588546U, // <5,6,0,5>: Cost 3 ins <5,6,u,5>, lane 2
+ 3188596738U, // <5,6,0,6>: Cost 3 ins <5,6,u,6>, lane 2
+ 2973732150U, // <5,6,0,7>: Cost 3 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2114805762U, // <5,6,1,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2115641345U, // <5,6,1,3>: Cost 2 ins <5,u,1,3>, lane 1
+ 2114838530U, // <5,6,1,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 2982366436U, // <5,6,1,6>: Cost 3 vzipr <4,u,5,1>, <4,4,6,6>
+ 1908624694U, // <5,6,1,7>: Cost 2 vzipr <4,u,5,1>, RHS
+ 1908624695U, // <5,6,1,u>: Cost 2 vzipr <4,u,5,1>, RHS
+ 2114805762U, // <5,6,2,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 3188555778U, // <5,6,2,1>: Cost 3 ins <5,6,u,1>, lane 2
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2114871301U, // <5,6,2,3>: Cost 2 ins <5,6,u,u>, lane 5
+ 2114838530U, // <5,6,2,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2964458806U, // <5,6,2,7>: Cost 3 vzipr <1,u,5,2>, RHS
+ 2114805762U, // <5,6,2,u>: Cost 2 ins <5,6,u,0>, lane 2
+ 2114805762U, // <5,6,3,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 3206103040U, // <5,6,3,1>: Cost 3 ins <u,6,3,1>, lane 0
+ 3206111232U, // <5,6,3,2>: Cost 3 ins <u,6,3,2>, lane 0
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2783119874U, // <5,6,3,5>: Cost 3 vuzpl <5,2,6,3>, <3,4,5,6>
+ 3206144000U, // <5,6,3,6>: Cost 3 ins <u,6,3,6>, lane 0
+ 2132410368U, // <5,6,3,7>: Cost 2 ins <u,6,3,7>, lane 0
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2114805762U, // <5,6,4,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 3189587969U, // <5,6,4,1>: Cost 3 ins <5,u,4,1>, lane 1
+ 2918765050U, // <5,6,4,2>: Cost 3 vzipl <5,4,7,6>, <6,2,7,3>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2114838530U, // <5,6,4,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2132475904U, // <5,6,4,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 2972437814U, // <5,6,4,7>: Cost 3 vzipr <3,2,5,4>, RHS
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2114805762U, // <5,6,5,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 2982398876U, // <5,6,5,2>: Cost 3 vzipr <4,u,5,5>, <4,0,6,2>
+ 3189678081U, // <5,6,5,3>: Cost 3 ins <5,u,5,3>, lane 1
+ 2114838530U, // <5,6,5,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 2115952641U, // <5,6,5,5>: Cost 2 ins <5,u,5,5>, lane 1
+ 1772530997U, // <5,6,5,6>: Cost 2 vuzpr <4,5,6,6>, <4,5,6,6>
+ 1908657462U, // <5,6,5,7>: Cost 2 vzipr <4,u,5,5>, RHS
+ 1908657463U, // <5,6,5,u>: Cost 2 vzipr <4,u,5,5>, RHS
+ 2114805762U, // <5,6,6,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 3189735425U, // <5,6,6,1>: Cost 3 ins <5,u,6,1>, lane 1
+ 2920043002U, // <5,6,6,2>: Cost 3 vzipl <5,6,7,0>, <6,2,7,3>
+ 2973781298U, // <5,6,6,3>: Cost 3 vzipr <3,4,5,6>, <4,5,6,3>
+ 2114838530U, // <5,6,6,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 2973781138U, // <5,6,6,5>: Cost 3 vzipr <3,4,5,6>, <4,3,6,5>
+ 2132623360U, // <5,6,6,6>: Cost 2 ins <u,6,6,6>, lane 0
+ 1900039478U, // <5,6,6,7>: Cost 2 vzipr <3,4,5,6>, RHS
+ 1900039479U, // <5,6,6,u>: Cost 2 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1887440182U, // <5,6,7,7>: Cost 2 vzipr <1,3,5,7>, RHS
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1887448374U, // <5,6,u,7>: Cost 2 vzipr <1,3,5,u>, RHS
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1772535808U, // <5,7,0,0>: Cost 2 vuzpr RHS, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 1772535828U, // <5,7,0,2>: Cost 2 vuzpr RHS, <0,0,2,2>
+ 2115493890U, // <5,7,0,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2846279860U, // <5,7,0,5>: Cost 3 vuzpr RHS, <3,0,4,5>
+ 2846277674U, // <5,7,0,6>: Cost 3 vuzpr RHS, <0,0,4,6>
+ 2115526658U, // <5,7,0,7>: Cost 2 ins <5,7,u,7>, lane 2
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2115018755U, // <5,7,1,0>: Cost 2 ins <5,7,1,u>, lane 3
+ 1772536628U, // <5,7,1,1>: Cost 2 vuzpr RHS, <1,1,1,1>
+ 2115018755U, // <5,7,1,2>: Cost 2 ins <5,7,1,u>, lane 3
+ 698794086U, // <5,7,1,3>: Cost 1 vuzpr RHS, LHS
+ 2115018755U, // <5,7,1,4>: Cost 2 ins <5,7,1,u>, lane 3
+ 2115018755U, // <5,7,1,5>: Cost 2 ins <5,7,1,u>, lane 3
+ 2115018755U, // <5,7,1,6>: Cost 2 ins <5,7,1,u>, lane 3
+ 2115526658U, // <5,7,1,7>: Cost 2 ins <5,7,u,7>, lane 2
+ 698794091U, // <5,7,1,u>: Cost 1 vuzpr RHS, LHS
+ 1772536726U, // <5,7,2,0>: Cost 2 vuzpr RHS, <1,2,3,0>
+ 2846277795U, // <5,7,2,1>: Cost 3 vuzpr RHS, <0,2,0,1>
+ 1772535972U, // <5,7,2,2>: Cost 2 vuzpr RHS, <0,2,0,2>
+ 1772537458U, // <5,7,2,3>: Cost 2 vuzpr RHS, <2,2,3,3>
+ 1772536730U, // <5,7,2,4>: Cost 2 vuzpr RHS, <1,2,3,4>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 1772536012U, // <5,7,2,6>: Cost 2 vuzpr RHS, <0,2,4,6>
+ 2115526658U, // <5,7,2,7>: Cost 2 ins <5,7,u,7>, lane 2
+ 1772535978U, // <5,7,2,u>: Cost 2 vuzpr RHS, <0,2,0,u>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 1772537510U, // <5,7,3,1>: Cost 2 vuzpr RHS, <2,3,0,1>
+ 2846278606U, // <5,7,3,2>: Cost 3 vuzpr RHS, <1,3,0,2>
+ 1772536792U, // <5,7,3,3>: Cost 2 vuzpr RHS, <1,3,1,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 1772537550U, // <5,7,3,5>: Cost 2 vuzpr RHS, <2,3,4,5>
+ 2846278628U, // <5,7,3,6>: Cost 3 vuzpr RHS, <1,3,2,6>
+ 1772536832U, // <5,7,3,7>: Cost 2 vuzpr RHS, <1,3,5,7>
+ 1772536797U, // <5,7,3,u>: Cost 2 vuzpr RHS, <1,3,1,u>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 2846277958U, // <5,7,4,2>: Cost 3 vuzpr RHS, <0,4,0,2>
+ 2115493890U, // <5,7,4,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 1772539088U, // <5,7,4,4>: Cost 2 vuzpr RHS, <4,4,4,4>
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 1772536156U, // <5,7,4,6>: Cost 2 vuzpr RHS, <0,4,2,6>
+ 2115526658U, // <5,7,4,7>: Cost 2 ins <5,7,u,7>, lane 2
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2115313667U, // <5,7,5,0>: Cost 2 ins <5,7,5,u>, lane 3
+ 2115313667U, // <5,7,5,1>: Cost 2 ins <5,7,5,u>, lane 3
+ 2115313667U, // <5,7,5,2>: Cost 2 ins <5,7,5,u>, lane 3
+ 2115493890U, // <5,7,5,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 2115313667U, // <5,7,5,4>: Cost 2 ins <5,7,5,u>, lane 3
+ 1772539908U, // <5,7,5,5>: Cost 2 vuzpr RHS, <5,5,5,5>
+ 2115313667U, // <5,7,5,6>: Cost 2 ins <5,7,5,u>, lane 3
+ 698797366U, // <5,7,5,7>: Cost 1 vuzpr RHS, RHS
+ 698797367U, // <5,7,5,u>: Cost 1 vuzpr RHS, RHS
+ 1772540002U, // <5,7,6,0>: Cost 2 vuzpr RHS, <5,6,7,0>
+ 2846279577U, // <5,7,6,1>: Cost 3 vuzpr RHS, <2,6,0,1>
+ 1772539212U, // <5,7,6,2>: Cost 2 vuzpr RHS, <4,6,0,2>
+ 2115493890U, // <5,7,6,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 1772540006U, // <5,7,6,4>: Cost 2 vuzpr RHS, <5,6,7,4>
+ 2846279617U, // <5,7,6,5>: Cost 3 vuzpr RHS, <2,6,4,5>
+ 1772539252U, // <5,7,6,6>: Cost 2 vuzpr RHS, <4,6,4,6>
+ 1772537786U, // <5,7,6,7>: Cost 2 vuzpr RHS, <2,6,3,7>
+ 1772537787U, // <5,7,6,u>: Cost 2 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1772540750U, // <5,7,7,1>: Cost 2 vuzpr RHS, <6,7,0,1>
+ 2846281846U, // <5,7,7,2>: Cost 3 vuzpr RHS, <5,7,0,2>
+ 1772540032U, // <5,7,7,3>: Cost 2 vuzpr RHS, <5,7,1,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1772540790U, // <5,7,7,5>: Cost 2 vuzpr RHS, <6,7,4,5>
+ 2116108289U, // <5,7,7,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 1772540072U, // <5,7,7,7>: Cost 2 vuzpr RHS, <5,7,5,7>
+ 1772540037U, // <5,7,7,u>: Cost 2 vuzpr RHS, <5,7,1,u>
+ 1772537212U, // <5,7,u,0>: Cost 2 vuzpr RHS, <1,u,3,0>
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 1772536458U, // <5,7,u,2>: Cost 2 vuzpr RHS, <0,u,0,2>
+ 698794653U, // <5,7,u,3>: Cost 1 vuzpr RHS, LHS
+ 1772537216U, // <5,7,u,4>: Cost 2 vuzpr RHS, <1,u,3,4>
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 1772536480U, // <5,7,u,6>: Cost 2 vuzpr RHS, <0,u,2,6>
+ 698797609U, // <5,7,u,7>: Cost 1 vuzpr RHS, RHS
+ 698794658U, // <5,7,u,u>: Cost 1 vuzpr RHS, LHS
+ 1772544000U, // <5,u,0,0>: Cost 2 vuzpr RHS, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1772544020U, // <5,u,0,2>: Cost 2 vuzpr RHS, <0,0,2,2>
+ 2111512578U, // <5,u,0,3>: Cost 2 ins <5,1,u,3>, lane 2
+ 2114838530U, // <5,u,0,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 2114183170U, // <5,u,0,5>: Cost 2 ins <5,5,u,5>, lane 2
+ 2113527810U, // <5,u,0,6>: Cost 2 ins <5,4,u,6>, lane 2
+ 2114199554U, // <5,u,0,7>: Cost 2 ins <5,5,u,7>, lane 2
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2114805762U, // <5,u,1,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 1772544820U, // <5,u,1,1>: Cost 2 vuzpr RHS, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 698802278U, // <5,u,1,3>: Cost 1 vuzpr RHS, LHS
+ 2114838530U, // <5,u,1,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 1843009690U, // <5,u,1,5>: Cost 2 vzipl <5,1,7,3>, RHS
+ 1980766362U, // <5,u,1,6>: Cost 2 vtrnl <5,7,1,3>, RHS
+ 1908624712U, // <5,u,1,7>: Cost 2 vzipr <4,u,5,1>, RHS
+ 698802283U, // <5,u,1,u>: Cost 1 vuzpr RHS, LHS
+ 1772544918U, // <5,u,2,0>: Cost 2 vuzpr RHS, <1,2,3,0>
+ 2128969728U, // <5,u,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 1772544164U, // <5,u,2,2>: Cost 2 vuzpr RHS, <0,2,0,2>
+ 1055244288U, // <5,u,2,3>: Cost 1 ins LHS, lane 0
+ 1772544922U, // <5,u,2,4>: Cost 2 vuzpr RHS, <1,2,3,4>
+ 2129002496U, // <5,u,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 1772544204U, // <5,u,2,6>: Cost 2 vuzpr RHS, <0,2,4,6>
+ 2114199554U, // <5,u,2,7>: Cost 2 ins <5,5,u,7>, lane 2
+ 1055244288U, // <5,u,2,u>: Cost 1 ins LHS, lane 0
+ 2129698816U, // <5,u,3,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 1772545702U, // <5,u,3,1>: Cost 2 vuzpr RHS, <2,3,0,1>
+ 2128388096U, // <5,u,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 1772544984U, // <5,u,3,3>: Cost 2 vuzpr RHS, <1,3,1,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 1772545742U, // <5,u,3,5>: Cost 2 vuzpr RHS, <2,3,4,5>
+ 2113527810U, // <5,u,3,6>: Cost 2 ins <5,4,u,6>, lane 2
+ 1772545024U, // <5,u,3,7>: Cost 2 vuzpr RHS, <1,3,5,7>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 2114805762U, // <5,u,4,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 1845024558U, // <5,u,4,1>: Cost 2 vzipl <5,4,7,6>, LHS
+ 2642897979U, // <5,u,4,2>: Cost 3 vext2 <4,2,5,u>, <4,2,5,u>
+ 2111512578U, // <5,u,4,3>: Cost 2 ins <5,1,u,3>, lane 2
+ 1772547280U, // <5,u,4,4>: Cost 2 vuzpr RHS, <4,4,4,4>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1772544348U, // <5,u,4,6>: Cost 2 vuzpr RHS, <0,4,2,6>
+ 2114199554U, // <5,u,4,7>: Cost 2 ins <5,5,u,7>, lane 2
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1845532462U, // <5,u,5,1>: Cost 2 vzipl <5,5,5,5>, LHS
+ 1979750190U, // <5,u,5,2>: Cost 2 vtrnl <5,5,5,5>, LHS
+ 1908654236U, // <5,u,5,3>: Cost 2 vzipr <4,u,5,5>, LHS
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 698805558U, // <5,u,5,7>: Cost 1 vuzpr RHS, RHS
+ 698805559U, // <5,u,5,u>: Cost 1 vuzpr RHS, RHS
+ 1772548194U, // <5,u,6,0>: Cost 2 vuzpr RHS, <5,6,7,0>
+ 1846302510U, // <5,u,6,1>: Cost 2 vzipl <5,6,7,0>, LHS
+ 1772547404U, // <5,u,6,2>: Cost 2 vuzpr RHS, <4,6,0,2>
+ 1900036252U, // <5,u,6,3>: Cost 2 vzipr <3,4,5,6>, LHS
+ 1772548198U, // <5,u,6,4>: Cost 2 vuzpr RHS, <5,6,7,4>
+ 1846302874U, // <5,u,6,5>: Cost 2 vzipl <5,6,7,0>, RHS
+ 1772547444U, // <5,u,6,6>: Cost 2 vuzpr RHS, <4,6,4,6>
+ 1058226176U, // <5,u,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <5,u,6,u>: Cost 1 ins RHS, lane 0
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 2040971914U, // <5,u,7,2>: Cost 2 vtrnr RHS, <0,u,0,2>
+ 967230109U, // <5,u,7,3>: Cost 1 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 2040971926U, // <5,u,7,5>: Cost 2 vtrnr RHS, <0,u,1,5>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 967233065U, // <5,u,7,7>: Cost 1 vtrnr RHS, RHS
+ 967230114U, // <5,u,7,u>: Cost 1 vtrnr RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 698802845U, // <5,u,u,3>: Cost 1 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 698805801U, // <5,u,u,7>: Cost 1 vuzpr RHS, RHS
+ 698802850U, // <5,u,u,u>: Cost 1 vuzpr RHS, LHS
+ 2128150528U, // <6,0,0,0>: Cost 2 ins <u,0,0,0>, lane 0
+ 2121523201U, // <6,0,0,1>: Cost 2 ins <6,u,0,1>, lane 1
+ 1718206566U, // <6,0,0,2>: Cost 2 vuzpl <6,7,0,1>, LHS
+ 2852933922U, // <6,0,0,3>: Cost 3 vuzpr <5,6,7,0>, <6,0,1,3>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 2852934680U, // <6,0,0,5>: Cost 3 vuzpr <5,6,7,0>, <7,0,4,5>
+ 2852934690U, // <6,0,0,6>: Cost 3 vuzpr <5,6,7,0>, <7,0,5,6>
+ 2852933962U, // <6,0,0,7>: Cost 3 vuzpr <5,6,7,0>, <6,0,5,7>
+ 1718206620U, // <6,0,0,u>: Cost 2 vuzpl <6,7,0,1>, LHS
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 2128232448U, // <6,0,1,1>: Cost 2 ins <u,0,1,1>, lane 0
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1779187814U, // <6,0,1,3>: Cost 2 vuzpr <5,6,7,0>, LHS
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2791949566U, // <6,0,1,7>: Cost 3 vuzpl <6,7,0,1>, <1,6,7,0>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504280678U, // <6,0,2,0>: Cost 2 vext1 <4,6,0,2>, LHS
+ 1849639014U, // <6,0,2,1>: Cost 2 vzipl <6,2,7,3>, LHS
+ 2128314368U, // <6,0,2,2>: Cost 2 ins <u,0,2,2>, lane 0
+ 2128322560U, // <6,0,2,3>: Cost 2 ins <u,0,2,3>, lane 0
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 2578026192U, // <6,0,2,5>: Cost 3 vext1 <4,6,0,2>, <5,1,7,3>
+ 2578026792U, // <6,0,2,6>: Cost 3 vext1 <4,6,0,2>, <6,0,2,0>
+ 2578027514U, // <6,0,2,7>: Cost 3 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3202113536U, // <6,0,3,0>: Cost 3 ins <u,0,3,0>, lane 0
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2128388096U, // <6,0,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 2852930520U, // <6,0,3,3>: Cost 3 vuzpr <5,6,7,0>, <1,3,1,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 2852931278U, // <6,0,3,5>: Cost 3 vuzpr <5,6,7,0>, <2,3,4,5>
+ 3190587394U, // <6,0,3,6>: Cost 3 ins <6,0,u,6>, lane 2
+ 2852930560U, // <6,0,3,7>: Cost 3 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2128388096U, // <6,0,3,u>: Cost 2 ins <u,0,3,2>, lane 0
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3195576321U, // <6,0,4,3>: Cost 3 ins <6,u,4,3>, lane 1
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2121850881U, // <6,0,4,5>: Cost 2 ins <6,u,4,5>, lane 1
+ 1718209846U, // <6,0,4,6>: Cost 2 vuzpl <6,7,0,1>, RHS
+ 3195609089U, // <6,0,4,7>: Cost 3 ins <6,u,4,7>, lane 1
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3202260992U, // <6,0,5,0>: Cost 3 ins <u,0,5,0>, lane 0
+ 2128527360U, // <6,0,5,1>: Cost 2 ins <u,0,5,1>, lane 0
+ 3056156774U, // <6,0,5,2>: Cost 3 vtrnl <6,0,5,7>, LHS
+ 3190562818U, // <6,0,5,3>: Cost 3 ins <6,0,u,3>, lane 2
+ 3058802892U, // <6,0,5,4>: Cost 3 vtrnl <6,4,5,6>, <0,2,4,6>
+ 2852933636U, // <6,0,5,5>: Cost 3 vuzpr <5,6,7,0>, <5,5,5,5>
+ 2852932908U, // <6,0,5,6>: Cost 3 vuzpr <5,6,7,0>, <4,5,5,6>
+ 1779191094U, // <6,0,5,7>: Cost 2 vuzpr <5,6,7,0>, RHS
+ 1779191095U, // <6,0,5,u>: Cost 2 vuzpr <5,6,7,0>, RHS
+ 1779191906U, // <6,0,6,0>: Cost 2 vuzpr <5,6,7,0>, <5,6,7,0>
+ 1852244070U, // <6,0,6,1>: Cost 2 vzipl <6,6,6,6>, LHS
+ 1986461798U, // <6,0,6,2>: Cost 2 vtrnl <6,6,6,6>, LHS
+ 3195723777U, // <6,0,6,3>: Cost 3 ins <6,u,6,3>, lane 1
+ 2852933734U, // <6,0,6,4>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,4>
+ 3195740161U, // <6,0,6,5>: Cost 3 ins <6,u,6,5>, lane 1
+ 2122006529U, // <6,0,6,6>: Cost 2 ins <6,u,6,6>, lane 1
+ 2128650240U, // <6,0,6,7>: Cost 2 ins <u,0,6,7>, lane 0
+ 1852244637U, // <6,0,6,u>: Cost 2 vzipl <6,6,6,6>, LHS
+ 1906753536U, // <6,0,7,0>: Cost 2 vzipr RHS, <0,0,0,0>
+ 1906755238U, // <6,0,7,1>: Cost 2 vzipr RHS, <2,3,0,1>
+ 1906753700U, // <6,0,7,2>: Cost 2 vzipr RHS, <0,2,0,2>
+ 2122055681U, // <6,0,7,3>: Cost 2 ins <6,u,7,3>, lane 1
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 2980496418U, // <6,0,7,5>: Cost 3 vzipr RHS, <1,4,0,5>
+ 2980495690U, // <6,0,7,6>: Cost 3 vzipr RHS, <0,4,0,6>
+ 2122088449U, // <6,0,7,7>: Cost 2 ins <6,u,7,7>, lane 1
+ 1906753706U, // <6,0,7,u>: Cost 2 vzipr RHS, <0,2,0,u>
+ 1906761728U, // <6,0,u,0>: Cost 2 vzipr RHS, <0,0,0,0>
+ 1906763430U, // <6,0,u,1>: Cost 2 vzipr RHS, <2,3,0,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1779188381U, // <6,0,u,3>: Cost 2 vuzpr <5,6,7,0>, LHS
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2121850881U, // <6,0,u,5>: Cost 2 ins <6,u,4,5>, lane 1
+ 1718212762U, // <6,0,u,6>: Cost 2 vuzpl <6,7,0,1>, RHS
+ 1779191337U, // <6,0,u,7>: Cost 2 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2121523201U, // <6,1,0,1>: Cost 2 ins <6,u,0,1>, lane 1
+ 2846673046U, // <6,1,0,2>: Cost 3 vuzpr <4,6,3,1>, <3,0,1,2>
+ 2047623270U, // <6,1,0,3>: Cost 2 vtrnr <5,6,7,0>, LHS
+ 2787385548U, // <6,1,0,4>: Cost 3 vuzpl <6,0,1,2>, <0,2,4,6>
+ 3060384768U, // <6,1,0,5>: Cost 3 vtrnl <6,7,0,1>, <1,3,5,7>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 3060385022U, // <6,1,0,7>: Cost 3 vtrnl <6,7,0,1>, <1,6,7,0>
+ 2047623275U, // <6,1,0,u>: Cost 2 vtrnr <5,6,7,0>, LHS
+ 2578088038U, // <6,1,1,0>: Cost 3 vext1 <4,6,1,1>, LHS
+ 2128896000U, // <6,1,1,1>: Cost 2 ins <u,1,1,1>, lane 0
+ 2981778426U, // <6,1,1,2>: Cost 3 vzipr <4,7,6,1>, <7,0,1,2>
+ 2128912384U, // <6,1,1,3>: Cost 2 ins <u,1,1,3>, lane 0
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3202670592U, // <6,1,1,5>: Cost 3 ins <u,1,1,5>, lane 0
+ 2691482470U, // <6,1,1,6>: Cost 3 vext3 <1,1,6,6>, <1,1,6,6>
+ 2980449545U, // <6,1,1,7>: Cost 3 vzipr <4,5,6,1>, <4,5,1,7>
+ 2128896000U, // <6,1,1,u>: Cost 2 ins <u,1,1,1>, lane 0
+ 2128961536U, // <6,1,2,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 2128969728U, // <6,1,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 2128977920U, // <6,1,2,2>: Cost 2 ins <u,1,2,2>, lane 0
+ 1055244288U, // <6,1,2,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <6,1,2,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 2129002496U, // <6,1,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2129010688U, // <6,1,2,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2129018880U, // <6,1,2,7>: Cost 2 ins <u,1,2,7>, lane 0
+ 1055244288U, // <6,1,2,u>: Cost 1 ins LHS, lane 0
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 2129059840U, // <6,1,3,3>: Cost 2 ins <u,1,3,3>, lane 0
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 2953923849U, // <6,1,3,7>: Cost 3 vzipr <0,1,6,3>, <4,5,1,7>
+ 2129059840U, // <6,1,3,u>: Cost 2 ins <u,1,3,3>, lane 0
+ 2788724044U, // <6,1,4,0>: Cost 3 vuzpl <6,2,1,3>, <4,6,0,2>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3195568129U, // <6,1,4,2>: Cost 3 ins <6,u,4,2>, lane 1
+ 2047656038U, // <6,1,4,3>: Cost 2 vtrnr <5,6,7,4>, LHS
+ 2791378292U, // <6,1,4,4>: Cost 3 vuzpl <6,6,1,3>, <4,6,4,6>
+ 2121850881U, // <6,1,4,5>: Cost 2 ins <6,u,4,5>, lane 1
+ 2834506076U, // <6,1,4,6>: Cost 3 vuzpr <2,6,0,1>, <0,4,2,6>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2047656043U, // <6,1,4,u>: Cost 2 vtrnr <5,6,7,4>, LHS
+ 2578120806U, // <6,1,5,0>: Cost 3 vext1 <4,6,1,5>, LHS
+ 2578121728U, // <6,1,5,1>: Cost 3 vext1 <4,6,1,5>, <1,3,5,7>
+ 3202940928U, // <6,1,5,2>: Cost 3 ins <u,1,5,2>, lane 0
+ 2129207296U, // <6,1,5,3>: Cost 2 ins <u,1,5,3>, lane 0
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3202965504U, // <6,1,5,5>: Cost 3 ins <u,1,5,5>, lane 0
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 2834509110U, // <6,1,5,7>: Cost 3 vuzpr <2,6,0,1>, RHS
+ 2129207296U, // <6,1,5,u>: Cost 2 ins <u,1,5,3>, lane 0
+ 2925986550U, // <6,1,6,0>: Cost 3 vzipl <6,6,6,6>, <1,0,3,2>
+ 2834507673U, // <6,1,6,1>: Cost 3 vuzpr <2,6,0,1>, <2,6,0,1>
+ 2982480022U, // <6,1,6,2>: Cost 3 vzipr <4,u,6,6>, <3,0,1,2>
+ 2041479270U, // <6,1,6,3>: Cost 2 vtrnr <4,6,4,6>, LHS
+ 2602020150U, // <6,1,6,4>: Cost 3 vext1 <u,6,1,6>, RHS
+ 2982478162U, // <6,1,6,5>: Cost 3 vzipr <4,u,6,6>, <0,4,1,5>
+ 2122006529U, // <6,1,6,6>: Cost 2 ins <6,u,6,6>, lane 1
+ 2129313792U, // <6,1,6,7>: Cost 2 ins <u,1,6,7>, lane 0
+ 2041479275U, // <6,1,6,u>: Cost 2 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 1906753546U, // <6,1,7,1>: Cost 2 vzipr RHS, <0,0,1,1>
+ 1906755734U, // <6,1,7,2>: Cost 2 vzipr RHS, <3,0,1,2>
+ 2029469798U, // <6,1,7,3>: Cost 2 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 1906753874U, // <6,1,7,5>: Cost 2 vzipr RHS, <0,4,1,5>
+ 2980495537U, // <6,1,7,6>: Cost 3 vzipr RHS, <0,2,1,6>
+ 2122088449U, // <6,1,7,7>: Cost 2 ins <6,u,7,7>, lane 1
+ 2029469803U, // <6,1,7,u>: Cost 2 vtrnr <2,6,3,7>, LHS
+ 2128961536U, // <6,1,u,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 1906761738U, // <6,1,u,1>: Cost 2 vzipr RHS, <0,0,1,1>
+ 1906763926U, // <6,1,u,2>: Cost 2 vzipr RHS, <3,0,1,2>
+ 1055244288U, // <6,1,u,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <6,1,u,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 1906762066U, // <6,1,u,5>: Cost 2 vzipr RHS, <0,4,1,5>
+ 2129010688U, // <6,1,u,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2122088449U, // <6,1,u,7>: Cost 2 ins <6,u,7,7>, lane 1
+ 1055244288U, // <6,1,u,u>: Cost 1 ins LHS, lane 0
+ 2846457856U, // <6,2,0,0>: Cost 3 vuzpr <4,6,0,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2129494016U, // <6,2,0,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 2118148098U, // <6,2,0,3>: Cost 2 ins <6,2,u,3>, lane 2
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3195297793U, // <6,2,0,5>: Cost 3 ins <6,u,0,5>, lane 1
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3195314177U, // <6,2,0,7>: Cost 3 ins <6,u,0,7>, lane 1
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2846458676U, // <6,2,1,1>: Cost 3 vuzpr <4,6,0,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 1772716134U, // <6,2,1,3>: Cost 2 vuzpr <4,6,0,2>, LHS
+ 3191414787U, // <6,2,1,4>: Cost 3 ins <6,2,1,u>, lane 3
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 3114885324U, // <6,2,1,6>: Cost 3 vtrnr <4,6,0,1>, <0,2,4,6>
+ 3191922690U, // <6,2,1,7>: Cost 3 ins <6,2,u,7>, lane 2
+ 1772716139U, // <6,2,1,u>: Cost 2 vuzpr <4,6,0,2>, LHS
+ 2846458774U, // <6,2,2,0>: Cost 3 vuzpr <4,6,0,2>, <1,2,3,0>
+ 3195412481U, // <6,2,2,1>: Cost 3 ins <6,u,2,1>, lane 1
+ 2129641472U, // <6,2,2,2>: Cost 2 ins <u,2,2,2>, lane 0
+ 1908703334U, // <6,2,2,3>: Cost 2 vzipr <4,u,6,2>, LHS
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3195445249U, // <6,2,2,5>: Cost 3 ins <6,u,2,5>, lane 1
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 2846462444U, // <6,2,2,7>: Cost 3 vuzpr <4,6,0,2>, <6,2,5,7>
+ 1908703339U, // <6,2,2,u>: Cost 2 vzipr <4,u,6,2>, LHS
+ 2129698816U, // <6,2,3,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 2230618020U, // <6,2,3,1>: Cost 3 vrev <2,6,1,3>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2129723392U, // <6,2,3,3>: Cost 2 ins <u,2,3,3>, lane 0
+ 2129731584U, // <6,2,3,4>: Cost 2 ins <u,2,3,4>, lane 0
+ 2846459598U, // <6,2,3,5>: Cost 3 vuzpr <4,6,0,2>, <2,3,4,5>
+ 2966528348U, // <6,2,3,6>: Cost 3 vzipr <2,2,6,3>, <0,4,2,6>
+ 2846458880U, // <6,2,3,7>: Cost 3 vuzpr <4,6,0,2>, <1,3,5,7>
+ 2129698816U, // <6,2,3,u>: Cost 2 ins <u,2,3,0>, lane 0
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3191873538U, // <6,2,4,1>: Cost 3 ins <6,2,u,1>, lane 2
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2118148098U, // <6,2,4,3>: Cost 2 ins <6,2,u,3>, lane 2
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2129821696U, // <6,2,4,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 3195609089U, // <6,2,4,7>: Cost 3 ins <6,u,4,7>, lane 1
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3191709699U, // <6,2,5,0>: Cost 3 ins <6,2,5,u>, lane 3
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3203604480U, // <6,2,5,2>: Cost 3 ins <u,2,5,2>, lane 0
+ 2118148098U, // <6,2,5,3>: Cost 2 ins <6,2,u,3>, lane 2
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2846461956U, // <6,2,5,5>: Cost 3 vuzpr <4,6,0,2>, <5,5,5,5>
+ 3115213004U, // <6,2,5,6>: Cost 3 vtrnr <4,6,4,5>, <0,2,4,6>
+ 1772719414U, // <6,2,5,7>: Cost 2 vuzpr <4,6,0,2>, RHS
+ 1772719415U, // <6,2,5,u>: Cost 2 vuzpr <4,6,0,2>, RHS
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 3195707393U, // <6,2,6,1>: Cost 3 ins <6,u,6,1>, lane 1
+ 1772719436U, // <6,2,6,2>: Cost 2 vuzpr <4,6,0,2>, <4,6,0,2>
+ 1908736102U, // <6,2,6,3>: Cost 2 vzipr <4,u,6,6>, LHS
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 3195740161U, // <6,2,6,5>: Cost 3 ins <6,u,6,5>, lane 1
+ 2122006529U, // <6,2,6,6>: Cost 2 ins <6,u,6,6>, lane 1
+ 2118189061U, // <6,2,6,7>: Cost 2 ins <6,2,u,u>, lane 5
+ 1908736107U, // <6,2,6,u>: Cost 2 vzipr <4,u,6,6>, LHS
+ 2118115331U, // <6,2,7,0>: Cost 2 ins <6,2,7,u>, lane 3
+ 2118115331U, // <6,2,7,1>: Cost 2 ins <6,2,7,u>, lane 3
+ 1906753556U, // <6,2,7,2>: Cost 2 vzipr RHS, <0,0,2,2>
+ 833011814U, // <6,2,7,3>: Cost 1 vzipr RHS, LHS
+ 2118115331U, // <6,2,7,4>: Cost 2 ins <6,2,7,u>, lane 3
+ 2118115331U, // <6,2,7,5>: Cost 2 ins <6,2,7,u>, lane 3
+ 1906753884U, // <6,2,7,6>: Cost 2 vzipr RHS, <0,4,2,6>
+ 2122088449U, // <6,2,7,7>: Cost 2 ins <6,u,7,7>, lane 1
+ 833011819U, // <6,2,7,u>: Cost 1 vzipr RHS, LHS
+ 2129698816U, // <6,2,u,0>: Cost 2 ins <u,2,3,0>, lane 0
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 1906761748U, // <6,2,u,2>: Cost 2 vzipr RHS, <0,0,2,2>
+ 833020006U, // <6,2,u,3>: Cost 1 vzipr RHS, LHS
+ 2129731584U, // <6,2,u,4>: Cost 2 ins <u,2,3,4>, lane 0
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 1906762076U, // <6,2,u,6>: Cost 2 vzipr RHS, <0,4,2,6>
+ 1772719657U, // <6,2,u,7>: Cost 2 vuzpr <4,6,0,2>, RHS
+ 833020011U, // <6,2,u,u>: Cost 1 vzipr RHS, LHS
+ 3203883008U, // <6,3,0,0>: Cost 3 ins <u,3,0,0>, lane 0
+ 2130149376U, // <6,3,0,1>: Cost 2 ins <u,3,0,1>, lane 0
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3121365976U, // <6,3,0,3>: Cost 3 vtrnr <5,6,7,0>, <1,3,1,3>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 3121366734U, // <6,3,0,5>: Cost 3 vtrnr <5,6,7,0>, <2,3,4,5>
+ 3195305985U, // <6,3,0,6>: Cost 3 ins <6,u,0,6>, lane 1
+ 3121366016U, // <6,3,0,7>: Cost 3 vtrnr <5,6,7,0>, <1,3,5,7>
+ 2130149376U, // <6,3,0,u>: Cost 2 ins <u,3,0,1>, lane 0
+ 2578235494U, // <6,3,1,0>: Cost 3 vext1 <4,6,3,1>, LHS
+ 3203964928U, // <6,3,1,1>: Cost 3 ins <u,3,1,1>, lane 0
+ 3203973120U, // <6,3,1,2>: Cost 3 ins <u,3,1,2>, lane 0
+ 2130239488U, // <6,3,1,3>: Cost 2 ins <u,3,1,3>, lane 0
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3203997696U, // <6,3,1,5>: Cost 3 ins <u,3,1,5>, lane 0
+ 2822725737U, // <6,3,1,6>: Cost 3 vuzpr <0,6,2,3>, <0,1,2,6>
+ 2970494906U, // <6,3,1,7>: Cost 3 vzipr <2,u,6,1>, <2,6,3,7>
+ 2130239488U, // <6,3,1,u>: Cost 2 ins <u,3,1,3>, lane 0
+ 2982445974U, // <6,3,2,0>: Cost 3 vzipr <4,u,6,2>, <1,2,3,0>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 2630985357U, // <6,3,2,2>: Cost 3 vext2 <2,2,6,3>, <2,2,6,3>
+ 2130313216U, // <6,3,2,3>: Cost 2 ins <u,3,2,3>, lane 0
+ 2982445978U, // <6,3,2,4>: Cost 3 vzipr <4,u,6,2>, <1,2,3,4>
+ 3114895054U, // <6,3,2,5>: Cost 3 vtrnr <4,6,0,2>, <2,3,4,5>
+ 2834596044U, // <6,3,2,6>: Cost 3 vuzpr <2,6,1,3>, <0,2,4,6>
+ 3114894336U, // <6,3,2,7>: Cost 3 vtrnr <4,6,0,2>, <1,3,5,7>
+ 2130313216U, // <6,3,2,u>: Cost 2 ins <u,3,2,3>, lane 0
+ 2578251878U, // <6,3,3,0>: Cost 3 vext1 <4,6,3,3>, LHS
+ 2792163478U, // <6,3,3,1>: Cost 3 vuzpl <6,7,3,0>, <3,0,1,2>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2130386944U, // <6,3,3,3>: Cost 2 ins <u,3,3,3>, lane 0
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 2792196610U, // <6,3,3,5>: Cost 3 vuzpl <6,7,3,4>, <3,4,5,6>
+ 2590200602U, // <6,3,3,6>: Cost 3 vext1 <6,6,3,3>, <6,6,3,3>
+ 2972501946U, // <6,3,3,7>: Cost 3 vzipr <3,2,6,3>, <2,6,3,7>
+ 2130386944U, // <6,3,3,u>: Cost 2 ins <u,3,3,3>, lane 0
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2705050078U, // <6,3,4,1>: Cost 3 vext3 <3,4,1,6>, <3,4,1,6>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2846540124U, // <6,3,4,6>: Cost 3 vuzpr <4,6,1,3>, <0,4,2,6>
+ 3121398784U, // <6,3,4,7>: Cost 3 vtrnr <5,6,7,4>, <1,3,5,7>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 2578268262U, // <6,3,5,0>: Cost 3 vext1 <4,6,3,5>, LHS
+ 3204259840U, // <6,3,5,1>: Cost 3 ins <u,3,5,1>, lane 0
+ 2648903448U, // <6,3,5,2>: Cost 3 vext2 <5,2,6,3>, <5,2,6,3>
+ 2578270722U, // <6,3,5,3>: Cost 3 vext1 <4,6,3,5>, <3,4,5,6>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3204292608U, // <6,3,5,5>: Cost 3 ins <u,3,5,5>, lane 0
+ 3204300800U, // <6,3,5,6>: Cost 3 ins <u,3,5,6>, lane 0
+ 2130567168U, // <6,3,5,7>: Cost 2 ins <u,3,5,7>, lane 0
+ 2130567168U, // <6,3,5,u>: Cost 2 ins <u,3,5,7>, lane 0
+ 2982478742U, // <6,3,6,0>: Cost 3 vzipr <4,u,6,6>, <1,2,3,0>
+ 3115222694U, // <6,3,6,1>: Cost 3 vtrnr <4,6,4,6>, <2,3,0,1>
+ 2982478582U, // <6,3,6,2>: Cost 3 vzipr <4,u,6,6>, <1,0,3,2>
+ 1748984315U, // <6,3,6,3>: Cost 2 vuzpr <0,6,2,3>, <0,6,2,3>
+ 2982478746U, // <6,3,6,4>: Cost 3 vzipr <4,u,6,6>, <1,2,3,4>
+ 3115222734U, // <6,3,6,5>: Cost 3 vtrnr <4,6,4,6>, <2,3,4,5>
+ 2122006529U, // <6,3,6,6>: Cost 2 ins <6,u,6,6>, lane 1
+ 2130640896U, // <6,3,6,7>: Cost 2 ins <u,3,6,7>, lane 0
+ 1748984315U, // <6,3,6,u>: Cost 2 vuzpr <0,6,2,3>, <0,6,2,3>
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 1906754376U, // <6,3,7,3>: Cost 2 vzipr RHS, <1,1,3,3>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 3103213262U, // <6,3,7,5>: Cost 3 vtrnr <2,6,3,7>, <2,3,4,5>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 1906754704U, // <6,3,7,7>: Cost 2 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2130149376U, // <6,3,u,1>: Cost 2 ins <u,3,0,1>, lane 0
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 1906762568U, // <6,3,u,3>: Cost 2 vzipr RHS, <1,1,3,3>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2122006529U, // <6,3,u,6>: Cost 2 ins <6,u,6,6>, lane 1
+ 1906762896U, // <6,3,u,7>: Cost 2 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2242465098U, // <6,4,0,0>: Cost 3 vrev <4,6,0,0>
+ 2121523201U, // <6,4,0,1>: Cost 2 ins <6,u,0,1>, lane 1
+ 1718534246U, // <6,4,0,2>: Cost 2 vuzpl <6,7,4,5>, LHS
+ 3195281409U, // <6,4,0,3>: Cost 3 ins <6,u,0,3>, lane 1
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 1986645302U, // <6,4,0,6>: Cost 2 vtrnl <6,7,0,1>, RHS
+ 3195314177U, // <6,4,0,7>: Cost 3 ins <6,u,0,7>, lane 1
+ 1986645320U, // <6,4,0,u>: Cost 2 vtrnl <6,7,0,1>, RHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 2242547028U, // <6,4,1,1>: Cost 3 vrev <4,6,1,1>
+ 3204636672U, // <6,4,1,2>: Cost 3 ins <u,4,1,2>, lane 0
+ 1779220582U, // <6,4,1,3>: Cost 2 vuzpr <5,6,7,4>, LHS
+ 3059813748U, // <6,4,1,4>: Cost 3 vtrnl <6,6,1,3>, <4,6,4,6>
+ 2130919424U, // <6,4,1,5>: Cost 2 ins <u,4,1,5>, lane 0
+ 3102941532U, // <6,4,1,6>: Cost 3 vtrnr <2,6,0,1>, <0,4,2,6>
+ 2242989450U, // <6,4,1,7>: Cost 3 vrev <4,6,7,1>
+ 1779220587U, // <6,4,1,u>: Cost 2 vuzpr <5,6,7,4>, LHS
+ 1168739660U, // <6,4,2,0>: Cost 2 vrev <4,6,0,2>
+ 3195412481U, // <6,4,2,1>: Cost 3 ins <6,u,2,1>, lane 1
+ 2242628958U, // <6,4,2,2>: Cost 3 vrev <4,6,2,2>
+ 2130976768U, // <6,4,2,3>: Cost 2 ins <u,4,2,3>, lane 0
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 1849642294U, // <6,4,2,5>: Cost 2 vzipl <6,2,7,3>, RHS
+ 2131001344U, // <6,4,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 3195461633U, // <6,4,2,7>: Cost 3 ins <6,u,2,7>, lane 1
+ 1169329556U, // <6,4,2,u>: Cost 2 vrev <4,6,u,2>
+ 3195478017U, // <6,4,3,0>: Cost 3 ins <6,u,3,0>, lane 1
+ 2242563414U, // <6,4,3,1>: Cost 3 vrev <4,6,1,3>
+ 2242637151U, // <6,4,3,2>: Cost 3 vrev <4,6,2,3>
+ 2242710888U, // <6,4,3,3>: Cost 3 vrev <4,6,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 2846623438U, // <6,4,3,5>: Cost 3 vuzpr <4,6,2,4>, <2,3,4,5>
+ 2965864652U, // <6,4,3,6>: Cost 3 vzipr <2,1,6,3>, <0,2,4,6>
+ 2852963328U, // <6,4,3,7>: Cost 3 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2243079573U, // <6,4,3,u>: Cost 3 vrev <4,6,u,3>
+ 2242497870U, // <6,4,4,0>: Cost 3 vrev <4,6,0,4>
+ 2852967732U, // <6,4,4,1>: Cost 3 vuzpr <5,6,7,4>, <7,4,0,1>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 2852967014U, // <6,4,4,3>: Cost 3 vuzpr <5,6,7,4>, <6,4,1,3>
+ 2131132416U, // <6,4,4,4>: Cost 2 ins <u,4,4,4>, lane 0
+ 2121850881U, // <6,4,4,5>: Cost 2 ins <6,u,4,5>, lane 1
+ 1718537526U, // <6,4,4,6>: Cost 2 vuzpl <6,7,4,5>, RHS
+ 2852967054U, // <6,4,4,7>: Cost 3 vuzpr <5,6,7,4>, <6,4,5,7>
+ 1718537544U, // <6,4,4,u>: Cost 2 vuzpl <6,7,4,5>, RHS
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 2242579800U, // <6,4,5,1>: Cost 3 vrev <4,6,1,5>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2242727274U, // <6,4,5,3>: Cost 3 vrev <4,6,3,5>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2131214336U, // <6,4,5,5>: Cost 2 ins <u,4,5,5>, lane 0
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1779223862U, // <6,4,5,7>: Cost 2 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1169067380U, // <6,4,6,4>: Cost 2 vrev <4,6,4,6>
+ 1852247350U, // <6,4,6,5>: Cost 2 vzipl <6,6,6,6>, RHS
+ 1986465078U, // <6,4,6,6>: Cost 2 vtrnl <6,6,6,6>, RHS
+ 2131304448U, // <6,4,6,7>: Cost 2 ins <u,4,6,7>, lane 0
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 2980495398U, // <6,4,7,2>: Cost 3 vzipr RHS, <0,0,4,2>
+ 2122055681U, // <6,4,7,3>: Cost 2 ins <6,u,7,3>, lane 1
+ 1906756816U, // <6,4,7,4>: Cost 2 vzipr RHS, <4,4,4,4>
+ 1906755278U, // <6,4,7,5>: Cost 2 vzipr RHS, <2,3,4,5>
+ 1906753740U, // <6,4,7,6>: Cost 2 vzipr RHS, <0,2,4,6>
+ 2122088449U, // <6,4,7,7>: Cost 2 ins <6,u,7,7>, lane 1
+ 1906753742U, // <6,4,7,u>: Cost 2 vzipr RHS, <0,2,4,u>
+ 1168788818U, // <6,4,u,0>: Cost 2 vrev <4,6,0,u>
+ 2121523201U, // <6,4,u,1>: Cost 2 ins <6,u,0,1>, lane 1
+ 1718540078U, // <6,4,u,2>: Cost 2 vuzpl <6,7,4,5>, LHS
+ 1779221149U, // <6,4,u,3>: Cost 2 vuzpr <5,6,7,4>, LHS
+ 1906765008U, // <6,4,u,4>: Cost 2 vzipr RHS, <4,4,4,4>
+ 1906763470U, // <6,4,u,5>: Cost 2 vzipr RHS, <2,3,4,5>
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1779224105U, // <6,4,u,7>: Cost 2 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3195256833U, // <6,5,0,0>: Cost 3 ins <6,u,0,0>, lane 1
+ 2121523201U, // <6,5,0,1>: Cost 2 ins <6,u,0,1>, lane 1
+ 2787721318U, // <6,5,0,2>: Cost 3 vuzpl <6,0,5,7>, LHS
+ 3195281409U, // <6,5,0,3>: Cost 3 ins <6,u,0,3>, lane 1
+ 2790367436U, // <6,5,0,4>: Cost 3 vuzpl <6,4,5,6>, <0,2,4,6>
+ 3121369092U, // <6,5,0,5>: Cost 3 vtrnr <5,6,7,0>, <5,5,5,5>
+ 2980440578U, // <6,5,0,6>: Cost 3 vzipr <4,5,6,0>, <3,4,5,6>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 2047626551U, // <6,5,0,u>: Cost 2 vtrnr <5,6,7,0>, RHS
+ 2578382950U, // <6,5,1,0>: Cost 3 vext1 <4,6,5,1>, LHS
+ 3205292032U, // <6,5,1,1>: Cost 3 ins <u,5,1,1>, lane 0
+ 3195346945U, // <6,5,1,2>: Cost 3 ins <6,u,1,2>, lane 1
+ 2834833510U, // <6,5,1,3>: Cost 3 vuzpr <2,6,4,5>, LHS
+ 2578386296U, // <6,5,1,4>: Cost 3 vext1 <4,6,5,1>, <4,6,5,1>
+ 2578387072U, // <6,5,1,5>: Cost 3 vext1 <4,6,5,1>, <5,7,1,3>
+ 2922205282U, // <6,5,1,6>: Cost 3 vzipl <6,1,0,3>, <5,6,7,0>
+ 2131599360U, // <6,5,1,7>: Cost 2 ins <u,5,1,7>, lane 0
+ 2131599360U, // <6,5,1,u>: Cost 2 ins <u,5,1,7>, lane 0
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 2982448018U, // <6,5,2,1>: Cost 3 vzipr <4,u,6,2>, <4,0,5,1>
+ 3195420673U, // <6,5,2,2>: Cost 3 ins <6,u,2,2>, lane 1
+ 2131640320U, // <6,5,2,3>: Cost 2 ins <u,5,2,3>, lane 0
+ 2578394489U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, <4,6,5,2>
+ 3114897412U, // <6,5,2,5>: Cost 3 vtrnr <4,6,0,2>, <5,5,5,5>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 2041154870U, // <6,5,2,7>: Cost 2 vtrnr <4,6,0,2>, RHS
+ 2041154871U, // <6,5,2,u>: Cost 2 vtrnr <4,6,0,2>, RHS
+ 3195478017U, // <6,5,3,0>: Cost 3 ins <6,u,3,0>, lane 1
+ 3205439488U, // <6,5,3,1>: Cost 3 ins <u,5,3,1>, lane 0
+ 3091164465U, // <6,5,3,2>: Cost 3 vtrnr <0,6,2,3>, <4,5,6,2>
+ 3195502593U, // <6,5,3,3>: Cost 3 ins <6,u,3,3>, lane 1
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3205472256U, // <6,5,3,5>: Cost 3 ins <u,5,3,5>, lane 0
+ 2980465154U, // <6,5,3,6>: Cost 3 vzipr <4,5,6,3>, <3,4,5,6>
+ 2131746816U, // <6,5,3,7>: Cost 2 ins <u,5,3,7>, lane 0
+ 2131746816U, // <6,5,3,u>: Cost 2 ins <u,5,3,7>, lane 0
+ 2789051724U, // <6,5,4,0>: Cost 3 vuzpl <6,2,5,7>, <4,6,0,2>
+ 3060715648U, // <6,5,4,1>: Cost 3 vtrnl <6,7,4,5>, <5,7,1,3>
+ 3195568129U, // <6,5,4,2>: Cost 3 ins <6,u,4,2>, lane 1
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2791705972U, // <6,5,4,4>: Cost 3 vuzpl <6,6,5,7>, <4,6,4,6>
+ 2121850881U, // <6,5,4,5>: Cost 2 ins <6,u,4,5>, lane 1
+ 2834833756U, // <6,5,4,6>: Cost 3 vuzpr <2,6,4,5>, <0,4,2,6>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3006363382U, // <6,5,5,1>: Cost 3 vzipr <u,u,6,5>, <u,0,5,1>
+ 3205595136U, // <6,5,5,2>: Cost 3 ins <u,5,5,2>, lane 0
+ 2980479105U, // <6,5,5,3>: Cost 3 vzipr <4,5,6,5>, <0,1,5,3>
+ 2578419068U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, <4,6,5,5>
+ 2131877888U, // <6,5,5,5>: Cost 2 ins <u,5,5,5>, lane 0
+ 2979154434U, // <6,5,5,6>: Cost 3 vzipr <4,3,6,5>, <3,4,5,6>
+ 2131894272U, // <6,5,5,7>: Cost 2 ins <u,5,5,7>, lane 0
+ 2131877888U, // <6,5,5,u>: Cost 2 ins <u,5,5,5>, lane 0
+ 2131910656U, // <6,5,6,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 2131918848U, // <6,5,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2131927040U, // <6,5,6,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2131935232U, // <6,5,6,3>: Cost 2 ins <u,5,6,3>, lane 0
+ 2131943424U, // <6,5,6,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 2131951616U, // <6,5,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 2131959808U, // <6,5,6,6>: Cost 2 ins <u,5,6,6>, lane 0
+ 1058226176U, // <6,5,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <6,5,6,u>: Cost 1 ins RHS, lane 0
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 1906756498U, // <6,5,7,1>: Cost 2 vzipr RHS, <4,0,5,1>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 2122055681U, // <6,5,7,3>: Cost 2 ins <6,u,7,3>, lane 1
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 1906756826U, // <6,5,7,5>: Cost 2 vzipr RHS, <4,4,5,5>
+ 1906756098U, // <6,5,7,6>: Cost 2 vzipr RHS, <3,4,5,6>
+ 2029473078U, // <6,5,7,7>: Cost 2 vtrnr <2,6,3,7>, RHS
+ 2029473079U, // <6,5,7,u>: Cost 2 vtrnr <2,6,3,7>, RHS
+ 2131910656U, // <6,5,u,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 1906764690U, // <6,5,u,1>: Cost 2 vzipr RHS, <4,0,5,1>
+ 2131927040U, // <6,5,u,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2122055681U, // <6,5,u,3>: Cost 2 ins <6,u,7,3>, lane 1
+ 2131943424U, // <6,5,u,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 1906765018U, // <6,5,u,5>: Cost 2 vzipr RHS, <4,4,5,5>
+ 1906764290U, // <6,5,u,6>: Cost 2 vzipr RHS, <3,4,5,6>
+ 1058226176U, // <6,5,u,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <6,5,u,u>: Cost 1 ins RHS, lane 0
+ 2047627362U, // <6,6,0,0>: Cost 2 vtrnr <5,6,7,0>, <5,6,7,0>
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 1718026342U, // <6,6,0,2>: Cost 2 vuzpl <6,6,6,6>, LHS
+ 3195281409U, // <6,6,0,3>: Cost 3 ins <6,u,0,3>, lane 1
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3195297793U, // <6,6,0,5>: Cost 3 ins <6,u,0,5>, lane 1
+ 2120826882U, // <6,6,0,6>: Cost 2 ins <6,6,u,6>, lane 2
+ 2120835074U, // <6,6,0,7>: Cost 2 ins <6,6,u,7>, lane 2
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 1906707760U, // <6,6,1,1>: Cost 2 vzipr <4,5,6,1>, <4,5,6,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 1773043814U, // <6,6,1,3>: Cost 2 vuzpr <4,6,4,6>, LHS
+ 3194068995U, // <6,6,1,4>: Cost 3 ins <6,6,1,u>, lane 3
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2120826882U, // <6,6,1,6>: Cost 2 ins <6,6,u,6>, lane 2
+ 2120835074U, // <6,6,1,7>: Cost 2 ins <6,6,u,7>, lane 2
+ 1773043819U, // <6,6,1,u>: Cost 2 vuzpr <4,6,4,6>, LHS
+ 3114896750U, // <6,6,2,0>: Cost 3 vtrnr <4,6,0,2>, <4,6,4,0>
+ 3195412481U, // <6,6,2,1>: Cost 3 ins <6,u,2,1>, lane 1
+ 2041154892U, // <6,6,2,2>: Cost 2 vtrnr <4,6,0,2>, <4,6,0,2>
+ 2120843269U, // <6,6,2,3>: Cost 2 ins <6,6,u,u>, lane 5
+ 3114897510U, // <6,6,2,4>: Cost 3 vtrnr <4,6,0,2>, <5,6,7,4>
+ 3195445249U, // <6,6,2,5>: Cost 3 ins <6,u,2,5>, lane 1
+ 2120826882U, // <6,6,2,6>: Cost 2 ins <6,6,u,6>, lane 2
+ 1908706614U, // <6,6,2,7>: Cost 2 vzipr <4,u,6,2>, RHS
+ 1908706615U, // <6,6,2,u>: Cost 2 vzipr <4,u,6,2>, RHS
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 2846787238U, // <6,6,3,1>: Cost 3 vuzpr <4,6,4,6>, <2,3,0,1>
+ 3206111232U, // <6,6,3,2>: Cost 3 ins <u,6,3,2>, lane 0
+ 1880178826U, // <6,6,3,3>: Cost 2 vzipr <0,1,6,3>, <0,1,6,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 2846787278U, // <6,6,3,5>: Cost 3 vuzpr <4,6,4,6>, <2,3,4,5>
+ 2120826882U, // <6,6,3,6>: Cost 2 ins <6,6,u,6>, lane 2
+ 2132410368U, // <6,6,3,7>: Cost 2 ins <u,6,3,7>, lane 0
+ 2132410368U, // <6,6,3,u>: Cost 2 ins <u,6,3,7>, lane 0
+ 2846790288U, // <6,6,4,0>: Cost 3 vuzpr <4,6,4,6>, <6,4,6,0>
+ 3194527746U, // <6,6,4,1>: Cost 3 ins <6,6,u,1>, lane 2
+ 2846788778U, // <6,6,4,2>: Cost 3 vuzpr <4,6,4,6>, <4,4,0,2>
+ 3195576321U, // <6,6,4,3>: Cost 3 ins <6,u,4,3>, lane 1
+ 2047660134U, // <6,6,4,4>: Cost 2 vtrnr <5,6,7,4>, <5,6,7,4>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 1718029622U, // <6,6,4,6>: Cost 2 vuzpl <6,6,6,6>, RHS
+ 2120835074U, // <6,6,4,7>: Cost 2 ins <6,6,u,7>, lane 2
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3194363907U, // <6,6,5,0>: Cost 3 ins <6,6,5,u>, lane 3
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3206258688U, // <6,6,5,2>: Cost 3 ins <u,6,5,2>, lane 0
+ 3194544130U, // <6,6,5,3>: Cost 3 ins <6,6,u,3>, lane 2
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 1906740532U, // <6,6,5,5>: Cost 2 vzipr <4,5,6,5>, <4,5,6,5>
+ 2120826882U, // <6,6,5,6>: Cost 2 ins <6,6,u,6>, lane 2
+ 1773047094U, // <6,6,5,7>: Cost 2 vuzpr <4,6,4,6>, RHS
+ 1773047095U, // <6,6,5,u>: Cost 2 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2120695811U, // <6,6,6,1>: Cost 2 ins <6,6,6,u>, lane 3
+ 2120695811U, // <6,6,6,2>: Cost 2 ins <6,6,6,u>, lane 3
+ 2120695811U, // <6,6,6,3>: Cost 2 ins <6,6,6,u>, lane 3
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2120695811U, // <6,6,6,5>: Cost 2 ins <6,6,6,u>, lane 3
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 1908739382U, // <6,6,6,7>: Cost 2 vzipr <4,u,6,6>, RHS
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2132647936U, // <6,6,7,0>: Cost 2 ins <u,6,7,0>, lane 0
+ 2120769539U, // <6,6,7,1>: Cost 2 ins <6,6,7,u>, lane 3
+ 1908747164U, // <6,6,7,2>: Cost 2 vzipr RHS, <4,0,6,2>
+ 2122055681U, // <6,6,7,3>: Cost 2 ins <6,u,7,3>, lane 1
+ 2132680704U, // <6,6,7,4>: Cost 2 ins <u,6,7,4>, lane 0
+ 2120769539U, // <6,6,7,5>: Cost 2 ins <6,6,7,u>, lane 3
+ 1906758456U, // <6,6,7,6>: Cost 2 vzipr RHS, <6,6,6,6>
+ 833015094U, // <6,6,7,7>: Cost 1 vzipr RHS, RHS
+ 833015095U, // <6,6,7,u>: Cost 1 vzipr RHS, RHS
+ 2047627362U, // <6,6,u,0>: Cost 2 vtrnr <5,6,7,0>, <5,6,7,0>
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 1906764700U, // <6,6,u,2>: Cost 2 vzipr RHS, <4,0,6,2>
+ 1773044381U, // <6,6,u,3>: Cost 2 vuzpr <4,6,4,6>, LHS
+ 2047660134U, // <6,6,u,4>: Cost 2 vtrnr <5,6,7,4>, <5,6,7,4>
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 833023286U, // <6,6,u,7>: Cost 1 vzipr RHS, RHS
+ 833023287U, // <6,6,u,u>: Cost 1 vzipr RHS, RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2120916995U, // <6,7,0,3>: Cost 2 ins <6,7,0,u>, lane 3
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2120916995U, // <6,7,0,6>: Cost 2 ins <6,7,0,u>, lane 3
+ 2120916995U, // <6,7,0,7>: Cost 2 ins <6,7,0,u>, lane 3
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1761034342U, // <6,7,1,3>: Cost 2 vuzpr <2,6,3,7>, LHS
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2121498626U, // <6,7,1,7>: Cost 2 ins <6,7,u,7>, lane 2
+ 1761034347U, // <6,7,1,u>: Cost 2 vuzpr <2,6,3,7>, LHS
+ 2121064451U, // <6,7,2,0>: Cost 2 ins <6,7,2,u>, lane 3
+ 2121449474U, // <6,7,2,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1059889156U, // <6,7,2,3>: Cost 1 ins LHS, lane 4
+ 2121064451U, // <6,7,2,4>: Cost 2 ins <6,7,2,u>, lane 3
+ 2121482242U, // <6,7,2,5>: Cost 2 ins <6,7,u,5>, lane 2
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2121498626U, // <6,7,2,7>: Cost 2 ins <6,7,u,7>, lane 2
+ 1059889156U, // <6,7,2,u>: Cost 1 ins LHS, lane 4
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2121449474U, // <6,7,3,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 2133696516U, // <6,7,3,2>: Cost 2 ins <u,u,3,2>, lane 4
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2121482242U, // <6,7,3,5>: Cost 2 ins <6,7,u,5>, lane 2
+ 2834777789U, // <6,7,3,6>: Cost 3 vuzpr <2,6,3,7>, <2,3,2,6>
+ 2133737476U, // <6,7,3,7>: Cost 2 ins <u,u,3,7>, lane 4
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2121449474U, // <6,7,4,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 2121211907U, // <6,7,4,2>: Cost 2 ins <6,7,4,u>, lane 3
+ 2121211907U, // <6,7,4,3>: Cost 2 ins <6,7,4,u>, lane 3
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203276U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,0,2>
+ 2121211907U, // <6,7,4,7>: Cost 2 ins <6,7,4,u>, lane 3
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2121465858U, // <6,7,5,3>: Cost 2 ins <6,7,u,3>, lane 2
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1761037622U, // <6,7,5,7>: Cost 2 vuzpr <2,6,3,7>, RHS
+ 1761037623U, // <6,7,5,u>: Cost 2 vuzpr <2,6,3,7>, RHS
+ 2121359363U, // <6,7,6,0>: Cost 2 ins <6,7,6,u>, lane 3
+ 2121449474U, // <6,7,6,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2121465858U, // <6,7,6,3>: Cost 2 ins <6,7,u,3>, lane 2
+ 2121359363U, // <6,7,6,4>: Cost 2 ins <6,7,6,u>, lane 3
+ 2121482242U, // <6,7,6,5>: Cost 2 ins <6,7,u,5>, lane 2
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1060216836U, // <6,7,6,7>: Cost 1 ins RHS, lane 4
+ 1060216836U, // <6,7,6,u>: Cost 1 ins RHS, lane 4
+ 1906757730U, // <6,7,7,0>: Cost 2 vzipr RHS, <5,6,7,0>
+ 2121449474U, // <6,7,7,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 1906758138U, // <6,7,7,3>: Cost 2 vzipr RHS, <6,2,7,3>
+ 1906757734U, // <6,7,7,4>: Cost 2 vzipr RHS, <5,6,7,4>
+ 2121482242U, // <6,7,7,5>: Cost 2 ins <6,7,u,5>, lane 2
+ 1906757574U, // <6,7,7,6>: Cost 2 vzipr RHS, <5,4,7,6>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1906757738U, // <6,7,7,u>: Cost 2 vzipr RHS, <5,6,7,u>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1059889156U, // <6,7,u,3>: Cost 1 ins LHS, lane 4
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1060216836U, // <6,7,u,7>: Cost 1 ins RHS, lane 4
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2047623837U, // <6,u,0,3>: Cost 2 vtrnr <5,6,7,0>, LHS
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 1986648218U, // <6,u,0,6>: Cost 2 vtrnl <6,7,0,1>, RHS
+ 2047626793U, // <6,u,0,7>: Cost 2 vtrnr <5,6,7,0>, RHS
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1761042534U, // <6,u,1,3>: Cost 2 vuzpr <2,6,3,u>, LHS
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2120826882U, // <6,u,1,6>: Cost 2 ins <6,6,u,6>, lane 2
+ 2120835074U, // <6,u,1,7>: Cost 2 ins <6,6,u,7>, lane 2
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 1849644846U, // <6,u,2,1>: Cost 2 vzipl <6,2,7,3>, LHS
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1055244288U, // <6,u,2,3>: Cost 1 ins LHS, lane 0
+ 1504873876U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, <4,6,u,2>
+ 1849645210U, // <6,u,2,5>: Cost 2 vzipl <6,2,7,3>, RHS
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2041155113U, // <6,u,2,7>: Cost 2 vtrnr <4,6,0,2>, RHS
+ 1055244288U, // <6,u,2,u>: Cost 1 ins LHS, lane 0
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2121449474U, // <6,u,3,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 2128388096U, // <6,u,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2121482242U, // <6,u,3,5>: Cost 2 ins <6,7,u,5>, lane 2
+ 2120826882U, // <6,u,3,6>: Cost 2 ins <6,6,u,6>, lane 2
+ 2131746816U, // <6,u,3,7>: Cost 2 ins <u,5,3,7>, lane 0
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2121449474U, // <6,u,4,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 1986975534U, // <6,u,4,2>: Cost 2 vtrnl <6,7,4,5>, LHS
+ 2047656605U, // <6,u,4,3>: Cost 2 vtrnr <5,6,7,4>, LHS
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220812U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,0,2>
+ 2047659561U, // <6,u,4,7>: Cost 2 vtrnr <5,6,7,4>, RHS
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2118148098U, // <6,u,5,3>: Cost 2 ins <6,2,u,3>, lane 2
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1761045814U, // <6,u,5,7>: Cost 2 vuzpr <2,6,3,u>, RHS
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 1852249902U, // <6,u,6,1>: Cost 2 vzipl <6,6,6,6>, LHS
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2041479837U, // <6,u,6,3>: Cost 2 vtrnr <4,6,4,6>, LHS
+ 1504906648U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, <4,6,u,6>
+ 1852250266U, // <6,u,6,5>: Cost 2 vzipl <6,6,6,6>, RHS
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1058226176U, // <6,u,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <6,u,6,u>: Cost 1 ins RHS, lane 0
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1906753609U, // <6,u,7,1>: Cost 2 vzipr RHS, <0,0,u,1>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 833011868U, // <6,u,7,3>: Cost 1 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 1906753937U, // <6,u,7,5>: Cost 2 vzipr RHS, <0,4,u,5>
+ 1906753776U, // <6,u,7,6>: Cost 2 vzipr RHS, <0,2,u,6>
+ 833015112U, // <6,u,7,7>: Cost 1 vzipr RHS, RHS
+ 833011873U, // <6,u,7,u>: Cost 1 vzipr RHS, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 833020060U, // <6,u,u,3>: Cost 1 vzipr RHS, LHS
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 833023304U, // <6,u,u,7>: Cost 1 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 2987152532U, // <7,0,0,3>: Cost 3 vzipr <5,6,7,0>, <7,2,0,3>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2987152210U, // <7,0,0,5>: Cost 3 vzipr <5,6,7,0>, <6,7,0,5>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 2987152050U, // <7,0,0,7>: Cost 3 vzipr <5,6,7,0>, <6,5,0,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2128232448U, // <7,0,1,1>: Cost 2 ins <u,0,1,1>, lane 0
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2122317827U, // <7,0,1,3>: Cost 2 ins <7,0,1,u>, lane 3
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2122317827U, // <7,0,1,5>: Cost 2 ins <7,0,1,u>, lane 3
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2122317827U, // <7,0,1,7>: Cost 2 ins <7,0,1,u>, lane 3
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2128314368U, // <7,0,2,2>: Cost 2 ins <u,0,2,2>, lane 0
+ 2122833925U, // <7,0,2,3>: Cost 2 ins <7,0,u,u>, lane 5
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2712060126U, // <7,0,2,6>: Cost 3 vext3 RHS, <0,2,6,6>
+ 3201433601U, // <7,0,2,7>: Cost 3 ins <7,u,2,7>, lane 1
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2983854080U, // <7,0,3,0>: Cost 3 vzipr <5,1,7,3>, <0,0,0,0>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2128388096U, // <7,0,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 3196559362U, // <7,0,3,6>: Cost 3 ins <7,0,u,6>, lane 2
+ 3201507329U, // <7,0,3,7>: Cost 3 ins <7,u,3,7>, lane 1
+ 2128388096U, // <7,0,3,u>: Cost 2 ins <u,0,3,2>, lane 0
+ 2712060230U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,2>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3201548289U, // <7,0,4,3>: Cost 3 ins <7,u,4,3>, lane 1
+ 2712060269U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,5>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606348U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,0,2>
+ 3201581057U, // <7,0,4,7>: Cost 3 ins <7,u,4,7>, lane 1
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2647625340U, // <7,0,5,0>: Cost 3 vext2 <5,0,7,0>, <5,0,7,0>
+ 2128527360U, // <7,0,5,1>: Cost 2 ins <u,0,5,1>, lane 0
+ 1991032934U, // <7,0,5,2>: Cost 2 vtrnl <7,4,5,6>, LHS
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2847477046U, // <7,0,5,7>: Cost 3 vuzpr <4,7,5,0>, RHS
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2985869312U, // <7,0,6,0>: Cost 3 vzipr <5,4,7,6>, <0,0,0,0>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2128609280U, // <7,0,6,2>: Cost 2 ins <u,0,6,2>, lane 0
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3202367488U, // <7,0,6,4>: Cost 3 ins <u,0,6,4>, lane 0
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2122833925U, // <7,0,6,7>: Cost 2 ins <7,0,u,u>, lane 5
+ 2128609280U, // <7,0,6,u>: Cost 2 ins <u,0,6,2>, lane 0
+ 2847477192U, // <7,0,7,0>: Cost 3 vuzpr <4,7,5,0>, <4,7,5,0>
+ 1858961510U, // <7,0,7,1>: Cost 2 vzipl <7,7,7,7>, LHS
+ 1993179238U, // <7,0,7,2>: Cost 2 vtrnl <7,7,7,7>, LHS
+ 3201769473U, // <7,0,7,3>: Cost 3 ins <7,u,7,3>, lane 1
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2128060417U, // <7,0,7,7>: Cost 2 ins <7,u,7,7>, lane 1
+ 1858962077U, // <7,0,7,u>: Cost 2 vzipl <7,7,7,7>, LHS
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2122317827U, // <7,0,u,3>: Cost 2 ins <7,0,1,u>, lane 3
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2122317827U, // <7,0,u,7>: Cost 2 ins <7,0,1,u>, lane 3
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2712060634U, // <7,1,0,0>: Cost 3 vext3 RHS, <1,0,0,1>
+ 2128822272U, // <7,1,0,1>: Cost 2 ins <u,1,0,1>, lane 0
+ 1719615590U, // <7,1,0,2>: Cost 2 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2859062268U, // <7,1,0,4>: Cost 3 vuzpr <6,7,0,1>, <7,0,1,4>
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2859061568U, // <7,1,0,6>: Cost 3 vuzpr <6,7,0,1>, <6,0,4,6>
+ 3201286145U, // <7,1,0,7>: Cost 3 ins <7,u,0,7>, lane 1
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060714U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,0>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 2127577089U, // <7,1,1,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 2859057294U, // <7,1,1,7>: Cost 3 vuzpr <6,7,0,1>, <0,1,6,7>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2128961536U, // <7,1,2,0>: Cost 2 ins <u,1,2,0>, lane 0
+ 2128969728U, // <7,1,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 2128977920U, // <7,1,2,2>: Cost 2 ins <u,1,2,2>, lane 0
+ 1055244288U, // <7,1,2,3>: Cost 1 ins LHS, lane 0
+ 2128994304U, // <7,1,2,4>: Cost 2 ins <u,1,2,4>, lane 0
+ 2129002496U, // <7,1,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2129010688U, // <7,1,2,6>: Cost 2 ins <u,1,2,6>, lane 0
+ 2129018880U, // <7,1,2,7>: Cost 2 ins <u,1,2,7>, lane 0
+ 1055244288U, // <7,1,2,u>: Cost 1 ins LHS, lane 0
+ 1510998118U, // <7,1,3,0>: Cost 2 vext1 <5,7,1,3>, LHS
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2047869030U, // <7,1,3,3>: Cost 2 vtrnr <5,7,1,3>, LHS
+ 1511001398U, // <7,1,3,4>: Cost 2 vext1 <5,7,1,3>, RHS
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2983859604U, // <7,1,3,7>: Cost 3 vzipr <5,1,7,3>, <7,5,1,7>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2129133568U, // <7,1,4,3>: Cost 2 ins <u,1,4,3>, lane 0
+ 2859060432U, // <7,1,4,4>: Cost 3 vuzpr <6,7,0,1>, <4,4,4,4>
+ 2129149952U, // <7,1,4,5>: Cost 2 ins <u,1,4,5>, lane 0
+ 1719618870U, // <7,1,4,6>: Cost 2 vuzpl <7,0,1,2>, RHS
+ 2793360778U, // <7,1,4,7>: Cost 3 vuzpl <7,0,1,2>, <4,6,7,1>
+ 1719618888U, // <7,1,4,u>: Cost 2 vuzpl <7,0,1,2>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3202940928U, // <7,1,5,2>: Cost 3 ins <u,1,5,2>, lane 0
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 2985861458U, // <7,1,5,5>: Cost 3 vzipr <5,4,7,5>, <0,4,1,5>
+ 2127904769U, // <7,1,5,6>: Cost 2 ins <7,u,5,6>, lane 1
+ 1785318710U, // <7,1,5,7>: Cost 2 vuzpr <6,7,0,1>, RHS
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 2653606230U, // <7,1,6,0>: Cost 3 vext2 <6,0,7,1>, <6,0,7,1>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2129281024U, // <7,1,6,3>: Cost 2 ins <u,1,6,3>, lane 0
+ 2859061350U, // <7,1,6,4>: Cost 3 vuzpr <6,7,0,1>, <5,6,7,4>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 2859060596U, // <7,1,6,6>: Cost 3 vuzpr <6,7,0,1>, <4,6,4,6>
+ 2129313792U, // <7,1,6,7>: Cost 2 ins <u,1,6,7>, lane 0
+ 2129281024U, // <7,1,6,u>: Cost 2 ins <u,1,6,3>, lane 0
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 1785320270U, // <7,1,7,1>: Cost 2 vuzpr <6,7,0,1>, <6,7,0,1>
+ 2986543254U, // <7,1,7,2>: Cost 3 vzipr <5,5,7,7>, <3,0,1,2>
+ 2048196710U, // <7,1,7,3>: Cost 2 vtrnr <5,7,5,7>, LHS
+ 2793362538U, // <7,1,7,4>: Cost 3 vuzpl <7,0,1,2>, <7,1,4,6>
+ 2986541394U, // <7,1,7,5>: Cost 3 vzipr <5,5,7,7>, <0,4,1,5>
+ 3201794049U, // <7,1,7,6>: Cost 3 ins <7,u,7,6>, lane 1
+ 2128060417U, // <7,1,7,7>: Cost 2 ins <7,u,7,7>, lane 1
+ 2048196715U, // <7,1,7,u>: Cost 2 vtrnr <5,7,5,7>, LHS
+ 1511039078U, // <7,1,u,0>: Cost 2 vext1 <5,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 1719621422U, // <7,1,u,2>: Cost 2 vuzpl <7,0,1,2>, LHS
+ 1055244288U, // <7,1,u,3>: Cost 1 ins LHS, lane 0
+ 1511042358U, // <7,1,u,4>: Cost 2 vext1 <5,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 1719621786U, // <7,1,u,6>: Cost 2 vuzpl <7,0,1,2>, RHS
+ 1785318953U, // <7,1,u,7>: Cost 2 vuzpr <6,7,0,1>, RHS
+ 1055244288U, // <7,1,u,u>: Cost 1 ins LHS, lane 0
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2129494016U, // <7,2,0,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 1913405542U, // <7,2,0,3>: Cost 2 vzipr <5,6,7,0>, LHS
+ 2712061400U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,2>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 2927577066U, // <7,2,0,7>: Cost 3 vzipl <7,0,1,2>, <2,7,0,1>
+ 1913405547U, // <7,2,0,u>: Cost 2 vzipr <5,6,7,0>, LHS
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3203301376U, // <7,2,1,1>: Cost 3 ins <u,2,1,1>, lane 0
+ 2127577089U, // <7,2,1,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 2974548070U, // <7,2,1,3>: Cost 3 vzipr <3,5,7,1>, LHS
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3203334144U, // <7,2,1,5>: Cost 3 ins <u,2,1,5>, lane 0
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2127577089U, // <7,2,1,u>: Cost 2 ins <7,u,1,2>, lane 1
+ 2712061524U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,0>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061564U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,4>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061581U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,3>
+ 3201433601U, // <7,2,2,7>: Cost 3 ins <7,u,2,7>, lane 1
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 1638319802U, // <7,2,3,2>: Cost 2 vext3 RHS, <2,3,2,3>
+ 1910112358U, // <7,2,3,3>: Cost 2 vzipr <5,1,7,3>, LHS
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 1625048802U, // <7,2,3,6>: Cost 2 vext3 <2,3,6,7>, <2,3,6,7>
+ 2990495214U, // <7,2,3,7>: Cost 3 vzipr <6,2,7,3>, <7,6,2,7>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061688U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,2>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 1913438310U, // <7,2,4,3>: Cost 2 vzipr <5,6,7,4>, LHS
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2129821696U, // <7,2,4,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 3201581057U, // <7,2,4,7>: Cost 3 ins <7,u,4,7>, lane 1
+ 1913438315U, // <7,2,4,u>: Cost 2 vzipr <5,6,7,4>, LHS
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3203596288U, // <7,2,5,1>: Cost 3 ins <u,2,5,1>, lane 0
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3203629056U, // <7,2,5,5>: Cost 3 ins <u,2,5,5>, lane 0
+ 2127904769U, // <7,2,5,6>: Cost 2 ins <7,u,5,6>, lane 1
+ 2853096758U, // <7,2,5,7>: Cost 3 vuzpr <5,7,0,2>, RHS
+ 2127904769U, // <7,2,5,u>: Cost 2 ins <7,u,5,6>, lane 1
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 2129977344U, // <7,2,6,7>: Cost 2 ins <u,2,6,7>, lane 0
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 3121939350U, // <7,2,7,0>: Cost 3 vtrnr <5,7,5,7>, <1,2,3,0>
+ 3203743744U, // <7,2,7,1>: Cost 3 ins <u,2,7,1>, lane 0
+ 1720366165U, // <7,2,7,2>: Cost 2 vuzpl <7,1,2,3>, <7,1,2,3>
+ 1912799334U, // <7,2,7,3>: Cost 2 vzipr <5,5,7,7>, LHS
+ 3121939354U, // <7,2,7,4>: Cost 3 vtrnr <5,7,5,7>, <1,2,3,4>
+ 3203776512U, // <7,2,7,5>: Cost 3 ins <u,2,7,5>, lane 0
+ 2986541404U, // <7,2,7,6>: Cost 3 vzipr <5,5,7,7>, <0,4,2,6>
+ 2128060417U, // <7,2,7,7>: Cost 2 ins <7,u,7,7>, lane 1
+ 1912799339U, // <7,2,7,u>: Cost 2 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 2129494016U, // <7,2,u,2>: Cost 2 ins <u,2,0,2>, lane 0
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2129821696U, // <7,2,u,6>: Cost 2 ins <u,2,4,6>, lane 0
+ 2129977344U, // <7,2,u,7>: Cost 2 ins <u,2,6,7>, lane 0
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2712062119U, // <7,3,0,3>: Cost 3 vext3 RHS, <3,0,3,1>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 2985157776U, // <7,3,0,7>: Cost 3 vzipr <5,3,7,0>, <1,5,3,7>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2127577089U, // <7,3,1,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 1779433574U, // <7,3,1,3>: Cost 2 vuzpr <5,7,1,3>, LHS
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 2853179064U, // <7,3,1,6>: Cost 3 vuzpr <5,7,1,3>, <5,1,4,6>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 1779433579U, // <7,3,1,u>: Cost 2 vuzpr <5,7,1,3>, LHS
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2130313216U, // <7,3,2,3>: Cost 2 ins <u,3,2,3>, lane 0
+ 2712062292U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,3>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2130313216U, // <7,3,2,u>: Cost 2 ins <u,3,2,3>, lane 0
+ 2712062334U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,0>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2990491658U, // <7,3,3,6>: Cost 3 vzipr <6,2,7,3>, <2,7,3,6>
+ 2972574864U, // <7,3,3,7>: Cost 3 vzipr <3,2,7,3>, <1,5,3,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2987180790U, // <7,3,4,2>: Cost 3 vzipr <5,6,7,4>, <1,0,3,2>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062455U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,4>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313164U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,0,2>
+ 2985190544U, // <7,3,4,7>: Cost 3 vzipr <5,3,7,4>, <1,5,3,7>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2712062498U, // <7,3,5,0>: Cost 3 vext3 RHS, <3,5,0,2>
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2127904769U, // <7,3,5,6>: Cost 2 ins <7,u,5,6>, lane 1
+ 1779436854U, // <7,3,5,7>: Cost 2 vuzpr <5,7,1,3>, RHS
+ 1779436855U, // <7,3,5,u>: Cost 2 vuzpr <5,7,1,3>, RHS
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2853178744U, // <7,3,6,1>: Cost 3 vuzpr <5,7,1,3>, <4,6,5,1>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3204366336U, // <7,3,6,5>: Cost 3 ins <u,3,6,5>, lane 0
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2130640896U, // <7,3,6,7>: Cost 2 ins <u,3,6,7>, lane 0
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 1779437696U, // <7,3,7,3>: Cost 2 vuzpr <5,7,1,3>, <5,7,1,3>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2237582070U, // <7,3,7,5>: Cost 3 vrev <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2128060417U, // <7,3,7,7>: Cost 2 ins <7,u,7,7>, lane 1
+ 1779437696U, // <7,3,7,u>: Cost 2 vuzpr <5,7,1,3>, <5,7,1,3>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1779434141U, // <7,3,u,3>: Cost 2 vuzpr <5,7,1,3>, LHS
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2127904769U, // <7,3,u,6>: Cost 2 ins <7,u,5,6>, lane 1
+ 1779437097U, // <7,3,u,7>: Cost 2 vuzpr <5,7,1,3>, RHS
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2714053478U, // <7,4,0,0>: Cost 3 vext3 RHS, <4,0,0,2>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3201253377U, // <7,4,0,3>: Cost 3 ins <7,u,0,3>, lane 1
+ 2714053512U, // <7,4,0,4>: Cost 3 vext3 RHS, <4,0,4,0>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 2927578568U, // <7,4,0,7>: Cost 3 vzipl <7,0,1,2>, <4,7,5,0>
+ 1640311726U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,2>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2127577089U, // <7,4,1,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 3127495888U, // <7,4,1,4>: Cost 3 vtrnr <6,7,0,1>, <4,4,4,4>
+ 2130919424U, // <7,4,1,5>: Cost 2 ins <u,4,1,5>, lane 0
+ 1988054326U, // <7,4,1,6>: Cost 2 vtrnl <7,0,1,2>, RHS
+ 3061796234U, // <7,4,1,7>: Cost 3 vtrnl <7,0,1,2>, <4,6,7,1>
+ 1988054344U, // <7,4,1,u>: Cost 2 vtrnl <7,0,1,2>, RHS
+ 3204694016U, // <7,4,2,0>: Cost 3 ins <u,4,2,0>, lane 0
+ 3199172610U, // <7,4,2,1>: Cost 3 ins <7,4,u,1>, lane 2
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2125488133U, // <7,4,2,3>: Cost 2 ins <7,4,u,u>, lane 5
+ 2853258138U, // <7,4,2,4>: Cost 3 vuzpr <5,7,2,4>, <1,2,3,4>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2131001344U, // <7,4,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 3201433601U, // <7,4,2,7>: Cost 3 ins <7,u,2,7>, lane 1
+ 2125488133U, // <7,4,2,u>: Cost 2 ins <7,4,u,u>, lane 5
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3201458177U, // <7,4,3,1>: Cost 3 ins <7,u,3,1>, lane 1
+ 3204784128U, // <7,4,3,2>: Cost 3 ins <u,4,3,2>, lane 0
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2983857360U, // <7,4,3,4>: Cost 3 vzipr <5,1,7,3>, <4,4,4,4>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2125471746U, // <7,4,3,6>: Cost 2 ins <7,4,u,6>, lane 2
+ 3201507329U, // <7,4,3,7>: Cost 3 ins <7,u,3,7>, lane 1
+ 2125471746U, // <7,4,3,u>: Cost 2 ins <7,4,u,6>, lane 2
+ 2714053800U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,0>
+ 3201531905U, // <7,4,4,1>: Cost 3 ins <7,u,4,1>, lane 1
+ 3201540097U, // <7,4,4,2>: Cost 3 ins <7,u,4,2>, lane 1
+ 2987185336U, // <7,4,4,3>: Cost 3 vzipr <5,6,7,4>, <7,2,4,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2987185664U, // <7,4,4,7>: Cost 3 vzipr <5,6,7,4>, <7,6,4,7>
+ 1640312054U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,6>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2125266947U, // <7,4,5,1>: Cost 2 ins <7,4,5,u>, lane 3
+ 2125266947U, // <7,4,5,2>: Cost 2 ins <7,4,5,u>, lane 3
+ 2125266947U, // <7,4,5,3>: Cost 2 ins <7,4,5,u>, lane 3
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2131214336U, // <7,4,5,5>: Cost 2 ins <u,4,5,5>, lane 0
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2125266947U, // <7,4,5,7>: Cost 2 ins <7,4,5,u>, lane 3
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 1638468940U, // <7,4,6,0>: Cost 2 vext3 RHS, <4,6,0,2>
+ 2712063318U, // <7,4,6,1>: Cost 3 vext3 RHS, <4,6,1,3>
+ 2712210780U, // <7,4,6,2>: Cost 3 vext3 RHS, <4,6,2,0>
+ 2712210790U, // <7,4,6,3>: Cost 3 vext3 RHS, <4,6,3,1>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2131296256U, // <7,4,6,6>: Cost 2 ins <u,4,6,6>, lane 0
+ 2125488133U, // <7,4,6,7>: Cost 2 ins <7,4,u,u>, lane 5
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 2794279930U, // <7,4,7,1>: Cost 3 vuzpl <7,1,4,6>, <7,0,1,2>
+ 3201761281U, // <7,4,7,2>: Cost 3 ins <7,u,7,2>, lane 1
+ 3201769473U, // <7,4,7,3>: Cost 3 ins <7,u,7,3>, lane 1
+ 2847509964U, // <7,4,7,4>: Cost 3 vuzpr <4,7,5,4>, <4,7,5,4>
+ 1858964790U, // <7,4,7,5>: Cost 2 vzipl <7,7,7,7>, RHS
+ 1993182518U, // <7,4,7,6>: Cost 2 vtrnl <7,7,7,7>, RHS
+ 2128060417U, // <7,4,7,7>: Cost 2 ins <7,u,7,7>, lane 1
+ 1858965033U, // <7,4,7,u>: Cost 2 vzipl <7,7,7,7>, RHS
+ 1640312302U, // <7,4,u,0>: Cost 2 vext3 RHS, <4,u,0,2>
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2127577089U, // <7,4,u,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 2125488133U, // <7,4,u,3>: Cost 2 ins <7,4,u,u>, lane 5
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2125266947U, // <7,4,u,7>: Cost 2 ins <7,4,5,u>, lane 3
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2131476480U, // <7,5,0,1>: Cost 2 ins <u,5,0,1>, lane 0
+ 1722597478U, // <7,5,0,2>: Cost 2 vuzpl <7,4,5,6>, LHS
+ 3201253377U, // <7,5,0,3>: Cost 3 ins <7,u,0,3>, lane 1
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2987150554U, // <7,5,0,5>: Cost 3 vzipr <5,6,7,0>, <4,4,5,5>
+ 2987149826U, // <7,5,0,6>: Cost 3 vzipr <5,6,7,0>, <3,4,5,6>
+ 2131525632U, // <7,5,0,7>: Cost 2 ins <u,5,0,7>, lane 0
+ 1722597532U, // <7,5,0,u>: Cost 2 vuzpl <7,4,5,6>, LHS
+ 2714054287U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 2249183358U, // <7,5,1,1>: Cost 3 vrev <5,7,1,1>
+ 2127577089U, // <7,5,1,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 1785643110U, // <7,5,1,3>: Cost 2 vuzpr <6,7,4,5>, LHS
+ 2714054327U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 3127496708U, // <7,5,1,5>: Cost 3 vtrnr <6,7,0,1>, <5,5,5,5>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2249117814U, // <7,5,2,0>: Cost 3 vrev <5,7,0,2>
+ 2714054379U, // <7,5,2,1>: Cost 3 vext3 RHS, <5,2,1,3>
+ 2249265288U, // <7,5,2,2>: Cost 3 vrev <5,7,2,2>
+ 2131640320U, // <7,5,2,3>: Cost 2 ins <u,5,2,3>, lane 0
+ 2859385754U, // <7,5,2,4>: Cost 3 vuzpr <6,7,4,5>, <1,2,3,4>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2712063768U, // <7,5,2,6>: Cost 3 vext3 RHS, <5,2,6,3>
+ 2131673088U, // <7,5,2,7>: Cost 2 ins <u,5,2,7>, lane 0
+ 2131640320U, // <7,5,2,u>: Cost 2 ins <u,5,2,3>, lane 0
+ 3201449985U, // <7,5,3,0>: Cost 3 ins <7,u,3,0>, lane 1
+ 1175457920U, // <7,5,3,1>: Cost 2 vrev <5,7,1,3>
+ 2249273481U, // <7,5,3,2>: Cost 3 vrev <5,7,2,3>
+ 2249347218U, // <7,5,3,3>: Cost 3 vrev <5,7,3,3>
+ 3201482753U, // <7,5,3,4>: Cost 3 ins <7,u,3,4>, lane 1
+ 2983857370U, // <7,5,3,5>: Cost 3 vzipr <5,1,7,3>, <4,4,5,5>
+ 2983856642U, // <7,5,3,6>: Cost 3 vzipr <5,1,7,3>, <3,4,5,6>
+ 2047872310U, // <7,5,3,7>: Cost 2 vtrnr <5,7,1,3>, RHS
+ 2047872311U, // <7,5,3,u>: Cost 2 vtrnr <5,7,1,3>, RHS
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 2987182994U, // <7,5,4,1>: Cost 3 vzipr <5,6,7,4>, <4,0,5,1>
+ 2249281674U, // <7,5,4,2>: Cost 3 vrev <5,7,2,4>
+ 3201548289U, // <7,5,4,3>: Cost 3 ins <7,u,4,3>, lane 1
+ 2579074508U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, <4,7,5,4>
+ 2131804160U, // <7,5,4,5>: Cost 2 ins <u,5,4,5>, lane 0
+ 1722600758U, // <7,5,4,6>: Cost 2 vuzpl <7,4,5,6>, RHS
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2714054620U, // <7,5,5,1>: Cost 3 vext3 RHS, <5,5,1,1>
+ 3201613825U, // <7,5,5,2>: Cost 3 ins <7,u,5,2>, lane 1
+ 2649657204U, // <7,5,5,3>: Cost 3 vext2 <5,3,7,5>, <5,3,7,5>
+ 2714054651U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,5>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2127904769U, // <7,5,5,6>: Cost 2 ins <7,u,5,6>, lane 1
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2131910656U, // <7,5,6,0>: Cost 2 ins <u,5,6,0>, lane 0
+ 2131918848U, // <7,5,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 2131927040U, // <7,5,6,2>: Cost 2 ins <u,5,6,2>, lane 0
+ 2131935232U, // <7,5,6,3>: Cost 2 ins <u,5,6,3>, lane 0
+ 2131943424U, // <7,5,6,4>: Cost 2 ins <u,5,6,4>, lane 0
+ 2131951616U, // <7,5,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 2131959808U, // <7,5,6,6>: Cost 2 ins <u,5,6,6>, lane 0
+ 1058226176U, // <7,5,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <7,5,6,u>: Cost 1 ins RHS, lane 0
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 1638469760U, // <7,5,7,1>: Cost 2 vext3 RHS, <5,7,1,3>
+ 2712211590U, // <7,5,7,2>: Cost 3 vext3 RHS, <5,7,2,0>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2048199990U, // <7,5,7,7>: Cost 2 vtrnr <5,7,5,7>, RHS
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 1638469841U, // <7,5,u,1>: Cost 2 vext3 RHS, <5,u,1,3>
+ 1722603310U, // <7,5,u,2>: Cost 2 vuzpl <7,4,5,6>, LHS
+ 1785643677U, // <7,5,u,3>: Cost 2 vuzpr <6,7,4,5>, LHS
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 1722603674U, // <7,5,u,6>: Cost 2 vuzpl <7,4,5,6>, RHS
+ 1058226176U, // <7,5,u,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <7,5,u,u>: Cost 1 ins RHS, lane 0
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2132148224U, // <7,6,0,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2987151292U, // <7,6,0,5>: Cost 3 vzipr <5,6,7,0>, <5,4,6,5>
+ 2987150564U, // <7,6,0,6>: Cost 3 vzipr <5,6,7,0>, <4,4,6,6>
+ 1913408822U, // <7,6,0,7>: Cost 2 vzipr <5,6,7,0>, RHS
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2127577089U, // <7,6,1,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 2841329766U, // <7,6,1,3>: Cost 3 vuzpr <3,7,2,6>, LHS
+ 2579123666U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, <4,7,6,1>
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2974551350U, // <7,6,1,7>: Cost 3 vzipr <3,5,7,1>, RHS
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2714055117U, // <7,6,2,2>: Cost 3 vext3 RHS, <6,2,2,3>
+ 2132303872U, // <7,6,2,3>: Cost 2 ins <u,6,2,3>, lane 0
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2714055152U, // <7,6,2,6>: Cost 3 vext3 RHS, <6,2,6,2>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 3121614200U, // <7,6,3,1>: Cost 3 vtrnr <5,7,1,3>, <4,6,5,1>
+ 1181504354U, // <7,6,3,2>: Cost 2 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 3206135808U, // <7,6,3,5>: Cost 3 ins <u,6,3,5>, lane 0
+ 2983857380U, // <7,6,3,6>: Cost 3 vzipr <5,1,7,3>, <4,4,6,6>
+ 1910115638U, // <7,6,3,7>: Cost 2 vzipr <5,1,7,3>, RHS
+ 1910115639U, // <7,6,3,u>: Cost 2 vzipr <5,1,7,3>, RHS
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2714055276U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,0>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2650328272U, // <7,6,4,4>: Cost 3 vext2 <5,4,7,6>, <4,4,4,4>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2132475904U, // <7,6,4,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 1913441590U, // <7,6,4,7>: Cost 2 vzipr <5,6,7,4>, RHS
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3201622017U, // <7,6,5,3>: Cost 3 ins <7,u,5,3>, lane 1
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2127904769U, // <7,6,5,6>: Cost 2 ins <7,u,5,6>, lane 1
+ 2971929910U, // <7,6,5,7>: Cost 3 vzipr <3,1,7,5>, RHS
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2712212245U, // <7,6,6,2>: Cost 3 vext3 RHS, <6,6,2,7>
+ 3201695745U, // <7,6,6,3>: Cost 3 ins <7,u,6,3>, lane 1
+ 2714055461U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,5>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 1638323042U, // <7,6,7,2>: Cost 2 vext3 RHS, <6,7,2,3>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 1638323082U, // <7,6,7,6>: Cost 2 vext3 RHS, <6,7,6,7>
+ 1912802614U, // <7,6,7,7>: Cost 2 vzipr <5,5,7,7>, RHS
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2132148224U, // <7,6,u,2>: Cost 2 ins <u,6,0,2>, lane 0
+ 2132303872U, // <7,6,u,3>: Cost 2 ins <u,6,2,3>, lane 0
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2132475904U, // <7,6,u,6>: Cost 2 ins <u,6,4,6>, lane 0
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 1913409634U, // <7,7,0,0>: Cost 2 vzipr <5,6,7,0>, <5,6,7,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 1724743782U, // <7,7,0,2>: Cost 2 vuzpl <7,7,7,7>, LHS
+ 2987151056U, // <7,7,0,3>: Cost 3 vzipr <5,6,7,0>, <5,1,7,3>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2987151302U, // <7,7,0,6>: Cost 3 vzipr <5,6,7,0>, <5,4,7,6>
+ 2127470594U, // <7,7,0,7>: Cost 2 ins <7,7,u,7>, lane 2
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2053755726U, // <7,7,1,1>: Cost 2 vtrnr <6,7,0,1>, <6,7,0,1>
+ 2127577089U, // <7,7,1,2>: Cost 2 ins <7,u,1,2>, lane 1
+ 1779761254U, // <7,7,1,3>: Cost 2 vuzpr <5,7,5,7>, LHS
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2127470594U, // <7,7,1,7>: Cost 2 ins <7,7,u,7>, lane 2
+ 1779761259U, // <7,7,1,u>: Cost 2 vuzpr <5,7,5,7>, LHS
+ 2853503894U, // <7,7,2,0>: Cost 3 vuzpr <5,7,5,7>, <1,2,3,0>
+ 3206692864U, // <7,7,2,1>: Cost 3 ins <u,7,2,1>, lane 0
+ 1988801621U, // <7,7,2,2>: Cost 2 vtrnl <7,1,2,3>, <7,1,2,3>
+ 2132967424U, // <7,7,2,3>: Cost 2 ins <u,7,2,3>, lane 0
+ 2853503898U, // <7,7,2,4>: Cost 3 vuzpr <5,7,5,7>, <1,2,3,4>
+ 3206725632U, // <7,7,2,5>: Cost 3 ins <u,7,2,5>, lane 0
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2127470594U, // <7,7,2,7>: Cost 2 ins <7,7,u,7>, lane 2
+ 1988801621U, // <7,7,2,u>: Cost 2 vtrnl <7,1,2,3>, <7,1,2,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3121615694U, // <7,7,3,1>: Cost 3 vtrnr <5,7,1,3>, <6,7,0,1>
+ 3201171458U, // <7,7,3,2>: Cost 3 ins <7,7,u,2>, lane 2
+ 1910116048U, // <7,7,3,3>: Cost 2 vzipr <5,1,7,3>, <5,1,7,3>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2639055462U, // <7,7,3,5>: Cost 3 vext2 <3,5,7,7>, <3,5,7,7>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2127470594U, // <7,7,3,7>: Cost 2 ins <7,7,u,7>, lane 2
+ 1910116048U, // <7,7,3,u>: Cost 2 vzipr <5,1,7,3>, <5,1,7,3>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3062715386U, // <7,7,4,1>: Cost 3 vtrnl <7,1,4,6>, <7,0,1,2>
+ 3201540097U, // <7,7,4,2>: Cost 3 ins <7,u,4,2>, lane 1
+ 2987183824U, // <7,7,4,3>: Cost 3 vzipr <5,6,7,4>, <5,1,7,3>
+ 1913442406U, // <7,7,4,4>: Cost 2 vzipr <5,6,7,4>, <5,6,7,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 1724747062U, // <7,7,4,6>: Cost 2 vuzpl <7,7,7,7>, RHS
+ 2127470594U, // <7,7,4,7>: Cost 2 ins <7,7,u,7>, lane 2
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2853508547U, // <7,7,5,0>: Cost 3 vuzpr <5,7,5,7>, <7,5,7,0>
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3201613825U, // <7,7,5,2>: Cost 3 ins <7,u,5,2>, lane 1
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2127904769U, // <7,7,5,6>: Cost 2 ins <7,u,5,6>, lane 1
+ 1779764534U, // <7,7,5,7>: Cost 2 vuzpr <5,7,5,7>, RHS
+ 1779764535U, // <7,7,5,u>: Cost 2 vuzpr <5,7,5,7>, RHS
+ 2985873506U, // <7,7,6,0>: Cost 3 vzipr <5,4,7,6>, <5,6,7,0>
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2985873104U, // <7,7,6,3>: Cost 3 vzipr <5,4,7,6>, <5,1,7,3>
+ 2985873510U, // <7,7,6,4>: Cost 3 vzipr <5,4,7,6>, <5,6,7,4>
+ 2985873511U, // <7,7,6,5>: Cost 3 vzipr <5,4,7,6>, <5,6,7,5>
+ 1912131526U, // <7,7,6,6>: Cost 2 vzipr <5,4,7,6>, <5,4,7,6>
+ 2133295104U, // <7,7,6,7>: Cost 2 ins <u,7,6,7>, lane 0
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2127405059U, // <7,7,7,1>: Cost 2 ins <7,7,7,u>, lane 3
+ 2127405059U, // <7,7,7,2>: Cost 2 ins <7,7,7,u>, lane 3
+ 2127405059U, // <7,7,7,3>: Cost 2 ins <7,7,7,u>, lane 3
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2127405059U, // <7,7,7,5>: Cost 2 ins <7,7,7,u>, lane 3
+ 2127405059U, // <7,7,7,6>: Cost 2 ins <7,7,7,u>, lane 3
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1913409634U, // <7,7,u,0>: Cost 2 vzipr <5,6,7,0>, <5,6,7,0>
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 1724749614U, // <7,7,u,2>: Cost 2 vuzpl <7,7,7,7>, LHS
+ 1779761821U, // <7,7,u,3>: Cost 2 vuzpr <5,7,5,7>, LHS
+ 1913442406U, // <7,7,u,4>: Cost 2 vzipr <5,6,7,4>, <5,6,7,4>
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1724749978U, // <7,7,u,6>: Cost 2 vuzpl <7,7,7,7>, RHS
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1720131686U, // <7,u,0,2>: Cost 2 vuzpl <7,0,u,2>, LHS
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1853839514U, // <7,u,0,5>: Cost 2 vzipl <7,0,1,2>, RHS
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1913408840U, // <7,u,0,7>: Cost 2 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 2128232448U, // <7,u,1,1>: Cost 2 ins <u,0,1,1>, lane 0
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2122317827U, // <7,u,1,5>: Cost 2 ins <7,0,1,u>, lane 3
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1662211948U, // <7,u,2,0>: Cost 2 vext3 RHS, <u,2,0,2>
+ 2128969728U, // <7,u,2,1>: Cost 2 ins <u,1,2,1>, lane 0
+ 2128314368U, // <7,u,2,2>: Cost 2 ins <u,0,2,2>, lane 0
+ 1055244288U, // <7,u,2,3>: Cost 1 ins LHS, lane 0
+ 1662211988U, // <7,u,2,4>: Cost 2 vext3 RHS, <u,2,4,6>
+ 2129002496U, // <7,u,2,5>: Cost 2 ins <u,1,2,5>, lane 0
+ 2131001344U, // <7,u,2,6>: Cost 2 ins <u,4,2,6>, lane 0
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1055244288U, // <7,u,2,u>: Cost 1 ins LHS, lane 0
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638324167U, // <7,u,3,1>: Cost 2 vext3 RHS, <u,3,1,3>
+ 2128388096U, // <7,u,3,2>: Cost 2 ins <u,0,3,2>, lane 0
+ 1910112412U, // <7,u,3,3>: Cost 2 vzipr <5,1,7,3>, LHS
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2125471746U, // <7,u,3,6>: Cost 2 ins <7,4,u,6>, lane 2
+ 1910115656U, // <7,u,3,7>: Cost 2 vzipr <5,1,7,3>, RHS
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1856821038U, // <7,u,4,1>: Cost 2 vzipl <7,4,5,6>, LHS
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 1913438364U, // <7,u,4,3>: Cost 2 vzipr <5,6,7,4>, LHS
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1720134966U, // <7,u,4,6>: Cost 2 vuzpl <7,0,u,2>, RHS
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 1991038766U, // <7,u,5,2>: Cost 2 vtrnl <7,4,5,6>, LHS
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 1662359728U, // <7,u,6,0>: Cost 2 vext3 RHS, <u,6,0,2>
+ 2131918848U, // <7,u,6,1>: Cost 2 ins <u,5,6,1>, lane 0
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1662359768U, // <7,u,6,4>: Cost 2 vext3 RHS, <u,6,4,6>
+ 2131951616U, // <7,u,6,5>: Cost 2 ins <u,5,6,5>, lane 0
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1058226176U, // <7,u,6,7>: Cost 1 ins RHS, lane 0
+ 1058226176U, // <7,u,6,u>: Cost 1 ins RHS, lane 0
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 1640462603U, // <7,u,7,1>: Cost 2 vext3 RHS, <u,7,1,3>
+ 1993185070U, // <7,u,7,2>: Cost 2 vtrnl <7,7,7,7>, LHS
+ 1912799388U, // <7,u,7,3>: Cost 2 vzipr <5,5,7,7>, LHS
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1640462643U, // <7,u,7,5>: Cost 2 vext3 RHS, <u,7,5,7>
+ 1993185434U, // <7,u,7,6>: Cost 2 vtrnl <7,7,7,7>, RHS
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1055244288U, // <7,u,u,3>: Cost 1 ins LHS, lane 0
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 1058226176U, // <7,u,u,7>: Cost 1 ins RHS, lane 0
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2085707777U, // <u,0,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2080440323U, // <u,0,0,5>: Cost 2 ins <0,0,0,u>, lane 3
+ 2080440323U, // <u,0,0,6>: Cost 2 ins <0,0,0,u>, lane 3
+ 2080440323U, // <u,0,0,7>: Cost 2 ins <0,0,0,u>, lane 3
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 786808934U, // <u,0,1,1>: Cost 1 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 1756332134U, // <u,0,1,3>: Cost 2 vuzpr <1,u,3,0>, LHS
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2085797889U, // <u,0,1,5>: Cost 2 ins <0,u,1,5>, lane 1
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2080514051U, // <u,0,1,7>: Cost 2 ins <0,0,1,u>, lane 3
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 1994768394U, // <u,0,2,1>: Cost 2 vtrnl LHS, <0,0,1,1>
+ 921026662U, // <u,0,2,2>: Cost 1 vtrnl LHS, LHS
+ 1012113409U, // <u,0,2,3>: Cost 1 ins LHS, lane 1
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2080587779U, // <u,0,2,5>: Cost 2 ins <0,0,2,u>, lane 3
+ 2085879809U, // <u,0,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2080587779U, // <u,0,2,7>: Cost 2 ins <0,0,2,u>, lane 3
+ 921026716U, // <u,0,2,u>: Cost 1 vtrnl LHS, LHS
+ 1880326144U, // <u,0,3,0>: Cost 2 vzipr LHS, <0,0,0,0>
+ 1880327846U, // <u,0,3,1>: Cost 2 vzipr LHS, <2,3,0,1>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2091900929U, // <u,0,3,3>: Cost 2 ins <1,u,3,3>, lane 1
+ 2091909121U, // <u,0,3,4>: Cost 2 ins <1,u,3,4>, lane 1
+ 2086633475U, // <u,0,3,5>: Cost 2 ins <1,0,3,u>, lane 3
+ 2086633475U, // <u,0,3,6>: Cost 2 ins <1,0,3,u>, lane 3
+ 2091933697U, // <u,0,3,7>: Cost 2 ins <1,u,3,7>, lane 1
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 1705610572U, // <u,0,4,0>: Cost 2 vuzpl <4,6,0,2>, <4,6,0,2>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 2086002689U, // <u,0,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 1947828428U, // <u,0,4,4>: Cost 2 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1726844214U, // <u,0,4,6>: Cost 2 vuzpl <u,2,0,2>, RHS
+ 2109923329U, // <u,0,4,7>: Cost 2 ins <4,u,4,7>, lane 1
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 1863532544U, // <u,0,5,0>: Cost 2 vzipl RHS, <0,0,0,0>
+ 789790822U, // <u,0,5,1>: Cost 1 vzipl RHS, LHS
+ 1996349542U, // <u,0,5,2>: Cost 2 vtrnl <u,3,5,7>, LHS
+ 2104696835U, // <u,0,5,3>: Cost 2 ins <4,0,5,u>, lane 3
+ 1863532882U, // <u,0,5,4>: Cost 2 vzipl RHS, <0,4,1,5>
+ 2109980673U, // <u,0,5,5>: Cost 2 ins <4,u,5,5>, lane 1
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 1756335414U, // <u,0,5,7>: Cost 2 vuzpr <1,u,3,0>, RHS
+ 789791389U, // <u,0,5,u>: Cost 1 vzipl RHS, LHS
+ 1997750272U, // <u,0,6,0>: Cost 2 vtrnl RHS, <0,0,0,0>
+ 1997750282U, // <u,0,6,1>: Cost 2 vtrnl RHS, <0,0,1,1>
+ 924008550U, // <u,0,6,2>: Cost 1 vtrnl RHS, LHS
+ 2104770563U, // <u,0,6,3>: Cost 2 ins <4,0,6,u>, lane 3
+ 1146503858U, // <u,0,6,4>: Cost 2 vrev <0,u,4,6>
+ 2104770563U, // <u,0,6,5>: Cost 2 ins <4,0,6,u>, lane 3
+ 2110062593U, // <u,0,6,6>: Cost 2 ins <4,u,6,6>, lane 1
+ 1036328961U, // <u,0,6,7>: Cost 1 ins RHS, lane 1
+ 924008604U, // <u,0,6,u>: Cost 1 vtrnl RHS, LHS
+ 1906900992U, // <u,0,7,0>: Cost 2 vzipr RHS, <0,0,0,0>
+ 1906902694U, // <u,0,7,1>: Cost 2 vzipr RHS, <2,3,0,1>
+ 1906901156U, // <u,0,7,2>: Cost 2 vzipr RHS, <0,2,0,2>
+ 2116083713U, // <u,0,7,3>: Cost 2 ins <5,u,7,3>, lane 1
+ 2116091905U, // <u,0,7,4>: Cost 2 ins <5,u,7,4>, lane 1
+ 2980643874U, // <u,0,7,5>: Cost 3 vzipr RHS, <1,4,0,5>
+ 2116108289U, // <u,0,7,6>: Cost 2 ins <5,u,7,6>, lane 1
+ 2116116481U, // <u,0,7,7>: Cost 2 ins <5,u,7,7>, lane 1
+ 1906901162U, // <u,0,7,u>: Cost 2 vzipr RHS, <0,2,0,u>
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 791453798U, // <u,0,u,1>: Cost 1 vzipl LHS, LHS
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 1012113409U, // <u,0,u,3>: Cost 1 ins LHS, lane 1
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 1036328961U, // <u,0,u,7>: Cost 1 ins RHS, lane 1
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1818149622U, // <u,1,0,0>: Cost 2 vzipl <1,0,3,2>, <1,0,3,2>
+ 1007951877U, // <u,1,0,1>: Cost 1 ins LHS, lane 5
+ 1725587558U, // <u,1,0,2>: Cost 2 vuzpl <u,0,1,2>, LHS
+ 1007910914U, // <u,1,0,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <u,1,0,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 2081669122U, // <u,1,0,5>: Cost 2 ins <0,1,u,5>, lane 2
+ 2081677314U, // <u,1,0,6>: Cost 2 ins <0,1,u,6>, lane 2
+ 2081685506U, // <u,1,0,7>: Cost 2 ins <0,1,u,7>, lane 2
+ 1007951877U, // <u,1,0,u>: Cost 1 ins LHS, lane 5
+ 1481786002U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, <0,u,1,1>
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 1860551574U, // <u,1,1,2>: Cost 2 vzipl LHS, <1,2,3,0>
+ 1007910914U, // <u,1,1,3>: Cost 1 ins LHS, lane 2
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 1860551824U, // <u,1,1,5>: Cost 2 vzipl LHS, <1,5,3,7>
+ 2081677314U, // <u,1,1,6>: Cost 2 ins <0,1,u,6>, lane 2
+ 2081685506U, // <u,1,1,7>: Cost 2 ins <0,1,u,7>, lane 2
+ 1007910914U, // <u,1,1,u>: Cost 1 ins LHS, lane 2
+ 1007509507U, // <u,1,2,0>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,2,1>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,2,2>: Cost 1 ins LHS, lane 3
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1007509507U, // <u,1,2,4>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,2,5>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,2,6>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,2,7>: Cost 1 ins LHS, lane 3
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 1880328342U, // <u,1,3,2>: Cost 2 vzipr LHS, <3,0,1,2>
+ 945004646U, // <u,1,3,3>: Cost 1 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2087297027U, // <u,1,3,6>: Cost 2 ins <1,1,3,u>, lane 3
+ 2133737476U, // <u,1,3,7>: Cost 2 ins <u,u,3,7>, lane 4
+ 945004651U, // <u,1,3,u>: Cost 1 vtrnr LHS, LHS
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2081636354U, // <u,1,4,1>: Cost 2 ins <0,1,u,1>, lane 2
+ 2081644546U, // <u,1,4,2>: Cost 2 ins <0,1,u,2>, lane 2
+ 1007910914U, // <u,1,4,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <u,1,4,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 1007951877U, // <u,1,4,5>: Cost 1 ins LHS, lane 5
+ 1725590838U, // <u,1,4,6>: Cost 2 vuzpl <u,0,1,2>, RHS
+ 2081685506U, // <u,1,4,7>: Cost 2 ins <0,1,u,7>, lane 2
+ 1007910914U, // <u,1,4,u>: Cost 1 ins LHS, lane 2
+ 1481818774U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, <0,u,1,5>
+ 1863533364U, // <u,1,5,1>: Cost 2 vzipl RHS, <1,1,1,1>
+ 1863533462U, // <u,1,5,2>: Cost 2 vzipl RHS, <1,2,3,0>
+ 1007910914U, // <u,1,5,3>: Cost 1 ins LHS, lane 2
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 1863533712U, // <u,1,5,5>: Cost 2 vzipl RHS, <1,5,3,7>
+ 2133876740U, // <u,1,5,6>: Cost 2 ins <u,u,5,6>, lane 4
+ 1750224182U, // <u,1,5,7>: Cost 2 vuzpr <0,u,1,1>, RHS
+ 1007910914U, // <u,1,5,u>: Cost 1 ins LHS, lane 2
+ 2081628162U, // <u,1,6,0>: Cost 2 ins <0,1,u,0>, lane 2
+ 1997751092U, // <u,1,6,1>: Cost 2 vtrnl RHS, <1,1,1,1>
+ 2133917700U, // <u,1,6,2>: Cost 2 ins <u,u,6,2>, lane 4
+ 1007910914U, // <u,1,6,3>: Cost 1 ins LHS, lane 2
+ 2081660930U, // <u,1,6,4>: Cost 2 ins <0,1,u,4>, lane 2
+ 1997751296U, // <u,1,6,5>: Cost 2 vtrnl RHS, <1,3,5,7>
+ 2133950468U, // <u,1,6,6>: Cost 2 ins <u,u,6,6>, lane 4
+ 1060216836U, // <u,1,6,7>: Cost 1 ins RHS, lane 4
+ 1007910914U, // <u,1,6,u>: Cost 1 ins LHS, lane 2
+ 2133975044U, // <u,1,7,0>: Cost 2 ins <u,u,7,0>, lane 4
+ 1906901002U, // <u,1,7,1>: Cost 2 vzipr RHS, <0,0,1,1>
+ 1906903190U, // <u,1,7,2>: Cost 2 vzipr RHS, <3,0,1,2>
+ 969220198U, // <u,1,7,3>: Cost 1 vtrnr RHS, LHS
+ 2134007812U, // <u,1,7,4>: Cost 2 ins <u,u,7,4>, lane 4
+ 1152558485U, // <u,1,7,5>: Cost 2 vrev <1,u,5,7>
+ 2134024196U, // <u,1,7,6>: Cost 2 ins <u,u,7,6>, lane 4
+ 2134032388U, // <u,1,7,7>: Cost 2 ins <u,u,7,7>, lane 4
+ 969220203U, // <u,1,7,u>: Cost 1 vtrnr RHS, LHS
+ 1007509507U, // <u,1,u,0>: Cost 1 ins LHS, lane 3
+ 1007951877U, // <u,1,u,1>: Cost 1 ins LHS, lane 5
+ 1007509507U, // <u,1,u,2>: Cost 1 ins LHS, lane 3
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1007509507U, // <u,1,u,4>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,u,5>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,u,6>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,1,u,7>: Cost 1 ins LHS, lane 3
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1726332928U, // <u,2,0,0>: Cost 2 vuzpl LHS, <0,0,0,0>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 652591206U, // <u,2,0,2>: Cost 1 vuzpl LHS, LHS
+ 1886937190U, // <u,2,0,3>: Cost 2 vzipr <1,2,u,0>, LHS
+ 1726333132U, // <u,2,0,4>: Cost 2 vuzpl LHS, <0,2,4,6>
+ 2081767427U, // <u,2,0,5>: Cost 2 ins <0,2,0,u>, lane 3
+ 2082340866U, // <u,2,0,6>: Cost 2 ins <0,2,u,6>, lane 2
+ 2081767427U, // <u,2,0,7>: Cost 2 ins <0,2,0,u>, lane 3
+ 652591260U, // <u,2,0,u>: Cost 1 vuzpl LHS, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 1726333748U, // <u,2,1,1>: Cost 2 vuzpl LHS, <1,1,1,1>
+ 1860552296U, // <u,2,1,2>: Cost 2 vzipl LHS, <2,2,2,2>
+ 1750155366U, // <u,2,1,3>: Cost 2 vuzpr <0,u,0,2>, LHS
+ 2088296450U, // <u,2,1,4>: Cost 2 ins <1,2,u,4>, lane 2
+ 1726333952U, // <u,2,1,5>: Cost 2 vuzpl LHS, <1,3,5,7>
+ 1860552634U, // <u,2,1,6>: Cost 2 vzipl LHS, <2,6,3,7>
+ 2109702145U, // <u,2,1,7>: Cost 2 ins <4,u,1,7>, lane 1
+ 1750155371U, // <u,2,1,u>: Cost 2 vuzpr <0,u,0,2>, LHS
+ 1481867932U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, <0,u,2,2>
+ 2085838849U, // <u,2,2,1>: Cost 2 ins <0,u,2,1>, lane 1
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1012113409U, // <u,2,2,3>: Cost 1 ins LHS, lane 1
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2085871617U, // <u,2,2,5>: Cost 2 ins <0,u,2,5>, lane 1
+ 2085879809U, // <u,2,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 2085888001U, // <u,2,2,7>: Cost 2 ins <0,u,2,7>, lane 1
+ 1012113409U, // <u,2,2,u>: Cost 1 ins LHS, lane 1
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1880326164U, // <u,2,3,2>: Cost 2 vzipr LHS, <0,0,2,2>
+ 806584422U, // <u,2,3,3>: Cost 1 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1726335490U, // <u,2,3,5>: Cost 2 vuzpl LHS, <3,4,5,6>
+ 1880326492U, // <u,2,3,6>: Cost 2 vzipr LHS, <0,4,2,6>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 806584427U, // <u,2,3,u>: Cost 1 vzipr LHS, LHS
+ 1726336332U, // <u,2,4,0>: Cost 2 vuzpl LHS, <4,6,0,2>
+ 2082062339U, // <u,2,4,1>: Cost 2 ins <0,2,4,u>, lane 3
+ 2082308098U, // <u,2,4,2>: Cost 2 ins <0,2,u,2>, lane 2
+ 1886969958U, // <u,2,4,3>: Cost 2 vzipr <1,2,u,4>, LHS
+ 1726336208U, // <u,2,4,4>: Cost 2 vuzpl LHS, <4,4,4,4>
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 652594486U, // <u,2,4,6>: Cost 1 vuzpl LHS, RHS
+ 2082062339U, // <u,2,4,7>: Cost 2 ins <0,2,4,u>, lane 3
+ 652594504U, // <u,2,4,u>: Cost 1 vuzpl LHS, RHS
+ 2088263682U, // <u,2,5,0>: Cost 2 ins <1,2,u,0>, lane 2
+ 1726337152U, // <u,2,5,1>: Cost 2 vuzpl LHS, <5,7,1,3>
+ 1863534184U, // <u,2,5,2>: Cost 2 vzipl RHS, <2,2,2,2>
+ 1884987494U, // <u,2,5,3>: Cost 2 vzipr <0,u,u,5>, LHS
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 1726337028U, // <u,2,5,5>: Cost 2 vuzpl LHS, <5,5,5,5>
+ 1863534522U, // <u,2,5,6>: Cost 2 vzipl RHS, <2,6,3,7>
+ 1750158646U, // <u,2,5,7>: Cost 2 vuzpr <0,u,0,2>, RHS
+ 1750158647U, // <u,2,5,u>: Cost 2 vuzpr <0,u,0,2>, RHS
+ 1481900704U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, <0,u,2,6>
+ 2110021633U, // <u,2,6,1>: Cost 2 ins <4,u,6,1>, lane 1
+ 1997751912U, // <u,2,6,2>: Cost 2 vtrnl RHS, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2110054401U, // <u,2,6,5>: Cost 2 ins <4,u,6,5>, lane 1
+ 1726337848U, // <u,2,6,6>: Cost 2 vuzpl LHS, <6,6,6,6>
+ 1036328961U, // <u,2,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <u,2,6,u>: Cost 1 ins RHS, lane 1
+ 2042962838U, // <u,2,7,0>: Cost 2 vtrnr RHS, <1,2,3,0>
+ 1726338042U, // <u,2,7,1>: Cost 2 vuzpl LHS, <7,0,1,2>
+ 1906901012U, // <u,2,7,2>: Cost 2 vzipr RHS, <0,0,2,2>
+ 833159270U, // <u,2,7,3>: Cost 1 vzipr RHS, LHS
+ 2042962842U, // <u,2,7,4>: Cost 2 vtrnr RHS, <1,2,3,4>
+ 1726338406U, // <u,2,7,5>: Cost 2 vuzpl LHS, <7,4,5,6>
+ 1906901340U, // <u,2,7,6>: Cost 2 vzipr RHS, <0,4,2,6>
+ 1726338668U, // <u,2,7,7>: Cost 2 vuzpl LHS, <7,7,7,7>
+ 833159275U, // <u,2,7,u>: Cost 1 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 652597038U, // <u,2,u,2>: Cost 1 vuzpl LHS, LHS
+ 806625382U, // <u,2,u,3>: Cost 1 vzipr LHS, LHS
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 652597402U, // <u,2,u,6>: Cost 1 vuzpl LHS, RHS
+ 1036328961U, // <u,2,u,7>: Cost 1 ins RHS, lane 1
+ 806625387U, // <u,2,u,u>: Cost 1 vzipr LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2088951810U, // <u,3,0,3>: Cost 2 ins <1,3,u,3>, lane 2
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2094940162U, // <u,3,0,5>: Cost 2 ins <2,3,u,5>, lane 2
+ 2094374915U, // <u,3,0,6>: Cost 2 ins <2,3,0,u>, lane 3
+ 2088984578U, // <u,3,0,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 676569190U, // <u,3,1,3>: Cost 1 vuzpr LHS, LHS
+ 1860553218U, // <u,3,1,4>: Cost 2 vzipl LHS, <3,4,5,6>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2088476675U, // <u,3,1,6>: Cost 2 ins <1,3,1,u>, lane 3
+ 2088984578U, // <u,3,1,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 676569195U, // <u,3,1,u>: Cost 1 vuzpr LHS, LHS
+ 1750311830U, // <u,3,2,0>: Cost 2 vuzpr LHS, <1,2,3,0>
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1012113409U, // <u,3,2,3>: Cost 1 ins LHS, lane 1
+ 1750311834U, // <u,3,2,4>: Cost 2 vuzpr LHS, <1,2,3,4>
+ 1994770946U, // <u,3,2,5>: Cost 2 vtrnl LHS, <3,4,5,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2088984578U, // <u,3,2,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 1012113409U, // <u,3,2,u>: Cost 1 ins LHS, lane 1
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1750312614U, // <u,3,3,1>: Cost 2 vuzpr LHS, <2,3,0,1>
+ 1880326902U, // <u,3,3,2>: Cost 2 vzipr LHS, <1,0,3,2>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 1750312654U, // <u,3,3,5>: Cost 2 vuzpr LHS, <2,3,4,5>
+ 2100568067U, // <u,3,3,6>: Cost 2 ins <3,3,3,u>, lane 3
+ 1880327312U, // <u,3,3,7>: Cost 2 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2094669827U, // <u,3,4,2>: Cost 2 ins <2,3,4,u>, lane 3
+ 2088951810U, // <u,3,4,3>: Cost 2 ins <1,3,u,3>, lane 2
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1750311260U, // <u,3,4,6>: Cost 2 vuzpr LHS, <0,4,2,6>
+ 2088984578U, // <u,3,4,7>: Cost 2 ins <1,3,u,7>, lane 2
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 1863534742U, // <u,3,5,0>: Cost 2 vzipl RHS, <3,0,1,2>
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2088771587U, // <u,3,5,2>: Cost 2 ins <1,3,5,u>, lane 3
+ 1863535004U, // <u,3,5,3>: Cost 2 vzipl RHS, <3,3,3,3>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 676572470U, // <u,3,5,7>: Cost 1 vuzpr LHS, RHS
+ 676572471U, // <u,3,5,u>: Cost 1 vuzpr LHS, RHS
+ 1798090850U, // <u,3,6,0>: Cost 2 vuzpr LHS, <5,6,7,0>
+ 1997752470U, // <u,3,6,1>: Cost 2 vtrnl RHS, <3,0,1,2>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 1997752732U, // <u,3,6,3>: Cost 2 vtrnl RHS, <3,3,3,3>
+ 1798090854U, // <u,3,6,4>: Cost 2 vuzpr LHS, <5,6,7,4>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1060216836U, // <u,3,6,7>: Cost 1 ins RHS, lane 4
+ 1060216836U, // <u,3,6,u>: Cost 1 ins RHS, lane 4
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 1906901832U, // <u,3,7,3>: Cost 2 vzipr RHS, <1,1,3,3>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2042963662U, // <u,3,7,5>: Cost 2 vtrnr RHS, <2,3,4,5>
+ 2134024196U, // <u,3,7,6>: Cost 2 ins <u,u,7,6>, lane 4
+ 1906902160U, // <u,3,7,7>: Cost 2 vzipr RHS, <1,5,3,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1880367862U, // <u,3,u,2>: Cost 2 vzipr LHS, <1,0,3,2>
+ 676569757U, // <u,3,u,3>: Cost 1 vuzpr LHS, LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1750311584U, // <u,3,u,6>: Cost 2 vuzpr LHS, <0,u,2,6>
+ 676572713U, // <u,3,u,7>: Cost 1 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 1974046028U, // <u,4,0,0>: Cost 2 vtrnl <4,6,0,2>, <4,6,0,2>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1727168614U, // <u,4,0,2>: Cost 2 vuzpl <u,2,4,6>, LHS
+ 2085707777U, // <u,4,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 1679392972U, // <u,4,0,4>: Cost 2 vuzpl <0,2,4,6>, <0,2,4,6>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 2109628417U, // <u,4,0,7>: Cost 2 ins <4,u,0,7>, lane 1
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1860553618U, // <u,4,1,0>: Cost 2 vzipl LHS, <4,0,5,1>
+ 2085765121U, // <u,4,1,1>: Cost 2 ins <0,u,1,1>, lane 1
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 1756364902U, // <u,4,1,3>: Cost 2 vuzpr <1,u,3,4>, LHS
+ 1860553936U, // <u,4,1,4>: Cost 2 vzipl LHS, <4,4,4,4>
+ 786812214U, // <u,4,1,5>: Cost 1 vzipl LHS, RHS
+ 1994026294U, // <u,4,1,6>: Cost 2 vtrnl <u,0,1,2>, RHS
+ 2083168259U, // <u,4,1,7>: Cost 2 ins <0,4,1,u>, lane 3
+ 786812457U, // <u,4,1,u>: Cost 1 vzipl LHS, RHS
+ 1170066926U, // <u,4,2,0>: Cost 2 vrev <4,u,0,2>
+ 2083241987U, // <u,4,2,1>: Cost 2 ins <0,4,2,u>, lane 3
+ 2085847041U, // <u,4,2,2>: Cost 2 ins <0,u,2,2>, lane 1
+ 1012113409U, // <u,4,2,3>: Cost 1 ins LHS, lane 1
+ 1994771664U, // <u,4,2,4>: Cost 2 vtrnl LHS, <4,4,4,4>
+ 1994771346U, // <u,4,2,5>: Cost 2 vtrnl LHS, <4,0,5,1>
+ 921029942U, // <u,4,2,6>: Cost 1 vtrnl LHS, RHS
+ 2083241987U, // <u,4,2,7>: Cost 2 ins <0,4,2,u>, lane 3
+ 921029960U, // <u,4,2,u>: Cost 1 vtrnl LHS, RHS
+ 2091876353U, // <u,4,3,0>: Cost 2 ins <1,u,3,0>, lane 1
+ 2954070192U, // <u,4,3,1>: Cost 3 vzipr LHS, <3,0,4,1>
+ 2091892737U, // <u,4,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 2091900929U, // <u,4,3,3>: Cost 2 ins <1,u,3,3>, lane 1
+ 1928105168U, // <u,4,3,4>: Cost 2 vzipr LHS, <4,4,4,4>
+ 1880327886U, // <u,4,3,5>: Cost 2 vzipr LHS, <2,3,4,5>
+ 1880326348U, // <u,4,3,6>: Cost 2 vzipr LHS, <0,2,4,6>
+ 2091933697U, // <u,4,3,7>: Cost 2 ins <1,u,3,7>, lane 1
+ 1880326350U, // <u,4,3,u>: Cost 2 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2107277315U, // <u,4,4,1>: Cost 2 ins <4,4,4,u>, lane 3
+ 2107277315U, // <u,4,4,2>: Cost 2 ins <4,4,4,u>, lane 3
+ 2086002689U, // <u,4,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2109923329U, // <u,4,4,7>: Cost 2 ins <4,u,4,7>, lane 1
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2101379075U, // <u,4,5,1>: Cost 2 ins <3,4,5,u>, lane 3
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2101379075U, // <u,4,5,3>: Cost 2 ins <3,4,5,u>, lane 3
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 789794102U, // <u,4,5,5>: Cost 1 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 1756368182U, // <u,4,5,7>: Cost 2 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482048178U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, <0,u,4,6>
+ 2107424771U, // <u,4,6,1>: Cost 2 ins <4,4,6,u>, lane 3
+ 2110029825U, // <u,4,6,2>: Cost 2 ins <4,u,6,2>, lane 1
+ 2107424771U, // <u,4,6,3>: Cost 2 ins <4,4,6,u>, lane 3
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 1997753234U, // <u,4,6,5>: Cost 2 vtrnl RHS, <4,0,5,1>
+ 924011830U, // <u,4,6,6>: Cost 1 vtrnl RHS, RHS
+ 1036328961U, // <u,4,6,7>: Cost 1 ins RHS, lane 1
+ 924011848U, // <u,4,6,u>: Cost 1 vtrnl RHS, RHS
+ 2116059137U, // <u,4,7,0>: Cost 2 ins <5,u,7,0>, lane 1
+ 2113470467U, // <u,4,7,1>: Cost 2 ins <5,4,7,u>, lane 3
+ 2113470467U, // <u,4,7,2>: Cost 2 ins <5,4,7,u>, lane 3
+ 2116083713U, // <u,4,7,3>: Cost 2 ins <5,u,7,3>, lane 1
+ 1906904272U, // <u,4,7,4>: Cost 2 vzipr RHS, <4,4,4,4>
+ 1906902734U, // <u,4,7,5>: Cost 2 vzipr RHS, <2,3,4,5>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2116116481U, // <u,4,7,7>: Cost 2 ins <5,u,7,7>, lane 1
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482064564U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, <0,u,4,u>
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 1012113409U, // <u,4,u,3>: Cost 1 ins LHS, lane 1
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 791457078U, // <u,4,u,5>: Cost 1 vzipl LHS, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 1036328961U, // <u,4,u,7>: Cost 1 ins RHS, lane 1
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2085683201U, // <u,5,0,0>: Cost 2 ins <0,u,0,0>, lane 1
+ 1034493957U, // <u,5,0,1>: Cost 1 ins RHS, lane 5
+ 1727914086U, // <u,5,0,2>: Cost 2 vuzpl <u,3,5,7>, LHS
+ 2085707777U, // <u,5,0,3>: Cost 2 ins <0,u,0,3>, lane 1
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 1678778497U, // <u,5,0,5>: Cost 2 vuzpl <0,1,5,3>, <0,1,5,3>
+ 2108219394U, // <u,5,0,6>: Cost 2 ins <4,5,u,6>, lane 2
+ 1034485762U, // <u,5,0,7>: Cost 1 ins RHS, lane 2
+ 1034493957U, // <u,5,0,u>: Cost 1 ins RHS, lane 5
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 1860554448U, // <u,5,1,1>: Cost 2 vzipl LHS, <5,1,7,3>
+ 2103689217U, // <u,5,1,2>: Cost 2 ins <3,u,1,2>, lane 1
+ 1750253670U, // <u,5,1,3>: Cost 2 vuzpr <0,u,1,5>, LHS
+ 1505971738U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, <4,u,5,1>
+ 1860554756U, // <u,5,1,5>: Cost 2 vzipl LHS, <5,5,5,5>
+ 1860554850U, // <u,5,1,6>: Cost 2 vzipl LHS, <5,6,7,0>
+ 1034485762U, // <u,5,1,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <u,5,1,u>: Cost 1 ins RHS, lane 2
+ 2085830657U, // <u,5,2,0>: Cost 2 ins <0,u,2,0>, lane 1
+ 1994772608U, // <u,5,2,1>: Cost 2 vtrnl LHS, <5,7,1,3>
+ 2085847041U, // <u,5,2,2>: Cost 2 ins <0,u,2,2>, lane 1
+ 1012113409U, // <u,5,2,3>: Cost 1 ins LHS, lane 1
+ 2085863425U, // <u,5,2,4>: Cost 2 ins <0,u,2,4>, lane 1
+ 1994772484U, // <u,5,2,5>: Cost 2 vtrnl LHS, <5,5,5,5>
+ 2085879809U, // <u,5,2,6>: Cost 2 ins <0,u,2,6>, lane 1
+ 1034485762U, // <u,5,2,7>: Cost 1 ins RHS, lane 2
+ 1012113409U, // <u,5,2,u>: Cost 1 ins LHS, lane 1
+ 2091876353U, // <u,5,3,0>: Cost 2 ins <1,u,3,0>, lane 1
+ 1176121553U, // <u,5,3,1>: Cost 2 vrev <5,u,1,3>
+ 2091892737U, // <u,5,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 2091900929U, // <u,5,3,3>: Cost 2 ins <1,u,3,3>, lane 1
+ 2091909121U, // <u,5,3,4>: Cost 2 ins <1,u,3,4>, lane 1
+ 1928105178U, // <u,5,3,5>: Cost 2 vzipr LHS, <4,4,5,5>
+ 1880328706U, // <u,5,3,6>: Cost 2 vzipr LHS, <3,4,5,6>
+ 945007926U, // <u,5,3,7>: Cost 1 vtrnr LHS, RHS
+ 945007927U, // <u,5,3,u>: Cost 1 vtrnr LHS, RHS
+ 2108170242U, // <u,5,4,0>: Cost 2 ins <4,5,u,0>, lane 2
+ 2108178434U, // <u,5,4,1>: Cost 2 ins <4,5,u,1>, lane 2
+ 2108186626U, // <u,5,4,2>: Cost 2 ins <4,5,u,2>, lane 2
+ 2086002689U, // <u,5,4,3>: Cost 2 ins <0,u,4,3>, lane 1
+ 1845022662U, // <u,5,4,4>: Cost 2 vzipl <5,4,7,6>, <5,4,7,6>
+ 1034493957U, // <u,5,4,5>: Cost 1 ins RHS, lane 5
+ 1727917366U, // <u,5,4,6>: Cost 2 vuzpl <u,3,5,7>, RHS
+ 1034485762U, // <u,5,4,7>: Cost 1 ins RHS, lane 2
+ 1034493957U, // <u,5,4,u>: Cost 1 ins RHS, lane 5
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 1863536336U, // <u,5,5,1>: Cost 2 vzipl RHS, <5,1,7,3>
+ 2108186626U, // <u,5,5,2>: Cost 2 ins <4,5,u,2>, lane 2
+ 2086076417U, // <u,5,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 1506004510U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, <4,u,5,5>
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 1863536738U, // <u,5,5,6>: Cost 2 vzipl RHS, <5,6,7,0>
+ 1034485762U, // <u,5,5,7>: Cost 1 ins RHS, lane 2
+ 1034485762U, // <u,5,5,u>: Cost 1 ins RHS, lane 2
+ 1034346499U, // <u,5,6,0>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,5,6,1>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,5,6,2>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,5,6,3>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,5,6,4>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,5,6,5>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,5,6,6>: Cost 1 ins RHS, lane 3
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2114134019U, // <u,5,7,2>: Cost 2 ins <5,5,7,u>, lane 3
+ 2133999620U, // <u,5,7,3>: Cost 2 ins <u,u,7,3>, lane 4
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 1906903554U, // <u,5,7,6>: Cost 2 vzipr RHS, <3,4,5,6>
+ 969223478U, // <u,5,7,7>: Cost 1 vtrnr RHS, RHS
+ 969223479U, // <u,5,7,u>: Cost 1 vtrnr RHS, RHS
+ 1034346499U, // <u,5,u,0>: Cost 1 ins RHS, lane 3
+ 1034493957U, // <u,5,u,1>: Cost 1 ins RHS, lane 5
+ 1034346499U, // <u,5,u,2>: Cost 1 ins RHS, lane 3
+ 1012113409U, // <u,5,u,3>: Cost 1 ins LHS, lane 1
+ 1034346499U, // <u,5,u,4>: Cost 1 ins RHS, lane 3
+ 1034493957U, // <u,5,u,5>: Cost 1 ins RHS, lane 5
+ 1034346499U, // <u,5,u,6>: Cost 1 ins RHS, lane 3
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 1729314816U, // <u,6,0,0>: Cost 2 vuzpl RHS, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 655573094U, // <u,6,0,2>: Cost 1 vuzpl RHS, LHS
+ 2108309507U, // <u,6,0,3>: Cost 2 ins <4,6,0,u>, lane 3
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2108309507U, // <u,6,0,5>: Cost 2 ins <4,6,0,u>, lane 3
+ 2108882946U, // <u,6,0,6>: Cost 2 ins <4,6,u,6>, lane 2
+ 1886940470U, // <u,6,0,7>: Cost 2 vzipr <1,2,u,0>, RHS
+ 655573148U, // <u,6,0,u>: Cost 1 vuzpl RHS, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 1729315636U, // <u,6,1,1>: Cost 2 vuzpl RHS, <1,1,1,1>
+ 1860555258U, // <u,6,1,2>: Cost 2 vzipl LHS, <6,2,7,3>
+ 1750335590U, // <u,6,1,3>: Cost 2 vuzpr <0,u,2,6>, LHS
+ 2114838530U, // <u,6,1,4>: Cost 2 ins <5,6,u,4>, lane 2
+ 1729315840U, // <u,6,1,5>: Cost 2 vuzpl RHS, <1,3,5,7>
+ 1860555576U, // <u,6,1,6>: Cost 2 vzipl LHS, <6,6,6,6>
+ 1884958006U, // <u,6,1,7>: Cost 2 vzipr <0,u,u,1>, RHS
+ 1750335595U, // <u,6,1,u>: Cost 2 vuzpr <0,u,2,6>, LHS
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2085838849U, // <u,6,2,1>: Cost 2 ins <0,u,2,1>, lane 1
+ 1729316456U, // <u,6,2,2>: Cost 2 vuzpl RHS, <2,2,2,2>
+ 1012113409U, // <u,6,2,3>: Cost 1 ins LHS, lane 1
+ 1506053668U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, <4,u,6,2>
+ 2085871617U, // <u,6,2,5>: Cost 2 ins <0,u,2,5>, lane 1
+ 1994773304U, // <u,6,2,6>: Cost 2 vtrnl LHS, <6,6,6,6>
+ 1880984886U, // <u,6,2,7>: Cost 2 vzipr <0,2,u,2>, RHS
+ 1012113409U, // <u,6,2,u>: Cost 1 ins LHS, lane 1
+ 2066526306U, // <u,6,3,0>: Cost 2 vtrnr LHS, <5,6,7,0>
+ 1729317014U, // <u,6,3,1>: Cost 2 vuzpl RHS, <3,0,1,2>
+ 1928104860U, // <u,6,3,2>: Cost 2 vzipr LHS, <4,0,6,2>
+ 1729317276U, // <u,6,3,3>: Cost 2 vuzpl RHS, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 1729317378U, // <u,6,3,5>: Cost 2 vuzpl RHS, <3,4,5,6>
+ 1928105188U, // <u,6,3,6>: Cost 2 vzipr LHS, <4,4,6,6>
+ 806587702U, // <u,6,3,7>: Cost 1 vzipr LHS, RHS
+ 806587703U, // <u,6,3,u>: Cost 1 vzipr LHS, RHS
+ 1729318220U, // <u,6,4,0>: Cost 2 vuzpl RHS, <4,6,0,2>
+ 2108604419U, // <u,6,4,1>: Cost 2 ins <4,6,4,u>, lane 3
+ 2108850178U, // <u,6,4,2>: Cost 2 ins <4,6,u,2>, lane 2
+ 2108604419U, // <u,6,4,3>: Cost 2 ins <4,6,4,u>, lane 3
+ 1729318096U, // <u,6,4,4>: Cost 2 vuzpl RHS, <4,4,4,4>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 655576374U, // <u,6,4,6>: Cost 1 vuzpl RHS, RHS
+ 1886973238U, // <u,6,4,7>: Cost 2 vzipr <1,2,u,4>, RHS
+ 655576392U, // <u,6,4,u>: Cost 1 vuzpl RHS, RHS
+ 2114805762U, // <u,6,5,0>: Cost 2 ins <5,6,u,0>, lane 2
+ 1729319040U, // <u,6,5,1>: Cost 2 vuzpl RHS, <5,7,1,3>
+ 1863537146U, // <u,6,5,2>: Cost 2 vzipl RHS, <6,2,7,3>
+ 2086076417U, // <u,6,5,3>: Cost 2 ins <0,u,5,3>, lane 1
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 1729318916U, // <u,6,5,5>: Cost 2 vuzpl RHS, <5,5,5,5>
+ 1863537464U, // <u,6,5,6>: Cost 2 vzipl RHS, <6,6,6,6>
+ 1750338870U, // <u,6,5,7>: Cost 2 vuzpr <0,u,2,6>, RHS
+ 1750338871U, // <u,6,5,u>: Cost 2 vuzpr <0,u,2,6>, RHS
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2110021633U, // <u,6,6,1>: Cost 2 ins <4,u,6,1>, lane 1
+ 2110029825U, // <u,6,6,2>: Cost 2 ins <4,u,6,2>, lane 1
+ 2086150145U, // <u,6,6,3>: Cost 2 ins <0,u,6,3>, lane 1
+ 1506086440U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, <4,u,6,6>
+ 2110054401U, // <u,6,6,5>: Cost 2 ins <4,u,6,5>, lane 1
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1036328961U, // <u,6,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <u,6,6,u>: Cost 1 ins RHS, lane 1
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1906903964U, // <u,6,7,2>: Cost 2 vzipr RHS, <4,0,6,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1906904292U, // <u,6,7,6>: Cost 2 vzipr RHS, <4,4,6,6>
+ 833162550U, // <u,6,7,7>: Cost 1 vzipr RHS, RHS
+ 833162551U, // <u,6,7,u>: Cost 1 vzipr RHS, RHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 655578926U, // <u,6,u,2>: Cost 1 vuzpl RHS, LHS
+ 1012113409U, // <u,6,u,3>: Cost 1 ins LHS, lane 1
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 655579290U, // <u,6,u,6>: Cost 1 vuzpl RHS, RHS
+ 806628662U, // <u,6,u,7>: Cost 1 vzipr LHS, RHS
+ 806628663U, // <u,6,u,u>: Cost 1 vzipr LHS, RHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2115493890U, // <u,7,0,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2120916995U, // <u,7,0,6>: Cost 2 ins <6,7,0,u>, lane 3
+ 2115526658U, // <u,7,0,7>: Cost 2 ins <5,7,u,7>, lane 2
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 700784742U, // <u,7,1,3>: Cost 1 vuzpr RHS, LHS
+ 1860556134U, // <u,7,1,4>: Cost 2 vzipl LHS, <7,4,5,6>
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2115018755U, // <u,7,1,6>: Cost 2 ins <5,7,1,u>, lane 3
+ 1860556396U, // <u,7,1,7>: Cost 2 vzipl LHS, <7,7,7,7>
+ 700784747U, // <u,7,1,u>: Cost 1 vuzpr RHS, LHS
+ 1774527382U, // <u,7,2,0>: Cost 2 vuzpr RHS, <1,2,3,0>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1012113409U, // <u,7,2,3>: Cost 1 ins LHS, lane 1
+ 1774527386U, // <u,7,2,4>: Cost 2 vuzpr RHS, <1,2,3,4>
+ 1994773862U, // <u,7,2,5>: Cost 2 vtrnl LHS, <7,4,5,6>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 1994774124U, // <u,7,2,7>: Cost 2 vtrnl LHS, <7,7,7,7>
+ 1012113409U, // <u,7,2,u>: Cost 1 ins LHS, lane 1
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 1774528166U, // <u,7,3,1>: Cost 2 vuzpr RHS, <2,3,0,1>
+ 2091892737U, // <u,7,3,2>: Cost 2 ins <1,u,3,2>, lane 1
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1774528206U, // <u,7,3,5>: Cost 2 vuzpr RHS, <2,3,4,5>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 1774527488U, // <u,7,3,7>: Cost 2 vuzpr RHS, <1,3,5,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2121449474U, // <u,7,4,1>: Cost 2 ins <6,7,u,1>, lane 2
+ 2121211907U, // <u,7,4,2>: Cost 2 ins <6,7,4,u>, lane 3
+ 2115493890U, // <u,7,4,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360076U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,0,2>
+ 2115526658U, // <u,7,4,7>: Cost 2 ins <5,7,u,7>, lane 2
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 1863537658U, // <u,7,5,0>: Cost 2 vzipl RHS, <7,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2115313667U, // <u,7,5,2>: Cost 2 ins <5,7,5,u>, lane 3
+ 2115493890U, // <u,7,5,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 700788022U, // <u,7,5,7>: Cost 1 vuzpr RHS, RHS
+ 700788023U, // <u,7,5,u>: Cost 1 vuzpr RHS, RHS
+ 1774530658U, // <u,7,6,0>: Cost 2 vuzpr RHS, <5,6,7,0>
+ 1997755386U, // <u,7,6,1>: Cost 2 vtrnl RHS, <7,0,1,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2115493890U, // <u,7,6,3>: Cost 2 ins <5,7,u,3>, lane 2
+ 1774530662U, // <u,7,6,4>: Cost 2 vuzpr RHS, <5,6,7,4>
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1036328961U, // <u,7,6,7>: Cost 1 ins RHS, lane 1
+ 1036328961U, // <u,7,6,u>: Cost 1 ins RHS, lane 1
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 1774531406U, // <u,7,7,1>: Cost 2 vuzpr RHS, <6,7,0,1>
+ 2127405059U, // <u,7,7,2>: Cost 2 ins <7,7,7,u>, lane 3
+ 1906904784U, // <u,7,7,3>: Cost 2 vzipr RHS, <5,1,7,3>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1774531446U, // <u,7,7,5>: Cost 2 vuzpr RHS, <6,7,4,5>
+ 1906905030U, // <u,7,7,6>: Cost 2 vzipr RHS, <5,4,7,6>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 700785309U, // <u,7,u,3>: Cost 1 vuzpr RHS, LHS
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 700788265U, // <u,7,u,7>: Cost 1 vuzpr RHS, RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 653033574U, // <u,u,0,2>: Cost 1 vuzpl LHS, LHS
+ 1007910914U, // <u,u,0,3>: Cost 1 ins LHS, lane 2
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1995282586U, // <u,u,0,6>: Cost 2 vtrnl <u,2,0,2>, RHS
+ 1034485762U, // <u,u,0,7>: Cost 1 ins RHS, lane 2
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 786814766U, // <u,u,1,1>: Cost 1 vzipl LHS, LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 676610150U, // <u,u,1,3>: Cost 1 vuzpr LHS, LHS
+ 1482304822U, // <u,u,1,4>: Cost 2 vext1 <0,u,u,1>, RHS
+ 786815130U, // <u,u,1,5>: Cost 1 vzipl LHS, RHS
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1034485762U, // <u,u,1,7>: Cost 1 ins RHS, lane 2
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1007509507U, // <u,u,2,0>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,u,2,1>: Cost 1 ins LHS, lane 3
+ 921032494U, // <u,u,2,2>: Cost 1 vtrnl LHS, LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1007509507U, // <u,u,2,4>: Cost 1 ins LHS, lane 3
+ 1007509507U, // <u,u,2,5>: Cost 1 ins LHS, lane 3
+ 921032858U, // <u,u,2,6>: Cost 1 vtrnl LHS, RHS
+ 1007509507U, // <u,u,2,7>: Cost 1 ins LHS, lane 3
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1880327918U, // <u,u,3,1>: Cost 2 vzipr LHS, <2,3,u,1>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 806584476U, // <u,u,3,3>: Cost 1 vzipr LHS, LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1880327922U, // <u,u,3,5>: Cost 2 vzipr LHS, <2,3,u,5>
+ 1880326384U, // <u,u,3,6>: Cost 2 vzipr LHS, <0,2,u,6>
+ 806587720U, // <u,u,3,7>: Cost 1 vzipr LHS, RHS
+ 806584481U, // <u,u,3,u>: Cost 1 vzipr LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1007910914U, // <u,u,4,3>: Cost 1 ins LHS, lane 2
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 653036854U, // <u,u,4,6>: Cost 1 vuzpl LHS, RHS
+ 1034485762U, // <u,u,4,7>: Cost 1 ins RHS, lane 2
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1482334933U, // <u,u,5,0>: Cost 2 vext1 <0,u,u,5>, <0,u,u,5>
+ 789796654U, // <u,u,5,1>: Cost 1 vzipl RHS, LHS
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1007910914U, // <u,u,5,3>: Cost 1 ins LHS, lane 2
+ 1482337590U, // <u,u,5,4>: Cost 2 vext1 <0,u,u,5>, RHS
+ 789797018U, // <u,u,5,5>: Cost 1 vzipl RHS, RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 676613430U, // <u,u,5,7>: Cost 1 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1034346499U, // <u,u,6,0>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,u,6,1>: Cost 1 ins RHS, lane 3
+ 924014382U, // <u,u,6,2>: Cost 1 vtrnl RHS, LHS
+ 1007910914U, // <u,u,6,3>: Cost 1 ins LHS, lane 2
+ 1034346499U, // <u,u,6,4>: Cost 1 ins RHS, lane 3
+ 1034346499U, // <u,u,6,5>: Cost 1 ins RHS, lane 3
+ 924014746U, // <u,u,6,6>: Cost 1 vtrnl RHS, RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 833159324U, // <u,u,7,3>: Cost 1 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1906901393U, // <u,u,7,5>: Cost 2 vzipr RHS, <0,4,u,5>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 833162568U, // <u,u,7,7>: Cost 1 vzipr RHS, RHS
+ 833159329U, // <u,u,7,u>: Cost 1 vzipr RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0};
+
+static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
+ assert(M.size() == 4 && "Expected a 4 entry perfect shuffle");
+
+ // Special case zero-cost nop copies, from either LHS or RHS.
+ if (llvm::all_of(llvm::enumerate(M), [](auto &E) {
+ return E.value() < 0 || E.value() == (int)E.index();
+ }))
+ return 0;
+ if (llvm::all_of(llvm::enumerate(M), [](auto &E) {
+ return E.value() < 0 || E.value() == (int)E.index() + 4;
+ }))
+ return 0;
+
+ // Get the four mask elements from the 2 inputs. Perfect shuffles encode undef
+ // elements with value 8.
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ assert(M[i] < 8 && "Expected shuffle mask entries below 8");
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
+ PFIndexes[2] * 9 + PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ // And extract the cost from the upper bits. The cost is encoded as Cost-1.
+ return (PFEntry >> 30) + 1;
+}
#endif
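
The cost lookup above is a base-9 positional encoding: each of the four mask entries maps to a digit 0-8 (8 encodes an undef lane), the four digits index the 9^4-entry table, and the cost is stored biased by one in the top two bits of each 32-bit entry. Below is a minimal standalone C++ sketch of that encoding and decode; the table entry used here is a made-up placeholder, not a value from the real PerfectShuffleTable.

// Standalone sketch of the perfect-shuffle index/cost decoding.
#include <cassert>
#include <cstdio>

static unsigned decodeCost(unsigned Entry) {
  // Cost lives in bits 31:30 and is stored as Cost-1.
  return (Entry >> 30) + 1;
}

static unsigned tableIndex(const int M[4]) {
  unsigned Idx = 0;
  for (int i = 0; i < 4; ++i) {
    assert(M[i] < 8 && "shuffle mask entries must be below 8");
    unsigned Digit = M[i] < 0 ? 8u : (unsigned)M[i]; // 8 encodes undef
    Idx = Idx * 9 + Digit;                           // base-9 accumulation
  }
  return Idx;
}

int main() {
  int Mask[4] = {0, -1, 4, 5};      // -1 marks an undef lane
  unsigned Idx = tableIndex(Mask);  // 0*729 + 8*81 + 4*9 + 5 = 689
  unsigned FakeEntry = 0x40000000u; // placeholder entry: top bits 01 -> cost 2
  std::printf("index = %u, cost = %u\n", Idx, decodeCost(FakeEntry));
  return 0;
}
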
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index d1b901e58d27..f7c06b9fb71b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -19,6 +19,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -32,6 +33,8 @@
using namespace llvm;
+#define GET_CC_REGISTER_LISTS
+#include "AArch64GenCallingConv.inc"
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
@@ -63,14 +66,6 @@ bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg,
return true;
}
-bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
- const Function &F = MF->getFunction();
- return isa<ScalableVectorType>(F.getReturnType()) ||
- any_of(F.args(), [](const Argument &Arg) {
- return isa<ScalableVectorType>(Arg.getType());
- });
-}
-
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
@@ -108,7 +103,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// This is for OSes other than Windows; Windows is a separate case further
// above.
return CSR_AArch64_AAPCS_X18_SaveList;
- if (hasSVEArgsOrReturn(MF))
+ if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
return CSR_AArch64_SVE_AAPCS_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
@@ -335,6 +330,13 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
markSuperRegs(Reserved, AArch64::W16);
+ // SME tiles are not allocatable.
+ if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
+ for (MCSubRegIterator SubReg(AArch64::ZA, this, /*self=*/true);
+ SubReg.isValid(); ++SubReg)
+ Reserved.set(*SubReg);
+ }
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
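
The loop above iterates over ZA and, because of the /*self=*/true flag, includes ZA itself along with every sub-register tile, marking each one reserved so the allocator never hands them out. A toy standalone sketch of that marking pattern, with invented register numbers and an invented sub-register list in place of the real MCSubRegIterator:

// Toy sketch of reserving a register together with its sub-registers.
#include <bitset>
#include <cstdio>
#include <vector>

int main() {
  std::bitset<64> Reserved;
  const unsigned ZA = 10;                         // placeholder register number
  const std::vector<unsigned> SubRegs = {11, 12}; // placeholder tile registers
  Reserved.set(ZA);                               // the /*self=*/true case
  for (unsigned S : SubRegs)
    Reserved.set(S);
  std::printf("%zu registers reserved\n", Reserved.count());
  return 0;
}
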
@@ -417,6 +419,68 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
+bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ CallingConv::ID CC = MF.getFunction().getCallingConv();
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv());
+
+ auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
+ return llvm::any_of(RegList,
+ [Reg](const MCRegister R) { return R == Reg; });
+ };
+
+ switch (CC) {
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ case CallingConv::WebKit_JS:
+ return HasReg(CC_AArch64_WebKit_JS_ArgRegs, Reg);
+ case CallingConv::GHC:
+ return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::PreserveMost:
+ case CallingConv::CXX_FAST_TLS:
+ case CallingConv::Swift:
+ case CallingConv::SwiftTail:
+ case CallingConv::Tail:
+ if (STI.isTargetWindows() && IsVarArg)
+ return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
+ if (!STI.isTargetDarwin()) {
+ switch (CC) {
+ default:
+ return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
+ case CallingConv::Swift:
+ case CallingConv::SwiftTail:
+ return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) ||
+ HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg);
+ }
+ }
+ if (!IsVarArg) {
+ switch (CC) {
+ default:
+ return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg);
+ case CallingConv::Swift:
+ case CallingConv::SwiftTail:
+ return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) ||
+ HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg);
+ }
+ }
+ if (STI.isTargetILP32())
+ return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg);
+ return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg);
+ case CallingConv::Win64:
+ if (IsVarArg)
+ return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
+ return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
+ case CallingConv::CFGuard_Check:
+ return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg);
+ case CallingConv::AArch64_VectorCall:
+ case CallingConv::AArch64_SVE_VectorCall:
+ return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
+ }
+}
+
Register
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const AArch64FrameLowering *TFI = getFrameLowering(MF);
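
isArgumentRegister above boils down to a membership test against the TableGen-generated CC_*_ArgRegs arrays, selected by calling convention (the lists come from the GET_CC_REGISTER_LISTS include added earlier in this file). A standalone sketch of the HasReg test, modelling MCRegister as a plain unsigned and substituting a placeholder register list for the generated one:

// Standalone sketch of the HasReg membership test.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdio>

using MCRegister = unsigned;

template <std::size_t N>
static bool hasReg(const std::array<MCRegister, N> &RegList, MCRegister Reg) {
  return std::any_of(RegList.begin(), RegList.end(),
                     [Reg](MCRegister R) { return R == Reg; });
}

int main() {
  // Placeholder stand-in for a generated CC_*_ArgRegs list.
  const std::array<MCRegister, 4> ArgRegs = {1, 2, 3, 4};
  std::printf("%d %d\n", hasReg(ArgRegs, 3u), hasReg(ArgRegs, 9u)); // 1 0
  return 0;
}
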
@@ -588,23 +652,31 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
// Create a scratch register for the frame index elimination in an instruction.
// This function has special handling of stack tagging loop pseudos, in which
-// case it can also change the instruction opcode (but not the operands).
+// case it can also change the instruction opcode.
static Register
-createScratchRegisterForInstruction(MachineInstr &MI,
+createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum,
const AArch64InstrInfo *TII) {
// ST*Gloop have a reserved scratch register in operand 1. Use it, and also
// replace the instruction with the writeback variant because it will now
// satisfy the operand constraints for it.
- if (MI.getOpcode() == AArch64::STGloop) {
- MI.setDesc(TII->get(AArch64::STGloop_wback));
- return MI.getOperand(1).getReg();
- } else if (MI.getOpcode() == AArch64::STZGloop) {
- MI.setDesc(TII->get(AArch64::STZGloop_wback));
- return MI.getOperand(1).getReg();
+ Register ScratchReg;
+ if (MI.getOpcode() == AArch64::STGloop ||
+ MI.getOpcode() == AArch64::STZGloop) {
+ assert(FIOperandNum == 3 &&
+ "Wrong frame index operand for STGloop/STZGloop");
+ unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback
+ : AArch64::STZGloop_wback;
+ ScratchReg = MI.getOperand(1).getReg();
+ MI.getOperand(3).ChangeToRegister(ScratchReg, false, false, true);
+ MI.setDesc(TII->get(Op));
+ MI.tieOperands(1, 3);
} else {
- return MI.getMF()->getRegInfo().createVirtualRegister(
- &AArch64::GPR64RegClass);
+ ScratchReg =
+ MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(ScratchReg, false, false, true);
}
+ return ScratchReg;
}
void AArch64RegisterInfo::getOffsetOpcodes(
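
In the STGloop/STZGloop path above, the frame-index operand is rewritten to the pseudo's own scratch register and then tied to operand 1, telling the register allocator that the writeback def and the rewritten use must receive the same register. A toy model of that tie constraint, using invented types that deliberately do not match the real llvm::MachineInstr API:

// Toy model of tied machine-instruction operands.
#include <cassert>
#include <optional>
#include <vector>

struct Operand {
  unsigned Reg = 0;
  std::optional<unsigned> TiedTo; // index of the operand this one is tied to
};

struct Instr {
  std::vector<Operand> Ops;
  // Mirrors the intent of MachineInstr::tieOperands(DefIdx, UseIdx): the
  // allocator must assign both operands the same register.
  void tieOperands(unsigned DefIdx, unsigned UseIdx) {
    Ops[DefIdx].TiedTo = UseIdx;
    Ops[UseIdx].TiedTo = DefIdx;
  }
};

int main() {
  Instr STGloopWback;
  STGloopWback.Ops.resize(4);
  STGloopWback.Ops[1].Reg = 42; // scratch register defined by the pseudo
  STGloopWback.Ops[3].Reg = 42; // frame-index operand rewritten to it
  STGloopWback.tieOperands(1, 3);
  assert(STGloopWback.Ops[3].TiedTo && *STGloopWback.Ops[3].TiedTo == 1);
  return 0;
}
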
@@ -721,9 +793,9 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above. Handle the rest, providing a register that is
// SP+LargeImm.
- Register ScratchReg = createScratchRegisterForInstruction(MI, TII);
+ Register ScratchReg =
+ createScratchRegisterForInstruction(MI, FIOperandNum, TII);
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
- MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
}
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 0c871ac089a7..12dd70fa4aa8 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -42,8 +42,6 @@ public:
void UpdateCustomCallPreservedMask(MachineFunction &MF,
const uint32_t **Mask) const;
- static bool hasSVEArgsOrReturn(const MachineFunction *MF);
-
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const;
@@ -120,6 +118,9 @@ public:
bool hasBasePointer(const MachineFunction &MF) const;
unsigned getBaseRegister() const;
+ bool isArgumentRegister(const MachineFunction &MF,
+ MCRegister Reg) const override;
+
// Debug information queries.
Register getFrameRegister(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 70daf5abf81d..7a2b165570cb 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -871,7 +871,7 @@ class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
// SVE predicate register classes.
class PPRClass<int lastreg> : RegisterClass<
"AArch64",
- [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16,
+ [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
(sequence "P%u", 0, lastreg)> {
let Size = 16;
}
@@ -1212,26 +1212,28 @@ let SubRegIndices = [zasubb] in {
// SME Register Classes
-// Accumulator array
-def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
- let Size = 2048;
-}
+let isAllocatable = 0 in {
+ // Accumulator array
+ def MPR : RegisterClass<"AArch64", [untyped], 2048, (add ZA)> {
+ let Size = 2048;
+ }
-// Accumulator array as single tiles
-def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
- let Size = 2048;
-}
-def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
- let Size = 1024;
-}
-def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
- let Size = 512;
-}
-def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
- let Size = 256;
-}
-def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
- let Size = 128;
+ // Accumulator array as single tiles
+ def MPR8 : RegisterClass<"AArch64", [untyped], 2048, (add (sequence "ZAB%u", 0, 0))> {
+ let Size = 2048;
+ }
+ def MPR16 : RegisterClass<"AArch64", [untyped], 1024, (add (sequence "ZAH%u", 0, 1))> {
+ let Size = 1024;
+ }
+ def MPR32 : RegisterClass<"AArch64", [untyped], 512, (add (sequence "ZAS%u", 0, 3))> {
+ let Size = 512;
+ }
+ def MPR64 : RegisterClass<"AArch64", [untyped], 256, (add (sequence "ZAD%u", 0, 7))> {
+ let Size = 256;
+ }
+ def MPR128 : RegisterClass<"AArch64", [untyped], 128, (add (sequence "ZAQ%u", 0, 15))> {
+ let Size = 128;
+ }
}
// SME Register Operands
@@ -1385,3 +1387,12 @@ def svcr_op : Operand<i32> {
return AArch64SVCR::lookupSVCRByEncoding(MCOp.getImm()) != nullptr;
}];
}
+
+//===----------------------------------------------------------------------===//
+// Register categories.
+//
+
+def GeneralPurposeRegisters : RegisterCategory<[GPR64, GPR32]>;
+
+def FIXED_REGS : RegisterClass<"AArch64", [i64], 64, (add FP, SP, VG, FFR)>;
+def FixedRegisters : RegisterCategory<[CCR, FIXED_REGS]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
index c4965e7146ff..364ce687fd55 100644
--- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -360,8 +360,8 @@ AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB,
assert(ImpSPOpIdx != -1);
int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
- BL->RemoveOperand(FirstOpIdxToRemove);
- BL->RemoveOperand(SecondOpIdxToRemove);
+ BL->removeOperand(FirstOpIdxToRemove);
+ BL->removeOperand(SecondOpIdxToRemove);
// Now copy over the implicit operands from the original BLR
BL->copyImplicitOps(MF, BLR);
MF.moveCallSiteInfo(&BLR, BL);
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index aacace64e998..e595d20c8d4e 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -14,9 +14,18 @@
// Add vector elements horizontally or vertically to ZA tile.
//===----------------------------------------------------------------------===//
+def SDT_AArch64RDSVL : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
+def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>;
+
let Predicates = [HasSME] in {
+def RDSVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>;
+def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>;
+def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>;
+
def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">;
def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">;
+
+def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
}
let Predicates = [HasSMEI64] in {
@@ -29,41 +38,41 @@ let Predicates = [HasSME] in {
// Outer products
//===----------------------------------------------------------------------===//
-defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">;
-defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">;
+defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa", int_aarch64_sme_mopa_wide>;
+defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops", int_aarch64_sme_mops_wide>;
-def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">;
-def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">;
+defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>;
+defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>;
}
let Predicates = [HasSMEF64] in {
-def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">;
-def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">;
+defm FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa", int_aarch64_sme_mopa>;
+defm FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops", int_aarch64_sme_mops>;
}
let Predicates = [HasSME] in {
-defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa">;
-defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">;
-
-def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">;
-def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">;
-def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">;
-def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">;
-def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">;
-def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">;
-def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">;
-def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">;
+defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa", int_aarch64_sme_mopa_wide>;
+defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops", int_aarch64_sme_mops_wide>;
+
+defm SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa", int_aarch64_sme_smopa_wide>;
+defm SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops", int_aarch64_sme_smops_wide>;
+defm UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa", int_aarch64_sme_umopa_wide>;
+defm UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops", int_aarch64_sme_umops_wide>;
+defm SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
+defm SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops", int_aarch64_sme_sumops_wide>;
+defm USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
+defm USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops", int_aarch64_sme_usmops_wide>;
}
let Predicates = [HasSMEI64] in {
-def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">;
-def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">;
-def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">;
-def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">;
-def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">;
-def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">;
-def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">;
-def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">;
+defm SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa", int_aarch64_sme_smopa_wide>;
+defm SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops", int_aarch64_sme_smops_wide>;
+defm UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa", int_aarch64_sme_umopa_wide>;
+defm UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops", int_aarch64_sme_umops_wide>;
+defm SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
+defm SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops", int_aarch64_sme_sumops_wide>;
+defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
+defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>;
}
let Predicates = [HasSME] in {
@@ -129,15 +138,21 @@ def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>;
def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>;
def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>;
+// Read and write TPIDR2_EL0
+def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
+ (MSR 0xde85, GPR64:$val)>;
+def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
+ (MRS 0xde85)>;
+
//===----------------------------------------------------------------------===//
// SVE2 instructions
//===----------------------------------------------------------------------===//
-def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
+defm REVD_ZPmZ : sve2_int_perm_revd<"revd", AArch64revd_mt>;
-defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
-defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
+defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, int_aarch64_sve_sclamp>;
+defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, int_aarch64_sve_uclamp>;
-defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">;
+defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
} // End let Predicates = [HasSME]
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 1d162610de9c..68ff1b78e84b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -165,8 +165,8 @@ def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
def SDT_AArch64Arith : SDTypeProfile<1, 3, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>
+ SDTCisVec<0>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>,
+ SDTCisSameAs<2,3>, SDTCisSameNumEltsAs<0,1>
]>;
def SDT_AArch64FMA : SDTypeProfile<1, 4, [
@@ -175,7 +175,6 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [
]>;
// Predicated operations with the result of inactive lanes being unspecified.
-def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
@@ -194,7 +193,6 @@ def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
-def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
@@ -235,6 +233,7 @@ def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch
def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64revd_mt : SDNode<"AArch64ISD::REVD_MERGE_PASSTHRU", SDT_AArch64Arith>;
// These are like the above but we don't yet have need for ISD nodes. They allow
// a single pattern to match intrinsic and ISD operand layouts.
@@ -242,6 +241,26 @@ def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_
def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>;
def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>;
+def AArch64fmul_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_fmul, AArch64fmul_p>;
+def AArch64fadd_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_fadd, AArch64fadd_p>;
+def AArch64fsub_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_fsub, AArch64fsub_p>;
+
+def AArch64saba : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_saba node:$op1, node:$op2, node:$op3),
+ (add node:$op1, (AArch64sabd_p (SVEAllActive), node:$op2, node:$op3))]>;
+
+def AArch64uaba : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_uaba node:$op1, node:$op2, node:$op3),
+ (add node:$op1, (AArch64uabd_p (SVEAllActive), node:$op2, node:$op3))]>;
+
+def AArch64usra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_usra node:$op1, node:$op2, node:$op3),
+ (add node:$op1, (AArch64lsr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+
+def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3),
+ (add node:$op1, (AArch64asr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+
def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>
@@ -282,6 +301,14 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
(AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
+// FMAs with a negated multiplication operand can be commuted.
+def AArch64fmls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+ [(AArch64fma_p node:$pred, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op2, node:$op3),
+ (AArch64fma_p node:$pred, node:$op2, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op3)]>;
+
+def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
+ (AArch64fsub_p node:$pg, node:$op2, node:$op1)>;
+
def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
(AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
return N->getFlags().hasNoSignedZeros();
@@ -295,11 +322,14 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
- [(and node:$op1, (xor node:$op2, (AArch64dup (i32 -1)))),
- (and node:$op1, (xor node:$op2, (AArch64dup (i64 -1)))),
+ [(and node:$op1, (xor node:$op2, (splat_vector (i32 -1)))),
+ (and node:$op1, (xor node:$op2, (splat_vector (i64 -1)))),
(and node:$op1, (xor node:$op2, (SVEAllActive))),
(AArch64bic_node node:$op1, node:$op2)]>;
+def AArch64subr : PatFrag<(ops node:$op1, node:$op2),
+ (sub node:$op2, node:$op1)>;
+
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
@@ -308,7 +338,7 @@ let Predicates = [HasSVE] in {
def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>;
defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>;
@@ -325,25 +355,27 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
- defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
- defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>;
-} // End HasSVEorStreamingSVE
+ defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", int_aarch64_sve_orr, DestructiveBinaryComm>;
+ defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", int_aarch64_sve_eor, DestructiveBinaryComm>;
+ defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", "AND_ZPZZ", int_aarch64_sve_and, DestructiveBinaryComm>;
+ defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", "BIC_ZPZZ", int_aarch64_sve_bic, DestructiveBinary>;
+} // End HasSVEorSME
-let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
-} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
-let Predicates = [HasSVEorStreamingSVE] in {
- defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
- defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
- defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
- defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic>;
+ defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_orr>;
+ defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_eor>;
+ defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_and>;
+ defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd<null_frag>;
+} // End HasSVEorSME, UseExperimentalZeroingPseudos
+let Predicates = [HasSVEorSME] in {
defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>;
defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>;
- defm SUBR_ZI : sve_int_arith_imm0_subr<0b011, "subr", sub>;
+ defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
@@ -440,11 +472,11 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
-
+
defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fadd_p>;
defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsub_p>;
defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, AArch64fmul_p>;
- defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one>;
+ defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsubr_p>;
defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmaxnm_p>;
defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fminnm_p>;
defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmax_p>;
@@ -461,9 +493,9 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
}
- defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
- defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
- defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", int_aarch64_sve_fmul, DestructiveBinaryComm>;
+ defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", AArch64fadd_m1, DestructiveBinaryComm>;
+ defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
+ defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
@@ -484,9 +516,9 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>;
defm FABD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fabd_p>;
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
-let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
@@ -499,28 +531,28 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
-} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+} // End HasSVEorSME, UseExperimentalZeroingPseudos
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", AArch64frecps>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", AArch64frsqrts>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
@@ -545,7 +577,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
(!cast<Instruction>("FMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
// Zd = Za + -Zn * Zm
- def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, Ty:$Za)),
+ def : Pat<(Ty (AArch64fmls_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)),
(!cast<Instruction>("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
// Zd = -Za + Zn * Zm
@@ -576,26 +608,26 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm : fma<nxv4f32, nxv4i1, "S">;
defm : fma<nxv2f32, nxv2i1, "S">;
defm : fma<nxv2f64, nxv2i1, "D">;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
// SVE floating point reductions.
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>;
defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>;
defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>;
@@ -613,7 +645,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;
// Splat scalar register (unpredicated, GPR or vector + element index)
- defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>;
+ defm DUP_ZR : sve_int_perm_dup_r<"dup", splat_vector>;
defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
// Splat scalar register (predicated)
@@ -621,61 +653,67 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;
// Duplicate FP scalar into all vector elements
- def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
+ def : Pat<(nxv8f16 (splat_vector (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
- def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))),
+ def : Pat<(nxv4f16 (splat_vector (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
- def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))),
+ def : Pat<(nxv2f16 (splat_vector (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
- def : Pat<(nxv4f32 (AArch64dup (f32 FPR32:$src))),
+ def : Pat<(nxv4f32 (splat_vector (f32 FPR32:$src))),
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
- def : Pat<(nxv2f32 (AArch64dup (f32 FPR32:$src))),
+ def : Pat<(nxv2f32 (splat_vector (f32 FPR32:$src))),
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
- def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
+ def : Pat<(nxv2f64 (splat_vector (f64 FPR64:$src))),
(DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
- def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+ def : Pat<(nxv8bf16 (splat_vector (bf16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ def : Pat<(nxv4bf16 (splat_vector (bf16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ def : Pat<(nxv2bf16 (splat_vector (bf16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
// Duplicate +0.0 into all vector elements
- def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
- def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
- def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
- def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
- def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
- def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
- def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv8f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv4f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv2f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv4f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
+ def : Pat<(nxv2f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
+ def : Pat<(nxv2f64 (splat_vector (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
+ def : Pat<(nxv8bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv4bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv2bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
// Duplicate Int immediate into all vector elements
- def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))),
+ def : Pat<(nxv16i8 (splat_vector (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))),
(DUP_ZI_B $a, $b)>;
- def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))),
+ def : Pat<(nxv8i16 (splat_vector (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)))),
(DUP_ZI_H $a, $b)>;
- def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))),
+ def : Pat<(nxv4i32 (splat_vector (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)))),
(DUP_ZI_S $a, $b)>;
- def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm64 i32:$a, i32:$b)))),
+ def : Pat<(nxv2i64 (splat_vector (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)))),
(DUP_ZI_D $a, $b)>;
// Duplicate immediate FP into all vector elements.
- def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))),
+ def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))),
(DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
- def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))),
+ def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))),
(DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
- def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))),
+ def : Pat<(nxv2f64 (splat_vector (f64 fpimm:$val))),
(DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
// Duplicate FP immediate into all vector elements
let AddedComplexity = 2 in {
- def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)),
+ def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)),
(FDUP_ZI_H fpimm16:$imm8)>;
- def : Pat<(nxv4f16 (AArch64dup fpimm16:$imm8)),
+ def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)),
(FDUP_ZI_H fpimm16:$imm8)>;
- def : Pat<(nxv2f16 (AArch64dup fpimm16:$imm8)),
+ def : Pat<(nxv2f16 (splat_vector fpimm16:$imm8)),
(FDUP_ZI_H fpimm16:$imm8)>;
- def : Pat<(nxv4f32 (AArch64dup fpimm32:$imm8)),
+ def : Pat<(nxv4f32 (splat_vector fpimm32:$imm8)),
(FDUP_ZI_S fpimm32:$imm8)>;
- def : Pat<(nxv2f32 (AArch64dup fpimm32:$imm8)),
+ def : Pat<(nxv2f32 (splat_vector fpimm32:$imm8)),
(FDUP_ZI_S fpimm32:$imm8)>;
- def : Pat<(nxv2f64 (AArch64dup fpimm64:$imm8)),
+ def : Pat<(nxv2f64 (splat_vector fpimm64:$imm8)),
(FDUP_ZI_D fpimm64:$imm8)>;
}
@@ -683,13 +721,13 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
@@ -710,16 +748,21 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>;
defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>;
+ // Define pattern for `nxv1i1 splat_vector(1)`.
+  // We do this here instead of in ISelLowering so that PatFrags can still
+  // recognize a splat.
+ def : Pat<(nxv1i1 immAllOnesV), (PUNPKLO_PP (PTRUE_D 31))>;
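+  // Note: (PTRUE_D 31) materialises an all-true nxv2i1 (predicate pattern ALL)
+  // and PUNPKLO_PP unpacks its low half, leaving an all-true nxv1i1.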
+
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>;
defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>;
defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", int_aarch64_sve_brkpb_z>;
@@ -831,7 +874,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>;
defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>;
defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
// non-faulting continuous load with reg+immediate
@@ -871,7 +914,7 @@ let Predicates = [HasSVE] in {
defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
// LD(2|3|4) structured loads with reg+immediate
defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>;
defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>;
@@ -899,7 +942,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>;
def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>;
def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
// Gathers using unscaled 32-bit offsets, e.g.
@@ -1013,9 +1056,95 @@ let Predicates = [HasSVE] in {
defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+
+ multiclass sve_masked_gather_x2_scaled<ValueType Ty, SDPatternOperator Load, string Inst> {
+ // base + vector of scaled offsets
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs))),
+ (!cast<Instruction>(Inst # _SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of signed 32bit scaled offsets
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32))),
+ (!cast<Instruction>(Inst # _SXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of unsigned 32bit scaled offsets
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))),
+ (!cast<Instruction>(Inst # _UXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ }
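+  // In the patterns above, (sext_inreg ..., nxv2i32) matches sign-extended 32-bit
+  // offsets (selecting the SXTW form), while the (and ..., (splat_vector 0xFFFFFFFF))
+  // idiom matches zero-extended 32-bit offsets (selecting the UXTW form).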
+
+ multiclass sve_masked_gather_x2_unscaled<ValueType Ty, SDPatternOperator Load, string Inst, Operand ImmTy> {
+ // vector of pointers + immediate offset (includes zero)
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), (i64 ImmTy:$imm), (nxv2i64 ZPR:$ptrs))),
+ (!cast<Instruction>(Inst # _IMM) PPR:$gp, ZPR:$ptrs, ImmTy:$imm)>;
+ // base + vector of offsets
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs))),
+ (!cast<Instruction>(Inst) PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of signed 32bit offsets
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32))),
+ (!cast<Instruction>(Inst # _SXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of unsigned 32bit offsets
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))),
+ (!cast<Instruction>(Inst # _UXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ }
+
+ multiclass sve_masked_gather_x4<ValueType Ty, SDPatternOperator Load, Instruction Inst> {
+ def : Pat<(Ty (Load (SVEDup0Undef), (nxv4i1 PPR:$gp), GPR64:$base, (nxv4i32 ZPR:$offs))),
+ (Inst PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ }
+
+ defm : sve_masked_gather_x2_scaled<nxv2i64, azext_masked_gather_i16_signed_scaled, "GLD1H_D">;
+ defm : sve_masked_gather_x2_scaled<nxv2i64, sext_masked_gather_i16_signed_scaled, "GLD1SH_D">;
+ defm : sve_masked_gather_x2_scaled<nxv2i64, azext_masked_gather_i32_signed_scaled, "GLD1W_D">;
+ defm : sve_masked_gather_x2_scaled<nxv2i64, sext_masked_gather_i32_signed_scaled, "GLD1SW_D">;
+ defm : sve_masked_gather_x2_scaled<nxv2i64, nonext_masked_gather_signed_scaled, "GLD1D">;
+ defm : sve_masked_gather_x2_scaled<nxv2f16, nonext_masked_gather_signed_scaled, "GLD1H_D">;
+ defm : sve_masked_gather_x2_scaled<nxv2f32, nonext_masked_gather_signed_scaled, "GLD1W_D">;
+ defm : sve_masked_gather_x2_scaled<nxv2f64, nonext_masked_gather_signed_scaled, "GLD1D">;
+ defm : sve_masked_gather_x2_scaled<nxv2bf16, nonext_masked_gather_signed_scaled, "GLD1H_D">;
+
+ defm : sve_masked_gather_x2_unscaled<nxv2i64, azext_masked_gather_i8_signed_unscaled, "GLD1B_D" , imm0_31>;
+ defm : sve_masked_gather_x2_unscaled<nxv2i64, sext_masked_gather_i8_signed_unscaled, "GLD1SB_D", imm0_31>;
+ defm : sve_masked_gather_x2_unscaled<nxv2i64, azext_masked_gather_i16_signed_unscaled, "GLD1H_D", uimm5s2>;
+ defm : sve_masked_gather_x2_unscaled<nxv2i64, sext_masked_gather_i16_signed_unscaled, "GLD1SH_D", uimm5s2>;
+ defm : sve_masked_gather_x2_unscaled<nxv2i64, azext_masked_gather_i32_signed_unscaled, "GLD1W_D", uimm5s4>;
+ defm : sve_masked_gather_x2_unscaled<nxv2i64, sext_masked_gather_i32_signed_unscaled, "GLD1SW_D", uimm5s4>;
+ defm : sve_masked_gather_x2_unscaled<nxv2i64, nonext_masked_gather_signed_unscaled, "GLD1D", uimm5s8>;
+ defm : sve_masked_gather_x2_unscaled<nxv2f16, nonext_masked_gather_signed_unscaled, "GLD1H_D", uimm5s2>;
+ defm : sve_masked_gather_x2_unscaled<nxv2f32, nonext_masked_gather_signed_unscaled, "GLD1W_D", uimm5s4>;
+ defm : sve_masked_gather_x2_unscaled<nxv2f64, nonext_masked_gather_signed_unscaled, "GLD1D", uimm5s8>;
+ defm : sve_masked_gather_x2_unscaled<nxv2bf16, nonext_masked_gather_signed_unscaled, "GLD1H_D", uimm5s2>;
+
+ defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_signed_scaled, GLD1H_S_SXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_signed_scaled, GLD1SH_S_SXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_signed_scaled, GLD1W_SXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_signed_scaled, GLD1H_S_SXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_signed_scaled, GLD1W_SXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_signed_scaled, GLD1H_S_SXTW_SCALED>;
+
+ defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i8_signed_unscaled, GLD1B_S_SXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i8_signed_unscaled, GLD1SB_S_SXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_signed_unscaled, GLD1H_S_SXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_signed_unscaled, GLD1SH_S_SXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_signed_unscaled, GLD1W_SXTW>;
+ defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_signed_unscaled, GLD1H_S_SXTW>;
+ defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_signed_unscaled, GLD1W_SXTW>;
+ defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_signed_unscaled, GLD1H_S_SXTW>;
+
+ defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_unsigned_scaled, GLD1H_S_UXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_unsigned_scaled, GLD1SH_S_UXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_unsigned_scaled, GLD1W_UXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_unsigned_scaled, GLD1H_S_UXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_unsigned_scaled, GLD1W_UXTW_SCALED>;
+ defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_unsigned_scaled, GLD1H_S_UXTW_SCALED>;
+
+ defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i8_unsigned_unscaled, GLD1B_S_UXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i8_unsigned_unscaled, GLD1SB_S_UXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, azext_masked_gather_i16_unsigned_unscaled, GLD1H_S_UXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, sext_masked_gather_i16_unsigned_unscaled, GLD1SH_S_UXTW>;
+ defm : sve_masked_gather_x4<nxv4i32, nonext_masked_gather_unsigned_unscaled, GLD1W_UXTW>;
+ defm : sve_masked_gather_x4<nxv4f16, nonext_masked_gather_unsigned_unscaled, GLD1H_S_UXTW>;
+ defm : sve_masked_gather_x4<nxv4f32, nonext_masked_gather_unsigned_unscaled, GLD1W_UXTW>;
+ defm : sve_masked_gather_x4<nxv4bf16, nonext_masked_gather_unsigned_unscaled, GLD1H_S_UXTW>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
// Non-temporal contiguous loads (register + immediate)
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>;
@@ -1051,7 +1180,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
// Scatters using unpacked, unscaled 32-bit offsets, e.g.
@@ -1100,12 +1229,87 @@ let Predicates = [HasSVE] in {
// Scatters using scaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, lsl #1]
- defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>;
- defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>;
- defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>;
+ defm SST1H_D : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>;
+ defm SST1W_D : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>;
+ defm SST1D : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>;
+
+ multiclass sve_masked_scatter_x2_scaled<ValueType Ty, SDPatternOperator Store, string Inst> {
+ // base + vector of scaled offsets
+ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs)),
+ (!cast<Instruction>(Inst # _SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of signed 32bit scaled offsets
+ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32)),
+ (!cast<Instruction>(Inst # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of unsigned 32bit scaled offsets
+ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))),
+ (!cast<Instruction>(Inst # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ }
+
+ multiclass sve_masked_scatter_x2_unscaled<ValueType Ty, SDPatternOperator Store, string Inst, Operand ImmTy> {
+ // vector of pointers + immediate offset (includes zero)
+ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), (i64 ImmTy:$imm), (nxv2i64 ZPR:$ptrs)),
+ (!cast<Instruction>(Inst # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, ImmTy:$imm)>;
+ // base + vector of offsets
+ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (nxv2i64 ZPR:$offs)),
+ (!cast<Instruction>(Inst) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of signed 32bit offsets
+ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32)),
+ (!cast<Instruction>(Inst # _SXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ // base + vector of unsigned 32bit offsets
+ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))),
+ (!cast<Instruction>(Inst # _UXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ }
+
+ multiclass sve_masked_scatter_x4<ValueType Ty, SDPatternOperator Store, Instruction Inst> {
+ def : Pat<(Store (Ty ZPR:$data), (nxv4i1 PPR:$gp), GPR64:$base, (nxv4i32 ZPR:$offs)),
+ (Inst ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>;
+ }
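+  // These scatter multiclasses mirror the gather multiclasses above, reusing the
+  // same SXTW/UXTW offset idioms to select the corresponding store addressing forms.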
+
+ defm : sve_masked_scatter_x2_scaled<nxv2i64, trunc_masked_scatter_i16_signed_scaled, "SST1H_D">;
+ defm : sve_masked_scatter_x2_scaled<nxv2i64, trunc_masked_scatter_i32_signed_scaled, "SST1W_D">;
+ defm : sve_masked_scatter_x2_scaled<nxv2i64, nontrunc_masked_scatter_signed_scaled, "SST1D">;
+ defm : sve_masked_scatter_x2_scaled<nxv2f16, nontrunc_masked_scatter_signed_scaled, "SST1H_D">;
+ defm : sve_masked_scatter_x2_scaled<nxv2f32, nontrunc_masked_scatter_signed_scaled, "SST1W_D">;
+ defm : sve_masked_scatter_x2_scaled<nxv2f64, nontrunc_masked_scatter_signed_scaled, "SST1D">;
+ defm : sve_masked_scatter_x2_scaled<nxv2bf16, nontrunc_masked_scatter_signed_scaled, "SST1H_D">;
+
+ defm : sve_masked_scatter_x2_unscaled<nxv2i64, trunc_masked_scatter_i8_signed_unscaled, "SST1B_D" , imm0_31>;
+ defm : sve_masked_scatter_x2_unscaled<nxv2i64, trunc_masked_scatter_i16_signed_unscaled, "SST1H_D", uimm5s2>;
+ defm : sve_masked_scatter_x2_unscaled<nxv2i64, trunc_masked_scatter_i32_signed_unscaled, "SST1W_D", uimm5s4>;
+ defm : sve_masked_scatter_x2_unscaled<nxv2i64, nontrunc_masked_scatter_signed_unscaled, "SST1D", uimm5s8>;
+ defm : sve_masked_scatter_x2_unscaled<nxv2f16, nontrunc_masked_scatter_signed_unscaled, "SST1H_D", uimm5s2>;
+ defm : sve_masked_scatter_x2_unscaled<nxv2f32, nontrunc_masked_scatter_signed_unscaled, "SST1W_D", uimm5s4>;
+ defm : sve_masked_scatter_x2_unscaled<nxv2f64, nontrunc_masked_scatter_signed_unscaled, "SST1D", uimm5s8>;
+ defm : sve_masked_scatter_x2_unscaled<nxv2bf16, nontrunc_masked_scatter_signed_unscaled, "SST1H_D", uimm5s2>;
+
+ defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_signed_scaled, SST1H_S_SXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_signed_scaled, SST1W_SXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_signed_scaled, SST1H_S_SXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_signed_scaled, SST1W_SXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_signed_scaled, SST1H_S_SXTW_SCALED>;
+
+ defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i8_signed_unscaled, SST1B_S_SXTW>;
+ defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_signed_unscaled, SST1H_S_SXTW>;
+ defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_signed_unscaled, SST1W_SXTW>;
+ defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_signed_unscaled, SST1H_S_SXTW>;
+ defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_signed_unscaled, SST1W_SXTW>;
+ defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_signed_unscaled, SST1H_S_SXTW>;
+
+ defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_unsigned_scaled, SST1H_S_UXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_unsigned_scaled, SST1W_UXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_unsigned_scaled, SST1H_S_UXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_unsigned_scaled, SST1W_UXTW_SCALED>;
+ defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_unsigned_scaled, SST1H_S_UXTW_SCALED>;
+
+ defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i8_unsigned_unscaled, SST1B_S_UXTW>;
+ defm : sve_masked_scatter_x4<nxv4i32, trunc_masked_scatter_i16_unsigned_unscaled, SST1H_S_UXTW>;
+ defm : sve_masked_scatter_x4<nxv4i32, nontrunc_masked_scatter_unsigned_unscaled, SST1W_UXTW>;
+ defm : sve_masked_scatter_x4<nxv4f16, nontrunc_masked_scatter_unsigned_unscaled, SST1H_S_UXTW>;
+ defm : sve_masked_scatter_x4<nxv4f32, nontrunc_masked_scatter_unsigned_unscaled, SST1W_UXTW>;
+ defm : sve_masked_scatter_x4<nxv4bf16, nontrunc_masked_scatter_unsigned_unscaled, SST1H_S_UXTW>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
// ST(2|3|4) structured stores (register + immediate)
defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;
defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>;
@@ -1161,7 +1365,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
// Contiguous prefetch (register + register)
def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>;
def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>;
- def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
+ def PRFW_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, ComplexPattern AddrCP> {
@@ -1184,9 +1388,9 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, am_sve_regreg_lsl0>;
defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, am_sve_regreg_lsl1>;
- defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFS_PRR, am_sve_regreg_lsl2>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFW_PRR, am_sve_regreg_lsl2>;
defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, am_sve_regreg_lsl3>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
// Gather prefetch using scaled 32-bit offsets, e.g.
@@ -1249,7 +1453,7 @@ let Predicates = [HasSVE] in {
// Patterns to generate adr instruction.
// adr z0.d, [z0.d, z0.d, uxtw]
def : Pat<(add nxv2i64:$Op1,
- (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))),
+ (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))),
(ADR_UXTW_ZZZ_D_0 $Op1, $Op2)>;
// adr z0.d, [z0.d, z0.d, sxtw]
def : Pat<(add nxv2i64:$Op1,
@@ -1262,7 +1466,7 @@ let Predicates = [HasSVE] in {
def : Pat<(add Ty:$Op1,
(Ty (AArch64lsl_p (PredTy (SVEAllActive)),
Ty:$Op2,
- (Ty (AArch64dup (ShiftTy ShiftAmt)))))),
+ (Ty (splat_vector (ShiftTy ShiftAmt)))))),
(DestAdrIns $Op1, $Op2)>;
}
defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_1, 1>;
@@ -1277,14 +1481,14 @@ let Predicates = [HasSVE] in {
multiclass adrXtwShiftPat<ValueType Ty, ValueType PredTy, int ShiftAmt> {
def : Pat<(add Ty:$Op1,
(Ty (AArch64lsl_p (PredTy (SVEAllActive)),
- (Ty (and Ty:$Op2, (Ty (AArch64dup (i64 0xFFFFFFFF))))),
- (Ty (AArch64dup (i64 ShiftAmt)))))),
+ (Ty (and Ty:$Op2, (Ty (splat_vector (i64 0xFFFFFFFF))))),
+ (Ty (splat_vector (i64 ShiftAmt)))))),
(!cast<Instruction>("ADR_UXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>;
def : Pat<(add Ty:$Op1,
(Ty (AArch64lsl_p (PredTy (SVEAllActive)),
(Ty (sext_inreg Ty:$Op2, nxv2i32)),
- (Ty (AArch64dup (i64 ShiftAmt)))))),
+ (Ty (splat_vector (i64 ShiftAmt)))))),
(!cast<Instruction>("ADR_SXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>;
}
defm : adrXtwShiftPat<nxv2i64, nxv2i1, 1>;
@@ -1292,7 +1496,7 @@ let Predicates = [HasSVE] in {
defm : adrXtwShiftPat<nxv2i64, nxv2i1, 3>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>;
@@ -1310,6 +1514,10 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;
// Extract lo/hi halves of legal predicate types.
+ def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP PPR:$Ps)>;
+ def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 1))),
+ (PUNPKHI_PP PPR:$Ps)>;
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
(PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
@@ -1400,6 +1608,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
// Concatenate two predicates.
+ def : Pat<(nxv2i1 (concat_vectors nxv1i1:$p1, nxv1i1:$p2)),
+ (UZP1_PPP_D $p1, $p2)>;
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;
def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)),
@@ -1475,7 +1685,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
@@ -1485,7 +1695,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>;
defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>;
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
- defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>;
+ defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
@@ -1522,7 +1732,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>;
defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>;
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
@@ -1619,16 +1829,16 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
-let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
-} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+} // End HasSVEorSME, UseExperimentalZeroingPseudos
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
@@ -1679,60 +1889,61 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
- def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
- (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+  // These patterns exist to improve the code quality of conversions on unpacked types.
+ def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
+ (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
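+  // The _UNDEF pseudos make no guarantee about inactive elements; that is safe here
+  // because the predicate is matched as SVEAllActive, so every element is active.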
// FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
// This is ignored by the pattern below where it is matched by (i64 timm0_1)
- def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
- (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
+ (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- // Floating-point -> signed integer
- def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ // Signed integer -> Floating-point
+ def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
- (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),
+ def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg),
(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
- (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
- (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
- (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
- (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- // Floating-point -> unsigned integer
- def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ // Unsigned integer -> Floating-point
+ def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
- (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (nxv2i64 (splat_vector (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
- (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),
+ def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg),
(and (nxv4i32 ZPR:$Zs),
- (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
- (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (nxv4i32 (splat_vector (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
- (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
- def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
(and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
- (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;
@@ -1743,27 +1954,27 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
-let Predicates = [HasBF16, HasSVEorStreamingSVE] in {
+let Predicates = [HasBF16, HasSVEorSME] in {
defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
-} // End HasBF16, HasSVEorStreamingSVE
+} // End HasBF16, HasSVEorSME
let Predicates = [HasBF16, HasSVE] in {
defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
} // End HasBF16, HasSVE
-let Predicates = [HasBF16, HasSVEorStreamingSVE] in {
- defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
- defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
- defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
- defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
+let Predicates = [HasBF16, HasSVEorSME] in {
+ defm BFMLALB_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
+ defm BFMLALT_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
+ defm BFMLALB_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
+ defm BFMLALT_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
-} // End HasBF16, HasSVEorStreamingSVE
+} // End HasBF16, HasSVEorSME
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
@@ -1875,7 +2086,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
let AddedComplexity = 1 in {
class LD1RPat<ValueType vt, SDPatternOperator operator,
Instruction load, Instruction ptrue, ValueType index_vt, ComplexPattern CP, Operand immtype> :
- Pat<(vt (AArch64dup (index_vt (operator (CP GPR64:$base, immtype:$offset))))),
+ Pat<(vt (splat_vector (index_vt (operator (CP GPR64:$base, immtype:$offset))))),
(load (ptrue 31), GPR64:$base, $offset)>;
}
@@ -1963,22 +2174,22 @@ let Predicates = [HasSVEorStreamingSVE] in {
GPR32:$op, sub_32), $imm),
sub_32))>;
- def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
(INCH_ZPiI ZPR:$op, 31, $imm)>;
- def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
(INCW_ZPiI ZPR:$op, 31, $imm)>;
- def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))),
(INCD_ZPiI ZPR:$op, 31, $imm)>;
- def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
(DECH_ZPiI ZPR:$op, 31, $imm)>;
- def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
(DECW_ZPiI ZPR:$op, 31, $imm)>;
- def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))),
(DECD_ZPiI ZPR:$op, 31, $imm)>;
}
- let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL], AddedComplexity = 5 in {
+ let Predicates = [HasSVEorSME, UseScalarIncVL], AddedComplexity = 5 in {
def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))),
(INCH_XPiI GPR64:$op, 31, $imm)>;
def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))),
@@ -2098,15 +2309,23 @@ let Predicates = [HasSVEorStreamingSVE] in {
def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv16i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv8i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv4i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv2i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv1i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv1i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv1i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
+ def : Pat<(nxv1i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
// These allow casting from/to unpacked floating-point types.
def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
@@ -2145,12 +2364,12 @@ let Predicates = [HasSVEorStreamingSVE] in {
}
// 2-element contiguous loads
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, azext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, sext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, azext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, sext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, azext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, sext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>;
defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv2bf16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
@@ -2158,18 +2377,18 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
// 4-element contiguous loads
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, azext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, sext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, azext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, sext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv4bf16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous loads
- defm : pred_load<nxv8i16, nxv8i1, zext_masked_load_i8, LD1B_H, LD1B_H_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv8i16, nxv8i1, asext_masked_load_i8, LD1SB_H, LD1SB_H_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv8i16, nxv8i1, azext_masked_load_i8, LD1B_H, LD1B_H_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv8i16, nxv8i1, sext_masked_load_i8, LD1SB_H, LD1SB_H_IMM, am_sve_regreg_lsl0>;
defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
@@ -2397,7 +2616,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
// 16-element contiguous loads
defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE] in {
multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
@@ -2482,7 +2701,7 @@ let Predicates = [HasSVE] in {
defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
} // End HasSVE
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
multiclass st1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
SDPatternOperator Store, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
// reg + reg
@@ -2716,7 +2935,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
(f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
}
-} // End HasSVEorStreamingSVE
+} // End HasSVEorSME
let Predicates = [HasSVE, HasMatMulInt8] in {
defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
@@ -2724,11 +2943,11 @@ let Predicates = [HasSVE, HasMatMulInt8] in {
defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
} // End HasSVE, HasMatMulInt8
-let Predicates = [HasSVEorStreamingSVE, HasMatMulInt8] in {
+let Predicates = [HasSVEorSME, HasMatMulInt8] in {
defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
-} // End HasSVEorStreamingSVE, HasMatMulInt8
+} // End HasSVEorSME, HasMatMulInt8
let Predicates = [HasSVE, HasMatMulFP32] in {
defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>;
@@ -2746,16 +2965,16 @@ let Predicates = [HasSVE, HasMatMulFP64] in {
defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>;
} // End HasSVE, HasMatMulFP64
-let Predicates = [HasSVEorStreamingSVE, HasMatMulFP64] in {
+let Predicates = [HasSVEorSME, HasMatMulFP64] in {
defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>;
defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>;
defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>;
defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>;
defm TRN1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>;
defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>;
-} // End HasSVEorStreamingSVE, HasMatMulFP64
+} // End HasSVEorSME, HasMatMulFP64
-let Predicates = [HasSVE2orStreamingSVE] in {
+let Predicates = [HasSVE2orSME] in {
// SVE2 integer multiply-add (indexed)
defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>;
defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>;
@@ -2903,17 +3122,17 @@ let Predicates = [HasSVE2orStreamingSVE] in {
defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>;
defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>;
defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>;
-} // End HasSVE2orStreamingSVE
+} // End HasSVE2orSME
-let Predicates = [HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVE2orSME, UseExperimentalZeroingPseudos] in {
defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
-} // End HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos
+} // End HasSVE2orSME, UseExperimentalZeroingPseudos
-let Predicates = [HasSVE2orStreamingSVE] in {
+let Predicates = [HasSVE2orSME] in {
// SVE2 predicated shifts
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl", "SQSHL_ZPZI", int_aarch64_sve_sqshl>;
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl", "UQSHL_ZPZI", int_aarch64_sve_uqshl>;
@@ -2960,18 +3179,18 @@ let Predicates = [HasSVE2orStreamingSVE] in {
defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>;
// SVE2 bitwise shift right and accumulate
- defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", int_aarch64_sve_ssra>;
- defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", int_aarch64_sve_usra>;
- defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>;
- defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>;
+ defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", AArch64ssra>;
+ defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", AArch64usra>;
+ defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra, int_aarch64_sve_srshr>;
+ defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra, int_aarch64_sve_urshr>;
// SVE2 complex integer add
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd", int_aarch64_sve_cadd_x>;
defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd", int_aarch64_sve_sqcadd_x>;
// SVE2 integer absolute difference and accumulate
- defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>;
- defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>;
+ defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", AArch64saba>;
+ defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", AArch64uaba>;
// SVE2 integer absolute difference and accumulate long
defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb", int_aarch64_sve_sabalb>;
@@ -3026,7 +3245,7 @@ let Predicates = [HasSVE2orStreamingSVE] in {
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>;
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>;
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>;
-} // End HasSVE2orStreamingSVE
+} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
// SVE2 character match
@@ -3034,7 +3253,7 @@ let Predicates = [HasSVE2] in {
defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>;
} // End HasSVE2
-let Predicates = [HasSVE2orStreamingSVE] in {
+let Predicates = [HasSVE2orSME] in {
// SVE2 bitwise exclusive-or interleaved
defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>;
defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>;
@@ -3049,7 +3268,7 @@ let Predicates = [HasSVE2orStreamingSVE] in {
defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>;
defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>;
defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>;
-} // End HasSVE2orStreamingSVE
+} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
// SVE2 histogram generation (segment)
@@ -3059,7 +3278,7 @@ let Predicates = [HasSVE2] in {
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>;
} // End HasSVE2
-let Predicates = [HasSVE2orStreamingSVE] in {
+let Predicates = [HasSVE2orSME] in {
// SVE2 floating-point base 2 logarithm as integer
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;
@@ -3091,7 +3310,7 @@ let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 bitwise ternary operations
defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", int_aarch64_sve_eor3>;
defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", int_aarch64_sve_bcax>;
- defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl>;
+ defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl, AArch64bsp>;
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>;
@@ -3101,7 +3320,7 @@ let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
-} // End HasSVE2orStreamingSVE
+} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
// SVE2 non-temporal gather loads
@@ -3120,10 +3339,10 @@ let Predicates = [HasSVE2] in {
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>;
} // End HasSVE2
-let Predicates = [HasSVE2orStreamingSVE] in {
+let Predicates = [HasSVE2orSME] in {
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
-} // End HasSVE2orStreamingSVE
+} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
// SVE2 non-temporal scatter stores
@@ -3137,7 +3356,7 @@ let Predicates = [HasSVE2] in {
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>;
} // End HasSVE2
-let Predicates = [HasSVE2orStreamingSVE] in {
+let Predicates = [HasSVE2orSME] in {
// SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;
@@ -3156,7 +3375,7 @@ let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 pointer conflict compare
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">;
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">;
-} // End HasSVE2orStreamingSVE
+} // End HasSVE2orSME
let Predicates = [HasSVE2AES] in {
// SVE2 crypto destructive binary operations
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 009219ce3c54..c6b112d0d2f1 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -6,7 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the machine model for the ARM Cortex-A55 processors.
+// This file defines the machine model for the ARM Cortex-A55 processors. Note
+// that this schedule is currently used as the default for -mcpu=generic. As a
+// result, some of the modelling decisions made do not precisely model the
+// Cortex-A55, instead aiming to be a good compromise between different CPUs.
//
//===----------------------------------------------------------------------===//
@@ -149,8 +152,31 @@ def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
-def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
-def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
+
+// NEON
+class CortexA55WriteVd<int n, ProcResourceKind res> : SchedWriteRes<[res]> {
+ let Latency = n;
+}
+class CortexA55WriteVq<int n, ProcResourceKind res> : SchedWriteRes<[res, res]> {
+ let Latency = n;
+ let BeginGroup = 1;
+}
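+// A d-form (64-bit) vector op occupies a single FPALU slot, while a q-form (128-bit)
+// op uses both slots and must begin a new issue group, modelling execution as two halves.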
+def CortexA55WriteDotScVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteDotVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteDotVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaLVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaIxVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_3 : CortexA55WriteVd<3, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_3 : CortexA55WriteVq<3, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_2 : CortexA55WriteVd<2, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_2 : CortexA55WriteVq<2, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_1 : CortexA55WriteVd<1, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_1 : CortexA55WriteVq<1, CortexA55UnitFPALU>;
+def : SchedAlias<WriteVd, CortexA55WriteVd<4, CortexA55UnitFPALU>>;
+def : SchedAlias<WriteVq, CortexA55WriteVq<4, CortexA55UnitFPALU>>;
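+// Vector writes without a more specific InstRW entry below fall back to these
+// 4-cycle defaults.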
// FP ALU specific new schedwrite definitions
def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
@@ -358,4 +384,99 @@ def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+// 4.15. Advanced SIMD integer instructions
+// ASIMD absolute diff
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
+// ASIMD absolute diff accum
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ABAL?v")>;
+// ASIMD absolute diff long
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDLv")>;
+// ASIMD arith #1
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "(ADD|SUB|NEG)v(1i64|2i32|4i16|8i8)",
+ "[SU]R?HADDv(2i32|4i16|8i8)", "[SU]HSUBv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "(ADD|SUB|NEG)v(2i64|4i32|8i16|16i8)",
+ "[SU]R?HADDv(8i16|4i32|16i8)", "[SU]HSUBv(8i16|4i32|16i8)")>;
+// ASIMD arith #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
+ "[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$",
+ "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$",
+ "ADDPv(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
+ "[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$",
+ "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$",
+ "ADDPv(16i8|2i64|4i32|8i16)$")>;
+// ASIMD arith #3
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "SADDLv", "UADDLv", "SADDWv",
+ "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
+// ASIMD arith #5
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "RADDHNv", "RSUBHNv")>;
+// ASIMD arith, reduce
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "ADDVv", "SADDLVv", "UADDLVv")>;
+// ASIMD compare #1
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>;
+// ASIMD compare #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "CMTSTv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "CMTSTv(2i64|4i32|8i16|16i8)")>;
+// ASIMD logical #1
+def : InstRW<[CortexA55WriteAluVd_1], (instregex "(AND|EOR|NOT|ORN)v8i8",
+ "(ORR|BIC)v(2i32|4i16|8i8)$", "MVNIv(2i|2s|4i16)")>;
+def : InstRW<[CortexA55WriteAluVq_1], (instregex "(AND|EOR|NOT|ORN)v16i8",
+ "(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>;
+// ASIMD max/min, basic
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU](MIN|MAX)P?v(16i8|4i132|8i16)")>;
+// SIMD max/min, reduce
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU](MAX|MIN)Vv")>;
+// ASIMD multiply, by element
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
+ "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply
+def : InstRW<[CortexA55WriteAluVd_3], (instrs PMULv8i8)>;
+def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULv16i8)>;
+// ASIMD multiply accumulate
+def : InstRW<[CortexA55WriteMlaVd_4], (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA55WriteMlaVq_4], (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
+def : InstRW<[CortexA55WriteMlaIxVq_4], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply accumulate half
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "SQRDML[AS]H[vi]")>;
+// ASIMD multiply accumulate long
+def : InstRW<[CortexA55WriteMlaLVq_4], (instregex "[SU]ML[AS]Lv")>;
+// ASIMD multiply accumulate long #2
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "SQDML[AS]L[iv]")>;
+// ASIMD dot product
+def : InstRW<[CortexA55WriteDotVd_4], (instregex "[SU]DOTv8i8")>;
+def : InstRW<[CortexA55WriteDotVq_4], (instregex "[SU]DOTv16i8")>;
+// ASIMD dot product, by scalar
+def : InstRW<[CortexA55WriteDotScVq_4], (instregex "[SU]DOTlanev")>;
+// ASIMD multiply long
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]MULLv", "SQDMULL[iv]")>;
+// ASIMD polynomial (8x8) multiply long
+def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULLv8i8, PMULLv16i8)>;
+// ASIMD pairwise add and accumulate
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ADALPv")>;
+// ASIMD shift accumulate
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
+// ASIMD shift accumulate #2
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]RSRA[vd]")>;
+// ASIMD shift by immed
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "SHLd$", "SHLv",
+ "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>;
+// ASIMD shift by immed
+// SXTL and UXTL are aliases for SHLL
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "[US]?SHLLv")>;
+// ASIMD shift by immed #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHR(d|v2i32|v4i16|v8i8)",
+ "RSHRNv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHRv(16i8|2i64|4i32|8i16)",
+ "RSHRNv(16i8|4i32|8i16)")>;
+// ASIMD shift by register
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU]SHLv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU]SHLv(2i64|4i32|8i16|16i8)")>;
+// ASIMD shift by register #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>;
+
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index fa10d056b7f7..6b053f1969b4 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -22,7 +22,7 @@ def A64FXModel : SchedMachineModel {
list<Predicate> UnsupportedFeatures =
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
- HasSVE2orStreamingSVE];
+ HasSVE2orSME];
let FullInstRWOverlapCheck = 0;
}
@@ -3348,7 +3348,7 @@ def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>;
def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>;
// [351] "prfw $prfop, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>;
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRR)>;
// [352] "prfw $prfop, $Pg, [$Rn, $Zm]";
def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>;
@@ -3554,7 +3554,7 @@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>;
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>;
// [421] "st1b $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>;
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>;
// [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]";
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>;
@@ -3566,7 +3566,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>;
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>;
// [425] "st1d $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>;
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D, SST1D_SCALED, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>;
// [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>;
@@ -3578,7 +3578,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>;
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>;
// [429] "st1h $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>;
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D, SST1H_D_SCALED, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>;
// [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>;
@@ -3590,7 +3590,7 @@ def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>;
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>;
// [433] "st1w $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>;
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D, SST1W_D_SCALED, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>;
// [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
new file mode 100644
index 000000000000..32f7299fbf87
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
@@ -0,0 +1,1136 @@
+//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Ampere Computing Ampere-1 to
+// support instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+// The Ampere-1 core is an out-of-order micro-architecture. The front
+// end has branch prediction, with a 10-cycle recovery time from a
+// mispredicted branch. Instructions coming out of the front end are
+// decoded into internal micro-ops (uops).
+
+def Ampere1Model : SchedMachineModel {
+ let IssueWidth = 4; // 4-way decode and dispatch
+ let MicroOpBufferSize = 174; // micro-op re-order buffer size
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 10; // Branch mispredict penalty
+ let LoopMicroOpBufferSize = 32; // Instruction queue size
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ SMEUnsupported.F);
+}
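+
+// Note: a SchedMachineModel only takes effect once a CPU definition references
+// it, which happens elsewhere in AArch64.td. A rough sketch (the feature list
+// is omitted here, not spelled out):
+//   def : ProcessorModel<"ampere1", Ampere1Model, [/*features*/]>;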
+
+let SchedModel = Ampere1Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Ampere-1.
+// Ampere-1 has 12 pipelines that 8 independent schedulers (4 integer, 2 FP,
+// and 2 memory) issue into. The integer and FP schedulers can each issue
+// one uop per cycle, while the memory schedulers can each issue one load
+// and one store address calculation per cycle.
+
+def Ampere1UnitA : ProcResource<2>; // integer single-cycle, branch, and flags r/w
+def Ampere1UnitB : ProcResource<2>; // integer single-cycle, and complex shifts
+def Ampere1UnitBS : ProcResource<1>; // integer multi-cycle
+def Ampere1UnitL : ProcResource<2>; // load
+def Ampere1UnitS : ProcResource<2>; // store address calculation
+def Ampere1UnitX : ProcResource<1>; // FP and vector operations, and flag write
+def Ampere1UnitY : ProcResource<1>; // FP and vector operations, and crypto
+def Ampere1UnitZ : ProcResource<1>; // FP store data and FP-to-integer moves
+
+def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>;
+def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>;
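+
+// A write that names one of these groups consumes a slot on either member, so
+// an op modeled as, for instance,
+//   def ExampleWrite : SchedWriteRes<[Ampere1UnitXY]> { let Latency = 2; }
+// can issue on X or Y, whichever is free. (ExampleWrite is illustrative only;
+// the real write types follow below.)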
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Ampere-1.
+
+def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
+ let Latency = 2;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
+ Ampere1UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
+ Ampere1UnitZ]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 2;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_3cyc_1B_1S_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS,
+ Ampere1UnitAB]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS, Ampere1UnitZ]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ,
+ Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+}
+
+def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+}
+
+def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+}
+
+def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 6;
+ let NumMicroOps = 9;
+}
+
+def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 6;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitL, Ampere1UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+ let Latency = 7;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ,
+ Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 7;
+ let NumMicroOps = 12;
+}
+
+def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA,
+ Ampere1UnitA]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 9;
+ let NumMicroOps = 8;
+}
+
+def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+
+def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ,
+ Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 9;
+ let NumMicroOps = 14;
+}
+
+def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitS, Ampere1UnitS,
+ Ampere1UnitZ, Ampere1UnitZ,
+ Ampere1UnitZ, Ampere1UnitZ]> {
+ let Latency = 9;
+ let NumMicroOps = 16;
+}
+
+def Ampere1Write_10cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 10;
+ let NumMicroOps = 6;
+}
+
+def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+
+def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 11;
+ let NumMicroOps = 12;
+}
+
+def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitL, Ampere1UnitL,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 12;
+ let NumMicroOps = 12;
+}
+
+def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+
+def Ampere1Write_12cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
+ Ampere1UnitXY, Ampere1UnitXY]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+
+def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+ let Latency = 18;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 19;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 25;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 32;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
+ let Latency = 34;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 34;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 39;
+ let NumMicroOps = 1;
+}
+
+def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
+ let Latency = 62;
+ let NumMicroOps = 1;
+}
+
+// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
+// which are a single uop, and for extended registers, where both uops have full
+// flexibility across Units A and B.
+def Ampere1Write_Arith : SchedWriteVariant<[
+ SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
+ SchedVar<AmpereCheapLSL, [Ampere1Write_1cyc_1AB]>,
+ SchedVar<NoSchedPred, [Ampere1Write_2cyc_1B_1AB]>]>;
+
+def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
+ SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
+ SchedVar<AmpereCheapLSL, [Ampere1Write_1cyc_1A]>,
+ SchedVar<NoSchedPred, [Ampere1Write_2cyc_1B_1A]>]>;
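+
+// Worked example for the variants above (illustrative assembly):
+//   add x0, x1, x2, lsl #3   // AmpereCheapLSL matches: 1 cycle, 1 uop on A/B
+//   add x0, x1, x2, lsl #5   // no predicate matches: 2 cycles, 1B + 1AB uops
+//   add x0, x1, w2, sxtw     // RegExtendedPred matches: 2 cycles, 2 AB uops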
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latencies for Ampere-1.
+// This provides a coarse model, which is then specialised below.
+
+def : WriteRes<WriteImm, [Ampere1UnitAB]>; // MOVN, MOVZ
+def : WriteRes<WriteI, [Ampere1UnitAB]>; // ALU
+def : WriteRes<WriteISReg, [Ampere1UnitB, Ampere1UnitA]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+} // ALU of Shifted-Reg
+def : WriteRes<WriteIEReg, [Ampere1UnitAB, Ampere1UnitA]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+} // ALU of Extended-Reg
+def : WriteRes<WriteExtr, [Ampere1UnitB]>; // EXTR shifts a reg pair
+def : WriteRes<WriteIS, [Ampere1UnitB]>; // Shift/Scale
+def : WriteRes<WriteID32, [Ampere1UnitBS]> {
+ let Latency = 18;
+} // 32-bit Divide
+def : WriteRes<WriteID64, [Ampere1UnitBS]> {
+ let Latency = 34;
+} // 64-bit Divide
+def : WriteRes<WriteIM32, [Ampere1UnitBS]> {
+ let Latency = 3;
+} // 32-bit Multiply
+def : WriteRes<WriteIM64, [Ampere1UnitBS]> {
+ let Latency = 3;
+} // 64-bit Multiply
+def : WriteRes<WriteBr, [Ampere1UnitA]>;
+def : WriteRes<WriteBrReg, [Ampere1UnitA, Ampere1UnitA]>;
+def : WriteRes<WriteLD, [Ampere1UnitL]> {
+ let Latency = 4;
+} // Load from base addr plus immediate offset
+def : WriteRes<WriteST, [Ampere1UnitS]> {
+ let Latency = 1;
+} // Store to base addr plus immediate offset
+def : WriteRes<WriteSTP, [Ampere1UnitS, Ampere1UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+} // Store a register pair.
+def : WriteRes<WriteAdr, [Ampere1UnitAB]>;
+def : WriteRes<WriteLDIdx, [Ampere1UnitAB, Ampere1UnitS]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+} // Load from a register index (maybe scaled).
+def : WriteRes<WriteSTIdx, [Ampere1UnitS, Ampere1UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+} // Store to a register index (maybe scaled).
+def : WriteRes<WriteF, [Ampere1UnitXY]> {
+ let Latency = 2;
+} // General floating-point ops.
+def : WriteRes<WriteFCmp, [Ampere1UnitX]> {
+ let Latency = 5;
+} // Floating-point compare.
+def : WriteRes<WriteFCvt, [Ampere1UnitXY]> {
+ let Latency = 6;
+} // Float conversion.
+def : WriteRes<WriteFCopy, [Ampere1UnitXY]> {
+} // Float-int register copy.
+def : WriteRes<WriteFImm, [Ampere1UnitXY]> {
+ let Latency = 2;
+} // Floating-point immediate.
+def : WriteRes<WriteFMul, [Ampere1UnitXY]> {
+ let Latency = 5;
+} // Floating-point multiply.
+def : WriteRes<WriteFDiv, [Ampere1UnitXY]> {
+ let Latency = 34;
+} // Floating-point division.
+def : WriteRes<WriteVd, [Ampere1UnitXY]> {
+ let Latency = 3;
+} // 64bit Vector D ops.
+def : WriteRes<WriteVq, [Ampere1UnitXY]> {
+ let Latency = 3;
+} // 128bit Vector Q ops.
+def : WriteRes<WriteVLD, [Ampere1UnitL, Ampere1UnitL]> {
+ let Latency = 5;
+} // Vector loads.
+def : WriteRes<WriteVST, [Ampere1UnitS, Ampere1UnitZ]> {
+ let Latency = 2;
+} // Vector stores.
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 4;
+} // The second register of a load-pair: LDP, LDPSW, LDNP, LDXP, LDAXP
+
+// Forwarding logic.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 1, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
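+
+// Worked example (illustrative): ReadAdvance<ReadIMA, 1, ...> lets the
+// accumulator of MADD/MSUB be read one cycle late when it is produced by
+// another multiply, so a chain of dependent MADDs costs 3 - 1 = 2 cycles per
+// link instead of the full 3-cycle multiply latency.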
+
+//===----------------------------------------------------------------------===//
+// Specialising the scheduling model further for Ampere-1.
+
+def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>;
+
+// Branch instructions
+def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+ (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
+def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>;
+
+// Cryptography instructions
+// -- AES encryption/decryption
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>;
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>;
+// -- Polynomial multiplication
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
+// -- SHA-256 hash
+def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>;
+// -- SHA-256 schedule update
+def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>;
+// -- SHA-3 instructions
+def : InstRW<[Ampere1Write_2cyc_1XY],
+ (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
+// -- SHA-512 hash
+def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>;
+// -- SHA-512 schedule update
+def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>;
+// -- SHA1 choose/majority/parity
+def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>;
+// -- SHA1 hash/schedule update
+def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>;
+def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>;
+
+// FP and vector load instructions
+// -- Load 1-element structure to one/all lanes
+// ---- all lanes
+def : InstRW<[Ampere1Write_7cyc_1L_1XY],
+ (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// ---- one lane
+def : InstRW<[Ampere1Write_7cyc_1L_1XY],
+ (instregex "^LD1i(8|16|32|64)")>;
+// -- Load 1-element structure to one/all lanes, 1D size
+def : InstRW<[Ampere1Write_5cyc_1L],
+ (instregex "^LD1Rv1d")>;
+// -- Load 1-element structures to 1 register
+def : InstRW<[Ampere1Write_5cyc_1L],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 1-element structures to 2 registers
+def : InstRW<[Ampere1Write_5cyc_2L],
+ (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 1-element structures to 3 registers
+def : InstRW<[Ampere1Write_6cyc_3L],
+ (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 1-element structures to 4 registers
+def : InstRW<[Ampere1Write_6cyc_4L],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Load 2-element structure to all lanes of 2 registers, 1D size
+def : InstRW<[Ampere1Write_5cyc_2L],
+ (instregex "^LD2Rv1d")>;
+// -- Load 2-element structure to all lanes of 2 registers, other sizes
+def : InstRW<[Ampere1Write_7cyc_2L_2XY],
+ (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// -- Load 2-element structure to one lane of 2 registers
+def : InstRW<[Ampere1Write_7cyc_2L_2XY],
+ (instregex "^LD2i(8|16|32|64)")>;
+// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
+def : InstRW<[Ampere1Write_7cyc_2L_2XY],
+ (instregex "^LD2Twov(16b|8h|4s|2d)")>;
+// -- Load 2-element structures to 2 registers, 8B/4H/2S size
+def : InstRW<[Ampere1Write_9cyc_2L_3XY],
+ (instregex "^LD2Twov(8b|4h|2s)")>;
+// -- Load 3-element structure to all lanes of 3 registers, 1D size
+def : InstRW<[Ampere1Write_6cyc_3L],
+ (instregex "^LD3Rv1d")>;
+// -- Load 3-element structure to all lanes of 3 registers, other sizes
+def : InstRW<[Ampere1Write_8cyc_3L_3XY],
+ (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// -- Load 3-element structure to one lane of 3 registers
+def : InstRW<[Ampere1Write_8cyc_3L_3XY],
+ (instregex "^LD3i(8|16|32|64)")>;
+// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
+def : InstRW<[Ampere1Write_9cyc_3L_3XY],
+ (instregex "^LD3Threev(16b|8h|4s)")>;
+// -- Load 3-element structures to 3 registers, 2D size
+def : InstRW<[Ampere1Write_8cyc_3L_3XY],
+ (instregex "^LD3Threev2d")>;
+// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
+def : InstRW<[Ampere1Write_10cyc_3L_3XY],
+ (instregex "^LD3Threev(8b|4h|2s)")>;
+// -- Load 4-element structure to all lanes of 4 registers, 1D size
+def : InstRW<[Ampere1Write_6cyc_4L],
+ (instregex "^LD4Rv1d")>;
+// -- Load 4-element structure to all lanes of 4 registers, other sizes
+def : InstRW<[Ampere1Write_8cyc_4L_4XY],
+ (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
+// -- Load 4-element structure to one lane of 4 registers
+def : InstRW<[Ampere1Write_6cyc_4L],
+ (instregex "^LD4i(8|16|32|64)")>;
+// -- Load 4-element structures to 4 registers, 2D size
+def : InstRW<[Ampere1Write_9cyc_4L_4XY],
+ (instregex "^LD4Fourv2d")>;
+// -- Load 4-element structures to 4 registers, 2S size
+def : InstRW<[Ampere1Write_12cyc_4L_8XY],
+ (instregex "^LD4Fourv2s")>;
+// -- Load 4-element structures to 4 registers, other sizes
+def : InstRW<[Ampere1Write_11cyc_4L_8XY],
+ (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
+// -- Load pair, Q-form
+def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>;
+// -- Load pair, S/D-form
+def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
+// -- Load register
+def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
+// -- Load register, sign-extended register
+def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "LDR[BHSDQ]ro(W|X)")>;
+
+// FP and vector store instructions
+// -- Store 1-element structure from one lane of 1 register
+def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z],
+ (instregex "^ST1i(8|16|32|64)")>;
+// -- Store 1-element structures from 1 register
+def : InstRW<[Ampere1Write_2cyc_1S_1Z],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 1-element structures from 2 registers
+def : InstRW<[Ampere1Write_3cyc_2S_2Z],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 1-element structures from 3 registers
+def : InstRW<[Ampere1Write_4cyc_3S_3Z],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 1-element structures from 4 registers
+def : InstRW<[Ampere1Write_5cyc_4S_4Z],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 2-element structure from one lane of 2 registers
+def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
+ (instregex "^ST2i(8|16|32|64)")>;
+// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes
+def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
+ (instregex "^ST2Twov(16b|8h|4s|2d)")>;
+// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes
+def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z],
+ (instregex "^ST2Twov(8b|4h|2s)")>;
+// -- Store 3-element structure from one lane of 3 registers
+def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
+ (instregex "^ST3i(8|16|32|64)")>;
+// -- Store 3-element structures from 3 registers
+def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
+ (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
+// -- Store 4-element structure from one lane of 4 registers
+def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
+ (instregex "^ST4i(8|16|32|64)")>;
+// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes
+def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z],
+ (instregex "^ST4Fourv(16b|8h|4s)")>;
+// -- Store 4-element structures from 4 registers, 2D sizes
+def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
+ (instregex "^ST4Fourv2d")>;
+// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes
+def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z],
+ (instregex "^ST4Fourv(8b|4h|2s)")>;
+// -- Store pair, Q-form
+def : InstRW<[Ampere1Write_3cyc_2S_2Z], (instregex "^STN?PQ")>;
+// -- Store pair, S/D-form
+def : InstRW<[Ampere1Write_3cyc_1S_2Z], (instregex "^STN?P[SD]")>;
+// -- Store register
+def : InstRW<[Ampere1Write_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>;
+// -- Store register, sign-extended register offset
+def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>;
+
+// FP data processing, bfloat16 format
+def : InstRW<[Ampere1Write_5cyc_1XY], (instrs BFCVT)>;
+def : InstRW<[Ampere1Write_7cyc_2XY], (instrs BFCVTN, BFCVTN2)>;
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>;
+def : InstRW<[Ampere1Write_4cyc_2XY], (instrs BFMMLA)>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^BFMLAL")>;
+
+// FP data processing, scalar/vector, half precision
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY],
+ (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY],
+ (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY],
+ (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
+def : InstRW<[Ampere1Write_4cyc_1X],
+ (instregex "^FCMPE?H")>;
+def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X],
+ (instregex "^FCCMPE?H")>;
+def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY],
+ (instregex "^FCSELH")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^[SU]CVTFv.[fi]16")>;
+def : InstRW<[Ampere1Write_25cyc_1XY], (instregex "^FDIVv.[if]16", "FDIVH")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
+def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
+def : InstRW<[Ampere1Write_12cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FMULX?v.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instrs FMULX16)>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRECPXv.[if]16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
+def : InstRW<[Ampere1Write_39cyc_1XY], (instregex "^FSQRTv.f16", "^FSQRTHr")>;
+
+// FP data processing, scalar/vector, single/double precision
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY],
+ (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY],
+ (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY],
+ (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1X],
+ (instregex "^FCMPE?(S|D)")>;
+def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X],
+ (instregex "^FCCMPE?(S|D)")>;
+def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY],
+ (instregex "^FCSEL(S|D)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^[SU]CVTFv.[fi](32|64)")>;
+def : InstRW<[Ampere1Write_34cyc_1XY], (instregex "^FDIVv.[if](64)", "FDIVD")>;
+def : InstRW<[Ampere1Write_19cyc_1XY], (instregex "^FDIVv.[if](32)", "FDIVS")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_10cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instrs FMULX32, FMULX64)>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT(32|64)")>;
+def : InstRW<[Ampere1Write_62cyc_1XY], (instregex "^FSQRTv.f64", "^FSQRTDr")>;
+def : InstRW<[Ampere1Write_32cyc_1XY], (instregex "^FSQRTv.f32", "^FSQRTSr")>;
+
+// FP miscellaneous instructions
+def : InstRW<[Ampere1Write_10cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FCVT[HSD]Hr")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[HSD][SD]r")>;
+def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVTLv")>;
+def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^FCVT(N|XN)v")>;
+def : InstRW<[Ampere1Write_10cyc_1X_1Z], (instrs FJCVTZS)>;
+def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
+def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
+def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "^FMOVXDHighr")>;
+def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;
+
+// Integer arithmetic and logical instructions
+def : InstRW<[Ampere1Write_1cyc_1A],
+ (instregex "ADC(W|X)r", "SBC(W|X)r")>;
+def : InstRW<[Ampere1Write_Arith],
+ (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>;
+def : InstRW<[Ampere1Write_ArithFlagsetting],
+ (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+ (instregex "(ADC|SBC)S(W|X)r")>;
+def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+ (instregex "(CCMN|CCMP)(X|W)")>;
+def : InstRW<[Ampere1Write_1cyc_1A],
+ (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
+def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>;
+def : InstRW<[Ampere1Write_3cyc_1BS],
+ (instregex "(S|U)MULHr")>;
+def : InstRW<[Ampere1Write_4cyc_1BS],
+ (instregex "(S|U)?M(ADD|SUB)L?r")>;
+
+// Integer load instructions
+def : InstRW<[Ampere1Write_4cyc_2L],
+ (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+ (instregex "LDR(B|D|H|Q|S)ui")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+ (instregex "LDR(D|Q|W|X)l")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+ (instregex "LDTR(B|H|W|X)i")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+ (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+ (instregex "LDUR(BB|HH|X|W)i")>;
+def : InstRW<[Ampere1Write_4cyc_1L],
+ (instregex "LDURS(BW|BX|HW|HX|W)i")>;
+def : InstRW<[Ampere1Write_5cyc_1AB_1L],
+ (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
+def : InstRW<[Ampere1Write_1cyc_1L],
+ (instrs PRFMl, PRFMui, PRFUMi)>;
+def : InstRW<[Ampere1Write_2cyc_1AB_1L],
+ (instrs PRFMroW, PRFMroX)>;
+
+// Integer miscellaneous instructions
+def : InstRW<[Ampere1Write_1cyc_1A], (instrs ADR, ADRP)>;
+def : InstRW<[Ampere1Write_1cyc_1B], (instregex "EXTR(W|X)")>;
+def : InstRW<[Ampere1Write_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>;
+def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>;
+def : InstRW<[Ampere1Write_1cyc_1B], (instregex "CLS(W|X)")>;
+def : InstRW<[Ampere1Write_1cyc_1A], (instrs SETF8, SETF16)>;
+def : InstRW<[Ampere1Write_1cyc_1AB],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+def : InstRW<[Ampere1Write_1cyc_1B],
+ (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
+def : InstRW<[Ampere1Write_1cyc_1B],
+ (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;
+
+// Integer store instructions
+def : InstRW<[Ampere1Write_1cyc_2S], (instregex "STNP(X|W)i")>;
+def : InstRW<[Ampere1Write_2cyc_1B_1S],
+ (instrs STPWi, STPXi)>;
+def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB],
+ (instregex "STP(W|X)(pre|post)")>;
+def : InstRW<[Ampere1Write_1cyc_1S],
+ (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
+def : InstRW<[Ampere1Write_1cyc_1S],
+ (instregex "STUR(BB|HH|X|W)i",
+ "STR(X|W)ui",
+ "STUR(BB|HH|X|W)i")>;
+def : InstRW<[Ampere1Write_1cyc_2S], (instrs STRWroX, STRXroX)>;
+def : InstRW<[Ampere1Write_2cyc_1AB_2S], (instrs STRWroW, STRXroW)>;
+
+// Pointer authentication
+//def : InstRW<[Ampere1Write_7cyc_1BS],
+// (instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>;
+def : InstRW<[Ampere1Write_8cyc_1BS_1A],
+ (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
+def : InstRW<[Ampere1Write_8cyc_1BS_2A],
+ (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
+//def : InstRW<[Ampere1Write_7cyc_1BS],
+// (instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>;
+def : InstRW<[Ampere1Write_11cyc_1BS_1L], (instregex "^LDRA(A|B)")>;
+def : InstRW<[Ampere1Write_7cyc_1BS], (instrs XPACD, XPACI)>;
+
+// Vector integer instructions
+// -- absolute difference
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
+ "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
+// -- arithmetic
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
+ "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
+ "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
+// -- arithmetic, horizontal, 16B
+def : InstRW<[Ampere1Write_12cyc_4XY],
+ (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
+def : InstRW<[Ampere1Write_12cyc_4XY],
+ (instregex "^[SU](MIN|MAX)Vv16i8v")>;
+// -- arithmetic, horizontal, 4H/4S
+def : InstRW<[Ampere1Write_6cyc_2XY],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
+def : InstRW<[Ampere1Write_6cyc_2XY],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
+// -- arithmetic, horizontal, 8B/8H
+def : InstRW<[Ampere1Write_9cyc_3XY],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
+def : InstRW<[Ampere1Write_9cyc_3XY],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
+// -- arithmetic, narrowing
+def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>;
+// -- arithmetic, pairwise
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>;
+// -- arithmetic, saturating
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>;
+// -- bit count
+def : InstRW<[Ampere1Write_2cyc_1XY],
+ (instregex "^(CLS|CLZ|CNT)v")>;
+// -- compare
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
+ "^CMHIv", "^CMHSv")>;
+// -- compare non-zero
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^CMTSTv")>;
+// -- dot product
+def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>;
+// -- fp reciprocal estimate
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPEv", "^FRSQRTEv")>;
+// -- integer reciprocal estimate
+def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>;
+// -- logical
+def : InstRW<[Ampere1Write_2cyc_1XY],
+ (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
+// -- logical, narrowing
+def : InstRW<[Ampere1Write_5cyc_2XY],
+ (instregex "RSHRNv",
+ "SHRNv", "SQSHRNv", "SQSHRUNv",
+ "UQXTNv")>;
+// -- matrix multiply
+def : InstRW<[Ampere1Write_6cyc_2XY],
+ (instrs SMMLA, UMMLA, USMMLA)>;
+// -- max/min
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
+// -- move immediate
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>;
+// -- multiply
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>;
+// -- multiply accumulate
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
+// -- negation, saturating
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>;
+// -- reverse bits/bytes
+def : InstRW<[Ampere1Write_2cyc_1XY],
+ (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
+// -- shift
+def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// -- shift and accumulate
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
+// -- shift, saturating
+def : InstRW<[Ampere1Write_3cyc_1XY],
+ (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU",
+ "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL",
+ "^UQSHL")>;
+
+// Vector miscellaneous instructions
+// -- duplicate element
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^DUPv.+lane")>;
+// -- duplicate from GPR
+def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^DUPv.+gpr")>;
+// -- extract narrow
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^XTNv")>;
+// -- insert/extract element
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>;
+// -- move FP immediate
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOVv")>;
+// -- move element to GPR
+def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "(S|U)MOVv")>;
+// -- move from GPR to any element
+def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>;
+// -- table lookup
+def : InstRW<[Ampere1Write_2cyc_1XY],
+ (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
+def : InstRW<[Ampere1Write_4cyc_2XY],
+ (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
+def : InstRW<[Ampere1Write_6cyc_3XY],
+ (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
+def : InstRW<[Ampere1Write_8cyc_4XY],
+ (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
+// -- transpose
+def : InstRW<[Ampere1Write_2cyc_1XY],
+ (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
+// -- zip/unzip
+def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>;
+
+} // SchedModel = Ampere1Model
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td b/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td
new file mode 100644
index 000000000000..8552c07bda56
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td
@@ -0,0 +1,25 @@
+//===- AArch64SchedPredAmpere.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling predicates that are used by the Ampere
+// Computing processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Check for an LSL shift <= 4
+def AmpereCheapLSL : MCSchedPredicate<
+ CheckAny<[CheckShiftBy0,
+ CheckAll<
+ [CheckShiftLSL,
+ CheckAny<
+ [CheckShiftBy1,
+ CheckShiftBy2,
+ CheckShiftBy3,
+ CheckShiftBy4]>]>]>>;
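+
+// Worked example (illustrative): the predicate accepts a plain register
+// operand (shift #0) or LSL #1-#4, e.g.
+//   add x0, x1, x2, lsl #4   // matches: single uop on Ampere-1
+//   add x0, x1, x2, lsl #5   // does not match: modeled as two uops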
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
index fcda2394bacf..ee7cc1f5095b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -109,10 +109,7 @@ def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr",
def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
// Identify FP instructions.
-def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckHForm,
- CheckSForm,
- CheckDForm,
- CheckQForm]>>;
+def ExynosFPPred : MCSchedPredicate<CheckFpOrNEON>;
// Identify 128-bit NEON instructions.
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
index fc13b23b4cf8..4473f3a53845 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -53,152 +53,23 @@ let FunctionMapper = "AArch64_AM::getShiftType" in {
}
// Check for shifting in arithmetic and logic instructions.
-foreach I = {0-3, 8} in {
+foreach I = {0-4, 8} in {
let FunctionMapper = "AArch64_AM::getShiftValue" in
def CheckShiftBy#I : CheckImmOperand<3, I>;
}
// Generic predicates.
-
-// Identify whether an instruction is the 16-bit NEON form based on its result.
-def CheckHForm : CheckAll<[CheckIsRegOperand<0>,
- CheckAny<[CheckRegOperand<0, H0>,
- CheckRegOperand<0, H1>,
- CheckRegOperand<0, H2>,
- CheckRegOperand<0, H3>,
- CheckRegOperand<0, H4>,
- CheckRegOperand<0, H5>,
- CheckRegOperand<0, H6>,
- CheckRegOperand<0, H7>,
- CheckRegOperand<0, H8>,
- CheckRegOperand<0, H9>,
- CheckRegOperand<0, H10>,
- CheckRegOperand<0, H11>,
- CheckRegOperand<0, H12>,
- CheckRegOperand<0, H13>,
- CheckRegOperand<0, H14>,
- CheckRegOperand<0, H15>,
- CheckRegOperand<0, H16>,
- CheckRegOperand<0, H17>,
- CheckRegOperand<0, H18>,
- CheckRegOperand<0, H19>,
- CheckRegOperand<0, H20>,
- CheckRegOperand<0, H21>,
- CheckRegOperand<0, H22>,
- CheckRegOperand<0, H23>,
- CheckRegOperand<0, H24>,
- CheckRegOperand<0, H25>,
- CheckRegOperand<0, H26>,
- CheckRegOperand<0, H27>,
- CheckRegOperand<0, H28>,
- CheckRegOperand<0, H29>,
- CheckRegOperand<0, H30>,
- CheckRegOperand<0, H31>]>]>;
-
-// Identify whether an instruction is the 32-bit NEON form based on its result.
-def CheckSForm : CheckAll<[CheckIsRegOperand<0>,
- CheckAny<[CheckRegOperand<0, S0>,
- CheckRegOperand<0, S1>,
- CheckRegOperand<0, S2>,
- CheckRegOperand<0, S3>,
- CheckRegOperand<0, S4>,
- CheckRegOperand<0, S5>,
- CheckRegOperand<0, S6>,
- CheckRegOperand<0, S7>,
- CheckRegOperand<0, S8>,
- CheckRegOperand<0, S9>,
- CheckRegOperand<0, S10>,
- CheckRegOperand<0, S11>,
- CheckRegOperand<0, S12>,
- CheckRegOperand<0, S13>,
- CheckRegOperand<0, S14>,
- CheckRegOperand<0, S15>,
- CheckRegOperand<0, S16>,
- CheckRegOperand<0, S17>,
- CheckRegOperand<0, S18>,
- CheckRegOperand<0, S19>,
- CheckRegOperand<0, S20>,
- CheckRegOperand<0, S21>,
- CheckRegOperand<0, S22>,
- CheckRegOperand<0, S23>,
- CheckRegOperand<0, S24>,
- CheckRegOperand<0, S25>,
- CheckRegOperand<0, S26>,
- CheckRegOperand<0, S27>,
- CheckRegOperand<0, S28>,
- CheckRegOperand<0, S29>,
- CheckRegOperand<0, S30>,
- CheckRegOperand<0, S31>]>]>;
-
-// Identify whether an instruction is the 64-bit NEON form based on its result.
-def CheckDForm : CheckAll<[CheckIsRegOperand<0>,
- CheckAny<[CheckRegOperand<0, D0>,
- CheckRegOperand<0, D1>,
- CheckRegOperand<0, D2>,
- CheckRegOperand<0, D3>,
- CheckRegOperand<0, D4>,
- CheckRegOperand<0, D5>,
- CheckRegOperand<0, D6>,
- CheckRegOperand<0, D7>,
- CheckRegOperand<0, D8>,
- CheckRegOperand<0, D9>,
- CheckRegOperand<0, D10>,
- CheckRegOperand<0, D11>,
- CheckRegOperand<0, D12>,
- CheckRegOperand<0, D13>,
- CheckRegOperand<0, D14>,
- CheckRegOperand<0, D15>,
- CheckRegOperand<0, D16>,
- CheckRegOperand<0, D17>,
- CheckRegOperand<0, D18>,
- CheckRegOperand<0, D19>,
- CheckRegOperand<0, D20>,
- CheckRegOperand<0, D21>,
- CheckRegOperand<0, D22>,
- CheckRegOperand<0, D23>,
- CheckRegOperand<0, D24>,
- CheckRegOperand<0, D25>,
- CheckRegOperand<0, D26>,
- CheckRegOperand<0, D27>,
- CheckRegOperand<0, D28>,
- CheckRegOperand<0, D29>,
- CheckRegOperand<0, D30>,
- CheckRegOperand<0, D31>]>]>;
+// Identify whether an instruction is NEON or floating-point.
+def CheckFpOrNEON : CheckFunctionPredicateWithTII<
+ "AArch64_MC::isFpOrNEON",
+ "AArch64InstrInfo::isFpOrNEON"
+>;
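+// Unlike the per-register CheckAll/CheckAny chains it replaces, this predicate
+// defers to a target hook resolved at scheduling time; models wrap it the same
+// way as any other predicate, e.g. (as in AArch64SchedPredExynos.td):
+//   def ExynosFPPred : MCSchedPredicate<CheckFpOrNEON>;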
// Identify whether an instruction is the 128-bit NEON form based on its result.
-def CheckQForm : CheckAll<[CheckIsRegOperand<0>,
- CheckAny<[CheckRegOperand<0, Q0>,
- CheckRegOperand<0, Q1>,
- CheckRegOperand<0, Q2>,
- CheckRegOperand<0, Q3>,
- CheckRegOperand<0, Q4>,
- CheckRegOperand<0, Q5>,
- CheckRegOperand<0, Q6>,
- CheckRegOperand<0, Q7>,
- CheckRegOperand<0, Q8>,
- CheckRegOperand<0, Q9>,
- CheckRegOperand<0, Q10>,
- CheckRegOperand<0, Q11>,
- CheckRegOperand<0, Q12>,
- CheckRegOperand<0, Q13>,
- CheckRegOperand<0, Q14>,
- CheckRegOperand<0, Q15>,
- CheckRegOperand<0, Q16>,
- CheckRegOperand<0, Q17>,
- CheckRegOperand<0, Q18>,
- CheckRegOperand<0, Q19>,
- CheckRegOperand<0, Q20>,
- CheckRegOperand<0, Q21>,
- CheckRegOperand<0, Q22>,
- CheckRegOperand<0, Q23>,
- CheckRegOperand<0, Q24>,
- CheckRegOperand<0, Q25>,
- CheckRegOperand<0, Q26>,
- CheckRegOperand<0, Q27>,
- CheckRegOperand<0, Q28>,
- CheckRegOperand<0, Q29>,
- CheckRegOperand<0, Q30>,
- CheckRegOperand<0, Q31>]>]>;
+def CheckQForm : CheckFunctionPredicateWithTII<
+ "AArch64_MC::isQForm",
+ "AArch64InstrInfo::isQForm"
+>;
// Identify arithmetic instructions with extend.
def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index 77fca22a5f55..6ecfc97a4273 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -25,7 +25,8 @@ def TSV110Model : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
- PAUnsupported.F);
+ PAUnsupported.F,
+ SMEUnsupported.F);
}
// Define each kind of processor resource and number available on the TSV110,
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 893269c1a7ef..677797a6797b 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -91,7 +91,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, Align Alignment, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
@@ -100,38 +100,6 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
}
-
- // Check to see if there is a specialized entry-point for memory zeroing.
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
- ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
- const char *bzeroName =
- (V && V->isZero())
- ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
- : nullptr;
- // For small size (< 256), it is not beneficial to use bzero
- // instead of memset.
- if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) {
- const AArch64TargetLowering &TLI = *STI.getTargetLowering();
-
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
- Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Size;
- Args.push_back(Entry);
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Chain)
- .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroName, IntPtr),
- std::move(Args))
- .setDiscardResult();
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- return CallResult.second;
- }
return SDValue();
}
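
For reference, the bzero path deleted above followed the standard recipe for lowering a memory operation to an external libcall. A condensed sketch using the same calls as the removed lines (DAG, dl, Chain, Dst, Size and TLI are the surrounding function's values; "bzero" stands in for the RTLIB::BZERO libcall name the original looked up):

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Dst;                                 // void *s
    Entry.Ty = Type::getInt8PtrTy(*DAG.getContext());
    Args.push_back(Entry);
    Entry.Node = Size;                                // size_t n
    Args.push_back(Entry);
    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl)
        .setChain(Chain)
        .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                      DAG.getExternalSymbol(
                          "bzero", TLI.getPointerTy(DAG.getDataLayout())),
                      std::move(Args))
        .setDiscardResult();                          // bzero returns void
    return TLI.LowerCallTo(CLI).second;               // the call's chain
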
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 47fe3bf7dcf5..73f93724d6fc 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -34,7 +34,7 @@ public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
- bool isVolatile,
+ bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const override;
SDValue
EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 566c7a16db23..24816bc9e9bd 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -42,20 +42,23 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include <cassert>
#include <iterator>
+#include <memory>
#include <utility>
using namespace llvm;
@@ -63,12 +66,12 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-stack-tagging"
static cl::opt<bool> ClMergeInit(
- "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ "stack-tagging-merge-init", cl::Hidden, cl::init(true),
cl::desc("merge stack variable initializers with tagging when possible"));
static cl::opt<bool>
ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
- cl::init(true), cl::ZeroOrMore,
+ cl::init(true),
cl::desc("Use Stack Safety analysis results"));
static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
@@ -78,6 +81,12 @@ static cl::opt<unsigned>
ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
cl::Hidden);
+static cl::opt<size_t> ClMaxLifetimes(
+ "stack-tagging-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
+ cl::ReallyHidden,
+ cl::desc("How many lifetime ends to handle for a single alloca."),
+ cl::Optional);
+
static const Align kTagGranuleSize = Align(16);
namespace {
@@ -283,15 +292,6 @@ public:
};
class AArch64StackTagging : public FunctionPass {
- struct AllocaInfo {
- AllocaInst *AI;
- TrackingVH<Instruction> OldAI; // Track through RAUW to replace debug uses.
- SmallVector<IntrinsicInst *, 2> LifetimeStart;
- SmallVector<IntrinsicInst *, 2> LifetimeEnd;
- SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
- int Tag; // -1 for non-tagged allocations
- };
-
const bool MergeInit;
const bool UseStackSafety;
@@ -307,7 +307,6 @@ public:
}
bool isInterestingAlloca(const AllocaInst &AI);
- void alignAndPadAlloca(AllocaInfo &Info);
void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
uint64_t Size);
@@ -316,9 +315,9 @@ public:
Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
uint64_t Size, InitializerBuilder &IB);
- Instruction *
- insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
- const DominatorTree *DT);
+ Instruction *insertBaseTaggedPointer(
+ const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
+ const DominatorTree *DT);
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return "AArch64 Stack Tagging"; }
@@ -419,7 +418,7 @@ bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
bool IsInteresting =
AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
// alloca() may be called with 0 size, ignore it.
- AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
+ *AI.getAllocationSizeInBits(*DL) > 0 &&
// inalloca allocas are not treated as static, and we don't want
// dynamic alloca instrumentation for them as well.
!AI.isUsedWithInAlloca() &&
@@ -460,15 +459,13 @@ void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
}
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
- const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+ const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
const DominatorTree *DT) {
BasicBlock *PrologueBB = nullptr;
// Try sinking IRG as deep as possible to avoid hurting shrink wrap.
- for (auto &I : Allocas) {
- const AllocaInfo &Info = I.second;
+ for (auto &I : AllocasToInstrument) {
+ const memtag::AllocaInfo &Info = I.second;
AllocaInst *AI = Info.AI;
- if (Info.Tag < 0)
- continue;
if (!PrologueBB) {
PrologueBB = AI->getParent();
continue;
@@ -486,40 +483,6 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
return Base;
}
-void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
- const Align NewAlignment =
- max(MaybeAlign(Info.AI->getAlign()), kTagGranuleSize);
- Info.AI->setAlignment(NewAlignment);
-
- uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
- uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
- if (Size == AlignedSize)
- return;
-
- // Add padding to the alloca.
- Type *AllocatedType =
- Info.AI->isArrayAllocation()
- ? ArrayType::get(
- Info.AI->getAllocatedType(),
- cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
- : Info.AI->getAllocatedType();
- Type *PaddingType =
- ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
- Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
- auto *NewAI = new AllocaInst(
- TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
- NewAI->takeName(Info.AI);
- NewAI->setAlignment(Info.AI->getAlign());
- NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
- NewAI->setSwiftError(Info.AI->isSwiftError());
- NewAI->copyMetadata(*Info.AI);
-
- auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
- Info.AI->replaceAllUsesWith(NewPtr);
- Info.AI->eraseFromParent();
- Info.AI = NewAI;
-}
-
// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
@@ -532,76 +495,21 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (MergeInit)
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
- SmallVector<Instruction *, 8> RetVec;
- SmallVector<Instruction *, 4> UnrecognizedLifetimes;
-
- for (auto &BB : *F) {
- for (Instruction &I : BB) {
- if (auto *AI = dyn_cast<AllocaInst>(&I)) {
- Allocas[AI].AI = AI;
- Allocas[AI].OldAI = AI;
- continue;
- }
-
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
- for (Value *V : DVI->location_ops())
- if (auto *AI = dyn_cast_or_null<AllocaInst>(V))
- if (Allocas[AI].DbgVariableIntrinsics.empty() ||
- Allocas[AI].DbgVariableIntrinsics.back() != DVI)
- Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
- continue;
- }
-
- auto *II = dyn_cast<IntrinsicInst>(&I);
- if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)) {
- AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
- if (!AI) {
- UnrecognizedLifetimes.push_back(&I);
- continue;
- }
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- Allocas[AI].LifetimeStart.push_back(II);
- else
- Allocas[AI].LifetimeEnd.push_back(II);
- }
-
- if (isa<ReturnInst, ResumeInst, CleanupReturnInst>(&I))
- RetVec.push_back(&I);
- }
- }
+ memtag::StackInfoBuilder SIB(
+ [this](const AllocaInst &AI) { return isInterestingAlloca(AI); });
+ for (Instruction &I : instructions(F))
+ SIB.visit(I);
+ memtag::StackInfo &SInfo = SIB.get();
- if (Allocas.empty())
+ if (SInfo.AllocasToInstrument.empty())
return false;
- int NextTag = 0;
- int NumInterestingAllocas = 0;
- for (auto &I : Allocas) {
- AllocaInfo &Info = I.second;
- assert(Info.AI);
-
- if (!isInterestingAlloca(*Info.AI)) {
- Info.Tag = -1;
- continue;
- }
-
- alignAndPadAlloca(Info);
- NumInterestingAllocas++;
- Info.Tag = NextTag;
- NextTag = (NextTag + 1) % 16;
- }
-
- if (NumInterestingAllocas == 0)
- return true;
-
std::unique_ptr<DominatorTree> DeleteDT;
DominatorTree *DT = nullptr;
if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
DT = &P->getDomTree();
- if (DT == nullptr && (NumInterestingAllocas > 1 ||
- !F->hasFnAttribute(Attribute::OptimizeNone))) {
+ if (DT == nullptr) {
DeleteDT = std::make_unique<DominatorTree>(*F);
DT = DeleteDT.get();
}
@@ -611,38 +519,57 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
PDT = &P->getPostDomTree();
- if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
+ if (PDT == nullptr) {
DeletePDT = std::make_unique<PostDominatorTree>(*F);
PDT = DeletePDT.get();
}
+ std::unique_ptr<LoopInfo> DeleteLI;
+ LoopInfo *LI = nullptr;
+ if (auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>()) {
+ LI = &LIWP->getLoopInfo();
+ } else {
+ DeleteLI = std::make_unique<LoopInfo>(*DT);
+ LI = DeleteLI.get();
+ }
+
SetTagFunc =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
- Instruction *Base = insertBaseTaggedPointer(Allocas, DT);
+ Instruction *Base = insertBaseTaggedPointer(SInfo.AllocasToInstrument, DT);
- for (auto &I : Allocas) {
- const AllocaInfo &Info = I.second;
+ int NextTag = 0;
+ for (auto &I : SInfo.AllocasToInstrument) {
+ memtag::AllocaInfo &Info = I.second;
+ assert(Info.AI && isInterestingAlloca(*Info.AI));
+ TrackingVH<Instruction> OldAI = Info.AI;
+ memtag::alignAndPadAlloca(Info, kTagGranuleSize);
AllocaInst *AI = Info.AI;
- if (Info.Tag < 0)
- continue;
-
+ int Tag = NextTag;
+ NextTag = (NextTag + 1) % 16;
// Replace alloca with tagp(alloca).
IRBuilder<> IRB(Info.AI->getNextNode());
Function *TagP = Intrinsic::getDeclaration(
F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
Instruction *TagPCall =
IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
- ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
+ ConstantInt::get(IRB.getInt64Ty(), Tag)});
if (Info.AI->hasName())
TagPCall->setName(Info.AI->getName() + ".tag");
Info.AI->replaceAllUsesWith(TagPCall);
TagPCall->setOperand(0, Info.AI);
- if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
- Info.LifetimeEnd.size() == 1) {
+      // Calls to functions that may return twice (e.g. setjmp) confuse the
+      // postdominator analysis, and can cause us to leave memory tagged after
+      // function return. Work around this by always untagging at every return
+      // statement if returns_twice functions are called.
+ bool StandardLifetime =
+ SInfo.UnrecognizedLifetimes.empty() &&
+ memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, DT, LI,
+ ClMaxLifetimes) &&
+ !SInfo.CallsReturnTwice;
+ if (StandardLifetime) {
IntrinsicInst *Start = Info.LifetimeStart[0];
- IntrinsicInst *End = Info.LifetimeEnd[0];
uint64_t Size =
cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
Size = alignTo(Size, kTagGranuleSize);
@@ -650,14 +577,16 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
if (!DT || !PDT ||
- !forAllReachableExits(*DT, *PDT, Start, Info.LifetimeEnd, RetVec,
- TagEnd))
- End->eraseFromParent();
+ !memtag::forAllReachableExits(*DT, *PDT, *LI, Start, Info.LifetimeEnd,
+ SInfo.RetVec, TagEnd)) {
+ for (auto *End : Info.LifetimeEnd)
+ End->eraseFromParent();
+ }
} else {
- uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
+ uint64_t Size = *Info.AI->getAllocationSizeInBits(*DL) / 8;
Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
- for (auto &RI : RetVec) {
+ for (auto &RI : SInfo.RetVec) {
untagAlloca(AI, RI, Size);
}
// We may have inserted tag/untag outside of any lifetime interval.
@@ -670,12 +599,12 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
// Fixup debug intrinsics to point to the new alloca.
for (auto DVI : Info.DbgVariableIntrinsics)
- DVI->replaceVariableLocationOp(Info.OldAI, Info.AI);
+ DVI->replaceVariableLocationOp(OldAI, Info.AI);
}
// If we have instrumented at least one alloca, all unrecognized lifetime
- // instrinsics have to go.
- for (auto &I : UnrecognizedLifetimes)
+ // intrinsics have to go.
+ for (auto &I : SInfo.UnrecognizedLifetimes)
I->eraseFromParent();
return true;
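
Condensed, the rewritten pass body above now reads roughly as follows (a restatement of the hunk, with F, kTagGranuleSize and isInterestingAlloca as in the surrounding pass):

    memtag::StackInfoBuilder SIB(
        [&](const AllocaInst &AI) { return isInterestingAlloca(AI); });
    for (Instruction &I : instructions(F))
      SIB.visit(I); // collects allocas, lifetime markers, exits, returns_twice
    memtag::StackInfo &SInfo = SIB.get();

    for (auto &KV : SInfo.AllocasToInstrument) {
      memtag::AllocaInfo &Info = KV.second;
      memtag::alignAndPadAlloca(Info, kTagGranuleSize); // pad to 16-byte granules
      // Standard lifetime (single start, bounded ends, no returns_twice calls):
      // scope tag/untag to the lifetime markers; otherwise tag right after the
      // alloca and untag at every exit in SInfo.RetVec.
    }
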
diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index cae6d65bed2d..7e91dc1b6385 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -50,7 +50,6 @@ cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
static cl::opt<bool>
ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
- cl::ZeroOrMore,
cl::desc("Apply first slot optimization for stack tagging "
"(eliminate ADDG Rt, Rn, 0, 0)."));
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 8a7e20237271..15005304383d 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -21,6 +21,7 @@
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/AArch64TargetParser.h"
@@ -51,6 +52,16 @@ static cl::opt<bool>
static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
cl::desc("Enable the use of AA during codegen."));
+static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
+ "aarch64-insert-extract-base-cost",
+ cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
+
+unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
+ if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
+ return OverrideVectorInsertExtractBaseCost;
+ return VectorInsertExtractBaseCost;
+}
+
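
The override hook added above uses cl::opt::getNumOccurrences() to distinguish a flag left at its default from one passed explicitly. A minimal standalone sketch of the same pattern (flag and function names hypothetical):

    static llvm::cl::opt<unsigned>
        OverrideBaseCost("example-base-cost",
                         llvm::cl::desc("Override the tuned base cost"),
                         llvm::cl::Hidden);

    unsigned getBaseCost(unsigned TunedDefault) {
      // Honor the flag only when the user actually passed it; otherwise fall
      // through to the subtarget's tuned value.
      if (OverrideBaseCost.getNumOccurrences() > 0)
        return OverrideBaseCost;
      return TunedDefault;
    }
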
AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
// Determine default and user-specified characteristics
@@ -78,14 +89,17 @@ void AArch64Subtarget::initializeProperties() {
CacheLineSize = 64;
break;
case CortexA35:
- break;
case CortexA53:
case CortexA55:
PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 4;
+ MaxBytesForLoopAlignment = 8;
break;
case CortexA57:
MaxInterleaveFactor = 4;
PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 4;
+ MaxBytesForLoopAlignment = 8;
break;
case CortexA65:
PrefFunctionLogAlignment = 3;
@@ -93,6 +107,10 @@ void AArch64Subtarget::initializeProperties() {
case CortexA72:
case CortexA73:
case CortexA75:
+ PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 4;
+ MaxBytesForLoopAlignment = 8;
+ break;
case CortexA76:
case CortexA77:
case CortexA78:
@@ -101,12 +119,21 @@ void AArch64Subtarget::initializeProperties() {
case CortexX1:
case CortexX1C:
PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 5;
+ MaxBytesForLoopAlignment = 16;
break;
case CortexA510:
+ PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 1;
+ PrefLoopLogAlignment = 4;
+ MaxBytesForLoopAlignment = 8;
+ break;
case CortexA710:
case CortexX2:
PrefFunctionLogAlignment = 4;
VScaleForTuning = 1;
+ PrefLoopLogAlignment = 5;
+ MaxBytesForLoopAlignment = 16;
break;
case A64FX:
CacheLineSize = 256;
@@ -221,6 +248,12 @@ void AArch64Subtarget::initializeProperties() {
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
+ case Ampere1:
+ CacheLineSize = 64;
+ PrefFunctionLogAlignment = 6;
+ PrefLoopLogAlignment = 6;
+ MaxInterleaveFactor = 4;
+ break;
}
}
@@ -352,6 +385,8 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
if (!UseAddressTopByteIgnored)
return false;
+ if (TargetTriple.isDriverKit())
+ return true;
if (TargetTriple.isiOS()) {
return TargetTriple.getiOSVersion() >= VersionTuple(8);
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 7b2bbad30f85..c92e3e44de31 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include <string>
@@ -40,6 +40,7 @@ public:
enum ARMProcFamilyEnum : uint8_t {
Others,
A64FX,
+ Ampere1,
AppleA7,
AppleA10,
AppleA11,
@@ -87,191 +88,14 @@ protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
- bool HasV8_0aOps = false;
- bool HasV8_1aOps = false;
- bool HasV8_2aOps = false;
- bool HasV8_3aOps = false;
- bool HasV8_4aOps = false;
- bool HasV8_5aOps = false;
- bool HasV8_6aOps = false;
- bool HasV8_7aOps = false;
- bool HasV8_8aOps = false;
- bool HasV9_0aOps = false;
- bool HasV9_1aOps = false;
- bool HasV9_2aOps = false;
- bool HasV9_3aOps = false;
- bool HasV8_0rOps = false;
-
- bool HasCONTEXTIDREL2 = false;
- bool HasEL2VMSA = false;
- bool HasEL3 = false;
- bool HasFPARMv8 = false;
- bool HasNEON = false;
- bool HasCrypto = false;
- bool HasDotProd = false;
- bool HasCRC = false;
- bool HasLSE = false;
- bool HasLSE2 = false;
- bool HasRAS = false;
- bool HasRDM = false;
- bool HasPerfMon = false;
- bool HasFullFP16 = false;
- bool HasFP16FML = false;
- bool HasSPE = false;
-
- bool FixCortexA53_835769 = false;
-
- // ARMv8.1 extensions
- bool HasVH = false;
- bool HasPAN = false;
- bool HasLOR = false;
-
- // ARMv8.2 extensions
- bool HasPsUAO = false;
- bool HasPAN_RWV = false;
- bool HasCCPP = false;
-
- // SVE extensions
- bool HasSVE = false;
- bool UseExperimentalZeroingPseudos = false;
- bool UseScalarIncVL = false;
-
- // Armv8.2 Crypto extensions
- bool HasSM4 = false;
- bool HasSHA3 = false;
- bool HasSHA2 = false;
- bool HasAES = false;
-
- // ARMv8.3 extensions
- bool HasPAuth = false;
- bool HasJS = false;
- bool HasCCIDX = false;
- bool HasComplxNum = false;
-
- // ARMv8.4 extensions
- bool HasNV = false;
- bool HasMPAM = false;
- bool HasDIT = false;
- bool HasTRACEV8_4 = false;
- bool HasAM = false;
- bool HasSEL2 = false;
- bool HasTLB_RMI = false;
- bool HasFlagM = false;
- bool HasRCPC_IMMO = false;
-
- bool HasLSLFast = false;
- bool HasRCPC = false;
- bool HasAggressiveFMA = false;
-
- // Armv8.5-A Extensions
- bool HasAlternativeNZCV = false;
- bool HasFRInt3264 = false;
- bool HasSpecRestrict = false;
- bool HasSSBS = false;
- bool HasSB = false;
- bool HasPredRes = false;
- bool HasCCDP = false;
- bool HasBTI = false;
- bool HasRandGen = false;
- bool HasMTE = false;
- bool HasTME = false;
-
- // Armv8.6-A Extensions
- bool HasBF16 = false;
- bool HasMatMulInt8 = false;
- bool HasMatMulFP32 = false;
- bool HasMatMulFP64 = false;
- bool HasAMVS = false;
- bool HasFineGrainedTraps = false;
- bool HasEnhancedCounterVirtualization = false;
-
- // Armv8.7-A Extensions
- bool HasXS = false;
- bool HasWFxT = false;
- bool HasHCX = false;
- bool HasLS64 = false;
-
- // Armv8.8-A Extensions
- bool HasHBC = false;
- bool HasMOPS = false;
-
- // Arm SVE2 extensions
- bool HasSVE2 = false;
- bool HasSVE2AES = false;
- bool HasSVE2SM4 = false;
- bool HasSVE2SHA3 = false;
- bool HasSVE2BitPerm = false;
-
- // Armv9-A Extensions
- bool HasRME = false;
-
- // Arm Scalable Matrix Extension (SME)
- bool HasSME = false;
- bool HasSMEF64 = false;
- bool HasSMEI64 = false;
- bool HasStreamingSVE = false;
-
- // AppleA7 system register.
- bool HasAppleA7SysReg = false;
-
- // Future architecture extensions.
- bool HasETE = false;
- bool HasTRBE = false;
- bool HasBRBE = false;
- bool HasSPE_EEF = false;
-
- // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
- bool HasZeroCycleRegMove = false;
-
- // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
- bool HasZeroCycleZeroing = false;
- bool HasZeroCycleZeroingGP = false;
- bool HasZeroCycleZeroingFPWorkaround = false;
-
- // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0".
- // as movi is more efficient across all cores. Newer cores can eliminate
- // fmovs early and there is no difference with movi, but this not true for
- // all implementations.
- bool HasZeroCycleZeroingFP = true;
-
- // StrictAlign - Disallow unaligned memory accesses.
- bool StrictAlign = false;
-
- // NegativeImmediates - transform instructions with negative immediates
- bool NegativeImmediates = true;
-
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
- bool OutlineAtomics = false;
- bool PredictableSelectIsExpensive = false;
- bool BalanceFPOps = false;
- bool CustomAsCheapAsMove = false;
- bool ExynosAsCheapAsMove = false;
- bool UsePostRAScheduler = false;
- bool Misaligned128StoreIsSlow = false;
- bool Paired128IsSlow = false;
- bool STRQroIsSlow = false;
- bool UseAlternateSExtLoadCVTF32Pattern = false;
- bool HasArithmeticBccFusion = false;
- bool HasArithmeticCbzFusion = false;
- bool HasCmpBccFusion = false;
- bool HasFuseAddress = false;
- bool HasFuseAES = false;
- bool HasFuseArithmeticLogic = false;
- bool HasFuseCCSelect = false;
- bool HasFuseCryptoEOR = false;
- bool HasFuseLiterals = false;
- bool DisableLatencySchedHeuristic = false;
- bool UseRSqrt = false;
- bool Force32BitJumpTables = false;
- bool UseEL1ForTP = false;
- bool UseEL2ForTP = false;
- bool UseEL3ForTP = false;
- bool AllowTaggedGlobals = false;
- bool HardenSlsRetBr = false;
- bool HardenSlsBlr = false;
- bool HardenSlsNoComdat = false;
+// Bool members corresponding to the SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "AArch64GenSubtargetInfo.inc"
+
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -282,7 +106,6 @@ protected:
unsigned PrefLoopLogAlignment = 0;
unsigned MaxBytesForLoopAlignment = 0;
unsigned MaxJumpTableSize = 0;
- unsigned WideningBaseCost = 0;
// ReserveXRegister[i] - X#i is not available as a general purpose register.
BitVector ReserveXRegister;
@@ -331,6 +154,11 @@ public:
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0);
+// Getters for SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const { return ATTRIBUTE; }
+#include "AArch64GenSubtargetInfo.inc"
+
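
Both GET_SUBTARGETINFO_MACRO blocks rely on tablegen emitting one macro invocation per SubtargetFeature into the generated include. A hypothetical excerpt of what AArch64GenSubtargetInfo.inc is assumed to contain:

    GET_SUBTARGETINFO_MACRO(HasNEON, false, hasNEON)
    GET_SUBTARGETINFO_MACRO(HasSVE, false, hasSVE)
    GET_SUBTARGETINFO_MACRO(HasFuseAdrpAdd, false, hasFuseAdrpAdd)

At the member site these expand to "bool HasNEON = false;" and so on; at the getter site the same list expands to "bool hasNEON() const { return HasNEON; }", replacing the hand-written fields and accessors deleted in this hunk.
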
const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
@@ -351,9 +179,7 @@ public:
const RegisterBankInfo *getRegBankInfo() const override;
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
- bool enablePostRAScheduler() const override {
- return UsePostRAScheduler;
- }
+ bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
/// Returns ARM processor family.
/// Avoid this function! CPU specifics should be kept local to this class
@@ -363,30 +189,6 @@ public:
return ARMProcFamily;
}
- bool hasV8_0aOps() const { return HasV8_0aOps; }
- bool hasV8_1aOps() const { return HasV8_1aOps; }
- bool hasV8_2aOps() const { return HasV8_2aOps; }
- bool hasV8_3aOps() const { return HasV8_3aOps; }
- bool hasV8_4aOps() const { return HasV8_4aOps; }
- bool hasV8_5aOps() const { return HasV8_5aOps; }
- bool hasV9_0aOps() const { return HasV9_0aOps; }
- bool hasV9_1aOps() const { return HasV9_1aOps; }
- bool hasV9_2aOps() const { return HasV9_2aOps; }
- bool hasV9_3aOps() const { return HasV9_3aOps; }
- bool hasV8_0rOps() const { return HasV8_0rOps; }
-
- bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
-
- bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }
-
- bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
-
- bool hasZeroCycleZeroingFPWorkaround() const {
- return HasZeroCycleZeroingFPWorkaround;
- }
-
- bool requiresStrictAlign() const { return StrictAlign; }
-
bool isXRaySupported() const override { return true; }
unsigned getMinVectorRegisterBitWidth() const {
@@ -399,63 +201,16 @@ public:
return CustomCallSavedXRegs[i];
}
bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
- bool hasFPARMv8() const { return HasFPARMv8; }
- bool hasNEON() const { return HasNEON; }
- bool hasCrypto() const { return HasCrypto; }
- bool hasDotProd() const { return HasDotProd; }
- bool hasCRC() const { return HasCRC; }
- bool hasLSE() const { return HasLSE; }
- bool hasLSE2() const { return HasLSE2; }
- bool hasRAS() const { return HasRAS; }
- bool hasRDM() const { return HasRDM; }
- bool hasSM4() const { return HasSM4; }
- bool hasSHA3() const { return HasSHA3; }
- bool hasSHA2() const { return HasSHA2; }
- bool hasAES() const { return HasAES; }
- bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; }
- bool balanceFPOps() const { return BalanceFPOps; }
- bool predictableSelectIsExpensive() const {
- return PredictableSelectIsExpensive;
- }
- bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
- bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; }
- bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
- bool isPaired128Slow() const { return Paired128IsSlow; }
- bool isSTRQroSlow() const { return STRQroIsSlow; }
- bool useAlternateSExtLoadCVTF32Pattern() const {
- return UseAlternateSExtLoadCVTF32Pattern;
- }
- bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
- bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
- bool hasCmpBccFusion() const { return HasCmpBccFusion; }
- bool hasFuseAddress() const { return HasFuseAddress; }
- bool hasFuseAES() const { return HasFuseAES; }
- bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
- bool hasFuseCCSelect() const { return HasFuseCCSelect; }
- bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
- bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const {
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
- hasFuseAES() || hasFuseArithmeticLogic() ||
- hasFuseCCSelect() || hasFuseLiterals();
+ hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
+ hasFuseAdrpAdd() || hasFuseLiterals();
}
- bool hardenSlsRetBr() const { return HardenSlsRetBr; }
- bool hardenSlsBlr() const { return HardenSlsBlr; }
- bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
-
- bool useEL1ForTP() const { return UseEL1ForTP; }
- bool useEL2ForTP() const { return UseEL2ForTP; }
- bool useEL3ForTP() const { return UseEL3ForTP; }
-
- bool useRSqrt() const { return UseRSqrt; }
- bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
- unsigned getVectorInsertExtractBaseCost() const {
- return VectorInsertExtractBaseCost;
- }
+ unsigned getVectorInsertExtractBaseCost() const;
unsigned getCacheLineSize() const override { return CacheLineSize; }
unsigned getPrefetchDistance() const override { return PrefetchDistance; }
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
@@ -478,60 +233,10 @@ public:
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
- unsigned getWideningBaseCost() const { return WideningBaseCost; }
-
- bool useExperimentalZeroingPseudos() const {
- return UseExperimentalZeroingPseudos;
- }
-
- bool useScalarIncVL() const { return UseScalarIncVL; }
-
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
- bool hasPerfMon() const { return HasPerfMon; }
- bool hasFullFP16() const { return HasFullFP16; }
- bool hasFP16FML() const { return HasFP16FML; }
- bool hasSPE() const { return HasSPE; }
- bool hasLSLFast() const { return HasLSLFast; }
- bool hasSVE() const { return HasSVE; }
- bool hasSVE2() const { return HasSVE2; }
- bool hasRCPC() const { return HasRCPC; }
- bool hasAggressiveFMA() const { return HasAggressiveFMA; }
- bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
- bool hasFRInt3264() const { return HasFRInt3264; }
- bool hasSpecRestrict() const { return HasSpecRestrict; }
- bool hasSSBS() const { return HasSSBS; }
- bool hasSB() const { return HasSB; }
- bool hasPredRes() const { return HasPredRes; }
- bool hasCCDP() const { return HasCCDP; }
- bool hasBTI() const { return HasBTI; }
- bool hasRandGen() const { return HasRandGen; }
- bool hasMTE() const { return HasMTE; }
- bool hasTME() const { return HasTME; }
- // Arm SVE2 extensions
- bool hasSVE2AES() const { return HasSVE2AES; }
- bool hasSVE2SM4() const { return HasSVE2SM4; }
- bool hasSVE2SHA3() const { return HasSVE2SHA3; }
- bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
- bool hasMatMulInt8() const { return HasMatMulInt8; }
- bool hasMatMulFP32() const { return HasMatMulFP32; }
- bool hasMatMulFP64() const { return HasMatMulFP64; }
-
- // Armv8.6-A Extensions
- bool hasBF16() const { return HasBF16; }
- bool hasFineGrainedTraps() const { return HasFineGrainedTraps; }
- bool hasEnhancedCounterVirtualization() const {
- return HasEnhancedCounterVirtualization;
- }
-
- // Arm Scalable Matrix Extension (SME)
- bool hasSME() const { return HasSME; }
- bool hasSMEF64() const { return HasSMEF64; }
- bool hasSMEI64() const { return HasSMEI64; }
- bool hasStreamingSVE() const { return HasStreamingSVE; }
-
bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
@@ -552,42 +257,6 @@ public:
bool useAA() const override;
- bool outlineAtomics() const { return OutlineAtomics; }
-
- bool hasVH() const { return HasVH; }
- bool hasPAN() const { return HasPAN; }
- bool hasLOR() const { return HasLOR; }
-
- bool hasPsUAO() const { return HasPsUAO; }
- bool hasPAN_RWV() const { return HasPAN_RWV; }
- bool hasCCPP() const { return HasCCPP; }
-
- bool hasPAuth() const { return HasPAuth; }
- bool hasJS() const { return HasJS; }
- bool hasCCIDX() const { return HasCCIDX; }
- bool hasComplxNum() const { return HasComplxNum; }
-
- bool hasNV() const { return HasNV; }
- bool hasMPAM() const { return HasMPAM; }
- bool hasDIT() const { return HasDIT; }
- bool hasTRACEV8_4() const { return HasTRACEV8_4; }
- bool hasAM() const { return HasAM; }
- bool hasAMVS() const { return HasAMVS; }
- bool hasXS() const { return HasXS; }
- bool hasWFxT() const { return HasWFxT; }
- bool hasHCX() const { return HasHCX; }
- bool hasLS64() const { return HasLS64; }
- bool hasSEL2() const { return HasSEL2; }
- bool hasTLB_RMI() const { return HasTLB_RMI; }
- bool hasFlagM() const { return HasFlagM; }
- bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
- bool hasEL2VMSA() const { return HasEL2VMSA; }
- bool hasEL3() const { return HasEL3; }
- bool hasHBC() const { return HasHBC; }
- bool hasMOPS() const { return HasMOPS; }
-
- bool fixCortexA53_835769() const { return FixCortexA53_835769; }
-
bool addrSinkUsingGEPs() const override {
// Keeping GEPs inbounds is important for exploiting AArch64
// addressing-modes in ILP32 mode.
@@ -623,8 +292,6 @@ public:
bool enableEarlyIfConversion() const override;
- bool enableAdvancedRASplitCost() const override { return false; }
-
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
bool isCallingConvWin64(CallingConv::ID CC) const {
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index cce5813fe6e9..f3788175c48d 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -18,23 +18,23 @@ include "llvm/TableGen/SearchableTable.td"
//===----------------------------------------------------------------------===//
def HasCCPP : Predicate<"Subtarget->hasCCPP()">,
- AssemblerPredicate<(all_of FeatureCCPP), "ccpp">;
+ AssemblerPredicateWithAll<(all_of FeatureCCPP), "ccpp">;
def HasPAN : Predicate<"Subtarget->hasPAN()">,
- AssemblerPredicate<(all_of FeaturePAN),
+ AssemblerPredicateWithAll<(all_of FeaturePAN),
"ARM v8.1 Privileged Access-Never extension">;
def HasPsUAO : Predicate<"Subtarget->hasPsUAO()">,
- AssemblerPredicate<(all_of FeaturePsUAO),
+ AssemblerPredicateWithAll<(all_of FeaturePsUAO),
"ARM v8.2 UAO PState extension (psuao)">;
def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
- AssemblerPredicate<(all_of FeaturePAN_RWV),
+ AssemblerPredicateWithAll<(all_of FeaturePAN_RWV),
"ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
def HasCONTEXTIDREL2
: Predicate<"Subtarget->hasCONTEXTIDREL2()">,
- AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
+ AssemblerPredicateWithAll<(all_of FeatureCONTEXTIDREL2),
"Target contains CONTEXTIDR_EL2 RW operand">;
//===----------------------------------------------------------------------===//
@@ -631,6 +631,7 @@ def : ROSysReg<"OSLSR_EL1", 0b10, 0b000, 0b0001, 0b0001, 0b100>;
def : ROSysReg<"DBGAUTHSTATUS_EL1", 0b10, 0b000, 0b0111, 0b1110, 0b110>;
def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
+def : ROSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
@@ -977,7 +978,6 @@ def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>;
def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>;
def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>;
def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>;
-def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>;
def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>;
def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 4af28fc070dd..3f9795f5198b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,6 +12,7 @@
#include "AArch64TargetMachine.h"
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64MachineScheduler.h"
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetObjectFile.h"
@@ -21,7 +22,9 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CFIFixup.h"
#include "llvm/CodeGen/CSEConfigBase.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -31,6 +34,7 @@
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
@@ -59,6 +63,11 @@ static cl::opt<bool>
cl::desc("Enable the conditional branch tuning pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableAArch64CopyPropagation(
+ "aarch64-enable-copy-propagation",
+ cl::desc("Enable the copy propagation with AArch64 copy instr"),
+ cl::init(true), cl::Hidden);
+
static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
@@ -265,7 +274,7 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
// On ELF platforms the default static relocation model has a smart enough
// linker to cope with referencing external symbols defined in a shared
// library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
- if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
+ if (!RM || *RM == Reloc::DynamicNoPIC)
return Reloc::Static;
return *RM;
}
@@ -354,6 +363,10 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// AArch64 supports the debug entry values.
setSupportsDebugEntryValues(true);
+
+ // AArch64 supports fixing up the DWARF unwind information.
+ if (!getMCAsmInfo()->usesWindowsCFI())
+ setCFIFixup(true);
}
AArch64TargetMachine::~AArch64TargetMachine() = default;
@@ -379,7 +392,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
if (VScaleRangeAttr.isValid()) {
Optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128;
- MaxSVEVectorSize = VScaleMax ? VScaleMax.getValue() * 128 : 0;
+ MaxSVEVectorSize = VScaleMax ? *VScaleMax * 128 : 0;
} else {
MinSVEVectorSize = SVEVectorBitsMinOpt;
MaxSVEVectorSize = SVEVectorBitsMaxOpt;
@@ -468,15 +481,17 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+ ScheduleDAGMI *DAG =
+ new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
+ /* RemoveKillFlags=*/true);
if (ST.hasFusion()) {
// Run the Macro Fusion after RA again since literals are expanded from
// pseudos then (v. addPreSched2()).
- ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
DAG->addMutation(createAArch64MacroFusionDAGMutation());
return DAG;
}
- return nullptr;
+ return DAG;
}
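
Net effect of the change above: the post-RA machine scheduler is no longer skipped when fusion is disabled; it always runs with the AArch64-specific strategy, and macro fusion stays conditional. Condensed from the hunk:

    ScheduleDAGMI *DAG =
        new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
                          /*RemoveKillFlags=*/true);
    if (ST.hasFusion())
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
    return DAG; // previously nullptr here selected the generic post-RA scheduler
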
void addIRPasses() override;
@@ -504,7 +519,7 @@ public:
} // end anonymous namespace
TargetTransformInfo
-AArch64TargetMachine::getTargetTransformInfo(const Function &F) {
+AArch64TargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(AArch64TTIImpl(this, F));
}
@@ -531,6 +546,7 @@ void AArch64PassConfig::addIRPasses() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
+ .convertSwitchRangeToICmp(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
@@ -574,6 +590,9 @@ void AArch64PassConfig::addIRPasses() {
// Add Control Flow Guard checks.
if (TM->getTargetTriple().isOSWindows())
addPass(createCFGuardCheckPass());
+
+ if (TM->Options.JMCInstrument)
+ addPass(createJMCInstrumenterPass());
}
// Pass Pipeline Configuration
@@ -759,6 +778,10 @@ void AArch64PassConfig::addPreEmitPass() {
if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
+ if (TM->getOptLevel() >= CodeGenOpt::Aggressive &&
+ EnableAArch64CopyPropagation)
+ addPass(createMachineCopyPropagationPass(true));
+
addPass(createAArch64A53Fix835769());
if (EnableBranchTargets)
@@ -804,8 +827,7 @@ AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
bool AArch64TargetMachine::parseMachineFunctionInfo(
const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
SMDiagnostic &Error, SMRange &SourceRange) const {
- const auto &YamlMFI =
- reinterpret_cast<const yaml::AArch64FunctionInfo &>(MFI);
+ const auto &YamlMFI = static_cast<const yaml::AArch64FunctionInfo &>(MFI);
MachineFunction &MF = PFS.MF;
MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 7d314bce99b1..beb109502ff9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -41,7 +41,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
TargetLoweringObjectFile* getObjFileLowering() const override {
return TLOF.get();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b2ffdf949d8b..41c7a8c5042f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -8,6 +8,7 @@
#include "AArch64TargetTransformInfo.h"
#include "AArch64ExpandImm.h"
+#include "AArch64PerfectShuffle.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -15,8 +16,8 @@
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
@@ -50,6 +51,12 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
+bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
+ TargetTransformInfo::RegisterKind K) const {
+ assert(K != TargetTransformInfo::RGK_Scalar);
+ return K == TargetTransformInfo::RGK_FixedWidthVector;
+}
+
/// Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
@@ -370,6 +377,49 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return Entry->Cost;
break;
}
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat: {
+ if (ICA.getArgTypes().empty())
+ break;
+ bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
+ auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
+ EVT MTy = TLI->getValueType(DL, RetTy);
+ // Check for the legal types, which are where the size of the input and the
+ // output are the same, or we are using cvt f64->i32 or f32->i64.
+ if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
+ LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
+ LT.second == MVT::v2f64) &&
+ (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits() ||
+ (LT.second == MVT::f64 && MTy == MVT::i32) ||
+ (LT.second == MVT::f32 && MTy == MVT::i64)))
+ return LT.first;
+ // Similarly for fp16 sizes
+ if (ST->hasFullFP16() &&
+ ((LT.second == MVT::f16 && MTy == MVT::i32) ||
+ ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
+ (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits()))))
+ return LT.first;
+
+ // Otherwise we use a legal convert followed by a min+max
+ if ((LT.second.getScalarType() == MVT::f32 ||
+ LT.second.getScalarType() == MVT::f64 ||
+ (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) &&
+ LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
+ Type *LegalTy =
+ Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits());
+ if (LT.second.isVector())
+ LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount());
+ InstructionCost Cost = 1;
+ IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin : Intrinsic::umin,
+ LegalTy, {LegalTy, LegalTy});
+ Cost += getIntrinsicInstrCost(Attrs1, CostKind);
+ IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax : Intrinsic::umax,
+ LegalTy, {LegalTy, LegalTy});
+ Cost += getIntrinsicInstrCost(Attrs2, CostKind);
+ return LT.first * Cost;
+ }
+ break;
+ }
default:
break;
}
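
A worked instance of the fptosi_sat fallback above, under illustrative types: llvm.fptosi.sat.v4i8.v4f32 on a target with no direct f32-to-i8 saturating convert. Here LT.second = v4f32 (LT.first = 1) and MTy = v4i8, so the legal-type checks fail (32-bit lanes against an 8-bit result), but the scalar source is wider than the result and the min+max path applies:

    // LegalTy = <4 x i32>               (iN with N = 32, matching the f32 lanes)
    // Cost    = 1                       (the legal fcvtzs)
    //         + cost(smin on <4 x i32>) (clamp against the upper bound)
    //         + cost(smax on <4 x i32>) (clamp against the lower bound)
    // returned value: LT.first * Cost
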
@@ -525,6 +575,14 @@ static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
return IC.replaceInstUsesWith(II, EarliestReplacement);
}
+static Optional<Instruction *> instCombineSVESel(InstCombiner &IC,
+ IntrinsicInst &II) {
+ IRBuilder<> Builder(&II);
+ auto Select = Builder.CreateSelect(II.getOperand(0), II.getOperand(1),
+ II.getOperand(2));
+ return IC.replaceInstUsesWith(II, Select);
+}
+
static Optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
IntrinsicInst &II) {
IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
@@ -594,8 +652,7 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
return None;
auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
- if (!VecIns ||
- VecIns->getIntrinsicID() != Intrinsic::experimental_vector_insert)
+ if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
return None;
// Where the vector insert is a fixed constant vector insert into undef at
@@ -862,12 +919,14 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
if (isAllActivePredicate(Pred)) {
LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
+ Load->copyMetadata(II);
return IC.replaceInstUsesWith(II, Load);
}
CallInst *MaskedLoad =
Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
Pred, ConstantAggregateZero::get(VecTy));
+ MaskedLoad->copyMetadata(II);
return IC.replaceInstUsesWith(II, MaskedLoad);
}
@@ -883,12 +942,14 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());
if (isAllActivePredicate(Pred)) {
- Builder.CreateStore(VecOp, VecPtr);
+ StoreInst *Store = Builder.CreateStore(VecOp, VecPtr);
+ Store->copyMetadata(II);
return IC.eraseInstFromFunction(II);
}
- Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL),
- Pred);
+ CallInst *MaskedStore = Builder.CreateMaskedStore(
+ VecOp, VecPtr, PtrOp->getPointerAlignment(DL), Pred);
+ MaskedStore->copyMetadata(II);
return IC.eraseInstFromFunction(II);
}
@@ -1069,7 +1130,6 @@ static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
Value *BasePtr = II.getOperand(1);
Value *Index = II.getOperand(2);
Type *Ty = II.getType();
- Type *BasePtrTy = BasePtr->getType();
Value *PassThru = ConstantAggregateZero::get(Ty);
// Contiguous gather => masked load.
@@ -1085,8 +1145,8 @@ static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
Type *VecPtrTy = PointerType::getUnqual(Ty);
- Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr,
- IndexBase);
+ Value *Ptr = Builder.CreateGEP(
+ cast<VectorType>(Ty)->getElementType(), BasePtr, IndexBase);
Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
CallInst *MaskedLoad =
Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
@@ -1104,10 +1164,9 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
Value *BasePtr = II.getOperand(2);
Value *Index = II.getOperand(3);
Type *Ty = Val->getType();
- Type *BasePtrTy = BasePtr->getType();
// Contiguous scatter => masked store.
- // (sve.ld1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
+ // (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
// => (masked.store Value (gep BasePtr IndexBase) Align Mask)
Value *IndexBase;
if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
@@ -1118,8 +1177,8 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
Align Alignment =
BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
- Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr,
- IndexBase);
+ Value *Ptr = Builder.CreateGEP(
+ cast<VectorType>(Ty)->getElementType(), BasePtr, IndexBase);
Type *VecPtrTy = PointerType::getUnqual(Ty);
Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
@@ -1165,6 +1224,52 @@ static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
return None;
}
+static Optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
+ IntrinsicInst &II) {
+ Value *A = II.getArgOperand(0);
+ Value *B = II.getArgOperand(1);
+ if (A == B)
+ return IC.replaceInstUsesWith(II, A);
+
+ return None;
+}
+
+static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
+ IntrinsicInst &II) {
+ IRBuilder<> Builder(&II);
+ Value *Pred = II.getOperand(0);
+ Value *Vec = II.getOperand(1);
+ Value *Shift = II.getOperand(2);
+
+ // Convert SRSHL into the simpler LSL intrinsic when fed by an ABS intrinsic.
+ Value *AbsPred, *MergedValue;
+ if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
+ m_Value(MergedValue), m_Value(AbsPred), m_Value())) &&
+ !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
+                  m_Value(MergedValue), m_Value(AbsPred), m_Value())))
+    return None;
+
+ // Transform is valid if any of the following are true:
+ // * The ABS merge value is an undef or non-negative
+ // * The ABS predicate is all active
+ // * The ABS predicate and the SRSHL predicates are the same
+ if (!isa<UndefValue>(MergedValue) &&
+ !match(MergedValue, m_NonNegative()) &&
+ AbsPred != Pred && !isAllActivePredicate(AbsPred))
+ return None;
+
+ // Only valid when the shift amount is non-negative, otherwise the rounding
+ // behaviour of SRSHL cannot be ignored.
+ if (!match(Shift, m_NonNegative()))
+ return None;
+
+ auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
+ {Pred, Vec, Shift});
+
+ return IC.replaceInstUsesWith(II, LSL);
+}
+
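
The three conditions in the combine above reduce to: the abs result must be known non-negative on the lanes that matter, and the shift amount must be non-negative (srshl only rounds for negative, i.e. right, shifts). A standalone restatement of the guard logic (helper name hypothetical; operands as in the combine):

    static bool srshlFoldsToLsl(llvm::Value *MergedValue, llvm::Value *Shift,
                                llvm::Value *AbsPred, llvm::Value *SrshlPred,
                                bool AbsPredIsAllActive) {
      using namespace llvm;
      using namespace llvm::PatternMatch;
      // Lanes where the abs predicate is false take their value from
      // MergedValue, so it must be undef or non-negative unless the
      // predicates line up.
      bool AbsKnownNonNeg = isa<UndefValue>(MergedValue) ||
                            match(MergedValue, m_NonNegative()) ||
                            AbsPred == SrshlPred || AbsPredIsAllActive;
      return AbsKnownNonNeg && match(Shift, m_NonNegative());
    }
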
Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -1172,6 +1277,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
switch (IID) {
default:
break;
+ case Intrinsic::aarch64_neon_fmaxnm:
+ case Intrinsic::aarch64_neon_fminnm:
+ return instCombineMaxMinNM(IC, II);
case Intrinsic::aarch64_sve_convert_from_svbool:
return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:
@@ -1227,6 +1335,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEST1(IC, II, DL);
case Intrinsic::aarch64_sve_sdiv:
return instCombineSVESDIV(IC, II);
+ case Intrinsic::aarch64_sve_sel:
+ return instCombineSVESel(IC, II);
+ case Intrinsic::aarch64_sve_srshl:
+ return instCombineSVESrshl(IC, II);
}
return None;
@@ -1262,7 +1374,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
ArrayRef<const Value *> Args) {
// A helper that returns a vector type from the given type. The number of
- // elements in type Ty determine the vector width.
+ // elements in type Ty determines the vector width.
auto toVectorTy = [&](Type *ArgTy) {
return VectorType::get(ArgTy->getScalarType(),
cast<VectorType>(DstTy)->getElementCount());
@@ -1277,26 +1389,32 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
// instructions.
//
- // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
+ // TODO: Add additional widening operations (e.g., shl, etc.) once we
// verify that their extending operands are eliminated during code
// generation.
switch (Opcode) {
case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
+ case Instruction::Mul: // SMULL(2), UMULL(2)
break;
default:
return false;
}
// To be a widening instruction (either the "wide" or "long" versions), the
- // second operand must be a sign- or zero extend having a single user. We
- // only consider extends having a single user because they may otherwise not
- // be eliminated.
+ // second operand must be a sign- or zero extend.
if (Args.size() != 2 ||
- (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
- !Args[1]->hasOneUse())
+ (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])))
return false;
auto *Extend = cast<CastInst>(Args[1]);
+ auto *Arg0 = dyn_cast<CastInst>(Args[0]);
+
+  // A mul only has a mull version (unlike add, which also has addw). Both
+  // operands need to be extends of the same source type.
+ if (Opcode == Instruction::Mul &&
+ (!Arg0 || Arg0->getOpcode() != Extend->getOpcode() ||
+ Arg0->getOperand(0)->getType() != Extend->getOperand(0)->getType()))
+ return false;
// Legalize the destination type and ensure it can be used in a widening
// operation.
@@ -1334,7 +1452,7 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
// If the cast is observable, and it is used by a widening instruction (e.g.,
// uaddl, saddw, etc.), it may be free.
- if (I && I->hasOneUse()) {
+ if (I && I->hasOneUser()) {
auto *SingleUser = cast<Instruction>(*I->user_begin());
SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
@@ -1606,6 +1724,36 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
+ static const TypeConversionCostTblEntry FP16Tbl[] = {
+ {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f16, 1}, // fcvtzs
+ {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f16, 1},
+ {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f16, 1}, // fcvtzs
+ {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f16, 1},
+ {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f16, 2}, // fcvtl+fcvtzs
+ {ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f16, 2},
+ {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f16, 2}, // fcvtzs+xtn
+ {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f16, 2},
+ {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f16, 1}, // fcvtzs
+ {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f16, 1},
+ {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f16, 4}, // 2*fcvtl+2*fcvtzs
+ {ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f16, 4},
+ {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f16, 3}, // 2*fcvtzs+xtn
+ {ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f16, 3},
+ {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f16, 2}, // 2*fcvtzs
+ {ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f16, 2},
+ {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f16, 8}, // 4*fcvtl+4*fcvtzs
+ {ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f16, 8},
+ {ISD::UINT_TO_FP, MVT::v8f16, MVT::v8i8, 2}, // ushll + ucvtf
+ {ISD::SINT_TO_FP, MVT::v8f16, MVT::v8i8, 2}, // sshll + scvtf
+      {ISD::UINT_TO_FP, MVT::v16f16, MVT::v16i8, 4}, // 2 * ushll(2) + 2 * ucvtf
+      {ISD::SINT_TO_FP, MVT::v16f16, MVT::v16i8, 4}, // 2 * sshll(2) + 2 * scvtf
+ };
+
+ if (ST->hasFullFP16())
+ if (const auto *Entry = ConvertCostTableLookup(
+ FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+ return AdjustCost(Entry->Cost);
+
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
@@ -1723,24 +1871,12 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// Legalize the type.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
-
- // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
- // add in the widening overhead specified by the sub-target. Since the
- // extends feeding widening instructions are performed automatically, they
- // aren't present in the generated code and have a zero cost. By adding a
- // widening overhead here, we attach the total cost of the combined operation
- // to the widening instruction.
- InstructionCost Cost = 0;
- if (isWideningInstruction(Ty, Opcode, Args))
- Cost += ST->getWideningBaseCost();
-
int ISD = TLI->InstructionOpcodeToISD(Opcode);
switch (ISD) {
default:
- return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
- Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info, Opd1PropInfo, Opd2PropInfo);
case ISD::SDIV:
if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
@@ -1748,26 +1884,22 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence ADD + CMP + SELECT + SRA.
// The OperandValue properties may not be the same as those of the previous
// operation; conservatively assume OP_None.
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
- Opd1Info, Opd2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
- Opd1Info, Opd2Info,
- TargetTransformInfo::OP_None,
+ InstructionCost Cost = getArithmeticInstrCost(
+ Instruction::Add, Ty, CostKind, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Opd1Info,
+ Opd2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind,
- Opd1Info, Opd2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
- Opd1Info, Opd2Info,
- TargetTransformInfo::OP_None,
+ Cost += getArithmeticInstrCost(
+ Instruction::Select, Ty, CostKind, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Opd1Info,
+ Opd2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
return Cost;
}
LLVM_FALLTHROUGH;
- case ISD::UDIV:
+ case ISD::UDIV: {
if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
auto VT = TLI->getValueType(DL, Ty);
if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
@@ -1787,9 +1919,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
}
}
- Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
- Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ InstructionCost Cost = BaseT::getArithmeticInstrCost(
+ Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
if (Ty->isVectorTy()) {
// On AArch64, vector divisions are not supported natively and are
// expanded into scalar divisions of each pair of elements.
@@ -1804,27 +1935,31 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
Cost += Cost;
}
return Cost;
-
+ }
case ISD::MUL:
- if (LT.second != MVT::v2i64)
- return (Cost + 1) * LT.first;
// Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive
// as elements are extracted from the vectors and the muls scalarized.
// As getScalarizationOverhead is a bit too pessimistic, we estimate the
// cost for a i64 vector directly here, which is:
- // - four i64 extracts,
- // - two i64 inserts, and
- // - two muls.
- // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with
- // LT.first = 2 the cost is 16.
- return LT.first * 8;
+ // - four 2-cost i64 extracts,
+ // - two 2-cost i64 inserts, and
+ // - two 1-cost muls.
+ // So, for a v2i64 with LT.first = 1 the cost is 14, and for a v4i64 with
+ // LT.first = 2 the cost is 28. If both operands are extensions the mul will
+ // not need to scalarize, so the cost can be cheaper (smull or umull).
+ if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
+ return LT.first;
+ return LT.first * 14;
case ISD::ADD:
case ISD::XOR:
case ISD::OR:
case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::SHL:
// These nodes are marked as 'custom' for combining purposes only.
// We know that they are legal. See LowerAdd in ISelLowering.
- return (Cost + 1) * LT.first;
+ return LT.first;
case ISD::FADD:
case ISD::FSUB:
@@ -1834,11 +1969,10 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// These nodes are marked as 'custom' just to lower them to SVE.
// We know said lowering will incur no additional cost.
if (!Ty->getScalarType()->isFP128Ty())
- return (Cost + 2) * LT.first;
+ return 2 * LT.first;
- return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
- Opd2Info,
- Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info, Opd1PropInfo, Opd2PropInfo);
}
}
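
The constants above fall out of short per-instruction sums; a hedged sketch of the two calculations, with all unit costs hard-coded for illustration:

#include <cassert>

int main() {
  // v2i64 mul scalarization: four cost-2 extracts, two cost-2 inserts,
  // and two cost-1 scalar muls, as itemized in the comment above.
  int MulV2i64 = 4 * 2 + 2 * 2 + 2 * 1;
  assert(MulV2i64 == 14); // scaled by LT.first for wider types

  // sdiv by a power of two expands to ADD + CMP + SELECT + SRA; assuming
  // each of the four legal ops costs 1, the sequence costs 4.
  int SDivPow2 = 1 + 1 + 1 + 1;
  assert(SDivPow2 == 4);
  return (MulV2i64 + SDivPow2 == 18) ? 0 : 1;
}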
@@ -1946,6 +2080,10 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
+bool AArch64TTIImpl::prefersVectorizedAddressing() const {
+ return ST->hasSVE();
+}
+
InstructionCost
AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
@@ -2559,11 +2697,97 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp) {
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ // If we have a Mask and the legalized type is a smaller vector, split the
+ // Mask into smaller vectors and sum the cost of each shuffle.
+ if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
+ Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
+ cast<FixedVectorType>(Tp)->getNumElements() >
+ LT.second.getVectorNumElements() &&
+ !Index && !SubTp) {
+ unsigned TpNumElts = cast<FixedVectorType>(Tp)->getNumElements();
+ assert(Mask.size() == TpNumElts && "Expected Mask and Tp size to match!");
+ unsigned LTNumElts = LT.second.getVectorNumElements();
+ unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
+ VectorType *NTp =
+ VectorType::get(Tp->getScalarType(), LT.second.getVectorElementCount());
+ InstructionCost Cost;
+ for (unsigned N = 0; N < NumVecs; N++) {
+ SmallVector<int> NMask;
+ // Split the existing mask into chunks of size LTNumElts. Track the source
+ // sub-vectors to ensure the result has at most 2 inputs.
+ unsigned Source1, Source2;
+ unsigned NumSources = 0;
+ for (unsigned E = 0; E < LTNumElts; E++) {
+ int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
+ : UndefMaskElem;
+ if (MaskElt < 0) {
+ NMask.push_back(UndefMaskElem);
+ continue;
+ }
+
+ // Calculate which source sub-vector of the input this element comes from
+ // and whether it is new to us.
+ unsigned Source = MaskElt / LTNumElts;
+ if (NumSources == 0) {
+ Source1 = Source;
+ NumSources = 1;
+ } else if (NumSources == 1 && Source != Source1) {
+ Source2 = Source;
+ NumSources = 2;
+ } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
+ NumSources++;
+ }
+
+ // Add to the new mask. For the NumSources>2 case these lane values are not
+ // correct, but they are only used for the modular lane number.
+ if (Source == Source1)
+ NMask.push_back(MaskElt % LTNumElts);
+ else if (Source == Source2)
+ NMask.push_back(MaskElt % LTNumElts + LTNumElts);
+ else
+ NMask.push_back(MaskElt % LTNumElts);
+ }
+ // If the sub-mask has at most 2 input sub-vectors then re-cost it using
+ // getShuffleCost. If not then cost it using the worst case.
+ if (NumSources <= 2)
+ Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
+ : TTI::SK_PermuteTwoSrc,
+ NTp, NMask, 0, nullptr, Args);
+ else if (any_of(enumerate(NMask), [&](const auto &ME) {
+ return ME.value() % LTNumElts == ME.index();
+ }))
+ Cost += LTNumElts - 1;
+ else
+ Cost += LTNumElts;
+ }
+ return Cost;
+ }
+
Kind = improveShuffleKindFromMask(Kind, Mask);
+
+ // Check for broadcast loads.
+ if (Kind == TTI::SK_Broadcast) {
+ bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
+ if (IsLoad && LT.second.isVector() &&
+ isLegalBroadcastLoad(Tp->getElementType(),
+ LT.second.getVectorElementCount()))
+ return 0; // broadcast is handled by ld1r
+ }
+
+ // If we have 4 elements for the shuffle and a Mask, get the cost straight
+ // from the perfect shuffle tables.
+ if (Mask.size() == 4 && Tp->getElementCount() == ElementCount::getFixed(4) &&
+ (Tp->getScalarSizeInBits() == 16 || Tp->getScalarSizeInBits() == 32) &&
+ all_of(Mask, [](int E) { return E < 8; }))
+ return getPerfectShuffleCost(Mask);
+
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
Kind == TTI::SK_Reverse) {
+
static const CostTblEntry ShuffleTbl[] = {
// Broadcast shuffle kinds can be performed with 'dup'.
{ TTI::SK_Broadcast, MVT::v8i8, 1 },
@@ -2618,6 +2842,12 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ TTI::SK_Reverse, MVT::v2f32, 1 }, // mov.
{ TTI::SK_Reverse, MVT::v4f32, 2 }, // REV64; EXT
{ TTI::SK_Reverse, MVT::v2f64, 1 }, // mov.
+ { TTI::SK_Reverse, MVT::v8f16, 2 }, // REV64; EXT
+ { TTI::SK_Reverse, MVT::v8i16, 2 }, // REV64; EXT
+ { TTI::SK_Reverse, MVT::v16i8, 2 }, // REV64; EXT
+ { TTI::SK_Reverse, MVT::v4f16, 1 }, // REV64
+ { TTI::SK_Reverse, MVT::v4i16, 1 }, // REV64
+ { TTI::SK_Reverse, MVT::v8i8, 1 }, // REV64
// Broadcast shuffle kinds for scalable vectors
{ TTI::SK_Broadcast, MVT::nxv16i8, 1 },
{ TTI::SK_Broadcast, MVT::nxv8i16, 1 },
@@ -2655,11 +2885,26 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ TTI::SK_Reverse, MVT::nxv4i1, 1 },
{ TTI::SK_Reverse, MVT::nxv2i1, 1 },
};
- std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
}
+
if (Kind == TTI::SK_Splice && isa<ScalableVectorType>(Tp))
return getSpliceCost(Tp, Index);
+
+ // Inserting a subvector can often be done with either a D, S or H register
+ // move, so long as the inserted vector is "aligned".
+ if (Kind == TTI::SK_InsertSubvector && LT.second.isFixedLengthVector() &&
+ LT.second.getSizeInBits() <= 128 && SubTp) {
+ std::pair<InstructionCost, MVT> SubLT =
+ TLI->getTypeLegalizationCost(DL, SubTp);
+ if (SubLT.second.isVector()) {
+ int NumElts = LT.second.getVectorNumElements();
+ int NumSubElts = SubLT.second.getVectorNumElements();
+ if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
+ return SubLT.first;
+ }
+ }
+
return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
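
A self-contained sketch of the chunking loop above, using plain ints with -1 standing in for UndefMaskElem; it only reports the per-chunk source count rather than recursing into a cost query:

#include <cstdio>
#include <vector>

// Split a wide shuffle mask into LTNumElts-sized sub-masks and count the
// distinct LTNumElts-wide source sub-vectors each chunk reads from.
void splitMask(const std::vector<int> &Mask, int LTNumElts) {
  int NumVecs = (int(Mask.size()) + LTNumElts - 1) / LTNumElts;
  for (int N = 0; N < NumVecs; ++N) {
    int Src1 = -1, Src2 = -1, NumSources = 0;
    for (int E = 0; E < LTNumElts; ++E) {
      int Idx = N * LTNumElts + E;
      int Elt = Idx < int(Mask.size()) ? Mask[Idx] : -1;
      if (Elt < 0)
        continue; // undef lane
      int Source = Elt / LTNumElts;
      if (NumSources == 0) {
        Src1 = Source;
        NumSources = 1;
      } else if (NumSources == 1 && Source != Src1) {
        Src2 = Source;
        NumSources = 2;
      } else if (Source != Src1 && Source != Src2) {
        ++NumSources; // would be costed as a worst-case shuffle
      }
    }
    std::printf("chunk %d reads %d source sub-vector(s)\n", N, NumSources);
  }
}

int main() {
  // An 8-element mask legalized to 4-element chunks: the first chunk
  // interleaves source sub-vectors 0 and 2, the second reads only 3.
  splitMask({0, 8, 1, 9, 12, 13, 14, 15}, 4);
}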
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a6029b9f2445..d0aacb457a39 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -135,6 +135,8 @@ public:
return ST->getVScaleForTuning();
}
+ bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
+
/// Try to return an estimate cost factor that can be used as a multiplier
/// when scalarizing an operation for a vector with ElementCount \p VF.
/// For scalable vectors this currently takes the most pessimistic view based
@@ -148,6 +150,8 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
+ bool prefersVectorizedAddressing() const;
+
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
@@ -278,6 +282,23 @@ public:
return isLegalMaskedGatherScatter(DataType);
}
+ bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
+ // Return true if we can generate a `ld1r` splat load instruction.
+ if (!ST->hasNEON() || NumElements.isScalable())
+ return false;
+ switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64: {
+ // We accept total vector widths >= 64 bits and element widths of
+ // {8, 16, 32, 64} bits.
+ unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
+ return VectorBits >= 64;
+ }
+ }
+ return false;
+ }
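
The legality rule reads more clearly in isolation; a sketch of the same check with the ST->hasNEON() subtarget query dropped:

#include <cassert>

// Fixed-length vectors only, element width in {8,16,32,64}, and a total
// vector width of at least 64 bits.
bool legalBroadcastLoad(unsigned ElementBits, unsigned NumElements,
                        bool Scalable) {
  if (Scalable)
    return false;
  switch (ElementBits) {
  case 8:
  case 16:
  case 32:
  case 64:
    return NumElements * ElementBits >= 64;
  }
  return false;
}

int main() {
  assert(legalBroadcastLoad(8, 8, false));  // v8i8: 64 bits, ld1r
  assert(legalBroadcastLoad(32, 4, false)); // v4i32: 128 bits, ld1r
  assert(!legalBroadcastLoad(8, 4, false)); // v4i8: 32 bits, too narrow
  assert(!legalBroadcastLoad(8, 8, true));  // scalable: rejected here
}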
+
bool isLegalNTStore(Type *DataType, Align Alignment) {
// NOTE: The logic below is mostly geared towards LV, which calls it with
// vectors with 2 elements. We might want to improve that, if other
@@ -330,7 +351,8 @@ public:
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp);
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None);
/// @}
};
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 33ed7ae9780e..ade23f643538 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -127,7 +127,7 @@ private:
return Prefix;
}
- PrefixInfo() : Active(false), Predicated(false) {}
+ PrefixInfo() = default;
bool isActive() const { return Active; }
bool isPredicated() const { return Predicated; }
unsigned getElementSize() const {
@@ -141,8 +141,8 @@ private:
}
private:
- bool Active;
- bool Predicated;
+ bool Active = false;
+ bool Predicated = false;
unsigned ElementSize;
unsigned Dst;
unsigned Pg;
@@ -157,7 +157,8 @@ private:
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
- AArch64CC::CondCode parseCondCodeString(StringRef Cond);
+ AArch64CC::CondCode parseCondCodeString(StringRef Cond,
+ std::string &Suggestion);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
unsigned matchRegisterNameAlias(StringRef Name, RegKind Kind);
bool parseRegister(OperandVector &Operands);
@@ -189,6 +190,7 @@ private:
bool parseDirectiveUnreq(SMLoc L);
bool parseDirectiveCFINegateRAState();
bool parseDirectiveCFIBKeyFrame();
+ bool parseDirectiveCFIMTETaggedFrame();
bool parseDirectiveVariantPCS(SMLoc L);
@@ -2425,7 +2427,7 @@ static Optional<std::pair<int, int>> parseVectorKind(StringRef Suffix,
}
static bool isValidVectorKind(StringRef Suffix, RegKind VectorKind) {
- return parseVectorKind(Suffix, VectorKind).hasValue();
+ return parseVectorKind(Suffix, VectorKind).has_value();
}
static unsigned matchSVEDataVectorRegName(StringRef Name) {
@@ -2758,8 +2760,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
}
auto PRFM = LookupByEncoding(MCE->getValue());
- Operands.push_back(AArch64Operand::CreatePrefetch(
- prfop, PRFM.getValueOr(""), S, getContext()));
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, PRFM.value_or(""),
+ S, getContext()));
return MatchOperand_Success;
}
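
Several of the mechanical changes in this file track llvm::Optional converging on the std::optional spellings; the equivalence is easy to see with the standard type:

#include <cassert>
#include <optional>
#include <string>

int main() {
  // hasValue() becomes has_value() and getValueOr() becomes value_or(),
  // matching std::optional.
  std::optional<std::string> PRFM; // e.g. no named prefetch operand found
  assert(!PRFM.has_value());
  assert(PRFM.value_or("") == "");
}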
@@ -3029,8 +3031,10 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
return MatchOperand_Success;
}
-/// parseCondCodeString - Parse a Condition Code string.
-AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
+/// parseCondCodeString - Parse a Condition Code string, optionally returning
+/// a suggestion to help correct common typos.
+AArch64CC::CondCode
+AArch64AsmParser::parseCondCodeString(StringRef Cond, std::string &Suggestion) {
AArch64CC::CondCode CC = StringSwitch<AArch64CC::CondCode>(Cond.lower())
.Case("eq", AArch64CC::EQ)
.Case("ne", AArch64CC::NE)
@@ -3053,7 +3057,7 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
.Default(AArch64CC::Invalid);
if (CC == AArch64CC::Invalid &&
- getSTI().getFeatureBits()[AArch64::FeatureSVE])
+ getSTI().getFeatureBits()[AArch64::FeatureSVE]) {
CC = StringSwitch<AArch64CC::CondCode>(Cond.lower())
.Case("none", AArch64CC::EQ)
.Case("any", AArch64CC::NE)
@@ -3067,6 +3071,9 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
.Case("tstop", AArch64CC::LT)
.Default(AArch64CC::Invalid);
+ if (CC == AArch64CC::Invalid && Cond.lower() == "nfirst")
+ Suggestion = "nfrst";
+ }
return CC;
}
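
A sketch of the suggestion plumbing, reduced to the single "nfirst" -> "nfrst" case the parser special-cases; names are illustrative:

#include <cstdio>
#include <string>

// Returns a nonnegative code on success; on failure it may fill
// Suggestion so the caller can enrich its diagnostic.
int parseCond(const std::string &Cond, std::string &Suggestion) {
  if (Cond == "nfrst")
    return 0; // a valid SVE condition alias
  if (Cond == "nfirst")
    Suggestion = "nfrst";
  return -1; // invalid
}

int main() {
  std::string Suggestion;
  if (parseCond("nfirst", Suggestion) < 0) {
    std::string Msg = "invalid condition code";
    if (!Suggestion.empty())
      Msg += ", did you mean " + Suggestion + "?";
    std::puts(Msg.c_str()); // invalid condition code, did you mean nfrst?
  }
}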
@@ -3078,9 +3085,14 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
StringRef Cond = Tok.getString();
- AArch64CC::CondCode CC = parseCondCodeString(Cond);
- if (CC == AArch64CC::Invalid)
- return TokError("invalid condition code");
+ std::string Suggestion;
+ AArch64CC::CondCode CC = parseCondCodeString(Cond, Suggestion);
+ if (CC == AArch64CC::Invalid) {
+ std::string Msg = "invalid condition code";
+ if (!Suggestion.empty())
+ Msg += ", did you mean " + Suggestion + "?";
+ return TokError(Msg);
+ }
Lex(); // Eat identifier token.
if (invertCondCode) {
@@ -3910,7 +3922,6 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
const MCRegisterInfo *RI = getContext().getRegisterInfo();
unsigned PrevReg = FirstReg;
- unsigned Count = 1;
SmallSet<unsigned, 8> DRegs;
AArch64Operand::ComputeRegsForAlias(FirstReg, DRegs, ElementWidth);
@@ -3942,7 +3953,6 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
}
PrevReg = Reg;
- ++Count;
}
if (parseToken(AsmToken::RCurly, "'}' expected"))
@@ -4545,9 +4555,14 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
(Head.data() - Name.data()));
- AArch64CC::CondCode CC = parseCondCodeString(Head);
- if (CC == AArch64CC::Invalid)
- return Error(SuffixLoc, "invalid condition code");
+ std::string Suggestion;
+ AArch64CC::CondCode CC = parseCondCodeString(Head, Suggestion);
+ if (CC == AArch64CC::Invalid) {
+ std::string Msg = "invalid condition code";
+ if (!Suggestion.empty())
+ Msg += ", did you mean " + Suggestion + "?";
+ return Error(SuffixLoc, Msg);
+ }
Operands.push_back(AArch64Operand::CreateToken(".", SuffixLoc, getContext(),
/*IsSuffix=*/true));
Operands.push_back(
@@ -6024,6 +6039,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveCFINegateRAState();
else if (IDVal == ".cfi_b_key_frame")
parseDirectiveCFIBKeyFrame();
+ else if (IDVal == ".cfi_mte_tagged_frame")
+ parseDirectiveCFIMTETaggedFrame();
else if (IDVal == ".arch_extension")
parseDirectiveArchExtension(Loc);
else if (IDVal == ".variant_pcs")
@@ -6198,12 +6215,11 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
if (Extension.Features.none())
report_fatal_error("unsupported architectural extension: " + Name);
- FeatureBitset ToggleFeatures = EnableFeature
- ? (~Features & Extension.Features)
- : ( Features & Extension.Features);
- FeatureBitset Features =
- ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
- setAvailableFeatures(Features);
+ FeatureBitset ToggleFeatures =
+ EnableFeature
+ ? STI.SetFeatureBitsTransitively(~Features & Extension.Features)
+ : STI.ToggleFeature(Features & Extension.Features);
+ setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures));
break;
}
}
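
The same bitset arithmetic recurs in the .arch_extension and .cpu directives below; a sketch with a 4-bit set (the real SetFeatureBitsTransitively additionally enables implied features, which this ignores):

#include <bitset>
#include <cassert>

int main() {
  std::bitset<4> Features("0011"); // currently enabled features
  std::bitset<4> Ext("0110");      // features the extension provides

  // Enabling: set only the extension bits that are not already on.
  assert((~Features & Ext) == std::bitset<4>("0100"));

  // Disabling: toggle off only the extension bits that are currently on.
  assert((Features & Ext) == std::bitset<4>("0010"));
}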
@@ -6217,8 +6233,7 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
StringRef Name = getParser().parseStringToEndOfStatement().trim();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.arch_extension' directive"))
+ if (parseEOL())
return true;
bool EnableFeature = true;
@@ -6236,12 +6251,11 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
if (Extension.Features.none())
return Error(ExtLoc, "unsupported architectural extension: " + Name);
- FeatureBitset ToggleFeatures = EnableFeature
- ? (~Features & Extension.Features)
- : (Features & Extension.Features);
- FeatureBitset Features =
- ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
- setAvailableFeatures(Features);
+ FeatureBitset ToggleFeatures =
+ EnableFeature
+ ? STI.SetFeatureBitsTransitively(~Features & Extension.Features)
+ : STI.ToggleFeature(Features & Extension.Features);
+ setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures));
return false;
}
@@ -6281,7 +6295,6 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
ExpandCryptoAEK(llvm::AArch64::getCPUArchKind(CPU), RequestedExtensions);
- FeatureBitset Features = STI.getFeatureBits();
for (auto Name : RequestedExtensions) {
// Advance source location past '+'.
CurLoc = incrementLoc(CurLoc, 1);
@@ -6301,12 +6314,12 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
if (Extension.Features.none())
report_fatal_error("unsupported architectural extension: " + Name);
- FeatureBitset ToggleFeatures = EnableFeature
- ? (~Features & Extension.Features)
- : ( Features & Extension.Features);
- FeatureBitset Features =
- ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
- setAvailableFeatures(Features);
+ FeatureBitset Features = STI.getFeatureBits();
+ FeatureBitset ToggleFeatures =
+ EnableFeature
+ ? STI.SetFeatureBitsTransitively(~Features & Extension.Features)
+ : STI.ToggleFeature(Features & Extension.Features);
+ setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures));
FoundExtension = true;
break;
@@ -6401,12 +6414,10 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
if (Idx + 1 == NbArgs)
break;
- if (parseToken(AsmToken::Comma,
- "unexpected token in '" + Twine(IDVal) + "' directive"))
+ if (parseComma())
return true;
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '" + Twine(IDVal) + "' directive"))
+ if (parseEOL())
return true;
getStreamer().emitLOHDirective((MCLOHType)Kind, Args);
@@ -6416,7 +6427,7 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
/// parseDirectiveLtorg
/// ::= .ltorg | .pool
bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
getTargetStreamer().emitCurrentConstantPool();
return false;
@@ -6474,8 +6485,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
return Error(SRegLoc, "register name or alias expected");
// Shouldn't be anything else.
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected input in .req directive"))
+ if (parseEOL())
return true;
auto pair = std::make_pair(RegisterKind, (unsigned) RegNum);
@@ -6496,7 +6506,7 @@ bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
}
bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
getStreamer().emitCFINegateRAState();
return false;
@@ -6505,31 +6515,31 @@ bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
/// parseDirectiveCFIBKeyFrame
/// ::= .cfi_b_key_frame
bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cfi_b_key_frame'"))
+ if (parseEOL())
return true;
getStreamer().emitCFIBKeyFrame();
return false;
}
+/// parseDirectiveCFIMTETaggedFrame
+/// ::= .cfi_mte_tagged_frame
+bool AArch64AsmParser::parseDirectiveCFIMTETaggedFrame() {
+ if (parseEOL())
+ return true;
+ getStreamer().emitCFIMTETaggedFrame();
+ return false;
+}
+
/// parseDirectiveVariantPCS
/// ::= .variant_pcs symbolname
bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
- const AsmToken &Tok = getTok();
- if (Tok.isNot(AsmToken::Identifier))
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
return TokError("expected symbol name");
-
- StringRef SymbolName = Tok.getIdentifier();
-
- MCSymbol *Sym = getContext().lookupSymbol(SymbolName);
- if (!Sym)
- return TokError("unknown symbol");
-
- Lex(); // Eat the symbol
-
if (parseEOL())
return true;
- getTargetStreamer().emitDirectiveVariantPCS(Sym);
+ getTargetStreamer().emitDirectiveVariantPCS(
+ getContext().getOrCreateSymbol(Name));
return false;
}
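
The .variant_pcs change swaps a lookup that required the symbol to already exist for one that creates a forward reference; a toy symbol table makes the difference visible (names are illustrative):

#include <cassert>
#include <map>
#include <string>

std::map<std::string, int> SymTab;

// Old behavior: reject names that have not been defined yet.
int *lookupSymbol(const std::string &Name) {
  auto It = SymTab.find(Name);
  return It == SymTab.end() ? nullptr : &It->second;
}

// New behavior: create a forward reference on first use, so the
// directive may precede the symbol's definition.
int *getOrCreateSymbol(const std::string &Name) { return &SymTab[Name]; }

int main() {
  assert(lookupSymbol("my_fn") == nullptr);      // old parser: "unknown symbol"
  assert(getOrCreateSymbol("my_fn") != nullptr); // new parser: accepted
}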
@@ -6880,7 +6890,7 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
// as a literal token.
if (Op.isTokenEqual("za"))
return Match_Success;
- break;
+ return Match_InvalidOperand;
}
if (!Op.isImm())
return Match_InvalidOperand;
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 9ce00f76d9c7..1b65589416c3 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -16,9 +16,10 @@
#include "TargetInfo/AArch64TargetInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm-c/Disassembler.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
@@ -37,213 +38,226 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
-static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPR64x8ClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeZPR_4bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <unsigned NumBitsForTile>
static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMatrixTileListRegisterClass(MCInst &Inst,
- unsigned RegMask,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFMOVLaneInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftR64Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftR64ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftR32Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftR32ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftR16Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftR16ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftR8Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftL64Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftL32Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftL16Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVecShiftL8Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Addr,
- const void *Decoder);
-static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Addr,
- const void *Decoder);
-static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeWSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeXSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Address,
+ const MCDisassembler *Decoder);
template <int Bits>
static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <int ElementWidth>
-static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
+ uint64_t Addr,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSETMemOpInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -270,7 +284,8 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
static MCDisassembler *createAArch64Disassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new AArch64Disassembler(STI, Ctx);
+
+ return new AArch64Disassembler(STI, Ctx, T.createMCInstrInfo());
}
DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
@@ -295,67 +310,37 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
DecodeStatus Result =
decodeInstruction(Table, MI, Insn, Address, this, STI);
- switch (MI.getOpcode()) {
- default:
- break;
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+
// For Scalable Matrix Extension (SME) instructions that have an implicit
- // operand for the accumulator (ZA) which isn't encoded, manually insert
- // operand.
- case AArch64::LDR_ZA:
- case AArch64::STR_ZA: {
- MI.insert(MI.begin(), MCOperand::createReg(AArch64::ZA));
- // Spill and fill instructions have a single immediate used for both the
- // vector select offset and optional memory offset. Replicate the decoded
- // immediate.
+ // operand for the accumulator (ZA) or an implicit immediate zero which
+ // isn't encoded, manually insert the operand.
+ for (unsigned i = 0; i < Desc.getNumOperands(); i++) {
+ if (Desc.OpInfo[i].OperandType == MCOI::OPERAND_REGISTER) {
+ switch (Desc.OpInfo[i].RegClass) {
+ default:
+ break;
+ case AArch64::MPRRegClassID:
+ MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZA));
+ break;
+ case AArch64::MPR8RegClassID:
+ MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0));
+ break;
+ }
+ } else if (Desc.OpInfo[i].OperandType ==
+ AArch64::OPERAND_IMPLICIT_IMM_0) {
+ MI.insert(MI.begin() + i, MCOperand::createImm(0));
+ }
+ }
+
+ if (MI.getOpcode() == AArch64::LDR_ZA ||
+ MI.getOpcode() == AArch64::STR_ZA) {
+ // Spill and fill instructions have a single immediate used for both
+ // the vector select offset and the optional memory offset. Replicate
+ // the decoded immediate.
const MCOperand &Imm4Op = MI.getOperand(2);
assert(Imm4Op.isImm() && "Unexpected operand type!");
MI.addOperand(Imm4Op);
- break;
- }
- case AArch64::LD1_MXIPXX_H_B:
- case AArch64::LD1_MXIPXX_V_B:
- case AArch64::ST1_MXIPXX_H_B:
- case AArch64::ST1_MXIPXX_V_B:
- case AArch64::INSERT_MXIPZ_H_B:
- case AArch64::INSERT_MXIPZ_V_B:
- // e.g.
- // MOVA ZA0<HV>.B[<Ws>, <imm>], <Pg>/M, <Zn>.B
- // ^ insert implicit 8-bit element tile
- MI.insert(MI.begin(), MCOperand::createReg(AArch64::ZAB0));
- break;
- case AArch64::EXTRACT_ZPMXI_H_B:
- case AArch64::EXTRACT_ZPMXI_V_B:
- // MOVA <Zd>.B, <Pg>/M, ZA0<HV>.B[<Ws>, <imm>]
- // ^ insert implicit 8-bit element tile
- MI.insert(MI.begin()+2, MCOperand::createReg(AArch64::ZAB0));
- break;
- case AArch64::LD1_MXIPXX_H_Q:
- case AArch64::LD1_MXIPXX_V_Q:
- case AArch64::ST1_MXIPXX_H_Q:
- case AArch64::ST1_MXIPXX_V_Q:
- // 128-bit load/store have implicit zero vector index.
- MI.insert(MI.begin()+2, MCOperand::createImm(0));
- break;
- // 128-bit mova have implicit zero vector index.
- case AArch64::INSERT_MXIPZ_H_Q:
- case AArch64::INSERT_MXIPZ_V_Q:
- MI.insert(MI.begin()+2, MCOperand::createImm(0));
- break;
- case AArch64::EXTRACT_ZPMXI_H_Q:
- case AArch64::EXTRACT_ZPMXI_V_Q:
- MI.addOperand(MCOperand::createImm(0));
- break;
- case AArch64::SMOVvi8to32_idx0:
- case AArch64::SMOVvi8to64_idx0:
- case AArch64::SMOVvi16to32_idx0:
- case AArch64::SMOVvi16to64_idx0:
- case AArch64::SMOVvi32to64_idx0:
- case AArch64::UMOVvi8_idx0:
- case AArch64::UMOVvi16_idx0:
- case AArch64::UMOVvi32_idx0:
- case AArch64::UMOVvi64_idx0:
- MI.addOperand(MCOperand::createImm(0));
- break;
}
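
A sketch of the data-driven rewrite: rather than a per-opcode switch, walk the operand descriptors and insert a placeholder wherever the encoding carries no bits for the operand. All names and register ids here are illustrative:

#include <cstdio>
#include <vector>

enum OpKind { ExplicitOp, ImplicitZA, ImplicitImm0 };

void insertImplicitOps(std::vector<int> &Operands,
                       const std::vector<OpKind> &Desc) {
  for (int I = 0; I < int(Desc.size()); ++I) {
    if (Desc[I] == ImplicitZA)
      Operands.insert(Operands.begin() + I, 100); // stand-in ZA reg id
    else if (Desc[I] == ImplicitImm0)
      Operands.insert(Operands.begin() + I, 0); // implicit zero index
  }
}

int main() {
  std::vector<int> Ops = {7, 3}; // the operands the decoder produced
  insertImplicitOps(Ops, {ImplicitZA, ExplicitOp, ExplicitOp, ImplicitImm0});
  for (int O : Ops)
    std::printf("%d ", O); // 100 7 3 0
  std::printf("\n");
}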
if (Result != MCDisassembler::Fail)
@@ -400,7 +385,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Disassembler() {
static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -410,9 +395,9 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
-static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return Fail;
return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
@@ -420,7 +405,7 @@ static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -432,7 +417,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -444,7 +429,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -456,7 +441,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -466,9 +451,9 @@ static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
-static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 30)
return Fail;
@@ -481,7 +466,7 @@ static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -491,10 +476,9 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
-static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeGPR64x8ClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 22)
return Fail;
if (RegNo & 1)
@@ -509,7 +493,7 @@ static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -518,10 +502,10 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
-static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 3)
return Fail;
@@ -534,7 +518,7 @@ static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst,
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -546,7 +530,7 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -558,7 +542,7 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void* Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
@@ -570,7 +554,7 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeZPR_4bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return Fail;
return DecodeZPRRegisterClass(Inst, RegNo, Address, Decoder);
@@ -578,7 +562,7 @@ static DecodeStatus DecodeZPR_4bRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return Fail;
return DecodeZPRRegisterClass(Inst, RegNo, Address, Decoder);
@@ -586,7 +570,7 @@ static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void* Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -597,7 +581,7 @@ static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void* Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -608,7 +592,7 @@ static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void* Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -617,10 +601,10 @@ static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
-static DecodeStatus DecodeMatrixTileListRegisterClass(MCInst &Inst,
- unsigned RegMask,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegMask > 0xFF)
return Fail;
Inst.addOperand(MCOperand::createImm(RegMask));
@@ -641,7 +625,8 @@ static const SmallVector<SmallVector<unsigned, 16>, 5>
template <unsigned NumBitsForTile>
static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned LastReg = (1 << NumBitsForTile) - 1;
if (RegNo > LastReg)
return Fail;
@@ -651,7 +636,8 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return Fail;
@@ -663,7 +649,7 @@ static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void* Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return Fail;
@@ -672,7 +658,8 @@ static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -682,7 +669,8 @@ static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -693,7 +681,7 @@ static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -703,7 +691,8 @@ static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -713,7 +702,8 @@ static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -724,7 +714,7 @@ static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return Fail;
unsigned Register =
@@ -735,7 +725,7 @@ static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// scale{5} is asserted as 1 in tblgen.
Imm |= 0x20;
Inst.addOperand(MCOperand::createImm(64 - Imm));
@@ -744,29 +734,29 @@ static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeFixedPointScaleImm64(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(64 - Imm));
return Success;
}
static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
int64_t ImmVal = Imm;
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
// Sign-extend the 19-bit immediate.
if (ImmVal & (1 << (19 - 1)))
ImmVal |= ~((1LL << 19) - 1);
- if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal * 4, Addr,
- Inst.getOpcode() != AArch64::LDRXl, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(
+ Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(ImmVal));
return Success;
}
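
The mask-based sign extension used here (and for the 21-bit immediate in DecodeAdrInstruction below) generalizes to any field width; a small sketch:

#include <cassert>
#include <cstdint>

// If the top bit of an N-bit field is set, OR in all of the higher bits.
int64_t signExtend(uint64_t Imm, unsigned Bits) {
  int64_t Val = static_cast<int64_t>(Imm);
  if (Val & (1LL << (Bits - 1)))
    Val |= ~((1LL << Bits) - 1);
  return Val;
}

int main() {
  assert(signExtend(0x7FFFF, 19) == -1);      // 19 ones: -1
  assert(signExtend(0x3FFFF, 19) == 0x3FFFF); // top bit clear: unchanged
  // The label operand is then scaled by 4 (AArch64 instruction size).
  assert(signExtend(0x40000, 19) * 4 == -(1LL << 20));
}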
static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm((Imm >> 1) & 1));
Inst.addOperand(MCOperand::createImm(Imm & 1));
return Success;
@@ -774,7 +764,7 @@ static DecodeStatus DecodeMemExtend(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(Imm));
// Every system register in the encoding space is valid with the syntax
@@ -784,7 +774,7 @@ static DecodeStatus DecodeMRSSystemRegister(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(Imm));
return Success;
@@ -792,7 +782,7 @@ static DecodeStatus DecodeMSRSystemRegister(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeFMOVLaneInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// This decoder exists to add the dummy Lane operand to the MCInst, which must
// be 1 in assembly but has no other real manifestation.
unsigned Rd = fieldFromInstruction(Insn, 0, 5);
@@ -826,66 +816,74 @@ static DecodeStatus DecodeVecShiftLImm(MCInst &Inst, unsigned Imm,
}
static DecodeStatus DecodeVecShiftR64Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 64);
}
static DecodeStatus DecodeVecShiftR64ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeVecShiftRImm(Inst, Imm | 0x20, 64);
}
static DecodeStatus DecodeVecShiftR32Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 32);
}
static DecodeStatus DecodeVecShiftR32ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeVecShiftRImm(Inst, Imm | 0x10, 32);
}
static DecodeStatus DecodeVecShiftR16Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 16);
}
static DecodeStatus DecodeVecShiftR16ImmNarrow(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeVecShiftRImm(Inst, Imm | 0x8, 16);
}
static DecodeStatus DecodeVecShiftR8Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftRImm(Inst, Imm, 8);
}
static DecodeStatus DecodeVecShiftL64Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 64);
}
static DecodeStatus DecodeVecShiftL32Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 32);
}
static DecodeStatus DecodeVecShiftL16Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 16);
}
static DecodeStatus DecodeVecShiftL8Imm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeVecShiftLImm(Inst, Imm, 8);
}
-static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Rm = fieldFromInstruction(insn, 16, 5);
@@ -947,7 +945,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned imm = fieldFromInstruction(insn, 5, 16);
unsigned shift = fieldFromInstruction(insn, 21, 2);
@@ -978,14 +976,12 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
+ const MCDisassembler *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned offset = fieldFromInstruction(insn, 10, 12);
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
switch (Inst.getOpcode()) {
default:
@@ -1034,14 +1030,14 @@ static DecodeStatus DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn,
}
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(offset));
return Success;
}
static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
int64_t offset = fieldFromInstruction(insn, 12, 9);
@@ -1237,9 +1233,9 @@ static DecodeStatus DecodeSignedLdStInstruction(MCInst &Inst, uint32_t insn,
return Success;
}
-static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
+ const MCDisassembler *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
@@ -1322,7 +1318,7 @@ static DecodeStatus DecodeExclusiveLdStInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
@@ -1456,7 +1452,7 @@ static DecodeStatus DecodePairLdStInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rt = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
uint64_t offset = fieldFromInstruction(insn, 22, 1) << 9 |
@@ -1489,7 +1485,7 @@ static DecodeStatus DecodeAuthLoadInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Rm = fieldFromInstruction(insn, 16, 5);
@@ -1546,7 +1542,7 @@ static DecodeStatus DecodeAddSubERegInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Datasize = fieldFromInstruction(insn, 31, 1);
@@ -1577,7 +1573,7 @@ static DecodeStatus DecodeLogicalImmInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned cmode = fieldFromInstruction(insn, 12, 4);
unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
@@ -1616,7 +1612,7 @@ static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned cmode = fieldFromInstruction(insn, 12, 4);
unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
@@ -1633,26 +1629,26 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
}
static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
int64_t imm = fieldFromInstruction(insn, 5, 19) << 2;
imm |= fieldFromInstruction(insn, 29, 2);
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
// Sign-extend the 21-bit immediate.
if (imm & (1 << (21 - 1)))
imm |= ~((1LL << 21) - 1);
DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
- if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
unsigned Imm = fieldFromInstruction(insn, 10, 14);
@@ -1661,8 +1657,6 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
unsigned ShifterVal = (Imm >> 12) & 3;
unsigned ImmVal = Imm & 0xFFF;
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
if (ShifterVal != 0 && ShifterVal != 1)
return Fail;
@@ -1681,7 +1675,7 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
}
- if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(ImmVal));
Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
return Success;
@@ -1689,24 +1683,22 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int64_t imm = fieldFromInstruction(insn, 0, 26);
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
// Sign-extend the 26-bit immediate.
if (imm & (1 << (26 - 1)))
imm |= ~((1LL << 26) - 1);
- if (!Dis->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(imm));
return Success;
}
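// The sign-extension idiom above generalizes to any field width; a minimal
// sketch (the decoders inline it per width):
//
//   static inline int64_t signExtend(int64_t value, unsigned bits) {
//     // If the sign bit of the bits-wide field is set, set all upper bits.
//     if (value & (1LL << (bits - 1)))
//       value |= ~((1LL << bits) - 1);
//     return value;
//   }
//
// so the 26-bit branch immediate here is signExtend(imm, 26).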
-static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
+ const MCDisassembler *Decoder) {
uint64_t op1 = fieldFromInstruction(insn, 16, 3);
uint64_t op2 = fieldFromInstruction(insn, 5, 3);
uint64_t crm = fieldFromInstruction(insn, 8, 4);
@@ -1726,22 +1718,20 @@ static DecodeStatus DecodeSystemPStateInstruction(MCInst &Inst, uint32_t insn,
Inst.addOperand(MCOperand::createImm(pstate_field));
Inst.addOperand(MCOperand::createImm(crm));
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
auto PState = AArch64PState::lookupPStateByEncoding(pstate_field);
- if (PState && PState->haveFeatures(Dis->getSubtargetInfo().getFeatureBits()))
+ if (PState &&
+ PState->haveFeatures(Decoder->getSubtargetInfo().getFeatureBits()))
return Success;
return Fail;
}
static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
uint64_t Rt = fieldFromInstruction(insn, 0, 5);
uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5;
bit |= fieldFromInstruction(insn, 19, 5);
int64_t dst = fieldFromInstruction(insn, 5, 14);
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
// Sign-extend 14-bit immediate.
if (dst & (1 << (14 - 1)))
@@ -1752,17 +1742,16 @@ static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
else
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
Inst.addOperand(MCOperand::createImm(bit));
- if (!Dis->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(dst));
return Success;
}
-static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
- unsigned RegClassID,
- unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegClassID,
+ unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder) {
// Register number must be even (see CASP instruction)
if (RegNo & 0x1)
return Fail;
@@ -1772,27 +1761,25 @@ static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
return Success;
}
-static DecodeStatus DecodeWSeqPairsClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeWSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeGPRSeqPairsClassRegisterClass(Inst,
AArch64::WSeqPairsClassRegClassID,
RegNo, Addr, Decoder);
}
-static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeXSeqPairsClassRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr,
+ const MCDisassembler *Decoder) {
return DecodeGPRSeqPairsClassRegisterClass(Inst,
AArch64::XSeqPairsClassRegClassID,
RegNo, Addr, Decoder);
}
-static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
+ const MCDisassembler *Decoder) {
unsigned Zdn = fieldFromInstruction(insn, 0, 5);
unsigned imm = fieldFromInstruction(insn, 5, 13);
if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 64))
@@ -1808,7 +1795,7 @@ static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
template <int Bits>
static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Imm & ~((1LL << Bits) - 1))
return Fail;
@@ -1822,8 +1809,8 @@ static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
// Decode 8-bit signed/unsigned immediate for a given element width.
template <int ElementWidth>
-static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
unsigned Val = (uint8_t)Imm;
unsigned Shift = (Imm & 0x100) ? 8 : 0;
if (ElementWidth == 8 && Shift)
@@ -1835,13 +1822,14 @@ static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
// Decode uimm4 in the range 1-16.
static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(Imm + 1));
return Success;
}
static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (AArch64SVCR::lookupSVCRByEncoding(Imm)) {
Inst.addOperand(MCOperand::createImm(Imm));
return Success;
@@ -1851,7 +1839,7 @@ static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address,
static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rs = fieldFromInstruction(insn, 16, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
@@ -1876,7 +1864,7 @@ static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn,
static DecodeStatus DecodeSETMemOpInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(insn, 0, 5);
unsigned Rm = fieldFromInstruction(insn, 16, 5);
unsigned Rn = fieldFromInstruction(insn, 5, 5);
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 374a89edcb74..6761d449a7f4 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -13,13 +13,17 @@
#define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
class AArch64Disassembler : public MCDisassembler {
+ std::unique_ptr<const MCInstrInfo> const MCII;
+
public:
- AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
- : MCDisassembler(STI, Ctx) {}
+ AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII) {}
~AArch64Disassembler() override = default;
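// The disassembler now owns its MCInstrInfo: callers pass a heap-allocated
// object whose lifetime the unique_ptr member manages. A sketch of the
// creation function, following the usual TargetRegistry pattern:
//
//   static MCDisassembler *createAArch64Disassembler(const Target &T,
//                                                    const MCSubtargetInfo &STI,
//                                                    MCContext &Ctx) {
//     return new AArch64Disassembler(STI, Ctx, T.createMCInstrInfo());
//   }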
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 5b6f06f8dbb4..11964b2075e5 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -60,7 +60,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
/// an operand to the MCInst and Fail otherwise.
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
- bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+ bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
if (!SymbolLookUp)
return false;
// FIXME: This method shares a lot of code with
@@ -73,8 +73,8 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
SymbolicOp.Value = Value;
uint64_t ReferenceType;
const char *ReferenceName;
- if (!GetOpInfo ||
- !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
+ 1, &SymbolicOp)) {
if (IsBranch) {
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
index dc72331660cc..ca677db49739 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -29,7 +29,8 @@ public:
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
int64_t Value, uint64_t Address, bool IsBranch,
- uint64_t Offset, uint64_t InstSize) override;
+ uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 097b93e4fcca..89e1d85a6085 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -1058,10 +1059,10 @@ bool AArch64CallLowering::lowerTailCall(
// If Callee is a reg, since it is used by a target specific instruction,
// it must have a register class matching the constraint of that instruction.
- if (Info.Callee.isReg())
+ if (MIB->getOperand(0).isReg())
constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
*MF.getSubtarget().getRegBankInfo(), *MIB,
- MIB->getDesc(), Info.Callee, 0);
+ MIB->getDesc(), MIB->getOperand(0), 0);
MF.getFrameInfo().setHasTailCall();
Info.LoweredTailCall = true;
@@ -1127,14 +1128,39 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ unsigned Opc = 0;
+ // Calls with operand bundle "clang.arc.attachedcall" are special. They should
+ // be expanded to the call, directly followed by a special marker sequence and
+ // a call to an ObjC library function.
+ if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
+ Opc = AArch64::BLR_RVMARKER;
+ // A call to a returns-twice function like setjmp must be followed by a bti
+ // instruction.
+ else if (Info.CB &&
+ Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ !Subtarget.noBTIAtReturnTwice() &&
+ MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+ Opc = AArch64::BLR_BTI;
+ else
+ Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
+ unsigned CalleeOpNo = 0;
+
+ if (Opc == AArch64::BLR_RVMARKER) {
+ // Add a target global address for the retainRV/claimRV runtime function
+ // just before the call target.
+ Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
+ MIB.addGlobalAddress(ARCFn);
+ ++CalleeOpNo;
+ }
+
MIB.add(Info.Callee);
// Tell the call which registers are clobbered.
const uint32_t *Mask;
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const auto *TRI = Subtarget.getRegisterInfo();
AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
@@ -1160,10 +1186,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
// constraint of that instruction.
- if (Info.Callee.isReg())
+ if (MIB->getOperand(CalleeOpNo).isReg())
constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
*Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
- Info.Callee, 0);
+ MIB->getOperand(CalleeOpNo), CalleeOpNo);
// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 703e356f016d..9a65687735fe 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -21,13 +21,16 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -38,9 +41,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -62,6 +65,7 @@ namespace {
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
+
class AArch64InstructionSelector : public InstructionSelector {
public:
AArch64InstructionSelector(const AArch64TargetMachine &TM,
@@ -293,6 +297,20 @@ private:
emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+ /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
+ /// In some cases this is even possible with OR operations in the expression.
+ MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
+ MachineIRBuilder &MIB) const;
+ MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
+ CmpInst::Predicate CC,
+ AArch64CC::CondCode Predicate,
+ AArch64CC::CondCode OutCC,
+ MachineIRBuilder &MIB) const;
+ MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
+ bool Negate, Register CCOp,
+ AArch64CC::CondCode Predicate,
+ MachineIRBuilder &MIB) const;
+
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
/// This will also optimize the test bit instruction when possible.
@@ -419,12 +437,16 @@ private:
int OpIdx = -1) const;
void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
+ void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
// Optimization methods.
- bool tryOptSelect(MachineInstr &MI);
+ bool tryOptSelect(GSelect &Sel);
+ bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
@@ -485,9 +507,11 @@ AArch64InstructionSelector::AArch64InstructionSelector(
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
+//
+/// Given a register bank and a type, return the smallest register class that
+/// can represent that combination.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
- const RegisterBankInfo &RBI,
bool GetAllRegSet = false) {
if (RB.getID() == AArch64::GPRRegBankID) {
if (Ty.getSizeInBits() <= 32)
@@ -828,39 +852,6 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
return GenericOpc;
}
-#ifndef NDEBUG
-/// Helper function that verifies that we have a valid copy at the end of
-/// selectCopy. Verifies that the source and dest have the expected sizes and
-/// then returns true.
-static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) {
- const Register DstReg = I.getOperand(0).getReg();
- const Register SrcReg = I.getOperand(1).getReg();
- const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
- const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
-
- // Make sure the size of the source and dest line up.
- assert(
- (DstSize == SrcSize ||
- // Copies are a mean to setup initial types, the number of
- // bits may not exactly match.
- (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
- // Copies are a mean to copy bits around, as long as we are
- // on the same register class, that's fine. Otherwise, that
- // means we need some SUBREG_TO_REG or AND & co.
- (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
- "Copy with different width?!");
-
- // Check the size of the destination.
- assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
- "GPRs cannot get more than 64-bit width values");
-
- return true;
-}
-#endif
-
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
@@ -935,31 +926,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return false;
}
- // A couple helpers below, for making sure that the copy we produce is valid.
-
- // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
- // to verify that the src and dst are the same size, since that's handled by
- // the SUBREG_TO_REG.
- bool KnownValid = false;
-
- // Returns true, or asserts if something we don't expect happens. Instead of
- // returning true, we return isValidCopy() to ensure that we verify the
- // result.
- auto CheckCopy = [&]() {
- // If we have a bitcast or something, we can't have physical registers.
- assert((I.isCopy() ||
- (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
- !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
- "No phys reg on generic operator!");
- bool ValidCopy = true;
-#ifndef NDEBUG
- ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
- assert(ValidCopy && "Invalid copy.");
-#endif
- (void)KnownValid;
- return ValidCopy;
- };
-
// Is this a copy? If so, then we may need to insert a subregister copy.
if (I.isCopy()) {
// Yes. Check if there's anything to fix up.
@@ -1004,15 +970,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
.addImm(SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(PromoteReg);
-
- // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
- KnownValid = true;
}
// If the destination is a physical register, then there's nothing to
// change, so we're done.
if (Register::isPhysicalRegister(DstReg))
- return CheckCopy();
+ return true;
}
// No need to constrain SrcReg. It will get constrained when we hit another
@@ -1032,7 +995,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
}
I.setDesc(TII.get(AArch64::COPY));
- return CheckCopy();
+ return true;
}
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
@@ -1309,6 +1272,90 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
}
}
+/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
+static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
+ AArch64CC::CondCode &CondCode,
+ AArch64CC::CondCode &CondCode2) {
+ CondCode2 = AArch64CC::AL;
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown FP condition!");
+ case CmpInst::FCMP_OEQ:
+ CondCode = AArch64CC::EQ;
+ break;
+ case CmpInst::FCMP_OGT:
+ CondCode = AArch64CC::GT;
+ break;
+ case CmpInst::FCMP_OGE:
+ CondCode = AArch64CC::GE;
+ break;
+ case CmpInst::FCMP_OLT:
+ CondCode = AArch64CC::MI;
+ break;
+ case CmpInst::FCMP_OLE:
+ CondCode = AArch64CC::LS;
+ break;
+ case CmpInst::FCMP_ONE:
+ CondCode = AArch64CC::MI;
+ CondCode2 = AArch64CC::GT;
+ break;
+ case CmpInst::FCMP_ORD:
+ CondCode = AArch64CC::VC;
+ break;
+ case CmpInst::FCMP_UNO:
+ CondCode = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_UEQ:
+ CondCode = AArch64CC::EQ;
+ CondCode2 = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_UGT:
+ CondCode = AArch64CC::HI;
+ break;
+ case CmpInst::FCMP_UGE:
+ CondCode = AArch64CC::PL;
+ break;
+ case CmpInst::FCMP_ULT:
+ CondCode = AArch64CC::LT;
+ break;
+ case CmpInst::FCMP_ULE:
+ CondCode = AArch64CC::LE;
+ break;
+ case CmpInst::FCMP_UNE:
+ CondCode = AArch64CC::NE;
+ break;
+ }
+}
+
+/// Convert an IR fp condition code to an AArch64 CC.
+/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
+/// should be AND'ed instead of OR'ed.
+static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
+ AArch64CC::CondCode &CondCode,
+ AArch64CC::CondCode &CondCode2) {
+ CondCode2 = AArch64CC::AL;
+ switch (CC) {
+ default:
+ changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
+ assert(CondCode2 == AArch64CC::AL);
+ break;
+ case CmpInst::FCMP_ONE:
+ // (a one b)
+ // == ((a olt b) || (a ogt b))
+ // == ((a ord b) && (a une b))
+ CondCode = AArch64CC::VC;
+ CondCode2 = AArch64CC::NE;
+ break;
+ case CmpInst::FCMP_UEQ:
+ // (a ueq b)
+ // == ((a uno b) || (a oeq b))
+ // == ((a ule b) && (a uge b))
+ CondCode = AArch64CC::PL;
+ CondCode2 = AArch64CC::LE;
+ break;
+ }
+}
+
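// Worked example of the two-predicate expansions (illustrative): for
// `fcmp one a, b` the OR form tests MI then GT (olt || ogt), while the AND
// form tests VC then NE (ord && une). Both cover exactly the
// ordered-and-unequal outcomes, but only the AND form can be consumed by a
// CCMP chain, which is why emitConjunctionRec() further down uses it.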
/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
@@ -1703,7 +1750,6 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
MachineRegisterInfo &MRI) {
assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
MachineInstr *OpMI = MRI.getVRegDef(Reg);
- assert(OpMI && "Expected to find a vreg def for vector shift operand");
return getAArch64VectorSplatScalar(*OpMI, MRI);
}
@@ -1810,7 +1856,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
unsigned Opc = 0;
unsigned NegOpc = 0;
const TargetRegisterClass *RC =
- getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
+ getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
if (Ty == LLT::fixed_vector(2, 64)) {
Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
@@ -2266,6 +2312,16 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
I.eraseFromParent();
return true;
}
+ case TargetOpcode::G_FENCE: {
+ if (I.getOperand(1).getImm() == 0)
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CompilerBarrier))
+ .addImm(I.getOperand(0).getImm());
+ else
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::DMB))
+ .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
+ I.eraseFromParent();
+ return true;
+ }
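// (Ordering is operand 0 and SyncScope operand 1 here: a singlethread
// fence becomes a compiler barrier only, while Acquire (== 4) selects
// DMB ISHLD (0x9) and anything stronger DMB ISH (0xb).)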
default:
return false;
}
@@ -2279,8 +2335,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const AArch64Subtarget *Subtarget =
- &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
+ const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
if (Subtarget->requiresStrictAlign()) {
// We don't support this feature yet.
LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
@@ -2312,7 +2367,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
- DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
+ DefRC = getRegClassForTypeOnBank(DefTy, RB);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
return false;
@@ -2488,7 +2543,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// The case when we have 0.0 is covered by tablegen. Reject it here so we
// can be sure tablegen works correctly and isn't rescued by this code.
- // 0.0 is not covered by tablegen for FP128. So we will handle this
+ // 0.0 is not covered by tablegen for FP128. So we will handle this
// scenario in the code here.
if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
return false;
@@ -2510,7 +2565,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
if (isFP) {
- const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
+ const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
// For 16, 64, and 128b values, emit a constant pool load.
switch (DefSize) {
default:
@@ -2735,12 +2790,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
if (isa<GLoad>(LdSt)) {
- static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
- AArch64::LDARW, AArch64::LDARX};
+ static constexpr unsigned LDAPROpcodes[] = {
+ AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
+ static constexpr unsigned LDAROpcodes[] = {
+ AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
+ ArrayRef<unsigned> Opcodes =
+ STI.hasLDAPR() && Order != AtomicOrdering::SequentiallyConsistent
+ ? LDAPROpcodes
+ : LDAROpcodes;
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
} else {
- static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
- AArch64::STLRW, AArch64::STLRX};
+ static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
+ AArch64::STLRW, AArch64::STLRX};
Register ValReg = LdSt.getReg(0);
if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
// Emit a subreg copy of 32 bits.
@@ -2774,7 +2835,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
- auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
@@ -2790,7 +2851,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (RB.getID() == AArch64::FPRRegBankID) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
- auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
Register OldDst = LdSt.getReg(0);
@@ -2804,7 +2865,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
.addImm(0)
.addUse(NewDst)
.addImm(SubReg);
- auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
+ auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
MIB.setInstr(LdSt);
}
@@ -2934,8 +2995,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
ShiftTy.getSizeInBits() == 64) {
assert(!ShiftTy.isVector() && "unexpected vector shift ty");
- assert(MRI.getVRegDef(ShiftReg) &&
- "could not find a vreg definition for shift amount");
// Insert a subregister copy to implement a 64->32 trunc
auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
.addReg(ShiftReg, 0, AArch64::sub_32);
@@ -2944,10 +3003,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
}
LLVM_FALLTHROUGH;
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
case TargetOpcode::G_OR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -3026,13 +3081,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
if (DstRB.getID() == AArch64::GPRRegBankID) {
- const TargetRegisterClass *DstRC =
- getRegClassForTypeOnBank(DstTy, DstRB, RBI);
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
if (!DstRC)
return false;
- const TargetRegisterClass *SrcRC =
- getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
+ const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
if (!SrcRC)
return false;
@@ -3270,6 +3323,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.setDesc(TII.get(NewOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ I.setFlags(MachineInstr::NoFPExcept);
return true;
}
@@ -3291,17 +3345,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_SELECT: {
- if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
+ auto &Sel = cast<GSelect>(I);
+ if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
<< ", expected: " << LLT::scalar(1) << '\n');
return false;
}
- const Register CondReg = I.getOperand(1).getReg();
- const Register TReg = I.getOperand(2).getReg();
- const Register FReg = I.getOperand(3).getReg();
+ const Register CondReg = Sel.getCondReg();
+ const Register TReg = Sel.getTrueReg();
+ const Register FReg = Sel.getFalseReg();
- if (tryOptSelect(I))
+ if (tryOptSelect(Sel))
return true;
// Make sure to use an unused vreg instead of wzr, so that the peephole
@@ -3310,9 +3365,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
+ if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
return false;
- I.eraseFromParent();
+ Sel.eraseFromParent();
return true;
}
case TargetOpcode::G_ICMP: {
@@ -3357,8 +3412,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const Register DstReg = I.getOperand(0).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
- const TargetRegisterClass *DstRC =
- getRegClassForTypeOnBank(DstTy, DstRB, RBI);
+ const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
return true;
}
@@ -3871,7 +3925,7 @@ bool AArch64InstructionSelector::selectVectorICmp(
const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
const TargetRegisterClass *SrcRC =
- getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
+ getRegClassForTypeOnBank(SrcTy, VecRB, true);
if (!SrcRC) {
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
return false;
@@ -4037,7 +4091,7 @@ MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
}
const TargetRegisterClass *DstRC =
- getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
+ getRegClassForTypeOnBank(ScalarTy, DstRB, true);
if (!DstRC) {
LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
return nullptr;
@@ -4046,7 +4100,7 @@ MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
const LLT &VecTy = MRI.getType(VecReg);
const TargetRegisterClass *VecRC =
- getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
+ getRegClassForTypeOnBank(VecTy, VecRB, true);
if (!VecRC) {
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
return nullptr;
@@ -4205,9 +4259,9 @@ bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
} else {
// No. We have to perform subregister inserts. For each insert, create an
// implicit def and a subregister insert, and save the register we create.
- const TargetRegisterClass *RC =
- getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
- WideTy.getScalarSizeInBits() * NumElts);
+ const TargetRegisterClass *RC = getRegClassForTypeOnBank(
+ LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
+ *RBI.getRegBank(SrcReg, MRI, TRI));
unsigned SubReg = 0;
bool Found = getSubRegForClass(RC, TRI, SubReg);
(void)Found;
@@ -4594,6 +4648,7 @@ AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
// Partially build the compare. Decide if we need to add a use for the
// third operand based off whether or not we're comparing against 0.0.
auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
+ CmpMI.setMIFlags(MachineInstr::NoFPExcept);
if (!ShouldUseImm)
CmpMI.addUse(RHS);
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
@@ -4632,7 +4687,7 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
const TargetRegisterClass *DstRC =
- getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
+ getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
MachineInstr *WidenedOp1 =
emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
@@ -4701,7 +4756,256 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
}
}
-bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
+/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
+/// expressed as a conjunction.
+/// \param CanNegate Set to true if we can negate the whole sub-tree just by
+/// changing the conditions on the CMP tests.
+/// (this means we can call emitConjunctionRec() with
+/// Negate==true on this sub-tree)
+/// \param MustBeFirst Set to true if this subtree needs to be negated and we
+/// cannot do the negation naturally. We are required to
+/// emit the subtree first in this case.
+/// \param WillNegate Is true if we are called when the result of this
+/// subexpression must be negated. This happens when the
+/// outer expression is an OR. We can use this fact to know
+/// that we have a double negation (or (or ...) ...) that
+/// can be implemented for free.
+static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
+ bool WillNegate, MachineRegisterInfo &MRI,
+ unsigned Depth = 0) {
+ if (!MRI.hasOneNonDBGUse(Val))
+ return false;
+ MachineInstr *ValDef = MRI.getVRegDef(Val);
+ unsigned Opcode = ValDef->getOpcode();
+ if (Opcode == TargetOpcode::G_TRUNC) {
+ // Look through a trunc.
+ Val = ValDef->getOperand(1).getReg();
+ ValDef = MRI.getVRegDef(Val);
+ Opcode = ValDef->getOpcode();
+ }
+ if (isa<GAnyCmp>(ValDef)) {
+ CanNegate = true;
+ MustBeFirst = false;
+ return true;
+ }
+ // Protect against exponential runtime and stack overflow.
+ if (Depth > 6)
+ return false;
+ if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
+ bool IsOR = Opcode == TargetOpcode::G_OR;
+ Register O0 = ValDef->getOperand(1).getReg();
+ Register O1 = ValDef->getOperand(2).getReg();
+ bool CanNegateL;
+ bool MustBeFirstL;
+ if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
+ return false;
+ bool CanNegateR;
+ bool MustBeFirstR;
+ if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
+ return false;
+
+ if (MustBeFirstL && MustBeFirstR)
+ return false;
+
+ if (IsOR) {
+ // For an OR expression we need to be able to naturally negate at least
+ // one side or we cannot do the transformation at all.
+ if (!CanNegateL && !CanNegateR)
+ return false;
+ // If the result of the OR will be negated and we can naturally negate
+ // the leaves, then this sub-tree as a whole negates naturally.
+ CanNegate = WillNegate && CanNegateL && CanNegateR;
+ // If we cannot naturally negate the whole sub-tree, then this must be
+ // emitted first.
+ MustBeFirst = !CanNegate;
+ } else {
+ assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
+ // We cannot naturally negate an AND operation.
+ CanNegate = false;
+ MustBeFirst = MustBeFirstL || MustBeFirstR;
+ }
+ return true;
+ }
+ return false;
+}
+
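// Illustrative tree shape accepted by canEmitConjunction() (all names
// hypothetical):
//
//   %c1:_(s1) = G_ICMP intpred(slt), %a, %b
//   %c2:_(s1) = G_FCMP floatpred(olt), %x, %y
//   %cond:_(s1) = G_AND %c1, %c2
//
// Each compare is a single-use leaf, so the whole condition can be lowered
// to one SUBS/FCMP followed by a conditional compare, keeping the combined
// result in NZCV instead of materializing booleans.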
+MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
+ Register LHS, Register RHS, CmpInst::Predicate CC,
+ AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
+ MachineIRBuilder &MIB) const {
+ // TODO: emit CMN as an optimization.
+ auto &MRI = *MIB.getMRI();
+ LLT OpTy = MRI.getType(LHS);
+ assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
+ unsigned CCmpOpc;
+ if (CmpInst::isIntPredicate(CC)) {
+ CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
+ } else {
+ switch (OpTy.getSizeInBits()) {
+ case 16:
+ CCmpOpc = AArch64::FCCMPHrr;
+ break;
+ case 32:
+ CCmpOpc = AArch64::FCCMPSrr;
+ break;
+ case 64:
+ CCmpOpc = AArch64::FCCMPDrr;
+ break;
+ default:
+ return nullptr;
+ }
+ }
+ AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
+ auto CCmp =
+ MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
+ constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
+ return &*CCmp;
+}
+
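// Sketch of the NZCV logic above: when Predicate fails, the CCMP writes the
// immediate NZCV flags instead of comparing, and those flags must make the
// chain's test fail. Hence the inverted condition: e.g. for OutCC == NE,
// getInvertedCondCode() yields EQ, and the NZCV value satisfying EQ is
// Z set (0b0100), so a failed predicate short-circuits the AND chain.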
+MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
+ Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
+ AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
+ // We're at a tree leaf, produce a conditional comparison operation.
+ auto &MRI = *MIB.getMRI();
+ MachineInstr *ValDef = MRI.getVRegDef(Val);
+ unsigned Opcode = ValDef->getOpcode();
+ if (Opcode == TargetOpcode::G_TRUNC) {
+ // Look through a trunc.
+ Val = ValDef->getOperand(1).getReg();
+ ValDef = MRI.getVRegDef(Val);
+ Opcode = ValDef->getOpcode();
+ }
+ if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+ CmpInst::Predicate CC = Cmp->getCond();
+ if (Negate)
+ CC = CmpInst::getInversePredicate(CC);
+ if (isa<GICmp>(Cmp)) {
+ OutCC = changeICMPPredToAArch64CC(CC);
+ } else {
+ // Handle special FP cases.
+ AArch64CC::CondCode ExtraCC;
+ changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
+ // Some floating point conditions can't be tested with a single condition
+ // code. Construct an additional comparison in this case.
+ if (ExtraCC != AArch64CC::AL) {
+ MachineInstr *ExtraCmp;
+ if (!CCOp)
+ ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
+ else
+ ExtraCmp =
+ emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
+ CCOp = ExtraCmp->getOperand(0).getReg();
+ Predicate = ExtraCC;
+ }
+ }
+
+ // Produce a normal comparison if we are first in the chain
+ if (!CCOp) {
+ auto Dst = MRI.cloneVirtualRegister(LHS);
+ if (isa<GICmp>(Cmp))
+ return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
+ return emitFPCompare(Cmp->getOperand(2).getReg(),
+ Cmp->getOperand(3).getReg(), MIB);
+ }
+ // Otherwise produce a ccmp.
+ return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
+ }
+ assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
+
+ bool IsOR = Opcode == TargetOpcode::G_OR;
+
+ Register LHS = ValDef->getOperand(1).getReg();
+ bool CanNegateL;
+ bool MustBeFirstL;
+ bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
+ assert(ValidL && "Valid conjunction/disjunction tree");
+ (void)ValidL;
+
+ Register RHS = ValDef->getOperand(2).getReg();
+ bool CanNegateR;
+ bool MustBeFirstR;
+ bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
+ assert(ValidR && "Valid conjunction/disjunction tree");
+ (void)ValidR;
+
+ // Swap sub-tree that must come first to the right side.
+ if (MustBeFirstL) {
+ assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
+ std::swap(LHS, RHS);
+ std::swap(CanNegateL, CanNegateR);
+ std::swap(MustBeFirstL, MustBeFirstR);
+ }
+
+ bool NegateR;
+ bool NegateAfterR;
+ bool NegateL;
+ bool NegateAfterAll;
+ if (Opcode == TargetOpcode::G_OR) {
+ // Swap the sub-tree that we can negate naturally to the left.
+ if (!CanNegateL) {
+ assert(CanNegateR && "at least one side must be negatable");
+ assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
+ assert(!Negate);
+ std::swap(LHS, RHS);
+ NegateR = false;
+ NegateAfterR = true;
+ } else {
+ // Negate the left sub-tree if possible, otherwise negate the result.
+ NegateR = CanNegateR;
+ NegateAfterR = !CanNegateR;
+ }
+ NegateL = true;
+ NegateAfterAll = !Negate;
+ } else {
+ assert(Opcode == TargetOpcode::G_AND &&
+ "Valid conjunction/disjunction tree");
+ assert(!Negate && "Valid conjunction/disjunction tree");
+
+ NegateL = false;
+ NegateR = false;
+ NegateAfterR = false;
+ NegateAfterAll = false;
+ }
+
+ // Emit sub-trees.
+ AArch64CC::CondCode RHSCC;
+ MachineInstr *CmpR =
+ emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
+ if (NegateAfterR)
+ RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
+ MachineInstr *CmpL = emitConjunctionRec(
+ LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
+ if (NegateAfterAll)
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ return CmpL;
+}
+
+MachineInstr *AArch64InstructionSelector::emitConjunction(
+ Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
+ bool DummyCanNegate;
+ bool DummyMustBeFirst;
+ if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
+ *MIB.getMRI()))
+ return nullptr;
+ return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
+}
+
+bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
+ MachineInstr &CondMI) {
+ AArch64CC::CondCode AArch64CC;
+ MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
+ if (!ConjMI)
+ return false;
+
+ emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
+ SelI.eraseFromParent();
+ return true;
+}
+
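// Usage shape (illustrative): given MIR such as
//
//   %c:_(s1) = G_AND %icmp, %fcmp
//   %r:_(s64) = G_SELECT %c, %t, %f
//
// the conjunction is emitted into NZCV and a single CSEL consumes the
// resulting AArch64CC, rather than materializing %c in a register first.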
+bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
MachineRegisterInfo &MRI = *MIB.getMRI();
// We want to recognize this pattern:
//
@@ -4750,12 +5054,12 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
}
// Is the condition defined by a compare?
- if (!CondDef)
- return false;
-
unsigned CondOpc = CondDef->getOpcode();
- if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
+ if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
+ if (tryOptSelectConjunction(I, *CondDef))
+ return true;
return false;
+ }
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {
@@ -5081,7 +5385,7 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
// the original size to get the result we want.
Register DemoteVec = InsMI->getOperand(0).getReg();
const TargetRegisterClass *RC =
- getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
+ getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
return false;
@@ -5198,12 +5502,11 @@ bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
}))
return false;
unsigned SubReg;
- const TargetRegisterClass *EltRC =
- getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
+ const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
if (!EltRC)
return false;
const TargetRegisterClass *DstRC =
- getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
+ getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
if (!DstRC)
return false;
if (!getSubRegForClass(EltRC, TRI, SubReg))
@@ -5261,7 +5564,7 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
if (DstSize < 128) {
// Force this to be FPR using the destination vector.
const TargetRegisterClass *RC =
- getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
+ getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
if (!RC)
return false;
if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
@@ -5528,7 +5831,7 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
uint64_t Key = I.getOperand(3).getImm();
Register DiscReg = I.getOperand(4).getReg();
auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
- bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
+ bool IsDiscZero = DiscVal && DiscVal->isNullValue();
if (Key > 3)
return false;
@@ -5777,8 +6080,6 @@ AArch64InstructionSelector::selectExtendedSHL(
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
- if (!OffsetInst)
- return None;
unsigned OffsetOpc = OffsetInst->getOpcode();
bool LookedThroughZExt = false;
@@ -5932,7 +6233,7 @@ AArch64InstructionSelector::selectAddrModeRegisterOffset(
// We need a GEP.
MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
- if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
+ if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
return None;
// If this is used more than once, let's not bother folding.
@@ -6112,14 +6413,12 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
return None;
MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
- if (!RootDef)
- return None;
MachineOperand &OffImm = RootDef->getOperand(2);
if (!OffImm.isReg())
return None;
MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
- if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
+ if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
return None;
int64_t RHSC;
MachineOperand &RHSOp1 = RHS->getOperand(1);
@@ -6187,9 +6486,6 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
return None;
MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
- if (!RootDef)
- return None;
-
if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
@@ -6210,27 +6506,26 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
MachineOperand &RHS = RootDef->getOperand(2);
MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
- if (LHSDef && RHSDef) {
- int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
- unsigned Scale = Log2_32(Size);
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
- if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
- }};
+ int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
+ unsigned Scale = Log2_32(Size);
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+ if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
}};
- }
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
+ }};
}
}
// Before falling back to our general case, check if the unscaled
// instructions can handle this. If so, that's preferable.
- if (selectAddrModeUnscaled(Root, Size).hasValue())
+ if (selectAddrModeUnscaled(Root, Size))
return None;
return {{
@@ -6269,8 +6564,6 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
// Check if the operand is defined by an instruction which corresponds to
// a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
- if (!ShiftInst)
- return None;
AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
if (ShType == AArch64_AM::InvalidShiftExtend)
return None;
@@ -6425,7 +6718,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
// to.
if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
- if (ExtInst && isDef32(*ExtInst))
+ if (isDef32(*ExtInst))
return None;
}
}
@@ -6450,7 +6743,7 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
Optional<int64_t> CstVal =
getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
- MIB.addImm(CstVal.getValue());
+ MIB.addImm(*CstVal);
}
void AArch64InstructionSelector::renderLogicalImm32(
@@ -6498,6 +6791,17 @@ void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}
+void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
+ MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+ "Expected G_FCONSTANT");
+ MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
+ .getFPImm()
+ ->getValueAPF()
+ .bitcastToAPInt()
+ .getZExtValue()));
+}
+
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
const MachineInstr &MI, unsigned NumBytes) const {
if (!MI.mayLoadOrStore())
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e9df7e001d38..74ec9373ce9e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -169,7 +169,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.scalarize(0);
getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
- .lowerFor({s1, s8, s16, s32, s64, v2s64, v4s32, v2s32})
+ .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
.widenScalarOrEltToNextPow2(0)
.clampScalarOrElt(0, s32, s64)
.clampNumElements(0, v2s32, v4s32)
@@ -180,7 +180,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SMULO, G_UMULO})
.widenScalarToNextPow2(0, /*Min = */ 32)
.clampScalar(0, s32, s64)
- .lowerIf(typeIs(1, s1));
+ .lower();
getActionDefinitionsBuilder({G_SMULH, G_UMULH})
.legalFor({s64, v8s16, v16s8, v4s32})
@@ -308,7 +308,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// These extends are also legal
.legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
.widenScalarToNextPow2(0, /* MinSize = */8)
- .lowerIfMemSizeNotPow2()
+ .lowerIfMemSizeNotByteSizePow2()
.clampScalar(0, s8, s64)
.narrowScalarIf([=](const LegalityQuery &Query) {
// Clamp extending load results to 32-bits.
@@ -317,10 +317,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
Query.Types[0].getSizeInBits() > 32;
},
changeTo(0, s32))
- // Lower any any-extending loads left into G_ANYEXT and G_LOAD
- .lowerIf([=](const LegalityQuery &Query) {
- return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
- })
.clampMaxNumElements(0, s8, 16)
.clampMaxNumElements(0, s16, 8)
.clampMaxNumElements(0, s32, 4)
@@ -536,7 +532,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
.lowerIf(
- all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0)));
+ all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
.customIf([](const LegalityQuery &Query) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 3dec980a819a..ba206bac68d1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -20,11 +20,13 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -133,7 +135,7 @@ bool matchAArch64MulConstCombine(
if (!Const)
return false;
- const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits());
+ APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
// The following code is ported from AArch64ISelLowering.
// Multiplication of a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
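
Dropping sextOrSelf reflects an APInt API cleanup: sext (and zext) are assumed to now tolerate a target width equal to the current width, returning the value unchanged, which made the OrSelf variants redundant. A small illustration of those assumed semantics:

    #include "llvm/ADT/APInt.h"

    // Assumed post-cleanup behaviour of APInt::sext.
    void demo() {
      llvm::APInt V(32, 42);
      llvm::APInt Wide = V.sext(64); // widens, as before
      llvm::APInt Same = V.sext(32); // same width: value returned unchanged
      (void)Wide;
      (void)Same;
    }
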
@@ -258,7 +260,7 @@ void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
// %d(s64) = G_ZEXT %a(s32)
Observer.changingInstr(MI);
MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
- MI.RemoveOperand(2);
+ MI.removeOperand(2);
Observer.changedInstr(MI);
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 3ff67d188822..d7959a82c484 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -58,7 +58,7 @@ struct ShuffleVectorPseudo {
ShuffleVectorPseudo(unsigned Opc, Register Dst,
std::initializer_list<SrcOp> SrcOps)
: Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
- ShuffleVectorPseudo() {}
+ ShuffleVectorPseudo() = default;
};
/// Check if a vector shuffle corresponds to a REV instruction with the
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index cc45c6642ac5..ce6f15a799b7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -149,7 +149,7 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
"op in fcmp range: "
<< II);
II.setDesc(TII->get(NewOpc));
- II.RemoveOperand(DeadNZCVIdx);
+ II.removeOperand(DeadNZCVIdx);
// Changing the opcode can result in differing regclass requirements,
// e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
// Constrain the regclasses, possibly introducing a copy.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index d3f4130d2ba1..275949c5ee64 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -13,6 +13,7 @@
#include "AArch64GlobalISelUtils.h"
#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -162,13 +163,14 @@ static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
// Check whether folding this offset is legal. It must not go out of bounds of
// the referenced object to avoid violating the code model, and must be
- // smaller than 2^21 because this is the largest offset expressible in all
- // object formats.
+ // smaller than 2^20 because this is the largest offset expressible in all
+ // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
+ // stores an immediate signed 21-bit offset.)
//
// This check also prevents us from folding negative offsets, which will end
// up being treated in the same way as large positive ones. They could also
// cause code model violations, and aren't really common enough to matter.
- if (NewOffset >= (1 << 21))
+ if (NewOffset >= (1 << 20))
return false;
Type *T = GV->getValueType();
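
The tightened bound follows from two's-complement ranges: a signed 21-bit immediate covers [-2^20, 2^20 - 1], and since negative offsets are rejected anyway, any NewOffset of 2^20 or more cannot be encoded by every object format. A standalone sketch of the range check (the helper name is made up for illustration):

    #include <cstdint>

    // Range of a signed 21-bit field such as COFF's
    // IMAGE_REL_ARM64_PAGEBASE_REL21 immediate.
    static bool fitsSigned21(int64_t Off) {
      return Off >= -(int64_t(1) << 20) && Off < (int64_t(1) << 20);
    }
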
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 515a5c63a559..f0b311289c41 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -12,20 +12,19 @@
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
-#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -42,8 +41,8 @@
using namespace llvm;
-AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
- : AArch64GenRegisterBankInfo() {
+AArch64RegisterBankInfo::AArch64RegisterBankInfo(
+ const TargetRegisterInfo &TRI) {
static llvm::once_flag InitializeRegisterBankFlag;
static auto InitializeRegisterBankOnce = [&]() {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index 2d76e48d7df2..01ef0bd92d50 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "AArch64GenRegisterBank.inc"
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index dbb8e85713cb..e4b547e17f64 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -22,10 +22,10 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -470,7 +470,7 @@ bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
// We are properly aligned, so write NOPs as requested.
Count /= 4;
for (uint64_t i = 0; i != Count; ++i)
- support::endian::write<uint32_t>(OS, 0xd503201f, Endian);
+ OS.write("\x1f\x20\x03\xd5", 4);
return true;
}
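
The endian-aware write helper is gone because AArch64 instruction words are always stored little-endian, independent of the target's data endianness, so the NOP encoding 0xd503201f is simply the fixed byte string "\x1f\x20\x03\xd5". A self-contained, purely illustrative check of that byte order:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned char Nop[4] = {0x1f, 0x20, 0x03, 0xd5};
      uint32_t Word = 0;
      for (int I = 3; I >= 0; --I) // reassemble the little-endian word
        Word = (Word << 8) | Nop[I];
      assert(Word == 0xd503201fu); // AArch64 NOP encoding
      return 0;
    }
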
@@ -592,17 +592,18 @@ public:
if (XReg != AArch64::FP)
return CU::UNWIND_ARM64_MODE_DWARF;
- assert(XReg == AArch64::FP && "Invalid frame pointer!");
- assert(i + 2 < e && "Insufficient CFI instructions to define a frame!");
+ if (i + 2 >= e)
+ return CU::UNWIND_ARM64_MODE_DWARF;
const MCCFIInstruction &LRPush = Instrs[++i];
- assert(LRPush.getOperation() == MCCFIInstruction::OpOffset &&
- "Link register not pushed!");
+ if (LRPush.getOperation() != MCCFIInstruction::OpOffset)
+ return CU::UNWIND_ARM64_MODE_DWARF;
const MCCFIInstruction &FPPush = Instrs[++i];
- assert(FPPush.getOperation() == MCCFIInstruction::OpOffset &&
- "Frame pointer not pushed!");
+ if (FPPush.getOperation() != MCCFIInstruction::OpOffset)
+ return CU::UNWIND_ARM64_MODE_DWARF;
- assert(FPPush.getOffset() + 8 == LRPush.getOffset());
+ if (FPPush.getOffset() + 8 != LRPush.getOffset())
+ return CU::UNWIND_ARM64_MODE_DWARF;
CurOffset = FPPush.getOffset();
unsigned LRReg = *MRI.getLLVMRegNum(LRPush.getRegister(), true);
@@ -611,8 +612,8 @@ public:
LRReg = getXRegFromWReg(LRReg);
FPReg = getXRegFromWReg(FPReg);
- assert(LRReg == AArch64::LR && FPReg == AArch64::FP &&
- "Pushing invalid registers for frame!");
+ if (LRReg != AArch64::LR || FPReg != AArch64::FP)
+ return CU::UNWIND_ARM64_MODE_DWARF;
// Indicate that the function has a frame.
CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME;
@@ -620,7 +621,8 @@ public:
break;
}
case MCCFIInstruction::OpDefCfaOffset: {
- assert(StackSize == 0 && "We already have the CFA offset!");
+ if (StackSize != 0)
+ return CU::UNWIND_ARM64_MODE_DWARF;
StackSize = std::abs(Inst.getOffset());
break;
}
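
Each hunk above applies the same transformation: conditions that used to be assertions (true for compiler-generated CFI, but not for arbitrary hand-written assembly) become early returns that fall back to DWARF unwind info instead of aborting. A schematic of the pattern, with the MachO constant inlined to keep the sketch self-contained:

    #include <cstdint>

    // Constant as in MachO's compact_unwind_encoding.h.
    constexpr uint32_t UNWIND_ARM64_MODE_DWARF = 0x03000000;

    // Before: assert(SequenceIsSupported && "...") aborted in asserts builds
    // and silently continued on invalid input in release builds. After:
    // unsupported sequences just opt out of compact unwind.
    static uint32_t encodeStep(bool SequenceIsSupported, uint32_t Encoding) {
      if (!SequenceIsSupported)
        return UNWIND_ARM64_MODE_DWARF;
      return Encoding;
    }
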
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 78c0e90b1384..46edb12959d2 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -254,6 +254,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
void AArch64TargetELFStreamer::emitDirectiveVariantPCS(MCSymbol *Symbol) {
+ getStreamer().getAssembler().registerSymbol(*Symbol);
cast<MCSymbolELF>(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS);
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ee0870d9ef7a..5d2ba7ef02c0 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1340,11 +1340,6 @@ void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI,
O << getRegisterName(Even) << ", " << getRegisterName(Odd);
}
-static const unsigned MatrixZADRegisterTable[] = {
- AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
- AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7
-};
-
void AArch64InstPrinter::printMatrixTileList(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1362,7 +1357,7 @@ void AArch64InstPrinter::printMatrixTileList(const MCInst *MI, unsigned OpNum,
unsigned Reg = RegMask & (1 << I);
if (Reg == 0)
continue;
- O << getRegisterName(MatrixZADRegisterTable[I]);
+ O << getRegisterName(AArch64::ZAD0 + I);
if (Printed + 1 != NumRegs)
O << ", ";
++Printed;
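
Replacing MatrixZADRegisterTable with AArch64::ZAD0 + I assumes the generated register enum numbers ZAD0 through ZAD7 consecutively, which appears to hold for the TableGen output. A hypothetical guard that would document the assumption in code:

    // Illustrative only; the enum values come from AArch64GenRegisterInfo.inc.
    static_assert(AArch64::ZAD1 == AArch64::ZAD0 + 1 &&
                      AArch64::ZAD7 == AArch64::ZAD0 + 7,
                  "ZAD tile registers must be numbered consecutively");
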
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index ad97071434df..2901e5c0fe4d 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -16,6 +16,7 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
@@ -677,7 +678,6 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
#include "AArch64GenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new AArch64MCCodeEmitter(MCII, Ctx);
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 844bd6bbada9..cb39c2a11487 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index c1186ae804d2..34e3b2cf58e4 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -52,21 +52,14 @@ static MCSubtargetInfo *
createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
if (CPU.empty()) {
CPU = "generic";
+ if (FS.empty())
+ FS = "+v8a";
if (TT.isArm64e())
CPU = "apple-a12";
}
- // Most of the NEON instruction set isn't supported in streaming mode on SME
- // targets, disable NEON unless explicitly requested.
- bool RequestedNEON = FS.contains("neon");
- bool RequestedStreamingSVE = FS.contains("streaming-sve");
- MCSubtargetInfo *STI =
- createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
- if (RequestedStreamingSVE && !RequestedNEON &&
- STI->hasFeature(AArch64::FeatureNEON))
- STI->ToggleFeature(AArch64::FeatureNEON);
- return STI;
+ return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
@@ -243,6 +236,31 @@ void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg));
}
+bool AArch64_MC::isQForm(const MCInst &MI, const MCInstrInfo *MCII) {
+ const auto &FPR128 = AArch64MCRegisterClasses[AArch64::FPR128RegClassID];
+ return llvm::any_of(MI, [&](const MCOperand &Op) {
+ return Op.isReg() && FPR128.contains(Op.getReg());
+ });
+}
+
+bool AArch64_MC::isFpOrNEON(const MCInst &MI, const MCInstrInfo *MCII) {
+ const auto &FPR128 = AArch64MCRegisterClasses[AArch64::FPR128RegClassID];
+ const auto &FPR64 = AArch64MCRegisterClasses[AArch64::FPR64RegClassID];
+ const auto &FPR32 = AArch64MCRegisterClasses[AArch64::FPR32RegClassID];
+ const auto &FPR16 = AArch64MCRegisterClasses[AArch64::FPR16RegClassID];
+ const auto &FPR8 = AArch64MCRegisterClasses[AArch64::FPR8RegClassID];
+
+ auto IsFPR = [&](const MCOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ auto Reg = Op.getReg();
+ return FPR128.contains(Reg) || FPR64.contains(Reg) || FPR32.contains(Reg) ||
+ FPR16.contains(Reg) || FPR8.contains(Reg);
+ };
+
+ return llvm::any_of(MI, IsFPR);
+}
+
static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
InitAArch64MCRegisterInfo(X, AArch64::LR);
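
Both new helpers scan the instruction's operand list with llvm::any_of, which works because MCInst exposes operand iterators. A minimal usage sketch (the wrapper function is hypothetical):

    #include "MCTargetDesc/AArch64MCTargetDesc.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstrInfo.h"

    // Hypothetical client: ask whether an instruction touches FP/NEON state.
    static bool touchesFPState(const llvm::MCInst &MI,
                               const llvm::MCInstrInfo &MCII) {
      return llvm::AArch64_MC::isFpOrNEON(MI, &MCII);
    }
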
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 66cb7a37a958..049c49796dc6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
#include <memory>
@@ -22,6 +23,7 @@ class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
+class MCInst;
class MCInstrInfo;
class MCInstPrinter;
class MCRegisterInfo;
@@ -33,7 +35,6 @@ class MCTargetStreamer;
class Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
@@ -60,8 +61,16 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
namespace AArch64_MC {
void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
+bool isQForm(const MCInst &MI, const MCInstrInfo *MCII);
+bool isFpOrNEON(const MCInst &MI, const MCInstrInfo *MCII);
}
+namespace AArch64 {
+enum OperandType {
+ OPERAND_IMPLICIT_IMM_0 = MCOI::OPERAND_FIRST_TARGET,
+};
+} // namespace AArch64
+
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 92552c3d41d5..1a8071ac1b33 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -76,7 +76,7 @@ void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
return;
}
MCSection *Cur = OutStreamer.getCurrentSectionOnly();
- OutStreamer.SwitchSection(Nt);
+ OutStreamer.switchSection(Nt);
// Emit the note header.
OutStreamer.emitValueToAlignment(Align(8).value());
@@ -92,7 +92,7 @@ void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
OutStreamer.emitIntValue(0, 4); // pad
OutStreamer.endSection(Nt);
- OutStreamer.SwitchSection(Cur);
+ OutStreamer.switchSection(Cur);
}
void AArch64TargetStreamer::emitInst(uint32_t Inst) {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
index 0072af4cc16e..46ffa50b3e6e 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index b688165d3a7b..820d940c1ed2 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -8,6 +8,7 @@
#include "AArch64WinCOFFStreamer.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCWin64EH.h"
@@ -26,14 +27,14 @@ public:
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
- void EmitWinEHHandlerData(SMLoc Loc) override;
- void EmitWindowsUnwindTables() override;
- void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
+ void emitWinEHHandlerData(SMLoc Loc) override;
+ void emitWindowsUnwindTables() override;
+ void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
void finishImpl() override;
};
-void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
- MCStreamer::EmitWinEHHandlerData(Loc);
+void AArch64WinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::emitWinEHHandlerData(Loc);
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section!
@@ -41,11 +42,11 @@ void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
/* HandlerData = */ true);
}
-void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+void AArch64WinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
}
-void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
+void AArch64WinCOFFStreamer::emitWindowsUnwindTables() {
if (!getNumWinFrameInfos())
return;
EHStreamer.Emit(*this);
@@ -53,7 +54,7 @@ void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
void AArch64WinCOFFStreamer::finishImpl() {
emitFrames(nullptr);
- EmitWindowsUnwindTables();
+ emitWindowsUnwindTables();
MCWinCOFFStreamer::finishImpl();
}
@@ -71,10 +72,9 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinUnwindCode(unsigned UnwindCode,
WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
if (!CurFrame)
return;
- MCSymbol *Label = S.emitCFILabel();
- auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset);
+ auto Inst = WinEH::Instruction(UnwindCode, /*Label=*/nullptr, Reg, Offset);
if (InEpilogCFI)
- CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst);
else
CurFrame->Instructions.push_back(Inst);
}
@@ -176,7 +176,8 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinCFIPrologEnd() {
MCSymbol *Label = S.emitCFILabel();
CurFrame->PrologEnd = Label;
- WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
+ WinEH::Instruction Inst =
+ WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0);
auto it = CurFrame->Instructions.begin();
CurFrame->Instructions.insert(it, Inst);
}
@@ -198,9 +199,9 @@ void AArch64TargetWinCOFFStreamer::emitARM64WinCFIEpilogEnd() {
return;
InEpilogCFI = false;
- MCSymbol *Label = S.emitCFILabel();
- WinEH::Instruction Inst = WinEH::Instruction(Win64EH::UOP_End, Label, -1, 0);
- CurFrame->EpilogMap[CurrentEpilog].push_back(Inst);
+ WinEH::Instruction Inst =
+ WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0);
+ CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst);
CurrentEpilog = nullptr;
}
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 41f2cead4cf8..2744e81f99f1 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -10,14 +10,36 @@
//
//===----------------------------------------------------------------------===//
+def imm_to_tile8 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAB0>", []>;
+def imm_to_tile16 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAH0>", []>;
+def imm_to_tile32 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAS0>", []>;
+def imm_to_tile64 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAD0>", []>;
+def imm_to_tile128 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAQ0>", []>;
+
+def tileslice8 : ComplexPattern<i32, 2, "SelectSMETileSlice<4>", []>;
+def tileslice16 : ComplexPattern<i32, 2, "SelectSMETileSlice<3>", []>;
+def tileslice32 : ComplexPattern<i32, 2, "SelectSMETileSlice<2>", []>;
+def tileslice64 : ComplexPattern<i32, 2, "SelectSMETileSlice<1>", []>;
+def tileslice128 : ComplexPattern<i32, 2, "SelectSMETileSlice<0>", []>; // nop
+
+def am_sme_indexed_b4 : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
+
//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//
+class sme_outer_product_pseudo<ZPRRegOp zpr_ty>
+ : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
+ zpr_ty:$zn, zpr_ty:$zm), []>,
+ Sched<[]> {
+ // Translated to the actual instructions in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+}
+
class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs za_ty:$ZAda),
- (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
+ (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
@@ -34,26 +56,42 @@ class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3} = 0b0;
+
+ let Constraints = "$ZAda = $_ZAda";
}
-class sme_outer_product_fp32<bit S, string mnemonic>
- : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
- bits<2> ZAda;
- let Inst{1-0} = ZAda;
- let Inst{2} = 0b0;
+multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op> {
+ def NAME : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
+ bits<2> ZAda;
+ let Inst{1-0} = ZAda;
+ let Inst{2} = 0b0;
+ }
+
+ def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32>;
+
+ def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+ (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
+ (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}
-class sme_outer_product_fp64<bit S, string mnemonic>
- : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
- bits<3> ZAda;
- let Inst{2-0} = ZAda;
+multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
+ def NAME : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
+ bits<3> ZAda;
+ let Inst{2-0} = ZAda;
+ }
+
+ def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64>;
+
+ def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+ (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
+ (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
}
class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
string mnemonic>
: I<(outs za_ty:$ZAda),
- (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
+ (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
@@ -72,26 +110,44 @@ class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3} = 0b0;
+
+ let Constraints = "$ZAda = $_ZAda";
}
-class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
- : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32, ZPR8,
- mnemonic> {
- bits<2> ZAda;
- let Inst{1-0} = ZAda;
- let Inst{2} = 0b0;
+multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
+ SDPatternOperator op> {
+ def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32,
+ ZPR8, mnemonic> {
+ bits<2> ZAda;
+ let Inst{1-0} = ZAda;
+ let Inst{2} = 0b0;
+ }
+
+ def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8>;
+
+ def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
+ (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
+ (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}
-class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
- : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64, ZPR16,
- mnemonic> {
- bits<3> ZAda;
- let Inst{2-0} = ZAda;
+multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
+ SDPatternOperator op> {
+ def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64,
+ ZPR16, mnemonic> {
+ bits<3> ZAda;
+ let Inst{2-0} = ZAda;
+ }
+
+ def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+ def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+ (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
+ (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
}
class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
: I<(outs TileOp32:$ZAda),
- (ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
+ (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
@@ -109,14 +165,28 @@ class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
let Inst{4} = S;
let Inst{3-2} = 0b00;
let Inst{1-0} = ZAda;
+
+ let Constraints = "$ZAda = $_ZAda";
}
-multiclass sme_bf16_outer_product<bit S, string mnemonic> {
- def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
+multiclass sme_bf16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
+ def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>;
+
+ def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+ def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+ (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
+ (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}
-multiclass sme_f16_outer_product<bit S, string mnemonic> {
- def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
+multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
+ def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>;
+
+ def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+ def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+ (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
+ (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}
//===----------------------------------------------------------------------===//
@@ -126,7 +196,7 @@ multiclass sme_f16_outer_product<bit S, string mnemonic> {
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs tile_ty:$ZAda),
- (ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
+ (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
"", []>, Sched<[]> {
bits<3> Pm;
@@ -140,6 +210,8 @@ class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4-3} = 0b00;
+
+ let Constraints = "$ZAda = $_ZAda";
}
class sme_add_vector_to_tile_u32<bit V, string mnemonic>
@@ -225,6 +297,33 @@ multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}
+multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
+ Operand tile_ty, Operand offset_ty,
+ ComplexPattern addr,
+ ComplexPattern tileslice> {
+ // base, tileslice
+ def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
+ (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
+ (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;
+
+ // reg + reg, tileslice
+ let AddedComplexity = 1 in {
+ def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
+ tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
+ offset_ty:$imm))),
+ (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
+ }
+}
+
+class sme_load_pseudo
+ : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
+ i64imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
+ Sched<[]> {
+ // Translated to the actual instructions in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+ let mayLoad = 1;
+}
+
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8, TileVectorOpH8),
@@ -264,6 +363,40 @@ multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
}
defm : sme_mem_ld_ss_aliases<NAME, is_col>;
+
+ // Pseudo instructions for lowering intrinsics, using immediates instead of
+ // tile registers.
+ def _PSEUDO_B : sme_load_pseudo;
+ def _PSEUDO_H : sme_load_pseudo;
+ def _PSEUDO_S : sme_load_pseudo;
+ def _PSEUDO_D : sme_load_pseudo;
+ def _PSEUDO_Q : sme_load_pseudo;
+
+ defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
+ !if(is_col, int_aarch64_sme_ld1b_vert,
+ int_aarch64_sme_ld1b_horiz),
+ sme_elm_idx0_0, imm0_15, am_sve_regreg_lsl0,
+ tileslice8>;
+ defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
+ !if(is_col, int_aarch64_sme_ld1h_vert,
+ int_aarch64_sme_ld1h_horiz),
+ imm0_1, imm0_7, am_sve_regreg_lsl1,
+ tileslice16>;
+ defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
+ !if(is_col, int_aarch64_sme_ld1w_vert,
+ int_aarch64_sme_ld1w_horiz),
+ imm0_3, imm0_3, am_sve_regreg_lsl2,
+ tileslice32>;
+ defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
+ !if(is_col, int_aarch64_sme_ld1d_vert,
+ int_aarch64_sme_ld1d_horiz),
+ imm0_7, imm0_1, am_sve_regreg_lsl3,
+ tileslice64>;
+ defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ !if(is_col, int_aarch64_sme_ld1q_vert,
+ int_aarch64_sme_ld1q_horiz),
+ imm0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
+ tileslice128>;
}
multiclass sme_mem_ld_ss<string mnemonic> {
@@ -310,6 +443,25 @@ multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
+multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
+ Operand offset_ty,
+ ComplexPattern imm2tile,
+ ComplexPattern addr,
+ ComplexPattern tileslice> {
+ // base, tileslice
+ def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
+ (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
+ (Inst $tile, $idx, $imm, $pg, $base, XZR)>;
+
+ // reg + reg, tileslice
+ let AddedComplexity = 1 in {
+ def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
+ (imm2tile untyped:$tile),
+ (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
+ (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
+ }
+}
+
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8, TileVectorOpH8),
@@ -349,6 +501,32 @@ multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
}
defm : sme_mem_st_ss_aliases<NAME, is_col>;
+
+ defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
+ !if(is_col, int_aarch64_sme_st1b_vert,
+ int_aarch64_sme_st1b_horiz),
+ imm0_15, imm_to_tile8, am_sve_regreg_lsl0,
+ tileslice8>;
+ defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
+ !if(is_col, int_aarch64_sme_st1h_vert,
+ int_aarch64_sme_st1h_horiz),
+ imm0_7, imm_to_tile16, am_sve_regreg_lsl1,
+ tileslice16>;
+ defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
+ !if(is_col, int_aarch64_sme_st1w_vert,
+ int_aarch64_sme_st1w_horiz),
+ imm0_3, imm_to_tile32, am_sve_regreg_lsl2,
+ tileslice32>;
+ defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
+ !if(is_col, int_aarch64_sme_st1d_vert,
+ int_aarch64_sme_st1d_horiz),
+ imm0_1, imm_to_tile64, am_sve_regreg_lsl3,
+ tileslice64>;
+ defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
+ !if(is_col, int_aarch64_sme_st1q_vert,
+ int_aarch64_sme_st1q_horiz),
+ sme_elm_idx0_0, imm_to_tile128,
+ am_sve_regreg_lsl4, tileslice128>;
}
multiclass sme_mem_st_ss<string mnemonic> {
@@ -360,7 +538,7 @@ multiclass sme_mem_st_ss<string mnemonic> {
// SME Save and Restore Array
//===----------------------------------------------------------------------===//
-class sme_spill_fill_inst<bit isStore, dag outs, dag ins, string opcodestr>
+class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
: I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
[]>,
Sched<[]> {
@@ -375,33 +553,61 @@ class sme_spill_fill_inst<bit isStore, dag outs, dag ins, string opcodestr>
let Inst{9-5} = Rn;
let Inst{4} = 0b0;
let Inst{3-0} = imm4;
-
- let mayLoad = !not(isStore);
- let mayStore = isStore;
}
-multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
- def NAME : sme_spill_fill_inst<isStore, outs, ins, opcodestr>;
-
+let mayStore = 1 in
+class sme_spill_inst<string opcodestr>
+ : sme_spill_fill_base<0b1, (outs),
+ (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
+ sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
+ imm0_15:$offset),
+ opcodestr>;
+let mayLoad = 1 in
+class sme_fill_inst<string opcodestr>
+ : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
+ (ins MatrixIndexGPR32Op12_15:$Rv,
+ sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
+ imm0_15:$offset),
+ opcodestr>;
+multiclass sme_spill<string opcodestr> {
+ def NAME : sme_spill_inst<opcodestr>;
def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
(!cast<Instruction>(NAME) MatrixOp:$ZAt,
MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
-}
-
-multiclass sme_spill<string opcodestr> {
- defm NAME : sme_spill_fill<0b1, (outs),
- (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
- sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
- imm0_15:$offset),
- opcodestr>;
+ // base
+ def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
+ (!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
+ // scalar + immediate (mul vl)
+ let AddedComplexity = 2 in {
+ def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx,
+ (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
+ (!cast<Instruction>(NAME) ZA, $idx, 0, $base, $imm4)>;
+ }
}
multiclass sme_fill<string opcodestr> {
- defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
- (ins MatrixIndexGPR32Op12_15:$Rv,
- sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
- imm0_15:$offset),
- opcodestr>;
+ def NAME : sme_fill_inst<opcodestr>;
+ def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
+ (!cast<Instruction>(NAME) MatrixOp:$ZAt,
+ MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
+ def NAME # _PSEUDO
+ : Pseudo<(outs),
+ (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
+ GPR64sp:$base), []>,
+ Sched<[]> {
+ // Translated to the actual instruction in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+ let mayLoad = 1;
+ }
+ // base
+ def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
+ (!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
+ // scalar + immediate (mul vl)
+ let AddedComplexity = 2 in {
+ def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx,
+ (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
+ (!cast<Instruction>(NAME # _PSEUDO) $idx, $imm4, $base)>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -429,8 +635,12 @@ class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty
bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
string mnemonic>
: sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
- (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
- mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
+ (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
+ mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn"> {
+
+ let Constraints = "$ZAd = $_ZAd";
+}
+
multiclass sme_vector_to_tile_aliases<Instruction inst,
MatrixTileVectorOperand tile_ty,
@@ -439,6 +649,30 @@ multiclass sme_vector_to_tile_aliases<Instruction inst,
(inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}
+multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
+ ValueType ppr_vt, Operand imm_ty,
+ Operand offset_ty,
+ SDPatternOperator op,
+ ComplexPattern tileslice> {
+ def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx,
+ (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
+ (inst imm_ty:$tile, $idx, 0, $pg, $zn)>;
+ let AddedComplexity = 1 in {
+ def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
+ offset_ty:$imm)),
+ (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
+ (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
+ }
+}
+
+class sme_mova_insert_pseudo
+ : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
+ i64imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
+ Sched<[]> {
+ // Translated to the actual instructions in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+}
+
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
@@ -478,6 +712,14 @@ multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
let Inst{3-0} = ZAd;
}
+ // Pseudo instructions for lowering intrinsics, using immediates instead of
+ // tile registers.
+ def _PSEUDO_B : sme_mova_insert_pseudo;
+ def _PSEUDO_H : sme_mova_insert_pseudo;
+ def _PSEUDO_S : sme_mova_insert_pseudo;
+ def _PSEUDO_D : sme_mova_insert_pseudo;
+ def _PSEUDO_Q : sme_mova_insert_pseudo;
+
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
@@ -498,6 +740,62 @@ multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
!if(is_col, TileVectorOpV128,
TileVectorOpH128),
ZPR128, sme_elm_idx0_0>;
+
+ defvar op = !if(is_col, int_aarch64_sme_write_vert,
+ int_aarch64_sme_write_horiz);
+
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
+ nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15,
+ op, tileslice8>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
+ nxv8i16, nxv8i1, sme_elm_idx0_1, imm0_7,
+ op, tileslice16>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
+ nxv8f16, nxv8i1, sme_elm_idx0_1, imm0_7,
+ op, tileslice16>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
+ nxv8bf16, nxv8i1, sme_elm_idx0_1, imm0_7,
+ op, tileslice16>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
+ nxv4i32, nxv4i1, sme_elm_idx0_3, imm0_3,
+ op, tileslice32>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
+ nxv4f32, nxv4i1, sme_elm_idx0_3, imm0_3,
+ op, tileslice32>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
+ nxv2i64, nxv2i1, sme_elm_idx0_7, imm0_1,
+ op, tileslice64>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
+ nxv2f64, nxv2i1, sme_elm_idx0_7, imm0_1,
+ op, tileslice64>;
+
+ defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
+ int_aarch64_sme_writeq_horiz);
+
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv16i8, nxv16i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv8i16, nxv8i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv8f16, nxv8i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv8bf16, nxv8i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv4i32, nxv4i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv4f32, nxv4i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv2i64, nxv2i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
+ defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
+ nxv2f64, nxv2i1, sme_elm_idx0_15,
+ sme_elm_idx0_0, opq, tileslice128>;
}
multiclass sme_vector_to_tile<string mnemonic> {
@@ -526,8 +824,11 @@ class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, string mnemonic>
: sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
- (ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
- mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
+ (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
+ mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {
+
+ let Constraints = "$Zd = $_Zd";
+}
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
@@ -536,6 +837,23 @@ multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
(inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
}
+multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
+ ValueType ppr_vt, Operand offset_ty,
+ ComplexPattern imm2tile,
+ ComplexPattern tileslice,
+ SDPatternOperator op> {
+ def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
+ (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)),
+ (inst $passthru, $pg, $tile, $idx, 0)>;
+ let AddedComplexity = 1 in {
+ def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
+ (imm2tile untyped:$tile),
+ (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
+ offset_ty:$imm)))),
+ (inst $passthru, $pg, $tile, $idx, $imm)>;
+ }
+}
+
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
@@ -589,6 +907,62 @@ multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
!if(is_col, TileVectorOpV128,
TileVectorOpH128), sme_elm_idx0_0>;
+
+ defvar op = !if(is_col, int_aarch64_sme_read_vert,
+ int_aarch64_sme_read_horiz);
+
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
+ nxv16i8, nxv16i1, imm0_15,
+ imm_to_tile8, tileslice8, op>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
+ nxv8i16, nxv8i1, imm0_7,
+ imm_to_tile16, tileslice16, op>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
+ nxv8f16, nxv8i1, imm0_7,
+ imm_to_tile16, tileslice16, op>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
+ nxv8bf16, nxv8i1, imm0_7,
+ imm_to_tile16, tileslice16, op>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
+ nxv4i32, nxv4i1, imm0_3,
+ imm_to_tile32, tileslice32, op>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
+ nxv4f32, nxv4i1, imm0_3,
+ imm_to_tile32, tileslice32, op>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
+ nxv2i64, nxv2i1, imm0_1,
+ imm_to_tile64, tileslice64, op>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
+ nxv2f64, nxv2i1, imm0_1,
+ imm_to_tile64, tileslice64, op>;
+
+ defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
+ int_aarch64_sme_readq_horiz);
+
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv16i8, nxv16i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv8i16, nxv8i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv8f16, nxv8i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv8bf16, nxv8i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv4i32, nxv4i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv4f32, nxv4i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv2i64, nxv2i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
+ defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
+ nxv2f64, nxv2i1, sme_elm_idx0_0,
+ imm_to_tile128, tileslice128, opq>;
}
multiclass sme_tile_to_vector<string mnemonic> {
@@ -600,8 +974,11 @@ multiclass sme_tile_to_vector<string mnemonic> {
// SME Zero
//===----------------------------------------------------------------------===//
+// NOTE: This definition isn't really correct because there are outputs, i.e.
+// the tile registers being zeroed. We fix this up in a custom inserter that
+// marks the appropriate registers as being implicitly defined.
class sme_zero_inst<string mnemonic>
- : I<(outs MatrixTileList:$imm), (ins),
+ : I<(outs), (ins MatrixTileList:$imm),
mnemonic, "\t$imm", "", []>, Sched<[]> {
bits<8> imm;
let Inst{31-8} = 0b110000000000100000000000;
@@ -626,6 +1003,15 @@ multiclass sme_zero<string mnemonic> {
def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
+
+ def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>,
+ Sched<[]> {
+ // Translated to the actual instructions in AArch64ISelLowering.cpp
+ let usesCustomInserter = 1;
+ }
+
+ def : Pat<(int_aarch64_sme_zero imm:$imm),
+ (!cast<Instruction>(NAME # _PSEUDO) imm:$imm)>;
}
//===----------------------------------------------------------------------===//
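
As the NOTE in the hunk above says, the ZERO pseudo keeps the tile mask as a plain immediate and leaves all register effects to a custom inserter. A hedged sketch of what such an inserter does; the opcode, tile list, and function name are placeholders, not the actual AArch64ISelLowering code:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/MC/MCRegister.h"

    using namespace llvm;

    // Re-emit the pseudo as the real opcode and mark each tile selected by
    // the mask as implicitly defined, supplying the defs that the TableGen
    // definition above cannot express.
    static MachineBasicBlock *expandZeroPseudo(MachineInstr &MI,
                                               MachineBasicBlock *BB,
                                               const TargetInstrInfo &TII,
                                               unsigned RealOpc,
                                               ArrayRef<MCRegister> TileRegs) {
      uint64_t Mask = MI.getOperand(0).getImm();
      MachineInstrBuilder MIB =
          BuildMI(*BB, MI, MI.getDebugLoc(), TII.get(RealOpc)).addImm(Mask);
      for (unsigned I = 0, E = TileRegs.size(); I != E; ++I)
        if (Mask & (1ull << I))
          MIB.addReg(TileRegs[I], RegState::ImplicitDefine);
      MI.eraseFromParent();
      return BB;
    }
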
@@ -651,6 +1037,15 @@ class sve2_int_perm_revd<string asm>
let ElementSize = ZPR128.ElementSize;
}
+multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
+ def NAME : sve2_int_perm_revd<asm>;
+
+ def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>;
+}
+
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
: I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
asm, "\t$Zd, $Zn, $Zm", "", []>,
@@ -672,11 +1067,16 @@ class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
let ElementSize = zpr_ty.ElementSize;
}
-multiclass sve2_clamp<string asm, bit U> {
+multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
@@ -699,7 +1099,7 @@ class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
let Inst{3-0} = Pd;
}
-multiclass sve2_int_perm_sel_p<string asm> {
+multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
bits<4> imm;
let Inst{23-22} = imm{3-2};
@@ -723,4 +1123,32 @@ multiclass sve2_int_perm_sel_p<string asm> {
let Inst{22} = 0b1;
let Inst{20-18} = 0b000;
}
+
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
+ MatrixIndexGPR32Op12_15:$idx)),
+ (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
+ def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
+ MatrixIndexGPR32Op12_15:$idx)),
+ (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
+ def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
+ MatrixIndexGPR32Op12_15:$idx)),
+ (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
+ def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
+ MatrixIndexGPR32Op12_15:$idx)),
+ (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;
+
+ let AddedComplexity = 1 in {
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
+ (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
+ (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
+ def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
+ (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
+ (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
+ def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
+ (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
+ (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
+ def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
+ (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
+ (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
+ }
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9d4bdbe5d053..3631536a32b9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -199,6 +199,11 @@ def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", [
def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
+def SVECpyDupImm8Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
+def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
+def SVECpyDupImm32Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i32>", []>;
+def SVECpyDupImm64Pat : ComplexPattern<i64, 2, "SelectSVECpyDupImm<MVT::i64>", []>;
+
def SVELogicalImm8Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8>", []>;
def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>", []>;
def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
@@ -209,14 +214,6 @@ def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16
def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
def SVELogicalImm64NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64, true>", []>;
-def SVE8BitLslImm32 : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
-def SVE8BitLslImm64 : ComplexPattern<i64, 2, "SelectSVE8BitLslImm", [imm]>;
-class SVE8BitLslImm<ValueType ty> {
- ComplexPattern Pat = !cond(
- !eq(ty, i32): SVE8BitLslImm32,
- !eq(ty, i64): SVE8BitLslImm64);
-}
-
def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>;
def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>;
@@ -234,6 +231,8 @@ def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []
def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
def SVEShiftImmR64 : ComplexPattern<i64, 1, "SelectSVEShiftImm<1, 64, true>", []>;
+def SVEShiftSplatImmR : ComplexPattern<iAny, 1, "SelectSVEShiftSplatImmR", []>;
+
def SVEAllActive : ComplexPattern<untyped, 0, "SelectAllActivePredicate", []>;
class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
@@ -335,9 +334,14 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
-let Predicates = [HasSVEorStreamingSVE] in {
+let Predicates = [HasSVEorSME] in {
defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
defm PTRUES : sve_int_ptrue<0b001, "ptrues", null_frag>;
+
+ def : Pat<(nxv16i1 immAllOnesV), (PTRUE_B 31)>;
+ def : Pat<(nxv8i1 immAllOnesV), (PTRUE_H 31)>;
+ def : Pat<(nxv4i1 immAllOnesV), (PTRUE_S 31)>;
+ def : Pat<(nxv2i1 immAllOnesV), (PTRUE_D 31)>;
}
//===----------------------------------------------------------------------===//
@@ -370,24 +374,27 @@ class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType
: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),
(inst $Op3, $Op1, $Op2)>;
-class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
- ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
- (inst $Op1, i32:$imm, i32:$shift)>;
+multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst>{
+ def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))),
+ (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+ def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)),
+ (inst $Op3, $Op1, $Op2)>;
+}
class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
+ : Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm, i32:$shift)))))),
(inst $Op1, i32:$imm, i32:$shift)>;
class SVE_1_Op_Imm_Arith_All_Active<ValueType vt, ValueType pt, SDPatternOperator op,
ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
+ : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm)))))),
(inst $Op1, i32:$imm)>;
class SVE_1_Op_Imm_Log_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))),
+ : Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i64:$imm)))))),
(inst $Op1, i64:$imm)>;
class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
@@ -489,20 +496,20 @@ multiclass SVE_InReg_Extend_PassthruUndef<ValueType vt, SDPatternOperator op, Va
class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
ValueType pt, ValueType it,
ComplexPattern cast, Instruction inst>
-: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
(inst $Pg, $Rn, i32:$imm)>;
class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
ValueType pt, ValueType it,
ComplexPattern cast, Instruction inst>
-: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
(inst $Rn, i32:$imm)>;
class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
ValueType pt, ValueType it,
FPImmLeaf immL, int imm,
Instruction inst>
-: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))),
+: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (splat_vector (it immL))))),
(inst $Pg, $Zs1, imm)>;
class SVE_2_Op_Fp_Imm_Pat_Zero<ValueType vt, SDPatternOperator op,
@@ -510,9 +517,33 @@ class SVE_2_Op_Fp_Imm_Pat_Zero<ValueType vt, SDPatternOperator op,
FPImmLeaf immL, int imm,
Instruction inst>
: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)),
- (vt (AArch64dup (it immL))))),
+ (vt (splat_vector (it immL))))),
(inst $Pg, $Zs1, imm)>;
+// Used to re-order the operands of BSP when lowering to BSL. BSP has the order:
+// mask, in1, in2, whereas BSL for SVE2 has them ordered in1, in2, mask.
+class SVE_3_Op_BSP_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
+ (inst $Op2, $Op3, $Op1)>;
+
+class SVE_Shift_Add_All_Active_Pat<ValueType vtd, SDPatternOperator op, ValueType pt,
+ ValueType vt1, ValueType vt2, ValueType vt3,
+ Instruction inst>
+: Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))),
+ (inst $Op1, $Op2, $Op3)>;
+
+//===----------------------------------------------------------------------===//
+// SVE pattern match helpers.
+//===----------------------------------------------------------------------===//
+
+// Matches either an intrinsic or a predicated operation with an all-active predicate.
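+// The vselect form keeps $Op1 in the inactive lanes, matching a merging operation.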
+class EitherVSelectOrPassthruPatFrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
+: PatFrags<(ops node:$Pg, node:$Op1, node:$Op2), [
+ (intrinsic node:$Pg, node:$Op1, node:$Op2),
+ (vselect node:$Pg, (sdnode (SVEAllActive), node:$Op1, node:$Op2), node:$Op1),
+ ]>;
+
//
// Pseudo -> Instruction mappings
//
@@ -612,10 +643,11 @@ class sve_int_pfalse<bits<6> opc, string asm>
multiclass sve_int_pfalse<bits<6> opc, string asm> {
def NAME : sve_int_pfalse<opc, asm>;
- def : Pat<(nxv16i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv8i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv4i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv2i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
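+  // PFALSE materializes an all-zeros predicate of any legal element count.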
+ def : Pat<(nxv16i1 immAllZerosV), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv8i1 immAllZerosV), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv4i1 immAllZerosV), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv2i1 immAllZerosV), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv1i1 immAllZerosV), (!cast<Instruction>(NAME))>;
}
class sve_int_ptest<bits<6> opc, string asm>
@@ -885,6 +917,8 @@ class sve_int_count<bits<3> opc, string asm>
let Inst{10} = opc{0};
let Inst{9-5} = pattern;
let Inst{4-0} = Rd;
+
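+  // The result depends only on the immediate operands and the vector length,
+  // so these counts can be rematerialized.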
+ let isReMaterializable = 1;
}
multiclass sve_int_count<bits<3> opc, string asm, SDPatternOperator op> {
@@ -965,7 +999,7 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
SDPatternOperator op,
SDPatternOperator opcnt> {
- let Predicates = [HasSVEorStreamingSVE] in {
+ let Predicates = [HasSVEorSME] in {
def NAME : sve_int_pred_pattern_a<opc, asm>;
def : InstAlias<asm # "\t$Rdn, $pattern",
@@ -974,7 +1008,7 @@ multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
(!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
}
- let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in {
+ let Predicates = [HasSVEorSME, UseScalarIncVL] in {
def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))),
(!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>;
@@ -1170,28 +1204,45 @@ multiclass sve_int_perm_dup_i<string asm> {
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
// Duplicate extracted element of vector into all vector elements
- def : Pat<(nxv16i8 (AArch64dup (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
(!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
- def : Pat<(nxv8i16 (AArch64dup (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4i32 (AArch64dup (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2i64 (AArch64dup (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv8f16 (AArch64dup (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv8bf16 (AArch64dup (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4f16 (AArch64dup (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f16 (AArch64dup (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv4f32 (AArch64dup (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f32 (AArch64dup (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv2f64 (AArch64dup (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+
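+  // Duplicate the 128-bit segment selected by $imm across the whole vector.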
+ def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
+ def : Pat<(nxv8i16 (AArch64duplane128 nxv8i16:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
+ def : Pat<(nxv4i32 (AArch64duplane128 nxv4i32:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
+ def : Pat<(nxv2i64 (AArch64duplane128 nxv2i64:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
+ def : Pat<(nxv8f16 (AArch64duplane128 nxv8f16:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
+ def : Pat<(nxv4f32 (AArch64duplane128 nxv4f32:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
+ def : Pat<(nxv2f64 (AArch64duplane128 nxv2f64:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
+ def : Pat<(nxv8bf16 (AArch64duplane128 nxv8bf16:$Op1, i64:$imm)),
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
}
class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
@@ -1631,6 +1682,7 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_Pat<nxv1i1, op, nxv1i1, nxv1i1, nxv1i1, !cast<Instruction>(NAME)>;
def : SVE_2_Op_AllActive_Pat<nxv16i1, op_nopred, nxv16i1, nxv16i1,
!cast<Instruction>(NAME), PTRUE_B>;
def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8i1, nxv8i1,
@@ -1743,7 +1795,7 @@ multiclass sve_int_dup_mask_imm<string asm> {
def : InstAlias<"mov $Zd, $imm",
(!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>;
- def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))),
+ def : Pat<(nxv2i64 (splat_vector (i64 logical_imm64:$imm))),
(!cast<Instruction>(NAME) logical_imm64:$imm)>;
}
@@ -2478,7 +2530,7 @@ multiclass sve2_fp_mla_long<bits<2> opc, string asm, SDPatternOperator op> {
// SVE Stack Allocation Group
//===----------------------------------------------------------------------===//
-class sve_int_arith_vl<bit opc, string asm>
+class sve_int_arith_vl<bit opc, string asm, bit streaming_sve = 0b0>
: I<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, simm6_32b:$imm6),
asm, "\t$Rd, $Rn, $imm6",
"",
@@ -2490,12 +2542,13 @@ class sve_int_arith_vl<bit opc, string asm>
let Inst{22} = opc;
let Inst{21} = 0b1;
let Inst{20-16} = Rn;
- let Inst{15-11} = 0b01010;
+ let Inst{15-12} = 0b0101;
+ let Inst{11} = streaming_sve;
let Inst{10-5} = imm6;
let Inst{4-0} = Rd;
}
-class sve_int_read_vl_a<bit op, bits<5> opc2, string asm>
+class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b0>
: I<(outs GPR64:$Rd), (ins simm6_32b:$imm6),
asm, "\t$Rd, $imm6",
"",
@@ -2506,9 +2559,12 @@ class sve_int_read_vl_a<bit op, bits<5> opc2, string asm>
let Inst{22} = op;
let Inst{21} = 0b1;
let Inst{20-16} = opc2{4-0};
- let Inst{15-11} = 0b01010;
+ let Inst{15-12} = 0b0101;
+ let Inst{11} = streaming_sve;
let Inst{10-5} = imm6;
let Inst{4-0} = Rd;
+
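+  // Vector-length reads depend only on the immediate and the vector length,
+  // so they can be rematerialized.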
+ let isReMaterializable = 1;
}
//===----------------------------------------------------------------------===//
@@ -2589,8 +2645,8 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
SDPatternOperator int_op,
SDPatternOperator ir_op, ValueType vt1,
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
- def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
-
+ def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
+ SVEPseudo2Instr<NAME, 1>;
// convert vt1 to a packed type for the intrinsic patterns
defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
!eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
@@ -2604,8 +2660,11 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
1 : vt3);
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
-
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
+
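+  // When the passthru is undef, select a pseudo that is expanded later,
+  // avoiding the need for a real merge value.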
+ def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
}
multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
@@ -2614,7 +2673,8 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
SDPatternOperator int_op,
SDPatternOperator ir_op, ValueType vt1,
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
- def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
+ def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
+ SVEPseudo2Instr<NAME, 1>;
// convert vt1 to a packed type for the intrinsic patterns
defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
@@ -2623,8 +2683,11 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
1 : vt1);
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
-
def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
+
+ def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
+
+ defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
}
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
@@ -2726,11 +2789,19 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_bin_pred_log<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>;
- def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>;
- def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>;
- def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_log<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op,
+ DestructiveInstTypeEnum flags> {
+ let DestructiveInstType = flags in {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -3756,7 +3827,8 @@ class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
}
multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
- SDPatternOperator op> {
+ SDPatternOperator op,
+ SDPatternOperator shift_op = null_frag> {
def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
@@ -3773,6 +3845,11 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_Add_All_Active_Pat<nxv4i32, shift_op, nxv4i1, nxv4i32, nxv4i32, i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_Add_All_Active_Pat<nxv2i64, shift_op, nxv2i1, nxv2i64, nxv2i64, i32, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
@@ -4331,18 +4408,6 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_arith_imm0_subr<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
- def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
- def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
- def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
-
- def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubImm8Pat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
-}
-
class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),
@@ -4458,7 +4523,8 @@ class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator ir_op = null_frag> {
def NAME : sve2_int_bitwise_ternary_op_d<opc, asm>;
def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
@@ -4472,6 +4538,12 @@ multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperato
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
+
+ def : SVE_3_Op_BSP_Pat<nxv16i8, ir_op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_BSP_Pat<nxv8i16, ir_op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_BSP_Pat<nxv4i32, ir_op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
+ def : SVE_3_Op_BSP_Pat<nxv2i64, ir_op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
}
class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
@@ -4578,29 +4650,28 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
}
multiclass sve_int_dup_imm_pred_merge_inst<
- bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty,
- ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> {
+ bits<2> sz8_64, string asm, ZPRRegOp zprty, imm8_opt_lsl cpyimm,
+ ValueType intty, ValueType predty, ValueType scalarty, ComplexPattern cpx> {
let Constraints = "$Zd = $_Zd" in
def NAME : sve_int_dup_imm_pred<sz8_64, 1, asm, zprty, "/m",
(ins zprty:$_Zd, PPRAny:$Pg, cpyimm:$imm)>;
def : InstAlias<"mov $Zd, $Pg/m, $imm",
(!cast<Instruction>(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>;
- def : Pat<(intty
- (vselect predty:$Pg,
- (intty (AArch64dup (scalarty (SVE8BitLslImm<scalarty>.Pat i32:$imm, i32:$shift)))),
- intty:$Zd)),
- (!cast<Instruction>(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>;
+ def : Pat<(vselect predty:$Pg,
+ (intty (splat_vector (scalarty (cpx i32:$imm, i32:$shift)))),
+ ZPR:$Zd),
+ (!cast<Instruction>(NAME) $Zd, $Pg, $imm, $shift)>;
}
multiclass sve_int_dup_imm_pred_merge<string asm> {
- defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1,
- i32, cpy_imm8_opt_lsl_i8>;
- defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1,
- i32, cpy_imm8_opt_lsl_i16>;
- defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1,
- i32, cpy_imm8_opt_lsl_i32>;
- defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1,
- i64, cpy_imm8_opt_lsl_i64>;
+ defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8,
+ nxv16i8, nxv16i1, i32, SVECpyDupImm8Pat>;
+ defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16,
+ nxv8i16, nxv8i1, i32, SVECpyDupImm16Pat>;
+ defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32,
+ nxv4i32, nxv4i1, i32, SVECpyDupImm32Pat>;
+ defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64,
+ nxv2i64, nxv2i1, i64, SVECpyDupImm64Pat>;
def : InstAlias<"fmov $Zd, $Pg/m, #0.0",
(!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>;
@@ -4608,11 +4679,24 @@ multiclass sve_int_dup_imm_pred_merge<string asm> {
(!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, 0, 0), 0>;
def : InstAlias<"fmov $Zd, $Pg/m, #0.0",
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>;
+
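+  // Zeroing the active lanes of a floating-point vector is a merging copy of
+  // immediate #0.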
+ def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv8f16 ZPR:$Zd)),
+ (!cast<Instruction>(NAME # _H) $Zd, $Pg, 0, 0)>;
+ def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv4f16 ZPR:$Zd)),
+ (!cast<Instruction>(NAME # _S) $Zd, $Pg, 0, 0)>;
+ def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv2f16 ZPR:$Zd)),
+ (!cast<Instruction>(NAME # _D) $Zd, $Pg, 0, 0)>;
+ def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv4f32 ZPR:$Zd)),
+ (!cast<Instruction>(NAME # _S) $Zd, $Pg, 0, 0)>;
+ def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv2f32 ZPR:$Zd)),
+ (!cast<Instruction>(NAME # _D) $Zd, $Pg, 0, 0)>;
+ def : Pat<(vselect PPRAny:$Pg, (SVEDup0), (nxv2f64 ZPR:$Zd)),
+ (!cast<Instruction>(NAME # _D) $Zd, $Pg, 0, 0)>;
}
multiclass sve_int_dup_imm_pred_zero_inst<
- bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty,
- ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> {
+ bits<2> sz8_64, string asm, ZPRRegOp zprty, imm8_opt_lsl cpyimm,
+ ValueType intty, ValueType predty, ValueType scalarty, ComplexPattern cpx> {
def NAME : sve_int_dup_imm_pred<sz8_64, 0, asm, zprty, "/z",
(ins PPRAny:$Pg, cpyimm:$imm)>;
def : InstAlias<"mov $Zd, $Pg/z, $imm",
@@ -4623,22 +4707,21 @@ multiclass sve_int_dup_imm_pred_zero_inst<
(!cast<Instruction>(NAME) PPRAny:$Ps1, -1, 0)>;
def : Pat<(intty (anyext (predty PPRAny:$Ps1))),
(!cast<Instruction>(NAME) PPRAny:$Ps1, 1, 0)>;
- def : Pat<(intty
- (vselect predty:$Pg,
- (intty (AArch64dup (scalarty (SVE8BitLslImm<scalarty>.Pat i32:$imm, i32:$shift)))),
- (intty (AArch64dup (scalarty 0))))),
- (!cast<Instruction>(NAME) $Pg, i32:$imm, i32:$shift)>;
+ def : Pat<(vselect predty:$Pg,
+ (intty (splat_vector (scalarty (cpx i32:$imm, i32:$shift)))),
+ (intty (splat_vector (scalarty 0)))),
+ (!cast<Instruction>(NAME) $Pg, $imm, $shift)>;
}
multiclass sve_int_dup_imm_pred_zero<string asm> {
- defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1,
- i32, cpy_imm8_opt_lsl_i8>;
- defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1,
- i32, cpy_imm8_opt_lsl_i16>;
- defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1,
- i32, cpy_imm8_opt_lsl_i32>;
- defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1,
- i64, cpy_imm8_opt_lsl_i64>;
+ defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8,
+ nxv16i8, nxv16i1, i32, SVECpyDupImm8Pat>;
+ defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16,
+ nxv8i16, nxv8i1, i32, SVECpyDupImm16Pat>;
+ defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32,
+ nxv4i32, nxv4i1, i32, SVECpyDupImm32Pat>;
+ defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64,
+ nxv2i64, nxv2i1, i64, SVECpyDupImm64Pat>;
}
//===----------------------------------------------------------------------===//
@@ -4690,6 +4773,10 @@ multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt
(cmp $Op1, $Op2)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
(cmp $Op1, $Op2)>;
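+  // A compare against zero under an all-active predicate that is then ANDed
+  // with $Pg can be governed by $Pg directly.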
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
+ (cmp $Pg, $Op1)>;
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
+ (cmp $Pg, $Op1)>;
}
multiclass sve_int_cmp_0<bits<3> opc, string asm, CondCode cc, CondCode invcc> {
@@ -4761,14 +4848,26 @@ multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
ValueType predvt, ValueType intvt,
Operand immtype, Instruction cmp> {
def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg),
- (intvt ZPR:$Zs1),
- (intvt (AArch64dup (immtype:$imm))),
- cc)),
+ (intvt ZPR:$Zs1),
+ (intvt (splat_vector (immtype:$imm))),
+ cc)),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg),
- (intvt (AArch64dup (immtype:$imm))),
- (intvt ZPR:$Zs1),
- commuted_cc)),
+ (intvt (splat_vector (immtype:$imm))),
+ (intvt ZPR:$Zs1),
+ commuted_cc)),
+ (cmp $Pg, $Zs1, immtype:$imm)>;
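+  // As above, immediate compares under an all-active predicate that are ANDed
+  // with $Pg can use $Pg as the governing predicate.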
+ def : Pat<(predvt (and predvt:$Pg,
+ (AArch64setcc_z (predvt (AArch64ptrue 31)),
+ (intvt ZPR:$Zs1),
+ (intvt (splat_vector (immtype:$imm))),
+ cc))),
+ (cmp $Pg, $Zs1, immtype:$imm)>;
+ def : Pat<(predvt (and predvt:$Pg,
+ (AArch64setcc_z (predvt (AArch64ptrue 31)),
+ (intvt (splat_vector (immtype:$imm))),
+ (intvt ZPR:$Zs1),
+ commuted_cc))),
(cmp $Pg, $Zs1, immtype:$imm)>;
}
@@ -5148,6 +5247,8 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{15-10} = 0b010000;
let Inst{9-5} = imm5;
let Inst{4-0} = Zd;
+
+ let isReMaterializable = 1;
}
multiclass sve_int_index_ii<string asm> {
@@ -5166,13 +5267,13 @@ multiclass sve_int_index_ii<string asm> {
(!cast<Instruction>(NAME # "_D") (i64 0), simm5_64b:$imm5b)>;
// add(step_vector(step), dup(X)) -> index(X, step).
- def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+ def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))),
(!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, (!cast<SDNodeXForm>("trunc_imm") $imm5b))>;
- def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+ def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))),
(!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, (!cast<SDNodeXForm>("trunc_imm") $imm5b))>;
- def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+ def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))),
(!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
- def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+ def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
(!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
}
@@ -5211,35 +5312,35 @@ multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperat
(!cast<Instruction>(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
// add(step_vector(step), dup(X)) -> index(X, step).
- def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+ def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))),
(!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
- def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+ def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))),
(!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
- def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+ def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))),
(!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, (!cast<Instruction>("MOVi32imm") $imm))>;
- def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+ def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
(!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (!cast<Instruction>("MOVi64imm") $imm))>;
- def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+ def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
(!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
// mul(step_vector(1), dup(Y)) -> index(0, Y).
- def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
+ def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))),
(!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
- def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
+ def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),
(!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
- def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
+ def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),
(!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
- def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
+ def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),
(!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
// add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
- def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+ def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(simm5_8b:$imm5)))),
(!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
- def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+ def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (nxv8i16 (splat_vector(simm5_16b:$imm5)))),
(!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
- def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+ def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (nxv4i32 (splat_vector(simm5_32b:$imm5)))),
(!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
- def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+ def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
(!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
}
@@ -5267,13 +5368,13 @@ multiclass sve_int_index_ri<string asm> {
def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>;
// add(step_vector(step), dup(X)) -> index(X, step).
- def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
+ def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))),
(!cast<Instruction>(NAME # "_B") GPR32:$Rm, (!cast<SDNodeXForm>("trunc_imm") $imm5))>;
- def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
+ def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),
(!cast<Instruction>(NAME # "_H") GPR32:$Rm, (!cast<SDNodeXForm>("trunc_imm") $imm5))>;
- def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
+ def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),
(!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
- def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
+ def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),
(!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
}
@@ -5301,25 +5402,25 @@ multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> {
def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>;
// add(step_vector(step), dup(X)) -> index(X, step).
- def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+ def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))),
(!cast<Instruction>(NAME # "_B") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
- def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+ def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (splat_vector(i32 GPR32:$Rn)))),
(!cast<Instruction>(NAME # "_H") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)))>;
- def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
+ def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (splat_vector(i32 GPR32:$Rn)))),
(!cast<Instruction>(NAME # "_S") GPR32:$Rn, (!cast<Instruction>("MOVi32imm") $imm))>;
- def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+ def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (splat_vector(i64 GPR64:$Rn)))),
(!cast<Instruction>(NAME # "_D") GPR64:$Rn, (!cast<Instruction>("MOVi64imm") $imm))>;
- def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+ def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (splat_vector(i64 GPR64:$Rn)))),
(!cast<Instruction>(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast<Instruction>("MOVi32imm") (!cast<SDNodeXForm>("trunc_imm") $imm)), sub_32))>;
// add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
- def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+ def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))),
(!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
- def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+ def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),(nxv8i16 (splat_vector(i32 GPR32:$Rn)))),
(!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
- def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
+ def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),(nxv4i32 (splat_vector(i32 GPR32:$Rn)))),
(!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
- def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+ def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),(nxv2i64 (splat_vector(i64 GPR64:$Rn)))),
(!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
}
@@ -5972,25 +6073,25 @@ multiclass sve_mem_sst_sv_64_scaled<bits<2> msz, string asm,
SDPatternOperator op,
RegisterOperand zprext,
ValueType vt> {
- def _SCALED_REAL : sve_mem_sst_sv2<msz, 1, asm, zprext>;
+ def _SCALED : sve_mem_sst_sv2<msz, 1, asm, zprext>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
+ (!cast<Instruction>(NAME # _SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt),
- (!cast<Instruction>(NAME # _SCALED_REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
+ (!cast<Instruction>(NAME # _SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm,
SDPatternOperator op,
ValueType vt> {
- def _REAL : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;
+ def NAME : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
- (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
- (!cast<Instruction>(NAME # _REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
+ (!cast<Instruction>(NAME) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
@@ -8433,6 +8534,7 @@ def am_sve_regreg_lsl0 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<0>", [
def am_sve_regreg_lsl1 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<1>", []>;
def am_sve_regreg_lsl2 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<2>", []>;
def am_sve_regreg_lsl3 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<3>", []>;
+def am_sve_regreg_lsl4 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<4>", []>;
// Predicated pseudo floating point two operand instructions.
multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 4a24162540a5..ccb34f367338 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -305,8 +305,7 @@ bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
// ..where the value stored comes from a vector extract..
auto *IntrI = dyn_cast<IntrinsicInst>(Store->getOperand(0));
- if (!IntrI ||
- IntrI->getIntrinsicID() != Intrinsic::experimental_vector_extract)
+ if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract)
return false;
// ..that is extracting from index 0..
@@ -365,8 +364,7 @@ bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
// ..whose operand is a vector_insert..
auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0));
- if (!IntrI ||
- IntrI->getIntrinsicID() != Intrinsic::experimental_vector_insert)
+ if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert)
return false;
// ..that is inserting into index zero of an undef vector..
@@ -451,8 +449,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
continue;
switch (F.getIntrinsicID()) {
- case Intrinsic::experimental_vector_extract:
- case Intrinsic::experimental_vector_insert:
+ case Intrinsic::vector_extract:
+ case Intrinsic::vector_insert:
case Intrinsic::aarch64_sve_ptrue:
for (User *U : F.users())
Functions.insert(cast<Instruction>(U)->getFunction());
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 5906a5d6b50b..71303611265c 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -634,7 +634,8 @@ namespace AArch64SysReg {
FeatureBitset FeaturesRequired;
bool haveFeatures(FeatureBitset ActiveFeatures) const {
- return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
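+      // FeatureAll implies support for every system register, regardless of
+      // the features a register normally requires.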
+ return ActiveFeatures[llvm::AArch64::FeatureAll] ||
+ (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
}
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 11cc1a01d248..c4680cbedadf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -91,10 +91,6 @@ ModulePass *createAMDGPULowerIntrinsicsPass();
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
extern char &AMDGPULowerIntrinsicsID;
-ModulePass *createAMDGPUFixFunctionBitcastsPass();
-void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
-extern char &AMDGPUFixFunctionBitcastsID;
-
ModulePass *createAMDGPUCtorDtorLoweringPass();
void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &);
extern char &AMDGPUCtorDtorLoweringID;
@@ -303,6 +299,12 @@ extern char &SIMemoryLegalizerID;
void initializeSIModeRegisterPass(PassRegistry&);
extern char &SIModeRegisterID;
+void initializeAMDGPUReleaseVGPRsPass(PassRegistry &);
+extern char &AMDGPUReleaseVGPRsID;
+
+void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
+extern char &AMDGPUInsertDelayAluID;
+
void initializeSIInsertHardClausesPass(PassRegistry &);
extern char &SIInsertHardClausesID;
@@ -335,6 +337,9 @@ extern char &GCNNSAReassignID;
void initializeGCNPreRAOptimizationsPass(PassRegistry &);
extern char &GCNPreRAOptimizationsID;
+FunctionPass *createAMDGPUSetWavePriorityPass();
+void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
+
namespace AMDGPU {
enum TargetIndex {
TI_CONSTDATA_START,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 806c0b18637a..48b5814cd482 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -86,6 +86,12 @@ def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts"
"Have s_scratch_* flat memory instructions"
>;
+def FeatureEnableFlatScratch : SubtargetFeature<"enable-flat-scratch",
+ "EnableFlatScratch",
+ "true",
+ "Use scratch_* flat memory instructions to access scratch"
+>;
+
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
"AddNoCarryInsts",
"true",
@@ -171,6 +177,12 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;
+def FeatureUserSGPRInit16Bug : SubtargetFeature<"user-sgpr-init16-bug",
+ "UserSGPRInit16Bug",
+ "true",
+ "Bug requiring at least 16 user+system SGPRs to be enabled"
+>;
+
def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
"LDSMisalignedBug",
"true",
@@ -307,12 +319,24 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
"Additional instructions for GFX90A+"
>;
+def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
+ "GFX940Insts",
+ "true",
+ "Additional instructions for GFX940+"
+>;
+
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
"Additional instructions for GFX10+"
>;
+def FeatureGFX11Insts : SubtargetFeature<"gfx11-insts",
+ "GFX11Insts",
+ "true",
+ "Additional instructions for GFX11+"
+>;
+
def FeatureGFX10_3Insts : SubtargetFeature<"gfx10-3-insts",
"GFX10_3Insts",
"true",
@@ -343,6 +367,12 @@ def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
"Has i16/f16 instructions"
>;
+def FeatureTrue16BitInsts : SubtargetFeature<"true16",
+ "HasTrue16BitInsts",
+ "true",
+ "True 16-bit operand instructions"
+>;
+
def FeatureVOP3P : SubtargetFeature<"vop3p",
"HasVOP3PInsts",
"true",
@@ -458,6 +488,12 @@ def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
"Support NSA encoding for image instructions"
>;
+def FeatureImageInsts : SubtargetFeature<"image-insts",
+ "HasImageInsts",
+ "true",
+ "Support image instructions"
+>;
+
def FeatureExtendedImageInsts : SubtargetFeature<"extended-image-insts",
"HasExtendedImageInsts",
"true",
@@ -536,6 +572,13 @@ def FeatureDot7Insts : SubtargetFeature<"dot7-insts",
"Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
>;
+def FeatureDot8Insts : SubtargetFeature<"dot8-insts",
+ "HasDot8Insts",
+ "true",
+ "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16, "
+ "v_dot4_i32_iu8, v_dot8_i32_iu4 instructions"
+>;
+
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
"HasMAIInsts",
"true",
@@ -548,11 +591,28 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
-def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
- "HasAtomicFaddInsts",
+def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts",
+ "HasAtomicFaddRtnInsts",
"true",
- "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
- "global_atomic_pk_add_f16 instructions",
+ "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
+ "return original value",
+ [FeatureFlatGlobalInsts]
+>;
+
+def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
+ "HasAtomicFaddNoRtnInsts",
+ "true",
+ "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
+ "don't return original value",
+ [FeatureFlatGlobalInsts]
+>;
+
+def FeatureAtomicPkFaddNoRtnInsts
+ : SubtargetFeature<"atomic-pk-fadd-no-rtn-insts",
+ "HasAtomicPkFaddNoRtnInsts",
+ "true",
+ "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
+ "don't return original value",
[FeatureFlatGlobalInsts]
>;
@@ -632,6 +692,12 @@ class SubtargetFeatureNSAMaxSize <int Value> : SubtargetFeature <
def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>;
def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>;
+def FeatureVOPD : SubtargetFeature<"vopd",
+ "HasVOPDInsts",
+ "true",
+ "Has VOPD dual issue wave32 instructions"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -762,7 +828,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
- FeatureTrigReducedRange, FeatureExtendedImageInsts
+ FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts
]
>;
@@ -772,7 +838,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
- FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess
+ FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
+ FeatureImageInsts
]
>;
@@ -787,7 +854,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
- FeatureUnalignedBufferAccess
+ FeatureUnalignedBufferAccess, FeatureImageInsts
]
>;
@@ -824,6 +891,25 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts
+ ]
+>;
+
+def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
+ "gfx11",
+ [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
+ FeatureFlatAddressSpace, Feature16BitInsts,
+ FeatureInv2PiInlineImm, FeatureApertureRegs,
+ FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts,
+ FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts,
+ FeatureGFX11Insts, FeatureVOP3P, FeatureVOPD, FeatureTrue16BitInsts,
+ FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
+ FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
+ FeatureAddNoCarryInsts, FeatureFmaMixInsts,
+ FeatureNoSdstCMPX, FeatureVscnt,
+ FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
+ FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
+ FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
]
>;
@@ -910,6 +996,7 @@ def FeatureISAVersion9_0_0 : FeatureSet<
FeatureLDSBankCount32,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureImageGather4D16Bug]>;
@@ -919,6 +1006,7 @@ def FeatureISAVersion9_0_2 : FeatureSet<
FeatureLDSBankCount32,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureImageGather4D16Bug]>;
@@ -927,6 +1015,7 @@ def FeatureISAVersion9_0_4 : FeatureSet<
FeatureLDSBankCount32,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureFmaMixInsts,
FeatureImageGather4D16Bug]>;
@@ -938,6 +1027,7 @@ def FeatureISAVersion9_0_6 : FeatureSet<
FeatureLDSBankCount32,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureDLInsts,
FeatureDot1Insts,
@@ -953,6 +1043,7 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureLDSBankCount32,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureDLInsts,
FeatureDot1Insts,
@@ -964,7 +1055,8 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureDot7Insts,
FeatureMAIInsts,
FeaturePkFmacF16Inst,
- FeatureAtomicFaddInsts,
+ FeatureAtomicFaddNoRtnInsts,
+ FeatureAtomicPkFaddNoRtnInsts,
FeatureSupportsSRAMECC,
FeatureMFMAInlineLiteralBug,
FeatureImageGather4D16Bug]>;
@@ -975,6 +1067,7 @@ def FeatureISAVersion9_0_9 : FeatureSet<
FeatureLDSBankCount32,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureImageGather4D16Bug]>;
@@ -995,7 +1088,10 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeaturePkFmacF16Inst,
- FeatureAtomicFaddInsts,
+ FeatureAtomicFaddRtnInsts,
+ FeatureAtomicFaddNoRtnInsts,
+ FeatureAtomicPkFaddNoRtnInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureSupportsSRAMECC,
FeaturePackedTID,
@@ -1007,9 +1103,36 @@ def FeatureISAVersion9_0_C : FeatureSet<
FeatureLDSBankCount32,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
+ FeatureImageInsts,
FeatureMadMacF32Insts,
FeatureImageGather4D16Bug]>;
+def FeatureISAVersion9_4_0 : FeatureSet<
+ [FeatureGFX9,
+ FeatureGFX90AInsts,
+ FeatureGFX940Insts,
+ FeatureFmaMixInsts,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot3Insts,
+ FeatureDot4Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureDot7Insts,
+ Feature64BitDPP,
+ FeaturePackedFP32Ops,
+ FeatureMAIInsts,
+ FeaturePkFmacF16Inst,
+ FeatureAtomicFaddRtnInsts,
+ FeatureAtomicFaddNoRtnInsts,
+ FeatureAtomicPkFaddNoRtnInsts,
+ FeatureSupportsSRAMECC,
+ FeaturePackedTID,
+ FeatureArchitectedFlatScratch,
+ FullRate64Ops]>;
+
// TODO: Organize more features into groups.
def FeatureGroup {
// Bugs present on gfx10.1.
@@ -1124,6 +1247,33 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureWavefrontSize32,
FeatureShaderCyclesRegister]>;
+def FeatureISAVersion11_Common : FeatureSet<
+ [FeatureGFX11,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot5Insts,
+ FeatureDot7Insts,
+ FeatureDot8Insts,
+ FeatureNSAEncoding,
+ FeatureNSAMaxSize5,
+ FeatureWavefrontSize32,
+ FeatureShaderCyclesRegister,
+ FeatureArchitectedFlatScratch,
+ FeatureAtomicFaddRtnInsts,
+ FeatureAtomicFaddNoRtnInsts,
+ FeatureImageInsts,
+ FeaturePackedTID,
+ FeatureVcmpxPermlaneHazard]>;
+
+// Features for GFX 11.0.0 and 11.0.1
+def FeatureISAVersion11_0 : FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureUserSGPRInit16Bug])>;
+
+def FeatureISAVersion11_0_2 : FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureUserSGPRInit16Bug])>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@@ -1152,8 +1302,10 @@ def AMDGPUAsmVariants {
int SDWA9_ID = 3;
string DPP = "DPP";
int DPP_ID = 4;
+ string VOP3_DPP = "VOP3_DPP";
+ int VOP3_DPP_ID = 5;
string Disable = "Disable";
- int Disable_ID = 5;
+ int Disable_ID = 6;
}
def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
@@ -1176,12 +1328,16 @@ def SDWA9AsmParserVariant : AsmParserVariant {
let Name = AMDGPUAsmVariants.SDWA9;
}
-
def DPPAsmParserVariant : AsmParserVariant {
let Variant = AMDGPUAsmVariants.DPP_ID;
let Name = AMDGPUAsmVariants.DPP;
}
+def VOP3_DPPAsmParserVariant : AsmParserVariant {
+ let Variant = AMDGPUAsmVariants.VOP3_DPP_ID;
+ let Name = AMDGPUAsmVariants.VOP3_DPP;
+}
+
def AMDGPU : Target {
// Pull in Instruction Info:
let InstructionSet = AMDGPUInstrInfo;
@@ -1190,7 +1346,8 @@ def AMDGPU : Target {
VOP3AsmParserVariant,
SDWAAsmParserVariant,
SDWA9AsmParserVariant,
- DPPAsmParserVariant];
+ DPPAsmParserVariant,
+ VOP3_DPPAsmParserVariant];
let AssemblyWriters = [AMDGPUAsmWriter];
let AllowRegisterRenaming = 1;
}
@@ -1216,6 +1373,12 @@ def isGFX6GFX7GFX10 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), (not FeatureGFX11Insts))>;
+
+def isGFX6GFX7GFX10Plus :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
AssemblerPredicate<(all_of (not FeatureGCN3Encoding))>;
def isGFX7Only :
@@ -1225,6 +1388,12 @@ def isGFX7Only :
def isGFX7GFX10 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts, (not FeatureGFX11Insts))>;
+
+def isGFX7GFX10GFX11 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureCIInsts)>;
def isGFX7GFX8GFX9 :
@@ -1248,6 +1417,21 @@ def isGFX6GFX7GFX8GFX9NotGFX90A :
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
AssemblerPredicate<(all_of (not FeatureGFX10Insts), (not FeatureGFX90AInsts))>;
+def isGFX6GFX7GFX8GFX9GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<(all_of (not FeatureGFX11Insts))>;
+
+def isGFX7GFX8GFX9GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<(all_of FeatureCIInsts, (not FeatureGFX11Insts))>;
+
def isGFX7Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
AssemblerPredicate<(all_of FeatureCIInsts)>;
@@ -1287,18 +1471,37 @@ def isGFX8GFX9NotGFX90A :
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
def isGFX90AOnly :
- Predicate<"Subtarget->hasGFX90AInsts()">,
- AssemblerPredicate<(all_of FeatureGFX90AInsts)>;
+ Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
def isGFX908orGFX90A :
- Predicate<"Subtarget->hasMAIInsts()">,
- AssemblerPredicate<(all_of FeatureMAIInsts)>;
+ Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>;
+
+def isGFX940Plus :
+ Predicate<"Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX940Insts)>;
+
+def isGFX940GFX11Plus :
+ Predicate<"Subtarget->hasGFX940Insts() ||"
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
+ AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;
+
+def isGFX8GFX9NotGFX940 :
+ Predicate<"!Subtarget->hasGFX940Insts() &&"
+ "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
+ AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>;
def isGFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding)>;
+def isGFX10Only :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<(all_of FeatureGFX10Insts, (not FeatureGFX11Insts))>;
+
def isGFX10Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
AssemblerPredicate<(all_of FeatureGFX10Insts)>;
@@ -1308,6 +1511,25 @@ def isGFX10Before1030 :
"!Subtarget->hasGFX10_3Insts()">,
AssemblerPredicate<(all_of FeatureGFX10Insts,(not FeatureGFX10_3Insts))>;
+def isGFX9GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX11Insts))>;
+
+def isGFX8GFX9GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9 ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<(all_of FeatureGFX8Insts, (not FeatureGFX11Insts))>;
+
+def isGFX11Only :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX11">,
+ AssemblerPredicate<(all_of FeatureGFX11Insts)>;
+
+def isGFX11Plus :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
+ AssemblerPredicate<(all_of FeatureGFX11Insts)>;
+
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
@@ -1321,7 +1543,9 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
- AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;
+def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">,
+ AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;
def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">,
AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;
@@ -1354,6 +1578,11 @@ def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
AssemblerPredicate<(all_of Feature16BitInsts)>;
+
+def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">,
+ AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
+def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;
+
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>;
@@ -1385,7 +1614,10 @@ def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
def HasFmaakFmamkF32Insts :
Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
- AssemblerPredicate<(any_of FeatureGFX10Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
+
+def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
+ AssemblerPredicate<(all_of FeatureImageInsts)>;
def HasExtendedImageInsts : Predicate<"Subtarget->hasExtendedImageInsts()">,
AssemblerPredicate<(all_of FeatureExtendedImageInsts)>;
@@ -1454,6 +1686,9 @@ def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">,
AssemblerPredicate<(all_of FeatureDot7Insts)>;
+def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">,
+ AssemblerPredicate<(all_of FeatureDot8Insts)>;
+
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
@@ -1478,8 +1713,13 @@ def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
-def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
- AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>;
+def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">,
+ AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>;
+def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">,
+ AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>;
+def HasAtomicPkFaddNoRtnInsts
+ : Predicate<"Subtarget->hasAtomicPkFaddNoRtnInsts()">,
+ AssemblerPredicate<(all_of FeatureAtomicPkFaddNoRtnInsts)>;
def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">,
AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index bebf032b5535..74be0336851c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -14,12 +14,11 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallSet.h"
+#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-annotate-uniform"
@@ -33,8 +32,18 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
LegacyDivergenceAnalysis *DA;
MemorySSA *MSSA;
AliasAnalysis *AA;
- DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isEntryFunc;
+ bool Changed;
+
+ void setUniformMetadata(Instruction *I) {
+ I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
+ Changed = true;
+ }
+
+ void setNoClobberMetadata(Instruction *I) {
+ I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
+ Changed = true;
+ }
public:
static char ID;
@@ -54,7 +63,6 @@ public:
void visitBranchInst(BranchInst &I);
void visitLoadInst(LoadInst &I);
- bool isClobberedInFunction(LoadInst * Load);
};
} // End anonymous namespace
@@ -69,88 +77,6 @@ INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
char AMDGPUAnnotateUniformValues::ID = 0;
-static void setUniformMetadata(Instruction *I) {
- I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
-}
-static void setNoClobberMetadata(Instruction *I) {
- I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
-}
-
-bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
- MemorySSAWalker *Walker = MSSA->getWalker();
- SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
- SmallSet<MemoryAccess *, 8> Visited;
- MemoryLocation Loc(MemoryLocation::get(Load));
-
- const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
- Instruction *DefInst = Def->getMemoryInst();
- LLVM_DEBUG(dbgs() << " Def: " << *DefInst << '\n');
-
- if (isa<FenceInst>(DefInst))
- return false;
-
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::amdgcn_s_barrier:
- case Intrinsic::amdgcn_wave_barrier:
- return false;
- default:
- break;
- }
- }
-
- // Ignore atomics not aliasing with the original load, any atomic is a
- // universal MemoryDef from MSSA's point of view too, just like a fence.
- const auto checkNoAlias = [this, Load](auto I) -> bool {
- return I && AA->isNoAlias(I->getPointerOperand(),
- Load->getPointerOperand());
- };
-
- if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
- checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
- return false;
-
- return true;
- };
-
- LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
-
- // Start with a nearest dominating clobbering access, it will be either
- // live on entry (nothing to do, load is not clobbered), MemoryDef, or
- // MemoryPhi if several MemoryDefs can define this memory state. In that
- // case add all Defs to WorkList and continue going up and checking all
- // the definitions of this memory location until the root. When all the
- // defs are exhausted and came to the entry state we have no clobber.
- // Along the scan ignore barriers and fences which are considered clobbers
- // by the MemorySSA, but not really writing anything into the memory.
- while (!WorkList.empty()) {
- MemoryAccess *MA = WorkList.pop_back_val();
- if (!Visited.insert(MA).second)
- continue;
-
- if (MSSA->isLiveOnEntryDef(MA))
- continue;
-
- if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
- if (isReallyAClobber(Def)) {
- LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
- return true;
- }
-
- WorkList.push_back(
- Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
- continue;
- }
-
- const MemoryPhi *Phi = cast<MemoryPhi>(MA);
- for (auto &Use : Phi->incoming_values())
- WorkList.push_back(cast<MemoryAccess>(&Use));
- }
-
- LLVM_DEBUG(dbgs() << " -> no clobber\n");
- return false;
-}
-
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
if (DA->isUniform(&I))
setUniformMetadata(&I);
@@ -160,46 +86,18 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
+ Instruction *PtrI = dyn_cast<Instruction>(Ptr);
+ if (PtrI)
+ setUniformMetadata(PtrI);
+
// We're tracking up to the Function boundaries, and cannot go beyond because
// of FunctionPass restrictions. We can ensure that memory is not clobbered
// for memory operations that are live-in to entry points only.
- Instruction *PtrI = dyn_cast<Instruction>(Ptr);
-
- if (!isEntryFunc) {
- if (PtrI)
- setUniformMetadata(PtrI);
+ if (!isEntryFunc)
return;
- }
-
- bool NotClobbered = false;
bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
- if (PtrI)
- NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
- else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
- if (GlobalLoad && !isClobberedInFunction(&I)) {
- NotClobbered = true;
- // Lookup for the existing GEP
- if (noClobberClones.count(Ptr)) {
- PtrI = noClobberClones[Ptr];
- } else {
- // Create GEP of the Value
- Function *F = I.getParent()->getParent();
- Value *Idx = Constant::getIntegerValue(
- Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
- // Insert GEP at the entry to make it dominate all uses
- PtrI = GetElementPtrInst::Create(I.getType(), Ptr,
- ArrayRef<Value *>(Idx), Twine(""),
- F->getEntryBlock().getFirstNonPHI());
- }
- I.replaceUsesOfWith(Ptr, PtrI);
- }
- }
-
- if (PtrI) {
- setUniformMetadata(PtrI);
- if (NotClobbered)
- setNoClobberMetadata(PtrI);
- }
+ if (GlobalLoad && !AMDGPU::isClobberedInFunction(&I, MSSA, AA))
+ setNoClobberMetadata(&I);
}
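
After this rewrite, the per-load logic reduces to a small decision table: a uniform pointer gets amdgpu.uniform, and amdgpu.noclobber now lands on the load itself, only in entry functions and only for global-address-space loads that the shared MemorySSA walk proves unclobbered. A standalone sketch of that table, under the assumption that the analysis results are already available as booleans (all names here are illustrative, not LLVM API):

    #include <cassert>

    struct Facts { bool UniformPtr, IsEntryFunc, GlobalAS, Clobbered; };
    struct Annot { bool Uniform = false, NoClobber = false; };

    // Mirrors the visitLoadInst flow above: early-out on divergent pointers,
    // then gate the no-clobber mark behind the three remaining conditions.
    Annot annotateLoad(Facts F) {
      Annot A;
      if (!F.UniformPtr)
        return A;               // divergent pointer: nothing to mark
      A.Uniform = true;         // "amdgpu.uniform"
      if (F.IsEntryFunc && F.GlobalAS && !F.Clobbered)
        A.NoClobber = true;     // "amdgpu.noclobber"
      return A;
    }

    int main() {
      assert(annotateLoad({true, true, true, false}).NoClobber);
      assert(!annotateLoad({true, false, true, false}).NoClobber);
      return 0;
    }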
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
@@ -215,9 +113,9 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
+ Changed = false;
visit(F);
- noClobberClones.clear();
- return true;
+ return Changed;
}
FunctionPass *
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 6e2984f2a04f..57a4660bc1eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -27,6 +27,8 @@
#include "SIMachineFunctionInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -34,6 +36,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -111,6 +114,12 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
+ IsTargetStreamerInitialized = false;
+}
+
+void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
+ IsTargetStreamerInitialized = true;
+
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
@@ -143,6 +152,10 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
}
void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
+ // Init target streamer if it has not yet happened
+ if (!IsTargetStreamerInitialized)
+ initTargetStreamer(M);
+
// Following code requires TargetStreamer to be present.
if (!getTargetStreamer())
return;
@@ -234,8 +247,8 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
auto &ObjectFileInfo = *Context.getObjectFileInfo();
auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
- Streamer.PushSection();
- Streamer.SwitchSection(&ReadOnlySection);
+ Streamer.pushSection();
+ Streamer.switchSection(&ReadOnlySection);
// CP microcode requires the kernel descriptor to be allocated on 64 byte
// alignment.
@@ -256,7 +269,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
- Streamer.PopSection();
+ Streamer.popSection();
}
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
@@ -319,7 +332,7 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
const DataLayout &DL = GV->getParent()->getDataLayout();
uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
- Align Alignment = GV->getAlign().getValueOr(Align(4));
+ Align Alignment = GV->getAlign().value_or(Align(4));
emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
emitLinkage(GV, GVSym);
@@ -339,7 +352,7 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
(STI.getTargetTriple().getOS() == Triple::AMDHSA ||
STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->switchSection(getObjFileLowering().getTextSection());
getTargetStreamer()->EmitCodeEnd(STI);
}
@@ -381,7 +394,7 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
}
- if (MFI.hasQueuePtr()) {
+ if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
}
@@ -437,6 +450,11 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ // Init target streamer lazily on the first function so that previous passes
+ // can set metadata.
+ if (!IsTargetStreamerInitialized)
+ initTargetStreamer(*MF.getFunction().getParent());
+
ResourceUsage = &getAnalysis<AMDGPUResourceUsageAnalysis>();
CurrentProgramInfo = SIProgramInfo();
@@ -454,7 +472,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(ConfigSection);
+ OutStreamer->switchSection(ConfigSection);
}
if (MFI->isModuleEntryFunction()) {
@@ -491,7 +509,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (isVerbose()) {
MCSectionELF *CommentSection =
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(CommentSection);
+ OutStreamer->switchSection(CommentSection);
if (!MFI->isEntryFunction()) {
OutStreamer->emitRawComment(" Function info:", false);
@@ -590,7 +608,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (DumpCodeInstEmitter) {
- OutStreamer->SwitchSection(
+ OutStreamer->switchSection(
Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
for (size_t i = 0; i < DisasmLines.size(); ++i) {
@@ -677,7 +695,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
const uint64_t MaxScratchPerWorkitem =
- GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
+ STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
ProgInfo.ScratchSize,
@@ -857,22 +875,18 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
LDSAlignShift = 9;
}
- unsigned LDSSpillSize =
- MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
-
- ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
+ ProgInfo.LDSSize = MFI->getLDSSize();
ProgInfo.LDSBlocks =
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
- // Scratch is allocated in 256 dword blocks.
- unsigned ScratchAlignShift = 10;
+ // Scratch is allocated in 64-dword or 256-dword blocks.
+ unsigned ScratchAlignShift =
+ STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
// We need to program the hardware with the amount of scratch memory that
// is used by the entire wave. ProgInfo.ScratchSize is the amount of
// scratch memory used per thread.
- ProgInfo.ScratchBlocks =
- alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
- 1ULL << ScratchAlignShift) >>
- ScratchAlignShift;
+ ProgInfo.ScratchBlocks = divideCeil(
+ ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
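
The arithmetic above is easy to sanity-check in isolation: divideCeil rounds the per-wave scratch byte count up to whole blocks, where a block is 1 << ScratchAlignShift bytes (256 bytes on GFX11+, 1024 bytes before). A minimal standalone model, with divideCeilModel standing in for llvm::divideCeil:

    #include <cassert>
    #include <cstdint>

    // Standalone model of llvm::divideCeil.
    static uint64_t divideCeilModel(uint64_t Num, uint64_t Den) {
      return (Num + Den - 1) / Den;
    }

    int main() {
      // Wave64 with 1000 bytes of scratch per thread -> 64000 bytes per wave.
      const uint64_t ScratchSize = 1000, WavefrontSize = 64;
      // Pre-GFX11: 1024-byte (256-dword) blocks; GFX11+: 256-byte (64-dword).
      assert(divideCeilModel(ScratchSize * WavefrontSize, 1ULL << 10) == 63);
      assert(divideCeilModel(ScratchSize * WavefrontSize, 1ULL << 8) == 250);
      return 0;
    }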
if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
@@ -886,8 +900,14 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
else if (MFI->hasWorkItemIDY())
TIDIGCompCnt = 1;
+ // The private segment wave byte offset is the last of the system SGPRs. We
+ // initially assumed it was allocated, and may have used it. It shouldn't harm
+ // anything to disable it if we know the stack isn't used here. We may still
+ // have emitted code reading it to initialize scratch, but if that's unused,
+ // reading garbage should be OK.
+ const bool EnablePrivateSegment = ProgInfo.ScratchBlocks > 0;
ProgInfo.ComputePGMRSrc2 =
- S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
+ S_00B84C_SCRATCH_EN(EnablePrivateSegment) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
// For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
S_00B84C_TRAP_HANDLER(STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled()) |
@@ -931,6 +951,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
@@ -942,7 +963,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
- OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
+ OutStreamer->emitInt32(
+ STM.getGeneration() >= AMDGPUSubtarget::GFX11
+ ? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
+ : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
// TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
// 0" comment but I don't see a corresponding field in the register spec.
@@ -951,14 +975,18 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
- OutStreamer->emitIntValue(
- S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
+ OutStreamer->emitInt32(
+ STM.getGeneration() >= AMDGPUSubtarget::GFX11
+ ? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
+ : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
}
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS);
- OutStreamer->emitInt32(
- S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+ unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
+ ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
+ : CurrentProgramInfo.LDSBlocks;
+ OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA);
OutStreamer->emitInt32(MFI->getPSInputEnable());
OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR);
@@ -984,6 +1012,13 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
MD->setEntryPoint(CC, MF.getFunction().getName());
MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+
+ // Only set AGPRs for supported devices
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ if (STM.hasMAIInsts()) {
+ MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
+ }
+
MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
if (AMDGPU::isCompute(CC)) {
@@ -995,12 +1030,14 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
// ScratchSize is in bytes, 16 aligned.
MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
- MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+ unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
+ ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
+ : CurrentProgramInfo.LDSBlocks;
+ MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
MD->setSpiPsInputEna(MFI->getPSInputEnable());
MD->setSpiPsInputAddr(MFI->getPSInputAddr());
}
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
if (STM.isWave32())
MD->setWave32(MF.getFunction().getCallingConv());
}
@@ -1067,7 +1104,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (MFI->hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
- if (MFI->hasQueuePtr())
+ if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (MFI->hasKernargSegmentPtr())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index d5c60aa3be7d..ddda2cf107b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -77,6 +77,8 @@ private:
const MachineFunction &MF,
const SIProgramInfo &PI) const;
+ void initTargetStreamer(Module &M);
+
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
@@ -132,6 +134,7 @@ protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
+ bool IsTargetStreamerInitialized;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 1e2cf3890d0a..3ccfd9dde269 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -311,6 +311,12 @@ Value *AMDGPUAtomicOptimizer::buildReduction(IRBuilder<> &B,
if (ST->isWave32())
return V;
+ if (ST->hasPermLane64()) {
+ // Reduce across the upper and lower 32 lanes.
+ return buildNonAtomicBinOp(
+ B, Op, V, B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {}, V));
+ }
+
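
v_permlane64_b32 swaps the two 32-lane halves of a wave64, so a single binary op between V and its swapped copy leaves every lane holding the combination of two values 32 lanes apart. A lane-array model of that step (purely illustrative: lanes are modeled as an array and Op as integer addition):

    #include <cassert>

    int main() {
      int V[64], Swapped[64], Reduced[64];
      for (int Lane = 0; Lane < 64; ++Lane)
        V[Lane] = Lane; // stand-in partial sums, one per lane
      // permlane64: lane i reads the value from lane (i + 32) % 64.
      for (int Lane = 0; Lane < 64; ++Lane)
        Swapped[Lane] = V[(Lane + 32) % 64];
      for (int Lane = 0; Lane < 64; ++Lane)
        Reduced[Lane] = V[Lane] + Swapped[Lane]; // Op = add
      assert(Reduced[0] == 0 + 32 && Reduced[63] == 63 + 31);
      return 0;
    }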
// Pick an arbitrary lane from 0..31 and an arbitrary lane from 32..63 and
// combine them with a scalar operation.
Function *ReadLane =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
new file mode 100644
index 000000000000..0a2cf3874245
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -0,0 +1,31 @@
+//===--- AMDGPUAttributes.def ---------------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the various function attributes
+// that indicate the *absence* of the corresponding implicit kernel
+// arguments.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
+AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
+AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id")
+AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr")
+AMDGPU_ATTRIBUTE(MULTIGRID_SYNC_ARG, "amdgpu-no-multigrid-sync-arg")
+AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
+AMDGPU_ATTRIBUTE(HEAP_PTR, "amdgpu-no-heap-ptr")
+AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x")
+AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y")
+AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")
+AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x")
+AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y")
+AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
+
+#undef AMDGPU_ATTRIBUTE
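
This .def file is an X-macro: each includer defines AMDGPU_ATTRIBUTE to a different expansion before including the file, and AMDGPUAttributor.cpp below uses exactly that to derive a position enum, a bitmask enum, and a name table from one list. A self-contained sketch of the same pattern, using a hypothetical two-entry list macro in place of re-including a .def file:

    #include <cstdio>

    #define MY_ATTRS(X) X(FOO, "no-foo") X(BAR, "no-bar")

    #define X(Name, Str) Name##_POS,
    enum Positions { MY_ATTRS(X) LAST_POS };
    #undef X

    #define X(Name, Str) Name = 1 << Name##_POS,
    enum Masks { MY_ATTRS(X) ALL_MASK = (1 << LAST_POS) - 1 };
    #undef X

    #define X(Name, Str) {Name, Str},
    static const struct { int Mask; const char *Str; } Table[] = { MY_ATTRS(X) };
    #undef X

    int main() {
      for (auto &E : Table)
        std::printf("%d %s\n", E.Mask, E.Str); // 1 no-foo / 2 no-bar
      return 0;
    }

The real file gets the same effect by leaving AMDGPU_ATTRIBUTE undefined at its end, so each #include site supplies its own expansion.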
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index b4ebc7d7d75f..8de0d7e6bff1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -12,6 +12,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -22,37 +23,25 @@
using namespace llvm;
+#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
+
+enum ImplicitArgumentPositions {
+ #include "AMDGPUAttributes.def"
+ LAST_ARG_POS
+};
+
+#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
+
enum ImplicitArgumentMask {
NOT_IMPLICIT_INPUT = 0,
-
- // SGPRs
- DISPATCH_PTR = 1 << 0,
- QUEUE_PTR = 1 << 1,
- DISPATCH_ID = 1 << 2,
- IMPLICIT_ARG_PTR = 1 << 3,
- WORKGROUP_ID_X = 1 << 4,
- WORKGROUP_ID_Y = 1 << 5,
- WORKGROUP_ID_Z = 1 << 6,
-
- // VGPRS:
- WORKITEM_ID_X = 1 << 7,
- WORKITEM_ID_Y = 1 << 8,
- WORKITEM_ID_Z = 1 << 9,
- ALL_ARGUMENT_MASK = (1 << 10) - 1
+ #include "AMDGPUAttributes.def"
+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};
+#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
StringLiteral> ImplicitAttrs[] = {
- {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
- {QUEUE_PTR, "amdgpu-no-queue-ptr"},
- {DISPATCH_ID, "amdgpu-no-dispatch-id"},
- {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
- {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
- {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
- {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
- {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
- {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
- {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
+ #include "AMDGPUAttributes.def"
};
// We do not need to note the x workitem or workgroup id because they are always
@@ -61,7 +50,9 @@ static constexpr std::pair<ImplicitArgumentMask,
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
-intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
+intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
+ bool HasApertureRegs, bool SupportsGetDoorBellID) {
+ unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
switch (ID) {
case Intrinsic::amdgcn_workitem_id_x:
NonKernelOnly = true;
@@ -87,13 +78,23 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
return DISPATCH_ID;
case Intrinsic::amdgcn_implicitarg_ptr:
return IMPLICIT_ARG_PTR;
+ // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
+ // queue_ptr.
case Intrinsic::amdgcn_queue_ptr:
+ NeedsImplicit = (CodeObjectVersion == 5);
+ return QUEUE_PTR;
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:
- // TODO: Does not require queue ptr on gfx9+
+ if (HasApertureRegs)
+ return NOT_IMPLICIT_INPUT;
+ // Under V5, we need implicitarg_ptr + offsets to access private_base or
+ // shared_base. For pre-V5, however, we need to access them through queue_ptr +
+ // offsets.
+ return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
case Intrinsic::trap:
- case Intrinsic::debugtrap:
- IsQueuePtr = true;
+ if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
+ return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
+ NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
return QUEUE_PTR;
default:
return NOT_IMPLICIT_INPUT;
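
The switch above encodes a small decision table keyed on code object version and subtarget features. A standalone model of the is_shared/is_private and trap rows (simplified assumption: plain booleans in, a mask out; the NeedsImplicit side effect is only noted in a comment):

    #include <cassert>

    enum Mask { NONE, QUEUE, IMPLICITARG };

    // Which implicit input do amdgcn_is_shared/is_private need?
    Mask sharedPrivateMask(bool HasApertureRegs, unsigned CodeObjectVersion) {
      if (HasApertureRegs)
        return NONE;                      // apertures come from registers
      return CodeObjectVersion == 5 ? IMPLICITARG : QUEUE;
    }

    // Which implicit input does llvm.trap need?
    Mask trapMask(bool SupportsGetDoorbellID, unsigned CodeObjectVersion) {
      if (SupportsGetDoorbellID)          // doorbell ID readable since V4
        return CodeObjectVersion >= 4 ? NONE : QUEUE;
      return QUEUE;                       // plus implicitarg_ptr under V5
    }

    int main() {
      assert(sharedPrivateMask(true, 4) == NONE);
      assert(sharedPrivateMask(false, 5) == IMPLICITARG);
      assert(trapMask(true, 5) == NONE);
      assert(trapMask(false, 4) == QUEUE);
      return 0;
    }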
@@ -114,7 +115,7 @@ static bool isDSAddress(const Constant *C) {
/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
-static bool funcRequiresImplicitArgPtr(const Function &F) {
+static bool funcRequiresHostcallPtr(const Function &F) {
// Sanitizers require the hostcall buffer passed in the implicit arguments.
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
@@ -140,6 +141,12 @@ public:
return ST.hasApertureRegs();
}
+ /// Check if the subtarget supports GetDoorbellID.
+ bool supportsGetDoorbellID(Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.supportsGetDoorbellID();
+ }
+
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
return ST.getFlatWorkGroupSizes(F);
@@ -152,7 +159,7 @@ public:
}
private:
- /// Check if the ConstantExpr \p CE requires queue ptr attribute.
+ /// Check if the ConstantExpr \p CE requires the queue pointer.
static bool visitConstExpr(const ConstantExpr *CE) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
@@ -186,7 +193,7 @@ private:
}
public:
- /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
+ /// Returns true if \p Fn needs the queue pointer because of \p C.
bool needsQueuePtr(const Constant *C, Function &Fn) {
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
bool HasAperture = hasApertureRegs(Fn);
@@ -205,7 +212,7 @@ public:
}
private:
- /// Used to determine if the Constant needs a queue ptr attribute.
+ /// Used to determine if the Constant needs the queue pointer.
DenseMap<const Constant *, uint8_t> ConstantStatus;
};
@@ -353,12 +360,15 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
- const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
- if (NeedsImplicit)
+ const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
+ if (NeedsHostcall) {
removeAssumedBits(IMPLICIT_ARG_PTR);
+ removeAssumedBits(HOSTCALL_PTR);
+ }
for (auto Attr : ImplicitAttrs) {
- if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
+ if (NeedsHostcall &&
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
continue;
if (F->hasFnAttribute(Attr.second))
@@ -388,9 +398,11 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
return indicatePessimisticFixpoint();
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
- auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
- bool NeedsQueuePtr = false;
+ bool NeedsImplicit = false;
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
+ bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
for (Function *Callee : AAEdges.getOptimisticEdges()) {
Intrinsic::ID IID = Callee->getIntrinsicID();
@@ -403,20 +415,87 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
bool NonKernelOnly = false;
ImplicitArgumentMask AttrMask =
- intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
+ intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
+ HasApertureRegs, SupportsGetDoorbellID);
if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
}
}
- // If we found that we need amdgpu-queue-ptr, nothing else to do.
- if (NeedsQueuePtr) {
+ // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
+ if (NeedsImplicit)
+ removeAssumedBits(IMPLICIT_ARG_PTR);
+
+ if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
+ // Under V5, we need implicitarg_ptr + offsets to access private_base or
+ // shared_base. We do not actually need queue_ptr.
+ if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
+ removeAssumedBits(IMPLICIT_ARG_PTR);
+ else
+ removeAssumedBits(QUEUE_PTR);
+ }
+
+ if (funcRetrievesMultigridSyncArg(A)) {
+ assert(!isAssumed(IMPLICIT_ARG_PTR) &&
+ "multigrid_sync_arg needs implicitarg_ptr");
+ removeAssumedBits(MULTIGRID_SYNC_ARG);
+ }
+
+ if (funcRetrievesHostcallPtr(A)) {
+ assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
+ removeAssumedBits(HOSTCALL_PTR);
+ }
+
+ if (funcRetrievesHeapPtr(A)) {
+ assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
+ removeAssumedBits(HEAP_PTR);
+ }
+
+ if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
+ assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
removeAssumedBits(QUEUE_PTR);
- return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
- ChangeStatus::UNCHANGED;
}
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
+ : ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ SmallVector<Attribute, 8> AttrList;
+ LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+ for (auto Attr : ImplicitAttrs) {
+ if (isKnown(Attr.first))
+ AttrList.push_back(Attribute::get(Ctx, Attr.second));
+ }
+
+ return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+ /* ForceReplace */ true);
+ }
+
+ const std::string getAsStr() const override {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "AMDInfo[";
+ for (auto Attr : ImplicitAttrs)
+ OS << ' ' << Attr.second;
+ OS << " ]";
+ return OS.str();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+private:
+ bool checkForQueuePtr(Attributor &A) {
+ Function *F = getAssociatedFunction();
+ bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
+
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+ bool NeedsQueuePtr = false;
+
auto CheckAddrSpaceCasts = [&](Instruction &I) {
unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
if (castRequiresQueuePtr(SrcAS)) {
@@ -431,7 +510,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// `checkForAllInstructions` is much cheaper than going through all
// instructions, try it first.
- // amdgpu-queue-ptr is not needed if aperture regs is present.
+ // The queue pointer is not needed if aperture regs are present.
if (!HasApertureRegs) {
bool UsedAssumedInformation = false;
A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
@@ -439,61 +518,79 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
UsedAssumedInformation);
}
- // If we found that we need amdgpu-queue-ptr, nothing else to do.
- if (NeedsQueuePtr) {
- removeAssumedBits(QUEUE_PTR);
- return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
- ChangeStatus::UNCHANGED;
- }
+ // If we found that we need the queue pointer, nothing else to do.
+ if (NeedsQueuePtr)
+ return true;
- if (!IsNonEntryFunc && HasApertureRegs) {
- return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
- ChangeStatus::UNCHANGED;
- }
+ if (!IsNonEntryFunc && HasApertureRegs)
+ return false;
for (BasicBlock &BB : *F) {
for (Instruction &I : BB) {
for (const Use &U : I.operands()) {
if (const auto *C = dyn_cast<Constant>(U)) {
- if (InfoCache.needsQueuePtr(C, *F)) {
- removeAssumedBits(QUEUE_PTR);
- return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
- ChangeStatus::UNCHANGED;
- }
+ if (InfoCache.needsQueuePtr(C, *F))
+ return true;
}
}
}
}
- return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
- ChangeStatus::UNCHANGED;
+ return false;
}
- ChangeStatus manifest(Attributor &A) override {
- SmallVector<Attribute, 8> AttrList;
- LLVMContext &Ctx = getAssociatedFunction()->getContext();
+ bool funcRetrievesMultigridSyncArg(Attributor &A) {
+ auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
+ AAPointerInfo::OffsetAndSize OAS(Pos, 8);
+ return funcRetrievesImplicitKernelArg(A, OAS);
+ }
- for (auto Attr : ImplicitAttrs) {
- if (isKnown(Attr.first))
- AttrList.push_back(Attribute::get(Ctx, Attr.second));
- }
+ bool funcRetrievesHostcallPtr(Attributor &A) {
+ auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
+ AAPointerInfo::OffsetAndSize OAS(Pos, 8);
+ return funcRetrievesImplicitKernelArg(A, OAS);
+ }
- return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
- /* ForceReplace */ true);
+ bool funcRetrievesHeapPtr(Attributor &A) {
+ if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
+ return false;
+ AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
+ return funcRetrievesImplicitKernelArg(A, OAS);
}
- const std::string getAsStr() const override {
- std::string Str;
- raw_string_ostream OS(Str);
- OS << "AMDInfo[";
- for (auto Attr : ImplicitAttrs)
- OS << ' ' << Attr.second;
- OS << " ]";
- return OS.str();
+ bool funcRetrievesQueuePtr(Attributor &A) {
+ if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
+ return false;
+ AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
+ return funcRetrievesImplicitKernelArg(A, OAS);
}
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
+ bool funcRetrievesImplicitKernelArg(Attributor &A,
+ AAPointerInfo::OffsetAndSize OAS) {
+ // Check whether this is a call to the implicitarg_ptr intrinsic that
+ // is used to retrieve the hostcall pointer. The hostcall implicit arg
+ // is treated as unused only if every use of the implicitarg_ptr is a
+ // load that clearly does not retrieve any byte of the hostcall
+ // pointer. We check this by tracing all the uses of the initial call
+ // to the implicitarg_ptr intrinsic.
+ auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
+ auto &Call = cast<CallBase>(I);
+ if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
+ return true;
+
+ const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
+ *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
+
+ return PointerInfoAA.forallInterferingAccesses(
+ OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
+ return Acc.getRemoteInst()->isDroppable();
+ });
+ };
+
+ bool UsedAssumedInformation = false;
+ return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
+ UsedAssumedInformation);
+ }
};
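
funcRetrievesImplicitKernelArg reduces "does this kernel read the hostcall/heap/queue pointer?" to an interval query: each hidden argument occupies a fixed 8-byte window in the implicitarg buffer, and a load through implicitarg_ptr matters only if its accessed bytes intersect that window. A minimal model of the overlap test (an assumed simplification of AAPointerInfo's bookkeeping; the offset 24 below is hypothetical):

    #include <cassert>
    #include <cstdint>

    struct OffsetAndSize { uint64_t Offset, Size; };

    // Does a load of LoadSize bytes at LoadOff touch the argument's window?
    bool touches(OffsetAndSize Arg, uint64_t LoadOff, uint64_t LoadSize) {
      return LoadOff < Arg.Offset + Arg.Size && Arg.Offset < LoadOff + LoadSize;
    }

    int main() {
      OffsetAndSize Hostcall{24, 8};       // hypothetical byte window
      assert(!touches(Hostcall, 0, 8));    // reads an earlier hidden arg
      assert(touches(Hostcall, 24, 8));    // reads the pointer itself
      assert(touches(Hostcall, 28, 4));    // a partial read still counts
      return 0;
    }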
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
@@ -646,9 +743,14 @@ public:
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
- &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
+ &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});
+
+ AttributorConfig AC(CGUpdater);
+ AC.Allowed = &Allowed;
+ AC.IsModulePass = true;
+ AC.DefaultInitializeLiveInternals = false;
- Attributor A(Functions, InfoCache, CGUpdater, &Allowed);
+ Attributor A(Functions, InfoCache, AC);
for (Function &F : M) {
if (!F.isIntrinsic()) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index cd084fd5440a..fd812eb676ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#define DEBUG_TYPE "amdgpu-call-lowering"
@@ -349,7 +350,6 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
FunctionLoweringInfo &FLI) const {
MachineFunction &MF = B.getMF();
- MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MFI->setIfReturnsVoid(!Val);
@@ -365,40 +365,15 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
return true;
}
- auto const &ST = MF.getSubtarget<GCNSubtarget>();
-
- unsigned ReturnOpc = 0;
- if (IsShader)
- ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG;
- else if (CC == CallingConv::AMDGPU_Gfx)
- ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx;
- else
- ReturnOpc = AMDGPU::S_SETPC_B64_return;
-
+ unsigned ReturnOpc =
+ IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
auto Ret = B.buildInstrNoInsert(ReturnOpc);
- Register ReturnAddrVReg;
- if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
- ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
- Ret.addUse(ReturnAddrVReg);
- } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
- ReturnAddrVReg =
- MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
- Ret.addUse(ReturnAddrVReg);
- }
if (!FLI.CanLowerReturn)
insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
else if (!lowerReturnVal(B, Val, VRegs, Ret))
return false;
- if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
- ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
- &AMDGPU::SGPR_64RegClass);
- B.buildCopy(ReturnAddrVReg, LiveInReturn);
- }
-
// TODO: Handle CalleeSavedRegsViaCopy.
B.insertInstr(Ret);
@@ -479,7 +454,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info.hasQueuePtr()) {
+ if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
@@ -523,7 +498,7 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
const DataLayout &DL = F.getParent()->getDataLayout();
- Info->allocateModuleLDSGlobal(F.getParent());
+ Info->allocateModuleLDSGlobal(F);
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
@@ -543,9 +518,8 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
if (AllocSize == 0)
continue;
- MaybeAlign ABIAlign = IsByRef ? Arg.getParamAlign() : None;
- if (!ABIAlign)
- ABIAlign = DL.getABITypeAlign(ArgTy);
+ MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : None;
+ Align ABIAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
@@ -608,19 +582,11 @@ bool AMDGPUCallLowering::lowerFormalArguments(
const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
const DataLayout &DL = F.getParent()->getDataLayout();
- Info->allocateModuleLDSGlobal(F.getParent());
+ Info->allocateModuleLDSGlobal(F);
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
- if (!IsEntryFunc) {
- Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
- Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
- &AMDGPU::SGPR_64RegClass);
- MBB.addLiveIn(ReturnAddrReg);
- B.buildCopy(LiveInReturn, ReturnAddrReg);
- }
-
if (Info->hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 1682d43ae671..b6c66077675f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -148,53 +148,32 @@ def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
(sequence "VGPR%u", 248, 255))
>;
-def CSR_AMDGPU_AGPRs_32_255 : CalleeSavedRegs<
+def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
(sequence "AGPR%u", 32, 255)
>;
-def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
- (sequence "SGPR%u", 32, 105)
+def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
+ (sequence "SGPR%u", 30, 105)
>;
-def CSR_AMDGPU_SI_Gfx_SGPRs_4_29 : CalleeSavedRegs<
- (sequence "SGPR%u", 4, 29)
+def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<
+ (add (sequence "SGPR%u", 4, 31), (sequence "SGPR%u", 64, 105))
>;
-def CSR_AMDGPU_SI_Gfx_SGPRs_64_105 : CalleeSavedRegs<
- (sequence "SGPR%u", 64, 105)
+def CSR_AMDGPU : CalleeSavedRegs<
+ (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs)
>;
-// Just to get the regmask, not for calling convention purposes.
-def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs<
- (sequence "VGPR%u", 0, 255)
->;
-
-def CSR_AMDGPU_AllAGPRs : CalleeSavedRegs<
- (sequence "AGPR%u", 0, 255)
->;
-def CSR_AMDGPU_AllVectorRegs : CalleeSavedRegs<
- (add CSR_AMDGPU_AllVGPRs, CSR_AMDGPU_AllAGPRs)
->;
-
-// Just to get the regmask, not for calling convention purposes.
-def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs<
- (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)
->;
-
-def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
- (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105)
->;
-
-def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
- (add CSR_AMDGPU_HighRegs, CSR_AMDGPU_AGPRs_32_255)
+def CSR_AMDGPU_GFX90AInsts : CalleeSavedRegs<
+ (add CSR_AMDGPU, CSR_AMDGPU_AGPRs)
>;
def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
- (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
+ (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs)
>;
-def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs<
- (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs_32_255)
+def CSR_AMDGPU_SI_Gfx_GFX90AInsts : CalleeSavedRegs<
+ (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs)
>;
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;
@@ -233,3 +212,24 @@ def CC_AMDGPU : CallingConv<[
"AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
CCDelegateTo<CC_AMDGPU_Func>>
]>;
+
+// Trivial class to denote when a def is used only to get a RegMask, i.e.
+// SaveList is ignored and the def is not used as part of any calling
+// convention.
+class RegMask<dag mask> : CalleeSavedRegs<mask>;
+
+def AMDGPU_AllVGPRs : RegMask<
+ (sequence "VGPR%u", 0, 255)
+>;
+
+def AMDGPU_AllAGPRs : RegMask<
+ (sequence "AGPR%u", 0, 255)
+>;
+
+def AMDGPU_AllVectorRegs : RegMask<
+ (add AMDGPU_AllVGPRs, AMDGPU_AllAGPRs)
+>;
+
+def AMDGPU_AllAllocatableSRegs : RegMask<
+ (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)
+>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 1920684d8f1f..94d7844e8a32 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -877,7 +877,7 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
return getMul64(Builder, LHS, RHS).second;
}
-/// Figure out how many bits are really needed for this ddivision. \p AtLeast is
+/// Figure out how many bits are really needed for this division. \p AtLeast is
/// an optimization hint to bypass the second ComputeNumSignBits call if the
/// first one is insufficient. Returns -1 on failure.
int AMDGPUCodeGenPrepare::getDivNumBits(BinaryOperator &I,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index e79ff9b597c9..c16d8ee51a7a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -373,7 +373,8 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
replaceRegWith(MRI, Dst, NegatedMatchInfo);
// Recreate the non-negated value for other uses of the old MatchInfoDst
- Builder.setInstrAndDebugLoc(MI);
+ auto NextInst = ++MatchInfo->getIterator();
+ Builder.setInstrAndDebugLoc(*NextInst);
Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index 04bf623bfa46..8fcf669041b9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -50,7 +50,7 @@ public:
}
bool createInitOrFiniKernel(Module &M, GlobalVariable *GV, bool IsCtor) {
- if (!GV)
+ if (!GV || !GV->hasInitializer())
return false;
ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
if (!GA || GA->getNumOperands() == 0)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
index bed0707f3aa7..8236ff609f85 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -22,7 +22,7 @@ namespace {
class ExportClustering : public ScheduleDAGMutation {
public:
- ExportClustering() {}
+ ExportClustering() = default;
void apply(ScheduleDAGInstrs *DAG) override;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
deleted file mode 100644
index ea6c6d0fd212..000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Promote indirect (bitcast) calls to direct calls when they are statically
-/// known to be direct. Required when InstCombine is not run (e.g. at OptNone)
-/// because AMDGPU does not support indirect calls.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/CallPromotionUtils.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "amdgpu-fix-function-bitcasts"
-
-namespace {
-class AMDGPUFixFunctionBitcasts final
- : public ModulePass,
- public InstVisitor<AMDGPUFixFunctionBitcasts> {
-
- bool runOnModule(Module &M) override;
-
- bool Modified;
-
-public:
- void visitCallBase(CallBase &CB) {
- if (CB.getCalledFunction())
- return;
- auto *Callee =
- dyn_cast<Function>(CB.getCalledOperand()->stripPointerCasts());
- if (Callee && isLegalToPromote(CB, Callee)) {
- promoteCall(CB, Callee);
- Modified = true;
- }
- }
-
- static char ID;
- AMDGPUFixFunctionBitcasts() : ModulePass(ID) {}
-};
-} // End anonymous namespace
-
-char AMDGPUFixFunctionBitcasts::ID = 0;
-char &llvm::AMDGPUFixFunctionBitcastsID = AMDGPUFixFunctionBitcasts::ID;
-INITIALIZE_PASS(AMDGPUFixFunctionBitcasts, DEBUG_TYPE,
- "Fix function bitcasts for AMDGPU", false, false)
-
-ModulePass *llvm::createAMDGPUFixFunctionBitcastsPass() {
- return new AMDGPUFixFunctionBitcasts();
-}
-
-bool AMDGPUFixFunctionBitcasts::runOnModule(Module &M) {
- Modified = false;
- visit(M);
- return Modified;
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7fd94a977be7..5747fc0ca8e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -47,10 +47,30 @@ def gi_vop3pmods :
GIComplexOperandMatcher<s32, "selectVOP3PMods">,
GIComplexPatternEquiv<VOP3PMods>;
+def gi_vop3pmodsdot :
+ GIComplexOperandMatcher<s32, "selectVOP3PModsDOT">,
+ GIComplexPatternEquiv<VOP3PModsDOT>;
+
+def gi_dotiuvop3pmods :
+ GIComplexOperandMatcher<s32, "selectDotIUVOP3PMods">,
+ GIComplexPatternEquiv<DotIUVOP3PMods>;
+
+def gi_wmmaopselvop3pmods :
+ GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
+ GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
+
def gi_vop3opselmods :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
GIComplexPatternEquiv<VOP3OpSelMods>;
+def gi_vinterpmods :
+ GIComplexOperandMatcher<s32, "selectVINTERPMods">,
+ GIComplexPatternEquiv<VINTERPMods>;
+
+def gi_vinterpmods_hi :
+ GIComplexOperandMatcher<s32, "selectVINTERPModsHi">,
+ GIComplexPatternEquiv<VINTERPModsHi>;
+
// FIXME: Why do we have both VOP3OpSel and VOP3OpSelMods?
def gi_vop3opsel :
GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">,
@@ -93,6 +113,10 @@ def gi_flat_scratch_saddr :
GIComplexOperandMatcher<s32, "selectScratchSAddr">,
GIComplexPatternEquiv<ScratchSAddr>;
+def gi_flat_scratch_svaddr :
+ GIComplexOperandMatcher<s32, "selectScratchSVAddr">,
+ GIComplexPatternEquiv<ScratchSVAddr>;
+
def gi_ds_1addr_1offset :
GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
GIComplexPatternEquiv<DS1Addr1Offset>;
@@ -123,7 +147,7 @@ def gi_smrd_buffer_imm32 :
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
-// directly before before selecting a glue-less load, so hide this
+// directly before selecting a glue-less load, so hide this
// distinction.
def : GINodeEquiv<G_LOAD, AMDGPUld_glue> {
@@ -222,6 +246,9 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_FMAX, SIbuffer_atomic_fmax>;
def : GINodeEquiv<G_AMDGPU_BUFFER_ATOMIC_CMPSWAP, SIbuffer_atomic_cmpswap>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD, SIsbuffer_load>;
+def : GINodeEquiv<G_FPTRUNC_ROUND_UPWARD, SIfptrunc_round_upward>;
+def : GINodeEquiv<G_FPTRUNC_ROUND_DOWNWARD, SIfptrunc_round_downward>;
+
class GISelSop2Pat <
SDPatternOperator node,
Instruction inst,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index cabdc6998011..1bbdc39a7a5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -7,8 +7,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUGlobalISelUtils.h"
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/Constants.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;
using namespace MIPatternMatch;
@@ -66,3 +68,12 @@ bool AMDGPU::isLegalVOP3PShuffleMask(ArrayRef<int> Mask) {
return true;
return (Mask[0] & 2) == (Mask[1] & 2);
}
+
+bool AMDGPU::hasAtomicFaddRtnForTy(const GCNSubtarget &Subtarget,
+ const LLT &Ty) {
+ if (Ty == LLT::scalar(32))
+ return Subtarget.hasAtomicFaddRtnInsts();
+ if (Ty == LLT::fixed_vector(2, 16) || Ty == LLT::scalar(64))
+ return Subtarget.hasGFX90AInsts();
+ return false;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 14d3a3fb7997..5c600d059b7a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -16,6 +16,8 @@
namespace llvm {
class MachineRegisterInfo;
+class GCNSubtarget;
+class LLT;
namespace AMDGPU {
@@ -24,7 +26,7 @@ std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);
bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask);
-
+bool hasAtomicFaddRtnForTy(const GCNSubtarget &Subtarget, const LLT &Ty);
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index f5018e3a19ac..6fa44ffcbfaa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -400,17 +400,15 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
AMDGPUAS::GLOBAL_ADDRESS);
- // Emit "printf buffer" argument if printf is used, otherwise emit dummy
- // "none" argument.
if (HiddenArgNumBytes >= 32) {
+ // We forbid the use of features requiring hostcall when compiling OpenCL
+ // before code object V5, which makes the mutual exclusion between the
+ // "printf buffer" and "hostcall buffer" here sound.
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
- else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
- // The printf runtime binding pass should have ensured that hostcall and
- // printf are not used in the same module.
- assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+ else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
- } else
+ else
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
}
@@ -427,8 +425,12 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
}
// Emit the pointer argument for multi-grid object.
- if (HiddenArgNumBytes >= 56)
- emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
+ if (HiddenArgNumBytes >= 56) {
+ if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
+ else
+ emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
+ }
}
bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
@@ -803,6 +805,8 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
auto &DL = M->getDataLayout();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
+ Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
+
if (HiddenArgNumBytes >= 8)
emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
Args);
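
The new alignTo call simply rounds the running kernarg offset up to the implicit-argument pointer alignment before any hidden argument is laid out. A standalone check of that rounding, with alignToModel standing in for llvm::alignTo:

    #include <cassert>
    #include <cstdint>

    constexpr uint64_t alignToModel(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align; // model of llvm::alignTo
    }

    int main() {
      // e.g. 20 bytes of explicit args, 8-byte implicit-arg alignment.
      assert(alignToModel(20, 8) == 24);
      assert(alignToModel(24, 8) == 24); // already aligned: unchanged
      return 0;
    }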
@@ -816,19 +820,17 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
auto Int8PtrTy =
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
- // Emit "printf buffer" argument if printf is used, emit "hostcall buffer"
- // if "hostcall" module flag is set, otherwise emit dummy "none" argument.
if (HiddenArgNumBytes >= 32) {
+ // We forbid the use of features requiring hostcall when compiling OpenCL
+ // before code object V5, which makes the mutual exclusion between the
+ // "printf buffer" and "hostcall buffer" here sound.
if (M->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
Args);
- else if (M->getModuleFlag("amdgpu_hostcall")) {
- // The printf runtime binding pass should have ensured that hostcall and
- // printf are not used in the same module.
- assert(!M->getNamedMetadata("llvm.printf.fmts"));
+ else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
Args);
- } else
+ else
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
}
@@ -847,9 +849,14 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
}
// Emit the pointer argument for multi-grid object.
- if (HiddenArgNumBytes >= 56)
- emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
- Args);
+ if (HiddenArgNumBytes >= 56) {
+ if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
+ Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
+ }
+ }
}
msgpack::MapDocNode
@@ -876,6 +883,12 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
Kern.getDocument()->getNode(STM.getWavefrontSize());
Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
+
+ // Only add the AGPR count to metadata for supported devices.
+ if (STM.hasMAIInsts()) {
+ Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
+ }
+
Kern[".max_flat_workgroup_size"] =
Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
Kern[".sgpr_spill_count"] =
@@ -971,13 +984,20 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
msgpack::ArrayDocNode Args) {
auto &Func = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ // No implicit kernel argument is used.
+ if (ST.getImplicitArgNumBytes(Func) == 0)
+ return;
+
const Module *M = Func.getParent();
auto &DL = M->getDataLayout();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
auto Int32Ty = Type::getInt32Ty(Func.getContext());
auto Int16Ty = Type::getInt16Ty(Func.getContext());
+ Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
@@ -1008,40 +1028,49 @@ void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
if (M->getNamedMetadata("llvm.printf.fmts")) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
Args);
- } else
+ } else {
Offset += 8; // Skipped.
+ }
- if (M->getModuleFlag("amdgpu_hostcall")) {
+ if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
Args);
- } else
+ } else {
Offset += 8; // Skipped.
+ }
- emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
+ if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
Args);
+ } else {
+ Offset += 8; // Skipped.
+ }
- // Ignore temporarily until it is implemented.
- // emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
- Offset += 8;
+ if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
+ else
+ Offset += 8; // Skipped.
if (Func.hasFnAttribute("calls-enqueue-kernel")) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
Args);
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
Args);
- } else
+ } else {
Offset += 16; // Skipped.
+ }
Offset += 72; // Reserved.
- // hidden_private_base and hidden_shared_base are only used by GFX8.
- if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // hidden_private_base and hidden_shared_base are only emitted when the
+ // subtarget does not have ApertureRegs.
+ if (!ST.hasApertureRegs()) {
emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
- } else
+ } else {
Offset += 8; // Skipped.
+ }
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (MFI.hasQueuePtr())
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
}
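
The hunks above replace module-wide checks (the "amdgpu_hostcall" module flag
and a lookup of __ockl_hostcall_internal) with per-function attributes, so each
kernel's hidden-argument block reflects only what that kernel can actually
reach. A minimal sketch of the producer side, assuming the standard
llvm::Function API; the analysis that infers these attributes is not part of
this diff, and markKernelImplicitArgsUnused is a hypothetical helper:

    #include "llvm/IR/Function.h"

    // If a kernel provably never reaches hostcall, multi-grid sync, or heap
    // services, marking it lets the metadata streamer emit "hidden_none"
    // (code object V2/V3) or leave the slot skipped (V5) instead of a live
    // pointer.
    static void markKernelImplicitArgsUnused(llvm::Function &F) {
      F.addFnAttr("amdgpu-no-hostcall-ptr");
      F.addFnAttr("amdgpu-no-multigrid-sync-arg");
      F.addFnAttr("amdgpu-no-heap-ptr");
    }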
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index bcf7fc449094..9b22d1f4d1b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -42,7 +42,7 @@ namespace HSAMD {
class MetadataStreamer {
public:
- virtual ~MetadataStreamer(){};
+ virtual ~MetadataStreamer() = default;
virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
new file mode 100644
index 000000000000..5c507ef70a8c
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -0,0 +1,439 @@
+//===--- AMDGPUIGroupLP.cpp - AMDGPU IGroupLP ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file This file defines a set of schedule DAG mutations that can be used to
+// override default scheduler behavior to enforce specific scheduling patterns.
+// They should be used in cases where runtime performance considerations, such
+// as inter-wavefront interactions, mean that compile-time heuristics cannot
+// predict the optimal instruction ordering, or in kernels where optimal
+// instruction scheduling is important enough to warrant manual intervention.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUIGroupLP.h"
+#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+namespace {
+
+static cl::opt<bool>
+ EnableIGroupLP("amdgpu-igrouplp",
+ cl::desc("Enable construction of Instruction Groups and "
+ "their ordering for scheduling"),
+ cl::init(false));
+
+static cl::opt<Optional<unsigned>>
+ VMEMGroupMaxSize("amdgpu-igrouplp-vmem-group-size", cl::init(None),
+ cl::Hidden,
+ cl::desc("The maximum number of instructions to include "
+ "in VMEM group."));
+
+static cl::opt<Optional<unsigned>>
+ MFMAGroupMaxSize("amdgpu-igrouplp-mfma-group-size", cl::init(None),
+ cl::Hidden,
+ cl::desc("The maximum number of instructions to include "
+ "in MFMA group."));
+
+static cl::opt<Optional<unsigned>>
+ LDRGroupMaxSize("amdgpu-igrouplp-ldr-group-size", cl::init(None),
+ cl::Hidden,
+ cl::desc("The maximum number of instructions to include "
+ "in lds/gds read group."));
+
+static cl::opt<Optional<unsigned>>
+ LDWGroupMaxSize("amdgpu-igrouplp-ldw-group-size", cl::init(None),
+ cl::Hidden,
+ cl::desc("The maximum number of instructions to include "
+ "in lds/gds write group."));
+
+typedef function_ref<bool(const MachineInstr &, const SIInstrInfo *)>
+ CanAddMIFn;
+
+// Classify instructions into groups to enable fine-tuned control over the
+// scheduler. These groups may be more specific than current SchedModel
+// instruction classes.
+class SchedGroup {
+private:
+ // Function that returns true if a non-bundle MI may be inserted into this
+ // group.
+ const CanAddMIFn canAddMI;
+
+ // Maximum number of SUnits that can be added to this group.
+ Optional<unsigned> MaxSize;
+
+ // Collection of SUnits that are classified as members of this group.
+ SmallVector<SUnit *, 32> Collection;
+
+ ScheduleDAGInstrs *DAG;
+
+ void tryAddEdge(SUnit *A, SUnit *B) {
+ if (A != B && DAG->canAddEdge(B, A)) {
+ DAG->addEdge(B, SDep(A, SDep::Artificial));
+ LLVM_DEBUG(dbgs() << "Adding edge...\n"
+ << "from: SU(" << A->NodeNum << ") " << *A->getInstr()
+ << "to: SU(" << B->NodeNum << ") " << *B->getInstr());
+ }
+ }
+
+public:
+ // Add DAG dependencies between all SUnits in this SchedGroup and this SU. If
+ // MakePred is true, SU will be a predecessor of the SUnits in this
+ // SchedGroup; otherwise SU will be a successor.
+ void link(SUnit &SU, bool MakePred = false) {
+ for (auto A : Collection) {
+ SUnit *B = &SU;
+ if (MakePred)
+ std::swap(A, B);
+
+ tryAddEdge(A, B);
+ }
+ }
+
+ // Add DAG dependencies between all SUnits in this SchedGroup and this SU. Use
+ // the predicate to determine whether SU should be a predecessor (P = true)
+ // or a successor (P = false) of this SchedGroup.
+ void link(SUnit &SU, function_ref<bool(const SUnit *A, const SUnit *B)> P) {
+ for (auto A : Collection) {
+ SUnit *B = &SU;
+ if (P(A, B))
+ std::swap(A, B);
+
+ tryAddEdge(A, B);
+ }
+ }
+
+ // Add DAG dependencies such that SUnits in this group shall be ordered
+ // before SUnits in OtherGroup.
+ void link(SchedGroup &OtherGroup) {
+ for (auto B : OtherGroup.Collection)
+ link(*B);
+ }
+
+ // Returns true if no more instructions may be added to this group.
+ bool isFull() { return MaxSize && Collection.size() >= *MaxSize; }
+
+ // Returns true if SU can be added to this SchedGroup.
+ bool canAddSU(SUnit &SU, const SIInstrInfo *TII) {
+ if (isFull())
+ return false;
+
+ MachineInstr &MI = *SU.getInstr();
+ if (MI.getOpcode() != TargetOpcode::BUNDLE)
+ return canAddMI(MI, TII);
+
+ // Special case for bundled MIs.
+ const MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::instr_iterator B = MI.getIterator(), E = ++B;
+ while (E != MBB->end() && E->isBundledWithPred())
+ ++E;
+
+ // Return true if all of the bundled MIs can be added to this group.
+ return std::all_of(
+ B, E, [this, TII](MachineInstr &MI) { return canAddMI(MI, TII); });
+ }
+
+ void add(SUnit &SU) { Collection.push_back(&SU); }
+
+ SchedGroup(CanAddMIFn canAddMI, Optional<unsigned> MaxSize,
+ ScheduleDAGInstrs *DAG)
+ : canAddMI(canAddMI), MaxSize(MaxSize), DAG(DAG) {}
+};
+
+bool isMFMASGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return TII->isMFMA(MI);
+}
+
+bool isVALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return TII->isVALU(MI) && !TII->isMFMA(MI);
+}
+
+bool isSALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return TII->isSALU(MI);
+}
+
+bool isVMEMSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI));
+}
+
+bool isVMEMReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return MI.mayLoad() &&
+ (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
+}
+
+bool isVMEMWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return MI.mayStore() &&
+ (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
+}
+
+bool isDSWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return MI.mayStore() && TII->isDS(MI);
+}
+
+bool isDSReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
+ return MI.mayLoad() && TII->isDS(MI);
+}
+
+class IGroupLPDAGMutation : public ScheduleDAGMutation {
+public:
+ const SIInstrInfo *TII;
+ ScheduleDAGMI *DAG;
+
+ IGroupLPDAGMutation() = default;
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+// DAG mutation that coordinates with the SCHED_BARRIER instruction and
+// corresponding builtin. The mutation adds edges from specific instruction
+// classes determined by the SCHED_BARRIER mask so that they cannot be
+// scheduled around the SCHED_BARRIER.
+class SchedBarrierDAGMutation : public ScheduleDAGMutation {
+private:
+ const SIInstrInfo *TII;
+
+ ScheduleDAGMI *DAG;
+
+ // Components of the mask that determines which instructions may not be
+ // scheduled across the SCHED_BARRIER.
+ enum class SchedBarrierMasks {
+ NONE = 0u,
+ ALU = 1u << 0,
+ VALU = 1u << 1,
+ SALU = 1u << 2,
+ MFMA = 1u << 3,
+ VMEM = 1u << 4,
+ VMEM_READ = 1u << 5,
+ VMEM_WRITE = 1u << 6,
+ DS = 1u << 7,
+ DS_READ = 1u << 8,
+ DS_WRITE = 1u << 9,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ DS_WRITE)
+ };
+
+ // Cache SchedGroups of each type if we have multiple SCHED_BARRIERs in a
+ // region.
+ //
+ std::unique_ptr<SchedGroup> MFMASchedGroup = nullptr;
+ std::unique_ptr<SchedGroup> VALUSchedGroup = nullptr;
+ std::unique_ptr<SchedGroup> SALUSchedGroup = nullptr;
+ std::unique_ptr<SchedGroup> VMEMReadSchedGroup = nullptr;
+ std::unique_ptr<SchedGroup> VMEMWriteSchedGroup = nullptr;
+ std::unique_ptr<SchedGroup> DSWriteSchedGroup = nullptr;
+ std::unique_ptr<SchedGroup> DSReadSchedGroup = nullptr;
+
+ // Use a SCHED_BARRIER's mask to identify instruction SchedGroups that should
+ // not be reordered across the SCHED_BARRIER.
+ void getSchedGroupsFromMask(int32_t Mask,
+ SmallVectorImpl<SchedGroup *> &SchedGroups);
+
+ // Add DAG edges that enforce SCHED_BARRIER ordering.
+ void addSchedBarrierEdges(SUnit &SU);
+
+ // Classify instructions and add them to the SchedGroup.
+ void initSchedGroup(SchedGroup *SG);
+
+ // Remove all existing edges from a SCHED_BARRIER.
+ void resetSchedBarrierEdges(SUnit &SU);
+
+public:
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+ SchedBarrierDAGMutation() = default;
+};
+
+void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+ const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
+ const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
+ if (!TSchedModel || DAG->SUnits.empty())
+ return;
+
+ LLVM_DEBUG(dbgs() << "Applying IGroupLPDAGMutation...\n");
+
+ // The order of InstructionGroups in this vector defines the
+ // order in which edges will be added. In other words, given the
+ // present ordering, we will try to make each VMEMRead instruction
+ // a predecessor of each DSRead instruction, and so on.
+ SmallVector<SchedGroup, 4> PipelineOrderGroups = {
+ SchedGroup(isVMEMSGMember, VMEMGroupMaxSize, DAG),
+ SchedGroup(isDSReadSGMember, LDRGroupMaxSize, DAG),
+ SchedGroup(isMFMASGMember, MFMAGroupMaxSize, DAG),
+ SchedGroup(isDSWriteSGMember, LDWGroupMaxSize, DAG)};
+
+ for (SUnit &SU : DAG->SUnits) {
+ LLVM_DEBUG(dbgs() << "Checking Node"; DAG->dumpNode(SU));
+ for (auto &SG : PipelineOrderGroups)
+ if (SG.canAddSU(SU, TII))
+ SG.add(SU);
+ }
+
+ for (unsigned i = 0; i < PipelineOrderGroups.size() - 1; i++) {
+ auto &GroupA = PipelineOrderGroups[i];
+ for (unsigned j = i + 1; j < PipelineOrderGroups.size(); j++) {
+ auto &GroupB = PipelineOrderGroups[j];
+ GroupA.link(GroupB);
+ }
+ }
+}
+
+void SchedBarrierDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+ const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
+ if (!TSchedModel || DAGInstrs->SUnits.empty())
+ return;
+
+ LLVM_DEBUG(dbgs() << "Applying SchedBarrierDAGMutation...\n");
+
+ const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
+ for (auto &SU : DAG->SUnits)
+ if (SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER)
+ addSchedBarrierEdges(SU);
+}
+
+void SchedBarrierDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
+ MachineInstr &MI = *SchedBarrier.getInstr();
+ assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
+ // Remove all existing edges from the SCHED_BARRIER that were added due to the
+ // instruction having side effects.
+ resetSchedBarrierEdges(SchedBarrier);
+ SmallVector<SchedGroup *, 4> SchedGroups;
+ int32_t Mask = MI.getOperand(0).getImm();
+ getSchedGroupsFromMask(Mask, SchedGroups);
+ for (auto SG : SchedGroups)
+ SG->link(
+ SchedBarrier, (function_ref<bool(const SUnit *A, const SUnit *B)>)[](
+ const SUnit *A, const SUnit *B) {
+ return A->NodeNum > B->NodeNum;
+ });
+}
+
+void SchedBarrierDAGMutation::getSchedGroupsFromMask(
+ int32_t Mask, SmallVectorImpl<SchedGroup *> &SchedGroups) {
+ SchedBarrierMasks SBMask = (SchedBarrierMasks)Mask;
+ // See IntrinsicsAMDGPU.td for an explanation of these masks and their
+ // mappings.
+ //
+ if ((SBMask & SchedBarrierMasks::VALU) == SchedBarrierMasks::NONE &&
+ (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
+ if (!VALUSchedGroup) {
+ VALUSchedGroup = std::make_unique<SchedGroup>(isVALUSGMember, None, DAG);
+ initSchedGroup(VALUSchedGroup.get());
+ }
+
+ SchedGroups.push_back(VALUSchedGroup.get());
+ }
+
+ if ((SBMask & SchedBarrierMasks::SALU) == SchedBarrierMasks::NONE &&
+ (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
+ if (!SALUSchedGroup) {
+ SALUSchedGroup = std::make_unique<SchedGroup>(isSALUSGMember, None, DAG);
+ initSchedGroup(SALUSchedGroup.get());
+ }
+
+ SchedGroups.push_back(SALUSchedGroup.get());
+ }
+
+ if ((SBMask & SchedBarrierMasks::MFMA) == SchedBarrierMasks::NONE &&
+ (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
+ if (!MFMASchedGroup) {
+ MFMASchedGroup = std::make_unique<SchedGroup>(isMFMASGMember, None, DAG);
+ initSchedGroup(MFMASchedGroup.get());
+ }
+
+ SchedGroups.push_back(MFMASchedGroup.get());
+ }
+
+ if ((SBMask & SchedBarrierMasks::VMEM_READ) == SchedBarrierMasks::NONE &&
+ (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
+ if (!VMEMReadSchedGroup) {
+ VMEMReadSchedGroup =
+ std::make_unique<SchedGroup>(isVMEMReadSGMember, None, DAG);
+ initSchedGroup(VMEMReadSchedGroup.get());
+ }
+
+ SchedGroups.push_back(VMEMReadSchedGroup.get());
+ }
+
+ if ((SBMask & SchedBarrierMasks::VMEM_WRITE) == SchedBarrierMasks::NONE &&
+ (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
+ if (!VMEMWriteSchedGroup) {
+ VMEMWriteSchedGroup =
+ std::make_unique<SchedGroup>(isVMEMWriteSGMember, None, DAG);
+ initSchedGroup(VMEMWriteSchedGroup.get());
+ }
+
+ SchedGroups.push_back(VMEMWriteSchedGroup.get());
+ }
+
+ if ((SBMask & SchedBarrierMasks::DS_READ) == SchedBarrierMasks::NONE &&
+ (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
+ if (!DSReadSchedGroup) {
+ DSReadSchedGroup =
+ std::make_unique<SchedGroup>(isDSReadSGMember, None, DAG);
+ initSchedGroup(DSReadSchedGroup.get());
+ }
+
+ SchedGroups.push_back(DSReadSchedGroup.get());
+ }
+
+ if ((SBMask & SchedBarrierMasks::DS_WRITE) == SchedBarrierMasks::NONE &&
+ (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
+ if (!DSWriteSchedGroup) {
+ DSWriteSchedGroup =
+ std::make_unique<SchedGroup>(isDSWriteSGMember, None, DAG);
+ initSchedGroup(DSWriteSchedGroup.get());
+ }
+
+ SchedGroups.push_back(DSWriteSchedGroup.get());
+ }
+}
+
+void SchedBarrierDAGMutation::initSchedGroup(SchedGroup *SG) {
+ assert(SG);
+ for (auto &SU : DAG->SUnits)
+ if (SG->canAddSU(SU, TII))
+ SG->add(SU);
+}
+
+void SchedBarrierDAGMutation::resetSchedBarrierEdges(SUnit &SU) {
+ assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER);
+ for (auto &P : SU.Preds)
+ SU.removePred(P);
+
+ for (auto &S : SU.Succs) {
+ for (auto &SP : S.getSUnit()->Preds) {
+ if (SP.getSUnit() == &SU) {
+ S.getSUnit()->removePred(SP);
+ }
+ }
+ }
+}
+
+} // namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation() {
+ return EnableIGroupLP ? std::make_unique<IGroupLPDAGMutation>() : nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation> createSchedBarrierDAGMutation() {
+ return std::make_unique<SchedBarrierDAGMutation>();
+}
+
+} // end namespace llvm
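
In getSchedGroupsFromMask above, a SchedGroup gains barrier edges only when
both its own bit and its parent-class bit (ALU for VALU/SALU/MFMA, VMEM for
VMEM_READ/VMEM_WRITE, DS for DS_READ/DS_WRITE) are clear, i.e. a set bit means
that class may move across the barrier. A few cases worked out from the
SchedBarrierMasks enum (an informal reading of the code, not quoted from
IntrinsicsAMDGPU.td):

    // Mask = 0: all seven SchedGroups are built and linked; nothing may be
    // scheduled across the SCHED_BARRIER.
    // Mask = ALU (1u << 0): VALU, SALU and MFMA are exempt; only the four
    // memory-side groups gain edges.
    // Mask = VMEM | DS ((1u << 4) | (1u << 7)): every memory class may
    // cross; only the three ALU-side groups are linked.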
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
new file mode 100644
index 000000000000..aeb1bbad3705
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h
@@ -0,0 +1,22 @@
+//===- AMDGPUIGroupLP.h - AMDGPU IGroupLP -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMFMAIGROUPLP_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMFMAIGROUPLP_H
+
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include <memory>
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation();
+std::unique_ptr<ScheduleDAGMutation> createSchedBarrierDAGMutation();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMFMAIGROUPLP_H
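
The new file exposes only factory functions; their call sites are not in this
diff. A sketch of the intended wiring under the generic MachineScheduler API,
assuming (as upstream does elsewhere) that the AMDGPU scheduler setup registers
the mutations; addIGroupLPMutations is a hypothetical helper:

    #include "AMDGPUIGroupLP.h"
    #include "llvm/CodeGen/MachineScheduler.h"
    #include <utility>

    // Mutations run after the scheduling DAG is built and may only add
    // artificial edges; createIGroupLPDAGMutation returns null unless
    // -amdgpu-igrouplp is set, so a null result is simply not registered.
    static void addIGroupLPMutations(llvm::ScheduleDAGMI &DAG) {
      if (auto M = llvm::createIGroupLPDAGMutation())
        DAG.addMutation(std::move(M));
      DAG.addMutation(llvm::createSchedBarrierDAGMutation());
    }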
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 8236e6672247..b00df27f5fd3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -13,7 +13,9 @@
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -679,9 +681,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::FMA:
SelectFMAD_FMA(N);
return;
- case AMDGPUISD::ATOMIC_CMP_SWAP:
- SelectATOMIC_CMP_SWAP(N);
- return;
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_PKNORM_I16_F32:
case AMDGPUISD::CVT_PKNORM_U16_F32:
@@ -1008,7 +1007,12 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
SDLoc SL(N);
bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
- unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
+ unsigned Opc;
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
+ Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
+ : AMDGPU::V_MAD_U64_U32_gfx11_e64;
+ else
+ Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
@@ -1021,7 +1025,12 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
SDLoc SL(N);
bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
- unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
+ unsigned Opc;
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
+ Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
+ : AMDGPU::V_MAD_U64_U32_gfx11_e64;
+ else
+ Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
@@ -1798,6 +1807,82 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
return true;
}
+// Check whether the flat scratch SVS swizzle bug affects this access.
+bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
+ SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
+ if (!Subtarget->hasFlatScratchSVSSwizzleBug())
+ return false;
+
+ // The bug affects the swizzling of SVS accesses if there is any carry out
+ // from the two low-order bits (i.e. from bit 1 into bit 2) when adding
+ // voffset to (soffset + inst_offset).
+ KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
+ KnownBits SKnown = KnownBits::computeForAddSub(
+ true, false, CurDAG->computeKnownBits(SAddr),
+ KnownBits::makeConstant(APInt(32, ImmOffset)));
+ uint64_t VMax = VKnown.getMaxValue().getZExtValue();
+ uint64_t SMax = SKnown.getMaxValue().getZExtValue();
+ return (VMax & 3) + (SMax & 3) >= 4;
+}
+
+bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
+ SDValue &VAddr, SDValue &SAddr,
+ SDValue &Offset) const {
+ int64_t ImmOffset = 0;
+
+ SDValue LHS, RHS;
+ if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
+ int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+
+ if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ Addr = LHS;
+ ImmOffset = COffsetVal;
+ } else if (!LHS->isDivergent() && COffsetVal > 0) {
+ SDLoc SL(N);
+ // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
+ // (large_offset & MaxOffset);
+ int64_t SplitImmOffset, RemainderOffset;
+ std::tie(SplitImmOffset, RemainderOffset)
+ = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
+
+ if (isUInt<32>(RemainderOffset)) {
+ SDNode *VMov = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
+ CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
+ VAddr = SDValue(VMov, 0);
+ SAddr = LHS;
+ if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
+ return false;
+ Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+ return true;
+ }
+ }
+ }
+
+ if (Addr.getOpcode() != ISD::ADD)
+ return false;
+
+ LHS = Addr.getOperand(0);
+ RHS = Addr.getOperand(1);
+
+ if (!LHS->isDivergent() && RHS->isDivergent()) {
+ SAddr = LHS;
+ VAddr = RHS;
+ } else if (!RHS->isDivergent() && LHS->isDivergent()) {
+ SAddr = RHS;
+ VAddr = LHS;
+ } else {
+ return false;
+ }
+
+ if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
+ return false;
+ SAddr = SelectSAddrFI(CurDAG, SAddr);
+ Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
@@ -2224,70 +2309,6 @@ void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
}
}
-// This is here because there isn't a way to use the generated sub0_sub1 as the
-// subreg index to EXTRACT_SUBREG in tablegen.
-void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
- MemSDNode *Mem = cast<MemSDNode>(N);
- unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUAS::FLAT_ADDRESS) {
- SelectCode(N);
- return;
- }
-
- MVT VT = N->getSimpleValueType(0);
- bool Is32 = (VT == MVT::i32);
- SDLoc SL(N);
-
- MachineSDNode *CmpSwap = nullptr;
- if (Subtarget->hasAddr64()) {
- SDValue SRsrc, VAddr, SOffset, Offset;
-
- if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
- unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
- AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
- SDValue CmpVal = Mem->getOperand(2);
- SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
-
- // XXX - Do we care about glue operands?
-
- SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
- Mem->getChain()};
-
- CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
- }
- }
-
- if (!CmpSwap) {
- SDValue SRsrc, SOffset, Offset;
- if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
- unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
- AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
-
- SDValue CmpVal = Mem->getOperand(2);
- SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
- SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
-
- CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
- }
- }
-
- if (!CmpSwap) {
- SelectCode(N);
- return;
- }
-
- MachineMemOperand *MMO = Mem->getMemOperand();
- CurDAG->setNodeMemRefs(CmpSwap, {MMO});
-
- unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
- SDValue Extract
- = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
-
- ReplaceUses(SDValue(N, 0), Extract);
- ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
- CurDAG->RemoveDeadNode(N);
-}
-
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
// The address is assumed to be uniform, so if it ends up in a VGPR, it will
// be copied to an SGPR with readfirstlane.
@@ -2587,6 +2608,30 @@ bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ bool OpSel) const {
+ unsigned Mods;
+ if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
+ if (OpSel)
+ Mods |= SISrcMods::OP_SEL_0;
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
@@ -2619,7 +2664,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
}
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
+ SDValue &SrcMods, bool IsDOT) const {
unsigned Mods = 0;
Src = In;
@@ -2628,7 +2673,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
Src = Src.getOperand(0);
}
- if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+ if (Src.getOpcode() == ISD::BUILD_VECTOR &&
+ (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
unsigned VecMods = Mods;
SDValue Lo = stripBitcast(Src.getOperand(0));
@@ -2716,6 +2762,40 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ return SelectVOP3PMods(In, Src, SrcMods, true);
+}
+
+bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
+ const ConstantSDNode *C = cast<ConstantSDNode>(In);
+ // The literal i1 value set in the intrinsic represents SrcMods for the next
+ // operand: 1 promotes packed values to signed, 0 treats them as unsigned.
+ assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
+
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned SrcSign = C->getAPIntValue().getZExtValue();
+ if (SrcSign == 1)
+ Mods ^= SISrcMods::NEG;
+
+ Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
+ SDValue &Src) const {
+ const ConstantSDNode *C = cast<ConstantSDNode>(In);
+ assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
+
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned SrcVal = C->getAPIntValue().getZExtValue();
+ if (SrcVal == 1)
+ Mods |= SISrcMods::OP_SEL_0;
+
+ Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
Src = In;
@@ -2840,7 +2920,7 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
}
}
}
- // If "AllUsesAcceptSReg == false" so far we haven't suceeded
+ // If "AllUsesAcceptSReg == false" so far we haven't succeeded
// commuting the current user. This means we have at least one use
// that strictly requires VGPR. Thus, we will not attempt to commute
// other user instructions.
@@ -2854,26 +2934,15 @@ bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
auto Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= 4 &&
- (
- (
- (
- Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
- )
- &&
- !N->isDivergent()
- )
- ||
- (
- Subtarget->getScalarizeGlobalBehavior() &&
- Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
- Ld->isSimple() &&
- !N->isDivergent() &&
- static_cast<const SITargetLowering *>(
- getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
- )
- );
+ return Ld->getAlign() >= Align(4) &&
+ (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ !N->isDivergent()) ||
+ (Subtarget->getScalarizeGlobalBehavior() &&
+ Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ Ld->isSimple() && !N->isDivergent() &&
+ static_cast<const SITargetLowering *>(getTargetLowering())
+ ->isMemOpHasNoClobberedMemOperand(N)));
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
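
The swizzle-bug test above is interval arithmetic on the two low bits: a carry
out of bits [1:0] of voffset + (soffset + inst_offset) is possible exactly when
the maxima of the two low-bit pairs can sum past 3. Restated in isolation
(mayCarryIntoBit2 is a hypothetical name for the predicate the code inlines):

    #include <cstdint>

    // Conservative carry check matching checkFlatScratchSVSSwizzleBug:
    // e.g. (VMax & 3) == 2 and (SMax & 3) == 2 sums to 4, so a carry from
    // bit 1 into bit 2 cannot be ruled out and SVS selection must bail out.
    static bool mayCarryIntoBit2(uint64_t VMax, uint64_t SMax) {
      return (VMax & 3) + (SMax & 3) >= 4;
    }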
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index d638d9877a9b..862be9dc5568 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -188,6 +188,10 @@ private:
SDValue &VOffset, SDValue &Offset) const;
bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &Offset) const;
+ bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
+ uint64_t ImmOffset) const;
+ bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &SAddr, SDValue &Offset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
@@ -214,10 +218,20 @@ private:
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods,
+ bool OpSel) const;
+ bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
SDValue &Omod) const;
- bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
+ bool IsDOT = false) const;
+ bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ bool SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const;
+ bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
@@ -245,7 +259,6 @@ private:
bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
- void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
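
SelectDotIUVOP3PMods, declared above and defined in the preceding .cpp hunk,
folds the intrinsic's literal i1 into a source-modifier immediate: OP_SEL_1 is
always set, and the NEG bit doubles as the "treat packed values as signed"
flag. The same encoding in isolation, assuming the SISrcMods constants from
SIDefines.h; encodeDotIUSrcMods is a hypothetical helper:

    #include "SIDefines.h"

    // Encode the dot-product signedness operand as VOP3P source modifiers.
    static unsigned encodeDotIUSrcMods(bool Signed) {
      unsigned Mods = SISrcMods::OP_SEL_1; // default for packed operands
      if (Signed)
        Mods ^= SISrcMods::NEG; // NEG repurposed as the sign flag
      return Mods;
    }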
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b9d0655feef7..ef7929012597 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -19,6 +19,7 @@
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/CommandLine.h"
@@ -127,49 +128,27 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// There are no 64-bit extloads. These should be done as a 32-bit extload and
// an extension to 64-bit.
- for (MVT VT : MVT::integer_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
- }
+ for (MVT VT : MVT::integer_valuetypes())
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, VT,
+ Expand);
for (MVT VT : MVT::integer_valuetypes()) {
if (VT == MVT::i64)
continue;
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
-
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
-
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) {
+ setLoadExtAction(Op, VT, MVT::i1, Promote);
+ setLoadExtAction(Op, VT, MVT::i8, Legal);
+ setLoadExtAction(Op, VT, MVT::i16, Legal);
+ setLoadExtAction(Op, VT, MVT::i32, Expand);
+ }
}
- for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
- }
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
+ for (auto MemVT :
+ {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16})
+ setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MemVT,
+ Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
@@ -304,229 +283,125 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand);
- setOperationAction(ISD::Constant, MVT::i32, Legal);
- setOperationAction(ISD::Constant, MVT::i64, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::Constant, {MVT::i32, MVT::i64}, Legal);
+ setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
// This is totally unsupported, just custom lower to produce an error.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
// Library functions. These default to Expand, but we have instructions
// for them.
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FEXP2, MVT::f32, Legal);
- setOperationAction(ISD::FPOW, MVT::f32, Legal);
- setOperationAction(ISD::FLOG2, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction({ISD::FCEIL, ISD::FEXP2, ISD::FPOW, ISD::FLOG2, ISD::FABS,
+ ISD::FFLOOR, ISD::FRINT, ISD::FTRUNC, ISD::FMINNUM,
+ ISD::FMAXNUM},
+ MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Custom);
- setOperationAction(ISD::FROUND, MVT::f64, Custom);
+ setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
- setOperationAction(ISD::FLOG, MVT::f32, Custom);
- setOperationAction(ISD::FLOG10, MVT::f32, Custom);
- setOperationAction(ISD::FEXP, MVT::f32, Custom);
+ setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP}, MVT::f32, Custom);
+ setOperationAction(ISD::FNEARBYINT, {MVT::f32, MVT::f64}, Custom);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
-
- setOperationAction(ISD::FREM, MVT::f16, Custom);
- setOperationAction(ISD::FREM, MVT::f32, Custom);
- setOperationAction(ISD::FREM, MVT::f64, Custom);
+ setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v3i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v6i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v6f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v7i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v7f32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v6i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v7i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f64, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i64, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS,
+ {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
+ MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
+ MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},
+ Custom);
+ setOperationAction(
+ ISD::EXTRACT_SUBVECTOR,
+ {MVT::v2f16, MVT::v2i16, MVT::v4f16, MVT::v4i16, MVT::v2f32,
+ MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32, MVT::v4i32,
+ MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32, MVT::v7f32,
+ MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v16f16, MVT::v16i16,
+ MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, MVT::v2f64,
+ MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, MVT::v4i64,
+ MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64},
+ Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
- setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
- setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
+ setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
// These should use [SU]DIVREM, so set them to expand
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT,
+ Expand);
// GPU does not have divrem function for signed or unsigned.
- setOperationAction(ISD::SDIVREM, VT, Custom);
- setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction({ISD::SDIVREM, ISD::UDIVREM}, VT, Custom);
// GPU does not have [S|U]MUL_LOHI functions as a single instruction.
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
- setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand);
// AMDGPU uses ADDC/SUBC/ADDE/SUBE
- setOperationAction(ISD::ADDC, VT, Legal);
- setOperationAction(ISD::SUBC, VT, Legal);
- setOperationAction(ISD::ADDE, VT, Legal);
- setOperationAction(ISD::SUBE, VT, Legal);
+ setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal);
}
// The hardware supports 32-bit FSHR, but not FSHL.
setOperationAction(ISD::FSHR, MVT::i32, Legal);
// The hardware supports 32-bit ROTR, but not ROTL.
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i16, Expand);
- setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand);
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction({ISD::MUL, ISD::MULHU, ISD::MULHS}, MVT::i64, Expand);
+ setOperationAction(
+ {ISD::UINT_TO_FP, ISD::SINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
+ MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- setOperationAction(ISD::SMIN, MVT::i32, Legal);
- setOperationAction(ISD::UMIN, MVT::i32, Legal);
- setOperationAction(ISD::SMAX, MVT::i32, Legal);
- setOperationAction(ISD::UMAX, MVT::i32, Legal);
+ setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
+ Legal);
- setOperationAction(ISD::CTTZ, MVT::i64, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
- setOperationAction(ISD::CTLZ, MVT::i64, Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+ setOperationAction(
+ {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
+ MVT::i64, Custom);
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32};
for (MVT VT : VectorIntTypes) {
// Expand the following operations for the current type by default.
- setOperationAction(ISD::ADD, VT, Expand);
- setOperationAction(ISD::AND, VT, Expand);
- setOperationAction(ISD::FP_TO_SINT, VT, Expand);
- setOperationAction(ISD::FP_TO_UINT, VT, Expand);
- setOperationAction(ISD::MUL, VT, Expand);
- setOperationAction(ISD::MULHU, VT, Expand);
- setOperationAction(ISD::MULHS, VT, Expand);
- setOperationAction(ISD::OR, VT, Expand);
- setOperationAction(ISD::SHL, VT, Expand);
- setOperationAction(ISD::SRA, VT, Expand);
- setOperationAction(ISD::SRL, VT, Expand);
- setOperationAction(ISD::ROTL, VT, Expand);
- setOperationAction(ISD::ROTR, VT, Expand);
- setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, VT, Expand);
- setOperationAction(ISD::UINT_TO_FP, VT, Expand);
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::UDIVREM, VT, Expand);
- setOperationAction(ISD::SELECT, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::XOR, VT, Expand);
- setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU,
+ ISD::MULHS, ISD::OR, ISD::SHL,
+ ISD::SRA, ISD::SRL, ISD::ROTL,
+ ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP,
+ ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM, ISD::SMUL_LOHI,
+ ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM,
+ ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC,
+ ISD::XOR, ISD::BSWAP, ISD::CTPOP,
+ ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE,
+ ISD::SETCC},
+ VT, Expand);
}
static const MVT::SimpleValueType FloatVectorTypes[] = {
MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32};
for (MVT VT : FloatVectorTypes) {
- setOperationAction(ISD::FABS, VT, Expand);
- setOperationAction(ISD::FMINNUM, VT, Expand);
- setOperationAction(ISD::FMAXNUM, VT, Expand);
- setOperationAction(ISD::FADD, VT, Expand);
- setOperationAction(ISD::FCEIL, VT, Expand);
- setOperationAction(ISD::FCOS, VT, Expand);
- setOperationAction(ISD::FDIV, VT, Expand);
- setOperationAction(ISD::FEXP2, VT, Expand);
- setOperationAction(ISD::FEXP, VT, Expand);
- setOperationAction(ISD::FLOG2, VT, Expand);
- setOperationAction(ISD::FREM, VT, Expand);
- setOperationAction(ISD::FLOG, VT, Expand);
- setOperationAction(ISD::FLOG10, VT, Expand);
- setOperationAction(ISD::FPOW, VT, Expand);
- setOperationAction(ISD::FFLOOR, VT, Expand);
- setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::FMUL, VT, Expand);
- setOperationAction(ISD::FMA, VT, Expand);
- setOperationAction(ISD::FRINT, VT, Expand);
- setOperationAction(ISD::FNEARBYINT, VT, Expand);
- setOperationAction(ISD::FSQRT, VT, Expand);
- setOperationAction(ISD::FSIN, VT, Expand);
- setOperationAction(ISD::FSUB, VT, Expand);
- setOperationAction(ISD::FNEG, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- setOperationAction(ISD::SETCC, VT, Expand);
- setOperationAction(ISD::FCANONICALIZE, VT, Expand);
+ setOperationAction(
+ {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
+ ISD::FCEIL, ISD::FCOS, ISD::FDIV, ISD::FEXP2,
+ ISD::FEXP, ISD::FLOG2, ISD::FREM, ISD::FLOG,
+ ISD::FLOG10, ISD::FPOW, ISD::FFLOOR, ISD::FTRUNC,
+ ISD::FMUL, ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
+ ISD::FSQRT, ISD::FSIN, ISD::FSUB, ISD::FNEG,
+ ISD::VSELECT, ISD::SELECT_CC, ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE,
+ ISD::SETCC, ISD::FCANONICALIZE},
+ VT, Expand);
}
// This causes using an unrolled select operation rather than expansion with
@@ -590,26 +465,16 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
if (AMDGPUBypassSlowDiv)
addBypassSlowDiv(64, 32);
- setTargetDAGCombine(ISD::BITCAST);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::TRUNCATE);
- setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::SMUL_LOHI);
- setTargetDAGCombine(ISD::UMUL_LOHI);
- setTargetDAGCombine(ISD::MULHU);
- setTargetDAGCombine(ISD::MULHS);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::SELECT_CC);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::FADD);
- setTargetDAGCombine(ISD::FSUB);
- setTargetDAGCombine(ISD::FNEG);
- setTargetDAGCombine(ISD::FABS);
- setTargetDAGCombine(ISD::AssertZext);
- setTargetDAGCombine(ISD::AssertSext);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine({ISD::BITCAST, ISD::SHL,
+ ISD::SRA, ISD::SRL,
+ ISD::TRUNCATE, ISD::MUL,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ ISD::MULHU, ISD::MULHS,
+ ISD::SELECT, ISD::SELECT_CC,
+ ISD::STORE, ISD::FADD,
+ ISD::FSUB, ISD::FNEG,
+ ISD::FABS, ISD::AssertZext,
+ ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
}
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
@@ -785,11 +650,11 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
unsigned AS = MN->getAddressSpace();
// Do not shrink an aligned scalar load to sub-dword.
// Scalar engine cannot do sub-dword loads.
- if (OldSize >= 32 && NewSize < 32 && MN->getAlignment() >= 4 &&
+ if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) &&
(AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
- (isa<LoadSDNode>(N) &&
- AS == AMDGPUAS::GLOBAL_ADDRESS && MN->isInvariant())) &&
+ (isa<LoadSDNode>(N) && AS == AMDGPUAS::GLOBAL_ADDRESS &&
+ MN->isInvariant())) &&
AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
return false;
@@ -855,6 +720,8 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
return false;
+ case AMDGPUISD::SETCC: // ballot-style instruction
+ return true;
}
return false;
}
@@ -1072,10 +939,9 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
const bool IsByRef = Arg.hasByRefAttr();
Type *BaseArgTy = Arg.getType();
Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
- MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None;
- if (!Alignment)
- Alignment = DL.getABITypeAlign(MemArgTy);
- MaxAlign = max(Alignment, MaxAlign);
+ Align Alignment = DL.getValueOrABITypeAlignment(
+ IsByRef ? Arg.getParamAlign() : None, MemArgTy);
+ MaxAlign = std::max(Alignment, MaxAlign);
uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
@@ -1415,6 +1281,11 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
(Start == 0 || Start == 4))
return Op;
+ if (((SrcVT == MVT::v16f16 && VT == MVT::v8f16) ||
+ (SrcVT == MVT::v16i16 && VT == MVT::v8i16)) &&
+ (Start == 0 || Start == 8))
+ return Op;
+
DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
VT.getVectorNumElements());
@@ -1589,8 +1460,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG);
unsigned Size = LoMemVT.getStoreSize();
- unsigned BaseAlign = Load->getAlignment();
- unsigned HiAlign = MinAlign(BaseAlign, Size);
+ Align BaseAlign = Load->getAlign();
+ Align HiAlign = commonAlignment(BaseAlign, Size);
SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
Load->getChain(), BasePtr, SrcValue, LoMemVT,
@@ -1628,13 +1499,13 @@ SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
EVT MemVT = Load->getMemoryVT();
SDLoc SL(Op);
const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
- unsigned BaseAlign = Load->getAlignment();
+ Align BaseAlign = Load->getAlign();
unsigned NumElements = MemVT.getVectorNumElements();
// Widen from vec3 to vec4 when the load is at least 8-byte aligned
// or 16-byte fully dereferenceable. Otherwise, split the vector load.
if (NumElements != 3 ||
- (BaseAlign < 8 &&
+ (BaseAlign < Align(8) &&
!SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout())))
return SplitVectorLoad(Op, DAG);
@@ -1681,9 +1552,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
- unsigned BaseAlign = Store->getAlignment();
+ Align BaseAlign = Store->getAlign();
unsigned Size = LoMemVT.getStoreSize();
- unsigned HiAlign = MinAlign(BaseAlign, Size);
+ Align HiAlign = commonAlignment(BaseAlign, Size);
SDValue LoStore =
DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
@@ -3003,12 +2874,11 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
// the bytes again are not eliminated in the case of an unaligned copy.
if (!allowsMisalignedMemoryAccesses(
VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) {
- SDValue Ops[2];
-
if (VT.isVector())
- std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LN, DAG);
- else
- std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
+ return SplitVectorLoad(SDValue(LN, 0), DAG);
+
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
return DAG.getMergeValues(Ops, SDLoc(N));
}
@@ -3059,7 +2929,7 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
if (!allowsMisalignedMemoryAccesses(
VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) {
if (VT.isVector())
- return scalarizeVectorStore(SN, DAG);
+ return SplitVectorStore(SDValue(SN, 0), DAG);
return expandUnalignedStore(SN, DAG);
}
@@ -3281,8 +3151,9 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
// this improves the ability to match BFE patterns in isel.
if (LHS.getOpcode() == ISD::AND) {
if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
- if (Mask->getAPIntValue().isShiftedMask() &&
- Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
+ unsigned MaskIdx, MaskLen;
+ if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
+ MaskIdx == ShiftAmt) {
return DAG.getNode(
ISD::AND, SL, VT,
DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
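The rewritten guard uses the isShiftedMask overload that reports the mask's start bit and length, replacing the old isShiftedMask plus countTrailingZeros pair. A minimal, self-contained sketch of the check, with values invented for illustration:

#include "llvm/ADT/APInt.h"

// Mirrors the guard in performSrlCombine: for (srl (and x, Mask), ShiftAmt),
// the combine fires only when Mask is a contiguous run of ones starting at
// exactly bit ShiftAmt, e.g. Mask = 0x00ff0000 with ShiftAmt = 16
// (MaskIdx = 16, MaskLen = 8), letting the AND be re-applied as 0xff after
// the shift.
static bool srlCombineWouldFire(uint64_t MaskVal, unsigned ShiftAmt) {
  llvm::APInt Mask(32, MaskVal);
  unsigned MaskIdx, MaskLen;
  return Mask.isShiftedMask(MaskIdx, MaskLen) && MaskIdx == ShiftAmt;
}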
@@ -4380,10 +4251,14 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
ExplicitArgOffset;
switch (Param) {
- case GRID_DIM:
+ case FIRST_IMPLICIT:
return ArgOffset;
- case GRID_OFFSET:
- return ArgOffset + 4;
+ case PRIVATE_BASE:
+ return ArgOffset + AMDGPU::ImplicitArg::PRIVATE_BASE_OFFSET;
+ case SHARED_BASE:
+ return ArgOffset + AMDGPU::ImplicitArg::SHARED_BASE_OFFSET;
+ case QUEUE_PTR:
+ return ArgOffset + AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET;
}
llvm_unreachable("unexpected implicit parameter type");
}
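GRID_DIM/GRID_OFFSET give way to named fields of the implicit kernel-argument block; the byte values behind AMDGPU::ImplicitArg::*_OFFSET are defined elsewhere in the tree and are not part of this hunk. A sketch of the resulting layout, with the offsets stubbed in as assumptions:

// Hypothetical stand-ins for AMDGPU::ImplicitArg::*_OFFSET; the real
// constants live in AMDGPUBaseInfo and may differ.
enum : unsigned {
  PRIVATE_BASE_OFFSET = 192, // assumed for illustration
  SHARED_BASE_OFFSET = 196,  // assumed for illustration
  QUEUE_PTR_OFFSET = 200,    // assumed for illustration
};

// Each implicit parameter sits at a fixed offset past ArgOffset, the
// aligned end of the explicit kernel arguments.
static unsigned implicitParamOffset(unsigned ArgOffset, unsigned Field) {
  return ArgOffset + Field;
}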
@@ -4405,7 +4280,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_FLAG)
- NODE_NAME_CASE(RET_GFX_FLAG)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
NODE_NAME_CASE(DWORDADDR)
@@ -4485,6 +4359,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(LDS)
+ NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
+ NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(LOAD_D16_HI)
@@ -4580,6 +4456,19 @@ SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
return SDValue();
}
+static unsigned workitemIntrinsicDim(unsigned ID) {
+ switch (ID) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ return 0;
+ case Intrinsic::amdgcn_workitem_id_y:
+ return 1;
+ case Intrinsic::amdgcn_workitem_id_z:
+ return 2;
+ default:
+ llvm_unreachable("not a workitem intrinsic");
+ }
+}
+
void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
@@ -4716,6 +4605,14 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
break;
}
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::amdgcn_workitem_id_z: {
+ unsigned MaxValue = Subtarget->getMaxWorkitemID(
+ DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID));
+ Known.Zero.setHighBits(countLeadingZeros(MaxValue));
+ break;
+ }
default:
break;
}
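The new workitem-ID cases let known-bits analysis clear the high bits of the ID values from the subtarget's dispatch limits. A worked example, assuming getMaxWorkitemID reports 1023 for the dimension (a 1024-wide workgroup):

#include "llvm/Support/MathExtras.h"

// countLeadingZeros(1023u) on a 32-bit value is 22, so bits [31:10] of the
// workitem ID become known zero: the ID is proven to fit in 10 bits.
static unsigned knownZeroHighBits() {
  return llvm::countLeadingZeros(1023u); // == 22
}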
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index b41506157b68..73081483f1c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -320,8 +320,9 @@ public:
enum ImplicitParameter {
FIRST_IMPLICIT,
- GRID_DIM = FIRST_IMPLICIT,
- GRID_OFFSET,
+ PRIVATE_BASE,
+ SHARED_BASE,
+ QUEUE_PTR,
};
/// Helper function that returns the byte offset of the given
@@ -367,9 +368,6 @@ enum NodeType : unsigned {
// Return with values from a non-entry function.
RET_FLAG,
- // Return with values from a non-entry function (AMDGPU_Gfx CC).
- RET_GFX_FLAG,
-
DWORDADDR,
FRACT,
@@ -483,6 +481,9 @@ enum NodeType : unsigned {
CONST_DATA_PTR,
PC_ADD_REL_OFFSET,
LDS,
+ FPTRUNC_ROUND_UPWARD,
+ FPTRUNC_ROUND_DOWNWARD,
+
DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
LOAD_D16_HI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
new file mode 100644
index 000000000000..c9cdbc89f3a4
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
@@ -0,0 +1,457 @@
+//===- AMDGPUInsertDelayAlu.cpp - Insert s_delay_alu instructions ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Insert s_delay_alu instructions to avoid stalls on GFX11+.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/SetVector.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-insert-delay-alu"
+
+namespace {
+
+class AMDGPUInsertDelayAlu : public MachineFunctionPass {
+public:
+ static char ID;
+
+ const SIInstrInfo *SII;
+ const TargetRegisterInfo *TRI;
+
+ TargetSchedModel SchedModel;
+
+ AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ // Return true if MI waits for all outstanding VALU instructions to complete.
+ static bool instructionWaitsForVALU(const MachineInstr &MI) {
+ // These instruction types wait for VA_VDST==0 before issuing.
+ const uint64_t VA_VDST_0 = SIInstrFlags::DS | SIInstrFlags::EXP |
+ SIInstrFlags::FLAT | SIInstrFlags::MIMG |
+ SIInstrFlags::MTBUF | SIInstrFlags::MUBUF;
+ if (MI.getDesc().TSFlags & VA_VDST_0)
+ return true;
+ if (MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B32 ||
+ MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64)
+ return true;
+ if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ (MI.getOperand(0).getImm() & 0xf000) == 0)
+ return true;
+ return false;
+ }
+
+ // Types of delay that can be encoded in an s_delay_alu instruction.
+ enum DelayType { VALU, TRANS, SALU, OTHER };
+
+ // Get the delay type for an instruction with the specified TSFlags.
+ static DelayType getDelayType(uint64_t TSFlags) {
+ if (TSFlags & SIInstrFlags::TRANS)
+ return TRANS;
+ if (TSFlags & SIInstrFlags::VALU)
+ return VALU;
+ if (TSFlags & SIInstrFlags::SALU)
+ return SALU;
+ return OTHER;
+ }
+
+ // Information about the last instruction(s) that wrote to a particular
+ // regunit. In straight-line code there will only be one such instruction, but
+ // when control flow converges we merge the delay information from each path
+ // to represent the union of the worst-case delays of each type.
+ struct DelayInfo {
+ // One larger than the maximum number of (non-TRANS) VALU instructions we
+ // can encode in an s_delay_alu instruction.
+ static const unsigned VALU_MAX = 5;
+
+ // One larger than the maximum number of TRANS instructions we can encode in
+ // an s_delay_alu instruction.
+ static const unsigned TRANS_MAX = 4;
+
+ // If it was written by a (non-TRANS) VALU, remember how many clock cycles
+ // are left until it completes, and how many other (non-TRANS) VALU we have
+ // seen since it was issued.
+ uint8_t VALUCycles = 0;
+ uint8_t VALUNum = VALU_MAX;
+
+ // If it was written by a TRANS, remember how many clock cycles are left
+ // until it completes, and how many other TRANS we have seen since it was
+ // issued.
+ uint8_t TRANSCycles = 0;
+ uint8_t TRANSNum = TRANS_MAX;
+ // Also remember how many other (non-TRANS) VALU we have seen since it was
+ // issued. When an instruction depends on both a prior TRANS and a prior
+ // non-TRANS VALU, this is used to decide whether to encode a wait for just
+ // one or both of them.
+ uint8_t TRANSNumVALU = VALU_MAX;
+
+ // If it was written by an SALU, remember how many clock cycles are left
+ // until it completes.
+ uint8_t SALUCycles = 0;
+
+ DelayInfo() = default;
+
+ DelayInfo(DelayType Type, unsigned Cycles) {
+ switch (Type) {
+ default:
+ llvm_unreachable("unexpected type");
+ case VALU:
+ VALUCycles = Cycles;
+ VALUNum = 0;
+ break;
+ case TRANS:
+ TRANSCycles = Cycles;
+ TRANSNum = 0;
+ TRANSNumVALU = 0;
+ break;
+ case SALU:
+ SALUCycles = Cycles;
+ break;
+ }
+ }
+
+ bool operator==(const DelayInfo &RHS) const {
+ return VALUCycles == RHS.VALUCycles && VALUNum == RHS.VALUNum &&
+ TRANSCycles == RHS.TRANSCycles && TRANSNum == RHS.TRANSNum &&
+ TRANSNumVALU == RHS.TRANSNumVALU && SALUCycles == RHS.SALUCycles;
+ }
+
+ bool operator!=(const DelayInfo &RHS) const { return !(*this == RHS); }
+
+ // Merge another DelayInfo into this one, to represent the union of the
+ // worst-case delays of each type.
+ void merge(const DelayInfo &RHS) {
+ VALUCycles = std::max(VALUCycles, RHS.VALUCycles);
+ VALUNum = std::min(VALUNum, RHS.VALUNum);
+ TRANSCycles = std::max(TRANSCycles, RHS.TRANSCycles);
+ TRANSNum = std::min(TRANSNum, RHS.TRANSNum);
+ TRANSNumVALU = std::min(TRANSNumVALU, RHS.TRANSNumVALU);
+ SALUCycles = std::max(SALUCycles, RHS.SALUCycles);
+ }
+
+ // Update this DelayInfo after issuing an instruction. Type is the delay
+ // type of the instruction just issued, and Cycles is the number of cycles
+ // it takes to issue it. Return true if there is no longer any useful
+ // delay info.
+ bool advance(DelayType Type, unsigned Cycles) {
+ bool Erase = true;
+
+ VALUNum += (Type == VALU);
+ if (VALUNum >= VALU_MAX || VALUCycles <= Cycles) {
+ // Forget about the VALU instruction. It was too far back or has
+ // definitely completed by now.
+ VALUNum = VALU_MAX;
+ VALUCycles = 0;
+ } else {
+ VALUCycles -= Cycles;
+ Erase = false;
+ }
+
+ TRANSNum += (Type == TRANS);
+ TRANSNumVALU += (Type == VALU);
+ if (TRANSNum >= TRANS_MAX || TRANSCycles <= Cycles) {
+ // Forget about any TRANS instruction. It was too far back or has
+ // definitely completed by now.
+ TRANSNum = TRANS_MAX;
+ TRANSNumVALU = VALU_MAX;
+ TRANSCycles = 0;
+ } else {
+ TRANSCycles -= Cycles;
+ Erase = false;
+ }
+
+ if (SALUCycles <= Cycles) {
+ // Forget about any SALU instruction. It has definitely completed by
+ // now.
+ SALUCycles = 0;
+ } else {
+ SALUCycles -= Cycles;
+ Erase = false;
+ }
+
+ return Erase;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() const {
+ if (VALUCycles)
+ dbgs() << " VALUCycles=" << (int)VALUCycles;
+ if (VALUNum < VALU_MAX)
+ dbgs() << " VALUNum=" << (int)VALUNum;
+ if (TRANSCycles)
+ dbgs() << " TRANSCycles=" << (int)TRANSCycles;
+ if (TRANSNum < TRANS_MAX)
+ dbgs() << " TRANSNum=" << (int)TRANSNum;
+ if (TRANSNumVALU < VALU_MAX)
+ dbgs() << " TRANSNumVALU=" << (int)TRANSNumVALU;
+ if (SALUCycles)
+ dbgs() << " SALUCycles=" << (int)SALUCycles;
+ }
+#endif
+ };
+
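One possible lifecycle of a DelayInfo entry, restated as an illustrative fragment using the pass-local types above (latencies invented; note that the producer's own end-of-instruction advance() already bumps VALUNum to 1 before any consumer can observe the entry):

// A 4-cycle VALU writes v0:
DelayInfo DI(VALU, /*Cycles=*/4); // VALUNum = 0, VALUCycles = 4
DI.advance(VALU, 1);              // the producer itself issues: 1, 3
DI.advance(SALU, 1);              // unrelated SALU: VALUNum stays 1, cycles 2
DI.advance(VALU, 1);              // unrelated VALU: VALUNum = 2, cycles 1
// A consumer of v0 here would request a wait on the VALU two
// instructions back (VALUNum == 2).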
+ // A map from regunits to the delay info for that regunit.
+ struct DelayState : DenseMap<unsigned, DelayInfo> {
+ // Merge another DelayState into this one by merging the delay info for each
+ // regunit.
+ void merge(const DelayState &RHS) {
+ for (const auto &KV : RHS) {
+ iterator It;
+ bool Inserted;
+ std::tie(It, Inserted) = insert(KV);
+ if (!Inserted)
+ It->second.merge(KV.second);
+ }
+ }
+
+ // Advance the delay info for each regunit, erasing any that are no longer
+ // useful.
+ void advance(DelayType Type, unsigned Cycles) {
+ iterator Next;
+ for (auto I = begin(), E = end(); I != E; I = Next) {
+ Next = std::next(I);
+ if (I->second.advance(Type, Cycles))
+ erase(I);
+ }
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump(const TargetRegisterInfo *TRI) const {
+ if (empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ // Dump DelayInfo for each RegUnit in numerical order.
+ SmallVector<const_iterator, 8> Order;
+ Order.reserve(size());
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ Order.push_back(I);
+ llvm::sort(Order, [](const const_iterator &A, const const_iterator &B) {
+ return A->first < B->first;
+ });
+ for (const_iterator I : Order) {
+ dbgs() << " " << printRegUnit(I->first, TRI);
+ I->second.dump();
+ dbgs() << "\n";
+ }
+ }
+#endif
+ };
+
+ // The saved delay state at the end of each basic block.
+ DenseMap<MachineBasicBlock *, DelayState> BlockState;
+
+ // Emit an s_delay_alu instruction if necessary before MI.
+ MachineInstr *emitDelayAlu(MachineInstr &MI, DelayInfo Delay,
+ MachineInstr *LastDelayAlu) {
+ unsigned Imm = 0;
+
+ // Wait for a TRANS instruction.
+ if (Delay.TRANSNum < DelayInfo::TRANS_MAX)
+ Imm |= 4 + Delay.TRANSNum;
+
+ // Wait for a VALU instruction (if it's more recent than any TRANS
+ // instruction that we're also waiting for).
+ if (Delay.VALUNum < DelayInfo::VALU_MAX &&
+ Delay.VALUNum <= Delay.TRANSNumVALU) {
+ if (Imm & 0xf)
+ Imm |= Delay.VALUNum << 7;
+ else
+ Imm |= Delay.VALUNum;
+ }
+
+ // Wait for an SALU instruction.
+ if (Delay.SALUCycles) {
+ if (Imm & 0x780) {
+ // We have already encoded a VALU and a TRANS delay. There's no room in
+ // the encoding for an SALU delay as well, so just drop it.
+ } else if (Imm & 0xf) {
+ Imm |= (Delay.SALUCycles + 8) << 7;
+ } else {
+ Imm |= Delay.SALUCycles + 8;
+ }
+ }
+
+ // Don't emit the s_delay_alu instruction if there's nothing to wait for.
+ if (!Imm)
+ return LastDelayAlu;
+
+ // If we only need to wait for one instruction, try encoding it in the last
+ // s_delay_alu that we emitted.
+ if (!(Imm & 0x780) && LastDelayAlu) {
+ unsigned Skip = 0;
+ for (auto I = MachineBasicBlock::instr_iterator(LastDelayAlu),
+ E = MachineBasicBlock::instr_iterator(MI);
+ ++I != E;) {
+ if (!I->isBundle() && !I->isMetaInstruction())
+ ++Skip;
+ }
+ if (Skip < 6) {
+ MachineOperand &Op = LastDelayAlu->getOperand(0);
+ unsigned LastImm = Op.getImm();
+ assert((LastImm & ~0xf) == 0 &&
+ "Remembered an s_delay_alu with no room for another delay!");
+ LastImm |= Imm << 7 | Skip << 4;
+ Op.setImm(LastImm);
+ return nullptr;
+ }
+ }
+
+ auto &MBB = *MI.getParent();
+ MachineInstr *DelayAlu =
+ BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_DELAY_ALU)).addImm(Imm);
+ // Remember the s_delay_alu for next time if there is still room in it to
+ // encode another delay.
+ return (Imm & 0x780) ? nullptr : DelayAlu;
+ }
+
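The bit tests above imply the s_delay_alu immediate packs instid0 in bits [3:0], instskip in bits [6:4], and instid1 in bits [10:7], with the "4 +" and "+ 8" offsets mapping TRANS and SALU dependencies past the four VALU instid codes. A helper restating that layout (an inference from the 0xf/0x780 masks and the "Imm << 7 | Skip << 4" merge, not code from the patch):

// instid0: bits [3:0], instskip: bits [6:4], instid1: bits [10:7].
static unsigned encodeSDelayAluImm(unsigned InstId0, unsigned Skip,
                                   unsigned InstId1) {
  return (InstId0 & 0xf) | ((Skip & 0x7) << 4) | ((InstId1 & 0xf) << 7);
}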
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
+ DelayState State;
+ for (auto *Pred : MBB.predecessors())
+ State.merge(BlockState[Pred]);
+
+ LLVM_DEBUG(dbgs() << " State at start of " << printMBBReference(MBB)
+ << "\n";
+ State.dump(TRI););
+
+ bool Changed = false;
+ MachineInstr *LastDelayAlu = nullptr;
+
+ // Iterate over the contents of bundles, but don't emit any instructions
+ // inside a bundle.
+ for (auto &MI : MBB.instrs()) {
+ if (MI.isBundle() || MI.isMetaInstruction())
+ continue;
+
+ // Ignore some more instructions that do not generate any code.
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_RETURN_TO_EPILOG:
+ continue;
+ }
+
+ DelayType Type = getDelayType(MI.getDesc().TSFlags);
+
+ if (instructionWaitsForVALU(MI)) {
+ // Forget about all outstanding VALU delays.
+ State = DelayState();
+ } else if (Type != OTHER) {
+ DelayInfo Delay;
+ // TODO: Scan implicit uses too?
+ for (const auto &Op : MI.explicit_uses()) {
+ if (Op.isReg()) {
+ // One of the writelane's input operands is tied to its output operand;
+ // treating that operand as a use would insert redundant delays, so
+ // ignore it.
+ if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32 && Op.isTied())
+ continue;
+ for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI) {
+ auto It = State.find(*UI);
+ if (It != State.end()) {
+ Delay.merge(It->second);
+ State.erase(*UI);
+ }
+ }
+ }
+ }
+ if (Emit && !MI.isBundledWithPred()) {
+ // TODO: For VALU->SALU delays should we use s_delay_alu or s_nop or
+ // just ignore them?
+ LastDelayAlu = emitDelayAlu(MI, Delay, LastDelayAlu);
+ }
+ }
+
+ if (Type != OTHER) {
+ // TODO: Scan implicit defs too?
+ for (const auto &Op : MI.defs()) {
+ unsigned Latency = SchedModel.computeOperandLatency(
+ &MI, MI.getOperandNo(&Op), nullptr, 0);
+ for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI)
+ State[*UI] = DelayInfo(Type, Latency);
+ }
+ }
+
+ // Advance by the number of cycles it takes to issue this instruction.
+ // TODO: Use a more advanced model that accounts for instructions that
+ // take multiple cycles to issue on a particular pipeline.
+ unsigned Cycles = SIInstrInfo::getNumWaitStates(MI);
+ // TODO: In wave64 mode, double the number of cycles for VALU and VMEM
+ // instructions on the assumption that they will usually have to be issued
+ // twice?
+ State.advance(Type, Cycles);
+
+ LLVM_DEBUG(dbgs() << " State after " << MI; State.dump(TRI););
+ }
+
+ if (Emit) {
+ assert(State == BlockState[&MBB] &&
+ "Basic block state should not have changed on final pass!");
+ } else if (State != BlockState[&MBB]) {
+ BlockState[&MBB] = std::move(State);
+ Changed = true;
+ }
+ return Changed;
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
+ << "\n");
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasDelayAlu())
+ return false;
+
+ SII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+
+ SchedModel.init(&ST);
+
+ // Calculate the delay state for each basic block, iterating until we reach
+ // a fixed point.
+ SetVector<MachineBasicBlock *> WorkList;
+ for (auto &MBB : reverse(MF))
+ WorkList.insert(&MBB);
+ while (!WorkList.empty()) {
+ auto &MBB = *WorkList.pop_back_val();
+ bool Changed = runOnMachineBasicBlock(MBB, false);
+ if (Changed)
+ WorkList.insert(MBB.succ_begin(), MBB.succ_end());
+ }
+
+ LLVM_DEBUG(dbgs() << "Final pass over all BBs\n");
+
+ // Make one last pass over all basic blocks to emit s_delay_alu
+ // instructions.
+ bool Changed = false;
+ for (auto &MBB : MF)
+ Changed |= runOnMachineBasicBlock(MBB, true);
+ return Changed;
+ }
+};
+
+} // namespace
+
+char AMDGPUInsertDelayAlu::ID = 0;
+
+char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAlu::ID;
+
+INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU",
+ false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4f1d700bcd84..695093322a01 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -110,33 +110,42 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
llvm_unreachable("Should never be called!");
}
-/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
-/// the modified arguments.
+/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates an intrinsic call with
+/// the modified arguments (based on OldIntr), and replaces InstToReplace with
+/// the newly created intrinsic call.
static Optional<Instruction *> modifyIntrinsicCall(
- IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
+ IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
+ InstCombiner &IC,
std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
Func) {
SmallVector<Type *, 4> ArgTys;
- if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+ if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
return None;
- SmallVector<Value *, 8> Args(II.args());
+ SmallVector<Value *, 8> Args(OldIntr.args());
// Modify arguments and types
Func(Args, ArgTys);
- Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
+ Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
CallInst *NewCall = IC.Builder.CreateCall(I, Args);
- NewCall->takeName(&II);
- NewCall->copyMetadata(II);
+ NewCall->takeName(&OldIntr);
+ NewCall->copyMetadata(OldIntr);
if (isa<FPMathOperator>(NewCall))
- NewCall->copyFastMathFlags(&II);
+ NewCall->copyFastMathFlags(&OldIntr);
// Erase and replace uses
- if (!II.getType()->isVoidTy())
- IC.replaceInstUsesWith(II, NewCall);
- return IC.eraseInstFromFunction(II);
+ if (!InstToReplace.getType()->isVoidTy())
+ IC.replaceInstUsesWith(InstToReplace, NewCall);
+
+ bool RemoveOldIntr = &OldIntr != &InstToReplace;
+
+ auto RetValue = IC.eraseInstFromFunction(InstToReplace);
+ if (RemoveOldIntr)
+ IC.eraseInstFromFunction(OldIntr);
+
+ return RetValue;
}
static Optional<Instruction *>
@@ -153,7 +162,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
ImageDimIntr->Dim);
return modifyIntrinsicCall(
- II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->LodIndex);
});
}
@@ -170,7 +179,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
ImageDimIntr->Dim);
return modifyIntrinsicCall(
- II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->MipIndex);
});
}
@@ -187,7 +196,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
ImageDimIntr->Dim);
return modifyIntrinsicCall(
- II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
});
@@ -205,13 +214,41 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(
OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
return modifyIntrinsicCall(
- II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
});
}
}
}
+ // Try to use D16
+ if (ST->hasD16Images()) {
+
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
+
+ if (BaseOpcode->HasD16) {
+
+ // If the only use of the image intrinsic is an fptrunc to half, replace
+ // both the fptrunc and the image intrinsic with the D16 variant of the
+ // image intrinsic.
+ if (II.hasOneUse()) {
+ Instruction *User = II.user_back();
+
+ if (User->getOpcode() == Instruction::FPTrunc &&
+ User->getType()->getScalarType()->isHalfTy()) {
+
+ return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
+ [&](auto &Args, auto &ArgTys) {
+ // Change return type of image intrinsic.
+ // Set it to return type of fptrunc.
+ ArgTys[0] = User->getType();
+ });
+ }
+ }
+ }
+ }
+
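An illustration of the fold, with invented IR shapes and intrinsic operands elided:

// Before: a full-rate sample whose only user truncates to half.
//   %v = call <4 x float> @llvm.amdgcn.image.sample.2d...(...)
//   %h = fptrunc <4 x float> %v to <4 x half>
// After: a single D16 sample returning half directly; the fptrunc is gone.
//   %h = call <4 x half> @llvm.amdgcn.image.sample.2d...(...)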
// Try to use A16 or G16
if (!ST->hasA16() && !ST->hasG16())
return None;
@@ -263,7 +300,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
: Type::getInt16Ty(II.getContext());
return modifyIntrinsicCall(
- II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
+ II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
if (!OnlyDerivatives) {
ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
@@ -584,6 +621,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, RightShift);
}
case Intrinsic::amdgcn_exp:
+ case Intrinsic::amdgcn_exp_row:
case Intrinsic::amdgcn_exp_compr: {
ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
unsigned EnBits = En->getZExtValue();
@@ -882,6 +920,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
}
+ case Intrinsic::amdgcn_permlane64:
+ // A constant value is trivially uniform.
+ if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
+ return IC.replaceInstUsesWith(II, C);
+ }
+ break;
case Intrinsic::amdgcn_readfirstlane:
case Intrinsic::amdgcn_readlane: {
// A constant value is trivially uniform.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 391dc8428539..23b8fcf75f16 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -355,11 +355,7 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
->;
-
-def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b7d0f0580cda..3f242fdb6d8e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -80,8 +81,11 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg,
RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
if (RC) {
const LLT Ty = MRI.getType(Reg);
- return RC->hasSuperClassEq(TRI.getBoolRC()) &&
- Ty.isValid() && Ty.getSizeInBits() == 1;
+ if (!Ty.isValid() || Ty.getSizeInBits() != 1)
+ return false;
+ // G_TRUNC s1 result is never vcc.
+ return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
+ RC->hasSuperClassEq(TRI.getBoolRC());
}
const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
@@ -91,7 +95,7 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg,
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
unsigned NewOpc) const {
MI.setDesc(TII.get(NewOpc));
- MI.RemoveOperand(1); // Remove intrinsic ID.
+ MI.removeOperand(1); // Remove intrinsic ID.
MI.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
MachineOperand &Dst = MI.getOperand(0);
@@ -216,7 +220,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
}
const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
- DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI);
+ DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
return false;
@@ -454,6 +458,24 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
return true;
}
+bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
+ MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
+
+ unsigned Opc;
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
+ Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
+ : AMDGPU::V_MAD_I64_I32_gfx11_e64;
+ else
+ Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
+ I.setDesc(TII.get(Opc));
+ I.addOperand(*MF, MachineOperand::CreateImm(0));
+ I.addImplicitDefUseOperands(*MF);
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
// TODO: We should probably legalize these to only using 32-bit results.
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@@ -481,7 +503,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
const TargetRegisterClass *SrcRC =
- TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
+ TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
if (!SrcRC)
return false;
unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(Offset / 32,
@@ -514,7 +536,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
const unsigned DstSize = DstTy.getSizeInBits();
const TargetRegisterClass *DstRC =
- TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
if (!DstRC)
return false;
@@ -556,7 +578,7 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
const TargetRegisterClass *SrcRC =
- TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
+ TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
return false;
@@ -630,7 +652,7 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
if (Src1Def && Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
MI.setDesc(TII.get(AMDGPU::COPY));
- MI.RemoveOperand(2);
+ MI.removeOperand(2);
return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI) &&
RBI.constrainGenericRegister(Src0, AMDGPU::SReg_32RegClass, *MRI);
}
@@ -643,6 +665,8 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
//
// (build_vector_trunc (lshr_oneuse $src0, 16), (lshr_oneuse $src1, 16)
// => (S_PACK_HH_B32_B16 $src0, $src1)
+ // (build_vector_trunc (lshr_oneuse SReg_32:$src0, 16), $src1)
+ // => (S_PACK_HL_B32_B16 $src0, $src1)
// (build_vector_trunc $src0, (lshr_oneuse SReg_32:$src1, 16))
// => (S_PACK_LH_B32_B16 $src0, $src1)
// (build_vector_trunc $src0, $src1)
@@ -662,14 +686,20 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
} else if (Shift1) {
Opc = AMDGPU::S_PACK_LH_B32_B16;
MI.getOperand(2).setReg(ShiftSrc1);
- } else if (Shift0 && ConstSrc1 && ConstSrc1->Value == 0) {
- // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
- auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
- .addReg(ShiftSrc0)
- .addImm(16);
+ } else if (Shift0) {
+ if (ConstSrc1 && ConstSrc1->Value == 0) {
+ // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
+ auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
+ .addReg(ShiftSrc0)
+ .addImm(16);
- MI.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ }
+ if (STI.hasSPackHL()) {
+ Opc = AMDGPU::S_PACK_HL_B32_B16;
+ MI.getOperand(1).setReg(ShiftSrc0);
+ }
}
MI.setDesc(TII.get(Opc));
@@ -722,16 +752,16 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
const TargetRegisterClass *DstRC =
- TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
if (!DstRC)
return false;
const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
const TargetRegisterClass *Src0RC =
- TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI);
+ TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
const TargetRegisterClass *Src1RC =
- TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI);
+ TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
// Deal with weird cases where the class only partially supports the subreg
// index.
@@ -970,6 +1000,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return selectGroupStaticSize(I);
case Intrinsic::returnaddress:
return selectReturnAddress(I);
+ case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
+ case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
+ case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
+ return selectSMFMACIntrin(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -1142,7 +1179,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
Optional<ValueAndVReg> Arg =
getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);
- if (Arg.hasValue()) {
+ if (Arg) {
const int64_t Value = Arg.getValue().Value.getSExtValue();
if (Value == 0) {
unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
@@ -1164,8 +1201,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
- const TargetRegisterClass *DstRC =
- TRI.getRegClassForSizeOnBank(32, *DstBank, *MRI);
+ const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
return false;
@@ -1300,12 +1336,14 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF);
unsigned Offset0 = OrderedCountIndex << 2;
- unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
- (Instruction << 4);
+ unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
Offset1 |= (CountDw - 1) << 6;
+ if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
+ Offset1 |= ShaderType << 2;
+
unsigned Offset = Offset0 | (Offset1 << 8);
Register M0Val = MI.getOperand(2).getReg();
@@ -1424,23 +1462,7 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
if (HasVSrc) {
Register VSrc = MI.getOperand(1).getReg();
-
- if (STI.needsAlignedVGPRs()) {
- // Add implicit aligned super-reg to force alignment on the data operand.
- Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
- Register NewVR =
- MRI->createVirtualRegister(&AMDGPU::VReg_64_Align2RegClass);
- BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), NewVR)
- .addReg(VSrc, 0, MI.getOperand(1).getSubReg())
- .addImm(AMDGPU::sub0)
- .addReg(Undef)
- .addImm(AMDGPU::sub1);
- MIB.addReg(NewVR, 0, AMDGPU::sub0);
- MIB.addReg(NewVR, RegState::Implicit);
- } else {
- MIB.addReg(VSrc);
- }
+ MIB.addReg(VSrc);
if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
return false;
@@ -1449,6 +1471,8 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
MIB.addImm(ImmOffset)
.cloneMemRefs(MI);
+ TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);
+
MI.eraseFromParent();
return true;
}
@@ -1523,6 +1547,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
unsigned IntrOpcode = Intr->BaseOpcode;
const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
+ const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
@@ -1627,7 +1652,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
// The legalizer preprocessed the intrinsic arguments. If we aren't using
- // NSA, these should have beeen packed into a single value in the first
+ // NSA, these should have been packed into a single value in the first
// address register
const bool UseNSA = NumVAddrRegs != 1 && NumVAddrDwords == NumVAddrRegs;
if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
@@ -1639,13 +1664,29 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
++NumVDataDwords;
int Opcode = -1;
- if (IsGFX10Plus) {
+ if (IsGFX11Plus) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
+ UseNSA ? AMDGPU::MIMGEncGfx11NSA
+ : AMDGPU::MIMGEncGfx11Default,
+ NumVDataDwords, NumVAddrDwords);
+ } else if (IsGFX10Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx10NSA
: AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, NumVAddrDwords);
} else {
- if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (Subtarget->hasGFX90AInsts()) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
+ NumVDataDwords, NumVAddrDwords);
+ if (Opcode == -1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "requested image instruction is not supported on this GPU\n");
+ return false;
+ }
+ }
+ if (Opcode == -1 &&
+ STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
@@ -1703,7 +1744,13 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (IsGFX10Plus)
MIB.addImm(IsA16 ? -1 : 0);
- MIB.addImm(TFE); // tfe
+ if (!Subtarget->hasGFX90AInsts()) {
+ MIB.addImm(TFE); // tfe
+ } else if (TFE) {
+ LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n");
+ return false;
+ }
+
MIB.addImm(LWE); // lwe
if (!IsGFX10Plus)
MIB.addImm(DimInfo->DA ? -1 : 0);
@@ -1743,7 +1790,9 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
MI.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
+ return true;
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
@@ -1770,10 +1819,22 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrier(I);
case Intrinsic::amdgcn_global_atomic_fadd:
return selectGlobalAtomicFadd(I, I.getOperand(2), I.getOperand(3));
- default: {
- return selectImpl(I, *CoverageInfo);
- }
+ case Intrinsic::amdgcn_raw_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_buffer_load_lds:
+ return selectBufferLoadLds(I);
+ case Intrinsic::amdgcn_global_load_lds:
+ return selectGlobalLoadLds(I);
+ case Intrinsic::amdgcn_exp_compr:
+ if (!STI.hasCompressedExport()) {
+ Function &F = I.getMF()->getFunction();
+ DiagnosticInfoUnsupported NoFpRet(
+ F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
+ F.getContext().diagnose(NoFpRet);
+ return false;
+ }
+ break;
}
+ return selectImpl(I, *CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
@@ -1872,10 +1933,10 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
- const TargetRegisterClass *SrcRC
- = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI);
- const TargetRegisterClass *DstRC
- = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI);
+ const TargetRegisterClass *SrcRC =
+ TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
+ const TargetRegisterClass *DstRC =
+ TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
if (!SrcRC || !DstRC)
return false;
@@ -2014,10 +2075,10 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
return selectCOPY(I);
const TargetRegisterClass *SrcRC =
- TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank, *MRI);
+ TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
const TargetRegisterClass *DstRC =
- TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
Register UndefReg = MRI->createVirtualRegister(SrcRC);
BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
@@ -2384,65 +2445,6 @@ bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
return selectImpl(I, *CoverageInfo);
}
-// TODO: No rtn optimization.
-bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
- MachineInstr &MI) const {
- Register PtrReg = MI.getOperand(1).getReg();
- const LLT PtrTy = MRI->getType(PtrReg);
- if (PtrTy.getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
- STI.useFlatForGlobal())
- return selectImpl(MI, *CoverageInfo);
-
- Register DstReg = MI.getOperand(0).getReg();
- const LLT Ty = MRI->getType(DstReg);
- const bool Is64 = Ty.getSizeInBits() == 64;
- const unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
- Register TmpReg = MRI->createVirtualRegister(
- Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
-
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock *BB = MI.getParent();
-
- Register VAddr, RSrcReg, SOffset;
- int64_t Offset = 0;
-
- unsigned Opcode;
- if (selectMUBUFOffsetImpl(MI.getOperand(1), RSrcReg, SOffset, Offset)) {
- Opcode = Is64 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN :
- AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN;
- } else if (selectMUBUFAddr64Impl(MI.getOperand(1), VAddr,
- RSrcReg, SOffset, Offset)) {
- Opcode = Is64 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN :
- AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN;
- } else
- return selectImpl(MI, *CoverageInfo);
-
- auto MIB = BuildMI(*BB, &MI, DL, TII.get(Opcode), TmpReg)
- .addReg(MI.getOperand(2).getReg());
-
- if (VAddr)
- MIB.addReg(VAddr);
-
- MIB.addReg(RSrcReg);
- if (SOffset)
- MIB.addReg(SOffset);
- else
- MIB.addImm(0);
-
- MIB.addImm(Offset);
- MIB.addImm(AMDGPU::CPol::GLC);
- MIB.cloneMemRefs(MI);
-
- BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), DstReg)
- .addReg(TmpReg, RegState::Kill, SubReg);
-
- MI.eraseFromParent();
-
- MRI->setRegClass(
- DstReg, Is64 ? &AMDGPU::VReg_64RegClass : &AMDGPU::VGPR_32RegClass);
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
if (Reg.isPhysical())
return false;
@@ -2551,7 +2553,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
// Try to avoid emitting a bit operation when we only need to touch half of
// the 64-bit pointer.
- APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
+ APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zext(64);
const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
@@ -2571,12 +2573,10 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
const TargetRegisterClass &RegRC
= IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
- const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
- *MRI);
- const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
- *MRI);
+ const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
+ const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
const TargetRegisterClass *MaskRC =
- TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB, *MRI);
+ TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
@@ -2689,10 +2689,10 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
return false;
- const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB,
- *MRI);
- const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB,
- *MRI);
+ const TargetRegisterClass *SrcRC =
+ TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
+ const TargetRegisterClass *DstRC =
+ TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
if (!SrcRC || !DstRC)
return false;
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
@@ -2771,10 +2771,10 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
return false;
- const TargetRegisterClass *VecRC = TRI.getRegClassForTypeOnBank(VecTy, *VecRB,
- *MRI);
- const TargetRegisterClass *ValRC = TRI.getRegClassForTypeOnBank(ValTy, *ValRB,
- *MRI);
+ const TargetRegisterClass *VecRC =
+ TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
+ const TargetRegisterClass *ValRC =
+ TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
!RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
@@ -2867,7 +2867,6 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
return false;
assert(ShufMask.size() == 2);
- assert(STI.hasSDWA() && "no target has VOP3P but not SDWA");
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -2924,17 +2923,28 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
}
} else if (Mask[0] == 0 && Mask[1] == 0) {
if (IsVALU) {
- // Write low half of the register into the high half.
- MachineInstr *MovSDWA =
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
- .addImm(0) // $src0_modifiers
- .addReg(SrcVec) // $src0
- .addImm(0) // $clamp
- .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel
- .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
- .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel
- .addReg(SrcVec, RegState::Implicit);
- MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+ if (STI.hasSDWA()) {
+ // Write low half of the register into the high half.
+ MachineInstr *MovSDWA =
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
+ .addImm(0) // $src0_modifiers
+ .addReg(SrcVec) // $src0
+ .addImm(0) // $clamp
+ .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel
+ .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
+ .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel
+ .addReg(SrcVec, RegState::Implicit);
+ MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+ } else {
+ Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
+ .addImm(0xFFFF)
+ .addReg(SrcVec);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg)
+ .addReg(TmpReg)
+ .addImm(16)
+ .addReg(TmpReg);
+ }
} else {
BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
.addReg(SrcVec)
@@ -2942,17 +2952,28 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
}
} else if (Mask[0] == 1 && Mask[1] == 1) {
if (IsVALU) {
- // Write high half of the register into the low half.
- MachineInstr *MovSDWA =
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
- .addImm(0) // $src0_modifiers
- .addReg(SrcVec) // $src0
- .addImm(0) // $clamp
- .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel
- .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
- .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel
- .addReg(SrcVec, RegState::Implicit);
- MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+ if (STI.hasSDWA()) {
+ // Write high half of the register into the low half.
+ MachineInstr *MovSDWA =
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
+ .addImm(0) // $src0_modifiers
+ .addReg(SrcVec) // $src0
+ .addImm(0) // $clamp
+ .addImm(AMDGPU::SDWA::WORD_0) // $dst_sel
+ .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
+ .addImm(AMDGPU::SDWA::WORD_1) // $src0_sel
+ .addReg(SrcVec, RegState::Implicit);
+ MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
+ } else {
+ Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
+ .addImm(16)
+ .addReg(SrcVec);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), DstReg)
+ .addReg(TmpReg)
+ .addImm(16)
+ .addReg(TmpReg);
+ }
} else {
BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HH_B32_B16), DstReg)
.addReg(SrcVec)
@@ -2965,13 +2986,19 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
.addReg(SrcVec)
.addImm(16);
} else {
- Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg)
- .addReg(SrcVec)
- .addImm(16);
- BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
- .addReg(TmpReg)
- .addReg(SrcVec);
+ if (STI.hasSPackHL()) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_HL_B32_B16), DstReg)
+ .addReg(SrcVec)
+ .addReg(SrcVec);
+ } else {
+ Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg)
+ .addReg(SrcVec)
+ .addImm(16);
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_PACK_LL_B32_B16), DstReg)
+ .addReg(TmpReg)
+ .addReg(SrcVec);
+ }
}
} else
llvm_unreachable("all shuffle masks should be handled");
@@ -2982,13 +3009,15 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
MachineInstr &MI) const {
- if (STI.hasGFX90AInsts())
+ const Register DefReg = MI.getOperand(0).getReg();
+ LLT DefTy = MRI->getType(DefReg);
+ if (AMDGPU::hasAtomicFaddRtnForTy(STI, DefTy))
return selectImpl(MI, *CoverageInfo);
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- if (!MRI->use_nodbg_empty(MI.getOperand(0).getReg())) {
+ if (!MRI->use_nodbg_empty(DefReg)) {
Function &F = MBB->getParent()->getFunction();
DiagnosticInfoUnsupported
NoFpRet(F, "return versions of fp atomics not supported",
@@ -3105,9 +3134,236 @@ bool AMDGPUInstructionSelector::selectGlobalAtomicFadd(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
+bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
+ unsigned Opc;
+ unsigned Size = MI.getOperand(3).getImm();
+
+ // The struct intrinsic variants add one additional operand over raw.
+ const bool HasVIndex = MI.getNumOperands() == 9;
+ Register VIndex;
+ int OpOffset = 0;
+ if (HasVIndex) {
+ VIndex = MI.getOperand(4).getReg();
+ OpOffset = 1;
+ }
+
+ Register VOffset = MI.getOperand(4 + OpOffset).getReg();
+ Optional<ValueAndVReg> MaybeVOffset =
+ getIConstantVRegValWithLookThrough(VOffset, *MRI);
+ const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
+
+ switch (Size) {
+ default:
+ return false;
+ case 1:
+ Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
+ : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
+ : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
+ : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
+ break;
+ case 2:
+ Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
+ : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
+ : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
+ : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
+ break;
+ case 4:
+ Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
+ : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
+ : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
+ : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
+ break;
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .add(MI.getOperand(2));
+
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc));
+
+ if (HasVIndex && HasVOffset) {
+ Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
+ BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
+ .addReg(VIndex)
+ .addImm(AMDGPU::sub0)
+ .addReg(VOffset)
+ .addImm(AMDGPU::sub1);
+
+ MIB.addReg(IdxReg);
+ } else if (HasVIndex) {
+ MIB.addReg(VIndex);
+ } else if (HasVOffset) {
+ MIB.addReg(VOffset);
+ }
+
+ MIB.add(MI.getOperand(1)); // rsrc
+ MIB.add(MI.getOperand(5 + OpOffset)); // soffset
+ MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
+ unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
+ MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
+ MIB.addImm((Aux >> 3) & 1); // swz
+
+ MachineMemOperand *LoadMMO = *MI.memoperands_begin();
+ MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
+ LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
+ MachinePointerInfo StorePtrI = LoadPtrI;
+ StorePtrI.V = nullptr;
+ StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
+
+ auto F = LoadMMO->getFlags() &
+ ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
+ LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
+ Size, LoadMMO->getBaseAlign());
+
+ MachineMemOperand *StoreMMO =
+ MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
+ sizeof(int32_t), LoadMMO->getBaseAlign());
+
+ MIB.setMemRefs({LoadMMO, StoreMMO});
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
+/// Match a zero extend from a 32-bit value to 64-bits.
+static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
+ Register ZExtSrc;
+ if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc))))
+ return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register();
+
+ // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
+ const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
+ return Register();
+
+ if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
+ return Def->getOperand(1).getReg();
+ }
+
+ return Register();
+}
+
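Both the pre- and post-legalization shapes of a 32-to-64-bit zero extend are accepted; either of these illustrative generic MIR forms matches and yields %lo:

//   %ext:_(s64) = G_ZEXT %lo:_(s32)
// or, after legalization:
//   %zero:_(s32) = G_CONSTANT i32 0
//   %ext:_(s64) = G_MERGE_VALUES %lo:_(s32), %zero:_(s32)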
+bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
+ unsigned Opc;
+ unsigned Size = MI.getOperand(3).getImm();
+
+ switch (Size) {
+ default:
+ return false;
+ case 1:
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
+ break;
+ case 2:
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
+ break;
+ case 4:
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
+ break;
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .add(MI.getOperand(2));
+
+ Register Addr = MI.getOperand(1).getReg();
+ Register VOffset;
+ // Try to split SAddr and VOffset. Global and LDS pointers share the same
+ // immediate offset, so we cannot use a regular SelectGlobalSAddr().
+ if (!isSGPR(Addr)) {
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+ if (isSGPR(AddrDef->Reg)) {
+ Addr = AddrDef->Reg;
+ } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
+ Register SAddr =
+ getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);
+ if (SAddr && isSGPR(SAddr)) {
+ Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
+ if (Register Off = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
+ Addr = SAddr;
+ VOffset = Off;
+ }
+ }
+ }
+ }
+
+ if (isSGPR(Addr)) {
+ Opc = AMDGPU::getGlobalSaddrOp(Opc);
+ if (!VOffset) {
+ VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
+ .addImm(0);
+ }
+ }
+
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
+ .addReg(Addr);
+
+ if (isSGPR(Addr))
+ MIB.addReg(VOffset);
+
+ MIB.add(MI.getOperand(4)) // offset
+ .add(MI.getOperand(5)); // cpol
+
+ MachineMemOperand *LoadMMO = *MI.memoperands_begin();
+ MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
+ LoadPtrI.Offset = MI.getOperand(4).getImm();
+ MachinePointerInfo StorePtrI = LoadPtrI;
+ LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
+ StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
+ auto F = LoadMMO->getFlags() &
+ ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
+ LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
+ Size, LoadMMO->getBaseAlign());
+ MachineMemOperand *StoreMMO =
+ MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
+ sizeof(int32_t), Align(4));
+
+ MIB.setMemRefs({LoadMMO, StoreMMO});
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const{
MI.setDesc(TII.get(MI.getOperand(1).getImm()));
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
+ MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
+ unsigned Opc;
+ switch (MI.getIntrinsicID()) {
+ case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
+ Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
+ Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
+ Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
+ Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
+ Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
+ Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
+ break;
+ default:
+ llvm_unreachable("unhandled smfmac intrinsic");
+ }
+
+ auto VDst_In = MI.getOperand(4);
+
+ MI.setDesc(TII.get(Opc));
+ MI.removeOperand(4); // VDst_In
+ MI.removeOperand(1); // Intrinsic ID
+ MI.addOperand(VDst_In); // Readd VDst_In to the end
MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
return true;
}
@@ -3166,6 +3422,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_UADDE:
case TargetOpcode::G_USUBE:
return selectG_UADDO_USUBO_UADDE_USUBE(I);
+ case AMDGPU::G_AMDGPU_MAD_U64_U32:
+ case AMDGPU::G_AMDGPU_MAD_I64_I32:
+ return selectG_AMDGPU_MAD_64_32(I);
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
case TargetOpcode::G_PTRTOINT:
@@ -3226,8 +3485,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
return selectG_LOAD_STORE_ATOMICRMW(I);
- case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
- return selectG_AMDGPU_ATOMIC_CMPXCHG(I);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_TRUNC:
@@ -3286,9 +3543,8 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
}
-std::pair<Register, unsigned>
-AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
- bool AllowAbs) const {
+std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
+ MachineOperand &Root, bool AllowAbs, bool OpSel, bool ForceVGPR) const {
Register Src = Root.getReg();
Register OrigSrc = Src;
unsigned Mods = 0;
@@ -3305,7 +3561,10 @@ AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
Mods |= SISrcMods::ABS;
}
- if (Mods != 0 &&
+ if (OpSel)
+ Mods |= SISrcMods::OP_SEL_0;
+
+ if ((Mods != 0 || ForceVGPR) &&
RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
MachineInstr *UseMI = Root.getParent();
@@ -3407,7 +3666,7 @@ AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
- Register Src, const MachineRegisterInfo &MRI) const {
+ Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const {
unsigned Mods = 0;
MachineInstr *MI = MRI.getVRegDef(Src);
@@ -3421,6 +3680,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl(
}
// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
+ (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()
// Packed instructions do not have abs modifiers.
Mods |= SISrcMods::OP_SEL_1;
@@ -3444,6 +3704,50 @@ AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
+ MachineRegisterInfo &MRI
+ = Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDotIUVOP3PMods(MachineOperand &Root) const {
+ // A literal i1 value set in the intrinsic represents the SrcMods for the
+ // next operand. The value is stored in the Imm operand as an i1 sign
+ // extended to int64_t: 1 (i.e. -1) promotes packed values to signed, 0
+ // treats them as unsigned.
+ assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
+ "expected i1 value");
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Root.getImm() == -1)
+ Mods ^= SISrcMods::NEG;
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
+ MachineOperand &Root) const {
+ assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
+ "expected i1 value");
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Root.getImm() != 0)
+ Mods |= SISrcMods::OP_SEL_0;
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
Register Src;
unsigned Mods;
@@ -3467,6 +3771,36 @@ AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
+ /* AllowAbs */ false,
+ /* OpSel */ false,
+ /* ForceVGPR */ true);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
+ /* AllowAbs */ false,
+ /* OpSel */ true,
+ /* ForceVGPR */ true);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
SmallVector<GEPInfo, 4> AddrInfo;
getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
@@ -3594,24 +3928,6 @@ AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
}};
}
-/// Match a zero extend from a 32-bit value to 64-bits.
-static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
- Register ZExtSrc;
- if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc))))
- return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register();
-
- // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
- const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
- if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
- return false;
-
- if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
- return Def->getOperand(1).getReg();
- }
-
- return Register();
-}
-
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
@@ -3631,9 +3947,6 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
ImmOffset = ConstOffset;
} else {
auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
- if (!PtrBaseDef)
- return None;
-
if (isSGPR(PtrBaseDef->Reg)) {
if (ConstOffset > 0) {
// Offset is too large.
@@ -3679,11 +3992,8 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
}
}
- auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
- if (!AddrDef)
- return None;
-
// Match the variable offset.
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
// Look through the SGPR->VGPR copy.
Register SAddr =
@@ -3749,9 +4059,6 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
}
auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
- if (!AddrDef)
- return None;
-
if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
int FI = AddrDef->MI->getOperand(1).getIndex();
return {{
@@ -3768,8 +4075,7 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);
- if (LHSDef && RHSDef &&
- LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
+ if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
isSGPR(RHSDef->Reg)) {
int FI = LHSDef->MI->getOperand(1).getIndex();
MachineInstr &I = *Root.getParent();
@@ -3792,6 +4098,74 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
}};
}
+// Check whether the flat scratch SVS swizzle bug affects this access.
+bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
+ Register VAddr, Register SAddr, uint64_t ImmOffset) const {
+ if (!Subtarget->hasFlatScratchSVSSwizzleBug())
+ return false;
+
+ // The bug affects the swizzling of SVS accesses if there is any carry out
+ // from the two low-order bits (i.e. from bit 1 into bit 2) when adding
+ // voffset to (soffset + inst_offset).
+ auto VKnown = KnownBits->getKnownBits(VAddr);
+ auto SKnown = KnownBits::computeForAddSub(
+ true, false, KnownBits->getKnownBits(SAddr),
+ KnownBits::makeConstant(APInt(32, ImmOffset)));
+ uint64_t VMax = VKnown.getMaxValue().getZExtValue();
+ uint64_t SMax = SKnown.getMaxValue().getZExtValue();
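+ // Conservative check: a carry out of bit 1 is possible whenever the known
+ // maxima of the low two bits of the two addends can sum to 4 or more.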
+ return (VMax & 3) + (SMax & 3) >= 4;
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
+ Register Addr = Root.getReg();
+ Register PtrBase;
+ int64_t ConstOffset;
+ int64_t ImmOffset = 0;
+
+ // Match the immediate offset first, which canonically is moved as low as
+ // possible.
+ std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
+
+ if (ConstOffset != 0 &&
+ TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+ Addr = PtrBase;
+ ImmOffset = ConstOffset;
+ }
+
+ auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
+ if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
+ return None;
+
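+ // SV addressing needs the variable offset (RHS) in a VGPR and the base
+ // (LHS) as either an SGPR or a frame index.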
+ Register RHS = AddrDef->MI->getOperand(2).getReg();
+ if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
+ return None;
+
+ Register LHS = AddrDef->MI->getOperand(1).getReg();
+ auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
+
+ if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
+ return None;
+
+ if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
+ int FI = LHSDef->MI->getOperand(1).getIndex();
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+ [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ }};
+ }
+
+ if (!isSGPR(LHS))
+ return None;
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
+ }};
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MachineInstr *MI = Root.getParent();
@@ -3856,7 +4230,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MIB.addReg(Info->getScratchRSrcReg());
},
[=](MachineInstrBuilder &MIB) { // vaddr
- if (FI.hasValue())
+ if (FI)
MIB.addFrameIndex(FI.getValue());
else
MIB.addReg(VAddr);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 42095332d11a..22672ba59e76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -97,6 +97,7 @@ private:
bool selectG_AND_OR_XOR(MachineInstr &I) const;
bool selectG_ADD_SUB(MachineInstr &I) const;
bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
+ bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const;
bool selectG_EXTRACT(MachineInstr &I) const;
bool selectG_MERGE_VALUES(MachineInstr &I) const;
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
@@ -133,7 +134,6 @@ private:
void initM0(MachineInstr &I) const;
bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const;
- bool selectG_AMDGPU_ATOMIC_CMPXCHG(MachineInstr &I) const;
bool selectG_SELECT(MachineInstr &I) const;
bool selectG_BRCOND(MachineInstr &I) const;
bool selectG_GLOBAL_VALUE(MachineInstr &I) const;
@@ -144,11 +144,15 @@ private:
bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const;
bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp,
MachineOperand &DataOp) const;
+ bool selectBufferLoadLds(MachineInstr &MI) const;
+ bool selectGlobalLoadLds(MachineInstr &MI) const;
bool selectBVHIntrinsic(MachineInstr &I) const;
+ bool selectSMFMACIntrin(MachineInstr &I) const;
bool selectWaveAddress(MachineInstr &I) const;
- std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
- bool AllowAbs = true) const;
+ std::pair<Register, unsigned>
+ selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true,
+ bool OpSel = false, bool ForceVGPR = false) const;
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
@@ -173,15 +177,30 @@ private:
selectVOP3Mods_nnan(MachineOperand &Root) const;
std::pair<Register, unsigned>
- selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI) const;
+ selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI,
+ bool IsDOT = false) const;
InstructionSelector::ComplexRendererFns
selectVOP3PMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVOP3PModsDOT(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectDotIUVOP3PMods(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
selectVOP3OpSelMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVINTERPMods(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectVINTERPModsHi(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
selectSmrdImm(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectSmrdImm32(MachineOperand &Root) const;
@@ -203,6 +222,10 @@ private:
InstructionSelector::ComplexRendererFns
selectScratchSAddr(MachineOperand &Root) const;
+ bool checkFlatScratchSVSSwizzleBug(Register VAddr, Register SAddr,
+ uint64_t ImmOffset) const;
+ InstructionSelector::ComplexRendererFns
+ selectScratchSVAddr(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffen(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 7d3dbfd7e851..31012915457b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -40,7 +40,7 @@ class AMDGPUInst <dag outs, dag ins, string asm = "",
// instructions to not match without killing the whole decode process. It is
// mainly used for ARM, but Tablegen expects this field to exist or it fails
// to build the decode table.
- field bits<64> SoftFail = 0;
+ field bits<96> SoftFail = 0;
let DecoderNamespace = Namespace;
@@ -87,6 +87,17 @@ class PredConcat<list<Predicate> lst, Predicate pred> {
!listconcat([pred], !filter(item, lst, !ne(item, pred)));
}
+// Add a Register to the list if it does not already exist
+class RegAppend<list<Register> lst, Register reg> {
+ list<Register> ret =
+ !listconcat([reg], !filter(item, lst, !ne(item, reg)));
+}
+// Get the union of two Register lists
+class RegListUnion<list<Register> lstA, list<Register> lstB> {
+ list<Register> ret =
+ !foldl(lstA, lstB, temp, item, RegAppend<temp, item>.ret);
+}
+
class PredicateControl {
Predicate SubtargetPredicate = TruePredicate;
Predicate AssemblerPredicate = TruePredicate;
@@ -444,34 +455,28 @@ def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
let IsNonExtLoad = 1;
}
-def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+def extloadi8_#as : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> {
let IsLoad = 1;
- let MemoryVT = i8;
}
-def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> {
let IsLoad = 1;
- let MemoryVT = i16;
}
-def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> {
let IsLoad = 1;
- let MemoryVT = i8;
}
-def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> {
let IsLoad = 1;
- let MemoryVT = i16;
}
-def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> {
let IsLoad = 1;
- let MemoryVT = i8;
}
-def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
let IsLoad = 1;
- let MemoryVT = i16;
}
def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
@@ -498,17 +503,15 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
foreach as = [ "global", "flat", "local", "private", "region" ] in {
-let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
+let IsStore = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
(unindexedstore node:$val, node:$ptr)> {
- let IsStore = 1;
let IsTruncStore = 0;
}
// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
(unindexedstore node:$val, node:$ptr)> {
- let IsStore = 1;
let IsTruncStore = 1;
}
@@ -517,90 +520,133 @@ def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
- let MemoryVT = i8;
-}
-
+ (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr)> {
- let IsStore = 1;
- let MemoryVT = i16;
-}
+ (truncstorei16 node:$val, node:$ptr)>;
def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
-defm atomic_store_#as : binary_atomic_op<atomic_store>;
+} // End let IsStore = 1, AddressSpaces = ...
-} // End let AddressSpaces
+let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
+def atomic_store_8_#as : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_8 node:$ptr, node:$val)>;
+def atomic_store_16_#as : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_16 node:$ptr, node:$val)>;
+def atomic_store_32_#as : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_32 node:$ptr, node:$val)>;
+def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_64 node:$ptr, node:$val)>;
+}
} // End foreach as
+// TODO: Add GISelPredicateCode for the ret and noret PatFrags once
+// GlobalISelEmitter allows pattern matches where the src and dst def counts
+// mismatch.
+
+multiclass ret_noret_op {
+ let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
+ GISelPredicateCode = [{ return true; }] in {
+ def "_ret" : PatFrag<(ops node:$ptr, node:$data),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
+ }
+
+ let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
+ GISelPredicateCode = [{ return false; }] in {
+ def "_noret" : PatFrag<(ops node:$ptr, node:$data),
+ (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
+ }
+}
+
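+// For example, int_amdgcn_flat_atomic_fadd below expands into the
+// int_amdgcn_flat_atomic_fadd_ret and int_amdgcn_flat_atomic_fadd_noret
+// fragments, distinguished by whether the atomic's result has any uses.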
+defm int_amdgcn_flat_atomic_fadd : ret_noret_op;
+defm int_amdgcn_flat_atomic_fadd_v2bf16 : ret_noret_op;
+defm int_amdgcn_flat_atomic_fmin : ret_noret_op;
+defm int_amdgcn_flat_atomic_fmax : ret_noret_op;
+defm int_amdgcn_global_atomic_fadd : ret_noret_op;
+defm int_amdgcn_global_atomic_fadd_v2bf16 : ret_noret_op;
+defm int_amdgcn_global_atomic_fmin : ret_noret_op;
+defm int_amdgcn_global_atomic_fmax : ret_noret_op;
+defm int_amdgcn_ds_fadd_v2bf16 : ret_noret_op;
multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
+ let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
+ GISelPredicateCode = [{ return false; }] in {
+ defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
+ }
+
+ let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
+ GISelPredicateCode = [{ return true; }] in {
+ defm "_ret" : binary_atomic_op<atomic_op, IsInt>;
+ }
+}
+
+multiclass ret_noret_ternary_atomic_op<SDNode atomic_op> {
+ let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }],
+ GISelPredicateCode = [{ return false; }] in {
+ defm "_noret" : ternary_atomic_op<atomic_op>;
+ }
+
+ let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }],
+ GISelPredicateCode = [{ return true; }] in {
+ defm "_ret" : ternary_atomic_op<atomic_op>;
+ }
+}
+
+multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
-
- let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in {
- defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>;
- }
-
- let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in {
- defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>;
- }
+ defm "_"#as : ret_noret_binary_atomic_op<atomic_op, IsInt>;
}
}
}
-defm atomic_swap : ret_noret_binary_atomic_op<atomic_swap>;
-defm atomic_load_add : ret_noret_binary_atomic_op<atomic_load_add>;
-defm atomic_load_and : ret_noret_binary_atomic_op<atomic_load_and>;
-defm atomic_load_max : ret_noret_binary_atomic_op<atomic_load_max>;
-defm atomic_load_min : ret_noret_binary_atomic_op<atomic_load_min>;
-defm atomic_load_or : ret_noret_binary_atomic_op<atomic_load_or>;
-defm atomic_load_sub : ret_noret_binary_atomic_op<atomic_load_sub>;
-defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
-defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
-defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
-defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
+defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;
+defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;
+defm atomic_load_and : binary_atomic_op_all_as<atomic_load_and>;
+defm atomic_load_max : binary_atomic_op_all_as<atomic_load_max>;
+defm atomic_load_min : binary_atomic_op_all_as<atomic_load_min>;
+defm atomic_load_or : binary_atomic_op_all_as<atomic_load_or>;
+defm atomic_load_sub : binary_atomic_op_all_as<atomic_load_sub>;
+defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
+defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
+defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
+defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>;
let MemoryVT = v2f16 in
-defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
-defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;
+defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
+defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
- Aligned<8> {
+ Aligned<8> {
let IsLoad = 1;
- let IsNonExtLoad = 1;
}
def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
Aligned<16> {
let IsLoad = 1;
- let IsNonExtLoad = 1;
}
def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
(store_local node:$val, node:$ptr)>, Aligned<8> {
let IsStore = 1;
- let IsTruncStore = 0;
}
def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
(store_local node:$val, node:$ptr)>, Aligned<16> {
let IsStore = 1;
- let IsTruncStore = 0;
}
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
-defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
+defm atomic_cmp_swap_local : ret_noret_ternary_atomic_op<atomic_cmp_swap>;
+defm atomic_cmp_swap_local_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>;
}
let AddressSpaces = StoreAddress_region.AddrSpaces in {
-defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
-defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
+defm atomic_cmp_swap_region : ret_noret_ternary_atomic_op<atomic_cmp_swap>;
+defm atomic_cmp_swap_region_m0 : ret_noret_ternary_atomic_op<atomic_cmp_swap_glue>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 645d05aa9238..01a3e78ea48c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
#define DEBUG_TYPE "amdgpu-legalinfo"
@@ -134,7 +135,6 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
static LLT getBitcastRegisterType(const LLT Ty) {
const unsigned Size = Ty.getSizeInBits();
- LLT CoercedTy;
if (Size <= 32) {
// <2 x s8> -> s16
// <4 x s8> -> s32
@@ -530,13 +530,22 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) {
// Full set of gfx9 features.
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
.legalFor({S32, S16, V2S16})
+ .clampMaxNumElementsStrict(0, S16, 2)
+ .scalarize(0)
.minScalar(0, S16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, S32);
+
+ getActionDefinitionsBuilder(G_MUL)
+ .legalFor({S32, S16, V2S16})
.clampMaxNumElementsStrict(0, S16, 2)
+ .scalarize(0)
+ .minScalar(0, S16)
.widenScalarToNextMultipleOf(0, 32)
- .maxScalar(0, S32)
- .scalarize(0);
+ .custom();
+ assert(ST.hasMad64_32());
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
.legalFor({S32, S16, V2S16}) // Clamp modifier
@@ -546,13 +555,21 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.widenScalarToNextPow2(0, 32)
.lower();
} else if (ST.has16BitInsts()) {
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
.legalFor({S32, S16})
.minScalar(0, S16)
.widenScalarToNextMultipleOf(0, 32)
.maxScalar(0, S32)
.scalarize(0);
+ getActionDefinitionsBuilder(G_MUL)
+ .legalFor({S32, S16})
+ .scalarize(0)
+ .minScalar(0, S16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .custom();
+ assert(ST.hasMad64_32());
+
// Technically the saturating operations require clamp bit support, but this
// was introduced at the same time as 16-bit operations.
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
@@ -569,12 +586,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
} else {
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
.legalFor({S32})
.widenScalarToNextMultipleOf(0, 32)
.clampScalar(0, S32, S32)
.scalarize(0);
+ auto &Mul = getActionDefinitionsBuilder(G_MUL)
+ .legalFor({S32})
+ .scalarize(0)
+ .minScalar(0, S32)
+ .widenScalarToNextMultipleOf(0, 32);
+
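+ // With MAD_64_32 available, wider multiplies are narrowed by the custom
+ // lowering in legalizeMul; otherwise they must be clamped to 32 bits.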
+ if (ST.hasMad64_32())
+ Mul.custom();
+ else
+ Mul.maxScalar(0, S32);
+
if (ST.hasIntClamp()) {
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
.legalFor({S32}) // Clamp modifier.
@@ -632,7 +660,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
.legalFor({{S32, S1}, {S32, S32}})
.minScalar(0, S32)
- // TODO: .scalarize(0)
+ .scalarize(0)
.lower();
getActionDefinitionsBuilder(G_BITCAST)
@@ -767,13 +795,24 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.narrowScalarFor({{S64, S16}}, changeTo(0, S32))
.scalarize(0);
- getActionDefinitionsBuilder(G_FSUB)
+ auto &FSubActions = getActionDefinitionsBuilder(G_FSUB);
+ if (ST.has16BitInsts()) {
+ FSubActions
+ // Use actual fsub instruction
+ .legalFor({S32, S16})
+ // Must use fadd + fneg
+ .lowerFor({S64, V2S16});
+ } else {
+ FSubActions
// Use actual fsub instruction
.legalFor({S32})
// Must use fadd + fneg
- .lowerFor({S64, S16, V2S16})
- .scalarize(0)
- .clampScalar(0, S32, S64);
+ .lowerFor({S64, S16, V2S16});
+ }
+
+ FSubActions
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
// Whether this is legal depends on the floating point mode for the function.
auto &FMad = getActionDefinitionsBuilder(G_FMAD);
@@ -839,6 +878,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
+ getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
+ .customFor({S16, S32})
+ .scalarize(0)
+ .lower();
+
// Lower roundeven into G_FRINT
getActionDefinitionsBuilder({G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
.scalarize(0)
@@ -1292,6 +1336,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
if (ST.hasGFX90AInsts())
Atomic.legalFor({{S64, LocalPtr}});
+ if (ST.hasGFX940Insts())
+ Atomic.legalFor({{V2S16, LocalPtr}});
}
if (ST.hasAtomicFaddInsts())
Atomic.legalFor({{S32, GlobalPtr}});
@@ -1505,7 +1551,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampMaxNumElements(1, S16, 2) // TODO: Make 4?
.clampMaxNumElements(0, S16, 64);
- // TODO: Don't fully scalarize v2s16 pieces? Or combine out thosse
+ // TODO: Don't fully scalarize v2s16 pieces? Or combine out those
// pre-legalize.
if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
@@ -1756,9 +1802,13 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeFFloor(MI, MRI, B);
case TargetOpcode::G_BUILD_VECTOR:
return legalizeBuildVector(MI, MRI, B);
+ case TargetOpcode::G_MUL:
+ return legalizeMul(Helper, MI);
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ:
return legalizeCTLZ_CTTZ(MI, MRI, B);
+ case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
+ return legalizeFPTruncRound(MI, B);
default:
return false;
}
@@ -1801,6 +1851,39 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
return B.buildShl(S32, GetReg, ShiftAmt).getReg(0);
}
+ // TODO: can we be smarter about machine pointer info?
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
+ Register LoadAddr = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+ // For code object version 5, private_base and shared_base are passed through
+ // implicit kernargs.
+ if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ AMDGPUTargetLowering::ImplicitParameter Param =
+ AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE
+ : AMDGPUTargetLowering::PRIVATE_BASE;
+ uint64_t Offset =
+ ST.getTargetLowering()->getImplicitParameterOffset(B.getMF(), Param);
+
+ Register KernargPtrReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+
+ if (!loadInputValue(KernargPtrReg, B,
+ AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
+ return Register();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ LLT::scalar(32), commonAlignment(Align(64), Offset));
+
+ // Pointer address
+ B.buildPtrAdd(LoadAddr, KernargPtrReg,
+ B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+ // Load address
+ return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
+ }
+
Register QueuePtr = MRI.createGenericVirtualRegister(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
@@ -1811,17 +1894,14 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
- // TODO: can we be smarter about machine pointer info?
- MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo,
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT::scalar(32), commonAlignment(Align(64), StructOffset));
- Register LoadAddr;
-
- B.materializePtrAdd(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+ B.buildPtrAdd(LoadAddr, QueuePtr,
+ B.buildConstant(LLT::scalar(64), StructOffset).getReg(0));
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
@@ -1872,31 +1952,9 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
return true;
}
- if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
- // Truncate.
- B.buildExtract(Dst, Src, 0);
- MI.eraseFromParent();
- return true;
- }
-
- if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
- const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- uint32_t AddrHiVal = Info->get32BitAddressHighBits();
-
- // FIXME: This is a bit ugly due to creating a merge of 2 pointers to
- // another. Merge operands are required to be the same type, but creating an
- // extra ptrtoint would be kind of pointless.
- auto HighAddr = B.buildConstant(
- LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
- B.buildMerge(Dst, {Src, HighAddr});
- MI.eraseFromParent();
- return true;
- }
-
- if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
- assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
- DestAS == AMDGPUAS::PRIVATE_ADDRESS);
-
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS &&
+ (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+ DestAS == AMDGPUAS::PRIVATE_ADDRESS)) {
if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
// Extract low 32-bits of the pointer.
B.buildExtract(Dst, Src, 0);
@@ -1920,37 +1978,70 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
return true;
}
- if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS)
- return false;
+ if (DestAS == AMDGPUAS::FLAT_ADDRESS &&
+ (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+ SrcAS == AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (!ST.hasFlatAddressSpace())
+ return false;
- if (!ST.hasFlatAddressSpace())
- return false;
+ Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
+ if (!ApertureReg.isValid())
+ return false;
- Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
- if (!ApertureReg.isValid())
- return false;
+ // Coerce the type of the low half of the result so we can use merge_values.
+ Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
+
+ // TODO: Should we allow mismatched types but matching sizes in merges to
+ // avoid the ptrtoint?
+ auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg});
+
+ if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
+ B.buildCopy(Dst, BuildPtr);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
+ auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
- // Coerce the type of the low half of the result so we can use merge_values.
- Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
+ auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src,
+ SegmentNull.getReg(0));
- // TODO: Should we allow mismatched types but matching sizes in merges to
- // avoid the ptrtoint?
- auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg});
+ B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
+
+ MI.eraseFromParent();
+ return true;
+ }
- if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
- B.buildCopy(Dst, BuildPtr);
+ if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+ SrcTy.getSizeInBits() == 64) {
+ // Truncate.
+ B.buildExtract(Dst, Src, 0);
MI.eraseFromParent();
return true;
}
- auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
- auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+ if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+ DstTy.getSizeInBits() == 64) {
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ uint32_t AddrHiVal = Info->get32BitAddressHighBits();
- auto CmpRes =
- B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0));
+ // FIXME: This is a bit ugly due to creating a merge of 2 pointers to
+ // another. Merge operands are required to be the same type, but creating an
+ // extra ptrtoint would be kind of pointless.
+ auto HighAddr = B.buildConstant(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
+ B.buildMerge(Dst, {Src, HighAddr});
+ MI.eraseFromParent();
+ return true;
+ }
- B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
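+ // None of the special cases above matched: diagnose the invalid
+ // addrspacecast and lower it to undef so compilation can continue.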
+ DiagnosticInfoUnsupported InvalidAddrSpaceCast(
+ MF.getFunction(), "invalid addrspacecast", B.getDebugLoc());
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ Ctx.diagnose(InvalidAddrSpaceCast);
+ B.buildUndef(Dst);
MI.eraseFromParent();
return true;
}
@@ -2811,6 +2902,298 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
return true;
}
+// Build a big integer multiply or multiply-add using MAD_64_32 instructions.
+//
+// Source and accumulation registers must all be 32 bits wide.
+//
+// TODO: When the multiply is uniform, we should produce a code sequence
+// that is better suited to instruction selection on the SALU. Instead of
+// the outer loop going over parts of the result, the outer loop should go
+// over parts of one of the factors. This should result in instruction
+// selection that makes full use of S_ADDC_U32 instructions.
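+//
+// For example, a 64-bit multiply of {Src0[0], Src0[1]} by {Src1[0], Src1[1]}
+// (32-bit parts, least significant first) computes
+// Accum = mad_u64_u32(Src0[0], Src1[0], 0)
+// + ((Src0[0]*Src1[1] + Src0[1]*Src1[0]) << 32)
+// where the two odd-aligned partial products only need their low 32 bits.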
+void AMDGPULegalizerInfo::buildMultiply(
+ LegalizerHelper &Helper, MutableArrayRef<Register> Accum,
+ ArrayRef<Register> Src0, ArrayRef<Register> Src1,
+ bool UsePartialMad64_32, bool SeparateOddAlignedProducts) const {
+ // Use (possibly empty) vectors of S1 registers to represent the set of
+ // carries from one pair of positions to the next.
+ using Carry = SmallVector<Register, 2>;
+
+ MachineIRBuilder &B = Helper.MIRBuilder;
+
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+
+ Register Zero32;
+ Register Zero64;
+
+ auto getZero32 = [&]() -> Register {
+ if (!Zero32)
+ Zero32 = B.buildConstant(S32, 0).getReg(0);
+ return Zero32;
+ };
+ auto getZero64 = [&]() -> Register {
+ if (!Zero64)
+ Zero64 = B.buildConstant(S64, 0).getReg(0);
+ return Zero64;
+ };
+
+ // Merge the given carries into the 32-bit LocalAccum, which is modified
+ // in-place.
+ //
+ // Returns the carry-out, which is a single S1 register or null.
+ auto mergeCarry =
+ [&](Register &LocalAccum, const Carry &CarryIn) -> Register {
+ if (CarryIn.empty())
+ return Register();
+
+ bool HaveCarryOut = true;
+ Register CarryAccum;
+ if (CarryIn.size() == 1) {
+ if (!LocalAccum) {
+ LocalAccum = B.buildZExt(S32, CarryIn[0]).getReg(0);
+ return Register();
+ }
+
+ CarryAccum = getZero32();
+ } else {
+ CarryAccum = B.buildZExt(S32, CarryIn[0]).getReg(0);
+ for (unsigned i = 1; i + 1 < CarryIn.size(); ++i) {
+ CarryAccum =
+ B.buildUAdde(S32, S1, CarryAccum, getZero32(), CarryIn[i])
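+ // The selected V_SMFMAC opcode takes the accumulator input as its
+ // trailing operand, so detach VDst_In here and re-append it below.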
+ .getReg(0);
+ }
+
+ if (!LocalAccum) {
+ LocalAccum = getZero32();
+ HaveCarryOut = false;
+ }
+ }
+
+ auto Add =
+ B.buildUAdde(S32, S1, CarryAccum, LocalAccum, CarryIn.back());
+ LocalAccum = Add.getReg(0);
+ return HaveCarryOut ? Add.getReg(1) : Register();
+ };
+
+ // Build a multiply-add chain to compute
+ //
+ // LocalAccum + (partial products at DstIndex)
+ // + (opportunistic subset of CarryIn)
+ //
+ // LocalAccum is an array of one or two 32-bit registers that are updated
+ // in-place. The incoming registers may be null.
+ //
+ // In some edge cases, carry-ins can be consumed "for free". In that case,
+ // the consumed carry bits are removed from CarryIn in-place.
+ auto buildMadChain =
+ [&](MutableArrayRef<Register> LocalAccum, unsigned DstIndex, Carry &CarryIn)
+ -> Carry {
+ assert((DstIndex + 1 < Accum.size() && LocalAccum.size() == 2) ||
+ (DstIndex + 1 >= Accum.size() && LocalAccum.size() == 1));
+
+ Carry CarryOut;
+ unsigned j0 = 0;
+
+ // Use plain 32-bit multiplication for the most significant part of the
+ // result by default.
+ if (LocalAccum.size() == 1 &&
+ (!UsePartialMad64_32 || !CarryIn.empty())) {
+ do {
+ unsigned j1 = DstIndex - j0;
+ auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]);
+ if (!LocalAccum[0]) {
+ LocalAccum[0] = Mul.getReg(0);
+ } else {
+ if (CarryIn.empty()) {
+ LocalAccum[0] = B.buildAdd(S32, LocalAccum[0], Mul).getReg(0);
+ } else {
+ LocalAccum[0] =
+ B.buildUAdde(S32, S1, LocalAccum[0], Mul, CarryIn.back())
+ .getReg(0);
+ CarryIn.pop_back();
+ }
+ }
+ ++j0;
+ } while (j0 <= DstIndex && (!UsePartialMad64_32 || !CarryIn.empty()));
+ }
+
+ // Build full 64-bit multiplies.
+ if (j0 <= DstIndex) {
+ bool HaveSmallAccum = false;
+ Register Tmp;
+
+ if (LocalAccum[0]) {
+ if (LocalAccum.size() == 1) {
+ Tmp = B.buildAnyExt(S64, LocalAccum[0]).getReg(0);
+ HaveSmallAccum = true;
+ } else if (LocalAccum[1]) {
+ Tmp = B.buildMerge(S64, LocalAccum).getReg(0);
+ HaveSmallAccum = false;
+ } else {
+ Tmp = B.buildZExt(S64, LocalAccum[0]).getReg(0);
+ HaveSmallAccum = true;
+ }
+ } else {
+ assert(LocalAccum.size() == 1 || !LocalAccum[1]);
+ Tmp = getZero64();
+ HaveSmallAccum = true;
+ }
+
+ do {
+ unsigned j1 = DstIndex - j0;
+ auto Mad = B.buildInstr(AMDGPU::G_AMDGPU_MAD_U64_U32, {S64, S1},
+ {Src0[j0], Src1[j1], Tmp});
+ Tmp = Mad.getReg(0);
+ if (!HaveSmallAccum)
+ CarryOut.push_back(Mad.getReg(1));
+ HaveSmallAccum = false;
+ ++j0;
+ } while (j0 <= DstIndex);
+
+ auto Unmerge = B.buildUnmerge(S32, Tmp);
+ LocalAccum[0] = Unmerge.getReg(0);
+ if (LocalAccum.size() > 1)
+ LocalAccum[1] = Unmerge.getReg(1);
+ }
+
+ return CarryOut;
+ };
+
+ // Outer multiply loop, iterating over destination parts from least
+ // significant to most significant parts.
+ //
+ // The columns of the following diagram correspond to the destination parts
+ // affected by one iteration of the outer loop (ignoring boundary
+ // conditions).
+ //
+ // Dest index relative to 2 * i: 1 0 -1
+ // ------
+ // Carries from previous iteration: e o
+ // Even-aligned partial product sum: E E .
+ // Odd-aligned partial product sum: O O
+ //
+ // 'o' is OddCarry, 'e' is EvenCarry.
+ // EE and OO are computed from partial products via buildMadChain and use
+ // accumulation where possible and appropriate.
+ //
+ Register SeparateOddCarry;
+ Carry EvenCarry;
+ Carry OddCarry;
+
+ for (unsigned i = 0; i <= Accum.size() / 2; ++i) {
+ Carry OddCarryIn = std::move(OddCarry);
+ Carry EvenCarryIn = std::move(EvenCarry);
+ OddCarry.clear();
+ EvenCarry.clear();
+
+ // Partial products at offset 2 * i.
+ if (2 * i < Accum.size()) {
+ auto LocalAccum = Accum.drop_front(2 * i).take_front(2);
+ EvenCarry = buildMadChain(LocalAccum, 2 * i, EvenCarryIn);
+ }
+
+ // Partial products at offset 2 * i - 1.
+ if (i > 0) {
+ if (!SeparateOddAlignedProducts) {
+ auto LocalAccum = Accum.drop_front(2 * i - 1).take_front(2);
+ OddCarry = buildMadChain(LocalAccum, 2 * i - 1, OddCarryIn);
+ } else {
+ bool IsHighest = 2 * i >= Accum.size();
+ Register SeparateOddOut[2];
+ auto LocalAccum = makeMutableArrayRef(SeparateOddOut)
+ .take_front(IsHighest ? 1 : 2);
+ OddCarry = buildMadChain(LocalAccum, 2 * i - 1, OddCarryIn);
+
+ MachineInstr *Lo;
+
+ if (i == 1) {
+ if (!IsHighest)
+ Lo = B.buildUAddo(S32, S1, Accum[2 * i - 1], SeparateOddOut[0]);
+ else
+ Lo = B.buildAdd(S32, Accum[2 * i - 1], SeparateOddOut[0]);
+ } else {
+ Lo = B.buildUAdde(S32, S1, Accum[2 * i - 1], SeparateOddOut[0],
+ SeparateOddCarry);
+ }
+ Accum[2 * i - 1] = Lo->getOperand(0).getReg();
+
+ if (!IsHighest) {
+ auto Hi = B.buildUAdde(S32, S1, Accum[2 * i], SeparateOddOut[1],
+ Lo->getOperand(1).getReg());
+ Accum[2 * i] = Hi.getReg(0);
+ SeparateOddCarry = Hi.getReg(1);
+ }
+ }
+ }
+
+ // Add in the carries from the previous iteration
+ if (i > 0) {
+ if (Register CarryOut = mergeCarry(Accum[2 * i - 1], OddCarryIn))
+ EvenCarryIn.push_back(CarryOut);
+
+ if (2 * i < Accum.size()) {
+ if (Register CarryOut = mergeCarry(Accum[2 * i], EvenCarryIn))
+ OddCarry.push_back(CarryOut);
+ }
+ }
+ }
+}
+
+// Custom narrowing of wide multiplies using wide multiply-add instructions.
+//
+// TODO: If the multiply is followed by an addition, we should attempt to
+// integrate it to make better use of V_MAD_U64_U32's multiply-add capabilities.
+bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ assert(ST.hasMad64_32());
+ assert(MI.getOpcode() == TargetOpcode::G_MUL);
+
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ LLT Ty = MRI.getType(DstReg);
+ assert(Ty.isScalar());
+
+ unsigned Size = Ty.getSizeInBits();
+ unsigned NumParts = Size / 32;
+ assert((Size % 32) == 0);
+ assert(NumParts >= 2);
+
+ // Whether to use MAD_64_32 for partial products whose high half is
+ // discarded. This avoids some ADD instructions but risks false dependency
+ // stalls on some subtargets in some cases.
+ const bool UsePartialMad64_32 = ST.getGeneration() < AMDGPUSubtarget::GFX10;
+
+ // Whether to compute odd-aligned partial products separately. This is
+ // advisable on subtargets where the accumulator of MAD_64_32 must be placed
+ // in an even-aligned VGPR.
+ const bool SeparateOddAlignedProducts = ST.hasFullRate64Ops();
+
+ LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 2> Src0Parts, Src1Parts;
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Src0Parts.push_back(MRI.createGenericVirtualRegister(S32));
+ Src1Parts.push_back(MRI.createGenericVirtualRegister(S32));
+ }
+ B.buildUnmerge(Src0Parts, Src0);
+ B.buildUnmerge(Src1Parts, Src1);
+
+ SmallVector<Register, 2> AccumRegs(NumParts);
+ buildMultiply(Helper, AccumRegs, Src0Parts, Src1Parts, UsePartialMad64_32,
+ SeparateOddAlignedProducts);
+
+ B.buildMerge(DstReg, AccumRegs);
+ MI.eraseFromParent();
+ return true;
+}
+
// Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to
// ctlz/cttz_zero_undef. This allows us to fix up the result for the zero input
// case with a single min instruction instead of a compare+select.
@@ -2954,6 +3337,89 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
return true;
}
+static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI,
+ int64_t C) {
+ B.buildConstant(MI.getOperand(0).getReg(), C);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+ unsigned MaxID = ST.getMaxWorkitemID(B.getMF().getFunction(), Dim);
+ if (MaxID == 0)
+ return replaceWithConstant(B, MI, 0);
+
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ const ArgDescriptor *Arg;
+ const TargetRegisterClass *ArgRC;
+ LLT ArgTy;
+ std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!Arg) {
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ B.buildUndef(DstReg);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Arg->isMasked()) {
+ // Don't bother inserting AssertZext for packed IDs since we're emitting the
+ // masking operations anyway.
+ //
+ // TODO: We could assert the top bit is 0 for the source copy.
+ if (!loadInputValue(DstReg, B, ArgType))
+ return false;
+ } else {
+ Register TmpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ if (!loadInputValue(TmpReg, B, ArgType))
+ return false;
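+ // MaxID bounds the workitem ID, so only its low
+ // (32 - countLeadingZeros(MaxID)) bits can be set; record that fact for
+ // later combines.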
+ B.buildAssertZExt(DstReg, TmpReg, 32 - countLeadingZeros(MaxID));
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
+ int64_t Offset) const {
+ LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ Register KernArgReg = B.getMRI()->createGenericVirtualRegister(PtrTy);
+
+ // TODO: If we passed in the base kernel offset we could have a better
+ // alignment than 4, but we don't really need it.
+ if (!loadInputValue(KernArgReg, B,
+ AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
+ llvm_unreachable("failed to find kernarg segment ptr");
+
+ auto COffset = B.buildConstant(LLT::scalar(64), Offset);
+ // TODO: Should get nuw
+ return B.buildPtrAdd(PtrTy, KernArgReg, COffset).getReg(0);
+}
+
+/// Legalize a value that's loaded from kernel arguments. This is only used by
+/// legacy intrinsics.
+bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI,
+ MachineIRBuilder &B,
+ uint64_t Offset,
+ Align Alignment) const {
+ Register DstReg = MI.getOperand(0).getReg();
+
+ assert(B.getMRI()->getType(DstReg) == LLT::scalar(32) &&
+ "unexpected kernarg parameter type");
+
+ Register Ptr = getKernargParameterPtr(B, Offset);
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
+ B.buildLoad(DstReg, Ptr, PtrInfo, Align(4),
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -3688,9 +4154,9 @@ bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
// The remaining operands were used to set fields in the MemOperand on
// construction.
for (int I = 6; I > 3; --I)
- MI.RemoveOperand(I);
+ MI.removeOperand(I);
- MI.RemoveOperand(1); // Remove the intrinsic ID.
+ MI.removeOperand(1); // Remove the intrinsic ID.
Observer.changedInstr(MI);
return true;
}
@@ -4359,7 +4825,7 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
///
/// We don't want to directly select image instructions just yet, but also want
/// to expose all register repacking to the legalizer/combiners. We also don't
-/// want a selected instrution entering RegBankSelect. In order to avoid
+/// want a selected instruction entering RegBankSelect. In order to avoid
/// defining a multitude of intermediate image instructions, directly hack on
/// the intrinsic's arguments. In cases like a16 addresses, this requires
/// padding now unnecessary arguments with $noreg.
@@ -4508,6 +4974,10 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
//
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
+ //
+ // TODO: we can actually allow partial NSA where the final register is a
+ // contiguous set of the remaining addresses.
+ // This could help where there are more addresses than supported.
const bool UseNSA = ST.hasNSAEncoding() && CorrectedNumVAddrs >= 3 &&
CorrectedNumVAddrs <= ST.getNSAMaxSize();
@@ -4607,7 +5077,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
return false;
// TODO: Make sure the TFE operand bit is set.
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
// Handle the easy case that requires no repack instructions.
if (Ty == S32) {
@@ -4737,7 +5207,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
// should be fixed to have a memory operand. Since it's readnone, we're not
// allowed to add one.
MI.setDesc(B.getTII().get(AMDGPU::G_AMDGPU_S_BUFFER_LOAD));
- MI.RemoveOperand(1); // Remove intrinsic ID
+ MI.removeOperand(1); // Remove intrinsic ID
// FIXME: When intrinsic definition is fixed, this should have an MMO already.
// TODO: Should this use datalayout alignment?
@@ -4797,6 +5267,47 @@ bool AMDGPULegalizerInfo::legalizeTrapEndpgm(
bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ MachineFunction &MF = B.getMF();
+ const LLT S64 = LLT::scalar(64);
+
+ Register SGPR01(AMDGPU::SGPR0_SGPR1);
+ // For code object version 5, queue_ptr is passed through implicit kernarg.
+ if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ AMDGPUTargetLowering::ImplicitParameter Param =
+ AMDGPUTargetLowering::QUEUE_PTR;
+ uint64_t Offset =
+ ST.getTargetLowering()->getImplicitParameterOffset(B.getMF(), Param);
+
+ Register KernargPtrReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+
+ if (!loadInputValue(KernargPtrReg, B,
+ AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR))
+ return false;
+
+ // TODO: can we be smarter about machine pointer info?
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo,
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ LLT::scalar(64), commonAlignment(Align(64), Offset));
+
+ // Pointer address
+ Register LoadAddr = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+ B.buildPtrAdd(LoadAddr, KernargPtrReg,
+ B.buildConstant(LLT::scalar(64), Offset).getReg(0));
+ // Load address
+ Register Temp = B.buildLoad(S64, LoadAddr, *MMO).getReg(0);
+ B.buildCopy(SGPR01, Temp);
+ B.buildInstr(AMDGPU::S_TRAP)
+ .addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap))
+ .addReg(SGPR01, RegState::Implicit);
+ MI.eraseFromParent();
+ return true;
+ }
+
// Pass queue pointer to trap handler as input, and insert trap instruction
// Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
Register LiveIn =
@@ -4804,7 +5315,6 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return false;
- Register SGPR01(AMDGPU::SGPR0_SGPR1);
B.buildCopy(SGPR01, LiveIn);
B.buildInstr(AMDGPU::S_TRAP)
.addImm(static_cast<unsigned>(GCNSubtarget::TrapID::LLVMAMDHSATrap))
@@ -4848,6 +5358,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI = *B.getMRI();
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
+ const LLT V2S16 = LLT::fixed_vector(2, 16);
+ const LLT V3S32 = LLT::fixed_vector(3, 32);
Register DstReg = MI.getOperand(0).getReg();
Register NodePtr = MI.getOperand(2).getReg();
@@ -4865,61 +5377,98 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return false;
}
+ const bool IsGFX11Plus = AMDGPU::isGFX11Plus(ST);
const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
- const bool UseNSA =
- ST.hasNSAEncoding() && NumVAddrDwords <= ST.getNSAMaxSize();
+ const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
+ const bool UseNSA = ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize();
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
int Opcode;
if (UseNSA) {
- Opcode =
- AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10NSA,
- NumVDataDwords, NumVAddrDwords);
- } else {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
- PowerOf2Ceil(NumVAddrDwords));
+ IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
+ : AMDGPU::MIMGEncGfx10NSA,
+ NumVDataDwords, NumVAddrDwords);
+ } else {
+ Opcode = AMDGPU::getMIMGOpcode(
+ BaseOpcodes[Is64][IsA16],
+ IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default,
+ NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));
}
assert(Opcode != -1);
SmallVector<Register, 12> Ops;
- if (Is64) {
- auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr);
- Ops.push_back(Unmerge.getReg(0));
- Ops.push_back(Unmerge.getReg(1));
- } else {
+ if (UseNSA && IsGFX11Plus) {
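+ // On GFX11+, the NSA form packs each xyz vector into a single VGPR
+ // triple rather than passing the components as separate addresses.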
+ auto packLanes = [&Ops, &S32, &V3S32, &B](Register Src) {
+ auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
+ auto Merged = B.buildMerge(
+ V3S32, {Unmerge.getReg(0), Unmerge.getReg(1), Unmerge.getReg(2)});
+ Ops.push_back(Merged.getReg(0));
+ };
+
Ops.push_back(NodePtr);
- }
- Ops.push_back(RayExtent);
+ Ops.push_back(RayExtent);
+ packLanes(RayOrigin);
+
+ if (IsA16) {
+ auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir);
+ auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir);
+ auto MergedDir = B.buildMerge(
+ V3S32,
+ {B.buildBitcast(S32, B.buildMerge(V2S16, {UnmergeRayInvDir.getReg(0),
+ UnmergeRayDir.getReg(0)}))
+ .getReg(0),
+ B.buildBitcast(S32, B.buildMerge(V2S16, {UnmergeRayInvDir.getReg(1),
+ UnmergeRayDir.getReg(1)}))
+ .getReg(0),
+ B.buildBitcast(S32, B.buildMerge(V2S16, {UnmergeRayInvDir.getReg(2),
+ UnmergeRayDir.getReg(2)}))
+ .getReg(0)});
+ Ops.push_back(MergedDir.getReg(0));
+ } else {
+ packLanes(RayDir);
+ packLanes(RayInvDir);
+ }
+ } else {
+ if (Is64) {
+ auto Unmerge = B.buildUnmerge({S32, S32}, NodePtr);
+ Ops.push_back(Unmerge.getReg(0));
+ Ops.push_back(Unmerge.getReg(1));
+ } else {
+ Ops.push_back(NodePtr);
+ }
+ Ops.push_back(RayExtent);
- auto packLanes = [&Ops, &S32, &B](Register Src) {
- auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
- Ops.push_back(Unmerge.getReg(0));
- Ops.push_back(Unmerge.getReg(1));
- Ops.push_back(Unmerge.getReg(2));
- };
+ auto packLanes = [&Ops, &S32, &B](Register Src) {
+ auto Unmerge = B.buildUnmerge({S32, S32, S32}, Src);
+ Ops.push_back(Unmerge.getReg(0));
+ Ops.push_back(Unmerge.getReg(1));
+ Ops.push_back(Unmerge.getReg(2));
+ };
- packLanes(RayOrigin);
- if (IsA16) {
- auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir);
- auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir);
- Register R1 = MRI.createGenericVirtualRegister(S32);
- Register R2 = MRI.createGenericVirtualRegister(S32);
- Register R3 = MRI.createGenericVirtualRegister(S32);
- B.buildMerge(R1, {UnmergeRayDir.getReg(0), UnmergeRayDir.getReg(1)});
- B.buildMerge(R2, {UnmergeRayDir.getReg(2), UnmergeRayInvDir.getReg(0)});
- B.buildMerge(R3, {UnmergeRayInvDir.getReg(1), UnmergeRayInvDir.getReg(2)});
- Ops.push_back(R1);
- Ops.push_back(R2);
- Ops.push_back(R3);
- } else {
- packLanes(RayDir);
- packLanes(RayInvDir);
+ packLanes(RayOrigin);
+ if (IsA16) {
+ auto UnmergeRayDir = B.buildUnmerge({S16, S16, S16}, RayDir);
+ auto UnmergeRayInvDir = B.buildUnmerge({S16, S16, S16}, RayInvDir);
+ Register R1 = MRI.createGenericVirtualRegister(S32);
+ Register R2 = MRI.createGenericVirtualRegister(S32);
+ Register R3 = MRI.createGenericVirtualRegister(S32);
+ B.buildMerge(R1, {UnmergeRayDir.getReg(0), UnmergeRayDir.getReg(1)});
+ B.buildMerge(R2, {UnmergeRayDir.getReg(2), UnmergeRayInvDir.getReg(0)});
+ B.buildMerge(R3,
+ {UnmergeRayInvDir.getReg(1), UnmergeRayInvDir.getReg(2)});
+ Ops.push_back(R1);
+ Ops.push_back(R2);
+ Ops.push_back(R3);
+ } else {
+ packLanes(RayDir);
+ packLanes(RayInvDir);
+ }
}
if (!UseNSA) {
@@ -4946,9 +5495,24 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return true;
}
-static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI, int64_t C) {
- B.buildConstant(MI.getOperand(0).getReg(), C);
+bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ unsigned Opc;
+ int RoundMode = MI.getOperand(2).getImm();
+
+ if (RoundMode == (int)RoundingMode::TowardPositive)
+ Opc = AMDGPU::G_FPTRUNC_ROUND_UPWARD;
+ else if (RoundMode == (int)RoundingMode::TowardNegative)
+ Opc = AMDGPU::G_FPTRUNC_ROUND_DOWNWARD;
+ else
+ return false;
+
+ B.buildInstr(Opc)
+ .addDef(MI.getOperand(0).getReg())
+ .addUse(MI.getOperand(1).getReg());
+
MI.eraseFromParent();
+
return true;
}
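Note: for context, the new hook consumes the target-independent fptrunc-with-rounding intrinsic; a minimal IR-level example (assuming the usual llvm.fptrunc.round mangling, not taken from this patch):

    %r = call half @llvm.fptrunc.round.f16.f32(float %x, metadata !"round.upward")

By the time this hook runs the rounding mode is an immediate operand: TowardPositive selects G_FPTRUNC_ROUND_UPWARD, TowardNegative selects G_FPTRUNC_ROUND_DOWNWARD, and any other mode makes the hook return false.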
@@ -5055,22 +5619,14 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_implicitarg_ptr:
return legalizeImplicitArgPtr(MI, MRI, B);
case Intrinsic::amdgcn_workitem_id_x:
- if (ST.getMaxWorkitemID(B.getMF().getFunction(), 0) == 0)
- return replaceWithConstant(B, MI, 0);
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+ return legalizeWorkitemIDIntrinsic(MI, MRI, B, 0,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_X);
case Intrinsic::amdgcn_workitem_id_y:
- if (ST.getMaxWorkitemID(B.getMF().getFunction(), 1) == 0)
- return replaceWithConstant(B, MI, 0);
-
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+ return legalizeWorkitemIDIntrinsic(MI, MRI, B, 1,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
case Intrinsic::amdgcn_workitem_id_z:
- if (ST.getMaxWorkitemID(B.getMF().getFunction(), 2) == 0)
- return replaceWithConstant(B, MI, 0);
-
- return legalizePreloadedArgIntrin(MI, MRI, B,
- AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+ return legalizeWorkitemIDIntrinsic(MI, MRI, B, 2,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
case Intrinsic::amdgcn_workgroup_id_x:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
@@ -5092,6 +5648,31 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_dispatch_id:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::DISPATCH_ID);
+ case Intrinsic::r600_read_ngroups_x:
+ // TODO: Emit error for hsa
+ return legalizeKernargMemParameter(MI, B,
+ SI::KernelInputOffsets::NGROUPS_X);
+ case Intrinsic::r600_read_ngroups_y:
+ return legalizeKernargMemParameter(MI, B,
+ SI::KernelInputOffsets::NGROUPS_Y);
+ case Intrinsic::r600_read_ngroups_z:
+ return legalizeKernargMemParameter(MI, B,
+ SI::KernelInputOffsets::NGROUPS_Z);
+ case Intrinsic::r600_read_local_size_x:
+ // TODO: Could insert G_ASSERT_ZEXT from s16
+ return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::LOCAL_SIZE_X);
+ case Intrinsic::r600_read_local_size_y:
+ // TODO: Could insert G_ASSERT_ZEXT from s16
+ return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::LOCAL_SIZE_Y);
+  case Intrinsic::r600_read_local_size_z:
+    // TODO: Could insert G_ASSERT_ZEXT from s16
+    return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::LOCAL_SIZE_Z);
+ case Intrinsic::r600_read_global_size_x:
+ return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::GLOBAL_SIZE_X);
+ case Intrinsic::r600_read_global_size_y:
+ return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::GLOBAL_SIZE_Y);
+ case Intrinsic::r600_read_global_size_z:
+ return legalizeKernargMemParameter(MI, B, SI::KernelInputOffsets::GLOBAL_SIZE_Z);
case Intrinsic::amdgcn_fdiv_fast:
return legalizeFDIVFastIntrin(MI, MRI, B);
case Intrinsic::amdgcn_is_shared:
@@ -5157,7 +5738,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_buffer_atomic_fadd: {
Register DstReg = MI.getOperand(0).getReg();
- if (!MRI.use_empty(DstReg) && !ST.hasGFX90AInsts()) {
+ if (!MRI.use_empty(DstReg) &&
+ !AMDGPU::hasAtomicFaddRtnForTy(ST, MRI.getType(DstReg))) {
Function &F = B.getMF().getFunction();
DiagnosticInfoUnsupported NoFpRet(
F, "return versions of fp atomics not supported", B.getDebugLoc(),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 964a41d3d740..cee533aa34ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -88,6 +88,12 @@ public:
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+
+ void buildMultiply(LegalizerHelper &Helper, MutableArrayRef<Register> Accum,
+ ArrayRef<Register> Src0, ArrayRef<Register> Src1,
+ bool UsePartialMad64_32,
+ bool SeparateOddAlignedProducts) const;
+ bool legalizeMul(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
@@ -96,9 +102,18 @@ public:
const TargetRegisterClass *ArgRC, LLT ArgTy) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+
bool legalizePreloadedArgIntrin(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+ bool legalizeWorkitemIDIntrinsic(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+
+ Register getKernargParameterPtr(MachineIRBuilder &B, int64_t Offset) const;
+ bool legalizeKernargMemParameter(MachineInstr &MI, MachineIRBuilder &B,
+ uint64_t Offset,
+ Align Alignment = Align(4)) const;
bool legalizeUnsignedDIV_REM(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
@@ -169,6 +184,8 @@ public:
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
+
bool legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B,
GISelChangeObserver &Observer,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index bbbadfdfd444..78e092b2e872 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1593,8 +1593,9 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
// max vector size is 16, and sincos will generate two results.
double DVal0[16], DVal1[16];
+ int FuncVecSize = getVecSize(FInfo);
bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
- if (getVecSize(FInfo) == 1) {
+ if (FuncVecSize == 1) {
if (!evaluateScalarMathFunc(FInfo, DVal0[0],
DVal1[0], copr0, copr1, copr2)) {
return false;
@@ -1603,7 +1604,7 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
- for (int i=0; i < getVecSize(FInfo); ++i) {
+ for (int i = 0; i < FuncVecSize; ++i) {
Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
@@ -1616,19 +1617,19 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
LLVMContext &context = CI->getParent()->getParent()->getContext();
Constant *nval0, *nval1;
- if (getVecSize(FInfo) == 1) {
+ if (FuncVecSize == 1) {
nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
if (hasTwoResults)
nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
} else {
if (getArgType(FInfo) == AMDGPULibFunc::F32) {
SmallVector <float, 0> FVal0, FVal1;
- for (int i=0; i < getVecSize(FInfo); ++i)
+ for (int i = 0; i < FuncVecSize; ++i)
FVal0.push_back((float)DVal0[i]);
ArrayRef<float> tmp0(FVal0);
nval0 = ConstantDataVector::get(context, tmp0);
if (hasTwoResults) {
- for (int i=0; i < getVecSize(FInfo); ++i)
+ for (int i = 0; i < FuncVecSize; ++i)
FVal1.push_back((float)DVal1[i]);
ArrayRef<float> tmp1(FVal1);
nval1 = ConstantDataVector::get(context, tmp1);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
index dc0ac72016f3..bf0fda25b2c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -324,8 +324,8 @@ public:
class AMDGPULibFuncImpl : public AMDGPULibFuncBase {
public:
- AMDGPULibFuncImpl() {}
- virtual ~AMDGPULibFuncImpl() {}
+ AMDGPULibFuncImpl() = default;
+ virtual ~AMDGPULibFuncImpl() = default;
/// Get unmangled name for mangled library function and name for unmangled
/// library function.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index b700dd5aa301..93d1eed2cf63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
@@ -156,11 +155,8 @@ bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
Changed = true;
break;
- case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
- case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
- case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
case Intrinsic::r600_read_local_size_x:
case Intrinsic::r600_read_local_size_y:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index c34c12ab9fec..2e5c35f1f571 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -73,7 +73,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
Align MaxAlign;
- // FIXME: Alignment is broken broken with explicit arg offset.;
+  // FIXME: Alignment is broken with explicit arg offset.
const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
if (TotalKernArgSize == 0)
return false;
@@ -92,9 +92,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
for (Argument &Arg : F.args()) {
const bool IsByRef = Arg.hasByRefAttr();
Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
- MaybeAlign ABITypeAlign = IsByRef ? Arg.getParamAlign() : None;
- if (!ABITypeAlign)
- ABITypeAlign = DL.getABITypeAlign(ArgTy);
+ MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : None;
+ Align ABITypeAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);
uint64_t Size = DL.getTypeSizeInBits(ArgTy);
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 08a1b970648d..f5903b3afb81 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -163,39 +163,29 @@ static bool processUse(CallInst *CI) {
if (!GroupSize || !GridSize)
continue;
+ using namespace llvm::PatternMatch;
+ auto GroupIDIntrin =
+ I == 0 ? m_Intrinsic<Intrinsic::amdgcn_workgroup_id_x>()
+ : (I == 1 ? m_Intrinsic<Intrinsic::amdgcn_workgroup_id_y>()
+ : m_Intrinsic<Intrinsic::amdgcn_workgroup_id_z>());
+
for (User *U : GroupSize->users()) {
auto *ZextGroupSize = dyn_cast<ZExtInst>(U);
if (!ZextGroupSize)
continue;
- for (User *ZextUser : ZextGroupSize->users()) {
- auto *SI = dyn_cast<SelectInst>(ZextUser);
- if (!SI)
- continue;
-
- using namespace llvm::PatternMatch;
- auto GroupIDIntrin = I == 0 ?
- m_Intrinsic<Intrinsic::amdgcn_workgroup_id_x>() :
- (I == 1 ? m_Intrinsic<Intrinsic::amdgcn_workgroup_id_y>() :
- m_Intrinsic<Intrinsic::amdgcn_workgroup_id_z>());
-
- auto SubExpr = m_Sub(m_Specific(GridSize),
- m_Mul(GroupIDIntrin, m_Specific(ZextGroupSize)));
-
- ICmpInst::Predicate Pred;
- if (match(SI,
- m_Select(m_ICmp(Pred, SubExpr, m_Specific(ZextGroupSize)),
- SubExpr,
- m_Specific(ZextGroupSize))) &&
- Pred == ICmpInst::ICMP_ULT) {
+ for (User *UMin : ZextGroupSize->users()) {
+ if (match(UMin,
+ m_UMin(m_Sub(m_Specific(GridSize),
+ m_Mul(GroupIDIntrin, m_Specific(ZextGroupSize))),
+ m_Specific(ZextGroupSize)))) {
if (HasReqdWorkGroupSize) {
ConstantInt *KnownSize
= mdconst::extract<ConstantInt>(MD->getOperand(I));
- SI->replaceAllUsesWith(ConstantExpr::getIntegerCast(KnownSize,
- SI->getType(),
- false));
+ UMin->replaceAllUsesWith(ConstantExpr::getIntegerCast(
+ KnownSize, UMin->getType(), false));
} else {
- SI->replaceAllUsesWith(ZextGroupSize);
+ UMin->replaceAllUsesWith(ZextGroupSize);
}
MadeChange = true;
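Note: the simplified matcher relies on the middle end having canonicalized the old select-of-icmp-ult form into a umin intrinsic, so the pass now looks for roughly this shape (value names invented):

    %sub = sub i32 %grid.size, %mul              ; GridSize - GroupID * GroupSize
    %partial = call i32 @llvm.umin.i32(i32 %sub, i32 %zext.group.size)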
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 6e2b5dc471bc..35922341de26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -14,7 +14,7 @@
// known address. AMDGPUMachineFunction allocates the LDS global.
//
// Local variables with constant annotation or non-undef initializer are passed
-// through unchanged for simplication or error diagnostics in later passes.
+// through unchanged for simplification or error diagnostics in later passes.
//
// To reduce the memory overhead variables that are only used by kernels are
// excluded from this transform. The analysis to determine whether a variable
@@ -28,8 +28,9 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDGPULDSUtils.h"
+#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
@@ -163,9 +164,10 @@ public:
}
bool runOnModule(Module &M) override {
+ CallGraph CG = CallGraph(M);
UsedList = getUsedList(M);
bool Changed = superAlignLDSGlobals(M);
- Changed |= processUsedLDS(M);
+ Changed |= processUsedLDS(CG, M);
for (Function &F : M.functions()) {
if (F.isDeclaration())
@@ -174,7 +176,7 @@ public:
// Only lower compute kernels' LDS.
if (!AMDGPU::isKernel(F.getCallingConv()))
continue;
- Changed |= processUsedLDS(M, &F);
+ Changed |= processUsedLDS(CG, M, &F);
}
UsedList.clear();
@@ -226,7 +228,7 @@ private:
return Changed;
}
- bool processUsedLDS(Module &M, Function *F = nullptr) {
+ bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) {
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
@@ -374,7 +376,20 @@ private:
IRBuilder<> Builder(Ctx);
for (Function &Func : M.functions()) {
if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) {
- markUsedByKernel(Builder, &Func, SGV);
+ const CallGraphNode *N = CG[&Func];
+ const bool CalleesRequireModuleLDS = N->size() > 0;
+
+ if (CalleesRequireModuleLDS) {
+ // If a function this kernel might call requires module LDS,
+ // annotate the kernel to let later passes know it will allocate
+ // this structure, even if not apparent from the IR.
+ markUsedByKernel(Builder, &Func, SGV);
+ } else {
+          // However, if we are certain this kernel cannot call a function that
+ // requires module LDS, annotate the kernel so the backend can elide
+ // the allocation without repeating callgraph walks.
+ Func.addFnAttr("amdgpu-elide-module-lds");
+ }
}
}
}
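Note: the "amdgpu-elide-module-lds" attribute set above is read back when the machine function is created; a condensed sketch of the consumer side (the actual check appears later in this patch, in AMDGPUMachineFunction.cpp):

    // Kernel allocates the module LDS struct only if it was not marked elidable.
    if (GV && !F.hasFnAttribute("amdgpu-elide-module-lds"))
      allocateLDSGlobal(M->getDataLayout(), *GV);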
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 3fad7e192195..ed6ddbf426fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -120,8 +120,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
// FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
// need to select it to the subtarget specific version, and there's no way to
// do that with a single pseudo source operation.
- if (Opcode == AMDGPU::S_SETPC_B64_return ||
- Opcode == AMDGPU::S_SETPC_B64_return_gfx)
+ if (Opcode == AMDGPU::S_SETPC_B64_return)
Opcode = AMDGPU::S_SETPC_B64;
else if (Opcode == AMDGPU::SI_CALL) {
// SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
@@ -208,6 +207,16 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
+ if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
+ if (isVerbose()) {
+ std::string HexString;
+ raw_string_ostream HexStream(HexString);
+ HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
+ OutStreamer->emitRawComment(" sched_barrier mask(" + HexString + ")");
+ }
+ return;
+ }
+
if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
if (isVerbose())
OutStreamer->emitRawComment(" divergent unreachable");
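Note: format_hex(Imm, 10, true) produces a 0x-prefixed value padded to ten characters, so the emitted assembly comment looks like this (mask value invented):

    ; sched_barrier mask(0x00000001)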
@@ -240,7 +249,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
raw_svector_ostream CodeStream(CodeBytes);
std::unique_ptr<MCCodeEmitter> InstEmitter(createSIMCCodeEmitter(
- *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext));
+ *STI.getInstrInfo(), OutContext));
InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI);
assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
index 0e43b4fe9461..5c656f158e71 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -1,4 +1,4 @@
-//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===//
+//===- AMDGPUMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
index c3441f81a78e..0712466a0e88 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
@@ -21,17 +21,18 @@ bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue(
StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ const AMDGPUTargetMachine &TM =
+ static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
if (Src == "BufferResource") {
- PSV = MFI->getBufferPSV(TII);
+ PSV = MFI->getBufferPSV(TM);
return false;
}
if (Src == "ImageResource") {
- PSV = MFI->getImagePSV(TII);
+ PSV = MFI->getImagePSV(TM);
return false;
}
if (Src == "GWSResource") {
- PSV = MFI->getGWSPSV(TII);
+ PSV = MFI->getGWSPSV(TM);
return false;
}
llvm_unreachable("unknown MIR custom pseudo source value");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
index 47faa6c72481..753f7edc9385 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
@@ -25,7 +25,7 @@ struct PerFunctionMIParsingState;
class AMDGPUMIRFormatter final : public MIRFormatter {
public:
- AMDGPUMIRFormatter() {}
+ AMDGPUMIRFormatter() = default;
virtual ~AMDGPUMIRFormatter() = default;
/// Implement target specific parsing of target custom pseudo source value.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 4e2f98d2a5db..d837f8cb2f60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -1295,7 +1295,7 @@ static void fixRegionTerminator(RegionMRT *Region) {
}
}
-// If a region region is just a sequence of regions (and the exit
+// If a region is just a sequence of regions (and the exit
// block in the case of the top level region), we can simply skip
// linearizing it, because it is already linear
bool regionIsSequence(RegionMRT *Region) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 593388a4d819..b461c3c4bfdc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMachineFunction.h"
+#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -32,6 +33,15 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
WaveLimiter = WaveLimitAttr.getValueAsBool();
+ // FIXME: How is this attribute supposed to interact with statically known
+ // global sizes?
+ StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
+ if (!S.empty())
+ S.consumeInteger(0, GDSSize);
+
+ // Assume the attribute allocates before any known GDS globals.
+ StaticGDSSize = GDSSize;
+
CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
@@ -46,25 +56,43 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
Align Alignment =
DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
- /// TODO: We should sort these to minimize wasted space due to alignment
- /// padding. Currently the padding is decided by the first encountered use
- /// during lowering.
- unsigned Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
+ unsigned Offset;
+ if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ /// TODO: We should sort these to minimize wasted space due to alignment
+ /// padding. Currently the padding is decided by the first encountered use
+ /// during lowering.
+ Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
- Entry.first->second = Offset;
- StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
+ StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
- // Update the LDS size considering the padding to align the dynamic shared
- // memory.
- LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
+ // Update the LDS size considering the padding to align the dynamic shared
+ // memory.
+ LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
+ } else {
+ assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
+ "expected region address space");
+ Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
+ StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
+
+ // FIXME: Apply alignment of dynamic GDS
+ GDSSize = StaticGDSSize;
+ }
+
+ Entry.first->second = Offset;
return Offset;
}
-void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Module *M) {
+// Returns true if this kernel calls no functions that require the module LDS
+// struct.
+static bool canElideModuleLDS(const Function &F) {
+ return F.hasFnAttribute("amdgpu-elide-module-lds");
+}
+
+void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) {
+ const Module *M = F.getParent();
if (isModuleEntryFunction()) {
const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
- if (GV) {
+ if (GV && !canElideModuleLDS(F)) {
unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
(void)Offset;
assert(Offset == 0 &&
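Note: a worked example of the bump allocation above, with invented sizes. Starting from StaticLDSSize == 6, an LDS global of size 16 and align 8 is placed as follows:

    Offset = StaticLDSSize = alignTo(6, Align(8)); // -> 8
    StaticLDSSize += 16;                           // -> 24
    LDSSize = alignTo(24, DynLDSAlign);            // pad for dynamic LDS

GDS (region address space) globals take the same path through StaticGDSSize/GDSSize.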
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 48cf46b5f871..df62c2314617 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -12,6 +12,10 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Function.h"
namespace llvm {
@@ -25,11 +29,13 @@ protected:
Align MaxKernArgAlign; // Cache for this.
/// Number of bytes in the LDS that are being used.
- unsigned LDSSize = 0;
+ uint32_t LDSSize = 0;
+ uint32_t GDSSize = 0;
/// Number of bytes in the LDS allocated statically. This field is only used
/// in the instruction selector and not part of the machine function info.
- unsigned StaticLDSSize = 0;
+ uint32_t StaticLDSSize = 0;
+ uint32_t StaticGDSSize = 0;
/// Align for dynamic shared memory if any. Dynamic shared memory is
/// allocated directly after the static one, i.e., LDSSize. Need to pad
@@ -63,12 +69,16 @@ public:
return ExplicitKernArgSize;
}
- unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); }
+ Align getMaxKernArgAlign() const { return MaxKernArgAlign; }
- unsigned getLDSSize() const {
+ uint32_t getLDSSize() const {
return LDSSize;
}
+ uint32_t getGDSSize() const {
+ return GDSSize;
+ }
+
AMDGPU::SIModeRegisterDefaults getMode() const {
return Mode;
}
@@ -92,7 +102,7 @@ public:
}
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
- void allocateModuleLDSGlobal(const Module *M);
+ void allocateModuleLDSGlobal(const Function &F);
Align getDynLDSAlign() const { return DynLDSAlign; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
index 6646cce8186b..2d48be9ea542 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMachineModuleInfo.h"
+#include "llvm/MC/MCSymbol.h"
namespace llvm {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 5a5a5d213a1a..fb7709d66c76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -34,6 +34,7 @@
#include "AMDGPU.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
@@ -71,7 +72,7 @@ ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() {
return new AMDGPUOpenCLEnqueuedBlockLowering();
}
-/// Collect direct or indrect callers of \p F and save them
+/// Collect direct or indirect callers of \p F and save them
/// to \p Callers.
static void collectCallers(Function *F, DenseSet<Function *> &Callers) {
for (auto U : F->users()) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 8ad344816ad2..09dbd2150db6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -116,7 +116,6 @@ private:
bool isGlobalAddr(const Value *V) const;
bool isLocalAddr(const Value *V) const;
- bool isConstantAddr(const Value *V) const;
};
static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType(
@@ -153,7 +152,7 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
if (auto LD = dyn_cast<LoadInst>(V)) {
auto M = LD->getPointerOperand();
- if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) {
+ if (isGlobalAddr(M)) {
LLVM_DEBUG(dbgs() << " is IA\n");
return true;
}
@@ -267,19 +266,23 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) {
<< " LSMInst cost: " << Info->LSMInstCost << '\n'
<< " TotalInst cost: " << Info->InstCost << '\n');
+ bool Changed = false;
+
if (isMemBound(*Info)) {
LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
NumMemBound++;
F.addFnAttr("amdgpu-memory-bound", "true");
+ Changed = true;
}
if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
NumLimitWave++;
F.addFnAttr("amdgpu-wave-limiter", "true");
+ Changed = true;
}
- return true;
+ return Changed;
}
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
@@ -332,15 +335,6 @@ AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
return MAI;
}
-bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
- if (auto PT = dyn_cast<PointerType>(V->getType())) {
- unsigned As = PT->getAddressSpace();
- return As == AMDGPUAS::CONSTANT_ADDRESS ||
- As == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
- }
- return false;
-}
-
bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
MemAccessInfo &Reference) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index c029046ab65f..bfe2e9b66ed4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -16,6 +16,7 @@
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -125,7 +126,6 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
LLT::scalar(64));
const LLT S32 = LLT::scalar(32);
- B.setMBB(*MI.getParent());
B.setInstrAndDebugLoc(MI);
auto Unmerge = B.buildUnmerge(S32, Src);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index f91f31508ad2..1db7c18e4598 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
@@ -66,7 +67,7 @@ private:
Value *simplify(Instruction *I, const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return SimplifyInstruction(I, {*TD, TLI, DT});
+ return simplifyInstruction(I, {*TD, TLI, DT});
}
const DataLayout *TD;
@@ -562,15 +563,6 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
if (Printfs.empty())
return false;
- if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
- for (auto &U : HostcallFunction->uses()) {
- if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
- M.getContext().emitError(
- CI, "Cannot use both printf and hostcall in the same module");
- }
- }
- }
-
TD = &M.getDataLayout();
return lowerPrintfForGpu(M);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 99b7ffb33884..5a4426ba8113 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -334,86 +334,49 @@ static FixedVectorType *arrayTypeToVecType(ArrayType *ArrayTy) {
ArrayTy->getNumElements());
}
-static Value *stripBitcasts(Value *V) {
- while (Instruction *I = dyn_cast<Instruction>(V)) {
- if (I->getOpcode() != Instruction::BitCast)
- break;
- V = I->getOperand(0);
- }
- return V;
-}
-
static Value *
calculateVectorIndex(Value *Ptr,
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(stripBitcasts(Ptr));
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
if (!GEP)
- return nullptr;
+ return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
auto I = GEPIdx.find(GEP);
- return I == GEPIdx.end() ? nullptr : I->second;
+ assert(I != GEPIdx.end() && "Must have entry for GEP!");
+ return I->second;
}
-static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
- // FIXME we only support simple cases
- if (GEP->getNumOperands() != 3)
+static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
+ Type *VecElemTy, const DataLayout &DL) {
+ // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
+ // helper.
+ unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+ MapVector<Value *, APInt> VarOffsets;
+ APInt ConstOffset(BW, 0);
+ if (GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
+ !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
return nullptr;
- ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
- if (!I0 || !I0->isZero())
+ unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
+ if (VarOffsets.size() > 1)
return nullptr;
- return GEP->getOperand(2);
-}
-
-// Not an instruction handled below to turn into a vector.
-//
-// TODO: Check isTriviallyVectorizable for calls and handle other
-// instructions.
-static bool canVectorizeInst(Instruction *Inst, User *User,
- const DataLayout &DL) {
- switch (Inst->getOpcode()) {
- case Instruction::Load: {
- // Currently only handle the case where the Pointer Operand is a GEP.
- // Also we could not vectorize volatile or atomic loads.
- LoadInst *LI = cast<LoadInst>(Inst);
- if (isa<AllocaInst>(User) &&
- LI->getPointerOperandType() == User->getType() &&
- isa<VectorType>(LI->getType()))
- return true;
-
- Instruction *PtrInst = dyn_cast<Instruction>(LI->getPointerOperand());
- if (!PtrInst)
- return false;
-
- return (PtrInst->getOpcode() == Instruction::GetElementPtr ||
- PtrInst->getOpcode() == Instruction::BitCast) &&
- LI->isSimple();
+ if (VarOffsets.size() == 1) {
+ // Only handle cases where we don't need to insert extra arithmetic
+ // instructions.
+ const auto &VarOffset = VarOffsets.front();
+ if (!ConstOffset.isZero() || VarOffset.second != VecElemSize)
+ return nullptr;
+ return VarOffset.first;
}
- case Instruction::BitCast:
- return true;
- case Instruction::Store: {
- // Must be the stored pointer operand, not a stored value, plus
- // since it should be canonical form, the User should be a GEP.
- // Also we could not vectorize volatile or atomic stores.
- StoreInst *SI = cast<StoreInst>(Inst);
- if (isa<AllocaInst>(User) &&
- SI->getPointerOperandType() == User->getType() &&
- isa<VectorType>(SI->getValueOperand()->getType()))
- return true;
-
- Instruction *UserInst = dyn_cast<Instruction>(User);
- if (!UserInst)
- return false;
- return (SI->getPointerOperand() == User) &&
- (UserInst->getOpcode() == Instruction::GetElementPtr ||
- UserInst->getOpcode() == Instruction::BitCast) &&
- SI->isSimple();
- }
- default:
- return false;
- }
+ APInt Quot;
+ uint64_t Rem;
+ APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem);
+ if (Rem != 0)
+ return nullptr;
+
+ return ConstantInt::get(GEP->getContext(), Quot);
}
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
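Note: a worked example of the offset-to-index translation above (IR invented). For a private alloca of [4 x i32], VecElemSize is 4, so for

    %p = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %a, i32 0, i32 2

collectOffset yields ConstOffset = 8 bytes, udivrem(8, 4) = (2, 0), hence vector index 2. A variable offset is accepted only when it is exactly Index * VecElemSize with no constant part, since anything else would require inserting extra arithmetic.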
@@ -455,73 +418,87 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
}
std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
- std::vector<Value *> WorkList;
- SmallVector<User *, 8> Users(Alloca->users());
- SmallVector<User *, 8> UseUsers(Users.size(), Alloca);
+ SmallVector<Instruction *> WorkList;
+ SmallVector<Use *, 8> Uses;
+ for (Use &U : Alloca->uses())
+ Uses.push_back(&U);
+
Type *VecEltTy = VectorTy->getElementType();
- while (!Users.empty()) {
- User *AllocaUser = Users.pop_back_val();
- User *UseUser = UseUsers.pop_back_val();
- Instruction *Inst = dyn_cast<Instruction>(AllocaUser);
-
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
- if (!GEP) {
- if (!canVectorizeInst(Inst, UseUser, DL))
+ while (!Uses.empty()) {
+ Use *U = Uses.pop_back_val();
+ Instruction *Inst = dyn_cast<Instruction>(U->getUser());
+
+ if (Value *Ptr = getLoadStorePointerOperand(Inst)) {
+ // This is a store of the pointer, not to the pointer.
+ if (isa<StoreInst>(Inst) &&
+ U->getOperandNo() != StoreInst::getPointerOperandIndex())
return false;
- if (Inst->getOpcode() == Instruction::BitCast) {
- Type *FromTy = Inst->getOperand(0)->getType()->getPointerElementType();
- Type *ToTy = Inst->getType()->getPointerElementType();
- if (FromTy->isAggregateType() || ToTy->isAggregateType() ||
- DL.getTypeSizeInBits(FromTy) != DL.getTypeSizeInBits(ToTy))
- continue;
-
- for (User *CastUser : Inst->users()) {
- if (isAssumeLikeIntrinsic(cast<Instruction>(CastUser)))
- continue;
- Users.push_back(CastUser);
- UseUsers.push_back(Inst);
- }
+ Type *AccessTy = getLoadStoreType(Inst);
+ Ptr = Ptr->stripPointerCasts();
+ // Alloca already accessed as vector, leave alone.
+ if (Ptr == Alloca && DL.getTypeStoreSize(Alloca->getAllocatedType()) ==
+ DL.getTypeStoreSize(AccessTy))
continue;
- }
- WorkList.push_back(AllocaUser);
+ // Check that this is a simple access of a vector element.
+ bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
+ : cast<StoreInst>(Inst)->isSimple();
+ if (!IsSimple ||
+ !CastInst::isBitOrNoopPointerCastable(VecEltTy, AccessTy, DL))
+ return false;
+
+ WorkList.push_back(Inst);
continue;
}
- Value *Index = GEPToVectorIndex(GEP);
+ if (isa<BitCastInst>(Inst)) {
+ // Look through bitcasts.
+ for (Use &U : Inst->uses())
+ Uses.push_back(&U);
+ continue;
+ }
- // If we can't compute a vector index from this GEP, then we can't
- // promote this alloca to vector.
- if (!Index) {
- LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
- << '\n');
- return false;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ // If we can't compute a vector index from this GEP, then we can't
+ // promote this alloca to vector.
+ Value *Index = GEPToVectorIndex(GEP, Alloca, VecEltTy, DL);
+ if (!Index) {
+ LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
+ << '\n');
+ return false;
+ }
+
+ GEPVectorIdx[GEP] = Index;
+ for (Use &U : Inst->uses())
+ Uses.push_back(&U);
+ continue;
}
- GEPVectorIdx[GEP] = Index;
- Users.append(GEP->user_begin(), GEP->user_end());
- UseUsers.append(GEP->getNumUses(), GEP);
+ // Ignore assume-like intrinsics and comparisons used in assumes.
+ if (isAssumeLikeIntrinsic(Inst))
+ continue;
+
+ if (isa<ICmpInst>(Inst) && all_of(Inst->users(), [](User *U) {
+ return isAssumeLikeIntrinsic(cast<Instruction>(U));
+ }))
+ continue;
+
+ // Unknown user.
+ return false;
}
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
<< *VectorTy << '\n');
- for (Value *V : WorkList) {
- Instruction *Inst = cast<Instruction>(V);
+ for (Instruction *Inst : WorkList) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
- break;
-
Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- if (!Index)
- break;
-
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
@@ -533,16 +510,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(Inst);
- if (SI->getValueOperand()->getType() == AllocaTy ||
- SI->getValueOperand()->getType()->isVectorTy())
- break;
-
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- if (!Index)
- break;
-
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *Elt = SI->getValueOperand();
@@ -808,10 +778,7 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
//
// FIXME: We should really do something to fix the addresses to a more optimal
// value instead
- llvm::sort(AllocatedSizes, [](std::pair<uint64_t, Align> LHS,
- std::pair<uint64_t, Align> RHS) {
- return LHS.second < RHS.second;
- });
+ llvm::sort(AllocatedSizes, llvm::less_second());
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
@@ -917,7 +884,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// usage order.
//
// FIXME: It is also possible that if we're allowed to use all of the memory
- // could could end up using more than the maximum due to alignment padding.
+  // we could end up using more than the maximum due to alignment padding.
uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
index 01d03d17ec47..ed450f59e4b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
@@ -16,7 +16,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
@@ -30,6 +32,8 @@ namespace {
class AMDGPUPromoteKernelArguments : public FunctionPass {
MemorySSA *MSSA;
+ AliasAnalysis *AA;
+
Instruction *ArgCastInsertPt;
SmallVector<Value *> Ptrs;
@@ -38,16 +42,19 @@ class AMDGPUPromoteKernelArguments : public FunctionPass {
bool promotePointer(Value *Ptr);
+ bool promoteLoad(LoadInst *LI);
+
public:
static char ID;
AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
- bool run(Function &F, MemorySSA &MSSA);
+ bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemorySSAWrapperPass>();
AU.setPreservesAll();
}
@@ -68,17 +75,10 @@ void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
break;
case Instruction::Load: {
LoadInst *LD = cast<LoadInst>(U);
- PointerType *PT = dyn_cast<PointerType>(LD->getType());
- if (!PT ||
- (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
- PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
- PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) ||
- LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
- break;
- const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD);
- // TODO: This load poprobably can be promoted to constant address space.
- if (MSSA->isLiveOnEntryDef(MA))
+ if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
+ !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
Ptrs.push_back(LD);
+
break;
}
case Instruction::GetElementPtr:
@@ -92,15 +92,26 @@ void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
}
bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
- enqueueUsers(Ptr);
+ bool Changed = false;
+
+ LoadInst *LI = dyn_cast<LoadInst>(Ptr);
+ if (LI)
+ Changed |= promoteLoad(LI);
+
+ PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
+ if (!PT)
+ return Changed;
+
+ if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
+ PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ enqueueUsers(Ptr);
- PointerType *PT = cast<PointerType>(Ptr->getType());
if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
- return false;
+ return Changed;
- bool IsArg = isa<Argument>(Ptr);
- IRBuilder<> B(IsArg ? ArgCastInsertPt
- : &*std::next(cast<Instruction>(Ptr)->getIterator()));
+ IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
+ : ArgCastInsertPt);
// Cast pointer to global address space and back to flat and let
// Infer Address Spaces pass to do all necessary rewriting.
@@ -116,6 +127,14 @@ bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
return true;
}
+bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
+ if (!LI->isSimple())
+ return false;
+
+ LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
+ return true;
+}
+
// skip allocas
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
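Note: a minimal sketch of what promoteLoad leaves behind (IR invented); the backend later maps this metadata onto the MONoClobber memory-operand flag tested in AMDGPURegisterBankInfo.cpp below:

    %p = load float addrspace(1)*, float addrspace(1)** %arg.cast, !amdgpu.noclobber !0
    ...
    !0 = !{}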
@@ -131,7 +150,8 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
return InsPt;
}
-bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
+bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
+ AliasAnalysis &AA) {
if (skipFunction(F))
return false;
@@ -141,6 +161,7 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
ArgCastInsertPt = &*getInsertPt(*F.begin());
this->MSSA = &MSSA;
+ this->AA = &AA;
for (Argument &Arg : F.args()) {
if (Arg.use_empty())
@@ -166,11 +187,13 @@ bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- return run(F, MSSA);
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ return run(F, MSSA, AA);
}
INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
"AMDGPU Promote Kernel Arguments", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
"AMDGPU Promote Kernel Arguments", false, false)
@@ -185,7 +208,8 @@ PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
FunctionAnalysisManager &AM) {
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
- if (AMDGPUPromoteKernelArguments().run(F, MSSA)) {
+ AliasAnalysis &AA = AM.getResult<AAManager>(F);
+ if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<MemorySSAAnalysis>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index de2dccef804a..0830cbd919a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -76,10 +76,11 @@
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#define GET_TARGET_REGBANK_IMPL
@@ -193,9 +194,7 @@ public:
}
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
- : AMDGPUGenRegisterBankInfo(),
- Subtarget(ST),
- TRI(Subtarget.getRegisterInfo()),
+ : Subtarget(ST), TRI(Subtarget.getRegisterInfo()),
TII(Subtarget.getInstrInfo()) {
// HACK: Until this is fully tablegen'd.
@@ -428,11 +427,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
}
}
-static bool memOpHasNoClobbered(const MachineMemOperand *MMO) {
- const Instruction *I = dyn_cast_or_null<Instruction>(MMO->getValue());
- return I && I->getMetadata("amdgpu.noclobber");
-}
-
// FIXME: Returns uniform if there's no source value information. This is
// probably wrong.
static bool isScalarLoadLegal(const MachineInstr &MI) {
@@ -451,7 +445,7 @@ static bool isScalarLoadLegal(const MachineInstr &MI) {
// spaces.
(IsConst || !MMO->isVolatile()) &&
// Memory must be known constant, or not written before this load.
- (IsConst || MMO->isInvariant() || memOpHasNoClobbered(MMO)) &&
+ (IsConst || MMO->isInvariant() || (MMO->getFlags() & MONoClobber)) &&
AMDGPUInstrInfo::isUniformMMO(MMO);
}
@@ -684,6 +678,62 @@ static LLT getHalfSizedType(LLT Ty) {
return LLT::scalar(Ty.getScalarSizeInBits() / 2);
}
+// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector
+// source value into a scalar register.
+Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B,
+ MachineRegisterInfo &MRI,
+ Register Src) const {
+ LLT Ty = MRI.getType(Src);
+ const RegisterBank *Bank = getRegBank(Src, MRI, *TRI);
+
+ if (Bank == &AMDGPU::SGPRRegBank)
+ return Src;
+
+ unsigned Bits = Ty.getSizeInBits();
+ assert(Bits % 32 == 0);
+
+ if (Bank != &AMDGPU::VGPRRegBank) {
+ // We need to copy from AGPR to VGPR
+ Src = B.buildCopy(Ty, Src).getReg(0);
+ MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
+ }
+
+ LLT S32 = LLT::scalar(32);
+ unsigned NumParts = Bits / 32;
+ SmallVector<Register, 8> SrcParts;
+ SmallVector<Register, 8> DstParts;
+
+ if (Bits == 32) {
+ SrcParts.push_back(Src);
+ } else {
+ auto Unmerge = B.buildUnmerge(S32, Src);
+ for (unsigned i = 0; i < NumParts; ++i)
+ SrcParts.push_back(Unmerge.getReg(i));
+ }
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Register SrcPart = SrcParts[i];
+ Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ MRI.setType(DstPart, NumParts == 1 ? Ty : S32);
+
+ const TargetRegisterClass *Constrained =
+ constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI);
+ (void)Constrained;
+ assert(Constrained && "Failed to constrain readfirstlane src reg");
+
+ B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
+
+ DstParts.push_back(DstPart);
+ }
+
+ if (Bits == 32)
+ return DstParts[0];
+
+ Register Dst = B.buildMerge(Ty, DstParts).getReg(0);
+ MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
+ return Dst;
+}
+
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
/// execute the instruction for each unique combination of values in all lanes
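Note: a rough picture of what buildReadFirstLane emits for a 64-bit VGPR value (registers invented): the value is unmerged into 32-bit pieces, each piece goes through V_READFIRSTLANE_B32 into an SGPR, and the pieces are merged back.

    ; unmerge v[0:1] -> v0, v1
    s0 = V_READFIRSTLANE_B32 v0
    s1 = V_READFIRSTLANE_B32 v1
    ; merge s0, s1 -> 64-bit SGPR value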
@@ -716,8 +766,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MachineFunction *MF = &B.getMF();
const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
- const unsigned WaveAndOpc = Subtarget.isWave32() ?
- AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
const unsigned MovExecOpc =
Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
const unsigned MovExecTermOpc =
@@ -747,16 +795,19 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// To insert the loop we need to split the block. Move everything before this
// point to a new block, and insert a new empty block before this instruction.
MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *BodyBB = MF->CreateMachineBasicBlock();
MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
MachineFunction::iterator MBBI(MBB);
++MBBI;
MF->insert(MBBI, LoopBB);
+ MF->insert(MBBI, BodyBB);
MF->insert(MBBI, RestoreExecBB);
MF->insert(MBBI, RemainderBB);
- LoopBB->addSuccessor(RestoreExecBB);
- LoopBB->addSuccessor(LoopBB);
+ LoopBB->addSuccessor(BodyBB);
+ BodyBB->addSuccessor(RestoreExecBB);
+ BodyBB->addSuccessor(LoopBB);
// Move the rest of the block into a new block.
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
@@ -768,27 +819,27 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.setInsertPt(*LoopBB, LoopBB->end());
B.buildInstr(TargetOpcode::PHI)
- .addDef(PhiExec)
- .addReg(InitSaveExecReg)
- .addMBB(&MBB)
- .addReg(NewExec)
- .addMBB(LoopBB);
+ .addDef(PhiExec)
+ .addReg(InitSaveExecReg)
+ .addMBB(&MBB)
+ .addReg(NewExec)
+ .addMBB(BodyBB);
const DebugLoc &DL = B.getDL();
MachineInstr &FirstInst = *Range.begin();
- // Move the instruction into the loop. Note we moved everything after
+ // Move the instruction into the loop body. Note we moved everything after
// Range.end() already into a new block, so Range.end() is no longer valid.
- LoopBB->splice(LoopBB->end(), &MBB, Range.begin(), MBB.end());
+ BodyBB->splice(BodyBB->end(), &MBB, Range.begin(), MBB.end());
// Figure out the iterator range after splicing the instructions.
MachineBasicBlock::iterator NewBegin = FirstInst.getIterator();
- auto NewEnd = LoopBB->end();
+ auto NewEnd = BodyBB->end();
- MachineBasicBlock::iterator I = Range.begin();
- B.setInsertPt(*LoopBB, I);
+ B.setMBB(*LoopBB);
+ LLT S1 = LLT::scalar(1);
Register CondReg;
assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
@@ -819,164 +870,62 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.setMBB(MBB);
OpReg = B.buildCopy(OpTy, OpReg).getReg(0);
MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
- B.setInstr(*I);
+ B.setMBB(*LoopBB);
}
- unsigned OpSize = OpTy.getSizeInBits();
+ Register CurrentLaneReg = buildReadFirstLane(B, MRI, OpReg);
- // Can only do a readlane of 32-bit pieces.
- if (OpSize == 32) {
- // Avoid extra copies in the simple case of one 32-bit register.
- Register CurrentLaneOpReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- MRI.setType(CurrentLaneOpReg, OpTy);
-
- constrainGenericRegister(OpReg, AMDGPU::VGPR_32RegClass, MRI);
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpReg)
- .addReg(OpReg);
-
- Register NewCondReg = MRI.createVirtualRegister(WaveRC);
- bool First = CondReg == AMDGPU::NoRegister;
- if (First)
- CondReg = NewCondReg;
-
- // Compare the just read M0 value to all possible Idx values.
- B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
- .addDef(NewCondReg)
- .addReg(CurrentLaneOpReg)
- .addReg(OpReg);
- Op.setReg(CurrentLaneOpReg);
-
- if (!First) {
- Register AndReg = MRI.createVirtualRegister(WaveRC);
-
- // If there are multiple operands to consider, and the conditions.
- B.buildInstr(WaveAndOpc)
- .addDef(AndReg)
- .addReg(NewCondReg)
- .addReg(CondReg);
- CondReg = AndReg;
- }
+ // Build the comparison(s).
+ unsigned OpSize = OpTy.getSizeInBits();
+ bool Is64 = OpSize % 64 == 0;
+ unsigned PartSize = Is64 ? 64 : 32;
+ LLT PartTy = LLT::scalar(PartSize);
+ unsigned NumParts = OpSize / PartSize;
+ SmallVector<Register, 8> OpParts;
+ SmallVector<Register, 8> CurrentLaneParts;
+
+ if (NumParts == 1) {
+ OpParts.push_back(OpReg);
+ CurrentLaneParts.push_back(CurrentLaneReg);
} else {
- LLT S32 = LLT::scalar(32);
- SmallVector<Register, 8> ReadlanePieces;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
-
- bool Is64 = OpSize % 64 == 0;
-
- unsigned UnmergeTySize = Is64 ? 64 : 32;
- unsigned CmpOp =
- Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64;
-
- // Insert the unmerge before the loop.
-
- B.setMBB(MBB);
- unsigned NumPieces = OpSize / UnmergeTySize;
- SmallVector<Register, 8> UnmergePieces;
- if (NumPieces == 1) {
- UnmergePieces.push_back(OpReg);
- } else {
- LLT UnmergeTy = LLT::scalar(UnmergeTySize);
- MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
- for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx)
- UnmergePieces.push_back(Unmerge.getReg(PieceIdx));
+ auto UnmergeOp = B.buildUnmerge(PartTy, OpReg);
+ auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg);
+ for (unsigned i = 0; i < NumParts; ++i) {
+ OpParts.push_back(UnmergeOp.getReg(i));
+ CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i));
+ MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
+ MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
}
- B.setInstr(*I);
-
- for (Register UnmergePiece : UnmergePieces) {
- Register CurrentLaneOpReg;
- if (Is64) {
- Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
- Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
-
- MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
- MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
- MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegLo)
- .addReg(UnmergePiece, 0, AMDGPU::sub0);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegHi)
- .addReg(UnmergePiece, 0, AMDGPU::sub1);
-
- CurrentLaneOpReg =
- B.buildMerge(LLT::scalar(64),
- {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
- .getReg(0);
-
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
-
- if (OpTy.getScalarSizeInBits() == 64) {
- // If we need to produce a 64-bit element vector, so use the
- // merged pieces
- ReadlanePieces.push_back(CurrentLaneOpReg);
- } else {
- // 32-bit element type.
- ReadlanePieces.push_back(CurrentLaneOpRegLo);
- ReadlanePieces.push_back(CurrentLaneOpRegHi);
- }
- } else {
- CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32);
- MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpReg)
- .addReg(UnmergePiece);
- ReadlanePieces.push_back(CurrentLaneOpReg);
- }
+ }
- Register NewCondReg = MRI.createVirtualRegister(WaveRC);
- bool First = CondReg == AMDGPU::NoRegister;
- if (First)
- CondReg = NewCondReg;
-
- B.buildInstr(CmpOp)
- .addDef(NewCondReg)
- .addReg(CurrentLaneOpReg)
- .addReg(UnmergePiece);
-
- if (!First) {
- Register AndReg = MRI.createVirtualRegister(WaveRC);
-
- // If there are multiple operands to consider, and the conditions.
- B.buildInstr(WaveAndOpc)
- .addDef(AndReg)
- .addReg(NewCondReg)
- .addReg(CondReg);
- CondReg = AndReg;
- }
- }
+ for (unsigned i = 0; i < NumParts; ++i) {
+ auto CmpReg = B.buildICmp(CmpInst::ICMP_EQ, S1, CurrentLaneParts[i],
+ OpParts[i]).getReg(0);
+ MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
- // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
- // BUILD_VECTOR
- if (OpTy.isVector()) {
- auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
- MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
- } else if (ReadlanePieces.size() > 1) {
- auto Merge = B.buildMerge(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
- MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ if (!CondReg) {
+ CondReg = CmpReg;
} else {
- Op.setReg(ReadlanePieces[0]);
+ CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0);
+ MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
}
}
+ Op.setReg(CurrentLaneReg);
+
    // Make sure we don't re-process this register.
WaterfalledRegMap.insert(std::make_pair(OldReg, Op.getReg()));
}
}
+ // The ballot becomes a no-op during instruction selection.
+ CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,
+ {LLT::scalar(Subtarget.isWave32() ? 32 : 64)},
+ false)
+ .addReg(CondReg)
+ .getReg(0);
+ MRI.setRegClass(CondReg, WaveRC);
+
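For reference, a scalar model of what the rewritten waterfall loop computes per trip: read the value held by the first active lane, ballot the lanes whose value matches on every part, run the body once uniformly for that group, and retire those lanes from EXEC. A minimal sketch in plain C++ (hypothetical names, GCC/Clang builtins; not part of the patch):

    #include <cstdint>
    #include <vector>

    // Model: one value per lane; RunUniform stands in for the loop body.
    void waterfallModel(const std::vector<uint64_t> &OpPerLane,
                        void (*RunUniform)(uint64_t Value, uint64_t ExecMask)) {
      unsigned N = (unsigned)OpPerLane.size();
      uint64_t Exec = (N >= 64) ? ~0ull : ((1ull << N) - 1);
      while (Exec) {
        unsigned FirstLane = __builtin_ctzll(Exec);  // v_readfirstlane
        uint64_t Current = OpPerLane[FirstLane];
        uint64_t Match = 0;                          // per-part cmp + and, then ballot
        for (unsigned L = 0; L < N; ++L)
          if (((Exec >> L) & 1) && OpPerLane[L] == Current)
            Match |= 1ull << L;
        RunUniform(Current, Match);                  // body executes once, uniformly
        Exec &= ~Match;                              // drop the finished lanes
      }
    }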
// Update EXEC, save the original EXEC value to VCC.
B.buildInstr(AndSaveExecOpc)
.addDef(NewExec)
@@ -984,7 +933,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MRI.setSimpleHint(NewExec, CondReg);
- B.setInsertPt(*LoopBB, LoopBB->end());
+ B.setInsertPt(*BodyBB, BodyBB->end());
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
B.buildInstr(XorTermOpc)
@@ -1064,28 +1013,10 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
if (Bank == &AMDGPU::SGPRRegBank)
return;
- LLT Ty = MRI.getType(Reg);
MachineIRBuilder B(MI);
- if (Bank != &AMDGPU::VGPRRegBank) {
- // We need to copy from AGPR to VGPR
- Reg = B.buildCopy(Ty, Reg).getReg(0);
- MRI.setRegBank(Reg, AMDGPU::VGPRRegBank);
- }
-
- Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
- .addDef(SGPR)
- .addReg(Reg);
-
- MRI.setType(SGPR, Ty);
-
- const TargetRegisterClass *Constrained =
- constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
- (void)Constrained;
- assert(Constrained && "Failed to constrain readfirstlane src reg");
-
- MI.getOperand(OpIdx).setReg(SGPR);
+ Reg = buildReadFirstLane(B, MRI, Reg);
+ MI.getOperand(OpIdx).setReg(Reg);
}
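This simplification relies on the new buildReadFirstLane helper, whose declaration is added to the header further down in this diff; its body is not shown in this hunk. Reconstructed from the code it replaces, the shape is roughly the following sketch — treat the details (register classes, AGPR handling, constraints on the merged result) as an approximation, not the exact upstream implementation:

    // Sketch: copy from AGPR to VGPR if needed, read each 32-bit piece with
    // V_READFIRSTLANE_B32, then merge the SGPR pieces back to the full type.
    Register AMDGPURegisterBankInfo::buildReadFirstLane(
        MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const {
      LLT Ty = MRI.getType(Src);
      if (MRI.getRegBankOrNull(Src) != &AMDGPU::VGPRRegBank) {
        Src = B.buildCopy(Ty, Src).getReg(0);  // AGPR -> VGPR first
        MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
      }

      LLT S32 = LLT::scalar(32);
      unsigned NumParts = Ty.getSizeInBits() / 32;
      SmallVector<Register, 8> SrcParts;
      SmallVector<Register, 8> DstParts;

      if (NumParts == 1) {
        SrcParts.push_back(Src);
      } else {
        auto Unmerge = B.buildUnmerge(S32, Src);
        for (unsigned i = 0; i < NumParts; ++i)
          SrcParts.push_back(Unmerge.getReg(i));
      }

      for (Register Part : SrcParts) {
        Register Lane = MRI.createGenericVirtualRegister(S32);
        MRI.setRegClass(Lane, &AMDGPU::SReg_32_XM0RegClass);
        B.buildInstr(AMDGPU::V_READFIRSTLANE_B32).addDef(Lane).addReg(Part);
        DstParts.push_back(Lane);
      }

      return NumParts == 1 ? DstParts[0] : B.buildMerge(Ty, DstParts).getReg(0);
    }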
/// Split \p Ty into 2 pieces. The first will have \p FirstSize bits, and the
@@ -1624,6 +1555,157 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
return true;
}
+bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
+ const OperandsMapper &OpdMapper) const {
+ MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+
+ // Insert basic copies.
+ applyDefaultMapping(OpdMapper);
+
+ Register Dst0 = MI.getOperand(0).getReg();
+ Register Dst1 = MI.getOperand(1).getReg();
+ Register Src0 = MI.getOperand(2).getReg();
+ Register Src1 = MI.getOperand(3).getReg();
+ Register Src2 = MI.getOperand(4).getReg();
+
+ if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
+ return true;
+
+ bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
+ LLT S1 = LLT::scalar(1);
+ LLT S32 = LLT::scalar(32);
+
+ bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
+ bool Accumulate = true;
+
+ if (!DstOnValu) {
+ if (mi_match(Src2, MRI, m_ZeroInt()))
+ Accumulate = false;
+ }
+
+ // Keep the multiplication on the SALU.
+ MachineIRBuilder B(MI);
+
+ Register DstHi;
+ Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
+ bool MulHiInVgpr = false;
+
+ MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
+
+ if (Subtarget.hasSMulHi()) {
+ DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0)
+ : B.buildSMulH(S32, Src0, Src1).getReg(0);
+ MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
+ } else {
+ Register VSrc0 = B.buildCopy(S32, Src0).getReg(0);
+ Register VSrc1 = B.buildCopy(S32, Src1).getReg(0);
+
+ MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
+ MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
+
+ DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
+ : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
+ MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
+
+ if (!DstOnValu) {
+ DstHi = buildReadFirstLane(B, MRI, DstHi);
+ } else {
+ MulHiInVgpr = true;
+ }
+ }
+
+ // Accumulate and produce the "carry-out" bit.
+ //
+ // The "carry-out" is defined as bit 64 of the result when computed as a
+ // big integer. For unsigned multiply-add, this matches the usual definition
+ // of carry-out. For signed multiply-add, bit 64 is the sign bit of the
+ // result, which is determined as:
+ // sign(Src0 * Src1) + sign(Src2) + carry-out from unsigned 64-bit add
+ LLT CarryType = DstOnValu ? S1 : S32;
+ const RegisterBank &CarryBank =
+ DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
+ const RegisterBank &DstBank =
+ DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
+ Register Carry;
+ Register Zero;
+
+ if (!IsUnsigned) {
+ Zero = B.buildConstant(S32, 0).getReg(0);
+ MRI.setRegBank(Zero,
+ MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
+
+ Carry = B.buildICmp(CmpInst::ICMP_SLT, MulHiInVgpr ? S1 : S32, DstHi, Zero)
+ .getReg(0);
+ MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
+ : AMDGPU::SGPRRegBank);
+
+ if (DstOnValu && !MulHiInVgpr) {
+ Carry = B.buildTrunc(S1, Carry).getReg(0);
+ MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
+ }
+ }
+
+ if (Accumulate) {
+ if (DstOnValu) {
+ DstLo = B.buildCopy(S32, DstLo).getReg(0);
+ DstHi = B.buildCopy(S32, DstHi).getReg(0);
+ MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
+ MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
+ }
+
+ auto Unmerge = B.buildUnmerge(S32, Src2);
+ Register Src2Lo = Unmerge.getReg(0);
+ Register Src2Hi = Unmerge.getReg(1);
+ MRI.setRegBank(Src2Lo, DstBank);
+ MRI.setRegBank(Src2Hi, DstBank);
+
+ if (!IsUnsigned) {
+ auto Src2Sign = B.buildICmp(CmpInst::ICMP_SLT, CarryType, Src2Hi, Zero);
+ MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
+
+ Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
+ MRI.setRegBank(Carry, CarryBank);
+ }
+
+ auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
+ DstLo = AddLo.getReg(0);
+ Register CarryLo = AddLo.getReg(1);
+ MRI.setRegBank(DstLo, DstBank);
+ MRI.setRegBank(CarryLo, CarryBank);
+
+ auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
+ DstHi = AddHi.getReg(0);
+ MRI.setRegBank(DstHi, DstBank);
+
+ Register CarryHi = AddHi.getReg(1);
+ MRI.setRegBank(CarryHi, CarryBank);
+
+ if (IsUnsigned) {
+ Carry = CarryHi;
+ } else {
+ Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0);
+ MRI.setRegBank(Carry, CarryBank);
+ }
+ } else {
+ if (IsUnsigned) {
+ Carry = B.buildConstant(CarryType, 0).getReg(0);
+ MRI.setRegBank(Carry, CarryBank);
+ }
+ }
+
+ B.buildMerge(Dst0, {DstLo, DstHi});
+
+ if (DstOnValu) {
+ B.buildCopy(Dst1, Carry);
+ } else {
+ B.buildTrunc(Dst1, Carry);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
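The signed carry-out identity in the comment above (bit 64 of the 65-bit result equals sign(product) XOR sign(addend) XOR the carry-out of the unsigned 64-bit add) can be sanity-checked with a standalone brute-force test. An illustrative check in plain C++ (assumes a compiler with __int128; not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Reference: bit 64 of Src0 * Src1 + Src2, computed as a big integer.
    static bool bit64Reference(int32_t S0, int32_t S1, int64_t S2) {
      __int128 Full = (__int128)S0 * S1 + S2;
      return (Full >> 64) & 1;
    }

    // The identity used by applyMappingMAD_64_32 for the signed case.
    static bool bit64Identity(int32_t S0, int32_t S1, int64_t S2) {
      int64_t Prod = (int64_t)S0 * S1;          // exact: 32 x 32 -> 64 bits
      uint64_t Sum = (uint64_t)Prod + (uint64_t)S2;
      bool CarryOut = Sum < (uint64_t)Prod;     // carry of the unsigned 64-bit add
      return (Prod < 0) ^ (S2 < 0) ^ CarryOut;  // sign ^ sign ^ carry
    }

    int main() {
      for (int64_t S2 : {INT64_MIN, (int64_t)-1, (int64_t)0, (int64_t)1, INT64_MAX})
        for (int32_t S0 : {INT32_MIN, -7, 0, 3, INT32_MAX})
          for (int32_t S1 : {INT32_MIN, -2, 0, 5, INT32_MAX})
            assert(bit64Reference(S0, S1, S2) == bit64Identity(S0, S1, S2));
      return 0;
    }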
// Return a suitable opcode for extending the operands of Opc when widening.
static unsigned getExtendOp(unsigned Opc) {
switch (Opc) {
@@ -1794,7 +1876,7 @@ bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg,
}
/// Utility function for pushing dynamic vector indexes with a constant offset
-/// into waterwall loops.
+/// into waterfall loops.
static void reinsertVectorIndexAdd(MachineIRBuilder &B,
MachineInstr &IdxUseInstr,
unsigned OpIdx,
@@ -1857,7 +1939,7 @@ bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
unsigned NumElem = VecTy.getNumElements();
if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
- IsDivergentIdx))
+ IsDivergentIdx, &Subtarget))
return false;
MachineIRBuilder B(MI);
@@ -1955,7 +2037,7 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
unsigned NumElem = VecTy.getNumElements();
if (!SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
- IsDivergentIdx))
+ IsDivergentIdx, &Subtarget))
return false;
MachineIRBuilder B(MI);
@@ -2926,7 +3008,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case Intrinsic::amdgcn_interp_p2:
case Intrinsic::amdgcn_interp_mov:
case Intrinsic::amdgcn_interp_p1_f16:
- case Intrinsic::amdgcn_interp_p2_f16: {
+ case Intrinsic::amdgcn_interp_p2_f16:
+ case Intrinsic::amdgcn_lds_param_load: {
applyDefaultMapping(OpdMapper);
// Readlane for m0 value, which is always the last operand.
@@ -2934,6 +3017,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
return;
}
+ case Intrinsic::amdgcn_interp_inreg_p10:
+ case Intrinsic::amdgcn_interp_inreg_p2:
+ case Intrinsic::amdgcn_interp_inreg_p10_f16:
+ case Intrinsic::amdgcn_interp_inreg_p2_f16:
+ applyDefaultMapping(OpdMapper);
+ return;
case Intrinsic::amdgcn_permlane16:
case Intrinsic::amdgcn_permlanex16: {
// Doing a waterfall loop over these wouldn't make any sense.
@@ -3015,6 +3104,35 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 2);
return;
}
+ case Intrinsic::amdgcn_raw_buffer_load_lds: {
+ applyDefaultMapping(OpdMapper);
+ constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ constrainOpWithReadfirstlane(MI, MRI, 5); // soffset
+ return;
+ }
+ case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ applyDefaultMapping(OpdMapper);
+ constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ constrainOpWithReadfirstlane(MI, MRI, 6); // soffset
+ return;
+ }
+ case Intrinsic::amdgcn_global_load_lds: {
+ applyDefaultMapping(OpdMapper);
+ constrainOpWithReadfirstlane(MI, MRI, 2);
+ return;
+ }
+ case Intrinsic::amdgcn_lds_direct_load: {
+ applyDefaultMapping(OpdMapper);
+ // Readlane for m0 value, which is always the last operand.
+ constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
+ return;
+ }
+ case Intrinsic::amdgcn_exp_row:
+ applyDefaultMapping(OpdMapper);
+ constrainOpWithReadfirstlane(MI, MRI, 8); // M0
+ return;
default: {
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -3143,6 +3261,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_UBFX:
applyMappingBFE(OpdMapper, /*Signed*/ false);
return;
+ case AMDGPU::G_AMDGPU_MAD_U64_U32:
+ case AMDGPU::G_AMDGPU_MAD_I64_I32:
+ applyMappingMAD_64_32(OpdMapper);
+ return;
default:
break;
}
@@ -3668,6 +3790,48 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
}
+ case AMDGPU::G_AMDGPU_MAD_U64_U32:
+ case AMDGPU::G_AMDGPU_MAD_I64_I32: {
+ // Three possible mappings:
+ //
+ // - Default SOP
+ // - Default VOP
+ // - Scalar multiply: src0 and src1 are SGPRs, the rest is VOP.
+ //
+ // This allows instruction selection to keep the multiplication part of the
+ // instruction on the SALU.
+ bool AllSalu = true;
+ bool MulSalu = true;
+ for (unsigned i = 0; i < 5; ++i) {
+ Register Reg = MI.getOperand(i).getReg();
+ if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
+ if (Bank->getID() != AMDGPU::SGPRRegBankID) {
+ AllSalu = false;
+ if (i == 2 || i == 3) {
+ MulSalu = false;
+ break;
+ }
+ }
+ }
+ }
+
+ if (AllSalu)
+ return getDefaultMappingSOP(MI);
+
+ // If the multiply-add is full-rate in VALU, use that even if the
+ // multiplication part is scalar. Accumulating separately on the VALU would
+ // take two instructions.
+ if (!MulSalu || Subtarget.hasFullRate64Ops())
+ return getDefaultMappingVOP(MI);
+
+ // Keep the multiplication on the SALU, then accumulate on the VALU.
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+ break;
+ }
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
@@ -3828,10 +3992,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
- unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
- OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
break;
}
@@ -4102,6 +4265,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_udot4:
case Intrinsic::amdgcn_sdot8:
case Intrinsic::amdgcn_udot8:
+ case Intrinsic::amdgcn_fdot2_bf16_bf16:
+ case Intrinsic::amdgcn_fdot2_f16_f16:
+ case Intrinsic::amdgcn_fdot2_f32_bf16:
+ case Intrinsic::amdgcn_sudot4:
+ case Intrinsic::amdgcn_sudot8:
+ case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
+ case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
+ case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
+ case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
+ case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
+ case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_sbfe:
case Intrinsic::amdgcn_ubfe:
@@ -4120,6 +4294,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wqm:
case Intrinsic::amdgcn_softwqm:
case Intrinsic::amdgcn_set_inactive:
+ case Intrinsic::amdgcn_permlane64:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_kernarg_segment_ptr:
case Intrinsic::amdgcn_s_getpc:
@@ -4247,24 +4422,50 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
- case Intrinsic::amdgcn_mfma_f64_4x4x4f64: {
+ case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
+ case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
+ case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
+ case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
+ case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: {
// Default for MAI intrinsics.
// srcC can also be an immediate which can be folded later.
// FIXME: Should we eventually add an alternative mapping with AGPR src
// for srcA/srcB?
//
// vdst, srcA, srcB, srcC
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ OpdsMapping[0] =
+ Info->mayNeedAGPRs()
+ ? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
+ : getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] =
+ Info->mayNeedAGPRs()
+ ? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
+ : getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
+ case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
+ case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: {
+ // vdst, srcA, srcB, srcC, idx
OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_interp_p1:
case Intrinsic::amdgcn_interp_p2:
case Intrinsic::amdgcn_interp_mov:
case Intrinsic::amdgcn_interp_p1_f16:
- case Intrinsic::amdgcn_interp_p2_f16: {
+ case Intrinsic::amdgcn_interp_p2_f16:
+ case Intrinsic::amdgcn_lds_param_load: {
const int M0Idx = MI.getNumOperands() - 1;
Register M0Reg = MI.getOperand(M0Idx).getReg();
unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
@@ -4279,6 +4480,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
break;
}
+ case Intrinsic::amdgcn_interp_inreg_p10:
+ case Intrinsic::amdgcn_interp_inreg_p2:
+ case Intrinsic::amdgcn_interp_inreg_p10_f16:
+ case Intrinsic::amdgcn_interp_inreg_p2_f16: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ break;
+ }
case Intrinsic::amdgcn_ballot: {
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
@@ -4314,8 +4526,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
} else {
// NSA form
- for (unsigned I = 2; I < N; ++I)
- OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ for (unsigned I = 2; I < N; ++I) {
+ unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits();
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ }
}
break;
}
@@ -4325,7 +4539,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_s_getreg:
case Intrinsic::amdgcn_s_memtime:
case Intrinsic::amdgcn_s_memrealtime:
- case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
+ case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
+ case Intrinsic::amdgcn_s_sendmsg_rtn: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
@@ -4337,6 +4552,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
+ case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
@@ -4366,6 +4583,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
break;
+ case Intrinsic::amdgcn_exp_row:
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ OpdsMapping[8] = getSGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);
+ break;
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// This must be an SGPR, but accept a VGPR.
@@ -4412,6 +4636,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_raw_buffer_load_lds: {
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_tbuffer_store: {
@@ -4430,6 +4661,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+ OpdsMapping[6] = getSGPROpMapping(MI.getOperand(6).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store: {
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
@@ -4464,6 +4703,31 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
break;
}
+ case Intrinsic::amdgcn_global_load_lds: {
+ OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_lds_direct_load: {
+ const int M0Idx = MI.getNumOperands() - 1;
+ Register M0Reg = MI.getOperand(M0Idx).getReg();
+ unsigned M0Bank = getRegBankID(M0Reg, MRI, AMDGPU::SGPRRegBankID);
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ for (int I = 2; I != M0Idx && MI.getOperand(I).isReg(); ++I)
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+
+ // This must be an SGPR, but take whatever the original bank is and fix it
+ // later.
+ OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
+ break;
+ }
+ case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
+ case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
default:
return getInvalidInstructionMapping();
}
@@ -4568,6 +4832,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
break;
}
+ case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
+ case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
+ return getDefaultMappingVOP(MI);
}
return getInstructionMapping(/*ID*/1, /*Cost*/1,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 2b9d0923ab49..c9741c2202e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "AMDGPUGenRegisterBank.inc"
@@ -59,6 +59,9 @@ public:
SmallSet<Register, 4> &SGPROperandRegs,
MachineRegisterInfo &MRI) const;
+ Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ Register Src) const;
+
bool executeInWaterfallLoop(MachineIRBuilder &B,
MachineInstr &MI,
MachineRegisterInfo &MRI,
@@ -83,6 +86,8 @@ public:
bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const;
+ bool applyMappingMAD_64_32(const OperandsMapper &OpdMapper) const;
+
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp
new file mode 100644
index 000000000000..a86871a4a653
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp
@@ -0,0 +1,140 @@
+//===- AMDGPUReleaseVGPRs.cpp - Automatically release VGPRs on GFX11+ -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Insert S_SENDMSG instructions to release VGPRs on GFX11+.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineOperand.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "release-vgprs"
+
+namespace {
+
+class AMDGPUReleaseVGPRs : public MachineFunctionPass {
+public:
+ static char ID;
+
+ const SIInstrInfo *SII;
+ const SIRegisterInfo *TRI;
+
+ AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ // Used to cache the result of isLastVGPRUseVMEMStore for each block.
+ using BlockVMEMStoreType = DenseMap<MachineBasicBlock *, bool>;
+ BlockVMEMStoreType BlockVMEMStore;
+
+ // Return true if the last instruction referencing a VGPR in this MBB
+ // is a VMEM store, otherwise return false. Visit previous basic blocks
+ // to find this last instruction if needed.
+ // Because this pass runs late in the pipeline, the last VGPR use is
+ // expected to be a VMEM store, DS, or EXP instruction. Loads and other
+ // VGPR operations would have been deleted by this point, except in
+ // complex control flow involving loops. This is why we test only the
+ // types of the instructions rather than their operands.
+ bool isLastVGPRUseVMEMStore(MachineBasicBlock &MBB) {
+ // Use the cache to break infinite recursion and save time. Initialize to
+ // false in case we have a cycle.
+ BlockVMEMStoreType::iterator It;
+ bool Inserted;
+ std::tie(It, Inserted) = BlockVMEMStore.insert({&MBB, false});
+ bool &CacheEntry = It->second;
+ if (!Inserted)
+ return CacheEntry;
+
+ for (auto &MI : reverse(MBB.instrs())) {
+ // If it's a VMEM store, a VGPR is used; return true.
+ if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) && MI.mayStore())
+ return CacheEntry = true;
+
+ // If it's referencing a VGPR but is not a VMEM store, return false.
+ if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) ||
+ SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) ||
+ SIInstrInfo::isVALU(MI))
+ return CacheEntry = false;
+ }
+
+ // Recurse into predecessor blocks: look at the predecessors when no VGPR
+ // is used in this block.
+ return CacheEntry = llvm::any_of(MBB.predecessors(),
+ [this](MachineBasicBlock *Parent) {
+ return isLastVGPRUseVMEMStore(*Parent);
+ });
+ }
+
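Note the cache discipline above: the entry is inserted as false before any recursion, so a CFG cycle that loops back to MBB reads the provisional value instead of recursing forever, and the entry is then overwritten with the real answer. The same idiom in standalone form (plain C++ sketch over a hypothetical Node graph; not part of the patch):

    #include <unordered_map>
    #include <vector>

    struct Node {
      std::vector<Node *> Preds;
      bool Terminal = false;
    };

    // Memoized predicate over predecessors; the provisional 'false' entry
    // breaks cycles exactly like BlockVMEMStore does above.
    bool reachesTerminal(Node *N, std::unordered_map<Node *, bool> &Cache) {
      auto [It, Inserted] = Cache.insert({N, false});
      bool &Entry = It->second;  // element references survive rehashing
      if (!Inserted)
        return Entry;            // cached result, or provisional inside a cycle
      if (N->Terminal)
        return Entry = true;
      for (Node *P : N->Preds)
        if (reachesTerminal(P, Cache))
          return Entry = true;
      return false;
    }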
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+
+ bool Changed = false;
+
+ for (auto &MI : MBB.terminators()) {
+ // Look for S_ENDPGM instructions
+ if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+ MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
+ // If the last instruction using a VGPR in the block is a VMEM store,
+ // release the VGPRs. The release is placed just before the program
+ // ends.
+ if (isLastVGPRUseVMEMStore(MBB)) {
+ BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG))
+ .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Function &F = MF.getFunction();
+ if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ return false;
+
+ // This pass only runs on GFX11+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (ST.getGeneration() < AMDGPUSubtarget::GFX11)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName()
+ << "\n");
+
+ SII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ Changed |= runOnMachineBasicBlock(MBB);
+ }
+
+ BlockVMEMStore.clear();
+
+ return Changed;
+ }
+};
+
+} // namespace
+
+char AMDGPUReleaseVGPRs::ID = 0;
+
+char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID;
+
+INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
index 2475b44b42a3..4d7a3f4028e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@@ -83,7 +83,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDGPULDSUtils.h"
+#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
@@ -442,7 +442,7 @@ class CollectReachableCallees {
continue;
for (const auto &GI : *CGN) {
- auto *RCB = cast<CallBase>(GI.first.getValue());
+ auto *RCB = cast<CallBase>(*GI.first);
auto *RCGN = GI.second;
if (auto *DCallee = RCGN->getFunction()) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index cb511e5e3483..f7f93c75c870 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -27,7 +27,9 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
@@ -87,9 +89,7 @@ int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
- if (ST.hasGFX90AInsts() && ArgNumAGPR)
- return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
- return std::max(ArgNumVGPR, ArgNumAGPR);
+ return AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), ArgNumAGPR, ArgNumVGPR);
}
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
@@ -97,28 +97,31 @@ int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}
-bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) {
+bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
const TargetMachine &TM = TPC->getTM<TargetMachine>();
bool HasIndirectCall = false;
- for (CallGraphNode *I : SCC) {
- Function *F = I->getFunction();
+ CallGraph CG = CallGraph(M);
+ auto End = po_end(&CG);
+
+ for (auto IT = po_begin(&CG); IT != End; ++IT) {
+ Function *F = IT->getFunction();
if (!F || F->isDeclaration())
continue;
- MachineModuleInfo &MMI =
- getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
- MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MachineFunction *MF = MMI.getMachineFunction(*F);
+ assert(MF && "function must have been generated already");
auto CI = CallGraphResourceInfo.insert(
- std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
+ std::make_pair(F, SIFunctionResourceInfo()));
SIFunctionResourceInfo &Info = CI.first->second;
assert(CI.second && "should only be called once per function");
- Info = analyzeResourceUsage(MF, TM);
+ Info = analyzeResourceUsage(*MF, TM);
HasIndirectCall |= Info.HasIndirectCall;
}
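Switching to a module pass with a post-order walk over the CallGraph guarantees that every callee's resource info is finalized before any caller is visited, which is what lets each caller aggregate its callees in a single pass. A toy model of why post-order gives that property (plain C++ sketch assuming an acyclic call graph; the real analysis is more involved):

    #include <algorithm>
    #include <unordered_set>
    #include <vector>

    struct Fn {
      std::vector<Fn *> Callees;
      int OwnVGPRs = 0;    // registers this function uses directly
      int TotalVGPRs = 0;  // filled in post-order
    };

    void computeTotals(Fn *F, std::unordered_set<Fn *> &Done) {
      if (!Done.insert(F).second)
        return;
      int CalleeMax = 0;
      for (Fn *C : F->Callees) {
        computeTotals(C, Done);  // callees finish before the caller
        CalleeMax = std::max(CalleeMax, C->TotalVGPRs);
      }
      // A caller must cover the worst callee it can reach.
      F->TotalVGPRs = std::max(F->OwnVGPRs, CalleeMax);
    }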
@@ -246,6 +249,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT:
case AMDGPU::SGPR_NULL:
+ case AMDGPU::SGPR_NULL64:
case AMDGPU::MODE:
continue;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
index b0a2d3bffc62..df0789e471c1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
@@ -24,7 +24,7 @@ class GCNSubtarget;
class MachineFunction;
class TargetMachine;
-struct AMDGPUResourceUsageAnalysis : public CallGraphSCCPass {
+struct AMDGPUResourceUsageAnalysis : public ModulePass {
static char ID;
public:
@@ -50,15 +50,15 @@ public:
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
- AMDGPUResourceUsageAnalysis() : CallGraphSCCPass(ID) {}
+ AMDGPUResourceUsageAnalysis() : ModulePass(ID) {}
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- bool doInitialization(CallGraph &CG) override {
+ bool doInitialization(Module &M) override {
CallGraphResourceInfo.clear();
- return CallGraphSCCPass::doInitialization(CG);
+ return ModulePass::doInitialization(M);
}
+ bool runOnModule(Module &M) override;
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.setPreservesAll();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 1c6c63dd5b25..4f8a61a77097 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -83,12 +83,8 @@ private:
const DataLayout *DL = nullptr;
MemoryDependenceResults *MDA = nullptr;
- bool checkArgumentUses(Value &Arg) const;
- bool isOutArgumentCandidate(Argument &Arg) const;
-
-#ifndef NDEBUG
- bool isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const;
-#endif
+ Type *getStoredType(Value &Arg) const;
+ Type *getOutArgumentType(Argument &Arg) const;
public:
static char ID;
@@ -114,72 +110,61 @@ INITIALIZE_PASS_END(AMDGPURewriteOutArguments, DEBUG_TYPE,
char AMDGPURewriteOutArguments::ID = 0;
-bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
+Type *AMDGPURewriteOutArguments::getStoredType(Value &Arg) const {
const int MaxUses = 10;
int UseCount = 0;
- for (Use &U : Arg.uses()) {
- StoreInst *SI = dyn_cast<StoreInst>(U.getUser());
- if (UseCount > MaxUses)
- return false;
+ SmallVector<Use *> Worklist;
+ for (Use &U : Arg.uses())
+ Worklist.push_back(&U);
- if (!SI) {
- auto *BCI = dyn_cast<BitCastInst>(U.getUser());
- if (!BCI || !BCI->hasOneUse())
- return false;
-
- // We don't handle multiple stores currently, so stores to aggregate
- // pointers aren't worth the trouble since they are canonically split up.
- Type *DestEltTy = BCI->getType()->getPointerElementType();
- if (DestEltTy->isAggregateType())
- return false;
-
- // We could handle these if we had a convenient way to bitcast between
- // them.
- Type *SrcEltTy = Arg.getType()->getPointerElementType();
- if (SrcEltTy->isArrayTy())
- return false;
-
- // Special case handle structs with single members. It is useful to handle
- // some casts between structs and non-structs, but we can't bitcast
- // directly between them. Blender uses some casts that look like
- // { <3 x float> }* to <4 x float>*
- if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
- return false;
-
- // Clang emits OpenCL 3-vector type accesses with a bitcast to the
- // equivalent 4-element vector and accesses that, and we're looking for
- // this pointer cast.
- if (DL->getTypeAllocSize(SrcEltTy) != DL->getTypeAllocSize(DestEltTy))
- return false;
-
- return checkArgumentUses(*BCI);
+ Type *StoredType = nullptr;
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+
+ if (auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
+ for (Use &U : BCI->uses())
+ Worklist.push_back(&U);
+ continue;
}
- if (!SI->isSimple() ||
- U.getOperandNo() != StoreInst::getPointerOperandIndex())
- return false;
+ if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
+ if (UseCount++ > MaxUses)
+ return nullptr;
+
+ if (!SI->isSimple() ||
+ U->getOperandNo() != StoreInst::getPointerOperandIndex())
+ return nullptr;
- ++UseCount;
+ if (StoredType && StoredType != SI->getValueOperand()->getType())
+ return nullptr; // More than one type.
+ StoredType = SI->getValueOperand()->getType();
+ continue;
+ }
+
+ // Unsupported user.
+ return nullptr;
}
- // Skip unused arguments.
- return UseCount > 0;
+ return StoredType;
}
-bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const {
+Type *AMDGPURewriteOutArguments::getOutArgumentType(Argument &Arg) const {
const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;
PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType());
// TODO: It might be useful for any out arguments, not just privates.
if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() &&
!AnyAddressSpace) ||
- Arg.hasByValAttr() || Arg.hasStructRetAttr() ||
- DL->getTypeStoreSize(ArgTy->getPointerElementType()) > MaxOutArgSizeBytes) {
- return false;
+ Arg.hasByValAttr() || Arg.hasStructRetAttr()) {
+ return nullptr;
}
- return checkArgumentUses(Arg);
+ Type *StoredType = getStoredType(Arg);
+ if (!StoredType || DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes)
+ return nullptr;
+
+ return StoredType;
}
bool AMDGPURewriteOutArguments::doInitialization(Module &M) {
@@ -187,22 +172,6 @@ bool AMDGPURewriteOutArguments::doInitialization(Module &M) {
return false;
}
-#ifndef NDEBUG
-bool AMDGPURewriteOutArguments::isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const {
- auto *VT0 = dyn_cast<FixedVectorType>(Ty0);
- auto *VT1 = dyn_cast<FixedVectorType>(Ty1);
- if (!VT0 || !VT1)
- return false;
-
- if (VT0->getNumElements() != 3 ||
- VT1->getNumElements() != 4)
- return false;
-
- return DL->getTypeSizeInBits(VT0->getElementType()) ==
- DL->getTypeSizeInBits(VT1->getElementType());
-}
-#endif
-
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -215,7 +184,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
unsigned ReturnNumRegs = 0;
- SmallSet<int, 4> OutArgIndexes;
+ SmallDenseMap<int, Type *, 4> OutArgIndexes;
SmallVector<Type *, 4> ReturnTypes;
Type *RetTy = F.getReturnType();
if (!RetTy->isVoidTy()) {
@@ -227,12 +196,12 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
ReturnTypes.push_back(RetTy);
}
- SmallVector<Argument *, 4> OutArgs;
+ SmallVector<std::pair<Argument *, Type *>, 4> OutArgs;
for (Argument &Arg : F.args()) {
- if (isOutArgumentCandidate(Arg)) {
+ if (Type *Ty = getOutArgumentType(Arg)) {
LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
<< " in function " << F.getName() << '\n');
- OutArgs.push_back(&Arg);
+ OutArgs.push_back({&Arg, Ty});
}
}
@@ -264,11 +233,12 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
// first. On the second iteration we've removed that out clobbering argument
// (by effectively moving it into another function) and will find the second
// argument is OK to move.
- for (Argument *OutArg : OutArgs) {
+ for (const auto &Pair : OutArgs) {
bool ThisReplaceable = true;
SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;
- Type *ArgTy = OutArg->getType()->getPointerElementType();
+ Argument *OutArg = Pair.first;
+ Type *ArgTy = Pair.second;
// Skip this argument if converting it will push us over the register
// count to return limit.
@@ -324,7 +294,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (ThisReplaceable) {
ReturnTypes.push_back(ArgTy);
- OutArgIndexes.insert(OutArg->getArgNo());
+ OutArgIndexes.insert({OutArg->getArgNo(), ArgTy});
++NumOutArgumentsReplaced;
Changing = true;
}
@@ -376,32 +346,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (RetVal)
NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
- for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
- Argument *Arg = ReturnPoint.first;
- Value *Val = ReturnPoint.second;
- Type *EltTy = Arg->getType()->getPointerElementType();
- if (Val->getType() != EltTy) {
- Type *EffectiveEltTy = EltTy;
- if (StructType *CT = dyn_cast<StructType>(EltTy)) {
- assert(CT->getNumElements() == 1);
- EffectiveEltTy = CT->getElementType(0);
- }
-
- if (DL->getTypeSizeInBits(EffectiveEltTy) !=
- DL->getTypeSizeInBits(Val->getType())) {
- assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
- Val = B.CreateShuffleVector(Val, ArrayRef<int>{0, 1, 2});
- }
-
- Val = B.CreateBitCast(Val, EffectiveEltTy);
-
- // Re-create single element composite.
- if (EltTy != EffectiveEltTy)
- Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
- }
-
- NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
- }
+ for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
+ NewRetVal = B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
if (RetVal)
RI->setOperand(0, NewRetVal);
@@ -433,7 +379,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
PointerType *ArgType = cast<PointerType>(Arg.getType());
- auto *EltTy = ArgType->getPointerElementType();
+ Type *EltTy = OutArgIndexes[Arg.getArgNo()];
const auto Align =
DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index afe016731395..8297635d7bb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -39,7 +39,8 @@ class GcnBufferFormatBase<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bi
}
class Gfx9BufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
-class Gfx10PlusBufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
+class Gfx10BufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
+class Gfx11PlusBufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
class GcnBufferFormatTable : GenericTable {
let CppTypeName = "GcnBufferFormatInfo";
@@ -51,17 +52,25 @@ def Gfx9BufferFormat : GcnBufferFormatTable {
let FilterClass = "Gfx9BufferFormat";
let PrimaryKeyName = "getGfx9BufferFormatInfo";
}
-def Gfx10PlusBufferFormat : GcnBufferFormatTable {
- let FilterClass = "Gfx10PlusBufferFormat";
- let PrimaryKeyName = "getGfx10PlusBufferFormatInfo";
+def Gfx10BufferFormat : GcnBufferFormatTable {
+ let FilterClass = "Gfx10BufferFormat";
+ let PrimaryKeyName = "getGfx10BufferFormatInfo";
+}
+def Gfx11PlusBufferFormat : GcnBufferFormatTable {
+ let FilterClass = "Gfx11PlusBufferFormat";
+ let PrimaryKeyName = "getGfx11PlusBufferFormatInfo";
}
def getGfx9BufferFormatInfo : SearchIndex {
let Table = Gfx9BufferFormat;
let Key = ["Format"];
}
-def getGfx10PlusBufferFormatInfo : SearchIndex {
- let Table = Gfx10PlusBufferFormat;
+def getGfx10BufferFormatInfo : SearchIndex {
+ let Table = Gfx10BufferFormat;
+ let Key = ["Format"];
+}
+def getGfx11PlusBufferFormatInfo : SearchIndex {
+ let Table = Gfx11PlusBufferFormat;
let Key = ["Format"];
}
@@ -119,57 +128,87 @@ def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x5E, 32, 4, /*NUM_FORMA
def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x7E, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
// Buffer formats with equal component sizes (GFX10 and later)
-def : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/ 0x02, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/ 0x03, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/ 0x04, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/ 0x05, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/ 0x06, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/ 0x07, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/ 0x08, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/ 0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/ 0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/ 0x0B, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/ 0x0C, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/ 0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/ 0x0E, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/ 0x0F, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/ 0x10, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/ 0x11, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/ 0x12, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/ 0x13, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/ 0x14, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/ 0x15, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/ 0x16, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/ 0x17, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/ 0x18, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/ 0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/ 0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/ 0x1B, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/ 0x1C, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/ 0x1D, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x38, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x39, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3B, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x3C, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>;
-def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x3D, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_UINT*/ 0x3E, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_SINT*/ 0x3F, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_FLOAT*/ 0x40, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x41, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x42, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x45, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x46, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>;
-def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x47, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_UINT*/ 0x48, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_SINT*/ 0x49, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4B, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x4C, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>;
-def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
+multiclass Gfx10PlusBufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> {
+ def : Gfx10BufferFormat<f, bpc, numc, nfmt, dfmt>;
+ def : Gfx11PlusBufferFormat<f, bpc, numc, nfmt, dfmt>;
+}
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/ 0x02, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/ 0x03, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/ 0x04, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/ 0x05, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/ 0x06, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/ 0x07, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/ 0x08, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/ 0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/ 0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/ 0x0B, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/ 0x0C, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/ 0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/ 0x0E, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/ 0x0F, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/ 0x10, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/ 0x11, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/ 0x12, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/ 0x13, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/ 0x14, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/ 0x15, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/ 0x16, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/ 0x17, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/ 0x18, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/ 0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/ 0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/ 0x1B, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/ 0x1C, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>;
+defm : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/ 0x1D, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>;
+
+// Buffer formats with equal component sizes (GFX10 only)
+def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x38, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x39, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3B, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x3C, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10BufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x3D, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_UINT*/ 0x3E, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_SINT*/ 0x3F, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_FLOAT*/ 0x40, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x41, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x42, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x45, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x46, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10BufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x47, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_32_UINT*/ 0x48, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_32_SINT*/ 0x49, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4B, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x4C, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx10BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
+
+// Buffer formats with equal component sizes (GFX11 and later)
+def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x2A, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x2B, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x2C, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x2D, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x2E, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx11PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x2F, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_UINT*/ 0x30, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_SINT*/ 0x31, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_FLOAT*/ 0x32, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x33, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x34, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x35, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x36, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x37, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x38, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx11PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x39, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_UINT*/ 0x3A, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_SINT*/ 0x3B, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x3C, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x3D, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x3E, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx11PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x3F, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
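Each def above encodes one row of the unified buffer-format table: the hardware format code, the bits per component, the component count, and the legacy numeric/data format pair it corresponds to. As a hedged illustration (field names here are assumptions, not the generated code), a row could be mirrored in C++ as:

struct BufferFormatRow {
  unsigned Format;        // unified format code, e.g. 0x3F (FORMAT_32_32_32_32_FLOAT, GFX11+)
  unsigned BitsPerComp;   // bits per component, e.g. 32
  unsigned NumComponents; // component count, e.g. 4
  unsigned NumFormat;     // numeric format, e.g. 7 (NUM_FORMAT_FLOAT)
  unsigned DataFormat;    // data format, e.g. 14 (DATA_FORMAT_32_32_32_32)
};

TableGen turns these defs into searchable tables, letting the assembler and disassembler map between the unified GFX10/GFX11 format codes and the older split numeric/data encoding.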
class SourceOfDivergence<Intrinsic intr> {
Intrinsic Intr = intr;
@@ -191,6 +230,8 @@ def : SourceOfDivergence<int_amdgcn_interp_p1>;
def : SourceOfDivergence<int_amdgcn_interp_p2>;
def : SourceOfDivergence<int_amdgcn_interp_p1_f16>;
def : SourceOfDivergence<int_amdgcn_interp_p2_f16>;
+def : SourceOfDivergence<int_amdgcn_lds_direct_load>;
+def : SourceOfDivergence<int_amdgcn_lds_param_load>;
def : SourceOfDivergence<int_amdgcn_mbcnt_hi>;
def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
def : SourceOfDivergence<int_r600_read_tidig_x>;
@@ -205,9 +246,12 @@ def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
+def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
+def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_ds_fadd>;
def : SourceOfDivergence<int_amdgcn_ds_fmin>;
def : SourceOfDivergence<int_amdgcn_ds_fmax>;
+def : SourceOfDivergence<int_amdgcn_ds_fadd_v2bf16>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_swap>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_add>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_sub>;
@@ -292,6 +336,16 @@ def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x8bf16_1k>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x16bf16_1k>;
def : SourceOfDivergence<int_amdgcn_mfma_f64_16x16x4f64>;
def : SourceOfDivergence<int_amdgcn_mfma_f64_4x4x4f64>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x32_i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x16_i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8_xf32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4_xf32>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_f16>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_f16>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_bf16>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_bf16>;
+def : SourceOfDivergence<int_amdgcn_smfmac_i32_16x16x64_i8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_i32_32x32x32_i8>;
// The dummy boolean output is divergent from the IR's perspective,
// but the mask results are uniform. These produce a divergent and
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
new file mode 100644
index 000000000000..34702ee6623b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
@@ -0,0 +1,166 @@
+//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Pass to temporarily raise the wave priority from the start of a shader
+/// function until after its last VMEM instruction, so that younger waves
+/// get a chance to issue their own VMEM instructions in the meantime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Allocator.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-set-wave-priority"
+
+namespace {
+
+struct MBBInfo {
+ MBBInfo() = default;
+ bool MayReachVMEMLoad = false;
+};
+
+using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
+
+class AMDGPUSetWavePriority : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "Set wave priority"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ MachineInstr *BuildSetprioMI(MachineFunction &MF, unsigned priority) const;
+
+ const SIInstrInfo *TII;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
+ false)
+
+char AMDGPUSetWavePriority::ID = 0;
+
+FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
+ return new AMDGPUSetWavePriority();
+}
+
+MachineInstr *AMDGPUSetWavePriority::BuildSetprioMI(MachineFunction &MF,
+ unsigned priority) const {
+ return BuildMI(MF, DebugLoc(), TII->get(AMDGPU::S_SETPRIO)).addImm(priority);
+}
+
+// Checks that for every predecessor Pred that can reach a VMEM load,
+// none of Pred's successors can reach a VMEM load.
+static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
+ MBBInfoSet &MBBInfos) {
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ if (!MBBInfos[Pred].MayReachVMEMLoad)
+ continue;
+ for (const MachineBasicBlock *Succ : Pred->successors()) {
+ if (MBBInfos[Succ].MayReachVMEMLoad)
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool isVMEMLoad(const MachineInstr &MI) {
+ return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
+}
+
+bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
+ const unsigned HighPriority = 3;
+ const unsigned LowPriority = 0;
+
+ Function &F = MF.getFunction();
+ if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ return false;
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+
+ MBBInfoSet MBBInfos;
+ SmallVector<const MachineBasicBlock *, 16> Worklist;
+ for (MachineBasicBlock &MBB : MF) {
+ if (any_of(MBB, isVMEMLoad))
+ Worklist.push_back(&MBB);
+ }
+
+ // Mark blocks from which control may reach VMEM loads.
+ while (!Worklist.empty()) {
+ const MachineBasicBlock *MBB = Worklist.pop_back_val();
+ MBBInfo &Info = MBBInfos[MBB];
+ if (!Info.MayReachVMEMLoad) {
+ Info.MayReachVMEMLoad = true;
+ Worklist.append(MBB->pred_begin(), MBB->pred_end());
+ }
+ }
+
+ MachineBasicBlock &Entry = MF.front();
+ if (!MBBInfos[&Entry].MayReachVMEMLoad)
+ return false;
+
+ // Raise the priority at the beginning of the shader.
+ MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
+ while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
+ ++I;
+ Entry.insert(I, BuildSetprioMI(MF, HighPriority));
+
+ // Lower the priority on edges where control leaves blocks from which
+ // VMEM loads are reachable.
+ SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBBInfos[&MBB].MayReachVMEMLoad) {
+ if (MBB.succ_empty())
+ PriorityLoweringBlocks.insert(&MBB);
+ continue;
+ }
+
+ if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ if (MBBInfos[Pred].MayReachVMEMLoad)
+ PriorityLoweringBlocks.insert(Pred);
+ }
+ continue;
+ }
+
+    // Where lowering the priority in predecessors is not possible, the block
+    // receiving control either was not part of a loop in the first place, or
+    // loop simplification/canonicalization should already have tried to split
+    // the edge and insert a preheader. If that failed for whatever reason, the
+    // only remaining option is to lower the priority within the loop.
+ PriorityLoweringBlocks.insert(&MBB);
+ }
+
+ for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
+ MachineBasicBlock::iterator I = MBB->end(), B = MBB->begin();
+ while (I != B) {
+ if (isVMEMLoad(*--I)) {
+ ++I;
+ break;
+ }
+ }
+ MBB->insert(I, BuildSetprioMI(MF, LowPriority));
+ }
+
+ return true;
+}
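The heart of the pass is the backward worklist above: blocks containing a VMEM load seed the search, and the MayReachVMEMLoad flag is propagated to all of their transitive predecessors. A minimal standalone sketch of the same idea on a generic adjacency-list CFG (all names here are illustrative, not part of the pass):

#include <vector>

// preds[b] lists the predecessors of block b; the worklist is seeded with
// blocks that themselves contain a VMEM load.
std::vector<bool> markMayReachVMEM(const std::vector<std::vector<int>> &preds,
                                   std::vector<int> worklist) {
  std::vector<bool> mayReach(preds.size(), false);
  while (!worklist.empty()) {
    int b = worklist.back();
    worklist.pop_back();
    if (mayReach[b])
      continue;
    mayReach[b] = true;      // b can reach a VMEM load...
    for (int p : preds[b])   // ...and so can everything that reaches b.
      worklist.push_back(p);
  }
  return mayReach;
}

Priority is then raised once at the entry and lowered after the last VMEM load on every path out of the marked region, preferring to lower in predecessors so that s_setprio does not execute repeatedly inside a loop.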
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index e82f9232b114..77816a783630 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -50,11 +50,6 @@ static cl::opt<bool> EnableVGPRIndexMode(
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
-static cl::opt<bool> EnableFlatScratch(
- "amdgpu-enable-flat-scratch",
- cl::desc("Use flat scratch instructions"),
- cl::init(false));
-
static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
cl::desc("Enable the use of AA during codegen."),
cl::init(true));
@@ -159,26 +154,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
return *this;
}
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
- TargetTriple(TT),
- GCN3Encoding(false),
- Has16BitInsts(false),
- HasMadMixInsts(false),
- HasMadMacF32Insts(false),
- HasDsSrc2Insts(false),
- HasSDWA(false),
- HasVOP3PInsts(false),
- HasMulI24(true),
- HasMulU24(true),
- HasSMulHi(false),
- HasInv2PiInlineImm(false),
- HasFminFmaxLegacy(true),
- EnablePromoteAlloca(false),
- HasTrigReducedRange(false),
- MaxWavesPerEU(10),
- LocalMemorySize(0),
- WavefrontSizeLog2(0)
- { }
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM)
@@ -187,120 +163,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AMDGPUSubtarget(TT),
TargetTriple(TT),
TargetID(*this),
- Gen(INVALID),
InstrItins(getInstrItineraryForCPU(GPU)),
- LDSBankCount(0),
- MaxPrivateElementSize(0),
-
- FastFMAF32(false),
- FastDenormalF32(false),
- HalfRate64Ops(false),
- FullRate64Ops(false),
-
- FlatForGlobal(false),
- AutoWaitcntBeforeBarrier(false),
- UnalignedScratchAccess(false),
- UnalignedAccessMode(false),
-
- HasApertureRegs(false),
- SupportsXNACK(false),
- EnableXNACK(false),
- EnableTgSplit(false),
- EnableCuMode(false),
- TrapHandler(false),
-
- EnableLoadStoreOpt(false),
- EnableUnsafeDSOffsetFolding(false),
- EnableSIScheduler(false),
- EnableDS128(false),
- EnablePRTStrictNull(false),
- DumpCode(false),
-
- FP64(false),
- CIInsts(false),
- GFX8Insts(false),
- GFX9Insts(false),
- GFX90AInsts(false),
- GFX10Insts(false),
- GFX10_3Insts(false),
- GFX7GFX8GFX9Insts(false),
- SGPRInitBug(false),
- NegativeScratchOffsetBug(false),
- NegativeUnalignedScratchOffsetBug(false),
- HasSMemRealTime(false),
- HasIntClamp(false),
- HasFmaMixInsts(false),
- HasMovrel(false),
- HasVGPRIndexMode(false),
- HasScalarStores(false),
- HasScalarAtomics(false),
- HasSDWAOmod(false),
- HasSDWAScalar(false),
- HasSDWASdst(false),
- HasSDWAMac(false),
- HasSDWAOutModsVOPC(false),
- HasDPP(false),
- HasDPP8(false),
- Has64BitDPP(false),
- HasPackedFP32Ops(false),
- HasExtendedImageInsts(false),
- HasR128A16(false),
- HasGFX10A16(false),
- HasG16(false),
- HasNSAEncoding(false),
- NSAMaxSize(0),
- GFX10_AEncoding(false),
- GFX10_BEncoding(false),
- HasDLInsts(false),
- HasDot1Insts(false),
- HasDot2Insts(false),
- HasDot3Insts(false),
- HasDot4Insts(false),
- HasDot5Insts(false),
- HasDot6Insts(false),
- HasDot7Insts(false),
- HasMAIInsts(false),
- HasPkFmacF16Inst(false),
- HasAtomicFaddInsts(false),
- SupportsSRAMECC(false),
- EnableSRAMECC(false),
- HasNoSdstCMPX(false),
- HasVscnt(false),
- HasGetWaveIdInst(false),
- HasSMemTimeInst(false),
- HasShaderCyclesRegister(false),
- HasVOP3Literal(false),
- HasNoDataDepHazard(false),
- FlatAddressSpace(false),
- FlatInstOffsets(false),
- FlatGlobalInsts(false),
- FlatScratchInsts(false),
- ScalarFlatScratchInsts(false),
- HasArchitectedFlatScratch(false),
- AddNoCarryInsts(false),
- HasUnpackedD16VMem(false),
- LDSMisalignedBug(false),
- HasMFMAInlineLiteralBug(false),
- UnalignedBufferAccess(false),
- UnalignedDSAccess(false),
- HasPackedTID(false),
-
- ScalarizeGlobal(false),
-
- HasVcmpxPermlaneHazard(false),
- HasVMEMtoScalarWriteHazard(false),
- HasSMEMtoVectorWriteHazard(false),
- HasInstFwdPrefetchBug(false),
- HasVcmpxExecWARHazard(false),
- HasLdsBranchVmemWARHazard(false),
- HasNSAtoVMEMBug(false),
- HasNSAClauseBug(false),
- HasOffset3fBug(false),
- HasFlatSegmentOffsetBug(false),
- HasImageStoreD16Bug(false),
- HasImageGather4D16Bug(false),
-
- FeatureDisable(false),
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
@@ -314,11 +177,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
-bool GCNSubtarget::enableFlatScratch() const {
- return flatScratchIsArchitected() ||
- (EnableFlatScratch && hasFlatScratchInsts());
-}
-
unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
if (getGeneration() < GFX10)
return 1;
@@ -326,12 +184,15 @@ unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
switch (Opcode) {
case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
+ case AMDGPU::V_LSHLREV_B64_e64_gfx11:
case AMDGPU::V_LSHL_B64_e64:
case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
+ case AMDGPU::V_LSHRREV_B64_e64_gfx11:
case AMDGPU::V_LSHR_B64_e64:
case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
+ case AMDGPU::V_ASHRREV_I64_e64_gfx11:
case AMDGPU::V_ASHR_I64_e64:
return 1;
}
@@ -658,7 +519,8 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
return 16;
// Assume all implicit inputs are used by default
- return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
+ unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56;
+ return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes);
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
@@ -673,13 +535,11 @@ uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
for (const Argument &Arg : F.args()) {
const bool IsByRef = Arg.hasByRefAttr();
Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
- MaybeAlign Alignment = IsByRef ? Arg.getParamAlign() : None;
- if (!Alignment)
- Alignment = DL.getABITypeAlign(ArgTy);
-
+ Align Alignment = DL.getValueOrABITypeAlignment(
+ IsByRef ? Arg.getParamAlign() : None, ArgTy);
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
- MaxAlign = max(MaxAlign, Alignment);
+ MaxAlign = std::max(MaxAlign, Alignment);
}
return ExplicitArgBytes;
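A small worked example of the packing loop above, for a hypothetical kernel taking (i32 %a, double %b):

//   %a: align 4, size 4 -> ExplicitArgBytes = alignTo(0, 4) + 4 = 4
//   %b: align 8, size 8 -> ExplicitArgBytes = alignTo(4, 8) + 8 = 16
//   MaxAlign = 8

The only functional change in this hunk is using the ABI-type alignment helper and std::max directly; the computed layout is unchanged.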
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 7f1b94be4ffe..7400c81effd0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -38,30 +38,32 @@ public:
SEA_ISLANDS = 6,
VOLCANIC_ISLANDS = 7,
GFX9 = 8,
- GFX10 = 9
+ GFX10 = 9,
+ GFX11 = 10
};
private:
Triple TargetTriple;
protected:
- bool GCN3Encoding;
- bool Has16BitInsts;
- bool HasMadMixInsts;
- bool HasMadMacF32Insts;
- bool HasDsSrc2Insts;
- bool HasSDWA;
- bool HasVOP3PInsts;
- bool HasMulI24;
- bool HasMulU24;
- bool HasSMulHi;
- bool HasInv2PiInlineImm;
- bool HasFminFmaxLegacy;
- bool EnablePromoteAlloca;
- bool HasTrigReducedRange;
- unsigned MaxWavesPerEU;
- unsigned LocalMemorySize;
- char WavefrontSizeLog2;
+ bool GCN3Encoding = false;
+ bool Has16BitInsts = false;
+ bool HasTrue16BitInsts = false;
+ bool HasMadMixInsts = false;
+ bool HasMadMacF32Insts = false;
+ bool HasDsSrc2Insts = false;
+ bool HasSDWA = false;
+ bool HasVOP3PInsts = false;
+ bool HasMulI24 = true;
+ bool HasMulU24 = true;
+ bool HasSMulHi = false;
+ bool HasInv2PiInlineImm = false;
+ bool HasFminFmaxLegacy = true;
+ bool EnablePromoteAlloca = false;
+ bool HasTrigReducedRange = false;
+ unsigned MaxWavesPerEU = 10;
+ unsigned LocalMemorySize = 0;
+ char WavefrontSizeLog2 = 0;
public:
AMDGPUSubtarget(const Triple &TT);
@@ -145,6 +147,8 @@ public:
return Has16BitInsts;
}
+ bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
+
bool hasMadMixInsts() const {
return HasMadMixInsts;
}
@@ -267,7 +271,7 @@ public:
/// \p WavefrontSize.
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
- virtual ~AMDGPUSubtarget() {}
+ virtual ~AMDGPUSubtarget() = default;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a2c61f9da8da..1c6b9d35695a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUExportClustering.h"
+#include "AMDGPUIGroupLP.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
@@ -27,6 +28,7 @@
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -56,6 +58,7 @@
#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
namespace {
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
@@ -269,12 +272,22 @@ static cl::opt<bool> EnableSIModeRegisterPass(
cl::init(true),
cl::Hidden);
+// Enable GFX11+ s_delay_alu insertion
+static cl::opt<bool>
+ EnableInsertDelayAlu("amdgpu-enable-delay-alu",
+ cl::desc("Enable s_delay_alu insertion"),
+ cl::init(true), cl::Hidden);
+
// Option is used in lit tests to prevent deadcoding of patterns inspected.
static cl::opt<bool>
EnableDCEInRA("amdgpu-dce-in-ra",
cl::init(true), cl::Hidden,
cl::desc("Enable machine DCE inside regalloc"));
+static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
+ cl::desc("Adjust wave priority"),
+ cl::init(false), cl::Hidden);
+
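Both new options are ordinary cl::opt flags, but note the asymmetry: delay-ALU insertion defaults to on, while wave-priority adjustment defaults to off and must be requested explicitly, e.g. (hypothetical invocation):

llc -march=amdgcn -mcpu=gfx1010 -amdgpu-set-wave-priority kernel.ll -o kernel.s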
static cl::opt<bool> EnableScalarIRPasses(
"amdgpu-scalar-ir-passes",
cl::desc("Enable scalar IR passes"),
@@ -330,7 +343,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIOptimizeExecMaskingPreRAPass(*PR);
initializeSIOptimizeVGPRLiveRangePass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
- initializeAMDGPUFixFunctionBitcastsPass(*PR);
initializeAMDGPUCtorDtorLoweringPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAttributorPass(*PR);
@@ -357,6 +369,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURewriteOutArgumentsPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
+ initializeAMDGPUReleaseVGPRsPass(*PR);
+ initializeAMDGPUInsertDelayAluPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
@@ -390,9 +404,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
ScheduleDAGMILive *DAG =
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createIGroupLPDAGMutation());
+ DAG->addMutation(createSchedBarrierDAGMutation());
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
return DAG;
@@ -400,9 +419,12 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -413,9 +435,12 @@ static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext *C) {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
auto DAG = new GCNIterativeScheduler(C,
GCNIterativeScheduler::SCHEDULE_ILP);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
return DAG;
}
@@ -801,6 +826,23 @@ AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
return std::make_pair(nullptr, -1);
}
+unsigned
+AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
+ switch (Kind) {
+ case PseudoSourceValue::Stack:
+ case PseudoSourceValue::FixedStack:
+ return AMDGPUAS::PRIVATE_ADDRESS;
+ case PseudoSourceValue::ConstantPool:
+ case PseudoSourceValue::GOT:
+ case PseudoSourceValue::JumpTable:
+ case PseudoSourceValue::GlobalValueCallEntry:
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ case PseudoSourceValue::TargetCustom:
+ return AMDGPUAS::CONSTANT_ADDRESS;
+ }
+ return AMDGPUAS::FLAT_ADDRESS;
+}
+
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//
@@ -836,7 +878,7 @@ GCNTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetTransformInfo
-GCNTargetMachine::getTargetTransformInfo(const Function &F) {
+GCNTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(GCNTTIImpl(this, F));
}
@@ -873,7 +915,11 @@ public:
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
+ DAG->addMutation(createIGroupLPDAGMutation());
+ DAG->addMutation(createSchedBarrierDAGMutation());
return DAG;
}
@@ -953,10 +999,6 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPUPrintfRuntimeBinding());
addPass(createAMDGPUCtorDtorLoweringPass());
- // This must occur before inlining, as the inliner will not look through
- // bitcast calls.
- addPass(createAMDGPUFixFunctionBitcastsPass());
-
// A call to propagate attributes pass in the backend in case opt was not run.
addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
@@ -967,7 +1009,7 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAlwaysInlinerLegacyPass());
// We need to add the barrier noop pass, otherwise adding the function
// inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a nodule with two
+ // one function at a time, which means if we have a module with two
// functions, then we will generate code for the first function
// without ever running any passes on the second.
addPass(createBarrierNoopPass());
@@ -1079,8 +1121,11 @@ bool AMDGPUPassConfig::addGCPasses() {
llvm::ScheduleDAGInstrs *
AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ if (ST.shouldClusterStores())
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
@@ -1363,6 +1408,8 @@ void GCNPassConfig::addPreEmitPass() {
addPass(&SIInsertHardClausesID);
addPass(&SILateBranchLoweringPassID);
+ if (isPassEnabled(EnableSetWavePriority, CodeGenOpt::Less))
+ addPass(createAMDGPUSetWavePriorityPass());
if (getOptLevel() > CodeGenOpt::None)
addPass(&SIPreEmitPeepholeID);
// The hazard recognizer that runs as part of the post-ra scheduler does not
@@ -1374,6 +1421,13 @@ void GCNPassConfig::addPreEmitPass() {
// Here we add a stand-alone hazard recognizer pass which can handle all
// cases.
addPass(&PostRAHazardRecognizerID);
+
+ if (getOptLevel() > CodeGenOpt::Less)
+ addPass(&AMDGPUReleaseVGPRsID);
+
+ if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))
+ addPass(&AMDGPUInsertDelayAluID);
+
addPass(&BranchRelaxationPassID);
}
@@ -1396,7 +1450,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
SMDiagnostic &Error, SMRange &SourceRange) const {
const yaml::SIMachineFunctionInfo &YamlMFI =
- reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
+ static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
MachineFunction &MF = PFS.MF;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -1420,6 +1474,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
return false;
};
+ auto parseOptionalRegister = [&](const yaml::StringValue &RegName,
+ Register &RegVal) {
+ return !RegName.Value.empty() && parseRegister(RegName, RegVal);
+ };
+
+ if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
+ return true;
+
auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
     // Create a diagnostic for the register string literal.
const MemoryBuffer &Buffer =
@@ -1452,6 +1514,14 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
}
+ for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {
+ Register ParsedReg;
+ if (parseRegister(YamlReg, ParsedReg))
+ return true;
+
+ MFI->reserveWWMRegister(ParsedReg);
+ }
+
auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
const TargetRegisterClass &RC,
ArgDescriptor &Arg, unsigned UserSGPRs,
@@ -1473,7 +1543,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
Arg = ArgDescriptor::createStack(A->StackOffset);
// Check and apply the optional mask.
if (A->Mask)
- Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
+ Arg = ArgDescriptor::createArg(Arg, *A->Mask);
MFI->NumUserSGPRs += UserSGPRs;
MFI->NumSystemSGPRs += SystemSGPRs;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index dd3676f3b707..567cc9d610d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// The AMDGPU TargetMachine interface definition for hw codgen targets.
+/// The AMDGPU TargetMachine interface definition for hw codegen targets.
//
//===----------------------------------------------------------------------===//
@@ -64,6 +64,8 @@ public:
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override;
+
+ unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override;
};
//===----------------------------------------------------------------------===//
@@ -84,7 +86,7 @@ public:
const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
bool useIPRA() const override {
return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index a8df7789c8a1..a79cd2e9499e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -288,33 +288,21 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()), CommonTTI(TM, F),
- IsGraphics(AMDGPU::isGraphics(F.getCallingConv())),
- MaxVGPRs(ST->getMaxNumVGPRs(
- std::max(ST->getWavesPerEU(F).first,
- ST->getWavesPerEUForWorkGroup(
- ST->getFlatWorkGroupSizes(F).second)))) {
+ IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
AMDGPU::SIModeRegisterDefaults Mode(F);
HasFP32Denormals = Mode.allFP32Denormals();
HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
}
-unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
- // The concept of vector registers doesn't really exist. Some packed vector
- // operations operate on the normal 32-bit registers.
- return MaxVGPRs;
-}
+unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
+  // NB: RCID is not a real register class ID here; it is 0 for scalar and 1
+  // for vector registers. See getRegisterClassForType for the implementation.
+  // "Vector" in this context does not mean VGPRs, but registers that can
+  // hold multiple values.
-unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
// This is really the number of registers to fill when vectorizing /
// interleaving loops, so we lie to avoid trying to use all registers.
- return getHardwareNumberOfRegisters(Vec) >> 3;
-}
-
-unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
- const SIRegisterInfo *TRI = ST->getRegisterInfo();
- const TargetRegisterClass *RC = TRI->getRegClass(RCID);
- unsigned NumVGPRs = (TRI->getRegSizeInBits(*RC) + 31) / 32;
- return getHardwareNumberOfRegisters(false) / NumVGPRs;
+ return 4;
}
TypeSize
@@ -410,11 +398,14 @@ bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
// unaligned access is legal?
//
// FIXME: This could use fine tuning and microbenchmarks.
-Type *GCNTTIImpl::getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
- unsigned SrcAddrSpace,
- unsigned DestAddrSpace,
- unsigned SrcAlign,
- unsigned DestAlign) const {
+Type *GCNTTIImpl::getMemcpyLoopLoweringType(
+ LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
+ unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicElementSize) const {
+
+ if (AtomicElementSize)
+ return Type::getIntNTy(Context, *AtomicElementSize * 8);
+
unsigned MinAlign = std::min(SrcAlign, DestAlign);
// A (multi-)dword access at an address == 2 (mod 4) will be decomposed by the
@@ -439,11 +430,17 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
}
void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
- SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
- unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const {
+ SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+ unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicCpySize) const {
assert(RemainingBytes < 16);
+ if (AtomicCpySize)
+ BaseT::getMemcpyLoopResidualLoweringType(
+ OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
+ DestAlign, AtomicCpySize);
+
unsigned MinAlign = std::min(SrcAlign, DestAlign);
if (MinAlign != 2) {
@@ -1042,7 +1039,8 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *VT, ArrayRef<int> Mask,
- int Index, VectorType *SubTp) {
+ int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasVOP3PInsts()) {
if (cast<FixedVectorType>(VT)->getNumElements() == 2 &&
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index e901b5c5747d..f2260c31e678 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -68,7 +68,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
bool IsGraphics;
bool HasFP32Denormals;
bool HasFP64FP16Denormals;
- unsigned MaxVGPRs;
static const FeatureBitset InlineFeatureIgnoreList;
@@ -113,8 +112,6 @@ public:
return TTI::PSK_FastHardware;
}
- unsigned getHardwareNumberOfRegisters(bool Vector) const;
- unsigned getNumberOfRegisters(bool Vector) const;
unsigned getNumberOfRegisters(unsigned RCID) const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
@@ -135,15 +132,14 @@ public:
unsigned AddrSpace) const;
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign) const;
-
- void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
- LLVMContext &Context,
- unsigned RemainingBytes,
- unsigned SrcAddrSpace,
- unsigned DestAddrSpace,
- unsigned SrcAlign,
- unsigned DestAlign) const;
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicElementSize) const;
+
+ void getMemcpyLoopResidualLoweringType(
+ SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+ unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign,
+ Optional<uint32_t> AtomicCpySize) const;
unsigned getMaxInterleaveFactor(unsigned VF);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
@@ -201,7 +197,8 @@ public:
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp);
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
index 654153ea5151..8e5f966b7c6c 100644
--- a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -142,7 +142,7 @@ enum amd_code_property_mask_t {
/// is provided to the finalizer when it is invoked and is recorded
/// here. The hardware will interleave the memory requests of each
/// lane of a wavefront by this element size to ensure each
- /// work-item gets a distinct memory memory location. Therefore, the
+ /// work-item gets a distinct memory location. Therefore, the
/// finalizer ensures that all load and store operations done to
/// private memory do not exceed this size. For example, if the
/// element size is 4 (32-bits or dword) and a 64-bit value must be
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ffe626513d47..e12d0ffef35c 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -20,10 +20,13 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
@@ -33,6 +36,7 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -120,12 +124,6 @@ public:
ImmTyD16,
ImmTyClampSI,
ImmTyOModSI,
- ImmTyDPP8,
- ImmTyDppCtrl,
- ImmTyDppRowMask,
- ImmTyDppBankMask,
- ImmTyDppBoundCtrl,
- ImmTyDppFi,
ImmTySdwaDstSel,
ImmTySdwaSrc0Sel,
ImmTySdwaSrc1Sel,
@@ -151,6 +149,12 @@ public:
ImmTyOpSelHi,
ImmTyNegLo,
ImmTyNegHi,
+ ImmTyDPP8,
+ ImmTyDppCtrl,
+ ImmTyDppRowMask,
+ ImmTyDppBankMask,
+ ImmTyDppBoundCtrl,
+ ImmTyDppFi,
ImmTySwizzle,
ImmTyGprIdxMode,
ImmTyHigh,
@@ -158,6 +162,8 @@ public:
ImmTyCBSZ,
ImmTyABID,
ImmTyEndpgm,
+ ImmTyWaitVDST,
+ ImmTyWaitEXP,
};
enum ImmKindTy {
@@ -262,6 +268,14 @@ public:
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
+ bool isRegOrInlineImmWithInt16InputMods() const {
+ return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
+ }
+
+ bool isRegOrInlineImmWithInt32InputMods() const {
+ return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
+ }
+
bool isRegOrImmWithInt64InputMods() const {
return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
@@ -278,6 +292,15 @@ public:
return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
+ bool isRegOrInlineImmWithFP16InputMods() const {
+ return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
+ }
+
+ bool isRegOrInlineImmWithFP32InputMods() const {
+ return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
+ }
+
bool isVReg() const {
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
isRegClass(AMDGPU::VReg_64RegClassID) ||
@@ -815,6 +838,8 @@ public:
}
bool isSWaitCnt() const;
+ bool isDepCtr() const;
+ bool isSDelayAlu() const;
bool isHwreg() const;
bool isSendMsg() const;
bool isSwizzle() const;
@@ -830,6 +855,8 @@ public:
bool isS16Imm() const;
bool isU16Imm() const;
bool isEndpgm() const;
+ bool isWaitVDST() const;
+ bool isWaitEXP() const;
StringRef getExpressionAsToken() const {
assert(isExpr());
@@ -1037,6 +1064,8 @@ public:
case ImmTyCBSZ: OS << "CBSZ"; break;
case ImmTyABID: OS << "ABID"; break;
case ImmTyEndpgm: OS << "Endpgm"; break;
+ case ImmTyWaitVDST: OS << "WaitVDST"; break;
+ case ImmTyWaitEXP: OS << "WaitEXP"; break;
}
}
@@ -1123,7 +1152,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
class KernelScopeInfo {
int SgprIndexUnusedMin = -1;
int VgprIndexUnusedMin = -1;
+ int AgprIndexUnusedMin = -1;
MCContext *Ctx = nullptr;
+ MCSubtargetInfo const *MSTI = nullptr;
void usesSgprAt(int i) {
if (i >= SgprIndexUnusedMin) {
@@ -1142,7 +1173,31 @@ class KernelScopeInfo {
if (Ctx) {
MCSymbol* const Sym =
Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
- Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
+ int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
+ VgprIndexUnusedMin);
+ Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
+ }
+ }
+ }
+
+ void usesAgprAt(int i) {
+    // The instruction will be rejected later, in AMDGPUAsmParser::MatchAndEmitInstruction.
+ if (!hasMAIInsts(*MSTI))
+ return;
+
+ if (i >= AgprIndexUnusedMin) {
+ AgprIndexUnusedMin = ++i;
+ if (Ctx) {
+ MCSymbol* const Sym =
+ Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
+ Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
+
+ // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
+ MCSymbol* const vSym =
+ Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
+ int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
+ VgprIndexUnusedMin);
+ vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
}
}
}
@@ -1152,16 +1207,29 @@ public:
void initialize(MCContext &Context) {
Ctx = &Context;
+ MSTI = Ctx->getSubtargetInfo();
+
usesSgprAt(SgprIndexUnusedMin = -1);
usesVgprAt(VgprIndexUnusedMin = -1);
+ if (hasMAIInsts(*MSTI)) {
+ usesAgprAt(AgprIndexUnusedMin = -1);
+ }
}
- void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
+ void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
+ unsigned RegWidth) {
switch (RegKind) {
- case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
- case IS_AGPR: // fall through
- case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
- default: break;
+ case IS_SGPR:
+ usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
+ break;
+ case IS_AGPR:
+ usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
+ break;
+ case IS_VGPR:
+ usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
+ break;
+ default:
+ break;
}
}
};
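The new agpr_count bookkeeping matters because on gfx908/gfx90a the advertised vgpr_count depends on both register files. A hedged sketch of the combining rule that getTotalNumVGPRs is assumed to implement (an assumption about the helper, not verified against its definition: on gfx90a, AGPRs and VGPRs are carved out of one physical file, elsewhere the two counts overlap):

#include "llvm/Support/MathExtras.h"
#include <algorithm>

unsigned totalNumVGPRs(bool HasGFX90AInsts, unsigned NumAGPR, unsigned NumVGPR) {
  if (HasGFX90AInsts && NumAGPR)
    return llvm::alignTo(NumVGPR, 4) + NumAGPR; // VGPRs padded to a 4-reg boundary
  return std::max(NumVGPR, NumAGPR);
}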
@@ -1353,10 +1421,15 @@ public:
return AMDGPU::isGFX9(getSTI());
}
+  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
bool isGFX90A() const {
return AMDGPU::isGFX90A(getSTI());
}
+ bool isGFX940() const {
+ return AMDGPU::isGFX940(getSTI());
+ }
+
bool isGFX9Plus() const {
return AMDGPU::isGFX9Plus(getSTI());
}
@@ -1367,6 +1440,14 @@ public:
bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
+ bool isGFX11() const {
+ return AMDGPU::isGFX11(getSTI());
+ }
+
+ bool isGFX11Plus() const {
+ return AMDGPU::isGFX11Plus(getSTI());
+ }
+
bool isGFX10_BEncoding() const {
return AMDGPU::isGFX10_BEncoding(getSTI());
}
@@ -1496,6 +1577,14 @@ public:
bool parseCnt(int64_t &IntVal);
OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+
+ bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
+ void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
+ OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
+
+ bool parseDelay(int64_t &Delay);
+ OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
+
OperandMatchResultTy parseHwreg(OperandVector &Operands);
private:
@@ -1522,6 +1611,7 @@ private:
SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
+ SMLoc getBLGPLoc(const OperandVector &Operands) const;
SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
const OperandVector &Operands) const;
@@ -1540,7 +1630,7 @@ private:
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
- bool validateMIMGDataSize(const MCInst &Inst);
+ Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
bool validateMIMGAddrSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGDim(const MCInst &Inst);
@@ -1553,10 +1643,14 @@ private:
bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
+ bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
bool validateDivScale(const MCInst &Inst);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
const SMLoc &IDLoc);
+ bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
+ const SMLoc &IDLoc);
+ bool validateExeczVcczOperands(const OperandVector &Operands);
Optional<StringRef> validateLdsDirect(const MCInst &Inst);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
@@ -1586,7 +1680,7 @@ private:
bool parseExpr(int64_t &Imm, StringRef Expected = "");
bool parseExpr(OperandVector &Operands);
StringRef getTokenStr() const;
- AsmToken peekToken();
+ AsmToken peekToken(bool ShouldSkipSpace = true);
AsmToken getToken() const;
SMLoc getLoc() const;
void lex();
@@ -1644,10 +1738,12 @@ public:
void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
+ void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic = false);
@@ -1668,7 +1764,24 @@ public:
AMDGPUOperand::Ptr defaultBoundCtrl() const;
AMDGPUOperand::Ptr defaultFI() const;
void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
- void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
+ void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
+ cvtDPP(Inst, Operands, true);
+ }
+ void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
+ bool IsDPP8 = false);
+ void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
+ cvtVOPCNoDstDPP(Inst, Operands, true);
+ }
+ void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
+ bool IsDPP8 = false);
+ void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
+ cvtVOP3DPP(Inst, Operands, true);
+ }
+ void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
+ bool IsDPP8 = false);
+ void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
+ cvtVOPC64NoDstDPP(Inst, Operands, true);
+ }
OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type);
@@ -1689,6 +1802,10 @@ public:
OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
+
+ AMDGPUOperand::Ptr defaultWaitVDST() const;
+ AMDGPUOperand::Ptr defaultWaitEXP() const;
+ OperandMatchResultTy parseVOPD(OperandVector &Operands);
};
struct OptionalOperand {
@@ -1897,7 +2014,7 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
// We allow fp literals with f16x2 operands assuming that the specified
// literal goes into the lower half and the upper half is zero. We also
- // require that the literal may be losslesly converted to f16.
+ // require that the literal may be losslessly converted to f16.
MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
(type == MVT::v2i16)? MVT::i16 :
(type == MVT::v2f32)? MVT::f32 : type;
@@ -2211,52 +2328,86 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
if (Is == IS_VGPR) {
switch (RegWidth) {
default: return -1;
- case 1: return AMDGPU::VGPR_32RegClassID;
- case 2: return AMDGPU::VReg_64RegClassID;
- case 3: return AMDGPU::VReg_96RegClassID;
- case 4: return AMDGPU::VReg_128RegClassID;
- case 5: return AMDGPU::VReg_160RegClassID;
- case 6: return AMDGPU::VReg_192RegClassID;
- case 7: return AMDGPU::VReg_224RegClassID;
- case 8: return AMDGPU::VReg_256RegClassID;
- case 16: return AMDGPU::VReg_512RegClassID;
- case 32: return AMDGPU::VReg_1024RegClassID;
+ case 32:
+ return AMDGPU::VGPR_32RegClassID;
+ case 64:
+ return AMDGPU::VReg_64RegClassID;
+ case 96:
+ return AMDGPU::VReg_96RegClassID;
+ case 128:
+ return AMDGPU::VReg_128RegClassID;
+ case 160:
+ return AMDGPU::VReg_160RegClassID;
+ case 192:
+ return AMDGPU::VReg_192RegClassID;
+ case 224:
+ return AMDGPU::VReg_224RegClassID;
+ case 256:
+ return AMDGPU::VReg_256RegClassID;
+ case 512:
+ return AMDGPU::VReg_512RegClassID;
+ case 1024:
+ return AMDGPU::VReg_1024RegClassID;
}
} else if (Is == IS_TTMP) {
switch (RegWidth) {
default: return -1;
- case 1: return AMDGPU::TTMP_32RegClassID;
- case 2: return AMDGPU::TTMP_64RegClassID;
- case 4: return AMDGPU::TTMP_128RegClassID;
- case 8: return AMDGPU::TTMP_256RegClassID;
- case 16: return AMDGPU::TTMP_512RegClassID;
+ case 32:
+ return AMDGPU::TTMP_32RegClassID;
+ case 64:
+ return AMDGPU::TTMP_64RegClassID;
+ case 128:
+ return AMDGPU::TTMP_128RegClassID;
+ case 256:
+ return AMDGPU::TTMP_256RegClassID;
+ case 512:
+ return AMDGPU::TTMP_512RegClassID;
}
} else if (Is == IS_SGPR) {
switch (RegWidth) {
default: return -1;
- case 1: return AMDGPU::SGPR_32RegClassID;
- case 2: return AMDGPU::SGPR_64RegClassID;
- case 3: return AMDGPU::SGPR_96RegClassID;
- case 4: return AMDGPU::SGPR_128RegClassID;
- case 5: return AMDGPU::SGPR_160RegClassID;
- case 6: return AMDGPU::SGPR_192RegClassID;
- case 7: return AMDGPU::SGPR_224RegClassID;
- case 8: return AMDGPU::SGPR_256RegClassID;
- case 16: return AMDGPU::SGPR_512RegClassID;
+ case 32:
+ return AMDGPU::SGPR_32RegClassID;
+ case 64:
+ return AMDGPU::SGPR_64RegClassID;
+ case 96:
+ return AMDGPU::SGPR_96RegClassID;
+ case 128:
+ return AMDGPU::SGPR_128RegClassID;
+ case 160:
+ return AMDGPU::SGPR_160RegClassID;
+ case 192:
+ return AMDGPU::SGPR_192RegClassID;
+ case 224:
+ return AMDGPU::SGPR_224RegClassID;
+ case 256:
+ return AMDGPU::SGPR_256RegClassID;
+ case 512:
+ return AMDGPU::SGPR_512RegClassID;
}
} else if (Is == IS_AGPR) {
switch (RegWidth) {
default: return -1;
- case 1: return AMDGPU::AGPR_32RegClassID;
- case 2: return AMDGPU::AReg_64RegClassID;
- case 3: return AMDGPU::AReg_96RegClassID;
- case 4: return AMDGPU::AReg_128RegClassID;
- case 5: return AMDGPU::AReg_160RegClassID;
- case 6: return AMDGPU::AReg_192RegClassID;
- case 7: return AMDGPU::AReg_224RegClassID;
- case 8: return AMDGPU::AReg_256RegClassID;
- case 16: return AMDGPU::AReg_512RegClassID;
- case 32: return AMDGPU::AReg_1024RegClassID;
+ case 32:
+ return AMDGPU::AGPR_32RegClassID;
+ case 64:
+ return AMDGPU::AReg_64RegClassID;
+ case 96:
+ return AMDGPU::AReg_96RegClassID;
+ case 128:
+ return AMDGPU::AReg_128RegClassID;
+ case 160:
+ return AMDGPU::AReg_160RegClassID;
+ case 192:
+ return AMDGPU::AReg_192RegClassID;
+ case 224:
+ return AMDGPU::AReg_224RegClassID;
+ case 256:
+ return AMDGPU::AReg_256RegClassID;
+ case 512:
+ return AMDGPU::AReg_512RegClassID;
+ case 1024:
+ return AMDGPU::AReg_1024RegClassID;
}
}
return -1;
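Register widths in the parser are now carried in bits rather than in dwords, which is why every case label above is multiplied by 32 and why callers that still need a dword count divide back down. A minimal sketch of that seam, assuming llvm::divideCeil from llvm/Support/MathExtras.h (newly included earlier in this patch):

#include "llvm/Support/MathExtras.h"

unsigned widthInDwords(unsigned RegWidthInBits) {
  // e.g. a 96-bit VReg_96 -> 3 dwords; sub-dword widths round up.
  return llvm::divideCeil(RegWidthInBits, 32);
}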
@@ -2343,32 +2494,32 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
case IS_SPECIAL:
if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
Reg = AMDGPU::EXEC;
- RegWidth = 2;
+ RegWidth = 64;
return true;
}
if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
Reg = AMDGPU::FLAT_SCR;
- RegWidth = 2;
+ RegWidth = 64;
return true;
}
if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
Reg = AMDGPU::XNACK_MASK;
- RegWidth = 2;
+ RegWidth = 64;
return true;
}
if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
Reg = AMDGPU::VCC;
- RegWidth = 2;
+ RegWidth = 64;
return true;
}
if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
Reg = AMDGPU::TBA;
- RegWidth = 2;
+ RegWidth = 64;
return true;
}
if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
Reg = AMDGPU::TMA;
- RegWidth = 2;
+ RegWidth = 64;
return true;
}
Error(Loc, "register does not fit in the list");
@@ -2377,11 +2528,11 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
case IS_SGPR:
case IS_AGPR:
case IS_TTMP:
- if (Reg1 != Reg + RegWidth) {
+ if (Reg1 != Reg + RegWidth / 32) {
Error(Loc, "registers in a list must have consecutive indices");
return false;
}
- RegWidth++;
+ RegWidth += 32;
return true;
default:
llvm_unreachable("unexpected register kind");
@@ -2470,7 +2621,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
// SGPR and TTMP registers must be aligned.
// Max required alignment is 4 dwords.
- AlignSize = std::min(RegWidth, 4u);
+ AlignSize = std::min(RegWidth / 32, 4u);
}
if (RegNum % AlignSize != 0) {
@@ -2495,8 +2646,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
return RC.getRegister(RegIdx);
}
-bool
-AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
+bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
int64_t RegLo, RegHi;
if (!skipToken(AsmToken::LBrac, "missing register index"))
return false;
@@ -2534,7 +2684,7 @@ AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
}
Num = static_cast<unsigned>(RegLo);
- Width = (RegHi - RegLo) + 1;
+ RegWidth = 32 * ((RegHi - RegLo) + 1);
return true;
}
@@ -2545,7 +2695,7 @@ unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
unsigned Reg = getSpecialRegForName(getTokenStr());
if (Reg) {
RegNum = 0;
- RegWidth = 1;
+ RegWidth = 32;
RegKind = IS_SPECIAL;
Tokens.push_back(getToken());
lex(); // skip register name
@@ -2577,7 +2727,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
Error(Loc, "invalid register index");
return AMDGPU::NoRegister;
}
- RegWidth = 1;
+ RegWidth = 32;
} else {
// Range of registers: v[XX:YY]. ":YY" is optional.
if (!ParseRegRange(RegNum, RegWidth))
@@ -2603,7 +2753,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
auto Loc = getLoc();
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
return AMDGPU::NoRegister;
- if (RegWidth != 1) {
+ if (RegWidth != 32) {
Error(Loc, "expected a single 32-bit register");
return AMDGPU::NoRegister;
}
@@ -2618,7 +2768,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
Tokens)) {
return AMDGPU::NoRegister;
}
- if (NextRegWidth != 1) {
+ if (NextRegWidth != 32) {
Error(Loc, "expected a single 32-bit register");
return AMDGPU::NoRegister;
}
@@ -2721,7 +2871,7 @@ bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
return true;
MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
- int64_t NewMax = DwordRegIndex + RegWidth - 1;
+ int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
int64_t OldCount;
if (!Sym->isVariable())
@@ -2761,7 +2911,8 @@ OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
// TODO: add syntactic sugar for 1/(2*PI)
- assert(!isRegister());
+ if (isRegister())
+ return MatchOperand_NoMatch;
assert(!isModifier());
const auto& Tok = getToken();
@@ -2927,7 +3078,7 @@ AMDGPUAsmParser::isModifier() {
// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
-// handled likewise for unifomtity
+// handled likewise for uniformity
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {
@@ -3110,7 +3261,8 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
static ArrayRef<unsigned> getAllVariants() {
static const unsigned Variants[] = {
AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
- AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
+ AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
+ AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
};
return makeArrayRef(Variants);
@@ -3118,6 +3270,10 @@ static ArrayRef<unsigned> getAllVariants() {
// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
+ if (isForcedDPP() && isForcedVOP3()) {
+ static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
+ return makeArrayRef(Variants);
+ }
if (getForcedEncodingSize() == 32) {
static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
return makeArrayRef(Variants);
@@ -3143,6 +3299,9 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
}
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
+ if (isForcedDPP() && isForcedVOP3())
+ return "e64_dpp";
+
if (getForcedEncodingSize() == 32)
return "e32";
@@ -3231,10 +3390,13 @@ unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
// 64-bit shift instructions can use only one scalar value input
case AMDGPU::V_LSHLREV_B64_e64:
case AMDGPU::V_LSHLREV_B64_gfx10:
+ case AMDGPU::V_LSHLREV_B64_e64_gfx11:
case AMDGPU::V_LSHRREV_B64_e64:
case AMDGPU::V_LSHRREV_B64_gfx10:
+ case AMDGPU::V_LSHRREV_B64_e64_gfx11:
case AMDGPU::V_ASHRREV_I64_e64:
case AMDGPU::V_ASHRREV_I64_gfx10:
+ case AMDGPU::V_ASHRREV_I64_e64_gfx11:
case AMDGPU::V_LSHL_B64_e64:
case AMDGPU::V_LSHR_B64_e64:
case AMDGPU::V_ASHR_I64_e64:
@@ -3305,8 +3467,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
- if (!SGPRsUsed.count(LastSGPR)) {
- SGPRsUsed.insert(LastSGPR);
+ if (SGPRsUsed.insert(LastSGPR).second) {
++ConstantBusUseCount;
}
} else { // Expression or a literal
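The count()-then-insert() pair is folded into the standard single-lookup idiom: insert() returns an (iterator, bool) pair whose second member is true only when the element was newly inserted. For illustration (with std::set standing in for the set type used here):

#include <set>

void countFirstUse(std::set<unsigned> &Seen, unsigned Reg) {
  if (Seen.insert(Reg).second) {
    // First sighting of Reg; count it exactly once.
  }
}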
@@ -3369,7 +3530,6 @@ AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
assert(DstIdx != -1);
const MCOperand &Dst = Inst.getOperand(DstIdx);
assert(Dst.isReg());
- const unsigned DstReg = mc2PseudoReg(Dst.getReg());
const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
@@ -3377,8 +3537,8 @@ AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
if (SrcIdx == -1) break;
const MCOperand &Src = Inst.getOperand(SrcIdx);
if (Src.isReg()) {
- const unsigned SrcReg = mc2PseudoReg(Src.getReg());
- if (isRegIntersect(DstReg, SrcReg, TRI)) {
+ if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
+ const unsigned SrcReg = mc2PseudoReg(Src.getReg());
Error(getRegLoc(SrcReg, Operands),
"destination must be different than all sources");
return false;
@@ -3403,13 +3563,13 @@ bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
return true;
}
-bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
+Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
- return true;
+ return None;
int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
@@ -3418,7 +3578,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
assert(VDataIdx != -1);
if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
- return true;
+ return None;
unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
@@ -3426,15 +3586,22 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
if (DMask == 0)
DMask = 1;
+ bool isPackedD16 = false;
unsigned DataSize =
(Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
if (hasPackedD16()) {
int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
- if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
+ isPackedD16 = D16Idx >= 0;
+ if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
DataSize = (DataSize + 1) / 2;
}
- return (VDataSize / 4) == DataSize + TFESize;
+ if ((VDataSize / 4) == DataSize + TFESize)
+ return None;
+
+ return StringRef(isPackedD16
+ ? "image data size does not match dmask, d16 and tfe"
+ : "image data size does not match dmask and tfe");
}
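+// Illustrative arithmetic for the check above (values are hypothetical):
+// with dmask = 0b0111, DataSize = popcount(0b0111) = 3; packed D16 set
+// halves it to (3 + 1) / 2 = 2; tfe set adds TFESize = 1. A 96-bit vdata
+// tuple gives VDataSize / 4 = 3 = DataSize + TFESize, so it passes, while
+// a 64-bit vdata now gets the more specific "dmask, d16 and tfe" message.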
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
@@ -3607,7 +3774,7 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
auto Reg = mc2PseudoReg(Src0.getReg());
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
- if (isSGPR(Reg, TRI)) {
+ if (!isGFX90A() && isSGPR(Reg, TRI)) {
Error(getRegLoc(Reg, Operands),
"source operand must be either a VGPR or an inline constant");
return false;
@@ -3641,7 +3808,7 @@ bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
return true;
- if (isRegIntersect(Src2Reg, DstReg, TRI)) {
+ if (TRI->regsOverlap(Src2Reg, DstReg)) {
Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
"source 2 operand must not partially overlap with dst");
return false;
@@ -3861,7 +4028,7 @@ Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
const auto &Src = Inst.getOperand(SrcIdx);
if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
- if (isGFX90A())
+ if (isGFX90A() || isGFX11Plus())
return StringRef("lds_direct is not supported on this GPU");
if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
@@ -4009,6 +4176,20 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
if (OpSel & ~3)
return false;
}
+
+ if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ if (OpSelIdx != -1) {
+ if (Inst.getOperand(OpSelIdx).getImm() != 0)
+ return false;
+ }
+ int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+ if (OpSelHiIdx != -1) {
+ if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
+ return false;
+ }
+ }
+
return true;
}
@@ -4179,6 +4360,47 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
return true;
}
+SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (Op.isBLGP())
+ return Op.getStartLoc();
+ }
+ return SMLoc();
+}
+
+bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
+ const OperandVector &Operands) {
+ unsigned Opc = Inst.getOpcode();
+ int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
+ if (BlgpIdx == -1)
+ return true;
+ SMLoc BLGPLoc = getBLGPLoc(Operands);
+ if (!BLGPLoc.isValid())
+ return true;
+ bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
+ auto FB = getFeatureBits();
+ bool UsesNeg = false;
+ if (FB[AMDGPU::FeatureGFX940Insts]) {
+ switch (Opc) {
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+ UsesNeg = true;
+ }
+ }
+
+ if (IsNeg == UsesNeg)
+ return true;
+
+ Error(BLGPLoc,
+ UsesNeg ? "invalid modifier: blgp is not supported"
+ : "invalid modifier: neg is not supported");
+
+ return false;
+}
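+// For illustration (hypothetical operands): on gfx940 the DGEMM opcodes
+// listed above spell this field as a neg modifier, so
+//   v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0]
+// is accepted while the same instruction with blgp:1 is diagnosed, and
+// the reverse holds for the non-DGEMM MFMA opcodes.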
+
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
@@ -4218,13 +4440,19 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
unsigned CPol = Inst.getOperand(CPolPos).getImm();
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
- if ((TSFlags & (SIInstrFlags::SMRD)) &&
- (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
- Error(IDLoc, "invalid cache policy for SMRD instruction");
- return false;
+ if (TSFlags & SIInstrFlags::SMRD) {
+ if (CPol && (isSI() || isCI())) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ Error(S, "cache policy is not supported for SMRD instructions");
+ return false;
+ }
+ if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
+ Error(IDLoc, "invalid cache policy for SMEM instruction");
+ return false;
+ }
}
- if (isGFX90A() && (CPol & CPol::SCC)) {
+ if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
@@ -4237,15 +4465,18 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
if (TSFlags & SIInstrFlags::IsAtomicRet) {
if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
- Error(IDLoc, "instruction must use glc");
+ Error(IDLoc, isGFX940() ? "instruction must use sc0"
+ : "instruction must use glc");
return false;
}
} else {
if (CPol & CPol::GLC) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
- S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
- Error(S, "instruction must not use glc");
+ S = SMLoc::getFromPointer(
+ &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
+ Error(S, isGFX940() ? "instruction must not use sc0"
+ : "instruction must not use glc");
return false;
}
}
@@ -4253,6 +4484,47 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
return true;
}
+bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
+ const OperandVector &Operands,
+ const SMLoc &IDLoc) {
+ if (isGFX940())
+ return true;
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
+ (SIInstrFlags::VALU | SIInstrFlags::FLAT))
+ return true;
+ // This is FLAT LDS DMA.
+
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
+ StringRef CStr(S.getPointer());
+ if (!CStr.startswith("lds")) {
+ // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
+ // The LDS version should have the 'lds' modifier, but it follows optional
+ // operands, so its absence is ignored by the matcher.
+ Error(IDLoc, "invalid operands for instruction");
+ return false;
+ }
+
+ return true;
+}
+
+bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
+ if (!isGFX11Plus())
+ return true;
+ for (auto &Operand : Operands) {
+ if (!Operand->isReg())
+ continue;
+ unsigned Reg = Operand->getReg();
+ if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
+ Error(getRegLoc(Reg, Operands),
+ "execz and vccz are not supported on this GPU");
+ return false;
+ }
+ }
+ return true;
+}
+
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc,
const OperandVector &Operands) {
@@ -4302,9 +4574,8 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid dim; must be MSAA type");
return false;
}
- if (!validateMIMGDataSize(Inst)) {
- Error(IDLoc,
- "image data size does not match dmask and tfe");
+ if (auto ErrMsg = validateMIMGDataSize(Inst)) {
+ Error(IDLoc, *ErrMsg);
return false;
}
if (!validateMIMGAddrSize(Inst)) {
@@ -4357,6 +4628,10 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
return false;
}
+ if (!validateBLGP(Inst, Operands)) {
+ return false;
+ }
+
if (!validateDivScale(Inst)) {
Error(IDLoc, "ABS not allowed in VOP3B instructions");
return false;
@@ -4364,6 +4639,13 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
return false;
}
+ if (!validateExeczVcczOperands(Operands)) {
+ return false;
+ }
+
+ if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
+ return false;
+ }
return true;
}
@@ -4606,6 +4888,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
uint64_t AccumOffset = 0;
+ uint64_t SharedVGPRCount = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
@@ -4630,9 +4913,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (ID == ".end_amdhsa_kernel")
break;
- if (Seen.find(ID) != Seen.end())
+ if (!Seen.insert(ID).second)
return TokError(".amdhsa_ directives cannot be repeated");
- Seen.insert(ID);
SMLoc ValStart = getLoc();
int64_t IVal;
@@ -4833,6 +5115,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
ValRange);
+ } else if (ID == ".amdhsa_shared_vgpr_count") {
+ if (IVersion.Major < 10)
+ return Error(IDRange.Start, "directive requires gfx10+", IDRange);
+ SharedVGPRCount = Val;
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
+ ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
@@ -4922,6 +5211,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
(AccumOffset / 4 - 1));
}
+ if (IVersion.Major == 10) {
+ // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
+ if (SharedVGPRCount && EnableWavefrontSize32) {
+ return TokError("shared_vgpr_count directive not valid on "
+ "wavefront size 32");
+ }
+ if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
+ return TokError("shared_vgpr_count*2 + "
+ "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
+ "exceed 63");
+ }
+ }
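+// Worked example with assumed values: a wave64 kernel declaring
+// .amdhsa_next_free_vgpr 32 gets VGPRBlocks = 32 / 4 - 1 = 7, so
+// .amdhsa_shared_vgpr_count 8 passes (8 * 2 + 7 = 23 <= 63), whereas the
+// same count with VGPRBlocks = 56 would be rejected (8 * 2 + 56 = 72).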
+
getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
ReserveFlatScr);
@@ -5253,8 +5555,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
return Error(AlignLoc, "alignment is too large");
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.amdgpu_lds' directive"))
+ if (parseEOL())
return true;
Symbol->redefineIfPossible();
@@ -5313,26 +5614,21 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
unsigned RegNo) {
- for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
- R.isValid(); ++R) {
- if (*R == RegNo)
- return isGFX9Plus();
- }
+ if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
+ return isGFX9Plus();
- // GFX10 has 2 more SGPRs 104 and 105.
- for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
- R.isValid(); ++R) {
- if (*R == RegNo)
- return hasSGPR104_SGPR105();
- }
+ // GFX10+ has 2 more SGPRs 104 and 105.
+ if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
+ return hasSGPR104_SGPR105();
switch (RegNo) {
case AMDGPU::SRC_SHARED_BASE:
case AMDGPU::SRC_SHARED_LIMIT:
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT:
- case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
return isGFX9Plus();
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ return isGFX9Plus() && !isGFX11Plus();
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
@@ -5355,7 +5651,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
if (isSI() || isGFX10Plus()) {
// No flat_scr on SI.
- // On GFX10 flat scratch is not a valid register operand and can only be
+ // On GFX10Plus flat scratch is not a valid register operand and can only be
// accessed with s_setreg/s_getreg.
switch (RegNo) {
case AMDGPU::FLAT_SCR:
@@ -5369,11 +5665,8 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
// VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
// SI/CI have.
- for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
- R.isValid(); ++R) {
- if (*R == RegNo)
- return hasSGPR102_SGPR103();
- }
+ if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
+ return hasSGPR102_SGPR103();
return true;
}
@@ -5381,8 +5674,13 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
OperandMode Mode) {
+ OperandMatchResultTy ResTy = parseVOPD(Operands);
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
+ isToken(AsmToken::EndOfStatement))
+ return ResTy;
+
// Try to parse with a custom parser
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ ResTy = MatchOperandParserImpl(Operands, Mnemonic);
// If we successfully parsed the operand or if there as an error parsing,
// we are done.
@@ -5435,7 +5733,11 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
setForcedDPP(false);
setForcedSDWA(false);
- if (Name.endswith("_e64")) {
+ if (Name.endswith("_e64_dpp")) {
+ setForcedDPP(true);
+ setForcedEncodingSize(64);
+ return Name.substr(0, Name.size() - 8);
+ } else if (Name.endswith("_e64")) {
setForcedEncodingSize(64);
return Name.substr(0, Name.size() - 4);
} else if (Name.endswith("_e32")) {
@@ -5451,11 +5753,20 @@ StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
return Name;
}
+static void applyMnemonicAliases(StringRef &Mnemonic,
+ const FeatureBitset &Features,
+ unsigned VariantID);
+
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
// Add the instruction mnemonic
Name = parseMnemonicSuffix(Name);
+
+ // If the target architecture uses MnemonicAlias, call it here to parse
+ // operands correctly.
+ applyMnemonicAliases(Name, getAvailableFeatures(), 0);
+
Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
bool IsMIMG = Name.startswith("image_");
@@ -5603,7 +5914,24 @@ AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
unsigned CPolOff = 0;
SMLoc S = getLoc();
- if (trySkipId("glc"))
+ StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
+ if (isGFX940() && !Mnemo.startswith("s_")) {
+ if (trySkipId("sc0"))
+ CPolOn = AMDGPU::CPol::SC0;
+ else if (trySkipId("nosc0"))
+ CPolOff = AMDGPU::CPol::SC0;
+ else if (trySkipId("nt"))
+ CPolOn = AMDGPU::CPol::NT;
+ else if (trySkipId("nont"))
+ CPolOff = AMDGPU::CPol::NT;
+ else if (trySkipId("sc1"))
+ CPolOn = AMDGPU::CPol::SC1;
+ else if (trySkipId("nosc1"))
+ CPolOff = AMDGPU::CPol::SC1;
+ else
+ return MatchOperand_NoMatch;
+ } else if (trySkipId("glc"))
CPolOn = AMDGPU::CPol::GLC;
else if (trySkipId("noglc"))
CPolOff = AMDGPU::CPol::GLC;
@@ -5809,7 +6137,7 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
if (isGFX10Plus()) {
- auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
+ auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
if (Ufmt == UFMT_UNDEF) {
Error(FormatLoc, "unsupported format");
return MatchOperand_ParseFail;
@@ -5828,7 +6156,7 @@ AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
int64_t &Format) {
using namespace llvm::AMDGPU::MTBUFFormat;
- auto Id = getUnifiedFormat(FormatStr);
+ auto Id = getUnifiedFormat(FormatStr, getSTI());
if (Id == UFMT_UNDEF)
return MatchOperand_NoMatch;
@@ -5969,6 +6297,7 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
bool IsGdsHardcoded) {
OptionalImmIndexMap OptionalIdx;
+ AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -5986,13 +6315,10 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
// Handle optional arguments
OptionalIdx[Op.getImmTy()] = i;
- }
- AMDGPUOperand::ImmTy OffsetType =
- (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
- Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
- Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
- AMDGPUOperand::ImmTyOffset;
+ if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
+ OffsetType = AMDGPUOperand::ImmTySwizzle;
+ }
addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
@@ -6034,7 +6360,7 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
continue;
}
- if (Op.isToken() && Op.getToken() == "done")
+ if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
continue;
// Handle optional arguments
@@ -6157,11 +6483,179 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
return MatchOperand_Success;
}
+bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
+ SMLoc FieldLoc = getLoc();
+ StringRef FieldName = getTokenStr();
+ if (!skipToken(AsmToken::Identifier, "expected a field name") ||
+ !skipToken(AsmToken::LParen, "expected a left parenthesis"))
+ return false;
+
+ SMLoc ValueLoc = getLoc();
+ StringRef ValueName = getTokenStr();
+ if (!skipToken(AsmToken::Identifier, "expected a value name") ||
+ !skipToken(AsmToken::RParen, "expected a right parenthesis"))
+ return false;
+
+ unsigned Shift;
+ if (FieldName == "instid0") {
+ Shift = 0;
+ } else if (FieldName == "instskip") {
+ Shift = 4;
+ } else if (FieldName == "instid1") {
+ Shift = 7;
+ } else {
+ Error(FieldLoc, "invalid field name " + FieldName);
+ return false;
+ }
+
+ int Value;
+ if (Shift == 4) {
+ // Parse values for instskip.
+ Value = StringSwitch<int>(ValueName)
+ .Case("SAME", 0)
+ .Case("NEXT", 1)
+ .Case("SKIP_1", 2)
+ .Case("SKIP_2", 3)
+ .Case("SKIP_3", 4)
+ .Case("SKIP_4", 5)
+ .Default(-1);
+ } else {
+ // Parse values for instid0 and instid1.
+ Value = StringSwitch<int>(ValueName)
+ .Case("NO_DEP", 0)
+ .Case("VALU_DEP_1", 1)
+ .Case("VALU_DEP_2", 2)
+ .Case("VALU_DEP_3", 3)
+ .Case("VALU_DEP_4", 4)
+ .Case("TRANS32_DEP_1", 5)
+ .Case("TRANS32_DEP_2", 6)
+ .Case("TRANS32_DEP_3", 7)
+ .Case("FMA_ACCUM_CYCLE_1", 8)
+ .Case("SALU_CYCLE_1", 9)
+ .Case("SALU_CYCLE_2", 10)
+ .Case("SALU_CYCLE_3", 11)
+ .Default(-1);
+ }
+ if (Value < 0) {
+ Error(ValueLoc, "invalid value name " + ValueName);
+ return false;
+ }
+
+ Delay |= Value << Shift;
+ return true;
+}
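+// Minimal sketch of the packing above; encodeSDelayAlu is a hypothetical
+// helper for illustration (not an LLVM API), using the same shifts as
+// parseDelay (instid0 at bit 0, instskip at bit 4, instid1 at bit 7):
+//
+//   static int64_t encodeSDelayAlu(int64_t InstId0, int64_t InstSkip,
+//                                  int64_t InstId1) {
+//     return InstId0 | (InstSkip << 4) | (InstId1 << 7);
+//   }
+//
+// s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+// then packs to encodeSDelayAlu(1, 1, 9) == 0x491.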
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
+ int64_t Delay = 0;
+ SMLoc S = getLoc();
+
+ if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
+ do {
+ if (!parseDelay(Delay))
+ return MatchOperand_ParseFail;
+ } while (trySkipToken(AsmToken::Pipe));
+ } else {
+ if (!parseExpr(Delay))
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
+ return MatchOperand_Success;
+}
+
bool
AMDGPUOperand::isSWaitCnt() const {
return isImm();
}
+bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
+
+//===----------------------------------------------------------------------===//
+// DepCtr
+//===----------------------------------------------------------------------===//
+
+void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
+ StringRef DepCtrName) {
+ switch (ErrorId) {
+ case OPR_ID_UNKNOWN:
+ Error(Loc, Twine("invalid counter name ", DepCtrName));
+ return;
+ case OPR_ID_UNSUPPORTED:
+ Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
+ return;
+ case OPR_ID_DUPLICATE:
+ Error(Loc, Twine("duplicate counter name ", DepCtrName));
+ return;
+ case OPR_VAL_INVALID:
+ Error(Loc, Twine("invalid value for ", DepCtrName));
+ return;
+ default:
+ assert(false);
+ }
+}
+
+bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
+
+ using namespace llvm::AMDGPU::DepCtr;
+
+ SMLoc DepCtrLoc = getLoc();
+ StringRef DepCtrName = getTokenStr();
+
+ if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
+ !skipToken(AsmToken::LParen, "expected a left parenthesis"))
+ return false;
+
+ int64_t ExprVal;
+ if (!parseExpr(ExprVal))
+ return false;
+
+ unsigned PrevOprMask = UsedOprMask;
+ int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
+
+ if (CntVal < 0) {
+ depCtrError(DepCtrLoc, CntVal, DepCtrName);
+ return false;
+ }
+
+ if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
+ return false;
+
+ if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
+ if (isToken(AsmToken::EndOfStatement)) {
+ Error(getLoc(), "expected a counter name");
+ return false;
+ }
+ }
+
+ unsigned CntValMask = PrevOprMask ^ UsedOprMask;
+ DepCtr = (DepCtr & ~CntValMask) | CntVal;
+ return true;
+}
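+// Sketch of the merge above, assuming each counter occupies a contiguous
+// bit field [Lo, Lo+Width) of the encoding (setDepCtrField is a
+// hypothetical helper shown only to make the masking explicit):
+//
+//   static int64_t setDepCtrField(int64_t Enc, unsigned Lo, unsigned Width,
+//                                 int64_t Val) {
+//     int64_t Mask = ((int64_t(1) << Width) - 1) << Lo;
+//     return (Enc & ~Mask) | ((Val << Lo) & Mask);
+//   }
+//
+// encodeDepCtr reports the touched field through UsedOprMask, so the
+// "DepCtr = (DepCtr & ~CntValMask) | CntVal" step is this operation with
+// CntVal already shifted into place.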
+
+OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
+ using namespace llvm::AMDGPU::DepCtr;
+
+ int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
+ SMLoc Loc = getLoc();
+
+ if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
+ unsigned UsedOprMask = 0;
+ while (!isToken(AsmToken::EndOfStatement)) {
+ if (!parseDepCtr(DepCtr, UsedOprMask))
+ return MatchOperand_ParseFail;
+ }
+ } else {
+ if (!parseExpr(DepCtr))
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
+ return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
+
//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//
@@ -6175,7 +6669,7 @@ AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
// The register may be specified by name or using a numeric code
HwReg.Loc = getLoc();
if (isToken(AsmToken::Identifier) &&
- (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
+ (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
HwReg.IsSymbolic = true;
lex(); // skip register name
} else if (!parseExpr(HwReg.Id, "a register name")) {
@@ -6208,15 +6702,18 @@ AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
using namespace llvm::AMDGPU::Hwreg;
- if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
- Error(HwReg.Loc,
- "specified hardware register is not supported on this GPU");
- return false;
- }
- if (!isValidHwreg(HwReg.Id)) {
- Error(HwReg.Loc,
- "invalid code of hardware register: only 6-bit values are legal");
- return false;
+ if (HwReg.IsSymbolic) {
+ if (HwReg.Id == OPR_ID_UNSUPPORTED) {
+ Error(HwReg.Loc,
+ "specified hardware register is not supported on this GPU");
+ return false;
+ }
+ } else {
+ if (!isValidHwreg(HwReg.Id)) {
+ Error(HwReg.Loc,
+ "invalid code of hardware register: only 6-bit values are legal");
+ return false;
+ }
}
if (!isValidHwregOffset(Offset.Id)) {
Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
@@ -6238,7 +6735,7 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
SMLoc Loc = getLoc();
if (trySkipId("hwreg", AsmToken::LParen)) {
- OperandInfoTy HwReg(ID_UNKNOWN_);
+ OperandInfoTy HwReg(OPR_ID_UNKNOWN);
OperandInfoTy Offset(OFFSET_DEFAULT_);
OperandInfoTy Width(WIDTH_DEFAULT_);
if (parseHwregBody(HwReg, Offset, Width) &&
@@ -6275,7 +6772,8 @@ AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
using namespace llvm::AMDGPU::SendMsg;
Msg.Loc = getLoc();
- if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
+ if (isToken(AsmToken::Identifier) &&
+ (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
Msg.IsSymbolic = true;
lex(); // skip message name
} else if (!parseExpr(Msg.Id, "a message name")) {
@@ -6310,15 +6808,22 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
using namespace llvm::AMDGPU::SendMsg;
// Validation strictness depends on whether message is specified
- // in a symbolc or in a numeric form. In the latter case
+ // in a symbolic or in a numeric form. In the latter case
// only encoding possibility is checked.
bool Strict = Msg.IsSymbolic;
- if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
- Error(Msg.Loc, "invalid message id");
- return false;
+ if (Strict) {
+ if (Msg.Id == OPR_ID_UNSUPPORTED) {
+ Error(Msg.Loc, "specified message id is not supported on this GPU");
+ return false;
+ }
+ } else {
+ if (!isValidMsgId(Msg.Id, getSTI())) {
+ Error(Msg.Loc, "invalid message id");
+ return false;
+ }
}
- if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
+ if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
if (Op.IsDefined) {
Error(Op.Loc, "message does not support operations");
} else {
@@ -6330,7 +6835,8 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
Error(Op.Loc, "invalid operation id");
return false;
}
- if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
+ if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
+ Stream.IsDefined) {
Error(Stream.Loc, "message operation does not support streams");
return false;
}
@@ -6349,7 +6855,7 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
SMLoc Loc = getLoc();
if (trySkipId("sendmsg", AsmToken::LParen)) {
- OperandInfoTy Msg(ID_UNKNOWN_);
+ OperandInfoTy Msg(OPR_ID_UNKNOWN);
OperandInfoTy Op(OP_NONE_);
OperandInfoTy Stream(STREAM_ID_NONE_);
if (parseSendMsgBody(Msg, Op, Stream) &&
@@ -6610,9 +7116,10 @@ AMDGPUAsmParser::getToken() const {
return Parser.getTok();
}
-AsmToken
-AMDGPUAsmParser::peekToken() {
- return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
+AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
+ return isToken(AsmToken::EndOfStatement)
+ ? getToken()
+ : getLexer().peekTok(ShouldSkipSpace);
}
void
@@ -7078,8 +7585,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
bool IsAtomic,
bool IsLds) {
- bool IsLdsOpcode = IsLds;
- bool HasLdsModifier = false;
OptionalImmIndexMap OptionalIdx;
unsigned FirstOperandIdx = 1;
bool IsAtomicReturn = false;
@@ -7123,8 +7628,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
continue;
}
- HasLdsModifier |= Op.isLDS();
-
// Handle tokens like 'offen' which are sometimes hard-coded into the
// asm string. There are no MCInst operands for these.
if (Op.isToken()) {
@@ -7136,25 +7639,10 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
OptionalIdx[Op.getImmTy()] = i;
}
- // This is a workaround for an llvm quirk which may result in an
- // incorrect instruction selection. Lds and non-lds versions of
- // MUBUF instructions are identical except that lds versions
- // have mandatory 'lds' modifier. However this modifier follows
- // optional modifiers and llvm asm matcher regards this 'lds'
- // modifier as an optional one. As a result, an lds version
- // of opcode may be selected even if it has no 'lds' modifier.
- if (IsLdsOpcode && !HasLdsModifier) {
- int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
- if (NoLdsOpcode != -1) { // Got lds version - correct it.
- Inst.setOpcode(NoLdsOpcode);
- IsLdsOpcode = false;
- }
- }
-
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
- if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
+ if (!IsLds) { // tfe is not legal with lds opcodes
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
@@ -7327,7 +7815,8 @@ bool AMDGPUOperand::isSMRDOffset8() const {
}
bool AMDGPUOperand::isSMEMOffset() const {
- return isImm(); // Offset range is checked later by validator.
+ return isImmTy(ImmTyNone) ||
+ isImmTy(ImmTyOffset); // Offset range is checked later by validator.
}
bool AMDGPUOperand::isSMRDLiteralOffset() const {
@@ -7415,10 +7904,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
{"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
- {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
- {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
- {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
- {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
{"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
@@ -7429,9 +7914,17 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
{"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
{"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
+ {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr},
+ {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
+ {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
+ {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
+ {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
+ {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
{"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
{"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
- {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
+ {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
+ {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
+ {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
};
void AMDGPUAsmParser::onBeginOfFile() {
@@ -7497,8 +7990,17 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
res = parseDim(Operands);
} else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
res = parseCPol(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
+ res = parseDPP8(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
+ res = parseDPPCtrl(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
+ if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
+ res = parseOperandArrayWithPrefix("neg", Operands,
+ AMDGPUOperand::ImmTyBLGP,
+ nullptr);
+ }
}
if (res != MatchOperand_NoMatch) {
return res;
@@ -7596,6 +8098,66 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
}
}
+void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
+{
+ OptionalImmIndexMap OptionalIdx;
+ unsigned Opc = Inst.getOpcode();
+
+ unsigned I = 1;
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
+ ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
+ }
+
+ for (unsigned E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isImmModifier()) {
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ llvm_unreachable("unhandled operand type");
+ }
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ if (OpSelIdx != -1)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
+
+ if (OpSelIdx == -1)
+ return;
+
+ const int Ops[] = { AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2 };
+ const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers };
+
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+ if (OpIdx == -1)
+ break;
+
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+ uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
+
+ if ((OpSel & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_0;
+ if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
+ (OpSel & (1 << 3)) != 0)
+ ModVal |= SISrcMods::DST_OP_SEL;
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+}
+
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx) {
unsigned Opc = Inst.getOpcode();
@@ -7652,9 +8214,12 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
Opc == AMDGPU::V_MAC_F16_e64_vi ||
Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+ Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F32_e64_vi ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
- Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
+ Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
+ Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
+ Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
auto it = Inst.begin();
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
@@ -7731,6 +8296,11 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
if (OpIdx == -1)
break;
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+
+ if (ModIdx == -1)
+ continue;
+
uint32_t ModVal = 0;
if ((OpSel & (1 << J)) != 0)
@@ -7745,8 +8315,6 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
if ((NegHi & (1 << J)) != 0)
ModVal |= SISrcMods::NEG_HI;
- int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
-
Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
}
}
@@ -7758,6 +8326,118 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
}
//===----------------------------------------------------------------------===//
+// VOPD
+//===----------------------------------------------------------------------===//
+
+OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
+ if (!hasVOPD(getSTI()))
+ return MatchOperand_NoMatch;
+
+ if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
+ SMLoc S = getLoc();
+ lex();
+ lex();
+ Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
+ const MCExpr *Expr;
+ if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
+ Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
+ return MatchOperand_Success;
+ }
+ Error(S, "invalid VOPD :: usage");
+ return MatchOperand_ParseFail;
+ }
+ return MatchOperand_NoMatch;
+}
+
+// Create VOPD MCInst operands using parsed assembler operands.
+// Parsed VOPD operands are ordered as follows:
+// OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
+// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
+// If both OpX and OpY have an imm, the first imm has a different name:
+// OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
+// OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
+// MCInst operands have the following order:
+// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
+void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
+ auto addOp = [&](uint16_t i) { // NOLINT:function pointer
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ return;
+ }
+ if (Op.isImm()) {
+ Op.addImmOperands(Inst, 1);
+ return;
+ }
+ // Handle tokens like 'offen' which are sometimes hard-coded into the
+ // asm string. There are no MCInst operands for these.
+ if (Op.isToken()) {
+ return;
+ }
+ llvm_unreachable("Unhandled operand type in cvtVOPD");
+ };
+
+ // Indices into MCInst.Operands
+ const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
+ const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
+ const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ...
+
+ unsigned Opc = Inst.getOpcode();
+ bool HasVsrc1X =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
+ bool HasImmX =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
+ (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
+ FmamkOpXImmMCIndex ||
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
+ FmaakOpXImmMCIndex));
+
+ bool HasVsrc1Y =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
+ bool HasImmY =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
+ MinOpYImmMCIndex + HasVsrc1X;
+
+ // Indices of parsed operands relative to dst
+ const auto DstIdx = 0;
+ const auto Src0Idx = 1;
+ const auto Vsrc1OrImmIdx = 2;
+
+ const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
+ const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
+
+ // Offsets into parsed operands
+ const auto OpXFirstOperandOffset = 1;
+ const auto OpYFirstOperandOffset =
+ OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
+
+ // Order of addOp calls determines MC operand order
+ addOp(OpXFirstOperandOffset + DstIdx); // vdstX
+ addOp(OpYFirstOperandOffset + DstIdx); // vdstY
+
+ addOp(OpXFirstOperandOffset + Src0Idx); // src0X
+ if (HasImmX) {
+ // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
+ addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
+ addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
+ } else {
+ if (HasVsrc1X) // all except v_mov
+ addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
+ }
+
+ addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
+ if (HasImmY) {
+ // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
+ addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
+ addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
+ } else {
+ if (HasVsrc1Y) // all except v_mov
+ addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
+ }
+}
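+// Worked example for a hypothetical pair
+//   v_dual_fmamk_f32 v0, v1, 0x42, v2 :: v_dual_mov_b32 v3, v4
+// OpX has an imm and a vsrc1, so OpXOperandsSize = 2 + 1 + 1 = 4 and
+// OpYFirstOperandOffset = 1 + 4 + 2 = 7; the addOp calls above then emit
+// vdstX(v0), vdstY(v3), src0X(v1), immX(0x42), vsrc1X(v2), src0Y(v4).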
+
+//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
@@ -8067,6 +8747,88 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
+// Add dummy $old operand
+void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst,
+ const OperandVector &Operands,
+ bool IsDPP8) {
+ Inst.addOperand(MCOperand::createReg(0));
+ cvtVOP3DPP(Inst, Operands, IsDPP8);
+}
+
+void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
+ OptionalImmIndexMap OptionalIdx;
+ unsigned Opc = Inst.getOpcode();
+ bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
+ unsigned I = 1;
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
+ ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
+ }
+
+ int Fi = 0;
+ for (unsigned E = Operands.size(); I != E; ++I) {
+ auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
+ MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ assert((unsigned)TiedTo < Inst.getNumOperands());
+ // handle tied old or src2 for MAC instructions
+ Inst.addOperand(Inst.getOperand(TiedTo));
+ }
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ // Add the register arguments
+ if (IsDPP8 && Op.isFI()) {
+ Fi = Op.getImm();
+ } else if (HasModifiers &&
+ isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ } else if (Op.isImm() &&
+ Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
+ assert(!HasModifiers && "Case should be unreachable with modifiers");
+ assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
+ Op.addImmOperands(Inst, 1);
+ } else if (Op.isImm()) {
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ llvm_unreachable("unhandled operand type");
+ }
+ }
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+ }
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+ }
+ if (Desc.TSFlags & SIInstrFlags::VOP3P)
+ cvtVOP3P(Inst, Operands, OptionalIdx);
+ else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
+ }
+
+ if (IsDPP8) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
+ using namespace llvm::AMDGPU::DPP;
+ Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
+ } else {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
+ }
+ }
+}
+
+// Add dummy $old operand
+void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst,
+ const OperandVector &Operands,
+ bool IsDPP8) {
+ Inst.addOperand(MCOperand::createReg(0));
+ cvtDPP(Inst, Operands, IsDPP8);
+}
+
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
OptionalImmIndexMap OptionalIdx;
@@ -8352,7 +9114,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
-// This fuction should be defined after auto-generated include so that we have
+// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
@@ -8431,3 +9193,27 @@ OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
}
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
+
+//===----------------------------------------------------------------------===//
+// LDSDIR
+//===----------------------------------------------------------------------===//
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
+}
+
+bool AMDGPUOperand::isWaitVDST() const {
+ return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
+}
+
+//===----------------------------------------------------------------------===//
+// VINTERP
+//===----------------------------------------------------------------------===//
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
+}
+
+bool AMDGPUOperand::isWaitEXP() const {
+ return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
+}
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a535c8cc0918..a087323e5de7 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -35,11 +35,6 @@ class MUBUFAddr64Table <bit is_addr64, string Name> {
string OpName = Name;
}
-class MUBUFLdsTable <bit is_lds, string Name> {
- bit IsLds = is_lds;
- string OpName = Name;
-}
-
class MTBUFAddr64Table <bit is_addr64, string Name> {
bit IsAddr64 = is_addr64;
string OpName = Name;
@@ -100,8 +95,8 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> sccb_value = 0;
}
-class MTBUF_Real <MTBUF_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
+class MTBUF_Real <MTBUF_Pseudo ps, string real_name = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -136,7 +131,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
bits<3> nfmt = format{6-4};
// GFX90A+ only: instruction uses AccVGPR for data
- // Bit superceedes tfe.
+ // Bit supersedes tfe.
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
}
@@ -320,7 +315,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> idxen = 0;
bits<1> addr64 = 0;
bits<1> lds = 0;
- bits<1> has_vdata = 1;
+ bits<1> has_vdata = !not(lds);
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
bits<1> has_dlc = 1;
@@ -337,8 +332,8 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> IsBufferInv = 0;
}
-class MUBUF_Real <MUBUF_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
+class MUBUF_Real <MUBUF_Pseudo ps, string real_name = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -360,6 +355,8 @@ class MUBUF_Real <MUBUF_Pseudo ps> :
let mayStore = ps.mayStore;
let IsAtomicRet = ps.IsAtomicRet;
let IsAtomicNoRet = ps.IsAtomicNoRet;
+ let VALU = ps.VALU;
+ let LGKM_CNT = ps.LGKM_CNT;
bits<12> offset;
bits<5> cpol;
@@ -370,8 +367,8 @@ class MUBUF_Real <MUBUF_Pseudo ps> :
bits<8> soffset;
// GFX90A+ only: instruction uses AccVGPR for data
- // Bit superceedes tfe.
- bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
+ // Bit supersedes tfe.
+ bits<1> acc = !if(ps.has_vdata, vdata{9}, !if(ps.lds, ?, 0));
}
@@ -486,16 +483,17 @@ class MUBUF_Load_Pseudo <string opName,
ValueType vdata_vt,
bit HasTiedDest = 0,
bit isLds = 0,
+ bit isLdsOpc = 0,
list<dag> pattern=[],
// Workaround bug bz30254
int addrKindCopy = addrKind,
RegisterClass vdata_rc = getVregSrcForVT<vdata_vt>.ret,
RegisterOperand vdata_op = getLdStRegisterOperand<vdata_rc>.ret>
: MUBUF_Pseudo<opName,
- (outs vdata_op:$vdata),
+ !if(!or(isLds, isLdsOpc), (outs), (outs vdata_op:$vdata)),
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
!if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))),
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" #
+ !if(!or(isLds, isLdsOpc), " ", " $vdata, ") # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" #
!if(isLds, " lds", "$tfe") # "$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
@@ -504,13 +502,16 @@ class MUBUF_Load_Pseudo <string opName,
let AsmMatchConverter = !if(isLds, "cvtMubufLds", "cvtMubuf");
let Constraints = !if(HasTiedDest, "$vdata = $vdata_in", "");
+ let LGKM_CNT = isLds;
+ let has_vdata = !not(isLdsOpc);
let mayLoad = 1;
- let mayStore = 0;
+ let mayStore = isLds;
let maybeAtomic = 1;
- let Uses = !if(isLds, [EXEC, M0], [EXEC]);
+ let Uses = !if(!or(isLds, isLdsOpc), [EXEC, M0], [EXEC]);
let has_tfe = !not(isLds);
let lds = isLds;
let elements = getMUBUFElements<vdata_vt>.ret;
+ let VALU = isLds;
}
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
@@ -563,6 +564,20 @@ multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> {
defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
}
+multiclass MUBUF_Pseudo_Loads_LDSOpc<string opName,
+ ValueType load_vt = i32,
+ bit TiedDest = 0,
+ bit isLds = 0,
+ bit isLdsOpc = 1> {
+
+ defvar legal_load_vt = !if(!eq(!cast<string>(load_vt), !cast<string>(v3f16)), v4f16, load_vt);
+
+ def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds, isLdsOpc>;
+ def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, isLdsOpc>;
+ def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, isLdsOpc>;
+ def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, isLdsOpc>;
+}
+
class MUBUF_Store_Pseudo <string opName,
int addrKind,
ValueType store_vt,
@@ -615,7 +630,8 @@ class MUBUF_Pseudo_Store_Lds<string opName>
(outs),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz),
" $srsrc, $soffset$offset lds$cpol$swz"> {
- let mayLoad = 0;
+ let LGKM_CNT = 1;
+ let mayLoad = 1;
let mayStore = 1;
let maybeAtomic = 1;
@@ -623,6 +639,7 @@ class MUBUF_Pseudo_Store_Lds<string opName>
let has_vaddr = 0;
let has_tfe = 0;
let lds = 1;
+ let VALU = 1;
let Uses = [EXEC, M0];
let AsmMatchConverter = "cvtMubufLds";
@@ -785,7 +802,7 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
multiclass MUBUF_Pseudo_Atomics <string opName,
RegisterClass vdataClass,
ValueType vdataType,
- SDPatternOperator atomic> :
+ SDPatternOperator atomic = null_frag> :
MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType>,
MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
@@ -898,6 +915,29 @@ defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
"buffer_load_dwordx4", v4i32
>;
+defm BUFFER_LOAD_LDS_B32 : MUBUF_Pseudo_Loads_LDSOpc <
+ "buffer_load_lds_b32", i32
+>;
+defm BUFFER_LOAD_LDS_FORMAT_X : MUBUF_Pseudo_Loads_LDSOpc <
+ "buffer_load_lds_format_x", f32
+>;
+defm BUFFER_LOAD_LDS_I8 : MUBUF_Pseudo_Loads_LDSOpc <
+ "buffer_load_lds_i8", i32
+>;
+defm BUFFER_LOAD_LDS_I16 : MUBUF_Pseudo_Loads_LDSOpc <
+ "buffer_load_lds_i16", i32
+>;
+defm BUFFER_LOAD_LDS_U8 : MUBUF_Pseudo_Loads_LDSOpc <
+ "buffer_load_lds_u8", i32
+>;
+defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc <
+ "buffer_load_lds_u16", i32
+>;
+
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, atomic_load_16_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
@@ -909,21 +949,6 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
-// This is not described in AMD documentation,
-// but 'lds' versions of these opcodes are available
-// in at least GFX8+ chips. See Bug 37653.
-let SubtargetPredicate = isGFX8GFX9 in {
-defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx2", v2i32, 0, 1
->;
-defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx3", v3i32, 0, 1
->;
-defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx4", v4i32, 0, 1
->;
-}
-
defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores <
"buffer_store_byte", i32, truncstorei8_global
>;
@@ -943,82 +968,82 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
"buffer_store_dwordx4", v4i32, store_global
>;
defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics <
- "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32
+ "buffer_atomic_swap", VGPR_32, i32
>;
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics <
- "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag
+ "buffer_atomic_cmpswap", VReg_64, v2i32
>;
defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics <
- "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32
+ "buffer_atomic_add", VGPR_32, i32
>;
defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics <
- "buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32
+ "buffer_atomic_sub", VGPR_32, i32
>;
defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32
+ "buffer_atomic_smin", VGPR_32, i32
>;
defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32
+ "buffer_atomic_umin", VGPR_32, i32
>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32
+ "buffer_atomic_smax", VGPR_32, i32
>;
defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32
+ "buffer_atomic_umax", VGPR_32, i32
>;
defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics <
- "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32
+ "buffer_atomic_and", VGPR_32, i32
>;
defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics <
- "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32
+ "buffer_atomic_or", VGPR_32, i32
>;
defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics <
- "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32
+ "buffer_atomic_xor", VGPR_32, i32
>;
defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics <
- "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32
+ "buffer_atomic_inc", VGPR_32, i32
>;
defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics <
- "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32
+ "buffer_atomic_dec", VGPR_32, i32
>;
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64
+ "buffer_atomic_swap_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
+ "buffer_atomic_cmpswap_x2", VReg_128, v2i64
>;
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64
+ "buffer_atomic_add_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64
+ "buffer_atomic_sub_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64
+ "buffer_atomic_smin_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64
+ "buffer_atomic_umin_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64
+ "buffer_atomic_smax_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64
+ "buffer_atomic_umax_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64
+ "buffer_atomic_and_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64
+ "buffer_atomic_or_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64
+ "buffer_atomic_xor_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64
+ "buffer_atomic_inc_x2", VReg_64, i64
>;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
- "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64
+ "buffer_atomic_dec_x2", VReg_64, i64
>;
let SubtargetPredicate = HasGFX10_BEncoding in
@@ -1040,7 +1065,7 @@ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc",
int_amdgcn_buffer_wbinvl1_sc>;
}
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
+let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
"buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
@@ -1051,6 +1076,11 @@ defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
"buffer_atomic_fmax", VGPR_32, f32, null_frag
>;
+
+}
+
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
>;
@@ -1109,23 +1139,25 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
int_amdgcn_buffer_wbinvl1>;
-let SubtargetPredicate = HasAtomicFaddInsts in {
-defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN<
"buffer_atomic_add_f32", VGPR_32, f32
>;
+
+let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16
>;
-let OtherPredicates = [isGFX90APlus] in {
-defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN <
+let OtherPredicates = [HasAtomicFaddRtnInsts] in
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN<
"buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_32
>;
+
+let OtherPredicates = [isGFX90APlus] in
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_32
>;
-}
-} // End SubtargetPredicate = HasAtomicFaddInsts
//===----------------------------------------------------------------------===//
// MTBUF Instructions
@@ -1175,15 +1207,28 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
let SubtargetPredicate = isGFX90APlus in {
def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> {
+ let has_glc = 1;
+ let has_sccb = 1;
+ let InOperandList = (ins CPol_0:$cpol);
+ let AsmOperands = "$cpol";
}
def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> {
+ let SubtargetPredicate = isGFX90AOnly;
}
- defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>;
- defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
- defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
+ defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
+ defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
+ defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus
+def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
+ let SubtargetPredicate = isGFX940Plus;
+ let has_glc = 1;
+ let has_sccb = 1;
+ let InOperandList = (ins CPol_0:$cpol);
+ let AsmOperands = "$cpol";
+}
+
let SubtargetPredicate = isGFX10Plus in {
def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
@@ -1364,75 +1409,169 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
// buffer_atomic patterns
//===----------------------------------------------------------------------===//
-multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
- string opcode> {
+multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> {
+ foreach RtnMode = ["ret", "noret"] in {
+
+ defvar Op = !cast<SDPatternOperator>(OpPrefix # "_" # RtnMode
+ # !if(isIntr, "", "_" # vt.Size));
+ defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+
+ def : GCNPat<
+ (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vt:$vdata_in)),
+ (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset)
+ >;
+
+ def : GCNPat<
+ (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+ vt:$vdata_in)),
+ (!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
+ VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset)
+ >;
+
+ } // end foreach RtnMode
+}
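// A minimal sketch (illustrative, not part of the patch) of what one
// instantiation of BufferAtomicPat expands to: for OpPrefix =
// "atomic_swap_global", vt = i32 and RtnMode = "ret", the OFFSET pattern is
// roughly
//
//   def : GCNPat<
//     (i32 (atomic_swap_global_ret_32
//             (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), i32:$vdata_in)),
//     (BUFFER_ATOMIC_SWAP_OFFSET_RTN VGPR_32:$vdata_in, SReg_128:$srsrc,
//                                    SCSrc_b32:$soffset, offset:$offset)
//   >;
//
// and the "noret" iteration selects the same shape through the non-_RTN
// pseudo via atomic_swap_global_noret_32.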
+
+multiclass BufferAtomicIntrPat<string OpPrefix, ValueType vt, string Inst> {
+ defm : BufferAtomicPat<OpPrefix, vt, Inst, /* isIntr */ 1>;
+}
+
+multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
+ foreach RtnMode = ["ret", "noret"] in {
+
+ defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global_" # RtnMode
+ # "_" # vt.Size);
+ defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+
+ defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
+ getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset,
+ offset:$offset);
+ def : GCNPat<
+ (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), data_vt:$vdata_in)),
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS OffsetResDag, getVregSrcForVT<data_vt>.ret)),
+ !if(!eq(vt, i32), sub0, sub0_sub1)),
+ OffsetResDag)
+ >;
+
+ defvar Addr64ResDag = (!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix)
+ getVregSrcForVT<data_vt>.ret:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_b32:$soffset, offset:$offset);
+ def : GCNPat<
+ (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+ data_vt:$vdata_in)),
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS Addr64ResDag, getVregSrcForVT<data_vt>.ret)),
+ !if(!eq(vt, i32), sub0, sub0_sub1)),
+ Addr64ResDag)
+ >;
+
+ } // end foreach RtnMode
+}
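// Sketch of the "ret" half above (illustrative): because the cmpswap data
// operand packs {data, cmp} into one register pair, the returning form copies
// the result into the data register class and extracts the low half, e.g. for
// vt = i32 / data_vt = v2i32:
//
//   (EXTRACT_SUBREG
//     (i32 (COPY_TO_REGCLASS
//             (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN VReg_64:$vdata_in,
//                SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset),
//             VReg_64)),
//     sub0)
//
// The i64/v2i64 variant extracts sub0_sub1 instead, and the "noret" iteration
// emits the instruction without the wrapper.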
+
+foreach Ty = [i32, i64] in {
+
+defvar Suffix = !if(!eq(Ty, i64), "_X2", "");
+
+defm : BufferAtomicPat<"atomic_swap_global", Ty, "BUFFER_ATOMIC_SWAP" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_add_global", Ty, "BUFFER_ATOMIC_ADD" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_sub_global", Ty, "BUFFER_ATOMIC_SUB" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_min_global", Ty, "BUFFER_ATOMIC_SMIN" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_umin_global", Ty, "BUFFER_ATOMIC_UMIN" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_max_global", Ty, "BUFFER_ATOMIC_SMAX" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_umax_global", Ty, "BUFFER_ATOMIC_UMAX" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_and_global", Ty, "BUFFER_ATOMIC_AND" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_or_global", Ty, "BUFFER_ATOMIC_OR" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_xor_global", Ty, "BUFFER_ATOMIC_XOR" # Suffix>;
+defm : BufferAtomicPat<"atomic_inc_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>;
+defm : BufferAtomicPat<"atomic_dec_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>;
+
+} // end foreach Ty
+
+defm : BufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">;
+defm : BufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">;
+
+multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
+ list<string> RtnModes = ["ret", "noret"]> {
+ foreach RtnMode = RtnModes in {
+
+ defvar Op = !cast<SDPatternOperator>(!if(!eq(RtnMode, "none"),
+ OpPrefix, OpPrefix # "_" # RtnMode));
+ defvar InstSuffix = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")),
+ "_RTN", "");
+ defvar CachePolicy = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")),
+ (set_glc $cachepolicy), (timm:$cachepolicy));
+
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
+ (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset,
timm:$offset, timm:$cachepolicy, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN)
+ (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (set_glc $cachepolicy))
+ (as_i16timm $offset), CachePolicy)
>;
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset,
+ (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset,
timm:$offset, timm:$cachepolicy, timm)),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
- VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (set_glc $cachepolicy))
+ (!cast<MUBUF_Pseudo>(Inst # "_IDXEN" # InstSuffix)
+ getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc,
+ SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
>;
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset,
+ (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset,
i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
- VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (set_glc $cachepolicy))
+ (!cast<MUBUF_Pseudo>(Inst # "_OFFEN" # InstSuffix)
+ getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc,
+ SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
>;
def : GCNPat<
- (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset,
+ (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset,
i32:$soffset, timm:$offset, timm:$cachepolicy, timm)),
- (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
+ (!cast<MUBUF_Pseudo>(Inst # "_BOTHEN" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (set_glc $cachepolicy))
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
>;
-}
-
-defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_swap, f32, "BUFFER_ATOMIC_SWAP">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i32, "BUFFER_ATOMIC_UMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i32, "BUFFER_ATOMIC_SMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i32, "BUFFER_ATOMIC_INC">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i32, "BUFFER_ATOMIC_DEC">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_csub, i32, "BUFFER_ATOMIC_CSUB">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i64, "BUFFER_ATOMIC_SMIN_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i64, "BUFFER_ATOMIC_UMIN_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i64, "BUFFER_ATOMIC_SMAX_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
+ } // end foreach RtnMode
+}
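// A note on RtnModes (illustrative): "ret" pairs the _RTN pseudo with
// (set_glc $cachepolicy) and "noret" pairs the plain pseudo with
// (timm:$cachepolicy), while the special "none" mode (used only for csub
// below) keeps the unsuffixed SDNode but still selects the _RTN pseudo with
// GLC forced, roughly:
//
//   def : GCNPat<
//     (i32 (SIbuffer_atomic_csub i32:$vdata_in, v4i32:$rsrc, 0, 0,
//             i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
//     (BUFFER_ATOMIC_CSUB_OFFSET_RTN VGPR_32:$vdata_in, SReg_128:$rsrc,
//       SCSrc_b32:$soffset, (as_i16timm $offset), (set_glc $cachepolicy))
//   >;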
+
+defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i32, "BUFFER_ATOMIC_SWAP">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", f32, "BUFFER_ATOMIC_SWAP">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i32, "BUFFER_ATOMIC_ADD">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i32, "BUFFER_ATOMIC_SUB">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_smin", i32, "BUFFER_ATOMIC_SMIN">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_umin", i32, "BUFFER_ATOMIC_UMIN">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_smax", i32, "BUFFER_ATOMIC_SMAX">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_umax", i32, "BUFFER_ATOMIC_UMAX">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_and", i32, "BUFFER_ATOMIC_AND">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i32, "BUFFER_ATOMIC_OR">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i32, "BUFFER_ATOMIC_XOR">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i32, "BUFFER_ATOMIC_INC">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i32, "BUFFER_ATOMIC_DEC">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["none"]>;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i64, "BUFFER_ATOMIC_SWAP_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i64, "BUFFER_ATOMIC_ADD_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i64, "BUFFER_ATOMIC_SUB_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_smin", i64, "BUFFER_ATOMIC_SMIN_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_umin", i64, "BUFFER_ATOMIC_UMIN_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_smax", i64, "BUFFER_ATOMIC_SMAX_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_umax", i64, "BUFFER_ATOMIC_UMAX_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_and", i64, "BUFFER_ATOMIC_AND_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i64, "BUFFER_ATOMIC_OR_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
+defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
+
+let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
+}
let SubtargetPredicate = isGFX6GFX7GFX10 in {
- defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f32, "BUFFER_ATOMIC_FMIN">;
- defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f32, "BUFFER_ATOMIC_FMAX">;
- defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_FMIN_X2">;
- defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_FMAX_X2">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_FMIN_X2">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_FMAX_X2">;
}
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
@@ -1482,71 +1621,89 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
>;
}
-let SubtargetPredicate = HasAtomicFaddInsts in {
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
+
+let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
-}
-let SubtargetPredicate = isGFX90APlus in {
- defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
- defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
+let SubtargetPredicate = HasAtomicFaddRtnInsts in
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32">;
- defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f64, "BUFFER_ATOMIC_ADD_F64">;
- defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_MIN_F64">;
- defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_MAX_F64">;
+let SubtargetPredicate = isGFX90APlus in {
+ defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
+ defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
+ defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
+
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
} // End SubtargetPredicate = isGFX90APlus
+foreach RtnMode = ["ret", "noret"] in {
+
+defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap # "_" # RtnMode);
+defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", "");
+defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy),
+ (timm:$cachepolicy));
+
+defvar OffsetResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFSET" # InstSuffix)
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy);
def : GCNPat<
- (SIbuffer_atomic_cmpswap
+ (Op
i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset,
timm:$offset, timm:$cachepolicy, 0),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
- (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (set_glc $cachepolicy)), VReg_64)), sub0)
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffsetResDag, VReg_64)), sub0),
+ OffsetResDag)
>;
+defvar IdxenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_IDXEN" # InstSuffix)
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ CachePolicy);
def : GCNPat<
- (SIbuffer_atomic_cmpswap
+ (Op
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
- (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (set_glc $cachepolicy)), VReg_64)),
- sub0)
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS IdxenResDag, VReg_64)), sub0),
+ IdxenResDag)
>;
+defvar OffenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFEN" # InstSuffix)
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ CachePolicy);
def : GCNPat<
- (SIbuffer_atomic_cmpswap
+ (Op
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
- (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (set_glc $cachepolicy)), VReg_64)),
- sub0)
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffenResDag, VReg_64)), sub0),
+ OffenResDag)
>;
+defvar BothenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_BOTHEN" # InstSuffix)
+ (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
+ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy);
def : GCNPat<
- (SIbuffer_atomic_cmpswap
+ (Op
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
- (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
- (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
- (set_glc $cachepolicy)), VReg_64)),
- sub0)
+ !if(!eq(RtnMode, "ret"),
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS BothenResDag, VReg_64)), sub0),
+ BothenResDag)
>;
+} // end foreach RtnMode
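// Summary sketch (illustrative): each of the four address modes above now
// yields one pattern per RtnMode. The "ret" iteration wraps the _RTN pseudo as
//
//   (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS <ResDag>, VReg_64)), sub0)
//
// to pull the 32-bit old value out of the packed destination, while the
// "noret" iteration emits the non-_RTN pseudo directly and passes
// timm:$cachepolicy through instead of forcing GLC.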
+
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
@@ -1682,8 +1839,12 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
>;
}
let SubtargetPredicate = isGFX6GFX7 in {
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, atomic_store_global_32>;
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, atomic_store_global_64>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, i32, atomic_store_8_global>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, i16, atomic_store_8_global>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, i32, atomic_store_16_global>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, i16, atomic_store_16_global>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, atomic_store_32_global>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, atomic_store_64_global>;
} // End Predicates = isGFX6GFX7
@@ -1731,7 +1892,7 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OF
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
-let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in {
+let OtherPredicates = [HasD16LoadStore, DisableFlatScratch] in {
// Hiding the extract-high pattern in the PatFrag does not seem to
// automatically increase the complexity.
let AddedComplexity = 1 in {
@@ -1882,24 +2043,41 @@ let SubtargetPredicate = HasPackedD16VMem in {
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Base ENC_MUBUF for GFX6, GFX7, GFX10.
+// Base ENC_MUBUF for GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
-class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
- MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
+class Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11 <MUBUF_Pseudo ps, int ef,
+ string real_name = ps.Mnemonic> :
+ MUBUF_Real<ps, real_name>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
+ let Inst{31-26} = 0x38;
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
+}
+
+class MUBUF_Real_gfx11<bits<8> op, MUBUF_Pseudo ps,
+ string real_name = ps.Mnemonic> :
+ Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, SIEncodingFamily.GFX11, real_name> {
+ let Inst{12} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
+ let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
+ let Inst{25-18} = op;
+ let Inst{53} = !if(ps.has_tfe, tfe, ?);
+ let Inst{54} = ps.offen;
+ let Inst{55} = ps.idxen;
+}
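// Worked encoding sketch (illustrative, assuming the OFFSET form): for
// buffer_load_b32, op = 0x14, so Inst{25-18} = 0x14 with the MUBUF major
// opcode 0x38 in Inst{31-26}; the 12-bit immediate offset sits in Inst{11-0},
// the SLC/DLC/GLC cache-policy bits in Inst{12}/Inst{13}/Inst{14}, and
// vdata/srsrc/soffset in Inst{47-40}/Inst{52-48}/Inst{63-56} as laid out in
// the shared base class.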
+
+class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
+ Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> {
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{16} = ps.lds;
let Inst{24-18} = op;
- let Inst{31-26} = 0x38;
- let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
- let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
- let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
- let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
@@ -1914,10 +2092,155 @@ class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> :
}
//===----------------------------------------------------------------------===//
+// MUBUF - GFX11.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in
+multiclass MUBUF_Real_AllAddr_gfx11_Renamed_Impl<bits<8> op, string real_name> {
+ def _BOTHEN_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN"), real_name>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx11", 0>;
+ def _IDXEN_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN"), real_name>,
+ AtomicNoRet<NAME # "_IDXEN_gfx11", 0>;
+ def _OFFEN_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN"), real_name>,
+ AtomicNoRet<NAME # "_OFFEN_gfx11", 0>;
+ def _OFFSET_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET"), real_name>,
+ AtomicNoRet<NAME # "_OFFSET_gfx11", 0>;
+}
+
+multiclass MUBUF_Real_AllAddr_gfx11_Impl<bits<8> op, MUBUF_Pseudo ps> :
+ MUBUF_Real_AllAddr_gfx11_Renamed_Impl<op, ps.Mnemonic>;
+multiclass MUBUF_Real_AllAddr_gfx11<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx11_Impl<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+
+class Pre_gfx11_MUBUF_Name <MUBUF_Pseudo ps, string real_name> :
+ MnemonicAlias<ps.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+multiclass MUBUF_Real_AllAddr_gfx11_Renamed<bits<8> op, string real_name> :
+ MUBUF_Real_AllAddr_gfx11_Renamed_Impl<op, real_name> {
+ def : Pre_gfx11_MUBUF_Name<!cast<MUBUF_Pseudo>(NAME#"_BOTHEN"), real_name>;
+}
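// Usage sketch (illustrative): an instantiation such as
//
//   defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x014, "buffer_load_b32">;
//
// both defines the GFX11 real encodings under the new "buffer_load_b32"
// spelling and, through Pre_gfx11_MUBUF_Name, adds a MnemonicAlias guarded by
// isGFX11Plus so assembly written with the pre-GFX11 mnemonic still matches.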
+
+let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in
+multiclass MUBUF_Real_Atomics_RTN_gfx11_Renamed<bits<8> op, string real_name> {
+ def _BOTHEN_RTN_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN"), real_name>,
+ AtomicNoRet<NAME # "_BOTHEN_gfx11", 1>;
+ def _IDXEN_RTN_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN"), real_name>,
+ AtomicNoRet<NAME # "_IDXEN_gfx11", 1>;
+ def _OFFEN_RTN_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN"), real_name>,
+ AtomicNoRet<NAME # "_OFFEN_gfx11", 1>;
+ def _OFFSET_RTN_gfx11 :
+ MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN"), real_name>,
+ AtomicNoRet<NAME # "_OFFSET_gfx11", 1>;
+}
+
+multiclass MUBUF_Real_Atomics_RTN_gfx11_impl<bits<8> op, MUBUF_Pseudo ps> :
+ MUBUF_Real_Atomics_RTN_gfx11_Renamed<op, ps.Mnemonic>;
+multiclass MUBUF_Real_Atomics_RTN_gfx11<bits<8> op> :
+ MUBUF_Real_Atomics_RTN_gfx11_impl<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+
+multiclass MUBUF_Real_Atomics_gfx11<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx11<op>,
+ MUBUF_Real_Atomics_RTN_gfx11<op>;
+
+multiclass MUBUF_Real_Atomics_gfx11_Renamed<bits<8> op, string real_name> :
+ MUBUF_Real_AllAddr_gfx11_Renamed<op, real_name>,
+ MUBUF_Real_Atomics_RTN_gfx11_Renamed<op, real_name>;
+
+let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
+def BUFFER_GL0_INV_gfx11 : MUBUF_Real_gfx11<0x02B, BUFFER_GL0_INV>;
+def BUFFER_GL1_INV_gfx11 : MUBUF_Real_gfx11<0x02C, BUFFER_GL1_INV>;
+}
+
+defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x014, "buffer_load_b32">;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x015, "buffer_load_b64">;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x016, "buffer_load_b96">;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x017, "buffer_load_b128">;
+defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x020, "buffer_load_d16_b16">;
+defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x008, "buffer_load_d16_format_x">;
+defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x009, "buffer_load_d16_format_xy">;
+defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00a, "buffer_load_d16_format_xyz">;
+defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00b, "buffer_load_d16_format_xyzw">;
+defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x023, "buffer_load_d16_hi_b16">;
+defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x026, "buffer_load_d16_hi_format_x">;
+defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x022, "buffer_load_d16_hi_i8">;
+defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x021, "buffer_load_d16_hi_u8">;
+defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01f, "buffer_load_d16_i8">;
+defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01e, "buffer_load_d16_u8">;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x000>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x001>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x002>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x003>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x011, "buffer_load_i8">;
+defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x013, "buffer_load_i16">;
+defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x010, "buffer_load_u8">;
+defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x012, "buffer_load_u16">;
+defm BUFFER_LOAD_LDS_B32 : MUBUF_Real_AllAddr_gfx11<0x031>;
+defm BUFFER_LOAD_LDS_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x032>;
+defm BUFFER_LOAD_LDS_I8 : MUBUF_Real_AllAddr_gfx11<0x02e>;
+defm BUFFER_LOAD_LDS_I16 : MUBUF_Real_AllAddr_gfx11<0x030>;
+defm BUFFER_LOAD_LDS_U8 : MUBUF_Real_AllAddr_gfx11<0x02d>;
+defm BUFFER_LOAD_LDS_U16 : MUBUF_Real_AllAddr_gfx11<0x02f>;
+defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x018, "buffer_store_b8">;
+defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x019, "buffer_store_b16">;
+defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x01A, "buffer_store_b32">;
+defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01B, "buffer_store_b64">;
+defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01C, "buffer_store_b96">;
+defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01D, "buffer_store_b128">;
+defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x00C, "buffer_store_d16_format_x">;
+defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x00D, "buffer_store_d16_format_xy">;
+defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00E, "buffer_store_d16_format_xyz">;
+defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00F, "buffer_store_d16_format_xyzw">;
+defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x024, "buffer_store_d16_hi_b8">;
+defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x025, "buffer_store_d16_hi_b16">;
+defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x027, "buffer_store_d16_hi_format_x">;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x004>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x005>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x006>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x007>;
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomics_gfx11<0x056>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomics_gfx11_Renamed<0x035, "buffer_atomic_add_u32">;
+defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x043, "buffer_atomic_add_u64">;
+defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomics_gfx11_Renamed<0x03C, "buffer_atomic_and_b32">;
+defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x049, "buffer_atomic_and_b64">;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x034, "buffer_atomic_cmpswap_b32">;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x042, "buffer_atomic_cmpswap_b64">;
+defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x050, "buffer_atomic_cmpswap_f32">;
+defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx11_Renamed<0x037, "buffer_atomic_csub_u32">;
+def : MnemonicAlias<"buffer_atomic_csub", "buffer_atomic_csub_u32">, Requires<[isGFX11Plus]>;
+defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx11_Renamed<0x040, "buffer_atomic_dec_u32">;
+defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04D, "buffer_atomic_dec_u64">;
+defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx11_Renamed<0x03F, "buffer_atomic_inc_u32">;
+defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04C, "buffer_atomic_inc_u64">;
+defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x052, "buffer_atomic_max_f32">;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x03A, "buffer_atomic_max_i32">;
+defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x047, "buffer_atomic_max_i64">;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x03B, "buffer_atomic_max_u32">;
+defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x048, "buffer_atomic_max_u64">;
+defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x051, "buffer_atomic_min_f32">;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x038, "buffer_atomic_min_i32">;
+defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x045, "buffer_atomic_min_i64">;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x039, "buffer_atomic_min_u32">;
+defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x046, "buffer_atomic_min_u64">;
+defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx11_Renamed<0x03D, "buffer_atomic_or_b32">;
+defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04A, "buffer_atomic_or_b64">;
+defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomics_gfx11_Renamed<0x036, "buffer_atomic_sub_u32">;
+defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x044, "buffer_atomic_sub_u64">;
+defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x033, "buffer_atomic_swap_b32">;
+defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x041, "buffer_atomic_swap_b64">;
+defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx11_Renamed<0x03E, "buffer_atomic_xor_b32">;
+defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04B, "buffer_atomic_xor_b64">;
+
+//===----------------------------------------------------------------------===//
// MUBUF - GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> {
def _BOTHEN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
@@ -1929,23 +2252,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
}
multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> {
- def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
- MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
- def _OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
- MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
- def _IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
- MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
- def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
- MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;
-
- def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
- MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
- def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
- MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
- def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
- MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
- def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
- MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
+ def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+
+ def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>;
+ def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>;
+ def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>;
+ def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>;
}
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
def _BOTHEN_RTN_gfx10 :
@@ -1976,7 +2291,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
AtomicNoRet<NAME # "_OFFSET_gfx10", 0>;
}
-} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>;
@@ -2033,27 +2348,17 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
}
multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> {
- def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
- MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
- def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
- MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
- def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
- MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
- def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
- MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
- def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
- MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;
-
- def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
- MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
- def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
- MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
- def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
- MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
- def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
- MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
- def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
- MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
+ def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+
+ def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>;
+ def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>;
+ def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>;
+ def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>;
+ def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>;
}
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
def _ADDR64_gfx6_gfx7 :
@@ -2167,26 +2472,89 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
//===----------------------------------------------------------------------===//
-// Base ENC_MTBUF for GFX6, GFX7, GFX10.
+// Base ENC_MTBUF for GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
-class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
- MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
+class Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11<MTBUF_Pseudo ps, int ef,
+ string real_name = ps.Mnemonic> :
+ MTBUF_Real<ps, real_name>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
- let Inst{12} = ps.offen;
- let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
- let Inst{18-16} = op;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
+}
+
+class Base_MTBUF_Real_gfx11<bits<4> op, MTBUF_Pseudo ps,
+ string real_name = ps.Mnemonic> :
+ Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, SIEncodingFamily.GFX11, real_name> {
+ let Inst{12} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
+ let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
+ let Inst{18-15} = op;
+ let Inst{25-19} = format;
+ let Inst{53} = !if(ps.has_tfe, tfe, ?);
+ let Inst{54} = ps.offen;
+ let Inst{55} = ps.idxen;
+}
+
+class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
+ Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> {
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{18-16} = op;
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
- let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
//===----------------------------------------------------------------------===//
+// MTBUF - GFX11.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in
+multiclass MTBUF_Real_AllAddr_gfx11_Renamed_Impl<bits<4> op, string real_name> {
+ def _BOTHEN_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN"), real_name>;
+ def _IDXEN_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN"), real_name>;
+ def _OFFEN_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN"), real_name>;
+ def _OFFSET_gfx11 :
+ Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET"), real_name>;
+}
+
+multiclass MTBUF_Real_AllAddr_gfx11_Impl<bits<4> op, MTBUF_Pseudo ps>
+ : MTBUF_Real_AllAddr_gfx11_Renamed_Impl<op, ps.Mnemonic>;
+multiclass MTBUF_Real_AllAddr_gfx11<bits<4> op>
+ : MTBUF_Real_AllAddr_gfx11_Impl<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+
+class Pre_gfx11_MTBUF_Name <MTBUF_Pseudo ps, string real_name>
+ : MnemonicAlias<ps.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+multiclass MTBUF_Real_AllAddr_gfx11_Renamed<bits<4> op, string real_name>
+ : MTBUF_Real_AllAddr_gfx11_Renamed_Impl<op, real_name> {
+ def : Pre_gfx11_MTBUF_Name<!cast<MTBUF_Pseudo>(NAME#"_BOTHEN"), real_name>;
+}
+
+defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x008, "tbuffer_load_d16_format_x">;
+defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x009, "tbuffer_load_d16_format_xy">;
+defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00a, "tbuffer_load_d16_format_xyz">;
+defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00b, "tbuffer_load_d16_format_xyzw">;
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x000>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x001>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x002>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x003>;
+defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x00c, "tbuffer_store_d16_format_x">;
+defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x00d, "tbuffer_store_d16_format_xy">;
+defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00e, "tbuffer_store_d16_format_xyz">;
+defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00f, "tbuffer_store_d16_format_xyzw">;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x004>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x005>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x006>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x007>;
+
+//===----------------------------------------------------------------------===//
// MTBUF - GFX10.
//===----------------------------------------------------------------------===//
@@ -2197,7 +2565,7 @@ class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
let Inst{53} = op{3};
}
-let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> {
def _BOTHEN_gfx10 :
MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
@@ -2208,7 +2576,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
def _OFFSET_gfx10 :
MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
}
-} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>;
defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>;
@@ -2303,9 +2671,28 @@ class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps,
let Inst{55} = acc;
}
+class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> :
+ MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> {
+ let AssemblerPredicate = isGFX940Plus;
+ let DecoderNamespace = "GFX9";
+ let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands);
+
+ let Inst{55} = acc;
+}
+
multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
def _vi : MUBUF_Real_vi<op, ps>;
- def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>;
+
+ foreach _ = BoolToList<!not(ps.FPAtomic)>.ret in
+ def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
+
+ foreach _ = BoolToList<ps.FPAtomic>.ret in {
+ def _gfx90a : MUBUF_Real_gfx90a<op, ps, 0> {
+ let SubtargetPredicate = isGFX90AOnly;
+ let AssemblerPredicate = isGFX90AOnly;
+ }
+ def _gfx940 : MUBUF_Real_gfx940<op, ps>;
+ }
}
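// Note on the idiom above (assuming the usual TableGen helper semantics):
// BoolToList<b>.ret is a one-element list when b is true and empty otherwise,
// so each "foreach _ = BoolToList<...>.ret in" behaves like a compile-time
// if: non-FP atomics get the plain _gfx90a real, while FP atomics get a
// GFX90A-only _gfx90a real plus a separate _gfx940 encoding.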
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
@@ -2317,41 +2704,25 @@ multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> {
- def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
- MUBUFLdsTable<0, NAME # "_OFFSET_vi">;
- def _OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
- MUBUFLdsTable<0, NAME # "_OFFEN_vi">;
- def _IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
- MUBUFLdsTable<0, NAME # "_IDXEN_vi">;
- def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
- MUBUFLdsTable<0, NAME # "_BOTHEN_vi">;
-
- def _LDS_OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
- MUBUFLdsTable<1, NAME # "_OFFSET_vi">;
- def _LDS_OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
- MUBUFLdsTable<1, NAME # "_OFFEN_vi">;
- def _LDS_IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
- MUBUFLdsTable<1, NAME # "_IDXEN_vi">;
- def _LDS_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
- MUBUFLdsTable<1, NAME # "_BOTHEN_vi">;
-
- def _OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
- MUBUFLdsTable<0, NAME # "_OFFSET_gfx90a">;
- def _OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
- MUBUFLdsTable<0, NAME # "_OFFEN_gfx90a">;
- def _IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
- MUBUFLdsTable<0, NAME # "_IDXEN_gfx90a">;
- def _BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
- MUBUFLdsTable<0, NAME # "_BOTHEN_gfx90a">;
-
- def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
- MUBUFLdsTable<1, NAME # "_OFFSET_gfx90a">;
- def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
- MUBUFLdsTable<1, NAME # "_OFFEN_gfx90a">;
- def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
- MUBUFLdsTable<1, NAME # "_IDXEN_gfx90a">;
- def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
- MUBUFLdsTable<1, NAME # "_BOTHEN_gfx90a">;
+ def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+
+ def _LDS_OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>;
+ def _LDS_OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>;
+ def _LDS_IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>;
+ def _LDS_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>;
+
+ def _OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+
+ def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>;
+ def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>;
+ def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>;
+ def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>;
}
class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
@@ -2424,9 +2795,9 @@ defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_vi <0x11>;
defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_vi <0x12>;
defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_vi <0x13>;
defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_vi <0x14>;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_Lds_vi <0x15>;
-defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_Lds_vi <0x16>;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_Lds_vi <0x17>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_vi <0x16>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>;
defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>;
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x19>;
defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>;
@@ -2481,12 +2852,12 @@ def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
} // End AssemblerPredicate = isGFX8GFX9
-let SubtargetPredicate = HasAtomicFaddInsts in {
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
-} // End SubtargetPredicate = HasAtomicFaddInsts
+} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts
let SubtargetPredicate = isGFX90APlus in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;
@@ -2495,9 +2866,17 @@ let SubtargetPredicate = isGFX90APlus in {
} // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus
def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> {
+ let AsmString = BUFFER_WBL2.Mnemonic; // drop flags
+ let AssemblerPredicate = isGFX90AOnly;
+ let SubtargetPredicate = isGFX90AOnly;
}
def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>;
+let SubtargetPredicate = isGFX940Plus in {
+def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>;
+def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>;
+}
+
class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
MTBUF_Real<ps>,
Enc64,
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index c4043177b618..27b723875aa4 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -52,8 +52,8 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
}
-class DS_Real <DS_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
Enc64 {
let isPseudo = 0;
@@ -72,6 +72,9 @@ class DS_Real <DS_Pseudo ps> :
let IsAtomicRet = ps.IsAtomicRet;
let IsAtomicNoRet = ps.IsAtomicNoRet;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
// encoding fields
bits<10> vdst;
bits<1> gds;
@@ -172,6 +175,22 @@ multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
}
}
+class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32, RegisterClass src = rc,
+ RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
+ RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
+: DS_Pseudo<opName,
+ (outs dst_op:$vdst),
+ (ins src_op:$data0, offset:$offset),
+ " $vdst, $data0$offset gds"> {
+
+ let has_addr = 0;
+ let has_data1 = 0;
+ let has_gds = 0;
+ let gdsValue = 1;
+ let AsmMatchConverter = "cvtDSGds";
+ let hasSideEffects = 1;
+}
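// Shape of the resulting assembly (a sketch; register numbers are arbitrary
// and the offset printing is assumed): with has_addr and has_data1 cleared
// and gdsValue forced to 1, pseudos built from this class take only a
// destination, one data source and an offset, e.g. for the GFX11 defs below:
//
//   ds_add_gs_reg_rtn v[0:1], v2 offset:16 gds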
+
class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
: DS_Pseudo<opName,
@@ -462,6 +481,22 @@ let SubtargetPredicate = isGFX90APlus in {
defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64, "ds_add_f64">;
} // End SubtargetPredicate = isGFX90APlus
+let SubtargetPredicate = isGFX940Plus in {
+ defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">;
+ defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32, "ds_pk_add_f16">;
+ defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">;
+ defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32, "ds_pk_add_bf16">;
+} // End SubtargetPredicate = isGFX940Plus
+
+defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
+defm DS_CMPSTORE_F32 : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
+defm DS_CMPSTORE_B64 : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
+defm DS_CMPSTORE_F64 : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
+defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32, "ds_cmpstore_b32">;
+defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32, "ds_cmpstore_f32">;
+defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64, "ds_cmpstore_b64">;
+defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64, "ds_cmpstore_f64">;
+
defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">;
@@ -619,6 +654,8 @@ def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
def DS_CONSUME : DS_0A_RET<"ds_consume">;
def DS_APPEND : DS_0A_RET<"ds_append">;
+
+let SubtargetPredicate = isNotGFX90APlus in
def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
//===----------------------------------------------------------------------===//
@@ -667,6 +704,18 @@ let SubtargetPredicate = HasLDSFPAtomicAdd, OtherPredicates = [HasDsSrc2Insts] i
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for GFX11 and newer.
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGFX11Plus in {
+
+def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
+def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
+
+} // let SubtargetPredicate = isGFX11Plus
+
//===----------------------------------------------------------------------===//
// DS Patterns
//===----------------------------------------------------------------------===//
@@ -777,14 +826,14 @@ foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
-defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_local_8">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_local_8">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_local_16">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
-defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [HasD16LoadStore] in {
def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
}
@@ -870,15 +919,30 @@ defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
let SubtargetPredicate = HasUnalignedAccessMode in {
-// FIXME: From performance point of view, is ds_read_b96/ds_write_b96 better choice
-// for unaligned accesses?
+// Select 64-bit loads and stores with an alignment of less than 4 bytes as a
+// single ds_read_b64/ds_write_b64 instruction, as this is faster than the
+// ds_read2_b32/ds_write2_b32 that would otherwise be used. In that case each
+// b32 access would still be misaligned, and there would be two of them.
+foreach vt = VReg_64.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B64, vt, "load_align_less_than_4_local">;
+defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align_less_than_4_local">;
+}
+
+// Selection will split most unaligned 3-dword accesses for performance
+// reasons when that is beneficial. Keep these two patterns for the remaining cases.
foreach vt = VReg_96.RegTypes in {
defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
}
-// For performance reasons, *do not* select ds_read_b128/ds_write_b128 for unaligned
-// accesses.
+// Select 128-bit loads and stores with an alignment of less than 4 bytes as a
+// single ds_read_b128/ds_write_b128 instruction, as this is faster than the
+// ds_read2_b64/ds_write2_b64 that would otherwise be used. In that case each
+// b64 access would still be misaligned, and there would be two of them.
+foreach vt = VReg_128.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B128, vt, "load_align_less_than_4_local">;
+defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
+}
} // End SubtargetPredicate = HasUnalignedAccessMode
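// A minimal sketch of the width-selection policy the two comment blocks above
// describe, assuming the caller has already established alignment < 4 bytes
// and that unaligned-access mode is enabled. pickDsLoadWidth is a hypothetical
// helper for illustration, not an LLVM API.
#include <cassert>

enum class DsWidth { B32, B64, B96, B128 };

// One under-aligned wide access beats the two under-aligned halves that
// ds_read2_b32/ds_read2_b64 would otherwise issue.
static DsWidth pickDsLoadWidth(unsigned SizeInBits) {
  assert(SizeInBits == 32 || SizeInBits == 64 || SizeInBits == 96 ||
         SizeInBits == 128);
  switch (SizeInBits) {
  case 64:  return DsWidth::B64;  // single ds_read_b64, not 2x b32
  case 96:  return DsWidth::B96;  // kept for accesses selection did not split
  case 128: return DsWidth::B128; // single ds_read_b128, not 2x b64
  default:  return DsWidth::B32;
  }
}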
@@ -904,69 +968,143 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>;
}
+multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
+ ValueType vt, string frag> {
+ let OtherPredicates = [LDSRequiresM0Init] in {
+ def : DSAtomicRetPat<inst, vt,
+ !cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>;
+ def : DSAtomicRetPat<noRetInst, vt,
+ !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>;
+ }
+
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
+ !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
+ def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
+ !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
+ }
+ def : DSAtomicRetPat<inst, vt,
+ !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
+ def : DSAtomicRetPat<noRetInst, vt,
+ !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
+}
-class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
+
+
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
+// Caution: the order of src and cmp is the *opposite* of that used by the BUFFER_ATOMIC_CMPSWAP opcode.
+class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
(inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))
>;
-multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, ValueType vt, string frag> {
+multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
+ string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
+ def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_ret_"#vt.Size)>;
+ def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size)>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
- def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag#"_local_"#vt.Size)>;
+ def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
+ !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
+ def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
+ !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
}
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>;
+ def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
+ def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
}
+} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
+
+let SubtargetPredicate = isGFX11Plus in {
+// The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
+class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
+ (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
+ (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))
+>;
+multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> {
+ def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
+ !cast<PatFrag>(frag#"_local_ret_"#vt.Size)>;
+ def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
+ !cast<PatFrag>(frag#"_local_noret_"#vt.Size)>;
+
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_ret_"#vt.Size), 1>;
+ def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size), 1>;
+}
+} // End SubtargetPredicate = isGFX11Plus
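// A minimal sketch (not taken from this patch) of the LDS compare-and-swap
// semantics both pattern classes lower to; the classes differ only in where
// $cmp and $swap sit in the machine operand list:
//
//   pre-GFX11 (ds_cmpst_*):   inst $ptr, $cmp,  $swap
//   GFX11+   (ds_cmpstore_*): inst $ptr, $swap, $cmp
#include <cstdint>

// Returns the old value at *Ptr; stores Swap only if the old value equals Cmp.
static uint32_t dsCmpSwapModel(uint32_t *Ptr, uint32_t Cmp, uint32_t Swap) {
  uint32_t Old = *Ptr;
  if (Old == Cmp)
    *Ptr = Swap;
  return Old;
}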
// 32-bit atomics.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
+defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
+defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
+defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_inc">;
+defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_dec">;
+defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
+defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32, DS_OR_B32, i32, "atomic_load_or">;
+defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
+defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I32, DS_MIN_I32, i32, "atomic_load_min">;
+defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I32, DS_MAX_I32, i32, "atomic_load_max">;
+defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U32, DS_MIN_U32, i32, "atomic_load_umin">;
+defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U32, DS_MAX_U32, i32, "atomic_load_umax">;
+defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F32, DS_MIN_F32, f32, "atomic_load_fmin">;
+defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F32, DS_MAX_F32, f32, "atomic_load_fmax">;
+
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
+defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;
+}
+
+let SubtargetPredicate = isGFX11Plus in {
+defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B32, DS_CMPSTORE_B32, i32, "atomic_cmp_swap">;
+}
let SubtargetPredicate = HasLDSFPAtomicAdd in {
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
+defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd">;
}
// 64-bit atomics.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_F64, f64, "atomic_load_fmin">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_F64, f64, "atomic_load_fmax">;
-
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">;
+defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
+defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
+defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_inc">;
+defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_dec">;
+defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
+defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64, DS_OR_B64, i64, "atomic_load_or">;
+defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
+defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I64, DS_MIN_I64, i64, "atomic_load_min">;
+defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I64, DS_MAX_I64, i64, "atomic_load_max">;
+defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U64, DS_MIN_U64, i64, "atomic_load_umin">;
+defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U64, DS_MAX_U64, i64, "atomic_load_umax">;
+defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F64, DS_MIN_F64, f64, "atomic_load_fmin">;
+defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F64, DS_MAX_F64, f64, "atomic_load_fmax">;
+
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
+defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B64, DS_CMPST_B64, i64, "atomic_cmp_swap">;
+} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
+
+let SubtargetPredicate = isGFX11Plus in {
+defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp_swap">;
+} // End SubtargetPredicate = isGFX11Plus
let SubtargetPredicate = isGFX90APlus in {
-def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>;
+def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_ret_64>;
+def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_64>;
+}
+
+let SubtargetPredicate = isGFX940Plus in {
+def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_ret_32>;
+def : DSAtomicRetPat<DS_PK_ADD_F16, v2f16, atomic_load_fadd_v2f16_local_noret_32>;
+def : GCNPat <
+ (v2i16 (int_amdgcn_ds_fadd_v2bf16_ret i32:$ptr, v2i16:$src)),
+ (DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
+>;
+def : GCNPat <
+ (v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
+ (DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
+>;
}
def : Pat <
@@ -974,16 +1112,44 @@ def : Pat <
(DS_ORDERED_COUNT $value, (as_i16imm $offset))
>;
+def : GCNPat <
+ (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
+ (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
+>;
+
+def : GCNPat <
+ (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
+ (EXTRACT_SUBREG
+ (i64 (COPY_TO_REGCLASS
+ (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
+ VReg_64)),
+ sub0)
+>;
+
+def : GCNPat <
+ (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
+ (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
+>;
+
+def : GCNPat <
+ (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
+ (EXTRACT_SUBREG
+ (i64 (COPY_TO_REGCLASS
+ (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
+ VReg_64)),
+ sub0)
+>;
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Base ENC_DS for GFX6, GFX7, GFX10.
+// Base ENC_DS for GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
-class Base_DS_Real_gfx6_gfx7_gfx10<bits<8> op, DS_Pseudo ps, int ef> :
- DS_Real<ps>, SIMCInstr <ps.Mnemonic, ef> {
+class Base_DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op, DS_Pseudo ps, int ef, string opName = ps.Mnemonic> :
+ DS_Real<ps, opName>, SIMCInstr <ps.Mnemonic, ef> {
let Inst{7-0} = !if(ps.has_offset0, offset0, 0);
let Inst{15-8} = !if(ps.has_offset1, offset1, 0);
@@ -997,19 +1163,89 @@ class Base_DS_Real_gfx6_gfx7_gfx10<bits<8> op, DS_Pseudo ps, int ef> :
}
//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" in {
+ multiclass DS_Real_gfx11<bits<8> op> {
+ def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
+ SIEncodingFamily.GFX11>;
+ }
+
+ multiclass DS_Real_Renamed_gfx11<bits<8> op, DS_Pseudo backing_pseudo, string real_name> {
+ def _gfx11 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, backing_pseudo, SIEncodingFamily.GFX11, real_name>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+ }
+} // End AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11"
+
+defm DS_STORE_B32 : DS_Real_Renamed_gfx11<0x00d, DS_WRITE_B32, "ds_store_b32">;
+defm DS_STORE_2ADDR_B32 : DS_Real_Renamed_gfx11<0x00e, DS_WRITE2_B32, "ds_store_2addr_b32">;
+defm DS_STORE_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11<0x00f, DS_WRITE2ST64_B32, "ds_store_2addr_stride64_b32">;
+defm DS_STORE_B8 : DS_Real_Renamed_gfx11<0x01e, DS_WRITE_B8, "ds_store_b8">;
+defm DS_STORE_B16 : DS_Real_Renamed_gfx11<0x01f, DS_WRITE_B16, "ds_store_b16">;
+defm DS_STOREXCHG_RTN_B32 : DS_Real_Renamed_gfx11<0x02d, DS_WRXCHG_RTN_B32, "ds_storexchg_rtn_b32">;
+defm DS_STOREXCHG_2ADDR_RTN_B32 : DS_Real_Renamed_gfx11<0x02e, DS_WRXCHG2_RTN_B32, "ds_storexchg_2addr_rtn_b32">;
+defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32 : DS_Real_Renamed_gfx11<0x02f, DS_WRXCHG2ST64_RTN_B32, "ds_storexchg_2addr_stride64_rtn_b32">;
+defm DS_LOAD_B32 : DS_Real_Renamed_gfx11<0x036, DS_READ_B32, "ds_load_b32">;
+defm DS_LOAD_2ADDR_B32 : DS_Real_Renamed_gfx11<0x037, DS_READ2_B32, "ds_load_2addr_b32">;
+defm DS_LOAD_2ADDR_STRIDE64_B32 : DS_Real_Renamed_gfx11<0x038, DS_READ2ST64_B32, "ds_load_2addr_stride64_b32">;
+defm DS_LOAD_I8 : DS_Real_Renamed_gfx11<0x039, DS_READ_I8, "ds_load_i8">;
+defm DS_LOAD_U8 : DS_Real_Renamed_gfx11<0x03a, DS_READ_U8, "ds_load_u8">;
+defm DS_LOAD_I16 : DS_Real_Renamed_gfx11<0x03b, DS_READ_I16, "ds_load_i16">;
+defm DS_LOAD_U16 : DS_Real_Renamed_gfx11<0x03c, DS_READ_U16, "ds_load_u16">;
+defm DS_STORE_B64 : DS_Real_Renamed_gfx11<0x04d, DS_WRITE_B64, "ds_store_b64">;
+defm DS_STORE_2ADDR_B64 : DS_Real_Renamed_gfx11<0x04e, DS_WRITE2_B64, "ds_store_2addr_b64">;
+defm DS_STORE_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11<0x04f, DS_WRITE2ST64_B64, "ds_store_2addr_stride64_b64">;
+defm DS_STOREXCHG_RTN_B64 : DS_Real_Renamed_gfx11<0x06d, DS_WRXCHG_RTN_B64, "ds_storexchg_rtn_b64">;
+defm DS_STOREXCHG_2ADDR_RTN_B64 : DS_Real_Renamed_gfx11<0x06e, DS_WRXCHG2_RTN_B64, "ds_storexchg_2addr_rtn_b64">;
+defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64 : DS_Real_Renamed_gfx11<0x06f, DS_WRXCHG2ST64_RTN_B64, "ds_storexchg_2addr_stride64_rtn_b64">;
+defm DS_LOAD_B64 : DS_Real_Renamed_gfx11<0x076, DS_READ_B64, "ds_load_b64">;
+defm DS_LOAD_2ADDR_B64 : DS_Real_Renamed_gfx11<0x077, DS_READ2_B64, "ds_load_2addr_b64">;
+defm DS_LOAD_2ADDR_STRIDE64_B64 : DS_Real_Renamed_gfx11<0x078, DS_READ2ST64_B64, "ds_load_2addr_stride64_b64">;
+defm DS_STORE_B8_D16_HI : DS_Real_Renamed_gfx11<0x0a0, DS_WRITE_B8_D16_HI, "ds_store_b8_d16_hi">;
+defm DS_STORE_B16_D16_HI : DS_Real_Renamed_gfx11<0x0a1, DS_WRITE_B16_D16_HI, "ds_store_b16_d16_hi">;
+defm DS_LOAD_U8_D16 : DS_Real_Renamed_gfx11<0x0a2, DS_READ_U8_D16, "ds_load_u8_d16">;
+defm DS_LOAD_U8_D16_HI : DS_Real_Renamed_gfx11<0x0a3, DS_READ_U8_D16_HI, "ds_load_u8_d16_hi">;
+defm DS_LOAD_I8_D16 : DS_Real_Renamed_gfx11<0x0a4, DS_READ_I8_D16, "ds_load_i8_d16">;
+defm DS_LOAD_I8_D16_HI : DS_Real_Renamed_gfx11<0x0a5, DS_READ_I8_D16_HI, "ds_load_i8_d16_hi">;
+defm DS_LOAD_U16_D16 : DS_Real_Renamed_gfx11<0x0a6, DS_READ_U16_D16, "ds_load_u16_d16">;
+defm DS_LOAD_U16_D16_HI : DS_Real_Renamed_gfx11<0x0a7, DS_READ_U16_D16_HI, "ds_load_u16_d16_hi">;
+defm DS_STORE_ADDTID_B32 : DS_Real_Renamed_gfx11<0x0b0, DS_WRITE_ADDTID_B32, "ds_store_addtid_b32">;
+defm DS_LOAD_ADDTID_B32 : DS_Real_Renamed_gfx11<0x0b1, DS_READ_ADDTID_B32, "ds_load_addtid_b32">;
+defm DS_STORE_B96 : DS_Real_Renamed_gfx11<0x0de, DS_WRITE_B96, "ds_store_b96">;
+defm DS_STORE_B128 : DS_Real_Renamed_gfx11<0x0df, DS_WRITE_B128, "ds_store_b128">;
+defm DS_LOAD_B96 : DS_Real_Renamed_gfx11<0x0fe, DS_READ_B96, "ds_load_b96">;
+defm DS_LOAD_B128 : DS_Real_Renamed_gfx11<0x0ff, DS_READ_B128, "ds_load_b128">;
+
+// DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, and the data operands
+// (src and cmp) are also swapped compared to pre-GFX11.
+// Note: no mnemonic alias is generated, to avoid potential ambiguity caused by the semantics change.
+
+defm DS_CMPSTORE_B32 : DS_Real_gfx11<0x010>;
+defm DS_CMPSTORE_F32 : DS_Real_gfx11<0x011>;
+defm DS_CMPSTORE_RTN_B32 : DS_Real_gfx11<0x030>;
+defm DS_CMPSTORE_RTN_F32 : DS_Real_gfx11<0x031>;
+defm DS_CMPSTORE_B64 : DS_Real_gfx11<0x050>;
+defm DS_CMPSTORE_F64 : DS_Real_gfx11<0x051>;
+defm DS_CMPSTORE_RTN_B64 : DS_Real_gfx11<0x070>;
+defm DS_CMPSTORE_RTN_F64 : DS_Real_gfx11<0x071>;
+
+defm DS_ADD_RTN_F32 : DS_Real_gfx11<0x079>;
+defm DS_ADD_GS_REG_RTN : DS_Real_gfx11<0x07a>;
+defm DS_SUB_GS_REG_RTN : DS_Real_gfx11<0x07b>;
+
+//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
multiclass DS_Real_gfx10<bits<8> op> {
- def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+ def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
SIEncodingFamily.GFX10>;
}
-} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
-defm DS_ADD_F32 : DS_Real_gfx10<0x015>;
defm DS_ADD_RTN_F32 : DS_Real_gfx10<0x055>;
-defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
defm DS_WRITE_B8_D16_HI : DS_Real_gfx10<0x0a0>;
defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
defm DS_READ_U8_D16 : DS_Real_gfx10<0x0a2>;
@@ -1020,95 +1256,118 @@ defm DS_READ_U16_D16 : DS_Real_gfx10<0x0a6>;
defm DS_READ_U16_D16_HI : DS_Real_gfx10<0x0a7>;
defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
defm DS_READ_ADDTID_B32 : DS_Real_gfx10<0x0b1>;
-defm DS_PERMUTE_B32 : DS_Real_gfx10<0x0b2>;
-defm DS_BPERMUTE_B32 : DS_Real_gfx10<0x0b3>;
//===----------------------------------------------------------------------===//
-// GFX7, GFX10.
+// GFX10, GFX11.
+//===----------------------------------------------------------------------===//
+
+multiclass DS_Real_gfx10_gfx11<bits<8> op> :
+ DS_Real_gfx10<op>, DS_Real_gfx11<op>;
+
+defm DS_ADD_F32 : DS_Real_gfx10_gfx11<0x015>;
+defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
+defm DS_PERMUTE_B32 : DS_Real_gfx10_gfx11<0x0b2>;
+defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11<0x0b3>;
+
+//===----------------------------------------------------------------------===//
+// GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
multiclass DS_Real_gfx7<bits<8> op> {
- def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+ def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
SIEncodingFamily.SI>;
}
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op> :
+ DS_Real_gfx7<op>, DS_Real_gfx10_gfx11<op>;
+
multiclass DS_Real_gfx7_gfx10<bits<8> op> :
DS_Real_gfx7<op>, DS_Real_gfx10<op>;
// FIXME-GFX7: Add tests when upstreaming this part.
-defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10<0x018>;
-defm DS_WRAP_RTN_B32 : DS_Real_gfx7_gfx10<0x034>;
-defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10<0x07e>;
+defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
+defm DS_WRAP_RTN_B32 : DS_Real_gfx7_gfx10_gfx11<0x034>;
+defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10_gfx11<0x07e>;
defm DS_WRITE_B96 : DS_Real_gfx7_gfx10<0x0de>;
defm DS_WRITE_B128 : DS_Real_gfx7_gfx10<0x0df>;
defm DS_READ_B96 : DS_Real_gfx7_gfx10<0x0fe>;
defm DS_READ_B128 : DS_Real_gfx7_gfx10<0x0ff>;
//===----------------------------------------------------------------------===//
-// GFX6, GFX7, GFX10.
+// GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
multiclass DS_Real_gfx6_gfx7<bits<8> op> {
- def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+ def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11<op, !cast<DS_Pseudo>(NAME),
SIEncodingFamily.SI>;
}
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
+ DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11<op>;
+
multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> :
DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
-defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10<0x000>;
-defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10<0x001>;
-defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10<0x002>;
-defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10<0x003>;
-defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10<0x004>;
-defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10<0x005>;
-defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10<0x006>;
-defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10<0x007>;
-defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10<0x008>;
-defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10<0x009>;
-defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00a>;
-defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00b>;
-defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00c>;
+defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x000>;
+defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x001>;
+defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x002>;
+defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
+defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
+defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
+defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x006>;
+defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x007>;
+defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x008>;
+defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x009>;
+defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00a>;
+defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00b>;
+defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x00c>;
+
defm DS_WRITE_B32 : DS_Real_gfx6_gfx7_gfx10<0x00d>;
defm DS_WRITE2_B32 : DS_Real_gfx6_gfx7_gfx10<0x00e>;
defm DS_WRITE2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x00f>;
defm DS_CMPST_B32 : DS_Real_gfx6_gfx7_gfx10<0x010>;
defm DS_CMPST_F32 : DS_Real_gfx6_gfx7_gfx10<0x011>;
-defm DS_MIN_F32 : DS_Real_gfx6_gfx7_gfx10<0x012>;
-defm DS_MAX_F32 : DS_Real_gfx6_gfx7_gfx10<0x013>;
-defm DS_NOP : DS_Real_gfx6_gfx7_gfx10<0x014>;
-defm DS_GWS_INIT : DS_Real_gfx6_gfx7_gfx10<0x019>;
-defm DS_GWS_SEMA_V : DS_Real_gfx6_gfx7_gfx10<0x01a>;
-defm DS_GWS_SEMA_BR : DS_Real_gfx6_gfx7_gfx10<0x01b>;
-defm DS_GWS_SEMA_P : DS_Real_gfx6_gfx7_gfx10<0x01c>;
-defm DS_GWS_BARRIER : DS_Real_gfx6_gfx7_gfx10<0x01d>;
+
+defm DS_MIN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
+defm DS_MAX_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
+defm DS_NOP : DS_Real_gfx6_gfx7_gfx10_gfx11<0x014>;
+defm DS_GWS_INIT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
+defm DS_GWS_SEMA_V : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
+defm DS_GWS_SEMA_BR : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
+defm DS_GWS_SEMA_P : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
+defm DS_GWS_BARRIER : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
+
defm DS_WRITE_B8 : DS_Real_gfx6_gfx7_gfx10<0x01e>;
defm DS_WRITE_B16 : DS_Real_gfx6_gfx7_gfx10<0x01f>;
-defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x020>;
-defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x021>;
-defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x022>;
-defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x023>;
-defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x024>;
-defm DS_MIN_RTN_I32 : DS_Real_gfx6_gfx7_gfx10<0x025>;
-defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10<0x026>;
-defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x027>;
-defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x028>;
-defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x029>;
-defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02a>;
-defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02b>;
-defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02c>;
+
+defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x020>;
+defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x021>;
+defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x022>;
+defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x023>;
+defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x024>;
+defm DS_MIN_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x025>;
+defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x026>;
+defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x027>;
+defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x028>;
+defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x029>;
+defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02a>;
+defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02b>;
+defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x02c>;
+
defm DS_WRXCHG_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02d>;
defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02e>;
defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
defm DS_CMPST_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x030>;
defm DS_CMPST_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x031>;
-defm DS_MIN_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x032>;
-defm DS_MAX_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x033>;
-defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10<0x035>;
+
+defm DS_MIN_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
+defm DS_MAX_RTN_F32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
+defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x035>;
+
defm DS_READ_B32 : DS_Real_gfx6_gfx7_gfx10<0x036>;
defm DS_READ2_B32 : DS_Real_gfx6_gfx7_gfx10<0x037>;
defm DS_READ2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x038>;
@@ -1116,49 +1375,55 @@ defm DS_READ_I8 : DS_Real_gfx6_gfx7_gfx10<0x039>;
defm DS_READ_U8 : DS_Real_gfx6_gfx7_gfx10<0x03a>;
defm DS_READ_I16 : DS_Real_gfx6_gfx7_gfx10<0x03b>;
defm DS_READ_U16 : DS_Real_gfx6_gfx7_gfx10<0x03c>;
-defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10<0x03d>;
-defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10<0x03e>;
-defm DS_ORDERED_COUNT : DS_Real_gfx6_gfx7_gfx10<0x03f>;
-defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10<0x040>;
-defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10<0x041>;
-defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10<0x042>;
-defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10<0x043>;
-defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10<0x044>;
-defm DS_MIN_I64 : DS_Real_gfx6_gfx7_gfx10<0x045>;
-defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10<0x046>;
-defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10<0x047>;
-defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10<0x048>;
-defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10<0x049>;
-defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04a>;
-defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04b>;
-defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04c>;
+
+defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03d>;
+defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03e>;
+defm DS_ORDERED_COUNT : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
+defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x040>;
+defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x041>;
+defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x042>;
+defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x043>;
+defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x044>;
+defm DS_MIN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x045>;
+defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x046>;
+defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x047>;
+defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x048>;
+defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x049>;
+defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04a>;
+defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04b>;
+defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x04c>;
+
defm DS_WRITE_B64 : DS_Real_gfx6_gfx7_gfx10<0x04d>;
defm DS_WRITE2_B64 : DS_Real_gfx6_gfx7_gfx10<0x04e>;
defm DS_WRITE2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x04f>;
defm DS_CMPST_B64 : DS_Real_gfx6_gfx7_gfx10<0x050>;
defm DS_CMPST_F64 : DS_Real_gfx6_gfx7_gfx10<0x051>;
-defm DS_MIN_F64 : DS_Real_gfx6_gfx7_gfx10<0x052>;
-defm DS_MAX_F64 : DS_Real_gfx6_gfx7_gfx10<0x053>;
-defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x060>;
-defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x061>;
-defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x062>;
-defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x063>;
-defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x064>;
-defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10<0x065>;
-defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10<0x066>;
-defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x067>;
-defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x068>;
-defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x069>;
-defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06a>;
-defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06b>;
-defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06c>;
+
+defm DS_MIN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
+defm DS_MAX_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
+defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x060>;
+defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x061>;
+defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x062>;
+defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x063>;
+defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x064>;
+defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x065>;
+defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x066>;
+defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x067>;
+defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x068>;
+defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x069>;
+defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06a>;
+defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06b>;
+defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x06c>;
+
defm DS_WRXCHG_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06d>;
defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06e>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
defm DS_CMPST_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x070>;
defm DS_CMPST_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x071>;
-defm DS_MIN_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x072>;
-defm DS_MAX_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x073>;
+
+defm DS_MIN_RTN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>;
+defm DS_MAX_RTN_F64 : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>;
+
defm DS_READ_B64 : DS_Real_gfx6_gfx7_gfx10<0x076>;
defm DS_READ2_B64 : DS_Real_gfx6_gfx7_gfx10<0x077>;
defm DS_READ2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x078>;
@@ -1381,3 +1646,10 @@ let SubtargetPredicate = isGFX90APlus in {
def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>;
def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
} // End SubtargetPredicate = isGFX90APlus
+
+let SubtargetPredicate = isGFX940Plus in {
+ def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>;
+ def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
+ def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
+ def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;
+} // End SubtargetPredicate = isGFX940Plus
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index e2186d4d533e..ccaf646008b1 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -18,15 +18,20 @@
#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/MC/TargetRegistry.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
using namespace llvm;
@@ -70,7 +75,8 @@ static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
}
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
// Our branches take a simm16, but we need two extra bits to account for the
@@ -78,13 +84,13 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
APInt SignedOffset(18, Imm * 4, true);
int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
- if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
+ if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
return MCDisassembler::Success;
return addOperand(Inst, MCOperand::createImm(Imm));
}
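// A minimal sketch of the branch-target arithmetic above in plain C++,
// without APInt. The encoded field is a signed 16-bit count of 4-byte words
// (hence the two extra bits once scaled), and the target is relative to the
// end of the 4-byte instruction. soppBranchTarget is a hypothetical helper
// and assumes a two's-complement host.
#include <cstdint>

static int64_t soppBranchTarget(uint16_t Imm, uint64_t Addr) {
  int64_t ByteOff = static_cast<int16_t>(Imm); // sign-extend simm16
  ByteOff *= 4;                                // word count -> byte offset
  return ByteOff + 4 + static_cast<int64_t>(Addr);
}
// e.g. Imm == 0xFFFF (-1 words) at Addr == 0x100 yields 0x100: the branch
// targets its own address.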
-static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
int64_t Offset;
if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
@@ -95,20 +101,19 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm,
return addOperand(Inst, MCOperand::createImm(Offset));
}
-static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
- uint64_t Addr, const void *Decoder) {
+static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeBoolReg(Val));
}
-#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
-static DecodeStatus StaticDecoderName(MCInst &Inst, \
- unsigned Imm, \
- uint64_t /*Addr*/, \
- const void *Decoder) { \
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
- return addOperand(Inst, DAsm->DecoderName(Imm)); \
-}
+#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
+ static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
+ uint64_t /*Addr*/, \
+ const MCDisassembler *Decoder) { \
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
+ return addOperand(Inst, DAsm->DecoderName(Imm)); \
+ }
#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
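// Based on the reformatted macro above, e.g. DECODE_OPERAND_REG(VGPR_32)
// should expand to roughly the following (illustrative expansion, not a
// separate definition in the patch):
//
//   static DecodeStatus DecodeVGPR_32RegisterClass(MCInst &Inst, unsigned Imm,
//                                                  uint64_t /*Addr*/,
//                                                  const MCDisassembler *Decoder) {
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(Inst, DAsm->decodeOperand_VGPR_32(Imm));
//   }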
@@ -144,155 +149,151 @@ DECODE_OPERAND_REG(AReg_512)
DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)
+DECODE_OPERAND_REG(AV_128)
+DECODE_OPERAND_REG(AVDst_128)
+DECODE_OPERAND_REG(AVDst_512)
-static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}
-static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
- unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}
-static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm));
}
-static DecodeStatus decodeOperand_VS_16(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}
-static DecodeStatus decodeOperand_VS_32(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
}
-static DecodeStatus decodeOperand_AReg_64(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512));
}
-static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
}
-static DecodeStatus decodeOperand_AReg_256(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512));
}
-static DecodeStatus decodeOperand_AReg_512(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
}
-static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
}
-static DecodeStatus decodeOperand_VReg_64(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
}
-static DecodeStatus decodeOperand_VReg_128(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
}
-static DecodeStatus decodeOperand_VReg_256(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
}
-static DecodeStatus decodeOperand_VReg_512(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
}
-static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
}
static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}
static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}
-static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(
Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
}
-static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(
Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}
+static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
+ uint64_t Addr, const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
+}
+
static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
const MCRegisterInfo *MRI) {
if (OpIdx < 0)
@@ -307,10 +308,9 @@ static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}
-static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
AMDGPUDisassembler::OpWidthTy Opw,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
if (!DAsm->isGFX90A()) {
Imm &= 511;
@@ -342,54 +342,41 @@ static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst,
return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}
-static DecodeStatus DecodeAVLdSt_32RegisterClass(MCInst &Inst,
- unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
return decodeOperand_AVLdSt_Any(Inst, Imm,
AMDGPUDisassembler::OPW32, Decoder);
}
-static DecodeStatus DecodeAVLdSt_64RegisterClass(MCInst &Inst,
- unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
return decodeOperand_AVLdSt_Any(Inst, Imm,
AMDGPUDisassembler::OPW64, Decoder);
}
-static DecodeStatus DecodeAVLdSt_96RegisterClass(MCInst &Inst,
- unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
return decodeOperand_AVLdSt_Any(Inst, Imm,
AMDGPUDisassembler::OPW96, Decoder);
}
-static DecodeStatus DecodeAVLdSt_128RegisterClass(MCInst &Inst,
- unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
+static DecodeStatus
+DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
+ const MCDisassembler *Decoder) {
return decodeOperand_AVLdSt_Any(Inst, Imm,
AMDGPUDisassembler::OPW128, Decoder);
}
-static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,
- unsigned Imm,
+static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm,
uint64_t Addr,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
}
-static DecodeStatus decodeOperand_VGPR_32(MCInst &Inst,
- unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm));
-}
-
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
@@ -410,21 +397,15 @@ template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
return Res;
}
-DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
- MCInst &MI,
- uint64_t Inst,
- uint64_t Address) const {
- assert(MI.getOpcode() == 0);
- assert(MI.getNumOperands() == 0);
- MCInst TmpInst;
- HasLiteral = false;
- const auto SavedBytes = Bytes;
- if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
- MI = TmpInst;
- return MCDisassembler::Success;
- }
- Bytes = SavedBytes;
- return MCDisassembler::Fail;
+static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
+ assert(Bytes.size() >= 12);
+ uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(
+ Bytes.data());
+ Bytes = Bytes.slice(8);
+ uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(
+ Bytes.data());
+ Bytes = Bytes.slice(4);
+ return DecoderUInt128(Lo, Hi);
}
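// A minimal reference model of eat12Bytes without llvm::support, showing the
// byte layout it consumes for a 96-bit GFX11 encoding: the first 8 bytes form
// the low qword and the next 4 the high dword of the DecoderUInt128. This
// sketch assumes a little-endian host for brevity, which the endian-aware
// version above does not need to.
#include <cstdint>
#include <cstring>

static void eat12BytesModel(const uint8_t *&Data, uint64_t &Lo, uint64_t &Hi) {
  uint64_t L = 0;
  std::memcpy(&L, Data, 8);     // bytes [0,8)  -> low 64 bits
  uint32_t H = 0;
  std::memcpy(&H, Data + 8, 4); // bytes [8,12) -> high 32 bits
  Data += 12;                   // advance past the consumed bytes
  Lo = L;
  Hi = H;
}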
// The disassembler is greedy, so we need to check FI operand value to
@@ -457,6 +438,29 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
// encodings
+    if (isGFX11Plus() && Bytes.size() >= 12) {
+ DecoderUInt128 DecW = eat12Bytes(Bytes);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW,
+ Address);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+ MI = MCInst(); // clear
+ Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW,
+ Address);
+ if (Res) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
+ convertVOP3PDPPInst(MI);
+ else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
+ convertVOPCDPPInst(MI);
+ break;
+ }
+ Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
+ if (Res)
+ break;
+ }
+ // Reinitialize Bytes
+ Bytes = Bytes_.slice(0, MaxInstBytesNum);
+
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
@@ -475,12 +479,23 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
+ MI = MCInst(); // clear
+ Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
MI = MCInst(); // clear
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
if (Res) break;
+ Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address);
+ if (Res) {
+ if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
+ convertVOPCDPPInst(MI);
+ break;
+ }
+
Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
if (Res) { IsSDWA = true; break; }
@@ -535,6 +550,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
if (Res) break;
+ Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address);
+ if (Res) break;
+
if (Bytes.size() < 4) break;
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
@@ -554,6 +572,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res) break;
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
+ if (Res) break;
+
+ Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address);
+ if (Res)
+ break;
+
+ Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address);
} while (false);
if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
@@ -565,8 +590,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx11 ||
MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
- MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
+ MI.getOpcode() == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx11)) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
@@ -625,8 +653,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = MCDisassembler::Fail;
} else {
for (unsigned i = 0; i < NSAArgs; ++i) {
- MI.insert(MI.begin() + VAddr0Idx + 1 + i,
- decodeOperand_VGPR_32(Bytes[i]));
+ const unsigned VAddrIdx = VAddr0Idx + 1 + i;
+ auto VAddrRCID = MCII->get(MI.getOpcode()).OpInfo[VAddrIdx].RegClass;
+ MI.insert(MI.begin() + VAddrIdx,
+ createRegOperand(VAddrRCID, Bytes[i]));
}
Bytes = Bytes.slice(4 * NSAWords);
}
@@ -636,6 +666,12 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = convertMIMGInst(MI);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
+ Res = convertEXPInst(MI);
+
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
+ Res = convertVINTERPInst(MI);
+
if (Res && IsSDWA)
Res = convertSDWAInst(MI);
@@ -667,6 +703,28 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Res;
}
+DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX11]) {
+ // The MCInst still has these fields even though they are no longer encoded
+ // in the GFX11 instruction.
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
+ }
+ return MCDisassembler::Success;
+}
+
+DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
+ if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
+ // The MCInst has this field that is not directly encoded in the
+ // instruction.
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
+ }
+ return MCDisassembler::Success;
+}
+
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
@@ -692,18 +750,23 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
-
- // Insert dummy unused src modifiers.
- if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src0_modifiers);
-
- if (MI.getNumOperands() < DescNumOps &&
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(0),
- AMDGPU::OpName::src1_modifiers);
-
+ if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
+ convertVOP3PDPPInst(MI);
+ } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
+ AMDGPU::isVOPC64DPP(Opc)) {
+ convertVOPCDPPInst(MI);
+ } else {
+ // Insert dummy unused src modifiers.
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src0_modifiers);
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src1_modifiers);
+ }
return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}
@@ -745,7 +808,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
bool IsNSA = false;
unsigned AddrSize = Info->VAddrDwords;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+ if (isGFX10Plus()) {
unsigned DimIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
int A16Idx =
@@ -757,7 +820,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
- IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
+ IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
if (!IsNSA) {
if (AddrSize > 8)
AddrSize = 16;
@@ -808,9 +872,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
}
+ // If not using NSA on GFX10+, widen address register to correct size.
unsigned NewVAddr0 = AMDGPU::NoRegister;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
- AddrSize != Info->VAddrDwords) {
+ if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) {
unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;
@@ -844,11 +908,84 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
return MCDisassembler::Success;
}
+// Op_sel and neg bits appear both in src_modifiers and in standalone operands.
+// The autogenerated decoder only adds them to src_modifiers, so the bits must
+// be added to the other operands manually.
+DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
+ unsigned Opc = MI.getOpcode();
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
+
+ const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers};
+ unsigned OpSel = 0;
+ unsigned OpSelHi = 0;
+ unsigned NegLo = 0;
+ unsigned NegHi = 0;
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+ if (OpIdx == -1)
+ break;
+ unsigned Val = MI.getOperand(OpIdx).getImm();
+
+ OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
+ OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
+ NegLo |= !!(Val & SISrcMods::NEG) << J;
+ NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
+ }
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(OpSel),
+ AMDGPU::OpName::op_sel);
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(OpSelHi),
+ AMDGPU::OpName::op_sel_hi);
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(NegLo),
+ AMDGPU::OpName::neg_lo);
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(NegHi),
+ AMDGPU::OpName::neg_hi);
+
+ return MCDisassembler::Success;
+}
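// A minimal sketch of the bit gathering done by the loop in
// convertVOP3PDPPInst, isolated into a standalone function. The modifier bit
// positions below are illustrative placeholders; the real constants live in
// SIDefines.h as SISrcMods.
#include <cstdint>

enum SrcModBits : unsigned {
  MOD_NEG      = 1u << 0, // placeholder for SISrcMods::NEG
  MOD_NEG_HI   = 1u << 1, // placeholder for SISrcMods::NEG_HI
  MOD_OP_SEL_0 = 1u << 2, // placeholder for SISrcMods::OP_SEL_0
  MOD_OP_SEL_1 = 1u << 3, // placeholder for SISrcMods::OP_SEL_1
};

// Gathers one bit per source operand (J = 0..2) into packed 3-bit fields.
static void packVop3pMods(const unsigned Mods[3], unsigned &OpSel,
                          unsigned &OpSelHi, unsigned &NegLo, unsigned &NegHi) {
  OpSel = OpSelHi = NegLo = NegHi = 0;
  for (int J = 0; J < 3; ++J) {
    OpSel   |= !!(Mods[J] & MOD_OP_SEL_0) << J;
    OpSelHi |= !!(Mods[J] & MOD_OP_SEL_1) << J;
    NegLo   |= !!(Mods[J] & MOD_NEG) << J;
    NegHi   |= !!(Mods[J] & MOD_NEG_HI) << J;
  }
}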
+
+// Create a dummy "old" operand and insert the optional operands.
+DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
+ unsigned Opc = MI.getOpcode();
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old) != -1)
+ insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src0_modifiers);
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src1_modifiers);
+ return MCDisassembler::Success;
+}
+
DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
int ImmLitIdx) const {
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
unsigned DescNumOps = Desc.getNumOperands();
+ insertNamedMCOperand(MI, MCOperand::createImm(Literal),
+ AMDGPU::OpName::immDeferred);
assert(DescNumOps == MI.getNumOperands());
for (unsigned I = 0; I < DescNumOps; ++I) {
auto &Op = MI.getOperand(I);
@@ -1001,6 +1138,22 @@ MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
+ return decodeSrcOp(OPW128, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const {
+ using namespace AMDGPU::EncValues;
+ assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
+ return decodeSrcOp(OPW128, Val | IS_VGPR);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const {
+ using namespace AMDGPU::EncValues;
+ assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
+ return decodeSrcOp(OPW512, Val | IS_VGPR);
+}
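// A minimal sketch of the implied-bit trick above, assuming
// AMDGPU::EncValues::IS_VGPR equals 1 << 8: the AV dst fields omit bit 8 of
// the source encoding, so the decoders OR it back in before decodeSrcOp.
#include <cassert>
int main() {
  const unsigned IS_VGPR = 1u << 8; // assumed value of the flag bit
  unsigned Val = 0x2A;              // raw field; bit 8 not encoded
  assert((Val & IS_VGPR) == 0);     // mirrors the asserts above
  assert((Val | IS_VGPR) == 0x12A); // full encoding with the implied bit
  return 0;
}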
+
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}
@@ -1075,6 +1228,9 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
if (HasLiteral) {
+ assert(
+ AMDGPU::hasVOPD(STI) &&
+ "Should only decode multiple kimm with VOPD, check VSrc operand types");
if (Literal != Val)
return errOperand(Val, "More than one unique literal is illegal");
}
@@ -1367,6 +1523,20 @@ MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) c
llvm_unreachable("unknown dst register");
}
+// Bit 0 of DstY isn't stored in the instruction, because it's always the
+// opposite of bit 0 of DstX.
+MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
+ unsigned Val) const {
+ int VDstXInd =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
+ assert(VDstXInd != -1);
+ assert(Inst.getOperand(VDstXInd).isReg());
+ unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
+ Val |= ~XDstReg & 1;
+ auto Width = llvm::AMDGPUDisassembler::OPW32;
+ return createRegOperand(getVgprClassId(Width), Val);
+}
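// A standalone sketch of the reconstruction above: bit 0 of vdstY is always
// the complement of bit 0 of vdstX, so OR-ing in ~XDstReg & 1 restores it.
#include <cassert>
int main() {
  unsigned Val = 6;                     // decoded vdstY field, bit 0 unset
  unsigned XDstReg = 4;                 // even vdstX encoding (illustrative)
  assert((Val | (~XDstReg & 1u)) == 7); // even X implies odd Y
  XDstReg = 5;                          // odd vdstX encoding
  assert((Val | (~XDstReg & 1u)) == 6); // odd X implies even Y
  return 0;
}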
+
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
using namespace AMDGPU;
@@ -1381,8 +1551,10 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
case 109: return createRegOperand(TBA_HI);
case 110: return createRegOperand(TMA_LO);
case 111: return createRegOperand(TMA_HI);
- case 124: return createRegOperand(M0);
- case 125: return createRegOperand(SGPR_NULL);
+ case 124:
+ return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
+ case 125:
+ return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
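// GFX11 swapped the encodings of M0 and SGPR_NULL (124 <-> 125), hence the
// subtarget check on both cases above.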
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
case 235: return createRegOperand(SRC_SHARED_BASE);
@@ -1408,7 +1580,14 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
case 106: return createRegOperand(VCC);
case 108: return createRegOperand(TBA);
case 110: return createRegOperand(TMA);
- case 125: return createRegOperand(SGPR_NULL);
+ case 124:
+ if (isGFX11Plus())
+ return createRegOperand(SGPR_NULL);
+ break;
+ case 125:
+ if (!isGFX11Plus())
+ return createRegOperand(SGPR_NULL);
+ break;
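// M0 has no 64-bit encoding, so only SGPR_NULL is recognized here; on a
// mismatch the cases fall out of the switch rather than returning a register.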
case 126: return createRegOperand(EXEC);
case 235: return createRegOperand(SRC_SHARED_BASE);
case 236: return createRegOperand(SRC_SHARED_LIMIT);
@@ -1522,6 +1701,14 @@ bool AMDGPUDisassembler::isGFX10Plus() const {
return AMDGPU::isGFX10Plus(STI);
}
+bool AMDGPUDisassembler::isGFX11() const {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
+}
+
+bool AMDGPUDisassembler::isGFX11Plus() const {
+ return AMDGPU::isGFX11Plus(STI);
+}
+
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}
@@ -1888,10 +2076,10 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
//===----------------------------------------------------------------------===//
// Try to find symbol name for specified label
-bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
- raw_ostream &/*cStream*/, int64_t Value,
- uint64_t /*Address*/, bool IsBranch,
- uint64_t /*Offset*/, uint64_t /*InstSize*/) {
+bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
+ MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
+ uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
+ uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
if (!IsBranch) {
return false;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index eea6074d5281..31869f0917ae 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -15,8 +15,10 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
#define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
+#include "llvm/ADT/APInt.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/DataExtractor.h"
#include <memory>
@@ -27,6 +29,60 @@ class MCOperand;
class MCSubtargetInfo;
class Twine;
+// Exposes an interface expected by autogenerated code in
+// FixedLenDecoderEmitter
+class DecoderUInt128 {
+private:
+ uint64_t Lo = 0;
+ uint64_t Hi = 0;
+
+public:
+ DecoderUInt128() = default;
+ DecoderUInt128(uint64_t Lo, uint64_t Hi = 0) : Lo(Lo), Hi(Hi) {}
+ operator bool() const { return Lo || Hi; }
+ void insertBits(uint64_t SubBits, unsigned BitPosition, unsigned NumBits) {
+ assert(NumBits && NumBits <= 64);
+ assert(SubBits >> 1 >> (NumBits - 1) == 0);
+ assert(BitPosition < 128);
+ if (BitPosition < 64) {
+ Lo |= SubBits << BitPosition;
+ Hi |= SubBits >> 1 >> (63 - BitPosition);
+ } else {
+ Hi |= SubBits << (BitPosition - 64);
+ }
+ }
+ uint64_t extractBitsAsZExtValue(unsigned NumBits,
+ unsigned BitPosition) const {
+ assert(NumBits && NumBits <= 64);
+ assert(BitPosition < 128);
+ uint64_t Val;
+ if (BitPosition < 64)
+ Val = Lo >> BitPosition | Hi << 1 << (63 - BitPosition);
+ else
+ Val = Hi >> (BitPosition - 64);
+ return Val & ((uint64_t(2) << (NumBits - 1)) - 1);
+ }
+ DecoderUInt128 operator&(const DecoderUInt128 &RHS) const {
+ return DecoderUInt128(Lo & RHS.Lo, Hi & RHS.Hi);
+ }
+ DecoderUInt128 operator&(const uint64_t &RHS) const {
+ return *this & DecoderUInt128(RHS);
+ }
+ DecoderUInt128 operator~() const { return DecoderUInt128(~Lo, ~Hi); }
+ bool operator==(const DecoderUInt128 &RHS) const {
+ return Lo == RHS.Lo && Hi == RHS.Hi;
+ }
+ bool operator!=(const DecoderUInt128 &RHS) const {
+ return Lo != RHS.Lo || Hi != RHS.Hi;
+ }
+ bool operator!=(const int &RHS) const {
+ return *this != DecoderUInt128(RHS);
+ }
+ friend raw_ostream &operator<<(raw_ostream &OS, const DecoderUInt128 &RHS) {
+ return OS << APInt(128, {RHS.Lo, RHS.Hi});
+ }
+};
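// A minimal sketch of the two-step shifts above: writing X >> 1 >> (63 - P)
// instead of X >> (64 - P) keeps every shift count below 64, since shifting
// a 64-bit value by 64 is undefined behavior in C++.
#include <cassert>
#include <cstdint>
int main() {
  uint64_t SubBits = 0xFF;
  unsigned P = 60;                                 // insertion straddles Lo/Hi
  assert((SubBits << P) == 0xF000000000000000ull); // low 4 bits go to Lo
  assert((SubBits >> 1 >> (63 - P)) == 0xF);       // high 4 bits go to Hi
  P = 0;                                           // a plain >> 64 would be UB
  assert((SubBits >> 1 >> (63 - P)) == 0);         // two-step form yields 0
  return 0;
}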
+
//===----------------------------------------------------------------------===//
// AMDGPUDisassembler
//===----------------------------------------------------------------------===//
@@ -57,8 +113,21 @@ public:
MCOperand errOperand(unsigned V, const Twine& ErrMsg) const;
- DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst,
- uint64_t Address) const;
+ template <typename InsnType>
+ DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst,
+ uint64_t Address) const {
+ assert(MI.getOpcode() == 0);
+ assert(MI.getNumOperands() == 0);
+ MCInst TmpInst;
+ HasLiteral = false;
+ const auto SavedBytes = Bytes;
+ if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
+ MI = TmpInst;
+ return MCDisassembler::Success;
+ }
+ Bytes = SavedBytes;
+ return MCDisassembler::Fail;
+ }
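// Note the save/restore of Bytes: a failed decodeInstruction may already
// have consumed trailing literal bytes, and restoring them lets the caller
// retry the same encoding against another table.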
Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
@@ -87,10 +156,14 @@ public:
DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;
+ DecodeStatus convertEXPInst(MCInst &MI) const;
+ DecodeStatus convertVINTERPInst(MCInst &MI) const;
DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
DecodeStatus convertSDWAInst(MCInst &MI) const;
DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
+ DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
+ DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const;
@@ -127,6 +200,9 @@ public:
MCOperand decodeOperand_AReg_1024(unsigned Val) const;
MCOperand decodeOperand_AV_32(unsigned Val) const;
MCOperand decodeOperand_AV_64(unsigned Val) const;
+ MCOperand decodeOperand_AV_128(unsigned Val) const;
+ MCOperand decodeOperand_AVDst_128(unsigned Val) const;
+ MCOperand decodeOperand_AVDst_512(unsigned Val) const;
enum OpWidthTy {
OPW32,
@@ -157,6 +233,7 @@ public:
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
bool MandatoryLiteral = false) const;
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
@@ -177,6 +254,8 @@ public:
bool isGFX9Plus() const;
bool isGFX10() const;
bool isGFX10Plus() const;
+ bool isGFX11() const;
+ bool isGFX11Plus() const;
bool hasArchitectedFlatScratch() const;
};
@@ -196,8 +275,8 @@ public:
: MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {}
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
- int64_t Value, uint64_t Address,
- bool IsBranch, uint64_t Offset,
+ int64_t Value, uint64_t Address, bool IsBranch,
+ uint64_t Offset, uint64_t OpSize,
uint64_t InstSize) override;
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
diff --git a/llvm/lib/Target/AMDGPU/EXPInstructions.td b/llvm/lib/Target/AMDGPU/EXPInstructions.td
index b3b55ddd2c97..14ba01f0d67c 100644
--- a/llvm/lib/Target/AMDGPU/EXPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EXPInstructions.td
@@ -10,7 +10,7 @@
// EXP classes
//===----------------------------------------------------------------------===//
-class EXPCommon<bit done, string asm = ""> : InstSI<
+class EXPCommon<bit row, bit done, string asm = ""> : InstSI<
(outs),
(ins exp_tgt:$tgt,
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
@@ -21,21 +21,30 @@ class EXPCommon<bit done, string asm = ""> : InstSI<
let mayLoad = done;
let mayStore = 1;
let UseNamedOperandTable = 1;
- let Uses = [EXEC];
+ let Uses = !if(row, [EXEC, M0], [EXEC]);
let SchedRW = [WriteExport];
let DisableWQM = 1;
}
-class EXP_Pseudo<bit done> : EXPCommon<done>,
- SIMCInstr <NAME, SIEncodingFamily.NONE> {
+class EXP_Pseudo<bit row, bit done>
+ : EXPCommon<row, done>, SIMCInstr<NAME, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
-class EXP_Real<bit done, string pseudo, int subtarget>
- : EXPCommon<done, "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")
- #"$compr$vm">,
- SIMCInstr <pseudo, subtarget> {
+// Real instruction with optional asm operands "compr" and "vm".
+class EXP_Real_ComprVM<bit done, string pseudo, int subtarget>
+ : EXPCommon<0, done, "exp$tgt $src0, $src1, $src2, $src3"
+ #!if(done, " done", "")#"$compr$vm">,
+ SIMCInstr<pseudo, subtarget> {
+ let AsmMatchConverter = "cvtExp";
+}
+
+// Real instruction with optional asm operand "row_en".
+class EXP_Real_Row<bit row, bit done, string pseudo, int subtarget>
+ : EXPCommon<row, done, "exp$tgt $src0, $src1, $src2, $src3"
+ #!if(done, " done", "")#!if(row, " row_en", "")>,
+ SIMCInstr<pseudo, subtarget> {
let AsmMatchConverter = "cvtExp";
}
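// For row = 1 and done = 1 the string concatenation above yields
// "exp$tgt $src0, $src1, $src2, $src3 done row_en".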
@@ -43,17 +52,21 @@ class EXP_Real<bit done, string pseudo, int subtarget>
// EXP Instructions
//===----------------------------------------------------------------------===//
-// Split EXP instruction into EXP and EXP_DONE so we can set
-// mayLoad for done=1.
-def EXP : EXP_Pseudo<0>;
-def EXP_DONE : EXP_Pseudo<1>;
+// DONE variants have mayLoad = 1.
+// ROW variants have an implicit use of M0.
+let SubtargetPredicate = isNotGFX90APlus in {
+def EXP : EXP_Pseudo<0, 0>;
+def EXP_DONE : EXP_Pseudo<0, 1>;
+def EXP_ROW : EXP_Pseudo<1, 0>;
+def EXP_ROW_DONE : EXP_Pseudo<1, 1>;
+} // let SubtargetPredicate = isNotGFX90APlus
//===----------------------------------------------------------------------===//
// SI
//===----------------------------------------------------------------------===//
class EXP_Real_si<bit _done, string pseudo>
- : EXP_Real<_done, pseudo, SIEncodingFamily.SI>, EXPe {
+ : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.SI>, EXPe_ComprVM {
let AssemblerPredicate = isGFX6GFX7;
let DecoderNamespace = "GFX6GFX7";
let done = _done;
@@ -67,8 +80,9 @@ def EXP_DONE_si : EXP_Real_si<1, "EXP_DONE">;
//===----------------------------------------------------------------------===//
class EXP_Real_vi<bit _done, string pseudo>
- : EXP_Real<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi {
+ : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.VI>, EXPe_vi {
let AssemblerPredicate = isGFX8GFX9;
+ let SubtargetPredicate = isNotGFX90APlus;
let DecoderNamespace = "GFX8";
let done = _done;
}
@@ -77,12 +91,12 @@ def EXP_vi : EXP_Real_vi<0, "EXP">;
def EXP_DONE_vi : EXP_Real_vi<1, "EXP_DONE">;
//===----------------------------------------------------------------------===//
-// GFX10+
+// GFX10
//===----------------------------------------------------------------------===//
class EXP_Real_gfx10<bit _done, string pseudo>
- : EXP_Real<_done, pseudo, SIEncodingFamily.GFX10>, EXPe {
- let AssemblerPredicate = isGFX10Plus;
+ : EXP_Real_ComprVM<_done, pseudo, SIEncodingFamily.GFX10>, EXPe_ComprVM {
+ let AssemblerPredicate = isGFX10Only;
let DecoderNamespace = "GFX10";
let done = _done;
}
@@ -91,6 +105,23 @@ def EXP_gfx10 : EXP_Real_gfx10<0, "EXP">;
def EXP_DONE_gfx10 : EXP_Real_gfx10<1, "EXP_DONE">;
//===----------------------------------------------------------------------===//
+// GFX11+
+//===----------------------------------------------------------------------===//
+
+class EXP_Real_gfx11<bit _row, bit _done, string pseudo>
+ : EXP_Real_Row<_row, _done, pseudo, SIEncodingFamily.GFX11>, EXPe_Row {
+ let AssemblerPredicate = isGFX11Plus;
+ let DecoderNamespace = "GFX11";
+ let row = _row;
+ let done = _done;
+}
+
+def EXP_gfx11 : EXP_Real_gfx11<0, 0, "EXP">;
+def EXP_DONE_gfx11 : EXP_Real_gfx11<0, 1, "EXP_DONE">;
+def EXP_ROW_gfx11 : EXP_Real_gfx11<1, 0, "EXP_ROW">;
+def EXP_ROW_DONE_gfx11 : EXP_Real_gfx11<1, 1, "EXP_ROW_DONE">;
+
+//===----------------------------------------------------------------------===//
// EXP Patterns
//===----------------------------------------------------------------------===//
@@ -103,6 +134,15 @@ class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
>;
+class ExpRowPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
+ (int_amdgcn_exp_row timm:$tgt, timm:$en,
+ (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
+ (vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
+ done_val, M0),
+ (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
+ ExpSrc2:$src2, ExpSrc3:$src3, 0, 0, timm:$en)
+>;
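// M0 appears in the source pattern but not in the output: the EXP_ROW*
// instructions consume the row index through their implicit use of M0
// (see the Uses override in EXPCommon above).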
+
class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
(int_amdgcn_exp_compr timm:$tgt, timm:$en,
(vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
@@ -119,6 +159,11 @@ def : ExpPattern<i32, EXP_DONE, -1>;
def : ExpPattern<f32, EXP, 0>;
def : ExpPattern<f32, EXP_DONE, -1>;
+def : ExpRowPattern<i32, EXP_ROW, 0>;
+def : ExpRowPattern<i32, EXP_ROW_DONE, -1>;
+def : ExpRowPattern<f32, EXP_ROW, 0>;
+def : ExpRowPattern<f32, EXP_ROW_DONE, -1>;
+
def : ExpComprPattern<v2i16, EXP, 0>;
def : ExpComprPattern<v2i16, EXP_DONE, -1>;
def : ExpComprPattern<v2f16, EXP, 0>;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index c530d3cb49f0..cb2822818549 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -12,6 +12,7 @@ def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWant
def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
+def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -56,6 +57,9 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
bits<1> dlcValue = 0;
bits<1> has_sccb = 1;
bits<1> sccbValue = 0;
+ bits<1> has_sve = 0; // Scratch VGPR Enable
+ bits<1> lds = 0;
+ bits<1> sve = 0;
let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
!if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -74,8 +78,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
let FlatScratch = is_flat_scratch;
}
-class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
+ InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
Enc64 {
let isPseudo = 0;
@@ -96,6 +100,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let IsAtomicNoRet = ps.IsAtomicNoRet;
let VM_CNT = ps.VM_CNT;
let LGKM_CNT = ps.LGKM_CNT;
+ let VALU = ps.VALU;
// encoding fields
bits<8> vaddr;
@@ -106,7 +111,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
bits<5> cpol;
// Only valid on gfx9
- bits<1> lds = 0; // XXX - What does this actually do?
+ bits<1> lds = ps.lds; // LDS DMA for global and scratch
// Segment, 00=flat, 01=scratch, 10=global, 11=reserved
bits<2> seg = !if(ps.is_flat_global, 0b10,
@@ -123,7 +128,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
// Only valid on GFX9+
let Inst{12-0} = offset;
- let Inst{13} = lds;
+ let Inst{13} = !if(ps.has_sve, ps.sve, lds);
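// Bit 13 is shared: encodings with has_sve repurpose the LDS-DMA bit as the
// "sve" (scratch VGPR enable) flag, and the !if above selects which value
// lands in the instruction word.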
let Inst{15-14} = seg;
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
@@ -240,6 +245,35 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
}
+class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+ opName,
+ (outs ),
+ !con(
+ !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
+ (ins flat_offset:$offset, CPol_0:$cpol)),
+ " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
+ let LGKM_CNT = 1;
+ let is_flat_global = 1;
+ let lds = 1;
+ let has_data = 0;
+ let has_vdst = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let has_saddr = 1;
+ let enabled_saddr = EnableSaddr;
+ let VALU = 1;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+ let Uses = [M0, EXEC];
+ let SchedRW = [WriteVMEM, WriteLDS];
+}
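// mayLoad and mayStore are both set above because LDS DMA reads from the
// global address and writes the result into LDS, so the instruction is
// modeled as doing both.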
+
+multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
+ def "" : FLAT_Global_Load_LDS_Pseudo<opName>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
+ GlobalSaddrTable<1, opName>;
+}
+
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
@@ -273,16 +307,19 @@ class FlatScratchInst <string sv_op, string mode> {
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
bit HasTiedOutput = 0,
bit EnableSaddr = 0,
- bit EnableVaddr = !not(EnableSaddr)>
+ bit EnableSVE = 0,
+ bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
: FLAT_Pseudo<
opName,
(outs getLdStRegisterOperand<regClass>.ret:$vdst),
!con(
- !if(EnableSaddr,
- (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
- !if(EnableVaddr,
- (ins VGPR_32:$vaddr, flat_offset:$offset),
- (ins flat_offset:$offset))),
+ !if(EnableSVE,
+ (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
+ !if(EnableSaddr,
+ (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
+ !if(EnableVaddr,
+ (ins VGPR_32:$vaddr, flat_offset:$offset),
+ (ins flat_offset:$offset)))),
!if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
(ins CPol_0:$cpol))),
" $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
@@ -291,7 +328,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
let has_vaddr = EnableVaddr;
- let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
+ let has_sve = EnableSVE;
+ let sve = EnableVaddr;
+ let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
let maybeAtomic = 1;
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
@@ -299,15 +338,18 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
}
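// Addressing-mode summary for the scratch load/store pseudos, as implied by
// the ins lists: _SVS takes vaddr and saddr, _SADDR takes saddr only, the
// base form takes vaddr only, and _ST takes just the immediate offset.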
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
- bit EnableVaddr = !not(EnableSaddr),
+ bit EnableSVE = 0,
+ bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo<
opName,
(outs),
- !if(EnableSaddr,
- (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
- !if(EnableVaddr,
- (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
- (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))),
+ !if(EnableSVE,
+ (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
+ !if(EnableSaddr,
+ (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
+ !if(EnableVaddr,
+ (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
+ (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))),
" "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
@@ -315,7 +357,9 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
let has_vaddr = EnableVaddr;
- let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
+ let has_sve = EnableSVE;
+ let sve = EnableVaddr;
+ let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
let maybeAtomic = 1;
}
@@ -326,8 +370,12 @@ multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit H
def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
FlatScratchInst<opName, "SS">;
+ let SubtargetPredicate = HasFlatScratchSVSMode in
+ def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
+ FlatScratchInst<opName, "SVS">;
+
let SubtargetPredicate = HasFlatScratchSTMode in
- def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>,
+ def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
FlatScratchInst<opName, "ST">;
}
}
@@ -339,12 +387,59 @@ multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
FlatScratchInst<opName, "SS">;
+ let SubtargetPredicate = HasFlatScratchSVSMode in
+ def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
+ FlatScratchInst<opName, "SVS">;
+
let SubtargetPredicate = HasFlatScratchSTMode in
- def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>,
+ def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
FlatScratchInst<opName, "ST">;
}
}
+class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
+ bit EnableSVE = 0,
+ bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo<
+ opName,
+ (outs ),
+ !if(EnableSVE,
+ (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
+ !if(EnableSaddr,
+ (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
+ !if(EnableVaddr,
+ (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
+ (ins flat_offset:$offset, CPol:$cpol)))),
+ " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
+
+ let LGKM_CNT = 1;
+ let is_flat_scratch = 1;
+ let lds = 1;
+ let has_data = 0;
+ let has_vdst = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let has_saddr = 1;
+ let enabled_saddr = EnableSaddr;
+ let has_vaddr = EnableVaddr;
+ let has_sve = EnableSVE;
+ let sve = EnableVaddr;
+ let VALU = 1;
+ let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
+ let Uses = [M0, EXEC];
+ let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
+ def "" : FLAT_Scratch_Load_LDS_Pseudo<opName>,
+ FlatScratchInst<opName, "SV">;
+ def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>,
+ FlatScratchInst<opName, "SS">;
+ def _SVS : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>,
+ FlatScratchInst<opName, "SVS">;
+ def _ST : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>,
+ FlatScratchInst<opName, "ST">;
+}
+
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
string asm, list<dag> pattern = []> :
FLAT_Pseudo<opName, outs, ins, asm, pattern> {
@@ -375,7 +470,6 @@ multiclass FLAT_Atomic_Pseudo<
string opName,
RegisterClass vdst_rc,
ValueType vt,
- SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc,
bit isFP = isFloatType<data_vt>.ret,
@@ -394,11 +488,9 @@ multiclass FLAT_Atomic_Pseudo<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
- " $vdst, $vaddr, $vdata$offset$cpol",
- [(set vt:$vdst,
- (atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
- GlobalSaddrTable<0, opName#"_rtn">,
- AtomicNoRet <opName, 1>{
+ " $vdst, $vaddr, $vdata$offset$cpol">,
+ GlobalSaddrTable<0, opName#"_rtn">,
+ AtomicNoRet <opName, 1> {
let FPAtomic = isFP;
let AddedComplexity = -1; // Prefer global atomics if available
}
@@ -441,7 +533,6 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
string opName,
RegisterClass vdst_rc,
ValueType vt,
- SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc,
bit isFP = isFloatType<data_vt>.ret,
@@ -451,11 +542,9 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
- " $vdst, $vaddr, $vdata, off$offset$cpol",
- [(set vt:$vdst,
- (atomic (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
- GlobalSaddrTable<0, opName#"_rtn">,
- AtomicNoRet <opName, 1> {
+ " $vdst, $vaddr, $vdata, off$offset$cpol">,
+ GlobalSaddrTable<0, opName#"_rtn">,
+ AtomicNoRet <opName, 1> {
let has_saddr = 1;
let FPAtomic = isFP;
}
@@ -477,12 +566,11 @@ multiclass FLAT_Global_Atomic_Pseudo<
string opName,
RegisterClass vdst_rc,
ValueType vt,
- SDPatternOperator atomic_rtn = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
- defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
+ defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
}
}
@@ -519,99 +607,88 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
}
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
- VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
- v2i32, VReg_64>;
+ VGPR_32, i32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
- VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
- v2i64, VReg_128>;
+ VReg_64, i64, v2i64, VReg_128>;
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
- VGPR_32, i32, atomic_swap_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
- VReg_64, i64, atomic_swap_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
- VGPR_32, i32, atomic_load_add_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
- VGPR_32, i32, atomic_load_sub_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
- VGPR_32, i32, atomic_load_min_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
- VGPR_32, i32, atomic_load_umin_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
- VGPR_32, i32, atomic_load_max_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
- VGPR_32, i32, atomic_load_umax_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
- VGPR_32, i32, atomic_load_and_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
- VGPR_32, i32, atomic_load_or_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
- VGPR_32, i32, atomic_load_xor_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
- VGPR_32, i32, atomic_inc_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
- VGPR_32, i32, atomic_dec_flat_32>;
+ VGPR_32, i32>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
- VReg_64, i64, atomic_load_add_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
- VReg_64, i64, atomic_load_sub_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
- VReg_64, i64, atomic_load_min_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
- VReg_64, i64, atomic_load_umin_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
- VReg_64, i64, atomic_load_max_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
- VReg_64, i64, atomic_load_umax_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
- VReg_64, i64, atomic_load_and_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
- VReg_64, i64, atomic_load_or_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
- VReg_64, i64, atomic_load_xor_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
- VReg_64, i64, atomic_inc_flat_64>;
+ VReg_64, i64>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
- VReg_64, i64, atomic_dec_flat_64>;
+ VReg_64, i64>;
// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {
-defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
- VGPR_32, f32, null_frag, v2f32, VReg_64>;
-
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
- VReg_64, f64, null_frag, v2f64, VReg_128>;
-
-defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
- VGPR_32, f32>;
-
-defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
- VGPR_32, f32>;
+ VReg_64, f64, v2f64, VReg_128>;
defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
VReg_64, f64>;
@@ -622,14 +699,39 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
} // End SubtargetPredicate = isGFX7GFX10
let SubtargetPredicate = isGFX90APlus in {
- defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64, int_amdgcn_flat_atomic_fadd>;
- defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmin>;
- defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmax>;
- defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>;
- defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
- defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
+ defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
+ defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
+ defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
+ defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
+ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
+ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus
+let SubtargetPredicate = isGFX940Plus in {
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
+} // End SubtargetPredicate = isGFX940Plus
+
+// GFX7-, GFX10-, GFX11-only flat instructions.
+let SubtargetPredicate = isGFX7GFX10GFX11 in {
+
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
+ VGPR_32, f32, v2f32, VReg_64>;
+
+defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
+ VGPR_32, f32>;
+
+defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
+ VGPR_32, f32>;
+
+} // End SubtargetPredicate = isGFX7GFX10GFX11
+
+// GFX940-, GFX11-only flat instructions.
+let SubtargetPredicate = isGFX940GFX11Plus in {
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
+} // End SubtargetPredicate = isGFX940GFX11Plus
+
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
@@ -662,88 +764,93 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
- VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
- v2i32, VReg_64>;
+ VGPR_32, i32, v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
- VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
- v2i64, VReg_128>;
+ VReg_64, i64, v2i64, VReg_128>;
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
- VGPR_32, i32, atomic_swap_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
- VReg_64, i64, atomic_swap_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
- VGPR_32, i32, atomic_load_add_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
- VGPR_32, i32, atomic_load_sub_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
- VGPR_32, i32, atomic_load_min_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
- VGPR_32, i32, atomic_load_umin_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
- VGPR_32, i32, atomic_load_max_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
- VGPR_32, i32, atomic_load_umax_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
- VGPR_32, i32, atomic_load_and_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
- VGPR_32, i32, atomic_load_or_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
- VGPR_32, i32, atomic_load_xor_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
- VGPR_32, i32, atomic_inc_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
- VGPR_32, i32, atomic_dec_global_32>;
+ VGPR_32, i32>;
defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
- VReg_64, i64, atomic_load_add_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
- VReg_64, i64, atomic_load_sub_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
- VReg_64, i64, atomic_load_min_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
- VReg_64, i64, atomic_load_umin_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
- VReg_64, i64, atomic_load_max_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
- VReg_64, i64, atomic_load_umax_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
- VReg_64, i64, atomic_load_and_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
- VReg_64, i64, atomic_load_or_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
- VReg_64, i64, atomic_load_xor_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
- VReg_64, i64, atomic_inc_global_64>;
+ VReg_64, i64>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
- VReg_64, i64, atomic_dec_global_64>;
+ VReg_64, i64>;
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
- VGPR_32, i32, int_amdgcn_global_atomic_csub>;
+ VGPR_32, i32>;
+
+defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
+defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
+defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
+
} // End is_flat_global = 1
@@ -775,41 +882,46 @@ defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4",
defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
+defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
+defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
+defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
+
} // End SubtargetPredicate = HasFlatScratchInsts
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
defm GLOBAL_ATOMIC_FCMPSWAP :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
defm GLOBAL_ATOMIC_FMIN :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
defm GLOBAL_ATOMIC_FMAX :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
defm GLOBAL_ATOMIC_FMIN_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
defm GLOBAL_ATOMIC_FMAX_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
let is_flat_global = 1 in {
-let OtherPredicates = [HasAtomicFaddInsts] in {
+let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_add_f32", VGPR_32, f32
>;
+let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_pk_add_f16", VGPR_32, v2f16
>;
-} // End OtherPredicates = [HasAtomicFaddInsts]
-
-let OtherPredicates = [isGFX90APlus] in {
+let OtherPredicates = [HasAtomicFaddRtnInsts] in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
- "global_atomic_add_f32", VGPR_32, f32, int_amdgcn_global_atomic_fadd
+ "global_atomic_add_f32", VGPR_32, f32
>;
+let OtherPredicates = [isGFX90APlus] in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
- "global_atomic_pk_add_f16", VGPR_32, v2f16, int_amdgcn_global_atomic_fadd
+ "global_atomic_pk_add_f16", VGPR_32, v2f16
>;
-} // End OtherPredicates = [isGFX90APlus]
} // End is_flat_global = 1
//===----------------------------------------------------------------------===//
@@ -896,24 +1008,47 @@ class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
(inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
-class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
- ValueType data_vt = vt> : GCNPat <
- (vt (node (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
- (inst $vaddr, $data, $offset)
->;
-
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
(inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
+multiclass FlatAtomicPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt> {
+ defvar rtnNode = !cast<PatFrags>(node#"_ret_"#vt.Size);
+ defvar noRtnNode = !cast<PatFrags>(node#"_noret_"#vt.Size);
+
+ def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+
+ def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+}
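// Illustrative expansion, assuming the PatFrags naming used above:
// FlatAtomicPat<"FLAT_ATOMIC_ADD", "atomic_load_add_flat", i32> looks up
// atomic_load_add_flat_ret_32 / atomic_load_add_flat_noret_32 and maps them
// to FLAT_ATOMIC_ADD_RTN and FLAT_ATOMIC_ADD respectively.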
+
+multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt, bit isIntr = 0> {
+ defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size));
+ defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));
+
+ def : GCNPat <(vt (rtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+
+ def : GCNPat <(vt (noRtnNode (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
+}
+
+multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt,
+ ValueType data_vt = vt> {
+ defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* isIntr */ 1>;
+}
+
class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (GlobalOffset i64:$vaddr, i16:$offset), vt:$data),
(inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
-class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
- ValueType data_vt = vt> : GCNPat <
+class FlatSignedAtomicPatRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
+ ValueType data_vt = vt> : GCNPat <
(vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
(inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
@@ -949,8 +1084,28 @@ class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
(inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
+class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset))),
+ (inst $vaddr, $saddr, $offset, 0)
+>;
+
+class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
+ ValueType vt> : GCNPat <
+ (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset)),
+ (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
+>;
+
+class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
+ (inst $vaddr, $saddr, $offset, 0, $in)
+>;
+
let OtherPredicates = [HasFlatAddressSpace] in {
+def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
@@ -986,44 +1141,52 @@ def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
+def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
+def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
+
+foreach as = [ "flat", "global" ] in {
+defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_inc_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_dec_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
+
+defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_inc_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_dec_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
+} // end foreach as
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [HasD16LoadStore] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
+}
+let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
@@ -1084,9 +1247,9 @@ multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, V
}
}
-multiclass GlobalFLATAtomicPats<string nortn_inst_name, SDPatternOperator node,
- ValueType vt, ValueType data_vt = vt> {
- def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> {
+multiclass GlobalFLATAtomicPatsRtn<string nortn_inst_name, SDPatternOperator node,
+ ValueType vt, ValueType data_vt = vt> {
+ def : FlatSignedAtomicPatRtn <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> {
let AddedComplexity = 10;
}
@@ -1095,6 +1258,26 @@ multiclass GlobalFLATAtomicPats<string nortn_inst_name, SDPatternOperator node,
}
}
+multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
+ ValueType data_vt = vt, bit isIntr = 0> {
+ defvar rtnNode = !cast<PatFrags>(node # "_ret" # !if(isIntr, "", "_" # vt.Size));
+ defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));
+
+ let AddedComplexity = 10 in {
+ defm : FlatSignedAtomicPat <inst, node, vt, data_vt, isIntr>;
+ }
+
+ let AddedComplexity = 11 in {
+ def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), noRtnNode, vt, data_vt>;
+ def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>;
+ }
+}
+
+multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
+ ValueType data_vt = vt> {
+ defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
+}
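// The SADDR patterns get AddedComplexity = 11 against 10 for the plain
// forms, so selection prefers the scalar-base encoding whenever the address
// splits into an SGPR base plus offset.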
+
multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> {
def : FlatSignedAtomicPatNoRtn <inst, node, vt> {
@@ -1114,6 +1297,11 @@ multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueTy
def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 26;
}
+
+ def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+ let SubtargetPredicate = HasFlatScratchSVSMode;
+ let AddedComplexity = 27;
+ }
}
multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
@@ -1125,6 +1313,11 @@ multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 26;
}
+
+ def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+ let SubtargetPredicate = HasFlatScratchSVSMode;
+ let AddedComplexity = 27;
+ }
}
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
@@ -1135,10 +1328,19 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 26;
}
+
+ def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> {
+ let SubtargetPredicate = HasFlatScratchSVSMode;
+ let AddedComplexity = 27;
+ }
}
let OtherPredicates = [HasFlatGlobalInsts] in {
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
@@ -1179,10 +1381,12 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
-let OtherPredicates = [D16PreservesUnusedBits] in {
+let OtherPredicates = [HasD16LoadStore] in {
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
+}
+let OtherPredicates = [D16PreservesUnusedBits] in {
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
@@ -1198,59 +1402,84 @@ defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
-defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>;
-
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", atomic_load_add_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", atomic_load_sub_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", atomic_inc_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", atomic_dec_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", atomic_load_and_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", atomic_load_max_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", atomic_load_umax_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", atomic_load_min_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", atomic_load_umin_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", atomic_load_or_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", atomic_swap_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", atomic_load_xor_global_32, i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>;
-
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", atomic_load_add_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", atomic_load_sub_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", atomic_inc_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", atomic_dec_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", atomic_load_and_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", atomic_load_max_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", atomic_load_umax_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", atomic_load_min_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", atomic_load_umin_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", atomic_load_or_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>;
+defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;
+
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_inc_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_dec_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
+defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>;
+
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_inc_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_dec_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>;
let OtherPredicates = [isGFX10Plus] in {
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", atomic_load_fmin_global_32, f32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", atomic_load_fmax_global_32, f32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", atomic_load_fmin_global_64, f64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", atomic_load_fmax_global_64, f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
}
-let OtherPredicates = [HasAtomicFaddInsts] in {
+let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
+let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
-}
let OtherPredicates = [isGFX90APlus] in {
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", atomic_load_fadd_global_32, f32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", atomic_load_fadd_v2f16_global_32, v2f16>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", atomic_load_fadd_global_64, f64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", atomic_load_fmin_global_64, f64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", atomic_load_fmax_global_64, f64>;
-def : FlatSignedAtomicPat <FLAT_ATOMIC_ADD_F64_RTN, atomic_load_fadd_flat_64, f64>;
-def : FlatSignedAtomicPat <FLAT_ATOMIC_MIN_F64_RTN, atomic_load_fmin_flat_64, f64>;
-def : FlatSignedAtomicPat <FLAT_ATOMIC_MAX_F64_RTN, atomic_load_fmax_flat_64, f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_global", v2f16>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", f32>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", f64>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", v2f16>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
+}
+
+let OtherPredicates = [isGFX940Plus] in {
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_flat", v2f16>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
}
} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
@@ -1291,10 +1520,12 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>;
-let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
+let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>;
+}
+let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>;
@@ -1405,6 +1636,57 @@ multiclass FLAT_Real_AllAddr_vi<bits<7> op,
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}
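+// Descriptive note (added by editor): real encoding for FLAT instructions on
+// gfx940 — on top of the common FLAT encoding, Inst{13} carries the pseudo's
+// sve bit and Inst{25} the SCC cache-policy bit.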
+class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
+ FLAT_Real <op, ps>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
+ let AssemblerPredicate = isGFX940Plus;
+ let DecoderNamespace = "GFX9";
+ let Inst{13} = ps.sve;
+ let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
+}
+
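+// Scratch instructions with SVS/ST addressing: gfx8/gfx9 keep the plain VI
+// encoding, while gfx940 gets dedicated reals for the VE, SVS and ST
+// addressing variants.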
+multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
+ let AssemblerPredicate = isGFX8GFX9NotGFX940;
+ let OtherPredicates = [isGFX8GFX9NotGFX940];
+ }
+ def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
+ let DecoderNamespace = "GFX9";
+ }
+ let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
+ def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+ def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+ }
+}
+
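+// LDS DMA loads: gfx8/gfx9 reuse the opcode of the corresponding plain load
+// and append " lds" to the assembly string; gfx940 assigns them dedicated
+// opcodes.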
+multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
+ string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr),
+ bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
+
+ let OtherPredicates = [isGFX8GFX9NotGFX940] in {
+ def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
+ }
+ def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
+ }
+ }
+
+ let SubtargetPredicate = isGFX940Plus in {
+ def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+ }
+}
+
+multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
+ defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
+ let SubtargetPredicate = isGFX940Plus in {
+ def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+ def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+ }
+}
+
def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
@@ -1496,6 +1778,11 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS <0x026, 0x10>;
+defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS <0x027, 0x11>;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>;
+defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>;
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>;
@@ -1524,32 +1811,39 @@ defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>;
-defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
-defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
-defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
-defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
-defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
-defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
-defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
-defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
-defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
-defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
-defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>;
-defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
-defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>;
-defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
-defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>;
-defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;
-defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
-defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
-defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
-defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
-defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
-defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
-
-let SubtargetPredicate = HasAtomicFaddInsts in {
-defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
-defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
+defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>;
+defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>;
+defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>;
+
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>;
+defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x13>;
+defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_SVE_vi <0x14>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x16>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x17>;
+defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_SVE_vi <0x18>;
+defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x19>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x20>;
+defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x21>;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x22>;
+defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x23>;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_SVE_vi <0x24>;
+defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x25>;
+defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_SVE_vi <0x1a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
+defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_SVE_vi <0x1c>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>;
+
+let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
+ // These instructions are encoded differently on gfx90* and gfx940.
+ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
+ defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
}
let SubtargetPredicate = isGFX90AOnly in {
@@ -1561,13 +1855,46 @@ let SubtargetPredicate = isGFX90AOnly in {
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly
+multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
+ def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+}
+
+multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
+ def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
+ def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+}
+
+multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
+ FLAT_Real_AllAddr_gfx940<op> {
+ def _RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+ def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+}
+
+let SubtargetPredicate = isGFX940Plus in {
+ // These instructions are encoded differently on gfx90* and gfx940.
+ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
+ defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
+
+ defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
+ defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
+ defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
+ defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
+ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
+ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
+} // End SubtargetPredicate = isGFX940Plus
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
- let AssemblerPredicate = isGFX10Plus;
+ let AssemblerPredicate = isGFX10Only;
let DecoderNamespace = "GFX10";
let Inst{11-0} = offset{11-0};
@@ -1627,6 +1954,23 @@ multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
FLAT_Real_SADDR_gfx10<op>,
FLAT_Real_ST_gfx10<op>;
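+// gfx10 LDS DMA variants share the opcode of the corresponding load and are
+// distinguished only by the " lds" suffix on the assembly string.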
+multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op,
+ string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
+ let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in
+ defm "" : FLAT_Real_Base_gfx10<op>;
+
+ let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in
+ defm "" : FLAT_Real_SADDR_gfx10<op>;
+}
+
+multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op,
+ string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
+ defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>;
+
+ let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in
+ defm "" : FLAT_Real_ST_gfx10<op>;
+}
+
// ENC_FLAT.
defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>;
@@ -1743,6 +2087,12 @@ defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;
+defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x008>;
+defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x009>;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>;
+defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>;
+
// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
@@ -1766,3 +2116,219 @@ defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
+
+defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>;
+defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>;
+defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>;
+
+//===----------------------------------------------------------------------===//
+// GFX11
+//===----------------------------------------------------------------------===//
+
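+// Real encoding for FLAT instructions on gfx11: the cache policy moves to
+// the dlc/glc/slc bits (Inst{13-15}), the address segment to Inst{17-16},
+// and the sve bit to Inst{55}.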
+class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
+ FLAT_Real <op, ps, opName>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
+ let AssemblerPredicate = isGFX11Plus;
+ let DecoderNamespace = "GFX11";
+
+ let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
+ let Inst{15} = cpol{CPolBit.SLC};
+ let Inst{17-16} = seg;
+ let Inst{55} = ps.sve;
+}
+
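+// gfx11 renames many FLAT mnemonics (e.g. flat_load_dword -> flat_load_b32);
+// when `renamed` is set, a MnemonicAlias keeps the old spelling assembling.
+// Forms without a SADDR operand encode SGPR_NULL in Inst{54-48}.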
+multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
+ def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
+ let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ }
+ if renamed then
+ def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>;
+}
+
+multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
+ def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
+ let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ }
+}
+
+multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
+ def _SADDR_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>;
+}
+
+multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
+ def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>;
+}
+
+multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
+ def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
+ let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
+ let OtherPredicates = [HasFlatScratchSTMode];
+ }
+}
+
+multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> {
+ def _SVS_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> {
+ let OtherPredicates = [HasFlatScratchSVSMode];
+ }
+}
+
+multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
+ FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
+ FLAT_Real_SADDR_gfx11<op, ps, opName>;
+
+multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
+ FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
+ FLAT_Real_RTN_gfx11<op, ps, opName>;
+
+multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
+ FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>,
+ FLAT_Real_RTN_gfx11<op, ps, opName>,
+ FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
+
+multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName> :
+ FLAT_Real_RTN_gfx11<op, ps, opName>,
+ FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
+
+multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
+ FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
+ FLAT_Real_SADDR_gfx11<op, ps, opName>,
+ FLAT_Real_ST_gfx11<op, ps, opName>,
+ FLAT_Real_SVS_gfx11<op, ps, opName>;
+
+// ENC_FLAT.
+defm FLAT_LOAD_U8 : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
+defm FLAT_LOAD_I8 : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
+defm FLAT_LOAD_U16 : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
+defm FLAT_LOAD_I16 : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
+defm FLAT_LOAD_B32 : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
+defm FLAT_LOAD_B64 : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
+defm FLAT_LOAD_B96 : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
+defm FLAT_LOAD_B128 : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
+defm FLAT_STORE_B8 : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
+defm FLAT_STORE_B16 : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
+defm FLAT_STORE_B32 : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
+defm FLAT_STORE_B64 : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
+defm FLAT_STORE_B96 : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
+defm FLAT_STORE_B128 : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
+defm FLAT_LOAD_D16_U8 : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
+defm FLAT_LOAD_D16_I8 : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
+defm FLAT_LOAD_D16_B16 : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
+defm FLAT_LOAD_D16_HI_U8 : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
+defm FLAT_LOAD_D16_HI_I8 : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
+defm FLAT_LOAD_D16_HI_B16 : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
+defm FLAT_STORE_D16_HI_B8 : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
+defm FLAT_STORE_D16_HI_B16 : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
+defm FLAT_ATOMIC_SWAP_B32 : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
+defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
+defm FLAT_ATOMIC_ADD_U32 : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
+defm FLAT_ATOMIC_SUB_U32 : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
+defm FLAT_ATOMIC_MIN_I32 : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
+defm FLAT_ATOMIC_MIN_U32 : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
+defm FLAT_ATOMIC_MAX_I32 : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
+defm FLAT_ATOMIC_MAX_U32 : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
+defm FLAT_ATOMIC_AND_B32 : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
+defm FLAT_ATOMIC_OR_B32 : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
+defm FLAT_ATOMIC_XOR_B32 : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
+defm FLAT_ATOMIC_INC_U32 : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
+defm FLAT_ATOMIC_DEC_U32 : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
+defm FLAT_ATOMIC_SWAP_B64 : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
+defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
+defm FLAT_ATOMIC_ADD_U64 : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
+defm FLAT_ATOMIC_SUB_U64 : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
+defm FLAT_ATOMIC_MIN_I64 : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
+defm FLAT_ATOMIC_MIN_U64 : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
+defm FLAT_ATOMIC_MAX_I64 : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
+defm FLAT_ATOMIC_MAX_U64 : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
+defm FLAT_ATOMIC_AND_B64 : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
+defm FLAT_ATOMIC_OR_B64 : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
+defm FLAT_ATOMIC_XOR_B64 : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
+defm FLAT_ATOMIC_INC_U64 : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
+defm FLAT_ATOMIC_DEC_U64 : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
+defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">;
+defm FLAT_ATOMIC_MIN_F32 : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">;
+defm FLAT_ATOMIC_MAX_F32 : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">;
+defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;
+
+// ENC_FLAT_GLBL.
+defm GLOBAL_LOAD_U8 : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
+defm GLOBAL_LOAD_I8 : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
+defm GLOBAL_LOAD_U16 : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
+defm GLOBAL_LOAD_I16 : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
+defm GLOBAL_LOAD_B32 : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
+defm GLOBAL_LOAD_B64 : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
+defm GLOBAL_LOAD_B96 : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
+defm GLOBAL_LOAD_B128 : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
+defm GLOBAL_STORE_B8 : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
+defm GLOBAL_STORE_B16 : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
+defm GLOBAL_STORE_B32 : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
+defm GLOBAL_STORE_B64 : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
+defm GLOBAL_STORE_B96 : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
+defm GLOBAL_STORE_B128 : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
+defm GLOBAL_LOAD_D16_U8 : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
+defm GLOBAL_LOAD_D16_I8 : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
+defm GLOBAL_LOAD_D16_B16 : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
+defm GLOBAL_LOAD_D16_HI_U8 : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
+defm GLOBAL_LOAD_D16_HI_I8 : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
+defm GLOBAL_LOAD_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
+defm GLOBAL_STORE_D16_HI_B8 : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
+defm GLOBAL_STORE_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
+defm GLOBAL_LOAD_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
+defm GLOBAL_STORE_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;
+defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
+defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
+defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
+defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
+defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32">;
+defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
+defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
+defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
+defm GLOBAL_ATOMIC_MAX_U32 : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
+defm GLOBAL_ATOMIC_AND_B32 : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
+defm GLOBAL_ATOMIC_OR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
+defm GLOBAL_ATOMIC_XOR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
+defm GLOBAL_ATOMIC_INC_U32 : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
+defm GLOBAL_ATOMIC_DEC_U32 : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
+defm GLOBAL_ATOMIC_SWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
+defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
+defm GLOBAL_ATOMIC_ADD_U64 : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
+defm GLOBAL_ATOMIC_SUB_U64 : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
+defm GLOBAL_ATOMIC_MIN_I64 : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
+defm GLOBAL_ATOMIC_MIN_U64 : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
+defm GLOBAL_ATOMIC_MAX_I64 : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
+defm GLOBAL_ATOMIC_MAX_U64 : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
+defm GLOBAL_ATOMIC_AND_B64 : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
+defm GLOBAL_ATOMIC_OR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
+defm GLOBAL_ATOMIC_XOR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
+defm GLOBAL_ATOMIC_INC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
+defm GLOBAL_ATOMIC_DEC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
+defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">;
+defm GLOBAL_ATOMIC_MIN_F32 : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">;
+defm GLOBAL_ATOMIC_MAX_F32 : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">;
+defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
+
+// ENC_FLAT_SCRATCH.
+defm SCRATCH_LOAD_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
+defm SCRATCH_LOAD_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
+defm SCRATCH_LOAD_U16 : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
+defm SCRATCH_LOAD_I16 : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
+defm SCRATCH_LOAD_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
+defm SCRATCH_LOAD_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
+defm SCRATCH_LOAD_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
+defm SCRATCH_LOAD_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
+defm SCRATCH_STORE_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
+defm SCRATCH_STORE_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
+defm SCRATCH_STORE_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
+defm SCRATCH_STORE_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
+defm SCRATCH_STORE_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
+defm SCRATCH_STORE_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
+defm SCRATCH_LOAD_D16_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
+defm SCRATCH_LOAD_D16_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
+defm SCRATCH_LOAD_D16_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
+defm SCRATCH_LOAD_D16_HI_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
+defm SCRATCH_LOAD_D16_HI_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
+defm SCRATCH_LOAD_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
+defm SCRATCH_STORE_D16_HI_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
+defm SCRATCH_STORE_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index a8c85ec4e5ea..1cd880eaa48e 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -167,7 +167,9 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
return nullptr;
case AMDGPU::COPY:
case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B64_PSEUDO: {
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64: {
auto &Op1 = Def->getOperand(1);
if (Op1.isImm())
return &Op1;
@@ -183,6 +185,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
bool CombBCZ,
bool IsShrinkable) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
auto OrigOp = OrigMI.getOpcode();
@@ -383,6 +386,7 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
@@ -399,7 +403,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return false;
}
- if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
+ if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
+ MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
assert(DppCtrl && DppCtrl->isImm());
if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
@@ -447,12 +452,6 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return false;
}
- if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
- LLVM_DEBUG(dbgs() <<
- " failed: old reg def and mov should be in the same BB\n");
- return false;
- }
-
if (OldOpndValue->getImm() == 0) {
if (MaskAllLanes) {
assert(!BoundCtrlZero); // by check [1]
@@ -616,7 +615,8 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
- } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
+ } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
+ MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
if (ST->has64BitDPP() && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c0592f6f3c7a..b6d16009e776 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -13,14 +13,38 @@
#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/TargetParser.h"
using namespace llvm;
+namespace {
+
+struct MFMAPaddingRatioParser : public cl::parser<unsigned> {
+ MFMAPaddingRatioParser(cl::Option &O) : cl::parser<unsigned>(O) {}
+
+ bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
+
+ if (Value > 100)
+ return O.error("'" + Arg + "' value must be in the range [0, 100]!");
+
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+static cl::opt<unsigned, false, MFMAPaddingRatioParser>
+ MFMAPaddingRatio("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden,
+ cl::desc("Fill a percentage of the latency between "
+                              "neighboring MFMAs with s_nops."));
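+// Illustrative use (hypothetical command line, not part of this change):
+//   llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-mfma-padding-ratio=50 kernel.ll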
+
//===----------------------------------------------------------------------===//
-// Hazard Recoginizer Implementation
+// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//
static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
@@ -92,12 +116,7 @@ static bool isSMovRel(unsigned Opcode) {
}
static bool isDGEMM(unsigned Opcode) {
- return Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
- Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64 ||
- Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_e64 ||
- Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64 ||
- Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64 ||
- Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64;
+ return AMDGPU::getMAIIsDGEMM(Opcode);
}
static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
@@ -109,7 +128,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
return false;
- return true;
+ if (!ST.hasGFX940Insts())
+ return true;
+
+ return AMDGPU::getMAIIsGFX940XDL(Opcode);
}
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
@@ -144,6 +166,11 @@ static bool isPermlane(const MachineInstr &MI) {
Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
}
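+// LDS DMA (a direct load from memory to LDS) is identified here as an
+// instruction flagged as both VALU and MUBUF/FLAT.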
+static bool isLdsDma(const MachineInstr &MI) {
+ return SIInstrInfo::isVALU(MI) &&
+ (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI));
+}
+
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
@@ -204,12 +231,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return HazardType;
- if (ST.hasReadM0MovRelInterpHazard() &&
- (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
- checkReadM0Hazards(MI) > 0)
- return HazardType;
-
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
+ if (((ST.hasReadM0MovRelInterpHazard() &&
+ (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+ (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+ (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+ (ST.hasReadM0LdsDirectHazard() &&
+ MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
checkReadM0Hazards(MI) > 0)
return HazardType;
@@ -237,6 +264,14 @@ static void insertNoopsInBundle(MachineInstr *MI, const SIInstrInfo &TII,
}
}
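+// Number of wait states an MFMA occupies its pipeline, taken from the first
+// WriteProcRes entry of the instruction's scheduling class.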
+unsigned
+GCNHazardRecognizer::getMFMAPipelineWaitStates(const MachineInstr &MI) const {
+ const MCSchedClassDesc *SC = TSchedModel.resolveSchedClass(&MI);
+ assert(TSchedModel.getWriteProcResBegin(SC) !=
+ TSchedModel.getWriteProcResEnd(SC));
+ return TSchedModel.getWriteProcResBegin(SC)->Cycles;
+}
+
void GCNHazardRecognizer::processBundle() {
MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
@@ -321,11 +356,11 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (isRFE(MI->getOpcode()))
return std::max(WaitStates, checkRFEHazards(MI));
- if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
- isSMovRel(MI->getOpcode())))
- return std::max(WaitStates, checkReadM0Hazards(MI));
-
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
+ if ((ST.hasReadM0MovRelInterpHazard() &&
+ (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
+ (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
+ (ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
+ (ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)))
return std::max(WaitStates, checkReadM0Hazards(MI));
if (SIInstrInfo::isMAI(*MI))
@@ -389,16 +424,61 @@ void GCNHazardRecognizer::RecedeCycle() {
// Helper Functions
//===----------------------------------------------------------------------===//
+typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult;
+
typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
+typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
+
+// Search for a hazard in a block and its predecessors.
+template <typename StateT>
+static bool
+hasHazard(StateT State,
+ function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
+ function_ref<void(StateT &, const MachineInstr &)> UpdateState,
+ const MachineBasicBlock *MBB,
+ MachineBasicBlock::const_reverse_instr_iterator I,
+ DenseSet<const MachineBasicBlock *> &Visited) {
+ for (auto E = MBB->instr_rend(); I != E; ++I) {
+ // No need to look at parent BUNDLE instructions.
+ if (I->isBundle())
+ continue;
+
+ switch (IsHazard(State, *I)) {
+ case HazardFound:
+ return true;
+ case HazardExpired:
+ return false;
+ default:
+ // Continue search
+ break;
+ }
+
+ if (I->isInlineAsm() || I->isMetaInstruction())
+ continue;
+
+ UpdateState(State, *I);
+ }
+
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+
+ if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
+ Visited))
+ return true;
+ }
+
+ return false;
+}
// Returns the minimum number of wait states since \p I, walking all
// predecessors. Only scans until \p IsExpired returns true.
// Can only be run in a hazard recognizer mode.
-static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
- const MachineBasicBlock *MBB,
- MachineBasicBlock::const_reverse_instr_iterator I,
- int WaitStates, IsExpiredFn IsExpired,
- DenseSet<const MachineBasicBlock *> &Visited) {
+static int getWaitStatesSince(
+ GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
+ MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
+ IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
+ GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
for (auto E = MBB->instr_rend(); I != E; ++I) {
// Don't add WaitStates for parent BUNDLE instructions.
if (I->isBundle())
@@ -410,7 +490,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (I->isInlineAsm())
continue;
- WaitStates += SIInstrInfo::getNumWaitStates(*I);
+ WaitStates += GetNumWaitStates(*I);
if (IsExpired(*I, WaitStates))
return std::numeric_limits<int>::max();
@@ -421,8 +501,8 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (!Visited.insert(Pred).second)
continue;
- int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
- WaitStates, IsExpired, Visited);
+ int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
+ IsExpired, Visited, GetNumWaitStates);
MinWaitStates = std::min(MinWaitStates, W);
}
@@ -534,7 +614,7 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
// In order to handle these situations correctly we need to make sure that
// when a clause has more than one instruction, no instruction in the clause
// writes to a register that is read by another instruction in the clause
- // (including itself). If we encounter this situaion, we need to break the
+ // (including itself). If we encounter this situation, we need to break the
// clause by inserting a non SMEM instruction.
for (MachineInstr *MI : EmittedInstrs) {
@@ -764,7 +844,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
// 8 bytes can have their store data overwritten by the next instruction.
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const int VALUWaitStates = 1;
+ const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1;
int WaitStatesNeeded = 0;
if (!TRI->isVectorRegister(MRI, Def.getReg()))
@@ -783,13 +863,136 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
}
int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
+ int WaitStatesNeeded = 0;
+
+ if (ST.hasTransForwardingHazard() && !SIInstrInfo::isTRANS(*VALU)) {
+ const int TransDefWaitstates = 1;
+
+ auto IsTransDefFn = [this, VALU](const MachineInstr &MI) {
+ if (!SIInstrInfo::isTRANS(MI))
+ return false;
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ Register Def = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)->getReg();
+
+ for (const MachineOperand &Use : VALU->explicit_uses()) {
+ if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
+ return true;
+ }
+
+ return false;
+ };
+
+ int WaitStatesNeededForDef =
+ TransDefWaitstates -
+ getWaitStatesSince(IsTransDefFn, TransDefWaitstates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ }
+
+ if (ST.hasDstSelForwardingHazard()) {
+ const int Shift16DefWaitstates = 1;
+
+ auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) {
+ if (!SIInstrInfo::isVALU(MI))
+ return false;
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ if (SIInstrInfo::isSDWA(MI)) {
+ if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel))
+ if (DstSel->getImm() == AMDGPU::SDWA::DWORD)
+ return false;
+ } else {
+ if ((AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::op_sel) == -1) ||
+ !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)
+ ->getImm() &
+ SISrcMods::DST_OP_SEL))
+ return false;
+ }
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
+ Register Def = Dst->getReg();
+
+ for (const MachineOperand &Use : VALU->explicit_uses()) {
+ if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg()))
+ return true;
+ }
+ }
+
+ return false;
+ };
+
+ int WaitStatesNeededForDef =
+ Shift16DefWaitstates -
+ getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ }
+
+ if (ST.hasVDecCoExecHazard()) {
+ const int VALUWriteSGPRVALUReadWaitstates = 2;
+ const int VALUWriteEXECRWLane = 4;
+ const int VALUWriteVGPRReadlaneRead = 1;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register UseReg;
+ auto IsVALUDefSGPRFn = [&UseReg, TRI](const MachineInstr &MI) {
+ if (!SIInstrInfo::isVALU(MI))
+ return false;
+ return MI.modifiesRegister(UseReg, TRI);
+ };
+
+ for (const MachineOperand &Use : VALU->explicit_uses()) {
+ if (!Use.isReg())
+ continue;
+
+ UseReg = Use.getReg();
+ if (TRI->isSGPRReg(MRI, UseReg)) {
+ int WaitStatesNeededForDef =
+ VALUWriteSGPRVALUReadWaitstates -
+ getWaitStatesSince(IsVALUDefSGPRFn,
+ VALUWriteSGPRVALUReadWaitstates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ }
+ }
+
+ if (VALU->readsRegister(AMDGPU::VCC, TRI)) {
+ UseReg = AMDGPU::VCC;
+ int WaitStatesNeededForDef =
+ VALUWriteSGPRVALUReadWaitstates -
+ getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteSGPRVALUReadWaitstates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ }
+
+ switch (VALU->getOpcode()) {
+ case AMDGPU::V_READLANE_B32:
+ case AMDGPU::V_READFIRSTLANE_B32: {
+ MachineOperand *Src = TII.getNamedOperand(*VALU, AMDGPU::OpName::src0);
+ UseReg = Src->getReg();
+ int WaitStatesNeededForDef =
+ VALUWriteVGPRReadlaneRead -
+ getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteVGPRReadlaneRead);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ }
+ LLVM_FALLTHROUGH;
+ case AMDGPU::V_WRITELANE_B32: {
+ UseReg = AMDGPU::EXEC;
+ int WaitStatesNeededForDef =
+ VALUWriteEXECRWLane -
+ getWaitStatesSince(IsVALUDefSGPRFn, VALUWriteEXECRWLane);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
// This checks for the hazard where VMEM instructions that store more than
// 8 bytes can have there store data over written by the next instruction.
if (!ST.has12DWordStoreHazard())
- return 0;
+ return WaitStatesNeeded;
const MachineRegisterInfo &MRI = MF.getRegInfo();
- int WaitStatesNeeded = 0;
for (const MachineOperand &Def : VALU->defs()) {
WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
@@ -861,10 +1064,10 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
const SIInstrInfo *TII = ST.getInstrInfo();
- const int SMovRelWaitStates = 1;
+ const int ReadM0WaitStates = 1;
auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
- return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
- SMovRelWaitStates);
+ return ReadM0WaitStates -
+ getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
}
void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
@@ -873,6 +1076,13 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixSMEMtoVectorWriteHazards(MI);
fixVcmpxExecWARHazard(MI);
fixLdsBranchVmemWARHazard(MI);
+ if (ST.hasLdsDirect()) {
+ fixLdsDirectVALUHazard(MI);
+ fixLdsDirectVMEMHazard(MI);
+ }
+ fixVALUPartialForwardingHazard(MI);
+ fixVALUTransUseHazard(MI);
+ fixWMMAHazards(MI);
}
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -880,7 +1090,12 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
return false;
const SIInstrInfo *TII = ST.getInstrInfo();
- auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVOPC(MI); };
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
+ return (TII->isVOPC(MI) ||
+ ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
+ MI.modifiesRegister(AMDGPU::EXEC, TRI);
+ };
auto IsExpiredFn = [](const MachineInstr &MI, int) {
unsigned Opc = MI.getOpcode();
@@ -893,7 +1108,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
return false;
// V_NOP will be discarded by SQ.
- // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
+ // Use V_MOV_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE*
// which is always a VGPR and available.
auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
Register Reg = Src0->getReg();
@@ -1157,6 +1372,369 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
return true;
}
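+// Set the waitvdst operand of an LDSDIR instruction to cover any VALU reads
+// or writes of its destination register (counted in VALUs, capped at the
+// no-hazard distance).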
+bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
+ if (!SIInstrInfo::isLDSDIR(*MI))
+ return false;
+
+ const int NoHazardWaitStates = 15;
+ const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
+ const Register VDSTReg = VDST->getReg();
+
+ bool VisitedTrans = false;
+ auto IsHazardFn = [this, VDSTReg, &VisitedTrans](const MachineInstr &I) {
+ if (!SIInstrInfo::isVALU(I))
+ return false;
+ VisitedTrans = VisitedTrans || SIInstrInfo::isTRANS(I);
+ // Cover both WAR and WAW
+ return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
+ };
+ auto IsExpiredFn = [&](const MachineInstr &I, int WaitStates) {
+ if (WaitStates >= NoHazardWaitStates)
+ return true;
+    // Instructions which cause va_vdst==0 expire the hazard
+ return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+ SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
+ };
+ auto GetWaitStatesFn = [](const MachineInstr &MI) {
+ return SIInstrInfo::isVALU(MI) ? 1 : 0;
+ };
+
+ DenseSet<const MachineBasicBlock *> Visited;
+ auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(),
+ std::next(MI->getReverseIterator()), 0,
+ IsExpiredFn, Visited, GetWaitStatesFn);
+
+  // Transcendentals can execute in parallel with other VALUs.
+ // This makes va_vdst count unusable with a mixture of VALU and TRANS.
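+ // When a TRANS instruction is seen, the count computed above is discarded
+ // and waitvdst is conservatively clamped to 0 below, forcing a full wait on
+ // outstanding VALU results (interpretation of the code that follows, not
+ // stated explicitly in the source).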
+ if (VisitedTrans)
+ Count = 0;
+
+ MachineOperand *WaitVdstOp =
+ TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst);
+ WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates));
+
+ return true;
+}
+
+bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
+ if (!SIInstrInfo::isLDSDIR(*MI))
+ return false;
+
+ const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
+ const Register VDSTReg = VDST->getReg();
+
+ auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
+ if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
+ !SIInstrInfo::isDS(I))
+ return false;
+ return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
+ };
+ auto IsExpiredFn = [](const MachineInstr &I, int) {
+ return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
+ (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
+ (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ I.getOperand(0).getImm() == 0xffe3);
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
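+ // Hazard found: insert a DEPCTR wait. Assumption based on the encodings
+ // used elsewhere in this file: the 0xffe3 immediate zeroes only the vm_vsrc
+ // field, waiting for outstanding VMEM/LDS reads of VGPRs while leaving the
+ // other dep_ctr fields at their no-wait values.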
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xffe3);
+
+ return true;
+}
+
+bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
+ if (!ST.isWave64())
+ return false;
+ if (!ST.hasVALUPartialForwardingHazard())
+ return false;
+ if (!SIInstrInfo::isVALU(*MI))
+ return false;
+
+ SmallSetVector<Register, 4> SrcVGPRs;
+
+ for (const MachineOperand &Use : MI->explicit_uses()) {
+ if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+ SrcVGPRs.insert(Use.getReg());
+ }
+
+ // Only applies with >= 2 unique VGPR sources
+ if (SrcVGPRs.size() <= 1)
+ return false;
+
+ // Look for the following pattern:
+ // Va <- VALU [PreExecPos]
+ // intv1
+ // Exec <- SALU [ExecPos]
+ // intv2
+ // Vb <- VALU [PostExecPos]
+ // intv3
+ // MI Va, Vb (WaitState = 0)
+ //
+ // Where:
+ // intv1 + intv2 <= 2 VALUs
+ // intv3 <= 4 VALUs
+ //
+ // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
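+ //
+ // For illustration only - a hypothetical sequence matching the pattern
+ // (opcodes and registers are invented, not taken from the source):
+ //
+ //   v0 = V_ADD_F32 ...            ; Va written pre exec change
+ //   exec = S_MOV_B64 ...          ; SALU writes EXEC
+ //   v1 = V_MUL_F32 ...            ; Vb written post exec change
+ //   V_FMA_F32 v2, v0, v1, v2      ; MI reads both Va and Vb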
+
+ const int Intv1plus2MaxVALUs = 2;
+ const int Intv3MaxVALUs = 4;
+ const int IntvMaxVALUs = 6;
+ const int NoHazardVALUWaitStates = IntvMaxVALUs + 2;
+
+ struct StateType {
+ SmallDenseMap<Register, int, 4> DefPos;
+ int ExecPos = std::numeric_limits<int>::max();
+ int VALUs = 0;
+ };
+
+ StateType State;
+
+ // This callback handles all of the hazard detection as well as the expiry testing
+ auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
+ // Too many VALU states have passed
+ if (State.VALUs > NoHazardVALUWaitStates)
+ return HazardExpired;
+
+ // Instructions which cause va_vdst==0 expire the hazard
+ if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+ SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+ (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ I.getOperand(0).getImm() == 0x0fff))
+ return HazardExpired;
+
+ // Track register writes
+ bool Changed = false;
+ if (SIInstrInfo::isVALU(I)) {
+ for (Register Src : SrcVGPRs) {
+ if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) {
+ State.DefPos[Src] = State.VALUs;
+ Changed = true;
+ }
+ }
+ } else if (SIInstrInfo::isSALU(I)) {
+ if (State.ExecPos == std::numeric_limits<int>::max()) {
+ if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) {
+ State.ExecPos = State.VALUs;
+ Changed = true;
+ }
+ }
+ }
+
+ // Early expiration: too many VALUs in intv3
+ if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty())
+ return HazardExpired;
+
+ // Only evaluate state if something changed
+ if (!Changed)
+ return NoHazardFound;
+
+ // Determine positions of VALUs pre/post exec change
+ if (State.ExecPos == std::numeric_limits<int>::max())
+ return NoHazardFound;
+
+ int PreExecPos = std::numeric_limits<int>::max();
+ int PostExecPos = std::numeric_limits<int>::max();
+
+ for (auto Entry : State.DefPos) {
+ int DefVALUs = Entry.second;
+ if (DefVALUs != std::numeric_limits<int>::max()) {
+ if (DefVALUs >= State.ExecPos)
+ PreExecPos = std::min(PreExecPos, DefVALUs);
+ else if (DefVALUs < State.ExecPos)
+ PostExecPos = std::min(PostExecPos, DefVALUs);
+ }
+ }
+
+ // Need a VALU def post exec change
+ if (PostExecPos == std::numeric_limits<int>::max())
+ return NoHazardFound;
+
+ // Too many VALUs in intv3?
+ int Intv3VALUs = PostExecPos;
+ if (Intv3VALUs > Intv3MaxVALUs)
+ return HazardExpired;
+
+ // Too many VALUs in intv2?
+ int Intv2VALUs = (State.ExecPos - PostExecPos) - 1;
+ if (Intv2VALUs > Intv1plus2MaxVALUs)
+ return HazardExpired;
+
+ // Need a VALU def pre exec change
+ if (PreExecPos == std::numeric_limits<int>::max())
+ return NoHazardFound;
+
+ // Too many VALUs in intv1?
+ int Intv1VALUs = PreExecPos - State.ExecPos;
+ if (Intv1VALUs > Intv1plus2MaxVALUs)
+ return HazardExpired;
+
+ // Too many VALUs in intv1 + intv2
+ if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs)
+ return HazardExpired;
+
+ return HazardFound;
+ };
+ auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
+ if (SIInstrInfo::isVALU(MI))
+ State.VALUs += 1;
+ };
+
+ DenseSet<const MachineBasicBlock *> Visited;
+ if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
+ std::next(MI->getReverseIterator()), Visited))
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0x0fff);
+
+ return true;
+}
+
+bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
+ if (!ST.hasVALUTransUseHazard())
+ return false;
+ if (!SIInstrInfo::isVALU(*MI))
+ return false;
+
+ SmallSet<Register, 4> SrcVGPRs;
+
+ for (const MachineOperand &Use : MI->explicit_uses()) {
+ if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+ SrcVGPRs.insert(Use.getReg());
+ }
+
+ // Look for the following pattern:
+ // Va <- TRANS VALU
+ // intv
+ // MI Va (WaitState = 0)
+ //
+ // Where:
+ // intv <= 5 VALUs / 1 TRANS
+ //
+ // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
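+ //
+ // For illustration only (hypothetical opcodes and registers, not taken from
+ // the source):
+ //
+ //   v0 = V_EXP_F32 ...       ; TRANS producing Va
+ //   v1 = V_ADD_F32 v0, ...   ; MI consuming Va within the interval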
+
+ const int IntvMaxVALUs = 5;
+ const int IntvMaxTRANS = 1;
+
+ struct StateType {
+ int VALUs = 0;
+ int TRANS = 0;
+ };
+
+ StateType State;
+
+ // This callback handles all of the hazard detection as well as the expiry testing
+ auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
+ // Too many VALU states have passed
+ if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS)
+ return HazardExpired;
+
+ // Instructions which cause va_vdst==0 expire the hazard
+ if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+ SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+ (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ I.getOperand(0).getImm() == 0x0fff))
+ return HazardExpired;
+
+ // Track register writes
+ if (SIInstrInfo::isTRANS(I)) {
+ for (Register Src : SrcVGPRs) {
+ if (I.modifiesRegister(Src, &TRI)) {
+ return HazardFound;
+ }
+ }
+ }
+
+ return NoHazardFound;
+ };
+ auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
+ if (SIInstrInfo::isVALU(MI))
+ State.VALUs += 1;
+ if (SIInstrInfo::isTRANS(MI))
+ State.TRANS += 1;
+ };
+
+ DenseSet<const MachineBasicBlock *> Visited;
+ if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
+ std::next(MI->getReverseIterator()), Visited))
+ return false;
+
+ // Hazard is observed - insert a wait on the va_vdst counter to ensure the
+ // hazard is avoided (mask 0x0fff achieves this).
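+ // (Assumption from the DEPCTR field layout used here: 0x0fff clears the
+ // va_vdst bits [15:12] while keeping all other fields at no-wait values.)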
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0x0fff);
+
+ return true;
+}
+
+bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
+ if (!SIInstrInfo::isWMMA(*MI))
+ return false;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ auto IsHazardFn = [MI, TII, TRI](const MachineInstr &I) {
+ if (!SIInstrInfo::isWMMA(I))
+ return false;
+
+ // Src0 or Src1 of the current wmma instruction overlaps with the dest of
+ // the previous wmma.
+ const Register CurSrc0Reg =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg();
+ const Register CurSrc1Reg =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
+
+ const Register PrevDstReg =
+ TII->getNamedOperand(I, AMDGPU::OpName::vdst)->getReg();
+
+ if (TRI->regsOverlap(PrevDstReg, CurSrc0Reg) ||
+ TRI->regsOverlap(PrevDstReg, CurSrc1Reg)) {
+ return true;
+ }
+
+ // Src2 of the current wmma instruction overlaps with the dest of the
+ // previous wmma.
+ const MachineOperand *Src2 =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
+ const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register();
+
+ if (CurSrc2Reg != AMDGPU::NoRegister &&
+ TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) {
+
+ const MachineOperand *Src2Mods =
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
+ const bool NoSrc2Mods =
+ (Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0;
+ // Exception: there is no hazard if the wmma instructions are of the same
+ // type and there is no input modifier on src2 of the current instruction.
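+ // Illustration (hypothetical case): back-to-back WMMA ops of the same
+ // opcode where the first result feeds src2 of the second - a plain
+ // accumulator chain - need no V_NOP as long as src2 carries no
+ // NEG/NEG_HI modifier.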
+ return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) ==
+ TII->pseudoToMCOpcode(MI->getOpcode())));
+ }
+
+ return false;
+ };
+
+ auto IsExpiredFn = [](const MachineInstr &I, int) {
+ return SIInstrInfo::isVALU(I);
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+
+ return true;
+}
+
int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
int NSAtoVMEMWaitStates = 1;
@@ -1223,6 +1801,36 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
return ST.hasGFX90AInsts() ? checkMAIHazards90A(MI) : checkMAIHazards908(MI);
}
+int GCNHazardRecognizer::checkMFMAPadding(MachineInstr *MI) {
+ // Early exit if no padding is requested.
+ if (MFMAPaddingRatio == 0)
+ return 0;
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ if (!SIInstrInfo::isMFMA(*MI) || MFI->getOccupancy() < 2)
+ return 0;
+
+ int NeighborMFMALatency = 0;
+ auto IsNeighboringMFMA = [&NeighborMFMALatency,
+ this](const MachineInstr &MI) {
+ if (!SIInstrInfo::isMFMA(MI))
+ return false;
+
+ NeighborMFMALatency = this->getMFMAPipelineWaitStates(MI);
+ return true;
+ };
+
+ const int MaxMFMAPipelineWaitStates = 16;
+ int WaitStatesSinceNeighborMFMA =
+ getWaitStatesSince(IsNeighboringMFMA, MaxMFMAPipelineWaitStates);
+
+ int NeighborMFMAPaddingNeeded =
+ (NeighborMFMALatency * MFMAPaddingRatio / 100) -
+ WaitStatesSinceNeighborMFMA;
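+
+ // Worked example (assumed values, for illustration only): with
+ // -amdgpu-mfma-padding-ratio=50, a neighboring MFMA latency of 16, and 4
+ // wait states already elapsed, the padding needed is 16 * 50 / 100 - 4 = 4.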
+
+ return std::max(0, NeighborMFMAPaddingNeeded);
+}
+
int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
int WaitStatesNeeded = 0;
unsigned Opc = MI->getOpcode();
@@ -1257,12 +1865,6 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
}
}
- auto IsMFMAFn = [](const MachineInstr &MI) {
- return SIInstrInfo::isMAI(MI) &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
- };
-
for (const MachineOperand &Op : MI->explicit_operands()) {
if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
continue;
@@ -1282,9 +1884,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
Register Reg = Op.getReg();
unsigned HazardDefLatency = 0;
- auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency,
+ auto IsOverlappedMFMAFn = [Reg, &HazardDefLatency,
this](const MachineInstr &MI) {
- if (!IsMFMAFn(MI))
+ if (!SIInstrInfo::isMFMA(MI))
return false;
Register DstReg = MI.getOperand(0).getReg();
if (DstReg == Reg)
@@ -1361,9 +1963,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
Register DstReg = MI->getOperand(0).getReg();
unsigned HazardDefLatency = 0;
- auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency,
+ auto IsSrcCMFMAFn = [DstReg, &HazardDefLatency,
this](const MachineInstr &MI) {
- if (!IsMFMAFn(MI))
+ if (!SIInstrInfo::isMFMA(MI))
return false;
Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg();
HazardDefLatency =
@@ -1387,6 +1989,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
+ // Pad neighboring MFMA with noops for better inter-wave performance.
+ WaitStatesNeeded = std::max(WaitStatesNeeded, checkMFMAPadding(MI));
+
return WaitStatesNeeded;
}
@@ -1394,21 +1999,16 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
int WaitStatesNeeded = 0;
unsigned Opc = MI->getOpcode();
- auto IsMFMAFn = [](const MachineInstr &MI) {
- return SIInstrInfo::isMAI(MI) &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
+ auto IsLegacyVALUFn = [](const MachineInstr &MI) {
+ return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI);
};
- auto IsLegacyVALUFn = [&IsMFMAFn](const MachineInstr &MI) {
- return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI);
+ auto IsLegacyVALUNotDotFn = [](const MachineInstr &MI) {
+ return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMFMA(MI) &&
+ !SIInstrInfo::isDOT(MI);
};
- auto IsLegacyVALUNotDotFn = [&IsMFMAFn](const MachineInstr &MI) {
- return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI) && !SIInstrInfo::isDOT(MI);
- };
-
- if (!IsMFMAFn(*MI))
+ if (!SIInstrInfo::isMFMA(*MI))
return WaitStatesNeeded;
const int VALUWritesExecWaitStates = 4;
@@ -1423,6 +2023,13 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
for (const MachineOperand &Use : MI->explicit_uses()) {
const int LegacyVALUNotDotWritesVGPRWaitStates = 2;
const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2;
+ const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3;
+ const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5;
+ const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4;
+ const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9;
+ const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8;
+ const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17;
+ const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16;
const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8;
const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16;
const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3;
@@ -1433,9 +2040,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19;
+ const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4;
+ const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6;
+ const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10;
+ const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18;
+ const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5;
+ const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7;
+ const int GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11;
+ const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19;
const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6;
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
+ const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
const int MaxWaitStates = 19;
if (!Use.isReg())
@@ -1444,9 +2060,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
bool FullReg;
const MachineInstr *MI1;
- auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
+ auto IsOverlappedMFMAFn = [Reg, &FullReg, &MI1,
this](const MachineInstr &MI) {
- if (!IsMFMAFn(MI))
+ if (!SIInstrInfo::isMFMA(MI))
return false;
Register DstReg = MI.getOperand(0).getReg();
FullReg = (DstReg == Reg);
@@ -1467,7 +2083,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
unsigned Opc1 = MI1->getOpcode();
int NeedWaitStates = 0;
if (OpNo == SrcCIdx) {
- if (!isDGEMM(Opc) && isDGEMM(Opc1)) {
+ if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) {
NeedWaitStates = 0;
} else if (FullReg) {
if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
@@ -1475,6 +2091,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
(Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
+ else if (ST.hasGFX940Insts() &&
+ TSchedModel.computeInstrLatency(MI1) == 2)
+ NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
} else {
switch (Opc1) {
case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
@@ -1490,22 +2109,42 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates;
break;
default:
+ if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1))
+ break;
switch (TSchedModel.computeInstrLatency(MI1)) {
case 2:
- NeedWaitStates = isDGEMM(Opc)
- ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
- : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MI1)
+ ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates
+ : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates
+ : isDGEMM(Opc)
+ ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates
+ : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates;
+ break;
+ case 4:
+ assert(ST.hasGFX940Insts());
+ NeedWaitStates = isXDL(ST, *MI1)
+ ? GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates
+ : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates;
break;
case 8:
- NeedWaitStates = isDGEMM(Opc)
- ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
- : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MI1)
+ ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates
+ : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates
+ : isDGEMM(Opc)
+ ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates
+ : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates;
break;
case 16: LLVM_FALLTHROUGH;
default:
- NeedWaitStates = isDGEMM(Opc)
- ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
- : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MI1)
+ ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates
+ : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates
+ : isDGEMM(Opc)
+ ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates
+ : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates;
}
}
}
@@ -1524,14 +2163,32 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
default:
switch (TSchedModel.computeInstrLatency(MI1)) {
case 2:
- NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MI1)
+ ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates
+ : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates
+ : SMFMA4x4WritesVGPROverlappedSrcABWaitStates;
+ break;
+ case 4:
+ assert(ST.hasGFX940Insts());
+ NeedWaitStates = isXDL(ST, *MI1)
+ ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates
+ : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates;
break;
case 8:
- NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MI1)
+ ? GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates
+ : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates
+ : SMFMA16x16WritesVGPROverlappedSrcABWaitStates;
break;
case 16: LLVM_FALLTHROUGH;
default:
- NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MI1)
+ ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates
+ : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates
+ : SMFMA32x32WritesVGPROverlappedSrcABWaitStates;
}
}
}
@@ -1599,18 +2256,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
if (!ST.hasGFX90AInsts())
return 0;
- auto IsMFMAFn = [](const MachineInstr &MI) -> bool {
- return SIInstrInfo::isMAI(MI) &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
- };
-
auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
return isDGEMM(MI.getOpcode());
};
// This is checked in checkMAIHazards90A()
- if (IsMFMAFn(*MI))
+ if (SIInstrInfo::isMFMA(*MI))
return 0;
int WaitStatesNeeded = 0;
@@ -1623,8 +2274,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const MachineInstr *MFMA = nullptr;
unsigned Reg;
- auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) {
- if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
+ auto IsMFMAWriteFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
+ if (!SIInstrInfo::isMFMA(MI) ||
+ !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
return false;
MFMA = &MI;
return true;
@@ -1646,6 +2298,14 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
+ const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4;
+ const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6;
+ const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10;
+ const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18;
+ const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5;
+ const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7;
+ const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11;
+ const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19;
const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
@@ -1685,16 +2345,30 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
int NeedWaitStates = MaxWaitStates;
switch (HazardDefLatency) {
case 2:
- NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
+ NeedWaitStates =
+ ST.hasGFX940Insts()
+ ? isXDL(ST, *MFMA)
+ ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates
+ : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates
+ : SMFMA4x4WriteVgprVALUMemExpReadWaitStates;
break;
case 4:
- assert(isDGEMM(MFMA->getOpcode()));
+ assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
NeedWaitStates =
- IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
- : DMFMA4x4WriteVgprVALUReadWaitStates;
+ isDGEMM(MFMA->getOpcode())
+ ? IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates
+ : DMFMA4x4WriteVgprVALUReadWaitStates
+ : isXDL(ST, *MFMA)
+ ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates
+ : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates;
break;
case 8:
- NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
+ NeedWaitStates =
+ ST.hasGFX940Insts()
+ ? isXDL(ST, *MFMA)
+ ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates
+ : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates
+ : SMFMA16x16WriteVgprVALUMemExpReadWaitStates;
break;
case 16: LLVM_FALLTHROUGH;
default:
@@ -1702,7 +2376,11 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
isDGEMM(MFMA->getOpcode())
? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates
: DMFMA16x16WriteVgprVALUReadWaitStates
- : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
+ : ST.hasGFX940Insts()
+ ? isXDL(ST, *MFMA)
+ ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates
+ : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates
+ : SMFMA32x32WriteVgprVALUMemExpReadWaitStates;
break;
}
@@ -1732,7 +2410,16 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const int SMFMA4x4WriteVgprVALUWawWaitStates = 5;
const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
+ const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4;
+ const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6;
+ const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10;
+ const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18;
+ const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5;
+ const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7;
+ const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11;
+ const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19;
const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
+ const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3;
const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
@@ -1757,19 +2444,35 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
int NeedWaitStates = MaxWaitStates;
switch (TSchedModel.computeInstrLatency(MFMA)) {
case 2:
- NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MFMA)
+ ? GFX940_XDL2PassWriteVgprVALUWawWaitStates
+ : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates
+ : SMFMA4x4WriteVgprVALUWawWaitStates;
break;
case 4:
- assert(isDGEMM(MFMA->getOpcode()));
- NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates;
+ assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts());
+ NeedWaitStates = isDGEMM(MFMA->getOpcode())
+ ? DMFMA4x4WriteVgprVALUWriteWaitStates
+ : isXDL(ST, *MFMA)
+ ? GFX940_XDL4PassWriteVgprVALUWawWaitStates
+ : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates;
break;
case 8:
- NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates;
+ NeedWaitStates = ST.hasGFX940Insts()
+ ? isXDL(ST, *MFMA)
+ ? GFX940_XDL8PassWriteVgprVALUWawWaitStates
+ : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates
+ : SMFMA16x16WriteVgprVALUWawWaitStates;
break;
case 16: LLVM_FALLTHROUGH;
default:
NeedWaitStates = isDGEMM(MFMA->getOpcode())
? DMFMA16x16WriteVgprVALUWriteWaitStates
+ : ST.hasGFX940Insts()
+ ? isXDL(ST, *MFMA)
+ ? GFX940_XDL16PassWriteVgprVALUWawWaitStates
+ : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates
: SMFMA32x32WriteVgprVALUWawWaitStates;
break;
}
@@ -1781,12 +2484,14 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
break;
}
- auto IsSMFMAReadAsCFn = [&Reg, &IsMFMAFn, &MFMA,
- this](const MachineInstr &MI) {
- if (!IsMFMAFn(MI) || isDGEMM(MI.getOpcode()) ||
+ auto IsSMFMAReadAsCFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
+ if (!SIInstrInfo::isMFMA(MI) || isDGEMM(MI.getOpcode()) ||
!MI.readsRegister(Reg, &TRI))
return false;
+ if (ST.hasGFX940Insts() && !isXDL(ST, MI))
+ return false;
+
const MachineOperand *SrcC =
TII.getNamedOperand(MI, AMDGPU::OpName::src2);
assert(SrcC);
@@ -1808,6 +2513,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
switch (HazardDefLatency) {
case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
break;
+ case 4: assert(ST.hasGFX940Insts());
+ NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
+ break;
case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
break;
case 16: LLVM_FALLTHROUGH;
@@ -1827,11 +2535,10 @@ bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
return false;
const MachineInstr *MAI = nullptr;
+
auto IsMFMAFn = [&MAI](const MachineInstr &MI) {
MAI = nullptr;
- if (SIInstrInfo::isMAI(MI) &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
- MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64)
+ if (SIInstrInfo::isMFMA(MI))
MAI = &MI;
return MAI != nullptr;
};
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 716bc027a894..57f5a04c6eda 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -62,6 +62,10 @@ private:
void addClauseInst(const MachineInstr &MI);
+ /// \returns the number of wait states before another MFMA instruction can be
+ /// issued after \p MI.
+ unsigned getMFMAPipelineWaitStates(const MachineInstr &MI) const;
+
// Advance over a MachineInstr bundle. Look for hazards in the bundled
// instructions.
void processBundle();
@@ -92,10 +96,31 @@ private:
bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
bool fixVcmpxExecWARHazard(MachineInstr *MI);
bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+ bool fixLdsDirectVALUHazard(MachineInstr *MI);
+ bool fixLdsDirectVMEMHazard(MachineInstr *MI);
+ bool fixVALUPartialForwardingHazard(MachineInstr *MI);
+ bool fixVALUTransUseHazard(MachineInstr *MI);
+ bool fixWMMAHazards(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
int checkMAIHazards90A(MachineInstr *MI);
+ /// Pad the latency between neighboring MFMA instructions with s_nops. The
+ /// percentage of wait states to fill with s_nops is specified by the command
+ /// line option '-amdgpu-mfma-padding-ratio'.
+ ///
+ /// For example, with '-amdgpu-mfma-padding-ratio=100':
+ ///
+ /// 2 pass MFMA instructions have a latency of 2 wait states. Therefore, an
+ /// 'S_NOP 1' will be added between sequential MFMA instructions.
+ ///
+ /// V_MFMA_F32_4X4X1F32
+ /// V_MFMA_F32_4X4X1F32
+ ///-->
+ /// V_MFMA_F32_4X4X1F32
+ /// S_NOP 1
+ /// V_MFMA_F32_4X4X1F32
+ int checkMFMAPadding(MachineInstr *MI);
int checkMAIVALUHazards(MachineInstr *MI);
int checkMAILdStHazards(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 9f98f9ada802..6f82148854c4 100644
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -1,4 +1,4 @@
-//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//
+//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
-/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA
+/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
/// with sequential versions where possible.
///
//===----------------------------------------------------------------------===//
@@ -16,10 +16,12 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -159,15 +161,23 @@ GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
GCNNSAReassign::NSA_Status
GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
- if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ if (!Info)
return NSA_Status::NOT_NSA;
+ switch (Info->MIMGEncoding) {
+ case AMDGPU::MIMGEncGfx10NSA:
+ case AMDGPU::MIMGEncGfx11NSA:
+ break;
+ default:
+ return NSA_Status::NOT_NSA;
+ }
+
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
unsigned VgprBase = 0;
bool NSA = false;
- for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
Register Reg = Op.getReg();
if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
@@ -179,15 +189,16 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
if (!PhysReg)
return NSA_Status::FIXED;
+ // TODO: address the below limitation to handle GFX11 BVH instructions
// Bail if the address is not a VGPR32. It should be possible to extend the
// optimization to work with subregs of wider register tuples, but the
// logic to find free registers will be much more complicated with much
// less chance of success. It seems reasonable to assume that in most
// cases a tuple is used because a vector variable contains different
- // parts of an address and it is either already consequitive or cannot
+ // parts of an address and it is either already consecutive or cannot
// be reassigned if not. If needed it is better to rely on register
// coalescer to process such address tuples.
- if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
+ if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())
return NSA_Status::FIXED;
// InlineSpiller does not call LRM::assign() after an LI split leaving
@@ -278,7 +289,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
SmallVector<LiveInterval *, 16> Intervals;
SmallVector<MCRegister, 16> OrigRegs;
SlotIndex MinInd, MaxInd;
- for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
Register Reg = Op.getReg();
LiveInterval *LI = &LIS->getInterval(Reg);
@@ -331,11 +342,11 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
}
if (!Success) {
- for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I)
if (VRM->hasPhys(Intervals[I]->reg()))
LRM->unassign(*Intervals[I]);
- for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I)
LRM->assign(*Intervals[I], OrigRegs[I]);
continue;
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 3a68ed1934e1..281474994bca 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -192,6 +192,10 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel,
FeatureISAVersion9_0_C.Features
>;
+def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel,
+ FeatureISAVersion9_4_0.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -235,3 +239,27 @@ def : ProcessorModel<"gfx1034", GFX10SpeedModel,
def : ProcessorModel<"gfx1035", GFX10SpeedModel,
FeatureISAVersion10_3_0.Features
>;
+
+def : ProcessorModel<"gfx1036", GFX10SpeedModel,
+ FeatureISAVersion10_3_0.Features
+>;
+
+//===----------------------------------------------------------------------===//
+// GCN GFX11.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx1100", GFX11SpeedModel,
+ FeatureISAVersion11_0.Features
+>;
+
+def : ProcessorModel<"gfx1101", GFX11SpeedModel,
+ FeatureISAVersion11_0.Features
+>;
+
+def : ProcessorModel<"gfx1102", GFX11SpeedModel,
+ FeatureISAVersion11_0_2.Features
+>;
+
+def : ProcessorModel<"gfx1103", GFX11SpeedModel,
+ FeatureISAVersion11_0_2.Features
+>;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 257561cb8430..c41548d19c8e 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -10,7 +10,7 @@
/// This file defines the GCNRegPressure class, which tracks register pressure
/// by bookkeeping number of SGPR/VGPRs used, weights for large SGPR/VGPRs. It
/// also implements a compare function, which compares different register
-/// pressures, and declares one with max occupance as winner.
+/// pressures, and declares one with max occupancy as winner.
///
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 75855a7a4f9c..100410bb7644 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -13,6 +13,7 @@
#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#define DEBUG_TYPE "machine-scheduler"
@@ -362,6 +363,9 @@ void GCNScheduleDAGMILive::schedule() {
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
Pressure[RegionIdx] = PressureAfter;
+ RegionsWithMinOcc[RegionIdx] =
+ PressureAfter.getOccupancy(ST) == MinOccupancy;
+
LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
return;
}
@@ -378,6 +382,7 @@ void GCNScheduleDAGMILive::schedule() {
// occupancy before was higher, or if the current schedule has register
// pressure higher than the excess limits which could lead to more spilling.
unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
+
// Allow memory bound functions to drop to 4 waves if not limited by an
// attribute.
if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
@@ -390,6 +395,7 @@ void GCNScheduleDAGMILive::schedule() {
if (NewOccupancy < MinOccupancy) {
MinOccupancy = NewOccupancy;
MFI.limitOccupancy(MinOccupancy);
+ RegionsWithMinOcc.reset();
LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
<< MinOccupancy << ".\n");
}
@@ -416,6 +422,8 @@ void GCNScheduleDAGMILive::schedule() {
PressureAfter.less(ST, PressureBefore) ||
!RescheduleRegions[RegionIdx]) {
Pressure[RegionIdx] = PressureAfter;
+ RegionsWithMinOcc[RegionIdx] =
+ PressureAfter.getOccupancy(ST) == MinOccupancy;
if (!RegionsWithClusters[RegionIdx] &&
(Stage + 1) == UnclusteredReschedule)
RescheduleRegions[RegionIdx] = false;
@@ -425,13 +433,18 @@ void GCNScheduleDAGMILive::schedule() {
}
}
+ RegionsWithMinOcc[RegionIdx] =
+ PressureBefore.getOccupancy(ST) == MinOccupancy;
LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
(Stage + 1) != UnclusteredReschedule;
RegionEnd = RegionBegin;
+ int SkippedDebugInstr = 0;
for (MachineInstr *MI : Unsched) {
- if (MI->isDebugInstr())
+ if (MI->isDebugInstr()) {
+ ++SkippedDebugInstr;
continue;
+ }
if (MI->getIterator() != RegionEnd) {
BB->remove(MI);
@@ -459,10 +472,31 @@ void GCNScheduleDAGMILive::schedule() {
++RegionEnd;
LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
}
+
+ // After reverting the schedule, debug instrs will now be at the end of the
+ // block and RegionEnd will point to the first debug instr. Increment
+ // RegionEnd past debug instrs to the actual end of the scheduling region.
+ while (SkippedDebugInstr-- > 0)
+ ++RegionEnd;
+
+ // If Unsched.front() is a debug instruction, this will actually
+ // shrink the region since we moved all debug instructions to the end of the
+ // block. Find the first instruction that is not a debug instruction.
RegionBegin = Unsched.front()->getIterator();
- Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
+ if (RegionBegin->isDebugInstr()) {
+ for (MachineInstr *MI : Unsched) {
+ if (MI->isDebugInstr())
+ continue;
+ RegionBegin = MI->getIterator();
+ break;
+ }
+ }
+ // Then move the debug instructions back into their correct place and set
+ // RegionBegin and RegionEnd if needed.
placeDebugValues();
+
+ Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
}
GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
@@ -493,14 +527,14 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
auto I = MBB->begin();
auto LiveInIt = MBBLiveIns.find(MBB);
+ auto &Rgn = Regions[CurRegion];
+ auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
if (LiveInIt != MBBLiveIns.end()) {
auto LiveIn = std::move(LiveInIt->second);
RPTracker.reset(*MBB->begin(), &LiveIn);
MBBLiveIns.erase(LiveInIt);
} else {
- auto &Rgn = Regions[CurRegion];
I = Rgn.first;
- auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
@@ -511,7 +545,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
for ( ; ; ) {
I = RPTracker.getNext();
- if (Regions[CurRegion].first == I) {
+ if (Regions[CurRegion].first == I || NonDbgMI == I) {
LiveIns[CurRegion] = RPTracker.getLiveRegs();
RPTracker.clearMaxPressure();
}
@@ -561,9 +595,11 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
RescheduleRegions.resize(Regions.size());
RegionsWithClusters.resize(Regions.size());
RegionsWithHighRP.resize(Regions.size());
+ RegionsWithMinOcc.resize(Regions.size());
RescheduleRegions.set();
RegionsWithClusters.reset();
RegionsWithHighRP.reset();
+ RegionsWithMinOcc.reset();
if (!Regions.empty())
BBLiveInMap = getBBLiveInMap();
@@ -600,13 +636,41 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
<< "Retrying function scheduling with lowest recorded occupancy "
<< MinOccupancy << ".\n");
}
+
+ if (Stage == PreRARematerialize) {
+ if (RegionsWithMinOcc.none() || Regions.size() == 1)
+ break;
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ // Check maximum occupancy
+ if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
+ MinOccupancy)
+ break;
+
+ // FIXME: This pass will invalidate cached MBBLiveIns for regions
+ // in between the defs and the region we sink the def to. Cached pressure
+ // for regions where a def is sunk from will also be invalidated. This will
+ // need to be fixed if there is another pass after this pass.
+ static_assert(LastStage == PreRARematerialize,
+ "Passes after PreRARematerialize are not supported");
+
+ collectRematerializableInstructions();
+ if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
+ break;
+
+ LLVM_DEBUG(
+ dbgs() << "Retrying function scheduling with improved occupancy of "
+ << MinOccupancy << " from rematerializing\n");
+ }
}
if (Stage == UnclusteredReschedule)
SavedMutations.swap(Mutations);
for (auto Region : Regions) {
- if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
+ if (((Stage == UnclusteredReschedule || Stage == PreRARematerialize) &&
+ !RescheduleRegions[RegionIdx]) ||
(Stage == ClusteredLowOccupancyReschedule &&
!RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
@@ -631,6 +695,7 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (begin() == end() || begin() == std::prev(end())) {
exitRegion();
+ ++RegionIdx;
continue;
}
@@ -653,3 +718,282 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
SavedMutations.swap(Mutations);
} while (Stage != LastStage);
}
+
+void GCNScheduleDAGMILive::collectRematerializableInstructions() {
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (!LIS->hasInterval(Reg))
+ continue;
+
+ // TODO: Handle AGPR and SGPR rematerialization
+ if (!SRI->isVGPRClass(MRI.getRegClass(Reg)) || !MRI.hasOneDef(Reg) ||
+ !MRI.hasOneNonDBGUse(Reg))
+ continue;
+
+ MachineOperand *Op = MRI.getOneDef(Reg);
+ MachineInstr *Def = Op->getParent();
+ if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def, AA))
+ continue;
+
+ MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg);
+ if (Def->getParent() == UseI->getParent())
+ continue;
+
+ // We are only collecting defs that are defined in another block and are
+ // live-through or used inside regions at MinOccupancy. This means that the
+ // register must be in the live-in set for the region.
+ bool AddedToRematList = false;
+ for (unsigned I = 0, E = Regions.size(); I != E; ++I) {
+ auto It = LiveIns[I].find(Reg);
+ if (It != LiveIns[I].end() && !It->second.none()) {
+ if (RegionsWithMinOcc[I]) {
+ RematerializableInsts[I][Def] = UseI;
+ AddedToRematList = true;
+ }
+
+ // Collect regions with rematerializable reg as live-in to avoid
+ // searching later when updating RP.
+ RematDefToLiveInRegions[Def].push_back(I);
+ }
+ }
+ if (!AddedToRematList)
+ RematDefToLiveInRegions.erase(Def);
+ }
+}
+
+bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
+ const TargetInstrInfo *TII) {
+ // Temporary copies of cached variables we will be modifying and replacing if
+ // sinking succeeds.
+ SmallVector<
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
+ NewRegions;
+ DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
+ DenseMap<unsigned, GCNRegPressure> NewPressure;
+ BitVector NewRescheduleRegions;
+
+ NewRegions.resize(Regions.size());
+ NewRescheduleRegions.resize(Regions.size());
+
+ // Collect only regions that have a rematerializable def as a live-in.
+ SmallSet<unsigned, 16> ImpactedRegions;
+ for (const auto &It : RematDefToLiveInRegions)
+ ImpactedRegions.insert(It.second.begin(), It.second.end());
+
+ // Make copies of register pressure and live-ins cache that will be updated
+ // as we rematerialize.
+ for (auto Idx : ImpactedRegions) {
+ NewPressure[Idx] = Pressure[Idx];
+ NewLiveIns[Idx] = LiveIns[Idx];
+ }
+ NewRegions = Regions;
+ NewRescheduleRegions.reset();
+
+ DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
+ bool Improved = false;
+ for (auto I : ImpactedRegions) {
+ if (!RegionsWithMinOcc[I])
+ continue;
+
+ Improved = false;
+ int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
+ int SGPRUsage = NewPressure[I].getSGPRNum();
+
+ // TODO: Handle occupancy drop due to AGPR and SGPR.
+ // Check if the cause of the occupancy drop is VGPR usage and not SGPR usage.
+ if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == MinOccupancy)
+ break;
+
+ // The occupancy of this region could have been improved by a previous
+ // iteration's sinking of defs.
+ if (NewPressure[I].getOccupancy(ST) > MinOccupancy) {
+ NewRescheduleRegions[I] = true;
+ Improved = true;
+ continue;
+ }
+
+ // First check if we have enough trivially rematerializable instructions to
+ // improve occupancy. Optimistically assume all instructions we are able to
+ // sink will decrease RP.
+ int TotalSinkableRegs = 0;
+ for (const auto &It : RematerializableInsts[I]) {
+ MachineInstr *Def = It.first;
+ Register DefReg = Def->getOperand(0).getReg();
+ TotalSinkableRegs +=
+ SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
+ }
+ int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
+ unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
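+ // Illustration (assumed numbers, for exposition only): if the region
+ // currently uses 132 VGPRs and 8 sinkable live-through registers are
+ // found, the optimistic estimate is the occupancy achievable with 124
+ // VGPRs.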
+ // If in the most optimistic scenario, we cannot improve occupancy, then do
+ // not attempt to sink any instructions.
+ if (OptimisticOccupancy <= MinOccupancy)
+ break;
+
+ unsigned ImproveOccupancy = 0;
+ SmallVector<MachineInstr *, 4> SinkedDefs;
+ for (auto &It : RematerializableInsts[I]) {
+ MachineInstr *Def = It.first;
+ MachineBasicBlock::iterator InsertPos =
+ MachineBasicBlock::iterator(It.second);
+ Register Reg = Def->getOperand(0).getReg();
+ // Rematerialize MI to its use block. Since we are only rematerializing
+ // instructions that do not have any virtual reg uses, we do not need to
+ // call LiveRangeEdit::allUsesAvailableAt() and
+ // LiveRangeEdit::canRematerializeAt().
+ TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
+ Def->getOperand(0).getSubReg(), *Def, *TRI);
+ MachineInstr *NewMI = &*(--InsertPos);
+ LIS->InsertMachineInstrInMaps(*NewMI);
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ InsertedMIToOldDef[NewMI] = Def;
+
+ // Update region boundaries in the scheduling region we sank from, since
+ // we may sink an instruction that was at the beginning or end of its region.
+ updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
+ /*Removing =*/true);
+
+ // Update region boundaries in region we sinked to.
+ updateRegionBoundaries(NewRegions, InsertPos, NewMI);
+
+ LaneBitmask PrevMask = NewLiveIns[I][Reg];
+ // FIXME: Also update cached pressure for where the def was sunk from.
+ // Update RP for all regions that have this reg as a live-in and remove
+ // the reg from all regions as a live-in.
+ for (auto Idx : RematDefToLiveInRegions[Def]) {
+ NewLiveIns[Idx].erase(Reg);
+ if (InsertPos->getParent() != Regions[Idx].first->getParent()) {
+ // Def is live-through and not used in this block.
+ NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), MRI);
+ } else {
+ // Def is used and rematerialized into this block.
+ GCNDownwardRPTracker RPT(*LIS);
+ auto *NonDbgMI = &*skipDebugInstructionsForward(
+ NewRegions[Idx].first, NewRegions[Idx].second);
+ RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
+ RPT.advance(NewRegions[Idx].second);
+ NewPressure[Idx] = RPT.moveMaxPressure();
+ }
+ }
+
+ SinkedDefs.push_back(Def);
+ ImproveOccupancy = NewPressure[I].getOccupancy(ST);
+ if (ImproveOccupancy > MinOccupancy)
+ break;
+ }
+
+ // Remove defs we just sank from all regions' lists of sinkable defs.
+ for (auto &Def : SinkedDefs)
+ for (auto TrackedIdx : RematDefToLiveInRegions[Def])
+ RematerializableInsts[TrackedIdx].erase(Def);
+
+ if (ImproveOccupancy <= MinOccupancy)
+ break;
+
+ NewRescheduleRegions[I] = true;
+ Improved = true;
+ }
+
+ if (!Improved) {
+ // Occupancy was not improved for all regions that were at MinOccupancy.
+ // Undo sinking and remove newly rematerialized instructions.
+ for (auto &Entry : InsertedMIToOldDef) {
+ MachineInstr *MI = Entry.first;
+ MachineInstr *OldMI = Entry.second;
+ Register Reg = MI->getOperand(0).getReg();
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ OldMI->clearRegisterDeads(Reg);
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ return false;
+ }
+
+ // Occupancy was improved for all regions.
+ for (auto &Entry : InsertedMIToOldDef) {
+ MachineInstr *MI = Entry.first;
+ MachineInstr *OldMI = Entry.second;
+
+ // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
+ BBLiveInMap.erase(OldMI);
+
+ // Remove OldMI and update LIS
+ Register Reg = MI->getOperand(0).getReg();
+ LIS->RemoveMachineInstrFromMaps(*OldMI);
+ OldMI->eraseFromParent();
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+
+ // Update live-ins, register pressure, and regions caches.
+ for (auto Idx : ImpactedRegions) {
+ LiveIns[Idx] = NewLiveIns[Idx];
+ Pressure[Idx] = NewPressure[Idx];
+ MBBLiveIns.erase(Regions[Idx].first->getParent());
+ }
+ Regions = NewRegions;
+ RescheduleRegions = NewRescheduleRegions;
+
+ SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ MFI.increaseOccupancy(MF, ++MinOccupancy);
+
+ return true;
+}
+
+// Copied from MachineLICM
+bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) {
+ if (!TII->isTriviallyReMaterializable(MI, AA))
+ return false;
+
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+ return false;
+
+ return true;
+}
+
+// When removing, we will have to check both beginning and ending of the region.
+// When inserting, we will only have to check if we are inserting NewMI in front
+// of a scheduling region and do not need to check the ending since we will only
+// ever be inserting before an already existing MI.
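+// For example (illustrative): if a region begins at instruction A, removing A
+// moves the region begin to the next instruction, while inserting NewMI
+// directly before A makes NewMI the new region begin.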
+void GCNScheduleDAGMILive::updateRegionBoundaries(
+ SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
+ MachineBasicBlock::iterator>> &RegionBoundaries,
+ MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
+ unsigned I = 0, E = RegionBoundaries.size();
+ // Search for the first region of the block where MI is located.
+ while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
+ ++I;
+
+ for (; I != E; ++I) {
+ if (MI->getParent() != RegionBoundaries[I].first->getParent())
+ return;
+
+ if (Removing && MI == RegionBoundaries[I].first &&
+ MI == RegionBoundaries[I].second) {
+ // MI is in a region of size 1; after removing it, the region will have
+ // size 0. Set RegionBegin and RegionEnd to the past-the-end block iterator.
+ RegionBoundaries[I] =
+ std::make_pair(MI->getParent()->end(), MI->getParent()->end());
+ return;
+ }
+ if (MI == RegionBoundaries[I].first) {
+ if (Removing)
+ RegionBoundaries[I] =
+ std::make_pair(std::next(MI), RegionBoundaries[I].second);
+ else
+ // Inserted NewMI in front of region, set new RegionBegin to NewMI
+ RegionBoundaries[I] = std::make_pair(MachineBasicBlock::iterator(NewMI),
+ RegionBoundaries[I].second);
+ return;
+ }
+ if (Removing && MI == RegionBoundaries[I].second) {
+ RegionBoundaries[I] =
+ std::make_pair(RegionBoundaries[I].first, std::prev(MI));
+ return;
+ }
+ }
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index a6e42ad3dfca..97f94f69b70e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#include "GCNRegPressure.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineScheduler.h"
namespace llvm {
@@ -77,7 +78,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
InitialSchedule,
UnclusteredReschedule,
ClusteredLowOccupancyReschedule,
- LastStage = ClusteredLowOccupancyReschedule
+ PreRARematerialize,
+ LastStage = PreRARematerialize
};
const GCNSubtarget &ST;
@@ -110,24 +112,56 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Record regions with high register pressure.
BitVector RegionsWithHighRP;
+ // Regions that have the same occupancy as the latest MinOccupancy
+ BitVector RegionsWithMinOcc;
+
// Region live-in cache.
SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
// Region pressure cache.
SmallVector<GCNRegPressure, 32> Pressure;
+ // Each region at MinOccupancy will have its own list of trivially
+ // rematerializable instructions we can remat to reduce RP. The list maps an
+ // instruction to the position we should remat before, usually the MI using
+ // the rematerializable instruction.
+ MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
+ RematerializableInsts;
+
+ // Map a trivially rematerializable def to a list of regions at MinOccupancy
+ // that have the defined reg as a live-in.
+ DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
+
// Temporary basic block live-in cache.
DenseMap<const MachineBasicBlock*, GCNRPTracker::LiveRegSet> MBBLiveIns;
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+ // Collect all trivially rematerializable VGPR instructions with a single def
+ // and single use outside the defining block into RematerializableInsts.
+ void collectRematerializableInstructions();
+
+ bool isTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA);
+
+ // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
+ // Attempt to reduce RP of VGPR by sinking trivially rematerializable
+ // instructions. Returns true if we were able to sink instruction(s).
+ bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
+ const TargetInstrInfo *TII);
+
// Return current region pressure.
GCNRegPressure getRealRegPressure() const;
// Compute and cache live-ins and pressure for all regions in block.
void computeBlockPressure(const MachineBasicBlock *MBB);
+ // Update region boundaries when removing MI or inserting NewMI before MI.
+ void updateRegionBoundaries(
+ SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
+ MachineBasicBlock::iterator>> &RegionBoundaries,
+ MachineBasicBlock::iterator MI, MachineInstr *NewMI,
+ bool Removing = false);
public:
GCNScheduleDAGMILive(MachineSchedContext *C,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 0cd2cfa2f0e7..d269d0945f3b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -58,133 +58,142 @@ protected:
// Basic subtarget description.
Triple TargetTriple;
AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
- unsigned Gen;
+ unsigned Gen = INVALID;
InstrItineraryData InstrItins;
- int LDSBankCount;
- unsigned MaxPrivateElementSize;
+ int LDSBankCount = 0;
+ unsigned MaxPrivateElementSize = 0;
// Possibly statically set by tablegen, but may want to be overridden.
- bool FastFMAF32;
- bool FastDenormalF32;
- bool HalfRate64Ops;
- bool FullRate64Ops;
+ bool FastFMAF32 = false;
+ bool FastDenormalF32 = false;
+ bool HalfRate64Ops = false;
+ bool FullRate64Ops = false;
// Dynamically set bits that enable features.
- bool FlatForGlobal;
- bool AutoWaitcntBeforeBarrier;
- bool UnalignedScratchAccess;
- bool UnalignedAccessMode;
- bool HasApertureRegs;
- bool SupportsXNACK;
+ bool FlatForGlobal = false;
+ bool AutoWaitcntBeforeBarrier = false;
+ bool UnalignedScratchAccess = false;
+ bool UnalignedAccessMode = false;
+ bool HasApertureRegs = false;
+ bool SupportsXNACK = false;
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for XNACK.
- bool EnableXNACK;
+ bool EnableXNACK = false;
- bool EnableTgSplit;
- bool EnableCuMode;
- bool TrapHandler;
+ bool EnableTgSplit = false;
+ bool EnableCuMode = false;
+ bool TrapHandler = false;
// Used as options.
- bool EnableLoadStoreOpt;
- bool EnableUnsafeDSOffsetFolding;
- bool EnableSIScheduler;
- bool EnableDS128;
- bool EnablePRTStrictNull;
- bool DumpCode;
+ bool EnableLoadStoreOpt = false;
+ bool EnableUnsafeDSOffsetFolding = false;
+ bool EnableSIScheduler = false;
+ bool EnableDS128 = false;
+ bool EnablePRTStrictNull = false;
+ bool DumpCode = false;
// Subtarget properties statically set by tablegen
- bool FP64;
- bool FMA;
- bool MIMG_R128;
- bool CIInsts;
- bool GFX8Insts;
- bool GFX9Insts;
- bool GFX90AInsts;
- bool GFX10Insts;
- bool GFX10_3Insts;
- bool GFX7GFX8GFX9Insts;
- bool SGPRInitBug;
- bool NegativeScratchOffsetBug;
- bool NegativeUnalignedScratchOffsetBug;
- bool HasSMemRealTime;
- bool HasIntClamp;
- bool HasFmaMixInsts;
- bool HasMovrel;
- bool HasVGPRIndexMode;
- bool HasScalarStores;
- bool HasScalarAtomics;
- bool HasSDWAOmod;
- bool HasSDWAScalar;
- bool HasSDWASdst;
- bool HasSDWAMac;
- bool HasSDWAOutModsVOPC;
- bool HasDPP;
- bool HasDPP8;
- bool Has64BitDPP;
- bool HasPackedFP32Ops;
- bool HasExtendedImageInsts;
- bool HasR128A16;
- bool HasGFX10A16;
- bool HasG16;
- bool HasNSAEncoding;
- unsigned NSAMaxSize;
- bool GFX10_AEncoding;
- bool GFX10_BEncoding;
- bool HasDLInsts;
- bool HasDot1Insts;
- bool HasDot2Insts;
- bool HasDot3Insts;
- bool HasDot4Insts;
- bool HasDot5Insts;
- bool HasDot6Insts;
- bool HasDot7Insts;
- bool HasMAIInsts;
- bool HasPkFmacF16Inst;
- bool HasAtomicFaddInsts;
- bool SupportsSRAMECC;
+ bool FP64 = false;
+ bool FMA = false;
+ bool MIMG_R128 = false;
+ bool CIInsts = false;
+ bool GFX8Insts = false;
+ bool GFX9Insts = false;
+ bool GFX90AInsts = false;
+ bool GFX940Insts = false;
+ bool GFX10Insts = false;
+ bool GFX11Insts = false;
+ bool GFX10_3Insts = false;
+ bool GFX7GFX8GFX9Insts = false;
+ bool SGPRInitBug = false;
+ bool UserSGPRInit16Bug = false;
+ bool NegativeScratchOffsetBug = false;
+ bool NegativeUnalignedScratchOffsetBug = false;
+ bool HasSMemRealTime = false;
+ bool HasIntClamp = false;
+ bool HasFmaMixInsts = false;
+ bool HasMovrel = false;
+ bool HasVGPRIndexMode = false;
+ bool HasScalarStores = false;
+ bool HasScalarAtomics = false;
+ bool HasSDWAOmod = false;
+ bool HasSDWAScalar = false;
+ bool HasSDWASdst = false;
+ bool HasSDWAMac = false;
+ bool HasSDWAOutModsVOPC = false;
+ bool HasDPP = false;
+ bool HasDPP8 = false;
+ bool Has64BitDPP = false;
+ bool HasPackedFP32Ops = false;
+ bool HasImageInsts = false;
+ bool HasExtendedImageInsts = false;
+ bool HasR128A16 = false;
+ bool HasGFX10A16 = false;
+ bool HasG16 = false;
+ bool HasNSAEncoding = false;
+ unsigned NSAMaxSize = 0;
+ bool GFX10_AEncoding = false;
+ bool GFX10_BEncoding = false;
+ bool HasDLInsts = false;
+ bool HasDot1Insts = false;
+ bool HasDot2Insts = false;
+ bool HasDot3Insts = false;
+ bool HasDot4Insts = false;
+ bool HasDot5Insts = false;
+ bool HasDot6Insts = false;
+ bool HasDot7Insts = false;
+ bool HasDot8Insts = false;
+ bool HasMAIInsts = false;
+ bool HasPkFmacF16Inst = false;
+ bool HasAtomicFaddRtnInsts = false;
+ bool HasAtomicFaddNoRtnInsts = false;
+ bool HasAtomicPkFaddNoRtnInsts = false;
+ bool SupportsSRAMECC = false;
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
- bool EnableSRAMECC;
-
- bool HasNoSdstCMPX;
- bool HasVscnt;
- bool HasGetWaveIdInst;
- bool HasSMemTimeInst;
- bool HasShaderCyclesRegister;
- bool HasVOP3Literal;
- bool HasNoDataDepHazard;
- bool FlatAddressSpace;
- bool FlatInstOffsets;
- bool FlatGlobalInsts;
- bool FlatScratchInsts;
- bool ScalarFlatScratchInsts;
- bool HasArchitectedFlatScratch;
- bool AddNoCarryInsts;
- bool HasUnpackedD16VMem;
- bool LDSMisalignedBug;
- bool HasMFMAInlineLiteralBug;
- bool UnalignedBufferAccess;
- bool UnalignedDSAccess;
- bool HasPackedTID;
- bool ScalarizeGlobal;
-
- bool HasVcmpxPermlaneHazard;
- bool HasVMEMtoScalarWriteHazard;
- bool HasSMEMtoVectorWriteHazard;
- bool HasInstFwdPrefetchBug;
- bool HasVcmpxExecWARHazard;
- bool HasLdsBranchVmemWARHazard;
- bool HasNSAtoVMEMBug;
- bool HasNSAClauseBug;
- bool HasOffset3fBug;
- bool HasFlatSegmentOffsetBug;
- bool HasImageStoreD16Bug;
- bool HasImageGather4D16Bug;
+ bool EnableSRAMECC = false;
+
+ bool HasNoSdstCMPX = false;
+ bool HasVscnt = false;
+ bool HasGetWaveIdInst = false;
+ bool HasSMemTimeInst = false;
+ bool HasShaderCyclesRegister = false;
+ bool HasVOP3Literal = false;
+ bool HasNoDataDepHazard = false;
+ bool FlatAddressSpace = false;
+ bool FlatInstOffsets = false;
+ bool FlatGlobalInsts = false;
+ bool FlatScratchInsts = false;
+ bool ScalarFlatScratchInsts = false;
+ bool HasArchitectedFlatScratch = false;
+ bool EnableFlatScratch = false;
+ bool AddNoCarryInsts = false;
+ bool HasUnpackedD16VMem = false;
+ bool LDSMisalignedBug = false;
+ bool HasMFMAInlineLiteralBug = false;
+ bool UnalignedBufferAccess = false;
+ bool UnalignedDSAccess = false;
+ bool HasPackedTID = false;
+ bool ScalarizeGlobal = false;
+
+ bool HasVcmpxPermlaneHazard = false;
+ bool HasVMEMtoScalarWriteHazard = false;
+ bool HasSMEMtoVectorWriteHazard = false;
+ bool HasInstFwdPrefetchBug = false;
+ bool HasVcmpxExecWARHazard = false;
+ bool HasLdsBranchVmemWARHazard = false;
+ bool HasNSAtoVMEMBug = false;
+ bool HasNSAClauseBug = false;
+ bool HasOffset3fBug = false;
+ bool HasFlatSegmentOffsetBug = false;
+ bool HasImageStoreD16Bug = false;
+ bool HasImageGather4D16Bug = false;
+ bool HasVOPDInsts = false;
// Dummy feature to use for assembler in tablegen.
- bool FeatureDisable;
+ bool FeatureDisable = false;
SelectionDAGTargetInfo TSInfo;
private:
@@ -193,9 +202,6 @@ private:
SIFrameLowering FrameLowering;
public:
- // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
- static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
-
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM);
~GCNSubtarget() override;
@@ -258,9 +264,19 @@ public:
return (Generation)Gen;
}
+ unsigned getMaxWaveScratchSize() const {
+ // See COMPUTE_TMPRING_SIZE.WAVESIZE.
+ if (getGeneration() < GFX11) {
+ // 13-bit field in units of 256-dword.
+ return (256 * 4) * ((1 << 13) - 1);
+ }
+ // 15-bit field in units of 64-dword.
+ return (64 * 4) * ((1 << 15) - 1);
+ }
+
/// Return the number of high bits known to be zero for a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
- return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
+ return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
}
int getLDSBankCount() const {
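To make the constants in getMaxWaveScratchSize concrete, here is the arithmetic as a standalone snippet (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Pre-GFX11: WAVESIZE is a 13-bit field in units of 256 dwords
  // (1024 bytes): (256 * 4) * ((1 << 13) - 1) = 8,387,584 bytes.
  uint32_t PreGfx11 = (256 * 4) * ((1u << 13) - 1);
  // GFX11+: a 15-bit field in units of 64 dwords (256 bytes):
  // (64 * 4) * ((1 << 15) - 1) = 8,388,352 bytes.
  uint32_t Gfx11 = (64 * 4) * ((1u << 15) - 1);
  assert(PreGfx11 == 8387584u && Gfx11 == 8388352u);
  // Both values fit in 23 bits, so counting leading zeros of the 32-bit
  // value yields 9; getKnownHighZeroBitsForFrameIndex then adds the
  // wavefront size log2 on top.
  assert(PreGfx11 < (1u << 23) && Gfx11 < (1u << 23));
  return 0;
}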
@@ -558,13 +574,20 @@ public:
// The ST addressing mode means no registers are used, either VGPR or SGPR,
// but only the immediate offset is swizzled and added to the FLAT scratch base.
bool hasFlatScratchSTMode() const {
- return hasFlatScratchInsts() && hasGFX10_3Insts();
+ return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
}
+ bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
+
bool hasScalarFlatScratchInsts() const {
return ScalarFlatScratchInsts;
}
+ bool enableFlatScratch() const {
+ return flatScratchIsArchitected() ||
+ (EnableFlatScratch && hasFlatScratchInsts());
+ }
+
bool hasGlobalAddTidInsts() const {
return GFX10_BEncoding;
}
@@ -690,6 +713,10 @@ public:
return HasDot7Insts;
}
+ bool hasDot8Insts() const {
+ return HasDot8Insts;
+ }
+
bool hasMAIInsts() const {
return HasMAIInsts;
}
@@ -699,9 +726,15 @@ public:
}
bool hasAtomicFaddInsts() const {
- return HasAtomicFaddInsts;
+ return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
}
+ bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; }
+
+ bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
+
+ bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
+
bool hasNoSdstCMPX() const {
return HasNoSdstCMPX;
}
@@ -765,8 +798,6 @@ public:
return true;
}
- bool enableFlatScratch() const;
-
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
@@ -805,6 +836,9 @@ public:
/// \returns true if the subtarget has the v_permlanex16_b32 instruction.
bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
+ /// \returns true if the subtarget has the v_permlane64_b32 instruction.
+ bool hasPermLane64() const { return getGeneration() >= GFX11; }
+
bool hasDPP() const {
return HasDPP;
}
@@ -830,7 +864,11 @@ public:
}
bool hasFmaakFmamkF32Insts() const {
- return getGeneration() >= GFX10;
+ return getGeneration() >= GFX10 || hasGFX940Insts();
+ }
+
+ bool hasImageInsts() const {
+ return HasImageInsts;
}
bool hasExtendedImageInsts() const {
@@ -875,6 +913,10 @@ public:
bool hasMadF16() const;
+ bool hasMovB64() const { return GFX940Insts; }
+
+ bool hasLshlAddB64() const { return GFX940Insts; }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -887,6 +929,10 @@ public:
return SGPRInitBug;
}
+ bool hasUserSGPRInit16Bug() const {
+ return UserSGPRInit16Bug;
+ }
+
bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
bool hasNegativeUnalignedScratchOffsetBug() const {
@@ -915,6 +961,14 @@ public:
getGeneration() <= AMDGPUSubtarget::GFX9;
}
+ bool hasReadM0LdsDmaHazard() const {
+ return getGeneration() == AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasReadM0LdsDirectHazard() const {
+ return getGeneration() == AMDGPUSubtarget::GFX9;
+ }
+
bool hasVcmpxPermlaneHazard() const {
return HasVcmpxPermlaneHazard;
}
@@ -943,6 +997,22 @@ public:
return HasLdsBranchVmemWARHazard;
}
+ // Has a one-cycle hazard on a transcendental instruction feeding a
+ // non-transcendental VALU.
+ bool hasTransForwardingHazard() const { return GFX940Insts; }
+
+ // Has a one-cycle hazard on a VALU instruction partially writing dst with
+ // a shift of result bits feeding another VALU instruction.
+ bool hasDstSelForwardingHazard() const { return GFX940Insts; }
+
+ // Cannot use op_sel with v_dot instructions.
+ bool hasDOTOpSelHazard() const { return GFX940Insts; }
+
+ // Does not have HW interlocks for VALU writing and then reading SGPRs.
+ bool hasVDecCoExecHazard() const {
+ return GFX940Insts;
+ }
+
bool hasNSAtoVMEMBug() const {
return HasNSAtoVMEMBug;
}
@@ -953,11 +1023,43 @@ public:
bool hasGFX90AInsts() const { return GFX90AInsts; }
+ bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
+
+ bool hasLdsDirect() const { return getGeneration() >= GFX11; }
+
+ bool hasVALUPartialForwardingHazard() const {
+ return getGeneration() >= GFX11;
+ }
+
+ bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; }
+
/// Return if operations acting on VGPR tuples require even alignment.
bool needsAlignedVGPRs() const { return GFX90AInsts; }
+ /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
+ bool hasSPackHL() const { return GFX11Insts; }
+
+ /// Return true if the target's EXP instruction has the COMPR flag, which
+ /// affects the meaning of the EN (enable) bits.
+ bool hasCompressedExport() const { return !GFX11Insts; }
+
+ /// Return true if the target's EXP instruction supports the NULL export
+ /// target.
+ bool hasNullExportTarget() const { return !GFX11Insts; }
+
+ bool hasVOPDInsts() const { return HasVOPDInsts; }
+
+ bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }
+
+ /// Return true if the target has the S_DELAY_ALU instruction.
+ bool hasDelayAlu() const { return GFX11Insts; }
+
bool hasPackedTID() const { return HasPackedTID; }
+ // GFX940 is a derivative of GFX90A. hasGFX940Insts() being true implies
+ // that hasGFX90AInsts() is also true.
+ bool hasGFX940Insts() const { return GFX940Insts; }
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
@@ -989,6 +1091,9 @@ public:
return getGeneration() >= GFX9;
}
+ // \returns true if the target supports the pre-NGG legacy geometry path.
+ bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
+
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
@@ -1105,6 +1210,10 @@ public:
/// unit requirement.
unsigned getMaxNumVGPRs(const Function &F) const;
+ unsigned getMaxNumAGPRs(const Function &F) const {
+ return getMaxNumVGPRs(F);
+ }
+
/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
@@ -1165,6 +1274,10 @@ public:
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
SDep &Dep) const override;
+
+ // \returns true if it's beneficial on this subtarget for the scheduler to
+ // cluster stores as well as loads.
+ bool shouldClusterStores() const { return getGeneration() >= GFX11; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
new file mode 100644
index 000000000000..1f65376890da
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
@@ -0,0 +1,116 @@
+//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LDSDIR encoding
+//===----------------------------------------------------------------------===//
+
+class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
+ // encoding fields
+ bits<2> attrchan;
+ bits<6> attr;
+ bits<4> waitvdst;
+ bits<8> vdst;
+
+ // encoding
+ let Inst{31-24} = 0xce; // encoding
+ let Inst{23-22} = 0x0; // reserved
+ let Inst{21-20} = op;
+ let Inst{19-16} = waitvdst;
+ let Inst{15-10} = !if(is_direct, ?, attr);
+ let Inst{9-8} = !if(is_direct, ?, attrchan);
+ let Inst{7-0} = vdst;
+}
+
+//===----------------------------------------------------------------------===//
+// LDSDIR Classes
+//===----------------------------------------------------------------------===//
+
+class LDSDIR_getIns<bit direct> {
+ dag ret = !if(direct,
+ (ins wait_vdst:$waitvdst),
+ (ins Attr:$attr, AttrChan:$attrchan, wait_vdst:$waitvdst)
+ );
+}
+
+class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI<
+ (outs VGPR_32:$vdst),
+ LDSDIR_getIns<direct>.ret,
+ asm> {
+ let LDSDIR = 1;
+ let EXP_CNT = 1;
+
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+
+ string Mnemonic = opName;
+ let UseNamedOperandTable = 1;
+
+ let Uses = [M0, EXEC];
+ let DisableWQM = 0;
+ let SchedRW = [WriteLDS];
+
+ bit is_direct;
+ let is_direct = direct;
+}
+
+class LDSDIR_Pseudo<string opName, bit direct> :
+ LDSDIR_Common<opName, "", direct>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+class LDSDIR_getAsm<bit direct> {
+ string ret = !if(direct,
+ " $vdst$waitvdst",
+ " $vdst, $attr$attrchan$waitvdst"
+ );
+}
+
+class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> :
+ LDSDIR_Common<lds.Mnemonic,
+ lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret,
+ lds.is_direct>,
+ SIMCInstr <lds.Mnemonic, subtarget>,
+ LDSDIRe<op, lds.is_direct> {
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// LDS Direct Instructions
+//===----------------------------------------------------------------------===//
+
+def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>;
+def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_direct_load M0)),
+ (LDS_DIRECT_LOAD 0)
+>;
+
+def : GCNPat <
+ (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)),
+ (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0)
+>;
+
+//===----------------------------------------------------------------------===//
+// GFX11+
+//===----------------------------------------------------------------------===//
+
+multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> {
+ def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> {
+ let AssemblerPredicate = isGFX11Plus;
+ let DecoderNamespace = "GFX11";
+ }
+}
+
+defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>;
+defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>;
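For readers not fluent in TableGen Enc32 classes, the let Inst{...} lines in LDSDIRe above pack the fields as follows. This C++ helper is an invented restatement for illustration, not generated code:

#include <cstdint>

// Mirrors LDSDIRe: 0xce in bits 31-24, op in 21-20, waitvdst in 19-16,
// attr in 15-10 and attrchan in 9-8 (param form only), vdst in 7-0.
// Bits 23-22 are reserved and stay zero.
uint32_t encodeLDSDIR(uint8_t op, uint8_t waitvdst, uint8_t attr,
                      uint8_t attrchan, uint8_t vdst, bool isDirect) {
  uint32_t Inst = 0xceu << 24;
  Inst |= (op & 0x3u) << 20;
  Inst |= (waitvdst & 0xfu) << 16;
  if (!isDirect) { // the direct form leaves these bits undefined ('?')
    Inst |= (attr & 0x3fu) << 10;
    Inst |= (attrchan & 0x3u) << 8;
  }
  Inst |= vdst;
  return Inst;
}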
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
index 912bcc792e4d..24c9cc2d7dd2 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -239,9 +239,9 @@ void AMDGPUCustomBehaviour::generateWaitCntInfo() {
AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
InstrWaitCntInfo.resize(SrcMgr.size());
- int Index = 0;
- for (auto I = SrcMgr.begin(), E = SrcMgr.end(); I != E; ++I, ++Index) {
- const std::unique_ptr<Instruction> &Inst = *I;
+ for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) {
+ const std::unique_ptr<Instruction> &Inst = EN.value();
+ unsigned Index = EN.index();
unsigned Opcode = Inst->getOpcode();
const MCInstrDesc &MCID = MCII.get(Opcode);
if ((MCID.TSFlags & SIInstrFlags::DS) &&
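The hunk above swaps a manually maintained index for llvm::enumerate. The idiom, reduced to a standalone example (the vector is just a stand-in for any forward range):

#include "llvm/ADT/STLExtras.h"
#include <vector>

void demoEnumerate(const std::vector<int> &Vals) {
  for (const auto &EN : llvm::enumerate(Vals)) {
    size_t Index = EN.index(); // position, starting at 0
    int Value = EN.value();    // element at that position
    (void)Index;
    (void)Value;
  }
}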
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
index 56650515bd0a..7a0d454c3578 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
@@ -31,7 +31,7 @@ public:
AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
: InstrPostProcess(STI, MCII) {}
- ~AMDGPUInstrPostProcess() {}
+ ~AMDGPUInstrPostProcess() = default;
void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
const MCInst &MCI) override;
@@ -86,7 +86,7 @@ public:
AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII);
- ~AMDGPUCustomBehaviour() {}
+ ~AMDGPUCustomBehaviour() = default;
/// This method is used to determine if an instruction
/// should be allowed to be dispatched. The return value is
/// how many cycles until the instruction can be dispatched.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 50318a59225d..bda3c25e956b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -10,13 +10,16 @@
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -47,7 +50,10 @@ public:
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
};
} //End anonymous namespace
@@ -134,6 +140,9 @@ void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
MutableArrayRef<char> Data, uint64_t Value,
bool IsResolved,
const MCSubtargetInfo *STI) const {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return;
+
Value = adjustFixupValue(Fixup, Value, &Asm.getContext());
if (!Value)
return; // Doesn't change encoding.
@@ -153,6 +162,15 @@ void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
Data[Offset + i] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
}
+Optional<MCFixupKind> AMDGPUAsmBackend::getFixupKind(StringRef Name) const {
+ return StringSwitch<Optional<MCFixupKind>>(Name)
+#define ELF_RELOC(Name, Value) \
+ .Case(#Name, MCFixupKind(FirstLiteralRelocationKind + Value))
+#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def"
+#undef ELF_RELOC
+ .Default(None);
+}
+
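getFixupKind above builds one StringSwitch case per relocation via the ELF_RELOC X-macro in ELFRelocs/AMDGPU.def. The same pattern, reduced to a sketch with invented relocation names (the real code pulls its cases from the .def file rather than listing them inline):

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringSwitch.h"

#define MY_RELOC(Name, Value) .Case(#Name, Value)
llvm::Optional<unsigned> lookupReloc(llvm::StringRef Name) {
  return llvm::StringSwitch<llvm::Optional<unsigned>>(Name)
      MY_RELOC(R_EXAMPLE_NONE, 0)
      MY_RELOC(R_EXAMPLE_ABS32, 1)
      .Default(llvm::None);
}
#undef MY_RELOC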
const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
@@ -160,12 +178,21 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
{ "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
};
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
return Infos[Kind - FirstTargetFixupKind];
}
+bool AMDGPUAsmBackend::shouldForceRelocation(const MCAssembler &,
+ const MCFixup &Fixup,
+ const MCValue &) {
+ return Fixup.getKind() >= FirstLiteralRelocationKind;
+}
+
unsigned AMDGPUAsmBackend::getMinimumNopSize() const {
return 4;
}
@@ -236,5 +263,5 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(),
- getHsaAbiVersion(&STI).getValueOr(0));
+ getHsaAbiVersion(&STI).value_or(0));
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index bb2c298c2850..066b36622a16 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -65,7 +65,10 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_REL64;
}
- switch (Fixup.getKind()) {
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
+ switch (Kind) {
default: break;
case FK_PCRel_4:
return ELF::R_AMDGPU_REL32;
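The Kind - FirstLiteralRelocationKind mapping is the inverse of what the new getFixupKind in AMDGPUAsmBackend does: the fixup kind stores the raw ELF relocation value offset by FirstLiteralRelocationKind, and the subtraction recovers it. A round-trip sketch (the demo function is invented; the MC symbols are real):

#include "llvm/MC/MCFixup.h"

unsigned roundTripLiteralReloc(unsigned ElfRelocValue) {
  llvm::MCFixupKind Kind =
      llvm::MCFixupKind(llvm::FirstLiteralRelocationKind + ElfRelocValue);
  return Kind - llvm::FirstLiteralRelocationKind; // == ElfRelocValue
}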
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 76663b563150..bd938d829953 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -120,14 +120,6 @@ void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "addr64");
}
-void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getImm()) {
- O << " offset:";
- printU16ImmDecOperand(MI, OpNo, O);
- }
-}
-
void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -152,7 +144,7 @@ void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
if (IsFlatSeg) { // Unsigned offset
printU16ImmDecOperand(MI, OpNo, O);
} else { // Signed offset
- if (AMDGPU::isGFX10Plus(STI)) {
+ if (AMDGPU::isGFX10(STI)) {
O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
} else {
O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
@@ -191,6 +183,13 @@ void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm());
}
+void AMDGPUInstPrinter::printSMEMOffsetMod(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << " offset:";
+ printSMEMOffset(MI, OpNo, STI, O);
+}
+
void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -206,13 +205,15 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
auto Imm = MI->getOperand(OpNo).getImm();
if (Imm & CPol::GLC)
- O << " glc";
+ O << ((AMDGPU::isGFX940(STI) &&
+ !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
+ : " glc");
if (Imm & CPol::SLC)
- O << " slc";
+ O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
- O << " scc";
+ O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
if (Imm & ~CPol::ALL)
O << " /* unexpected cache policy bit */";
}
@@ -309,8 +310,8 @@ void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
if (AMDGPU::isGFX10Plus(STI)) {
if (Val == UFMT_DEFAULT)
return;
- if (isValidUnifiedFormat(Val)) {
- O << " format:[" << getUnifiedFormatName(Val) << ']';
+ if (isValidUnifiedFormat(Val, STI)) {
+ O << " format:[" << getUnifiedFormatName(Val, STI) << ']';
} else {
O << " format:" << Val;
}
@@ -362,27 +363,26 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
auto Opcode = MI->getOpcode();
auto Flags = MII.get(Opcode).TSFlags;
-
if (OpNo == 0) {
- if (Flags & SIInstrFlags::VOP3) {
+ if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP)
+ O << "_e64_dpp";
+ else if (Flags & SIInstrFlags::VOP3) {
if (!getVOP3IsSingle(Opcode))
O << "_e64";
- } else if (Flags & SIInstrFlags::DPP) {
+ } else if (Flags & SIInstrFlags::DPP)
O << "_dpp";
- } else if (Flags & SIInstrFlags::SDWA) {
+ else if (Flags & SIInstrFlags::SDWA)
O << "_sdwa";
- } else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
- ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode))) {
+ else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
+ ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode)))
O << "_e32";
- }
O << " ";
}
- printOperand(MI, OpNo, STI, O);
+ printRegularOperand(MI, OpNo, STI, O);
// Print default vcc/vcc_lo operand.
switch (Opcode) {
@@ -400,7 +400,16 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
- printDefaultVccOperand(1, STI, O);
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
+ printDefaultVccOperand(false, STI, O);
break;
}
}
@@ -412,7 +421,7 @@ void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
else
O << "_e32 ";
- printOperand(MI, OpNo, STI, O);
+ printRegularOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
@@ -533,7 +542,7 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
O << "0.15915494309189532";
else {
- assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
+ assert(isUInt<32>(Imm) || isInt<32>(Imm));
// In rare situations, we will have a 32-bit literal in a 64-bit
// operand. This is technically allowed for the encoding of s_mov_b64.
@@ -548,6 +557,18 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
if (!Imm)
return;
+ if (AMDGPU::isGFX940(STI)) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+ O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
+ << ((Imm >> 2) & 1) << ']';
+ return;
+ }
+ }
+
O << " blgp:" << Imm;
}
@@ -571,26 +592,73 @@ void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
O << " abid:" << Imm;
}
-void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo,
+void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (OpNo > 0)
+ if (!FirstOperand)
O << ", ";
- printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
- AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
- if (OpNo == 0)
+ printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64]
+ ? AMDGPU::VCC
+ : AMDGPU::VCC_LO,
+ O, MRI);
+ if (FirstOperand)
O << ", ";
}
+void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint8_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " wait_vdst:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint8_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " wait_exp:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
+ unsigned OpNo) const {
+ return OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) &&
+ (Desc.TSFlags & SIInstrFlags::VOPC) &&
+ (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
+ Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO));
+}
+
+// Print default vcc/vcc_lo operand of VOPC.
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- // Print default vcc/vcc_lo operand of VOPC.
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
+ unsigned Opc = MI->getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ // 0, 1, and 2 are the first printed operands in different cases.
+ // If there are printed modifiers, printOperandAndFPInputMods or
+ // printOperandAndIntInputMods will be called instead.
+ if ((OpNo == 0 ||
+ (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP)) ||
+ (OpNo == 2 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
+ (Desc.TSFlags & SIInstrFlags::VOPC) &&
(Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
- printDefaultVccOperand(OpNo, STI, O);
+ printDefaultVccOperand(true, STI, O);
+
+ printRegularOperand(MI, OpNo, STI, O);
+}
+
+// Print operands after vcc or modifier handling.
+void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
if (OpNo >= MI->getNumOperands()) {
O << "/*Missing OP" << OpNo << "*/";
@@ -710,12 +778,24 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_CNDMASK_B32_e32_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
+ case AMDGPU::V_CNDMASK_B32_dpp_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
+ case AMDGPU::V_CNDMASK_B32_dpp8_gfx11:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
case AMDGPU::V_CNDMASK_B32_e32_vi:
if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src1))
- printDefaultVccOperand(OpNo, STI, O);
+ printDefaultVccOperand(OpNo == 0, STI, O);
break;
}
@@ -732,6 +812,10 @@ void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ if (needsImpliedVcc(Desc, OpNo))
+ printDefaultVccOperand(true, STI, O);
+
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
// Use 'neg(...)' instead of '-' to avoid ambiguity.
@@ -754,7 +838,7 @@ void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
if (InputModifiers & SISrcMods::ABS)
O << '|';
- printOperand(MI, OpNo + 1, STI, O);
+ printRegularOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::ABS)
O << '|';
@@ -767,10 +851,14 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ if (needsImpliedVcc(Desc, OpNo))
+ printDefaultVccOperand(true, STI, O);
+
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
if (InputModifiers & SISrcMods::SEXT)
O << "sext(";
- printOperand(MI, OpNo + 1, STI, O);
+ printRegularOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::SEXT)
O << ')';
@@ -784,7 +872,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src1))
- printDefaultVccOperand(OpNo, STI, O);
+ printDefaultVccOperand(OpNo == 0, STI, O);
break;
}
}
@@ -1203,9 +1291,9 @@ void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- printOperand(MI, OpNo, STI, O);
+ printRegularOperand(MI, OpNo, STI, O);
O << ", ";
- printOperand(MI, OpNo + 1, STI, O);
+ printRegularOperand(MI, OpNo + 1, STI, O);
}
void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
@@ -1262,15 +1350,16 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
uint16_t MsgId;
uint16_t OpId;
uint16_t StreamId;
- decodeMsg(Imm16, MsgId, OpId, StreamId);
+ decodeMsg(Imm16, MsgId, OpId, StreamId, STI);
- if (isValidMsgId(MsgId, STI) &&
- isValidMsgOp(MsgId, OpId, STI) &&
+ StringRef MsgName = getMsgName(MsgId, STI);
+
+ if (!MsgName.empty() && isValidMsgOp(MsgId, OpId, STI) &&
isValidMsgStream(MsgId, OpId, StreamId, STI)) {
- O << "sendmsg(" << getMsgName(MsgId);
- if (msgRequiresOp(MsgId)) {
- O << ", " << getMsgOpName(MsgId, OpId);
- if (msgSupportsStream(MsgId, OpId)) {
+ O << "sendmsg(" << MsgName;
+ if (msgRequiresOp(MsgId, STI)) {
+ O << ", " << getMsgOpName(MsgId, OpId, STI);
+ if (msgSupportsStream(MsgId, OpId, STI)) {
O << ", " << StreamId;
}
}
@@ -1423,6 +1512,76 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ using namespace llvm::AMDGPU::DepCtr;
+
+ uint64_t Imm16 = MI->getOperand(OpNo).getImm() & 0xffff;
+
+ bool HasNonDefaultVal = false;
+ if (isSymbolicDepCtrEncoding(Imm16, HasNonDefaultVal, STI)) {
+ int Id = 0;
+ StringRef Name;
+ unsigned Val;
+ bool IsDefault;
+ bool NeedSpace = false;
+ while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) {
+ if (!IsDefault || !HasNonDefaultVal) {
+ if (NeedSpace)
+ O << ' ';
+ O << Name << '(' << Val << ')';
+ NeedSpace = true;
+ }
+ }
+ } else {
+ O << formatHex(Imm16);
+ }
+}
+
+void AMDGPUInstPrinter::printDelayFlag(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const char *BadInstId = "/* invalid instid value */";
+ static const std::array<const char *, 12> InstIds = {
+ "NO_DEP", "VALU_DEP_1", "VALU_DEP_2",
+ "VALU_DEP_3", "VALU_DEP_4", "TRANS32_DEP_1",
+ "TRANS32_DEP_2", "TRANS32_DEP_3", "FMA_ACCUM_CYCLE_1",
+ "SALU_CYCLE_1", "SALU_CYCLE_2", "SALU_CYCLE_3"};
+
+ const char *BadInstSkip = "/* invalid instskip value */";
+ static const std::array<const char *, 6> InstSkips = {
+ "SAME", "NEXT", "SKIP_1", "SKIP_2", "SKIP_3", "SKIP_4"};
+
+ unsigned SImm16 = MI->getOperand(OpNo).getImm();
+ const char *Prefix = "";
+
+ unsigned Value = SImm16 & 0xF;
+ if (Value) {
+ const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
+ O << Prefix << "instid0(" << Name << ')';
+ Prefix = " | ";
+ }
+
+ Value = (SImm16 >> 4) & 7;
+ if (Value) {
+ const char *Name =
+ Value < InstSkips.size() ? InstSkips[Value] : BadInstSkip;
+ O << Prefix << "instskip(" << Name << ')';
+ Prefix = " | ";
+ }
+
+ Value = (SImm16 >> 7) & 0xF;
+ if (Value) {
+ const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
+ O << Prefix << "instid1(" << Name << ')';
+ Prefix = " | ";
+ }
+
+ if (!*Prefix)
+ O << "0";
+}
+
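A worked decode of the fields printDelayFlag reads, as a standalone snippet (0x091 is an arbitrary example value):

#include <cstdio>

int main() {
  // Layout: bits [3:0] = instid0, [6:4] = instskip, [10:7] = instid1.
  unsigned SImm16 = 0x091;
  unsigned InstId0 = SImm16 & 0xF;        // 1 -> VALU_DEP_1
  unsigned InstSkip = (SImm16 >> 4) & 7;  // 1 -> NEXT
  unsigned InstId1 = (SImm16 >> 7) & 0xF; // 1 -> VALU_DEP_1
  // printDelayFlag would render this as:
  //   instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
  std::printf("%u %u %u\n", InstId0, InstSkip, InstId1);
  return 0;
}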
void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Id;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 71db0beba0b6..202edeee3cb3 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -15,6 +15,7 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
+class MCInstrDesc;
class AMDGPUInstPrinter : public MCInstPrinter {
public:
@@ -50,7 +51,6 @@ private:
void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printFlatOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -64,6 +64,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSMEMOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSMEMOffsetMod(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -116,6 +118,8 @@ private:
raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printRegularOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O) {
printOperand(MI, OpNum, STI, O);
@@ -172,8 +176,13 @@ private:
raw_ostream &O);
void printABID(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI,
+ bool needsImpliedVcc(const MCInstrDesc &Desc, unsigned OpNo) const;
+ void printDefaultVccOperand(bool FirstOperand, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printWaitVDST(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, unsigned N);
@@ -234,6 +243,10 @@ protected:
raw_ostream &O);
void printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDepCtr(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printDelayFlag(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printEndpgm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 53c724f2211a..02c213f90f89 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
+#include "llvm/ADT/APInt.h"
#include "llvm/MC/MCCodeEmitter.h"
-#include <cstdint>
namespace llvm {
@@ -34,46 +34,34 @@ protected:
AMDGPUMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
public:
+ void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ APInt &Inst, APInt &Scratch,
+ const MCSubtargetInfo &STI) const;
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ virtual void getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ APInt &Op, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const = 0;
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
+ virtual void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const = 0;
- virtual unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ virtual void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
+ const MCSubtargetInfo &STI) const = 0;
- virtual unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
+ virtual void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const = 0;
- virtual unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ virtual void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
-
- virtual unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
-
- virtual unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 0;
- }
+ const MCSubtargetInfo &STI) const = 0;
+
+ virtual void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const = 0;
protected:
FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 1f917cd91b47..11fe3f9ef058 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -19,6 +19,7 @@
#include "R600InstPrinter.h"
#include "R600MCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -27,6 +28,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index e5cce6045c8c..060d4b660632 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -33,7 +33,6 @@ enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 };
MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
@@ -51,7 +50,6 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
-#define GET_INSTRINFO_SCHED_ENUM
#include "AMDGPUGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 7aa5f1abf65b..078133469549 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -17,12 +17,16 @@
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -102,6 +106,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -112,6 +117,11 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
@@ -165,6 +175,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
+ case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
@@ -175,6 +186,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
+ case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
+ case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
+ case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
+ case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
+ case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@@ -285,7 +301,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
uint32_t Encoded_pad = Encoded_s_code_end;
// Instruction cache line size in bytes.
- const unsigned Log2CacheLineSize = 6;
+ const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
const unsigned CacheLineSize = 1u << Log2CacheLineSize;
// Extra padding amount in bytes to support prefetch mode 3.
@@ -439,6 +455,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
}
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
@@ -515,8 +533,8 @@ void AMDGPUTargetELFStreamer::EmitNote(
if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
NoteFlags = ELF::SHF_ALLOC;
- S.PushSection();
- S.SwitchSection(
+ S.pushSection();
+ S.switchSection(
Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
S.emitInt32(NameSZ); // namesz
S.emitValue(DescSZ, 4); // descz
@@ -525,7 +543,7 @@ void AMDGPUTargetELFStreamer::EmitNote(
S.emitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.emitValueToAlignment(4, 0, 1, 0); // padding 0
- S.PopSection();
+ S.popSection();
}
unsigned AMDGPUTargetELFStreamer::getEFlags() {
@@ -691,7 +709,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
OS.emitBytes(VendorName);
OS.emitInt8(0); // NULL terminate VendorName
OS.emitBytes(ArchName);
- OS.emitInt8(0); // NULL terminte ArchName
+ OS.emitInt8(0); // NULL terminate ArchName
});
}
@@ -699,9 +717,9 @@ void
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
MCStreamer &OS = getStreamer();
- OS.PushSection();
+ OS.pushSection();
OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
- OS.PopSection();
+ OS.popSection();
}
void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
@@ -806,7 +824,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
uint32_t Encoded_pad = Encoded_s_code_end;
// Instruction cache line size in bytes.
- const unsigned Log2CacheLineSize = 6;
+ const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
const unsigned CacheLineSize = 1u << Log2CacheLineSize;
// Extra padding amount in bytes to support prefetch mode 3.
@@ -818,11 +836,11 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
}
MCStreamer &OS = getStreamer();
- OS.PushSection();
+ OS.pushSection();
OS.emitValueToAlignment(CacheLineSize, Encoded_pad, 4);
for (unsigned I = 0; I < FillSize; I += 4)
OS.emitInt32(Encoded_pad);
- OS.PopSection();
+ OS.popSection();
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 6fe192e95e72..78eb304fe84f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -20,6 +20,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/EndianStream.h"
@@ -84,9 +85,8 @@ enum FCInstr {
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new R600MCCodeEmitter(MCII, MRI);
+ return new R600MCCodeEmitter(MCII, *Ctx.getRegisterInfo());
}
void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
index fc52cb33824f..605ae851378d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
@@ -24,7 +24,6 @@ class MCInstrInfo;
class MCRegisterInfo;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCInstrInfo *createR600MCInstrInfo();
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 77f219aaa3ab..5e67fb5ec876 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -17,10 +17,15 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -34,9 +39,8 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
const MCSubtargetInfo &STI) const;
public:
- SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
- MCContext &ctx)
- : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
+ SIMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : AMDGPUMCCodeEmitter(mcii), MRI(*ctx.getRegisterInfo()) {}
SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;
@@ -46,42 +50,45 @@ public:
const MCSubtargetInfo &STI) const override;
/// \returns the encoding for an MCOperand.
- uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ void getMachineOpValue(const MCInst &MI, const MCOperand &MO, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
/// Use a fixup to encode the simm16 field for SOPP branch
/// instructions.
- unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
- unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
- unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
- unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
private:
uint64_t getImplicitOpSelHiEncoding(int Opcode) const;
+ void getMachineOpValueCommon(const MCInst &MI, const MCOperand &MO,
+ unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new SIMCCodeEmitter(MCII, MRI, Ctx);
+ return new SIMCCodeEmitter(MCII, Ctx);
}
// Returns the encoding value to use if the given integer is an integer inline
@@ -309,8 +316,9 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
computeAvailableFeatures(STI.getFeatureBits()));
int Opcode = MI.getOpcode();
- uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
- const MCInstrDesc &Desc = MCII.get(Opcode);
+ APInt Encoding, Scratch;
+ getBinaryCodeForInstr(MI, Fixups, Encoding, Scratch, STI);
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
unsigned bytes = Desc.getSize();
// Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions.
@@ -322,7 +330,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
for (unsigned i = 0; i < bytes; i++) {
- OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+ OS.write((uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * i));
}
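The switch from uint64_t to APInt lets one code path handle encodings wider than 64 bits (getSOPPBrEncoding below now hands back a 96-bit zero, for instance). The byte-emission idiom from the hunk above, as a standalone sketch (assumes NumBytes * 8 does not exceed the APInt's bit width):

#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <vector>

std::vector<uint8_t> toLittleEndianBytes(const llvm::APInt &Encoding,
                                         unsigned NumBytes) {
  std::vector<uint8_t> Out;
  for (unsigned I = 0; I < NumBytes; ++I)
    Out.push_back((uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * I));
  return Out;
}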
// NSA encoding.
@@ -335,9 +343,11 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
unsigned NumPadding = (-NumExtraAddrs) & 3;
- for (unsigned i = 0; i < NumExtraAddrs; ++i)
- OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
- Fixups, STI));
+ for (unsigned i = 0; i < NumExtraAddrs; ++i) {
+ getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i), Encoding, Fixups,
+ STI);
+ OS.write((uint8_t)Encoding.getLimitedValue());
+ }
for (unsigned i = 0; i < NumPadding; ++i)
OS.write(0);
}
@@ -385,34 +395,36 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
}
-unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isExpr()) {
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
- return 0;
+ Op = APInt::getNullValue(96);
+ } else {
+ getMachineOpValue(MI, MO, Op, Fixups, STI);
}
-
- return getMachineOpValue(MI, MO, Fixups, STI);
}
-unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
auto Offset = MI.getOperand(OpNo).getImm();
// VI only supports 20-bit unsigned offsets.
assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
- return Offset;
+ Op = Offset;
}
-unsigned
-SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
using namespace AMDGPU::SDWA;
uint64_t RegEnc = 0;
@@ -426,23 +438,24 @@ SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
}
- return RegEnc;
+ Op = RegEnc;
+ return;
} else {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
if (Enc != ~0U && Enc != 255) {
- return Enc | SDWA9EncValues::SRC_SGPR_MASK;
+ Op = Enc | SDWA9EncValues::SRC_SGPR_MASK;
+ return;
}
}
llvm_unreachable("Unsupported operand kind");
- return 0;
}
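// Illustrative aside, not part of this patch: in the check above, ~0U appears
// to be getLitEncoding's "no valid encoding" sentinel, and 255 the operand
// value that would select a literal constant, which an SDWA source cannot
// use; the remaining inline-constant encodings are tagged with
// SDWA9EncValues::SRC_SGPR_MASK as shown.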
-unsigned
-SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
using namespace AMDGPU::SDWA;
uint64_t RegEnc = 0;
@@ -455,13 +468,13 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
}
- return RegEnc;
+ Op = RegEnc;
}
-unsigned
-SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+void SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
unsigned Reg = MI.getOperand(OpNo).getReg();
uint64_t Enc = MRI.getEncodingValue(Reg);
@@ -476,10 +489,11 @@ SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
Enc |= 512;
- return Enc;
+ Op = Enc;
}
static bool needsPCRel(const MCExpr *Expr) {
@@ -505,12 +519,21 @@ static bool needsPCRel(const MCExpr *Expr) {
llvm_unreachable("invalid kind");
}
-uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
- const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg())
- return MRI.getEncodingValue(MO.getReg());
+void SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+  if (MO.isReg()) {
+ Op = MRI.getEncodingValue(MO.getReg());
+ return;
+ }
+ unsigned OpNo = &MO - MI.begin();
+ getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+}
+
+void SIMCCodeEmitter::getMachineOpValueCommon(
+ const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
// FIXME: Whether this expression is PCRel or not should not depend on what
@@ -533,28 +556,22 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
uint32_t Offset = Desc.getSize();
assert(Offset == 4 || Offset == 8);
- Fixups.push_back(
- MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
- }
-
- // Figure out the operand number, needed for isSrcOperand check
- unsigned OpNo = 0;
- for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
- if (&MO == &MI.getOperand(OpNo))
- break;
+ Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
}
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
- if (Enc != ~0U)
- return Enc;
-
- } else if (MO.isImm())
- return MO.getImm();
+ if (Enc != ~0U) {
+ Op = Enc;
+ return;
+ }
+ } else if (MO.isImm()) {
+ Op = MO.getImm();
+ return;
+ }
llvm_unreachable("Encoding of this operand type is not supported yet.");
- return 0;
}
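// Illustrative aside, not part of this patch: "OpNo = &MO - MI.begin()" above
// recovers the operand index from its address, replacing the linear search
// the old code performed; it is valid because MCInst stores its operands
// contiguously. The same idiom on a generic contiguous container:
#include <cstddef>
#include <vector>
static std::size_t indexOf(const std::vector<int> &Elems, const int &Elem) {
  // Valid only while &Elem points into Elems' contiguous storage.
  return static_cast<std::size_t>(&Elem - Elems.data());
}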
#define ENABLE_INSTR_PREDICATE_VERIFIER
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index cf03fd682143..be1addf35012 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -14,6 +14,8 @@
// - MIMGEncGfx90a: encoding for gfx90a for atomics
// - MIMGEncGfx10Default: gfx10 default (non-NSA) encoding
// - MIMGEncGfx10NSA: gfx10 NSA encoding
+// - MIMGEncGfx11Default: gfx11 default (non-NSA) encoding
+// - MIMGEncGfx11NSA: gfx11 NSA encoding
class MIMGEncoding;
def MIMGEncGfx6 : MIMGEncoding;
@@ -21,6 +23,8 @@ def MIMGEncGfx8 : MIMGEncoding;
def MIMGEncGfx90a : MIMGEncoding;
def MIMGEncGfx10Default : MIMGEncoding;
def MIMGEncGfx10NSA : MIMGEncoding;
+def MIMGEncGfx11Default : MIMGEncoding;
+def MIMGEncGfx11NSA : MIMGEncoding;
def MIMGEncoding : GenericEnum {
let FilterClass = "MIMGEncoding";
@@ -90,11 +94,13 @@ def MIMG {
int NOP = -1;
}
-class mimgopc <int base, int vi = base, int si = base> {
- field bits<8> BASE = base; // Opcode for all but atomics
+class mimgopc <int gfx11, int gfx10m, int vi = gfx10m, int si = gfx10m> {
+ field bits<8> GFX11 = gfx11;
+ field bits<8> GFX10M = gfx10m; // GFX10minus for all but atomics
field bits<8> VI = vi; // VI is only used for atomic instructions
field bits<8> SI = si; // SI is only used for atomic instructions
- bit HAS_BASE = !ne(base, MIMG.NOP);
+ bit HAS_GFX11 = !ne(gfx11, MIMG.NOP);
+ bit HAS_GFX10M = !ne(gfx10m, MIMG.NOP);
bit HAS_VI = !ne(vi, MIMG.NOP);
bit HAS_SI = !ne(si, MIMG.NOP);
}
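// Illustrative aside, not part of this patch: with the widened class, an
// atomic such as image_atomic_swap below is declared as
// mimgopc<0x0a, 0x0f, 0x10, 0x0f>, i.e. GFX11 = 0x0a, GFX10M = 0x0f,
// VI = 0x10, SI = 0x0f; passing MIMG.NOP in a slot disables that encoding
// for the corresponding generation.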
@@ -207,12 +213,16 @@ class MIMG <dag outs, string dns = "">
MIMGEncoding MIMGEncoding;
bits<8> VDataDwords;
bits<8> VAddrDwords;
+
+ // If NSA is used, this counts the number of operands VAddrDwords is split into.
+ bits<8> VAddrOperands;
}
def MIMGInfoTable : GenericTable {
let FilterClass = "MIMG";
let CppTypeName = "MIMGInfo";
- let Fields = ["Opcode", "BaseOpcode", "MIMGEncoding", "VDataDwords", "VAddrDwords"];
+ let Fields = ["Opcode", "BaseOpcode", "MIMGEncoding", "VDataDwords",
+ "VAddrDwords", "VAddrOperands"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
string TypeOf_MIMGEncoding = "MIMGEncoding";
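// Illustrative aside, not part of this patch: the generated table is consumed
// from C++ through the getMIMGInfo SearchIndex defined below, roughly
// (lookup sketch only; null handling is the caller's business):
//   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
//   unsigned NumVAddrOps = Info->VAddrOperands; // the field added here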
@@ -227,11 +237,12 @@ def getMIMGInfo : SearchIndex {
// This class used to use !foldl to memoize the AddrAsmNames list.
// It turned out that doing so was much slower than using !filter.
-class MIMGNSAHelper<int num_addrs> {
+class MIMGNSAHelper<int num_addrs,
+ list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)> {
list<string> AddrAsmNames =
!foreach(i, !filter(i, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
!lt(i, num_addrs)), "vaddr" # i);
- dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames);
+ dag AddrIns = !dag(ins, addr_types, AddrAsmNames);
string AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
int NSA = !if(!le(num_addrs, 1), ?,
@@ -247,6 +258,7 @@ class MIMG_gfx6789<bits<8> op, dag outs, string dns = "">
let AssemblerPredicate = isGFX6GFX7GFX8GFX9NotGFX90A;
let MIMGEncoding = MIMGEncGfx6;
+ let VAddrOperands = 1;
let d16 = !if(BaseOpcode.HasD16, ?, 0);
}
@@ -257,6 +269,7 @@ class MIMG_gfx90a<bits<8> op, dag outs, string dns = "">
let AssemblerPredicate = isGFX90APlus;
let MIMGEncoding = MIMGEncGfx90a;
+ let VAddrOperands = 1;
let d16 = !if(BaseOpcode.HasD16, ?, 0);
}
@@ -264,10 +277,11 @@ class MIMG_gfx90a<bits<8> op, dag outs, string dns = "">
// Base class of all non-NSA gfx10 MIMG instructions.
class MIMG_gfx10<int op, dag outs, string dns = "">
: MIMG<outs, dns>, MIMGe_gfx10<op> {
- let SubtargetPredicate = isGFX10Plus;
- let AssemblerPredicate = isGFX10Plus;
+ let SubtargetPredicate = isGFX10Only;
+ let AssemblerPredicate = isGFX10Only;
let MIMGEncoding = MIMGEncGfx10Default;
+ let VAddrOperands = 1;
let d16 = !if(BaseOpcode.HasD16, ?, 0);
let nsa = 0;
@@ -277,10 +291,11 @@ class MIMG_gfx10<int op, dag outs, string dns = "">
// Note that 1-dword addresses always use non-NSA variants.
class MIMG_nsa_gfx10<int op, dag outs, int num_addrs, string dns="">
: MIMG<outs, dns>, MIMGe_gfx10<op> {
- let SubtargetPredicate = isGFX10Plus;
- let AssemblerPredicate = isGFX10Plus;
+ let SubtargetPredicate = isGFX10Only;
+ let AssemblerPredicate = isGFX10Only;
let MIMGEncoding = MIMGEncGfx10NSA;
+ let VAddrOperands = num_addrs;
MIMGNSAHelper nsah = MIMGNSAHelper<num_addrs>;
dag AddrIns = nsah.AddrIns;
@@ -290,11 +305,45 @@ class MIMG_nsa_gfx10<int op, dag outs, int num_addrs, string dns="">
let nsa = nsah.NSA;
}
+// Base class of all non-NSA gfx11 MIMG instructions.
+class MIMG_gfx11<int op, dag outs, string dns = "">
+ : MIMG<outs, dns>, MIMGe_gfx11<op> {
+ let SubtargetPredicate = isGFX11Plus;
+ let AssemblerPredicate = isGFX11Plus;
+
+ let MIMGEncoding = MIMGEncGfx11Default;
+ let VAddrOperands = 1;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let nsa = 0;
+}
+
+// Base class for all NSA MIMG instructions.
+// Note that 1-dword addresses always use non-NSA variants.
+class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
+ list<RegisterClass> addr_types=[]>
+ : MIMG<outs, dns>, MIMGe_gfx11<op> {
+ let SubtargetPredicate = isGFX11Plus;
+ let AssemblerPredicate = isGFX11Plus;
+
+ let MIMGEncoding = MIMGEncGfx11NSA;
+ let VAddrOperands = num_addrs;
+
+ MIMGNSAHelper nsah = !if(!empty(addr_types),
+ MIMGNSAHelper<num_addrs>,
+ MIMGNSAHelper<num_addrs, addr_types>);
+ dag AddrIns = nsah.AddrIns;
+ string AddrAsm = nsah.AddrAsm;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let nsa = nsah.NSA;
+}
+
class MIMG_NoSampler_Helper <mimgopc op, string asm,
RegisterClass dst_rc,
RegisterClass addr_rc,
string dns="">
- : MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
+ : MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -307,7 +356,7 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
RegisterClass dst_rc,
RegisterClass addr_rc,
string dns="">
- : MIMG_gfx90a <op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
+ : MIMG_gfx90a <op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
@@ -319,7 +368,7 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
- : MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
+ : MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
@@ -331,7 +380,32 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, int num_addrs,
string dns="">
- : MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
+ : MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_NoSampler_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> {
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_NoSampler_nsa_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
@@ -347,7 +421,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
bit ExtendedImageInst = 1> {
let ssamp = 0 in {
let VAddrDwords = 1 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
!if(enableDisasm, "AMDGPU", "")>;
if !not(ExtendedImageInst) then
@@ -356,30 +430,42 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
def _V1_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VGPR_32,
!if(enableDisasm, "AMDGPU", "")>;
}
+ if op.HAS_GFX11 then {
+ def _V1_gfx11 : MIMG_NoSampler_gfx11<op, asm, dst_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
}
let VAddrDwords = 2 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
if !not(ExtendedImageInst) then
def _V2_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_64>;
def _V2_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_64>;
def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 2>;
}
+ if op.HAS_GFX11 then {
+ def _V2_gfx11 : MIMG_NoSampler_gfx11<op, asm, dst_rc, VReg_64>;
+ def _V2_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11<op, asm, dst_rc, 2>;
+ }
}
let VAddrDwords = 3 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
if !not(ExtendedImageInst) then
def _V3_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_96>;
def _V3_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_96>;
def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 3>;
}
+ if op.HAS_GFX11 then {
+ def _V3_gfx11 : MIMG_NoSampler_gfx11<op, asm, dst_rc, VReg_96>;
+ def _V3_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11<op, asm, dst_rc, 3>;
+ }
}
let VAddrDwords = 4 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
if !not(ExtendedImageInst) then
def _V4_gfx90a : MIMG_NoSampler_Helper_gfx90a <op, asm, dst_rc, VReg_128>;
@@ -387,6 +473,11 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 4,
!if(enableDisasm, "AMDGPU", "")>;
}
+ if op.HAS_GFX11 then {
+ def _V4_gfx11 : MIMG_NoSampler_gfx11<op, asm, dst_rc, VReg_128>;
+ def _V4_nsa_gfx11 : MIMG_NoSampler_nsa_gfx11<op, asm, dst_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
}
}
}
@@ -420,7 +511,7 @@ class MIMG_Store_Helper <mimgopc op, string asm,
RegisterClass data_rc,
RegisterClass addr_rc,
string dns = "">
- : MIMG_gfx6789<op.BASE, (outs), dns> {
+ : MIMG_gfx6789<op.GFX10M, (outs), dns> {
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -433,7 +524,7 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
RegisterClass data_rc,
RegisterClass addr_rc,
string dns = "">
- : MIMG_gfx90a<op.BASE, (outs), dns> {
+ : MIMG_gfx90a<op.GFX10M, (outs), dns> {
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
@@ -446,7 +537,7 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
class MIMG_Store_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
- : MIMG_gfx10<op.BASE, (outs), dns> {
+ : MIMG_gfx10<op.GFX10M, (outs), dns> {
let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
@@ -458,7 +549,33 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, int num_addrs,
string dns="">
- : MIMG_nsa_gfx10<op.BASE, (outs), num_addrs, dns> {
+ : MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> {
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Store_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx11<op.GFX11, (outs), dns> {
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Store_nsa_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
@@ -475,39 +592,57 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
let mayLoad = 0, mayStore = 1, hasSideEffects = 0, hasPostISelHook = 0,
DisableWQM = 1, ssamp = 0 in {
let VAddrDwords = 1 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
!if(enableDisasm, "AMDGPU", "")>;
+ let hasPostISelHook = 1 in
def _V1_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VGPR_32,
!if(enableDisasm, "GFX90A", "")>;
def _V1_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VGPR_32,
!if(enableDisasm, "AMDGPU", "")>;
}
+ if op.HAS_GFX11 then {
+ def _V1_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
}
let VAddrDwords = 2 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
def _V2_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_64>;
def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
}
+ if op.HAS_GFX11 then {
+ def _V2_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_64>;
+ def _V2_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 2>;
+ }
}
let VAddrDwords = 3 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
def _V3_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_96>;
def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
}
+ if op.HAS_GFX11 then {
+ def _V3_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_96>;
+ def _V3_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 3>;
+ }
}
let VAddrDwords = 4 in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
def _V4_gfx90a : MIMG_Store_Helper_gfx90a <op, asm, data_rc, VReg_128>;
def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,
!if(enableDisasm, "AMDGPU", "")>;
}
+ if op.HAS_GFX11 then {
+ def _V4_gfx11 : MIMG_Store_gfx11 <op, asm, data_rc, VReg_128>;
+ def _V4_nsa_gfx11 : MIMG_Store_nsa_gfx11 <op, asm, data_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
}
}
}
@@ -582,7 +717,7 @@ class MIMG_Atomic_gfx90a<mimgopc op, string asm, RegisterClass data_rc,
class MIMG_Atomic_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
bit enableDisasm = 0>
- : MIMG_gfx10<!cast<int>(op.BASE), (outs DataRC:$vdst),
+ : MIMG_gfx10<!cast<int>(op.GFX10M), (outs DataRC:$vdst),
!if(enableDisasm, "AMDGPU", "")> {
let Constraints = "$vdst = $vdata";
let AsmMatchConverter = "cvtMIMGAtomic";
@@ -596,7 +731,37 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, int num_addrs,
bit enableDisasm = 0>
- : MIMG_nsa_gfx10<!cast<int>(op.BASE), (outs DataRC:$vdst), num_addrs,
+ : MIMG_nsa_gfx10<!cast<int>(op.GFX10M), (outs DataRC:$vdst), num_addrs,
+ !if(enableDisasm, "AMDGPU", "")> {
+ let Constraints = "$vdst = $vdata";
+ let AsmMatchConverter = "cvtMIMGAtomic";
+
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+}
+
+class MIMG_Atomic_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ bit enableDisasm = 0>
+ : MIMG_gfx11<!cast<int>(op.GFX11), (outs DataRC:$vdst),
+ !if(enableDisasm, "AMDGPU", "")> {
+ let Constraints = "$vdst = $vdata";
+ let AsmMatchConverter = "cvtMIMGAtomic";
+
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+}
+
+class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ bit enableDisasm = 0>
+ : MIMG_nsa_gfx11<!cast<int>(op.GFX11), (outs DataRC:$vdst), num_addrs,
!if(enableDisasm, "AMDGPU", "")> {
let Constraints = "$vdst = $vdata";
let AsmMatchConverter = "cvtMIMGAtomic";
@@ -622,11 +787,15 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
}
if op.HAS_VI then {
def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+ let hasPostISelHook = 1 in
def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
}
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
}
+ if op.HAS_GFX11 then {
+ def _V1_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
}
let VAddrDwords = 2 in {
if op.HAS_SI then {
@@ -636,10 +805,14 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64, 0>;
}
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, 0>;
}
+ if op.HAS_GFX11 then {
+ def _V2_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_64, 0>;
+ def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 2, 0>;
+ }
}
let VAddrDwords = 3 in {
if op.HAS_SI then {
@@ -649,10 +822,14 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96, 0>;
}
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, 0>;
}
+ if op.HAS_GFX11 then {
+ def _V3_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_96, 0>;
+ def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 3, 0>;
+ }
}
let VAddrDwords = 4 in {
if op.HAS_SI then {
@@ -662,10 +839,14 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128, 0>;
}
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
}
+ if op.HAS_GFX11 then {
+ def _V4_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_128, 0>;
+ def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 4, enableDasm>;
+ }
}
}
}
@@ -691,7 +872,7 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0>
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
- : MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
+ : MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -702,7 +883,7 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
- : MIMG_gfx90a<op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
+ : MIMG_gfx90a<op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
@@ -714,7 +895,7 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
- : MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
+ : MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
@@ -727,7 +908,34 @@ class MIMG_Sampler_gfx10<mimgopc op, string opcode,
class MIMG_Sampler_nsa_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, int num_addrs,
string dns="">
- : MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
+ : MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
+ #"$cpol$r128$a16$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Sampler_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> {
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
+ R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
+ #"$cpol$r128$a16$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Sampler_nsa_gfx11<mimgopc op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
@@ -823,7 +1031,7 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
bit ExtendedImageInst = 1> {
foreach addr = MIMG_Sampler_AddrSizes<sample>.MachineInstrs in {
let VAddrDwords = addr.NumWords in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V # addr.NumWords
: MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
!if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
@@ -835,16 +1043,26 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
: MIMG_Sampler_gfx10 <op, asm, dst_rc, addr.RegClass,
!if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
}
+ if op.HAS_GFX11 then {
+ def _V # addr.NumWords # _gfx11
+ : MIMG_Sampler_gfx11 <op, asm, dst_rc, addr.RegClass,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ }
}
}
foreach addr = MIMG_Sampler_AddrSizes<sample>.NSAInstrs in {
let VAddrDwords = addr.NumWords in {
- if op.HAS_BASE then {
+ if op.HAS_GFX10M then {
def _V # addr.NumWords # _nsa_gfx10
: MIMG_Sampler_nsa_gfx10<op, asm, dst_rc, addr.NumWords,
!if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
}
+ if !and(op.HAS_GFX11, !le(addr.NumWords, 5)) then {
+ def _V # addr.NumWords # _nsa_gfx11
+ : MIMG_Sampler_nsa_gfx11<op, asm, dst_rc, addr.NumWords,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ }
}
}
}
@@ -911,10 +1129,17 @@ class MIMG_IntersectRay_Helper<bit Is64, bit A16> {
// when we only need 9, 11 or 12 depending on A16 field and ptr size.
RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
int VAddrDwords = !srl(RegClass.Size, 5);
+
+ int gfx11_nsa_addrs = !if(A16, 4, 5);
+ RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32);
+ list<RegisterClass> gfx11_addr_types =
+ !if(A16,
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96],
+ [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
}
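// Illustrative aside, not part of this patch: reading the lists above, the
// gfx11 NSA form keeps the BVH node pointer in its own operand (VGPR_32 or
// VReg_64 depending on Is64) and groups the remaining ray data into VReg_96
// triples, so only 4 (A16) or 5 NSA operands are needed instead of one
// operand per address dword.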
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, bit A16>
- : MIMG_gfx10<op.BASE, (outs VReg_128:$vdata), "AMDGPU"> {
+ : MIMG_gfx10<op.GFX10M, (outs VReg_128:$vdata), "AMDGPU"> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
!if(A16, (ins GFX10A16:$a16), (ins)));
@@ -924,7 +1149,27 @@ class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, b
}
class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit A16>
- : MIMG_nsa_gfx10<op.BASE, (outs VReg_128:$vdata), num_addrs, "AMDGPU"> {
+ : MIMG_nsa_gfx10<op.GFX10M, (outs VReg_128:$vdata), num_addrs, "AMDGPU"> {
+ let InOperandList = !con(nsah.AddrIns,
+ (ins SReg_128:$srsrc),
+ !if(A16, (ins GFX10A16:$a16), (ins)));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", "");
+}
+
+class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC, bit A16>
+ : MIMG_gfx11<op.GFX11, (outs VReg_128:$vdata), "AMDGPU"> {
+
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
+ !if(A16, (ins GFX10A16:$a16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(A16, "$a16", "");
+
+ let nsa = 0;
+}
+
+class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
+ bit A16, list<RegisterClass> addr_types>
+ : MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "AMDGPU",
+ addr_types> {
let InOperandList = !con(nsah.AddrIns,
(ins SReg_128:$srsrc),
!if(A16, (ins GFX10A16:$a16), (ins)));
@@ -936,9 +1181,7 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit A16> {
def "" : MIMGBaseOpcode {
let BVH = 1;
}
- let SubtargetPredicate = HasGFX10_AEncoding,
- AssemblerPredicate = HasGFX10_AEncoding,
- AsmMatchConverter = !if(A16, "cvtIntersectRay", ""),
+ let AsmMatchConverter = !if(A16, "cvtIntersectRay", ""),
dmask = 0xf,
unorm = 1,
d16 = 0,
@@ -955,142 +1198,183 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit A16> {
def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass, A16> {
let VAddrDwords = info.VAddrDwords;
}
+ def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass, A16> {
+ let VAddrDwords = info.VAddrDwords;
+ }
def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs, A16> {
let VAddrDwords = info.num_addrs;
}
+ def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11<op, opcode,
+ info.gfx11_nsa_addrs, A16,
+ info.gfx11_addr_types> {
+ let VAddrDwords = info.num_addrs;
+ }
+ }
+}
+
+multiclass MIMG_MSAA_Load <mimgopc op, string asm> {
+ def "" : MIMGBaseOpcode {
+ let HasD16 = 1;
+ let Gather4 = 1; /* for appropriate dmask handling */
+ let MSAA = 1;
+ }
+
+ let BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
+ Gather4 = 1, hasPostISelHook = 0, mayLoad = 1 in {
+ let VDataDwords = 2 in
+ defm _V2 : MIMG_NoSampler_Src_Helper<op, asm, VReg_64, 0>; /* packed D16 */
+ let VDataDwords = 3 in
+ defm _V3 : MIMG_NoSampler_Src_Helper<op, asm, VReg_96, 0>; /* packed D16 + tfe */
+ let VDataDwords = 4 in
+ defm _V4 : MIMG_NoSampler_Src_Helper<op, asm, VReg_128, 1>;
+ let VDataDwords = 5 in
+ defm _V5 : MIMG_NoSampler_Src_Helper<op, asm, VReg_160, 0>;
}
}
//===----------------------------------------------------------------------===//
// MIMG Instructions
//===----------------------------------------------------------------------===//
-defm IMAGE_LOAD : MIMG_NoSampler <mimgopc<0x00>, "image_load", 1>;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <mimgopc<0x01>, "image_load_mip", 1, 1>;
-defm IMAGE_LOAD_PCK : MIMG_NoSampler <mimgopc<0x02>, "image_load_pck", 0>;
-defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <mimgopc<0x03>, "image_load_pck_sgn", 0>;
-defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <mimgopc<0x04>, "image_load_mip_pck", 0, 1>;
-defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <mimgopc<0x05>, "image_load_mip_pck_sgn", 0, 1>;
-defm IMAGE_STORE : MIMG_Store <mimgopc<0x08>, "image_store", 1>;
-defm IMAGE_STORE_MIP : MIMG_Store <mimgopc<0x09>, "image_store_mip", 1, 1>;
-defm IMAGE_STORE_PCK : MIMG_Store <mimgopc<0x0a>, "image_store_pck", 0>;
-defm IMAGE_STORE_MIP_PCK : MIMG_Store <mimgopc<0x0b>, "image_store_mip_pck", 0, 1>;
-
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <mimgopc<0x0e>, "image_get_resinfo", 0, 1, 1>;
-
-defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimgopc<0x0f, 0x10, 0x0f>, "image_atomic_swap">;
-defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimgopc<0x10, 0x11, 0x10>, "image_atomic_cmpswap", 1>;
-defm IMAGE_ATOMIC_ADD : MIMG_Atomic <mimgopc<0x11, 0x12, 0x11>, "image_atomic_add">;
-defm IMAGE_ATOMIC_SUB : MIMG_Atomic <mimgopc<0x12, 0x13, 0x12>, "image_atomic_sub">;
-defm IMAGE_ATOMIC_RSUB : MIMG_Atomic <mimgopc<MIMG.NOP, MIMG.NOP, 0x13>, "image_atomic_rsub">;
-defm IMAGE_ATOMIC_SMIN : MIMG_Atomic <mimgopc<0x14>, "image_atomic_smin">;
-defm IMAGE_ATOMIC_UMIN : MIMG_Atomic <mimgopc<0x15>, "image_atomic_umin">;
-defm IMAGE_ATOMIC_SMAX : MIMG_Atomic <mimgopc<0x16>, "image_atomic_smax">;
-defm IMAGE_ATOMIC_UMAX : MIMG_Atomic <mimgopc<0x17>, "image_atomic_umax">;
-defm IMAGE_ATOMIC_AND : MIMG_Atomic <mimgopc<0x18>, "image_atomic_and">;
-defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimgopc<0x19>, "image_atomic_or">;
-defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimgopc<0x1a>, "image_atomic_xor">;
-defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimgopc<0x1b>, "image_atomic_inc">;
-defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimgopc<0x1c>, "image_atomic_dec">;
-defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 1, 1>;
-defm IMAGE_ATOMIC_FMIN : MIMG_Atomic <mimgopc<0x1e, MIMG.NOP>, "image_atomic_fmin", 0, 1>;
-defm IMAGE_ATOMIC_FMAX : MIMG_Atomic <mimgopc<0x1f, MIMG.NOP>, "image_atomic_fmax", 0, 1>;
-
-defm IMAGE_SAMPLE : MIMG_Sampler_WQM <mimgopc<0x20>, AMDGPUSample>;
+let OtherPredicates = [HasImageInsts] in {
+
+defm IMAGE_LOAD : MIMG_NoSampler <mimgopc<0x00, 0x00>, "image_load", 1>;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <mimgopc<0x01, 0x01>, "image_load_mip", 1, 1>;
+defm IMAGE_LOAD_PCK : MIMG_NoSampler <mimgopc<0x02, 0x02>, "image_load_pck", 0>;
+defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <mimgopc<0x03, 0x03>, "image_load_pck_sgn", 0>;
+defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <mimgopc<0x04, 0x04>, "image_load_mip_pck", 0, 1>;
+defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <mimgopc<0x05, 0x05>, "image_load_mip_pck_sgn", 0, 1>;
+defm IMAGE_STORE : MIMG_Store <mimgopc<0x06, 0x08>, "image_store", 1>;
+defm IMAGE_STORE_MIP : MIMG_Store <mimgopc<0x07, 0x09>, "image_store_mip", 1, 1>;
+defm IMAGE_STORE_PCK : MIMG_Store <mimgopc<0x08, 0x0a>, "image_store_pck", 0>;
+defm IMAGE_STORE_MIP_PCK : MIMG_Store <mimgopc<0x09, 0x0b>, "image_store_mip_pck", 0, 1>;
+
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <mimgopc<0x17, 0x0e>, "image_get_resinfo", 0, 1, 1>;
+
+defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimgopc<0x0a, 0x0f, 0x10, 0x0f>, "image_atomic_swap">;
+defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimgopc<0x0b, 0x10, 0x11, 0x10>, "image_atomic_cmpswap", 1>;
+defm IMAGE_ATOMIC_ADD : MIMG_Atomic <mimgopc<0x0c, 0x11, 0x12, 0x11>, "image_atomic_add">;
+defm IMAGE_ATOMIC_SUB : MIMG_Atomic <mimgopc<0x0d, 0x12, 0x13, 0x12>, "image_atomic_sub">;
+defm IMAGE_ATOMIC_RSUB : MIMG_Atomic <mimgopc<MIMG.NOP, MIMG.NOP, MIMG.NOP, 0x13>, "image_atomic_rsub">;
+defm IMAGE_ATOMIC_SMIN : MIMG_Atomic <mimgopc<0x0e, 0x14>, "image_atomic_smin">;
+defm IMAGE_ATOMIC_UMIN : MIMG_Atomic <mimgopc<0x0f, 0x15>, "image_atomic_umin">;
+defm IMAGE_ATOMIC_SMAX : MIMG_Atomic <mimgopc<0x10, 0x16>, "image_atomic_smax">;
+defm IMAGE_ATOMIC_UMAX : MIMG_Atomic <mimgopc<0x11, 0x17>, "image_atomic_umax">;
+defm IMAGE_ATOMIC_AND : MIMG_Atomic <mimgopc<0x12, 0x18>, "image_atomic_and">;
+defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimgopc<0x13, 0x19>, "image_atomic_or">;
+defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimgopc<0x14, 0x1a>, "image_atomic_xor">;
+defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimgopc<0x15, 0x1b>, "image_atomic_inc">;
+defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimgopc<0x16, 0x1c>, "image_atomic_dec">;
+defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<MIMG.NOP, 0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 1, 1>;
+defm IMAGE_ATOMIC_FMIN : MIMG_Atomic <mimgopc<MIMG.NOP, 0x1e, MIMG.NOP>, "image_atomic_fmin", 0, 1>;
+defm IMAGE_ATOMIC_FMAX : MIMG_Atomic <mimgopc<MIMG.NOP, 0x1f, MIMG.NOP>, "image_atomic_fmax", 0, 1>;
+
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM <mimgopc<0x1b, 0x20>, AMDGPUSample>;
let OtherPredicates = [HasExtendedImageInsts] in {
-defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <mimgopc<0x21>, AMDGPUSample_cl>;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <mimgopc<0x22>, AMDGPUSample_d>;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <mimgopc<0x23>, AMDGPUSample_d_cl>;
-defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler <mimgopc<0xa2>, AMDGPUSample_d, 0, 1>;
-defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler <mimgopc<0xa3>, AMDGPUSample_d_cl, 0, 1>;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <mimgopc<0x24>, AMDGPUSample_l>;
-defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <mimgopc<0x25>, AMDGPUSample_b>;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <mimgopc<0x26>, AMDGPUSample_b_cl>;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <mimgopc<0x27>, AMDGPUSample_lz>;
-defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <mimgopc<0x28>, AMDGPUSample_c>;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <mimgopc<0x29>, AMDGPUSample_c_cl>;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <mimgopc<0x2a>, AMDGPUSample_c_d>;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <mimgopc<0x2b>, AMDGPUSample_c_d_cl>;
-defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler <mimgopc<0xaa>, AMDGPUSample_c_d, 0, 1>;
-defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler <mimgopc<0xab>, AMDGPUSample_c_d_cl, 0, 1>;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <mimgopc<0x2c>, AMDGPUSample_c_l>;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <mimgopc<0x2d>, AMDGPUSample_c_b>;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <mimgopc<0x2e>, AMDGPUSample_c_b_cl>;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <mimgopc<0x2f>, AMDGPUSample_c_lz>;
-defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <mimgopc<0x30>, AMDGPUSample_o>;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <mimgopc<0x31>, AMDGPUSample_cl_o>;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <mimgopc<0x32>, AMDGPUSample_d_o>;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <mimgopc<0x33>, AMDGPUSample_d_cl_o>;
-defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler <mimgopc<0xb2>, AMDGPUSample_d_o, 0, 1>;
-defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler <mimgopc<0xb3>, AMDGPUSample_d_cl_o, 0, 1>;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <mimgopc<0x34>, AMDGPUSample_l_o>;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <mimgopc<0x35>, AMDGPUSample_b_o>;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x36>, AMDGPUSample_b_cl_o>;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <mimgopc<0x37>, AMDGPUSample_lz_o>;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <mimgopc<0x38>, AMDGPUSample_c_o>;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <mimgopc<0x39>, AMDGPUSample_c_cl_o>;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <mimgopc<0x3a>, AMDGPUSample_c_d_o>;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <mimgopc<0x3b>, AMDGPUSample_c_d_cl_o>;
-defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler <mimgopc<0xba>, AMDGPUSample_c_d_o, 0, 1>;
-defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler <mimgopc<0xbb>, AMDGPUSample_c_d_cl_o, 0, 1>;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <mimgopc<0x3c>, AMDGPUSample_c_l_o>;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x3e>, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <mimgopc<0x3d>, AMDGPUSample_c_b_o>;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <mimgopc<0x3f>, AMDGPUSample_c_lz_o>;
-defm IMAGE_GATHER4 : MIMG_Gather_WQM <mimgopc<0x40>, AMDGPUSample>;
-defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <mimgopc<0x41>, AMDGPUSample_cl>;
-defm IMAGE_GATHER4_L : MIMG_Gather <mimgopc<0x44>, AMDGPUSample_l>;
-defm IMAGE_GATHER4_B : MIMG_Gather_WQM <mimgopc<0x45>, AMDGPUSample_b>;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <mimgopc<0x46>, AMDGPUSample_b_cl>;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <mimgopc<0x47>, AMDGPUSample_lz>;
-defm IMAGE_GATHER4_C : MIMG_Gather_WQM <mimgopc<0x48>, AMDGPUSample_c>;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <mimgopc<0x49>, AMDGPUSample_c_cl>;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <mimgopc<0x4c>, AMDGPUSample_c_l>;
-defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <mimgopc<0x4d>, AMDGPUSample_c_b>;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <mimgopc<0x4e>, AMDGPUSample_c_b_cl>;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <mimgopc<0x4f>, AMDGPUSample_c_lz>;
-defm IMAGE_GATHER4_O : MIMG_Gather_WQM <mimgopc<0x50>, AMDGPUSample_o>;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <mimgopc<0x51>, AMDGPUSample_cl_o>;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <mimgopc<0x54>, AMDGPUSample_l_o>;
-defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <mimgopc<0x55>, AMDGPUSample_b_o>;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <mimgopc<0x56>, AMDGPUSample_b_cl_o>;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <mimgopc<0x57>, AMDGPUSample_lz_o>;
-defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <mimgopc<0x58>, AMDGPUSample_c_o>;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <mimgopc<0x59>, AMDGPUSample_c_cl_o>;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <mimgopc<0x5c>, AMDGPUSample_c_l_o>;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <mimgopc<0x5d>, AMDGPUSample_c_b_o>;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <mimgopc<0x5e>, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <mimgopc<0x5f>, AMDGPUSample_c_lz_o>;
-//defm IMAGE_GATHER4H : MIMG_Gather_WQM <mimgopc<0x61>, ?>;
-
-defm IMAGE_GET_LOD : MIMG_Sampler <mimgopc<0x60>, AMDGPUSample, 1, 0, 1, "image_get_lod">;
-
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <mimgopc<0x68>, AMDGPUSample_cd>;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <mimgopc<0x69>, AMDGPUSample_cd_cl>;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <mimgopc<0x6a>, AMDGPUSample_c_cd>;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <mimgopc<0x6b>, AMDGPUSample_c_cd_cl>;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <mimgopc<0x6c>, AMDGPUSample_cd_o>;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <mimgopc<0x6d>, AMDGPUSample_cd_cl_o>;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <mimgopc<0x6e>, AMDGPUSample_c_cd_o>;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <mimgopc<0x6f>, AMDGPUSample_c_cd_cl_o>;
-defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler <mimgopc<0xe8>, AMDGPUSample_cd, 0, 1>;
-defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler <mimgopc<0xe9>, AMDGPUSample_cd_cl, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler <mimgopc<0xea>, AMDGPUSample_c_cd, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler <mimgopc<0xeb>, AMDGPUSample_c_cd_cl, 0, 1>;
-defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler <mimgopc<0xec>, AMDGPUSample_cd_o, 0, 1>;
-defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler <mimgopc<0xed>, AMDGPUSample_cd_cl_o, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler <mimgopc<0xee>, AMDGPUSample_c_cd_o, 0, 1>;
-defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<0xef>, AMDGPUSample_c_cd_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <mimgopc<0x40, 0x21>, AMDGPUSample_cl>;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <mimgopc<0x1c, 0x22>, AMDGPUSample_d>;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <mimgopc<0x41, 0x23>, AMDGPUSample_d_cl>;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <mimgopc<0x1d, 0x24>, AMDGPUSample_l>;
+defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <mimgopc<0x1e, 0x25>, AMDGPUSample_b>;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <mimgopc<0x42, 0x26>, AMDGPUSample_b_cl>;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <mimgopc<0x1f, 0x27>, AMDGPUSample_lz>;
+defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <mimgopc<0x20, 0x28>, AMDGPUSample_c>;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <mimgopc<0x43, 0x29>, AMDGPUSample_c_cl>;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <mimgopc<0x21, 0x2a>, AMDGPUSample_c_d>;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <mimgopc<0x44, 0x2b>, AMDGPUSample_c_d_cl>;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <mimgopc<0x22, 0x2c>, AMDGPUSample_c_l>;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <mimgopc<0x23, 0x2d>, AMDGPUSample_c_b>;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <mimgopc<0x45, 0x2e>, AMDGPUSample_c_b_cl>;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <mimgopc<0x24, 0x2f>, AMDGPUSample_c_lz>;
+defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <mimgopc<0x25, 0x30>, AMDGPUSample_o>;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <mimgopc<0x46, 0x31>, AMDGPUSample_cl_o>;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <mimgopc<0x26, 0x32>, AMDGPUSample_d_o>;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <mimgopc<0x47, 0x33>, AMDGPUSample_d_cl_o>;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <mimgopc<0x27, 0x34>, AMDGPUSample_l_o>;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <mimgopc<0x28, 0x35>, AMDGPUSample_b_o>;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x48, 0x36>, AMDGPUSample_b_cl_o>;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <mimgopc<0x29, 0x37>, AMDGPUSample_lz_o>;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <mimgopc<0x2a, 0x38>, AMDGPUSample_c_o>;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <mimgopc<0x49, 0x39>, AMDGPUSample_c_cl_o>;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <mimgopc<0x2b, 0x3a>, AMDGPUSample_c_d_o>;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <mimgopc<0x4a, 0x3b>, AMDGPUSample_c_d_cl_o>;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <mimgopc<0x2c, 0x3c>, AMDGPUSample_c_l_o>;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <mimgopc<0x4b, 0x3e>, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <mimgopc<0x2d, 0x3d>, AMDGPUSample_c_b_o>;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <mimgopc<0x2e, 0x3f>, AMDGPUSample_c_lz_o>;
+defm IMAGE_GATHER4 : MIMG_Gather_WQM <mimgopc<0x2f, 0x40>, AMDGPUSample>;
+defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <mimgopc<0x60, 0x41>, AMDGPUSample_cl>;
+defm IMAGE_GATHER4_L : MIMG_Gather <mimgopc<0x30, 0x44>, AMDGPUSample_l>;
+defm IMAGE_GATHER4_B : MIMG_Gather_WQM <mimgopc<0x31, 0x45>, AMDGPUSample_b>;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <mimgopc<0x61, 0x46>, AMDGPUSample_b_cl>;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <mimgopc<0x32, 0x47>, AMDGPUSample_lz>;
+defm IMAGE_GATHER4_C : MIMG_Gather_WQM <mimgopc<0x33, 0x48>, AMDGPUSample_c>;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <mimgopc<0x62, 0x49>, AMDGPUSample_c_cl>;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <mimgopc<0x63, 0x4c>, AMDGPUSample_c_l>;
+defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <mimgopc<0x64, 0x4d>, AMDGPUSample_c_b>;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <mimgopc<0x65, 0x4e>, AMDGPUSample_c_b_cl>;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <mimgopc<0x34, 0x4f>, AMDGPUSample_c_lz>;
+defm IMAGE_GATHER4_O : MIMG_Gather_WQM <mimgopc<0x35, 0x50>, AMDGPUSample_o>;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x51>, AMDGPUSample_cl_o>;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <mimgopc<MIMG.NOP, 0x54>, AMDGPUSample_l_o>;
+defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x55>, AMDGPUSample_b_o>;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <mimgopc<MIMG.NOP, 0x56>, AMDGPUSample_b_cl_o>;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <mimgopc<0x36, 0x57>, AMDGPUSample_lz_o>;
+defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x58>, AMDGPUSample_c_o>;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x59>, AMDGPUSample_c_cl_o>;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <mimgopc<MIMG.NOP, 0x5c>, AMDGPUSample_c_l_o>;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x5d>, AMDGPUSample_c_b_o>;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <mimgopc<MIMG.NOP, 0x5e>, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <mimgopc<0x37, 0x5f>, AMDGPUSample_c_lz_o>;
+//defm IMAGE_GATHER4H : MIMG_Gather_WQM <mimgopc<0x90, 0x61>, ?>;
+
+defm IMAGE_GET_LOD : MIMG_Sampler <mimgopc<0x38, 0x60>, AMDGPUSample, 1, 0, 1, "image_get_lod">;
+
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <mimgopc<MIMG.NOP, 0x68>, AMDGPUSample_cd>;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <mimgopc<MIMG.NOP, 0x69>, AMDGPUSample_cd_cl>;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6a>, AMDGPUSample_c_cd>;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6b>, AMDGPUSample_c_cd_cl>;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6c>, AMDGPUSample_cd_o>;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6d>, AMDGPUSample_cd_cl_o>;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6e>, AMDGPUSample_c_cd_o>;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <mimgopc<MIMG.NOP, 0x6f>, AMDGPUSample_c_cd_cl_o>;
} // End OtherPredicates = [HasExtendedImageInsts]
-//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
-//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
-
-let SubtargetPredicate = HasGFX10_AEncoding in
-defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<0x80>, "image_msaa_load", 1, 0, 0, 1>;
-defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 0>;
-defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 1>;
-defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 0>;
-defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 1>;
+let OtherPredicates = [HasExtendedImageInsts,HasG16] in {
+defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler <mimgopc<0x39, 0xa2>, AMDGPUSample_d, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler <mimgopc<0x5f, 0xa3>, AMDGPUSample_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler <mimgopc<0x3a, 0xaa>, AMDGPUSample_c_d, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler <mimgopc<0x54, 0xab>, AMDGPUSample_c_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler <mimgopc<0x3b, 0xb2>, AMDGPUSample_d_o, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler <mimgopc<0x55, 0xb3>, AMDGPUSample_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler <mimgopc<0x3c, 0xba>, AMDGPUSample_c_d_o, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler <mimgopc<0x56, 0xbb>, AMDGPUSample_c_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xe8>, AMDGPUSample_cd, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xe9>, AMDGPUSample_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xea>, AMDGPUSample_c_cd, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xeb>, AMDGPUSample_c_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xec>, AMDGPUSample_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xed>, AMDGPUSample_cd_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xee>, AMDGPUSample_c_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<MIMG.NOP, 0xef>, AMDGPUSample_c_cd_cl_o, 0, 1>;
+} // End OtherPredicates = [HasExtendedImageInsts,HasG16]
+
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", mimgopc<0x7e>>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", mimgopc<0x7f>>;
+
+let SubtargetPredicate = isGFX10Only, OtherPredicates = [HasGFX10_AEncoding] in
+defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<MIMG.NOP, 0x80>, "image_msaa_load", 1, 0, 0, 1>;
+
+let OtherPredicates = [HasGFX10_AEncoding] in
+defm IMAGE_MSAA_LOAD : MIMG_MSAA_Load <mimgopc<0x18, MIMG.NOP>, "image_msaa_load">;
+
+let OtherPredicates = [HasGFX10_AEncoding] in {
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x19, 0xe6>, "image_bvh_intersect_ray", 0, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x19, 0xe6>, "image_bvh_intersect_ray", 0, 1>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0xe7>, "image_bvh64_intersect_ray", 1, 1>;
+} // End OtherPredicates = [HasGFX10_AEncoding]
+
+} // End let OtherPredicates = [HasImageInsts]
/********** ========================================= **********/
/********** Table of dimension-aware image intrinsics **********/
diff --git a/llvm/lib/Target/AMDGPU/R600.h b/llvm/lib/Target/AMDGPU/R600.h
index 2b483ae63da9..5dfbf8f1ef95 100644
--- a/llvm/lib/Target/AMDGPU/R600.h
+++ b/llvm/lib/Target/AMDGPU/R600.h
@@ -26,7 +26,7 @@ FunctionPass *createR600EmitClauseMarkers();
FunctionPass *createR600ClauseMergePass();
FunctionPass *createR600Packetizer();
FunctionPass *createR600ControlFlowFinalizer();
-FunctionPass *createAMDGPUCFGStructurizerPass();
+FunctionPass *createR600MachineCFGStructurizerPass();
FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
ModulePass *createR600OpenCLImageTypeLoweringPass();
diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
index c19e3c41485e..afcb6b4d65f8 100644
--- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -111,7 +111,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
MCContext &Context = getObjFileLowering().getContext();
MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(ConfigSection);
+ OutStreamer->switchSection(ConfigSection);
EmitProgramInfoR600(MF);
@@ -120,7 +120,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (isVerbose()) {
MCSectionELF *CommentSection =
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(CommentSection);
+ OutStreamer->switchSection(CommentSection);
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer->emitRawComment(
diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 715fd69fc7ae..2b85df8ac6cf 100644
--- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer.
+/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
/// This pass is merging consecutive CFAlus where applicable.
/// It needs to be called after IfCvt for best results.
//===----------------------------------------------------------------------===//
@@ -15,6 +15,7 @@
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600.h"
#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 8a48a67b829c..4bf38a3c6ceb 100644
--- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -16,6 +16,7 @@
#include "R600.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include <set>
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index b9ca7f928d56..ef67e5c937dc 100644
--- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -17,6 +17,7 @@
#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
@@ -327,9 +328,9 @@ char R600EmitClauseMarkers::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
- "R600 Emit Clause Markters", false, false)
+ "R600 Emit Clause Markers", false, false)
INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
- "R600 Emit Clause Markters", false, false)
+ "R600 Emit Clause Markers", false, false)
FunctionPass *llvm::createR600EmitClauseMarkers() {
return new R600EmitClauseMarkers();
diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 194879fef53c..ef2d049f9175 100644
--- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -17,6 +17,8 @@
#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
index abd4086db62c..fd8cecab90da 100644
--- a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -8,6 +8,7 @@
#include "R600FrameLowering.h"
#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index bd757e9e3d70..bf52f7830ad7 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -42,39 +42,26 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
computeRegisterProperties(Subtarget->getRegisterInfo());
// Legalize loads and stores to the private address space.
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
// spaces, so it is custom lowered to handle those where it isn't.
- for (MVT VT : MVT::integer_valuetypes()) {
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
-
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
- }
+ for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(Op, VT, MVT::i1, Promote);
+ setLoadExtAction(Op, VT, MVT::i8, Custom);
+ setLoadExtAction(Op, VT, MVT::i16, Custom);
+ }
// Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
+ MVT::v2i1, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32,
+ MVT::v4i1, Expand);
- setOperationAction(ISD::STORE, MVT::i8, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
- setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
+ Custom);
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
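This hunk is a behavior-preserving rewrite onto the ArrayRef overloads of setOperationAction, setCondCodeAction, setLoadExtAction, and setTargetDAGCombine: a braced list of opcodes and/or value types registers the same action for every element, exactly as if the scalar form had been called once per entry. A minimal sketch of the equivalence:

    // Old style: one call per opcode.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
    // New style: one call over a list; the registered actions are identical.
    setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS}, MVT::i32, Custom);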
@@ -96,55 +83,34 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
// Set condition code actions
- setCondCodeAction(ISD::SETO, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
- setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
-
- setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
- setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
-
- setOperationAction(ISD::FCOS, MVT::f32, Custom);
- setOperationAction(ISD::FSIN, MVT::f32, Custom);
-
- setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
-
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT,
+ ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE,
+ ISD::SETUGT, ISD::SETULT, ISD::SETULE},
+ MVT::f32, Expand);
+
+ setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},
+ MVT::i32, Expand);
+
+ setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom);
+
+ setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
+
+ setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
- setOperationAction(ISD::FCEIL, MVT::f64, Custom);
- setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
- setOperationAction(ISD::FRINT, MVT::f64, Custom);
- setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+ setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
+ MVT::f64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
- setOperationAction(ISD::SETCC, MVT::i32, Expand);
- setOperationAction(ISD::SETCC, MVT::f32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
+ setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
+ Custom);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
+ Expand);
// ADD, SUB overflow.
// TODO: turn these into Legal?
@@ -158,56 +124,43 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
if (!Subtarget->hasBFE())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
if (!Subtarget->hasBFE())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
if (!Subtarget->hasBFE())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,
+ {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT,
+ {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
// We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
// to be Legal/Custom in order to avoid library calls.
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, MVT::i32,
+ Custom);
- if (!Subtarget->hasFMA()) {
- setOperationAction(ISD::FMA, MVT::f32, Expand);
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- }
+ if (!Subtarget->hasFMA())
+ setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
// FIXME: May need no denormals check
setOperationAction(ISD::FMAD, MVT::f32, Legal);
- if (!Subtarget->hasBFI()) {
+ if (!Subtarget->hasBFI())
// fcopysign can be done in a single instruction with BFI.
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- }
+ setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
if (!Subtarget->hasBCNT(32))
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
@@ -229,30 +182,22 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
- for (MVT VT : ScalarIntVTs) {
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::SUBE, VT, Expand);
- }
+ for (MVT VT : ScalarIntVTs)
+ setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,
+ Expand);
// LLVM will expand these to atomic_cmp_swap(0)
// and atomic_swap, respectively.
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);
// We need to custom lower some of the intrinsics
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other,
+ Custom);
setSchedulingPreference(Sched::Source);
- setTargetDAGCombine(ISD::FP_ROUND);
- setTargetDAGCombine(ISD::FP_TO_SINT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::SELECT_CC);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT,
+ ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT, ISD::LOAD});
}
static inline bool isEOP(MachineBasicBlock::iterator I) {
@@ -995,7 +940,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
-/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
+/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
unsigned StackWidth,
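Since each stack entry spans StackWidth of the four 32-bit sub-registers, the conversion the comment describes reduces to dividing the byte address by 4 * StackWidth. A hypothetical scalar sketch of what the lowering emits as a DAG right-shift (assuming StackWidth is 1, 2, or 4, so the divisor is a power of two):

    // Byte address -> register index; e.g. ByteAddr 32 with StackWidth 2
    // (8 bytes per entry) yields register index 4.
    unsigned byteAddrToRegIndex(unsigned ByteAddr, unsigned StackWidth) {
      return ByteAddr / (4 * StackWidth); // lowered as SRL by log2(4 * StackWidth)
    }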
@@ -1100,7 +1045,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
DAG.getConstant(3, DL, MVT::i32));
- // TODO: Contrary to the name of the functiom,
+ // TODO: Contrary to the name of the function,
// it also handles sub i32 non-truncating stores (like i1)
SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
Store->getValue());
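The shift in this hunk turns a byte index into a bit position: sub-dword private stores are lowered as a read-modify-write of the containing 32-bit word, so the sign-extended value must be placed ByteIdx * 8 bits up. A worked example:

    // ByteIdx = 2  =>  ShiftAmt = 2 << 3 = 16: the stored byte occupies
    // bits [23:16] of the 32-bit word.
    unsigned bitOffsetForByte(unsigned ByteIdx) { return ByteIdx << 3; }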
@@ -1163,9 +1108,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
// TODO: can the chain be replaced without creating a new store?
SDValue NewStore = DAG.getTruncStore(
- NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
- MemVT, StoreNode->getAlignment(),
- StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+ NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
+ StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
+ StoreNode->getAAInfo());
StoreNode = cast<StoreSDNode>(NewStore);
}
@@ -1417,7 +1362,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
SDValue NewLoad = DAG.getExtLoad(
ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
- LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
+ LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
DAG.getValueType(MemVT));
@@ -1610,7 +1555,7 @@ static SDValue CompactSwizzlableVector(
if (NewBldVec[i].isUndef())
// We mask the write here to teach later passes that the ith element of this
// vector is undef. Thus we can use it to reduce 128 bits reg usage,
- // break false dependencies and additionnaly make assembly easier to read.
+ // break false dependencies and additionally make assembly easier to read.
RemapSwizzle[i] = 7; // SEL_MASK_WRITE
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
if (C->isZero()) {
@@ -1714,7 +1659,7 @@ SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
return SDValue();
- if (LoadNode->getAlignment() < 4)
+ if (LoadNode->getAlign() < Align(4))
return SDValue();
int ConstantBlock = ConstantAddressBlock(Block);
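Both hunks in this file are part of the getAlignment() -> getAlign() migration: memory nodes now report an llvm::Align (a type that can only hold a power of two) instead of a raw unsigned, so thresholds are written as constructed Align values. A sketch of the idiom, using the check above:

    // Align(4) replaces the old raw constant 4; Align asserts at construction
    // that its value is a power of two.
    if (LoadNode->getAlign() < Align(4))
      return SDValue(); // constant-buffer load is under-aligned; bail out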
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index aec8b1ae4837..d04ec6490aae 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -18,6 +18,7 @@
#include "R600Defines.h"
#include "R600Subtarget.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
@@ -1469,21 +1470,3 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
FlagOp.setImm(InstFlags);
}
}
-
-unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
- unsigned Kind) const {
- switch (Kind) {
- case PseudoSourceValue::Stack:
- case PseudoSourceValue::FixedStack:
- return AMDGPUAS::PRIVATE_ADDRESS;
- case PseudoSourceValue::ConstantPool:
- case PseudoSourceValue::GOT:
- case PseudoSourceValue::JumpTable:
- case PseudoSourceValue::GlobalValueCallEntry:
- case PseudoSourceValue::ExternalSymbolCallEntry:
- case PseudoSourceValue::TargetCustom:
- return AMDGPUAS::CONSTANT_ADDRESS;
- }
-
- llvm_unreachable("Invalid pseudo source kind");
-}
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index bc8a4786df77..f720e4656348 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -320,9 +320,6 @@ public:
bool isRegisterLoad(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD;
}
-
- unsigned getAddressSpaceForPseudoSourceKind(
- unsigned Kind) const override;
};
namespace R600 {
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/R600MachineCFGStructurizer.cpp
index 1736c078eb83..0a96c643d9bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600MachineCFGStructurizer.cpp
@@ -1,4 +1,4 @@
-//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
+//===- R600MachineCFGStructurizer.cpp - CFG Structurizer ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,6 +10,7 @@
#include "R600.h"
#include "R600RegisterInfo.h"
#include "R600Subtarget.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -42,7 +43,7 @@ STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
namespace llvm {
-void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
+void initializeR600MachineCFGStructurizerPass(PassRegistry &);
} // end namespace llvm
@@ -89,7 +90,7 @@ public:
//
//===----------------------------------------------------------------------===//
-class AMDGPUCFGStructurizer : public MachineFunctionPass {
+class R600MachineCFGStructurizer : public MachineFunctionPass {
public:
using MBBVector = SmallVector<MachineBasicBlock *, 32>;
using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
@@ -103,8 +104,8 @@ public:
static char ID;
- AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
- initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
+ R600MachineCFGStructurizer() : MachineFunctionPass(ID) {
+ initializeR600MachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
}
StringRef getPassName() const override {
@@ -317,16 +318,16 @@ private:
} // end anonymous namespace
-char AMDGPUCFGStructurizer::ID = 0;
+char R600MachineCFGStructurizer::ID = 0;
-int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
+int R600MachineCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
if (It == BlockInfoMap.end())
return INVALIDSCCNUM;
return (*It).second->SccNum;
}
-MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
+MachineBasicBlock *R600MachineCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
const {
LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
if (It == LLInfoMap.end())
@@ -334,7 +335,7 @@ MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
return (*It).second;
}
-bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
+bool R600MachineCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
MachineLoop *LoopRep = MLI->getLoopFor(MBB);
if (!LoopRep)
return false;
@@ -342,14 +343,14 @@ bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
return MBB->isSuccessor(LoopHeader);
}
-bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
+bool R600MachineCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
if (It == BlockInfoMap.end())
return false;
return (*It).second->IsRetired;
}
-bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
+bool R600MachineCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
MachineLoop *LoopRep = MLI->getLoopFor(MBB);
while (LoopRep && LoopRep->getHeader() == MBB) {
MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
@@ -362,7 +363,7 @@ bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
return false;
}
-AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
+R600MachineCFGStructurizer::PathToKind R600MachineCFGStructurizer::singlePathTo(
MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
bool AllowSideEntry) const {
assert(DstMBB);
@@ -380,7 +381,7 @@ AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
return Not_SinglePath;
}
-int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
+int R600MachineCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
MBBVector::const_iterator E) const {
int Count = 0;
while (It != E) {
@@ -391,7 +392,7 @@ int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
return Count;
}
-bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
+bool R600MachineCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
unsigned BlockSizeThreshold = 30;
unsigned CloneInstrThreshold = 100;
bool MultiplePreds = MBB && (MBB->pred_size() > 1);
@@ -403,7 +404,7 @@ bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
(BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
}
-void AMDGPUCFGStructurizer::reversePredicateSetter(
+void R600MachineCFGStructurizer::reversePredicateSetter(
MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
assert(I.isValid() && "Expected valid iterator");
for (;; --I) {
@@ -430,7 +431,7 @@ void AMDGPUCFGStructurizer::reversePredicateSetter(
}
}
-void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
+void R600MachineCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
int NewOpcode, const DebugLoc &DL) {
MachineInstr *MI =
MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
@@ -439,7 +440,7 @@ void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
SHOWNEWINSTR(MI);
}
-MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
+MachineInstr *R600MachineCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
int NewOpcode,
const DebugLoc &DL) {
MachineInstr *MI =
@@ -452,7 +453,7 @@ MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
return MI;
}
-MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
+MachineInstr *R600MachineCFGStructurizer::insertInstrBefore(
MachineBasicBlock::iterator I, int NewOpcode) {
MachineInstr *OldMI = &(*I);
MachineBasicBlock *MBB = OldMI->getParent();
@@ -464,7 +465,7 @@ MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
return NewMBB;
}
-void AMDGPUCFGStructurizer::insertCondBranchBefore(
+void R600MachineCFGStructurizer::insertCondBranchBefore(
MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
MachineInstr *OldMI = &(*I);
MachineBasicBlock *MBB = OldMI->getParent();
@@ -477,7 +478,7 @@ void AMDGPUCFGStructurizer::insertCondBranchBefore(
//erase later oldInstr->eraseFromParent();
}
-void AMDGPUCFGStructurizer::insertCondBranchBefore(
+void R600MachineCFGStructurizer::insertCondBranchBefore(
MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
int RegNum, const DebugLoc &DL) {
MachineFunction *MF = blk->getParent();
@@ -488,7 +489,7 @@ void AMDGPUCFGStructurizer::insertCondBranchBefore(
SHOWNEWINSTR(NewInstr);
}
-int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
+int R600MachineCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case R600::JUMP_COND:
case R600::JUMP: return R600::IF_PREDICATE_SET;
@@ -499,7 +500,7 @@ int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
return -1;
}
-int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
+int R600MachineCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case R600::JUMP_COND:
case R600::JUMP: return R600::IF_PREDICATE_SET;
@@ -510,7 +511,7 @@ int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
return -1;
}
-int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
+int R600MachineCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case R600::JUMP_COND:
case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
@@ -519,7 +520,7 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
return -1;
}
-int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
+int R600MachineCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case R600::JUMP_COND:
case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
@@ -528,17 +529,17 @@ int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
return -1;
}
-MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
+MachineBasicBlock *R600MachineCFGStructurizer::getTrueBranch(MachineInstr *MI) {
return MI->getOperand(0).getMBB();
}
-void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
+void R600MachineCFGStructurizer::setTrueBranch(MachineInstr *MI,
MachineBasicBlock *MBB) {
MI->getOperand(0).setMBB(MBB);
}
MachineBasicBlock *
-AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
+R600MachineCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
MachineInstr *MI) {
assert(MBB->succ_size() == 2);
MachineBasicBlock *TrueBranch = getTrueBranch(MI);
@@ -548,7 +549,7 @@ AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
return (*It == TrueBranch) ? *Next : *It;
}
-bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
+bool R600MachineCFGStructurizer::isCondBranch(MachineInstr *MI) {
switch (MI->getOpcode()) {
case R600::JUMP_COND:
case R600::BRANCH_COND_i32:
@@ -559,7 +560,7 @@ bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
return false;
}
-bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
+bool R600MachineCFGStructurizer::isUncondBranch(MachineInstr *MI) {
switch (MI->getOpcode()) {
case R600::JUMP:
case R600::BRANCH:
@@ -570,7 +571,7 @@ bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
return false;
}
-DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
+DebugLoc R600MachineCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
//get DebugLoc from the first MachineBasicBlock instruction with debug info
DebugLoc DL;
for (MachineInstr &MI : *MBB)
@@ -579,7 +580,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
return DL;
}
-MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
+MachineInstr *R600MachineCFGStructurizer::getNormalBlockBranchInstr(
MachineBasicBlock *MBB) {
MachineBasicBlock::reverse_iterator It = MBB->rbegin();
MachineInstr *MI = &*It;
@@ -588,7 +589,7 @@ MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
return nullptr;
}
-MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
+MachineInstr *R600MachineCFGStructurizer::getLoopendBlockBranchInstr(
MachineBasicBlock *MBB) {
for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
It != E; ++It) {
@@ -604,7 +605,7 @@ MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
return nullptr;
}
-MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
+MachineInstr *R600MachineCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
MachineBasicBlock::reverse_iterator It = MBB->rbegin();
if (It != MBB->rend()) {
MachineInstr *instr = &(*It);
@@ -614,7 +615,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
return nullptr;
}
-bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
+bool R600MachineCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
MachineInstr *MI = getReturnInstr(MBB);
bool IsReturn = MBB->succ_empty();
if (MI)
@@ -625,13 +626,13 @@ bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
return IsReturn;
}
-void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
+void R600MachineCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
MachineBasicBlock *SrcMBB) {
for (MachineBasicBlock *Succ : SrcMBB->successors())
DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of
}
-MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
+MachineBasicBlock *R600MachineCFGStructurizer::clone(MachineBasicBlock *MBB) {
MachineFunction *Func = MBB->getParent();
MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
Func->push_back(NewMBB); //insert to function
@@ -640,7 +641,7 @@ MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
return NewMBB;
}
-void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
+void R600MachineCFGStructurizer::replaceInstrUseOfBlockWith(
MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
MachineBasicBlock *NewBlk) {
MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
@@ -649,7 +650,7 @@ void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
setTrueBranch(BranchMI, NewBlk);
}
-void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
+void R600MachineCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
assert((!MBB->getParent()->getJumpTableInfo()
|| MBB->getParent()->getJumpTableInfo()->isEmpty())
&& "found a jump table");
@@ -677,12 +678,12 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
// blocks in the jump table with the entryBlk //}
}
-bool AMDGPUCFGStructurizer::prepare() {
+bool R600MachineCFGStructurizer::prepare() {
bool Changed = false;
//FIXME: if not reducible flow graph, make it so ???
- LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
+ LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::prepare\n";);
orderBlocks(FuncRep);
@@ -719,9 +720,9 @@ bool AMDGPUCFGStructurizer::prepare() {
return Changed;
}
-bool AMDGPUCFGStructurizer::run() {
+bool R600MachineCFGStructurizer::run() {
//Assume reducible CFG...
- LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
+ LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::run\n");
#ifdef STRESSTEST
//Use the worst block ordering to test the algorithm.
@@ -740,6 +741,7 @@ bool AMDGPUCFGStructurizer::run() {
++NumIter;
LLVM_DEBUG(dbgs() << "numIter = " << NumIter
<< ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
+ (void)NumIter;
SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
OrderedBlks.begin();
@@ -780,6 +782,7 @@ bool AMDGPUCFGStructurizer::run() {
LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
<< ", sccNumIter = " << SccNumIter;
dbgs() << "doesn't make any progress\n";);
+ (void)SccNumIter;
ContNextScc = true;
} else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
SccNumBlk = sccRemainedNumBlk;
@@ -842,7 +845,7 @@ bool AMDGPUCFGStructurizer::run() {
return true;
}
-void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
+void R600MachineCFGStructurizer::orderBlocks(MachineFunction *MF) {
int SccNum = 0;
for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
++It, ++SccNum) {
@@ -861,7 +864,7 @@ void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
}
}
-int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
+int R600MachineCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
int NumMatch = 0;
int CurMatch;
@@ -876,7 +879,7 @@ int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
return NumMatch;
}
-int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
+int R600MachineCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
int NumMatch = 0;
NumMatch += loopendPatternMatch();
NumMatch += serialPatternMatch(MBB);
@@ -884,7 +887,7 @@ int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
return NumMatch;
}
-int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
+int R600MachineCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
if (MBB->succ_size() != 1)
return 0;
@@ -897,7 +900,7 @@ int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
return 1;
}
-int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
+int R600MachineCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
//two edges
if (MBB->succ_size() != 2)
return 0;
@@ -975,7 +978,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
return 1 + Cloned + NumMatch;
}
-int AMDGPUCFGStructurizer::loopendPatternMatch() {
+int R600MachineCFGStructurizer::loopendPatternMatch() {
std::deque<MachineLoop *> NestedLoops;
for (auto &It: *MLI)
for (MachineLoop *ML : depth_first(It))
@@ -1000,7 +1003,7 @@ int AMDGPUCFGStructurizer::loopendPatternMatch() {
return Num;
}
-int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
+int R600MachineCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
MachineBasicBlock *LoopHeader = LoopRep->getHeader();
MBBVector ExitingMBBs;
LoopRep->getExitingBlocks(ExitingMBBs);
@@ -1041,7 +1044,7 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
return 1;
}
-bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
+bool R600MachineCFGStructurizer::isSameloopDetachedContbreak(
MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
if (Src1MBB->succ_empty()) {
MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
@@ -1058,7 +1061,7 @@ bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
return false;
}
-int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
+int R600MachineCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
if (Num == 0) {
@@ -1069,7 +1072,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
return Num;
}
-int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
+int R600MachineCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
int Num = 0;
MachineBasicBlock *DownBlk;
@@ -1107,7 +1110,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
}
#ifndef NDEBUG
-void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
+void R600MachineCFGStructurizer::showImproveSimpleJumpintoIf(
MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
dbgs() << "head = BB" << HeadMBB->getNumber()
@@ -1150,7 +1153,7 @@ void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
}
#endif
-int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
+int R600MachineCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
MachineBasicBlock **LandMBBPtr) {
bool MigrateTrue = false;
@@ -1322,7 +1325,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
return NumNewBlk;
}
-void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
+void R600MachineCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
MachineBasicBlock *SrcMBB) {
LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
<< SrcMBB->getNumber() << "\n";);
@@ -1336,7 +1339,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
retireBlock(SrcMBB);
}
-void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
+void R600MachineCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
assert (TrueMBB);
@@ -1392,7 +1395,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MBB->addSuccessor(LandMBB);
}
-void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
+void R600MachineCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
MachineBasicBlock *LandMBB) {
LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
<< " land = BB" << LandMBB->getNumber() << "\n";);
@@ -1402,7 +1405,7 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
-void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
+void R600MachineCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
MachineBasicBlock *LandMBB) {
LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
<< ExitingMBB->getNumber() << " land = BB"
@@ -1423,7 +1426,7 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
ExitingMBB->removeSuccessor(LandMBB, true);
}
-void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
+void R600MachineCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
MachineBasicBlock *ContMBB) {
LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
<< ContingMBB->getNumber() << ", cont = BB"
@@ -1466,7 +1469,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
}
}
-int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
+int R600MachineCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
int Cloned = 0;
assert(PreMBB->isSuccessor(SrcMBB));
@@ -1485,10 +1488,9 @@ int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
}
MachineBasicBlock *
-AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
+R600MachineCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
MachineBasicBlock *PredMBB) {
- assert(PredMBB->isSuccessor(MBB) &&
- "succBlk is not a prececessor of curBlk");
+ assert(PredMBB->isSuccessor(MBB) && "succBlk is not a predecessor of curBlk");
MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
@@ -1510,7 +1512,7 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
return CloneMBB;
}
-void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
+void R600MachineCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator SpliceEnd;
//look for the input branchinstr, not the AMDGPU branchinstr
@@ -1535,7 +1537,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
}
MachineBasicBlock *
-AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
+R600MachineCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
MachineBasicBlock *LoopHeader = LoopRep->getHeader();
MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
@@ -1555,7 +1557,7 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
return nullptr;
}
-void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
+void R600MachineCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
MachineInstr *BranchMI;
// I saw two unconditional branches in one basic block in an example
@@ -1567,7 +1569,7 @@ void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
}
}
-void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
+void R600MachineCFGStructurizer::removeRedundantConditionalBranch(
MachineBasicBlock *MBB) {
if (MBB->succ_size() != 2)
return;
@@ -1584,7 +1586,7 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
MBB->removeSuccessor(MBB1, true);
}
-void AMDGPUCFGStructurizer::addDummyExitBlock(
+void R600MachineCFGStructurizer::addDummyExitBlock(
SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
FuncRep->push_back(DummyExitBlk); //insert to function
@@ -1600,12 +1602,12 @@ void AMDGPUCFGStructurizer::addDummyExitBlock(
SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
}
-void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
+void R600MachineCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
while (MBB->succ_size())
MBB->removeSuccessor(*MBB->succ_begin());
}
-void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
+void R600MachineCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
int SccNum) {
BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
if (!srcBlkInfo)
@@ -1613,7 +1615,7 @@ void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
srcBlkInfo->SccNum = SccNum;
}
-void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
+void R600MachineCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
@@ -1625,14 +1627,14 @@ void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet");
}
-INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
+INITIALIZE_PASS_BEGIN(R600MachineCFGStructurizer, "amdgpustructurizer",
"AMDGPU CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
+INITIALIZE_PASS_END(R600MachineCFGStructurizer, "amdgpustructurizer",
"AMDGPU CFG Structurizer", false, false)
-FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
- return new AMDGPUCFGStructurizer();
+FunctionPass *llvm::createR600MachineCFGStructurizerPass() {
+ return new R600MachineCFGStructurizer();
}
diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index fbe2a1cd9fba..59e274787590 100644
--- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -207,7 +207,7 @@ public:
return !ARDef || !ARUse;
}
- // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
+ // isLegalToPruneDependencies - Is it legal to prune dependency between SUI
// and SUJ.
bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
return false;
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp
index 20c1ce7266dd..d8f061054904 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp
@@ -27,8 +27,6 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
: R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- FMA(false), CaymanISA(false), CFALUBug(false), HasVertexCache(false),
- R600ALUInst(false), FP64(false), TexVTXClauseSize(0), Gen(R600),
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
InstrItins(getInstrItineraryForCPU(GPU)) {}
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index 92d559b1f8e6..c3d002f29272 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -31,14 +31,14 @@ class R600Subtarget final : public R600GenSubtargetInfo,
private:
R600InstrInfo InstrInfo;
R600FrameLowering FrameLowering;
- bool FMA;
- bool CaymanISA;
- bool CFALUBug;
- bool HasVertexCache;
- bool R600ALUInst;
- bool FP64;
- short TexVTXClauseSize;
- Generation Gen;
+ bool FMA = false;
+ bool CaymanISA = false;
+ bool CFALUBug = false;
+ bool HasVertexCache = false;
+ bool R600ALUInst = false;
+ bool FP64 = false;
+ short TexVTXClauseSize = 0;
+ Generation Gen = R600;
R600TargetLowering TLInfo;
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
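The deleted constructor-init-list entries in R600Subtarget.cpp above are subsumed by these default member initializers: each default now lives next to its declaration and is applied by every constructor automatically. A minimal sketch of the idiom, with hypothetical names:

    struct Example {
      bool Flag = false;      // default stated once, at the declaration
      short Size = 0;
      Example() = default;    // no init-list entries needed
    };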
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
index 39dad45425fc..76bb0f65ef69 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
@@ -83,7 +83,7 @@ R600TargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetTransformInfo
-R600TargetMachine::getTargetTransformInfo(const Function &F) {
+R600TargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(R600TTIImpl(this, F));
}
@@ -131,7 +131,7 @@ void R600PassConfig::addPreSched2() {
}
void R600PassConfig::addPreEmitPass() {
- addPass(createAMDGPUCFGStructurizerPass());
+ addPass(createR600MachineCFGStructurizerPass());
addPass(createR600ExpandSpecialInstrsPass());
addPass(&FinalizeMachineBundlesID);
addPass(createR600Packetizer());
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
index 0ccbca3c68b1..8d20841292b9 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// The AMDGPU TargetMachine interface definition for hw codgen targets.
+/// The AMDGPU TargetMachine interface definition for hw codegen targets.
//
//===----------------------------------------------------------------------===//
@@ -38,7 +38,7 @@ public:
const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
bool isMachineVerifierClean() const override { return false; }
};
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index b81fac36fc95..afd2a38b11ec 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -73,19 +73,19 @@ class SIAnnotateControlFlow : public FunctionPass {
bool hasKill(const BasicBlock *BB);
- void eraseIfUnused(PHINode *Phi);
+ bool eraseIfUnused(PHINode *Phi);
- void openIf(BranchInst *Term);
+ bool openIf(BranchInst *Term);
- void insertElse(BranchInst *Term);
+ bool insertElse(BranchInst *Term);
Value *
handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
BranchInst *Term);
- void handleLoop(BranchInst *Term);
+ bool handleLoop(BranchInst *Term);
- void closeControlFlow(BasicBlock *BB);
+ bool closeControlFlow(BasicBlock *BB);
public:
static char ID;
@@ -193,31 +193,34 @@ bool SIAnnotateControlFlow::hasKill(const BasicBlock *BB) {
return false;
}
-// Erase "Phi" if it is not used any more
-void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
- if (RecursivelyDeleteDeadPHINode(Phi)) {
+// Erase "Phi" if it is not used any more. Return true if any change was made.
+bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
+ bool Changed = RecursivelyDeleteDeadPHINode(Phi);
+ if (Changed)
LLVM_DEBUG(dbgs() << "Erased unused condition phi\n");
- }
+ return Changed;
}
/// Open a new "If" block
-void SIAnnotateControlFlow::openIf(BranchInst *Term) {
+bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
if (isUniform(Term))
- return;
+ return false;
Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+ return true;
}
/// Close the last "If" block and open a new "Else" block
-void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
+bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
if (isUniform(Term)) {
- return;
+ return false;
}
Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+ return true;
}
/// Recursively handle the condition leading to a loop
@@ -255,14 +258,14 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
}
/// Handle a back edge (loop)
-void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
if (isUniform(Term))
- return;
+ return false;
BasicBlock *BB = Term->getParent();
llvm::Loop *L = LI->getLoopFor(BB);
if (!L)
- return;
+ return false;
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front());
@@ -286,10 +289,12 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
push(Term->getSuccessor(0), Arg);
+
+ return true;
}
/// Close the last opened control flow
-void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
llvm::Loop *L = LI->getLoopFor(BB);
assert(Stack.back().first == BB);
@@ -322,6 +327,8 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
}
CallInst::Create(EndCf, Exec, "", FirstInsertionPt);
}
+
+ return true;
}
/// Annotate the control flow with intrinsics so the backend can
@@ -333,6 +340,7 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const TargetMachine &TM = TPC.getTM<TargetMachine>();
+ bool Changed = false;
initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F));
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
@@ -341,32 +349,32 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
if (!Term || Term->isUnconditional()) {
if (isTopOfStack(BB))
- closeControlFlow(BB);
+ Changed |= closeControlFlow(BB);
continue;
}
if (I.nodeVisited(Term->getSuccessor(1))) {
if (isTopOfStack(BB))
- closeControlFlow(BB);
+ Changed |= closeControlFlow(BB);
if (DT->dominates(Term->getSuccessor(1), BB))
- handleLoop(Term);
+ Changed |= handleLoop(Term);
continue;
}
if (isTopOfStack(BB)) {
PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
if (Phi && Phi->getParent() == BB && isElse(Phi) && !hasKill(BB)) {
- insertElse(Term);
- eraseIfUnused(Phi);
+ Changed |= insertElse(Term);
+ Changed |= eraseIfUnused(Phi);
continue;
}
- closeControlFlow(BB);
+ Changed |= closeControlFlow(BB);
}
- openIf(Term);
+ Changed |= openIf(Term);
}
if (!Stack.empty()) {
@@ -374,7 +382,7 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
report_fatal_error("failed to annotate CFG");
}
- return true;
+ return Changed;
}
/// Create the annotation pass
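The point of threading a bool through every helper above: a legacy FunctionPass is expected to return true from runOnFunction only when it actually mutated the IR, so the pass manager knows which analyses to invalidate; the previous unconditional return true over-reported changes. The resulting pattern, in sketch form:

    bool Changed = false;
    Changed |= openIf(Term);       // each helper now reports real mutations
    Changed |= eraseIfUnused(Phi); // e.g. RecursivelyDeleteDeadPHINode's result
    return Changed;                // accurate modification status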
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 107ee5ed5532..85930312352b 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -63,6 +63,12 @@ enum : uint64_t {
VGPRSpill = 1 << 24,
SGPRSpill = 1 << 25,
+ // LDSDIR instruction format.
+ LDSDIR = 1 << 26,
+
+ // VINTERP instruction format.
+ VINTERP = 1 << 27,
+
// High bits - other information.
VM_CNT = UINT64_C(1) << 32,
EXP_CNT = UINT64_C(1) << 33,
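LDSDIR and VINTERP extend the TSFlags instruction-format bits (new GFX11 encodings) and are queried like any other format flag. A hedged sketch (the helper name is hypothetical):

    // True if the instruction uses the VINTERP encoding (bit 27 above).
    static bool isVINTERP(const MCInstrDesc &Desc) {
      return Desc.TSFlags & SIInstrFlags::VINTERP;
    }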
@@ -120,7 +126,10 @@ enum : uint64_t {
IsAtomicNoRet = UINT64_C(1) << 57,
// Atomic with return.
- IsAtomicRet = UINT64_C(1) << 58
+ IsAtomicRet = UINT64_C(1) << 58,
+
+ // Is a WMMA instruction.
+ IsWMMA = UINT64_C(1) << 59,
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -258,9 +267,10 @@ namespace AMDGPUAsmVariants {
VOP3 = 1,
SDWA = 2,
SDWA9 = 3,
- DPP = 4
+ DPP = 4,
+ VOP3_DPP = 5
};
-}
+} // namespace AMDGPUAsmVariants
namespace AMDGPU {
namespace EncValues { // Encoding values of enum9/8/7 operands
@@ -280,7 +290,8 @@ enum : unsigned {
INLINE_FLOATING_C_MAX = 248,
LITERAL_CONST = 255,
VGPR_MIN = 256,
- VGPR_MAX = 511
+ VGPR_MAX = 511,
+ IS_VGPR = 256 // Indicates VGPR or AGPR
};
} // namespace EncValues
@@ -294,6 +305,9 @@ enum CPol {
SLC = 2,
DLC = 4,
SCC = 16,
+ SC0 = GLC,
+ SC1 = SCC,
+ NT = SLC,
ALL = GLC | SLC | DLC | SCC
};
@@ -302,24 +316,33 @@ enum CPol {
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
enum Id { // Message ID, width(4) [3:0].
- ID_UNKNOWN_ = -1,
ID_INTERRUPT = 1,
- ID_GS = 2,
- ID_GS_DONE = 3,
- ID_SAVEWAVE = 4, // added in GFX8
+
+ ID_GS_PreGFX11 = 2, // replaced in GFX11
+ ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11
+
+ ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11
+ ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11
+
+ ID_SAVEWAVE = 4, // added in GFX8, removed in GFX11
ID_STALL_WAVE_GEN = 5, // added in GFX9
ID_HALT_WAVES = 6, // added in GFX9
ID_ORDERED_PS_DONE = 7, // added in GFX9
ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
ID_GS_ALLOC_REQ = 9, // added in GFX9
- ID_GET_DOORBELL = 10, // added in GFX9
- ID_GET_DDID = 11, // added in GFX10
+ ID_GET_DOORBELL = 10, // added in GFX9, removed in GFX11
+ ID_GET_DDID = 11, // added in GFX10, removed in GFX11
ID_SYSMSG = 15,
- ID_GAPS_LAST_, // Indicate that sequence has gaps.
- ID_GAPS_FIRST_ = ID_INTERRUPT,
- ID_SHIFT_ = 0,
- ID_WIDTH_ = 4,
- ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
+
+ ID_RTN_GET_DOORBELL = 128,
+ ID_RTN_GET_DDID = 129,
+ ID_RTN_GET_TMA = 130,
+ ID_RTN_GET_REALTIME = 131,
+ ID_RTN_SAVE_WAVE = 132,
+ ID_RTN_GET_TBA = 133,
+
+ ID_MASK_PreGFX11_ = 0xF,
+ ID_MASK_GFX11Plus_ = 0xFF
};
enum Op { // Both GS and SYS operation IDs.
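With the ID_UNKNOWN_/ID_GAPS_* bookkeeping sentinels removed, a message ID is just the immediate masked by a subtarget-dependent width: 4 bits before GFX11 and 8 bits from GFX11 on, which makes room for the ID_RTN_* messages at 128 and up. A hypothetical decoder sketch using the masks above:

    unsigned decodeMsgId(unsigned SImm16, bool IsGFX11Plus) {
      return SImm16 & (IsGFX11Plus ? 0xFFu   // ID_MASK_GFX11Plus_
                                   : 0xFu);  // ID_MASK_PreGFX11_
    }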
@@ -360,8 +383,6 @@ enum StreamId : unsigned { // Stream ID, (2) [9:8].
namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.
enum Id { // HwRegCode, (6) [5:0]
- ID_UNKNOWN_ = -1,
- ID_SYMBOLIC_FIRST_ = 1, // There are corresponding symbolic names defined.
ID_MODE = 1,
ID_STATUS = 2,
ID_TRAPSTS = 3,
@@ -370,12 +391,15 @@ enum Id { // HwRegCode, (6) [5:0]
ID_LDS_ALLOC = 6,
ID_IB_STS = 7,
ID_MEM_BASES = 15,
- ID_SYMBOLIC_FIRST_GFX9_ = ID_MEM_BASES,
ID_TBA_LO = 16,
- ID_SYMBOLIC_FIRST_GFX10_ = ID_TBA_LO,
ID_TBA_HI = 17,
ID_TMA_LO = 18,
ID_TMA_HI = 19,
+ ID_XCC_ID = 20,
+ ID_SQ_PERF_SNAPSHOT_DATA = 21,
+ ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
+ ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
+ ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
ID_FLAT_SCR_LO = 20,
ID_FLAT_SCR_HI = 21,
ID_XNACK_MASK = 22,
@@ -383,8 +407,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_HW_ID2 = 24,
ID_POPS_PACKER = 25,
ID_SHADER_CYCLES = 29,
- ID_SYMBOLIC_FIRST_GFX1030_ = ID_SHADER_CYCLES,
- ID_SYMBOLIC_LAST_ = 30,
+
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
@@ -503,6 +526,15 @@ enum MergedFormat : int64_t {
DFMT_NFMT_MAX = DFMT_NFMT_MASK
};
+enum UnifiedFormatCommon : int64_t {
+ UFMT_MAX = 127,
+ UFMT_UNDEF = -1,
+ UFMT_DEFAULT = 1
+};
+
+} // namespace MTBUFFormat
+
+namespace UfmtGFX10 {
enum UnifiedFormat : int64_t {
UFMT_INVALID = 0,
@@ -598,14 +630,95 @@ enum UnifiedFormat : int64_t {
UFMT_FIRST = UFMT_INVALID,
UFMT_LAST = UFMT_32_32_32_32_FLOAT,
+};
- UFMT_MAX = 127,
+} // namespace UfmtGFX10
- UFMT_UNDEF = -1,
- UFMT_DEFAULT = UFMT_8_UNORM
+namespace UfmtGFX11 {
+enum UnifiedFormat : int64_t {
+ UFMT_INVALID = 0,
+
+ UFMT_8_UNORM,
+ UFMT_8_SNORM,
+ UFMT_8_USCALED,
+ UFMT_8_SSCALED,
+ UFMT_8_UINT,
+ UFMT_8_SINT,
+
+ UFMT_16_UNORM,
+ UFMT_16_SNORM,
+ UFMT_16_USCALED,
+ UFMT_16_SSCALED,
+ UFMT_16_UINT,
+ UFMT_16_SINT,
+ UFMT_16_FLOAT,
+
+ UFMT_8_8_UNORM,
+ UFMT_8_8_SNORM,
+ UFMT_8_8_USCALED,
+ UFMT_8_8_SSCALED,
+ UFMT_8_8_UINT,
+ UFMT_8_8_SINT,
+
+ UFMT_32_UINT,
+ UFMT_32_SINT,
+ UFMT_32_FLOAT,
+
+ UFMT_16_16_UNORM,
+ UFMT_16_16_SNORM,
+ UFMT_16_16_USCALED,
+ UFMT_16_16_SSCALED,
+ UFMT_16_16_UINT,
+ UFMT_16_16_SINT,
+ UFMT_16_16_FLOAT,
+
+ UFMT_10_11_11_FLOAT,
+
+ UFMT_11_11_10_FLOAT,
+
+ UFMT_10_10_10_2_UNORM,
+ UFMT_10_10_10_2_SNORM,
+ UFMT_10_10_10_2_UINT,
+ UFMT_10_10_10_2_SINT,
+
+ UFMT_2_10_10_10_UNORM,
+ UFMT_2_10_10_10_SNORM,
+ UFMT_2_10_10_10_USCALED,
+ UFMT_2_10_10_10_SSCALED,
+ UFMT_2_10_10_10_UINT,
+ UFMT_2_10_10_10_SINT,
+
+ UFMT_8_8_8_8_UNORM,
+ UFMT_8_8_8_8_SNORM,
+ UFMT_8_8_8_8_USCALED,
+ UFMT_8_8_8_8_SSCALED,
+ UFMT_8_8_8_8_UINT,
+ UFMT_8_8_8_8_SINT,
+
+ UFMT_32_32_UINT,
+ UFMT_32_32_SINT,
+ UFMT_32_32_FLOAT,
+
+ UFMT_16_16_16_16_UNORM,
+ UFMT_16_16_16_16_SNORM,
+ UFMT_16_16_16_16_USCALED,
+ UFMT_16_16_16_16_SSCALED,
+ UFMT_16_16_16_16_UINT,
+ UFMT_16_16_16_16_SINT,
+ UFMT_16_16_16_16_FLOAT,
+
+ UFMT_32_32_32_UINT,
+ UFMT_32_32_32_SINT,
+ UFMT_32_32_32_FLOAT,
+ UFMT_32_32_32_32_UINT,
+ UFMT_32_32_32_32_SINT,
+ UFMT_32_32_32_32_FLOAT,
+
+ UFMT_FIRST = UFMT_INVALID,
+ UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};
-} // namespace MTBUFFormat
+} // namespace UfmtGFX11
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
@@ -746,20 +859,23 @@ enum Target : unsigned {
ET_MRT0 = 0,
ET_MRT7 = 7,
ET_MRTZ = 8,
- ET_NULL = 9,
+ ET_NULL = 9, // Pre-GFX11
ET_POS0 = 12,
ET_POS3 = 15,
- ET_POS4 = 16, // GFX10+
- ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget
- ET_PRIM = 20, // GFX10+
- ET_PARAM0 = 32,
- ET_PARAM31 = 63,
+ ET_POS4 = 16, // GFX10+
+ ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget
+ ET_PRIM = 20, // GFX10+
+ ET_DUAL_SRC_BLEND0 = 21, // GFX11+
+ ET_DUAL_SRC_BLEND1 = 22, // GFX11+
+ ET_PARAM0 = 32, // Pre-GFX11
+ ET_PARAM31 = 63, // Pre-GFX11
ET_NULL_MAX_IDX = 0,
ET_MRTZ_MAX_IDX = 0,
ET_PRIM_MAX_IDX = 0,
ET_MRT_MAX_IDX = 7,
ET_POS_MAX_IDX = 4,
+ ET_DUAL_SRC_BLEND_MAX_IDX = 1,
ET_PARAM_MAX_IDX = 31,
ET_INVALID = 255,
@@ -777,6 +893,18 @@ enum OpSel : uint64_t {
} // namespace VOP3PEncoding
+namespace ImplicitArg {
+// Implicit kernel argument offset for code object version 5.
+enum Offset_COV5 : unsigned {
+ HOSTCALL_PTR_OFFSET = 80,
+ MULTIGRID_SYNC_ARG_OFFSET = 88,
+ HEAP_PTR_OFFSET = 96,
+ PRIVATE_BASE_OFFSET = 192,
+ SHARED_BASE_OFFSET = 196,
+ QUEUE_PTR_OFFSET = 200,
+};
+
+} // namespace ImplicitArg
} // namespace AMDGPU
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
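The Offset_COV5 values describe the fixed byte layout of the implicit kernel-argument block in code object v5. A minimal sketch of forming an address inside that block, assuming a base pointer to the implicit arguments is already in hand (the helper name is hypothetical):

    #include <cstdint>

    // Hypothetical helper: address of one implicit argument, given the base of
    // the implicit kernarg block (e.g. from llvm.amdgcn.implicitarg.ptr).
    uint64_t implicitArgAddr(uint64_t ImplicitArgBase, unsigned Offset) {
      return ImplicitArgBase + Offset; // e.g. +80 for HOSTCALL_PTR_OFFSET
    }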
@@ -911,10 +1039,12 @@ enum OpSel : uint64_t {
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
-#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+#define S_00B860_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
+#define S_00B860_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12)
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
-#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+#define S_0286E8_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
+#define S_0286E8_WAVESIZE_GFX11Plus(x) (((x) & 0x7FFF) << 12)
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
#define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
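The WAVESIZE macros are split because GFX11 widens the wave-size field from 13 to 15 bits at the same bit position; only the mask differs. A worked check of the arithmetic:

    static_assert(((0x2000 & 0x1FFF) << 12) == 0,
                  "0x2000 is truncated by the 13-bit pre-GFX11 field");
    static_assert(((0x2000 & 0x7FFF) << 12) == 0x2000000,
                  "0x2000 survives in the 15-bit GFX11+ field");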
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 33954e11d6c6..99aa8a60b04f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -92,7 +92,7 @@ public:
bool tryFoldCndMask(MachineInstr &MI) const;
bool tryFoldZeroHighBits(MachineInstr &MI) const;
- void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+ bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
const MachineOperand *isClamp(const MachineInstr &MI) const;
bool tryFoldClamp(MachineInstr &MI);
@@ -146,30 +146,6 @@ static unsigned macToMad(unsigned Opc) {
return AMDGPU::INSTRUCTION_LIST_END;
}
-// Wrapper around isInlineConstant that understands special cases when
-// instruction types are replaced during operand folding.
-static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
- const MachineInstr &UseMI,
- unsigned OpNo,
- const MachineOperand &OpToFold) {
- if (TII->isInlineConstant(UseMI, OpNo, OpToFold))
- return true;
-
- unsigned Opc = UseMI.getOpcode();
- unsigned NewOpc = macToMad(Opc);
- if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
- // Special case for mac. Since this is replaced with mad when folded into
- // src2, we need to check the legality for the final instruction.
- int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
- if (static_cast<int>(OpNo) == Src2Idx) {
- const MCInstrDesc &MadDesc = TII->get(NewOpc);
- return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
- }
- }
-
- return false;
-}
-
// TODO: Add heuristic that the frame index might not fit in the addressing mode
// immediate offset to avoid materializing in loops.
static bool frameIndexMayFold(const SIInstrInfo *TII,
@@ -210,6 +186,8 @@ static bool updateOperand(FoldCandidate &Fold,
if (Fold.isImm()) {
if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
!(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
+ (!ST.hasDOTOpSelHazard() ||
+ !(MI->getDesc().TSFlags & SIInstrFlags::IsDOT)) &&
AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
ST.hasInv2PiInlineImm())) {
// Set op_sel/op_sel_hi on this operand or bail out if op_sel is
@@ -289,7 +267,7 @@ static bool updateOperand(FoldCandidate &Fold,
// when looking at a use.
Dst0.setReg(NewReg0);
for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
- MI->RemoveOperand(I);
+ MI->removeOperand(I);
MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
if (Fold.isCommuted())
@@ -490,6 +468,8 @@ static bool isUseSafeToFold(const SIInstrInfo *TII,
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64:
// Do not fold into an indirect mov.
return !MI.hasRegisterImplicitUseOperand(AMDGPU::M0);
}
@@ -675,7 +655,9 @@ void SIFoldOperands::foldOperand(
if (TII->isFLATScratch(*UseMI) &&
AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
- AMDGPU::OpName::vaddr) != -1) {
+ AMDGPU::OpName::vaddr) != -1 &&
+ AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
+ AMDGPU::OpName::saddr) == -1) {
unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
UseMI->setDesc(TII->get(NewOpc));
}
@@ -739,7 +721,7 @@ void SIFoldOperands::foldOperand(
while (ImpOpI != ImpOpE) {
MachineInstr::mop_iterator Tmp = ImpOpI;
ImpOpI++;
- UseMI->RemoveOperand(UseMI->getOperandNo(Tmp));
+ UseMI->removeOperand(UseMI->getOperandNo(Tmp));
}
CopiesToReplace.push_back(UseMI);
} else {
@@ -768,7 +750,7 @@ void SIFoldOperands::foldOperand(
UseMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));
for (unsigned I = UseMI->getNumOperands() - 1; I > 0; --I)
- UseMI->RemoveOperand(I);
+ UseMI->removeOperand(I);
MachineInstrBuilder B(*MBB.getParent(), UseMI);
DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
@@ -871,7 +853,7 @@ void SIFoldOperands::foldOperand(
UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
else
UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
- UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ UseMI->removeOperand(2); // Remove exec read (or src1 for readlane)
return;
}
@@ -890,7 +872,7 @@ void SIFoldOperands::foldOperand(
UseMI->getOperand(1).setReg(OpToFold.getReg());
UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
UseMI->getOperand(1).setIsKill(false);
- UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ UseMI->removeOperand(2); // Remove exec read (or src1 for readlane)
return;
}
}
@@ -906,6 +888,22 @@ void SIFoldOperands::foldOperand(
}
if (!FoldingImmLike) {
+ if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
+ // Don't fold if OpToFold doesn't hold an aligned register.
+ const TargetRegisterClass *RC =
+ TRI->getRegClassForReg(*MRI, OpToFold.getReg());
+ if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
+ unsigned SubReg = OpToFold.getSubReg();
+ const TargetRegisterClass *SubRC = TRI->getSubRegClass(RC, SubReg);
+ RC = TRI->getCompatibleSubRegClass(RC, SubRC, SubReg);
+ if (RC)
+ RC = SubRC;
+ }
+
+ if (!RC || !TRI->isProperlyAlignedRC(*RC))
+ return;
+ }
+
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
// FIXME: We could try to change the instruction from 64-bit to 32-bit
@@ -1025,7 +1023,7 @@ static void stripExtraCopyOperands(MachineInstr &MI) {
Desc.getNumImplicitDefs();
for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
- MI.RemoveOperand(I);
+ MI.removeOperand(I);
}
static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
@@ -1093,7 +1091,7 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII,
// Be careful to change the right operand, src0 may belong to a different
// instruction.
MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
- MI->RemoveOperand(Src1Idx);
+ MI->removeOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
return true;
}
@@ -1112,11 +1110,11 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII,
Opc == AMDGPU::S_OR_B32) {
if (Src1Val == 0) {
// y = or x, 0 => y = copy x
- MI->RemoveOperand(Src1Idx);
+ MI->removeOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
} else if (Src1Val == -1) {
// y = or x, -1 => y = v_mov_b32 -1
- MI->RemoveOperand(Src1Idx);
+ MI->removeOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
} else
return false;
@@ -1129,11 +1127,11 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII,
MI->getOpcode() == AMDGPU::S_AND_B32) {
if (Src1Val == 0) {
// y = and x, 0 => y = v_mov_b32 0
- MI->RemoveOperand(Src0Idx);
+ MI->removeOperand(Src0Idx);
mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
} else if (Src1Val == -1) {
// y = and x, -1 => y = copy x
- MI->RemoveOperand(Src1Idx);
+ MI->removeOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
stripExtraCopyOperands(*MI);
} else
@@ -1147,7 +1145,7 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII,
MI->getOpcode() == AMDGPU::S_XOR_B32) {
if (Src1Val == 0) {
// y = xor x, 0 => y = copy x
- MI->RemoveOperand(Src1Idx);
+ MI->removeOperand(Src1Idx);
mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
return true;
}
@@ -1185,12 +1183,12 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx != -1)
- MI.RemoveOperand(Src2Idx);
- MI.RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
+ MI.removeOperand(Src2Idx);
+ MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
if (Src1ModIdx != -1)
- MI.RemoveOperand(Src1ModIdx);
+ MI.removeOperand(Src1ModIdx);
if (Src0ModIdx != -1)
- MI.RemoveOperand(Src0ModIdx);
+ MI.removeOperand(Src0ModIdx);
mutateCopyOp(MI, NewDesc);
LLVM_DEBUG(dbgs() << MI);
return true;
@@ -1217,7 +1215,7 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
return false;
}
-void SIFoldOperands::foldInstOperand(MachineInstr &MI,
+bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
// We need to mutate the operands of new mov instructions to add implicit
// uses of EXEC, but adding them invalidates the use_iterator, so defer
@@ -1225,6 +1223,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
SmallVector<MachineInstr *, 4> CopiesToReplace;
SmallVector<FoldCandidate, 4> FoldList;
MachineOperand &Dst = MI.getOperand(0);
+ bool Changed = false;
if (OpToFold.isImm()) {
for (auto &UseMI :
@@ -1237,66 +1236,25 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
// We may also encounter cases where one or both operands are
// immediates materialized into a register, which would ordinarily not
// be folded due to multiple uses or operand constraints.
- if (tryConstantFoldOp(*MRI, TII, &UseMI))
+ if (tryConstantFoldOp(*MRI, TII, &UseMI)) {
LLVM_DEBUG(dbgs() << "Constant folded " << UseMI);
- }
- }
-
- bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
- if (FoldingImm) {
- unsigned NumLiteralUses = 0;
- MachineOperand *NonInlineUse = nullptr;
- int NonInlineUseOpNo = -1;
-
- for (auto &Use :
- make_early_inc_range(MRI->use_nodbg_operands(Dst.getReg()))) {
- MachineInstr *UseMI = Use.getParent();
- unsigned OpNo = UseMI->getOperandNo(&Use);
-
- // Try to fold any inline immediate uses, and then only fold other
- // constants if they have one use.
- //
- // The legality of the inline immediate must be checked based on the use
- // operand, not the defining instruction, because 32-bit instructions
- // with 32-bit inline immediate sources may be used to materialize
- // constants used in 16-bit operands.
- //
- // e.g. it is unsafe to fold:
- // s_mov_b32 s0, 1.0 // materializes 0x3f800000
- // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
-
- // Folding immediates with more than one use will increase program size.
- // FIXME: This will also reduce register usage, which may be better
- // in some cases. A better heuristic is needed.
- if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
- foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
- } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
- foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
- } else {
- if (++NumLiteralUses == 1) {
- NonInlineUse = &Use;
- NonInlineUseOpNo = OpNo;
- }
+ Changed = true;
}
}
+ }
- if (NumLiteralUses == 1) {
- MachineInstr *UseMI = NonInlineUse->getParent();
- foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
- }
- } else {
- // Folding register.
- SmallVector <MachineOperand *, 4> UsesToProcess;
- for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
- UsesToProcess.push_back(&Use);
- for (auto U : UsesToProcess) {
- MachineInstr *UseMI = U->getParent();
-
- foldOperand(OpToFold, UseMI, UseMI->getOperandNo(U),
- FoldList, CopiesToReplace);
- }
+ SmallVector<MachineOperand *, 4> UsesToProcess;
+ for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
+ UsesToProcess.push_back(&Use);
+ for (auto U : UsesToProcess) {
+ MachineInstr *UseMI = U->getParent();
+ foldOperand(OpToFold, UseMI, UseMI->getOperandNo(U), FoldList,
+ CopiesToReplace);
}
+ if (CopiesToReplace.empty() && FoldList.empty())
+ return Changed;
+
MachineFunction *MF = MI.getParent()->getParent();
// Make sure we add EXEC uses to any new v_mov instructions created.
for (MachineInstr *Copy : CopiesToReplace)
@@ -1328,6 +1286,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
TII->commuteInstruction(*Fold.UseMI, false);
}
}
+ return true;
}
// Clamp patterns are canonically selected to v_max_* instructions, so only
@@ -1593,8 +1552,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
unsigned OpIdx = Op - &UseMI->getOperand(0);
const MCInstrDesc &InstDesc = UseMI->getDesc();
- if (!TRI->isVectorSuperClass(
- TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF());
+ if (!OpRC || !TRI->isVectorSuperClass(OpRC))
return false;
const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
@@ -1751,22 +1711,31 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
bool IsIEEEMode = MFI->getMode().IEEE;
bool HasNSZ = MFI->hasNoSignedZerosFPMath();
+ bool Changed = false;
for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineOperand *CurrentKnownM0Val = nullptr;
for (auto &MI : make_early_inc_range(*MBB)) {
- tryFoldCndMask(MI);
+ Changed |= tryFoldCndMask(MI);
- if (tryFoldZeroHighBits(MI))
+ if (tryFoldZeroHighBits(MI)) {
+ Changed = true;
continue;
+ }
- if (MI.isRegSequence() && tryFoldRegSequence(MI))
+ if (MI.isRegSequence() && tryFoldRegSequence(MI)) {
+ Changed = true;
continue;
+ }
- if (MI.isPHI() && tryFoldLCSSAPhi(MI))
+ if (MI.isPHI() && tryFoldLCSSAPhi(MI)) {
+ Changed = true;
continue;
+ }
- if (MI.mayLoad() && tryFoldLoad(MI))
+ if (MI.mayLoad() && tryFoldLoad(MI)) {
+ Changed = true;
continue;
+ }
if (!TII->isFoldableCopy(MI)) {
// Saw an unknown clobber of m0, so we no longer know what it is.
@@ -1777,7 +1746,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// instruction, and not the omod multiply.
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
!tryFoldOMod(MI))
- tryFoldClamp(MI);
+ Changed |= tryFoldClamp(MI);
continue;
}
@@ -1788,6 +1757,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
MachineOperand &NewM0Val = MI.getOperand(1);
if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
MI.eraseFromParent();
+ Changed = true;
continue;
}
@@ -1817,7 +1787,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!MI.getOperand(0).getReg().isVirtual())
continue;
- foldInstOperand(MI, OpToFold);
+ Changed |= foldInstOperand(MI, OpToFold);
// If we managed to fold all uses of this copy then we might as well
// delete it now.
@@ -1829,6 +1799,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
auto &SrcOp = InstToErase->getOperand(1);
auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
InstToErase->eraseFromParent();
+ Changed = true;
InstToErase = nullptr;
if (!SrcReg || SrcReg.isPhysical())
break;
@@ -1837,9 +1808,11 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
break;
}
if (InstToErase && InstToErase->isRegSequence() &&
- MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg()))
+ MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
InstToErase->eraseFromParent();
+ Changed = true;
+ }
}
}
- return true;
+ return Changed;
}
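The through-line of the SIFoldOperands changes above is that the pass now reports an accurate modification status instead of returning true unconditionally. A minimal sketch of the pattern, with trySimplify standing in for the pass's individual fold helpers (hypothetical name):

    bool runOnMachineFunction(MachineFunction &MF) override {
      bool Changed = false;
      for (MachineBasicBlock &MBB : MF)
        for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
          Changed |= trySimplify(MI); // each helper reports whether it rewrote MI
      return Changed; // previously: return true;
    }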
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 80ee7a00252a..d7ca7f36284b 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -241,7 +241,7 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
}
// Check register def/use conflicts, occupancy limits and collect def/use maps.
-// Return true if instruction can be bundled with previous. It it cannot
+// Return true if the instruction can be bundled with the previous one. If it cannot,
// def/use maps are not updated.
bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
RegUse &Defs, RegUse &Uses,
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 6078f4a0577a..a57e81eb4e4a 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -749,7 +749,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -789,19 +789,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
*Reg.FI);
}
- // VGPRs used for Whole Wave Mode
- for (const auto &Reg : FuncInfo->WWMReservedRegs) {
- auto VGPR = Reg.first;
- auto FI = Reg.second;
- if (!FI)
- continue;
-
+ for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
if (!ScratchExecCopy)
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
- *FI);
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
}
if (ScratchExecCopy) {
@@ -813,27 +807,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
LiveRegs.addReg(ScratchExecCopy);
}
- if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
- const int FramePtrFI = *FPSaveIndex;
- assert(!MFI.isDeadObjectIndex(FramePtrFI));
-
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
-
- MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- if (!TmpVGPR)
- report_fatal_error("failed to find free scratch register");
-
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
- .addReg(FramePtrReg);
-
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
- FramePtrFI);
- }
-
- if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
- const int BasePtrFI = *BPSaveIndex;
- assert(!MFI.isDeadObjectIndex(BasePtrFI));
+ auto SaveSGPRToMemory = [&](Register Reg, const int FI) {
+ assert(!MFI.isDeadObjectIndex(FI));
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
@@ -843,44 +818,31 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
report_fatal_error("failed to find free scratch register");
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
- .addReg(BasePtrReg);
+ .addReg(Reg);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
- BasePtrFI);
- }
+ FI);
+ };
- // In this case, spill the FP to a reserved VGPR.
- if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
- const int FramePtrFI = *FPSaveIndex;
- assert(!MFI.isDeadObjectIndex(FramePtrFI));
+ auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) {
+ assert(!MFI.isDeadObjectIndex(FI));
- assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIRegisterInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(FI);
assert(Spill.size() == 1);
- // Save FP before setting it up.
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
- .addReg(FramePtrReg)
+ .addReg(Reg)
.addImm(Spill[0].Lane)
.addReg(Spill[0].VGPR, RegState::Undef);
- }
+ };
- // In this case, spill the BP to a reserved VGPR.
- if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
- const int BasePtrFI = *BPSaveIndex;
- assert(!MFI.isDeadObjectIndex(BasePtrFI));
-
- assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
- assert(Spill.size() == 1);
-
- // Save BP before setting it up.
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
- .addReg(BasePtrReg)
- .addImm(Spill[0].Lane)
- .addReg(Spill[0].VGPR, RegState::Undef);
+ if (FPSaveIndex) {
+ if (spilledToMemory(MF, *FPSaveIndex))
+ SaveSGPRToMemory(FramePtrReg, *FPSaveIndex);
+ else
+ SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex);
}
// Emit the copy if we need an FP, and are using a free SGPR to save it.
@@ -891,6 +853,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
+ if (BPSaveIndex) {
+ if (spilledToMemory(MF, *BPSaveIndex))
+ SaveSGPRToMemory(BasePtrReg, *BPSaveIndex);
+ else
+ SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex);
+ }
+
// Emit the copy if we need a BP, and are using a free SGPR to save it.
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
@@ -1034,56 +1003,44 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameDestroy);
}
+ auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) {
+ initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
+ MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ if (!TmpVGPR)
+ report_fatal_error("failed to find free scratch register");
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
+ FI);
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .addReg(TmpVGPR, RegState::Kill);
+ };
+
+ auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) {
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIRegisterInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(FI);
+ assert(Spill.size() == 1);
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg)
+ .addReg(Spill[0].VGPR)
+ .addImm(Spill[0].Lane);
+ };
+
if (FPSaveIndex) {
const int FramePtrFI = *FPSaveIndex;
assert(!MFI.isDeadObjectIndex(FramePtrFI));
- if (spilledToMemory(MF, FramePtrFI)) {
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
-
- MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- if (!TmpVGPR)
- report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
- TmpVGPR, FramePtrFI);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
- .addReg(TmpVGPR, RegState::Kill);
- } else {
- // Reload from VGPR spill.
- assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
- assert(Spill.size() == 1);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
- .addReg(Spill[0].VGPR)
- .addImm(Spill[0].Lane);
- }
+ if (spilledToMemory(MF, FramePtrFI))
+ RestoreSGPRFromMemory(FramePtrReg, FramePtrFI);
+ else
+ RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI);
}
if (BPSaveIndex) {
const int BasePtrFI = *BPSaveIndex;
assert(!MFI.isDeadObjectIndex(BasePtrFI));
- if (spilledToMemory(MF, BasePtrFI)) {
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
-
- MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- if (!TmpVGPR)
- report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
- TmpVGPR, BasePtrFI);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
- .addReg(TmpVGPR, RegState::Kill);
- } else {
- // Reload from VGPR spill.
- assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
- assert(Spill.size() == 1);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
- .addReg(Spill[0].VGPR)
- .addImm(Spill[0].Lane);
- }
+ if (spilledToMemory(MF, BasePtrFI))
+ RestoreSGPRFromMemory(BasePtrReg, BasePtrFI);
+ else
+ RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI);
}
Register ScratchExecCopy;
@@ -1100,18 +1057,13 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
Reg.VGPR, *Reg.FI);
}
- for (const auto &Reg : FuncInfo->WWMReservedRegs) {
- auto VGPR = Reg.first;
- auto FI = Reg.second;
- if (!FI)
- continue;
-
+ for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
if (!ScratchExecCopy)
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
- *FI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
}
if (ScratchExecCopy) {
@@ -1161,6 +1113,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ if (!FuncInfo->isEntryFunction()) {
+ // Allocate spill slots for the VGPRs used for Whole Wave Mode.
+ FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI);
+ }
+
const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
&& EnableSpillVGPRToAGPR;
@@ -1200,7 +1157,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
- // Stack slot coloring may assign different objets to the same stack slot.
+ // Stack slot coloring may assign different objects to the same stack slot.
// If not, then the VGPR to AGPR spill slot is dead.
for (unsigned FI : SpillFIs.set_bits())
if (!NonVGPRSpillFIs.test(FI))
@@ -1229,7 +1186,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
- FuncInfo->removeDeadFrameIndices(MFI);
+ // At this point we've already allocated all spilled SGPRs to VGPRs if we
+ // can. Any remaining SGPR spills will go to memory, so move them back to the
+ // default stack.
+ bool HaveSGPRToVMemSpill =
+ FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
assert(allSGPRSpillsAreDead(MF) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
@@ -1241,6 +1202,39 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// Add an emergency spill slot
RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
+
+ // If we are spilling SGPRs to memory with a large frame, we may need a
+ // second VGPR emergency frame index.
+ if (HaveSGPRToVMemSpill &&
+ allocateScavengingFrameIndexesNearIncomingSP(MF)) {
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
+ }
+ }
+}
+
+void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
+ MachineFunction &MF, RegScavenger *RS) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+ // On gfx908, we initially reserved the highest available VGPR for the AGPR
+ // copy. Now that RA is done, check whether there exists an unused VGPR that
+ // is lower than the one reserved earlier. If one exists, use it for the
+ // AGPR copy instead.
+ Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
+ Register UnusedLowVGPR =
+ TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
+ TRI->getHWRegIndex(VGPRForAGPRCopy))) {
+ // The call to setVGPRForAGPRCopy() must happen before calling
+ // freezeReservedRegs() so that getReservedRegs() can reserve this newly
+ // identified VGPR (for AGPR copy).
+ FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
+ MRI.freezeReservedRegs(MF);
+ }
}
}
@@ -1333,6 +1327,20 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
// FP will be specially managed like SP.
if (WillHaveFP || hasFP(MF))
SavedRegs.reset(MFI->getFrameOffsetReg());
+
+ // The return address use at the return instruction is hidden by the
+ // SI_RETURN pseudo. Because of that, and because IPRA computes actual
+ // register usage rather than consulting the CSR list, clobbering of the
+ // return address by function calls (D117243) or otherwise (D120922) is not
+ // seen by IPRA's register usage collection. Marking these registers saved
+ // ensures the return address is saved/restored in those scenarios.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register RetAddrReg = TRI->getReturnAddressReg(MF);
+ if (!MFI->isEntryFunction() &&
+ (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
+ SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
+ SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
+ }
}
bool SIFrameLowering::assignCalleeSavedSpillSlots(
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 7949dcfa6632..79154d494e91 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -47,6 +47,9 @@ public:
MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
+ void processFunctionBeforeFrameIndicesReplaced(
+ MachineFunction &MF, RegScavenger *RS = nullptr) const override;
+
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
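Most of the SIISelLowering.cpp diff below is mechanical migration to the batched setOperationAction overloads, which take an ArrayRef of opcodes and/or an ArrayRef of MVTs; each (opcode, type) pair receives exactly the action the equivalent run of single calls would have set. For instance, inside a TargetLowering constructor:

    // Old style: one call per (opcode, type) pair.
    setOperationAction(ISD::UADDO, MVT::i32, Legal);
    setOperationAction(ISD::USUBO, MVT::i32, Legal);

    // New style: one call covering the same pairs.
    setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal);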
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e2f4a0896bc3..094d5cd58673 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -25,6 +26,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -136,6 +138,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
+ addRegisterClass(MVT::v16i16, &AMDGPU::SGPR_256RegClass);
+ addRegisterClass(MVT::v16f16, &AMDGPU::SGPR_256RegClass);
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -151,27 +155,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrOneBooleanContent);
// We need to custom lower vector stores from local memory
- setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v5i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v6i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v7i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
- setOperationAction(ISD::LOAD, MVT::i1, Custom);
- setOperationAction(ISD::LOAD, MVT::v32i32, Custom);
-
- setOperationAction(ISD::STORE, MVT::v2i32, Custom);
- setOperationAction(ISD::STORE, MVT::v3i32, Custom);
- setOperationAction(ISD::STORE, MVT::v4i32, Custom);
- setOperationAction(ISD::STORE, MVT::v5i32, Custom);
- setOperationAction(ISD::STORE, MVT::v6i32, Custom);
- setOperationAction(ISD::STORE, MVT::v7i32, Custom);
- setOperationAction(ISD::STORE, MVT::v8i32, Custom);
- setOperationAction(ISD::STORE, MVT::v16i32, Custom);
- setOperationAction(ISD::STORE, MVT::i1, Custom);
- setOperationAction(ISD::STORE, MVT::v32i32, Custom);
+ setOperationAction(ISD::LOAD,
+ {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
+ MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
+ MVT::v32i32},
+ Custom);
+
+ setOperationAction(ISD::STORE,
+ {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
+ MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
+ MVT::v32i32},
+ Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
@@ -198,81 +192,57 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand);
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalAddress, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT_CC,
+ {MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Promote);
- setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
- setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+ setOperationAction(ISD::SETCC, {MVT::v2i1, MVT::v4i1}, Expand);
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
- setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v3i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v3f32, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v4f32, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v5i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v5f32, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v6i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v6f32, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v7i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v7f32, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v8i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v8f32, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v16i32, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v16f32, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v3i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
+ setOperationAction(ISD::TRUNCATE,
+ {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
+ MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32},
+ Expand);
+ setOperationAction(ISD::FP_ROUND,
+ {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
+ MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
+ Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG,
+ {MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,
+ MVT::v3i16, MVT::v4i16, MVT::Other},
+ Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i64, Expand);
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC,
+ {MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64}, Expand);
- setOperationAction(ISD::UADDO, MVT::i32, Legal);
- setOperationAction(ISD::USUBO, MVT::i32, Legal);
+ setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal);
- setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
- setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
+ setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i32, Legal);
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i64,
+ Expand);
#if 0
- setOperationAction(ISD::ADDCARRY, MVT::i64, Legal);
- setOperationAction(ISD::SUBCARRY, MVT::i64, Legal);
+ setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i64, Legal);
#endif
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
- MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
- MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
- MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64,
- MVT::v8i16, MVT::v8f16, MVT::v16i64, MVT::v16f64,
- MVT::v32i32, MVT::v32f32 }) {
+ for (MVT VT :
+ {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64,
+ MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v3i64, MVT::v3f64,
+ MVT::v6i32, MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64,
+ MVT::v8f64, MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
+ MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -372,94 +342,63 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v32i32);
}
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE,
+ {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32},
+ Expand);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16}, Custom);
// Avoid stack access for these.
// TODO: Generalize to more vector types.
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Custom);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i8, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i8, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i8, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i8, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i8, Custom);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i16, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f16, Custom);
+ setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
+ {MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
+ MVT::v4i16, MVT::v4f16, MVT::v16i16, MVT::v16f16},
+ Custom);
// Deal with vec3 vector operations when widened to vec4.
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3i32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3f32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR,
+ {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32}, Custom);
// Deal with vec5/6/7 vector operations when widened to vec8.
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v6i32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v6f32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v7i32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v7f32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR,
+ {MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
+ MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},
+ Custom);
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, {MVT::i32, MVT::i64}, Custom);
// We can't return success/failure, only the old value;
// let LLVM add the comparison.
- setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, {MVT::i32, MVT::i64},
+ Expand);
- if (Subtarget->hasFlatAddressSpace()) {
- setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
- setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
- }
+ if (Subtarget->hasFlatAddressSpace())
+ setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom);
- setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
- setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+ setOperationAction(ISD::BITREVERSE, {MVT::i32, MVT::i64}, Legal);
// FIXME: This should be narrowed to i32, but that only happens if i64 is
// illegal.
// FIXME: Should lower sub-i32 bswaps to bit-ops without v_perm_b32.
- setOperationAction(ISD::BSWAP, MVT::i64, Legal);
- setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+ setOperationAction(ISD::BSWAP, {MVT::i64, MVT::i32}, Legal);
// This is s_memtime on SI and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- setOperationAction(ISD::TRAP, MVT::Other, Custom);
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom);
+ setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom);
if (Subtarget->has16BitInsts()) {
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOWI, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::f16, Custom);
- setOperationAction(ISD::FEXP, MVT::f16, Custom);
- setOperationAction(ISD::FLOG10, MVT::f16, Custom);
+ setOperationAction({ISD::FPOW, ISD::FPOWI}, MVT::f16, Promote);
+ setOperationAction({ISD::FLOG, ISD::FEXP, ISD::FLOG10}, MVT::f16, Custom);
}
if (Subtarget->hasMadMacF32Insts())
setOperationAction(ISD::FMAD, MVT::f32, Legal);
- if (!Subtarget->hasBFI()) {
+ if (!Subtarget->hasBFI())
// fcopysign can be done in a single instruction with BFI.
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- }
+ setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
if (!Subtarget->hasBCNT(32))
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
@@ -467,15 +406,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- if (Subtarget->hasFFBH()) {
- setOperationAction(ISD::CTLZ, MVT::i32, Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
- }
+ if (Subtarget->hasFFBH())
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
- if (Subtarget->hasFFBL()) {
- setOperationAction(ISD::CTTZ, MVT::i32, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
- }
+ if (Subtarget->hasFFBL())
+ setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
@@ -489,84 +424,48 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setHasExtractBitsInsn(true);
// Clamp modifier on add/sub
- if (Subtarget->hasIntClamp()) {
- setOperationAction(ISD::UADDSAT, MVT::i32, Legal);
- setOperationAction(ISD::USUBSAT, MVT::i32, Legal);
- }
+ if (Subtarget->hasIntClamp())
+ setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Legal);
- if (Subtarget->hasAddNoCarry()) {
- setOperationAction(ISD::SADDSAT, MVT::i16, Legal);
- setOperationAction(ISD::SSUBSAT, MVT::i16, Legal);
- setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
- setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
- }
-
- setOperationAction(ISD::FMINNUM, MVT::f32, Custom);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Custom);
- setOperationAction(ISD::FMINNUM, MVT::f64, Custom);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Custom);
+ if (Subtarget->hasAddNoCarry())
+ setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
+ Legal);
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64},
+ Custom);
// These are really only legal for ieee_mode functions. We should be avoiding
// them for functions that don't have ieee_mode enabled, so just say they are
// legal.
- setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
-
+ setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
+ {MVT::f32, MVT::f64}, Legal);
- if (Subtarget->haveRoundOpsF64()) {
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- } else {
- setOperationAction(ISD::FCEIL, MVT::f64, Custom);
- setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
- setOperationAction(ISD::FRINT, MVT::f64, Custom);
- setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
- }
+ if (Subtarget->haveRoundOpsF64())
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FRINT}, MVT::f64, Legal);
+ else
+ setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
+ MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FSIN, MVT::f32, Custom);
- setOperationAction(ISD::FCOS, MVT::f32, Custom);
- setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
if (Subtarget->has16BitInsts()) {
- setOperationAction(ISD::Constant, MVT::i16, Legal);
-
- setOperationAction(ISD::SMIN, MVT::i16, Legal);
- setOperationAction(ISD::SMAX, MVT::i16, Legal);
-
- setOperationAction(ISD::UMIN, MVT::i16, Legal);
- setOperationAction(ISD::UMAX, MVT::i16, Legal);
+ setOperationAction({ISD::Constant, ISD::SMIN, ISD::SMAX, ISD::UMIN,
+ ISD::UMAX, ISD::UADDSAT, ISD::USUBSAT},
+ MVT::i16, Legal);
- setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
- setOperationAction(ISD::ROTR, MVT::i16, Expand);
- setOperationAction(ISD::ROTL, MVT::i16, Expand);
-
- setOperationAction(ISD::SDIV, MVT::i16, Promote);
- setOperationAction(ISD::UDIV, MVT::i16, Promote);
- setOperationAction(ISD::SREM, MVT::i16, Promote);
- setOperationAction(ISD::UREM, MVT::i16, Promote);
- setOperationAction(ISD::UADDSAT, MVT::i16, Legal);
- setOperationAction(ISD::USUBSAT, MVT::i16, Legal);
-
- setOperationAction(ISD::BITREVERSE, MVT::i16, Promote);
-
- setOperationAction(ISD::CTTZ, MVT::i16, Promote);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
- setOperationAction(ISD::CTLZ, MVT::i16, Promote);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
- setOperationAction(ISD::CTPOP, MVT::i16, Promote);
+ setOperationAction({ISD::ROTR, ISD::ROTL, ISD::SELECT_CC, ISD::BR_CC},
+ MVT::i16, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
-
- setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+ setOperationAction({ISD::SIGN_EXTEND, ISD::SDIV, ISD::UDIV, ISD::SREM,
+ ISD::UREM, ISD::BITREVERSE, ISD::CTTZ,
+ ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTPOP},
+ MVT::i16, Promote);
setOperationAction(ISD::LOAD, MVT::i16, Custom);
@@ -577,8 +476,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote);
AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Custom);
+ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::i16, Custom);
// F16 - Constant Actions.
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
@@ -590,22 +488,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16);
// F16 - VOP1 Actions.
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FCOS, MVT::f16, Custom);
- setOperationAction(ISD::FSIN, MVT::f16, Custom);
+ setOperationAction(
+ {ISD::FP_ROUND, ISD::FCOS, ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
+ MVT::f16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::f16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::f16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Custom);
+ setOperationAction(
+ {ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP},
+ MVT::f16, Promote);
// F16 - VOP2 Actions.
- setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand);
setOperationAction(ISD::FDIV, MVT::f16, Custom);
@@ -615,7 +509,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAD, MVT::f16, Legal);
for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
- MVT::v8f16}) {
+ MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -639,16 +533,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
// v_perm_b32 can handle either of these.
- setOperationAction(ISD::BSWAP, MVT::i16, Legal);
- setOperationAction(ISD::BSWAP, MVT::v2i16, Legal);
+ setOperationAction(ISD::BSWAP, {MVT::i16, MVT::v2i16}, Legal);
setOperationAction(ISD::BSWAP, MVT::v4i16, Custom);
// XXX - Do these do anything? Vector constants turn into build_vector.
- setOperationAction(ISD::Constant, MVT::v2i16, Legal);
- setOperationAction(ISD::ConstantFP, MVT::v2f16, Legal);
+ setOperationAction(ISD::Constant, {MVT::v2i16, MVT::v2f16}, Legal);
- setOperationAction(ISD::UNDEF, MVT::v2i16, Legal);
- setOperationAction(ISD::UNDEF, MVT::v2f16, Legal);
+ setOperationAction(ISD::UNDEF, {MVT::v2i16, MVT::v2f16}, Legal);
setOperationAction(ISD::STORE, MVT::v2i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32);
@@ -692,140 +583,98 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v8f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2i32, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
+ setOperationAction(ISD::LOAD, MVT::v16i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v16i16, MVT::v8i32);
+ setOperationAction(ISD::LOAD, MVT::v16f16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v16f16, MVT::v8i32);
+
+ setOperationAction(ISD::STORE, MVT::v16i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v16i16, MVT::v8i32);
+ setOperationAction(ISD::STORE, MVT::v16f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v16f16, MVT::v8i32);
+
+ setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
+ MVT::v2i32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Expand);
+ setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
+ MVT::v4i32, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Expand);
+ setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
+ MVT::v8i32, Expand);
- if (!Subtarget->hasVOP3PInsts()) {
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
- }
+ if (!Subtarget->hasVOP3PInsts())
+ setOperationAction(ISD::BUILD_VECTOR, {MVT::v2i16, MVT::v2f16}, Custom);
setOperationAction(ISD::FNEG, MVT::v2f16, Legal);
// This isn't really legal, but this avoids the legalizer unrolling it (and
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Custom);
- setOperationAction(ISD::FMINNUM, MVT::f16, Custom);
- setOperationAction(ISD::FMAXNUM_IEEE, MVT::f16, Legal);
- setOperationAction(ISD::FMINNUM_IEEE, MVT::f16, Legal);
+ setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom);
+ setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
- setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
- setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
- setOperationAction(ISD::FMINNUM_IEEE, MVT::v8f16, Custom);
- setOperationAction(ISD::FMAXNUM_IEEE, MVT::v8f16, Custom);
+ setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
+ {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Custom);
- setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
- setOperationAction(ISD::FMINNUM, MVT::v8f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::v8f16, Expand);
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM},
+ {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Expand);
- for (MVT Vec16 : { MVT::v8i16, MVT::v8f16 }) {
- setOperationAction(ISD::BUILD_VECTOR, Vec16, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec16, Custom);
+ for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
+ setOperationAction(
+ {ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR},
+ Vec16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, Vec16, Expand);
}
}
if (Subtarget->hasVOP3PInsts()) {
- setOperationAction(ISD::ADD, MVT::v2i16, Legal);
- setOperationAction(ISD::SUB, MVT::v2i16, Legal);
- setOperationAction(ISD::MUL, MVT::v2i16, Legal);
- setOperationAction(ISD::SHL, MVT::v2i16, Legal);
- setOperationAction(ISD::SRL, MVT::v2i16, Legal);
- setOperationAction(ISD::SRA, MVT::v2i16, Legal);
- setOperationAction(ISD::SMIN, MVT::v2i16, Legal);
- setOperationAction(ISD::UMIN, MVT::v2i16, Legal);
- setOperationAction(ISD::SMAX, MVT::v2i16, Legal);
- setOperationAction(ISD::UMAX, MVT::v2i16, Legal);
-
- setOperationAction(ISD::UADDSAT, MVT::v2i16, Legal);
- setOperationAction(ISD::USUBSAT, MVT::v2i16, Legal);
- setOperationAction(ISD::SADDSAT, MVT::v2i16, Legal);
- setOperationAction(ISD::SSUBSAT, MVT::v2i16, Legal);
-
- setOperationAction(ISD::FADD, MVT::v2f16, Legal);
- setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
- setOperationAction(ISD::FMA, MVT::v2f16, Legal);
-
- setOperationAction(ISD::FMINNUM_IEEE, MVT::v2f16, Legal);
- setOperationAction(ISD::FMAXNUM_IEEE, MVT::v2f16, Legal);
-
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f16, Legal);
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
-
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
-
- for (MVT VT : { MVT::v4i16, MVT::v8i16 }) {
- // Split vector operations.
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::ADD, VT, Custom);
- setOperationAction(ISD::SUB, VT, Custom);
- setOperationAction(ISD::MUL, VT, Custom);
+ setOperationAction({ISD::ADD, ISD::SUB, ISD::MUL, ISD::SHL, ISD::SRL,
+ ISD::SRA, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
+ ISD::UADDSAT, ISD::USUBSAT, ISD::SADDSAT, ISD::SSUBSAT},
+ MVT::v2i16, Legal);
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FMINNUM_IEEE,
+ ISD::FMAXNUM_IEEE, ISD::FCANONICALIZE},
+ MVT::v2f16, Legal);
- setOperationAction(ISD::UADDSAT, VT, Custom);
- setOperationAction(ISD::SADDSAT, VT, Custom);
- setOperationAction(ISD::USUBSAT, VT, Custom);
- setOperationAction(ISD::SSUBSAT, VT, Custom);
- }
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2i16, MVT::v2f16},
+ Custom);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE,
+ {MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
+ MVT::v16f16, MVT::v16i16},
+ Custom);
- for (MVT VT : { MVT::v4f16, MVT::v8f16 }) {
+ for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16})
// Split vector operations.
- setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
- }
+ setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL, ISD::ADD, ISD::SUB,
+ ISD::MUL, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
+ ISD::UADDSAT, ISD::SADDSAT, ISD::USUBSAT,
+ ISD::SSUBSAT},
+ VT, Custom);
- setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
- setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
+ for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16})
+ // Split vector operations.
+ setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
+ VT, Custom);
- setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
- setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
+ setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16},
+ Custom);
setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
- setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
- setOperationAction(ISD::SELECT, MVT::v4f16, Custom);
+ setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16}, Custom);
if (Subtarget->hasPackedFP32Ops()) {
- setOperationAction(ISD::FADD, MVT::v2f32, Legal);
- setOperationAction(ISD::FMUL, MVT::v2f32, Legal);
- setOperationAction(ISD::FMA, MVT::v2f32, Legal);
- setOperationAction(ISD::FNEG, MVT::v2f32, Legal);
-
- for (MVT VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32 }) {
- setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- }
+ setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FNEG},
+ MVT::v2f32, Legal);
+ setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA},
+ {MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32},
+ Custom);
}
}
- setOperationAction(ISD::FNEG, MVT::v4f16, Custom);
- setOperationAction(ISD::FABS, MVT::v4f16, Custom);
+ setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v4f16, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::SELECT, MVT::v2i16, Promote);
@@ -834,107 +683,88 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::SELECT, MVT::v2f16, MVT::i32);
} else {
// Legalization hack.
- setOperationAction(ISD::SELECT, MVT::v2i16, Custom);
- setOperationAction(ISD::SELECT, MVT::v2f16, Custom);
-
- setOperationAction(ISD::FNEG, MVT::v2f16, Custom);
- setOperationAction(ISD::FABS, MVT::v2f16, Custom);
- }
-
- for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
- MVT::v8i16, MVT::v8f16 }) {
- setOperationAction(ISD::SELECT, VT, Custom);
- }
-
- setOperationAction(ISD::SMULO, MVT::i64, Custom);
- setOperationAction(ISD::UMULO, MVT::i64, Custom);
-
- if (Subtarget->hasMad64_32()) {
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
- }
-
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
-
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v3i16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::f16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
-
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v3i16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v3f16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
-
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ADDCARRY);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::SUBCARRY);
- setTargetDAGCombine(ISD::FADD);
- setTargetDAGCombine(ISD::FSUB);
- setTargetDAGCombine(ISD::FMINNUM);
- setTargetDAGCombine(ISD::FMAXNUM);
- setTargetDAGCombine(ISD::FMINNUM_IEEE);
- setTargetDAGCombine(ISD::FMAXNUM_IEEE);
- setTargetDAGCombine(ISD::FMA);
- setTargetDAGCombine(ISD::SMIN);
- setTargetDAGCombine(ISD::SMAX);
- setTargetDAGCombine(ISD::UMIN);
- setTargetDAGCombine(ISD::UMAX);
- setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
- setTargetDAGCombine(ISD::SINT_TO_FP);
- setTargetDAGCombine(ISD::UINT_TO_FP);
- setTargetDAGCombine(ISD::FCANONICALIZE);
- setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setOperationAction(ISD::SELECT, {MVT::v2i16, MVT::v2f16}, Custom);
+
+ setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v2f16, Custom);
+ }
+
+ setOperationAction(ISD::SELECT,
+ {MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
+ MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16},
+ Custom);
+
+ setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
+
+ if (Subtarget->hasMad64_32())
+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN,
+ {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
+ MVT::v2i16, MVT::v2f16},
+ Custom);
+
+ setOperationAction(ISD::INTRINSIC_W_CHAIN,
+ {MVT::v2f16, MVT::v2i16, MVT::v3f16, MVT::v3i16,
+ MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::Other, MVT::f16,
+ MVT::i16, MVT::i8},
+ Custom);
+
+ setOperationAction(ISD::INTRINSIC_VOID,
+ {MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v3i16,
+ MVT::v3f16, MVT::v4f16, MVT::v4i16, MVT::f16, MVT::i16,
+ MVT::i8},
+ Custom);
+
+ setTargetDAGCombine({ISD::ADD,
+ ISD::ADDCARRY,
+ ISD::SUB,
+ ISD::SUBCARRY,
+ ISD::FADD,
+ ISD::FSUB,
+ ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::FMINNUM_IEEE,
+ ISD::FMAXNUM_IEEE,
+ ISD::FMA,
+ ISD::SMIN,
+ ISD::SMAX,
+ ISD::UMIN,
+ ISD::UMAX,
+ ISD::SETCC,
+ ISD::AND,
+ ISD::OR,
+ ISD::XOR,
+ ISD::SINT_TO_FP,
+ ISD::UINT_TO_FP,
+ ISD::FCANONICALIZE,
+ ISD::SCALAR_TO_VECTOR,
+ ISD::ZERO_EXTEND,
+ ISD::SIGN_EXTEND_INREG,
+ ISD::EXTRACT_VECTOR_ELT,
+ ISD::INSERT_VECTOR_ELT});
// All memory operations. Some folding on the pointer operand is done to help
  // match the constant offsets in the addressing modes.
- setTargetDAGCombine(ISD::LOAD);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::ATOMIC_LOAD);
- setTargetDAGCombine(ISD::ATOMIC_STORE);
- setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP);
- setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
- setTargetDAGCombine(ISD::ATOMIC_SWAP);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_AND);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_OR);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
- setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine({ISD::LOAD,
+ ISD::STORE,
+ ISD::ATOMIC_LOAD,
+ ISD::ATOMIC_STORE,
+ ISD::ATOMIC_CMP_SWAP,
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+ ISD::ATOMIC_SWAP,
+ ISD::ATOMIC_LOAD_ADD,
+ ISD::ATOMIC_LOAD_SUB,
+ ISD::ATOMIC_LOAD_AND,
+ ISD::ATOMIC_LOAD_OR,
+ ISD::ATOMIC_LOAD_XOR,
+ ISD::ATOMIC_LOAD_NAND,
+ ISD::ATOMIC_LOAD_MIN,
+ ISD::ATOMIC_LOAD_MAX,
+ ISD::ATOMIC_LOAD_UMIN,
+ ISD::ATOMIC_LOAD_UMAX,
+ ISD::ATOMIC_LOAD_FADD,
+ ISD::INTRINSIC_VOID,
+ ISD::INTRINSIC_W_CHAIN});
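setTargetDAGCombine gets the same ArrayRef treatment. Registering an opcode only opts matching nodes into the target combine hook; in this file they are dispatched from SITargetLowering::PerformDAGCombine, roughly like the following sketch (not the full switch):

  SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
    switch (N->getOpcode()) {
    case ISD::ADD:
      return performAddCombine(N, DCI);
    // ... one case per opcode registered above ...
    default:
      return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
    }
  }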
// FIXME: In other contexts we pretend this is a per-function property.
setStackPointerRegisterToSaveRestore(AMDGPU::SGPR32);
@@ -1118,6 +948,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
unsigned IntrID) const {
+ Info.flags = MachineMemOperand::MONone;
+ if (CI.hasMetadata(LLVMContext::MD_invariant_load))
+ Info.flags |= MachineMemOperand::MOInvariant;
+
if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
@@ -1127,16 +961,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+
if (RsrcIntr->IsImage) {
- Info.ptrVal =
- MFI->getImagePSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+ Info.ptrVal = MFI->getImagePSV(TM);
Info.align.reset();
} else {
- Info.ptrVal =
- MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+ Info.ptrVal = MFI->getBufferPSV(TM);
}
- Info.flags = MachineMemOperand::MODereferenceable;
+ Info.flags |= MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttr(Attribute::ReadOnly)) {
unsigned DMaskLanes = 4;
@@ -1178,12 +1013,23 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
- Info.flags = MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable;
+ Info.flags |= MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
// XXX - Should this be volatile without known ordering?
Info.flags |= MachineMemOperand::MOVolatile;
+
+ switch (IntrID) {
+ default:
+ break;
+ case Intrinsic::amdgcn_raw_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
+ Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
+ return true;
+ }
+ }
}
return true;
}
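Two details in this hunk are easy to miss: Info.flags is now seeded once at the top of getTgtMemIntrinsic (MONone, plus MOInvariant when the call site carries !invariant.load metadata), so the handlers below accumulate flags with |= instead of overwriting them; and the new buffer-to-LDS intrinsics rebuild the memory VT from their constant byte-width operand. A sketch of that mapping, assuming an LLVMContext &Ctx in scope:

  unsigned Width = 4;                            // byte-width operand (1, 2 or 4)
  EVT MemVT = EVT::getIntegerVT(Ctx, Width * 8); // 1 -> i8, 2 -> i16, 4 -> i32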
@@ -1200,7 +1046,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
if (!Vol->isZero())
@@ -1211,12 +1057,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_buffer_atomic_fadd: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.ptrVal =
- MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+ Info.ptrVal = MFI->getBufferPSV(TM);
Info.align.reset();
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
if (!Vol || !Vol->isZero())
@@ -1230,7 +1078,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
- Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
if (!Vol->isZero())
@@ -1243,20 +1091,23 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
- Info.flags = MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MOVolatile;
+ Info.flags |= MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
- Info.ptrVal =
- MFI->getImagePSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+
+ Info.ptrVal = MFI->getImagePSV(TM);
Info.align.reset();
- Info.flags = MachineMemOperand::MOLoad |
- MachineMemOperand::MODereferenceable;
+ Info.flags |= MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_global_atomic_fadd:
@@ -1264,15 +1115,17 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
- case Intrinsic::amdgcn_flat_atomic_fmax: {
+ case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
+ case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
- Info.flags = MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOVolatile;
+ Info.flags |= MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_ds_gws_init:
@@ -1283,18 +1136,29 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_ds_gws_sema_release_all: {
Info.opc = ISD::INTRINSIC_VOID;
+ const GCNTargetMachine &TM =
+ static_cast<const GCNTargetMachine &>(getTargetMachine());
+
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- Info.ptrVal =
- MFI->getGWSPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+ Info.ptrVal = MFI->getGWSPSV(TM);
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
Info.size = 4;
Info.align = Align(4);
- Info.flags = MachineMemOperand::MOStore;
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
- Info.flags = MachineMemOperand::MOLoad;
+ Info.flags |= MachineMemOperand::MOLoad;
+ else
+ Info.flags |= MachineMemOperand::MOStore;
+ return true;
+ }
+ case Intrinsic::amdgcn_global_load_lds: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
+ Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
return true;
}
default:
@@ -1319,6 +1183,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
+ case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
+ case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_global_atomic_csub: {
Value *Ptr = II->getArgOperand(0);
AccessTy = II->getType();
@@ -1506,47 +1372,96 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// Check if alignment requirements for ds_read/write instructions are
// disabled.
- if (Subtarget->hasUnalignedDSAccessEnabled() &&
- !Subtarget->hasLDSMisalignedBug()) {
- if (IsFast)
- *IsFast = Alignment != Align(2);
- return true;
- }
+ if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
+ return false;
+
+ Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
+ if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
+ Alignment < RequiredAlignment)
+ return false;
    // Either the alignment requirements are "enabled", or there is an
    // unaligned-LDS-access-related hardware bug even though the alignment
    // requirements are "disabled". In either case, we need to check for
    // proper alignment.
//
- if (Size == 64) {
+ switch (Size) {
+ case 64:
+ // SI has a hardware bug in the LDS / GDS bounds checking: if the base
+ // address is negative, then the instruction is incorrectly treated as
+      // out-of-bounds even if base + offset is in bounds. Split vectorized
+ // loads here to avoid emitting ds_read2_b32. We may re-combine the
+ // load later in the SILoadStoreOptimizer.
+ if (!Subtarget->hasUsableDSOffset() && Alignment < Align(8))
+ return false;
+
      // 8 byte accesses via ds_read/write_b64 require 8-byte alignment, but we
// can do a 4 byte aligned, 8 byte access in a single operation using
// ds_read2/write2_b32 with adjacent offsets.
- bool AlignedBy4 = Alignment >= Align(4);
- if (IsFast)
- *IsFast = AlignedBy4;
+ RequiredAlignment = Align(4);
+
+ if (Subtarget->hasUnalignedDSAccessEnabled()) {
+ // We will either select ds_read_b64/ds_write_b64 or ds_read2_b32/
+ // ds_write2_b32 depending on the alignment. In either case with either
+ // alignment there is no faster way of doing this.
+ if (IsFast)
+ *IsFast = true;
+ return true;
+ }
+
+ break;
+ case 96:
+ if (!Subtarget->hasDS96AndDS128())
+ return false;
- return AlignedBy4;
- }
- if (Size == 96) {
      // 12 byte accesses via ds_read/write_b96 require 16-byte alignment on
// gfx8 and older.
- bool AlignedBy16 = Alignment >= Align(16);
- if (IsFast)
- *IsFast = AlignedBy16;
- return AlignedBy16;
- }
- if (Size == 128) {
+ if (Subtarget->hasUnalignedDSAccessEnabled()) {
+      // Naturally aligned access is fastest. However, also report it as Fast
+      // if memory is aligned to less than a DWORD. A narrow load or store will
+      // be equally slow as a single ds_read_b96/ds_write_b96, but there will
+ // be more of them, so overall we will pay less penalty issuing a single
+ // instruction.
+ if (IsFast)
+ *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
+ return true;
+ }
+
+ break;
+ case 128:
+ if (!Subtarget->hasDS96AndDS128() || !Subtarget->useDS128())
+ return false;
+
      // 16 byte accesses via ds_read/write_b128 require 16-byte alignment on
      // gfx8 and older, but we can do an 8 byte aligned, 16 byte access in a
// single operation using ds_read2/write2_b64.
- bool AlignedBy8 = Alignment >= Align(8);
- if (IsFast)
- *IsFast = AlignedBy8;
+ RequiredAlignment = Align(8);
+
+ if (Subtarget->hasUnalignedDSAccessEnabled()) {
+      // Naturally aligned access is fastest. However, also report it as Fast
+      // if memory is aligned to less than a DWORD. A narrow load or store will
+      // be equally slow as a single ds_read_b128/ds_write_b128, but there
+ // will be more of them, so overall we will pay less penalty issuing a
+ // single instruction.
+ if (IsFast)
+ *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
+ return true;
+ }
- return AlignedBy8;
+ break;
+ default:
+ if (Size > 32)
+ return false;
+
+ break;
}
+
+ if (IsFast)
+ *IsFast = Alignment >= RequiredAlignment;
+
+ return Alignment >= RequiredAlignment ||
+ Subtarget->hasUnalignedDSAccessEnabled();
}
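The restructured LDS/GDS branch condenses to the following sketch (illustrative only; it omits the hasLDSMisalignedBug, hasUsableDSOffset, and DS96/DS128 feature checks the real code performs, as well as the IsFast reporting):

  static bool allowsLDSAccessSketch(unsigned SizeInBits, Align Alignment,
                                    bool UnalignedDSEnabled) {
    // Natural alignment by default; 64- and 128-bit accesses may fall back
    // to ds_read2/write2 pairs at half the natural alignment.
    Align Required(PowerOf2Ceil(SizeInBits / 8));
    if (SizeInBits == 64)
      Required = Align(4);
    else if (SizeInBits == 128)
      Required = Align(8);
    return Alignment >= Required || UnalignedDSEnabled;
  }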
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
@@ -1571,14 +1486,12 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
return AlignedBy4;
}
- if (Subtarget->hasUnalignedBufferAccessEnabled() &&
- !(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
- // If we have an uniform constant load, it still requires using a slow
+ if (Subtarget->hasUnalignedBufferAccessEnabled()) {
+ // If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
// Accesses can really be issued as 1-byte aligned or 4-byte aligned, so
- // 2-byte alignment is worse than 1 unless doing a 2-byte accesss.
+ // 2-byte alignment is worse than 1 unless doing a 2-byte access.
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
Alignment >= Align(4) : Alignment != Align(2);
@@ -1603,20 +1516,22 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *IsFast) const {
- if (IsFast)
- *IsFast = false;
-
- // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
- // which isn't a simple VT.
- // Until MVT is extended to handle this, simply check for the size and
- // rely on the condition below: allow accesses if the size is a multiple of 4.
- if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
- VT.getStoreSize() > 16)) {
- return false;
+ bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
+ Alignment, Flags, IsFast);
+
+ if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() &&
+ (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
+    // Pretend it is fast if +unaligned-access-mode is passed so that DS
+    // accesses get vectorized. We could use ds_read2_b*/ds_write2_b*
+    // instructions on misaligned data, which is faster than a pair of
+    // ds_read_b*/ds_write_b* that would be equally misaligned.
+ // This is only used by the common passes, selection always calls the
+ // allowsMisalignedMemoryAccessesImpl version.
+ *IsFast = true;
}
- return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
- Alignment, Flags, IsFast);
+ return Allow;
}
EVT SITargetLowering::getOptimalMemOpType(
@@ -1639,9 +1554,7 @@ EVT SITargetLowering::getOptimalMemOpType(
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
- const Value *Ptr = MemNode->getMemOperand()->getValue();
- const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
- return I && I->getMetadata("amdgpu.noclobber");
+ return MemNode->getMemOperand()->getFlags() & MONoClobber;
}
bool SITargetLowering::isNonGlobalAddrSpace(unsigned AS) {
@@ -1681,6 +1594,15 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool SITargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
+ unsigned Index) const {
+ if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
+ return false;
+
+ // TODO: Add more cases that are cheap.
+ return Index == 0;
+}
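A usage sketch for the new hook (the TLI reference and index values are illustrative, not from this patch):

  // True when EXTRACT_SUBVECTOR is legal or custom for the result type: the
  // low half already occupies the low registers. Any other index is
  // conservatively reported as not cheap, per the TODO above.
  bool LowHalf = TLI.isExtractSubvectorCheap(MVT::v4f16, MVT::v8f16, /*Index=*/0);
  bool HighHalf = TLI.isExtractSubvectorCheap(MVT::v4f16, MVT::v8f16, /*Index=*/4); // false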
+
bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
if (Subtarget->has16BitInsts() && VT == MVT::i16) {
switch (Op) {
@@ -2106,7 +2028,7 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
- if (Info.hasQueuePtr())
+ if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
@@ -2153,7 +2075,7 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info.hasQueuePtr()) {
+ if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
@@ -2190,6 +2112,24 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
SIMachineFunctionInfo &Info,
CallingConv::ID CallConv,
bool IsShader) const {
+ if (Subtarget->hasUserSGPRInit16Bug()) {
+ // Pad up the used user SGPRs with dead inputs.
+ unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
+
+ // Note we do not count the PrivateSegmentWaveByteOffset. We do not want to
+ // rely on it to reach 16 since if we end up having no stack usage, it will
+ // not really be added.
+ unsigned NumRequiredSystemSGPRs = Info.hasWorkGroupIDX() +
+ Info.hasWorkGroupIDY() +
+ Info.hasWorkGroupIDZ() +
+ Info.hasWorkGroupInfo();
+ for (unsigned i = NumRequiredSystemSGPRs + CurrentUserSGPRs; i < 16; ++i) {
+ Register Reg = Info.addReservedUserSGPR();
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+ }
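A worked instance of the padding arithmetic, for a hypothetical kernel with 10 user SGPRs that needs only the X and Y work-group IDs:

  unsigned CurrentUserSGPRs = 10;      // hypothetical input
  unsigned NumRequiredSystemSGPRs = 2; // hasWorkGroupIDX() + hasWorkGroupIDY()
  // The loop runs for i = 12..15, reserving four dead user SGPRs, so the
  // preloaded count reaches 16 before the real system SGPRs are allocated
  // (see the assert added at the end of this function).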
+
if (Info.hasWorkGroupIDX()) {
Register Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
@@ -2234,6 +2174,8 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
+
+ assert(!Subtarget->hasUserSGPRInit16Bug() || Info.getNumPreloadedSGPRs() >= 16);
}
static void reservePrivateMemoryRegs(const TargetMachine &TM,
@@ -2388,7 +2330,7 @@ SDValue SITargetLowering::LowerFormalArguments(
return DAG.getEntryNode();
}
- Info->allocateModuleLDSGlobal(Fn.getParent());
+ Info->allocateModuleLDSGlobal(Fn);
SmallVector<ISD::InputArg, 16> Splits;
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2538,7 +2480,13 @@ SDValue SITargetLowering::LowerFormalArguments(
assert(VA.isRegLoc() && "Parameter must be in a register!");
Register Reg = VA.getLocReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *RC = nullptr;
+ if (AMDGPU::VGPR_32RegClass.contains(Reg))
+ RC = &AMDGPU::VGPR_32RegClass;
+ else if (AMDGPU::SGPR_32RegClass.contains(Reg))
+ RC = &AMDGPU::SGPR_32RegClass;
+ else
+ llvm_unreachable("Unexpected register class in LowerFormalArguments!");
EVT ValVT = VA.getValVT();
Reg = MF.addLiveIn(Reg, RC);
@@ -2657,24 +2605,6 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
- // Add return address for callable functions.
- if (!Info->isEntryFunction()) {
- const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
- SDValue ReturnAddrReg = CreateLiveInRegister(
- DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
- SDValue ReturnAddrVirtualReg =
- DAG.getRegister(MF.getRegInfo().createVirtualRegister(
- CallConv != CallingConv::AMDGPU_Gfx
- ? &AMDGPU::CCR_SGPR_64RegClass
- : &AMDGPU::Gfx_CCR_SGPR_64RegClass),
- MVT::i64);
- Chain =
- DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
- Flag = Chain.getValue(1);
- RetOps.push_back(ReturnAddrVirtualReg);
- }
-
// Copy the result values into the output registers.
for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
++I, ++RealRVLocIdx) {
@@ -2731,15 +2661,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(Flag);
unsigned Opc = AMDGPUISD::ENDPGM;
- if (!IsWaveEnd) {
- if (IsShader)
- Opc = AMDGPUISD::RETURN_TO_EPILOG;
- else if (CallConv == CallingConv::AMDGPU_Gfx)
- Opc = AMDGPUISD::RET_GFX_FLAG;
- else
- Opc = AMDGPUISD::RET_FLAG;
- }
-
+ if (!IsWaveEnd)
+ Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
@@ -3321,21 +3244,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
- SDValue PhysReturnAddrReg;
- if (IsTailCall) {
- // Since the return is being combined with the call, we need to pass on the
- // return address.
-
- const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
- SDValue ReturnAddrReg = CreateLiveInRegister(
- DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
- PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
- MVT::i64);
- Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag);
- InFlag = Chain.getValue(1);
- }
-
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
@@ -3365,8 +3273,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
-
- Ops.push_back(PhysReturnAddrReg);
}
// Add argument registers to the end of the list so that they are known live
@@ -4104,6 +4010,21 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
bool IsAdd = (MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
+ MachineOperand &Dest = MI.getOperand(0);
+ MachineOperand &Src0 = MI.getOperand(1);
+ MachineOperand &Src1 = MI.getOperand(2);
+
+ if (IsAdd && ST.hasLshlAddB64()) {
+ auto Add = BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_LSHL_ADD_U64_e64),
+ Dest.getReg())
+ .add(Src0)
+ .addImm(0)
+ .add(Src1);
+ TII->legalizeOperands(*Add);
+ MI.eraseFromParent();
+ return BB;
+ }
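Why a shifted add can stand in for the plain 64-bit add (a reading of the instruction semantics as used here, not a quote from the ISA manual):

  // v_lshl_add_u64 dst, src0, shamt, src1  computes  dst = (src0 << shamt) + src1,
  // so shamt = 0 degenerates to dst = src0 + src1 in a single instruction and
  // skips the 32-bit carry-chain expansion that follows.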
+
const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -4112,10 +4033,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
Register CarryReg = MRI.createVirtualRegister(CarryRC);
Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
- MachineOperand &Dest = MI.getOperand(0);
- MachineOperand &Src0 = MI.getOperand(1);
- MachineOperand &Src1 = MI.getOperand(2);
-
const TargetRegisterClass *Src0RC = Src0.isReg()
? MRI.getRegClass(Src0.getReg())
: &AMDGPU::VReg_64RegClass;
@@ -4390,29 +4307,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::DS_GWS_INIT:
case AMDGPU::DS_GWS_SEMA_BR:
case AMDGPU::DS_GWS_BARRIER:
- if (Subtarget->needsAlignedVGPRs()) {
- // Add implicit aligned super-reg to force alignment on the data operand.
- const DebugLoc &DL = MI.getDebugLoc();
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
- MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
- Register DataReg = Op->getReg();
- bool IsAGPR = TRI->isAGPR(MRI, DataReg);
- Register Undef = MRI.createVirtualRegister(
- IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), Undef);
- Register NewVR =
- MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
- : &AMDGPU::VReg_64_Align2RegClass);
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), NewVR)
- .addReg(DataReg, 0, Op->getSubReg())
- .addImm(AMDGPU::sub0)
- .addReg(Undef)
- .addImm(AMDGPU::sub1);
- Op->setReg(NewVR);
- Op->setSubReg(AMDGPU::sub0);
- MI.addOperand(MachineOperand::CreateReg(NewVR, false, true));
- }
+ TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0);
LLVM_FALLTHROUGH;
case AMDGPU::DS_GWS_SEMA_V:
case AMDGPU::DS_GWS_SEMA_P:
@@ -4500,6 +4395,18 @@ bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return isTypeLegal(VT.getScalarType());
}
+bool SITargetLowering::hasAtomicFaddRtnForTy(SDValue &Op) const {
+ switch (Op.getValue(0).getSimpleValueType().SimpleTy) {
+ case MVT::f32:
+ return Subtarget->hasAtomicFaddRtnInsts();
+ case MVT::v2f16:
+ case MVT::f64:
+ return Subtarget->hasGFX90AInsts();
+ default:
+ return false;
+ }
+}
+
bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
// This currently forces unfolding various combinations of fsub into fma with
// free fneg'd operands. As long as we have fast FMA (controlled by
@@ -4560,7 +4467,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
// Otherwise f32 mad is always full rate and returns the same result as
// the separate operations so should be preferred over fma.
- // However does not support denomals.
+ // However does not support denormals.
if (hasFP32Denormals(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
@@ -4653,8 +4560,9 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
- VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8f32 ||
- VT == MVT::v16f32 || VT == MVT::v32f32);
+ VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
+ VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
+ VT == MVT::v32f32);
SDValue Lo0, Hi0;
std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4676,8 +4584,9 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
- VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v8f32 ||
- VT == MVT::v16f32 || VT == MVT::v32f32);
+ VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
+ VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
+ VT == MVT::v32f32);
SDValue Lo0, Hi0;
SDValue Op0 = Op.getOperand(0);
@@ -4738,10 +4647,30 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return lowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
+ case ISD::FPTRUNC_ROUND: {
+ unsigned Opc;
+ SDLoc DL(Op);
+
+ if (Op.getOperand(0)->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ // Get the rounding mode from the last operand
+ int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (RoundMode == (int)RoundingMode::TowardPositive)
+ Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
+ else if (RoundMode == (int)RoundingMode::TowardNegative)
+ Opc = AMDGPUISD::FPTRUNC_ROUND_DOWNWARD;
+ else
+ return SDValue();
+
+ return DAG.getNode(Opc, DL, Op.getNode()->getVTList(), Op->getOperand(0));
+ }
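Only the two statically rounded modes have dedicated nodes; anything else bails out so the node is left to generic handling rather than silently mis-lowered:

  // RoundingMode::TowardPositive  -> AMDGPUISD::FPTRUNC_ROUND_UPWARD
  // RoundingMode::TowardNegative  -> AMDGPUISD::FPTRUNC_ROUND_DOWNWARD
  // any other mode, or a non-f32 source -> return SDValue()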
case ISD::TRAP:
return lowerTRAP(Op, DAG);
case ISD::DEBUGTRAP:
@@ -5356,7 +5285,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
if (IsIEEEMode)
return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
- if (VT == MVT::v4f16 || VT == MVT::v8f16)
+ if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16)
return splitBinaryVectorOp(Op, DAG);
return Op;
}
@@ -5439,24 +5368,41 @@ SDValue SITargetLowering::lowerTrapEndpgm(
return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
}
+SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
+ const SDLoc &DL, Align Alignment, ImplicitParameter Param) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ uint64_t Offset = getImplicitParameterOffset(MF, Param);
+ SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset);
+ MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, PtrInfo, Alignment,
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
+}
+
SDValue SITargetLowering::lowerTrapHsaQueuePtr(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
- MachineFunction &MF = DAG.getMachineFunction();
- SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- Register UserSGPR = Info->getQueuePtrUserSGPR();
-
SDValue QueuePtr;
- if (UserSGPR == AMDGPU::NoRegister) {
- // We probably are in a function incorrectly marked with
- // amdgpu-no-queue-ptr. This is undefined. We don't want to delete the trap,
- // so just use a null pointer.
- QueuePtr = DAG.getConstant(0, SL, MVT::i64);
+ // For code object version 5, QueuePtr is passed through implicit kernarg.
+ if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ QueuePtr =
+ loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR);
} else {
- QueuePtr = CreateLiveInRegister(
- DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ Register UserSGPR = Info->getQueuePtrUserSGPR();
+
+ if (UserSGPR == AMDGPU::NoRegister) {
+ // We probably are in a function incorrectly marked with
+ // amdgpu-no-queue-ptr. This is undefined. We don't want to delete the
+ // trap, so just use a null pointer.
+ QueuePtr = DAG.getConstant(0, SL, MVT::i64);
+ } else {
+ QueuePtr = CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, UserSGPR,
+ MVT::i64);
+ }
}
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
@@ -5532,6 +5478,14 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
}
+ // For code object version 5, private_base and shared_base are passed through
+ // implicit kernargs.
+ if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ ImplicitParameter Param =
+ (AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE;
+ return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param);
+ }
+
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
@@ -5691,14 +5645,11 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
EVT EltVT = VecVT.getVectorElementType();
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
+ SDLoc SL(Op);
-
- assert(VecSize <= 64);
-
+ // Specially handle the case of v4i16 with static indexing.
unsigned NumElts = VecVT.getVectorNumElements();
- SDLoc SL(Op);
auto KIdx = dyn_cast<ConstantSDNode>(Idx);
-
if (NumElts == 4 && EltSize == 16 && KIdx) {
SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
@@ -5726,35 +5677,41 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
}
+ // Static indexing does not lower to stack access, and hence there is no need
+ // for special custom lowering to avoid stack access.
if (isa<ConstantSDNode>(Idx))
return SDValue();
- MVT IntVT = MVT::getIntegerVT(VecSize);
-
- // Avoid stack access for dynamic indexing.
+ // Avoid stack access for dynamic indexing by custom lowering to
// v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
- // Create a congruent vector with the target value in each element so that
- // the required element can be masked and ORed into the target vector.
- SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
- DAG.getSplatBuildVector(VecVT, SL, InsVal));
+ assert(VecSize <= 64 && "Expected target vector size to be <= 64 bits");
+
+ MVT IntVT = MVT::getIntegerVT(VecSize);
+ // Convert vector index to bit-index and get the required bit mask.
assert(isPowerOf2_32(EltSize));
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
-
- // Convert vector index to bit-index.
SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
-
- SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
SDValue BFM = DAG.getNode(ISD::SHL, SL, IntVT,
DAG.getConstant(0xffff, SL, IntVT),
ScaledIdx);
+ // 1. Create a congruent vector with the target value in each element.
+ SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
+ DAG.getSplatBuildVector(VecVT, SL, InsVal));
+
+  // 2. Mask off all other indices except the required index within (1).
SDValue LHS = DAG.getNode(ISD::AND, SL, IntVT, BFM, ExtVal);
+
+ // 3. Mask off the required index within the target vector.
+ SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
SDValue RHS = DAG.getNode(ISD::AND, SL, IntVT,
DAG.getNOT(SL, BFM, IntVT), BCVec);
+  // 4. OR (2) and (3) together to form the final target vector.
SDValue BFI = DAG.getNode(ISD::OR, SL, IntVT, LHS, RHS);
+
return DAG.getNode(ISD::BITCAST, SL, VecVT, BFI);
}
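A worked instance of the mask arithmetic above, for VecVT = v4i16 (VecSize = 64, EltSize = 16) and a dynamic index whose runtime value happens to be 2:

  // ScaledIdx = 2 << 4       = 32
  // BFM       = 0xffff << 32 = 0x0000ffff00000000
  // LHS = BFM & splat(val)    -> val kept in lane 2 only
  // RHS = ~BFM & bitcast(vec) -> original vector with lane 2 cleared
  // BFI = LHS | RHS           -> vector with lane 2 replaced by val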
@@ -5778,17 +5735,35 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
return Combined;
- if (VecSize == 128) {
+ if (VecSize == 128 || VecSize == 256) {
SDValue Lo, Hi;
EVT LoVT, HiVT;
- SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
- Lo =
- DAG.getBitcast(LoVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
- V2, DAG.getConstant(0, SL, MVT::i32)));
- Hi =
- DAG.getBitcast(HiVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
- V2, DAG.getConstant(1, SL, MVT::i32)));
+
+ if (VecSize == 128) {
+ SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
+ Lo = DAG.getBitcast(LoVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+ DAG.getConstant(0, SL, MVT::i32)));
+ Hi = DAG.getBitcast(HiVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+ DAG.getConstant(1, SL, MVT::i32)));
+ } else {
+ assert(VecSize == 256);
+
+ SDValue V2 = DAG.getBitcast(MVT::v4i64, Vec);
+ SDValue Parts[4];
+ for (unsigned P = 0; P < 4; ++P) {
+ Parts[P] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+ DAG.getConstant(P, SL, MVT::i32));
+ }
+
+ Lo = DAG.getBitcast(LoVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
+ Parts[0], Parts[1]));
+ Hi = DAG.getBitcast(HiVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
+ Parts[2], Parts[3]));
+ }
+
EVT IdxVT = Idx.getValueType();
unsigned NElem = VecVT.getVectorNumElements();
assert(isPowerOf2_32(NElem));
@@ -5800,10 +5775,19 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
assert(VecSize <= 64);
+ MVT IntVT = MVT::getIntegerVT(VecSize);
+
+ // If Vec is just a SCALAR_TO_VECTOR, then use the scalar integer directly.
+ SDValue VecBC = peekThroughBitcasts(Vec);
+ if (VecBC.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ SDValue Src = VecBC.getOperand(0);
+ Src = DAG.getBitcast(Src.getValueType().changeTypeToInteger(), Src);
+ Vec = DAG.getAnyExtOrTrunc(Src, SL, IntVT);
+ }
+
unsigned EltSize = EltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize));
- MVT IntVT = MVT::getIntegerVT(VecSize);
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
// Convert vector index to bit-index (* EltSize)
@@ -5877,6 +5861,22 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces);
}
+SDValue SITargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue SVal = Op.getOperand(0);
+ EVT ResultVT = Op.getValueType();
+ EVT SValVT = SVal.getValueType();
+ SDValue UndefVal = DAG.getUNDEF(SValVT);
+ SDLoc SL(Op);
+
+ SmallVector<SDValue, 8> VElts;
+ VElts.push_back(SVal);
+ for (int I = 1, E = ResultVT.getVectorNumElements(); I < E; ++I)
+ VElts.push_back(UndefVal);
+
+ return DAG.getBuildVector(ResultVT, SL, VElts);
+}
+
SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -5906,6 +5906,27 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
+ if (VT == MVT::v16i16 || VT == MVT::v16f16) {
+ EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
+ VT.getVectorNumElements() / 4);
+ MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits());
+
+ SmallVector<SDValue, 4> Parts[4];
+ for (unsigned I = 0, E = VT.getVectorNumElements() / 4; I != E; ++I) {
+ for (unsigned P = 0; P < 4; ++P)
+ Parts[P].push_back(Op.getOperand(I + P * E));
+ }
+ SDValue Casts[4];
+ for (unsigned P = 0; P < 4; ++P) {
+ SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
+ Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
+ }
+
+ SDValue Blend =
+ DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 4), SL, Casts);
+ return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
+ }
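The quarter split collects operand I + P * E with E = 4, so for v16f16 each quarter is a consecutive run of four elements:

  // Parts[0] = {op0,  op1,  op2,  op3 }  -> v4f16, bitcast to i64
  // Parts[1] = {op4,  op5,  op6,  op7 }  -> v4f16, bitcast to i64
  // Parts[2] = {op8,  op9,  op10, op11}  -> v4f16, bitcast to i64
  // Parts[3] = {op12, op13, op14, op15}  -> v4f16, bitcast to i64
  // Blend = build_vector v4i64 of the casts, then bitcast back to v16f16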
+
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
@@ -6277,6 +6298,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
+ bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
@@ -6455,6 +6477,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
//
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
+ //
+ // TODO: we can actually allow partial NSA where the final register is a
+ // contiguous set of the remaining addresses.
+ // This could help where there are more addresses than supported.
bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) &&
VAddrs.size() >= 3 &&
VAddrs.size() <= (unsigned)ST->getNSAMaxSize();
@@ -6561,7 +6587,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
- if (IsGFX10Plus) {
+ if (IsGFX11Plus) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
+ UseNSA ? AMDGPU::MIMGEncGfx11NSA
+ : AMDGPU::MIMGEncGfx11Default,
+ NumVDataDwords, NumVAddrDwords);
+ } else if (IsGFX10Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx10NSA
: AMDGPU::MIMGEncGfx10Default,
@@ -6685,6 +6716,32 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
return Loads[0];
}
+SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
+ unsigned Dim,
+ const ArgDescriptor &Arg) const {
+ SDLoc SL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MaxID = Subtarget->getMaxWorkitemID(MF.getFunction(), Dim);
+ if (MaxID == 0)
+ return DAG.getConstant(0, SL, MVT::i32);
+
+ SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
+ SDLoc(DAG.getEntryNode()), Arg);
+
+ // Don't bother inserting AssertZext for packed IDs since we're emitting the
+ // masking operations anyway.
+ //
+ // TODO: We could assert the top bit is 0 for the source copy.
+ if (Arg.isMasked())
+ return Val;
+
+ // Preserve the known bits after expansion to a copy.
+ EVT SmallVT =
+ EVT::getIntegerVT(*DAG.getContext(), 32 - countLeadingZeros(MaxID));
+ return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Val,
+ DAG.getValueType(SmallVT));
+}
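The AssertZext width computation in a concrete case, assuming a maximum work-item ID of 1023 (a 1024-wide dimension):

  // MaxID = 1023 -> countLeadingZeros((uint32_t)1023) = 22
  // SmallVT = i(32 - 22) = i10, so later combines know the copied value
  // carries at most 10 significant bits.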
+
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -6831,26 +6888,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_workitem_id_x:
- if (Subtarget->getMaxWorkitemID(MF.getFunction(), 0) == 0)
- return DAG.getConstant(0, DL, MVT::i32);
-
- return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
- SDLoc(DAG.getEntryNode()),
- MFI->getArgInfo().WorkItemIDX);
+ return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
- if (Subtarget->getMaxWorkitemID(MF.getFunction(), 1) == 0)
- return DAG.getConstant(0, DL, MVT::i32);
-
- return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
- SDLoc(DAG.getEntryNode()),
- MFI->getArgInfo().WorkItemIDY);
+ return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
- if (Subtarget->getMaxWorkitemID(MF.getFunction(), 2) == 0)
- return DAG.getConstant(0, DL, MVT::i32);
-
- return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
- SDLoc(DAG.getEntryNode()),
- MFI->getArgInfo().WorkItemIDZ);
+ return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
case Intrinsic::amdgcn_wavefrontsize:
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
@@ -7157,12 +7199,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
unsigned ShaderType =
SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction());
unsigned Offset0 = OrderedCountIndex << 2;
- unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
- (Instruction << 4);
+ unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
Offset1 |= (CountDw - 1) << 6;
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX11)
+ Offset1 |= ShaderType << 2;
+
unsigned Offset = Offset0 | (Offset1 << 8);
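The ds_ordered_count immediate as assembled above (field positions read off this code; note ShaderType is dropped from the encoding on GFX11):

  // Offset0 (bits [7:0] of the immediate): OrderedCountIndex << 2
  // Offset1 (shifted left by 8):
  //   bit 0     WaveRelease
  //   bit 1     WaveDone
  //   bits 3:2  ShaderType   (only below GFX11)
  //   bits 5:4  Instruction
  //   bits 7:6  CountDw - 1  (GFX10 and newer)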
SDValue Ops[] = {
@@ -7441,7 +7485,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
break;
case Intrinsic::amdgcn_buffer_atomic_fadd:
- if (!Op.getValue(0).use_empty() && !Subtarget->hasGFX90AInsts()) {
+ if (!Op.getValue(0).use_empty() && !hasAtomicFaddRtnForTy(Op)) {
DiagnosticInfoUnsupported
NoFpRet(DAG.getMachineFunction().getFunction(),
"return versions of fp atomics not supported",
@@ -7609,12 +7653,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
+ const bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
const bool Is64 = NodePtr.getValueType() == MVT::i64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
- const bool UseNSA = Subtarget->hasNSAEncoding() &&
- NumVAddrDwords <= Subtarget->getNSAMaxSize();
+ const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
+ const bool UseNSA =
+ Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize();
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
@@ -7622,12 +7668,15 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- AMDGPU::MIMGEncGfx10NSA, NumVDataDwords,
- NumVAddrDwords);
+ IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
+ : AMDGPU::MIMGEncGfx10NSA,
+ NumVDataDwords, NumVAddrDwords);
} else {
- Opcode = AMDGPU::getMIMGOpcode(
- BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
- PowerOf2Ceil(NumVAddrDwords));
+ Opcode =
+ AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
+ : AMDGPU::MIMGEncGfx10Default,
+ NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));
}
assert(Opcode != -1);
@@ -7660,15 +7709,36 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
};
- if (Is64)
- DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0, 2);
- else
+ if (UseNSA && IsGFX11Plus) {
Ops.push_back(NodePtr);
+ Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
+ Ops.push_back(RayOrigin);
+ if (IsA16) {
+ SmallVector<SDValue, 3> DirLanes, InvDirLanes, MergedLanes;
+ DAG.ExtractVectorElements(RayDir, DirLanes, 0, 3);
+ DAG.ExtractVectorElements(RayInvDir, InvDirLanes, 0, 3);
+ for (unsigned I = 0; I < 3; ++I) {
+ MergedLanes.push_back(DAG.getBitcast(
+ MVT::i32, DAG.getBuildVector(MVT::v2f16, DL,
+ {DirLanes[I], InvDirLanes[I]})));
+ }
+ Ops.push_back(DAG.getBuildVector(MVT::v3i32, DL, MergedLanes));
+ } else {
+ Ops.push_back(RayDir);
+ Ops.push_back(RayInvDir);
+ }
+ } else {
+ if (Is64)
+ DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0,
+ 2);
+ else
+ Ops.push_back(NodePtr);
- Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
- packLanes(RayOrigin, true);
- packLanes(RayDir, true);
- packLanes(RayInvDir, false);
+ Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
+ packLanes(RayOrigin, true);
+ packLanes(RayDir, true);
+ packLanes(RayInvDir, false);
+ }
if (!UseNSA) {
// Build a single vector containing all the operands so far prepared.
@@ -7868,6 +7938,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp_compr: {
+ if (!Subtarget->hasCompressedExport()) {
+ DiagnosticInfoUnsupported BadIntrin(
+ DAG.getMachineFunction().getFunction(),
+ "intrinsic not supported on subtarget", DL.getDebugLoc());
+ DAG.getContext()->diagnose(BadIntrin);
+ }
SDValue Src0 = Op.getOperand(4);
SDValue Src1 = Op.getOperand(5);
// Hack around illegal type on SI by directly selecting it.
@@ -8110,6 +8186,160 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
+ case Intrinsic::amdgcn_raw_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ unsigned Opc;
+ bool HasVIndex = IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds;
+ unsigned OpOffset = HasVIndex ? 1 : 0;
+ SDValue VOffset = Op.getOperand(5 + OpOffset);
+ auto CVOffset = dyn_cast<ConstantSDNode>(VOffset);
+ bool HasVOffset = !CVOffset || !CVOffset->isZero();
+ unsigned Size = Op->getConstantOperandVal(4);
+
+ switch (Size) {
+ default:
+ return SDValue();
+ case 1:
+ Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
+ : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
+ : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
+ : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
+ break;
+ case 2:
+ Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
+ : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
+ : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
+ : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
+ break;
+ case 4:
+ Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
+ : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
+ : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
+ : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
+ break;
+ }
+
+ SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+
+ SmallVector<SDValue, 8> Ops;
+
+ if (HasVIndex && HasVOffset)
+ Ops.push_back(DAG.getBuildVector(MVT::v2i32, DL,
+ { Op.getOperand(5), // VIndex
+ VOffset }));
+ else if (HasVIndex)
+ Ops.push_back(Op.getOperand(5));
+ else if (HasVOffset)
+ Ops.push_back(VOffset);
+
+ Ops.push_back(Op.getOperand(2)); // rsrc
+ Ops.push_back(Op.getOperand(6 + OpOffset)); // soffset
+ Ops.push_back(Op.getOperand(7 + OpOffset)); // imm offset
+ unsigned Aux = Op.getConstantOperandVal(8 + OpOffset);
+ Ops.push_back(
+ DAG.getTargetConstant(Aux & AMDGPU::CPol::ALL, DL, MVT::i8)); // cpol
+ Ops.push_back(
+ DAG.getTargetConstant((Aux >> 3) & 1, DL, MVT::i8)); // swz
+ Ops.push_back(M0Val.getValue(0)); // Chain
+ Ops.push_back(M0Val.getValue(1)); // Glue
+
+ auto *M = cast<MemSDNode>(Op);
+ MachineMemOperand *LoadMMO = M->getMemOperand();
+ MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
+ LoadPtrI.Offset = Op->getConstantOperandVal(7 + OpOffset);
+ MachinePointerInfo StorePtrI = LoadPtrI;
+ StorePtrI.V = nullptr;
+ StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
+
+ auto F = LoadMMO->getFlags() &
+ ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
+ LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
+ Size, LoadMMO->getBaseAlign());
+
+ MachineMemOperand *StoreMMO =
+ MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
+ sizeof(int32_t), LoadMMO->getBaseAlign());
+
+ auto Load = DAG.getMachineNode(Opc, DL, M->getVTList(), Ops);
+ DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
+
+ return SDValue(Load, 0);
+ }
+ case Intrinsic::amdgcn_global_load_lds: {
+ unsigned Opc;
+ unsigned Size = Op->getConstantOperandVal(4);
+ switch (Size) {
+ default:
+ return SDValue();
+ case 1:
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
+ break;
+ case 2:
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
+ break;
+ case 4:
+ Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
+ break;
+ }
+
+ auto *M = cast<MemSDNode>(Op);
+ SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+
+ SmallVector<SDValue, 6> Ops;
+
+ SDValue Addr = Op.getOperand(2); // Global ptr
+ SDValue VOffset;
+ // Try to split SAddr and VOffset. Global and LDS pointers share the same
+ // immediate offset, so we cannot use a regular SelectGlobalSAddr().
+ if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) {
+ SDValue LHS = Addr.getOperand(0);
+ SDValue RHS = Addr.getOperand(1);
+
+ if (LHS->isDivergent())
+ std::swap(LHS, RHS);
+
+ if (!LHS->isDivergent() && RHS.getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getOperand(0).getValueType() == MVT::i32) {
+ // add (i64 sgpr), (zero_extend (i32 vgpr))
+ Addr = LHS;
+ VOffset = RHS.getOperand(0);
+ }
+ }
+
+ Ops.push_back(Addr);
+ if (!Addr->isDivergent()) {
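+      // Uniform address: use the SADDR form. If no VGPR offset was split out
+      // above, materialize a zero VGPR for the required voffset operand.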
+ Opc = AMDGPU::getGlobalSaddrOp(Opc);
+ if (!VOffset)
+ VOffset = SDValue(
+ DAG.getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
+ DAG.getTargetConstant(0, DL, MVT::i32)), 0);
+ Ops.push_back(VOffset);
+ }
+
+ Ops.push_back(Op.getOperand(5)); // Offset
+ Ops.push_back(Op.getOperand(6)); // CPol
+ Ops.push_back(M0Val.getValue(0)); // Chain
+ Ops.push_back(M0Val.getValue(1)); // Glue
+
+ MachineMemOperand *LoadMMO = M->getMemOperand();
+ MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
+ LoadPtrI.Offset = Op->getConstantOperandVal(5);
+ MachinePointerInfo StorePtrI = LoadPtrI;
+ LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
+ StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
+ auto F = LoadMMO->getFlags() &
+ ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
+ LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
+ Size, LoadMMO->getBaseAlign());
+ MachineMemOperand *StoreMMO =
+ MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
+ sizeof(int32_t), Align(4));
+
+ auto Load = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
+ DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
+
+ return SDValue(Load, 0);
+ }
case Intrinsic::amdgcn_end_cf:
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain), 0);
@@ -8271,7 +8501,7 @@ static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- if (Ld->getAlignment() < 4 || Ld->isDivergent())
+ if (Ld->getAlign() < Align(4) || Ld->isDivergent())
return SDValue();
// FIXME: Constant loads should all be marked invariant.
@@ -8296,14 +8526,11 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const
// TODO: Drop only high part of range.
SDValue Ptr = Ld->getBasePtr();
- SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD,
- MVT::i32, SL, Ld->getChain(), Ptr,
- Ld->getOffset(),
- Ld->getPointerInfo(), MVT::i32,
- Ld->getAlignment(),
- Ld->getMemOperand()->getFlags(),
- Ld->getAAInfo(),
- nullptr); // Drop ranges
+ SDValue NewLoad = DAG.getLoad(
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, MVT::i32, SL, Ld->getChain(), Ptr,
+ Ld->getOffset(), Ld->getPointerInfo(), MVT::i32, Ld->getAlign(),
+ Ld->getMemOperand()->getFlags(), Ld->getAAInfo(),
+ nullptr); // Drop ranges
EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
if (MemVT.isFloatingPoint()) {
@@ -8392,17 +8619,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
"Custom lowering for non-i32 vectors hasn't been implemented.");
- unsigned Alignment = Load->getAlignment();
+ Align Alignment = Load->getAlign();
unsigned AS = Load->getAddressSpace();
- if (Subtarget->hasLDSMisalignedBug() &&
- AS == AMDGPUAS::FLAT_ADDRESS &&
- Alignment < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
+ if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS &&
+ Alignment.value() < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
return SplitVectorLoad(Op, DAG);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- // If there is a possibilty that flat instruction access scratch memory
+ // If there is a possibility that flat instruction access scratch memory
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
@@ -8413,7 +8639,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
- if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32) {
+ if (!Op->isDivergent() && Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
@@ -8429,7 +8655,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load) &&
- Alignment >= 4 && NumElements < 32) {
+ Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
@@ -8479,27 +8705,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- // Use ds_read_b128 or ds_read_b96 when possible.
- if (Subtarget->hasDS96AndDS128() &&
- ((Subtarget->useDS128() && MemVT.getStoreSize() == 16) ||
- MemVT.getStoreSize() == 12) &&
- allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
- Load->getAlign()))
+ bool Fast = false;
+ auto Flags = Load->getMemOperand()->getFlags();
+ if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
+ Load->getAlign(), Flags, &Fast) &&
+ Fast)
return SDValue();
- if (NumElements > 2)
+ if (MemVT.isVector())
return SplitVectorLoad(Op, DAG);
-
- // SI has a hardware bug in the LDS / GDS boounds checking: if the base
- // address is negative, then the instruction is incorrectly treated as
- // out-of-bounds even if base + offsets is in bounds. Split vectorized
- // loads here to avoid emitting ds_read2_b32. We may re-combine the
- // load later in the SILoadStoreOptimizer.
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
- NumElements == 2 && MemVT.getStoreSize() == 8 &&
- Load->getAlignment() < 8) {
- return SplitVectorLoad(Op, DAG);
- }
}
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
@@ -8514,7 +8728,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (VT.getSizeInBits() == 128)
+ if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
return splitTernaryVectorOp(Op, DAG);
assert(VT.getSizeInBits() == 64);
@@ -8946,13 +9160,13 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
unsigned AS = Store->getAddressSpace();
if (Subtarget->hasLDSMisalignedBug() &&
AS == AMDGPUAS::FLAT_ADDRESS &&
- Store->getAlignment() < VT.getStoreSize() && VT.getSizeInBits() > 32) {
+ Store->getAlign().value() < VT.getStoreSize() && VT.getSizeInBits() > 32) {
return SplitVectorStore(Op, DAG);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- // If there is a possibilty that flat instruction access scratch memory
+ // If there is a possibility that flat instruction access scratch memory
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
@@ -8990,39 +9204,21 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- // Use ds_write_b128 or ds_write_b96 when possible.
- if (Subtarget->hasDS96AndDS128() &&
- ((Subtarget->useDS128() && VT.getStoreSize() == 16) ||
- (VT.getStoreSize() == 12)) &&
- allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
- Store->getAlign()))
+ bool Fast = false;
+ auto Flags = Store->getMemOperand()->getFlags();
+ if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
+ Store->getAlign(), Flags, &Fast) &&
+ Fast)
return SDValue();
- if (NumElements > 2)
+ if (VT.isVector())
return SplitVectorStore(Op, DAG);
- // SI has a hardware bug in the LDS / GDS boounds checking: if the base
- // address is negative, then the instruction is incorrectly treated as
- // out-of-bounds even if base + offsets is in bounds. Split vectorized
- // stores here to avoid emitting ds_write2_b32. We may re-combine the
- // store later in the SILoadStoreOptimizer.
- if (!Subtarget->hasUsableDSOffset() &&
- NumElements == 2 && VT.getStoreSize() == 8 &&
- Store->getAlignment() < 8) {
- return SplitVectorStore(Op, DAG);
- }
-
- if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
- VT, *Store->getMemOperand())) {
- if (VT.isVector())
- return SplitVectorStore(Op, DAG);
- return expandUnalignedStore(Store, DAG);
- }
-
- return SDValue();
- } else {
- llvm_unreachable("unhandled address space");
+ return expandUnalignedStore(Store, DAG);
}
+
+ // Probably an invalid store. If so we'll end up emitting a selection error.
+ return SDValue();
}
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
@@ -10041,7 +10237,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
}
}
- // If one half is undef, and one is constant, perfer a splat vector rather
+ // If one half is undef, and one is constant, prefer a splat vector rather
// than the normal qNaN. If it's a register, prefer 0.0 since that's
// cheaper to use and may be free with a packed operation.
if (NewElts[0].isUndef()) {
@@ -10349,7 +10545,8 @@ SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
// expanded into a set of cmp/select instructions.
bool SITargetLowering::shouldExpandVectorDynExt(unsigned EltSize,
unsigned NumElem,
- bool IsDivergentIdx) {
+ bool IsDivergentIdx,
+ const GCNSubtarget *Subtarget) {
if (UseDivergentRegisterIndexing)
return false;
@@ -10371,10 +10568,18 @@ bool SITargetLowering::shouldExpandVectorDynExt(unsigned EltSize,
// Large vectors would yield too many compares and v_cndmask_b32 instructions.
unsigned NumInsts = NumElem /* Number of compares */ +
((EltSize + 31) / 32) * NumElem /* Number of cndmasks */;
- return NumInsts <= 16;
+
+ // On some architectures (GFX9) movrel is not available and it's better
+ // to expand.
+ if (!Subtarget->hasMovrel())
+ return NumInsts <= 16;
+
+  // If movrel is available, use it instead of expanding for vectors of 8
+  // elements.
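+  // For example, an 8-element vector of 32-bit elements costs
+  // 8 compares + 8 cndmasks = 16 instructions, which exceeds this threshold
+  // and therefore takes the movrel path.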
+ return NumInsts <= 15;
}
-static bool shouldExpandVectorDynExt(SDNode *N) {
+bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
SDValue Idx = N->getOperand(N->getNumOperands() - 1);
if (isa<ConstantSDNode>(Idx))
return false;
@@ -10385,8 +10590,8 @@ static bool shouldExpandVectorDynExt(SDNode *N) {
unsigned EltSize = EltVT.getSizeInBits();
unsigned NumElem = VecVT.getVectorNumElements();
- return SITargetLowering::shouldExpandVectorDynExt(EltSize, NumElem,
- Idx->isDivergent());
+ return SITargetLowering::shouldExpandVectorDynExt(
+ EltSize, NumElem, Idx->isDivergent(), getSubtarget());
}
SDValue SITargetLowering::performExtractVectorEltCombine(
@@ -10450,7 +10655,7 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
unsigned EltSize = EltVT.getSizeInBits();
// EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
- if (::shouldExpandVectorDynExt(N)) {
+ if (shouldExpandVectorDynExt(N)) {
SDLoc SL(N);
SDValue Idx = N->getOperand(1);
SDValue V;
@@ -10513,7 +10718,7 @@ SITargetLowering::performInsertVectorEltCombine(SDNode *N,
// INSERT_VECTOR_ELT (<n x e>, var-idx)
// => BUILD_VECTOR n x select (e, const-idx)
- if (!::shouldExpandVectorDynExt(N))
+ if (!shouldExpandVectorDynExt(N))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -10603,39 +10808,145 @@ static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad);
}
-SDValue SITargetLowering::performAddCombine(SDNode *N,
+// Fold (add (mul x, y), z) --> (mad_[iu]64_[iu]32 x, y, z) plus high
+// multiplies, if any.
+//
+// Full 64-bit multiplies that feed into an addition are lowered here instead
+// of using the generic expansion. The generic expansion ends up with
+// a tree of ADD nodes that prevents us from using the "add" part of the
+// MAD instruction. The expansion produced here results in a chain of ADDs
+// instead of a tree.
+SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::ADD);
+
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if ((LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL)
- && Subtarget->hasMad64_32() &&
- !VT.isVector() && VT.getScalarSizeInBits() > 32 &&
- VT.getScalarSizeInBits() <= 64) {
- if (LHS.getOpcode() != ISD::MUL)
- std::swap(LHS, RHS);
+ if (VT.isVector())
+ return SDValue();
- SDValue MulLHS = LHS.getOperand(0);
- SDValue MulRHS = LHS.getOperand(1);
- SDValue AddRHS = RHS;
+ // S_MUL_HI_[IU]32 was added in gfx9, which allows us to keep the overall
+ // result in scalar registers for uniform values.
+ if (!N->isDivergent() && Subtarget->hasSMulHi())
+ return SDValue();
+
+ unsigned NumBits = VT.getScalarSizeInBits();
+ if (NumBits <= 32 || NumBits > 64)
+ return SDValue();
+
+ if (LHS.getOpcode() != ISD::MUL) {
+ assert(RHS.getOpcode() == ISD::MUL);
+ std::swap(LHS, RHS);
+ }
+
+ // Avoid the fold if it would unduly increase the number of multiplies due to
+ // multiple uses, except on hardware with full-rate multiply-add (which is
+ // part of full-rate 64-bit ops).
+ if (!Subtarget->hasFullRate64Ops()) {
+ unsigned NumUsers = 0;
+ for (SDNode *Use : LHS->uses()) {
+ // There is a use that does not feed into addition, so the multiply can't
+ // be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
+ if (Use->getOpcode() != ISD::ADD)
+ return SDValue();
- // TODO: Maybe restrict if SGPR inputs.
- if (numBitsUnsigned(MulLHS, DAG) <= 32 &&
- numBitsUnsigned(MulRHS, DAG) <= 32) {
- MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32);
- MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32);
- AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64);
- return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false);
+ // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
+ // MUL + 3xADD + 3xADDC over 3xMAD.
+ ++NumUsers;
+ if (NumUsers >= 3)
+ return SDValue();
}
+ }
+
+ SDValue MulLHS = LHS.getOperand(0);
+ SDValue MulRHS = LHS.getOperand(1);
+ SDValue AddRHS = RHS;
+
+ // Always check whether operands are small unsigned values, since that
+ // knowledge is useful in more cases. Check for small signed values only if
+ // doing so can unlock a shorter code sequence.
+ bool MulLHSUnsigned32 = numBitsUnsigned(MulLHS, DAG) <= 32;
+ bool MulRHSUnsigned32 = numBitsUnsigned(MulRHS, DAG) <= 32;
+
+ bool MulSignedLo = false;
+ if (!MulLHSUnsigned32 || !MulRHSUnsigned32) {
+ MulSignedLo = numBitsSigned(MulLHS, DAG) <= 32 &&
+ numBitsSigned(MulRHS, DAG) <= 32;
+ }
- if (numBitsSigned(MulLHS, DAG) <= 32 && numBitsSigned(MulRHS, DAG) <= 32) {
- MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32);
- MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32);
- AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64);
- return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true);
+ // The operands and final result all have the same number of bits. If
+ // operands need to be extended, they can be extended with garbage. The
+ // resulting garbage in the high bits of the mad_[iu]64_[iu]32 result is
+ // truncated away in the end.
+ if (VT != MVT::i64) {
+ MulLHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulLHS);
+ MulRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulRHS);
+ AddRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, AddRHS);
+ }
+
+ // The basic code generated is conceptually straightforward. Pseudo code:
+ //
+ // accum = mad_64_32 lhs.lo, rhs.lo, accum
+ // accum.hi = add (mul lhs.hi, rhs.lo), accum.hi
+ // accum.hi = add (mul lhs.lo, rhs.hi), accum.hi
+ //
+ // The second and third lines are optional, depending on whether the factors
+ // are {sign,zero}-extended or not.
+ //
+ // The actual DAG is noisier than the pseudo code, but only due to
+ // instructions that disassemble values into low and high parts, and
+ // assemble the final result.
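+  //
+  // For example, when both factors are known to fit in 32 unsigned bits, the
+  // two high-part lines are skipped entirely and the fold collapses to the
+  // first line alone:
+  //
+  //   accum = mad_u64_u32 lhs.lo, rhs.lo, accum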
+ SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+ auto MulLHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulLHS);
+ auto MulRHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulRHS);
+ SDValue Accum =
+ getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo);
+
+ if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) {
+ auto AccumLo = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, Zero);
+ auto AccumHi = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, One);
+
+ if (!MulLHSUnsigned32) {
+ auto MulLHSHi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulLHS, One);
+ SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSHi, MulRHSLo);
+ AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi);
+ }
+
+ if (!MulRHSUnsigned32) {
+ auto MulRHSHi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulRHS, One);
+ SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSLo, MulRHSHi);
+ AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi);
+ }
+
+ Accum = DAG.getBuildVector(MVT::v2i32, SL, {AccumLo, AccumHi});
+ Accum = DAG.getBitcast(MVT::i64, Accum);
+ }
+
+ if (VT != MVT::i64)
+ Accum = DAG.getNode(ISD::TRUNCATE, SL, VT, Accum);
+ return Accum;
+}
+
+SDValue SITargetLowering::performAddCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) {
+ if (Subtarget->hasMad64_32()) {
+ if (SDValue Folded = tryFoldToMad64_32(N, DCI))
+ return Folded;
}
return SDValue();
@@ -10763,7 +11074,7 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
SDValue RHS = N->getOperand(1);
// These should really be instruction patterns, but writing patterns with
- // source modiifiers is a pain.
+ // source modifiers is a pain.
// fadd (fadd (a, a), b) -> mad 2.0, a, b
if (LHS.getOpcode() == ISD::FADD) {
@@ -10860,8 +11171,8 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
return SDValue();
// fdot2_f32_f16 always flushes fp32 denormal operand and output to zero,
- // regardless of the denorm mode setting. Therefore, unsafe-fp-math/fp-contract
- // is sufficient to allow generaing fdot2.
+ // regardless of the denorm mode setting. Therefore,
+ // unsafe-fp-math/fp-contract is sufficient to allow generating fdot2.
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
(N->getFlags().hasAllowContract() &&
@@ -11562,7 +11873,7 @@ void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
if (DstSize < InitIdx)
return;
- // Create a register for the intialization value.
+ // Create a register for the initialization value.
Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
unsigned NewDst = 0; // Final initialized value will be in here
@@ -11608,7 +11919,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
TII->legalizeOperandsVOP3(MRI, MI);
// Prefer VGPRs over AGPRs in mAI instructions where possible.
- // This saves a chain-copy of registers and better ballance register
+ // This saves a chain-copy of registers and better balance register
// use between vgpr and agpr as agpr tuples tend to be big.
if (MI.getDesc().OpInfo) {
unsigned Opc = MI.getOpcode();
@@ -11633,54 +11944,29 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// so no use checks are needed.
MRI.setRegClass(Op.getReg(), NewRC);
}
- }
- return;
- }
-
- // Replace unused atomics with the no return version.
- int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
- if (NoRetAtomicOp != -1) {
- if (!Node->hasAnyUseOfValue(0)) {
- int CPolIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::cpol);
- if (CPolIdx != -1) {
- MachineOperand &CPol = MI.getOperand(CPolIdx);
- CPol.setImm(CPol.getImm() & ~AMDGPU::CPol::GLC);
+ // Resolve the rest of AV operands to AGPRs.
+ if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
+ if (Src2->isReg() && Src2->getReg().isVirtual()) {
+ auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
+ if (TRI->isVectorSuperClass(RC)) {
+ auto *NewRC = TRI->getEquivalentAGPRClass(RC);
+ MRI.setRegClass(Src2->getReg(), NewRC);
+ if (Src2->isTied())
+ MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
+ }
+ }
}
- MI.RemoveOperand(0);
- MI.setDesc(TII->get(NoRetAtomicOp));
- return;
}
- // For mubuf_atomic_cmpswap, we need to have tablegen use an extract_subreg
- // instruction, because the return type of these instructions is a vec2 of
- // the memory type, so it can be tied to the input operand.
- // This means these instructions always have a use, so we need to add a
- // special case to check if the atomic has only one extract_subreg use,
- // which itself has no uses.
- if ((Node->hasNUsesOfValue(1, 0) &&
- Node->use_begin()->isMachineOpcode() &&
- Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG &&
- !Node->use_begin()->hasAnyUseOfValue(0))) {
- Register Def = MI.getOperand(0).getReg();
-
- // Change this into a noret atomic.
- MI.setDesc(TII->get(NoRetAtomicOp));
- MI.RemoveOperand(0);
-
- // If we only remove the def operand from the atomic instruction, the
- // extract_subreg will be left with a use of a vreg without a def.
- // So we need to insert an implicit_def to avoid machine verifier
- // errors.
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
- TII->get(AMDGPU::IMPLICIT_DEF), Def);
- }
return;
}
- if (TII->isMIMG(MI) && !MI.mayStore())
- AddIMGInit(MI);
+ if (TII->isMIMG(MI)) {
+ if (!MI.mayStore())
+ AddIMGInit(MI);
+ TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr);
+ }
}
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL,
@@ -12243,13 +12529,17 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
MachineBasicBlock *Exit = ML->getExitBlock();
if (Pre && Exit) {
- BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(),
- TII->get(AMDGPU::S_INST_PREFETCH))
- .addImm(1); // prefetch 2 lines behind PC
+ auto PreTerm = Pre->getFirstTerminator();
+ if (PreTerm == Pre->begin() ||
+ std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
+ BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(1); // prefetch 2 lines behind PC
- BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(),
- TII->get(AMDGPU::S_INST_PREFETCH))
- .addImm(2); // prefetch 1 line behind PC
+ auto ExitHead = Exit->getFirstNonDebugInstr();
+ if (ExitHead == Exit->end() ||
+ ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
+ BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(2); // prefetch 1 line behind PC
}
return CacheLineAlign;
@@ -12390,6 +12680,9 @@ static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) {
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ unsigned AS = RMW->getPointerAddressSpace();
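+  // Scratch is strictly per-lane storage, so no other thread can observe the
+  // access; NotAtomic lowers such "atomics" to ordinary loads and stores.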
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
+ return AtomicExpansionKind::NotAtomic;
auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
OptimizationRemarkEmitter ORE(RMW->getFunction());
@@ -12421,10 +12714,11 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
return AtomicExpansionKind::CmpXChg;
- unsigned AS = RMW->getPointerAddressSpace();
-
if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) &&
- Subtarget->hasAtomicFaddInsts()) {
+ Subtarget->hasAtomicFaddNoRtnInsts()) {
+ if (Subtarget->hasGFX940Insts())
+ return AtomicExpansionKind::None;
+
// The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe
// floating point atomic instructions. May generate more efficient code,
// but may not respect rounding and denormal modes, and may give incorrect
@@ -12453,8 +12747,8 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
: AtomicExpansionKind::CmpXChg;
}
- // DS FP atomics do repect the denormal mode, but the rounding mode is fixed
- // to round-to-nearest-even.
+ // DS FP atomics do respect the denormal mode, but the rounding mode is
+ // fixed to round-to-nearest-even.
// The only exception is DS_ADD_F64 which never flushes regardless of mode.
if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) {
if (!Ty->isDoubleTy())
@@ -12479,6 +12773,27 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}
+TargetLowering::AtomicExpansionKind
+SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
+ ? AtomicExpansionKind::NotAtomic
+ : AtomicExpansionKind::None;
+}
+
+TargetLowering::AtomicExpansionKind
+SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
+ ? AtomicExpansionKind::NotAtomic
+ : AtomicExpansionKind::None;
+}
+
+TargetLowering::AtomicExpansionKind
+SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
+ return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
+ ? AtomicExpansionKind::NotAtomic
+ : AtomicExpansionKind::None;
+}
+
const TargetRegisterClass *
SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
@@ -12500,7 +12815,7 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// always uniform.
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited,
unsigned WaveSize) {
- // FIXME: We asssume we never cast the mask results of a control flow
+ // FIXME: We assume we never cast the mask results of a control flow
// intrinsic.
// Early exit if the type won't be consistent as a compile time hack.
IntegerType *IT = dyn_cast<IntegerType>(V->getType());
@@ -12604,7 +12919,7 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const {
if (!N0.hasOneUse())
return false;
- // Take care of the oportunity to keep N0 uniform
+ // Take care of the opportunity to keep N0 uniform
if (N0->isDivergent() || !N1->isDivergent())
return true;
// Check if we have a good chance to form the memory access pattern with the
@@ -12612,3 +12927,11 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
return (DAG.isBaseWithConstantOffset(N0) &&
hasMemSDNodeUser(*N0->use_begin()));
}
+
+MachineMemOperand::Flags
+SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
+ // Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
+ if (I.getMetadata("amdgpu.noclobber"))
+ return MONoClobber;
+ return MachineMemOperand::MONone;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index bf81e082b478..4fbccf0c5850 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -53,6 +53,9 @@ private:
uint64_t Offset, Align Alignment,
bool Signed,
const ISD::InputArg *Arg = nullptr) const;
+ SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL,
+ Align Alignment,
+ ImplicitParameter Param) const;
SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
const SDLoc &SL, SDValue Chain,
@@ -76,6 +79,9 @@ private:
SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
unsigned NewOpcode) const;
+ SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
+ const ArgDescriptor &ArgDesc) const;
+
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
@@ -145,6 +151,7 @@ private:
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
@@ -191,6 +198,7 @@ private:
SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
+ SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -227,7 +235,10 @@ public:
/// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
/// expanded into a set of cmp/select instructions.
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem,
- bool IsDivergentIdx);
+ bool IsDivergentIdx,
+ const GCNSubtarget *Subtarget);
+
+ bool shouldExpandVectorDynExt(SDNode *N) const;
private:
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
@@ -310,6 +321,9 @@ public:
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
+ unsigned Index) const override;
+
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
@@ -380,6 +394,7 @@ public:
MachineBasicBlock *BB) const override;
bool hasBitPreservingFPLogic(EVT VT) const override;
+ bool hasAtomicFaddRtnForTy(SDValue &Op) const;
bool enableAggressiveFMAFusion(EVT VT) const override;
bool enableAggressiveFMAFusion(LLT Ty) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -466,6 +481,10 @@ public:
bool SNaN = false,
unsigned Depth = 0) const override;
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+ AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
virtual const TargetRegisterClass *
getRegClassFor(MVT VT, bool isDivergent) const override;
@@ -505,6 +524,9 @@ public:
std::pair<InstructionCost, MVT> getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const;
+
+ MachineMemOperand::Flags
+ getTargetMMOFlags(const Instruction &I) const override;
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 125f006a1d1d..50f8ad4433c6 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -35,6 +35,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
@@ -42,11 +43,39 @@ using namespace llvm;
namespace {
+// A clause length of 64 instructions could be encoded in the s_clause
+// instruction, but the hardware documentation (at least for GFX11) says that
+// 63 is the maximum allowed.
+constexpr unsigned MaxInstructionsInClause = 63;
+
enum HardClauseType {
+ // For GFX10:
+
// Texture, buffer, global or scratch memory instructions.
HARDCLAUSE_VMEM,
// Flat (not global or scratch) memory instructions.
HARDCLAUSE_FLAT,
+
+ // For GFX11:
+
+ // Texture memory instructions.
+ HARDCLAUSE_MIMG_LOAD,
+ HARDCLAUSE_MIMG_STORE,
+ HARDCLAUSE_MIMG_ATOMIC,
+ HARDCLAUSE_MIMG_SAMPLE,
+ // Buffer, global or scratch memory instructions.
+ HARDCLAUSE_VMEM_LOAD,
+ HARDCLAUSE_VMEM_STORE,
+ HARDCLAUSE_VMEM_ATOMIC,
+ // Flat (not global or scratch) memory instructions.
+ HARDCLAUSE_FLAT_LOAD,
+ HARDCLAUSE_FLAT_STORE,
+ HARDCLAUSE_FLAT_ATOMIC,
+ // BVH instructions.
+ HARDCLAUSE_BVH,
+
+ // Common:
+
// Instructions that access LDS.
HARDCLAUSE_LDS,
// Scalar memory instructions.
@@ -78,19 +107,43 @@ public:
}
HardClauseType getHardClauseType(const MachineInstr &MI) {
-
- // On current architectures we only get a benefit from clausing loads.
- if (MI.mayLoad()) {
- if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
- if (ST->hasNSAClauseBug()) {
+ if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
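+      // In the classification below, mayLoad && mayStore identifies atomics
+      // with return, which get dedicated *_ATOMIC clause types on GFX11.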
+ if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
+ if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
+ if (ST->hasNSAClauseBug()) {
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
+ return HARDCLAUSE_ILLEGAL;
+ }
+ return HARDCLAUSE_VMEM;
+ }
+ if (SIInstrInfo::isFLAT(MI))
+ return HARDCLAUSE_FLAT;
+ } else {
+ assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
+ if (SIInstrInfo::isMIMG(MI)) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
- if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
- return HARDCLAUSE_ILLEGAL;
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ if (BaseInfo->BVH)
+ return HARDCLAUSE_BVH;
+ if (BaseInfo->Sampler)
+ return HARDCLAUSE_MIMG_SAMPLE;
+ return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
+ : HARDCLAUSE_MIMG_LOAD
+ : HARDCLAUSE_MIMG_STORE;
+ }
+ if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
+ return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
+ : HARDCLAUSE_VMEM_LOAD
+ : HARDCLAUSE_VMEM_STORE;
+ }
+ if (SIInstrInfo::isFLAT(MI)) {
+ return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
+ : HARDCLAUSE_FLAT_LOAD
+ : HARDCLAUSE_FLAT_STORE;
}
- return HARDCLAUSE_VMEM;
}
- if (SIInstrInfo::isFLAT(MI))
- return HARDCLAUSE_FLAT;
// TODO: LDS
if (SIInstrInfo::isSMRD(MI))
return HARDCLAUSE_SMEM;
@@ -129,7 +182,7 @@ public:
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
if (CI.First == CI.Last)
return false;
- assert(CI.Length <= 64 && "Hard clause is too long!");
+ assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
auto &MBB = *CI.First->getParent();
auto ClauseMI =
@@ -170,7 +223,7 @@ public:
}
}
- if (CI.Length == 64 ||
+ if (CI.Length == MaxInstructionsInClause ||
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
Type != HARDCLAUSE_IGNORE &&
(Type != CI.Type ||
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index f8a10bc8ef6f..349bcbf82195 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -31,6 +31,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/DebugCounter.h"
@@ -87,29 +88,29 @@ struct RegisterEncoding {
};
enum WaitEventType {
- VMEM_ACCESS, // vector-memory read & write
- VMEM_READ_ACCESS, // vector-memory read
- VMEM_WRITE_ACCESS,// vector-memory write
- LDS_ACCESS, // lds read & write
- GDS_ACCESS, // gds read & write
- SQ_MESSAGE, // send message
- SMEM_ACCESS, // scalar-memory read & write
- EXP_GPR_LOCK, // export holding on its data src
- GDS_GPR_LOCK, // GDS holding on its data and addr src
- EXP_POS_ACCESS, // write to export position
- EXP_PARAM_ACCESS, // write to export parameter
- VMW_GPR_LOCK, // vector-memory write holding on its data src
+ VMEM_ACCESS, // vector-memory read & write
+ VMEM_READ_ACCESS, // vector-memory read
+ VMEM_WRITE_ACCESS, // vector-memory write
+ LDS_ACCESS, // lds read & write
+ GDS_ACCESS, // gds read & write
+ SQ_MESSAGE, // send message
+ SMEM_ACCESS, // scalar-memory read & write
+ EXP_GPR_LOCK, // export holding on its data src
+ GDS_GPR_LOCK, // GDS holding on its data and addr src
+ EXP_POS_ACCESS, // write to export position
+ EXP_PARAM_ACCESS, // write to export parameter
+ VMW_GPR_LOCK, // vector-memory write holding on its data src
+ EXP_LDS_ACCESS, // read by ldsdir counting as export
NUM_WAIT_EVENTS,
};
static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = {
- (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS),
- (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
- (1 << SQ_MESSAGE),
- (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
- (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS),
- (1 << VMEM_WRITE_ACCESS)
-};
+ (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS),
+ (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
+ (1 << SQ_MESSAGE),
+ (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
+ (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS) | (1 << EXP_LDS_ACCESS),
+ (1 << VMEM_WRITE_ACCESS)};
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
@@ -119,10 +120,10 @@ static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = {
// special tokens like SCMEM_LDS (needed for buffer load to LDS).
enum RegisterMapping {
SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets.
- AGPR_OFFSET = 226, // Maximum programmable ArchVGPRs across all targets.
+ AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets.
SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
NUM_EXTRA_VGPRS = 1, // A reserved slot for DS.
- EXTRA_VGPR_LDS = 0, // This is a placeholder the Shader algorithm uses.
+ EXTRA_VGPR_LDS = 0, // An artificial register to track LDS writes.
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
};
@@ -355,6 +356,8 @@ private:
DenseSet<MachineInstr *> TrackedWaitcntSet;
DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
+ DenseMap<MachineBasicBlock *, bool> PreheadersToFlush;
+ MachineLoopInfo *MLI;
MachinePostDominatorTree *PDT;
struct BlockInfo {
@@ -381,6 +384,9 @@ public:
(void)ForceVMCounter;
}
+ bool shouldFlushVmCnt(MachineLoop *ML, WaitcntBrackets &Brackets);
+ bool isPreheaderToFlush(MachineBasicBlock &MBB,
+ WaitcntBrackets &ScoreBrackets);
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -389,6 +395,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachinePostDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -431,14 +438,23 @@ public:
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
bool generateWaitcntInstBefore(MachineInstr &MI,
WaitcntBrackets &ScoreBrackets,
- MachineInstr *OldWaitcntInstr);
+ MachineInstr *OldWaitcntInstr,
+ bool FlushVmCnt);
+ bool generateWaitcntBlockEnd(MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr);
+ bool generateWaitcnt(AMDGPU::Waitcnt Wait,
+ MachineBasicBlock::instr_iterator It,
+ MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr);
void updateEventWaitcntAfter(MachineInstr &Inst,
WaitcntBrackets *ScoreBrackets);
bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
WaitcntBrackets &ScoreBrackets);
bool applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
MachineInstr &OldWaitcntInstr,
- AMDGPU::Waitcnt &Wait, const MachineInstr *MI);
+ AMDGPU::Waitcnt &Wait,
+ MachineBasicBlock::instr_iterator It);
};
} // end anonymous namespace
@@ -496,6 +512,14 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI,
}
}
+// MUBUF and FLAT LDS DMA operations need a wait on vmcnt before the LDS they
+// write can be accessed. A load from LDS to VMEM does not need a wait.
+static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
+ return SIInstrInfo::isVALU(MI) &&
+ (SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI)) &&
+ MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
+}
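+// For example, a MUBUF load that targets LDS reads VMEM and writes LDS in a
+// single instruction. This predicate is used below both to score the
+// artificial LDS-tracking register for such writes and to skip the vmcnt
+// wait for the DMA instruction itself.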
+
void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI,
@@ -588,6 +612,12 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
CurrScore);
}
+ } else if (TII->isLDSDIR(Inst)) {
+ // LDSDIR instructions attach the score to the destination.
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdst),
+ CurrScore);
} else {
if (TII->isEXP(Inst)) {
// For export the destination registers are really temps that
@@ -644,7 +674,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
setRegScore(RegNo, T, CurrScore);
}
}
- if (TII->isDS(Inst) && Inst.mayStore()) {
+ if (Inst.mayStore() && (TII->isDS(Inst) || mayWriteLDSThroughDMA(Inst))) {
setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, CurrScore);
}
}
@@ -784,6 +814,7 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
false)
@@ -796,53 +827,53 @@ FunctionPass *llvm::createSIInsertWaitcntsPass() {
return new SIInsertWaitcnts();
}
-/// Combine consecutive waitcnt instructions that precede \p MI and follow
+/// Combine consecutive waitcnt instructions that precede \p It and follow
/// \p OldWaitcntInstr and apply any extra wait from waitcnt that were added
/// by previous passes. Currently this pass conservatively assumes that these
/// preexisting waitcnt are required for correctness.
-bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
- MachineInstr &OldWaitcntInstr,
- AMDGPU::Waitcnt &Wait,
- const MachineInstr *MI) {
+bool SIInsertWaitcnts::applyPreexistingWaitcnt(
+ WaitcntBrackets &ScoreBrackets, MachineInstr &OldWaitcntInstr,
+ AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) {
bool Modified = false;
MachineInstr *WaitcntInstr = nullptr;
MachineInstr *WaitcntVsCntInstr = nullptr;
- for (auto II = OldWaitcntInstr.getIterator(), NextI = std::next(II);
- &*II != MI; II = NextI, ++NextI) {
- if (II->isMetaInstruction())
+
+ for (auto &II :
+ make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
+ if (II.isMetaInstruction())
continue;
- if (II->getOpcode() == AMDGPU::S_WAITCNT) {
+ if (II.getOpcode() == AMDGPU::S_WAITCNT) {
// Conservatively update required wait if this waitcnt was added in an
// earlier pass. In this case it will not exist in the tracked waitcnt
// set.
- if (!TrackedWaitcntSet.count(&*II)) {
- unsigned IEnc = II->getOperand(0).getImm();
+ if (!TrackedWaitcntSet.count(&II)) {
+ unsigned IEnc = II.getOperand(0).getImm();
AMDGPU::Waitcnt OldWait = AMDGPU::decodeWaitcnt(IV, IEnc);
Wait = Wait.combined(OldWait);
}
// Merge consecutive waitcnt of the same type by erasing multiples.
if (!WaitcntInstr) {
- WaitcntInstr = &*II;
+ WaitcntInstr = &II;
} else {
- II->eraseFromParent();
+ II.eraseFromParent();
Modified = true;
}
} else {
- assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
- assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
- if (!TrackedWaitcntSet.count(&*II)) {
+ assert(II.getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
+ assert(II.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+ if (!TrackedWaitcntSet.count(&II)) {
unsigned OldVSCnt =
- TII->getNamedOperand(*II, AMDGPU::OpName::simm16)->getImm();
+ TII->getNamedOperand(II, AMDGPU::OpName::simm16)->getImm();
Wait.VsCnt = std::min(Wait.VsCnt, OldVSCnt);
}
if (!WaitcntVsCntInstr) {
- WaitcntVsCntInstr = &*II;
+ WaitcntVsCntInstr = &II;
} else {
- II->eraseFromParent();
+ II.eraseFromParent();
Modified = true;
}
}
@@ -862,9 +893,14 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
Wait.LgkmCnt = ~0u;
Wait.ExpCnt = ~0u;
- LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << *MI << "New Instr: " << *WaitcntInstr
- << '\n');
+ LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
+ ? dbgs() << "applyPreexistingWaitcnt\n"
+ << "New Instr at block end: " << *WaitcntInstr
+ << '\n'
+ : dbgs() << "applyPreexistingWaitcnt\n"
+ << "Old Instr: " << *It
+ << "New Instr: " << *WaitcntInstr << '\n');
+
} else {
WaitcntInstr->eraseFromParent();
Modified = true;
@@ -885,9 +921,13 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
ScoreBrackets.applyWaitcnt(Wait);
Wait.VsCnt = ~0u;
- LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << *MI
- << "New Instr: " << *WaitcntVsCntInstr << '\n');
+ LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
+ ? dbgs() << "applyPreexistingWaitcnt\n"
+ << "New Instr at block end: "
+ << *WaitcntVsCntInstr << '\n'
+ : dbgs() << "applyPreexistingWaitcnt\n"
+ << "Old Instr: " << *It
+ << "New Instr: " << *WaitcntVsCntInstr << '\n');
} else {
WaitcntVsCntInstr->eraseFromParent();
Modified = true;
@@ -928,16 +968,18 @@ static bool callWaitsOnFunctionReturn(const MachineInstr &MI) {
/// and if so what the value of each counter is.
/// The "score bracket" is bound by the lower bound and upper bound
/// scores (*_score_LB and *_score_ub respectively).
-bool SIInsertWaitcnts::generateWaitcntInstBefore(
- MachineInstr &MI, WaitcntBrackets &ScoreBrackets,
- MachineInstr *OldWaitcntInstr) {
+/// If FlushVmCnt is true, that means that we want to generate a s_waitcnt to
+/// flush the vmcnt counter here.
+bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr,
+ bool FlushVmCnt) {
setForceEmitWaitcnt();
if (MI.isMetaInstruction())
return false;
AMDGPU::Waitcnt Wait;
- bool Modified = false;
// FIXME: This should have already been handled by the memory legalizer.
// Removing this currently doesn't affect any lit tests, but we need to
@@ -955,16 +997,17 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+ MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
- MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
- ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_) ==
- AMDGPU::SendMsg::ID_GS_DONE)) {
+ ST->hasLegacyGeometry() &&
+ ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_PreGFX11_) ==
+ AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) {
Wait.VmCnt = 0;
}
#if 0 // TODO: the following blocks of logic when we have fence.
@@ -1040,7 +1083,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
// The function is going to insert a wait on everything in its prolog.
// This still needs to be careful if the call target is a load (e.g. a GOT
- // load). We also need to check WAW depenancy with saved PC.
+ // load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();
int CallAddrOpIdx =
@@ -1089,7 +1132,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
SLoadAddresses.erase(Ptr);
}
unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUAS::LOCAL_ADDRESS)
+ if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::FLAT_ADDRESS)
+ continue;
+    // No need to wait before a load from VMEM to LDS.
+ if (mayWriteLDSThroughDMA(MI))
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
// VM_CNT is only relevant to vgpr or LDS.
@@ -1123,7 +1169,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
ScoreBrackets.clearVgprVmemTypes(RegNo);
}
- if (Op.isDef()) {
+ if (Op.isDef() || ScoreBrackets.hasPendingEvent(EXP_LDS_ACCESS)) {
ScoreBrackets.determineWait(
EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
@@ -1170,47 +1216,93 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
if (ForceEmitWaitcnt[VS_CNT])
Wait.VsCnt = 0;
- if (OldWaitcntInstr) {
+ if (FlushVmCnt) {
+ unsigned UB = ScoreBrackets.getScoreUB(VM_CNT);
+ unsigned LB = ScoreBrackets.getScoreLB(VM_CNT);
+ if (UB - LB != 0)
+ Wait.VmCnt = 0;
+ }
+
+ return generateWaitcnt(Wait, MI.getIterator(), *MI.getParent(), ScoreBrackets,
+ OldWaitcntInstr);
+}
+
+// Add a waitcnt to flush the vmcnt counter at the end of the given block if
+// needed.
+bool SIInsertWaitcnts::generateWaitcntBlockEnd(MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr) {
+ AMDGPU::Waitcnt Wait;
+
+ unsigned UB = ScoreBrackets.getScoreUB(VM_CNT);
+ unsigned LB = ScoreBrackets.getScoreLB(VM_CNT);
+ if (UB - LB == 0)
+ return false;
+
+ Wait.VmCnt = 0;
+
+ return generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
+ OldWaitcntInstr);
+}
+
+bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
+ MachineBasicBlock::instr_iterator It,
+ MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr) {
+ bool Modified = false;
+ const DebugLoc &DL = Block.findDebugLoc(It);
+
+ if (OldWaitcntInstr)
// Try to merge the required wait with preexisting waitcnt instructions.
// Also erase redundant waitcnt.
Modified =
- applyPreexistingWaitcnt(ScoreBrackets, *OldWaitcntInstr, Wait, &MI);
- } else {
- // Update waitcnt brackets after determining the required wait.
+ applyPreexistingWaitcnt(ScoreBrackets, *OldWaitcntInstr, Wait, It);
+ else
ScoreBrackets.applyWaitcnt(Wait);
+
+ // ExpCnt can be merged into VINTERP.
+ if (Wait.ExpCnt != ~0u && It != Block.instr_end() &&
+ SIInstrInfo::isVINTERP(*It)) {
+ MachineOperand *WaitExp =
+ TII->getNamedOperand(*It, AMDGPU::OpName::waitexp);
+ if (Wait.ExpCnt < WaitExp->getImm()) {
+ WaitExp->setImm(Wait.ExpCnt);
+ Modified = true;
+ }
+ Wait.ExpCnt = ~0u;
+
+    LLVM_DEBUG(dbgs() << "generateWaitcnt\n"
+ << "Update Instr: " << *It);
}
// Build new waitcnt instructions unless no wait is needed or the old waitcnt
// instruction was modified to handle the required wait.
if (Wait.hasWaitExceptVsCnt()) {
unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
- auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
- MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(Enc);
+ auto SWaitInst =
+ BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
TrackedWaitcntSet.insert(SWaitInst);
Modified = true;
- LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << MI
- << "New Instr: " << *SWaitInst << '\n');
+ LLVM_DEBUG(dbgs() << "generateWaitcnt\n";
+ if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It;
+ dbgs() << "New Instr: " << *SWaitInst << '\n');
}
if (Wait.hasWaitVsCnt()) {
assert(ST->hasVscnt());
- auto SWaitInst =
- BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
- TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(Wait.VsCnt);
+ auto SWaitInst = BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(Wait.VsCnt);
TrackedWaitcntSet.insert(SWaitInst);
Modified = true;
- LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << MI
- << "New Instr: " << *SWaitInst << '\n');
+ LLVM_DEBUG(dbgs() << "generateWaitcnt\n";
+ if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It;
+ dbgs() << "New Instr: " << *SWaitInst << '\n');
}
-
return Modified;
}
@@ -1338,6 +1430,11 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
      // May need to wait for anything.
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
}
+ } else if (SIInstrInfo::isLDSDIR(Inst)) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_LDS_ACCESS, Inst);
+ } else if (TII->isVINTERP(Inst)) {
+ int64_t Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::waitexp)->getImm();
+ ScoreBrackets->applyWaitcnt(EXP_CNT, Imm);
} else if (SIInstrInfo::isEXP(Inst)) {
unsigned Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
@@ -1349,6 +1446,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
} else {
switch (Inst.getOpcode()) {
case AMDGPU::S_SENDMSG:
+ case AMDGPU::S_SENDMSG_RTN_B32:
+ case AMDGPU::S_SENDMSG_RTN_B64:
case AMDGPU::S_SENDMSGHALT:
ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst);
break;
@@ -1476,8 +1575,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
continue;
}
+ bool FlushVmCnt = Block.getFirstTerminator() == Inst &&
+ isPreheaderToFlush(Block, ScoreBrackets);
+
// Generate an s_waitcnt instruction to be placed before Inst, if needed.
- Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr);
+ Modified |= generateWaitcntInstBefore(Inst, ScoreBrackets, OldWaitcntInstr,
+ FlushVmCnt);
OldWaitcntInstr = nullptr;
// Restore vccz if it's not known to be correct already.
@@ -1562,9 +1665,101 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
++Iter;
}
+ if (Block.getFirstTerminator() == Block.end() &&
+ isPreheaderToFlush(Block, ScoreBrackets))
+ Modified |= generateWaitcntBlockEnd(Block, ScoreBrackets, OldWaitcntInstr);
+
return Modified;
}
+// Return true if the given machine basic block is a preheader of a loop in
+// which we want to flush the vmcnt counter, and false otherwise.
+bool SIInsertWaitcnts::isPreheaderToFlush(MachineBasicBlock &MBB,
+ WaitcntBrackets &ScoreBrackets) {
+ if (PreheadersToFlush.count(&MBB))
+ return PreheadersToFlush[&MBB];
+
+ auto UpdateCache = [&](bool val) {
+ PreheadersToFlush[&MBB] = val;
+ return val;
+ };
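+  // The result is cached because shouldFlushVmCnt() walks every instruction
+  // in the loop, and the same preheader can be queried more than once.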
+
+ MachineBasicBlock *Succ = MBB.getSingleSuccessor();
+ if (!Succ)
+ return UpdateCache(false);
+
+ MachineLoop *Loop = MLI->getLoopFor(Succ);
+ if (!Loop)
+ return UpdateCache(false);
+
+ if (Loop->getLoopPreheader() == &MBB && shouldFlushVmCnt(Loop, ScoreBrackets))
+ return UpdateCache(true);
+
+ return UpdateCache(false);
+}
+
+// Return true if it is better to flush the vmcnt counter in the preheader of
+// the given loop. We currently decide to flush in two situations:
+// 1. The loop contains vmem store(s), no vmem load and at least one use of a
+// vgpr containing a value that is loaded outside of the loop. (Only on
+// targets with no vscnt counter).
+// 2. The loop contains vmem load(s), but the loaded values are not used in the
+// loop, and at least one use of a vgpr containing a value that is loaded
+// outside of the loop.
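+// In both situations the values in question were loaded before the loop was
+// entered, so a single vmcnt flush in the preheader replaces waits that would
+// otherwise be generated on every iteration.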
+bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
+ WaitcntBrackets &Brackets) {
+ bool HasVMemLoad = false;
+ bool HasVMemStore = false;
+ bool UsesVgprLoadedOutside = false;
+ DenseSet<Register> VgprUse;
+ DenseSet<Register> VgprDef;
+
+ for (MachineBasicBlock *MBB : ML->blocks()) {
+ for (MachineInstr &MI : *MBB) {
+ if (SIInstrInfo::isVMEM(MI)) {
+ if (MI.mayLoad())
+ HasVMemLoad = true;
+ if (MI.mayStore())
+ HasVMemStore = true;
+ }
+ for (unsigned I = 0; I < MI.getNumOperands(); I++) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
+ continue;
+ RegInterval Interval = Brackets.getRegInterval(&MI, TII, MRI, TRI, I);
+ // Vgpr use
+ if (Op.isUse()) {
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ // If we find a register that is loaded inside the loop, 1. and 2.
+ // are invalidated and we can exit.
+ if (VgprDef.contains(RegNo))
+ return false;
+ VgprUse.insert(RegNo);
+ // If at least one of Op's registers is in the score brackets, the
+ // value is likely loaded outside of the loop.
+ if (Brackets.getRegScore(RegNo, VM_CNT) > 0) {
+ UsesVgprLoadedOutside = true;
+ break;
+ }
+ }
+ }
+ // VMem load vgpr def
+ else if (SIInstrInfo::isVMEM(MI) && MI.mayLoad() && Op.isDef())
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ // If we find a register that is loaded inside the loop, 1. and 2.
+ // are invalidated and we can exit.
+ if (VgprUse.contains(RegNo))
+ return false;
+ VgprDef.insert(RegNo);
+ }
+ }
+ }
+ }
+ if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside)
+ return true;
+ return HasVMemLoad && UsesVgprLoadedOutside;
+}
+
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
@@ -1572,6 +1767,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
IV = AMDGPU::getIsaVersion(ST->getCPU());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
PDT = &getAnalysis<MachinePostDominatorTree>();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index e39f52875f1f..b398e108bf62 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -48,6 +48,12 @@ class InstSI <dag outs, dag ins, string asm = "",
field bit VGPRSpill = 0;
field bit SGPRSpill = 0;
+ // LDSDIR instruction format.
+ field bit LDSDIR = 0;
+
+ // VINTERP instruction format.
+ field bit VINTERP = 0;
+
// High bits - other information.
field bit VM_CNT = 0;
field bit EXP_CNT = 0;
@@ -141,6 +147,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// Atomic with return.
field bit IsAtomicRet = 0;
+ // This bit indicates that this is one of the WMMA instructions.
+ field bit IsWMMA = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -173,6 +182,9 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{24} = VGPRSpill;
let TSFlags{25} = SGPRSpill;
+ let TSFlags{26} = LDSDIR;
+ let TSFlags{27} = VINTERP;
+
let TSFlags{32} = VM_CNT;
let TSFlags{33} = EXP_CNT;
let TSFlags{34} = LGKM_CNT;
@@ -215,6 +227,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{58} = IsAtomicRet;
+ let TSFlags{59} = IsWMMA;
+
let SchedRW = [Write32Bit];
let AsmVariantName = AMDGPUAsmVariants.Default;
@@ -261,6 +275,11 @@ class Enc64 {
int Size = 8;
}
+class Enc96 {
+ field bits<96> Inst;
+ int Size = 12;
+}
+
def CPolBit {
int GLC = 0;
int SLC = 1;
@@ -284,7 +303,7 @@ class VINTRPe <bits<2> op> : Enc32 {
let Inst{31-26} = 0x32; // encoding
}
-class MIMGe : Enc64 {
+class MIMGe_gfxpre11 : Enc64 {
bits<10> vdata;
bits<4> dmask;
bits<1> unorm;
@@ -309,7 +328,7 @@ class MIMGe : Enc64 {
let Inst{63} = d16;
}
-class MIMGe_gfx6789 <bits<8> op> : MIMGe {
+class MIMGe_gfx6789 <bits<8> op> : MIMGe_gfxpre11 {
bits<8> vaddr;
bits<1> da;
@@ -321,7 +340,7 @@ class MIMGe_gfx6789 <bits<8> op> : MIMGe {
let Inst{39-32} = vaddr;
}
-class MIMGe_gfx90a <bits<8> op> : MIMGe {
+class MIMGe_gfx90a <bits<8> op> : MIMGe_gfxpre11 {
bits<8> vaddr;
bits<1> da;
@@ -333,7 +352,7 @@ class MIMGe_gfx90a <bits<8> op> : MIMGe {
let Inst{39-32} = vaddr;
}
-class MIMGe_gfx10 <bits<8> op> : MIMGe {
+class MIMGe_gfx10 <bits<8> op> : MIMGe_gfxpre11 {
bits<8> vaddr0;
bits<3> dim;
bits<2> nsa;
@@ -349,12 +368,46 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
let Inst{62} = a16;
}
+class MIMGe_gfx11 <bits<8> op> : Enc64 {
+ bits<8> vdata;
+ bits<4> dmask;
+ bits<1> unorm;
+ bits<5> cpol;
+ bits<1> r128;
+ bits<1> tfe;
+ bits<1> lwe;
+ bits<7> srsrc;
+ bits<7> ssamp;
+ bit d16;
+ bits<1> a16;
+ bits<8> vaddr0;
+ bits<3> dim;
+ bits<1> nsa;
+
+ let Inst{0} = nsa;
+ let Inst{4-2} = dim;
+ let Inst{7} = unorm;
+ let Inst{11-8} = dmask;
+ let Inst{12} = cpol{CPolBit.SLC};
+ let Inst{13} = cpol{CPolBit.DLC};
+ let Inst{14} = cpol{CPolBit.GLC};
+ let Inst{15} = r128;
+ let Inst{16} = a16;
+ let Inst{17} = d16;
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = vaddr0;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{53} = tfe;
+ let Inst{54} = lwe;
+ let Inst{62-58} = ssamp{6-2};
+}
+
class EXPe : Enc64 {
bits<4> en;
bits<6> tgt;
- bits<1> compr;
bits<1> done;
- bits<1> vm;
bits<8> src0;
bits<8> src1;
bits<8> src2;
@@ -362,9 +415,7 @@ class EXPe : Enc64 {
let Inst{3-0} = en;
let Inst{9-4} = tgt;
- let Inst{10} = compr;
let Inst{11} = done;
- let Inst{12} = vm;
let Inst{31-26} = 0x3e;
let Inst{39-32} = src0;
let Inst{47-40} = src1;
@@ -372,6 +423,22 @@ class EXPe : Enc64 {
let Inst{63-56} = src3;
}
+// Pre-GFX11 encoding has compr and vm bits.
+class EXPe_ComprVM : EXPe {
+ bits<1> compr;
+ bits<1> vm;
+
+ let Inst{10} = compr;
+ let Inst{12} = vm;
+}
+
+// GFX11+ encoding has row bit.
+class EXPe_Row : EXPe {
+ bits<1> row;
+
+ let Inst{13} = row;
+}
+
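As a rough C++ analogue of the TableGen split above (bit positions copied from the records; src0-src3 omitted for brevity):

    #include <cstdint>
    #include <cstdio>

    // Shared EXP fields: Inst{3-0}=en, Inst{9-4}=tgt, Inst{11}=done,
    // Inst{31-26}=0x3e.
    uint64_t encodeEXPBase(unsigned En, unsigned Tgt, bool Done) {
      return (uint64_t)(En & 0xf) | (uint64_t)(Tgt & 0x3f) << 4 |
             (uint64_t)Done << 11 | 0x3eULL << 26;
    }

    // Pre-GFX11 variant adds compr (bit 10) and vm (bit 12).
    uint64_t encodeEXPComprVM(unsigned En, unsigned Tgt, bool Done, bool Compr,
                              bool VM) {
      return encodeEXPBase(En, Tgt, Done) | (uint64_t)Compr << 10 |
             (uint64_t)VM << 12;
    }

    // GFX11+ variant adds row (bit 13).
    uint64_t encodeEXPRow(unsigned En, unsigned Tgt, bool Done, bool Row) {
      return encodeEXPBase(En, Tgt, Done) | (uint64_t)Row << 13;
    }

    int main() {
      std::printf("%#llx\n", (unsigned long long)encodeEXPComprVM(0xf, 0, 1, 1, 1));
      std::printf("%#llx\n", (unsigned long long)encodeEXPRow(0xf, 0, 1, 1));
    }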
let Uses = [EXEC] in {
class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0a2f9381e71f..814a7c446889 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -16,12 +16,12 @@
#include "AMDGPUInstrInfo.h"
#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
@@ -130,9 +130,31 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
return false;
}
-static bool readsExecAsData(const MachineInstr &MI) {
- if (MI.isCompare())
- return true;
+// Returns true if the scalar result of a VALU instruction depends on exec.
+static bool resultDependsOnExec(const MachineInstr &MI) {
+ // Ignore comparisons whose results are only used masked with exec.
+ // This allows some hoisting/sinking of VALU comparisons.
+ if (MI.isCompare()) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!DstReg.isVirtual())
+ return true;
+ for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) {
+ switch (Use.getOpcode()) {
+ case AMDGPU::S_AND_SAVEEXEC_B32:
+ case AMDGPU::S_AND_SAVEEXEC_B64:
+ break;
+ case AMDGPU::S_AND_B32:
+ case AMDGPU::S_AND_B64:
+ if (!Use.readsRegister(AMDGPU::EXEC))
+ return true;
+ break;
+ default:
+ return true;
+ }
+ }
+ return false;
+ }
switch (MI.getOpcode()) {
default:
@@ -147,7 +169,7 @@ static bool readsExecAsData(const MachineInstr &MI) {
bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
// Any implicit use of exec by VALU is not a real register read.
return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
- isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent());
+ isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
}
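The new check is easiest to see as a scan over the compare's users, where only exec-masking consumers are harmless; a simplified standalone model (hypothetical opcode enum, not MachineRegisterInfo):

    #include <initializer_list>

    enum class Use { AndSaveExec, AndReadingExec, Other };

    // Model of resultDependsOnExec for compares: the result is only
    // exec-independent if every user re-masks it with exec.
    bool compareDependsOnExec(std::initializer_list<Use> Users) {
      for (Use U : Users) {
        switch (U) {
        case Use::AndSaveExec:    // s_and_saveexec_b32/b64
        case Use::AndReadingExec: // s_and_b32/b64 with an exec operand
          break;
        default:                  // anything else observes per-lane bits
          return true;
        }
      }
      return false;
    }

    int main() {
      bool A = compareDependsOnExec({Use::AndSaveExec}); // false
      bool B = compareDependsOnExec({Use::Other});       // true
      return (!A && B) ? 0 : 1;
    }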
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
@@ -181,7 +203,7 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Offset0Idx == -1 || Offset1Idx == -1)
return false;
- // XXX - be careful of datalesss loads
+ // XXX - be careful of dataless loads
// getNamedOperandIdx returns the index for MachineInstrs. Since they
// include the output in the operand list, but SDNodes don't, we need to
// subtract the index by one.
@@ -362,6 +384,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
if (DataOpIdx == -1)
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+ if (DataOpIdx == -1) // LDS DMA
+ return false;
Width = getOpSize(LdSt, DataOpIdx);
return true;
}
@@ -410,6 +434,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
if (DataOpIdx == -1)
DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+ if (DataOpIdx == -1) // LDS DMA
+ return false;
Width = getOpSize(LdSt, DataOpIdx);
return true;
}
@@ -464,7 +490,7 @@ bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
return false;
}
- // In order to avoid regester pressure, on an average, the number of DWORDS
+ // In order to avoid register pressure, on average, the number of DWORDS
// loaded together by all clustered mem ops should not exceed 8. This is an
// empirical value based on certain observations and performance related
// experiments.
@@ -517,8 +543,9 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-/// Handle copying from SGPR to AGPR, or from AGPR to AGPR. It is not possible
-/// to directly copy, so an intermediate VGPR needs to be used.
+/// Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908. It is not
+/// possible to have a direct copy in these cases on GFX908, so an intermediate
+/// VGPR copy is required.
static void indirectCopyToAGPR(const SIInstrInfo &TII,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -527,10 +554,18 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
RegScavenger &RS,
Register ImpDefSuperReg = Register(),
Register ImpUseSuperReg = Register()) {
- const SIRegisterInfo &RI = TII.getRegisterInfo();
+ assert((TII.getSubtarget().hasMAIInsts() &&
+ !TII.getSubtarget().hasGFX90AInsts()) &&
+ "Expected GFX908 subtarget.");
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg) ||
- AMDGPU::AGPR_32RegClass.contains(SrcReg));
+ assert((AMDGPU::SReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
+ "Source register of the copy should be either an SGPR or an AGPR.");
+
+ assert(AMDGPU::AGPR_32RegClass.contains(DestReg) &&
+ "Destination register of the copy should be an AGPR.");
+
+ const SIRegisterInfo &RI = TII.getRegisterInfo();
// First try to find defining accvgpr_write to avoid temporary registers.
for (auto Def = MI, E = MBB.begin(); Def != E; ) {
@@ -581,23 +616,21 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
// Registers in the sequence are allocated contiguously so we can just
// use register number to pick one of three round-robin temps.
- unsigned RegNo = DestReg % 3;
- Register Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
- if (!Tmp)
- report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
- RS.setRegUsed(Tmp);
-
- if (!TII.getSubtarget().hasGFX90AInsts()) {
- // Only loop through if there are any free registers left, otherwise
- // scavenger may report a fatal error without emergency spill slot
- // or spill with the slot.
- while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
- Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
- if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
- break;
- Tmp = Tmp2;
- RS.setRegUsed(Tmp);
- }
+ unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
+ Register Tmp =
+ MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getVGPRForAGPRCopy();
+ assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
+ "VGPR used for an intermediate copy should have been reserved.");
+
+ // Only loop through if there are any free registers left, otherwise
+ // scavenger may report a fatal error without emergency spill slot
+ // or spill with the slot.
+ while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
+ Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+ if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
+ break;
+ Tmp = Tmp2;
+ RS.setRegUsed(Tmp);
}
// Insert copy to temporary VGPR.
@@ -796,7 +829,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (RC == &AMDGPU::AGPR_32RegClass) {
- if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) {
+ if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
+ (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
@@ -884,6 +918,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg);
if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
+ if (ST.hasMovB64()) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
if (ST.hasPackedFP32Ops()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg)
.addImm(SISrcMods::OP_SEL_1)
@@ -906,7 +945,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
- expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
+ const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
+ expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC,
+ Forward);
return;
}
@@ -915,7 +956,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (RI.isAGPRClass(RC)) {
if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
- else if (RI.hasVGPRs(SrcRC))
+ else if (RI.hasVGPRs(SrcRC) ||
+ (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
else
Opcode = AMDGPU::INSTRUCTION_LIST_END;
@@ -925,7 +967,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
(RI.isProperlyAlignedRC(*RC) &&
(SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
// TODO: In 96-bit case, could do a 64-bit mov and then a 32-bit mov.
- if (ST.hasPackedFP32Ops()) {
+ if (ST.hasMovB64()) {
+ Opcode = AMDGPU::V_MOV_B64_e32;
+ EltSize = 8;
+ } else if (ST.hasPackedFP32Ops()) {
Opcode = AMDGPU::V_PK_MOV_B32;
EltSize = 8;
}
@@ -1725,13 +1770,8 @@ unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
case AMDGPU::S_NOP:
return MI.getOperand(0).getImm() + 1;
-
- // FIXME: Any other pseudo instruction?
// SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The
// hazard, even if one exist, won't really be visible. Should we handle it?
- case AMDGPU::SI_MASKED_UNREACHABLE:
- case AMDGPU::WAVE_BARRIER:
- return 0;
}
}
@@ -1807,6 +1847,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
const MachineOperand &SrcOp = MI.getOperand(1);
// FIXME: Will this work for 64-bit floating point immediates?
assert(!SrcOp.isFPImm());
+ if (ST.hasMovB64()) {
+ MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
+ if (!isLiteralConstant(MI, 1) || isUInt<32>(SrcOp.getImm()))
+ break;
+ }
if (SrcOp.isImm()) {
APInt Imm(64, SrcOp.getImm());
APInt Lo(32, Imm.getLoBits(32).getZExtValue());
@@ -1887,6 +1932,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_SET_INACTIVE_B32: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ // FIXME: We may possibly optimize the COPY once we find ways to make LLVM
+ // optimizations (mainly Register Coalescer) aware of WWM register liveness.
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
@@ -1899,11 +1948,15 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_SET_INACTIVE_B64: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
- FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
MI.getOperand(0).getReg())
- .add(MI.getOperand(2));
+ .add(MI.getOperand(1));
+ expandPostRAPseudo(*Copy);
+ auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+ FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
+ Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(2));
expandPostRAPseudo(*Copy);
BuildMI(MBB, MI, DL, get(NotOpc), Exec)
.addReg(Exec);
@@ -2085,6 +2138,23 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
break;
}
+ case AMDGPU::SI_RETURN: {
+ const MachineFunction *MF = MBB.getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ // Hiding the return address use with SI_RETURN may lead to extra kills in
+ // the function and missing live-ins. We are fine in practice because callee
+ // saved register handling ensures the register value is restored before
+ // RET, but we need the undef flag here to appease the MachineVerifier
+ // liveness checks.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return))
+ .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef);
+
+ MIB.copyImplicitOps(MI);
+ MI.eraseFromParent();
+ break;
+ }
}
return true;
}
@@ -2093,6 +2163,13 @@ std::pair<MachineInstr*, MachineInstr*>
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
+ if (ST.hasMovB64() &&
+ AMDGPU::isLegal64BitDPPControl(
+ getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
+ MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
+ return std::make_pair(&MI, nullptr);
+ }
+
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction *MF = MBB.getParent();
@@ -2789,6 +2866,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64:
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
@@ -2801,35 +2880,15 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
}
}
-unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
- unsigned Kind) const {
- switch(Kind) {
- case PseudoSourceValue::Stack:
- case PseudoSourceValue::FixedStack:
- return AMDGPUAS::PRIVATE_ADDRESS;
- case PseudoSourceValue::ConstantPool:
- case PseudoSourceValue::GOT:
- case PseudoSourceValue::JumpTable:
- case PseudoSourceValue::GlobalValueCallEntry:
- case PseudoSourceValue::ExternalSymbolCallEntry:
- case PseudoSourceValue::TargetCustom:
- return AMDGPUAS::CONSTANT_ADDRESS;
- }
- return AMDGPUAS::FLAT_ADDRESS;
-}
+static constexpr unsigned ModifierOpNames[] = {
+ AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
+ AMDGPU::OpName::omod};
-static void removeModOperands(MachineInstr &MI) {
+void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
- int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src0_modifiers);
- int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src1_modifiers);
- int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src2_modifiers);
-
- MI.RemoveOperand(Src2ModIdx);
- MI.RemoveOperand(Src1ModIdx);
- MI.RemoveOperand(Src0ModIdx);
+ for (unsigned Name : reverse(ModifierOpNames))
+ MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, Name));
}
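removeModOperands relies on ModifierOpNames being listed in ascending operand order, so iterating it in reverse erases the highest index first and keeps the remaining indices valid; the same pattern in plain C++:

    #include <algorithm>
    #include <cassert>
    #include <functional>
    #include <vector>

    // Erase several positions from a vector; descending order keeps each
    // remaining index valid, mirroring reverse(ModifierOpNames) above.
    void eraseIndices(std::vector<int> &Ops, std::vector<size_t> Idx) {
      std::sort(Idx.begin(), Idx.end(), std::greater<size_t>());
      for (size_t I : Idx)
        Ops.erase(Ops.begin() + I);
    }

    int main() {
      std::vector<int> Ops{10, 11, 12, 13, 14};
      eraseIndices(Ops, {1, 3}); // erase index 3 first, then index 1
      assert((Ops == std::vector<int>{10, 12, 14}));
    }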
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
@@ -2841,7 +2900,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
default:
return false;
case AMDGPU::S_MOV_B64:
- // TODO: We could fold 64-bit immediates, but this get compilicated
+ // TODO: We could fold 64-bit immediates, but this gets complicated
// when there are sub-registers.
return false;
@@ -2921,7 +2980,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
// Multiplied part is the constant: Use v_madmk_{f16, f32}.
- // We should only expect these to be on src0 due to canonicalizations.
+ // We should only expect these to be on src0 due to canonicalization.
if (Src0->isReg() && Src0->getReg() == Reg) {
if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
return false;
@@ -2942,12 +3001,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
- // Remove these first since they are at the end.
- UseMI.RemoveOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
- UseMI.RemoveOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
-
Register Src1Reg = Src1->getReg();
unsigned Src1SubReg = Src1->getSubReg();
Src0->setReg(Src1Reg);
@@ -2966,7 +3019,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
removeModOperands(UseMI);
UseMI.setDesc(get(NewOpc));
- bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ bool DeleteDef = MRI->use_nodbg_empty(Reg);
if (DeleteDef)
DefMI.eraseFromParent();
@@ -3025,12 +3078,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
- // Remove these first since they are at the end.
- UseMI.RemoveOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod));
- UseMI.RemoveOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
-
if (Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
@@ -3049,7 +3096,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// constant and SGPR are illegal.
legalizeOperands(UseMI);
- bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ bool DeleteDef = MRI->use_nodbg_empty(Reg);
if (DeleteDef)
DefMI.eraseFromParent();
@@ -3192,34 +3239,68 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
+ MachineBasicBlock &MBB = *MI.getParent();
unsigned Opc = MI.getOpcode();
- bool IsF16 = false;
+
+ // Handle MFMA.
+ int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
+ if (NewMFMAOpc != -1) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+ MIB.add(MI.getOperand(I));
+ updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ return MIB;
+ }
+
+ if (SIInstrInfo::isWMMA(MI)) {
+ unsigned NewOpc = AMDGPU::mapWMMA2AddrTo3AddrOpcode(MI.getOpcode());
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .setMIFlags(MI.getFlags());
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+ MIB->addOperand(MI.getOperand(I));
+
+ updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+
+ return MIB;
+ }
+
+ // Handle MAC/FMAC.
+ bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
+ Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
- int NewMFMAOpc = -1;
+ bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
+ Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
+ Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
+ bool Src0Literal = false;
switch (Opc) {
default:
- NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
- if (NewMFMAOpc == -1)
- return nullptr;
- break;
+ return nullptr;
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_FMAC_F16_e64:
- IsF16 = true;
- LLVM_FALLTHROUGH;
case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_LEGACY_F32_e64:
case AMDGPU::V_FMAC_F32_e64:
+ case AMDGPU::V_FMAC_LEGACY_F32_e64:
case AMDGPU::V_FMAC_F64_e64:
break;
case AMDGPU::V_MAC_F16_e32:
case AMDGPU::V_FMAC_F16_e32:
- IsF16 = true;
- LLVM_FALLTHROUGH;
case AMDGPU::V_MAC_F32_e32:
+ case AMDGPU::V_MAC_LEGACY_F32_e32:
case AMDGPU::V_FMAC_F32_e32:
+ case AMDGPU::V_FMAC_LEGACY_F32_e32:
case AMDGPU::V_FMAC_F64_e32: {
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::src0);
@@ -3228,25 +3309,13 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return nullptr;
if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
- return nullptr;
+ Src0Literal = true;
break;
}
}
MachineInstrBuilder MIB;
- MachineBasicBlock &MBB = *MI.getParent();
-
- if (NewMFMAOpc != -1) {
- MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
- updateLiveVariables(LV, MI, *MIB);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- return MIB;
- }
-
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand *Src0Mods =
@@ -3255,10 +3324,13 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
const MachineOperand *Src1Mods =
getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+ const MachineOperand *Src2Mods =
+ getNamedOperand(MI, AMDGPU::OpName::src2_modifiers);
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
- if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 &&
+ if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
+ !IsLegacy &&
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
@@ -3271,11 +3343,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
// We cannot just remove the DefMI here, calling pass will crash.
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
- DefMI->RemoveOperand(I);
+ DefMI->removeOperand(I);
};
int64_t Imm;
- if (getFoldableImm(Src2, Imm, &DefMI)) {
+ if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3295,7 +3367,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
unsigned NewOpc = IsFMA
? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
- if (getFoldableImm(Src1, Imm, &DefMI)) {
+ if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
@@ -3309,7 +3381,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
}
- if (getFoldableImm(Src0, Imm, &DefMI)) {
+ if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) {
+ if (Src0Literal) {
+ Imm = Src0->getImm();
+ DefMI = nullptr;
+ }
if (pseudoToMCOpcode(NewOpc) != -1 &&
isOperandLegal(
MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
@@ -3322,16 +3398,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
updateLiveVariables(LV, MI, *MIB);
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ if (DefMI)
+ killDef();
return MIB;
}
}
}
- unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
- : IsF64 ? AMDGPU::V_FMA_F64_e64
- : AMDGPU::V_FMA_F32_e64)
- : (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64);
+ // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma
+ // because VOP3 does not allow a literal operand.
+ // TODO: Remove this restriction for GFX10.
+ if (Src0Literal)
+ return nullptr;
+
+ unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
+ : IsF64 ? AMDGPU::V_FMA_F64_e64
+ : IsLegacy
+ ? AMDGPU::V_FMA_LEGACY_F32_e64
+ : AMDGPU::V_FMA_F32_e64
+ : IsF16 ? AMDGPU::V_MAD_F16_e64
+ : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
+ : AMDGPU::V_MAD_F32_e64;
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
@@ -3341,7 +3428,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Src0)
.addImm(Src1Mods ? Src1Mods->getImm() : 0)
.add(*Src1)
- .addImm(0) // Src mods
+ .addImm(Src2Mods ? Src2Mods->getImm() : 0)
.add(*Src2)
.addImm(Clamp ? Clamp->getImm() : 0)
.addImm(Omod ? Omod->getImm() : 0);
@@ -3383,6 +3470,9 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
return true;
+ if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)
+ return true;
+
// Target-independent instructions do not have an implicit-use of EXEC, even
// when they operate on VGPRs. Treating EXEC modifications as scheduling
// boundaries prevents incorrect movements of such instructions.
@@ -3676,11 +3766,8 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
}
bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
- return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
- hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
- hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
- hasModifiersSet(MI, AMDGPU::OpName::omod);
+ return any_of(ModifierOpNames,
+ [&](unsigned Name) { return hasModifiersSet(MI, Name); });
}
bool SIInstrInfo::canShrink(const MachineInstr &MI,
@@ -3754,18 +3841,19 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI,
MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
unsigned Op32) const {
- MachineBasicBlock *MBB = MI.getParent();;
+ MachineBasicBlock *MBB = MI.getParent();
MachineInstrBuilder Inst32 =
BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32))
.setMIFlags(MI.getFlags());
// Add the dst operand if the 32-bit encoding also has an explicit $vdst.
// For VOPC instructions, this is replaced by an implicit def of vcc.
- int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
- if (Op32DstIdx != -1) {
+ if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst) != -1) {
// dst
Inst32.add(MI.getOperand(0));
- } else {
+ } else if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::sdst) != -1) {
+ // VOPCX instructions won't be writing to an explicit dst, so this should
+ // not fail for these instructions.
assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
(MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&
"Unexpected case");
@@ -3816,7 +3904,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
// Null is free
- if (MO.getReg() == AMDGPU::SGPR_NULL)
+ if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)
return false;
// SGPRs use the constant bus
@@ -3951,6 +4039,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
@@ -4031,9 +4120,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
- const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
-
- for (int OpIdx: OpIndicies) {
+ for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
if (OpIdx == -1)
continue;
const MachineOperand &MO = MI.getOperand(OpIdx);
@@ -4150,24 +4237,25 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
// Verify VOP*. Ignore multiple sgpr operands on writelane.
- if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
- && (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI) || isSDWA(MI))) {
- // Only look at the true operands. Only a real operand can use the constant
- // bus, and we don't want to check pseudo-operands like the source modifier
- // flags.
- const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
-
+ if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
unsigned ConstantBusCount = 0;
bool UsesLiteral = false;
const MachineOperand *LiteralVal = nullptr;
- if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
+ int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
+ if (ImmIdx != -1) {
++ConstantBusCount;
+ UsesLiteral = true;
+ LiteralVal = &MI.getOperand(ImmIdx);
+ }
SmallVector<Register, 2> SGPRsUsed;
Register SGPRUsed;
- for (int OpIdx : OpIndices) {
+ // Only look at the true operands. Only a real operand can use the constant
+ // bus, and we don't want to check pseudo-operands like the source modifier
+ // flags.
+ for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {
if (OpIdx == -1)
break;
const MachineOperand &MO = MI.getOperand(OpIdx);
@@ -4186,8 +4274,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
UsesLiteral = true;
LiteralVal = &MO;
} else if (!MO.isIdenticalTo(*LiteralVal)) {
- assert(isVOP3(MI));
- ErrInfo = "VOP3 instruction uses more than one literal";
+ assert(isVOP2(MI) || isVOP3(MI));
+ ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
return false;
}
}
@@ -4196,7 +4284,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
SGPRUsed = findImplicitSGPRRead(MI);
if (SGPRUsed != AMDGPU::NoRegister) {
- // Implicit uses may safely overlap true overands
+ // Implicit uses may safely overlap true operands
if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
return !RI.regsOverlap(SGPRUsed, SGPR);
})) {
@@ -4225,7 +4313,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
unsigned SGPRCount = 0;
Register SGPRUsed = AMDGPU::NoRegister;
- for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {
+ for (int OpIdx : {Src0Idx, Src1Idx}) {
if (OpIdx == -1)
break;
@@ -4272,16 +4360,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (isSOP2(MI) || isSOPC(MI)) {
const MachineOperand &Src0 = MI.getOperand(Src0Idx);
const MachineOperand &Src1 = MI.getOperand(Src1Idx);
- unsigned Immediates = 0;
- if (!Src0.isReg() &&
- !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType))
- Immediates++;
- if (!Src1.isReg() &&
- !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType))
- Immediates++;
-
- if (Immediates > 1) {
+ if (!Src0.isReg() && !Src1.isReg() &&
+ !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType) &&
+ !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType) &&
+ !Src0.isIdenticalTo(Src1)) {
ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
return false;
}
@@ -4364,10 +4447,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
if (isSMRD(MI)) {
- if (MI.mayStore()) {
+ if (MI.mayStore() &&
+ ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// The register offset form of scalar stores may only use m0 as the
// soffset register.
- const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
+ const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soffset);
if (Soff && Soff->getReg() != AMDGPU::M0) {
ErrInfo = "scalar stores must use m0 as offset register";
return false;
@@ -4477,7 +4561,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
((DstIdx >= 0 &&
@@ -4527,24 +4610,45 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- if (ST.needsAlignedVGPRs() &&
- (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
- MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
- MI.getOpcode() == AMDGPU::DS_GWS_BARRIER)) {
- const MachineOperand *Op = getNamedOperand(MI, AMDGPU::OpName::data0);
- Register Reg = Op->getReg();
- bool Aligned = true;
- if (Reg.isPhysical()) {
- Aligned = !(RI.getHWRegIndex(Reg) & 1);
- } else {
+ if (ST.needsAlignedVGPRs()) {
+ const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
+ const MachineOperand *Op = getNamedOperand(MI, OpName);
+ if (!Op)
+ return true;
+ Register Reg = Op->getReg();
+ if (Reg.isPhysical())
+ return !(RI.getHWRegIndex(Reg) & 1);
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
- Aligned = RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
- !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
+ return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
+ !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
+ };
+
+ if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
+ MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
+ MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
+
+ if (!isAlignedReg(AMDGPU::OpName::data0)) {
+ ErrInfo = "Subtarget requires even aligned vector registers "
+ "for DS_GWS instructions";
+ return false;
+ }
+ }
+
+ if (isMIMG(MI)) {
+ if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
+ ErrInfo = "Subtarget requires even aligned vector registers "
+ "for vaddr operand of image instructions";
+ return false;
+ }
}
+ }
- if (!Aligned) {
- ErrInfo = "Subtarget requires even aligned vector registers "
- "for DS_GWS instructions";
+ if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ !ST.hasGFX90AInsts()) {
+ const MachineOperand *Src = getNamedOperand(MI, AMDGPU::OpName::src0);
+ if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
+ ErrInfo = "Invalid register class: "
+ "v_accvgpr_write with an SGPR is not supported on this GPU";
return false;
}
}
@@ -4641,26 +4745,40 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
"Unexpected scalar opcode without corresponding vector one!");
}
-static unsigned adjustAllocatableRegClass(const GCNSubtarget &ST,
- const MachineRegisterInfo &MRI,
- const MCInstrDesc &TID,
- unsigned RCID,
- bool IsAllocatable) {
+static const TargetRegisterClass *
+adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
+ const MachineRegisterInfo &MRI,
+ const MCInstrDesc &TID, unsigned RCID,
+ bool IsAllocatable) {
if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
(((TID.mayLoad() || TID.mayStore()) &&
!(TID.TSFlags & SIInstrFlags::VGPRSpill)) ||
(TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) {
switch (RCID) {
- case AMDGPU::AV_32RegClassID: return AMDGPU::VGPR_32RegClassID;
- case AMDGPU::AV_64RegClassID: return AMDGPU::VReg_64RegClassID;
- case AMDGPU::AV_96RegClassID: return AMDGPU::VReg_96RegClassID;
- case AMDGPU::AV_128RegClassID: return AMDGPU::VReg_128RegClassID;
- case AMDGPU::AV_160RegClassID: return AMDGPU::VReg_160RegClassID;
+ case AMDGPU::AV_32RegClassID:
+ RCID = AMDGPU::VGPR_32RegClassID;
+ break;
+ case AMDGPU::AV_64RegClassID:
+ RCID = AMDGPU::VReg_64RegClassID;
+ break;
+ case AMDGPU::AV_96RegClassID:
+ RCID = AMDGPU::VReg_96RegClassID;
+ break;
+ case AMDGPU::AV_128RegClassID:
+ RCID = AMDGPU::VReg_128RegClassID;
+ break;
+ case AMDGPU::AV_160RegClassID:
+ RCID = AMDGPU::VReg_160RegClassID;
+ break;
+ case AMDGPU::AV_512RegClassID:
+ RCID = AMDGPU::VReg_512RegClassID;
+ break;
default:
break;
}
}
- return RCID;
+
+ return RI.getProperlyAlignedRC(RI.getRegClass(RCID));
}
const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
@@ -4673,7 +4791,7 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
bool IsAllocatable = false;
if (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::FLAT)) {
// vdst and vdata should be both VGPR or AGPR, same for the DS instructions
- // with two data operands. Request register class constainted to VGPR only
+ // with two data operands. Request register class constrained to VGPR only
// of both operands present as Machine Copy Propagation can not check this
// constraint and possibly other passes too.
//
@@ -4690,9 +4808,8 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
AMDGPU::OpName::data1) != -1;
}
}
- RegClass = adjustAllocatableRegClass(ST, MF.getRegInfo(), TID, RegClass,
- IsAllocatable);
- return RI.getRegClass(RegClass);
+ return adjustAllocatableRegClass(ST, RI, MF.getRegInfo(), TID, RegClass,
+ IsAllocatable);
}
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
@@ -4709,8 +4826,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
}
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
- RCID = adjustAllocatableRegClass(ST, MRI, Desc, RCID, true);
- return RI.getRegClass(RCID);
+ return adjustAllocatableRegClass(ST, RI, MRI, Desc, RCID, true);
}
void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
@@ -4797,7 +4913,7 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
assert(Inst.getNumExplicitOperands() == 3);
MachineOperand Op1 = Inst.getOperand(1);
- Inst.RemoveOperand(1);
+ Inst.removeOperand(1);
Inst.addOperand(Op1);
}
@@ -4851,9 +4967,9 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
MO = &MI.getOperand(OpIdx);
int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
- int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
+ int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
- if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--)
+ if (isLiteralConstantLike(*MO, OpInfo) && !LiteralLimit--)
return false;
SmallDenseSet<RegSubRegPair> SGPRsUsed;
@@ -4872,12 +4988,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return false;
SGPRsUsed.insert(SGPR);
}
- } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
- if (--ConstantBusLimit <= 0)
- return false;
- } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) &&
- isLiteralConstantLike(Op, InstDesc.OpInfo[i])) {
- if (!VOP3LiteralLimit--)
+ } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32 ||
+ (AMDGPU::isSISrcOperand(InstDesc, i) &&
+ isLiteralConstantLike(Op, InstDesc.OpInfo[i]))) {
+ if (!LiteralLimit--)
return false;
if (--ConstantBusLimit <= 0)
return false;
@@ -4886,7 +5000,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
}
if (MO->isReg()) {
- assert(DefinedRC);
+ if (!DefinedRC) {
+ // This operand allows any register.
+ return true;
+ }
if (!isLegalRegOperand(MRI, OpInfo, *MO))
return false;
bool IsAGPR = RI.isAGPR(MRI, MO->getReg());
@@ -4916,7 +5033,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
return false;
}
- if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
(int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
RI.isSGPRReg(MRI, MO->getReg()))
return false;
@@ -5186,7 +5303,7 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI);
SBase->setReg(SGPR);
}
- MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff);
+ MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soffset);
if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) {
Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI);
SOff->setReg(SGPR);
@@ -5232,16 +5349,16 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
const MCInstrDesc &NewDesc = get(NewOpc);
Inst.setDesc(NewDesc);
- // Callers expect interator to be valid after this call, so modify the
+ // Callers expect iterator to be valid after this call, so modify the
// instruction in place.
if (OldVAddrIdx == NewVAddrIdx) {
MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx);
// Clear use list from the old vaddr holding a zero register.
MRI.removeRegOperandFromUseList(&NewVAddr);
MRI.moveOperands(&NewVAddr, &SAddr, 1);
- Inst.RemoveOperand(OldSAddrIdx);
+ Inst.removeOperand(OldSAddrIdx);
// Update the use list with the pointer we have just moved from vaddr to
- // saddr poisition. Otherwise new vaddr will be missing from the use list.
+ // saddr position. Otherwise new vaddr will be missing from the use list.
MRI.removeRegOperandFromUseList(&NewVAddr);
MRI.addRegOperandToUseList(&NewVAddr);
} else {
@@ -5251,14 +5368,14 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
AMDGPU::OpName::vdst_in);
- // RemoveOperand doesn't try to fixup tied operand indexes at it goes, so
+ // removeOperand doesn't try to fix up tied operand indexes as it goes, so
// it asserts. Untie the operands for now and retie them afterwards.
if (NewVDstIn != -1) {
int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
Inst.untieRegOperand(OldVDstIn);
}
- Inst.RemoveOperand(OldVAddrIdx);
+ Inst.removeOperand(OldVAddrIdx);
if (NewVDstIn != -1) {
int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
@@ -5340,7 +5457,8 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
static void
emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
- const DebugLoc &DL, MachineOperand &Rsrc) {
+ MachineBasicBlock &BodyBB, const DebugLoc &DL,
+ MachineOperand &Rsrc) {
MachineFunction &MF = *OrigBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -5398,7 +5516,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
else
Cmp.addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx, 2));
- // Combine the comparision results with AND.
+ // Combine the comparison results with AND.
if (CondReg == AMDGPU::NoRegister) // First.
CondReg = NewCondReg;
else { // If not the first, we create an AND.
@@ -5433,14 +5551,14 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
.addReg(CondReg, RegState::Kill);
// The original instruction is here; we insert the terminators after it.
- I = LoopBB.end();
+ I = BodyBB.end();
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- BuildMI(LoopBB, I, DL, TII.get(XorTermOpc), Exec)
+ BuildMI(BodyBB, I, DL, TII.get(XorTermOpc), Exec)
.addReg(Exec)
.addReg(SaveExec);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB);
+ BuildMI(BodyBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB);
}
// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
@@ -5487,31 +5605,35 @@ loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
// To insert the loop we need to split the block. Move everything after this
// point to a new block, and insert a new empty block between the two.
MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *BodyBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
MachineFunction::iterator MBBI(MBB);
++MBBI;
MF.insert(MBBI, LoopBB);
+ MF.insert(MBBI, BodyBB);
MF.insert(MBBI, RemainderBB);
- LoopBB->addSuccessor(LoopBB);
- LoopBB->addSuccessor(RemainderBB);
+ LoopBB->addSuccessor(BodyBB);
+ BodyBB->addSuccessor(LoopBB);
+ BodyBB->addSuccessor(RemainderBB);
- // Move Begin to MI to the LoopBB, and the remainder of the block to
+ // Move Begin to MI to the BodyBB, and the remainder of the block to
// RemainderBB.
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
RemainderBB->splice(RemainderBB->begin(), &MBB, End, MBB.end());
- LoopBB->splice(LoopBB->begin(), &MBB, Begin, MBB.end());
+ BodyBB->splice(BodyBB->begin(), &MBB, Begin, MBB.end());
MBB.addSuccessor(LoopBB);
// Update dominators. We know that MBB immediately dominates LoopBB, that
- // LoopBB immediately dominates RemainderBB, and that RemainderBB immediately
- // dominates all of the successors transferred to it from MBB that MBB used
- // to properly dominate.
+ // LoopBB immediately dominates BodyBB, and BodyBB immediately dominates
+ // RemainderBB. RemainderBB immediately dominates all of the successors
+ // transferred to it from MBB that MBB used to properly dominate.
if (MDT) {
MDT->addNewBlock(LoopBB, &MBB);
- MDT->addNewBlock(RemainderBB, LoopBB);
+ MDT->addNewBlock(BodyBB, LoopBB);
+ MDT->addNewBlock(RemainderBB, BodyBB);
for (auto &Succ : RemainderBB->successors()) {
if (MDT->properlyDominates(&MBB, Succ)) {
MDT->changeImmediateDominator(Succ, RemainderBB);
@@ -5519,12 +5641,12 @@ loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
}
}
- emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, Rsrc);
+ emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, Rsrc);
// Restore the EXEC mask
MachineBasicBlock::iterator First = RemainderBB->begin();
BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
- return LoopBB;
+ return BodyBB;
}
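The restructured waterfall loop now has a separate header and body, so the back edge runs body to header; a minimal sketch of the successor wiring (plain structs, not MachineBasicBlock):

    #include <vector>

    struct Block {
      const char *Name;
      std::vector<Block *> Succs;
    };

    int main() {
      // MBB -> LoopBB -> BodyBB -> {LoopBB, RemainderBB}: the exec-xor and
      // SI_WATERFALL_LOOP terminators are emitted into BodyBB.
      Block MBB{"entry", {}}, Loop{"LoopBB", {}}, Body{"BodyBB", {}},
          Rem{"RemainderBB", {}};
      MBB.Succs = {&Loop};
      Loop.Succs = {&Body};
      Body.Succs = {&Loop, &Rem}; // back edge while lanes remain
    }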
// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
@@ -5762,7 +5884,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
RI.getRegClass(RsrcRC))) {
// The operands are legal.
- // FIXME: We may need to legalize operands besided srsrc.
+ // FIXME: We may need to legalize operands besides srsrc.
return CreatedBB;
}
@@ -5836,7 +5958,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode());
- // Atomics rith return have have an additional tied operand and are
+ // Atomics with return have an additional tied operand and are
// missing some of the special bits.
MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in);
MachineInstr *Addr64;
@@ -6050,7 +6172,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC)
.addReg(EXEC)
.addReg(IsSCC ? VCC : CondReg);
- Inst.RemoveOperand(1);
+ Inst.removeOperand(1);
}
break;
@@ -6060,6 +6182,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
case AMDGPU::S_PACK_LL_B32_B16:
case AMDGPU::S_PACK_LH_B32_B16:
+ case AMDGPU::S_PACK_HL_B32_B16:
case AMDGPU::S_PACK_HH_B32_B16:
movePackToVALU(Worklist, MRI, Inst);
Inst.eraseFromParent();
@@ -6217,7 +6340,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
if (Op.isUse())
addSCCDefsToVALUWorklist(Op, Worklist);
- Inst.RemoveOperand(i);
+ Inst.removeOperand(i);
}
}
@@ -6247,7 +6370,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
- Inst.RemoveOperand(2); // Remove old immediate.
+ Inst.removeOperand(2); // Remove old immediate.
Inst.addOperand(MachineOperand::CreateImm(Offset));
Inst.addOperand(MachineOperand::CreateImm(BitWidth));
}
@@ -6281,7 +6404,7 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
// these are deleted later, but at -O0 it would leave a suspicious
// looking illegal copy of an undef register.
for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
- Inst.RemoveOperand(I);
+ Inst.removeOperand(I);
Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
continue;
}
@@ -6323,7 +6446,7 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
assert(Inst.getOperand(3).getReg() == AMDGPU::SCC);
- Inst.RemoveOperand(3);
+ Inst.removeOperand(3);
Inst.setDesc(get(NewOpc));
Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit
@@ -6467,7 +6590,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
// Using the identity !(x ^ y) == (!x ^ y) == (x ^ !y), we can
// invert either source and then perform the XOR. If either source is a
// scalar register, then we can leave the inversion on the scalar unit to
- // acheive a better distrubution of scalar and vector instructions.
+ // achieve a better distribution of scalar and vector instructions.
bool Src0IsSGPR = Src0.isReg() &&
RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
bool Src1IsSGPR = Src1.isReg() &&
@@ -6689,7 +6812,7 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
legalizeOperands(*LoHalf, MDT);
legalizeOperands(*HiHalf, MDT);
- // Move all users of this moved vlaue.
+ // Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
@@ -6753,7 +6876,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
Worklist.insert(&LoHalf);
Worklist.insert(&HiHalf);
- // Move all users of this moved vlaue.
+ // Move all users of this moved value.
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
@@ -6831,7 +6954,7 @@ void SIInstrInfo::splitScalar64BitBCNT(
MRI.replaceRegWith(Dest.getReg(), ResultReg);
- // We don't need to legalize operands here. src0 for etiher instruction can be
+ // We don't need to legalize operands here. src0 for either instruction can be
// an SGPR, and the second input is unused or determined here.
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
@@ -6973,6 +7096,17 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
.add(Src1);
break;
}
+ case AMDGPU::S_PACK_HL_B32_B16: {
+ Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
+ .addImm(16)
+ .add(Src0);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg)
+ .add(Src1)
+ .addImm(16)
+ .addReg(TmpReg, RegState::Kill);
+ break;
+ }
case AMDGPU::S_PACK_HH_B32_B16: {
Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -7045,7 +7179,7 @@ void SIInstrInfo::addSCCDefsToVALUWorklist(MachineOperand &Op,
assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isUse());
MachineInstr *SCCUseInst = Op.getParent();
- // Look for a preceeding instruction that either defines VCC or SCC. If VCC
+ // Look for a preceding instruction that either defines VCC or SCC. If VCC
// then there is nothing to do because the defining instruction has been
// converted to a VALU already. If SCC then that instruction needs to be
// converted to a VALU.
@@ -7191,7 +7325,10 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
- return (AMDGPU::MTBUFFormat::UFMT_32_FLOAT << 44) |
+ int64_t Format = ST.getGeneration() >= AMDGPUSubtarget::GFX11 ?
+ AMDGPU::UfmtGFX11::UFMT_32_FLOAT :
+ AMDGPU::UfmtGFX10::UFMT_32_FLOAT;
+ return (Format << 44) |
(1ULL << 56) | // RESOURCE_LEVEL = 1
(3ULL << 60); // OOB_SELECT = 3
}
@@ -7332,7 +7469,9 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return DescSize;
bool HasLiteral = false;
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
- if (isLiteralConstant(MI, I)) {
+ const MachineOperand &Op = MI.getOperand(I);
+ const MCOperandInfo &OpInfo = Desc.OpInfo[I];
+ if (isLiteralConstantLike(Op, OpInfo)) {
HasLiteral = true;
break;
}
@@ -7513,6 +7652,16 @@ SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
+ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
+ static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
+ {
+ {MONoClobber, "amdgpu-noclobber"},
+ };
+
+ return makeArrayRef(TargetFlags);
+}
+
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI);
@@ -7690,6 +7839,7 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
}
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
+// and the columns of the getMCOpcodeGen table.
enum SIEncodingFamily {
SI = 0,
VI = 1,
@@ -7699,7 +7849,9 @@ enum SIEncodingFamily {
GFX9 = 5,
GFX10 = 6,
SDWA10 = 7,
- GFX90A = 8
+ GFX90A = 8,
+ GFX940 = 9,
+ GFX11 = 10,
};
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
@@ -7714,6 +7866,8 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
return SIEncodingFamily::VI;
case AMDGPUSubtarget::GFX10:
return SIEncodingFamily::GFX10;
+ case AMDGPUSubtarget::GFX11:
+ return SIEncodingFamily::GFX11;
}
llvm_unreachable("Unknown subtarget generation!");
}
@@ -7779,6 +7933,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
if (ST.hasGFX90AInsts()) {
uint16_t NMCOp = (uint16_t)-1;
+ if (ST.hasGFX940Insts())
+ NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940);
+ if (NMCOp == (uint16_t)-1)
NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A);
if (NMCOp == (uint16_t)-1)
NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX9);
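The lookup above is a cascade from the most specific encoding family down to older ones; a standalone model of the miss/fallback behavior, using (uint16_t)-1 as the miss value and a plain vector as a stand-in for the getMCOpcodeGen columns:

    #include <cstdint>
    #include <vector>

    constexpr uint16_t NoOpcode = static_cast<uint16_t>(-1);

    // Try each encoding-family column in order (e.g. GFX940, GFX90A, GFX9)
    // and return the first hit, mirroring pseudoToMCOpcode's chain.
    uint16_t lookupMCOpcode(const std::vector<uint16_t> &Columns) {
      for (uint16_t Op : Columns)
        if (Op != NoOpcode)
          return Op;
      return NoOpcode;
    }

    int main() {
      // Miss in the first family, hit in the second.
      return lookupMCOpcode({NoOpcode, 0x123, 0x456}) == 0x123 ? 0 : 1;
    }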
@@ -7925,7 +8082,7 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
auto &UseInst = *Use.getParent();
// Don't bother searching between blocks, although it is possible this block
// doesn't modify exec.
- if (UseInst.getParent() != DefBB)
+ if (UseInst.getParent() != DefBB || UseInst.isPHI())
return true;
if (++NumUse > MaxUseScan)
@@ -8150,7 +8307,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
this](int64_t ExpectedValue, unsigned SrcSize,
- bool IsReversable, bool IsSigned) -> bool {
+ bool IsReversible, bool IsSigned) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
// s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
// s_cmp_ge_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
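
These rewrites are sound because the mask has a single bit set: for M = 1 << n, (src & M) == M holds exactly when (src & M) != 0, which is the SCC value the s_and_b32 already produces, so the explicit compare is redundant. A small sketch of that equivalence (not part of the patch):

    // For a one-bit mask, the compare duplicates the SCC
    // ("result != 0") written by the AND itself.
    bool cmpFoldsIntoAnd(uint32_t Src, uint32_t Mask) {
      bool AndSCC = (Src & Mask) != 0;    // SCC from s_and_b32
      bool CmpEq  = (Src & Mask) == Mask; // s_cmp_eq_u32 result
      return AndSCC == CmpEq;             // always true when Mask == 1u << n
    }
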
@@ -8208,7 +8365,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
bool IsReversedCC = false;
if (CmpValue != ExpectedValue) {
- if (!IsReversable)
+ if (!IsReversible)
return false;
IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
if (!IsReversedCC)
@@ -8284,3 +8441,37 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return false;
}
+
+void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI,
+ unsigned OpName) const {
+ if (!ST.needsAlignedVGPRs())
+ return;
+
+ int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
+ if (OpNo < 0)
+ return;
+ MachineOperand &Op = MI.getOperand(OpNo);
+ if (getOpSize(MI, OpNo) > 4)
+ return;
+
+ // Add implicit aligned super-reg to force alignment on the data operand.
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock *BB = MI.getParent();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ Register DataReg = Op.getReg();
+ bool IsAGPR = RI.isAGPR(MRI, DataReg);
+ Register Undef = MRI.createVirtualRegister(
+ IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
+ BuildMI(*BB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
+ Register NewVR =
+ MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
+ : &AMDGPU::VReg_64_Align2RegClass);
+ BuildMI(*BB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewVR)
+ .addReg(DataReg, 0, Op.getSubReg())
+ .addImm(AMDGPU::sub0)
+ .addReg(Undef)
+ .addImm(AMDGPU::sub1);
+ Op.setReg(NewVR);
+ Op.setSubReg(AMDGPU::sub0);
+ MI.addOperand(MachineOperand::CreateReg(NewVR, false, true));
+}
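
The sequence above wraps the 32-bit data operand in an even-aligned 64-bit super-register: an IMPLICIT_DEF supplies the upper half, a REG_SEQUENCE ties both halves into an Align2 register class, and the operand is rewritten to sub0 of the result with the full pair added as an implicit use. A hypothetical call site (the operand name is illustrative):

    // After creating or rewriting an instruction whose 32-bit data
    // operand must satisfy the subtarget's aligned-VGPR requirement:
    TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vdata);
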
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e551d6c7223f..311f9f68e675 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#include "AMDGPUMIRFormatter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
@@ -35,6 +36,11 @@ class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;
+/// Mark the MMO of a uniform load if there are no potentially clobbering stores
+/// on any path from the start of an entry function to this load.
+static const MachineMemOperand::Flags MONoClobber =
+ MachineMemOperand::MOTargetFlag1;
+
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
const SIRegisterInfo RI;
@@ -323,15 +329,14 @@ public:
Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
- unsigned getAddressSpaceForPseudoSourceKind(
- unsigned Kind) const override;
-
bool
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const override;
static bool isFoldableCopy(const MachineInstr &MI);
+ void removeModOperands(MachineInstr &MI) const;
+
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;
@@ -549,6 +554,14 @@ public:
return MI.getDesc().TSFlags & SIInstrFlags::EXP;
}
+ static bool isDualSourceBlendEXP(const MachineInstr &MI) {
+ if (!isEXP(MI))
+ return false;
+ unsigned Target = MI.getOperand(0).getImm();
+ return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
+ Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
+ }
+
bool isEXP(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::EXP;
}
@@ -651,14 +664,43 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
}
+ static bool isMFMA(const MachineInstr &MI) {
+ return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
+ }
+
static bool isDOT(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
}
+ static bool isWMMA(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
+ }
+
+ bool isWMMA(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
+ }
+
bool isDOT(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
}
+ static bool isLDSDIR(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
+ }
+
+ bool isLDSDIR(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
+ }
+
+ static bool isVINTERP(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
+ }
+
+ bool isVINTERP(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
+ }
+
static bool isScalarUnit(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
}
@@ -1036,6 +1078,9 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+ getSerializableMachineMemOperandTargetFlags() const override;
+
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
@@ -1132,6 +1177,11 @@ public:
static unsigned getDSShaderTypeValue(const MachineFunction &MF);
const TargetSchedModel &getSchedModel() const { return SchedModel; }
+
+  // Enforce that operand \p OpName is even-aligned if required by the target.
+  // This is used when an operand is a 32-bit register but must be aligned
+  // regardless.

+ void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};
/// \brief Returns true if a reg:subreg pair P has a TRC class
@@ -1210,9 +1260,6 @@ namespace AMDGPU {
int getIfAddr64Inst(uint16_t Opcode);
LLVM_READONLY
- int getMUBUFNoLdsInst(uint16_t Opcode);
-
- LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);
LLVM_READONLY
@@ -1236,6 +1283,11 @@ namespace AMDGPU {
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);
+ /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
+ /// of an SVS (SADDR + VADDR) form.
+ LLVM_READONLY
+ int getFlatScratchInstSVfromSVS(uint16_t Opcode);
+
/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
@@ -1250,6 +1302,10 @@ namespace AMDGPU {
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);
+ /// \returns v_cmpx version of a v_cmp instruction.
+ LLVM_READONLY
+ int getVCMPXOpFromVCMP(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 713a08907e99..29ee9f12b12d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
+//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,7 +17,8 @@ class GCNPredicateControl : PredicateControl {
}
// Except for the NONE field, this must be kept in sync with the
-// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
+// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
+// getMCOpcodeGen table.
def SIEncodingFamily {
int NONE = -1;
int SI = 0;
@@ -29,6 +30,8 @@ def SIEncodingFamily {
int GFX10 = 6;
int SDWA10 = 7;
int GFX90A = 8;
+ int GFX940 = 9;
+ int GFX11 = 10;
}
//===----------------------------------------------------------------------===//
@@ -190,6 +193,44 @@ def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
+multiclass SDBufferAtomicRetNoRet {
+ def "_ret" : PatFrag<
+ (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
+ node:$offset, node:$cachepolicy, node:$idxen),
+ (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
+ let GISelPredicateCode = [{ return true; }];
+ }
+
+ def "_noret" : PatFrag<
+ (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
+ node:$offset, node:$cachepolicy, node:$idxen),
+ (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
+ let GISelPredicateCode = [{ return false; }];
+ }
+}
+
+defm SIbuffer_atomic_swap : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_add : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_sub : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_smin : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_umin : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_smax : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_umax : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_and : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_or : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_xor : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_inc : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_dec : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_fadd : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_fmin : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_fmax : SDBufferAtomicRetNoRet;
+
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
SDTypeProfile<1, 9,
[SDTCisVT<0, i32>, // dst
@@ -205,6 +246,26 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
+def SIbuffer_atomic_cmpswap_ret : PatFrag<
+ (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
+ node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
+ (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
+ let GISelPredicateCode = [{ return true; }];
+}
+
+def SIbuffer_atomic_cmpswap_noret : PatFrag<
+ (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
+ node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
+ (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
+ let GISelPredicateCode = [{ return false; }];
+}
+
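
The _ret/_noret PatFrag pairs above split each buffer atomic on whether its value result is live: the SelectionDAG predicate checks use_empty() on result 0, while the constant GISelPredicateCode bodies route all GlobalISel selection to the _ret form for now. The DAG-side test as a standalone sketch:

    // N is the node being matched; result 0 is the atomic's data result.
    bool selectsNoRetForm(llvm::SDNode *N) {
      return llvm::SDValue(N, 0).use_empty();
    }
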
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
SDTypeProfile<0, 2,
[SDTCisPtrTy<0>, // vaddr
@@ -255,35 +316,57 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;
+def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
+ SDTFPRoundOp
+>;
+
+def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
+ SDTFPRoundOp
+>;
+
//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
// Returns 1 if the source arguments have modifiers, 0 if they do not.
-// XXX - do f16 instructions?
class isFloatType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, f16.Value),
!eq(SrcVT.Value, f32.Value),
!eq(SrcVT.Value, f64.Value),
!eq(SrcVT.Value, v2f16.Value),
!eq(SrcVT.Value, v4f16.Value),
+ !eq(SrcVT.Value, v8f16.Value),
+ !eq(SrcVT.Value, v16f16.Value),
!eq(SrcVT.Value, v2f32.Value),
+ !eq(SrcVT.Value, v4f32.Value),
+ !eq(SrcVT.Value, v8f32.Value),
!eq(SrcVT.Value, v2f64.Value),
!eq(SrcVT.Value, v4f64.Value));
}
+// XXX - do v2i16 instructions?
class isIntType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, i16.Value),
!eq(SrcVT.Value, i32.Value),
!eq(SrcVT.Value, i64.Value),
- !eq(SrcVT.Value, v2i32.Value));
+ !eq(SrcVT.Value, v4i16.Value),
+ !eq(SrcVT.Value, v8i16.Value),
+ !eq(SrcVT.Value, v16i16.Value),
+ !eq(SrcVT.Value, v2i32.Value),
+ !eq(SrcVT.Value, v4i32.Value),
+ !eq(SrcVT.Value, v8i32.Value));
}
class isPackedType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, v2i16.Value),
!eq(SrcVT.Value, v2f16.Value),
!eq(SrcVT.Value, v4f16.Value),
- !eq(SrcVT.Value, v2f32.Value));
+ !eq(SrcVT.Value, v2i32.Value),
+ !eq(SrcVT.Value, v2f32.Value),
+ !eq(SrcVT.Value, v4i32.Value),
+ !eq(SrcVT.Value, v4f32.Value),
+ !eq(SrcVT.Value, v8i32.Value),
+ !eq(SrcVT.Value, v8f32.Value));
}
@@ -291,19 +374,10 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
-let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
-
-
-defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
-defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
-defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
-defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
-
-
-} // End let AddressSpaces = ...
-} // End foreach AddrSpace
-
+defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>;
+defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>;
+defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
+defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;
//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
@@ -408,50 +482,36 @@ def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
let IsNonExtLoad = 1;
}
-let MemoryVT = i8 in {
def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
-}
-let MemoryVT = i16 in {
def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
-}
+} // End IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces
def load_align8_local_m0 : PatFrag<(ops node:$ptr),
- (load_local_m0 node:$ptr)>, Aligned<8> {
+ (load_local_m0 node:$ptr)> {
let IsLoad = 1;
- let IsNonExtLoad = 1;
+ int MinAlignment = 8;
}
def load_align16_local_m0 : PatFrag<(ops node:$ptr),
- (load_local_m0 node:$ptr)>, Aligned<16> {
+ (load_local_m0 node:$ptr)> {
let IsLoad = 1;
- let IsNonExtLoad = 1;
+ int MinAlignment = 16;
}
-} // End IsLoad = 1
-
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_8_glue node:$ptr)> {
- let MemoryVT = i8;
-}
+ (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_16_glue node:$ptr)> {
- let MemoryVT = i16;
-}
+ (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_32_glue node:$ptr)> {
- let MemoryVT = i32;
-}
+ (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_64_glue node:$ptr)> {
- let MemoryVT = i64;
-}
-
+ (atomic_load_64_glue node:$ptr)>;
} // End let AddressSpaces = LoadAddress_local.AddrSpaces
@@ -485,75 +545,103 @@ def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
(truncstore_glue node:$val, node:$ptr)> {
let IsStore = 1;
let MemoryVT = i8;
+ let IsTruncStore = 1;
}
def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
(truncstore_glue node:$val, node:$ptr)> {
let IsStore = 1;
let MemoryVT = i16;
+ let IsTruncStore = 1;
}
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
- (store_glue node:$val, node:$ptr)> {
- let IsStore = 1;
- let IsTruncStore = 0;
-}
-
+ (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr)> {
- let IsStore = 1;
- let MemoryVT = i8;
-}
-
+ (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr)> {
- let IsStore = 1;
- let MemoryVT = i16;
-}
+ (truncstorei16_glue node:$val, node:$ptr)>;
}
def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
(store_local_m0 node:$value, node:$ptr)>,
Aligned<8> {
let IsStore = 1;
- let IsTruncStore = 0;
}
def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
(store_local_m0 node:$value, node:$ptr)>,
Aligned<16> {
let IsStore = 1;
+}
+
+let PredicateCode = [{return cast<MemSDNode>(N)->getAlignment() < 4;}],
+ GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
+ AddressSpaces = [ AddrSpaces.Local ] in {
+def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
+ (load_local node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+}
+
+def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+}
+
+def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
+ (store_local node:$value, node:$ptr)> {
+ let IsStore = 1;
let IsTruncStore = 0;
}
-let AddressSpaces = StoreAddress_local.AddrSpaces in {
+def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
+}
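
The two predicates above express one condition through two APIs: the SelectionDAG form queries the MemSDNode's alignment, the GlobalISel form queries the instruction's first memory operand. The shared test, factored out as a sketch:

    // True when a local (LDS) access is under dword alignment and must
    // take the unaligned-access selection path.
    bool needsUnalignedLDSPath(uint64_t AlignInBytes) {
      return AlignInBytes < 4;
    }
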
-def atomic_store_local_8_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+def atomic_store_8_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i8;
}
-def atomic_store_local_16_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+
+def atomic_store_16_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i16;
}
-def atomic_store_local_32_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+
+def atomic_store_32_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i32;
}
-def atomic_store_local_64_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+
+def atomic_store_64_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i64;
}
-} // End let AddressSpaces = StoreAddress_local.AddrSpaces
+
+let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
+def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_8_glue node:$ptr, node:$val)>;
+def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_16_glue node:$ptr, node:$val)>;
+def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_32_glue node:$ptr, node:$val)>;
+def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_64_glue node:$ptr, node:$val)>;
+} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
def si_setcc_uniform : PatFrag <
@@ -686,10 +774,14 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ defm _local_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
+ IsInt>;
}
let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ defm _region_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
+ IsInt>;
}
}
@@ -954,6 +1046,18 @@ def SWaitMatchClass : AsmOperandClass {
let ParserMethod = "parseSWaitCntOps";
}
+def DepCtrMatchClass : AsmOperandClass {
+ let Name = "DepCtr";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseDepCtrOps";
+}
+
+def SDelayMatchClass : AsmOperandClass {
+ let Name = "SDelayAlu";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseSDelayAluOps";
+}
+
def VReg32OrOffClass : AsmOperandClass {
let Name = "VReg32OrOff";
let ParserMethod = "parseVReg32OrOff";
@@ -979,6 +1083,16 @@ def WAIT_FLAG : Operand <i32> {
let ParserMatchClass = SWaitMatchClass;
let PrintMethod = "printWaitFlag";
}
+
+def DepCtrImm : Operand <i32> {
+ let ParserMatchClass = DepCtrMatchClass;
+ let PrintMethod = "printDepCtr";
+}
+
+def DELAY_FLAG : Operand <i32> {
+ let ParserMatchClass = SDelayMatchClass;
+ let PrintMethod = "printDelayFlag";
+}
} // End OperandType = "OPERAND_IMMEDIATE"
include "SIInstrFormats.td"
@@ -1163,14 +1277,6 @@ def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
-def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
-
-def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
-def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
-def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
-def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
-def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
-
def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
@@ -1181,6 +1287,14 @@ def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
+def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
+
+def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
+def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
+def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
+def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
+
def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
@@ -1191,6 +1305,9 @@ def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
}
+def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>;
+def wait_exp : NamedOperandU8<"WaitEXP", NamedMatchClass<"WaitEXP">>;
+
} // End OperandType = "OPERAND_IMMEDIATE"
class KImmMatchClass<int size> : AsmOperandClass {
@@ -1223,10 +1340,18 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
+class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
+ let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
+ let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
+}
+
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
+def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
+def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
+
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_INPUT_MODS";
@@ -1241,19 +1366,28 @@ def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
+def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
+def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
+
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "RegOrImmWithInt"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithIntInputMods";
let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
+class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
+ let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
+ let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
+}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
+def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
+def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
class OpSelModsMatchClass : AsmOperandClass {
let Name = "OpSelMods";
@@ -1366,12 +1500,19 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
+def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
+def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;
+def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
+
def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
+def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
+def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -1575,6 +1716,19 @@ class getVOP3SrcForVT<ValueType VT> {
);
}
+// Src2 of VOP3 DPP instructions cannot be a literal
+class getVOP3DPPSrcForVT<ValueType VT> {
+ bit isFP = isFloatType<VT>.ret;
+ RegisterOperand ret =
+ !if (!eq(VT.Value, i1.Value), SSrc_i1,
+ !if (isFP,
+ !if (!eq(VT.Value, f16.Value), VCSrc_f16,
+ !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)),
+ !if (!eq(VT.Value, i16.Value), VCSrc_b16,
+ !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
+ VCSrc_b32))));
+}
+
// Float or packed int
class isModifierType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, f16.Value),
@@ -1583,7 +1737,17 @@ class isModifierType<ValueType SrcVT> {
!eq(SrcVT.Value, v2f16.Value),
!eq(SrcVT.Value, v2i16.Value),
!eq(SrcVT.Value, v2f32.Value),
- !eq(SrcVT.Value, v2i32.Value));
+ !eq(SrcVT.Value, v2i32.Value),
+ !eq(SrcVT.Value, v4f16.Value),
+ !eq(SrcVT.Value, v4i16.Value),
+ !eq(SrcVT.Value, v4f32.Value),
+ !eq(SrcVT.Value, v4i32.Value),
+ !eq(SrcVT.Value, v8f16.Value),
+ !eq(SrcVT.Value, v8i16.Value),
+ !eq(SrcVT.Value, v8f32.Value),
+ !eq(SrcVT.Value, v8i32.Value),
+ !eq(SrcVT.Value, v16f16.Value),
+ !eq(SrcVT.Value, v16i16.Value));
}
// Return type of input modifiers operand for specified input operand
@@ -1611,6 +1775,17 @@ class getSrcModDPP <ValueType VT> {
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
+// Return type of input modifiers operand for specified input operand for DPP
+class getSrcModVOP3DPP <ValueType VT, bit EnableF32SrcMods> {
+ bit isFP = isFloatType<VT>.ret;
+ bit isPacked = isPackedType<VT>.ret;
+ Operand ret =
+ !if (isFP,
+ !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
+ FP32VCSrcInputMods),
+ !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods));
+}
+
// Return type of input modifiers operand specified input operand for SDWA
class getSrcModSDWA <ValueType VT> {
Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
@@ -1620,7 +1795,7 @@ class getSrcModSDWA <ValueType VT> {
}
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
-class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
+class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
!if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
(ins)));
@@ -1715,19 +1890,21 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
dag opsel = (ins op_sel0:$op_sel);
- dag vop3pFields = (ins op_sel_hi0:$op_sel_hi, neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+ dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
+ dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));
+
dag ret = !con(base,
!if(HasOpSel, opsel,(ins)),
!if(IsVOP3P, vop3pFields,(ins)));
}
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
- RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp,
+ RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
- 1/*HasOpSel*/, 1/*IsVOP3P*/>.ret;
+ HasOpSel, 1/*IsVOP3P*/>.ret;
}
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
@@ -1741,8 +1918,8 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
}
class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret = !if (!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
@@ -1756,6 +1933,7 @@ class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass
// VOP1_DPP without modifiers
(ins OldRC:$old, Src0RC:$src0)
/* endif */),
+ !if (!eq(NumSrcArgs, 2),
!if (HasModifiers,
// VOP2_DPP with modifiers
(ins OldRC:$old,
@@ -1765,34 +1943,72 @@ class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass
// VOP2_DPP without modifiers
(ins OldRC:$old,
Src0RC:$src0, Src1RC:$src1)
- )));
+ )
+ /* NumSrcArgs == 3, VOP3 */,
+ !if (HasModifiers,
+ // VOP3_DPP with modifiers
+ (ins OldRC:$old,
+ Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2)
+ /* else */,
+ // VOP3_DPP without modifiers
+ (ins OldRC:$old,
+ Src0RC:$src0, Src1RC:$src1,
+ Src2RC:$src2)
+ )
+ /* endif */)));
}
class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
- dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod>.ret,
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}
class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
- dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod>.ret,
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins FI:$fi));
}
class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
- dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod>.ret,
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins dpp8:$dpp8, FI:$fi));
}
+class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag old = ( ins OldRC:$old );
+ dag base = VOP3Base;
+ dag ret = !con(
+ !if(!ne(NumSrcArgs, 0), old, (ins)),
+ base
+ );
+}
+
+class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+ (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
+}
+
+class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret,
+ (ins FI:$fi));
+}
+
+class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+ (ins dpp8:$dpp8, FI:$fi));
+}
// Ins for SDWA
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
@@ -1870,6 +2086,15 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
!if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}
+class getAsmVOPDPart <int NumSrcArgs, string XorY> {
+ string dst = "$vdst" # XorY;
+ string src0 = ", $src0" # XorY;
+ string src1 = ", $vsrc1" # XorY;
+ string ret = dst #
+ !if(!ge(NumSrcArgs, 1), src0, "") #
+ !if(!ge(NumSrcArgs, 2), src1, "");
+}
+
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
@@ -1890,7 +2115,7 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
- bit HasClamp> {
+ bit HasClamp, bit HasOpSel> {
string dst = "$vdst";
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -1900,10 +2125,11 @@ class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
string clamp = !if(HasClamp, "$clamp", "");
+ string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
// Each modifier is printed as an array of bits for each operand, so
// all operands are printed as part of src0_modifiers.
- string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
+ string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}
class getAsmVOP3OpSel <int NumSrcArgs,
@@ -1930,8 +2156,8 @@ class getAsmVOP3OpSel <int NumSrcArgs,
string src2 = !if(Src2HasMods, fsrc2, isrc2);
string clamp = !if(HasClamp, "$clamp", "");
-
- string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
+ string omod = "";
+ string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
}
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@@ -1955,15 +2181,63 @@ class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
}
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
- : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
+  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
let ret = dst#args#" $dpp8$fi";
}
+class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp,
+ bit HasOpSel, bit HasOMod, bit IsVOP3P,
+ bit HasModifiers, bit Src0HasMods,
+ bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
+ string dst = !if(HasDst,
+ !if(!eq(DstVT.Size, 1),
+ "$sdst",
+ "$vdst"),
+ ""); // use $sdst for VOPC
+ string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+ string isrc1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1",
+ " $src1,"));
+ string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+ string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+ string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+
+ string src0 = !if(Src0HasMods, fsrc0, isrc0);
+ string src1 = !if(Src1HasMods, fsrc1, isrc1);
+ string src2 = !if(Src2HasMods, fsrc2, isrc2);
+ string opsel = !if(HasOpSel, "$op_sel", "");
+ string 3PMods = !if(IsVOP3P,
+ !if(HasOpSel, "$op_sel_hi", "")
+ #!if(HasModifiers, "$neg_lo$neg_hi", ""),
+ "");
+ string clamp = !if(HasClamp, "$clamp", "");
+ string omod = !if(HasOMod, "$omod", "");
+
+ string ret = dst#", "#src0#src1#src2#opsel#3PMods#clamp#omod;
+
+}
+
+class getAsmVOP3DPP<string base> {
+ string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+}
+
+class getAsmVOP3DPP16<string base> {
+ string ret = getAsmVOP3DPP<base>.ret # "$fi";
+}
+
+class getAsmVOP3DPP8<string base> {
+ string ret = base # " $dpp8$fi";
+}
+
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
- " vcc", // use vcc token as dst for VOPC instructioins
+ " vcc", // use vcc token as dst for VOPC instructions
"$vdst"),
"");
string src0 = "$src0_modifiers";
@@ -2056,6 +2330,12 @@ class getHasDPP <int NumSrcArgs> {
1);
}
+class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+ ValueType Src1VT = i32> {
+ bit ret = !and(getHasDPP<NumSrcArgs>.ret,
+ !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
+}
+
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
bit ret = !and(getHasDPP<NumSrcArgs>.ret,
@@ -2089,6 +2369,24 @@ class BitAnd<bit a, bit b> {
bit ret = !if(a, !if(b, 1, 0), 0);
}
+class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
+ ValueType Src1VT = i32, ValueType Src2VT = i32> {
+ bit ret = !if(!eq(DstVT.Size, 64),
+                0, // 64-bit dst: no DPP for 64-bit operands
+ !if(!eq(Src0VT.Size, 64),
+ 0, // 64-bit src0
+ !if(!eq(Src1VT.Size, 64),
+ 0, // 64-bit src1
+ !if(!eq(Src2VT.Size, 64),
+ 0, // 64-bit src2
+ 1
+ )
+ )
+ )
+ );
+}
+
+
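
Unwound, the nested !if above is a single conjunction: a VOP3 instruction gets a VOP3 DPP variant only if neither the destination nor any source operand is 64 bits wide. Equivalent predicate in plain code (sketch):

    bool hasVOP3DPP(unsigned DstSize, unsigned Src0Size, unsigned Src1Size,
                    unsigned Src2Size) {
      // No DPP when any operand is 64-bit.
      return DstSize != 64 && Src0Size != 64 &&
             Src1Size != 64 && Src2Size != 64;
    }
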
def PatGenMode {
int NoPattern = 0;
int Pattern = 1;
@@ -2106,15 +2404,20 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field ValueType Src1VT = ArgVT[2];
field ValueType Src2VT = ArgVT[3];
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRC64 = DstRC;
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
- field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
+ field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
+ field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
+ field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
+ field RegisterOperand Src1VOP3DPP = VGPRSrc_32;
+ field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
@@ -2122,6 +2425,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
+ field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
+ field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, EnableF32SrcMods>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
@@ -2169,15 +2474,20 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasExtDPP = getHasDPP<NumSrcArgs>.ret;
+ field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
+ field bit HasExtDPP = !if(!or(getHasDPP<NumSrcArgs>.ret,
+ HasExtVOP3DPP), 1, 0);
+ field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA9 = HasExtSDWA;
field int NeedPatGen = PatGenMode.NoPattern;
field bit IsMAI = 0;
+ field bit IsVOP3P = 0;
field bit IsDOT = 0;
field bit IsSingle = 0;
+ field bit IsWMMA = 0;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -2188,9 +2498,11 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
// VOP3b instructions are a special case with a second explicit
// output. This is manually overridden for them.
field dag Outs32 = Outs;
- field dag Outs64 = Outs;
+ field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
+ field dag OutsVOP3DPP = OutsDPP;
+ field dag OutsVOP3DPP8 = OutsDPP8;
field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
@@ -2198,7 +2510,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
HasIntClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
- NumSrcArgs, HasClamp,
+ NumSrcArgs, HasClamp, HasOpSel,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOMod,
@@ -2206,21 +2518,35 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
field dag InsDPP = !if(HasExtDPP,
- getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
+ getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
(ins));
- field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
- field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
- Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
+ field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
+ NumSrcArgs, HasModifiers,
+ Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
+ field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
+ Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
+ Src0ModDPP, Src1ModDPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret;
+ field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
+ field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
+ field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
+ field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
+ // It is a slight misnomer to use the deferred f32 operand type for non-float
+ // operands, but this operand type will only be used if the other dual
+ // component is FMAAK or FMAMK
+ field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
+ field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
+ field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
- field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp>.ret;
+ field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
HasClamp,
HasSrc0FloatMods,
@@ -2232,15 +2558,24 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
// DPP8 encoding has no fields for modifiers, and it is enforced by setting
// the asm operand name via this HasModifiers flag
field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
+ field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp,
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods,
+ HasSrc2FloatMods, DstVT >.ret;
+ field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret;
+ field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret;
+ field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
-
+ field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
+ field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
field string TieRegDPP = "$old";
}
-class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
+ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExt = 0;
let HasExtDPP = 0;
+ let HasExtVOP3DPP = 0;
+ let HasExt32BitDPP = 0;
let HasExt64BitDPP = 0;
let HasExtSDWA = 0;
let HasExtSDWA9 = 0;
@@ -2249,10 +2584,10 @@ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
let NeedPatGen = mode;
}
-
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
+def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
@@ -2264,6 +2599,7 @@ def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
+def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
@@ -2274,6 +2610,10 @@ def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
+def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
+def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
+def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;
+
def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
@@ -2343,6 +2683,18 @@ def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;
+def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
+def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
+def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
+def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
+
+def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
+def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
+def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
+def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
+def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
+def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
+
class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
@@ -2394,10 +2746,11 @@ multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
- let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
- } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+ } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
}
+
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
@@ -2470,6 +2823,7 @@ def getMCOpcodeGen : InstrMapping {
let RowFields = ["PseudoInstr"];
let ColFields = ["Subtarget"];
let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
+ // These columns must be kept in sync with the SIEncodingFamily enumeration.
let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
[!cast<string>(SIEncodingFamily.VI)],
[!cast<string>(SIEncodingFamily.SDWA)],
@@ -2482,7 +2836,9 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX9)],
[!cast<string>(SIEncodingFamily.GFX10)],
[!cast<string>(SIEncodingFamily.SDWA10)],
- [!cast<string>(SIEncodingFamily.GFX90A)]];
+ [!cast<string>(SIEncodingFamily.GFX90A)],
+ [!cast<string>(SIEncodingFamily.GFX940)],
+ [!cast<string>(SIEncodingFamily.GFX11)]];
}
// Get equivalent SOPK instruction.
@@ -2510,14 +2866,6 @@ def getIfAddr64Inst : InstrMapping {
let ValueCols = [["1"]];
}
-def getMUBUFNoLdsInst : InstrMapping {
- let FilterClass = "MUBUFLdsTable";
- let RowFields = ["OpName"];
- let ColFields = ["IsLds"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
let FilterClass = "AtomicNoRet";
@@ -2580,6 +2928,14 @@ def getFlatScratchInstSSfromSV : InstrMapping {
let ValueCols = [["SS"]];
}
+def getFlatScratchInstSVfromSVS : InstrMapping {
+ let FilterClass = "FlatScratchInst";
+ let RowFields = ["SVOp"];
+ let ColFields = ["Mode"];
+ let KeyCol = ["SVS"];
+ let ValueCols = [["SV"]];
+}
+
def getFlatScratchInstSVfromSS : InstrMapping {
let FilterClass = "FlatScratchInst";
let RowFields = ["SVOp"];
@@ -2596,6 +2952,15 @@ def getMFMAEarlyClobberOp : InstrMapping {
let ValueCols = [["0"]];
}
+// Maps a v_cmp instruction to its v_cmpx equivalent.
+def getVCMPXOpFromVCMP : InstrMapping {
+ let FilterClass = "VCMPVCMPXTable";
+ let RowFields = ["VCMPOp"];
+ let ColFields = ["IsVCMPX"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7be63ae6964b..829669157893 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -14,12 +14,24 @@ class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateContro
}
+class UniformSextInreg<ValueType VT> : PatFrag<
+ (ops node:$src),
+ (sext_inreg $src, VT),
+ [{ return !N->isDivergent(); }]>;
+
+class DivergentSextInreg<ValueType VT> : PatFrag<
+ (ops node:$src),
+ (sext_inreg $src, VT),
+ [{ return N->isDivergent(); }]>;
+
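
Hoisting these PatFrags to the top of the file (they are deleted from their old location further down) lets every included .td file select on value divergence: the same sext_inreg node matches a scalar S_BFE pattern when uniform and a vector V_BFE pattern when divergent. The bracketed predicates are plain C++ over the node, equivalent to this sketch:

    // Route uniform values to SALU patterns, divergent ones to VALU.
    bool matchesUniformForm(const llvm::SDNode *N) { return !N->isDivergent(); }
    bool matchesDivergentForm(const llvm::SDNode *N) { return N->isDivergent(); }
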
include "SOPInstructions.td"
include "VOPInstructions.td"
include "SMInstructions.td"
include "FLATInstructions.td"
include "BUFInstructions.td"
include "EXPInstructions.td"
+include "LDSDIRInstructions.td"
+include "VINTERPInstructions.td"
//===----------------------------------------------------------------------===//
// VINTRP Instructions
@@ -176,19 +188,33 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
let mayStore = 0;
}
+// Pseudo instructions used for @llvm.fptrunc.round upward
+// and @llvm.fptrunc.round downward.
+// These intrinsics will be legalized to G_FPTRUNC_ROUND_UPWARD
+// and G_FPTRUNC_ROUND_DOWNWARD before being lowered to
+// FPTRUNC_UPWARD_PSEUDO and FPTRUNC_DOWNWARD_PSEUDO.
+// The final codegen is done in the ModeRegister pass.
+let Uses = [MODE, EXEC] in {
+def FPTRUNC_UPWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VGPR_32:$src0),
+ [(set f16:$vdst, (SIfptrunc_round_upward f32:$src0))]>;
+
+def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VGPR_32:$src0),
+ [(set f16:$vdst, (SIfptrunc_round_downward f32:$src0))]>;
+} // End Uses = [MODE, EXEC]
+
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC] in {
def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
- (ins VGPR_32: $src, VSrc_b32:$inactive),
+ (ins VSrc_b32: $src, VSrc_b32:$inactive),
[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
- let Constraints = "$src = $vdst";
}
def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
- (ins VReg_64: $src, VSrc_b64:$inactive),
+ (ins VSrc_b64: $src, VSrc_b64:$inactive),
[(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
- let Constraints = "$src = $vdst";
}
} // End Defs = [SCC]
@@ -287,6 +313,20 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
let isConvergent = 1;
let FixedSize = 1;
let Size = 0;
+ let isMeta = 1;
+}
+
+def SCHED_BARRIER : SPseudoInstSI<(outs), (ins i32imm:$mask),
+ [(int_amdgcn_sched_barrier (i32 timm:$mask))]> {
+ let SchedRW = [];
+ let hasNoSchedulingInfo = 1;
+ let hasSideEffects = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let isConvergent = 1;
+ let FixedSize = 1;
+ let Size = 0;
+ let isMeta = 1;
}
// SI pseudo instructions. These are used by the CFG structurizer pass
@@ -424,6 +464,7 @@ def SI_MASKED_UNREACHABLE : SPseudoInstSI <(outs), (ins),
let Size = 0;
let hasNoSchedulingInfo = 1;
let FixedSize = 1;
+ let isMeta = 1;
}
// Used as an isel pseudo to directly emit initialization with an
@@ -459,11 +500,14 @@ def SI_RETURN_TO_EPILOG : SPseudoInstSI <
let hasNoSchedulingInfo = 1;
let DisableWQM = 1;
let FixedSize = 1;
+
+ // TODO: Should this be true?
+ let isMeta = 0;
}
// Return for returning function calls.
def SI_RETURN : SPseudoInstSI <
- (outs), (ins), [],
+ (outs), (ins), [(AMDGPUret_flag)],
"; return"> {
let isTerminator = 1;
let isBarrier = 1;
@@ -496,6 +540,7 @@ def : GCNPat<
def SI_CALL : SPseudoInstSI <
(outs SReg_64:$dst), (ins SSrc_b64:$src0, unknown:$callee)> {
let Size = 4;
+ let FixedSize = 1;
let isCall = 1;
let UseNamedOperandTable = 1;
let SchedRW = [WriteBranch];
@@ -508,6 +553,7 @@ def SI_TCRETURN : SPseudoInstSI <(outs),
(ins SReg_64:$src0, unknown:$callee, i32imm:$fpdiff),
[(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
let Size = 4;
+ let FixedSize = 1;
let isCall = 1;
let isTerminator = 1;
let isReturn = 1;
@@ -1212,6 +1258,26 @@ def : Pat <
(v4f16 (EXTRACT_SUBREG v8f16:$vec, sub2_sub3))
>;
+def : Pat <
+ (extract_subvector v16i16:$vec, (i32 0)),
+ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub0_sub1_sub2_sub3))
+>;
+
+def : Pat <
+ (extract_subvector v16i16:$vec, (i32 8)),
+ (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub4_sub5_sub6_sub7))
+>;
+
+def : Pat <
+ (extract_subvector v16f16:$vec, (i32 0)),
+ (v8f16 (EXTRACT_SUBREG v16f16:$vec, sub0_sub1_sub2_sub3))
+>;
+
+def : Pat <
+ (extract_subvector v16f16:$vec, (i32 8)),
+ (v8f16 (EXTRACT_SUBREG v16f16:$vec, sub4_sub5_sub6_sub7))
+>;
+
foreach Index = 0-31 in {
def Extract_Element_v32i32_#Index : Extract_Element <
i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -1371,7 +1437,18 @@ def : BitConvert <v8i32, v4i64, VReg_256>;
def : BitConvert <v8f32, v4i64, VReg_256>;
def : BitConvert <v8i32, v4f64, VReg_256>;
def : BitConvert <v8f32, v4f64, VReg_256>;
-
+def : BitConvert <v16i16, v16f16, SReg_256>;
+def : BitConvert <v16f16, v16i16, SReg_256>;
+def : BitConvert <v16i16, v16f16, VReg_256>;
+def : BitConvert <v16f16, v16i16, VReg_256>;
+def : BitConvert <v16f16, v8i32, VReg_256>;
+def : BitConvert <v16i16, v8i32, VReg_256>;
+def : BitConvert <v16f16, v8f32, VReg_256>;
+def : BitConvert <v16i16, v8f32, VReg_256>;
+def : BitConvert <v8i32, v16f16, VReg_256>;
+def : BitConvert <v8i32, v16i16, VReg_256>;
+def : BitConvert <v8f32, v16f16, VReg_256>;
+def : BitConvert <v8f32, v16i16, VReg_256>;
// 512-bit bitcast
def : BitConvert <v16i32, v16f32, VReg_512>;
@@ -1941,12 +2018,6 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
// Conversion Patterns
//===----------------------------------------------------------------------===//
-
-class UniformSextInreg<ValueType VT> : PatFrag<
- (ops node:$src),
- (sext_inreg $src, VT),
- [{ return !N->isDivergent(); }]>;
-
def : GCNPat<(i32 (UniformSextInreg<i1> i32:$src)),
(S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16
@@ -1981,23 +2052,28 @@ def : GCNPat <
(S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16
>;
-
-class DivergentSextInreg<ValueType VT> : PatFrag<
- (ops node:$src),
- (sext_inreg $src, VT),
- [{ return N->isDivergent(); }]>;
-
-def : GCNPat<(i32 (DivergentSextInreg<i1> i32:$src)),
+def : GCNPat<
+ (i32 (DivergentSextInreg<i1> i32:$src)),
(V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
def : GCNPat <
(i16 (DivergentSextInreg<i1> i16:$src)),
- (V_BFE_I32_e64 $src, (i32 0), (i32 1)) // 0 | 1 << 16
+ (V_BFE_I32_e64 $src, (i32 0), (i32 1))
>;
def : GCNPat <
(i16 (DivergentSextInreg<i8> i16:$src)),
- (V_BFE_I32_e64 $src, (i32 0), (i32 8)) // 0 | 8 << 16
+ (V_BFE_I32_e64 $src, (i32 0), (i32 8))
+>;
+
+def : GCNPat<
+ (i32 (DivergentSextInreg<i8> i32:$src)),
+ (V_BFE_I32_e64 i32:$src, (i32 0), (i32 8))
+>;
+
+def : GCNPat <
+ (i32 (DivergentSextInreg<i16> i32:$src)),
+ (V_BFE_I32_e64 $src, (i32 0), (i32 16))
>;
def : GCNPat <
@@ -2010,14 +2086,14 @@ def : GCNPat <
def : GCNPat <
(i64 (DivergentSextInreg<i8> i64:$src)),
(REG_SEQUENCE VReg_64,
- (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8)/* 0 | 8 << 16 */), sub0,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8)), sub0,
(V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8))), sub1)
>;
def : GCNPat <
(i64 (DivergentSextInreg<i16> i64:$src)),
(REG_SEQUENCE VReg_64,
- (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16)/* 0 | 16 << 16 */), sub0,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16)), sub0,
(V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16))), sub1)
>;
@@ -2053,12 +2129,18 @@ def : ZExt_i64_i1_Pat<anyext>;
// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
// REG_SEQUENCE patterns don't support instructions with multiple outputs.
def : GCNPat <
- (i64 (sext i32:$src)),
+ (i64 (UniformUnaryFrag<sext> i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0,
(i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1)
>;
def : GCNPat <
+ (i64 (DivergentUnaryFrag<sext> i32:$src)),
+ (REG_SEQUENCE VReg_64, $src, sub0,
+ (i32 (COPY_TO_REGCLASS (V_ASHRREV_I32_e64 (i32 31), $src), VGPR_32)), sub1)
+>;
+
+def : GCNPat <
(i64 (sext i1:$src)),
(REG_SEQUENCE VReg_64,
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
@@ -2235,6 +2317,30 @@ def : GCNPat <
// the src is lowered. e.g. fptrunc + fma may be lowered to a
// v_fma_mix* instruction, which may or may not zero the high bits.
def : GCNPat<
+ (i32 (DivergentUnaryFrag<abs> i32:$src)),
+ (V_MAX_I32_e64 (V_SUB_CO_U32_e32 (i32 0), $src), $src)>;
+
+let AddedComplexity = 1 in {
+def : GCNPat<
+ (i32 (DivergentUnaryFrag<abs> i32:$src)),
+ (V_MAX_I32_e64 (V_SUB_U32_e32 (i32 0), $src), $src)> {
+ let SubtargetPredicate = HasAddNoCarryInsts;
+}
+} // AddedComplexity = 1
+
+def : GCNPat<
+ (i32 (DivergentUnaryFrag<zext> i16:$src)),
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
+>;
+
+def : GCNPat<
+ (i64 (DivergentUnaryFrag<zext> i16:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src), sub0,
+ (S_MOV_B32 (i32 0)), sub1)
+>;
+
+def : GCNPat<
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
(COPY VSrc_b16:$src)>;
@@ -2269,6 +2375,34 @@ def : GCNPat <
(V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
>;
+def IMMBitSelConst : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(1ULL << N->getZExtValue(), SDLoc(N),
+ MVT::i32);
+}]>;
+
+// Matching separate SRL and TRUNC instructions with dependent operands
+// (the SRL result is the source of the TRUNC) generates three
+// instructions. Because the shift amount is a constant, the shift can
+// instead be folded into the AND mask via IMMBitSelConst, reducing the
+// sequence to a single AND followed by a compare:
+// (trunc i32 (srl i32 $a, (i32 imm:$b))) ->
+// v_and_b32_e64 $a, (1 << $b), $a
+// v_cmp_ne_u32_e64 $a, 0, $a
+
+// Handle the VALU case.
+def : GCNPat <
+ (i1 (DivergentUnaryFrag<trunc> (i32 (srl i32:$a, (i32 imm:$b))))),
+ (V_CMP_NE_U32_e64 (V_AND_B32_e64 (i32 (IMMBitSelConst $b)), $a),
+ (i32 0))
+>;
+
+// Handle the scalar case.
+def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> (i32 (srl i32:$a, (i32 imm:$b))))),
+ (S_CMP_LG_U32 (S_AND_B32 (i32 (IMMBitSelConst $b)), $a),
+ (i32 0))
+>;
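
Restated as host arithmetic, both patterns rely on the identity ((a >> b) & 1) != 0 being equivalent to (a & (1 << b)) != 0 for b < 32; IMMBitSelConst materializes the 1 << b mask at selection time. A minimal, self-contained C++ model of the rewrite:

    #include <cstdint>

    // Naive form: shift, then truncate to i1 (srl + trunc).
    bool bitViaShift(uint32_t a, uint32_t b) { return (a >> b) & 1u; }

    // Selected form: test the bit with a precomputed mask (and + cmp).
    bool bitViaMask(uint32_t a, uint32_t b) { return (a & (1u << b)) != 0; }
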
+
def : GCNPat <
(i1 (DivergentUnaryFrag<trunc> i64:$a)),
(V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1),
@@ -2350,6 +2484,11 @@ def : GCNPat <
}
+def : GCNPat<
+ (i64 (DivergentUnaryFrag<bitreverse> i64:$a)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1))), sub0,
+ (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>;
// Prefer selecting to max when legal, but using mul is always valid.
let AddedComplexity = -5 in {
@@ -2508,12 +2647,12 @@ def : GCNPat <
>;
def : GCNPat <
- (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
+ (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
>;
def : GCNPat <
- (v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))),
+ (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 undef))),
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
>;
@@ -2597,6 +2736,15 @@ def : GCNPat <
>;
} // End SubtargetPredicate = HasVOP3PInsts
+// With multiple uses of the shift, this will duplicate the shift and
+// increase register pressure.
+let SubtargetPredicate = isGFX11Plus in
+def : GCNPat <
+ (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), (i16 SReg_32:$src1))),
+ (v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
+>;
+
def : GCNPat <
(v2f16 (scalar_to_vector f16:$src0)),
(COPY $src0)
@@ -2678,18 +2826,18 @@ def : GCNPat <
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
- (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (UniformBinFrag<add> i32:$src0, (i32 NegSubInlineConst32:$src1)),
(S_SUB_I32 SReg_32:$src0, NegSubInlineConst32:$src1)
>;
def : GCNPat<
- (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (DivergentBinFrag<add> i32:$src0, (i32 NegSubInlineConst32:$src1)),
(V_SUB_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
let SubtargetPredicate = HasAddNoCarryInsts;
}
def : GCNPat<
- (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (DivergentBinFrag<add> i32:$src0, (i32 NegSubInlineConst32:$src1)),
(V_SUB_CO_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
let SubtargetPredicate = NotHasAddNoCarryInsts;
}
@@ -2703,20 +2851,21 @@ def : GCNPat<
(S_MOV_B32 SReg_32:$src)
>;
-multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+multiclass BFMPatterns <ValueType vt, PatFrag SHL, PatFrag ADD, InstSI BFM> {
def : GCNPat <
- (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (vt (SHL (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
(BFM $a, $b)
>;
def : GCNPat <
- (vt (add (vt (shl 1, vt:$a)), -1)),
- (BFM $a, (MOV (i32 0)))
+ (vt (ADD (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (i32 0))
>;
}
-defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
-// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+defm : BFMPatterns <i32, UniformBinFrag<shl>, UniformBinFrag<add>, S_BFM_B32>;
+// FIXME: defm : BFMPatterns <i64, UniformBinFrag<shl>, UniformBinFrag<add>, S_BFM_B64>;
+defm : BFMPatterns <i32, DivergentBinFrag<shl>, DivergentBinFrag<add>, V_BFM_B32_e64>;
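
For reference, S_BFM_B32/V_BFM_B32 build a bitfield mask of $a ones starting at bit $b, which is exactly the value the matched DAG computes. A small host-side model of the mask, assuming the usual 5-bit operand truncation:

    #include <cstdint>

    // bfm(width, offset) == ((1 << width) - 1) << offset, e.g.
    // bfm(8, 4) == 0x00000FF0. The second pattern is the offset == 0 case.
    uint32_t bfm(uint32_t width, uint32_t offset) {
      return ((1u << (width & 31u)) - 1u) << (offset & 31u);
    }
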
// Bitfield extract patterns
@@ -3007,6 +3156,19 @@ def G_AMDGPU_CLAMP : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}
+// Integer multiply-add: arg0 * arg1 + arg2.
+//
+// arg0 and arg1 are 32-bit integers (interpreted as signed or unsigned),
+// arg2 is a 64-bit integer. Result is a 64-bit integer and a 1-bit carry-out.
+class G_AMDGPU_MAD_64_32 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type2:$arg0, type2:$arg1, type0:$arg2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_MAD_U64_U32 : G_AMDGPU_MAD_64_32;
+def G_AMDGPU_MAD_I64_I32 : G_AMDGPU_MAD_64_32;
+
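
A host-side model of the documented semantics, a full 64-bit product of two 32-bit operands plus a 64-bit addend with a 1-bit carry-out, written as a sketch rather than compiler code:

    #include <cstdint>

    struct Mad64 { uint64_t Result; bool CarryOut; };

    // Unsigned variant (G_AMDGPU_MAD_U64_U32); the signed variant widens
    // arg0/arg1 as signed values instead.
    Mad64 madU64U32(uint32_t A0, uint32_t A1, uint64_t A2) {
      uint64_t Prod = uint64_t(A0) * A1;
      uint64_t Sum = Prod + A2;
      return {Sum, Sum < Prod}; // carry-out if the 64-bit add wrapped
    }
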
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
// operand. Expects a MachineMemOperand in addition to explicit
// operands.
@@ -3130,3 +3292,15 @@ def G_SI_CALL : AMDGPUGenericInstruction {
// TODO: Should really base this on the call target
let isConvergent = 1;
}
+
+def G_FPTRUNC_ROUND_UPWARD : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$vdst);
+ let InOperandList = (ins type1:$src0);
+ let hasSideEffects = 0;
+}
+
+def G_FPTRUNC_ROUND_DOWNWARD : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$vdst);
+ let InOperandList = (ins type1:$src0);
+ let hasSideEffects = 0;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index 4fa8ec711134..47095ae22027 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -72,16 +72,22 @@ static void generateEndPgm(MachineBasicBlock &MBB,
bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
// Check if hardware has been configured to expect color or depth exports.
- bool HasExports =
- AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F);
+ bool HasColorExports = AMDGPU::getHasColorExport(F);
+ bool HasDepthExports = AMDGPU::getHasDepthExport(F);
+ bool HasExports = HasColorExports || HasDepthExports;
// Prior to GFX10, hardware always expects at least one export for PS.
bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
if (IsPS && (HasExports || MustExport)) {
// Generate "null export" if hardware is expecting PS to export.
+ const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+ int Target =
+ ST.hasNullExportTarget()
+ ? AMDGPU::Exp::ET_NULL
+ : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ);
BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
- .addImm(AMDGPU::Exp::ET_NULL)
+ .addImm(Target)
.addReg(AMDGPU::VGPR0, RegState::Undef)
.addReg(AMDGPU::VGPR0, RegState::Undef)
.addReg(AMDGPU::VGPR0, RegState::Undef)
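
The hunk's target choice can be restated standalone: when the subtarget lacks a null export target, the final done-export must reuse a real target, MRT0 if color exports were declared and MRTZ otherwise. A minimal sketch of that decision, with the enum values stated only for illustration:

    enum ExpTarget { ET_NULL, ET_MRT0, ET_MRTZ }; // illustrative names only

    ExpTarget pickEndPgmTarget(bool HasNullExportTarget, bool HasColorExports) {
      if (HasNullExportTarget)
        return ET_NULL;
      return HasColorExports ? ET_MRT0 : ET_MRTZ;
    }
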
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 44bdbe37dec0..6d4e1d2c898b 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -79,6 +79,13 @@ enum InstClassEnum {
MIMG,
TBUFFER_LOAD,
TBUFFER_STORE,
+ GLOBAL_LOAD_SADDR,
+ GLOBAL_STORE_SADDR,
+ FLAT_LOAD,
+ FLAT_STORE,
+ GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
+ GLOBAL_STORE // any CombineInfo; they are only ever returned by
+ // getCommonInstClass.
};
struct AddressRegs {
@@ -86,6 +93,7 @@ struct AddressRegs {
bool SBase = false;
bool SRsrc = false;
bool SOffset = false;
+ bool SAddr = false;
bool VAddr = false;
bool Addr = false;
bool SSamp = false;
@@ -160,6 +168,11 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
}
void setMI(MachineBasicBlock::iterator MI, const SILoadStoreOptimizer &LSO);
+
+ // Compare by pointer order.
+ bool operator<(const CombineInfo& Other) const {
+ return (InstClass == MIMG) ? DMask < Other.DMask : Offset < Other.Offset;
+ }
};
struct BaseRegisters {
@@ -185,6 +198,9 @@ private:
AliasAnalysis *AA = nullptr;
bool OptimizeAgain;
+ bool canSwapInstructions(const DenseSet<Register> &ARegDefs,
+ const DenseSet<Register> &ARegUses,
+ const MachineInstr &A, const MachineInstr &B) const;
static bool dmasksCanBeCombined(const CombineInfo &CI,
const SIInstrInfo &TII,
const CombineInfo &Paired);
@@ -199,38 +215,43 @@ private:
const CombineInfo &Paired);
const TargetRegisterClass *getDataRegClass(const MachineInstr &MI) const;
- bool checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired,
- SmallVectorImpl<MachineInstr *> &InstsToMove);
+ CombineInfo *checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired);
unsigned read2Opcode(unsigned EltSize) const;
unsigned read2ST64Opcode(unsigned EltSize) const;
- MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI,
- CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator
+ mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore);
unsigned write2Opcode(unsigned EltSize) const;
unsigned write2ST64Opcode(unsigned EltSize) const;
MachineBasicBlock::iterator
mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator InsertBefore);
MachineBasicBlock::iterator
mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator InsertBefore);
MachineBasicBlock::iterator
mergeSBufferLoadImmPair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator InsertBefore);
MachineBasicBlock::iterator
mergeBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator InsertBefore);
MachineBasicBlock::iterator
mergeBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator InsertBefore);
MachineBasicBlock::iterator
mergeTBufferLoadPair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator InsertBefore);
MachineBasicBlock::iterator
mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove);
+ MachineBasicBlock::iterator InsertBefore);
+ MachineBasicBlock::iterator
+ mergeFlatLoadPair(CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore);
+ MachineBasicBlock::iterator
+ mergeFlatStorePair(CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore);
void updateBaseAndOffset(MachineInstr &I, Register NewBase,
int32_t NewOffset) const;
@@ -252,6 +273,12 @@ private:
MemInfoMap &Visited, SmallPtrSet<MachineInstr *, 4> &AnchorList,
std::list<std::list<CombineInfo>> &MergeableInsts) const;
+ static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI,
+ const CombineInfo &Paired);
+
+ static InstClassEnum getCommonInstClass(const CombineInfo &CI,
+ const CombineInfo &Paired);
+
public:
static char ID;
@@ -298,10 +325,35 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
switch (Opc) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
+ case AMDGPU::GLOBAL_LOAD_DWORD:
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORD:
+ case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORD:
+ case AMDGPU::FLAT_STORE_DWORD:
return 1;
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX2:
+ case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX2:
+ case AMDGPU::FLAT_STORE_DWORDX2:
return 2;
+ case AMDGPU::GLOBAL_LOAD_DWORDX3:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX3:
+ case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX3:
+ case AMDGPU::FLAT_STORE_DWORDX3:
+ return 3;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX4:
+ case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX4:
+ case AMDGPU::FLAT_STORE_DWORDX4:
return 4;
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return 8;
@@ -386,11 +438,40 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64:
case AMDGPU::DS_WRITE_B64_gfx9:
return DS_WRITE;
+ case AMDGPU::GLOBAL_LOAD_DWORD:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4:
+ case AMDGPU::FLAT_LOAD_DWORD:
+ case AMDGPU::FLAT_LOAD_DWORDX2:
+ case AMDGPU::FLAT_LOAD_DWORDX3:
+ case AMDGPU::FLAT_LOAD_DWORDX4:
+ return FLAT_LOAD;
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+ return GLOBAL_LOAD_SADDR;
+ case AMDGPU::GLOBAL_STORE_DWORD:
+ case AMDGPU::GLOBAL_STORE_DWORDX2:
+ case AMDGPU::GLOBAL_STORE_DWORDX3:
+ case AMDGPU::GLOBAL_STORE_DWORDX4:
+ case AMDGPU::FLAT_STORE_DWORD:
+ case AMDGPU::FLAT_STORE_DWORDX2:
+ case AMDGPU::FLAT_STORE_DWORDX3:
+ case AMDGPU::FLAT_STORE_DWORDX4:
+ return FLAT_STORE;
+ case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+ return GLOBAL_STORE_SADDR;
}
}
/// Determines instruction subclass from opcode. Only instructions
-/// of the same subclass can be merged together.
+/// of the same subclass can be merged together. The merged instruction may have
+/// a different subclass but must have the same class.
static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
switch (Opc) {
default:
@@ -418,9 +499,55 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
+ case AMDGPU::GLOBAL_LOAD_DWORD:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4:
+ case AMDGPU::FLAT_LOAD_DWORD:
+ case AMDGPU::FLAT_LOAD_DWORDX2:
+ case AMDGPU::FLAT_LOAD_DWORDX3:
+ case AMDGPU::FLAT_LOAD_DWORDX4:
+ return AMDGPU::FLAT_LOAD_DWORD;
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+ return AMDGPU::GLOBAL_LOAD_DWORD_SADDR;
+ case AMDGPU::GLOBAL_STORE_DWORD:
+ case AMDGPU::GLOBAL_STORE_DWORDX2:
+ case AMDGPU::GLOBAL_STORE_DWORDX3:
+ case AMDGPU::GLOBAL_STORE_DWORDX4:
+ case AMDGPU::FLAT_STORE_DWORD:
+ case AMDGPU::FLAT_STORE_DWORDX2:
+ case AMDGPU::FLAT_STORE_DWORDX3:
+ case AMDGPU::FLAT_STORE_DWORDX4:
+ return AMDGPU::FLAT_STORE_DWORD;
+ case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+ return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
}
}
+// GLOBAL loads and stores are classified as FLAT initially. If both combined
+// instructions are FLAT GLOBAL, adjust the class to GLOBAL_LOAD or
+// GLOBAL_STORE. If either or both instructions are non-segment-specific FLAT,
+// the resulting combined operation will be FLAT, potentially promoting one of
+// the GLOBAL operations to FLAT.
+// For other instructions, return the original class unmodified.
+InstClassEnum
+SILoadStoreOptimizer::getCommonInstClass(const CombineInfo &CI,
+ const CombineInfo &Paired) {
+ assert(CI.InstClass == Paired.InstClass);
+
+ if ((CI.InstClass == FLAT_LOAD || CI.InstClass == FLAT_STORE) &&
+ SIInstrInfo::isFLATGlobal(*CI.I) && SIInstrInfo::isFLATGlobal(*Paired.I))
+ return (CI.InstClass == FLAT_STORE) ? GLOBAL_STORE : GLOBAL_LOAD;
+
+ return CI.InstClass;
+}
+
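
Concretely, assuming the opcode tables added to getNewOpcode below: two GLOBAL_LOAD_DWORDs merge under class GLOBAL_LOAD and become GLOBAL_LOAD_DWORDX2, while a GLOBAL_LOAD_DWORD paired with a FLAT_LOAD_DWORD keeps class FLAT_LOAD and becomes FLAT_LOAD_DWORDX2, promoting the global access to flat. A standalone restatement of the rule:

    enum Cls { FLAT_LOAD, FLAT_STORE, GLOBAL_LOAD, GLOBAL_STORE, OTHER };

    Cls commonClass(Cls C, bool BothAccessesAreGlobal) {
      if ((C == FLAT_LOAD || C == FLAT_STORE) && BothAccessesAreGlobal)
        return C == FLAT_STORE ? GLOBAL_STORE : GLOBAL_LOAD;
      return C; // unchanged for every other class
    }
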
static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
AddressRegs Result;
@@ -480,6 +607,34 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64_gfx9:
Result.Addr = true;
return Result;
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+ Result.SAddr = true;
+ LLVM_FALLTHROUGH;
+ case AMDGPU::GLOBAL_LOAD_DWORD:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4:
+ case AMDGPU::GLOBAL_STORE_DWORD:
+ case AMDGPU::GLOBAL_STORE_DWORDX2:
+ case AMDGPU::GLOBAL_STORE_DWORDX3:
+ case AMDGPU::GLOBAL_STORE_DWORDX4:
+ case AMDGPU::FLAT_LOAD_DWORD:
+ case AMDGPU::FLAT_LOAD_DWORDX2:
+ case AMDGPU::FLAT_LOAD_DWORDX3:
+ case AMDGPU::FLAT_LOAD_DWORDX4:
+ case AMDGPU::FLAT_STORE_DWORD:
+ case AMDGPU::FLAT_STORE_DWORDX2:
+ case AMDGPU::FLAT_STORE_DWORDX3:
+ case AMDGPU::FLAT_STORE_DWORDX4:
+ Result.VAddr = true;
+ return Result;
}
}
@@ -551,6 +706,9 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
if (Regs.SOffset)
AddrIdx[NumAddresses++] =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset);
+ if (Regs.SAddr)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
if (Regs.VAddr)
AddrIdx[NumAddresses++] =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
@@ -579,92 +737,58 @@ FunctionPass *llvm::createSILoadStoreOptimizerPass() {
return new SILoadStoreOptimizer();
}
-static void moveInstsAfter(MachineBasicBlock::iterator I,
- ArrayRef<MachineInstr *> InstsToMove) {
- MachineBasicBlock *MBB = I->getParent();
- ++I;
- for (MachineInstr *MI : InstsToMove) {
- MI->removeFromParent();
- MBB->insert(I, MI);
- }
-}
-
static void addDefsUsesToList(const MachineInstr &MI,
DenseSet<Register> &RegDefs,
- DenseSet<Register> &PhysRegUses) {
- for (const MachineOperand &Op : MI.operands()) {
- if (Op.isReg()) {
- if (Op.isDef())
- RegDefs.insert(Op.getReg());
- else if (Op.readsReg() && Op.getReg().isPhysical())
- PhysRegUses.insert(Op.getReg());
- }
- }
-}
-
-static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
- MachineBasicBlock::iterator B,
- AliasAnalysis *AA) {
- // RAW or WAR - cannot reorder
- // WAW - cannot reorder
- // RAR - safe to reorder
- return !(A->mayStore() || B->mayStore()) || !A->mayAlias(AA, *B, true);
-}
-
-// Add MI and its defs to the lists if MI reads one of the defs that are
-// already in the list. Returns true in that case.
-static bool addToListsIfDependent(MachineInstr &MI, DenseSet<Register> &RegDefs,
- DenseSet<Register> &PhysRegUses,
- SmallVectorImpl<MachineInstr *> &Insts) {
- for (MachineOperand &Use : MI.operands()) {
- // If one of the defs is read, then there is a use of Def between I and the
- // instruction that I will potentially be merged with. We will need to move
- // this instruction after the merged instructions.
- //
- // Similarly, if there is a def which is read by an instruction that is to
- // be moved for merging, then we need to move the def-instruction as well.
- // This can only happen for physical registers such as M0; virtual
- // registers are in SSA form.
- if (Use.isReg() && ((Use.readsReg() && RegDefs.count(Use.getReg())) ||
- (Use.isDef() && RegDefs.count(Use.getReg())) ||
- (Use.isDef() && Use.getReg().isPhysical() &&
- PhysRegUses.count(Use.getReg())))) {
- Insts.push_back(&MI);
- addDefsUsesToList(MI, RegDefs, PhysRegUses);
- return true;
- }
+ DenseSet<Register> &RegUses) {
+ for (const auto &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+ if (Op.isDef())
+ RegDefs.insert(Op.getReg());
+ if (Op.readsReg())
+ RegUses.insert(Op.getReg());
}
-
- return false;
}
-static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
- ArrayRef<MachineInstr *> InstsToMove,
- AliasAnalysis *AA) {
- assert(MemOp.mayLoadOrStore());
-
- for (MachineInstr *InstToMove : InstsToMove) {
- if (!InstToMove->mayLoadOrStore())
+bool SILoadStoreOptimizer::canSwapInstructions(
+ const DenseSet<Register> &ARegDefs, const DenseSet<Register> &ARegUses,
+ const MachineInstr &A, const MachineInstr &B) const {
+ if (A.mayLoadOrStore() && B.mayLoadOrStore() &&
+ (A.mayStore() || B.mayStore()) && A.mayAlias(AA, B, true))
+ return false;
+ for (const auto &BOp : B.operands()) {
+ if (!BOp.isReg())
continue;
- if (!memAccessesCanBeReordered(MemOp, *InstToMove, AA))
+ if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
+ return false;
+ if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
return false;
}
return true;
}
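
Alongside the memory-aliasing test, the loop rejects the three register hazards: RAW (B reads a register A defines), WAW (B defines a register A defines), and WAR (B defines a register A reads). The same rule over plain sets, as a self-contained sketch:

    #include <set>

    using RegSet = std::set<unsigned>;

    bool regsAllowSwap(const RegSet &ADefs, const RegSet &AUses,
                       const RegSet &BDefs, const RegSet &BUses) {
      for (unsigned R : BDefs)
        if (ADefs.count(R) || AUses.count(R)) // WAW or WAR hazard
          return false;
      for (unsigned R : BUses)
        if (ADefs.count(R)) // RAW hazard
          return false;
      return true;
    }
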
-// This function assumes that \p A and \p B have are identical except for
-// size and offset, and they reference adjacent memory.
-static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
- const MachineMemOperand *A,
- const MachineMemOperand *B) {
- unsigned MinOffset = std::min(A->getOffset(), B->getOffset());
- unsigned Size = A->getSize() + B->getSize();
- // This function adds the offset parameter to the existing offset for A,
- // so we pass 0 here as the offset and then manually set it to the correct
- // value after the call.
- MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size);
- MMO->setOffset(MinOffset);
- return MMO;
+// Given that \p CI and \p Paired are adjacent memory operations, produce a new
+// MMO for the combined operation with a new access size.
+MachineMemOperand *
+SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI,
+ const CombineInfo &Paired) {
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
+
+ unsigned Size = MMOa->getSize() + MMOb->getSize();
+
+ // The base pointer for the combined operation is the same as the leading
+ // operation's pointer.
+ if (Paired < CI)
+ std::swap(MMOa, MMOb);
+
+ MachinePointerInfo PtrInfo(MMOa->getPointerInfo());
+ // If merging FLAT and GLOBAL set address space to FLAT.
+ if (MMOb->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
+ PtrInfo.AddrSpace = AMDGPUAS::FLAT_ADDRESS;
+
+ MachineFunction *MF = CI.I->getMF();
+ return MF->getMachineMemOperand(MMOa, PtrInfo, Size);
}
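
The arithmetic is simple because adjacency is a precondition: the combined operand starts at the leading access's pointer and covers the sum of the two sizes. A standalone model of the offset/size bookkeeping:

    #include <cstdint>
    #include <utility>

    struct Access { int64_t Offset; uint64_t Size; };

    // Assumes a and b are adjacent, i.e. one ends where the other begins.
    Access combineAdjacent(Access a, Access b) {
      if (b.Offset < a.Offset)
        std::swap(a, b);                  // a is now the leading access
      return {a.Offset, a.Size + b.Size}; // e.g. {0,4} and {4,4} -> {0,8}
    }
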
bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
@@ -787,8 +911,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width == EltOffset1 ||
EltOffset1 + Paired.Width == EltOffset0) &&
- CI.CPol == Paired.CPol &&
- (CI.InstClass == S_BUFFER_LOAD_IMM || CI.CPol == Paired.CPol);
+ CI.CPol == Paired.CPol;
}
// If the offset in elements doesn't fit in 8-bits, we might be able to use
@@ -889,111 +1012,59 @@ SILoadStoreOptimizer::getDataRegClass(const MachineInstr &MI) const {
return nullptr;
}
-/// This function assumes that CI comes before Paired in a basic block.
-bool SILoadStoreOptimizer::checkAndPrepareMerge(
- CombineInfo &CI, CombineInfo &Paired,
- SmallVectorImpl<MachineInstr *> &InstsToMove) {
+/// This function assumes that CI comes before Paired in a basic block. Return
+/// an insertion point for the merged instruction or nullptr on failure.
+SILoadStoreOptimizer::CombineInfo *
+SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
+ CombineInfo &Paired) {
+ // If another instruction has already been merged into CI, it may now be a
+ // type that we can't do any further merging into.
+ if (CI.InstClass == UNKNOWN || Paired.InstClass == UNKNOWN)
+ return nullptr;
+ assert(CI.InstClass == Paired.InstClass);
+
+ if (getInstSubclass(CI.I->getOpcode(), *TII) !=
+ getInstSubclass(Paired.I->getOpcode(), *TII))
+ return nullptr;
// Check both offsets (or masks for MIMG) can be combined and fit in the
// reduced range.
- if (CI.InstClass == MIMG && !dmasksCanBeCombined(CI, *TII, Paired))
- return false;
-
- if (CI.InstClass != MIMG &&
- (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired)))
- return false;
-
- const unsigned Opc = CI.I->getOpcode();
- const InstClassEnum InstClass = getInstClass(Opc, *TII);
-
- if (InstClass == UNKNOWN) {
- return false;
+ if (CI.InstClass == MIMG) {
+ if (!dmasksCanBeCombined(CI, *TII, Paired))
+ return nullptr;
+ } else {
+ if (!widthsFit(*STM, CI, Paired) || !offsetsCanBeCombined(CI, *STM, Paired))
+ return nullptr;
}
- const unsigned InstSubclass = getInstSubclass(Opc, *TII);
-
- DenseSet<Register> RegDefsToMove;
- DenseSet<Register> PhysRegUsesToMove;
- addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
-
- MachineBasicBlock::iterator E = std::next(Paired.I);
- MachineBasicBlock::iterator MBBI = std::next(CI.I);
- MachineBasicBlock::iterator MBBE = CI.I->getParent()->end();
- for (; MBBI != E; ++MBBI) {
-
- if (MBBI == MBBE) {
- // CombineInfo::Order is a hint on the instruction ordering within the
- // basic block. This hint suggests that CI precedes Paired, which is
- // true most of the time. However, moveInstsAfter() processing a
- // previous list may have changed this order in a situation when it
- // moves an instruction which exists in some other merge list.
- // In this case it must be dependent.
- return false;
- }
-
- if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) ||
- (getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) {
- // This is not a matching instruction, but we can keep looking as
- // long as one of these conditions are met:
- // 1. It is safe to move I down past MBBI.
- // 2. It is safe to move MBBI down past the instruction that I will
- // be merged into.
-
- if (MBBI->mayLoadOrStore() &&
- (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))) {
- // We fail condition #1, but we may still be able to satisfy condition
- // #2. Add this instruction to the move list and then we will check
- // if condition #2 holds once we have selected the matching instruction.
- InstsToMove.push_back(&*MBBI);
- addDefsUsesToList(*MBBI, RegDefsToMove, PhysRegUsesToMove);
- continue;
- }
- // When we match I with another DS instruction we will be moving I down
- // to the location of the matched instruction any uses of I will need to
- // be moved down as well.
- addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
- InstsToMove);
- continue;
+ DenseSet<Register> RegDefs;
+ DenseSet<Register> RegUses;
+ CombineInfo *Where;
+ if (CI.I->mayLoad()) {
+ // Try to hoist Paired up to CI.
+ addDefsUsesToList(*Paired.I, RegDefs, RegUses);
+ for (MachineBasicBlock::iterator MBBI = Paired.I; --MBBI != CI.I;) {
+ if (!canSwapInstructions(RegDefs, RegUses, *Paired.I, *MBBI))
+ return nullptr;
}
-
- // Handle a case like
- // DS_WRITE_B32 addr, v, idx0
- // w = DS_READ_B32 addr, idx0
- // DS_WRITE_B32 addr, f(w), idx1
- // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
- // merging of the two writes.
- if (addToListsIfDependent(*MBBI, RegDefsToMove, PhysRegUsesToMove,
- InstsToMove))
- continue;
-
- if (&*MBBI == &*Paired.I) {
- // We need to go through the list of instructions that we plan to
- // move and make sure they are all safe to move down past the merged
- // instruction.
- if (canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA)) {
-
- // Call offsetsCanBeCombined with modify = true so that the offsets are
- // correct for the new instruction. This should return true, because
- // this function should only be called on CombineInfo objects that
- // have already been confirmed to be mergeable.
- if (CI.InstClass != MIMG)
- offsetsCanBeCombined(CI, *STM, Paired, true);
- return true;
- }
- return false;
+ Where = &CI;
+ } else {
+ // Try to sink CI down to Paired.
+ addDefsUsesToList(*CI.I, RegDefs, RegUses);
+ for (MachineBasicBlock::iterator MBBI = CI.I; ++MBBI != Paired.I;) {
+ if (!canSwapInstructions(RegDefs, RegUses, *CI.I, *MBBI))
+ return nullptr;
}
-
- // We've found a load/store that we couldn't merge for some reason.
- // We could potentially keep looking, but we'd need to make sure that
- // it was safe to move I and also all the instruction in InstsToMove
- // down past this instruction.
- // check if we can move I across MBBI and if we can move all I's users
- if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))
- break;
+ Where = &Paired;
}
- return false;
+
+ // Call offsetsCanBeCombined with modify = true so that the offsets are
+ // correct for the new instruction. This should return true, because
+ // this function should only be called on CombineInfo objects that
+ // have already been confirmed to be mergeable.
+ if (CI.InstClass == DS_READ || CI.InstClass == DS_WRITE)
+ offsetsCanBeCombined(CI, *STM, Paired, true);
+ return Where;
}
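
In short: for loads the merged instruction is inserted at CI and Paired is hoisted up to it; for stores CI is sunk down to Paired. Either way, every instruction strictly between the pair must be swappable with the one that conceptually moves, which is what the two loops verify. The choice of insertion point, restated as a tiny helper (a sketch, not the pass's actual API):

    enum class InsertAt { CI, Paired };

    InsertAt mergedInsertionPoint(bool IsLoad) {
      // Loads: hoist Paired up to CI; stores: sink CI down to Paired.
      return IsLoad ? InsertAt::CI : InsertAt::Paired;
    }
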
unsigned SILoadStoreOptimizer::read2Opcode(unsigned EltSize) const {
@@ -1012,7 +1083,7 @@ unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const {
MachineBasicBlock::iterator
SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
@@ -1051,13 +1122,13 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
+ BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
.addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
- TII->getAddNoCarry(*MBB, Paired.I, DL, BaseReg)
+ TII->getAddNoCarry(*MBB, InsertBefore, DL, BaseReg)
.addReg(ImmReg)
.addReg(AddrReg->getReg(), 0, BaseSubReg)
.addImm(0); // clamp bit
@@ -1065,7 +1136,7 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
}
MachineInstrBuilder Read2 =
- BuildMI(*MBB, Paired.I, DL, Read2Desc, DestReg)
+ BuildMI(*MBB, InsertBefore, DL, Read2Desc, DestReg)
.addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
@@ -1077,14 +1148,12 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI, CombineInfo &Paired,
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
// Copy to the old destination registers.
- BuildMI(*MBB, Paired.I, DL, CopyDesc)
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
-
- moveInstsAfter(Copy1, InstsToMove);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1109,9 +1178,9 @@ unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const {
: AMDGPU::DS_WRITE2ST64_B64_gfx9;
}
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
+ CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
@@ -1145,13 +1214,13 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
+ BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg)
.addImm(CI.BaseOff);
BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BaseRegFlags = RegState::Kill;
- TII->getAddNoCarry(*MBB, Paired.I, DL, BaseReg)
+ TII->getAddNoCarry(*MBB, InsertBefore, DL, BaseReg)
.addReg(ImmReg)
.addReg(AddrReg->getReg(), 0, BaseSubReg)
.addImm(0); // clamp bit
@@ -1159,7 +1228,7 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
}
MachineInstrBuilder Write2 =
- BuildMI(*MBB, Paired.I, DL, Write2Desc)
+ BuildMI(*MBB, InsertBefore, DL, Write2Desc)
.addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
.add(*Data0) // data0
.add(*Data1) // data1
@@ -1168,8 +1237,6 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
.addImm(0) // gds
.cloneMergedMemRefs({&*CI.I, &*Paired.I});
- moveInstsAfter(Write2, InstsToMove);
-
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1179,7 +1246,7 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator
SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1191,7 +1258,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
unsigned DMaskIdx =
AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask);
- auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
+ auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg);
for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) {
if (I == DMaskIdx)
MIB.addImm(MergedDMask);
@@ -1204,10 +1271,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
- MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
unsigned SubRegIdx0, SubRegIdx1;
std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired);
@@ -1217,14 +1281,12 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, Paired.I, DL, CopyDesc)
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
-
- moveInstsAfter(Copy1, InstsToMove);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1233,7 +1295,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
const unsigned Opcode = getNewOpcode(CI, Paired);
@@ -1248,15 +1310,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
- BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
- .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
- .addImm(MergedOffset) // offset
- .addImm(CI.CPol) // cpol
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg)
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
+ .addImm(MergedOffset) // offset
+ .addImm(CI.CPol) // cpol
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1267,14 +1326,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst);
const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::sdst);
- BuildMI(*MBB, Paired.I, DL, CopyDesc)
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
-
- moveInstsAfter(Copy1, InstsToMove);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1283,7 +1340,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1295,7 +1352,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
Register DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
- auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
+ auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg);
AddressRegs Regs = getRegs(Opcode, *TII);
@@ -1307,9 +1364,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1317,7 +1371,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1328,14 +1382,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, Paired.I, DL, CopyDesc)
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
-
- moveInstsAfter(Copy1, InstsToMove);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1344,7 +1396,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1356,7 +1408,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
Register DestReg = MRI->createVirtualRegister(SuperRC);
unsigned MergedOffset = std::min(CI.Offset, Paired.Offset);
- auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg);
+ auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg);
AddressRegs Regs = getRegs(Opcode, *TII);
@@ -1371,9 +1423,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1382,8 +1431,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1394,14 +1442,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, Paired.I, DL, CopyDesc)
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
.add(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, Paired.I, DL, CopyDesc)
- .add(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
-
- moveInstsAfter(Copy1, InstsToMove);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1410,7 +1456,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1427,13 +1473,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+ BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
.add(*Src0)
.addImm(SubRegIdx0)
.add(*Src1)
.addImm(SubRegIdx1);
- auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode))
+ auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
AddressRegs Regs = getRegs(Opcode, *TII);
@@ -1449,9 +1495,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1460,10 +1503,92 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
+
+ CI.I->eraseFromParent();
+ Paired.I->eraseFromParent();
+ return New;
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair(
+ CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore) {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ const unsigned Opcode = getNewOpcode(CI, Paired);
+
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
+ Register DestReg = MRI->createVirtualRegister(SuperRC);
+
+ auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg);
+
+ if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
+ MIB.add(*SAddr);
+
+ MachineInstr *New =
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
+ .addImm(std::min(CI.Offset, Paired.Offset))
+ .addImm(CI.CPol)
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
+
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+ // Copy to the old destination registers.
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst);
+ const auto *Dest1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdst);
- moveInstsAfter(MIB, InstsToMove);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ BuildMI(*MBB, InsertBefore, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
+
+ CI.I->eraseFromParent();
+ Paired.I->eraseFromParent();
+ return New;
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
+ CombineInfo &CI, CombineInfo &Paired,
+ MachineBasicBlock::iterator InsertBefore) {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ const unsigned Opcode = getNewOpcode(CI, Paired);
+
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+ // Copy to the new source register.
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI, Paired);
+ Register SrcReg = MRI->createVirtualRegister(SuperRC);
+
+ const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
+ const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
+
+ BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+ .add(*Src0)
+ .addImm(SubRegIdx0)
+ .add(*Src1)
+ .addImm(SubRegIdx1);
+
+ auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
+ .addReg(SrcReg, RegState::Kill);
+
+ if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
+ MIB.add(*SAddr);
+
+ MachineInstr *New =
+ MIB.addImm(std::min(CI.Offset, Paired.Offset))
+ .addImm(CI.CPol)
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1474,7 +1599,7 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
const CombineInfo &Paired) {
const unsigned Width = CI.Width + Paired.Width;
- switch (CI.InstClass) {
+ switch (getCommonInstClass(CI, Paired)) {
default:
assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE);
// FIXME: Handle d16 correctly
@@ -1498,6 +1623,72 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
case 8:
return AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM;
}
+ case GLOBAL_LOAD:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::GLOBAL_LOAD_DWORDX2;
+ case 3:
+ return AMDGPU::GLOBAL_LOAD_DWORDX3;
+ case 4:
+ return AMDGPU::GLOBAL_LOAD_DWORDX4;
+ }
+ case GLOBAL_LOAD_SADDR:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR;
+ case 3:
+ return AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR;
+ case 4:
+ return AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR;
+ }
+ case GLOBAL_STORE:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::GLOBAL_STORE_DWORDX2;
+ case 3:
+ return AMDGPU::GLOBAL_STORE_DWORDX3;
+ case 4:
+ return AMDGPU::GLOBAL_STORE_DWORDX4;
+ }
+ case GLOBAL_STORE_SADDR:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::GLOBAL_STORE_DWORDX2_SADDR;
+ case 3:
+ return AMDGPU::GLOBAL_STORE_DWORDX3_SADDR;
+ case 4:
+ return AMDGPU::GLOBAL_STORE_DWORDX4_SADDR;
+ }
+ case FLAT_LOAD:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::FLAT_LOAD_DWORDX2;
+ case 3:
+ return AMDGPU::FLAT_LOAD_DWORDX3;
+ case 4:
+ return AMDGPU::FLAT_LOAD_DWORDX4;
+ }
+ case FLAT_STORE:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::FLAT_STORE_DWORDX2;
+ case 3:
+ return AMDGPU::FLAT_STORE_DWORDX3;
+ case 4:
+ return AMDGPU::FLAT_STORE_DWORDX4;
+ }
case MIMG:
assert((countPopulation(CI.DMask | Paired.DMask) == Width) &&
"No overlaps");
@@ -1508,15 +1699,9 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
std::pair<unsigned, unsigned>
SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
const CombineInfo &Paired) {
- bool ReverseOrder;
- if (CI.InstClass == MIMG) {
- assert(
- (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
- "No overlaps");
- ReverseOrder = CI.DMask > Paired.DMask;
- } else {
- ReverseOrder = CI.Offset > Paired.Offset;
- }
+ assert((CI.InstClass != MIMG || (countPopulation(CI.DMask | Paired.DMask) ==
+ CI.Width + Paired.Width)) &&
+ "No overlaps");
unsigned Idx0;
unsigned Idx1;
@@ -1532,7 +1717,7 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
assert(CI.Width >= 1 && CI.Width <= 4);
assert(Paired.Width >= 1 && Paired.Width <= 4);
- if (ReverseOrder) {
+ if (Paired < CI) {
Idx1 = Idxs[0][Paired.Width - 1];
Idx0 = Idxs[Paired.Width][CI.Width - 1];
} else {
@@ -1569,7 +1754,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
CombineInfo &CI, CombineInfo &Paired,
- const SmallVectorImpl<MachineInstr *> &InstsToMove) {
+ MachineBasicBlock::iterator InsertBefore) {
MachineBasicBlock *MBB = CI.I->getParent();
DebugLoc DL = CI.I->getDebugLoc();
@@ -1586,13 +1771,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
const auto *Src1 = TII->getNamedOperand(*Paired.I, AMDGPU::OpName::vdata);
- BuildMI(*MBB, Paired.I, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+ BuildMI(*MBB, InsertBefore, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
.add(*Src0)
.addImm(SubRegIdx0)
.add(*Src1)
.addImm(SubRegIdx1);
- auto MIB = BuildMI(*MBB, Paired.I, DL, TII->get(Opcode))
+ auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode))
.addReg(SrcReg, RegState::Kill);
AddressRegs Regs = getRegs(Opcode, *TII);
@@ -1606,9 +1791,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1616,9 +1798,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
-
- moveInstsAfter(MIB, InstsToMove);
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1846,7 +2026,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
// from which (&a + 4096) has a 13-bit distance. Both &a + 6144 and &a + 8192
// have a 13-bit distance from &a + 4096. The heuristic considers &a + 8192
// as the new-base(anchor) because of the maximum distance which can
- // accomodate more intermediate bases presumeably.
+ // accommodate more intermediate bases presumably.
//
// Step3: move (&a + 8192) above load1. Compute and promote offsets from
// (&a + 8192) for load1, load2, load4.
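
The distances in the example come from the signed 13-bit immediate window: with the anchor at &a + 8192, the other bases sit at displacements of -4096 and -2048, both representable, which is why the farthest base makes the best anchor. A quick standalone check, assuming that 13-bit signed window:

    #include <cstdint>

    // True if Addr is reachable from Base with a signed 13-bit immediate.
    bool fitsSImm13(int64_t Base, int64_t Addr) {
      int64_t D = Addr - Base;
      return D >= -4096 && D <= 4095;
    }
    // fitsSImm13(8192, 4096) and fitsSImm13(8192, 6144) both hold, so
    // load1 and load2 can be re-based on the &a + 8192 anchor.
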
@@ -2098,8 +2278,8 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
CombineInfo &CI = *First;
CombineInfo &Paired = *Second;
- SmallVector<MachineInstr *, 8> InstsToMove;
- if (!checkAndPrepareMerge(CI, Paired, InstsToMove)) {
+ CombineInfo *Where = checkAndPrepareMerge(CI, Paired);
+ if (!Where) {
++I;
continue;
}
@@ -2108,66 +2288,56 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
LLVM_DEBUG(dbgs() << "Merging: " << *CI.I << " with: " << *Paired.I);
+ MachineBasicBlock::iterator NewMI;
switch (CI.InstClass) {
default:
llvm_unreachable("unknown InstClass");
break;
- case DS_READ: {
- MachineBasicBlock::iterator NewMI =
- mergeRead2Pair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
+ case DS_READ:
+ NewMI = mergeRead2Pair(CI, Paired, Where->I);
break;
- }
- case DS_WRITE: {
- MachineBasicBlock::iterator NewMI =
- mergeWrite2Pair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
+ case DS_WRITE:
+ NewMI = mergeWrite2Pair(CI, Paired, Where->I);
break;
- }
- case S_BUFFER_LOAD_IMM: {
- MachineBasicBlock::iterator NewMI =
- mergeSBufferLoadImmPair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 8;
+ case S_BUFFER_LOAD_IMM:
+ NewMI = mergeSBufferLoadImmPair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 8;
break;
- }
- case BUFFER_LOAD: {
- MachineBasicBlock::iterator NewMI =
- mergeBufferLoadPair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
+ case BUFFER_LOAD:
+ NewMI = mergeBufferLoadPair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
- }
- case BUFFER_STORE: {
- MachineBasicBlock::iterator NewMI =
- mergeBufferStorePair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
+ case BUFFER_STORE:
+ NewMI = mergeBufferStorePair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
- }
- case MIMG: {
- MachineBasicBlock::iterator NewMI =
- mergeImagePair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
+ case MIMG:
+ NewMI = mergeImagePair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
- }
- case TBUFFER_LOAD: {
- MachineBasicBlock::iterator NewMI =
- mergeTBufferLoadPair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
+ case TBUFFER_LOAD:
+ NewMI = mergeTBufferLoadPair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
- }
- case TBUFFER_STORE: {
- MachineBasicBlock::iterator NewMI =
- mergeTBufferStorePair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *this);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
+ case TBUFFER_STORE:
+ NewMI = mergeTBufferStorePair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 4;
+ break;
+ case FLAT_LOAD:
+ case GLOBAL_LOAD:
+ case GLOBAL_LOAD_SADDR:
+ NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 4;
+ break;
+ case FLAT_STORE:
+ case GLOBAL_STORE:
+ case GLOBAL_STORE_SADDR:
+ NewMI = mergeFlatStorePair(CI, Paired, Where->I);
+ OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
}
- }
- CI.Order = Paired.Order;
+ CI.setMI(NewMI, *this);
+ CI.Order = Where->Order;
if (I == Second)
I = Next;
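The new driver shape is easier to see outside the diff. Below is a minimal standalone C++ sketch of the control flow only: checkAndPrepareMerge now returns a pointer acting as an optional insertion point (nullptr meaning "do not merge"), and the setMI/Order updates are hoisted out of the per-class switch. The types and stub logic are simplified stand-ins, not the real SILoadStoreOptimizer API.

struct CombineInfo {
  int Order = 0;
  int InsertPoint = 0; // stand-in for the MachineBasicBlock::iterator I
};

// Returns the CombineInfo at whose position the merged instruction should
// be inserted, or nullptr if the pair must not be merged.
CombineInfo *checkAndPrepareMerge(CombineInfo &CI, CombineInfo &Paired) {
  return Paired.Order > CI.Order ? &Paired : &CI; // illustrative only
}

int mergePair(CombineInfo &CI, CombineInfo &Paired, int InsertBefore) {
  return InsertBefore; // stand-in for the per-class merge functions
}

bool tryMergePair(CombineInfo &CI, CombineInfo &Paired) {
  CombineInfo *Where = checkAndPrepareMerge(CI, Paired);
  if (!Where)
    return false;              // pair rejected; the caller tries the next one
  int NewMI = mergePair(CI, Paired, Where->InsertPoint);
  CI.InsertPoint = NewMI;      // common tail, hoisted out of the switch
  CI.Order = Where->Order;
  return true;
}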
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index e1018bdfde46..607383ab8cde 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -509,8 +509,35 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
.addReg(Exec)
.add(MI.getOperand(0));
- if (LV)
- LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *NewMI);
+ if (LV) {
+ LV->replaceKillInstruction(DataReg, MI, *NewMI);
+
+ if (SplitBB != &MBB) {
+ // Track the set of registers defined in the split block so we don't
+ // accidentally add the original block to AliveBlocks.
+ DenseSet<Register> SplitDefs;
+ for (MachineInstr &X : *SplitBB) {
+ for (MachineOperand &Op : X.operands()) {
+ if (Op.isReg() && Op.isDef() && Op.getReg().isVirtual())
+ SplitDefs.insert(Op.getReg());
+ }
+ }
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ Register Reg = Register::index2VirtReg(i);
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+
+ if (VI.AliveBlocks.test(MBB.getNumber()))
+ VI.AliveBlocks.set(SplitBB->getNumber());
+ else {
+ for (MachineInstr *Kill : VI.Kills) {
+ if (Kill->getParent() == SplitBB && !SplitDefs.contains(Reg))
+ VI.AliveBlocks.set(MBB.getNumber());
+ }
+ }
+ }
+ }
+ }
LoweredEndCf.insert(NewMI);
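The invariant maintained above, restated as a minimal standalone sketch with simplified data structures (not the real LiveVariables API): a register live through the original block must also be live through the split block, and a register killed in the split block without being defined there must have been live through the original block.

#include <set>

struct VarInfo {
  std::set<int> AliveBlocks; // blocks the register is live through
  std::set<int> KillBlocks;  // blocks containing a kill of the register
};

void updateForSplitBlock(VarInfo &VI, int OrigBB, int SplitBB,
                         bool SplitBlockDefinesReg) {
  if (VI.AliveBlocks.count(OrigBB)) {
    // Live through the original block => also live through the split block.
    VI.AliveBlocks.insert(SplitBB);
  } else if (VI.KillBlocks.count(SplitBB) && !SplitBlockDefinesReg) {
    // Killed in the split block but not defined there: the value flowed
    // through the original block, which must now be marked live-through.
    VI.AliveBlocks.insert(OrigBB);
  }
}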
@@ -540,7 +567,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
return;
// Make sure we do not modify exec between def and use.
- // A copy with implcitly defined exec inserted earlier is an exclusion, it
+ // A copy with implicitly defined exec inserted earlier is an exclusion, it
// does not really modify exec.
for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
@@ -573,14 +600,14 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
else return;
Register Reg = MI.getOperand(OpToReplace).getReg();
- MI.RemoveOperand(OpToReplace);
+ MI.removeOperand(OpToReplace);
MI.addOperand(Ops[UniqueOpndIdx]);
if (MRI->use_empty(Reg))
MRI->getUniqueVRegDef(Reg)->eraseFromParent();
}
void SILowerControlFlow::optimizeEndCf() {
- // If the only instruction immediately following this END_CF is an another
+ // If the only instruction immediately following this END_CF is another
// END_CF in the only successor we can avoid emitting exec mask restore here.
if (!EnableOptimizeEndCf)
return;
@@ -865,6 +892,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
}
}
+ bool Changed = false;
MachineFunction::iterator NextBB;
for (MachineFunction::iterator BI = MF.begin();
BI != MF.end(); BI = NextBB) {
@@ -886,6 +914,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_LOOP:
case AMDGPU::SI_END_CF:
SplitMBB = process(MI);
+ Changed = true;
break;
// FIXME: find a better place for this
@@ -894,6 +923,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
lowerInitExec(MBB, MI);
if (LIS)
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
+ Changed = true;
break;
default:
@@ -913,5 +943,5 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
LoweredIf.clear();
KillBlocks.clear();
- return true;
+ return Changed;
}
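The pass now reports whether it actually changed the function instead of unconditionally returning true. A minimal sketch of the convention, with plain C++ stand-ins for the lowering logic:

#include <vector>

// Return true only when something was rewritten, so the pass manager can
// keep cached analyses valid on the no-change path.
bool runOnItems(std::vector<int> &Items) {
  bool Changed = false;
  for (int &I : Items) {
    if (I < 0) {   // stand-in for "this instruction needs lowering"
      I = -I;      // stand-in for the rewrite
      Changed = true;
    }
  }
  return Changed;
}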
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 672266f0c11e..5fb545b50228 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -79,9 +79,9 @@ public:
}
private:
- void lowerCopiesFromI1();
- void lowerPhis();
- void lowerCopiesToI1();
+ bool lowerCopiesFromI1();
+ bool lowerPhis();
+ bool lowerCopiesToI1();
bool isConstantLaneMask(Register Reg, bool &Val) const;
void buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
@@ -473,15 +473,17 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
OrN2Op = AMDGPU::S_ORN2_B64;
}
- lowerCopiesFromI1();
- lowerPhis();
- lowerCopiesToI1();
+ bool Changed = false;
+ Changed |= lowerCopiesFromI1();
+ Changed |= lowerPhis();
+ Changed |= lowerCopiesToI1();
+ assert(Changed || ConstrainRegs.empty());
for (unsigned Reg : ConstrainRegs)
MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
ConstrainRegs.clear();
- return true;
+ return Changed;
}
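One subtlety in the hunk above: the results are accumulated with |= rather than ||, because || would short-circuit and skip the remaining lowering steps once one of them reports a change. A standalone illustration:

bool stepA() { return true; }
bool stepB() { return false; }

bool runAll() {
  bool Changed = false;
  Changed |= stepA(); // always evaluated
  Changed |= stepB(); // always evaluated
  // By contrast, 'Changed = Changed || stepB();' would skip stepB() entirely
  // once Changed is already true.
  return Changed;
}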
#ifndef NDEBUG
@@ -493,7 +495,8 @@ static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
}
#endif
-void SILowerI1Copies::lowerCopiesFromI1() {
+bool SILowerI1Copies::lowerCopiesFromI1() {
+ bool Changed = false;
SmallVector<MachineInstr *, 4> DeadCopies;
for (MachineBasicBlock &MBB : *MF) {
@@ -509,6 +512,8 @@ void SILowerI1Copies::lowerCopiesFromI1() {
if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
continue;
+ Changed = true;
+
// Copy into a 32-bit vector register.
LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI);
DebugLoc DL = MI.getDebugLoc();
@@ -530,9 +535,10 @@ void SILowerI1Copies::lowerCopiesFromI1() {
MI->eraseFromParent();
DeadCopies.clear();
}
+ return Changed;
}
-void SILowerI1Copies::lowerPhis() {
+bool SILowerI1Copies::lowerPhis() {
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
PhiIncomingAnalysis PIA(*PDT);
@@ -550,6 +556,8 @@ void SILowerI1Copies::lowerPhis() {
Vreg1Phis.push_back(&MI);
}
}
+ if (Vreg1Phis.empty())
+ return false;
MachineBasicBlock *PrevMBB = nullptr;
for (MachineInstr *MI : Vreg1Phis) {
@@ -662,9 +670,11 @@ void SILowerI1Copies::lowerPhis() {
IncomingRegs.clear();
IncomingUpdated.clear();
}
+ return true;
}
-void SILowerI1Copies::lowerCopiesToI1() {
+bool SILowerI1Copies::lowerCopiesToI1() {
+ bool Changed = false;
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
SmallVector<MachineInstr *, 4> DeadCopies;
@@ -681,6 +691,8 @@ void SILowerI1Copies::lowerCopiesToI1() {
if (!isVreg1(DstReg))
continue;
+ Changed = true;
+
if (MRI->use_empty(DstReg)) {
DeadCopies.push_back(&MI);
continue;
@@ -731,6 +743,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
MI->eraseFromParent();
DeadCopies.clear();
}
+ return Changed;
}
bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 0fbdbef6fcce..dd881ec42d53 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -20,6 +20,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/InitializePasses.h"
@@ -79,6 +80,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *RI = ST.getRegisterInfo();
MachineBasicBlock::iterator I = SaveBlock.begin();
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
@@ -89,8 +92,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
MCRegister Reg = CS.getReg();
MachineInstrSpan MIS(I, &SaveBlock);
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+ Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
// If this value was already livein, we probably have a direct use of the
// incoming register value, so don't kill at the spill point. This happens
@@ -119,7 +122,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *RI = ST.getRegisterInfo();
// Restore all registers immediately before the return and any
// terminators that precede it.
MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
@@ -128,8 +132,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
Register Reg = CI.getReg();
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+ Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
assert(I != RestoreBlock.begin() &&
@@ -321,7 +325,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// free frame index ids by the later pass(es) like "stack slot coloring"
// which in turn could mess up the bookkeeping of "frame index to VGPR
// lane".
- FuncInfo->removeDeadFrameIndices(MFI);
+ FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
MadeChange = true;
}
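Both CSR hunks above repeat the same register-class choice: the return address lives in a 64-bit SGPR pair, so it must be spilled with an i64 class, while the other registers here use i32. A hedged sketch factoring out that expression (the helper name is hypothetical; the calls are the ones used in the diff):

const TargetRegisterClass *
pickCSRSpillClass(const TargetRegisterInfo &TRI, const SIRegisterInfo &RI,
                  const MachineFunction &MF, Register Reg) {
  // Return address register: spill as a 64-bit pair; everything else: 32-bit.
  MVT VT = (Reg == RI.getReturnAddressReg(MF)) ? MVT::i64 : MVT::i32;
  return TRI.getMinimalPhysRegClass(Reg, VT);
}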
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index cca8565c9ff9..0504c59ebd9e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -31,6 +31,9 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
+ BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
+ ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
+ GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
@@ -48,8 +51,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitBufferPtr(false),
ImplicitArgPtr(false),
GITPtrHigh(0xffffffff),
- HighBitsOf32BitAddress(0),
- GDSSize(0) {
+ HighBitsOf32BitAddress(0) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
@@ -74,6 +76,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
}
+ MayNeedAGPRs = ST.hasMAIInsts();
+
if (!isEntryFunction()) {
if (CC != CallingConv::AMDGPU_Gfx)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
@@ -97,6 +101,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitArgPtr = false;
MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
MaxKernArgAlign);
+
+ if (ST.hasGFX90AInsts() &&
+ ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
+ !mayUseAGPRs(MF))
+ MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
}
bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
@@ -177,9 +186,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (!S.empty())
S.consumeInteger(0, HighBitsOf32BitAddress);
- S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
- if (!S.empty())
- S.consumeInteger(0, GDSSize);
+ // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
+ // VGPR available at all times. For now, reserve highest available VGPR. After
+ // RA, shift it to the lowest available unused VGPR if one exists.
+ if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+ VGPRForAGPRCopy =
+ AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
+ }
+}
+
+MachineFunctionInfo *SIMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}
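clone() above is the standard MachineFunctionInfo cloning recipe: cloneInfo<> copy-constructs the derived object, which is why the diff also defaults the copy constructor further down. A minimal standalone analogue of the pattern (simplified types, not the real API):

#include <memory>

struct FunctionInfoBase {
  virtual ~FunctionInfoBase() = default;
  virtual std::unique_ptr<FunctionInfoBase> clone() const = 0;
};

struct TargetFunctionInfo final : FunctionInfoBase {
  int State = 0;
  TargetFunctionInfo() = default;
  TargetFunctionInfo(const TargetFunctionInfo &) = default; // enables clone()
  std::unique_ptr<FunctionInfoBase> clone() const override {
    // Copy-construct the most-derived object, as cloneInfo<> does.
    return std::make_unique<TargetFunctionInfo>(*this);
  }
};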
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
@@ -265,7 +285,7 @@ bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
int FI) {
- std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
+ std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
// This has already been allocated.
if (!SpillLanes.empty())
@@ -320,7 +340,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));
- // Add this register as live-in to all blocks to avoid machine verifer
+ // Add this register as live-in to all blocks to avoid machine verifier
// complaining about use of an undefined physical register.
for (MachineBasicBlock &BB : MF)
BB.addLiveIn(LaneVGPR);
@@ -328,7 +348,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
LaneVGPR = SpillVGPRs.back().VGPR;
}
- SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
+ SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));
}
return true;
@@ -402,7 +422,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
return Spill.FullyAllocated;
}
-void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
+bool SIMachineFunctionInfo::removeDeadFrameIndices(
+ MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
// Remove dead frame indices from function frame, however keep FP & BP since
// spills for them haven't been inserted yet. And also make sure to remove the
// frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
@@ -415,17 +436,42 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
}
}
- // All other SPGRs must be allocated on the default stack, so reset the stack
- // ID.
- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
- ++i)
- if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
- MFI.setStackID(i, TargetStackID::Default);
+ bool HaveSGPRToMemory = false;
+
+ if (ResetSGPRSpillStackIDs) {
+ // All other SGPRs must be allocated on the default stack, so reset the
+ // stack ID.
+ for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
+ ++i) {
+ if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
+ if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
+ MFI.setStackID(i, TargetStackID::Default);
+ HaveSGPRToMemory = true;
+ }
+ }
+ }
+ }
for (auto &R : VGPRToAGPRSpills) {
if (R.second.IsDead)
MFI.RemoveStackObject(R.first);
}
+
+ return HaveSGPRToMemory;
+}
+
+void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
+ MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
+ assert(WWMReservedFrameIndexes.empty());
+
+ WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
+
+ int I = 0;
+ for (Register VGPR : WWMReservedRegs) {
+ const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
+ WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
+ TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
+ }
}
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
@@ -539,6 +585,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
const llvm::MachineFunction &MF)
: ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
+ GDSSize(MFI.getGDSSize()),
DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
@@ -549,7 +596,14 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
+ BytesInStackArgArea(MFI.getBytesInStackArgArea()),
+ ReturnsVoid(MFI.returnsVoid()),
ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
+ for (Register Reg : MFI.WWMReservedRegs)
+ WWMReservedRegs.push_back(regToString(Reg, TRI));
+
+ if (MFI.getVGPRForAGPRCopy())
+ VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
auto SFI = MFI.getOptionalScavengeFI();
if (SFI)
ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
@@ -563,8 +617,9 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
- MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
+ MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
LDSSize = YamlMFI.LDSSize;
+ GDSSize = YamlMFI.GDSSize;
DynLDSAlign = YamlMFI.DynLDSAlign;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
Occupancy = YamlMFI.Occupancy;
@@ -574,6 +629,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
WaveLimiter = YamlMFI.WaveLimiter;
HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
+ BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
+ ReturnsVoid = YamlMFI.ReturnsVoid;
if (YamlMFI.ScavengeFI) {
auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
@@ -595,10 +652,47 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
return false;
}
+bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
+ for (const BasicBlock &BB : MF.getFunction()) {
+ for (const Instruction &I : BB) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ if (CB->isInlineAsm()) {
+ const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
+ for (const auto &CI : IA->ParseConstraints()) {
+ for (StringRef Code : CI.Codes) {
+ Code.consume_front("{");
+ if (Code.startswith("a"))
+ return true;
+ }
+ }
+ continue;
+ }
+
+ const Function *Callee =
+ dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
+ if (!Callee)
+ return true;
+
+ if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
+ return true;
+ }
+ }
+
+ return false;
+}
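The constraint scan above strips an optional leading '{' and then tests for the AGPR constraint letter. Just the string handling, as a runnable sketch (plain C++; constraint spellings like "a", "{a5}", "v" are assumptions about the input shape):

#include <string_view>

// True if the constraint code names an AGPR ("a", "{a5}", ...).
bool isAGPRConstraint(std::string_view Code) {
  if (!Code.empty() && Code.front() == '{')
    Code.remove_prefix(1);                       // Code.consume_front("{")
  return !Code.empty() && Code.front() == 'a';   // Code.startswith("a")
}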
+
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
if (UsesAGPRs)
return *UsesAGPRs;
+ if (!mayNeedAGPRs()) {
+ UsesAGPRs = false;
+ return false;
+ }
+
if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
MF.getFrameInfo().hasCalls()) {
UsesAGPRs = true;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 8e821274bb77..bebb13cbf09f 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -15,9 +15,10 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
+#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
-#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"
@@ -39,8 +40,8 @@ public:
};
protected:
- AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
- : PseudoSourceValue(Kind, TII) {}
+ AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
+ : PseudoSourceValue(Kind, TM) {}
public:
bool isConstant(const MachineFrameInfo *) const override {
@@ -60,8 +61,8 @@ public:
class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
- explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
- : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
+ explicit AMDGPUBufferPseudoSourceValue(const AMDGPUTargetMachine &TM)
+ : AMDGPUPseudoSourceValue(PSVBuffer, TM) {}
static bool classof(const PseudoSourceValue *V) {
return V->kind() == PSVBuffer;
@@ -73,8 +74,8 @@ public:
class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
// TODO: Is the img rsrc useful?
- explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
- : AMDGPUPseudoSourceValue(PSVImage, TII) {}
+ explicit AMDGPUImagePseudoSourceValue(const AMDGPUTargetMachine &TM)
+ : AMDGPUPseudoSourceValue(PSVImage, TM) {}
static bool classof(const PseudoSourceValue *V) {
return V->kind() == PSVImage;
@@ -85,8 +86,8 @@ public:
class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
- explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
- : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+ explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
+ : AMDGPUPseudoSourceValue(GWSResource, TM) {}
static bool classof(const PseudoSourceValue *V) {
return V->kind() == GWSResource;
@@ -269,8 +270,9 @@ template <> struct MappingTraits<SIMode> {
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint64_t ExplicitKernArgSize = 0;
- unsigned MaxKernArgAlign = 0;
- unsigned LDSSize = 0;
+ Align MaxKernArgAlign;
+ uint32_t LDSSize = 0;
+ uint32_t GDSSize = 0;
Align DynLDSAlign;
bool IsEntryFunction = false;
bool NoSignedZerosFPMath = false;
@@ -283,13 +285,19 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
// TODO: 10 may be a better default since it's the maximum.
unsigned Occupancy = 0;
+ SmallVector<StringValue> WWMReservedRegs;
+
StringValue ScratchRSrcReg = "$private_rsrc_reg";
StringValue FrameOffsetReg = "$fp_reg";
StringValue StackPtrOffsetReg = "$sp_reg";
+ unsigned BytesInStackArgArea = 0;
+ bool ReturnsVoid = true;
+
Optional<SIArgumentInfo> ArgInfo;
SIMode Mode;
Optional<FrameIndex> ScavengeFI;
+ StringValue VGPRForAGPRCopy;
SIMachineFunctionInfo() = default;
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
@@ -304,8 +312,9 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
UINT64_C(0));
- YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
+ YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+ YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
@@ -319,12 +328,17 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
StringValue("$fp_reg"));
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
StringValue("$sp_reg"));
+ YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
+ YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
YamlIO.mapOptional("highBitsOf32BitAddress",
MFI.HighBitsOf32BitAddress, 0u);
YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
+ YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
+ YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
+ StringValue()); // Don't print out when it's empty.
}
};
@@ -335,8 +349,6 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
friend class GCNTargetMachine;
- Register TIDReg = AMDGPU::NoRegister;
-
// Registers that may be reserved for spilling purposes. These may be the same
// as the input registers.
Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
@@ -377,12 +389,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// unit. Minimum - first, maximum - second.
std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
- std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
- std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
- std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
+ const AMDGPUBufferPseudoSourceValue BufferPSV;
+ const AMDGPUImagePseudoSourceValue ImagePSV;
+ const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
private:
- unsigned LDSWaveSpillSize = 0;
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
@@ -422,13 +433,14 @@ private:
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
+ bool MayNeedAGPRs : 1;
+
// The hard-wired high half of the address of the global information table
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
// current hardware only allows a 16 bit value.
unsigned GITPtrHigh;
unsigned HighBitsOf32BitAddress;
- unsigned GDSSize;
// Current recorded maximum possible occupancy.
unsigned Occupancy;
@@ -440,17 +452,6 @@ private:
MCPhysReg getNextSystemSGPR() const;
public:
- struct SpilledReg {
- Register VGPR;
- int Lane = -1;
-
- SpilledReg() = default;
- SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
-
- bool hasLane() { return Lane != -1;}
- bool hasReg() { return VGPR != 0;}
- };
-
struct SGPRSpillVGPR {
// VGPR used for SGPR spills
Register VGPR;
@@ -468,14 +469,28 @@ public:
bool IsDead = false;
};
- // Map WWM VGPR to a stack slot that is used to save/restore it in the
- // prolog/epilog.
- MapVector<Register, Optional<int>> WWMReservedRegs;
+ // Track VGPRs reserved for WWM.
+ SmallSetVector<Register, 8> WWMReservedRegs;
+
+ /// Track stack slots used for save/restore of reserved WWM VGPRs in the
+ /// prolog/epilog.
+
+ /// FIXME: This is temporary state only needed in PrologEpilogInserter, and
+ /// doesn't really belong here. It does not require serialization
+ SmallVector<int, 8> WWMReservedFrameIndexes;
+
+ void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI,
+ const SIRegisterInfo &TRI);
+
+ auto wwmAllocation() const {
+ assert(WWMReservedRegs.size() == WWMReservedFrameIndexes.size());
+ return zip(WWMReservedRegs, WWMReservedFrameIndexes);
+ }
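wwmAllocation() iterates the two parallel containers in lockstep via llvm::zip; a small standalone sketch of the same idiom over plain vectors (assuming only llvm/ADT/STLExtras.h):

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

void dumpAllocation(const std::vector<unsigned> &Regs,
                    const std::vector<int> &FrameIndexes) {
  // Each WWM register is paired with the spill slot created for it.
  for (auto &&[Reg, FI] : llvm::zip(Regs, FrameIndexes))
    std::printf("reg %u -> frame index %d\n", Reg, FI);
}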
private:
// Track VGPR + wave index for each subregister of the SGPR spilled to
// frameindex key.
- DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
+ DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>> SGPRToVGPRSpills;
unsigned NumVGPRSpillLanes = 0;
SmallVector<SGPRSpillVGPR, 2> SpillVGPRs;
@@ -491,6 +506,18 @@ private:
// frame, so save it here and add it to the RegScavenger later.
Optional<int> ScavengeFI;
+private:
+ Register VGPRForAGPRCopy;
+
+public:
+ Register getVGPRForAGPRCopy() const {
+ return VGPRForAGPRCopy;
+ }
+
+ void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
+ VGPRForAGPRCopy = NewVGPRForAGPRCopy;
+ }
+
public: // FIXME
/// If this is set, an SGPR used for save/restore of the register used for the
/// frame pointer.
@@ -506,31 +533,32 @@ public: // FIXME
public:
SIMachineFunctionInfo(const MachineFunction &MF);
+ SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
+
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
const MachineFunction &MF,
PerFunctionMIParsingState &PFS,
SMDiagnostic &Error, SMRange &SourceRange);
- void reserveWWMRegister(Register Reg, Optional<int> FI) {
- WWMReservedRegs.insert(std::make_pair(Reg, FI));
+ void reserveWWMRegister(Register Reg) {
+ WWMReservedRegs.insert(Reg);
}
- ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
+ ArrayRef<SIRegisterInfo::SpilledReg>
+ getSGPRToVGPRSpills(int FrameIndex) const {
auto I = SGPRToVGPRSpills.find(FrameIndex);
- return (I == SGPRToVGPRSpills.end()) ?
- ArrayRef<SpilledReg>() : makeArrayRef(I->second);
+ return (I == SGPRToVGPRSpills.end())
+ ? ArrayRef<SIRegisterInfo::SpilledReg>()
+ : makeArrayRef(I->second);
}
ArrayRef<SGPRSpillVGPR> getSGPRSpillVGPRs() const { return SpillVGPRs; }
- void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
- SpillVGPRs[Index].VGPR = NewVGPR;
- SpillVGPRs[Index].FI = newFI;
- }
-
- bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
-
ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
return SpillAGPR;
}
@@ -555,15 +583,15 @@ public:
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
- void removeDeadFrameIndices(MachineFrameInfo &MFI);
+
+ /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
+ /// to the default stack.
+ bool removeDeadFrameIndices(MachineFrameInfo &MFI,
+ bool ResetSGPRSpillStackIDs);
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
- bool hasCalculatedTID() const { return TIDReg != 0; };
- Register getTIDReg() const { return TIDReg; };
- void setTIDReg(Register Reg) { TIDReg = Reg; }
-
unsigned getBytesInStackArgArea() const {
return BytesInStackArgArea;
}
@@ -581,6 +609,13 @@ public:
Register addFlatScratchInit(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
+ /// Increment user SGPRs used for padding the argument list only.
+ Register addReservedUserSGPR() {
+ Register Next = getNextUserSGPR();
+ ++NumUserSGPRs;
+ return Next;
+ }
+
// Add system SGPRs.
Register addWorkGroupIDX() {
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
@@ -722,10 +757,6 @@ public:
return HighBitsOf32BitAddress;
}
- unsigned getGDSSize() const {
- return GDSSize;
- }
-
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -903,31 +934,19 @@ public:
llvm_unreachable("unexpected dimension");
}
- unsigned getLDSWaveSpillSize() const {
- return LDSWaveSpillSize;
+ const AMDGPUBufferPseudoSourceValue *
+ getBufferPSV(const AMDGPUTargetMachine &TM) {
+ return &BufferPSV;
}
- const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII) {
- if (!BufferPSV)
- BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(TII);
-
- return BufferPSV.get();
- }
-
- const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII) {
- if (!ImagePSV)
- ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(TII);
-
- return ImagePSV.get();
+ const AMDGPUImagePseudoSourceValue *
+ getImagePSV(const AMDGPUTargetMachine &TM) {
+ return &ImagePSV;
}
- const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
- if (!GWSResourcePSV) {
- GWSResourcePSV =
- std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
- }
-
- return GWSResourcePSV.get();
+ const AMDGPUGWSResourcePseudoSourceValue *
+ getGWSPSV(const AMDGPUTargetMachine &TM) {
+ return &GWSResourcePSV;
}
unsigned getOccupancy() const {
@@ -953,6 +972,14 @@ public:
limitOccupancy(MF);
}
+ bool mayNeedAGPRs() const {
+ return MayNeedAGPRs;
+ }
+
+ // \returns true if a function has a use of AGPRs via inline asm or
+ // has a call which may use it.
+ bool mayUseAGPRs(const MachineFunction &MF) const;
+
// \returns true if a function needs or may need AGPRs.
bool usesAGPRs(const MachineFunction &MF) const;
};
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 81db66a98ddf..e426e938b856 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -64,7 +64,7 @@ using namespace llvm;
// First the instructions are put into blocks.
// We want the blocks to help control register usage and hide high latencies
// later. To help control register usage, we typically want all local
-// computations, when for example you create a result that can be comsummed
+// computations, when for example you create a result that can be consumed
// right away, to be contained in a block. Block inputs and outputs would
// typically be important results that are needed in several locations of
// the shader. Since we do want blocks to help hide high latencies, we want
@@ -90,8 +90,8 @@ using namespace llvm;
// Increasing the number of active wavefronts helps hide the former, but it
// doesn't solve the latter, which is why, even if the wavefront count is
// high, we have to try to have as many instructions hiding high latencies
// as possible.
-// The OpenCL doc says for example latency of 400 cycles for a global mem access,
-// which is hidden by 10 instructions if the wavefront count is 10.
+// The OpenCL doc gives, for example, a latency of 400 cycles for a global
+// mem access, which is hidden by 10 instructions if the wavefront count is 10.
// Some figures taken from AMD docs:
// Both texture and constant L1 caches are 4-way associative with 64 bytes
@@ -353,7 +353,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
// able to correctly handle 5 vs 6, 2 vs 3.
// (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
// The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
- // Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
+ // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7
// The use of findDefBetween removes the case 4.
for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
Register Reg = RegMaskPair.RegUnit;
@@ -402,7 +402,7 @@ void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
nodeScheduled(SU);
}
- // TODO: compute InternalAdditionnalPressure.
+ // TODO: compute InternalAdditionalPressure.
InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
// Check everything is right.
@@ -696,7 +696,7 @@ void SIScheduleBlockCreator::colorHighLatenciesGroups() {
bool HasSubGraph;
std::vector<int> SubGraph;
// By construction (topological order), if SU and
- // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
+ // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
// in the parent graph of SU.
#ifndef NDEBUG
SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
@@ -1123,36 +1123,26 @@ void SIScheduleBlockCreator::colorExports() {
for (unsigned SUNum : DAG->TopDownIndex2SU) {
const SUnit &SU = DAG->SUnits[SUNum];
if (SIInstrInfo::isEXP(*SU.getInstr())) {
- // Check the EXP can be added to the group safely,
- // ie without needing any other instruction.
- // The EXP is allowed to depend on other EXP
- // (they will be in the same group).
- for (unsigned j : ExpGroup) {
- bool HasSubGraph;
- std::vector<int> SubGraph;
- // By construction (topological order), if SU and
- // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
- // in the parent graph of SU.
-#ifndef NDEBUG
- SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
- HasSubGraph);
- assert(!HasSubGraph);
-#endif
- SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
- HasSubGraph);
- if (!HasSubGraph)
- continue; // No dependencies between each other
-
- // SubGraph contains all the instructions required
- // between EXP SUnits[j] and EXP SU.
- for (unsigned k : SubGraph) {
- if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
- // Other instructions than EXP would be required in the group.
- // Abort the groupping.
- return;
+ // SU is an export instruction. Check whether one of its successor
+ // dependencies is a non-export, in which case we skip export grouping.
+ for (const SDep &SuccDep : SU.Succs) {
+ const SUnit *SuccSU = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) {
+ // Ignore these dependencies.
+ continue;
+ }
+ assert(SuccSU->isInstr() &&
+ "SUnit unexpectedly not representing an instruction!");
+
+ if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) {
+ // A non-export depends on us. Skip export grouping.
+ // Note that this is a bit pessimistic: We could still group all other
+ // exports that are not depended on by non-exports, directly or
+ // indirectly. Simply skipping this particular export but grouping all
+ // others would not account for indirect dependencies.
+ return;
}
}
-
ExpGroup.push_back(SUNum);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index fff4f6729c99..8a66213931ff 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -19,6 +19,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/TargetParser.h"
@@ -63,7 +64,7 @@ enum class SIAtomicScope {
};
/// The distinct address spaces supported by the AMDGPU target for
-/// atomic memory operation. Can be ORed toether.
+/// atomic memory operation. Can be ORed together.
enum class SIAtomicAddrSpace {
NONE = 0u,
GLOBAL = 1u << 0,
@@ -459,6 +460,56 @@ public:
Position Pos) const override;
};
+class SIGfx940CacheControl : public SIGfx90ACacheControl {
+protected:
+
+ /// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI
+ /// is modified, false otherwise.
+ bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const {
+ return enableNamedBit(MI, AMDGPU::CPol::SC0);
+ }
+
+ /// Sets SC1 bit to "true" if present in \p MI. Returns true if \p MI
+ /// is modified, false otherwise.
+ bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const {
+ return enableNamedBit(MI, AMDGPU::CPol::SC1);
+ }
+
+ /// Sets NT bit to "true" if present in \p MI. Returns true if \p MI
+ /// is modified, false otherwise.
+ bool enableNTBit(const MachineBasicBlock::iterator &MI) const {
+ return enableNamedBit(MI, AMDGPU::CPol::NT);
+ }
+
+public:
+
+ SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}
+
+ bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const override;
+
+ bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const override;
+
+ bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const override;
+
+ bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
+ SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile,
+ bool IsNonTemporal) const override;
+
+ bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace, Position Pos) const override;
+
+ bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
+ Position Pos) const override;
+};
+
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
@@ -494,6 +545,20 @@ public:
Position Pos) const override;
};
+class SIGfx11CacheControl : public SIGfx10CacheControl {
+public:
+ SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}
+
+ bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const override;
+
+ bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
+ SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile,
+ bool IsNonTemporal) const override;
+};
+
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
@@ -649,7 +714,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
return None;
}
- SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
+ SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
@@ -668,7 +733,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
return None;
}
std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
- ScopeOrNone.getValue();
+ *ScopeOrNone;
if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
@@ -730,7 +795,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
bool IsCrossAddressSpaceOrdering = false;
std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
- ScopeOrNone.getValue();
+ *ScopeOrNone;
if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
@@ -775,13 +840,17 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
+ if (ST.hasGFX940Insts())
+ return std::make_unique<SIGfx940CacheControl>(ST);
if (ST.hasGFX90AInsts())
return std::make_unique<SIGfx90ACacheControl>(ST);
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return std::make_unique<SIGfx6CacheControl>(ST);
if (Generation < AMDGPUSubtarget::GFX10)
return std::make_unique<SIGfx7CacheControl>(ST);
- return std::make_unique<SIGfx10CacheControl>(ST);
+ if (Generation < AMDGPUSubtarget::GFX11)
+ return std::make_unique<SIGfx10CacheControl>(ST);
+ return std::make_unique<SIGfx11CacheControl>(ST);
}
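The check order in create() is significant: the feature tests (GFX940, then GFX90A) come before the generation range tests since, as that ordering implies, a GFX940 subtarget also reports GFX90A instructions. A reduced standalone sketch of the dispatch:

#include <memory>

struct CacheControl { virtual ~CacheControl() = default; };
struct Gfx6CC : CacheControl {};
struct Gfx90ACC : CacheControl {};
struct Gfx940CC : Gfx90ACC {};

std::unique_ptr<CacheControl> create(bool HasGfx940Insts,
                                     bool HasGfx90AInsts) {
  // Most specific feature first; a GFX940 target would also pass the
  // GFX90A test, so reversing these checks would pick the wrong class.
  if (HasGfx940Insts)
    return std::make_unique<Gfx940CC>();
  if (HasGfx90AInsts)
    return std::make_unique<Gfx90ACC>();
  return std::make_unique<Gfx6CC>(); // generation-based fallbacks elided
}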
bool SIGfx6CacheControl::enableLoadCacheBypass(
@@ -943,7 +1012,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
// The LDS keeps all memory operations in order for
- // the same wavesfront.
+ // the same wavefront.
break;
default:
llvm_unreachable("Unsupported synchronization scope");
@@ -1360,7 +1429,9 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
// to initiate writeback of any dirty cache lines of earlier writes by the
// same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
// writeback has completed.
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
+ // Set SC bits to indicate system scope.
+ .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
// Followed by same as GFX7, which will ensure the necessary "S_WAITCNT
// vmcnt(0)" needed by the "BUFFER_WBL2".
Changed = true;
@@ -1386,6 +1457,308 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
return Changed;
}
+bool SIGfx940CacheControl::enableLoadCacheBypass(
+ const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const {
+ assert(MI->mayLoad() && !MI->mayStore());
+ bool Changed = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ // Set SC bits to indicate system scope.
+ Changed |= enableSC0Bit(MI);
+ Changed |= enableSC1Bit(MI);
+ break;
+ case SIAtomicScope::AGENT:
+ // Set SC bits to indicate agent scope.
+ Changed |= enableSC1Bit(MI);
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In threadgroup split mode the waves of a work-group can be executing on
+ // different CUs. Therefore we need to bypass the L1, which is per CU.
+ // Otherwise in non-threadgroup split mode all waves of a work-group are
+ // on the same CU, and so the L1 does not need to be bypassed. Setting SC
+ // bits to indicate work-group scope will do this automatically.
+ Changed |= enableSC0Bit(MI);
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // Leave SC bits unset to indicate wavefront scope.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory caches
+ /// to be bypassed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ return Changed;
+}
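The SC0/SC1 scope encoding is used the same way across the GFX940 load/store hooks; gathered into one sketch (names taken from the diff; the helper itself is hypothetical):

// Scope -> cache-policy bits, per the comments in the GFX940 hooks:
//   SYSTEM                   -> SC0 | SC1
//   AGENT                    -> SC1
//   WORKGROUP                -> SC0
//   WAVEFRONT / SINGLETHREAD -> none
unsigned scopeToSCBits(SIAtomicScope Scope) {
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    return AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1;
  case SIAtomicScope::AGENT:
    return AMDGPU::CPol::SC1;
  case SIAtomicScope::WORKGROUP:
    return AMDGPU::CPol::SC0;
  default:
    return 0;
  }
}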
+
+bool SIGfx940CacheControl::enableStoreCacheBypass(
+ const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
+ assert(!MI->mayLoad() && MI->mayStore());
+ bool Changed = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ // Set SC bits to indicate system scope.
+ Changed |= enableSC0Bit(MI);
+ Changed |= enableSC1Bit(MI);
+ break;
+ case SIAtomicScope::AGENT:
+ // Set SC bits to indicate agent scope.
+ Changed |= enableSC1Bit(MI);
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // Set SC bits to indicate workgroup scope.
+ Changed |= enableSC0Bit(MI);
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // Leave SC bits unset to indicate wavefront scope.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory caches
+ /// to be bypassed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ return Changed;
+}
+
+bool SIGfx940CacheControl::enableRMWCacheBypass(
+ const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const {
+ assert(MI->mayLoad() && MI->mayStore());
+ bool Changed = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ // Set SC1 bit to indicate system scope.
+ Changed |= enableSC1Bit(MI);
+ break;
+ case SIAtomicScope::AGENT:
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // RMW atomic operations implicitly bypass the L1 cache and only use SC1
+ // to indicate system or agent scope. The SC0 bit is used to indicate if
+ // they are return or no-return. Leave SC1 bit unset to indicate agent
+ // scope.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ return Changed;
+}
+
+bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
+ MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile, bool IsNonTemporal) const {
+ // Only handle load and store, not atomic read-modify-write instructions. The
+ // latter use glc to indicate if the atomic returns a result and so must not
+ // be used for cache control.
+ assert(MI->mayLoad() ^ MI->mayStore());
+
+ // Only update load and store, not LLVM IR atomic read-modify-write
+ // instructions. The latter are always marked as volatile, so we cannot
+ // sensibly handle them here without pessimizing all atomics. They also do
+ // not support the nontemporal attribute.
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+
+ bool Changed = false;
+
+ if (IsVolatile) {
+ // Set SC bits to indicate system scope.
+ Changed |= enableSC0Bit(MI);
+ Changed |= enableSC1Bit(MI);
+
+ // Ensure operation has completed at system scope to cause all volatile
+ // operations to be visible outside the program in a global order. Do not
+ // request cross address space as only the global address space can be
+ // observable outside the program, so no need to cause a waitcnt for LDS
+ // address space operations.
+ Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
+ Position::AFTER);
+
+ return Changed;
+ }
+
+ if (IsNonTemporal) {
+ Changed |= enableNTBit(MI);
+ return Changed;
+ }
+
+ return Changed;
+}
+
+bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
+ if (!InsertCacheInv)
+ return false;
+
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ // Ensures that following loads will not see stale remote VMEM data or
+ // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
+ // CC will never be stale due to the local memory probes.
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
+ // Set SC bits to indicate system scope.
+ .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
+ // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
+ // hardware does not reorder memory operations by the same wave with
+ // respect to a preceding "BUFFER_INV". The invalidate is guaranteed to
+ // remove any cache lines of earlier writes by the same wave and ensures
+ // later reads by the same wave will refetch the cache lines.
+ Changed = true;
+ break;
+ case SIAtomicScope::AGENT:
+ // Ensures that following loads will not see stale remote data or local
+ // MTYPE NC global data. Local MTYPE RW and CC memory will never be stale
+ // due to the memory probes.
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
+ // Set SC bits to indicate agent scope.
+ .addImm(AMDGPU::CPol::SC1);
+ // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
+ // does not reorder memory operations with respect to preceding buffer
+ // invalidate. The invalidate is guaranteed to remove any cache lines of
+ // earlier writes and ensures later writes will refetch the cache lines.
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In threadgroup split mode the waves of a work-group can be executing on
+ // different CUs. Therefore we need to invalidate the L1, which is per CU.
+ // Otherwise in non-threadgroup split mode all waves of a work-group are
+ // on the same CU, and so the L1 does not need to be invalidated.
+ if (ST.isTgSplitEnabled()) {
+ // Ensures L1 is invalidated if in threadgroup split mode. In
+ // non-threadgroup split mode it is a NOP, but no point generating it in
+ // that case if know not in that mode.
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
+ // Set SC bits to indicate work-group scope.
+ .addImm(AMDGPU::CPol::SC0);
+ // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
+ // does not reorder memory operations with respect to preceding buffer
+ // invalidate. The invalidate is guaranteed to remove any cache lines of
+ // earlier writes and ensures later writes will refetch the cache lines.
+ Changed = true;
+ }
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // Could generate "BUFFER_INV" but it would do nothing as there are no
+ // caches to invalidate.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory cache
+ /// to be flushed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ return Changed;
+}
+
+bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const {
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
+ // hardware does not reorder memory operations by the same wave with
+ // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed
+ // to initiate writeback of any dirty cache lines of earlier writes by the
+ // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
+ // writeback has completed.
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
+ // Set SC bits to indicate system scope.
+ .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
+ // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
+ // SIAtomicScope::SYSTEM, the following insertWait will generate the
+ // required "S_WAITCNT vmcnt(0)" needed by the "BUFFER_WBL2".
+ Changed = true;
+ break;
+ case SIAtomicScope::AGENT:
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
+ // Set SC bits to indicate agent scope.
+ .addImm(AMDGPU::CPol::SC1);
+
+ // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
+ // SIAtomicScope::AGENT, the following insertWait will generate the
+ // required "S_WAITCNT vmcnt(0)".
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // Do not generate "BUFFER_WBL2" as there are no caches it would
+ // writeback, and would require an otherwise unnecessary
+ // "S_WAITCNT vmcnt(0)".
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ // Ensure the necessary S_WAITCNT needed by any "BUFFER_WBL2" as well as other
+ // S_WAITCNT needed.
+ Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
+ IsCrossAddrSpaceOrdering, Pos);
+
+ return Changed;
+}
+
bool SIGfx10CacheControl::enableLoadCacheBypass(
const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -1547,7 +1920,7 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
// The LDS keeps all memory operations in order for
- // the same wavesfront.
+ // the same wavefront.
break;
default:
llvm_unreachable("Unsupported synchronization scope");
@@ -1655,6 +2028,101 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
return Changed;
}
+bool SIGfx11CacheControl::enableLoadCacheBypass(
+ const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const {
+ assert(MI->mayLoad() && !MI->mayStore());
+ bool Changed = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ // Set the L0 and L1 cache policies to MISS_EVICT.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
+ Changed |= enableGLCBit(MI);
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
+ // CU mode all waves of a work-group are on the same CU, and so the L0
+ // does not need to be bypassed.
+ if (!ST.isCuModeEnabled())
+ Changed |= enableGLCBit(MI);
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to bypass.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory caches
+ /// to be bypassed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ return Changed;
+}
+
+bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
+ MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
+ bool IsVolatile, bool IsNonTemporal) const {
+
+ // Only handle load and store, not atomic read-modify-write instructions. The
+ // latter use glc to indicate if the atomic returns a result and so must not
+ // be used for cache control.
+ assert(MI->mayLoad() ^ MI->mayStore());
+
+ // Only update load and store, not LLVM IR atomic read-modify-write
+ // instructions. The latter are always marked as volatile, so they cannot be
+ // handled sensibly here without pessimizing all atomics. They also do not
+ // support the nontemporal attribute.
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+
+ bool Changed = false;
+
+ if (IsVolatile) {
+ // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
+ if (Op == SIMemOp::LOAD)
+ Changed |= enableGLCBit(MI);
+
+ // Set MALL NOALLOC for load and store instructions.
+ Changed |= enableDLCBit(MI);
+
+ // Ensure operation has completed at system scope to cause all volatile
+ // operations to be visible outside the program in a global order. Do not
+ // request cross address space as only the global address space can be
+ // observable outside the program, so no need to cause a waitcnt for LDS
+ // address space operations.
+ Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
+ Position::AFTER);
+ return Changed;
+ }
+
+ if (IsNonTemporal) {
+ // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
+ // and L2 cache policy to STREAM.
+ // For stores setting both GLC and SLC configures L0 and L1 cache policy
+ // to MISS_EVICT and the L2 cache policy to STREAM.
+ if (Op == SIMemOp::STORE)
+ Changed |= enableGLCBit(MI);
+ Changed |= enableSLCBit(MI);
+
+ // Set MALL NOALLOC for load and store instructions.
+ Changed |= enableDLCBit(MI);
+ return Changed;
+ }
+
+ return Changed;
+}
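
The volatile/nontemporal handling above amounts to a small policy table over the GLC/SLC/DLC bits. A self-contained sketch of that table (hypothetical cachePolicy helper and bit constants; the real pass also emits the trailing wait for volatile operations):

#include <cassert>
#include <cstdint>

// Hypothetical stand-ins for the GLC/SLC/DLC cache-policy bits.
constexpr uint32_t GLC = 1u << 0; // L0/L1 MISS_EVICT on loads
constexpr uint32_t SLC = 1u << 1; // L2 policy STREAM
constexpr uint32_t DLC = 1u << 2; // MALL NOALLOC

enum class Op { Load, Store };

// Mirrors the table in SIGfx11CacheControl::enableVolatileAndOrNonTemporal:
// volatile sets GLC (loads only) plus DLC; nontemporal sets SLC and DLC,
// plus GLC for stores so L0/L1 become MISS_EVICT.
uint32_t cachePolicy(Op O, bool IsVolatile, bool IsNonTemporal) {
  uint32_t Bits = 0;
  if (IsVolatile) {
    if (O == Op::Load)
      Bits |= GLC;
    Bits |= DLC;
    return Bits; // volatile wins; a system-scope wait follows in the real pass
  }
  if (IsNonTemporal) {
    if (O == Op::Store)
      Bits |= GLC;
    Bits |= SLC | DLC;
  }
  return Bits;
}

int main() {
  assert(cachePolicy(Op::Load, true, false) == (GLC | DLC));
  assert(cachePolicy(Op::Store, false, true) == (GLC | SLC | DLC));
}
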
+
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
if (AtomicPseudoMIs.empty())
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 24a8879b5684..a5816e2e8c73 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -17,6 +17,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
#define DEBUG_TYPE "si-mode-register"
@@ -162,7 +163,9 @@ FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
// double precision setting.
Status SIModeRegister::getInstructionMode(MachineInstr &MI,
const SIInstrInfo *TII) {
- if (TII->usesFPDPRounding(MI)) {
+ if (TII->usesFPDPRounding(MI) ||
+ MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
+ MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
switch (MI.getOpcode()) {
case AMDGPU::V_INTERP_P1LL_F16:
case AMDGPU::V_INTERP_P1LV_F16:
@@ -170,6 +173,18 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
// f16 interpolation instructions need double precision round to zero
return Status(FP_ROUND_MODE_DP(3),
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
+ case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
+ // Replace the pseudo with a real instruction.
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ return Status(FP_ROUND_MODE_DP(3),
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
+ }
+ case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
+ // Replace the pseudo with a real instruction.
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
+ return Status(FP_ROUND_MODE_DP(3),
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
+ }
default:
return DefaultStatus;
}
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index b9c839fe28ba..5215397d5936 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -9,6 +9,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
@@ -292,6 +293,210 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
return false;
}
+// Backwards-iterate from Origin (for at most MaxInstructions iterations) until
+// either the beginning of the BB is reached or Pred evaluates to true - which
+// can be an arbitrary condition based on the current MachineInstr, for
+// instance a target instruction. Breaks prematurely by returning nullptr if
+// one of the registers given in NonModifiableRegs is modified by the current
+// instruction.
+static MachineInstr *
+findInstrBackwards(MachineInstr &Origin,
+ std::function<bool(MachineInstr *)> Pred,
+ ArrayRef<MCRegister> NonModifiableRegs,
+ const SIRegisterInfo *TRI, unsigned MaxInstructions = 20) {
+ MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
+ E = Origin.getParent()->rend();
+ unsigned CurrentIteration = 0;
+
+ for (++A; CurrentIteration < MaxInstructions && A != E; ++A) {
+ if (A->isDebugInstr())
+ continue;
+
+ if (Pred(&*A))
+ return &*A;
+
+ for (MCRegister Reg : NonModifiableRegs) {
+ if (A->modifiesRegister(Reg, TRI))
+ return nullptr;
+ }
+
+ ++CurrentIteration;
+ }
+
+ return nullptr;
+}
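
findInstrBackwards combines a bounded backwards walk, a caller-supplied predicate, and an abort on clobbered registers. A standalone sketch of the same contract over a plain vector of toy instructions (hypothetical Instr type, not MachineInstr):

#include <functional>
#include <iostream>
#include <set>
#include <string>
#include <vector>

// A toy instruction: an opcode plus the registers it defines.
struct Instr {
  std::string Opcode;
  std::set<std::string> Defs;
  bool modifies(const std::string &Reg) const { return Defs.count(Reg) != 0; }
};

// Walk backwards from (but not including) Origin for at most MaxInstructions,
// returning the first instruction satisfying Pred, or nullptr if the scan
// hits the block start, the budget, or an instruction that writes a
// protected register, the same contract as findInstrBackwards above.
const Instr *findBackwards(const std::vector<Instr> &Block, size_t Origin,
                           const std::function<bool(const Instr &)> &Pred,
                           const std::vector<std::string> &NonModifiableRegs,
                           unsigned MaxInstructions = 20) {
  unsigned Steps = 0;
  for (size_t I = Origin; I-- > 0 && Steps < MaxInstructions; ++Steps) {
    if (Pred(Block[I]))
      return &Block[I]; // predicate checked first, as in the real helper
    for (const auto &Reg : NonModifiableRegs)
      if (Block[I].modifies(Reg))
        return nullptr; // a protected register was clobbered: give up
  }
  return nullptr;
}

int main() {
  std::vector<Instr> Block = {{"v_cmp_lt_u32", {"s0"}},
                              {"v_add_u32", {"v1"}},
                              {"s_and_saveexec_b32", {"exec"}}};
  const Instr *Found = findBackwards(
      Block, 2, [](const Instr &I) { return I.Opcode == "v_cmp_lt_u32"; },
      {"exec", "s0"});
  std::cout << (Found ? Found->Opcode : "not found") << '\n';
}
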
+
+// Determine if a register Reg is not re-defined and still in use
+// in the range (Stop..Start].
+// It does so by calculating liveness backwards from the end of the BB until
+// either Stop or the beginning of the BB is reached.
+// After liveness is calculated, we can determine if Reg is still in use and
+// not defined in between the instructions.
+static bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start,
+ MCRegister Reg, const SIRegisterInfo *TRI,
+ MachineRegisterInfo &MRI,
+ bool useLiveOuts = false,
+ bool ignoreStart = false) {
+ LivePhysRegs LR(*TRI);
+ if (useLiveOuts)
+ LR.addLiveOuts(*Stop.getParent());
+
+ MachineBasicBlock::reverse_iterator A(Start);
+ MachineBasicBlock::reverse_iterator E(Stop);
+
+ if (ignoreStart)
+ ++A;
+
+ for (; A != Stop.getParent()->rend() && A != E; ++A) {
+ LR.stepBackward(*A);
+ }
+
+ return !LR.available(MRI, Reg);
+}
+
+// Determine if a register Reg is not re-defined and still in use
+// in the range (Stop..BB.end].
+static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg,
+ const SIRegisterInfo *TRI,
+ MachineRegisterInfo &MRI) {
+ return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, TRI,
+ MRI, true);
+}
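
Both helpers above rest on classic backwards liveness: seed a live set (optionally with the block's live-outs), then step over each instruction in reverse, erasing defs and adding uses. A minimal sketch of that stepping with toy def/use sets (hypothetical types, standing in for LivePhysRegs):

#include <cassert>
#include <set>
#include <string>
#include <vector>

// A toy instruction with explicit def and use register sets.
struct Instr {
  std::set<std::string> Defs, Uses;
};

// Step liveness backwards across one instruction: a defined register dies
// above its def, a used register becomes live above its use. This is the
// essence of LivePhysRegs::stepBackward as used by isRegisterInUseBetween.
void stepBackward(std::set<std::string> &Live, const Instr &I) {
  for (const auto &D : I.Defs)
    Live.erase(D);
  for (const auto &U : I.Uses)
    Live.insert(U);
}

// Is Reg live immediately before Block[Stop]? Walk backwards from the block
// end, seeded with LiveOuts, down to Stop.
bool isRegisterInUseAt(const std::vector<Instr> &Block, size_t Stop,
                       const std::string &Reg,
                       const std::set<std::string> &LiveOuts) {
  std::set<std::string> Live = LiveOuts;
  for (size_t I = Block.size(); I-- > Stop;)
    stepBackward(Live, Block[I]);
  return Live.count(Reg) != 0;
}

int main() {
  // s0 is defined at index 0 and last used at index 1.
  std::vector<Instr> Block = {{{"s0"}, {}}, {{"v0"}, {"s0"}}, {{"v1"}, {"v0"}}};
  assert(isRegisterInUseAt(Block, 1, "s0", {}));  // live into index 1
  assert(!isRegisterInUseAt(Block, 2, "s0", {})); // dead once its last use ran
}
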
+
+// Tries to find an opportunity to optimize a v_cmp ..., s_and_saveexec
+// sequence by looking at an instance of an s_and_saveexec instruction. Returns
+// a pointer to the v_cmp instruction if it is safe to replace the sequence
+// (see the conditions in the function body). This runs after register
+// allocation, so some checks on operand dependencies need to be considered.
+static MachineInstr *findPossibleVCMPVCMPXOptimization(
+ MachineInstr &SaveExec, MCRegister Exec, const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII, MachineRegisterInfo &MRI) {
+
+ MachineInstr *VCmp = nullptr;
+
+ Register SaveExecDest = SaveExec.getOperand(0).getReg();
+ if (!TRI->isSGPRReg(MRI, SaveExecDest))
+ return nullptr;
+
+ MachineOperand *SaveExecSrc0 =
+ TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0);
+ if (!SaveExecSrc0->isReg())
+ return nullptr;
+
+ // Try to find the last v_cmp instruction that defs the saveexec input
+ // operand without any write to Exec or the saveexec input operand in between.
+ VCmp = findInstrBackwards(
+ SaveExec,
+ [&](MachineInstr *Check) {
+ return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 &&
+ Check->modifiesRegister(SaveExecSrc0->getReg(), TRI);
+ },
+ {Exec, SaveExecSrc0->getReg()}, TRI);
+
+ if (!VCmp)
+ return nullptr;
+
+ MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst);
+ assert(VCmpDest && "Should have an sdst operand!");
+
+ // Check if any of the v_cmp source operands is written by the saveexec.
+ MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0);
+ if (Src0->isReg() && TRI->isSGPRReg(MRI, Src0->getReg()) &&
+ SaveExec.modifiesRegister(Src0->getReg(), TRI))
+ return nullptr;
+
+ MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1);
+ if (Src1->isReg() && TRI->isSGPRReg(MRI, Src1->getReg()) &&
+ SaveExec.modifiesRegister(Src1->getReg(), TRI))
+ return nullptr;
+
+ // Don't do the transformation if the destination operand is included in
+ // its MBB's live-outs, meaning it is used in any of its successors; removing
+ // the v_cmp, and therefore the def of the dest operand, would then yield
+ // incorrect code.
+ if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg()))
+ return nullptr;
+
+ // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the
+ // s_and_saveexec, skip the optimization.
+ if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), TRI, MRI,
+ false, true) ||
+ isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI))
+ return nullptr;
+
+ // Try to determine if there is a write to any of the VCmp
+ // operands between the saveexec and the vcmp.
+ // If yes, additional VGPR spilling might need to be inserted. In this case,
+ // it's not worth replacing the instruction sequence.
+ SmallVector<MCRegister, 2> NonDefRegs;
+ if (Src0->isReg())
+ NonDefRegs.push_back(Src0->getReg());
+
+ if (Src1->isReg())
+ NonDefRegs.push_back(Src1->getReg());
+
+ if (!findInstrBackwards(
+ SaveExec, [&](MachineInstr *Check) { return Check == VCmp; },
+ NonDefRegs, TRI))
+ return nullptr;
+
+ return VCmp;
+}
+
+// Inserts the optimized s_mov_b32 / v_cmpx sequence based on the
+// operands extracted from a v_cmp ..., s_and_saveexec pattern.
+static bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
+ MachineInstr &VCmp, MCRegister Exec,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ MachineRegisterInfo &MRI) {
+ const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode());
+
+ if (NewOpcode == -1)
+ return false;
+
+ MachineOperand *Src0 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = TII->getNamedOperand(VCmp, AMDGPU::OpName::src1);
+
+ Register MoveDest = SaveExecInstr.getOperand(0).getReg();
+
+ MachineBasicBlock::instr_iterator InsertPosIt = SaveExecInstr.getIterator();
+ if (!SaveExecInstr.uses().empty()) {
+ bool isSGPR32 = TRI->getRegSizeInBits(MoveDest, MRI) == 32;
+ unsigned MovOpcode = isSGPR32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ BuildMI(*SaveExecInstr.getParent(), InsertPosIt,
+ SaveExecInstr.getDebugLoc(), TII->get(MovOpcode), MoveDest)
+ .addReg(Exec);
+ }
+
+ // Omit dst as V_CMPX is implicitly writing to EXEC.
+ // Add dummy src and clamp modifiers, if needed.
+ auto Builder = BuildMI(*VCmp.getParent(), std::next(InsertPosIt),
+ VCmp.getDebugLoc(), TII->get(NewOpcode));
+
+ auto TryAddImmediateValueFromNamedOperand =
+ [&](unsigned OperandName) -> void {
+ if (auto *Mod = TII->getNamedOperand(VCmp, OperandName))
+ Builder.addImm(Mod->getImm());
+ };
+
+ TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src0_modifiers);
+ Builder.add(*Src0);
+
+ TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::src1_modifiers);
+ Builder.add(*Src1);
+
+ TryAddImmediateValueFromNamedOperand(AMDGPU::OpName::clamp);
+
+ // The kill flags may no longer be correct.
+ if (Src0->isReg())
+ MRI.clearKillFlags(Src0->getReg());
+ if (Src1->isReg())
+ MRI.clearKillFlags(Src1->getReg());
+
+ return true;
+}
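
TryAddImmediateValueFromNamedOperand copies an operand only when the source instruction actually carries it, which lets encodings with and without modifiers share one rewrite path. A standalone sketch of that optional-copy pattern (hypothetical Instr and getNamedImm, not the LLVM operand API):

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

// A toy instruction holding named immediate operands (e.g. src0_modifiers).
struct Instr {
  std::map<std::string, int64_t> NamedImms;
  std::optional<int64_t> getNamedImm(const std::string &Name) const {
    auto It = NamedImms.find(Name);
    if (It == NamedImms.end())
      return std::nullopt;
    return It->second;
  }
};

int main() {
  Instr VCmp{{{"src0_modifiers", 0}, {"clamp", 1}}}; // no src1_modifiers
  std::vector<int64_t> NewOperands;

  // Mirror TryAddImmediateValueFromNamedOperand: append the operand only if
  // the source encoding actually carries it.
  auto TryAdd = [&](const std::string &Name) {
    if (auto Imm = VCmp.getNamedImm(Name))
      NewOperands.push_back(*Imm);
  };

  TryAdd("src0_modifiers");
  TryAdd("src1_modifiers"); // absent: silently skipped
  TryAdd("clamp");
  std::cout << NewOperands.size() << " operands copied\n"; // prints 2
}
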
+
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -299,6 +504,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
// Optimize sequences emitted for control flow lowering. They are originally
@@ -312,6 +518,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
// x = s_<op>_saveexec_b64 y
//
+ bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
MachineBasicBlock::reverse_iterator E = MBB.rend();
@@ -351,6 +558,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
CopyToExecInst->eraseFromParent();
+ Changed = true;
}
continue;
@@ -456,8 +664,49 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
OtherInst->substituteRegister(CopyToExec, Exec,
AMDGPU::NoSubRegister, *TRI);
}
+
+ Changed = true;
}
- return true;
+ // After all s_op_saveexec instructions are inserted,
+ // replace (on GFX10.3 and later)
+ // v_cmp_* SGPR, IMM, VGPR
+ // s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR
+ // with
+ // s_mov_b32 EXEC_SGPR_DEST, exec_lo
+ // v_cmpx_* IMM, VGPR
+ // to reduce pipeline stalls.
+ if (ST.hasGFX10_3Insts()) {
+ DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
+ const unsigned AndSaveExecOpcode =
+ ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // Record relevant v_cmp / s_and_saveexec instruction pairs for
+ // replacement.
+ if (MI.getOpcode() != AndSaveExecOpcode)
+ continue;
+
+ if (MachineInstr *VCmp =
+ findPossibleVCMPVCMPXOptimization(MI, Exec, TRI, TII, *MRI))
+ SaveExecVCmpMapping[&MI] = VCmp;
+ }
+ }
+
+ for (const auto &Entry : SaveExecVCmpMapping) {
+ MachineInstr *SaveExecInstr = Entry.getFirst();
+ MachineInstr *VCmpInstr = Entry.getSecond();
+
+ if (optimizeVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec, TII,
+ TRI, *MRI)) {
+ SaveExecInstr->eraseFromParent();
+ VCmpInstr->eraseFromParent();
+
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
}
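
Note the two-phase shape of the GFX10.3 rewrite above: candidate v_cmp/s_and_saveexec pairs are first collected into SaveExecVCmpMapping, and instructions are only erased afterwards, so the scan never walks a list it is mutating. A generic sketch of that collect-then-mutate pattern:

#include <iostream>
#include <list>
#include <string>
#include <vector>

int main() {
  std::list<std::string> MBB = {"v_cmp_lt_u32", "s_and_saveexec_b32",
                                "v_mov_b32", "v_cmp_eq_u32",
                                "s_and_saveexec_b32"};

  // Phase 1: scan and record. Erasing while walking would disturb the very
  // iteration producing the candidates, so only collect stable handles here,
  // just as the pass fills SaveExecVCmpMapping before rewriting anything.
  std::vector<std::list<std::string>::iterator> ToErase;
  for (auto It = MBB.begin(); It != MBB.end(); ++It)
    if (*It == "s_and_saveexec_b32")
      ToErase.push_back(It);

  // Phase 2: mutate. std::list iterators to the other elements stay valid.
  for (auto It : ToErase)
    MBB.erase(It);

  for (const auto &I : MBB)
    std::cout << I << '\n';
}
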
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 5f89f3826683..e5e65a8dbbf1 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -39,7 +39,7 @@ private:
MCRegister CondReg;
MCRegister ExecReg;
- Register optimizeVcndVcmpPair(MachineBasicBlock &MBB);
+ bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
bool optimizeElseBranch(MachineBasicBlock &MBB);
public:
@@ -90,8 +90,8 @@ static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx,
static bool isDefBetween(const SIRegisterInfo &TRI,
LiveIntervals *LIS, Register Reg,
const MachineInstr &Sel, const MachineInstr &And) {
- SlotIndex AndIdx = LIS->getInstructionIndex(And);
- SlotIndex SelIdx = LIS->getInstructionIndex(Sel);
+ SlotIndex AndIdx = LIS->getInstructionIndex(And).getRegSlot();
+ SlotIndex SelIdx = LIS->getInstructionIndex(Sel).getRegSlot();
if (Reg.isVirtual())
return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);
@@ -119,21 +119,20 @@ static bool isDefBetween(const SIRegisterInfo &TRI,
// required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive
// lanes.
//
-// Returns %cc register on success.
-Register
-SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
+// Returns true on success.
+bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
return Opc == AMDGPU::S_CBRANCH_VCCZ ||
Opc == AMDGPU::S_CBRANCH_VCCNZ; });
if (I == MBB.terminators().end())
- return Register();
+ return false;
auto *And =
TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);
if (!And || And->getOpcode() != AndOpc ||
!And->getOperand(1).isReg() || !And->getOperand(2).isReg())
- return Register();
+ return false;
MachineOperand *AndCC = &And->getOperand(1);
Register CmpReg = AndCC->getReg();
@@ -143,49 +142,49 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
CmpReg = AndCC->getReg();
CmpSubReg = AndCC->getSubReg();
} else if (And->getOperand(2).getReg() != Register(ExecReg)) {
- return Register();
+ return false;
}
auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);
if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
Cmp->getParent() != And->getParent())
- return Register();
+ return false;
MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
if (Op1->isImm() && Op2->isReg())
std::swap(Op1, Op2);
if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
- return Register();
+ return false;
Register SelReg = Op1->getReg();
auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
- return Register();
+ return false;
if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
- return Register();
+ return false;
Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
Op1->getImm() != 0 || Op2->getImm() != 1)
- return Register();
+ return false;
Register CCReg = CC->getReg();
// If there was a def between the select and the and, we would need to move it
// to fold this.
if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
- return Register();
+ return false;
+ // TODO: Guard against implicit def operands?
LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
<< *And);
- LIS->RemoveMachineInstrFromMaps(*And);
MachineInstr *Andn2 =
BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
And->getOperand(0).getReg())
@@ -196,34 +195,92 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
MachineOperand &Andn2SCC = Andn2->getOperand(3);
assert(Andn2SCC.getReg() == AMDGPU::SCC);
Andn2SCC.setIsDead(AndSCC.isDead());
+
+ SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2);
And->eraseFromParent();
- LIS->InsertMachineInstrInMaps(*Andn2);
LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
+ SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
+ SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+
+ LiveInterval *CmpLI =
+ CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+ LiveInterval *SelLI =
+ SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
+
+ // Update live intervals for CCReg before potentially removing CmpReg/SelReg,
+ // and their associated liveness information.
+ if (CCReg.isVirtual()) {
+ // Note: this ignores that SelLI might have multiple internal values
+ // or splits and simply extends the live range to cover all cases
+ // where the result of the v_cndmask_b32 was live (e.g. loops).
+ // This could yield worse register allocation in rare edge cases.
+ SlotIndex EndIdx = AndIdx.getRegSlot();
+ if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
+ EndIdx = SelLI->endIndex();
+
+ LiveInterval &CCLI = LIS->getInterval(CCReg);
+ auto CCQ = CCLI.Query(SelIdx.getRegSlot());
+ if (CCQ.valueIn()) {
+ CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
+ EndIdx, CCQ.valueIn()));
+ }
+
+ if (CC->getSubReg()) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ CCLI.refineSubRanges(
+ Allocator, Mask,
+ [=](LiveInterval::SubRange &SR) {
+ auto CCQS = SR.Query(SelIdx.getRegSlot());
+ if (CCQS.valueIn()) {
+ SR.addSegment(LiveRange::Segment(
+ SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
+ }
+ },
+ *LIS->getSlotIndexes(), *TRI);
+ CCLI.removeEmptySubRanges();
+
+ SmallVector<LiveInterval *> SplitLIs;
+ LIS->splitSeparateComponents(CCLI, SplitLIs);
+ }
+ } else
+ LIS->removeAllRegUnitsForPhysReg(CCReg);
+
// Try to remove the compare. The Cmp value should not be used between the
// cmp and s_and_b64 if it is VCC, or should simply be unused if it is any
// other register.
- if ((CmpReg.isVirtual() && MRI->use_nodbg_empty(CmpReg)) ||
+ if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
(CmpReg == Register(CondReg) &&
std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
[&](const MachineInstr &MI) {
return MI.readsRegister(CondReg, TRI);
}))) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
-
+ if (CmpLI)
+ LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
LIS->RemoveMachineInstrFromMaps(*Cmp);
Cmp->eraseFromParent();
// Try to remove v_cndmask_b32.
- if (SelReg.isVirtual() && MRI->use_nodbg_empty(SelReg)) {
- LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+ if (SelLI) {
+ bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+ if (!CanRemoveSel) {
+ // Try to shrink the live interval and check for dead def instead.
+ LIS->shrinkToUses(SelLI, nullptr);
+ CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+ }
+ if (CanRemoveSel) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
- LIS->RemoveMachineInstrFromMaps(*Sel);
- Sel->eraseFromParent();
+ LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
+ LIS->RemoveMachineInstrFromMaps(*Sel);
+ Sel->eraseFromParent();
+ }
}
}
- return CCReg;
+ return true;
}
// Optimize sequence
@@ -330,8 +387,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
Changed = true;
}
- if (Register Reg = optimizeVcndVcmpPair(MBB)) {
- RecalcRegs.insert(Reg);
+ if (optimizeVcndVcmpPair(MBB)) {
RecalcRegs.insert(AMDGPU::VCC_LO);
RecalcRegs.insert(AMDGPU::VCC_HI);
RecalcRegs.insert(AMDGPU::SCC);
@@ -402,7 +458,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
}
// If the only user of a logical operation is move to exec, fold it now
- // to prevent forming of saveexec. I.e:
+ // to prevent forming of saveexec. I.e.:
//
// %0:sreg_64 = COPY $exec
// %1:sreg_64 = S_AND_B64 %0:sreg_64, %2:sreg_64
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
index e13e33ed5457..2ae3157bab49 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -112,8 +112,10 @@ public:
SmallVectorImpl<Register> &CandidateRegs) const;
void collectWaterfallCandidateRegisters(
- MachineBasicBlock *Loop,
- SmallSetVector<Register, 16> &CandidateRegs) const;
+ MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
+ SmallSetVector<Register, 16> &CandidateRegs,
+ SmallSetVector<MachineBasicBlock *, 2> &Blocks,
+ SmallVectorImpl<MachineInstr *> &Instructions) const;
void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
SmallVectorImpl<MachineInstr *> &Uses) const;
@@ -131,7 +133,10 @@ public:
MachineBasicBlock *Flow, MachineBasicBlock *Endif,
SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
- void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const;
+ void optimizeWaterfallLiveRange(
+ Register Reg, MachineBasicBlock *LoopHeader,
+ SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
+ SmallVectorImpl<MachineInstr *> &Instructions) const;
SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
@@ -323,12 +328,34 @@ void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
/// Collect the registers used in the waterfall loop block that are defined
/// before.
void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
- MachineBasicBlock *Loop,
- SmallSetVector<Register, 16> &CandidateRegs) const {
+ MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
+ SmallSetVector<Register, 16> &CandidateRegs,
+ SmallSetVector<MachineBasicBlock *, 2> &Blocks,
+ SmallVectorImpl<MachineInstr *> &Instructions) const {
+
+ // Collect loop instructions, potentially spanning multiple blocks.
+ auto *MBB = LoopHeader;
+ for (;;) {
+ Blocks.insert(MBB);
+ for (auto &MI : *MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ Instructions.push_back(&MI);
+ }
+ if (MBB == LoopEnd)
+ break;
- for (auto &MI : Loop->instrs()) {
- if (MI.isDebugInstr())
- continue;
+ if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
+ (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
+ LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
+ return;
+ }
+
+ MBB = *MBB->succ_begin();
+ }
+
+ for (auto *I : Instructions) {
+ auto &MI = *I;
for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg() || MO.isDef())
@@ -340,16 +367,17 @@ void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
continue;
if (MO.readsReg()) {
- const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
+ MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
// Make sure the value is defined before the LOOP block
- if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) {
+ if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
// If the variable is used after the loop, the register coalescer will
// merge the newly created register and remove the phi node again.
// Just do nothing in that case.
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
bool IsUsed = false;
- for (auto *Succ : Loop->successors()) {
- if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
+ for (auto *Succ : LoopEnd->successors()) {
+ if (!Blocks.contains(Succ) &&
+ OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
IsUsed = true;
break;
}
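
The chain walk above only accepts a very particular CFG shape. A standalone sketch of the same guarded walk (hypothetical Block type; pred_size/succ_size reduced to plain counts):

#include <iostream>
#include <set>
#include <vector>

// A toy CFG node with successor pointers and a predecessor count.
struct Block {
  int Id;
  std::vector<Block *> Succs;
  unsigned NumPreds = 0;
};

// Collect the chain Header..End, bailing out (returning false) on any CFG
// shape the optimization does not expect, as in
// collectWaterfallCandidateRegisters above: the header has two predecessors
// (entry edge plus back edge), every other block exactly one, and every
// block before End exactly one successor.
bool collectLoopBlocks(Block *Header, Block *End, std::set<Block *> &Blocks) {
  Block *MBB = Header;
  for (;;) {
    Blocks.insert(MBB);
    if (MBB == End)
      return true;
    unsigned WantPreds = (MBB == Header) ? 2u : 1u;
    if (MBB->NumPreds != WantPreds || MBB->Succs.size() != 1)
      return false; // unexpected edges in CFG: ignore this loop
    MBB = MBB->Succs.front();
  }
}

int main() {
  Block C{3, {}, 1};
  Block B{2, {&C}, 1};
  Block A{1, {&B}, 2}; // header: entry edge plus back edge
  std::set<Block *> Blocks;
  std::cout << (collectLoopBlocks(&A, &C, Blocks) ? "ok" : "bail") << '\n';
  std::cout << Blocks.size() << " blocks\n"; // 3
}
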
@@ -513,7 +541,9 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange(
}
void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
- Register Reg, MachineBasicBlock *Loop) const {
+ Register Reg, MachineBasicBlock *LoopHeader,
+ SmallSetVector<MachineBasicBlock *, 2> &Blocks,
+ SmallVectorImpl<MachineInstr *> &Instructions) const {
// Insert a new PHI, marking the value from the last loop iteration undef.
LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
const auto *RC = MRI->getRegClass(Reg);
@@ -525,15 +555,16 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
auto *UseMI = O.getParent();
auto *UseBlock = UseMI->getParent();
- // Replace uses in Loop block
- if (UseBlock == Loop)
+ // Replace uses in Loop blocks
+ if (Blocks.contains(UseBlock))
O.setReg(NewReg);
}
- MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(),
- TII->get(TargetOpcode::PHI), NewReg);
- for (auto *Pred : Loop->predecessors()) {
- if (Pred == Loop)
+ MachineInstrBuilder PHI =
+ BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ for (auto *Pred : LoopHeader->predecessors()) {
+ if (Blocks.contains(Pred))
PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
else
PHI.addReg(Reg).addMBB(Pred);
@@ -542,21 +573,36 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
- // collectWaterfallCandidateRegisters only collects registers that are dead
- // after the loop. So we know that the old reg is not live throughout the
- // whole block anymore.
- OldVarInfo.AliveBlocks.reset(Loop->getNumber());
-
- // Mark the last use as kill
- for (auto &MI : reverse(Loop->instrs())) {
- if (MI.readsRegister(NewReg, TRI)) {
- MI.addRegisterKilled(NewReg, TRI);
- NewVarInfo.Kills.push_back(&MI);
+ // Find last use and mark as kill
+ MachineInstr *Kill = nullptr;
+ for (auto *MI : reverse(Instructions)) {
+ if (MI->readsRegister(NewReg, TRI)) {
+ MI->addRegisterKilled(NewReg, TRI);
+ NewVarInfo.Kills.push_back(MI);
+ Kill = MI;
break;
}
}
- assert(!NewVarInfo.Kills.empty() &&
- "Failed to find last usage of register in loop");
+ assert(Kill && "Failed to find last usage of register in loop");
+
+ MachineBasicBlock *KillBlock = Kill->getParent();
+ bool PostKillBlock = false;
+ for (auto *Block : Blocks) {
+ auto BBNum = Block->getNumber();
+
+ // collectWaterfallCandidateRegisters only collects registers that are dead
+ // after the loop. So we know that the old reg is no longer live throughout
+ // the waterfall loop.
+ OldVarInfo.AliveBlocks.reset(BBNum);
+
+ // The new register is live up to (and including) the block that kills it.
+ PostKillBlock |= (Block == KillBlock);
+ if (PostKillBlock) {
+ NewVarInfo.AliveBlocks.reset(BBNum);
+ } else if (Block != LoopHeader) {
+ NewVarInfo.AliveBlocks.set(BBNum);
+ }
+ }
}
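
The AliveBlocks update above reduces to a one-pass flag walk: the new value is live-through exactly the blocks after the header and before the one containing its kill. A sketch of that walk with plain block numbers (hypothetical setup, ignoring the header PHI and the old value's bookkeeping):

#include <iostream>
#include <vector>

int main() {
  // Block numbers of a waterfall loop, in layout order; the register's last
  // use ("kill") is in block 12.
  std::vector<int> LoopBlocks = {10, 11, 12, 13};
  const int LoopHeader = 10, KillBlock = 12;

  // AliveBlocks marks blocks a value lives all the way through. Mirror the
  // update in optimizeWaterfallLiveRange: the new value is live-through only
  // for blocks after the header and before the block containing its kill.
  std::vector<int> NewAliveBlocks;
  bool PostKillBlock = false;
  for (int BB : LoopBlocks) {
    PostKillBlock |= (BB == KillBlock);
    if (!PostKillBlock && BB != LoopHeader)
      NewAliveBlocks.push_back(BB);
  }

  for (int BB : NewAliveBlocks)
    std::cout << "live-through block " << BB << '\n'; // only block 11
}
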
char SIOptimizeVGPRLiveRange::ID = 0;
@@ -601,6 +647,10 @@ bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
if (!Endif)
continue;
+ // Skip unexpected control flow.
+ if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
+ continue;
+
SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
SmallVector<Register> CandidateRegs;
@@ -620,15 +670,22 @@ bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
for (auto Reg : CandidateRegs)
optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
} else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
+ auto *LoopHeader = MI.getOperand(0).getMBB();
+ auto *LoopEnd = &MBB;
+
LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
- << printMBBReference(MBB) << '\n');
+ << printMBBReference(*LoopHeader) << '\n');
SmallSetVector<Register, 16> CandidateRegs;
- collectWaterfallCandidateRegisters(&MBB, CandidateRegs);
+ SmallVector<MachineInstr *, 16> Instructions;
+ SmallSetVector<MachineBasicBlock *, 2> Blocks;
+
+ collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
+ Blocks, Instructions);
MadeChange |= !CandidateRegs.empty();
// Now we are safe to optimize.
for (auto Reg : CandidateRegs)
- optimizeWaterfallLiveRange(Reg, &MBB);
+ optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index da41a5e2478a..e768a2f3e1a5 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -316,7 +316,7 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
}
if (Abs || Neg) {
assert(!Sext &&
- "Float and integer src modifiers can't be set simulteniously");
+ "Float and integer src modifiers can't be set simultaneously");
Mods |= Abs ? SISrcMods::ABS : 0u;
Mods ^= Neg ? SISrcMods::NEG : 0u;
} else if (Sext) {
@@ -1131,16 +1131,16 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
bool Converted = false;
for (auto &Operand : SDWAOperands) {
LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
- // There should be no intesection between SDWA operands and potential MIs
+ // There should be no intersection between SDWA operands and potential MIs
// e.g.:
// v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
// v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
// v_add_u32 v3, v4, v2
//
- // In that example it is possible that we would fold 2nd instruction into 3rd
- // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was
- // already destroyed). So if SDWAOperand is also a potential MI then do not
- // apply it.
+ // In that example it is possible that we would fold 2nd instruction into
+ // 3rd (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that
+ // was already destroyed). So if SDWAOperand is also a potential MI then do
+ // not apply it.
if (PotentialMatches.count(Operand->getParentInst()) == 0)
Converted |= Operand->convertToSDWA(*SDWAInst, TII);
}
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index c2e2875ed6bf..4fab13bb44b1 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -18,7 +18,10 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -85,9 +88,6 @@ FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
}
bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
- if (!MO.isReg())
- return false;
-
Register Reg = MO.getReg();
if (Reg.isPhysical())
return false;
@@ -111,7 +111,6 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
}
llvm_unreachable("physreg not found for WWM expression");
- return false;
}
void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
@@ -142,7 +141,6 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
}
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo &FrameInfo = MF.getFrameInfo();
for (unsigned Reg : RegsToRewrite) {
LIS->removeInterval(Reg);
@@ -150,18 +148,7 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
const Register PhysReg = VRM->getPhys(Reg);
assert(PhysReg != 0);
- // Check if PhysReg is already reserved
- if (!MFI->WWMReservedRegs.count(PhysReg)) {
- Optional<int> FI;
- if (!MFI->isEntryFunction()) {
- // Create a stack object for a possible spill in the function prologue.
- // Note: Non-CSR VGPR also need this as we may overwrite inactive lanes.
- const TargetRegisterClass *RC = TRI->getPhysRegClass(PhysReg);
- FI = FrameInfo.CreateSpillStackObject(TRI->getSpillSize(*RC),
- TRI->getSpillAlign(*RC));
- }
- MFI->reserveWWMRegister(PhysReg, FI);
- }
+ MFI->reserveWWMRegister(PhysReg);
}
RegsToRewrite.clear();
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index b0e45dd3e3e3..8d33b8a1fd4b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -74,6 +74,15 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
// We end up with this pattern sometimes after basic block placement.
// It happens while combining a block which assigns -1 or 0 to a saved mask
// and another block which consumes that saved mask and then a branch.
+ //
+ // While searching, this also performs the following substitution:
+ // vcc = V_CMP
+ // vcc = S_AND exec, vcc
+ // S_CBRANCH_VCC[N]Z
+ // =>
+ // vcc = V_CMP
+ // S_CBRANCH_VCC[N]Z
+
bool Changed = false;
MachineBasicBlock &MBB = *MI.getParent();
const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
@@ -121,19 +130,32 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
SReg = Op2.getReg();
auto M = std::next(A);
bool ReadsSreg = false;
+ bool ModifiesExec = false;
for (; M != E; ++M) {
if (M->definesRegister(SReg, TRI))
break;
if (M->modifiesRegister(SReg, TRI))
return Changed;
ReadsSreg |= M->readsRegister(SReg, TRI);
+ ModifiesExec |= M->modifiesRegister(ExecReg, TRI);
+ }
+ if (M == E)
+ return Changed;
+ // If SReg is VCC and the SReg definition is a VALU comparison, the S_AND
+ // with EXEC is not required: erase the S_AND and return.
+ // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS.
+ if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec &&
+ TII->isVOPC(*M)) {
+ A->eraseFromParent();
+ return true;
}
- if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() ||
+ if (!M->isMoveImmediate() || !M->getOperand(1).isImm() ||
(M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))
return Changed;
MaskValue = M->getOperand(1).getImm();
// First if sreg is only used in the AND instruction fold the immediate
- // into into the AND.
+ // into the AND.
if (!ReadsSreg && Op2.isKill()) {
A->getOperand(2).ChangeToImmediate(MaskValue);
M->eraseFromParent();
@@ -213,7 +235,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
}
- MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
+ MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
MI.addImplicitDefUseOperands(*MBB.getParent());
return true;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 21aed4ececb5..ad1455ed20fd 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -19,7 +19,9 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;
@@ -182,6 +184,16 @@ struct SGPRSpillBuilder {
TmpVGPRLive = true;
}
+ if (TmpVGPRLive) {
+ // We need to inform the scavenger that this index is already in use until
+ // we're done with the custom emergency spill.
+ RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
+ }
+
+ // We may end up recursively calling the scavenger, and don't want to re-use
+ // the same register.
+ RS->setRegUsed(TmpVGPR);
+
// Try to scavenge SGPRs to save exec
assert(!SavedExecReg && "Exec is already saved, refuse to save again");
const TargetRegisterClass &RC =
@@ -202,6 +214,12 @@ struct SGPRSpillBuilder {
// Spill needed lanes
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
} else {
+ // The modify and restore of exec clobber SCC, which we would have to save
+ // and restore. FIXME: We probably would need to reserve a register for
+ // this.
+ if (RS->isRegUsed(AMDGPU::SCC))
+ MI->emitError("unhandled SGPR spill to memory");
+
// Spill active lanes
if (TmpVGPRLive)
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
@@ -251,6 +269,12 @@ struct SGPRSpillBuilder {
if (TmpVGPRLive)
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
}
+
+ // Inform the scavenger where we're releasing our custom scavenged register.
+ if (TmpVGPRLive) {
+ MachineBasicBlock::iterator RestorePt = std::prev(MI);
+ RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
+ }
}
// Write TmpVGPR to memory or read TmpVGPR from memory.
@@ -265,6 +289,12 @@ struct SGPRSpillBuilder {
// Spill needed lanes
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
} else {
+ // The modify and restore of exec clobber SCC, which we would have to save
+ // and restore. FIXME: We probably would need to reserve a register for
+ // this.
+ if (RS->isRegUsed(AMDGPU::SCC))
+ MI->emitError("unhandled SGPR spill to memory");
+
// Spill active lanes
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
/*IsKill*/ false);
@@ -329,7 +359,7 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
static auto InitializeSubRegFromChannelTableOnce = [this]() {
for (auto &Row : SubRegFromChannelTable)
Row.fill(AMDGPU::NoSubRegister);
- for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
+ for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
assert(Width < SubRegFromChannelTableWidthMap.size());
@@ -364,13 +394,11 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
- return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
- ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
- : CSR_AMDGPU_HighRegs_SaveList;
+ return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
+ : CSR_AMDGPU_SaveList;
case CallingConv::AMDGPU_Gfx:
- return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
- ? CSR_AMDGPU_SI_Gfx_With_AGPRs_SaveList
- : CSR_AMDGPU_SI_Gfx_SaveList;
+ return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
+ : CSR_AMDGPU_SI_Gfx_SaveList;
default: {
// Dummy to not crash RegisterClassInfo.
static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
@@ -390,13 +418,11 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
- return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
- ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
- : CSR_AMDGPU_HighRegs_RegMask;
+ return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
+ : CSR_AMDGPU_RegMask;
case CallingConv::AMDGPU_Gfx:
- return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
- ? CSR_AMDGPU_SI_Gfx_With_AGPRs_RegMask
- : CSR_AMDGPU_SI_Gfx_RegMask;
+ return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
+ : CSR_AMDGPU_SI_Gfx_RegMask;
default:
return nullptr;
}
@@ -413,8 +439,7 @@ SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
// equivalent AV class. If used one, the verifier will crash after
// RegBankSelect in the GISel flow. The aligned regclasses are not fully given
// until Instruction selection.
- if (MF.getSubtarget<GCNSubtarget>().hasMAIInsts() &&
- (isVGPRClass(RC) || isAGPRClass(RC))) {
+ if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
return &AMDGPU::AV_32RegClass;
if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
@@ -463,8 +488,7 @@ SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
}
Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const SIFrameLowering *TFI =
- MF.getSubtarget<GCNSubtarget>().getFrameLowering();
+ const SIFrameLowering *TFI = ST.getFrameLowering();
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
// During ISel lowering we always reserve the stack pointer in entry
// functions, but never actually want to reference it when accessing our own
@@ -487,19 +511,19 @@ bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
- return CSR_AMDGPU_AllVGPRs_RegMask;
+ return AMDGPU_AllVGPRs_RegMask;
}
const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
- return CSR_AMDGPU_AllAGPRs_RegMask;
+ return AMDGPU_AllAGPRs_RegMask;
}
const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
- return CSR_AMDGPU_AllVectorRegs_RegMask;
+ return AMDGPU_AllVectorRegs_RegMask;
}
const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
- return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
+ return AMDGPU_AllAllocatableSRegs_RegMask;
}
unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
@@ -522,6 +546,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::MODE);
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // Reserve special purpose registers.
+ //
// EXEC_LO and EXEC_HI could be allocated and used as regular register, but
// this seems likely to result in bugs, so I'm marking them as reserved.
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
@@ -563,7 +591,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
// Reserve null register - it shall never be allocated
- reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
// Disallow vcc_hi allocation in wave32. It may be allocated but most likely
// will result in bugs.
@@ -572,6 +600,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VCC_HI);
}
+ // Reserve SGPRs.
+ //
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
@@ -579,39 +609,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
- unsigned MaxNumAGPRs = MaxNumVGPRs;
- unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
-
- if (ST.hasGFX90AInsts()) {
- // In an entry function without calls and AGPRs used it is possible to use
- // the whole register budget for VGPRs.
-
- // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
- // split register file accordingly.
- if (MFI->usesAGPRs(MF)) {
- MaxNumVGPRs /= 2;
- MaxNumAGPRs = MaxNumVGPRs;
- } else {
- if (MaxNumVGPRs > TotalNumVGPRs) {
- MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
- MaxNumVGPRs = TotalNumVGPRs;
- } else
- MaxNumAGPRs = 0;
- }
- }
-
- for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
- unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
-
- for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
- unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
-
for (auto Reg : AMDGPU::SReg_32RegClass) {
Reserved.set(getSubReg(Reg, AMDGPU::hi16));
Register Low = getSubReg(Reg, AMDGPU::lo16);
@@ -620,22 +617,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Low);
}
- for (auto Reg : AMDGPU::AGPR_32RegClass) {
- Reserved.set(getSubReg(Reg, AMDGPU::hi16));
- }
-
- // Reserve all the rest AGPRs if there are no instructions to use it.
- if (!ST.hasMAIInsts()) {
- for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
- unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
- }
-
Register ScratchRSrcReg = MFI->getScratchRSrcReg();
if (ScratchRSrcReg != AMDGPU::NoRegister) {
- // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
- // to spill.
+ // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
+ // need to spill.
// TODO: May need to reserve a VGPR if doing LDS spilling.
reserveRegisterTuples(Reserved, ScratchRSrcReg);
}
@@ -644,7 +629,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// which is detected after the function is lowered. If we aren't really going
// to need SP, don't bother reserving it.
MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
-
if (StackPtrReg) {
reserveRegisterTuples(Reserved, StackPtrReg);
assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
@@ -662,20 +646,63 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
}
- for (auto Reg : MFI->WWMReservedRegs) {
- reserveRegisterTuples(Reserved, Reg.first);
+ // Reserve VGPRs/AGPRs.
+ //
+ unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxNumAGPRs = MaxNumVGPRs;
+ unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ // Reserve all the AGPRs if there are no instructions to use them.
+ if (!ST.hasMAIInsts()) {
+ for (unsigned i = 0; i < MaxNumAGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
}
- // Reserve VGPRs used for SGPR spilling.
- // Note we treat freezeReservedRegs unusually because we run register
- // allocation in two phases. It's OK to re-freeze with new registers for the
- // second run.
-#if 0
- for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) {
- for (auto &SpilledVGPR : SpilledFI.second)
- reserveRegisterTuples(Reserved, SpilledVGPR.VGPR);
+ for (auto Reg : AMDGPU::AGPR_32RegClass) {
+ Reserved.set(getSubReg(Reg, AMDGPU::hi16));
}
-#endif
+
+ // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
+ // a wave may have up to 512 total vector registers combining together both
+ // VGPRs and AGPRs. Hence, in an entry function without calls and without
+ // AGPRs used within it, it is possible to use the whole vector register
+ // budget for VGPRs.
+ //
+ // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
+ // register file accordingly.
+ if (ST.hasGFX90AInsts()) {
+ if (MFI->usesAGPRs(MF)) {
+ MaxNumVGPRs /= 2;
+ MaxNumAGPRs = MaxNumVGPRs;
+ } else {
+ if (MaxNumVGPRs > TotalNumVGPRs) {
+ MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+ MaxNumVGPRs = TotalNumVGPRs;
+ } else
+ MaxNumAGPRs = 0;
+ }
+ }
+
+ for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
+
+ for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
+
+ // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
+ // VGPR available at all times.
+ if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+ reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
+ }
+
+ for (Register Reg : MFI->WWMReservedRegs)
+ reserveRegisterTuples(Reserved, Reg);
// FIXME: Stop using reserved registers for this.
for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
@@ -690,6 +717,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
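
The GFX90A budget split above can be read as a small pure function from (budget, VGPR file size, uses-AGPRs) to the two per-file maxima. A sketch under that reading (hypothetical splitVectorBudget helper; the real code mutates MaxNumVGPRs/MaxNumAGPRs in place):

#include <cassert>
#include <utility>

// Sketch of the GFX90A vector-register budget split described above. A wave
// has a combined budget (up to 512 registers) spanning both files, but the
// VGPR file itself holds at most TotalNumVGPRs (256).
std::pair<unsigned, unsigned> splitVectorBudget(unsigned Budget,
                                                unsigned TotalNumVGPRs,
                                                bool UsesAGPRs) {
  unsigned MaxNumVGPRs = Budget;
  unsigned MaxNumAGPRs;
  if (UsesAGPRs) {
    MaxNumVGPRs /= 2;          // split the budget evenly between the files
    MaxNumAGPRs = MaxNumVGPRs;
  } else if (MaxNumVGPRs > TotalNumVGPRs) {
    MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs; // spill-over goes to AGPRs
    MaxNumVGPRs = TotalNumVGPRs;
  } else {
    MaxNumAGPRs = 0;           // the whole budget fits in the VGPR file
  }
  return {MaxNumVGPRs, MaxNumAGPRs};
}

int main() {
  assert(splitVectorBudget(512, 256, true) == std::make_pair(256u, 256u));
  assert(splitVectorBudget(512, 256, false) == std::make_pair(256u, 256u));
  assert(splitVectorBudget(128, 256, false) == std::make_pair(128u, 0u));
}
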
+bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
+ MCRegister PhysReg) const {
+ return !MF.getRegInfo().isReserved(PhysReg);
+}
+
bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// On entry, the base address is 0, so it can't possibly need any more
@@ -1010,6 +1042,8 @@ static int getOffsetMUBUFStore(unsigned Opc) {
return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
+ case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
+ return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
@@ -1035,6 +1069,8 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
+ case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
+ return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
@@ -1054,6 +1090,64 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
}
}
+static int getOffenMUBUFStore(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
+ return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
+ case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
+ return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
+ case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
+ return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
+ case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
+ return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
+ case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
+ return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
+ case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
+ return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
+ case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
+ return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
+ case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
+ return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
+ default:
+ return -1;
+ }
+}
+
+static int getOffenMUBUFLoad(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
+ return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
+ case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
+ return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
+ case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
+ return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
+ case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
+ return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
+ case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
+ return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
+ case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
+ return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
+ case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
+ return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
+ case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
+ return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
+ case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
+ return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
+ case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
+ return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
+ case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
+ return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
+ case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
+ return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
+ case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
+ return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
+ case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
+ return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
+ default:
+ return -1;
+ }
+}
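
Both helpers above are pure opcode remaps from the immediate-offset (_OFFSET) buffer form to its register-offset (_OFFEN) twin, with -1 signalling an unsupported opcode. A loose string-based sketch of that mapping; the real helpers enumerate the supported opcodes explicitly rather than rewriting names:

#include <cassert>
#include <optional>
#include <string>

// Map an immediate-offset buffer opcode name to its register-offset twin,
// or report failure for anything else (the real helpers return -1).
std::optional<std::string> getOffenVariant(const std::string &Opc) {
  const std::string Suffix = "_OFFSET";
  if (Opc.size() > Suffix.size() &&
      Opc.compare(Opc.size() - Suffix.size(), Suffix.size(), Suffix) == 0)
    return Opc.substr(0, Opc.size() - Suffix.size()) + "_OFFEN";
  return std::nullopt; // not an immediate-offset buffer opcode
}

int main() {
  assert(*getOffenVariant("BUFFER_STORE_DWORDX3_OFFSET") ==
         "BUFFER_STORE_DWORDX3_OFFEN");
  assert(!getOffenVariant("FLAT_LOAD_DWORD"));
}
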
+
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -1139,8 +1233,9 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
unsigned LoadStoreOp,
unsigned EltSize) {
bool IsStore = TII->get(LoadStoreOp).mayStore();
+ bool HasVAddr = AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) != -1;
bool UseST =
- AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
+ !HasVAddr &&
AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;
switch (EltSize) {
@@ -1164,7 +1259,9 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
llvm_unreachable("Unexpected spill load/store size!");
}
- if (UseST)
+ if (HasVAddr)
+ LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
+ else if (UseST)
LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
return LoadStoreOp;
@@ -1186,6 +1283,7 @@ void SIRegisterInfo::buildSpillLoadStore(
bool IsStore = Desc->mayStore();
bool IsFlat = TII->isFLATScratch(LoadStoreOp);
+ bool CanClobberSCC = false;
bool Scavenged = false;
MCRegister SOffset = ScratchOffsetReg;
@@ -1202,6 +1300,8 @@ void SIRegisterInfo::buildSpillLoadStore(
unsigned RemSize = RegWidth - Size;
unsigned NumRemSubRegs = RemSize ? 1 : 0;
int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
+ int64_t MaterializedOffset = Offset;
+
int64_t MaxOffset = Offset + Size + RemSize - EltSize;
int64_t ScratchOffsetRegDelta = 0;
@@ -1216,6 +1316,42 @@ void SIRegisterInfo::buildSpillLoadStore(
assert((IsFlat || ((Offset % EltSize) == 0)) &&
"unexpected VGPR spill offset");
+ // Track a VGPR to use for a constant offset we need to materialize.
+ Register TmpOffsetVGPR;
+
+ // Track a VGPR to use as an intermediate value.
+ Register TmpIntermediateVGPR;
+ bool UseVGPROffset = false;
+
+ // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
+ // combination.
+ auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
+ int64_t VOffset) {
+ // We are using a VGPR offset
+ if (IsFlat && SGPRBase) {
+ // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
+ // SGPR, so perform the add as vector.
+ // We don't need a base SGPR in the kernel.
+
+ if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
+ .addReg(SGPRBase)
+ .addImm(VOffset)
+ .addImm(0); // clamp
+ } else {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+ .addReg(SGPRBase);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
+ .addImm(VOffset)
+ .addReg(TmpOffsetVGPR);
+ }
+ } else {
+ assert(TmpOffsetVGPR);
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+ .addImm(VOffset);
+ }
+ };
+
bool IsOffsetLegal =
IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch)
@@ -1223,17 +1359,17 @@ void SIRegisterInfo::buildSpillLoadStore(
if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
SOffset = MCRegister();
- // We currently only support spilling VGPRs to EltSize boundaries, meaning
- // we can simplify the adjustment of Offset here to just scale with
- // WavefrontSize.
- if (!IsFlat)
- Offset *= ST.getWavefrontSize();
-
// We don't have access to the register scavenger if this function is called
// during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
+ // TODO: Clobbering SCC is not necessary for scratch instructions in the
+ // entry.
if (RS) {
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
+
+ // Piggy back on the liveness scan we just did to see if SCC is dead.
+ CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
} else if (LiveRegs) {
+ CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
if (LiveRegs->available(MF->getRegInfo(), Reg)) {
SOffset = Reg;
@@ -1242,7 +1378,26 @@ void SIRegisterInfo::buildSpillLoadStore(
}
}
+ if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
+ SOffset = Register();
+
if (!SOffset) {
+ UseVGPROffset = true;
+
+ if (RS) {
+ TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ } else {
+ assert(LiveRegs);
+ for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
+ if (LiveRegs->available(MF->getRegInfo(), Reg)) {
+ TmpOffsetVGPR = Reg;
+ break;
+ }
+ }
+ }
+
+ assert(TmpOffsetVGPR);
+ } else if (!SOffset && CanClobberSCC) {
// There are no free SGPRs, and we are in the process of spilling
// VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
// on SI/CI and on VI it is true until we implement spilling using scalar
@@ -1250,6 +1405,9 @@ void SIRegisterInfo::buildSpillLoadStore(
// add the offset directly to the ScratchOffset or StackPtrOffset
// register, and then subtract the offset after the spill to return the
// register to its original value.
+
+ // TODO: If we don't have to do an emergency stack slot spill, converting
+ // to use the VGPR offset takes fewer instructions.
if (!ScratchOffsetReg)
ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
SOffset = ScratchOffsetReg;
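
Editor's note: when neither a free SGPR nor a clobberable SCC is available, the fallback retained here adds the offset into the frame/stack-pointer register itself and subtracts it back after the access, as the surrounding comment describes. A toy model of that borrow-and-restore sequence; the array register file is purely illustrative:

#include <cstdio>

// Hypothetical scalar register file: index -> value.
long SGPR[8];

// Fold the spill offset into the stack pointer register, perform the
// access, then undo the add to restore the register.
void accessWithBorrowedBase(int StackPtrReg, long Offset) {
  SGPR[StackPtrReg] += Offset;   // s_add_i32 sp, sp, offset
  std::printf("scratch access at base %ld\n", SGPR[StackPtrReg]);
  SGPR[StackPtrReg] -= Offset;   // s_add_i32 sp, sp, -offset (restore)
}

int main() {
  SGPR[2] = 1024;                // pretend s2 is the stack pointer
  accessWithBorrowedBase(2, 256);
  std::printf("restored base %ld\n", SGPR[2]);  // back to 1024
}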
@@ -1258,12 +1416,22 @@ void SIRegisterInfo::buildSpillLoadStore(
Scavenged = true;
}
- if (!SOffset)
+ // We currently only support spilling VGPRs to EltSize boundaries, meaning
+ // we can simplify the adjustment of Offset here to just scale with
+ // WavefrontSize.
+ if (!IsFlat && !UseVGPROffset)
+ Offset *= ST.getWavefrontSize();
+
+ if (!UseVGPROffset && !SOffset)
report_fatal_error("could not scavenge SGPR to spill in entry function");
- if (ScratchOffsetReg == AMDGPU::NoRegister) {
+ if (UseVGPROffset) {
+ // We are using a VGPR offset
+ MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
+ } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
} else {
+ assert(Offset != 0);
auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
.addReg(ScratchOffsetReg)
.addImm(Offset);
@@ -1277,13 +1445,16 @@ void SIRegisterInfo::buildSpillLoadStore(
assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
&& "Unexpected vaddr for flat scratch with a FI operand");
- assert(ST.hasFlatScratchSTMode());
- LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
+ if (UseVGPROffset) {
+ LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
+ } else {
+ assert(ST.hasFlatScratchSTMode());
+ LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
+ }
+
Desc = &TII->get(LoadStoreOp);
}
- Register TmpReg;
-
for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
++i, RegOffset += EltSize) {
if (i == NumSubRegs) {
@@ -1292,6 +1463,22 @@ void SIRegisterInfo::buildSpillLoadStore(
}
Desc = &TII->get(LoadStoreOp);
+ if (!IsFlat && UseVGPROffset) {
+ int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
+ : getOffenMUBUFLoad(LoadStoreOp);
+ Desc = &TII->get(NewLoadStoreOp);
+ }
+
+ if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
+ // If we are spilling an AGPR beyond the range of the memory instruction
+ // offset and need to use a VGPR offset, we ideally have at least 2
+ // scratch VGPRs. If we don't have a second free VGPR without spilling,
+ // recycle the VGPR used for the offset which requires resetting after
+ // each subregister.
+
+ MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
+ }
+
unsigned NumRegs = EltSize / 4;
Register SubReg = e == 1
? ValueReg
@@ -1300,7 +1487,8 @@ void SIRegisterInfo::buildSpillLoadStore(
unsigned SOffsetRegState = 0;
unsigned SrcDstRegState = getDefRegState(!IsStore);
- if (i + 1 == e) {
+ const bool IsLastSubReg = i + 1 == e;
+ if (IsLastSubReg) {
SOffsetRegState |= getKillRegState(Scavenged);
// The last implicit use carries the "Kill" flag.
SrcDstRegState |= getKillRegState(IsKill);
@@ -1363,21 +1551,26 @@ void SIRegisterInfo::buildSpillLoadStore(
if (IsAGPR) {
assert(EltSize == 4);
- if (!TmpReg) {
- assert(RS && "Needs to have RegScavenger to spill an AGPR!");
- // FIXME: change to scavengeRegisterBackwards()
- TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
- RS->setRegUsed(TmpReg);
+ if (!TmpIntermediateVGPR) {
+ TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
+ assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
}
if (IsStore) {
auto AccRead = BuildMI(MBB, MI, DL,
- TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
+ TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
+ TmpIntermediateVGPR)
.addReg(SubReg, getKillRegState(IsKill));
if (NeedSuperRegDef)
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
- SubReg = TmpReg;
+ SubReg = TmpIntermediateVGPR;
+ } else if (UseVGPROffset) {
+ // FIXME: change to scavengeRegisterBackwards()
+ if (!TmpOffsetVGPR) {
+ TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ RS->setRegUsed(TmpOffsetVGPR);
+ }
}
MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
@@ -1388,12 +1581,26 @@ void SIRegisterInfo::buildSpillLoadStore(
auto MIB =
BuildMI(MBB, MI, DL, *Desc)
.addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
+
+ if (UseVGPROffset) {
+ // For an AGPR spill, we reuse the same temp VGPR for the offset and the
+ // intermediate accvgpr_write.
+ MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
+ }
+
if (!IsFlat)
MIB.addReg(FuncInfo->getScratchRSrcReg());
if (SOffset == AMDGPU::NoRegister) {
- if (!IsFlat)
- MIB.addImm(0);
+ if (!IsFlat) {
+ if (UseVGPROffset && ScratchOffsetReg) {
+ assert(!FuncInfo->isEntryFunction());
+ MIB.addReg(ScratchOffsetReg);
+ } else {
+ assert(FuncInfo->isEntryFunction());
+ MIB.addImm(0);
+ }
+ }
} else {
MIB.addReg(SOffset, SOffsetRegState);
}
@@ -1407,10 +1614,10 @@ void SIRegisterInfo::buildSpillLoadStore(
if (!IsAGPR && NeedSuperRegDef)
MIB.addReg(ValueReg, RegState::ImplicitDefine);
- if (!IsStore && TmpReg != AMDGPU::NoRegister) {
+ if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
FinalReg)
- .addReg(TmpReg, RegState::Kill);
+ .addReg(TmpIntermediateVGPR, RegState::Kill);
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
@@ -1466,8 +1673,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
bool OnlyToVGPR) const {
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
- SB.MFI.getSGPRToVGPRSpills(Index);
+ ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
@@ -1485,7 +1691,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
SB.NumSubRegs == 1
? SB.SuperReg
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
- SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+ SpilledReg Spill = VGPRSpills[i];
bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;
@@ -1586,8 +1792,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
bool OnlyToVGPR) const {
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
- SB.MFI.getSGPRToVGPRSpills(Index);
+ ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRToVGPRSpills(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
@@ -1599,7 +1804,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
? SB.SuperReg
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
- SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+ SpilledReg Spill = VGPRSpills[i];
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
SubReg)
.addReg(Spill.VGPR)
@@ -1937,18 +2142,23 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Offset = 0;
}
- assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
- "Unexpected vaddr for flat scratch with a FI operand");
-
- // On GFX10 we have ST mode to use no registers for an address.
- // Otherwise we need to materialize 0 into an SGPR.
- if (!Offset && ST.hasFlatScratchSTMode()) {
+ if (!Offset) {
unsigned Opc = MI->getOpcode();
- unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
- MI->RemoveOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
- MI->setDesc(TII->get(NewOpc));
- return;
+ int NewOpc = -1;
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1) {
+ NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+ } else if (ST.hasFlatScratchSTMode()) {
+ // On GFX10 we have ST mode to use no registers for an address.
+ // Otherwise we need to materialize 0 into an SGPR.
+ NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+ }
+
+ if (NewOpc != -1) {
+ MI->removeOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+ MI->setDesc(TII->get(NewOpc));
+ return;
+ }
}
}
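
Editor's note: the rewritten zero-offset path above prefers dropping the dead saddr operand: SVS forms degrade to SV when a vaddr is present, SS forms degrade to ST when the subtarget supports ST addressing, and otherwise the instruction is left alone. A small sketch of that decision with illustrative enumerators in place of real MC opcodes:

#include <cstdio>

// Illustrative addressing kinds; the real code maps concrete opcodes
// through getFlatScratchInstSVfromSVS / getFlatScratchInstSTfromSS.
enum class Addr { SV, ST, Unchanged };

// When the frame offset folds to zero, the saddr operand is dead.
Addr rewriteZeroOffset(bool hasVAddr, bool hasSTMode) {
  if (hasVAddr)
    return Addr::SV;
  if (hasSTMode)
    return Addr::ST;
  return Addr::Unchanged;  // keep saddr and materialize 0 instead
}

int main() {
  std::printf("%d %d %d\n",
              static_cast<int>(rewriteZeroOffset(true, false)),   // SV
              static_cast<int>(rewriteZeroOffset(false, true)),   // ST
              static_cast<int>(rewriteZeroOffset(false, false))); // Unchanged
}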
@@ -2026,57 +2236,78 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (!IsMUBUF && !MFI->isEntryFunction()) {
// Convert to a swizzled stack address by scaling by the wave size.
- //
// In an entry function/kernel the offset is already swizzled.
-
- bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
- Register ResultReg =
- IsCopy ? MI->getOperand(0).getReg()
- : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
+ bool LiveSCC = RS->isRegUsed(AMDGPU::SCC);
+ const TargetRegisterClass *RC = IsSALU && !LiveSCC
+ ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::VGPR_32RegClass;
+ bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
+ Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
+ : RS->scavengeRegister(RC, MI, 0);
int64_t Offset = FrameInfo.getObjectOffset(Index);
if (Offset == 0) {
+ unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
+ : AMDGPU::V_LSHRREV_B32_e64;
// XXX - This never happens because of emergency scavenging slot at 0?
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
- .addImm(ST.getWavefrontSizeLog2())
- .addReg(FrameReg);
+ auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg)
+ .addImm(ST.getWavefrontSizeLog2())
+ .addReg(FrameReg);
+ if (IsSALU && !LiveSCC)
+ Shift.getInstr()->getOperand(3).setIsDead(
+ true); // Mark SCC as dead.
+ if (IsSALU && LiveSCC) {
+ Register NewDest =
+ RS->scavengeRegister(&AMDGPU::SReg_32RegClass, Shift, 0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ NewDest)
+ .addReg(ResultReg);
+ ResultReg = NewDest;
+ }
} else {
- if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
- // Reuse ResultReg in intermediate step.
- Register ScaledReg = ResultReg;
-
- BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
- ScaledReg)
- .addImm(ST.getWavefrontSizeLog2())
- .addReg(FrameReg);
-
- const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
-
- // TODO: Fold if use instruction is another add of a constant.
- if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
- // FIXME: This can fail
- MIB.addImm(Offset);
- MIB.addReg(ScaledReg, RegState::Kill);
- if (!IsVOP2)
+ MachineInstrBuilder MIB;
+ if (!IsSALU) {
+ if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
+ nullptr) {
+ // Reuse ResultReg in intermediate step.
+ Register ScaledReg = ResultReg;
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ ScaledReg)
+ .addImm(ST.getWavefrontSizeLog2())
+ .addReg(FrameReg);
+
+ const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
+
+ // TODO: Fold if use instruction is another add of a constant.
+ if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+ // FIXME: This can fail
+ MIB.addImm(Offset);
+ MIB.addReg(ScaledReg, RegState::Kill);
+ if (!IsVOP2)
+ MIB.addImm(0); // clamp bit
+ } else {
+ assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
+ "Need to reuse carry out register");
+
+ // Use scavenged unused carry out as offset register.
+ Register ConstOffsetReg;
+ if (!isWave32)
+ ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
+ else
+ ConstOffsetReg = MIB.getReg(1);
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
+ .addImm(Offset);
+ MIB.addReg(ConstOffsetReg, RegState::Kill);
+ MIB.addReg(ScaledReg, RegState::Kill);
MIB.addImm(0); // clamp bit
- } else {
- assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
- "Need to reuse carry out register");
-
- // Use scavenged unused carry out as offset register.
- Register ConstOffsetReg;
- if (!isWave32)
- ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
- else
- ConstOffsetReg = MIB.getReg(1);
-
- BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
- .addImm(Offset);
- MIB.addReg(ConstOffsetReg, RegState::Kill);
- MIB.addReg(ScaledReg, RegState::Kill);
- MIB.addImm(0); // clamp bit
+ }
}
- } else {
+ }
+ if (!MIB || IsSALU) {
// We have to produce a carry out, and there isn't a free SGPR pair
// for it. We can keep the whole computation on the SALU to avoid
// clobbering an additional register at the cost of an extra mov.
@@ -2084,7 +2315,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// We may have 1 free scratch SGPR even though a carry out is
// unavailable. Only one additional mov is needed.
Register TmpScaledReg =
- RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+ RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
@@ -2093,14 +2324,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
.addReg(ScaledReg, RegState::Kill)
.addImm(Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
- .addReg(ScaledReg, RegState::Kill);
+ if (!IsSALU)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+ .addReg(ScaledReg, RegState::Kill);
+ else
+ ResultReg = ScaledReg;
// If there were truly no free SGPRs, we need to undo everything.
if (!TmpScaledReg.isValid()) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
- .addReg(ScaledReg, RegState::Kill)
- .addImm(-Offset);
+ .addReg(ScaledReg, RegState::Kill)
+ .addImm(-Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
.addReg(FrameReg)
.addImm(ST.getWavefrontSizeLog2());
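
Editor's note: for non-entry functions the frame register holds an unswizzled private address, so both the VALU and the new SALU paths shift it right by log2 of the wavefront size before adding the object offset (s_lshr_b32 followed by s_add_i32 on the scalar side). A worked example of that arithmetic:

#include <cstdio>

// Convert an unswizzled private (scratch) address to a swizzled stack
// address by dividing out the wavefront size, then add the object offset.
unsigned swizzledAddress(unsigned FrameReg, unsigned WavefrontSizeLog2,
                         unsigned Offset) {
  return (FrameReg >> WavefrontSizeLog2) + Offset;  // s_lshr_b32 + s_add_i32
}

int main() {
  // Wave64 (log2 = 6): unswizzled byte 640 is lane-relative dword 10,
  // so an object offset of 16 yields 26.
  std::printf("%u\n", swizzledAddress(640, 6, 16));
}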
@@ -2665,8 +2899,7 @@ MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const
const TargetRegisterClass *
SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
- const RegisterBank &RB,
- const MachineRegisterInfo &MRI) const {
+ const RegisterBank &RB) const {
switch (RB.getID()) {
case AMDGPU::VGPRRegBankID:
return getVGPRClassForBitWidth(std::max(32u, Size));
@@ -2688,7 +2921,7 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
const MachineRegisterInfo &MRI) const {
const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
- return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
+ return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
return getAllocatableClass(RC);
@@ -2808,9 +3041,29 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
return true;
}
+const TargetRegisterClass *
+SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
+ if (!RC || !ST.needsAlignedVGPRs())
+ return RC;
+
+ unsigned Size = getRegSizeInBits(*RC);
+ if (Size <= 32)
+ return RC;
+
+ if (isVGPRClass(RC))
+ return getAlignedVGPRClassForBitWidth(Size);
+ if (isAGPRClass(RC))
+ return getAlignedAGPRClassForBitWidth(Size);
+ if (isVectorSuperClass(RC))
+ return getAlignedVectorSuperClassForBitWidth(Size);
+
+ return RC;
+}
+
bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
switch (PhysReg) {
case AMDGPU::SGPR_NULL:
+ case AMDGPU::SGPR_NULL64:
case AMDGPU::SRC_SHARED_BASE:
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_SHARED_LIMIT:
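
Editor's note: getProperlyAlignedRC, added near the end of this file, returns the even-aligned variant of a register class only when the subtarget requires aligned VGPRs and the class is wider than 32 bits; scalar classes and narrow classes pass through untouched. A standalone sketch of the predicate, with an enum standing in for real TargetRegisterClass pointers:

#include <cstdio>

// Illustrative register-class kinds; the real code returns concrete
// classes via getAligned{VGPR,AGPR,VectorSuper}ClassForBitWidth.
enum class RC { VGPR, AGPR, AVSuper, SGPR };

// Mirrors getProperlyAlignedRC: only multi-register vector classes on
// subtargets that need aligned VGPRs get an aligned variant.
bool needsAlignedVariant(RC Kind, unsigned SizeInBits,
                         bool NeedsAlignedVGPRs) {
  if (!NeedsAlignedVGPRs || SizeInBits <= 32)
    return false;
  return Kind == RC::VGPR || Kind == RC::AGPR || Kind == RC::AVSuper;
}

int main() {
  std::printf("%d\n", needsAlignedVariant(RC::VGPR, 64, true));  // 1
  std::printf("%d\n", needsAlignedVariant(RC::SGPR, 64, true));  // 0
  std::printf("%d\n", needsAlignedVariant(RC::VGPR, 32, true));  // 0
}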
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index f1fe0a1d9329..9bfbc253410b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -51,6 +51,17 @@ private:
public:
SIRegisterInfo(const GCNSubtarget &ST);
+ struct SpilledReg {
+ Register VGPR;
+ int Lane = -1;
+
+ SpilledReg() = default;
+ SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
+
+ bool hasLane() { return Lane != -1; }
+ bool hasReg() { return VGPR != 0; }
+ };
+
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
@@ -64,6 +75,8 @@ public:
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ MCRegister PhysReg) const override;
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
@@ -304,15 +317,11 @@ public:
MCRegister getReturnAddressReg(const MachineFunction &MF) const;
const TargetRegisterClass *
- getRegClassForSizeOnBank(unsigned Size,
- const RegisterBank &Bank,
- const MachineRegisterInfo &MRI) const;
+ getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
const TargetRegisterClass *
- getRegClassForTypeOnBank(LLT Ty,
- const RegisterBank &Bank,
- const MachineRegisterInfo &MRI) const {
- return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank, MRI);
+ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
+ return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
}
const TargetRegisterClass *
@@ -377,6 +386,11 @@ public:
// the subtarget.
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
+ // Given \p RC returns corresponding aligned register class if required
+ // by the subtarget.
+ const TargetRegisterClass *
+ getProperlyAlignedRC(const TargetRegisterClass *RC) const;
+
/// Return all SGPR128 which satisfy the waves per execution unit requirement
/// of the subtarget.
ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
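
Editor's note: the SpilledReg pair hoisted into SIRegisterInfo above records which VGPR holds a spilled SGPR and the lane within it. A self-contained sketch of the same shape, with Register reduced to a plain integer id for illustration:

#include <cstdio>

// Standalone model of SpilledReg: a VGPR holding spilled SGPR lanes
// plus the lane index within it (-1 when no lane is assigned).
struct SpilledReg {
  unsigned VGPR = 0;
  int Lane = -1;

  SpilledReg() = default;
  SpilledReg(unsigned R, int L) : VGPR(R), Lane(L) {}

  bool hasLane() const { return Lane != -1; }
  bool hasReg() const { return VGPR != 0; }
};

int main() {
  SpilledReg S(/*VGPR=*/42, /*Lane=*/3);
  // A slot with a lane maps to a v_writelane/v_readlane pair at spill
  // and restore time.
  std::printf("reg=%u lane=%d hasLane=%d\n", S.VGPR, S.Lane, S.hasLane());
}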
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index eb9452f4b85e..ffe8dce79816 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -97,7 +97,7 @@ class RegSeqNames<int last_reg, int stride, int size, string prefix,
[]);
}
-// Generates list of dags for register tupless.
+// Generates list of dags for register tuples.
class RegSeqDags<RegisterClass RC, int last_reg, int stride, int size,
int start = 0> {
dag trunc_rc = (trunc RC,
@@ -189,7 +189,7 @@ def PC_REG : SIReg<"pc", 0>, DwarfRegNum<[16, 16]> {
def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
- let HWEncoding = 106;
+ let HWEncoding = VCC_LO.HWEncoding;
}
defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
@@ -198,7 +198,7 @@ defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
- let HWEncoding = 126;
+ let HWEncoding = EXEC_LO.HWEncoding;
}
// 32-bit real registers, for MC only.
@@ -211,8 +211,23 @@ defm SRC_SCC : SIRegLoHi16<"src_scc", 253>;
// Should never be emitted.
def SCC : SIReg<"scc">;
-defm M0 : SIRegLoHi16 <"m0", 124>;
-defm SGPR_NULL : SIRegLoHi16 <"null", 125>;
+// Encoding changes between subtarget generations.
+// See also Utils/AMDGPUBaseInfo.cpp MAP_REG2REG.
+defm M0_gfxpre11 : SIRegLoHi16 <"m0", 124>;
+defm M0_gfx11plus : SIRegLoHi16 <"m0", 125>;
+defm M0 : SIRegLoHi16 <"m0", 0>;
+
+defm SGPR_NULL_gfxpre11 : SIRegLoHi16 <"null", 125>;
+defm SGPR_NULL_gfx11plus : SIRegLoHi16 <"null", 124>;
+defm SGPR_NULL : SIRegLoHi16 <"null", 0>;
+defm SGPR_NULL_HI : SIRegLoHi16 <"", 0>;
+
+def SGPR_NULL64 :
+ RegisterWithSubRegs<"null", [SGPR_NULL, SGPR_NULL_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = SGPR_NULL.HWEncoding;
+}
defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>;
defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>;
@@ -237,7 +252,7 @@ def XNACK_MASK :
RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
- let HWEncoding = 104;
+ let HWEncoding = XNACK_MASK_LO.HWEncoding;
}
// Trap handler registers
@@ -247,7 +262,7 @@ defm TBA_HI : SIRegLoHi16<"tba_hi", 109>;
def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
- let HWEncoding = 108;
+ let HWEncoding = TBA_LO.HWEncoding;
}
defm TMA_LO : SIRegLoHi16<"tma_lo", 110>;
@@ -256,7 +271,7 @@ defm TMA_HI : SIRegLoHi16<"tma_hi", 111>;
def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
- let HWEncoding = 110;
+ let HWEncoding = TMA_LO.HWEncoding;
}
foreach Index = 0...15 in {
@@ -635,16 +650,16 @@ let GeneratePressureSet = 0, HasSGPR = 1 in {
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
- SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
- SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
+ SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE,
+ SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
let AllocationPriority = 10;
}
def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
- XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16,
- TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
+ XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
+ TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16,
SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> {
let Size = 16;
@@ -701,23 +716,6 @@ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16],
let HasSGPR = 1;
}
-// CCR (call clobbered registers) SGPR 64-bit registers
-def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
- (add (trunc SGPR_64, 16))> {
- let CopyCost = SGPR_64.CopyCost;
- let AllocationPriority = SGPR_64.AllocationPriority;
- let HasSGPR = 1;
-}
-
-// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
-def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
- (add (trunc (shl SGPR_64, 15), 1), // s[30:31]
- (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63]
- let CopyCost = SGPR_64.CopyCost;
- let AllocationPriority = SGPR_64.AllocationPriority;
- let HasSGPR = 1;
-}
-
def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
@@ -725,7 +723,7 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
}
def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 13;
let HasSGPR = 1;
@@ -788,7 +786,7 @@ defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128R
defm "" : SRegClass<5, 16, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, 17, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, 18, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
-defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64], SGPR_256Regs, TTMP_256Regs>;
+defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
@@ -829,7 +827,7 @@ defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;
-defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64], (add VGPR_256)>;
+defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
@@ -856,21 +854,12 @@ defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)
} // End GeneratePressureSet = 0
-// This is not a real register. This is just to have a register to add
-// to VReg_1 that does not alias any real register that would
-// introduce inferred register classes.
-def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> {
- let isArtificial = 1;
-}
-
let GeneratePressureSet = 0 in {
-// FIXME: Should specify an empty set for this. No register should
-// ever be allocated using VReg_1. This is a hack for SelectionDAG
-// that should always be lowered by SILowerI1Copies. TableGen crashes
-// on an empty register set, but also sorts register classes based on
-// the number of registerss in them. Add only one register so this is
+// No register should ever be allocated using VReg_1. This is a hack for
+// SelectionDAG that should always be lowered by SILowerI1Copies. TableGen
+// sorts register classes based on the number of registers in them so this is
// sorted to the end and not preferred over VGPR_32.
-def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
+def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add)> {
let Size = 1;
let HasVGPR = 1;
}
@@ -913,11 +902,11 @@ defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
-defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>;
-defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>;
-defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>;
-defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>;
-defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
+defm AV_192 : AVRegClass<6, VReg_192.RegTypes, (add VGPR_192), (add AGPR_192)>;
+defm AV_224 : AVRegClass<7, VReg_224.RegTypes, (add VGPR_224), (add AGPR_224)>;
+defm AV_256 : AVRegClass<8, VReg_256.RegTypes, (add VGPR_256), (add AGPR_256)>;
+defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
+defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
//===----------------------------------------------------------------------===//
// Register operands
@@ -1087,6 +1076,27 @@ def VRegSrc_32 : RegisterOperand<VGPR_32> {
let DecoderMethod = "DecodeVS_32RegisterClass";
}
+def VRegSrc_64 : RegisterOperand<VReg_64> {
+ let DecoderMethod = "decodeOperand_VReg_64";
+}
+
+def VRegSrc_128 : RegisterOperand<VReg_128> {
+ let DecoderMethod = "decodeOperand_VReg_128";
+}
+
+def VRegSrc_256 : RegisterOperand<VReg_256> {
+ let DecoderMethod = "decodeOperand_VReg_256";
+}
+
+//===----------------------------------------------------------------------===//
+// VGPRSrc_*
+//===----------------------------------------------------------------------===//
+
+// An 8-bit RegisterOperand wrapper for a VGPR
+def VGPRSrc_32 : RegisterOperand<VGPR_32> {
+ let DecoderMethod = "DecodeVGPR_32RegisterClass";
+}
+
//===----------------------------------------------------------------------===//
// ASrc_* Operands with an AccVGPR
//===----------------------------------------------------------------------===//
@@ -1116,7 +1126,7 @@ defm VISrc_512 : RegInlineOperandAC<"VReg", "VISrc_512", "_512">;
defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;
//===----------------------------------------------------------------------===//
-// AVSrc_* Operands with an AGPR or VGPR
+// AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
//===----------------------------------------------------------------------===//
def AVSrc_32 : RegisterOperand<AV_32> {
@@ -1129,6 +1139,21 @@ def AVSrc_64 : RegisterOperand<AV_64> {
let EncoderMethod = "getAVOperandEncoding";
}
+def AVSrc_128 : RegisterOperand<AV_128> {
+ let DecoderMethod = "DecodeAV_128RegisterClass";
+ let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVDst_128 : RegisterOperand<AV_128> {
+ let DecoderMethod = "DecodeAVDst_128RegisterClass";
+ let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVDst_512 : RegisterOperand<AV_512> {
+ let DecoderMethod = "DecodeAVDst_512RegisterClass";
+ let EncoderMethod = "getAVOperandEncoding";
+}
+
def AVLdSt_32 : RegisterOperand<AV_32> {
let DecoderMethod = "DecodeAVLdSt_32RegisterClass";
let EncoderMethod = "getAVOperandEncoding";
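
Editor's note: the per-generation M0/SGPR_NULL definitions in the .td hunk exist because GFX11 swapped the two hardware encodings, 124 and 125; the comment defers the runtime choice to MAP_REG2REG in AMDGPUBaseInfo.cpp. A sketch of that remap using only the encodings shown in the hunk:

#include <cstdio>

// Pre-GFX11 encodes m0=124 / null=125; GFX11+ swaps them to
// m0=125 / null=124, per the SIRegisterInfo.td definitions above.
unsigned encodeReg(bool IsGfx11Plus, bool IsM0) {
  if (IsM0)
    return IsGfx11Plus ? 125 : 124;
  return IsGfx11Plus ? 124 : 125;   // sgpr_null
}

int main() {
  std::printf("m0:   pre-gfx11=%u gfx11+=%u\n", encodeReg(false, true),
              encodeReg(true, true));
  std::printf("null: pre-gfx11=%u gfx11+=%u\n", encodeReg(false, false),
              encodeReg(true, false));
}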
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 18d424a3bc9f..53441b5a4ced 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -59,6 +59,7 @@ def WriteIntMul : SchedWrite;
// mAI multipass instructions.
def Write2PassMAI : SchedWrite;
+def Write4PassMAI : SchedWrite;
def Write8PassMAI : SchedWrite;
def Write16PassMAI : SchedWrite;
def Write4PassDGEMM : SchedWrite;
@@ -86,7 +87,9 @@ class SISchedMachineModel : SchedMachineModel {
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def SIDPFullSpeedModel : SISchedMachineModel;
+def SIDPGFX940FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;
+def GFX11SpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
@@ -156,6 +159,8 @@ multiclass SICommonWriteRes {
let ResourceCycles = [2] in
def : HWWriteRes<Write2PassMAI, [HWXDL], 2>;
+ let ResourceCycles = [4] in
+ def : HWWriteRes<Write4PassMAI, [HWXDL], 4>;
let ResourceCycles = [8] in
def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
let ResourceCycles = [16] in
@@ -244,6 +249,40 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;
} // End SchedModel = SIDPFullSpeedModel
+let SchedModel = SIDPGFX940FullSpeedModel in {
+
+defm : SICommonWriteRes;
+
+def : HWVALUWriteRes<WriteFloatFMA, 1>;
+def : HWVALUWriteRes<WriteDouble, 1>;
+def : HWVALUWriteRes<WriteDoubleAdd, 1>;
+def : HWVALUWriteRes<WriteDoubleCvt, 1>;
+def : HWVALUWriteRes<WriteTrans64, 4>;
+def : HWVALUWriteRes<WriteIntMul, 1>;
+def : HWVALUWriteRes<Write64Bit, 1>;
+
+def : InstRW<[WriteCopy], (instrs COPY)>;
+def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
+def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_4X4X")>;
+
+def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X8X")>;
+def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X16")>;
+def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X32")>;
+def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_16X16X[14][FBI]")>;
+
+def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X4XF")>;
+def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X8")>;
+def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X16")>;
+def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_.32_32X32X[124][FBI]")>;
+
+def : InstRW<[Write4PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_4X4X")>;
+def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;
+
+def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>;
+def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>;
+
+} // End SchedModel = SIDPGFX940FullSpeedModel
+
let SchedModel = GFX10SpeedModel in {
// The latency values are 1 / (operations / cycle).
@@ -273,3 +312,29 @@ def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
def : InstRW<[WriteCopy], (instrs COPY)>;
} // End SchedModel = GFX10SpeedModel
+
+let SchedModel = GFX11SpeedModel in {
+
+def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
+def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
+def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
+def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 10>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 8>;
+def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
+def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 38>;
+def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 38>;
+def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 38>;
+def : HWWriteRes<WriteIntMul, [HWVALU, HWRC], 8>;
+def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 40>;
+
+def : HWWriteRes<WriteBranch, [HWBranch], 32>;
+def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
+def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
+def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
+def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+
+def : InstRW<[WriteCopy], (instrs COPY)>;
+
+} // End SchedModel = GFX11SpeedModel
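
Editor's note: the GFX10 model's comment (kept above the new GFX11 block) defines latency as 1 / (operations per cycle), and the MFMA write resources likewise tie ResourceCycles to the pass count (2, 4, 8, 16 passes). A trivial worked example of that relation:

#include <cstdio>

// Latency in cycles given a throughput in operations per cycle.
double latencyCycles(double OpsPerCycle) { return 1.0 / OpsPerCycle; }

int main() {
  // A quarter-rate operation (0.25 ops/cycle) costs 4 cycles.
  std::printf("%.0f\n", latencyCycles(0.25));
  // A full-rate operation retires every cycle.
  std::printf("%.0f\n", latencyCycles(1.0));
}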
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index c8f1daf26de9..05d2dd000162 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -26,15 +26,40 @@ using namespace llvm;
namespace {
class SIShrinkInstructions : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const GCNSubtarget *ST;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+
public:
static char ID;
- void shrinkMIMG(MachineInstr &MI);
-
public:
SIShrinkInstructions() : MachineFunctionPass(ID) {
}
+ bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
+ bool isKImmOperand(const MachineOperand &Src) const;
+ bool isKUImmOperand(const MachineOperand &Src) const;
+ bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
+ bool isReverseInlineImm(const MachineOperand &Src, int32_t &ReverseImm) const;
+ void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
+ void shrinkScalarCompare(MachineInstr &MI) const;
+ void shrinkMIMG(MachineInstr &MI) const;
+ void shrinkMadFma(MachineInstr &MI) const;
+ bool shrinkScalarLogicOp(MachineInstr &MI) const;
+ bool tryReplaceDeadSDST(MachineInstr &MI) const;
+ bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
+ Register Reg, unsigned SubReg) const;
+ bool instReadsReg(const MachineInstr *MI, unsigned Reg,
+ unsigned SubReg) const;
+ bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
+ unsigned SubReg) const;
+ TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
+ unsigned I) const;
+ void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
+ MachineInstr *matchSwap(MachineInstr &MovT) const;
+
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override { return "SI Shrink Instructions"; }
@@ -59,8 +84,8 @@ FunctionPass *llvm::createSIShrinkInstructionsPass() {
/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
-static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
- MachineRegisterInfo &MRI, bool TryToCommute = true) {
+bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
+ bool TryToCommute) const {
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
@@ -69,8 +94,8 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineOperand &Src0 = MI.getOperand(Src0Idx);
if (Src0.isReg()) {
Register Reg = Src0.getReg();
- if (Reg.isVirtual() && MRI.hasOneUse(Reg)) {
- MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (Reg.isVirtual()) {
+ MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
if (Def && Def->isMoveImmediate()) {
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
@@ -91,8 +116,8 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
}
if (ConstantFolded) {
- assert(MRI.use_empty(Reg));
- Def->eraseFromParent();
+ if (MRI->use_nodbg_empty(Reg))
+ Def->eraseFromParent();
++NumLiteralConstantsFolded;
return true;
}
@@ -103,7 +128,7 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
// We have failed to fold src0, so commute the instruction and try again.
if (TryToCommute && MI.isCommutable()) {
if (TII->commuteInstruction(MI)) {
- if (foldImmediates(MI, TII, MRI, false))
+ if (foldImmediates(MI, false))
return true;
// Commute back.
@@ -114,21 +139,20 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
return false;
}
-static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
return isInt<16>(Src.getImm()) &&
!TII->isInlineConstant(*Src.getParent(),
Src.getParent()->getOperandNo(&Src));
}
-static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
return isUInt<16>(Src.getImm()) &&
!TII->isInlineConstant(*Src.getParent(),
Src.getParent()->getOperandNo(&Src));
}
-static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
- const MachineOperand &Src,
- bool &IsUnsigned) {
+bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
+ bool &IsUnsigned) const {
if (isInt<16>(Src.getImm())) {
IsUnsigned = false;
return !TII->isInlineConstant(Src);
@@ -144,9 +168,8 @@ static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
-static bool isReverseInlineImm(const SIInstrInfo *TII,
- const MachineOperand &Src,
- int32_t &ReverseImm) {
+bool SIShrinkInstructions::isReverseInlineImm(const MachineOperand &Src,
+ int32_t &ReverseImm) const {
if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
return false;
@@ -156,8 +179,9 @@ static bool isReverseInlineImm(const SIInstrInfo *TII,
/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
-static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
- const MachineInstr &MI) {
+void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
+ MachineInstr &MI) const {
+ MachineFunction &MF = *MI.getMF();
for (unsigned i = MI.getDesc().getNumOperands() +
MI.getDesc().getNumImplicitUses() +
MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
@@ -168,7 +192,7 @@ static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
}
}
-static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
+void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
// cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
// get constants on the RHS.
if (!MI.getOperand(0).isReg())
@@ -191,7 +215,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
// and initially selected to the unsigned versions.
if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
bool HasUImm;
- if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
+ if (isKImmOrKUImmOperand(Src1, HasUImm)) {
if (!HasUImm) {
SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
@@ -205,22 +229,30 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
- if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
- (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
+ if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
+ (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
MI.setDesc(NewDesc);
}
}
// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
-void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
+void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
- if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ if (!Info)
return;
- MachineFunction *MF = MI.getParent()->getParent();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ uint8_t NewEncoding;
+ switch (Info->MIMGEncoding) {
+ case AMDGPU::MIMGEncGfx10NSA:
+ NewEncoding = AMDGPU::MIMGEncGfx10Default;
+ break;
+ case AMDGPU::MIMGEncGfx11NSA:
+ NewEncoding = AMDGPU::MIMGEncGfx11Default;
+ break;
+ default:
+ return;
+ }
+
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
unsigned NewAddrDwords = Info->VAddrDwords;
@@ -246,16 +278,23 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
}
unsigned VgprBase = 0;
+ unsigned NextVgpr = 0;
bool IsUndef = true;
bool IsKill = NewAddrDwords == Info->VAddrDwords;
- for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
- const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
- unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());
+ for (unsigned Idx = 0; Idx < Info->VAddrOperands; ++Idx) {
+ const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
+ unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
+ unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
+ assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");
- if (i == 0) {
+ if (Idx == 0) {
VgprBase = Vgpr;
- } else if (VgprBase + i != Vgpr)
+ NextVgpr = Vgpr + Dwords;
+ } else if (Vgpr == NextVgpr) {
+ NextVgpr = Vgpr + Dwords;
+ } else {
return;
+ }
if (!Op.isUndef())
IsUndef = false;
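
Editor's note: the NSA shrink now walks address operands of mixed register widths, so contiguity means each operand starts at the dword where the previous one ended, exactly as the NextVgpr bookkeeping above tracks. A standalone sketch of that check with illustrative register numbers:

#include <cstdio>

// Registers are contiguous iff each starts where the previous ended.
// Base holds hardware register indices, Dwords the operand widths.
bool isContiguous(const unsigned *Base, const unsigned *Dwords, int N) {
  unsigned Next = 0;
  for (int I = 0; I < N; ++I) {
    if (I != 0 && Base[I] != Next)
      return false;
    Next = Base[I] + Dwords[I];
  }
  return true;
}

int main() {
  unsigned Base[] = {4, 5, 7};    // v4, v[5:6], v7
  unsigned Dwords[] = {1, 2, 1};
  std::printf("%d\n", isContiguous(Base, Dwords, 3));  // 1: contiguous
  unsigned Base2[] = {4, 6, 7};   // gap after v4
  std::printf("%d\n", isContiguous(Base2, Dwords, 3)); // 0
}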
@@ -288,21 +327,108 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
}
}
- unsigned NewOpcode =
- AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
- Info->VDataDwords, NewAddrDwords);
+ unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
+ Info->VDataDwords, NewAddrDwords);
MI.setDesc(TII->get(NewOpcode));
MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
MI.getOperand(VAddr0Idx).setIsKill(IsKill);
- for (unsigned i = 1; i < Info->VAddrDwords; ++i)
- MI.RemoveOperand(VAddr0Idx + 1);
+ for (int i = 1; i < Info->VAddrOperands; ++i)
+ MI.removeOperand(VAddr0Idx + 1);
if (ToUntie >= 0) {
MI.tieOperands(
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
- ToUntie - (Info->VAddrDwords - 1));
+ ToUntie - (Info->VAddrOperands - 1));
+ }
+}
+
+// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
+void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
+ if (!ST->hasVOP3Literal())
+ return;
+
+ if (TII->hasAnyModifiersSet(MI))
+ return;
+
+ const unsigned Opcode = MI.getOpcode();
+ MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
+
+ bool Swap;
+
+ // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
+ if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
+ if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
+ Swap = false;
+ else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
+ Swap = true;
+ else
+ return;
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected mad/fma opcode!");
+ case AMDGPU::V_MAD_F32_e64:
+ NewOpcode = AMDGPU::V_MADAK_F32;
+ break;
+ case AMDGPU::V_FMA_F32_e64:
+ NewOpcode = AMDGPU::V_FMAAK_F32;
+ break;
+ case AMDGPU::V_MAD_F16_e64:
+ NewOpcode = AMDGPU::V_MADAK_F16;
+ break;
+ case AMDGPU::V_FMA_F16_e64:
+ NewOpcode = AMDGPU::V_FMAAK_F16;
+ break;
+ }
+ }
+
+ // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
+ if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
+ if (Src1.isImm() && !TII->isInlineConstant(Src1))
+ Swap = false;
+ else if (Src0.isImm() && !TII->isInlineConstant(Src0))
+ Swap = true;
+ else
+ return;
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected mad/fma opcode!");
+ case AMDGPU::V_MAD_F32_e64:
+ NewOpcode = AMDGPU::V_MADMK_F32;
+ break;
+ case AMDGPU::V_FMA_F32_e64:
+ NewOpcode = AMDGPU::V_FMAMK_F32;
+ break;
+ case AMDGPU::V_MAD_F16_e64:
+ NewOpcode = AMDGPU::V_MADMK_F16;
+ break;
+ case AMDGPU::V_FMA_F16_e64:
+ NewOpcode = AMDGPU::V_FMAMK_F16;
+ break;
+ }
+ }
+
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ return;
+
+ if (Swap) {
+ // Swap Src0 and Src1 by building a new instruction.
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
+ MI.getOperand(0).getReg())
+ .add(Src1)
+ .add(Src0)
+ .add(Src2)
+ .setMIFlags(MI.getFlags());
+ MI.eraseFromParent();
+ } else {
+ TII->removeModOperands(MI);
+ MI.setDesc(TII->get(NewOpcode));
}
}
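
Editor's note: shrinkMadFma picks the two-operand literal form from where the literal and the VGPR sit: a literal addend selects MADAK/FMAAK, a literal multiplicand with a VGPR addend selects MADMK/FMAMK, and Swap records that src0/src1 must be commuted. A simplified standalone sketch of that selection; it omits the inline-constant checks the real code performs:

#include <cstdio>

// Illustrative operand model for dst = s0 * s1 + s2.
struct Op { bool IsImm; bool IsVGPR; };

enum class Form { AK, MK, None };

// AK forms take the literal as the addend (s2); MK forms take it as a
// multiplicand. Swap means s0 and s1 must be commuted so the VGPR lands
// where the shrunk encoding requires it.
Form pickShrunkForm(Op S0, Op S1, Op S2, bool &Swap) {
  if (S2.IsImm) {                        // dst = VSrc * VGPR + Imm
    if (S1.IsVGPR) { Swap = false; return Form::AK; }
    if (S0.IsVGPR) { Swap = true;  return Form::AK; }
  } else if (S2.IsVGPR) {                // dst = VSrc * Imm + VGPR
    if (S1.IsImm) { Swap = false; return Form::MK; }
    if (S0.IsImm) { Swap = true;  return Form::MK; }
  }
  return Form::None;
}

int main() {
  bool Swap = false;
  Op Vgpr{false, true}, Imm{true, false};
  Form F = pickShrunkForm(Imm, Vgpr, Vgpr, Swap);  // Imm * VGPR + VGPR
  std::printf("form=%d swap=%d\n", static_cast<int>(F), Swap); // MK, swapped
}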
@@ -311,10 +437,7 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
/// \returns true if the caller should continue the machine function iterator
-static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
- MachineRegisterInfo &MRI,
- const SIInstrInfo *TII,
- MachineInstr &MI) {
+bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
const MachineOperand *Dest = &MI.getOperand(0);
MachineOperand *Src0 = &MI.getOperand(1);
@@ -323,7 +446,7 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
MachineOperand *SrcImm = Src1;
if (!SrcImm->isImm() ||
- AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST.hasInv2PiInlineImm()))
+ AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
return false;
uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
@@ -333,7 +456,7 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
if (isPowerOf2_32(~Imm)) {
NewImm = countTrailingOnes(Imm);
Opc = AMDGPU::S_BITSET0_B32;
- } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
NewImm = ~Imm;
Opc = AMDGPU::S_ANDN2_B32;
}
@@ -341,12 +464,12 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
if (isPowerOf2_32(Imm)) {
NewImm = countTrailingZeros(Imm);
Opc = AMDGPU::S_BITSET1_B32;
- } else if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
NewImm = ~Imm;
Opc = AMDGPU::S_ORN2_B32;
}
} else if (Opc == AMDGPU::S_XOR_B32) {
- if (AMDGPU::isInlinableLiteral32(~Imm, ST.hasInv2PiInlineImm())) {
+ if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
NewImm = ~Imm;
Opc = AMDGPU::S_XNOR_B32;
}
@@ -354,16 +477,10 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
llvm_unreachable("unexpected opcode");
}
- if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
- SrcImm == Src0) {
- if (!TII->commuteInstruction(MI, false, 1, 2))
- NewImm = 0;
- }
-
if (NewImm != 0) {
if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
- MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
- MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
+ MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
+ MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
return true;
}
@@ -390,19 +507,19 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
-static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
- Register Reg, unsigned SubReg,
- const SIRegisterInfo &TRI) {
+bool SIShrinkInstructions::instAccessReg(
+ iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
+ unsigned SubReg) const {
for (const MachineOperand &MO : R) {
if (!MO.isReg())
continue;
if (Reg.isPhysical() && MO.getReg().isPhysical()) {
- if (TRI.regsOverlap(Reg, MO.getReg()))
+ if (TRI->regsOverlap(Reg, MO.getReg()))
return true;
} else if (MO.getReg() == Reg && Reg.isVirtual()) {
- LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) &
- TRI.getSubRegIndexLaneMask(MO.getSubReg());
+ LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
+ TRI->getSubRegIndexLaneMask(MO.getSubReg());
if (Overlap.any())
return true;
}
@@ -410,33 +527,31 @@ static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
return false;
}
-static bool instReadsReg(const MachineInstr *MI,
- unsigned Reg, unsigned SubReg,
- const SIRegisterInfo &TRI) {
- return instAccessReg(MI->uses(), Reg, SubReg, TRI);
+bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
+ unsigned SubReg) const {
+ return instAccessReg(MI->uses(), Reg, SubReg);
}
-static bool instModifiesReg(const MachineInstr *MI,
- unsigned Reg, unsigned SubReg,
- const SIRegisterInfo &TRI) {
- return instAccessReg(MI->defs(), Reg, SubReg, TRI);
+bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
+ unsigned SubReg) const {
+ return instAccessReg(MI->defs(), Reg, SubReg);
}
-static TargetInstrInfo::RegSubRegPair
-getSubRegForIndex(Register Reg, unsigned Sub, unsigned I,
- const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) {
- if (TRI.getRegSizeInBits(Reg, MRI) != 32) {
+TargetInstrInfo::RegSubRegPair
+SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
+ unsigned I) const {
+ if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
if (Reg.isPhysical()) {
- Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
+ Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
} else {
- Sub = TRI.getSubRegFromChannel(I + TRI.getChannelFromSubReg(Sub));
+ Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
}
}
return TargetInstrInfo::RegSubRegPair(Reg, Sub);
}
-static void dropInstructionKeepingImpDefs(MachineInstr &MI,
- const SIInstrInfo *TII) {
+void SIShrinkInstructions::dropInstructionKeepingImpDefs(
+ MachineInstr &MI) const {
for (unsigned i = MI.getDesc().getNumOperands() +
MI.getDesc().getNumImplicitUses() +
MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
@@ -464,14 +579,13 @@ static void dropInstructionKeepingImpDefs(MachineInstr &MI,
// Returns next valid instruction pointer if was able to create v_swap_b32.
//
// This shall not be done too early not to prevent possible folding which may
-// remove matched moves, and this should prefereably be done before RA to
+// remove matched moves, and this should preferably be done before RA to
// release saved registers and also possibly after RA which can insert copies
// too.
//
-// This is really just a generic peephole that is not a canocical shrinking,
+// This is really just a generic peephole that is not a canonical shrinking,
// although requirements match the pass placement and it reduces code size too.
-static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
- const SIInstrInfo *TII) {
+MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
MovT.getOpcode() == AMDGPU::COPY);
@@ -486,8 +600,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
unsigned Size = TII->getOpSize(MovT, 0) / 4;
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
- if (!TRI.isVGPR(MRI, X))
+ if (!TRI->isVGPR(*MRI, X))
return nullptr;
if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0))
@@ -501,7 +614,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
MachineInstr *MovY = &*Iter;
- KilledT = MovY->killsRegister(T, &TRI);
+ KilledT = MovY->killsRegister(T, TRI);
if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
MovY->getOpcode() != AMDGPU::COPY) ||
@@ -514,21 +627,20 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
Register Y = MovY->getOperand(0).getReg();
unsigned Ysub = MovY->getOperand(0).getSubReg();
- if (!TRI.isVGPR(MRI, Y))
+ if (!TRI->isVGPR(*MRI, Y))
continue;
MachineInstr *MovX = nullptr;
for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
I != IY; ++I) {
- if (instReadsReg(&*I, X, Xsub, TRI) ||
- instModifiesReg(&*I, Y, Ysub, TRI) ||
- instModifiesReg(&*I, T, Tsub, TRI) ||
- (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
+ if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
+ instModifiesReg(&*I, T, Tsub) ||
+ (MovX && instModifiesReg(&*I, X, Xsub))) {
MovX = nullptr;
break;
}
- if (!instReadsReg(&*I, Y, Ysub, TRI)) {
- if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
+ if (!instReadsReg(&*I, Y, Ysub)) {
+ if (!MovX && instModifiesReg(&*I, X, Xsub)) {
MovX = nullptr;
break;
}
@@ -559,8 +671,8 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
for (unsigned I = 0; I < Size; ++I) {
TargetInstrInfo::RegSubRegPair X1, Y1;
- X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
- Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
+ X1 = getSubRegForIndex(X, Xsub, I);
+ Y1 = getSubRegForIndex(Y, Ysub, I);
MachineBasicBlock &MBB = *MovT.getParent();
auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
TII->get(AMDGPU::V_SWAP_B32))
@@ -570,23 +682,23 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
// Drop implicit EXEC.
- MIB->RemoveOperand(MIB->getNumExplicitOperands());
+ MIB->removeOperand(MIB->getNumExplicitOperands());
MIB->copyImplicitOps(*MBB.getParent(), *MovX);
}
}
MovX->eraseFromParent();
- dropInstructionKeepingImpDefs(*MovY, TII);
+ dropInstructionKeepingImpDefs(*MovY);
MachineInstr *Next = &*std::next(MovT.getIterator());
- if (T.isVirtual() && MRI.use_nodbg_empty(T)) {
- dropInstructionKeepingImpDefs(MovT, TII);
+ if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
+ dropInstructionKeepingImpDefs(MovT);
} else {
Xop.setIsKill(false);
for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
unsigned OpNo = MovT.getNumExplicitOperands() + I;
const MachineOperand &Op = MovT.getOperand(OpNo);
- if (Op.isKill() && TRI.regsOverlap(X, Op.getReg()))
- MovT.RemoveOperand(OpNo);
+ if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
+ MovT.removeOperand(OpNo);
}
}
@@ -596,14 +708,32 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
return nullptr;
}
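
Editor's note: matchSwap recognizes the classic three-mov exchange, t = x; x = y; y = t, and replaces it with one v_swap_b32 per 32-bit chunk. A toy demonstration that the two computations agree, with plain integers standing in for VGPRs:

#include <cstdio>
#include <utility>

// The three-mov pattern the peephole matches, one 32-bit lane at a time.
void threeMovSwap(int &X, int &Y) {
  int T = X;   // v_mov_b32 t, x
  X = Y;       // v_mov_b32 x, y
  Y = T;       // v_mov_b32 y, t
}

int main() {
  int X = 1, Y = 2;
  threeMovSwap(X, Y);            // what the matched MIR computes
  std::swap(X, Y);               // what a single v_swap_b32 computes
  std::printf("%d %d\n", X, Y);  // back to "1 2": the two are equivalent
}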
+// If an instruction has a dead sdst, replace it with the NULL register on gfx1030+
+bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
+ if (!ST->hasGFX10_3Insts())
+ return false;
+
+ MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ if (!Op)
+ return false;
+ Register SDstReg = Op->getReg();
+ if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
+ return false;
+
+ Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
+ return true;
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- unsigned VCCReg = ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
+ MRI = &MF.getRegInfo();
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ TII = ST->getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
+ unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
std::vector<unsigned> I1Defs;
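As an aside on the tryReplaceDeadSDST helper added above: the rewrite is only legal when the scalar def is provably unused. A minimal sketch of that dead-def test, assuming only the generic MachineRegisterInfo API (the helper name is hypothetical, not part of the pass):

  #include "llvm/CodeGen/MachineOperand.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  // A def may be replaced by the NULL register only if it is virtual and has
  // no non-debug uses; a physical def may be read by code the pass cannot see.
  static bool isReplaceableDeadDef(const llvm::MachineOperand &Op,
                                   const llvm::MachineRegisterInfo &MRI) {
    if (!Op.isReg() || !Op.isDef())
      return false;
    llvm::Register R = Op.getReg();
    return R.isVirtual() && MRI.use_nodbg_empty(R);
  }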
@@ -628,7 +758,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineOperand &Src = MI.getOperand(1);
if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
int32_t ReverseImm;
- if (isReverseInlineImm(TII, Src, ReverseImm)) {
+ if (isReverseInlineImm(Src, ReverseImm)) {
MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
Src.setImm(ReverseImm);
continue;
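The isReverseInlineImm rewrite above exploits bit reversal being an involution: when the bit-reversed literal fits in an inline immediate, v_bfrev_b32 of the reversed value reproduces the original without a 32-bit literal. A standalone sketch of the arithmetic; fitsInlineImm is a hypothetical stand-in for the target's real check:

  #include <cstdint>

  // Stand-in for the inline-immediate range test (integers -16..64 are
  // inline constants on AMDGPU).
  static bool fitsInlineImm(int32_t V) { return V >= -16 && V <= 64; }

  static uint32_t reverseBits32(uint32_t V) {
    uint32_t R = 0;
    for (int I = 0; I < 32; ++I)
      R = (R << 1) | ((V >> I) & 1);
    return R;
  }

  // True if Imm can be materialized as "v_bfrev_b32 dst, Rev", since
  // reverseBits32(Rev) == Imm.
  static bool tryReverseImm(uint32_t Imm, int32_t &Rev) {
    Rev = static_cast<int32_t>(reverseBits32(Imm));
    return fitsInlineImm(Rev);
  }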
@@ -636,19 +766,15 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
- MI.getOpcode() == AMDGPU::COPY)) {
- if (auto *NextMI = matchSwap(MI, MRI, TII)) {
+ if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI.getOpcode() == AMDGPU::COPY)) {
+ if (auto *NextMI = matchSwap(MI)) {
Next = NextMI->getIterator();
continue;
}
}
- // FIXME: We also need to consider movs of constant operands since
- // immediate operands are not folded if they have more than one use, and
- // the operand folding pass is unaware if the immediate will be free since
- // it won't know if the src == dest constraint will end up being
- // satisfied.
+ // Try to use S_ADDK_I32 and S_MULK_I32.
if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
MI.getOpcode() == AMDGPU::S_MUL_I32) {
const MachineOperand *Dest = &MI.getOperand(0);
@@ -664,13 +790,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// we have a vector add of a constant, we usually don't get the correct
// allocation due to the subregister usage.
if (Dest->getReg().isVirtual() && Src0->isReg()) {
- MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
- MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
+ MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
+ MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
continue;
}
if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
- if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
+ if (Src1->isImm() && isKImmOperand(*Src1)) {
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
@@ -682,7 +808,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// Try to use s_cmpk_*
if (MI.isCompare() && TII->isSOPC(MI)) {
- shrinkScalarCompare(TII, MI);
+ shrinkScalarCompare(MI);
continue;
}
@@ -693,9 +819,9 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (Src.isImm() && Dst.getReg().isPhysical()) {
int32_t ReverseImm;
- if (isKImmOperand(TII, Src))
+ if (isKImmOperand(Src))
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
- else if (isReverseInlineImm(TII, Src, ReverseImm)) {
+ else if (isReverseInlineImm(Src, ReverseImm)) {
MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
Src.setImm(ReverseImm);
}
@@ -708,47 +834,70 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
MI.getOpcode() == AMDGPU::S_OR_B32 ||
MI.getOpcode() == AMDGPU::S_XOR_B32) {
- if (shrinkScalarLogicOp(ST, MRI, TII, MI))
+ if (shrinkScalarLogicOp(MI))
continue;
}
if (TII->isMIMG(MI.getOpcode()) &&
- ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
+ ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
MF.getProperties().hasProperty(
MachineFunctionProperties::Property::NoVRegs)) {
shrinkMIMG(MI);
continue;
}
- if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
+ if (!TII->isVOP3(MI))
continue;
- if (!TII->canShrink(MI, MRI)) {
+ if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
+ MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
+ MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
+ MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
+ shrinkMadFma(MI);
+ continue;
+ }
+
+ if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
+    // If there is no chance we will shrink it and use VCC as sdst to get
+    // a 32-bit form, try to replace a dead sdst with NULL.
+ tryReplaceDeadSDST(MI);
+ continue;
+ }
+
+ if (!TII->canShrink(MI, *MRI)) {
// Try commuting the instruction and see if that enables us to shrink
// it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
- !TII->canShrink(MI, MRI))
+ !TII->canShrink(MI, *MRI)) {
+ tryReplaceDeadSDST(MI);
continue;
+ }
}
int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
if (TII->isVOPC(Op32)) {
- Register DstReg = MI.getOperand(0).getReg();
- if (DstReg.isVirtual()) {
- // VOPC instructions can only write to the VCC register. We can't
- // force them to use VCC here, because this is only one register and
- // cannot deal with sequences which would require multiple copies of
- // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
- //
- // So, instead of forcing the instruction to write to VCC, we provide
- // a hint to the register allocator to use VCC and then we will run
- // this pass again after RA and shrink it if it outputs to VCC.
- MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, VCCReg);
- continue;
+ MachineOperand &Op0 = MI.getOperand(0);
+ if (Op0.isReg()) {
+ // Exclude VOPCX instructions as these don't explicitly write a
+ // dst.
+ Register DstReg = Op0.getReg();
+ if (DstReg.isVirtual()) {
+ // VOPC instructions can only write to the VCC register. We can't
+ // force them to use VCC here, because this is only one register and
+ // cannot deal with sequences which would require multiple copies of
+ // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
+ //
+ // So, instead of forcing the instruction to write to VCC, we
+ // provide a hint to the register allocator to use VCC and then we
+ // will run this pass again after RA and shrink it if it outputs to
+ // VCC.
+ MRI->setRegAllocationHint(DstReg, 0, VCCReg);
+ continue;
+ }
+ if (DstReg != VCCReg)
+ continue;
}
- if (DstReg != VCCReg)
- continue;
}
if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
@@ -760,7 +909,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
Register SReg = Src2->getReg();
if (SReg.isVirtual()) {
- MRI.setRegAllocationHint(SReg, 0, VCCReg);
+ MRI->setRegAllocationHint(SReg, 0, VCCReg);
continue;
}
if (SReg != VCCReg)
@@ -776,7 +925,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (SDst->getReg() != VCCReg) {
if (SDst->getReg().isVirtual())
- MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
+ MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
Next = true;
}
@@ -786,7 +935,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::OpName::src2);
if (Src2 && Src2->getReg() != VCCReg) {
if (Src2->getReg().isVirtual())
- MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
+ MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
Next = true;
}
@@ -801,14 +950,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
++NumInstructionsShrunk;
// Copy extra operands not present in the instruction definition.
- copyExtraImplicitOps(*Inst32, MF, MI);
+ copyExtraImplicitOps(*Inst32, MI);
// Copy deadness from the old explicit vcc def to the new implicit def.
if (SDst && SDst->isDead())
Inst32->findRegisterDefOperand(VCCReg)->setIsDead();
MI.eraseFromParent();
- foldImmediates(*Inst32, TII, MRI);
+ foldImmediates(*Inst32);
LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
}
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 46efb3c605c6..a5798afab595 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -349,8 +349,7 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
const VNInfo *NextValue = nullptr;
const VisitKey Key(Value, DefinedLanes);
- if (!Visited.count(Key)) {
- Visited.insert(Key);
+ if (Visited.insert(Key).second) {
// On first visit to a phi then start processing first predecessor
NextPredIdx = 0;
}
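The replacement above is the standard set-insertion idiom: insert() returns an (iterator, inserted) pair, so a single call both tests membership and inserts, avoiding the double lookup of count() followed by insert(). A minimal sketch using LLVM's DenseSet:

  #include "llvm/ADT/DenseSet.h"

  void processOnce(llvm::DenseSet<unsigned> &Visited, unsigned Key) {
    // .second is true only when Key was not already present.
    if (Visited.insert(Key).second) {
      // First visit: do the one-time work here.
    }
  }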
@@ -535,13 +534,36 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
GlobalFlags |= StateStrictWWM;
LowerToMovInstrs.push_back(&MI);
continue;
- } else if (Opcode == AMDGPU::STRICT_WQM) {
+ } else if (Opcode == AMDGPU::STRICT_WQM ||
+ TII->isDualSourceBlendEXP(MI)) {
// STRICT_WQM is similar to STRICTWWM, but instead of enabling all
// threads of the wave like STRICTWWM, STRICT_WQM enables all threads in
// quads that have at least one active thread.
markInstructionUses(MI, StateStrictWQM, Worklist);
GlobalFlags |= StateStrictWQM;
- LowerToMovInstrs.push_back(&MI);
+
+ if (Opcode == AMDGPU::STRICT_WQM) {
+ LowerToMovInstrs.push_back(&MI);
+ } else {
+        // A dual-source blend export acts as an implicit strict-wqm: its
+        // sources need to be shuffled in strict WQM, but the export itself
+        // needs to run in exact mode.
+ BBI.Needs |= StateExact;
+ if (!(BBI.InNeeds & StateExact)) {
+ BBI.InNeeds |= StateExact;
+ Worklist.push_back(MBB);
+ }
+ GlobalFlags |= StateExact;
+ III.Disabled = StateWQM | StateStrict;
+ }
+ continue;
+ } else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
+ Opcode == AMDGPU::LDS_DIRECT_LOAD) {
+      // Mark these as STRICTWQM, but only for the instruction, not its
+      // operands. This avoids unnecessarily marking M0 as requiring WQM.
+ InstrInfo &II = Instructions[&MI];
+ II.Needs |= StateStrictWQM;
+ GlobalFlags |= StateStrictWQM;
continue;
} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
Opcode == AMDGPU::V_SET_INACTIVE_B64) {
@@ -969,7 +991,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
MachineInstr *WQMMaskMI = nullptr;
Register LiveMaskWQM;
if (IsDemote) {
- // Demotes deactive quads with only helper lanes
+ // Demote - deactivate quads with only helper lanes
LiveMaskWQM = MRI->createVirtualRegister(TRI->getBoolRC());
WQMMaskMI =
BuildMI(MBB, MI, DL, TII->get(WQMOpc), LiveMaskWQM).addReg(LiveMaskReg);
@@ -977,7 +999,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
.addReg(Exec)
.addReg(LiveMaskWQM);
} else {
- // Kills deactivate lanes
+ // Kill - deactivate lanes no longer in live mask
if (Op.isImm()) {
unsigned MovOpc = ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
NewTerm = BuildMI(MBB, &MI, DL, TII->get(MovOpc), Exec).addImm(0);
@@ -1453,7 +1475,7 @@ void SIWholeQuadMode::lowerCopyInstrs() {
}
int Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC);
while (Index >= 0) {
- MI->RemoveOperand(Index);
+ MI->removeOperand(Index);
Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC);
}
MI->setDesc(TII->get(AMDGPU::COPY));
@@ -1468,7 +1490,7 @@ void SIWholeQuadMode::lowerCopyInstrs() {
// an undef input so it is being replaced by a simple copy.
// There should be a second undef source that we should remove.
assert(MI->getOperand(2).isUndef());
- MI->RemoveOperand(2);
+ MI->removeOperand(2);
MI->untieRegOperand(1);
} else {
assert(MI->getNumExplicitOperands() == 2);
@@ -1588,11 +1610,11 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
// Physical registers like SCC aren't tracked by default anyway, so just
// removing the ranges we computed is the simplest option for maintaining
// the analysis results.
- LIS->removeRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
+ LIS->removeAllRegUnitsForPhysReg(AMDGPU::SCC);
// If we performed any kills then recompute EXEC
if (!KillInstrs.empty())
- LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
+ LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
return true;
}
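A note on the LiveIntervals change above: dereferencing MCRegUnitIterator yielded only the first register unit of the physical register, while removeAllRegUnitsForPhysReg drops every unit (for EXEC, both 32-bit halves). A minimal sketch of the new call, assuming a LiveIntervals reference is in scope:

  #include "llvm/CodeGen/LiveIntervals.h"
  #include "llvm/MC/MCRegister.h"

  // Drop the computed live ranges for every register unit of Reg so the
  // analysis stays conservative after the pass has rewritten its defs.
  static void invalidatePhysReg(llvm::LiveIntervals &LIS, llvm::MCRegister Reg) {
    LIS.removeAllRegUnitsForPhysReg(Reg);
  }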
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 184c871db775..882d13402a19 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -11,13 +11,19 @@ def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
let OperandType = "OPERAND_IMMEDIATE";
}
-def smem_offset : NamedOperandU32<"SMEMOffset",
- NamedMatchClass<"SMEMOffset">> {
+class SMEMOffset : NamedOperandU32<"SMEMOffset",
+ NamedMatchClass<"SMEMOffset">> {
let OperandType = "OPERAND_IMMEDIATE";
let EncoderMethod = "getSMEMOffsetEncoding";
let DecoderMethod = "decodeSMEMOffset";
}
+def smem_offset : SMEMOffset;
+
+def smem_offset_mod : SMEMOffset {
+ let PrintMethod = "printSMEMOffsetMod";
+}
+
//===----------------------------------------------------------------------===//
// Scalar Memory classes
//===----------------------------------------------------------------------===//
@@ -43,13 +49,13 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
bits<1> has_sdst = 1;
bit has_glc = 0;
bit has_dlc = 0;
- bits<1> has_offset = 1;
- bits<1> offset_is_imm = 0;
+ bit has_offset = 0;
+ bit has_soffset = 0;
bit is_buffer = 0;
}
-class SM_Real <SM_Pseudo ps>
- : InstSI<ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
+class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
+ : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -77,20 +83,40 @@ class SM_Real <SM_Pseudo ps>
bits<7> sbase;
bits<7> sdst;
bits<32> offset;
- bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
+ bits<8> soffset;
bits<5> cpol;
}
-class SM_Probe_Pseudo <string opName, dag ins, bit isImm>
- : SM_Pseudo<opName, (outs), ins, " $sdata, $sbase, $offset"> {
+class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
+ dag ins, string asm> {
+ bit HasOffset = hasOffset;
+ bit HasSOffset = hasSOffset;
+ string Variant = variant;
+ dag Ins = ins;
+ string Asm = asm;
+}
+
+def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
+def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
+def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
+ (ins SReg_32:$soffset, smem_offset_mod:$offset),
+ "$soffset$offset">;
+
+class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass,
+ dag offsets, string asmOffsets,
+ bit hasOffset, bit hasSOffset>
+ : SM_Pseudo<opName, (outs),
+ !con((ins i8imm:$sdata, baseClass:$sbase), offsets),
+ " $sdata, $sbase, " # asmOffsets> {
let mayLoad = 0;
let mayStore = 0;
let has_glc = 0;
let LGKM_CNT = 0;
let ScalarStore = 0;
let hasSideEffects = 1;
- let offset_is_imm = isImm;
- let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
+ let has_offset = hasOffset;
+ let has_soffset = hasSOffset;
+ let PseudoInstr = opName # variant;
}
class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]>
@@ -102,10 +128,11 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
let has_dlc = 1;
}
-class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
- : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
- RegisterClass BaseClass;
- RegisterClass SrcClass;
+class SM_Store_Pseudo <string opName, RegisterClass baseClass,
+ RegisterClass srcClass, dag ins, string asmOps>
+ : SM_Pseudo<opName, (outs), ins, asmOps, []> {
+ RegisterClass BaseClass = baseClass;
+ RegisterClass SrcClass = srcClass;
let mayLoad = 0;
let mayStore = 1;
let has_glc = 1;
@@ -113,16 +140,19 @@ class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern
let ScalarStore = 1;
}
-class SM_Discard_Pseudo <string opName, dag ins, bit isImm>
- : SM_Pseudo<opName, (outs), ins, " $sbase, $offset"> {
+class SM_Discard_Pseudo <string opName, string variant, dag offsets,
+ string asmOffsets, bit hasOffset, bit hasSOffset>
+ : SM_Pseudo<opName, (outs), !con((ins SReg_64:$sbase), offsets),
+ " $sbase, " # asmOffsets> {
let mayLoad = 0;
let mayStore = 0;
let has_glc = 0;
let has_sdst = 0;
let ScalarStore = 0;
let hasSideEffects = 1;
- let offset_is_imm = isImm;
- let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR");
+ let has_offset = hasOffset;
+ let has_soffset = hasSOffset;
+ let PseudoInstr = opName # variant;
}
multiclass SM_Pseudo_Loads<string opName,
@@ -132,7 +162,7 @@ multiclass SM_Pseudo_Loads<string opName,
(outs dstClass:$sdst),
(ins baseClass:$sbase, i32imm:$offset, CPol:$cpol),
" $sdst, $sbase, $offset$cpol", []> {
- let offset_is_imm = 1;
+ let has_offset = 1;
let BaseClass = baseClass;
let PseudoInstr = opName # "_IMM";
let has_glc = 1;
@@ -141,39 +171,63 @@ multiclass SM_Pseudo_Loads<string opName,
def _SGPR : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
- " $sdst, $sbase, $offset$cpol", []> {
+ (ins baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
+ " $sdst, $sbase, $soffset$cpol", []> {
+ let has_soffset = 1;
let BaseClass = baseClass;
let PseudoInstr = opName # "_SGPR";
let has_glc = 1;
let has_dlc = 1;
}
+
+ def _SGPR_IMM : SM_Load_Pseudo <opName,
+ (outs dstClass:$sdst),
+ (ins baseClass:$sbase, SReg_32:$soffset,
+ i32imm:$offset, CPol:$cpol),
+ " $sdst, $sbase, $soffset$offset$cpol", []> {
+ let has_offset = 1;
+ let has_soffset = 1;
+ let BaseClass = baseClass;
+ let PseudoInstr = opName # "_SGPR_IMM";
+ let has_glc = 1;
+ let has_dlc = 1;
+ }
}
multiclass SM_Pseudo_Stores<string opName,
RegisterClass baseClass,
RegisterClass srcClass> {
- def _IMM : SM_Store_Pseudo <opName,
+ def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
- " $sdata, $sbase, $offset$cpol", []> {
- let offset_is_imm = 1;
- let BaseClass = baseClass;
- let SrcClass = srcClass;
+ " $sdata, $sbase, $offset$cpol"> {
+ let has_offset = 1;
let PseudoInstr = opName # "_IMM";
}
- def _SGPR : SM_Store_Pseudo <opName,
- (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
- " $sdata, $sbase, $offset$cpol", []> {
- let BaseClass = baseClass;
- let SrcClass = srcClass;
+ def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass,
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
+ " $sdata, $sbase, $soffset$cpol"> {
+ let has_soffset = 1;
let PseudoInstr = opName # "_SGPR";
}
+
+ def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, i32imm:$offset,
+ CPol:$cpol),
+ " $sdata, $sbase, $soffset$offset$cpol"> {
+ let has_offset = 1;
+ let has_soffset = 1;
+ let PseudoInstr = opName # "_SGPR_IMM";
+ }
}
multiclass SM_Pseudo_Discards<string opName> {
- def _IMM : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, smem_offset:$offset), 1>;
- def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
+ def _IMM : SM_Discard_Pseudo <opName, "_IMM",
+ (ins smem_offset:$offset), "$offset", 1, 0>;
+ def _SGPR : SM_Discard_Pseudo <opName, "_SGPR",
+ (ins SReg_32:$soffset), "$soffset", 0, 1>;
+ def _SGPR_IMM : SM_Discard_Pseudo <opName, "_SGPR_IMM",
+ (ins SReg_32:$soffset, smem_offset_mod:$offset), "$soffset$offset", 1, 1>;
}
class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
@@ -184,21 +238,24 @@ class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pse
let mayStore = 0;
let mayLoad = 0;
let has_sbase = 0;
- let has_offset = 0;
}
class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
opName, (outs), (ins), "", [(node)]> {
let hasSideEffects = 1;
+ let mayLoad = 0;
let mayStore = 0;
let has_sdst = 0;
let has_sbase = 0;
- let has_offset = 0;
}
multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
- def _IMM : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, smem_offset:$offset), 1>;
- def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
+ def _IMM : SM_Probe_Pseudo <opName, "_IMM", baseClass,
+ (ins smem_offset:$offset), "$offset", 1, 0>;
+ def _SGPR : SM_Probe_Pseudo <opName, "_SGPR", baseClass,
+ (ins SReg_32:$soffset), "$soffset", 0, 1>;
+ def _SGPR_IMM : SM_Probe_Pseudo <opName, "_SGPR_IMM", baseClass,
+ (ins SReg_32:$soffset, smem_offset_mod:$offset), "$soffset$offset", 1, 1>;
}
class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
@@ -206,9 +263,8 @@ class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
" $sdst", [(set i32:$sdst, (node))]> {
let hasSideEffects = 1;
let mayStore = 0;
- let mayLoad = 1;
+ let mayLoad = 0;
let has_sbase = 0;
- let has_offset = 0;
}
//===----------------------------------------------------------------------===//
@@ -225,6 +281,7 @@ class SM_Atomic_Pseudo <string opName,
let mayStore = 1;
let has_glc = 1;
let has_dlc = 1;
+ let has_soffset = 1;
// Should these be set?
let ScalarStore = 1;
@@ -240,21 +297,21 @@ class SM_Atomic_Pseudo <string opName,
class SM_Pseudo_Atomic<string opName,
RegisterClass baseClass,
RegisterClass dataClass,
- bit isImm,
+ OffsetMode offsets,
bit isRet,
- string opNameWithSuffix = opName # !if(isImm,
- !if(isRet, "_IMM_RTN", "_IMM"),
- !if(isRet, "_SGPR_RTN", "_SGPR")),
+ string opNameWithSuffix =
+ opName # offsets.Variant # !if(isRet, "_RTN", ""),
Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
- !if(isImm,
- (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
- (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, CPolTy:$cpol)),
- !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset$cpol",
+ !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
+ (ins CPolTy:$cpol)),
+ !if(isRet, " $sdst", " $sdata") #
+ ", $sbase, " # offsets.Asm # "$cpol",
isRet>,
AtomicNoRet <opNameWithSuffix, isRet> {
- let offset_is_imm = isImm;
+ let has_offset = offsets.HasOffset;
+ let has_soffset = offsets.HasSOffset;
let PseudoInstr = opNameWithSuffix;
let Constraints = !if(isRet, "$sdst = $sdata", "");
@@ -264,10 +321,12 @@ class SM_Pseudo_Atomic<string opName,
multiclass SM_Pseudo_Atomics<string opName,
RegisterClass baseClass,
RegisterClass dataClass> {
- def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 0>;
- def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 0>;
- def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 1, 1>;
- def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, 0, 1>;
+ def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
+ def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
+ def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
+ def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
+ def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
+ def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}
//===----------------------------------------------------------------------===//
@@ -452,16 +511,14 @@ class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
let AssemblerPredicate = isGFX6GFX7;
let DecoderNamespace = "GFX6GFX7";
- let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
- let Inst{8} = imm;
+ let Inst{7-0} = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
+ let Inst{8} = ps.has_offset;
let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
let Inst{26-22} = op;
let Inst{31-27} = 0x18; //encoding
}
-// FIXME: Assembler should reject trying to use glc on SMRD
-// instructions on SI.
multiclass SM_Real_Loads_si<bits<5> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
@@ -470,10 +527,8 @@ multiclass SM_Real_Loads_si<bits<5> op, string ps,
let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
}
- // FIXME: The operand name $offset is inconsistent with $soff used
- // in the pseudo
def _SGPR_si : SMRD_Real_si <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
}
}
@@ -494,42 +549,82 @@ def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
//===----------------------------------------------------------------------===//
-// VI
+// VI and GFX9.
//===----------------------------------------------------------------------===//
class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
: SM_Real<ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
, Enc64 {
- let AssemblerPredicate = isGFX8GFX9;
+ field bit IsGFX9SpecificEncoding = false;
+ let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9);
let DecoderNamespace = "GFX8";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
+ // Note that for GFX9 instructions with immediate offsets, soffset_en
+ // must be defined, whereas in GFX8 it's undefined in all cases,
+ // meaning GFX9 is not perfectly backward-compatible with GFX8, despite
+ // documentation suggesting otherwise.
+ field bit SOffsetEn = !if(IsGFX9SpecificEncoding,
+ !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)),
+ ?);
+ let Inst{14} = SOffsetEn;
+
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
- let Inst{17} = imm;
+
+ // imm
+  // TODO: Should not be defined if the instruction has neither an offset
+  // nor an soffset.
+ let Inst{17} = ps.has_offset;
+
let Inst{25-18} = op;
let Inst{31-26} = 0x30; //encoding
// VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
// Offset value is corrected accordingly when offset is encoded/decoded.
- let Inst{38-32} = !if(ps.has_offset, offset{6-0}, ?);
- let Inst{52-39} = !if(ps.has_offset, !if(imm, offset{20-7}, ?), ?);
+ // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
+ field bits<21> Offset;
+ let Offset{6-0} = !if(ps.has_offset, offset{6-0},
+ !if(ps.has_soffset, soffset{6-0}, ?));
+ let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?);
+ let Inst{52-32} = Offset;
+
+ // soffset
+ let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset),
+ soffset{6-0}, ?);
}
-multiclass SM_Real_Loads_vi<bits<8> op, string ps,
- SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
- SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
- def _IMM_vi : SMEM_Real_vi <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
- }
- def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
- }
+class SMEM_Real_Load_vi<bits<8> op, string ps, dag offsets>
+ : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)> {
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass;
+ let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol));
}
-class SMEM_Real_Store_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
+// The alternative GFX9 SGPR encoding, which uses soffset to encode the
+// offset register. It is not available to the assembler and goes into the
+// GFX9 encoding family to avoid conflicts with the primary SGPR variant.
+class SMEM_Real_SGPR_alt_gfx9 {
+ bit IsGFX9SpecificEncoding = true;
+ bit SOffsetEn = 1;
+ bit Offset = ?;
+ int Subtarget = SIEncodingFamily.GFX9;
+ string AsmVariantName = "NonParsable";
+}
+
+multiclass SM_Real_Loads_vi<bits<8> op, string ps> {
+ def _IMM_vi : SMEM_Real_Load_vi <op, ps#"_IMM", (ins smem_offset:$offset)>;
+ def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR", (ins SReg_32:$soffset)>;
+ def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR",
+ (ins SReg_32:$soffset)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <
+ op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>;
+}
+
+class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
// encoding
bits<7> sdata;
@@ -537,23 +632,34 @@ class SMEM_Real_Store_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}
-multiclass SM_Real_Stores_vi<bits<8> op, string ps,
- SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
- SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
- // FIXME: The operand name $offset is inconsistent with $soff used
- // in the pseudo
- def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
- }
+class SMEM_Real_Store_vi <bits<8> op, string ps, dag offsets>
+ : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps)> {
+ RegisterClass SrcClass = !cast<SM_Store_Pseudo>(ps).SrcClass;
+ RegisterClass BaseClass = !cast<SM_Store_Pseudo>(ps).BaseClass;
+ let InOperandList = !con((ins SrcClass:$sdata, BaseClass:$sbase),
+ offsets, (ins CPol:$cpol));
+}
- def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
- }
+multiclass SM_Real_Stores_vi<bits<8> op, string ps> {
+ def _IMM_vi : SMEM_Real_Store_vi <op, ps#_IMM, (ins smem_offset:$offset)>;
+ def _SGPR_vi : SMEM_Real_Store_vi <op, ps#_SGPR, (ins SReg_32:$soffset)>;
+ def _SGPR_alt_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR",
+ (ins SReg_32:$soffset)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <
+ op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>;
}
multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
- def _IMM_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
- def _SGPR_vi : SMEM_Real_Store_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
+ def _IMM_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
+ def _SGPR_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
+ def _SGPR_alt_gfx9
+ : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_gfx9
+ : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}
defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
@@ -614,8 +720,20 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+ def _SGPR_alt_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
def _IMM_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+ def _SGPR_RTN_alt_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_RTN_gfx9
+ : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}
defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
@@ -677,6 +795,10 @@ defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2">
multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
def _IMM_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
+ def _SGPR_alt_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>,
+ SMEM_Real_SGPR_alt_gfx9;
+ let IsGFX9SpecificEncoding = true in
+ def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR_IMM)>;
}
defm S_DCACHE_DISCARD : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">;
@@ -727,8 +849,8 @@ class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
let AssemblerPredicate = isGFX7Only;
let DecoderNamespace = "GFX7";
- let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
- let Inst{8} = imm;
+ let Inst{7-0} = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
+ let Inst{8} = ps.has_offset;
let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
let Inst{26-22} = op;
@@ -876,20 +998,27 @@ def : GCNPat <
// GFX10.
//===----------------------------------------------------------------------===//
-class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
- SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
- let AssemblerPredicate = isGFX10Plus;
- let DecoderNamespace = "GFX10";
-
+class SMEM_Real_10Plus_common<bits<8> op, SM_Pseudo ps, string opName,
+ int subtarget, RegisterWithSubRegs sgpr_null> :
+ SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
- let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
- let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
let Inst{25-18} = op;
let Inst{31-26} = 0x3d;
- let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?);
- let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
- !if(ps.has_offset, offset{6-0}, ?));
+ // There are SMEM instructions that do not employ any of the offset
+ // fields, in which case we need them to remain undefined.
+ let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?));
+ let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
+ !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
+}
+
+class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
+ : SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10,
+ SGPR_NULL_gfxpre11> {
+ let AssemblerPredicate = isGFX10Only;
+ let DecoderNamespace = "GFX10";
+ let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
+ let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
}
multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
@@ -899,7 +1028,11 @@ multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
+ }
+ def _SGPR_IMM_gfx10 : SMEM_Real_gfx10<op, !cast<SM_Load_Pseudo>(ps#_SGPR_IMM)> {
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset,
+ smem_offset_mod:$offset, CPol:$cpol);
}
}
@@ -913,14 +1046,17 @@ class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps>
multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
- // FIXME: The operand name $offset is inconsistent with $soff used
- // in the pseudo
def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
+ }
+
+ def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
+ SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
}
}
@@ -969,6 +1105,8 @@ def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+ def _SGPR_IMM_gfx10
+ : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
}
defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
@@ -992,8 +1130,10 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+ def _SGPR_IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+ def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}
let SubtargetPredicate = HasScalarAtomics in {
@@ -1057,6 +1197,7 @@ defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X
multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+ def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
}
defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
@@ -1072,3 +1213,64 @@ def SMInfoTable : GenericTable {
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getSMEMOpcodeHelper";
}
+
+//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
+ SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
+ SGPR_NULL_gfx11plus> {
+ let AssemblerPredicate = isGFX11Plus;
+ let DecoderNamespace = "GFX11";
+ let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
+ let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
+}
+
+class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, dag offsets> :
+ SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName> {
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass;
+ let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol));
+}
+
+multiclass SM_Real_Loads_gfx11<bits<8> op, string ps, string opName> {
+ def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName, (ins smem_offset:$offset)>;
+ def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName, (ins SReg_32:$soffset)>;
+ def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<
+ op, ps#"_SGPR_IMM", opName, (ins SReg_32:$soffset, smem_offset_mod:$offset)>;
+ def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>,
+ Requires<[isGFX11Plus]>;
+}
+
+defm S_LOAD_B32 : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD", "s_load_b32">;
+defm S_LOAD_B64 : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2", "s_load_b64">;
+defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4", "s_load_b128">;
+defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8", "s_load_b256">;
+defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16", "s_load_b512">;
+
+defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD", "s_buffer_load_b32">;
+defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2", "s_buffer_load_b64">;
+defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4", "s_buffer_load_b128">;
+defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8", "s_buffer_load_b256">;
+defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16", "s_buffer_load_b512">;
+
+def S_GL1_INV_gfx11 : SMEM_Real_gfx11<0x020, S_GL1_INV>;
+def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;
+
+class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps> {
+ // encoding
+ bits<7> sdata;
+
+ let sdst = ?;
+ let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
+}
+
+multiclass SM_Real_Probe_gfx11<bits<8> op, string ps> {
+ def _IMM_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
+ def _SGPR_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
+ def _SGPR_IMM_gfx11
+ : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
+}
+
+defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22, "S_ATC_PROBE">;
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23, "S_ATC_PROBE_BUFFER">;
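Taken together, the offset modes defined in this file compose the SMEM address from the 64-bit base plus an optional SGPR offset and an optional immediate. A hedged sketch of that composition; the function and parameter names are illustrative, not part of the TableGen definitions:

  #include <cstdint>

  // _IMM: immediate only; _SGPR: register only; _SGPR_IMM: both.
  uint64_t smemAddress(uint64_t SBase, bool HasSOffset, uint32_t SOffset,
                       bool HasOffset, int64_t ImmOffset) {
    uint64_t Addr = SBase;
    if (HasSOffset)
      Addr += SOffset;
    if (HasOffset)
      Addr += ImmOffset; // 20-bit unsigned on VI, 21-bit signed on GFX9+
    return Addr;
  }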
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 3f7837f7dbf1..37d20045adb5 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -152,8 +152,8 @@ class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
}
// 64-bit input, no output
-class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> : SOP1_Pseudo <
- opName, (outs), (ins rc:$src0), "$src0", pattern> {
+class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
let has_sdst = 0;
}
@@ -235,10 +235,10 @@ def : GCNPat <
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def S_BREV_B32 : SOP1_32 <"s_brev_b32",
- [(set i32:$sdst, (bitreverse i32:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<bitreverse> i32:$src0))]
>;
def S_BREV_B64 : SOP1_64 <"s_brev_b64",
- [(set i64:$sdst, (bitreverse i64:$src0))]
+ [(set i64:$sdst, (UniformUnaryFrag<bitreverse> i64:$src0))]
>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1
@@ -276,10 +276,10 @@ def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32",
>;
def S_FLBIT_I32_I64 : SOP1_32_64 <"s_flbit_i32_i64">;
def S_SEXT_I32_I8 : SOP1_32 <"s_sext_i32_i8",
- [(set i32:$sdst, (sext_inreg i32:$src0, i8))]
+ [(set i32:$sdst, (UniformSextInreg<i8> i32:$src0))]
>;
def S_SEXT_I32_I16 : SOP1_32 <"s_sext_i32_i16",
- [(set i32:$sdst, (sext_inreg i32:$src0, i16))]
+ [(set i32:$sdst, (UniformSextInreg<i16> i32:$src0))]
>;
} // End isReMaterializable = 1
@@ -300,8 +300,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
let isReturn = 1 in {
// Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
-def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>;
+def S_SETPC_B64_return : SOP1_1<"">;
}
} // End isTerminator = 1, isBarrier = 1
@@ -341,7 +340,7 @@ def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">;
let Defs = [SCC] in {
def S_ABS_I32 : SOP1_32 <"s_abs_i32",
- [(set i32:$sdst, (abs i32:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<abs> i32:$src0))]
>;
} // End Defs = [SCC]
@@ -385,6 +384,21 @@ let SubtargetPredicate = isGFX10Plus in {
} // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus
+let SubtargetPredicate = isGFX11Plus in {
+ let hasSideEffects = 1 in {
+ // For s_sendmsg_rtn_* the src0 field encodes the message type directly; it
+ // is not an SGPR number.
+ def S_SENDMSG_RTN_B32 : SOP1_Pseudo<
+ "s_sendmsg_rtn_b32", (outs SReg_32:$sdst), (ins SendMsgImm:$src0),
+ "$sdst, $src0", [(set i32:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))]
+ >;
+ def S_SENDMSG_RTN_B64 : SOP1_Pseudo<
+ "s_sendmsg_rtn_b64", (outs SReg_64:$sdst), (ins SendMsgImm:$src0),
+ "$sdst, $src0", [(set i64:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))]
+ >;
+ }
+} // End SubtargetPredicate = isGFX11Plus
+
//===----------------------------------------------------------------------===//
// SOP2 Instructions
//===----------------------------------------------------------------------===//
@@ -690,6 +704,10 @@ let SubtargetPredicate = isGFX9Plus in {
} // End isCommutable = 1, isReMaterializable = 1
} // End SubtargetPredicate = isGFX9Plus
+let SubtargetPredicate = isGFX11Plus in {
+ def S_PACK_HL_B32_B16 : SOP2_32<"s_pack_hl_b32_b16">;
+} // End SubtargetPredicate = isGFX11Plus
+
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
@@ -855,9 +873,7 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
"$sdst, $simm16"
>;
-let mayLoad = 1 in {
-// s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow
-// its use in the readcyclecounter selection.
+// This is marked hasSideEffects to allow its use in readcyclecounter selection.
// FIXME: Need to truncate immediate to 16-bits.
def S_GETREG_B32 : SOPK_Pseudo <
"s_getreg_b32",
@@ -867,7 +883,6 @@ def S_GETREG_B32 : SOPK_Pseudo <
let SOPKZext = 1;
let hasSideEffects = 1;
}
-} // End mayLoad = 1
let Defs = [MODE], Uses = [MODE] in {
@@ -1169,12 +1184,12 @@ def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> {
let isReturn = 1;
}
-let SubtargetPredicate = isGFX9Plus in {
+let SubtargetPredicate = isGFX9GFX10 in {
let isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 in {
def S_ENDPGM_ORDERED_PS_DONE :
SOPP_Pseudo<"s_endpgm_ordered_ps_done", (ins)>;
} // End isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1
-} // End SubtargetPredicate = isGFX9Plus
+} // End SubtargetPredicate = isGFX9GFX10
let SubtargetPredicate = isGFX10Plus in {
let isBarrier = 1, isReturn = 1, simm16 = 0, fixed_imm = 1 in {
@@ -1279,15 +1294,21 @@ def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
let hasSideEffects = 1;
}
-def S_SETPRIO : SOPP_Pseudo <"s_setprio" , (ins i16imm:$simm16), "$simm16">;
+def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_setprio timm:$simm16)]> {
+ let hasSideEffects = 1;
+}
let Uses = [EXEC, M0] in {
-// FIXME: Should this be mayLoad+mayStore?
def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsgImm:$simm16), "$simm16",
- [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
+ [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> {
+ let hasSideEffects = 1;
+}
def S_SENDMSGHALT : SOPP_Pseudo <"s_sendmsghalt" , (ins SendMsgImm:$simm16), "$simm16",
- [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
+ [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]> {
+ let hasSideEffects = 1;
+}
} // End Uses = [EXEC, M0]
@@ -1341,7 +1362,7 @@ let SubtargetPredicate = isGFX10Plus in {
let fixed_imm = 1;
}
def S_WAITCNT_DEPCTR :
- SOPP_Pseudo <"s_waitcnt_depctr" , (ins s16imm:$simm16), "$simm16">;
+ SOPP_Pseudo <"s_waitcnt_depctr" , (ins DepCtrImm:$simm16), "$simm16">;
let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in {
def S_ROUND_MODE :
@@ -1355,6 +1376,13 @@ let SubtargetPredicate = isGFX10Plus in {
SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">;
} // End SubtargetPredicate = isGFX10Plus
+let SubtargetPredicate = isGFX11Plus in {
+ def S_WAIT_EVENT : SOPP_Pseudo<"s_wait_event", (ins s16imm:$simm16),
+ "$simm16">;
+ def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins DELAY_FLAG:$simm16),
+ "$simm16">;
+} // End SubtargetPredicate = isGFX11Plus
+
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
@@ -1377,7 +1405,7 @@ def : GCNPat <
>;
def : GCNPat <
- (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
+ (i32 (UniformBinFrag<smax> i32:$x, (i32 (ineg i32:$x)))),
(S_ABS_I32 SReg_32:$x)
>;
@@ -1408,7 +1436,7 @@ def : GCNPat <
// REG_SEQUENCE patterns don't support instructions with multiple
// outputs.
def : GCNPat<
- (i64 (zext i16:$src)),
+ (i64 (UniformUnaryFrag<zext> i16:$src)),
(REG_SEQUENCE SReg_64,
(i32 (COPY_TO_REGCLASS (S_AND_B32 $src, (S_MOV_B32 (i32 0xffff))), SGPR_32)), sub0,
(S_MOV_B32 (i32 0)), sub1)
@@ -1421,7 +1449,7 @@ def : GCNPat <
>;
def : GCNPat<
- (i32 (zext i16:$src)),
+ (i32 (UniformUnaryFrag<zext> i16:$src)),
(S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src)
>;
@@ -1448,8 +1476,13 @@ def : ScalarNot2Pat<S_ORN2_B64, or, v2i32>;
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
+class Select_gfx11<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX11> {
+ Predicate AssemblerPredicate = isGFX11Only;
+ string DecoderNamespace = "GFX11";
+}
+
class Select_gfx10<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX10> {
- Predicate AssemblerPredicate = isGFX10Plus;
+ Predicate AssemblerPredicate = isGFX10Only;
string DecoderNamespace = "GFX10";
}
@@ -1464,6 +1497,87 @@ class Select_gfx6_gfx7<string opName> : SIMCInstr<opName, SIEncodingFamily.SI> {
}
//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+multiclass SOP1_Real_gfx11<bits<8> op> {
+ def _gfx11 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
+ Select_gfx11<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP1_Real_Renamed_gfx11<bits<8> op, SOP1_Pseudo backing_pseudo, string real_name> {
+ def _gfx11 : SOP1_Real<op, backing_pseudo, real_name>,
+ Select_gfx11<backing_pseudo.Mnemonic>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+}
+
+defm S_MOV_B32 : SOP1_Real_gfx11<0x000>;
+defm S_MOV_B64 : SOP1_Real_gfx11<0x001>;
+defm S_CMOV_B32 : SOP1_Real_gfx11<0x002>;
+defm S_CMOV_B64 : SOP1_Real_gfx11<0x003>;
+defm S_BREV_B32 : SOP1_Real_gfx11<0x004>;
+defm S_BREV_B64 : SOP1_Real_gfx11<0x005>;
+defm S_CTZ_I32_B32 : SOP1_Real_Renamed_gfx11<0x008, S_FF1_I32_B32, "s_ctz_i32_b32">;
+defm S_CTZ_I32_B64 : SOP1_Real_Renamed_gfx11<0x009, S_FF1_I32_B64, "s_ctz_i32_b64">;
+defm S_CLZ_I32_U32 : SOP1_Real_Renamed_gfx11<0x00a, S_FLBIT_I32_B32, "s_clz_i32_u32">;
+defm S_CLZ_I32_U64 : SOP1_Real_Renamed_gfx11<0x00b, S_FLBIT_I32_B64, "s_clz_i32_u64">;
+defm S_CLS_I32 : SOP1_Real_Renamed_gfx11<0x00c, S_FLBIT_I32, "s_cls_i32">;
+defm S_CLS_I32_I64 : SOP1_Real_Renamed_gfx11<0x00d, S_FLBIT_I32_I64, "s_cls_i32_i64">;
+defm S_SEXT_I32_I8 : SOP1_Real_gfx11<0x00e>;
+defm S_SEXT_I32_I16 : SOP1_Real_gfx11<0x00f>;
+defm S_BITSET0_B32 : SOP1_Real_gfx11<0x010>;
+defm S_BITSET0_B64 : SOP1_Real_gfx11<0x011>;
+defm S_BITSET1_B32 : SOP1_Real_gfx11<0x012>;
+defm S_BITSET1_B64 : SOP1_Real_gfx11<0x013>;
+defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx11<0x014>;
+defm S_ABS_I32 : SOP1_Real_gfx11<0x015>;
+defm S_BCNT0_I32_B32 : SOP1_Real_gfx11<0x016>;
+defm S_BCNT0_I32_B64 : SOP1_Real_gfx11<0x017>;
+defm S_BCNT1_I32_B32 : SOP1_Real_gfx11<0x018>;
+defm S_BCNT1_I32_B64 : SOP1_Real_gfx11<0x019>;
+defm S_QUADMASK_B32 : SOP1_Real_gfx11<0x01a>;
+defm S_QUADMASK_B64 : SOP1_Real_gfx11<0x01b>;
+defm S_WQM_B32 : SOP1_Real_gfx11<0x01c>;
+defm S_WQM_B64 : SOP1_Real_gfx11<0x01d>;
+defm S_NOT_B32 : SOP1_Real_gfx11<0x01e>;
+defm S_NOT_B64 : SOP1_Real_gfx11<0x01f>;
+defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx11<0x020>;
+defm S_AND_SAVEEXEC_B64 : SOP1_Real_gfx11<0x021>;
+defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x022>;
+defm S_OR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x023>;
+defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x024>;
+defm S_XOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x025>;
+defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx11<0x026>;
+defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx11<0x027>;
+defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x028>;
+defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x029>;
+defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx11<0x02a>;
+/*defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx11<0x02b>; // same as on older archs; handled there*/
+defm S_AND_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x02c, S_ANDN1_SAVEEXEC_B32, "s_and_not0_saveexec_b32">;
+defm S_AND_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x02d, S_ANDN1_SAVEEXEC_B64, "s_and_not0_saveexec_b64">;
+defm S_OR_NOT0_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x02e, S_ORN1_SAVEEXEC_B32, "s_or_not0_saveexec_b32">;
+defm S_OR_NOT0_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x02f, S_ORN1_SAVEEXEC_B64, "s_or_not0_saveexec_b64">;
+defm S_AND_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x030, S_ANDN2_SAVEEXEC_B32, "s_and_not1_saveexec_b32">;
+defm S_AND_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x031, S_ANDN2_SAVEEXEC_B64, "s_and_not1_saveexec_b64">;
+defm S_OR_NOT1_SAVEEXEC_B32 : SOP1_Real_Renamed_gfx11<0x032, S_ORN2_SAVEEXEC_B32, "s_or_not1_saveexec_b32">;
+defm S_OR_NOT1_SAVEEXEC_B64 : SOP1_Real_Renamed_gfx11<0x033, S_ORN2_SAVEEXEC_B64, "s_or_not1_saveexec_b64">;
+defm S_AND_NOT0_WREXEC_B32 : SOP1_Real_Renamed_gfx11<0x034, S_ANDN1_WREXEC_B32, "s_and_not0_wrexec_b32">;
+defm S_AND_NOT0_WREXEC_B64 : SOP1_Real_Renamed_gfx11<0x035, S_ANDN1_WREXEC_B64, "s_and_not0_wrexec_b64">;
+defm S_AND_NOT1_WREXEC_B32 : SOP1_Real_Renamed_gfx11<0x036, S_ANDN2_WREXEC_B32, "s_and_not1_wrexec_b32">;
+defm S_AND_NOT1_WREXEC_B64 : SOP1_Real_Renamed_gfx11<0x037, S_ANDN2_WREXEC_B64, "s_and_not1_wrexec_b64">;
+defm S_MOVRELS_B32 : SOP1_Real_gfx11<0x040>;
+defm S_MOVRELS_B64 : SOP1_Real_gfx11<0x041>;
+defm S_MOVRELD_B32 : SOP1_Real_gfx11<0x042>;
+defm S_MOVRELD_B64 : SOP1_Real_gfx11<0x043>;
+defm S_MOVRELSD_2_B32 : SOP1_Real_gfx11<0x044>;
+defm S_GETPC_B64 : SOP1_Real_gfx11<0x047>;
+defm S_SETPC_B64 : SOP1_Real_gfx11<0x048>;
+defm S_SWAPPC_B64 : SOP1_Real_gfx11<0x049>;
+defm S_RFE_B64 : SOP1_Real_gfx11<0x04a>;
+defm S_SENDMSG_RTN_B32 : SOP1_Real_gfx11<0x04c>;
+defm S_SENDMSG_RTN_B64 : SOP1_Real_gfx11<0x04d>;
+
+//===----------------------------------------------------------------------===//
// SOP1 - GFX10.
//===----------------------------------------------------------------------===//
@@ -1473,6 +1587,9 @@ multiclass SOP1_Real_gfx10<bits<8> op> {
Select_gfx10<ps.Mnemonic>;
}
+multiclass SOP1_Real_gfx10_gfx11<bits<8> op> :
+ SOP1_Real_gfx10<op>, SOP1_Real_gfx11<op>;
+
defm S_ANDN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x037>;
defm S_ORN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x038>;
defm S_ANDN1_WREXEC_B64 : SOP1_Real_gfx10<0x039>;
@@ -1493,7 +1610,7 @@ defm S_ANDN2_WREXEC_B32 : SOP1_Real_gfx10<0x047>;
defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>;
//===----------------------------------------------------------------------===//
-// SOP1 - GFX6, GFX7.
+// SOP1 - GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
@@ -1506,6 +1623,9 @@ multiclass SOP1_Real_gfx6_gfx7<bits<8> op> {
multiclass SOP1_Real_gfx6_gfx7_gfx10<bits<8> op> :
SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10<op>;
+multiclass SOP1_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
+ SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10_gfx11<op>;
+
defm S_CBRANCH_JOIN : SOP1_Real_gfx6_gfx7<0x032>;
defm S_MOV_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x003>;
@@ -1547,7 +1667,7 @@ defm S_ANDN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x027>;
defm S_ORN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x028>;
defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x029>;
defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02a>;
-defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02b>;
+defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10_gfx11<0x02b>;
defm S_QUADMASK_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02c>;
defm S_QUADMASK_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02d>;
defm S_MOVRELS_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02e>;
@@ -1557,6 +1677,65 @@ defm S_MOVRELD_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm S_ABS_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x034>;
//===----------------------------------------------------------------------===//
+// SOP2 - GFX11.
+//===----------------------------------------------------------------------===//
+
+multiclass SOP2_Real_gfx11<bits<7> op> {
+ def _gfx11 : SOP2_Real<op, !cast<SOP2_Pseudo>(NAME)>,
+ Select_gfx11<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP2_Real_Renamed_gfx11<bits<7> op, SOP2_Pseudo backing_pseudo, string real_name> {
+ def _gfx11 : SOP2_Real<op, backing_pseudo, real_name>,
+ Select_gfx11<backing_pseudo.Mnemonic>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+}
+
+defm S_ABSDIFF_I32 : SOP2_Real_gfx11<0x006>;
+defm S_LSHL_B32 : SOP2_Real_gfx11<0x008>;
+defm S_LSHL_B64 : SOP2_Real_gfx11<0x009>;
+defm S_LSHR_B32 : SOP2_Real_gfx11<0x00a>;
+defm S_LSHR_B64 : SOP2_Real_gfx11<0x00b>;
+defm S_ASHR_I32 : SOP2_Real_gfx11<0x00c>;
+defm S_ASHR_I64 : SOP2_Real_gfx11<0x00d>;
+defm S_LSHL1_ADD_U32 : SOP2_Real_gfx11<0x00e>;
+defm S_LSHL2_ADD_U32 : SOP2_Real_gfx11<0x00f>;
+defm S_LSHL3_ADD_U32 : SOP2_Real_gfx11<0x010>;
+defm S_LSHL4_ADD_U32 : SOP2_Real_gfx11<0x011>;
+defm S_MIN_I32 : SOP2_Real_gfx11<0x012>;
+defm S_MIN_U32 : SOP2_Real_gfx11<0x013>;
+defm S_MAX_I32 : SOP2_Real_gfx11<0x014>;
+defm S_MAX_U32 : SOP2_Real_gfx11<0x015>;
+defm S_AND_B32 : SOP2_Real_gfx11<0x016>;
+defm S_AND_B64 : SOP2_Real_gfx11<0x017>;
+defm S_OR_B32 : SOP2_Real_gfx11<0x018>;
+defm S_OR_B64 : SOP2_Real_gfx11<0x019>;
+defm S_XOR_B32 : SOP2_Real_gfx11<0x01a>;
+defm S_XOR_B64 : SOP2_Real_gfx11<0x01b>;
+defm S_NAND_B32 : SOP2_Real_gfx11<0x01c>;
+defm S_NAND_B64 : SOP2_Real_gfx11<0x01d>;
+defm S_NOR_B32 : SOP2_Real_gfx11<0x01e>;
+defm S_NOR_B64 : SOP2_Real_gfx11<0x01f>;
+defm S_XNOR_B32 : SOP2_Real_gfx11<0x020>;
+defm S_XNOR_B64 : SOP2_Real_gfx11<0x021>;
+defm S_AND_NOT1_B32 : SOP2_Real_Renamed_gfx11<0x022, S_ANDN2_B32, "s_and_not1_b32">;
+defm S_AND_NOT1_B64 : SOP2_Real_Renamed_gfx11<0x023, S_ANDN2_B64, "s_and_not1_b64">;
+defm S_OR_NOT1_B32 : SOP2_Real_Renamed_gfx11<0x024, S_ORN2_B32, "s_or_not1_b32">;
+defm S_OR_NOT1_B64 : SOP2_Real_Renamed_gfx11<0x025, S_ORN2_B64, "s_or_not1_b64">;
+defm S_BFE_U32 : SOP2_Real_gfx11<0x026>;
+defm S_BFE_I32 : SOP2_Real_gfx11<0x027>;
+defm S_BFE_U64 : SOP2_Real_gfx11<0x028>;
+defm S_BFE_I64 : SOP2_Real_gfx11<0x029>;
+defm S_BFM_B32 : SOP2_Real_gfx11<0x02a>;
+defm S_BFM_B64 : SOP2_Real_gfx11<0x02b>;
+defm S_MUL_I32 : SOP2_Real_gfx11<0x02c>;
+defm S_MUL_HI_U32 : SOP2_Real_gfx11<0x02d>;
+defm S_MUL_HI_I32 : SOP2_Real_gfx11<0x02e>;
+defm S_CSELECT_B32 : SOP2_Real_gfx11<0x030>;
+defm S_CSELECT_B64 : SOP2_Real_gfx11<0x031>;
+defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11<0x035>;
+
+//===----------------------------------------------------------------------===//
// SOP2 - GFX10.
//===----------------------------------------------------------------------===//
@@ -1566,13 +1745,16 @@ multiclass SOP2_Real_gfx10<bits<7> op> {
Select_gfx10<ps.Mnemonic>;
}
+multiclass SOP2_Real_gfx10_gfx11<bits<7> op> :
+ SOP2_Real_gfx10<op>, SOP2_Real_gfx11<op>;
+
defm S_LSHL1_ADD_U32 : SOP2_Real_gfx10<0x02e>;
defm S_LSHL2_ADD_U32 : SOP2_Real_gfx10<0x02f>;
defm S_LSHL3_ADD_U32 : SOP2_Real_gfx10<0x030>;
defm S_LSHL4_ADD_U32 : SOP2_Real_gfx10<0x031>;
-defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10<0x032>;
-defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10<0x033>;
-defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10<0x034>;
+defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10_gfx11<0x032>;
+defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10_gfx11<0x033>;
+defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10_gfx11<0x034>;
defm S_MUL_HI_U32 : SOP2_Real_gfx10<0x035>;
defm S_MUL_HI_I32 : SOP2_Real_gfx10<0x036>;
@@ -1589,14 +1771,17 @@ multiclass SOP2_Real_gfx6_gfx7<bits<7> op> {
multiclass SOP2_Real_gfx6_gfx7_gfx10<bits<7> op> :
SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10<op>;
+multiclass SOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<7> op> :
+ SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10_gfx11<op>;
+
defm S_CBRANCH_G_FORK : SOP2_Real_gfx6_gfx7<0x02b>;
-defm S_ADD_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x000>;
-defm S_SUB_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x001>;
-defm S_ADD_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x002>;
-defm S_SUB_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x003>;
-defm S_ADDC_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x004>;
-defm S_SUBB_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x005>;
+defm S_ADD_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x000>;
+defm S_SUB_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x001>;
+defm S_ADD_I32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x002>;
+defm S_SUB_I32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
+defm S_ADDC_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
+defm S_SUBB_U32 : SOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
defm S_MIN_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm S_MIN_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm S_MAX_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x008>;
@@ -1635,6 +1820,31 @@ defm S_BFE_I64 : SOP2_Real_gfx6_gfx7_gfx10<0x02a>;
defm S_ABSDIFF_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x02c>;
//===----------------------------------------------------------------------===//
+// SOPK - GFX11.
+//===----------------------------------------------------------------------===//
+
+multiclass SOPK_Real32_gfx11<bits<5> op> {
+ def _gfx11 : SOPK_Real32<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx11<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real64_gfx11<bits<5> op> {
+ def _gfx11 : SOPK_Real64<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx11<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_GETREG_B32 : SOPK_Real32_gfx11<0x011>;
+defm S_SETREG_B32 : SOPK_Real32_gfx11<0x012>;
+defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11<0x013>;
+defm S_CALL_B64 : SOPK_Real32_gfx11<0x014>;
+defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx11<0x016>;
+defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx11<0x017>;
+defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11<0x018>;
+defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11<0x019>;
+defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11<0x01a>;
+defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>;
+
+//===----------------------------------------------------------------------===//
// SOPK - GFX10.
//===----------------------------------------------------------------------===//
@@ -1650,7 +1860,10 @@ multiclass SOPK_Real64_gfx10<bits<5> op> {
Select_gfx10<ps.Mnemonic>;
}
-defm S_VERSION : SOPK_Real32_gfx10<0x001>;
+multiclass SOPK_Real32_gfx10_gfx11<bits<5> op> :
+ SOPK_Real32_gfx10<op>, SOPK_Real32_gfx11<op>;
+
+defm S_VERSION : SOPK_Real32_gfx10_gfx11<0x001>;
defm S_CALL_B64 : SOPK_Real32_gfx10<0x016>;
defm S_WAITCNT_VSCNT : SOPK_Real32_gfx10<0x017>;
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>;
@@ -1681,29 +1894,96 @@ multiclass SOPK_Real32_gfx6_gfx7_gfx10<bits<5> op> :
multiclass SOPK_Real64_gfx6_gfx7_gfx10<bits<5> op> :
SOPK_Real64_gfx6_gfx7<op>, SOPK_Real64_gfx10<op>;
+multiclass SOPK_Real32_gfx6_gfx7_gfx10_gfx11<bits<5> op> :
+ SOPK_Real32_gfx6_gfx7<op>, SOPK_Real32_gfx10_gfx11<op>;
+
defm S_CBRANCH_I_FORK : SOPK_Real32_gfx6_gfx7<0x011>;
-defm S_MOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x000>;
-defm S_CMOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x002>;
-defm S_CMPK_EQ_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x003>;
-defm S_CMPK_LG_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x004>;
-defm S_CMPK_GT_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x005>;
-defm S_CMPK_GE_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x006>;
-defm S_CMPK_LT_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x007>;
-defm S_CMPK_LE_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x008>;
-defm S_CMPK_EQ_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x009>;
-defm S_CMPK_LG_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00a>;
-defm S_CMPK_GT_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00b>;
-defm S_CMPK_GE_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00c>;
-defm S_CMPK_LT_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00d>;
-defm S_CMPK_LE_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00e>;
-defm S_ADDK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00f>;
-defm S_MULK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x010>;
+defm S_MOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x000>;
+defm S_CMOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x002>;
+defm S_CMPK_EQ_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x003>;
+defm S_CMPK_LG_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x004>;
+defm S_CMPK_GT_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x005>;
+defm S_CMPK_GE_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x006>;
+defm S_CMPK_LT_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x007>;
+defm S_CMPK_LE_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x008>;
+defm S_CMPK_EQ_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x009>;
+defm S_CMPK_LG_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00a>;
+defm S_CMPK_GT_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00b>;
+defm S_CMPK_GE_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00c>;
+defm S_CMPK_LT_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00d>;
+defm S_CMPK_LE_U32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00e>;
+defm S_ADDK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x00f>;
+defm S_MULK_I32 : SOPK_Real32_gfx6_gfx7_gfx10_gfx11<0x010>;
defm S_GETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x012>;
defm S_SETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x013>;
defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>;
//===----------------------------------------------------------------------===//
+// SOPP - GFX11.
+//===----------------------------------------------------------------------===//
+
+multiclass SOPP_Real_32_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
+ def _gfx11 : SOPP_Real_32<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_gfx11<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<0, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx11">;
+}
+
+multiclass SOPP_Real_64_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
+ def _gfx11 : SOPP_Real_64<op, !cast<SOPP_Pseudo>(NAME), real_name>,
+ Select_gfx11<!cast<SOPP_Pseudo>(NAME).Mnemonic>,
+ SOPPRelaxTable<1, !cast<SOPP_Pseudo>(NAME).KeyName, "_gfx11">;
+}
+
+multiclass SOPP_Real_32_Renamed_gfx11<bits<7> op, SOPP_Pseudo backing_pseudo, string real_name> {
+ def _gfx11 : SOPP_Real_32<op, backing_pseudo, real_name # " ">,
+ Select_gfx11<backing_pseudo.Mnemonic>,
+ MnemonicAlias<backing_pseudo.Mnemonic, real_name>, Requires<[isGFX11Plus]>;
+}
+
+multiclass SOPP_Real_With_Relaxation_gfx11<bits<7> op> {
+ defm "" : SOPP_Real_32_gfx11<op>;
+ defm _pad_s_nop : SOPP_Real_64_gfx11<op>;
+}
+
+defm S_SETKILL : SOPP_Real_32_gfx11<0x001>;
+defm S_SETHALT : SOPP_Real_32_gfx11<0x002>;
+defm S_SLEEP : SOPP_Real_32_gfx11<0x003>;
+defm S_SET_INST_PREFETCH_DISTANCE : SOPP_Real_32_Renamed_gfx11<0x004, S_INST_PREFETCH, "s_set_inst_prefetch_distance">;
+defm S_CLAUSE : SOPP_Real_32_gfx11<0x005>;
+defm S_DELAY_ALU : SOPP_Real_32_gfx11<0x007>;
+defm S_WAITCNT_DEPCTR : SOPP_Real_32_gfx11<0x008>;
+defm S_WAITCNT : SOPP_Real_32_gfx11<0x009>;
+defm S_WAIT_IDLE : SOPP_Real_32_gfx11<0x00a>;
+defm S_WAIT_EVENT : SOPP_Real_32_gfx11<0x00b>;
+defm S_TRAP : SOPP_Real_32_gfx11<0x010>;
+defm S_ROUND_MODE : SOPP_Real_32_gfx11<0x011>;
+defm S_DENORM_MODE : SOPP_Real_32_gfx11<0x012>;
+defm S_BRANCH : SOPP_Real_With_Relaxation_gfx11<0x020>;
+defm S_CBRANCH_SCC0 : SOPP_Real_With_Relaxation_gfx11<0x021>;
+defm S_CBRANCH_SCC1 : SOPP_Real_With_Relaxation_gfx11<0x022>;
+defm S_CBRANCH_VCCZ : SOPP_Real_With_Relaxation_gfx11<0x023>;
+defm S_CBRANCH_VCCNZ : SOPP_Real_With_Relaxation_gfx11<0x024>;
+defm S_CBRANCH_EXECZ : SOPP_Real_With_Relaxation_gfx11<0x025>;
+defm S_CBRANCH_EXECNZ : SOPP_Real_With_Relaxation_gfx11<0x026>;
+defm S_CBRANCH_CDBGSYS : SOPP_Real_With_Relaxation_gfx11<0x027>;
+defm S_CBRANCH_CDBGUSER : SOPP_Real_With_Relaxation_gfx11<0x028>;
+defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_Real_With_Relaxation_gfx11<0x029>;
+defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx11<0x02a>;
+defm S_ENDPGM : SOPP_Real_32_gfx11<0x030, "s_endpgm">;
+defm S_ENDPGM_SAVED : SOPP_Real_32_gfx11<0x031>;
+defm S_WAKEUP : SOPP_Real_32_gfx11<0x034>;
+defm S_SETPRIO : SOPP_Real_32_gfx11<0x035>;
+defm S_SENDMSG : SOPP_Real_32_gfx11<0x036>;
+defm S_SENDMSGHALT : SOPP_Real_32_gfx11<0x037>;
+defm S_INCPERFLEVEL : SOPP_Real_32_gfx11<0x038>;
+defm S_DECPERFLEVEL : SOPP_Real_32_gfx11<0x039>;
+defm S_TTRACEDATA : SOPP_Real_32_gfx11<0x03a>;
+defm S_TTRACEDATA_IMM : SOPP_Real_32_gfx11<0x03b>;
+defm S_ICACHE_INV : SOPP_Real_32_gfx11<0x03c>;
+defm S_BARRIER : SOPP_Real_32_gfx11<0x03d>;
+
+//===----------------------------------------------------------------------===//
// SOPP - GFX6, GFX7, GFX8, GFX9, GFX10
//===----------------------------------------------------------------------===//
@@ -1737,6 +2017,12 @@ multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<bits<7> op, string real_name = !cast
multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
SOPP_Real_32_gfx6_gfx7_gfx8_gfx9<op, real_name>, SOPP_Real_32_gfx10<op, real_name>;
+multiclass SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op, real_name>, SOPP_Real_32_gfx11<op, real_name>;
+
+multiclass SOPP_Real_32_gfx10_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_32_gfx10<op, real_name>, SOPP_Real_32_gfx11<op, real_name>;
+
//64 bit encodings, for Relaxation
multiclass SOPP_Real_64_gfx6_gfx7<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> {
defvar ps = !cast<SOPP_Pseudo>(NAME);
@@ -1768,13 +2054,16 @@ multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<bits<7> op, string real_name = !cast
multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
SOPP_Real_64_gfx6_gfx7_gfx8_gfx9<op, real_name>, SOPP_Real_64_gfx10<op, real_name>;
+multiclass SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<bits<7> op, string real_name = !cast<SOPP_Pseudo>(NAME).Mnemonic # " "> :
+ SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op, real_name>, SOPP_Real_64_gfx11<op, real_name>;
+
// Relaxation for insts with no operands is not implemented.
multiclass SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_gfx10<bits<7> op> {
defm "" : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
defm _pad_s_nop : SOPP_Real_64_gfx6_gfx7_gfx8_gfx9_gfx10<op>;
}
-defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x000>;
+defm S_NOP : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10_gfx11<0x000>;
defm S_ENDPGM : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x001, "s_endpgm">;
defm S_WAKEUP : SOPP_Real_32_gfx8_gfx9_gfx10<0x003>;
defm S_BARRIER : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x00a>;
@@ -1794,7 +2083,7 @@ defm S_ENDPGM_SAVED : SOPP_Real_32_gfx6_gfx7_gfx8_gfx9_gfx10<0x01B>;
defm S_SET_GPR_IDX_OFF : SOPP_Real_32_gfx8_gfx9<0x01c>;
defm S_SET_GPR_IDX_MODE : SOPP_Real_32_gfx8_gfx9<0x01d>;
defm S_ENDPGM_ORDERED_PS_DONE : SOPP_Real_32_gfx8_gfx9_gfx10<0x01e>;
-defm S_CODE_END : SOPP_Real_32_gfx10<0x01f>;
+defm S_CODE_END : SOPP_Real_32_gfx10_gfx11<0x01f>;
defm S_INST_PREFETCH : SOPP_Real_32_gfx10<0x020>;
defm S_CLAUSE : SOPP_Real_32_gfx10<0x021>;
defm S_WAIT_IDLE : SOPP_Real_32_gfx10<0x022>;
@@ -1818,6 +2107,34 @@ defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_
}
//===----------------------------------------------------------------------===//
+// SOPC - GFX11.
+//===----------------------------------------------------------------------===//
+
+multiclass SOPC_Real_gfx11<bits<7> op> {
+ def _gfx11 : SOPC_Real<op, !cast<SOPC_Pseudo>(NAME)>,
+ Select_gfx11<!cast<SOPC_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_CMP_EQ_I32 : SOPC_Real_gfx11<0x00>;
+defm S_CMP_LG_I32 : SOPC_Real_gfx11<0x01>;
+defm S_CMP_GT_I32 : SOPC_Real_gfx11<0x02>;
+defm S_CMP_GE_I32 : SOPC_Real_gfx11<0x03>;
+defm S_CMP_LT_I32 : SOPC_Real_gfx11<0x04>;
+defm S_CMP_LE_I32 : SOPC_Real_gfx11<0x05>;
+defm S_CMP_EQ_U32 : SOPC_Real_gfx11<0x06>;
+defm S_CMP_LG_U32 : SOPC_Real_gfx11<0x07>;
+defm S_CMP_GT_U32 : SOPC_Real_gfx11<0x08>;
+defm S_CMP_GE_U32 : SOPC_Real_gfx11<0x09>;
+defm S_CMP_LT_U32 : SOPC_Real_gfx11<0x0a>;
+defm S_CMP_LE_U32 : SOPC_Real_gfx11<0x0b>;
+defm S_BITCMP0_B32 : SOPC_Real_gfx11<0x0c>;
+defm S_BITCMP1_B32 : SOPC_Real_gfx11<0x0d>;
+defm S_BITCMP0_B64 : SOPC_Real_gfx11<0x0e>;
+defm S_BITCMP1_B64 : SOPC_Real_gfx11<0x0f>;
+defm S_CMP_EQ_U64 : SOPC_Real_gfx11<0x10>;
+defm S_CMP_LG_U64 : SOPC_Real_gfx11<0x11>;
+
+//===----------------------------------------------------------------------===//
// SOPC - GFX6, GFX7, GFX8, GFX9, GFX10
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 18c348d1cf89..c0fd5bc69325 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -6,33 +6,64 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUAsmUtils.h"
+#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
-#include "llvm/ADT/StringRef.h"
-
namespace llvm {
namespace AMDGPU {
+
+namespace DepCtr {
+
+// NOLINTBEGIN
+const CustomOperandVal DepCtrInfo[] = {
+ // Name max dflt shift width constraint
+ {{"depctr_hold_cnt"}, 1, 1, 7, 1, isGFX10_BEncoding},
+ {{"depctr_sa_sdst"}, 1, 1, 0, 1},
+ {{"depctr_va_vdst"}, 15, 15, 12, 4},
+ {{"depctr_va_sdst"}, 7, 7, 9, 3},
+ {{"depctr_va_ssrc"}, 1, 1, 8, 1},
+ {{"depctr_va_vcc"}, 1, 1, 1, 1},
+ {{"depctr_vm_vsrc"}, 7, 7, 2, 3},
+};
+// NOLINTEND
+
+const int DEP_CTR_SIZE =
+ static_cast<int>(sizeof(DepCtrInfo) / sizeof(CustomOperandVal));
+
+} // namespace DepCtr
+
namespace SendMsg {
-// This must be in sync with llvm::AMDGPU::SendMsg::Id enum members, see SIDefines.h.
-const char *const IdSymbolic[ID_GAPS_LAST_] = {
- nullptr,
- "MSG_INTERRUPT",
- "MSG_GS",
- "MSG_GS_DONE",
- "MSG_SAVEWAVE",
- "MSG_STALL_WAVE_GEN",
- "MSG_HALT_WAVES",
- "MSG_ORDERED_PS_DONE",
- "MSG_EARLY_PRIM_DEALLOC",
- "MSG_GS_ALLOC_REQ",
- "MSG_GET_DOORBELL",
- "MSG_GET_DDID",
- nullptr,
- nullptr,
- nullptr,
- "MSG_SYSMSG"
+// Disable lint checking for this block, since applying the lint fixes would
+// make the table unreadable.
+// NOLINTBEGIN
+const CustomOperand<const MCSubtargetInfo &> Msg[] = {
+ {{""}},
+ {{"MSG_INTERRUPT"}, ID_INTERRUPT},
+ {{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus},
+ {{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus},
+ {{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
+ {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9Plus},
+ {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9Plus},
+ {{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9Plus},
+ {{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
+ {{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus},
+ {{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10},
+ {{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10},
+ {{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
+ {{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
+ {{""}},
+ {{"MSG_SYSMSG"}, ID_SYSMSG},
+ {{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus},
+ {{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, isGFX11Plus},
+ {{"MSG_RTN_GET_TMA"}, ID_RTN_GET_TMA, isGFX11Plus},
+ {{"MSG_RTN_GET_REALTIME"}, ID_RTN_GET_REALTIME, isGFX11Plus},
+ {{"MSG_RTN_SAVE_WAVE"}, ID_RTN_SAVE_WAVE, isGFX11Plus},
+ {{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, isGFX11Plus},
};
+// NOLINTEND
+
+const int MSG_SIZE = static_cast<int>(
+ sizeof(Msg) / sizeof(CustomOperand<const MCSubtargetInfo &>));
// These two must be in sync with llvm::AMDGPU::SendMsg::Op enum members, see SIDefines.h.
const char *const OpSysSymbolic[OP_SYS_LAST_] = {
@@ -54,39 +85,54 @@ const char *const OpGsSymbolic[OP_GS_LAST_] = {
namespace Hwreg {
-// This must be in sync with llvm::AMDGPU::Hwreg::ID_SYMBOLIC_FIRST_/LAST_, see SIDefines.h.
-const char* const IdSymbolic[] = {
- nullptr,
- "HW_REG_MODE",
- "HW_REG_STATUS",
- "HW_REG_TRAPSTS",
- "HW_REG_HW_ID",
- "HW_REG_GPR_ALLOC",
- "HW_REG_LDS_ALLOC",
- "HW_REG_IB_STS",
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- "HW_REG_SH_MEM_BASES",
- "HW_REG_TBA_LO",
- "HW_REG_TBA_HI",
- "HW_REG_TMA_LO",
- "HW_REG_TMA_HI",
- "HW_REG_FLAT_SCR_LO",
- "HW_REG_FLAT_SCR_HI",
- "HW_REG_XNACK_MASK",
- "HW_REG_HW_ID1",
- "HW_REG_HW_ID2",
- "HW_REG_POPS_PACKER",
- nullptr,
- nullptr,
- nullptr,
- "HW_REG_SHADER_CYCLES"
+// Disable lint checking for this block, since applying the lint fixes would
+// make the table unreadable.
+// NOLINTBEGIN
+const CustomOperand<const MCSubtargetInfo &> Opr[] = {
+ {{""}},
+ {{"HW_REG_MODE"}, ID_MODE},
+ {{"HW_REG_STATUS"}, ID_STATUS},
+ {{"HW_REG_TRAPSTS"}, ID_TRAPSTS},
+ {{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus},
+ {{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC},
+ {{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC},
+ {{"HW_REG_IB_STS"}, ID_IB_STS},
+ {{""}},
+ {{""}},
+ {{""}},
+ {{""}},
+ {{""}},
+ {{""}},
+ {{""}},
+ {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9Plus},
+ {{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10},
+ {{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10},
+ {{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10},
+ {{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10},
+ {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10Plus},
+ {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10Plus},
+ {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030},
+ {{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus},
+ {{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus},
+ {{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
+ {{""}},
+ {{""}},
+ {{""}},
+ {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding},
+
+ // GFX940 specific registers
+ {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
+
+ // Aliases
+ {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
};
+// NOLINTEND
+
+const int OPR_SIZE = static_cast<int>(
+ sizeof(Opr) / sizeof(CustomOperand<const MCSubtargetInfo &>));
} // namespace Hwreg
@@ -144,7 +190,7 @@ StringLiteral const NfmtSymbolicVI[] = { // VI and GFX9
"BUF_NUM_FORMAT_FLOAT"
};
-StringLiteral const UfmtSymbolic[] = {
+StringLiteral const UfmtSymbolicGFX10[] = {
"BUF_FMT_INVALID",
"BUF_FMT_8_UNORM",
@@ -238,7 +284,7 @@ StringLiteral const UfmtSymbolic[] = {
"BUF_FMT_32_32_32_32_FLOAT"
};
-unsigned const DfmtNfmt2UFmt[] = {
+unsigned const DfmtNfmt2UFmtGFX10[] = {
DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT),
DFMT_8 | (NFMT_UNORM << NFMT_SHIFT),
@@ -332,6 +378,166 @@ unsigned const DfmtNfmt2UFmt[] = {
DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT)
};
+StringLiteral const UfmtSymbolicGFX11[] = {
+ "BUF_FMT_INVALID",
+
+ "BUF_FMT_8_UNORM",
+ "BUF_FMT_8_SNORM",
+ "BUF_FMT_8_USCALED",
+ "BUF_FMT_8_SSCALED",
+ "BUF_FMT_8_UINT",
+ "BUF_FMT_8_SINT",
+
+ "BUF_FMT_16_UNORM",
+ "BUF_FMT_16_SNORM",
+ "BUF_FMT_16_USCALED",
+ "BUF_FMT_16_SSCALED",
+ "BUF_FMT_16_UINT",
+ "BUF_FMT_16_SINT",
+ "BUF_FMT_16_FLOAT",
+
+ "BUF_FMT_8_8_UNORM",
+ "BUF_FMT_8_8_SNORM",
+ "BUF_FMT_8_8_USCALED",
+ "BUF_FMT_8_8_SSCALED",
+ "BUF_FMT_8_8_UINT",
+ "BUF_FMT_8_8_SINT",
+
+ "BUF_FMT_32_UINT",
+ "BUF_FMT_32_SINT",
+ "BUF_FMT_32_FLOAT",
+
+ "BUF_FMT_16_16_UNORM",
+ "BUF_FMT_16_16_SNORM",
+ "BUF_FMT_16_16_USCALED",
+ "BUF_FMT_16_16_SSCALED",
+ "BUF_FMT_16_16_UINT",
+ "BUF_FMT_16_16_SINT",
+ "BUF_FMT_16_16_FLOAT",
+
+ "BUF_FMT_10_11_11_FLOAT",
+
+ "BUF_FMT_11_11_10_FLOAT",
+
+ "BUF_FMT_10_10_10_2_UNORM",
+ "BUF_FMT_10_10_10_2_SNORM",
+ "BUF_FMT_10_10_10_2_UINT",
+ "BUF_FMT_10_10_10_2_SINT",
+
+ "BUF_FMT_2_10_10_10_UNORM",
+ "BUF_FMT_2_10_10_10_SNORM",
+ "BUF_FMT_2_10_10_10_USCALED",
+ "BUF_FMT_2_10_10_10_SSCALED",
+ "BUF_FMT_2_10_10_10_UINT",
+ "BUF_FMT_2_10_10_10_SINT",
+
+ "BUF_FMT_8_8_8_8_UNORM",
+ "BUF_FMT_8_8_8_8_SNORM",
+ "BUF_FMT_8_8_8_8_USCALED",
+ "BUF_FMT_8_8_8_8_SSCALED",
+ "BUF_FMT_8_8_8_8_UINT",
+ "BUF_FMT_8_8_8_8_SINT",
+
+ "BUF_FMT_32_32_UINT",
+ "BUF_FMT_32_32_SINT",
+ "BUF_FMT_32_32_FLOAT",
+
+ "BUF_FMT_16_16_16_16_UNORM",
+ "BUF_FMT_16_16_16_16_SNORM",
+ "BUF_FMT_16_16_16_16_USCALED",
+ "BUF_FMT_16_16_16_16_SSCALED",
+ "BUF_FMT_16_16_16_16_UINT",
+ "BUF_FMT_16_16_16_16_SINT",
+ "BUF_FMT_16_16_16_16_FLOAT",
+
+ "BUF_FMT_32_32_32_UINT",
+ "BUF_FMT_32_32_32_SINT",
+ "BUF_FMT_32_32_32_FLOAT",
+ "BUF_FMT_32_32_32_32_UINT",
+ "BUF_FMT_32_32_32_32_SINT",
+ "BUF_FMT_32_32_32_32_FLOAT"
+};
+
+unsigned const DfmtNfmt2UFmtGFX11[] = {
+ DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT),
+
+ DFMT_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_8_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_16_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_10_11_11 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_11_11_10 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_10_10_10_2 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_2_10_10_10 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_8_8_8_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_16_16_16_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_32_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT)
+};
+
} // namespace MTBUFFormat
namespace Swizzle {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
index d1deb570a938..054e35e90f2f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -11,15 +11,60 @@
#include "SIDefines.h"
+#include "llvm/ADT/StringRef.h"
+
namespace llvm {
class StringLiteral;
+class MCSubtargetInfo;
namespace AMDGPU {
+const int OPR_ID_UNKNOWN = -1;
+const int OPR_ID_UNSUPPORTED = -2;
+const int OPR_ID_DUPLICATE = -3;
+const int OPR_VAL_INVALID = -4;
+
+template <class T> struct CustomOperand {
+ StringLiteral Name;
+ int Encoding = 0;
+ bool (*Cond)(T Context) = nullptr;
+};
+
+struct CustomOperandVal {
+ StringLiteral Name;
+ unsigned Max;
+ unsigned Default;
+ unsigned Shift;
+ unsigned Width;
+ bool (*Cond)(const MCSubtargetInfo &STI) = nullptr;
+ unsigned Mask = (1 << Width) - 1;
+
+ unsigned decode(unsigned Code) const { return (Code >> Shift) & Mask; }
+
+ unsigned encode(unsigned Val) const { return (Val & Mask) << Shift; }
+
+ unsigned getMask() const { return Mask << Shift; }
+
+ bool isValid(unsigned Val) const { return Val <= Max; }
+
+ bool isSupported(const MCSubtargetInfo &STI) const {
+ return !Cond || Cond(STI);
+ }
+};
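For illustration (an editor's sketch, not part of the patch): the "depctr_va_vdst" row of DepCtrInfo defined in AMDGPUAsmUtils.cpp has Shift = 12 and Width = 4, so Mask == 0xF and the field occupies bits [15:12]:

  const CustomOperandVal &VaVdst = DepCtrInfo[2]; // index per the table above
  unsigned Enc = VaVdst.encode(3);   // (3 & 0xF) << 12 == 0x3000
  unsigned Val = VaVdst.decode(Enc); // (0x3000 >> 12) & 0xF == 3
  // VaVdst.getMask() == 0xF000; VaVdst.isValid(3) holds since 3 <= Max == 15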
+
+namespace DepCtr {
+
+extern const CustomOperandVal DepCtrInfo[];
+extern const int DEP_CTR_SIZE;
+
+} // namespace DepCtr
+
namespace SendMsg { // Symbolic names for the sendmsg(...) syntax.
-extern const char *const IdSymbolic[ID_GAPS_LAST_];
+extern const CustomOperand<const MCSubtargetInfo &> Msg[];
+extern const int MSG_SIZE;
+
extern const char *const OpSysSymbolic[OP_SYS_LAST_];
extern const char *const OpGsSymbolic[OP_GS_LAST_];
@@ -27,7 +72,8 @@ extern const char *const OpGsSymbolic[OP_GS_LAST_];
namespace Hwreg { // Symbolic names for the hwreg(...) syntax.
-extern const char* const IdSymbolic[];
+extern const CustomOperand<const MCSubtargetInfo &> Opr[];
+extern const int OPR_SIZE;
} // namespace Hwreg
@@ -37,8 +83,10 @@ extern StringLiteral const DfmtSymbolic[];
extern StringLiteral const NfmtSymbolicGFX10[];
extern StringLiteral const NfmtSymbolicSICI[];
extern StringLiteral const NfmtSymbolicVI[];
-extern StringLiteral const UfmtSymbolic[];
-extern unsigned const DfmtNfmt2UFmt[];
+extern StringLiteral const UfmtSymbolicGFX10[];
+extern StringLiteral const UfmtSymbolicGFX11[];
+extern unsigned const DfmtNfmt2UFmtGFX10[];
+extern unsigned const DfmtNfmt2UFmtGFX11[];
} // namespace MTBUFFormat
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 683be871ff82..e4ab72f1095b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -28,10 +28,15 @@
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
-static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
- "amdhsa-code-object-version", llvm::cl::Hidden,
- llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
- llvm::cl::ZeroOrMore);
+static llvm::cl::opt<unsigned>
+ AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
+ llvm::cl::desc("AMDHSA Code Object Version"),
+ llvm::cl::init(4));
+
+// TODO-GFX11: Remove this when full 16-bit codegen is implemented.
+static llvm::cl::opt<bool>
+ LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden,
+ llvm::cl::desc("Never use more than 128 VGPRs"));
namespace {
@@ -44,9 +49,8 @@ unsigned getBitMask(unsigned Shift, unsigned Width) {
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
- Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
- Dst |= (Src << Shift) & getBitMask(Shift, Width);
- return Dst;
+ unsigned Mask = getBitMask(Shift, Width);
+ return ((Src << Shift) & Mask) | (Dst & ~Mask);
}
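A worked example of the rewritten helper (illustrative only): packing Src = 0b101 into bits [6:4] of Dst = 0xFFFF gives Mask = getBitMask(4, 3) == 0x70, field bits (0b101 << 4) & 0x70 == 0x50, and preserved bits 0xFFFF & ~0x70 == 0xFF8F:

  unsigned R = packBits(0b101, 0xFFFF, /*Shift=*/4, /*Width=*/3); // == 0xFFDF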
/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
@@ -57,30 +61,40 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
}
/// \returns Vmcnt bit shift (lower bits).
-unsigned getVmcntBitShiftLo() { return 0; }
+unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
+ return VersionMajor >= 11 ? 10 : 0;
+}
/// \returns Vmcnt bit width (lower bits).
-unsigned getVmcntBitWidthLo() { return 4; }
+unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
+ return VersionMajor >= 11 ? 6 : 4;
+}
/// \returns Expcnt bit shift.
-unsigned getExpcntBitShift() { return 4; }
+unsigned getExpcntBitShift(unsigned VersionMajor) {
+ return VersionMajor >= 11 ? 0 : 4;
+}
/// \returns Expcnt bit width.
-unsigned getExpcntBitWidth() { return 3; }
+unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
/// \returns Lgkmcnt bit shift.
-unsigned getLgkmcntBitShift() { return 8; }
+unsigned getLgkmcntBitShift(unsigned VersionMajor) {
+ return VersionMajor >= 11 ? 4 : 8;
+}
/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
- return (VersionMajor >= 10) ? 6 : 4;
+ return VersionMajor >= 10 ? 6 : 4;
}
/// \returns Vmcnt bit shift (higher bits).
-unsigned getVmcntBitShiftHi() { return 14; }
+unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
/// \returns Vmcnt bit width (higher bits).
-unsigned getVmcntBitWidthHi() { return 2; }
+unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
+ return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
+}
} // end namespace anonymous
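Read together, these accessors imply the following s_waitcnt field layouts (a summary derived from this hunk, not normative ISA documentation):

  // GFX9/GFX10: Vmcnt = [3:0] plus [15:14], Expcnt = [6:4], Lgkmcnt = [13:8]
  //             (Lgkmcnt is only [11:8] before GFX10).
  // GFX11:      Expcnt = [2:0], Lgkmcnt = [9:4], Vmcnt = [15:10], and no
  //             separate high Vmcnt part (width 0).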
@@ -136,6 +150,41 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
isHsaAbiVersion5(STI);
}
+unsigned getAmdhsaCodeObjectVersion() {
+ return AmdhsaCodeObjectVersion;
+}
+
+unsigned getMultigridSyncArgImplicitArgPosition() {
+ switch (AmdhsaCodeObjectVersion) {
+ case 2:
+ case 3:
+ case 4:
+ return 48;
+ case 5:
+ return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
+ default:
+ llvm_unreachable("Unexpected code object version");
+ return 0;
+ }
+}
+
+// FIXME: All such magic numbers about the ABI should be in a
+// central TD file.
+unsigned getHostcallImplicitArgPosition() {
+ switch (AmdhsaCodeObjectVersion) {
+ case 2:
+ case 3:
+ case 4:
+ return 24;
+ case 5:
+ return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
+ default:
+ llvm_unreachable("Unexpected code object version");
+ return 0;
+ }
+}
+
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
@@ -144,6 +193,7 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
+#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -223,6 +273,10 @@ struct VOPInfo {
bool IsSingle;
};
+struct VOPC64DPPInfo {
+ uint16_t Opcode;
+};
+
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
@@ -235,6 +289,14 @@ struct VOPInfo {
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
+#define GET_VOPC64DPPTable_DECL
+#define GET_VOPC64DPPTable_IMPL
+#define GET_VOPC64DPP8Table_DECL
+#define GET_VOPC64DPP8Table_IMPL
+#define GET_WMMAOpcode2AddrMappingTable_DECL
+#define GET_WMMAOpcode2AddrMappingTable_IMPL
+#define GET_WMMAOpcode3AddrMappingTable_DECL
+#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMTBUFBaseOpcode(unsigned Opc) {
@@ -322,6 +384,30 @@ bool getVOP3IsSingle(unsigned Opc) {
return Info ? Info->IsSingle : false;
}
+bool isVOPC64DPP(unsigned Opc) {
+ return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
+}
+
+bool getMAIIsDGEMM(unsigned Opc) {
+ const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
+ return Info ? Info->is_dgemm : false;
+}
+
+bool getMAIIsGFX940XDL(unsigned Opc) {
+ const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
+ return Info ? Info->is_gfx940_xdl : false;
+}
+
+unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
+ const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
+ return Info ? Info->Opcode3Addr : ~0u;
+}
+
+unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
+ const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
+ return Info ? Info->Opcode2Addr : ~0u;
+}
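Both mappers fall back to ~0u when the searchable table has no counterpart, so callers should treat that value as "no mapping" (a usage sketch):

  unsigned Opc3 = mapWMMA2AddrTo3AddrOpcode(Opc2);
  if (Opc3 != ~0u) {
    // A three-address twin exists; rewriting is safe.
  }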
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
@@ -740,6 +826,15 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
}
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+ if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs
+ : isGFX11Plus(*STI)) {
+ // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions
+ // such that values 128..255 no longer mean v128..v255, they mean
+ // v0.hi..v127.hi instead. Until the compiler understands this, it is not
+ // safe to use v128..v255.
+ // TODO-GFX11: Remove this when full 16-bit codegen is implemented.
+ return 128;
+ }
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
return 512;
return 256;
@@ -904,16 +999,13 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
}
unsigned getVmcntBitMask(const IsaVersion &Version) {
- unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
- if (Version.Major < 9)
- return VmcntLo;
-
- unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
- return VmcntLo | VmcntHi;
+ return (1 << (getVmcntBitWidthLo(Version.Major) +
+ getVmcntBitWidthHi(Version.Major))) -
+ 1;
}
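The unified expression reproduces the results of the old two-branch code; plugging in the widths defined earlier (illustrative arithmetic):

  // GFX8 and earlier: widthLo 4, widthHi 0 -> (1 << 4) - 1 == 15
  // GFX9/GFX10:       widthLo 4, widthHi 2 -> (1 << 6) - 1 == 63
  // GFX11:            widthLo 6, widthHi 0 -> (1 << 6) - 1 == 63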
unsigned getExpcntBitMask(const IsaVersion &Version) {
- return (1 << getExpcntBitWidth()) - 1;
+ return (1 << getExpcntBitWidth(Version.Major)) - 1;
}
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
@@ -921,36 +1013,32 @@ unsigned getLgkmcntBitMask(const IsaVersion &Version) {
}
unsigned getWaitcntBitMask(const IsaVersion &Version) {
- unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
- unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
- unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
+ unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
+ getVmcntBitWidthLo(Version.Major));
+ unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
+ getExpcntBitWidth(Version.Major));
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
getLgkmcntBitWidth(Version.Major));
- unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
- if (Version.Major < 9)
- return Waitcnt;
-
- unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
- return Waitcnt | VmcntHi;
+ unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
+ getVmcntBitWidthHi(Version.Major));
+ return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
- unsigned VmcntLo =
- unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
- if (Version.Major < 9)
- return VmcntLo;
-
- unsigned VmcntHi =
- unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
- VmcntHi <<= getVmcntBitWidthLo();
- return VmcntLo | VmcntHi;
+ unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
+ getVmcntBitWidthLo(Version.Major));
+ unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
+ getVmcntBitWidthHi(Version.Major));
+ return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
+ return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
+ getExpcntBitWidth(Version.Major));
}
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getLgkmcntBitShift(),
+ return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
getLgkmcntBitWidth(Version.Major));
}
@@ -971,24 +1059,23 @@ Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
- Waitcnt =
- packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
- if (Version.Major < 9)
- return Waitcnt;
-
- Vmcnt >>= getVmcntBitWidthLo();
- return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
+ getVmcntBitWidthLo(Version.Major));
+ return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
+ getVmcntBitShiftHi(Version.Major),
+ getVmcntBitWidthHi(Version.Major));
}
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
- return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
+ return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
+ getExpcntBitWidth(Version.Major));
}
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
- return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
- getLgkmcntBitWidth(Version.Major));
+ return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
+ getLgkmcntBitWidth(Version.Major));
}
unsigned encodeWaitcnt(const IsaVersion &Version,
@@ -1005,43 +1092,184 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
}
//===----------------------------------------------------------------------===//
-// hwreg
+// Custom Operands.
+//
+// A table of custom operands shall describe "primary" operand names
+// first, followed by aliases if any. It is not required but recommended
+// to arrange operands so that operand encodings match operand positions
+// in the table. This will make disassembly a bit more efficient.
+// Unused slots in the table shall have an empty name.
+//
//===----------------------------------------------------------------------===//
-namespace Hwreg {
-
-int64_t getHwregId(const StringRef Name) {
- for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
- if (IdSymbolic[Id] && Name == IdSymbolic[Id])
- return Id;
+template <class T>
+static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
+ T Context) {
+ return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
+ (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
+}
+
+template <class T>
+static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
+ const CustomOperand<T> OpInfo[], int OpInfoSize,
+ T Context) {
+ int InvalidIdx = OPR_ID_UNKNOWN;
+ for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
+ if (Test(OpInfo[Idx])) {
+ if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
+ return Idx;
+ InvalidIdx = OPR_ID_UNSUPPORTED;
+ }
}
- return ID_UNKNOWN_;
+ return InvalidIdx;
+}
+
+template <class T>
+static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
+ int OpInfoSize, T Context) {
+ auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
+ return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
+}
+
+template <class T>
+static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
+ T Context, bool QuickCheck = true) {
+ auto Test = [=](const CustomOperand<T> &Op) {
+ return Op.Encoding == Id && !Op.Name.empty();
+ };
+ // This is an optimization that should work in most cases.
+ // As a side effect, it may cause selection of an alias
+ // instead of a primary operand name in case of sparse tables.
+ if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
+ OpInfo[Id].Encoding == Id) {
+ return Id;
+ }
+ return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
}
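A sketch of the fast path, assuming the recommended layout where an operand's encoding equals its table position: the Hwreg table places {"HW_REG_MODE", ID_MODE} at index ID_MODE with no predicate, so

  getOprIdx<const MCSubtargetInfo &>(ID_MODE, Opr, OPR_SIZE, STI)

returns immediately without scanning; sparse encodings such as the GFX940 rows fall through to the linear search.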
-static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
- if (isSI(STI) || isCI(STI) || isVI(STI))
- return ID_SYMBOLIC_FIRST_GFX9_;
- else if (isGFX9(STI))
- return ID_SYMBOLIC_FIRST_GFX10_;
- else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
- return ID_SYMBOLIC_FIRST_GFX1030_;
- else
- return ID_SYMBOLIC_LAST_;
+//===----------------------------------------------------------------------===//
+// Custom Operand Values
+//===----------------------------------------------------------------------===//
+
+static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
+ int Size,
+ const MCSubtargetInfo &STI) {
+ unsigned Enc = 0;
+ for (int Idx = 0; Idx < Size; ++Idx) {
+ const auto &Op = Opr[Idx];
+ if (Op.isSupported(STI))
+ Enc |= Op.encode(Op.Default);
+ }
+ return Enc;
+}
+
+static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
+ int Size, unsigned Code,
+ bool &HasNonDefaultVal,
+ const MCSubtargetInfo &STI) {
+ unsigned UsedOprMask = 0;
+ HasNonDefaultVal = false;
+ for (int Idx = 0; Idx < Size; ++Idx) {
+ const auto &Op = Opr[Idx];
+ if (!Op.isSupported(STI))
+ continue;
+ UsedOprMask |= Op.getMask();
+ unsigned Val = Op.decode(Code);
+ if (!Op.isValid(Val))
+ return false;
+ HasNonDefaultVal |= (Val != Op.Default);
+ }
+ return (Code & ~UsedOprMask) == 0;
+}
+
+static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
+ unsigned Code, int &Idx, StringRef &Name,
+ unsigned &Val, bool &IsDefault,
+ const MCSubtargetInfo &STI) {
+ while (Idx < Size) {
+ const auto &Op = Opr[Idx++];
+ if (Op.isSupported(STI)) {
+ Name = Op.Name;
+ Val = Op.decode(Code);
+ IsDefault = (Val == Op.Default);
+ return true;
+ }
+ }
+
+ return false;
}
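Callers treat the in/out Idx as a cursor and call repeatedly to enumerate every field supported on the subtarget (a sketch):

  int Id = 0; // advanced by each call
  StringRef Name;
  unsigned Val;
  bool IsDefault;
  while (decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI)) {
    // Print "Name(Val)", typically skipping fields where IsDefault is set.
  }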
-bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
- switch (Id) {
- case ID_HW_ID:
- return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
- case ID_HW_ID1:
- case ID_HW_ID2:
- return isGFX10Plus(STI);
- case ID_XNACK_MASK:
- return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
- default:
- return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
- IdSymbolic[Id];
+static int encodeCustomOperandVal(const CustomOperandVal &Op,
+ int64_t InputVal) {
+ if (InputVal < 0 || InputVal > Op.Max)
+ return OPR_VAL_INVALID;
+ return Op.encode(InputVal);
+}
+
+static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
+ const StringRef Name, int64_t InputVal,
+ unsigned &UsedOprMask,
+ const MCSubtargetInfo &STI) {
+ int InvalidId = OPR_ID_UNKNOWN;
+ for (int Idx = 0; Idx < Size; ++Idx) {
+ const auto &Op = Opr[Idx];
+ if (Op.Name == Name) {
+ if (!Op.isSupported(STI)) {
+ InvalidId = OPR_ID_UNSUPPORTED;
+ continue;
+ }
+ auto OprMask = Op.getMask();
+ if (OprMask & UsedOprMask)
+ return OPR_ID_DUPLICATE;
+ UsedOprMask |= OprMask;
+ return encodeCustomOperandVal(Op, InputVal);
+ }
}
+ return InvalidId;
+}
+
+//===----------------------------------------------------------------------===//
+// DepCtr
+//===----------------------------------------------------------------------===//
+
+namespace DepCtr {
+
+int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
+ static int Default = -1;
+ if (Default == -1)
+ Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
+ return Default;
+}
+
+bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
+ const MCSubtargetInfo &STI) {
+ return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
+ HasNonDefaultVal, STI);
+}
+
+bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
+ bool &IsDefault, const MCSubtargetInfo &STI) {
+ return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
+ IsDefault, STI);
+}
+
+int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
+ const MCSubtargetInfo &STI) {
+ return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
+ STI);
+}
+
+} // namespace DepCtr
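On the assembler side, UsedOprMask accumulates the bits already written so that a repeated field is rejected (a sketch using the DepCtrInfo values above):

  unsigned UsedOprMask = 0;
  int Enc = encodeDepCtr("depctr_va_vdst", 3, UsedOprMask, STI);
  // Enc == 0x3000 (bits [15:12]); UsedOprMask now covers those bits, so a
  // second "depctr_va_vdst" returns OPR_ID_DUPLICATE.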
+
+//===----------------------------------------------------------------------===//
+// hwreg
+//===----------------------------------------------------------------------===//
+
+namespace Hwreg {
+
+int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
+ int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
+ return (Idx < 0) ? Idx : Opr[Idx].Encoding;
}
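Because getOprIdx skips rows whose predicate fails and keeps scanning, the trailing alias row of the Opr table resolves the legacy name per subtarget (sketch):

  int64_t Id = getHwregId("HW_REG_HW_ID", STI);
  // GFX9:  primary row passes isNotGFX10Plus         -> ID_HW_ID
  // GFX10: primary fails, trailing alias row matches -> ID_HW_ID1
  // GFX11: both predicates fail                      -> OPR_ID_UNSUPPORTED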
bool isValidHwreg(int64_t Id) {
@@ -1063,7 +1291,8 @@ uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
}
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
- return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
+ int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
+ return (Idx < 0) ? "" : Opr[Idx].Name;
}
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
@@ -1087,12 +1316,13 @@ struct ExpTgt {
};
static constexpr ExpTgt ExpTgtInfo[] = {
- {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
- {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
- {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
- {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
- {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
- {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
+ {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
+ {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
+ {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
+ {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
+ {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
+ {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
+ {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};
bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
@@ -1130,7 +1360,20 @@ unsigned getTgtId(const StringRef Name) {
}
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
- return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI);
+ switch (Id) {
+ case ET_NULL:
+ return !isGFX11Plus(STI);
+ case ET_POS4:
+ case ET_PRIM:
+ return isGFX10Plus(STI);
+ case ET_DUAL_SRC_BLEND0:
+ case ET_DUAL_SRC_BLEND1:
+ return isGFX11Plus(STI);
+ default:
+ if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
+ return !isGFX11Plus(STI);
+ return true;
+ }
}
} // namespace Exp
@@ -1196,27 +1439,44 @@ void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
-int64_t getUnifiedFormat(const StringRef Name) {
- for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
- if (Name == UfmtSymbolic[Id])
- return Id;
+int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
+ if (isGFX11Plus(STI)) {
+ for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
+ if (Name == UfmtSymbolicGFX11[Id])
+ return Id;
+ }
+ } else {
+ for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
+ if (Name == UfmtSymbolicGFX10[Id])
+ return Id;
+ }
}
return UFMT_UNDEF;
}
-StringRef getUnifiedFormatName(unsigned Id) {
- return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : "";
+StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
+ if (isValidUnifiedFormat(Id, STI))
+ return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
+ return "";
}
-bool isValidUnifiedFormat(unsigned Id) {
- return Id <= UFMT_LAST;
+bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
+ return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}
-int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
+int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
+ const MCSubtargetInfo &STI) {
int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
- for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
- if (Fmt == DfmtNfmt2UFmt[Id])
- return Id;
+ if (isGFX11Plus(STI)) {
+ for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
+ if (Fmt == DfmtNfmt2UFmtGFX11[Id])
+ return Id;
+ }
+ } else {
+ for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
+ if (Fmt == DfmtNfmt2UFmtGFX10[Id])
+ return Id;
+ }
}
return UFMT_UNDEF;
}
@@ -1239,40 +1499,22 @@ unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
namespace SendMsg {
-int64_t getMsgId(const StringRef Name) {
- for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
- if (IdSymbolic[i] && Name == IdSymbolic[i])
- return i;
- }
- return ID_UNKNOWN_;
+static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
+ return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}
-bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
- if (Strict) {
- switch (MsgId) {
- case ID_SAVEWAVE:
- return isVI(STI) || isGFX9Plus(STI);
- case ID_STALL_WAVE_GEN:
- case ID_HALT_WAVES:
- case ID_ORDERED_PS_DONE:
- case ID_GS_ALLOC_REQ:
- case ID_GET_DOORBELL:
- return isGFX9Plus(STI);
- case ID_EARLY_PRIM_DEALLOC:
- return isGFX9(STI);
- case ID_GET_DDID:
- return isGFX10Plus(STI);
- default:
- return 0 <= MsgId && MsgId < ID_GAPS_LAST_ && IdSymbolic[MsgId];
- }
- } else {
- return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
- }
+int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
+ int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
+ return (Idx < 0) ? Idx : Msg[Idx].Encoding;
}
-StringRef getMsgName(int64_t MsgId) {
- assert(0 <= MsgId && MsgId < ID_GAPS_LAST_);
- return IdSymbolic[MsgId];
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
+ return (MsgId & ~(getMsgIdMask(STI))) == 0;
+}
+
+StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
+ int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
+ return (Idx < 0) ? "" : Msg[Idx].Name;
}
int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
@@ -1289,26 +1531,27 @@ int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
bool Strict) {
- assert(isValidMsgId(MsgId, STI, Strict));
+ assert(isValidMsgId(MsgId, STI));
if (!Strict)
return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
- switch(MsgId)
- {
- case ID_GS:
- return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
- case ID_GS_DONE:
- return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
- case ID_SYSMSG:
+ if (MsgId == ID_SYSMSG)
return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
- default:
- return OpId == OP_NONE_;
+ if (!isGFX11Plus(STI)) {
+ switch (MsgId) {
+ case ID_GS_PreGFX11:
+ return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
+ case ID_GS_DONE_PreGFX11:
+ return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
+ }
}
+ return OpId == OP_NONE_;
}
-StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
- assert(msgRequiresOp(MsgId));
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
+ const MCSubtargetInfo &STI) {
+ assert(msgRequiresOp(MsgId, STI));
return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}
@@ -1319,42 +1562,48 @@ bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
if (!Strict)
return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
- switch(MsgId)
- {
- case ID_GS:
- return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
- case ID_GS_DONE:
- return (OpId == OP_GS_NOP)?
- (StreamId == STREAM_ID_NONE_) :
- (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
- default:
- return StreamId == STREAM_ID_NONE_;
+ if (!isGFX11Plus(STI)) {
+ switch (MsgId) {
+ case ID_GS_PreGFX11:
+ return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
+ case ID_GS_DONE_PreGFX11:
+ return (OpId == OP_GS_NOP) ?
+ (StreamId == STREAM_ID_NONE_) :
+ (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
+ }
}
+ return StreamId == STREAM_ID_NONE_;
}
-bool msgRequiresOp(int64_t MsgId) {
- return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
+bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
+ return MsgId == ID_SYSMSG ||
+ (!isGFX11Plus(STI) &&
+ (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}
-bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
- return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
+bool msgSupportsStream(int64_t MsgId, int64_t OpId,
+ const MCSubtargetInfo &STI) {
+ return !isGFX11Plus(STI) &&
+ (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
+ OpId != OP_GS_NOP;
}
-void decodeMsg(unsigned Val,
- uint16_t &MsgId,
- uint16_t &OpId,
- uint16_t &StreamId) {
- MsgId = Val & ID_MASK_;
- OpId = (Val & OP_MASK_) >> OP_SHIFT_;
- StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
+void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
+ uint16_t &StreamId, const MCSubtargetInfo &STI) {
+ MsgId = Val & getMsgIdMask(STI);
+ if (isGFX11Plus(STI)) {
+ OpId = 0;
+ StreamId = 0;
+ } else {
+ OpId = (Val & OP_MASK_) >> OP_SHIFT_;
+ StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
+ }
}
uint64_t encodeMsg(uint64_t MsgId,
uint64_t OpId,
uint64_t StreamId) {
- return (MsgId << ID_SHIFT_) |
- (OpId << OP_SHIFT_) |
- (StreamId << STREAM_ID_SHIFT_);
+ return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
} // namespace SendMsg
@@ -1427,6 +1676,10 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) {
}
}
+bool isKernelCC(const Function *Func) {
+ return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
+}
+
bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
@@ -1448,7 +1701,8 @@ bool hasG16(const MCSubtargetInfo &STI) {
}
bool hasPackedD16(const MCSubtargetInfo &STI) {
- return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
+ return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) &&
+ !isSI(STI);
}
bool isSI(const MCSubtargetInfo &STI) {
@@ -1467,6 +1721,18 @@ bool isGFX9(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
+bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
+ return isGFX9(STI) || isGFX10(STI);
+}
+
+bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
+ return isVI(STI) || isGFX9(STI) || isGFX10(STI);
+}
+
+bool isGFX8Plus(const MCSubtargetInfo &STI) {
+ return isVI(STI) || isGFX9Plus(STI);
+}
+
bool isGFX9Plus(const MCSubtargetInfo &STI) {
return isGFX9(STI) || isGFX10Plus(STI);
}
@@ -1475,7 +1741,29 @@ bool isGFX10(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}
-bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); }
+bool isGFX10Plus(const MCSubtargetInfo &STI) {
+ return isGFX10(STI) || isGFX11Plus(STI);
+}
+
+bool isGFX11(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
+}
+
+bool isGFX11Plus(const MCSubtargetInfo &STI) {
+ return isGFX11(STI);
+}
+
+bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
+ return !isGFX11Plus(STI);
+}
+
+bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
+ return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
+}
+
+bool isGFX10Before1030(const MCSubtargetInfo &STI) {
+ return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
+}
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
@@ -1497,10 +1785,29 @@ bool isGFX90A(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
}
+bool isGFX940(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts];
+}
+
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}
+bool hasMAIInsts(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
+}
+
+bool hasVOPD(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureVOPD];
+}
+
+int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
+ int32_t ArgNumVGPR) {
+ if (has90AInsts && ArgNumAGPR)
+ return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
+ return std::max(ArgNumVGPR, ArgNumAGPR);
+}
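A worked example of the combined accounting (illustrative): on gfx90a the two register pools are unified and the VGPR count is rounded up to a multiple of 4 before the AGPRs are appended:

  int32_t N = getTotalNumVGPRs(/*has90AInsts=*/true, /*ArgNumAGPR=*/5,
                               /*ArgNumVGPR=*/6);
  // alignTo(6, 4) + 5 == 13; without gfx90a insts: std::max(6, 5) == 6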
+
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
@@ -1508,13 +1815,6 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
Reg == AMDGPU::SCC;
}
-bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
- for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
- if (*R == Reg1) return true;
- }
- return false;
-}
-
#define MAP_REG2REG \
using namespace AMDGPU; \
switch(Reg) { \
@@ -1554,6 +1854,9 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_GFXPRE11_GFX11PLUS(M0) \
+ CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
+ CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
}
#define CASE_CI_VI(node) \
@@ -1563,6 +1866,12 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
#define CASE_VI_GFX9PLUS(node) \
case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
+#define CASE_GFXPRE11_GFX11PLUS(node) \
+ case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
+
+#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
+ case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
+
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
if (STI.getTargetTriple().getArch() == Triple::r600)
return Reg;
@@ -1571,9 +1880,13 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
+#undef CASE_GFXPRE11_GFX11PLUS
+#undef CASE_GFXPRE11_GFX11PLUS_TO
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
+#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
+#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
unsigned mc2PseudoReg(unsigned Reg) {
MAP_REG2REG
@@ -1581,6 +1894,8 @@ unsigned mc2PseudoReg(unsigned Reg) {
#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
+#undef CASE_GFXPRE11_GFX11PLUS
+#undef CASE_GFXPRE11_GFX11PLUS_TO
#undef MAP_REG2REG
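// The mapping above is an X-macro: a single register case list is expanded
// twice with different CASE_* definitions, once for pseudo->MC and once, with
// the token-pasted suffixes swapped, for MC->pseudo. A self-contained sketch
// of the pattern, using hypothetical register numbers rather than the real
// AMDGPU enums:
#include <cassert>

enum : unsigned { M0 = 1, M0_gfxpre11 = 100, M0_gfx11plus = 200 };

#define REG_LIST CASE(M0)

static unsigned pseudoToMC(unsigned Reg, bool IsGFX11Plus) {
#define CASE(node)                                                            \
  case node:                                                                  \
    return IsGFX11Plus ? node##_gfx11plus : node##_gfxpre11;
  switch (Reg) { REG_LIST }
#undef CASE
  return Reg; // not in the list: already a hardware register
}

static unsigned mcToPseudo(unsigned Reg) {
#define CASE(node)                                                            \
  case node##_gfxpre11:                                                       \
  case node##_gfx11plus:                                                      \
    return node;
  switch (Reg) { REG_LIST }
#undef CASE
  return Reg;
}

int main() {
  assert(pseudoToMC(M0, /*IsGFX11Plus=*/true) == M0_gfx11plus);
  assert(mcToPseudo(M0_gfx11plus) == M0);
  return 0;
}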
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@@ -1934,7 +2249,7 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
}
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
- // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
+ // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
if (AMDGPU::isGFX10(ST))
return Signed ? 12 : 11;
@@ -2029,7 +2344,8 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
-#define GET_Gfx10PlusBufferFormat_IMPL
+#define GET_Gfx10BufferFormat_IMPL
+#define GET_Gfx11PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"
} // end anonymous namespace
@@ -2042,16 +2358,20 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
uint8_t NumFormat,
const MCSubtargetInfo &STI) {
- return isGFX10Plus(STI)
- ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
+ return isGFX11Plus(STI)
+ ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
NumFormat)
- : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
+ : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
+ NumComponents, NumFormat)
+ : getGfx9BufferFormatInfo(BitsPerComp,
+ NumComponents, NumFormat);
}
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
const MCSubtargetInfo &STI) {
- return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format)
- : getGfx9BufferFormatInfo(Format);
+ return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
+ : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
+ : getGfx9BufferFormatInfo(Format);
}
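// The dispatch above now selects among three per-generation format tables
// (GFX9, GFX10, GFX11+) rather than the previous GFX9/GFX10+ pair. The
// nested conditional is equivalent to the following sketch, using the same
// TableGen-generated lookups:
//   if (isGFX11Plus(STI)) return getGfx11PlusBufferFormatInfo(Format);
//   if (isGFX10(STI))     return getGfx10BufferFormatInfo(Format);
//   return getGfx9BufferFormatInfo(Format);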
} // namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 4516b511f3c8..dffeec10a14a 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -50,10 +50,19 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
-/// \returns True if HSA OS ABI Version identification is 3 or 4,
+/// \returns True if HSA OS ABI Version identification is 3 or above,
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
+/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr.
+unsigned getMultigridSyncArgImplicitArgPosition();
+
+/// \returns The offset of the hostcall pointer argument from implicitarg_ptr.
+unsigned getHostcallImplicitArgPosition();
+
+/// \returns Code object version.
+unsigned getAmdhsaCodeObjectVersion();
+
struct GcnBufferFormatInfo {
unsigned Format;
unsigned BitsPerComp;
@@ -62,12 +71,19 @@ struct GcnBufferFormatInfo {
unsigned DataFormat;
};
+struct MAIInstInfo {
+ uint16_t Opcode;
+ bool is_dgemm;
+ bool is_gfx940_xdl;
+};
+
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#define GET_MIMGBiASMapping_DECL
+#define GET_MAIInstInfoTable_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -352,6 +368,11 @@ struct MIMGG16MappingInfo {
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
+struct WMMAOpcodeMappingInfo {
+ unsigned Opcode2Addr;
+ unsigned Opcode3Addr;
+};
+
LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
@@ -382,6 +403,7 @@ struct MIMGInfo {
uint8_t MIMGEncoding;
uint8_t VDataDwords;
uint8_t VAddrDwords;
+ uint8_t VAddrOperands;
};
LLVM_READONLY
@@ -439,6 +461,16 @@ LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);
LLVM_READONLY
+bool isVOPC64DPP(unsigned Opc);
+
+/// Returns true if MAI operation is a double precision GEMM.
+LLVM_READONLY
+bool getMAIIsDGEMM(unsigned Opc);
+
+LLVM_READONLY
+bool getMAIIsGFX940XDL(unsigned Opc);
+
+LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
uint8_t NumFormat,
@@ -450,6 +482,12 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
+LLVM_READONLY
+unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
+
+LLVM_READONLY
+unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
@@ -496,7 +534,7 @@ struct Waitcnt {
unsigned LgkmCnt = ~0u;
unsigned VsCnt = ~0u;
- Waitcnt() {}
+ Waitcnt() = default;
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
: VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
@@ -555,11 +593,14 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
-/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
-/// \p Expcnt = \p Waitcnt[6:4]
-/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only)
-/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only)
+/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
+/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
+/// \p Vmcnt = \p Waitcnt[15:10] (gfx11+)
+/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
+/// \p Expcnt = \p Waitcnt[2:0] (gfx11+)
+/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
+/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
+/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11+)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
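// A hedged sketch of the gfx11+ decode described in the table above. Field
// positions are taken from the comment; unpackBits is a local helper for
// illustration, not an LLVM API.
static unsigned unpackBits(unsigned Val, unsigned Lo, unsigned Width) {
  return (Val >> Lo) & ((1u << Width) - 1); // extract Val[Lo+Width-1 : Lo]
}

static void decodeWaitcntGfx11(unsigned Waitcnt, unsigned &Vmcnt,
                               unsigned &Expcnt, unsigned &Lgkmcnt) {
  Expcnt = unpackBits(Waitcnt, 0, 3);  // Waitcnt[2:0]
  Lgkmcnt = unpackBits(Waitcnt, 4, 6); // Waitcnt[9:4]
  Vmcnt = unpackBits(Waitcnt, 10, 6);  // Waitcnt[15:10]
}
// e.g. decodeWaitcntGfx11(0xFC73, V, E, L) yields Vmcnt=63, Expcnt=3, Lgkmcnt=7.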
@@ -581,12 +622,15 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
-/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
-/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only)
-/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only)
-/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
+/// Waitcnt[2:0] = \p Expcnt (gfx11+)
+/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
+/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
+/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
+/// Waitcnt[9:4] = \p Lgkmcnt (gfx11+)
+/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
+/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
+/// Waitcnt[15:10] = \p Vmcnt (gfx11+)
+/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
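// The trickiest case in the table above is gfx9/gfx10, where Vmcnt is split
// across two bit ranges. A sketch of that layout (gfx9 Lgkmcnt width shown;
// the helper is illustrative, not an LLVM API):
static unsigned encodeWaitcntGfx9(unsigned Vmcnt, unsigned Expcnt,
                                  unsigned Lgkmcnt) {
  unsigned W = 0;
  W |= Vmcnt & 0xf;                // Waitcnt[3:0]   = Vmcnt[3:0]
  W |= (Expcnt & 0x7) << 4;        // Waitcnt[6:4]   = Expcnt
  W |= (Lgkmcnt & 0xf) << 8;       // Waitcnt[11:8]  = Lgkmcnt (gfx9 width)
  W |= ((Vmcnt >> 4) & 0x3) << 14; // Waitcnt[15:14] = Vmcnt[5:4]
  return W;
}
// e.g. encodeWaitcntGfx9(63, 7, 15) == 0xCF7F.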
@@ -598,10 +642,7 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
namespace Hwreg {
LLVM_READONLY
-int64_t getHwregId(const StringRef Name);
-
-LLVM_READNONE
-bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
+int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
LLVM_READNONE
bool isValidHwreg(int64_t Id);
@@ -622,6 +663,18 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
} // namespace Hwreg
+namespace DepCtr {
+
+int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
+int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
+ const MCSubtargetInfo &STI);
+bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
+ const MCSubtargetInfo &STI);
+bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
+ bool &IsDefault, const MCSubtargetInfo &STI);
+
+} // namespace DepCtr
+
namespace Exp {
bool getTgtName(unsigned Id, StringRef &Name, int &Index);
@@ -653,13 +706,14 @@ bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
-int64_t getUnifiedFormat(const StringRef Name);
+int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
-StringRef getUnifiedFormatName(unsigned Id);
+StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
-bool isValidUnifiedFormat(unsigned Val);
+bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
-int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
+int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
+ const MCSubtargetInfo &STI);
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
@@ -670,19 +724,19 @@ unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
namespace SendMsg {
LLVM_READONLY
-int64_t getMsgId(const StringRef Name);
+int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);
LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
LLVM_READNONE
-StringRef getMsgName(int64_t MsgId);
+StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);
LLVM_READNONE
-StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
LLVM_READNONE
-bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
@@ -693,15 +747,13 @@ bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
const MCSubtargetInfo &STI, bool Strict = true);
LLVM_READNONE
-bool msgRequiresOp(int64_t MsgId);
+bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
LLVM_READNONE
-bool msgSupportsStream(int64_t MsgId, int64_t OpId);
+bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
-void decodeMsg(unsigned Val,
- uint16_t &MsgId,
- uint16_t &OpId,
- uint16_t &StreamId);
+void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
+ uint16_t &StreamId, const MCSubtargetInfo &STI);
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
@@ -738,6 +790,8 @@ bool isEntryFunctionCC(CallingConv::ID CC);
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);
+bool isKernelCC(const Function *Func);
+
// FIXME: Remove this when calling conventions cleaned up
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
@@ -761,22 +815,31 @@ bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX9_GFX10(const MCSubtargetInfo &STI);
+bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
+bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
+bool isNotGFX10Plus(const MCSubtargetInfo &STI);
+bool isGFX10Before1030(const MCSubtargetInfo &STI);
+bool isGFX11(const MCSubtargetInfo &STI);
+bool isGFX11Plus(const MCSubtargetInfo &STI);
+bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
+bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
+bool hasMAIInsts(const MCSubtargetInfo &STI);
+bool hasVOPD(const MCSubtargetInfo &STI);
+int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
-/// Is there any intersection between registers
-bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
-
/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
@@ -931,7 +994,7 @@ inline bool isLegal64BitDPPControl(unsigned DC) {
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
-// Track defaults for fields in the MODE registser.
+// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
/// Floating point opcodes that support exception flag gathering quiet and
/// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
deleted file mode 100644
index 83ef68cc3f60..000000000000
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- AMDGPULDSUtils.h - LDS related helper functions -*- C++ -*----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// AMDGPU LDS related helper utility functions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
-#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/IR/Constants.h"
-
-namespace llvm {
-
-class ConstantExpr;
-
-namespace AMDGPU {
-
-bool isKernelCC(const Function *Func);
-
-Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
-
-std::vector<GlobalVariable *> findVariablesToLower(Module &M,
- const Function *F = nullptr);
-
-/// Replace all uses of constant \p C with instructions in \p F.
-void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
-} // end namespace AMDGPU
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index a83ff6667956..83d7cbdb183c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -1,33 +1,32 @@
-//===- AMDGPULDSUtils.cpp -------------------------------------------------===//
+//===-- AMDGPUMemoryUtils.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// AMDGPU LDS related helper utility functions.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDGPULDSUtils.h"
+#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"
+#define DEBUG_TYPE "amdgpu-memory-utils"
+
using namespace llvm;
namespace llvm {
namespace AMDGPU {
-bool isKernelCC(const Function *Func) {
- return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
-}
-
Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
GV->getValueType());
@@ -139,6 +138,83 @@ std::vector<GlobalVariable *> findVariablesToLower(Module &M,
return LocalVars;
}
+bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
+ Instruction *DefInst = Def->getMemoryInst();
+
+ if (isa<FenceInst>(DefInst))
+ return false;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_s_barrier:
+ case Intrinsic::amdgcn_wave_barrier:
+ case Intrinsic::amdgcn_sched_barrier:
+ return false;
+ default:
+ break;
+ }
+ }
+
+ // Ignore atomics that do not alias the original load; any atomic is a
+ // universal MemoryDef from MemorySSA's point of view, just like a fence.
+ const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
+ return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
+ };
+
+ if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
+ checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
+ return false;
+
+ return true;
+}
+
+bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
+ AAResults *AA) {
+ MemorySSAWalker *Walker = MSSA->getWalker();
+ SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
+ SmallSet<MemoryAccess *, 8> Visited;
+ MemoryLocation Loc(MemoryLocation::get(Load));
+
+ LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
+
+ // Start with the nearest dominating clobbering access: it is either
+ // live-on-entry (nothing to do, the load is not clobbered), a MemoryDef,
+ // or a MemoryPhi if several MemoryDefs can define this memory state. In
+ // the Phi case add all incoming defs to the WorkList and keep walking up,
+ // checking all definitions of this memory location until the root. If the
+ // defs are exhausted and we reach the entry state, there is no clobber.
+ // Along the scan, ignore barriers and fences, which MemorySSA considers
+ // clobbers even though they do not actually write any memory.
+ while (!WorkList.empty()) {
+ MemoryAccess *MA = WorkList.pop_back_val();
+ if (!Visited.insert(MA).second)
+ continue;
+
+ if (MSSA->isLiveOnEntryDef(MA))
+ continue;
+
+ if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
+ LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n');
+
+ if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
+ LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
+ return true;
+ }
+
+ WorkList.push_back(
+ Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
+ continue;
+ }
+
+ const MemoryPhi *Phi = cast<MemoryPhi>(MA);
+ for (auto &Use : Phi->incoming_values())
+ WorkList.push_back(cast<MemoryAccess>(&Use));
+ }
+
+ LLVM_DEBUG(dbgs() << " -> no clobber\n");
+ return false;
+}
+
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
new file mode 100644
index 000000000000..65ed02ca62de
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -0,0 +1,51 @@
+//===- AMDGPUMemoryUtils.h - Memory related helper functions -*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
+
+#include <vector>
+
+namespace llvm {
+
+struct Align;
+class AAResults;
+class ConstantExpr;
+class DataLayout;
+class Function;
+class GlobalVariable;
+class LoadInst;
+class MemoryDef;
+class MemorySSA;
+class Module;
+class Value;
+
+namespace AMDGPU {
+
+Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
+
+std::vector<GlobalVariable *> findVariablesToLower(Module &M,
+ const Function *F = nullptr);
+
+/// Replace all uses of constant \p C with instructions in \p F.
+void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
+
+/// Given a \p Def that clobbers a load from \p Ptr according to MemorySSA,
+/// check whether it is an actual memory update or only an artificial clobber
+/// inserted to enforce ordering constraints.
+bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA);
+
+/// Check if a \p Load is clobbered in its function.
+bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
+ AAResults *AA);
+
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index f6b5975f1934..4ad93f7b0b68 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -209,6 +209,11 @@ void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
}
+// Set the number of used agprs in the metadata.
+void AMDGPUPALMetadata::setNumUsedAgprs(CallingConv::ID CC, unsigned Val) {
+ getHwStage(CC)[".agpr_count"] = Val;
+}
+
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 7fdd9a8429c1..a45a799e38a9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -69,6 +69,10 @@ public:
// the shader stage to determine the number of vgprs to allocate.
void setNumUsedVgprs(unsigned CC, unsigned Val);
+ // Set the number of used agprs in the metadata. This is an optional
+ // advisory record for logging etc.
+ void setNumUsedAgprs(unsigned CC, unsigned Val);
+
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
diff --git a/llvm/lib/Target/AMDGPU/VIInstrFormats.td b/llvm/lib/Target/AMDGPU/VIInstrFormats.td
index bd65a495fa72..7393ef6c2a2d 100644
--- a/llvm/lib/Target/AMDGPU/VIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/VIInstrFormats.td
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-class EXPe_vi : EXPe {
+class EXPe_vi : EXPe_ComprVM {
let Inst{31-26} = 0x31; //encoding
}
diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
new file mode 100644
index 000000000000..c63fbbc241d9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -0,0 +1,180 @@
+//===-- VINTERPInstructions.td - VINTERP Instruction Definitions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VINTERP encoding
+//===----------------------------------------------------------------------===//
+
+class VINTERPe_gfx11 <bits<7> op, VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ bits<4> src0_modifiers;
+ bits<9> src0;
+ bits<3> src1_modifiers;
+ bits<9> src1;
+ bits<3> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+ bits<3> waitexp;
+
+ let Inst{31-26} = 0x33; // VOP3P encoding
+ let Inst{25-24} = 0x1; // VINTERP sub-encoding
+ let Inst{23} = 0; // reserved
+
+ let Inst{7-0} = vdst;
+ let Inst{10-8} = waitexp;
+ let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
+ let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
+ let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
+ let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3)
+ let Inst{15} = clamp;
+ let Inst{22-16} = op;
+ let Inst{40-32} = src0;
+ let Inst{49-41} = src1;
+ let Inst{58-50} = src2;
+ let Inst{61} = src0_modifiers{0}; // neg(0)
+ let Inst{62} = src1_modifiers{0}; // neg(1)
+ let Inst{63} = src2_modifiers{0}; // neg(2)
+}
+
+//===----------------------------------------------------------------------===//
+// VOP3 VINTERP
+//===----------------------------------------------------------------------===//
+
+class VINTERP_Pseudo <string OpName, VOPProfile P, list<dag> pattern = []> :
+ VOP3_Pseudo<OpName, P, pattern, 0, 0> {
+ let AsmMatchConverter = "cvtVINTERP";
+ let mayRaiseFPException = 0;
+
+ let VOP3_OPSEL = 1;
+ let VINTERP = 1;
+}
+
+class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily> :
+ VOP3_Real <ps, EncodingFamily> {
+ let VINTERP = 1;
+}
+
+def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
+ let HasOpSel = 0;
+ let HasModifiers = 1;
+
+ let Outs64 = (outs VGPR_32:$vdst);
+ let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
+ Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
+ Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
+ clampmod:$clamp,
+ wait_exp:$waitexp);
+
+ let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$waitexp";
+}
+
+class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
+ let HasOpSel = 1;
+ let HasModifiers = 1;
+
+ let Outs64 = (outs VGPR_32:$vdst);
+ let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
+ Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
+ Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
+ clampmod:$clamp, op_sel0:$op_sel,
+ wait_exp:$waitexp);
+
+ let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
+}
+
+//===----------------------------------------------------------------------===//
+// VINTERP Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGFX11Plus in {
+
+let Uses = [M0, EXEC, MODE] in {
+def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>;
+def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>;
+def V_INTERP_P10_F16_F32_inreg :
+ VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
+def V_INTERP_P2_F16_F32_inreg :
+ VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
+} // Uses = [M0, EXEC, MODE]
+
+let Uses = [M0, EXEC] in {
+def V_INTERP_P10_RTZ_F16_F32_inreg :
+ VINTERP_Pseudo <"v_interp_p10_rtz_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
+def V_INTERP_P2_RTZ_F16_F32_inreg :
+ VINTERP_Pseudo <"v_interp_p2_rtz_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
+} // Uses = [M0, EXEC]
+
+} // SubtargetPredicate = isGFX11Plus
+
+class VInterpF32Pat <SDPatternOperator op, Instruction inst> : GCNPat <
+ (f32 (op
+ (VINTERPMods f32:$src0, i32:$src0_modifiers),
+ (VINTERPMods f32:$src1, i32:$src1_modifiers),
+ (VINTERPMods f32:$src2, i32:$src2_modifiers))),
+ (inst $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ $src2_modifiers, $src2,
+ 0, /* clamp */
+ 7) /* wait_exp */
+>;
+
+def VINTERP_OPSEL {
+ int LOW = 0;
+ int HIGH = 0xa;
+}
+
+class VInterpF16Pat <SDPatternOperator op, Instruction inst,
+ ValueType dst_type, bit high,
+ list<ComplexPattern> pat> : GCNPat <
+ (dst_type (op
+ (pat[0] f32:$src0, i32:$src0_modifiers),
+ (pat[1] f32:$src1, i32:$src1_modifiers),
+ (pat[2] f32:$src2, i32:$src2_modifiers),
+ !if(high, (i1 -1), (i1 0)))),
+ (inst $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ $src2_modifiers, $src2,
+ 0, /* clamp */
+ /* op_sel = 0 */
+ 7) /* wait_exp */
+>;
+
+multiclass VInterpF16Pat <SDPatternOperator op, Instruction inst,
+ ValueType dst_type, list<ComplexPattern> high_pat> {
+ def : VInterpF16Pat<op, inst, dst_type, 0,
+ [VINTERPMods, VINTERPMods, VINTERPMods]>;
+ def : VInterpF16Pat<op, inst, dst_type, 1, high_pat>;
+}
+
+def : VInterpF32Pat<int_amdgcn_interp_inreg_p10, V_INTERP_P10_F32_inreg>;
+def : VInterpF32Pat<int_amdgcn_interp_inreg_p2, V_INTERP_P2_F32_inreg>;
+defm : VInterpF16Pat<int_amdgcn_interp_inreg_p10_f16,
+ V_INTERP_P10_F16_F32_inreg, f32,
+ [VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
+defm : VInterpF16Pat<int_amdgcn_interp_inreg_p2_f16,
+ V_INTERP_P2_F16_F32_inreg, f16,
+ [VINTERPModsHi, VINTERPMods, VINTERPMods]>;
+
+//===----------------------------------------------------------------------===//
+// VINTERP Real Instructions
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11" in {
+ multiclass VINTERP_Real_gfx11 <bits<7> op> {
+ def _gfx11 :
+ VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX11>,
+ VINTERPe_gfx11<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ }
+}
+
+defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11<0x000>;
+defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11<0x001>;
+defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11<0x002>;
+defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11<0x003>;
+defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x004>;
+defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11<0x005>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 48548d8b6722..1d374a9f90ba 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -59,9 +59,9 @@ class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1On
let AsmVariantName = AMDGPUAsmVariants.Default;
}
-class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
+class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > :
VOP_Real <ps>,
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let VALU = 1;
@@ -110,13 +110,18 @@ class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
}
multiclass VOP1Inst <string opName, VOPProfile P,
- SDPatternOperator node = null_frag> {
+ SDPatternOperator node = null_frag, int VOPDOp = -1> {
// We only want to set this on the basic, non-SDWA or DPP forms.
- defvar should_mov_imm = !eq(opName, "v_mov_b32");
+ defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
+ !eq(opName, "v_mov_b64"));
let isMoveImm = should_mov_imm in {
- def _e32 : VOP1_Pseudo <opName, P>;
- def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
+ if !eq(VOPDOp, -1) then
+ def _e32 : VOP1_Pseudo <opName, P>;
+ else
+ // Only for V_MOV_B32
+ def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">;
+ def _e64 : VOP3InstBase <opName, P, node>;
}
foreach _ = BoolToList<P.HasExtSDWA>.ret in
@@ -125,6 +130,11 @@ multiclass VOP1Inst <string opName, VOPProfile P,
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP1_DPP_Pseudo <opName, P>;
+ let SubtargetPredicate = isGFX11Plus in {
+ foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
+ } // End SubtargetPredicate = isGFX11Plus
+
def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
def : MnemonicAlias<opName#"_e64", opName>, LetDummies;
@@ -141,7 +151,9 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
VOPProfile<[dstVt, srcVt, untyped, untyped]> {
let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
+ let InsVOP3Base = (ins Src0DPP:$src0, clampmod:$clamp, omod:$omod);
let Asm64 = "$vdst, $src0$clamp$omod";
+ let AsmVOP3DPPBase = Asm64;
let HasModifiers = 0;
let HasClamp = 1;
@@ -151,6 +163,12 @@ def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
+def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
+ let HasExtVOP3DPP = 0;
+}
+
+// OMod clears exceptions when set. OMod was always an operand, but it is
+// now explicitly set.
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
VOPProfile<[dstVt, srcVt, untyped, untyped]> {
@@ -165,11 +183,21 @@ def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
//===----------------------------------------------------------------------===//
let VOPAsmPrefer32Bit = 1 in {
-defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
+defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
+}
+
+def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
+ let InsVOPDX = (ins Src0RC32:$src0X);
+ let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
+ let InsVOPDY = (ins Src0RC32:$src0Y);
+ let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
+defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
+
+let SubtargetPredicate = isGFX940Plus in
+defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
// FIXME: Specify SchedRW for READFIRSTLANE_B32
@@ -282,7 +310,7 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
-defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
+defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;
@@ -472,7 +500,7 @@ let SubtargetPredicate = isGFX9Only in {
} // End SubtargetPredicate = isGFX9Only
let SubtargetPredicate = isGFX10Plus in {
- defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;
+ defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;
let Uses = [M0] in {
defm V_MOVRELSD_2_B32 :
@@ -498,6 +526,17 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1
let isAsCheapAsAMove = 1;
}
+let SubtargetPredicate = isGFX11Plus in {
+ // Restrict src0 to be VGPR
+ def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
+ getVOP1Pat64<int_amdgcn_permlane64,
+ VOP_MOVRELS>.ret,
+ /*VOP1Only=*/ 1>;
+ defm V_NOT_B16 : VOP1Inst<"v_not_b16", VOP_I16_I16>;
+ defm V_CVT_I32_I16 : VOP1Inst<"v_cvt_i32_i16", VOP_I32_I16>;
+ defm V_CVT_U32_U16 : VOP1Inst<"v_cvt_u32_u16", VOP_I16_I16>;
+} // End SubtargetPredicate = isGFX11Plus
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
@@ -517,9 +556,9 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1
let Inst{31-25} = 0x3f;
}
-class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> :
+class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> :
VOP1_DPP<op, ps, p, 1>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> {
+ SIMCInstr <ps.PseudoInstr, subtarget> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
}
@@ -539,10 +578,112 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
}
//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
+ multiclass VOP1Only_Real_gfx11<bits<9> op> {
+ let IsSingle = 1 in
+ def _gfx11 :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX11>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP1_Real_e32_gfx11<bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _e32_gfx11 :
+ VOP1_Real<ps, SIEncodingFamily.GFX11>,
+ VOP1e<op{7-0}, ps.Pfl>;
+ }
+ multiclass VOP1_Real_e32_with_name_gfx11<bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.AsmOperands in {
+ defm NAME : VOP1_Real_e32_gfx11<op, opName>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
+ }
+ }
+ multiclass VOP1_Real_e64_gfx11<bits<9> op> {
+ def _e64_gfx11 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
+ VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+ multiclass VOP1_Real_dpp_gfx11<bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _dpp_gfx11 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11> {
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+ multiclass VOP1_Real_dpp_with_name_gfx11<bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in {
+ defm NAME : VOP1_Real_dpp_gfx11<op, opName>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
+ }
+ }
+ multiclass VOP1_Real_dpp8_gfx11<bits<9> op, string opName = NAME> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ def _dpp8_gfx11 : VOP1_DPP8<op{7-0}, ps> {
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+ multiclass VOP1_Real_dpp8_with_name_gfx11<bits<9> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in {
+ defm NAME : VOP1_Real_dpp8_gfx11<op, opName>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
+ }
+ }
+} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+
+multiclass VOP1_Realtriple_e64_gfx11<bits<9> op> {
+ defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
+}
+multiclass VOP1_Realtriple_e64_with_name_gfx11<bits<9> op, string opName,
+ string asmName> {
+ defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName,
+ asmName>;
+}
+
+multiclass VOP1_Real_FULL_gfx11<bits<9> op> :
+ VOP1_Real_e32_gfx11<op>, VOP1_Realtriple_e64_gfx11<op>,
+ VOP1_Real_dpp_gfx11<op>, VOP1_Real_dpp8_gfx11<op>;
+
+multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
+ string asmName> :
+ VOP1_Real_e32_with_name_gfx11<op, opName, asmName>,
+ VOP1_Real_dpp_with_name_gfx11<op, opName, asmName>,
+ VOP1_Real_dpp8_with_name_gfx11<op, opName, asmName>;
+
+multiclass VOP1_Real_FULL_with_name_gfx11<bits<9> op, string opName,
+ string asmName> :
+ VOP1_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>,
+ VOP1_Realtriple_e64_with_name_gfx11<op, opName, asmName>;
+
+multiclass VOP1_Real_NO_DPP_gfx11<bits<9> op> :
+ VOP1_Real_e32_gfx11<op>, VOP1_Real_e64_gfx11<op>;
+
+defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c,
+ "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
+defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d,
+ "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
+defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039,
+ "V_FFBH_U32", "v_clz_i32_u32">;
+defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a,
+ "V_FFBL_B32", "v_ctz_i32_b32">;
+defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b,
+ "V_FFBH_I32", "v_cls_i32">;
+defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>;
+defm V_NOT_B16 : VOP1_Real_FULL_gfx11<0x069>;
+defm V_CVT_I32_I16 : VOP1_Real_FULL_gfx11<0x06a>;
+defm V_CVT_U32_U16 : VOP1_Real_FULL_gfx11<0x06b>;
+
+//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
multiclass VOP1Only_Real_gfx10<bits<9> op> {
def _gfx10 :
VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
@@ -567,50 +708,59 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
- def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
+ def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
let DecoderNamespace = "SDWA10";
}
}
multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
let DecoderNamespace = "DPP8";
}
}
-} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
multiclass VOP1_Real_gfx10<bits<9> op> :
VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
VOP1_Real_dpp8_gfx10<op>;
-defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
-defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
-defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
-defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
-defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
-defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
-defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
-defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
-defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
-defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
-defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
-defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
-defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
-defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
-defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
-defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
-defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
-defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
-defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
-defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
-defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
-defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
-defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;
-
-defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>;
-defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>;
+multiclass VOP1_Real_gfx10_FULL_gfx11<bits<9> op> :
+ VOP1_Real_gfx10<op>, VOP1_Real_FULL_gfx11<op>;
+
+multiclass VOP1_Real_gfx10_NO_DPP_gfx11<bits<9> op> :
+ VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;
+
+multiclass VOP1Only_Real_gfx10_gfx11<bits<9> op> :
+ VOP1Only_Real_gfx10<op>, VOP1Only_Real_gfx11<op>;
+
+defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>;
+defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>;
+defm V_CVT_F16_U16 : VOP1_Real_gfx10_FULL_gfx11<0x050>;
+defm V_CVT_F16_I16 : VOP1_Real_gfx10_FULL_gfx11<0x051>;
+defm V_CVT_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x052>;
+defm V_CVT_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x053>;
+defm V_RCP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x054>;
+defm V_SQRT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x055>;
+defm V_RSQ_F16 : VOP1_Real_gfx10_FULL_gfx11<0x056>;
+defm V_LOG_F16 : VOP1_Real_gfx10_FULL_gfx11<0x057>;
+defm V_EXP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x058>;
+defm V_FREXP_MANT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x059>;
+defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05a>;
+defm V_FLOOR_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05b>;
+defm V_CEIL_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05c>;
+defm V_TRUNC_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05d>;
+defm V_RNDNE_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05e>;
+defm V_FRACT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05f>;
+defm V_SIN_F16 : VOP1_Real_gfx10_FULL_gfx11<0x060>;
+defm V_COS_F16 : VOP1_Real_gfx10_FULL_gfx11<0x061>;
+defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10_FULL_gfx11<0x062>;
+defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x063>;
+defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x064>;
+
+defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>;
+defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>;
//===----------------------------------------------------------------------===//
// GFX7, GFX10.
@@ -635,16 +785,19 @@ multiclass VOP1_Real_gfx7<bits<9> op> :
multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;
+multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
+ VOP1_Real_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;
+
defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;
-defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>;
-defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>;
-defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>;
-defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>;
+defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x017>;
+defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x018>;
+defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x019>;
+defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x01a>;
//===----------------------------------------------------------------------===//
-// GFX6, GFX7, GFX10.
+// GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
@@ -666,65 +819,71 @@ multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;
-defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
-defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
-defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
-defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
-defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
-defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
-defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;
-
-defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
-defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
-defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
-defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
-defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
-defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
-defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
-defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
-defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
-defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
+multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<bits<9> op> :
+ VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL_gfx11<op>;
+
+multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
+ VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;
+
+defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
+defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
+defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
+defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
+defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
+defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
+defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;
+
+defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x000>;
+defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x001>;
+defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x003>;
+defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x004>;
+defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>;
+defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>;
+defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>;
+defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>;
+defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00a>;
+defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
-defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
-defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
-defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
-defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
-defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
-defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
-defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
-defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
-defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
-defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
-defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
-defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
-defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
-defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
-defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
-defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
-defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
-defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
-defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
-defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
-defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
-defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
-defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
-defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
-defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
-defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
-defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
+defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>;
+defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x00f>;
+defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x010>;
+defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x011>;
+defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x012>;
+defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x013>;
+defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x014>;
+defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x015>;
+defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x016>;
+defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x020>;
+defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x021>;
+defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x022>;
+defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x023>;
+defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x024>;
+defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x025>;
+defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x027>;
+defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02a>;
+defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02b>;
+defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02e>;
+defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x02f>;
+defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x031>;
+defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x033>;
+defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x034>;
+defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x035>;
+defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x036>;
+defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x037>;
+defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
-defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
-defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
-defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
-defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
-defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03c>;
+defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03d>;
+defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03e>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x03f>;
+defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
-defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>;
-defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>;
-defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>;
+defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x042>;
+defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x043>;
+defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x044>;
//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
@@ -949,14 +1108,29 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
+let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
+defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
+
//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//
-let OtherPredicates = [isGFX10Plus] in {
+let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
(V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
(as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
-} // End OtherPredicates = [isGFX10Plus]
+} // End OtherPredicates = [isGFX10Only]
+
+//===----------------------------------------------------------------------===//
+// GFX11
+//===----------------------------------------------------------------------===//
+
+let OtherPredicates = [isGFX11Only] in {
+def : GCNPat <
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
+ (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
+>;
+} // End OtherPredicates = [isGFX11Only]
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index b9ff814a4dc5..1485a1e63129 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -80,9 +80,9 @@ class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suf
let AsmVariantName = AMDGPUAsmVariants.Default;
}
-class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
+class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
VOP_Real <ps>,
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let VALU = 1;
@@ -140,15 +140,26 @@ multiclass VOP2Inst_e32<string opName,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
} // End renamedInGFX9 = GFX9Renamed
}
-
+multiclass
+ VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
+ string VOPDName, SDPatternOperator node = null_frag,
+ string revOp = opName, bit GFX9Renamed = 0> {
+ defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
+ VOPD_Component<VOPDOp, VOPDName>;
+}
multiclass VOP2Inst_e64<string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName,
bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
- def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ def _e64 : VOP3InstBase <opName, P, node, 1>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ let SubtargetPredicate = isGFX11Plus in {
+ foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
+ } // End SubtargetPredicate = isGFX11Plus
} // End renamedInGFX9 = GFX9Renamed
}
@@ -175,6 +186,22 @@ multiclass VOP2Inst<string opName,
}
}
+multiclass VOP2Inst_VOPD<string opName,
+ VOPProfile P,
+ bits<5> VOPDOp,
+ string VOPDName,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit GFX9Renamed = 0> :
+ VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
+ VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
+ VOP2Inst_sdwa<opName, P, GFX9Renamed> {
+ let renamedInGFX9 = GFX9Renamed in {
+ foreach _ = BoolToList<P.HasExtDPP>.ret in
+ def _dpp : VOP2_DPP_Pseudo <opName, P>;
+ }
+}
+
multiclass VOP2bInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
@@ -195,10 +222,15 @@ multiclass VOP2bInst <string opName,
}
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP2_DPP_Pseudo <opName, P>;
- }
+ } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]
- def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ def _e64 : VOP3InstBase <opName, P, node, 1>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ let SubtargetPredicate = isGFX11Plus in {
+ foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
+ } // End SubtargetPredicate = isGFX11Plus
}
}
}
@@ -220,16 +252,19 @@ multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
}
}
-multiclass VOP2eInst <string opName,
- VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName,
- bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
+multiclass
+ VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
+ SDPatternOperator node, string revOp, bit useSGPRInput> {
let SchedRW = [Write32Bit] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
- def _e32 : VOP2_Pseudo <opName, P>,
- Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ if !eq(VOPDOp, -1) then
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ else
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
+ VOPD_Component<VOPDOp, VOPDName>;
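+      // A VOPDOp of -1, as passed by the plain VOP2eInst wrapper below, marks
+      // opcodes that have no VOPD component.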
foreach _ = BoolToList<P.HasExtSDWA>.ret in
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
@@ -240,13 +275,29 @@ multiclass VOP2eInst <string opName,
def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
- def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ def _e64 : VOP3InstBase <opName, P, node, 1>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
let isReMaterializable = 1;
}
+
+ let SubtargetPredicate = isGFX11Plus in {
+ foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
+ } // End SubtargetPredicate = isGFX11Plus
}
}
+multiclass
+ VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
+ string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
+ : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;
+
+multiclass
+ VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
+ SDPatternOperator node = null_frag, string revOp = opName,
+ bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
+ : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;
+
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
(inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
@@ -267,12 +318,24 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
}
}
-class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+ string AsmVOPDXDeferred = ?;
+}
+
+class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
(ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
+ // Note that both src0X and imm are deferred
+ let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
+ field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);
+
field string Asm32 = "$vdst, $src0, $src1, $imm";
+ field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
+ let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
+ field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
field bit HasExt = 0;
let IsSingle = 1;
}
@@ -280,10 +343,17 @@ class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;
-class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
+ field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
+ let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
+ field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);
+
field string Asm32 = "$vdst, $src0, $imm, $src1";
+ field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
+ let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
+ field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
field bit HasExt = 0;
let IsSingle = 1;
}
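+// Note the literal's position differs between the two profiles: VOP_MADAK
+// reads "$src0, $src1, $imm" while VOP_MADMK reads "$src0, $imm, $src1", and
+// the VOPDX/VOPDY forms follow the same operand order.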
@@ -308,6 +378,10 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+ let InsVOP3Base = getIns64<Src0VOP3DPP, Src1RC64, RegisterOperand<VGPR_32>, 3,
+ 0, HasModifiers, HasModifiers, HasOMod,
+ Src0Mod, Src1Mod, Src2Mod>.ret;
+
let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
@@ -330,6 +404,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
let HasExt = 1;
let HasExtDPP = 1;
+ let HasExt32BitDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 0;
let TieRegDPP = "$src2";
@@ -337,9 +412,9 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;
-let HasExtDPP = 0 in
+let HasExtDPP = 0, HasExt32BitDPP = 0 in
def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
-let HasExtSDWA = 0, HasExt64BitDPP = 1 in
+let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in
def VOP_MAC_F64 : VOP_MAC <f64>;
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
@@ -355,6 +430,7 @@ def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
}
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
+ let HasExtVOP3DPP = 0;
let HasSrc0Mods = 1;
let HasSrc1Mods = 1;
}
@@ -368,13 +444,27 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp
let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
let AsmDPP16 = AsmDPP#"$fi";
+ let AsmVOP3DPPBase = Asm64;
+ let InsDPP = (ins DstRCDPP:$old,
+ Src0DPP:$src0,
+ Src1DPP:$src1,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+ let InsDPP8 = (ins DstRCDPP:$old,
+ Src0DPP:$src0,
+ Src1DPP:$src1,
+ dpp8:$dpp8, FI:$fi);
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
+ let OutsVOP3DPP = Outs64;
+ let OutsVOP3DPP8 = Outs64;
}
// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
+ let HasSrc2Mods = 0;
let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -384,6 +474,9 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
+ let AsmVOP3DPPBase = Asm64;
+ let OutsVOP3DPP = Outs64;
+ let OutsVOP3DPP8 = Outs64;
// Suppress src2 implied by type since the 32-bit encoding uses an
// implicit VCC use.
@@ -401,15 +494,20 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+ let InsDPP8 = (ins DstRCDPP:$old,
+ Src0DPP:$src0,
+ Src1DPP:$src1,
+ dpp8:$dpp8, FI:$fi);
let HasExt = 1;
let HasExtDPP = 1;
+ let HasExt32BitDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
}
// Read in from vcc or arbitrary SGPR.
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
+class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT, /*EnableF32SrcMods=*/1> {
let Asm32 = "$vdst, $src0, $src1";
let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -417,6 +515,7 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/
let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
let AsmDPP16 = AsmDPP#"$fi";
+ let AsmVOP3DPPBase = Asm64;
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst);
@@ -437,14 +536,22 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+ let InsDPP8 = (ins DstRCDPP:$old,
+ Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ dpp8:$dpp8, FI:$fi);
let HasExt = 1;
let HasExtDPP = 1;
+ let HasExt32BitDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
}
-def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
+def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
+def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
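+// The i16 instantiation feeds V_CNDMASK_B16 (GFX11+) below, while the i32 one
+// keeps the existing name used by V_CNDMASK_B32.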
+
+def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
let Outs32 = (outs SReg_32:$vdst);
let Outs64 = Outs32;
let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
@@ -454,6 +561,7 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
let HasExt = 0;
let HasExtDPP = 0;
+ let HasExt32BitDPP = 0;
let HasExt64BitDPP = 0;
let HasExtSDWA = 0;
let HasExtSDWA9 = 0;
@@ -471,6 +579,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
let HasExt = 0;
let HasExtDPP = 0;
+ let HasExt32BitDPP = 0;
let HasExt64BitDPP = 0;
let HasExtSDWA = 0;
let HasExtSDWA9 = 0;
@@ -480,31 +589,33 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
+let SubtargetPredicate = isGFX11Plus in
+defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
+defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
let isReMaterializable = 1 in {
-defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
-defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>;
-defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
-defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
-defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
+defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
+defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
+defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
+defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
+defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
-defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
-defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
+defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
+defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
-defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
-defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
+defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
+defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1
@@ -536,7 +647,7 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f
let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
-defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
+defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}
@@ -555,20 +666,20 @@ def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
} // End isConvergent = 1
let isReMaterializable = 1 in {
-defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
-defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, add_ctpop>;
-defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
-defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
-defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
+defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
+defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
+defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
+defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
+defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
-defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
-defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
-defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
-defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>;
let SubtargetPredicate = isGFX6GFX7 in {
@@ -641,8 +752,9 @@ def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;
-let SubtargetPredicate = Has16BitInsts in {
+let SubtargetPredicate = Has16BitInsts in {
+let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
@@ -664,9 +776,7 @@ def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
}
} // End FPDPRounding = 1
-defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
-defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
-defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
+
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
@@ -675,12 +785,19 @@ defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>;
-let Constraints = "$vdst = $src2", DisableEncoding="$src2",
- isConvertibleToThreeAddress = 1 in {
-defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
+let SubtargetPredicate = isGFX8GFX9 in {
+ defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
+ defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
+ defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
}
} // End isCommutable = 1
+} // End isReMaterializable = 1
+// FIXME: Missing FPDPRounding
+let Constraints = "$vdst = $src2", DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1, isCommutable = 1 in {
+defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
+}
} // End SubtargetPredicate = Has16BitInsts
let SubtargetPredicate = HasDLInsts in {
@@ -722,7 +839,7 @@ let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
isCommutable = 1 in
-defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
+defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts
@@ -750,7 +867,7 @@ let Constraints = "$vdst = $src2",
isCommutable = 1,
IsDOT = 1 in {
let SubtargetPredicate = HasDot5Insts in
- defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
+ defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
let SubtargetPredicate = HasDot6Insts in
defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
@@ -788,20 +905,20 @@ let AddedComplexity = 30 in {
} // End AddedComplexity = 30
let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
-def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
+def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
let isCommutable = 1 in
-def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
+def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
}
let SubtargetPredicate = isGFX10Plus in {
-let FPDPRounding = 1 in {
+let FPDPRounding = 1, isReMaterializable = 1 in {
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
let isCommutable = 1 in
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
-} // End FPDPRounding = 1
+} // End FPDPRounding = 1, isReMaterializable = 1
let Constraints = "$vdst = $src2",
DisableEncoding="$src2",
@@ -857,7 +974,7 @@ def : GCNPat <
>;
}
-let Predicates = [Has16BitInsts] in {
+let Predicates = [Has16BitInsts, isGFX8GFX9] in {
// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
@@ -867,9 +984,6 @@ def : GCNPat<
(V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;
-
-let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
-
def : GCNPat<
(i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
(V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
@@ -885,7 +999,10 @@ defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;
-} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]
+
+} // End Predicates = [Has16BitInsts, isGFX8GFX9]
+
+let Predicates = [Has16BitInsts] in {
def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;
@@ -917,8 +1034,16 @@ def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}
+let SubtargetPredicate = isGFX11Plus in {
+ let isCommutable = 1 in {
+ defm V_AND_B16 : VOP2Inst <"v_and_b16", VOP_I16_I16_I16, and>;
+ defm V_OR_B16 : VOP2Inst <"v_or_b16", VOP_I16_I16_I16, or>;
+ defm V_XOR_B16 : VOP2Inst <"v_xor_b16", VOP_I16_I16_I16, xor>;
+ } // End isCommutable = 1
+} // End SubtargetPredicate = isGFX11Plus
+
//===----------------------------------------------------------------------===//
-// Target-specific instruction encodings.
+// DPP Encodings
//===----------------------------------------------------------------------===//
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
@@ -947,10 +1072,10 @@ class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
let OtherPredicates = ps.OtherPredicates;
}
-class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
+class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
Base_VOP2_DPP16<op, ps, opName, p>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>;
+ SIMCInstr <ps.PseudoInstr, subtarget>;
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
VOPProfile p = ps.Pfl> :
@@ -973,10 +1098,253 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
}
//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
+ //===------------------------------- VOP2 -------------------------------===//
+ multiclass VOP2Only_Real_MADK_gfx11<bits<6> op> {
+ def _gfx11 :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX11>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP2_Real_e32_gfx11<bits<6> op> {
+ def _e32_gfx11 :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX11>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+ }
+ multiclass VOP2Only_Real_e32_gfx11<bits<6> op> {
+ let IsSingle = 1 in
+ defm NAME: VOP2_Real_e32_gfx11<op>;
+ }
+ multiclass VOP2_Real_e64_gfx11<bits<6> op> {
+ def _e64_gfx11 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
+ VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
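+  // In {0, 1, 0, 0, op{5-0}}, the fixed high bits place promoted VOP2 opcodes
+  // at 0x100 + op within the 10-bit VOP3 opcode field.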
+ multiclass VOP2_Real_dpp_gfx11<bits<6> op> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX11> {
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+ multiclass VOP2_Real_dpp8_gfx11<bits<6> op> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp8_gfx11 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+
+ //===------------------------- VOP2 (with name) -------------------------===//
+ multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
+ string asmName, bit single = 0> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ def _e32_gfx11 :
+ VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>,
+ VOP2e<op{5-0}, ps.Pfl>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]> {
+ let AsmString = asmName # ps.AsmOperands;
+ let IsSingle = single;
+ }
+ }
+ multiclass VOP2_Real_e64_with_name_gfx11<bits<6> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]> {
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+
+ multiclass VOP2_Real_dpp_with_name_gfx11<bits<6> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
+ def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
+ SIEncodingFamily.GFX11> {
+ let AsmString = asmName # ps.Pfl.AsmDPP16;
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+ multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
+ def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
+ let AsmString = asmName # ps.Pfl.AsmDPP8;
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+
+ //===------------------------------ VOP2be ------------------------------===//
+ multiclass VOP2be_Real_e32_gfx11<bits<6> op, string opName, string asmName> {
+ defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ def _e32_gfx11 :
+ VOP2_Real<ps, SIEncodingFamily.GFX11>,
+ VOP2e<op{5-0}, ps.Pfl> {
+ let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
+ }
+ }
+ multiclass VOP2be_Real_dpp_gfx11<bits<6> op, string opName, string asmName> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx11 :
+ VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP);
+ let DecoderNamespace = "DPPGFX11";
+ }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_w32_gfx11 :
+ Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_w64_gfx11 :
+ Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
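+  // The _w32/_w64 variants here and in the dpp8 multiclass below are
+  // assembler-only spellings: wave32 prints "vcc_lo" while wave64 keeps "vcc";
+  // only the plain _dpp_gfx11/_dpp8_gfx11 defs carry an encoding.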
+ multiclass VOP2be_Real_dpp8_gfx11<bits<6> op, string opName, string asmName> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp8_gfx11 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp8_w32_gfx11 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp8_w64_gfx11 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+
+} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+
+// These multiclasses are defined outside the AssemblerPredicate/DecoderNamespace
+// block above so the separate DecoderNamespaces set by their components are not
+// overridden.
+multiclass VOP2_Realtriple_e64_gfx11<bits<6> op> {
+  defm NAME : VOP3_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME>;
+}
+multiclass VOP2_Realtriple_e64_with_name_gfx11<bits<6> op, string opName,
+ string asmName> {
+  defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 0, 0, op{5-0}}, opName, asmName>;
+}
+
+multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
+ VOP2be_Real_e32_gfx11<op, opName, asmName>,
+ VOP3be_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
+ VOP2be_Real_dpp_gfx11<op, opName, asmName>,
+ VOP2be_Real_dpp8_gfx11<op, opName, asmName>;
+
+// Only used for V_CNDMASK_B32.
+multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
+ VOP2_Real_e32_gfx11<op>,
+ VOP2_Realtriple_e64_gfx11<op>,
+ VOP2be_Real_dpp_gfx11<op, opName, asmName>,
+ VOP2be_Real_dpp8_gfx11<op, opName, asmName>;
+
+multiclass VOP2Only_Real_gfx11<bits<6> op> :
+ VOP2Only_Real_e32_gfx11<op>,
+ VOP2_Real_dpp_gfx11<op>,
+ VOP2_Real_dpp8_gfx11<op>;
+
+multiclass VOP2_Real_NO_VOP3_gfx11<bits<6> op> :
+ VOP2_Real_e32_gfx11<op>, VOP2_Real_dpp_gfx11<op>, VOP2_Real_dpp8_gfx11<op>;
+
+multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
+ VOP2_Realtriple_e64_gfx11<op>, VOP2_Real_NO_VOP3_gfx11<op>;
+
+multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
+ string asmName, bit isSingle = 0> :
+ VOP2_Real_e32_with_name_gfx11<op, opName, asmName, isSingle>,
+ VOP2_Real_dpp_with_name_gfx11<op, opName, asmName>,
+ VOP2_Real_dpp8_with_name_gfx11<op, opName, asmName>;
+
+multiclass VOP2_Real_FULL_with_name_gfx11<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Realtriple_e64_with_name_gfx11<op, opName, asmName>,
+ VOP2_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>;
+
+multiclass VOP2_Real_NO_DPP_gfx11<bits<6> op> :
+ VOP2_Real_e32_gfx11<op>, VOP2_Real_e64_gfx11<op>;
+
+multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Real_e32_with_name_gfx11<op, opName, asmName>,
+ VOP2_Real_e64_with_name_gfx11<op, opName, asmName>;
+
+defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32",
+ "v_cndmask_b32">;
+defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002,
+ "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>;
+defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006,
+ "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">;
+defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11<0x007,
+ "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">;
+defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11<0x018>;
+defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11<0x019>;
+defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11<0x01a>;
+defm V_ADD_CO_CI_U32 :
+ VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
+defm V_SUB_CO_CI_U32 :
+ VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
+defm V_SUBREV_CO_CI_U32 :
+ VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
+
+defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f,
+ "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
+defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;
+
+// VOP3 only.
+defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>;
+defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>;
+defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>;
+defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>;
+defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>;
+defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>;
+defm V_CVT_PKNORM_I16_F32 : VOP3Only_Realtriple_gfx11<0x321>;
+defm V_CVT_PKNORM_U16_F32 : VOP3Only_Realtriple_gfx11<0x322>;
+defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>;
+defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>;
+defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>;
+defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>;
+defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>;
+
+let SubtargetPredicate = isGFX11Plus in {
+ defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;
+
+ defm : VOP2bInstAliases<
+ V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">;
+ defm : VOP2bInstAliases<
+ V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">;
+ defm : VOP2bInstAliases<
+ V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">;
+} // End SubtargetPredicate = isGFX11Plus
+
+//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
//===------------------------------- VOP2 -------------------------------===//
multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
def _gfx10 :
@@ -1011,13 +1379,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
let DecoderNamespace = "SDWA10";
}
}
multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
let DecoderNamespace = "DPP8";
}
@@ -1056,15 +1424,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
- def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp")> {
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP16;
}
}
multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP8;
@@ -1122,14 +1490,14 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp_gfx10 :
- VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
+ VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
let AsmString = asmName # !subst(", vcc", "", AsmDPP);
let DecoderNamespace = "SDWA10";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp_w32_gfx10 :
Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
@@ -1137,7 +1505,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp_w64_gfx10 :
Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
@@ -1147,14 +1515,14 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp8_gfx10 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
let DecoderNamespace = "DPP8";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp8_w32_gfx10 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
@@ -1162,7 +1530,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
def _dpp8_w64_gfx10 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
@@ -1189,7 +1557,10 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
let IsSingle = 1;
}
}
-} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
+
+multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
+ VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK_gfx11<op>;
multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
VOP2be_Real_e32_gfx10<op, opName, asmName>,
@@ -1209,7 +1580,10 @@ multiclass VOP2_Real_gfx10<bits<6> op> :
VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
-multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
+multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
+ VOP2_Real_gfx10<op>, VOP2_Real_FULL_gfx11<op>;
+
+multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
string asmName> :
VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
@@ -1217,36 +1591,41 @@ multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
+multiclass VOP2_Real_with_name_gfx10_gfx11<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Real_with_name_gfx10<op, opName, asmName>,
+ VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;
+
// NB: Same opcode as v_mac_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
-defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
-defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
-defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
-defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
-defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
-defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
-defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
-defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
-defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
-defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
-defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
-defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
-defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
-defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
+defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11<0x01e>;
+defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>;
+defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>;
+defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>;
+defm V_ADD_F16 : VOP2_Real_gfx10_gfx11<0x032>;
+defm V_SUB_F16 : VOP2_Real_gfx10_gfx11<0x033>;
+defm V_SUBREV_F16 : VOP2_Real_gfx10_gfx11<0x034>;
+defm V_MUL_F16 : VOP2_Real_gfx10_gfx11<0x035>;
+defm V_FMAC_F16 : VOP2_Real_gfx10_gfx11<0x036>;
+defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x037>;
+defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x038>;
+defm V_MAX_F16 : VOP2_Real_gfx10_gfx11<0x039>;
+defm V_MIN_F16 : VOP2_Real_gfx10_gfx11<0x03a>;
+defm V_LDEXP_F16 : VOP2_Real_gfx10_gfx11<0x03b>;
let IsSingle = 1 in {
-defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
+ defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
}
// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :
- VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">;
+ VOP2_Real_with_name_gfx10_gfx11<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
- VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">;
+ VOP2_Real_with_name_gfx10_gfx11<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
- VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
+ VOP2_Real_with_name_gfx10_gfx11<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
@@ -1275,7 +1654,7 @@ defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;
-let SubtargetPredicate = isGFX10Plus in {
+let SubtargetPredicate = isGFX10Only in {
defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;
defm : VOP2bInstAliases<
@@ -1284,10 +1663,10 @@ let SubtargetPredicate = isGFX10Plus in {
V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
defm : VOP2bInstAliases<
V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
-} // End SubtargetPredicate = isGFX10Plus
+} // End SubtargetPredicate = isGFX10Only
//===----------------------------------------------------------------------===//
-// GFX6, GFX7, GFX10.
+// GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
@@ -1338,6 +1717,9 @@ multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;
+multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
+ VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL_gfx11<op>;
+
multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;
@@ -1398,28 +1780,28 @@ let SubtargetPredicate = isGFX6GFX7 in {
def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7
-defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
-defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
-defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
+defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
+defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
+defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
-defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
-defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
-defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
-defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
-defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
-defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
-defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
-defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
-defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
-defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
-defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
+defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x008>;
+defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x009>;
+defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00a>;
+defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00b>;
+defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00c>;
+defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
+defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
+defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x011>;
+defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
+defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
+defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x014>;
defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
-defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
-defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
-defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
+defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
+defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
+defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
@@ -1436,6 +1818,13 @@ multiclass VOP2_Real_MADK_vi <bits<6> op> {
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
+multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
+ def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
+ let DecoderNamespace = "GFX9";
+ }
+}
+
multiclass VOP2_Real_e32_vi <bits<6> op> {
def _e32_vi :
VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
@@ -1736,6 +2125,11 @@ let SubtargetPredicate = isGFX90APlus in {
}
} // End SubtargetPredicate = isGFX90APlus
+let SubtargetPredicate = HasFmaakFmamkF32Insts in {
+defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>;
+defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>;
+}
+
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 494e3aeb6d55..dddd0aacc140 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -6,191 +6,25 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// VOP3 Classes
-//===----------------------------------------------------------------------===//
-
-class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
- dag src0 = !if(P.HasOMod,
- (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
- (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
-
- list<dag> ret3 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
-
- list<dag> ret2 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
-
- list<dag> ret1 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
-
- list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
- !if(!eq(P.NumSrcArgs, 2), ret2,
- ret1));
-}
-
-class getVOP3PModPat<VOPProfile P, SDPatternOperator node, bit HasExplicitClamp> {
- dag src0_dag = (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers));
- dag src1_dag = (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers));
- dag src2_dag = (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers));
- dag clamp_dag = (i1 timm:$clamp);
-
- list<dag> ret3 = [(set P.DstVT:$vdst,
- !if(HasExplicitClamp,
- (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, src2_dag, clamp_dag),
- (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, src2_dag)))];
-
- list<dag> ret2 = [(set P.DstVT:$vdst,
- !if(HasExplicitClamp,
- (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, clamp_dag),
- (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag)))];
-
- list<dag> ret1 = [(set P.DstVT:$vdst,
- !if(HasExplicitClamp,
- (DivergentFragOrOp<node, P>.ret src0_dag, clamp_dag),
- (DivergentFragOrOp<node, P>.ret src0_dag)))];
-
- list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
- !if(!eq(P.NumSrcArgs, 2), ret2,
- ret1));
-}
-
-class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret3 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)),
- (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
-
- list<dag> ret2 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)),
- (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
-
- list<dag> ret1 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))))];
-
- list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
- !if(!eq(P.NumSrcArgs, 2), ret2,
- ret1));
-}
-
-class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret3 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers),
- (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
- (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
-
- list<dag> ret2 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers)),
- (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
- (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
-
- list<dag> ret1 = [(set P.DstVT:$vdst,
- (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))))];
-
- list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
- !if(!eq(P.NumSrcArgs, 2), ret2,
- ret1));
-}
-
-class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
- list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1))];
- list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0))];
- list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
- !if(!eq(P.NumSrcArgs, 2), ret2,
- ret1));
-}
-
-class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))];
- list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))];
- list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))];
- list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
- !if(!eq(P.NumSrcArgs, 2), ret2,
- ret1));
-}
-
-class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
- list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
- timm:$cbsz, timm:$abid, timm:$blgp))];
-}
-
-// Consistently gives instructions a _e64 suffix.
-multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = []> {
- def _e64 : VOP3_Pseudo<opName, P, pattern>;
-}
-
-class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
- VOP3_Pseudo<OpName, P,
- !if(P.HasOpSel,
- !if(P.HasModifiers,
- getVOP3OpSelModPat<P, node>.ret,
- getVOP3OpSelPat<P, node>.ret),
- !if(P.HasModifiers,
- getVOP3ModPat<P, node>.ret,
- !if(P.HasIntClamp,
- getVOP3ClampPat<P, node>.ret,
- !if (P.IsMAI,
- getVOP3MAIPat<P, node>.ret,
- getVOP3Pat<P, node>.ret)))),
- 0, P.HasOpSel> {
-
- let IntClamp = P.HasIntClamp;
- let AsmMatchConverter =
- !if(P.HasOpSel,
- "cvtVOP3OpSel",
- !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp),
- "cvtVOP3",
- ""));
-}
-
-multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
- def _e64 : VOP3InstBase<OpName, P, node>;
-}
-
// Special case for v_div_fmas_{f32|f64}, since it seems to be the
// only VOP instruction that implicitly reads VCC.
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
let Outs64 = (outs DstRC.RegClass:$vdst);
+ let HasExtVOP3DPP = 0;
+ let HasExtDPP = 0;
}
def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
let Outs64 = (outs DstRC.RegClass:$vdst);
}
}
-class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
- bit HasClamp = Clamp;
- bit HasOpSel = OpSel;
- bit IsPacked = Packed;
- bit IsMAI = MAI;
-}
-
-def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
-def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
-def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
-def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
-def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
-
-class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
-
- let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
- let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
- let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
- let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
-
- let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers));
- let IsSingle = 1;
-}
-
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
let IsSingle = 1;
+ let HasExtVOP3DPP = 0;
+ let HasExtDPP = 0;
}
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
@@ -198,12 +32,22 @@ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
let HasClamp = 1;
- let IsSingle = 1;
+ let IsSingle = 1;
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
}
+class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
+ let HasExtVOP3DPP = 0;
+ let HasExtDPP = 0;
+}
+
+def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
+ let HasExtVOP3DPP = 0;
+ let HasExtDPP = 0;
+}
+
//===----------------------------------------------------------------------===//
// VOP3 INTERP
//===----------------------------------------------------------------------===//
@@ -304,10 +148,10 @@ defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_l
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteIntMul] in {
-defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
-defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
-defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
-defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
+defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
+defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF<VOP_I32_I32_I32>, mulhu>;
+defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
+defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteIntMul]
} // End isReMaterializable = 1
@@ -367,7 +211,7 @@ let isCommutable = 1 in {
} // End isCommutable = 1
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
-defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
+defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;
let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
@@ -419,9 +263,9 @@ def : GCNPat<
>;
let isReMaterializable = 1 in {
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
+let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isGFX6GFX7GFX10
+} // End SubtargetPredicate = isGFX6GFX7GFX10Plus
let SchedRW = [Write32Bit] in {
let SubtargetPredicate = isGFX8Plus in {
@@ -430,21 +274,30 @@ defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMD
} // End SchedRW = [Write32Bit]
} // End isReMaterializable = 1
-let SubtargetPredicate = isGFX7Plus in {
+def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
+ let HasModifiers = 0;
+}
+let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
-defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP>>;
+defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
+} // End SubtargetPredicate = isGFX7Plus
let isCommutable = 1 in {
let SchedRW = [WriteIntMul, WriteSALU] in {
+let SubtargetPredicate = isGFX7GFX8GFX9GFX10 in {
defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
+}
+let SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst" in {
+defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32_gfx11", VOP3b_I64_I1_I32_I32_I64>;
+defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32_gfx11", VOP3b_I64_I1_I32_I32_I64>;
+} // End SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst"
} // End SchedRW = [WriteIntMul, WriteSALU]
} // End isCommutable = 1
-} // End SubtargetPredicate = isGFX7Plus
let FPDPRounding = 1 in {
let Predicates = [Has16BitInsts, isGFX8Only] in {
@@ -557,7 +410,7 @@ defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_e64>;
} // End Predicates = [Has16BitInsts, isGFX10Plus]
-class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
+class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
(ops node:$x, node:$y, node:$z),
// When the inner operation is used multiple times, selecting 3-op
// instructions may still be beneficial -- if the other users can be
@@ -587,7 +440,9 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
return true;
}]> {
let PredicateCodeUsesOperands = 1;
+}
+class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : ThreeOpFragSDAG<op1, op2> {
// The divergence predicate is irrelevant in GlobalISel, as we have
// proper register bank checks. We just need to verify the constant
// bus restriction when all the sources are considered.
@@ -609,6 +464,23 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
}];
}
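+// shl_0_to_4 matches a left shift by a constant amount in [0, 4]. The SDAG
+// predicate reads the constant from a ConstantSDNode; the GISel predicate
+// accepts a G_CONSTANT, possibly behind a COPY.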
+def shl_0_to_4 : PatFrag<
+ (ops node:$src0, node:$src1), (shl node:$src0, node:$src1),
+ [{
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ return C->getZExtValue() <= 4;
+ }
+ return false;
+ }]> {
+ let GISelPredicateCode = [{
+ int64_t Imm = 0;
+ if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Imm)) &&
+ !mi_match(MI.getOperand(2).getReg(), MRI, m_Copy(m_ICst(Imm))))
+ return false;
+ return (uint64_t)Imm <= 4;
+ }];
+}
+
let SubtargetPredicate = isGFX9Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -649,6 +521,10 @@ defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I
defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
} // End isReMaterializable = 1
+// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
+// src0 is shifted left by 0-4 (use "0" to get ADD_U64).
+let SubtargetPredicate = isGFX940Plus in
+defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
@@ -664,6 +540,12 @@ def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
+let SubtargetPredicate = isGFX940Plus in
+def : GCNPat<
+ (ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
+ (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
+>;
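+// e.g. (add (shl i64:$a, (i32 3)), i64:$b) can select to
+// v_lshl_add_u64 $a, 3, $b.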
+
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
@@ -688,6 +570,33 @@ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
} // End SubtargetPredicate = isGFX9Plus
+// FIXME: GlobalISel in general does not handle instructions with 2 results,
+// so it cannot use these patterns.
+multiclass IMAD32_Pats <VOP3_Pseudo inst> {
+ def : GCNPat <
+ (ThreeOpFrag<mul, add> i32:$src0, i32:$src1, i32:$src2),
+ (EXTRACT_SUBREG (inst $src0, $src1,
+ (REG_SEQUENCE SReg_64, // Use scalar and let it be legalized
+ $src2, sub0,
+ (i32 (IMPLICIT_DEF)), sub1),
+ 0 /* clamp */),
+ sub0)
+ >;
+  // An immediate src2 in the pattern above will not fold because it would be
+  // partially undef. Hence define a specialized pattern for this case.
+  // FIXME: The GlobalISel pattern exporter fails to export a pattern like this
+  // and asserts, so make it SDAG-only.
+ def : GCNPat <
+ (ThreeOpFragSDAG<mul, add> i32:$src0, i32:$src1, (i32 imm:$src2)),
+ (EXTRACT_SUBREG (inst $src0, $src1, (i64 (as_i64imm $src2)), 0 /* clamp */), sub0)
+ >;
+}
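+// The low 32 bits of the 64-bit mad equal the 32-bit mul+add, which is why the
+// result is extracted from sub0.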
+
+let SubtargetPredicate = isGFX9GFX10 in // exclude pre-GFX9, where v_mad_u64_u32 was slow
+defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
+let SubtargetPredicate = isGFX11Only in
+defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
+
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
let Src0RC64 = VRegSrc_32;
let Src1RC64 = SCSrc_b32;
@@ -697,6 +606,8 @@ def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3
IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
VGPR_32:$vdst_in, op_sel0:$op_sel);
let HasClamp = 0;
+ let HasExtVOP3DPP = 0;
+ let HasExtDPP = 0;
}
class PermlanePat<SDPatternOperator permlane,
@@ -753,6 +664,20 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlaneDiscardVDstIn<
BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>,
V_PERMLANEX16_B32_e64>;
+
+ defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
+ defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
+
+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
+
+ // Undo sub x, c -> add x, -c canonicalization since c is more likely
+ // an inline immediate than -c.
+ def : GCNPat<
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
+ >;
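+  // e.g. (add i16:$x, -42) becomes (v_sub_nc_u16 $x, 42): 42 is an inline
+  // immediate (inline integers span -16..64) while -42 is not.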
+
} // End SubtargetPredicate = isGFX10Plus
class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
@@ -773,6 +698,36 @@ def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
}
+class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
+  // FIXME: VOP3 DPP versions are unsupported.
+ let HasExtVOP3DPP = 0;
+ let HasClamp = 0;
+ let HasOMod = 0;
+ let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
+ NumSrcArgs, HasClamp, HasOMod,
+ !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods),
+ !if(isFloatType<Src1VT>.ret, FPVRegInputMods, IntOpSelMods),
+ !if(isFloatType<Src2VT>.ret, FPVRegInputMods, IntOpSelMods)>.ret;
+}
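+// In the op_sel input list above, each source gets FP modifiers (neg/abs) when
+// its type is floating point, and op_sel-only integer modifiers otherwise.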
+
+let SubtargetPredicate = isGFX11Plus in {
+ defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+ defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+ defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+ defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+ defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_MINMAX_I32 : VOP3Inst<"v_minmax_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+ defm V_CVT_PK_I16_F32 : VOP3Inst<"v_cvt_pk_i16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
+ defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
+} // End SubtargetPredicate = isGFX11Plus
+
+let SubtargetPredicate = HasDot8Insts in {
+ defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
+ defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>;
+}
+
//===----------------------------------------------------------------------===//
// Integer Clamp Patterns
//===----------------------------------------------------------------------===//
@@ -813,16 +768,137 @@ def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;
def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
-
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+defm V_FMA_DX9_ZERO_F32 : VOP3_Real_with_name_gfx11<0x209, "V_FMA_LEGACY_F32", "v_fma_dx9_zero_f32">;
+defm V_MAD_I32_I24 : VOP3_Realtriple_gfx11<0x20a>;
+defm V_MAD_U32_U24 : VOP3_Realtriple_gfx11<0x20b>;
+defm V_CUBEID_F32 : VOP3_Realtriple_gfx11<0x20c>;
+defm V_CUBESC_F32 : VOP3_Realtriple_gfx11<0x20d>;
+defm V_CUBETC_F32 : VOP3_Realtriple_gfx11<0x20e>;
+defm V_CUBEMA_F32 : VOP3_Realtriple_gfx11<0x20f>;
+defm V_BFE_U32 : VOP3_Realtriple_gfx11<0x210>;
+defm V_BFE_I32 : VOP3_Realtriple_gfx11<0x211>;
+defm V_BFI_B32 : VOP3_Realtriple_gfx11<0x212>;
+defm V_FMA_F32 : VOP3_Realtriple_gfx11<0x213>;
+defm V_FMA_F64 : VOP3_Real_Base_gfx11<0x214>;
+defm V_LERP_U8 : VOP3_Realtriple_gfx11<0x215>;
+defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11<0x216>;
+defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11<0x217>;
+defm V_MULLIT_F32 : VOP3_Realtriple_gfx11<0x218>;
+defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;
+defm V_MIN3_I32 : VOP3_Realtriple_gfx11<0x21a>;
+defm V_MIN3_U32 : VOP3_Realtriple_gfx11<0x21b>;
+defm V_MAX3_F32 : VOP3_Realtriple_gfx11<0x21c>;
+defm V_MAX3_I32 : VOP3_Realtriple_gfx11<0x21d>;
+defm V_MAX3_U32 : VOP3_Realtriple_gfx11<0x21e>;
+defm V_MED3_F32 : VOP3_Realtriple_gfx11<0x21f>;
+defm V_MED3_I32 : VOP3_Realtriple_gfx11<0x220>;
+defm V_MED3_U32 : VOP3_Realtriple_gfx11<0x221>;
+defm V_SAD_U8 : VOP3_Realtriple_gfx11<0x222>;
+defm V_SAD_HI_U8 : VOP3_Realtriple_gfx11<0x223>;
+defm V_SAD_U16 : VOP3_Realtriple_gfx11<0x224>;
+defm V_SAD_U32 : VOP3_Realtriple_gfx11<0x225>;
+defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11<0x226>;
+defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11<0x227>;
+defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11<0x228>;
+defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11<0x237>;
+defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11<0x238>;
+defm V_MSAD_U8 : VOP3_Realtriple_gfx11<0x239>;
+defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11<0x23a>;
+defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11<0x23b>;
+defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11<0x23d>;
+defm V_XOR3_B32 : VOP3_Realtriple_gfx11<0x240>;
+defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11<0x241, "V_MAD_U16_gfx9", "v_mad_u16">;
+defm V_PERM_B32 : VOP3_Realtriple_gfx11<0x244>;
+defm V_XAD_U32 : VOP3_Realtriple_gfx11<0x245>;
+defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11<0x246>;
+defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11<0x247>;
+defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
+defm V_MIN3_F16 : VOP3_Realtriple_gfx11<0x249>;
+defm V_MIN3_I16 : VOP3_Realtriple_gfx11<0x24a>;
+defm V_MIN3_U16 : VOP3_Realtriple_gfx11<0x24b>;
+defm V_MAX3_F16 : VOP3_Realtriple_gfx11<0x24c>;
+defm V_MAX3_I16 : VOP3_Realtriple_gfx11<0x24d>;
+defm V_MAX3_U16 : VOP3_Realtriple_gfx11<0x24e>;
+defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>;
+defm V_MED3_I16 : VOP3_Realtriple_gfx11<0x250>;
+defm V_MED3_U16 : VOP3_Realtriple_gfx11<0x251>;
+defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
+defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+defm V_ADD3_U32 : VOP3_Realtriple_gfx11<0x255>;
+defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11<0x256>;
+defm V_AND_OR_B32 : VOP3_Realtriple_gfx11<0x257>;
+defm V_OR3_B32 : VOP3_Realtriple_gfx11<0x258>;
+defm V_MAD_U32_U16 : VOP3_Realtriple_gfx11<0x259>;
+defm V_MAD_I32_I16 : VOP3_Realtriple_gfx11<0x25a>;
+defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11<0x25b>;
+defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11<0x25c>;
+defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
+defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>;
+defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>;
+defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>;
+defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>;
+defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>;
+defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>;
+defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>;
+// FIXME: VOP3 DPP Dot instructions are unsupported.
+defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>;
+defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>;
+defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
+defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
+defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
+defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
+defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11<0x303>;
+defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11<0x304>;
+defm V_MUL_LO_U16 : VOP3Only_Realtriple_gfx11<0x305>;
+defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11<0x306>;
+defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11<0x307>;
+defm V_MAX_U16 : VOP3Only_Realtriple_gfx11<0x309>;
+defm V_MAX_I16 : VOP3Only_Realtriple_gfx11<0x30a>;
+defm V_MIN_U16 : VOP3Only_Realtriple_gfx11<0x30b>;
+defm V_MIN_I16 : VOP3Only_Realtriple_gfx11<0x30c>;
+defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30d, "V_ADD_I16", "v_add_nc_i16">;
+defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11<0x311>;
+defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11<0x312, "V_CVT_PKNORM_I16_F16", "v_cvt_pk_norm_i16_f16">;
+defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11<0x313, "V_CVT_PKNORM_U16_F16", "v_cvt_pk_norm_u16_f16">;
+defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11<0x325, "V_SUB_I32", "v_sub_nc_i32">;
+defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11<0x326, "V_ADD_I32", "v_add_nc_i32">;
+defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;
+defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
+defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
+defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
+defm V_LDEXP_F64 : VOP3_Real_Base_gfx11<0x32b>;
+defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11<0x32c>;
+defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11<0x32d>;
+defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11<0x32e>;
+defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11<0x32f>;
+defm V_LSHLREV_B16 : VOP3Only_Realtriple_gfx11<0x338>;
+defm V_LSHRREV_B16 : VOP3Only_Realtriple_gfx11<0x339>;
+defm V_ASHRREV_I16 : VOP3Only_Realtriple_gfx11<0x33a>;
+defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>;
+defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11<0x33d>;
+defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11<0x33e>;
+defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11<0x360>; // Pseudo in VOP2
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
+ defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11<0x361>; // Pseudo in VOP2
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
+defm V_AND_B16 : VOP3Only_Realtriple_gfx11<0x362>;
+defm V_OR_B16 : VOP3Only_Realtriple_gfx11<0x363>;
+defm V_XOR_B16 : VOP3Only_Realtriple_gfx11<0x364>;
+
+//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
multiclass VOP3_Real_gfx10<bits<10> op> {
def _gfx10 :
VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
@@ -867,7 +943,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
let AsmString = asmName # ps.AsmOperands;
}
}
-} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
@@ -935,10 +1011,11 @@ defm V_MAD_I16 :
defm V_DIV_FIXUP_F16 :
VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+defm V_ADD_NC_U16 : VOP3OpSel_Real_gfx10<0x303>;
+defm V_SUB_NC_U16 : VOP3OpSel_Real_gfx10<0x304>;
+
// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
// (they do not support SDWA or DPP).
-defm V_ADD_NC_U16 : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16", "v_add_nc_u16">;
-defm V_SUB_NC_U16 : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16", "v_sub_nc_u16">;
defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16", "v_mul_lo_u16">;
defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16", "v_lshrrev_b16">;
defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16", "v_ashrrev_i16">;
@@ -1273,3 +1350,5 @@ defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>;
defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
+
+defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 707475ceccee..59ce532af59b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -10,19 +10,33 @@
// VOP3P Classes
//===----------------------------------------------------------------------===//
+class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
+ bit HasDPP = 0> : VOP3_Profile<P, Features> {
+ let IsVOP3P = 1;
+ let HasExtVOP3DPP = HasDPP;
+ // We do not want to print src modifiers for vop3p because the bits are
+ // overloaded in meaning and the logic in printOperandAndFPInputMods is
+  // wrong for vop3p.
+ let AsmVOP3DPPBase = AsmVOP3P;
+}
+
// Used for FMA_MIX* and MAD_MIX* insts
// Their operands are only sort of f16 operands. Depending on
// op_sel_hi, these may be interpreted as f32. The inline immediate
// values are really f16 converted to f32, so we treat these as f16
// operands.
class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
- bit useTiedOutput = 0> : VOP3_Profile<P, Features> {
+ bit useTiedOutput = 0> : VOP3P_Profile<P, Features, 1> {
bit UseTiedOutput = useTiedOutput;
dag srcs =
(ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
+ dag dpp_srcs =
+ (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
+ FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
+ FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
// FIXME: clampmod0 misbehaves with the non-default vdst_in
// following it. For now, work around this by requiring clamp
@@ -35,19 +49,27 @@ class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
// We use Ins64 because that is the one which populates InOperandList
// due to the logic in class VOP3_Pseudo
let Ins64 = !con(srcs, mods);
+ let InsVOP3Base = !con(dpp_srcs, mods);
let Asm64 =
"$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
+ let AsmVOP3DPPBase = Asm64;
}
multiclass VOP3PInst<string OpName, VOPProfile P,
- SDPatternOperator node = null_frag, bit HasExplicitClamp = 0> {
+ SDPatternOperator node = null_frag, bit IsDOT = 0> {
def NAME : VOP3P_Pseudo<OpName, P,
!if (P.HasModifiers,
- getVOP3PModPat<P, node, HasExplicitClamp>.ret,
+ getVOP3PModPat<P, node, IsDOT, IsDOT>.ret,
getVOP3Pat<P, node>.ret)>;
+ let SubtargetPredicate = isGFX11Plus in {
+ if P.HasExtVOP3DPP then
+ def _dpp : VOP3_DPP_Pseudo<OpName, P> {
+ let VOP3P = 1;
+ let PseudoInstr = OpName #"_dpp";
+ }
+ } // end SubtargetPredicate = isGFX11Plus
}
-
// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
@@ -55,37 +77,47 @@ multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
}
+ let SubtargetPredicate = isGFX11Plus in {
+ if P.HasExtVOP3DPP then
+ def _dpp : VOP3_DPP_Pseudo<OpName, P> {
+ let VOP3P = 1;
+ let PseudoInstr = OpName#"_dpp";
+ let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
+ let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
+ }
+ } // end SubtargetPredicate = isGFX11Plus
}
+let isReMaterializable = 1 in {
let isCommutable = 1 in {
-defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
-defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
+defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
+defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
let FPDPRounding = 1 in {
-defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, any_fma>;
-defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, any_fadd>;
-defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, any_fmul>;
+defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, any_fma>;
+defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fadd>;
+defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fmul>;
} // End FPDPRounding = 1
-defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
-defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
+defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
+defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
-defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
-defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
-defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, mul>;
+defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, add>;
+defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
+defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, mul>;
-defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
-defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
-defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
-defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
+defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
+defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
+defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
+defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
}
-defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
-defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
-
-defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, clshl_rev_16>;
-defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, cashr_rev_16>;
-defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, clshr_rev_16>;
+defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
+defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
+defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshl_rev_16>;
+defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, cashr_rev_16>;
+defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshr_rev_16>;
+} // End isReMaterializable = 1
let SubtargetPredicate = HasVOP3PInsts in {
@@ -178,6 +210,7 @@ let SubtargetPredicate = HasMadMixInsts in {
// Size of src arguments (16/32) is controlled by op_sel.
// For 16-bit src arguments, their location (hi/lo) is controlled by op_sel_hi.
let isCommutable = 1, mayRaiseFPException = 0 in {
+let isReMaterializable = 1 in
defm V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
let FPDPRounding = 1 in {
@@ -197,6 +230,8 @@ defm : MadFmaMixPats<fmad, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
// Essentially the same as the mad_mix versions
let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {
+
+let isReMaterializable = 1 in
defm V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
let FPDPRounding = 1 in {
@@ -297,34 +332,63 @@ let IsDOT = 1 in {
let SubtargetPredicate = HasDot2Insts in {
defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
- VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
+ VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
- VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
+ VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot7Insts in {
defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
- VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
+ VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
AMDGPUfdot2, 1/*ExplicitClamp*/>;
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
- VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
- VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End SubtargetPredicate = HasDot7Insts
let SubtargetPredicate = HasDot1Insts in {
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
- VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
- VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
} // End SubtargetPredicate = HasDot1Insts
+
+let SubtargetPredicate = HasDot8Insts in {
+
+defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16",
+ VOP3P_Profile<VOP_F32_V2I16_V2I16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
+ int_amdgcn_fdot2_f32_bf16, 1>;
+
+} // End SubtargetPredicate = HasDot8Insts
+
} // End let IsDOT = 1
+multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
+ let IsDOT = 1 in
+ defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
+ null_frag, 1>;
+  // Dot-iu instructions treat their inputs as signed if the corresponding imod
+  // neg bits are set. Thus the dot-iu intrinsics have extra operands and
+  // require a separate codegen pattern.
+ def : GCNPat < (intrinsic_node (DotIUVOP3PMods i32:$src0_mods), i32:$src0,
+ (DotIUVOP3PMods i32:$src1_mods), i32:$src1,
+ i32:$src2, (i1 timm:$clamp)),
+ (!cast<Instruction>(NAME) $src0_mods, i32:$src0,
+ $src1_mods, i32:$src1,
+ (i32 8), i32:$src2, i1:$clamp)
+ >;
+}
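+// In the output pattern above, (i32 8) is SISrcMods::OP_SEL_1, the default
+// op_sel_hi modifier value for src2.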
+
+let SubtargetPredicate = HasDot8Insts in {
+defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>;
+defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>;
+} // End SubtargetPredicate = HasDot8Insts
+
def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;
@@ -365,18 +429,18 @@ def VDst_256 : VOPDstOperand<VReg_256>;
def VDst_512 : VOPDstOperand<VReg_512>;
def VDst_1024 : VOPDstOperand<VReg_1024>;
-def VOPProfileAccRead : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
+def VOPProfileAccRead : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
let Src0RC64 = ARegSrc_32;
}
-def VOPProfileAccWrite : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
+def VOPProfileAccWrite : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
let DstRC = ADst_32;
- let Src0RC64 = VISrc_b32;
+ let Src0RC64 = VCSrc_b32;
}
class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
RegisterOperand SrcABRC = AVSrc_32>
- : VOP3_Profile<P, VOP3_MAI> {
+ : VOP3P_Profile<P, VOP3_MAI> {
let DstRC = _DstRC;
let Src0RC64 = SrcABRC;
let Src1RC64 = SrcABRC;
@@ -387,15 +451,27 @@ class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC
let HasOMod = 0;
let HasModifiers = 0;
let Asm64 = "$vdst, $src0, $src1, $src2$cbsz$abid$blgp";
+ let AsmVOP3DPPBase = Asm64;
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
+ let InsVOP3Base = Ins64;
// Dst and SrcC cannot partially overlap if SrcC/Dst is bigger than 4 VGPRs.
// We then create two versions of the instruction: with tied dst and src2
- // and with the eralyclobber flag on the dst. This is strciter than the
+ // and with the earlyclobber flag on the dst. This is stricter than the
// actual HW restriction. In particular earlyclobber also affects src0 and
// src1 allocation which is not required.
bit NoDstOverlap = !gt(DstVT.Size, 128);
}
+class VOPProfileSMFMAC<VOPProfile P, RegisterOperand _DstRC,
+ RegisterOperand _SrcARC, RegisterOperand _SrcBRC>
+ : VOPProfileMAI<P, _DstRC, _DstRC, _SrcARC> {
+ let Src1RC64 = _SrcBRC;
+ let Src2VT = DstVT;
+ let Asm64 = " $vdst, $src0, $src1, $idx$cbsz$abid";
+ let Outs64 = (outs DstRC:$vdst);
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, VRegSrc_32:$idx, cbsz:$cbsz, abid:$abid, Src2RC64:$src2);
+}
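+// SMFMAC ("sparse" MFMA) reuses the MAI profile: src2 shares the destination
+// register class and type, and the extra VRegSrc_32 $idx operand supplies the
+// sparsity index data.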
+
def VOPProfileMAI_F32_F32_X4 : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, AISrc_128_f32, ADst_128>;
def VOPProfileMAI_F32_F32_X16 : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, AISrc_512_f32, ADst_512>;
def VOPProfileMAI_F32_F32_X32 : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32, AISrc_1024_f32, ADst_1024>;
@@ -413,6 +489,10 @@ def VOPProfileMAI_F32_V4I16_X16 : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, A
def VOPProfileMAI_F32_V4I16_X32 : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64 : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64, AISrc_256_f64, ADst_256, AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64 : VOPProfileMAI<VOP_F64_F64_F64_F64, AISrc_64_f64, ADst_64, AVSrc_64>;
+def VOPProfileMAI_I32_I64_X16 : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, AISrc_128_b32, ADst_128, AVSrc_64>;
+def VOPProfileMAI_I32_I64_X32 : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, AISrc_512_b32, ADst_512, AVSrc_64>;
+def VOPProfileMAI_F32_V2F32_X16 : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
+def VOPProfileMAI_F32_V2F32_X32 : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;
def VOPProfileMAI_F32_F32_X4_VCD : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, VISrc_128_f32, VDst_128>;
def VOPProfileMAI_F32_F32_X16_VCD : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, VISrc_512_f32, VDst_512>;
@@ -431,12 +511,37 @@ def VOPProfileMAI_F32_V4I16_X16_VCD : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F
def VOPProfileMAI_F32_V4I16_X32_VCD : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64, VISrc_256_f64, VDst_256, AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64_VCD : VOPProfileMAI<VOP_F64_F64_F64_F64, VISrc_64_f64, VDst_64, AVSrc_64>;
+def VOPProfileMAI_I32_I64_X16_VCD : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, VISrc_128_b32, VDst_128, AVSrc_64>;
+def VOPProfileMAI_I32_I64_X32_VCD : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, VISrc_512_b32, VDst_512, AVSrc_64>;
+def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
+def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;
+
+def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
class MFMATable <bit is_mac, string Name> {
bit IsMac = is_mac;
string FMAOp = Name;
}
+class MAIFrag<SDPatternOperator Op, code pred> : PatFrag <
+ (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$abid, node:$blgp),
+ (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
+ pred
+>;
+
+let GISelPredicateCode = [{ return MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }] in
+class AgprMAIFrag<SDPatternOperator Op> :
+ MAIFrag<Op, [{ return MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }]>;
+
+let GISelPredicateCode = [{ return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }] in
+class VgprMAIFrag<SDPatternOperator Op> :
+ MAIFrag<Op, [{ return !MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }]>;
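+// These fragments split MFMA selection between the AGPR (acc) and VGPR
+// destination forms based on SIMachineFunctionInfo::mayNeedAGPRs(). Note that
+// the SDAG predicate receives MF as a pointer (MF->) while GISelPredicateCode
+// receives it as a reference (MF.).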
+
let Predicates = [HasMAIInsts] in {
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
@@ -446,47 +551,62 @@ let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
} // End isMoveImm = 1
} // End isAsCheapAsAMove = 1, isReMaterializable = 1
+class MAIInst<string OpName, VOPProfile P, SDPatternOperator node>
+ : VOP3InstBase<OpName, P, node> {
+ Instruction Opcode = !cast<Instruction>(NAME);
+ bit is_dgemm = 0;
+ bit is_gfx940_xdl = 0;
+}
+
multiclass MAIInst<string OpName, string P, SDPatternOperator node,
bit NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap> {
let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
- defm "" : VOP3Inst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), !if(NoDstOverlap, null_frag, node)>,
- MFMATable<0, NAME # "_e64">;
+ def _e64 : MAIInst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
+ !if(NoDstOverlap, null_frag, AgprMAIFrag<node>)>,
+ MFMATable<0, NAME # "_e64">;
let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
- defm _vgprcd : VOP3Inst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>,
- MFMATable<0, NAME # "_vgprcd_e64">;
+ def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
+ !if(NoDstOverlap, null_frag, VgprMAIFrag<node>)>,
+ MFMATable<0, NAME # "_vgprcd_e64">;
}
foreach _ = BoolToList<NoDstOverlap>.ret in {
let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
isConvertibleToThreeAddress = NoDstOverlap,
Mnemonic = OpName in {
- defm "_mac" : VOP3Inst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P), node>,
- MFMATable<1, NAME # "_e64">;
+ def "_mac_e64" : MAIInst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P), AgprMAIFrag<node>>,
+ MFMATable<1, NAME # "_e64">;
let SubtargetPredicate = isGFX90APlus in
- defm _mac_vgprcd : VOP3Inst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>,
- MFMATable<1, NAME # "_vgprcd_e64">;
+ def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
+ VgprMAIFrag<node>>,
+ MFMATable<1, NAME # "_vgprcd_e64">;
}
}
} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
}
defm V_MFMA_F32_4X4X1F32 : MAIInst<"v_mfma_f32_4x4x1f32", "F32_F32_X4", int_amdgcn_mfma_f32_4x4x1f32>;
-defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>;
-defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X1F32 : MAIInst<"v_mfma_f32_16x16x1f32", "F32_F32_X16", int_amdgcn_mfma_f32_16x16x1f32>;
defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4", int_amdgcn_mfma_f32_16x16x4f32>;
+defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>;
+defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>;
+
+let is_gfx940_xdl = 1 in {
+defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>;
+defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X4F16 : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
defm V_MFMA_F32_16X16X16F16 : MAIInst<"v_mfma_f32_16x16x16f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_16x16x16f16>;
defm V_MFMA_I32_16X16X4I8 : MAIInst<"v_mfma_i32_16x16x4i8", "I32_I32_X16", int_amdgcn_mfma_i32_16x16x4i8>;
-defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>;
-defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>;
defm V_MFMA_F32_32X32X4F16 : MAIInst<"v_mfma_f32_32x32x4f16", "F32_V4F16_X32", int_amdgcn_mfma_f32_32x32x4f16>;
defm V_MFMA_F32_32X32X8F16 : MAIInst<"v_mfma_f32_32x32x8f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_32x32x8f16>;
defm V_MFMA_I32_32X32X4I8 : MAIInst<"v_mfma_i32_32x32x4i8", "I32_I32_X32", int_amdgcn_mfma_i32_32x32x4i8>;
+}
+
+let Predicates = [isGFX908orGFX90A] in {
defm V_MFMA_I32_16X16X16I8 : MAIInst<"v_mfma_i32_16x16x16i8", "I32_I32_X4", int_amdgcn_mfma_i32_16x16x16i8>;
defm V_MFMA_I32_32X32X8I8 : MAIInst<"v_mfma_i32_32x32x8i8", "I32_I32_X16", int_amdgcn_mfma_i32_32x32x8i8>;
defm V_MFMA_F32_4X4X2BF16 : MAIInst<"v_mfma_f32_4x4x2bf16", "F32_V2I16_X4", int_amdgcn_mfma_f32_4x4x2bf16>;
@@ -494,34 +614,314 @@ defm V_MFMA_F32_16X16X2BF16 : MAIInst<"v_mfma_f32_16x16x2bf16", "F32_V2I16_X16",
defm V_MFMA_F32_16X16X8BF16 : MAIInst<"v_mfma_f32_16x16x8bf16", "F32_V2I16_X4", int_amdgcn_mfma_f32_16x16x8bf16>;
defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32", int_amdgcn_mfma_f32_32x32x2bf16>;
defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;
+}
} // End SubtargetPredicate = HasMAIInsts
let Predicates = [isGFX90APlus] in {
+ let is_gfx940_xdl = 1 in {
defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
defm V_MFMA_F32_16X16X4BF16_1K : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
defm V_MFMA_F32_4X4X4BF16_1K : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>;
defm V_MFMA_F32_32X32X8BF16_1K : MAIInst<"v_mfma_f32_32x32x8bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_32x32x8bf16_1k>;
defm V_MFMA_F32_16X16X16BF16_1K : MAIInst<"v_mfma_f32_16x16x16bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_16x16x16bf16_1k>;
+ }
+ let is_dgemm = 1 in {
defm V_MFMA_F64_16X16X4F64 : MAIInst<"v_mfma_f64_16x16x4f64", "F64_16X16X4F64", int_amdgcn_mfma_f64_16x16x4f64>;
defm V_MFMA_F64_4X4X4F64 : MAIInst<"v_mfma_f64_4x4x4f64", "F64_4X4X4F64", int_amdgcn_mfma_f64_4x4x4f64>;
+ }
} // End Predicates = [isGFX90APlus]
-let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 in {
- defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
- defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
- defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
- defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
+let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in {
+ defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
+ defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
+ defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
+ defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
+} // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1
+
+multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
+ let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
+ isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in {
+ def _e64 : MAIInst<OpName, !cast<VOPProfileSMFMAC>("VOPProfileSMFMAC_" # P), node>;
+ }
+}
+
+let SubtargetPredicate = isGFX940Plus in {
+defm V_SMFMAC_F32_16X16X32_F16 : SMFMACInst<"v_smfmac_f32_16x16x32_f16", "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>;
+defm V_SMFMAC_F32_32X32X16_F16 : SMFMACInst<"v_smfmac_f32_32x32x16_f16", "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>;
+defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>;
+defm V_SMFMAC_F32_32X32X16_BF16 : SMFMACInst<"v_smfmac_f32_32x32x16_bf16", "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>;
+defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8", "I32_16X16X64_I8", int_amdgcn_smfmac_i32_16x16x64_i8>;
+defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>;
+}
+
+def MAIInstInfoTable : GenericTable {
+ let FilterClass = "MAIInst";
+ let CppTypeName = "MAIInstInfo";
+ let Fields = [
+ "Opcode", "is_dgemm", "is_gfx940_xdl"
+ ];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getMAIInstInfoHelper";
+}
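+// TableGen emits this as a C++ table of MAIInstInfo records that can be looked
+// up by opcode through getMAIInstInfoHelper().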
+
+let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1, isReMaterializable = 1 in {
+ defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
+ defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
+ defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
+ defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
} // End SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1
def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
+class VOPProfileWMMA<VOPProfile P, string Suffix, RegisterOperand _Src01RC64, bit _HasClamp, bit _HasOpSel> : VOP3P_Profile<P> {
+ let DstRC = !if(!eq(Suffix, "_w32"), VDst_256, VDst_128);
+ let Src0RC64 = _Src01RC64;
+ let Src1RC64 = _Src01RC64;
+ let Src2RC64 = !if(!eq(Suffix, "_w32"), VISrc_256_f64, VISrc_128_f32);
+ let HasClamp = _HasClamp;
+ let HasOpSel = _HasOpSel;
+ let IsPacked = 1;
+ let IsWMMA = 1;
+}
+
+def VOP_V8F32_V16F16_V16F16_V8F32 : VOPProfile <[v8f32, v16f16, v16f16, v8f32]>;
+def VOP_V8F32_V16I16_V16I16_V8F32 : VOPProfile <[v8f32, v16i16, v16i16, v8f32]>;
+def VOP_V16F16_V16F16_V16F16_V16F16 : VOPProfile <[v16f16, v16f16, v16f16, v16f16]>;
+def VOP_V16I16_V16I16_V16I16_V16I16 : VOPProfile <[v16i16, v16i16, v16i16, v16i16]>;
+def VOP_V8I32_V4I32_V4I32_V8I32 : VOPProfile <[v8i32, v4i32, v4i32, v8i32]>;
+def VOP_V8I32_V2I32_V2I32_V8I32 : VOPProfile <[v8i32, v2i32, v2i32, v8i32]>;
+
+def VOP_V4F32_V16F16_V16F16_V4F32 : VOPProfile <[v4f32, v16f16, v16f16, v4f32]>;
+def VOP_V4F32_V16I16_V16I16_V4F32 : VOPProfile <[v4f32, v16i16, v16i16, v4f32]>;
+def VOP_V8F16_V16F16_V16F16_V8F16 : VOPProfile <[v8f16, v16f16, v16f16, v8f16]>;
+def VOP_V8I16_V16I16_V16I16_V8I16 : VOPProfile <[v8i16, v16i16, v16i16, v8i16]>;
+def VOP_V4I32_V4I32_V4I32_V4I32 : VOPProfile <[v4i32, v4i32, v4i32, v4i32]>;
+def VOP_V4I32_V2I32_V2I32_V4I32 : VOPProfile <[v4i32, v2i32, v2i32, v4i32]>;
+
+
+class WMMAType <bits<2> val> {
+ bit hasClamp = val{0};
+ bit hasOpsel = val{1};
+}
+
+def WMMARegular : WMMAType<0b00>;
+def WMMAUIClamp : WMMAType<0b01>;
+def WMMAOpSel : WMMAType<0b10>;
+
+class WMMARegularPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
+ GCNPat < (P.DstVT (node
+ (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))
+ )),
+ (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, $src2_modifiers, P.Src2VT:$src2))
+>;
+
+class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
+ GCNPat < (P.DstVT (node
+ (P.Src0VT P.Src0VT:$src0),
+ (P.Src1VT P.Src1VT:$src1),
+ (P.Src2VT P.Src2VT:$src2), (WMMAOpSelVOP3PMods i32:$src2_modifiers)
+ )),
+ (P.DstVT (Inst (i32 8), P.Src0VT:$src0, (i32 8), P.Src1VT:$src1, i32:$src2_modifiers, P.Src2VT:$src2))
+>;
+
+class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
+ GCNPat < (P.DstVT (node
+ (DotIUVOP3PMods i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
+ (DotIUVOP3PMods i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
+ (P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
+ )),
+ (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
+>;
+
+class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
+ Instruction Opcode2Addr = TwoAddr;
+ Instruction Opcode3Addr = ThreeAddr;
+ Predicate WaveSizePredicate;
+}
+
+def WMMAOpcode : GenericEnum {
+ let FilterClass = "VOP3P_Pseudo";
+}
+
+class WMMAMappingTable : GenericTable {
+ let FilterClass = "WMMAOpcodeMapping";
+ let CppTypeName = "WMMAOpcodeMappingInfo";
+ let Fields = ["Opcode2Addr", "Opcode3Addr"];
+ string TypeOf_Opcode2Addr = "WMMAOpcode";
+ string TypeOf_Opcode3Addr = "WMMAOpcode";
+}
+
+def WMMAOpcode2AddrMappingTable : WMMAMappingTable {
+ let PrimaryKey = ["Opcode2Addr"];
+ let PrimaryKeyName = "getWMMAMappingInfoFrom2AddrOpcode";
+}
+
+def WMMAOpcode3AddrMappingTable : WMMAMappingTable {
+ let PrimaryKey = ["Opcode3Addr"];
+ let PrimaryKeyName = "getWMMAMappingInfoFrom3AddrOpcode";
+}
+
+// The WMMA instructions have extra constraints:
+// Matrices A and B cannot overlap with D. C cannot partially overlap with D,
+// but it is OK for them to be the same (which is the typical case).
+//
+// We implement this as follows:
+// 1) Map the intrinsic to the pseudo where D is tied to C ($vdst = $src2).
+// 2) The twoaddressinstruction pass checks whether src2 is live; if it is, it
+//    converts the default pseudo to the pseudo where src2 is not the same as
+//    vdst.
+// 3) @earlyclobber on the destination satisfies the constraint during RA.
+
+multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator node = null_frag, RegisterOperand _Src01RC64 = VRegSrc_256, WMMAType Type> {
+
+ defvar WMMAConstraints2Addr = "@earlyclobber $vdst,$vdst = $src2";
+ defvar WMMAConstraints3Addr = "@earlyclobber $vdst";
+
+ defvar WMMAProfile = VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;
+ if !eq(Suffix, "_w32") then {
+ let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in {
+ def _twoaddr_w32 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+ }
+ let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
+ def _threeaddr_w32 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+ }
+ }
+ def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr_w32),
+ !cast<Instruction>(NAME # _threeaddr_w32)>;
+ } else if !eq(Suffix, "_w64") then {
+ let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
+ let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = 1 in {
+ def _twoaddr_w64 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+ }
+ let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
+ def _threeaddr_w64 : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
+ }
+ }
+ def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr_w64),
+ !cast<Instruction>(NAME # _threeaddr_w64)>;
+ }
+
+ if !eq(Type, WMMAOpSel) then {
+ def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
+ } else if !eq(Type, WMMAUIClamp) then {
+ def : WMMAUIClampPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
+ } else {
+ def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
+ }
+}
+
+
+let WaveSizePredicate = isWave32 in {
+ defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular>;
+ defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>;
+ defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>;
+ defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>;
+ defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8", VOP_V8I32_V4I32_V4I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>;
+ defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4", VOP_V8I32_V2I32_V2I32_V8I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>;
+}
+
+let WaveSizePredicate = isWave64 in {
+ defm V_WMMA_F32_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16", VOP_V4F32_V16F16_V16F16_V4F32, int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular>;
+ defm V_WMMA_F32_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16", VOP_V4F32_V16I16_V16I16_V4F32, int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular>;
+ defm V_WMMA_F16_16X16X16_F16 : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel>;
+ defm V_WMMA_BF16_16X16X16_BF16 : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel>;
+ defm V_WMMA_I32_16X16X16_IU8 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu8", VOP_V4I32_V4I32_V4I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp>;
+ defm V_WMMA_I32_16X16X16_IU4 : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4", VOP_V4I32_V2I32_V2I32_V4I32, int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp>;
+
+}
+
//===----------------------------------------------------------------------===//
// Begin Real Encodings
//===----------------------------------------------------------------------===//
+class VOP3P_DPP16<bits<7> op, VOP_DPP_Pseudo ps, int subtarget,
+ string opName = ps.OpName>
+ : VOP3P_DPP<op, opName, ps.Pfl, 1>, SIMCInstr<ps.PseudoInstr, subtarget> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let AssemblerPredicate = HasDPP16;
+ let SubtargetPredicate = HasDPP16;
+ let OtherPredicates = ps.OtherPredicates;
+}
+
+class VOP3P_DPP8_Base<bits<7> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOP3P_DPP8<op, opName, ps.Pfl> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let OtherPredicates = ps.OtherPredicates;
+}
+
+//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Plus,
+ DecoderNamespace = "GFX11" in {
+
+ multiclass VOP3P_Real_gfx11<bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ def _gfx11 : VOP3P_Real<!cast<VOP3P_Pseudo>(backing_ps_name),
+ SIEncodingFamily.GFX11, asmName>,
+ VOP3Pe_gfx11<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>;
+ }
+
+ multiclass VOP3P_Real_dpp_gfx11<bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
+ def _dpp_gfx11
+ : VOP3P_DPP16<op, !cast<VOP_DPP_Pseudo>(backing_ps_name #"_dpp"),
+ SIEncodingFamily.GFX11> {
+ let AsmString = asmName #ps.Pfl.AsmVOP3DPP16;
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+
+ multiclass VOP3P_Real_dpp8_gfx11<bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
+ defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
+ def _dpp8_gfx11 : VOP3P_DPP8_Base<op, ps> {
+ let AsmString = asmName #ps.Pfl.AsmVOP3DPP8;
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+
+ multiclass VOP3P_Realtriple_gfx11<bits<7> op, string backing_ps_name = NAME,
+ string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic>
+ : VOP3P_Real_gfx11<op, backing_ps_name, asmName>,
+ VOP3P_Real_dpp_gfx11<op, backing_ps_name, asmName>,
+ VOP3P_Real_dpp8_gfx11<op, backing_ps_name, asmName>;
+} // End AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11"
+
+defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11 <0x16>;
+defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11 <0x18>;
+defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11 <0x1a>;
+
+multiclass VOP3P_Real_WMMA <bits<7> op> {
+ let WaveSizePredicate = isWave32, DecoderNamespace = "GFX11" in {
+ defm _twoaddr_w32 : VOP3P_Real_gfx11 <op>;
+ }
+ let WaveSizePredicate = isWave64, DecoderNamespace = "WMMAGFX11" in {
+ defm _twoaddr_w64 : VOP3P_Real_gfx11 <op>;
+ }
+}
+
+defm V_WMMA_F32_16X16X16_F16 : VOP3P_Real_WMMA <0x040>;
+defm V_WMMA_F32_16X16X16_BF16 : VOP3P_Real_WMMA <0x041>;
+defm V_WMMA_F16_16X16X16_F16 : VOP3P_Real_WMMA <0x042>;
+defm V_WMMA_BF16_16X16X16_BF16 : VOP3P_Real_WMMA <0x043>;
+defm V_WMMA_I32_16X16X16_IU8 : VOP3P_Real_WMMA <0x044>;
+defm V_WMMA_I32_16X16X16_IU4 : VOP3P_Real_WMMA <0x045>;
+
//===----------------------------------------------------------------------===//
// GFX8 (VI)
//===----------------------------------------------------------------------===//
@@ -557,15 +957,64 @@ multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64").Pfl, 0>;
} // End AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A"
}
+}
-multiclass VOP3P_Real_MFMA<bits<7> op> :
- VOP3P_Real_MFMA_gfx90a <op> {
+multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string Op,
+ VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(Op # "_e64"),
+ VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(Op # "_vgprcd" # "_e64"),
+ VOPProfile Pfl_ACD = PS_ACD.Pfl,
+ VOPProfile Pfl_VCD = PS_VCD.Pfl> {
+ let Predicates = [isGFX940Plus] in {
+ foreach _ = BoolToList<!ne(NameFrom, NameTo)>.ret in {
+ def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
+ (!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
+ Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
+ cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
+ def : InstAlias <NameTo # " " # PS_VCD.AsmOperands,
+ (!cast<VOP3P_Real>(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
+ Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
+ cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
+ }
+ } // End Predicates = [isGFX940Plus]
+}
+
+multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
+ VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
+ VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
+ let SubtargetPredicate = isGFX940Plus,
+ AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9",
+ AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
+ def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
+ VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;
+
+ def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
+ VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
+ } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9"
+
+ defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
+
+ foreach _ = BoolToList<!ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic)>.ret in
+ defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
+}
+
+multiclass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
+ VOP3P_Real_MFMA_gfx90a <op>,
+ VOP3P_Real_MFMA_gfx940 <op, GFX940Name> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
let AssemblerPredicate = HasMAIInsts;
let DecoderNamespace = "GFX8";
+ let Constraints = "";
}
}
+
+multiclass VOP3P_Real_SMFMAC<bits<7> op, string alias> {
+ def _gfx940 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3Pe_SMFMAC <op> {
+ let AssemblerPredicate = isGFX940Plus;
+ let DecoderNamespace = "GFX8";
+ }
+ def : MnemonicAlias<alias, !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic>;
}
defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>;
@@ -634,19 +1083,21 @@ let SubtargetPredicate = HasMAIInsts in {
defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>;
defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>;
-defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MFMA <0x40>;
-defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MFMA <0x41>;
-defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MFMA <0x42>;
-defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MFMA <0x44>;
-defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MFMA <0x45>;
-defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MFMA <0x48>;
-defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MFMA <0x49>;
-defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MFMA <0x4a>;
-defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MFMA <0x4c>;
-defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d>;
-defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50>;
-defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51>;
-defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52>;
+defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MFMA <0x40, "v_mfma_f32_32x32x1_2b_f32">;
+defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MFMA <0x41, "v_mfma_f32_16x16x1_4b_f32">;
+defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MFMA <0x42, "v_mfma_f32_4x4x1_16b_f32">;
+defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MFMA <0x44, "v_mfma_f32_32x32x2_f32">;
+defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MFMA <0x45, "v_mfma_f32_16x16x4_f32">;
+defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MFMA <0x48, "v_mfma_f32_32x32x4_2b_f16">;
+defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MFMA <0x49, "v_mfma_f32_16x16x4_4b_f16">;
+defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MFMA <0x4a, "v_mfma_f32_4x4x4_16b_f16">;
+defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MFMA <0x4c, "v_mfma_f32_32x32x8_f16">;
+defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d, "v_mfma_f32_16x16x16_f16">;
+defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50, "v_mfma_i32_32x32x4_2b_i8">;
+defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51, "v_mfma_i32_16x16x4_4b_i8">;
+defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52, "v_mfma_i32_4x4x4_16b_i8">;
+
+let SubtargetPredicate = isGFX908orGFX90A in {
defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA <0x55>;
defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA <0x54>;
defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA <0x68>;
@@ -654,6 +1105,7 @@ defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA <0x69>;
defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA <0x6b>;
defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA <0x6c>;
defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA <0x6d>;
+}
} // End SubtargetPredicate = HasMAIInsts
@@ -665,6 +1117,27 @@ defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx90a <0x67>;
defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx90a <0x6e>;
defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx90a <0x6f>;
+defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
+defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
+defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
+defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
+
+defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
+defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
+defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
+defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">;
+defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">;
+
+defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">;
+defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;
+
+defm V_SMFMAC_F32_16X16X32_F16 : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">;
+defm V_SMFMAC_F32_32X32X16_F16 : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">;
+defm V_SMFMAC_F32_16X16X32_BF16 : VOP3P_Real_SMFMAC <0x66, "v_smfmac_f32_16x16x32bf16">;
+defm V_SMFMAC_F32_32X32X16_BF16 : VOP3P_Real_SMFMAC <0x68, "v_smfmac_f32_32x32x16bf16">;
+defm V_SMFMAC_I32_16X16X64_I8 : VOP3P_Real_SMFMAC <0x6a, "v_smfmac_i32_16x16x64i8">;
+defm V_SMFMAC_I32_32X32X32_I8 : VOP3P_Real_SMFMAC <0x6c, "v_smfmac_i32_32x32x32i8">;
+
let SubtargetPredicate = HasPackedFP32Ops in {
defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>;
defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>;
@@ -676,35 +1149,41 @@ let SubtargetPredicate = HasPackedFP32Ops in {
// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10", VOP3P = 1 in {
+let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1 in {
multiclass VOP3P_Real_gfx10<bits<7> op> {
def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
}
-} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10", VOP3P = 1
-
-defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x00>;
-defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x01>;
-defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x02>;
-defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x03>;
-defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x04>;
-defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x05>;
-defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x06>;
-defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x07>;
-defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x08>;
-defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x09>;
-defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x0a>;
-defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x0b>;
-defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x0c>;
-defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x0d>;
-defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x0e>;
-defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x0f>;
-defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x10>;
-defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x11>;
-defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x12>;
-defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x20>;
-defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x21>;
-defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>;
+} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1
+
+multiclass VOP3P_Real_gfx10_gfx11<bits<7> op>
+ : VOP3P_Real_gfx10<op>, VOP3P_Real_gfx11<op>;
+
+multiclass VOP3P_Real_gfx10_gfx11_Triple<bits<7> op>
+ : VOP3P_Real_gfx10<op>, VOP3P_Realtriple_gfx11<op>;
+
+defm V_PK_MAD_I16 : VOP3P_Real_gfx10_gfx11<0x00>;
+defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10_gfx11<0x01>;
+defm V_PK_ADD_I16 : VOP3P_Real_gfx10_gfx11<0x02>;
+defm V_PK_SUB_I16 : VOP3P_Real_gfx10_gfx11<0x03>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10_gfx11<0x04>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10_gfx11<0x05>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10_gfx11<0x06>;
+defm V_PK_MAX_I16 : VOP3P_Real_gfx10_gfx11<0x07>;
+defm V_PK_MIN_I16 : VOP3P_Real_gfx10_gfx11<0x08>;
+defm V_PK_MAD_U16 : VOP3P_Real_gfx10_gfx11<0x09>;
+defm V_PK_ADD_U16 : VOP3P_Real_gfx10_gfx11<0x0a>;
+defm V_PK_SUB_U16 : VOP3P_Real_gfx10_gfx11<0x0b>;
+defm V_PK_MAX_U16 : VOP3P_Real_gfx10_gfx11<0x0c>;
+defm V_PK_MIN_U16 : VOP3P_Real_gfx10_gfx11<0x0d>;
+defm V_PK_FMA_F16 : VOP3P_Real_gfx10_gfx11<0x0e>;
+defm V_PK_ADD_F16 : VOP3P_Real_gfx10_gfx11<0x0f>;
+defm V_PK_MUL_F16 : VOP3P_Real_gfx10_gfx11<0x10>;
+defm V_PK_MIN_F16 : VOP3P_Real_gfx10_gfx11<0x11>;
+defm V_PK_MAX_F16 : VOP3P_Real_gfx10_gfx11<0x12>;
+defm V_FMA_MIX_F32 : VOP3P_Real_gfx10_gfx11_Triple <0x20>;
+defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x21>;
+defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x22>;
let SubtargetPredicate = HasDot2Insts in {
@@ -715,9 +1194,9 @@ defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
let SubtargetPredicate = HasDot7Insts in {
-defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
-defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>;
-defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>;
+defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x13>;
+defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11 <0x17>;
+defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11 <0x19>;
} // End SubtargetPredicate = HasDot7Insts
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index c0cc91029d11..eb6c54a45263 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -49,12 +49,36 @@ class VOPC_SDWA9e <bits<8> op, VOPProfile P> : VOP_SDWA9Be <P> {
// an explicit $dst.
class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> :
VOPProfile <[i1, vt0, vt1, untyped]> {
+  // We want to exclude instructions with 64-bit operands
+ let HasExtDPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
let Asm32 = "$src0, $src1";
+
+ let AsmDPP = !if (HasModifiers,
+ "$src0_modifiers, $src1_modifiers "
+ "$dpp_ctrl$row_mask$bank_mask$bound_ctrl",
+ "$src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl");
+ let AsmDPP8 = "$src0, $src1 $dpp8$fi";
+ let AsmDPP16 = AsmDPP#"$fi";
+ let InsDPP = getInsDPP<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
+ NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
+ Src2ModDPP>.ret;
+ let InsDPP16 = getInsDPP16<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
+ NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
+ Src2ModDPP>.ret;
+ let InsDPP8 = getInsDPP8<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
+ NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
+ Src2ModDPP>.ret;
+
// The destination for 32-bit encoding is implicit.
let HasDst32 = 0;
// VOPC disallows dst_sel and dst_unused as they have no effect on destination
let EmitDstSel = 0;
let Outs64 = (outs VOPDstS64orS32:$sdst);
+ let OutsVOP3DPP = Outs64;
+ let OutsVOP3DPP8 = Outs64;
+ let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret;
+ let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret;
+ let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret;
list<SchedReadWrite> Schedule = sched;
}
@@ -62,12 +86,15 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
ValueType vt1 = vt0> :
VOPC_Profile<sched, vt0, vt1> {
let Outs64 = (outs );
+ let OutsVOP3DPP = Outs64;
+ let OutsVOP3DPP8 = Outs64;
let OutsSDWA = (outs );
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm64 = !if(isFloatType<Src0VT>.ret, "$src0_modifiers, $src1_modifiers$clamp",
"$src0, $src1");
+ let AsmVOP3DPPBase = Asm64;
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
}
@@ -100,8 +127,8 @@ class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[],
VOPProfile Pfl = P;
}
-class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.PseudoInstr # " " # ps.AsmOperands, []>,
+class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily, string asm_name = ps.PseudoInstr> :
+ InstSI <ps.OutOperandList, ps.InOperandList, asm_name # " " # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let VALU = 1;
@@ -133,8 +160,9 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
// This class is used only with VOPC instructions. Use $sdst for out operand
class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst,
- string Asm32 = ps.Pfl.Asm32, VOPProfile p = ps.Pfl> :
- InstAlias <ps.OpName#" "#Asm32, (inst)>, PredicateControl {
+ string Asm32 = ps.Pfl.Asm32, string real_name = ps.OpName,
+ VOPProfile p = ps.Pfl> :
+ InstAlias <real_name#" "#Asm32, (inst)>, PredicateControl {
field bit isCompare;
field bit isCommutable;
@@ -167,27 +195,32 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst,
let SubtargetPredicate = AssemblerPredicate;
}
-multiclass VOPCInstAliases <string OpName, string Arch> {
- def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
- !cast<Instruction>(OpName#"_e32_"#Arch)>;
+multiclass VOPCInstAliases <string old_name, string Arch, string real_name = old_name> {
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(old_name#"_e64"),
+ !cast<Instruction>(real_name#"_e32_"#Arch),
+ !cast<VOP3_Pseudo>(old_name#"_e64").Pfl.Asm32,
+ real_name>;
let WaveSizePredicate = isWave32 in {
- def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
- !cast<Instruction>(OpName#"_e32_"#Arch),
- "vcc_lo, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(old_name#"_e64"),
+ !cast<Instruction>(real_name#"_e32_"#Arch),
+ "vcc_lo, "#!cast<VOP3_Pseudo>(old_name#"_e64").Pfl.Asm32,
+ real_name>;
}
let WaveSizePredicate = isWave64 in {
- def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
- !cast<Instruction>(OpName#"_e32_"#Arch),
- "vcc, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(old_name#"_e64"),
+ !cast<Instruction>(real_name#"_e32_"#Arch),
+ "vcc, "#!cast<VOP3_Pseudo>(old_name#"_e64").Pfl.Asm32,
+ real_name>;
}
}
-multiclass VOPCXInstAliases <string OpName, string Arch> {
- def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
- !cast<Instruction>(OpName#"_e32_"#Arch)>;
+multiclass VOPCXInstAliases <string old_name, string Arch, string real_name = old_name> {
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(old_name#"_e64"),
+ !cast<Instruction>(real_name#"_e32_"#Arch),
+ !cast<VOP3_Pseudo>(old_name#"_e64").Pfl.Asm32,
+ real_name>;
}
-
class getVOPCPat64 <SDPatternOperator cond, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set i1:$sdst,
@@ -205,6 +238,11 @@ class VCMPXNoSDstTable <bit has_sdst, string Name> {
string NoSDstOp = Name;
}
+class VCMPVCMPXTable <string Name> {
+ bit IsVCMPX = 0;
+ string VCMPOp = Name;
+}
+
multiclass VOPC_Pseudos <string opName,
VOPC_Profile P,
SDPatternOperator cond = COND_NULL,
@@ -213,7 +251,8 @@ multiclass VOPC_Pseudos <string opName,
def _e32 : VOPC_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
- VCMPXNoSDstTable<1, opName#"_e32"> {
+ VCMPXNoSDstTable<1, opName#"_e32">,
+ VCMPVCMPXTable<opName#"_e32"> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
@@ -223,7 +262,8 @@ multiclass VOPC_Pseudos <string opName,
def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
- VCMPXNoSDstTable<1, opName#"_e64"> {
+ VCMPXNoSDstTable<1, opName#"_e64">,
+ VCMPVCMPXTable<opName#"_e64"> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
let isCompare = 1;
@@ -237,6 +277,26 @@ multiclass VOPC_Pseudos <string opName,
let isConvergent = DefExec;
let isCompare = 1;
}
+
+ let SubtargetPredicate = isGFX11Plus in {
+ if P.HasExtDPP then
+ def _e32_dpp : VOP_DPP_Pseudo<opName, P> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = P.Schedule;
+ let isConvergent = DefExec;
+ let isCompare = 1;
+ let VOPC = 1;
+ let Constraints = "";
+ }
+ if P.HasExtVOP3DPP then
+ def _e64_dpp : VOP3_DPP_Pseudo<opName, P> {
+ let Defs = !if(DefExec, [EXEC], []);
+ let SchedRW = P.Schedule;
+ let isCompare = 1;
+ let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $sdst", "");
+ }
+ } // end SubtargetPredicate = isGFX11Plus
+
}
let SubtargetPredicate = HasSdstCMPX in {
@@ -248,23 +308,27 @@ multiclass VOPCX_Pseudos <string opName,
def _nosdst_e32 : VOPC_Pseudo <opName#"_nosdst", P_NoSDst, [], 0>,
Commutable_REV<revOp#"_nosdst_e32", !eq(revOp, opName)>,
- VCMPXNoSDstTable<0, opName#"_e32"> {
+ VCMPXNoSDstTable<0, opName#"_e32">,
+ VCMPVCMPXTable<!subst("v_cmpx", "v_cmp", opName#"_e32")> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isConvergent = 1;
let isCompare = 1;
let isCommutable = 1;
let SubtargetPredicate = HasNoSdstCMPX;
+ let IsVCMPX = 1;
}
def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
Commutable_REV<revOp#"_nosdst_e64", !eq(revOp, opName)>,
- VCMPXNoSDstTable<0, opName#"_e64"> {
+ VCMPXNoSDstTable<0, opName#"_e64">,
+ VCMPVCMPXTable<!subst("v_cmpx", "v_cmp", opName#"_e64")> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isCompare = 1;
let isCommutable = 1;
let SubtargetPredicate = HasNoSdstCMPX;
+ let IsVCMPX = 1;
}
foreach _ = BoolToList<P_NoSDst.HasExtSDWA>.ret in
@@ -275,6 +339,25 @@ multiclass VOPCX_Pseudos <string opName,
let isCompare = 1;
let SubtargetPredicate = HasNoSdstCMPX;
}
+
+ let SubtargetPredicate = isGFX11Plus in {
+ if P.HasExtDPP then
+ def _nosdst_e32_dpp : VOP_DPP_Pseudo<opName#"_nosdst", P_NoSDst> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isConvergent = 1;
+ let isCompare = 1;
+ let VOPC = 1;
+ let Constraints = "";
+ }
+ if P.HasExtVOP3DPP then
+ def _nosdst_e64_dpp : VOP3_DPP_Pseudo<opName#"_nosdst", P_NoSDst> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isCompare = 1;
+ let Constraints = "";
+ }
+ } // end SubtargetPredicate = isGFX11Plus
}
} // End SubtargetPredicate = HasSdstCMPX
@@ -626,8 +709,18 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">;
class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
VOPC_Profile<sched, vt, i32> {
+ let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let AsmDPP16 = AsmDPP#"$fi";
+  let InsDPP = (ins VGPR_32:$old, FPVRegInputMods:$src0_modifiers,
+                    VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl,
+                    row_mask:$row_mask, bank_mask:$bank_mask,
+                    bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+  // DPP8 forbids modifiers, so the DPP8 operands can be inherited from
+  // VOPC_Profile.
+
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
+ dag InsPartVOP3DPP = (ins Src0Mod:$src0_modifiers, VGPRSrc_32:$src0, VGPRSrc_32:$src1);
+ let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
+ (ins)));
let Asm64 = "$sdst, $src0_modifiers, $src1";
+ let AsmVOP3DPPBase = Asm64;
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
@@ -647,6 +740,7 @@ class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt> :
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm64 = "$src0_modifiers, $src1";
+ let AsmVOP3DPPBase = Asm64;
let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
let EmitDst = 0;
}
@@ -684,6 +778,24 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
+
+ let SubtargetPredicate = isGFX11Plus in {
+ if p.HasExtDPP then
+ def _e32_dpp : VOP_DPP_Pseudo<opName, p> {
+ let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]),
+ !if(DefVcc, [VCC], []));
+ let SchedRW = p.Schedule;
+ let isConvergent = DefExec;
+ let VOPC = 1;
+ let Constraints = "";
+ }
+ if p.HasExtVOP3DPP then
+ def _e64_dpp : VOP3_DPP_Pseudo<opName, p> {
+ let Defs = !if(DefExec, [EXEC], []);
+ let SchedRW = p.Schedule;
+ let Constraints = !if(p.NumSrcArgs, p.TieRegDPP # " = $sdst", "");
+ }
+ } // end SubtargetPredicate = isGFX11Plus
}
let SubtargetPredicate = HasSdstCMPX in {
@@ -714,6 +826,23 @@ multiclass VOPCX_Class_Pseudos <string opName,
let isConvergent = 1;
let SubtargetPredicate = HasNoSdstCMPX;
}
+
+ let SubtargetPredicate = isGFX11Plus in {
+ if P.HasExtDPP then
+ def _nosdst_e32_dpp : VOP_DPP_Pseudo<opName#"_nosdst", P_NoSDst> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isConvergent = 1;
+ let VOPC = 1;
+ let Constraints = "";
+ }
+ if P.HasExtVOP3DPP then
+ def _nosdst_e64_dpp : VOP3_DPP_Pseudo<opName#"_nosdst", P_NoSDst> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let Constraints = "";
+ }
+ } // end SubtargetPredicate = isGFX11Plus
}
} // End SubtargetPredicate = HasSdstCMPX
@@ -872,14 +1001,676 @@ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
//===----------------------------------------------------------------------===//
+// DPP Encodings
+//===----------------------------------------------------------------------===//
+
+// VOPC32
+
+class VOPC_DPPe_Common<bits<8> op> : Enc64 {
+ bits<8> src1;
+ let Inst{16-9} = src1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+}
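+
+// Inst{31-25} = 0x3e is the VOPC major opcode. In the 32-bit DPP variants
+// below, the 9-bit src0 slot (Inst{8-0}) instead carries the DPP marker:
+// 0xfa selects DPP16, while the fi field carries the DPP8 marker (0xe9/0xea).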
+
+class VOPC_DPP_Base<bits<8> op, string OpName, VOPProfile P>
+ : VOP_DPP_Base<OpName, P, P.InsDPP16, " " #P.AsmDPP16>,
+ VOPC_DPPe_Common<op> {
+ bits<2> src0_modifiers;
+ bits<8> src0;
+ bits<2> src1_modifiers;
+ bits<9> dpp_ctrl;
+ bits<1> bound_ctrl;
+ bits<4> bank_mask;
+ bits<4> row_mask;
+ bit fi;
+
+ let Inst{8-0} = 0xfa;
+
+ let Inst{39-32} = !if (P.HasSrc0, src0{7-0}, 0);
+ let Inst{48-40} = dpp_ctrl;
+ let Inst{50} = fi;
+ let Inst{51} = bound_ctrl;
+ let Inst{52} = !if (P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
+ let Inst{53} = !if (P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
+ let Inst{54} = !if (P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg
+ let Inst{55} = !if (P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs
+ let Inst{59-56} = bank_mask;
+ let Inst{63-60} = row_mask;
+
+ let AsmMatchConverter = "cvtDPP";
+ let VOPC = 1;
+}
+
+class VOPC_DPP8_Base<bits<8> op, string OpName, VOPProfile P>
+ : VOP_DPP8_Base<OpName, P, P.InsDPP8, " " #P.AsmDPP8>,
+ VOPC_DPPe_Common<op> {
+ bits<8> src0;
+ bits<24> dpp8;
+ bits<9> fi;
+
+ let Inst{8-0} = fi;
+
+ let Inst{39-32} = !if (P.HasSrc0, src0{7-0}, 0);
+ let Inst{63-40} = dpp8{23-0};
+
+ let AsmMatchConverter = "cvtDPP8";
+ let VOPC = 1;
+}
+
+class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
+ : VOPC_DPP_Base<op, opName, ps.Pfl> {
+ let AssemblerPredicate = HasDPP16;
+ let SubtargetPredicate = HasDPP16;
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let OtherPredicates = ps.OtherPredicates;
+ let Constraints = ps.Constraints;
+ let AsmMatchConverter = "cvtVOPCNoDstDPP";
+}
+
+class VOPC_DPP16_SIMC<bits<8> op, VOP_DPP_Pseudo ps, int subtarget,
+ string opName = ps.OpName>
+ : VOPC_DPP16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+
+class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
+ : VOPC_DPP8_Base<op, opName, ps.Pfl> {
+  // Note: ps is the non-DPP pseudo.
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let OtherPredicates = ps.OtherPredicates;
+ let Constraints = "";
+ let AsmMatchConverter = "cvtVOPCNoDstDPP8";
+}
+
+// VOPC64
+
+class VOPC64_DPP_Base<bits<10> op, string OpName, VOPProfile P>
+ : VOP3_DPP_Base<OpName, P, 1>, VOP3_DPPe_Common<op, P> {
+ Instruction Opcode = !cast<Instruction>(NAME);
+
+ bits<8> src0;
+ bits<9> dpp_ctrl;
+ bits<1> bound_ctrl;
+ bits<4> bank_mask;
+ bits<4> row_mask;
+ bit fi;
+
+ let Inst{40-32} = 0xfa;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{80-72} = dpp_ctrl;
+ let Inst{82} = fi;
+ let Inst{83} = bound_ctrl;
+ // Inst{87-84} ignored by hw
+ let Inst{91-88} = bank_mask;
+ let Inst{95-92} = row_mask;
+
+}
+
+class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
+ : VOPC64_DPP_Base<op, opName, ps.Pfl> {
+ let AssemblerPredicate = HasDPP16;
+ let SubtargetPredicate = HasDPP16;
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let OtherPredicates = ps.OtherPredicates;
+ let Constraints = ps.Constraints;
+}
+
+class VOPC64_DPP16_Dst<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : VOPC64_DPP16<op, ps, opName> {
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+}
+
+class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : VOPC64_DPP16<op, ps, opName> {
+  let Inst{7-0} = ?;
+ let AsmMatchConverter = "cvtVOPC64NoDstDPP";
+}
+
+class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P>
+ : VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P> {
+ Instruction Opcode = !cast<Instruction>(NAME);
+
+ bits<8> src0;
+ bits<24> dpp8;
+ bits<9> fi;
+
+ let Inst{40-32} = fi;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{95-72} = dpp8{23-0};
+
+}
+
+class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOPC64_DPP8_Base<op, opName, ps.Pfl> {
+  // Note: ps is the non-DPP pseudo.
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let OtherPredicates = ps.OtherPredicates;
+}
+
+class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOPC64_DPP8<op, ps, opName> {
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ let Constraints = "$old = $sdst";
+}
+
+class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOPC64_DPP8<op, ps, opName> {
+  let Inst{7-0} = ?;
+ let AsmMatchConverter = "cvtVOPC64NoDstDPP8";
+ let Constraints = "";
+}
+
+//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// GFX11.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Only in {
+ multiclass VOPC_Real_gfx11<bits<9> op> {
+ defvar ps32 = !cast<VOPC_Pseudo>(NAME#"_e32");
+ defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
+ let DecoderNamespace = "GFX11" in {
+ def _e32_gfx11 : VOPC_Real<ps32, SIEncodingFamily.GFX11>,
+ VOPCe<op{7-0}>;
+ def _e64_gfx11 : VOP3_Real<ps64, SIEncodingFamily.GFX11>,
+ VOP3a_gfx11<{0, op}, ps64.Pfl> {
+      // The encoding used for VOPC instructions encoded as VOP3 differs from
+      // VOP3e in the destination name (sdst), since VOPC has no vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
+ } // End DecoderNamespace = "GFX11"
+
+ defm : VOPCInstAliases<NAME, "gfx11">;
+
+ foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e32" #"_dpp");
+ defvar AsmDPP = ps32.Pfl.AsmDPP16;
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e32_dpp_gfx11 : VOPC_DPP16_SIMC<op{7-0}, psDPP,
+ SIEncodingFamily.GFX11>;
+ def _e32_dpp_w32_gfx11 : VOPC_DPP16<op{7-0}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp_w64_gfx11 : VOPC_DPP16<op{7-0}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32>;
+ def _e32_dpp8_w32_gfx11 : VOPC_DPP8<op{7-0}, ps32> {
+ let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp8_w64_gfx11 : VOPC_DPP8<op{7-0}, ps32> {
+ let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ }
+ foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e64" #"_dpp");
+ defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e64_dpp_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP>,
+ SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11>;
+ def _e64_dpp_w32_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp_w64_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP> {
+ let AsmString = psDPP.OpName # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e64_dpp8_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64>;
+ def _e64_dpp8_w32_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64> {
+ let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp8_w64_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64> {
+ let AsmString = ps32.OpName # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ }
+
+ }
+
+ multiclass VOPC_Real_with_name_gfx11<bits<9> op, string OpName,
+ string asm_name> {
+ defvar ps32 = !cast<VOPC_Pseudo>(OpName#"_e32");
+ defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_e64");
+ let DecoderNamespace = "GFX11" in {
+ def _e32_gfx11 :
+      // The 32- and 64-bit forms of the instruction have _e32 and _e64,
+      // respectively, appended to their assembly mnemonic.
+      // _e64 is printed as part of the VOPDstS64orS32 operand, whereas
+      // the destination-less 32-bit forms add it to the AsmString here.
+ VOPC_Real<ps32, SIEncodingFamily.GFX11, asm_name#"_e32">,
+ VOPCe<op{7-0}>,
+ MnemonicAlias<ps32.Mnemonic, asm_name>, Requires<[isGFX11Plus]>;
+ def _e64_gfx11 :
+ VOP3_Real<ps64, SIEncodingFamily.GFX11, asm_name>,
+ VOP3a_gfx11<{0, op}, ps64.Pfl>,
+ MnemonicAlias<ps64.Mnemonic, asm_name>, Requires<[isGFX11Plus]> {
+      // The encoding used for VOPC instructions encoded as VOP3 differs from
+      // VOP3e in the destination name (sdst), since VOPC has no vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
+ } // End DecoderNamespace = "GFX11"
+
+ defm : VOPCInstAliases<OpName, "gfx11", NAME>;
+
+ foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e32" #"_dpp");
+ defvar AsmDPP = ps32.Pfl.AsmDPP16;
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e32_dpp_gfx11 : VOPC_DPP16_SIMC<op{7-0}, psDPP,
+ SIEncodingFamily.GFX11, asm_name>;
+ def _e32_dpp_w32_gfx11
+ : VOPC_DPP16<op{7-0}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp_w64_gfx11
+ : VOPC_DPP16<op{7-0}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32, asm_name>;
+ def _e32_dpp8_w32_gfx11
+ : VOPC_DPP8<op{7-0}, ps32, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e32_dpp8_w64_gfx11
+ : VOPC_DPP8<op{7-0}, ps32, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ }
+
+ foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
+ defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e64_dpp_gfx11 : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11>;
+ def _e64_dpp_w32_gfx11
+ : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp_w64_gfx11
+ : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e64_dpp8_gfx11 : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
+ def _e64_dpp8_w32_gfx11
+ : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # " vcc_lo, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave32;
+ }
+ def _e64_dpp8_w64_gfx11
+ : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # " vcc, " # AsmDPP8;
+ let isAsmParserOnly = 1;
+ let WaveSizePredicate = isWave64;
+ }
+ }
+ }
+
+ }
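+
+  // For example, the instantiation further below,
+  //   defm V_CMP_T_F16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16", "v_cmp_t_f16">;
+  // renames the V_CMP_TRU_F16 pseudos to the gfx11 "v_cmp_t_f16" spelling
+  // across the e32, e64, dpp and dpp8 encodings.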
+
+ multiclass VOPCX_Real_gfx11<bits<9> op> {
+ defvar ps32 = !cast<VOPC_Pseudo>(NAME#"_nosdst_e32");
+ defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_nosdst_e64");
+ let DecoderNamespace = "GFX11" in {
+ def _e32_gfx11 :
+ VOPC_Real<ps32, SIEncodingFamily.GFX11>,
+ VOPCe<op{7-0}> {
+ let AsmString = !subst("_nosdst", "", ps32.PseudoInstr)
+ # " " # ps32.AsmOperands;
+ }
+ def _e64_gfx11 :
+ VOP3_Real<ps64, SIEncodingFamily.GFX11>,
+ VOP3a_gfx11<{0, op}, ps64.Pfl> {
+ let Inst{7-0} = ?; // sdst
+ let AsmString = !subst("_nosdst", "", ps64.Mnemonic)
+ # "{_e64} " # ps64.AsmOperands;
+ }
+ } // End DecoderNamespace = "GFX11"
+
+ defm : VOPCXInstAliases<NAME, "gfx11">;
+
+ foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e32" #"_dpp");
+ defvar AsmDPP = ps32.Pfl.AsmDPP16;
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e32_dpp_gfx11
+ : VOPC_DPP16_SIMC<op{7-0}, psDPP, SIEncodingFamily.GFX11> {
+ let AsmString = !subst("_nosdst", "", psDPP.OpName) # " " # AsmDPP;
+ }
+ }
+ defvar AsmDPP8 = ps32.Pfl.AsmDPP8;
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32> {
+ let AsmString = !subst("_nosdst", "", ps32.OpName) # " " # AsmDPP8;
+ }
+ }
+ }
+
+ foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e64" #"_dpp");
+ defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e64_dpp_gfx11
+ : VOPC64_DPP16_NoDst<{0, op}, psDPP>,
+ SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11> {
+ let AsmString = !subst("_nosdst", "", psDPP.OpName)
+ # "{_e64_dpp} " # AsmDPP;
+ }
+ }
+ defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e64_dpp8_gfx11 : VOPC64_DPP8_NoDst<{0, op}, ps64> {
+ let AsmString = !subst("_nosdst", "", ps64.OpName)
+ # "{_e64_dpp} " # AsmDPP8;
+ }
+ }
+ }
+ }
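+
+  // The VOPCX pseudos carry a "_nosdst" infix that is stripped from the
+  // printed mnemonics above; e.g. the v_cmpx_lt_f32_nosdst_e32 pseudo is
+  // emitted as "v_cmpx_lt_f32_e32".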
+
+ multiclass VOPCX_Real_with_name_gfx11<bits<9> op, string OpName,
+ string asm_name> {
+ defvar ps32 = !cast<VOPC_Pseudo>(OpName#"_nosdst_e32");
+ defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_nosdst_e64");
+ let DecoderNamespace = "GFX11" in {
+ def _e32_gfx11
+ : VOPC_Real<ps32, SIEncodingFamily.GFX11, asm_name>,
+ MnemonicAlias<!subst("_nosdst", "", ps32.Mnemonic), asm_name>,
+ Requires<[isGFX11Plus]>,
+ VOPCe<op{7-0}> {
+ let AsmString = asm_name # "{_e32} " # ps32.AsmOperands;
+ }
+ def _e64_gfx11
+ : VOP3_Real<ps64, SIEncodingFamily.GFX11, asm_name>,
+ MnemonicAlias<!subst("_nosdst", "", ps64.Mnemonic), asm_name>,
+ Requires<[isGFX11Plus]>,
+ VOP3a_gfx11<{0, op}, ps64.Pfl> {
+      let Inst{7-0} = ?; // sdst
+ let AsmString = asm_name # "{_e64} " # ps64.AsmOperands;
+ }
+ } // End DecoderNamespace = "GFX11"
+
+ defm : VOPCXInstAliases<OpName, "gfx11", NAME>;
+
+ foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e32"#"_dpp");
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e32_dpp_gfx11 : VOPC_DPP16_SIMC<op{7-0}, psDPP,
+ SIEncodingFamily.GFX11, asm_name>;
+ }
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32, asm_name>;
+ }
+ }
+ foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e64"#"_dpp");
+ defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
+ let DecoderNamespace = "DPPGFX11" in {
+ def _e64_dpp_gfx11
+ : VOPC64_DPP16_NoDst<{0, op}, psDPP, asm_name>,
+ SIMCInstr<psDPP.PseudoInstr, SIEncodingFamily.GFX11> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP;
+ }
+ }
+ defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8;
+ let DecoderNamespace = "DPP8GFX11" in {
+ def _e64_dpp8_gfx11 : VOPC64_DPP8_NoDst<{0, op}, ps64, asm_name> {
+ let AsmString = asm_name # "{_e64_dpp} " # AsmDPP8;
+ }
+ }
+ }
+
+ }
+} // End AssemblerPredicate = isGFX11Only
+
+defm V_CMP_F_F16 : VOPC_Real_gfx11<0x000>;
+defm V_CMP_LT_F16 : VOPC_Real_gfx11<0x001>;
+defm V_CMP_EQ_F16 : VOPC_Real_gfx11<0x002>;
+defm V_CMP_LE_F16 : VOPC_Real_gfx11<0x003>;
+defm V_CMP_GT_F16 : VOPC_Real_gfx11<0x004>;
+defm V_CMP_LG_F16 : VOPC_Real_gfx11<0x005>;
+defm V_CMP_GE_F16 : VOPC_Real_gfx11<0x006>;
+defm V_CMP_O_F16 : VOPC_Real_gfx11<0x007>;
+defm V_CMP_U_F16 : VOPC_Real_gfx11<0x008>;
+defm V_CMP_NGE_F16 : VOPC_Real_gfx11<0x009>;
+defm V_CMP_NLG_F16 : VOPC_Real_gfx11<0x00a>;
+defm V_CMP_NGT_F16 : VOPC_Real_gfx11<0x00b>;
+defm V_CMP_NLE_F16 : VOPC_Real_gfx11<0x00c>;
+defm V_CMP_NEQ_F16 : VOPC_Real_gfx11<0x00d>;
+defm V_CMP_NLT_F16 : VOPC_Real_gfx11<0x00e>;
+defm V_CMP_T_F16 : VOPC_Real_with_name_gfx11<0x00f, "V_CMP_TRU_F16", "v_cmp_t_f16">;
+defm V_CMP_F_F32 : VOPC_Real_gfx11<0x010>;
+defm V_CMP_LT_F32 : VOPC_Real_gfx11<0x011>;
+defm V_CMP_EQ_F32 : VOPC_Real_gfx11<0x012>;
+defm V_CMP_LE_F32 : VOPC_Real_gfx11<0x013>;
+defm V_CMP_GT_F32 : VOPC_Real_gfx11<0x014>;
+defm V_CMP_LG_F32 : VOPC_Real_gfx11<0x015>;
+defm V_CMP_GE_F32 : VOPC_Real_gfx11<0x016>;
+defm V_CMP_O_F32 : VOPC_Real_gfx11<0x017>;
+defm V_CMP_U_F32 : VOPC_Real_gfx11<0x018>;
+defm V_CMP_NGE_F32 : VOPC_Real_gfx11<0x019>;
+defm V_CMP_NLG_F32 : VOPC_Real_gfx11<0x01a>;
+defm V_CMP_NGT_F32 : VOPC_Real_gfx11<0x01b>;
+defm V_CMP_NLE_F32 : VOPC_Real_gfx11<0x01c>;
+defm V_CMP_NEQ_F32 : VOPC_Real_gfx11<0x01d>;
+defm V_CMP_NLT_F32 : VOPC_Real_gfx11<0x01e>;
+defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">;
+defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">;
+defm V_CMP_LT_I16 : VOPC_Real_gfx11<0x031>;
+defm V_CMP_EQ_I16 : VOPC_Real_gfx11<0x032>;
+defm V_CMP_LE_I16 : VOPC_Real_gfx11<0x033>;
+defm V_CMP_GT_I16 : VOPC_Real_gfx11<0x034>;
+defm V_CMP_NE_I16 : VOPC_Real_gfx11<0x035>;
+defm V_CMP_GE_I16 : VOPC_Real_gfx11<0x036>;
+defm V_CMP_LT_U16 : VOPC_Real_gfx11<0x039>;
+defm V_CMP_EQ_U16 : VOPC_Real_gfx11<0x03a>;
+defm V_CMP_LE_U16 : VOPC_Real_gfx11<0x03b>;
+defm V_CMP_GT_U16 : VOPC_Real_gfx11<0x03c>;
+defm V_CMP_NE_U16 : VOPC_Real_gfx11<0x03d>;
+defm V_CMP_GE_U16 : VOPC_Real_gfx11<0x03e>;
+defm V_CMP_F_I32 : VOPC_Real_gfx11<0x040>;
+defm V_CMP_LT_I32 : VOPC_Real_gfx11<0x041>;
+defm V_CMP_EQ_I32 : VOPC_Real_gfx11<0x042>;
+defm V_CMP_LE_I32 : VOPC_Real_gfx11<0x043>;
+defm V_CMP_GT_I32 : VOPC_Real_gfx11<0x044>;
+defm V_CMP_NE_I32 : VOPC_Real_gfx11<0x045>;
+defm V_CMP_GE_I32 : VOPC_Real_gfx11<0x046>;
+defm V_CMP_T_I32 : VOPC_Real_gfx11<0x047>;
+defm V_CMP_F_U32 : VOPC_Real_gfx11<0x048>;
+defm V_CMP_LT_U32 : VOPC_Real_gfx11<0x049>;
+defm V_CMP_EQ_U32 : VOPC_Real_gfx11<0x04a>;
+defm V_CMP_LE_U32 : VOPC_Real_gfx11<0x04b>;
+defm V_CMP_GT_U32 : VOPC_Real_gfx11<0x04c>;
+defm V_CMP_NE_U32 : VOPC_Real_gfx11<0x04d>;
+defm V_CMP_GE_U32 : VOPC_Real_gfx11<0x04e>;
+defm V_CMP_T_U32 : VOPC_Real_gfx11<0x04f>;
+
+defm V_CMP_F_I64 : VOPC_Real_gfx11<0x050>;
+defm V_CMP_LT_I64 : VOPC_Real_gfx11<0x051>;
+defm V_CMP_EQ_I64 : VOPC_Real_gfx11<0x052>;
+defm V_CMP_LE_I64 : VOPC_Real_gfx11<0x053>;
+defm V_CMP_GT_I64 : VOPC_Real_gfx11<0x054>;
+defm V_CMP_NE_I64 : VOPC_Real_gfx11<0x055>;
+defm V_CMP_GE_I64 : VOPC_Real_gfx11<0x056>;
+defm V_CMP_T_I64 : VOPC_Real_gfx11<0x057>;
+defm V_CMP_F_U64 : VOPC_Real_gfx11<0x058>;
+defm V_CMP_LT_U64 : VOPC_Real_gfx11<0x059>;
+defm V_CMP_EQ_U64 : VOPC_Real_gfx11<0x05a>;
+defm V_CMP_LE_U64 : VOPC_Real_gfx11<0x05b>;
+defm V_CMP_GT_U64 : VOPC_Real_gfx11<0x05c>;
+defm V_CMP_NE_U64 : VOPC_Real_gfx11<0x05d>;
+defm V_CMP_GE_U64 : VOPC_Real_gfx11<0x05e>;
+defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
+
+defm V_CMP_CLASS_F16 : VOPC_Real_gfx11<0x07d>;
+defm V_CMP_CLASS_F32 : VOPC_Real_gfx11<0x07e>;
+defm V_CMP_CLASS_F64 : VOPC_Real_gfx11<0x07f>;
+
+defm V_CMPX_F_F16 : VOPCX_Real_gfx11<0x080>;
+defm V_CMPX_LT_F16 : VOPCX_Real_gfx11<0x081>;
+defm V_CMPX_EQ_F16 : VOPCX_Real_gfx11<0x082>;
+defm V_CMPX_LE_F16 : VOPCX_Real_gfx11<0x083>;
+defm V_CMPX_GT_F16 : VOPCX_Real_gfx11<0x084>;
+defm V_CMPX_LG_F16 : VOPCX_Real_gfx11<0x085>;
+defm V_CMPX_GE_F16 : VOPCX_Real_gfx11<0x086>;
+defm V_CMPX_O_F16 : VOPCX_Real_gfx11<0x087>;
+defm V_CMPX_U_F16 : VOPCX_Real_gfx11<0x088>;
+defm V_CMPX_NGE_F16 : VOPCX_Real_gfx11<0x089>;
+defm V_CMPX_NLG_F16 : VOPCX_Real_gfx11<0x08a>;
+defm V_CMPX_NGT_F16 : VOPCX_Real_gfx11<0x08b>;
+defm V_CMPX_NLE_F16 : VOPCX_Real_gfx11<0x08c>;
+defm V_CMPX_NEQ_F16 : VOPCX_Real_gfx11<0x08d>;
+defm V_CMPX_NLT_F16 : VOPCX_Real_gfx11<0x08e>;
+defm V_CMPX_T_F16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16", "v_cmpx_t_f16">;
+defm V_CMPX_F_F32 : VOPCX_Real_gfx11<0x090>;
+defm V_CMPX_LT_F32 : VOPCX_Real_gfx11<0x091>;
+defm V_CMPX_EQ_F32 : VOPCX_Real_gfx11<0x092>;
+defm V_CMPX_LE_F32 : VOPCX_Real_gfx11<0x093>;
+defm V_CMPX_GT_F32 : VOPCX_Real_gfx11<0x094>;
+defm V_CMPX_LG_F32 : VOPCX_Real_gfx11<0x095>;
+defm V_CMPX_GE_F32 : VOPCX_Real_gfx11<0x096>;
+defm V_CMPX_O_F32 : VOPCX_Real_gfx11<0x097>;
+defm V_CMPX_U_F32 : VOPCX_Real_gfx11<0x098>;
+defm V_CMPX_NGE_F32 : VOPCX_Real_gfx11<0x099>;
+defm V_CMPX_NLG_F32 : VOPCX_Real_gfx11<0x09a>;
+defm V_CMPX_NGT_F32 : VOPCX_Real_gfx11<0x09b>;
+defm V_CMPX_NLE_F32 : VOPCX_Real_gfx11<0x09c>;
+defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx11<0x09d>;
+defm V_CMPX_NLT_F32 : VOPCX_Real_gfx11<0x09e>;
+defm V_CMPX_T_F32 : VOPCX_Real_with_name_gfx11<0x09f, "V_CMPX_TRU_F32", "v_cmpx_t_f32">;
+
+defm V_CMPX_F_F64 : VOPCX_Real_gfx11<0x0a0>;
+defm V_CMPX_LT_F64 : VOPCX_Real_gfx11<0x0a1>;
+defm V_CMPX_EQ_F64 : VOPCX_Real_gfx11<0x0a2>;
+defm V_CMPX_LE_F64 : VOPCX_Real_gfx11<0x0a3>;
+defm V_CMPX_GT_F64 : VOPCX_Real_gfx11<0x0a4>;
+defm V_CMPX_LG_F64 : VOPCX_Real_gfx11<0x0a5>;
+defm V_CMPX_GE_F64 : VOPCX_Real_gfx11<0x0a6>;
+defm V_CMPX_O_F64 : VOPCX_Real_gfx11<0x0a7>;
+defm V_CMPX_U_F64 : VOPCX_Real_gfx11<0x0a8>;
+defm V_CMPX_NGE_F64 : VOPCX_Real_gfx11<0x0a9>;
+defm V_CMPX_NLG_F64 : VOPCX_Real_gfx11<0x0aa>;
+defm V_CMPX_NGT_F64 : VOPCX_Real_gfx11<0x0ab>;
+defm V_CMPX_NLE_F64 : VOPCX_Real_gfx11<0x0ac>;
+defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11<0x0ad>;
+defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11<0x0ae>;
+defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">;
+
+defm V_CMPX_LT_I16 : VOPCX_Real_gfx11<0x0b1>;
+defm V_CMPX_EQ_I16 : VOPCX_Real_gfx11<0x0b2>;
+defm V_CMPX_LE_I16 : VOPCX_Real_gfx11<0x0b3>;
+defm V_CMPX_GT_I16 : VOPCX_Real_gfx11<0x0b4>;
+defm V_CMPX_NE_I16 : VOPCX_Real_gfx11<0x0b5>;
+defm V_CMPX_GE_I16 : VOPCX_Real_gfx11<0x0b6>;
+defm V_CMPX_LT_U16 : VOPCX_Real_gfx11<0x0b9>;
+defm V_CMPX_EQ_U16 : VOPCX_Real_gfx11<0x0ba>;
+defm V_CMPX_LE_U16 : VOPCX_Real_gfx11<0x0bb>;
+defm V_CMPX_GT_U16 : VOPCX_Real_gfx11<0x0bc>;
+defm V_CMPX_NE_U16 : VOPCX_Real_gfx11<0x0bd>;
+defm V_CMPX_GE_U16 : VOPCX_Real_gfx11<0x0be>;
+defm V_CMPX_F_I32 : VOPCX_Real_gfx11<0x0c0>;
+defm V_CMPX_LT_I32 : VOPCX_Real_gfx11<0x0c1>;
+defm V_CMPX_EQ_I32 : VOPCX_Real_gfx11<0x0c2>;
+defm V_CMPX_LE_I32 : VOPCX_Real_gfx11<0x0c3>;
+defm V_CMPX_GT_I32 : VOPCX_Real_gfx11<0x0c4>;
+defm V_CMPX_NE_I32 : VOPCX_Real_gfx11<0x0c5>;
+defm V_CMPX_GE_I32 : VOPCX_Real_gfx11<0x0c6>;
+defm V_CMPX_T_I32 : VOPCX_Real_gfx11<0x0c7>;
+defm V_CMPX_F_U32 : VOPCX_Real_gfx11<0x0c8>;
+defm V_CMPX_LT_U32 : VOPCX_Real_gfx11<0x0c9>;
+defm V_CMPX_EQ_U32 : VOPCX_Real_gfx11<0x0ca>;
+defm V_CMPX_LE_U32 : VOPCX_Real_gfx11<0x0cb>;
+defm V_CMPX_GT_U32 : VOPCX_Real_gfx11<0x0cc>;
+defm V_CMPX_NE_U32 : VOPCX_Real_gfx11<0x0cd>;
+defm V_CMPX_GE_U32 : VOPCX_Real_gfx11<0x0ce>;
+defm V_CMPX_T_U32 : VOPCX_Real_gfx11<0x0cf>;
+
+defm V_CMPX_F_I64 : VOPCX_Real_gfx11<0x0d0>;
+defm V_CMPX_LT_I64 : VOPCX_Real_gfx11<0x0d1>;
+defm V_CMPX_EQ_I64 : VOPCX_Real_gfx11<0x0d2>;
+defm V_CMPX_LE_I64 : VOPCX_Real_gfx11<0x0d3>;
+defm V_CMPX_GT_I64 : VOPCX_Real_gfx11<0x0d4>;
+defm V_CMPX_NE_I64 : VOPCX_Real_gfx11<0x0d5>;
+defm V_CMPX_GE_I64 : VOPCX_Real_gfx11<0x0d6>;
+defm V_CMPX_T_I64 : VOPCX_Real_gfx11<0x0d7>;
+defm V_CMPX_F_U64 : VOPCX_Real_gfx11<0x0d8>;
+defm V_CMPX_LT_U64 : VOPCX_Real_gfx11<0x0d9>;
+defm V_CMPX_EQ_U64 : VOPCX_Real_gfx11<0x0da>;
+defm V_CMPX_LE_U64 : VOPCX_Real_gfx11<0x0db>;
+defm V_CMPX_GT_U64 : VOPCX_Real_gfx11<0x0dc>;
+defm V_CMPX_NE_U64 : VOPCX_Real_gfx11<0x0dd>;
+defm V_CMPX_GE_U64 : VOPCX_Real_gfx11<0x0de>;
+defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>;
+defm V_CMPX_CLASS_F16 : VOPCX_Real_gfx11<0x0fd>;
+defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11<0x0fe>;
+defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11<0x0ff>;
+
+//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicate = isGFX10Plus in {
+let AssemblerPredicate = isGFX10Only in {
multiclass VOPC_Real_gfx10<bits<9> op> {
let DecoderNamespace = "GFX10" in {
def _e32_gfx10 :
@@ -931,7 +1722,7 @@ let AssemblerPredicate = isGFX10Plus in {
defm : VOPCXInstAliases<NAME, "gfx10">;
}
-} // End AssemblerPredicate = isGFX10Plus
+} // End AssemblerPredicate = isGFX10Only
defm V_CMP_LT_I16 : VOPC_Real_gfx10<0x089>;
defm V_CMP_EQ_I16 : VOPC_Real_gfx10<0x08a>;
@@ -1025,6 +1816,12 @@ multiclass VOPCX_Real_gfx6_gfx7<bits<9> op> :
multiclass VOPCX_Real_gfx6_gfx7_gfx10 <bits<9> op> :
VOPC_Real_gfx6_gfx7<op>, VOPCX_Real_gfx10<op>;
+multiclass VOPC_Real_gfx6_gfx7_gfx10_gfx11<bits<9> op> :
+ VOPC_Real_gfx6_gfx7_gfx10<op>, VOPC_Real_gfx11<op>;
+
+multiclass VOPCX_Real_gfx6_gfx7_gfx10_gfx11<bits<9> op> :
+ VOPCX_Real_gfx6_gfx7_gfx10<op>, VOPCX_Real_gfx11<op>;
+
defm V_CMP_F_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x000>;
defm V_CMP_LT_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x001>;
defm V_CMP_EQ_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x002>;
@@ -1057,21 +1854,21 @@ defm V_CMPX_NLE_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01c>;
defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01d>;
defm V_CMPX_NLT_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01e>;
defm V_CMPX_TRU_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01f>;
-defm V_CMP_F_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x020>;
-defm V_CMP_LT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x021>;
-defm V_CMP_EQ_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x022>;
-defm V_CMP_LE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x023>;
-defm V_CMP_GT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x024>;
-defm V_CMP_LG_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x025>;
-defm V_CMP_GE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x026>;
-defm V_CMP_O_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x027>;
-defm V_CMP_U_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x028>;
-defm V_CMP_NGE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x029>;
-defm V_CMP_NLG_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02a>;
-defm V_CMP_NGT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02b>;
-defm V_CMP_NLE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02c>;
-defm V_CMP_NEQ_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02d>;
-defm V_CMP_NLT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02e>;
+defm V_CMP_F_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x020>;
+defm V_CMP_LT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x021>;
+defm V_CMP_EQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x022>;
+defm V_CMP_LE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x023>;
+defm V_CMP_GT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x024>;
+defm V_CMP_LG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x025>;
+defm V_CMP_GE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x026>;
+defm V_CMP_O_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x027>;
+defm V_CMP_U_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x028>;
+defm V_CMP_NGE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x029>;
+defm V_CMP_NLG_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02a>;
+defm V_CMP_NGT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02b>;
+defm V_CMP_NLE_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02c>;
+defm V_CMP_NEQ_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02d>;
+defm V_CMP_NLT_F64 : VOPC_Real_gfx6_gfx7_gfx10_gfx11<0x02e>;
defm V_CMP_TRU_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_CMPX_F_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x030>;
defm V_CMPX_LT_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x031>;
diff --git a/llvm/lib/Target/AMDGPU/VOPDInstructions.td b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
new file mode 100644
index 000000000000..420f18436095
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/VOPDInstructions.td
@@ -0,0 +1,159 @@
+//===-- VOPDInstructions.td - Vector Instruction Definitions --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Encodings
+//===----------------------------------------------------------------------===//
+
+class VOPDe<bits<4> opX, bits<5> opY> : Enc64 {
+ bits<9> src0X;
+ bits<8> vsrc1X;
+ bits<8> vdstX;
+ bits<9> src0Y;
+ bits<8> vsrc1Y;
+ bits<8> vdstY;
+
+ let Inst{8-0} = src0X;
+ let Inst{16-9} = vsrc1X;
+ let Inst{21-17} = opY;
+ let Inst{25-22} = opX;
+ let Inst{31-26} = 0x32; // encoding
+ let Inst{40-32} = src0Y;
+ let Inst{48-41} = vsrc1Y;
+  let Inst{55-49} = vdstY{7-1}; // vdstY{0} is not encoded
+ let Inst{63-56} = vdstX;
+}
+
+class VOPD_MADKe<bits<4> opX, bits<5> opY> : Enc96 {
+ bits<9> src0X;
+ bits<8> vsrc1X;
+ bits<8> vdstX;
+ bits<9> src0Y;
+ bits<8> vsrc1Y;
+ bits<8> vdstY;
+ bits<32> imm;
+
+ let Inst{8-0} = src0X;
+ let Inst{16-9} = vsrc1X;
+ let Inst{21-17} = opY;
+ let Inst{25-22} = opX;
+ let Inst{31-26} = 0x32; // encoding
+ let Inst{40-32} = src0Y;
+ let Inst{48-41} = vsrc1Y;
+  let Inst{55-49} = vdstY{7-1}; // vdstY{0} is not encoded
+ let Inst{63-56} = vdstX;
+ let Inst{95-64} = imm;
+}
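+
+// The MADK form appends a 32-bit literal (Inst{95-64}), making the encoding
+// 96 bits wide; VOPD_MADK below therefore sets Size = 12, versus Size = 8 for
+// the plain 64-bit VOPD encoding.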
+
+//===----------------------------------------------------------------------===//
+// VOPD classes
+//===----------------------------------------------------------------------===//
+
+class VOPD_Base<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
+ VOPD_Component XasVC, VOPD_Component YasVC>
+ : VOPAnyCommon<outs, ins, asm, []>,
+ VOP<NAME>,
+ SIMCInstr<NAME, SIEncodingFamily.GFX11> {
+ // Fields for table indexing
+ Instruction Opcode = !cast<Instruction>(NAME);
+ bits<5> OpX = XasVC.VOPDOp;
+ bits<5> OpY = YasVC.VOPDOp;
+
+ let VALU = 1;
+
+ let DecoderNamespace = "GFX11";
+ let AssemblerPredicate = isGFX11Plus;
+ let WaveSizePredicate = isWave32;
+ let isCodeGenOnly = 0;
+ let SubtargetPredicate = isGFX11Plus;
+ let AsmMatchConverter = "cvtVOPD";
+ let Size = 8;
+ let ReadsModeReg = !or(VDX.ReadsModeReg, VDY.ReadsModeReg);
+ let mayRaiseFPException = ReadsModeReg;
+
+ let Uses = RegListUnion<VDX.Uses, VDY.Uses>.ret;
+ let Defs = RegListUnion<VDX.Defs, VDY.Defs>.ret;
+ let SchedRW = !listconcat(VDX.SchedRW, VDY.SchedRW);
+}
+
+class VOPD<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
+ VOPD_Component XasVC, VOPD_Component YasVC>
+ : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
+ VOPDe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
+  // v_dual_mov_b32 takes no vsrc1, so its vsrc1 field is encoded as zero.
+  let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
+  let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
+}
+
+class VOPD_MADK<dag outs, dag ins, string asm, VOP_Pseudo VDX, VOP_Pseudo VDY,
+ VOPD_Component XasVC, VOPD_Component YasVC>
+ : VOPD_Base<outs, ins, asm, VDX, VDY, XasVC, YasVC>,
+ VOPD_MADKe<XasVC.VOPDOp{3-0}, YasVC.VOPDOp> {
+ let Inst{16-9} = !if (!eq(VDX.Mnemonic, "v_mov_b32"), 0x0, vsrc1X);
+ let Inst{48-41} = !if (!eq(VDY.Mnemonic, "v_mov_b32"), 0x0, vsrc1Y);
+ let Size = 12;
+}
+
+// V_DUAL_DOT2ACC_F32_BF16 is a legal instruction, but V_DOT2ACC_F32_BF16 is
+// not. Since we only generate the DUAL form by converting from the normal
+// form, and that normal form does not exist, we will never generate it.
+defvar VOPDYPseudos = [
+ "V_FMAC_F32_e32", "V_FMAAK_F32", "V_FMAMK_F32", "V_MUL_F32_e32",
+ "V_ADD_F32_e32", "V_SUB_F32_e32", "V_SUBREV_F32_e32", "V_MUL_LEGACY_F32_e32",
+ "V_MOV_B32_e32", "V_CNDMASK_B32_e32", "V_MAX_F32_e32", "V_MIN_F32_e32",
+ "V_DOT2C_F32_F16_e32", "V_ADD_U32_e32", "V_LSHLREV_B32_e32", "V_AND_B32_e32"
+];
+defvar VOPDXPseudos = VOPDYPseudos[0...VOPDX_Max_Index];
+
+def VOPDDstYOperand : RegisterOperand<VGPR_32, "printRegularOperand"> {
+ let DecoderMethod = "decodeOperandVOPDDstY";
+}
+
+foreach x = VOPDXPseudos in {
+ foreach y = VOPDYPseudos in {
+ defvar xInst = !cast<VOP_Pseudo>(x);
+ defvar yInst = !cast<VOP_Pseudo>(y);
+ defvar XasVC = !cast<VOPD_Component>(x);
+ defvar YasVC = !cast<VOPD_Component>(y);
+ defvar isMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"),
+ !eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
+    // If X or Y is MADK (i.e. has a mandatory immediate), all src operands
+    // that may contain an optional literal must use the VSrc_*_Deferred
+    // operand type. Optional literal operands in MADK VOPD components always
+    // use this operand form. If both X and Y are MADK, the mandatory literal
+    // of X must additionally use an alternate operand format which defers to
+    // the 'real' Y literal.
+ defvar isOpXMADK = !or(!eq(x, "V_FMAAK_F32"), !eq(x, "V_FMAMK_F32"));
+ defvar isOpYMADK = !or(!eq(y, "V_FMAAK_F32"), !eq(y, "V_FMAMK_F32"));
+ defvar OpName = "V_DUAL_" # !substr(x,2) # "_X_" # !substr(y,2);
+ defvar outs = (outs VGPRSrc_32:$vdstX, VOPDDstYOperand:$vdstY);
+ if !or(isOpXMADK, isOpYMADK) then {
+ if !and(isOpXMADK, isOpYMADK) then {
+ defvar X_MADK_Pfl = !cast<VOP_MADK_Base>(xInst.Pfl);
+ defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
+ defvar asm = XasVC.VOPDName #" "# X_MADK_Pfl.AsmVOPDXDeferred #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ } else {
+ defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
+ if isOpXMADK then {
+ assert !not(isOpYMADK), "Expected only OpX as MADK";
+ defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDYDeferred);
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ } else {
+ assert !not(isOpXMADK), "Expected only OpY as MADK";
+ defvar ins = !con(xInst.Pfl.InsVOPDXDeferred, yInst.Pfl.InsVOPDY);
+ def OpName : VOPD_MADK<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ }
+ }
+ } else {
+ defvar ins = !con(xInst.Pfl.InsVOPDX, yInst.Pfl.InsVOPDY);
+ defvar asm = XasVC.VOPDName #" "# xInst.Pfl.AsmVOPDX #" :: "# YasVC.VOPDName #" "# yInst.Pfl.AsmVOPDY;
+ def OpName : VOPD<outs, ins, asm, xInst, yInst, XasVC, YasVC>;
+ }
+ }
+}
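+
+// Example of a generated record: for x = "V_FMAC_F32_e32" and
+// y = "V_MOV_B32_e32", the loop above defines
+// V_DUAL_FMAC_F32_e32_X_MOV_B32_e32, whose assembly takes the form
+// "v_dual_fmac_f32 ... :: v_dual_mov_b32 ...".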
+
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index a8368892c565..8cd3d2fe2c47 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -30,6 +30,16 @@ class VOP <string opName> {
string OpName = opName;
}
+// The first 13 instructions from VOPDY are also valid as VOPDX.
+// DOT2ACC_F32_BF16 is omitted.
+defvar VOPDX_Max_Index = 12;
+
+class VOPD_Component<bits<5> OpIn, string vOPDName> {
+ Instruction BaseVOP = !cast<Instruction>(NAME);
+ string VOPDName = "v_dual_" # !substr(vOPDName, 2);
+ bits<5> VOPDOp = OpIn;
+ bit CanBeVOPDX = !le(VOPDOp, VOPDX_Max_Index);
+}
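+
+// Illustration (hypothetical operand values): mixing in
+// VOPD_Component<4, "v_add_f32"> yields VOPDName = "v_dual_add_f32" and
+// CanBeVOPDX = 1, since 4 <= VOPDX_Max_Index.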
+
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
@@ -92,6 +102,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let VOP3_OPSEL = isVop3OpSel;
let IsPacked = P.IsPacked;
let IsMAI = P.IsMAI;
+ let IsWMMA = P.IsWMMA;
let AsmOperands = !if(isVop3OpSel,
P.AsmVOP3OpSel,
@@ -144,9 +155,9 @@ class VOP_Real<VOP_Pseudo ps> {
bit IsSingle = ps.Pfl.IsSingle;
}
-class VOP3_Real <VOP_Pseudo ps, int EncodingFamily> :
+class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemonic> :
VOP_Real <ps>,
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ InstSI <ps.OutOperandList, ps.InOperandList, asm_name # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let VALU = 1;
@@ -155,9 +166,6 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily> :
let isCodeGenOnly = 0;
let UseNamedOperandTable = 1;
- let Constraints = ps.Constraints;
- let DisableEncoding = ps.DisableEncoding;
-
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let OtherPredicates = ps.OtherPredicates;
@@ -179,8 +187,12 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily> :
// XXX - Is there any reason to distinguish this from regular VOP3
// here?
-class VOP3P_Real<VOP_Pseudo ps, int EncodingFamily> :
- VOP3_Real<ps, EncodingFamily>;
+class VOP3P_Real<VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemonic> :
+ VOP3_Real<ps, EncodingFamily, asm_name> {
+
+  // The v_wmma pseudos have extra constraints that we do not want to impose
+  // on the real instruction.
+ let Constraints = !if(!eq(!substr(ps.Mnemonic,0,6), "v_wmma"), "", ps.Constraints);
+}
class VOP3a<VOPProfile P> : Enc64 {
bits<4> src0_modifiers;
@@ -217,6 +229,8 @@ class VOP3a_gfx10<bits<10> op, VOPProfile p> : VOP3a<p> {
let Inst{31-26} = 0x35;
}
+class VOP3a_gfx11<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p>;
+
class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
let Inst{25-16} = op;
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
@@ -232,6 +246,8 @@ class VOP3e_gfx10<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p> {
let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0);
}
+class VOP3e_gfx11<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p>;
+
class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
bits<8> vdst;
let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
@@ -251,6 +267,9 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
}
+class VOP3OpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>;
+
// NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
bits<2> attrchan;
@@ -285,6 +304,8 @@ class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
let Inst{62} = !if(p.HasSrc0Mods, src0_modifiers{0}, 0);
}
+class VOP3Interp_gfx11<bits<10> op, VOPProfile p> : VOP3Interp_gfx10<op, p>;
+
class VOP3be <VOPProfile P> : Enc64 {
bits<8> vdst;
bits<2> src0_modifiers;
@@ -310,7 +331,6 @@ class VOP3be <VOPProfile P> : Enc64 {
class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
bits<8> vdst;
- // neg, neg_hi, op_sel put in srcN_modifiers
bits<4> src0_modifiers;
bits<9> src0;
bits<4> src1_modifiers;
@@ -372,11 +392,42 @@ class VOP3Pe_MAI <bits<7> op, VOPProfile P, bit acc_cd = 0> : Enc64 {
let Inst{63-61} = !if(P.HasSrc1, blgp, 0);
}
+class VOP3Pe_SMFMAC <bits<7> op> : Enc64 {
+ bits<10> vdst; // VGPR or AGPR, but not SGPR. vdst{8} is not encoded in the instruction.
+ bits<10> src0;
+ bits<10> src1;
+ bits<9> idx;
+ bits<3> blgp;
+ bits<3> cbsz;
+ bits<4> abid;
+
+ let blgp = 0;
+
+ let Inst{7-0} = vdst{7-0};
+
+ let Inst{10-8} = cbsz;
+ let Inst{14-11} = abid;
+
+ let Inst{15} = vdst{9}; // acc(vdst)
+
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x1a7; // encoding
+ let Inst{40-32} = src0{8-0};
+ let Inst{49-41} = src1{8-0};
+ let Inst{58-50} = idx;
+
+ let Inst{59} = src0{9}; // acc(0)
+ let Inst{60} = src1{9}; // acc(1)
+
+ let Inst{63-61} = blgp;
+}
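+// vdst{9}, src0{9} and src1{9} are the per-operand acc bits noted above
+// (encoded at Inst{15}, Inst{59} and Inst{60}), presumably selecting the
+// AGPR rather than the VGPR bank for each operand.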
class VOP3Pe_gfx10 <bits<7> op, VOPProfile P> : VOP3Pe<op, P> {
let Inst{31-23} = 0x198; //encoding
}
+class VOP3Pe_gfx11<bits<7> op, VOPProfile P> : VOP3Pe_gfx10<op, P>;
+
class VOP3be_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3be<p> {
let Inst{25-17} = op;
}
@@ -388,6 +439,8 @@ class VOP3be_gfx10<bits<10> op, VOPProfile p> : VOP3be<p> {
let Inst{31-26} = 0x35;
}
+class VOP3be_gfx11<bits<10> op, VOPProfile p> : VOP3be_gfx10<op, p>;
+
class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> {
bits<1> clamp;
let Inst{25-16} = op;
@@ -621,8 +674,89 @@ class VOP_DPPe<VOPProfile P, bit IsDPP16=0> : Enc64 {
let Inst{63-60} = row_mask;
}
-class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
- InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, pattern>,
+class VOP3_DPPe_Fields_Base {
+ bits<9> dpp_ctrl;
+ bits<1> bound_ctrl;
+ bits<4> bank_mask;
+ bits<4> row_mask;
+ bit fi;
+}
+class VOP3_DPPe_Fields : VOP3_DPPe_Fields_Base {
+ bits<8> src0;
+}
+
+// "Common" here means shared between the DPP and DPP8 encodings.
+class VOP3_DPPe_Common_Base<bits<10> op, VOPProfile P> : Enc96 {
+ bits<4> src0_modifiers;
+ bits<3> src1_modifiers;
+ bits<3> src2_modifiers;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
+ // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs.
+ let Inst{11} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{2}, 0),?);
+ let Inst{12} = !if(P.HasOpSel,!if(P.HasSrc1Mods, src1_modifiers{2}, 0),?);
+ let Inst{13} = !if(P.HasOpSel,!if(P.HasSrc2Mods, src2_modifiers{2}, 0),?);
+ let Inst{14} = !if(P.HasOpSel,!if(P.HasSrc0Mods, src0_modifiers{3}, 0),?);
+ let Inst{15} = !if(P.HasClamp, clamp, 0);
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x35;
+
+ let Inst{60-59} = !if(P.HasOMod, omod, 0);
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
+}
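+// Bits 0-63 mirror the plain 64-bit VOP3 encoding (opcode at Inst{25-16},
+// major opcode 0x35 at Inst{31-26}); bits 64-95, filled in by the DPP and
+// DPP8 subclasses below, carry the DPP controls.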
+
+class VOP3_DPPe_Common<bits<10> op, VOPProfile P> : VOP3_DPPe_Common_Base<op, P> {
+ bits<8> vdst;
+ bits<9> src1;
+ bits<9> src2;
+
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+}
+
+class VOP3P_DPPe_Common_Base<bits<7> op, VOPProfile P> : Enc96 {
+ bits<4> src0_modifiers;
+ bits<4> src1_modifiers;
+ bits<4> src2_modifiers;
+ bits<1> clamp;
+
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
+ let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0)
+ let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1)
+ let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2)
+ let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, ?); // op_sel_hi(2)
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x198; // encoding
+ let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, ?); // op_sel_hi(0)
+ let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, ?); // op_sel_hi(1)
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
+}
+
+class VOP3P_DPPe_Common<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P> {
+ bits<8> vdst;
+ bits<9> src1;
+ bits<9> src2;
+
+ let Inst{7-0} = vdst;
+ let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+}
+
+class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
+ dag Ins = P.InsDPP, string asmOps = P.AsmDPP> :
+ InstSI <P.OutsDPP, Ins, OpName#asmOps, pattern>,
VOP <OpName>,
SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE> {
@@ -645,7 +779,7 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let isConvergent = 1;
string Mnemonic = OpName;
- string AsmOperands = P.AsmDPP;
+ string AsmOperands = asmOps;
let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", "");
let SubtargetPredicate = !if(P.HasExt64BitDPP, Has64BitDPP, HasDPP);
@@ -659,6 +793,17 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
VOPProfile Pfl = P;
}
+class VOP3_DPP_Pseudo <string OpName, VOPProfile P> :
+ VOP_DPP_Pseudo <OpName, P, [], P.InsVOP3DPP, P.AsmVOP3DPP> {
+ let PseudoInstr = OpName#"_e64"#"_dpp";
+ let OutOperandList = P.OutsVOP3DPP;
+ let Size = 12;
+ let VOP3 = 1;
+ let AsmMatchConverter = "cvtVOP3DPP";
+ let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
+ AMDGPUAsmVariants.Disable);
+}
+
class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
@@ -679,6 +824,7 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
let isConvergent = ps.isConvergent;
let SubtargetPredicate = ps.SubtargetPredicate;
let AssemblerPredicate = ps.AssemblerPredicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let UseNamedOperandTable = ps.UseNamedOperandTable;
@@ -692,11 +838,10 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
let TRANS = ps.TRANS;
}
-class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
- dag InsDPP = !if(IsDPP16, P.InsDPP16, P.InsDPP),
- string AsmDPP = !if(IsDPP16, P.AsmDPP16, P.AsmDPP)> :
- InstSI <P.OutsDPP, InsDPP, OpName#AsmDPP, []>,
- VOP_DPPe<P, IsDPP16> {
+class VOP_DPP_Base <string OpName, VOPProfile P,
+ dag InsDPP,
+                     string AsmDPP> :
+ InstSI <P.OutsDPP, InsDPP, OpName#AsmDPP, []> {
let mayLoad = 0;
let mayStore = 0;
@@ -717,6 +862,59 @@ class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
let DecoderNamespace = "DPP";
}
+class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if(IsDPP16, P.InsDPP16, P.InsDPP),
+ string AsmDPP = !if(IsDPP16, P.AsmDPP16, P.AsmDPP)> :
+ VOP_DPP_Base<OpName, P, InsDPP, AsmDPP>, VOP_DPPe<P, IsDPP16>;
+
+class VOP3_DPP_Base <string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+ string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
+ VOP_DPP_Base<OpName, P, InsDPP, AsmDPP> {
+ let OutOperandList = P.OutsVOP3DPP;
+ let AsmMatchConverter = "cvtVOP3DPP";
+ let VOP3 = 1;
+ let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
+ AMDGPUAsmVariants.Disable);
+ let Size = 12;
+}
+
+class VOP3_DPP <bits<10> op, string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+ string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
+ VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3_DPPe_Common<op, P>,
+ VOP3_DPPe_Fields {
+
+ let Inst{40-32} = 0xfa;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{80-72} = dpp_ctrl;
+ let Inst{82} = !if(IsDPP16, fi, ?);
+ let Inst{83} = bound_ctrl;
+
+ // Inst{87-84} ignored by hw
+ let Inst{91-88} = bank_mask;
+ let Inst{95-92} = row_mask;
+}
+
+class VOP3P_DPP <bits<7> op, string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP),
+ string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> :
+ VOP3_DPP_Base<OpName, P, IsDPP16, InsDPP, AsmDPP>, VOP3P_DPPe_Common<op, P>,
+ VOP3_DPPe_Fields {
+
+ let VOP3P = 1;
+
+ let Inst{40-32} = 0xfa;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{80-72} = dpp_ctrl;
+ let Inst{82} = !if(IsDPP16, fi, ?);
+ let Inst{83} = bound_ctrl;
+
+ // Inst{87-84} ignored by hw
+ let Inst{91-88} = bank_mask;
+ let Inst{95-92} = row_mask;
+}
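+// VOP3_DPP and VOP3P_DPP lay out bits 64-95 identically: 0xfa in the 9-bit
+// src0 slot (Inst{40-32}) marks the DPP operand, followed by dpp_ctrl, fi,
+// bound_ctrl and the bank/row masks; only the mixed-in low-dword opcode
+// encoding differs.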
+
class VOP_DPP8e<VOPProfile P> : Enc64 {
bits<8> src0;
bits<24> dpp8;
@@ -726,9 +924,14 @@ class VOP_DPP8e<VOPProfile P> : Enc64 {
let Inst{63-40} = dpp8{23-0};
}
-class VOP_DPP8<string OpName, VOPProfile P> :
- InstSI<P.OutsDPP8, P.InsDPP8, OpName#P.AsmDPP8, []>,
- VOP_DPP8e<P> {
+class VOP3_DPP8e_Fields {
+ bits<8> src0;
+ bits<24> dpp8;
+ bits<9> fi;
+}
+
+class VOP_DPP8_Base<string OpName, VOPProfile P, dag InsDPP8 = P.InsDPP8, string AsmDPP8 = P.AsmDPP8> :
+ InstSI<P.OutsDPP8, InsDPP8, OpName#AsmDPP8, []> {
let mayLoad = 0;
let mayStore = 0;
@@ -742,12 +945,44 @@ class VOP_DPP8<string OpName, VOPProfile P> :
let AsmMatchConverter = "cvtDPP8";
let SubtargetPredicate = HasDPP8;
let AssemblerPredicate = HasDPP8;
- let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
- AMDGPUAsmVariants.Disable);
+ let AsmVariantName = AMDGPUAsmVariants.DPP;
let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
}
+class VOP_DPP8<string OpName, VOPProfile P> :
+ VOP_DPP8_Base<OpName, P>, VOP_DPP8e<P>;
+
+class VOP3_DPP8_Base<string OpName, VOPProfile P> :
+ VOP_DPP8_Base<OpName, P, P.InsVOP3DPP8, P.AsmVOP3DPP8> {
+ let OutOperandList = P.OutsVOP3DPP8;
+ let AsmMatchConverter = "cvtVOP3DPP8";
+ let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP,
+ AMDGPUAsmVariants.Disable);
+ let VOP3 = 1;
+ let Size = 12;
+}
+
+class VOP3_DPP8<bits<10> op, string OpName, VOPProfile P> :
+ VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P>,
+ VOP3_DPP8e_Fields {
+
+ let Inst{40-32} = fi;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{95-72} = dpp8{23-0};
+}
+
+class VOP3P_DPP8<bits<7> op, string OpName, VOPProfile P> :
+ VOP3_DPP8_Base<OpName, P>, VOP3P_DPPe_Common<op, P>,
+ VOP3_DPP8e_Fields {
+
+ let VOP3P = 1;
+ let Inst{40-32} = fi;
+ let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{95-72} = dpp8{23-0};
+}
+
def DPP8Mode {
int FI_0 = 0xE9;
int FI_1 = 0xEA;
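  // Presumed meaning (illustration only): these are the two 9-bit src-field
  // encodings that flag a DPP8 operand; 0xEA differs from 0xE9 only in the
  // Fetch-Inoperative (FI) bit, matching the 9-bit fi field that
  // VOP3_DPP8e_Fields encodes at Inst{40-32}.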
@@ -780,14 +1015,12 @@ class getDivergentFrag<SDPatternOperator Op> {
}
class VOPPatGen<SDPatternOperator Op, VOPProfile P> {
-
PatFrag Operator = getDivergentFrag < Op >.ret;
dag Ins = !foreach(tmp, P.Ins32, !subst(ins, Operator,
!subst(P.Src0RC32, P.Src0VT,
!subst(P.Src1RC32, P.Src1VT, tmp))));
-
dag Outs = !foreach(tmp, P.Outs32, !subst(outs, set,
!subst(P.DstRC, P.DstVT, tmp)));
@@ -827,12 +1060,379 @@ class VOPBinOpClampPat<SDPatternOperator node, Instruction inst, ValueType vt> :
DSTCLAMP.ENABLE)
>;
+//===----------------------------------------------------------------------===//
+// VOP3 Classes
+//===----------------------------------------------------------------------===//
+
+class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
+ dag src0 = !if(P.HasOMod,
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
+
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
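+// Illustration (hypothetical f32 profile, not part of the change): with a
+// two-source node such as fadd, ret2 selects roughly
+//   (set f32:$vdst,
+//     (DivergentFragOrOp<fadd, P>.ret
+//       (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+//       (f32 (VOP3Mods  f32:$src1, i32:$src1_modifiers))))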
+
+class getVOP3PModPat<VOPProfile P, SDPatternOperator node, bit HasExplicitClamp,
+ bit IsDOT = 0,
+ ComplexPattern SrcPat = !if(IsDOT, VOP3PModsDOT, VOP3PMods)> {
+ dag src0_dag = (P.Src0VT (SrcPat P.Src0VT:$src0, i32:$src0_modifiers));
+ dag src1_dag = (P.Src1VT (SrcPat P.Src1VT:$src1, i32:$src1_modifiers));
+ dag src2_dag = (P.Src2VT (SrcPat P.Src2VT:$src2, i32:$src2_modifiers));
+ dag clamp_dag = (i1 timm:$clamp);
+
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ !if(HasExplicitClamp,
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, src2_dag, clamp_dag),
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, src2_dag)))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ !if(HasExplicitClamp,
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag, clamp_dag),
+ (DivergentFragOrOp<node, P>.ret src0_dag, src1_dag)))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ !if(HasExplicitClamp,
+ (DivergentFragOrOp<node, P>.ret src0_dag, clamp_dag),
+ (DivergentFragOrOp<node, P>.ret src0_dag)))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers),
+ (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class getVOP3FromVOP2Pat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
+}
+// In VOP1, we can have clamp and omod even if !HasModifiers
+class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
+ dag src0 =
+ !if(P.HasOMod,
+ !if(P.HasClamp,
+ (VOP3Mods0 P.Src0VT:$src0, i1:$clamp, i32:$omod),
+ (VOP3Mods0 P.Src0VT:$src0, i32:$omod)), // impossible?
+ !if(P.HasClamp,
+ (VOP3Mods0 P.Src0VT:$src0, i1:$clamp),
+ (VOP3Mods0 P.Src0VT:$src0))
+ );
+ list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret (P.Src0VT src0), P.Src1VT:$src1, P.Src2VT:$src2))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret (P.Src0VT src0), P.Src1VT:$src1))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i1:$clamp))];
+ list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, i1:$clamp))];
+ list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, i1:$clamp))];
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret = !if(!eq(P.Src0VT, P.Src1VT),
+ // mfma
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
+ timm:$cbsz, timm:$abid, timm:$blgp))],
+ // smfmac
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, i32:$idx,
+ timm:$cbsz, timm:$abid))]);
+}
+
+class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
+ bit HasClamp = Clamp;
+ bit HasOpSel = OpSel;
+ bit IsPacked = Packed;
+ bit IsMAI = MAI;
+}
+
+def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
+def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
+def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
+def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
+def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
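+// Each bundle above folds into VOP3_Profile_Base below: e.g. VOP3_OPSEL
+// turns on both HasClamp and HasOpSel, while VOP3_MAI only sets IsMAI (and
+// the profile then forces HasModifiers off for MAI).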
+
+class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
+
+ let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
+ let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
+ let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
+ let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+
+ let HasModifiers = !if(Features.IsMAI, 0, !or(Features.IsPacked, P.HasModifiers));
+}
+
+class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Base<P, Features> {
+ let IsSingle = 1;
+}
+
+// Consistently gives instructions an _e64 suffix.
+multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> {
+ def _e64 : VOP3_Pseudo<opName, P, pattern, VOP3Only>;
+}
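+// Usage sketch (profile name illustrative only):
+//   defm V_FOO : VOP3Inst_Pseudo_Wrapper<"v_foo", VOP_F32_F32_F32>;
+// defines a single pseudo named V_FOO_e64.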
+
+class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit IsVOP2 = 0> :
+ VOP3_Pseudo<OpName, P,
+ !if(P.HasOpSel,
+ !if(P.HasModifiers,
+ getVOP3OpSelModPat<P, node>.ret,
+ getVOP3OpSelPat<P, node>.ret),
+ !if(P.HasModifiers,
+ getVOP3ModPat<P, node>.ret,
+ !if(IsVOP2,
+ getVOP3FromVOP2Pat<P, node>.ret,
+ !if(P.HasIntClamp,
+ getVOP3ClampPat<P, node>.ret,
+ !if (P.IsMAI,
+ getVOP3MAIPat<P, node>.ret,
+ getVOP3Pat<P, node>.ret))))),
+ 0, P.HasOpSel> {
+
+ let IntClamp = P.HasIntClamp;
+ let AsmMatchConverter =
+ !if(P.HasOpSel,
+ "cvtVOP3OpSel",
+ !if(!or(P.HasModifiers, P.HasOMod, P.HasIntClamp),
+ "cvtVOP3",
+ ""));
+}
+
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
+ def _e64 : VOP3InstBase<OpName, P, node>;
+ let SubtargetPredicate = isGFX11Plus in {
+ foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
+ } // end SubtargetPredicate = isGFX11Plus
+}
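+// Unlike the plain wrapper above, VOP3Inst also emits an _e64_dpp pseudo on
+// GFX11+ whenever the profile sets HasExtVOP3DPP, so (illustratively)
+//   defm V_FOO : VOP3Inst<"v_foo", VOP3_Profile<VOP_F32_F32_F32>>;
+// can yield both V_FOO_e64 and V_FOO_e64_dpp.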
+
+//===----------------------------------------------------------------------===//
+// VOP3 DPP
+//===----------------------------------------------------------------------===//
+
+class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP<op, opName, ps.Pfl, 1> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+ let AssemblerPredicate = HasDPP16;
+ let SubtargetPredicate = HasDPP16;
+ let OtherPredicates = ps.OtherPredicates;
+}
+
+class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
+ string opName = ps.OpName>
+ : Base_VOP3_DPP16<op, ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>;
+
+class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : VOP3_DPP8<op, opName, ps.Pfl> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+
+ let OtherPredicates = ps.OtherPredicates;
+}
+
+class Base_VOP3b_DPP16<bits<10> op, VOP_DPP_Pseudo ps,
+ string opName = ps.OpName>
+ : Base_VOP3_DPP16<op, ps, opName> {
+ bits<7> sdst;
+ let Inst{14 - 8} = sdst;
+}
+
+class VOP3b_DPP8_Base<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
+ : Base_VOP3_DPP8<op, ps, opName> {
+ bits<7> sdst;
+ let Inst{14 - 8} = sdst;
+}
+
+//===----------------------------------------------------------------------===//
+// VOP3 GFX11
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX11Only,
+ DecoderNamespace = "GFX11" in {
+ multiclass VOP3_Real_Base_gfx11<bits<10> op, string opName = NAME,
+ bit isSingle = 0> {
+ defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
+ let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ foreach _ = BoolToList<ps.Pfl.HasOpSel>.ret in
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3OpSel_gfx11<op, ps.Pfl>;
+ foreach _ = BoolToList<!not(ps.Pfl.HasOpSel)>.ret in
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3e_gfx11<op, ps.Pfl>;
+ }
+ }
+ multiclass VOP3_Real_with_name_gfx11<bits<10> op, string opName,
+ string asmName, bit isSingle = 0> {
+ defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.AsmOperands,
+ IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ foreach _ = BoolToList<ps.Pfl.HasOpSel>.ret in
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3OpSel_gfx11<op, ps.Pfl>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
+ foreach _ = BoolToList<!not(ps.Pfl.HasOpSel)>.ret in
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3e_gfx11<op, ps.Pfl>,
+ MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
+ }
+ }
+  // For READLANE/WRITELANE.
+ multiclass VOP3_Real_No_Suffix_gfx11<bits<10> op, string opName = NAME> {
+ defvar ps = !cast<VOP_Pseudo>(opName);
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3e_gfx11<op, ps.Pfl>;
+ }
+ multiclass VOP3_Real_dpp_Base_gfx11<bits<10> op, string opName = NAME> {
+ def _e64_dpp_gfx11 : VOP3_DPP16<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> {
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+ multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, DecoderNamespace = "DPPGFX11" in {
+ defm NAME : VOP3_Real_dpp_Base_gfx11<op, opName>;
+ }
+ }
+ multiclass VOP3_Real_dpp8_Base_gfx11<bits<10> op, string opName = NAME> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ def _e64_dpp8_gfx11 : Base_VOP3_DPP8<op, ps> {
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+ multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName,
+ string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8GFX11" in {
+ defm NAME : VOP3_Real_dpp8_Base_gfx11<op, opName>;
+ }
+ }
+ multiclass VOP3be_Real_gfx11<bits<10> op, string opName, string asmName,
+ bit isSingle = 0> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11, asmName>,
+ VOP3be_gfx11<op, ps.Pfl> ;
+ }
+ multiclass VOP3be_Real_dpp_gfx11<bits<10> op, string opName, string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
+ defvar dpp_ps = !cast<VOP_DPP_Pseudo>(opName #"_e64" #"_dpp");
+ def _e64_dpp_gfx11 : Base_VOP3b_DPP16<op, dpp_ps, asmName>,
+ SIMCInstr<dpp_ps.PseudoInstr, SIEncodingFamily.GFX11> {
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+ multiclass VOP3be_Real_dpp8_gfx11<bits<10> op, string opName, string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(opName #"_e64");
+ def _e64_dpp8_gfx11 : VOP3b_DPP8_Base<op, ps, asmName> {
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
+
+// VOP1 and VOP2 depend on these triple defs.
+multiclass VOP3_Realtriple_gfx11<bits<10> op,
+ bit isSingle = 0, string opName = NAME> :
+ VOP3_Real_Base_gfx11<op, opName, isSingle>,
+ VOP3_Real_dpp_Base_gfx11<op, opName>,
+ VOP3_Real_dpp8_Base_gfx11<op, opName>;
+
+multiclass VOP3Only_Realtriple_gfx11<bits<10> op> :
+ VOP3_Realtriple_gfx11<op, 1>;
+
+multiclass VOP3_Realtriple_with_name_gfx11<bits<10> op, string opName,
+ string asmName, bit isSingle = 0> :
+ VOP3_Real_with_name_gfx11<op, opName, asmName, isSingle>,
+ VOP3_Real_dpp_with_name_gfx11<op, opName, asmName>,
+ VOP3_Real_dpp8_with_name_gfx11<op, opName, asmName>;
+
+multiclass VOP3Only_Realtriple_with_name_gfx11<bits<10> op, string opName,
+ string asmName> :
+ VOP3_Realtriple_with_name_gfx11<op, opName, asmName, 1>;
+
+multiclass VOP3be_Realtriple_gfx11<
+ bits<10> op, bit isSingle = 0, string opName = NAME,
+ string asmName = !cast<VOP_Pseudo>(opName#"_e64").Mnemonic> :
+ VOP3be_Real_gfx11<op, opName, asmName, isSingle>,
+ VOP3be_Real_dpp_gfx11<op, opName, asmName>,
+ VOP3be_Real_dpp8_gfx11<op, opName, asmName>;
+
+multiclass VOP3beOnly_Realtriple_gfx11<bits<10> op> :
+ VOP3be_Realtriple_gfx11<op, 1>;
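+// Naming sketch (opcode value hypothetical):
+//   defm V_FOO : VOP3beOnly_Realtriple_gfx11<0x123>;
+// produces the V_FOO_e64_gfx11, V_FOO_e64_dpp_gfx11 and V_FOO_e64_dpp8_gfx11
+// real encodings, derived from the corresponding _e64/_e64_dpp pseudos.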
include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
include "VOP3PInstructions.td"
+include "VOPDInstructions.td"
class VOPInfoTable <string Format> : GenericTable {
@@ -847,3 +1447,15 @@ class VOPInfoTable <string Format> : GenericTable {
def VOP1InfoTable : VOPInfoTable<"VOP1">;
def VOP2InfoTable : VOPInfoTable<"VOP2">;
def VOP3InfoTable : VOPInfoTable<"VOP3">;
+
+class VOPC64Table <string Format> : GenericTable {
+ let FilterClass = "VOPC64_" # Format # "_Base";
+ let CppTypeName = "VOPC64DPPInfo";
+ let Fields = ["Opcode"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "isVOPC64" # Format # "OpcodeHelper";
+}
+
+def VOPC64DPPTable : VOPC64Table<"DPP">;
+def VOPC64DPP8Table : VOPC64Table<"DPP8">;
diff --git a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp
index 9cd9661ae245..733f2f0a0499 100644
--- a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp
@@ -11,3 +11,10 @@
using namespace llvm;
void ARCFunctionInfo::anchor() {}
+
+MachineFunctionInfo *
+ARCFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *>
+ &Src2DstMBB) const {
+ return DestMF.cloneInfo<ARCFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h
index 968c6b63f423..454206037498 100644
--- a/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARC/ARCMachineFunctionInfo.h
@@ -34,9 +34,13 @@ public:
explicit ARCFunctionInfo(MachineFunction &MF)
: ReturnStackOffsetSet(false), VarArgsFrameIndex(0),
ReturnStackOffset(-1U), MaxCallStackReq(0) {}
-
~ARCFunctionInfo() {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
diff --git a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
index c956f00b628d..589c58e285bb 100644
--- a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
+++ b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -36,7 +36,7 @@ using namespace llvm;
namespace llvm {
static cl::opt<unsigned> ArcKillAddrMode("arc-kill-addr-mode", cl::init(0),
- cl::ReallyHidden, cl::ZeroOrMore);
+ cl::ReallyHidden);
#define DUMP_BEFORE() ((ArcKillAddrMode & 0x0001) != 0)
#define DUMP_AFTER() ((ArcKillAddrMode & 0x0002) != 0)
@@ -459,12 +459,12 @@ void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
Register BaseReg = Ldst.getOperand(BasePos).getReg();
- Ldst.RemoveOperand(OffPos);
- Ldst.RemoveOperand(BasePos);
+ Ldst.removeOperand(OffPos);
+ Ldst.removeOperand(BasePos);
if (IsStore) {
Src = Ldst.getOperand(BasePos - 1);
- Ldst.RemoveOperand(BasePos - 1);
+ Ldst.removeOperand(BasePos - 1);
}
Ldst.setDesc(AST->getInstrInfo()->get(NewOpcode));
diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index 52f74b729ff7..21757927d873 100644
--- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
static Reloc::Model getRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::Static);
+ return RM.value_or(Reloc::Static);
}
/// ARCTargetMachine ctor - Create an ILP32 architecture model
@@ -84,6 +84,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARCTarget() {
}
TargetTransformInfo
-ARCTargetMachine::getTargetTransformInfo(const Function &F) {
+ARCTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(ARCTTIImpl(this, F));
}
diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.h b/llvm/lib/Target/ARC/ARCTargetMachine.h
index c5e8c3f2936d..81ccfc6d5dd0 100644
--- a/llvm/lib/Target/ARC/ARCTargetMachine.h
+++ b/llvm/lib/Target/ARC/ARCTargetMachine.h
@@ -39,7 +39,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
index bb5336931932..618101755904 100644
--- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
+++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
@@ -16,8 +16,8 @@
#include "MCTargetDesc/ARCMCTargetDesc.h"
#include "TargetInfo/ARCTargetInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -83,41 +83,43 @@ static bool readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
}
template <unsigned B>
-static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS,
- uint64_t Address = 0,
- const void *Decoder = nullptr);
+static DecodeStatus
+DecodeSignedOperand(MCInst &Inst, unsigned InsnS, uint64_t Address = 0,
+ const MCDisassembler *Decoder = nullptr);
template <unsigned B>
-static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS,
- uint64_t Address = 0,
- const void *Decoder = nullptr);
+static DecodeStatus
+DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS, uint64_t Address = 0,
+ const MCDisassembler *Decoder = nullptr);
template <unsigned B>
static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMEMrs9(MCInst &, unsigned, uint64_t, const void *);
+static DecodeStatus DecodeMEMrs9(MCInst &, unsigned, uint64_t,
+ const MCDisassembler *);
static DecodeStatus DecodeLdLImmInstruction(MCInst &, uint64_t, uint64_t,
- const void *);
+ const MCDisassembler *);
static DecodeStatus DecodeStLImmInstruction(MCInst &, uint64_t, uint64_t,
- const void *);
+ const MCDisassembler *);
static DecodeStatus DecodeLdRLImmInstruction(MCInst &, uint64_t, uint64_t,
- const void *);
+ const MCDisassembler *);
static DecodeStatus DecodeSOPwithRS12(MCInst &, uint64_t, uint64_t,
- const void *);
+ const MCDisassembler *);
static DecodeStatus DecodeSOPwithRU6(MCInst &, uint64_t, uint64_t,
- const void *);
+ const MCDisassembler *);
static DecodeStatus DecodeCCRU6Instruction(MCInst &, uint64_t, uint64_t,
- const void *);
+ const MCDisassembler *);
static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t, uint64_t,
- const void *);
+ const MCDisassembler *);
static const uint16_t GPR32DecoderTable[] = {
ARC::R0, ARC::R1, ARC::R2, ARC::R3, ARC::R4, ARC::R5, ARC::R6,
@@ -128,7 +130,7 @@ static const uint16_t GPR32DecoderTable[] = {
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32) {
LLVM_DEBUG(dbgs() << "Not a GPR32 register.");
return MCDisassembler::Fail;
@@ -140,8 +142,8 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeGBR32ShortRegister(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// Enumerates registers from ranges [r0-r3],[r12-r15].
if (RegNo > 3)
RegNo += 8; // 4 for r12, etc...
@@ -165,7 +167,7 @@ static unsigned decodeAField(unsigned Insn) {
}
static DecodeStatus DecodeMEMrs9(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Dec) {
+ const MCDisassembler *Dec) {
// We have the 9-bit immediate in the low bits, 6-bit register in high bits.
unsigned S9 = Insn & 0x1ff;
unsigned R = (Insn & (0x7fff & ~0x1ff)) >> 9;
@@ -175,17 +177,16 @@ static DecodeStatus DecodeMEMrs9(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static bool DecodeSymbolicOperand(MCInst &Inst, uint64_t Address,
- uint64_t Value, const void *Decoder) {
+ uint64_t Value,
+ const MCDisassembler *Decoder) {
static const uint64_t AtLeast = 2;
- // TODO: Try to force emitter to use MCDisassembler* instead of void*.
- auto Disassembler = static_cast<const MCDisassembler *>(Decoder);
- return (nullptr != Disassembler &&
- Disassembler->tryAddingSymbolicOperand(Inst, Value, Address, true, 0,
- AtLeast));
+ return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand(
+ Inst, Value, Address, true, 0, AtLeast, 0));
}
static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address,
- uint64_t Offset, const void *Decoder) {
+ uint64_t Offset,
+ const MCDisassembler *Decoder) {
uint64_t NextAddress = Address + Offset;
if (!DecodeSymbolicOperand(Inst, Address, NextAddress, Decoder))
@@ -194,7 +195,8 @@ static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address,
template <unsigned B>
static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
static_assert(B > 0, "field is empty");
DecodeSymbolicOperandOff(Inst, Address, SignExtend32<B>(InsnS), Decoder);
@@ -204,7 +206,7 @@ static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS,
template <unsigned B>
static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS,
uint64_t /*Address*/,
- const void * /*Decoder*/) {
+ const MCDisassembler * /*Decoder*/) {
static_assert(B > 0, "field is empty");
Inst.addOperand(MCOperand::createImm(
@@ -215,7 +217,7 @@ static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS,
template <unsigned B>
static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS,
uint64_t /*Address*/,
- const void * /*Decoder*/) {
+ const MCDisassembler * /*Decoder*/) {
static_assert(B > 0, "field is empty");
const unsigned max = (1u << B) - 1;
@@ -226,7 +228,7 @@ static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS,
static DecodeStatus DecodeStLImmInstruction(MCInst &Inst, uint64_t Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned SrcC, DstB, LImm;
DstB = decodeBField(Insn);
if (DstB != 62) {
@@ -243,7 +245,7 @@ static DecodeStatus DecodeStLImmInstruction(MCInst &Inst, uint64_t Insn,
static DecodeStatus DecodeLdLImmInstruction(MCInst &Inst, uint64_t Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned DstA, SrcB, LImm;
LLVM_DEBUG(dbgs() << "Decoding LdLImm:\n");
SrcB = decodeBField(Insn);
@@ -261,7 +263,7 @@ static DecodeStatus DecodeLdLImmInstruction(MCInst &Inst, uint64_t Insn,
static DecodeStatus DecodeLdRLImmInstruction(MCInst &Inst, uint64_t Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned DstA, SrcB;
LLVM_DEBUG(dbgs() << "Decoding LdRLimm\n");
DstA = decodeAField(Insn);
@@ -278,7 +280,7 @@ static DecodeStatus DecodeLdRLImmInstruction(MCInst &Inst, uint64_t Insn,
static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
LLVM_DEBUG(dbgs() << "Decoding MOV_S h-register\n");
using Field = decltype(Insn);
Field H = fieldFromInstruction(Insn, 5, 3) |
@@ -304,7 +306,7 @@ static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t Insn,
static DecodeStatus DecodeCCRU6Instruction(MCInst &Inst, uint64_t Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned DstB;
LLVM_DEBUG(dbgs() << "Decoding CCRU6 instruction:\n");
DstB = decodeBField(Insn);
@@ -318,7 +320,8 @@ static DecodeStatus DecodeCCRU6Instruction(MCInst &Inst, uint64_t Insn,
}
static DecodeStatus DecodeSOPwithRU6(MCInst &Inst, uint64_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned DstB = decodeBField(Insn);
DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
using Field = decltype(Insn);
@@ -328,7 +331,8 @@ static DecodeStatus DecodeSOPwithRU6(MCInst &Inst, uint64_t Insn,
}
static DecodeStatus DecodeSOPwithRS12(MCInst &Inst, uint64_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned DstB = decodeBField(Insn);
DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
using Field = decltype(Insn);
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index d0efecad63bc..65da95b0fc8d 100644
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -361,9 +361,8 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
MI = Front.pop_back_val();
// If we have already explored this MachineInstr, ignore it.
- if (Reached.find(MI) != Reached.end())
+ if (!Reached.insert(MI).second)
continue;
- Reached.insert(MI);
if (MI->isPHI()) {
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
Register Reg = MI->getOperand(I).getReg();
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 979371bf7cf6..9990078cfdbb 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -57,6 +57,7 @@ Pass *createMVEGatherScatterLoweringPass();
FunctionPass *createARMSLSHardeningPass();
FunctionPass *createARMIndirectThunks();
Pass *createMVELaneInterleavingPass();
+FunctionPass *createARMFixCortexA57AES1742098Pass();
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
@@ -77,6 +78,7 @@ void initializeMVETailPredicationPass(PassRegistry &);
void initializeMVEGatherScatterLoweringPass(PassRegistry &);
void initializeARMSLSHardeningPass(PassRegistry &);
void initializeMVELaneInterleavingPass(PassRegistry &);
+void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 27edf69b4abf..48559a89a30a 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -19,9 +19,11 @@ include "llvm/Target/Target.td"
// ARM Subtarget state.
//
-def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode",
+// True if compiling for Thumb, false for ARM.
+def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb",
"true", "Thumb mode">;
+// True if we're using software floating point features.
def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
"true", "Use software floating "
"point features.">;
@@ -48,14 +50,18 @@ def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
"Enable 64-bit FP registers",
[FeatureFPRegs]>;
+// True if the floating point unit supports double precision.
def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true",
"Floating point unit supports "
"double precision",
[FeatureFPRegs64]>;
+// True if subtarget has the full 32 double precision FP registers for VFPv3.
def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true",
"Extend FP to 32 double registers">;
+/// Versions of the VFP flags restricted to single precision, or to
+/// 16 d-registers, or both.
multiclass VFPver<string name, string query, string description,
list<SubtargetFeature> prev,
list<SubtargetFeature> otherimplies,
@@ -100,6 +106,7 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
+// True if subtarget supports half-precision FP conversions.
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision "
"floating point">;
@@ -110,169 +117,211 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
[FeatureVFP4], []>;
+// True if subtarget supports half-precision FP operations.
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Enable full half-precision "
"floating point",
[FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
+// True if subtarget supports half-precision FP fml operations.
def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
"Enable full half-precision "
"floating point fml instructions",
[FeatureFullFP16]>;
+// True if subtarget supports [su]div in Thumb mode.
def FeatureHWDivThumb : SubtargetFeature<"hwdiv",
- "HasHardwareDivideInThumb", "true",
+ "HasDivideInThumbMode", "true",
"Enable divide instructions in Thumb">;
+// True if subtarget supports [su]div in ARM mode.
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
- "HasHardwareDivideInARM", "true",
+ "HasDivideInARMMode", "true",
"Enable divide instructions in ARM mode">;
// Atomic Support
+
+// True if the subtarget supports DMB / DSB data barrier instructions.
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
"Has data barrier (dmb/dsb) instructions">;
+// True if the subtarget supports CLREX instructions.
def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
"Has v7 clrex instruction">;
+// True if the subtarget supports the DFB data barrier instruction.
def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true",
"Has full data barrier (dfb) instruction">;
+// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions.
def FeatureAcquireRelease : SubtargetFeature<"acquire-release",
"HasAcquireRelease", "true",
"Has v8 acquire/release (lda/ldaex "
" etc) instructions">;
-def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+// True if floating point compare + branch is slow.
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true",
"FP compare + branch is slow">;
+// True if the processor supports the Performance Monitor Extensions. These
+// include a generic cycle-counter as well as more fine-grained (often
+// implementation-specific) events.
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable support for Performance "
"Monitor extensions">;
// TrustZone Security Extensions
+
+// True if processor supports TrustZone security extensions.
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone "
"security extensions">;
+// True if processor supports ARMv8-M Security Extensions.
def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true",
"Enable support for ARMv8-M "
"Security Extensions">;
+// True if processor supports SHA1 and SHA256.
def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true",
"Enable SHA1 and SHA256 support", [FeatureNEON]>;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES support", [FeatureNEON]>;
+// True if processor supports Cryptography extensions.
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
"Enable support for "
"Cryptography extensions",
[FeatureNEON, FeatureSHA2, FeatureAES]>;
+// True if processor supports CRC instructions.
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable support for CRC instructions">;
+// True if the ARMv8.2A dot product instructions are supported.
def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true",
"Enable support for dot product instructions",
[FeatureNEON]>;
-// Not to be confused with FeatureHasRetAddrStack (return address stack)
+// True if the processor supports RAS extensions.
+// Not to be confused with FeatureHasRetAddrStack (return address stack).
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable Reliability, Availability "
"and Serviceability extensions">;
-// Fast computation of non-negative address offsets
+// Fast computation of non-negative address offsets.
+// True if the processor computes positive address offsets faster.
def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
"Enable fast computation of "
"positive address offsets">;
-// Fast execution of AES crypto operations
+// Fast execution of AES crypto operations.
+// True if the processor executes back-to-back AES instruction pairs faster.
def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
-// Fast execution of bottom and top halves of literal generation
+// Fast execution of bottom and top halves of literal generation.
+// True if the processor executes the bottom and top halves of literal
+// generation faster when they are issued back-to-back.
def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
-// The way of reading thread pointer
-def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
+// The way of reading thread pointer.
+// True if the thread pointer is read from the coprocessor register.
+def FeatureReadTp : SubtargetFeature<"read-tp-hard", "IsReadTPHard", "true",
"Reading thread pointer from register">;
// Cyclone can zero VFP registers in 0 cycles.
+// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
+// particularly effective at zeroing a VFP register.
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
-// Whether it is profitable to unpredicate certain instructions during if-conversion
+// Whether it is profitable to unpredicate certain instructions during if-conversion.
+// True if if-conversion may decide to leave some instructions unpredicated.
def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
"IsProfitableToUnpredicate", "true",
"Is profitable to unpredicate">;
// Some targets (e.g. Swift) have microcoded VGETLNi32.
+// True if VMOV will be favored over VGETLNi32.
def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
"HasSlowVGETLNi32", "true",
"Has slow VGETLNi32 - prefer VMOV">;
// Some targets (e.g. Swift) have microcoded VDUP32.
+// True if VMOV will be favored over VDUP.
def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32",
"true",
"Has slow VDUP32 - prefer VMOV">;
// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
// for scalar FP, as this allows more effective execution domain optimization.
+// True if VMOVSR will be favored over VMOVDRR.
def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
"true", "Prefer VMOVSR">;
// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
-// than ISH
-def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
+// than ISH.
+// True if ISHST barriers will be used for Release semantics.
+def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers",
"true", "Prefer ISHST barriers">;
// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU.
+// True if the AGU and NEON/FPU units are multiplexed.
def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits",
"true",
"Has muxed AGU and NEON/FPU">;
// Whether VLDM/VSTM starting with odd register number need more microops
-// than single VLDRS
-def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister",
+// than single VLDRS.
+// True if a VLDM/VSTM starting with an odd register number is considered to
+// take more microops than single VLDRS/VSTRS.
+def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister",
"true", "VLDM/VSTM starting "
"with an odd register is slow">;
// Some targets have a renaming dependency when loading into D subregisters.
+// True if loading into a D subregister will be penalized.
def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
- "SlowLoadDSubregister", "true",
+ "HasSlowLoadDSubregister", "true",
"Loading into D subregs is slow">;
+// True if a wider stride is used when allocating VFP registers.
def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp",
"UseWideStrideVFP", "true",
"Use a wide stride when allocating VFP registers">;
// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
+// True if VMOVS will never be widened to VMOVD.
def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
"DontWidenVMOVS", "true",
"Don't widen VMOVS to VMOVD">;
// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different
// VFP register widths.
+// True if registers are splatted from VFP to NEON instructions.
def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon",
- "SplatVFPToNeon", "true",
+ "UseSplatVFPToNeon", "true",
"Splat register from VFP to NEON",
[FeatureDontWidenVMOVS]>;
// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions.
+// True if the MLx expansion pass should be run.
def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx",
"ExpandMLx", "true",
"Expand VFP/NEON MLA/MLS instructions">;
// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS.
+// True if VFP/NEON VMLA/VMLS have special RAW hazards.
def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards",
"true", "Has VMLx hazards">;
// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
// VFP to NEON, as an execution domain optimization.
+// True if VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
"UseNEONForFPMovs", "true",
"Convert VMOVSR, VMOVRS, "
@@ -281,18 +330,21 @@ def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs",
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations. This affects instruction selection and should
// only be enabled if the handling of denormals is not important.
+// Use the method useNEONForSinglePrecisionFP() to determine if NEON should
+// actually be used.
def FeatureNEONForFP : SubtargetFeature<"neonfp",
- "UseNEONForSinglePrecisionFP",
+ "HasNEONForFP",
"true",
"Use NEON for single precision FP">;
// On some processors, VLDn instructions that access unaligned data take one
// extra cycle. Take that into account when computing operand latencies.
-def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
+// True if VLDn instructions take an extra cycle for unaligned accesses.
+def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment",
"true",
"Check for VLDn unaligned access">;
// Some processors have a nonpipelined VFP coprocessor.
+// True if VFP instructions are not pipelined.
def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
"NonpipelinedVFP", "true",
"VFP instructions are not pipelined">;
@@ -300,20 +352,27 @@ def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
+// If the VFP2 / NEON instructions are available, indicates
+// whether the FP VML[AS] instructions are slow (if so, don't use them).
def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
"Disable VFP / NEON MAC instructions">;
-// VFPv4 added VFMA instructions that can similar be fast or slow.
+// VFPv4 added VFMA instructions that can similarly be fast or slow.
+// If the VFP4 / NEON instructions are available, indicates
+// whether the FP VFM[AS] instructions are slow (if so, don't use them).
def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true",
"Disable VFP / NEON FMA instructions">;
// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+/// True if NEON has special multiplier accumulator
+/// forwarding to allow mul + mla to be issued back-to-back.
def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
"HasVMLxForwarding", "true",
"Has multiplier accumulator forwarding">;
// Disable 32-bit to 16-bit narrowing for experimentation.
-def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+// True if codegen would prefer 32-bit Thumb instructions over 16-bit ones.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2",
@@ -332,17 +391,22 @@ def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFac
/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
/// mapped to a separate physical register. Avoid partial CPSR update for these
/// processors.
+/// True if codegen would avoid using instructions that partially update
+/// CPSR and add a false dependency on the previous CPSR-setting instruction.
def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
/// Disable +1 predication cost for instructions updating CPSR.
/// Enabled for Cortex-A57.
+/// True if the +1 predication cost for instructions updating CPSR is
+/// disabled. Enabled for Cortex-A57.
def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
"CheapPredicableCPSRDef",
"true",
"Disable +1 predication cost for instructions updating CPSR">;
+// True if codegen should avoid using flag-setting movs with a shifter
+// operand (e.g. asr, lsl, lsr).
def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
"AvoidMOVsShifterOperand", "true",
"Avoid movs instructions with "
@@ -357,16 +421,20 @@ def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack",
// Some processors have no branch predictor, which changes the expected cost of
// taking a branch which affects the choice of whether to use predicated
// instructions.
+// True if the subtarget has a branch predictor. Having
+// a branch predictor or not changes the expected cost of taking a branch,
+// which affects the choice of whether to use predicated instructions.
def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
"HasBranchPredictor", "false",
"Has no branch predictor">;
/// DSP extension.
+/// True if the subtarget supports the DSP (saturating arithmetic and
+/// similar) instructions.
def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
"Supports DSP instructions in "
"ARM and/or Thumb2">;
-// Multiprocessing extension.
+// True if the subtarget supports the Multiprocessing extension (ARMv7 only).
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
@@ -378,31 +446,42 @@ def FeatureVirtualization : SubtargetFeature<"virtualization",
// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
// See ARMInstrInfo.td for details.
+// True if the NaCl TRAP instruction is generated instead of the regular TRAP.
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+// True if the subtarget disallows unaligned memory
+// accesses for some types. For details, see
+// ARMTargetLowering::allowsMisalignedMemoryAccesses().
def FeatureStrictAlign : SubtargetFeature<"strict-align",
"StrictAlign", "true",
"Disallow all unaligned memory "
"access">;
+// Generate calls via indirect call instructions.
def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
"Generate calls via indirect call "
"instructions">;
+// Generate code that does not contain data access to code sections.
def FeatureExecuteOnly : SubtargetFeature<"execute-only",
"GenExecuteOnly", "true",
"Enable the generation of "
"execute only code.">;
+// True if R9 is not available as a general purpose register.
def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
"Reserve R9, making it unavailable"
" as GPR">;
+// True if MOVT / MOVW pairs are not used for materialization of
+// 32-bit imms (including global addresses).
def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
"Don't use movt/movw pairs for "
"32-bit imms">;
+/// Implicitly convert an instruction to a different one if its immediates
+/// cannot be encoded. For example, ADD r0, r1, #0xFFFFFFFF -> SUB r0, r1, #1.
def FeatureNoNegativeImmediates
: SubtargetFeature<"no-neg-immediates",
"NegativeImmediates", "false",
@@ -415,28 +494,39 @@ def FeatureNoNegativeImmediates
def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true",
"Use the MachineScheduler">;
+// Use the MachinePipeliner for instruction scheduling on this subtarget.
+def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true",
+ "Use the MachinePipeliner">;
+
+// False if scheduling should happen again after register allocation.
def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
"DisablePostRAScheduler", "true",
"Don't schedule again after register allocation">;
// Armv8.5-A extensions
+// Has speculation barrier.
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
"Enable v8.5a Speculation Barrier" >;
// Armv8.6-A extensions
+
+// True if the subtarget supports BFloat16 floating-point operations.
def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
"Enable support for BFloat16 instructions", [FeatureNEON]>;
+// True if the subtarget supports 8-bit integer matrix multiply.
def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
"true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>;
// Armv8.1-M extensions
+// True if the processor supports the Low Overhead Branch extension.
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
"Enable Low Overhead Branch "
"extensions">;
+// Mitigate against the CVE-2021-35465 security vulnerability.
def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465",
"FixCMSE_CVE_2021_35465", "true",
"Mitigate against the cve-2021-35465 "
@@ -446,11 +536,26 @@ def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true",
"Enable Pointer Authentication and Branch "
"Target Identification">;
+/// Don't place a BTI instruction after return-twice constructs (setjmp).
def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
"NoBTIAtReturnTwice", "true",
"Don't place a BTI instruction "
"after a return-twice">;
+def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098",
+ "FixCortexA57AES1742098", "true",
+ "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">;
+
+def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain",
+ "CreateAAPCSFrameChain", "true",
+ "Create an AAPCS compliant frame chain">;
+
+def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
+ "CreateAAPCSFrameChainLeaf", "true",
+ "Create an AAPCS compliant frame chain "
+ "for leaf functions",
+ [FeatureAAPCSFrameChain]>;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -467,16 +572,18 @@ def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
"Is microcontroller profile ('M' series)">;
-
+// True if Thumb2 instructions are supported.
def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
"Enable Thumb2 instructions">;
+// True if the subtarget does not support ARM mode execution.
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
"Does not support ARM mode execution">;
//===----------------------------------------------------------------------===//
// ARM ISAs.
//
+// Specify whether the target supports specific ARM ISA variants.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
@@ -599,13 +706,16 @@ foreach i = {0-7} in
// Control codegen mitigation against Straight Line Speculation vulnerability.
//===----------------------------------------------------------------------===//
+/// Harden against Straight Line Speculation for Returns and Indirect Branches.
def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr",
"HardenSlsRetBr", "true",
"Harden against straight line speculation across RETurn and BranchRegister "
"instructions">;
+/// Harden against Straight Line Speculation for indirect calls.
def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr",
"HardenSlsBlr", "true",
"Harden against straight line speculation across indirect calls">;
+/// Generate thunk code for SLS mitigation in the normal text section.
def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat",
"HardenSlsNoComdat", "true",
"Generate thunk code for SLS mitigation in the normal text section">;
@@ -1303,6 +1413,7 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em,
ProcM7,
FeatureFPARMv8_D16,
+ FeatureUseMIPipeliner,
FeatureUseMISched]>;
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
@@ -1370,13 +1481,15 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
FeatureCRC,
FeatureFPAO,
FeatureAvoidPartialCPSR,
- FeatureCheapPredicableCPSR]>;
+ FeatureCheapPredicableCPSR,
+ FeatureFixCortexA57AES1742098]>;
def : ProcessorModel<"cortex-a72", CortexA57Model, [ARMv8a, ProcA72,
FeatureHWDivThumb,
FeatureHWDivARM,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureFixCortexA57AES1742098]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
FeatureHWDivThumb,
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index fa09b2567aa9..4aa28bc5d28d 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -161,10 +161,10 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
: COFF::IMAGE_SYM_CLASS_EXTERNAL;
int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer->EmitCOFFSymbolStorageClass(Scl);
- OutStreamer->EmitCOFFSymbolType(Type);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->beginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->emitCOFFSymbolStorageClass(Scl);
+ OutStreamer->emitCOFFSymbolType(Type);
+ OutStreamer->endCOFFSymbolDef();
}
// Emit the rest of the function body.
@@ -535,27 +535,27 @@ void ARMAsmPrinter::emitEndOfAsmFile(Module &M) {
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ OutStreamer->switchSection(TLOFMacho.getNonLazySymbolPointerSection());
emitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
Stubs = MMIMacho.GetThreadLocalGVStubList();
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection());
+ OutStreamer->switchSection(TLOFMacho.getThreadLocalPointerSection());
emitAlignment(Align(4));
for (auto &Stub : Stubs)
emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -740,55 +740,53 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format,
ARMBuildAttrs::FP16FormatIEEE);
- if (MMI) {
- if (const Module *SourceModule = MMI->getModule()) {
- // ABI_PCS_wchar_t to indicate wchar_t width
- // FIXME: There is no way to emit value 0 (wchar_t prohibited).
- if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("wchar_size"))) {
- int WCharWidth = WCharWidthValue->getZExtValue();
- assert((WCharWidth == 2 || WCharWidth == 4) &&
- "wchar_t width must be 2 or 4 bytes");
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
- }
+ if (const Module *SourceModule = MMI->getModule()) {
+ // ABI_PCS_wchar_t to indicate wchar_t width
+ // FIXME: There is no way to emit value 0 (wchar_t prohibited).
+ if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("wchar_size"))) {
+ int WCharWidth = WCharWidthValue->getZExtValue();
+ assert((WCharWidth == 2 || WCharWidth == 4) &&
+ "wchar_t width must be 2 or 4 bytes");
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
+ }
- // ABI_enum_size to indicate enum width
- // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
- // (all enums contain a value needing 32 bits to encode).
- if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("min_enum_size"))) {
- int EnumWidth = EnumWidthValue->getZExtValue();
- assert((EnumWidth == 1 || EnumWidth == 4) &&
- "Minimum enum width must be 1 or 4 bytes");
- int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
- ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
- }
+ // ABI_enum_size to indicate enum width
+ // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
+ // (all enums contain a value needing 32 bits to encode).
+ if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("min_enum_size"))) {
+ int EnumWidth = EnumWidthValue->getZExtValue();
+ assert((EnumWidth == 1 || EnumWidth == 4) &&
+ "Minimum enum width must be 1 or 4 bytes");
+ int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
+ ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
+ }
- auto *PACValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("sign-return-address"));
- if (PACValue && PACValue->getZExtValue() == 1) {
- // If "+pacbti" is used as an architecture extension,
- // Tag_PAC_extension is emitted in
- // ARMTargetStreamer::emitTargetAttributes().
- if (!STI.hasPACBTI()) {
- ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
- ARMBuildAttrs::AllowPACInNOPSpace);
- }
- ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+ auto *PACValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("sign-return-address"));
+ if (PACValue && PACValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_PAC_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
+ ARMBuildAttrs::AllowPACInNOPSpace);
}
+ ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+ }
- auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
- SourceModule->getModuleFlag("branch-target-enforcement"));
- if (BTIValue && BTIValue->getZExtValue() == 1) {
- // If "+pacbti" is used as an architecture extension,
- // Tag_BTI_extension is emitted in
- // ARMTargetStreamer::emitTargetAttributes().
- if (!STI.hasPACBTI()) {
- ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
- ARMBuildAttrs::AllowBTIInNOPSpace);
- }
- ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
+ auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("branch-target-enforcement"));
+ if (BTIValue && BTIValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_BTI_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
+ ARMBuildAttrs::AllowBTIInNOPSpace);
}
+ ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
}
}
@@ -2276,6 +2274,47 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInstSB);
return;
}
+
+ case ARM::SEH_StackAlloc:
+ ATS.emitARMWinCFIAllocStack(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case ARM::SEH_SaveRegs:
+ case ARM::SEH_SaveRegs_Ret:
+ ATS.emitARMWinCFISaveRegMask(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case ARM::SEH_SaveSP:
+ ATS.emitARMWinCFISaveSP(MI->getOperand(0).getImm());
+ return;
+
+ case ARM::SEH_SaveFRegs:
+ ATS.emitARMWinCFISaveFRegs(MI->getOperand(0).getImm(),
+ MI->getOperand(1).getImm());
+ return;
+
+ case ARM::SEH_SaveLR:
+ ATS.emitARMWinCFISaveLR(MI->getOperand(0).getImm());
+ return;
+
+ case ARM::SEH_Nop:
+ case ARM::SEH_Nop_Ret:
+ ATS.emitARMWinCFINop(MI->getOperand(0).getImm());
+ return;
+
+ case ARM::SEH_PrologEnd:
+ ATS.emitARMWinCFIPrologEnd(/*Fragment=*/false);
+ return;
+
+ case ARM::SEH_EpilogStart:
+ ATS.emitARMWinCFIEpilogStart(ARMCC::AL);
+ return;
+
+ case ARM::SEH_EpilogEnd:
+ ATS.emitARMWinCFIEpilogEnd();
+ return;
}
MCInst TmpInst;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5b0bae4d9274..80ba7b5f0d2e 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -343,6 +343,13 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
}
// Branch analysis.
+// Cond vector output format:
+//   0 elements: an unconditional branch.
+//   2 elements: a conditional branch; the elements are the condition
+//     to check and the CPSR.
+//   3 elements: a hardware loop end; the elements are the opcode, the
+//     operand value to test, and a dummy operand used to pad out to
+//     3 operands.
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -394,6 +401,17 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
} else if (I->isReturn()) {
// Returns can't be analyzed, but we should run cleanup.
CantAnalyze = true;
+ } else if (I->getOpcode() == ARM::t2LoopEnd &&
+ MBB.getParent()
+ ->getSubtarget<ARMSubtarget>()
+ .enableMachinePipeliner()) {
+ if (!Cond.empty())
+ return true;
+ FBB = TBB;
+ TBB = I->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ Cond.push_back(MachineOperand::CreateImm(0));
} else {
// We encountered other unrecognized terminator. Bail out immediately.
return true;
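
A minimal sketch, not part of the patch itself, of how a consumer might
decode the Cond encoding documented above; all names here are illustrative
assumptions, not code from this change:

    // Decode the Cond vector filled in by ARMBaseInstrInfo::analyzeBranch().
    if (Cond.empty()) {
      // Unconditional branch to TBB.
    } else if (Cond.size() == 2) {
      auto CC = static_cast<ARMCC::CondCodes>(Cond[0].getImm()); // condition
      const MachineOperand &Flags = Cond[1];                     // CPSR use
      (void)CC; (void)Flags;
    } else { // Cond.size() == 3: hardware loop end (e.g. ARM::t2LoopEnd)
      unsigned Opc = Cond[0].getImm();         // branch opcode to rebuild
      const MachineOperand &Counter = Cond[1]; // loop-count operand to test
      // Cond[2] is a dummy operand padding the vector out to three entries.
      (void)Opc; (void)Counter;
    }
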
@@ -457,7 +475,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
return 0;
if (!isUncondBranchOpcode(I->getOpcode()) &&
- !isCondBranchOpcode(I->getOpcode()))
+ !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
return 0;
// Remove the branch.
@@ -467,7 +485,7 @@ unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
if (I == MBB.begin()) return 1;
--I;
- if (!isCondBranchOpcode(I->getOpcode()))
+ if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
return 1;
// Remove the branch.
@@ -491,8 +509,8 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "ARM branch conditions have two components!");
+ assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
+ "ARM branch conditions have two or three components!");
// For conditional branches, we use addOperand to preserve CPSR flags.
@@ -502,19 +520,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- } else
+ } else if (Cond.size() == 2) {
BuildMI(&MBB, DL, get(BccOpc))
.addMBB(TBB)
.addImm(Cond[0].getImm())
.add(Cond[1]);
+ } else
+ BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, DL, get(BccOpc))
- .addMBB(TBB)
- .addImm(Cond[0].getImm())
- .add(Cond[1]);
+ if (Cond.size() == 2)
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
+ else if (Cond.size() == 3)
+ BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
else
@@ -524,9 +547,12 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
bool ARMBaseInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
- Cond[0].setImm(ARMCC::getOppositeCondition(CC));
- return false;
+ if (Cond.size() == 2) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+ }
+ return true;
}
bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
@@ -556,7 +582,7 @@ std::string ARMBaseInstrInfo::createMIROperandComment(
return GenericComment;
// If not, check if we have an immediate operand.
- if (Op.getType() != MachineOperand::MO_Immediate)
+ if (!Op.isImm())
return std::string();
// And print its corresponding condition code if the immediate is a
@@ -1703,7 +1729,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// or some other super-register.
int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
if (ImpDefIdx != -1)
- MI.RemoveOperand(ImpDefIdx);
+ MI.removeOperand(ImpDefIdx);
// Change the opcode and operands.
MI.setDesc(get(ARM::VMOVD));
@@ -2045,6 +2071,9 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
return true;
+ if (isSEHInstruction(MI))
+ return true;
+
// Treat the start of the IT block as a scheduling boundary, but schedule
// t2IT along with all instructions following it.
// FIXME: This is a big hammer. But the alternative is to add all potential
@@ -2598,7 +2627,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// ahead: strip all existing registers off and add them back again
// in the right order.
for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
- MI->RemoveOperand(i);
+ MI->removeOperand(i);
// Add the complete list back in.
MachineInstrBuilder MIB(MF, &*MI);
@@ -2626,7 +2655,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Turn it into a move.
MI.setDesc(TII.get(ARM::MOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.RemoveOperand(FrameRegIdx+1);
+ MI.removeOperand(FrameRegIdx+1);
Offset = 0;
return true;
} else if (Offset < 0) {
@@ -5103,7 +5132,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
SrcReg = MI.getOperand(1).getReg();
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
// Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
MI.setDesc(get(ARM::VORRd));
@@ -5122,7 +5151,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
SrcReg = MI.getOperand(1).getReg();
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
@@ -5155,7 +5184,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
break;
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
// Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
// Again DDst may be undefined at the beginning of this instruction.
@@ -5190,7 +5219,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
break;
for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
- MI.RemoveOperand(i - 1);
+ MI.removeOperand(i - 1);
if (DSrc == DDst) {
// Destination can be:
@@ -5766,26 +5795,25 @@ struct OutlinerCosts {
SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
};
-unsigned
-ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
- assert(C.LRUWasSet && "LRU wasn't set?");
+Register
+ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
MachineFunction *MF = C.getMF();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo *>(
- MF->getSubtarget().getRegisterInfo());
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ const ARMBaseRegisterInfo *ARI =
+ static_cast<const ARMBaseRegisterInfo *>(&TRI);
BitVector regsReserved = ARI->getReservedRegs(*MF);
// Check if there is an available register across the sequence that we can
// use.
- for (unsigned Reg : ARM::rGPRRegClass) {
+ for (Register Reg : ARM::rGPRRegClass) {
if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
Reg != ARM::LR && // LR is not reserved, but don't use it.
Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
- C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
+ C.isAvailableInsideSeq(Reg, TRI))
return Reg;
}
-
- // No suitable register. Return 0.
- return 0u;
+ return Register();
}
// Compute liveness of LR at the point after the interval [I, E), which
@@ -5833,9 +5861,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// Compute liveness information for each candidate, and set FlagsSetInAll.
const TargetRegisterInfo &TRI = getRegisterInfo();
- std::for_each(
- RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
- [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; });
+ for (outliner::Candidate &C : RepeatedSequenceLocs)
+ FlagsSetInAll &= C.Flags;
// According to the ARM Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
@@ -5854,9 +5881,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// to compute liveness here.
if (C.Flags & UnsafeRegsDead)
return false;
- C.initLRU(TRI);
- LiveRegUnits LRU = C.LRU;
- return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR));
+ return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
};
// Are there any candidates where those registers are live?
@@ -5969,7 +5994,6 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
for (outliner::Candidate &C : RepeatedSequenceLocs) {
- C.initLRU(TRI);
// LR liveness is overestimated in return blocks, unless they end with a
// tail call.
const auto Last = C.getMBB()->rbegin();
@@ -5977,7 +6001,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
C.getMBB()->isReturnBlock() && !Last->isCall()
? isLRAvailable(TRI, Last,
(MachineBasicBlock::reverse_iterator)C.front())
- : C.LRU.available(ARM::LR);
+ : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
if (LRIsAvailable) {
FrameID = MachineOutlinerNoLRSave;
NumBytesNoStackCalls += Costs.CallNoLRSave;
@@ -5996,7 +6020,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// Is SP used in the sequence at all? If not, we don't have to modify
// the stack, so we are guaranteed to get the same frame.
- else if (C.UsedInSequence.available(ARM::SP)) {
+ else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
NumBytesNoStackCalls += Costs.CallDefault;
C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
CandidatesWithoutStackFixups.push_back(C);
@@ -6189,8 +6213,8 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
LiveRegUnits LRU(getRegisterInfo());
- std::for_each(MBB.rbegin(), MBB.rend(),
- [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
+ for (MachineInstr &MI : llvm::reverse(MBB))
+ LRU.accumulate(MI);
// Check if each of the unsafe registers are available...
bool R12AvailableInBlock = LRU.available(ARM::R12);
@@ -6635,7 +6659,7 @@ void ARMBaseInstrInfo::buildOutlinedFrame(
MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
- MachineFunction &MF, const outliner::Candidate &C) const {
+ MachineFunction &MF, outliner::Candidate &C) const {
MachineInstrBuilder MIB;
MachineBasicBlock::iterator CallPt;
unsigned Opc;
@@ -6726,3 +6750,122 @@ unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
: ARM::BLX_pred;
}
+namespace {
+class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
+ MachineInstr *EndLoop, *LoopCount;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+
+  // Meaning of EndLoop and LoopCount for each supported loop type:
+ // t2Bcc:
+ // EndLoop = branch at end of original BB that will become a kernel
+ // LoopCount = CC setter live into branch
+ // t2LoopEnd:
+ // EndLoop = branch at end of original BB
+ // LoopCount = t2LoopDec
+public:
+ ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
+ : EndLoop(EndLoop), LoopCount(LoopCount),
+ MF(EndLoop->getParent()->getParent()),
+ TII(MF->getSubtarget().getInstrInfo()) {}
+
+ bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
+ // Only ignore the terminator.
+ return MI == EndLoop || MI == LoopCount;
+ }
+
+ Optional<bool> createTripCountGreaterCondition(
+ int TC, MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond) override {
+
+ if (isCondBranchOpcode(EndLoop->getOpcode())) {
+ Cond.push_back(EndLoop->getOperand(1));
+ Cond.push_back(EndLoop->getOperand(2));
+ if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
+ TII->reverseBranchCondition(Cond);
+ }
+ return {};
+ } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
+ // General case just lets the unrolled t2LoopDec do the subtraction and
+ // therefore just needs to check if zero has been reached.
+ MachineInstr *LoopDec = nullptr;
+ for (auto &I : MBB.instrs())
+ if (I.getOpcode() == ARM::t2LoopDec)
+ LoopDec = &I;
+ assert(LoopDec && "Unable to find copied LoopDec");
+ // Check if we're done with the loop.
+ BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
+ .addReg(LoopDec->getOperand(0).getReg())
+ .addImm(0)
+ .addImm(ARMCC::AL)
+ .addReg(ARM::NoRegister);
+ Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
+ Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
+ return {};
+ } else
+ llvm_unreachable("Unknown EndLoop");
+ }
+
+ void setPreheader(MachineBasicBlock *NewPreheader) override {}
+
+ void adjustTripCount(int TripCountAdjust) override {}
+
+ void disposed() override {}
+};
+} // namespace
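
A brief sketch, not part of the patch, of the createTripCountGreaterCondition
contract assumed above (PLI, TC, MBB, and Cond are illustrative names): an
empty Optional tells the pipeliner the comparison cannot be resolved
statically, so a branch must be emitted from the Cond operands that were
pushed:

    SmallVector<MachineOperand, 4> Cond;
    if (Optional<bool> Known =
            PLI->createTripCountGreaterCondition(TC, MBB, Cond)) {
      // *Known: the trip-count comparison was folded at compile time.
    } else {
      // Emit a conditional branch built from Cond (e.g. via insertBranch).
    }
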
+
+std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
+ MachineBasicBlock *Preheader = *LoopBB->pred_begin();
+ if (Preheader == LoopBB)
+ Preheader = *std::next(LoopBB->pred_begin());
+
+ if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
+ // If the branch is a Bcc, then the CPSR should be set somewhere within the
+ // block. We need to determine the reaching definition of CPSR so that
+ // it can be marked as non-pipelineable, allowing the pipeliner to force
+ // it into stage 0 or give up if it cannot or will not do so.
+ MachineInstr *CCSetter = nullptr;
+ for (auto &L : LoopBB->instrs()) {
+ if (L.isCall())
+ return nullptr;
+ if (isCPSRDefined(L))
+ CCSetter = &L;
+ }
+ if (CCSetter)
+ return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
+ else
+ return nullptr; // Unable to find the CC setter, so unable to guarantee
+                      // that pipelining will work.
+ }
+
+ // Recognize:
+ // preheader:
+//   %1 = t2DoLoopStart %0
+ // loop:
+ // %2 = phi %1, <not loop>, %..., %loop
+ // %3 = t2LoopDec %2, <imm>
+ // t2LoopEnd %3, %loop
+
+ if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
+ for (auto &L : LoopBB->instrs())
+ if (L.isCall())
+ return nullptr;
+ else if (isVCTP(&L))
+ return nullptr;
+ Register LoopDecResult = I->getOperand(0).getReg();
+ MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+ MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
+ if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
+ return nullptr;
+ MachineInstr *LoopStart = nullptr;
+ for (auto &J : Preheader->instrs())
+ if (J.getOpcode() == ARM::t2DoLoopStart)
+ LoopStart = &J;
+ if (!LoopStart)
+ return nullptr;
+ return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
+ }
+ return nullptr;
+}
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index defce07dd862..3b8f3403e3c3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -360,7 +360,7 @@ public:
MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
- const outliner::Candidate &C) const override;
+ outliner::Candidate &C) const override;
/// Enable outlining by default at -Oz.
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
@@ -372,10 +372,15 @@ public:
MI->getOpcode() == ARM::t2WhileLoopStartTP;
}
+ /// Analyze loop L, which must be a single-basic-block loop, and if the
+ /// conditions can be understood enough produce a PipelinerLoopInfo object.
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
+ analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+
private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns an invalid Register
/// otherwise.
- unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
+ Register findRegisterToSaveLRTo(outliner::Candidate &C) const;
/// Adds an instruction which saves the link register on top of the stack into
/// the MachineBasicBlock \p MBB at position \p It. If \p Auth is true,
@@ -752,6 +757,26 @@ static inline bool isValidCoprocessorNumber(unsigned Num,
return true;
}
+static inline bool isSEHInstruction(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case ARM::SEH_StackAlloc:
+ case ARM::SEH_SaveRegs:
+ case ARM::SEH_SaveRegs_Ret:
+ case ARM::SEH_SaveSP:
+ case ARM::SEH_SaveFRegs:
+ case ARM::SEH_SaveLR:
+ case ARM::SEH_Nop:
+ case ARM::SEH_Nop_Ret:
+ case ARM::SEH_PrologEnd:
+ case ARM::SEH_EpilogStart:
+ case ARM::SEH_EpilogEnd:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index c543d02ff75a..1d0e743b94db 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -63,28 +63,26 @@ const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
bool UseSplitPush = STI.splitFramePushPop(*MF);
- const MCPhysReg *RegList =
- STI.isTargetDarwin()
- ? CSR_iOS_SaveList
- : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList);
-
const Function &F = MF->getFunction();
+
if (F.getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
+ } else if (STI.splitFramePointerPush(*MF)) {
+ return CSR_Win_SplitFP_SaveList;
} else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
return CSR_Win_AAPCS_CFGuard_Check_SaveList;
} else if (F.getCallingConv() == CallingConv::SwiftTail) {
return STI.isTargetDarwin()
? CSR_iOS_SwiftTail_SaveList
- : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList
+ : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
: CSR_AAPCS_SwiftTail_SaveList);
} else if (F.hasFnAttribute("interrupt")) {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
// function conforming to the AAPCS to function as a handler.
- return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+ return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
// need to be saved to restore user-mode state.
@@ -101,7 +99,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (STI.isTargetDarwin())
return CSR_iOS_SwiftError_SaveList;
- return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList :
+ return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
CSR_AAPCS_SwiftError_SaveList;
}
@@ -109,7 +107,15 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
? CSR_iOS_CXX_TLS_PE_SaveList
: CSR_iOS_CXX_TLS_SaveList;
- return RegList;
+
+ if (STI.isTargetDarwin())
+ return CSR_iOS_SaveList;
+
+ if (UseSplitPush)
+ return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
+ : CSR_ATPCS_SplitPush_SaveList;
+
+ return CSR_AAPCS_SaveList;
}
const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
@@ -238,7 +244,7 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF,
BitVector Reserved(getNumRegs());
markSuperRegs(Reserved, ARM::PC);
- if (TFI->hasFP(MF))
+ if (TFI->isFPReserved(MF))
markSuperRegs(Reserved, STI.getFramePointerReg());
if (hasBasePointer(MF))
markSuperRegs(Reserved, BasePtr);
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 57d7842c63ca..73ed300ccff4 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -43,7 +43,7 @@ namespace ARMRI {
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
+static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) {
using namespace ARM;
switch (Reg) {
@@ -53,25 +53,52 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
return true;
case R8: case R9: case R10: case R11: case R12:
// For iOS we want r7 and lr to be next to each other.
- return !isIOS;
+ return !SplitFramePushPop;
default:
return false;
}
}
-static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
+static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
using namespace ARM;
switch (Reg) {
case R8: case R9: case R10: case R11: case R12:
// iOS has this second area.
- return isIOS;
+ return SplitFramePushPop;
default:
return false;
}
}
-static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
+static inline bool isSplitFPArea1Register(unsigned Reg,
+ bool SplitFramePushPop) {
+ using namespace ARM;
+
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case R8: case R9: case R10: case R12:
+ case SP: case PC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool isSplitFPArea2Register(unsigned Reg,
+ bool SplitFramePushPop) {
+ using namespace ARM;
+
+ switch (Reg) {
+ case R11: case LR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
using namespace ARM;
switch (Reg) {
@@ -214,6 +241,8 @@ public:
unsigned DefSubReg,
const TargetRegisterClass *SrcRC,
unsigned SrcSubReg) const override;
+
+ int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
index ddbd6702e528..b2d291bbe7ff 100644
--- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -16,6 +16,7 @@
#include "ARMBasicBlockInfo.h"
#include "ARMSubtarget.h"
#include "MVETailPredUtils.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -212,7 +213,7 @@ bool ARMBlockPlacement::processPostOrderLoops(MachineLoop *ML) {
bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
if (!ST.hasLOB())
return false;
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n");
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index a6dbe563a4ab..d14424c2deca 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -284,19 +284,32 @@ def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
// The order of callee-saved registers needs to match the order we actually push
// them in FrameLowering, because this order is what's used by
// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame
-// pointer, we use this AAPCS alternative.
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+// pointer, we use this ATPCS alternative.
+def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
R11, R10, R9, R8,
(sequence "D%u", 15, 8))>;
+def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
+ (sequence "D%u", 15, 8),
+ LR, R11)>;
+
// R8 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R8)>;
// R10 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
+def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R10)>;
+// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
+// pointer even for Thumb targets, where split pushes are necessary.
+// This AAPCS alternative makes sure the frame index slots match the push
+// order in that case.
+def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
+ R7, R6, R5, R4,
+ R10, R9, R8,
+ (sequence "D%u", 15, 8))>;
+
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
// be partially modelled by treating R0 as a callee-saved register
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index a2a4f1f3bdfd..d77c3afd05e5 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -396,7 +396,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlign().value() << " bytes *****\n");
- STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+ STI = &MF->getSubtarget<ARMSubtarget>();
TII = STI->getInstrInfo();
isPositionIndependentOrROPI =
STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2f083561bbd4..613904f702f0 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -2107,6 +2108,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::TCRETURNdi:
case ARM::TCRETURNri: {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+ MBBI--;
+ if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+ MBBI--;
assert(MBBI->isReturn() &&
"Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
@@ -2116,13 +2121,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
+ MBBI--;
+ if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
+ MBBI--;
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
+ MachineFunction *MF = MBB.getParent();
+ bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ MF->getFunction().needsUnwindTableEntry();
unsigned TCOpcode =
STI->isThumb()
- ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+ ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd
+ : ARM::tTAILJMPdND)
: ARM::TAILJMPd;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
@@ -3132,7 +3145,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<ARMSubtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 5d94b99d4c5d..a167225e2743 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -122,8 +122,7 @@ class ARMFastISel final : public FastISel {
explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo),
- Subtarget(
- &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
+ Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
M(const_cast<Module &>(*funcInfo.Fn->getParent())),
TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
TLI(*Subtarget->getTargetLowering()) {
@@ -156,7 +155,7 @@ class ARMFastISel final : public FastISel {
const LoadInst *LI) override;
bool fastLowerArguments() override;
- #include "ARMGenFastISel.inc"
+#include "ARMGenFastISel.inc"
// Instruction selection routines.
@@ -189,10 +188,10 @@ class ARMFastISel final : public FastISel {
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
- unsigned Alignment = 0, bool isZExt = true,
+ MaybeAlign Alignment = None, bool isZExt = true,
bool allocReg = true);
bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
- unsigned Alignment = 0);
+ MaybeAlign Alignment = None);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
@@ -602,8 +601,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
}
if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
- (Subtarget->isTargetMachO() && IsIndirect) ||
- Subtarget->genLongCalls()) {
+ (Subtarget->isTargetMachO() && IsIndirect)) {
MachineInstrBuilder MIB;
Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
@@ -898,7 +896,8 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
}
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
- unsigned Alignment, bool isZExt, bool allocReg) {
+ MaybeAlign Alignment, bool isZExt,
+ bool allocReg) {
unsigned Opc;
bool useAM3 = false;
bool needVMOV = false;
@@ -924,7 +923,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i16:
- if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(2) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -939,7 +939,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::i32:
- if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(4) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -955,7 +956,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
case MVT::f32:
if (!Subtarget->hasVFP2Base()) return false;
// Unaligned loads need special handling. Floats require word-alignment.
- if (Alignment && Alignment < 4) {
+ if (Alignment && *Alignment < Align(4)) {
needVMOV = true;
VT = MVT::i32;
Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
@@ -970,7 +971,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned loads need special handling. Doublewords require
// word-alignment.
- if (Alignment && Alignment < 4)
+ if (Alignment && *Alignment < Align(4))
return false;
Opc = ARM::VLDRD;
@@ -1030,14 +1031,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
Register ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlign()))
return false;
updateValueMap(I, ResultReg);
return true;
}
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
- unsigned Alignment) {
+ MaybeAlign Alignment) {
unsigned StrOpc;
bool useAM3 = false;
switch (VT.SimpleTy) {
@@ -1065,7 +1066,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i16:
- if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(2) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -1079,7 +1081,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::i32:
- if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
+ if (Alignment && *Alignment < Align(4) &&
+ !Subtarget->allowsUnalignedMem())
return false;
if (isThumb2) {
@@ -1094,7 +1097,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
case MVT::f32:
if (!Subtarget->hasVFP2Base()) return false;
// Unaligned stores need special handling. Floats require word-alignment.
- if (Alignment && Alignment < 4) {
+ if (Alignment && *Alignment < Align(4)) {
Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VMOVRS), MoveReg)
@@ -1111,8 +1114,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned stores need special handling. Doublewords require
// word-alignment.
- if (Alignment && Alignment < 4)
- return false;
+ if (Alignment && *Alignment < Align(4))
+ return false;
StrOpc = ARM::VSTRD;
break;
@@ -1166,7 +1169,7 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlign()))
return false;
return true;
}
@@ -2939,7 +2942,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
Register ResultReg = MI->getOperand(0).getReg();
- if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlign(), isZExt, false))
return false;
MachineBasicBlock::iterator I(MI);
removeDeadCode(I, std::next(I));
diff --git a/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
new file mode 100644
index 000000000000..77c8f7134a55
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp
@@ -0,0 +1,432 @@
+//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This pass works around a Cortex Core Fused AES erratum:
+// - Cortex-A57 Erratum 1742098
+// - Cortex-A72 Erratum 1655431
+//
+// The erratum may be triggered if an input vector register to AESE or AESD was
+// last written by an instruction that only updated 32 bits of it. This can
+// occur for either of the input registers.
+//
+// The workaround chosen is to update the input register using `r = VORRq r, r`,
+// as this updates all 128 bits of the register unconditionally, but does not
+// change the values observed in `r`, making the input safe.
+//
+// This pass has to be conservative in a few cases:
+// - an input vector register to the AES instruction is defined outside the
+// current function, where we have to assume the register was updated in an
+// unsafe way; and
+// - an input vector register to the AES instruction is updated along multiple
+// different control-flow paths, where we have to ensure all the register
+// updating instructions are safe.
+//
+// Both of these cases may apply to an input vector register. In either case, we
+// need to ensure that, when the pass is finished, there exists a safe
+// instruction between every unsafe register updating instruction and the AES
+// instruction.
+//
+//===----------------------------------------------------------------------===//
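
A minimal sketch, not part of the patch, of the workaround at its insertion
point, assuming the usual MachineInstrBuilder helpers; insertAESFixup below
is the authoritative implementation:

    // Emit "VORRq Reg, Reg, Reg" before the chosen insertion point. This
    // rewrites all 128 bits of Reg without changing its observable value,
    // making it a safe input for the following AESE/AESD.
    Register Reg = FixupLoc.MOp->getReg();
    BuildMI(*FixupLoc.Block, *FixupLoc.InsertionPt,
            FixupLoc.InsertionPt->getDebugLoc(), TII->get(ARM::VORRq), Reg)
        .addReg(Reg)
        .addReg(Reg)
        .add(predOps(ARMCC::AL));
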
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "Utils/ARMBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <assert.h>
+#include <stdint.h>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
+
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
+public:
+ static char ID;
+ explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
+ initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override {
+ return "ARM fix for Cortex-A57 AES Erratum 1742098";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ReachingDefAnalysis>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ // This is the information needed to insert the fixup in the right place.
+ struct AESFixupLocation {
+ MachineBasicBlock *Block;
+ // The fixup instruction will be inserted *before* InsertionPt.
+ MachineInstr *InsertionPt;
+ MachineOperand *MOp;
+ };
+
+ void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
+ const ARMBaseRegisterInfo *TRI,
+ SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
+
+ void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+ const ARMBaseRegisterInfo *TRI) const;
+
+ static bool isFirstAESPairInstr(MachineInstr &MI);
+ static bool isSafeAESInput(MachineInstr &MI);
+};
+char ARMFixCortexA57AES1742098::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+ "ARM fix for Cortex-A57 AES Erratum 1742098", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
+INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
+ "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
+
+//===----------------------------------------------------------------------===//
+
+bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return Opc == ARM::AESD || Opc == ARM::AESE;
+}
+
+bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
+ auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
+ int CCIdx = MI.findFirstPredOperandIdx();
+ if (CCIdx == -1)
+ return false;
+ return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
+ };
+
+ switch (MI.getOpcode()) {
+ // Unknown: Assume not safe.
+ default:
+ return false;
+ // 128-bit wide AES instructions
+ case ARM::AESD:
+ case ARM::AESE:
+ case ARM::AESMC:
+ case ARM::AESIMC:
+ // No CondCode.
+ return true;
+ // 128-bit and 64-bit wide bitwise ops (when condition = al)
+ case ARM::VANDd:
+ case ARM::VANDq:
+ case ARM::VORRd:
+ case ARM::VORRq:
+ case ARM::VEORd:
+ case ARM::VEORq:
+ case ARM::VMVNd:
+ case ARM::VMVNq:
+ // VMOV of 64-bit value between D registers (when condition = al)
+ case ARM::VMOVD:
+ // VMOV of 64 bit value from GPRs (when condition = al)
+ case ARM::VMOVDRR:
+ // VMOV of immediate into D or Q registers (when condition = al)
+ case ARM::VMOVv2i64:
+ case ARM::VMOVv1i64:
+ case ARM::VMOVv2f32:
+ case ARM::VMOVv4f32:
+ case ARM::VMOVv2i32:
+ case ARM::VMOVv4i32:
+ case ARM::VMOVv4i16:
+ case ARM::VMOVv8i16:
+ case ARM::VMOVv8i8:
+ case ARM::VMOVv16i8:
+ // Loads (when condition = al)
+ // VLD Dn, [Rn, #imm]
+ case ARM::VLDRD:
+ // VLDM
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDIA:
+ // VLDn to all lanes.
+ case ARM::VLD1d64:
+ case ARM::VLD1q64:
+ case ARM::VLD1d32:
+ case ARM::VLD1q32:
+ case ARM::VLD2b32:
+ case ARM::VLD2d32:
+ case ARM::VLD2q32:
+ case ARM::VLD1d16:
+ case ARM::VLD1q16:
+ case ARM::VLD2d16:
+ case ARM::VLD2q16:
+ case ARM::VLD1d8:
+ case ARM::VLD1q8:
+ case ARM::VLD2b8:
+ case ARM::VLD2d8:
+ case ARM::VLD2q8:
+ case ARM::VLD3d32:
+ case ARM::VLD3q32:
+ case ARM::VLD3d16:
+ case ARM::VLD3q16:
+ case ARM::VLD3d8:
+ case ARM::VLD3q8:
+ case ARM::VLD4d32:
+ case ARM::VLD4q32:
+ case ARM::VLD4d16:
+ case ARM::VLD4q16:
+ case ARM::VLD4d8:
+ case ARM::VLD4q8:
+ // VLD1 (single element to one lane)
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd16_UPD:
+ // VLD1 (single element to all lanes)
+ case ARM::VLD1DUPd32:
+ case ARM::VLD1DUPd32wb_fixed:
+ case ARM::VLD1DUPd32wb_register:
+ case ARM::VLD1DUPd16:
+ case ARM::VLD1DUPd16wb_fixed:
+ case ARM::VLD1DUPd16wb_register:
+ case ARM::VLD1DUPd8:
+ case ARM::VLD1DUPd8wb_fixed:
+ case ARM::VLD1DUPd8wb_register:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ // VMOV
+ case ARM::VSETLNi32:
+ case ARM::VSETLNi16:
+ case ARM::VSETLNi8:
+ return CondCodeIsAL(MI);
+ };
+
+ return false;
+}
+
+bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
+ auto &STI = F.getSubtarget<ARMSubtarget>();
+
+ // Fix not requested or AES instructions not present: skip pass.
+ if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
+ return false;
+
+ const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
+ const ARMBaseInstrInfo *TII = STI.getInstrInfo();
+
+ auto &RDA = getAnalysis<ReachingDefAnalysis>();
+
+ // Analyze whole function to find instructions which need fixing up...
+ SmallVector<AESFixupLocation> FixupLocsForFn{};
+ analyzeMF(F, RDA, TRI, FixupLocsForFn);
+
+ // ... and fix the instructions up all at the same time.
+ bool Changed = false;
+ LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
+ for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
+ insertAESFixup(FixupLoc, TII, TRI);
+ Changed |= true;
+ }
+
+ return Changed;
+}
+
+void ARMFixCortexA57AES1742098::analyzeMF(
+ MachineFunction &MF, ReachingDefAnalysis &RDA,
+ const ARMBaseRegisterInfo *TRI,
+ SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
+ unsigned MaxAllowedFixups = 0;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!isFirstAESPairInstr(MI))
+ continue;
+
+ // Found an instruction to check the operands of.
+ LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
+ assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
+ "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
+
+ // A maximum of two fixups should be inserted for each AES pair (one per
+ // register use).
+ MaxAllowedFixups += 2;
+
+ // Inspect all operands, choosing whether to insert a fixup.
+ for (MachineOperand &MOp : MI.uses()) {
+ SmallPtrSet<MachineInstr *, 1> AllDefs{};
+ RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
+
+ // Planned Fixup: This should be added to FixupLocsForFn at most once.
+ AESFixupLocation NewLoc{&MBB, &MI, &MOp};
+
+ // In small functions with loops, this operand may be both a live-in and
+ // have definitions within the function itself. These will need a fixup.
+ bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
+
+ // If the register doesn't have defining instructions, and is not a
+ // live-in, then something is wrong and the fixup must always be
+ // inserted to be safe.
+ if (!IsLiveIn && AllDefs.size() == 0) {
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: No Defining Instrs found, not live-in: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ auto IsUnsafe = [](MachineInstr *MI) -> bool {
+ return !isSafeAESInput(*MI);
+ };
+ size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
+
+ // If there are no unsafe definitions...
+ if (UnsafeCount == 0) {
+ // ... and the register is not live-in ...
+ if (!IsLiveIn) {
+ // ... then skip the fixup.
+ LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ continue;
+ }
+
+ // Otherwise, the only unsafe "definition" is a live-in, so insert the
+ // fixup at the start of the function.
+ LLVM_DEBUG(dbgs()
+ << "Fixup Planned: Live-In (with safe defining instrs): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ NewLoc.Block = &MF.front();
+ NewLoc.InsertionPt = &*NewLoc.Block->begin();
+ LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
+ << *NewLoc.InsertionPt);
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ // If a fixup is needed in more than one place, then the best place to
+ // insert it is adjacent to the use rather than introducing a fixup
+ // adjacent to each def.
+ //
+ // FIXME: It might be better to hoist this to the start of the BB, if
+ // possible.
+ if (IsLiveIn || UnsafeCount > 1) {
+ LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
+ "(including live-ins): "
+ << printReg(MOp.getReg(), TRI) << "\n");
+ FixupLocsForFn.emplace_back(NewLoc);
+ continue;
+ }
+
+ assert(UnsafeCount == 1 && !IsLiveIn &&
+ "At this point, there should be one unsafe defining instrs "
+ "and the defined register should not be a live-in.");
+ SmallPtrSetIterator<MachineInstr *> It =
+ llvm::find_if(AllDefs, IsUnsafe);
+ assert(It != AllDefs.end() &&
+ "UnsafeCount == 1 but No Unsafe MachineInstr found.");
+ MachineInstr *DefMI = *It;
+
+ LLVM_DEBUG(
+ dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
+ << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
+
+ // There is one unsafe defining instruction, which needs a fixup. It is
+ // generally good to hoist the fixup to be adjacent to the defining
+ // instruction rather than the using instruction, as the using
+ // instruction may be inside a loop when the defining instruction is
+ // not.
+ MachineBasicBlock::iterator DefIt = DefMI;
+ ++DefIt;
+ if (DefIt != DefMI->getParent()->end()) {
+ LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
+ << "And immediately before " << *DefIt);
+ NewLoc.Block = DefIt->getParent();
+ NewLoc.InsertionPt = &*DefIt;
+ }
+
+ FixupLocsForFn.emplace_back(NewLoc);
+ }
+ }
+ }
+
+ assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
+ "Inserted too many fixups for this function.");
+ (void)MaxAllowedFixups;
+}
+
+void ARMFixCortexA57AES1742098::insertAESFixup(
+ AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
+ const ARMBaseRegisterInfo *TRI) const {
+ MachineOperand *OperandToFixup = FixupLoc.MOp;
+
+ assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
+ Register RegToFixup = OperandToFixup->getReg();
+
+ LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
+ << " before: " << *FixupLoc.InsertionPt);
+
+ // Insert the new `VORRq qN, qN, qN`. There are a few details here:
+ //
+ // The uses are marked as killed, even if the original use of OperandToFixup
+ // is not killed, as the new instruction is clobbering the register. This is
+  // safe even if there are other uses of `qN`, as the VORRq is, value-wise,
+  // a no-op (it is inserted for microarchitectural reasons).
+ //
+ // The def and the uses are still marked as Renamable if the original register
+ // was, to avoid having to rummage through all the other uses and defs and
+ // unset their renamable bits.
+ unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
+ BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
+ TII->get(ARM::VORRq))
+ .addReg(RegToFixup, RegState::Define | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addReg(RegToFixup, RegState::Kill | Renamable)
+ .addImm((uint64_t)ARMCC::AL)
+ .addReg(ARM::NoRegister);
+}
+
+// Factory function used by ARMTargetMachine to add the pass to the pass
+// manager.
+FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
+ return new ARMFixCortexA57AES1742098();
+}
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 1f2f6f7497e0..48b4d266b41a 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -47,7 +47,8 @@
// | |
// |-----------------------------------|
// | |
-// | prev_fp, prev_lr |
+// | prev_lr |
+// | prev_fp |
// | (a.k.a. "frame record") |
// | |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
@@ -138,6 +139,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -210,6 +212,12 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
MFI.isFrameAddressTaken());
}
+/// isFPReserved - Return true if the frame pointer register should be
+/// considered a reserved register in the scope of the specified function.
+bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
+ return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
+}
+
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
@@ -272,6 +280,230 @@ static int getArgumentStackToRestore(MachineFunction &MF,
return ArgumentPopSize;
}
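+// Returns true when Windows SEH unwind info (WinCFI) has to be emitted:
+// the target uses Windows CFI and the function needs an unwind table entry.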
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
+// Given a load or a store instruction, generate the appropriate SEH unwind
+// opcode on Windows.
+static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ unsigned Flags) {
+ unsigned Opc = MBBI->getOpcode();
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ DebugLoc DL = MBBI->getDebugLoc();
+ MachineInstrBuilder MIB;
+ const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+
+ Flags |= MachineInstr::NoMerge;
+
+ switch (Opc) {
+ default:
+ report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
+ break;
+ case ARM::t2ADDri: // add.w r11, sp, #xx
+ case ARM::t2ADDri12: // add.w r11, sp, #xx
+ case ARM::t2MOVTi16: // movt r4, #xx
+ case ARM::tBL: // bl __chkstk
+ // These are harmless if used for just setting up a frame pointer,
+ // but that frame pointer can't be relied upon for unwinding, unless
+ // set up with SEH_SaveSP.
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::t2MOVi16: { // mov(w) r4, #xx
+ bool Wide = MBBI->getOperand(1).getImm() >= 256;
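+    // Immediates below 256 fit the narrow tMOVi8 encoding; rewrite the
+    // instruction so that the width recorded in the SEH opcode matches the
+    // final encoding.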
+ if (!Wide) {
+ MachineInstrBuilder NewInstr =
+ BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
+ NewInstr.add(MBBI->getOperand(0));
+ NewInstr.add(t1CondCodeOp(/*isDead=*/true));
+ for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
+ NewInstr.add(MBBI->getOperand(i));
+ MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
+ MBB->erase(MBBI);
+ MBBI = NewMBBI;
+ }
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
+ break;
+ }
+
+ case ARM::tBLXr: // blx r12 (__chkstk)
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/0)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::t2MOVi32imm: // movw+movt
+ // This pseudo instruction expands into two mov instructions. If the
+ // second operand is a symbol reference, this will stay as two wide
+    // instructions, movw+movt. If it is an immediate, the first one can
+ // end up as a narrow mov though.
+ // As two SEH instructions are appended here, they won't get interleaved
+ // between the two final movw/movt instructions, but it doesn't make any
+ // practical difference.
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ MBB->insertAfter(MBBI, MIB);
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned Mask = 0;
+ bool Wide = false;
+ for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
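+      // A pop of pc is represented as lr (reg 14) in the SEH register mask.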
+ if (Reg == 15)
+ Reg = 14;
+ if (Reg >= 8 && Reg <= 13)
+ Wide = true;
+ else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
+ Wide = true;
+ Mask |= 1 << Reg;
+ }
+ if (!Wide) {
+ unsigned NewOpc;
+ switch (Opc) {
+ case ARM::t2LDMIA_RET:
+ NewOpc = ARM::tPOP_RET;
+ break;
+ case ARM::t2LDMIA_UPD:
+ NewOpc = ARM::tPOP;
+ break;
+ case ARM::t2STMDB_UPD:
+ NewOpc = ARM::tPUSH;
+ break;
+ default:
+ llvm_unreachable("");
+ }
+ MachineInstrBuilder NewInstr =
+ BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
+ for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
+ NewInstr.add(MBBI->getOperand(i));
+ MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
+ MBB->erase(MBBI);
+ MBBI = NewMBBI;
+ }
+ unsigned SEHOpc =
+ (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
+ MIB = BuildMI(MF, DL, TII.get(SEHOpc))
+ .addImm(Mask)
+ .addImm(Wide ? 1 : 0)
+ .setMIFlags(Flags);
+ break;
+ }
+ case ARM::VSTMDDB_UPD:
+ case ARM::VLDMDIA_UPD: {
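+    // VFP load/store multiple register lists are contiguous, so
+    // SEH_SaveFRegs only needs the first and last D-register of the range.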
+ int First = -1, Last = 0;
+ for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MBBI->getOperand(i);
+ unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
+ if (First == -1)
+ First = Reg;
+ Last = Reg;
+ }
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
+ .addImm(First)
+ .addImm(Last)
+ .setMIFlags(Flags);
+ break;
+ }
+ case ARM::tSUBspi:
+ case ARM::tADDspi:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
+ .addImm(MBBI->getOperand(2).getImm() * 4)
+ .addImm(/*Wide=*/0)
+ .setMIFlags(Flags);
+ break;
+ case ARM::t2SUBspImm:
+ case ARM::t2SUBspImm12:
+ case ARM::t2ADDspImm:
+ case ARM::t2ADDspImm12:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
+ .addImm(MBBI->getOperand(2).getImm())
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::tMOVr:
+ if (MBBI->getOperand(1).getReg() == ARM::SP &&
+ (Flags & MachineInstr::FrameSetup)) {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
+ .addImm(Reg)
+ .setMIFlags(Flags);
+ } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
+ (Flags & MachineInstr::FrameDestroy)) {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
+ .addImm(Reg)
+ .setMIFlags(Flags);
+ } else {
+ report_fatal_error("No SEH Opcode for MOV");
+ }
+ break;
+
+ case ARM::tBX_RET:
+ case ARM::TCRETURNri:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
+ .addImm(/*Wide=*/0)
+ .setMIFlags(Flags);
+ break;
+
+ case ARM::TCRETURNdi:
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ break;
+ }
+ return MBB->insertAfter(MBBI, MIB);
+}
+
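+// Return the instruction preceding MBBI, or an invalid iterator when MBBI is
+// at the start of its block, so that a range of newly inserted instructions
+// can be located again after the insertions have happened.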
+static MachineBasicBlock::iterator
+initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
+ if (MBBI == MBB.begin())
+ return MachineBasicBlock::iterator();
+ return std::prev(MBBI);
+}
+
+static void insertSEHRange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Start,
+ const MachineBasicBlock::iterator &End,
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (Start.isValid())
+ Start = std::next(Start);
+ else
+ Start = MBB.begin();
+
+ for (auto MI = Start; MI != End;) {
+ auto Next = std::next(MI);
+    // Check if this instruction already has a SEH opcode added. In that
+ // case, don't do this generic mapping.
+ if (Next != End && isSEHInstruction(*Next)) {
+ MI = std::next(Next);
+ while (MI != End && isSEHInstruction(*MI))
+ ++MI;
+ continue;
+ }
+ insertSEH(MI, TII, MIFlags);
+ MI = Next;
+ }
+}
+
static void emitRegPlusImmediate(
bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
@@ -392,8 +624,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
const DebugLoc &DL, const unsigned Reg,
const Align Alignment,
const bool MustBeSingleInstruction) {
- const ARMSubtarget &AST =
- static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
const unsigned AlignMask = Alignment.value() - 1U;
const unsigned NrBitsToZero = Log2(Alignment);
@@ -452,15 +683,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
-static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
+static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
+ const MachineFunction &MF) {
// For Thumb1, push.w isn't available, so the first push will always push
// r7 and lr onto the stack first.
if (AFI.isThumb1OnlyFunction())
return -AFI.getArgRegsSaveSize() - (2 * 4);
// This is a conservative estimate: assume the frame pointer is r7 and that
// r8 up to pc ("r15") are spilled before it (= 8 registers).
- int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
- return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
+ int MaxRegBytes = 8 * 4;
+ if (STI.splitFramePointerPush(MF)) {
+ // Here, r11 can be stored below all of r4-r15 (3 registers more than
+ // above), plus d8-d15.
+ MaxRegBytes = 11 * 4 + 8 * 8;
+ }
+ int FPCXTSaveSize =
+ (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
+ return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -482,6 +721,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned NumBytes = MFI.getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
int FPCXTSaveSize = 0;
+ bool NeedsWinCFI = needsWinCFI(MF);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -510,47 +750,92 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup);
DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
}
- DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+ if (!NeedsWinCFI)
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+ if (NeedsWinCFI && MBBI != MBB.begin()) {
+ insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ MF.setHasWinCFI(true);
+ }
return;
}
// Determine spill area sizes.
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (STI.splitFramePushPop(MF)) {
+ if (STI.splitFramePointerPush(MF)) {
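+    // When the frame pointer push is split, r11 and lr form save area 2 and
+    // all other GPRs land in area 1.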
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R11:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
GPRCS2Size += 4;
break;
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R12:
+ GPRCS1Size += 4;
+ break;
+ case ARM::FPCXTNS:
+ FPCXTSaveSize = 4;
+ break;
+ default:
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
+ DPRCSSize += 8;
+ }
+ }
+ } else {
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.splitFramePushPop(MF)) {
+ GPRCS2Size += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ GPRCS1Size += 4;
+ break;
+ case ARM::FPCXTNS:
+ FPCXTSaveSize = 4;
+ break;
+ default:
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
+ DPRCSSize += 8;
}
- LLVM_FALLTHROUGH;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- GPRCS1Size += 4;
- break;
- case ARM::FPCXTNS:
- FPCXTSaveSize = 4;
- break;
- default:
- // This is a DPR. Exclude the aligned DPRCS2 spills.
- if (Reg == ARM::D8)
- D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
- DPRCSSize += 8;
}
}
@@ -585,15 +870,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
- unsigned DPRGapSize =
- (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
- DPRAlign.value();
+ unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
+ if (!STI.splitFramePointerPush(MF)) {
+ DPRGapSize += GPRCS2Size;
+ }
+ DPRGapSize %= DPRAlign.value();
- unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
+ unsigned DPRCSOffset;
+ if (STI.splitFramePointerPush(MF)) {
+ DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
+ GPRCS2Offset = DPRCSOffset - GPRCS2Size;
+ } else {
+ DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
+ }
int FramePtrOffsetInPush = 0;
if (HasFP) {
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
- assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
+ assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
@@ -604,7 +897,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
// Move past area 2.
- if (GPRCS2Size > 0) {
+ if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
}
@@ -644,18 +937,37 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
} else
NumBytes = DPRCSOffset;
+ if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
+ GPRCS2Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ }
+
+ bool NeedsWinCFIStackAlloc = NeedsWinCFI;
+ if (STI.splitFramePointerPush(MF) && HasFP)
+ NeedsWinCFIStackAlloc = false;
+
if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 2;
- if (NumWords < 65536)
+ if (NumWords < 65536) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
.addImm(NumWords)
.setMIFlags(MachineInstr::FrameSetup)
.add(predOps(ARMCC::AL));
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ // Split into two instructions here, instead of using t2MOVi32imm,
+ // to allow inserting accurate SEH instructions (including accurate
+ // instruction size for each of them).
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords & 0xffff)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
+ .addReg(ARM::R4)
+ .addImm(NumWords >> 16)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ }
switch (TM.getCodeModel()) {
case CodeModel::Tiny:
@@ -682,12 +994,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
break;
}
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ MachineInstrBuilder Instr, SEH;
+ Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (NeedsWinCFIStackAlloc) {
+ SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .addImm(/*Wide=*/1)
+ .setMIFlags(MachineInstr::FrameSetup);
+ MBB.insertAfter(Instr, SEH);
+ }
NumBytes = 0;
}
@@ -720,34 +1040,58 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// into spill area 1, including the FP in R11. In either case, it
// is in area one and the adjustment needs to take place just after
// that push.
+  // FIXME: The above is not necessarily true when PACBTI is enabled.
+  // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
+  // so FP ends up in area two.
+ MachineBasicBlock::iterator AfterPush;
if (HasFP) {
- MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
+ AfterPush = std::next(GPRCS1Push);
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
- dl, TII, FramePtr, ARM::SP,
- PushSize + FramePtrOffsetInPush,
- MachineInstr::FrameSetup);
- if (FramePtrOffsetInPush + PushSize != 0) {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true),
- FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
- BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ int FPOffset = PushSize + FramePtrOffsetInPush;
+ if (STI.splitFramePointerPush(MF)) {
+ AfterPush = std::next(GPRCS2Push);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+ FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
} else {
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
- nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
+ FramePtr, ARM::SP, FPOffset,
+ MachineInstr::FrameSetup);
}
+ if (!NeedsWinCFI) {
+ if (FramePtrOffsetInPush + PushSize != 0) {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true),
+ FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
+ // Emit a SEH opcode indicating the prologue end. The rest of the prologue
+ // instructions below don't need to be replayed to unwind the stack.
+ if (NeedsWinCFI && MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator End = MBBI;
+ if (HasFP && STI.splitFramePointerPush(MF))
+ End = AfterPush;
+ insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
+ BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ MF.setHasWinCFI(true);
}
// Now that the prologue's actual instructions are finalised, we can insert
// the necessary DWARF cf instructions to describe the situation. Start by
// recording where each register ended up:
- if (GPRCS1Size > 0) {
+ if (GPRCS1Size > 0 && !NeedsWinCFI) {
MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
int CFIIndex;
for (const auto &Entry : CSI) {
@@ -781,7 +1125,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (GPRCS2Size > 0) {
+ if (GPRCS2Size > 0 && !NeedsWinCFI) {
MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
for (const auto &Entry : CSI) {
Register Reg = Entry.getReg();
@@ -807,7 +1151,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (DPRCSSize > 0) {
+ if (DPRCSSize > 0 && !NeedsWinCFI) {
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
MachineBasicBlock::iterator Pos = std::next(LastPush);
@@ -831,7 +1175,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// throughout the process. If we have a frame pointer, it takes over the job
// half-way through, so only the first few .cfi_def_cfa_offset instructions
// actually get emitted.
- DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
+ if (!NeedsWinCFI)
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
if (STI.isTargetELF() && hasFP(MF))
MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
@@ -928,7 +1273,14 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ MachineBasicBlock::iterator RangeStart;
if (!AFI->hasStackFrame()) {
+ if (MF.hasWinCFI()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ RangeStart = initMBBRange(MBB, MBBI);
+ }
+
if (NumBytes + IncomingArgStackToRestore != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII,
NumBytes + IncomingArgStackToRestore,
@@ -944,6 +1296,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
++MBBI;
}
+ if (MF.hasWinCFI()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ RangeStart = initMBBRange(MBB, MBBI);
+ }
+
// Move SP to start of FP callee save spill area.
NumBytes -= (ReservedArgStack +
AFI->getFPCXTSaveAreaSize() +
@@ -998,6 +1356,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
// Increment past our save areas.
+ if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
+ MBBI++;
+
if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
MBBI++;
// Since vpop register list cannot have gaps, there may be multiple vpop
@@ -1012,7 +1373,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
}
- if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
+ MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
if (ReservedArgStack || IncomingArgStackToRestore) {
@@ -1030,6 +1392,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
}
+
+ if (MF.hasWinCFI()) {
+ insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1245,7 +1613,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
- STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) {
+ STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
+ !STI.splitFramePointerPush(MF)) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
@@ -1609,12 +1978,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
.addImm(-4)
.add(predOps(ARMCC::AL));
}
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
- MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
- MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+ if (STI.splitFramePointerPush(MF)) {
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
+ &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
+ &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
+ } else {
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ 0, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ 0, MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+ }
// The code above does not insert spill code for the aligned DPRCS2 registers.
// The stack realignment code will be inserted between the push instructions
@@ -1642,14 +2020,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
- unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
+ unsigned LdrOpc =
+ AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
- NumAlignedDPRCS2Regs);
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea2Register, 0);
- emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea1Register, 0);
+ if (STI.splitFramePointerPush(MF)) {
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isSplitFPArea2Register, 0);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isSplitFPArea1Register, 0);
+ } else {
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register, 0);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register, 0);
+ }
return true;
}
@@ -1768,7 +2156,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
return;
// We are planning to use NEON instructions vst1 / vld1.
- if (!static_cast<const ARMSubtarget &>(MF.getSubtarget()).hasNEON())
+ if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
return;
// Don't bother if the default stack alignment is sufficiently high.
@@ -1818,6 +2206,34 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
return true;
}
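+// A full AAPCS frame record is required either unconditionally (frame chain
+// requested for leaf functions too) or, with the plain AAPCS frame chain,
+// only for functions that make calls.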
+static bool requiresAAPCSFrameRecord(const MachineFunction &MF) {
+ const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ return Subtarget.createAAPCSFrameChainLeaf() ||
+ (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
+}
+
+// Thumb1 may require a spill when storing to a frame index through FP, for
+// cases where FP is a high register (R11). This scans the function for cases
+// where this may happen.
+static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
+ const TargetFrameLowering &TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumb1OnlyFunction())
+ return false;
+
+ for (const auto &MBB : MF)
+ for (const auto &MI : MBB)
+ if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
+ for (const auto &Op : MI.operands())
+ if (Op.isFI()) {
+ Register Reg;
+ TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
+ if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
+ return true;
+ }
+ return false;
+}
+
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
@@ -1826,7 +2242,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// to take advantage the eliminateFrameIndex machinery. This also ensures it
// is spilled in the order specified by getCalleeSavedRegs() to make it easier
// to combine multiple loads / stores.
- bool CanEliminateFrame = true;
+ bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
bool CS1Spilled = false;
bool LRSpilled = false;
unsigned NumGPRSpills = 0;
@@ -2021,6 +2437,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Functions with VLAs or extremely large call frames are rare, and
// if a function is allocating more than 1KB of stack, an extra 4-byte
// slot probably isn't relevant.
+ //
+  // A special case is when r11 is used as FP: accesses to a frame index
+  // then require its value to be moved into a low reg.
+ // This is handled later on, once we are able to determine if we have any
+ // fp-relative accesses.
if (RegInfo->hasBasePointer(MF))
EstimatedRSStackSizeLimit = (1U << 5) * 4;
else
@@ -2049,7 +2470,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
//
// We could do slightly better on Thumb1; in some cases, an sp-relative
// offset would be legal even though an fp-relative offset is not.
- int MaxFPOffset = getMaxFPOffset(STI, *AFI);
+ int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
bool HasLargeArgumentList =
HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
@@ -2067,7 +2488,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(FramePtr);
// If the frame pointer is required by the ABI, also spill LR so that we
// emit a complete frame record.
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+ if ((requiresAAPCSFrameRecord(MF) ||
+ MF.getTarget().Options.DisableFramePointerElim(MF)) &&
+ !LRSpilled) {
SavedRegs.set(ARM::LR);
LRSpilled = true;
NumGPRSpills++;
@@ -2149,7 +2572,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// r7 can be used if it is not being used as the frame pointer.
- if (!HasFP) {
+ if (!HasFP || FramePtr != ARM::R7) {
if (SavedRegs.test(ARM::R7)) {
--RegDeficit;
LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
@@ -2270,8 +2693,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// to materialize a stack offset. If so, either spill one additional
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
- // adjustments also, even when the frame itself is small.
- if (BigFrameOffsets && !ExtraCSSpill) {
+  // adjustments and for frame index accesses when FP is a high register,
+ // even when the frame itself is small.
+ if (!ExtraCSSpill &&
+ (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
// If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it!
unsigned NumExtras = TargetAlign.value() / 4;
@@ -2488,6 +2913,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
unsigned CFIIndex;
const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
bool Thumb = ST->isThumb();
+ bool Thumb2 = ST->isThumb2();
// Sadly, this currently doesn't support varargs, or platforms other than
// android/linux. Note that thumb1/thumb2 are supported for android/linux.
@@ -2505,19 +2931,10 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
- uint64_t StackSize = MFI.getStackSize();
-
- // Do not generate a prologue for leaf functions with a stack of size zero.
- // For non-leaf functions we have to allow for the possibility that the
- // callis to a non-split function, as in PR37807. This function could also
- // take the address of a non-split function. When the linker tries to adjust
- // its non-existent prologue, it would fail with an error. Mark the object
- // file so that such failures are not errors. See this Go language bug-report
- // https://go-review.googlesource.com/c/go/+/148819/
- if (StackSize == 0 && !MFI.hasTailCall()) {
- MF.getMMI().setHasNosplitStack(true);
+ if (!MFI.needsSplitStackProlog())
return;
- }
+
+ uint64_t StackSize = MFI.getStackSize();
// Use R4 and R5 as scratch registers.
// We save R4 and R5 before use and restore them before leaving the function.
@@ -2570,8 +2987,9 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Make sure the LiveIns are still sorted and unique.
MBB->sortUniqueLiveIns();
// Replace the edges to PrologueMBB by edges to the sequences
- // we are about to add.
- MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+ // we are about to add, but only update for immediate predecessors.
+ if (MBB->isSuccessor(&PrologueMBB))
+ MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
}
// The required stack size, aligned to the ARM constant criterion.
@@ -2604,17 +3022,19 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the relevant DWARF information about the change in stack pointer as
// well as where to find both r4 and r5 (the callee-save registers)
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
- BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
- BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
- BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
+ BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// mov SR1, sp
if (Thumb) {
@@ -2630,17 +3050,46 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// sub SR1, sp, #StackSize
if (!CompareStackPointer && Thumb) {
- BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
- .add(condCodeOp())
- .addReg(ScratchReg1)
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL));
+ if (AlignedStackSize < 256) {
+ BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
+ .add(condCodeOp())
+ .addReg(ScratchReg1)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
+ } else {
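+      // AlignedStackSize does not fit tSUBi8's 8-bit immediate, so
+      // materialize it into a scratch register first: movw/movt on Thumb2,
+      // a constant-pool load on Thumb1.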
+ if (Thumb2) {
+ BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
+ .addImm(AlignedStackSize);
+ } else {
+ auto MBBI = McrMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ }
+ BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
+ .add(condCodeOp())
+ .addReg(ScratchReg1)
+ .addReg(ScratchReg0)
+ .add(predOps(ARMCC::AL));
+ }
} else if (!CompareStackPointer) {
- BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
- .addReg(ARM::SP)
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ if (AlignedStackSize < 256) {
+ BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+ .addReg(ARM::SP)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ auto MBBI = McrMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ }
}
if (Thumb && ST->isThumb1Only()) {
@@ -2707,28 +3156,69 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Pass first argument for the __morestack by Scratch Register #0.
// The amount size of stack required
if (Thumb) {
- BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
- .add(condCodeOp())
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL));
+ if (AlignedStackSize < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
+ .add(condCodeOp())
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
+ } else {
+ if (Thumb2) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
+ .addImm(AlignedStackSize);
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ }
+ }
} else {
- BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
- .addImm(AlignedStackSize)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ if (AlignedStackSize < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
+ AlignedStackSize);
+ }
}
+
// Pass second argument for the __morestack by Scratch Register #1.
// The amount size of stack consumed to save function arguments.
if (Thumb) {
- BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
- .add(condCodeOp())
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
- .add(predOps(ARMCC::AL));
+ if (ARMFI->getArgumentStackSize() < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
+ .add(condCodeOp())
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL));
+ } else {
+ if (Thumb2) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(
+ *AllocMBB, MBBI, DL, ScratchReg1, 0,
+ alignToARMConstant(ARMFI->getArgumentStackSize()));
+ }
+ }
} else {
- BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
+ if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ auto MBBI = AllocMBB->end();
+ auto RegInfo = STI.getRegisterInfo();
+ RegInfo->emitLoadConstPool(
+ *AllocMBB, MBBI, DL, ScratchReg1, 0,
+ alignToARMConstant(ARMFI->getArgumentStackSize()));
+ }
}
// push {lr} - Save return address of this function.
@@ -2746,13 +3236,15 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the DWARF info about the change in stack as well as where to find the
// previous link register
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
- BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
- BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// Call __morestack().
if (Thumb) {
@@ -2808,9 +3300,11 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
- BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+ BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// Return from this function.
BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
@@ -2832,20 +3326,22 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
- BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Tell debuggers that r4 and r5 are now the same as they were in the
- // previous function, that they're the "Same Value".
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
- nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
- BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
- nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
- BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Tell debuggers that r4 and r5 are now the same as they were in the
+ // previous function, that they're the "Same Value".
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
+ nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
+ nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
+ BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
// Organizing MBB lists
PostStackMBB->addSuccessor(&PrologueMBB);
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index 9822e2321bb4..16f2ce6bea6f 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -46,6 +46,7 @@ public:
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
bool hasFP(const MachineFunction &MF) const override;
+ bool isFPReserved(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0d201a67af46..9b26aac6c0b7 100644
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -11,6 +11,8 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 98c8133282a2..e0e4ffd90e0e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1058,15 +1058,15 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
// The maximum alignment is equal to the memory size being referenced.
- unsigned MMOAlign = MemN->getAlignment();
+ llvm::Align MMOAlign = MemN->getAlign();
unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
- if (MMOAlign >= MemSize && MemSize > 1)
+ if (MMOAlign.value() >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
// the raw alignment value; it will be refined later based on the legal
// alignment operands for the intrinsic.
- Alignment = MemN->getAlignment();
+ Alignment = MemN->getAlign().value();
}
Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
@@ -3464,40 +3464,39 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return false;
}
-/// Target-specific DAG combining for ISD::XOR.
+/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
/// select_cc setgt X, -1, X, -X
/// select_cc setl[te] X, 0, -X, X
/// select_cc setlt X, 1, -X, X
/// which represent Integer ABS into:
-/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
- SDValue XORSrc0 = N->getOperand(0);
- SDValue XORSrc1 = N->getOperand(1);
+ SDValue SUBSrc0 = N->getOperand(0);
+ SDValue SUBSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (Subtarget->isThumb1Only())
return false;
- if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
+ if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
return false;
- SDValue ADDSrc0 = XORSrc0.getOperand(0);
- SDValue ADDSrc1 = XORSrc0.getOperand(1);
- SDValue SRASrc0 = XORSrc1.getOperand(0);
- SDValue SRASrc1 = XORSrc1.getOperand(1);
+ SDValue XORSrc0 = SUBSrc0.getOperand(0);
+ SDValue XORSrc1 = SUBSrc0.getOperand(1);
+ SDValue SRASrc0 = SUBSrc1.getOperand(0);
+ SDValue SRASrc1 = SUBSrc1.getOperand(1);
ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
- if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
- XType.isInteger() && SRAConstant != nullptr &&
- Size == SRAConstant->getZExtValue()) {
+ if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
+ SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
- CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
return true;
}
@@ -3673,8 +3672,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (tryInlineAsm(N))
return;
break;
- case ISD::XOR:
- // Select special operations if XOR node forms integer ABS pattern
+ case ISD::SUB:
+ // Select special operations if SUB node forms integer ABS pattern
if (tryABSOp(N))
return;
// Other cases are autogenerated.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1b41427a1cab..85e32c08c74c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -273,6 +273,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
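+      // These averaging nodes correspond to MVE's halving add instructions
+      // (vhadd/vrhadd), so they are legal for MVE vector types.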
+ setOperationAction(ISD::AVGFLOORS, VT, Legal);
+ setOperationAction(ISD::AVGFLOORU, VT, Legal);
+ setOperationAction(ISD::AVGCEILS, VT, Legal);
+ setOperationAction(ISD::AVGCEILU, VT, Legal);
// No native support for these.
setOperationAction(ISD::UDIV, VT, Expand);
@@ -392,6 +396,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
@@ -476,7 +481,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
- !Subtarget->isTargetWatchOS()) {
+ !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
@@ -809,8 +814,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Combine low-overhead loop intrinsics so that we can lower i1 types.
if (Subtarget->hasLOB()) {
- setTargetDAGCombine(ISD::BRCOND);
- setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
}
if (Subtarget->hasNEON()) {
@@ -982,13 +986,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::FP_TO_SINT);
- setTargetDAGCombine(ISD::FP_TO_UINT);
- setTargetDAGCombine(ISD::FDIV);
- setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD});
// It is legal to extload from v4i8 to v4i16 or v4i32.
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
@@ -1002,32 +1001,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
- setTargetDAGCombine(ISD::BUILD_VECTOR);
- setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
- setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::VECREDUCE_ADD);
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::BITCAST);
+ setTargetDAGCombine(
+ {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR,
+ ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+ ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,
+ ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
+ ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
}
if (Subtarget->hasMVEIntegerOps()) {
- setTargetDAGCombine(ISD::SMIN);
- setTargetDAGCombine(ISD::UMIN);
- setTargetDAGCombine(ISD::SMAX);
- setTargetDAGCombine(ISD::UMAX);
- setTargetDAGCombine(ISD::FP_EXTEND);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::SELECT_CC);
- setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
+ ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
+ ISD::SETCC});
}
if (Subtarget->hasMVEFloatOps()) {
setTargetDAGCombine(ISD::FADD);
@@ -1364,6 +1348,29 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
+ // Compute supported atomic widths.
+ if (Subtarget->isTargetLinux() ||
+ (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
+ // For targets where __sync_* routines are reliably available, we use them
+ // if necessary.
+ //
+ // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
+ // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
+ //
+ // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
+ // such targets should provide __sync_* routines, which use the ARM mode
+ // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
+ // encoding; see ARMISD::MEMBARRIER_MCR.)
+ setMaxAtomicSizeInBitsSupported(64);
+ } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
+    // Cortex-M (besides Cortex-M0) has 32-bit atomics.
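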
+ setMaxAtomicSizeInBitsSupported(32);
+ } else {
+ // We can't assume anything about other targets; just use libatomic
+ // routines.
+ setMaxAtomicSizeInBitsSupported(0);
+ }
+
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
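setMaxAtomicSizeInBitsSupported is the limit AtomicExpandPass checks each atomic operation against; anything wider is rewritten into an __atomic_* libcall, so reporting 0 routes every atomic through libatomic. A sketch of the check (the helper name is hypothetical, not the pass's actual code):

    // e.g. with 0 supported bits, a seq_cst i64 store becomes a call to
    // libatomic's __atomic_store_8.
    if (SizeInBits > TLI->getMaxAtomicSizeInBitsSupported())
      expandAtomicOpToLibcall(I); // hypothetical name for the expansion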
@@ -1545,12 +1552,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(
+ {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR});
if (Subtarget->hasMVEIntegerOps())
setTargetDAGCombine(ISD::VSELECT);
@@ -1559,6 +1562,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::SHL);
+ // Attempt to lower smin/smax to ssat/usat
+ if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
+ Subtarget->isThumb2()) {
+ setTargetDAGCombine({ISD::SMIN, ISD::SMAX});
+ }
setStackPointerRegisterToSaveRestore(ARM::SP);
@@ -1901,13 +1909,14 @@ ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
- unsigned &PrefAlign) const {
+ Align &PrefAlign) const {
if (!isa<MemIntrinsic>(CI))
return false;
MinSize = 8;
// On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
// cycle faster than 4-byte aligned LDM.
- PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ PrefAlign =
+ (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
return true;
}
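PrefAlign changes from a raw unsigned to llvm::Align, a wrapper that guarantees a power-of-two, non-zero value; later hunks in this file make the matching accessor switch (getAlignment() to getAlign()). Roughly, the helpers used below behave as:

    Align A(8);                      // asserts the value is a power of two
    uint64_t Bytes = A.value();      // back to a plain byte count, here 8
    Align C = commonAlignment(A, 4); // greatest alignment compatible with
                                     // both arguments: Align(4)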
@@ -2326,7 +2335,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Lower 'returns_twice' calls to a pseudo-instruction.
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
- !Subtarget->getNoBTIAtReturnTwice())
+ !Subtarget->noBTIAtReturnTwice())
GuardWithBTI = AFI->branchTargetEnforcement();
// Determine whether this is a non-secure function call.
@@ -2778,25 +2787,23 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- if (!isTailCall) {
- const uint32_t *Mask;
- const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
- if (isThisReturn) {
- // For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
- if (!Mask) {
- // Set isThisReturn to false if the calling convention is not one that
- // allows 'returned' to be modeled in this way, so LowerCallResult does
- // not try to pass 'this' straight through
- isThisReturn = false;
- Mask = ARI->getCallPreservedMask(MF, CallConv);
- }
- } else
+ const uint32_t *Mask;
+ const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
Mask = ARI->getCallPreservedMask(MF, CallConv);
+ }
+ } else
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
- }
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -4379,7 +4386,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
bool ARMTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();
if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
@@ -4397,7 +4404,7 @@ bool ARMTargetLowering::splitValueIntoRegisterParts(
SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
PartVT == MVT::f32) {
unsigned ValueBits = ValueVT.getSizeInBits();
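The hasValue()/has_value() hunks are part of moving llvm::Optional toward std::optional's spelling; both names coexist during the transition. For illustration (getCC() and use() are stand-ins, not code from this patch):

    Optional<CallingConv::ID> CC = getCC();
    if (CC.has_value())   // new spelling; hasValue() is the legacy one
      use(CC.value());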
@@ -5547,7 +5554,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getPointerInfo(), Ld->getAlign(),
Ld->getMemOperand()->getFlags());
llvm_unreachable("Unknown VFP cmp argument!");
@@ -5567,14 +5574,14 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
SDValue Ptr = Ld->getBasePtr();
RetVal1 =
DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
- Ld->getAlignment(), Ld->getMemOperand()->getFlags());
+ Ld->getAlign(), Ld->getMemOperand()->getFlags());
EVT PtrType = Ptr.getValueType();
- unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
- Ld->getPointerInfo().getWithOffset(4), NewAlign,
+ Ld->getPointerInfo().getWithOffset(4),
+ commonAlignment(Ld->getAlign(), 4),
Ld->getMemOperand()->getFlags());
return;
}
@@ -5801,8 +5808,7 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.UnrollVectorOp(Op.getNode());
}
- const bool HasFullFP16 =
- static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+ const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
EVT NewTy;
const EVT OpTy = Op.getOperand(0).getValueType();
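DAG.getSubtarget<ARMSubtarget>() is a templated convenience accessor replacing the explicit static_cast; judging by its use here it amounts to (a sketch):

    template <typename STC> const STC &getSubtarget() const {
      return MF->getSubtarget<STC>(); // MachineFunction does the cast
    }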
@@ -5912,8 +5918,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(0).getValueType() == MVT::v8i16) &&
"Invalid type for custom lowering!");
- const bool HasFullFP16 =
- static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+ const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
EVT DestVecType;
if (VT == MVT::v4f32)
@@ -9359,15 +9364,15 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// The load already has the right type.
if (ExtendedTy == LD->getMemoryVT())
return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
- LD->getBasePtr(), LD->getPointerInfo(),
- LD->getAlignment(), LD->getMemOperand()->getFlags());
+ LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
+ LD->getMemOperand()->getFlags());
// We need to create a zextload/sextload. We cannot just create a load
// followed by a zext/zext node because LowerMUL is also run during normal
// operation legalization where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
- LD->getMemoryVT(), LD->getAlignment(),
+ LD->getMemoryVT(), LD->getAlign(),
LD->getMemOperand()->getFlags());
}
@@ -9876,7 +9881,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (N->getOpcode() != ISD::SDIV)
return SDValue();
- const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
+ const auto &ST = DAG.getSubtarget<ARMSubtarget>();
const bool MinSize = ST.hasMinSize();
const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
: ST.hasDivideInARMMode();
@@ -10311,6 +10316,15 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Result, Chain}, dl);
}
+SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
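+  // ISD::SPONENTRY produces the stack pointer value as it was on entry to
+  // the function (it backs the llvm.sponentry intrinsic, used e.g. by
+  // Windows setjmp); a fixed frame object at offset 0 lets frame-index
+  // lowering rematerialize that value after the prologue adjusts SP.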
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ EVT VT = getPointerTy(DAG.getDataLayout());
+ SDLoc DL(Op);
+ int FI = MFI.CreateFixedObject(4, 0, false);
+ return DAG.getFrameIndex(FI, VT);
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10424,6 +10438,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
+ case ISD::SPONENTRY:
+ return LowerSPONENTRY(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
@@ -10509,9 +10525,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::INTRINSIC_WO_CHAIN:
return ReplaceLongIntrinsic(N, Results, DAG);
- case ISD::ABS:
- lowerABS(N, Results, DAG);
- return ;
case ISD::LOAD:
LowerLOAD(N, Results, DAG);
break;
@@ -12170,7 +12183,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (Subtarget->isThumb1Only()) {
for (unsigned c = MCID->getNumOperands() - 4; c--;) {
MI.addOperand(MI.getOperand(1));
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
}
// Restore the ties
@@ -12208,7 +12221,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
definesCPSR = true;
if (MO.isDead())
deadCPSR = true;
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
break;
}
}
@@ -14775,14 +14788,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 =
DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
- LD->getAlignment(), LD->getMemOperand()->getFlags());
+ LD->getAlign(), LD->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
LD->getPointerInfo().getWithOffset(4),
- std::min(4U, LD->getAlignment()),
+ commonAlignment(LD->getAlign(), 4),
LD->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
@@ -15352,6 +15365,10 @@ static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
case ISD::MULHU:
case ISD::ABDS:
case ISD::ABDU:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
break;
default:
return SDValue();
@@ -15721,7 +15738,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
// Now, create a _UPD node, taking care of not breaking alignment.
EVT AlignedVecTy = VecTy;
- unsigned Alignment = MemN->getAlignment();
+ Align Alignment = MemN->getAlign();
// If this is a less-than-standard-aligned load/store, change the type to
// match the standard alignment.
@@ -15738,10 +15755,8 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
// memory type to match the explicit alignment. That way, we don't
// generate non-standard-aligned ARMISD::VLDx nodes.
if (isa<LSBaseSDNode>(N)) {
- if (Alignment == 0)
- Alignment = 1;
- if (Alignment < VecTy.getScalarSizeInBits() / 8) {
- MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
assert(!isLaneOp && "Unexpected generic load/store lane.");
unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
@@ -15754,7 +15769,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
// alignment of the memory type.
// Intrinsics, however, always get an explicit alignment, set to the
// alignment of the MMO.
- Alignment = 1;
+ Alignment = Align(1);
}
// Create the new updating load/store node.
@@ -15787,7 +15802,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
}
// For all node types, the alignment operand is always the last one.
- Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+ Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
// If this is a non-standard-aligned STORE, the penultimate operand is the
// stored value. Bitcast it to the aligned type.
@@ -15965,10 +15980,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
// Try to fold with other users. Non-constant updates are considered
// first, and constant updates are sorted to not break a sequence of
// strided accesses (if there is any).
- std::sort(BaseUpdates.begin(), BaseUpdates.end(),
- [](BaseUpdateUser &LHS, BaseUpdateUser &RHS) {
- return LHS.ConstInc < RHS.ConstInc;
- });
+ std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
+ [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
+ return LHS.ConstInc < RHS.ConstInc;
+ });
for (BaseUpdateUser &User : BaseUpdates) {
if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
return SDValue();
@@ -16258,7 +16273,7 @@ static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
if (LD && Op.hasOneUse() && LD->isUnindexed() &&
LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
- DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32)};
+ DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
SDValue VLDDup =
DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
@@ -16360,7 +16375,7 @@ static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
ShuffWide, DAG.getIntPtrConstant(I, DL));
SDValue Ch =
DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags());
+ St->getAlign(), St->getMemOperand()->getFlags());
BasePtr =
DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
Chains.push_back(Ch);
@@ -16608,7 +16623,7 @@ static SDValue PerformSTORECombine(SDNode *N,
DCI.AddToWorklist(ExtElt.getNode());
DCI.AddToWorklist(V.getNode());
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
+ St->getPointerInfo(), St->getAlign(),
St->getMemOperand()->getFlags(), St->getAAInfo());
}
@@ -16690,14 +16705,16 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- // The identity element for a fadd is -0.0, which these VMOV's represent.
- auto isNegativeZeroSplat = [&](SDValue Op) {
+ // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
+  // which these VMOVs represent.
+ auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
if (Op.getOpcode() != ISD::BITCAST ||
Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
return false;
- if (VT == MVT::v4f32 && Op.getOperand(0).getConstantOperandVal(0) == 1664)
+ uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
+ if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
return true;
- if (VT == MVT::v8f16 && Op.getOperand(0).getConstantOperandVal(0) == 2688)
+ if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
return true;
return false;
};
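+  // (The magic numbers are VMOV modified-immediate encodings: 1664 (0x680)
+  //  splats 0x80000000, i.e. -0.0f per f32 lane, and 2688 (0xA80) splats
+  //  0x8000, i.e. -0.0 per f16 lane; encoding 0 gives the +0.0 splat.)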
@@ -16705,12 +16722,17 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
std::swap(Op0, Op1);
- if (Op1.getOpcode() != ISD::VSELECT ||
- !isNegativeZeroSplat(Op1.getOperand(2)))
+ if (Op1.getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ SDNodeFlags FaddFlags = N->getFlags();
+ bool NSZ = FaddFlags.hasNoSignedZeros();
+ if (!isIdentitySplat(Op1.getOperand(2), NSZ))
return SDValue();
+
SDValue FAdd =
- DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), N->getFlags());
- return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0);
+ DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
+  return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0,
+                     FaddFlags);
}
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
@@ -17060,13 +17082,10 @@ static SDValue PerformVMOVNCombine(SDNode *N,
IsTop ? Op1DemandedElts
: APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
return SDValue(N, 0);
- if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
@@ -17082,10 +17101,8 @@ static SDValue PerformVQMOVNCombine(SDNode *N,
APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
: APInt::getHighBitsSet(2, 1));
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
}
@@ -17390,7 +17407,7 @@ static SDValue PerformShiftCombine(SDNode *N,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
- if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+ if (ST->hasMVEIntegerOps())
return SDValue();
int64_t Cnt;
@@ -17556,12 +17573,57 @@ static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
+// constant bounds.
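+// For example, smin(smax(x, -128), 127) becomes "ssat #8" (clamp to the
+// signed 8-bit range) and smin(smax(x, 0), 255) becomes "usat #8".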
+static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
+ !Subtarget->isThumb2())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+
+ if (VT != MVT::i32 ||
+ (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)))
+ return SDValue();
+
+ SDValue Min = Op;
+ SDValue Max = Op0;
+ SDValue Input = Op0.getOperand(0);
+ if (Min.getOpcode() == ISD::SMAX)
+ std::swap(Min, Max);
+
+ APInt MinC = Min.getConstantOperandAPInt(1);
+ APInt MaxC = Max.getConstantOperandAPInt(1);
+
+ if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
+ !(MinC + 1).isPowerOf2())
+ return SDValue();
+
+ SDLoc DL(Op);
+ if (MinC == ~MaxC)
+ return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
+ DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
+ if (MaxC == 0)
+ return DAG.getNode(ARMISD::USAT, DL, VT, Input,
+ DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
+
+ return SDValue();
+}
+
/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
/// saturates.
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
+
+ if (VT == MVT::i32)
+ return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
+
if (!ST->hasMVEIntegerOps())
return SDValue();
@@ -19354,8 +19416,8 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Return false to prevent folding
// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
// if the folding leads to worse code.
-bool ARMTargetLowering::isMulAddWithConstProfitable(
- const SDValue &AddNode, const SDValue &ConstNode) const {
+bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const {
// Let the DAGCombiner decide for vector types and large types.
const EVT VT = AddNode.getValueType();
if (VT.isVector() || VT.getScalarSizeInBits() > 32)
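SDValue is a two-word handle, so passing it by value is the SelectionDAG convention and the const-reference signature was the outlier. For reference, its layout is essentially (a sketch of llvm::SDValue):

    class SDNode;
    class SDValue {
      SDNode *Node = nullptr; // node producing the value
      unsigned ResNo = 0;     // which of that node's results
    };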
@@ -20537,38 +20599,6 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
}
-void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
- MVT HalfT = MVT::i32;
- SDLoc dl(N);
- SDValue Hi, Lo, Tmp;
-
- if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
- !isOperationLegalOrCustom(ISD::UADDO, HalfT))
- return ;
-
- unsigned OpTypeBits = HalfT.getScalarSizeInBits();
- SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
-
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(0, dl, HalfT));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
- DAG.getConstant(1, dl, HalfT));
-
- Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
- DAG.getConstant(OpTypeBits - 1, dl,
- getShiftAmountTy(HalfT, DAG.getDataLayout())));
- Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
- Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
- SDValue(Lo.getNode(), 1));
- Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
- Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
-
- Results.push_back(Lo);
- Results.push_back(Hi);
-}
-
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -20787,24 +20817,24 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Type *ValTy = I.getParamElementType(0);
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::arm_stlex:
case Intrinsic::arm_strex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Type *ValTy = I.getParamElementType(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
+ Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -20932,9 +20962,19 @@ Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
// anything for those.
-bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+TargetLoweringBase::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ bool has64BitAtomicStore;
+ if (Subtarget->isMClass())
+ has64BitAtomicStore = false;
+ else if (Subtarget->isThumb())
+ has64BitAtomicStore = Subtarget->hasV7Ops();
+ else
+ has64BitAtomicStore = Subtarget->hasV6Ops();
+
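+  // Returning Expand (rather than the old "true") makes AtomicExpandPass
+  // rewrite the 64-bit store as an atomicrmw xchg of the same width, which
+  // then takes the LL/SC (ldrexd/strexd) path.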
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- return (Size == 64) && !Subtarget->isMClass();
+ return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
+ : AtomicExpansionKind::None;
}
// Loads and stores less than 64 bits are already atomic; ones above that
@@ -20946,9 +20986,17 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// sections A8.8.72-74 LDRD)
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ bool has64BitAtomicLoad;
+ if (Subtarget->isMClass())
+ has64BitAtomicLoad = false;
+ else if (Subtarget->isThumb())
+ has64BitAtomicLoad = Subtarget->hasV7Ops();
+ else
+ has64BitAtomicLoad = Subtarget->hasV6Ops();
+
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
- : AtomicExpansionKind::None;
+ return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
}
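The 64-bit variant of this availability test appears in both the load and store hooks; spelled out once as a standalone predicate (a hypothetical helper, not part of the patch — the RMW and cmpxchg hooks below use hasV8MBaselineOps for M-class instead, since 32-bit ldrex/strex exist there):

    // 64-bit LL/SC needs ldrexd/strexd: ARM mode from v6, Thumb only from
    // v7, and never on Cortex-M.
    static bool has64BitLLSC(const ARMSubtarget &ST) {
      if (ST.isMClass())
        return false;
      return ST.isThumb() ? ST.hasV7Ops() : ST.hasV6Ops();
    }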
// For the real atomic operations, we have ldrex/strex up to 32 bits,
@@ -20958,19 +21006,25 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
- // At -O0, fast-regalloc cannot cope with the live vregs necessary to
- // implement atomicrmw without spilling. If the target address is also on the
- // stack and close enough to the spill slot, this can lead to a situation
- // where the monitor always gets cleared and the atomic operation can never
- // succeed. So at -O0 lower this operation to a CAS loop.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
- return AtomicExpansionKind::CmpXChg;
-
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
- ? AtomicExpansionKind::LLSC
- : AtomicExpansionKind::None;
+ bool hasAtomicRMW;
+ if (Subtarget->isMClass())
+ hasAtomicRMW = Subtarget->hasV8MBaselineOps();
+ else if (Subtarget->isThumb())
+ hasAtomicRMW = Subtarget->hasV7Ops();
+ else
+ hasAtomicRMW = Subtarget->hasV6Ops();
+ if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on
+ // the stack and close enough to the spill slot, this can lead to a
+ // situation where the monitor always gets cleared and the atomic operation
+ // can never succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+ return AtomicExpansionKind::LLSC;
+ }
+ return AtomicExpansionKind::None;
}
// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
@@ -20983,8 +21037,13 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
- bool HasAtomicCmpXchg =
- !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+ bool HasAtomicCmpXchg;
+ if (Subtarget->isMClass())
+ HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
+ else if (Subtarget->isThumb())
+ HasAtomicCmpXchg = Subtarget->hasV7Ops();
+ else
+ HasAtomicCmpXchg = Subtarget->hasV6Ops();
if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
Size <= (Subtarget->isMClass() ? 32U : 64U))
return AtomicExpansionKind::LLSC;
@@ -21099,8 +21158,11 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
+ CallInst *CI = Builder.CreateCall(Ldrex, Addr);
- return Builder.CreateTruncOrBitCast(Builder.CreateCall(Ldrex, Addr), ValueTy);
+ CI->addParamAttr(
+ 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
+ return Builder.CreateTruncOrBitCast(CI, ValueTy);
}
void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
@@ -21138,10 +21200,13 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Type *Tys[] = { Addr->getType() };
Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
- return Builder.CreateCall(
+ CallInst *CI = Builder.CreateCall(
Strex, {Builder.CreateZExtOrBitCast(
Val, Strex->getFunctionType()->getParamType(0)),
Addr});
+ CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
+ Val->getType()));
+ return CI;
}
@@ -21273,7 +21338,7 @@ bool ARMTargetLowering::lowerInterleavedLoad(
SmallVector<Value *, 2> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
- Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ Ops.push_back(Builder.getInt32(LI->getAlign().value()));
return Builder.CreateCall(VldnFunc, Ops, "vldN");
} else {
@@ -21443,7 +21508,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
append_range(Ops, Shuffles);
- Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Ops.push_back(Builder.getInt32(SI->getAlign().value()));
Builder.CreateCall(VstNFunc, Ops);
} else {
assert((Factor == 2 || Factor == 4) &&
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 1c5f8389f57c..10f60ab93ae3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -581,7 +581,7 @@ class VectorType;
getRegClassFor(MVT VT, bool isDivergent = false) const override;
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
- unsigned &PrefAlign) const override;
+ Align &PrefAlign) const override;
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
@@ -665,7 +665,8 @@ class VectorType;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
- bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
@@ -713,8 +714,8 @@ class VectorType;
Align Alignment,
const DataLayout &DL) const;
- bool isMulAddWithConstProfitable(const SDValue &AddNode,
- const SDValue &ConstNode) const override;
+ bool isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const override;
bool alignLoopsWithOptSize() const override;
@@ -845,8 +846,7 @@ class VectorType;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
- void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
+ SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index ff5afd787c82..c9a2d21bec53 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1589,9 +1589,9 @@ class VFPXI<dag oops, dag iops, AddrMode am, int sz,
}
class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string asm, string cstr, list<dag> pattern>
: VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
+ opc, asm, cstr, pattern> {
let PostEncoderMethod = "VFPThumb2PostEncoder";
}
@@ -1751,8 +1751,8 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
// Double precision, unary
class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
- string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ string asm, string cstr, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> {
// Instruction operands.
bits<5> Dd;
bits<5> Dm;
@@ -1804,7 +1804,7 @@ class ADuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
- : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
// Instruction operands.
bits<5> Dd;
bits<5> Dn;
@@ -1862,8 +1862,8 @@ class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
// Single precision, unary, predicated
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
- string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ string asm, string cstr, list<dag> pattern>
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, cstr, pattern> {
// Instruction operands.
bits<5> Sd;
bits<5> Sm;
@@ -1916,14 +1916,14 @@ class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
: ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm,
- pattern> {
+ "", pattern> {
list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
}
// Single precision, binary
class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
// Instruction operands.
bits<5> Sd;
bits<5> Sn;
@@ -2000,7 +2000,7 @@ class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, "", pattern> {
list<Predicate> Predicates = [HasFullFP16];
// Instruction operands.
@@ -2056,7 +2056,7 @@ class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
// Half precision, binary
class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, "", pattern> {
list<Predicate> Predicates = [HasFullFP16];
// Instruction operands.
@@ -2116,7 +2116,7 @@ class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
dag oops, dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
- : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, "", pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{19-16} = opcod3;
@@ -2149,7 +2149,7 @@ class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : VFPAI<oops, iops, f, itin, opc, asm, pattern> {
+ : VFPAI<oops, iops, f, itin, opc, asm, "", pattern> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{4} = 1;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 32a3911d3369..88bb74d1fc54 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5129,6 +5129,7 @@ let hasNoSchedulingInfo = 1 in
def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary,
"tsb", "\t$opt", []>, Requires<[IsARM, HasV8_4a]> {
let Inst{31-0} = 0xe320f012;
+ let DecoderMethod = "DecodeTSBInstruction";
}
}
@@ -6387,7 +6388,7 @@ def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
(RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
-def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg), 0>,
Requires<[IsARM, NoV6]>;
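+// (The added 0 is the alias's Emit bit: the assembler keeps accepting
+// "nop" before v6, but the printer no longer renders "mov r0, r0" as it.)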
// MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but
@@ -6415,8 +6416,7 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
// 'it' blocks in ARM mode just validate the predicates. The IT itself
// is discarded.
-def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
- ComplexDeprecationPredicate<"IT">;
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
let mayLoad = 1, mayStore = 1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
@@ -6476,3 +6476,24 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
let AsmString = "@ COMPILER BARRIER";
let hasNoSchedulingInfo = 1;
}
+
+//===----------------------------------------------------------------------===//
+// Instructions used for emitting unwind opcodes on Windows.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in {
+ def SEH_StackAlloc : PseudoInst<(outs), (ins i32imm:$size, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ def SEH_SaveRegs : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_SaveRegs_Ret : PseudoInst<(outs), (ins i32imm:$mask, i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ def SEH_SaveSP : PseudoInst<(outs), (ins i32imm:$reg), NoItinerary, []>, Sched<[]>;
+ def SEH_SaveFRegs : PseudoInst<(outs), (ins i32imm:$first, i32imm:$last), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_SaveLR : PseudoInst<(outs), (ins i32imm:$offst), NoItinerary, []>, Sched<[]>;
+ def SEH_Nop : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_Nop_Ret : PseudoInst<(outs), (ins i32imm:$wide), NoItinerary, []>, Sched<[]>;
+ def SEH_PrologEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ def SEH_EpilogStart : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+ let isTerminator = 1 in
+ def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+}
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 1ae0354ffc37..15c33014e988 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2192,36 +2192,29 @@ def subnsw : PatFrag<(ops node:$lhs, node:$rhs),
return N->getFlags().hasNoSignedWrap();
}]>;
-multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI, SDNode Op,
+ SDNode unpred_op, Intrinsic PredInt> {
def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
let Predicates = [HasMVEInt] in {
- // Unpredicated rounding add-with-divide-by-two
+ // Unpredicated rounding add-with-divide-by-two intrinsic
def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
(i32 VTI.Unsigned))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
- // Predicated add-with-divide-by-two
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
- (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
- (VTI.Vec MQPR:$inactive)))>;
}
}
-multiclass MVE_VRHADD<MVEVectorVTInfo VTI>
- : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
+multiclass MVE_VRHADD<MVEVectorVTInfo VTI, SDNode rhadd>
+ : MVE_VRHADD_m<VTI, rhadd, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
-defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8>;
-defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>;
-defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>;
-defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
-defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
-defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
+defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8, avgceils>;
+defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16, avgceils>;
+defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32, avgceils>;
+defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8, avgceilu>;
+defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16, avgceilu>;
+defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32, avgceilu>;
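+// avgceils/avgceilu (and avgfloors/avgflooru below) are the TableGen names
+// for the new generic ISD::AVGCEIL*/AVGFLOOR* nodes: vrhadd is a rounding
+// average, (a + b + 1) >> 1, while vhadd is the truncating (a + b) >> 1.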
// Rounding Halving Add performs the arithmetic operation with an extra bit of
// precision, before performing the shift, to avoid clipping errors. We're not
@@ -2303,11 +2296,12 @@ class MVE_VHSUB_<string suffix, bit U, bits<2> size,
list<dag> pattern=[]>
: MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
-multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int, PatFrag add_op,
+multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, SDNode Op,
+ SDNode unpred_op, Intrinsic PredInt, PatFrag add_op,
SDNode shift_op> {
def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
let Predicates = [HasMVEInt] in {
// Unpredicated add-and-divide-by-two
@@ -2316,30 +2310,23 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
(Inst MQPR:$Qm, MQPR:$Qn)>;
-
- // Predicated add-and-divide-by-two
- def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
- (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
- (VTI.Vec MQPR:$inactive)))>;
}
}
-multiclass MVE_VHADD<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op>
- : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
+multiclass MVE_VHADD<MVEVectorVTInfo VTI, SDNode Op, PatFrag add_op, SDNode shift_op>
+ : MVE_VHADD_m<VTI, Op, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
shift_op>;
// Halving add/sub perform the arithmetic operation with an extra bit of
// precision, before performing the shift, to avoid clipping errors. We're not
// modelling that here with these patterns, but we're using no wrap forms of
// add/sub to ensure that the extra bit of information is not needed.
-defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, addnsw, ARMvshrsImm>;
-defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, addnuw, ARMvshruImm>;
-defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, addnuw, ARMvshruImm>;
-defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, addnuw, ARMvshruImm>;
+defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, avgfloors, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, avgfloors, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, avgfloors, addnsw, ARMvshrsImm>;
+defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, avgflooru, addnuw, ARMvshruImm>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, avgflooru, addnuw, ARMvshruImm>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, avgflooru, addnuw, ARMvshruImm>;
multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int, PatFrag sub_op,
@@ -5372,10 +5359,10 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
let validForTailPredication = 1;
}
-multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
- Intrinsic unpred_int, Intrinsic pred_int, PatFrag add_op,
- SDNode shift_op> {
+multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract, SDNode Op,
+ Intrinsic unpred_int, Intrinsic pred_int, PatFrag add_op, PatFrag shift_op> {
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract, VTI.Size>;
+ defm : MVE_TwoOpPatternDup<VTI, Op, pred_int, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
VTI, unpred_int, pred_int, 1, 1>;
defvar Inst = !cast<Instruction>(NAME);
@@ -5386,20 +5373,20 @@ multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
}
}
-multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> :
- MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd, int_arm_mve_hadd_predicated,
- add_op, shift_op>;
+multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op, SDNode Op> :
+ MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, Op, int_arm_mve_vhadd,
+ int_arm_mve_hadd_predicated, add_op, shift_op>;
multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op> :
- MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub, int_arm_mve_hsub_predicated,
- add_op, shift_op>;
-
-defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8, addnsw, ARMvshrsImm>;
-defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16, addnsw, ARMvshrsImm>;
-defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32, addnsw, ARMvshrsImm>;
-defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8, addnuw, ARMvshruImm>;
-defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16, addnuw, ARMvshruImm>;
-defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32, addnuw, ARMvshruImm>;
+ MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, null_frag, int_arm_mve_vhsub,
+ int_arm_mve_hsub_predicated, add_op, shift_op>;
+
+defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8, addnsw, ARMvshrsImm, avgfloors>;
+defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16, addnsw, ARMvshrsImm, avgfloors>;
+defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32, addnsw, ARMvshrsImm, avgfloors>;
+defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8, addnuw, ARMvshruImm, avgflooru>;
+defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16, addnuw, ARMvshruImm, avgflooru>;
+defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32, addnuw, ARMvshruImm, avgflooru>;
defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8, subnsw, ARMvshrsImm>;
defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m<MVE_v8s16, subnsw, ARMvshrsImm>;
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 357aa6d062e9..cdad8e106de6 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6946,6 +6946,9 @@ def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
v4f32, v4i16, int_arm_neon_vcvthf2fp>,
Requires<[HasNEON, HasFP16]>;
+def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>;
+def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>;
+
// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index f80b9a5053f7..20d8a45aaf49 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -3561,6 +3561,7 @@ let hasNoSchedulingInfo = 1 in
def t2TSB : T2I<(outs), (ins tsb_opt:$opt), NoItinerary,
"tsb", "\t$opt", []>, Requires<[IsThumb, HasV8_4a]> {
let Inst{31-0} = 0xf3af8012;
+ let DecoderMethod = "DecodeTSBInstruction";
}
}
@@ -3950,6 +3951,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// Tail calls. The MachO version of thumb tail calls uses a t2 branch, so
// it goes here.
+// Windows SEH unwinding also needs a strict t2 branch for tail calls.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// IOS version.
let Uses = [SP] in
@@ -3957,15 +3959,14 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
(ins thumb_br_target:$dst, pred:$p),
4, IIC_Br, [],
(t2B thumb_br_target:$dst, pred:$p)>,
- Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>;
+ Requires<[IsThumb2]>, Sched<[WriteBr]>;
}
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, 2, IIC_iALUx,
- "it$mask\t$cc", "", []>,
- ComplexDeprecationPredicate<"IT"> {
+ "it$mask\t$cc", "", []> {
// 16-bit instruction.
let Inst{31-16} = 0x0000;
let Inst{15-8} = 0b10111111;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index dc5f1b92a6c2..b233555d5225 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -584,12 +584,12 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
- IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", "",
[(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
- IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", "",
[(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -603,12 +603,12 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
- IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", "",
[(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
- IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", "",
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
@@ -627,7 +627,7 @@ def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", "",
[(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
@@ -647,7 +647,7 @@ def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
- IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", "",
[(arm_cmpfpe0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -655,7 +655,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
- IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", "",
[(arm_cmpfpe0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -675,7 +675,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
- IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", "",
[(arm_cmpfp0 (f64 DPR:$Dd))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -683,7 +683,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
- IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", "",
[(arm_cmpfp0 SPR:$Sd)]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -704,7 +704,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
- IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", "",
[(set DPR:$Dd, (fpextend SPR:$Sm))]>,
Sched<[WriteFPCVT]> {
// Instruction operands.
@@ -723,7 +723,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
- IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", "",
[(set SPR:$Sd, (fpround DPR:$Dm))]>,
Sched<[WriteFPCVT]> {
// Instruction operands.
@@ -749,7 +749,7 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half, single and double-precision.
let hasSideEffects = 0 in
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
+ /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", "",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
@@ -760,26 +760,30 @@ def : FP16Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
let hasSideEffects = 0 in
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
def : FP16Pat<(f16 (fpround SPR:$Sm)),
- (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+ (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
def : FP16Pat<(fp_to_f16 SPR:$a),
- (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+ (i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
- (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTBSH SPR:$src2),
+ (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
+ (VCVTBSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_even:$lane),
- (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTBSH SPR:$src2),
+ (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
+ (VCVTBSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
let hasSideEffects = 0 in
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
+ /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", "",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
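+// The .f16.f32 and .f16.f64 converts (VCVTBSH/VCVTTSH here, VCVTBDH and
+// VCVTTDH below) write only half of $Sd and must preserve the other half,
+// so the destination is now tied to the new $Sda input ("$Sd = $Sda");
+// patterns that don't care about the preserved half pass IMPLICIT_DEF.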
@@ -792,22 +796,26 @@ def : FP16Pat<(f32 (fpextend (extractelt (v4f16 DPR:$src), imm_odd:$lane))),
(SSubReg_f16_reg imm_odd:$lane)))>;
let hasSideEffects = 0 in
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
- /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sda, SPR:$Sm),
+ /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", "$Sd = $Sda",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
- (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1), (VCVTTSH SPR:$src2),
+ (v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
+ (VCVTTSH (EXTRACT_SUBREG (v8f16 MQPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
def : FP16Pat<(insertelt (v4f16 DPR:$src1), (f16 (fpround (f32 SPR:$src2))), imm_odd:$lane),
- (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1), (VCVTTSH SPR:$src2),
+ (v4f16 (INSERT_SUBREG (v4f16 DPR:$src1),
+ (VCVTTSH (EXTRACT_SUBREG (v4f16 DPR:$src1), (SSubReg_f16_reg imm:$lane)),
+ SPR:$src2),
(SSubReg_f16_reg imm:$lane)))>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
- NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
+ NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", "",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFPARMv8, HasDPVFP]>,
Sched<[WriteFPCVT]> {
@@ -829,8 +837,8 @@ def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
- (outs SPR:$Sd), (ins DPR:$Dm),
- NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
+ (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm),
+ NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda",
[/* Intentionally left blank, see patterns below */]>,
Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
@@ -847,15 +855,15 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
}
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
- (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>,
+ (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
Requires<[HasFPARMv8, HasDPVFP]>;
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
- (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
+ (i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
- NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
+ NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", "",
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sm;
@@ -868,8 +876,8 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
}
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
- (outs SPR:$Sd), (ins DPR:$Dm),
- NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
+ (outs SPR:$Sd), (ins SPR:$Sda, DPR:$Dm),
+ NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", "$Sd = $Sda",
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sd;
@@ -990,7 +998,7 @@ defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", "",
[(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
@@ -1019,7 +1027,7 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
- NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
+ NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", "",
[(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
Requires<[HasFPARMv8]> {
let Inst{7} = op2;
@@ -1027,7 +1035,7 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
}
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
+ NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", "",
[(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{7} = op2;
@@ -1094,13 +1102,13 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", "",
[(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
Sched<[WriteFPSQRT64]>;
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
- IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", "",
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
Sched<[WriteFPSQRT32]>;
@@ -1113,12 +1121,12 @@ let hasSideEffects = 0 in {
let isMoveReg = 1 in {
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", "", []>,
Requires<[HasFPRegs64]>;
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
- IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", "", []>,
Requires<[HasFPRegs]>;
} // isMoveReg
@@ -1984,7 +1992,7 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
class BF16_VCVT<string opc, bits<2> op7_6>
: VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm),
VFPUnaryFrm, NoItinerary,
- opc, ".bf16.f32\t$Sd, $Sm", []>,
+ opc, ".bf16.f32\t$Sd, $Sm", "", []>,
RegConstraint<"$dst = $Sd">,
Requires<[HasBF16]>,
Sched<[]> {
@@ -2440,7 +2448,7 @@ def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
list<dag> pattern>:
- VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> {
// Instruction operand.
bits<4> Rt;
@@ -2525,7 +2533,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
list<dag> pattern>:
- VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, "", pattern> {
// Instruction operand.
bits<4> Rt;
@@ -2598,7 +2606,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
- "vmov", ".f64\t$Dd, $imm",
+ "vmov", ".f64\t$Dd, $imm", "",
[(set DPR:$Dd, vfp_f64imm:$imm)]>,
Requires<[HasVFP3,HasDPVFP]> {
bits<5> Dd;
@@ -2617,7 +2625,7 @@ def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_fpUNA32,
- "vmov", ".f32\t$Sd, $imm",
+ "vmov", ".f32\t$Sd, $imm", "",
[(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
bits<5> Sd;
bits<8> imm;
@@ -2635,7 +2643,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
VFPMiscFrm, IIC_fpUNA16,
- "vmov", ".f16\t$Sd, $imm",
+ "vmov", ".f16\t$Sd, $imm", "",
[(set (f16 HPR:$Sd), vfp_f16imm:$imm)]>,
Requires<[HasFullFP16]> {
bits<5> Sd;
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 188b5562cac9..1c44893581f9 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -624,12 +624,12 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
bool UseMovt = STI.useMovt();
- unsigned Size = TM.getPointerSize(0);
+ LLT PtrTy = MRI.getType(MIB->getOperand(0).getReg());
const Align Alignment(4);
- auto addOpsForConstantPoolLoad = [&MF, Alignment,
- Size](MachineInstrBuilder &MIB,
- const GlobalValue *GV, bool IsSBREL) {
+ auto addOpsForConstantPoolLoad = [&MF, Alignment, PtrTy](
+ MachineInstrBuilder &MIB,
+ const GlobalValue *GV, bool IsSBREL) {
assert((MIB->getOpcode() == ARM::LDRi12 ||
MIB->getOpcode() == ARM::t2LDRpci) &&
"Unsupported instruction");
@@ -644,7 +644,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- Size, Alignment));
+ PtrTy, Alignment));
if (MIB->getOpcode() == ARM::LDRi12)
MIB.addImm(0);
MIB.add(predOps(ARMCC::AL));
@@ -733,7 +733,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
// Add the offset to the SB register.
MIB->setDesc(TII.get(Opcodes.ADDrr));
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
MIB.addReg(ARM::R9) // FIXME: don't hardcode R9
.addReg(Offset)
.add(predOps(ARMCC::AL))
@@ -748,7 +748,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
} else {
// Load the global's address from the constant pool.
MIB->setDesc(TII.get(Opcodes.ConstPoolLoad));
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
addOpsForConstantPoolLoad(MIB, GV, /*IsSBREL*/ false);
}
} else if (STI.isTargetMachO()) {
@@ -997,7 +997,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) {
auto CPIndex =
ConstPool->getConstantPoolIndex(I.getOperand(1).getFPImm(), Alignment);
MIB->setDesc(TII.get(LoadOpcode));
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
.addMemOperand(
MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index de88ffab1c28..52b6b6f3bcf7 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -14,6 +14,7 @@
#include "ARMCallLowering.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index ef5fc12feb54..0a38f5633ae3 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,6 +34,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -2108,7 +2110,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return false;
MF = &Fn;
- STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ STI = &Fn.getSubtarget<ARMSubtarget>();
TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
@@ -2199,7 +2201,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return false;
TD = &Fn.getDataLayout();
- STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ STI = &Fn.getSubtarget<ARMSubtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
@@ -2894,10 +2896,12 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
<< Base.virtRegIndex() << "\n");
- // Make sure that Increment has no uses before BaseAccess.
+ // Make sure that Increment has no uses before BaseAccess that are not PHI
+ // uses.
for (MachineInstr &Use :
MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
- if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+ if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
+ !DT->dominates(BaseAccess, &Use))) {
LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
return false;
}
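      // Note: PHIs are exempt from the dominance check because a PHI reads
      // its operand at the end of the corresponding predecessor block, so a
      // PHI that precedes BaseAccess in block layout can still legitimately
      // receive the incremented value, e.g. around a loop back edge.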
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index f822672c4477..aa739db44da2 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -59,8 +59,10 @@
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
@@ -1297,7 +1299,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
}
bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
- const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
+ const ARMSubtarget &ST = mf.getSubtarget<ARMSubtarget>();
if (!ST.hasLOB())
return false;
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 308d5e7889f2..9596e88deb18 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -73,3 +73,10 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
std::tie(SignReturnAddress, SignReturnAddressAll) =
GetSignReturnAddress(MF.getFunction());
}
+
+MachineFunctionInfo *
+ARMFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *>
+ &Src2DstMBB) const {
+ return DestMF.cloneInfo<ARMFunctionInfo>(*this);
+}
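// Note: the new clone() hook lets generic MachineFunction-cloning code copy
// the target-specific function info; the Src2DstMBB mapping goes unused here,
// so a plain cloneInfo copy is sufficient for ARM.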
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index d8d937055d23..e906fea1a810 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -86,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
/// areas.
unsigned FPCXTSaveSize = 0;
+ unsigned FRSaveSize = 0;
unsigned GPRCS1Size = 0;
unsigned GPRCS2Size = 0;
unsigned DPRCSAlignGapSize = 0;
@@ -158,6 +159,11 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF);
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
@@ -198,12 +204,14 @@ public:
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; }
+ unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; }
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; }
+ void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index 46baf8930939..6effd84041b5 100644
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -459,6 +459,10 @@ bool ARMParallelDSP::Search(Value *V, BasicBlock *BB, Reduction &R) {
if (ValidLHS && ValidRHS)
return true;
+ // Ensure we don't add the root as the incoming accumulator.
+ if (R.getRoot() == I)
+ return false;
+
return R.InsertAcc(I);
}
case Instruction::Mul: {
@@ -535,6 +539,7 @@ bool ARMParallelDSP::MatchSMLAD(Function &F) {
InsertParallelMACs(R);
Changed = true;
AllAdds.insert(R.getAdds().begin(), R.getAdds().end());
+ LLVM_DEBUG(dbgs() << "BB after inserting parallel MACs:\n" << BB);
}
}
diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 1a7f10a13ed3..527fefbd291e 100644
--- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -13,9 +13,9 @@
#include "ARMRegisterBankInfo.h"
#include "ARMInstrInfo.h" // For the register classes
#include "ARMSubtarget.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_TARGET_REGBANK_IMPL
@@ -129,8 +129,7 @@ static void checkValueMappings() {
} // end namespace arm
} // end namespace llvm
-ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
- : ARMGenRegisterBankInfo() {
+ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) {
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
// done only once. Indeed the table of all register banks
diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
index b8aff65a967e..c56134aab38c 100644
--- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "ARMGenRegisterBank.inc"
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
index ff4647dd46fd..d1d30e614fc9 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -15,4 +15,4 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo() {}
+ARMRegisterInfo::ARMRegisterInfo() = default;
diff --git a/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
index 332acb453124..fa80b75484e1 100644
--- a/llvm/lib/Target/ARM/ARMSLSHardening.cpp
+++ b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -322,8 +322,8 @@ MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump(
assert(ImpSPOpIdx != -1);
int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx);
int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx);
- BL->RemoveOperand(FirstOpIdxToRemove);
- BL->RemoveOperand(SecondOpIdxToRemove);
+ BL->removeOperand(FirstOpIdxToRemove);
+ BL->removeOperand(SecondOpIdxToRemove);
// Now copy over the implicit operands from the original IndirectCall
BL->copyImplicitOps(MF, IndirectCall);
MF.moveCallSiteInfo(&IndirectCall, BL);
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 12d4ad889897..379521752261 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -296,7 +296,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, Align Alignment, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
const ARMSubtarget &Subtarget =
@@ -314,6 +314,9 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
DAG.getZExtOrTrunc(Size, dl, MVT::i32));
}
- return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
- Alignment.value(), RTLIB::MEMSET);
+ if (!AlwaysInline)
+ return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+ Alignment.value(), RTLIB::MEMSET);
+
+ return SDValue();
}
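// Note: returning an empty SDValue tells the generic lowering that the target
// declined to handle the memset, so it falls back to inline expansion; when
// AlwaysInline is set, emitting the specialized libcall would be incorrect.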
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index 7aa831c09248..ffa8b5049351 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -55,6 +55,7 @@ public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
SDValue Op3, Align Alignment, bool isVolatile,
+ bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const override;
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 32160b109343..79244f634ce3 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -27,6 +27,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -52,19 +53,15 @@ UseFusedMulOps("arm-use-mulops",
enum ITMode {
DefaultIT,
- RestrictedIT,
- NoRestrictedIT
+ RestrictedIT
};
static cl::opt<ITMode>
-IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
- cl::ZeroOrMore,
- cl::values(clEnumValN(DefaultIT, "arm-default-it",
- "Generate IT block based on arch"),
- clEnumValN(RestrictedIT, "arm-restrict-it",
- "Disallow deprecated IT based on ARMv8"),
- clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
- "Allow IT blocks based on ARMv7")));
+ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
+ cl::values(clEnumValN(DefaultIT, "arm-default-it",
+ "Generate any type of IT block"),
+ clEnumValN(RestrictedIT, "arm-restrict-it",
+ "Disallow complex IT blocks")));
/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
/// currently supported (for testing only).
@@ -237,21 +234,18 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
switch (IT) {
case DefaultIT:
- RestrictIT = hasV8Ops() && !hasMinSize();
+ RestrictIT = false;
break;
case RestrictedIT:
RestrictIT = true;
break;
- case NoRestrictedIT:
- RestrictIT = false;
- break;
}
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
const FeatureBitset &Bits = getFeatureBits();
if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
(Options.UnsafeFPMath || isTargetDarwin()))
- UseNEONForSinglePrecisionFP = true;
+ HasNEONForFP = true;
if (isRWPI())
ReserveR9 = true;
@@ -399,6 +393,14 @@ bool ARMSubtarget::enableSubRegLiveness() const {
return hasMVEIntegerOps();
}
+bool ARMSubtarget::enableMachinePipeliner() const {
+ // Enable the MachinePipeliner before register allocation for subtargets
+ // with the use-mipipeliner feature.
+ return getSchedModel().hasInstrSchedModel() && useMachinePipeliner();
+}
+
+bool ARMSubtarget::useDFAforSMS() const { return false; }
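// Note: the pipeliner needs an instruction-level scheduling model to compute
// an initiation interval, hence the hasInstrSchedModel() guard above; with
// useDFAforSMS() returning false it tracks resources through the scheduling
// model rather than a DFA packetizer.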
+
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
if (enableMachineScheduler())
@@ -417,8 +419,6 @@ bool ARMSubtarget::enablePostRAMachineScheduler() const {
return !isThumb1Only();
}
-bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
-
bool ARMSubtarget::useStride4VFPs() const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
@@ -491,3 +491,12 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
return isThumb2() && MF.getFunction().hasMinSize() &&
ARM::GPRRegClass.contains(PhysReg);
}
+
+bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const {
+ const Function &F = MF.getFunction();
+ if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
+ !F.needsUnwindTableEntry())
+ return false;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
+}
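// Note: the intent here appears to be that on Windows targets emitting SEH
// unwind info, frames with variable-sized objects or stack realignment push
// the frame record (r11, lr) separately from the other callee-saved GPRs, so
// the frame pointer sits at a fixed, unwinder-visible position.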
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 7cbdc014299f..460ec62d5a33 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -25,8 +25,8 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
@@ -150,6 +150,11 @@ public:
};
protected:
+// Bool members corresponding to the SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "ARMGenSubtargetInfo.inc"
+
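// For illustration (assumed shape of the generated file): a feature such as
// Thumb2 is expected to produce an entry like
//   GET_SUBTARGETINFO_MACRO(HasThumb2, false, hasThumb2)
// in ARMGenSubtargetInfo.inc, which the #define above expands into
//   bool HasThumb2 = false;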
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
@@ -159,343 +164,22 @@ protected:
/// ARMArch - ARM architecture
ARMArchEnum ARMArch = ARMv4t;
- /// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
- /// Specify whether target support specific ARM ISA variants.
- bool HasV4TOps = false;
- bool HasV5TOps = false;
- bool HasV5TEOps = false;
- bool HasV6Ops = false;
- bool HasV6MOps = false;
- bool HasV6KOps = false;
- bool HasV6T2Ops = false;
- bool HasV7Ops = false;
- bool HasV8Ops = false;
- bool HasV8_1aOps = false;
- bool HasV8_2aOps = false;
- bool HasV8_3aOps = false;
- bool HasV8_4aOps = false;
- bool HasV8_5aOps = false;
- bool HasV8_6aOps = false;
- bool HasV8_8aOps = false;
- bool HasV8_7aOps = false;
- bool HasV9_0aOps = false;
- bool HasV9_1aOps = false;
- bool HasV9_2aOps = false;
- bool HasV9_3aOps = false;
- bool HasV8MBaselineOps = false;
- bool HasV8MMainlineOps = false;
- bool HasV8_1MMainlineOps = false;
- bool HasMVEIntegerOps = false;
- bool HasMVEFloatOps = false;
- bool HasCDEOps = false;
-
- /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
- /// floating point ISAs are supported.
- bool HasVFPv2 = false;
- bool HasVFPv3 = false;
- bool HasVFPv4 = false;
- bool HasFPARMv8 = false;
- bool HasNEON = false;
- bool HasFPRegs = false;
- bool HasFPRegs16 = false;
- bool HasFPRegs64 = false;
-
- /// Versions of the VFP flags restricted to single precision, or to
- /// 16 d-registers, or both.
- bool HasVFPv2SP = false;
- bool HasVFPv3SP = false;
- bool HasVFPv4SP = false;
- bool HasFPARMv8SP = false;
- bool HasVFPv3D16 = false;
- bool HasVFPv4D16 = false;
- bool HasFPARMv8D16 = false;
- bool HasVFPv3D16SP = false;
- bool HasVFPv4D16SP = false;
- bool HasFPARMv8D16SP = false;
-
- /// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
- bool HasDotProd = false;
-
- /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
- /// specified. Use the method useNEONForSinglePrecisionFP() to
- /// determine if NEON should actually be used.
- bool UseNEONForSinglePrecisionFP = false;
-
/// UseMulOps - True if non-microcoded fused integer multiply-add and
/// multiply-subtract instructions should be used.
bool UseMulOps = false;
- /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
- /// whether the FP VML[AS] instructions are slow (if so, don't use them).
- bool SlowFPVMLx = false;
-
- /// SlowFPVFMx - If the VFP4 / NEON instructions are available, indicates
- /// whether the FP VFM[AS] instructions are slow (if so, don't use them).
- bool SlowFPVFMx = false;
-
- /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
- /// forwarding to allow mul + mla being issued back to back.
- bool HasVMLxForwarding = false;
-
- /// SlowFPBrcc - True if floating point compare + branch is slow.
- bool SlowFPBrcc = false;
-
- /// InThumbMode - True if compiling for Thumb, false for ARM.
- bool InThumbMode = false;
-
- /// UseSoftFloat - True if we're using software floating point features.
- bool UseSoftFloat = false;
-
- /// UseMISched - True if MachineScheduler should be used for this subtarget.
- bool UseMISched = false;
-
- /// DisablePostRAScheduler - False if scheduling should happen again after
- /// register allocation.
- bool DisablePostRAScheduler = false;
-
- /// HasThumb2 - True if Thumb2 instructions are supported.
- bool HasThumb2 = false;
-
- /// NoARM - True if subtarget does not support ARM mode execution.
- bool NoARM = false;
-
- /// ReserveR9 - True if R9 is not available as a general purpose register.
- bool ReserveR9 = false;
-
- /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of
- /// 32-bit imms (including global addresses).
- bool NoMovt = false;
-
/// SupportsTailCall - True if the OS supports tail call. The dynamic linker
/// must be able to synthesize call stubs for interworking between ARM and
/// Thumb.
bool SupportsTailCall = false;
- /// HasFP16 - True if subtarget supports half-precision FP conversions
- bool HasFP16 = false;
-
- /// HasFullFP16 - True if subtarget supports half-precision FP operations
- bool HasFullFP16 = false;
-
- /// HasFP16FML - True if subtarget supports half-precision FP fml operations
- bool HasFP16FML = false;
-
- /// HasBF16 - True if subtarget supports BFloat16 floating point operations
- bool HasBF16 = false;
-
- /// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply
- bool HasMatMulInt8 = false;
-
- /// HasD32 - True if subtarget has the full 32 double precision
- /// FP registers for VFPv3.
- bool HasD32 = false;
-
- /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
- bool HasHardwareDivideInThumb = false;
-
- /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
- bool HasHardwareDivideInARM = false;
-
- /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
- /// instructions.
- bool HasDataBarrier = false;
-
- /// HasFullDataBarrier - True if the subtarget supports DFB data barrier
- /// instruction.
- bool HasFullDataBarrier = false;
-
- /// HasV7Clrex - True if the subtarget supports CLREX instructions
- bool HasV7Clrex = false;
-
- /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc)
- /// instructions
- bool HasAcquireRelease = false;
-
- /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
- /// over 16-bit ones.
- bool Pref32BitThumb = false;
-
- /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
- /// that partially update CPSR and add false dependency on the previous
- /// CPSR setting instruction.
- bool AvoidCPSRPartialUpdate = false;
-
- /// CheapPredicableCPSRDef - If true, disable +1 predication cost
- /// for instructions updating CPSR. Enabled for Cortex-A57.
- bool CheapPredicableCPSRDef = false;
-
- /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
- /// movs with shifter operand (i.e. asr, lsl, lsr).
- bool AvoidMOVsShifterOperand = false;
-
- /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should
- /// avoid issue "normal" call instructions to callees which do not return.
- bool HasRetAddrStack = false;
-
- /// HasBranchPredictor - True if the subtarget has a branch predictor. Having
- /// a branch predictor or not changes the expected cost of taking a branch
- /// which affects the choice of whether to use predicated instructions.
- bool HasBranchPredictor = true;
-
- /// HasMPExtension - True if the subtarget supports Multiprocessing
- /// extension (ARMv7 only).
- bool HasMPExtension = false;
-
- /// HasVirtualization - True if the subtarget supports the Virtualization
- /// extension.
- bool HasVirtualization = false;
-
- /// HasFP64 - If true, the floating point unit supports double
- /// precision.
- bool HasFP64 = false;
-
- /// If true, the processor supports the Performance Monitor Extensions. These
- /// include a generic cycle-counter as well as more fine-grained (often
- /// implementation-specific) events.
- bool HasPerfMon = false;
-
- /// HasTrustZone - if true, processor supports TrustZone security extensions
- bool HasTrustZone = false;
-
- /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions
- bool Has8MSecExt = false;
-
- /// HasSHA2 - if true, processor supports SHA1 and SHA256
- bool HasSHA2 = false;
-
- /// HasAES - if true, processor supports AES
- bool HasAES = false;
-
- /// HasCrypto - if true, processor supports Cryptography extensions
- bool HasCrypto = false;
-
- /// HasCRC - if true, processor supports CRC instructions
- bool HasCRC = false;
-
- /// HasRAS - if true, the processor supports RAS extensions
- bool HasRAS = false;
-
- /// HasLOB - if true, the processor supports the Low Overhead Branch extension
- bool HasLOB = false;
-
- bool HasPACBTI = false;
-
- /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
- /// particularly effective at zeroing a VFP register.
- bool HasZeroCycleZeroing = false;
-
- /// HasFPAO - if true, processor does positive address offset computation faster
- bool HasFPAO = false;
-
- /// HasFuseAES - if true, processor executes back to back AES instruction
- /// pairs faster.
- bool HasFuseAES = false;
-
- /// HasFuseLiterals - if true, processor executes back to back
- /// bottom and top halves of literal generation faster.
- bool HasFuseLiterals = false;
-
- /// If true, if conversion may decide to leave some instructions unpredicated.
- bool IsProfitableToUnpredicate = false;
-
- /// If true, VMOV will be favored over VGETLNi32.
- bool HasSlowVGETLNi32 = false;
-
- /// If true, VMOV will be favored over VDUP.
- bool HasSlowVDUP32 = false;
-
- /// If true, VMOVSR will be favored over VMOVDRR.
- bool PreferVMOVSR = false;
-
- /// If true, ISHST barriers will be used for Release semantics.
- bool PreferISHST = false;
-
- /// If true, a VLDM/VSTM starting with an odd register number is considered to
- /// take more microops than single VLDRS/VSTRS.
- bool SlowOddRegister = false;
-
- /// If true, loading into a D subregister will be penalized.
- bool SlowLoadDSubregister = false;
-
- /// If true, use a wider stride when allocating VFP registers.
- bool UseWideStrideVFP = false;
-
- /// If true, the AGU and NEON/FPU units are multiplexed.
- bool HasMuxedUnits = false;
-
- /// If true, VMOVS will never be widened to VMOVD.
- bool DontWidenVMOVS = false;
-
- /// If true, splat a register between VFP and NEON instructions.
- bool SplatVFPToNeon = false;
-
- /// If true, run the MLx expansion pass.
- bool ExpandMLx = false;
-
- /// If true, VFP/NEON VMLA/VMLS have special RAW hazards.
- bool HasVMLxHazards = false;
-
- // If true, read thread pointer from coprocessor register.
- bool ReadTPHard = false;
-
- /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
- bool UseNEONForFPMovs = false;
-
- /// If true, VLDn instructions take an extra cycle for unaligned accesses.
- bool CheckVLDnAlign = false;
-
- /// If true, VFP instructions are not pipelined.
- bool NonpipelinedVFP = false;
-
- /// StrictAlign - If true, the subtarget disallows unaligned memory
- /// accesses for some types. For details, see
- /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
- bool StrictAlign = false;
-
- /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
- /// blocks to conform to ARMv8 rule.
+ /// RestrictIT - If true, the subtarget disallows generation of complex IT
+ /// blocks.
bool RestrictIT = false;
- /// HasDSP - If true, the subtarget supports the DSP (saturating arith
- /// and such) instructions.
- bool HasDSP = false;
-
- /// NaCl TRAP instruction is generated instead of the regular TRAP.
- bool UseNaClTrap = false;
-
- /// Generate calls via indirect call instructions.
- bool GenLongCalls = false;
-
- /// Generate code that does not contain data access to code sections.
- bool GenExecuteOnly = false;
-
- /// Target machine allowed unsafe FP math (such as use of NEON fp)
- bool UnsafeFPMath = false;
-
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
- /// Has speculation barrier
- bool HasSB = false;
-
- /// Implicitly convert an instruction to a different one if its immediates
- /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
- bool NegativeImmediates = true;
-
- /// Mitigate against the cve-2021-35465 security vulnurability.
- bool FixCMSE_CVE_2021_35465 = false;
-
- /// Harden against Straight Line Speculation for Returns and Indirect
- /// Branches.
- bool HardenSlsRetBr = false;
-
- /// Harden against Straight Line Speculation for indirect calls.
- bool HardenSlsBlr = false;
-
- /// Generate thunk code for SLS mitigation in the normal text section.
- bool HardenSlsNoComdat = false;
-
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
Align stackAlignment = Align(4);
@@ -540,10 +224,6 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
- /// NoBTIAtReturnTwice - Don't place a BTI instruction after
- /// return-twice constructs (setjmp)
- bool NoBTIAtReturnTwice = false;
-
/// Options passed via command line that could influence the target
const TargetOptions &Options;
@@ -622,38 +302,12 @@ private:
std::bitset<8> CoprocCDE = {};
public:
- void computeIssueWidth();
+// Getters for SubtargetFeatures defined in tablegen
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const { return ATTRIBUTE; }
+#include "ARMGenSubtargetInfo.inc"
- bool hasV4TOps() const { return HasV4TOps; }
- bool hasV5TOps() const { return HasV5TOps; }
- bool hasV5TEOps() const { return HasV5TEOps; }
- bool hasV6Ops() const { return HasV6Ops; }
- bool hasV6MOps() const { return HasV6MOps; }
- bool hasV6KOps() const { return HasV6KOps; }
- bool hasV6T2Ops() const { return HasV6T2Ops; }
- bool hasV7Ops() const { return HasV7Ops; }
- bool hasV8Ops() const { return HasV8Ops; }
- bool hasV8_1aOps() const { return HasV8_1aOps; }
- bool hasV8_2aOps() const { return HasV8_2aOps; }
- bool hasV8_3aOps() const { return HasV8_3aOps; }
- bool hasV8_4aOps() const { return HasV8_4aOps; }
- bool hasV8_5aOps() const { return HasV8_5aOps; }
- bool hasV8_6aOps() const { return HasV8_6aOps; }
- bool hasV8_7aOps() const { return HasV8_7aOps; }
- bool hasV8_8aOps() const { return HasV8_8aOps; }
- bool hasV9_0aOps() const { return HasV9_0aOps; }
- bool hasV9_1aOps() const { return HasV9_1aOps; }
- bool hasV9_2aOps() const { return HasV9_2aOps; }
- bool hasV9_3aOps() const { return HasV9_3aOps; }
- bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
- bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
- bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
- bool hasMVEIntegerOps() const { return HasMVEIntegerOps; }
- bool hasMVEFloatOps() const { return HasMVEFloatOps; }
- bool hasCDEOps() const { return HasCDEOps; }
- bool hasFPRegs() const { return HasFPRegs; }
- bool hasFPRegs16() const { return HasFPRegs16; }
- bool hasFPRegs64() const { return HasFPRegs64; }
+ void computeIssueWidth();
/// @{
/// These functions are obsolete, please consider adding subtarget features
@@ -673,31 +327,14 @@ public:
bool hasARMOps() const { return !NoARM; }
- bool hasVFP2Base() const { return HasVFPv2SP; }
- bool hasVFP3Base() const { return HasVFPv3D16SP; }
- bool hasVFP4Base() const { return HasVFPv4D16SP; }
- bool hasFPARMv8Base() const { return HasFPARMv8D16SP; }
- bool hasNEON() const { return HasNEON; }
- bool hasSHA2() const { return HasSHA2; }
- bool hasAES() const { return HasAES; }
- bool hasCrypto() const { return HasCrypto; }
- bool hasDotProd() const { return HasDotProd; }
- bool hasCRC() const { return HasCRC; }
- bool hasRAS() const { return HasRAS; }
- bool hasLOB() const { return HasLOB; }
- bool hasPACBTI() const { return HasPACBTI; }
- bool hasVirtualization() const { return HasVirtualization; }
-
bool useNEONForSinglePrecisionFP() const {
- return hasNEON() && UseNEONForSinglePrecisionFP;
+ return hasNEON() && hasNEONForFP();
}
- bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; }
- bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
- bool hasDataBarrier() const { return HasDataBarrier; }
- bool hasFullDataBarrier() const { return HasFullDataBarrier; }
- bool hasV7Clrex() const { return HasV7Clrex; }
- bool hasAcquireRelease() const { return HasAcquireRelease; }
+ bool hasVFP2Base() const { return hasVFPv2SP(); }
+ bool hasVFP3Base() const { return hasVFPv3D16SP(); }
+ bool hasVFP4Base() const { return hasVFPv4D16SP(); }
+ bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); }
bool hasAnyDataBarrier() const {
return HasDataBarrier || (hasV6Ops() && !isThumb());
@@ -710,43 +347,7 @@ public:
}
bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); }
bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); }
- bool hasVMLxForwarding() const { return HasVMLxForwarding; }
- bool isFPBrccSlow() const { return SlowFPBrcc; }
- bool hasFP64() const { return HasFP64; }
- bool hasPerfMon() const { return HasPerfMon; }
- bool hasTrustZone() const { return HasTrustZone; }
- bool has8MSecExt() const { return Has8MSecExt; }
- bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
- bool hasFPAO() const { return HasFPAO; }
- bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
- bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
- bool hasSlowVDUP32() const { return HasSlowVDUP32; }
- bool preferVMOVSR() const { return PreferVMOVSR; }
- bool preferISHSTBarriers() const { return PreferISHST; }
- bool expandMLx() const { return ExpandMLx; }
- bool hasVMLxHazards() const { return HasVMLxHazards; }
- bool hasSlowOddRegister() const { return SlowOddRegister; }
- bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
- bool useWideStrideVFP() const { return UseWideStrideVFP; }
- bool hasMuxedUnits() const { return HasMuxedUnits; }
- bool dontWidenVMOVS() const { return DontWidenVMOVS; }
- bool useSplatVFPToNeon() const { return SplatVFPToNeon; }
- bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
- bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
- bool nonpipelinedVFP() const { return NonpipelinedVFP; }
- bool prefers32BitThumb() const { return Pref32BitThumb; }
- bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
- bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
- bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
- bool hasRetAddrStack() const { return HasRetAddrStack; }
- bool hasBranchPredictor() const { return HasBranchPredictor; }
- bool hasMPExtension() const { return HasMPExtension; }
- bool hasDSP() const { return HasDSP; }
- bool useNaClTrap() const { return UseNaClTrap; }
bool useSjLjEH() const { return UseSjLjEH; }
- bool hasSB() const { return HasSB; }
- bool genLongCalls() const { return GenLongCalls; }
- bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasBaseDSP() const {
if (isThumb())
return hasDSP();
@@ -754,25 +355,16 @@ public:
return hasV5TEOps();
}
- bool hasFP16() const { return HasFP16; }
- bool hasD32() const { return HasD32; }
- bool hasFullFP16() const { return HasFullFP16; }
- bool hasFP16FML() const { return HasFP16FML; }
- bool hasBF16() const { return HasBF16; }
-
- bool hasFuseAES() const { return HasFuseAES; }
- bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
- bool hasMatMulInt8() const { return HasMatMulInt8; }
-
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
+ bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -825,24 +417,21 @@ public:
bool isRWPI() const;
bool useMachineScheduler() const { return UseMISched; }
- bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
- bool useSoftFloat() const { return UseSoftFloat; }
- bool isThumb() const { return InThumbMode; }
+ bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
- bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
- bool isThumb2() const { return InThumbMode && HasThumb2; }
- bool hasThumb2() const { return HasThumb2; }
+ bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
+ bool isThumb2() const { return isThumb() && hasThumb2(); }
bool isMClass() const { return ARMProcClass == MClass; }
bool isRClass() const { return ARMProcClass == RClass; }
bool isAClass() const { return ARMProcClass == AClass; }
- bool isReadTPHard() const { return ReadTPHard; }
bool isR9Reserved() const {
return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
MCPhysReg getFramePointerReg() const {
- if (isTargetDarwin() || (!isTargetWindows() && isThumb()))
+ if (isTargetDarwin() ||
+ (!isTargetWindows() && isThumb() && !createAAPCSFrameChain()))
return ARM::R7;
return ARM::R11;
}
@@ -859,6 +448,8 @@ public:
isThumb1Only();
}
+ bool splitFramePointerPush(const MachineFunction &MF) const;
+
bool useStride4VFPs() const;
bool useMovt() const;
@@ -878,6 +469,10 @@ public:
/// Returns true if machine scheduler should be enabled.
bool enableMachineScheduler() const override;
+ /// Returns true if machine pipeliner should be enabled.
+ bool enableMachinePipeliner() const override;
+ bool useDFAforSMS() const override;
+
/// True for some subtargets at > -O0.
bool enablePostRAScheduler() const override;
@@ -891,9 +486,6 @@ public:
/// scheduling, DAGCombine, etc.).
bool useAA() const override { return true; }
- // enableAtomicExpand- True if we need to expand our atomics.
- bool enableAtomicExpand() const override;
-
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData *getInstrItineraryData() const override {
@@ -956,14 +548,6 @@ public:
bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
unsigned PhysReg) const override;
unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
-
- bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; }
-
- bool hardenSlsRetBr() const { return HardenSlsRetBr; }
- bool hardenSlsBlr() const { return HardenSlsBlr; }
- bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
-
- bool getNoBTIAtReturnTwice() const { return NoBTIAtReturnTwice; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index c38970f8e341..d95c21d6504b 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDomainFix.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
@@ -30,20 +31,20 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
+#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
@@ -106,6 +107,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeMVEGatherScatterLoweringPass(Registry);
initializeARMSLSHardeningPass(Registry);
initializeMVELaneInterleavingPass(Registry);
+ initializeARMFixCortexA57AES1742098Pass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -194,7 +196,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
+ if (!RM)
// Default relocation model on Darwin is PIC.
return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
@@ -307,7 +309,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetTransformInfo
-ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
+ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(ARMTTIImpl(this, F));
}
@@ -434,6 +436,9 @@ void ARMPassConfig::addIRPasses() {
// Add Control Flow Guard checks.
if (TM->getTargetTriple().isOSWindows())
addPass(createCFGuardCheckPass());
+
+ if (TM->Options.JMCInstrument)
+ addPass(createJMCInstrumenterPass());
}
void ARMPassConfig::addCodeGenPrepare() {
@@ -505,6 +510,9 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(&MachinePipelinerID);
+
addPass(createMVETPAndVPTOptimisationsPass());
addPass(createMLxExpansionPass());
@@ -573,8 +581,20 @@ void ARMPassConfig::addPreEmitPass() {
}
void ARMPassConfig::addPreEmitPass2() {
+ // Inserts fixup instructions before unsafe AES operations. Instructions may
+ // be inserted at the start of blocks and within blocks, so this pass has to
+ // come before those below.
+ addPass(createARMFixCortexA57AES1742098Pass());
+ // Inserts BTIs at the start of functions and indirectly-called basic blocks,
+ // so passes cannot add to the start of basic blocks once this has run.
addPass(createARMBranchTargetsPass());
+ // Inserts Constant Islands. Block sizes cannot be increased after this point,
+ // as this may push the branch ranges and the load offsets used to access
+ // constant pools out of range.
addPass(createARMConstantIslandPass());
+ // Finalises Low-Overhead Loops. This replaces pseudo instructions with real
+ // instructions, but the pseudos all have conservative sizes so that block
+ // sizes will only be decreased by this pass.
addPass(createARMLowOverheadLoopsPass());
if (TM->getTargetTriple().isOSWindows()) {
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h
index 8428092bf179..8d33a038deeb 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -52,7 +52,7 @@ public:
const ARMSubtarget *getSubtargetImpl() const = delete;
bool isLittleEndian() const { return isLittle; }
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d9d563ead260..3a9946ee810b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1202,7 +1202,8 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
- int Index, VectorType *SubTp) {
+ int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
@@ -1290,7 +1291,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (!Mask.empty()) {
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- if (Mask.size() <= LT.second.getVectorNumElements() &&
+ if (LT.second.isVector() &&
+ Mask.size() <= LT.second.getVectorNumElements() &&
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
isVREVMask(Mask, LT.second, 64)))
return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first;
@@ -1764,6 +1766,48 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return LT.first * ST->getMVEVectorCostFactor(CostKind);
break;
}
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::fptoui_sat: {
+ if (ICA.getArgTypes().empty())
+ break;
+ bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
+ auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
+ EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
+ // Check for the legal types, with the correct subtarget features.
+ if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
+ (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
+ (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
+ return LT.first;
+
+ // Equally for MVE vector types
+ if (ST->hasMVEFloatOps() &&
+ (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
+ LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())
+ return LT.first * ST->getMVEVectorCostFactor(CostKind);
+
+ // Otherwise we use a legal convert followed by a min+max
+ if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
+ (ST->hasFP64() && LT.second == MVT::f64) ||
+ (ST->hasFullFP16() && LT.second == MVT::f16) ||
+ (ST->hasMVEFloatOps() &&
+ (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
+ LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
+ Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(),
+ LT.second.getScalarSizeInBits());
+ InstructionCost Cost =
+ LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
+ IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin
+ : Intrinsic::umin,
+ LegalTy, {LegalTy, LegalTy});
+ Cost += getIntrinsicInstrCost(Attrs1, CostKind);
+ IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax
+ : Intrinsic::umax,
+ LegalTy, {LegalTy, LegalTy});
+ Cost += getIntrinsicInstrCost(Attrs2, CostKind);
+ return LT.first * Cost;
+ }
+ break;
+ }
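  // Worked example (illustrative): fptosi.sat.i16.f32 with only VFP2Base
  // takes the third branch: the convert legalizes to f32->i32, then the
  // result is clamped with i32 smin/smax, giving a total cost of
  // LT.first * (1 + cost(smin) + cost(smax)).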
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
@@ -1771,7 +1815,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
if (!F->isIntrinsic())
- BaseT::isLoweredToCall(F);
+ return BaseT::isLoweredToCall(F);
// Assume all Arm-specific intrinsics map to an instruction.
if (F->getName().startswith("llvm.arm"))
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 5bb84899e5ef..d7a2bdb3db15 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -213,7 +213,8 @@ public:
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp);
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None);
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c7734cc2cf11..b725ea3a84e5 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -453,6 +453,7 @@ class ARMAsmParser : public MCTargetAsmParser {
bool AllowRAAC = false);
bool parseMemory(OperandVector &);
bool parseOperand(OperandVector &, StringRef Mnemonic);
+ bool parseImmExpr(int64_t &Out);
bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
unsigned &ShiftAmount);
@@ -488,6 +489,17 @@ class ARMAsmParser : public MCTargetAsmParser {
bool parseDirectiveAlign(SMLoc L);
bool parseDirectiveThumbSet(SMLoc L);
+ bool parseDirectiveSEHAllocStack(SMLoc L, bool Wide);
+ bool parseDirectiveSEHSaveRegs(SMLoc L, bool Wide);
+ bool parseDirectiveSEHSaveSP(SMLoc L);
+ bool parseDirectiveSEHSaveFRegs(SMLoc L);
+ bool parseDirectiveSEHSaveLR(SMLoc L);
+ bool parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment);
+ bool parseDirectiveSEHNop(SMLoc L, bool Wide);
+ bool parseDirectiveSEHEpilogStart(SMLoc L, bool Condition);
+ bool parseDirectiveSEHEpilogEnd(SMLoc L);
+ bool parseDirectiveSEHCustom(SMLoc L);
+
bool isMnemonicVPTPredicable(StringRef Mnemonic, StringRef ExtraToken);
StringRef splitMnemonic(StringRef Mnemonic, StringRef ExtraToken,
unsigned &PredicationCode,
@@ -4528,9 +4540,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
if (Reg == EndReg)
continue;
// The register must be in the same register class as the first.
- if ((Reg == ARM::RA_AUTH_CODE &&
- RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) ||
- (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg)))
+ if (!RC->contains(Reg))
return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
@@ -6319,6 +6329,18 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
}
+bool ARMAsmParser::parseImmExpr(int64_t &Out) {
+ const MCExpr *Expr = nullptr;
+ SMLoc L = getParser().getTok().getLoc();
+ if (check(getParser().parseExpression(Expr), L, "expected expression"))
+ return true;
+ const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+ if (check(!Value, L, "expected constant expression"))
+ return true;
+ Out = Value->getValue();
+ return false;
+}
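// Note: parseImmExpr is a shared helper for the SEH directives below; it only
// accepts expressions that fold to an MCConstantExpr, so an operand like the
// "16" in a hypothetical ".seh_stackalloc 16" yields Out = 16, while symbolic
// operands fail with "expected constant expression".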
+
// parsePrefix - Parse ARM 16-bit relocation expression prefixes, i.e.
// :lower16: and :upper16:.
bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
@@ -6379,7 +6401,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
CurrentFormat = WASM;
break;
case MCContext::IsGOFF:
+ case MCContext::IsSPIRV:
case MCContext::IsXCOFF:
+ case MCContext::IsDXContainer:
llvm_unreachable("unexpected object format");
break;
}
@@ -10958,9 +10982,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
- { // processInstruction() updates inITBlock state, we need to save it away
- bool wasInITBlock = inITBlock();
-
+ {
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other. E.g.,
@@ -10969,12 +10991,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
LLVM_DEBUG(dbgs() << "Changed to: ";
Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
dbgs() << "\n");
-
- // Only after the instruction is fully processed, we can validate it
- if (wasInITBlock && hasV8Ops() && isThumb() &&
- !isV8EligibleForIT(&Inst) && !getTargetOptions().MCNoDeprecatedWarn) {
- Warning(IDLoc, "deprecated instruction in IT block");
- }
}
// Only move forward at the very end so that everything in validate
@@ -11090,6 +11106,39 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveTLSDescSeq(DirectiveID.getLoc());
else
return true;
+ } else if (IsCOFF) {
+ if (IDVal == ".seh_stackalloc")
+ parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/false);
+ else if (IDVal == ".seh_stackalloc_w")
+ parseDirectiveSEHAllocStack(DirectiveID.getLoc(), /*Wide=*/true);
+ else if (IDVal == ".seh_save_regs")
+ parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/false);
+ else if (IDVal == ".seh_save_regs_w")
+ parseDirectiveSEHSaveRegs(DirectiveID.getLoc(), /*Wide=*/true);
+ else if (IDVal == ".seh_save_sp")
+ parseDirectiveSEHSaveSP(DirectiveID.getLoc());
+ else if (IDVal == ".seh_save_fregs")
+ parseDirectiveSEHSaveFRegs(DirectiveID.getLoc());
+ else if (IDVal == ".seh_save_lr")
+ parseDirectiveSEHSaveLR(DirectiveID.getLoc());
+ else if (IDVal == ".seh_endprologue")
+ parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/false);
+ else if (IDVal == ".seh_endprologue_fragment")
+ parseDirectiveSEHPrologEnd(DirectiveID.getLoc(), /*Fragment=*/true);
+ else if (IDVal == ".seh_nop")
+ parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/false);
+ else if (IDVal == ".seh_nop_w")
+ parseDirectiveSEHNop(DirectiveID.getLoc(), /*Wide=*/true);
+ else if (IDVal == ".seh_startepilogue")
+ parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/false);
+ else if (IDVal == ".seh_startepilogue_cond")
+ parseDirectiveSEHEpilogStart(DirectiveID.getLoc(), /*Condition=*/true);
+ else if (IDVal == ".seh_endepilogue")
+ parseDirectiveSEHEpilogEnd(DirectiveID.getLoc());
+ else if (IDVal == ".seh_custom")
+ parseDirectiveSEHCustom(DirectiveID.getLoc());
+ else
+ return true;
} else
return true;
return false;
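Together these handlers let the integrated assembler accept Windows SEH unwind annotations when targeting COFF. A hypothetical prologue as it might appear in an input .s file (register choices and sizes invented), wrapped as a C++ string for illustration:

    const char *SEHProlog = ".seh_save_regs_w {r4-r11, lr}\n"
                            ".seh_stackalloc 32\n"
                            ".seh_endprologue\n";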
@@ -11113,8 +11162,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
/// parseDirectiveThumb
/// ::= .thumb
bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
- check(!hasThumb(), L, "target does not support Thumb mode"))
+ if (parseEOL() || check(!hasThumb(), L, "target does not support Thumb mode"))
return true;
if (!isThumb())
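The same mechanical substitution repeats through the remaining directive parsers in this file. The pattern, with the message text taken from the removed lines:

    // Before: a per-directive error string threaded through parseToken().
    if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
      return true;
    // After: parseEOL() emits a uniform end-of-statement diagnostic.
    if (parseEOL())
      return true;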
@@ -11127,8 +11175,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
/// parseDirectiveARM
/// ::= .arm
bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
- check(!hasARM(), L, "target does not support ARM mode"))
+ if (parseEOL() || check(!hasARM(), L, "target does not support ARM mode"))
return true;
if (isThumb())
@@ -11167,15 +11214,13 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
Parser.getTok().getIdentifier());
getParser().getStreamer().emitThumbFunc(Func);
Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.thumb_func' directive"))
+ if (parseEOL())
return true;
return false;
}
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.thumb_func' directive"))
+ if (parseEOL())
return true;
// .thumb_func implies .thumb
@@ -11204,7 +11249,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
"'.syntax divided' arm assembly not supported") ||
check(Mode != "unified" && Mode != "UNIFIED", L,
"unrecognized syntax mode in .syntax directive") ||
- parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ parseEOL())
return true;
// TODO tell the MC streamer the mode
@@ -11226,7 +11271,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
}
Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
if (Val == 16) {
@@ -11257,8 +11302,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
SMLoc SRegLoc, ERegLoc;
if (check(ParseRegister(Reg, SRegLoc, ERegLoc), SRegLoc,
"register name expected") ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected input in .req directive."))
+ parseEOL())
return true;
if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg)
@@ -11276,10 +11320,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
return Error(L, "unexpected input in .unreq directive.");
RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
Parser.Lex(); // Eat the identifier.
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected input in '.unreq' directive"))
- return true;
- return false;
+ return parseEOL();
}
// After changing arch/CPU, try to put the ARM/Thumb mode back to what it was
@@ -11340,11 +11381,11 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
StringRef Name = Parser.getTok().getIdentifier();
Optional<unsigned> Ret = ELFAttrs::attrTypeFromString(
Name, ARMBuildAttrs::getARMAttributeTags());
- if (!Ret.hasValue()) {
+ if (!Ret) {
Error(TagLoc, "attribute name not recognised: " + Name);
return false;
}
- Tag = Ret.getValue();
+ Tag = *Ret;
Parser.Lex();
} else {
const MCExpr *AttrExpr;
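This hunk is part of the wider llvm::Optional cleanup in this import: explicit hasValue()/getValue() calls give way to the pointer-like interface. The idiom, extracted from the lines above:

    Optional<unsigned> Ret = ELFAttrs::attrTypeFromString(
        Name, ARMBuildAttrs::getARMAttributeTags());
    if (!Ret)       // contextual bool replaces Ret.hasValue()
      return false; // after diagnosing, as above
    Tag = *Ret;     // operator* replaces Ret.getValue()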
@@ -11406,8 +11447,7 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
Parser.Lex();
}
- if (Parser.parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.eabi_attribute' directive"))
+ if (Parser.parseEOL())
return true;
if (IsIntegerValue && IsStringValue) {
@@ -11463,8 +11503,7 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
/// parseDirectiveFnStart
/// ::= .fnstart
bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.fnstart' directive"))
+ if (parseEOL())
return true;
if (UC.hasFnStart()) {
@@ -11485,8 +11524,7 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
/// parseDirectiveFnEnd
/// ::= .fnend
bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.fnend' directive"))
+ if (parseEOL())
return true;
// Check the ordering of unwind directives
if (!UC.hasFnStart())
@@ -11502,8 +11540,7 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
/// parseDirectiveCantUnwind
/// ::= .cantunwind
bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cantunwind' directive"))
+ if (parseEOL())
return true;
UC.recordCantUnwind(L);
@@ -11538,8 +11575,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
StringRef Name(Parser.getTok().getIdentifier());
Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.personality' directive"))
+ if (parseEOL())
return true;
UC.recordPersonality(L);
@@ -11571,8 +11607,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
/// parseDirectiveHandlerData
/// ::= .handlerdata
bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.handlerdata' directive"))
+ if (parseEOL())
return true;
UC.recordHandlerData(L);
@@ -11670,8 +11705,7 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) {
if (!CE)
return Error(ExLoc, "pad offset must be an immediate");
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.pad' directive"))
+ if (parseEOL())
return true;
getTargetStreamer().emitPad(CE->getValue());
@@ -11692,8 +11726,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
// Parse the register list
- if (parseRegisterList(Operands, true, true) ||
- parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseRegisterList(Operands, true, true) || parseEOL())
return true;
ARMOperand &Op = (ARMOperand &)*Operands[0];
if (!IsVector && !Op.isRegList())
@@ -11776,7 +11809,7 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
/// parseDirectiveLtorg
/// ::= .ltorg | .pool
bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
getTargetStreamer().emitCurrentConstantPool();
return false;
@@ -11785,7 +11818,7 @@ bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return true;
if (!Section) {
@@ -11794,7 +11827,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
}
assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
+ if (Section->useCodeAlign())
getStreamer().emitCodeAlignment(2, &getSTI());
else
getStreamer().emitValueToAlignment(2);
@@ -11810,9 +11843,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
const MCExpr *IndexExpression;
SMLoc IndexLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(IndexExpression) ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.personalityindex' directive")) {
+ if (Parser.parseExpression(IndexExpression) || parseEOL()) {
return true;
}
@@ -11913,11 +11944,10 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.tlsdescseq' directive"))
+ if (parseEOL())
return true;
- getTargetStreamer().AnnotateTLSDescriptorSequence(SRE);
+ getTargetStreamer().annotateTLSDescriptorSequence(SRE);
return false;
}
@@ -11955,8 +11985,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
Offset = CE->getValue();
}
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.movsp' directive"))
+ if (parseEOL())
return true;
getTargetStreamer().emitMovSP(SPReg, Offset);
@@ -11996,7 +12025,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
// '.align' is target specifically handled to mean 2**2 byte alignment.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
+ if (Section->useCodeAlign())
getStreamer().emitCodeAlignment(4, &getSTI(), 0);
else
getStreamer().emitValueToAlignment(4, 0, 1, 0);
@@ -12026,6 +12055,175 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
return false;
}
+/// parseDirectiveSEHAllocStack
+/// ::= .seh_stackalloc
+/// ::= .seh_stackalloc_w
+bool ARMAsmParser::parseDirectiveSEHAllocStack(SMLoc L, bool Wide) {
+ int64_t Size;
+ if (parseImmExpr(Size))
+ return true;
+ getTargetStreamer().emitARMWinCFIAllocStack(Size, Wide);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegs
+/// ::= .seh_save_regs
+/// ::= .seh_save_regs_w
+bool ARMAsmParser::parseDirectiveSEHSaveRegs(SMLoc L, bool Wide) {
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+
+ if (parseRegisterList(Operands) || parseEOL())
+ return true;
+ ARMOperand &Op = (ARMOperand &)*Operands[0];
+ if (!Op.isRegList())
+ return Error(L, ".seh_save_regs{_w} expects GPR registers");
+ const SmallVectorImpl<unsigned> &RegList = Op.getRegList();
+ uint32_t Mask = 0;
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI->getEncodingValue(RegList[i]);
+ if (Reg == 15) // pc -> lr
+ Reg = 14;
+ if (Reg == 13)
+ return Error(L, ".seh_save_regs{_w} can't include SP");
+ assert(Reg < 16U && "Register out of range");
+ unsigned Bit = (1u << Reg);
+ Mask |= Bit;
+ }
+ if (!Wide && (Mask & 0x1f00) != 0)
+ return Error(L,
+ ".seh_save_regs cannot save R8-R12, needs .seh_save_regs_w");
+ getTargetStreamer().emitARMWinCFISaveRegMask(Mask, Wide);
+ return false;
+}
+
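A worked example of the mask encoding above, with an invented register list: {r4-r7, lr} sets bits 4-7 and 14, pc folds to lr (bit 14), and any of r8-r12 would land in 0x1f00 and force the wide form. As a self-contained check:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t Mask = 0;
      for (unsigned Reg : {4u, 5u, 6u, 7u, 14u}) // {r4-r7, lr}
        Mask |= 1u << Reg;
      assert(Mask == 0x40f0);
      assert((Mask & 0x1f00) == 0); // no r8-r12: narrow .seh_save_regs suffices
    }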
+/// parseDirectiveSEHSaveSP
+/// ::= .seh_save_sp
+bool ARMAsmParser::parseDirectiveSEHSaveSP(SMLoc L) {
+ int Reg = tryParseRegister();
+ if (Reg == -1 || !MRI->getRegClass(ARM::GPRRegClassID).contains(Reg))
+ return Error(L, "expected GPR");
+ unsigned Index = MRI->getEncodingValue(Reg);
+ if (Index > 14 || Index == 13)
+ return Error(L, "invalid register for .seh_save_sp");
+ getTargetStreamer().emitARMWinCFISaveSP(Index);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegs
+/// ::= .seh_save_fregs
+bool ARMAsmParser::parseDirectiveSEHSaveFRegs(SMLoc L) {
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
+
+ if (parseRegisterList(Operands) || parseEOL())
+ return true;
+ ARMOperand &Op = (ARMOperand &)*Operands[0];
+ if (!Op.isDPRRegList())
+ return Error(L, ".seh_save_fregs expects DPR registers");
+ const SmallVectorImpl<unsigned> &RegList = Op.getRegList();
+ uint32_t Mask = 0;
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI->getEncodingValue(RegList[i]);
+ assert(Reg < 32U && "Register out of range");
+ unsigned Bit = (1u << Reg);
+ Mask |= Bit;
+ }
+
+ if (Mask == 0)
+ return Error(L, ".seh_save_fregs missing registers");
+
+ unsigned First = 0;
+ while ((Mask & 1) == 0) {
+ First++;
+ Mask >>= 1;
+ }
+ if (((Mask + 1) & Mask) != 0)
+ return Error(L,
+ ".seh_save_fregs must take a contiguous range of registers");
+ unsigned Last = First;
+ while ((Mask & 2) != 0) {
+ Last++;
+ Mask >>= 1;
+ }
+ if (First < 16 && Last >= 16)
+ return Error(L, ".seh_save_fregs must be all d0-d15 or d16-d31");
+ getTargetStreamer().emitARMWinCFISaveFRegs(First, Last);
+ return false;
+}
+
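The contiguity test above is the standard bit trick: once the trailing zeros are shifted out, a contiguous run of set bits has the form 2^n - 1, so Mask + 1 is a power of two and (Mask + 1) & Mask is zero. A self-contained illustration (register choices invented):

    #include <cassert>
    #include <cstdint>
    static bool contiguousFromBit0(uint32_t Mask) {
      return ((Mask + 1) & Mask) == 0; // 2^n - 1 leaves no overlapping bits
    }
    int main() {
      assert(contiguousFromBit0(0xf));  // d8-d11, after Mask >>= 8
      assert(!contiguousFromBit0(0x5)); // {d8, d10}: the gap is rejected
    }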
+/// parseDirectiveSEHSaveLR
+/// ::= .seh_save_lr
+bool ARMAsmParser::parseDirectiveSEHSaveLR(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().emitARMWinCFISaveLR(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHPrologEnd
+/// ::= .seh_endprologue
+/// ::= .seh_endprologue_fragment
+bool ARMAsmParser::parseDirectiveSEHPrologEnd(SMLoc L, bool Fragment) {
+ getTargetStreamer().emitARMWinCFIPrologEnd(Fragment);
+ return false;
+}
+
+/// parseDirectiveSEHNop
+/// ::= .seh_nop
+/// ::= .seh_nop_w
+bool ARMAsmParser::parseDirectiveSEHNop(SMLoc L, bool Wide) {
+ getTargetStreamer().emitARMWinCFINop(Wide);
+ return false;
+}
+
+/// parseDirectiveSEHEpilogStart
+/// ::= .seh_startepilogue
+/// ::= .seh_startepilogue_cond
+bool ARMAsmParser::parseDirectiveSEHEpilogStart(SMLoc L, bool Condition) {
+ unsigned CC = ARMCC::AL;
+ if (Condition) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return Error(S, ".seh_startepilogue_cond missing condition");
+ CC = ARMCondCodeFromString(Tok.getString());
+ if (CC == ~0U)
+ return Error(S, "invalid condition");
+ Parser.Lex(); // Eat the token.
+ }
+
+ getTargetStreamer().emitARMWinCFIEpilogStart(CC);
+ return false;
+}
+
+/// parseDirectiveSEHEpilogEnd
+/// ::= .seh_endepilogue
+bool ARMAsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) {
+ getTargetStreamer().emitARMWinCFIEpilogEnd();
+ return false;
+}
+
+/// parseDirectiveSEHCustom
+/// ::= .seh_custom
+bool ARMAsmParser::parseDirectiveSEHCustom(SMLoc L) {
+ unsigned Opcode = 0;
+ do {
+ int64_t Byte;
+ if (parseImmExpr(Byte))
+ return true;
+ if (Byte > 0xff || Byte < 0)
+ return Error(L, "Invalid byte value in .seh_custom");
+ if (Opcode > 0x00ffffff)
+ return Error(L, "Too many bytes in .seh_custom");
+ // Store the bytes as one big-endian number in Opcode. In a multi-byte
+ // opcode sequence, the first byte can't be zero.
+ Opcode = (Opcode << 8) | Byte;
+ } while (parseOptionalToken(AsmToken::Comma));
+ getTargetStreamer().emitARMWinCFICustom(Opcode);
+ return false;
+}
+
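The loop above packs the comma-separated byte operands big-endian into one 32-bit opcode, so .seh_custom 0xab, 0xcd (values invented) yields 0xabcd, and a byte that would push the accumulator past 0x00ffffff is rejected. A self-contained check:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t Opcode = 0;
      for (uint32_t Byte : {0xabu, 0xcdu}) // ".seh_custom 0xab, 0xcd"
        Opcode = (Opcode << 8) | Byte;     // big-endian accumulation
      assert(Opcode == 0xabcd);
    }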
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget());
@@ -12338,8 +12536,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
SMLoc ExtLoc = Parser.getTok().getLoc();
Lex();
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.arch_extension' directive"))
+ if (parseEOL())
return true;
if (Name == "nocrypto") {
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index c3df7dc88d79..9acd49292268 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -13,8 +13,8 @@
#include "TargetInfo/ARMTargetInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -175,408 +175,529 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus
DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPRwithZRnospRegisterClass(
- MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
- unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst,
- unsigned Insn,
- uint64_t Adddress,
- const void *Decoder);
+static DecodeStatus
+DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
-static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder);
-static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
-static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift, int WriteBack>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift, int WriteBack>
static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned val,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
-template<bool Writeback>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
+template <bool Writeback>
static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
-template<int shift>
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<int shift>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <int shift>
static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<unsigned MinLog, unsigned MaxLog>
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <unsigned MinLog, unsigned MaxLog>
static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
-template<unsigned start>
-static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+template <unsigned start>
+static DecodeStatus
+DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
typedef DecodeStatus OperandDecoder(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-template<bool scalar, OperandDecoder predicate_decoder>
-static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+template <bool scalar, OperandDecoder predicate_decoder>
+static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
#include "ARMGenDisassemblerTables.inc"
@@ -710,11 +831,12 @@ extern const MCInstrDesc ARMInsts[];
/// operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
bool isBranch, uint64_t InstSize,
- MCInst &MI, const void *Decoder) {
- const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ MCInst &MI,
+ const MCDisassembler *Decoder) {
// FIXME: Does it make sense for value to be negative?
- return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch,
- /* Offset */ 0, InstSize);
+ return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
+ isBranch, /*Offset=*/0, /*OpSize=*/0,
+ InstSize);
}
/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being
@@ -727,7 +849,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
Dis->tryAddingPcLoadReferenceComment(Value, Address);
}
@@ -1142,7 +1264,8 @@ static const uint16_t CLRMGPRDecoderTable[] = {
};
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -1153,7 +1276,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -1165,9 +1288,9 @@ static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 15)
@@ -1180,7 +1303,7 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 13)
@@ -1192,8 +1315,8 @@ static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 15)
@@ -1207,8 +1330,8 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 15)
@@ -1225,8 +1348,8 @@ DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo == 13)
return MCDisassembler::Fail;
@@ -1235,7 +1358,8 @@ DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
@@ -1247,7 +1371,8 @@ static const uint16_t GPRPairDecoderTable[] = {
};
static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
// According to the Arm ARM RegNo = 14 is undefined, but we return fail
@@ -1263,8 +1388,9 @@ static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
-static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus
+DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 13)
return MCDisassembler::Fail;
@@ -1278,7 +1404,7 @@ static DecodeStatus DecodeGPRPairnospRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo != 13)
return MCDisassembler::Fail;
@@ -1288,7 +1414,8 @@ static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Register = 0;
switch (RegNo) {
case 0:
@@ -1318,7 +1445,8 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
const FeatureBitset &featureBits =
@@ -1343,7 +1471,8 @@ static const uint16_t SPRDecoderTable[] = {
};
static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1353,7 +1482,8 @@ static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
}
@@ -1369,7 +1499,8 @@ static const uint16_t DPRDecoderTable[] = {
};
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
@@ -1384,22 +1515,24 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus
-DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
@@ -1413,7 +1546,8 @@ static const uint16_t QPRDecoderTable[] = {
};
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 31 || (RegNo & 1) != 0)
return MCDisassembler::Fail;
RegNo >>= 1;
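// Illustrative note: Q registers occupy even D-register numbers, so the
// encoding is 2*q; e.g. an encoded RegNo of 6 passes the checks above and
// selects Q3 after the shift.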
@@ -1433,7 +1567,8 @@ static const uint16_t DPairDecoderTable[] = {
};
static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 30)
return MCDisassembler::Fail;
@@ -1453,10 +1588,9 @@ static const uint16_t DPairSpacedDecoderTable[] = {
ARM::D28_D30, ARM::D29_D31
};
-static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 29)
return MCDisassembler::Fail;
@@ -1466,7 +1600,8 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
}
static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val == 0xF) return MCDisassembler::Fail;
// AL predicate is not allowed on Thumb1 branches.
@@ -1483,7 +1618,8 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val)
Inst.addOperand(MCOperand::createReg(ARM::CPSR));
else
@@ -1492,7 +1628,8 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Val, 0, 4);
@@ -1529,7 +1666,8 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Val, 0, 4);
@@ -1564,7 +1702,8 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
bool NeedDisjointWriteback = false;
@@ -1611,7 +1750,8 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Vd = fieldFromInstruction(Val, 8, 5);
@@ -1635,7 +1775,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Vd = fieldFromInstruction(Val, 8, 5);
@@ -1660,7 +1801,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// This operand encodes a mask of contiguous zeros between a specified MSB
// and LSB. To decode it, we create the mask of all bits MSB-and-lower,
// the mask of all bits LSB-and-lower, and then xor them to create
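// Worked example (illustrative, assuming the usual (1 << (msb+1)) - 1 and
// (1 << lsb) - 1 construction): for msb = 5, lsb = 2 the masks are 0x3F and
// 0x03; 0x3F ^ 0x03 = 0x3C is all ones on [2,5], and inverting gives the
// final operand ~0x3C = 0xFFFFFFC3, i.e. zeros exactly between lsb and msb.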
@@ -1687,7 +1829,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 28, 4);
@@ -1865,8 +2008,8 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -1971,7 +2114,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 13, 4);
@@ -2013,9 +2157,22 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus
-DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeTSBInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (Inst.getOpcode() != ARM::TSB && Inst.getOpcode() != ARM::t2TSB)
+ return MCDisassembler::Fail;
+
+ // The "csync" operand is not encoded into the "tsb" instruction (as this is
+ // the only available operand), but LLVM expects the instruction to have one
+ // operand, so we need to add the csync when decoding.
+ Inst.addOperand(MCOperand::createImm(ARM_TSB::CSYNC));
+ return MCDisassembler::Success;
+}
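// Illustrative note: with the implicit operand added, the decoded MCInst
// should print as "tsb csync", the only assembly form of this barrier.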
+
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -2206,7 +2363,8 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -2235,7 +2393,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -2257,9 +2416,10 @@ static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus
+DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -2350,7 +2510,8 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
// Check for UNPREDICTABLE predicated ESB instruction
static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned pred = fieldFromInstruction(Insn, 28, 4);
unsigned imm8 = fieldFromInstruction(Insn, 0, 8);
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
@@ -2372,7 +2533,8 @@ static DecodeStatus DecodeHINTInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imod = fieldFromInstruction(Insn, 18, 2);
unsigned M = fieldFromInstruction(Insn, 17, 1);
unsigned iflags = fieldFromInstruction(Insn, 6, 3);
@@ -2419,7 +2581,8 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imod = fieldFromInstruction(Insn, 9, 2);
unsigned M = fieldFromInstruction(Insn, 8, 1);
unsigned iflags = fieldFromInstruction(Insn, 5, 3);
@@ -2460,9 +2623,9 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imm = fieldFromInstruction(Insn, 0, 8);
unsigned Opcode = ARM::t2HINT;
@@ -2486,7 +2649,8 @@ static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 8, 4);
@@ -2510,7 +2674,8 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -2537,7 +2702,8 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 16, 4);
@@ -2565,7 +2731,8 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Pred = fieldFromInstruction(Insn, 28, 4);
@@ -2586,7 +2753,8 @@ static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Imm = fieldFromInstruction(Insn, 9, 1);
@@ -2614,7 +2782,8 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned add = fieldFromInstruction(Val, 12, 1);
@@ -2634,7 +2803,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -2654,7 +2824,8 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -2674,13 +2845,14 @@ static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
}
-static DecodeStatus
-DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus Status = MCDisassembler::Success;
// Note the J1 and J2 values are from the encoded instruction. So here
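// change them to I1 and I2 via I1 = NOT(J1 EOR S) and I2 = NOT(J2 EOR S),
// then build imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32) per the ARM ARM.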
@@ -2705,9 +2877,9 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
return Status;
}
-static DecodeStatus
-DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 28, 4);
@@ -2736,7 +2908,8 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Val, 0, 4);
@@ -2753,7 +2926,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3029,7 +3203,8 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned type = fieldFromInstruction(Insn, 8, 4);
unsigned align = fieldFromInstruction(Insn, 4, 2);
if (type == 6 && (align & 2)) return MCDisassembler::Fail;
@@ -3042,7 +3217,8 @@ static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned size = fieldFromInstruction(Insn, 6, 2);
if (size == 3) return MCDisassembler::Fail;
@@ -3057,7 +3233,8 @@ static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned size = fieldFromInstruction(Insn, 6, 2);
if (size == 3) return MCDisassembler::Fail;
@@ -3070,7 +3247,8 @@ static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned size = fieldFromInstruction(Insn, 6, 2);
if (size == 3) return MCDisassembler::Fail;
@@ -3080,7 +3258,8 @@ static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3350,7 +3529,8 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3397,7 +3577,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3445,7 +3626,8 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3480,7 +3662,8 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3531,9 +3714,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3577,9 +3760,9 @@ DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
@@ -3607,7 +3790,8 @@ DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = fieldFromInstruction(Insn, 13, 3);
@@ -3632,7 +3816,8 @@ static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3651,31 +3836,36 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(8 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(16 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(32 - Val));
return MCDisassembler::Success;
}
static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(64 - Val));
return MCDisassembler::Success;
}
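// Illustrative note: these four decoders invert the same encoding, where the
// field stores (element size - shift); e.g. for the 8-bit variant an encoded
// Val of 3 decodes to a shift right of 5.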
static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -3711,7 +3901,8 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned dst = fieldFromInstruction(Insn, 8, 3);
@@ -3735,7 +3926,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<12>(Val << 1)));
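// Illustrative note: branch targets are halfword-aligned, so the encoded
// field is shifted left once before sign extension; the +4 in the symbolic
// lookup models the Thumb PC reading one instruction (4 bytes) ahead.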
@@ -3743,7 +3935,8 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<21>(Val)));
@@ -3751,7 +3944,8 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(Val << 1));
@@ -3759,7 +3953,8 @@ static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 0, 3);
@@ -3774,7 +3969,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 0, 3);
@@ -3788,7 +3984,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imm = Val << 2;
Inst.addOperand(MCOperand::createImm(imm));
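// Illustrative note: the literal offset is scaled by 4 because PC-relative
// Thumb loads address word-aligned data; an encoded Val of 5 yields #20.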
@@ -3798,7 +3995,8 @@ static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createReg(ARM::SP));
Inst.addOperand(MCOperand::createImm(Val));
@@ -3806,7 +4004,8 @@ static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 6, 4);
@@ -3835,7 +4034,8 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -3918,7 +4118,8 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4002,7 +4203,8 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4081,8 +4283,8 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4121,7 +4323,8 @@ static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void* Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -4173,8 +4376,8 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val == 0)
Inst.addOperand(MCOperand::createImm(INT32_MIN));
else {
@@ -4188,7 +4391,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Val == 0)
Inst.addOperand(MCOperand::createImm(INT32_MIN));
else {
@@ -4203,7 +4406,8 @@ static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
}
static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -4219,7 +4423,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 4);
@@ -4233,8 +4437,9 @@ static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 4);
@@ -4248,8 +4453,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
return S;
}
-static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
int imm = Val & 0xFF;
if (Val == 0)
imm = INT32_MIN;
@@ -4260,9 +4465,9 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-template<int shift>
-static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+template <int shift>
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
int imm = Val & 0x7F;
if (Val == 0)
imm = INT32_MIN;
@@ -4276,7 +4481,8 @@ static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 9, 4);
@@ -4321,10 +4527,10 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
return S;
}
-template<int shift>
+template <int shift>
static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 3);
@@ -4338,10 +4544,10 @@ static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
return S;
}
-template<int shift, int WriteBack>
+template <int shift, int WriteBack>
static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 8, 4);
@@ -4358,7 +4564,8 @@ static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -4419,7 +4626,8 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 13, 4);
@@ -4445,7 +4653,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imm = fieldFromInstruction(Insn, 0, 7);
Inst.addOperand(MCOperand::createReg(ARM::SP));
@@ -4456,7 +4665,8 @@ static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Inst.getOpcode() == ARM::tADDrSP) {
@@ -4481,7 +4691,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
unsigned flags = fieldFromInstruction(Insn, 0, 3);
@@ -4492,7 +4703,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
}
static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction(Insn, 0, 4);
unsigned add = fieldFromInstruction(Insn, 4, 1);
@@ -4505,7 +4717,8 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 3, 4);
unsigned Qm = fieldFromInstruction(Insn, 0, 3);
@@ -4518,9 +4731,10 @@ static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
return S;
}
-template<int shift>
+template <int shift>
static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qm = fieldFromInstruction(Insn, 8, 3);
int imm = fieldFromInstruction(Insn, 0, 7);
@@ -4542,7 +4756,8 @@ static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// Val is passed in as S:J1:J2:imm10H:imm10L:'0'
// Note only one trailing zero not two. Also the J1 and J2 values are from
// the encoded instruction. So here change to I1 and I2 values via:
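// I1 = NOT(J1 EOR S) and I2 = NOT(J2 EOR S), as for the other Thumb branch
// relocations; the BLX target is additionally word-aligned.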
@@ -4566,7 +4781,8 @@ static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
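// Illustrative note: coprocessor numbers 10 and 11 name the VFP/NEON bank,
// which has dedicated instructions, so the generic coprocessor forms reject
// them here.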
@@ -4580,9 +4796,9 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &FeatureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
DecodeStatus S = MCDisassembler::Success;
@@ -4598,9 +4814,9 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 22, 4);
@@ -4641,8 +4857,8 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
// Decode a shifted immediate operand. These basically consist
// of an 8-bit value, and a 4-bit directive that specifies either
// a splat operation or a rotation.
-static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned ctrl = fieldFromInstruction(Val, 10, 2);
if (ctrl == 0) {
unsigned byte = fieldFromInstruction(Val, 8, 2);
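// Illustrative note (Thumb2 modified-immediate splat forms, per the ARM ARM):
// byte = 0 replicates imm8 as 0x000000XY, 1 as 0x00XY00XY, 2 as 0xXY00XY00,
// and 3 as 0xXYXYXYXY; a nonzero ctrl selects a rotated 8-bit constant
// instead.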
@@ -4672,9 +4888,9 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1)));
@@ -4683,7 +4899,7 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// Val is passed in as S:J1:J2:imm10:imm11
// Note no trailing zero after imm11. Also the J1 and J2 values are from
// the encoded instruction. So here change to I1 and I2 values via:
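// I1 = NOT(J1 EOR S) and I2 = NOT(J2 EOR S); one trailing zero is then
// appended, giving the halfword-aligned Thumb branch offset.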
@@ -4706,7 +4922,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val & ~0xf)
return MCDisassembler::Fail;
@@ -4715,7 +4932,8 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val & ~0xf)
return MCDisassembler::Fail;
@@ -4723,8 +4941,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
const FeatureBitset &FeatureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
@@ -4825,7 +5043,8 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned R = fieldFromInstruction(Val, 5, 1);
unsigned SysM = fieldFromInstruction(Val, 0, 5);
@@ -4840,7 +5059,8 @@ static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -4862,7 +5082,7 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -4887,7 +5107,8 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4912,7 +5133,8 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4939,7 +5161,8 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4964,7 +5187,8 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -4988,8 +5212,8 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5055,8 +5279,8 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5120,8 +5344,8 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5187,8 +5411,8 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5250,8 +5474,8 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5320,8 +5544,8 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5383,8 +5607,8 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5464,8 +5688,8 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5536,8 +5760,8 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -5562,8 +5786,8 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -5588,8 +5812,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction(Insn, 4, 4);
unsigned mask = fieldFromInstruction(Insn, 0, 4);
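// Illustrative note: per the ARM ARM, pred holds the base condition and each
// further mask bit adds a then/else slot, with the lowest set bit marking the
// end of the IT block (a one-instruction "IT" has mask 0b1000).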
@@ -5617,9 +5841,9 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -5654,9 +5878,9 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus
-DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
@@ -5689,8 +5913,8 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
if (sign1 != sign2) return MCDisassembler::Fail;
@@ -5717,7 +5941,7 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
// Shift of "asr #32" is not allowed in Thumb2 mode.
@@ -5726,8 +5950,8 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
return S;
}
-static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 0, 4);
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -5753,8 +5977,8 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
@@ -5812,8 +6036,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
@@ -5871,10 +6095,10 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeNEONComplexLane64Instruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0);
@@ -5904,8 +6128,8 @@ static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
return S;
}
-static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rn = fieldFromInstruction(Val, 16, 4);
@@ -5932,7 +6156,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned CRm = fieldFromInstruction(Val, 0, 4);
@@ -5978,7 +6203,7 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
const FeatureBitset &featureBits =
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
DecodeStatus S = MCDisassembler::Success;
@@ -6030,7 +6255,7 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val == 0 && !zeroPermitted)
S = MCDisassembler::Fail;
@@ -6049,7 +6274,7 @@ static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
uint64_t LocImm = Inst.getOperand(0).getImm();
Val = LocImm + (2 << Val);
@@ -6061,7 +6286,7 @@ static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Val >= ARMCC::AL) // also exclude the non-condition NV
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(Val));
@@ -6069,7 +6294,7 @@ static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Inst.getOpcode() == ARM::MVE_LCTP)
@@ -6132,7 +6357,7 @@ static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val == 0)
@@ -6144,7 +6369,8 @@ static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo + 1 > 11)
return MCDisassembler::Fail;
@@ -6154,7 +6380,8 @@ static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 14)
return MCDisassembler::Fail;
@@ -6165,7 +6392,8 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus
DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo == 15) {
Inst.addOperand(MCOperand::createReg(ARM::APSR_NZCV));
return MCDisassembler::Success;
@@ -6181,7 +6409,7 @@ DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createImm(ARMCC::AL));
@@ -6207,8 +6435,8 @@ static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
@@ -6224,7 +6452,7 @@ static const uint16_t QQPRDecoderTable[] = {
static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 6)
return MCDisassembler::Fail;
@@ -6240,7 +6468,7 @@ static const uint16_t QQQQPRDecoderTable[] = {
static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 4)
return MCDisassembler::Fail;
@@ -6251,7 +6479,7 @@ static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
// Parse VPT mask and encode it in the MCInst as an immediate with the same
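// format as the it_mask operand: a base predicate followed by then/else
// lanes, with a trailing 1 terminating the pattern (illustrative gloss).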
@@ -6281,7 +6509,8 @@ static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// The vpred_r operand type includes an MQPR register field derived
// from the encoding. But we don't actually want to add an operand
// to the MCInst at this stage, because AddThumbPredicate will do it
@@ -6292,18 +6521,16 @@ static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::EQ : ARMCC::NE));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Code;
switch (Val & 0x3) {
case 0:
@@ -6323,17 +6550,16 @@ static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::HS : ARMCC::HI));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Code;
switch (Val) {
default:
@@ -6363,7 +6589,8 @@ static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Va
}
static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned DecodedVal = 64 - Val;
@@ -6404,10 +6631,10 @@ static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) {
}
}
-template<bool Writeback>
+template <bool Writeback>
static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
switch (Inst.getOpcode()) {
case ARM::VSTR_FPSCR_pre:
case ARM::VSTR_FPSCR_NZCVQC_pre:
@@ -6448,9 +6675,10 @@ static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val,
return S;
}
-static inline DecodeStatus DecodeMVE_MEM_pre(
- MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder,
- unsigned Rn, OperandDecoder RnDecoder, OperandDecoder AddrDecoder) {
+static inline DecodeStatus
+DecodeMVE_MEM_pre(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder, unsigned Rn,
+ OperandDecoder RnDecoder, OperandDecoder AddrDecoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = fieldFromInstruction(Val, 13, 3);
@@ -6469,7 +6697,8 @@ static inline DecodeStatus DecodeMVE_MEM_pre(
template <int shift>
static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
fieldFromInstruction(Val, 16, 3),
DecodetGPRRegisterClass,
@@ -6478,7 +6707,8 @@ static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
template <int shift>
static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
fieldFromInstruction(Val, 16, 4),
DecoderGPRRegisterClass,
@@ -6487,17 +6717,18 @@ static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
template <int shift>
static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
fieldFromInstruction(Val, 17, 3),
DecodeMQPRRegisterClass,
DecodeMveAddrModeQ<shift>);
}
-template<unsigned MinLog, unsigned MaxLog>
+template <unsigned MinLog, unsigned MaxLog>
static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (Val < MinLog || Val > MaxLog)
@@ -6507,10 +6738,10 @@ static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
return S;
}
-template<unsigned start>
-static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
+template <unsigned start>
+static DecodeStatus
+DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createImm(start + Val));
@@ -6519,7 +6750,8 @@ static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 0, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -6542,7 +6774,8 @@ static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction(Insn, 0, 4);
unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
@@ -6566,8 +6799,9 @@ static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeMVEOverlappingLongShift(
- MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) {
+static DecodeStatus
+DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned RdaLo = fieldFromInstruction(Insn, 17, 3) << 1;
@@ -6645,8 +6879,9 @@ static DecodeStatus DecodeMVEOverlappingLongShift(
return S;
}
-static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
fieldFromInstruction(Insn, 13, 3));
@@ -6664,9 +6899,9 @@ static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Addr
return S;
}
-template<bool scalar, OperandDecoder predicate_decoder>
+template <bool scalar, OperandDecoder predicate_decoder>
static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createReg(ARM::VPR));
unsigned Qn = fieldFromInstruction(Insn, 17, 3);
@@ -6703,7 +6938,7 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createReg(ARM::VPR));
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
@@ -6712,8 +6947,9 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
Inst.addOperand(MCOperand::createReg(ARM::VPR));
Inst.addOperand(MCOperand::createReg(ARM::VPR));
@@ -6721,7 +6957,8 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address
}
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
const unsigned Rd = fieldFromInstruction(Insn, 8, 4);
const unsigned Rn = fieldFromInstruction(Insn, 16, 4);
const unsigned Imm12 = fieldFromInstruction(Insn, 26, 1) << 11 |
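The hunks above are part of a tree-wide retyping of the TableGen'd decoder callbacks from an opaque `const void *Decoder` to a typed `const MCDisassembler *Decoder`. A minimal before/after sketch of what that buys at each use site (DecodeExampleBefore/DecodeExampleAfter are hypothetical decoders, not functions from this patch):

    #include "llvm/MC/MCDisassembler/MCDisassembler.h"
    #include "llvm/MC/MCInst.h"
    #include <cstdint>
    using namespace llvm;
    using DecodeStatus = MCDisassembler::DecodeStatus;

    // Before: the opaque pointer forced an unchecked cast in any decoder
    // that needed the disassembler (e.g. for subtarget feature checks).
    static DecodeStatus DecodeExampleBefore(MCInst &Inst, unsigned Val,
                                            uint64_t Address, const void *D) {
      const auto *Dis = static_cast<const MCDisassembler *>(D);
      (void)Dis->getSubtargetInfo();
      Inst.addOperand(MCOperand::createImm(Val));
      return MCDisassembler::Success;
    }

    // After: the typed parameter makes the cast unnecessary at every call.
    static DecodeStatus DecodeExampleAfter(MCInst &Inst, unsigned Val,
                                           uint64_t Address,
                                           const MCDisassembler *Decoder) {
      (void)Decoder->getSubtargetInfo();
      Inst.addOperand(MCOperand::createImm(Val));
      return MCDisassembler::Success;
    }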
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 16bc0ca179a7..d74da27fbc4f 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -17,8 +17,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -98,9 +98,20 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitInst(uint32_t Inst, char Suffix = '\0') override;
void finishAttributeSection() override;
- void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
+ void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override;
+ void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override;
+ void emitARMWinCFISaveSP(unsigned Reg) override;
+ void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override;
+ void emitARMWinCFISaveLR(unsigned Offset) override;
+ void emitARMWinCFIPrologEnd(bool Fragment) override;
+ void emitARMWinCFINop(bool Wide) override;
+ void emitARMWinCFIEpilogStart(unsigned Condition) override;
+ void emitARMWinCFIEpilogEnd() override;
+ void emitARMWinCFICustom(unsigned Opcode) override;
+
public:
ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS,
MCInstPrinter &InstPrinter, bool VerboseAsm);
@@ -239,8 +250,8 @@ void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
void ARMTargetAsmStreamer::finishAttributeSection() {}
-void
-ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
+void ARMTargetAsmStreamer::annotateTLSDescriptorSequence(
+ const MCSymbolRefExpr *S) {
OS << "\t.tlsdescseq\t" << S->getSymbol().getName() << "\n";
}
@@ -269,6 +280,101 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
OS << '\n';
}
+void ARMTargetAsmStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) {
+ if (Wide)
+ OS << "\t.seh_stackalloc_w\t" << Size << "\n";
+ else
+ OS << "\t.seh_stackalloc\t" << Size << "\n";
+}
+
+static void printRegs(formatted_raw_ostream &OS, ListSeparator &LS, int First,
+ int Last) {
+ if (First != Last)
+ OS << LS << "r" << First << "-r" << Last;
+ else
+ OS << LS << "r" << First;
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) {
+ if (Wide)
+ OS << "\t.seh_save_regs_w\t";
+ else
+ OS << "\t.seh_save_regs\t";
+ ListSeparator LS;
+ int First = -1;
+ OS << "{";
+ for (int I = 0; I <= 12; I++) {
+ if (Mask & (1 << I)) {
+ if (First < 0)
+ First = I;
+ } else {
+ if (First >= 0) {
+ printRegs(OS, LS, First, I - 1);
+ First = -1;
+ }
+ }
+ }
+ if (First >= 0)
+ printRegs(OS, LS, First, 12);
+ if (Mask & (1 << 14))
+ OS << LS << "lr";
+ OS << "}\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveSP(unsigned Reg) {
+ OS << "\t.seh_save_sp\tr" << Reg << "\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveFRegs(unsigned First,
+ unsigned Last) {
+ if (First != Last)
+ OS << "\t.seh_save_fregs\t{d" << First << "-d" << Last << "}\n";
+ else
+ OS << "\t.seh_save_fregs\t{d" << First << "}\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFISaveLR(unsigned Offset) {
+ OS << "\t.seh_save_lr\t" << Offset << "\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFIPrologEnd(bool Fragment) {
+ if (Fragment)
+ OS << "\t.seh_endprologue_fragment\n";
+ else
+ OS << "\t.seh_endprologue\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFINop(bool Wide) {
+ if (Wide)
+ OS << "\t.seh_nop_w\n";
+ else
+ OS << "\t.seh_nop\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {
+ if (Condition == ARMCC::AL)
+ OS << "\t.seh_startepilogue\n";
+ else
+ OS << "\t.seh_startepilogue_cond\t"
+ << ARMCondCodeToString(static_cast<ARMCC::CondCodes>(Condition)) << "\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFIEpilogEnd() {
+ OS << "\t.seh_endepilogue\n";
+}
+
+void ARMTargetAsmStreamer::emitARMWinCFICustom(unsigned Opcode) {
+ int I;
+ for (I = 3; I > 0; I--)
+ if (Opcode & (0xffu << (8 * I)))
+ break;
+ ListSeparator LS;
+ OS << "\t.seh_custom\t";
+ for (; I >= 0; I--)
+ OS << LS << ((Opcode >> (8 * I)) & 0xff);
+ OS << "\n";
+}
+
class ARMTargetELFStreamer : public ARMTargetStreamer {
private:
StringRef CurrentVendor;
@@ -309,7 +415,7 @@ private:
void finishAttributeSection() override;
void emitLabel(MCSymbol *Symbol) override;
- void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void annotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
// Reset state between object emissions
@@ -984,8 +1090,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
Streamer.emitThumbFunc(Symbol);
}
-void
-ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
+void ARMTargetELFStreamer::annotateTLSDescriptorSequence(
+ const MCSymbolRefExpr *S) {
getStreamer().EmitFixup(S, FK_Data_4);
}
@@ -1057,7 +1163,7 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
- SwitchSection(EHSection);
+ switchSection(EHSection);
emitValueToAlignment(4, 0, 1, 0);
}
@@ -1150,7 +1256,7 @@ void ARMELFStreamer::emitFnEnd() {
}
// Switch to the section containing FnStart
- SwitchSection(&FnStart->getSection());
+ switchSection(&FnStart->getSection());
// Clean exception handling frame information
EHReset();
@@ -1369,12 +1475,8 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
return new ARMTargetStreamer(S);
}
-MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
- const MCSubtargetInfo &STI) {
- const Triple &TT = STI.getTargetTriple();
- if (TT.isOSBinFormatELF())
- return new ARMTargetELFStreamer(S);
- return new ARMTargetStreamer(S);
+MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S) {
+ return new ARMTargetELFStreamer(S);
}
MCELFStreamer *createARMELFStreamer(MCContext &Context,
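The new ARMTargetAsmStreamer overrides print one `.seh_*` directive per WinCFI event; the only nontrivial printer is the save_regs one, which coalesces mask bits into `rN-rM` ranges via printRegs and ListSeparator. A standalone sketch of that coalescing, using plain iostream in place of formatted_raw_ostream so it runs outside LLVM:

    #include <iostream>
    #include <string>

    // Print {r4-r7,lr}-style register lists from a bitmask, as the
    // .seh_save_regs printer above does (bit I = rI, bit 14 = lr).
    static void printSaveRegs(unsigned Mask) {
      std::string Sep;
      int First = -1;
      auto Flush = [&](int Last) {
        std::cout << Sep << "r" << First;
        if (Last != First)
          std::cout << "-r" << Last;
        Sep = ",";
        First = -1;
      };
      std::cout << "{";
      for (int I = 0; I <= 12; I++) {
        if (Mask & (1u << I)) {
          if (First < 0)
            First = I;
        } else if (First >= 0) {
          Flush(I - 1);
        }
      }
      if (First >= 0)
        Flush(12);
      if (Mask & (1u << 14))
        std::cout << Sep << "lr";
      std::cout << "}\n";
    }

    int main() {
      printSaveRegs(0x40f0); // prints {r4-r7,lr}
    }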
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 77c0e3522911..febd8ab8bbc0 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -89,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
CommentString = "@";
@@ -110,7 +111,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
PrivateLabelPrefix = ".L";
SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::DwarfCFI;
+ ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
UseParensForSymbolVariant = true;
DwarfRegNumForCFI = false;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5ecacdab390f..c33bbfcc7114 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -2006,13 +2006,11 @@ getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx,
#include "ARMGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, true);
}
MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, false);
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 17ca1866cf95..3f1379f135d1 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -87,18 +87,6 @@ static bool getMRCDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
return false;
}
-static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
- std::string &Info) {
- if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() &&
- MI.getOperand(1).getImm() != 8) {
- Info = "applying IT instruction to more than one subsequent instruction is "
- "deprecated";
- return true;
- }
-
- return false;
-}
-
static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 5c8f9bfdca08..e0c992f4fae2 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -71,13 +71,13 @@ MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
bool isVerboseAsm);
MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
const MCSubtargetInfo &STI);
+MCTargetStreamer *createARMObjectTargetELFStreamer(MCStreamer &S);
+MCTargetStreamer *createARMObjectTargetWinCOFFStreamer(MCStreamer &S);
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index ed4000c7e5be..0ea51839824b 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmLayout.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ScopedPrinter.h"
using namespace llvm;
@@ -149,7 +149,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
if (FixupOffset & 0xff000000) {
Asm.getContext().reportError(Fixup.getLoc(),
"can not encode offset '0x" +
- to_hexString(FixupOffset) +
+ utohexstr(FixupOffset) +
"' in resulting scattered relocation.");
return;
}
@@ -264,7 +264,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
if (FixupOffset & 0xff000000) {
Asm.getContext().reportError(Fixup.getLoc(),
"can not encode offset '0x" +
- to_hexString(FixupOffset) +
+ utohexstr(FixupOffset) +
"' in resulting scattered relocation.");
return;
}
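Both error-message hunks swap `to_hexString` for `utohexstr` from the newly included StringExtras.h; both render a value as bare hex digits, uppercase by default. A tiny sketch of the replacement call (values illustrative):

    #include "llvm/ADT/StringExtras.h"
    #include <cassert>

    int main() {
      // utohexstr emits no "0x" prefix; callers add it themselves,
      // as the "can not encode offset '0x" messages above do.
      assert(llvm::utohexstr(0xff000000u) == "FF000000");
      assert(llvm::utohexstr(0xff000000u, /*LowerCase=*/true) == "ff000000");
    }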
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 02a2d01176fc..16d1ae62053e 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -114,15 +114,28 @@ void ARMTargetStreamer::emitArchExtension(uint64_t ArchExt) {}
void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {}
void ARMTargetStreamer::emitFPU(unsigned FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
-void
-ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
+void ARMTargetStreamer::annotateTLSDescriptorSequence(
+ const MCSymbolRefExpr *SRE) {}
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
+void ARMTargetStreamer::emitARMWinCFIAllocStack(unsigned Size, bool Wide) {}
+void ARMTargetStreamer::emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) {}
+void ARMTargetStreamer::emitARMWinCFISaveSP(unsigned Reg) {}
+void ARMTargetStreamer::emitARMWinCFISaveFRegs(unsigned First, unsigned Last) {}
+void ARMTargetStreamer::emitARMWinCFISaveLR(unsigned Offset) {}
+void ARMTargetStreamer::emitARMWinCFINop(bool Wide) {}
+void ARMTargetStreamer::emitARMWinCFIPrologEnd(bool Fragment) {}
+void ARMTargetStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {}
+void ARMTargetStreamer::emitARMWinCFIEpilogEnd() {}
+void ARMTargetStreamer::emitARMWinCFICustom(unsigned Opcode) {}
+
static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
if (STI.getCPU() == "xscale")
return ARMBuildAttrs::v5TEJ;
- if (STI.hasFeature(ARM::HasV8Ops)) {
+ if (STI.hasFeature(ARM::HasV9_0aOps))
+ return ARMBuildAttrs::v9_A;
+ else if (STI.hasFeature(ARM::HasV8Ops)) {
if (STI.hasFeature(ARM::FeatureRClass))
return ARMBuildAttrs::v8_R;
return ARMBuildAttrs::v8_A;
@@ -305,3 +318,13 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
emitAttribute(ARMBuildAttrs::BTI_extension, ARMBuildAttrs::AllowBTI);
}
}
+
+MCTargetStreamer *
+llvm::createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return createARMObjectTargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return createARMObjectTargetWinCOFFStreamer(S);
+ return new ARMTargetStreamer(S);
+}
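ARMTargetStreamer now supplies no-op defaults for every WinCFI hook and hosts the per-format object-streamer factory, so each object format only overrides what it supports. A minimal sketch of that base-defaults-plus-factory shape (TargetStreamerBase, COFFTargetStreamer, and Format are illustrative stand-ins, not LLVM types):

    #include <memory>

    // The base class gives every hook an empty body, so formats with no
    // notion of WinCFI (ELF, Mach-O, the null streamer) can ignore them.
    struct TargetStreamerBase {
      virtual ~TargetStreamerBase() = default;
      virtual void emitWinCFIAllocStack(unsigned Size, bool Wide) {
        (void)Size; (void)Wide; // default: do nothing
      }
    };

    struct COFFTargetStreamer : TargetStreamerBase {
      void emitWinCFIAllocStack(unsigned Size, bool Wide) override {
        (void)Size; (void)Wide; // would record an unwind code here
      }
    };

    enum class Format { ELF, COFF, Other };

    // Per-format dispatch mirroring createARMObjectTargetStreamer above.
    std::unique_ptr<TargetStreamerBase> createTargetStreamer(Format F) {
      if (F == Format::COFF)
        return std::make_unique<COFFTargetStreamer>();
      return std::make_unique<TargetStreamerBase>();
    }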
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index e6f649164a29..cdd7f6fb715a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -8,30 +8,59 @@
#include "ARMMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
using namespace llvm;
namespace {
class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
+ Win64EH::ARMUnwindEmitter EHStreamer;
+
public:
ARMWinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB,
std::unique_ptr<MCCodeEmitter> CE,
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
+ void emitWinEHHandlerData(SMLoc Loc) override;
+ void emitWindowsUnwindTables() override;
+ void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
+
void emitThumbFunc(MCSymbol *Symbol) override;
void finishImpl() override;
};
+void ARMWinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::emitWinEHHandlerData(Loc);
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(),
+ /* HandlerData = */ true);
+}
+
+void ARMWinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+ EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
+}
+
+void ARMWinCOFFStreamer::emitWindowsUnwindTables() {
+ if (!getNumWinFrameInfos())
+ return;
+ EHStreamer.Emit(*this);
+}
+
void ARMWinCOFFStreamer::emitThumbFunc(MCSymbol *Symbol) {
getAssembler().setIsThumbFunc(Symbol);
}
void ARMWinCOFFStreamer::finishImpl() {
emitFrames(nullptr);
+ emitWindowsUnwindTables();
MCWinCOFFStreamer::finishImpl();
}
@@ -48,3 +77,201 @@ MCStreamer *llvm::createARMWinCOFFStreamer(
return S;
}
+namespace {
+class ARMTargetWinCOFFStreamer : public llvm::ARMTargetStreamer {
+private:
+ // True if we are processing SEH directives in an epilogue.
+ bool InEpilogCFI = false;
+
+ // Symbol of the current epilog for which we are processing SEH directives.
+ MCSymbol *CurrentEpilog = nullptr;
+
+public:
+ ARMTargetWinCOFFStreamer(llvm::MCStreamer &S) : ARMTargetStreamer(S) {}
+
+ // The unwind codes on ARM Windows are documented at
+ // https://docs.microsoft.com/en-us/cpp/build/arm-exception-handling
+ void emitARMWinCFIAllocStack(unsigned Size, bool Wide) override;
+ void emitARMWinCFISaveRegMask(unsigned Mask, bool Wide) override;
+ void emitARMWinCFISaveSP(unsigned Reg) override;
+ void emitARMWinCFISaveFRegs(unsigned First, unsigned Last) override;
+ void emitARMWinCFISaveLR(unsigned Offset) override;
+ void emitARMWinCFIPrologEnd(bool Fragment) override;
+ void emitARMWinCFINop(bool Wide) override;
+ void emitARMWinCFIEpilogStart(unsigned Condition) override;
+ void emitARMWinCFIEpilogEnd() override;
+ void emitARMWinCFICustom(unsigned Opcode) override;
+
+private:
+ void emitARMWinUnwindCode(unsigned UnwindCode, int Reg, int Offset);
+};
+
+// Helper function to common out unwind code setup for those codes that can
+// belong to both prolog and epilog.
+void ARMTargetWinCOFFStreamer::emitARMWinUnwindCode(unsigned UnwindCode,
+ int Reg, int Offset) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+ MCSymbol *Label = S.emitCFILabel();
+ auto Inst = WinEH::Instruction(UnwindCode, Label, Reg, Offset);
+ if (InEpilogCFI)
+ CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst);
+ else
+ CurFrame->Instructions.push_back(Inst);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIAllocStack(unsigned Size,
+ bool Wide) {
+ unsigned Op = Win64EH::UOP_AllocSmall;
+ if (!Wide) {
+ if (Size / 4 > 0xffff)
+ Op = Win64EH::UOP_AllocHuge;
+ else if (Size / 4 > 0x7f)
+ Op = Win64EH::UOP_AllocLarge;
+ } else {
+ Op = Win64EH::UOP_WideAllocMedium;
+ if (Size / 4 > 0xffff)
+ Op = Win64EH::UOP_WideAllocHuge;
+ else if (Size / 4 > 0x3ff)
+ Op = Win64EH::UOP_WideAllocLarge;
+ }
+ emitARMWinUnwindCode(Op, -1, Size);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveRegMask(unsigned Mask,
+ bool Wide) {
+ assert(Mask != 0);
+ int Lr = (Mask & 0x4000) ? 1 : 0;
+ Mask &= ~0x4000;
+ if (Wide)
+ assert((Mask & ~0x1fff) == 0);
+ else
+ assert((Mask & ~0x00ff) == 0);
+ if (Mask && ((Mask + (1 << 4)) & Mask) == 0) {
+ if (Wide && (Mask & 0x1000) == 0 && (Mask & 0xff) == 0xf0) {
+ // One continuous range from r4 to r8-r11
+ for (int I = 11; I >= 8; I--) {
+ if (Mask & (1 << I)) {
+ emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegsR4R11LR, I, Lr);
+ return;
+ }
+ }
+ // If it actually was from r4 to r4-r7, continue below.
+ } else if (!Wide) {
+ // One continuous range from r4 to r4-r7
+ for (int I = 7; I >= 4; I--) {
+ if (Mask & (1 << I)) {
+ emitARMWinUnwindCode(Win64EH::UOP_SaveRegsR4R7LR, I, Lr);
+ return;
+ }
+ }
+ llvm_unreachable("logic error");
+ }
+ }
+ Mask |= Lr << 14;
+ if (Wide)
+ emitARMWinUnwindCode(Win64EH::UOP_WideSaveRegMask, Mask, 0);
+ else
+ emitARMWinUnwindCode(Win64EH::UOP_SaveRegMask, Mask, 0);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveSP(unsigned Reg) {
+ emitARMWinUnwindCode(Win64EH::UOP_SaveSP, Reg, 0);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveFRegs(unsigned First,
+ unsigned Last) {
+ assert(First <= Last);
+ assert(First >= 16 || Last < 16);
+ assert(First <= 31 && Last <= 31);
+ if (First == 8)
+ emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD8D15, Last, 0);
+ else if (First <= 15)
+ emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD0D15, First, Last);
+ else
+ emitARMWinUnwindCode(Win64EH::UOP_SaveFRegD16D31, First, Last);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFISaveLR(unsigned Offset) {
+ emitARMWinUnwindCode(Win64EH::UOP_SaveLR, 0, Offset);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFINop(bool Wide) {
+ if (Wide)
+ emitARMWinUnwindCode(Win64EH::UOP_WideNop, -1, 0);
+ else
+ emitARMWinUnwindCode(Win64EH::UOP_Nop, -1, 0);
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIPrologEnd(bool Fragment) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ MCSymbol *Label = S.emitCFILabel();
+ CurFrame->PrologEnd = Label;
+ WinEH::Instruction Inst =
+ WinEH::Instruction(Win64EH::UOP_End, /*Label=*/nullptr, -1, 0);
+ auto it = CurFrame->Instructions.begin();
+ CurFrame->Instructions.insert(it, Inst);
+ CurFrame->Fragment = Fragment;
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogStart(unsigned Condition) {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ InEpilogCFI = true;
+ CurrentEpilog = S.emitCFILabel();
+ CurFrame->EpilogMap[CurrentEpilog].Condition = Condition;
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFIEpilogEnd() {
+ auto &S = getStreamer();
+ WinEH::FrameInfo *CurFrame = S.EnsureValidWinFrameInfo(SMLoc());
+ if (!CurFrame)
+ return;
+
+ if (!CurrentEpilog) {
+ S.getContext().reportError(SMLoc(), "Stray .seh_endepilogue in " +
+ CurFrame->Function->getName());
+ return;
+ }
+
+ std::vector<WinEH::Instruction> &Epilog =
+ CurFrame->EpilogMap[CurrentEpilog].Instructions;
+
+ unsigned UnwindCode = Win64EH::UOP_End;
+ if (!Epilog.empty()) {
+ WinEH::Instruction EndInstr = Epilog.back();
+ if (EndInstr.Operation == Win64EH::UOP_Nop) {
+ UnwindCode = Win64EH::UOP_EndNop;
+ Epilog.pop_back();
+ } else if (EndInstr.Operation == Win64EH::UOP_WideNop) {
+ UnwindCode = Win64EH::UOP_WideEndNop;
+ Epilog.pop_back();
+ }
+ }
+
+ InEpilogCFI = false;
+ WinEH::Instruction Inst = WinEH::Instruction(UnwindCode, nullptr, -1, 0);
+ CurFrame->EpilogMap[CurrentEpilog].Instructions.push_back(Inst);
+ MCSymbol *Label = S.emitCFILabel();
+ CurFrame->EpilogMap[CurrentEpilog].End = Label;
+ CurrentEpilog = nullptr;
+}
+
+void ARMTargetWinCOFFStreamer::emitARMWinCFICustom(unsigned Opcode) {
+ emitARMWinUnwindCode(Win64EH::UOP_Custom, 0, Opcode);
+}
+
+} // end anonymous namespace
+
+MCTargetStreamer *llvm::createARMObjectTargetWinCOFFStreamer(MCStreamer &S) {
+ return new ARMTargetWinCOFFStreamer(S);
+}
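In emitARMWinCFIAllocStack the opcode is chosen from Size/4, since ARM WinEH encodes stack allocations in 4-byte words and each opcode has a fixed-width immediate. A runnable sketch of the same threshold selection (the enum values are placeholders for the Win64EH::UOP_* constants):

    #include <cstdio>

    enum Op { AllocSmall, AllocLarge, AllocHuge,
              WideAllocMedium, WideAllocLarge, WideAllocHuge };

    // Mirror of the opcode selection above: the opcode widens as the
    // word count outgrows each encoding's immediate field.
    static Op pickAllocOp(unsigned Size, bool Wide) {
      unsigned Words = Size / 4;
      if (!Wide) {
        if (Words > 0xffff) return AllocHuge;
        if (Words > 0x7f)   return AllocLarge;
        return AllocSmall;
      }
      if (Words > 0xffff) return WideAllocHuge;
      if (Words > 0x3ff)  return WideAllocLarge;
      return WideAllocMedium;
    }

    int main() {
      std::printf("%d %d %d\n",
                  pickAllocOp(64, false),      // AllocSmall: 16 words
                  pickAllocOp(2048, false),    // AllocLarge: 512 > 0x7f
                  pickAllocOp(0x80000, true)); // WideAllocHuge: 0x20000 words
    }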
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index cfd275bc0621..30785340ef12 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -145,7 +145,8 @@ private:
// Optimise the base and offsets of the given address
bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI);
// Try to fold consecutive geps together into one
- Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder);
+ Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, unsigned &Scale,
+ IRBuilder<> &Builder);
// Check whether these offsets could be moved out of the loop they're in
bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
// Pushes the given add out of the loop
@@ -390,7 +391,7 @@ MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) {
return ReturnFalse;
// Check that the constant is small enough for an incrementing gather
- int64_t Immediate = Const.getValue() << TypeScale;
+ int64_t Immediate = *Const << TypeScale;
if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0)
return ReturnFalse;
@@ -964,7 +965,7 @@ static bool hasAllGatScatUsers(Instruction *I, const DataLayout &DL) {
bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
LoopInfo *LI) {
- LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize\n"
+ LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize: "
<< *Offsets << "\n");
// Optimise the addresses of gathers/scatters by moving invariant
// calculations out of the loop
@@ -1103,8 +1104,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
return true;
}
-static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
- IRBuilder<> &Builder) {
+static Value *CheckAndCreateOffsetAdd(Value *X, unsigned ScaleX, Value *Y,
+ unsigned ScaleY, IRBuilder<> &Builder) {
// Splat the non-vector value to a vector of the given type - if the value is
// a constant (and its value isn't too big), we can even use this opportunity
// to scale it to the size of the vector elements
@@ -1156,40 +1157,49 @@ static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
ConstantInt *ConstYEl =
dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
if (!ConstXEl || !ConstYEl ||
- ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >=
+ ConstXEl->getZExtValue() * ScaleX +
+ ConstYEl->getZExtValue() * ScaleY >=
(unsigned)(1 << (TargetElemSize - 1)))
return nullptr;
}
}
- Value *Add = Builder.CreateAdd(X, Y);
+ Value *XScale = Builder.CreateVectorSplat(
+ XElType->getNumElements(),
+ Builder.getIntN(XElType->getScalarSizeInBits(), ScaleX));
+ Value *YScale = Builder.CreateVectorSplat(
+ YElType->getNumElements(),
+ Builder.getIntN(YElType->getScalarSizeInBits(), ScaleY));
+ Value *Add = Builder.CreateAdd(Builder.CreateMul(X, XScale),
+ Builder.CreateMul(Y, YScale));
- FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType());
- if (checkOffsetSize(Add, GEPType->getNumElements()))
+ if (checkOffsetSize(Add, XElType->getNumElements()))
return Add;
else
return nullptr;
}
Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP,
- Value *&Offsets,
+ Value *&Offsets, unsigned &Scale,
IRBuilder<> &Builder) {
Value *GEPPtr = GEP->getPointerOperand();
Offsets = GEP->getOperand(1);
+ Scale = DL->getTypeAllocSize(GEP->getSourceElementType());
// We only merge geps with constant offsets, because only for those
// we can make sure that we do not cause an overflow
- if (!isa<Constant>(Offsets))
+ if (GEP->getNumIndices() != 1 || !isa<Constant>(Offsets))
return nullptr;
- GetElementPtrInst *BaseGEP;
- if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) {
+ if (GetElementPtrInst *BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr)) {
// Merge the two geps into one
- Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder);
+ Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Scale, Builder);
if (!BaseBasePtr)
return nullptr;
- Offsets =
- CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder);
+ Offsets = CheckAndCreateOffsetAdd(
+ Offsets, Scale, GEP->getOperand(1),
+ DL->getTypeAllocSize(GEP->getSourceElementType()), Builder);
if (Offsets == nullptr)
return nullptr;
+ Scale = 1; // Scale is always an i8 at this point.
return BaseBasePtr;
}
return GEPPtr;
@@ -1206,15 +1216,24 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
Builder.SetInsertPoint(GEP);
Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
Value *Offsets;
- Value *Base = foldGEP(GEP, Offsets, Builder);
+ unsigned Scale;
+ Value *Base = foldGEP(GEP, Offsets, Scale, Builder);
// We only want to merge the geps if there is a real chance that they can be
// used by an MVE gather; thus the offset has to have the correct size
// (always i32 if it is not of vector type) and the base has to be a
// pointer.
if (Offsets && Base && Base != GEP) {
+ assert(Scale == 1 && "Expected to fold GEP to a scale of 1");
+ Type *BaseTy = Builder.getInt8PtrTy();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(Base->getType()))
+ BaseTy = FixedVectorType::get(BaseTy, VecTy);
GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
- GEP->getSourceElementType(), Base, Offsets, "gep.merged", GEP);
- GEP->replaceAllUsesWith(NewAddress);
+ Builder.getInt8Ty(), Builder.CreateBitCast(Base, BaseTy), Offsets,
+ "gep.merged", GEP);
+ LLVM_DEBUG(dbgs() << "Folded GEP: " << *GEP
+ << "\n new : " << *NewAddress << "\n");
+ GEP->replaceAllUsesWith(
+ Builder.CreateBitCast(NewAddress, GEP->getType()));
GEP = NewAddress;
Changed = true;
}
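The reworked foldGEP carries a byte Scale per GEP so two geps over different element types can be merged into one i8-typed gep: each index is multiplied by its source element size before the offsets are added, leaving a residual scale of 1. A worked sketch of the offset arithmetic (X and Y stand in for the constant per-lane offsets):

    #include <cstdint>
    #include <cstdio>

    // Byte-offset view of the merge: an outer
    //   gep i32, (gep i16, Base, X), Y
    // addresses Base + 2*X + 4*Y bytes, so both indices can be
    // rescaled and summed into a single i8 gep.
    int main() {
      const unsigned ScaleInner = sizeof(int16_t); // i16 element: 2 bytes
      const unsigned ScaleOuter = sizeof(int32_t); // i32 element: 4 bytes
      int X = 5, Y = 3;
      unsigned MergedByteOffset = ScaleInner * X + ScaleOuter * Y;
      std::printf("i8 gep offset = %u\n", MergedByteOffset); // 2*5+4*3 = 22
    }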
diff --git a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
index 538bd10685b0..3e76efb5133f 100644
--- a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
+++ b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
@@ -45,6 +45,7 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -176,9 +177,8 @@ static bool tryInterleave(Instruction *Start,
// Truncs
case Instruction::Trunc:
case Instruction::FPTrunc:
- if (Truncs.count(I))
+ if (!Truncs.insert(I))
continue;
- Truncs.insert(I);
Visited.insert(I);
break;
@@ -235,9 +235,8 @@ static bool tryInterleave(Instruction *Start,
case Instruction::FAdd:
case Instruction::FMul:
case Instruction::Select:
- if (Ops.count(I))
+ if (!Ops.insert(I))
continue;
- Ops.insert(I);
for (Use &Op : I->operands()) {
if (!isa<FixedVectorType>(Op->getType()))
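The two hunks above rely on SetVector::insert reporting whether the element was newly added, collapsing the separate count/insert pair into one call. A minimal sketch of the idiom:

    #include "llvm/ADT/SetVector.h"
    #include <cassert>

    int main() {
      llvm::SetVector<int> Seen;
      // insert() returns true only on the first insertion, so
      // "if (Seen.count(I)) continue; Seen.insert(I);" becomes
      // "if (!Seen.insert(I)) continue;".
      assert(Seen.insert(7) == true);  // first visit
      assert(Seen.insert(7) == false); // already tracked: skip
    }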
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index 7e31ea77f4f5..6bad9d61238e 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -404,6 +404,17 @@ bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
LoopPhi->getOperand(3).setReg(DecReg);
}
+ SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
+ if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
+ // If the LoopEnd falls through, we need to insert a t2B to the fall-through
+ // block so that the non-analyzable t2LoopEndDec doesn't fall through.
+ MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
+ BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
+ .addMBB(&*MBBI)
+ .add(predOps(ARMCC::AL));
+ }
+
// Replace the loop dec and loop end as a single instruction.
MachineInstrBuilder MI =
BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
@@ -1041,8 +1052,7 @@ bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
}
bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
if (!STI.isThumb2() || !STI.hasLOB())
return false;
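The new block in MergeLoopEnd above uses analyzeBranch to detect a fall-through (analyzable terminators, no false destination) and makes that edge explicit with a t2B before the terminators are replaced by t2LoopEndDec, which the branch analyzer cannot parse. A toy sketch of the guard, with Block standing in for MachineBasicBlock:

    // If control reaches the layout successor implicitly, add an explicit
    // branch first: the replacement terminator must not be assumed to
    // fall through.
    struct Block {
      Block *ExplicitFalseDest = nullptr; // FBB from analyzeBranch
      Block *LayoutSucc = nullptr;        // next block in layout order
    };

    static void makeFallThroughExplicit(Block &B) {
      if (!B.ExplicitFalseDest && B.LayoutSucc) {
        // corresponds to: BuildMI(..., TII->get(ARM::t2B)).addMBB(&*MBBI)
        B.ExplicitFalseDest = B.LayoutSucc;
      }
    }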
diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
index c7f451cba14f..d6d43b9143d6 100644
--- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
+++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp
@@ -312,8 +312,7 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
}
bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
return false;
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 71a82a1e3271..df64710712cc 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -176,7 +176,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
if (ArgRegsSaveSize) {
@@ -205,26 +205,38 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
return;
}
+ bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr);
+
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
switch (Reg) {
+ case ARM::R11:
+ if (HasFrameRecordArea) {
+ FRSize += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ARM::R8:
case ARM::R9:
case ARM::R10:
- case ARM::R11:
if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
LLVM_FALLTHROUGH;
+ case ARM::LR:
+ if (HasFrameRecordArea) {
+ FRSize += 4;
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ARM::R4:
case ARM::R5:
case ARM::R6:
case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
GPRCS1Size += 4;
break;
default:
@@ -232,18 +244,53 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
}
+ MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push;
+ if (HasFrameRecordArea) {
+ // Skip Frame Record setup:
+ // push {lr}
+ // mov lr, r11
+ // push {lr}
+ std::advance(MBBI, 2);
+ FRPush = MBBI++;
+ }
+
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ GPRCS1Push = MBBI;
++MBBI;
}
+ // Find last push instruction for GPRCS2 - spilling of high registers
+ // (r8-r11) could consist of multiple tPUSH and tMOVr instructions.
+ while (true) {
+ MachineBasicBlock::iterator OldMBBI = MBBI;
+ // Skip a run of tMOVr instructions
+ while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr &&
+ MBBI->getFlag(MachineInstr::FrameSetup))
+ MBBI++;
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH &&
+ MBBI->getFlag(MachineInstr::FrameSetup)) {
+ GPRCS2Push = MBBI;
+ MBBI++;
+ } else {
+ // We have reached an instruction which is not a push, so the previous
+ // run of tMOVr instructions (which may have been empty) was not part of
+ // the prologue. Reset MBBI back to the last PUSH of the prologue.
+ MBBI = OldMBBI;
+ break;
+ }
+ }
+
// Determine starting offsets of spill areas.
- unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize -
+ (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
bool HasFP = hasFP(MF);
if (HasFP)
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
+ if (HasFrameRecordArea)
+ AFI->setFrameRecordSavedAreaSize(FRSize);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
@@ -252,71 +299,45 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrOffsetInBlock = 0;
unsigned adjustedGPRCS1Size = GPRCS1Size;
if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
- tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
adjustedGPRCS1Size += NumBytes;
NumBytes = 0;
}
-
- if (adjustedGPRCS1Size) {
- CFAOffset += adjustedGPRCS1Size;
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- }
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (STI.splitFramePushPop(MF))
- break;
- LLVM_FALLTHROUGH;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- break;
- }
- }
+ CFAOffset += adjustedGPRCS1Size;
// Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
- FramePtrOffsetInBlock +=
- MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addReg(ARM::SP)
- .addImm(FramePtrOffsetInBlock / 4)
- .setMIFlags(MachineInstr::FrameSetup)
- .add(predOps(ARMCC::AL));
+ MachineBasicBlock::iterator AfterPush =
+ HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push);
+ if (HasFrameRecordArea) {
+ // We have just finished pushing the previous FP into the stack,
+ // so simply capture the SP value as the new Frame Pointer.
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr)
+ .addReg(ARM::SP)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ } else {
+ FramePtrOffsetInBlock +=
+ MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
+ BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addReg(ARM::SP)
+ .addImm(FramePtrOffsetInBlock / 4)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
+ }
+
if(FramePtrOffsetInBlock) {
- CFAOffset -= FramePtrOffsetInBlock;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
@@ -326,45 +347,69 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
- // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
- // and tMOVr instructions. We don't need to add any call frame information
- // in-between these instructions, because they do not modify the high
- // registers.
- while (true) {
- MachineBasicBlock::iterator OldMBBI = MBBI;
- // Skip a run of tMOVr instructions
- while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
- MBBI++;
- if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
- MBBI++;
- } else {
- // We have reached an instruction which is not a push, so the previous
- // run of tMOVr instructions (which may have been empty) was not part of
- // the prologue. Reset MBBI back to the last PUSH of the prologue.
- MBBI = OldMBBI;
- break;
+ // Emit call frame information for the callee-saved low registers.
+ if (GPRCS1Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+ if (adjustedGPRCS1Size) {
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12:
+ if (STI.splitFramePushPop(MF))
+ break;
+ LLVM_FALLTHROUGH;
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
}
}
// Emit call frame information for the callee-saved high registers.
- for (auto &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12: {
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- break;
- }
- default:
- break;
+ if (GPRCS2Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
+ for (auto &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12: {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+ default:
+ break;
+ }
}
}
@@ -453,21 +498,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}
-static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
- if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() &&
- isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
- return true;
- else if (MI.getOpcode() == ARM::tPOP) {
- return true;
- } else if (MI.getOpcode() == ARM::tMOVr) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
- ARM::hGPRRegClass.contains(Dst));
- }
- return false;
-}
-
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
@@ -483,26 +513,26 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI.getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
Register FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
NumBytes - ArgRegsSaveSize, ARM::NoRegister,
- MachineInstr::NoFlags);
+ MachineInstr::FrameDestroy);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
if (MBBI != MBB.begin()) {
do
--MBBI;
- while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs));
- if (!isCSRestore(*MBBI, CSRegs))
+ while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy));
+ if (!MBBI->getFlag(MachineInstr::FrameDestroy))
++MBBI;
}
// Move SP to start of FP callee save spill area.
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ NumBytes -= (AFI->getFrameRecordSavedAreaSize() +
+ AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize() +
ArgRegsSaveSize);
@@ -516,14 +546,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
- TII, *RegInfo);
+ TII, *RegInfo, MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
.addReg(ARM::R4)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
} else
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
.addReg(FramePtr)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
} else {
// For a large stack frame, we might need a scratch register to store
// the size of the frame. We know all callee-save registers are free
@@ -542,10 +574,10 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
- ScratchRegister, MachineInstr::NoFlags);
+ ScratchRegister, MachineInstr::FrameDestroy);
} else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
- ScratchRegister, MachineInstr::NoFlags);
+ ScratchRegister, MachineInstr::FrameDestroy);
}
}
@@ -637,7 +669,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
return true;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
// Copy implicit ops and popped registers, if any.
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
@@ -725,18 +758,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
.addReg(PopReg, RegState::Define)
.addReg(ARM::SP)
.addImm(MBBI->getNumExplicitOperands() - 2)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
// Move from the temporary register to the LR.
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(ARM::LR, RegState::Define)
.addReg(PopReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
// Advance past the pop instruction.
MBBI++;
// Increment the SP.
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
ArgRegsSaveSize + 4, ARM::NoRegister,
- MachineInstr::NoFlags);
+ MachineInstr::FrameDestroy);
return true;
}
@@ -746,7 +781,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(TemporaryReg, RegState::Define)
.addReg(PopReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
@@ -754,7 +790,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// perform the opposite conversion: tPOP_RET to tPOP.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
bool Popped = false;
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
@@ -769,90 +806,82 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// Erase the old instruction.
MBB.erase(MBBI);
MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
}
assert(PopReg && "Do not know how to get LR");
BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
.add(predOps(ARMCC::AL))
- .addReg(PopReg, RegState::Define);
+ .addReg(PopReg, RegState::Define)
+ .setMIFlag(MachineInstr::FrameDestroy);
emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize,
- ARM::NoRegister, MachineInstr::NoFlags);
+ ARM::NoRegister, MachineInstr::FrameDestroy);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(ARM::LR, RegState::Define)
.addReg(PopReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
if (TemporaryReg)
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(PopReg, RegState::Define)
.addReg(TemporaryReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
return true;
}
-using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>;
-
-// Return the first iterator after CurrentReg which is present in EnabledRegs,
-// or OrderEnd if no further registers are in that set. This does not advance
-// the iterator first, so returns CurrentReg if it is in EnabledRegs.
-static const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
- const ARMRegSet &EnabledRegs,
- const unsigned *OrderEnd) {
- while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg])
- ++CurrentReg;
- return CurrentReg;
-}
-
-bool Thumb1FrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- MachineFunction &MF = *MBB.getParent();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
- ARMRegSet LoRegsToSave; // r0-r7, lr
- ARMRegSet HiRegsToSave; // r8-r11
- ARMRegSet CopyRegs; // Registers which can be used after pushing
- // LoRegs for saving HiRegs.
-
- for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- Register Reg = I.getReg();
-
+static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6,
+ ARM::R7, ARM::LR};
+static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9,
+ ARM::R10, ARM::R11};
+static const SmallVector<Register> OrderedCopyRegs = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+ ARM::R5, ARM::R6, ARM::R7, ARM::LR};
+
+static void splitLowAndHighRegs(const std::set<Register> &Regs,
+ std::set<Register> &LowRegs,
+ std::set<Register> &HighRegs) {
+ for (Register Reg : Regs) {
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
- LoRegsToSave[Reg] = true;
+ LowRegs.insert(Reg);
} else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
- HiRegsToSave[Reg] = true;
+ HighRegs.insert(Reg);
} else {
llvm_unreachable("callee-saved register of unexpected class");
}
-
- if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
- !MF.getRegInfo().isLiveIn(Reg) &&
- !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
- CopyRegs[Reg] = true;
}
+}
- // Unused argument registers can be used for the high register saving.
- for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
- if (!MF.getRegInfo().isLiveIn(ArgReg))
- CopyRegs[ArgReg] = true;
+template <typename It>
+It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt,
+ const std::set<Register> &RegSet) {
+ return std::find_if(OrderedStartIt, OrderedEndIt,
+ [&](Register Reg) { return RegSet.count(Reg); });
+}
- // Push the low registers and lr
+static void pushRegsToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const TargetInstrInfo &TII,
+ const std::set<Register> &RegsToSave,
+ const std::set<Register> &CopyRegs) {
+ MachineFunction &MF = *MBB.getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!LoRegsToSave.none()) {
+ DebugLoc DL;
+
+ std::set<Register> LowRegs, HighRegs;
+ splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs);
+
+ // Push low regs first
+ if (!LowRegs.empty()) {
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
- for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
- if (LoRegsToSave[Reg]) {
+ for (unsigned Reg : OrderedLowRegs) {
+ if (LowRegs.count(Reg)) {
bool isKill = !MRI.isLiveIn(Reg);
if (isKill && !MRI.isReserved(Reg))
MBB.addLiveIn(Reg);
@@ -863,31 +892,26 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
MIB.setMIFlags(MachineInstr::FrameSetup);
}
- // Push the high registers. There are no store instructions that can access
- // these registers directly, so we have to move them to low registers, and
- // push them. This might take multiple pushes, as it is possible for there to
+ // Now push the high registers
+ // There are no store instructions that can access high registers directly,
+ // so we have to move them to low registers, and push them.
+ // This might take multiple pushes, as it is possible for there to
// be fewer low registers available than high registers which need saving.
- // These are in reverse order so that in the case where we need to use
+ // Find the first register to save.
+ // Registers must be processed in reverse order so that in case we need to use
// multiple PUSH instructions, the order of the registers on the stack still
// matches the unwind info. They need to be switched back to ascending order
// before adding to the PUSH instruction.
- static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
- ARM::R5, ARM::R4, ARM::R3,
- ARM::R2, ARM::R1, ARM::R0};
- static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
+  // Find the first register to save.
+  auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(),
+ OrderedHighRegs.rend(),
+ HighRegs);
- const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
- const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
-
- // Find the first register to save.
- const unsigned *HiRegToSave = findNextOrderedReg(
- std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
-
- while (HiRegToSave != AllHighRegsEnd) {
+ while (HiRegToSave != OrderedHighRegs.rend()) {
// Find the first low register to use.
- const unsigned *CopyReg =
- findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+ auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(),
+ OrderedCopyRegs.rend(),
+ CopyRegs);
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
@@ -895,25 +919,29 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
.setMIFlags(MachineInstr::FrameSetup);
SmallVector<unsigned, 4> RegsToPush;
- while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
- if (HiRegsToSave[*HiRegToSave]) {
+ while (HiRegToSave != OrderedHighRegs.rend() &&
+ CopyRegIt != OrderedCopyRegs.rend()) {
+ if (HighRegs.count(*HiRegToSave)) {
bool isKill = !MRI.isLiveIn(*HiRegToSave);
if (isKill && !MRI.isReserved(*HiRegToSave))
MBB.addLiveIn(*HiRegToSave);
// Emit a MOV from the high reg to the low reg.
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
- .addReg(*CopyReg, RegState::Define)
+ .addReg(*CopyRegIt, RegState::Define)
.addReg(*HiRegToSave, getKillRegState(isKill))
.add(predOps(ARMCC::AL))
.setMIFlags(MachineInstr::FrameSetup);
// Record the register that must be added to the PUSH.
- RegsToPush.push_back(*CopyReg);
-
- CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
- HiRegToSave =
- findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
+ RegsToPush.push_back(*CopyRegIt);
+
+ CopyRegIt = getNextOrderedReg(std::next(CopyRegIt),
+ OrderedCopyRegs.rend(),
+ CopyRegs);
+ HiRegToSave = getNextOrderedReg(std::next(HiRegToSave),
+ OrderedHighRegs.rend(),
+ HighRegs);
}
}
@@ -924,84 +952,63 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
// Insert the PUSH instruction after the MOVs.
MBB.insert(MI, PushMIB);
}
-
- return true;
}
-bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
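+// Pop the registers in RegsToRestore from the stack with tPOP, popping high
+// registers through low registers from AvailableCopyRegs and moving them into
+// place afterwards, since tPOP can only write low registers and PC.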
+static void popRegsFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MI,
+ const TargetInstrInfo &TII,
+ const std::set<Register> &RegsToRestore,
+ const std::set<Register> &AvailableCopyRegs,
+ bool IsVarArg, bool HasV5Ops) {
+ if (RegsToRestore.empty())
+ return;
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
-
- bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
- ARMRegSet LoRegsToRestore;
- ARMRegSet HiRegsToRestore;
- // Low registers (r0-r7) which can be used to restore the high registers.
- ARMRegSet CopyRegs;
+ std::set<Register> LowRegs, HighRegs;
+ splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs);
- for (CalleeSavedInfo I : CSI) {
- Register Reg = I.getReg();
-
- if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
- LoRegsToRestore[Reg] = true;
- } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
- HiRegsToRestore[Reg] = true;
- } else {
- llvm_unreachable("callee-saved register of unexpected class");
- }
-
- // If this is a low register not used as the frame pointer, we may want to
- // use it for restoring the high registers.
- if ((ARM::tGPRRegClass.contains(Reg)) &&
- !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
- CopyRegs[Reg] = true;
- }
-
- // If this is a return block, we may be able to use some unused return value
- // registers for restoring the high regs.
- auto Terminator = MBB.getFirstTerminator();
- if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
- CopyRegs[ARM::R0] = true;
- CopyRegs[ARM::R1] = true;
- CopyRegs[ARM::R2] = true;
- CopyRegs[ARM::R3] = true;
- for (auto Op : Terminator->implicit_operands()) {
- if (Op.isReg())
- CopyRegs[Op.getReg()] = false;
- }
- }
-
- static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7};
- static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
-
- const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
- const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+  // Pop the high registers first.
+  // There are no load instructions that can access high registers directly,
+  // so we have to pop into low registers and then move them to the high
+  // registers. This might take multiple pops, as it is possible for there to
+  // be fewer low registers available than high registers which need restoring.
// Find the first register to restore.
- auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
- HiRegsToRestore, AllHighRegsEnd);
+ auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(),
+ OrderedHighRegs.end(),
+ HighRegs);
+
+ std::set<Register> CopyRegs = AvailableCopyRegs;
+ Register LowScratchReg;
+ if (!HighRegs.empty() && CopyRegs.empty()) {
+ // No copy regs are available to pop high regs. Let's make use of a return
+ // register and the scratch register (IP/R12) to copy things around.
+ LowScratchReg = ARM::R0;
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(LowScratchReg, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ CopyRegs.insert(LowScratchReg);
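+    // R0's original value now lives in R12; it is moved back after the high
+    // registers have been popped (see the LowScratchReg restore below).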
+ }
- while (HiRegToRestore != AllHighRegsEnd) {
- assert(!CopyRegs.none());
+ while (HiRegToRestore != OrderedHighRegs.end()) {
+ assert(!CopyRegs.empty());
// Find the first low register to use.
- auto CopyReg =
- findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+ auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(),
+ OrderedCopyRegs.end(),
+ CopyRegs);
// Create the POP instruction.
- MachineInstrBuilder PopMIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
- while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+ while (HiRegToRestore != OrderedHighRegs.end() &&
+ CopyReg != OrderedCopyRegs.end()) {
// Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define);
@@ -1009,64 +1016,189 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
.addReg(*HiRegToRestore, RegState::Define)
.addReg(*CopyReg, RegState::Kill)
- .add(predOps(ARMCC::AL));
-
- CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
- HiRegToRestore =
- findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ CopyReg = getNextOrderedReg(std::next(CopyReg),
+ OrderedCopyRegs.end(),
+ CopyRegs);
+ HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore),
+ OrderedHighRegs.end(),
+ HighRegs);
}
}
- MachineInstrBuilder MIB =
- BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
-
- bool NeedsPop = false;
- for (CalleeSavedInfo &Info : llvm::reverse(CSI)) {
- Register Reg = Info.getReg();
-
- // High registers (excluding lr) have already been dealt with
- if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
- continue;
-
- if (Reg == ARM::LR) {
- Info.setRestored(false);
- if (!MBB.succ_empty() ||
- MI->getOpcode() == ARM::TCRETURNdi ||
- MI->getOpcode() == ARM::TCRETURNri)
- // LR may only be popped into PC, as part of return sequence.
- // If this isn't the return sequence, we'll need emitPopSpecialFixUp
- // to restore LR the hard way.
- // FIXME: if we don't pass any stack arguments it would be actually
- // advantageous *and* correct to do the conversion to an ordinary call
- // instruction here.
- continue;
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (!STI.hasV5TOps())
- continue;
+  // Restore the low register used as scratch if necessary.
+ if (LowScratchReg.isValid()) {
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(LowScratchReg, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
- // CMSE entry functions must return via BXNS, see emitEpilogue.
- if (AFI->isCmseNSEntryFunction())
+  // Now pop the low registers.
+ if (!LowRegs.empty()) {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ bool NeedsPop = false;
+ for (Register Reg : OrderedLowRegs) {
+ if (!LowRegs.count(Reg))
continue;
- // Pop LR into PC.
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- if (MI != MBB.end())
- MIB.copyImplicitOps(*MI);
- MI = MBB.erase(MI);
+ if (Reg == ARM::LR) {
+ if (!MBB.succ_empty() ||
+ MI->getOpcode() == ARM::TCRETURNdi ||
+ MI->getOpcode() == ARM::TCRETURNri)
+ // LR may only be popped into PC, as part of return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
+ // FIXME: if we don't pass any stack arguments it would be actually
+ // advantageous *and* correct to do the conversion to an ordinary call
+ // instruction here.
+ continue;
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (IsVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!HasV5Ops)
+ continue;
+
+ // CMSE entry functions must return via BXNS, see emitEpilogue.
+ if (AFI->isCmseNSEntryFunction())
+ continue;
+
+ // Pop LR into PC.
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(*MI);
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NeedsPop = true;
}
- MIB.addReg(Reg, getDefRegState(true));
- NeedsPop = true;
+
+    // It's illegal to emit a pop instruction without operands.
+ if (NeedsPop)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.deleteMachineInstr(MIB);
+ }
+}
+
+bool Thumb1FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ Register FPReg = RegInfo->getFrameRegister(MF);
+
+ // In case FP is a high reg, we need a separate push sequence to generate
+  // a correct Frame Record.
+ bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+ std::set<Register> FrameRecord;
+ std::set<Register> SpilledGPRs;
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR))
+ FrameRecord.insert(Reg);
+ else
+ SpilledGPRs.insert(Reg);
}
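+
+  // Push the frame record on its own first so that FP and LR end up adjacent
+  // on the stack. LR can serve as the copy register for a high FP because
+  // pushRegsToStack saves the low registers (including LR) before moving the
+  // high ones.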
- // It's illegal to emit pop instruction without operands.
- if (NeedsPop)
- MBB.insert(MI, &*MIB);
- else
- MF.deleteMachineInstr(MIB);
+ pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR});
+
+ // Determine intermediate registers which can be used for pushing high regs:
+ // - Spilled low regs
+ // - Unused argument registers
+ std::set<Register> CopyRegs;
+ for (Register Reg : SpilledGPRs)
+ if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+ !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg))
+ CopyRegs.insert(Reg);
+ for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+ if (!MF.getRegInfo().isLiveIn(ArgReg))
+ CopyRegs.insert(ArgReg);
+
+ pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs);
+
+ return true;
+}
+
+bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ bool IsVarArg = AFI->getArgRegsSaveSize() > 0;
+ Register FPReg = RegInfo->getFrameRegister(MF);
+
+ // In case FP is a high reg, we need a separate pop sequence to generate
+  // a correct Frame Record.
+ bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg);
+
+ std::set<Register> FrameRecord;
+ std::set<Register> SpilledGPRs;
+ for (CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR))
+ FrameRecord.insert(Reg);
+ else
+ SpilledGPRs.insert(Reg);
+
+ if (Reg == ARM::LR)
+ I.setRestored(false);
+ }
+
+  // Determine intermediate registers which can be used for popping high regs:
+ // - Spilled low regs
+ // - Unused return registers
+ std::set<Register> CopyRegs;
+ std::set<Register> UnusedReturnRegs;
+ for (Register Reg : SpilledGPRs)
+ if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg))
+ CopyRegs.insert(Reg);
+ auto Terminator = MBB.getFirstTerminator();
+ if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
+ UnusedReturnRegs.insert(ARM::R0);
+ UnusedReturnRegs.insert(ARM::R1);
+ UnusedReturnRegs.insert(ARM::R2);
+ UnusedReturnRegs.insert(ARM::R3);
+ for (auto Op : Terminator->implicit_operands()) {
+ if (Op.isReg())
+ UnusedReturnRegs.erase(Op.getReg());
+ }
+ }
+ CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end());
+
+ // First pop regular spilled regs.
+ popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg,
+ STI.hasV5TOps());
+
+ // LR may only be popped into pc, as part of a return sequence.
+ // Check that no other pop instructions are inserted after that.
+ assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) &&
+ "Can't insert pop after return sequence");
+
+ // Now pop Frame Record regs.
+ // Only unused return registers can be used as copy regs at this point.
+ popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg,
+ STI.hasV5TOps());
return true;
}
diff --git a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 5cdaa7f02201..155555152ced 100644
--- a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -226,9 +226,10 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
- // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
+ // IT blocks are limited to one conditional op if -arm-restrict-it
// is set: skip the loop
if (!restrictIT) {
+    LLVM_DEBUG(dbgs() << "Allowing complex IT block\n");
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
@@ -283,8 +284,7 @@ bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
}
bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) {
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
if (!STI.isThumb2())
return false;
AFI = Fn.getInfo<ARMFunctionInfo>();
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index ebd139af2219..60dbc7b92013 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -555,7 +555,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset and remaining explicit predicate operands.
- do MI.RemoveOperand(FrameRegIdx+1);
+ do MI.removeOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
MIB.add(predOps(ARMCC::AL));
@@ -592,7 +592,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
// Remove the cc_out operand.
if (HasCCOut)
- MI.RemoveOperand(MI.getNumOperands()-1);
+ MI.removeOperand(MI.getNumOperands()-1);
Offset = 0;
return true;
}
@@ -626,7 +626,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
- MI.RemoveOperand(FrameRegIdx+1);
+ MI.removeOperand(FrameRegIdx+1);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
NewOpc = immediateOffsetOpcode(Opcode);
AddrMode = ARMII::AddrModeT2_i12;
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 1cc5422523f1..7ae4b19afb60 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -205,11 +206,11 @@ namespace {
bool IsSelfLoop);
/// ReduceMI - Attempt to reduce MI, return true on success.
- bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
- bool LiveCPSR, bool IsSelfLoop);
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
+ bool IsSelfLoop, bool SkipPrologueEpilogue);
/// ReduceMBB - Reduce width of instructions in the specified basic block.
- bool ReduceMBB(MachineBasicBlock &MBB);
+ bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);
bool OptimizeSize;
bool MinimizeSize;
@@ -620,7 +621,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -668,7 +669,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -848,7 +849,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -971,7 +972,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
- LLVM_DEBUG(errs() << "Converted 32-bit: " << *MI
+ LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
<< " to 16-bit: " << *MIB);
MBB.erase_instr(MI);
@@ -1012,11 +1013,15 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
}
bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
- bool LiveCPSR, bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop,
+ bool SkipPrologueEpilogue) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
if (OPI == ReduceOpcodeMap.end())
return false;
+ if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
+ MI->getFlag(MachineInstr::FrameDestroy)))
+ return false;
const ReduceEntry &Entry = ReduceTable[OPI->second];
// Don't attempt normal reductions on "special" cases for now.
@@ -1036,7 +1041,8 @@ bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
return false;
}
-bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
+ bool SkipPrologueEpilogue) {
bool Modified = false;
// Yes, CPSR could be livein.
@@ -1080,7 +1086,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Does NextMII belong to the same bundle as MI?
bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
- if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+ if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
Modified = true;
MachineBasicBlock::instr_iterator I = std::prev(NextMII);
MI = &*I;
@@ -1130,7 +1136,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
if (PredicateFtor && !PredicateFtor(MF.getFunction()))
return false;
- STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<ARMSubtarget>();
if (STI->isThumb1Only() || STI->prefers32BitThumb())
return false;
@@ -1147,8 +1153,10 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
// predecessors.
ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
bool Modified = false;
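+  // The Windows unwind codes describe the prologue and epilogue instructions
+  // exactly, so narrowing those instructions could desynchronize them from
+  // the emitted unwind info; such functions keep them 32-bit.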
+ bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ MF.getFunction().needsUnwindTableEntry();
for (MachineBasicBlock *MBB : RPOT)
- Modified |= ReduceMBB(*MBB);
+ Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
return Modified;
}
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 5d2bc4ebe191..2a3fa3b31512 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -37,7 +37,7 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-ThumbRegisterInfo::ThumbRegisterInfo() {}
+ThumbRegisterInfo::ThumbRegisterInfo() = default;
const TargetRegisterClass *
ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
@@ -338,7 +338,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
static void removeOperands(MachineInstr &MI, unsigned i) {
unsigned Op = i;
for (unsigned e = MI.getNumOperands(); i != e; ++i)
- MI.RemoveOperand(Op);
+ MI.removeOperand(Op);
}
/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
@@ -361,6 +361,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
"This isn't needed for thumb2!");
DebugLoc dl = MI.getDebugLoc();
@@ -396,7 +397,18 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with the frame register (e.g., sp).
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ Register DestReg = FrameReg;
+
+ // In case FrameReg is a high register, move it to a low reg to ensure it
+ // can be used as an operand.
+ if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) {
+ DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
+ BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
// If we're using a register where sp was stored, convert the instruction
@@ -517,7 +529,16 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
- UseRR = true;
+ if (!ARM::hGPRRegClass.contains(FrameReg)) {
+ UseRR = true;
+ } else {
+ // If FrameReg is a high register, add the reg values in a separate
+ // instruction as the load won't be able to access it.
+ BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg)
+ .addReg(TmpReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
}
} else {
emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
@@ -526,11 +547,14 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
- if (UseRR)
+ if (UseRR) {
+ assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+ "Thumb1 loads can't use high register");
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
false);
+ }
} else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
@@ -541,18 +565,30 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
- UseRR = true;
+ if (!ARM::hGPRRegClass.contains(FrameReg)) {
+ UseRR = true;
+ } else {
+        // If FrameReg is a high register, add the reg values in a separate
+        // instruction as the store won't be able to access it.
+ BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg)
+ .addReg(VReg)
+ .addReg(FrameReg)
+ .add(predOps(ARMCC::AL));
+ }
}
} else
emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
*this);
MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
- if (UseRR)
+ if (UseRR) {
+ assert(!ARM::hGPRRegClass.contains(FrameReg) &&
+ "Thumb1 stores can't use high register");
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
false);
+ }
} else {
llvm_unreachable("Unexpected opcode!");
}
diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
index 0b512172ba10..d29dc5f70e72 100644
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -15,6 +15,8 @@
#define LLVM_AVR_H
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -27,12 +29,10 @@ FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAVRExpandPseudoPass();
FunctionPass *createAVRFrameAnalyzerPass();
-FunctionPass *createAVRRelaxMemPass();
FunctionPass *createAVRBranchSelectionPass();
void initializeAVRShiftExpandPass(PassRegistry &);
void initializeAVRExpandPseudoPass(PassRegistry &);
-void initializeAVRRelaxMemPass(PassRegistry &);
/// Contains the AVR backend.
namespace AVR {
diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index 259ab1bc7aec..0001e520b1fb 100644
--- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -14,6 +14,7 @@
#include "AVR.h"
#include "AVRMCInstLower.h"
#include "AVRSubtarget.h"
+#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRInstPrinter.h"
#include "MCTargetDesc/AVRMCExpr.h"
#include "TargetInfo/AVRTargetInfo.h"
@@ -21,6 +22,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Mangler.h"
@@ -60,6 +62,8 @@ public:
bool doFinalization(Module &M) override;
+ void emitStartOfAsmFile(Module &M) override;
+
private:
const MCRegisterInfo &MRI;
bool EmittedStructorSymbolAttrs = false;
@@ -236,6 +240,45 @@ bool AVRAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+void AVRAsmPrinter::emitStartOfAsmFile(Module &M) {
+ const AVRTargetMachine &TM = (const AVRTargetMachine &)MMI->getTarget();
+ const AVRSubtarget *SubTM = (const AVRSubtarget *)TM.getSubtargetImpl();
+ if (!SubTM)
+ return;
+
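+  // Define the register and I/O-address aliases that avr-gcc style assembly
+  // and avr-libc expect, using the subtarget-specific values.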
+ // Emit __tmp_reg__.
+ OutStreamer->emitAssignment(
+ MMI->getContext().getOrCreateSymbol(StringRef("__tmp_reg__")),
+ MCConstantExpr::create(SubTM->getRegTmpIndex(), MMI->getContext()));
+ // Emit __zero_reg__.
+ OutStreamer->emitAssignment(
+ MMI->getContext().getOrCreateSymbol(StringRef("__zero_reg__")),
+ MCConstantExpr::create(SubTM->getRegZeroIndex(), MMI->getContext()));
+ // Emit __SREG__.
+ OutStreamer->emitAssignment(
+ MMI->getContext().getOrCreateSymbol(StringRef("__SREG__")),
+ MCConstantExpr::create(SubTM->getIORegSREG(), MMI->getContext()));
+ // Emit __SP_H__ if available.
+ if (!SubTM->hasSmallStack())
+ OutStreamer->emitAssignment(
+ MMI->getContext().getOrCreateSymbol(StringRef("__SP_H__")),
+ MCConstantExpr::create(SubTM->getIORegSPH(), MMI->getContext()));
+ // Emit __SP_L__.
+ OutStreamer->emitAssignment(
+ MMI->getContext().getOrCreateSymbol(StringRef("__SP_L__")),
+ MCConstantExpr::create(SubTM->getIORegSPL(), MMI->getContext()));
+ // Emit __EIND__ if available.
+ if (SubTM->hasEIJMPCALL())
+ OutStreamer->emitAssignment(
+ MMI->getContext().getOrCreateSymbol(StringRef("__EIND__")),
+ MCConstantExpr::create(SubTM->getIORegEIND(), MMI->getContext()));
+ // Emit __RAMPZ__ if available.
+ if (SubTM->hasELPM())
+ OutStreamer->emitAssignment(
+ MMI->getContext().getOrCreateSymbol(StringRef("__RAMPZ__")),
+ MCConstantExpr::create(SubTM->getIORegRAMPZ(), MMI->getContext()));
+}
+
} // end of namespace llvm
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRAsmPrinter() {
diff --git a/llvm/lib/Target/AVR/AVRCallingConv.td b/llvm/lib/Target/AVR/AVRCallingConv.td
index b4bc35e191c0..314d59bc2a59 100644
--- a/llvm/lib/Target/AVR/AVRCallingConv.td
+++ b/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -27,6 +27,8 @@ def RetCC_AVR_BUILTIN : CallingConv<[
// Calling convention for variadic functions.
def ArgCC_AVR_Vararg : CallingConv<[
+ // i8 are always passed through the stack with a byte slot and byte alignment.
+ CCIfType<[i8], CCAssignToStack<1, 1>>,
// i16 are always passed through the stack with an alignment of 1.
CCAssignToStack<2, 1>
]>;
@@ -36,4 +38,6 @@ def ArgCC_AVR_Vararg : CallingConv<[
//===----------------------------------------------------------------------===//
def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
+def CSR_NormalTiny : CalleeSavedRegs<(add R29, R28, R19, R18)>;
def CSR_Interrupts : CalleeSavedRegs<(add(sequence "R%u", 31, 2))>;
+def CSR_InterruptsTiny : CalleeSavedRegs<(add(sequence "R%u", 31, 18))>;
diff --git a/llvm/lib/Target/AVR/AVRDevices.td b/llvm/lib/Target/AVR/AVRDevices.td
index 7ad0fe904a81..3eb5a16204e7 100644
--- a/llvm/lib/Target/AVR/AVRDevices.td
+++ b/llvm/lib/Target/AVR/AVRDevices.td
@@ -174,15 +174,13 @@ def FamilyAVR35
: Family<"avr35",
[FamilyAVR3, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureBREAK]>;
-def FamilyAVR4 : Family<"avr4", [
- FamilyAVR2, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK
-]>;
+def FamilyAVR4 : Family<"avr4",
+ [FamilyAVR2, FeatureMultiplication, FeatureMOVW,
+ FeatureLPMX, FeatureSPM, FeatureBREAK]>;
-def FamilyAVR5 : Family<"avr5", [
- FamilyAVR3, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK
-]>;
+def FamilyAVR5 : Family<"avr5",
+ [FamilyAVR3, FeatureMultiplication, FeatureMOVW,
+ FeatureLPMX, FeatureSPM, FeatureBREAK]>;
def FamilyAVR51 : Family<"avr51", [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
@@ -190,14 +188,21 @@ def FamilyAVR6 : Family<"avr6", [FamilyAVR51]>;
def FamilyTiny
: Family<"avrtiny",
- [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding]>;
-
-def FamilyXMEGA : Family<"xmega", [
- FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW, FeatureSRAM,
- FeatureJMPCALL, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX, FeatureDES, FeatureELPM,
- FeatureELPMX
-]>;
+ [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding,
+ FeatureSmallStack]>;
+
+def FamilyXMEGA3 : Family<"xmega3",
+ [FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
+ FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
+ FeatureMultiplication, FeatureMOVW, FeatureLPMX,
+ FeatureBREAK]>;
+
+def FamilyXMEGA : Family<"xmega",
+ [FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
+ FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
+ FeatureMultiplication, FeatureMOVW, FeatureLPMX,
+ FeatureSPM, FeatureBREAK, FeatureEIJMPCALL,
+ FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX]>;
def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>;
@@ -237,7 +242,7 @@ def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"avrxmega3", FamilyXMEGA3, ELFArchXMEGA3>;
def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
@@ -245,41 +250,44 @@ def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
def : Device<"avrtiny", FamilyTiny, ELFArchTiny>;
// Specific MCUs
-def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
-def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
-def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
+// NOTE: This list has been synchronized with gcc-avr 5.4.0 and avr-libc 2.0.0.
+def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1, [FeatureSmallStack]>;
+def : Device<"attiny11", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>;
+def : Device<"attiny12", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>;
+def : Device<"attiny15", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>;
+def : Device<"attiny28", FamilyAVR1, ELFArchAVR1, [FeatureSmallStack]>;
+def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
+def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
+def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
+def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
+def : Device<"attiny22", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
+def : Device<"attiny26", FamilyAVR2, ELFArchAVR2,
+ [FeatureLPMX, FeatureSmallStack]>;
def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25, [FeatureMOVW, FeatureLPMX]>;
-def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
+def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
+def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2, [FeatureSmallStack]>;
def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"ata6616c", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
+def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
+def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
+def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
+def : Device<"attiny24", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
+def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny25", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
+def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25, [FeatureSmallStack]>;
def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
@@ -299,6 +307,8 @@ def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
+def : Device<"ata6617c", FamilyAVR35, ELFArchAVR35>;
+def : Device<"ata664251", FamilyAVR35, ELFArchAVR35>;
def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
@@ -310,6 +320,7 @@ def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6612c", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
@@ -331,8 +342,17 @@ def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata5702m322", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5782", FamilyAVR5, ELFArchAVR5>;
def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5790n", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5791", FamilyAVR5, ELFArchAVR5>;
def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5831", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata6613c", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata6614q", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata8210", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata8510", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
@@ -411,6 +431,7 @@ def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64hve2", FamilyAVR5, ELFArchAVR5>;
def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
@@ -452,12 +473,13 @@ def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32c3", FamilyXMEGAU, ELFArchXMEGA2>;
def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32d3", FamilyXMEGA, ELFArchXMEGA2>;
def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
def : Device<"atxmega32e5", FamilyXMEGAU, ELFArchXMEGA2>;
def : Device<"atxmega16e5", FamilyXMEGAU, ELFArchXMEGA2>;
def : Device<"atxmega8e5", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
@@ -498,28 +520,39 @@ def : Device<"attiny20", FamilyTiny, ELFArchTiny>;
def : Device<"attiny40", FamilyTiny, ELFArchTiny>;
def : Device<"attiny102", FamilyTiny, ELFArchTiny>;
def : Device<"attiny104", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny202", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny402", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny204", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny404", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny804", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny1604", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny406", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny806", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny1606", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny807", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny1607", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny212", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny412", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny214", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny414", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny814", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny1614", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny416", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny816", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny1616", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny3216", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny417", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny817", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny1617", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"attiny3217", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny202", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny402", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny204", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny404", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny804", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1604", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny406", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny806", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1606", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny807", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1607", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny212", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny412", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny214", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny414", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny814", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1614", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny416", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny816", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1616", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny3216", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny417", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny817", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1617", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny3217", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1624", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1626", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"attiny1627", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega808", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega809", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega1608", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega1609", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega3208", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega3209", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega4808", FamilyXMEGA3, ELFArchXMEGA3>;
+def : Device<"atmega4809", FamilyXMEGA3, ELFArchXMEGA3>;
diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 144ae2b320f9..a9dc9af819e6 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -54,8 +54,6 @@ private:
const Register SCRATCH_REGISTER = AVR::R0;
/// The register that will always contain zero.
const Register ZERO_REGISTER = AVR::R1;
- /// The IO address of the status register.
- const unsigned SREG_ADDR = 0x3f;
bool expandMBB(Block &MBB);
bool expandMI(Block &MBB, BlockIt MBBI);
@@ -86,21 +84,23 @@ private:
bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);
- bool expandAtomicArithmeticOp(unsigned MemOpcode, unsigned ArithOpcode,
- Block &MBB, BlockIt MBBI);
-
- /// Specific shift implementation.
+ /// Specific shift implementation for int8.
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
bool expandLSRB7Rd(Block &MBB, BlockIt MBBI);
bool expandASRB6Rd(Block &MBB, BlockIt MBBI);
bool expandASRB7Rd(Block &MBB, BlockIt MBBI);
+
+ /// Specific shift implementation for int16.
bool expandLSLW4Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW4Rd(Block &MBB, BlockIt MBBI);
+ bool expandASRW7Rd(Block &MBB, BlockIt MBBI);
bool expandLSLW8Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW8Rd(Block &MBB, BlockIt MBBI);
bool expandASRW8Rd(Block &MBB, BlockIt MBBI);
bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
+ bool expandASRW14Rd(Block &MBB, BlockIt MBBI);
+ bool expandASRW15Rd(Block &MBB, BlockIt MBBI);
// Common implementation of LPMWRdZ and ELPMWRdZ.
bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
@@ -141,6 +141,7 @@ bool AVRExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
// Continue expanding the block until all pseudos are expanded.
do {
assert(ExpandCount < 10 && "pseudo expand limit reached");
+ (void)ExpandCount;
bool BlockModified = expandMBB(MBB);
Modified |= BlockModified;
@@ -453,7 +454,7 @@ bool AVRExpandPseudo::expand<AVR::NEGWRd>(Block &MBB, BlockIt MBBI) {
auto MIBHI =
buildMI(MBB, MBBI, AVR::NEGRd)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Kill);
// SREG is always implicitly dead
MIBHI->getOperand(2).setIsDead();
@@ -917,13 +918,13 @@ bool AVRExpandPseudo::expand<AVR::ELPMWRdZPi>(Block &MBB, BlockIt MBBI) {
template <typename Func>
bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
- // Remove the pseudo instruction.
MachineInstr &MI = *MBBI;
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
// Store the SREG.
buildMI(MBB, MBBI, AVR::INRdA)
.addReg(SCRATCH_REGISTER, RegState::Define)
- .addImm(SREG_ADDR);
+ .addImm(STI.getIORegSREG());
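+  // The I/O address of SREG is provided by the subtarget.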
  // Disable interrupts.
buildMI(MBB, MBBI, AVR::BCLRs).addImm(7); // CLI
@@ -931,7 +932,9 @@ bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
f(MI);
// Restore the status reg.
- buildMI(MBB, MBBI, AVR::OUTARr).addImm(SREG_ADDR).addReg(SCRATCH_REGISTER);
+ buildMI(MBB, MBBI, AVR::OUTARr)
+ .addImm(STI.getIORegSREG())
+ .addReg(SCRATCH_REGISTER);
MI.eraseFromParent();
return true;
@@ -955,31 +958,6 @@ bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB,
return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {});
}
-bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
- unsigned ArithOpcode, Block &MBB,
- BlockIt MBBI) {
- return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
- auto DstReg = MI.getOperand(0).getReg();
- auto PtrOp = MI.getOperand(1);
- auto SrcReg = MI.getOperand(2).getReg();
-
- unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
- unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
-
- // FIXME: this returns the new value (after the operation), not the old
- // value as the atomicrmw instruction is supposed to do!
-
- // Create the load
- buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg());
-
- // Create the arithmetic op
- buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg);
-
- // Create the store
- buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg);
- });
-}
-
Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
RegScavenger RS;
@@ -1026,56 +1004,6 @@ bool AVRExpandPseudo::expand<AVR::AtomicStore16>(Block &MBB, BlockIt MBBI) {
}
template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd8>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(8, AVR::ADDRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd16>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(16, AVR::ADDWRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadSub8>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(8, AVR::SUBRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadSub16>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(16, AVR::SUBWRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd8>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(8, AVR::ANDRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd16>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(16, AVR::ANDWRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadOr8>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(8, AVR::ORRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadOr16>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(16, AVR::ORWRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadXor8>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(8, AVR::EORRdRr, MBB, MBBI);
-}
-
-template <>
-bool AVRExpandPseudo::expand<AVR::AtomicLoadXor16>(Block &MBB, BlockIt MBBI) {
- return expandAtomicArithmeticOp(16, AVR::EORWRdRr, MBB, MBBI);
-}
-
-template <>
bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
// On AVR, there is only one core and so atomic fences do nothing.
MBBI->eraseFromParent();
@@ -1230,38 +1158,95 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
- Register SrcLoReg, SrcHiReg;
+
Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(2).getReg();
- unsigned Imm = MI.getOperand(1).getImm();
bool DstIsKill = MI.getOperand(0).isKill();
+ unsigned Imm = MI.getOperand(1).getImm();
+ Register SrcReg = MI.getOperand(2).getReg();
bool SrcIsKill = MI.getOperand(2).isKill();
- unsigned OpLo = AVR::STDPtrQRr;
- unsigned OpHi = AVR::STDPtrQRr;
- TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the
- // highest Imm value allowed for the instruction, 62 is the limit here.
- assert(Imm <= 62 && "Offset is out of range");
+ // STD's maximum displacement is 63, so larger stores have to be split into a
+ // set of operations
+ if (Imm >= 63) {
+ if (!DstIsKill) {
+ buildMI(MBB, MBBI, AVR::PUSHWRr).addReg(DstReg);
+ }
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg)
- .addImm(Imm)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ buildMI(MBB, MBBI, AVR::SUBIWRdK)
+ .addReg(DstReg, RegState::Define)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(-Imm);
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addImm(Imm + 1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ buildMI(MBB, MBBI, AVR::STWPtrRr)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
- MIBLO.setMemRefs(MI.memoperands());
- MIBHI.setMemRefs(MI.memoperands());
+ if (!DstIsKill) {
+ buildMI(MBB, MBBI, AVR::POPWRd).addDef(DstReg, RegState::Define);
+ }
+ } else {
+ unsigned OpLo = AVR::STDPtrQRr;
+ unsigned OpHi = AVR::STDPtrQRr;
+ Register SrcLoReg, SrcHiReg;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstReg)
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
+ }
MI.eraseFromParent();
return true;
}
template <>
+bool AVRExpandPseudo::expand<AVR::STDSPQRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ const MachineFunction &MF = *MBB.getParent();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+
+ assert(MI.getOperand(0).getReg() == AVR::SP &&
+ "SP is expected as base pointer");
+
+ assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
+ "unexpected STDSPQRr pseudo instruction");
+ (void)STI;
+
+ MI.setDesc(TII->get(AVR::STDPtrQRr));
+ MI.getOperand(0).setReg(AVR::R29R28);
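+  // STD cannot use SP as a base register; with a reserved call frame Y
+  // (R29:R28) is expected to mirror SP here (hence the assert above), so the
+  // store is rewritten to be Y-relative with the same displacement.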
+
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::STDWSPQRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ const MachineFunction &MF = *MBB.getParent();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+
+ assert(MI.getOperand(0).getReg() == AVR::SP &&
+ "SP is expected as base pointer");
+
+ assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
+ "unexpected STDWSPQRr pseudo instruction");
+ (void)STI;
+
+ MI.setDesc(TII->get(AVR::STDWPtrQRr));
+ MI.getOperand(0).setReg(AVR::R29R28);
+
+ return true;
+}
+
+template <>
bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
@@ -1378,6 +1363,7 @@ bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
unsigned OpShift, OpCarry;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
OpShift = AVR::ADDRdRr;
OpCarry = AVR::ADCRdRr;
@@ -1387,13 +1373,13 @@ bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
// Shift part
buildMI(MBB, MBBI, OpShift)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addReg(DstReg);
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, RegState::Kill);
// Add the carry bit
auto MIB = buildMI(MBB, MBBI, OpCarry)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
+ .addReg(DstReg, getKillRegState(DstIsKill))
.addReg(ZERO_REGISTER);
// SREG is always implicitly killed
@@ -1446,13 +1432,13 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg)
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
.addReg(DstLoReg, getKillRegState(DstIsKill));
auto MIBHI =
buildMI(MBB, MBBI, OpHi)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg)
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
@@ -1478,7 +1464,7 @@ bool AVRExpandPseudo::expand<AVR::LSLWHiRd>(Block &MBB, BlockIt MBBI) {
// add hireg, hireg <==> lsl hireg
auto MILSL =
buildMI(MBB, MBBI, AVR::ADDRdRr)
- .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
@@ -1502,16 +1488,16 @@ bool AVRExpandPseudo::expandLSLW4Rd(Block &MBB, BlockIt MBBI) {
// swap Rl
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Kill);
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ .addReg(DstLoReg, RegState::Kill);
// andi Rh, 0xf0
auto MI0 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(DstHiReg, RegState::Kill)
.addImm(0xf0);
// SREG is implicitly dead.
MI0->getOperand(3).setIsDead();
@@ -1520,7 +1506,7 @@ bool AVRExpandPseudo::expandLSLW4Rd(Block &MBB, BlockIt MBBI) {
auto MI1 =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(DstHiReg, RegState::Kill)
.addReg(DstLoReg);
// SREG is implicitly dead.
MI1->getOperand(3).setIsDead();
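The swap/andi/eor idiom above computes a 16-bit left shift by four without a loop. A minimal host-side sketch of the full six-instruction sequence (its tail falls outside the hunk shown), using an assumed test value:

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t x = 0x1234; // assumed test value
  uint8_t lo = uint8_t(x), hi = uint8_t(x >> 8);
  auto swap = [](uint8_t r) { return uint8_t((r << 4) | (r >> 4)); };
  hi = swap(hi); // swap Rh
  lo = swap(lo); // swap Rl
  hi &= 0xf0;    // andi Rh, 0xf0
  hi ^= lo;      // eor Rh, Rl (mixes in both nibbles of swapped Rl)
  lo &= 0xf0;    // andi Rl, 0xf0
  hi ^= lo;      // eor Rh, Rl (cancels the nibble that belongs to Rl)
  assert(((unsigned(hi) << 8) | lo) == uint16_t(x << 4));
  return 0;
}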
@@ -1591,7 +1577,7 @@ bool AVRExpandPseudo::expandLSLW12Rd(Block &MBB, BlockIt MBBI) {
// swap Rh
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Kill);
// andi Rh, 0xf0
auto MI0 =
@@ -1700,16 +1686,16 @@ bool AVRExpandPseudo::expandLSRW4Rd(Block &MBB, BlockIt MBBI) {
// swap Rl
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Kill);
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ .addReg(DstLoReg, RegState::Kill);
// andi Rl, 0xf
auto MI0 =
buildMI(MBB, MBBI, AVR::ANDIRdK)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(DstLoReg, RegState::Kill)
.addImm(0xf);
// SREG is implicitly dead.
MI0->getOperand(3).setIsDead();
@@ -1718,7 +1704,7 @@ bool AVRExpandPseudo::expandLSRW4Rd(Block &MBB, BlockIt MBBI) {
auto MI1 =
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(DstLoReg, RegState::Kill)
.addReg(DstHiReg);
// SREG is implicitly dead.
MI1->getOperand(3).setIsDead();
@@ -1789,7 +1775,7 @@ bool AVRExpandPseudo::expandLSRW12Rd(Block &MBB, BlockIt MBBI) {
// swap Rl
buildMI(MBB, MBBI, AVR::SWAPRd)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ .addReg(DstLoReg, RegState::Kill);
// andi Rl, 0xf
auto MI0 =
@@ -1897,6 +1883,53 @@ bool AVRExpandPseudo::expand<AVR::ASRWLoRd>(Block &MBB, BlockIt MBBI) {
return true;
}
+bool AVRExpandPseudo::expandASRW7Rd(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // lsl r24
+ // mov r24,r25
+ // rol r24
+ // sbc r25,r25
+
+ // lsl r24 <=> add r24, r24
+ buildMI(MBB, MBBI, AVR::ADDRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, RegState::Kill)
+ .addReg(DstLoReg, RegState::Kill);
+
+ // mov r24, r25
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg);
+
+ // rol r24 <=> adc r24, r24
+ buildMI(MBB, MBBI, AVR::ADCRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ // sbc r25, r25
+ auto MISBC =
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MISBC->getOperand(3).setIsDead();
+ // SREG is always implicitly killed
+ MISBC->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
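A quick host-side check of the four-instruction sequence emitted by expandASRW7Rd above, with an assumed test value; the redundant write to r24 is kept only to mirror the emitted order:

#include <cassert>
#include <cstdint>

int main() {
  const int16_t x = -12345;                // assumed test value
  uint8_t r24 = uint8_t(x);                // low byte
  uint8_t r25 = uint8_t(uint16_t(x) >> 8); // high byte
  unsigned c = r24 >> 7;                   // lsl r24: C <- old bit 7
  r24 = uint8_t(r24 << 1);                 // (overwritten by the mov below)
  r24 = r25;                               // mov r24, r25
  unsigned sign = r24 >> 7;                // rol r24: C <- old bit 7 (sign)
  r24 = uint8_t((r24 << 1) | c);
  r25 = uint8_t(0u - sign);                // sbc r25, r25: 0x00 or 0xff
  assert(int16_t((unsigned(r25) << 8) | r24) == int16_t(x >> 7));
  return 0;
}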
bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
@@ -1913,9 +1946,9 @@ bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
// Move the sign bit to the C flag.
buildMI(MBB, MBBI, AVR::ADDRdRr)
- .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill) | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
// Set upper byte to 0 or -1.
auto MIBHI =
@@ -1923,8 +1956,102 @@ bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill));
+
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
+ // SREG is always implicitly killed
+ MIBHI->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+bool AVRExpandPseudo::expandASRW14Rd(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // lsl r25
+ // sbc r24, r24
+ // lsl r25
+ // mov r25, r24
+ // rol r24
+
+ // lsl r25 <=> add r25, r25
+ buildMI(MBB, MBBI, AVR::ADDRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
+
+ // sbc r24, r24
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, RegState::Kill)
+ .addReg(DstLoReg, RegState::Kill);
+
+ // lsl r25 <=> add r25, r25
+ buildMI(MBB, MBBI, AVR::ADDRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
+
+ // mov r25, r24
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg);
+
+ // rol r24 <=> adc r24, r24
+ auto MIROL =
+ buildMI(MBB, MBBI, AVR::ADCRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIROL->getOperand(3).setIsDead();
+ // SREG is always implicitly killed
+ MIROL->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
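The same style of check for the five-instruction expandASRW14Rd sequence above, over a few assumed test values. Note that sbc Rd,Rd preserves the carry flag here (a borrow occurs exactly when C was already set), and the second lsl then overwrites it:

#include <cassert>
#include <cstdint>

int main() {
  for (int16_t x : {int16_t(0x4000), int16_t(-1), int16_t(0x1234)}) {
    uint8_t r24 = uint8_t(x), r25 = uint8_t(uint16_t(x) >> 8);
    unsigned c = r25 >> 7;         // lsl r25: C <- sign bit
    r25 = uint8_t(r25 << 1);
    r24 = uint8_t(0u - c);         // sbc r24, r24: 0x00 or 0xff
    c = r25 >> 7;                  // lsl r25: C <- original bit 14
    r25 = uint8_t(r25 << 1);       // (replaced by the mov below)
    r25 = r24;                     // mov r25, r24
    r24 = uint8_t((r24 << 1) | c); // rol r24
    assert(int16_t((unsigned(r25) << 8) | r24) == int16_t(x >> 14));
  }
  return 0;
}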
+bool AVRExpandPseudo::expandASRW15Rd(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // lsl r25
+ // sbc r25, r25
+ // mov r24, r25
+
+ // lsl r25 <=> add r25, r25
+ buildMI(MBB, MBBI, AVR::ADDRdRr)
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
+
+ // sbc r25, r25
+ auto MISBC =
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
+ if (ImpIsDead)
+ MISBC->getOperand(3).setIsDead();
+ // SREG is always implicitly killed
+ MISBC->getOperand(4).setIsKill();
+
+ // mov r24, r25
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg);
MI.eraseFromParent();
return true;
@@ -1935,8 +2062,14 @@ bool AVRExpandPseudo::expand<AVR::ASRWNRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned Imm = MI.getOperand(2).getImm();
switch (Imm) {
+ case 7:
+ return expandASRW7Rd(MBB, MBBI);
case 8:
return expandASRW8Rd(MBB, MBBI);
+ case 14:
+ return expandASRW14Rd(MBB, MBBI);
+ case 15:
+ return expandASRW15Rd(MBB, MBBI);
default:
llvm_unreachable("unimplemented asrwn");
return false;
@@ -1956,14 +2089,14 @@ bool AVRExpandPseudo::expandLSLB7Rd(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::RORRd)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, RegState::Kill)
->getOperand(3)
.setIsUndef(true);
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill));
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, RegState::Kill);
auto MIRRC =
buildMI(MBB, MBBI, AVR::RORRd)
@@ -2006,15 +2139,15 @@ bool AVRExpandPseudo::expandLSRB7Rd(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::ADCRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, RegState::Kill)
->getOperand(4)
.setIsUndef(true);
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill));
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, RegState::Kill);
auto MIRRC =
buildMI(MBB, MBBI, AVR::ADCRdRr)
@@ -2064,13 +2197,13 @@ bool AVRExpandPseudo::expandASRB6Rd(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rd
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill));
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, RegState::Kill);
buildMI(MBB, MBBI, AVR::SBCRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill));
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, RegState::Kill);
buildMI(MBB, MBBI, AVR::BLD)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -2095,8 +2228,8 @@ bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::ADDRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill));
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, RegState::Kill);
auto MIRRC =
buildMI(MBB, MBBI, AVR::SBCRdRr)
@@ -2152,26 +2285,22 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
bool ImpIsDead = MI.getOperand(2).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- if (SrcReg != DstLoReg) {
- auto MOV =
- buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg);
-
- if (SrcReg == DstHiReg) {
- MOV->getOperand(1).setIsKill();
- }
- }
+ if (SrcReg != DstLoReg)
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg);
if (SrcReg != DstHiReg) {
- buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ auto MOV = buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(SrcReg);
+ if (SrcReg != DstLoReg && SrcIsKill)
+ MOV->getOperand(1).setIsKill();
}
buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rr
.addReg(DstHiReg, RegState::Define)
- .addReg(DstHiReg)
+ .addReg(DstHiReg, RegState::Kill)
.addReg(DstHiReg, RegState::Kill);
auto SBC =
@@ -2256,6 +2385,7 @@ bool AVRExpandPseudo::expand<AVR::SPREAD>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg;
Register SrcReg = MI.getOperand(1).getReg();
@@ -2265,7 +2395,7 @@ bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::INRdA)
.addReg(AVR::R0, RegState::Define)
- .addImm(SREG_ADDR)
+ .addImm(STI.getIORegSREG())
.setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::BCLRs).addImm(0x07).setMIFlags(Flags);
@@ -2276,7 +2406,7 @@ bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
.setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(SREG_ADDR)
+ .addImm(STI.getIORegSREG())
.addReg(AVR::R0, RegState::Kill)
.setMIFlags(Flags);
@@ -2330,22 +2460,14 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::AtomicLoad16);
EXPAND(AVR::AtomicStore8);
EXPAND(AVR::AtomicStore16);
- EXPAND(AVR::AtomicLoadAdd8);
- EXPAND(AVR::AtomicLoadAdd16);
- EXPAND(AVR::AtomicLoadSub8);
- EXPAND(AVR::AtomicLoadSub16);
- EXPAND(AVR::AtomicLoadAnd8);
- EXPAND(AVR::AtomicLoadAnd16);
- EXPAND(AVR::AtomicLoadOr8);
- EXPAND(AVR::AtomicLoadOr16);
- EXPAND(AVR::AtomicLoadXor8);
- EXPAND(AVR::AtomicLoadXor16);
EXPAND(AVR::AtomicFence);
EXPAND(AVR::STSWKRr);
EXPAND(AVR::STWPtrRr);
EXPAND(AVR::STWPtrPiRr);
EXPAND(AVR::STWPtrPdRr);
EXPAND(AVR::STDWPtrQRr);
+ EXPAND(AVR::STDSPQRr);
+ EXPAND(AVR::STDWSPQRr);
EXPAND(AVR::INWRdA);
EXPAND(AVR::OUTWARr);
EXPAND(AVR::PUSHWRr);
diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index b3bc9ede205e..ec8b74e435ce 100644
--- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -73,7 +73,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0)
- .addImm(0x3f)
+ .addImm(STI.getIORegSREG())
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr))
.addReg(AVR::R0, RegState::Kill)
@@ -144,7 +144,7 @@ static void restoreStatusRegister(MachineFunction &MF, MachineBasicBlock &MBB) {
if (AFI->isInterruptOrSignalHandler()) {
BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
- .addImm(0x3f)
+ .addImm(STI.getIORegSREG())
.addReg(AVR::R0, RegState::Kill);
BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
}
@@ -201,8 +201,8 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the frame pointer by doing FP += <size>.
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28)
- .addReg(AVR::R29R28, RegState::Kill)
- .addImm(FrameSize);
+ .addReg(AVR::R29R28, RegState::Kill)
+ .addImm(FrameSize);
// The SREG implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -298,11 +298,11 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters(
/// Replace pseudo store instructions that pass arguments through the stack with
/// real instructions.
static void fixStackStores(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const TargetInstrInfo &TII, Register FP) {
+ MachineBasicBlock::iterator StartMI,
+ const TargetInstrInfo &TII) {
// Iterate through the BB until we hit a call instruction or we reach the end.
for (MachineInstr &MI :
- llvm::make_early_inc_range(llvm::make_range(MI, MBB.end()))) {
+ llvm::make_early_inc_range(llvm::make_range(StartMI, MBB.end()))) {
if (MI.isCall())
break;
@@ -313,7 +313,7 @@ static void fixStackStores(MachineBasicBlock &MBB,
continue;
assert(MI.getOperand(0).getReg() == AVR::SP &&
- "Invalid register, should be SP!");
+ "SP is expected as base pointer");
// Replace this instruction with a regular store. Use Z (R31:R30) as the
// base pointer since it is guaranteed to contain a copy of SP.
@@ -321,7 +321,7 @@ static void fixStackStores(MachineBasicBlock &MBB,
(Opcode == AVR::STDWSPQRr) ? AVR::STDWPtrQRr : AVR::STDPtrQRr;
MI.setDesc(TII.get(STOpc));
- MI.getOperand(0).setReg(FP);
+ MI.getOperand(0).setReg(AVR::R31R30);
}
}
@@ -331,11 +331,7 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
const AVRInstrInfo &TII = *STI.getInstrInfo();
- // There is nothing to insert when the call frame memory is allocated during
- // function entry. Delete the call frame pseudo and replace all pseudo stores
- // with real store instructions.
if (hasReservedCallFrame(MF)) {
- fixStackStores(MBB, MI, TII, AVR::R29R28);
return MBB.erase(MI);
}
@@ -343,57 +339,58 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
unsigned int Opcode = MI->getOpcode();
int Amount = TII.getFrameSize(*MI);
- // ADJCALLSTACKUP and ADJCALLSTACKDOWN are converted to adiw/subi
- // instructions to read and write the stack pointer in I/O space.
- if (Amount != 0) {
- assert(getStackAlign() == Align(1) && "Unsupported stack alignment");
-
- if (Opcode == TII.getCallFrameSetupOpcode()) {
- // Update the stack pointer.
- // In many cases this can be done far more efficiently by pushing the
- // relevant values directly to the stack. However, doing that correctly
- // (in the right order, possibly skipping some empty space for undef
- // values, etc) is tricky and thus left to be optimized in the future.
- BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
-
- MachineInstr *New =
- BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
- .addReg(AVR::R31R30, RegState::Kill)
- .addImm(Amount);
- New->getOperand(3).setIsDead();
-
- BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP).addReg(AVR::R31R30);
-
- // Make sure the remaining stack stores are converted to real store
- // instructions.
- fixStackStores(MBB, MI, TII, AVR::R31R30);
- } else {
- assert(Opcode == TII.getCallFrameDestroyOpcode());
-
- // Note that small stack changes could be implemented more efficiently
- // with a few pop instructions instead of the 8-9 instructions now
- // required.
-
- // Select the best opcode to adjust SP based on the offset size.
- unsigned addOpcode;
- if (isUInt<6>(Amount)) {
- addOpcode = AVR::ADIWRdK;
- } else {
- addOpcode = AVR::SUBIWRdK;
- Amount = -Amount;
- }
+ if (Amount == 0) {
+ return MBB.erase(MI);
+ }
+
+ assert(getStackAlign() == Align(1) && "Unsupported stack alignment");
+
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ // Update the stack pointer.
+ // In many cases this can be done far more efficiently by pushing the
+ // relevant values directly to the stack. However, doing that correctly
+ // (in the right order, possibly skipping some empty space for undef
+ // values, etc) is tricky and thus left to be optimized in the future.
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
+
+ MachineInstr *New =
+ BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
+ .addReg(AVR::R31R30, RegState::Kill)
+ .addImm(Amount);
+ New->getOperand(3).setIsDead();
- // Build the instruction sequence.
- BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP).addReg(AVR::R31R30);
- MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(addOpcode), AVR::R31R30)
- .addReg(AVR::R31R30, RegState::Kill)
- .addImm(Amount);
- New->getOperand(3).setIsDead();
+ // Make sure the remaining stack stores are converted to real store
+ // instructions.
+ fixStackStores(MBB, MI, TII);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
- BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP)
- .addReg(AVR::R31R30, RegState::Kill);
+ // Note that small stack changes could be implemented more efficiently
+ // with a few pop instructions instead of the 8-9 instructions now
+ // required.
+
+ // Select the best opcode to adjust SP based on the offset size.
+ unsigned AddOpcode;
+
+ if (isUInt<6>(Amount)) {
+ AddOpcode = AVR::ADIWRdK;
+ } else {
+ AddOpcode = AVR::SUBIWRdK;
+ Amount = -Amount;
}
+
+ // Build the instruction sequence.
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
+
+ MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AddOpcode), AVR::R31R30)
+ .addReg(AVR::R31R30, RegState::Kill)
+ .addImm(Amount);
+ New->getOperand(3).setIsDead();
+
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP)
+ .addReg(AVR::R31R30, RegState::Kill);
}
return MBB.erase(MI);
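The AddOpcode choice above hinges on ADIW accepting only a 6-bit unsigned immediate. A host-side sketch of the selection (mnemonics approximate; SUBIWRdK really expands to a subi/sbci pair on r28/r29):

#include <cstdio>

static bool isUInt6(int X) { return X >= 0 && X < 64; } // mirrors isUInt<6>

int main() {
  for (int Amount : {5, 63, 64, 200}) {
    if (isUInt6(Amount))
      std::printf("restore %3d bytes -> adiw r28, %d\n", Amount, Amount);
    else
      std::printf("restore %3d bytes -> subi/sbci r29:r28, %d\n", Amount,
                  -Amount);
  }
  return 0;
}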
@@ -420,7 +417,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>();
+ AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
// If there are no fixed frame indexes during this stage it means there
// are allocas present in the function.
@@ -431,7 +428,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass {
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
// Variable sized objects have size 0.
if (MFI.getObjectSize(i)) {
- FuncInfo->setHasAllocas(true);
+ AFI->setHasAllocas(true);
break;
}
}
@@ -460,7 +457,7 @@ struct AVRFrameAnalyzer : public MachineFunctionPass {
}
if (MFI.isFixedObjectIndex(MO.getIndex())) {
- FuncInfo->setHasStackArgs(true);
+ AFI->setHasStackArgs(true);
return false;
}
}
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index a58fedf6cd36..7a1e7b1535a7 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -13,6 +13,7 @@
#include "AVRISelLowering.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -269,8 +270,6 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
}
SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
- //: TODO: this function has to be completely rewritten to produce optimal
- // code, for now it's producing very long but correct code.
unsigned Opc8;
const SDNode *N = Op.getNode();
EVT VT = Op.getValueType();
@@ -371,6 +370,27 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
ShiftAmount = 0;
}
} else if (VT.getSizeInBits() == 16) {
+ if (Op.getOpcode() == ISD::SRA)
+ // Special optimization for int16 arithmetic right shift.
+ switch (ShiftAmount) {
+ case 15:
+ Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
+ DAG.getConstant(15, dl, VT));
+ ShiftAmount = 0;
+ break;
+ case 14:
+ Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
+ DAG.getConstant(14, dl, VT));
+ ShiftAmount = 0;
+ break;
+ case 7:
+ Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
+ DAG.getConstant(7, dl, VT));
+ ShiftAmount = 0;
+ break;
+ default:
+ break;
+ }
if (4 <= ShiftAmount && ShiftAmount < 8)
switch (Op.getOpcode()) {
case ISD::SHL:
@@ -1023,17 +1043,24 @@ bool AVRTargetLowering::isOffsetFoldingLegal(
/// Registers for calling conventions, ordered in reverse as required by ABI.
/// Both arrays must be of the same length.
-static const MCPhysReg RegList8[] = {
+static const MCPhysReg RegList8AVR[] = {
AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8};
-static const MCPhysReg RegList16[] = {
+static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,
+ AVR::R22, AVR::R21, AVR::R20};
+static const MCPhysReg RegList16AVR[] = {
AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
AVR::R11R10, AVR::R10R9, AVR::R9R8};
+static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,
+ AVR::R24R23, AVR::R23R22,
+ AVR::R22R21, AVR::R21R20};
-static_assert(array_lengthof(RegList8) == array_lengthof(RegList16),
+static_assert(array_lengthof(RegList8AVR) == array_lengthof(RegList16AVR),
+ "8-bit and 16-bit register arrays must be of equal length");
+static_assert(array_lengthof(RegList8Tiny) == array_lengthof(RegList16Tiny),
"8-bit and 16-bit register arrays must be of equal length");
/// Analyze incoming and outgoing function arguments. We need custom C++ code
@@ -1041,10 +1068,22 @@ static_assert(array_lengthof(RegList8) == array_lengthof(RegList16),
/// In addition, all pieces of a certain argument have to be passed either
/// using registers or the stack but never mixing both.
template <typename ArgT>
-static void
-analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F,
- const DataLayout *TD, const SmallVectorImpl<ArgT> &Args,
- SmallVectorImpl<CCValAssign> &ArgLocs, CCState &CCInfo) {
+static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
+ const Function *F, const DataLayout *TD,
+ const SmallVectorImpl<ArgT> &Args,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ CCState &CCInfo, bool Tiny) {
+ // Choose the proper register list for argument passing according to the ABI.
+ ArrayRef<MCPhysReg> RegList8;
+ ArrayRef<MCPhysReg> RegList16;
+ if (Tiny) {
+ RegList8 = makeArrayRef(RegList8Tiny, array_lengthof(RegList8Tiny));
+ RegList16 = makeArrayRef(RegList16Tiny, array_lengthof(RegList16Tiny));
+ } else {
+ RegList8 = makeArrayRef(RegList8AVR, array_lengthof(RegList8AVR));
+ RegList16 = makeArrayRef(RegList16AVR, array_lengthof(RegList16AVR));
+ }
+
unsigned NumArgs = Args.size();
// This is the index of the last used register, in RegList*.
// -1 means R26 (R26 is never actually used in CC).
@@ -1074,7 +1113,7 @@ analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F,
unsigned RegIdx = RegLastIdx + TotalBytes;
RegLastIdx = RegIdx;
// If there are not enough registers, use the stack
- if (RegIdx >= array_lengthof(RegList8)) {
+ if (RegIdx >= RegList8.size()) {
UseStack = true;
}
for (; i != j; ++i) {
@@ -1123,13 +1162,24 @@ getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
/// one value, possibly an aggregate, and it is limited to 8 bytes.
template <typename ArgT>
static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
- CCState &CCInfo) {
+ CCState &CCInfo, bool Tiny) {
unsigned NumArgs = Args.size();
unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
// CanLowerReturn() guarantees this assertion.
assert(TotalBytes <= 8 &&
"return values greater than 8 bytes cannot be lowered");
+ // Choose the proper register list for lowering return values according to
+ // the ABI.
+ ArrayRef<MCPhysReg> RegList8;
+ ArrayRef<MCPhysReg> RegList16;
+ if (Tiny) {
+ RegList8 = makeArrayRef(RegList8Tiny, array_lengthof(RegList8Tiny));
+ RegList16 = makeArrayRef(RegList16Tiny, array_lengthof(RegList16Tiny));
+ } else {
+ RegList8 = makeArrayRef(RegList8AVR, array_lengthof(RegList8AVR));
+ RegList16 = makeArrayRef(RegList16AVR, array_lengthof(RegList16AVR));
+ }
+
// The GCC ABI rounds the return size up to the next even number;
// in practice, any size above 4 bytes always rounds up to 8.
if (TotalBytes > 4) {
@@ -1174,7 +1224,8 @@ SDValue AVRTargetLowering::LowerFormalArguments(
if (isVarArg) {
CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
} else {
- analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo);
+ analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,
+ Subtarget.hasTinyEncoding());
}
SDValue ArgValue;
@@ -1285,8 +1336,8 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const Function *F = nullptr;
if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
-
- F = cast<Function>(GV);
+ if (isa<Function>(GV))
+ F = cast<Function>(GV);
Callee =
DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout()));
} else if (const ExternalSymbolSDNode *ES =
@@ -1299,7 +1350,8 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isVarArg) {
CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
} else {
- analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo);
+ analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,
+ Subtarget.hasTinyEncoding());
}
// Get a count of how many bytes are to be pushed on the stack.
@@ -1444,7 +1496,7 @@ SDValue AVRTargetLowering::LowerCallResult(
if (CallConv == CallingConv::AVR_BUILTIN) {
CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
} else {
- analyzeReturnValues(Ins, CCInfo);
+ analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());
}
// Copy all of the result registers out of their specified physreg.
@@ -1495,7 +1547,7 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (CallConv == CallingConv::AVR_BUILTIN) {
CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
} else {
- analyzeReturnValues(Outs, CCInfo);
+ analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
}
SDValue Flag;
@@ -1707,6 +1759,60 @@ AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const {
return BB;
}
+// Lower atomicrmw operation to disable interrupts, do operation, and restore
+// interrupts. This works because all AVR microcontrollers are single core.
+MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
+ MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ MachineBasicBlock::iterator I(MI);
+ const Register SCRATCH_REGISTER = AVR::R0;
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Example instruction sequence, for an atomic 8-bit add:
+ // ldi r25, 5
+ // in r0, SREG
+ // cli
+ // ld r24, X
+ // add r25, r24
+ // st X, r25
+ // out SREG, r0
+
+ const TargetRegisterClass *RC =
+ (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
+ unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
+ unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
+
+ // Disable interrupts.
+ BuildMI(*BB, I, dl, TII.get(AVR::INRdA), SCRATCH_REGISTER)
+ .addImm(Subtarget.getIORegSREG());
+ BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);
+
+ // Load the original value.
+ BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+
+ // Do the arithmetic operation.
+ Register Result = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, I, dl, TII.get(Opcode), Result)
+ .addReg(MI.getOperand(0).getReg())
+ .add(MI.getOperand(2));
+
+ // Store the result.
+ BuildMI(*BB, I, dl, TII.get(StoreOpcode))
+ .add(MI.getOperand(1))
+ .addReg(Result);
+
+ // Restore interrupts.
+ BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
+ .addImm(Subtarget.getIORegSREG())
+ .addReg(SCRATCH_REGISTER);
+
+ // Remove the pseudo instruction.
+ MI.eraseFromParent();
+ return BB;
+}
+
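A rough source-level equivalent of the sequence emitted by insertAtomicArithmeticOp, assuming an avr-gcc/avr-libc environment (SREG and cli() come from <avr/io.h> and <avr/interrupt.h>); the pseudo's result operand receives the old value, as atomicrmw requires:

#include <avr/interrupt.h>
#include <avr/io.h>
#include <stdint.h>

uint8_t atomic_fetch_add_u8(volatile uint8_t *p, uint8_t v) {
  uint8_t saved = SREG; // in r0, SREG
  cli();                // clear the global interrupt flag
  uint8_t old = *p;     // ld r24, X
  *p = old + v;         // add, then st X, r25
  SREG = saved;         // out SREG, r0 (restores the I flag if it was set)
  return old;
}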
MachineBasicBlock *
AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -1731,6 +1837,26 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return insertMul(MI, MBB);
case AVR::CopyR1:
return insertCopyR1(MI, MBB);
+ case AVR::AtomicLoadAdd8:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
+ case AVR::AtomicLoadAdd16:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);
+ case AVR::AtomicLoadSub8:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);
+ case AVR::AtomicLoadSub16:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);
+ case AVR::AtomicLoadAnd8:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);
+ case AVR::AtomicLoadAnd16:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);
+ case AVR::AtomicLoadOr8:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);
+ case AVR::AtomicLoadOr16:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);
+ case AVR::AtomicLoadXor8:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);
+ case AVR::AtomicLoadXor16:
+ return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);
}
assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
index 116417b61566..c5c937c983ed 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -189,6 +189,9 @@ private:
MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *insertCopyR1(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *insertAtomicArithmeticOp(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode, int Width) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRInstrFormats.td b/llvm/lib/Target/AVR/AVRInstrFormats.td
index 2bcbcdfbf925..83c32c80dfb9 100644
--- a/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -179,7 +179,8 @@ class FSTDLDD<bit type, dag outs, dag ins, string asmstr, list<dag> pattern>
// r = src/dst register
//
// Note that the bit labelled 'i' above does not follow a simple pattern,
-// so there exists a post encoder method to set it manually.
+// so there exists a post encoder method to set it manually. A custom
+// decoder method is required as well.
//===---------------------------------------------------------------------===//
class FSTLD<bit type, bits<2> mode, dag outs, dag ins, string asmstr,
list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
@@ -200,6 +201,7 @@ class FSTLD<bit type, bits<2> mode, dag outs, dag ins, string asmstr,
let Inst{3 - 2} = ptrreg{1 - 0};
let Inst{1 - 0} = mode{1 - 0};
+ let DecoderMethod = "decodeLoadStore";
let PostEncoderMethod = "loadStorePostEncoder";
}
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index ac52c47f93d5..510000f231fa 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -46,8 +46,9 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
const AVRRegisterInfo &TRI = *STI.getRegisterInfo();
unsigned Opc;
- // Not all AVR devices support the 16-bit `MOVW` instruction.
if (AVR::DREGSRegClass.contains(DestReg, SrcReg)) {
+ // If our AVR has `movw`, let's emit that; otherwise let's emit two separate
+ // `mov`s.
if (STI.hasMOVW() && AVR::DREGSMOVWRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(AVR::MOVWRdRr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -57,11 +58,17 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
TRI.splitReg(DestReg, DestLo, DestHi);
TRI.splitReg(SrcReg, SrcLo, SrcHi);
- // Copy each individual register with the `MOV` instruction.
- BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
- .addReg(SrcLo, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
- .addReg(SrcHi, getKillRegState(KillSrc));
+ if (DestLo == SrcHi) {
+ BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
+ .addReg(SrcHi, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
+ .addReg(SrcLo, getKillRegState(KillSrc));
+ } else {
+ BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
+ .addReg(SrcLo, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
+ .addReg(SrcHi, getKillRegState(KillSrc));
+ }
}
} else {
if (AVR::GPR8RegClass.contains(DestReg, SrcReg)) {
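The reordering above matters when the pair copy overlaps: with Dest = R26R25 and Src = R25R24, DestLo aliases SrcHi, so copying low first would clobber r25 before it is read. A host-side sketch with assumed register numbering:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t reg[32] = {0};
  reg[24] = 0x34;    // SrcLo (r24)
  reg[25] = 0x12;    // SrcHi (r25); source pair r25:r24 holds 0x1234
  // DestLo (r25) == SrcHi (r25): copy the high half first.
  reg[26] = reg[25]; // mov r26, r25
  reg[25] = reg[24]; // mov r25, r24
  assert(reg[26] == 0x12 && reg[25] == 0x34);
  return 0;
}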
@@ -299,9 +306,7 @@ bool AVRInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (std::next(I) != MBB.end()) {
- std::next(I)->eraseFromParent();
- }
+ MBB.erase(std::next(I), MBB.end());
Cond.clear();
FBB = nullptr;
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 2b96dc0b833a..f20ba5edf208 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -177,12 +177,16 @@ def memri : Operand<iPTR> {
let PrintMethod = "printMemri";
let EncoderMethod = "encodeMemri";
+ let DecoderMethod = "decodeMemri";
let ParserMatchClass = MemriAsmOperand;
}
// Address operand for `SP+imm` used by STD{W}SPQRr
-def memspi : Operand<iPTR> { let MIOperandInfo = (ops GPRSP, i16imm); }
+def memspi : Operand<iPTR> {
+ let MIOperandInfo = (ops GPRSP, i16imm);
+ let PrintMethod = "printMemspi";
+}
def relbrtarget_7 : Operand<OtherVT> {
let PrintMethod = "printPCRelImm";
@@ -194,6 +198,11 @@ def brtarget_13 : Operand<OtherVT> {
let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_13_pcrel>";
}
+def rcalltarget_13 : Operand<i16> {
+ let PrintMethod = "printPCRelImm";
+ let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_13_pcrel>";
+}
+
// The target of a 22 or 16-bit call/jmp instruction.
def call_target : Operand<iPTR> {
let EncoderMethod = "encodeCallTarget";
@@ -965,10 +974,8 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
let isCall = 1 in {
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
- let Uses = [SP] in def RCALLk : FBRk<1, (outs),
- (ins brtarget_13
- : $target),
- "rcall\t$target", []>;
+ let Uses = [SP] in def RCALLk : FBRk<1, (outs), (ins rcalltarget_13:$k),
+ "rcall\t$k", [(AVRcall imm:$k)]>;
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
@@ -985,13 +992,10 @@ let isCall = 1 in {
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
//
- //: TODO: the imm field can be either 16 or 22 bits in devices with more
+ // TODO: the imm field can be either 16 or 22 bits in devices with more
// than 64k of ROM, fix it once we support the largest devices.
- let Uses = [SP] in def CALLk : F32BRk<0b111, (outs),
- (ins call_target
- : $k),
- "call\t$k", [(AVRcall imm
- : $k)]>,
+ let Uses = [SP] in def CALLk : F32BRk<0b111, (outs), (ins call_target:$k),
+ "call\t$k", [(AVRcall imm:$k)]>,
Requires<[HasJMPCALL]>;
}
@@ -1446,27 +1450,14 @@ class AtomicStore<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
: $rd, DRC
: $rr)]>;
-let Constraints =
- "@earlyclobber $rd" in class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
- RegisterClass PTRRC>
- : Pseudo<(outs DRC
- : $rd),
- (ins PTRRC
- : $rr, DRC
- : $operand),
- "atomic_op", [(set DRC
- : $rd, (Op i16
- : $rr, DRC
- : $operand))]>;
-
-// FIXME: I think 16-bit atomic binary ops need to mark
-// r0 as clobbered.
+class AtomicLoadOp<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
+ : Pseudo<(outs DRC:$rd),
+ (ins PTRRC:$rr, DRC:$operand),
+ "atomic_op", [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>;
// Atomic instructions
// ===================
//
-// These are all expanded by AVRExpandPseudoInsts
-//
// 8-bit operations can use any pointer register because
// they are expanded directly into an LD/ST instruction.
//
@@ -1482,16 +1473,18 @@ def AtomicStore16 : AtomicStore<atomic_store_16, DREGS, PTRDISPREGS>;
class AtomicLoadOp8<PatFrag Op> : AtomicLoadOp<Op, GPR8, PTRREGS>;
class AtomicLoadOp16<PatFrag Op> : AtomicLoadOp<Op, DREGS, PTRDISPREGS>;
-def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
-def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>;
-def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
-def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>;
-def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
-def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>;
-def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
-def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
-def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
-def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>;
+let usesCustomInserter=1 in {
+ def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
+ def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>;
+ def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
+ def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>;
+ def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
+ def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>;
+ def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
+ def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
+ def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
+ def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>;
+}
def AtomicFence
: Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence timm, timm)]>;
@@ -1954,7 +1947,7 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
: $src)),
(implicit SREG)]>;
- def ASRWNRd : Pseudo<(outs DLDREGS
+ def ASRWNRd : Pseudo<(outs DREGS
: $rd),
(ins DREGS
: $src, imm16
@@ -2122,15 +2115,17 @@ def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8 : $rd, GPR8 : $rd)>;
// Sets all bits in a register.
def : InstAlias<"ser\t$rd", (LDIRdK LD8 : $rd, 0xff), 0>;
-let Defs = [SREG] in def BSETs : FS<0, (outs),
- (ins i8imm
- : $s),
- "bset\t$s", []>;
+let hasSideEffects=1 in {
+ let Defs = [SREG] in def BSETs : FS<0,
+ (outs),
+ (ins i8imm:$s),
+ "bset\t$s", []>;
-let Defs = [SREG] in def BCLRs : FS<1, (outs),
- (ins i8imm
- : $s),
- "bclr\t$s", []>;
+ let Defs = [SREG] in def BCLRs : FS<1,
+ (outs),
+ (ins i8imm:$s),
+ "bclr\t$s", []>;
+}
// Set/clear aliases for the carry (C) status flag (bit 0).
def : InstAlias<"sec", (BSETs 0)>;
@@ -2457,8 +2452,12 @@ def : Pat<(adde i8
: $src2))>;
// Calls.
-def : Pat<(AVRcall(i16 tglobaladdr : $dst)), (CALLk tglobaladdr : $dst)>;
-def : Pat<(AVRcall(i16 texternalsym : $dst)), (CALLk texternalsym : $dst)>;
+let Predicates = [HasJMPCALL] in {
+ def : Pat<(AVRcall(i16 tglobaladdr:$dst)), (CALLk tglobaladdr:$dst)>;
+ def : Pat<(AVRcall(i16 texternalsym:$dst)), (CALLk texternalsym:$dst)>;
+}
+def : Pat<(AVRcall(i16 tglobaladdr:$dst)), (RCALLk tglobaladdr:$dst)>;
+def : Pat<(AVRcall(i16 texternalsym:$dst)), (RCALLk texternalsym:$dst)>;
// `anyext`
def : Pat<(i16(anyext i8
diff --git a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
index 8b1c247eb6a7..da4c48559d9e 100644
--- a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
+++ b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -61,6 +61,13 @@ public:
MF.getFunction().hasFnAttribute("signal");
}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override {
+ return DestMF.cloneInfo<AVRMachineFunctionInfo>(*this);
+ }
+
bool getHasSpills() const { return HasSpills; }
void setHasSpills(bool B) { HasSpills = B; }
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 5dd7f5c55695..87e6558c12c2 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -36,15 +36,20 @@ AVRRegisterInfo::AVRRegisterInfo() : AVRGenRegisterInfo(0) {}
const uint16_t *
AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const AVRMachineFunctionInfo *AFI = MF->getInfo<AVRMachineFunctionInfo>();
-
- return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_SaveList
- : CSR_Normal_SaveList;
+ const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
+ if (STI.hasTinyEncoding())
+ return AFI->isInterruptOrSignalHandler() ? CSR_InterruptsTiny_SaveList
+ : CSR_NormalTiny_SaveList;
+ else
+ return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_SaveList
+ : CSR_Normal_SaveList;
}
const uint32_t *
AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- return CSR_Normal_RegMask;
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ return STI.hasTinyEncoding() ? CSR_NormalTiny_RegMask : CSR_Normal_RegMask;
}
BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -52,15 +57,26 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve the intermediate result registers r0 and r1.
// The result of instructions like 'mul' is always stored here.
+ // R0/R1/R1R0 are always reserved on both avr and avrtiny.
Reserved.set(AVR::R0);
Reserved.set(AVR::R1);
Reserved.set(AVR::R1R0);
- // Reserve the stack pointer.
+ // Reserve the stack pointer.
Reserved.set(AVR::SPL);
Reserved.set(AVR::SPH);
Reserved.set(AVR::SP);
+ // Reserve R2~R17 only on avrtiny.
+ if (MF.getSubtarget<AVRSubtarget>().hasTinyEncoding()) {
+ // Reserve 8-bit registers R2~R15, Rtmp(R16) and Zero(R17).
+ for (unsigned Reg = AVR::R2; Reg <= AVR::R17; Reg++)
+ Reserved.set(Reg);
+ // Reserve 16-bit registers R3R2~R18R17.
+ for (unsigned Reg = AVR::R3R2; Reg <= AVR::R18R17; Reg++)
+ Reserved.set(Reg);
+ }
+
// We tentatively reserve the frame pointer register r29:r28 because the
// function may require one, but we cannot tell until register allocation
// is complete, which can be too late.
@@ -137,6 +153,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFI = TM.getSubtargetImpl()->getFrameLowering();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
int Offset = MFI.getObjectOffset(FrameIndex);
@@ -151,7 +168,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOpcode() == AVR::FRMIDX) {
MI.setDesc(TII.get(AVR::MOVWRdRr));
MI.getOperand(FIOperandNum).ChangeToRegister(AVR::R29R28, false);
- MI.RemoveOperand(2);
+ MI.removeOperand(2);
assert(Offset > 0 && "Invalid offset");
@@ -219,7 +236,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// a compare and branch, invalidating the contents of SREG set by the
// compare instruction because of the add/sub pairs. Conservatively save and
// restore SREG before and after each add/sub pair.
- BuildMI(MBB, II, dl, TII.get(AVR::INRdA), AVR::R0).addImm(0x3f);
+ BuildMI(MBB, II, dl, TII.get(AVR::INRdA), AVR::R0)
+ .addImm(STI.getIORegSREG());
MachineInstr *New = BuildMI(MBB, II, dl, TII.get(AddOpc), AVR::R29R28)
.addReg(AVR::R29R28, RegState::Kill)
@@ -228,7 +246,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Restore SREG.
BuildMI(MBB, std::next(II), dl, TII.get(AVR::OUTARr))
- .addImm(0x3f)
+ .addImm(STI.getIORegSREG())
.addReg(AVR::R0, RegState::Kill);
// No need to set SREG as dead here otherwise if the next instruction is a
diff --git a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp b/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
deleted file mode 100644
index 76f29eb9f369..000000000000
--- a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-//===-- AVRRelaxMemOperations.cpp - Relax out of range loads/stores -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass which relaxes out of range memory operations into
-// equivalent operations which handle bigger addresses.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AVR.h"
-#include "AVRInstrInfo.h"
-#include "AVRTargetMachine.h"
-#include "MCTargetDesc/AVRMCTargetDesc.h"
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define AVR_RELAX_MEM_OPS_NAME "AVR memory operation relaxation pass"
-
-namespace {
-
-class AVRRelaxMem : public MachineFunctionPass {
-public:
- static char ID;
-
- AVRRelaxMem() : MachineFunctionPass(ID) {
- initializeAVRRelaxMemPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- StringRef getPassName() const override { return AVR_RELAX_MEM_OPS_NAME; }
-
-private:
- typedef MachineBasicBlock Block;
- typedef Block::iterator BlockIt;
-
- const TargetInstrInfo *TII;
-
- template <unsigned OP> bool relax(Block &MBB, BlockIt MBBI);
-
- bool runOnBasicBlock(Block &MBB);
- bool runOnInstruction(Block &MBB, BlockIt MBBI);
-
- MachineInstrBuilder buildMI(Block &MBB, BlockIt MBBI, unsigned Opcode) {
- return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode));
- }
-};
-
-char AVRRelaxMem::ID = 0;
-
-bool AVRRelaxMem::runOnMachineFunction(MachineFunction &MF) {
- bool Modified = false;
-
- const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
- TII = STI.getInstrInfo();
-
- for (Block &MBB : MF) {
- bool BlockModified = runOnBasicBlock(MBB);
- Modified |= BlockModified;
- }
-
- return Modified;
-}
-
-bool AVRRelaxMem::runOnBasicBlock(Block &MBB) {
- bool Modified = false;
-
- BlockIt MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
- BlockIt NMBBI = std::next(MBBI);
- Modified |= runOnInstruction(MBB, MBBI);
- MBBI = NMBBI;
- }
-
- return Modified;
-}
-
-template <> bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
- MachineInstr &MI = *MBBI;
-
- MachineOperand &Ptr = MI.getOperand(0);
- MachineOperand &Src = MI.getOperand(2);
- int64_t Imm = MI.getOperand(1).getImm();
-
- // We can definitely optimise this better.
- if (Imm > 63) {
- // Push the previous state of the pointer register.
- // This instruction must preserve the value.
- buildMI(MBB, MBBI, AVR::PUSHWRr).addReg(Ptr.getReg());
-
- // Add the immediate to the pointer register.
- buildMI(MBB, MBBI, AVR::SBCIWRdK)
- .addReg(Ptr.getReg(), RegState::Define)
- .addReg(Ptr.getReg())
- .addImm(-Imm);
-
- // Store the value in the source register to the address
- // pointed to by the pointer register.
- buildMI(MBB, MBBI, AVR::STWPtrRr)
- .addReg(Ptr.getReg())
- .addReg(Src.getReg(), getKillRegState(Src.isKill()));
-
- // Pop the original state of the pointer register.
- buildMI(MBB, MBBI, AVR::POPWRd)
- .addDef(Ptr.getReg(), getKillRegState(Ptr.isKill()));
-
- MI.removeFromParent();
- }
-
- return false;
-}
-
-bool AVRRelaxMem::runOnInstruction(Block &MBB, BlockIt MBBI) {
- MachineInstr &MI = *MBBI;
- int Opcode = MBBI->getOpcode();
-
-#define RELAX(Op) \
- case Op: \
- return relax<Op>(MBB, MI)
-
- switch (Opcode) { RELAX(AVR::STDWPtrQRr); }
-#undef RELAX
- return false;
-}
-
-} // end of anonymous namespace
-
-INITIALIZE_PASS(AVRRelaxMem, "avr-relax-mem", AVR_RELAX_MEM_OPS_NAME, false,
- false)
-
-namespace llvm {
-
-FunctionPass *createAVRRelaxMemPass() { return new AVRRelaxMem(); }
-
-} // end of namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
index f8ca191b1868..2325193bac0a 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -91,8 +91,16 @@ public:
return ELFArch;
}
- /// Get I/O register address.
- int getIORegRAMPZ(void) const { return 0x3b; }
+ /// Get I/O register addresses.
+ int getIORegRAMPZ(void) const { return hasELPM() ? 0x3b : -1; }
+ int getIORegEIND(void) const { return hasEIJMPCALL() ? 0x3c : -1; }
+ int getIORegSPL(void) const { return 0x3d; }
+ int getIORegSPH(void) const { return hasSmallStack() ? -1 : 0x3e; }
+ int getIORegSREG(void) const { return 0x3f; }
+
+ /// Get GPR aliases.
+ int getRegTmpIndex(void) const { return hasTinyEncoding() ? 16 : 0; }
+ int getRegZeroIndex(void) const { return hasTinyEncoding() ? 17 : 1; }
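The -1 returns encode "this device has no such I/O register", so callers are expected to test before emitting IN/OUT. A simplified host-side sketch of that contract (field values assumed):

#include <cstdio>

struct SubtargetSketch {
  bool HasELPM, HasSmallStack;
  int getIORegRAMPZ() const { return HasELPM ? 0x3b : -1; }
  int getIORegSPH() const { return HasSmallStack ? -1 : 0x3e; }
};

int main() {
  const SubtargetSketch Tiny{false, true}, Mega{true, false};
  for (const auto &S : {Tiny, Mega})
    std::printf("RAMPZ=%d SPH=%d\n", S.getIORegRAMPZ(), S.getIORegSPH());
  return 0;
}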
private:
/// The ELF e_flags architecture.
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 22b9ba3ece07..b9d77e0d1a51 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -38,7 +38,7 @@ static StringRef getCPU(StringRef CPU) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::Static);
+ return RM.value_or(Reloc::Static);
}
AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
@@ -92,7 +92,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
auto &PR = *PassRegistry::getPassRegistry();
initializeAVRExpandPseudoPass(PR);
- initializeAVRRelaxMemPass(PR);
initializeAVRShiftExpandPass(PR);
}
@@ -118,7 +117,6 @@ bool AVRPassConfig::addInstSelector() {
}
void AVRPassConfig::addPreSched2() {
- addPass(createAVRRelaxMemPass());
addPass(createAVRExpandPseudoPass());
}
diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index f19e7840eb31..9e1c7b781f0f 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -43,6 +43,10 @@ class AVRAsmParser : public MCTargetAsmParser {
const MCRegisterInfo *MRI;
const std::string GENERATE_STUBS = "gs";
+ enum AVRMatchResultTy {
+ Match_InvalidRegisterOnTiny = FIRST_TARGET_MATCH_RESULT_TY + 1,
+ };
+
#define GET_ASSEMBLER_HEADER
#include "AVRGenAsmMatcher.inc"
@@ -332,6 +336,8 @@ bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
return invalidOperand(Loc, Operands, ErrorInfo);
case Match_MnemonicFail:
return Error(Loc, "invalid instruction");
+ case Match_InvalidRegisterOnTiny:
+ return Error(Loc, "invalid register on avrtiny");
default:
return true;
}
@@ -399,6 +405,11 @@ bool AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) {
if (RegNo == AVR::NoRegister)
return true;
+ // Reject R0~R15 on avrtiny.
+ if (AVR::R0 <= RegNo && RegNo <= AVR::R15 &&
+ STI.hasFeature(AVR::FeatureTinyEncoding))
+ return Error(Parser.getTok().getLoc(), "invalid register on avrtiny");
+
AsmToken const &T = Parser.getTok();
Operands.push_back(AVROperand::CreateReg(RegNo, T.getLoc(), T.getEndLoc()));
Parser.Lex(); // Eat register token.
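A simplified host-side sketch of the new parser check: avrtiny cores implement only r16..r31, so the lower half of the register file is rejected up front:

#include <cstdio>

int main() {
  const bool TinyEncoding = true; // stand-in for FeatureTinyEncoding
  for (int RegNo : {0, 15, 16, 31}) {
    bool Rejected = TinyEncoding && RegNo <= 15;
    std::printf("r%-2d: %s\n", RegNo,
                Rejected ? "invalid register on avrtiny" : "accepted");
  }
  return 0;
}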
@@ -726,6 +737,12 @@ unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (Op.isImm()) {
if (MCConstantExpr const *Const = dyn_cast<MCConstantExpr>(Op.getImm())) {
int64_t RegNum = Const->getValue();
+
+ // Reject R0~R15 on avrtiny.
+ if (0 <= RegNum && RegNum <= 15 &&
+ STI.hasFeature(AVR::FeatureTinyEncoding))
+ return Match_InvalidRegisterOnTiny;
+
std::ostringstream RegName;
RegName << "r" << RegNum;
RegNum = MatchRegisterName(RegName.str());
diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 9dcd370b9f1e..ee0ae08e192f 100644
--- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/TargetRegistry.h"
@@ -36,7 +36,7 @@ class AVRDisassembler : public MCDisassembler {
public:
AVRDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~AVRDisassembler() {}
+ virtual ~AVRDisassembler() = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -66,7 +66,7 @@ static const uint16_t GPRDecoderTable[] = {
static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -77,7 +77,7 @@ static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -86,48 +86,51 @@ static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePTRREGSRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- // Note: this function must be defined but does not seem to be called.
- assert(false && "unimplemented: PTRREGS register class");
- return MCDisassembler::Success;
-}
-
static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+
+static DecodeStatus decodeMemri(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+
+static DecodeStatus decodeLoadStore(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
#include "AVRGenDisassemblerTables.inc"
static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned addr = 0;
addr |= fieldFromInstruction(Insn, 0, 4);
addr |= fieldFromInstruction(Insn, 9, 2) << 4;
@@ -140,7 +143,7 @@ static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned addr = 0;
addr |= fieldFromInstruction(Insn, 0, 4);
addr |= fieldFromInstruction(Insn, 9, 2) << 4;
@@ -153,7 +156,7 @@ static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned addr = fieldFromInstruction(Insn, 3, 5);
unsigned b = fieldFromInstruction(Insn, 0, 3);
Inst.addOperand(MCOperand::createImm(addr));
@@ -162,7 +165,8 @@ static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Field,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// Call targets need to be shifted left by one so this needs a custom
// decoder.
Inst.addOperand(MCOperand::createImm(Field << 1));
@@ -170,7 +174,7 @@ static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Field,
}
static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 5);
if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
MCDisassembler::Fail)
@@ -179,7 +183,7 @@ static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (decodeFRd(Inst, Insn, Address, Decoder) == MCDisassembler::Fail)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(AVR::R31R30));
@@ -187,7 +191,8 @@ static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 3) + 16;
unsigned r = fieldFromInstruction(Insn, 0, 3) + 16;
if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
@@ -200,7 +205,8 @@ static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
}
static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned r = fieldFromInstruction(Insn, 4, 4) * 2;
unsigned d = fieldFromInstruction(Insn, 0, 4) * 2;
if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) ==
@@ -213,7 +219,7 @@ static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
}
static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 2) * 2 + 24; // starts at r24:r25
unsigned k = 0;
k |= fieldFromInstruction(Insn, 0, 4);
@@ -229,7 +235,8 @@ static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address,
}
static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned rd = fieldFromInstruction(Insn, 4, 4) + 16;
unsigned rr = fieldFromInstruction(Insn, 0, 4) + 16;
if (DecodeGPR8RegisterClass(Inst, rd, Address, Decoder) ==
@@ -241,6 +248,128 @@ static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
+static DecodeStatus decodeMemri(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
+ // As in the EncoderMethod `AVRMCCodeEmitter::encodeMemri`, the memory
+ // address is encoded into 7 bits: bits 0-5 are the immediate offset, and
+ // bit 6 is the pointer register bit (Z=0, Y=1).
+ if (Insn > 127)
+ return MCDisassembler::Fail;
+
+ // Append the base register operand.
+ Inst.addOperand(
+ MCOperand::createReg((Insn & 0x40) ? AVR::R29R28 : AVR::R31R30));
+ // Append the immediate offset operand.
+ Inst.addOperand(MCOperand::createImm(Insn & 0x3f));
+
+ return MCDisassembler::Success;
+}
+
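The 7-bit memri field above can be checked in isolation; a minimal standalone sketch of the decode side, with hypothetical enum values standing in for the real AVR:: register-pair constants:

#include <cassert>

// Stand-ins for the two pointer-register pairs (illustrative, not AVR::).
enum PtrReg { Z_R31R30, Y_R29R28 };

struct Memri { PtrReg Base; unsigned Offset; };

// Bit 6 selects the pointer register (Z=0, Y=1); bits 0-5 are the offset.
static bool decodeMemriField(unsigned Insn, Memri &Out) {
  if (Insn > 127)
    return false; // Only 7 bits are meaningful.
  Out.Base = (Insn & 0x40) ? Y_R29R28 : Z_R31R30;
  Out.Offset = Insn & 0x3f;
  return true;
}

int main() {
  Memri M;
  assert(decodeMemriField(0x41, M) && M.Base == Y_R29R28 && M.Offset == 1);
  assert(decodeMemriField(0x3f, M) && M.Base == Z_R31R30 && M.Offset == 63);
  assert(!decodeMemriField(0x80, M)); // Out of the 7-bit range.
}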
+static DecodeStatus decodeLoadStore(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ // Get the register that will be loaded or stored.
+ unsigned RegVal = GPRDecoderTable[(Insn >> 4) & 0x1f];
+
+ // Decode LDD/STD with offset less than 8.
+ if ((Insn & 0xf000) == 0x8000) {
+ unsigned RegBase = (Insn & 0x8) ? AVR::R29R28 : AVR::R31R30;
+ unsigned Offset = Insn & 7; // We need not consider offset > 7.
+ if ((Insn & 0x200) == 0) { // Decode LDD.
+ Inst.setOpcode(AVR::LDDRdPtrQ);
+ Inst.addOperand(MCOperand::createReg(RegVal));
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ } else { // Decode STD.
+ Inst.setOpcode(AVR::STDPtrQRr);
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ Inst.addOperand(MCOperand::createReg(RegVal));
+ }
+ return MCDisassembler::Success;
+ }
+
+ // Decode the following 14 instructions. Bit 9 indicates load(0) or store(1),
+ // bits 8-4 indicate the value register, bits 3-2 indicate the base address
+ // register (11-X, 10-Y, 00-Z), and bits 1-0 indicate the mode (00-basic,
+ // 01-postinc, 10-predec).
+ // ST X, Rr : 1001 001r rrrr 1100
+ // ST X+, Rr : 1001 001r rrrr 1101
+ // ST -X, Rr : 1001 001r rrrr 1110
+ // ST Y+, Rr : 1001 001r rrrr 1001
+ // ST -Y, Rr : 1001 001r rrrr 1010
+ // ST Z+, Rr : 1001 001r rrrr 0001
+ // ST -Z, Rr : 1001 001r rrrr 0010
+ // LD Rd, X : 1001 000d dddd 1100
+ // LD Rd, X+ : 1001 000d dddd 1101
+ // LD Rd, -X : 1001 000d dddd 1110
+ // LD Rd, Y+ : 1001 000d dddd 1001
+ // LD Rd, -Y : 1001 000d dddd 1010
+ // LD Rd, Z+ : 1001 000d dddd 0001
+ // LD Rd, -Z : 1001 000d dddd 0010
+ if ((Insn & 0xfc00) != 0x9000 || (Insn & 0xf) == 0)
+ return MCDisassembler::Fail;
+
+ // Get the base address register.
+ unsigned RegBase;
+ switch (Insn & 0xc) {
+ case 0xc:
+ RegBase = AVR::R27R26;
+ break;
+ case 0x8:
+ RegBase = AVR::R29R28;
+ break;
+ case 0x0:
+ RegBase = AVR::R31R30;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ // Set the opcode.
+ switch (Insn & 0x203) {
+ case 0x200:
+ Inst.setOpcode(AVR::STPtrRr);
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ Inst.addOperand(MCOperand::createReg(RegVal));
+ return MCDisassembler::Success;
+ case 0x201:
+ Inst.setOpcode(AVR::STPtrPiRr);
+ break;
+ case 0x202:
+ Inst.setOpcode(AVR::STPtrPdRr);
+ break;
+ case 0:
+ Inst.setOpcode(AVR::LDRdPtr);
+ Inst.addOperand(MCOperand::createReg(RegVal));
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ return MCDisassembler::Success;
+ case 1:
+ Inst.setOpcode(AVR::LDRdPtrPi);
+ break;
+ case 2:
+ Inst.setOpcode(AVR::LDRdPtrPd);
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ // Build postinc/predec machine instructions.
+ if ((Insn & 0x200) == 0) { // This is a load instruction.
+ Inst.addOperand(MCOperand::createReg(RegVal));
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ } else { // This is a store instruction.
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ Inst.addOperand(MCOperand::createReg(RegBase));
+ Inst.addOperand(MCOperand::createReg(RegVal));
+ // STPtrPiRr and STPtrPdRr have an extra immediate operand.
+ Inst.addOperand(MCOperand::createImm(1));
+ }
+
+ return MCDisassembler::Success;
+}
+
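A worked check of the encoding table above, decoding ST X+, r24 (1001 0011 1000 1101 = 0x938d) by hand with the same masks the function uses:

#include <cassert>
#include <cstdint>

int main() {
  // ST X+, r24 : 1001 001r rrrr 1101 with rrrrr = 11000 (r24).
  uint16_t Insn = 0x938d;

  // Group check: top six bits 1001 00 and a non-zero low nibble.
  assert((Insn & 0xfc00) == 0x9000 && (Insn & 0xf) != 0);

  // Bits 8-4: the value register index.
  assert(((Insn >> 4) & 0x1f) == 24);

  // Bits 3-2: the base register (11-X, 10-Y, 00-Z).
  assert((Insn & 0xc) == 0xc); // X, i.e. the r27:r26 pair.

  // Bit 9 plus bits 1-0 pick the opcode: 0x201 is store, post-increment,
  // which the decoder maps to AVR::STPtrPiRr.
  assert((Insn & 0x203) == 0x201);
}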
static DecodeStatus readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &Size, uint32_t &Insn) {
if (Bytes.size() < 2) {
@@ -299,7 +428,12 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
// Try to auto-decode a 16-bit instruction.
Result = decodeInstruction(getDecoderTable(Size), Instr, Insn, Address,
this, STI);
+ if (Result != MCDisassembler::Fail)
+ return Result;
+ // Try to decode as a load/store instruction. ST/LD need a specified
+ // DecoderMethod, as they already have a specified PostEncoderMethod.
+ Result = decodeLoadStore(Instr, Insn, Address, this);
if (Result != MCDisassembler::Fail)
return Result;
}
@@ -323,4 +457,4 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
}
typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
index b90e103794da..850ddf0d9458 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -25,7 +25,7 @@ class AVRELFObjectWriter : public MCELFObjectTargetWriter {
public:
AVRELFObjectWriter(uint8_t OSABI);
- virtual ~AVRELFObjectWriter() {}
+ virtual ~AVRELFObjectWriter() = default;
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 85933d6b9bb9..ade5df18c3b9 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -1,6 +1,7 @@
#include "AVRELFStreamer.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
index 11f55f6d253b..54dad3098385 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -43,6 +43,9 @@ private:
printPCRelImm(MI, OpNo, O);
}
void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemspi(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemri(MI, OpNo, O);
+ }
// Autogenerated by TableGen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index 9754ff7f1146..c8bb410e4882 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -295,7 +295,6 @@ void AVRMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new AVRMCCodeEmitter(MCII, Ctx);
}
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
index 68589763f29a..5bf6c1a581e3 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
@@ -84,7 +84,7 @@ private:
private:
explicit AVRMCExpr(VariantKind Kind, const MCExpr *Expr, bool Negated)
: Kind(Kind), SubExpr(Expr), Negated(Negated) {}
- ~AVRMCExpr() {}
+ ~AVRMCExpr() = default;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index ef116793d326..aaf236d82016 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -33,7 +33,6 @@ MCInstrInfo *createAVRMCInstrInfo();
/// Creates a machine code emitter for AVR.
MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
/// Creates an assembly backend for AVR.
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 697deb117bcb..4c064d65d919 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 89990f7e15c2..3de761bf6601 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -11,6 +11,8 @@
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 46141e69d9d4..349cdd92ae62 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -77,6 +77,7 @@
#include "BPF.h"
#include "BPFCORE.h"
#include "BPFTargetMachine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
@@ -123,7 +124,7 @@ public:
struct CallInfo {
uint32_t Kind;
uint32_t AccessIndex;
- Align RecordAlignment;
+ MaybeAlign RecordAlignment;
MDNode *Metadata;
Value *Base;
};
@@ -142,9 +143,9 @@ private:
Module *M = nullptr;
static std::map<std::string, GlobalVariable *> GEPGlobals;
- // A map to link preserve_*_access_index instrinsic calls.
+ // A map to link preserve_*_access_index intrinsic calls.
std::map<CallInst *, std::pair<CallInst *, CallInfo>> AIChain;
- // A map to hold all the base preserve_*_access_index instrinsic calls.
+ // A map to hold all the base preserve_*_access_index intrinsic calls.
// The base call is not an input of any other preserve_*
// intrinsics.
std::map<CallInst *, CallInfo> BaseAICalls;
@@ -169,7 +170,7 @@ private:
uint32_t &StartBitOffset, uint32_t &EndBitOffset);
uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy,
uint32_t AccessIndex, uint32_t PatchImm,
- Align RecordAlignment);
+ MaybeAlign RecordAlignment);
Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo,
std::string &AccessKey, MDNode *&BaseMeta);
@@ -270,7 +271,7 @@ static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) {
static Type *getBaseElementType(const CallInst *Call) {
// Element type is stored in an elementtype() attribute on the first param.
- return Call->getAttributes().getParamElementType(0);
+ return Call->getParamElementType(0);
}
/// Check whether a call is a preserve_*_access_index intrinsic call or not.
@@ -299,8 +300,6 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic");
CInfo.AccessIndex = getConstant(Call->getArgOperand(1));
CInfo.Base = Call->getArgOperand(0);
- CInfo.RecordAlignment =
- DL->getABITypeAlign(CInfo.Base->getType()->getPointerElementType());
return true;
}
if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
@@ -333,6 +332,8 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
report_fatal_error("Incorrect flag for llvm.bpf.preserve.type.info intrinsic");
if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_EXISTENCE)
CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_EXISTENCE;
+ else if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_MATCH)
+ CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_MATCH;
else
CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_SIZE;
return true;
@@ -592,10 +593,20 @@ void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType *MemberTy,
uint32_t &EndBitOffset) {
uint32_t MemberBitSize = MemberTy->getSizeInBits();
uint32_t MemberBitOffset = MemberTy->getOffsetInBits();
+
+ if (RecordAlignment > 8) {
+ // If the bits are within an aligned 8-byte unit, set the RecordAlignment
+ // to 8; otherwise report a fatal error.
+ if (MemberBitOffset / 64 != (MemberBitOffset + MemberBitSize) / 64)
+ report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
+ "requiring too big alignment");
+ RecordAlignment = Align(8);
+ }
+
uint32_t AlignBits = RecordAlignment.value() * 8;
- if (RecordAlignment > 8 || MemberBitSize > AlignBits)
+ if (MemberBitSize > AlignBits)
report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
- "requiring too big alignment");
+ "bitfield size greater than record alignment");
StartBitOffset = MemberBitOffset & ~(AlignBits - 1);
if ((StartBitOffset + AlignBits) < (MemberBitOffset + MemberBitSize))
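The storage-unit arithmetic above, restated as a small hand check (the helper name is illustrative, not the real function):

#include <cassert>
#include <cstdint>

// The aligned window of AlignBits bits that contains a bitfield member.
static uint32_t storageStartBit(uint32_t MemberBitOffset, uint32_t AlignBytes) {
  uint32_t AlignBits = AlignBytes * 8;
  return MemberBitOffset & ~(AlignBits - 1);
}

int main() {
  // A bitfield at bit offset 35 in a 4-byte-aligned record lives in the
  // 32-bit unit starting at bit 32.
  assert(storageStartBit(35, 4) == 32);

  // The new clamp: a 16-byte-aligned record is treated as 8-byte-aligned
  // as long as the bitfield stays inside one 64-bit unit.
  uint32_t Off = 70, Size = 3; // Bits 70..72, all inside bits 64..127.
  assert(Off / 64 == (Off + Size) / 64);
  assert(storageStartBit(Off, 8) == 64);
}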
@@ -608,7 +619,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
DICompositeType *CTy,
uint32_t AccessIndex,
uint32_t PatchImm,
- Align RecordAlignment) {
+ MaybeAlign RecordAlignment) {
if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE)
return 1;
@@ -624,7 +635,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
PatchImm += MemberTy->getOffsetInBits() >> 3;
} else {
unsigned SBitOffset, NextSBitOffset;
- GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset,
+ GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset,
NextSBitOffset);
PatchImm += SBitOffset >> 3;
}
@@ -643,7 +654,8 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
return SizeInBits >> 3;
unsigned SBitOffset, NextSBitOffset;
- GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
+ GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset,
+ NextSBitOffset);
SizeInBits = NextSBitOffset - SBitOffset;
if (SizeInBits & (SizeInBits - 1))
report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info");
@@ -703,7 +715,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
}
unsigned SBitOffset, NextSBitOffset;
- GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
+ GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset, NextSBitOffset);
if (NextSBitOffset - SBitOffset > 64)
report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
@@ -734,7 +746,7 @@ uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind,
}
unsigned SBitOffset, NextSBitOffset;
- GetStorageBitRange(MemberTy, RecordAlignment, SBitOffset, NextSBitOffset);
+ GetStorageBitRange(MemberTy, *RecordAlignment, SBitOffset, NextSBitOffset);
if (NextSBitOffset - SBitOffset > 64)
report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
@@ -923,7 +935,8 @@ MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call,
int64_t PatchImm;
std::string AccessStr("0");
- if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE) {
+ if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE ||
+ CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_MATCH) {
PatchImm = 1;
} else if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_SIZE) {
// typedef debuginfo type has size 0, get the eventual base type.
@@ -933,8 +946,11 @@ MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call,
// ENUM_VALUE_EXISTENCE and ENUM_VALUE
IsInt32Ret = false;
- const auto *CE = cast<ConstantExpr>(Call->getArgOperand(1));
- const GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
+ // The argument could be a global variable or a getelementptr whose base is
+ // a global variable, depending on whether the clang option `opaque-pointers`
+ // is set or not.
+ const GlobalVariable *GV =
+ cast<GlobalVariable>(Call->getArgOperand(1)->stripPointerCasts());
assert(GV->hasInitializer());
const ConstantDataArray *DA = cast<ConstantDataArray>(GV->getInitializer());
assert(DA->isString());
diff --git a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
index 69d0bca0bd77..98f8d59fbe01 100644
--- a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
+++ b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
@@ -259,10 +259,16 @@ bool BPFAdjustOptImpl::serializeICMPCrossBB(BasicBlock &BB) {
return false;
if (Cond1Op == ICmpInst::ICMP_SGT || Cond1Op == ICmpInst::ICMP_SGE) {
- if (Cond2Op != ICmpInst::ICMP_SLT && Cond1Op != ICmpInst::ICMP_SLE)
+ if (Cond2Op != ICmpInst::ICMP_SLT && Cond2Op != ICmpInst::ICMP_SLE)
return false;
} else if (Cond1Op == ICmpInst::ICMP_SLT || Cond1Op == ICmpInst::ICMP_SLE) {
- if (Cond2Op != ICmpInst::ICMP_SGT && Cond1Op != ICmpInst::ICMP_SGE)
+ if (Cond2Op != ICmpInst::ICMP_SGT && Cond2Op != ICmpInst::ICMP_SGE)
+ return false;
+ } else if (Cond1Op == ICmpInst::ICMP_ULT || Cond1Op == ICmpInst::ICMP_ULE) {
+ if (Cond2Op != ICmpInst::ICMP_UGT && Cond2Op != ICmpInst::ICMP_UGE)
+ return false;
+ } else if (Cond1Op == ICmpInst::ICMP_UGT || Cond1Op == ICmpInst::ICMP_UGE) {
+ if (Cond2Op != ICmpInst::ICMP_ULT && Cond2Op != ICmpInst::ICMP_ULE)
return false;
} else {
return false;
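The corrected pairing, condensed: the first compare must be answered by the opposite-direction predicate of the same signedness, and the pre-fix code mistakenly tested Cond1Op again in the second clause. A standalone restatement (the enum stands in for ICmpInst::Predicate):

#include <cassert>

enum Pred { SGT, SGE, SLT, SLE, UGT, UGE, ULT, ULE };

static bool isOppositePair(Pred Cond1Op, Pred Cond2Op) {
  if (Cond1Op == SGT || Cond1Op == SGE)
    return Cond2Op == SLT || Cond2Op == SLE;
  if (Cond1Op == SLT || Cond1Op == SLE)
    return Cond2Op == SGT || Cond2Op == SGE;
  if (Cond1Op == ULT || Cond1Op == ULE)
    return Cond2Op == UGT || Cond2Op == UGE;
  if (Cond1Op == UGT || Cond1Op == UGE)
    return Cond2Op == ULT || Cond2Op == ULE;
  return false;
}

int main() {
  assert(isOppositePair(SGT, SLE));  // The buggy version accepted this ...
  assert(!isOppositePair(SGT, SGT)); // ... but also this, wrongly.
  assert(isOppositePair(ULT, UGE));  // Unsigned pairs are newly handled.
}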
diff --git a/llvm/lib/Target/BPF/BPFCORE.h b/llvm/lib/Target/BPF/BPFCORE.h
index 0c504412480d..c9aa135232c1 100644
--- a/llvm/lib/Target/BPF/BPFCORE.h
+++ b/llvm/lib/Target/BPF/BPFCORE.h
@@ -32,6 +32,7 @@ public:
TYPE_SIZE,
ENUM_VALUE_EXISTENCE,
ENUM_VALUE,
+ TYPE_MATCH,
MAX_FIELD_RELOC_KIND,
};
@@ -46,6 +47,7 @@ public:
enum PreserveTypeInfo : uint32_t {
PRESERVE_TYPE_INFO_EXISTENCE = 0,
PRESERVE_TYPE_INFO_SIZE,
+ PRESERVE_TYPE_INFO_MATCH,
MAX_PRESERVE_TYPE_INFO_FLAG,
};
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 0587cb0e16e3..16876e74c4a1 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -103,7 +103,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
@@ -168,6 +167,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
+ MaxLoadsPerMemcmp = 0;
} else {
// inline memcpy() for kernel to see explicit copy
unsigned CommonMaxStores =
@@ -176,6 +176,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
+ MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
}
// CPU/Feature control
diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index a809065014e5..27db0be080ae 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -39,6 +39,7 @@ def BPF_AND : BPFArithOp<0x5>;
def BPF_LSH : BPFArithOp<0x6>;
def BPF_RSH : BPFArithOp<0x7>;
def BPF_NEG : BPFArithOp<0x8>;
+def BPF_MOD : BPFArithOp<0x9>;
def BPF_XOR : BPFArithOp<0xa>;
def BPF_MOV : BPFArithOp<0xb>;
def BPF_ARSH : BPFArithOp<0xc>;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 54360a89782b..e61e32b62d83 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -192,8 +192,7 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a J, delete them.
- while (std::next(I) != MBB.end())
- std::next(I)->eraseFromParent();
+ MBB.erase(std::next(I), MBB.end());
Cond.clear();
FBB = nullptr;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 082e1f4a92c2..6cac478561b2 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -298,6 +298,7 @@ let isAsCheapAsAMove = 1 in {
}
defm MUL : ALU<BPF_MUL, "*=", mul>;
defm DIV : ALU<BPF_DIV, "/=", udiv>;
+ defm MOD : ALU<BPF_MOD, "%=", urem>;
}
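Together with the BPFISelLowering hunk above that stops expanding UREM, this pattern lets an unsigned remainder select directly to a BPF_MOD ALU instruction; a sketch of the source-level effect (the register numbers in the comment are illustrative):

#include <cstdint>

// Previously UREM was marked Expand; now `a % b` on unsigned operands
// selects the new MOD pattern, printed with the "%=" operand syntax,
// e.g. r0 %= r1 (BPF_MOD | BPF_X | BPF_ALU64).
uint64_t mod(uint64_t a, uint64_t b) {
  return a % b;
}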
class NEG_RR<BPFOpClass Class, BPFArithOp Opc,
@@ -372,6 +373,7 @@ def FI_ri
let Inst{47-32} = 0;
let Inst{31-0} = 0;
let BPFClass = BPF_LD;
+ bit isPseudo = true;
}
def LD_pseudo
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
index 2bc2302cf55c..b462f1d1427d 100644
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -17,6 +17,7 @@
#include "BPF.h"
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index 7f69c8a63443..cefbe48b7217 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -24,6 +24,7 @@
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -123,9 +124,8 @@ bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
if (!PhiDef)
return false;
if (PhiDef->isPHI()) {
- if (PhiInsns.find(PhiDef) != PhiInsns.end())
+ if (!PhiInsns.insert(PhiDef).second)
return false;
- PhiInsns.insert(PhiDef);
if (!isPhiFrom32Def(PhiDef))
return false;
}
@@ -143,9 +143,8 @@ bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
return false;
if (DefInsn->isPHI()) {
- if (PhiInsns.find(DefInsn) != PhiInsns.end())
+ if (!PhiInsns.insert(DefInsn).second)
return false;
- PhiInsns.insert(DefInsn);
if (!isPhiFrom32Def(DefInsn))
return false;
} else if (DefInsn->getOpcode() == BPF::COPY) {
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index b4232875383c..088195994edd 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -31,9 +31,11 @@
#include "BPFCORE.h"
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include <set>
using namespace llvm;
@@ -52,9 +54,12 @@ struct BPFMISimplifyPatchable : public MachineFunctionPass {
}
private:
+ std::set<MachineInstr *> SkipInsts;
+
// Initialize class variables.
void initialize(MachineFunction &MFParm);
+ bool isLoadInst(unsigned Opcode);
bool removeLD();
void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB,
MachineInstr &MI, Register &SrcReg, Register &DstReg,
@@ -88,6 +93,12 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
}
+bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) {
+ return Opcode == BPF::LDD || Opcode == BPF::LDW || Opcode == BPF::LDH ||
+ Opcode == BPF::LDB || Opcode == BPF::LDW32 || Opcode == BPF::LDH32 ||
+ Opcode == BPF::LDB32;
+}
+
void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
MachineOperand *RelocOp, const GlobalValue *GVal) {
const MachineInstr *Inst = RelocOp->getParent();
@@ -229,6 +240,11 @@ void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI,
void BPFMISimplifyPatchable::processInst(MachineRegisterInfo *MRI,
MachineInstr *Inst, MachineOperand *RelocOp, const GlobalValue *GVal) {
unsigned Opcode = Inst->getOpcode();
+ if (isLoadInst(Opcode)) {
+ SkipInsts.insert(Inst);
+ return;
+ }
+
if (Opcode == BPF::ADD_rr)
checkADDrr(MRI, RelocOp, GVal);
else if (Opcode == BPF::SLL_rr)
@@ -253,10 +269,10 @@ bool BPFMISimplifyPatchable::removeLD() {
}
// Ensure the register format is LOAD <reg>, <reg>, 0
- if (MI.getOpcode() != BPF::LDD && MI.getOpcode() != BPF::LDW &&
- MI.getOpcode() != BPF::LDH && MI.getOpcode() != BPF::LDB &&
- MI.getOpcode() != BPF::LDW32 && MI.getOpcode() != BPF::LDH32 &&
- MI.getOpcode() != BPF::LDB32)
+ if (!isLoadInst(MI.getOpcode()))
+ continue;
+
+ if (SkipInsts.find(&MI) != SkipInsts.end())
continue;
if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg())
diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index 6dfb7dc39922..8c58aae5b618 100644
--- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -12,6 +12,7 @@
#include "BPF.h"
#include "BPFCORE.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 2fb76ab5c440..97d9ed3cad47 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -59,7 +59,7 @@ static std::string computeDataLayout(const Triple &TT) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::PIC_);
+ return RM.value_or(Reloc::PIC_);
}
BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
@@ -149,7 +149,7 @@ void BPFPassConfig::addIRPasses() {
}
TargetTransformInfo
-BPFTargetMachine::getTargetTransformInfo(const Function &F) {
+BPFTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(BPFTTIImpl(this, F));
}
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h
index 98f64ccc3793..fede52089725 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.h
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.h
@@ -34,7 +34,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
index 6b86bf6e6cc1..0c8f9604b665 100644
--- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -71,6 +71,15 @@ public:
Opd2Info, Opd1PropInfo,
Opd2PropInfo);
}
+
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.LoadSizes = {8, 4, 2, 1};
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ return Options;
+ }
+
};
} // end namespace llvm
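What the two memcmp hunks buy: with LoadSizes = {8, 4, 2, 1} and MaxNumLoads wired to getMaxExpandSizeMemcmp, the generic ExpandMemCmp machinery can inline a small fixed-size memcmp into loads and compares, which matters on BPF where no libc call is available. The exact expansion is the generic pass's choice; this only illustrates the intent:

#include <cstdint>
#include <cstring>

// Conceptually expandable into one 8-byte and one 4-byte load+compare
// per operand instead of a library call.
bool sameHeader(const uint8_t *A, const uint8_t *B) {
  return std::memcmp(A, B, 12) == 0;
}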
diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def
index 0ae4194bc512..1de0e51b4757 100644
--- a/llvm/lib/Target/BPF/BTF.def
+++ b/llvm/lib/Target/BPF/BTF.def
@@ -33,5 +33,6 @@ HANDLE_BTF_KIND(15, DATASEC)
HANDLE_BTF_KIND(16, FLOAT)
HANDLE_BTF_KIND(17, DECL_TAG)
HANDLE_BTF_KIND(18, TYPE_TAG)
+HANDLE_BTF_KIND(19, ENUM64)
#undef HANDLE_BTF_KIND
diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h
index e54b97cd49a9..4540054aaf34 100644
--- a/llvm/lib/Target/BPF/BTF.h
+++ b/llvm/lib/Target/BPF/BTF.h
@@ -60,6 +60,7 @@ enum {
CommonTypeSize = 12,
BTFArraySize = 12,
BTFEnumSize = 8,
+ BTFEnum64Size = 12,
BTFMemberSize = 12,
BTFParamSize = 8,
BTFDataSecVarSize = 12,
@@ -145,6 +146,15 @@ struct BTFEnum {
int32_t Val; ///< Enum member value
};
+/// BTF_KIND_ENUM64 is followed by multiple "struct BTFEnum64".
+/// The exact number of BTFEnum64 is stored in the vlen (of the
+/// info in "struct CommonType").
+struct BTFEnum64 {
+ uint32_t NameOff; ///< Enum name offset in the string table
+ uint32_t Val_Lo32; ///< Enum member lo32 value
+ uint32_t Val_Hi32; ///< Enum member hi32 value
+};
+
/// BTF_KIND_ARRAY is followed by one "struct BTFArray".
struct BTFArray {
uint32_t ElemType; ///< Element type
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index d536aed1d211..a949e925eb60 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -161,9 +162,10 @@ void BTFTypeInt::emitType(MCStreamer &OS) {
OS.emitInt32(IntVal);
}
-BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) {
+BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen,
+ bool IsSigned) : ETy(ETy) {
Kind = BTF::BTF_KIND_ENUM;
- BTFType.Info = Kind << 24 | VLen;
+ BTFType.Info = IsSigned << 31 | Kind << 24 | VLen;
BTFType.Size = roundupToBytes(ETy->getSizeInBits());
}
@@ -199,6 +201,48 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
}
}
+BTFTypeEnum64::BTFTypeEnum64(const DICompositeType *ETy, uint32_t VLen,
+ bool IsSigned) : ETy(ETy) {
+ Kind = BTF::BTF_KIND_ENUM64;
+ BTFType.Info = IsSigned << 31 | Kind << 24 | VLen;
+ BTFType.Size = roundupToBytes(ETy->getSizeInBits());
+}
+
+void BTFTypeEnum64::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
+ BTFType.NameOff = BDebug.addString(ETy->getName());
+
+ DINodeArray Elements = ETy->getElements();
+ for (const auto Element : Elements) {
+ const auto *Enum = cast<DIEnumerator>(Element);
+
+ struct BTF::BTFEnum64 BTFEnum;
+ BTFEnum.NameOff = BDebug.addString(Enum->getName());
+ uint64_t Value;
+ if (Enum->isUnsigned())
+ Value = static_cast<uint64_t>(Enum->getValue().getZExtValue());
+ else
+ Value = static_cast<uint64_t>(Enum->getValue().getSExtValue());
+ BTFEnum.Val_Lo32 = Value;
+ BTFEnum.Val_Hi32 = Value >> 32;
+ EnumValues.push_back(BTFEnum);
+ }
+}
+
+void BTFTypeEnum64::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ for (const auto &Enum : EnumValues) {
+ OS.emitInt32(Enum.NameOff);
+ OS.AddComment("0x" + Twine::utohexstr(Enum.Val_Lo32));
+ OS.emitInt32(Enum.Val_Lo32);
+ OS.AddComment("0x" + Twine::utohexstr(Enum.Val_Hi32));
+ OS.emitInt32(Enum.Val_Hi32);
+ }
+}
+
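The info-word packing and the lo32/hi32 split used by BTFTypeEnum64, checked by hand (19 is BTF_KIND_ENUM64 from the BTF.def hunk above):

#include <cassert>
#include <cstdint>

int main() {
  // Info word: signedness in bit 31, kind in bits 24-30, vlen in the
  // low 16 bits.
  const uint32_t Kind = 19; // BTF_KIND_ENUM64
  bool IsSigned = true;
  uint32_t VLen = 3;
  uint32_t Info = (uint32_t)IsSigned << 31 | Kind << 24 | VLen;
  assert(Info == 0x93000003);

  // A 64-bit enumerator value is emitted as two 32-bit halves.
  uint64_t Value = 0x1122334455667788ULL;
  uint32_t Lo32 = (uint32_t)Value;
  uint32_t Hi32 = (uint32_t)(Value >> 32);
  assert(Lo32 == 0x55667788 && Hi32 == 0x11223344);
}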
BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems) {
Kind = BTF::BTF_KIND_ARRAY;
BTFType.NameOff = 0;
@@ -552,6 +596,46 @@ void BTFDebug::processDeclAnnotations(DINodeArray Annotations,
}
}
+/// Generate btf_type_tag chains.
+int BTFDebug::genBTFTypeTags(const DIDerivedType *DTy, int BaseTypeId) {
+ SmallVector<const MDString *, 4> MDStrs;
+ DINodeArray Annots = DTy->getAnnotations();
+ if (Annots) {
+ // For type with "int __tag1 __tag2 *p", the MDStrs will have
+ // content: [__tag1, __tag2].
+ for (const Metadata *Annotations : Annots->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotations);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+ if (!Name->getString().equals("btf_type_tag"))
+ continue;
+ MDStrs.push_back(cast<MDString>(MD->getOperand(1)));
+ }
+ }
+
+ if (MDStrs.size() == 0)
+ return -1;
+
+ // With MDStrs [__tag1, __tag2], the output type chain looks like
+ // PTR -> __tag2 -> __tag1 -> BaseType
+ // In the below, we construct BTF types with the order of __tag1, __tag2
+ // and PTR.
+ unsigned TmpTypeId;
+ std::unique_ptr<BTFTypeTypeTag> TypeEntry;
+ if (BaseTypeId >= 0)
+ TypeEntry =
+ std::make_unique<BTFTypeTypeTag>(BaseTypeId, MDStrs[0]->getString());
+ else
+ TypeEntry = std::make_unique<BTFTypeTypeTag>(DTy, MDStrs[0]->getString());
+ TmpTypeId = addType(std::move(TypeEntry));
+
+ for (unsigned I = 1; I < MDStrs.size(); I++) {
+ const MDString *Value = MDStrs[I];
+ TypeEntry = std::make_unique<BTFTypeTypeTag>(TmpTypeId, Value->getString());
+ TmpTypeId = addType(std::move(TypeEntry));
+ }
+ return TmpTypeId;
+}
+
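How the chain comes out for "int __tag1 __tag2 *p": tags are added base-first, so the id genBTFTypeTags returns is the one the pointer type should reference. A toy model with strings standing in for BTF type entries:

#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> MDStrs = {"__tag1", "__tag2"};
  std::vector<std::string> Types = {"BaseType"}; // Stand-in type table.
  int TmpTypeId = 0; // What the next tag should point to.
  for (const std::string &Tag : MDStrs) {
    Types.push_back(Tag + " -> " + Types[TmpTypeId]);
    TmpTypeId = (int)Types.size() - 1;
  }
  // PTR -> __tag2 -> __tag1 -> BaseType, as the comment above says.
  assert(Types[TmpTypeId] == "__tag2 -> __tag1 -> BaseType");
}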
/// Handle structure/union types.
void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
uint32_t &TypeId) {
@@ -633,8 +717,25 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) {
if (VLen > BTF::MAX_VLEN)
return;
- auto TypeEntry = std::make_unique<BTFTypeEnum>(CTy, VLen);
- TypeId = addType(std::move(TypeEntry), CTy);
+ bool IsSigned = false;
+ unsigned NumBits = 32;
+ // No BaseType implies forward declaration in which case a
+ // BTFTypeEnum with Vlen = 0 is emitted.
+ if (CTy->getBaseType() != nullptr) {
+ const auto *BTy = cast<DIBasicType>(CTy->getBaseType());
+ IsSigned = BTy->getEncoding() == dwarf::DW_ATE_signed ||
+ BTy->getEncoding() == dwarf::DW_ATE_signed_char;
+ NumBits = BTy->getSizeInBits();
+ }
+
+ if (NumBits <= 32) {
+ auto TypeEntry = std::make_unique<BTFTypeEnum>(CTy, VLen, IsSigned);
+ TypeId = addType(std::move(TypeEntry), CTy);
+ } else {
+ assert(NumBits == 64);
+ auto TypeEntry = std::make_unique<BTFTypeEnum64>(CTy, VLen, IsSigned);
+ TypeId = addType(std::move(TypeEntry), CTy);
+ }
// No need to visit base type as BTF does not encode it.
}
@@ -684,9 +785,8 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
/// pointee type will be replaced with either a real type or
/// a forward declaration.
auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, true);
- auto &Fixup = FixupDerivedTypes[CTy->getName()];
- Fixup.first = CTag == dwarf::DW_TAG_union_type;
- Fixup.second.push_back(TypeEntry.get());
+ auto &Fixup = FixupDerivedTypes[CTy];
+ Fixup.push_back(std::make_pair(DTy, TypeEntry.get()));
TypeId = addType(std::move(TypeEntry), DTy);
return;
}
@@ -695,34 +795,8 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
}
if (Tag == dwarf::DW_TAG_pointer_type) {
- SmallVector<const MDString *, 4> MDStrs;
- DINodeArray Annots = DTy->getAnnotations();
- if (Annots) {
- // For type with "int __tag1 __tag2 *p", the MDStrs will have
- // content: [__tag1, __tag2].
- for (const Metadata *Annotations : Annots->operands()) {
- const MDNode *MD = cast<MDNode>(Annotations);
- const MDString *Name = cast<MDString>(MD->getOperand(0));
- if (!Name->getString().equals("btf_type_tag"))
- continue;
- MDStrs.push_back(cast<MDString>(MD->getOperand(1)));
- }
- }
-
- if (MDStrs.size() > 0) {
- // With MDStrs [__tag1, __tag2], the output type chain looks like
- // PTR -> __tag2 -> __tag1 -> BaseType
- // In the below, we construct BTF types with the order of __tag1, __tag2
- // and PTR.
- auto TypeEntry =
- std::make_unique<BTFTypeTypeTag>(DTy, MDStrs[0]->getString());
- unsigned TmpTypeId = addType(std::move(TypeEntry));
- for (unsigned I = 1; I < MDStrs.size(); I++) {
- const MDString *Value = MDStrs[I];
- TypeEntry =
- std::make_unique<BTFTypeTypeTag>(TmpTypeId, Value->getString());
- TmpTypeId = addType(std::move(TypeEntry));
- }
+ int TmpTypeId = genBTFTypeTags(DTy, -1);
+ if (TmpTypeId >= 0) {
auto TypeDEntry =
std::make_unique<BTFTypeDerived>(TmpTypeId, Tag, DTy->getName());
TypeId = addType(std::move(TypeDEntry), DTy);
@@ -773,15 +847,31 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
// already defined, we should keep moving to eventually
// bring in types for "struct t". Otherwise, the "struct s2"
// definition won't be correct.
+ //
+ // In the above, we have the following debuginfo:
+ // {ptr, struct_member} -> typedef -> struct
+ // and the BTF type for 'typedef' is generated while 'struct' may
+ // be in FixUp. But let us generalize the above to handle
+ // {different types} -> [various derived types]+ -> another type.
+ // For example,
+ // {func_param, struct_member} -> const -> ptr -> volatile -> struct
+ // We will traverse const/ptr/volatile which already have corresponding
+ // BTF types and generate type for 'struct' which might be in Fixup
+ // state.
if (Ty && (!CheckPointer || !SeenPointer)) {
if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
- unsigned Tag = DTy->getTag();
- if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
- Tag == dwarf::DW_TAG_volatile_type ||
- Tag == dwarf::DW_TAG_restrict_type) {
- uint32_t TmpTypeId;
- visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
- SeenPointer);
+ while (DTy) {
+ const DIType *BaseTy = DTy->getBaseType();
+ if (!BaseTy)
+ break;
+
+ if (DIToIdMap.find(BaseTy) != DIToIdMap.end()) {
+ DTy = dyn_cast<DIDerivedType>(BaseTy);
+ } else {
+ uint32_t TmpTypeId;
+ visitTypeEntry(BaseTy, TmpTypeId, CheckPointer, SeenPointer);
+ break;
+ }
}
}
}
@@ -908,7 +998,7 @@ void BTFDebug::emitBTFSection() {
MCContext &Ctx = OS.getContext();
MCSectionELF *Sec = Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0);
Sec->setAlignment(Align(4));
- OS.SwitchSection(Sec);
+ OS.switchSection(Sec);
// Emit header.
emitCommonHeader();
@@ -948,7 +1038,7 @@ void BTFDebug::emitBTFExtSection() {
MCContext &Ctx = OS.getContext();
MCSectionELF *Sec = Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0);
Sec->setAlignment(Align(4));
- OS.SwitchSection(Sec);
+ OS.switchSection(Sec);
// Emit header.
emitCommonHeader();
@@ -1436,9 +1526,8 @@ void BTFDebug::processFuncPrototypes(const Function *F) {
return;
// Do not emit again if already emitted.
- if (ProtoFunctions.find(F) != ProtoFunctions.end())
+ if (!ProtoFunctions.insert(F).second)
return;
- ProtoFunctions.insert(F);
uint32_t ProtoTypeId;
const std::unordered_map<uint32_t, StringRef> FuncArgNames;
@@ -1480,8 +1569,9 @@ void BTFDebug::endModule() {
// Fixups
for (auto &Fixup : FixupDerivedTypes) {
- StringRef TypeName = Fixup.first;
- bool IsUnion = Fixup.second.first;
+ const DICompositeType *CTy = Fixup.first;
+ StringRef TypeName = CTy->getName();
+ bool IsUnion = CTy->getTag() == dwarf::DW_TAG_union_type;
// Search through struct types
uint32_t StructTypeId = 0;
@@ -1497,8 +1587,15 @@ void BTFDebug::endModule() {
StructTypeId = addType(std::move(FwdTypeEntry));
}
- for (auto &DType : Fixup.second.second) {
- DType->setPointeeType(StructTypeId);
+ for (auto &TypeInfo : Fixup.second) {
+ const DIDerivedType *DTy = TypeInfo.first;
+ BTFTypeDerived *BDType = TypeInfo.second;
+
+ int TmpTypeId = genBTFTypeTags(DTy, StructTypeId);
+ if (TmpTypeId >= 0)
+ BDType->setPointeeType(TmpTypeId);
+ else
+ BDType->setPointeeType(StructTypeId);
}
}
diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h
index 7c30675c553c..1ad8ec5d918c 100644
--- a/llvm/lib/Target/BPF/BTFDebug.h
+++ b/llvm/lib/Target/BPF/BTFDebug.h
@@ -103,7 +103,7 @@ class BTFTypeEnum : public BTFTypeBase {
std::vector<struct BTF::BTFEnum> EnumValues;
public:
- BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues);
+ BTFTypeEnum(const DICompositeType *ETy, uint32_t NumValues, bool IsSigned);
uint32_t getSize() override {
return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnumSize;
}
@@ -218,6 +218,20 @@ public:
void emitType(MCStreamer &OS) override;
};
+/// Handle 64-bit enumeration types.
+class BTFTypeEnum64 : public BTFTypeBase {
+ const DICompositeType *ETy;
+ std::vector<struct BTF::BTFEnum64> EnumValues;
+
+public:
+ BTFTypeEnum64(const DICompositeType *ETy, uint32_t NumValues, bool IsSigned);
+ uint32_t getSize() override {
+ return BTFTypeBase::getSize() + EnumValues.size() * BTF::BTFEnum64Size;
+ }
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
+};
+
class BTFTypeTypeTag : public BTFTypeBase {
const DIDerivedType *DTy;
StringRef Tag;
@@ -289,7 +303,8 @@ class BTFDebug : public DebugHandlerBase {
std::map<std::string, std::unique_ptr<BTFKindDataSec>> DataSecEntries;
std::vector<BTFTypeStruct *> StructTypes;
std::map<const GlobalVariable *, std::pair<int64_t, uint32_t>> PatchImms;
- std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
+ std::map<const DICompositeType *,
+ std::vector<std::pair<const DIDerivedType *, BTFTypeDerived *>>>
FixupDerivedTypes;
std::set<const Function *>ProtoFunctions;
@@ -341,6 +356,13 @@ class BTFDebug : public DebugHandlerBase {
void processDeclAnnotations(DINodeArray Annotations, uint32_t BaseTypeId,
int ComponentId);
+ /// Generate BTF type_tags. If BaseTypeId is nonnegative, the last
+ /// BTF type_tag in the chain points to BaseTypeId. Otherwise, it points to
+ /// the base type of DTy. Return the type id of the first BTF type_tag
+ /// in the chain. If no type_tags are generated, a negative value
+ /// is returned.
+ int genBTFTypeTags(const DIDerivedType *DTy, int BaseTypeId);
+
/// Generate one field relocation record.
void generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId,
const GlobalVariable *, bool IsAma);
diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 3f643d47f934..aa408f8b65f7 100644
--- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -15,9 +15,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
@@ -99,7 +100,7 @@ static const unsigned GPRDecoderTable[] = {
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void * /*Decoder*/) {
+ const MCDisassembler * /*Decoder*/) {
if (RegNo > 11)
return MCDisassembler::Fail;
@@ -112,9 +113,9 @@ static const unsigned GPR32DecoderTable[] = {
BPF::W0, BPF::W1, BPF::W2, BPF::W3, BPF::W4, BPF::W5,
BPF::W6, BPF::W7, BPF::W8, BPF::W9, BPF::W10, BPF::W11};
-static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void * /*Decoder*/) {
+static DecodeStatus
+DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/,
+ const MCDisassembler * /*Decoder*/) {
if (RegNo > 11)
return MCDisassembler::Fail;
@@ -124,7 +125,8 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus decodeMemoryOpValue(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Register = (Insn >> 16) & 0xf;
if (Register > 11)
return MCDisassembler::Fail;
@@ -220,4 +222,4 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
}
typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index bacd00360f82..56fdd6766132 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -87,6 +87,11 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
}
} else {
assert(Fixup.getKind() == FK_PCRel_2);
+
+ int64_t ByteOff = (int64_t)Value - 8;
+ if (ByteOff > INT16_MAX * 8 || ByteOff < INT16_MIN * 8)
+ report_fatal_error("Branch target out of insn range");
+
Value = (uint16_t)((Value - 8) / 8);
support::endian::write<uint16_t>(&Data[Fixup.getOffset() + 2], Value,
Endian);
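The arithmetic behind the new range check: FK_PCRel_2 stores (Value - 8) / 8 in a signed 16-bit field, so the byte offset must stay within INT16_MIN*8 .. INT16_MAX*8. A hand check:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Value = (INT16_MAX + 1) * 8 + 8; // One insn past the maximum.
  int64_t ByteOff = Value - 8;
  assert(ByteOff > INT16_MAX * 8); // Now a fatal error instead of a
                                   // silent 16-bit truncation.

  Value = 16; // A branch skipping exactly one instruction encodes as 1.
  assert((uint16_t)((Value - 8) / 8) == 1);
}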
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
index 200c72a07ed6..6f041584a955 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 3292c3e5ebb5..14f6b367b8c7 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -41,8 +41,6 @@ public:
// section will be parsable, but with odd offsets and
// line numbers, etc.
CodePointerSize = 8;
-
- UseIntegratedAssembler = false;
}
void setDwarfUsesRelocationsAcrossSections(bool enable) {
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index 12af92e0d198..a98d001097bc 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -73,15 +73,13 @@ private:
} // end anonymous namespace
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new BPFMCCodeEmitter(MCII, MRI, true);
+ return new BPFMCCodeEmitter(MCII, *Ctx.getRegisterInfo(), true);
}
MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new BPFMCCodeEmitter(MCII, MRI, false);
+ return new BPFMCCodeEmitter(MCII, *Ctx.getRegisterInfo(), false);
}
unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index a426a132cf47..fc190504581c 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H
#include "llvm/Config/config.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/DataTypes.h"
#include <memory>
@@ -30,10 +31,8 @@ class MCTargetOptions;
class Target;
MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index a62bd111cba9..63a60473d664 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -9,14 +9,17 @@
#include "MCTargetDesc/CSKYInstPrinter.h"
#include "MCTargetDesc/CSKYMCExpr.h"
#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "MCTargetDesc/CSKYTargetStreamer.h"
#include "TargetInfo/CSKYTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
@@ -25,6 +28,8 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CSKYAttributes.h"
+#include "llvm/Support/CSKYTargetParser.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -52,6 +57,9 @@ class CSKYAsmParser : public MCTargetAsmParser {
const MCRegisterInfo *MRI;
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
int64_t Lower, int64_t Upper, Twine Msg);
@@ -78,6 +86,16 @@ class CSKYAsmParser : public MCTargetAsmParser {
bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
MCStreamer &Out);
+ bool processLRW(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+ bool processJSRI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+ bool processJMPI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+ CSKYTargetStreamer &getTargetStreamer() {
+ assert(getParser().getStreamer().getTargetStreamer() &&
+ "do not have a target streamer");
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<CSKYTargetStreamer &>(TS);
+ }
// Auto-generated instruction matching functions
#define GET_ASSEMBLER_HEADER
@@ -95,6 +113,8 @@ class CSKYAsmParser : public MCTargetAsmParser {
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool parseDirectiveAttribute();
+
public:
enum CSKYMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
@@ -108,7 +128,14 @@ public:
CSKYAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII) {
+
+ MCAsmParserExtension::Initialize(Parser);
+
+ // Cache the MCRegisterInfo.
+ MRI = getContext().getRegisterInfo();
+
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ getTargetStreamer().emitTargetAttributes(STI);
}
};
@@ -612,6 +639,11 @@ public:
#define GET_MNEMONIC_SPELL_CHECKER
#include "CSKYGenAsmMatcher.inc"
+static MCRegister convertFPR32ToFPR64(MCRegister Reg) {
+ assert(Reg >= CSKY::F0_32 && Reg <= CSKY::F31_32 && "Invalid register");
+ return Reg - CSKY::F0_32 + CSKY::F0_64;
+}
+
static std::string CSKYMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
unsigned VariantID = 0);
@@ -788,6 +820,96 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Unknown match type detected!");
}
+bool CSKYAsmParser::processLRW(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out) {
+ Inst.setLoc(IDLoc);
+
+ unsigned Opcode;
+ MCOperand Op;
+ if (Inst.getOpcode() == CSKY::PseudoLRW16)
+ Opcode = CSKY::LRW16;
+ else
+ Opcode = CSKY::LRW32;
+
+ if (Inst.getOperand(1).isImm()) {
+ if (isUInt<8>(Inst.getOperand(1).getImm()) &&
+ Inst.getOperand(0).getReg() <= CSKY::R7) {
+ Opcode = CSKY::MOVI16;
+ } else if (getSTI().getFeatureBits()[CSKY::HasE2] &&
+ isUInt<16>(Inst.getOperand(1).getImm())) {
+ Opcode = CSKY::MOVI32;
+ } else {
+ auto *Expr = getTargetStreamer().addConstantPoolEntry(
+ MCConstantExpr::create(Inst.getOperand(1).getImm(), getContext()),
+ Inst.getLoc());
+ Inst.erase(std::prev(Inst.end()));
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+ } else {
+ const MCExpr *AdjustExpr = nullptr;
+ if (const CSKYMCExpr *CSKYExpr =
+ dyn_cast<CSKYMCExpr>(Inst.getOperand(1).getExpr())) {
+ if (CSKYExpr->getKind() == CSKYMCExpr::VK_CSKY_TLSGD ||
+ CSKYExpr->getKind() == CSKYMCExpr::VK_CSKY_TLSIE ||
+ CSKYExpr->getKind() == CSKYMCExpr::VK_CSKY_TLSLDM) {
+ MCSymbol *Dot = getContext().createNamedTempSymbol();
+ Out.emitLabel(Dot);
+ AdjustExpr = MCSymbolRefExpr::create(Dot, getContext());
+ }
+ }
+ auto *Expr = getTargetStreamer().addConstantPoolEntry(
+ Inst.getOperand(1).getExpr(), Inst.getLoc(), AdjustExpr);
+ Inst.erase(std::prev(Inst.end()));
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ Inst.setOpcode(Opcode);
+
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+}
+
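The opcode choice in processLRW, condensed into a standalone predicate (the names are stand-ins; the real code also starts from PseudoLRW16 vs PseudoLRW32 and builds a constant-pool expression for the LRW case):

#include <cassert>
#include <cstdint>

enum Op { MOVI16, MOVI32, LRW32 }; // Illustrative, not the CSKY:: enums.

static Op pickLRWOpcode(uint64_t Imm, bool DstIsR0toR7, bool HasE2) {
  if (Imm <= 0xff && DstIsR0toR7) // isUInt<8> and a low destination reg.
    return MOVI16;
  if (HasE2 && Imm <= 0xffff)     // isUInt<16> with the E2 feature.
    return MOVI32;
  return LRW32;                   // Materialize via the constant pool.
}

int main() {
  assert(pickLRWOpcode(0x42, true, true) == MOVI16);
  assert(pickLRWOpcode(0x1234, false, true) == MOVI32);
  assert(pickLRWOpcode(0x12345678, true, true) == LRW32);
}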
+bool CSKYAsmParser::processJSRI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out) {
+ Inst.setLoc(IDLoc);
+
+ if (Inst.getOperand(0).isImm()) {
+ const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry(
+ MCConstantExpr::create(Inst.getOperand(0).getImm(), getContext()),
+ Inst.getLoc());
+ Inst.setOpcode(CSKY::JSRI32);
+ Inst.erase(std::prev(Inst.end()));
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ } else {
+ const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry(
+ Inst.getOperand(0).getExpr(), Inst.getLoc());
+ Inst.setOpcode(CSKY::JBSR32);
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+}
+
+bool CSKYAsmParser::processJMPI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out) {
+ Inst.setLoc(IDLoc);
+
+ if (Inst.getOperand(0).isImm()) {
+ const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry(
+ MCConstantExpr::create(Inst.getOperand(0).getImm(), getContext()),
+ Inst.getLoc());
+ Inst.setOpcode(CSKY::JMPI32);
+ Inst.erase(std::prev(Inst.end()));
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ } else {
+ const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry(
+ Inst.getOperand(0).getExpr(), Inst.getLoc());
+ Inst.setOpcode(CSKY::JBR32);
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+}
+
bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
OperandVector &Operands,
MCStreamer &Out) {
@@ -845,6 +967,28 @@ bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Inst.erase(std::next(Inst.begin()));
Inst.insert(Inst.end(), MCOperand::createReg(CSKY::C));
break;
+ case CSKY::PseudoLRW16:
+ case CSKY::PseudoLRW32:
+ return processLRW(Inst, IDLoc, Out);
+ case CSKY::PseudoJSRI32:
+ return processJSRI(Inst, IDLoc, Out);
+ case CSKY::PseudoJMPI32:
+ return processJMPI(Inst, IDLoc, Out);
+ case CSKY::JBSR32:
+ case CSKY::JBR16:
+ case CSKY::JBT16:
+ case CSKY::JBF16:
+ case CSKY::JBR32:
+ case CSKY::JBT32:
+ case CSKY::JBF32:
+ unsigned Num = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(Num).isExpr());
+
+ const MCExpr *Expr = getTargetStreamer().addConstantPoolEntry(
+ Inst.getOperand(Num).getExpr(), Inst.getLoc());
+
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ break;
}
emitToStreamer(Out, Inst);
@@ -1471,7 +1615,132 @@ OperandMatchResultTy CSKYAsmParser::tryParseRegister(unsigned &RegNo,
return MatchOperand_Success;
}
-bool CSKYAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
+bool CSKYAsmParser::ParseDirective(AsmToken DirectiveID) {
+ // This returns false if this function recognizes the directive,
+ // regardless of whether it successfully handles it or reports an
+ // error. Otherwise it returns true to give the generic parser a
+ // chance at recognizing it.
+ StringRef IDVal = DirectiveID.getString();
+
+ if (IDVal == ".csky_attribute")
+ return parseDirectiveAttribute();
+
+ return true;
+}
+
+/// parseDirectiveAttribute
+/// ::= .csky_attribute ( identifier | expression ) ',' ( expression | "string" )
+bool CSKYAsmParser::parseDirectiveAttribute() {
+ MCAsmParser &Parser = getParser();
+ int64_t Tag;
+ SMLoc TagLoc;
+ TagLoc = Parser.getTok().getLoc();
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ StringRef Name = Parser.getTok().getIdentifier();
+ Optional<unsigned> Ret =
+ ELFAttrs::attrTypeFromString(Name, CSKYAttrs::getCSKYAttributeTags());
+ if (!Ret.hasValue()) {
+ Error(TagLoc, "attribute name not recognised: " + Name);
+ return false;
+ }
+ Tag = Ret.getValue();
+ Parser.Lex();
+ } else {
+ const MCExpr *AttrExpr;
+
+ TagLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(AttrExpr))
+ return true;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
+ if (check(!CE, TagLoc, "expected numeric constant"))
+ return true;
+
+ Tag = CE->getValue();
+ }
+
+ if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
+
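+ // CSKY_ARCH_NAME, CSKY_CPU_NAME and CSKY_FPU_NUMBER_MODULE take string
+ // values; every other tag takes an integer.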
+ StringRef StringValue;
+ int64_t IntegerValue = 0;
+ bool IsIntegerValue = ((Tag != CSKYAttrs::CSKY_ARCH_NAME) &&
+ (Tag != CSKYAttrs::CSKY_CPU_NAME) &&
+ (Tag != CSKYAttrs::CSKY_FPU_NUMBER_MODULE));
+
+ SMLoc ValueExprLoc = Parser.getTok().getLoc();
+ if (IsIntegerValue) {
+ const MCExpr *ValueExpr;
+ if (Parser.parseExpression(ValueExpr))
+ return true;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr);
+ if (!CE)
+ return Error(ValueExprLoc, "expected numeric constant");
+ IntegerValue = CE->getValue();
+ } else {
+ if (Parser.getTok().isNot(AsmToken::String))
+ return Error(Parser.getTok().getLoc(), "expected string constant");
+
+ StringValue = Parser.getTok().getStringContents();
+ Parser.Lex();
+ }
+
+ if (Parser.parseEOL())
+ return true;
+
+ if (IsIntegerValue)
+ getTargetStreamer().emitAttribute(Tag, IntegerValue);
+ else if (Tag != CSKYAttrs::CSKY_ARCH_NAME && Tag != CSKYAttrs::CSKY_CPU_NAME)
+ getTargetStreamer().emitTextAttribute(Tag, StringValue);
+ else {
+ CSKY::ArchKind ID = (Tag == CSKYAttrs::CSKY_ARCH_NAME)
+ ? CSKY::parseArch(StringValue)
+ : CSKY::parseCPUArch(StringValue);
+ if (ID == CSKY::ArchKind::INVALID)
+ return Error(ValueExprLoc, (Tag == CSKYAttrs::CSKY_ARCH_NAME)
+ ? "unknown arch name"
+ : "unknown cpu name");
+
+ getTargetStreamer().emitTextAttribute(Tag, StringValue);
+ }
+
+ return false;
+}
+
+unsigned CSKYAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ CSKYOperand &Op = static_cast<CSKYOperand &>(AsmOp);
+
+ if (!Op.isReg())
+ return Match_InvalidOperand;
+
+ MCRegister Reg = Op.getReg();
+
+ if (CSKYMCRegisterClasses[CSKY::FPR32RegClassID].contains(Reg)) {
+ // As the parser couldn't differentiate an FPR64 from an FPR32, coerce the
+ // register from FPR32 to FPR64 if necessary.
+ if (Kind == MCK_FPR64 || Kind == MCK_sFPR64) {
+ Op.Reg.RegNum = convertFPR32ToFPR64(Reg);
+ if (Kind == MCK_sFPR64 &&
+ (Op.Reg.RegNum < CSKY::F0_64 || Op.Reg.RegNum > CSKY::F15_64))
+ return Match_InvalidRegOutOfRange;
+ if (Kind == MCK_FPR64 &&
+ (Op.Reg.RegNum < CSKY::F0_64 || Op.Reg.RegNum > CSKY::F31_64))
+ return Match_InvalidRegOutOfRange;
+ return Match_Success;
+ }
+ }
+
+ if (CSKYMCRegisterClasses[CSKY::GPRRegClassID].contains(Reg)) {
+ if (Kind == MCK_GPRPair) {
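+ // A pair operand is written as its first GPR; map Rn to the pair
+ // register starting at Rn (pair registers are laid out from R0_R1).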
+ Op.Reg.RegNum = MRI->getEncodingValue(Reg) + CSKY::R0_R1;
+ return Match_Success;
+ }
+ }
+
+ return Match_InvalidOperand;
+}
void CSKYAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
diff --git a/llvm/lib/Target/CSKY/CSKY.h b/llvm/lib/Target/CSKY/CSKY.h
index 401d6fa1a0a5..27a6c6d2f250 100644
--- a/llvm/lib/Target/CSKY/CSKY.h
+++ b/llvm/lib/Target/CSKY/CSKY.h
@@ -14,11 +14,13 @@
#ifndef LLVM_LIB_TARGET_CSKY_CSKY_H
#define LLVM_LIB_TARGET_CSKY_CSKY_H
+#include "llvm/PassRegistry.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class CSKYTargetMachine;
class FunctionPass;
+class PassRegistry;
FunctionPass *createCSKYISelDag(CSKYTargetMachine &TM);
FunctionPass *createCSKYConstantIslandPass();
diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td
index ddb7fe93706e..a8db9151e127 100644
--- a/llvm/lib/Target/CSKY/CSKY.td
+++ b/llvm/lib/Target/CSKY/CSKY.td
@@ -32,6 +32,26 @@ def HasFPUv2_DF : Predicate<"Subtarget->hasFPUv2DoubleFloat()">,
AssemblerPredicate<(all_of FeatureFPUV2_DF),
"Enable FPUv2 double float instructions">;
+def FeatureFdivdu : SubtargetFeature<"fdivdu", "HasFdivdu", "true",
+ "Enable float divide instructions">;
+def HasFdivdu : Predicate<"Subtarget->hasFdivdu()">,
+ AssemblerPredicate<(all_of FeatureFdivdu),
+ "Enable float divide instructions">;
+
+def FeatureFPUV3_HI
+ : SubtargetFeature<"fpuv3_hi", "HasFPUv3HalfWord", "true",
+ "Enable FPUv3 half-word converting instructions">;
+def HasFPUv3_HI : Predicate<"Subtarget->hasFPUv3HalfWord()">,
+ AssemblerPredicate<(all_of FeatureFPUV3_HI),
+ "Enable FPUv3 half-word converting instructions">;
+
+def FeatureFPUV3_HF
+ : SubtargetFeature<"fpuv3_hf", "HasFPUv3HalfFloat", "true",
+ "Enable FPUv3 half-precision operation instructions">;
+def HasFPUv3_HF : Predicate<"Subtarget->hasFPUv3HalfFloat()">,
+ AssemblerPredicate<(all_of FeatureFPUV3_HF),
+ "Enable FPUv3 half-precision operation instructions">;
+
def FeatureFPUV3_SF
: SubtargetFeature<"fpuv3_sf", "HasFPUv3SingleFloat", "true",
"Enable FPUv3 single float instructions">;
@@ -46,6 +66,85 @@ def HasFPUv3_DF : Predicate<"Subtarget->hasFPUv3DoubleFloat()">,
AssemblerPredicate<(all_of FeatureFPUV3_DF),
"Enable FPUv3 double float instructions">;
+def HasFLOATE1
+ : SubtargetFeature<"floate1", "HasFLOATE1", "true", "Support CSKY floate1 instructions">;
+def iHasFLOATE1 : Predicate<"Subtarget->hasFLOATE1()">,
+ AssemblerPredicate<(all_of HasFLOATE1),
+ "Support CSKY floate1 instructions">;
+
+def HasFLOAT1E2
+ : SubtargetFeature<"float1e2", "HasFLOAT1E2", "true", "Support CSKY float1e2 instructions">;
+def iHasFLOAT1E2 : Predicate<"Subtarget->hasFLOAT1E2()">,
+ AssemblerPredicate<(all_of HasFLOAT1E2),
+ "Support CSKY float1e2 instructions">;
+
+def HasFLOAT1E3
+ : SubtargetFeature<"float1e3", "HasFLOAT1E3", "true", "Support CSKY float1e3 instructions">;
+def iHasFLOAT1E3 : Predicate<"Subtarget->hasFLOAT1E3()">,
+ AssemblerPredicate<(all_of HasFLOAT1E3),
+ "Support CSKY float1e3 instructions">;
+
+def HasFLOAT3E4
+ : SubtargetFeature<"float3e4", "HasFLOAT3E4", "true", "Support CSKY float3e4 instructions">;
+def iHasFLOAT3E4 : Predicate<"Subtarget->hasFLOAT3E4()">,
+ AssemblerPredicate<(all_of HasFLOAT3E4),
+ "Support CSKY float3e4 instructions">;
+
+def HasFLOAT7E60
+ : SubtargetFeature<"float7e60", "HasFLOAT7E60", "true", "Support CSKY float7e60 instructions">;
+def iHasFLOAT7E60 : Predicate<"Subtarget->hasFLOAT7E60()">,
+ AssemblerPredicate<(all_of HasFLOAT7E60),
+ "Support CSKY float7e60 instructions">;
+
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def HasHWDiv : Predicate<"Subtarget->hasHardwareDivide()">,
+ AssemblerPredicate<(all_of FeatureHWDiv),
+ "Enable divide instructions">;
+
+def FeatureSTM : SubtargetFeature<"multiple_stld", "HasSTM", "true",
+ "Enable multiple load/store instructions">;
+def HasSTM : Predicate<"Subtarget->hasSTM()">,
+ AssemblerPredicate<(all_of FeatureSTM),
+ "Enable multiple load/store instructions">;
+
+def FeaturePushPop : SubtargetFeature<"pushpop", "HasPushPop", "true",
+ "Enable push/pop instructions">;
+def HasPushPop : Predicate<"Subtarget->hasPushPop()">,
+ AssemblerPredicate<(all_of FeaturePushPop),
+ "Enable push/pop instructions">;
+
+def FeatureDSP
+ : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instructions">;
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
+ AssemblerPredicate<(all_of FeatureDSP),
+ "Enable DSP instructions">;
+
+def HasDSP1E2
+ : SubtargetFeature<"dsp1e2", "HasDSP1E2", "true", "Support CSKY dsp1e2 instructions">;
+def iHasDSP1E2 : Predicate<"Subtarget->hasDSP1E2()">,
+ AssemblerPredicate<(all_of HasDSP1E2),
+ "Support CSKY dsp1e2 instructions">;
+
+def HasDSPE60
+ : SubtargetFeature<"dspe60", "HasDSPE60", "true", "Support CSKY dspe60 instructions">;
+def iHasDSPE60 : Predicate<"Subtarget->hasDSPE60()">,
+ AssemblerPredicate<(all_of HasDSPE60),
+ "Support CSKY dspe60 instructions">;
+
+def FeatureDSPV2 : SubtargetFeature<"dspv2", "HasDSPV2", "true",
+ "Enable DSP V2.0 instructions">;
+def HasDSPV2 : Predicate<"Subtarget->hasDSPV2()">,
+ AssemblerPredicate<(all_of FeatureDSPV2),
+ "Enable DSP V2.0 instructions">;
+
+def FeatureDSP_Silan : SubtargetFeature<"dsp_silan", "HasDSP_Silan", "true",
+ "Enable DSP Silan instructions">;
+def HasDSP_Silan : Predicate<"Subtarget->hasDSP_Silan()">,
+ AssemblerPredicate<(all_of FeatureDSP_Silan),
+ "Enable DSP Silan instructions">;
+
def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true",
"Use the 16-bit btsti instruction">;
def HasBTST16 : Predicate<"Subtarget->hasBTST16()">,
@@ -59,18 +158,110 @@ def HasExtendLrw : Predicate<"Subtarget->hasExtendLrw()">,
AssemblerPredicate<(all_of FeatureExtendLrw),
"Use the extend LRW instruction">;
+def FeatureTrust : SubtargetFeature<"trust", "HasTrust", "true",
+ "Enable trust instructions">;
+def HasTrust : Predicate<"Subtarget->hasTrust()">,
+ AssemblerPredicate<(all_of FeatureTrust),
+ "Enable trust instructions">;
+
def FeatureJAVA
: SubtargetFeature<"java", "HasJAVA", "true", "Enable java instructions">;
def HasJAVA : Predicate<"Subtarget->hasJAVA()">,
AssemblerPredicate<(all_of FeatureJAVA),
"Enable java instructions">;
+def FeatureCache
+ : SubtargetFeature<"cache", "HasCache", "true", "Enable cache">;
+def HasCache : Predicate<"Subtarget->hasCache()">,
+ AssemblerPredicate<(all_of FeatureCache),
+ "Enable cache">;
+
+def FeatureNVIC
+ : SubtargetFeature<"nvic", "HasNVIC", "true", "Enable NVIC">;
+def HasNVIC : Predicate<"Subtarget->hasNVIC()">,
+ AssemblerPredicate<(all_of FeatureNVIC),
+ "Enable NVIC">;
+
def FeatureDoloop : SubtargetFeature<"doloop", "HasDoloop", "true",
"Enable doloop instructions">;
def HasDoloop : Predicate<"Subtarget->hasDoloop()">,
AssemblerPredicate<(all_of FeatureDoloop),
"Enable doloop instructions">;
+// Features other than instructions
+def FeatureHighreg : SubtargetFeature<"high-registers", "HasHighRegisters",
+ "true", "Enable r16-r31 registers">;
+def HasHighRegisters : Predicate<"Subtarget->hasHighRegisters()">,
+ AssemblerPredicate<(all_of FeatureHighreg),
+ "Enable r16-r31 registers">;
+
+def FeatureSmart : SubtargetFeature<"smart", "SmartMode", "true",
+ "Let CPU work in Smart Mode">;
+def SmartMode : Predicate<"Subtarget->smartMode()">,
+ AssemblerPredicate<(all_of FeatureSmart),
+ "Let CPU work in Smart Mode">;
+
+def FeatureVDSPV2 : SubtargetFeature<"vdspv2", "HasVDSPV2", "true",
+ "Enable vdsp-v2 instructions">;
+def HasVDSPV2 : Predicate<"Subtarget->hasVDSPV2()">,
+ AssemblerPredicate<(all_of FeatureVDSPV2),
+ "Enable vdsp-v2 instructions">;
+
+def HasVDSPV2_FLOAT : Predicate<"Subtarget->hasVDSPV2_FLOAT()">;
+def HasVDSPV2_HALF: Predicate<"Subtarget->hasVDSPV2_HALF()">;
+
+def HasVDSP2E3
+ : SubtargetFeature<"vdsp2e3", "HasVDSP2E3", "true", "Support CSKY vdsp2e3 instructions">;
+def iHasVDSP2E3 : Predicate<"Subtarget->hasVDSP2E3()">,
+ AssemblerPredicate<(all_of HasVDSP2E3),
+ "Support CSKY vdsp2e3 instructions">;
+
+def HasVDSP2E60F
+ : SubtargetFeature<"vdsp2e60f", "HasVDSP2E60F", "true", "Support CSKY vdsp2e60f instructions">;
+def iHasVDSP2E60F : Predicate<"Subtarget->hasVDSP2E60F()">,
+ AssemblerPredicate<(all_of HasVDSP2E60F),
+ "Support CSKY vdsp2e60f instructions">;
+
+def FeatureHardTP : SubtargetFeature<"hard-tp", "ReadTPHard", "true",
+ "Enable TLS Pointer register">;
+def ReadTPHard : Predicate<"Subtarget->readTPHard()">,
+ AssemblerPredicate<(all_of FeatureHardTP),
+ "Enable TLS Pointer register">;
+
+def FeatureSoftTP : SubtargetFeature<"soft-tp", "ReadTPHard", "false",
+ "Disable TLS Pointer register">;
+
+def FeatureIstack : SubtargetFeature<"istack", "EnableInterruptAttribute",
+ "true", "Enable interrupt attribute">;
+def EnableInterruptAttribute
+ : Predicate<"Subtarget->enableInterruptAttribute()">,
+ AssemblerPredicate<(all_of FeatureIstack),
+ "Enable interrupt attribute">;
+
+def FeatureConstPool : SubtargetFeature<"constpool", "DumpConstPool", "true",
+ "Have the compiler dump the constant pool">;
+def DumpConstPool : Predicate<"Subtarget->dumpConstPool()">,
+ AssemblerPredicate<(all_of FeatureConstPool),
+ "Have the compiler dump the constant pool">;
+
+def FeatureStackSize : SubtargetFeature<"stack-size", "EnableStackSize", "true",
+ "Output stack size information">;
+def EnableStackSize : Predicate<"Subtarget->enableStackSize()">,
+ AssemblerPredicate<(all_of FeatureStackSize),
+ "Output stack size information">;
+
+def FeatureCCRT
+ : SubtargetFeature<"ccrt", "UseCCRT", "true", "Use CSKY compiler runtime">;
+def UseCCRT : Predicate<"Subtarget->useCCRT()">,
+ AssemblerPredicate<(all_of FeatureCCRT),
+ "Use CSKY compiler runtime">;
+
+def FeatureVDSPV1_128 : SubtargetFeature<"vdspv1", "HasVDSPV1_128", "true",
+ "Enable 128bit vdsp-v1 instructions">;
+def HasVDSPV1_128 : Predicate<"Subtarget->hasVDSPV1_128()">,
+ AssemblerPredicate<(all_of FeatureVDSPV1_128),
+ "Enable 128bit vdsp-v1 instructions">;
+
def HasE1
: SubtargetFeature<"e1", "HasE1", "true", "Support CSKY e1 instructions",
[FeatureExtendLrw]>;
@@ -91,12 +282,25 @@ def iHas2E3 : Predicate<"Subtarget->has2E3()">,
AssemblerPredicate<(all_of Has2E3),
"Support CSKY 2e3 instructions">;
+def HasMP : SubtargetFeature<"mp", "HasMP", "true",
+ "Support CSKY mp instructions", [Has2E3]>;
+def iHasMP : Predicate<"Subtarget->hasMP()">,
+ AssemblerPredicate<(all_of HasMP),
+ "Support CSKY mp instructions">;
+
def Has3E3r1 : SubtargetFeature<"3e3r1", "Has3E3r1", "true",
"Support CSKY 3e3r1 instructions">;
def iHas3E3r1 : Predicate<"Subtarget->has3E3r1()">,
AssemblerPredicate<(all_of Has3E3r1),
"Support CSKY 3e3r1 instructions">;
+def Has3r1E3r2 : SubtargetFeature<"3e3r2", "Has3r1E3r2", "true",
+ "Support CSKY 3e3r2 instructions",
+ [Has3E3r1, FeatureDoloop]>;
+def iHas3r1E3r2 : Predicate<"Subtarget->has3r1E3r2()">,
+ AssemblerPredicate<(all_of Has3r1E3r2),
+ "Support CSKY 3e3r2 instructions">;
+
def Has3r2E3r3
: SubtargetFeature<"3e3r3", "Has3r2E3r3", "true",
"Support CSKY 3e3r3 instructions", [FeatureDoloop]>;
@@ -129,6 +333,35 @@ def iHas10E60 : Predicate<"Subtarget->has10E60()">,
"Support CSKY 10e60 instructions">;
//===----------------------------------------------------------------------===//
+// CSKY Processor subtarget features.
+//===----------------------------------------------------------------------===//
+
+def ProcCK801 : SubtargetFeature<"ck801", "CSKYProcFamily", "CK801",
+ "CSKY ck801 processors", []>;
+def isCK801 : Predicate<"Subtarget->isCK801()">,
+ AssemblerPredicate<(all_of ProcCK801)>;
+def ProcCK802 : SubtargetFeature<"ck802", "CSKYProcFamily", "CK802",
+ "CSKY ck802 processors", []>;
+def ProcCK803 : SubtargetFeature<"ck803", "CSKYProcFamily", "CK803",
+ "CSKY ck803 processors", []>;
+def ProcCK803S : SubtargetFeature<"ck803s", "CSKYProcFamily", "CK803S",
+ "CSKY ck803s processors", []>;
+def ProcCK804 : SubtargetFeature<"ck804", "CSKYProcFamily", "CK804",
+ "CSKY ck804 processors", []>;
+def ProcCK805 : SubtargetFeature<"ck805", "CSKYProcFamily", "CK805",
+ "CSKY ck805 processors", []>;
+def ProcCK807 : SubtargetFeature<"ck807", "CSKYProcFamily", "CK807",
+ "CSKY ck807 processors", []>;
+def ProcCK810 : SubtargetFeature<"ck810", "CSKYProcFamily", "CK810",
+ "CSKY ck810 processors", []>;
+def ProcCK810V : SubtargetFeature<"ck810v", "CSKYProcFamily", "CK810V",
+ "CSKY ck810v processors", []>;
+def ProcCK860 : SubtargetFeature<"ck860", "CSKYProcFamily", "CK860",
+ "CSKY ck860 processors", []>;
+def ProcCK860V : SubtargetFeature<"ck860v", "CSKYProcFamily", "CK860V",
+ "CSKY ck860v processors", []>;
+
+//===----------------------------------------------------------------------===//
// Registers, calling conventions, instruction descriptions.
//===----------------------------------------------------------------------===//
@@ -142,6 +375,296 @@ include "CSKYInstrInfo.td"
def : ProcessorModel<"generic", NoSchedModel, []>;
+// CK801 series
+class CK801<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : ProcessorModel<n, m, !listconcat(f, [HasE1, FeatureTrust, FeatureBTST16, ProcCK801]), !listconcat(tunef, [])>;
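+// Each series helper below appends the series' baseline features to the
+// per-CPU feature list f, so individual defs only name their optional
+// extensions.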
+
+def : CK801<"ck801", NoSchedModel, []>;
+def : CK801<"ck801t", NoSchedModel, []>;
+def : CK801<"e801", NoSchedModel, []>;
+
+// CK802 series
+class CK802<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : ProcessorModel<n, m, !listconcat(f, [HasE2, FeatureTrust, FeatureBTST16, FeatureNVIC, ProcCK802]), !listconcat(tunef, [])>;
+
+def : CK802<"ck802", NoSchedModel, []>;
+def : CK802<"ck802t", NoSchedModel, []>;
+def : CK802<"ck802j", NoSchedModel, [FeatureJAVA]>;
+def : CK802<"e802", NoSchedModel, []>;
+def : CK802<"e802t", NoSchedModel, []>;
+def : CK802<"s802", NoSchedModel, []>;
+def : CK802<"s802t", NoSchedModel, []>;
+
+// CK803 series
+class CK803<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : ProcessorModel<n, m, !listconcat(f, [Has2E3, HasMP, FeatureTrust, FeatureBTST16, FeatureNVIC, FeatureHWDiv, ProcCK803]), !listconcat(tunef, [])>;
+
+def : CK803<"ck803", NoSchedModel, []>;
+def : CK803<"ck803h", NoSchedModel, []>;
+def : CK803<"ck803t", NoSchedModel, []>;
+def : CK803<"ck803ht", NoSchedModel, []>;
+def : CK803<"ck803f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803fh", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803e", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>;
+def : CK803<"ck803eh", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>;
+def : CK803<"ck803et", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>;
+def : CK803<"ck803eht", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>;
+def : CK803<"ck803ef", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803efh", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803eft", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803efht", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803r1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803hr1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803tr1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803htr1", NoSchedModel, [Has3E3r1, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803fr1", NoSchedModel,
+ [Has3E3r1, Has3r2E3r3, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureDSPV2]>;
+def : CK803<"ck803fhr1", NoSchedModel,
+ [Has3E3r1, Has3r2E3r3, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureDSPV2]>;
+def : CK803<"ck803er1", NoSchedModel,
+ [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803etr1", NoSchedModel,
+ [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803ehr1", NoSchedModel,
+ [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803ehtr1", NoSchedModel,
+ [Has3E3r1, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803efr1", NoSchedModel,
+ [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803efhr1", NoSchedModel,
+ [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803ftr1", NoSchedModel, [Has3E3r1, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureDSPV2]>;
+def : CK803<"ck803eftr1", NoSchedModel,
+ [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803efhtr1", NoSchedModel,
+ [Has3E3r1, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803r2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803hr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803tr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803htr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803fr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803fhr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803er2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803etr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803ehr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803ehtr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803efr2", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803efhr2", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803ftr2", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803eftr2", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803efhtr2", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803r3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803hr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803tr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803htr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2]>;
+def : CK803<"ck803fr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803fhr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803er3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803etr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803ehr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803ehtr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureHighreg]>;
+def : CK803<"ck803efr3", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803efhr3", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803ftr3", NoSchedModel, [Has3r1E3r2, Has3r2E3r3, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803<"ck803eftr3", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"ck803efhtr3", NoSchedModel,
+ [Has3r1E3r2, Has3r2E3r3, FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK803<"s803", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>;
+def : CK803<"s803t", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>;
+def : CK803<"e803", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>;
+def : CK803<"e803t", NoSchedModel, [Has3r1E3r2, Has3r2E3r3]>;
+
+// CK803S series
+class CK803S<string n, SchedMachineModel m, list<SubtargetFeature> f,
+list<SubtargetFeature> tunef = []> : CK803<n, m, !listconcat(f, [Has3E3r1, ProcCK803S]), tunef>;
+
+def : CK803S<"ck803s", NoSchedModel, []>;
+def : CK803S<"ck803sn", NoSchedModel, [FeatureDSP_Silan]>;
+def : CK803S<"ck803st", NoSchedModel, []>;
+def : CK803S<"ck803snt", NoSchedModel, [FeatureDSP_Silan]>;
+def : CK803S<"ck803sf", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803S<"ck803sfn", NoSchedModel, [FeatureFPUV2_SF, FeatureDSP_Silan, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803S<"ck803se", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>;
+def : CK803S<"ck803sen", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureDSP_Silan]>;
+def : CK803S<"ck803sef", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803S<"ck803sefn", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, FeatureDSP_Silan,
+ HasFLOATE1, HasFLOAT1E3]>;
+def : CK803S<"ck803seft", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK803S<"ck803sefnt", NoSchedModel,
+ [FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, FeatureDSP_Silan,
+ HasFLOATE1, HasFLOAT1E3]>;
+
+// CK804 series
+class CK804<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : CK803<n, m, !listconcat(f, [Has3r1E3r2, Has3r2E3r3, ProcCK804]), !listconcat(tunef, [])>;
+
+def : CK804<"ck804", NoSchedModel, []>;
+def : CK804<"ck804h", NoSchedModel, []>;
+def : CK804<"ck804t", NoSchedModel, []>;
+def : CK804<"ck804ht", NoSchedModel, []>;
+def : CK804<"ck804f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK804<"ck804fh", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK804<"ck804e", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>;
+def : CK804<"ck804et", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>;
+def : CK804<"ck804eh", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>;
+def : CK804<"ck804eht", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>;
+def : CK804<"ck804ef", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK804<"ck804efh", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK804<"ck804ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK804<"ck804eft", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK804<"ck804efht", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK804<"e804d", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>;
+def : CK804<"e804dt", NoSchedModel, [FeatureDSPV2, FeatureHighreg]>;
+def : CK804<"e804f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK804<"e804ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK804<"e804df", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+def : CK804<"e804dft", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3, FeatureHighreg]>;
+
+// CK805 series
+class CK805<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : CK803<n, m, !listconcat(f, [FeatureHighreg, FeatureVDSPV2, HasVDSP2E3, Has3r1E3r2, Has3r2E3r3, ProcCK805]),
+ !listconcat(tunef, [])>;
+
+def : CK805<"ck805", NoSchedModel, []>;
+def : CK805<"i805", NoSchedModel, []>;
+def : CK805<"ck805t", NoSchedModel, []>;
+def : CK805<"i805f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK805<"ck805f", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK805<"ck805e", NoSchedModel, [FeatureDSPV2]>;
+def : CK805<"ck805ef", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK805<"ck805et", NoSchedModel, [FeatureDSPV2]>;
+def : CK805<"ck805ft", NoSchedModel, [FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+def : CK805<"ck805eft", NoSchedModel, [FeatureDSPV2, FeatureFPUV2_SF, HasFLOATE1, HasFLOAT1E3]>;
+
+// CK807 series
+class CK807<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : ProcessorModel<n, m, !listconcat(f,
+ [ Has3E7, HasMP, HasMP1E2, FeatureTrust, FeatureHWDiv, FeatureDSP, HasDSP1E2, HasDSPE60,
+ FeatureHighreg, FeatureHardTP, FeatureNVIC, FeatureCache, ProcCK807]), !listconcat(tunef, [])>;
+
+def : CK807<"ck807", NoSchedModel, []>;
+def : CK807<"c807", NoSchedModel, []>;
+def : CK807<"r807", NoSchedModel, []>;
+def : CK807<"ck807e", NoSchedModel, [FeatureDSP, HasDSP1E2, HasDSPE60]>;
+def : CK807<"ck807f", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>;
+def : CK807<"c807f", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>;
+def : CK807<"r807f", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>;
+def : CK807<"ck807ef", NoSchedModel, [
+ FeatureDSP, HasDSP1E2, HasDSPE60, FeatureFPUV2_SF, FeatureFPUV2_DF,
+ FeatureFdivdu, HasFLOATE1, HasFLOAT1E2, HasFLOAT1E3, HasFLOAT3E4]>;
+
+// CK810 series
+class CK810<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : ProcessorModel<n, m, !listconcat(f,
+ [ Has7E10, HasMP, HasMP1E2, FeatureTrust, FeatureHWDiv, FeatureDSP, HasDSP1E2, HasDSPE60,
+ FeatureHighreg, FeatureHardTP, FeatureNVIC, FeatureCache, ProcCK810]), !listconcat(tunef, [])>;
+
+def : CK810<"ck810", NoSchedModel, []>;
+def : CK810<"ck810e", NoSchedModel, []>;
+def : CK810<"ck810t", NoSchedModel, []>;
+def : CK810<"ck810et", NoSchedModel, []>;
+def : CK810<"c810", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2]>;
+def : CK810<"ck810f", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2]>;
+def : CK810<"ck810ef", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2]>;
+def : CK810<"ck810ft", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2]>;
+def : CK810<"ck810eft", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2]>;
+def : CK810<"c810t", NoSchedModel,
+ [FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2]>;
+
+class CK810V<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : CK810<n, m, !listconcat(f, [FeatureVDSPV1_128, ProcCK810V]), !listconcat(tunef, [])>;
+
+def : CK810V<"ck810v", NoSchedModel, []>;
+def : CK810V<"ck810ev", NoSchedModel, []>;
+def : CK810V<"ck810tv", NoSchedModel, []>;
+def : CK810V<"ck810etv", NoSchedModel, []>;
+def : CK810V<"ck810fv", NoSchedModel, [
+ FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2
+]>;
+def : CK810V<"ck810efv", NoSchedModel, [
+ FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2
+]>;
+def : CK810V<"c810v", NoSchedModel, [
+ FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2
+]>;
+def : CK810V<"ck810ftv", NoSchedModel, [
+ FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2
+]>;
+def : CK810V<"ck810eftv", NoSchedModel, [
+ FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2
+]>;
+def : CK810V<"c810tv", NoSchedModel, [
+ FeatureFPUV2_SF, FeatureFPUV2_DF, FeatureFdivdu,
+ HasFLOATE1, HasFLOAT1E2
+]>;
+
+// CK860 series
+class CK860<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : ProcessorModel<n, m, !listconcat(f,
+ [ Has10E60, HasMP, HasMP1E2, Has3r1E3r2, Has3r2E3r3, FeatureTrust, FeatureBTST16, FeatureHWDiv, HasDSPE60,
+ FeatureHighreg, FeatureHardTP, FeatureNVIC, FeatureCache, ProcCK860]), !listconcat(tunef, [])>;
+
+class CK860V<string n, SchedMachineModel m, list<SubtargetFeature> f,
+ list<SubtargetFeature> tunef = []>
+ : CK860<n, m, !listconcat(f, [FeatureVDSPV2, HasVDSP2E60F, ProcCK860V]), !listconcat(tunef, [])>;
+
+def : CK860<"ck860", NoSchedModel, []>;
+def : CK860<"ck860f", NoSchedModel,
+ [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+def : CK860<"c860", NoSchedModel,
+ [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+def : CK860V<"c860v", NoSchedModel,
+ [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+def : CK860V<"ck860v", NoSchedModel, []>;
+def : CK860V<"ck860fv", NoSchedModel,
+ [FeatureFPUV3_HI, FeatureFPUV3_HF, FeatureFPUV3_SF, FeatureFPUV3_DF, HasFLOAT7E60]>;
+
//===----------------------------------------------------------------------===//
// Define the CSKY target.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
index c8269eeacfdb..0236b22ad379 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -16,10 +16,12 @@
#include "CSKYTargetMachine.h"
#include "MCTargetDesc/CSKYInstPrinter.h"
#include "MCTargetDesc/CSKYMCExpr.h"
+#include "MCTargetDesc/CSKYTargetStreamer.h"
#include "TargetInfo/CSKYTargetInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -40,7 +42,15 @@ CSKYAsmPrinter::CSKYAsmPrinter(llvm::TargetMachine &TM,
bool CSKYAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
MCP = MF.getConstantPool();
- Subtarget = &MF.getSubtarget<CSKYSubtarget>();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ // Set the current MCSubtargetInfo to a copy which has the correct
+ // feature bits for the current MachineFunction
+ MCSubtargetInfo &NewSTI =
+ OutStreamer->getContext().getSubtargetCopy(*TM.getMCSubtargetInfo());
+ NewSTI.setFeatureBits(MF.getSubtarget().getFeatureBits());
+ Subtarget = &NewSTI;
+
return AsmPrinter::runOnMachineFunction(MF);
}
@@ -59,8 +69,6 @@ void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
#include "CSKYGenMCPseudoLowering.inc"
void CSKYAsmPrinter::expandTLSLA(const MachineInstr *MI) {
- const CSKYInstrInfo *TII = Subtarget->getInstrInfo();
-
DebugLoc DL = MI->getDebugLoc();
MCSymbol *PCLabel = OutContext.getOrCreateSymbol(
@@ -119,6 +127,19 @@ void CSKYAsmPrinter::emitFunctionBodyEnd() {
InConstantPool = false;
}
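+// Build attributes are emitted once per module and only for ELF output;
+// the attribute section is finalized when the module ends.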
+void CSKYAsmPrinter::emitStartOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().isOSBinFormatELF())
+ emitAttributes();
+}
+
+void CSKYAsmPrinter::emitEndOfAsmFile(Module &M) {
+ CSKYTargetStreamer &CTS =
+ static_cast<CSKYTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ if (TM.getTargetTriple().isOSBinFormatELF())
+ CTS.finishAttributeSection();
+}
+
void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
@@ -218,6 +239,84 @@ void CSKYAsmPrinter::emitMachineConstantPoolValue(
OutStreamer->emitValue(Expr, Size);
}
+void CSKYAsmPrinter::emitAttributes() {
+ CSKYTargetStreamer &CTS =
+ static_cast<CSKYTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+ const Triple &TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ const CSKYTargetMachine &CTM = static_cast<const CSKYTargetMachine &>(TM);
+ // TuneCPU doesn't impact the emission of ELF attributes; ELF attributes
+ // only care about arch-related features, so we can set TuneCPU to CPU.
+ const CSKYSubtarget STI(TT, CPU, /*TuneCPU=*/CPU, FS, CTM);
+
+ CTS.emitTargetAttributes(STI);
+}
+
+bool CSKYAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &OS) {
+ // First try the generic code, which knows about modifiers like 'c' and 'n'.
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
+ return false;
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'R':
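+ // 'R' prints the next register after the operand's, used to access the
+ // high half of a value held in a register pair.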
+ if (MO.getType() == MachineOperand::MO_Register) {
+ OS << CSKYInstPrinter::getRegisterName(MO.getReg() + 1);
+ return false;
+ }
+ }
+ }
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ OS << MO.getImm();
+ return false;
+ case MachineOperand::MO_Register:
+ if (MO.getReg() == CSKY::C)
+ return false;
+ OS << CSKYInstPrinter::getRegisterName(MO.getReg());
+ return false;
+ case MachineOperand::MO_GlobalAddress:
+ PrintSymbolOperand(MO, OS);
+ return false;
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress());
+ Sym->print(OS, MAI);
+ return false;
+ }
+ default:
+ break;
+ }
+
+ return true;
+}
+
+bool CSKYAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, const char *ExtraCode,
+ raw_ostream &OS) {
+ if (!ExtraCode) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ // For now, we only support memory operands that are plain registers and
+ // assume there is no addend.
+ if (!MO.isReg())
+ return true;
+
+ OS << "(" << CSKYInstPrinter::getRegisterName(MO.getReg()) << ", 0)";
+ return false;
+ }
+
+ return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYAsmPrinter() {
RegisterAsmPrinter<CSKYAsmPrinter> X(getTheCSKYTarget());
}
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
index 04a253d349c8..5e87594e4fdf 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -18,7 +18,8 @@ namespace llvm {
class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
CSKYMCInstLower MCInstLowering;
- const CSKYSubtarget *Subtarget;
+ const MCSubtargetInfo *Subtarget;
+ const TargetInstrInfo *TII;
bool InConstantPool = false;
@@ -28,6 +29,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
void expandTLSLA(const MachineInstr *MI);
void emitCustomConstantPool(const MachineInstr *MI);
+ void emitAttributes();
public:
explicit CSKYAsmPrinter(TargetMachine &TM,
@@ -46,12 +48,22 @@ public:
void emitFunctionBodyEnd() override;
+ void emitStartOfAsmFile(Module &M) override;
+
+ void emitEndOfAsmFile(Module &M) override;
+
void emitInstruction(const MachineInstr *MI) override;
bool runOnMachineFunction(MachineFunction &MF) override;
// We emit constant pools ourselves.
void emitConstantPool() override{};
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &OS) override;
+
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &OS) override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
index 3ac335e2ad9d..5d7241258543 100644
--- a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
+++ b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -286,7 +287,7 @@ LLVM_DUMP_METHOD void CSKYConstantIslands::dumpBBs() {
bool CSKYConstantIslands::runOnMachineFunction(MachineFunction &Mf) {
MF = &Mf;
MCP = Mf.getConstantPool();
- STI = &static_cast<const CSKYSubtarget &>(Mf.getSubtarget());
+ STI = &Mf.getSubtarget<CSKYSubtarget>();
LLVM_DEBUG(dbgs() << "***** CSKYConstantIslands: "
<< MCP->getConstants().size() << " CP entries, aligned to "
@@ -904,8 +905,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
Scale = 2;
break;
default:
- assert(0);
- break;
+ llvm_unreachable("");
}
unsigned MaxOffs = ((1 << (Bits - 1)) - 1) * Scale;
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
index 3bf001c2cee7..9907f39b3f90 100644
--- a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
@@ -13,6 +13,7 @@
#include "CSKYFrameLowering.h"
#include "CSKYMachineFunctionInfo.h"
#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -270,6 +271,17 @@ void CSKYFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
}
+static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
+ const CSKYInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB)
+ FnSize += TII.getInstSizeInBytes(MI);
+ }
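+ // Add 4 bytes for each constant-pool entry on top of the instruction bytes.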
+ FnSize += MF.getConstantPool()->getConstants().size() * 4;
+ return FnSize;
+}
+
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
const CSKYSubtarget &STI) {
unsigned Limit = (1 << 12) - 1;
@@ -349,6 +361,7 @@ void CSKYFrameLowering::determineCalleeSaves(MachineFunction &MF,
CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const CSKYInstrInfo *TII = STI.getInstrInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -411,8 +424,6 @@ void CSKYFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
- CFI->setLRIsSpilled(SavedRegs.test(CSKY::R15));
-
unsigned CSStackSize = 0;
for (unsigned Reg : SavedRegs.set_bits()) {
auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
@@ -432,6 +443,14 @@ void CSKYFrameLowering::determineCalleeSaves(MachineFunction &MF,
RS->addScavengingFrameIndex(MFI.CreateStackObject(size, align, false));
}
+
+ unsigned FnSize = EstimateFunctionSizeInBytes(MF, *TII);
+ // Force R15 to be spilled if the estimated function size is >= 65536
+ // bytes; this enables use of BSR to implement far jumps.
+ if (FnSize >= ((1 << (16 - 1)) * 2))
+ SavedRegs.set(CSKY::R15);
+
+ CFI->setLRIsSpilled(SavedRegs.test(CSKY::R15));
}
// Not preserve stack space within prologue for outgoing variables when the
diff --git a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
index d58f9095aa0d..b893487f1f0f 100644
--- a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "CSKYSubtarget.h"
#include "CSKYTargetMachine.h"
#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -42,6 +43,13 @@ public:
void Select(SDNode *N) override;
bool selectAddCarry(SDNode *N);
bool selectSubCarry(SDNode *N);
+ bool selectBITCAST_TO_LOHI(SDNode *N);
+ bool selectInlineAsm(SDNode *N);
+
+ SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
#include "CSKYGenDAGISel.inc"
};
@@ -86,6 +94,13 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
IsSelected = true;
break;
}
+ case CSKYISD::BITCAST_TO_LOHI:
+ IsSelected = selectBITCAST_TO_LOHI(N);
+ break;
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
+ IsSelected = selectInlineAsm(N);
+ break;
}
if (IsSelected)
@@ -95,6 +110,185 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
SelectCode(N);
}
+bool CSKYDAGToDAGISel::selectInlineAsm(SDNode *N) {
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ // Normally, i64 data is bound to two arbitrary GPRs for the "r" constraint.
+ // However, some instructions (e.g. mula.s32) require a GPR pair.
+ // Since there is no constraint to explicitly specify a
+ // reg pair, we use the GPRPair reg class for "r" with 64-bit data.
+
+ SDLoc dl(N);
+ SDValue Glue =
+ N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(nullptr, 0);
+
+ SmallVector<bool, 8> OpChanged;
+ // The glue node, if present, will be appended last.
+ for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
+ ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ } else
+ continue;
+
+ // Immediate operands to inline asm in the SelectionDAG are modeled with
+ // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // the second is a constant with the value of the immediate. If we get here
+ // and we have a Kind_Imm, skip the next operand, and continue.
+ if (Kind == InlineAsm::Kind_Imm) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ if (NumRegs)
+ OpChanged.push_back(false);
+
+ unsigned DefIdx = 0;
+ bool IsTiedToChangedOp = false;
+ // If it's a use that is tied with a previous def, it has no
+ // reg class constraint.
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ IsTiedToChangedOp = OpChanged[DefIdx];
+
+ // Memory operands to inline asm in the SelectionDAG are modeled with two
+ // operands: a constant of value InlineAsm::Kind_Mem followed by the input
+ // operand. If we get here and we have a Kind_Mem, skip the next operand (so
+ // it doesn't get misinterpreted), and continue. We do this here because
+ // it's important to update the OpChanged array correctly before moving on.
+ if (Kind == InlineAsm::Kind_Mem) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef &&
+ Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if ((!IsTiedToChangedOp && (!HasRC || RC != CSKY::GPRRegClassID)) ||
+ NumRegs != 2)
+ continue;
+
+ assert((i + 2 < NumOps) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i + 1);
+ SDValue V1 = N->getOperand(i + 2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ Register GPVR = MRI.createVirtualRegister(&CSKY::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::i64);
+ SDValue Chain = SDValue(N, 0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy =
+ CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::i64, Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 =
+ CurDAG->getTargetExtractSubreg(CSKY::sub32_0, dl, MVT::i32, RegCopy);
+ SDValue Sub1 =
+ CurDAG->getTargetExtractSubreg(CSKY::sub32_32, dl, MVT::i32, RegCopy);
+ SDValue T0 =
+ CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end() - 1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, Ops);
+ } else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 =
+ CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, Chain.getValue(1));
+ SDValue T1 =
+ CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, T0.getValue(1));
+ SDValue Pair = SDValue(createGPRPairNode(MVT::i64, T0, T1), 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ Register GPVR = MRI.createVirtualRegister(&CSKY::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::i64);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if (PairedReg.getNode()) {
+ OpChanged[OpChanged.size() - 1] = true;
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, CSKY::GPRPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() - 1] =
+ CurDAG->getTargetConstant(Flag, dl, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the next two GPRs.
+ i += 2;
+ }
+ }
+
+ if (Glue.getNode())
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return false;
+
+ SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
+ CurDAG->getVTList(MVT::Other, MVT::Glue),
+ AsmNodeOperands);
+ New->setNodeId(-1);
+ ReplaceNode(N, New.getNode());
+ return true;
+}
+
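+// Expand BITCAST_TO_LOHI by moving the low and high halves of an f64 out of
+// the FP register with FMFVRL_D/FMFVRH_D; without FPUv2 double-float support
+// we return false and let the generated selector handle the node.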
+bool CSKYDAGToDAGISel::selectBITCAST_TO_LOHI(SDNode *N) {
+ SDLoc Dl(N);
+ auto VT = N->getValueType(0);
+ auto V = N->getOperand(0);
+
+ if (!Subtarget->hasFPUv2DoubleFloat())
+ return false;
+
+ SDValue V1 = SDValue(CurDAG->getMachineNode(CSKY::FMFVRL_D, Dl, VT, V), 0);
+ SDValue V2 = SDValue(CurDAG->getMachineNode(CSKY::FMFVRH_D, Dl, VT, V), 0);
+
+ ReplaceUses(SDValue(N, 0), V1);
+ ReplaceUses(SDValue(N, 1), V2);
+ CurDAG->RemoveDeadNode(N);
+
+ return true;
+}
+
bool CSKYDAGToDAGISel::selectAddCarry(SDNode *N) {
MachineSDNode *NewNode = nullptr;
auto Type0 = N->getValueType(0);
@@ -175,6 +369,31 @@ bool CSKYDAGToDAGISel::selectSubCarry(SDNode *N) {
return true;
}
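+// Build a REG_SEQUENCE combining two i32 values into one GPRPair, placing
+// V0 in the low subregister (sub32_0) and V1 in the high one (sub32_32).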
+SDNode *CSKYDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
+ SDLoc dl(V0.getNode());
+ SDValue RegClass =
+ CurDAG->getTargetConstant(CSKY::GPRPairRegClassID, dl, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(CSKY::sub32_0, dl, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(CSKY::sub32_32, dl, MVT::i32);
+ const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
+}
+
+bool CSKYDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch (ConstraintID) {
+ case InlineAsm::Constraint_m:
+ // We just support simple memory operands that have a single address
+ // operand and need no special handling.
+ OutOps.push_back(Op);
+ return false;
+ default:
+ break;
+ }
+
+ return true;
+}
+
FunctionPass *llvm::createCSKYISelDag(CSKYTargetMachine &TM) {
return new CSKYDAGToDAGISel(TM);
}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index 0b589e3d3e4f..012de34c9809 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -19,6 +19,7 @@
#include "CSKYSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Support/Debug.h"
@@ -103,9 +104,7 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
- if (!Subtarget.has3r2E3r3()) {
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- }
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
// Float
@@ -784,6 +783,175 @@ SDValue CSKYTargetLowering::getTargetConstantPoolValue(GlobalAddressSDNode *N,
return DAG.getTargetConstantPool(CPV, Ty);
}
+CSKYTargetLowering::ConstraintType
+CSKYTargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'a':
+ case 'b':
+ case 'v':
+ case 'w':
+ case 'y':
+ return C_RegisterClass;
+ case 'c':
+ case 'l':
+ case 'h':
+ case 'z':
+ return C_Register;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+CSKYTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, &CSKY::GPRRegClass);
+ case 'a':
+ return std::make_pair(0U, &CSKY::mGPRRegClass);
+ case 'b':
+ return std::make_pair(0U, &CSKY::sGPRRegClass);
+ case 'z':
+ return std::make_pair(CSKY::R14, &CSKY::GPRRegClass);
+ case 'c':
+ return std::make_pair(CSKY::C, &CSKY::CARRYRegClass);
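+ // 'w' selects the 16-register sFPR classes shared by FPUv2 and FPUv3;
+ // 'v' selects the widest FP register class the subtarget supports.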
+ case 'w':
+ if ((Subtarget.hasFPUv2SingleFloat() ||
+ Subtarget.hasFPUv3SingleFloat()) &&
+ VT == MVT::f32)
+ return std::make_pair(0U, &CSKY::sFPR32RegClass);
+ if ((Subtarget.hasFPUv2DoubleFloat() ||
+ Subtarget.hasFPUv3DoubleFloat()) &&
+ VT == MVT::f64)
+ return std::make_pair(0U, &CSKY::sFPR64RegClass);
+ break;
+ case 'v':
+ if (Subtarget.hasFPUv2SingleFloat() && VT == MVT::f32)
+ return std::make_pair(0U, &CSKY::sFPR32RegClass);
+ if (Subtarget.hasFPUv3SingleFloat() && VT == MVT::f32)
+ return std::make_pair(0U, &CSKY::FPR32RegClass);
+ if (Subtarget.hasFPUv2DoubleFloat() && VT == MVT::f64)
+ return std::make_pair(0U, &CSKY::sFPR64RegClass);
+ if (Subtarget.hasFPUv3DoubleFloat() && VT == MVT::f64)
+ return std::make_pair(0U, &CSKY::FPR64RegClass);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (Constraint == "{c}")
+ return std::make_pair(CSKY::C, &CSKY::CARRYRegClass);
+
+ // Clang will correctly decode the usage of register name aliases into their
+ // official names. However, other frontends like `rustc` do not. This allows
+ // users of these frontends to use the ABI names for registers in LLVM-style
+ // register constraints.
+ unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
+ .Case("{a0}", CSKY::R0)
+ .Case("{a1}", CSKY::R1)
+ .Case("{a2}", CSKY::R2)
+ .Case("{a3}", CSKY::R3)
+ .Case("{l0}", CSKY::R4)
+ .Case("{l1}", CSKY::R5)
+ .Case("{l2}", CSKY::R6)
+ .Case("{l3}", CSKY::R7)
+ .Case("{l4}", CSKY::R8)
+ .Case("{l5}", CSKY::R9)
+ .Case("{l6}", CSKY::R10)
+ .Case("{l7}", CSKY::R11)
+ .Case("{t0}", CSKY::R12)
+ .Case("{t1}", CSKY::R13)
+ .Case("{sp}", CSKY::R14)
+ .Case("{lr}", CSKY::R15)
+ .Case("{l8}", CSKY::R16)
+ .Case("{l9}", CSKY::R17)
+ .Case("{t2}", CSKY::R18)
+ .Case("{t3}", CSKY::R19)
+ .Case("{t4}", CSKY::R20)
+ .Case("{t5}", CSKY::R21)
+ .Case("{t6}", CSKY::R22)
+ .Cases("{t7}", "{fp}", CSKY::R23)
+ .Cases("{t8}", "{top}", CSKY::R24)
+ .Cases("{t9}", "{bsp}", CSKY::R25)
+ .Case("{r26}", CSKY::R26)
+ .Case("{r27}", CSKY::R27)
+ .Cases("{gb}", "{rgb}", "{rdb}", CSKY::R28)
+ .Cases("{tb}", "{rtb}", CSKY::R29)
+ .Case("{svbr}", CSKY::R30)
+ .Case("{tls}", CSKY::R31)
+ .Default(CSKY::NoRegister);
+
+ if (XRegFromAlias != CSKY::NoRegister)
+ return std::make_pair(XRegFromAlias, &CSKY::GPRRegClass);
+
+ // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
+ // TableGen record rather than the AsmName to choose registers for InlineAsm
+ // constraints, plus we want to match those names to the widest floating point
+ // register type available, manually select floating point registers here.
+ //
+ // The second case is the ABI name of the register, so that frontends can also
+ // use the ABI names in register constraint lists.
+ if (Subtarget.useHardFloat()) {
+ unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
+ .Cases("{fr0}", "{vr0}", CSKY::F0_32)
+ .Cases("{fr1}", "{vr1}", CSKY::F1_32)
+ .Cases("{fr2}", "{vr2}", CSKY::F2_32)
+ .Cases("{fr3}", "{vr3}", CSKY::F3_32)
+ .Cases("{fr4}", "{vr4}", CSKY::F4_32)
+ .Cases("{fr5}", "{vr5}", CSKY::F5_32)
+ .Cases("{fr6}", "{vr6}", CSKY::F6_32)
+ .Cases("{fr7}", "{vr7}", CSKY::F7_32)
+ .Cases("{fr8}", "{vr8}", CSKY::F8_32)
+ .Cases("{fr9}", "{vr9}", CSKY::F9_32)
+ .Cases("{fr10}", "{vr10}", CSKY::F10_32)
+ .Cases("{fr11}", "{vr11}", CSKY::F11_32)
+ .Cases("{fr12}", "{vr12}", CSKY::F12_32)
+ .Cases("{fr13}", "{vr13}", CSKY::F13_32)
+ .Cases("{fr14}", "{vr14}", CSKY::F14_32)
+ .Cases("{fr15}", "{vr15}", CSKY::F15_32)
+ .Cases("{fr16}", "{vr16}", CSKY::F16_32)
+ .Cases("{fr17}", "{vr17}", CSKY::F17_32)
+ .Cases("{fr18}", "{vr18}", CSKY::F18_32)
+ .Cases("{fr19}", "{vr19}", CSKY::F19_32)
+ .Cases("{fr20}", "{vr20}", CSKY::F20_32)
+ .Cases("{fr21}", "{vr21}", CSKY::F21_32)
+ .Cases("{fr22}", "{vr22}", CSKY::F22_32)
+ .Cases("{fr23}", "{vr23}", CSKY::F23_32)
+ .Cases("{fr24}", "{vr24}", CSKY::F24_32)
+ .Cases("{fr25}", "{vr25}", CSKY::F25_32)
+ .Cases("{fr26}", "{vr26}", CSKY::F26_32)
+ .Cases("{fr27}", "{vr27}", CSKY::F27_32)
+ .Cases("{fr28}", "{vr28}", CSKY::F28_32)
+ .Cases("{fr29}", "{vr29}", CSKY::F29_32)
+ .Cases("{fr30}", "{vr30}", CSKY::F30_32)
+ .Cases("{fr31}", "{vr31}", CSKY::F31_32)
+ .Default(CSKY::NoRegister);
+ if (FReg != CSKY::NoRegister) {
+ assert(CSKY::F0_32 <= FReg && FReg <= CSKY::F31_32 && "Unknown fp-reg");
+ unsigned RegNo = FReg - CSKY::F0_32;
+ unsigned DReg = CSKY::F0_64 + RegNo;
+
+ if (Subtarget.hasFPUv2DoubleFloat())
+ return std::make_pair(DReg, &CSKY::sFPR64RegClass);
+ else if (Subtarget.hasFPUv3DoubleFloat())
+ return std::make_pair(DReg, &CSKY::FPR64RegClass);
+ else if (Subtarget.hasFPUv2SingleFloat())
+ return std::make_pair(FReg, &CSKY::sFPR32RegClass);
+ else if (Subtarget.hasFPUv3SingleFloat())
+ return std::make_pair(FReg, &CSKY::FPR32RegClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
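
For context, a minimal standalone sketch of the alias lookup this function performs (an abridged simplification; the helper name and the two sample entries are ours, not part of the backend):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    // Map an LLVM-style "{name}" constraint spelled with a CSKY ABI alias to
    // a GPR number; ~0u means "no alias matched", in which case the caller
    // falls through to the generic TargetLowering handling.
    static unsigned lookupAbiAlias(llvm::StringRef Constraint) {
      return llvm::StringSwitch<unsigned>(Constraint.lower())
          .Case("{a0}", 0)           // a0 is the ABI alias of r0
          .Cases("{t7}", "{fp}", 23) // one register, two accepted spellings
          .Default(~0u);
    }
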
+
static MachineBasicBlock *
emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) {
@@ -853,6 +1021,12 @@ CSKYTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instr type to insert");
+ case CSKY::FSELS:
+ case CSKY::FSELD:
+ if (Subtarget.hasE2())
+ return emitSelectPseudo(MI, BB, CSKY::BT32);
+ else
+ return emitSelectPseudo(MI, BB, CSKY::BT16);
case CSKY::ISEL32:
return emitSelectPseudo(MI, BB, CSKY::BT32);
case CSKY::ISEL16:
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
index e1744d5ce220..1cd0f99b17bc 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.h
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -88,6 +88,12 @@ private:
return (Kind != ScalarCondVectorVal);
}
+ ConstraintType getConstraintType(StringRef Constraint) const override;
+
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
diff --git a/llvm/lib/Target/CSKY/CSKYInstrAlias.td b/llvm/lib/Target/CSKY/CSKYInstrAlias.td
new file mode 100644
index 000000000000..e3c0538e752e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrAlias.td
@@ -0,0 +1,38 @@
+//===-- CSKYInstrAlias.td - Target Description for CSKY ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY instruction aliases.
+//
+//===----------------------------------------------------------------------===//
+
+def : InstAlias<"nop", (MOV16 R0, R0)>;
+def : InstAlias<"nop", (MOV32 R0, R0)>, Requires<[iHasE2]>;
+
+def : InstAlias<"bgeni16 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
+def : InstAlias<"bgeni32 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
+
+def : InstAlias<"bsr $dst", (BSR32 call_symbol:$dst)>;
+
+def : InstAlias<"grs\t$rz, $offset", (GRS32 GPR:$rz, bare_symbol:$offset)>;
+
+def : InstAlias<"jbsr\t$src1", (JBSR32 call_symbol:$src1)>;
+
+def : InstAlias<"jbr $dst", (JBR16 br_symbol_16bit:$dst)>;
+def : InstAlias<"jbt $dst", (JBT16 C, br_symbol_16bit:$dst)>;
+def : InstAlias<"jbf $dst", (JBF16 C, br_symbol_16bit:$dst)>;
+
+def : InstAlias<"lrw $rz, $src", (PseudoLRW16 mGPR:$rz, bare_symbol:$src)>;
+def : InstAlias<"lrw $rz, $src", (LRW16 mGPR:$rz, constpool_symbol_16bit:$src)>;
+def : InstAlias<"lrw $rz, $src", (PseudoLRW32 GPR:$rz, bare_symbol:$src)>;
+def : InstAlias<"lrw $rz, $src", (LRW32 GPR:$rz, constpool_symbol:$src)>;
+
+def : InstAlias<"jsri $dst", (PseudoJSRI32 call_symbol:$dst)>;
+def : InstAlias<"jsri $dst", (JSRI32 constpool_symbol:$dst)>;
+
+def : InstAlias<"jmpi $dst", (PseudoJMPI32 br_symbol:$dst)>;
+def : InstAlias<"jmpi $dst", (JMPI32 constpool_symbol:$dst)>; \ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats.td b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
index 9b6ef9ca23db..8144a501b3d2 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrFormats.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
@@ -655,7 +655,7 @@ class R_Z_1<bits<6> sop, bits<5> pcode, string op>
// Format< OP[6] | RZ[5] | 00000[5] | SOP[6] | PCODE[5] | 00000[5] >
// Instructions:(2) clrf32, clrt32
-class R_Z_2<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
+class R_Z_2<bits<6> sop, bits<5> pcode, string op>
: CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz),
(ins CARRY:$ca, GPR:$false), !strconcat(op, "\t$rz"), []> {
bits<5> rz;
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index c57ccb9d6eea..d490b385ac16 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -14,6 +14,7 @@
#include "CSKYConstantPoolValue.h"
#include "CSKYMachineFunctionInfo.h"
#include "CSKYTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MCContext.h"
#define DEBUG_TYPE "csky-instr-info"
@@ -222,9 +223,10 @@ bool CSKYInstrInfo::reverseBranchCondition(
Register CSKYInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, int64_t Val,
+ const DebugLoc &DL, uint64_t Val,
MachineInstr::MIFlag Flag) const {
- assert(isUInt<32>(Val) && "should be uint32");
+ if (!isInt<32>(Val))
+ report_fatal_error("Should only materialize 32-bit constants.");
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -475,9 +477,6 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
-
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
if (CSKY::GPRRegClass.contains(SrcReg) &&
CSKY::CARRYRegClass.contains(DestReg)) {
if (STI.hasE2()) {
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
index 1a1bbbf9154f..a979b0bf4b0d 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -79,7 +79,7 @@ public:
// Materializes the given integer Val into DstReg.
Register movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, int64_t Val,
+ const DebugLoc &DL, uint64_t Val,
MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
};
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index a782efe7f4f4..300ecceae906 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -413,6 +413,19 @@ def psrflag : Operand<i32>, ImmLeaf<i32, "return isShiftedUInt<5, 0>(Imm);"> {
let PrintMethod = "printPSRFlag";
}
+multiclass uimm8SRLXForm<SDNode opc> {
+ def _0: SDNodeXForm<opc,
+ [{return CurDAG->getTargetConstant((N->getZExtValue() >> 0) & 0xFF, SDLoc(N), MVT::i32);}]>;
+ def _8: SDNodeXForm<opc,
+ [{return CurDAG->getTargetConstant((N->getZExtValue() >> 8) & 0xFF, SDLoc(N), MVT::i32);}]>;
+ def _16: SDNodeXForm<opc,
+ [{return CurDAG->getTargetConstant((N->getZExtValue() >> 16) & 0xFF, SDLoc(N), MVT::i32);}]>;
+ def _24: SDNodeXForm<opc,
+ [{return CurDAG->getTargetConstant((N->getZExtValue() >> 24) & 0xFF, SDLoc(N), MVT::i32);}]>;
+}
+
+defm uimm8SRL : uimm8SRLXForm<imm>;
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
@@ -709,8 +722,6 @@ let Predicates= [iHasE2] in {
def MOVI32 : I_16_MOV<0x10, "movi32", uimm16>;
let Size = 4, isCodeGenOnly = 0 in
def BGENI : CSKYPseudo<(outs GPR:$dst), (ins uimm5:$imm), "bgeni\t$dst, $imm", []>;
- def : InstAlias<"bgeni16 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
- def : InstAlias<"bgeni32 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
def MOVIH32 : I_16_MOV<0x11, "movih32", uimm16_16_xform>;
def MVC32 : R_Z_1<0x1, 0x8, "mvc32">;
let isCodeGenOnly = 1 in
@@ -723,8 +734,8 @@ let Predicates= [iHasE2] in {
let Predicates = [iHas2E3] in {
def MVCV32 : R_Z_1<0x1, 0x10, "mvcv32">;
- def CLRF32 : R_Z_2<0xB, 0x1, "clrf32", []>;
- def CLRT32 : R_Z_2<0xB, 0x2, "clrt32", []>;
+ def CLRF32 : R_Z_2<0xB, 0x1, "clrf32">;
+ def CLRT32 : R_Z_2<0xB, 0x2, "clrt32">;
}
//===----------------------------------------------------------------------===//
@@ -779,8 +790,6 @@ def BNEZAD32 : CSKY32Inst<AddrModeNone, 0x3a,
def BSR32 : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>;
-def : InstAlias<"bsr $dst", (BSR32 call_symbol:$dst)>;
-
def BSR32_BR : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>{
let isCodeGenOnly = 1;
let isBranch = 1;
@@ -804,7 +813,6 @@ let Predicates = [iHas2E3] in {
def GRS32 : I_18_Z_L<0x3, "grs32\t$rz, $offset",
(outs GPR:$rz), (ins bare_symbol:$offset), []>;
-def : InstAlias<"grs\t$rz, $offset", (GRS32 GPR:$rz, bare_symbol:$offset)>;
let Uses = [R28] in {
def LRS32B : I_18_Z_L<0x0, "lrs32.b\t$rz, $offset",
@@ -1291,8 +1299,6 @@ let Predicates = [iHasE2] in {
let isCall = 1, Defs = [ R15 ], mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
def JBSR32 : CSKYPseudo<(outs), (ins call_symbol:$src1), "jbsr32\t$src1", []>;
-def : InstAlias<"jbsr\t$src1", (JBSR32 call_symbol:$src1)>;
-
def JBR32 : CSKYPseudo<(outs), (ins br_symbol:$src1), "jbr32\t$src1", []> {
let isBranch = 1;
let isTerminator = 1;
@@ -1338,18 +1344,13 @@ let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
def PseudoLRW32 : CSKYPseudo<(outs GPR:$rz), (ins bare_symbol:$src), "lrw32 $rz, $src", []>;
-def : InstAlias<"lrw $rz, $src", (PseudoLRW32 GPR:$rz, bare_symbol:$src)>;
-def : InstAlias<"lrw $rz, $src", (LRW32 GPR:$rz, constpool_symbol:$src)>;
+
let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
def PseudoJSRI32 : CSKYPseudo<(outs), (ins call_symbol:$src), "jsri32 $src", []>;
-def : InstAlias<"jsri $dst", (PseudoJSRI32 call_symbol:$dst)>;
-def : InstAlias<"jsri $dst", (JSRI32 constpool_symbol:$dst)>;
let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
def PseudoJMPI32 : CSKYPseudo<(outs), (ins br_symbol:$src), "jmpi32 $src", []>;
-def : InstAlias<"jmpi $dst", (PseudoJMPI32 br_symbol:$dst)>;
-def : InstAlias<"jmpi $dst", (JMPI32 constpool_symbol:$dst)>;
let isNotDuplicable = 1, mayLoad = 1, mayStore = 0, Size = 8 in
def PseudoTLSLA32 : CSKYPseudo<(outs GPR:$dst1, GPR:$dst2),
@@ -1362,3 +1363,4 @@ def CONSTPOOL_ENTRY : CSKYPseudo<(outs),
include "CSKYInstrInfo16Instr.td"
include "CSKYInstrInfoF1.td"
include "CSKYInstrInfoF2.td"
+include "CSKYInstrAlias.td"
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
index 6a9dd03dfa1d..3be1ca8b7998 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
@@ -441,6 +441,137 @@ let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
def PseudoLRW16 : CSKYPseudo<(outs mGPR:$rz),
(ins bare_symbol:$src), "lrw16 $rz, $src", []>;
+//===----------------------------------------------------------------------===//
+// Instruction Patterns.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(sext_inreg mGPR:$src, i1), (ASRI16 (LSLI16 mGPR:$src, 7), 7)>;
+def : Pat<(sext_inreg sGPR:$src, i8), (SEXTB16 sGPR:$src)>;
+def : Pat<(sext_inreg sGPR:$src, i16), (SEXTH16 sGPR:$src)>;
+
+// Load & Store Patterns
+
+defm : LdPat<extloadi8, uimm5, LD16B, i32>;
+defm : LdPat<zextloadi8, uimm5, LD16B, i32>;
+
+defm : LdPat<extloadi16, uimm5_1, LD16H, i32>;
+defm : LdPat<zextloadi16, uimm5_1, LD16H, i32>;
+
+defm : LdPat<load, uimm5_2, LD16W, i32>;
+
+
+defm : StPat<truncstorei8, i32, uimm5, ST16B>;
+defm : StPat<truncstorei16, i32, uimm5_1, ST16H>;
+defm : StPat<store, i32, uimm5_2, ST16W>;
+
+def : Pat<(CSKY_CALLReg sGPR:$src), (JSR16 sGPR:$src)>;
+def : Pat<(CSKY_TAILReg sGPR:$src), (JMP16 sGPR:$src)>;
+
+// Symbol address Patterns
+def : Pat<(CSKY_LOAD_ADDR tglobaladdr, tconstpool:$src2), (LRW16 tconstpool:$src2)>;
+def : Pat<(CSKY_LOAD_ADDR tblockaddress, tconstpool:$src2), (LRW16 tconstpool:$src2)>;
+def : Pat<(CSKY_LOAD_ADDR tjumptable:$src1, tconstpool:$src2), (LRW16_Gen tjumptable:$src1, tconstpool:$src2)>;
+def : Pat<(CSKY_LOAD_ADDR texternalsym, tconstpool:$src2), (LRW16 tconstpool:$src2)>;
+
+def : Pat<(i32 (load constpool:$src)), (LRW16 (to_tconstpool tconstpool:$src))>;
+
+// Branch Patterns.
+
+def : Pat<(brcond CARRY:$ca, bb:$offset),
+ (BT16 CARRY:$ca, bb:$offset)>;
+
+def : Pat<(br bb:$offset), (BR16 bb:$offset)>;
+
+def : Pat<(brcond (i32 (setne mGPR:$rs1, uimm5:$rs2)), bb:$offset),
+ (BT16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (seteq mGPR:$rs1, uimm5:$rs2)), bb:$offset),
+ (BF16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setuge mGPR:$rs1, oimm5:$rs2)), bb:$offset),
+ (BT16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setult mGPR:$rs1, oimm5:$rs2)), bb:$offset),
+ (BF16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setlt mGPR:$rs1, oimm5:$rs2)), bb:$offset),
+ (BT16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setge mGPR:$rs1, oimm5:$rs2)), bb:$offset),
+ (BF16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
+
+def : Pat<(brcond (i32 (setne sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BT16 (CMPNE16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (seteq sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BF16 (CMPNE16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setuge sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BT16 (CMPHS16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setule sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BT16 (CMPHS16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
+def : Pat<(brcond (i32 (setult sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BF16 (CMPHS16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setugt sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BF16 (CMPHS16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
+def : Pat<(brcond (i32 (setlt sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BT16 (CMPLT16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setgt sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BT16 (CMPLT16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
+def : Pat<(brcond (i32 (setge sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BF16 (CMPLT16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
+def : Pat<(brcond (i32 (setle sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (BF16 (CMPLT16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
+
+// Compare Patterns.
+def : Pat<(setne sGPR:$rs1, sGPR:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPNE16 sGPR:$rs1, sGPR:$rs2)))>;
+def : Pat<(seteq sGPR:$rs1, sGPR:$rs2),
+ (MVCV16 (CMPNE16 sGPR:$rs1, sGPR:$rs2))>;
+def : Pat<(setuge sGPR:$rs1, sGPR:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPHS16 sGPR:$rs1, sGPR:$rs2)))>;
+def : Pat<(setule sGPR:$rs1, sGPR:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPHS16 sGPR:$rs2, sGPR:$rs1)))>;
+def : Pat<(setult sGPR:$rs1, sGPR:$rs2),
+ (MVCV16 (CMPHS16 sGPR:$rs1, sGPR:$rs2))>;
+def : Pat<(setugt sGPR:$rs1, sGPR:$rs2),
+ (MVCV16 (CMPHS16 sGPR:$rs2, sGPR:$rs1))>;
+def : Pat<(setlt sGPR:$rs1, sGPR:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPLT16 sGPR:$rs1, sGPR:$rs2)))>;
+def : Pat<(setgt sGPR:$rs1, sGPR:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPLT16 sGPR:$rs2, sGPR:$rs1)))>;
+def : Pat<(setge sGPR:$rs1, sGPR:$rs2),
+ (MVCV16 (CMPLT16 sGPR:$rs1, sGPR:$rs2))>;
+def : Pat<(setle sGPR:$rs1, sGPR:$rs2),
+ (MVCV16 (CMPLT16 sGPR:$rs2, sGPR:$rs1))>;
+
+
+def : Pat<(setne mGPR:$rs1, uimm5:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2)))>;
+def : Pat<(seteq mGPR:$rs1, uimm5:$rs2),
+ (MVCV16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2))>;
+def : Pat<(setuge mGPR:$rs1, oimm5:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2)))>;
+def : Pat<(setult mGPR:$rs1, oimm5:$rs2),
+ (MVCV16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2))>;
+def : Pat<(setlt mGPR:$rs1, oimm5:$rs2),
+ (SUBU16XZ (MOVI16 1), (MVCV16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2)))>;
+def : Pat<(setge mGPR:$rs1, oimm5:$rs2),
+ (MVCV16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2))>;
+
+def : Pat<(select CARRY:$ca, sGPR:$rx, sGPR:$false),
+ (ISEL16 CARRY:$ca, sGPR:$rx, sGPR:$false)>;
+def : Pat<(select (and CARRY:$ca, 1), sGPR:$rx, sGPR:$false),
+ (ISEL16 CARRY:$ca, sGPR:$rx, sGPR:$false)>;
+
+def : Pat<(rotl sGPR:$rs1, sGPR:$rs2),
+ (ROTL16 sGPR:$rs1, (AND16 sGPR:$rs2, (MOVI16 0x1f)))>;
+
+
+// FIXME: This is a temporary workaround for the e801.
+def : Pat<(i32 imm:$imm),
+ (OR16 (MOVI16 (uimm8SRL_0 imm:$imm)),
+ (OR16 (LSLI16 (MOVI16 (uimm8SRL_8 imm:$imm)), 8),
+ (OR16 (LSLI16 (MOVI16 (uimm8SRL_16 imm:$imm)), 16),
+ (LSLI16 (MOVI16 (uimm8SRL_24 imm:$imm)), 24))))>;
+
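As a sanity check on the e801 pattern above, here is the same four-byte split and recombination in plain C++ (illustrative only; this helper is not part of the backend):

    #include <cstdint>

    // Rebuild a 32-bit constant the way the pattern does: four movi16 byte
    // loads (the uimm8SRL_* transforms) glued back together with lsli16/or16.
    uint32_t materialize32(uint32_t Imm) {
      uint32_t B0 = (Imm >> 0) & 0xFF;
      uint32_t B1 = (Imm >> 8) & 0xFF;
      uint32_t B2 = (Imm >> 16) & 0xFF;
      uint32_t B3 = (Imm >> 24) & 0xFF;
      return B0 | (B1 << 8) | (B2 << 16) | (B3 << 24);
    }
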
+// Other operations.
+let Predicates = [iHasE2] in {
+ def : Pat<(bswap sGPR:$rx), (REVB16 sGPR:$rx)>;
+}
//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
diff --git a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
index b6e303f8ccfb..57e0d62481ad 100644
--- a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
@@ -18,8 +18,6 @@
namespace llvm {
class CSKYMachineFunctionInfo : public MachineFunctionInfo {
- MachineFunction &MF;
-
Register GlobalBaseReg = 0;
bool SpillsCR = false;
@@ -33,7 +31,14 @@ class CSKYMachineFunctionInfo : public MachineFunctionInfo {
unsigned PICLabelUId = 0;
public:
- CSKYMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ CSKYMachineFunctionInfo(MachineFunction &) {}
+
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override {
+ return DestMF.cloneInfo<CSKYMachineFunctionInfo>(*this);
+ }
Register getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
index 57b6ae3c27b5..4f7811d22868 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
@@ -13,6 +13,7 @@
#include "CSKYRegisterInfo.h"
#include "CSKY.h"
#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCContext.h"
@@ -29,6 +30,10 @@ const uint32_t *
CSKYRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID Id) const {
const CSKYSubtarget &STI = MF.getSubtarget<CSKYSubtarget>();
+ if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat())
+ return CSR_GPR_FPR64_RegMask;
+ if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat())
+ return CSR_GPR_FPR32_RegMask;
return CSR_I32_RegMask;
}
@@ -82,9 +87,21 @@ const MCPhysReg *
CSKYRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const CSKYSubtarget &STI = MF->getSubtarget<CSKYSubtarget>();
if (MF->getFunction().hasFnAttribute("interrupt")) {
+ if (STI.hasFPUv3DoubleFloat())
+ return CSR_GPR_FPR64v3_ISR_SaveList;
+ if (STI.hasFPUv3SingleFloat())
+ return CSR_GPR_FPR32v3_ISR_SaveList;
+ if (STI.hasFPUv2DoubleFloat())
+ return CSR_GPR_FPR64_ISR_SaveList;
+ if (STI.hasFPUv2SingleFloat())
+ return CSR_GPR_FPR32_ISR_SaveList;
return CSR_GPR_ISR_SaveList;
}
+ if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat())
+ return CSR_GPR_FPR64_SaveList;
+ if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat())
+ return CSR_GPR_FPR32_SaveList;
return CSR_I32_SaveList;
}
@@ -248,7 +265,6 @@ void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(isInt<32>(Offset) && "Int32 expected");
// The offset won't fit in an immediate, so use a scratch register instead
// Modify Offset and FrameReg appropriately
- assert(Offset >= 0);
Register ScratchReg = TII->movImm(MBB, NewII, DL, Offset);
BuildMI(MBB, NewII, DL,
TII->get(STI.hasE2() ? CSKY::ADDU32 : CSKY::ADDU16XZ), ScratchReg)
@@ -265,7 +281,7 @@ void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI->setDesc(TII->get(TargetOpcode::COPY));
MI->getOperand(FIOperandNum)
.ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
- MI->RemoveOperand(FIOperandNum + 1);
+ MI->removeOperand(FIOperandNum + 1);
} else {
MI->getOperand(FIOperandNum)
.ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
index b7f4fc17166b..d12532a3c5c1 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
@@ -81,17 +81,21 @@ let RegAltNameIndices = [ABIRegAltName] in {
def R29 : CSKYReg<29, "r29", ["rtb"]>, DwarfRegNum<[29]>;
def R30 : CSKYReg<30, "r30", ["svbr"]>, DwarfRegNum<[30]>;
def R31 : CSKYReg<31, "r31", ["tls"]>, DwarfRegNum<[31]>;
- def C : CSKYReg<32, "cr0", ["psr"]>;
+
+ // Fake register, present only so R31 can form the last GPRTuple pair.
+ def R32 : CSKYReg<32, "r32", ["r32"]>, DwarfRegNum<[32]>;
+
+ def C : CSKYReg<33, "cr0", ["psr"]>;
}
def GPRTuple : RegisterTuples<
[sub32_0, sub32_32],
- [(add (sequence "R%u", 0, 30)), (add (sequence "R%u", 1, 31))],
+ [(add (sequence "R%u", 0, 31)), (add (sequence "R%u", 1, 32))],
[ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
- "r24", "r25", "r26", "r27", "r28", "r29", "r30"
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
]>;
// Floating point registers
@@ -189,9 +193,9 @@ def FPR32 : RegisterClass<"CSKY", [f32], 32,
def sFPR32 : RegisterClass<"CSKY", [f32], 32,
(add (sequence "F%u_32", 0, 15))>;
-def FPR64 : RegisterClass<"CSKY", [f64], 64,
+def FPR64 : RegisterClass<"CSKY", [f64], 32,
(add (sequence "F%u_64", 0, 31))>;
-def sFPR64 : RegisterClass<"CSKY", [f64], 64,
+def sFPR64 : RegisterClass<"CSKY", [f64], 32,
(add (sequence "F%u_64", 0, 15))>;
def sFPR64_V : RegisterClass<"CSKY", [v2f32], 32, (add sFPR64)>;
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp
index 963c2ede9c44..251dbed82708 100644
--- a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
@@ -33,14 +34,42 @@ CSKYSubtarget &CSKYSubtarget::initializeSubtargetDependencies(
UseHardFloatABI = false;
HasFPUv2SingleFloat = false;
HasFPUv2DoubleFloat = false;
+ HasFPUv3HalfWord = false;
+ HasFPUv3HalfFloat = false;
HasFPUv3SingleFloat = false;
HasFPUv3DoubleFloat = false;
-
+ HasFdivdu = false;
+ HasFLOATE1 = false;
+ HasFLOAT1E2 = false;
+ HasFLOAT1E3 = false;
+ HasFLOAT3E4 = false;
+ HasFLOAT7E60 = false;
+ HasExtendLrw = false;
HasBTST16 = false;
+ HasTrust = false;
HasJAVA = false;
- HasExtendLrw = false;
+ HasCache = false;
+ HasNVIC = false;
+ HasDSP = false;
+ HasDSP1E2 = false;
+ HasDSPE60 = false;
+ HasDSPV2 = false;
+ HasDSP_Silan = false;
HasDoloop = false;
+ HasHardwareDivide = false;
HasHighRegisters = false;
+ HasVDSPV2 = false;
+ HasVDSP2E3 = false;
+ HasVDSP2E60F = false;
+ ReadTPHard = false;
+ HasVDSPV1_128 = false;
+ UseCCRT = false;
+ DumpConstPool = false;
+ EnableInterruptAttribute = false;
+ HasPushPop = false;
+ HasSTM = false;
+ SmartMode = false;
+ EnableStackSize = false;
HasE1 = false;
HasE2 = false;
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h b/llvm/lib/Target/CSKY/CSKYSubtarget.h
index 4cd590e8e76e..9e7ad00c0a50 100644
--- a/llvm/lib/Target/CSKY/CSKYSubtarget.h
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h
@@ -36,18 +36,65 @@ class CSKYSubtarget : public CSKYGenSubtargetInfo {
CSKYTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
+ enum CSKYProcFamilyEnum {
+ Others,
+
+ CK801,
+ CK802,
+ CK803,
+ CK803S,
+ CK804,
+ CK805,
+ CK807,
+ CK810,
+ CK810V,
+ CK860,
+ CK860V
+ };
+
+ /// CSKYProcFamily - CSKY processor family: CK801, CK802, and others.
+ CSKYProcFamilyEnum CSKYProcFamily = Others;
+
bool UseHardFloat;
bool UseHardFloatABI;
bool HasFPUv2SingleFloat;
bool HasFPUv2DoubleFloat;
+ bool HasFPUv3HalfWord;
+ bool HasFPUv3HalfFloat;
bool HasFPUv3SingleFloat;
bool HasFPUv3DoubleFloat;
-
+ bool HasFdivdu;
+ bool HasFLOATE1;
+ bool HasFLOAT1E2;
+ bool HasFLOAT1E3;
+ bool HasFLOAT3E4;
+ bool HasFLOAT7E60;
bool HasBTST16;
- bool HasJAVA;
bool HasExtendLrw;
+ bool HasTrust;
+ bool HasJAVA;
+ bool HasCache;
+ bool HasNVIC;
+ bool HasDSP;
+ bool HasDSP1E2;
+ bool HasDSPE60;
+ bool HasDSPV2;
+ bool HasDSP_Silan;
bool HasDoloop;
+ bool HasHardwareDivide;
bool HasHighRegisters;
+ bool HasVDSPV2;
+ bool HasVDSP2E3;
+ bool HasVDSP2E60F;
+ bool ReadTPHard;
+ bool HasVDSPV1_128;
+ bool UseCCRT;
+ bool DumpConstPool;
+ bool EnableInterruptAttribute;
+ bool HasPushPop;
+ bool HasSTM;
+ bool SmartMode;
+ bool EnableStackSize;
bool HasE1;
bool HasE2;
@@ -92,16 +139,49 @@ public:
bool hasFPUv2SingleFloat() const { return HasFPUv2SingleFloat; }
bool hasFPUv2DoubleFloat() const { return HasFPUv2DoubleFloat; }
bool hasFPUv2() const { return HasFPUv2SingleFloat || HasFPUv2DoubleFloat; }
+ bool hasFPUv3HalfWord() const { return HasFPUv3HalfWord; }
+ bool hasFPUv3HalfFloat() const { return HasFPUv3HalfFloat; }
bool hasFPUv3SingleFloat() const { return HasFPUv3SingleFloat; }
bool hasFPUv3DoubleFloat() const { return HasFPUv3DoubleFloat; }
- bool hasFPUv3() const { return HasFPUv3SingleFloat || HasFPUv3DoubleFloat; }
+ bool hasFPUv3() const {
+ return HasFPUv3HalfFloat || HasFPUv3SingleFloat || HasFPUv3DoubleFloat;
+ }
bool hasAnyFloatExt() const { return hasFPUv2() || hasFPUv3(); };
-
+ bool hasFdivdu() const { return HasFdivdu; }
+ bool hasFLOATE1() const { return HasFLOATE1; }
+ bool hasFLOAT1E2() const { return HasFLOAT1E2; }
+ bool hasFLOAT1E3() const { return HasFLOAT1E3; }
+ bool hasFLOAT3E4() const { return HasFLOAT3E4; }
+ bool hasFLOAT7E60() const { return HasFLOAT7E60; }
+ bool hasExtendLrw() const { return HasExtendLrw; }
bool hasBTST16() const { return HasBTST16; }
+ bool hasTrust() const { return HasTrust; }
bool hasJAVA() const { return HasJAVA; }
- bool hasExtendLrw() const { return HasExtendLrw; }
+ bool hasCache() const { return HasCache; }
+ bool hasNVIC() const { return HasNVIC; }
+ bool hasDSP() const { return HasDSP; }
+ bool hasDSP1E2() const { return HasDSP1E2; }
+ bool hasDSPE60() const { return HasDSPE60; }
+ bool hasDSPV2() const { return HasDSPV2; }
+ bool hasDSP_Silan() const { return HasDSP_Silan; }
bool hasDoloop() const { return HasDoloop; }
bool hasHighRegisters() const { return HasHighRegisters; }
+ bool hasVDSPV2() const { return HasVDSPV2; }
+ bool hasVDSPV2_FLOAT() const { return HasVDSPV2 && UseHardFloat; }
+ bool hasVDSPV2_HALF() const {
+ return HasVDSPV2 && UseHardFloat && HasFPUv3HalfFloat;
+ }
+ bool hasVDSP2E3() const { return HasVDSP2E3; }
+ bool hasVDSP2E60F() const { return HasVDSP2E60F; }
+ bool readTPHard() const { return ReadTPHard; }
+ bool hasVDSPV1_128() const { return HasVDSPV1_128; }
+ bool useCCRT() const { return UseCCRT; }
+ bool dumpConstPool() const { return DumpConstPool; }
+ bool enableInterruptAttribute() const { return EnableInterruptAttribute; }
+ bool hasPushPop() const { return HasPushPop; }
+ bool hasSTM() const { return HasSTM; }
+ bool smartMode() const { return SmartMode; }
+ bool enableStackSize() const { return EnableStackSize; }
bool hasE1() const { return HasE1; }
bool hasE2() const { return HasE2; }
@@ -114,6 +194,18 @@ public:
bool hasMP1E2() const { return HasMP1E2; }
bool has7E10() const { return Has7E10; }
bool has10E60() const { return Has10E60; }
+
+ bool isCK801() const { return CSKYProcFamily == CK801; }
+ bool isCK802() const { return CSKYProcFamily == CK802; }
+ bool isCK803() const { return CSKYProcFamily == CK803; }
+ bool isCK803S() const { return CSKYProcFamily == CK803S; }
+ bool isCK804() const { return CSKYProcFamily == CK804; }
+ bool isCK805() const { return CSKYProcFamily == CK805; }
+ bool isCK807() const { return CSKYProcFamily == CK807; }
+ bool isCK810() const { return CSKYProcFamily == CK810; }
+ bool isCK810V() const { return CSKYProcFamily == CK810V; }
+ bool isCK860() const { return CSKYProcFamily == CK860; }
+ bool isCK860V() const { return CSKYProcFamily == CK860V; }
};
} // namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
index 94b24044c27d..d19f28fddd53 100644
--- a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
+++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
@@ -13,7 +13,9 @@
#include "CSKYTargetMachine.h"
#include "CSKY.h"
#include "CSKYSubtarget.h"
+#include "CSKYTargetObjectFile.h"
#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -50,9 +52,9 @@ CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
- RM.getValueOr(Reloc::Static),
+ RM.value_or(Reloc::Static),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
+ TLOF(std::make_unique<CSKYELFTargetObjectFile>()) {
initAsmInfo();
}
@@ -94,6 +96,7 @@ public:
return getTM<CSKYTargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
};
@@ -104,6 +107,11 @@ TargetPassConfig *CSKYTargetMachine::createPassConfig(PassManagerBase &PM) {
return new CSKYPassConfig(*this, PM);
}
+void CSKYPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass());
+ TargetPassConfig::addIRPasses();
+}
+
bool CSKYPassConfig::addInstSelector() {
addPass(createCSKYISelDag(getCSKYTargetMachine()));
diff --git a/llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp
new file mode 100644
index 000000000000..b5592d34ca54
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.cpp
@@ -0,0 +1,25 @@
+//===-- CSKYTargetObjectFile.cpp - CSKY Object Info -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYTargetObjectFile.h"
+#include "CSKYTargetMachine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+
+void CSKYELFTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYTargetObjectFile.h b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.h
new file mode 100644
index 000000000000..a82f2681c12a
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYTargetObjectFile.h
@@ -0,0 +1,24 @@
+//===-- CSKYTargetObjectFile.h - CSKY Object Info -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_CSKY_CSKYTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+class CSKYELFTargetObjectFile : public TargetLoweringObjectFileELF {
+public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYTARGETOBJECTFILE_H
diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
new file mode 100644
index 000000000000..9b4d8ea8dc56
--- /dev/null
+++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
@@ -0,0 +1,553 @@
+//===-- CSKYDisassembler.cpp - Disassembler for CSKY ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CSKYDisassembler class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class CSKYDisassembler : public MCDisassembler {
+ std::unique_ptr<MCInstrInfo const> const MCII;
+ mutable StringRef symbolName;
+
+ DecodeStatus handleCROperand(MCInst &Instr) const;
+
+public:
+ CSKYDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII);
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+CSKYDisassembler::CSKYDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII) {}
+
+static MCDisassembler *createCSKYDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new CSKYDisassembler(STI, Ctx, T.createMCInstrInfo());
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(getTheCSKYTarget(),
+ createCSKYDisassembler);
+}
+
+static const uint16_t GPRDecoderTable[] = {
+ CSKY::R0, CSKY::R1, CSKY::R2, CSKY::R3, CSKY::R4, CSKY::R5, CSKY::R6,
+ CSKY::R7, CSKY::R8, CSKY::R9, CSKY::R10, CSKY::R11, CSKY::R12, CSKY::R13,
+ CSKY::R14, CSKY::R15, CSKY::R16, CSKY::R17, CSKY::R18, CSKY::R19, CSKY::R20,
+ CSKY::R21, CSKY::R22, CSKY::R23, CSKY::R24, CSKY::R25, CSKY::R26, CSKY::R27,
+ CSKY::R28, CSKY::R29, CSKY::R30, CSKY::R31};
+
+static const uint16_t GPRPairDecoderTable[] = {
+ CSKY::R0_R1, CSKY::R1_R2, CSKY::R2_R3, CSKY::R3_R4, CSKY::R4_R5,
+ CSKY::R5_R6, CSKY::R6_R7, CSKY::R7_R8, CSKY::R8_R9, CSKY::R9_R10,
+ CSKY::R10_R11, CSKY::R11_R12, CSKY::R12_R13, CSKY::R13_R14, CSKY::R14_R15,
+ CSKY::R15_R16, CSKY::R16_R17, CSKY::R17_R18, CSKY::R18_R19, CSKY::R19_R20,
+ CSKY::R20_R21, CSKY::R21_R22, CSKY::R22_R23, CSKY::R23_R24, CSKY::R24_R25,
+ CSKY::R25_R26, CSKY::R26_R27, CSKY::R27_R28, CSKY::R28_R29, CSKY::R29_R30,
+ CSKY::R30_R31, CSKY::R31_R32};
+
+static const uint16_t FPR32DecoderTable[] = {
+ CSKY::F0_32, CSKY::F1_32, CSKY::F2_32, CSKY::F3_32, CSKY::F4_32,
+ CSKY::F5_32, CSKY::F6_32, CSKY::F7_32, CSKY::F8_32, CSKY::F9_32,
+ CSKY::F10_32, CSKY::F11_32, CSKY::F12_32, CSKY::F13_32, CSKY::F14_32,
+ CSKY::F15_32, CSKY::F16_32, CSKY::F17_32, CSKY::F18_32, CSKY::F19_32,
+ CSKY::F20_32, CSKY::F21_32, CSKY::F22_32, CSKY::F23_32, CSKY::F24_32,
+ CSKY::F25_32, CSKY::F26_32, CSKY::F27_32, CSKY::F28_32, CSKY::F29_32,
+ CSKY::F30_32, CSKY::F31_32};
+
+static const uint16_t FPR64DecoderTable[] = {
+ CSKY::F0_64, CSKY::F1_64, CSKY::F2_64, CSKY::F3_64, CSKY::F4_64,
+ CSKY::F5_64, CSKY::F6_64, CSKY::F7_64, CSKY::F8_64, CSKY::F9_64,
+ CSKY::F10_64, CSKY::F11_64, CSKY::F12_64, CSKY::F13_64, CSKY::F14_64,
+ CSKY::F15_64, CSKY::F16_64, CSKY::F17_64, CSKY::F18_64, CSKY::F19_64,
+ CSKY::F20_64, CSKY::F21_64, CSKY::F22_64, CSKY::F23_64, CSKY::F24_64,
+ CSKY::F25_64, CSKY::F26_64, CSKY::F27_64, CSKY::F28_64, CSKY::F29_64,
+ CSKY::F30_64, CSKY::F31_64};
+
+static const uint16_t FPR128DecoderTable[] = {
+ CSKY::F0_128, CSKY::F1_128, CSKY::F2_128, CSKY::F3_128, CSKY::F4_128,
+ CSKY::F5_128, CSKY::F6_128, CSKY::F7_128, CSKY::F8_128, CSKY::F9_128,
+ CSKY::F10_128, CSKY::F11_128, CSKY::F12_128, CSKY::F13_128, CSKY::F14_128,
+ CSKY::F15_128, CSKY::F16_128, CSKY::F17_128, CSKY::F18_128, CSKY::F19_128,
+ CSKY::F20_128, CSKY::F21_128, CSKY::F22_128, CSKY::F23_128, CSKY::F24_128,
+ CSKY::F25_128, CSKY::F26_128, CSKY::F27_128, CSKY::F28_128, CSKY::F29_128,
+ CSKY::F30_128, CSKY::F31_128};
+
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodesFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 16)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodesFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 16)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodesFPR64_VRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 16)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+// TODO
+LLVM_ATTRIBUTE_UNUSED
+static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 16)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR128DecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodesGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 16)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodemGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 8)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+// TODO
+LLVM_ATTRIBUTE_UNUSED
+static DecodeStatus DecodeGPRSPRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo != 14)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(GPRDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ const FeatureBitset &FeatureBits =
+ Decoder->getSubtargetInfo().getFeatureBits();
+ bool hasHighReg = FeatureBits[CSKY::FeatureHighreg];
+
+ if (RegNo >= 32 || (!hasHighReg && RegNo >= 16))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(GPRPairDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+template <unsigned N, unsigned S>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::createImm(Imm << S));
+ return MCDisassembler::Success;
+}
+
+template <unsigned N>
+static DecodeStatus decodeOImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::createImm(Imm + 1));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeLRW16Imm8(MCInst &Inst, uint64_t Imm, int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<8>(Imm) && "Invalid immediate");
+ if ((Imm >> 7) & 0x1) {
+ Inst.addOperand(MCOperand::createImm((Imm & 0x7F) << 2));
+ } else {
+ uint64_t V = ((Imm ^ 0xFFFFFFFF) & 0xFF);
+ Inst.addOperand(MCOperand::createImm(V << 2));
+ }
+
+ return MCDisassembler::Success;
+}
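
The branchy decode above collapses to a one-liner, which may make the inverted encoding easier to see (an equivalent form, shown purely for illustration): when bit 7 is set the low 7 bits are the word offset, otherwise the byte is stored complemented.

    // Equivalent of decodeLRW16Imm8's immediate computation (sketch).
    uint64_t decodeLrw16Imm8(uint64_t Imm) {
      return ((Imm & 0x80) ? (Imm & 0x7F) : (~Imm & 0xFF)) << 2;
    }
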
+
+static DecodeStatus decodeJMPIXImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<2>(Imm) && "Invalid immediate");
+
+ if (Imm == 0)
+ Inst.addOperand(MCOperand::createImm(16));
+ else if (Imm == 1)
+ Inst.addOperand(MCOperand::createImm(24));
+ else if (Imm == 2)
+ Inst.addOperand(MCOperand::createImm(32));
+ else if (Imm == 3)
+ Inst.addOperand(MCOperand::createImm(40));
+ else
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
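
The four cases above follow a closed form, noted here only for illustration: the 2-bit field encodes the offsets 16, 24, 32 and 40.

    // Equivalent closed form of the mapping above (sketch).
    uint64_t decodeJmpixImm(uint64_t Imm) {
      return 16 + Imm * 8; // 0 -> 16, 1 -> 24, 2 -> 32, 3 -> 40
    }
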
+
+static DecodeStatus DecodeRegSeqOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "Invalid immediate");
+
+ auto Imm5 = Imm & 0x1f;
+ auto Ry = (Imm >> 5) & 0x1f;
+
+ if (DecodeGPRRegisterClass(Inst, Ry, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(GPRDecoderTable[Ry + Imm5]));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandF1(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "Invalid immediate");
+
+ auto Imm5 = Imm & 0x1f;
+ auto Ry = (Imm >> 5) & 0x1f;
+
+ if (DecodesFPR32RegisterClass(Inst, Ry, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[Ry + Imm5]));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandD1(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "Invalid immediate");
+
+ auto Imm5 = Imm & 0x1f;
+ auto Ry = (Imm >> 5) & 0x1f;
+
+ if (DecodesFPR64RegisterClass(Inst, Ry, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[Ry + Imm5]));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandF2(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "Invalid immediate");
+
+ auto Imm5 = Imm & 0x1f;
+ auto Ry = (Imm >> 5) & 0x1f;
+
+ if (DecodeFPR32RegisterClass(Inst, Ry, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR32DecoderTable[Ry + Imm5]));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegSeqOperandD2(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "Invalid immediate");
+
+ auto Imm5 = Imm & 0x1f;
+ auto Ry = (Imm >> 5) & 0x1f;
+
+ if (DecodeFPR64RegisterClass(Inst, Ry, Address, Decoder) ==
+ MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(FPR64DecoderTable[Ry + Imm5]));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeImmShiftOpValue(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ Inst.addOperand(MCOperand::createImm(Log2(Imm)));
+ return MCDisassembler::Success;
+}
+
+template <unsigned N, unsigned S>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ // Sign-extend the number in the bottom N bits of Imm
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) << S));
+ return MCDisassembler::Success;
+}
+
+#include "CSKYGenDisassemblerTables.inc"
+
+DecodeStatus CSKYDisassembler::handleCROperand(MCInst &MI) const {
+
+ // FIXME: Query instruction info from the td file or a generated table instead.
+ switch (MI.getOpcode()) {
+ default:
+ return MCDisassembler::Success;
+ case CSKY::LD16WSP:
+ case CSKY::ST16WSP:
+ case CSKY::ADDI16ZSP:
+ MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::R14));
+ return MCDisassembler::Success;
+ case CSKY::ADDI16SPSP:
+ case CSKY::SUBI16SPSP:
+ MI.insert(MI.begin(), MCOperand::createReg(CSKY::R14));
+ MI.insert(MI.begin(), MCOperand::createReg(CSKY::R14));
+ return MCDisassembler::Success;
+ case CSKY::FCMPHS_S:
+ case CSKY::FCMPHS_D:
+ case CSKY::FCMPLT_S:
+ case CSKY::FCMPLT_D:
+ case CSKY::FCMPNE_S:
+ case CSKY::FCMPNE_D:
+ case CSKY::FCMPUO_S:
+ case CSKY::FCMPUO_D:
+ case CSKY::FCMPZHS_S:
+ case CSKY::FCMPZHS_D:
+ case CSKY::FCMPZLS_S:
+ case CSKY::FCMPZLS_D:
+ case CSKY::FCMPZNE_S:
+ case CSKY::FCMPZNE_D:
+ case CSKY::FCMPZUO_S:
+ case CSKY::FCMPZUO_D:
+ case CSKY::f2FCMPHS_S:
+ case CSKY::f2FCMPHS_D:
+ case CSKY::f2FCMPLT_S:
+ case CSKY::f2FCMPLT_D:
+ case CSKY::f2FCMPNE_S:
+ case CSKY::f2FCMPNE_D:
+ case CSKY::f2FCMPUO_S:
+ case CSKY::f2FCMPUO_D:
+ case CSKY::f2FCMPHSZ_S:
+ case CSKY::f2FCMPHSZ_D:
+ case CSKY::f2FCMPHZ_S:
+ case CSKY::f2FCMPHZ_D:
+ case CSKY::f2FCMPLSZ_S:
+ case CSKY::f2FCMPLSZ_D:
+ case CSKY::f2FCMPLTZ_S:
+ case CSKY::f2FCMPLTZ_D:
+ case CSKY::f2FCMPNEZ_S:
+ case CSKY::f2FCMPNEZ_D:
+ case CSKY::f2FCMPUOZ_S:
+ case CSKY::f2FCMPUOZ_D:
+
+ case CSKY::BT32:
+ case CSKY::BF32:
+ case CSKY::BT16:
+ case CSKY::BF16:
+ case CSKY::CMPNEI32:
+ case CSKY::CMPNEI16:
+ case CSKY::CMPNE32:
+ case CSKY::CMPNE16:
+ case CSKY::CMPHSI32:
+ case CSKY::CMPHSI16:
+ case CSKY::CMPHS32:
+ case CSKY::CMPHS16:
+ case CSKY::CMPLTI32:
+ case CSKY::CMPLTI16:
+ case CSKY::CMPLT32:
+ case CSKY::CMPLT16:
+ case CSKY::BTSTI32:
+ case CSKY::BTSTI16:
+ case CSKY::TSTNBZ32:
+ case CSKY::TSTNBZ16:
+ case CSKY::TST32:
+ case CSKY::TST16:
+ MI.insert(MI.begin(), MCOperand::createReg(CSKY::C));
+ return MCDisassembler::Success;
+ case CSKY::LSLC32:
+ case CSKY::LSRC32:
+ case CSKY::ASRC32:
+ MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::C));
+ return MCDisassembler::Success;
+ case CSKY::MOVF32:
+ case CSKY::MOVT32:
+ case CSKY::MVC32:
+ case CSKY::MVCV32:
+ case CSKY::MVCV16:
+ case CSKY::INCT32:
+ case CSKY::INCF32:
+ case CSKY::DECT32:
+ case CSKY::DECF32:
+ case CSKY::DECGT32:
+ case CSKY::DECLT32:
+ case CSKY::DECNE32:
+ case CSKY::CLRF32:
+ case CSKY::CLRT32:
+ case CSKY::f2FSEL_S:
+ case CSKY::f2FSEL_D:
+ MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::C));
+ return MCDisassembler::Success;
+ case CSKY::ADDC32:
+ case CSKY::ADDC16:
+ case CSKY::SUBC32:
+ case CSKY::SUBC16:
+ case CSKY::XSR32:
+ MI.insert(std::next(MI.begin()), MCOperand::createReg(CSKY::C));
+ MI.insert(MI.end(), MCOperand::createReg(CSKY::C));
+ return MCDisassembler::Success;
+ case CSKY::INS32:
+ MI.getOperand(3).setImm(MI.getOperand(3).getImm() +
+ MI.getOperand(4).getImm());
+ return MCDisassembler::Success;
+ }
+}
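
A compressed view of what handleCROperand does to a freshly decoded instruction (example values assumed): the encodings of cmpne32 and friends omit the condition register C, so the raw decode of "cmpne32 r2, r3" yields the operand list [r2, r3], and the fixup above rewrites it to [C, r2, r3] to match the td definition.

    #include "llvm/MC/MCInst.h"

    // Minimal sketch of the prepend case, mirroring the
    // MI.insert(MI.begin(), ...) calls above (helper name is ours).
    static void prependCarry(llvm::MCInst &MI, unsigned CarryReg) {
      MI.insert(MI.begin(), llvm::MCOperand::createReg(CarryReg));
    }
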
+
+static bool decodeFPUV3Instruction(MCInst &MI, uint32_t insn, uint64_t Address,
+ const MCDisassembler *DisAsm,
+ const MCSubtargetInfo &STI) {
+ LLVM_DEBUG(dbgs() << "Trying CSKY 32-bit fpuv3 table :\n");
+ if (!STI.getFeatureBits()[CSKY::FeatureFPUV3_HF] &&
+ !STI.getFeatureBits()[CSKY::FeatureFPUV3_SF] &&
+ !STI.getFeatureBits()[CSKY::FeatureFPUV3_DF])
+ return false;
+
+ DecodeStatus Result =
+ decodeInstruction(DecoderTableFPUV332, MI, insn, Address, DisAsm, STI);
+
+ if (Result == MCDisassembler::Fail) {
+ MI.clear();
+ return false;
+ }
+
+ return true;
+}
+
+DecodeStatus CSKYDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CS) const {
+
+ uint32_t Insn;
+ DecodeStatus Result = MCDisassembler::Fail;
+
+ // Guard before the first read: a truncated buffer must not be dereferenced.
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ Insn = support::endian::read16le(Bytes.data());
+
+ if ((Insn >> 14) == 0x3) {
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+ Insn = (Insn << 16) | support::endian::read16le(&Bytes[2]);
+
+ if (decodeFPUV3Instruction(MI, Insn, Address, this, STI))
+ Result = MCDisassembler::Success;
+ else {
+ LLVM_DEBUG(dbgs() << "Trying CSKY 32-bit table :\n");
+ Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
+ }
+
+ Size = 4;
+ } else {
+ LLVM_DEBUG(dbgs() << "Trying CSKY 16-bit table:\n");
+ Result = decodeInstruction(DecoderTable16, MI, Insn, Address, this, STI);
+ Size = 2;
+ }
+
+ handleCROperand(MI);
+
+ return Result;
+}
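
The length rule applied above is compact enough to state on its own (a standalone sketch, not backend API): CSKY encodings whose first little-endian halfword has both top bits set are 32-bit; everything else is 16-bit.

    #include <cstdint>

    unsigned cskyInsnLength(uint16_t FirstHalfword) {
      return ((FirstHalfword >> 14) == 0x3) ? 4 : 2;
    }
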
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index daa655416c47..b5dfdfa0b42b 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -88,6 +88,13 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
switch (Fixup.getTargetKind()) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case CSKY::fixup_csky_got32:
+ case CSKY::fixup_csky_got_imm18_scale4:
+ case CSKY::fixup_csky_gotoff:
+ case CSKY::fixup_csky_gotpc:
+ case CSKY::fixup_csky_plt32:
+ case CSKY::fixup_csky_plt_imm18_scale4:
+ llvm_unreachable("Relocation should be unconditionally forced\n");
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -123,6 +130,71 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Ctx.reportError(Fixup.getLoc(), "fixup value must be 2-byte aligned.");
return (Value >> 1) & 0x3ffff;
+ case CSKY::fixup_csky_pcrel_uimm8_scale4: {
+ if (!isUIntN(10, Value))
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value.");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned.");
+
+ unsigned IMM4L = (Value >> 2) & 0xf;
+ unsigned IMM4H = (Value >> 6) & 0xf;
+
+ Value = (IMM4H << 21) | (IMM4L << 4);
+ return Value;
+ }
+ case CSKY::fixup_csky_pcrel_imm10_scale2:
+ if (!isIntN(11, Value))
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value.");
+ if (Value & 0x1)
+ Ctx.reportError(Fixup.getLoc(), "fixup value must be 2-byte aligned.");
+
+ return (Value >> 1) & 0x3ff;
+ case CSKY::fixup_csky_pcrel_uimm7_scale4:
+ if (!isUIntN(9, Value))
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value.");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup value must be 4-byte aligned.");
+
+ if ((Value & 0xff) <= 0b111111100) {
+ unsigned IMM5L = (Value >> 2) & 0x1f;
+ unsigned IMM2H = (Value >> 7) & 0x3;
+
+ Value = (1 << 12) | (IMM2H << 8) | IMM5L;
+ } else {
+ unsigned IMM5L = (~Value >> 2) & 0x1f;
+ unsigned IMM2H = (~Value >> 7) & 0x3;
+
+ Value = (IMM2H << 8) | IMM5L;
+ }
+
+ return Value & 0xffff;
+ }
+}
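
A worked value for the uimm8_scale4 repack above may help (illustrative only): Value = 40 (0x28) is 4-byte aligned, so the word offset is 10 = 0b1010; IMM4L = 0b1010 and IMM4H = 0b0000, giving an encoded field of (0 << 21) | (0b1010 << 4) = 0xA0.

    // Standalone repack check mirroring the case above (helper name is ours).
    uint64_t packUimm8Scale4(uint64_t Value) {
      uint64_t IMM4L = (Value >> 2) & 0xF;
      uint64_t IMM4H = (Value >> 6) & 0xF;
      return (IMM4H << 21) | (IMM4L << 4); // packUimm8Scale4(40) == 0xA0
    }
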
+
+bool CSKYAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
+ bool Resolved, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const {
+ // Return true if the symbol is actually unresolved.
+ // Resolved could be always false when shouldForceRelocation return true.
+ // We use !WasForced to indicate that the symbol is unresolved and not forced
+ // by shouldForceRelocation.
+ if (!Resolved && !WasForced)
+ return true;
+
+ int64_t Offset = int64_t(Value);
+ switch (Fixup.getTargetKind()) {
+ default:
+ return false;
+ case CSKY::fixup_csky_pcrel_imm10_scale2:
+ return !isShiftedInt<10, 1>(Offset);
+ case CSKY::fixup_csky_pcrel_imm16_scale2:
+ return !isShiftedInt<16, 1>(Offset);
+ case CSKY::fixup_csky_pcrel_imm26_scale2:
+ return !isShiftedInt<26, 1>(Offset);
+ case CSKY::fixup_csky_pcrel_uimm7_scale4:
+ return !isShiftedUInt<8, 2>(Offset);
}
}
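
The range predicates above all follow the same shape; for example, isShiftedInt<10, 1> accepts exactly the even offsets representable in a signed 10-bit field scaled by 2, i.e. a ±1 KiB window. A standalone sketch of the 16-bit-branch check (helper name assumed):

    #include "llvm/Support/MathExtras.h"

    // True when a 2-byte conditional branch can no longer reach its target
    // and must be relaxed to a wider form.
    static bool needsWideBranch(int64_t Offset) {
      return !llvm::isShiftedInt<10, 1>(Offset);
    }
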
@@ -152,8 +224,9 @@ void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
bool IsLittleEndian = (Endian == support::little);
+ bool IsInstFixup = (Kind >= FirstTargetFixupKind);
- if (IsLittleEndian && (NumBytes == 4)) {
+ if (IsLittleEndian && IsInstFixup && (NumBytes == 4)) {
Data[Offset + 0] |= uint8_t((Value >> 16) & 0xff);
Data[Offset + 1] |= uint8_t((Value >> 24) & 0xff);
Data[Offset + 2] |= uint8_t(Value & 0xff);
@@ -166,6 +239,50 @@ void CSKYAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
}
}
+bool CSKYAsmBackend::mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case CSKY::JBR32:
+ case CSKY::JBT32:
+ case CSKY::JBF32:
+ case CSKY::JBSR32:
+ if (!STI.getFeatureBits()[CSKY::Has2E3])
+ return false;
+ return true;
+ case CSKY::JBR16:
+ case CSKY::JBT16:
+ case CSKY::JBF16:
+ case CSKY::LRW16:
+ case CSKY::BR16:
+ return true;
+ }
+}
+
+bool CSKYAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return true;
+ switch (Fixup.getTargetKind()) {
+ default:
+ break;
+ case CSKY::fixup_csky_got32:
+ case CSKY::fixup_csky_got_imm18_scale4:
+ case CSKY::fixup_csky_gotoff:
+ case CSKY::fixup_csky_gotpc:
+ case CSKY::fixup_csky_plt32:
+ case CSKY::fixup_csky_plt_imm18_scale4:
+ case CSKY::fixup_csky_doffset_imm18:
+ case CSKY::fixup_csky_doffset_imm18_scale2:
+ case CSKY::fixup_csky_doffset_imm18_scale4:
+ return true;
+ }
+
+ return false;
+}
+
bool CSKYAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
@@ -174,23 +291,62 @@ bool CSKYAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
void CSKYAsmBackend::relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const {
- llvm_unreachable("CSKYAsmBackend::relaxInstruction() unimplemented");
-}
+ MCInst Res;
-bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
- const MCSubtargetInfo *STI) const {
- if (Count % 2)
- return false;
+ switch (Inst.getOpcode()) {
+ default:
+ LLVM_DEBUG(Inst.dump());
+ llvm_unreachable("Opcode not expected!");
+ case CSKY::LRW16:
+ Res.setOpcode(CSKY::LRW32);
+ Res.addOperand(Inst.getOperand(0));
+ Res.addOperand(Inst.getOperand(1));
+ break;
+ case CSKY::BR16:
+ Res.setOpcode(CSKY::BR32);
+ Res.addOperand(Inst.getOperand(0));
+ break;
+ case CSKY::JBSR32:
+ Res.setOpcode(CSKY::JSRI32);
+ Res.addOperand(Inst.getOperand(1));
+ break;
+ case CSKY::JBR32:
+ Res.setOpcode(CSKY::JMPI32);
+ Res.addOperand(Inst.getOperand(1));
+ break;
+ case CSKY::JBT32:
+ case CSKY::JBF32:
+ Res.setOpcode(Inst.getOpcode() == CSKY::JBT32 ? CSKY::JBT_E : CSKY::JBF_E);
+ Res.addOperand(Inst.getOperand(0));
+ Res.addOperand(Inst.getOperand(1));
+ Res.addOperand(Inst.getOperand(2));
+ break;
+ case CSKY::JBR16:
+ Res.setOpcode(CSKY::JBR32);
+ Res.addOperand(Inst.getOperand(0));
+ Res.addOperand(Inst.getOperand(1));
+ break;
+ case CSKY::JBT16:
+ case CSKY::JBF16:
+ // Without the E2 32-bit conditional branches (e.g. CK801), fall back
+ // to the JBT_E/JBF_E expansion pseudos.
+ unsigned Opcode;
+ if (STI.getFeatureBits()[CSKY::HasE2])
+ Opcode = Inst.getOpcode() == CSKY::JBT16 ? CSKY::JBT32 : CSKY::JBF32;
+ else
+ Opcode = Inst.getOpcode() == CSKY::JBT16 ? CSKY::JBT_E : CSKY::JBF_E;
- // MOV32 r0, r0
- while (Count >= 4) {
- OS.write("\xc4\x00\x48\x20", 4);
- Count -= 4;
+ Res.setOpcode(Opcode);
+ Res.addOperand(Inst.getOperand(0));
+ Res.addOperand(Inst.getOperand(1));
+ Res.addOperand(Inst.getOperand(2));
+ break;
}
- // MOV16 r0, r0
- if (Count)
- OS.write("\x6c\x03", 2);
+ Inst = std::move(Res);
+}
+bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
+ OS.write_zeros(Count);
return true;
}
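
Taken together, mayNeedRelaxation, fixupNeedsRelaxationAdvanced, and relaxInstruction implement the backend's half of the generic MC relaxation loop: eligible 16-bit instructions are widened step by step until their fixups fit. The per-opcode widening can be read off the switch above; a compact restatement, using hypothetical enum tags in place of the CSKY::* opcodes:

    #include <cassert>

    // Hypothetical stand-ins for the CSKY::* opcode enumerators.
    enum Op { JBR16, JBT16, JBF16, BR16, LRW16,
              JBR32, JBT32, JBF32, BR32, LRW32, JBT_E, JBF_E };

    // One widening step of relaxInstruction. Conditional 16-bit jumps need
    // the E2 extension to become JBT32/JBF32; otherwise (CK801) they go
    // straight to the JBT_E/JBF_E expansion pseudos.
    Op widenOnceSketch(Op O, bool HasE2) {
      switch (O) {
      case JBR16: return JBR32;
      case BR16:  return BR32;
      case LRW16: return LRW32;
      case JBT16: return HasE2 ? JBT32 : JBT_E;
      case JBF16: return HasE2 ? JBF32 : JBF_E;
      default:    return O; // 32-bit forms are handled separately above
      }
    }

    int main() {
      assert(widenOnceSketch(JBT16, /*HasE2=*/true) == JBT32);
      assert(widenOnceSketch(JBT16, /*HasE2=*/false) == JBT_E); // CK801 path
    }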
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index e710954e9df8..09b3ce6cc82b 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -11,6 +11,7 @@
#include "MCTargetDesc/CSKYFixupKinds.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
namespace llvm {
@@ -39,9 +40,21 @@ public:
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override;
+
+ bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override;
+
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
+
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
};
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
index 163632632290..d7cc4c8525ee 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
+#include "CSKYFixupKinds.h"
+#include "CSKYMCExpr.h"
#include "CSKYMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -33,10 +35,112 @@ unsigned CSKYELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- // Determine the type of the relocation.
- switch ((unsigned)Fixup.getKind()) {
+ const MCExpr *Expr = Fixup.getValue();
+ // Determine the type of the relocation
+ unsigned Kind = Fixup.getTargetKind();
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+
+ if (IsPCRel) {
+ switch (Kind) {
+ default:
+ LLVM_DEBUG(dbgs() << "Unknown Kind1 = " << Kind);
+ Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+ return ELF::R_CKCORE_NONE;
+ case FK_Data_4:
+ case FK_PCRel_4:
+ return ELF::R_CKCORE_PCREL32;
+ case CSKY::fixup_csky_pcrel_uimm16_scale4:
+ return ELF::R_CKCORE_PCREL_IMM16_4;
+ case CSKY::fixup_csky_pcrel_uimm8_scale4:
+ return ELF::R_CKCORE_PCREL_IMM8_4;
+ case CSKY::fixup_csky_pcrel_imm26_scale2:
+ return ELF::R_CKCORE_PCREL_IMM26_2;
+ case CSKY::fixup_csky_pcrel_imm18_scale2:
+ return ELF::R_CKCORE_PCREL_IMM18_2;
+ case CSKY::fixup_csky_pcrel_imm16_scale2:
+ return ELF::R_CKCORE_PCREL_IMM16_2;
+ case CSKY::fixup_csky_pcrel_imm10_scale2:
+ return ELF::R_CKCORE_PCREL_IMM10_2;
+ case CSKY::fixup_csky_pcrel_uimm7_scale4:
+ return ELF::R_CKCORE_PCREL_IMM7_4;
+ }
+ }
+
+ switch (Kind) {
default:
- llvm_unreachable("invalid fixup kind!");
+ LLVM_DEBUG(dbgs() << "Unknown Kind2 = " << Kind);
+ Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+ return ELF::R_CKCORE_NONE;
+ case FK_Data_1:
+ Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
+ return ELF::R_CKCORE_NONE;
+ case FK_Data_2:
+ Ctx.reportError(Fixup.getLoc(), "2-byte data relocations not supported");
+ return ELF::R_CKCORE_NONE;
+ case FK_Data_4:
+ if (Expr->getKind() == MCExpr::Target) {
+ auto TK = cast<CSKYMCExpr>(Expr)->getKind();
+ if (TK == CSKYMCExpr::VK_CSKY_ADDR)
+ return ELF::R_CKCORE_ADDR32;
+ if (TK == CSKYMCExpr::VK_CSKY_GOT)
+ return ELF::R_CKCORE_GOT32;
+ if (TK == CSKYMCExpr::VK_CSKY_GOTOFF)
+ return ELF::R_CKCORE_GOTOFF;
+ if (TK == CSKYMCExpr::VK_CSKY_PLT)
+ return ELF::R_CKCORE_PLT32;
+ if (TK == CSKYMCExpr::VK_CSKY_TLSIE)
+ return ELF::R_CKCORE_TLS_IE32;
+ if (TK == CSKYMCExpr::VK_CSKY_TLSLE)
+ return ELF::R_CKCORE_TLS_LE32;
+ if (TK == CSKYMCExpr::VK_CSKY_TLSGD)
+ return ELF::R_CKCORE_TLS_GD32;
+ if (TK == CSKYMCExpr::VK_CSKY_TLSLDM)
+ return ELF::R_CKCORE_TLS_LDM32;
+ if (TK == CSKYMCExpr::VK_CSKY_TLSLDO)
+ return ELF::R_CKCORE_TLS_LDO32;
+ if (TK == CSKYMCExpr::VK_CSKY_GOTPC)
+ return ELF::R_CKCORE_GOTPC;
+ if (TK == CSKYMCExpr::VK_CSKY_None)
+ return ELF::R_CKCORE_ADDR32;
+
+ LLVM_DEBUG(dbgs() << "Unknown FK_Data_4 TK = " << TK);
+ Ctx.reportError(Fixup.getLoc(), "unknown target FK_Data_4");
+ } else {
+ switch (Modifier) {
+ default:
+ Ctx.reportError(Fixup.getLoc(),
+ "invalid fixup for 4-byte data relocation");
+ return ELF::R_CKCORE_NONE;
+ case MCSymbolRefExpr::VK_GOT:
+ return ELF::R_CKCORE_GOT32;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ return ELF::R_CKCORE_GOTOFF;
+ case MCSymbolRefExpr::VK_PLT:
+ return ELF::R_CKCORE_PLT32;
+ case MCSymbolRefExpr::VK_None:
+ return ELF::R_CKCORE_ADDR32;
+ }
+ }
+ return ELF::R_CKCORE_NONE;
+ case FK_Data_8:
+ Ctx.reportError(Fixup.getLoc(), "8-byte data relocations not supported");
+ return ELF::R_CKCORE_NONE;
+ case CSKY::fixup_csky_addr32:
+ return ELF::R_CKCORE_ADDR32;
+ case CSKY::fixup_csky_addr_hi16:
+ return ELF::R_CKCORE_ADDR_HI16;
+ case CSKY::fixup_csky_addr_lo16:
+ return ELF::R_CKCORE_ADDR_LO16;
+ case CSKY::fixup_csky_doffset_imm18:
+ return ELF::R_CKCORE_DOFFSET_IMM18;
+ case CSKY::fixup_csky_doffset_imm18_scale2:
+ return ELF::R_CKCORE_DOFFSET_IMM18_2;
+ case CSKY::fixup_csky_doffset_imm18_scale4:
+ return ELF::R_CKCORE_DOFFSET_IMM18_4;
+ case CSKY::fixup_csky_got_imm18_scale4:
+ return ELF::R_CKCORE_GOT_IMM18_4;
+ case CSKY::fixup_csky_plt_imm18_scale4:
+ return ELF::R_CKCORE_PLT_IMM18_4;
}
}
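
The selection runs in two stages: PC-relative fixups are resolved first, and only then does the absolute table apply, with FK_Data_4 refined further by the target expression kind or the generic symbol modifier. A condensed model of that dispatch order (the enum names below are illustrative stand-ins, not the real ELF::R_CKCORE_* or MCSymbolRefExpr::VK_* values):

    // Illustrative stand-ins for relocation and modifier enumerators.
    enum RelocSketch { R_NONE, R_PCREL32, R_ADDR32, R_GOT32, R_PLT32 };
    enum ModifierSketch { MOD_None, MOD_GOT, MOD_PLT };

    // Mirrors the dispatch order of getRelocType: the PC-relative table is
    // consulted first, then FK_Data_4 is refined by the symbol modifier.
    RelocSketch selectRelocSketch(bool IsPCRel, bool IsData4, ModifierSketch Mod) {
      if (IsPCRel)
        return IsData4 ? R_PCREL32 : R_NONE; // instruction fixups elided here
      if (IsData4) {
        switch (Mod) {
        case MOD_GOT:  return R_GOT32;
        case MOD_PLT:  return R_PLT32;
        case MOD_None: return R_ADDR32;
        }
      }
      return R_NONE;
    }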
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
new file mode 100644
index 000000000000..90775c1b70f2
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
@@ -0,0 +1,335 @@
+//===-- CSKYELFStreamer.cpp - CSKY ELF Target Streamer Methods ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides CSKY specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYELFStreamer.h"
+#include "CSKYMCTargetDesc.h"
+#include "MCTargetDesc/CSKYAsmBackend.h"
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CSKYAttributes.h"
+#include "llvm/Support/CSKYTargetParser.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+// This part is for ELF object output.
+CSKYTargetELFStreamer::CSKYTargetELFStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI)
+ : CSKYTargetStreamer(S), CurrentVendor("csky") {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ const FeatureBitset &Features = STI.getFeatureBits();
+
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+
+ EFlags |= ELF::EF_CSKY_ABIV2;
+
+ if (Features[CSKY::ProcCK801])
+ EFlags |= ELF::EF_CSKY_801;
+ else if (Features[CSKY::ProcCK802])
+ EFlags |= ELF::EF_CSKY_802;
+ else if (Features[CSKY::ProcCK803])
+ EFlags |= ELF::EF_CSKY_803;
+ else if (Features[CSKY::ProcCK804])
+ EFlags |= ELF::EF_CSKY_803;
+ else if (Features[CSKY::ProcCK805])
+ EFlags |= ELF::EF_CSKY_805;
+ else if (Features[CSKY::ProcCK807])
+ EFlags |= ELF::EF_CSKY_807;
+ else if (Features[CSKY::ProcCK810])
+ EFlags |= ELF::EF_CSKY_810;
+ else if (Features[CSKY::ProcCK860])
+ EFlags |= ELF::EF_CSKY_860;
+ else
+ EFlags |= ELF::EF_CSKY_810;
+
+ if (Features[CSKY::FeatureFPUV2_SF] || Features[CSKY::FeatureFPUV3_SF])
+ EFlags |= ELF::EF_CSKY_FLOAT;
+
+ EFlags |= ELF::EF_CSKY_EFV1;
+
+ MCA.setELFHeaderEFlags(EFlags);
+}
+
+MCELFStreamer &CSKYTargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+}
+
+void CSKYTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ setAttributeItem(Attribute, Value, /*OverwriteExisting=*/true);
+}
+
+void CSKYTargetELFStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ setAttributeItem(Attribute, String, /*OverwriteExisting=*/true);
+}
+
+void CSKYTargetELFStreamer::finishAttributeSection() {
+ if (Contents.empty())
+ return;
+
+ if (AttributeSection) {
+ Streamer.switchSection(AttributeSection);
+ } else {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ AttributeSection = MCA.getContext().getELFSection(
+ ".csky.attributes", ELF::SHT_CSKY_ATTRIBUTES, 0);
+ Streamer.switchSection(AttributeSection);
+ Streamer.emitInt8(ELFAttrs::Format_Version);
+ }
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ const size_t ContentsSize = calculateContentSize();
+
+ Streamer.emitInt32(VendorHeaderSize + TagHeaderSize + ContentsSize);
+ Streamer.emitBytes(CurrentVendor);
+ Streamer.emitInt8(0); // '\0'
+
+ Streamer.emitInt8(ELFAttrs::File);
+ Streamer.emitInt32(TagHeaderSize + ContentsSize);
+
+ // Size should have been accounted for already, now
+ // emit each field as its type (ULEB or String).
+ for (AttributeItem item : Contents) {
+ Streamer.emitULEB128IntValue(item.Tag);
+ switch (item.Type) {
+ default:
+ llvm_unreachable("Invalid attribute type");
+ case AttributeType::Numeric:
+ Streamer.emitULEB128IntValue(item.IntValue);
+ break;
+ case AttributeType::Text:
+ Streamer.emitBytes(item.StringValue);
+ Streamer.emitInt8(0); // '\0'
+ break;
+ case AttributeType::NumericAndText:
+ Streamer.emitULEB128IntValue(item.IntValue);
+ Streamer.emitBytes(item.StringValue);
+ Streamer.emitInt8(0); // '\0'
+ break;
+ }
+ }
+
+ Contents.clear();
+}
+
+size_t CSKYTargetELFStreamer::calculateContentSize() const {
+ size_t Result = 0;
+ for (AttributeItem item : Contents) {
+ switch (item.Type) {
+ case AttributeType::Hidden:
+ break;
+ case AttributeType::Numeric:
+ Result += getULEB128Size(item.Tag);
+ Result += getULEB128Size(item.IntValue);
+ break;
+ case AttributeType::Text:
+ Result += getULEB128Size(item.Tag);
+ Result += item.StringValue.size() + 1; // string + '\0'
+ break;
+ case AttributeType::NumericAndText:
+ Result += getULEB128Size(item.Tag);
+ Result += getULEB128Size(item.IntValue);
+ Result += item.StringValue.size() + 1; // string + '\0'
+ break;
+ }
+ }
+ return Result;
+}
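
The bytes these two functions produce follow the usual ELF build-attributes shape: a one-byte format version at the start of the section, then a vendor subsection whose leading size word covers the vendor name and every record after it. A sketch of the arithmetic, assuming ulebSize matches llvm::getULEB128Size:

    #include <cstddef>
    #include <cstdint>
    #include <string>

    // Byte count of a ULEB128 encoding, equivalent to llvm::getULEB128Size.
    static size_t ulebSize(uint64_t V) {
      size_t N = 1;
      while (V >>= 7)
        ++N;
      return N;
    }

    // One vendor subsection of .csky.attributes, laid out as emitted above:
    //   u32 subsection-size | vendor name + '\0' | u8 File tag |
    //   u32 tag-size | ULEB128/string attribute records
    size_t subsectionSizeSketch(const std::string &Vendor, size_t ContentsSize) {
      const size_t VendorHeader = 4 + Vendor.size() + 1; // size word + name + NUL
      const size_t TagHeader = 1 + 4;                    // tag byte + size word
      return VendorHeader + TagHeader + ContentsSize;
    }

    int main() {
      // One numeric record (tag 4, value 2) encodes to two ULEB128 bytes;
      // 4 + ("csky" + NUL = 5) + (1 + 4) + 2 == 16 bytes in total.
      size_t Contents = ulebSize(4) + ulebSize(2);
      return subsectionSizeSketch("csky", Contents) == 16 ? 0 : 1;
    }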
+
+void CSKYELFStreamer::EmitMappingSymbol(StringRef Name) {
+ if (Name == "$d" && State == EMS_Data)
+ return;
+ if (Name == "$t" && State == EMS_Text)
+ return;
+ if (Name == "$t" && State == EMS_None) {
+ State = EMS_Text;
+ return;
+ }
+
+ State = (Name == "$t" ? EMS_Text : EMS_Data);
+
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
+ emitLabel(Symbol);
+
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+}
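
Mapping symbols let disassemblers tell CSKY code from inline data, in the same style as ARM's $t/$d markers, and the state machine above suppresses redundant ones. A direct model of that logic with the emit decisions made explicit (needsMappingSymbolSketch is a hypothetical name):

    #include <cassert>
    #include <string>

    enum MappingState { None, Text, Data };

    // Returns true when a new $t/$d symbol must actually be emitted,
    // collapsing redundant transitions exactly like EmitMappingSymbol.
    bool needsMappingSymbolSketch(MappingState &State, const std::string &Name) {
      if (Name == "$d" && State == Data)
        return false;
      if (Name == "$t" && State == Text)
        return false;
      if (Name == "$t" && State == None) {
        State = Text;
        return false;
      }
      State = (Name == "$t") ? Text : Data;
      return true;
    }

    int main() {
      MappingState S = None;
      assert(!needsMappingSymbolSketch(S, "$t")); // leading code: no marker
      assert(needsMappingSymbolSketch(S, "$d"));  // first data run: emit $d.0
      assert(!needsMappingSymbolSketch(S, "$d")); // still data: suppressed
      assert(needsMappingSymbolSketch(S, "$t"));  // back to code: emit $t.1
    }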
+
+void CSKYTargetELFStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
+ StringRef CPU = STI.getCPU();
+ CSKY::ArchKind ArchID = CSKY::parseCPUArch(CPU);
+
+ if (ArchID == CSKY::ArchKind::CK804)
+ ArchID = CSKY::ArchKind::CK803;
+
+ StringRef CPU_ARCH = CSKY::getArchName(ArchID);
+
+ if (ArchID == CSKY::ArchKind::INVALID) {
+ CPU = "ck810";
+ CPU_ARCH = "ck810";
+ }
+ emitTextAttribute(CSKYAttrs::CSKY_ARCH_NAME, CPU_ARCH);
+ emitTextAttribute(CSKYAttrs::CSKY_CPU_NAME, CPU);
+
+ unsigned ISAFlag = 0;
+ if (STI.hasFeature(CSKY::HasE1))
+ ISAFlag |= CSKYAttrs::V2_ISA_E1;
+
+ if (STI.hasFeature(CSKY::HasE2))
+ ISAFlag |= CSKYAttrs::V2_ISA_1E2;
+
+ if (STI.hasFeature(CSKY::Has2E3))
+ ISAFlag |= CSKYAttrs::V2_ISA_2E3;
+
+ if (STI.hasFeature(CSKY::HasMP))
+ ISAFlag |= CSKYAttrs::ISA_MP;
+
+ if (STI.hasFeature(CSKY::Has3E3r1))
+ ISAFlag |= CSKYAttrs::V2_ISA_3E3R1;
+
+ if (STI.hasFeature(CSKY::Has3r1E3r2))
+ ISAFlag |= CSKYAttrs::V2_ISA_3E3R2;
+
+ if (STI.hasFeature(CSKY::Has3r2E3r3))
+ ISAFlag |= CSKYAttrs::V2_ISA_3E3R3;
+
+ if (STI.hasFeature(CSKY::Has3E7))
+ ISAFlag |= CSKYAttrs::V2_ISA_3E7;
+
+ if (STI.hasFeature(CSKY::HasMP1E2))
+ ISAFlag |= CSKYAttrs::ISA_MP_1E2;
+
+ if (STI.hasFeature(CSKY::Has7E10))
+ ISAFlag |= CSKYAttrs::V2_ISA_7E10;
+
+ if (STI.hasFeature(CSKY::Has10E60))
+ ISAFlag |= CSKYAttrs::V2_ISA_10E60;
+
+ if (STI.hasFeature(CSKY::FeatureTrust))
+ ISAFlag |= CSKYAttrs::ISA_TRUST;
+
+ if (STI.hasFeature(CSKY::FeatureJAVA))
+ ISAFlag |= CSKYAttrs::ISA_JAVA;
+
+ if (STI.hasFeature(CSKY::FeatureCache))
+ ISAFlag |= CSKYAttrs::ISA_CACHE;
+
+ if (STI.hasFeature(CSKY::FeatureNVIC))
+ ISAFlag |= CSKYAttrs::ISA_NVIC;
+
+ if (STI.hasFeature(CSKY::FeatureDSP))
+ ISAFlag |= CSKYAttrs::ISA_DSP;
+
+ if (STI.hasFeature(CSKY::HasDSP1E2))
+ ISAFlag |= CSKYAttrs::ISA_DSP_1E2;
+
+ if (STI.hasFeature(CSKY::HasDSPE60))
+ ISAFlag |= CSKYAttrs::V2_ISA_DSPE60;
+
+ if (STI.hasFeature(CSKY::FeatureDSPV2))
+ ISAFlag |= CSKYAttrs::ISA_DSP_ENHANCE;
+
+ if (STI.hasFeature(CSKY::FeatureDSP_Silan))
+ ISAFlag |= CSKYAttrs::ISA_DSP_SILAN;
+
+ if (STI.hasFeature(CSKY::FeatureVDSPV1_128))
+ ISAFlag |= CSKYAttrs::ISA_VDSP;
+
+ if (STI.hasFeature(CSKY::FeatureVDSPV2))
+ ISAFlag |= CSKYAttrs::ISA_VDSP_2;
+
+ if (STI.hasFeature(CSKY::HasVDSP2E3))
+ ISAFlag |= CSKYAttrs::ISA_VDSP_2E3;
+
+ if (STI.hasFeature(CSKY::HasVDSP2E60F))
+ ISAFlag |= CSKYAttrs::ISA_VDSP_2E60F;
+
+ emitAttribute(CSKYAttrs::CSKY_ISA_FLAGS, ISAFlag);
+
+ unsigned ISAExtFlag = 0;
+ if (STI.hasFeature(CSKY::HasFLOATE1))
+ ISAExtFlag |= CSKYAttrs::ISA_FLOAT_E1;
+
+ if (STI.hasFeature(CSKY::HasFLOAT1E2))
+ ISAExtFlag |= CSKYAttrs::ISA_FLOAT_1E2;
+
+ if (STI.hasFeature(CSKY::HasFLOAT1E3))
+ ISAExtFlag |= CSKYAttrs::ISA_FLOAT_1E3;
+
+ if (STI.hasFeature(CSKY::HasFLOAT3E4))
+ ISAExtFlag |= CSKYAttrs::ISA_FLOAT_3E4;
+
+ if (STI.hasFeature(CSKY::HasFLOAT7E60))
+ ISAExtFlag |= CSKYAttrs::ISA_FLOAT_7E60;
+
+ emitAttribute(CSKYAttrs::CSKY_ISA_EXT_FLAGS, ISAExtFlag);
+
+ if (STI.hasFeature(CSKY::FeatureDSP))
+ emitAttribute(CSKYAttrs::CSKY_DSP_VERSION,
+ CSKYAttrs::DSP_VERSION_EXTENSION);
+ if (STI.hasFeature(CSKY::FeatureDSPV2))
+ emitAttribute(CSKYAttrs::CSKY_DSP_VERSION, CSKYAttrs::DSP_VERSION_2);
+
+ if (STI.hasFeature(CSKY::FeatureVDSPV2))
+ emitAttribute(CSKYAttrs::CSKY_VDSP_VERSION, CSKYAttrs::VDSP_VERSION_2);
+
+ if (STI.hasFeature(CSKY::FeatureFPUV2_SF) ||
+ STI.hasFeature(CSKY::FeatureFPUV2_DF))
+ emitAttribute(CSKYAttrs::CSKY_FPU_VERSION, CSKYAttrs::FPU_VERSION_2);
+ else if (STI.hasFeature(CSKY::FeatureFPUV3_HF) ||
+ STI.hasFeature(CSKY::FeatureFPUV3_SF) ||
+ STI.hasFeature(CSKY::FeatureFPUV3_DF))
+ emitAttribute(CSKYAttrs::CSKY_FPU_VERSION, CSKYAttrs::FPU_VERSION_3);
+
+ bool hasAnyFloatExt = STI.hasFeature(CSKY::FeatureFPUV2_SF) ||
+ STI.hasFeature(CSKY::FeatureFPUV2_DF) ||
+ STI.hasFeature(CSKY::FeatureFPUV3_HF) ||
+ STI.hasFeature(CSKY::FeatureFPUV3_SF) ||
+ STI.hasFeature(CSKY::FeatureFPUV3_DF);
+
+ if (hasAnyFloatExt && STI.hasFeature(CSKY::ModeHardFloat) &&
+ STI.hasFeature(CSKY::ModeHardFloatABI))
+ emitAttribute(CSKYAttrs::CSKY_FPU_ABI, CSKYAttrs::FPU_ABI_HARD);
+ else if (hasAnyFloatExt && STI.hasFeature(CSKY::ModeHardFloat))
+ emitAttribute(CSKYAttrs::CSKY_FPU_ABI, CSKYAttrs::FPU_ABI_SOFTFP);
+ else
+ emitAttribute(CSKYAttrs::CSKY_FPU_ABI, CSKYAttrs::FPU_ABI_SOFT);
+
+ unsigned HardFPFlag = 0;
+ if (STI.hasFeature(CSKY::FeatureFPUV3_HF))
+ HardFPFlag |= CSKYAttrs::FPU_HARDFP_HALF;
+ if (STI.hasFeature(CSKY::FeatureFPUV2_SF) ||
+ STI.hasFeature(CSKY::FeatureFPUV3_SF))
+ HardFPFlag |= CSKYAttrs::FPU_HARDFP_SINGLE;
+ if (STI.hasFeature(CSKY::FeatureFPUV2_DF) ||
+ STI.hasFeature(CSKY::FeatureFPUV3_DF))
+ HardFPFlag |= CSKYAttrs::FPU_HARDFP_DOUBLE;
+
+ if (HardFPFlag != 0) {
+ emitAttribute(CSKYAttrs::CSKY_FPU_DENORMAL, CSKYAttrs::NEEDED);
+ emitAttribute(CSKYAttrs::CSKY_FPU_EXCEPTION, CSKYAttrs::NEEDED);
+ emitTextAttribute(CSKYAttrs::CSKY_FPU_NUMBER_MODULE, "IEEE 754");
+ emitAttribute(CSKYAttrs::CSKY_FPU_HARDFP, HardFPFlag);
+ }
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h
new file mode 100644
index 000000000000..b7931e922279
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.h
@@ -0,0 +1,148 @@
+//===-- CSKYELFStreamer.h - CSKY ELF Target Streamer -----------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYELFSTREAMER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYELFSTREAMER_H
+
+#include "CSKYTargetStreamer.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+
+class CSKYTargetELFStreamer : public CSKYTargetStreamer {
+private:
+ enum class AttributeType { Hidden, Numeric, Text, NumericAndText };
+
+ struct AttributeItem {
+ AttributeType Type;
+ unsigned Tag;
+ unsigned IntValue;
+ std::string StringValue;
+ };
+
+ StringRef CurrentVendor;
+ SmallVector<AttributeItem, 64> Contents;
+
+ MCSection *AttributeSection = nullptr;
+
+ AttributeItem *getAttributeItem(unsigned Attribute) {
+ for (size_t i = 0; i < Contents.size(); ++i)
+ if (Contents[i].Tag == Attribute)
+ return &Contents[i];
+ return nullptr;
+ }
+
+ void setAttributeItem(unsigned Attribute, unsigned Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item.
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeType::Numeric;
+ Item->IntValue = Value;
+ return;
+ }
+
+ // Create new attribute item.
+ Contents.push_back({AttributeType::Numeric, Attribute, Value, ""});
+ }
+
+ void setAttributeItem(unsigned Attribute, StringRef Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item.
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeType::Text;
+ Item->StringValue = std::string(Value);
+ return;
+ }
+
+ // Create new attribute item.
+ Contents.push_back({AttributeType::Text, Attribute, 0, std::string(Value)});
+ }
+
+ void setAttributeItems(unsigned Attribute, unsigned IntValue,
+ StringRef StringValue, bool OverwriteExisting) {
+ // Look for existing attribute item.
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->Type = AttributeType::NumericAndText;
+ Item->IntValue = IntValue;
+ Item->StringValue = std::string(StringValue);
+ return;
+ }
+
+ // Create new attribute item.
+ Contents.push_back({AttributeType::NumericAndText, Attribute, IntValue,
+ std::string(StringValue)});
+ }
+
+ void emitAttribute(unsigned Attribute, unsigned Value) override;
+ void emitTextAttribute(unsigned Attribute, StringRef String) override;
+ void finishAttributeSection() override;
+ size_t calculateContentSize() const;
+
+ void emitTargetAttributes(const MCSubtargetInfo &STI) override;
+
+public:
+ MCELFStreamer &getStreamer();
+ CSKYTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+};
+
+class CSKYELFStreamer : public MCELFStreamer {
+ int64_t MappingSymbolCounter = 0;
+
+ void EmitMappingSymbol(StringRef Name);
+
+public:
+ friend class CSKYTargetELFStreamer;
+
+ enum ElfMappingSymbol { EMS_None, EMS_Text, EMS_Data };
+
+ ElfMappingSymbol State;
+
+ CSKYELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCELFStreamer(Context, std::move(TAB), std::move(OW),
+ std::move(Emitter)),
+ State(EMS_None) {}
+
+ ~CSKYELFStreamer() override = default;
+
+ void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
+ SMLoc Loc) override {
+ EmitMappingSymbol("$d");
+ MCObjectStreamer::emitFill(NumBytes, FillValue, Loc);
+ }
+ void emitBytes(StringRef Data) override {
+ EmitMappingSymbol("$d");
+ MCELFStreamer::emitBytes(Data);
+ }
+ void emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
+ EmitMappingSymbol("$t");
+ MCELFStreamer::emitInstruction(Inst, STI);
+ }
+ void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
+ EmitMappingSymbol("$d");
+ MCELFStreamer::emitValueImpl(Value, Size, Loc);
+ }
+ void reset() override {
+ MappingSymbolCounter = 0;
+ State = EMS_None;
+ MCELFStreamer::reset();
+ }
+};
+
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index 07757f03c258..3a0017d11e23 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -9,16 +9,21 @@
// This class prints a CSKY MCInst to a .s file.
//
//===----------------------------------------------------------------------===//
-
#include "CSKYInstPrinter.h"
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -55,6 +60,14 @@ bool CSKYInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
ArchRegNames = true;
return true;
}
+ if (Opt == "debug") {
+ DebugFlag = true;
+ return true;
+ }
+ if (Opt == "abi-names") {
+ ABIRegNames = true;
+ return true;
+ }
return false;
}
@@ -70,7 +83,11 @@ void CSKYInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
void CSKYInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
- O << getRegisterName(RegNo);
+ if (PrintBranchImmAsAddress)
+ O << getRegisterName(RegNo, ABIRegNames ? CSKY::ABIRegAltName
+ : CSKY::NoRegAltName);
+ else
+ O << getRegisterName(RegNo);
}
void CSKYInstPrinter::printFPRRegName(raw_ostream &O, unsigned RegNo) const {
@@ -87,15 +104,38 @@ void CSKYInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &MO = MI->getOperand(OpNo);
if (MO.isReg()) {
- if (MO.getReg() == CSKY::C)
- O << "";
+ unsigned Reg = MO.getReg();
+ bool useABIName = false;
+ if (PrintBranchImmAsAddress)
+ useABIName = ABIRegNames;
else
- printRegName(O, MO.getReg());
+ useABIName = !ArchRegNames;
+
+ if (Reg == CSKY::C)
+ O << "";
+ else if (STI.getFeatureBits()[CSKY::FeatureJAVA]) {
+ if (Reg == CSKY::R23)
+ O << (useABIName ? "fp" : "r23");
+ else if (Reg == CSKY::R24)
+ O << (useABIName ? "top" : "r24");
+ else if (Reg == CSKY::R25)
+ O << (useABIName ? "bsp" : "r25");
+ else
+ printRegName(O, Reg);
+ } else
+ printRegName(O, Reg);
+
return;
}
if (MO.isImm()) {
- O << formatImm(MO.getImm());
+ uint64_t TSFlags = MII.get(MI->getOpcode()).TSFlags;
+
+ if (((TSFlags & CSKYII::AddrModeMask) != CSKYII::AddrModeNone) &&
+ PrintBranchImmAsAddress)
+ O << formatHex(MO.getImm());
+ else
+ O << MO.getImm();
return;
}
@@ -157,6 +197,22 @@ void CSKYInstPrinter::printCSKYSymbolOperand(const MCInst *MI, uint64_t Address,
}
}
+void CSKYInstPrinter::printPSRFlag(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ auto V = MI->getOperand(OpNo).getImm();
+
+ ListSeparator LS;
+
+ if ((V >> 3) & 0x1)
+ O << LS << "ee";
+ if ((V >> 2) & 0x1)
+ O << LS << "ie";
+ if ((V >> 1) & 0x1)
+ O << LS << "fe";
+ if ((V >> 0) & 0x1)
+ O << LS << "af";
+}
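
The PSR operand is a 4-bit mask printed as a comma-separated flag list, bit 3 down to bit 0 mapping to ee, ie, fe, af. An equivalent standalone decoder:

    #include <cstdio>

    // Decodes the 4-bit PSR operand the printer formats above:
    // bit 3 = ee, bit 2 = ie, bit 1 = fe, bit 0 = af.
    void printPSRSketch(unsigned V) {
      const char *Names[] = {"ee", "ie", "fe", "af"};
      const char *Sep = "";
      for (int Bit = 3; Bit >= 0; --Bit)
        if ((V >> Bit) & 1) {
          std::printf("%s%s", Sep, Names[3 - Bit]);
          Sep = ", ";
        }
      std::printf("\n");
    }

    int main() { printPSRSketch(0xA); } // bits 3 and 1 set: prints "ee, fe"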
+
void CSKYInstPrinter::printRegisterSeq(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
index 1d220b749cb1..540f901fd479 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
@@ -16,6 +16,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/EndianStream.h"
using namespace llvm;
@@ -64,15 +67,170 @@ static void writeData(uint32_t Bin, unsigned Size, raw_ostream &OS) {
support::endian::write<uint16_t>(OS, LO16, support::little);
}
+void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ MCInst TmpInst;
+
+ uint32_t Binary;
+
+ TmpInst =
+ MCInstBuilder(MI.getOpcode() == CSKY::JBT_E ? CSKY::BF16 : CSKY::BT16)
+ .addOperand(MI.getOperand(0))
+ .addImm(6);
+ Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ writeData(Binary, 2, OS);
+
+ if (!STI.getFeatureBits()[CSKY::Has2E3])
+ TmpInst = MCInstBuilder(CSKY::BR32)
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(2));
+ else
+ TmpInst = MCInstBuilder(CSKY::JMPI32).addOperand(MI.getOperand(2));
+ Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ Fixups[Fixups.size() - 1].setOffset(2);
+ writeData(Binary, 4, OS);
+}
+
+void CSKYMCCodeEmitter::expandNEG(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ MCInst TmpInst;
+ uint32_t Binary;
+ unsigned Size = MI.getOpcode() == CSKY::NEG32 ? 4 : 2;
+
+ TmpInst = MCInstBuilder(Size == 4 ? CSKY::NOT32 : CSKY::NOT16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ writeData(Binary, Size, OS);
+
+ TmpInst = MCInstBuilder(Size == 4 ? CSKY::ADDI32 : CSKY::ADDI16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(0))
+ .addImm(1);
+ Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ writeData(Binary, Size, OS);
+}
+
+void CSKYMCCodeEmitter::expandRSUBI(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ MCInst TmpInst;
+ uint32_t Binary;
+ unsigned Size = MI.getOpcode() == CSKY::RSUBI32 ? 4 : 2;
+
+ TmpInst = MCInstBuilder(Size == 4 ? CSKY::NOT32 : CSKY::NOT16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ writeData(Binary, Size, OS);
+
+ TmpInst = MCInstBuilder(Size == 4 ? CSKY::ADDI32 : CSKY::ADDI16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(0))
+ .addImm(MI.getOperand(2).getImm() + 1);
+ Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ writeData(Binary, Size, OS);
+}
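
Both expansions rest on the two's-complement identity -x = ~x + 1, so rsubi rd, rx, imm (that is, rd = imm - rx) becomes a NOT followed by an ADDI of imm + 1, and NEG is the imm = 0 case. A quick check of the identity:

    #include <cassert>
    #include <cstdint>

    // rsubi rd, rx, imm computes rd = imm - rx via -x == ~x + 1.
    int32_t rsubiSketch(int32_t Rx, int32_t Imm) {
      int32_t Rd = ~Rx;  // NOT32 rd, rx
      Rd += Imm + 1;     // ADDI32 rd, rd, imm + 1
      return Rd;
    }

    int main() {
      assert(rsubiSketch(5, 0) == -5);   // NEG is the imm = 0 case
      assert(rsubiSketch(7, 20) == 13);  // 20 - 7
      assert(rsubiSketch(-3, 10) == 13); // 10 - (-3)
    }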
+
void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MII.get(MI.getOpcode());
unsigned Size = Desc.getSize();
+ MCInst TmpInst;
+
+ switch (MI.getOpcode()) {
+ default:
+ TmpInst = MI;
+ break;
+ case CSKY::JBT_E:
+ case CSKY::JBF_E:
+ expandJBTF(MI, OS, Fixups, STI);
+ MCNumEmitted += 2;
+ return;
+ case CSKY::NEG32:
+ case CSKY::NEG16:
+ expandNEG(MI, OS, Fixups, STI);
+ MCNumEmitted += 2;
+ return;
+ case CSKY::RSUBI32:
+ case CSKY::RSUBI16:
+ expandRSUBI(MI, OS, Fixups, STI);
+ MCNumEmitted += 2;
+ return;
+ case CSKY::JBSR32:
+ TmpInst = MCInstBuilder(CSKY::BSR32).addOperand(MI.getOperand(0));
+ break;
+ case CSKY::JBR16:
+ TmpInst = MCInstBuilder(CSKY::BR16).addOperand(MI.getOperand(0));
+ break;
+ case CSKY::JBR32:
+ TmpInst = MCInstBuilder(CSKY::BR32).addOperand(MI.getOperand(0));
+ break;
+ case CSKY::JBT16:
+ TmpInst = MCInstBuilder(CSKY::BT16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ break;
+ case CSKY::JBT32:
+ TmpInst = MCInstBuilder(CSKY::BT32)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ break;
+ case CSKY::JBF16:
+ TmpInst = MCInstBuilder(CSKY::BF16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ break;
+ case CSKY::JBF32:
+ TmpInst = MCInstBuilder(CSKY::BF32)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1));
+ break;
+ case CSKY::LRW32_Gen:
+ TmpInst = MCInstBuilder(CSKY::LRW32)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(2));
+ break;
+ case CSKY::LRW16_Gen:
+ TmpInst = MCInstBuilder(CSKY::LRW16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(2));
+ break;
+ case CSKY::CMPLEI32:
+ TmpInst = MCInstBuilder(CSKY::CMPLTI32)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm() + 1);
+ break;
+ case CSKY::CMPLEI16:
+ TmpInst = MCInstBuilder(CSKY::CMPLTI16)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm() + 1);
+ break;
+ case CSKY::ROTRI32:
+ TmpInst = MCInstBuilder(CSKY::ROTLI32)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addImm(32 - MI.getOperand(2).getImm());
+ break;
+ case CSKY::BGENI:
+ auto V = 1 << MI.getOperand(1).getImm();
+ TmpInst =
+ MCInstBuilder(CSKY::MOVI32).addOperand(MI.getOperand(0)).addImm(V);
+ break;
+ }
+
++MCNumEmitted;
- uint32_t Bin = getBinaryCodeForInstr(MI, Fixups, STI);
+ uint32_t Bin = getBinaryCodeForInstr(TmpInst, Fixups, STI);
uint16_t LO16 = static_cast<uint16_t>(Bin);
uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
@@ -170,7 +328,6 @@ MCFixupKind CSKYMCCodeEmitter::getTargetFixup(const MCExpr *Expr) const {
}
MCCodeEmitter *llvm::createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new CSKYMCCodeEmitter(Ctx, MCII);
}
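
Several of the pseudo-instruction rewrites in encodeInstruction are pure operand arithmetic: cmplei becomes cmplti with imm + 1, rotri becomes rotli with 32 - imm, and bgeni becomes movi with 1 << imm. A sketch verifying those equivalences:

    #include <cassert>
    #include <cstdint>

    uint32_t rotl32(uint32_t X, unsigned N) { return (X << N) | (X >> (32 - N)); }
    uint32_t rotr32(uint32_t X, unsigned N) { return (X >> N) | (X << (32 - N)); }

    int main() {
      // rotri rd, rx, n == rotli rd, rx, 32 - n (checked for n in 1..31).
      for (unsigned N = 1; N < 32; ++N)
        assert(rotr32(0x12345678u, N) == rotl32(0x12345678u, 32 - N));
      // cmplei rx, imm == cmplti rx, imm + 1, e.g. at imm = 9:
      assert((7 <= 9) == (7 < 10));
      // bgeni rd, n == movi rd, 1 << n, e.g. n = 4 sets only bit 4:
      assert((1u << 4) == 16u);
    }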
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
index bfba07bcb32a..128430197cc5 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
@@ -20,6 +20,8 @@
namespace llvm {
+class MCInstrInfo;
+
class CSKYMCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
const MCInstrInfo &MII;
@@ -169,6 +171,16 @@ public:
Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
return 0;
}
+
+ void expandJBTF(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ void expandNEG(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ void expandRSUBI(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
index 7987613b0608..b9989822dc36 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
@@ -8,10 +8,12 @@
#include "CSKYMCExpr.h"
#include "CSKYFixupKinds.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
index 0901c0993607..1a69dc8acde0 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
@@ -12,10 +12,14 @@
#include "CSKYMCTargetDesc.h"
#include "CSKYAsmBackend.h"
+#include "CSKYELFStreamer.h"
#include "CSKYInstPrinter.h"
#include "CSKYMCAsmInfo.h"
#include "CSKYMCCodeEmitter.h"
+#include "CSKYTargetStreamer.h"
#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -72,6 +76,81 @@ static MCSubtargetInfo *createCSKYMCSubtargetInfo(const Triple &TT,
return createCSKYMCSubtargetInfoImpl(TT, CPUName, /*TuneCPU=*/CPUName, FS);
}
+static MCTargetStreamer *
+createCSKYObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new CSKYTargetELFStreamer(S, STI);
+ return nullptr;
+}
+
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&Emitter,
+ bool RelaxAll) {
+ CSKYELFStreamer *S = new CSKYELFStreamer(Ctx, std::move(MAB), std::move(OW),
+ std::move(Emitter));
+
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
+
+static MCTargetStreamer *createCSKYAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrinter,
+ bool isVerboseAsm) {
+ return new CSKYTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer *createCSKYNullTargetStreamer(MCStreamer &S) {
+ return new CSKYTargetStreamer(S);
+}
+
+namespace {
+
+class CSKYMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ explicit CSKYMCInstrAnalysis(const MCInstrInfo *Info)
+ : MCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ if (isConditionalBranch(Inst) || isUnconditionalBranch(Inst)) {
+ int64_t Imm = Inst.getOperand(Inst.getNumOperands() - 1).getImm();
+ Target = Addr + Imm;
+ return true;
+ }
+
+ if (Inst.getOpcode() == CSKY::BSR32) {
+ Target = Addr + Inst.getOperand(0).getImm();
+ return true;
+ }
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case CSKY::LRW16:
+ case CSKY::LRW32:
+ case CSKY::JSRI32:
+ case CSKY::JMPI32:
+ int64_t Imm = Inst.getOperand(Inst.getNumOperands() - 1).getImm();
+ Target = ((Addr + Imm) & 0xFFFFFFFC);
+ return true;
+ }
+
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createCSKYInstrAnalysis(const MCInstrInfo *Info) {
+ return new CSKYMCInstrAnalysis(Info);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTargetMC() {
auto &CSKYTarget = getTheCSKYTarget();
TargetRegistry::RegisterMCAsmBackend(CSKYTarget, createCSKYAsmBackend);
@@ -82,4 +161,13 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTargetMC() {
TargetRegistry::RegisterMCInstPrinter(CSKYTarget, createCSKYMCInstPrinter);
TargetRegistry::RegisterMCSubtargetInfo(CSKYTarget,
createCSKYMCSubtargetInfo);
+ TargetRegistry::RegisterELFStreamer(CSKYTarget, createELFStreamer);
+ TargetRegistry::RegisterObjectTargetStreamer(CSKYTarget,
+ createCSKYObjectTargetStreamer);
+ TargetRegistry::RegisterAsmTargetStreamer(CSKYTarget,
+ createCSKYAsmTargetStreamer);
+ // Register the null target streamer.
+ TargetRegistry::RegisterNullTargetStreamer(CSKYTarget,
+ createCSKYNullTargetStreamer);
+ TargetRegistry::RegisterMCInstrAnalysis(CSKYTarget, createCSKYInstrAnalysis);
}
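
For lrw and the indirect jumps, evaluateBranch masks the low two bits of Addr + Imm because the literal is always word aligned. A tiny model of that address formation:

    #include <cassert>
    #include <cstdint>

    // lrw/jsri/jmpi literals are word aligned, so the analysis clears the
    // low two bits of Addr + Imm when forming the target address.
    uint64_t literalTargetSketch(uint64_t Addr, int64_t Imm) {
      return (Addr + Imm) & 0xFFFFFFFC;
    }

    int main() {
      assert(literalTargetSketch(0x1002, 0x7) == 0x1008); // 0x1009 rounded down
      assert(literalTargetSketch(0x1000, 0x4) == 0x1004); // already aligned
    }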
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
index 25bbd635fc58..4b8c45e95b74 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
@@ -34,9 +34,7 @@ MCAsmBackend *createCSKYAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
-MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx);
+MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx);
} // namespace llvm
#define GET_REGINFO_ENUM
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
new file mode 100644
index 000000000000..dd7053d60aa1
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp
@@ -0,0 +1,143 @@
+//===-- CSKYTargetStreamer.cpp - CSKY Target Streamer --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYTargetStreamer.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+//
+// ConstantPool implementation
+//
+// Emit the contents of the constant pool using the provided streamer.
+void CSKYConstantPool::emitAll(MCStreamer &Streamer) {
+ if (Entries.empty())
+ return;
+
+ if (CurrentSection != nullptr)
+ Streamer.switchSection(CurrentSection);
+
+ Streamer.emitDataRegion(MCDR_DataRegion);
+ for (const ConstantPoolEntry &Entry : Entries) {
+ Streamer.emitCodeAlignment(
+ Entry.Size,
+ Streamer.getContext().getSubtargetInfo()); // align naturally
+ Streamer.emitLabel(Entry.Label);
+ Streamer.emitValue(Entry.Value, Entry.Size, Entry.Loc);
+ }
+ Streamer.emitDataRegion(MCDR_DataRegionEnd);
+ Entries.clear();
+}
+
+const MCExpr *CSKYConstantPool::addEntry(MCStreamer &Streamer,
+ const MCExpr *Value, unsigned Size,
+ SMLoc Loc, const MCExpr *AdjustExpr) {
+ if (CurrentSection == nullptr)
+ CurrentSection = Streamer.getCurrentSectionOnly();
+
+ auto &Context = Streamer.getContext();
+
+ const MCConstantExpr *C = dyn_cast<MCConstantExpr>(Value);
+
+ // Check if there is existing entry for the same constant. If so, reuse it.
+ auto Itr = C ? CachedEntries.find(C->getValue()) : CachedEntries.end();
+ if (Itr != CachedEntries.end())
+ return Itr->second;
+
+ MCSymbol *CPEntryLabel = Context.createTempSymbol();
+ const auto SymRef = MCSymbolRefExpr::create(CPEntryLabel, Context);
+
+ if (AdjustExpr) {
+ const CSKYMCExpr *CSKYExpr = cast<CSKYMCExpr>(Value);
+
+ Value = MCBinaryExpr::createSub(AdjustExpr, SymRef, Context);
+ Value = MCBinaryExpr::createSub(CSKYExpr->getSubExpr(), Value, Context);
+ Value = CSKYMCExpr::create(Value, CSKYExpr->getKind(), Context);
+ }
+
+ Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc));
+
+ if (C)
+ CachedEntries[C->getValue()] = SymRef;
+ return SymRef;
+}
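
addEntry deduplicates at two levels: plain integer constants are cached here by value, and addConstantPoolEntry in CSKYTargetStreamer additionally caches symbolic entries per (symbol, variant-kind) pair. A miniature model of the value-level cache:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    // Value-level cache: identical integer constants share one pool slot
    // and therefore one label, just like CachedEntries above.
    struct PoolSketch {
      std::vector<std::pair<std::string, int64_t>> Entries;
      std::map<int64_t, std::string> Cache;

      std::string addEntry(int64_t V) {
        auto It = Cache.find(V);
        if (It != Cache.end())
          return It->second; // reuse the existing label
        std::string Label = ".LCPI" + std::to_string(Entries.size());
        Entries.push_back({Label, V});
        Cache[V] = Label;
        return Label;
      }
    };

    int main() {
      PoolSketch P;
      std::string A = P.addEntry(0x12345678);
      std::string B = P.addEntry(0x12345678); // cache hit: same label as A
      return (A == B && P.Entries.size() == 1) ? 0 : 1;
    }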
+
+bool CSKYConstantPool::empty() { return Entries.empty(); }
+
+void CSKYConstantPool::clearCache() {
+ CurrentSection = nullptr;
+ CachedEntries.clear();
+}
+
+CSKYTargetStreamer::CSKYTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), ConstantPool(new CSKYConstantPool()) {}
+
+const MCExpr *
+CSKYTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc,
+ const MCExpr *AdjustExpr) {
+ auto ELFRefKind = CSKYMCExpr::VK_CSKY_Invalid;
+ ConstantCounter++;
+
+ const MCExpr *OrigExpr = Expr;
+
+ if (const CSKYMCExpr *CE = dyn_cast<CSKYMCExpr>(Expr)) {
+ Expr = CE->getSubExpr();
+ ELFRefKind = CE->getKind();
+ }
+
+ if (const MCSymbolRefExpr *SymExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol *Sym = &SymExpr->getSymbol();
+
+ SymbolIndex Index = {Sym, ELFRefKind};
+
+ if (ConstantMap.find(Index) == ConstantMap.end()) {
+ ConstantMap[Index] =
+ ConstantPool->addEntry(getStreamer(), OrigExpr, 4, Loc, AdjustExpr);
+ }
+ return ConstantMap[Index];
+ }
+
+ return ConstantPool->addEntry(getStreamer(), Expr, 4, Loc, AdjustExpr);
+}
+
+void CSKYTargetStreamer::emitCurrentConstantPool() {
+ ConstantPool->emitAll(Streamer);
+ ConstantPool->clearCache();
+}
+
+// finish() - write out any non-empty assembler constant pools.
+void CSKYTargetStreamer::finish() {
+ if (ConstantCounter != 0) {
+ ConstantPool->emitAll(Streamer);
+ }
+
+ finishAttributeSection();
+}
+
+void CSKYTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {}
+
+void CSKYTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
+void CSKYTargetStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {}
+void CSKYTargetStreamer::finishAttributeSection() {}
+
+void CSKYTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ OS << "\t.csky_attribute\t" << Attribute << ", " << Twine(Value) << "\n";
+}
+
+void CSKYTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ OS << "\t.csky_attribute\t" << Attribute << ", \"" << String << "\"\n";
+}
+
+void CSKYTargetAsmStreamer::finishAttributeSection() {}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h
new file mode 100644
index 000000000000..270d48d5939c
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.h
@@ -0,0 +1,110 @@
+//===-- CSKYTargetStreamer.h - CSKY Target Streamer ----------*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYTARGETSTREAMER_H
+
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class CSKYConstantPool {
+ using EntryVecTy = SmallVector<ConstantPoolEntry, 4>;
+ EntryVecTy Entries;
+ std::map<int64_t, const MCSymbolRefExpr *> CachedEntries;
+
+ MCSection *CurrentSection = nullptr;
+
+public:
+ // Initialize a new empty constant pool
+ CSKYConstantPool() = default;
+
+ // Add a new entry to the constant pool in the next slot.
+ // \param Value is the new entry to put in the constant pool.
+ // \param Size is the size in bytes of the entry
+ //
+ // \returns a MCExpr that references the newly inserted value
+ const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Value,
+ unsigned Size, SMLoc Loc, const MCExpr *AdjustExpr);
+
+ void emitAll(MCStreamer &Streamer);
+
+ // Return true if the constant pool is empty
+ bool empty();
+
+ void clearCache();
+};
+
+class CSKYTargetStreamer : public MCTargetStreamer {
+public:
+ typedef struct {
+ const MCSymbol *sym;
+ CSKYMCExpr::VariantKind kind;
+ } SymbolIndex;
+
+protected:
+ std::unique_ptr<CSKYConstantPool> ConstantPool;
+
+ DenseMap<SymbolIndex, const MCExpr *> ConstantMap;
+
+ unsigned ConstantCounter = 0;
+
+public:
+ CSKYTargetStreamer(MCStreamer &S);
+
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void finishAttributeSection();
+
+ virtual void emitTargetAttributes(const MCSubtargetInfo &STI);
+ /// Add a new entry to the constant pool for the current section and return an
+ /// MCExpr that can be used to refer to the constant pool location.
+ const MCExpr *addConstantPoolEntry(const MCExpr *, SMLoc Loc,
+ const MCExpr *AdjustExpr = nullptr);
+
+ void emitCurrentConstantPool();
+
+ void finish() override;
+};
+
+template <> struct DenseMapInfo<CSKYTargetStreamer::SymbolIndex> {
+ static inline CSKYTargetStreamer::SymbolIndex getEmptyKey() {
+ return {nullptr, CSKYMCExpr::VK_CSKY_Invalid};
+ }
+ static inline CSKYTargetStreamer::SymbolIndex getTombstoneKey() {
+ return {nullptr, CSKYMCExpr::VK_CSKY_Invalid};
+ }
+ static unsigned getHashValue(const CSKYTargetStreamer::SymbolIndex &V) {
+ return hash_combine(DenseMapInfo<const MCSymbol *>::getHashValue(V.sym),
+ DenseMapInfo<int>::getHashValue(V.kind));
+ }
+ static bool isEqual(const CSKYTargetStreamer::SymbolIndex &A,
+ const CSKYTargetStreamer::SymbolIndex &B) {
+ return A.sym == B.sym && A.kind == B.kind;
+ }
+};
+
+class formatted_raw_ostream;
+
+class CSKYTargetAsmStreamer : public CSKYTargetStreamer {
+ formatted_raw_ostream &OS;
+
+ void emitAttribute(unsigned Attribute, unsigned Value) override;
+ void emitTextAttribute(unsigned Attribute, StringRef String) override;
+ void finishAttributeSection() override;
+
+public:
+ CSKYTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+ : CSKYTargetStreamer(S), OS(OS) {}
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYTARGETSTREAMER_H
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
new file mode 100644
index 000000000000..4d6e1a9d3166
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -0,0 +1,144 @@
+//- DXIL.td - Describe DXIL operation -------------------------*- tablegen -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is a target description file for DXIL operations.
+///
+//===----------------------------------------------------------------------===//
+
+include "llvm/IR/Intrinsics.td"
+
+class dxil_class<string _name> {
+ string name = _name;
+}
+class dxil_category<string _name> {
+ string name = _name;
+}
+
+def Unary : dxil_class<"Unary">;
+def Binary : dxil_class<"Binary">;
+def FlattenedThreadIdInGroupClass : dxil_class<"FlattenedThreadIdInGroup">;
+def ThreadIdInGroupClass : dxil_class<"ThreadIdInGroup">;
+def ThreadIdClass : dxil_class<"ThreadId">;
+def GroupIdClass : dxil_class<"GroupId">;
+
+def binary_uint : dxil_category<"Binary uint">;
+def unary_float : dxil_category<"Unary float">;
+def ComputeID : dxil_category<"Compute/Mesh/Amplification shader">;
+
+
+// The parameter description for a DXIL instruction
+class dxil_param<int _pos, string type, string _name, string _doc,
+ bit _is_const = 0, string _enum_name = "",
+ int _max_value = 0> {
+ int pos = _pos; // position in parameter list
+ string llvm_type = type; // llvm type name, $o for overload, $r for resource
+ // type, $cb for legacy cbuffer, $u4 for u4 struct
+ string name = _name; // short, unique name
+ string doc = _doc; // the documentation description of this parameter
+ bit is_const =
+ _is_const; // whether this argument requires a constant value in the IR
+ string enum_name = _enum_name; // the name of the enum type if applicable
+ int max_value =
+ _max_value; // the maximum value for this parameter if applicable
+}
+
+// A representation for a DXIL instruction
+class dxil_inst<string _name> {
+ string name = _name; // short, unique name
+
+ string dxil_op = ""; // name of DXIL operation
+ int dxil_opid = 0; // ID of DXIL operation
+ dxil_class op_class; // name of the opcode class
+ dxil_category category; // classification for this instruction
+ string doc = ""; // the documentation description of this instruction
+ list<dxil_param> ops = []; // the operands that this instruction takes
+ string oload_types = ""; // overload types if applicable
+ string fn_attr = ""; // attribute shorthands: rn=does not access
+ // memory, ro=only reads from memory
+ bit is_deriv = 0; // whether this is some kind of derivative
+ bit is_gradient = 0; // whether this requires a gradient calculation
+ bit is_feedback = 0; // whether this is a sampler feedback op
+ bit is_wave = 0; // whether this requires in-wave, cross-lane functionality
+ bit requires_uniform_inputs = 0; // whether this operation requires that all
+ // of its inputs are uniform across the wave
+ // Group dxil operation for stats.
+ // Like how many atomic/float/uint/int/... instructions used in the program.
+ list<string> stats_group = [];
+}
+
+class dxil_op<string name, int code_id, dxil_class code_class, dxil_category op_category, string _doc,
+ string _oload_types, string _fn_attr, list<dxil_param> op_params,
+ list<string> _stats_group = []> : dxil_inst<name> {
+ let dxil_op = name;
+ let dxil_opid = code_id;
+ let doc = _doc;
+ let ops = op_params;
+ let op_class = code_class;
+ let category = op_category;
+ let oload_types = _oload_types;
+ let fn_attr = _fn_attr;
+ let stats_group = _stats_group;
+}
+
+// The intrinsic that maps directly to this DXIL op.
+class dxil_map_intrinsic<Intrinsic llvm_intrinsic_> { Intrinsic llvm_intrinsic = llvm_intrinsic_; }
+
+def Sin : dxil_op<"Sin", 13, Unary, unary_float, "returns sine(theta) for theta in radians.",
+ "half;float;", "rn",
+ [
+ dxil_param<0, "$o", "", "operation result">,
+ dxil_param<1, "i32", "opcode", "DXIL opcode">,
+ dxil_param<2, "$o", "value", "input value">
+ ],
+ ["floats"]>,
+ dxil_map_intrinsic<int_sin>;
+
+def UMax :dxil_op< "UMax", 39, Binary, binary_uint, "unsigned integer maximum. UMax(a,b) = a > b ? a : b",
+ "i16;i32;i64;", "rn",
+ [
+ dxil_param<0, "$o", "", "operation result">,
+ dxil_param<1, "i32", "opcode", "DXIL opcode">,
+ dxil_param<2, "$o", "a", "input value">,
+ dxil_param<3, "$o", "b", "input value">
+ ],
+ ["uints"]>,
+ dxil_map_intrinsic<int_umax>;
+
+def ThreadId :dxil_op< "ThreadId", 93, ThreadIdClass, ComputeID, "reads the thread ID", "i32;", "rn",
+ [
+ dxil_param<0, "i32", "", "thread ID component">,
+ dxil_param<1, "i32", "opcode", "DXIL opcode">,
+ dxil_param<2, "i32", "component", "component to read (x,y,z)">
+ ]>,
+ dxil_map_intrinsic<int_dxil_thread_id>;
+
+def GroupId :dxil_op< "GroupId", 94, GroupIdClass, ComputeID, "reads the group ID (SV_GroupID)", "i32;", "rn",
+ [
+ dxil_param<0, "i32", "", "group ID component">,
+ dxil_param<1, "i32", "opcode", "DXIL opcode">,
+ dxil_param<2, "i32", "component", "component to read">
+ ]>,
+ dxil_map_intrinsic<int_dxil_group_id>;
+
+def ThreadIdInGroup :dxil_op< "ThreadIdInGroup", 95, ThreadIdInGroupClass, ComputeID,
+ "reads the thread ID within the group (SV_GroupThreadID)", "i32;", "rn",
+ [
+ dxil_param<0, "i32", "", "thread ID in group component">,
+ dxil_param<1, "i32", "opcode", "DXIL opcode">,
+ dxil_param<2, "i32", "component", "component to read (x,y,z)">
+ ]>,
+ dxil_map_intrinsic<int_dxil_thread_id_in_group>;
+
+def FlattenedThreadIdInGroup :dxil_op< "FlattenedThreadIdInGroup", 96, FlattenedThreadIdInGroupClass, ComputeID,
+ "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i32;", "rn",
+ [
+ dxil_param<0, "i32", "", "result">,
+ dxil_param<1, "i32", "opcode", "DXIL opcode">
+ ]>,
+ dxil_map_intrinsic<int_dxil_flattened_thread_id_in_group>;
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h
new file mode 100644
index 000000000000..e8e7b5396a46
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -0,0 +1,25 @@
+//===- DXILConstants.h - Essential DXIL constants -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains essential DXIL constants.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_DIRECTX_DXILCONSTANTS_H
+#define LLVM_LIB_TARGET_DIRECTX_DXILCONSTANTS_H
+
+namespace llvm {
+namespace DXIL {
+
+#define DXIL_OP_ENUM
+#include "DXILOperation.inc"
+#undef DXIL_OP_ENUM
+
+} // namespace DXIL
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
new file mode 100644
index 000000000000..11b89e4ec890
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -0,0 +1,265 @@
+//===- DXILOpLowering.cpp - Lowering LLVM intrinsics to DXILOp functions -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains passes and utilities to lower LLVM intrinsic
+/// calls to DXILOp function calls.
+//===----------------------------------------------------------------------===//
+
+#include "DXILConstants.h"
+#include "DirectX.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "dxil-op-lower"
+
+using namespace llvm;
+using namespace llvm::DXIL;
+
+constexpr StringLiteral DXILOpNamePrefix = "dx.op.";
+
+enum OverloadKind : uint16_t {
+ VOID = 1,
+ HALF = 1 << 1,
+ FLOAT = 1 << 2,
+ DOUBLE = 1 << 3,
+ I1 = 1 << 4,
+ I8 = 1 << 5,
+ I16 = 1 << 6,
+ I32 = 1 << 7,
+ I64 = 1 << 8,
+ UserDefineType = 1 << 9,
+ ObjectType = 1 << 10,
+};
+
+static const char *getOverloadTypeName(OverloadKind Kind) {
+ switch (Kind) {
+ case OverloadKind::HALF:
+ return "f16";
+ case OverloadKind::FLOAT:
+ return "f32";
+ case OverloadKind::DOUBLE:
+ return "f64";
+ case OverloadKind::I1:
+ return "i1";
+ case OverloadKind::I8:
+ return "i8";
+ case OverloadKind::I16:
+ return "i16";
+ case OverloadKind::I32:
+ return "i32";
+ case OverloadKind::I64:
+ return "i64";
+ case OverloadKind::VOID:
+ case OverloadKind::ObjectType:
+ case OverloadKind::UserDefineType:
+ break;
+ }
+ llvm_unreachable("invalid overload type for name");
+ return "void";
+}
+
+static OverloadKind getOverloadKind(Type *Ty) {
+ Type::TypeID T = Ty->getTypeID();
+ switch (T) {
+ case Type::VoidTyID:
+ return OverloadKind::VOID;
+ case Type::HalfTyID:
+ return OverloadKind::HALF;
+ case Type::FloatTyID:
+ return OverloadKind::FLOAT;
+ case Type::DoubleTyID:
+ return OverloadKind::DOUBLE;
+ case Type::IntegerTyID: {
+ IntegerType *ITy = cast<IntegerType>(Ty);
+ unsigned Bits = ITy->getBitWidth();
+ switch (Bits) {
+ case 1:
+ return OverloadKind::I1;
+ case 8:
+ return OverloadKind::I8;
+ case 16:
+ return OverloadKind::I16;
+ case 32:
+ return OverloadKind::I32;
+ case 64:
+ return OverloadKind::I64;
+ default:
+ llvm_unreachable("invalid overload type");
+ return OverloadKind::VOID;
+ }
+ }
+ case Type::PointerTyID:
+ return OverloadKind::UserDefineType;
+ case Type::StructTyID:
+ return OverloadKind::ObjectType;
+ default:
+ llvm_unreachable("invalid overload type");
+ return OverloadKind::VOID;
+ }
+}
+
+static std::string getTypeName(OverloadKind Kind, Type *Ty) {
+ if (Kind < OverloadKind::UserDefineType) {
+ return getOverloadTypeName(Kind);
+ } else if (Kind == OverloadKind::UserDefineType) {
+ StructType *ST = cast<StructType>(Ty);
+ return ST->getStructName().str();
+ } else if (Kind == OverloadKind::ObjectType) {
+ StructType *ST = cast<StructType>(Ty);
+ return ST->getStructName().str();
+ } else {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ Ty->print(OS);
+ return OS.str();
+ }
+}
+
+// Static properties.
+struct OpCodeProperty {
+ DXIL::OpCode OpCode;
+ // Offset in DXILOpCodeNameTable.
+ unsigned OpCodeNameOffset;
+ DXIL::OpCodeClass OpCodeClass;
+ // Offset in DXILOpCodeClassNameTable.
+ unsigned OpCodeClassNameOffset;
+ uint16_t OverloadTys;
+ llvm::Attribute::AttrKind FuncAttr;
+};
+
+// Include getOpCodeClassName, getOpCodeProperty and getOpCodeName, which are
+// generated by TableGen.
+#define DXIL_OP_OPERATION_TABLE
+#include "DXILOperation.inc"
+#undef DXIL_OP_OPERATION_TABLE
+
+static std::string constructOverloadName(OverloadKind Kind, Type *Ty,
+ const OpCodeProperty &Prop) {
+ if (Kind == OverloadKind::VOID) {
+ return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop)).str();
+ }
+ return (Twine(DXILOpNamePrefix) + getOpCodeClassName(Prop) + "." +
+ getTypeName(Kind, Ty))
+ .str();
+}
+
+static FunctionCallee createDXILOpFunction(DXIL::OpCode DXILOp, Function &F,
+ Module &M) {
+ const OpCodeProperty *Prop = getOpCodeProperty(DXILOp);
+
+ // Use the return type as the overload type for the DXIL op.
+ // Only the simple one-to-one mapping is handled here, so the return type
+ // suffices.
+ Type *OverloadTy = F.getReturnType();
+
+ OverloadKind Kind = getOverloadKind(OverloadTy);
+ // FIXME: find the issue and report the error in clang instead of checking
+ // it in the backend.
+ if ((Prop->OverloadTys & (uint16_t)Kind) == 0) {
+ llvm_unreachable("invalid overload");
+ }
+
+ std::string FnName = constructOverloadName(Kind, OverloadTy, *Prop);
+ assert(!M.getFunction(FnName) && "Function already exists");
+
+ auto &Ctx = M.getContext();
+ Type *OpCodeTy = Type::getInt32Ty(Ctx);
+
+ SmallVector<Type *> ArgTypes;
+ // DXIL ops take an i32 opcode as their first argument.
+ ArgTypes.emplace_back(OpCodeTy);
+ FunctionType *FT = F.getFunctionType();
+ ArgTypes.append(FT->param_begin(), FT->param_end());
+ FunctionType *DXILOpFT = FunctionType::get(OverloadTy, ArgTypes, false);
+ return M.getOrInsertFunction(FnName, DXILOpFT);
+}
+
+static void lowerIntrinsic(DXIL::OpCode DXILOp, Function &F, Module &M) {
+ auto DXILOpFn = createDXILOpFunction(DXILOp, F, M);
+ IRBuilder<> B(M.getContext());
+ Value *DXILOpArg = B.getInt32(static_cast<unsigned>(DXILOp));
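+ // Replace every call to the intrinsic with a call to the DXIL op function,
+ // prepending the i32 opcode as the first argument.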
+ for (User *U : make_early_inc_range(F.users())) {
+ CallInst *CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ SmallVector<Value *> Args;
+ Args.emplace_back(DXILOpArg);
+ Args.append(CI->arg_begin(), CI->arg_end());
+ B.SetInsertPoint(CI);
+ CallInst *DXILCI = B.CreateCall(DXILOpFn, Args);
+ LLVM_DEBUG(DXILCI->setName(getOpCodeName(DXILOp)));
+ CI->replaceAllUsesWith(DXILCI);
+ CI->eraseFromParent();
+ }
+ if (F.user_empty())
+ F.eraseFromParent();
+}
+
+static bool lowerIntrinsics(Module &M) {
+ bool Updated = false;
+
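+ // DXIL_OP_INTRINSIC_MAP expands to the tablegen-generated LowerMap, which
+ // maps LLVM intrinsic IDs to their corresponding DXIL opcodes.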
+#define DXIL_OP_INTRINSIC_MAP
+#include "DXILOperation.inc"
+#undef DXIL_OP_INTRINSIC_MAP
+
+ for (Function &F : make_early_inc_range(M.functions())) {
+ if (!F.isDeclaration())
+ continue;
+ Intrinsic::ID ID = F.getIntrinsicID();
+ if (ID == Intrinsic::not_intrinsic)
+ continue;
+ auto LowerIt = LowerMap.find(ID);
+ if (LowerIt == LowerMap.end())
+ continue;
+ lowerIntrinsic(LowerIt->second, F, M);
+ Updated = true;
+ }
+ return Updated;
+}
+
+namespace {
+/// A pass that lowers LLVM intrinsic calls to their DXIL op equivalents.
+class DXILOpLowering : public PassInfoMixin<DXILOpLowering> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
+ if (lowerIntrinsics(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+ }
+};
+} // namespace
+
+namespace {
+class DXILOpLoweringLegacy : public ModulePass {
+public:
+ bool runOnModule(Module &M) override { return lowerIntrinsics(M); }
+ StringRef getPassName() const override { return "DXIL Op Lowering"; }
+ DXILOpLoweringLegacy() : ModulePass(ID) {}
+
+ static char ID; // Pass identification.
+};
+char DXILOpLoweringLegacy::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
+ false, false)
+INITIALIZE_PASS_END(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering", false,
+ false)
+
+ModulePass *llvm::createDXILOpLoweringLegacyPass() {
+ return new DXILOpLoweringLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILPointerType.cpp b/llvm/lib/Target/DirectX/DXILPointerType.cpp
new file mode 100644
index 000000000000..1e67f1a30ec4
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILPointerType.cpp
@@ -0,0 +1,66 @@
+//===- Target/DirectX/DXILPointerType.cpp - DXIL Typed Pointer Type -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILPointerType.h"
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
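+// Holds the per-context uniquing tables for typed pointers. An instance is
+// stashed in the LLVMContext's target-data Any slot (see get() below) so that
+// typed pointers are uniqued per context.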
+class TypedPointerTracking {
+public:
+ TypedPointerTracking() {}
+ DenseMap<Type *, std::unique_ptr<TypedPointerType>> PointerTypes;
+ DenseMap<std::pair<Type *, unsigned>, std::unique_ptr<TypedPointerType>>
+ ASPointerTypes;
+};
+
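+// For example, TypedPointerType::get(Type::getInt8Ty(Ctx), 0) returns the
+// uniqued typed-pointer representation of "i8*" in address space 0.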
+TypedPointerType *TypedPointerType::get(Type *EltTy, unsigned AddressSpace) {
+ assert(EltTy && "Can't get a pointer to <null> type!");
+ assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+
+ llvm::Any &TargetData = EltTy->getContext().getTargetData();
+ if (!TargetData.hasValue())
+ TargetData = Any{std::make_shared<TypedPointerTracking>()};
+
+ assert(any_isa<std::shared_ptr<TypedPointerTracking>>(TargetData) &&
+ "Unexpected target data type");
+
+ std::shared_ptr<TypedPointerTracking> Tracking =
+ any_cast<std::shared_ptr<TypedPointerTracking>>(TargetData);
+
+ // Since AddressSpace #0 is the common case, we special case it.
+ std::unique_ptr<TypedPointerType> &Entry =
+ AddressSpace == 0
+ ? Tracking->PointerTypes[EltTy]
+ : Tracking->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
+
+ if (!Entry)
+ Entry = std::unique_ptr<TypedPointerType>(
+ new TypedPointerType(EltTy, AddressSpace));
+ return Entry.get();
+}
+
+TypedPointerType::TypedPointerType(Type *E, unsigned AddrSpace)
+ : Type(E->getContext(), DXILPointerTyID), PointeeTy(E) {
+ ContainedTys = &PointeeTy;
+ NumContainedTys = 1;
+ setSubclassData(AddrSpace);
+}
+
+bool TypedPointerType::isValidElementType(Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isTokenTy() &&
+ !ElemTy->isX86_AMXTy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILPointerType.h b/llvm/lib/Target/DirectX/DXILPointerType.h
new file mode 100644
index 000000000000..52cf2dbc40b0
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILPointerType.h
@@ -0,0 +1,52 @@
+//===- Target/DirectX/DXILPointerType.h - DXIL Typed Pointer Type ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_DIRECTX_DXILPOINTERTYPE_H
+#define LLVM_TARGET_DIRECTX_DXILPOINTERTYPE_H
+
+#include "llvm/IR/Type.h"
+
+namespace llvm {
+namespace dxil {
+
+// DXIL has typed pointers. This pointer type abstraction is used for
+// tracking in PointerTypeAnalysis and for the bitcode ValueEnumerator.
+class TypedPointerType : public Type {
+ explicit TypedPointerType(Type *ElType, unsigned AddrSpace);
+
+ Type *PointeeTy;
+
+public:
+ TypedPointerType(const TypedPointerType &) = delete;
+ TypedPointerType &operator=(const TypedPointerType &) = delete;
+
+ /// This constructs a pointer to an object of the specified type in a numbered
+ /// address space.
+ static TypedPointerType *get(Type *ElementType, unsigned AddressSpace);
+
+ /// Return true if the specified type is valid as an element type.
+ static bool isValidElementType(Type *ElemTy);
+
+ /// Return the address space of the Pointer type.
+ unsigned getAddressSpace() const { return getSubclassData(); }
+
+ Type *getElementType() const { return PointeeTy; }
+
+ /// Implement support for type inquiry through isa, cast, and dyn_cast.
+ static bool classof(const Type *T) {
+ return T->getTypeID() == DXILPointerTyID;
+ }
+};
+
+} // namespace dxil
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_DXILPOINTERTYPE_H
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
new file mode 100644
index 000000000000..14d970e6b69a
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -0,0 +1,184 @@
+//===- DXILPrepare.cpp - Prepare LLVM Module for DXIL encoding ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains passes and utilities to convert a modern LLVM
+/// module into a module compatible with the LLVM 3.7-based DirectX Intermediate
+/// Language (DXIL).
+//===----------------------------------------------------------------------===//
+
+#include "DirectX.h"
+#include "PointerTypeAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+
+#define DEBUG_TYPE "dxil-prepare"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+namespace {
+
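+// DXIL is based on LLVM 3.7, which only understands the attributes listed
+// below; runOnModule strips everything else from the module.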
+constexpr bool isValidForDXIL(Attribute::AttrKind Attr) {
+ return is_contained({Attribute::Alignment,
+ Attribute::AlwaysInline,
+ Attribute::Builtin,
+ Attribute::ByVal,
+ Attribute::InAlloca,
+ Attribute::Cold,
+ Attribute::Convergent,
+ Attribute::InlineHint,
+ Attribute::InReg,
+ Attribute::JumpTable,
+ Attribute::MinSize,
+ Attribute::Naked,
+ Attribute::Nest,
+ Attribute::NoAlias,
+ Attribute::NoBuiltin,
+ Attribute::NoCapture,
+ Attribute::NoDuplicate,
+ Attribute::NoImplicitFloat,
+ Attribute::NoInline,
+ Attribute::NonLazyBind,
+ Attribute::NonNull,
+ Attribute::Dereferenceable,
+ Attribute::DereferenceableOrNull,
+ Attribute::NoRedZone,
+ Attribute::NoReturn,
+ Attribute::NoUnwind,
+ Attribute::OptimizeForSize,
+ Attribute::OptimizeNone,
+ Attribute::ReadNone,
+ Attribute::ReadOnly,
+ Attribute::ArgMemOnly,
+ Attribute::Returned,
+ Attribute::ReturnsTwice,
+ Attribute::SExt,
+ Attribute::StackAlignment,
+ Attribute::StackProtect,
+ Attribute::StackProtectReq,
+ Attribute::StackProtectStrong,
+ Attribute::SafeStack,
+ Attribute::StructRet,
+ Attribute::SanitizeAddress,
+ Attribute::SanitizeThread,
+ Attribute::SanitizeMemory,
+ Attribute::UWTable,
+ Attribute::ZExt},
+ Attr);
+}
+
+class DXILPrepareModule : public ModulePass {
+
+ static Value *maybeGenerateBitcast(IRBuilder<> &Builder,
+ PointerTypeMap &PointerTypes,
+ Instruction &Inst, Value *Operand,
+ Type *Ty) {
+ // Omit bitcasts if the incoming value matches the instruction type.
+ auto It = PointerTypes.find(Operand);
+ if (It != PointerTypes.end())
+ if (cast<TypedPointerType>(It->second)->getElementType() == Ty)
+ return nullptr;
+ // Insert bitcasts where we are removing the instruction.
+ Builder.SetInsertPoint(&Inst);
+ // This code only gets hit in opaque-pointer mode, so the type of the
+ // pointer doesn't matter.
+ PointerType *PtrTy = cast<PointerType>(Operand->getType());
+ return Builder.Insert(
+ CastInst::Create(Instruction::BitCast, Operand,
+ Builder.getInt8PtrTy(PtrTy->getAddressSpace())));
+ }
+
+public:
+ bool runOnModule(Module &M) override {
+ PointerTypeMap PointerTypes = PointerTypeAnalysis::run(M);
+ AttributeMask AttrMask;
+ for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
+ I = Attribute::AttrKind(I + 1)) {
+ if (!isValidForDXIL(I))
+ AttrMask.addAttribute(I);
+ }
+ for (auto &F : M.functions()) {
+ F.removeFnAttrs(AttrMask);
+ F.removeRetAttrs(AttrMask);
+ for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx)
+ F.removeParamAttrs(Idx, AttrMask);
+
+ for (auto &BB : F) {
+ IRBuilder<> Builder(&BB);
+ for (auto &I : make_early_inc_range(BB)) {
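+ // DXIL predates the unary fneg instruction, so rewrite it as
+ // (fsub -0.0, x).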
+ if (I.getOpcode() == Instruction::FNeg) {
+ Builder.SetInsertPoint(&I);
+ Value *In = I.getOperand(0);
+ Value *Zero = ConstantFP::get(In->getType(), -0.0);
+ I.replaceAllUsesWith(Builder.CreateFSub(Zero, In));
+ I.eraseFromParent();
+ continue;
+ }
+ // Only insert bitcasts if the IR is using opaque pointers.
+ if (M.getContext().supportsTypedPointers())
+ continue;
+
+ // Emitting no-op bitcast instructions allows the ValueEnumerator to remain
+ // unmodified, as it reserves instruction IDs during construction.
+ if (auto LI = dyn_cast<LoadInst>(&I)) {
+ if (Value *NoOpBitcast = maybeGenerateBitcast(
+ Builder, PointerTypes, I, LI->getPointerOperand(),
+ LI->getType())) {
+ LI->replaceAllUsesWith(
+ Builder.CreateLoad(LI->getType(), NoOpBitcast));
+ LI->eraseFromParent();
+ }
+ continue;
+ }
+ if (auto SI = dyn_cast<StoreInst>(&I)) {
+ if (Value *NoOpBitcast = maybeGenerateBitcast(
+ Builder, PointerTypes, I, SI->getPointerOperand(),
+ SI->getValueOperand()->getType())) {
+
+ SI->replaceAllUsesWith(
+ Builder.CreateStore(SI->getValueOperand(), NoOpBitcast));
+ SI->eraseFromParent();
+ }
+ continue;
+ }
+ if (auto GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (Value *NoOpBitcast = maybeGenerateBitcast(
+ Builder, PointerTypes, I, GEP->getPointerOperand(),
+ GEP->getResultElementType()))
+ GEP->setOperand(0, NoOpBitcast);
+ continue;
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ DXILPrepareModule() : ModulePass(ID) {}
+
+ static char ID; // Pass identification.
+};
+char DXILPrepareModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module",
+ false, false)
+INITIALIZE_PASS_END(DXILPrepareModule, DEBUG_TYPE, "DXIL Prepare Module", false,
+ false)
+
+ModulePass *llvm::createDXILPrepareModulePass() {
+ return new DXILPrepareModule();
+}
diff --git a/llvm/lib/Target/DirectX/DXILStubs.td b/llvm/lib/Target/DirectX/DXILStubs.td
new file mode 100644
index 000000000000..ce4327f93bc1
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILStubs.td
@@ -0,0 +1,18 @@
+// DXIL doesn't actually use registers, but this gets the boilerplate code
+// generated through tablegen.
+let Namespace = "DXIL" in {
+def DXIL : Register<"DXIL">;
+def DXILClass : RegisterClass<"DXIL", [i32], 32, (add DXIL)>;
+}
+
+class DXILInst : Instruction {
+ let Namespace = "DXIL";
+ let DecoderNamespace = "DXIL";
+
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let AsmString = "dummy";
+ let Pattern = [];
+}
+
+def DummyInst : DXILInst;
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
new file mode 100644
index 000000000000..634ead98a6ae
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -0,0 +1,121 @@
+//===- DXILTranslateMetadata.cpp - Pass to emit DXIL metadata ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectX.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+static uint32_t ConstMDToUint32(const MDOperand &MDO) {
+ ConstantInt *pConst = mdconst::extract<ConstantInt>(MDO);
+ return (uint32_t)pConst->getZExtValue();
+}
+
+static ConstantAsMetadata *Uint32ToConstMD(unsigned v, LLVMContext &Ctx) {
+ return ConstantAsMetadata::get(
+ Constant::getIntegerValue(IntegerType::get(Ctx, 32), APInt(32, v)));
+}
+
+constexpr StringLiteral ValVerKey = "dx.valver";
+constexpr unsigned DXILVersionNumFields = 2;
+
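+// Emit the validator version as named metadata of the form
+// !dx.valver = !{!n} with !n = !{i32 Major, i32 Minor}.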
+static void emitDXILValidatorVersion(Module &M, VersionTuple &ValidatorVer) {
+ NamedMDNode *DXILValidatorVersionMD = M.getNamedMetadata(ValVerKey);
+
+ // Allow rewriting the validator version, since it can be changed at later
+ // points.
+ if (DXILValidatorVersionMD)
+ M.eraseNamedMetadata(DXILValidatorVersionMD);
+
+ DXILValidatorVersionMD = M.getOrInsertNamedMetadata(ValVerKey);
+
+ auto &Ctx = M.getContext();
+ Metadata *MDVals[DXILVersionNumFields];
+ MDVals[0] = Uint32ToConstMD(ValidatorVer.getMajor(), Ctx);
+ MDVals[1] = Uint32ToConstMD(ValidatorVer.getMinor().value_or(0), Ctx);
+
+ DXILValidatorVersionMD->addOperand(MDNode::get(Ctx, MDVals));
+}
+
+static VersionTuple loadDXILValidatorVersion(MDNode *ValVerMD) {
+ if (ValVerMD->getNumOperands() != DXILVersionNumFields)
+ return VersionTuple();
+
+ unsigned Major = ConstMDToUint32(ValVerMD->getOperand(0));
+ unsigned Minor = ConstMDToUint32(ValVerMD->getOperand(1));
+ return VersionTuple(Major, Minor);
+}
+
+static void cleanModuleFlags(Module &M) {
+ constexpr StringLiteral DeadKeys[] = {ValVerKey};
+ // Collect the dead keys that are present in the module flags.
+ StringSet<> DeadKeySet;
+ for (auto &Key : DeadKeys) {
+ if (M.getModuleFlag(Key))
+ DeadKeySet.insert(Key);
+ }
+ if (DeadKeySet.empty())
+ return;
+
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+ NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
+ MDFlags->eraseFromParent();
+ // Re-add the module flags that are not dead.
+ for (auto &Flag : ModuleFlags) {
+ StringRef Key = Flag.Key->getString();
+ if (DeadKeySet.contains(Key))
+ continue;
+ M.addModuleFlag(Flag.Behavior, Key, Flag.Val);
+ }
+}
+
+static void cleanModule(Module &M) { cleanModuleFlags(M); }
+
+namespace {
+class DXILTranslateMetadata : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit DXILTranslateMetadata() : ModulePass(ID), ValidatorVer(1, 0) {}
+
+ StringRef getPassName() const override { return "DXIL Metadata Emit"; }
+
+ bool runOnModule(Module &M) override;
+
+private:
+ VersionTuple ValidatorVer;
+};
+
+} // namespace
+
+bool DXILTranslateMetadata::runOnModule(Module &M) {
+ if (MDNode *ValVerMD = cast_or_null<MDNode>(M.getModuleFlag(ValVerKey))) {
+ auto ValVer = loadDXILValidatorVersion(ValVerMD);
+ if (!ValVer.empty())
+ ValidatorVer = ValVer;
+ }
+ emitDXILValidatorVersion(M, ValidatorVer);
+ cleanModule(M);
+ return false;
+}
+
+char DXILTranslateMetadata::ID = 0;
+
+ModulePass *llvm::createDXILTranslateMetadataPass() {
+ return new DXILTranslateMetadata();
+}
+
+INITIALIZE_PASS(DXILTranslateMetadata, "dxil-metadata-emit",
+ "DXIL Metadata Emit", false, false)
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
new file mode 100644
index 000000000000..494a71e51a89
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -0,0 +1,2963 @@
+//===- Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp - DXIL Bitc Writer -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILBitcodeWriter.h"
+#include "DXILValueEnumerator.h"
+#include "PointerTypeAnalysis.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeCommon.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamWriter.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/UseListOrder.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Object/IRSymtab.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SHA1.h"
+
+namespace llvm {
+namespace dxil {
+
+// Generates an enum to use as an index into the Abbrev array of the Metadata
+// records.
+enum MetadataAbbrev : unsigned {
+#define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID,
+#include "llvm/IR/Metadata.def"
+ LastPlusOne
+};
+
+class DXILBitcodeWriter {
+
+ /// These are manifest constants used by the bitcode writer. They do not need
+ /// to be kept in sync with the reader, but need to be consistent within this
+ /// file.
+ enum {
+ // VALUE_SYMTAB_BLOCK abbrev id's.
+ VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ VST_ENTRY_7_ABBREV,
+ VST_ENTRY_6_ABBREV,
+ VST_BBENTRY_6_ABBREV,
+
+ // CONSTANTS_BLOCK abbrev id's.
+ CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ CONSTANTS_INTEGER_ABBREV,
+ CONSTANTS_CE_CAST_Abbrev,
+ CONSTANTS_NULL_Abbrev,
+
+ // FUNCTION_BLOCK abbrev id's.
+ FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+ FUNCTION_INST_BINOP_ABBREV,
+ FUNCTION_INST_BINOP_FLAGS_ABBREV,
+ FUNCTION_INST_CAST_ABBREV,
+ FUNCTION_INST_RET_VOID_ABBREV,
+ FUNCTION_INST_RET_VAL_ABBREV,
+ FUNCTION_INST_UNREACHABLE_ABBREV,
+ FUNCTION_INST_GEP_ABBREV,
+ };
+
+ // Cache some types
+ Type *I8Ty;
+ Type *I8PtrTy;
+
+ /// The stream created and owned by the client.
+ BitstreamWriter &Stream;
+
+ StringTableBuilder &StrtabBuilder;
+
+ /// The Module to write to bitcode.
+ const Module &M;
+
+ /// Enumerates ids for all values in the module.
+ ValueEnumerator VE;
+
+ /// Map that holds the correspondence between GUIDs in the summary index,
+ /// that came from indirect call profiles, and a value id generated by this
+ /// class to use in the VST and summary block records.
+ std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
+
+ /// Tracks the last value id recorded in the GUIDToValueIdMap.
+ unsigned GlobalValueId;
+
+ /// Saves the offset of the VSTOffset record that must eventually be
+ /// backpatched with the offset of the actual VST.
+ uint64_t VSTOffsetPlaceholder = 0;
+
+ /// Pointer to the buffer allocated by caller for bitcode writing.
+ const SmallVectorImpl<char> &Buffer;
+
+ /// The start bit of the identification block.
+ uint64_t BitcodeStartBit;
+
+ /// Maps values to their typed pointers.
+ PointerTypeMap PointerMap;
+
+public:
+ /// Constructs a DXILBitcodeWriter object for the given Module,
+ /// writing to the provided \p Buffer.
+ DXILBitcodeWriter(const Module &M, SmallVectorImpl<char> &Buffer,
+ StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream)
+ : I8Ty(Type::getInt8Ty(M.getContext())),
+ I8PtrTy(TypedPointerType::get(I8Ty, 0)), Stream(Stream),
+ StrtabBuilder(StrtabBuilder), M(M), VE(M, I8PtrTy), Buffer(Buffer),
+ BitcodeStartBit(Stream.GetCurrentBitNo()),
+ PointerMap(PointerTypeAnalysis::run(M)) {
+ GlobalValueId = VE.getValues().size();
+ // Enumerate the typed pointers
+ for (auto El : PointerMap)
+ VE.EnumerateType(El.second);
+ }
+
+ /// Emit the current module to the bitstream.
+ void write();
+
+ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind);
+ static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
+ StringRef Str, unsigned AbbrevToUse);
+ static void writeIdentificationBlock(BitstreamWriter &Stream);
+ static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V);
+ static void emitWideAPInt(SmallVectorImpl<uint64_t> &Vals, const APInt &A);
+
+ static unsigned getEncodedComdatSelectionKind(const Comdat &C);
+ static unsigned getEncodedLinkage(const GlobalValue::LinkageTypes Linkage);
+ static unsigned getEncodedLinkage(const GlobalValue &GV);
+ static unsigned getEncodedVisibility(const GlobalValue &GV);
+ static unsigned getEncodedThreadLocalMode(const GlobalValue &GV);
+ static unsigned getEncodedDLLStorageClass(const GlobalValue &GV);
+ static unsigned getEncodedCastOpcode(unsigned Opcode);
+ static unsigned getEncodedUnaryOpcode(unsigned Opcode);
+ static unsigned getEncodedBinaryOpcode(unsigned Opcode);
+ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op);
+ static unsigned getEncodedOrdering(AtomicOrdering Ordering);
+ static uint64_t getOptimizationFlags(const Value *V);
+
+private:
+ void writeModuleVersion();
+ void writePerModuleGlobalValueSummary();
+
+ void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
+ GlobalValueSummary *Summary,
+ unsigned ValueID,
+ unsigned FSCallsAbbrev,
+ unsigned FSCallsProfileAbbrev,
+ const Function &F);
+ void writeModuleLevelReferences(const GlobalVariable &V,
+ SmallVector<uint64_t, 64> &NameVals,
+ unsigned FSModRefsAbbrev,
+ unsigned FSModVTableRefsAbbrev);
+
+ void assignValueId(GlobalValue::GUID ValGUID) {
+ GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
+ }
+
+ unsigned getValueId(GlobalValue::GUID ValGUID) {
+ const auto &VMI = GUIDToValueIdMap.find(ValGUID);
+ // Expect that any GUID value had a value Id assigned by an
+ // earlier call to assignValueId.
+ assert(VMI != GUIDToValueIdMap.end() &&
+ "GUID does not have assigned value Id");
+ return VMI->second;
+ }
+
+ // Helper to get the valueId for the type of value recorded in VI.
+ unsigned getValueId(ValueInfo VI) {
+ if (!VI.haveGVs() || !VI.getValue())
+ return getValueId(VI.getGUID());
+ return VE.getValueID(VI.getValue());
+ }
+
+ std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
+
+ uint64_t bitcodeStartBit() { return BitcodeStartBit; }
+
+ size_t addToStrtab(StringRef Str);
+
+ unsigned createDILocationAbbrev();
+ unsigned createGenericDINodeAbbrev();
+
+ void writeAttributeGroupTable();
+ void writeAttributeTable();
+ void writeTypeTable();
+ void writeComdats();
+ void writeValueSymbolTableForwardDecl();
+ void writeModuleInfo();
+ void writeValueAsMetadata(const ValueAsMetadata *MD,
+ SmallVectorImpl<uint64_t> &Record);
+ void writeMDTuple(const MDTuple *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDILocation(const DILocation *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned &Abbrev);
+ void writeGenericDINode(const GenericDINode *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned &Abbrev) {
+ llvm_unreachable("DXIL cannot contain GenericDI Nodes");
+ }
+ void writeDISubrange(const DISubrange *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDIGenericSubrange(const DIGenericSubrange *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain DIGenericSubrange Nodes");
+ }
+ void writeDIEnumerator(const DIEnumerator *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDIBasicType(const DIBasicType *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDIStringType(const DIStringType *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain DIStringType Nodes");
+ }
+ void writeDIDerivedType(const DIDerivedType *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDICompositeType(const DICompositeType *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDISubroutineType(const DISubroutineType *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDIFile(const DIFile *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDICompileUnit(const DICompileUnit *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDISubprogram(const DISubprogram *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDILexicalBlock(const DILexicalBlock *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDILexicalBlockFile(const DILexicalBlockFile *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDICommonBlock(const DICommonBlock *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain DICommonBlock Nodes");
+ }
+ void writeDINamespace(const DINamespace *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDIMacro(const DIMacro *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain DIMacro Nodes");
+ }
+ void writeDIMacroFile(const DIMacroFile *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain DIMacroFile Nodes");
+ }
+ void writeDIArgList(const DIArgList *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain DIArgList Nodes");
+ }
+ void writeDIModule(const DIModule *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDITemplateTypeParameter(const DITemplateTypeParameter *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDITemplateValueParameter(const DITemplateValueParameter *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDIGlobalVariable(const DIGlobalVariable *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ void writeDILocalVariable(const DILocalVariable *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDILabel(const DILabel *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain DILabel Nodes");
+ }
+ void writeDIExpression(const DIExpression *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDIGlobalVariableExpression(const DIGlobalVariableExpression *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ llvm_unreachable("DXIL cannot contain GlobalVariableExpression Nodes");
+ }
+ void writeDIObjCProperty(const DIObjCProperty *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDIImportedEntity(const DIImportedEntity *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
+ unsigned createNamedMetadataAbbrev();
+ void writeNamedMetadata(SmallVectorImpl<uint64_t> &Record);
+ unsigned createMetadataStringsAbbrev();
+ void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
+ SmallVectorImpl<uint64_t> &Record);
+ void writeMetadataRecords(ArrayRef<const Metadata *> MDs,
+ SmallVectorImpl<uint64_t> &Record,
+ std::vector<unsigned> *MDAbbrevs = nullptr,
+ std::vector<uint64_t> *IndexPos = nullptr);
+ void writeModuleMetadata();
+ void writeFunctionMetadata(const Function &F);
+ void writeFunctionMetadataAttachment(const Function &F);
+ void pushGlobalMetadataAttachment(SmallVectorImpl<uint64_t> &Record,
+ const GlobalObject &GO);
+ void writeModuleMetadataKinds();
+ void writeOperandBundleTags();
+ void writeSyncScopeNames();
+ void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal);
+ void writeModuleConstants();
+ bool pushValueAndType(const Value *V, unsigned InstID,
+ SmallVectorImpl<unsigned> &Vals);
+ void writeOperandBundles(const CallBase &CB, unsigned InstID);
+ void pushValue(const Value *V, unsigned InstID,
+ SmallVectorImpl<unsigned> &Vals);
+ void pushValueSigned(const Value *V, unsigned InstID,
+ SmallVectorImpl<uint64_t> &Vals);
+ void writeInstruction(const Instruction &I, unsigned InstID,
+ SmallVectorImpl<unsigned> &Vals);
+ void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST);
+ void writeGlobalValueSymbolTable(
+ DenseMap<const Function *, uint64_t> &FunctionToBitcodeIndex);
+ void writeUseList(UseListOrder &&Order);
+ void writeUseListBlock(const Function *F);
+ void writeFunction(const Function &F);
+ void writeBlockInfo();
+
+ unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { return unsigned(SSID); }
+
+ unsigned getEncodedAlign(MaybeAlign Alignment) { return encode(Alignment); }
+
+ unsigned getTypeID(Type *T, const Value *V = nullptr);
+ unsigned getTypeID(Type *T, const Function *F);
+};
+
+} // namespace dxil
+} // namespace llvm
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+////////////////////////////////////////////////////////////////////////////////
+/// Begin dxil::BitcodeWriter Implementation
+////////////////////////////////////////////////////////////////////////////////
+
+dxil::BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer,
+ raw_fd_stream *FS)
+ : Buffer(Buffer), Stream(new BitstreamWriter(Buffer, FS, 512)) {
+ // Emit the file header.
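+ // 'B', 'C', then the nibbles 0x0, 0xC, 0xE, 0xD: the standard LLVM bitcode
+ // magic number.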
+ Stream->Emit((unsigned)'B', 8);
+ Stream->Emit((unsigned)'C', 8);
+ Stream->Emit(0x0, 4);
+ Stream->Emit(0xC, 4);
+ Stream->Emit(0xE, 4);
+ Stream->Emit(0xD, 4);
+}
+
+dxil::BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); }
+
+/// Write the specified module to the specified output stream.
+void dxil::WriteDXILToFile(const Module &M, raw_ostream &Out) {
+ SmallVector<char, 0> Buffer;
+ Buffer.reserve(256 * 1024);
+
+ // If this is Darwin or another generic Mach-O target, reserve space for
+ // the header.
+ Triple TT(M.getTargetTriple());
+ if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
+ Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
+
+ BitcodeWriter Writer(Buffer, dyn_cast<raw_fd_stream>(&Out));
+ Writer.writeModule(M);
+ Writer.writeSymtab();
+ Writer.writeStrtab();
+
+ // Write the generated bitstream to "Out".
+ if (!Buffer.empty())
+ Out.write((char *)&Buffer.front(), Buffer.size());
+}
+
+void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) {
+ Stream->EnterSubblock(Block, 3);
+
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(Record));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+ auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv));
+
+ Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef<uint64_t>{Record}, Blob);
+
+ Stream->ExitBlock();
+}
+
+void BitcodeWriter::writeSymtab() {
+ assert(!WroteStrtab && !WroteSymtab);
+
+ // If any module has module-level inline asm, we will require a registered asm
+ // parser for the target so that we can create an accurate symbol table for
+ // the module.
+ for (Module *M : Mods) {
+ if (M->getModuleInlineAsm().empty())
+ continue;
+ }
+
+ WroteSymtab = true;
+ SmallVector<char, 0> Symtab;
+ // The irsymtab::build function may be unable to create a symbol table if the
+ // module is malformed (e.g. it contains an invalid alias). Writing a symbol
+ // table is not required for correctness, but we still want to be able to
+ // write malformed modules to bitcode files, so swallow the error.
+ if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) {
+ consumeError(std::move(E));
+ return;
+ }
+
+ writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB,
+ {Symtab.data(), Symtab.size()});
+}
+
+void BitcodeWriter::writeStrtab() {
+ assert(!WroteStrtab);
+
+ std::vector<char> Strtab;
+ StrtabBuilder.finalizeInOrder();
+ Strtab.resize(StrtabBuilder.getSize());
+ StrtabBuilder.write((uint8_t *)Strtab.data());
+
+ writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB,
+ {Strtab.data(), Strtab.size()});
+
+ WroteStrtab = true;
+}
+
+void BitcodeWriter::copyStrtab(StringRef Strtab) {
+ writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab);
+ WroteStrtab = true;
+}
+
+void BitcodeWriter::writeModule(const Module &M) {
+ assert(!WroteStrtab);
+
+ // The Mods vector is used by irsymtab::build, which requires non-const
+ // Modules in case it needs to materialize metadata. But the bitcode writer
+ // requires that the module is materialized, so we can cast to non-const here,
+ // after checking that it is in fact materialized.
+ assert(M.isMaterialized());
+ Mods.push_back(const_cast<Module *>(&M));
+
+ DXILBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream);
+ ModuleWriter.write();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Begin dxil::BitcodeWriterBase Implementation
+////////////////////////////////////////////////////////////////////////////////
+
+unsigned DXILBitcodeWriter::getEncodedCastOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown cast instruction!");
+ case Instruction::Trunc:
+ return bitc::CAST_TRUNC;
+ case Instruction::ZExt:
+ return bitc::CAST_ZEXT;
+ case Instruction::SExt:
+ return bitc::CAST_SEXT;
+ case Instruction::FPToUI:
+ return bitc::CAST_FPTOUI;
+ case Instruction::FPToSI:
+ return bitc::CAST_FPTOSI;
+ case Instruction::UIToFP:
+ return bitc::CAST_UITOFP;
+ case Instruction::SIToFP:
+ return bitc::CAST_SITOFP;
+ case Instruction::FPTrunc:
+ return bitc::CAST_FPTRUNC;
+ case Instruction::FPExt:
+ return bitc::CAST_FPEXT;
+ case Instruction::PtrToInt:
+ return bitc::CAST_PTRTOINT;
+ case Instruction::IntToPtr:
+ return bitc::CAST_INTTOPTR;
+ case Instruction::BitCast:
+ return bitc::CAST_BITCAST;
+ case Instruction::AddrSpaceCast:
+ return bitc::CAST_ADDRSPACECAST;
+ }
+}
+
+unsigned DXILBitcodeWriter::getEncodedUnaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown binary instruction!");
+ case Instruction::FNeg:
+ return bitc::UNOP_FNEG;
+ }
+}
+
+unsigned DXILBitcodeWriter::getEncodedBinaryOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown binary instruction!");
+ case Instruction::Add:
+ case Instruction::FAdd:
+ return bitc::BINOP_ADD;
+ case Instruction::Sub:
+ case Instruction::FSub:
+ return bitc::BINOP_SUB;
+ case Instruction::Mul:
+ case Instruction::FMul:
+ return bitc::BINOP_MUL;
+ case Instruction::UDiv:
+ return bitc::BINOP_UDIV;
+ case Instruction::FDiv:
+ case Instruction::SDiv:
+ return bitc::BINOP_SDIV;
+ case Instruction::URem:
+ return bitc::BINOP_UREM;
+ case Instruction::FRem:
+ case Instruction::SRem:
+ return bitc::BINOP_SREM;
+ case Instruction::Shl:
+ return bitc::BINOP_SHL;
+ case Instruction::LShr:
+ return bitc::BINOP_LSHR;
+ case Instruction::AShr:
+ return bitc::BINOP_ASHR;
+ case Instruction::And:
+ return bitc::BINOP_AND;
+ case Instruction::Or:
+ return bitc::BINOP_OR;
+ case Instruction::Xor:
+ return bitc::BINOP_XOR;
+ }
+}
+
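+// Map an opaque pointer type to the typed pointer recovered for \p V by
+// PointerTypeAnalysis, falling back to i8* when no mapping is known.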
+unsigned DXILBitcodeWriter::getTypeID(Type *T, const Value *V) {
+ if (!T->isOpaquePointerTy())
+ return VE.getTypeID(T);
+ auto It = PointerMap.find(V);
+ if (It != PointerMap.end())
+ return VE.getTypeID(It->second);
+ return VE.getTypeID(I8PtrTy);
+}
+
+unsigned DXILBitcodeWriter::getTypeID(Type *T, const Function *F) {
+ auto It = PointerMap.find(F);
+ if (It != PointerMap.end())
+ return VE.getTypeID(It->second);
+ return VE.getTypeID(T);
+}
+
+unsigned DXILBitcodeWriter::getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
+ switch (Op) {
+ default:
+ llvm_unreachable("Unknown RMW operation!");
+ case AtomicRMWInst::Xchg:
+ return bitc::RMW_XCHG;
+ case AtomicRMWInst::Add:
+ return bitc::RMW_ADD;
+ case AtomicRMWInst::Sub:
+ return bitc::RMW_SUB;
+ case AtomicRMWInst::And:
+ return bitc::RMW_AND;
+ case AtomicRMWInst::Nand:
+ return bitc::RMW_NAND;
+ case AtomicRMWInst::Or:
+ return bitc::RMW_OR;
+ case AtomicRMWInst::Xor:
+ return bitc::RMW_XOR;
+ case AtomicRMWInst::Max:
+ return bitc::RMW_MAX;
+ case AtomicRMWInst::Min:
+ return bitc::RMW_MIN;
+ case AtomicRMWInst::UMax:
+ return bitc::RMW_UMAX;
+ case AtomicRMWInst::UMin:
+ return bitc::RMW_UMIN;
+ case AtomicRMWInst::FAdd:
+ return bitc::RMW_FADD;
+ case AtomicRMWInst::FSub:
+ return bitc::RMW_FSUB;
+ }
+}
+
+unsigned DXILBitcodeWriter::getEncodedOrdering(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ case AtomicOrdering::NotAtomic:
+ return bitc::ORDERING_NOTATOMIC;
+ case AtomicOrdering::Unordered:
+ return bitc::ORDERING_UNORDERED;
+ case AtomicOrdering::Monotonic:
+ return bitc::ORDERING_MONOTONIC;
+ case AtomicOrdering::Acquire:
+ return bitc::ORDERING_ACQUIRE;
+ case AtomicOrdering::Release:
+ return bitc::ORDERING_RELEASE;
+ case AtomicOrdering::AcquireRelease:
+ return bitc::ORDERING_ACQREL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return bitc::ORDERING_SEQCST;
+ }
+ llvm_unreachable("Invalid ordering");
+}
+
+void DXILBitcodeWriter::writeStringRecord(BitstreamWriter &Stream,
+ unsigned Code, StringRef Str,
+ unsigned AbbrevToUse) {
+ SmallVector<unsigned, 64> Vals;
+
+ // Code: [strchar x N]
+ for (char C : Str) {
+ if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(C))
+ AbbrevToUse = 0;
+ Vals.push_back(C);
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+}
+
+uint64_t DXILBitcodeWriter::getAttrKindEncoding(Attribute::AttrKind Kind) {
+ switch (Kind) {
+ case Attribute::Alignment:
+ return bitc::ATTR_KIND_ALIGNMENT;
+ case Attribute::AlwaysInline:
+ return bitc::ATTR_KIND_ALWAYS_INLINE;
+ case Attribute::ArgMemOnly:
+ return bitc::ATTR_KIND_ARGMEMONLY;
+ case Attribute::Builtin:
+ return bitc::ATTR_KIND_BUILTIN;
+ case Attribute::ByVal:
+ return bitc::ATTR_KIND_BY_VAL;
+ case Attribute::Convergent:
+ return bitc::ATTR_KIND_CONVERGENT;
+ case Attribute::InAlloca:
+ return bitc::ATTR_KIND_IN_ALLOCA;
+ case Attribute::Cold:
+ return bitc::ATTR_KIND_COLD;
+ case Attribute::InlineHint:
+ return bitc::ATTR_KIND_INLINE_HINT;
+ case Attribute::InReg:
+ return bitc::ATTR_KIND_IN_REG;
+ case Attribute::JumpTable:
+ return bitc::ATTR_KIND_JUMP_TABLE;
+ case Attribute::MinSize:
+ return bitc::ATTR_KIND_MIN_SIZE;
+ case Attribute::Naked:
+ return bitc::ATTR_KIND_NAKED;
+ case Attribute::Nest:
+ return bitc::ATTR_KIND_NEST;
+ case Attribute::NoAlias:
+ return bitc::ATTR_KIND_NO_ALIAS;
+ case Attribute::NoBuiltin:
+ return bitc::ATTR_KIND_NO_BUILTIN;
+ case Attribute::NoCapture:
+ return bitc::ATTR_KIND_NO_CAPTURE;
+ case Attribute::NoDuplicate:
+ return bitc::ATTR_KIND_NO_DUPLICATE;
+ case Attribute::NoImplicitFloat:
+ return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT;
+ case Attribute::NoInline:
+ return bitc::ATTR_KIND_NO_INLINE;
+ case Attribute::NonLazyBind:
+ return bitc::ATTR_KIND_NON_LAZY_BIND;
+ case Attribute::NonNull:
+ return bitc::ATTR_KIND_NON_NULL;
+ case Attribute::Dereferenceable:
+ return bitc::ATTR_KIND_DEREFERENCEABLE;
+ case Attribute::DereferenceableOrNull:
+ return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL;
+ case Attribute::NoRedZone:
+ return bitc::ATTR_KIND_NO_RED_ZONE;
+ case Attribute::NoReturn:
+ return bitc::ATTR_KIND_NO_RETURN;
+ case Attribute::NoUnwind:
+ return bitc::ATTR_KIND_NO_UNWIND;
+ case Attribute::OptimizeForSize:
+ return bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE;
+ case Attribute::OptimizeNone:
+ return bitc::ATTR_KIND_OPTIMIZE_NONE;
+ case Attribute::ReadNone:
+ return bitc::ATTR_KIND_READ_NONE;
+ case Attribute::ReadOnly:
+ return bitc::ATTR_KIND_READ_ONLY;
+ case Attribute::Returned:
+ return bitc::ATTR_KIND_RETURNED;
+ case Attribute::ReturnsTwice:
+ return bitc::ATTR_KIND_RETURNS_TWICE;
+ case Attribute::SExt:
+ return bitc::ATTR_KIND_S_EXT;
+ case Attribute::StackAlignment:
+ return bitc::ATTR_KIND_STACK_ALIGNMENT;
+ case Attribute::StackProtect:
+ return bitc::ATTR_KIND_STACK_PROTECT;
+ case Attribute::StackProtectReq:
+ return bitc::ATTR_KIND_STACK_PROTECT_REQ;
+ case Attribute::StackProtectStrong:
+ return bitc::ATTR_KIND_STACK_PROTECT_STRONG;
+ case Attribute::SafeStack:
+ return bitc::ATTR_KIND_SAFESTACK;
+ case Attribute::StructRet:
+ return bitc::ATTR_KIND_STRUCT_RET;
+ case Attribute::SanitizeAddress:
+ return bitc::ATTR_KIND_SANITIZE_ADDRESS;
+ case Attribute::SanitizeThread:
+ return bitc::ATTR_KIND_SANITIZE_THREAD;
+ case Attribute::SanitizeMemory:
+ return bitc::ATTR_KIND_SANITIZE_MEMORY;
+ case Attribute::UWTable:
+ return bitc::ATTR_KIND_UW_TABLE;
+ case Attribute::ZExt:
+ return bitc::ATTR_KIND_Z_EXT;
+ case Attribute::EndAttrKinds:
+ llvm_unreachable("Can not encode end-attribute kinds marker.");
+ case Attribute::None:
+ llvm_unreachable("Can not encode none-attribute.");
+ case Attribute::EmptyKey:
+ case Attribute::TombstoneKey:
+ llvm_unreachable("Trying to encode EmptyKey/TombstoneKey");
+ default:
+ llvm_unreachable("Trying to encode attribute not supported by DXIL. These "
+ "should be stripped in DXILPrepare");
+ }
+
+ llvm_unreachable("Trying to encode unknown attribute");
+}
+
+void DXILBitcodeWriter::emitSignedInt64(SmallVectorImpl<uint64_t> &Vals,
+ uint64_t V) {
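+ // Standard bitcode signed-VBR encoding: the low bit carries the sign and
+ // the remaining bits carry the magnitude.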
+ if ((int64_t)V >= 0)
+ Vals.push_back(V << 1);
+ else
+ Vals.push_back((-V << 1) | 1);
+}
+
+void DXILBitcodeWriter::emitWideAPInt(SmallVectorImpl<uint64_t> &Vals,
+ const APInt &A) {
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
+ // format it is likely that the high bits are going to be zero.
+ // So, we only write the number of active words.
+ unsigned NumWords = A.getActiveWords();
+ const uint64_t *RawData = A.getRawData();
+ for (unsigned i = 0; i < NumWords; i++)
+ emitSignedInt64(Vals, RawData[i]);
+}
+
+uint64_t DXILBitcodeWriter::getOptimizationFlags(const Value *V) {
+ uint64_t Flags = 0;
+
+ if (const auto *OBO = dyn_cast<OverflowingBinaryOperator>(V)) {
+ if (OBO->hasNoSignedWrap())
+ Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
+ if (OBO->hasNoUnsignedWrap())
+ Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
+ } else if (const auto *PEO = dyn_cast<PossiblyExactOperator>(V)) {
+ if (PEO->isExact())
+ Flags |= 1 << bitc::PEO_EXACT;
+ } else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
+ if (FPMO->hasAllowReassoc())
+ Flags |= bitc::AllowReassoc;
+ if (FPMO->hasNoNaNs())
+ Flags |= bitc::NoNaNs;
+ if (FPMO->hasNoInfs())
+ Flags |= bitc::NoInfs;
+ if (FPMO->hasNoSignedZeros())
+ Flags |= bitc::NoSignedZeros;
+ if (FPMO->hasAllowReciprocal())
+ Flags |= bitc::AllowReciprocal;
+ if (FPMO->hasAllowContract())
+ Flags |= bitc::AllowContract;
+ if (FPMO->hasApproxFunc())
+ Flags |= bitc::ApproxFunc;
+ }
+
+ return Flags;
+}
+
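+// The integer values returned below are the on-disk bitcode linkage
+// encodings and must not be renumbered.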
+unsigned
+DXILBitcodeWriter::getEncodedLinkage(const GlobalValue::LinkageTypes Linkage) {
+ switch (Linkage) {
+ case GlobalValue::ExternalLinkage:
+ return 0;
+ case GlobalValue::WeakAnyLinkage:
+ return 16;
+ case GlobalValue::AppendingLinkage:
+ return 2;
+ case GlobalValue::InternalLinkage:
+ return 3;
+ case GlobalValue::LinkOnceAnyLinkage:
+ return 18;
+ case GlobalValue::ExternalWeakLinkage:
+ return 7;
+ case GlobalValue::CommonLinkage:
+ return 8;
+ case GlobalValue::PrivateLinkage:
+ return 9;
+ case GlobalValue::WeakODRLinkage:
+ return 17;
+ case GlobalValue::LinkOnceODRLinkage:
+ return 19;
+ case GlobalValue::AvailableExternallyLinkage:
+ return 12;
+ }
+ llvm_unreachable("Invalid linkage");
+}
+
+unsigned DXILBitcodeWriter::getEncodedLinkage(const GlobalValue &GV) {
+ return getEncodedLinkage(GV.getLinkage());
+}
+
+unsigned DXILBitcodeWriter::getEncodedVisibility(const GlobalValue &GV) {
+ switch (GV.getVisibility()) {
+ case GlobalValue::DefaultVisibility:
+ return 0;
+ case GlobalValue::HiddenVisibility:
+ return 1;
+ case GlobalValue::ProtectedVisibility:
+ return 2;
+ }
+ llvm_unreachable("Invalid visibility");
+}
+
+unsigned DXILBitcodeWriter::getEncodedDLLStorageClass(const GlobalValue &GV) {
+ switch (GV.getDLLStorageClass()) {
+ case GlobalValue::DefaultStorageClass:
+ return 0;
+ case GlobalValue::DLLImportStorageClass:
+ return 1;
+ case GlobalValue::DLLExportStorageClass:
+ return 2;
+ }
+ llvm_unreachable("Invalid DLL storage class");
+}
+
+unsigned DXILBitcodeWriter::getEncodedThreadLocalMode(const GlobalValue &GV) {
+ switch (GV.getThreadLocalMode()) {
+ case GlobalVariable::NotThreadLocal:
+ return 0;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ return 1;
+ case GlobalVariable::LocalDynamicTLSModel:
+ return 2;
+ case GlobalVariable::InitialExecTLSModel:
+ return 3;
+ case GlobalVariable::LocalExecTLSModel:
+ return 4;
+ }
+ llvm_unreachable("Invalid TLS model");
+}
+
+unsigned DXILBitcodeWriter::getEncodedComdatSelectionKind(const Comdat &C) {
+ switch (C.getSelectionKind()) {
+ case Comdat::Any:
+ return bitc::COMDAT_SELECTION_KIND_ANY;
+ case Comdat::ExactMatch:
+ return bitc::COMDAT_SELECTION_KIND_EXACT_MATCH;
+ case Comdat::Largest:
+ return bitc::COMDAT_SELECTION_KIND_LARGEST;
+ case Comdat::NoDeduplicate:
+ return bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES;
+ case Comdat::SameSize:
+ return bitc::COMDAT_SELECTION_KIND_SAME_SIZE;
+ }
+ llvm_unreachable("Invalid selection kind");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Begin DXILBitcodeWriter Implementation
+////////////////////////////////////////////////////////////////////////////////
+
+void DXILBitcodeWriter::writeAttributeGroupTable() {
+ const std::vector<ValueEnumerator::IndexAndAttrSet> &AttrGrps =
+ VE.getAttributeGroups();
+ if (AttrGrps.empty())
+ return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) {
+ unsigned AttrListIndex = Pair.first;
+ AttributeSet AS = Pair.second;
+ Record.push_back(VE.getAttributeGroupID(Pair));
+ Record.push_back(AttrListIndex);
+
+ for (Attribute Attr : AS) {
+ if (Attr.isEnumAttribute()) {
+ uint64_t Val = getAttrKindEncoding(Attr.getKindAsEnum());
+ assert(Val <= bitc::ATTR_KIND_ARGMEMONLY &&
+ "DXIL does not support attributes above ATTR_KIND_ARGMEMONLY");
+ Record.push_back(0);
+ Record.push_back(Val);
+ } else if (Attr.isIntAttribute()) {
+ uint64_t Val = getAttrKindEncoding(Attr.getKindAsEnum());
+ assert(Val <= bitc::ATTR_KIND_ARGMEMONLY &&
+ "DXIL does not support attributes above ATTR_KIND_ARGMEMONLY");
+ Record.push_back(1);
+ Record.push_back(Val);
+ Record.push_back(Attr.getValueAsInt());
+ } else {
+ StringRef Kind = Attr.getKindAsString();
+ StringRef Val = Attr.getValueAsString();
+
+ Record.push_back(Val.empty() ? 3 : 4);
+ Record.append(Kind.begin(), Kind.end());
+ Record.push_back(0);
+ if (!Val.empty()) {
+ Record.append(Val.begin(), Val.end());
+ Record.push_back(0);
+ }
+ }
+ }
+
+ Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeAttributeTable() {
+ const std::vector<AttributeList> &Attrs = VE.getAttributeLists();
+ if (Attrs.empty())
+ return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ AttributeList AL = Attrs[i];
+ for (unsigned i : AL.indexes()) {
+ AttributeSet AS = AL.getAttributes(i);
+ if (AS.hasAttributes())
+ Record.push_back(VE.getAttributeGroupID({i, AS}));
+ }
+
+ Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+/// WriteTypeTable - Write out the type table for a module.
+void DXILBitcodeWriter::writeTypeTable() {
+ const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+ Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
+ SmallVector<uint64_t, 64> TypeVals;
+
+ uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies();
+
+ // Abbrev for TYPE_CODE_POINTER.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+ Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
+ unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // Abbrev for TYPE_CODE_FUNCTION.
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+ unsigned FunctionAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // Abbrev for TYPE_CODE_STRUCT_ANON.
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+ unsigned StructAnonAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // Abbrev for TYPE_CODE_STRUCT_NAME.
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ unsigned StructNameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // Abbrev for TYPE_CODE_STRUCT_NAMED.
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+ unsigned StructNamedAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // Abbrev for TYPE_CODE_ARRAY.
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+ unsigned ArrayAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // Emit an entry count so the reader can reserve space.
+ TypeVals.push_back(TypeList.size());
+ Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+ TypeVals.clear();
+
+ // Loop over all of the types, emitting each in turn.
+ for (Type *T : TypeList) {
+ int AbbrevToUse = 0;
+ unsigned Code = 0;
+
+ switch (T->getTypeID()) {
+ case Type::BFloatTyID:
+ case Type::X86_AMXTyID:
+ case Type::TokenTyID:
+ llvm_unreachable("These should never be used!!!");
+ break;
+ case Type::VoidTyID:
+ Code = bitc::TYPE_CODE_VOID;
+ break;
+ case Type::HalfTyID:
+ Code = bitc::TYPE_CODE_HALF;
+ break;
+ case Type::FloatTyID:
+ Code = bitc::TYPE_CODE_FLOAT;
+ break;
+ case Type::DoubleTyID:
+ Code = bitc::TYPE_CODE_DOUBLE;
+ break;
+ case Type::X86_FP80TyID:
+ Code = bitc::TYPE_CODE_X86_FP80;
+ break;
+ case Type::FP128TyID:
+ Code = bitc::TYPE_CODE_FP128;
+ break;
+ case Type::PPC_FP128TyID:
+ Code = bitc::TYPE_CODE_PPC_FP128;
+ break;
+ case Type::LabelTyID:
+ Code = bitc::TYPE_CODE_LABEL;
+ break;
+ case Type::MetadataTyID:
+ Code = bitc::TYPE_CODE_METADATA;
+ break;
+ case Type::X86_MMXTyID:
+ Code = bitc::TYPE_CODE_X86_MMX;
+ break;
+ case Type::IntegerTyID:
+ // INTEGER: [width]
+ Code = bitc::TYPE_CODE_INTEGER;
+ TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+ break;
+ case Type::DXILPointerTyID: {
+ TypedPointerType *PTy = cast<TypedPointerType>(T);
+ // POINTER: [pointee type, address space]
+ Code = bitc::TYPE_CODE_POINTER;
+ TypeVals.push_back(getTypeID(PTy->getElementType()));
+ unsigned AddressSpace = PTy->getAddressSpace();
+ TypeVals.push_back(AddressSpace);
+ if (AddressSpace == 0)
+ AbbrevToUse = PtrAbbrev;
+ break;
+ }
+ case Type::PointerTyID: {
+ PointerType *PTy = cast<PointerType>(T);
+ // POINTER: [pointee type, address space]
+ Code = bitc::TYPE_CODE_POINTER;
+ // Emitting an empty struct type for the opaque pointer's type allows
+ // this to be order-independent. Non-struct types must be emitted in
+ // bitcode before they can be referenced.
+ if (PTy->isOpaquePointerTy()) {
+ TypeVals.push_back(false);
+ Code = bitc::TYPE_CODE_OPAQUE;
+ writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME,
+ "dxilOpaquePtrReservedName", StructNameAbbrev);
+ } else {
+ TypeVals.push_back(getTypeID(PTy->getNonOpaquePointerElementType()));
+ unsigned AddressSpace = PTy->getAddressSpace();
+ TypeVals.push_back(AddressSpace);
+ if (AddressSpace == 0)
+ AbbrevToUse = PtrAbbrev;
+ }
+ break;
+ }
+ case Type::FunctionTyID: {
+ FunctionType *FT = cast<FunctionType>(T);
+ // FUNCTION: [isvararg, retty, paramty x N]
+ Code = bitc::TYPE_CODE_FUNCTION;
+ TypeVals.push_back(FT->isVarArg());
+ TypeVals.push_back(getTypeID(FT->getReturnType()));
+ for (Type *PTy : FT->params())
+ TypeVals.push_back(getTypeID(PTy));
+ AbbrevToUse = FunctionAbbrev;
+ break;
+ }
+ case Type::StructTyID: {
+ StructType *ST = cast<StructType>(T);
+ // STRUCT: [ispacked, eltty x N]
+ TypeVals.push_back(ST->isPacked());
+ // Output all of the element types.
+ for (Type *ElTy : ST->elements())
+ TypeVals.push_back(getTypeID(ElTy));
+
+ if (ST->isLiteral()) {
+ Code = bitc::TYPE_CODE_STRUCT_ANON;
+ AbbrevToUse = StructAnonAbbrev;
+ } else {
+ if (ST->isOpaque()) {
+ Code = bitc::TYPE_CODE_OPAQUE;
+ } else {
+ Code = bitc::TYPE_CODE_STRUCT_NAMED;
+ AbbrevToUse = StructNamedAbbrev;
+ }
+
+ // Emit the name if it is present.
+ if (!ST->getName().empty())
+ writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+ StructNameAbbrev);
+ }
+ break;
+ }
+ case Type::ArrayTyID: {
+ ArrayType *AT = cast<ArrayType>(T);
+ // ARRAY: [numelts, eltty]
+ Code = bitc::TYPE_CODE_ARRAY;
+ TypeVals.push_back(AT->getNumElements());
+ TypeVals.push_back(getTypeID(AT->getElementType()));
+ AbbrevToUse = ArrayAbbrev;
+ break;
+ }
+ case Type::FixedVectorTyID:
+ case Type::ScalableVectorTyID: {
+ VectorType *VT = cast<VectorType>(T);
+ // VECTOR [numelts, eltty]
+ Code = bitc::TYPE_CODE_VECTOR;
+ TypeVals.push_back(VT->getElementCount().getKnownMinValue());
+ TypeVals.push_back(getTypeID(VT->getElementType()));
+ break;
+ }
+ }
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+ TypeVals.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeComdats() {
+ SmallVector<uint16_t, 64> Vals;
+ for (const Comdat *C : VE.getComdats()) {
+ // COMDAT: [selection_kind, name]
+ Vals.push_back(getEncodedComdatSelectionKind(*C));
+ size_t Size = C->getName().size();
+ assert(isUInt<16>(Size));
+ Vals.push_back(Size);
+ for (char Chr : C->getName())
+ Vals.push_back((unsigned char)Chr);
+ Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0);
+ Vals.clear();
+ }
+}
+
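+// The DXIL writer does not forward-declare the value symbol table, so this
+// is intentionally a no-op.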
+void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {}
+
+/// Emit the top-level description of the module, including the target triple,
+/// inline asm, descriptors for global variables, and function prototype info.
+void DXILBitcodeWriter::writeModuleInfo() {
+ // Emit various pieces of data attached to a module.
+ if (!M.getTargetTriple().empty())
+ writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
+ 0 /*TODO*/);
+ const std::string &DL = M.getDataLayoutStr();
+ if (!DL.empty())
+ writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+ if (!M.getModuleInlineAsm().empty())
+ writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
+ 0 /*TODO*/);
+
+ // Emit information about sections and GC, computing how many there are. Also
+ // compute the maximum alignment value.
+ std::map<std::string, unsigned> SectionMap;
+ std::map<std::string, unsigned> GCMap;
+ MaybeAlign MaxAlignment;
+ unsigned MaxGlobalType = 0;
+ const auto UpdateMaxAlignment = [&MaxAlignment](const MaybeAlign A) {
+ if (A)
+ MaxAlignment = !MaxAlignment ? *A : std::max(*MaxAlignment, *A);
+ };
+ for (const GlobalVariable &GV : M.globals()) {
+ UpdateMaxAlignment(GV.getAlign());
+ MaxGlobalType = std::max(MaxGlobalType, getTypeID(GV.getValueType(), &GV));
+ if (GV.hasSection()) {
+ // Give section names unique IDs.
+ unsigned &Entry = SectionMap[std::string(GV.getSection())];
+ if (!Entry) {
+ writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME,
+ GV.getSection(), 0 /*TODO*/);
+ Entry = SectionMap.size();
+ }
+ }
+ }
+ for (const Function &F : M) {
+ UpdateMaxAlignment(F.getAlign());
+ if (F.hasSection()) {
+ // Give section names unique IDs.
+ unsigned &Entry = SectionMap[std::string(F.getSection())];
+ if (!Entry) {
+ writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
+ 0 /*TODO*/);
+ Entry = SectionMap.size();
+ }
+ }
+ if (F.hasGC()) {
+ // Same for GC names.
+ unsigned &Entry = GCMap[F.getGC()];
+ if (!Entry) {
+ writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
+ 0 /*TODO*/);
+ Entry = GCMap.size();
+ }
+ }
+ }
+
+ // Emit abbrev for globals, now that we know # sections and max alignment.
+ unsigned SimpleGVarAbbrev = 0;
+ if (!M.global_empty()) {
+ // Add an abbrev for common globals with no visibility or thread
+ // localness.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxGlobalType + 1)));
+ // Packed: AddrSpace << 2 | explicitType << 1 | constant.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer.
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // Linkage.
+ if (!MaxAlignment) // Alignment.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else {
+ unsigned MaxEncAlignment = getEncodedAlign(MaxAlignment);
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(MaxEncAlignment + 1)));
+ }
+ if (SectionMap.empty()) // Section.
+ Abbv->Add(BitCodeAbbrevOp(0));
+ else
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(SectionMap.size() + 1)));
+ // Don't bother emitting vis + thread local.
+ SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ }
+
+ // Emit the global variable information.
+ SmallVector<unsigned, 64> Vals;
+ for (const GlobalVariable &GV : M.globals()) {
+ unsigned AbbrevToUse = 0;
+
+ // GLOBALVAR: [type, isconst, initid,
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr, externally_initialized, dllstorageclass,
+ // comdat]
+ Vals.push_back(getTypeID(GV.getValueType(), &GV));
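+ // Packed as addrspace << 2 | explicitType << 1 | isconst; DXIL always
+ // records an explicit pointee type, hence the constant 2.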
+ Vals.push_back(
+ GV.getType()->getAddressSpace() << 2 | 2 |
+ (GV.isConstant() ? 1 : 0)); // HLSL change: use ?: so bitwise | is not
+ // applied directly to a bool.
+ Vals.push_back(
+ GV.isDeclaration() ? 0 : (VE.getValueID(GV.getInitializer()) + 1));
+ Vals.push_back(getEncodedLinkage(GV));
+ Vals.push_back(getEncodedAlign(GV.getAlign()));
+ Vals.push_back(GV.hasSection() ? SectionMap[std::string(GV.getSection())]
+ : 0);
+ if (GV.isThreadLocal() ||
+ GV.getVisibility() != GlobalValue::DefaultVisibility ||
+ GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None ||
+ GV.isExternallyInitialized() ||
+ GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
+ GV.hasComdat()) {
+ Vals.push_back(getEncodedVisibility(GV));
+ Vals.push_back(getEncodedThreadLocalMode(GV));
+ Vals.push_back(GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None);
+ Vals.push_back(GV.isExternallyInitialized());
+ Vals.push_back(getEncodedDLLStorageClass(GV));
+ Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0);
+ } else {
+ AbbrevToUse = SimpleGVarAbbrev;
+ }
+
+ Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the function proto information.
+ for (const Function &F : M) {
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
+ // section, visibility, gc, unnamed_addr, prologuedata,
+ // dllstorageclass, comdat, prefixdata, personalityfn]
+ Vals.push_back(getTypeID(F.getFunctionType(), &F));
+ Vals.push_back(F.getCallingConv());
+ Vals.push_back(F.isDeclaration());
+ Vals.push_back(getEncodedLinkage(F));
+ Vals.push_back(VE.getAttributeListID(F.getAttributes()));
+ Vals.push_back(getEncodedAlign(F.getAlign()));
+ Vals.push_back(F.hasSection() ? SectionMap[std::string(F.getSection())]
+ : 0);
+ Vals.push_back(getEncodedVisibility(F));
+ Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0);
+ Vals.push_back(F.getUnnamedAddr() != GlobalValue::UnnamedAddr::None);
+ Vals.push_back(
+ F.hasPrologueData() ? (VE.getValueID(F.getPrologueData()) + 1) : 0);
+ Vals.push_back(getEncodedDLLStorageClass(F));
+ Vals.push_back(F.hasComdat() ? VE.getComdatID(F.getComdat()) : 0);
+ Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1)
+ : 0);
+ Vals.push_back(
+ F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0);
+
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+
+ // Emit the alias information.
+ for (const GlobalAlias &A : M.aliases()) {
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
+ Vals.push_back(getTypeID(A.getValueType(), &A));
+ Vals.push_back(VE.getValueID(A.getAliasee()));
+ Vals.push_back(getEncodedLinkage(A));
+ Vals.push_back(getEncodedVisibility(A));
+ Vals.push_back(getEncodedDLLStorageClass(A));
+ Vals.push_back(getEncodedThreadLocalMode(A));
+ Vals.push_back(A.getUnnamedAddr() != GlobalValue::UnnamedAddr::None);
+ unsigned AbbrevToUse = 0;
+ Stream.EmitRecord(bitc::MODULE_CODE_ALIAS_OLD, Vals, AbbrevToUse);
+ Vals.clear();
+ }
+}
+
+void DXILBitcodeWriter::writeValueAsMetadata(
+ const ValueAsMetadata *MD, SmallVectorImpl<uint64_t> &Record) {
+ // Mimic an MDNode with a value as one operand.
+ Value *V = MD->getValue();
+ Type *Ty = V->getType();
+ if (Function *F = dyn_cast<Function>(V))
+ Ty = TypedPointerType::get(F->getFunctionType(), F->getAddressSpace());
+ else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ Ty = TypedPointerType::get(GV->getValueType(), GV->getAddressSpace());
+ Record.push_back(getTypeID(Ty));
+ Record.push_back(VE.getValueID(V));
+ Stream.EmitRecord(bitc::METADATA_VALUE, Record, 0);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeMDTuple(const MDTuple *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ Metadata *MD = N->getOperand(i);
+ assert(!(MD && isa<LocalAsMetadata>(MD)) &&
+ "Unexpected function-local metadata");
+ Record.push_back(VE.getMetadataOrNullID(MD));
+ }
+ Stream.EmitRecord(N->isDistinct() ? bitc::METADATA_DISTINCT_NODE
+ : bitc::METADATA_NODE,
+ Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILocation(const DILocation *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned &Abbrev) {
+ if (!Abbrev)
+ Abbrev = createDILocationAbbrev();
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getLine());
+ Record.push_back(N->getColumn());
+ Record.push_back(VE.getMetadataID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt()));
+
+ Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev);
+ Record.clear();
+}
+
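+// Zig-zag encode the signed value so small magnitudes stay small as a VBR:
+// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, and so on.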
+static uint64_t rotateSign(APInt Val) {
+ int64_t I = Val.getSExtValue();
+ uint64_t U = I;
+ return I < 0 ? ~(U << 1) : U << 1;
+}
+
+static uint64_t rotateSign(DISubrange::BoundType Val) {
+ return rotateSign(Val.get<ConstantInt *>()->getValue());
+}
+
+void DXILBitcodeWriter::writeDISubrange(const DISubrange *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(
+ N->getCount().get<ConstantInt *>()->getValue().getSExtValue());
+ Record.push_back(rotateSign(N->getLowerBound()));
+
+ Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIEnumerator(const DIEnumerator *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(rotateSign(N->getValue()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+
+ Stream.EmitRecord(bitc::METADATA_ENUMERATOR, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIBasicType(const DIBasicType *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(N->getSizeInBits());
+ Record.push_back(N->getAlignInBits());
+ Record.push_back(N->getEncoding());
+
+ Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIDerivedType(const DIDerivedType *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getBaseType()));
+ Record.push_back(N->getSizeInBits());
+ Record.push_back(N->getAlignInBits());
+ Record.push_back(N->getOffsetInBits());
+ Record.push_back(N->getFlags());
+ Record.push_back(VE.getMetadataOrNullID(N->getExtraData()));
+
+ Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDICompositeType(const DICompositeType *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getBaseType()));
+ Record.push_back(N->getSizeInBits());
+ Record.push_back(N->getAlignInBits());
+ Record.push_back(N->getOffsetInBits());
+ Record.push_back(N->getFlags());
+ Record.push_back(VE.getMetadataOrNullID(N->getElements().get()));
+ Record.push_back(N->getRuntimeLang());
+ Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier()));
+
+ Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDISubroutineType(const DISubroutineType *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getFlags());
+ Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get()));
+
+ Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIFile(const DIFile *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawFilename()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory()));
+
+ Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getSourceLanguage());
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawProducer()));
+ Record.push_back(N->isOptimized());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawFlags()));
+ Record.push_back(N->getRuntimeVersion());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename()));
+ Record.push_back(N->getEmissionKind());
+ Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get()));
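+ // Modern compile units no longer carry a subprogram list; the 3.7-era
+ // record layout still reserves the slot, so write a null ID.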
+ Record.push_back(/* subprograms */ 0);
+ Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get()));
+ Record.push_back(N->getDWOId());
+
+ Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDISubprogram(const DISubprogram *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(N->isLocalToUnit());
+ Record.push_back(N->isDefinition());
+ Record.push_back(N->getScopeLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getContainingType()));
+ Record.push_back(N->getVirtuality());
+ Record.push_back(N->getVirtualIndex());
+ Record.push_back(N->getFlags());
+ Record.push_back(N->isOptimized());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawUnit()));
+ Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getDeclaration()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRetainedNodes().get()));
+
+ Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILexicalBlock(const DILexicalBlock *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(N->getColumn());
+
+ Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILexicalBlockFile(
+ const DILexicalBlockFile *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getDiscriminator());
+
+ Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK_FILE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDINamespace(const DINamespace *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(/* line number */ 0);
+
+ Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIModule(const DIModule *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ for (auto &I : N->operands())
+ Record.push_back(VE.getMetadataOrNullID(I));
+
+ Stream.EmitRecord(bitc::METADATA_MODULE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDITemplateTypeParameter(
+ const DITemplateTypeParameter *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+
+ Stream.EmitRecord(bitc::METADATA_TEMPLATE_TYPE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDITemplateValueParameter(
+ const DITemplateValueParameter *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(VE.getMetadataOrNullID(N->getValue()));
+
+ Stream.EmitRecord(bitc::METADATA_TEMPLATE_VALUE, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIGlobalVariable(const DIGlobalVariable *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(N->isLocalToUnit());
+ Record.push_back(N->isDefinition());
+ Record.push_back(/* N->getRawVariable() */ 0);
+ Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
+
+ Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDILocalVariable(const DILocalVariable *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getType()));
+ Record.push_back(N->getArg());
+ Record.push_back(N->getFlags());
+
+ Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIExpression(const DIExpression *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.reserve(N->getElements().size() + 1);
+
+ Record.push_back(N->isDistinct());
+ Record.append(N->elements_begin(), N->elements_end());
+
+ Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev);
+ Record.clear();
+}
+
+void DXILBitcodeWriter::writeDIObjCProperty(const DIObjCProperty *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ llvm_unreachable("DXIL does not support objc!!!");
+}
+
+void DXILBitcodeWriter::writeDIImportedEntity(const DIImportedEntity *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(N->getTag());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getEntity()));
+ Record.push_back(N->getLine());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+
+ Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev);
+ Record.clear();
+}
+
+unsigned DXILBitcodeWriter::createDILocationAbbrev() {
+ // Abbrev for METADATA_LOCATION.
+ //
+ // Assume the column is usually under 128, and always output the inlined-at
+ // location (it's never more expensive than building an array size 1).
+ std::shared_ptr<BitCodeAbbrev> Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ return Stream.EmitAbbrev(std::move(Abbv));
+}
+
+unsigned DXILBitcodeWriter::createGenericDINodeAbbrev() {
+ // Abbrev for METADATA_GENERIC_DEBUG.
+ //
+ // GENERIC_DEBUG: [distinct, tag, vers, header, n x operand].
+ std::shared_ptr<BitCodeAbbrev> Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ return Stream.EmitAbbrev(std::move(Abbv));
+}
+
+void DXILBitcodeWriter::writeMetadataRecords(ArrayRef<const Metadata *> MDs,
+ SmallVectorImpl<uint64_t> &Record,
+ std::vector<unsigned> *MDAbbrevs,
+ std::vector<uint64_t> *IndexPos) {
+ if (MDs.empty())
+ return;
+
+ // Initialize MDNode abbreviations.
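+ // Metadata.def expands HANDLE_MDNODE_LEAF once per concrete MDNode
+ // subclass, declaring a zero-initialized abbrev variable for each.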
+#define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0;
+#include "llvm/IR/Metadata.def"
+
+ for (const Metadata *MD : MDs) {
+ if (IndexPos)
+ IndexPos->push_back(Stream.GetCurrentBitNo());
+ if (const MDNode *N = dyn_cast<MDNode>(MD)) {
+ assert(N->isResolved() && "Expected forward references to be resolved");
+
+ switch (N->getMetadataID()) {
+ default:
+ llvm_unreachable("Invalid MDNode subclass");
+#define HANDLE_MDNODE_LEAF(CLASS) \
+ case Metadata::CLASS##Kind: \
+ if (MDAbbrevs) \
+ write##CLASS(cast<CLASS>(N), Record, \
+ (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \
+ else \
+ write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev); \
+ continue;
+#include "llvm/IR/Metadata.def"
+ }
+ }
+ writeValueAsMetadata(cast<ValueAsMetadata>(MD), Record);
+ }
+}
+
+unsigned DXILBitcodeWriter::createMetadataStringsAbbrev() {
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING_OLD));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ return Stream.EmitAbbrev(std::move(Abbv));
+}
+
+void DXILBitcodeWriter::writeMetadataStrings(
+ ArrayRef<const Metadata *> Strings, SmallVectorImpl<uint64_t> &Record) {
+ for (const Metadata *MD : Strings) {
+ const MDString *MDS = cast<MDString>(MD);
+ // Code: [strchar x N]
+ Record.append(MDS->bytes_begin(), MDS->bytes_end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::METADATA_STRING_OLD, Record,
+ createMetadataStringsAbbrev());
+ Record.clear();
+ }
+}
+
+void DXILBitcodeWriter::writeModuleMetadata() {
+ if (!VE.hasMDs() && M.named_metadata_empty())
+ return;
+
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 5);
+
+ // Emit all abbrevs upfront, so that the reader can jump in the middle of the
+ // block and load any metadata.
+ std::vector<unsigned> MDAbbrevs;
+
+ MDAbbrevs.resize(MetadataAbbrev::LastPlusOne);
+ MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev();
+ MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
+ createGenericDINodeAbbrev();
+
+ unsigned NameAbbrev = 0;
+ if (!M.named_metadata_empty()) {
+ // Abbrev for METADATA_NAME.
+ std::shared_ptr<BitCodeAbbrev> Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ NameAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ }
+
+ SmallVector<uint64_t, 64> Record;
+ writeMetadataStrings(VE.getMDStrings(), Record);
+
+ std::vector<uint64_t> IndexPos;
+ IndexPos.reserve(VE.getNonMDStrings().size());
+ writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos);
+
+ // Write named metadata.
+ for (const NamedMDNode &NMD : M.named_metadata()) {
+ // Write name.
+ StringRef Str = NMD.getName();
+ Record.append(Str.bytes_begin(), Str.bytes_end());
+ Stream.EmitRecord(bitc::METADATA_NAME, Record, NameAbbrev);
+ Record.clear();
+
+ // Write named metadata operands.
+ for (const MDNode *N : NMD.operands())
+ Record.push_back(VE.getMetadataID(N));
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeFunctionMetadata(const Function &F) {
+ if (!VE.hasMDs())
+ return;
+
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4);
+ SmallVector<uint64_t, 64> Record;
+ writeMetadataStrings(VE.getMDStrings(), Record);
+ writeMetadataRecords(VE.getNonMDStrings(), Record);
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeFunctionMetadataAttachment(const Function &F) {
+ Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata attachments
+ // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ F.getAllMetadata(MDs);
+ if (!MDs.empty()) {
+ for (const auto &I : MDs) {
+ Record.push_back(I.first);
+ Record.push_back(VE.getMetadataID(I.second));
+ }
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Record.clear();
+ }
+
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
+ MDs.clear();
+ I.getAllMetadataOtherThanDebugLoc(MDs);
+
+ // If no metadata, ignore instruction.
+ if (MDs.empty())
+ continue;
+
+ Record.push_back(VE.getInstructionID(&I));
+
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ Record.push_back(MDs[i].first);
+ Record.push_back(VE.getMetadataID(MDs[i].second));
+ }
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeModuleMetadataKinds() {
+ SmallVector<uint64_t, 64> Record;
+
+ // Write metadata kinds
+ // METADATA_KIND - [n x [id, name]]
+ SmallVector<StringRef, 8> Names;
+ M.getMDKindNames(Names);
+
+ if (Names.empty())
+ return;
+
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+ for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
+ Record.push_back(MDKindID);
+ StringRef KName = Names[MDKindID];
+ Record.append(KName.begin(), KName.end());
+
+ Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
+ bool isGlobal) {
+ if (FirstVal == LastVal)
+ return;
+
+ Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+ unsigned AggregateAbbrev = 0;
+ unsigned String8Abbrev = 0;
+ unsigned CString7Abbrev = 0;
+ unsigned CString6Abbrev = 0;
+ // If this is a constant pool for the module, emit module-specific abbrevs.
+ if (isGlobal) {
+ // Abbrev for CST_CODE_AGGREGATE.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(
+ BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal + 1)));
+ AggregateAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ // Abbrev for CST_CODE_STRING.
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ String8Abbrev = Stream.EmitAbbrev(std::move(Abbv));
+ // Abbrev for CST_CODE_CSTRING (7-bit).
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ CString7Abbrev = Stream.EmitAbbrev(std::move(Abbv));
+ // Abbrev for CST_CODE_CSTRING (char6).
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ CString6Abbrev = Stream.EmitAbbrev(std::move(Abbv));
+ }
+
+ SmallVector<uint64_t, 64> Record;
+
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+ Type *LastTy = nullptr;
+ for (unsigned i = FirstVal; i != LastVal; ++i) {
+ const Value *V = Vals[i].first;
+ // If we need to switch types, do so now.
+ if (V->getType() != LastTy) {
+ LastTy = V->getType();
+ Record.push_back(getTypeID(LastTy));
+ Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+ CONSTANTS_SETTYPE_ABBREV);
+ Record.clear();
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Record.push_back(unsigned(IA->hasSideEffects()) |
+ unsigned(IA->isAlignStack()) << 1 |
+ unsigned(IA->getDialect() & 1) << 2);
+
+ // Add the asm string.
+ const std::string &AsmStr = IA->getAsmString();
+ Record.push_back(AsmStr.size());
+ Record.append(AsmStr.begin(), AsmStr.end());
+
+ // Add the constraint string.
+ const std::string &ConstraintStr = IA->getConstraintString();
+ Record.push_back(ConstraintStr.size());
+ Record.append(ConstraintStr.begin(), ConstraintStr.end());
+ Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+ Record.clear();
+ continue;
+ }
+ const Constant *C = cast<Constant>(V);
+ unsigned Code = -1U;
+ unsigned AbbrevToUse = 0;
+ if (C->isNullValue()) {
+ Code = bitc::CST_CODE_NULL;
+ } else if (isa<UndefValue>(C)) {
+ Code = bitc::CST_CODE_UNDEF;
+ } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+ if (IV->getBitWidth() <= 64) {
+ uint64_t V = IV->getSExtValue();
+ emitSignedInt64(Record, V);
+ Code = bitc::CST_CODE_INTEGER;
+ AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+ } else { // Wide integers, > 64 bits in size.
+ // We have an arbitrary precision integer value to write whose
+ // bit width is > 64. However, in canonical unsigned integer
+ // format it is likely that the high bits are going to be zero.
+ // So, we only write the number of active words.
+ unsigned NWords = IV->getValue().getActiveWords();
+ const uint64_t *RawWords = IV->getValue().getRawData();
+ for (unsigned i = 0; i != NWords; ++i) {
+ emitSignedInt64(Record, RawWords[i]);
+ }
+ Code = bitc::CST_CODE_WIDE_INTEGER;
+ }
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Code = bitc::CST_CODE_FLOAT;
+ Type *Ty = CFP->getType();
+ if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) {
+ Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ } else if (Ty->isX86_FP80Ty()) {
+ // Copy into a local APInt so getRawData() stays valid; the bits are not
+ // in the same order as a normal i80 APInt, so compensate below.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back((p[1] << 48) | (p[0] >> 16));
+ Record.push_back(p[0] & 0xffffLL);
+ } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) {
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Record.push_back(p[0]);
+ Record.push_back(p[1]);
+ } else {
+ assert(0 && "Unknown FP type!");
+ }
+ } else if (isa<ConstantDataSequential>(C) &&
+ cast<ConstantDataSequential>(C)->isString()) {
+ const ConstantDataSequential *Str = cast<ConstantDataSequential>(C);
+ // Emit constant strings specially.
+ unsigned NumElts = Str->getNumElements();
+ // If this is a null-terminated string, use the denser CSTRING encoding.
+ if (Str->isCString()) {
+ Code = bitc::CST_CODE_CSTRING;
+ --NumElts; // Don't encode the null, which isn't allowed by char6.
+ } else {
+ Code = bitc::CST_CODE_STRING;
+ AbbrevToUse = String8Abbrev;
+ }
+ bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+ bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned char V = Str->getElementAsInteger(i);
+ Record.push_back(V);
+ isCStr7 &= (V & 128) == 0;
+ if (isCStrChar6)
+ isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+ }
+
+ if (isCStrChar6)
+ AbbrevToUse = CString6Abbrev;
+ else if (isCStr7)
+ AbbrevToUse = CString7Abbrev;
+ } else if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ Code = bitc::CST_CODE_DATA;
+ Type *EltTy = CDS->getType()->getArrayElementType();
+ if (isa<IntegerType>(EltTy)) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+ Record.push_back(CDS->getElementAsInteger(i));
+ } else if (EltTy->isFloatTy()) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ float F;
+ uint32_t I;
+ };
+ F = CDS->getElementAsFloat(i);
+ Record.push_back(I);
+ }
+ } else {
+ assert(EltTy->isDoubleTy() && "Unknown ConstantData element type");
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ double F;
+ uint64_t I;
+ };
+ F = CDS->getElementAsDouble(i);
+ Record.push_back(I);
+ }
+ }
+ } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ Code = bitc::CST_CODE_AGGREGATE;
+ for (const Value *Op : C->operands())
+ Record.push_back(VE.getValueID(Op));
+ AbbrevToUse = AggregateAbbrev;
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ default:
+ if (Instruction::isCast(CE->getOpcode())) {
+ Code = bitc::CST_CODE_CE_CAST;
+ Record.push_back(getEncodedCastOpcode(CE->getOpcode()));
+ Record.push_back(getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+ } else {
+ assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+ Code = bitc::CST_CODE_CE_BINOP;
+ Record.push_back(getEncodedBinaryOpcode(CE->getOpcode()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ uint64_t Flags = getOptimizationFlags(CE);
+ if (Flags != 0)
+ Record.push_back(Flags);
+ }
+ break;
+ case Instruction::GetElementPtr: {
+ Code = bitc::CST_CODE_CE_GEP;
+ const auto *GO = cast<GEPOperator>(C);
+ if (GO->isInBounds())
+ Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+ Record.push_back(getTypeID(GO->getSourceElementType()));
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Record.push_back(getTypeID(C->getOperand(i)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(i)));
+ }
+ break;
+ }
+ case Instruction::Select:
+ Code = bitc::CST_CODE_CE_SELECT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::CST_CODE_CE_EXTRACTELT;
+ Record.push_back(getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(getTypeID(C->getOperand(1)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::CST_CODE_CE_INSERTELT;
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(getTypeID(C->getOperand(2)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ShuffleVector:
+ // If the return type and argument types are the same, this is a
+ // standard shufflevector instruction. If the types are different,
+ // then the shuffle is widening or truncating the input vectors, and
+ // the argument type must also be encoded.
+ if (C->getType() == C->getOperand(0)->getType()) {
+ Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+ } else {
+ Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+ Record.push_back(getTypeID(C->getOperand(0)->getType()));
+ }
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getValueID(C->getOperand(2)));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ Code = bitc::CST_CODE_CE_CMP;
+ Record.push_back(getTypeID(C->getOperand(0)->getType()));
+ Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(CE->getPredicate());
+ break;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Code = bitc::CST_CODE_BLOCKADDRESS;
+ Record.push_back(getTypeID(BA->getFunction()->getType()));
+ Record.push_back(VE.getValueID(BA->getFunction()));
+ Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
+ } else {
+#ifndef NDEBUG
+ C->dump();
+#endif
+ llvm_unreachable("Unknown constant!");
+ }
+ Stream.EmitRecord(Code, Record, AbbrevToUse);
+ Record.clear();
+ }
+
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeModuleConstants() {
+ const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+ // Find the first constant to emit, which is the first non-globalvalue value.
+ // We know globalvalues have been emitted by WriteModuleInfo.
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ if (!isa<GlobalValue>(Vals[i].first)) {
+ writeConstants(i, Vals.size(), true);
+ return;
+ }
+ }
+}
+
+/// pushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references. However, most operands are not forward references, so this type
+/// field is not needed.
+///
+/// This function adds V's value ID to Vals. If the value ID is higher than the
+/// instruction ID, then it is a forward reference, and it also includes the
+/// type ID. The value ID that is written is encoded relative to the InstID.
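+///
+/// For example, with InstID 10: an operand with value ID 7 is emitted as the
+/// delta 3, while an operand with value ID 12 is a forward reference, so the
+/// wrapped delta 10 - 12 is emitted followed by the operand's type ID.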
+bool DXILBitcodeWriter::pushValueAndType(const Value *V, unsigned InstID,
+ SmallVectorImpl<unsigned> &Vals) {
+ unsigned ValID = VE.getValueID(V);
+ // Make encoding relative to the InstID.
+ Vals.push_back(InstID - ValID);
+ if (ValID >= InstID) {
+ Vals.push_back(getTypeID(V->getType(), V));
+ return true;
+ }
+ return false;
+}
+
+/// pushValue - Like pushValueAndType, but where the type of the value is
+/// omitted (perhaps it was already encoded in an earlier operand).
+void DXILBitcodeWriter::pushValue(const Value *V, unsigned InstID,
+ SmallVectorImpl<unsigned> &Vals) {
+ unsigned ValID = VE.getValueID(V);
+ Vals.push_back(InstID - ValID);
+}
+
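+/// pushValueSigned - Like pushValue, but emits the delta as a signed VBR so
+/// that forward references (negative deltas, common for PHI operands) stay
+/// compact.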
+void DXILBitcodeWriter::pushValueSigned(const Value *V, unsigned InstID,
+ SmallVectorImpl<uint64_t> &Vals) {
+ unsigned ValID = VE.getValueID(V);
+ int64_t diff = ((int32_t)InstID - (int32_t)ValID);
+ emitSignedInt64(Vals, diff);
+}
+
+/// writeInstruction - Emit an instruction and its operands.
+void DXILBitcodeWriter::writeInstruction(const Instruction &I, unsigned InstID,
+ SmallVectorImpl<unsigned> &Vals) {
+ unsigned Code = 0;
+ unsigned AbbrevToUse = 0;
+ VE.setInstructionID(&I);
+ switch (I.getOpcode()) {
+ default:
+ if (Instruction::isCast(I.getOpcode())) {
+ Code = bitc::FUNC_CODE_INST_CAST;
+ if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+ AbbrevToUse = (unsigned)FUNCTION_INST_CAST_ABBREV;
+ Vals.push_back(getTypeID(I.getType(), &I));
+ Vals.push_back(getEncodedCastOpcode(I.getOpcode()));
+ } else {
+ assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+ Code = bitc::FUNC_CODE_INST_BINOP;
+ if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+ AbbrevToUse = (unsigned)FUNCTION_INST_BINOP_ABBREV;
+ pushValue(I.getOperand(1), InstID, Vals);
+ Vals.push_back(getEncodedBinaryOpcode(I.getOpcode()));
+ uint64_t Flags = getOptimizationFlags(&I);
+ if (Flags != 0) {
+ if (AbbrevToUse == (unsigned)FUNCTION_INST_BINOP_ABBREV)
+ AbbrevToUse = (unsigned)FUNCTION_INST_BINOP_FLAGS_ABBREV;
+ Vals.push_back(Flags);
+ }
+ }
+ break;
+
+ case Instruction::GetElementPtr: {
+ Code = bitc::FUNC_CODE_INST_GEP;
+ AbbrevToUse = (unsigned)FUNCTION_INST_GEP_ABBREV;
+ auto &GEPInst = cast<GetElementPtrInst>(I);
+ Vals.push_back(GEPInst.isInBounds());
+ Vals.push_back(getTypeID(GEPInst.getSourceElementType()));
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ pushValueAndType(I.getOperand(i), InstID, Vals);
+ break;
+ }
+ case Instruction::ExtractValue: {
+ Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+ Vals.append(EVI->idx_begin(), EVI->idx_end());
+ break;
+ }
+ case Instruction::InsertValue: {
+ Code = bitc::FUNC_CODE_INST_INSERTVAL;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ pushValueAndType(I.getOperand(1), InstID, Vals);
+ const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+ Vals.append(IVI->idx_begin(), IVI->idx_end());
+ break;
+ }
+ case Instruction::Select:
+ Code = bitc::FUNC_CODE_INST_VSELECT;
+ pushValueAndType(I.getOperand(1), InstID, Vals);
+ pushValue(I.getOperand(2), InstID, Vals);
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ break;
+ case Instruction::ExtractElement:
+ Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ pushValueAndType(I.getOperand(1), InstID, Vals);
+ break;
+ case Instruction::InsertElement:
+ Code = bitc::FUNC_CODE_INST_INSERTELT;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ pushValue(I.getOperand(1), InstID, Vals);
+ pushValueAndType(I.getOperand(2), InstID, Vals);
+ break;
+ case Instruction::ShuffleVector:
+ Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ pushValue(I.getOperand(1), InstID, Vals);
+ pushValue(I.getOperand(2), InstID, Vals);
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // compare returning Int1Ty or vector of Int1Ty
+ Code = bitc::FUNC_CODE_INST_CMP2;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ pushValue(I.getOperand(1), InstID, Vals);
+ Vals.push_back(cast<CmpInst>(I).getPredicate());
+ uint64_t Flags = getOptimizationFlags(&I);
+ if (Flags != 0)
+ Vals.push_back(Flags);
+ break;
+ }
+
+ case Instruction::Ret: {
+ Code = bitc::FUNC_CODE_INST_RET;
+ unsigned NumOperands = I.getNumOperands();
+ if (NumOperands == 0)
+ AbbrevToUse = (unsigned)FUNCTION_INST_RET_VOID_ABBREV;
+ else if (NumOperands == 1) {
+ if (!pushValueAndType(I.getOperand(0), InstID, Vals))
+ AbbrevToUse = (unsigned)FUNCTION_INST_RET_VAL_ABBREV;
+ } else {
+ for (unsigned i = 0, e = NumOperands; i != e; ++i)
+ pushValueAndType(I.getOperand(i), InstID, Vals);
+ }
+ } break;
+ case Instruction::Br: {
+ Code = bitc::FUNC_CODE_INST_BR;
+ const BranchInst &II = cast<BranchInst>(I);
+ Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+ if (II.isConditional()) {
+ Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+ pushValue(II.getCondition(), InstID, Vals);
+ }
+ } break;
+ case Instruction::Switch: {
+ Code = bitc::FUNC_CODE_INST_SWITCH;
+ const SwitchInst &SI = cast<SwitchInst>(I);
+ Vals.push_back(getTypeID(SI.getCondition()->getType()));
+ pushValue(SI.getCondition(), InstID, Vals);
+ Vals.push_back(VE.getValueID(SI.getDefaultDest()));
+ for (auto Case : SI.cases()) {
+ Vals.push_back(VE.getValueID(Case.getCaseValue()));
+ Vals.push_back(VE.getValueID(Case.getCaseSuccessor()));
+ }
+ } break;
+ case Instruction::IndirectBr:
+ Code = bitc::FUNC_CODE_INST_INDIRECTBR;
+ Vals.push_back(getTypeID(I.getOperand(0)->getType()));
+ // Encode the address operand as relative, but not the basic blocks.
+ pushValue(I.getOperand(0), InstID, Vals);
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i)
+ Vals.push_back(VE.getValueID(I.getOperand(i)));
+ break;
+
+ case Instruction::Invoke: {
+ const InvokeInst *II = cast<InvokeInst>(&I);
+ const Value *Callee = II->getCalledOperand();
+ FunctionType *FTy = II->getFunctionType();
+ Code = bitc::FUNC_CODE_INST_INVOKE;
+
+ Vals.push_back(VE.getAttributeListID(II->getAttributes()));
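+ // Calling convention, with bit 13 set to mark that an explicit function
+ // type operand follows.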
+ Vals.push_back(II->getCallingConv() | 1 << 13);
+ Vals.push_back(VE.getValueID(II->getNormalDest()));
+ Vals.push_back(VE.getValueID(II->getUnwindDest()));
+ Vals.push_back(getTypeID(FTy));
+ pushValueAndType(Callee, InstID, Vals);
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ pushValue(I.getOperand(i), InstID, Vals); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = I.getNumOperands() - 3; i != e;
+ ++i)
+ pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
+ }
+ break;
+ }
+ case Instruction::Resume:
+ Code = bitc::FUNC_CODE_INST_RESUME;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ break;
+ case Instruction::Unreachable:
+ Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+ AbbrevToUse = (unsigned)FUNCTION_INST_UNREACHABLE_ABBREV;
+ break;
+
+ case Instruction::PHI: {
+ const PHINode &PN = cast<PHINode>(I);
+ Code = bitc::FUNC_CODE_INST_PHI;
+ // With the newer instruction encoding, forward references could give
+ // negative valued IDs. This is most common for PHIs, so we use
+ // signed VBRs.
+ SmallVector<uint64_t, 128> Vals64;
+ Vals64.push_back(getTypeID(PN.getType()));
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ pushValueSigned(PN.getIncomingValue(i), InstID, Vals64);
+ Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+ }
+ // Emit a Vals64 vector and exit.
+ Stream.EmitRecord(Code, Vals64, AbbrevToUse);
+ Vals64.clear();
+ return;
+ }
+
+ case Instruction::LandingPad: {
+ const LandingPadInst &LP = cast<LandingPadInst>(I);
+ Code = bitc::FUNC_CODE_INST_LANDINGPAD;
+ Vals.push_back(getTypeID(LP.getType()));
+ Vals.push_back(LP.isCleanup());
+ Vals.push_back(LP.getNumClauses());
+ for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) {
+ if (LP.isCatch(I))
+ Vals.push_back(LandingPadInst::Catch);
+ else
+ Vals.push_back(LandingPadInst::Filter);
+ pushValueAndType(LP.getClause(I), InstID, Vals);
+ }
+ break;
+ }
+
+ case Instruction::Alloca: {
+ Code = bitc::FUNC_CODE_INST_ALLOCA;
+ const AllocaInst &AI = cast<AllocaInst>(I);
+ Vals.push_back(getTypeID(AI.getAllocatedType()));
+ Vals.push_back(getTypeID(I.getOperand(0)->getType()));
+ Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+ using APV = AllocaPackedValues;
+ unsigned Record = 0;
+ unsigned EncodedAlign = getEncodedAlign(AI.getAlign());
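+ // getEncodedAlign yields Log2(align) + 1; split it across the packed
+ // operand's AlignLower and AlignUpper bitfields.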
+ Bitfield::set<APV::AlignLower>(
+ Record, EncodedAlign & ((1 << APV::AlignLower::Bits) - 1));
+ Bitfield::set<APV::AlignUpper>(Record,
+ EncodedAlign >> APV::AlignLower::Bits);
+ Bitfield::set<APV::UsedWithInAlloca>(Record, AI.isUsedWithInAlloca());
+ Vals.push_back(Record);
+ break;
+ }
+
+ case Instruction::Load:
+ if (cast<LoadInst>(I).isAtomic()) {
+ Code = bitc::FUNC_CODE_INST_LOADATOMIC;
+ pushValueAndType(I.getOperand(0), InstID, Vals);
+ } else {
+ Code = bitc::FUNC_CODE_INST_LOAD;
+ if (!pushValueAndType(I.getOperand(0), InstID, Vals)) // ptr
+ AbbrevToUse = (unsigned)FUNCTION_INST_LOAD_ABBREV;
+ }
+ Vals.push_back(getTypeID(I.getType()));
+ Vals.push_back(Log2(cast<LoadInst>(I).getAlign()) + 1);
+ Vals.push_back(cast<LoadInst>(I).isVolatile());
+ if (cast<LoadInst>(I).isAtomic()) {
+ Vals.push_back(getEncodedOrdering(cast<LoadInst>(I).getOrdering()));
+ Vals.push_back(getEncodedSyncScopeID(cast<LoadInst>(I).getSyncScopeID()));
+ }
+ break;
+ case Instruction::Store:
+ if (cast<StoreInst>(I).isAtomic())
+ Code = bitc::FUNC_CODE_INST_STOREATOMIC;
+ else
+ Code = bitc::FUNC_CODE_INST_STORE;
+ pushValueAndType(I.getOperand(1), InstID, Vals); // ptrty + ptr
+ pushValueAndType(I.getOperand(0), InstID, Vals); // valty + val
+ Vals.push_back(Log2(cast<StoreInst>(I).getAlign()) + 1);
+ Vals.push_back(cast<StoreInst>(I).isVolatile());
+ if (cast<StoreInst>(I).isAtomic()) {
+ Vals.push_back(getEncodedOrdering(cast<StoreInst>(I).getOrdering()));
+ Vals.push_back(
+ getEncodedSyncScopeID(cast<StoreInst>(I).getSyncScopeID()));
+ }
+ break;
+ case Instruction::AtomicCmpXchg:
+ Code = bitc::FUNC_CODE_INST_CMPXCHG;
+ pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
+ pushValueAndType(I.getOperand(1), InstID, Vals); // cmp.
+ pushValue(I.getOperand(2), InstID, Vals); // newval.
+ Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile());
+ Vals.push_back(
+ getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getSuccessOrdering()));
+ Vals.push_back(
+ getEncodedSyncScopeID(cast<AtomicCmpXchgInst>(I).getSyncScopeID()));
+ Vals.push_back(
+ getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getFailureOrdering()));
+ Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak());
+ break;
+ case Instruction::AtomicRMW:
+ Code = bitc::FUNC_CODE_INST_ATOMICRMW;
+ pushValueAndType(I.getOperand(0), InstID, Vals); // ptrty + ptr
+ pushValue(I.getOperand(1), InstID, Vals); // val.
+ Vals.push_back(
+ getEncodedRMWOperation(cast<AtomicRMWInst>(I).getOperation()));
+ Vals.push_back(cast<AtomicRMWInst>(I).isVolatile());
+ Vals.push_back(getEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering()));
+ Vals.push_back(
+ getEncodedSyncScopeID(cast<AtomicRMWInst>(I).getSyncScopeID()));
+ break;
+ case Instruction::Fence:
+ Code = bitc::FUNC_CODE_INST_FENCE;
+ Vals.push_back(getEncodedOrdering(cast<FenceInst>(I).getOrdering()));
+ Vals.push_back(getEncodedSyncScopeID(cast<FenceInst>(I).getSyncScopeID()));
+ break;
+ case Instruction::Call: {
+ const CallInst &CI = cast<CallInst>(I);
+ FunctionType *FTy = CI.getFunctionType();
+
+ Code = bitc::FUNC_CODE_INST_CALL;
+
+ Vals.push_back(VE.getAttributeListID(CI.getAttributes()));
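+ // Packed flags: the calling convention is shifted left one; bit 0 is tail
+ // call, bit 14 is musttail, and bit 15 marks an explicit function type.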
+ Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) |
+ unsigned(CI.isMustTailCall()) << 14 | 1 << 15);
+ Vals.push_back(getTypeID(FTy, CI.getCalledFunction()));
+ pushValueAndType(CI.getCalledOperand(), InstID, Vals); // Callee
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ // Check for labels (can happen with asm labels).
+ if (FTy->getParamType(i)->isLabelTy())
+ Vals.push_back(VE.getValueID(CI.getArgOperand(i)));
+ else
+ pushValue(CI.getArgOperand(i), InstID, Vals); // fixed param.
+ }
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = CI.arg_size(); i != e; ++i)
+ pushValueAndType(CI.getArgOperand(i), InstID, Vals); // varargs
+ }
+ break;
+ }
+ case Instruction::VAArg:
+ Code = bitc::FUNC_CODE_INST_VAARG;
+ Vals.push_back(getTypeID(I.getOperand(0)->getType())); // valistty
+ pushValue(I.getOperand(0), InstID, Vals); // valist.
+ Vals.push_back(getTypeID(I.getType())); // restype.
+ break;
+ }
+
+ Stream.EmitRecord(Code, Vals, AbbrevToUse);
+ Vals.clear();
+}
+
+// Emit the function-level value symbol table (names for locals, blocks, etc).
+void DXILBitcodeWriter::writeFunctionLevelValueSymbolTable(
+ const ValueSymbolTable &VST) {
+ if (VST.empty())
+ return;
+ Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+ SmallVector<unsigned, 64> NameVals;
+
+ // HLSL Change
+ // Read the named values from a sorted list instead of the original list so
+ // the emitted binary is deterministic regardless of value creation order.
+ SmallVector<const ValueName *, 16> SortedTable;
+
+ for (auto &VI : VST) {
+ SortedTable.push_back(VI.second->getValueName());
+ }
+ // The keys are unique, so there shouldn't be stability issues.
+ std::sort(SortedTable.begin(), SortedTable.end(),
+ [](const ValueName *A, const ValueName *B) {
+ return A->first() < B->first();
+ });
+
+ for (const ValueName *SI : SortedTable) {
+ auto &Name = *SI;
+
+ // Figure out the encoding to use for the name.
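+ // Char6 packs [a-zA-Z0-9._] into 6 bits per character; 7-bit covers other
+ // ASCII-only names; anything with the high bit set needs the 8-bit form.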
+ bool is7Bit = true;
+ bool isChar6 = true;
+ for (const char *C = Name.getKeyData(), *E = C + Name.getKeyLength();
+ C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128) {
+ is7Bit = false;
+ break; // don't bother scanning the rest.
+ }
+ }
+
+ unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+
+ // VST_ENTRY: [valueid, namechar x N]
+ // VST_BBENTRY: [bbid, namechar x N]
+ unsigned Code;
+ if (isa<BasicBlock>(SI->getValue())) {
+ Code = bitc::VST_CODE_BBENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_BBENTRY_6_ABBREV;
+ } else {
+ Code = bitc::VST_CODE_ENTRY;
+ if (isChar6)
+ AbbrevToUse = VST_ENTRY_6_ABBREV;
+ else if (is7Bit)
+ AbbrevToUse = VST_ENTRY_7_ABBREV;
+ }
+
+ NameVals.push_back(VE.getValueID(SI->getValue()));
+ for (const char *P = Name.getKeyData(),
+ *E = Name.getKeyData() + Name.getKeyLength();
+ P != E; ++P)
+ NameVals.push_back((unsigned char)*P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(Code, NameVals, AbbrevToUse);
+ NameVals.clear();
+ }
+ Stream.ExitBlock();
+}
+
+void DXILBitcodeWriter::writeUseList(UseListOrder &&Order) {
+ assert(Order.Shuffle.size() >= 2 && "Shuffle too small");
+ unsigned Code;
+ if (isa<BasicBlock>(Order.V))
+ Code = bitc::USELIST_CODE_BB;
+ else
+ Code = bitc::USELIST_CODE_DEFAULT;
+
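+ // USELIST record: the shuffle permutation followed by the ID of the value
+ // whose use list it reorders.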
+ SmallVector<uint64_t, 64> Record(Order.Shuffle.begin(), Order.Shuffle.end());
+ Record.push_back(VE.getValueID(Order.V));
+ Stream.EmitRecord(Code, Record);
+}
+
+void DXILBitcodeWriter::writeUseListBlock(const Function *F) {
+ auto hasMore = [&]() {
+ return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F;
+ };
+ if (!hasMore())
+ // Nothing to do.
+ return;
+
+ Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+ while (hasMore()) {
+ writeUseList(std::move(VE.UseListOrders.back()));
+ VE.UseListOrders.pop_back();
+ }
+ Stream.ExitBlock();
+}
+
+/// Emit a function body to the module stream.
+void DXILBitcodeWriter::writeFunction(const Function &F) {
+ Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+ VE.incorporateFunction(F);
+
+ SmallVector<unsigned, 64> Vals;
+
+ // Emit the number of basic blocks, so the reader can create them ahead of
+ // time.
+ Vals.push_back(VE.getBasicBlocks().size());
+ Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+ Vals.clear();
+
+ // If there are function-local constants, emit them now.
+ unsigned CstStart, CstEnd;
+ VE.getFunctionConstantRange(CstStart, CstEnd);
+ writeConstants(CstStart, CstEnd, false);
+
+ // If there is function-local metadata, emit it now.
+ writeFunctionMetadata(F);
+
+ // Keep a running idea of what the instruction ID is.
+ unsigned InstID = CstEnd;
+
+ bool NeedsMetadataAttachment = F.hasMetadata();
+
+ DILocation *LastDL = nullptr;
+
+ // Finally, emit all the instructions, in order.
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ writeInstruction(*I, InstID, Vals);
+
+ if (!I->getType()->isVoidTy())
+ ++InstID;
+
+ // If the instruction has metadata, write a metadata attachment later.
+ NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+
+ // If the instruction has a debug location, emit it.
+ DILocation *DL = I->getDebugLoc();
+ if (!DL)
+ continue;
+
+ if (DL == LastDL) {
+ // Just repeat the same debug loc as last time.
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
+ continue;
+ }
+
+ Vals.push_back(DL->getLine());
+ Vals.push_back(DL->getColumn());
+ Vals.push_back(VE.getMetadataOrNullID(DL->getScope()));
+ Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt()));
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Vals.clear();
+
+ LastDL = DL;
+ }
+
+ // Emit names for all the instructions etc.
+ if (auto *Symtab = F.getValueSymbolTable())
+ writeFunctionLevelValueSymbolTable(*Symtab);
+
+ if (NeedsMetadataAttachment)
+ writeFunctionMetadataAttachment(F);
+
+ writeUseListBlock(&F);
+ VE.purgeFunction();
+ Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+void DXILBitcodeWriter::writeBlockInfo() {
+ // We only want to emit block info records for blocks that have multiple
+ // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK.
+ // Other blocks can define their abbrevs inline.
+ Stream.EnterBlockInfoBlock();
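+ // Abbrevs must be registered in the same order as their *_ABBREV enum
+ // values; the checks below catch any drift.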
+
+ { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ std::move(Abbv)) != VST_ENTRY_8_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+
+ { // 7-bit fixed width VST_ENTRY strings.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ std::move(Abbv)) != VST_ENTRY_7_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_ENTRY strings.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ std::move(Abbv)) != VST_ENTRY_6_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // 6-bit char6 VST_BBENTRY strings.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+ std::move(Abbv)) != VST_BBENTRY_6_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+
+ { // SETTYPE abbrev for CONSTANTS_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ VE.computeBitsRequiredForTypeIndicies()));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, std::move(Abbv)) !=
+ CONSTANTS_SETTYPE_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+
+ { // INTEGER abbrev for CONSTANTS_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, std::move(Abbv)) !=
+ CONSTANTS_INTEGER_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+
+ { // CE_CAST abbrev for CONSTANTS_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid
+ VE.computeBitsRequiredForTypeIndicies()));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id
+
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, std::move(Abbv)) !=
+ CONSTANTS_CE_CAST_Abbrev)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // NULL abbrev for CONSTANTS_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+ if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, std::move(Abbv)) !=
+ CONSTANTS_NULL_Abbrev)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+
+ // FIXME: This should only use space for first class types!
+
+ { // INST_LOAD abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ VE.computeBitsRequiredForTypeIndicies()));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_LOAD_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_BINOP_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_BINOP_FLAGS_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // INST_CAST abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ VE.computeBitsRequiredForTypeIndicies()));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_CAST_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_RET_VOID_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // INST_RET abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_RET_VAL_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK.
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_UNREACHABLE_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+ {
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty
+ Log2_32_Ceil(VE.getTypes().size() + 1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, std::move(Abbv)) !=
+ (unsigned)FUNCTION_INST_GEP_ABBREV)
+ assert(false && "Unexpected abbrev ordering!");
+ }
+
+ Stream.ExitBlock();
+}
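
Each abbreviation above mixes fixed-width fields with VBR (variable bit-rate) fields. A VBR-N operand is emitted in chunks of N-1 payload bits whose top bit says whether another chunk follows, so small values — the common case for value IDs — stay small. A self-contained sketch of the chunking, assuming the usual LLVM bitcode VBR rules; the real writer packs the chunks into the bitstream rather than a vector:

    #include <cstdint>
    #include <vector>

    // Split Val into VBR-N chunks: N-1 payload bits per chunk, high bit
    // set on every chunk except the last. Assumes N >= 2 (6 and 8 above).
    std::vector<uint32_t> encodeVBR(uint64_t Val, unsigned N) {
      const uint64_t HiBit = 1ull << (N - 1); // Continuation flag.
      const uint64_t Mask = HiBit - 1;        // Payload bits per chunk.
      std::vector<uint32_t> Chunks;
      do {
        uint64_t Chunk = Val & Mask;
        Val >>= (N - 1);
        Chunks.push_back(static_cast<uint32_t>(Chunk | (Val ? HiBit : 0)));
      } while (Val);
      return Chunks;
    }

For example, encodeVBR(300, 6) produces 0b101100 (payload 12, continue bit set) followed by 0b001001 (payload 9), i.e. 9 * 32 + 12 = 300, while any value below 32 costs a single chunk.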
+
+void DXILBitcodeWriter::writeModuleVersion() {
+ // VERSION: [version#]
+ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef<unsigned>{1});
+}
+
+/// Emit the specified module to the bitstream.
+void DXILBitcodeWriter::write() {
+ // The identification block is new since llvm-3.7, but the old bitcode reader
+ // will skip it.
+ // writeIdentificationBlock(Stream);
+
+ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+
+ // It is redundant to fully specify this here, but doing so makes it
+ // explicit that the DXIL module version differs.
+ DXILBitcodeWriter::writeModuleVersion();
+
+ // Emit blockinfo, which defines the standard abbreviations etc.
+ writeBlockInfo();
+
+ // Emit information about attribute groups.
+ writeAttributeGroupTable();
+
+ // Emit information about parameter attributes.
+ writeAttributeTable();
+
+ // Emit information describing all of the types in the module.
+ writeTypeTable();
+
+ writeComdats();
+
+ // Emit top-level description of module, including target triple, inline asm,
+ // descriptors for global variables, and function prototype info.
+ writeModuleInfo();
+
+ // Emit constants.
+ writeModuleConstants();
+
+ // Emit metadata kinds.
+ writeModuleMetadataKinds();
+
+ // Emit metadata.
+ writeModuleMetadata();
+
+ // Emit names for globals/functions etc.
+ // DXIL uses the same format for module-level value symbol table as for the
+ // function level table.
+ writeFunctionLevelValueSymbolTable(M.getValueSymbolTable());
+
+ // Emit module-level use-lists.
+ writeUseListBlock(nullptr);
+
+ // Emit function bodies.
+ for (const Function &F : M)
+ if (!F.isDeclaration())
+ writeFunction(F);
+
+ Stream.ExitBlock();
+}
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
new file mode 100644
index 000000000000..289f692f0f82
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
@@ -0,0 +1,82 @@
+//===- DXILWriter/DXILBitcodeWriter.h - DXIL Bitcode Writer -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares the interface to the DXIL bitcode writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class BitstreamWriter;
+class Module;
+class raw_fd_stream;
+class raw_ostream;
+
+namespace dxil {
+
+class BitcodeWriter {
+ SmallVectorImpl<char> &Buffer;
+ std::unique_ptr<BitstreamWriter> Stream;
+
+ StringTableBuilder StrtabBuilder{StringTableBuilder::RAW};
+
+ // Owns any strings created by the irsymtab writer until we create the
+ // string table.
+ BumpPtrAllocator Alloc;
+
+ bool WroteStrtab = false, WroteSymtab = false;
+
+ void writeBlob(unsigned Block, unsigned Record, StringRef Blob);
+
+ std::vector<Module *> Mods;
+
+public:
+ /// Create a BitcodeWriter that writes to Buffer.
+ BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr);
+
+ ~BitcodeWriter();
+
+ /// Attempt to write a symbol table to the bitcode file. This must be called
+ /// at most once after all modules have been written.
+ ///
+ /// A reader does not require a symbol table to interpret a bitcode file;
+ /// the symbol table is needed only to improve link-time performance. So
+ /// this function may decide not to write a symbol table. It may so decide
+ /// if, for example, the target is unregistered or the IR is malformed.
+ void writeSymtab();
+
+ /// Write the bitcode file's string table. This must be called exactly once
+ /// after all modules and the optional symbol table have been written.
+ void writeStrtab();
+
+ /// Copy the string table for another module into this bitcode file. This
+ /// should be called after copying the module itself into the bitcode file.
+ void copyStrtab(StringRef Strtab);
+
+ /// Write the specified module to the buffer specified at construction time.
+ void writeModule(const Module &M);
+};
+
+/// Write the specified module to the specified raw output stream.
+///
+/// For streams where it matters, the given stream should be in "binary"
+/// mode.
+void WriteDXILToFile(const Module &M, raw_ostream &Out);
+
+} // namespace dxil
+
+} // namespace llvm
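
Putting the declarations above together, driving the low-level writer might look like this sketch (error handling and symbol-table policy elided; the emitDXIL name is invented for illustration):

    #include "DXILBitcodeWriter.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"

    // Serialize M into an in-memory buffer, honoring the documented call
    // order: writeModule, then the optional writeSymtab, then writeStrtab
    // exactly once.
    void emitDXIL(const llvm::Module &M, llvm::raw_ostream &OS) {
      llvm::SmallVector<char, 0> Buffer;
      llvm::dxil::BitcodeWriter Writer(Buffer);
      Writer.writeModule(M);
      Writer.writeSymtab(); // May legitimately decide not to emit one.
      Writer.writeStrtab(); // Required, and only valid once, after the above.
      OS.write(Buffer.data(), Buffer.size());
    }

For the common case, llvm::dxil::WriteDXILToFile(M, OS) wraps the same steps behind a single call.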
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
new file mode 100644
index 000000000000..08944ee3f1fe
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
@@ -0,0 +1,1147 @@
+//===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+// Forked from lib/Bitcode/Writer
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILValueEnumerator.h"
+#include "DXILPointerType.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <tuple>
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+namespace {
+
+struct OrderMap {
+ DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
+ unsigned LastGlobalConstantID = 0;
+ unsigned LastGlobalValueID = 0;
+
+ OrderMap() = default;
+
+ bool isGlobalConstant(unsigned ID) const {
+ return ID <= LastGlobalConstantID;
+ }
+
+ bool isGlobalValue(unsigned ID) const {
+ return ID <= LastGlobalValueID && !isGlobalConstant(ID);
+ }
+
+ unsigned size() const { return IDs.size(); }
+ std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }
+
+ std::pair<unsigned, bool> lookup(const Value *V) const {
+ return IDs.lookup(V);
+ }
+
+ void index(const Value *V) {
+ // Explicitly sequence get-size and insert-value operations to avoid UB.
+ unsigned ID = IDs.size() + 1;
+ IDs[V].first = ID;
+ }
+};
+
+} // end anonymous namespace
+
+static void orderValue(const Value *V, OrderMap &OM) {
+ if (OM.lookup(V).first)
+ return;
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (C->getNumOperands() && !isa<GlobalValue>(C)) {
+ for (const Value *Op : C->operands())
+ if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
+ orderValue(Op, OM);
+ if (auto *CE = dyn_cast<ConstantExpr>(C))
+ if (CE->getOpcode() == Instruction::ShuffleVector)
+ orderValue(CE->getShuffleMaskForBitcode(), OM);
+ }
+ }
+
+ // Note: we cannot cache this lookup above, since inserting into the map
+ // changes the map's size, and thus affects the other IDs.
+ OM.index(V);
+}
+
+static OrderMap orderModule(const Module &M) {
+ // This needs to match the order used by ValueEnumerator::ValueEnumerator()
+ // and ValueEnumerator::incorporateFunction().
+ OrderMap OM;
+
+ // In the reader, initializers of GlobalValues are set *after* all the
+ // globals have been read. Rather than awkwardly modeling this behaviour
+ // directly in predictValueUseListOrderImpl(), just assign IDs to
+ // initializers of GlobalValues before GlobalValues themselves to model this
+ // implicitly.
+ for (const GlobalVariable &G : M.globals())
+ if (G.hasInitializer())
+ if (!isa<GlobalValue>(G.getInitializer()))
+ orderValue(G.getInitializer(), OM);
+ for (const GlobalAlias &A : M.aliases())
+ if (!isa<GlobalValue>(A.getAliasee()))
+ orderValue(A.getAliasee(), OM);
+ for (const GlobalIFunc &I : M.ifuncs())
+ if (!isa<GlobalValue>(I.getResolver()))
+ orderValue(I.getResolver(), OM);
+ for (const Function &F : M) {
+ for (const Use &U : F.operands())
+ if (!isa<GlobalValue>(U.get()))
+ orderValue(U.get(), OM);
+ }
+
+ // As constants used in metadata operands are emitted as module-level
+ // constants, we must order them before other operands. Also, we must order
+ // these before global values, as these will be read before setting the
+ // global values' initializers. The latter matters for constants which have
+ // uses towards other constants that are used as initializers.
+ auto orderConstantValue = [&OM](const Value *V) {
+ if ((isa<Constant>(V) && !isa<GlobalValue>(V)) || isa<InlineAsm>(V))
+ orderValue(V, OM);
+ };
+ for (const Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ for (const Value *V : I.operands()) {
+ if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) {
+ if (const auto *VAM =
+ dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
+ orderConstantValue(VAM->getValue());
+ } else if (const auto *AL =
+ dyn_cast<DIArgList>(MAV->getMetadata())) {
+ for (const auto *VAM : AL->getArgs())
+ orderConstantValue(VAM->getValue());
+ }
+ }
+ }
+ }
+ OM.LastGlobalConstantID = OM.size();
+
+ // Initializers of GlobalValues are processed in
+ // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
+ // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
+ // by giving IDs in reverse order.
+ //
+ // Since GlobalValues never reference each other directly (just through
+ // initializers), their relative IDs only matter for determining order of
+ // uses in their initializers.
+ for (const Function &F : M)
+ orderValue(&F, OM);
+ for (const GlobalAlias &A : M.aliases())
+ orderValue(&A, OM);
+ for (const GlobalIFunc &I : M.ifuncs())
+ orderValue(&I, OM);
+ for (const GlobalVariable &G : M.globals())
+ orderValue(&G, OM);
+ OM.LastGlobalValueID = OM.size();
+
+ for (const Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ // Here we need to match the union of ValueEnumerator::incorporateFunction()
+ // and WriteFunction(). Basic blocks are implicitly declared before
+ // anything else (by declaring their size).
+ for (const BasicBlock &BB : F)
+ orderValue(&BB, OM);
+ for (const Argument &A : F.args())
+ orderValue(&A, OM);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
+ for (const Value *Op : I.operands())
+ if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
+ isa<InlineAsm>(*Op))
+ orderValue(Op, OM);
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+ orderValue(SVI->getShuffleMaskForBitcode(), OM);
+ }
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ orderValue(&I, OM);
+ }
+ return OM;
+}
+
+static void predictValueUseListOrderImpl(const Value *V, const Function *F,
+ unsigned ID, const OrderMap &OM,
+ UseListOrderStack &Stack) {
+ // Predict use-list order for this one.
+ using Entry = std::pair<const Use *, unsigned>;
+ SmallVector<Entry, 64> List;
+ for (const Use &U : V->uses())
+ // Check if this user will be serialized.
+ if (OM.lookup(U.getUser()).first)
+ List.push_back(std::make_pair(&U, List.size()));
+
+ if (List.size() < 2)
+ // We may have lost some users.
+ return;
+
+ bool IsGlobalValue = OM.isGlobalValue(ID);
+ llvm::sort(List, [&](const Entry &L, const Entry &R) {
+ const Use *LU = L.first;
+ const Use *RU = R.first;
+ if (LU == RU)
+ return false;
+
+ auto LID = OM.lookup(LU->getUser()).first;
+ auto RID = OM.lookup(RU->getUser()).first;
+
+ // Global values are processed in reverse order.
+ //
+ // Moreover, initializers of GlobalValues are set *after* all the globals
+ // have been read (despite having earlier IDs). Rather than awkwardly
+ // modeling this behaviour here, orderModule() has assigned IDs to
+ // initializers of GlobalValues before GlobalValues themselves.
+ if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) {
+ if (LID == RID)
+ return LU->getOperandNo() > RU->getOperandNo();
+ return LID < RID;
+ }
+
+ // If ID is 4, then expect: 7 6 5 1 2 3.
+ if (LID < RID) {
+ if (RID <= ID)
+ if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+ return true;
+ return false;
+ }
+ if (RID < LID) {
+ if (LID <= ID)
+ if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+ return false;
+ return true;
+ }
+
+ // LID and RID are equal, so we have different operands of the same user.
+ // Assume operands are added in order for all instructions.
+ if (LID <= ID)
+ if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+ return LU->getOperandNo() < RU->getOperandNo();
+ return LU->getOperandNo() > RU->getOperandNo();
+ });
+
+ if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) {
+ return L.second < R.second;
+ }))
+ // Order is already correct.
+ return;
+
+ // Store the shuffle.
+ Stack.emplace_back(V, F, List.size());
+ assert(List.size() == Stack.back().Shuffle.size() && "Wrong size");
+ for (size_t I = 0, E = List.size(); I != E; ++I)
+ Stack.back().Shuffle[I] = List[I].second;
+}
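
The "7 6 5 1 2 3" comment is easy to check with the non-GlobalValue branch of the comparator reduced to bare IDs: users numbered after the value come first in reverse, users numbered at or before it keep forward order. A standalone sketch:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    int main() {
      const unsigned ID = 4; // The value's own ID.
      std::vector<unsigned> UserIDs = {1, 2, 3, 5, 6, 7};
      // Same decisions as the lambda above for the !IsGlobalValue case.
      std::sort(UserIDs.begin(), UserIDs.end(),
                [ID](unsigned LID, unsigned RID) {
                  if (LID < RID)
                    return RID <= ID;
                  if (RID < LID)
                    return LID > ID;
                  return false;
                });
      for (unsigned U : UserIDs)
        std::cout << U << ' '; // Prints: 7 6 5 1 2 3
      std::cout << '\n';
    }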
+
+static void predictValueUseListOrder(const Value *V, const Function *F,
+ OrderMap &OM, UseListOrderStack &Stack) {
+ auto &IDPair = OM[V];
+ assert(IDPair.first && "Unmapped value");
+ if (IDPair.second)
+ // Already predicted.
+ return;
+
+ // Do the actual prediction.
+ IDPair.second = true;
+ if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
+ predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);
+
+ // Recursive descent into constants.
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (C->getNumOperands()) { // Visit GlobalValues.
+ for (const Value *Op : C->operands())
+ if (isa<Constant>(Op)) // Visit GlobalValues.
+ predictValueUseListOrder(Op, F, OM, Stack);
+ if (auto *CE = dyn_cast<ConstantExpr>(C))
+ if (CE->getOpcode() == Instruction::ShuffleVector)
+ predictValueUseListOrder(CE->getShuffleMaskForBitcode(), F, OM,
+ Stack);
+ }
+ }
+}
+
+static UseListOrderStack predictUseListOrder(const Module &M) {
+ OrderMap OM = orderModule(M);
+
+ // Use-list orders need to be serialized after all the users have been added
+ // to a value, or else the shuffles will be incomplete. Store them per
+ // function in a stack.
+ //
+ // Aside from function order, the order of values doesn't matter much here.
+ UseListOrderStack Stack;
+
+ // We want to visit the functions backward now so we can list function-local
+ // constants in the last Function they're used in. Module-level constants
+ // have already been visited above.
+ for (const Function &F : llvm::reverse(M)) {
+ if (F.isDeclaration())
+ continue;
+ for (const BasicBlock &BB : F)
+ predictValueUseListOrder(&BB, &F, OM, Stack);
+ for (const Argument &A : F.args())
+ predictValueUseListOrder(&A, &F, OM, Stack);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
+ for (const Value *Op : I.operands())
+ if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
+ predictValueUseListOrder(Op, &F, OM, Stack);
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+ predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM,
+ Stack);
+ }
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ predictValueUseListOrder(&I, &F, OM, Stack);
+ }
+
+ // Visit globals last, since the module-level use-list block will be seen
+ // before the function bodies are processed.
+ for (const GlobalVariable &G : M.globals())
+ predictValueUseListOrder(&G, nullptr, OM, Stack);
+ for (const Function &F : M)
+ predictValueUseListOrder(&F, nullptr, OM, Stack);
+ for (const GlobalAlias &A : M.aliases())
+ predictValueUseListOrder(&A, nullptr, OM, Stack);
+ for (const GlobalIFunc &I : M.ifuncs())
+ predictValueUseListOrder(&I, nullptr, OM, Stack);
+ for (const GlobalVariable &G : M.globals())
+ if (G.hasInitializer())
+ predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
+ for (const GlobalAlias &A : M.aliases())
+ predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
+ for (const GlobalIFunc &I : M.ifuncs())
+ predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack);
+ for (const Function &F : M) {
+ for (const Use &U : F.operands())
+ predictValueUseListOrder(U.get(), nullptr, OM, Stack);
+ }
+
+ return Stack;
+}
+
+ValueEnumerator::ValueEnumerator(const Module &M, Type *PrefixType) {
+ EnumerateType(PrefixType);
+
+ UseListOrders = predictUseListOrder(M);
+
+ // Enumerate the global variables.
+ for (const GlobalVariable &GV : M.globals()) {
+ EnumerateValue(&GV);
+ EnumerateType(GV.getValueType());
+ }
+
+ // Enumerate the functions.
+ for (const Function &F : M) {
+ EnumerateValue(&F);
+ EnumerateType(F.getValueType());
+ EnumerateType(
+ dxil::TypedPointerType::get(F.getFunctionType(), F.getAddressSpace()));
+ EnumerateAttributes(F.getAttributes());
+ }
+
+ // Enumerate the aliases.
+ for (const GlobalAlias &GA : M.aliases()) {
+ EnumerateValue(&GA);
+ EnumerateType(GA.getValueType());
+ }
+
+ // Enumerate the ifuncs.
+ for (const GlobalIFunc &GIF : M.ifuncs()) {
+ EnumerateValue(&GIF);
+ EnumerateType(GIF.getValueType());
+ }
+
+ // Enumerate the global variable initializers and attributes.
+ for (const GlobalVariable &GV : M.globals()) {
+ if (GV.hasInitializer())
+ EnumerateValue(GV.getInitializer());
+ EnumerateType(
+ dxil::TypedPointerType::get(GV.getValueType(), GV.getAddressSpace()));
+ if (GV.hasAttributes())
+ EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex));
+ }
+
+ // Enumerate the aliasees.
+ for (const GlobalAlias &GA : M.aliases())
+ EnumerateValue(GA.getAliasee());
+
+ // Enumerate the ifunc resolvers.
+ for (const GlobalIFunc &GIF : M.ifuncs())
+ EnumerateValue(GIF.getResolver());
+
+ // Enumerate any optional Function data.
+ for (const Function &F : M)
+ for (const Use &U : F.operands())
+ EnumerateValue(U.get());
+
+ // Enumerate the metadata type.
+ //
+ // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
+ // only encodes the metadata type when it's used as a value.
+ EnumerateType(Type::getMetadataTy(M.getContext()));
+
+ // Insert constants and metadata that are named at module level into the slot
+ // pool so that the module symbol table can refer to them...
+ EnumerateValueSymbolTable(M.getValueSymbolTable());
+ EnumerateNamedMetadata(M);
+
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+ for (const GlobalVariable &GV : M.globals()) {
+ MDs.clear();
+ GV.getAllMetadata(MDs);
+ for (const auto &I : MDs)
+ // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer
+ // to write metadata to the global variable's own metadata block
+ // (PR28134).
+ EnumerateMetadata(nullptr, I.second);
+ }
+
+ // Enumerate types used by function bodies and argument lists.
+ for (const Function &F : M) {
+ for (const Argument &A : F.args())
+ EnumerateType(A.getType());
+
+ // Enumerate metadata attached to this function.
+ MDs.clear();
+ F.getAllMetadata(MDs);
+ for (const auto &I : MDs)
+ EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second);
+
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
+ for (const Use &Op : I.operands()) {
+ auto *MD = dyn_cast<MetadataAsValue>(&Op);
+ if (!MD) {
+ EnumerateOperandType(Op);
+ continue;
+ }
+
+ // Local metadata is enumerated during function-incorporation, but
+ // any ConstantAsMetadata arguments in a DIArgList should be examined
+ // now.
+ if (isa<LocalAsMetadata>(MD->getMetadata()))
+ continue;
+ if (auto *AL = dyn_cast<DIArgList>(MD->getMetadata())) {
+ for (auto *VAM : AL->getArgs())
+ if (isa<ConstantAsMetadata>(VAM))
+ EnumerateMetadata(&F, VAM);
+ continue;
+ }
+
+ EnumerateMetadata(&F, MD->getMetadata());
+ }
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+ EnumerateType(SVI->getShuffleMaskForBitcode()->getType());
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I))
+ EnumerateType(GEP->getSourceElementType());
+ if (auto *AI = dyn_cast<AllocaInst>(&I))
+ EnumerateType(AI->getAllocatedType());
+ EnumerateType(I.getType());
+ if (const auto *Call = dyn_cast<CallBase>(&I)) {
+ EnumerateAttributes(Call->getAttributes());
+ EnumerateType(Call->getFunctionType());
+ }
+
+ // Enumerate metadata attached with this instruction.
+ MDs.clear();
+ I.getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i)
+ EnumerateMetadata(&F, MDs[i].second);
+
+ // Don't enumerate the location directly -- it has a special record
+ // type -- but enumerate its operands.
+ if (DILocation *L = I.getDebugLoc())
+ for (const Metadata *Op : L->operands())
+ EnumerateMetadata(&F, Op);
+ }
+ }
+
+ // Organize metadata ordering.
+ organizeMetadata();
+}
+
+unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
+ InstructionMapType::const_iterator I = InstructionMap.find(Inst);
+ assert(I != InstructionMap.end() && "Instruction is not mapped!");
+ return I->second;
+}
+
+unsigned ValueEnumerator::getComdatID(const Comdat *C) const {
+ unsigned ComdatID = Comdats.idFor(C);
+ assert(ComdatID && "Comdat not found!");
+ return ComdatID;
+}
+
+void ValueEnumerator::setInstructionID(const Instruction *I) {
+ InstructionMap[I] = InstructionCount++;
+}
+
+unsigned ValueEnumerator::getValueID(const Value *V) const {
+ if (auto *MD = dyn_cast<MetadataAsValue>(V))
+ return getMetadataID(MD->getMetadata());
+
+ ValueMapType::const_iterator I = ValueMap.find(V);
+ assert(I != ValueMap.end() && "Value not in slotcalculator!");
+ return I->second - 1;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ValueEnumerator::dump() const {
+ print(dbgs(), ValueMap, "Default");
+ dbgs() << '\n';
+ print(dbgs(), MetadataMap, "MetaData");
+ dbgs() << '\n';
+}
+#endif
+
+void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
+ const char *Name) const {
+ OS << "Map Name: " << Name << "\n";
+ OS << "Size: " << Map.size() << "\n";
+ for (const auto &I : Map) {
+ const Value *V = I.first;
+ if (V->hasName())
+ OS << "Value: " << V->getName() << "\n";
+ else
+ OS << "Value: [null]\n";
+ V->print(OS);
+ OS << '\n';
+
+ OS << " Uses(" << V->getNumUses() << "):";
+ for (const Use &U : V->uses()) {
+ if (&U != &*V->use_begin())
+ OS << ",";
+ if (U->hasName())
+ OS << " " << U->getName();
+ else
+ OS << " [null]";
+ }
+ OS << "\n\n";
+ }
+}
+
+void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
+ const char *Name) const {
+ OS << "Map Name: " << Name << "\n";
+ OS << "Size: " << Map.size() << "\n";
+ for (const auto &I : Map) {
+ const Metadata *MD = I.first;
+ OS << "Metadata: slot = " << I.second.ID << "\n";
+ OS << "Metadata: function = " << I.second.F << "\n";
+ MD->print(OS);
+ OS << "\n";
+ }
+}
+
+/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
+/// table into the values table.
+void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
+ for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
+ VI != VE; ++VI)
+ EnumerateValue(VI->getValue());
+}
+
+/// Insert all of the values referenced by named metadata in the specified
+/// module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module &M) {
+ for (const auto &I : M.named_metadata())
+ EnumerateNamedMDNode(&I);
+}
+
+void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+ EnumerateMetadata(nullptr, MD->getOperand(i));
+}
+
+unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const {
+ return F ? getValueID(F) + 1 : 0;
+}
+
+void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) {
+ EnumerateMetadata(getMetadataFunctionID(F), MD);
+}
+
+void ValueEnumerator::EnumerateFunctionLocalMetadata(
+ const Function &F, const LocalAsMetadata *Local) {
+ EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local);
+}
+
+void ValueEnumerator::EnumerateFunctionLocalListMetadata(
+ const Function &F, const DIArgList *ArgList) {
+ EnumerateFunctionLocalListMetadata(getMetadataFunctionID(&F), ArgList);
+}
+
+void ValueEnumerator::dropFunctionFromMetadata(
+ MetadataMapType::value_type &FirstMD) {
+ SmallVector<const MDNode *, 64> Worklist;
+ auto push = [&Worklist](MetadataMapType::value_type &MD) {
+ auto &Entry = MD.second;
+
+ // Nothing to do if this metadata isn't tagged.
+ if (!Entry.F)
+ return;
+
+ // Drop the function tag.
+ Entry.F = 0;
+
+ // If this has an ID and is an MDNode, then its operands have entries as
+ // well. We need to drop the function from them too.
+ if (Entry.ID)
+ if (auto *N = dyn_cast<MDNode>(MD.first))
+ Worklist.push_back(N);
+ };
+ push(FirstMD);
+ while (!Worklist.empty())
+ for (const Metadata *Op : Worklist.pop_back_val()->operands()) {
+ if (!Op)
+ continue;
+ auto MD = MetadataMap.find(Op);
+ if (MD != MetadataMap.end())
+ push(*MD);
+ }
+}
+
+void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
+ // It's vital for reader efficiency that uniqued subgraphs are done in
+ // post-order; it's expensive when their operands have forward references.
+ // If a distinct node is referenced from a uniqued node, it'll be delayed
+ // until the uniqued subgraph has been completely traversed.
+ SmallVector<const MDNode *, 32> DelayedDistinctNodes;
+
+ // Start by enumerating MD, and then work through its transitive operands in
+ // post-order. This requires a depth-first search.
+ SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist;
+ if (const MDNode *N = enumerateMetadataImpl(F, MD))
+ Worklist.push_back(std::make_pair(N, N->op_begin()));
+
+ while (!Worklist.empty()) {
+ const MDNode *N = Worklist.back().first;
+
+ // Enumerate operands until we hit a new node. We need to traverse these
+ // nodes' operands before visiting the rest of N's operands.
+ MDNode::op_iterator I = std::find_if(
+ Worklist.back().second, N->op_end(),
+ [&](const Metadata *MD) { return enumerateMetadataImpl(F, MD); });
+ if (I != N->op_end()) {
+ auto *Op = cast<MDNode>(*I);
+ Worklist.back().second = ++I;
+
+ // Delay traversing Op if it's a distinct node and N is uniqued.
+ if (Op->isDistinct() && !N->isDistinct())
+ DelayedDistinctNodes.push_back(Op);
+ else
+ Worklist.push_back(std::make_pair(Op, Op->op_begin()));
+ continue;
+ }
+
+ // All the operands have been visited. Now assign an ID.
+ Worklist.pop_back();
+ MDs.push_back(N);
+ MetadataMap[N].ID = MDs.size();
+
+ // Flush out any delayed distinct nodes; these are all the distinct nodes
+ // that are leaves in last uniqued subgraph.
+ if (Worklist.empty() || Worklist.back().first->isDistinct()) {
+ for (const MDNode *N : DelayedDistinctNodes)
+ Worklist.push_back(std::make_pair(N, N->op_begin()));
+ DelayedDistinctNodes.clear();
+ }
+ }
+}
+
+const MDNode *ValueEnumerator::enumerateMetadataImpl(unsigned F,
+ const Metadata *MD) {
+ if (!MD)
+ return nullptr;
+
+ assert(
+ (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
+ "Invalid metadata kind");
+
+ auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F)));
+ MDIndex &Entry = Insertion.first->second;
+ if (!Insertion.second) {
+ // Already mapped. If F doesn't match the function tag, drop it.
+ if (Entry.hasDifferentFunction(F))
+ dropFunctionFromMetadata(*Insertion.first);
+ return nullptr;
+ }
+
+ // Don't assign IDs to metadata nodes.
+ if (auto *N = dyn_cast<MDNode>(MD))
+ return N;
+
+ // Save the metadata.
+ MDs.push_back(MD);
+ Entry.ID = MDs.size();
+
+ // Enumerate the constant, if any.
+ if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
+ EnumerateValue(C->getValue());
+
+ return nullptr;
+}
+
+/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
+/// information reachable from the metadata.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(
+ unsigned F, const LocalAsMetadata *Local) {
+ assert(F && "Expected a function");
+
+ // Check to see if it's already in!
+ MDIndex &Index = MetadataMap[Local];
+ if (Index.ID) {
+ assert(Index.F == F && "Expected the same function");
+ return;
+ }
+
+ MDs.push_back(Local);
+ Index.F = F;
+ Index.ID = MDs.size();
+
+ EnumerateValue(Local->getValue());
+}
+
+/// EnumerateFunctionLocalListMetadata - Incorporate function-local metadata
+/// information reachable from the metadata.
+void ValueEnumerator::EnumerateFunctionLocalListMetadata(
+ unsigned F, const DIArgList *ArgList) {
+ assert(F && "Expected a function");
+
+ // Check to see if it's already in!
+ MDIndex &Index = MetadataMap[ArgList];
+ if (Index.ID) {
+ assert(Index.F == F && "Expected the same function");
+ return;
+ }
+
+ for (ValueAsMetadata *VAM : ArgList->getArgs()) {
+ if (isa<LocalAsMetadata>(VAM)) {
+ assert(MetadataMap.count(VAM) &&
+ "LocalAsMetadata should be enumerated before DIArgList");
+ assert(MetadataMap[VAM].F == F &&
+ "Expected LocalAsMetadata in the same function");
+ } else {
+ assert(isa<ConstantAsMetadata>(VAM) &&
+ "Expected LocalAsMetadata or ConstantAsMetadata");
+ assert(ValueMap.count(VAM->getValue()) &&
+ "Constant should be enumerated beforeDIArgList");
+ EnumerateMetadata(F, VAM);
+ }
+ }
+
+ MDs.push_back(ArgList);
+ Index.F = F;
+ Index.ID = MDs.size();
+}
+
+static unsigned getMetadataTypeOrder(const Metadata *MD) {
+ // Strings are emitted in bulk and must come first.
+ if (isa<MDString>(MD))
+ return 0;
+
+ // ConstantAsMetadata doesn't reference anything. We may as well shuffle it
+ // to the front since we can detect it.
+ auto *N = dyn_cast<MDNode>(MD);
+ if (!N)
+ return 1;
+
+ // The reader resolves forward references to distinct node operands quickly,
+ // but is slow when uniqued operands are unresolved.
+ return N->isDistinct() ? 2 : 3;
+}
+
+void ValueEnumerator::organizeMetadata() {
+ assert(MetadataMap.size() == MDs.size() &&
+ "Metadata map and vector out of sync");
+
+ if (MDs.empty())
+ return;
+
+ // Copy out the index information from MetadataMap in order to choose a new
+ // order.
+ SmallVector<MDIndex, 64> Order;
+ Order.reserve(MetadataMap.size());
+ for (const Metadata *MD : MDs)
+ Order.push_back(MetadataMap.lookup(MD));
+
+ // Partition:
+ // - by function, then
+ // - by isa<MDString>
+ // and then sort by the original/current ID. Since the IDs are guaranteed to
+ // be unique, the result of std::sort will be deterministic. There's no need
+ // for std::stable_sort.
+ llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
+ return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
+ std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID);
+ });
+
+ // Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
+ // and fix up MetadataMap.
+ std::vector<const Metadata *> OldMDs;
+ MDs.swap(OldMDs);
+ MDs.reserve(OldMDs.size());
+ for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) {
+ auto *MD = Order[I].get(OldMDs);
+ MDs.push_back(MD);
+ MetadataMap[MD].ID = I + 1;
+ if (isa<MDString>(MD))
+ ++NumMDStrings;
+ }
+
+ // Return early if there's nothing for the functions.
+ if (MDs.size() == Order.size())
+ return;
+
+ // Build the function metadata ranges.
+ MDRange R;
+ FunctionMDs.reserve(OldMDs.size());
+ unsigned PrevF = 0;
+ for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E;
+ ++I) {
+ unsigned F = Order[I].F;
+ if (!PrevF) {
+ PrevF = F;
+ } else if (PrevF != F) {
+ R.Last = FunctionMDs.size();
+ std::swap(R, FunctionMDInfo[PrevF]);
+ R.First = FunctionMDs.size();
+
+ ID = MDs.size();
+ PrevF = F;
+ }
+
+ auto *MD = Order[I].get(OldMDs);
+ FunctionMDs.push_back(MD);
+ MetadataMap[MD].ID = ++ID;
+ if (isa<MDString>(MD))
+ ++R.NumStrings;
+ }
+ R.Last = FunctionMDs.size();
+ FunctionMDInfo[PrevF] = R;
+}
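
The partition-then-sort comment above relies on lexicographic tuple comparison; because each original ID is unique, the final key is unique and plain std::sort is already deterministic. A stripped-down sketch of the same keying (Idx is a hypothetical stand-in for MDIndex plus the type order):

    #include <algorithm>
    #include <tuple>
    #include <vector>

    struct Idx {
      unsigned F;         // Owning function tag (0 = module-level).
      unsigned TypeOrder; // 0 strings, 1 constants, 2 distinct, 3 uniqued.
      unsigned ID;        // Original, unique enumeration ID.
    };

    void orderMetadata(std::vector<Idx> &Order) {
      std::sort(Order.begin(), Order.end(), [](const Idx &L, const Idx &R) {
        return std::make_tuple(L.F, L.TypeOrder, L.ID) <
               std::make_tuple(R.F, R.TypeOrder, R.ID);
      });
    }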
+
+void ValueEnumerator::incorporateFunctionMetadata(const Function &F) {
+ NumModuleMDs = MDs.size();
+
+ auto R = FunctionMDInfo.lookup(getValueID(&F) + 1);
+ NumMDStrings = R.NumStrings;
+ MDs.insert(MDs.end(), FunctionMDs.begin() + R.First,
+ FunctionMDs.begin() + R.Last);
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+ assert(!V->getType()->isVoidTy() && "Can't insert void values!");
+ assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");
+
+ // Check to see if it's already in!
+ unsigned &ValueID = ValueMap[V];
+ if (ValueID) {
+ // Increment use count.
+ Values[ValueID - 1].second++;
+ return;
+ }
+
+ if (auto *GO = dyn_cast<GlobalObject>(V))
+ if (const Comdat *C = GO->getComdat())
+ Comdats.insert(C);
+
+ // Enumerate the type of this value.
+ EnumerateType(V->getType());
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<GlobalValue>(C)) {
+ // Initializers for globals are handled explicitly elsewhere.
+ } else if (C->getNumOperands()) {
+ // If a constant has operands, enumerate them. This makes sure that if a
+ // constant has uses (for example an array of const ints), that they are
+ // inserted also.
+
+ // We prefer to enumerate them with values before we enumerate the user
+ // itself. This makes it more likely that we can avoid forward references
+ // in the reader. We know that there can be no cycles in the constants
+ // graph that don't go through a global variable.
+ for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); I != E;
+ ++I)
+ if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
+ EnumerateValue(*I);
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::ShuffleVector)
+ EnumerateValue(CE->getShuffleMaskForBitcode());
+ if (auto *GEP = dyn_cast<GEPOperator>(CE))
+ EnumerateType(GEP->getSourceElementType());
+ }
+
+ // Finally, add the value. Doing this could make the ValueID reference be
+ // dangling, don't reuse it.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueMap[V] = Values.size();
+ return;
+ }
+ }
+
+ // Add the value.
+ Values.push_back(std::make_pair(V, 1U));
+ ValueID = Values.size();
+}
+
+void ValueEnumerator::EnumerateType(Type *Ty) {
+ unsigned *TypeID = &TypeMap[Ty];
+
+ // We've already seen this type.
+ if (*TypeID)
+ return;
+
+ // If it is a non-anonymous struct, mark the type as being visited so that we
+ // don't recursively visit it. This is safe because we allow forward
+ // references of these in the bitcode reader.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isLiteral())
+ *TypeID = ~0U;
+
+ // Enumerate all of the subtypes before we enumerate this type. This ensures
+ // that the type will be enumerated in an order that can be directly built.
+ for (Type *SubTy : Ty->subtypes())
+ EnumerateType(SubTy);
+
+ // Refresh the TypeID pointer in case the table rehashed.
+ TypeID = &TypeMap[Ty];
+
+ // Check to see if we got the pointer another way. This can happen when
+ // enumerating recursive types that hit the base case deeper than they start.
+ //
+ // If this is actually a struct that we are treating as forward ref'able,
+ // then emit the definition now that all of its contents are available.
+ if (*TypeID && *TypeID != ~0U)
+ return;
+
+ // Add this type now that its contents are all happily enumerated.
+ Types.push_back(Ty);
+
+ *TypeID = Types.size();
+}
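
The ~0U sentinel above is a general recursion guard: mark a node "in progress" before visiting its sub-elements, so a type that transitively contains itself terminates with a forward reference instead of recursing forever. A simplified standalone sketch that, unlike the real code, applies the sentinel to every node:

    #include <unordered_map>
    #include <vector>

    constexpr unsigned InProgress = ~0u; // "Being visited" sentinel.

    struct Node {
      std::vector<Node *> Subs;
    };

    // Post-order numbering with a cycle guard. References into
    // std::unordered_map stay valid across rehashes, so unlike the
    // DenseMap-based code above no re-lookup is needed.
    void enumerate(Node *N, std::unordered_map<Node *, unsigned> &IDs,
                   std::vector<Node *> &Order) {
      unsigned &ID = IDs[N];
      if (ID)
        return; // Already numbered, or already on the visit stack.
      ID = InProgress;
      for (Node *Sub : N->Subs)
        enumerate(Sub, IDs, Order);
      Order.push_back(N);
      ID = Order.size(); // Real ID, assigned after all sub-elements.
    }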
+
+// Enumerate the types for the specified value. If the value is a constant,
+// walk through it, enumerating the types of the constant.
+void ValueEnumerator::EnumerateOperandType(const Value *V) {
+ EnumerateType(V->getType());
+
+ assert(!isa<MetadataAsValue>(V) && "Unexpected metadata operand");
+
+ const Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return;
+
+ // If this constant is already enumerated, ignore it, we know its type must
+ // be enumerated.
+ if (ValueMap.count(C))
+ return;
+
+ // This constant may have operands, make sure to enumerate the types in
+ // them.
+ for (const Value *Op : C->operands()) {
+ // Don't enumerate basic blocks here, this happens as operands to
+ // blockaddress.
+ if (isa<BasicBlock>(Op))
+ continue;
+
+ EnumerateOperandType(Op);
+ }
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::ShuffleVector)
+ EnumerateOperandType(CE->getShuffleMaskForBitcode());
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ EnumerateType(cast<GEPOperator>(CE)->getSourceElementType());
+ }
+}
+
+void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
+ if (PAL.isEmpty())
+ return; // null is always 0.
+
+ // Do a lookup.
+ unsigned &Entry = AttributeListMap[PAL];
+ if (Entry == 0) {
+ // Never saw this before, add it.
+ AttributeLists.push_back(PAL);
+ Entry = AttributeLists.size();
+ }
+
+ // Do lookups for all attribute groups.
+ for (unsigned i : PAL.indexes()) {
+ AttributeSet AS = PAL.getAttributes(i);
+ if (!AS.hasAttributes())
+ continue;
+ IndexAndAttrSet Pair = {i, AS};
+ unsigned &Entry = AttributeGroupMap[Pair];
+ if (Entry == 0) {
+ AttributeGroups.push_back(Pair);
+ Entry = AttributeGroups.size();
+
+ for (Attribute Attr : AS) {
+ if (Attr.isTypeAttribute())
+ EnumerateType(Attr.getValueAsType());
+ }
+ }
+ }
+}
+
+void ValueEnumerator::incorporateFunction(const Function &F) {
+ InstructionCount = 0;
+ NumModuleValues = Values.size();
+
+ // Add global metadata to the function block. This doesn't include
+ // LocalAsMetadata.
+ incorporateFunctionMetadata(F);
+
+ // Adding function arguments to the value table.
+ for (const auto &I : F.args()) {
+ EnumerateValue(&I);
+ if (I.hasAttribute(Attribute::ByVal))
+ EnumerateType(I.getParamByValType());
+ else if (I.hasAttribute(Attribute::StructRet))
+ EnumerateType(I.getParamStructRetType());
+ else if (I.hasAttribute(Attribute::ByRef))
+ EnumerateType(I.getParamByRefType());
+ }
+ FirstFuncConstantID = Values.size();
+
+ // Add all function-level constants to the value table.
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ for (const Use &OI : I.operands()) {
+ if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI))
+ EnumerateValue(OI);
+ }
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+ EnumerateValue(SVI->getShuffleMaskForBitcode());
+ }
+ BasicBlocks.push_back(&BB);
+ ValueMap[&BB] = BasicBlocks.size();
+ }
+
+ // Add the function's parameter attributes so they are available for use in
+ // the function's instructions.
+ EnumerateAttributes(F.getAttributes());
+
+ FirstInstID = Values.size();
+
+ SmallVector<LocalAsMetadata *, 8> FnLocalMDVector;
+ SmallVector<DIArgList *, 8> ArgListMDVector;
+ // Add all of the instructions.
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ for (const Use &OI : I.operands()) {
+ if (auto *MD = dyn_cast<MetadataAsValue>(&OI)) {
+ if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata())) {
+ // Enumerate metadata after the instructions they might refer to.
+ FnLocalMDVector.push_back(Local);
+ } else if (auto *ArgList = dyn_cast<DIArgList>(MD->getMetadata())) {
+ ArgListMDVector.push_back(ArgList);
+ for (ValueAsMetadata *VMD : ArgList->getArgs()) {
+ if (auto *Local = dyn_cast<LocalAsMetadata>(VMD)) {
+ // Enumerate metadata after the instructions they might refer
+ // to.
+ FnLocalMDVector.push_back(Local);
+ }
+ }
+ }
+ }
+ }
+
+ if (!I.getType()->isVoidTy())
+ EnumerateValue(&I);
+ }
+ }
+
+ // Add all of the function-local metadata.
+ for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) {
+ // At this point, every local value has been incorporated; we shouldn't
+ // have a metadata operand that references a value that hasn't been seen.
+ assert(ValueMap.count(FnLocalMDVector[i]->getValue()) &&
+ "Missing value for metadata operand");
+ EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]);
+ }
+ // DIArgList entries must come after function-local metadata, as it is not
+ // possible to forward-reference them.
+ for (const DIArgList *ArgList : ArgListMDVector)
+ EnumerateFunctionLocalListMetadata(F, ArgList);
+}
+
+void ValueEnumerator::purgeFunction() {
+ // Remove purged values from the ValueMap.
+ for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
+ ValueMap.erase(Values[i].first);
+ for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
+ MetadataMap.erase(MDs[i]);
+ for (const BasicBlock *BB : BasicBlocks)
+ ValueMap.erase(BB);
+
+ Values.resize(NumModuleValues);
+ MDs.resize(NumModuleMDs);
+ BasicBlocks.clear();
+ NumMDStrings = 0;
+}
+
+static void IncorporateFunctionInfoGlobalBBIDs(
+ const Function *F, DenseMap<const BasicBlock *, unsigned> &IDMap) {
+ unsigned Counter = 0;
+ for (const BasicBlock &BB : *F)
+ IDMap[&BB] = ++Counter;
+}
+
+/// getGlobalBasicBlockID - This returns the function-specific ID for the
+/// specified basic block. This is relatively expensive information, so it
+/// should only be used by rare constructs such as address-of-label.
+unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
+ unsigned &Idx = GlobalBasicBlockIDs[BB];
+ if (Idx != 0)
+ return Idx - 1;
+
+ IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
+ return getGlobalBasicBlockID(BB);
+}
+
+uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const {
+ return Log2_32_Ceil(getTypes().size() + 1);
+}
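
The helper above sizes fixed-width type-index fields as ceil(log2(NumTypes + 1)), wide enough for any ID in [0, NumTypes]; for example, 20 enumerated types need ceil(log2(21)) = 5 bits. A dependency-free sketch of the same computation (Log2_32_Ceil is the LLVM helper this mirrors, not the implementation below):

    #include <cstdint>

    // ceil(log2(N)): smallest B such that (1 << B) >= N.
    uint32_t log2Ceil(uint32_t N) {
      uint32_t B = 0;
      while ((1ull << B) < N)
        ++B;
      return B;
    }

    // Width of a fixed type-index field: enough bits for any ID in
    // [0, NumTypes], e.g. 20 types -> 5 bits.
    uint64_t bitsForTypeIndices(uint32_t NumTypes) {
      return log2Ceil(NumTypes + 1);
    }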
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h
new file mode 100644
index 000000000000..6cf339b7a5cd
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.h
@@ -0,0 +1,308 @@
+//===- DXILWriter/DXILValueEnumerator.h - Number values ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class gives values and types Unique ID's.
+// Forked from lib/Bitcode/Writer
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DXILWRITER_VALUEENUMERATOR_H
+#define LLVM_DXILWRITER_VALUEENUMERATOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/UseListOrder.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock;
+class Comdat;
+class DIArgList;
+class Function;
+class Instruction;
+class LocalAsMetadata;
+class MDNode;
+class Metadata;
+class Module;
+class NamedMDNode;
+class raw_ostream;
+class Type;
+class Value;
+class ValueSymbolTable;
+
+namespace dxil {
+
+class ValueEnumerator {
+public:
+ using TypeList = std::vector<Type *>;
+
+ // For each value, we remember its Value* and occurrence frequency.
+ using ValueList = std::vector<std::pair<const Value *, unsigned>>;
+
+ /// Attribute groups as encoded in bitcode are almost AttributeSets, but they
+ /// include the AttributeList index, so we have to track that in our map.
+ using IndexAndAttrSet = std::pair<unsigned, AttributeSet>;
+
+ UseListOrderStack UseListOrders;
+
+private:
+ using TypeMapType = DenseMap<Type *, unsigned>;
+ TypeMapType TypeMap;
+ TypeList Types;
+
+ using ValueMapType = DenseMap<const Value *, unsigned>;
+ ValueMapType ValueMap;
+ ValueList Values;
+
+ using ComdatSetType = UniqueVector<const Comdat *>;
+ ComdatSetType Comdats;
+
+ std::vector<const Metadata *> MDs;
+ std::vector<const Metadata *> FunctionMDs;
+
+ /// Index of information about a piece of metadata.
+ struct MDIndex {
+ unsigned F = 0; ///< The ID of the function for this metadata, if any.
+ unsigned ID = 0; ///< The implicit ID of this metadata in bitcode.
+
+ MDIndex() = default;
+ explicit MDIndex(unsigned F) : F(F) {}
+
+ /// Check if this has a function tag, and it's different from NewF.
+ bool hasDifferentFunction(unsigned NewF) const { return F && F != NewF; }
+
+ /// Fetch the MD this references out of the given metadata array.
+ const Metadata *get(ArrayRef<const Metadata *> MDs) const {
+ assert(ID && "Expected non-zero ID");
+ assert(ID <= MDs.size() && "Expected valid ID");
+ return MDs[ID - 1];
+ }
+ };
+
+ using MetadataMapType = DenseMap<const Metadata *, MDIndex>;
+ MetadataMapType MetadataMap;
+
+ /// Range of metadata IDs, as a half-open range.
+ struct MDRange {
+ unsigned First = 0;
+ unsigned Last = 0;
+
+ /// Number of strings in the prefix of the metadata range.
+ unsigned NumStrings = 0;
+
+ MDRange() = default;
+ explicit MDRange(unsigned First) : First(First) {}
+ };
+ SmallDenseMap<unsigned, MDRange, 1> FunctionMDInfo;
+
+ using AttributeGroupMapType = DenseMap<IndexAndAttrSet, unsigned>;
+ AttributeGroupMapType AttributeGroupMap;
+ std::vector<IndexAndAttrSet> AttributeGroups;
+
+ using AttributeListMapType = DenseMap<AttributeList, unsigned>;
+ AttributeListMapType AttributeListMap;
+ std::vector<AttributeList> AttributeLists;
+
+ /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
+ /// the "getGlobalBasicBlockID" method.
+ mutable DenseMap<const BasicBlock *, unsigned> GlobalBasicBlockIDs;
+
+ using InstructionMapType = DenseMap<const Instruction *, unsigned>;
+ InstructionMapType InstructionMap;
+ unsigned InstructionCount;
+
+ /// BasicBlocks - This contains all the basic blocks for the currently
+ /// incorporated function. Their reverse mapping is stored in ValueMap.
+ std::vector<const BasicBlock *> BasicBlocks;
+
+ /// When a function is incorporated, this is the size of the Values list
+ /// before incorporation.
+ unsigned NumModuleValues;
+
+ /// When a function is incorporated, this is the size of the MDs list
+ /// before incorporation.
+ unsigned NumModuleMDs = 0;
+ unsigned NumMDStrings = 0;
+
+ unsigned FirstFuncConstantID;
+ unsigned FirstInstID;
+
+public:
+ ValueEnumerator(const Module &M, Type *PrefixType);
+ ValueEnumerator(const ValueEnumerator &) = delete;
+ ValueEnumerator &operator=(const ValueEnumerator &) = delete;
+
+ void dump() const;
+ void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
+ void print(raw_ostream &OS, const MetadataMapType &Map,
+ const char *Name) const;
+
+ unsigned getValueID(const Value *V) const;
+
+ unsigned getMetadataID(const Metadata *MD) const {
+ auto ID = getMetadataOrNullID(MD);
+ assert(ID != 0 && "Metadata not in slotcalculator!");
+ return ID - 1;
+ }
+
+ unsigned getMetadataOrNullID(const Metadata *MD) const {
+ return MetadataMap.lookup(MD).ID;
+ }
+
+ unsigned numMDs() const { return MDs.size(); }
+
+ unsigned getTypeID(Type *T) const {
+ TypeMapType::const_iterator I = TypeMap.find(T);
+ assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+ return I->second - 1;
+ }
+
+ unsigned getInstructionID(const Instruction *I) const;
+ void setInstructionID(const Instruction *I);
+
+ unsigned getAttributeListID(AttributeList PAL) const {
+ if (PAL.isEmpty())
+ return 0; // Null maps to zero.
+ AttributeListMapType::const_iterator I = AttributeListMap.find(PAL);
+ assert(I != AttributeListMap.end() && "Attribute not in ValueEnumerator!");
+ return I->second;
+ }
+
+ unsigned getAttributeGroupID(IndexAndAttrSet Group) const {
+ if (!Group.second.hasAttributes())
+ return 0; // Null maps to zero.
+ AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(Group);
+ assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!");
+ return I->second;
+ }
+
+ /// getFunctionConstantRange - Return the range of values that corresponds to
+ /// function-local constants.
+ void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+ Start = FirstFuncConstantID;
+ End = FirstInstID;
+ }
+
+ const ValueList &getValues() const { return Values; }
+
+ /// Check whether the current block has any metadata to emit.
+ bool hasMDs() const { return NumModuleMDs < MDs.size(); }
+
+ /// Get the MDString metadata for this block.
+ ArrayRef<const Metadata *> getMDStrings() const {
+ return makeArrayRef(MDs).slice(NumModuleMDs, NumMDStrings);
+ }
+
+ /// Get the non-MDString metadata for this block.
+ ArrayRef<const Metadata *> getNonMDStrings() const {
+ return makeArrayRef(MDs).slice(NumModuleMDs).slice(NumMDStrings);
+ }
+
+ const TypeList &getTypes() const { return Types; }
+
+ const std::vector<const BasicBlock *> &getBasicBlocks() const {
+ return BasicBlocks;
+ }
+
+ const std::vector<AttributeList> &getAttributeLists() const {
+ return AttributeLists;
+ }
+
+ const std::vector<IndexAndAttrSet> &getAttributeGroups() const {
+ return AttributeGroups;
+ }
+
+ const ComdatSetType &getComdats() const { return Comdats; }
+ unsigned getComdatID(const Comdat *C) const;
+
+ /// getGlobalBasicBlockID - This returns the function-specific ID for the
+ /// specified basic block. This is relatively expensive information, so it
+ /// should only be used by rare constructs such as address-of-label.
+ unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
+
+ /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+ /// use these two methods to get its data into the ValueEnumerator!
+ void incorporateFunction(const Function &F);
+
+ void purgeFunction();
+ uint64_t computeBitsRequiredForTypeIndicies() const;
+
+ void EnumerateType(Type *T);
+
+private:
+
+ /// Reorder the reachable metadata.
+ ///
+ /// This is not just an optimization, but is mandatory for emitting MDString
+ /// correctly.
+ void organizeMetadata();
+
+ /// Drop the function tag from the transitive operands of the given node.
+ void dropFunctionFromMetadata(MetadataMapType::value_type &FirstMD);
+
+ /// Incorporate the function metadata.
+ ///
+ /// This should be called before enumerating LocalAsMetadata for the
+ /// function.
+ void incorporateFunctionMetadata(const Function &F);
+
+ /// Enumerate a single instance of metadata with the given function tag.
+ ///
+ /// If \c MD has already been enumerated, check that \c F matches its
+ /// function tag. If not, call \a dropFunctionFromMetadata().
+ ///
+ /// Otherwise, mark \c MD as visited. Assign it an ID, or just return it if
+ /// it's an \a MDNode.
+ const MDNode *enumerateMetadataImpl(unsigned F, const Metadata *MD);
+
+ unsigned getMetadataFunctionID(const Function *F) const;
+
+ /// Enumerate reachable metadata in (almost) post-order.
+ ///
+ /// Enumerate all the metadata reachable from MD. We want to minimize the
+ /// cost of reading bitcode records, and so the primary consideration is that
+ /// operands of uniqued nodes are resolved before the nodes are read. This
+ /// avoids re-uniquing them on the context and factors away RAUW support.
+ ///
+ /// This algorithm guarantees that subgraphs of uniqued nodes are in
+ /// post-order. Distinct subgraphs reachable only from a single uniqued node
+ /// will be in post-order.
+ ///
+ /// \note The relative order of a distinct and uniqued node is irrelevant.
+ /// \a organizeMetadata() will later partition distinct nodes ahead of
+ /// uniqued ones.
+ ///{
+ void EnumerateMetadata(const Function *F, const Metadata *MD);
+ void EnumerateMetadata(unsigned F, const Metadata *MD);
+ ///}
+
+ void EnumerateFunctionLocalMetadata(const Function &F,
+ const LocalAsMetadata *Local);
+ void EnumerateFunctionLocalMetadata(unsigned F, const LocalAsMetadata *Local);
+ void EnumerateFunctionLocalListMetadata(const Function &F,
+ const DIArgList *ArgList);
+ void EnumerateFunctionLocalListMetadata(unsigned F, const DIArgList *Arglist);
+ void EnumerateNamedMDNode(const NamedMDNode *NMD);
+ void EnumerateValue(const Value *V);
+ void EnumerateOperandType(const Value *V);
+ void EnumerateAttributes(AttributeList PAL);
+
+ void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
+ void EnumerateNamedMetadata(const Module &M);
+};
+
+} // end namespace dxil
+} // end namespace llvm
+
+#endif // LLVM_DXILWRITER_VALUEENUMERATOR_H
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
new file mode 100644
index 000000000000..c1f9f4aec672
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
@@ -0,0 +1,100 @@
+//===- DXILWriterPass.cpp - Bitcode writing pass --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// DXILWriterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DXILWriterPass.h"
+#include "DXILBitcodeWriter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+namespace {
+class WriteDXILPass : public llvm::ModulePass {
+ raw_ostream &OS; // raw_ostream to print on
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WriteDXILPass() : ModulePass(ID), OS(dbgs()) {
+ initializeWriteDXILPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit WriteDXILPass(raw_ostream &o) : ModulePass(ID), OS(o) {
+ initializeWriteDXILPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Bitcode Writer"; }
+
+ bool runOnModule(Module &M) override {
+ WriteDXILToFile(M, OS);
+ return false;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
+class EmbedDXILPass : public llvm::ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ EmbedDXILPass() : ModulePass(ID) {
+ initializeEmbedDXILPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "DXIL Embedder"; }
+
+ bool runOnModule(Module &M) override {
+ std::string Data;
+ llvm::raw_string_ostream OS(Data);
+ WriteDXILToFile(M, OS);
+
+ Constant *ModuleConstant =
+ ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data));
+ auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true,
+ GlobalValue::PrivateLinkage,
+ ModuleConstant, "dx.dxil");
+ GV->setSection("DXIL");
+ GV->setAlignment(Align(4));
+ appendToCompilerUsed(M, {GV});
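+    // Net effect (illustrative): the module now carries a private constant
+    // along the lines of
+    //   @dx.dxil = private constant [N x i8] c"...", section "DXIL", align 4
+    // which the AsmPrinter later emits into the output container.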
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+} // namespace
+
+char WriteDXILPass::ID = 0;
+INITIALIZE_PASS_BEGIN(WriteDXILPass, "write-bitcode", "Write Bitcode", false,
+ true)
+INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_END(WriteDXILPass, "write-bitcode", "Write Bitcode", false,
+ true)
+
+ModulePass *llvm::createDXILWriterPass(raw_ostream &Str) {
+ return new WriteDXILPass(Str);
+}
+
+char EmbedDXILPass::ID = 0;
+INITIALIZE_PASS(EmbedDXILPass, "dxil-embed", "Embed DXIL", false, true)
+
+ModulePass *llvm::createDXILEmbedderPass() { return new EmbedDXILPass(); }
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h
new file mode 100644
index 000000000000..2c9c12178677
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.h
@@ -0,0 +1,37 @@
+//===-- DXILWriterPass.h - Bitcode writing pass --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides a bitcode writing pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_DXILWRITERPASS_H
+#define LLVM_BITCODE_DXILWRITERPASS_H
+
+#include "DirectX.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+class raw_ostream;
+
+/// Create and return a pass that writes the module to the specified
+/// ostream. Note that this pass is designed for use with the legacy pass
+/// manager.
+ModulePass *createDXILWriterPass(raw_ostream &Str);
+
+/// Create and return a pass that writes the module to a global variable in
+/// the module for later emission by the MCStreamer. Note that this pass is
+/// designed for use with the legacy pass manager because it only runs during
+/// CodeGen.
+ModulePass *createDXILEmbedderPass();
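+
+// Minimal usage sketch (illustrative; assumes a Module M and a raw_ostream
+// Out are in scope and the legacy pass manager is driving codegen):
+//
+//   legacy::PassManager PM;
+//   PM.add(createDXILWriterPass(Out));
+//   PM.run(M);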
+
+} // namespace llvm
+
+#endif // LLVM_BITCODE_DXILWRITERPASS_H
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
new file mode 100644
index 000000000000..3883e4ba4621
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -0,0 +1,43 @@
+//===- DirectX.h - DirectX Target Implementation ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_DIRECTX_DIRECTX_H
+#define LLVM_LIB_TARGET_DIRECTX_DIRECTX_H
+
+namespace llvm {
+class ModulePass;
+class PassRegistry;
+
+/// Initializer for the DXIL writer pass.
+void initializeWriteDXILPassPass(PassRegistry &);
+
+/// Initializer for the DXIL embedder pass.
+void initializeEmbedDXILPassPass(PassRegistry &);
+
+/// Initializer for the DXIL-prepare pass.
+void initializeDXILPrepareModulePass(PassRegistry &);
+
+/// Pass to convert modules into DXIL-compatible modules.
+ModulePass *createDXILPrepareModulePass();
+
+/// Initializer for DXILOpLowering
+void initializeDXILOpLoweringLegacyPass(PassRegistry &);
+
+/// Pass to lower LLVM intrinsic calls to DXIL op function calls.
+ModulePass *createDXILOpLoweringLegacyPass();
+
+/// Initializer for DXILTranslateMetadata.
+void initializeDXILTranslateMetadataPass(PassRegistry &);
+
+/// Pass to emit metadata for DXIL.
+ModulePass *createDXILTranslateMetadataPass();
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_DIRECTX_DIRECTX_H
diff --git a/llvm/lib/Target/DirectX/DirectX.td b/llvm/lib/Target/DirectX/DirectX.td
new file mode 100644
index 000000000000..4d1d45b84a68
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectX.td
@@ -0,0 +1,54 @@
+//- DirectX.td - Describe the DirectX Target Machine ----------*- tablegen -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the target description file for the DirectX target.
+///
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+include "DXILStubs.td"
+
+//===----------------------------------------------------------------------===//
+// DirectX Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def DirectXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// DirectX Processors supported.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"generic", NoSchedModel, []>;
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def DirectXAsmParser : AsmParser {
+ // The physical register names are not in the binary format or asm text
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+def DirectXAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 0;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def DirectX : Target {
+ let InstructionSet = DirectXInstrInfo;
+ let AssemblyParsers = [DirectXAsmParser];
+ let AssemblyWriters = [DirectXAsmWriter];
+}
diff --git a/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp
new file mode 100644
index 000000000000..cea3283f6756
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp
@@ -0,0 +1,57 @@
+//===-- DirectXAsmPrinter.cpp - DirectX assembly writer --------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains AsmPrinters for the DirectX backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TargetInfo/DirectXTargetInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+
+// The DXILAsmPrinter is mostly a stub because DXIL is just LLVM bitcode which
+// gets embedded into a DXContainer file.
+class DXILAsmPrinter : public AsmPrinter {
+public:
+ explicit DXILAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
+
+ StringRef getPassName() const override { return "DXIL Assembly Printer"; }
+ void emitGlobalVariable(const GlobalVariable *GV) override;
+ bool runOnMachineFunction(MachineFunction &MF) override { return false; }
+};
+} // namespace
+
+void DXILAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
+ // If there is no initializer or the section is implicit, do nothing
+ if (!GV->hasInitializer() || GV->hasImplicitSection())
+ return;
+ // Skip the LLVM metadata
+ if (GV->getSection() == "llvm.metadata")
+ return;
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+ MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM);
+ OutStreamer->switchSection(TheSection);
+ emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXAsmPrinter() {
+ RegisterAsmPrinter<DXILAsmPrinter> X(getTheDirectXTarget());
+}
diff --git a/llvm/lib/Target/DirectX/DirectXFrameLowering.h b/llvm/lib/Target/DirectX/DirectXFrameLowering.h
new file mode 100644
index 000000000000..76a1450054be
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXFrameLowering.h
@@ -0,0 +1,35 @@
+//===-- DirectXFrameLowering.h - Frame lowering for DirectX --*- C++ ---*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements DirectX-specific bits of TargetFrameLowering class.
+// This is just a stub because the current DXIL backend does not actually lower
+// through the MC layer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXFRAMELOWERING_H
+#define LLVM_DIRECTX_DIRECTXFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/Alignment.h"
+
+namespace llvm {
+class DirectXSubtarget;
+
+class DirectXFrameLowering : public TargetFrameLowering {
+public:
+ explicit DirectXFrameLowering(const DirectXSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0) {}
+
+ void emitPrologue(MachineFunction &, MachineBasicBlock &) const override {}
+ void emitEpilogue(MachineFunction &, MachineBasicBlock &) const override {}
+
+ bool hasFP(const MachineFunction &) const override { return false; }
+};
+} // namespace llvm
+#endif // LLVM_DIRECTX_DIRECTXFRAMELOWERING_H
diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
new file mode 100644
index 000000000000..07b68648f16c
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.cpp
@@ -0,0 +1,20 @@
+//===-- DirectXInstrInfo.cpp - InstrInfo for DirectX -*- C++ ------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DirectX specific subclass of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DirectXInstrInfo.h"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "DirectXGenInstrInfo.inc"
+
+using namespace llvm;
+
+DirectXInstrInfo::~DirectXInstrInfo() {}
diff --git a/llvm/lib/Target/DirectX/DirectXInstrInfo.h b/llvm/lib/Target/DirectX/DirectXInstrInfo.h
new file mode 100644
index 000000000000..4fe79ee547fe
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXInstrInfo.h
@@ -0,0 +1,30 @@
+//===-- DirectXInstrInfo.h - Define InstrInfo for DirectX -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the DirectX specific subclass of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXINSTRINFO_H
+#define LLVM_DIRECTX_DIRECTXINSTRINFO_H
+
+#include "DirectXRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "DirectXGenInstrInfo.inc"
+
+namespace llvm {
+struct DirectXInstrInfo : public DirectXGenInstrInfo {
+ explicit DirectXInstrInfo() : DirectXGenInstrInfo() {}
+
+ ~DirectXInstrInfo() override;
+};
+} // namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXINSTRINFO_H
diff --git a/llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp b/llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp
new file mode 100644
index 000000000000..c54b494f3730
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXRegisterInfo.cpp
@@ -0,0 +1,24 @@
+//===-- DirectXRegisterInfo.cpp - RegisterInfo for DirectX -*- C++ ------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DirectX specific subclass of TargetRegisterInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DirectXRegisterInfo.h"
+#include "DirectXFrameLowering.h"
+#include "MCTargetDesc/DirectXMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "DirectXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+DirectXRegisterInfo::~DirectXRegisterInfo() {}
diff --git a/llvm/lib/Target/DirectX/DirectXRegisterInfo.h b/llvm/lib/Target/DirectX/DirectXRegisterInfo.h
new file mode 100644
index 000000000000..023c5c3ef337
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXRegisterInfo.h
@@ -0,0 +1,28 @@
+//===-- DirectXRegisterInfo.h - Define RegisterInfo for DirectX -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the DirectX specific subclass of TargetRegisterInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXREGISTERINFO_H
+#define LLVM_DIRECTX_DIRECTXREGISTERINFO_H
+
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "DirectXGenRegisterInfo.inc"
+
+namespace llvm {
+struct DirectXRegisterInfo : public DirectXGenRegisterInfo {
+ DirectXRegisterInfo() : DirectXGenRegisterInfo(0) {}
+ ~DirectXRegisterInfo();
+};
+} // namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXREGISTERINFO_H
diff --git a/llvm/lib/Target/DirectX/DirectXSubtarget.cpp b/llvm/lib/Target/DirectX/DirectXSubtarget.cpp
new file mode 100644
index 000000000000..526b7d29fb13
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXSubtarget.cpp
@@ -0,0 +1,29 @@
+//===-- DirectXSubtarget.cpp - DirectX Subtarget Information --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the DirectX-specific subclass of TargetSubtarget.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectXSubtarget.h"
+#include "DirectXTargetLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "directx-subtarget"
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "DirectXGenSubtargetInfo.inc"
+
+DirectXSubtarget::DirectXSubtarget(const Triple &TT, StringRef CPU,
+ StringRef FS, const DirectXTargetMachine &TM)
+ : DirectXGenSubtargetInfo(TT, CPU, CPU, FS), FL(*this), TL(TM, *this) {}
+
+void DirectXSubtarget::anchor() {}
diff --git a/llvm/lib/Target/DirectX/DirectXSubtarget.h b/llvm/lib/Target/DirectX/DirectXSubtarget.h
new file mode 100644
index 000000000000..464d05a0e1ff
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXSubtarget.h
@@ -0,0 +1,56 @@
+//===-- DirectXSubtarget.h - Define Subtarget for DirectX -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the DirectX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXSUBTARGET_H
+#define LLVM_DIRECTX_DIRECTXSUBTARGET_H
+
+#include "DirectXFrameLowering.h"
+#include "DirectXInstrInfo.h"
+#include "DirectXTargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "DirectXGenSubtargetInfo.inc"
+
+namespace llvm {
+
+class DirectXTargetMachine;
+
+class DirectXSubtarget : public DirectXGenSubtargetInfo {
+ DirectXFrameLowering FL;
+ DirectXTargetLowering TL;
+ DirectXInstrInfo InstrInfo;
+
+ virtual void anchor(); // virtual anchor method
+
+public:
+ DirectXSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const DirectXTargetMachine &TM);
+
+ /// Parses a subtarget feature string, setting appropriate options.
+  /// \note The definition of this function is auto-generated by `tblgen`.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+
+ const DirectXTargetLowering *getTargetLowering() const override {
+ return &TL;
+ }
+
+ const DirectXFrameLowering *getFrameLowering() const override { return &FL; }
+
+ const DirectXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXSUBTARGET_H
diff --git a/llvm/lib/Target/DirectX/DirectXTargetLowering.h b/llvm/lib/Target/DirectX/DirectXTargetLowering.h
new file mode 100644
index 000000000000..dc19894ab165
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXTargetLowering.h
@@ -0,0 +1,31 @@
+//===-- DirectXTargetLowering.h - Define DX TargetLowering -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the DirectX specific subclass of TargetLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXTARGETLOWERING_H
+#define LLVM_DIRECTX_DIRECTXTARGETLOWERING_H
+
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+
+class DirectXSubtarget;
+class DirectXTargetMachine;
+
+class DirectXTargetLowering : public TargetLowering {
+public:
+ explicit DirectXTargetLowering(const DirectXTargetMachine &TM,
+ const DirectXSubtarget &STI);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXTARGETLOWERING_H
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
new file mode 100644
index 000000000000..44bef80ea6fb
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -0,0 +1,144 @@
+//===- DirectXTargetMachine.cpp - DirectX Target Implementation -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the DirectX target initializer.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectXTargetMachine.h"
+#include "DXILWriter/DXILWriterPass.h"
+#include "DirectX.h"
+#include "DirectXSubtarget.h"
+#include "DirectXTargetTransformInfo.h"
+#include "TargetInfo/DirectXTargetInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/MCSectionDXContainer.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
+ RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
+ auto *PR = PassRegistry::getPassRegistry();
+ initializeDXILPrepareModulePass(*PR);
+ initializeEmbedDXILPassPass(*PR);
+ initializeDXILOpLoweringLegacyPass(*PR);
+ initializeDXILTranslateMetadataPass(*PR);
+}
+
+class DXILTargetObjectFile : public TargetLoweringObjectFile {
+public:
+ DXILTargetObjectFile() = default;
+
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override {
+ return getContext().getDXContainerSection(GO->getSection(), Kind);
+ }
+
+protected:
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override {
+ llvm_unreachable("Not supported!");
+ }
+};
+
+class DirectXPassConfig : public TargetPassConfig {
+public:
+ DirectXPassConfig(DirectXTargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ DirectXTargetMachine &getDirectXTargetMachine() const {
+ return getTM<DirectXTargetMachine>();
+ }
+
+ FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
+};
+
+DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT)
+ : LLVMTargetMachine(T,
+ "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-"
+ "f32:32-f64:64-n8:16:32:64",
+ TT, CPU, FS, Options, Reloc::Static, CodeModel::Small,
+ OL),
+ TLOF(std::make_unique<DXILTargetObjectFile>()),
+ Subtarget(std::make_unique<DirectXSubtarget>(TT, CPU, FS, *this)) {
+ initAsmInfo();
+}
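+
+// An informal reading of the data layout string above: little-endian ("e"),
+// ELF-style name mangling ("m:e"), 32-bit pointers ("p:32:32"), natural
+// alignment for the listed integer and floating-point types, and native
+// integer widths of 8/16/32/64 bits ("n8:16:32:64"). The "i1:32" entry gives
+// booleans 32-bit alignment, presumably to match HLSL's 32-bit bool.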
+
+DirectXTargetMachine::~DirectXTargetMachine() {}
+
+bool DirectXTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType, bool DisableVerify,
+ MachineModuleInfoWrapperPass *MMIWP) {
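+  // Pipeline sketch: lower LLVM intrinsics to DXIL ops, legalize the module
+  // for DXIL, and attach DXIL metadata; then either print textual IR
+  // (assembly), embed the bitcode and run the AsmPrinter (object file with a
+  // completed pipeline), or write raw DXIL bitcode otherwise.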
+ PM.add(createDXILOpLoweringLegacyPass());
+ PM.add(createDXILPrepareModulePass());
+ PM.add(createDXILTranslateMetadataPass());
+ if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+ PM.add(createDXILEmbedderPass());
+ }
+ switch (FileType) {
+ case CGFT_AssemblyFile:
+ PM.add(createPrintModulePass(Out, "", true));
+ break;
+ case CGFT_ObjectFile:
+ if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+ if (!MMIWP)
+ MMIWP = new MachineModuleInfoWrapperPass(this);
+ PM.add(MMIWP);
+ if (addAsmPrinter(PM, Out, DwoOut, FileType,
+ MMIWP->getMMI().getContext()))
+ return true;
+ } else
+ PM.add(createDXILWriterPass(Out));
+ break;
+ case CGFT_Null:
+ break;
+ }
+ return false;
+}
+
+bool DirectXTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ raw_pwrite_stream &Out,
+ bool DisableVerify) {
+ return true;
+}
+
+TargetPassConfig *DirectXTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new DirectXPassConfig(*this, PM);
+}
+
+const DirectXSubtarget *
+DirectXTargetMachine::getSubtargetImpl(const Function &) const {
+ return Subtarget.get();
+}
+
+TargetTransformInfo
+DirectXTargetMachine::getTargetTransformInfo(const Function &F) const {
+ return TargetTransformInfo(DirectXTTIImpl(this, F));
+}
+
+DirectXTargetLowering::DirectXTargetLowering(const DirectXTargetMachine &TM,
+ const DirectXSubtarget &STI)
+ : TargetLowering(TM) {}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/llvm/lib/Target/DirectX/DirectXTargetMachine.h
new file mode 100644
index 000000000000..ae41638b6acf
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.h
@@ -0,0 +1,51 @@
+//===- DirectXTargetMachine.h - DirectX Target Implementation ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXTARGETMACHINE_H
+#define LLVM_DIRECTX_DIRECTXTARGETMACHINE_H
+
+#include "DirectXSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class Function;
+class DirectXTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ std::unique_ptr<DirectXSubtarget> Subtarget;
+
+public:
+ DirectXTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT);
+
+ ~DirectXTargetMachine() override;
+
+ bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
+ raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+ bool DisableVerify,
+ MachineModuleInfoWrapperPass *MMIWP) override;
+
+ bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
+ raw_pwrite_stream &Out, bool DisableVerify) override;
+
+ const DirectXSubtarget *getSubtargetImpl(const Function &) const override;
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXTARGETMACHINE_H
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
new file mode 100644
index 000000000000..90beb386fa44
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
@@ -0,0 +1,39 @@
+//===- DirectXTargetTransformInfo.h - DirectX TTI ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXTARGETTRANSFORMINFO_H
+#define LLVM_DIRECTX_DIRECTXTARGETTRANSFORMINFO_H
+
+#include "DirectXSubtarget.h"
+#include "DirectXTargetMachine.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+class DirectXTTIImpl : public BasicTTIImplBase<DirectXTTIImpl> {
+ using BaseT = BasicTTIImplBase<DirectXTTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const DirectXSubtarget *ST;
+ const DirectXTargetLowering *TLI;
+
+ const DirectXSubtarget *getST() const { return ST; }
+ const DirectXTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit DirectXTTIImpl(const DirectXTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+};
+} // namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXTARGETTRANSFORMINFO_H
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp
new file mode 100644
index 000000000000..78ccbc444bce
--- /dev/null
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.cpp
@@ -0,0 +1,28 @@
+//===-- DirectXContainerObjectWriter.cpp - DX object writer ----*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DXContainer object writers for the DirectX backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DirectXContainerObjectWriter.h"
+#include "llvm/MC/MCDXContainerWriter.h"
+
+using namespace llvm;
+
+namespace {
+class DirectXContainerObjectWriter : public MCDXContainerTargetWriter {
+public:
+ DirectXContainerObjectWriter() : MCDXContainerTargetWriter() {}
+};
+} // namespace
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createDXContainerTargetObjectWriter() {
+ return std::make_unique<DirectXContainerObjectWriter>();
+}
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h
new file mode 100644
index 000000000000..a6fbdc865f7d
--- /dev/null
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXContainerObjectWriter.h
@@ -0,0 +1,24 @@
+//===-- DirectXContainerObjectWriter.h - DX object writer ------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DXContainer object writers for the DirectX backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXCONTAINEROBJECTWRITER_H
+#define LLVM_DIRECTX_DIRECTXCONTAINEROBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+
+std::unique_ptr<MCObjectTargetWriter> createDXContainerTargetObjectWriter();
+
+} // namespace llvm
+
+#endif // LLVM_DIRECTX_DIRECTXCONTAINEROBJECTWRITER_H
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
new file mode 100644
index 000000000000..0c97ab62a37b
--- /dev/null
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
@@ -0,0 +1,152 @@
+//===- DirectXMCTargetDesc.cpp - DirectX Target Implementation --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the DirectX target initializer.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectXMCTargetDesc.h"
+#include "DirectXContainerObjectWriter.h"
+#include "TargetInfo/DirectXTargetInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCDXContainerWriter.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Compiler.h"
+#include <memory>
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#define GET_INSTRINFO_MC_HELPERS
+#include "DirectXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "DirectXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "DirectXGenRegisterInfo.inc"
+
+namespace {
+
+// DXILInstPrinter is a null stub because DXIL instructions aren't printed.
+class DXILInstPrinter : public MCInstPrinter {
+public:
+ DXILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override {}
+
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override {
+ return std::make_pair<const char *, uint64_t>("", 0ull);
+ }
+
+};
+
+class DXILMCCodeEmitter : public MCCodeEmitter {
+public:
+ DXILMCCodeEmitter() {}
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override {}
+};
+
+class DXILAsmBackend : public MCAsmBackend {
+
+public:
+ DXILAsmBackend(const MCSubtargetInfo &STI) : MCAsmBackend(support::little) {}
+ ~DXILAsmBackend() override = default;
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createDXContainerTargetObjectWriter();
+ }
+
+ unsigned getNumFixupKinds() const override { return 0; }
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
+ return true;
+ }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return true;
+ }
+};
+
+class DirectXMCAsmInfo : public MCAsmInfo {
+public:
+ explicit DirectXMCAsmInfo(const Triple &TT, const MCTargetOptions &Options)
+ : MCAsmInfo() {}
+};
+
+} // namespace
+
+static MCInstPrinter *createDXILMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+ return new DXILInstPrinter(MAI, MII, MRI);
+ return nullptr;
+}
+
+static MCCodeEmitter *createDXILMCCodeEmitter(const MCInstrInfo &MCII,
+                                              MCContext &Ctx) {
+ return new DXILMCCodeEmitter();
+}
+
+static MCAsmBackend *createDXILMCAsmBackend(const Target &T,
+                                            const MCSubtargetInfo &STI,
+                                            const MCRegisterInfo &MRI,
+                                            const MCTargetOptions &Options) {
+ return new DXILAsmBackend(STI);
+}
+
+static MCSubtargetInfo *
+createDirectXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ return createDirectXMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+}
+
+static MCRegisterInfo *createDirectXMCRegisterInfo(const Triple &Triple) {
+ return new MCRegisterInfo();
+}
+
+static MCInstrInfo *createDirectXMCInstrInfo() { return new MCInstrInfo(); }
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetMC() {
+ Target &T = getTheDirectXTarget();
+ RegisterMCAsmInfo<DirectXMCAsmInfo> X(T);
+ TargetRegistry::RegisterMCInstrInfo(T, createDirectXMCInstrInfo);
+ TargetRegistry::RegisterMCInstPrinter(T, createDXILMCInstPrinter);
+ TargetRegistry::RegisterMCRegInfo(T, createDirectXMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(T, createDirectXMCSubtargetInfo);
+ TargetRegistry::RegisterMCCodeEmitter(T, createDXILMCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(T, createDXILMCAsmBackend);
+}
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h
new file mode 100644
index 000000000000..0c3873a24417
--- /dev/null
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.h
@@ -0,0 +1,29 @@
+//===- DirectXMCTargetDesc.h - DirectX Target Interface ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the DirectX target interface.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_DIRECTXMCTARGETDESC_H
+#define LLVM_DIRECTX_DIRECTXMCTARGETDESC_H
+
+// Include DirectX stub register info
+#define GET_REGINFO_ENUM
+#include "DirectXGenRegisterInfo.inc"
+
+// Include DirectX stub instruction info
+#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_MC_HELPER_DECLS
+#include "DirectXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "DirectXGenSubtargetInfo.inc"
+
+#endif // LLVM_DIRECTX_DIRECTXMCTARGETDESC_H
diff --git a/llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp b/llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp
new file mode 100644
index 000000000000..1d536bbd0011
--- /dev/null
+++ b/llvm/lib/Target/DirectX/PointerTypeAnalysis.cpp
@@ -0,0 +1,119 @@
+//===- Target/DirectX/PointerTypeAnalysis.cpp - PointerType analysis -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Analysis pass to assign types to opaque pointers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PointerTypeAnalysis.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+namespace {
+
+// Classifies the type of the value passed in by walking the value's users to
+// find a typed instruction to materialize a type from.
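+//
+// A small worked example (illustrative): given
+//   %p = alloca i32
+//   store i32 0, ptr %p
+// the alloca supplies pointee type i32, so %p is classified as a typed
+// pointer to i32 in its address space.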
+TypedPointerType *classifyPointerType(const Value *V) {
+ assert(V->getType()->isOpaquePointerTy() &&
+ "classifyPointerType called with non-opaque pointer");
+ Type *PointeeTy = nullptr;
+ if (auto *Inst = dyn_cast<GetElementPtrInst>(V)) {
+ if (!Inst->getResultElementType()->isOpaquePointerTy())
+ PointeeTy = Inst->getResultElementType();
+ } else if (auto *Inst = dyn_cast<AllocaInst>(V)) {
+ PointeeTy = Inst->getAllocatedType();
+ }
+ for (const auto *User : V->users()) {
+ Type *NewPointeeTy = nullptr;
+ if (const auto *Inst = dyn_cast<LoadInst>(User)) {
+ NewPointeeTy = Inst->getType();
+ } else if (const auto *Inst = dyn_cast<StoreInst>(User)) {
+ NewPointeeTy = Inst->getValueOperand()->getType();
+ } else if (const auto *Inst = dyn_cast<GetElementPtrInst>(User)) {
+ NewPointeeTy = Inst->getSourceElementType();
+ }
+ if (NewPointeeTy) {
+      // HLSL doesn't support pointers, so the IR rarely contains more than
+      // one or two levels of indirection; recursing here is safe in practice.
+ if (NewPointeeTy->isOpaquePointerTy())
+ return TypedPointerType::get(classifyPointerType(User),
+ V->getType()->getPointerAddressSpace());
+ if (!PointeeTy)
+ PointeeTy = NewPointeeTy;
+ else if (PointeeTy != NewPointeeTy)
+ PointeeTy = Type::getInt8Ty(V->getContext());
+ }
+ }
+ // If we were unable to determine the pointee type, set to i8
+ if (!PointeeTy)
+ PointeeTy = Type::getInt8Ty(V->getContext());
+ return TypedPointerType::get(PointeeTy,
+ V->getType()->getPointerAddressSpace());
+}
+
+// This function constructs a function type accepting typed pointers. It only
+// handles function arguments and return types, and assigns the function type to
+// the function's value in the type map.
+void classifyFunctionType(const Function &F, PointerTypeMap &Map) {
+ SmallVector<Type *, 8> NewArgs;
+ bool HasOpaqueTy = false;
+ Type *RetTy = F.getReturnType();
+ if (RetTy->isOpaquePointerTy()) {
+ RetTy = nullptr;
+ for (const auto &B : F) {
+ for (const auto &I : B) {
+ if (const auto *RetInst = dyn_cast_or_null<ReturnInst>(&I)) {
+ Type *NewRetTy = classifyPointerType(RetInst->getReturnValue());
+ if (!RetTy)
+ RetTy = NewRetTy;
+ else if (RetTy != NewRetTy)
+ RetTy = TypedPointerType::get(
+ Type::getInt8Ty(I.getContext()),
+ F.getReturnType()->getPointerAddressSpace());
+ }
+ }
+ }
+ }
+ for (auto &A : F.args()) {
+ Type *ArgTy = A.getType();
+ if (ArgTy->isOpaquePointerTy()) {
+ TypedPointerType *NewTy = classifyPointerType(&A);
+ Map[&A] = NewTy;
+ ArgTy = NewTy;
+ HasOpaqueTy = true;
+ }
+ NewArgs.push_back(ArgTy);
+ }
+ if (!HasOpaqueTy)
+ return;
+ Map[&F] = FunctionType::get(RetTy, NewArgs, false);
+}
+} // anonymous namespace
+
+PointerTypeMap PointerTypeAnalysis::run(const Module &M) {
+ PointerTypeMap Map;
+ for (auto &G : M.globals()) {
+ if (G.getType()->isOpaquePointerTy())
+ Map[&G] = classifyPointerType(&G);
+ }
+ for (auto &F : M) {
+ classifyFunctionType(F, Map);
+
+ for (const auto &B : F) {
+ for (const auto &I : B) {
+ if (I.getType()->isOpaquePointerTy())
+ Map[&I] = classifyPointerType(&I);
+ }
+ }
+ }
+
+ return Map;
+}
diff --git a/llvm/lib/Target/DirectX/PointerTypeAnalysis.h b/llvm/lib/Target/DirectX/PointerTypeAnalysis.h
new file mode 100644
index 000000000000..c4164b6bf359
--- /dev/null
+++ b/llvm/lib/Target/DirectX/PointerTypeAnalysis.h
@@ -0,0 +1,43 @@
+//===- Target/DirectX/PointerTypeAnalysis.h - PointerType analysis --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Analysis pass to assign types to opaque pointers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_DIRECTX_POINTERTYPEANALYSIS_H
+#define LLVM_TARGET_DIRECTX_POINTERTYPEANALYSIS_H
+
+#include "DXILPointerType.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+namespace dxil {
+
+// Maps each value to its reconstructed type, i.e. the underlying type plus
+// the levels of pointer indirection.
+using PointerTypeMap = DenseMap<const Value *, Type *>;
+
+/// An analysis to compute the \c PointerTypes for pointers in a \c Module.
+/// Since this analysis is only run during codegen and the new pass manager
+/// doesn't support codegen passes, this is written as a function in a
+/// namespace. It would be straightforward to turn it into a proper analysis
+/// pass.
+/// This code relies on typed pointers existing as LLVM types, but could be
+/// migrated to a custom Type if PointerType loses typed support.
+namespace PointerTypeAnalysis {
+
+/// Compute the \c PointerTypeMap for the module \c M.
+PointerTypeMap run(const Module &M);
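+
+// Minimal usage sketch (illustrative; assumes a Module M in scope):
+//   PointerTypeMap Map = PointerTypeAnalysis::run(M);
+//   for (const auto &KV : Map)
+//     ; // KV.first is the Value, KV.second its reconstructed type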
+} // namespace PointerTypeAnalysis
+
+} // namespace dxil
+
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_POINTERTYPEANALYSIS_H
diff --git a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
new file mode 100644
index 000000000000..54c577debc34
--- /dev/null
+++ b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
@@ -0,0 +1,30 @@
+//===- DirectXTargetInfo.cpp - DirectX Target Implementation ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the DirectX target initializer.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+Target &getTheDirectXTarget() {
+ static Target TheDirectXTarget;
+ return TheDirectXTarget;
+}
+} // namespace llvm
+
+using namespace llvm;
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetInfo() {
+ RegisterTarget<Triple::dxil, /*HasJIT=*/false> X(
+ getTheDirectXTarget(), "dxil", "DirectX Intermediate Language", "DXIL");
+}
diff --git a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h
new file mode 100644
index 000000000000..a860c430f81a
--- /dev/null
+++ b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.h
@@ -0,0 +1,18 @@
+//===-- DirectXTargetInfo.h - DirectX Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DIRECTX_TARGETINFO_DIRECTXTARGETINFO_H
+#define LLVM_DIRECTX_TARGETINFO_DIRECTXTARGETINFO_H
+
+namespace llvm {
+class Target;
+
+Target &getTheDirectXTarget();
+} // namespace llvm
+
+#endif // LLVM_DIRECTX_TARGETINFO_DIRECTXTARGETINFO_H
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 15eba89eeb55..4553f2fd9228 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -681,7 +681,7 @@ bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
Subsection = HexagonMCExpr::create(
MCConstantExpr::create(8192 + Res, getContext()), getContext());
- getStreamer().SubSection(Subsection);
+ getStreamer().subSection(Subsection);
return false;
}
@@ -1450,7 +1450,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MCOperand &MO_0 = Inst.getOperand(0);
// push section onto section stack
- MES->PushSection();
+ MES->pushSection();
std::string myCharStr;
MCSectionELF *mySection;
@@ -1485,7 +1485,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
} else
llvm_unreachable("unexpected type of machine operand!");
- MES->SwitchSection(mySection);
+ MES->switchSection(mySection);
unsigned byteSize = is32bit ? 4 : 8;
getStreamer().emitCodeAlignment(byteSize, &getSTI(), byteSize);
@@ -1526,7 +1526,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
} else
llvm_unreachable("unexpected type of machine operand!");
- MES->PopSection();
+ MES->popSection();
if (Sym) {
MCInst TmpInst;
diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp
index 17adf32750db..4d5789a3c5fe 100644
--- a/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -1056,9 +1056,8 @@ void BT::runEdgeQueue(BitVector &BlockScanned) {
CFGEdge Edge = FlowQ.front();
FlowQ.pop();
- if (EdgeExec.count(Edge))
+ if (!EdgeExec.insert(Edge).second)
return;
- EdgeExec.insert(Edge);
ReachedBB.insert(Edge.second);
const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second);
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 3c742c98077b..58d5df4c1f71 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -14,9 +14,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -78,11 +78,12 @@ static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI,
uint64_t Operand = Upper26 | Lower6;
return Operand;
}
-static HexagonDisassembler const &disassembler(void const *Decoder) {
+static HexagonDisassembler const &disassembler(const MCDisassembler *Decoder) {
return *static_cast<HexagonDisassembler const *>(Decoder);
}
template <size_t T>
-static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
+static void signedDecoder(MCInst &MI, unsigned tmp,
+ const MCDisassembler *Decoder) {
HexagonDisassembler const &Disassembler = disassembler(Decoder);
int64_t FullValue = fullValue(Disassembler, MI, SignExtend64<T>(tmp));
int64_t Extended = SignExtend64<32>(FullValue);
@@ -95,65 +96,66 @@ static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGeneralSubRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeHvxVRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus
DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
- unsigned RegNo,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus
+DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
-
+ const MCDisassembler *Decoder);
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder);
+ uint64_t /*Address*/,
+ const MCDisassembler *Decoder);
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
#include "HexagonDepDecoders.inc"
#include "HexagonGenDisassemblerTables.inc"
@@ -542,15 +544,15 @@ static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Fail;
}
-static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
static const MCPhysReg IntRegDecoderTable[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
@@ -563,10 +565,10 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable);
}
-static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeGeneralSubRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
static const MCPhysReg GeneralSubRegDecoderTable[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3,
Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7,
@@ -579,7 +581,7 @@ static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst,
static DecodeStatus DecodeHvxVRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
static const MCPhysReg HvxVRDecoderTable[] = {
Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4,
Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9,
@@ -592,9 +594,10 @@ static DecodeStatus DecodeHvxVRRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo, HvxVRDecoderTable);
}
-static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
+static DecodeStatus
+DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
static const MCPhysReg DoubleRegDecoderTable[] = {
Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
@@ -604,8 +607,10 @@ static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable);
}
-static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass(
- MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) {
+static DecodeStatus
+DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
static const MCPhysReg GeneralDoubleLow8RegDecoderTable[] = {
Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11};
@@ -615,7 +620,7 @@ static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass(
static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
static const MCPhysReg HvxWRDecoderTable[] = {
Hexagon::W0, Hexagon::WR0, Hexagon::W1, Hexagon::WR1, Hexagon::W2,
Hexagon::WR2, Hexagon::W3, Hexagon::WR3, Hexagon::W4, Hexagon::WR4,
@@ -629,11 +634,11 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable);
}
-LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
-static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
+LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
+static DecodeStatus
+DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo,
+                          uint64_t /*Address*/,
+                          const MCDisassembler *Decoder) {
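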
static const MCPhysReg HvxVQRDecoderTable[] = {
Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3,
Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7};
@@ -643,7 +648,7 @@ static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst,
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1,
Hexagon::P2, Hexagon::P3};
@@ -652,7 +657,7 @@ static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
static const MCPhysReg HvxQRDecoderTable[] = {Hexagon::Q0, Hexagon::Q1,
Hexagon::Q2, Hexagon::Q3};
@@ -661,7 +666,7 @@ static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
using namespace Hexagon;
static const MCPhysReg CtrlRegDecoderTable[] = {
@@ -687,9 +692,9 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
+static DecodeStatus
+DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
using namespace Hexagon;
static const MCPhysReg CtrlReg64DecoderTable[] = {
@@ -717,7 +722,7 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Register = 0;
switch (RegNo) {
case 0:
@@ -735,7 +740,7 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
HexagonDisassembler const &Disassembler = disassembler(Decoder);
int64_t FullValue = fullValue(Disassembler, MI, tmp);
assert(FullValue >= 0 && "Negative in unsigned decoder");
@@ -744,7 +749,8 @@ static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
}
static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
+ uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
HexagonDisassembler const &Disassembler = disassembler(Decoder);
unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI);
tmp = SignExtend64(tmp, Bits);
@@ -754,7 +760,7 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
// custom decoder for various jump/call immediates
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
HexagonDisassembler const &Disassembler = disassembler(Decoder);
unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI);
// r13_2 is not extendable, so if there are no extent bits, it's r13_2
@@ -762,7 +768,8 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
Bits = 15;
uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits));
uint32_t Extended = FullValue + Address;
- if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4))
+ if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0,
+ 4))
HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
return MCDisassembler::Success;
}
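
The extra `0` threaded into `tryAddingSymbolicOperand` above corresponds to a parameter newly inserted between the offset and the instruction size; my reading is that it carries the operand's encoded size. An annotated sketch of the updated call, where the argument labels are my own and not taken from the header:

  Disassembler.tryAddingSymbolicOperand(MI, Extended, Address,
                                        /*IsBranch=*/true, /*Offset=*/0,
                                        /*OpSize=*/0, /*InstSize=*/4);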
@@ -799,7 +806,7 @@ static const uint16_t SysRegDecoderTable[] = {
static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= sizeof(SysRegDecoderTable) / sizeof(SysRegDecoderTable[0]))
return MCDisassembler::Fail;
@@ -824,9 +831,9 @@ static const uint16_t SysReg64DecoderTable[] = {
Hexagon::S73_72, Hexagon::S75_74, Hexagon::S77_76, Hexagon::S79_78,
};
-static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
+static DecodeStatus
+DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
RegNo = RegNo >> 1;
if (RegNo >= sizeof(SysReg64DecoderTable) / sizeof(SysReg64DecoderTable[0]))
return MCDisassembler::Fail;
@@ -839,9 +846,9 @@ static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
+static DecodeStatus
+DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
using namespace Hexagon;
static const MCPhysReg GuestRegDecoderTable[] = {
@@ -865,9 +872,10 @@ static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
+static DecodeStatus
+DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
using namespace Hexagon;
static const MCPhysReg GuestReg64DecoderTable[] = {
diff --git a/llvm/lib/Target/Hexagon/HexagonArch.h b/llvm/lib/Target/Hexagon/HexagonArch.h
deleted file mode 100644
index 4a42ec98feb1..000000000000
--- a/llvm/lib/Target/Hexagon/HexagonArch.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- HexagonArch.h ------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H
-#define LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringRef.h"
-#include "HexagonDepArch.h"
-#include <algorithm>
-
-namespace llvm {
-namespace Hexagon {
-
-template <class ArchCont, typename Val>
-llvm::Optional<ArchEnum> GetCpu(ArchCont const &ArchList, Val CPUString) {
- llvm::Optional<ArchEnum> Res;
- auto Entry = ArchList.find(CPUString);
- if (Entry != ArchList.end())
- Res = Entry->second;
- return Res;
-}
-} // namespace Hexagon
-} // namespace llvm
-#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 411078052e0f..48d339234e9e 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -202,7 +202,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
MCSectionELF *Section = OutStreamer.getContext().getELFSection(
sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- OutStreamer.SwitchSection(Section);
+ OutStreamer.switchSection(Section);
Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName));
if (Sym->isUndefined()) {
@@ -231,7 +231,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
MCSectionELF *Section = OutStreamer.getContext().getELFSection(
".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- OutStreamer.SwitchSection(Section);
+ OutStreamer.switchSection(Section);
Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName));
if (Sym->isUndefined()) {
OutStreamer.emitLabel(Sym);
@@ -331,7 +331,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
MCSymbol *Sym =
smallData(*this, MI, *OutStreamer, Imm, 8, getSubtargetInfo());
- OutStreamer->SwitchSection(Current.first, Current.second);
+ OutStreamer->switchSection(Current.first, Current.second);
MCInst TmpInst;
MCOperand &Reg = MappedInst.getOperand(0);
TmpInst.setOpcode(Hexagon::L2_loadrdgp);
@@ -348,7 +348,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
MCSectionSubPair Current = OutStreamer->getCurrentSection();
MCSymbol *Sym =
smallData(*this, MI, *OutStreamer, Imm, 4, getSubtargetInfo());
- OutStreamer->SwitchSection(Current.first, Current.second);
+ OutStreamer->switchSection(Current.first, Current.second);
MCInst TmpInst;
MCOperand &Reg = MappedInst.getOperand(0);
TmpInst.setOpcode(Hexagon::L2_loadrigp);
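
Each of the `switchSection` hunks in this file follows the same save/emit/restore pattern. A condensed sketch, with `Section` standing in for whichever small-data section `smallData` selects:

  MCSectionSubPair Current = OutStreamer->getCurrentSection();
  OutStreamer->switchSection(Section);               // emit into small data
  // ... getOrCreateSymbol / emitLabel / value emission ...
  OutStreamer->switchSection(Current.first, Current.second); // restore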
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index b2a842233bb8..673b397ef3c5 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -39,6 +39,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <deque>
#include <iterator>
#include <limits>
#include <utility>
@@ -62,6 +63,9 @@ static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
cl::init(std::numeric_limits<unsigned>::max()));
static unsigned CountBitSplit = 0;
+static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit",
+ cl::Hidden, cl::init(1000));
+
namespace llvm {
void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
@@ -72,23 +76,29 @@ namespace llvm {
namespace {
// Set of virtual registers, based on BitVector.
- struct RegisterSet : private BitVector {
+ struct RegisterSet {
RegisterSet() = default;
- explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+ explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {}
RegisterSet(const RegisterSet &RS) = default;
- using BitVector::clear;
- using BitVector::count;
+ void clear() {
+ Bits.clear();
+ LRU.clear();
+ }
+
+ unsigned count() const {
+ return Bits.count();
+ }
unsigned find_first() const {
- int First = BitVector::find_first();
+ int First = Bits.find_first();
if (First < 0)
return 0;
return x2v(First);
}
unsigned find_next(unsigned Prev) const {
- int Next = BitVector::find_next(v2x(Prev));
+ int Next = Bits.find_next(v2x(Prev));
if (Next < 0)
return 0;
return x2v(Next);
@@ -97,54 +107,72 @@ namespace {
RegisterSet &insert(unsigned R) {
unsigned Idx = v2x(R);
ensure(Idx);
- return static_cast<RegisterSet&>(BitVector::set(Idx));
+ bool Exists = Bits.test(Idx);
+ Bits.set(Idx);
+ if (!Exists) {
+ LRU.push_back(Idx);
+ if (LRU.size() > RegisterSetLimit) {
+ unsigned T = LRU.front();
+ Bits.reset(T);
+ LRU.pop_front();
+ }
+ }
+ return *this;
}
RegisterSet &remove(unsigned R) {
unsigned Idx = v2x(R);
- if (Idx >= size())
- return *this;
- return static_cast<RegisterSet&>(BitVector::reset(Idx));
+ if (Idx < Bits.size()) {
+ bool Exists = Bits.test(Idx);
+ Bits.reset(Idx);
+ if (Exists) {
+ auto F = llvm::find(LRU, Idx);
+ assert(F != LRU.end());
+ LRU.erase(F);
+ }
+ }
+ return *this;
}
RegisterSet &insert(const RegisterSet &Rs) {
- return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+ for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
+ insert(R);
+ return *this;
}
RegisterSet &remove(const RegisterSet &Rs) {
- return static_cast<RegisterSet&>(BitVector::reset(Rs));
+ for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))
+ remove(R);
+ return *this;
}
- reference operator[](unsigned R) {
- unsigned Idx = v2x(R);
- ensure(Idx);
- return BitVector::operator[](Idx);
- }
bool operator[](unsigned R) const {
unsigned Idx = v2x(R);
- assert(Idx < size());
- return BitVector::operator[](Idx);
+ return Idx < Bits.size() ? Bits[Idx] : false;
}
bool has(unsigned R) const {
unsigned Idx = v2x(R);
- if (Idx >= size())
+ if (Idx >= Bits.size())
return false;
- return BitVector::test(Idx);
+ return Bits.test(Idx);
}
bool empty() const {
- return !BitVector::any();
+ return !Bits.any();
}
bool includes(const RegisterSet &Rs) const {
- // A.BitVector::test(B) <=> A-B != {}
- return !Rs.BitVector::test(*this);
+ // A.test(B) <=> A-B != {}
+ return !Rs.Bits.test(Bits);
}
bool intersects(const RegisterSet &Rs) const {
- return BitVector::anyCommon(Rs);
+ return Bits.anyCommon(Rs.Bits);
}
private:
+ BitVector Bits;
+ std::deque<unsigned> LRU;
+
void ensure(unsigned Idx) {
- if (size() <= Idx)
- resize(std::max(Idx+1, 32U));
+ if (Bits.size() <= Idx)
+ Bits.resize(std::max(Idx+1, 32U));
}
static inline unsigned v2x(unsigned v) {
@@ -1997,7 +2025,7 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
if (!isInt<8>(V))
return false;
- MI->RemoveOperand(2);
+ MI->removeOperand(2);
switch (Opc) {
case Hexagon::S2_storerb_io:
MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
diff --git a/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
index faa48211cd82..ca7fddb0ebe5 100644
--- a/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
@@ -33,8 +33,9 @@ using namespace llvm;
// Since we have no exact knowledge of code layout, allow some safety buffer
// for jump target. This is measured in bytes.
-static cl::opt<uint32_t> BranchRelaxSafetyBuffer("branch-relax-safety-buffer",
- cl::init(200), cl::Hidden, cl::ZeroOrMore, cl::desc("safety buffer size"));
+static cl::opt<uint32_t>
+ BranchRelaxSafetyBuffer("branch-relax-safety-buffer", cl::init(200),
+ cl::Hidden, cl::desc("safety buffer size"));
namespace llvm {
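
This hunk and several later ones delete `cl::ZeroOrMore` from option declarations; as I understand the surrounding LLVM-wide change, repeated occurrences of an option became tolerated by default, making the flag redundant. A sketch with a hypothetical option name:

  // "llc -example-buffer=100 -example-buffer=200" now parses without
  // cl::ZeroOrMore; for a scalar option the last occurrence wins.
  static cl::opt<uint32_t> ExampleBuffer("example-buffer", cl::init(200),
                                         cl::Hidden,
                                         cl::desc("hypothetical flag"));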
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index fc5e05d8c9a0..2fe2e032714a 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -52,13 +52,12 @@
using namespace llvm;
static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true),
- cl::Hidden, cl::ZeroOrMore);
+ cl::Hidden);
-static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden,
- cl::ZeroOrMore);
+static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden);
static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true),
- cl::Hidden, cl::ZeroOrMore);
+ cl::Hidden);
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index d8af35cbf3a8..56fb50cdb09e 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -9,6 +9,7 @@
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -28,12 +29,13 @@
using namespace llvm;
-static cl::opt<unsigned> CountThreshold("hexagon-cext-threshold",
- cl::init(3), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Minimum number of extenders to trigger replacement"));
+static cl::opt<unsigned> CountThreshold(
+ "hexagon-cext-threshold", cl::init(3), cl::Hidden,
+ cl::desc("Minimum number of extenders to trigger replacement"));
-static cl::opt<unsigned> ReplaceLimit("hexagon-cext-limit", cl::init(0),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum number of replacements"));
+static cl::opt<unsigned>
+ ReplaceLimit("hexagon-cext-limit", cl::init(0), cl::Hidden,
+ cl::desc("Maximum number of replacements"));
namespace llvm {
void initializeHexagonConstExtendersPass(PassRegistry&);
diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 105bf2811a20..8029dcff8052 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -868,8 +868,8 @@ void MachineConstPropagator::removeCFGEdge(MachineBasicBlock *From,
int N = PN.getNumOperands() - 2;
while (N > 0) {
if (PN.getOperand(N + 1).getMBB() == From) {
- PN.RemoveOperand(N + 1);
- PN.RemoveOperand(N);
+ PN.removeOperand(N + 1);
+ PN.removeOperand(N);
}
N -= 2;
}
@@ -1217,8 +1217,8 @@ bool MachineConstEvaluator::evaluateCMPii(uint32_t Cmp, const APInt &A1,
unsigned W2 = A2.getBitWidth();
unsigned MaxW = (W1 >= W2) ? W1 : W2;
if (Cmp & Comparison::U) {
- const APInt Zx1 = A1.zextOrSelf(MaxW);
- const APInt Zx2 = A2.zextOrSelf(MaxW);
+ APInt Zx1 = A1.zext(MaxW);
+ APInt Zx2 = A2.zext(MaxW);
if (Cmp & Comparison::L)
Result = Zx1.ult(Zx2);
else if (Cmp & Comparison::G)
@@ -1227,8 +1227,8 @@ bool MachineConstEvaluator::evaluateCMPii(uint32_t Cmp, const APInt &A1,
}
// Signed comparison.
- const APInt Sx1 = A1.sextOrSelf(MaxW);
- const APInt Sx2 = A2.sextOrSelf(MaxW);
+ APInt Sx1 = A1.sext(MaxW);
+ APInt Sx2 = A2.sext(MaxW);
if (Cmp & Comparison::L)
Result = Sx1.slt(Sx2);
else if (Cmp & Comparison::G)
@@ -1813,7 +1813,7 @@ bool MachineConstEvaluator::evaluateSplati(const APInt &A1, unsigned Bits,
unsigned Count, APInt &Result) {
assert(Count > 0);
unsigned BW = A1.getBitWidth(), SW = Count*Bits;
- APInt LoBits = (Bits < BW) ? A1.trunc(Bits) : A1.zextOrSelf(Bits);
+ APInt LoBits = (Bits < BW) ? A1.trunc(Bits) : A1.zext(Bits);
if (Count > 1)
LoBits = LoBits.zext(SW);
@@ -2510,7 +2510,7 @@ APInt HexagonConstEvaluator::getCmpImm(unsigned Opc, unsigned OpX,
void HexagonConstEvaluator::replaceWithNop(MachineInstr &MI) {
MI.setDesc(HII.get(Hexagon::A2_nop));
while (MI.getNumOperands() > 0)
- MI.RemoveOperand(0);
+ MI.removeOperand(0);
}
bool HexagonConstEvaluator::evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg RH,
@@ -2538,9 +2538,9 @@ bool HexagonConstEvaluator::evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg
}
for (unsigned i = 0; i < HiVs.size(); ++i) {
- APInt HV = HiVs[i].zextOrSelf(64) << 32;
+ APInt HV = HiVs[i].zext(64) << 32;
for (unsigned j = 0; j < LoVs.size(); ++j) {
- APInt LV = LoVs[j].zextOrSelf(64);
+ APInt LV = LoVs[j].zext(64);
const Constant *C = intToConst(HV | LV);
Result.add(C);
if (Result.isBottom())
@@ -3165,7 +3165,7 @@ bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI,
.addMBB(TargetB);
BrI.setDesc(JD);
while (BrI.getNumOperands() > 0)
- BrI.RemoveOperand(0);
+ BrI.removeOperand(0);
// This ensures that all implicit operands (e.g. implicit-def %r31, etc)
// are present in the rewritten branch.
for (auto &Op : NI->operands())
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 2ee7f1325df9..dc5b674424c8 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -33,16 +33,14 @@ using namespace llvm;
#define DEBUG_TYPE "hexagon-copy-combine"
-static
-cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines",
- cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
- cl::desc("Disable merging into combines"));
-static
-cl::opt<bool> IsConst64Disabled("disable-const64",
- cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
- cl::desc("Disable generation of const64"));
+static cl::opt<bool>
+ IsCombinesDisabled("disable-merge-into-combines", cl::Hidden,
+
+ cl::desc("Disable merging into combines"));
+static cl::opt<bool>
+ IsConst64Disabled("disable-const64", cl::Hidden,
+
+ cl::desc("Disable generation of const64"));
static
cl::opt<unsigned>
MaxNumOfInstsBetweenNewValueStoreAndTFR("max-num-inst-between-tfr-and-nv-store",
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h
index 56174dc7e136..41ce5c465d41 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -12,82 +12,28 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/ELF.h"
-
-#include <map>
-#include <string>
+#include "llvm/ADT/StringSwitch.h"
namespace llvm {
namespace Hexagon {
enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 };
-static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68, 69};
-static constexpr ArrayRef<unsigned> ArchValsNum(ArchValsNumArray);
-
-static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68", "v69" };
-static constexpr ArrayRef<StringLiteral> ArchValsText(ArchValsTextArray);
-
-static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68", "hexagonv69" };
-static constexpr ArrayRef<StringLiteral> CpuValsText(CpuValsTextArray);
-
-static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68", "v69" };
-static constexpr ArrayRef<StringLiteral> CpuNickText(CpuNickTextArray);
-
-static const std::map<std::string, ArchEnum> CpuTable{
- {"generic", Hexagon::ArchEnum::V5},
- {"hexagonv5", Hexagon::ArchEnum::V5},
- {"hexagonv55", Hexagon::ArchEnum::V55},
- {"hexagonv60", Hexagon::ArchEnum::V60},
- {"hexagonv62", Hexagon::ArchEnum::V62},
- {"hexagonv65", Hexagon::ArchEnum::V65},
- {"hexagonv66", Hexagon::ArchEnum::V66},
- {"hexagonv67", Hexagon::ArchEnum::V67},
- {"hexagonv67t", Hexagon::ArchEnum::V67},
- {"hexagonv68", Hexagon::ArchEnum::V68},
- {"hexagonv69", Hexagon::ArchEnum::V69},
-};
-
-static const std::map<std::string, unsigned> ElfFlagsByCpuStr = {
- {"generic", llvm::ELF::EF_HEXAGON_MACH_V5},
- {"hexagonv5", llvm::ELF::EF_HEXAGON_MACH_V5},
- {"hexagonv55", llvm::ELF::EF_HEXAGON_MACH_V55},
- {"hexagonv60", llvm::ELF::EF_HEXAGON_MACH_V60},
- {"hexagonv62", llvm::ELF::EF_HEXAGON_MACH_V62},
- {"hexagonv65", llvm::ELF::EF_HEXAGON_MACH_V65},
- {"hexagonv66", llvm::ELF::EF_HEXAGON_MACH_V66},
- {"hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67},
- {"hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T},
- {"hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68},
- {"hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69},
-};
-static const std::map<unsigned, std::string> ElfArchByMachFlags = {
- {llvm::ELF::EF_HEXAGON_MACH_V5, "V5"},
- {llvm::ELF::EF_HEXAGON_MACH_V55, "V55"},
- {llvm::ELF::EF_HEXAGON_MACH_V60, "V60"},
- {llvm::ELF::EF_HEXAGON_MACH_V62, "V62"},
- {llvm::ELF::EF_HEXAGON_MACH_V65, "V65"},
- {llvm::ELF::EF_HEXAGON_MACH_V66, "V66"},
- {llvm::ELF::EF_HEXAGON_MACH_V67, "V67"},
- {llvm::ELF::EF_HEXAGON_MACH_V67T, "V67T"},
- {llvm::ELF::EF_HEXAGON_MACH_V68, "V68"},
- {llvm::ELF::EF_HEXAGON_MACH_V69, "V69"},
-};
-static const std::map<unsigned, std::string> ElfCpuByMachFlags = {
- {llvm::ELF::EF_HEXAGON_MACH_V5, "hexagonv5"},
- {llvm::ELF::EF_HEXAGON_MACH_V55, "hexagonv55"},
- {llvm::ELF::EF_HEXAGON_MACH_V60, "hexagonv60"},
- {llvm::ELF::EF_HEXAGON_MACH_V62, "hexagonv62"},
- {llvm::ELF::EF_HEXAGON_MACH_V65, "hexagonv65"},
- {llvm::ELF::EF_HEXAGON_MACH_V66, "hexagonv66"},
- {llvm::ELF::EF_HEXAGON_MACH_V67, "hexagonv67"},
- {llvm::ELF::EF_HEXAGON_MACH_V67T, "hexagonv67t"},
- {llvm::ELF::EF_HEXAGON_MACH_V68, "hexagonv68"},
- {llvm::ELF::EF_HEXAGON_MACH_V69, "hexagonv69"},
-};
-
+inline Optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
+ return StringSwitch<Optional<Hexagon::ArchEnum>>(CPU)
+ .Case("generic", Hexagon::ArchEnum::V5)
+ .Case("hexagonv5", Hexagon::ArchEnum::V5)
+ .Case("hexagonv55", Hexagon::ArchEnum::V55)
+ .Case("hexagonv60", Hexagon::ArchEnum::V60)
+ .Case("hexagonv62", Hexagon::ArchEnum::V62)
+ .Case("hexagonv65", Hexagon::ArchEnum::V65)
+ .Case("hexagonv66", Hexagon::ArchEnum::V66)
+ .Case("hexagonv67", Hexagon::ArchEnum::V67)
+ .Case("hexagonv67t", Hexagon::ArchEnum::V67)
+ .Case("hexagonv68", Hexagon::ArchEnum::V68)
+ .Case("hexagonv69", Hexagon::ArchEnum::V69)
+ .Default(None);
+}
} // namespace Hexagon
-} // namespace llvm;
+} // namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
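
The table-based `GetCpu` from the deleted HexagonArch.h is replaced by this `StringSwitch`. A hypothetical call site, in which `CPUString`, `selectArch`, and the error handling are placeholders:

  if (Optional<Hexagon::ArchEnum> Arch = Hexagon::getCpu(CPUString))
    selectArch(*Arch);
  else
    report_fatal_error("unrecognized Hexagon CPU: " + Twine(CPUString));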
diff --git a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
index 7164af3ad5c6..e979cfe6e325 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
+++ b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc
@@ -14,58 +14,58 @@
#pragma clang diagnostic ignored "-Wunused-function"
#endif
-static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<6>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<12>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<13>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<14>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<3>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<4>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<5>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<6>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<7>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<9>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t, const void *Decoder) {
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t,
+ const MCDisassembler *Decoder) {
signedDecoder<8>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
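
Each decoder above instantiates `signedDecoder<Bits>` with the field width plus its scale (e.g. `s4_2` uses 6 bits: a 4-bit signed value with 2 implied low zero bits). The template itself is defined elsewhere; a plausible reading of it, offered only as an assumption:

  template <unsigned Bits>
  static void signedDecoderSketch(MCInst &MI, unsigned tmp,
                                  const MCDisassembler *Decoder) {
    HexagonDisassembler const &Dis = disassembler(Decoder);
    int64_t V = SignExtend64<Bits>(tmp); // sign-extend the raw field
    HexagonMCInstrInfo::addConstant(MI, V, Dis.getContext());
  }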
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 2207925ceeba..f7227dca3b60 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -826,8 +826,8 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
FR = RO.getReg(), FSR = RO.getSubReg();
else
continue;
- PN->RemoveOperand(i+1);
- PN->RemoveOperand(i);
+ PN->removeOperand(i+1);
+ PN->removeOperand(i);
}
if (TR == 0)
TR = SR, TSR = SSR;
diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 2693940bb1e9..853553f57ba4 100644
--- a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -696,7 +696,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
MI.setDesc(HII->get(TargetOpcode::COPY));
unsigned S = getRegState(ST);
while (MI.getNumOperands() > 1)
- MI.RemoveOperand(MI.getNumOperands()-1);
+ MI.removeOperand(MI.getNumOperands()-1);
MachineFunction &MF = *MI.getParent()->getParent();
MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub);
return true;
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 989a98571434..0b4a95bc9ce5 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -152,33 +152,38 @@ using namespace llvm;
static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
-static cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots",
- cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
- cl::ZeroOrMore);
-
-static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
- cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
- cl::init(6), cl::ZeroOrMore);
-
-static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
- cl::Hidden, cl::desc("Specify Os spill func threshold"),
- cl::init(1), cl::ZeroOrMore);
-
-static cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer",
- cl::Hidden, cl::desc("Enable runtime checks for stack overflow."),
- cl::init(false), cl::ZeroOrMore);
-
-static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
- cl::init(true), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Enable stack frame shrink wrapping"));
-
-static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit",
- cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Max count of stack frame shrink-wraps"));
-
-static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
- cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
- cl::init(false), cl::ZeroOrMore);
+static cl::opt<unsigned>
+ NumberScavengerSlots("number-scavenger-slots", cl::Hidden,
+ cl::desc("Set the number of scavenger slots"),
+ cl::init(2));
+
+static cl::opt<int>
+ SpillFuncThreshold("spill-func-threshold", cl::Hidden,
+ cl::desc("Specify O2(not Os) spill func threshold"),
+ cl::init(6));
+
+static cl::opt<int>
+ SpillFuncThresholdOs("spill-func-threshold-Os", cl::Hidden,
+ cl::desc("Specify Os spill func threshold"),
+ cl::init(1));
+
+static cl::opt<bool> EnableStackOVFSanitizer(
+ "enable-stackovf-sanitizer", cl::Hidden,
+ cl::desc("Enable runtime checks for stack overflow."), cl::init(false));
+
+static cl::opt<bool>
+ EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden,
+ cl::desc("Enable stack frame shrink wrapping"));
+
+static cl::opt<unsigned>
+ ShrinkLimit("shrink-frame-limit",
+ cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden,
+ cl::desc("Max count of stack frame shrink-wraps"));
+
+static cl::opt<bool>
+ EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden,
+ cl::desc("Enable long calls for save-restore stubs."),
+ cl::init(false));
static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
@@ -1018,7 +1023,7 @@ findCFILocation(MachineBasicBlock &B) {
void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
for (auto &B : MF) {
auto At = findCFILocation(B);
- if (At.hasValue())
+ if (At)
insertCFIInstructionsAt(B, At.getValue());
}
}
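
The `At.hasValue()` → `if (At)` change above uses `Optional`'s contextual bool conversion; the remaining `getValue()` could be spelled with `operator*` in the same idiom. Sketch:

  for (auto &B : MF)
    if (auto At = findCFILocation(B))
      insertCFIInstructionsAt(B, *At);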
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 0bb1658e7698..44f21dbacd3c 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -47,34 +47,36 @@
using namespace llvm;
-static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation."));
+static cl::opt<unsigned>
+ VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,
+ cl::desc("Vreg# cutoff for insert generation."));
// The distance cutoff is selected based on the precheckin-perf results:
// cutoffs 20, 25, 35, and 40 are worse than 30.
-static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert "
- "generation."));
+static cl::opt<unsigned>
+ VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,
+ cl::desc("Vreg distance cutoff for insert "
+ "generation."));
// Limit the container sizes for extreme cases where we run out of memory.
-static cl::opt<unsigned> MaxORLSize("insert-max-orl", cl::init(4096),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList"));
+static cl::opt<unsigned>
+ MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,
+ cl::desc("Maximum size of OrderedRegisterList"));
static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap"));
-
-static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Enable timing of insert generation"));
-static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert "
- "generation"));
-
-static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden,
- cl::ZeroOrMore);
-static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden,
- cl::ZeroOrMore);
+ cl::Hidden,
+ cl::desc("Maximum size of IFMap"));
+
+static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,
+ cl::desc("Enable timing of insert generation"));
+static cl::opt<bool>
+ OptTimingDetail("insert-timing-detail", cl::Hidden,
+ cl::desc("Enable detailed timing of insert "
+ "generation"));
+
+static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);
+static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);
// Whether to construct constant values via "insert". Could eliminate constant
// extenders, but often not practical.
-static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden,
- cl::ZeroOrMore);
+static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);
// The preprocessor gets confused when the DEBUG macro is passed larger
// chunks of code. Use this function to detect debugging.
@@ -92,11 +94,8 @@ namespace {
struct RegisterSet : private BitVector {
RegisterSet() = default;
explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
- RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
- RegisterSet &operator=(const RegisterSet &RS) {
- BitVector::operator=(RS);
- return *this;
- }
+ RegisterSet(const RegisterSet &RS) = default;
+ RegisterSet &operator=(const RegisterSet &RS) = default;
using BitVector::clear;
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 43afae441457..acc0bb8941c1 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -81,9 +81,9 @@ static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
// Turn it off by default. If a preheader block is not created here, the
// software pipeliner may be unable to find a block suitable to serve as
// a preheader. In that case SWP will not run.
-static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
- "instructions"));
+static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,
+ cl::desc("Allow speculation of preheader "
+ "instructions"));
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
@@ -1911,8 +1911,8 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
if (PredB != Latch) {
- PN->RemoveOperand(i+1);
- PN->RemoveOperand(i);
+ PN->removeOperand(i+1);
+ PN->removeOperand(i);
}
}
PN->addOperand(MachineOperand::CreateReg(NewPR, false));
diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
index e2215c9900d0..577eccd25c19 100644
--- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
@@ -106,7 +106,7 @@ bool HexagonHazardRecognizer::isNewStore(MachineInstr &MI) {
if (!TII->mayBeNewStore(MI))
return false;
MachineOperand &MO = MI.getOperand(MI.getNumOperands() - 1);
- return (MO.isReg() && RegDefs.count(MO.getReg()) != 0);
+ return MO.isReg() && RegDefs.contains(MO.getReg());
}
void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 161768b8dc22..b4979c953516 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1345,7 +1345,8 @@ inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) {
EVT T = N.getValueType();
if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
return false;
- R = N;
+ int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
+ R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
return true;
}
@@ -1540,7 +1541,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
break;
case ISD::AND: {
// Check if this is an AND with NumBits of lower bits set to 1.
- uint64_t Mask = (1 << NumBits) - 1;
+ uint64_t Mask = (1ULL << NumBits) - 1;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
if (C->getZExtValue() == Mask) {
Src = Val.getOperand(1);
@@ -1558,7 +1559,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
case ISD::OR:
case ISD::XOR: {
// OR/XOR with the lower NumBits bits set to 0.
- uint64_t Mask = (1 << NumBits) - 1;
+ uint64_t Mask = (1ULL << NumBits) - 1;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
if ((C->getZExtValue() & Mask) == 0) {
Src = Val.getOperand(1);
@@ -1580,7 +1581,7 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
}
bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
- return N->getAlignment() >= N->getMemoryVT().getStoreSize();
+ return N->getAlign().value() >= N->getMemoryVT().getStoreSize();
}
bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
@@ -1655,7 +1656,7 @@ struct WeightedLeaf {
int Weight;
int InsertionOrder;
- WeightedLeaf() : Value(SDValue()) { }
+ WeightedLeaf() {}
WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
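
The `1 << NumBits` → `1ULL << NumBits` fixes above matter because the literal `1` is a 32-bit int, so the shift is performed in int arithmetic and is undefined behavior once `NumBits` reaches the width of int. Standalone illustration:

  unsigned NumBits = 32;
  // uint64_t Bad = (1 << NumBits) - 1;   // UB: shift count == width of int
  uint64_t Mask = (1ULL << NumBits) - 1;  // defined for NumBits up to 63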
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 0a6dd727eb82..0848d30e7403 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -801,7 +801,7 @@ static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) {
return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
}
static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) {
- return static_cast<const HexagonSubtarget&>(G.getSubtarget());
+ return G.getSubtarget<HexagonSubtarget>();
}
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index d7ca934a23e6..94411b2e4f98 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -72,41 +72,41 @@ static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
cl::init(true), cl::Hidden,
cl::desc("Control jump table emission on Hexagon target"));
-static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable Hexagon SDNode scheduling"));
+static cl::opt<bool>
+ EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden,
+ cl::desc("Enable Hexagon SDNode scheduling"));
-static cl::opt<bool> EnableFastMath("ffast-math",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable Fast Math processing"));
+static cl::opt<bool> EnableFastMath("ffast-math", cl::Hidden,
+ cl::desc("Enable Fast Math processing"));
-static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
- cl::Hidden, cl::ZeroOrMore, cl::init(5),
- cl::desc("Set minimum jump tables"));
+static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
+ cl::init(5),
+ cl::desc("Set minimum jump tables"));
-static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
- cl::Hidden, cl::ZeroOrMore, cl::init(6),
- cl::desc("Max #stores to inline memcpy"));
+static cl::opt<int>
+ MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden, cl::init(6),
+ cl::desc("Max #stores to inline memcpy"));
-static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
- cl::Hidden, cl::ZeroOrMore, cl::init(4),
- cl::desc("Max #stores to inline memcpy"));
+static cl::opt<int>
+ MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", cl::Hidden, cl::init(4),
+ cl::desc("Max #stores to inline memcpy"));
-static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
- cl::Hidden, cl::ZeroOrMore, cl::init(6),
- cl::desc("Max #stores to inline memmove"));
+static cl::opt<int>
+ MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden, cl::init(6),
+ cl::desc("Max #stores to inline memmove"));
-static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
- cl::Hidden, cl::ZeroOrMore, cl::init(4),
- cl::desc("Max #stores to inline memmove"));
+static cl::opt<int>
+ MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", cl::Hidden,
+ cl::init(4),
+ cl::desc("Max #stores to inline memmove"));
-static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
- cl::Hidden, cl::ZeroOrMore, cl::init(8),
- cl::desc("Max #stores to inline memset"));
+static cl::opt<int>
+ MaxStoresPerMemsetCL("max-store-memset", cl::Hidden, cl::init(8),
+ cl::desc("Max #stores to inline memset"));
-static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
- cl::Hidden, cl::ZeroOrMore, cl::init(4),
- cl::desc("Max #stores to inline memset"));
+static cl::opt<int>
+ MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::init(4),
+ cl::desc("Max #stores to inline memset"));
static cl::opt<bool> AlignLoads("hexagon-align-loads",
cl::Hidden, cl::init(false),
@@ -1396,10 +1396,9 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
InFlag = Chain.getValue(1);
- unsigned Flags =
- static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
- ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
- : HexagonII::MO_GDPLT;
+ unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
+ ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
+ : HexagonII::MO_GDPLT;
return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
Hexagon::R0, Flags);
@@ -2164,6 +2163,11 @@ HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
// Always widen (remaining) vectors of i1.
if (ElemTy == MVT::i1)
return TargetLoweringBase::TypeWidenVector;
+ // Widen non-power-of-2 vectors. Such types cannot be split right now,
+ // and computeRegisterProperties will override "split" with "widen",
+ // which can cause other issues.
+ if (!isPowerOf2_32(VecLen))
+ return TargetLoweringBase::TypeWidenVector;
return TargetLoweringBase::TypeSplitVector;
}
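
A reduced sketch of the decision order after the new non-power-of-2 case, mirroring the hunk above rather than reproducing the full function. For example, a v3i32 request is widened (toward v4i32) because splitting it has no well-formed half type:

  TargetLoweringBase::LegalizeTypeAction
  preferredActionSketch(MVT VT) {               // reduced, illustrative only
    if (VT.getVectorElementType() == MVT::i1)
      return TargetLoweringBase::TypeWidenVector;
    if (!isPowerOf2_32(VT.getVectorNumElements()))
      return TargetLoweringBase::TypeWidenVector; // e.g. v3i32
    return TargetLoweringBase::TypeSplitVector;
  }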
@@ -2423,16 +2427,25 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
return getZero(dl, VecTy, DAG);
- if (ElemTy == MVT::i16) {
+ if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
assert(Elem.size() == 2);
if (AllConst) {
+ // The 'Consts' array will have all values as integers regardless
+ // of the vector element type.
uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
Consts[1]->getZExtValue() << 16;
- return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
+ return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32));
+ }
+ SDValue E0, E1;
+ if (ElemTy == MVT::f16) {
+ E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32);
+ E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32);
+ } else {
+ E0 = Elem[0];
+ E1 = Elem[1];
}
- SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
- {Elem[1], Elem[0]}, DAG);
- return DAG.getBitcast(MVT::v2i16, N);
+ SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
+ return DAG.getBitcast(VecTy, N);
}
if (ElemTy == MVT::i8) {
@@ -2506,7 +2519,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
return getZero(dl, VecTy, DAG);
// First try splat if possible.
- if (ElemTy == MVT::i16) {
+ if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
bool IsSplat = true;
for (unsigned i = First+1; i != Num; ++i) {
if (Elem[i] == Elem[First] || isUndef(Elem[i]))
@@ -2516,7 +2529,9 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
}
if (IsSplat) {
// Legalize the operand of SPLAT_VECTOR
- SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
+ SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First])
+ : Elem[First];
+ SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32);
return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
}
}
@@ -2525,8 +2540,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
if (AllConst) {
uint64_t Val = 0;
unsigned W = ElemTy.getSizeInBits();
- uint64_t Mask = (ElemTy == MVT::i8) ? 0xFFull
- : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
+ uint64_t Mask = (1ull << W) - 1;
for (unsigned i = 0; i != Num; ++i)
Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
@@ -3656,9 +3670,12 @@ HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
: AtomicExpansionKind::None;
}
-bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Do not expand loads and stores that don't exceed 64 bits.
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
+ ? AtomicExpansionKind::Expand
+ : AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index f9ce7a9407aa..9561dfe8a35d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -328,7 +328,7 @@ public:
Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
AtomicOrdering Ord) const override;
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
- bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0ba75a544c04..da6ad3ca2c93 100755
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -24,7 +24,6 @@ static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
-
void
HexagonTargetLowering::initializeHVXLowering() {
if (Subtarget.useHVX64BOps()) {
@@ -79,80 +78,85 @@ HexagonTargetLowering::initializeHVXLowering() {
// Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
// Note: v16i1 -> i16 is handled in type legalization instead of op
// legalization.
- setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
- setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
- setOperationAction(ISD::BITCAST, MVT::i128, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
- setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
- setOperationAction(ISD::FADD, MVT::v64f16, Legal);
- setOperationAction(ISD::FSUB, MVT::v64f16, Legal);
- setOperationAction(ISD::FMUL, MVT::v64f16, Legal);
- setOperationAction(ISD::FADD, MVT::v32f32, Legal);
- setOperationAction(ISD::FSUB, MVT::v32f32, Legal);
- setOperationAction(ISD::FMUL, MVT::v32f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
-
- // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat
- setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom);
+
+ static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
+ static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
+
+ for (MVT T : FloatV) {
+ setOperationAction(ISD::FADD, T, Legal);
+ setOperationAction(ISD::FSUB, T, Legal);
+ setOperationAction(ISD::FMUL, T, Legal);
+ setOperationAction(ISD::FMINNUM, T, Legal);
+ setOperationAction(ISD::FMAXNUM, T, Legal);
+
+ setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
+
+      setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
+
+ setOperationAction(ISD::MLOAD, T, Custom);
+ setOperationAction(ISD::MSTORE, T, Custom);
+ // Custom-lower BUILD_VECTOR. The standard (target-independent)
+ // handling of it would convert it to a load, which is not always
+ // the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+ }
+
// BUILD_VECTOR with f16 operands cannot be promoted without
// promoting the result, so lower the node to vsplat or constant pool
- setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal);
- setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
+
// Vector shuffle is always promoted to ByteV and a bitcast to f16 is
// generated.
- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
-
- // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
- // independent) handling of it would convert it to a load, which is
- // not always the optimal choice.
- setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom);
- // Make concat-vectors custom to handle concats of more than 2 vectors.
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom);
-
- setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
- setOperationAction(ISD::STORE, MVT::v64f32, Custom);
- setOperationAction(ISD::FADD, MVT::v64f32, Custom);
- setOperationAction(ISD::FSUB, MVT::v64f32, Custom);
- setOperationAction(ISD::FMUL, MVT::v64f32, Custom);
- setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
- setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
+
+ for (MVT P : FloatW) {
+ setOperationAction(ISD::LOAD, P, Custom);
+ setOperationAction(ISD::STORE, P, Custom);
+ setOperationAction(ISD::FADD, P, Custom);
+ setOperationAction(ISD::FSUB, P, Custom);
+ setOperationAction(ISD::FMUL, P, Custom);
+ setOperationAction(ISD::FMINNUM, P, Custom);
+ setOperationAction(ISD::FMAXNUM, P, Custom);
+ setOperationAction(ISD::VSELECT, P, Custom);
+
+ // Custom-lower BUILD_VECTOR. The standard (target-independent)
+ // handling of it would convert it to a load, which is not always
+ // the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, P, Custom);
+ // Make concat-vectors custom to handle concats of more than 2 vectors.
+ setOperationAction(ISD::CONCAT_VECTORS, P, Custom);
+
+ setOperationAction(ISD::MLOAD, P, Custom);
+ setOperationAction(ISD::MSTORE, P, Custom);
+ }
if (Subtarget.useHVXQFloatOps()) {
setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
} else if (Subtarget.useHVXIEEEFPOps()) {
setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
}
-
- setOperationAction(ISD::MLOAD, MVT::v32f32, Custom);
- setOperationAction(ISD::MSTORE, MVT::v32f32, Custom);
- setOperationAction(ISD::MLOAD, MVT::v64f16, Custom);
- setOperationAction(ISD::MSTORE, MVT::v64f16, Custom);
- setOperationAction(ISD::MLOAD, MVT::v64f32, Custom);
- setOperationAction(ISD::MSTORE, MVT::v64f32, Custom);
}
for (MVT T : LegalV) {
@@ -382,8 +386,7 @@ HexagonTargetLowering::initializeHVXLowering() {
}
}
- setTargetDAGCombine(ISD::SPLAT_VECTOR);
- setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine({ISD::SPLAT_VECTOR, ISD::VSELECT});
}
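The two single-node calls are folded into one call to the ArrayRef-taking overload of setTargetDAGCombine. A rough sketch of what that overload amounts to, assuming it simply defers to the single-node setter (this mirrors how such convenience overloads are typically written, not necessarily the exact upstream body):

  // Rough shape of the list-taking overload (member of TargetLoweringBase):
  void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) {
    for (ISD::NodeType NT : NTs)
      setTargetDAGCombine(NT); // defer to the existing single-node overload
  }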
unsigned
@@ -780,7 +783,6 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
SDValue N = HalfV0;
SDValue M = HalfV1;
for (unsigned i = 0; i != NumWords/2; ++i) {
-
// Rotate by element count since last insertion.
if (Words[i] != Words[n] || VecHist[n] <= 1) {
Sn = DAG.getConstant(Rn, dl, MVT::i32);
@@ -1411,6 +1413,17 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
for (unsigned i = 0; i != Size; ++i)
Ops.push_back(Op.getOperand(i));
+ // First, split the BUILD_VECTOR for vector pairs. We could generate
+ // some pairs directly (via splat), but splats should be generated
+ // by the combiner prior to getting here.
+ if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
+ ArrayRef<SDValue> A(Ops);
+ MVT SingleTy = typeSplit(VecTy).first;
+ SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
+ SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
+ }
+
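With the split hoisted above the f16 handling, any BUILD_VECTOR whose bit width equals a vector pair (16 times the subtarget's vector length) is first divided into two single-register halves and reassembled with CONCAT_VECTORS. A toy sketch of the halving itself, using only ArrayRef (the helper name is ours, not from the patch):

  #include "llvm/ADT/ArrayRef.h"
  #include <utility>

  // Halve an operand list, as take_front/drop_front do above.
  template <typename T>
  static std::pair<llvm::ArrayRef<T>, llvm::ArrayRef<T>>
  splitInHalf(llvm::ArrayRef<T> Ops) {
    size_t Half = Ops.size() / 2;
    return {Ops.take_front(Half), Ops.drop_front(Half)};
  }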
if (VecTy.getVectorElementType() == MVT::i1)
return buildHvxVectorPred(Ops, dl, VecTy, DAG);
@@ -1427,14 +1440,6 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
}
- if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
- ArrayRef<SDValue> A(Ops);
- MVT SingleTy = typeSplit(VecTy).first;
- SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
- SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
- }
-
return buildHvxVectorReg(Ops, dl, VecTy, DAG);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 9b4e92a16663..c8e6276aa4de 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -77,9 +77,9 @@ cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction",
cl::Hidden, cl::init(true), cl::desc("Enable branch prediction"));
-static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable schedule adjustment for new value stores."));
+static cl::opt<bool> DisableNVSchedule(
+ "disable-hexagon-nv-schedule", cl::Hidden,
+ cl::desc("Disable schedule adjustment for new value stores."));
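A recurring cleanup throughout this commit: cl::ZeroOrMore vanishes from option declarations (around this LLVM release it became the default behavior for cl::opt), and explicit cl::init(false) is dropped where it matches the value-initialized default for bool. The shape of the cleanup on a stand-in option (the option name here is ours):

  // Before: cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc(...)
  // After:  cl::Hidden, cl::desc(...)  since both removed flags are defaults
  static cl::opt<bool> SomeFlag("some-flag", cl::Hidden,
                                cl::desc("Stand-in option; name is ours"));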
static cl::opt<bool> EnableTimingClassLatency(
"enable-timing-class-latency", cl::Hidden, cl::init(false),
@@ -94,11 +94,12 @@ static cl::opt<bool> EnableACCForwarding(
cl::desc("Enable vec acc forwarding"));
static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large",
- cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm"));
+ cl::init(true), cl::Hidden,
+ cl::desc("branch relax asm"));
-static cl::opt<bool> UseDFAHazardRec("dfa-hazard-rec",
- cl::init(true), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Use the DFA based hazard recognizer."));
+static cl::opt<bool>
+ UseDFAHazardRec("dfa-hazard-rec", cl::init(true), cl::Hidden,
+ cl::desc("Use the DFA based hazard recognizer."));
/// Constants for Hexagon instructions.
const int Hexagon_MEMW_OFFSET_MAX = 4095;
@@ -158,7 +159,7 @@ bool HexagonInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
auto Op = MI.getOperand(1);
// If the instruction has a global address as operand, it is not cheap
// since the operand will be constant extended.
- if (Op.getType() == MachineOperand::MO_GlobalAddress)
+ if (Op.isGlobal())
return false;
// If the instruction has an operand of size > 16 bits, it will be
// const-extended and hence is not cheap.
@@ -1072,6 +1073,43 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
};
switch (Opc) {
+ case Hexagon::PS_call_instrprof_custom: {
+ auto Op0 = MI.getOperand(0);
+ assert(Op0.isGlobal() &&
+ "First operand must be a global containing handler name.");
+ const GlobalValue *NameVar = Op0.getGlobal();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(NameVar);
+ auto *Arr = cast<ConstantDataArray>(GV->getInitializer());
+ StringRef NameStr = Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
+
+ MachineOperand &Op1 = MI.getOperand(1);
+ // Set R0 with the imm value to be passed to the custom profiling handler.
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrsi), Hexagon::R0)
+ .addImm(Op1.getImm());
+ // The call to the custom handler is being treated as a special one as the
+ // callee is responsible for saving and restoring all the registers
+ // (including caller saved registers) it needs to modify. This is
+ // done to reduce the impact of instrumentation on the code being
+ // instrumented/profiled.
+ // NOTE: R14, R15 and R28 are reserved for PLT handling. These registers
+ // are in the Def list of the Hexagon::PS_call_instrprof_custom and
+ // therefore will be handled appropriately during register allocation.
+
+ // TODO: It may be a good idea to add a separate pseudo instruction for
+ // static relocation which doesn't need to reserve r14, r15 and r28.
+
+ auto MIB = BuildMI(MBB, MI, DL, get(Hexagon::J2_call))
+ .addUse(Hexagon::R0, RegState::Implicit|RegState::InternalRead)
+ .addDef(Hexagon::R29, RegState::ImplicitDefine)
+ .addDef(Hexagon::R30, RegState::ImplicitDefine)
+ .addDef(Hexagon::R14, RegState::ImplicitDefine)
+ .addDef(Hexagon::R15, RegState::ImplicitDefine)
+ .addDef(Hexagon::R28, RegState::ImplicitDefine);
+ const char *cstr = MF.createExternalSymbolName(NameStr);
+ MIB.addExternalSymbol(cstr);
+ MBB.erase(MI);
+ return true;
+ }
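Net effect of this case, sketched as the emitted sequence (approximate Hexagon syntax; the handler name is whatever string initializes the global passed to the intrinsic):

  // r0 = #site_id     ; A2_tfrsi: unique site id handed to the handler in R0
  // call <handler>    ; J2_call: R14/R15/R28 stay listed as defs for PLT use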
case TargetOpcode::COPY: {
MachineOperand &MD = MI.getOperand(0);
MachineOperand &MS = MI.getOperand(1);
@@ -1392,8 +1430,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// Generate a misaligned load that is guaranteed to cause a crash.
class CrashPseudoSourceValue : public PseudoSourceValue {
public:
- CrashPseudoSourceValue(const TargetInstrInfo &TII)
- : PseudoSourceValue(TargetCustom, TII) {}
+ CrashPseudoSourceValue(const TargetMachine &TM)
+ : PseudoSourceValue(TargetCustom, TM) {}
bool isConstant(const MachineFrameInfo *) const override {
return false;
@@ -1409,7 +1447,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
};
- static const CrashPseudoSourceValue CrashPSV(*this);
+ static const CrashPseudoSourceValue CrashPSV(MF.getTarget());
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(&CrashPSV),
MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8,
@@ -1662,7 +1700,7 @@ bool HexagonInstrInfo::PredicateInstruction(
MI.setDesc(get(PredOpc));
while (unsigned n = MI.getNumOperands())
- MI.RemoveOperand(n-1);
+ MI.removeOperand(n-1);
for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
MI.addOperand(T->getOperand(i));
@@ -4464,6 +4502,9 @@ unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const {
unsigned Size = getMemAccessSizeInBytes(MemAccessSize(S));
if (Size != 0)
return Size;
+ // Y2_dcfetchbo is special
+ if (MI.getOpcode() == Hexagon::Y2_dcfetchbo)
+ return HexagonII::DoubleWordAccess;
// Handle vector access sizes.
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index ccaf1aac1ce0..2d49fa369642 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -192,10 +192,8 @@ private:
void push_back(Value *V) {
// Do not push back duplicates.
- if (!S.count(V)) {
+ if (S.insert(V).second)
Q.push_back(V);
- S.insert(V);
- }
}
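The rewrite leans on the return value of insert: for set-like containers it returns an (iterator, bool) pair whose .second is true only when the value was newly inserted, so the membership test and the insertion collapse into a single lookup. A standalone sketch of the same worklist idiom (plain STL, names ours):

  #include <deque>
  #include <set>

  struct Worklist {
    std::deque<int> Q; // visit order
    std::set<int> S;   // dedup set
    void push_back(int V) {
      if (S.insert(V).second) // true => V was not present before
        Q.push_back(V);
    }
  };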
Value *pop_front_val() {
@@ -1152,9 +1150,8 @@ bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
if (IsPhi && HadPhi)
return false;
HadPhi |= IsPhi;
- if (Cycle.count(I))
+ if (!Cycle.insert(I))
return false;
- Cycle.insert(I);
if (findCycle(I, In, Cycle))
break;
Cycle.remove(I);
@@ -1487,7 +1484,7 @@ bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
for (auto &I : *LoopB)
- if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
+ if (Value *SV = simplifyInstruction(&I, {DL, &TLI, &DT}))
I.replaceAllUsesWith(SV);
for (Instruction &I : llvm::make_early_inc_range(*LoopB))
@@ -2169,7 +2166,7 @@ CleanupAndExit:
SCEV::FlagNUW);
Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
if (Instruction *In = dyn_cast<Instruction>(NumBytes))
- if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
+ if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT}))
NumBytes = Simp;
CallInst *NewCall;
@@ -2279,7 +2276,7 @@ CleanupAndExit:
Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
MemmoveB->getTerminator());
if (Instruction *In = dyn_cast<Instruction>(NumWords))
- if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
+ if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT}))
NumWords = Simp;
Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
index aabae009d7c3..539db8f55005 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
@@ -13,3 +13,9 @@ using namespace llvm;
// pin vtable to this file
void HexagonMachineFunctionInfo::anchor() {}
+MachineFunctionInfo *HexagonMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<HexagonMachineFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 89ef5c2a891d..a02de24b176a 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -42,6 +42,10 @@ public:
HexagonMachineFunctionInfo() = default;
HexagonMachineFunctionInfo(MachineFunction &MF) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 8edcb745d654..f539717e42d5 100644
--- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -61,8 +61,7 @@ static cl::opt<int> DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden,
"New Value Jump"));
static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
- cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable New Value Jumps"));
+ cl::desc("Disable New Value Jumps"));
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 3abbd896c519..80fbf33d83b7 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -3273,3 +3273,9 @@ let AddedComplexity = 100 in {
def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
(C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
}
+
+def: Pat<(int_hexagon_instrprof_custom (HexagonAtPcrel tglobaladdr:$addr), u32_0ImmPred:$I),
+ (PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>;
+
+def: Pat<(int_hexagon_instrprof_custom (HexagonCONST32 tglobaladdr:$addr), u32_0ImmPred:$I),
+ (PS_call_instrprof_custom tglobaladdr:$addr, imm:$I)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 0a3dff057ccd..6fb1313667a9 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -37,7 +37,7 @@ def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
def HwLen2: SDNodeXForm<imm, [{
- const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
+ const auto &ST = CurDAG->getSubtarget<HexagonSubtarget>();
return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
}]>;
@@ -92,19 +92,19 @@ def IsVecOff : PatLeaf<(i32 imm), [{
def alignedload: PatFrag<(ops node:$a), (load $a), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+ return isAlignedMemNode(cast<MemSDNode>(N));
}]>;
def unalignedload: PatFrag<(ops node:$a), (load $a), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+ return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;
def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+ return isAlignedMemNode(cast<MemSDNode>(N));
}]>;
def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+ return !isAlignedMemNode(cast<MemSDNode>(N));
}]>;
@@ -738,9 +738,14 @@ let Predicates = [UseHVX] in {
def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (A2_tfrsi -1))>;
-let Predicates = [UseHVX] in
- def: Pat<(select I1:$Pu, VecI1:$Qs, VecI1:$Qt),
+let Predicates = [UseHVX] in {
+ def: Pat<(select I1:$Pu, VecQ8:$Qs, VecQ8:$Qt),
+ (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
+ def: Pat<(select I1:$Pu, VecQ16:$Qs, VecQ16:$Qt),
(V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
+ def: Pat<(select I1:$Pu, VecQ32:$Qs, VecQ32:$Qt),
+ (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
+}
let Predicates = [UseHVX] in {
def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index 1ff248200572..ccd90f814813 100644
--- a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -56,21 +56,21 @@ using namespace llvm;
#define DEBUG_TYPE "hexagon-peephole"
-static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Peephole Optimization"));
+static cl::opt<bool>
+ DisableHexagonPeephole("disable-hexagon-peephole", cl::Hidden,
+ cl::desc("Disable Peephole Optimization"));
-static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Optimization of PNotP"));
+static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp", cl::Hidden,
+ cl::desc("Disable Optimization of PNotP"));
-static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
- cl::Hidden, cl::ZeroOrMore, cl::init(true),
- cl::desc("Disable Optimization of Sign/Zero Extends"));
+static cl::opt<bool>
+ DisableOptSZExt("disable-hexagon-optszext", cl::Hidden, cl::init(true),
+ cl::desc("Disable Optimization of Sign/Zero Extends"));
-static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64",
- cl::Hidden, cl::ZeroOrMore, cl::init(true),
- cl::desc("Disable Optimization of extensions to i64."));
+static cl::opt<bool>
+ DisableOptExtTo64("disable-hexagon-opt-ext-to-64", cl::Hidden,
+ cl::init(true),
+ cl::desc("Disable Optimization of extensions to i64."));
namespace llvm {
FunctionPass *createHexagonPeephole();
@@ -208,14 +208,14 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// Try to find in the map.
if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
// Change the 1st operand.
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
MI.addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
} else {
DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
PeepholeDoubleRegsMap.find(SrcReg);
if (DI != PeepholeDoubleRegsMap.end()) {
std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
MI.addOperand(MachineOperand::CreateReg(
PeepholeSrc.first, false /*isDef*/, false /*isImp*/,
false /*isKill*/, false /*isDead*/, false /*isUndef*/,
diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td
index afd63d6d4aa7..7c45568f7734 100644
--- a/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -182,6 +182,28 @@ let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
Defs = [PC, R31, R6, R7, P0] in
def PS_call_stk : T_Call<"">;
+// This pseudo instruction is used to replace the int_hexagon_instrprof_custom
+// intrinsic with a call to the custom handler passed as the first argument to
+// the intrinsic.
+
+// Please note:
+// 1) The call to the custom handler is being treated as a special one as the
+// callee is responsible for saving and restoring all the registers it needs
+// to modify. This includes caller saved registers as well as r0-r5 argument
+// registers. This is done to reduce the impact of instrumentation on the
+// code being instrumented/profiled.
+// 2) R14, R15 and R28 are reserved for PLT handling and therefore are
+// part of the def list.
+// 3) R0 is used to pass the unique id associated with an instrumentation site
+// to the handler.
+// 4) All the other registers (R29, R30, R31, PC) get modified by the call
+// instruction.
+
+// TODO: It may be a good idea to add a separate pseudo instruction for
+// static relocation which doesn't need to reserve r14, r15 and r28.
+
+let hasSideEffects = 1, isCall = 1, Defs = [R0, R14, R15, R28, R29, R30, R31, PC] in
+def PS_call_instrprof_custom : Pseudo<(outs), (ins s32_0Imm:$dst, u32_0Imm:$Ii), "">;
+
// Call, no return.
let isCall = 1, hasSideEffects = 1, cofMax1 = 1, isCodeGenOnly = 1 in
def PS_callr_nr: InstHexagon<(outs), (ins IntRegs:$Rs),
diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index f26e23befde2..fb6918949cce 100644
--- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -201,7 +201,7 @@ void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) {
for (NodeAddr<RefNode*> RA : Refs)
OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp())));
- MI->RemoveOperand(OpNum);
+ MI->removeOperand(OpNum);
for (NodeAddr<RefNode*> RA : Refs) {
unsigned N = OpMap[RA.Id];
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 6e55bc6b5c2c..f0e56d74fcd1 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -228,7 +228,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case Hexagon::PS_fia:
MI.setDesc(HII.get(Hexagon::A2_addi));
MI.getOperand(FIOp).ChangeToImmediate(RealOffset);
- MI.RemoveOperand(FIOp+1);
+ MI.removeOperand(FIOp+1);
return;
case Hexagon::PS_fi:
// Set up the instruction for updating below.
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index bdd2a2cfc5fa..2283d1b7f9c6 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -39,45 +39,46 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"
-static cl::opt<bool> EnableBSBSched("enable-bsb-sched",
- cl::Hidden, cl::ZeroOrMore, cl::init(true));
+static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
+ cl::init(true));
-static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
+static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
+ cl::init(false));
-static cl::opt<bool> EnableDotCurSched("enable-cur-sched",
- cl::Hidden, cl::ZeroOrMore, cl::init(true),
- cl::desc("Enable the scheduler to generate .cur"));
+static cl::opt<bool>
+ EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
+ cl::desc("Enable the scheduler to generate .cur"));
-static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
+static cl::opt<bool>
+ DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
+ cl::desc("Disable Hexagon MI Scheduling"));
-static cl::opt<bool> EnableSubregLiveness("hexagon-subreg-liveness",
- cl::Hidden, cl::ZeroOrMore, cl::init(true),
- cl::desc("Enable subregister liveness tracking for Hexagon"));
+static cl::opt<bool> EnableSubregLiveness(
+ "hexagon-subreg-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable subregister liveness tracking for Hexagon"));
-static cl::opt<bool> OverrideLongCalls("hexagon-long-calls",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("If present, forces/disables the use of long calls"));
+static cl::opt<bool> OverrideLongCalls(
+ "hexagon-long-calls", cl::Hidden,
+ cl::desc("If present, forces/disables the use of long calls"));
-static cl::opt<bool> EnablePredicatedCalls("hexagon-pred-calls",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Consider calls to be predicable"));
+static cl::opt<bool>
+ EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
+ cl::desc("Consider calls to be predicable"));
-static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
- cl::Hidden, cl::ZeroOrMore, cl::init(true));
+static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
+ cl::init(true));
static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
- cl::Hidden, cl::ZeroOrMore, cl::init(true));
+ cl::Hidden, cl::init(true));
-static cl::opt<bool> EnableCheckBankConflict("hexagon-check-bank-conflict",
- cl::Hidden, cl::ZeroOrMore, cl::init(true),
- cl::desc("Enable checking for cache bank conflicts"));
+static cl::opt<bool> EnableCheckBankConflict(
+ "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
+ cl::desc("Enable checking for cache bank conflicts"));
static cl::opt<bool> EnableV68FloatCodeGen(
- "force-hvx-float", cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable the code-generation for vector float instructions on v68."));
+ "force-hvx-float", cl::Hidden,
+ cl::desc(
+ "Enable the code-generation for vector float instructions on v68."));
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
StringRef FS, const TargetMachine &TM)
@@ -95,8 +96,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- Optional<Hexagon::ArchEnum> ArchVer =
- Hexagon::GetCpu(Hexagon::CpuTable, CPUString);
+ Optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
if (ArchVer)
HexagonArchVersion = *ArchVer;
else
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index db682676cf12..f6c70928c2f6 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
-#include "HexagonArch.h"
+#include "HexagonDepArch.h"
#include "HexagonFrameLowering.h"
#include "HexagonISelLowering.h"
#include "HexagonInstrInfo.h"
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index c6703bb8a62a..4e04939e6690 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -32,41 +32,44 @@
using namespace llvm;
-static cl::opt<bool> EnableCExtOpt("hexagon-cext", cl::Hidden, cl::ZeroOrMore,
- cl::init(true), cl::desc("Enable Hexagon constant-extender optimization"));
+static cl::opt<bool>
+ EnableCExtOpt("hexagon-cext", cl::Hidden, cl::init(true),
+ cl::desc("Enable Hexagon constant-extender optimization"));
-static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore,
- cl::init(true), cl::desc("Enable RDF-based optimizations"));
+static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::init(true),
+ cl::desc("Enable RDF-based optimizations"));
static cl::opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
-static cl::opt<bool> DisableAModeOpt("disable-hexagon-amodeopt",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon Addressing Mode Optimization"));
+static cl::opt<bool>
+ DisableAModeOpt("disable-hexagon-amodeopt", cl::Hidden,
+ cl::desc("Disable Hexagon Addressing Mode Optimization"));
-static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon CFG Optimization"));
+static cl::opt<bool>
+ DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden,
+ cl::desc("Disable Hexagon CFG Optimization"));
-static cl::opt<bool> DisableHCP("disable-hcp", cl::init(false), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Disable Hexagon constant propagation"));
+static cl::opt<bool>
+ DisableHCP("disable-hcp", cl::Hidden,
+ cl::desc("Disable Hexagon constant propagation"));
static cl::opt<bool> DisableStoreWidening("disable-store-widen",
cl::Hidden, cl::init(false), cl::desc("Disable store widening"));
static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
- cl::init(true), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Early expansion of MUX"));
+ cl::init(true), cl::Hidden,
+ cl::desc("Early expansion of MUX"));
static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Enable early if-conversion"));
+ cl::desc("Enable early if-conversion"));
static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true),
cl::Hidden, cl::desc("Generate \"insert\" instructions"));
-static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions"));
+static cl::opt<bool>
+ EnableCommGEP("hexagon-commgep", cl::init(true), cl::Hidden,
+ cl::desc("Enable commoning of GEP instructions"));
static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true),
cl::Hidden, cl::desc("Generate \"extract\" instructions"));
@@ -78,9 +81,9 @@ static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true),
cl::Hidden, cl::desc("Enable conversion of arithmetic operations to "
"predicate instructions"));
-static cl::opt<bool> EnableLoopPrefetch("hexagon-loop-prefetch",
- cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Enable loop data prefetch on Hexagon"));
+static cl::opt<bool>
+ EnableLoopPrefetch("hexagon-loop-prefetch", cl::Hidden,
+ cl::desc("Enable loop data prefetch on Hexagon"));
static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
cl::desc("Disable splitting double registers"));
@@ -94,22 +97,24 @@ static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true),
static cl::opt<bool> HexagonNoOpt("hexagon-noopt", cl::init(false),
cl::Hidden, cl::desc("Disable backend optimizations"));
-static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable Hexagon Vector print instr pass"));
+static cl::opt<bool>
+ EnableVectorPrint("enable-hexagon-vector-print", cl::Hidden,
+ cl::desc("Enable Hexagon Vector print instr pass"));
-static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
- cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
+static cl::opt<bool>
+ EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden, cl::init(true),
+ cl::desc("Enable vextract optimization"));
-static cl::opt<bool> EnableVectorCombine("hexagon-vector-combine", cl::Hidden,
- cl::ZeroOrMore, cl::init(true), cl::desc("Enable HVX vector combining"));
+static cl::opt<bool>
+ EnableVectorCombine("hexagon-vector-combine", cl::Hidden, cl::init(true),
+ cl::desc("Enable HVX vector combining"));
-static cl::opt<bool> EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup",
- cl::Hidden, cl::ZeroOrMore, cl::init(true),
- cl::desc("Simplify the CFG after atomic expansion pass"));
+static cl::opt<bool> EnableInitialCFGCleanup(
+ "hexagon-initial-cfg-cleanup", cl::Hidden, cl::init(true),
+ cl::desc("Simplify the CFG after atomic expansion pass"));
static cl::opt<bool> EnableInstSimplify("hexagon-instsimplify", cl::Hidden,
- cl::ZeroOrMore, cl::init(true),
+ cl::init(true),
cl::desc("Enable instsimplify"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
@@ -189,7 +194,7 @@ namespace llvm {
} // end namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::Static);
+ return RM.value_or(Reloc::Static);
}
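getValueOr becoming value_or is part of the commit-wide move of llvm::Optional toward std::optional spellings (hasValue likewise becomes has_value, as in the Hexagon shuffler hunks above). A small sketch of the renamed surface, assuming the std::optional-compatible interface:

  #include "llvm/ADT/Optional.h"
  #include "llvm/Support/CodeGen.h"

  llvm::Reloc::Model pick(llvm::Optional<llvm::Reloc::Model> RM) {
    bool Known = RM.has_value();             // was RM.hasValue()
    (void)Known;
    return RM.value_or(llvm::Reloc::Static); // was RM.getValueOr(...)
  }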
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
@@ -293,12 +298,11 @@ void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
}
TargetTransformInfo
-HexagonTargetMachine::getTargetTransformInfo(const Function &F) {
+HexagonTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(HexagonTTIImpl(this, F));
}
-
-HexagonTargetMachine::~HexagonTargetMachine() {}
+HexagonTargetMachine::~HexagonTargetMachine() = default;
namespace {
/// Hexagon Code Generator Pass Configuration Options.
@@ -345,6 +349,7 @@ void HexagonPassConfig::addIRPasses() {
if (EnableInitialCFGCleanup)
addPass(createCFGSimplificationPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
+ .convertSwitchRangeToICmp(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 66679df93bd3..947df7574ab3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -39,7 +39,7 @@ public:
void adjustPassManager(PassManagerBuilder &PMB) override;
void registerPassBuilderCallbacks(PassBuilder &PB) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
HexagonTargetObjectFile *getObjFileLowering() const override {
return static_cast<HexagonTargetObjectFile*>(TLOF.get());
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 7df32e4072e3..c83ed16f0272 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -41,9 +41,9 @@ static cl::opt<unsigned> SmallDataThreshold("hexagon-small-data-threshold",
static cl::opt<bool> NoSmallDataSorting("mno-sort-sda", cl::init(false),
cl::Hidden, cl::desc("Disable small data sections sorting"));
-static cl::opt<bool> StaticsInSData("hexagon-statics-in-small-data",
- cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Allow static variables in .sdata"));
+static cl::opt<bool>
+ StaticsInSData("hexagon-statics-in-small-data", cl::Hidden,
+ cl::desc("Allow static variables in .sdata"));
static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
cl::Hidden, cl::init(false),
@@ -332,6 +332,7 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty,
case Type::X86_MMXTyID:
case Type::X86_AMXTyID:
case Type::TokenTyID:
+ case Type::DXILPointerTyID:
return 0;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 1bdd8c3c513a..bb0aaa3150fb 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -223,7 +223,8 @@ HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask, int Index,
- Type *SubTp) {
+ Type *SubTp,
+ ArrayRef<const Value *> Args) {
return 1;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 9e637dfc3e16..7bbaf7ae9cb2 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -86,12 +86,11 @@ public:
unsigned getMinVectorRegisterBitWidth() const;
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
- bool shouldMaximizeVectorBandwidth() const {
+ bool
+ shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
return true;
}
- bool supportsEfficientVectorElementLoadStore() {
- return false;
- }
+ bool supportsEfficientVectorElementLoadStore() { return false; }
bool hasBranchDivergence() {
return false;
}
@@ -125,7 +124,8 @@ public:
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
- ArrayRef<int> Mask, int Index, Type *SubTp);
+ ArrayRef<int> Mask, int Index, Type *SubTp,
+ ArrayRef<const Value *> Args = None);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
diff --git a/llvm/lib/Target/Hexagon/HexagonVExtract.cpp b/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
index b5f06ebd3189..845fa1e49578 100644
--- a/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -27,9 +27,9 @@
using namespace llvm;
-static cl::opt<unsigned> VExtractThreshold("hexagon-vextract-threshold",
- cl::Hidden, cl::ZeroOrMore, cl::init(1),
- cl::desc("Threshold for triggering vextract replacement"));
+static cl::opt<unsigned> VExtractThreshold(
+ "hexagon-vextract-threshold", cl::Hidden, cl::init(1),
+ cl::desc("Threshold for triggering vextract replacement"));
namespace llvm {
void initializeHexagonVExtractPass(PassRegistry& Registry);
@@ -106,8 +106,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo &MFI = MF.getFrameInfo();
Register AR =
MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseVReg();
- std::map<unsigned, SmallVector<MachineInstr*,4>> VExtractMap;
- MaybeAlign MaxAlign;
+ std::map<unsigned, SmallVector<MachineInstr *, 4>> VExtractMap;
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -131,6 +130,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
return AddrR;
};
+ MaybeAlign MaxAlign;
for (auto &P : VExtractMap) {
unsigned VecR = P.first;
if (P.second.size() <= VExtractThreshold)
@@ -138,7 +138,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
const auto &VecRC = *MRI.getRegClass(VecR);
Align Alignment = HRI.getSpillAlign(VecRC);
- MaxAlign = max(MaxAlign, Alignment);
+ MaxAlign = std::max(MaxAlign.valueOrOne(), Alignment);
// Make sure this is not a spill slot: spill slots cannot be aligned
// if there are variable-sized objects on the stack. They must be
// accessible via FP (which is not aligned), because SP is unknown,
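MaybeAlign has no ordering of its own, and the previous max(MaybeAlign, Align) helper went away; valueOrOne() collapses the unset state to Align(1), which is the identity element for this maximum, so std::max over plain Align is safe even on the first iteration. A sketch under that assumption:

  #include "llvm/Support/Alignment.h"
  #include <algorithm>

  void example() {
    llvm::MaybeAlign MaxAlign;                                   // starts unset
    MaxAlign = std::max(MaxAlign.valueOrOne(), llvm::Align(64)); // -> Align(64)
    MaxAlign = std::max(MaxAlign.valueOrOne(), llvm::Align(16)); // stays 64
  }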
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index e9b658d18175..54d33a4113e7 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -55,24 +55,25 @@ using namespace llvm;
#define DEBUG_TYPE "packets"
-static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
- cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon packetizer pass"));
+static cl::opt<bool>
+ DisablePacketizer("disable-packetizer", cl::Hidden,
+ cl::desc("Disable Hexagon packetizer pass"));
static cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
- cl::ZeroOrMore, cl::init(true),
+ cl::init(true),
cl::desc("Allow slot1 store and slot0 load"));
-static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
- cl::ZeroOrMore, cl::Hidden, cl::init(true),
- cl::desc("Allow non-solo packetization of volatile memory references"));
+static cl::opt<bool> PacketizeVolatiles(
+ "hexagon-packetize-volatiles", cl::Hidden, cl::init(true),
+ cl::desc("Allow non-solo packetization of volatile memory references"));
-static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instructions with TC"));
+static cl::opt<bool>
+ EnableGenAllInsnClass("enable-gen-insn", cl::Hidden,
+ cl::desc("Generate all instructions with TC"));
-static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
- cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Disable vector double new-value-stores"));
+static cl::opt<bool>
+ DisableVecDblNVStores("disable-vecdbl-nv-stores", cl::Hidden,
+ cl::desc("Disable vector double new-value-stores"));
extern cl::opt<bool> ScheduleInlineAsm;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 6aca8d807872..abd84a188cfa 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -1310,7 +1310,7 @@ auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
auto Simplify = [&](Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
- if (Value *S = SimplifyInstruction(I, Q))
+ if (Value *S = simplifyInstruction(I, Q))
return S;
}
return V;
@@ -1404,7 +1404,7 @@ auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
return false;
- if (!mayBeMemoryDependent(In))
+ if (!mayHaveNonDefUseDependency(In))
return true;
bool MayWrite = In.mayWriteToMemory();
auto MaybeLoc = getLocOrNone(In);
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 94b878e21f4d..2b004a9c5ad4 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -53,10 +53,10 @@ using namespace llvm;
STATISTIC(HexagonNumVectorLoopCarriedReuse,
"Number of values that were reused from a previous iteration.");
-static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
- cl::Hidden,
+static cl::opt<int> HexagonVLCRIterationLim(
+ "hexagon-vlcr-iteration-lim", cl::Hidden,
cl::desc("Maximum distance of loop carried dependences that are handled"),
- cl::init(2), cl::ZeroOrMore);
+ cl::init(2));
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h
index f1e0c5804ace..f826b2eb568f 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h
+++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h
@@ -127,7 +127,7 @@ class Loop;
/// Hexagon Vector Loop Carried Reuse Pass
struct HexagonVectorLoopCarriedReusePass
: public PassInfoMixin<HexagonVectorLoopCarriedReusePass> {
- HexagonVectorLoopCarriedReusePass() {}
+ HexagonVectorLoopCarriedReusePass() = default;
/// Run pass over the Loop.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM,
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp b/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
index fbc5e5c344ed..b09a393f7dd5 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
@@ -36,9 +36,9 @@ using namespace llvm;
#define DEBUG_TYPE "hexagon-vector-print"
-static cl::opt<bool> TraceHexVectorStoresOnly("trace-hex-vector-stores-only",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enables tracing of vector stores"));
+static cl::opt<bool>
+ TraceHexVectorStoresOnly("trace-hex-vector-stores-only", cl::Hidden,
+ cl::desc("Enables tracing of vector stores"));
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 5e5a26fea076..37866a73ed0f 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index e5e5d08937ef..f3da67562320 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -34,5 +34,4 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
UsesELFSectionDirectiveForBSS = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
UseLogicalShr = false;
- UseIntegratedAssembler = false;
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 8a866cfe9161..18ff901d6441 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SourceMgr.h"
#include <cassert>
@@ -29,8 +30,8 @@
using namespace llvm;
static cl::opt<bool>
- RelaxNVChecks("relax-nv-checks", cl::init(false), cl::ZeroOrMore,
- cl::Hidden, cl::desc("Relax checks of new-value validity"));
+ RelaxNVChecks("relax-nv-checks", cl::Hidden,
+ cl::desc("Relax checks of new-value validity"));
const HexagonMCChecker::PredSense
HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index f8ac35aed7c0..ed2856eb1fe9 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -789,7 +789,6 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
}
MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
- MCRegisterInfo const &MRI,
MCContext &MCT) {
return new HexagonMCCodeEmitter(MII, MCT);
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 0624214d284b..49725801f046 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -108,7 +108,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
MCSection &Section = *getAssembler().getContext().getELFSection(
SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
MCSectionSubPair P = getCurrentSection();
- SwitchSection(&Section);
+ switchSection(&Section);
if (ELFSymbol->isUndefined()) {
emitValueToAlignment(ByteAlignment, 0, 1, 0);
@@ -120,7 +120,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
if (Align(ByteAlignment) > Section.getAlignment())
Section.setAlignment(Align(ByteAlignment));
- SwitchSection(P.first, P.second);
+ switchSection(P.first, P.second);
} else {
if (ELFSymbol->declareCommon(Size, ByteAlignment))
report_fatal_error("Symbol: " + Symbol->getName() +
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index 1e708ba1bcd3..ab5e9eb4eca6 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 6a08d7503bac..d068baf05998 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "HexagonArch.h"
+#include "HexagonDepArch.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCAsmInfo.h"
@@ -22,6 +22,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
@@ -409,8 +410,8 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) {
}
}
-static bool isCPUValid(const std::string &CPU) {
- return Hexagon::CpuTable.find(CPU) != Hexagon::CpuTable.cend();
+static bool isCPUValid(StringRef CPU) {
+ return Hexagon::getCpu(CPU).has_value();
}
namespace {
@@ -559,12 +560,18 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI,
}
unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
- using llvm::Hexagon::ElfFlagsByCpuStr;
-
- const std::string CPU(STI.getCPU().str());
- auto F = ElfFlagsByCpuStr.find(CPU);
- assert(F != ElfFlagsByCpuStr.end() && "Unrecognized Architecture");
- return F->second;
+ return StringSwitch<unsigned>(STI.getCPU())
+ .Case("generic", llvm::ELF::EF_HEXAGON_MACH_V5)
+ .Case("hexagonv5", llvm::ELF::EF_HEXAGON_MACH_V5)
+ .Case("hexagonv55", llvm::ELF::EF_HEXAGON_MACH_V55)
+ .Case("hexagonv60", llvm::ELF::EF_HEXAGON_MACH_V60)
+ .Case("hexagonv62", llvm::ELF::EF_HEXAGON_MACH_V62)
+ .Case("hexagonv65", llvm::ELF::EF_HEXAGON_MACH_V65)
+ .Case("hexagonv66", llvm::ELF::EF_HEXAGON_MACH_V66)
+ .Case("hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67)
+ .Case("hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T)
+ .Case("hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68)
+ .Case("hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69);
}
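The table lookup (with its assert on unrecognized CPUs) becomes a StringSwitch chain. Note that the hunk above has no .Default, so an unrecognized CPU string would fall off the end of the switch (an assertion failure in debug builds). A minimal standalone use of the same utility, with a fallback (example values are ours, not real ELF flags):

  #include "llvm/ADT/StringSwitch.h"

  unsigned flagsFor(llvm::StringRef CPU) {
    return llvm::StringSwitch<unsigned>(CPU)
        .Case("hexagonv68", 0x68) // illustrative value only
        .Default(0);              // fallback for unrecognized CPU names
  }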
llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() {
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 5bf7c9a1a908..d717e710f3c0 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -85,7 +85,6 @@ namespace Hexagon_MC {
}
MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(const Target &T,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index d82731e153fe..c8805296017d 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -295,7 +295,7 @@ void HexagonShuffler::restrictBranchOrder(HexagonPacketSummary const &Summary) {
Summary.branchInsts[0]->Core.setUnits(jumpSlot.first);
Summary.branchInsts[1]->Core.setUnits(jumpSlot.second);
- const bool HasShuffledPacket = tryAuction(Summary).hasValue();
+ const bool HasShuffledPacket = tryAuction(Summary).has_value();
if (HasShuffledPacket)
return;
@@ -599,7 +599,7 @@ void HexagonShuffler::restrictPreferSlot3(HexagonPacketSummary const &Summary,
// and then pin it to slot #3
const unsigned saveUnits = PrefSlot3Inst->Core.getUnits();
PrefSlot3Inst->Core.setUnits(saveUnits & Slot3Mask);
- const bool HasShuffledPacket = tryAuction(Summary).hasValue();
+ const bool HasShuffledPacket = tryAuction(Summary).has_value();
if (HasShuffledPacket)
return;
diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 660215ca7435..d715ba901a2b 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -704,14 +704,14 @@ LanaiAsmParser::parseRegister(bool RestoreOnFailure) {
if (Lexer.getKind() == AsmToken::Identifier) {
RegNum = MatchRegisterName(Lexer.getTok().getIdentifier());
if (RegNum == 0) {
- if (PercentTok.hasValue() && RestoreOnFailure)
+ if (PercentTok && RestoreOnFailure)
Lexer.UnLex(PercentTok.getValue());
return nullptr;
}
Parser.Lex(); // Eat identifier token
return LanaiOperand::createReg(RegNum, Start, End);
}
- if (PercentTok.hasValue() && RestoreOnFailure)
+ if (PercentTok && RestoreOnFailure)
Lexer.UnLex(PercentTok.getValue());
return nullptr;
}
diff --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index 57343784237d..e9fecef4ac5b 100644
--- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -16,7 +16,7 @@
#include "LanaiCondCode.h"
#include "LanaiInstrInfo.h"
#include "TargetInfo/LanaiTargetInfo.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
@@ -45,26 +45,30 @@ LanaiDisassembler::LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
// Definition is further down.
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeRiMemoryValue(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeRrMemoryValue(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeSplsValue(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeBranch(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeShiftImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
#include "LanaiGenDisassemblerTables.inc"
@@ -158,7 +162,7 @@ static const unsigned GPRDecoderTable[] = {
DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
- const void * /*Decoder*/) {
+ const MCDisassembler * /*Decoder*/) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -168,7 +172,8 @@ DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus decodeRiMemoryValue(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// RI memory values encoded using 23 bits:
// 5 bit register, 16 bit constant
unsigned Register = (Insn >> 18) & 0x1f;
@@ -180,7 +185,8 @@ static DecodeStatus decodeRiMemoryValue(MCInst &Inst, unsigned Insn,
}
static DecodeStatus decodeRrMemoryValue(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// RR memory values encoded using 20 bits:
// 5 bit register, 5 bit register, 2 bit PQ, 3 bit ALU operator, 5 bit JJJJJ
unsigned Register = (Insn >> 15) & 0x1f;
@@ -192,7 +198,8 @@ static DecodeStatus decodeRrMemoryValue(MCInst &Inst, unsigned Insn,
}
static DecodeStatus decodeSplsValue(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// RI memory values encoded using 17 bits:
// 5 bit register, 10 bit constant
unsigned Register = (Insn >> 12) & 0x1f;
@@ -206,14 +213,13 @@ static DecodeStatus decodeSplsValue(MCInst &Inst, unsigned Insn,
static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch,
uint64_t Address, uint64_t Offset,
uint64_t Width, MCInst &MI,
- const void *Decoder) {
- const MCDisassembler *Dis = static_cast<const MCDisassembler *>(Decoder);
- return Dis->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
- Width);
+ const MCDisassembler *Decoder) {
+ return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
+ Width, /*InstSize=*/0);
}
static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (!tryAddingSymbolicOperand(Insn + Address, false, Address, 2, 23, MI,
Decoder))
MI.addOperand(MCOperand::createImm(Insn));
@@ -221,7 +227,8 @@ static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address,
}
static DecodeStatus decodeShiftImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Offset = (Insn & 0xffff);
Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Offset)));
@@ -230,7 +237,7 @@ static DecodeStatus decodeShiftImm(MCInst &Inst, unsigned Insn,
static DecodeStatus decodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Val >= LPCC::UNKNOWN)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(Val));
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 010ff80ad42a..832cafb3dabe 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -138,11 +138,7 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
}
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine({ISD::ADD, ISD::SUB, ISD::AND, ISD::OR, ISD::XOR});
// Function alignments
setMinFunctionAlignment(Align(4));
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 4217b8509676..bef2458fd126 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -592,9 +592,7 @@ bool LanaiInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a branch, delete them.
- while (std::next(Instruction) != MBB.end()) {
- std::next(Instruction)->eraseFromParent();
- }
+ MBB.erase(std::next(Instruction), MBB.end());
Condition.clear();
FalseBlock = nullptr;
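The loop of single erases becomes one range erase, which avoids the repeated std::next walks and deletes the whole tail in one call. The same idiom on a plain list (sketch):

  #include <iterator>
  #include <list>

  // Drop every element after the "branch" position in one range erase.
  void dropAfterBranch(std::list<int> &Insts, std::list<int>::iterator Branch) {
    Insts.erase(std::next(Branch), Insts.end());
  }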
diff --git a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
index eeef1d919925..fe8ce1093bd8 100644
--- a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
@@ -11,3 +11,10 @@
using namespace llvm;
void LanaiMachineFunctionInfo::anchor() {}
+
+MachineFunctionInfo *LanaiMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<LanaiMachineFunctionInfo>(*this);
+}
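The clone hook is new in this LLVM version: MachineFunctionInfo grows a virtual clone so target-specific per-function state can be copied when a MachineFunction itself is cloned, and cloneInfo copy-constructs the derived type inside the destination function's allocator. A standalone sketch of the pattern (simplified stand-in types, not the LLVM classes):

    // Virtual copy so a cloned function gets independent per-function state.
    struct FuncInfoBase {
      virtual ~FuncInfoBase() = default;
      virtual FuncInfoBase *clone() const = 0;
    };
    struct LanaiLikeInfo : FuncInfoBase {
      int VarArgsFrameIndex = 0;
      FuncInfoBase *clone() const override { return new LanaiLikeInfo(*this); }
    };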
diff --git a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h
index de712637b5a4..edf5f2ee087e 100644
--- a/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h
@@ -40,6 +40,10 @@ class LanaiMachineFunctionInfo : public MachineFunctionInfo {
public:
explicit LanaiMachineFunctionInfo(MachineFunction &MF)
: VarArgsFrameIndex(0) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
Register getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(Register Reg) { SRetReturnReg = Reg; }
diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index 70b6fd2c185d..8af40d18d106 100644
--- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -48,7 +48,7 @@ static std::string computeDataLayout() {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::PIC_);
+ return RM.value_or(Reloc::PIC_);
}
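getValueOr was renamed because llvm::Optional is converging on the std::optional spelling; the behavior is identical and matches the standard type:

    #include <optional>
    std::optional<int> RM;           // relocation model left unset
    int Effective = RM.value_or(42); // falls back to the default, here 42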
LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
@@ -68,7 +68,7 @@ LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT,
}
TargetTransformInfo
-LanaiTargetMachine::getTargetTransformInfo(const Function &F) {
+LanaiTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(LanaiTTIImpl(this, F));
}
diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.h b/llvm/lib/Target/Lanai/LanaiTargetMachine.h
index 00922f44f33a..258e58c86253 100644
--- a/llvm/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.h
@@ -38,7 +38,7 @@ public:
return &Subtarget;
}
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &pass_manager) override;
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
index f0d287c858d8..08cc54b858ce 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
@@ -13,10 +13,10 @@
#ifndef LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
#define LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
+class StringRef;
class LanaiInstPrinter : public MCInstPrinter {
public:
@@ -36,7 +36,6 @@ public:
void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O);
- void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printHi16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printLo16AndImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index df4ee297155f..ec573a189a70 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -304,7 +304,6 @@ unsigned LanaiMCCodeEmitter::getBranchTargetOpValue(
llvm::MCCodeEmitter *
llvm::createLanaiMCCodeEmitter(const MCInstrInfo &InstrInfo,
- const MCRegisterInfo & /*MRI*/,
MCContext &context) {
return new LanaiMCCodeEmitter(InstrInfo, context);
}
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
index 651ed36cdc24..e8da1bc88142 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
@@ -27,7 +27,6 @@ class MCSubtargetInfo;
class Target;
MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
new file mode 100644
index 000000000000..d11f5a9080a0
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -0,0 +1,556 @@
+//===-- LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/LoongArchInstPrinter.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Casting.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-asm-parser"
+
+namespace {
+class LoongArchAsmParser : public MCTargetAsmParser {
+ SMLoc getLoc() const { return getParser().getTok().getLoc(); }
+
+ /// Parse a register as used in CFI directives.
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken DirectiveID) override { return true; }
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
+ int64_t Lower, int64_t Upper, Twine Msg);
+
+ /// Helper for processing MC instructions that have been successfully matched
+ /// by MatchAndEmitInstruction.
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
+ MCStreamer &Out);
+
+// Auto-generated instruction matching functions.
+#define GET_ASSEMBLER_HEADER
+#include "LoongArchGenAsmMatcher.inc"
+
+ OperandMatchResultTy parseRegister(OperandVector &Operands);
+ OperandMatchResultTy parseImmediate(OperandVector &Operands);
+
+ bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
+
+public:
+ enum LoongArchMatchResultTy {
+ Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresMsbNotLessThanLsb,
+ Match_RequiresOpnd2NotR0R1,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "LoongArchGenAsmMatcher.inc"
+#undef GET_OPERAND_DIAGNOSTIC_TYPES
+ };
+
+ LoongArchAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI, MII) {
+ Parser.addAliasForDirective(".half", ".2byte");
+ Parser.addAliasForDirective(".hword", ".2byte");
+ Parser.addAliasForDirective(".word", ".4byte");
+ Parser.addAliasForDirective(".dword", ".8byte");
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+};
+
+// Instances of this class represent a parsed LoongArch assembly operand.
+class LoongArchOperand : public MCParsedAsmOperand {
+ enum class KindTy {
+ Token,
+ Register,
+ Immediate,
+ } Kind;
+
+ struct RegOp {
+ MCRegister RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ SMLoc StartLoc, EndLoc;
+ union {
+ StringRef Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ };
+
+public:
+ LoongArchOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ bool isToken() const override { return Kind == KindTy::Token; }
+ bool isReg() const override { return Kind == KindTy::Register; }
+ bool isImm() const override { return Kind == KindTy::Immediate; }
+ bool isMem() const override { return false; }
+ void setReg(MCRegister PhysReg) { Reg.RegNum = PhysReg; }
+
+ static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm) {
+ if (auto CE = dyn_cast<MCConstantExpr>(Expr)) {
+ Imm = CE->getValue();
+ return true;
+ }
+
+ return false;
+ }
+
+ template <unsigned N, int P = 0> bool isUImm() const {
+ if (!isImm())
+ return false;
+
+ int64_t Imm;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm);
+ return IsConstantImm && isUInt<N>(Imm - P);
+ }
+
+ template <unsigned N, unsigned S = 0> bool isSImm() const {
+ if (!isImm())
+ return false;
+
+ int64_t Imm;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm);
+ return IsConstantImm && isShiftedInt<N, S>(Imm);
+ }
+
+ bool isUImm2() const { return isUImm<2>(); }
+ bool isUImm2plus1() const { return isUImm<2, 1>(); }
+ bool isUImm3() const { return isUImm<3>(); }
+ bool isUImm5() const { return isUImm<5>(); }
+ bool isUImm6() const { return isUImm<6>(); }
+ bool isUImm8() const { return isUImm<8>(); }
+ bool isUImm12() const { return isUImm<12>(); }
+ bool isUImm14() const { return isUImm<14>(); }
+ bool isUImm15() const { return isUImm<15>(); }
+ bool isSImm12() const { return isSImm<12>(); }
+ bool isSImm14lsl2() const { return isSImm<14, 2>(); }
+ bool isSImm16() const { return isSImm<16>(); }
+ bool isSImm16lsl2() const { return isSImm<16, 2>(); }
+ bool isSImm20() const { return isSImm<20>(); }
+ bool isSImm21lsl2() const { return isSImm<21, 2>(); }
+ bool isSImm26lsl2() const { return isSImm<26, 2>(); }
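The template parameters encode the accepted ranges: isUImm<N, P> accepts V where V - P is a valid N-bit unsigned value (so isUImm<2, 1> accepts 1..4), and isSImm<N, S> accepts N-bit signed values pre-shifted left by S (so isSImm<14, 2> accepts multiples of 4 in [-(1 << 15), (1 << 15) - 4]). A standalone restatement of the two checks (the real isUInt/isShiftedInt live in llvm/Support/MathExtras.h):

    #include <cstdint>
    constexpr bool uimmNplusP(unsigned N, int64_t P, int64_t V) {
      return V - P >= 0 && uint64_t(V - P) < (1ULL << N);
    }
    constexpr bool simmNlslS(unsigned N, unsigned S, int64_t V) {
      return V % (1LL << S) == 0 && V >= -(1LL << (N + S - 1)) &&
             V < (1LL << (N + S - 1));
    }
    static_assert(uimmNplusP(2, 1, 4) && !uimmNplusP(2, 1, 5), "uimm2plus1");
    static_assert(simmNlslS(14, 2, -(1 << 15)) && !simmNlslS(14, 2, 2),
                  "simm14lsl2");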
+
+ /// Gets location of the first token of this operand.
+ SMLoc getStartLoc() const override { return StartLoc; }
+ /// Gets location of the last token of this operand.
+ SMLoc getEndLoc() const override { return EndLoc; }
+
+ unsigned getReg() const override {
+ assert(Kind == KindTy::Register && "Invalid type access!");
+ return Reg.RegNum.id();
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == KindTy::Immediate && "Invalid type access!");
+ return Imm.Val;
+ }
+
+ StringRef getToken() const {
+ assert(Kind == KindTy::Token && "Invalid type access!");
+ return Tok;
+ }
+
+ void print(raw_ostream &OS) const override {
+ auto RegName = [](unsigned Reg) {
+ if (Reg)
+ return LoongArchInstPrinter::getRegisterName(Reg);
+ else
+ return "noreg";
+ };
+
+ switch (Kind) {
+ case KindTy::Immediate:
+ OS << *getImm();
+ break;
+ case KindTy::Register:
+ OS << "<register " << RegName(getReg()) << ">";
+ break;
+ case KindTy::Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+ }
+
+ static std::unique_ptr<LoongArchOperand> createToken(StringRef Str, SMLoc S) {
+ auto Op = std::make_unique<LoongArchOperand>(KindTy::Token);
+ Op->Tok = Str;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<LoongArchOperand> createReg(unsigned RegNo, SMLoc S,
+ SMLoc E) {
+ auto Op = std::make_unique<LoongArchOperand>(KindTy::Register);
+ Op->Reg.RegNum = RegNo;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<LoongArchOperand> createImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ auto Op = std::make_unique<LoongArchOperand>(KindTy::Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ if (auto CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ // Used by the TableGen-generated matcher code.
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+};
+} // end anonymous namespace
+
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_MNEMONIC_SPELL_CHECKER
+#include "LoongArchGenAsmMatcher.inc"
+
+static MCRegister convertFPR32ToFPR64(MCRegister Reg) {
+ assert(Reg >= LoongArch::F0 && Reg <= LoongArch::F31 && "Invalid register");
+ return Reg - LoongArch::F0 + LoongArch::F0_64;
+}
+
+// Attempts to match Name as a register (either using the default name or
+// alternative ABI names), setting RegNo to the matching register. Upon
+// failure, returns true and sets RegNo to 0.
+static bool matchRegisterNameHelper(MCRegister &RegNo, StringRef Name) {
+ RegNo = MatchRegisterName(Name);
+ // The 32-bit and 64-bit FPRs have the same asm name. Check that the initial
+ // match always matches the 32-bit variant, and not the 64-bit one.
+ assert(!(RegNo >= LoongArch::F0_64 && RegNo <= LoongArch::F31_64));
+ // The default FPR register class is based on the tablegen enum ordering.
+ static_assert(LoongArch::F0 < LoongArch::F0_64,
+ "FPR matching must be updated");
+ if (RegNo == LoongArch::NoRegister)
+ RegNo = MatchRegisterAltName(Name);
+
+ return RegNo == LoongArch::NoRegister;
+}
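MatchRegisterName and MatchRegisterAltName are tablegen-generated (pulled in by GET_REGISTER_MATCHER above). Since the 32-bit and 64-bit FPR banks share spellings, the matcher resolves the ambiguity by enum order; the static_assert pins that assumption, so a bare f0 always matches the FPR32 register first, and validateTargetOperandClass below widens it to the FPR64 bank only when the instruction demands it.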
+
+bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ return Error(getLoc(), "invalid register number");
+}
+
+OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ llvm_unreachable("Unimplemented function.");
+}
+
+OperandMatchResultTy
+LoongArchAsmParser::parseRegister(OperandVector &Operands) {
+ if (getLexer().getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ // Eat the $ prefix.
+ getLexer().Lex();
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Name = getLexer().getTok().getIdentifier();
+ MCRegister RegNo;
+ matchRegisterNameHelper(RegNo, Name);
+ if (RegNo == LoongArch::NoRegister)
+ return MatchOperand_NoMatch;
+
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
+ getLexer().Lex();
+ Operands.push_back(LoongArchOperand::createReg(RegNo, S, E));
+
+ return MatchOperand_Success;
+}
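So for an input such as (syntax assumed from the $ RegisterPrefix declared in LoongArch.td below)

    add.w $r4, $r5, $r6

each register operand is lexed as a Dollar token plus an Identifier, resolved through matchRegisterNameHelper, and pushed with source locations spanning the identifier.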
+
+OperandMatchResultTy
+LoongArchAsmParser::parseImmediate(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E;
+ const MCExpr *Res;
+
+ if (getParser().parseExpression(Res, E))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(LoongArchOperand::createImm(Res, S, E));
+ return MatchOperand_Success;
+}
+
+/// Looks at a token type and creates the relevant operand from this
+/// information, adding it to Operands. Returns true upon an error.
+bool LoongArchAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic) {
+ if (parseRegister(Operands) == MatchOperand_Success ||
+ parseImmediate(Operands) == MatchOperand_Success)
+ return false;
+
+ // Finally we have exhausted all options and must declare defeat.
+ Error(getLoc(), "unknown operand");
+ return true;
+}
+
+bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ // First operand in MCInst is instruction mnemonic.
+ Operands.push_back(LoongArchOperand::createToken(Name, NameLoc));
+
+ // If there are no more operands, then finish.
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse first operand.
+ if (parseOperand(Operands, Name))
+ return true;
+
+ // Parse until end of statement, consuming commas between operands.
+ while (parseOptionalToken(AsmToken::Comma))
+ if (parseOperand(Operands, Name))
+ return true;
+
+ // Parse end of statement and return successfully.
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return false;
+
+ SMLoc Loc = getLexer().getLoc();
+ getParser().eatToEndOfStatement();
+ return Error(Loc, "unexpected token");
+}
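The loop implements the usual mnemonic-then-comma-separated-operands grammar; e.g. for the assumed input addi.w $r4, $r5, 1 the vector ends up as

    // Operands = { Token "addi.w", Reg r4, Reg r5, Imm 1 }

before EndOfStatement terminates the list.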
+
+bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ OperandVector &Operands,
+ MCStreamer &Out) {
+ Inst.setLoc(IDLoc);
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+}
+
+unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case LoongArch::CSRXCHG: {
+ unsigned Rj = Inst.getOperand(2).getReg();
+ if (Rj == LoongArch::R0 || Rj == LoongArch::R1)
+ return Match_RequiresOpnd2NotR0R1;
+ return Match_Success;
+ }
+ case LoongArch::BSTRINS_W:
+ case LoongArch::BSTRINS_D:
+ case LoongArch::BSTRPICK_W:
+ case LoongArch::BSTRPICK_D: {
+ unsigned Opc = Inst.getOpcode();
+ const signed Msb =
+ (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D)
+ ? Inst.getOperand(3).getImm()
+ : Inst.getOperand(2).getImm();
+ const signed Lsb =
+ (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D)
+ ? Inst.getOperand(4).getImm()
+ : Inst.getOperand(3).getImm();
+ if (Msb < Lsb)
+ return Match_RequiresMsbNotLessThanLsb;
+ return Match_Success;
+ }
+ }
+
+ return Match_Success;
+}
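The bit-string instructions take msb and lsb immediates delimiting a field, so msb >= lsb is a structural requirement. For example (assumed syntax), bstrpick.w $a0, $a1, 7, 4 extracts bits [7:4], while writing 4, 7 instead trips Match_RequiresMsbNotLessThanLsb and yields the "msb is less than lsb" diagnostic emitted in MatchAndEmitInstruction below.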
+
+unsigned
+LoongArchAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ LoongArchOperand &Op = static_cast<LoongArchOperand &>(AsmOp);
+ if (!Op.isReg())
+ return Match_InvalidOperand;
+
+ MCRegister Reg = Op.getReg();
+ // As the parser cannot differentiate an FPR32 from an FPR64, coerce the
+ // register from FPR32 to FPR64 if necessary.
+ if (LoongArchMCRegisterClasses[LoongArch::FPR32RegClassID].contains(Reg) &&
+ Kind == MCK_FPR64) {
+ Op.setReg(convertFPR32ToFPR64(Reg));
+ return Match_Success;
+ }
+
+ return Match_InvalidOperand;
+}
+
+bool LoongArchAsmParser::generateImmOutOfRangeError(
+ OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper,
+ Twine Msg = "immediate must be an integer in the range") {
+ SMLoc ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
+}
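With the default message, a uimm5 failure reads: error: immediate must be an integer in the range [0, 31].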
+
+bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ FeatureBitset MissingFeatures;
+
+ auto Result = MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,
+ MatchingInlineAsm);
+ switch (Result) {
+ default:
+ break;
+ case Match_Success:
+ return processInstruction(Inst, IDLoc, Operands, Out);
+ case Match_MissingFeature: {
+ assert(MissingFeatures.any() && "Unknown missing features!");
+ bool FirstFeature = true;
+ std::string Msg = "instruction requires the following:";
+ for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
+ if (MissingFeatures[i]) {
+ Msg += FirstFeature ? " " : ", ";
+ Msg += getSubtargetFeatureName(i);
+ FirstFeature = false;
+ }
+ }
+ return Error(IDLoc, Msg);
+ }
+ case Match_MnemonicFail: {
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = LoongArchMnemonicSpellCheck(
+ ((LoongArchOperand &)*Operands[0]).getToken(), FBS, 0);
+ return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion);
+ }
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+
+ ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ }
+
+ // Handle the case when the error message is of a specific type other than
+ // the generic Match_InvalidOperand and the corresponding operand is
+ // missing.
+ if (Result > FIRST_TARGET_MATCH_RESULT_TY) {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL && ErrorInfo >= Operands.size())
+ return Error(ErrorLoc, "too few operands for instruction");
+ }
+
+ switch (Result) {
+ default:
+ break;
+ case Match_RequiresMsbNotLessThanLsb: {
+ SMLoc ErrorStart = Operands[3]->getStartLoc();
+ return Error(ErrorStart, "msb is less than lsb",
+ SMRange(ErrorStart, Operands[4]->getEndLoc()));
+ }
+ case Match_RequiresOpnd2NotR0R1:
+ return Error(Operands[2]->getStartLoc(), "must not be $r0 or $r1");
+ case Match_InvalidUImm2:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 2) - 1);
+ case Match_InvalidUImm2plus1:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/1,
+ /*Upper=*/(1 << 2));
+ case Match_InvalidUImm3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 3) - 1);
+ case Match_InvalidUImm5:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 5) - 1);
+ case Match_InvalidUImm6:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 6) - 1);
+ case Match_InvalidUImm12:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 12) - 1);
+ case Match_InvalidUImm15:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 15) - 1);
+ case Match_InvalidSImm12:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 11),
+ /*Upper=*/(1 << 11) - 1);
+ case Match_InvalidSImm14lsl2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 15), /*Upper=*/(1 << 15) - 4,
+ "immediate must be a multiple of 4 in the range");
+ case Match_InvalidSImm16:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 15),
+ /*Upper=*/(1 << 15) - 1);
+ case Match_InvalidSImm16lsl2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 17), /*Upper=*/(1 << 17) - 4,
+ "immediate must be a multiple of 4 in the range");
+ case Match_InvalidSImm20:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 19),
+ /*Upper=*/(1 << 19) - 1);
+ case Match_InvalidSImm21lsl2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4,
+ "immediate must be a multiple of 4 in the range");
+ case Match_InvalidSImm26lsl2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 27), /*Upper=*/(1 << 27) - 4,
+ "immediate must be a multiple of 4 in the range");
+ }
+ llvm_unreachable("Unknown match type detected!");
+}
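All the shifted-immediate bounds above follow one formula: simm<N> scaled by 2^S accepts multiples of 2^S in [-(2^(N+S-1)), 2^(N+S-1) - 2^S], which is where pairs such as -(1 << 17) and (1 << 17) - 4 for simm16lsl2 come from; plain simm<N> is the S = 0 case, giving [-(2^(N-1)), 2^(N-1) - 1], and uimm<N> offset by P shifts [0, 2^N - 1] up to [P, 2^N - 1 + P].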
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmParser() {
+ RegisterMCAsmParser<LoongArchAsmParser> X(getTheLoongArch32Target());
+ RegisterMCAsmParser<LoongArchAsmParser> Y(getTheLoongArch64Target());
+}
diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
new file mode 100644
index 000000000000..215d061f11f2
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
@@ -0,0 +1,145 @@
+//===-- LoongArchDisassembler.cpp - Disassembler for LoongArch ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArchDisassembler class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class LoongArchDisassembler : public MCDisassembler {
+public:
+ LoongArchDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createLoongArchDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new LoongArchDisassembler(STI, Ctx);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchDisassembler() {
+ // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(getTheLoongArch32Target(),
+ createLoongArchDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getTheLoongArch64Target(),
+ createLoongArchDisassembler);
+}
+
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::R0 + RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::F0 + RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::F0_64 + RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCFRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 8)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::FCC0 + RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 4)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::FCSR0 + RegNo));
+ return MCDisassembler::Success;
+}
+
+template <unsigned N, int P = 0>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::createImm(Imm + P));
+ return MCDisassembler::Success;
+}
+
+template <unsigned N, unsigned S = 0>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ // Sign-extend the number in the bottom <N> bits of Imm, then shift left <S>
+ // bits.
+ Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm) << S));
+ return MCDisassembler::Success;
+}
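Worked example for decodeSImmOperand<16, 2>: a raw field of 0xFFFF sign-extends to -1 and shifts to -4 (a byte offset), while 0x7FFF becomes 32767 << 2 = 131068. decodeUImmOperand's P offset is the inverse of the parser's isUImm<N, P> bias: decodeUImmOperand<2, 1> maps the encoded 0..3 back to the assembly-visible 1..4.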
+
+#include "LoongArchGenDisassemblerTables.inc"
+
+DecodeStatus LoongArchDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CS) const {
+ uint32_t Insn;
+ DecodeStatus Result;
+
+ // We want to read exactly 4 bytes of data because all LoongArch
+ // instructions are a fixed 32 bits wide.
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ Insn = support::endian::read32le(Bytes.data());
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
+ Size = 4;
+
+ return Result;
+}
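Because every LoongArch instruction is a single little-endian 32-bit word, the read is unconditional once four bytes are available: read32le assembles Insn as Bytes[0] | Bytes[1] << 8 | Bytes[2] << 16 | Bytes[3] << 24, and Size is set to 4 whether or not the decoder tables accept the word; there is no variable-length encoding to probe.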
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
new file mode 100644
index 000000000000..caa7bd31e28b
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -0,0 +1,38 @@
+//===-- LoongArch.h - Top-level interface for LoongArch ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// LoongArch back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
+
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class LoongArchTargetMachine;
+class AsmPrinter;
+class FunctionPass;
+class MCInst;
+class MCOperand;
+class MachineInstr;
+class MachineOperand;
+
+bool lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP);
+bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
+ MCOperand &MCOp,
+ const AsmPrinter &AP);
+
+FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
new file mode 100644
index 000000000000..bf465c27ef99
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -0,0 +1,139 @@
+//===-- LoongArch.td - Describe the LoongArch Target -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// LoongArch subtarget features and instruction predicates.
+//===----------------------------------------------------------------------===//
+
+// LoongArch is divided into two versions, the 32-bit version (LA32) and the
+// 64-bit version (LA64).
+def Feature64Bit
+ : SubtargetFeature<"64bit", "HasLA64", "true",
+ "LA64 Basic Integer and Privilege Instruction Set">;
+def IsLA64
+ : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<(all_of Feature64Bit),
+ "LA64 Basic Integer and Privilege Instruction Set">;
+def IsLA32
+ : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<(all_of(not Feature64Bit)),
+ "LA32 Basic Integer and Privilege Instruction Set">;
+
+defvar LA32 = DefaultMode;
+def LA64 : HwMode<"+64bit">;
+
+// Single Precision floating point
+def FeatureBasicF
+ : SubtargetFeature<"f", "HasBasicF", "true",
+ "'F' (Single-Precision Floating-Point)">;
+def HasBasicF
+ : Predicate<"Subtarget->hasBasicF()">,
+ AssemblerPredicate<(all_of FeatureBasicF),
+ "'F' (Single-Precision Floating-Point)">;
+
+// Double Precision floating point
+def FeatureBasicD
+ : SubtargetFeature<"d", "HasBasicD", "true",
+ "'D' (Double-Precision Floating-Point)",
+ [FeatureBasicF]>;
+def HasBasicD
+ : Predicate<"Subtarget->hasBasicD()">,
+ AssemblerPredicate<(all_of FeatureBasicD),
+ "'D' (Double-Precision Floating-Point)">;
+
+// Loongson SIMD eXtension (LSX)
+def FeatureExtLSX
+ : SubtargetFeature<"lsx", "HasExtLSX", "true",
+ "'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>;
+def HasExtLSX
+ : Predicate<"Subtarget->hasExtLSX()">,
+ AssemblerPredicate<(all_of FeatureExtLSX),
+ "'LSX' (Loongson SIMD Extension)">;
+
+// Loongson Advanced SIMD eXtension (LASX)
+def FeatureExtLASX
+ : SubtargetFeature<"lasx", "HasExtLASX", "true",
+ "'LASX' (Loongson Advanced SIMD Extension)",
+ [FeatureExtLSX]>;
+def HasExtLASX
+ : Predicate<"Subtarget->hasExtLASX()">,
+ AssemblerPredicate<(all_of FeatureExtLASX),
+ "'LASX' (Loongson Advanced SIMD Extension)">;
+
+// Loongson VirtualiZation (LVZ)
+def FeatureExtLVZ
+ : SubtargetFeature<"lvz", "HasExtLVZ", "true",
+ "'LVZ' (Loongson Virtualization Extension)">;
+def HasExtLVZ
+ : Predicate<"Subtarget->hasExtLVZ()">,
+ AssemblerPredicate<(all_of FeatureExtLVZ),
+ "'LVZ' (Loongson Virtualization Extension)">;
+
+// Loongson Binary Translation (LBT)
+def FeatureExtLBT
+ : SubtargetFeature<"lbt", "HasExtLBT", "true",
+ "'LBT' (Loongson Binary Translation Extension)">;
+def HasExtLBT
+ : Predicate<"Subtarget->hasExtLBT()">,
+ AssemblerPredicate<(all_of FeatureExtLBT),
+ "'LBT' (Loongson Binary Translation Extension)">;
+
+//===----------------------------------------------------------------------===//
+// Registers, instruction descriptions ...
+//===----------------------------------------------------------------------===//
+
+include "LoongArchRegisterInfo.td"
+include "LoongArchCallingConv.td"
+include "LoongArchInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// LoongArch processors supported.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"generic-la32", NoSchedModel, []>;
+def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>;
+
+def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit,
+ FeatureExtLASX,
+ FeatureExtLVZ,
+ FeatureExtLBT]>;
+
+//===----------------------------------------------------------------------===//
+// Define the LoongArch target.
+//===----------------------------------------------------------------------===//
+
+def LoongArchInstrInfo : InstrInfo {
+ // guess mayLoad, mayStore, and hasSideEffects
+ // This option is a temporary migration help. It will go away.
+ let guessInstructionProperties = 1;
+}
+
+def LoongArchAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterAltName = 1;
+ let AllowDuplicateRegisterNames = 1;
+}
+
+def LoongArchAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ // Recognize hard coded registers.
+ string RegisterPrefix = "$";
+}
+
+def LoongArchAsmWriter : AsmWriter {
+ int PassSubtarget = 1;
+}
+
+def LoongArch : Target {
+ let InstructionSet = LoongArchInstrInfo;
+ let AssemblyParsers = [LoongArchAsmParser];
+ let AssemblyParserVariants = [LoongArchAsmParserVariant];
+ let AssemblyWriters = [LoongArchAsmWriter];
+ let AllowRegisterRenaming = 1;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
new file mode 100644
index 000000000000..dd61bb2df077
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
@@ -0,0 +1,48 @@
+//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format LoongArch assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchAsmPrinter.h"
+#include "LoongArch.h"
+#include "LoongArchTargetMachine.h"
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-asm-printer"
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "LoongArchGenMCPseudoLowering.inc"
+
+void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
+ return;
+
+ MCInst TmpInst;
+ if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this))
+ EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ AsmPrinter::runOnMachineFunction(MF);
+ return true;
+}
+
+// Force static initialization.
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmPrinter() {
+ RegisterAsmPrinter<LoongArchAsmPrinter> X(getTheLoongArch32Target());
+ RegisterAsmPrinter<LoongArchAsmPrinter> Y(getTheLoongArch64Target());
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
new file mode 100644
index 000000000000..7e5aa49f227c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
@@ -0,0 +1,46 @@
+//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// LoongArch Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H
+
+#include "LoongArchSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter {
+ const MCSubtargetInfo *STI;
+
+public:
+ explicit LoongArchAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {}
+
+ StringRef getPassName() const override {
+ return "LoongArch Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void emitInstruction(const MachineInstr *MI) override;
+
+ // tblgen'erated function.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td
new file mode 100644
index 000000000000..9844163163a5
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td
@@ -0,0 +1,23 @@
+//=- LoongArchCallingConv.td - Calling Conventions LoongArch -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the LoongArch architecture.
+//
+//===----------------------------------------------------------------------===//
+
+def CSR_ILP32S_LP64S
+ : CalleeSavedRegs<(add R1, (sequence "R%u", 22, 31))>;
+
+def CSR_ILP32F_LP64F
+ : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u", 24, 31))>;
+
+def CSR_ILP32D_LP64D
+ : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u_64", 24, 31))>;
+
+// Needed for implementation of LoongArchRegisterInfo::getNoPreservedMask()
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
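These lists mirror the LoongArch psABI split (naming per the ABI documents; background, not part of the patch): R1 is the return address and R22-R31 are the frame pointer plus the static registers, saved under every ABI; the hard-float ILP32F/LP64F ABIs additionally preserve f24-f31 in the 32-bit FPR bank, and ILP32D/LP64D the same numbers in the 64-bit bank.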
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
new file mode 100644
index 000000000000..5b117d40e0a9
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -0,0 +1,177 @@
+//=- LoongArchFloat32InstrInfo.td - Single-Precision Float instr -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the basic single-precision floating-point instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasBasicF] in {
+
+// Arithmetic Operation Instructions
+def FADD_S : FP_ALU_3R<0b00000001000000001, "fadd.s", FPR32>;
+def FSUB_S : FP_ALU_3R<0b00000001000000101, "fsub.s", FPR32>;
+def FMUL_S : FP_ALU_3R<0b00000001000001001, "fmul.s", FPR32>;
+def FDIV_S : FP_ALU_3R<0b00000001000001101, "fdiv.s", FPR32>;
+def FMADD_S : FP_ALU_4R<0b000010000001, "fmadd.s", FPR32>;
+def FMSUB_S : FP_ALU_4R<0b000010000101, "fmsub.s", FPR32>;
+def FNMADD_S : FP_ALU_4R<0b000010001001, "fnmadd.s", FPR32>;
+def FNMSUB_S : FP_ALU_4R<0b000010001101, "fnmsub.s", FPR32>;
+def FMAX_S : FP_ALU_3R<0b00000001000010001, "fmax.s", FPR32>;
+def FMIN_S : FP_ALU_3R<0b00000001000010101, "fmin.s", FPR32>;
+def FMAXA_S : FP_ALU_3R<0b00000001000011001, "fmaxa.s", FPR32>;
+def FMINA_S : FP_ALU_3R<0b00000001000011101, "fmina.s", FPR32>;
+def FABS_S : FP_ALU_2R<0b0000000100010100000001, "fabs.s", FPR32>;
+def FNEG_S : FP_ALU_2R<0b0000000100010100000101, "fneg.s", FPR32>;
+def FSQRT_S : FP_ALU_2R<0b0000000100010100010001, "fsqrt.s", FPR32>;
+def FRECIP_S : FP_ALU_2R<0b0000000100010100010101, "frecip.s", FPR32>;
+def FRSQRT_S : FP_ALU_2R<0b0000000100010100011001, "frsqrt.s", FPR32>;
+def FSCALEB_S : FP_ALU_3R<0b00000001000100001, "fscaleb.s", FPR32>;
+def FLOGB_S : FP_ALU_2R<0b0000000100010100001001, "flogb.s", FPR32>;
+def FCOPYSIGN_S : FP_ALU_3R<0b00000001000100101, "fcopysign.s", FPR32>;
+def FCLASS_S : FP_ALU_2R<0b0000000100010100001101, "fclass.s", FPR32>;
+
+
+// Comparison Instructions
+def FCMP_CAF_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CAF, "fcmp.caf.s", FPR32>;
+def FCMP_CUN_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUN, "fcmp.cun.s", FPR32>;
+def FCMP_CEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CEQ, "fcmp.ceq.s", FPR32>;
+def FCMP_CUEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUEQ, "fcmp.cueq.s", FPR32>;
+def FCMP_CLT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CLT, "fcmp.clt.s", FPR32>;
+def FCMP_CULT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CULT, "fcmp.cult.s", FPR32>;
+def FCMP_CLE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CLE, "fcmp.cle.s", FPR32>;
+def FCMP_CULE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CULE, "fcmp.cule.s", FPR32>;
+def FCMP_CNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CNE, "fcmp.cne.s", FPR32>;
+def FCMP_COR_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_COR, "fcmp.cor.s", FPR32>;
+def FCMP_CUNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUNE, "fcmp.cune.s", FPR32>;
+def FCMP_SAF_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SAF, "fcmp.saf.s", FPR32>;
+def FCMP_SUN_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUN, "fcmp.sun.s", FPR32>;
+def FCMP_SEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SEQ, "fcmp.seq.s", FPR32>;
+def FCMP_SUEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUEQ, "fcmp.sueq.s", FPR32>;
+def FCMP_SLT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SLT, "fcmp.slt.s", FPR32>;
+def FCMP_SULT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SULT, "fcmp.sult.s", FPR32>;
+def FCMP_SLE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SLE, "fcmp.sle.s", FPR32>;
+def FCMP_SULE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SULE, "fcmp.sule.s", FPR32>;
+def FCMP_SNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SNE, "fcmp.sne.s", FPR32>;
+def FCMP_SOR_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SOR, "fcmp.sor.s", FPR32>;
+def FCMP_SUNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUNE, "fcmp.sune.s", FPR32>;
+
+// Conversion Instructions
+def FFINT_S_W : FP_CONV<0b0000000100011101000100, "ffint.s.w", FPR32, FPR32>;
+def FTINT_W_S : FP_CONV<0b0000000100011011000001, "ftint.w.s", FPR32, FPR32>;
+def FTINTRM_W_S : FP_CONV<0b0000000100011010000001, "ftintrm.w.s", FPR32,
+ FPR32>;
+def FTINTRP_W_S : FP_CONV<0b0000000100011010010001, "ftintrp.w.s", FPR32,
+ FPR32>;
+def FTINTRZ_W_S : FP_CONV<0b0000000100011010100001, "ftintrz.w.s", FPR32,
+ FPR32>;
+def FTINTRNE_W_S : FP_CONV<0b0000000100011010110001, "ftintrne.w.s", FPR32,
+ FPR32>;
+def FRINT_S : FP_CONV<0b0000000100011110010001, "frint.s", FPR32, FPR32>;
+
+// Move Instructions
+def FSEL_S : FP_SEL<0b00001101000000, "fsel", FPR32>;
+def FMOV_S : FP_MOV<0b0000000100010100100101, "fmov.s", FPR32, FPR32>;
+def MOVGR2FR_W : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR32, GPR>;
+def MOVFR2GR_S : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR32>;
+def MOVGR2FCSR : FP_MOV<0b0000000100010100110000, "movgr2fcsr", FCSR, GPR>;
+def MOVFCSR2GR : FP_MOV<0b0000000100010100110010, "movfcsr2gr", GPR, FCSR>;
+def MOVFR2CF_S : FP_MOV<0b0000000100010100110100, "movfr2cf", CFR, FPR32>;
+def MOVCF2FR_S : FP_MOV<0b0000000100010100110101, "movcf2fr", FPR32, CFR>;
+def MOVGR2CF : FP_MOV<0b0000000100010100110110, "movgr2cf", CFR, GPR>;
+def MOVCF2GR : FP_MOV<0b0000000100010100110111, "movcf2gr", GPR, CFR>;
+
+// Branch Instructions
+def BCEQZ : FP_BRANCH<0b01001000, "bceqz">;
+def BCNEZ : FP_BRANCH<0b01001001, "bcnez">;
+
+// Common Memory Access Instructions
+def FLD_S : FP_LOAD_2RI12<0b0010101100, "fld.s", FPR32>;
+def FST_S : FP_STORE_2RI12<0b0010101101, "fst.s", FPR32>;
+def FLDX_S : FP_LOAD_3R<0b00111000001100000, "fldx.s", FPR32>;
+def FSTX_S : FP_STORE_3R<0b00111000001110000, "fstx.s", FPR32>;
+
+// Bound Check Memory Access Instructions
+def FLDGT_S : FP_LOAD_3R<0b00111000011101000, "fldgt.s", FPR32>;
+def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>;
+def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>;
+def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>;
+
+} // Predicates = [HasBasicF]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+/// Generic pattern classes
+
+class PatFpr<SDPatternOperator OpNode, LAInst Inst, RegisterClass RegTy>
+ : Pat<(OpNode RegTy:$fj), (Inst $fj)>;
+class PatFprFpr<SDPatternOperator OpNode, LAInst Inst, RegisterClass RegTy>
+ : Pat<(OpNode RegTy:$fj, RegTy:$fk), (Inst $fj, $fk)>;
+
+let Predicates = [HasBasicF] in {
+
+/// Float arithmetic operations
+
+def : PatFprFpr<fadd, FADD_S, FPR32>;
+def : PatFprFpr<fsub, FSUB_S, FPR32>;
+def : PatFprFpr<fmul, FMUL_S, FPR32>;
+def : PatFprFpr<fdiv, FDIV_S, FPR32>;
+def : PatFpr<fneg, FNEG_S, FPR32>;
+
+/// Setcc
+
+// Match non-signaling comparison
+
+// TODO: Change setcc to any_fsetcc once call lowering is supported, because
+// the test case needs to call llvm.experimental.constrained.fcmp.f32.
+// See RISCV float-fcmp-strict.ll for reference.
+class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
+ : Pat<(setcc RegTy:$fj, RegTy:$fk, cc),
+ (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
+def : PatFPSetcc<SETOEQ, FCMP_CEQ_S, FPR32>;
+def : PatFPSetcc<SETOLT, FCMP_CLT_S, FPR32>;
+def : PatFPSetcc<SETOLE, FCMP_CLE_S, FPR32>;
+def : PatFPSetcc<SETONE, FCMP_CNE_S, FPR32>;
+def : PatFPSetcc<SETO, FCMP_COR_S, FPR32>;
+def : PatFPSetcc<SETUEQ, FCMP_CUEQ_S, FPR32>;
+def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>;
+def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>;
+def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>;
+def : PatFPSetcc<SETUO, FCMP_CUN_S, FPR32>;
+
+// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions.
+
+/// Select
+
+def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj),
+ (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>;
+
+/// Selectcc
+
+class PatFPSelectcc<CondCode cc, LAInst CmpInst, LAInst SelInst,
+ RegisterClass RegTy>
+ : Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f),
+ (SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>;
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETO, FCMP_COR_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_S, FPR32>;
+
+} // Predicates = [HasBasicF]
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
new file mode 100644
index 000000000000..07fa61f4c361
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -0,0 +1,188 @@
+//=- LoongArchFloat64InstrInfo.td - Double-Precision Float instr -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the basic double-precision floating-point instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasBasicD] in {
+
+// Arithmetic Operation Instructions
+def FADD_D : FP_ALU_3R<0b00000001000000010, "fadd.d", FPR64>;
+def FSUB_D : FP_ALU_3R<0b00000001000000110, "fsub.d", FPR64>;
+def FMUL_D : FP_ALU_3R<0b00000001000001010, "fmul.d", FPR64>;
+def FDIV_D : FP_ALU_3R<0b00000001000001110, "fdiv.d", FPR64>;
+def FMADD_D : FP_ALU_4R<0b000010000010, "fmadd.d", FPR64>;
+def FMSUB_D : FP_ALU_4R<0b000010000110, "fmsub.d", FPR64>;
+def FNMADD_D : FP_ALU_4R<0b000010001010, "fnmadd.d", FPR64>;
+def FNMSUB_D : FP_ALU_4R<0b000010001110, "fnmsub.d", FPR64>;
+def FMAX_D : FP_ALU_3R<0b00000001000010010, "fmax.d", FPR64>;
+def FMIN_D : FP_ALU_3R<0b00000001000010110, "fmin.d", FPR64>;
+def FMAXA_D : FP_ALU_3R<0b00000001000011010, "fmaxa.d", FPR64>;
+def FMINA_D : FP_ALU_3R<0b00000001000011110, "fmina.d", FPR64>;
+def FABS_D : FP_ALU_2R<0b0000000100010100000010, "fabs.d", FPR64>;
+def FNEG_D : FP_ALU_2R<0b0000000100010100000110, "fneg.d", FPR64>;
+def FSQRT_D : FP_ALU_2R<0b0000000100010100010010, "fsqrt.d", FPR64>;
+def FRECIP_D : FP_ALU_2R<0b0000000100010100010110, "frecip.d", FPR64>;
+def FRSQRT_D : FP_ALU_2R<0b0000000100010100011010, "frsqrt.d", FPR64>;
+def FSCALEB_D : FP_ALU_3R<0b00000001000100010, "fscaleb.d", FPR64>;
+def FLOGB_D : FP_ALU_2R<0b0000000100010100001010, "flogb.d", FPR64>;
+def FCOPYSIGN_D : FP_ALU_3R<0b00000001000100110, "fcopysign.d", FPR64>;
+def FCLASS_D : FP_ALU_2R<0b0000000100010100001110, "fclass.d", FPR64>;
+
+// Comparison Instructions
+def FCMP_CAF_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CAF, "fcmp.caf.d", FPR64>;
+def FCMP_CUN_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUN, "fcmp.cun.d", FPR64>;
+def FCMP_CEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CEQ, "fcmp.ceq.d", FPR64>;
+def FCMP_CUEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUEQ, "fcmp.cueq.d", FPR64>;
+def FCMP_CLT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CLT, "fcmp.clt.d", FPR64>;
+def FCMP_CULT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CULT, "fcmp.cult.d", FPR64>;
+def FCMP_CLE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CLE, "fcmp.cle.d", FPR64>;
+def FCMP_CULE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CULE, "fcmp.cule.d", FPR64>;
+def FCMP_CNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CNE, "fcmp.cne.d", FPR64>;
+def FCMP_COR_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_COR, "fcmp.cor.d", FPR64>;
+def FCMP_CUNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUNE, "fcmp.cune.d", FPR64>;
+def FCMP_SAF_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SAF, "fcmp.saf.d", FPR64>;
+def FCMP_SUN_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUN, "fcmp.sun.d", FPR64>;
+def FCMP_SEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SEQ, "fcmp.seq.d", FPR64>;
+def FCMP_SUEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUEQ, "fcmp.sueq.d", FPR64>;
+def FCMP_SLT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SLT, "fcmp.slt.d", FPR64>;
+def FCMP_SULT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SULT, "fcmp.sult.d", FPR64>;
+def FCMP_SLE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SLE, "fcmp.sle.d", FPR64>;
+def FCMP_SULE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SULE, "fcmp.sule.d", FPR64>;
+def FCMP_SNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SNE, "fcmp.sne.d", FPR64>;
+def FCMP_SOR_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SOR, "fcmp.sor.d", FPR64>;
+def FCMP_SUNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUNE, "fcmp.sune.d", FPR64>;
+
+// Conversion Instructions
+def FFINT_S_L : FP_CONV<0b0000000100011101000110, "ffint.s.l", FPR32, FPR64>;
+def FTINT_L_S : FP_CONV<0b0000000100011011001001, "ftint.l.s", FPR64, FPR32>;
+def FTINTRM_L_S : FP_CONV<0b0000000100011010001001, "ftintrm.l.s", FPR64,
+ FPR32>;
+def FTINTRP_L_S : FP_CONV<0b0000000100011010011001, "ftintrp.l.s", FPR64,
+ FPR32>;
+def FTINTRZ_L_S : FP_CONV<0b0000000100011010101001, "ftintrz.l.s", FPR64,
+ FPR32>;
+def FTINTRNE_L_S : FP_CONV<0b0000000100011010111001, "ftintrne.l.s", FPR64,
+ FPR32>;
+def FCVT_S_D : FP_CONV<0b0000000100011001000110, "fcvt.s.d", FPR32, FPR64>;
+def FCVT_D_S : FP_CONV<0b0000000100011001001001, "fcvt.d.s", FPR64, FPR32>;
+def FFINT_D_W : FP_CONV<0b0000000100011101001000, "ffint.d.w", FPR64, FPR32>;
+def FFINT_D_L : FP_CONV<0b0000000100011101001010, "ffint.d.l", FPR64, FPR64>;
+def FTINT_W_D : FP_CONV<0b0000000100011011000010, "ftint.w.d", FPR32, FPR64>;
+def FTINT_L_D : FP_CONV<0b0000000100011011001010, "ftint.l.d", FPR64, FPR64>;
+def FTINTRM_W_D : FP_CONV<0b0000000100011010000010, "ftintrm.w.d", FPR32,
+ FPR64>;
+def FTINTRM_L_D : FP_CONV<0b0000000100011010001010, "ftintrm.l.d", FPR64,
+ FPR64>;
+def FTINTRP_W_D : FP_CONV<0b0000000100011010010010, "ftintrp.w.d", FPR32,
+ FPR64>;
+def FTINTRP_L_D : FP_CONV<0b0000000100011010011010, "ftintrp.l.d", FPR64,
+ FPR64>;
+def FTINTRZ_W_D : FP_CONV<0b0000000100011010100010, "ftintrz.w.d", FPR32,
+ FPR64>;
+def FTINTRZ_L_D : FP_CONV<0b0000000100011010101010, "ftintrz.l.d", FPR64,
+ FPR64>;
+def FTINTRNE_W_D : FP_CONV<0b0000000100011010110010, "ftintrne.w.d", FPR32,
+ FPR64>;
+def FTINTRNE_L_D : FP_CONV<0b0000000100011010111010, "ftintrne.l.d", FPR64,
+ FPR64>;
+def FRINT_D : FP_CONV<0b0000000100011110010010, "frint.d", FPR64, FPR64>;
+
+// Move Instructions
+def FMOV_D : FP_MOV<0b0000000100010100100110, "fmov.d", FPR64, FPR64>;
+def MOVFRH2GR_S : FP_MOV<0b0000000100010100101111, "movfrh2gr.s", GPR, FPR64>;
+let isCodeGenOnly = 1 in {
+def MOVFR2GR_S_64 : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR64>;
+def FSEL_D : FP_SEL<0b00001101000000, "fsel", FPR64>;
+} // isCodeGenOnly = 1
+let Constraints = "$dst = $out" in {
+def MOVGR2FRH_W : FPFmtMOV<0b0000000100010100101011, (outs FPR64:$out),
+ (ins FPR64:$dst, GPR:$src), "movgr2frh.w",
+ "$dst, $src">;
+} // Constraints = "$dst = $out"
+
+// Common Memory Access Instructions
+def FLD_D : FP_LOAD_2RI12<0b0010101110, "fld.d", FPR64>;
+def FST_D : FP_STORE_2RI12<0b0010101111, "fst.d", FPR64>;
+def FLDX_D : FP_LOAD_3R<0b00111000001101000, "fldx.d", FPR64>;
+def FSTX_D : FP_STORE_3R<0b00111000001111000, "fstx.d", FPR64>;
+
+// Bound Check Memory Access Instructions
+def FLDGT_D : FP_LOAD_3R<0b00111000011101001, "fldgt.d", FPR64>;
+def FLDLE_D : FP_LOAD_3R<0b00111000011101011, "fldle.d", FPR64>;
+def FSTGT_D : FP_STORE_3R<0b00111000011101101, "fstgt.d", FPR64>;
+def FSTLE_D : FP_STORE_3R<0b00111000011101111, "fstle.d", FPR64>;
+
+} // Predicates = [HasBasicD]
+
+// Instructions only available on LA64
+let Predicates = [HasBasicD, IsLA64] in {
+def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>;
+def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>;
+} // Predicates = [HasBasicD, IsLA64]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasBasicD] in {
+
+/// Float arithmetic operations
+
+def : PatFprFpr<fadd, FADD_D, FPR64>;
+def : PatFprFpr<fsub, FSUB_D, FPR64>;
+def : PatFprFpr<fmul, FMUL_D, FPR64>;
+def : PatFprFpr<fdiv, FDIV_D, FPR64>;
+def : PatFpr<fneg, FNEG_D, FPR64>;
+
+/// Setcc
+
+// Match non-signaling comparison
+
+// TODO: Change setcc to any_fsetcc once call lowering is supported, because
+// the test case needs to call llvm.experimental.constrained.fcmp.f64.
+// See RISCV float-fcmp-strict.ll for reference.
+
+// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
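+// For example, (setogt f64:$fj, f64:$fk) is legalized by swapping operands
+// into (setolt f64:$fk, f64:$fj), which then matches FCMP_CLT_D below.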
+def : PatFPSetcc<SETOEQ, FCMP_CEQ_D, FPR64>;
+def : PatFPSetcc<SETOLT, FCMP_CLT_D, FPR64>;
+def : PatFPSetcc<SETOLE, FCMP_CLE_D, FPR64>;
+def : PatFPSetcc<SETONE, FCMP_CNE_D, FPR64>;
+def : PatFPSetcc<SETO, FCMP_COR_D, FPR64>;
+def : PatFPSetcc<SETUEQ, FCMP_CUEQ_D, FPR64>;
+def : PatFPSetcc<SETULT, FCMP_CULT_D, FPR64>;
+def : PatFPSetcc<SETULE, FCMP_CULE_D, FPR64>;
+def : PatFPSetcc<SETUNE, FCMP_CUNE_D, FPR64>;
+def : PatFPSetcc<SETUO, FCMP_CUN_D, FPR64>;
+
+// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions.
+
+/// Select
+
+def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj),
+ (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>;
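+// Note the operand order: fsel returns $fj when the condition register is 0
+// and $fk when it is 1, so the select's true value maps to $fk and its false
+// value to $fj.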
+
+/// Selectcc
+
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETO, FCMP_COR_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_D, FPR64>;
+
+} // Predicates = [HasBasicD]
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
new file mode 100644
index 000000000000..d2ba1fdfffe4
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
@@ -0,0 +1,241 @@
+//==- LoongArchFloatInstrFormats.td - LoongArch FP Instr Formats -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describes the LoongArch floating-point instruction formats
+//
+// opcode - operation code.
+// fd - destination register operand.
+// {c/f}{j/k/a} - source register operand.
+// immN - immediate data operand.
+//
+//===----------------------------------------------------------------------===//
+
+// 2R-type
+// <opcode | fj | fd>
+class FPFmt2R<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> fj;
+ bits<5> fd;
+
+ let Inst{31-10} = op;
+ let Inst{9-5} = fj;
+ let Inst{4-0} = fd;
+}
+
+// 3R-type
+// <opcode | fk | fj | fd>
+class FPFmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> fk;
+ bits<5> fj;
+ bits<5> fd;
+
+ let Inst{31-15} = op;
+ let Inst{14-10} = fk;
+ let Inst{9-5} = fj;
+ let Inst{4-0} = fd;
+}
+
+// 4R-type
+// <opcode | fa | fk | fj | fd>
+class FPFmt4R<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> fa;
+ bits<5> fk;
+ bits<5> fj;
+ bits<5> fd;
+
+ let Inst{31-20} = op;
+ let Inst{19-15} = fa;
+ let Inst{14-10} = fk;
+ let Inst{9-5} = fj;
+ let Inst{4-0} = fd;
+}
+
+// 2RI12-type
+// <opcode | I12 | rj | fd>
+class FPFmt2RI12<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<12> imm12;
+ bits<5> rj;
+ bits<5> fd;
+
+ let Inst{31-22} = op;
+ let Inst{21-10} = imm12;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = fd;
+}
+
+// FPFmtFCMP
+// <opcode | cond | fk | fj | 0b00 | cd>
+class FPFmtFCMP<bits<12> op, bits<5> cond, dag outs, dag ins, string opcstr,
+ string opnstr, list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> fk;
+ bits<5> fj;
+ bits<3> cd;
+
+ let Inst{31-20} = op;
+ let Inst{19-15} = cond;
+ let Inst{14-10} = fk;
+ let Inst{9-5} = fj;
+ let Inst{4-3} = 0b00;
+ let Inst{2-0} = cd;
+}
+
+// FPFmtBR
+// <opcode[7:2] | I21[15:0] | opcode[1:0] | cj | I21[20:16]>
+class FPFmtBR<bits<8> opcode, dag outs, dag ins, string opcstr,
+ string opnstr, list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<21> imm21;
+ bits<3> cj;
+
+ let Inst{31-26} = opcode{7-2};
+ let Inst{25-10} = imm21{15-0};
+ let Inst{9-8} = opcode{1-0};
+ let Inst{7-5} = cj;
+ let Inst{4-0} = imm21{20-16};
+}
+
+// FPFmtFSEL
+// <opcode | ca | fk | fj | fd>
+class FPFmtFSEL<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<3> ca;
+ bits<5> fk;
+ bits<5> fj;
+ bits<5> fd;
+
+ let Inst{31-18} = op;
+ let Inst{17-15} = ca;
+ let Inst{14-10} = fk;
+ let Inst{9-5} = fj;
+ let Inst{4-0} = fd;
+}
+
+// FPFmtMOV
+// <opcode | src | dst>
+class FPFmtMOV<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> src;
+ bits<5> dst;
+
+ let Inst{31-10} = op;
+ let Inst{9-5} = src;
+ let Inst{4-0} = dst;
+}
+
+// FPFmtMEM
+// <opcode | rk | rj | fd>
+class FPFmtMEM<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> fd;
+
+ let Inst{31-15} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = fd;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+class FP_ALU_2R<bits<22> op, string opstr, RegisterClass rc>
+ : FPFmt2R<op, (outs rc:$fd), (ins rc:$fj), opstr, "$fd, $fj">;
+
+class FP_ALU_3R<bits<17> op, string opstr, RegisterClass rc>
+ : FPFmt3R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk), opstr, "$fd, $fj, $fk">;
+
+class FP_ALU_4R<bits<12> op, string opstr, RegisterClass rc>
+ : FPFmt4R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, rc:$fa), opstr,
+ "$fd, $fj, $fk, $fa">;
+
+class FPCMPOpc<bits<12> value> {
+ bits<12> val = value;
+}
+
+class FPCMPCond<bits<5> value> {
+ bits<5> val = value;
+}
+
+class FP_CMP<FPCMPOpc op, FPCMPCond cond, string opstr, RegisterClass rc>
+ : FPFmtFCMP<op.val, cond.val, (outs CFR:$cd), (ins rc:$fj, rc:$fk), opstr,
+ "$cd, $fj, $fk">;
+
+class FP_CONV<bits<22> op, string opstr, RegisterClass rcd, RegisterClass rcs>
+ : FPFmt2R<op, (outs rcd:$fd), (ins rcs:$fj), opstr, "$fd, $fj">;
+
+class FP_MOV<bits<22> op, string opstr, RegisterClass rcd, RegisterClass rcs>
+ : FPFmtMOV<op, (outs rcd:$dst), (ins rcs:$src), opstr, "$dst, $src">;
+
+class FP_SEL<bits<14> op, string opstr, RegisterClass rc>
+ : FPFmtFSEL<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, CFR:$ca), opstr,
+ "$fd, $fj, $fk, $ca">;
+
+class FP_BRANCH<bits<8> opcode, string opstr>
+ : FPFmtBR<opcode, (outs), (ins CFR:$cj, simm21_lsl2:$imm21), opstr,
+ "$cj, $imm21"> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+let mayLoad = 1 in {
+class FP_LOAD_3R<bits<17> op, string opstr, RegisterClass rc>
+ : FPFmtMEM<op, (outs rc:$fd), (ins GPR:$rj, GPR:$rk), opstr,
+ "$fd, $rj, $rk">;
+class FP_LOAD_2RI12<bits<10> op, string opstr, RegisterClass rc>
+ : FPFmt2RI12<op, (outs rc:$fd), (ins GPR:$rj, simm12:$imm12), opstr,
+ "$fd, $rj, $imm12">;
+} // mayLoad = 1
+
+let mayStore = 1 in {
+class FP_STORE_3R<bits<17> op, string opstr, RegisterClass rc>
+ : FPFmtMEM<op, (outs), (ins rc:$fd, GPR:$rj, GPR:$rk), opstr,
+ "$fd, $rj, $rk">;
+class FP_STORE_2RI12<bits<10> op, string opstr, RegisterClass rc>
+ : FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12), opstr,
+ "$fd, $rj, $imm12">;
+} // mayStore = 1
+
+def FPCMP_OPC_S : FPCMPOpc<0b000011000001>;
+def FPCMP_OPC_D : FPCMPOpc<0b000011000010>;
+
+def FPCMP_COND_CAF : FPCMPCond<0x0>;
+def FPCMP_COND_CUN : FPCMPCond<0x8>;
+def FPCMP_COND_CEQ : FPCMPCond<0x4>;
+def FPCMP_COND_CUEQ : FPCMPCond<0xC>;
+def FPCMP_COND_CLT : FPCMPCond<0x2>;
+def FPCMP_COND_CULT : FPCMPCond<0xA>;
+def FPCMP_COND_CLE : FPCMPCond<0x6>;
+def FPCMP_COND_CULE : FPCMPCond<0xE>;
+def FPCMP_COND_CNE : FPCMPCond<0x10>;
+def FPCMP_COND_COR : FPCMPCond<0x14>;
+def FPCMP_COND_CUNE : FPCMPCond<0x18>;
+def FPCMP_COND_SAF : FPCMPCond<0x1>;
+def FPCMP_COND_SUN : FPCMPCond<0x9>;
+def FPCMP_COND_SEQ : FPCMPCond<0x5>;
+def FPCMP_COND_SUEQ : FPCMPCond<0xD>;
+def FPCMP_COND_SLT : FPCMPCond<0x3>;
+def FPCMP_COND_SULT : FPCMPCond<0xB>;
+def FPCMP_COND_SLE : FPCMPCond<0x7>;
+def FPCMP_COND_SULE : FPCMPCond<0xF>;
+def FPCMP_COND_SNE : FPCMPCond<0x11>;
+def FPCMP_COND_SOR : FPCMPCond<0x15>;
+def FPCMP_COND_SUNE : FPCMPCond<0x19>;
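+
+// Note: each signaling condition (S*) is encoded as its quiet counterpart
+// (C*) with bit 0 set, e.g. SEQ (0x5) == CEQ (0x4) | 1.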
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
new file mode 100644
index 000000000000..7182d55ca3cf
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -0,0 +1,55 @@
+//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchFrameLowering.h"
+#include "LoongArchSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/MC/MCDwarf.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-frame-lowering"
+
+// Return true if the specified function should have a dedicated frame
+// pointer register. This is true if frame pointer elimination is disabled,
+// if the function needs dynamic stack realignment, if it has variable-sized
+// allocas, or if the frame address is taken.
+bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken();
+}
+
+bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF);
+}
+
+void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // TODO: Implement this when we have function calls
+}
+
+void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // TODO: Implement this when we have function calls
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
new file mode 100644
index 000000000000..25c53efc10f1
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -0,0 +1,38 @@
+//=- LoongArchFrameLowering.h - TargetFrameLowering for LoongArch -*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements LoongArch-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+class LoongArchSubtarget;
+
+class LoongArchFrameLowering : public TargetFrameLowering {
+ const LoongArchSubtarget &STI;
+
+public:
+ explicit LoongArchFrameLowering(const LoongArchSubtarget &STI)
+ : TargetFrameLowering(StackGrowsDown,
+ /*StackAlignment=*/Align(16),
+ /*LocalAreaOffset=*/0),
+ STI(STI) {}
+
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
new file mode 100644
index 000000000000..cc9ea0255d98
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -0,0 +1,132 @@
+//=- LoongArchISelDAGToDAG.cpp - A dag to dag inst selector for LoongArch -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the LoongArch target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchISelDAGToDAG.h"
+#include "LoongArchISelLowering.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "MCTargetDesc/LoongArchMatInt.h"
+#include "llvm/Support/KnownBits.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-isel"
+
+void LoongArchDAGToDAGISel::Select(SDNode *Node) {
+ // If we have a custom node, we have already selected.
+ if (Node->isMachineOpcode()) {
+ LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ Node->setNodeId(-1);
+ return;
+ }
+
+ // Instruction Selection not handled by the auto-generated tablegen selection
+ // should be handled here.
+ unsigned Opcode = Node->getOpcode();
+ MVT GRLenVT = Subtarget->getGRLenVT();
+ SDLoc DL(Node);
+
+ switch (Opcode) {
+ default:
+ break;
+ case ISD::Constant: {
+ int64_t Imm = cast<ConstantSDNode>(Node)->getSExtValue();
+ if (Imm == 0 && Node->getSimpleValueType(0) == GRLenVT) {
+ SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ LoongArch::R0, GRLenVT);
+ ReplaceNode(Node, New.getNode());
+ return;
+ }
+ SDNode *Result = nullptr;
+ SDValue SrcReg = CurDAG->getRegister(LoongArch::R0, GRLenVT);
+ // Materialize the immediate with the instruction sequence computed by
+ // generateInstSeq.
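+ // For example, a 32-bit immediate such as 0x12345678 would typically be
+ // built as LU12I_W with the upper 20 bits followed by ORI with the low
+ // 12 bits.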
+ for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) {
+ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT);
+ if (Inst.Opc == LoongArch::LU12I_W)
+ Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm);
+ else
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm);
+ SrcReg = SDValue(Result, 0);
+ }
+
+ ReplaceNode(Node, Result);
+ return;
+ }
+ // TODO: Add selection nodes needed later.
+ }
+
+ // Select the default instruction.
+ SelectCode(Node);
+}
+
+bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
+ SDValue &ShAmt) {
+ // Shift instructions on LoongArch only read the lower 5 or 6 bits of the
+ // shift amount. If there is an AND on the shift amount, we can bypass it if
+ // it doesn't affect any of those bits.
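+ // For example, with ShiftWidth == 64, (sll $x, (and $y, 63)) can use $y
+ // directly, since SLL_D itself only reads bits [5:0] of the shift amount.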
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ const APInt &AndMask = N->getConstantOperandAPInt(1);
+
+ // Since the max shift amount is a power of 2 we can subtract 1 to make a
+ // mask that covers the bits needed to represent all shift amounts.
+ assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
+ APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
+
+ if (ShMask.isSubsetOf(AndMask)) {
+ ShAmt = N.getOperand(0);
+ return true;
+ }
+
+ // SimplifyDemandedBits may have optimized the mask so try restoring any
+ // bits that are known zero.
+ KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
+ if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
+ ShAmt = N.getOperand(0);
+ return true;
+ }
+ } else if (N.getOpcode() == LoongArchISD::BSTRPICK) {
+ // Similar to the above AND, if there is a BSTRPICK on the shift amount, we
+ // can bypass it.
+ assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
+ assert(isa<ConstantSDNode>(N.getOperand(1)) && "Illegal msb operand!");
+ assert(isa<ConstantSDNode>(N.getOperand(2)) && "Illegal lsb operand!");
+ uint64_t msb = N.getConstantOperandVal(1), lsb = N.getConstantOperandVal(2);
+ if (lsb == 0 && Log2_32(ShiftWidth) <= msb + 1) {
+ ShAmt = N.getOperand(0);
+ return true;
+ }
+ } else if (N.getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(N.getOperand(0))) {
+ uint64_t Imm = N.getConstantOperandVal(0);
+ // If we are shifting by N-X where N == 0 mod ShiftWidth, shift by -X
+ // instead, so a NEG is generated rather than a SUB of a constant.
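+ // For example, with ShiftWidth == 64, (srl $x, (sub 64, $y)) shifts by
+ // (sub $zero, $y) instead, i.e. a single NEG of $y.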
+ if (Imm != 0 && Imm % ShiftWidth == 0) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ SDValue Zero =
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, LoongArch::R0, VT);
+ unsigned NegOpc = VT == MVT::i64 ? LoongArch::SUB_D : LoongArch::SUB_W;
+ MachineSDNode *Neg =
+ CurDAG->getMachineNode(NegOpc, DL, VT, Zero, N.getOperand(1));
+ ShAmt = SDValue(Neg, 0);
+ return true;
+ }
+ }
+
+ ShAmt = N;
+ return true;
+}
+
+// This pass converts a legalized DAG into a LoongArch-specific DAG, ready
+// for instruction scheduling.
+FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {
+ return new LoongArchDAGToDAGISel(TM);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
new file mode 100644
index 000000000000..f477129d933c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -0,0 +1,55 @@
+//=- LoongArchISelDAGToDAG.h - A dag to dag inst selector for LoongArch ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the LoongArch target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H
+
+#include "LoongArch.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+// LoongArch-specific code to select LoongArch machine instructions for
+// SelectionDAG operations.
+namespace llvm {
+class LoongArchDAGToDAGISel : public SelectionDAGISel {
+ const LoongArchSubtarget *Subtarget = nullptr;
+
+public:
+ explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM)
+ : SelectionDAGISel(TM) {}
+
+ StringRef getPassName() const override {
+ return "LoongArch DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<LoongArchSubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+ }
+
+ void Select(SDNode *Node) override;
+
+ bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
+ bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) {
+ return selectShiftMask(N, Subtarget->getGRLen(), ShAmt);
+ }
+ bool selectShiftMask32(SDValue N, SDValue &ShAmt) {
+ return selectShiftMask(N, 32, ShAmt);
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "LoongArchGenDAGISel.inc"
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
new file mode 100644
index 000000000000..d5a469216859
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -0,0 +1,531 @@
+//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that LoongArch uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchISelLowering.h"
+#include "LoongArch.h"
+#include "LoongArchMachineFunctionInfo.h"
+#include "LoongArchRegisterInfo.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-isel-lowering"
+
+LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
+ const LoongArchSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
+
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ // Set up the register classes.
+ addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
+ if (Subtarget.hasBasicF())
+ addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
+ if (Subtarget.hasBasicD())
+ addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
+
+ // TODO: add necessary setOperationAction calls later.
+ setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
+ setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
+ setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
+
+ if (Subtarget.is64Bit()) {
+ setOperationAction(ISD::SHL, MVT::i32, Custom);
+ setOperationAction(ISD::SRA, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i32, Custom);
+ }
+
+ static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
+ ISD::SETUGT, ISD::SETUGE};
+
+ if (Subtarget.hasBasicF()) {
+ setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ }
+ if (Subtarget.hasBasicD()) {
+ setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+ }
+
+ setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Compute derived properties from the register classes.
+ computeRegisterProperties(STI.getRegisterInfo());
+
+ setStackPointerRegisterToSaveRestore(LoongArch::R3);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Function alignments.
+ const Align FunctionAlignment(4);
+ setMinFunctionAlignment(FunctionAlignment);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::SRL);
+}
+
+SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ report_fatal_error("unimplemented operand");
+ case ISD::SHL_PARTS:
+ return lowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ return lowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS:
+ return lowerShiftRightParts(Op, DAG, false);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ // This can be called for an i32 shift amount that needs to be promoted.
+ assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ return SDValue();
+ }
+}
+
+SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ EVT VT = Lo.getValueType();
+
+ // if Shamt-GRLen < 0: // Shamt < GRLen
+ // Lo = Lo << Shamt
+ // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
+ // else:
+ // Lo = 0
+ // Hi = Lo << (Shamt-GRLen)
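+ //
+ // For example, with GRLen == 32 and Shamt == 4:
+ //   Hi = (Hi << 4) | ((Lo >>u 1) >>u (31 ^ 4)) == (Hi << 4) | (Lo >>u 28).
+ // The two-step right shift avoids an out-of-range shift when Shamt == 0.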
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
+ SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
+ SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
+ SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
+
+ SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
+ SDValue ShiftRightLo =
+ DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+ SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+ SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
+
+ SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+ SDValue Parts[2] = {Lo, Hi};
+ return DAG.getMergeValues(Parts, DL);
+}
+
+SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG,
+ bool IsSRA) const {
+ SDLoc DL(Op);
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ EVT VT = Lo.getValueType();
+
+ // SRA expansion:
+ // if Shamt-GRLen < 0: // Shamt < GRLen
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
+ // Hi = Hi >>s Shamt
+ // else:
+ // Lo = Hi >>s (Shamt-GRLen);
+ // Hi = Hi >>s (GRLen-1)
+ //
+ // SRL expansion:
+ // if Shamt-GRLen < 0: // Shamt < GRLen
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
+ // Hi = Hi >>u Shamt
+ // else:
+ // Lo = Hi >>u (Shamt-GRLen);
+ // Hi = 0;
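+ //
+ // For example, with GRLen == 32 and Shamt == 4 (SRL case):
+ //   Lo = (Lo >>u 4) | ((Hi << 1) << (4 ^ 31)) == (Lo >>u 4) | (Hi << 28).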
+
+ unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
+ SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
+ SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
+ SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
+
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+ SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
+ SDValue ShiftLeftHi =
+ DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
+ SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
+ SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+ SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
+ SDValue HiFalse =
+ IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
+
+ SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+ SDValue Parts[2] = {Lo, Hi};
+ return DAG.getMergeValues(Parts, DL);
+}
+
+// Returns the opcode of the target-specific SDNode that implements the 32-bit
+// form of the given Opcode.
+static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case ISD::SHL:
+ return LoongArchISD::SLL_W;
+ case ISD::SRA:
+ return LoongArchISD::SRA_W;
+ case ISD::SRL:
+ return LoongArchISD::SRL_W;
+ }
+}
+
+// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
+// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
+// otherwise be promoted to i64, making it difficult to select the
+// SLL_W/.../*W instructions later on, because the fact that the operation was
+// originally of type i8/i16/i32 is lost.
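+//
+// For example, an i32 (shl $a, $b) on LA64 becomes
+// (trunc (SLL_W (any_extend $a), (any_extend $b))).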
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
+ unsigned ExtOpc = ISD::ANY_EXTEND) {
+ SDLoc DL(N);
+ LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
+ SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
+ SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
+ // ReplaceNodeResults requires we maintain the same type for the return value.
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
+}
+
+void LoongArchTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to legalize this operation");
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ if (N->getOperand(1).getOpcode() != ISD::Constant) {
+ Results.push_back(customLegalizeToWOp(N, DAG));
+ break;
+ }
+ break;
+ }
+}
+
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue FirstOperand = N->getOperand(0);
+ SDValue SecondOperand = N->getOperand(1);
+ unsigned FirstOperandOpc = FirstOperand.getOpcode();
+ EVT ValTy = N->getValueType(0);
+ SDLoc DL(N);
+ uint64_t lsb, msb;
+ unsigned SMIdx, SMLen;
+ ConstantSDNode *CN;
+ SDValue NewOperand;
+ MVT GRLenVT = Subtarget.getGRLenVT();
+
+ // Op's second operand must be a shifted mask.
+ if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
+ !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
+ return SDValue();
+
+ if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
+ // Pattern match BSTRPICK.
+ // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
+ // => BSTRPICK $dst, $src, msb, lsb
+ // where msb = lsb + len - 1
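+ //
+ // For example, (and (srl $src, 4), 255) becomes (BSTRPICK $src, 11, 4),
+ // since len == 8, lsb == 4 and msb == 11.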
+
+ // The second operand of the shift must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
+ return SDValue();
+
+ lsb = CN->getZExtValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its
+ // length and lsb exceeds the word's size.
+ if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
+ return SDValue();
+
+ NewOperand = FirstOperand.getOperand(0);
+ } else {
+ // Pattern match BSTRPICK.
+ // $dst = and $src, (2**len - 1), if len > 12
+ // => BSTRPICK $dst, $src, msb, lsb
+ // where lsb = 0 and msb = len - 1
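+ //
+ // For example, (and $src, 0xffff) becomes (BSTRPICK $src, 15, 0).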
+
+ // If the mask is <= 0xfff, andi can be used instead.
+ if (CN->getZExtValue() <= 0xfff)
+ return SDValue();
+
+ // Return if the mask doesn't start at position 0.
+ if (SMIdx)
+ return SDValue();
+
+ lsb = 0;
+ NewOperand = FirstOperand;
+ }
+ msb = lsb + SMLen - 1;
+ return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
+ DAG.getConstant(msb, DL, GRLenVT),
+ DAG.getConstant(lsb, DL, GRLenVT));
+}
+
+static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const LoongArchSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ // $dst = srl (and $src, Mask), Shamt
+ // =>
+ // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
+ // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
+ //
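+ // For example, (srl (and $src, 0xff0), 4) becomes (BSTRPICK $src, 11, 4):
+ // MaskIdx == 4 and MaskLen == 8 give msb == 11, and lsb == Shamt == 4.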
+
+ SDValue FirstOperand = N->getOperand(0);
+ ConstantSDNode *CN;
+ EVT ValTy = N->getValueType(0);
+ SDLoc DL(N);
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ unsigned MaskIdx, MaskLen;
+ uint64_t Shamt;
+
+ // The first operand must be an AND and the second operand of the AND must be
+ // a shifted mask.
+ if (FirstOperand.getOpcode() != ISD::AND ||
+ !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
+ !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
+ return SDValue();
+
+ // The second operand (shift amount) must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
+ return SDValue();
+
+ Shamt = CN->getZExtValue();
+ if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
+ return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
+ FirstOperand->getOperand(0),
+ DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
+ DAG.getConstant(Shamt, DL, GRLenVT));
+
+ return SDValue();
+}
+
+SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::AND:
+ return performANDCombine(N, DAG, DCI, Subtarget);
+ case ISD::SRL:
+ return performSRLCombine(N, DAG, DCI, Subtarget);
+ }
+ return SDValue();
+}
+
+const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch ((LoongArchISD::NodeType)Opcode) {
+ case LoongArchISD::FIRST_NUMBER:
+ break;
+
+#define NODE_NAME_CASE(node) \
+ case LoongArchISD::node: \
+ return "LoongArchISD::" #node;
+
+ // TODO: Add more target-dependent nodes later.
+ NODE_NAME_CASE(RET)
+ NODE_NAME_CASE(SLL_W)
+ NODE_NAME_CASE(SRA_W)
+ NODE_NAME_CASE(SRL_W)
+ NODE_NAME_CASE(BSTRPICK)
+ }
+#undef NODE_NAME_CASE
+ return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+// FIXME: For now, we only support CallingConv::C with fixed arguments that
+// are passed in integer or floating-point registers.
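+// ArgGPRs corresponds to the psABI integer argument registers $a0-$a7
+// ($r4-$r11); ArgFPR32s and ArgFPR64s to the FP argument registers
+// $fa0-$fa7 ($f0-$f7).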
+const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
+ LoongArch::R7, LoongArch::R8, LoongArch::R9,
+ LoongArch::R10, LoongArch::R11};
+const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
+ LoongArch::F3, LoongArch::F4, LoongArch::F5,
+ LoongArch::F6, LoongArch::F7};
+const MCPhysReg ArgFPR64s[] = {
+ LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
+ LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
+
+// Implements the LoongArch calling convention. Returns true upon failure.
+static bool CC_LoongArch(unsigned ValNo, MVT ValVT,
+ CCValAssign::LocInfo LocInfo, CCState &State) {
+ // Allocate to a register if possible.
+ Register Reg;
+
+ if (ValVT == MVT::f32)
+ Reg = State.AllocateReg(ArgFPR32s);
+ else if (ValVT == MVT::f64)
+ Reg = State.AllocateReg(ArgFPR64s);
+ else
+ Reg = State.AllocateReg(ArgGPRs);
+ if (Reg) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, ValVT, LocInfo));
+ return false;
+ }
+
+ // TODO: Handle arguments passed without register.
+ return true;
+}
+
+void LoongArchTargetLowering::analyzeInputArgs(
+ CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins,
+ LoongArchCCAssignFn Fn) const {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ MVT ArgVT = Ins[i].VT;
+
+ if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) {
+ LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n');
+ llvm_unreachable("");
+ }
+ }
+}
+
+void LoongArchTargetLowering::analyzeOutputArgs(
+ CCState &CCInfo, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LoongArchCCAssignFn Fn) const {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT ArgVT = Outs[i].VT;
+
+ if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) {
+ LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << "\n");
+ llvm_unreachable("");
+ }
+ }
+}
+
+static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
+ const CCValAssign &VA, const SDLoc &DL,
+ const LoongArchTargetLowering &TLI) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ EVT LocVT = VA.getLocVT();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
+ Register VReg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+
+ return DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+}
+
+// Transform physical registers into virtual registers.
+SDValue LoongArchTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::C:
+ break;
+ }
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ analyzeInputArgs(CCInfo, Ins, CC_LoongArch);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ InVals.push_back(unpackFromRegLoc(DAG, Chain, ArgLocs[i], DL, *this));
+
+ return Chain;
+}
+
+bool LoongArchTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ // Any return value split into more than two values can't be returned
+ // directly.
+ return Outs.size() <= 2;
+}
+
+SDValue LoongArchTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ // Stores the assignment of the return value to a location.
+ SmallVector<CCValAssign> RVLocs;
+
+ // Info about the registers and stack slot.
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ analyzeOutputArgs(CCInfo, Outs, CC_LoongArch);
+
+ SDValue Glue;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Handle a 'normal' return.
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
+
+ // Guarantee that all emitted copies are stuck together.
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the glue node if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
+
+ return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
new file mode 100644
index 000000000000..c852577a3744
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -0,0 +1,95 @@
+//=- LoongArchISelLowering.h - LoongArch DAG Lowering Interface -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that LoongArch uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H
+
+#include "LoongArch.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+class LoongArchSubtarget;
+struct LoongArchRegisterInfo;
+namespace LoongArchISD {
+enum NodeType : unsigned {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // TODO: add more LoongArchISDs
+ RET,
+ // 32-bit shifts, directly matching the semantics of the named LoongArch
+ // instructions.
+ SLL_W,
+ SRA_W,
+ SRL_W,
+
+ BSTRPICK,
+};
+} // namespace LoongArchISD
+
+class LoongArchTargetLowering : public TargetLowering {
+ const LoongArchSubtarget &Subtarget;
+
+public:
+ explicit LoongArchTargetLowering(const TargetMachine &TM,
+ const LoongArchSubtarget &STI);
+
+ const LoongArchSubtarget &getSubtarget() const { return Subtarget; }
+
+ // Provide custom lowering hooks for some operations.
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ // This method returns the name of a target specific DAG node.
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ // Lower incoming arguments, copy physregs into vregs.
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+ SelectionDAG &DAG) const override;
+
+private:
+ /// Target-specific function used to lower LoongArch calling conventions.
+ typedef bool LoongArchCCAssignFn(unsigned ValNo, MVT ValVT,
+ CCValAssign::LocInfo LocInfo,
+ CCState &State);
+
+ void analyzeInputArgs(CCState &CCInfo,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ LoongArchCCAssignFn Fn) const;
+ void analyzeOutputArgs(CCState &CCInfo,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LoongArchCCAssignFn Fn) const;
+
+ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
new file mode 100644
index 000000000000..bebc83a861ae
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
@@ -0,0 +1,404 @@
+//===- LoongArchInstrFormats.td - LoongArch Instr. Formats -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describes the LoongArch instruction formats
+//
+// opcode - operation code.
+// rd - destination register operand.
+// r{j/k} - source register operand.
+// immN - immediate data operand.
+//
+//===----------------------------------------------------------------------===//
+
+class LAInst<dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : Instruction {
+ field bits<32> Inst;
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ let Namespace = "LoongArch";
+ let Size = 4;
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = opcstr # "\t" # opnstr;
+ let Pattern = pattern;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, list<dag> pattern = [], string opcstr = "",
+ string opnstr = "">
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+// 2R-type
+// <opcode | rj | rd>
+class Fmt2R<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-10} = op;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 3R-type
+// <opcode | rk | rj | rd>
+class Fmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-15} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 3RI2-type
+// <opcode | I2 | rk | rj | rd>
+class Fmt3RI2<bits<15> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<2> imm2;
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-17} = op;
+ let Inst{16-15} = imm2;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 3RI3-type
+// <opcode | I3 | rk | rj | rd>
+class Fmt3RI3<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<3> imm3;
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-18} = op;
+ let Inst{17-15} = imm3;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI5-type
+// <opcode | I5 | rj | rd>
+class Fmt2RI5<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> imm5;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-15} = op;
+ let Inst{14-10} = imm5;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI6-type
+// <opcode | I6 | rj | rd>
+class Fmt2RI6<bits<16> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<6> imm6;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-16} = op;
+ let Inst{15-10} = imm6;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI8-type
+// <opcode | I8 | rj | rd>
+class Fmt2RI8<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-18} = op;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI12-type
+// <opcode | I12 | rj | rd>
+class Fmt2RI12<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<12> imm12;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-22} = op;
+ let Inst{21-10} = imm12;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI14-type
+// <opcode | I14 | rj | rd>
+class Fmt2RI14<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<14> imm14;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-24} = op;
+ let Inst{23-10} = imm14;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI16-type
+// <opcode | I16 | rj | rd>
+class Fmt2RI16<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<16> imm16;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-26} = op;
+ let Inst{25-10} = imm16;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 1RI20-type
+// <opcode | I20 | rd>
+class Fmt1RI20<bits<7> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<20> imm20;
+ bits<5> rd;
+
+ let Inst{31-25} = op;
+ let Inst{24-5} = imm20;
+ let Inst{4-0} = rd;
+}
+
+// 1RI21-type
+// <opcode | I21[15:0] | rj | I21[20:16]>
+class Fmt1RI21<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<21> imm21;
+ bits<5> rj;
+
+ let Inst{31-26} = op;
+ let Inst{25-10} = imm21{15-0};
+ let Inst{9-5} = rj;
+ let Inst{4-0} = imm21{20-16};
+}
+
+// I15-type
+// <opcode | I15>
+class FmtI15<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<15> imm15;
+
+ let Inst{31-15} = op;
+ let Inst{14-0} = imm15;
+}
+
+// I26-type
+// <opcode | I26[15:0] | I26[25:16]>
+class FmtI26<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<26> imm26;
+
+ let Inst{31-26} = op;
+ let Inst{25-10} = imm26{15-0};
+ let Inst{9-0} = imm26{25-16};
+}
+
+// FmtBSTR_W
+// <opcode[11:1] | msbw | opcode[0] | lsbw | rj | rd>
+class FmtBSTR_W<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> msbw;
+ bits<5> lsbw;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-21} = op{11-1};
+ let Inst{20-16} = msbw;
+ let Inst{15} = op{0};
+ let Inst{14-10} = lsbw;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// FmtBSTR_D
+// <opcode | msbd | lsbd | rj | rd>
+class FmtBSTR_D<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<6> msbd;
+ bits<6> lsbd;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-22} = op;
+ let Inst{21-16} = msbd;
+ let Inst{15-10} = lsbd;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// FmtASRT
+// <opcode | rk | rj | 0x0>
+class FmtASRT<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> rj;
+
+ let Inst{31-15} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = 0x0;
+}
+
+// FmtPRELD
+// < 0b0010101011 | I12 | rj | I5>
+class FmtPRELD<dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<12> imm12;
+ bits<5> rj;
+ bits<5> imm5;
+
+ let Inst{31-22} = 0b0010101011;
+ let Inst{21-10} = imm12;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = imm5;
+}
+
+// FmtPRELDX
+// < 0b00111000001011000 | rk | rj | I5>
+class FmtPRELDX<dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> imm5;
+
+ let Inst{31-15} = 0b00111000001011000;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = imm5;
+}
+
+// FmtCSR
+// <opcode[12:5] | csr_num | opcode[4:0] | rd>
+class FmtCSR<bits<13> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<14> csr_num;
+ bits<5> rd;
+
+ let Inst{31-24} = op{12-5};
+ let Inst{23-10} = csr_num;
+ let Inst{9-5} = op{4-0};
+ let Inst{4-0} = rd;
+}
+
+// FmtCSRXCHG
+// <opcode | csr_num | rj | rd>
+class FmtCSRXCHG<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<14> csr_num;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-24} = op;
+ let Inst{23-10} = csr_num;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// FmtCACOP
+// <0b0000011000 | I12 | rj | I5>
+class FmtCACOP<dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<12> imm12;
+ bits<5> rj;
+ bits<5> op;
+
+ let Inst{31-22} = 0b0000011000;
+ let Inst{21-10} = imm12;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = op;
+}
+
+// FmtI32
+// <I32>
+class FmtI32<bits<32> op, string opstr, list<dag> pattern = []>
+ : LAInst<(outs), (ins), opstr, "", pattern> {
+ let Inst{31-0} = op;
+}
+
+// FmtINVTLB
+// <0b00000110010010011 | rk | rj | I5>
+class FmtINVTLB<dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> op;
+
+ let Inst{31-15} = 0b00000110010010011;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = op;
+}
+
+// FmtLDPTE
+// <0b00000110010001 | seq | rj | 00000>
+class FmtLDPTE<dag outs, dag ins, string opcstr, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ bits<8> seq;
+ bits<5> rj;
+
+ let Inst{31-18} = 0b00000110010001;
+ let Inst{17-10} = seq;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = 0b00000;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
new file mode 100644
index 000000000000..146ef53befd5
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -0,0 +1,49 @@
+//=- LoongArchInstrInfo.cpp - LoongArch Instruction Information -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchInstrInfo.h"
+#include "LoongArch.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "LoongArchGenInstrInfo.inc"
+
+LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI)
+ // FIXME: add CFSetup and CFDestroy Inst when we implement function call.
+ : LoongArchGenInstrInfo() {}
+
+void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, MCRegister DstReg,
+ MCRegister SrcReg, bool KillSrc) const {
+ if (LoongArch::GPRRegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(LoongArch::OR), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(LoongArch::R0);
+ return;
+ }
+
+ // FPR->FPR copies.
+ unsigned Opc;
+ if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) {
+ Opc = LoongArch::FMOV_S;
+ } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) {
+ Opc = LoongArch::FMOV_D;
+ } else {
+ // TODO: support other copies.
+ llvm_unreachable("Impossible reg-to-reg copy");
+ }
+
+ BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
new file mode 100644
index 000000000000..f31943b85a51
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -0,0 +1,36 @@
+//=- LoongArchInstrInfo.h - LoongArch Instruction Information ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
+
+#include "LoongArchRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "LoongArchGenInstrInfo.inc"
+
+namespace llvm {
+
+class LoongArchSubtarget;
+
+class LoongArchInstrInfo : public LoongArchGenInstrInfo {
+public:
+ explicit LoongArchInstrInfo(LoongArchSubtarget &STI);
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
+ bool KillSrc) const override;
+};
+
+} // end namespace llvm
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
new file mode 100644
index 000000000000..6b8ee9e43f94
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -0,0 +1,730 @@
+//== LoongArchInstrInfo.td - Target Description for LoongArch -*- tablegen -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LoongArch instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LoongArch specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Target-dependent type requirements.
+def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
+]>;
+
+def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3>
+]>;
+
+// TODO: Add LoongArch specific DAG Nodes
+// Target-dependent nodes.
+def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
+def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_bstrpick
+ : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+class ImmAsmOperand<string prefix, int width, string suffix>
+ : AsmOperandClass {
+ let Name = prefix # "Imm" # width # suffix;
+ let DiagnosticType = !strconcat("Invalid", Name);
+ let RenderMethod = "addImmOperands";
+}
+
+class SImmAsmOperand<int width, string suffix = "">
+ : ImmAsmOperand<"S", width, suffix> {
+}
+
+class UImmAsmOperand<int width, string suffix = "">
+ : ImmAsmOperand<"U", width, suffix> {
+}
+
+def uimm2 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<2>;
+}
+
+def uimm2_plus1 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<2, "plus1">;
+ let EncoderMethod = "getImmOpValueSub1";
+ let DecoderMethod = "decodeUImmOperand<2, 1>";
+}
+
+def uimm3 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<3>;
+}
+
+def uimm5 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<5>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<5>;
+}
+
+def uimm6 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<6>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<6>;
+}
+
+def uimm8 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<8>;
+}
+
+def uimm12 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<12>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<12>;
+}
+
+def uimm14 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<14>;
+}
+
+def uimm15 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<15>;
+}
+
+def simm12 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isInt<12>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<12>;
+ let DecoderMethod = "decodeSImmOperand<12>";
+}
+
+def simm14_lsl2 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<14, "lsl2">;
+ let EncoderMethod = "getImmOpValueAsr2";
+ let DecoderMethod = "decodeSImmOperand<14, 2>";
+}
+
+def simm16 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<16>;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+
+def simm16_lsl2 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
+ let EncoderMethod = "getImmOpValueAsr2";
+ let DecoderMethod = "decodeSImmOperand<16, 2>";
+}
+
+def simm20 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<20>;
+ let DecoderMethod = "decodeSImmOperand<20>";
+}
+
+def simm21_lsl2 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<21, "lsl2">;
+ let EncoderMethod = "getImmOpValueAsr2";
+ let DecoderMethod = "decodeSImmOperand<21, 2>";
+}
+
+def simm26_lsl2 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<26, "lsl2">;
+ let EncoderMethod = "getImmOpValueAsr2";
+ let DecoderMethod = "decodeSImmOperand<26, 2>";
+}
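A note on the *_lsl2 operands above, with an illustrative sketch (not from this patch): these are byte offsets that must be 4-byte aligned, so only offset >> 2 is stored in the instruction word; that is why the encoder is named getImmOpValueAsr2 while the decoders shift the field back left by 2.

  #include <cassert>
  #include <cstdint>

  // Round-trip of a *_lsl2 operand: encoding stores offset >> 2,
  // decoding restores the byte offset with << 2.
  static int64_t encodeAsr2(int64_t byteOffset) { return byteOffset >> 2; }
  static int64_t decodeLsl2(int64_t field) { return field << 2; }

  int main() {
    assert(decodeLsl2(encodeAsr2(4096)) == 4096); // forward branch
    assert(decodeLsl2(encodeAsr2(-8)) == -8);     // backward branch
    return 0;
  }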
+
+// Standalone (codegen-only) immleaf patterns.
+
+// A 12-bit signed immediate plus one, i.e. an immediate in the range
+// [-2047, 2048].
+def simm12_plus1 : ImmLeaf<GRLenVT,
+ [{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+
+// Return the negation of an immediate value.
+def NegImm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N),
+ N->getValueType(0));
+}]>;
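The [-2047, 2048] range is not arbitrary: the seteq/setne patterns later in this file rewrite (x == imm) as sltui (addi x, (NegImm imm)), 1, so the negated immediate must itself be a legal simm12. A self-contained check of that equivalence (isInt12 is a stand-in for llvm::isInt<12>):

  #include <cassert>
  #include <cstdint>

  // Stand-in for llvm::isInt<12>: Imm fits a signed 12-bit field,
  // i.e. Imm is in [-2048, 2047].
  static bool isInt12(int64_t Imm) { return Imm >= -2048 && Imm <= 2047; }

  // Mirrors the simm12_plus1 predicate above.
  static bool isSImm12Plus1(int64_t Imm) {
    return (isInt12(Imm) && Imm != -2048) || Imm == 2048;
  }

  int main() {
    // Accepted exactly when the negation is encodable as a simm12.
    for (int64_t I = -5000; I <= 5000; ++I)
      assert(isSImm12Plus1(I) == isInt12(-I));
    return 0;
  }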
+
+//===----------------------------------------------------------------------===//
+// Instruction Formats
+//===----------------------------------------------------------------------===//
+
+include "LoongArchInstrFormats.td"
+include "LoongArchFloatInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+class ALU_3R<bits<17> op, string opstr>
+ : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk), opstr, "$rd, $rj, $rk">;
+class ALU_2R<bits<22> op, string opstr>
+ : Fmt2R<op, (outs GPR:$rd), (ins GPR:$rj), opstr, "$rd, $rj">;
+
+class ALU_3RI2<bits<15> op, string opstr, Operand ImmOpnd>
+ : Fmt3RI2<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk, ImmOpnd:$imm2), opstr,
+ "$rd, $rj, $rk, $imm2">;
+class ALU_3RI3<bits<14> op, string opstr, Operand ImmOpnd>
+ : Fmt3RI3<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk, ImmOpnd:$imm3), opstr,
+ "$rd, $rj, $rk, $imm3">;
+class ALU_2RI5<bits<17> op, string opstr, Operand ImmOpnd>
+ : Fmt2RI5<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm5), opstr,
+ "$rd, $rj, $imm5">;
+class ALU_2RI6<bits<16> op, string opstr, Operand ImmOpnd>
+ : Fmt2RI6<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm6), opstr,
+ "$rd, $rj, $imm6">;
+class ALU_2RI12<bits<10> op, string opstr, Operand ImmOpnd>
+ : Fmt2RI12<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm12), opstr,
+ "$rd, $rj, $imm12">;
+class ALU_2RI16<bits<6> op, string opstr, Operand ImmOpnd>
+ : Fmt2RI16<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm16), opstr,
+ "$rd, $rj, $imm16">;
+class ALU_1RI20<bits<7> op, string opstr, Operand ImmOpnd>
+ : Fmt1RI20<op, (outs GPR:$rd), (ins ImmOpnd:$imm20), opstr, "$rd, $imm20">;
+
+class MISC_I15<bits<17> op, string opstr>
+ : FmtI15<op, (outs), (ins uimm15:$imm15), opstr, "$imm15">;
+
+class RDTIME_2R<bits<22> op, string opstr>
+ : Fmt2R<op, (outs GPR:$rd, GPR:$rj), (ins), opstr, "$rd, $rj">;
+
+class BrCC_2RI16<bits<6> op, string opstr>
+ : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2:$imm16), opstr,
+ "$rj, $rd, $imm16"> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+class BrCCZ_1RI21<bits<6> op, string opstr>
+ : Fmt1RI21<op, (outs), (ins GPR:$rj, simm21_lsl2:$imm21), opstr,
+ "$rj, $imm21"> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+class Br_I26<bits<6> op, string opstr>
+ : FmtI26<op, (outs), (ins simm26_lsl2:$imm26), opstr, "$imm26"> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+let mayLoad = 1 in {
+class LOAD_3R<bits<17> op, string opstr>
+ : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk), opstr, "$rd, $rj, $rk">;
+class LOAD_2RI12<bits<10> op, string opstr>
+ : Fmt2RI12<op, (outs GPR:$rd), (ins GPR:$rj, simm12:$imm12), opstr,
+ "$rd, $rj, $imm12">;
+class LOAD_2RI14<bits<8> op, string opstr>
+ : Fmt2RI14<op, (outs GPR:$rd), (ins GPR:$rj, simm14_lsl2:$imm14), opstr,
+ "$rd, $rj, $imm14">;
+} // mayLoad = 1
+
+let mayStore = 1 in {
+class STORE_3R<bits<17> op, string opstr>
+ : Fmt3R<op, (outs), (ins GPR:$rd, GPR:$rj, GPR:$rk), opstr,
+ "$rd, $rj, $rk">;
+class STORE_2RI12<bits<10> op, string opstr>
+ : Fmt2RI12<op, (outs), (ins GPR:$rd, GPR:$rj, simm12:$imm12), opstr,
+ "$rd, $rj, $imm12">;
+class STORE_2RI14<bits<8> op, string opstr>
+ : Fmt2RI14<op, (outs), (ins GPR:$rd, GPR:$rj, simm14_lsl2:$imm14), opstr,
+ "$rd, $rj, $imm14">;
+} // mayStore = 1
+
+let mayLoad = 1, mayStore = 1 in
+class AM_3R<bits<17> op, string opstr>
+ : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rk, GPR:$rj), opstr, "$rd, $rk, $rj">;
+
+let mayLoad = 1 in
+class LLBase<bits<8> op, string opstr>
+ : Fmt2RI14<op, (outs GPR:$rd), (ins GPR:$rj, simm14_lsl2:$imm14), opstr,
+ "$rd, $rj, $imm14">;
+
+let mayStore = 1, Constraints = "$rd = $dst" in
+class SCBase<bits<8> op, string opstr>
+ : Fmt2RI14<op, (outs GPR:$dst), (ins GPR:$rd, GPR:$rj, simm14_lsl2:$imm14),
+ opstr, "$rd, $rj, $imm14">;
+
+class IOCSRRD<bits<22> op, string opstr>
+ : Fmt2R<op, (outs GPR:$rd), (ins GPR:$rj), opstr, "$rd, $rj">;
+
+class IOCSRWR<bits<22> op, string opstr>
+ : Fmt2R<op, (outs), (ins GPR:$rd, GPR:$rj), opstr, "$rd, $rj">;
+
+//===----------------------------------------------------------------------===//
+// Basic Integer Instructions
+//===----------------------------------------------------------------------===//
+
+// Arithmetic Operation Instructions
+def ADD_W : ALU_3R<0b00000000000100000, "add.w">;
+def SUB_W : ALU_3R<0b00000000000100010, "sub.w">;
+def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12>;
+def ALSL_W : ALU_3RI2<0b000000000000010, "alsl.w", uimm2_plus1>;
+def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20>;
+def SLT : ALU_3R<0b00000000000100100, "slt">;
+def SLTU : ALU_3R<0b00000000000100101, "sltu">;
+def SLTI : ALU_2RI12<0b0000001000, "slti", simm12>;
+def SLTUI : ALU_2RI12<0b0000001001, "sltui", simm12>;
+def PCADDI : ALU_1RI20<0b0001100, "pcaddi", simm20>;
+def PCADDU12I : ALU_1RI20<0b0001110, "pcaddu12i", simm20>;
+def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20>;
+def AND : ALU_3R<0b00000000000101001, "and">;
+def OR : ALU_3R<0b00000000000101010, "or">;
+def NOR : ALU_3R<0b00000000000101000, "nor">;
+def XOR : ALU_3R<0b00000000000101011, "xor">;
+def ANDN : ALU_3R<0b00000000000101101, "andn">;
+def ORN : ALU_3R<0b00000000000101100, "orn">;
+def ANDI : ALU_2RI12<0b0000001101, "andi", uimm12>;
+def ORI : ALU_2RI12<0b0000001110, "ori", uimm12>;
+def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>;
+def MUL_W : ALU_3R<0b00000000000111000, "mul.w">;
+def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">;
+def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">;
+def DIV_W : ALU_3R<0b00000000001000000, "div.w">;
+def MOD_W : ALU_3R<0b00000000001000001, "mod.w">;
+def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">;
+def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">;
+
+// Bit-shift Instructions
+def SLL_W : ALU_3R<0b00000000000101110, "sll.w">;
+def SRL_W : ALU_3R<0b00000000000101111, "srl.w">;
+def SRA_W : ALU_3R<0b00000000000110000, "sra.w">;
+def ROTR_W : ALU_3R<0b00000000000110110, "rotr.w">;
+
+def SLLI_W : ALU_2RI5<0b00000000010000001, "slli.w", uimm5>;
+def SRLI_W : ALU_2RI5<0b00000000010001001, "srli.w", uimm5>;
+def SRAI_W : ALU_2RI5<0b00000000010010001, "srai.w", uimm5>;
+def ROTRI_W : ALU_2RI5<0b00000000010011001, "rotri.w", uimm5>;
+
+// Bit-manipulation Instructions
+def EXT_W_B : ALU_2R<0b0000000000000000010111, "ext.w.b">;
+def EXT_W_H : ALU_2R<0b0000000000000000010110, "ext.w.h">;
+def CLO_W : ALU_2R<0b0000000000000000000100, "clo.w">;
+def CLZ_W : ALU_2R<0b0000000000000000000101, "clz.w">;
+def CTO_W : ALU_2R<0b0000000000000000000110, "cto.w">;
+def CTZ_W : ALU_2R<0b0000000000000000000111, "ctz.w">;
+def BYTEPICK_W : ALU_3RI2<0b000000000000100, "bytepick.w", uimm2>;
+def REVB_2H : ALU_2R<0b0000000000000000001100, "revb.2h">;
+def BITREV_4B : ALU_2R<0b0000000000000000010010, "bitrev.4b">;
+def BITREV_W : ALU_2R<0b0000000000000000010100, "bitrev.w">;
+let Constraints = "$rd = $dst" in {
+def BSTRINS_W : FmtBSTR_W<0b000000000110, (outs GPR:$dst),
+ (ins GPR:$rd, GPR:$rj, uimm5:$msbw, uimm5:$lsbw),
+ "bstrins.w", "$rd, $rj, $msbw, $lsbw">;
+}
+def BSTRPICK_W : FmtBSTR_W<0b000000000111, (outs GPR:$rd),
+ (ins GPR:$rj, uimm5:$msbw, uimm5:$lsbw),
+ "bstrpick.w", "$rd, $rj, $msbw, $lsbw">;
+def MASKEQZ : ALU_3R<0b00000000000100110, "maskeqz">;
+def MASKNEZ : ALU_3R<0b00000000000100111, "masknez">;
+
+// Branch Instructions
+def BEQ : BrCC_2RI16<0b010110, "beq">;
+def BNE : BrCC_2RI16<0b010111, "bne">;
+def BLT : BrCC_2RI16<0b011000, "blt">;
+def BGE : BrCC_2RI16<0b011001, "bge">;
+def BLTU : BrCC_2RI16<0b011010, "bltu">;
+def BGEU : BrCC_2RI16<0b011011, "bgeu">;
+def BEQZ : BrCCZ_1RI21<0b010000, "beqz">;
+def BNEZ : BrCCZ_1RI21<0b010001, "bnez">;
+def B : Br_I26<0b010100, "b">;
+
+let isCall = 1 in
+def BL : FmtI26<0b010101, (outs), (ins simm26_lsl2:$imm26), "bl", "$imm26">;
+def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd),
+ (ins GPR:$rj, simm16_lsl2:$imm16), "jirl",
+ "$rd, $rj, $imm16">;
+
+// Common Memory Access Instructions
+def LD_B : LOAD_2RI12<0b0010100000, "ld.b">;
+def LD_H : LOAD_2RI12<0b0010100001, "ld.h">;
+def LD_W : LOAD_2RI12<0b0010100010, "ld.w">;
+def LD_BU : LOAD_2RI12<0b0010101000, "ld.bu">;
+def LD_HU : LOAD_2RI12<0b0010101001, "ld.hu">;
+def ST_B : STORE_2RI12<0b0010100100, "st.b">;
+def ST_H : STORE_2RI12<0b0010100101, "st.h">;
+def ST_W : STORE_2RI12<0b0010100110, "st.w">;
+def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), "preld",
+ "$imm5, $rj, $imm12">;
+
+// Atomic Memory Access Instructions
+def LL_W : LLBase<0b00100000, "ll.w">;
+def SC_W : SCBase<0b00100001, "sc.w">;
+
+// Barrier Instructions
+def DBAR : MISC_I15<0b00111000011100100, "dbar">;
+def IBAR : MISC_I15<0b00111000011100101, "ibar">;
+
+// Other Miscellaneous Instructions
+def SYSCALL : MISC_I15<0b00000000001010110, "syscall">;
+def BREAK : MISC_I15<0b00000000001010100, "break">;
+def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000, "rdtimel.w">;
+def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001, "rdtimeh.w">;
+def CPUCFG : ALU_2R<0b0000000000000000011011, "cpucfg">;
+
+/// LA64 instructions
+
+let Predicates = [IsLA64] in {
+
+// Arithmetic Operation Instructions for LA64
+def ADD_D : ALU_3R<0b00000000000100001, "add.d">;
+def SUB_D : ALU_3R<0b00000000000100011, "sub.d">;
+def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12>;
+def ADDU16I_D : ALU_2RI16<0b000100, "addu16i.d", simm16>;
+def ALSL_WU : ALU_3RI2<0b000000000000011, "alsl.wu", uimm2_plus1>;
+def ALSL_D : ALU_3RI2<0b000000000010110, "alsl.d", uimm2_plus1>;
+let Constraints = "$rd = $dst" in {
+def LU32I_D : Fmt1RI20<0b0001011, (outs GPR:$dst),
+ (ins GPR:$rd, simm20:$imm20), "lu32i.d",
+ "$rd, $imm20">;
+}
+def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12>;
+def PCADDU18I : ALU_1RI20<0b0001111, "pcaddu18i", simm20>;
+def MUL_D : ALU_3R<0b00000000000111011, "mul.d">;
+def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">;
+def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">;
+def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">;
+def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">;
+def DIV_D : ALU_3R<0b00000000001000100, "div.d">;
+def MOD_D : ALU_3R<0b00000000001000101, "mod.d">;
+def DIV_DU : ALU_3R<0b00000000001000110, "div.du">;
+def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">;
+
+// Bit-shift Instructions for LA64
+def SLL_D : ALU_3R<0b00000000000110001, "sll.d">;
+def SRL_D : ALU_3R<0b00000000000110010, "srl.d">;
+def SRA_D : ALU_3R<0b00000000000110011, "sra.d">;
+def ROTR_D : ALU_3R<0b00000000000110111, "rotr.d">;
+def SLLI_D : ALU_2RI6<0b0000000001000001, "slli.d", uimm6>;
+def SRLI_D : ALU_2RI6<0b0000000001000101, "srli.d", uimm6>;
+def SRAI_D : ALU_2RI6<0b0000000001001001, "srai.d", uimm6>;
+def ROTRI_D : ALU_2RI6<0b0000000001001101, "rotri.d", uimm6>;
+
+// Bit-manipulation Instructions for LA64
+def CLO_D : ALU_2R<0b0000000000000000001000, "clo.d">;
+def CLZ_D : ALU_2R<0b0000000000000000001001, "clz.d">;
+def CTO_D : ALU_2R<0b0000000000000000001010, "cto.d">;
+def CTZ_D : ALU_2R<0b0000000000000000001011, "ctz.d">;
+def BYTEPICK_D : ALU_3RI3<0b00000000000011, "bytepick.d", uimm3>;
+def REVB_4H : ALU_2R<0b0000000000000000001101, "revb.4h">;
+def REVB_2W : ALU_2R<0b0000000000000000001110, "revb.2w">;
+def REVB_D : ALU_2R<0b0000000000000000001111, "revb.d">;
+def REVH_2W : ALU_2R<0b0000000000000000010000, "revh.2w">;
+def REVH_D : ALU_2R<0b0000000000000000010001, "revh.d">;
+def BITREV_8B : ALU_2R<0b0000000000000000010011, "bitrev.8b">;
+def BITREV_D : ALU_2R<0b0000000000000000010101, "bitrev.d">;
+let Constraints = "$rd = $dst" in {
+def BSTRINS_D : FmtBSTR_D<0b0000000010, (outs GPR:$dst),
+ (ins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
+ "bstrins.d", "$rd, $rj, $msbd, $lsbd">;
+}
+def BSTRPICK_D : FmtBSTR_D<0b0000000011, (outs GPR:$rd),
+ (ins GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
+ "bstrpick.d", "$rd, $rj, $msbd, $lsbd">;
+
+// Common Memory Access Instructions for LA64
+def LD_WU : LOAD_2RI12<0b0010101010, "ld.wu">;
+def LD_D : LOAD_2RI12<0b0010100011, "ld.d">;
+def ST_D : STORE_2RI12<0b0010100111, "st.d">;
+def LDX_B : LOAD_3R<0b00111000000000000, "ldx.b">;
+def LDX_H : LOAD_3R<0b00111000000001000, "ldx.h">;
+def LDX_W : LOAD_3R<0b00111000000010000, "ldx.w">;
+def LDX_D : LOAD_3R<0b00111000000011000, "ldx.d">;
+def LDX_BU : LOAD_3R<0b00111000001000000, "ldx.bu">;
+def LDX_HU : LOAD_3R<0b00111000001001000, "ldx.hu">;
+def LDX_WU : LOAD_3R<0b00111000001010000, "ldx.wu">;
+def STX_B : STORE_3R<0b00111000000100000, "stx.b">;
+def STX_H : STORE_3R<0b00111000000101000, "stx.h">;
+def STX_W : STORE_3R<0b00111000000110000, "stx.w">;
+def STX_D : STORE_3R<0b00111000000111000, "stx.d">;
+def LDPTR_W : LOAD_2RI14<0b00100100, "ldptr.w">;
+def LDPTR_D : LOAD_2RI14<0b00100110, "ldptr.d">;
+def STPTR_W : STORE_2RI14<0b00100101, "stptr.w">;
+def STPTR_D : STORE_2RI14<0b00100111, "stptr.d">;
+def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk), "preldx",
+ "$imm5, $rj, $rk">;
+
+// Bound Check Memory Access Instructions
+def LDGT_B : LOAD_3R<0b00111000011110000, "ldgt.b">;
+def LDGT_H : LOAD_3R<0b00111000011110001, "ldgt.h">;
+def LDGT_W : LOAD_3R<0b00111000011110010, "ldgt.w">;
+def LDGT_D : LOAD_3R<0b00111000011110011, "ldgt.d">;
+def LDLE_B : LOAD_3R<0b00111000011110100, "ldle.b">;
+def LDLE_H : LOAD_3R<0b00111000011110101, "ldle.h">;
+def LDLE_W : LOAD_3R<0b00111000011110110, "ldle.w">;
+def LDLE_D : LOAD_3R<0b00111000011110111, "ldle.d">;
+def STGT_B : STORE_3R<0b00111000011111000, "stgt.b">;
+def STGT_H : STORE_3R<0b00111000011111001, "stgt.h">;
+def STGT_W : STORE_3R<0b00111000011111010, "stgt.w">;
+def STGT_D : STORE_3R<0b00111000011111011, "stgt.d">;
+def STLE_B : STORE_3R<0b00111000011111100, "stle.b">;
+def STLE_H : STORE_3R<0b00111000011111101, "stle.h">;
+def STLE_W : STORE_3R<0b00111000011111110, "stle.w">;
+def STLE_D : STORE_3R<0b00111000011111111, "stle.d">;
+
+// Atomic Memory Access Instructions for LA64
+def AMSWAP_W : AM_3R<0b00111000011000000, "amswap.w">;
+def AMSWAP_D : AM_3R<0b00111000011000001, "amswap.d">;
+def AMADD_W : AM_3R<0b00111000011000010, "amadd.w">;
+def AMADD_D : AM_3R<0b00111000011000011, "amadd.d">;
+def AMAND_W : AM_3R<0b00111000011000100, "amand.w">;
+def AMAND_D : AM_3R<0b00111000011000101, "amand.d">;
+def AMOR_W : AM_3R<0b00111000011000110, "amor.w">;
+def AMOR_D : AM_3R<0b00111000011000111, "amor.d">;
+def AMXOR_W : AM_3R<0b00111000011001000, "amxor.w">;
+def AMXOR_D : AM_3R<0b00111000011001001, "amxor.d">;
+def AMMAX_W : AM_3R<0b00111000011001010, "ammax.w">;
+def AMMAX_D : AM_3R<0b00111000011001011, "ammax.d">;
+def AMMIN_W : AM_3R<0b00111000011001100, "ammin.w">;
+def AMMIN_D : AM_3R<0b00111000011001101, "ammin.d">;
+def AMMAX_WU : AM_3R<0b00111000011001110, "ammax.wu">;
+def AMMAX_DU : AM_3R<0b00111000011001111, "ammax.du">;
+def AMMIN_WU : AM_3R<0b00111000011010000, "ammin.wu">;
+def AMMIN_DU : AM_3R<0b00111000011010001, "ammin.du">;
+def AMSWAP_DB_W : AM_3R<0b00111000011010010, "amswap_db.w">;
+def AMSWAP_DB_D : AM_3R<0b00111000011010011, "amswap_db.d">;
+def AMADD_DB_W : AM_3R<0b00111000011010100, "amadd_db.w">;
+def AMADD_DB_D : AM_3R<0b00111000011010101, "amadd_db.d">;
+def AMAND_DB_W : AM_3R<0b00111000011010110, "amand_db.w">;
+def AMAND_DB_D : AM_3R<0b00111000011010111, "amand_db.d">;
+def AMOR_DB_W : AM_3R<0b00111000011011000, "amor_db.w">;
+def AMOR_DB_D : AM_3R<0b00111000011011001, "amor_db.d">;
+def AMXOR_DB_W : AM_3R<0b00111000011011010, "amxor_db.w">;
+def AMXOR_DB_D : AM_3R<0b00111000011011011, "amxor_db.d">;
+def AMMAX_DB_W : AM_3R<0b00111000011011100, "ammax_db.w">;
+def AMMAX_DB_D : AM_3R<0b00111000011011101, "ammax_db.d">;
+def AMMIN_DB_W : AM_3R<0b00111000011011110, "ammin_db.w">;
+def AMMIN_DB_D : AM_3R<0b00111000011011111, "ammin_db.d">;
+def AMMAX_DB_WU : AM_3R<0b00111000011100000, "ammax_db.wu">;
+def AMMAX_DB_DU : AM_3R<0b00111000011100001, "ammax_db.du">;
+def AMMIN_DB_WU : AM_3R<0b00111000011100010, "ammin_db.wu">;
+def AMMIN_DB_DU : AM_3R<0b00111000011100011, "ammin_db.du">;
+def LL_D : LLBase<0b00100010, "ll.d">;
+def SC_D : SCBase<0b00100011, "sc.d">;
+
+// CRC Check Instructions
+def CRC_W_B_W : ALU_3R<0b00000000001001000, "crc.w.b.w">;
+def CRC_W_H_W : ALU_3R<0b00000000001001001, "crc.w.h.w">;
+def CRC_W_W_W : ALU_3R<0b00000000001001010, "crc.w.w.w">;
+def CRC_W_D_W : ALU_3R<0b00000000001001011, "crc.w.d.w">;
+def CRCC_W_B_W : ALU_3R<0b00000000001001100, "crcc.w.b.w">;
+def CRCC_W_H_W : ALU_3R<0b00000000001001101, "crcc.w.h.w">;
+def CRCC_W_W_W : ALU_3R<0b00000000001001110, "crcc.w.w.w">;
+def CRCC_W_D_W : ALU_3R<0b00000000001001111, "crcc.w.d.w">;
+
+// Other Miscellaneous Instructions for LA64
+def ASRTLE_D : FmtASRT<0b00000000000000010, (outs), (ins GPR:$rj, GPR:$rk),
+ "asrtle.d", "$rj, $rk">;
+def ASRTGT_D : FmtASRT<0b00000000000000011, (outs), (ins GPR:$rj, GPR:$rk),
+ "asrtgt.d", "$rj, $rk">;
+def RDTIME_D : RDTIME_2R<0b0000000000000000011010, "rdtime.d">;
+} // Predicates = [IsLA64]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//
+// Naming convention: 'generic' pattern classes are named PatTy1Ty2 after the
+// types of their operands (e.g. PatGprGpr, PatGprImm).
+//===----------------------------------------------------------------------===//
+
+/// Generic pattern classes
+
+class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
+ : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
+class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
+ : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
+
+class PatGprImm<SDPatternOperator OpNode, LAInst Inst, Operand ImmOpnd>
+ : Pat<(OpNode GPR:$rj, ImmOpnd:$imm),
+ (Inst GPR:$rj, ImmOpnd:$imm)>;
+class PatGprImm_32<SDPatternOperator OpNode, LAInst Inst, Operand ImmOpnd>
+ : Pat<(sext_inreg (OpNode GPR:$rj, ImmOpnd:$imm), i32),
+ (Inst GPR:$rj, ImmOpnd:$imm)>;
+
+/// Simple arithmetic operations
+
+// Match both a plain shift and one where the shift amount is masked (this is
+// typically introduced when the legalizer promotes the shift amount and
+// zero-extends it). For LoongArch, the mask is unnecessary as shifts in the
+// base ISA only read the least significant 5 bits (LA32) or 6 bits (LA64).
+def shiftMaskGRLen
+ : ComplexPattern<GRLenVT, 1, "selectShiftMaskGRLen", [], [], 0>;
+def shiftMask32 : ComplexPattern<i64, 1, "selectShiftMask32", [], [], 0>;
+
+class shiftop<SDPatternOperator operator>
+ : PatFrag<(ops node:$val, node:$count),
+ (operator node:$val, (GRLenVT (shiftMaskGRLen node:$count)))>;
+class shiftopw<SDPatternOperator operator>
+ : PatFrag<(ops node:$val, node:$count),
+ (operator node:$val, (i64 (shiftMask32 node:$count)))>;
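The comment above is easy to verify in isolation: since the hardware reduces the shift amount modulo the register width, an explicit mask of the amount is a no-op and can be folded away. A sketch of the LA64 case (illustrative only):

  #include <cassert>
  #include <cstdint>

  // Toy model of LA64 sll.d: only the low 6 bits of the amount are read.
  static uint64_t sll_d(uint64_t val, uint64_t amt) {
    return val << (amt & 63);
  }

  int main() {
    for (uint64_t amt = 0; amt < 256; ++amt)
      // Masking the amount first changes nothing, which is what lets
      // shiftop/shiftopw match through the legalizer's inserted AND.
      assert(sll_d(0x12345678ULL, amt & 63) == sll_d(0x12345678ULL, amt));
    return 0;
  }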
+
+let Predicates = [IsLA32] in {
+def : PatGprGpr<add, ADD_W>;
+def : PatGprImm<add, ADDI_W, simm12>;
+def : PatGprGpr<sub, SUB_W>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : PatGprGpr<add, ADD_D>;
+def : PatGprGpr_32<add, ADD_W>;
+def : PatGprImm<add, ADDI_D, simm12>;
+def : PatGprImm_32<add, ADDI_W, simm12>;
+def : PatGprGpr<sub, SUB_D>;
+def : PatGprGpr_32<sub, SUB_W>;
+} // Predicates = [IsLA64]
+
+def : PatGprGpr<and, AND>;
+def : PatGprImm<and, ANDI, uimm12>;
+def : PatGprGpr<or, OR>;
+def : PatGprImm<or, ORI, uimm12>;
+def : PatGprGpr<xor, XOR>;
+def : PatGprImm<xor, XORI, uimm12>;
+
+/// Shift
+
+let Predicates = [IsLA32] in {
+def : PatGprGpr<shiftop<shl>, SLL_W>;
+def : PatGprGpr<shiftop<sra>, SRA_W>;
+def : PatGprGpr<shiftop<srl>, SRL_W>;
+def : PatGprImm<shl, SLLI_W, uimm5>;
+def : PatGprImm<sra, SRAI_W, uimm5>;
+def : PatGprImm<srl, SRLI_W, uimm5>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : PatGprGpr<shiftopw<loongarch_sll_w>, SLL_W>;
+def : PatGprGpr<shiftopw<loongarch_sra_w>, SRA_W>;
+def : PatGprGpr<shiftopw<loongarch_srl_w>, SRL_W>;
+def : PatGprGpr<shiftop<shl>, SLL_D>;
+def : PatGprGpr<shiftop<sra>, SRA_D>;
+def : PatGprGpr<shiftop<srl>, SRL_D>;
+def : PatGprImm<shl, SLLI_D, uimm6>;
+def : PatGprImm<sra, SRAI_D, uimm6>;
+def : PatGprImm<srl, SRLI_D, uimm6>;
+} // Predicates = [IsLA64]
+
+/// sext and zext
+
+def : Pat<(sext_inreg GPR:$rj, i8), (EXT_W_B GPR:$rj)>;
+def : Pat<(sext_inreg GPR:$rj, i16), (EXT_W_H GPR:$rj)>;
+
+let Predicates = [IsLA64] in {
+def : Pat<(sext_inreg GPR:$rj, i32), (ADDI_W GPR:$rj, 0)>;
+} // Predicates = [IsLA64]
+
+/// Setcc
+
+def : PatGprGpr<setlt, SLT>;
+def : PatGprImm<setlt, SLTI, simm12>;
+def : PatGprGpr<setult, SLTU>;
+def : PatGprImm<setult, SLTUI, simm12>;
+
+// Define pattern expansions for setcc operations that aren't directly
+// handled by a LoongArch instruction.
+def : Pat<(seteq GPR:$rj, 0), (SLTUI GPR:$rj, 1)>;
+def : Pat<(seteq GPR:$rj, GPR:$rk), (SLTUI (XOR GPR:$rj, GPR:$rk), 1)>;
+let Predicates = [IsLA32] in {
+def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12),
+ (SLTUI (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>;
+} // Predicates = [IsLA32]
+let Predicates = [IsLA64] in {
+def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12),
+ (SLTUI (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>;
+} // Predicates = [IsLA64]
+def : Pat<(setne GPR:$rj, 0), (SLTU R0, GPR:$rj)>;
+def : Pat<(setne GPR:$rj, GPR:$rk), (SLTU R0, (XOR GPR:$rj, GPR:$rk))>;
+let Predicates = [IsLA32] in {
+def : Pat<(setne GPR:$rj, simm12_plus1:$imm12),
+ (SLTU R0, (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)))>;
+} // Predicates = [IsLA32]
+let Predicates = [IsLA64] in {
+def : Pat<(setne GPR:$rj, simm12_plus1:$imm12),
+ (SLTU R0, (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)))>;
+} // Predicates = [IsLA64]
+def : Pat<(setugt GPR:$rj, GPR:$rk), (SLTU GPR:$rk, GPR:$rj)>;
+def : Pat<(setuge GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rj, GPR:$rk), 1)>;
+def : Pat<(setule GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rk, GPR:$rj), 1)>;
+def : Pat<(setgt GPR:$rj, GPR:$rk), (SLT GPR:$rk, GPR:$rj)>;
+def : Pat<(setge GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rj, GPR:$rk), 1)>;
+def : Pat<(setle GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rk, GPR:$rj), 1)>;
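All of these expansions bottom out in SLT/SLTU plus a cheap fix-up: equality becomes "is x ^ y unsigned-less-than 1", and the >=/<= forms are the XORI-negated < forms. A standalone check of the two less obvious identities (sltu/xori model the instructions above):

  #include <cassert>
  #include <cstdint>

  static uint64_t sltu(uint64_t a, uint64_t b) { return a < b ? 1 : 0; }
  static uint64_t xori(uint64_t a, uint64_t imm) { return a ^ imm; }

  int main() {
    uint64_t vals[] = {0, 1, 2, 42, ~0ULL};
    for (uint64_t x : vals)
      for (uint64_t y : vals) {
        // seteq x, y  ->  sltui (xor x, y), 1
        assert((x == y ? 1u : 0u) == sltu(x ^ y, 1));
        // setuge x, y ->  xori (sltu x, y), 1
        assert((x >= y ? 1u : 0u) == xori(sltu(x, y), 1));
      }
    return 0;
  }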
+
+/// Select
+
+def : Pat<(select GPR:$cond, GPR:$t, GPR:$f),
+ (OR (MASKEQZ GPR:$t, GPR:$cond), (MASKNEZ GPR:$f, GPR:$cond))>;
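This is the classic branchless select: exactly one of the two masked terms survives, since MASKEQZ keeps $t only when the condition is non-zero and MASKNEZ keeps $f only when it is zero. A sketch of the identity:

  #include <cassert>
  #include <cstdint>

  static uint64_t maskeqz(uint64_t v, uint64_t c) { return c ? v : 0; }
  static uint64_t masknez(uint64_t v, uint64_t c) { return c ? 0 : v; }

  // Mirrors the pattern above: one term is always zero, so OR merges them.
  static uint64_t sel(uint64_t c, uint64_t t, uint64_t f) {
    return maskeqz(t, c) | masknez(f, c);
  }

  int main() {
    assert(sel(1, 10, 20) == 10);
    assert(sel(0, 10, 20) == 20);
    return 0;
  }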
+
+/// Branches and jumps
+
+let isBarrier = 1, isReturn = 1, isTerminator = 1 in
+def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
+ PseudoInstExpansion<(JIRL R0, R1, 0)>;
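PseudoRET expands to jirl $zero, $ra, 0: jump to the address held in $ra and write the link value into $zero, which discards it. A toy model of jirl under that reading (not from this patch):

  #include <cassert>
  #include <cstdint>

  // jirl rd, rj, offs16 at program counter pc:
  //   rd <- pc + 4;  pc <- rj + (offs16 << 2)
  struct JirlResult { uint64_t Link, NextPC; };
  static JirlResult jirl(uint64_t pc, uint64_t rj, int64_t offs16) {
    return {pc + 4, rj + (uint64_t)(offs16 << 2)};
  }

  int main() {
    uint64_t ra = 0x1000; // return address left by a preceding bl
    JirlResult R = jirl(/*pc=*/0x2000, ra, /*offs16=*/0);
    assert(R.NextPC == ra); // control transfers back to the caller
    // In the PseudoRET expansion rd is r0, so R.Link is simply discarded.
    return 0;
  }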
+
+/// BSTRPICK
+
+let Predicates = [IsLA32] in
+def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
+ (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
+
+let Predicates = [IsLA64] in
+def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
+ (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
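bstrpick extracts the bit field [msbd:lsbd] of $rj into the low bits of $rd and zero-fills the rest, i.e. (x >> lsb) masked to msb - lsb + 1 bits. A sketch of the 64-bit form:

  #include <cassert>
  #include <cstdint>

  // Model of bstrpick.d rd, rj, msbd, lsbd, with 0 <= lsbd <= msbd <= 63.
  static uint64_t bstrpick_d(uint64_t rj, unsigned msbd, unsigned lsbd) {
    unsigned width = msbd - lsbd + 1;
    uint64_t mask = width == 64 ? ~0ULL : ((1ULL << width) - 1);
    return (rj >> lsbd) & mask;
  }

  int main() {
    assert(bstrpick_d(0xabcd, 15, 8) == 0xab); // pick bits [15:8]
    assert(bstrpick_d(~0ULL, 63, 0) == ~0ULL); // full-width pick: identity
    return 0;
  }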
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstAlias<"nop", (ANDI R0, R0, 0)>;
+def : InstAlias<"move $dst, $src", (OR GPR:$dst, GPR:$src, R0)>;
+
+//===----------------------------------------------------------------------===//
+// Basic Floating-Point Instructions
+//===----------------------------------------------------------------------===//
+
+include "LoongArchFloat32InstrInfo.td"
+include "LoongArchFloat64InstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Privilege Instructions
+//===----------------------------------------------------------------------===//
+
+// CSR Access Instructions
+def CSRRD : FmtCSR<0b0000010000000, (outs GPR:$rd), (ins uimm14:$csr_num),
+ "csrrd", "$rd, $csr_num">;
+let Constraints = "$rd = $dst" in {
+def CSRWR : FmtCSR<0b0000010000001, (outs GPR:$dst),
+ (ins GPR:$rd, uimm14:$csr_num), "csrwr", "$rd, $csr_num">;
+def CSRXCHG : FmtCSRXCHG<0b00000100, (outs GPR:$dst),
+ (ins GPR:$rd, GPR:$rj, uimm14:$csr_num),
+ "csrxchg", "$rd, $rj, $csr_num">;
+} // Constraints = "$rd = $dst"
+
+// IOCSR Access Instructions
+def IOCSRRD_B : IOCSRRD<0b0000011001001000000000, "iocsrrd.b">;
+def IOCSRRD_H : IOCSRRD<0b0000011001001000000001, "iocsrrd.h">;
+def IOCSRRD_W : IOCSRRD<0b0000011001001000000010, "iocsrrd.w">;
+def IOCSRWR_B : IOCSRWR<0b0000011001001000000100, "iocsrwr.b">;
+def IOCSRWR_H : IOCSRWR<0b0000011001001000000101, "iocsrwr.h">;
+def IOCSRWR_W : IOCSRWR<0b0000011001001000000110, "iocsrwr.w">;
+let Predicates = [IsLA64] in {
+def IOCSRRD_D : IOCSRRD<0b0000011001001000000011, "iocsrrd.d">;
+def IOCSRWR_D : IOCSRWR<0b0000011001001000000111, "iocsrwr.d">;
+} // Predicates = [IsLA64]
+
+// Cache Maintenance Instructions
+def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop",
+ "$op, $rj, $imm12">;
+
+// TLB Maintenance Instructions
+def TLBSRCH : FmtI32<0b00000110010010000010100000000000, "tlbsrch">;
+def TLBRD : FmtI32<0b00000110010010000010110000000000, "tlbrd">;
+def TLBWR : FmtI32<0b00000110010010000011000000000000, "tlbwr">;
+def TLBFILL : FmtI32<0b00000110010010000011010000000000, "tlbfill">;
+def TLBCLR : FmtI32<0b00000110010010000010000000000000, "tlbclr">;
+def TLBFLUSH : FmtI32<0b00000110010010000010010000000000, "tlbflush">;
+def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op), "invtlb",
+ "$op, $rj, $rk">;
+
+// Software Page Walking Instructions
+def LDDIR : Fmt2RI8<0b00000110010000, (outs GPR:$rd),
+ (ins GPR:$rj, uimm8:$imm8), "lddir", "$rd, $rj, $imm8">;
+def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "ldpte", "$rj, $seq">;
+
+// Other Miscellaneous Instructions
+def ERTN : FmtI32<0b00000110010010000011100000000000, "ertn">;
+def DBCL : MISC_I15<0b00000000001010101, "dbcl">;
+def IDLE : MISC_I15<0b00000110010010001, "idle">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
new file mode 100644
index 000000000000..7416c93b4d05
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -0,0 +1,66 @@
+//=- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to an MCInst -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower LoongArch MachineInstrs to their
+// corresponding MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArch.h"
+#include "LoongArchSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
+ MCOperand &MCOp,
+ const AsmPrinter &AP) {
+ switch (MO.getType()) {
+ default:
+ report_fatal_error(
+ "lowerLoongArchMachineOperandToMCOperand: unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return false;
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Regmasks are like implicit defs.
+ return false;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ // TODO: lower special operands
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ break;
+ }
+ return true;
+}
+
+bool llvm::lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI, AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (const MachineOperand &MO : MI->operands()) {
+ MCOperand MCOp;
+ if (lowerLoongArchMachineOperandToMCOperand(MO, MCOp, AP))
+ OutMI.addOperand(MCOp);
+ }
+ return false;
+}
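The lowering above follows the standard per-target shape: each MachineOperand either yields an MCOperand or is silently dropped, because implicit register operands and regmasks have no encoding in the emitted instruction. A reduced sketch of that filter-and-collect pattern (toy types, not the LLVM API):

  #include <cassert>
  #include <vector>

  // Minimal stand-ins for MachineOperand / MCOperand.
  struct MachOp { enum Kind { Reg, Imm, RegMask } K; bool Implicit; long V; };
  struct MCOp { long V; };

  // Returns false to drop the operand entirely, as the code above does.
  static bool lowerOperand(const MachOp &MO, MCOp &Out) {
    switch (MO.K) {
    case MachOp::Reg:
      if (MO.Implicit)      // implicit uses/defs are not encoded
        return false;
      Out = {MO.V};
      return true;
    case MachOp::Imm:
      Out = {MO.V};
      return true;
    case MachOp::RegMask:   // regmasks behave like implicit defs
      return false;
    }
    return false;
  }

  int main() {
    std::vector<MachOp> Ops = {{MachOp::Reg, false, 4},
                               {MachOp::Imm, false, 42},
                               {MachOp::Reg, true, 1},       // dropped
                               {MachOp::RegMask, false, 0}}; // dropped
    std::vector<MCOp> Lowered;
    for (const MachOp &MO : Ops) {
      MCOp Out;
      if (lowerOperand(MO, Out))
        Lowered.push_back(Out);
    }
    assert(Lowered.size() == 2);
    return 0;
  }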
diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h
new file mode 100644
index 000000000000..d4a6c884bc9d
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h
@@ -0,0 +1,57 @@
+//=- LoongArchMachineFunctionInfo.h - LoongArch machine function info -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LoongArch-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H
+
+#include "LoongArchSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// LoongArchMachineFunctionInfo - This class is derived from
+/// MachineFunctionInfo and contains private LoongArch-specific information for
+/// each MachineFunction.
+class LoongArchMachineFunctionInfo : public MachineFunctionInfo {
+private:
+ /// FrameIndex for start of varargs area
+ int VarArgsFrameIndex = 0;
+ /// Size of the save area used for varargs
+ int VarArgsSaveSize = 0;
+
+ /// Size of stack frame used to save callee-saved registers
+ unsigned CalleeSavedStackSize = 0;
+
+public:
+ LoongArchMachineFunctionInfo(const MachineFunction &MF) {}
+
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override {
+ return DestMF.cloneInfo<LoongArchMachineFunctionInfo>(*this);
+ }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsSaveSize() const { return VarArgsSaveSize; }
+ void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; }
+
+ unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
+ void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
new file mode 100644
index 000000000000..b9bae8e56304
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -0,0 +1,115 @@
+//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchRegisterInfo.h"
+#include "LoongArch.h"
+#include "LoongArchSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define GET_REGINFO_TARGET_DESC
+#include "LoongArchGenRegisterInfo.inc"
+
+LoongArchRegisterInfo::LoongArchRegisterInfo(unsigned HwMode)
+ : LoongArchGenRegisterInfo(LoongArch::R1, /*DwarfFlavour*/ 0,
+ /*EHFlavour*/ 0,
+ /*PC*/ 0, HwMode) {}
+
+const MCPhysReg *
+LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ auto &Subtarget = MF->getSubtarget<LoongArchSubtarget>();
+
+ switch (Subtarget.getTargetABI()) {
+ default:
+ llvm_unreachable("Unrecognized ABI");
+ case LoongArchABI::ABI_ILP32S:
+ case LoongArchABI::ABI_LP64S:
+ return CSR_ILP32S_LP64S_SaveList;
+ case LoongArchABI::ABI_ILP32F:
+ case LoongArchABI::ABI_LP64F:
+ return CSR_ILP32F_LP64F_SaveList;
+ case LoongArchABI::ABI_ILP32D:
+ case LoongArchABI::ABI_LP64D:
+ return CSR_ILP32D_LP64D_SaveList;
+ }
+}
+
+const uint32_t *
+LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ auto &Subtarget = MF.getSubtarget<LoongArchSubtarget>();
+
+ switch (Subtarget.getTargetABI()) {
+ default:
+ llvm_unreachable("Unrecognized ABI");
+ case LoongArchABI::ABI_ILP32S:
+ case LoongArchABI::ABI_LP64S:
+ return CSR_ILP32S_LP64S_RegMask;
+ case LoongArchABI::ABI_ILP32F:
+ case LoongArchABI::ABI_LP64F:
+ return CSR_ILP32F_LP64F_RegMask;
+ case LoongArchABI::ABI_ILP32D:
+ case LoongArchABI::ABI_LP64D:
+ return CSR_ILP32D_LP64D_RegMask;
+ }
+}
+
+const uint32_t *LoongArchRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+BitVector
+LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const LoongArchFrameLowering *TFI = getFrameLowering(MF);
+ BitVector Reserved(getNumRegs());
+
+ // Use markSuperRegs to ensure any register aliases are also reserved
+ markSuperRegs(Reserved, LoongArch::R0); // zero
+ markSuperRegs(Reserved, LoongArch::R2); // tp
+ markSuperRegs(Reserved, LoongArch::R3); // sp
+ markSuperRegs(Reserved, LoongArch::R21); // non-allocatable
+ if (TFI->hasFP(MF))
+ markSuperRegs(Reserved, LoongArch::R22); // fp
+ // Reserve the base register if we need to realign the stack and allocate
+ // variable-sized objects at runtime.
+ if (TFI->hasBP(MF))
+ markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
+
+ assert(checkAllSuperRegsMarked(Reserved));
+ return Reserved;
+}
+
+bool LoongArchRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
+ return PhysReg == LoongArch::R0;
+}
+
+Register
+LoongArchRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = getFrameLowering(MF);
+ return TFI->hasFP(MF) ? LoongArch::R22 : LoongArch::R3;
+}
+
+void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
+ // TODO: Implement this when we have function calls
+}
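getReservedRegs pins down r0 (zero), r2 (tp), r3 (sp), r21, plus r22 when a frame pointer is kept and the base pointer when the stack must be realigned; getFrameRegister then simply answers "fp if present, else sp". A compact sketch of the reservation logic (register numbers mirror the code above; which register getBPReg() names is assumed here purely for illustration):

  #include <bitset>
  #include <cassert>

  static std::bitset<32> reservedGPRs(bool HasFP, bool HasBP, unsigned BPReg) {
    std::bitset<32> R;
    R.set(0);       // r0: hardwired zero
    R.set(2);       // r2: thread pointer
    R.set(3);       // r3: stack pointer
    R.set(21);      // r21: non-allocatable
    if (HasFP)
      R.set(22);    // r22: frame pointer when one is kept
    if (HasBP)
      R.set(BPReg); // base pointer (getBPReg(); value assumed)
    return R;
  }

  int main() {
    assert(reservedGPRs(false, false, 0).count() == 4);
    assert(reservedGPRs(true, false, 0).test(22));
    return 0;
  }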
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
new file mode 100644
index 000000000000..02c9156e2b87
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h
@@ -0,0 +1,50 @@
+//= LoongArchRegisterInfo.h - LoongArch Register Information Impl -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H
+
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "LoongArchGenRegisterInfo.inc"
+
+namespace llvm {
+
+struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo {
+
+ LoongArchRegisterInfo(unsigned HwMode);
+
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
+ const uint32_t *getNoPreservedMask() const override;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isConstantPhysReg(MCRegister PhysReg) const override;
+
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override {
+ return &LoongArch::GPRRegClass;
+ }
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+
+ Register getFrameRegister(const MachineFunction &MF) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
new file mode 100644
index 000000000000..2d5ad99f6156
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
@@ -0,0 +1,161 @@
+//===-- LoongArchRegisterInfo.td - LoongArch Register defs -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the LoongArch register files
+//===----------------------------------------------------------------------===//
+
+let Namespace = "LoongArch" in {
+class LoongArchReg<bits<16> Enc, string n, list<string> alt = []>
+ : Register<n> {
+ let HWEncoding = Enc;
+ let AltNames = alt;
+}
+
+class LoongArchReg32<bits<16> Enc, string n, list<string> alt = []>
+ : Register<n> {
+ let HWEncoding = Enc;
+ let AltNames = alt;
+}
+
+def sub_32 : SubRegIndex<32>;
+class LoongArchReg64<LoongArchReg32 subreg>
+ : Register<""> {
+ let HWEncoding = subreg.HWEncoding;
+ let SubRegs = [subreg];
+ let SubRegIndices = [sub_32];
+ let AsmName = subreg.AsmName;
+ let AltNames = subreg.AltNames;
+}
+
+let FallbackRegAltNameIndex = NoRegAltName in
+def RegAliasName : RegAltNameIndex;
+} // Namespace = "LoongArch"
+
+// Integer registers
+
+let RegAltNameIndices = [RegAliasName] in {
+ def R0 : LoongArchReg<0, "r0", ["zero"]>, DwarfRegNum<[0]>;
+ def R1 : LoongArchReg<1, "r1", ["ra"]>, DwarfRegNum<[1]>;
+ def R2 : LoongArchReg<2, "r2", ["tp"]>, DwarfRegNum<[2]>;
+ def R3 : LoongArchReg<3, "r3", ["sp"]>, DwarfRegNum<[3]>;
+ def R4 : LoongArchReg<4, "r4", ["a0"]>, DwarfRegNum<[4]>;
+ def R5 : LoongArchReg<5, "r5", ["a1"]>, DwarfRegNum<[5]>;
+ def R6 : LoongArchReg<6, "r6", ["a2"]>, DwarfRegNum<[6]>;
+ def R7 : LoongArchReg<7, "r7", ["a3"]>, DwarfRegNum<[7]>;
+ def R8 : LoongArchReg<8, "r8", ["a4"]>, DwarfRegNum<[8]>;
+ def R9 : LoongArchReg<9, "r9", ["a5"]>, DwarfRegNum<[9]>;
+ def R10 : LoongArchReg<10, "r10", ["a6"]>, DwarfRegNum<[10]>;
+ def R11 : LoongArchReg<11, "r11", ["a7"]>, DwarfRegNum<[11]>;
+ def R12 : LoongArchReg<12, "r12", ["t0"]>, DwarfRegNum<[12]>;
+ def R13 : LoongArchReg<13, "r13", ["t1"]>, DwarfRegNum<[13]>;
+ def R14 : LoongArchReg<14, "r14", ["t2"]>, DwarfRegNum<[14]>;
+ def R15 : LoongArchReg<15, "r15", ["t3"]>, DwarfRegNum<[15]>;
+ def R16 : LoongArchReg<16, "r16", ["t4"]>, DwarfRegNum<[16]>;
+ def R17 : LoongArchReg<17, "r17", ["t5"]>, DwarfRegNum<[17]>;
+ def R18 : LoongArchReg<18, "r18", ["t6"]>, DwarfRegNum<[18]>;
+ def R19 : LoongArchReg<19, "r19", ["t7"]>, DwarfRegNum<[19]>;
+ def R20 : LoongArchReg<20, "r20", ["t8"]>, DwarfRegNum<[20]>;
+ def R21 : LoongArchReg<21, "r21", [""]>, DwarfRegNum<[21]>;
+ def R22 : LoongArchReg<22, "r22", ["fp", "s9"]>, DwarfRegNum<[22]>;
+ def R23 : LoongArchReg<23, "r23", ["s0"]>, DwarfRegNum<[23]>;
+ def R24 : LoongArchReg<24, "r24", ["s1"]>, DwarfRegNum<[24]>;
+ def R25 : LoongArchReg<25, "r25", ["s2"]>, DwarfRegNum<[25]>;
+ def R26 : LoongArchReg<26, "r26", ["s3"]>, DwarfRegNum<[26]>;
+ def R27 : LoongArchReg<27, "r27", ["s4"]>, DwarfRegNum<[27]>;
+ def R28 : LoongArchReg<28, "r28", ["s5"]>, DwarfRegNum<[28]>;
+ def R29 : LoongArchReg<29, "r29", ["s6"]>, DwarfRegNum<[29]>;
+ def R30 : LoongArchReg<30, "r30", ["s7"]>, DwarfRegNum<[30]>;
+ def R31 : LoongArchReg<31, "r31", ["s8"]>, DwarfRegNum<[31]>;
+} // RegAltNameIndices = [RegAliasName]
+
+def GRLenVT : ValueTypeByHwMode<[LA32, LA64],
+ [i32, i64]>;
+def GRLenRI : RegInfoByHwMode<
+ [LA32, LA64],
+ [RegInfo<32,32,32>, RegInfo<64,64,64>]>;
+
+// The order of registers represents the preferred allocation sequence.
+// Registers are listed in the order caller-save, callee-save, specials.
+def GPR : RegisterClass<"LoongArch", [GRLenVT], 32, (add
+ // Argument registers (a0...a7)
+ (sequence "R%u", 4, 11),
+ // Temporary registers (t0...t8)
+ (sequence "R%u", 12, 20),
+ // Static registers (s9/fp, s0...s8)
+ (sequence "R%u", 22, 31),
+ // Specials (r0, ra, tp, sp)
+ (sequence "R%u", 0, 3),
+ // Reserved (Non-allocatable)
+ R21
+ )> {
+ let RegInfos = GRLenRI;
+}
+
+// Floating point registers
+
+let RegAltNameIndices = [RegAliasName] in {
+ def F0 : LoongArchReg32<0, "f0", ["fa0"]>, DwarfRegNum<[32]>;
+ def F1 : LoongArchReg32<1, "f1", ["fa1"]>, DwarfRegNum<[33]>;
+ def F2 : LoongArchReg32<2, "f2", ["fa2"]>, DwarfRegNum<[34]>;
+ def F3 : LoongArchReg32<3, "f3", ["fa3"]>, DwarfRegNum<[35]>;
+ def F4 : LoongArchReg32<4, "f4", ["fa4"]>, DwarfRegNum<[36]>;
+ def F5 : LoongArchReg32<5, "f5", ["fa5"]>, DwarfRegNum<[37]>;
+ def F6 : LoongArchReg32<6, "f6", ["fa6"]>, DwarfRegNum<[38]>;
+ def F7 : LoongArchReg32<7, "f7", ["fa7"]>, DwarfRegNum<[39]>;
+ def F8 : LoongArchReg32<8, "f8", ["ft0"]>, DwarfRegNum<[40]>;
+ def F9 : LoongArchReg32<9, "f9", ["ft1"]>, DwarfRegNum<[41]>;
+ def F10 : LoongArchReg32<10, "f10", ["ft2"]>, DwarfRegNum<[42]>;
+ def F11 : LoongArchReg32<11, "f11", ["ft3"]>, DwarfRegNum<[43]>;
+ def F12 : LoongArchReg32<12, "f12", ["ft4"]>, DwarfRegNum<[44]>;
+ def F13 : LoongArchReg32<13, "f13", ["ft5"]>, DwarfRegNum<[45]>;
+ def F14 : LoongArchReg32<14, "f14", ["ft6"]>, DwarfRegNum<[46]>;
+ def F15 : LoongArchReg32<15, "f15", ["ft7"]>, DwarfRegNum<[47]>;
+ def F16 : LoongArchReg32<16, "f16", ["ft8"]>, DwarfRegNum<[48]>;
+ def F17 : LoongArchReg32<17, "f17", ["ft9"]>, DwarfRegNum<[49]>;
+ def F18 : LoongArchReg32<18, "f18", ["ft10"]>, DwarfRegNum<[50]>;
+ def F19 : LoongArchReg32<19, "f19", ["ft11"]>, DwarfRegNum<[51]>;
+ def F20 : LoongArchReg32<20, "f20", ["ft12"]>, DwarfRegNum<[52]>;
+ def F21 : LoongArchReg32<21, "f21", ["ft13"]>, DwarfRegNum<[53]>;
+ def F22 : LoongArchReg32<22, "f22", ["ft14"]>, DwarfRegNum<[54]>;
+ def F23 : LoongArchReg32<23, "f23", ["ft15"]>, DwarfRegNum<[55]>;
+ def F24 : LoongArchReg32<24, "f24", ["fs0"]>, DwarfRegNum<[56]>;
+ def F25 : LoongArchReg32<25, "f25", ["fs1"]>, DwarfRegNum<[57]>;
+ def F26 : LoongArchReg32<26, "f26", ["fs2"]>, DwarfRegNum<[58]>;
+ def F27 : LoongArchReg32<27, "f27", ["fs3"]>, DwarfRegNum<[59]>;
+ def F28 : LoongArchReg32<28, "f28", ["fs4"]>, DwarfRegNum<[60]>;
+ def F29 : LoongArchReg32<29, "f29", ["fs5"]>, DwarfRegNum<[61]>;
+ def F30 : LoongArchReg32<30, "f30", ["fs6"]>, DwarfRegNum<[62]>;
+ def F31 : LoongArchReg32<31, "f31", ["fs7"]>, DwarfRegNum<[63]>;
+
+ foreach I = 0-31 in {
+ def F#I#_64 : LoongArchReg64<!cast<LoongArchReg32>("F"#I)>,
+ DwarfRegNum<[!add(I, 32)]>;
+ }
+}
+
+// The order of registers represents the preferred allocation sequence.
+def FPR32 : RegisterClass<"LoongArch", [f32], 32, (sequence "F%u", 0, 31)>;
+def FPR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>;
+
+// Condition flag registers
+
+foreach I = 0-7 in
+def FCC#I : LoongArchReg<I, "fcc"#I>;
+
+def CFR : RegisterClass<"LoongArch", [GRLenVT], 32, (sequence "FCC%u", 0, 7)> {
+ let RegInfos = GRLenRI;
+}
+
+// Control and status registers
+
+foreach I = 0-3 in
+def FCSR#I : LoongArchReg<I, "fcsr"#I>;
+
+let isAllocatable = false in
+def FCSR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCSR%u", 0, 3)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
new file mode 100644
index 000000000000..ff84e7c8cc1f
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -0,0 +1,54 @@
+//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArch specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchSubtarget.h"
+#include "LoongArchFrameLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "LoongArchGenSubtargetInfo.inc"
+
+void LoongArchSubtarget::anchor() {}
+
+LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies(
+ const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
+ StringRef ABIName) {
+ bool Is64Bit = TT.isArch64Bit();
+ if (CPU.empty())
+ CPU = Is64Bit ? "generic-la64" : "generic-la32";
+
+ if (TuneCPU.empty())
+ TuneCPU = CPU;
+
+ ParseSubtargetFeatures(CPU, TuneCPU, FS);
+ if (Is64Bit) {
+ GRLenVT = MVT::i64;
+ GRLen = 64;
+ }
+
+ // TODO: ILP32{S,F} LP64{S,F}
+ TargetABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D;
+ return *this;
+}
+
+LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
+ StringRef TuneCPU, StringRef FS,
+ StringRef ABIName,
+ const TargetMachine &TM)
+ : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+ FrameLowering(
+ initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)),
+ InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {}
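initializeSubtargetDependencies is the usual defaulting dance: an empty CPU falls back to a generic model for the triple's width, TuneCPU falls back to CPU, and GRLen/GRLenVT flip to 64-bit on LA64. The fallback logic in isolation (feature parsing elided):

  #include <cassert>
  #include <string>

  struct Defaults { std::string CPU, TuneCPU; unsigned GRLen; };

  static Defaults pickDefaults(bool Is64Bit, std::string CPU,
                               std::string TuneCPU) {
    if (CPU.empty())
      CPU = Is64Bit ? "generic-la64" : "generic-la32";
    if (TuneCPU.empty())
      TuneCPU = CPU;
    return {CPU, TuneCPU, Is64Bit ? 64u : 32u};
  }

  int main() {
    Defaults D = pickDefaults(true, "", "");
    assert(D.CPU == "generic-la64" && D.TuneCPU == D.CPU && D.GRLen == 64);
    return 0;
  }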
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
new file mode 100644
index 000000000000..95c2c676cc3c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -0,0 +1,89 @@
+//===- LoongArchSubtarget.h - Define Subtarget for the LoongArch -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LoongArch specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H
+
+#include "LoongArchFrameLowering.h"
+#include "LoongArchISelLowering.h"
+#include "LoongArchInstrInfo.h"
+#include "LoongArchRegisterInfo.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "LoongArchGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
+ virtual void anchor();
+ bool HasLA64 = false;
+ bool HasBasicF = false;
+ bool HasBasicD = false;
+ bool HasExtLSX = false;
+ bool HasExtLASX = false;
+ bool HasExtLVZ = false;
+ bool HasExtLBT = false;
+ unsigned GRLen = 32;
+ MVT GRLenVT = MVT::i32;
+ LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
+ LoongArchFrameLowering FrameLowering;
+ LoongArchInstrInfo InstrInfo;
+ LoongArchRegisterInfo RegInfo;
+ LoongArchTargetLowering TLInfo;
+
+ /// Initializes using the passed-in CPU and feature strings so that we can
+ /// use initializer lists for subtarget initialization.
+ LoongArchSubtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef CPU,
+ StringRef TuneCPU,
+ StringRef FS,
+ StringRef ABIName);
+
+public:
+ // Initializes the data members to match those of the specified triple.
+ LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, StringRef ABIName, const TargetMachine &TM);
+
+ // Parses the feature string, setting specified subtarget options. The
+ // definition of this function is auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+
+ const LoongArchFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const LoongArchRegisterInfo *getRegisterInfo() const override {
+ return &RegInfo;
+ }
+ const LoongArchTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ bool is64Bit() const { return HasLA64; }
+ bool hasBasicF() const { return HasBasicF; }
+ bool hasBasicD() const { return HasBasicD; }
+ bool hasExtLSX() const { return HasExtLSX; }
+ bool hasExtLASX() const { return HasExtLASX; }
+ bool hasExtLVZ() const { return HasExtLVZ; }
+ bool hasExtLBT() const { return HasExtLBT; }
+ MVT getGRLenVT() const { return GRLenVT; }
+ unsigned getGRLen() const { return GRLen; }
+ LoongArchABI::ABI getTargetABI() const { return TargetABI; }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
new file mode 100644
index 000000000000..3a1a46a9e624
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -0,0 +1,118 @@
+//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArch specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetMachine.h"
+#include "LoongArch.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch"
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() {
+ // Register the target.
+ RegisterTargetMachine<LoongArchTargetMachine> X(getTheLoongArch32Target());
+ RegisterTargetMachine<LoongArchTargetMachine> Y(getTheLoongArch64Target());
+}
+
+static std::string computeDataLayout(const Triple &TT) {
+ if (TT.isArch64Bit())
+ return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
+ assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported");
+ return "e-m:e-p:32:32-i64:64-n32-S128";
+}
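For readers decoding the layout strings: in the LA64 string "e-m:e-p:64:64-i64:64-i128:128-n64-S128", e means little-endian, m:e selects ELF-style name mangling, p:64:64 gives 64-bit pointers with 64-bit ABI alignment, i64:64 and i128:128 set the natural alignment of those integer widths, n64 declares 64 bits as the native integer width, and S128 requires 128-bit (16-byte) natural stack alignment; the LA32 string is the 32-bit analogue.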
+
+static Reloc::Model getEffectiveRelocModel(const Triple &TT,
+ Optional<Reloc::Model> RM) {
+ if (!RM.hasValue())
+ return Reloc::Static;
+ return *RM;
+}
+
+LoongArchTargetMachine::LoongArchTargetMachine(
+ const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+ getEffectiveRelocModel(TT, RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
+ initAsmInfo();
+}
+
+LoongArchTargetMachine::~LoongArchTargetMachine() = default;
+
+const LoongArchSubtarget *
+LoongArchTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+
+ std::string Key = CPU + TuneCPU + FS;
+ auto &I = SubtargetMap[Key];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ auto ABIName = Options.MCOptions.getABIName();
+ if (const MDString *ModuleTargetABI = dyn_cast_or_null<MDString>(
+ F.getParent()->getModuleFlag("target-abi"))) {
+ auto TargetABI = LoongArchABI::getTargetABI(ABIName);
+ if (TargetABI != LoongArchABI::ABI_Unknown &&
+ ModuleTargetABI->getString() != ABIName) {
+ report_fatal_error("-target-abi option != target-abi module flag");
+ }
+ ABIName = ModuleTargetABI->getString();
+ }
+ I = std::make_unique<LoongArchSubtarget>(TargetTriple, CPU, TuneCPU, FS,
+ ABIName, *this);
+ }
+ return I.get();
+}
+
+namespace {
+class LoongArchPassConfig : public TargetPassConfig {
+public:
+ LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ LoongArchTargetMachine &getLoongArchTargetMachine() const {
+ return getTM<LoongArchTargetMachine>();
+ }
+
+ bool addInstSelector() override;
+};
+} // namespace
+
+TargetPassConfig *
+LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new LoongArchPassConfig(*this, PM);
+}
+
+bool LoongArchPassConfig::addInstSelector() {
+ addPass(createLoongArchISelDag(getLoongArchTargetMachine()));
+
+ return false;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
new file mode 100644
index 000000000000..cbd872031a32
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
@@ -0,0 +1,46 @@
+//=- LoongArchTargetMachine.h - Define TargetMachine for LoongArch -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LoongArch specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
+
+#include "LoongArchSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class LoongArchTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ mutable StringMap<std::unique_ptr<LoongArchSubtarget>> SubtargetMap;
+
+public:
+ LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM, CodeGenOpt::Level OL,
+ bool JIT);
+ ~LoongArchTargetMachine() override;
+
+ const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override;
+ const LoongArchSubtarget *getSubtargetImpl() const = delete;
+
+ // Pass Pipeline Configuration
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
new file mode 100644
index 000000000000..94a068897f8c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -0,0 +1,68 @@
+//===-- LoongArchAsmBackend.cpp - LoongArch Assembler Backend -*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArchAsmBackend class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+#define DEBUG_TYPE "loongarch-asmbackend"
+
+using namespace llvm;
+
+void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsResolved,
+ const MCSubtargetInfo *STI) const {
+  // TODO: Apply the Value for the given Fixup into the provided data
+  // fragment.
+}
+
+bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
+  // TODO: Determine which relocations require special processing at link
+  // time.
+ return false;
+}
+
+bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
+  // Reject byte counts that are not a multiple of the 4-byte instruction
+  // size.
+ if (Count % 4 != 0)
+ return false;
+
+ // The nop on LoongArch is andi r0, r0, 0.
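+  // Encoded as 0x03400000: the ANDI opcode (0b0000001101) in bits 31:22 with
+  // the si12, rj and rd fields all zero.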
+ for (; Count >= 4; Count -= 4)
+ support::endian::write<uint32_t>(OS, 0x03400000, support::little);
+
+ return true;
+}
+
+std::unique_ptr<MCObjectTargetWriter>
+LoongArchAsmBackend::createObjectTargetWriter() const {
+ return createLoongArchELFObjectWriter(OSABI, Is64Bit);
+}
+
+MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options) {
+ const Triple &TT = STI.getTargetTriple();
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit());
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
new file mode 100644
index 000000000000..77bbfb095747
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -0,0 +1,63 @@
+//===-- LoongArchAsmBackend.h - LoongArch Assembler Backend ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoongArchAsmBackend class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
+
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class LoongArchAsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+ bool Is64Bit;
+
+public:
+ LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
+ : MCAsmBackend(support::little), OSABI(OSABI), Is64Bit(Is64Bit) {}
+ ~LoongArchAsmBackend() override {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override;
+
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override {
+ // FIXME: Implement this when we define fixup kind
+ return 0;
+ }
+
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {}
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
new file mode 100644
index 000000000000..f0c985883125
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
@@ -0,0 +1,40 @@
+//= LoongArchBaseInfo.cpp - Top level definitions for LoongArch MC -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements helper functions for the LoongArch target useful for the
+// compiler back-end and the MC libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+namespace LoongArchABI {
+
+ABI getTargetABI(StringRef ABIName) {
+ auto TargetABI = StringSwitch<ABI>(ABIName)
+ .Case("ilp32s", ABI_ILP32S)
+ .Case("ilp32f", ABI_ILP32F)
+ .Case("ilp32d", ABI_ILP32D)
+ .Case("lp64s", ABI_LP64S)
+ .Case("lp64f", ABI_LP64F)
+ .Case("lp64d", ABI_LP64D)
+ .Default(ABI_Unknown);
+ return TargetABI;
+}
+
+// FIXME: Should a different register be used here?
+MCRegister getBPReg() { return LoongArch::R31; }
+
+} // namespace LoongArchABI
+
+} // namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
new file mode 100644
index 000000000000..e26f22de0cbc
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -0,0 +1,44 @@
+//=- LoongArchBaseInfo.h - Top level definitions for LoongArch MC -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone enum definitions and helper function
+// definitions for the LoongArch target useful for the compiler back-end and the
+// MC libraries.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
+
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+namespace llvm {
+
+namespace LoongArchABI {
+enum ABI {
+ ABI_ILP32S,
+ ABI_ILP32F,
+ ABI_ILP32D,
+ ABI_LP64S,
+ ABI_LP64F,
+ ABI_LP64D,
+ ABI_Unknown
+};
+
+ABI getTargetABI(StringRef ABIName);
+
+// Returns the register used to hold the stack pointer after realignment.
+MCRegister getBPReg();
+} // namespace LoongArchABI
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
new file mode 100644
index 000000000000..95e1314f363a
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -0,0 +1,64 @@
+//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class LoongArchELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
+
+ ~LoongArchELFObjectWriter() override;
+
+  // Return true if the given relocation must be recorded with a symbol
+  // rather than a section plus offset.
+ bool needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const override {
+ return true;
+ }
+
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+} // namespace
+
+LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
+ /*HasRelocationAddend*/ true) {}
+
+LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
+
+unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ // Determine the type of the relocation
+ unsigned Kind = Fixup.getTargetKind();
+
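+  // Kinds at or above FirstLiteralRelocationKind carry a raw ELF relocation
+  // number, as produced by .reloc directives, and are emitted unchanged.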
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
+
+ switch (Kind) {
+  // TODO: Implement this once fixup kinds are defined.
+ default:
+ return ELF::R_LARCH_NONE;
+ }
+}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) {
+ return std::make_unique<LoongArchELFObjectWriter>(OSABI, Is64Bit);
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
new file mode 100644
index 000000000000..66183868f468
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
@@ -0,0 +1,63 @@
+//===- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to asm syntax --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a LoongArch MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchInstPrinter.h"
+#include "LoongArchBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "LoongArchGenAsmWriter.inc"
+
+void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (!printAliasInstr(MI, Address, STI, O))
+ printInstruction(MI, Address, STI, O);
+ printAnnotation(O, Annot);
+}
+
+void LoongArchInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+ O << '$' << getRegisterName(RegNo);
+}
+
+void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ if (MO.isReg()) {
+ printRegName(O, MO.getReg());
+ return;
+ }
+
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return;
+ }
+
+ assert(MO.isExpr() && "Unknown operand kind in printOperand");
+ MO.getExpr()->print(O, &MAI);
+}
+
+const char *LoongArchInstPrinter::getRegisterName(unsigned RegNo) {
+  // Print the register's alias name by default.
+ return getRegisterName(RegNo, LoongArch::RegAliasName);
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
new file mode 100644
index 000000000000..727fc6a3e1f3
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
@@ -0,0 +1,49 @@
+//===-- LoongArchInstPrinter.h - Convert LoongArch MCInst to asm syntax ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a LoongArch MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H
+
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class LoongArchInstPrinter : public MCInstPrinter {
+public:
+ LoongArchInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
+
+ // Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getRegisterName(unsigned RegNo, unsigned AltIdx);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
new file mode 100644
index 000000000000..bc946db2f449
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
@@ -0,0 +1,34 @@
+//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm properties ------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the LoongArchMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+void LoongArchMCAsmInfo::anchor() {}
+
+LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TT) {
+ CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
+ AlignmentIsInBytes = false;
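+  // With AlignmentIsInBytes cleared, alignment directives take a power of
+  // two (log2 of the byte alignment) rather than a byte count.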
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = "\t.dword\t";
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+ SupportsDebugInformation = true;
+ DwarfRegNumForCFI = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h
new file mode 100644
index 000000000000..1cf8a2fdf8aa
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info --------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the LoongArchMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+class LoongArchMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit LoongArchMCAsmInfo(const Triple &TargetTriple);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
new file mode 100644
index 000000000000..9c6a4f39b9ea
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -0,0 +1,127 @@
+//=- LoongArchMCCodeEmitter.cpp - Convert LoongArch code to machine code --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LoongArchMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+class LoongArchMCCodeEmitter : public MCCodeEmitter {
+ LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete;
+ void operator=(const LoongArchMCCodeEmitter &) = delete;
+ MCContext &Ctx;
+ MCInstrInfo const &MCII;
+
+public:
+ LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
+ : Ctx(ctx), MCII(MCII) {}
+
+ ~LoongArchMCCodeEmitter() override {}
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ /// TableGen'erated function for getting the binary encoding for an
+ /// instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Return binary encoding of operand. If the machine operand requires
+ /// relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Return binary encoding of an immediate operand specified by OpNo.
+ /// The value returned is the value of the immediate minus 1.
+ /// Note that this function is dedicated to specific immediate types,
+ /// e.g. uimm2_plus1.
+ unsigned getImmOpValueSub1(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Return binary encoding of an immediate operand specified by OpNo.
+ /// The value returned is the value of the immediate shifted right
+  /// arithmetically by 2.
+ /// Note that this function is dedicated to specific immediate types,
+ /// e.g. simm14_lsl2, simm16_lsl2, simm21_lsl2 and simm26_lsl2.
+ unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+};
+} // end anonymous namespace
+
+unsigned
+LoongArchMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ llvm_unreachable("Unhandled expression!");
+}
+
+unsigned
+LoongArchMCCodeEmitter::getImmOpValueSub1(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return MI.getOperand(OpNo).getImm() - 1;
+}
+
+unsigned
+LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned Res = MI.getOperand(OpNo).getImm();
+ assert((Res & 3) == 0 && "lowest 2 bits are non-zero");
+ return Res >> 2;
+}
+
+void LoongArchMCCodeEmitter::encodeInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ // Get byte count of instruction.
+ unsigned Size = Desc.getSize();
+
+ switch (Size) {
+ default:
+ llvm_unreachable("Unhandled encodeInstruction length!");
+ case 4: {
+ uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+ support::endian::write(OS, Bits, support::little);
+ break;
+ }
+ }
+}
+
+MCCodeEmitter *llvm::createLoongArchMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx) {
+ return new LoongArchMCCodeEmitter(Ctx, MCII);
+}
+
+#include "LoongArchGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
new file mode 100644
index 000000000000..c733c194e6a2
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -0,0 +1,114 @@
+//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides LoongArch specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMCTargetDesc.h"
+#include "LoongArchBaseInfo.h"
+#include "LoongArchInstPrinter.h"
+#include "LoongArchMCAsmInfo.h"
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Compiler.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "LoongArchGenInstrInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "LoongArchGenRegisterInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "LoongArchGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitLoongArchMCRegisterInfo(X, LoongArch::R1);
+ return X;
+}
+
+static MCInstrInfo *createLoongArchMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitLoongArchMCInstrInfo(X);
+ return X;
+}
+
+static MCSubtargetInfo *
+createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ if (CPU.empty())
+ CPU = TT.isArch64Bit() ? "la464" : "generic-la32";
+ return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+}
+
+static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT,
+ const MCTargetOptions &Options) {
+ MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT);
+
+  // Initial frame state: the CFA is sp (r3) with zero offset.
+ MCRegister SP = MRI.getDwarfRegNum(LoongArch::R3, true);
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0);
+ MAI->addInitialFrameState(Inst);
+
+ return MAI;
+}
+
+static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new LoongArchInstPrinter(MAI, MII, MRI);
+}
+
+namespace {
+
+class LoongArchMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ explicit LoongArchMCInstrAnalysis(const MCInstrInfo *Info)
+ : MCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ unsigned NumOps = Inst.getNumOperands();
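+    // Branches and BL encode a PC-relative offset as their last operand.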
+ if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) {
+ Target = Addr + Inst.getOperand(NumOps - 1).getImm();
+ return true;
+ }
+
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createLoongArchInstrAnalysis(const MCInstrInfo *Info) {
+ return new LoongArchMCInstrAnalysis(Info);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() {
+ for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) {
+ TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createLoongArchMCInstrInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createLoongArchMCSubtargetInfo);
+ TargetRegistry::RegisterMCAsmInfo(*T, createLoongArchMCAsmInfo);
+ TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend);
+ TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter);
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchInstrAnalysis);
+ }
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
new file mode 100644
index 000000000000..e576b9a49cd6
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h
@@ -0,0 +1,54 @@
+//===- LoongArchMCTargetDesc.h - LoongArch Target Descriptions --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides LoongArch specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H
+
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/DataTypes.h"
+#include <memory>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectTargetWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+
+MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx);
+
+MCAsmBackend *createLoongArchAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options);
+
+std::unique_ptr<MCObjectTargetWriter>
+createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
+
+} // namespace llvm
+
+// Defines symbolic names for LoongArch registers.
+#define GET_REGINFO_ENUM
+#include "LoongArchGenRegisterInfo.inc"
+
+// Defines symbolic names for LoongArch instructions.
+#define GET_INSTRINFO_ENUM
+#include "LoongArchGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "LoongArchGenSubtargetInfo.inc"
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp
new file mode 100644
index 000000000000..1509c436c810
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp
@@ -0,0 +1,51 @@
+//===- LoongArchMatInt.cpp - Immediate materialisation ---------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMatInt.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
+ // Val:
+ // | hi32 | lo32 |
+ // +-----------+------------------+------------------+-----------+
+ // | Highest12 | Higher20 | Hi20 | Lo12 |
+ // +-----------+------------------+------------------+-----------+
+ // 63 52 51 32 31 12 11 0
+ //
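+  // For example, Val = 0x12345678 splits into Highest12 = 0, Higher20 = 0,
+  // Hi20 = 0x12345 and Lo12 = 0x678, and materialises as lu12i.w 0x12345
+  // followed by ori 0x678.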
+ const int64_t Highest12 = Val >> 52 & 0xFFF;
+ const int64_t Higher20 = Val >> 32 & 0xFFFFF;
+ const int64_t Hi20 = Val >> 12 & 0xFFFFF;
+ const int64_t Lo12 = Val & 0xFFF;
+ InstSeq Insts;
+
+ if (Highest12 != 0 && SignExtend64<52>(Val) == 0) {
+ Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
+ return Insts;
+ }
+
+ if (Hi20 == 0)
+ Insts.push_back(Inst(LoongArch::ORI, Lo12));
+ else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20))
+ Insts.push_back(Inst(LoongArch::ADDI_W, SignExtend64<12>(Lo12)));
+ else {
+ Insts.push_back(Inst(LoongArch::LU12I_W, SignExtend64<20>(Hi20)));
+ if (Lo12 != 0)
+ Insts.push_back(Inst(LoongArch::ORI, Lo12));
+ }
+
+ if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20))
+ Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20)));
+
+ if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12))
+ Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
+
+ return Insts;
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
new file mode 100644
index 000000000000..945aa91e40c0
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
@@ -0,0 +1,30 @@
+//===- LoongArchMatInt.h - Immediate materialisation ----------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <cstdint>
+
+namespace llvm {
+namespace LoongArchMatInt {
+struct Inst {
+ unsigned Opc;
+ int64_t Imm;
+ Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
+};
+using InstSeq = SmallVector<Inst, 4>;
+
+// Helper to generate an instruction sequence that will materialise the given
+// immediate value into a register.
+InstSeq generateInstSeq(int64_t Val);
+} // namespace LoongArchMatInt
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
new file mode 100644
index 000000000000..10654510032f
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
@@ -0,0 +1,30 @@
+//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+using namespace llvm;
+
+Target &llvm::getTheLoongArch32Target() {
+ static Target TheLoongArch32Target;
+ return TheLoongArch32Target;
+}
+
+Target &llvm::getTheLoongArch64Target() {
+ static Target TheLoongArch64Target;
+ return TheLoongArch64Target;
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() {
+ RegisterTarget<Triple::loongarch32, /*HasJIT=*/false> X(
+ getTheLoongArch32Target(), "loongarch32", "32-bit LoongArch",
+ "LoongArch");
+ RegisterTarget<Triple::loongarch64, /*HasJIT=*/false> Y(
+ getTheLoongArch64Target(), "loongarch64", "64-bit LoongArch",
+ "LoongArch");
+}
diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
new file mode 100644
index 000000000000..6fc13d52c065
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheLoongArch32Target();
+Target &getTheLoongArch64Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index dcd581875f60..0a3d09552535 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -11,6 +11,7 @@
#include "TargetInfo/M68kTargetInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index a565ff4e004d..31b59c17c0ca 100644
--- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -20,8 +20,11 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -29,581 +32,112 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
-namespace {
-constexpr unsigned MaxInstructionWords = 11;
-
-class M68kInstructionBuffer {
- typedef SmallVector<uint16_t, MaxInstructionWords> BufferType;
- BufferType Buffer;
-
-public:
- M68kInstructionBuffer() {}
-
- template <typename TIt>
- M68kInstructionBuffer(TIt Start, TIt End) : Buffer(Start, End) {}
-
- unsigned size() const { return Buffer.size(); }
-
- BufferType::const_iterator begin() const { return Buffer.begin(); }
- BufferType::const_iterator end() const { return Buffer.end(); }
-
- uint16_t operator[](unsigned Index) const {
- assert((Index < Buffer.size()) && "tried to read out of bounds word");
- return Buffer[Index];
- }
-
- void truncate(unsigned NewLength) {
- assert((NewLength <= Buffer.size()) &&
- "instruction buffer too short to truncate");
- Buffer.resize(NewLength);
- }
-
- void dump() const;
-
- static M68kInstructionBuffer fill(ArrayRef<uint8_t> Bytes);
-};
-
-class M68kInstructionReader {
- M68kInstructionBuffer Buffer;
- unsigned NumRead;
-
-public:
- M68kInstructionReader(M68kInstructionBuffer Buf) : Buffer(Buf), NumRead(0) {}
-
- unsigned size() const { return (Buffer.size() * 16) - NumRead; }
-
- uint64_t readBits(unsigned NumBits);
-};
-
-struct M68kInstructionLookup {
- unsigned OpCode;
- M68kInstructionBuffer Mask;
- M68kInstructionBuffer Value;
-
- unsigned size() const { return Mask.size(); }
-
- // Check whether this instruction could possibly match the given bytes.
- bool matches(const M68kInstructionBuffer &Test) const;
- void dump() const;
-};
-
-class M68kInstructionLookupBuilder {
- std::array<uint16_t, MaxInstructionWords> Mask;
- std::array<uint16_t, MaxInstructionWords> Value;
- unsigned NumWritten;
-
-public:
- M68kInstructionLookupBuilder() : NumWritten(0) {
- Mask.fill(0);
- Value.fill(0);
- }
-
- unsigned numWords() const {
- assert(!(NumWritten & 0xf) && "instructions must be whole words");
- return NumWritten >> 4;
- }
-
- bool isValid() const;
- M68kInstructionLookup build(unsigned OpCode);
- void addBits(unsigned N, uint64_t Bits);
- void skipBits(unsigned N);
-};
-
-/// A disassembler class for M68k.
-class M68kDisassembler : public MCDisassembler {
- MCInstrInfo *MCII;
- std::vector<M68kInstructionLookup> Lookups;
-
-public:
- M68kDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
- MCInstrInfo *MCII)
- : MCDisassembler(STI, Ctx), MCII(MCII) {
- buildBeadTable();
- }
- virtual ~M68kDisassembler() {}
-
- void buildBeadTable();
- DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const override;
- void decodeReg(MCInst &Instr, unsigned int Bead,
- M68kInstructionReader &Reader, unsigned &Scratch) const;
- void decodeImm(MCInst &Instr, unsigned int Bead,
- M68kInstructionReader &Reader, unsigned &Scratch) const;
- unsigned int getRegOperandIndex(MCInst &Instr, unsigned int Bead) const;
- unsigned int getImmOperandIndex(MCInst &Instr, unsigned int Bead) const;
-};
-} // namespace
-
-static unsigned RegisterDecode[] = {
- M68k::A0, M68k::A1, M68k::A2, M68k::A3, M68k::A4, M68k::A5,
- M68k::A6, M68k::SP, M68k::D0, M68k::D1, M68k::D2, M68k::D3,
- M68k::D4, M68k::D5, M68k::D6, M68k::D7,
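+// The table is ordered so that the low three bits of an encoding select the
+// register number and bit 3 selects data (0) versus address (1) registers;
+// the DecodeAR* helpers below OR that bit in.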
+static const unsigned RegisterDecode[] = {
+ M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
+ M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
+ M68k::A4, M68k::A5, M68k::A6, M68k::SP,
};
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
-void M68kInstructionBuffer::dump() const {
- for (auto Word : Buffer) {
- for (unsigned B = 0; B < 16; ++B) {
- uint16_t Bit = (1 << (16 - B - 1));
- unsigned IsClear = !(Word & Bit);
-
- if (B == 8)
- dbgs() << " ";
-
- char Ch = IsClear ? '0' : '1';
- dbgs() << Ch;
- }
-
- dbgs() << " ";
- }
-
- dbgs() << "\n";
+static DecodeStatus DecodeRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo >= 16)
+ return DecodeStatus::Fail;
+ Inst.addOperand(MCOperand::createReg(RegisterDecode[RegNo]));
+ return DecodeStatus::Success;
}
-#endif
-
-M68kInstructionBuffer M68kInstructionBuffer::fill(ArrayRef<uint8_t> Bytes) {
- SmallVector<uint16_t, MaxInstructionWords> Buffer;
- Buffer.resize(std::min(Bytes.size() / 2, Buffer.max_size()));
-
- for (unsigned I = 0, E = Buffer.size(); I < E; ++I) {
- unsigned Offset = I * 2;
- uint64_t Hi = Bytes[Offset];
- uint64_t Lo = Bytes[Offset + 1];
- uint64_t Word = (Hi << 8) | Lo;
- Buffer[I] = Word;
-
- LLVM_DEBUG(
- errs() << format("Read word %x (%d)\n", (unsigned)Word, Buffer.size()));
- }
-
- return M68kInstructionBuffer(Buffer.begin(), Buffer.end());
-}
-
-uint64_t M68kInstructionReader::readBits(unsigned NumBits) {
- assert((size() >= NumBits) && "not enough bits to read");
-
- // We have to read the bits in 16-bit chunks because we read them as
- // 16-bit words but they're actually written in big-endian. If a read
- // crosses a word boundary we have to be careful.
-
- uint64_t Value = 0;
- unsigned BitsRead = 0;
-
- while (BitsRead < NumBits) {
- unsigned AvailableThisWord = 16 - (NumRead & 0xf);
- unsigned ToRead = std::min(NumBits, AvailableThisWord);
-
- unsigned WordIndex = NumRead >> 4;
- uint64_t ThisWord = Buffer[WordIndex] >> (NumRead & 0xf);
- uint64_t Mask = (1 << ToRead) - 1;
- Value |= (ThisWord & Mask) << BitsRead;
- NumRead += ToRead;
- BitsRead += ToRead;
- }
- return Value;
-}
-
-bool M68kInstructionLookup::matches(const M68kInstructionBuffer &Test) const {
- if (Test.size() < Value.size())
- return false;
-
- for (unsigned I = 0, E = Value.size(); I < E; ++I) {
- uint16_t Have = Test[I];
- uint16_t Need = Value[I];
- uint16_t WordMask = Mask[I];
-
- if ((Have & WordMask) != Need)
- return false;
- }
-
- return true;
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD
-void M68kInstructionLookup::dump() const {
- dbgs() << "M68kInstructionLookup " << OpCode << " ";
-
- for (unsigned I = 0, E = Mask.size(); I < E; ++I) {
- uint16_t WordMask = Mask[I];
- uint16_t WordValue = Value[I];
-
- for (unsigned B = 0; B < 16; ++B) {
- uint16_t Bit = (1 << (15 - B));
- unsigned IsMasked = !(WordMask & Bit);
- unsigned IsClear = !(WordValue & Bit);
-
- if (B == 8)
- dbgs() << " ";
-
- char Ch = IsMasked ? '?' : (IsClear ? '0' : '1');
- dbgs() << Ch;
- }
-
- dbgs() << " ";
- }
- dbgs() << "\n";
+static DecodeStatus DecodeDR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo, Address, Decoder);
}
-#endif
-bool M68kInstructionLookupBuilder::isValid() const {
- for (unsigned I = 0, E = numWords(); I < E; ++I)
- if (Mask[I])
- return true;
-
- return false;
+static DecodeStatus DecodeDR16RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo, Address, Decoder);
}
-M68kInstructionLookup M68kInstructionLookupBuilder::build(unsigned OpCode) {
- unsigned NumWords = numWords();
- M68kInstructionBuffer MaskBuffer(Mask.begin(), Mask.begin() + NumWords);
- M68kInstructionBuffer ValueBuffer(Value.begin(), Value.begin() + NumWords);
- M68kInstructionLookup Ret;
- Ret.OpCode = OpCode;
- Ret.Mask = MaskBuffer;
- Ret.Value = ValueBuffer;
- return Ret;
+static DecodeStatus DecodeDR8RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo, Address, Decoder);
}
-void M68kInstructionLookupBuilder::addBits(unsigned N, uint64_t Bits) {
- while (N > 0) {
- unsigned WordIndex = NumWritten >> 4;
- unsigned WordOffset = NumWritten & 0xf;
- unsigned AvailableThisWord = 16 - WordOffset;
- unsigned ToWrite = std::min(AvailableThisWord, N);
-
- uint16_t WordMask = (1 << ToWrite) - 1;
- uint16_t BitsToWrite = Bits & WordMask;
-
- Value[WordIndex] |= (BitsToWrite << WordOffset);
- Mask[WordIndex] |= (WordMask << WordOffset);
-
- Bits >>= ToWrite;
- N -= ToWrite;
- NumWritten += ToWrite;
- }
+static DecodeStatus DecodeAR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo | 8ULL, Address, Decoder);
}
-void M68kInstructionLookupBuilder::skipBits(unsigned N) { NumWritten += N; }
-
-// This is a bit of a hack: we can't generate this table at table-gen time
-// because some of the definitions are in our platform.
-void M68kDisassembler::buildBeadTable() {
- const unsigned NumInstr = M68k::INSTRUCTION_LIST_END;
- Lookups.reserve(NumInstr);
-
- for (unsigned I = 0; I < NumInstr; ++I) {
- M68kInstructionLookupBuilder Builder;
-
- for (const uint8_t *PartPtr = M68k::getMCInstrBeads(I); *PartPtr;
- ++PartPtr) {
- uint8_t Bead = *PartPtr;
- unsigned Ext = Bead >> 4;
- unsigned Op = Bead & 0xf;
-
- switch (Op) {
- case M68kBeads::Ctrl:
- // Term will have already been skipped by the loop.
- assert((Ext == M68kBeads::Ignore) && "unexpected command bead");
- break;
-
- case M68kBeads::Bits1:
- Builder.addBits(1, Ext);
- break;
-
- case M68kBeads::Bits2:
- Builder.addBits(2, Ext);
- break;
-
- case M68kBeads::Bits3:
- Builder.addBits(3, Ext);
- break;
-
- case M68kBeads::Bits4:
- Builder.addBits(4, Ext);
- break;
-
- case M68kBeads::DAReg:
- case M68kBeads::DA:
- case M68kBeads::DReg:
- case M68kBeads::Reg:
- if (Op != M68kBeads::DA)
- Builder.skipBits(3);
-
- if (Op != M68kBeads::Reg && Op != M68kBeads::DReg)
- Builder.skipBits(1);
-
- break;
-
- case M68kBeads::Disp8:
- Builder.skipBits(8);
- break;
-
- case M68kBeads::Imm8:
- case M68kBeads::Imm16:
- Builder.skipBits(16);
- break;
-
- case M68kBeads::Imm32:
- Builder.skipBits(32);
- break;
-
- case M68kBeads::Imm3:
- Builder.skipBits(3);
- break;
-
- default:
- llvm_unreachable("unhandled bead type");
- }
- }
-
- // Ignore instructions which are unmatchable (usually pseudo instructions).
- if (!Builder.isValid())
- continue;
-
- Lookups.push_back(Builder.build(I));
- }
+static DecodeStatus DecodeAR16RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo | 8ULL, Address, Decoder);
}
-unsigned M68kDisassembler::getRegOperandIndex(MCInst &Instr,
- unsigned Bead) const {
- unsigned Ext = Bead >> 4;
-
- const MCInstrDesc &Desc = MCII->get(Instr.getOpcode());
- auto MIOpIdx = M68k::getLogicalOperandIdx(Instr.getOpcode(), Ext & 7);
-
- if (M68kII::hasMultiMIOperands(Instr.getOpcode(), Ext & 7)) {
- bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL;
- if (IsPCRel)
- MIOpIdx += M68k::PCRelIndex;
- else if (Ext & 8)
- MIOpIdx += M68k::MemIndex;
- else
- MIOpIdx += M68k::MemBase;
- }
-
- return MIOpIdx;
+static DecodeStatus DecodeXR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo, Address, Decoder);
}
-unsigned M68kDisassembler::getImmOperandIndex(MCInst &Instr,
- unsigned Bead) const {
- unsigned Ext = Bead >> 4;
-
- const MCInstrDesc &Desc = MCII->get(Instr.getOpcode());
- auto MIOpIdx = M68k::getLogicalOperandIdx(Instr.getOpcode(), Ext & 7);
-
- if (M68kII::hasMultiMIOperands(Instr.getOpcode(), Ext & 7)) {
- bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL;
- if (IsPCRel)
- MIOpIdx += M68k::PCRelDisp;
- else if (Ext & 8)
- MIOpIdx += M68k::MemOuter;
- else
- MIOpIdx += M68k::MemDisp;
- }
-
- return MIOpIdx;
+static DecodeStatus DecodeXR16RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo, Address, Decoder);
}
-void M68kDisassembler::decodeReg(MCInst &Instr, unsigned Bead,
- M68kInstructionReader &Reader,
- unsigned &Scratch) const {
- unsigned Op = Bead & 0xf;
- LLVM_DEBUG(errs() << format("decodeReg %x\n", Bead));
-
- if (Op != M68kBeads::DA)
- Scratch = (Scratch & ~7) | Reader.readBits(3);
-
- if (Op != M68kBeads::Reg) {
- bool DA = (Op != M68kBeads::DReg) && Reader.readBits(1);
- if (!DA)
- Scratch |= 8;
- else
- Scratch &= ~8;
- }
+static DecodeStatus DecodeCCRCRegisterClass(MCInst &Inst, APInt &Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ llvm_unreachable("unimplemented");
}
-void M68kDisassembler::decodeImm(MCInst &Instr, unsigned Bead,
- M68kInstructionReader &Reader,
- unsigned &Scratch) const {
- unsigned Op = Bead & 0xf;
- LLVM_DEBUG(errs() << format("decodeImm %x\n", Bead));
+#include "M68kGenDisassemblerTable.inc"
- unsigned NumToRead;
- switch (Op) {
- case M68kBeads::Disp8:
- NumToRead = 8;
- break;
- case M68kBeads::Imm8:
- case M68kBeads::Imm16:
- NumToRead = 16;
- break;
- case M68kBeads::Imm32:
- NumToRead = 32;
- break;
- case M68kBeads::Imm3:
- NumToRead = 3;
- break;
- default:
- llvm_unreachable("invalid imm");
- }
+/// A disassembler class for M68k.
+struct M68kDisassembler : public MCDisassembler {
+ M68kDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+  ~M68kDisassembler() override = default;
- Scratch = (NumToRead < 32) ? (Scratch << NumToRead) : 0;
- Scratch |= Reader.readBits(NumToRead);
-}
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &CStream) const override;
+};
DecodeStatus M68kDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CStream) const {
- // Read and shift the input (fetch as much as we can for now).
- auto Buffer = M68kInstructionBuffer::fill(Bytes);
- if (Buffer.size() == 0)
- return Fail;
-
- // Check through our lookup table.
- bool Found = false;
- for (unsigned I = 0, E = Lookups.size(); I < E; ++I) {
- const M68kInstructionLookup &Lookup = Lookups[I];
- if (!Lookup.matches(Buffer))
- continue;
-
- Found = true;
- Size = Lookup.size() * 2;
- Buffer.truncate(Lookup.size());
- Instr.setOpcode(Lookup.OpCode);
- LLVM_DEBUG(errs() << "decoding instruction " << MCII->getName(Lookup.OpCode)
- << "\n");
- break;
- }
-
- if (!Found)
- return Fail;
-
- M68kInstructionReader Reader(Buffer);
- const MCInstrDesc &Desc = MCII->get(Instr.getOpcode());
- unsigned NumOperands = Desc.NumOperands;
-
- // Now use the beads to decode the operands.
- enum class OperandType {
- Invalid,
- Reg,
- Imm,
- };
-
- SmallVector<OperandType, 6> OpType(NumOperands, OperandType::Invalid);
- SmallVector<unsigned, 6> Scratch(NumOperands, 0);
- for (const uint8_t *PartPtr = M68k::getMCInstrBeads(Instr.getOpcode());
- *PartPtr; ++PartPtr) {
- uint8_t Bead = *PartPtr;
- unsigned Ext = Bead >> 4;
- unsigned Op = Bead & 0xf;
- unsigned MIOpIdx;
-
- switch (Op) {
- case M68kBeads::Ctrl:
- // Term will have already been skipped by the loop.
- assert((Ext == M68kBeads::Ignore) && "unexpected command bead");
- break;
-
- // These bits are constant - if we're here we've already matched them.
- case M68kBeads::Bits1:
- Reader.readBits(1);
- break;
- case M68kBeads::Bits2:
- Reader.readBits(2);
- break;
- case M68kBeads::Bits3:
- Reader.readBits(3);
- break;
- case M68kBeads::Bits4:
- Reader.readBits(4);
- break;
-
- case M68kBeads::DAReg:
- case M68kBeads::DA:
- case M68kBeads::DReg:
- case M68kBeads::Reg:
- MIOpIdx = getRegOperandIndex(Instr, Bead);
- assert(((OpType[MIOpIdx] == OperandType::Invalid) ||
- (OpType[MIOpIdx] == OperandType::Reg)) &&
- "operands cannot change type");
- OpType[MIOpIdx] = OperandType::Reg;
- decodeReg(Instr, Bead, Reader, Scratch[MIOpIdx]);
- break;
-
- case M68kBeads::Disp8:
- case M68kBeads::Imm8:
- case M68kBeads::Imm16:
- case M68kBeads::Imm32:
- case M68kBeads::Imm3:
- MIOpIdx = getImmOperandIndex(Instr, Bead);
- assert(((OpType[MIOpIdx] == OperandType::Invalid) ||
- (OpType[MIOpIdx] == OperandType::Imm)) &&
- "operands cannot change type");
- OpType[MIOpIdx] = OperandType::Imm;
- decodeImm(Instr, Bead, Reader, Scratch[MIOpIdx]);
- break;
-
- default:
- llvm_unreachable("unhandled bead type");
- }
- }
-
- // Copy constrained operands.
- for (unsigned DstMIOpIdx = 0; DstMIOpIdx < NumOperands; ++DstMIOpIdx) {
- int TiedTo = Desc.getOperandConstraint(DstMIOpIdx, MCOI::TIED_TO);
- if (TiedTo < 0)
- continue;
-
- unsigned SrcMIOpIdx = TiedTo;
-
- unsigned OpCount = 0;
- for (unsigned I = 0;; ++I) {
- unsigned Offset = M68k::getLogicalOperandIdx(Instr.getOpcode(), I);
- assert(Offset <= SrcMIOpIdx && "missing logical operand");
- if (Offset == SrcMIOpIdx) {
- OpCount = M68k::getLogicalOperandSize(Instr.getOpcode(), I);
- break;
- }
+ DecodeStatus Result;
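+  // The generated decoder calls MakeUp whenever it needs more instruction
+  // bits: it widens Insn to the next 16-bit boundary and appends big-endian
+  // words from the byte stream.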
+ auto MakeUp = [&](APInt &Insn, unsigned InstrBits) {
+ unsigned Idx = Insn.getBitWidth() >> 3;
+ unsigned RoundUp = alignTo(InstrBits, Align(16));
+ if (RoundUp > Insn.getBitWidth())
+ Insn = Insn.zext(RoundUp);
+ RoundUp = RoundUp >> 3;
+ for (; Idx < RoundUp; Idx += 2) {
+ Insn.insertBits(support::endian::read16be(&Bytes[Idx]), Idx * 8, 16);
}
- assert(OpCount != 0 && "operand count not found");
-
- for (unsigned I = 0; I < OpCount; ++I) {
- assert(OpType[DstMIOpIdx + I] == OperandType::Invalid &&
- "tried to stomp over operand whilst applying constraints");
- OpType[DstMIOpIdx + I] = OpType[SrcMIOpIdx + I];
- Scratch[DstMIOpIdx + I] = Scratch[SrcMIOpIdx + I];
- }
- }
-
- // Create the operands from our scratch space.
- for (unsigned O = 0; O < NumOperands; ++O) {
- switch (OpType[O]) {
- case OperandType::Invalid:
- assert(false && "operand not parsed");
-
- case OperandType::Imm:
- Instr.addOperand(MCOperand::createImm(Scratch[O]));
- break;
-
- case OperandType::Reg:
- Instr.addOperand(MCOperand::createReg(RegisterDecode[Scratch[O]]));
- break;
- }
- }
-
- assert((Reader.size() == 0) && "wrong number of bits consumed");
- return Success;
+ };
+ APInt Insn(16, support::endian::read16be(Bytes.data()));
+  // 2 bytes of data are consumed, so set Size to 2.
+  // If we don't do this, the disassembler may produce a result even when
+  // the encoding is invalid. We need to let it fail correctly.
+ Size = 2;
+ Result = decodeInstruction(DecoderTable80, Instr, Insn, Address, this, STI,
+ MakeUp);
+ if (Result == DecodeStatus::Success)
+ Size = InstrLenTable[Instr.getOpcode()] >> 3;
+ return Result;
}
static MCDisassembler *createM68kDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new M68kDisassembler(STI, Ctx, T.createMCInstrInfo());
+ return new M68kDisassembler(STI, Ctx);
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kDisassembler() {
diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
index b3d17184f1fe..e0aaa9d51cc3 100644
--- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetCallingConv.h"
using namespace llvm;
@@ -27,10 +28,12 @@ using namespace llvm;
M68kCallLowering::M68kCallLowering(const M68kTargetLowering &TLI)
: CallLowering(&TLI) {}
-struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
- OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB)
- : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+struct M68kOutgoingArgHandler : public CallLowering::OutgoingValueHandler {
+ M68kOutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB)
+ : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB),
+ DL(MIRBuilder.getMF().getDataLayout()),
+ STI(MIRBuilder.getMF().getSubtarget<M68kSubtarget>()) {}
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign VA) override {
@@ -41,16 +44,29 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("unimplemented");
+ MachineFunction &MF = MIRBuilder.getMF();
+ Register ExtReg = extendRegister(ValVReg, VA);
+
+ auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
+ inferAlignFromPtrInfo(MF, MPO));
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
ISD::ArgFlagsTy Flags) override {
- llvm_unreachable("unimplemented");
+ LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0));
+ LLT SType = LLT::scalar(DL.getPointerSizeInBits(0));
+ Register StackReg = STI.getRegisterInfo()->getStackRegister();
+ auto SPReg = MIRBuilder.buildCopy(p0, StackReg).getReg(0);
+ auto OffsetReg = MIRBuilder.buildConstant(SType, Offset);
+ auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);
+ MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ return AddrReg.getReg(0);
}
-
MachineInstrBuilder MIB;
+ const DataLayout &DL;
+ const M68kSubtarget &STI;
};
bool M68kCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, ArrayRef<Register> VRegs,
@@ -72,7 +88,7 @@ bool M68kCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
OutgoingValueAssigner ArgAssigner(AssignFn);
- OutgoingArgHandler ArgHandler(MIRBuilder, MRI, MIB);
+ M68kOutgoingArgHandler ArgHandler(MIRBuilder, MRI, MIB);
Success = determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs,
MIRBuilder, F.getCallingConv(),
F.isVarArg());
@@ -144,9 +160,73 @@ Register M68kIncomingValueHandler::getStackAddress(uint64_t Size,
return AddrReg.getReg(0);
}
+void CallReturnHandler::assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign VA) {
+ MIB.addDef(PhysReg, RegState::Implicit);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+}
+
bool M68kCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
- return false;
+ MachineFunction &MF = MIRBuilder.getMF();
+ Function &F = MF.getFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto &DL = F.getParent()->getDataLayout();
+ const M68kTargetLowering &TLI = *getTLI<M68kTargetLowering>();
+ const M68kSubtarget &STI = MF.getSubtarget<M68kSubtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const M68kRegisterInfo *TRI = STI.getRegisterInfo();
+
+ SmallVector<ArgInfo, 8> OutArgs;
+ for (auto &OrigArg : Info.OrigArgs)
+ splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
+
+ SmallVector<ArgInfo, 8> InArgs;
+ if (!Info.OrigRet.Ty->isVoidTy())
+ splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);
+
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ auto CallSeqStart = MIRBuilder.buildInstr(AdjStackDown);
+
+ unsigned Opc = TLI.getTargetMachine().isPositionIndependent() ? M68k::CALLq
+ : Info.Callee.isReg() ? M68k::CALLj
+ : M68k::CALLb;
+
+ auto MIB = MIRBuilder.buildInstrNoInsert(Opc)
+ .add(Info.Callee)
+ .addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv));
+
+ CCAssignFn *AssignFn = TLI.getCCAssignFn(Info.CallConv, false, Info.IsVarArg);
+ OutgoingValueAssigner Assigner(AssignFn);
+ M68kOutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
+ if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
+ Info.CallConv, Info.IsVarArg))
+ return false;
+
+ if (Info.Callee.isReg())
+ constrainOperandRegClass(MF, *TRI, MRI, *STI.getInstrInfo(),
+ *STI.getRegBankInfo(), *MIB, MIB->getDesc(),
+ Info.Callee, 0);
+
+ MIRBuilder.insertInstr(MIB);
+
+ if (!Info.OrigRet.Ty->isVoidTy()) {
+ CCAssignFn *RetAssignFn =
+ TLI.getCCAssignFn(Info.CallConv, true, Info.IsVarArg);
+
+ OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn);
+ CallReturnHandler Handler(MIRBuilder, MRI, MIB);
+ if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
+ Info.CallConv, Info.IsVarArg))
+ return false;
+ }
+
+ CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
+
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ MIRBuilder.buildInstr(AdjStackUp).addImm(Assigner.StackOffset).addImm(0);
+
+ return true;
}
bool M68kCallLowering::enableBigEndian() const { return true; }
diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
index 24212e6dd9c6..a1589e96aa3d 100644
--- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
@@ -22,6 +22,7 @@
namespace llvm {
class M68kTargetLowering;
+class MachineInstrBuilder;
class M68kCallLowering : public CallLowering {
// TODO: We are only supporting return instruction with no value at this time
@@ -67,6 +68,17 @@ struct FormalArgHandler : public M68kIncomingValueHandler {
: M68kIncomingValueHandler(MIRBuilder, MRI) {}
};
+struct CallReturnHandler : public M68kIncomingValueHandler {
+ CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB)
+ : M68kIncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+
+private:
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign VA) override;
+
+ MachineInstrBuilder &MIB;
+};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KCALLLOWERING_H
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
index b6ed6ab28a5d..f833eb2d19d4 100644
--- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
@@ -13,9 +13,9 @@
#include "M68kRegisterBankInfo.h"
#include "M68kInstrInfo.h" // For the register classes
#include "M68kSubtarget.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_TARGET_REGBANK_IMPL
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
index 6c0b8ca7ba5a..493c139f018c 100644
--- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "M68kGenRegisterBank.inc"
diff --git a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
index 7f0c0dd92dbb..cbd69f24666e 100644
--- a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
+++ b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
@@ -231,7 +231,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- STI = &static_cast<const M68kSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<M68kSubtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MFI = MF.getInfo<M68kMachineFunctionInfo>();
diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
index acfa30f28c2b..51a148f5aa04 100644
--- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
+++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
@@ -302,7 +302,7 @@ bool M68kExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
bool M68kExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- STI = &static_cast<const M68kSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<M68kSubtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
MFI = MF.getInfo<M68kMachineFunctionInfo>();
diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
index 9ef97b96ea9a..f9459e284aef 100644
--- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
+++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
@@ -181,6 +181,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
private:
/// Keep a pointer to the M68kSubtarget around so that we can
@@ -311,8 +312,35 @@ private:
};
} // namespace
+bool M68kDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ if (U == Root) {
+ switch (U->getOpcode()) {
+ default:
+ return true;
+ case M68kISD::SUB:
+ case ISD::SUB:
+      // Prefer the NEG instruction when subtracting a value from zero.
+ // e.g.
+ // move.l #0, %d0
+ // sub.l (4,%sp), %d0
+ // vs.
+ // move.l (4,%sp), %d0
+ // neg.l %d0
+ if (llvm::isNullConstant(U->getOperand(0)))
+ return false;
+ break;
+ }
+ }
+
+ return true;
+}
+
bool M68kDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const M68kSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<M68kSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index dba190a2ebc0..250519efd14a 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -101,6 +101,9 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
setOperationAction(OP, MVT::i32, Expand);
}
+ for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS})
+ setOperationAction(OP, MVT::i32, Custom);
+
// Add/Sub overflow ops with MVT::Glues are lowered to CCR dependences.
for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
setOperationAction(ISD::ADDC, VT, Custom);
@@ -170,7 +173,7 @@ MVT M68kTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
if (Ty.isSimple()) {
return Ty.getSimpleVT();
}
- return MVT::getIntegerVT(8 * DL.getPointerSize(0));
+ return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
}
#include "M68kGenCallingConv.inc"
@@ -1354,6 +1357,12 @@ SDValue M68kTargetLowering::LowerOperation(SDValue Op,
return LowerVASTART(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SHL_PARTS:
+ return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ return LowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS:
+ return LowerShiftRightParts(Op, DAG, false);
}
}
@@ -3239,6 +3248,102 @@ SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue M68kTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ EVT VT = Lo.getValueType();
+
+ // if Shamt - register size < 0: // Shamt < register size
+ // Lo = Lo << Shamt
+ // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (register size - 1 ^ Shamt))
+ // else:
+ // Lo = 0
+ // Hi = Lo << (Shamt - register size)
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT);
+ SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT);
+ SDValue ShamtMinusRegisterSize =
+ DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize);
+ SDValue RegisterSizeMinus1Shamt =
+ DAG.getNode(ISD::XOR, DL, VT, RegisterSizeMinus1, Shamt);
+
+ SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
+ SDValue ShiftRightLo =
+ DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, RegisterSizeMinus1Shamt);
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+ SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+ SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusRegisterSize);
+
+ SDValue CC =
+ DAG.getSetCC(DL, MVT::i8, ShamtMinusRegisterSize, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+ return DAG.getMergeValues({Lo, Hi}, DL);
+}
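The commented expansion is the standard two-word shift identity; the XOR with (register size - 1) stands in for the subtraction 32 - Shamt so that no intermediate shift amount ever reaches the register width. As a sanity check, here is the same algorithm over explicit 32-bit halves in plain C++; this is an illustrative sketch, not LLVM code.

#include <cstdint>

// Sketch only: 64-bit left shift built from 32-bit halves, following
// the commented expansion in LowerShiftLeftParts above.
void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                uint32_t &OutLo, uint32_t &OutHi) {
  if (Shamt < 32) {
    OutLo = Lo << Shamt;
    // (Lo >> 1) >> (31 ^ Shamt) equals Lo >> (32 - Shamt), but both
    // shift amounts stay below 32 even when Shamt is 0.
    OutHi = (Hi << Shamt) | ((Lo >> 1) >> (31 ^ Shamt));
  } else {
    OutLo = 0;
    OutHi = Lo << (Shamt - 32);
  }
}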
+
+SDValue M68kTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+ bool IsSRA) const {
+ SDLoc DL(Op);
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ EVT VT = Lo.getValueType();
+
+ // SRA expansion:
+ // if Shamt - register size < 0: // Shamt < register size
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (register size - 1 ^ Shamt))
+ // Hi = Hi >>s Shamt
+ // else:
+ // Lo = Hi >>s (Shamt - register size);
+ // Hi = Hi >>s (register size - 1)
+ //
+ // SRL expansion:
+ // if Shamt - register size < 0: // Shamt < register size
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (register size - 1 ^ Shamt))
+ // Hi = Hi >>u Shamt
+ // else:
+ // Lo = Hi >>u (Shamt - register size);
+ // Hi = 0;
+
+ unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT);
+ SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT);
+ SDValue ShamtMinusRegisterSize =
+ DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize);
+ SDValue RegisterSizeMinus1Shamt =
+ DAG.getNode(ISD::XOR, DL, VT, RegisterSizeMinus1, Shamt);
+
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+ SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
+ SDValue ShiftLeftHi =
+ DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, RegisterSizeMinus1Shamt);
+ SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
+ SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+ SDValue LoFalse =
+ DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusRegisterSize);
+ SDValue HiFalse =
+ IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, RegisterSizeMinus1) : Zero;
+
+ SDValue CC =
+ DAG.getSetCC(DL, MVT::i8, ShamtMinusRegisterSize, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+ return DAG.getMergeValues({Lo, Hi}, DL);
+}
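The right-shift expansion mirrors the left-shift one: the select on Shamt - register size picks between the combined two-word path and the degenerate path that only reads the high word. The matching sanity-check sketch, again plain C++ rather than LLVM code:

#include <cstdint>

// Sketch only: 64-bit logical/arithmetic right shift from 32-bit
// halves, following the commented SRL/SRA expansions above.
void shr64Parts(uint32_t Lo, uint32_t Hi, unsigned Shamt, bool IsSRA,
                uint32_t &OutLo, uint32_t &OutHi) {
  if (Shamt < 32) {
    // (Hi << 1) << (31 ^ Shamt) equals Hi << (32 - Shamt) without an
    // undefined shift by 32 when Shamt is 0.
    OutLo = (Lo >> Shamt) | ((Hi << 1) << (31 ^ Shamt));
    OutHi = IsSRA ? uint32_t(int32_t(Hi) >> Shamt) : Hi >> Shamt;
  } else {
    OutLo = IsSRA ? uint32_t(int32_t(Hi) >> (Shamt - 32))
                  : Hi >> (Shamt - 32);
    OutHi = IsSRA ? uint32_t(int32_t(Hi) >> 31) : 0;
  }
}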
+
//===----------------------------------------------------------------------===//
// DAG Combine
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h
index 9375a99962eb..f759a7d939c8 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.h
+++ b/llvm/lib/Target/M68k/M68kISelLowering.h
@@ -220,6 +220,8 @@ private:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool IsVarArg,
diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index ef50de576641..2339e3caa517 100644
--- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -28,9 +28,34 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// OPMODE Encoding
+//===----------------------------------------------------------------------===//
+class MxOpModeEncoding<bits<3> encoding> {
+ bits<3> Value = encoding;
+}
+
+// op EA, Dn
+def MxOpMode8_d_EA : MxOpModeEncoding<0b000>;
+def MxOpMode16_d_EA : MxOpModeEncoding<0b001>;
+def MxOpMode32_d_EA : MxOpModeEncoding<0b010>;
+
+// op Dn, EA
+def MxOpMode8_EA_d : MxOpModeEncoding<0b100>;
+def MxOpMode16_EA_d : MxOpModeEncoding<0b101>;
+def MxOpMode32_EA_d : MxOpModeEncoding<0b110>;
+
+// op EA, An
+def MxOpMode16_a_EA : MxOpModeEncoding<0b011>;
+def MxOpMode32_a_EA : MxOpModeEncoding<0b111>;
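The OPMODE groups above reduce to a small direction/size lookup: the low bits select the size, bit 2 flips the direction between "op EA, Dn" and "op Dn, EA", and address-register destinations use the dedicated 011/111 rows. A hedged illustration in plain C++ (opModeForData is an expository helper, not anything in this patch):

#include <cassert>
#include <cstdint>

// Expository helper only: compute the 3-bit OPMODE for the
// data-register forms defined above.
uint8_t opModeForData(unsigned SizeInBits, bool EAIsSource) {
  uint8_t SizeBits;
  switch (SizeInBits) {
  case 8:  SizeBits = 0b000; break; // byte
  case 16: SizeBits = 0b001; break; // word
  case 32: SizeBits = 0b010; break; // long
  default: assert(false && "unsupported size"); return 0;
  }
  // Bit 2 clear: "op EA, Dn" (EA is the source).
  // Bit 2 set:   "op Dn, EA" (the data register is the source).
  return EAIsSource ? SizeBits : uint8_t(0b100 | SizeBits);
}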
+
+
+//===----------------------------------------------------------------------===//
// Encoding
//===----------------------------------------------------------------------===//
+let Defs = [CCR] in {
+let Constraints = "$src = $dst" in {
+
/// Encoding for Normal forms
/// ----------------------------------------------------
/// F E D C | B A 9 | 8 7 6 | 5 4 3 | 2 1 0
@@ -38,23 +63,54 @@
/// | | | EFFECTIVE ADDRESS
/// x x x x | REG | OP MODE | MODE | REG
/// ----------------------------------------------------
-class MxArithEncoding<MxBead4Bits CMD, MxEncOpMode OPMODE, MxBead REG,
- MxEncEA EA, MxEncExt EXT>
- : MxEncoding<EA.Reg, EA.DA, EA.Mode, OPMODE.B0, OPMODE.B1, OPMODE.B2, REG,
- CMD,EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>;
-/// Encoding for Extended forms
-/// ------------------------------------------------------
-/// F E D C | B A 9 | 8 | 7 6 | 5 4 | 3 | 2 1 0
-/// ------------------------------------------------------
-/// x x x x | REG Rx | 1 | SIZE | 0 0 | M | REG Ry
-/// ------------------------------------------------------
-/// Rx - destination
-/// Ry - source
-/// M - address mode switch
-class MxArithXEncoding<MxBead4Bits CMD, MxEncSize SIZE, MxBead1Bit MODE,
- MxBeadDReg SRC, MxBeadDReg DST>
- : MxEncoding<SRC, MODE, MxBead2Bits<0b00>, SIZE, MxBead1Bit<0b1>, DST, CMD>;
+// $reg, $ccr <- $reg op $reg
+class MxBiArOp_R_RR_xEA<string MN, SDNode NODE, MxType DST_TYPE, MxType SRC_TYPE,
+ bits<4> CMD>
+ : MxInst<(outs DST_TYPE.ROp:$dst), (ins DST_TYPE.ROp:$src, SRC_TYPE.ROp:$opd),
+ MN#"."#DST_TYPE.Prefix#"\t$opd, $dst",
+ [(set DST_TYPE.VT:$dst, CCR, (NODE DST_TYPE.VT:$src, SRC_TYPE.VT:$opd))]> {
+ let Inst = (descend
+ CMD, (operand "$dst", 3),
+ !cast<MxOpModeEncoding>("MxOpMode"#DST_TYPE.Size#"_"#DST_TYPE.RLet#"_EA").Value,
+ !cond(
+ !eq(SRC_TYPE.RLet, "r") : (descend 0b00, (operand "$opd", 4)),
+ !eq(SRC_TYPE.RLet, "d") : (descend 0b000, (operand "$opd", 3))
+ )
+ );
+}
+
+/// This Op is similar to the one above except it uses the reversed opmode;
+/// some commands (e.g. eor) do not support the dEA or rEA modes and require
+/// EAd for register-only operations.
+/// NOTE: when using dd commands it seems irrelevant which opmode to use, but
+/// some opcodes support address registers and some do not, which creates this
+/// mess.
+class MxBiArOp_R_RR_EAd<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
+ : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd),
+ MN#"."#TYPE.Prefix#"\t$opd, $dst",
+ [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd))]> {
+ let Inst = (descend
+ CMD, (operand "$opd", 3),
+ !cast<MxOpModeEncoding>("MxOpMode"#TYPE.Size#"_EA_"#TYPE.RLet).Value,
+ /*Destination can only be a data register*/
+ /*MODE*/0b000,
+ /*REGISTER*/(operand "$dst", 3));
+}
+
+let mayLoad = 1 in
+class MxBiArOp_R_RM<string MN, SDNode NODE, MxType TYPE, MxOperand OPD, ComplexPattern PAT,
+ bits<4> CMD, MxEncMemOp SRC_ENC>
+ : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, OPD:$opd),
+ MN#"."#TYPE.Prefix#"\t$opd, $dst",
+ [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, (TYPE.Load PAT:$opd)))]> {
+ let Inst = (ascend
+ (descend CMD, (operand "$dst", 3),
+ !cast<MxOpModeEncoding>("MxOpMode"#TYPE.Size#"_"#TYPE.RLet#"_EA").Value,
+ SRC_ENC.EA),
+ SRC_ENC.Supplement
+ );
+}
/// Encoding for Immediate forms
/// ---------------------------------------------------
@@ -69,211 +125,154 @@ class MxArithXEncoding<MxBead4Bits CMD, MxEncSize SIZE, MxBead1Bit MODE,
/// ---------------------------------------------------
/// NOTE It is used to store an immediate to memory, imm-to-reg are handled with
/// normal version
-class MxArithImmEncoding<MxBead4Bits CMD, MxEncSize SIZE,
- MxEncEA DST_EA, MxEncExt DST_EXT, MxEncExt SRC_EXT>
- : MxEncoding<DST_EA.Reg, DST_EA.DA, DST_EA.Mode, SIZE, CMD, MxBead4Bits<0>,
- // Source
- SRC_EXT.Imm, SRC_EXT.B8, SRC_EXT.Scale,
- SRC_EXT.WL, SRC_EXT.DAReg,
- // Destination
- DST_EXT.Imm, DST_EXT.B8, DST_EXT.Scale,
- DST_EXT.WL, DST_EXT.DAReg>;
-
-
-//===----------------------------------------------------------------------===//
-// Add/Sub
-//===----------------------------------------------------------------------===//
-
-let Defs = [CCR] in {
-let Constraints = "$src = $dst" in {
-
-// $reg, $ccr <- $reg op $reg
-class MxBiArOp_RFRR_xEA<string MN, SDNode NODE, MxType DST_TYPE, MxType SRC_TYPE,
- bits<4> CMD, MxBead REG>
- : MxInst<(outs DST_TYPE.ROp:$dst), (ins DST_TYPE.ROp:$src, SRC_TYPE.ROp:$opd),
- MN#"."#DST_TYPE.Prefix#"\t$opd, $dst",
- [(set DST_TYPE.VT:$dst, CCR, (NODE DST_TYPE.VT:$src, SRC_TYPE.VT:$opd))],
- MxArithEncoding<MxBead4Bits<CMD>,
- !cast<MxEncOpMode>("MxOpMode"#DST_TYPE.Size#DST_TYPE.RLet#"EA"),
- REG,
- !cast<MxEncEA>("MxEncEA"#SRC_TYPE.RLet#"_2"),
- MxExtEmpty>>;
-
-/// This Op is similar to the one above except it uses reversed opmode, some
-/// commands(e.g. eor) do not support dEA or rEA modes and require EAd for
-/// register only operations.
-/// NOTE when using dd commands it is irrelevant which opmode to use(as it seems)
-/// but some opcodes support address register and some do not which creates this
-/// mess.
-class MxBiArOp_RFRR_EAd<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
- : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd),
- MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd))],
- MxArithEncoding<MxBead4Bits<CMD>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#"EAd"),
- MxBeadDReg<2>, MxEncEAd_0, MxExtEmpty>>;
// $reg <- $reg op $imm
-class MxBiArOp_RFRI_xEA<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
+class MxBiArOp_R_RI_xEA<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
: MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.IOp:$opd),
MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))],
- MxArithEncoding<MxBead4Bits<CMD>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#TYPE.RLet#"EA"),
- MxBeadDReg<0>, MxEncEAi,
- !cast<MxEncExt>("MxExtI"#TYPE.Size#"_2")>>;
+ [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))]> {
+ let Inst = (ascend
+ (descend CMD, (operand "$dst", 3),
+ !cast<MxOpModeEncoding>("MxOpMode"#TYPE.Size#"_"#TYPE.RLet#"_EA").Value,
+ MxEncAddrMode_i<"opd", TYPE.Size>.EA),
+ MxEncAddrMode_i<"opd", TYPE.Size>.Supplement
+ );
+}
// Again, there are two ways to write an immediate to Dn register either dEA
-// opmode or using *I encoding, and again some instrucitons also support address
+// opmode or using *I encoding, and again some instructions also support address
// registers some do not.
-class MxBiArOp_RFRI<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
+class MxBiArOp_R_RI<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
: MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.IOp:$opd),
MN#"i."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))],
- MxArithImmEncoding<MxBead4Bits<CMD>, !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- !cast<MxEncEA>("MxEncEA"#TYPE.RLet#"_0"), MxExtEmpty,
- !cast<MxEncExt>("MxExtI"#TYPE.Size#"_2")>>;
-
-let mayLoad = 1 in
-class MxBiArOp_RFRM<string MN, SDNode NODE, MxType TYPE, MxOperand OPD, ComplexPattern PAT,
- bits<4> CMD, MxEncEA EA, MxEncExt EXT>
- : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, OPD:$opd),
- MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, (TYPE.Load PAT:$opd)))],
- MxArithEncoding<MxBead4Bits<CMD>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#TYPE.RLet#"EA"),
- MxBeadDReg<0>, EA, EXT>>;
-
+ [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))]> {
+ let Inst = (ascend
+ (descend 0b0000, CMD,
+ !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+             // The destination cannot be an address register, so this is
+             // always the MODE for data register direct mode.
+ /*MODE*/0b000,
+ /*REGISTER*/(operand "$dst", 3)),
+ // Source (i.e. immediate value) encoding
+ MxEncAddrMode_i<"opd", TYPE.Size>.Supplement
+ );
+}
} // Constraints
let mayLoad = 1, mayStore = 1 in {
// FIXME MxBiArOp_FMR/FMI cannot consume CCR from MxAdd/MxSub which leads for
// MxAdd to survive the match and subsequent mismatch.
-class MxBiArOp_FMR<string MN, MxType TYPE, MxOperand MEMOpd,
- bits<4> CMD, MxEncEA EA, MxEncExt EXT>
+class MxBiArOp_MR<string MN, MxType TYPE,
+ MxOperand MEMOpd, bits<4> CMD, MxEncMemOp DST_ENC>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$opd),
- MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [],
- MxArithEncoding<MxBead4Bits<CMD>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#"EA"#TYPE.RLet),
- MxBeadDReg<1>, EA, EXT>>;
+ MN#"."#TYPE.Prefix#"\t$opd, $dst", []> {
+ let Inst = (ascend
+ (descend CMD, (operand "$opd", 3),
+ !cast<MxOpModeEncoding>("MxOpMode"#TYPE.Size#"_EA_"#TYPE.RLet).Value,
+ DST_ENC.EA),
+ DST_ENC.Supplement
+ );
+}
-class MxBiArOp_FMI<string MN, MxType TYPE, MxOperand MEMOpd,
- bits<4> CMD, MxEncEA MEMEA, MxEncExt MEMExt>
+class MxBiArOp_MI<string MN, MxType TYPE,
+ MxOperand MEMOpd, bits<4> CMD, MxEncMemOp DST_ENC>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$opd),
- MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [],
- MxArithImmEncoding<MxBead4Bits<CMD>,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- MEMEA, MEMExt,
- !cast<MxEncExt>("MxExtI"#TYPE.Size#"_1")>>;
+ MN#"."#TYPE.Prefix#"\t$opd, $dst", []> {
+ let Inst = (ascend
+ (descend 0b0000, CMD,
+ !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ DST_ENC.EA),
+ // Source (i.e. immediate value) encoding
+ MxEncAddrMode_i<"opd", TYPE.Size>.Supplement,
+ // Destination encoding
+ DST_ENC.Supplement
+ );
+}
} // mayLoad, mayStore
} // Defs = [CCR]
multiclass MxBiArOp_DF<string MN, SDNode NODE, bit isComm,
bits<4> CMD, bits<4> CMDI> {
- // op $mem, $reg
- def NAME#"8dk" : MxBiArOp_RFRM<MN, NODE, MxType8d, MxType8.KOp, MxType8.KPat,
- CMD, MxEncEAk, MxExtBrief_2>;
- def NAME#"16dk" : MxBiArOp_RFRM<MN, NODE, MxType16d, MxType16.KOp, MxType16.KPat,
- CMD, MxEncEAk, MxExtBrief_2>;
- def NAME#"32dk" : MxBiArOp_RFRM<MN, NODE, MxType32d, MxType32.KOp, MxType32.KPat,
- CMD, MxEncEAk, MxExtBrief_2>;
-
- def NAME#"8dq" : MxBiArOp_RFRM<MN, NODE, MxType8d, MxType8.QOp, MxType8.QPat,
- CMD, MxEncEAq, MxExtI16_2>;
- def NAME#"16dq" : MxBiArOp_RFRM<MN, NODE, MxType16d, MxType16.QOp, MxType16.QPat,
- CMD, MxEncEAq, MxExtI16_2>;
- def NAME#"32dq" : MxBiArOp_RFRM<MN, NODE, MxType32d, MxType32.QOp, MxType32.QPat,
- CMD, MxEncEAq, MxExtI16_2>;
-
- def NAME#"8dp" : MxBiArOp_RFRM<MN, NODE, MxType8d, MxType8.POp, MxType8.PPat,
- CMD, MxEncEAp_2, MxExtI16_2>;
- def NAME#"16dp" : MxBiArOp_RFRM<MN, NODE, MxType16d, MxType16.POp, MxType16.PPat,
- CMD, MxEncEAp_2, MxExtI16_2>;
- def NAME#"32dp" : MxBiArOp_RFRM<MN, NODE, MxType32d, MxType32.POp, MxType32.PPat,
- CMD, MxEncEAp_2, MxExtI16_2>;
-
- def NAME#"8df" : MxBiArOp_RFRM<MN, NODE, MxType8d, MxType8.FOp, MxType8.FPat,
- CMD, MxEncEAf_2, MxExtBrief_2>;
- def NAME#"16df" : MxBiArOp_RFRM<MN, NODE, MxType16d, MxType16.FOp, MxType16.FPat,
- CMD, MxEncEAf_2, MxExtBrief_2>;
- def NAME#"32df" : MxBiArOp_RFRM<MN, NODE, MxType32d, MxType32.FOp, MxType32.FPat,
- CMD, MxEncEAf_2, MxExtBrief_2>;
-
- def NAME#"8dj" : MxBiArOp_RFRM<MN, NODE, MxType8d, MxType8.JOp, MxType8.JPat,
- CMD, MxEncEAj_2, MxExtEmpty>;
- def NAME#"16dj" : MxBiArOp_RFRM<MN, NODE, MxType16d, MxType16.JOp, MxType16.JPat,
- CMD, MxEncEAj_2, MxExtEmpty>;
- def NAME#"32dj" : MxBiArOp_RFRM<MN, NODE, MxType32d, MxType32.JOp, MxType32.JPat,
- CMD, MxEncEAj_2, MxExtEmpty>;
-
- // op $imm, $reg
- def NAME#"8di" : MxBiArOp_RFRI_xEA<MN, NODE, MxType8d, CMD>;
- def NAME#"16di" : MxBiArOp_RFRI_xEA<MN, NODE, MxType16d, CMD>;
- def NAME#"32di" : MxBiArOp_RFRI_xEA<MN, NODE, MxType32d, CMD>;
-
- // op $reg, $mem
- def NAME#"8pd" : MxBiArOp_FMR<MN, MxType8d, MxType8.POp,
- CMD, MxEncEAp_0, MxExtI16_0>;
- def NAME#"16pd" : MxBiArOp_FMR<MN, MxType16d, MxType16.POp,
- CMD, MxEncEAp_0, MxExtI16_0>;
- def NAME#"32pd" : MxBiArOp_FMR<MN, MxType32d, MxType32.POp,
- CMD, MxEncEAp_0, MxExtI16_0>;
-
- def NAME#"8fd" : MxBiArOp_FMR<MN, MxType8d, MxType8.FOp,
- CMD, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"16fd" : MxBiArOp_FMR<MN, MxType16d, MxType16.FOp,
- CMD, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"32fd" : MxBiArOp_FMR<MN, MxType32d, MxType32.FOp,
- CMD, MxEncEAf_0, MxExtBrief_0>;
-
- def NAME#"8jd" : MxBiArOp_FMR<MN, MxType8d, MxType8.JOp,
- CMD, MxEncEAj_0, MxExtEmpty>;
- def NAME#"16jd" : MxBiArOp_FMR<MN, MxType16d, MxType16.JOp,
- CMD, MxEncEAj_0, MxExtEmpty>;
- def NAME#"32jd" : MxBiArOp_FMR<MN, MxType32d, MxType32.JOp,
- CMD, MxEncEAj_0, MxExtEmpty>;
-
- // op $imm, $mem
- def NAME#"8pi" : MxBiArOp_FMI<MN, MxType8, MxType8.POp,
- CMDI, MxEncEAp_0, MxExtI16_0>;
- def NAME#"16pi" : MxBiArOp_FMI<MN, MxType16, MxType16.POp,
- CMDI, MxEncEAp_0, MxExtI16_0>;
- def NAME#"32pi" : MxBiArOp_FMI<MN, MxType32, MxType32.POp,
- CMDI, MxEncEAp_0, MxExtI16_0>;
-
- def NAME#"8fi" : MxBiArOp_FMI<MN, MxType8, MxType8.FOp,
- CMDI, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"16fi" : MxBiArOp_FMI<MN, MxType16, MxType16.FOp,
- CMDI, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"32fi" : MxBiArOp_FMI<MN, MxType32, MxType32.FOp,
- CMDI, MxEncEAf_0, MxExtBrief_0>;
-
- def NAME#"8ji" : MxBiArOp_FMI<MN, MxType8, MxType8.JOp,
- CMDI, MxEncEAj_0, MxExtEmpty>;
- def NAME#"16ji" : MxBiArOp_FMI<MN, MxType16, MxType16.JOp,
- CMDI, MxEncEAj_0, MxExtEmpty>;
- def NAME#"32ji" : MxBiArOp_FMI<MN, MxType32, MxType32.JOp,
- CMDI, MxEncEAj_0, MxExtEmpty>;
-
- def NAME#"16dr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, MxType16r,
- CMD, MxBeadDReg<0>>;
- def NAME#"32dr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, MxType32r,
- CMD, MxBeadDReg<0>>;
-
- let isCommutable = isComm in {
-
- def NAME#"8dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType8d, MxType8d,
- CMD, MxBeadDReg<0>>;
- def NAME#"16dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, MxType16d,
- CMD, MxBeadDReg<0>>;
- def NAME#"32dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, MxType32d,
- CMD, MxBeadDReg<0>>;
-
- } // isComm
+ foreach SZ = [8, 16, 32] in {
+ // op $mem, $reg
+ def NAME#SZ#"dk" : MxBiArOp_R_RM<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).KOp,
+ !cast<MxType>("MxType"#SZ).KPat,
+ CMD, MxEncAddrMode_k<"opd">>;
+
+ def NAME#SZ#"dq" : MxBiArOp_R_RM<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).QOp,
+ !cast<MxType>("MxType"#SZ).QPat,
+ CMD, MxEncAddrMode_q<"opd">>;
+
+ def NAME#SZ#"dp" : MxBiArOp_R_RM<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).POp,
+ !cast<MxType>("MxType"#SZ).PPat,
+ CMD, MxEncAddrMode_p<"opd">>;
+
+ def NAME#SZ#"df" : MxBiArOp_R_RM<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).FOp,
+ !cast<MxType>("MxType"#SZ).FPat,
+ CMD, MxEncAddrMode_f<"opd">>;
+
+ def NAME#SZ#"dj" : MxBiArOp_R_RM<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).JOp,
+ !cast<MxType>("MxType"#SZ).JPat,
+ CMD, MxEncAddrMode_j<"opd">>;
+ // op $imm, $reg
+ def NAME#SZ#"di" : MxBiArOp_R_RI_xEA<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ CMD>;
+ // op $reg, $mem
+ def NAME#SZ#"pd" : MxBiArOp_MR<MN,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).POp,
+ CMD, MxEncAddrMode_p<"dst">>;
+
+ def NAME#SZ#"fd" : MxBiArOp_MR<MN,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).FOp,
+ CMD, MxEncAddrMode_f<"dst">>;
+
+ def NAME#SZ#"jd" : MxBiArOp_MR<MN,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ).JOp,
+ CMD, MxEncAddrMode_j<"dst">>;
+ // op $imm, $mem
+ def NAME#SZ#"pi" : MxBiArOp_MI<MN,
+ !cast<MxType>("MxType"#SZ),
+ !cast<MxType>("MxType"#SZ).POp,
+ CMDI, MxEncAddrMode_p<"dst">>;
+
+ def NAME#SZ#"fi" : MxBiArOp_MI<MN,
+ !cast<MxType>("MxType"#SZ),
+ !cast<MxType>("MxType"#SZ).FOp,
+ CMDI, MxEncAddrMode_f<"dst">>;
+
+ def NAME#SZ#"ji" : MxBiArOp_MI<MN,
+ !cast<MxType>("MxType"#SZ),
+ !cast<MxType>("MxType"#SZ).JOp,
+ CMDI, MxEncAddrMode_j<"dst">>;
+ // op $reg, $reg
+ let isCommutable = isComm in
+ def NAME#SZ#"dd" : MxBiArOp_R_RR_xEA<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ#"d"),
+ CMD>;
+ } // foreach SZ
+
+ foreach SZ = [16, 32] in
+ def NAME#SZ#"dr" : MxBiArOp_R_RR_xEA<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ !cast<MxType>("MxType"#SZ#"r"),
+ CMD>;
} // MxBiArOp_DF
@@ -284,25 +283,28 @@ multiclass MxBiArOp_DF<string MN, SDNode NODE, bit isComm,
let Pattern = [(null_frag)] in
multiclass MxBiArOp_AF<string MN, SDNode NODE, bits<4> CMD> {
- def NAME#"32ak" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.KOp, MxType32.KPat,
- CMD, MxEncEAk, MxExtBrief_2>;
- def NAME#"32aq" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.QOp, MxType32.QPat,
- CMD, MxEncEAq, MxExtI16_2>;
- def NAME#"32af" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.FOp, MxType32.FPat,
- CMD, MxEncEAf_2, MxExtBrief_2>;
- def NAME#"32ap" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.POp, MxType32.PPat,
- CMD, MxEncEAp_2, MxExtI16_2>;
- def NAME#"32aj" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.JOp, MxType32.JPat,
- CMD, MxEncEAj_2, MxExtEmpty>;
- def NAME#"32ai" : MxBiArOp_RFRI_xEA<MN, NODE, MxType32a, CMD>;
-
- def NAME#"32ar" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32a, MxType32r,
- CMD, MxBeadReg<0>>;
+ def NAME#"32ak" : MxBiArOp_R_RM<MN, NODE, MxType32a, MxType32.KOp, MxType32.KPat,
+ CMD, MxEncAddrMode_k<"opd">>;
+ def NAME#"32aq" : MxBiArOp_R_RM<MN, NODE, MxType32a, MxType32.QOp, MxType32.QPat,
+ CMD, MxEncAddrMode_q<"opd">>;
+ def NAME#"32af" : MxBiArOp_R_RM<MN, NODE, MxType32a, MxType32.FOp, MxType32.FPat,
+ CMD, MxEncAddrMode_f<"opd">>;
+ def NAME#"32ap" : MxBiArOp_R_RM<MN, NODE, MxType32a, MxType32.POp, MxType32.PPat,
+ CMD, MxEncAddrMode_p<"opd">>;
+ def NAME#"32aj" : MxBiArOp_R_RM<MN, NODE, MxType32a, MxType32.JOp, MxType32.JPat,
+ CMD, MxEncAddrMode_j<"opd">>;
+ def NAME#"32ai" : MxBiArOp_R_RI_xEA<MN, NODE, MxType32a, CMD>;
+
+ def NAME#"32ar" : MxBiArOp_R_RR_xEA<MN, NODE, MxType32a, MxType32r, CMD>;
} // MxBiArOp_AF
// NOTE These naturally produce CCR
+//===----------------------------------------------------------------------===//
+// Add/Sub
+//===----------------------------------------------------------------------===//
+
defm ADD : MxBiArOp_DF<"add", MxAdd, 1, 0xD, 0x6>;
defm ADD : MxBiArOp_AF<"adda", MxAdd, 0xD>;
defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>;
@@ -312,26 +314,42 @@ defm SUB : MxBiArOp_AF<"suba", MxSub, 0x9>;
let Uses = [CCR], Defs = [CCR] in {
let Constraints = "$src = $dst" in {
+/// Encoding for Extended forms
+/// ------------------------------------------------------
+/// F E D C | B A 9 | 8 | 7 6 | 5 4 | 3 | 2 1 0
+/// ------------------------------------------------------
+/// x x x x | REG Rx | 1 | SIZE | 0 0 | M | REG Ry
+/// ------------------------------------------------------
+/// Rx - destination
+/// Ry - source
+/// M - address mode switch
+
// $reg, ccr <- $reg op $reg op ccr
-class MxBiArOp_RFRRF<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
+class MxBiArOp_R_RRX<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
: MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd),
MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd, CCR))],
- MxArithXEncoding<MxBead4Bits<CMD>,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- MxBead1Bit<0>, MxBeadDReg<2>, MxBeadDReg<0>>>;
-
+ [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd, CCR))]> {
+ let Inst = (descend CMD,
+ // Destination register
+ (operand "$dst", 3),
+ 0b1,
+ // SIZE
+ !cond(!eq(TYPE.Size, 8): 0b00,
+ !eq(TYPE.Size, 16): 0b01,
+ !eq(TYPE.Size, 32): 0b10),
+ 0b00, /*R/M*/0b0,
+ // Source register
+ (operand "$opd", 3)
+ );
+}
} // Constraints
} // Uses, Defs
multiclass MxBiArOp_RFF<string MN, SDNode NODE, bit isComm, bits<4> CMD> {
let isCommutable = isComm in {
-
- def NAME#"8dd" : MxBiArOp_RFRRF<MN, NODE, MxType8d, CMD>;
- def NAME#"16dd" : MxBiArOp_RFRRF<MN, NODE, MxType16d, CMD>;
- def NAME#"32dd" : MxBiArOp_RFRRF<MN, NODE, MxType32d, CMD>;
-
+ foreach SZ = [8, 16, 32] in
+ def NAME#SZ#"dd" : MxBiArOp_R_RRX<MN, NODE, !cast<MxType>("MxType"#SZ#"d"), CMD>;
} // isComm
} // MxBiArOp_RFF
@@ -349,19 +367,16 @@ defm AND : MxBiArOp_DF<"and", MxAnd, 1, 0xC, 0x2>;
defm OR : MxBiArOp_DF<"or", MxOr, 1, 0x8, 0x0>;
multiclass MxBiArOp_DF_EAd<string MN, SDNode NODE, bits<4> CMD, bits<4> CMDI> {
-
- let isCommutable = 1 in {
-
- def NAME#"8dd" : MxBiArOp_RFRR_EAd<MN, NODE, MxType8d, CMD>;
- def NAME#"16dd" : MxBiArOp_RFRR_EAd<MN, NODE, MxType16d, CMD>;
- def NAME#"32dd" : MxBiArOp_RFRR_EAd<MN, NODE, MxType32d, CMD>;
-
- } // isCommutable = 1
-
- def NAME#"8di" : MxBiArOp_RFRI<MN, NODE, MxType8d, CMDI>;
- def NAME#"16di" : MxBiArOp_RFRI<MN, NODE, MxType16d, CMDI>;
- def NAME#"32di" : MxBiArOp_RFRI<MN, NODE, MxType32d, CMDI>;
-
+ foreach SZ = [8, 16, 32] in {
+ let isCommutable = 1 in
+ def NAME#SZ#"dd" : MxBiArOp_R_RR_EAd<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ CMD>;
+
+ def NAME#SZ#"di" : MxBiArOp_R_RI<MN, NODE,
+ !cast<MxType>("MxType"#SZ#"d"),
+ CMDI>;
+ } // foreach SZ
} // MxBiArOp_DF_EAd
defm XOR : MxBiArOp_DF_EAd<"eor", MxXor, 0xB, 0xA>;
@@ -372,84 +387,112 @@ defm XOR : MxBiArOp_DF_EAd<"eor", MxXor, 0xB, 0xA>;
//===----------------------------------------------------------------------===//
let Defs = [CCR] in {
-class MxCmp_RR<MxType LHS_TYPE, MxType RHS_TYPE = LHS_TYPE,
- MxBead REG = MxBeadDReg<1>>
+class MxCmp_RR<MxType LHS_TYPE, MxType RHS_TYPE = LHS_TYPE>
: MxInst<(outs), (ins LHS_TYPE.ROp:$lhs, RHS_TYPE.ROp:$rhs),
"cmp."#RHS_TYPE.Prefix#"\t$lhs, $rhs",
- [(set CCR, (MxCmp LHS_TYPE.VT:$lhs, RHS_TYPE.VT:$rhs))],
- MxArithEncoding<MxBead4Bits<0xB>,
- !cast<MxEncOpMode>("MxOpMode"#RHS_TYPE.Size#RHS_TYPE.RLet#"EA"),
- REG,
- !cast<MxEncEA>("MxEncEA"#LHS_TYPE.RLet#"_0"),
- MxExtEmpty>>;
+ [(set CCR, (MxCmp LHS_TYPE.VT:$lhs, RHS_TYPE.VT:$rhs))]> {
+ let Inst = (descend 0b1011,
+ // REGISTER
+ (operand "$rhs", 3),
+ // OPMODE
+ !cast<MxOpModeEncoding>("MxOpMode"#RHS_TYPE.Size#"_"#RHS_TYPE.RLet#"_EA").Value,
+ // MODE without last bit
+ 0b00,
+ // REGISTER prefixed by D/A bit
+ (operand "$lhs", 4)
+ );
+}
class MxCmp_RI<MxType TYPE>
: MxInst<(outs), (ins TYPE.IOp:$imm, TYPE.ROp:$reg),
"cmpi."#TYPE.Prefix#"\t$imm, $reg",
- [(set CCR, (MxCmp TYPE.IPat:$imm, TYPE.VT:$reg))],
- MxArithImmEncoding<MxBead4Bits<0xC>,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- MxEncEAd_1, MxExtEmpty,
- !cast<MxEncExt>("MxExtI"#TYPE.Size#"_0")>>;
+ [(set CCR, (MxCmp TYPE.IPat:$imm, TYPE.VT:$reg))]> {
+ let Inst = (ascend
+ (descend 0b00001100,
+ !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+             // The destination cannot be an address register, so this is
+             // always the MODE for data register direct mode.
+ /*MODE*/0b000,
+ /*REGISTER*/(operand "$reg", 3)),
+ // Source (i.e. immediate value) encoding
+ MxEncAddrMode_i<"imm", TYPE.Size>.Supplement
+ );
+}
let mayLoad = 1 in {
class MxCmp_MI<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxEncEA EA, MxEncExt EXT>
+ MxEncMemOp MEM_ENC>
: MxInst<(outs), (ins TYPE.IOp:$imm, MEMOpd:$mem),
"cmpi."#TYPE.Prefix#"\t$imm, $mem",
- [(set CCR, (MxCmp TYPE.IPat:$imm, (load MEMPat:$mem)))],
- MxArithImmEncoding<MxBead4Bits<0xC>,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- EA, EXT,
- !cast<MxEncExt>("MxExtI"#TYPE.Size#"_0")>>;
+ [(set CCR, (MxCmp TYPE.IPat:$imm, (load MEMPat:$mem)))]> {
+ let Inst = (ascend
+ (descend 0b00001100,
+ !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ MEM_ENC.EA),
+ // Source (i.e. immediate value) encoding
+ MxEncAddrMode_i<"imm", TYPE.Size>.Supplement,
+ // Destination (i.e. memory operand) encoding
+ MEM_ENC.Supplement
+ );
+}
+// FIXME: What about abs.W?
class MxCmp_BI<MxType TYPE>
: MxInst<(outs), (ins TYPE.IOp:$imm, MxAL32:$abs),
"cmpi."#TYPE.Prefix#"\t$imm, $abs",
[(set CCR, (MxCmp TYPE.IPat:$imm,
- (load (i32 (MxWrapper tglobaladdr:$abs)))))],
- MxArithImmEncoding<MxBead4Bits<0xC>,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- MxEncEAb, MxExtI32_1,
- !cast<MxEncExt>("MxExtI"#TYPE.Size#"_0")>>;
+ (load (i32 (MxWrapper tglobaladdr:$abs)))))]> {
+ defvar AbsEncoding = MxEncAddrMode_abs<"abs", true>;
+ let Inst = (ascend
+ (descend 0b00001100,
+ !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ AbsEncoding.EA),
+ // Source (i.e. immediate value) encoding
+ MxEncAddrMode_i<"imm", TYPE.Size>.Supplement,
+ // Destination (i.e. memory operand) encoding
+ AbsEncoding.Supplement
+ );
+}
class MxCmp_RM<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxEncEA EA, MxEncExt EXT>
+ MxEncMemOp MEM_ENC>
: MxInst<(outs), (ins TYPE.ROp:$reg, MEMOpd:$mem),
"cmp."#TYPE.Prefix#"\t$mem, $reg",
- [(set CCR, (MxCmp (load MEMPat:$mem), TYPE.ROp:$reg))],
- MxArithEncoding<MxBead4Bits<0xB>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#"dEA"),
- MxBeadDReg<0>, EA, EXT>>;
+ [(set CCR, (MxCmp (load MEMPat:$mem), TYPE.ROp:$reg))]> {
+ let Inst = (ascend
+ (descend 0b1011,
+ // REGISTER
+ (operand "$reg", 3),
+ // OPMODE
+ !cast<MxOpModeEncoding>("MxOpMode"#TYPE.Size#"_d_EA").Value,
+ MEM_ENC.EA),
+ MEM_ENC.Supplement
+ );
+}
} // let mayLoad = 1
} // let Defs = [CCR]
multiclass MMxCmp_RM<MxType TYPE> {
- def NAME#TYPE.KOp.Letter : MxCmp_RM<TYPE, TYPE.KOp, TYPE.KPat, MxEncEAk,
- MxExtBrief_1>;
- def NAME#TYPE.QOp.Letter : MxCmp_RM<TYPE, TYPE.QOp, TYPE.QPat, MxEncEAq,
- MxExtI16_1>;
- def NAME#TYPE.POp.Letter : MxCmp_RM<TYPE, TYPE.POp, TYPE.PPat, MxEncEAp_1,
- MxExtI16_1>;
- def NAME#TYPE.FOp.Letter : MxCmp_RM<TYPE, TYPE.FOp, TYPE.FPat, MxEncEAf_1,
- MxExtBrief_1>;
- def NAME#TYPE.JOp.Letter : MxCmp_RM<TYPE, TYPE.JOp, TYPE.JPat, MxEncEAj_1,
- MxExtEmpty>;
+ def NAME#TYPE.KOp.Letter : MxCmp_RM<TYPE, TYPE.KOp, TYPE.KPat, MxEncAddrMode_k<"mem">>;
+ def NAME#TYPE.QOp.Letter : MxCmp_RM<TYPE, TYPE.QOp, TYPE.QPat, MxEncAddrMode_q<"mem">>;
+ def NAME#TYPE.POp.Letter : MxCmp_RM<TYPE, TYPE.POp, TYPE.PPat, MxEncAddrMode_p<"mem">>;
+ def NAME#TYPE.FOp.Letter : MxCmp_RM<TYPE, TYPE.FOp, TYPE.FPat, MxEncAddrMode_f<"mem">>;
+ def NAME#TYPE.JOp.Letter : MxCmp_RM<TYPE, TYPE.JOp, TYPE.JPat, MxEncAddrMode_j<"mem">>;
}
multiclass MMxCmp_MI<MxType TYPE> {
- def NAME#TYPE.KOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.KOp, TYPE.KPat, MxEncEAk,
- MxExtBrief_1>;
- def NAME#TYPE.QOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.QOp, TYPE.QPat, MxEncEAq,
- MxExtI16_1>;
- def NAME#TYPE.POp.Letter#"i" : MxCmp_MI<TYPE, TYPE.POp, TYPE.PPat, MxEncEAp_1,
- MxExtI16_1>;
- def NAME#TYPE.FOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.FOp, TYPE.FPat, MxEncEAf_1,
- MxExtBrief_1>;
- def NAME#TYPE.JOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.JOp, TYPE.JPat, MxEncEAj_1,
- MxExtEmpty>;
+ def NAME#TYPE.KOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.KOp, TYPE.KPat,
+ MxEncAddrMode_k<"mem">>;
+ def NAME#TYPE.QOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.QOp, TYPE.QPat,
+ MxEncAddrMode_q<"mem">>;
+ def NAME#TYPE.POp.Letter#"i" : MxCmp_MI<TYPE, TYPE.POp, TYPE.PPat,
+ MxEncAddrMode_p<"mem">>;
+ def NAME#TYPE.FOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.FOp, TYPE.FPat,
+ MxEncAddrMode_f<"mem">>;
+ def NAME#TYPE.JOp.Letter#"i" : MxCmp_MI<TYPE, TYPE.JOp, TYPE.JPat,
+ MxEncAddrMode_j<"mem">>;
}
foreach S = [8, 16, 32] in {
@@ -478,25 +521,31 @@ defm CMP32 : MMxCmp_MI<MxType32d>;
// EXT
//===----------------------------------------------------------------------===//
-def MxExtOpmode_wb : MxBead3Bits<0b010>;
-def MxExtOpmode_lw : MxBead3Bits<0b011>;
-def MxExtOpmode_lb : MxBead3Bits<0b111>;
-
/// ---------------------------------------------------
/// F E D C B A 9 | 8 7 6 | 5 4 3 | 2 1 0
/// ---------------------------------------------------
/// 0 1 0 0 1 0 0 | OPMODE | 0 0 0 | REG
/// ---------------------------------------------------
-class MxExtEncoding<MxBead3Bits OPMODE>
- : MxEncoding<MxBeadDReg<0>, MxBead3Bits<0b000>, OPMODE,
- MxBead3Bits<0b100>, MxBead4Bits<0b0100>>;
-
let Defs = [CCR] in
let Constraints = "$src = $dst" in
class MxExt<MxType TO, MxType FROM>
: MxInst<(outs TO.ROp:$dst), (ins TO.ROp:$src),
- "ext."#TO.Prefix#"\t$src", [],
- MxExtEncoding<!cast<MxBead3Bits>("MxExtOpmode_"#TO.Prefix#FROM.Prefix)>>;
+ "ext."#TO.Prefix#"\t$src", []> {
+ let Inst = (descend 0b0100100,
+ // OPMODE
+ !cond(
+ // byte -> word
+ !and(!eq(FROM.Size, 8), !eq(TO.Size, 16)): 0b010,
+ // word -> long
+ !and(!eq(FROM.Size, 16), !eq(TO.Size, 32)): 0b011,
+ // byte -> long
+ !and(!eq(FROM.Size, 8), !eq(TO.Size, 32)): 0b111
+ ),
+ 0b000,
+ // REGISTER
+ (operand "$src", 3)
+ );
+}
def EXT16 : MxExt<MxType16d, MxType8d>;
def EXT32 : MxExt<MxType32d, MxType16d>;
@@ -511,9 +560,6 @@ def : Pat<(sext_inreg i32:$src, i8),
// DIV/MUL
//===----------------------------------------------------------------------===//
-def MxSDiMuOpmode : MxBead3Bits<0b111>;
-def MxUDiMuOpmode : MxBead3Bits<0b011>;
-
/// Word operation:
/// ----------------------------------------------------
/// F E D C | B A 9 | 8 7 6 | 5 4 3 | 2 1 0
@@ -521,40 +567,45 @@ def MxUDiMuOpmode : MxBead3Bits<0b011>;
/// | | | EFFECTIVE ADDRESS
/// x x x x | REG | OP MODE | MODE | REG
/// ----------------------------------------------------
-class MxDiMuEncoding<MxBead4Bits CMD, MxBead3Bits OPMODE, MxEncEA EA, MxEncExt EXT>
- : MxEncoding<EA.Reg, EA.DA, EA.Mode, OPMODE, MxBeadDReg<0>, CMD,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>;
-
let Defs = [CCR] in {
let Constraints = "$src = $dst" in {
-// $reg <- $reg op $reg
-class MxDiMuOp_DD<string MN, bits<4> CMD, MxBead3Bits OPMODE,
+// $dreg <- $dreg op $dreg
+class MxDiMuOp_DD<string MN, bits<4> CMD, bit SIGNED = false,
MxOperand DST, MxOperand OPD>
- : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", [],
- MxDiMuEncoding<MxBead4Bits<CMD>, OPMODE, MxEncEAd_2, MxExtEmpty>>;
+ : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", []> {
+ let Inst = (descend CMD,
+ // REGISTER
+ (operand "$dst", 3),
+ !if(SIGNED, 0b111, 0b011),
+ /*MODE*/0b000, /*REGISTER*/(operand "$opd", 3)
+ );
+}
// $reg <- $reg op $imm
-class MxDiMuOp_DI<string MN, bits<4> CMD, MxBead3Bits OPMODE,
+class MxDiMuOp_DI<string MN, bits<4> CMD, bit SIGNED = false,
MxOperand DST, MxOperand OPD>
- : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", [],
- MxDiMuEncoding<MxBead4Bits<CMD>, OPMODE, MxEncEAi, MxExtI16_2>>;
+ : MxInst<(outs DST:$dst), (ins DST:$src, OPD:$opd), MN#"\t$opd, $dst", []> {
+ // FIXME: Support immediates with different widths.
+ defvar ImmEnc = MxEncAddrMode_i<"opd", 16>;
+ let Inst = (ascend
+ (descend CMD,
+ // REGISTER
+ (operand "$dst", 3),
+ !if(SIGNED, 0b111, 0b011), ImmEnc.EA),
+ ImmEnc.Supplement
+ );
+}
} // let Constraints
} // Defs = [CCR]
multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> {
-
let isCommutable = isComm in {
- def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, MxSDiMuOpmode, MxDRD32,
- MxDRD16>;
- def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, MxUDiMuOpmode, MxDRD32,
- MxDRD16>;
+ def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>;
+ def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>;
}
- def "S"#NAME#"d32i16" : MxDiMuOp_DI<MN#"s", CMD, MxSDiMuOpmode, MxDRD32,
- Mxi16imm>;
- def "U"#NAME#"d32i16" : MxDiMuOp_DI<MN#"u", CMD, MxUDiMuOpmode, MxDRD32,
- Mxi16imm>;
-
+ def "S"#NAME#"d32i16" : MxDiMuOp_DI<MN#"s", CMD, /*SIGNED*/true, MxDRD32, Mxi16imm>;
+ def "U"#NAME#"d32i16" : MxDiMuOp_DI<MN#"u", CMD, /*SIGNED*/false, MxDRD32, Mxi16imm>;
}
defm DIV : MxDiMuOp<"div", 0x8>;
@@ -697,29 +748,35 @@ def : Pat<(mulhu i16:$dst, MximmSExt16:$opd),
/// | | | EFFECTIVE ADDRESS
/// 0 1 0 0 | x x x x | SIZE | MODE | REG
/// ------------+------------+------+---------+---------
-class MxNEGEncoding<MxBead4Bits CMD, MxEncSize SIZE, MxEncEA EA, MxEncExt EXT>
- : MxEncoding<EA.Reg, EA.DA, EA.Mode, SIZE, CMD, MxBead4Bits<0b0100>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>;
-
let Defs = [CCR] in {
let Constraints = "$src = $dst" in {
class MxNeg_D<MxType TYPE>
: MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src),
"neg."#TYPE.Prefix#"\t$dst",
- [(set TYPE.VT:$dst, (ineg TYPE.VT:$src))],
- MxNEGEncoding<MxBead4Bits<0x4>,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- MxEncEAd_0, MxExtEmpty>>;
+ [(set TYPE.VT:$dst, (ineg TYPE.VT:$src))]> {
+ let Inst = (descend 0b01000100,
+ /*SIZE*/!cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+    // MODE without last bit
+ 0b00,
+    // REGISTER prefixed by D/A bit
+ (operand "$dst", 4)
+ );
+}
let Uses = [CCR] in {
class MxNegX_D<MxType TYPE>
: MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src),
"negx."#TYPE.Prefix#"\t$dst",
- [(set TYPE.VT:$dst, (MxSubX 0, TYPE.VT:$src, CCR))],
- MxNEGEncoding<MxBead4Bits<0x0>,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size),
- MxEncEAd_0, MxExtEmpty>>;
+ [(set TYPE.VT:$dst, (MxSubX 0, TYPE.VT:$src, CCR))]> {
+ let Inst = (descend 0b01000000,
+ /*SIZE*/!cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+    // MODE without last bit
+ 0b00,
+    // REGISTER prefixed by D/A bit
+ (operand "$dst", 4)
+ );
+}
}
} // let Constraints
diff --git a/llvm/lib/Target/M68k/M68kInstrBits.td b/llvm/lib/Target/M68k/M68kInstrBits.td
index 0d1278102378..abd2ab3cf012 100644
--- a/llvm/lib/Target/M68k/M68kInstrBits.td
+++ b/llvm/lib/Target/M68k/M68kInstrBits.td
@@ -32,9 +32,15 @@
/// ------------+---------+---------+---------+---------
/// 0 0 0 0 | REG | 1 0 0 | MODE | REG
/// ------------+---------+---------+---------+---------
-class MxBTSTEnc_R<MxBeadDReg REG, MxEncEA EA, MxEncExt EXT>
- : MxEncoding<EA.Reg, EA.DA, EA.Mode, MxBead3Bits<0b100>, REG, MxBead4Bits<0b0000>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>;
+class MxBTSTEnc_R<MxEncMemOp dst_enc, string bitno_name> {
+ dag Value = (ascend
+ (descend 0b0000,
+ (operand "$"#bitno_name, 3),
+ 0b100, dst_enc.EA
+ ),
+ dst_enc.Supplement
+ );
+}
/// -------------------------------+---------+---------
/// F E D C B A 9 8 . 7 6 | 5 4 3 | 2 1 0
@@ -43,33 +49,40 @@ class MxBTSTEnc_R<MxBeadDReg REG, MxEncEA EA, MxEncExt EXT>
/// ------------------------+------+---------+---------
/// 0 0 0 0 0 0 0 0 | BIT NUMBER
/// ------------------------+--------------------------
-class MxBTSTEnc_I<MxBead8Imm IMM, MxEncEA EA, MxEncExt EXT>
- : MxEncoding<EA.Reg, EA.DA, EA.Mode, MxBead2Bits<0b00>,
- MxBead4Bits<0b1000>, MxBead4Bits<0b0000>, IMM,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>;
+class MxBTSTEnc_I<MxEncMemOp dst_enc, string bitno_name> {
+ dag Value = (ascend
+ (descend 0b0000100000, dst_enc.EA),
+ (descend 0b00000000, (operand "$"#bitno_name, 8)),
+ dst_enc.Supplement
+ );
+}
let Defs = [CCR] in {
class MxBTST_RR<MxType TYPE>
: MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.VT:$bitno))],
- MxBTSTEnc_R<MxBeadDReg<1>, MxEncEAd_0, MxExtEmpty>>;
+ [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.VT:$bitno))]> {
+ let Inst = MxBTSTEnc_R<MxEncAddrMode_r<"dst">, "bitno">.Value;
+}
class MxBTST_RI<MxType TYPE>
: MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.IPat:$bitno))],
- MxBTSTEnc_I<MxBead8Imm<1>, MxEncEAd_0, MxExtEmpty>>;
+ [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.IPat:$bitno))]> {
+ let Inst = MxBTSTEnc_I<MxEncAddrMode_r<"dst">, "bitno">.Value;
+}
class MxBTST_MR<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxEncEA EA, MxEncExt EXT>
+ MxEncMemOp DST_ENC>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno))],
- MxBTSTEnc_R<MxBeadDReg<1>, EA, EXT>>;
+ [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno))]> {
+ let Inst = MxBTSTEnc_R<DST_ENC, "bitno">.Value;
+}
class MxBTST_MI<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxEncEA EA, MxEncExt EXT>
+ MxEncMemOp DST_ENC>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno))],
- MxBTSTEnc_I<MxBead8Imm<1>, EA, EXT>>;
+ [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno))]> {
+ let Inst = MxBTSTEnc_I<DST_ENC, "bitno">.Value;
+}
} // Defs = [CCR]
// Register BTST limited to 32 bits only
@@ -78,31 +91,31 @@ def BTST32di : MxBTST_RI<MxType32d>;
// Memory BTST limited to 8 bits only
def BTST8jd : MxBTST_MR<MxType8d, MxType8.JOp, MxType8.JPat,
- MxEncEAj_0, MxExtEmpty>;
+ MxEncAddrMode_j<"dst">>;
def BTST8od : MxBTST_MR<MxType8d, MxType8.OOp, MxType8.OPat,
- MxEncEAo_0, MxExtEmpty>;
+ MxEncAddrMode_o<"dst">>;
def BTST8ed : MxBTST_MR<MxType8d, MxType8.EOp, MxType8.EPat,
- MxEncEAe_0, MxExtEmpty>;
+ MxEncAddrMode_e<"dst">>;
def BTST8pd : MxBTST_MR<MxType8d, MxType8.POp, MxType8.PPat,
- MxEncEAp_0, MxExtI16_0>;
+ MxEncAddrMode_p<"dst">>;
def BTST8fd : MxBTST_MR<MxType8d, MxType8.FOp, MxType8.FPat,
- MxEncEAf_0, MxExtBrief_0>;
+ MxEncAddrMode_f<"dst">>;
def BTST8qd : MxBTST_MR<MxType8d, MxType8.QOp, MxType8.QPat,
- MxEncEAq, MxExtI16_0>;
+ MxEncAddrMode_q<"dst">>;
def BTST8kd : MxBTST_MR<MxType8d, MxType8.KOp, MxType8.KPat,
- MxEncEAk, MxExtBrief_0>;
+ MxEncAddrMode_k<"dst">>;
def BTST8ji : MxBTST_MI<MxType8d, MxType8.JOp, MxType8.JPat,
- MxEncEAj_0, MxExtEmpty>;
+ MxEncAddrMode_j<"dst">>;
def BTST8oi : MxBTST_MI<MxType8d, MxType8.OOp, MxType8.OPat,
- MxEncEAo_0, MxExtEmpty>;
+ MxEncAddrMode_o<"dst">>;
def BTST8ei : MxBTST_MI<MxType8d, MxType8.EOp, MxType8.EPat,
- MxEncEAe_0, MxExtEmpty>;
+ MxEncAddrMode_e<"dst">>;
def BTST8pi : MxBTST_MI<MxType8d, MxType8.POp, MxType8.PPat,
- MxEncEAp_0, MxExtI16_0>;
+ MxEncAddrMode_p<"dst">>;
def BTST8fi : MxBTST_MI<MxType8d, MxType8.FOp, MxType8.FPat,
- MxEncEAf_0, MxExtBrief_0>;
+ MxEncAddrMode_f<"dst">>;
def BTST8qi : MxBTST_MI<MxType8d, MxType8.QOp, MxType8.QPat,
- MxEncEAq, MxExtI16_0>;
+ MxEncAddrMode_q<"dst">>;
def BTST8ki : MxBTST_MI<MxType8d, MxType8.KOp, MxType8.KPat,
- MxEncEAk, MxExtBrief_0>;
+ MxEncAddrMode_k<"dst">>;
diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td b/llvm/lib/Target/M68k/M68kInstrControl.td
index be9045b6e0d2..d15283c769f6 100644
--- a/llvm/lib/Target/M68k/M68kInstrControl.td
+++ b/llvm/lib/Target/M68k/M68kInstrControl.td
@@ -12,10 +12,10 @@
///
/// Machine:
///
-/// BRA [x] BSR [ ] Bcc [ ] DBcc [ ] FBcc [ ]
+/// BRA [x] BSR [ ] Bcc [~] DBcc [ ] FBcc [ ]
/// FDBcc [ ] FNOP [ ] FPn [ ] FScc [ ] FTST [ ]
/// JMP [~] JSR [x] NOP [x] RTD [!] RTR [ ]
-/// RTS [x] Scc [x] TST [ ]
+/// RTS [x] Scc [~] TST [ ]
///
/// Pseudo:
///
@@ -43,7 +43,9 @@
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
- def NOP : MxInst<(outs), (ins), "nop", [], MxEncFixed<0x4E71>>;
+ def NOP : MxInst<(outs), (ins), "nop", []> {
+ let Inst = (descend 0b0100, 0b1110, 0b0111, 0b0001);
+ }
}
@@ -61,51 +63,60 @@ let hasSideEffects = 0 in {
/// NE—Not equal VS—Overflow set
///
/// *Not applicable to the Bcc instructions.
-def MxCCt : MxBead4Bits<0b0000>;
-def MxCCf : MxBead4Bits<0b0001>;
-def MxCChi : MxBead4Bits<0b0010>;
-def MxCCls : MxBead4Bits<0b0011>;
-def MxCCcc : MxBead4Bits<0b0100>;
-def MxCCcs : MxBead4Bits<0b0101>;
-def MxCCne : MxBead4Bits<0b0110>;
-def MxCCeq : MxBead4Bits<0b0111>;
-def MxCCvc : MxBead4Bits<0b1000>;
-def MxCCvs : MxBead4Bits<0b1001>;
-def MxCCpl : MxBead4Bits<0b1010>;
-def MxCCmi : MxBead4Bits<0b1011>;
-def MxCCge : MxBead4Bits<0b1100>;
-def MxCClt : MxBead4Bits<0b1101>;
-def MxCCgt : MxBead4Bits<0b1110>;
-def MxCCle : MxBead4Bits<0b1111>;
+class MxEncCondOp<bits<4> cond> {
+ dag Value = (descend cond);
+}
+
+def MxCCt : MxEncCondOp<0b0000>;
+def MxCCf : MxEncCondOp<0b0001>;
+def MxCChi : MxEncCondOp<0b0010>;
+def MxCCls : MxEncCondOp<0b0011>;
+def MxCCcc : MxEncCondOp<0b0100>;
+def MxCCcs : MxEncCondOp<0b0101>;
+def MxCCne : MxEncCondOp<0b0110>;
+def MxCCeq : MxEncCondOp<0b0111>;
+def MxCCvc : MxEncCondOp<0b1000>;
+def MxCCvs : MxEncCondOp<0b1001>;
+def MxCCpl : MxEncCondOp<0b1010>;
+def MxCCmi : MxEncCondOp<0b1011>;
+def MxCCge : MxEncCondOp<0b1100>;
+def MxCClt : MxEncCondOp<0b1101>;
+def MxCCgt : MxEncCondOp<0b1110>;
+def MxCCle : MxEncCondOp<0b1111>;
+
+
/// --------------------------------+---------+---------
/// F E D C | B A 9 8 | 7 6 | 5 4 3 | 2 1 0
/// --------------------------------+---------+---------
/// 0 1 0 1 | CONDITION | 1 1 | MODE | REG
/// ----------------------------------------------------
-class MxSccEncoding<MxEncEA EA, MxEncExt EXT, MxBead4Bits CC>
- : MxEncoding<EA.Reg, EA.DA, EA.Mode, MxBead2Bits<0b11>, CC, MxBead4Bits<0b0101>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>;
let Uses = [CCR] in {
class MxSccR<string CC>
: MxInst<(outs MxDRD8:$dst), (ins), "s"#CC#"\t$dst",
- [(set i8:$dst, (MxSetCC !cast<PatLeaf>("MxCOND"#CC), CCR))],
- MxSccEncoding<MxEncEAd_0, MxExtEmpty,
- !cast<MxBead4Bits>("MxCC"#CC)>>;
+ [(set i8:$dst, (MxSetCC !cast<PatLeaf>("MxCOND"#CC), CCR))]> {
+ let Inst = (descend 0b0101, !cast<MxEncCondOp>("MxCC"#CC).Value, 0b11,
+ /*MODE without last bit*/0b00,
+ /*REGISTER prefixed with D/A bit*/(operand "$dst", 4));
+}
-class MxSccM<string CC, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxEncEA EA, MxEncExt EXT>
+class MxSccM<string CC, MxOperand MEMOpd, ComplexPattern MEMPat, MxEncMemOp DST_ENC>
: MxInst<(outs), (ins MEMOpd:$dst), "s"#CC#"\t$dst",
- [(store (MxSetCC !cast<PatLeaf>("MxCOND"#CC), CCR), MEMPat:$dst)],
- MxSccEncoding<EA, EXT, !cast<MxBead4Bits>("MxCC"#CC)>>;
+ [(store (MxSetCC !cast<PatLeaf>("MxCOND"#CC), CCR), MEMPat:$dst)]> {
+ let Inst =
+ (ascend
+ (descend 0b0101, !cast<MxEncCondOp>("MxCC"#CC).Value, 0b11, DST_ENC.EA),
+ DST_ENC.Supplement
+ );
+}
}
foreach cc = [ "cc", "ls", "lt", "eq", "mi", "f", "ne", "ge",
"cs", "pl", "gt", "t", "hi", "vc", "le", "vs"] in {
def SET#"d8"#cc : MxSccR<cc>;
-def SET#"j8"#cc : MxSccM<cc, MxType8.JOp, MxType8.JPat, MxEncEAj_0, MxExtEmpty>;
-def SET#"p8"#cc : MxSccM<cc, MxType8.POp, MxType8.PPat, MxEncEAp_0, MxExtI16_0>;
+def SET#"j8"#cc : MxSccM<cc, MxType8.JOp, MxType8.JPat, MxEncAddrMode_j<"dst">>;
+def SET#"p8"#cc : MxSccM<cc, MxType8.POp, MxType8.PPat, MxEncAddrMode_p<"dst">>;
}
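
The `!cast<MxEncCondOp>("MxCC"#CC).Value` lookups above combine TableGen's string-paste operator with late `!cast` resolution: the record name is assembled from the template argument and only resolved once the class is instantiated. A minimal stand-alone sketch of the idiom (all names hypothetical):

```
class CondOp<bits<4> c> { bits<4> Value = c; }
def MyCCeq : CondOp<0b0111>;

class UsesCond<string cc> {
  // "MyCC" # cc resolves to MyCCeq once UsesCond<"eq"> is instantiated.
  bits<4> CondBits = !cast<CondOp>("MyCC" # cc).Value;
}
def UseEqSketch : UsesCond<"eq">;
```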
//===----------------------------------------------------------------------===//
@@ -118,13 +129,16 @@ def SET#"p8"#cc : MxSccM<cc, MxType8.POp, MxType8.PPat, MxEncEAp_0, MxExtI16_0>;
/// 0 1 0 0 1 1 1 0 1 1 | MODE | REG
///------------------------------+---------+---------
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
-class MxJMP<MxOperand LOCOp, MxEncEA EA, MxEncExt EXT>
- : MxInst<(outs), (ins LOCOp:$dst), "jmp\t$dst", [(brind iPTR:$dst)],
- MxEncoding<EA.Reg, EA.DA, EA.Mode, MxBead2Bits<0b11>,
- MxBead4Bits<0b1110>, MxBead4Bits<0b0100>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>;
+class MxJMP<MxOperand LOCOp, MxEncMemOp DST_ENC>
+ : MxInst<(outs), (ins LOCOp:$dst), "jmp\t$dst", [(brind iPTR:$dst)]> {
+ let Inst =
+ (ascend
+ (descend 0b0100, 0b1110, 0b11, DST_ENC.EA),
+ DST_ENC.Supplement
+ );
+}
-def JMP32j : MxJMP<MxARI32, MxEncEAj_0, MxExtEmpty>;
+def JMP32j : MxJMP<MxARI32, MxEncAddrMode_j<"dst">>;
// FIXME Support 16 bit indirect jump.
@@ -147,20 +161,35 @@ def JMP32j : MxJMP<MxARI32, MxEncEAj_0, MxExtEmpty>;
/// 32-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $FF
/// --------------------------------------------------
let isBranch = 1, isTerminator = 1, Uses = [CCR] in
-class MxBcc<string cc, Operand TARGET, MxEncoding ENC = MxEncEmpty>
- : MxInst<(outs), (ins TARGET:$dst), "b"#cc#"\t$dst", [], ENC>;
+class MxBcc<string cc, Operand TARGET, dag disp_8, dag disp_16_32>
+ : MxInst<(outs), (ins TARGET:$dst), "b"#cc#"\t$dst", []> {
+  // FIXME: If we want to avoid supplying disp_16_32 with an empty
+  // (ascend) for the 16/32-bit variants, we can use the conditional
+  // `!cond` bang operator like this:
+  // ```
+  // class MxBcc<string cc, Operand TARGET, int SIZE>
+  // ...
+  // let Inst = !cond(
+  //   !eq(SIZE, 8):  /* encoding for Bcc8 */,
+  //   !eq(SIZE, 16): /* encoding for Bcc16 */,
+  //   !eq(SIZE, 32): /* encoding for Bcc32 */
+  // );
+  // ```
+ let Inst =
+ (ascend
+ (descend 0b0110, !cast<MxEncCondOp>("MxCC"#cc).Value, disp_8),
+ disp_16_32
+ );
+}
foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge",
"cs", "pl", "gt", "hi", "vc", "le", "vs"] in {
def B#cc#"8"
: MxBcc<cc, MxBrTarget8,
- MxEncoding<MxBead8Disp<0>,
- !cast<MxBead4Bits>("MxCC"#cc), MxBead4Bits<0x6>>>;
+ (operand "$dst", 8, (encoder "encodePCRelImm<8>")), (ascend)>;
+
def B#cc#"16"
- : MxBcc<cc, MxBrTarget16,
- MxEncoding<MxBead4Bits<0x0>,
- MxBead4Bits<0x0>, !cast<MxBead4Bits>("MxCC"#cc),
- MxBead4Bits<0x6>, MxBead16Imm<0>>>;
+ : MxBcc<cc, MxBrTarget16, (descend 0b0000, 0b0000),
+ (operand "$dst", 16, (encoder "encodePCRelImm<16>"))>;
}
foreach cc = [ "cc", "ls", "lt", "eq", "mi", "ne", "ge",
@@ -178,17 +207,21 @@ def : Pat<(MxBrCond bb:$target, !cast<PatLeaf>("MxCOND"#cc), CCR),
/// -------------------------------------------------
/// 32-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $FF
/// -------------------------------------------------
-let isBranch = 1, isTerminator = 1, isBarrier=1 in
-class MxBra<Operand TARGET, MxEncoding ENC = MxEncEmpty>
- : MxInst<(outs), (ins TARGET:$dst), "bra\t$dst", [], ENC>;
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in
+class MxBra<Operand TARGET, dag disp_8, dag disp_16_32>
+ : MxInst<(outs), (ins TARGET:$dst), "bra\t$dst", []> {
+ let Inst =
+ (ascend
+ (descend 0b0110, 0b0000, disp_8),
+ disp_16_32
+ );
+}
def BRA8 : MxBra<MxBrTarget8,
- MxEncoding<MxBead8Disp<0>, MxBead4Bits<0x0>,
- MxBead4Bits<0x6>>>;
-def BRA16 : MxBra<MxBrTarget16,
- MxEncoding<MxBead4Bits<0x0>, MxBead4Bits<0x0>,
- MxBead4Bits<0x0>, MxBead4Bits<0x6>,
- MxBead16Imm<0>>>;
+ (operand "$dst", 8, (encoder "encodePCRelImm<8>")), (ascend)>;
+
+def BRA16 : MxBra<MxBrTarget16, (descend 0b0000, 0b0000),
+ (operand "$dst", 16, (encoder "encodePCRelImm<16>"))>;
def : Pat<(br bb:$target), (BRA8 MxBrTarget8:$target)>;
@@ -208,16 +241,19 @@ let isCall = 1 in
///------------------------------+---------+---------
/// 0 1 0 0 1 1 1 0 1 0 | MODE | REG
///------------------------------+---------+---------
-class MxCall<MxOperand LOCOp, MxEncEA EA, MxEncExt EXT>
- : MxInst<(outs), (ins LOCOp:$dst), "jsr\t$dst", [],
- MxEncoding<EA.Reg, EA.DA, EA.Mode, MxBead2Bits<0b10>,
- MxBead4Bits<0b1110>, MxBead4Bits<0b0100>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>;
+class MxCall<MxOperand LOCOp, MxEncMemOp DST_ENC>
+ : MxInst<(outs), (ins LOCOp:$dst), "jsr\t$dst", []> {
+ let Inst =
+ (ascend
+ (descend 0b0100, 0b1110, 0b10, DST_ENC.EA),
+ DST_ENC.Supplement
+ );
+}
-def CALLk : MxCall<MxPCI32, MxEncEAk, MxExtBrief_0>;
-def CALLq : MxCall<MxPCD32, MxEncEAq, MxExtI16_0>;
-def CALLb : MxCall<MxAL32, MxEncEAb, MxExtI32_0>;
-def CALLj : MxCall<MxARI32, MxEncEAj_0, MxExtEmpty>;
+def CALLk : MxCall<MxPCI32, MxEncAddrMode_k<"dst">>;
+def CALLq : MxCall<MxPCD32, MxEncAddrMode_q<"dst">>;
+def CALLb : MxCall<MxAL32, MxEncAddrMode_abs<"dst", true>>;
+def CALLj : MxCall<MxARI32, MxEncAddrMode_j<"dst">>;
multiclass CallPat<MxCall callOp, Predicate pred> {
let Predicates = [pred] in {
@@ -261,7 +297,9 @@ def TAILJMPj : MxPseudo<(outs), (ins MxARI32_TC:$dst)>;
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
-def RTS : MxInst<(outs), (ins), "rts", [], MxEncFixed<0x4E75>>;
+def RTS : MxInst<(outs), (ins), "rts", []> {
+ let Inst = (descend 0b0100, 0b1110, 0b0111, 0b0101);
+}
let isCodeGenOnly = 1 in
def RET : MxPseudo<(outs), (ins i32imm:$adj, variable_ops),
diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td
index 3dd5d9f8c7ac..863432b94005 100644
--- a/llvm/lib/Target/M68k/M68kInstrData.td
+++ b/llvm/lib/Target/M68k/M68kInstrData.td
@@ -42,290 +42,192 @@
/// -----------------------------------------------------
///
/// NOTE Move requires EA X version for direct register destination(0)
-class MxMoveEncoding<MxBead2Bits size,
- MxEncEA srcEA, MxEncExt srcExt,
- MxEncEA dstEA, MxEncExt dstExt>
- : MxEncoding<srcEA.Reg, srcEA.DA, srcEA.Mode, dstEA.DA, dstEA.Mode, dstEA.Reg,
- size, MxBead2Bits<0b00>,
- srcExt.Imm, srcExt.B8, srcExt.Scale, srcExt.WL, srcExt.DAReg,
- dstExt.Imm, dstExt.B8, dstExt.Scale, dstExt.WL, dstExt.DAReg>;
-
-/// MOVE has alternate size encoding
-class MxMoveSize<bits<2> value> : MxBead2Bits<value>;
+
+// MOVE has a different size encoding.
+class MxMoveSize<bits<2> value> {
+ bits<2> Value = value;
+}
def MxMoveSize8 : MxMoveSize<0b01>;
def MxMoveSize16 : MxMoveSize<0b11>;
def MxMoveSize32 : MxMoveSize<0b10>;
-let Defs = [CCR] in
-class MxMove<string size, dag outs, dag ins, list<dag> pattern, MxEncoding enc>
- : MxInst<outs, ins, "move."#size#"\t$src, $dst", pattern, enc>;
-
-class MxMove_RR<MxType DST, MxType SRC, MxMoveEncoding ENC>
- : MxMove<DST.Prefix, (outs DST.ROp:$dst), (ins SRC.ROp:$src),
- [(null_frag)], ENC>;
-
-let mayStore = 1 in {
-class MxMove_MR<MxOperand MEMOpd, ComplexPattern MEMPat, MxType REG,
- MxMoveEncoding ENC>
- : MxMove<REG.Prefix, (outs), (ins MEMOpd:$dst, REG.ROp:$src),
- [(store REG.VT:$src, MEMPat:$dst)], ENC>;
-
-class MxMove_MI<MxOperand MEMOpd, ComplexPattern MEMPat, MxType TYPE,
- MxMoveEncoding ENC>
- : MxMove<TYPE.Prefix, (outs), (ins MEMOpd:$dst, TYPE.IOp:$src),
- [(store TYPE.IPat:$src, MEMPat:$dst)], ENC>;
-} // let mayStore = 1
-
-class MxMove_RI<MxType DST, MxMoveEncoding ENC>
- : MxMove<DST.Prefix, (outs DST.ROp:$dst), (ins DST.IOp:$src),
- [(set DST.VT:$dst, DST.IPat:$src)], ENC>;
-
-
-let mayLoad = 1 in
-class MxMove_RM<MxType REG, MxOperand MEMOpd, ComplexPattern MEMPat,
- MxBead2Bits SIZE,
- MxEncEA SRCEA, MxEncExt SRCEXT,
- MxEncEA DSTEA, MxEncExt DSTEXT>
- : MxMove<REG.Prefix, (outs REG.ROp:$dst), (ins MEMOpd:$src),
- [(set REG.VT:$dst, (REG.Load MEMPat:$src))],
- MxMoveEncoding<SIZE, SRCEA, SRCEXT, DSTEA, DSTEXT>>;
-
-multiclass MMxMove_RM<MxType REG, MxMoveSize SIZE, MxEncEA EA_0> {
-
- // REG <- (An)+
- def NAME#REG.OOp.Letter#REG.Postfix : MxMove_RM<REG, REG.OOp, REG.OPat,
- SIZE, MxEncEAo_1, MxExtEmpty, EA_0, MxExtEmpty>;
-
- // REG <- -(An)
- def NAME#REG.EOp.Letter#REG.Postfix : MxMove_RM<REG, REG.EOp, REG.EPat,
- SIZE, MxEncEAe_1, MxExtEmpty, EA_0, MxExtEmpty>;
-
- // REG <- (i,PC,Xn)
- def NAME#REG.KOp.Letter#REG.Postfix : MxMove_RM<REG, REG.KOp, REG.KPat,
- SIZE, MxEncEAk, MxExtBrief_1, EA_0, MxExtEmpty>;
-
- // REG <- (i,PC)
- def NAME#REG.QOp.Letter#REG.Postfix : MxMove_RM<REG, REG.QOp, REG.QPat,
- SIZE, MxEncEAq, MxExtI16_1, EA_0, MxExtEmpty>;
-
- // REG <- (i,An,Xn)
- def NAME#REG.FOp.Letter#REG.Postfix : MxMove_RM<REG, REG.FOp, REG.FPat,
- SIZE, MxEncEAf_1, MxExtBrief_1, EA_0, MxExtEmpty>;
-
- // REG <- (i,An)
- def NAME#REG.POp.Letter#REG.Postfix : MxMove_RM<REG, REG.POp, REG.PPat,
- SIZE, MxEncEAp_1, MxExtI16_1, EA_0, MxExtEmpty>;
-
- // REG <- (ABS)
- def NAME#REG.BOp.Letter#REG.Postfix : MxMove_RM<REG, REG.BOp, REG.BPat,
- SIZE, MxEncEAb, MxExtI32_1, EA_0, MxExtEmpty>;
-
- // REG <- (An)
- def NAME#REG.JOp.Letter#REG.Postfix : MxMove_RM<REG, REG.JOp, REG.JPat,
- SIZE, MxEncEAj_1, MxExtEmpty, EA_0, MxExtEmpty>;
+class MxMoveEncoding<MxMoveSize size, MxEncMemOp dst_enc, MxEncMemOp src_enc> {
+ dag Value = (ascend
+ (descend 0b00, size.Value,
+ !cond(
+ !eq(!getdagop(dst_enc.EA), descend): !setdagop(dst_enc.EA, ascend),
+ !eq(!getdagop(dst_enc.EA), ascend): !setdagop(dst_enc.EA, descend)),
+ src_enc.EA),
+ // Source extension
+ src_enc.Supplement,
+ // Destination extension
+ dst_enc.Supplement
+ );
}
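
In the MOVE opcode word the destination EA is laid out REGISTER-then-MODE, the mirror image of the source's MODE-then-REGISTER order, so the class above flips the top-level dag operator of the destination EA: turning `descend` into `ascend` (or back) emits the same two fields in the opposite bit order. A stand-alone sketch of the `!getdagop`/`!setdagop` idiom (hypothetical names; operator defs come from Target.td in-tree):

```
def ascend;
def descend;

class SwapTopOp<dag d> {
  // Replace the dag's operator while keeping its arguments, turning
  // (descend a, b) into (ascend a, b) and vice versa.
  dag Result = !cond(
    !eq(!getdagop(d), descend) : !setdagop(d, ascend),
    !eq(!getdagop(d), ascend)  : !setdagop(d, descend));
}
// Swapped.Result evaluates to (ascend 0b101, 0b010).
def Swapped : SwapTopOp<(descend 0b101, 0b010)>;
```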
-let mayLoad = 1, mayStore = 1 in {
-class MxMove_MM<string SIZE, PatFrag LOAD,
- MxOperand DSTOpd, ComplexPattern DSTPat,
- MxOperand SRCOpd, ComplexPattern SRCPat,
- MxBead2Bits ESIZE,
- MxEncEA SRCEA, MxEncExt SRCEXT,
- MxEncEA DSTEA, MxEncExt DSTEXT>
- : MxMove<SIZE, (outs), (ins DSTOpd:$dst, SRCOpd:$src),
- [(store (LOAD SRCPat:$src), DSTPat:$dst)],
- MxMoveEncoding<ESIZE, SRCEA, SRCEXT, DSTEA, DSTEXT>>;
-} // let mayLoad = 1, mayStore = 1
-
-multiclass MMxMove_MM<MxType TYPE, MxOperand DSTOpd, ComplexPattern DSTPat,
- MxMoveSize SIZE, MxEncEA EA_0, MxEncExt EXT_0> {
-
- // MEM <- (An)+
- def NAME#TYPE.OOp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.OOp, TYPE.OPat,
- SIZE, MxEncEAo_1, MxExtEmpty, EA_0, EXT_0>;
-
- // MEM <- -(An)
- def NAME#TYPE.EOp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.EOp, TYPE.EPat,
- SIZE, MxEncEAe_1, MxExtEmpty, EA_0, EXT_0>;
-
- // MEM <- (i,An)
- def NAME#TYPE.POp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.POp, TYPE.PPat,
- SIZE, MxEncEAp_1, MxExtI16_1, EA_0, EXT_0>;
-
- // MEM <- (i,An,Xn)
- def NAME#TYPE.FOp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.FOp, TYPE.FPat,
- SIZE, MxEncEAf_1, MxExtBrief_1, EA_0, EXT_0>;
-
- // MEM <- (i,PC,Xn)
- def NAME#TYPE.KOp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.KOp, TYPE.KPat,
- SIZE, MxEncEAk, MxExtBrief_1, EA_0, EXT_0>;
-
- // MEM <- (i,PC)
- def NAME#TYPE.QOp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.QOp, TYPE.QPat,
- SIZE, MxEncEAq, MxExtI16_1, EA_0, EXT_0>;
-
- // MEM <- (ABS)
- def NAME#TYPE.BOp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.BOp, TYPE.BPat,
- SIZE, MxEncEAb, MxExtI32_1, EA_0, EXT_0>;
-
- // MEM <- (An)
- def NAME#TYPE.JOp.Letter#TYPE.Postfix
- : MxMove_MM<TYPE.Prefix, TYPE.Load, DSTOpd, DSTPat, TYPE.JOp, TYPE.JPat,
- SIZE, MxEncEAj_1, MxExtEmpty, EA_0, EXT_0>;
+// Special encoding for Xn
+class MxMoveEncAddrMode_r<string reg_opnd> : MxEncMemOp {
+ let EA = (descend (descend 0b00, (slice "$"#reg_opnd, 3, 3)),
+ (operand "$"#reg_opnd, 3));
}
-def MOV8dd
- : MxMove_RR<MxType8d, MxType8d,
- MxMoveEncoding<MxMoveSize8, MxEncEAd_1, MxExtEmpty, MxEncEAd_0, MxExtEmpty>>;
+// TODO: Generalize and adopt this utility in other .td files as well.
+multiclass MxMoveOperandEncodings<string opnd_name> {
+ // Dn
+ def MxMove#NAME#OpEnc_d : MxEncAddrMode_d<opnd_name>;
+ // An
+ def MxMove#NAME#OpEnc_a : MxEncAddrMode_a<opnd_name>;
+ // Xn
+ def MxMove#NAME#OpEnc_r : MxMoveEncAddrMode_r<opnd_name>;
+ // (An)+
+ def MxMove#NAME#OpEnc_o : MxEncAddrMode_o<opnd_name>;
+ // -(An)
+ def MxMove#NAME#OpEnc_e : MxEncAddrMode_e<opnd_name>;
+ // (i,PC,Xn)
+ def MxMove#NAME#OpEnc_k : MxEncAddrMode_k<opnd_name>;
+ // (i,PC)
+ def MxMove#NAME#OpEnc_q : MxEncAddrMode_q<opnd_name>;
+ // (i,An,Xn)
+ def MxMove#NAME#OpEnc_f : MxEncAddrMode_f<opnd_name>;
+ // (i,An)
+ def MxMove#NAME#OpEnc_p : MxEncAddrMode_p<opnd_name>;
+ // (ABS).L
+ def MxMove#NAME#OpEnc_b : MxEncAddrMode_abs<opnd_name, /*W/L=*/true>;
+ // (An)
+ def MxMove#NAME#OpEnc_j : MxEncAddrMode_j<opnd_name>;
+}
-// M <- R
-def MOV8fd : MxMove_MR<MxType8.FOp, MxType8.FPat, MxType8d,
- MxMoveEncoding<MxMoveSize8,
- /*src*/ MxEncEAd_1, MxExtEmpty,
- /*dst*/ MxEncEAf_0, MxExtBrief_0>>;
+defm Src : MxMoveOperandEncodings<"src">;
+defm Dst : MxMoveOperandEncodings<"dst">;
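
`defm Src` and `defm Dst` substitute "Src" and "Dst" for `NAME` inside the multiclass, yielding defs such as MxMoveSrcOpEnc_d and MxMoveDstOpEnc_d that later code retrieves with `!cast`. A minimal stand-alone sketch of the `NAME`-paste mechanism (illustrative names only):

```
class Enc<string opnd> { string Operand = opnd; }

multiclass OperandEncodings<string opnd_name> {
  // NAME is replaced by the defm name below.
  def Move # NAME # OpEnc_d : Enc<opnd_name>;
}

defm Src : OperandEncodings<"src">;  // defines MoveSrcOpEnc_d
defm Dst : OperandEncodings<"dst">;  // defines MoveDstOpEnc_d
```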
-def MOV8pd : MxMove_MR<MxType8.POp, MxType8.PPat, MxType8d,
- MxMoveEncoding<MxMoveSize8,
- /*src*/ MxEncEAd_1, MxExtEmpty,
- /*dst*/ MxEncEAp_0, MxExtI16_0>>;
+defvar MxMoveSupportedAMs = ["o", "e", "k", "q", "f", "p", "b", "j"];
-def MOV8ed : MxMove_MR<MxType8.EOp, MxType8.EPat, MxType8d,
- MxMoveEncoding<MxMoveSize8,
- /*src*/ MxEncEAd_1, MxExtEmpty,
- /*dst*/ MxEncEAe_0, MxExtEmpty>>;
+let Defs = [CCR] in
+class MxMove<string size, dag outs, dag ins, list<dag> pattern, MxMoveEncoding enc>
+ : MxInst<outs, ins, "move."#size#"\t$src, $dst", pattern> {
+ let Inst = enc.Value;
+}
-def MOV8od : MxMove_MR<MxType8.OOp, MxType8.OPat, MxType8d,
- MxMoveEncoding<MxMoveSize8,
- /*src*/ MxEncEAd_1, MxExtEmpty,
- /*dst*/ MxEncEAo_0, MxExtEmpty>>;
+// R <- R
+class MxMove_RR<MxType TYPE, string DST_REG, string SRC_REG,
+ MxMoveEncoding ENC,
+ MxOpBundle DST = !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#DST_REG),
+ MxOpBundle SRC = !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#SRC_REG)>
+ : MxMove<TYPE.Prefix,
+ (outs DST.Op:$dst), (ins SRC.Op:$src),
+ [(null_frag)], ENC>;
-def MOV8bd : MxMove_MR<MxType8.BOp, MxType8.BPat, MxType8d,
- MxMoveEncoding<MxMoveSize8,
- /*src*/ MxEncEAd_1, MxExtEmpty,
- /*dst*/ MxEncEAb, MxExtI32_0>>;
+foreach DST_REG = ["r", "a"] in {
+ foreach SRC_REG = ["r", "a"] in
+ foreach TYPE = [MxType16, MxType32] in
+ def MOV # TYPE.Size # DST_REG # SRC_REG # TYPE.Postfix
+ : MxMove_RR<TYPE, DST_REG, SRC_REG,
+ MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#TYPE.Size),
+ !cast<MxEncMemOp>("MxMoveDstOpEnc_"#DST_REG),
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#SRC_REG)>>;
+} // foreach DST_REG
+foreach TYPE = [MxType8, MxType16, MxType32] in
+def MOV # TYPE.Size # dd # TYPE.Postfix
+ : MxMove_RR<TYPE, "d", "d",
+ MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#TYPE.Size),
+ MxMoveDstOpEnc_d, MxMoveSrcOpEnc_d>>;
-def MOV8jd : MxMove_MR<MxType8.JOp, MxType8.JPat, MxType8d,
- MxMoveEncoding<MxMoveSize8,
- /*src*/ MxEncEAd_1, MxExtEmpty,
- /*dst*/ MxEncEAj_0, MxExtEmpty>>;
+// M <- R
+let mayStore = 1 in {
+class MxMove_MR<MxType TYPE, MxOpBundle DST, string SRC_REG, MxMoveEncoding ENC,
+ MxOpBundle SRC = !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#SRC_REG)>
+ : MxMove<TYPE.Prefix, (outs), (ins DST.Op:$dst, SRC.Op:$src),
+ [(store TYPE.VT:$src, DST.Pat:$dst)], ENC>;
+
+class MxMove_MI<MxType TYPE, MxOpBundle DST, MxMoveEncoding ENC,
+ MxImmOpBundle SRC = !cast<MxImmOpBundle>("MxOp"#TYPE.Size#"AddrMode_i")>
+ : MxMove<TYPE.Prefix, (outs), (ins DST.Op:$dst, SRC.Op:$src),
+ [(store SRC.ImmPat:$src, DST.Pat:$dst)], ENC>;
+} // let mayStore = 1
+foreach REG = ["r", "a", "d"] in
+foreach AM = MxMoveSupportedAMs in {
+ foreach TYPE = !if(!eq(REG, "d"), [MxType8, MxType16, MxType32], [MxType16, MxType32]) in
+ def MOV # TYPE.Size # AM # REG # TYPE.Postfix
+ : MxMove_MR<TYPE, !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#AM), REG,
+ MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#TYPE.Size),
+ !cast<MxEncMemOp>("MxMoveDstOpEnc_"#AM),
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#REG)>>;
+} // foreach AM
+
+foreach AM = MxMoveSupportedAMs in {
+ foreach TYPE = [MxType8, MxType16, MxType32] in
+ def MOV # TYPE.Size # AM # i # TYPE.Postfix
+ : MxMove_MI<TYPE, !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#AM),
+ MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#TYPE.Size),
+ !cast<MxEncMemOp>("MxMoveDstOpEnc_"#AM),
+ MxEncAddrMode_i<"src", TYPE.Size>>>;
+} // foreach AM
// R <- I
-def MOV8di : MxMove_RI<MxType8d,
- MxMoveEncoding<MxMoveSize8, MxEncEAi, MxExtI8_1, MxEncEAd_0, MxExtEmpty>>;
-
-foreach S = [16, 32] in {
- foreach D = [ "r", "a" ] in {
-
- foreach O = [ "r", "a" ] in {
- def MOV#S#D#O : MxMove_RR<
- !cast<MxType>("MxType"#S#D),
- !cast<MxType>("MxType"#S#O),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- !cast<MxEncEA>("MxEncEA"#D#"_1"), MxExtEmpty,
- !cast<MxEncEA>("MxEncEA"#D#"_0_reflected"), MxExtEmpty>>;
- }
-
- // M <- R
- def MOV#S#"f"#D : MxMove_MR<
- !cast<MxType>("MxType"#S).FOp,
- !cast<MxType>("MxType"#S).FPat,
- !cast<MxType>("MxType"#S#D),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- !cast<MxEncEA>("MxEncEA"#D#"_1"), MxExtEmpty,
- MxEncEAf_0, MxExtBrief_0>>;
-
- def MOV#S#"p"#D : MxMove_MR<
- !cast<MxType>("MxType"#S).POp,
- !cast<MxType>("MxType"#S).PPat,
- !cast<MxType>("MxType"#S#D),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- !cast<MxEncEA>("MxEncEA"#D#"_1"), MxExtEmpty,
- MxEncEAp_0, MxExtI16_0>>;
-
- def MOV#S#"e"#D : MxMove_MR<
- !cast<MxType>("MxType"#S).EOp,
- !cast<MxType>("MxType"#S).EPat,
- !cast<MxType>("MxType"#S#D),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- !cast<MxEncEA>("MxEncEA"#D#"_1"), MxExtEmpty,
- MxEncEAe_0, MxExtEmpty>>;
-
- def MOV#S#"o"#D : MxMove_MR<
- !cast<MxType>("MxType"#S).OOp,
- !cast<MxType>("MxType"#S).OPat,
- !cast<MxType>("MxType"#S#D),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- !cast<MxEncEA>("MxEncEA"#D#"_1"), MxExtEmpty,
- MxEncEAo_0, MxExtEmpty>>;
-
- def MOV#S#"b"#D : MxMove_MR<
- !cast<MxType>("MxType"#S).BOp,
- !cast<MxType>("MxType"#S).BPat,
- !cast<MxType>("MxType"#S#D),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- !cast<MxEncEA>("MxEncEA"#D#"_1"), MxExtEmpty,
- MxEncEAb, MxExtI32_0>>;
-
- def MOV#S#"j"#D : MxMove_MR<
- !cast<MxType>("MxType"#S).JOp,
- !cast<MxType>("MxType"#S).JPat,
- !cast<MxType>("MxType"#S#D),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- !cast<MxEncEA>("MxEncEA"#D#"_1"), MxExtEmpty,
- MxEncEAj_0, MxExtEmpty>>;
-
-
- // R <- I
- def MOV#S#D#"i" : MxMove_RI<
- !cast<MxType>("MxType"#S#D),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- MxEncEAi, !cast<MxEncExt>("MxExtI"#S#"_1"),
- !cast<MxEncEA>("MxEncEA"#D#"_0_reflected"), MxExtEmpty>>;
- }
-}
+class MxMove_RI<MxType TYPE, string DST_REG, MxMoveEncoding ENC,
+ MxImmOpBundle SRC = !cast<MxImmOpBundle>("MxOp"#TYPE.Size#"AddrMode_i"),
+ MxOpBundle DST = !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#DST_REG)>
+ : MxMove<TYPE.Prefix, (outs DST.Op:$dst), (ins SRC.Op:$src),
+ [(set TYPE.VT:$dst, SRC.ImmPat:$src)], ENC>;
+
+foreach REG = ["r", "a", "d"] in {
+ foreach TYPE = !if(!eq(REG, "d"), [MxType8, MxType16, MxType32], [MxType16, MxType32]) in
+ def MOV # TYPE.Size # REG # i # TYPE.Postfix
+ : MxMove_RI<TYPE, REG,
+ MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#TYPE.Size),
+ !cast<MxEncMemOp>("MxMoveDstOpEnc_"#REG),
+ MxEncAddrMode_i<"src", TYPE.Size>>>;
+} // foreach REG
-// M <- I
-foreach S = [8, 16, 32] in {
- def MOV#S#"f"#"i" : MxMove_MI<
- !cast<MxType>("MxType"#S).FOp,
- !cast<MxType>("MxType"#S).FPat,
- !cast<MxType>("MxType"#S),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- MxEncEAi, !cast<MxEncExt>("MxExtI"#S#"_1"),
- MxEncEAf_0, MxExtBrief_0>>;
-
- def MOV#S#"p"#"i" : MxMove_MI<
- !cast<MxType>("MxType"#S).POp,
- !cast<MxType>("MxType"#S).PPat,
- !cast<MxType>("MxType"#S),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- MxEncEAi, !cast<MxEncExt>("MxExtI"#S#"_1"),
- MxEncEAp_0, MxExtI16_0>>;
-
- def MOV#S#"b"#"i" : MxMove_MI<
- !cast<MxType>("MxType"#S).BOp,
- !cast<MxType>("MxType"#S).BPat,
- !cast<MxType>("MxType"#S),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- MxEncEAi, !cast<MxEncExt>("MxExtI"#S#"_1"),
- MxEncEAb, MxExtI32_0>>;
-
- def MOV#S#"j"#"i" : MxMove_MI<
- !cast<MxType>("MxType"#S).JOp,
- !cast<MxType>("MxType"#S).JPat,
- !cast<MxType>("MxType"#S),
- MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#S),
- MxEncEAi, !cast<MxEncExt>("MxExtI"#S#"_1"),
- MxEncEAj_0, MxExtEmpty>>;
-}
+// R <- M
+let mayLoad = 1 in
+class MxMove_RM<MxType TYPE, string DST_REG, MxOpBundle SRC, MxEncMemOp SRC_ENC,
+ MxMoveSize SIZE_ENC = !cast<MxMoveSize>("MxMoveSize"#TYPE.Size),
+ MxOpBundle DST = !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#DST_REG),
+ MxEncMemOp DST_ENC = !cast<MxEncMemOp>("MxMoveDstOpEnc_"#DST_REG)>
+ : MxMove<TYPE.Prefix, (outs DST.Op:$dst), (ins SRC.Op:$src),
+ [(set TYPE.VT:$dst, (TYPE.Load SRC.Pat:$src))],
+ MxMoveEncoding<SIZE_ENC, DST_ENC, SRC_ENC>>;
+
+foreach REG = ["r", "a", "d"] in
+foreach AM = MxMoveSupportedAMs in {
+ foreach TYPE = !if(!eq(REG, "d"), [MxType8, MxType16, MxType32], [MxType16, MxType32]) in
+ def MOV # TYPE.Size # REG # AM # TYPE.Postfix
+ : MxMove_RM<TYPE, REG, !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#AM),
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#AM)>;
+} // foreach AM
+
+// Tail call version
+let Pattern = [(null_frag)] in {
+ foreach REG = ["r", "a"] in
+ foreach AM = MxMoveSupportedAMs in {
+ foreach TYPE = [MxType16, MxType32] in
+ def MOV # TYPE.Size # REG # AM # _TC
+ : MxMove_RM<TYPE, REG, !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#AM),
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#AM)> {
+ let isCodeGenOnly = true;
+ }
+ } // foreach AM
+} // let Pattern
+
+let mayLoad = 1, mayStore = 1 in
+class MxMove_MM<MxType TYPE, MxOpBundle DST, MxOpBundle SRC,
+ MxEncMemOp DST_ENC, MxEncMemOp SRC_ENC>
+ : MxMove<TYPE.Prefix, (outs), (ins DST.Op:$dst, SRC.Op:$src),
+ [(store (TYPE.Load SRC.Pat:$src), DST.Pat:$dst)],
+ MxMoveEncoding<!cast<MxMoveSize>("MxMoveSize"#TYPE.Size),
+ DST_ENC, SRC_ENC>>;
+
+foreach DST_AM = MxMoveSupportedAMs in
+foreach SRC_AM = MxMoveSupportedAMs in {
+ foreach TYPE = [MxType8, MxType16, MxType32] in
+ def MOV # TYPE.Size # DST_AM # SRC_AM # TYPE.Postfix
+ : MxMove_MM<TYPE, !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#DST_AM),
+ !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#SRC_AM),
+ !cast<MxEncMemOp>("MxMoveDstOpEnc_"#DST_AM),
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#SRC_AM)>;
+} // foreach SRC_AM
// Store ABS (basically a pointer) as an immediate to memory
def : Pat<(store MxType32.BPat :$src, MxType32.PPat :$dst),
@@ -340,66 +242,6 @@ def : Pat<(store MxType32.BPat :$src, MxType32.BPat :$dst),
def : Pat<(store MxType32.BPat :$src, MxType32.JPat :$dst),
(MOV32ji MxType32.JOp :$dst, MxType32.IOp :$src)>;
-// R <- M
-defm MOV8d : MMxMove_RM<MxType8d, MxMoveSize8, MxEncEAd_0>;
-
-defm MOV16r : MMxMove_RM<MxType16r, MxMoveSize16, MxEncEAr_0_reflected>;
-defm MOV16a : MMxMove_RM<MxType16a, MxMoveSize16, MxEncEAa_0>;
-
-defm MOV32r : MMxMove_RM<MxType32r, MxMoveSize32, MxEncEAr_0_reflected>;
-defm MOV32a : MMxMove_RM<MxType32a, MxMoveSize32, MxEncEAa_0>;
-
-let Pattern = [(null_frag)] in {
-defm MOV16r : MMxMove_RM<MxType16r_TC, MxMoveSize16, MxEncEAr_0_reflected>;
-defm MOV16a : MMxMove_RM<MxType16a_TC, MxMoveSize16, MxEncEAa_0>;
-
-defm MOV32r : MMxMove_RM<MxType32r_TC, MxMoveSize32, MxEncEAr_0_reflected>;
-defm MOV32a : MMxMove_RM<MxType32a_TC, MxMoveSize32, MxEncEAa_0>;
-} // Pattern
-
-// M <- M
-defm MOV8p : MMxMove_MM<MxType8, MxType8.POp, MxType8.PPat,
- MxMoveSize8, MxEncEAp_0, MxExtI16_0>;
-defm MOV16p : MMxMove_MM<MxType16, MxType16.POp, MxType16.PPat,
- MxMoveSize16, MxEncEAp_0, MxExtI16_0>;
-defm MOV32p : MMxMove_MM<MxType32, MxType32.POp, MxType32.PPat,
- MxMoveSize32, MxEncEAp_0, MxExtI16_0>;
-
-defm MOV8f : MMxMove_MM<MxType8, MxType8.FOp, MxType8.FPat,
- MxMoveSize8, MxEncEAf_0, MxExtBrief_0>;
-defm MOV16f : MMxMove_MM<MxType16, MxType16.FOp, MxType16.FPat,
- MxMoveSize16, MxEncEAf_0, MxExtBrief_0>;
-defm MOV32f : MMxMove_MM<MxType32, MxType32.FOp, MxType32.FPat,
- MxMoveSize32, MxEncEAf_0, MxExtBrief_0>;
-
-defm MOV8b : MMxMove_MM<MxType8, MxType8.BOp, MxType8.BPat,
- MxMoveSize8, MxEncEAb, MxExtI32_0>;
-defm MOV16b : MMxMove_MM<MxType16, MxType16.BOp, MxType16.BPat,
- MxMoveSize16, MxEncEAb, MxExtI32_0>;
-defm MOV32b : MMxMove_MM<MxType32, MxType32.BOp, MxType32.BPat,
- MxMoveSize32, MxEncEAb, MxExtI32_0>;
-
-defm MOV8e : MMxMove_MM<MxType8, MxType8.EOp, MxType8.EPat,
- MxMoveSize8, MxEncEAe_0, MxExtEmpty>;
-defm MOV16e : MMxMove_MM<MxType16, MxType16.EOp, MxType16.EPat,
- MxMoveSize16, MxEncEAe_0, MxExtEmpty>;
-defm MOV32e : MMxMove_MM<MxType32, MxType32.EOp, MxType32.EPat,
- MxMoveSize32, MxEncEAe_0, MxExtEmpty>;
-
-defm MOV8o : MMxMove_MM<MxType8, MxType8.OOp, MxType8.OPat,
- MxMoveSize8, MxEncEAo_0, MxExtEmpty>;
-defm MOV16o : MMxMove_MM<MxType16, MxType16.OOp, MxType16.OPat,
- MxMoveSize16, MxEncEAo_0, MxExtEmpty>;
-defm MOV32o : MMxMove_MM<MxType32, MxType32.OOp, MxType32.OPat,
- MxMoveSize32, MxEncEAo_0, MxExtEmpty>;
-
-defm MOV8j : MMxMove_MM<MxType8, MxType8.JOp, MxType8.JPat,
- MxMoveSize8, MxEncEAj_0, MxExtEmpty>;
-defm MOV16j : MMxMove_MM<MxType16, MxType16.JOp, MxType16.JPat,
- MxMoveSize16, MxEncEAj_0, MxExtEmpty>;
-defm MOV32j : MMxMove_MM<MxType32, MxType32.JOp, MxType32.JPat,
- MxMoveSize32, MxEncEAj_0, MxExtEmpty>;
-
//===----------------------------------------------------------------------===//
// MOVEM
//
@@ -407,12 +249,12 @@ defm MOV32j : MMxMove_MM<MxType32, MxType32.JOp, MxType32.JPat,
//===----------------------------------------------------------------------===//
// Direction
-def MxMOVEM_MR : MxBead1Bit<0>;
-def MxMOVEM_RM : MxBead1Bit<1>;
+defvar MxMOVEM_MR = false;
+defvar MxMOVEM_RM = true;
// Size
-def MxMOVEM_W : MxBead1Bit<0>;
-def MxMOVEM_L : MxBead1Bit<1>;
+defvar MxMOVEM_W = false;
+defvar MxMOVEM_L = true;
/// ---------------+-------------+-------------+---------
/// F E D C B | A | 9 8 7 | 6 | 5 4 3 | 2 1 0
@@ -423,31 +265,47 @@ def MxMOVEM_L : MxBead1Bit<1>;
/// -----------------------------------------------------
/// D - direction(RM,MR)
/// S - size(W,L)
-class MxMOVEMEncoding<MxEncEA EA, MxEncExt EXT, MxBead1Bit SIZE, MxBead1Bit DIR,
- MxBead16Imm IMM>
- : MxEncoding<EA.Reg, EA.DA, EA.Mode, SIZE, MxBead3Bits<0b001>, DIR,
- MxBead1Bit<1>, MxBead4Bits<0b0100>, IMM,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>;
+class MxMOVEMEncoding<MxEncMemOp opnd_enc, bit size, bit direction,
+ string mask_op_name> {
+ dag Value = (ascend
+ (descend 0b01001, direction, 0b001, size, opnd_enc.EA),
+ // Mask
+ (operand "$"#mask_op_name, 16),
+ opnd_enc.Supplement
+ );
+}
let mayStore = 1 in
-class MxMOVEM_MR<MxType TYPE, MxBead1Bit SIZE,
- MxOperand MEMOp, MxEncEA EA, MxEncExt EXT>
+class MxMOVEM_MR<MxType TYPE, bit SIZE_ENC,
+ MxOperand MEMOp, MxEncMemOp MEM_ENC>
: MxInst<(outs), (ins MEMOp:$dst, MxMoveMask:$mask),
- "movem."#TYPE.Prefix#"\t$mask, $dst", [],
- MxMOVEMEncoding<EA, EXT, SIZE, MxMOVEM_MR, MxBead16Imm<1>>>;
+ "movem."#TYPE.Prefix#"\t$mask, $dst", []> {
+ let Inst = MxMOVEMEncoding<MEM_ENC, SIZE_ENC, MxMOVEM_MR, "mask">.Value;
+}
+
+foreach AM = MxMoveSupportedAMs in {
+ foreach TYPE = [MxType16, MxType32] in
+ def MOVM # TYPE.Size # AM # m # TYPE.Postfix
+ : MxMOVEM_MR<TYPE, !if(!eq(TYPE, MxType16), MxMOVEM_W, MxMOVEM_L),
+ !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#AM).Op,
+ !cast<MxEncMemOp>("MxMoveDstOpEnc_"#AM)>;
+} // foreach AM
let mayLoad = 1 in
-class MxMOVEM_RM<MxType TYPE, MxBead1Bit SIZE,
- MxOperand MEMOp, MxEncEA EA, MxEncExt EXT>
+class MxMOVEM_RM<MxType TYPE, bit SIZE_ENC,
+ MxOperand MEMOp, MxEncMemOp MEM_ENC>
: MxInst<(outs), (ins MxMoveMask:$mask, MEMOp:$src),
- "movem."#TYPE.Prefix#"\t$src, $mask", [],
- MxMOVEMEncoding<EA, EXT, SIZE, MxMOVEM_RM, MxBead16Imm<0>>>;
-
-def MOVM32jm : MxMOVEM_MR<MxType32, MxMOVEM_L, MxType32.JOp, MxEncEAj_0, MxExtEmpty>;
-def MOVM32pm : MxMOVEM_MR<MxType32, MxMOVEM_L, MxType32.POp, MxEncEAp_0, MxExtI16_0>;
+ "movem."#TYPE.Prefix#"\t$src, $mask", []> {
+ let Inst = MxMOVEMEncoding<MEM_ENC, SIZE_ENC, MxMOVEM_RM, "mask">.Value;
+}
-def MOVM32mj : MxMOVEM_RM<MxType32, MxMOVEM_L, MxType32.JOp, MxEncEAj_1, MxExtEmpty>;
-def MOVM32mp : MxMOVEM_RM<MxType32, MxMOVEM_L, MxType32.POp, MxEncEAp_1, MxExtI16_1>;
+foreach AM = MxMoveSupportedAMs in {
+ foreach TYPE = [MxType16, MxType32] in
+ def MOVM # TYPE.Size # m # AM # TYPE.Postfix
+ : MxMOVEM_RM<TYPE, !if(!eq(TYPE, MxType16), MxMOVEM_W, MxMOVEM_L),
+ !cast<MxOpBundle>("MxOp"#TYPE.Size#"AddrMode_"#AM).Op,
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#AM)>;
+} // foreach AM
// Pseudo versions. These are required by virtual register spill/restore since
// the mask requires a real register to encode. These instructions will be expanded
@@ -495,21 +353,27 @@ def MOVM32mp_P : MxMOVEM_RM_Pseudo<MxType32r, MxType32.POp>;
/// 0 1 0 0 0 1 0 0 1 1 | MODE | REG
/// --------------------------------------------------
let Defs = [CCR] in
-class MxMoveToCCR<dag INS, MxEncEA EA, MxEncExt EXT>
- : MxInst<(outs CCRC:$dst), INS, "move.w\t$src, $dst", [],
- MxEncoding<EA.Reg, EA.DA, EA.Mode,
- MxBead4Bits<0b0011>, MxBead4Bits<0b0001>, MxBead2Bits<0b01>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>;
+class MxMoveToCCR<MxOperand MEMOp, MxEncMemOp SRC_ENC>
+ : MxInst<(outs CCRC:$dst), (ins MEMOp:$src), "move.w\t$src, $dst", []> {
+ let Inst = (ascend
+ (descend 0b0100010011, SRC_ENC.EA),
+ SRC_ENC.Supplement
+ );
+}
-class MxMoveToCCRPseudo<dag INS> : MxPseudo<(outs CCRC:$dst), INS>;
+class MxMoveToCCRPseudo<MxOperand MEMOp>
+ : MxPseudo<(outs CCRC:$dst), (ins MEMOp:$src)>;
-let mayLoad = 1 in {
-def MOV16cp : MxMoveToCCR<(ins MxType16d.POp:$src), MxEncEAp_1, MxExtI16_1>;
-def MOV8cp : MxMoveToCCRPseudo<(ins MxType8d.POp:$src)>;
-} // let mayLoad = 1
+let mayLoad = 1 in
+foreach AM = MxMoveSupportedAMs in {
+ def MOV16c # AM : MxMoveToCCR<!cast<MxOpBundle>("MxOp16AddrMode_"#AM).Op,
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#AM)>;
+ def MOV8c # AM : MxMoveToCCRPseudo<!cast<MxOpBundle>("MxOp8AddrMode_"#AM).Op>;
+} // foreach AM
-def MOV16cd : MxMoveToCCR<(ins MxType16d.ROp:$src), MxEncEAd_1, MxExtEmpty>;
-def MOV8cd : MxMoveToCCRPseudo<(ins MxType8d.ROp:$src)>;
+// Only data register is allowed.
+def MOV16cd : MxMoveToCCR<MxOp16AddrMode_d.Op, MxMoveSrcOpEnc_d>;
+def MOV8cd : MxMoveToCCRPseudo<MxOp8AddrMode_d.Op>;
/// Move from CCR
/// --------------------------------------------------
@@ -518,27 +382,38 @@ def MOV8cd : MxMoveToCCRPseudo<(ins MxType8d.ROp:$src)>;
/// | EFFECTIVE ADDRESS
/// 0 1 0 0 0 0 1 0 1 1 | MODE | REG
/// --------------------------------------------------
-let Uses = [CCR] in
-class MxMoveFromCCR<dag OUTS, dag INS, MxEncEA EA, MxEncExt EXT>
- : MxInst<OUTS, INS, "move.w\t$src, $dst", [],
- MxEncoding<EA.Reg, EA.DA, EA.Mode,
- MxBead4Bits<0b1011>, MxBead4Bits<0b0000>, MxBead2Bits<0b01>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>,
- Requires<[ IsM68010 ]>;
-
-class MxMoveFromCCRPseudo<dag INS> : MxPseudo<(outs), INS>;
-
-let mayStore = 1 in {
-def MOV16pc
- : MxMoveFromCCR<(outs), (ins MxType16d.POp:$dst, CCRC:$src), MxEncEAp_0, MxExtI16_0>;
-def MOV8pc : MxMoveFromCCRPseudo<(ins MxType8d.POp:$dst, CCRC:$src)>;
-} // let mayStore = 1
+let Uses = [CCR] in {
+class MxMoveFromCCR_R
+ : MxInst<(outs MxDRD16:$dst), (ins CCRC:$src), "move.w\t$src, $dst", []>,
+ Requires<[ IsM68010 ]> {
+ let Inst = (descend 0b0100001011, MxEncAddrMode_d<"dst">.EA);
+}
-def MOV16dc
- : MxMoveFromCCR<(outs MxType16d.ROp:$dst), (ins CCRC:$src), MxEncEAd_0, MxExtEmpty>;
+class MxMoveFromCCR_M<MxOperand MEMOp, MxEncMemOp DST_ENC>
+ : MxInst<(outs), (ins MEMOp:$dst, CCRC:$src), "move.w\t$src, $dst", []>,
+ Requires<[ IsM68010 ]> {
+ let Inst = (ascend
+ (descend 0b0100001011, DST_ENC.EA),
+ DST_ENC.Supplement
+ );
+}
-def MOV8dc : MxMoveFromCCRPseudo<(ins MxType8d.ROp:$dst, CCRC:$src)>;
+class MxMoveFromCCRPseudo<MxOperand MEMOp>
+ : MxPseudo<(outs), (ins MEMOp:$dst, CCRC:$src)>;
+} // let Uses = [CCR]
+let mayStore = 1 in
+foreach AM = MxMoveSupportedAMs in {
+ def MOV16 # AM # c
+ : MxMoveFromCCR_M<!cast<MxOpBundle>("MxOp16AddrMode_"#AM).Op,
+ !cast<MxEncMemOp>("MxMoveDstOpEnc_"#AM)>;
+ def MOV8 # AM # c
+ : MxMoveFromCCRPseudo<!cast<MxOpBundle>("MxOp8AddrMode_"#AM).Op>;
+} // foreach AM
+
+// Only data register is allowed.
+def MOV16dc : MxMoveFromCCR_R;
+def MOV8dc : MxMoveFromCCRPseudo<MxOp8AddrMode_d.Op>;
//===----------------------------------------------------------------------===//
// LEA
@@ -549,18 +424,18 @@ def MOV8dc : MxMoveFromCCRPseudo<(ins MxType8d.ROp:$dst, CCRC:$src)>;
/// ----------------------------------------------------
/// 0 1 0 0 | DST REG | 1 1 1 | MODE | REG
/// ----------------------------------------------------
-class MxLEA<MxOperand SRCOpd, ComplexPattern SRCPat, MxEncEA EA, MxEncExt EXT>
- : MxInst<(outs MxARD32:$dst), (ins SRCOpd:$src),
- "lea\t$src, $dst", [(set i32:$dst, SRCPat:$src)],
- MxEncoding<EA.Reg, EA.DA, EA.Mode,
- MxBead3Bits<0b111>, MxBeadReg<0>, MxBead4Bits<0x4>,
- EXT.Imm, EXT.B8, EXT.Scale, EXT.WL, EXT.DAReg>>;
-
-def LEA32p : MxLEA<MxARID32, MxCP_ARID, MxEncEAp_1, MxExtI16_1>;
-def LEA32f : MxLEA<MxARII32, MxCP_ARII, MxEncEAf_1, MxExtBrief_1>;
-def LEA32q : MxLEA<MxPCD32, MxCP_PCD, MxEncEAq, MxExtI16_1>;
-def LEA32b : MxLEA<MxAL32, MxCP_AL, MxEncEAb, MxExtI32_1>;
+class MxLEA<MxOpBundle SRC, MxEncMemOp SRC_ENC>
+ : MxInst<(outs MxARD32:$dst), (ins SRC.Op:$src),
+ "lea\t$src, $dst", [(set i32:$dst, SRC.Pat:$src)]> {
+ let Inst = (ascend
+ (descend 0b0100, (operand "$dst", 3), 0b111, SRC_ENC.EA),
+ SRC_ENC.Supplement
+ );
+}
+foreach AM = ["p", "f", "b", "q", "k"] in
+def LEA32 # AM : MxLEA<!cast<MxOpBundle>("MxOp32AddrMode_"#AM),
+ !cast<MxEncMemOp>("MxMoveSrcOpEnc_"#AM)>;
//===----------------------------------------------------------------------===//
// Pseudos
diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td
index 7e0c96a5b1f6..78aed521f13a 100644
--- a/llvm/lib/Target/M68k/M68kInstrFormats.td
+++ b/llvm/lib/Target/M68k/M68kInstrFormats.td
@@ -200,6 +200,11 @@ class MxEncEA<MxBead reg, MxBead mode, MxBead da = MxBeadIgnore> {
MxBead DA = da;
}
+class MxEncMemOp {
+ dag EA = (ascend);
+ dag Supplement = (ascend);
+}
+
// FIXME: Is there a way to factor out the addressing mode suffix (i.e.
// 'r', 'd', 'a', etc.) and use something like a multiclass instead?
def MxEncEAr_0: MxEncEA<MxBeadDAReg<0>, MxBead2Bits<0b00>>;
@@ -237,6 +242,126 @@ def MxEncEAq : MxEncEA<MxBead3Bits<0b010>, MxBead2Bits<0b11>, MxBead1Bit<1>>;
def MxEncEAk : MxEncEA<MxBead3Bits<0b011>, MxBead2Bits<0b11>, MxBead1Bit<1>>;
def MxEncEAi : MxEncEA<MxBead3Bits<0b100>, MxBead2Bits<0b11>, MxBead1Bit<1>>;
+class MxEncBriefExt<string reg_opnd, string disp_opnd,
+ bit size_w_l = false, int scale = 1,
+ string disp_encoder = ""> {
+ dag Value = (descend
+ // D/A + REGISTER
+ (operand "$"#reg_opnd, 4),
+ // W/L
+ size_w_l,
+ // SCALE
+ !cond(
+ !eq(scale, 1) : 0b00,
+ !eq(scale, 2) : 0b01,
+ !eq(scale, 4) : 0b10,
+ !eq(scale, 8) : 0b11
+ ),
+ 0b0,
+ // Displacement
+ (operand "$"#disp_opnd, 8, (encoder disp_encoder))
+ );
+}
+
+class MxEncAddrMode_d<string reg_opnd> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b000,
+ /*REGISTER*/(operand "$"#reg_opnd, 3));
+}
+
+class MxEncAddrMode_a<string reg_opnd> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b001,
+ /*REGISTER*/(operand "$"#reg_opnd, 3));
+}
+
+class MxEncAddrMode_r<string reg_opnd> : MxEncMemOp {
+ let EA = (descend /*MODE without the last bit*/0b00,
+ /*REGISTER with D/A bit*/(operand "$"#reg_opnd, 4));
+}
+
+class MxEncAddrMode_k<string opnd_name> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b111,
+ /*REGISTER*/0b011);
+
+ let Supplement = MxEncBriefExt<opnd_name#".index", opnd_name#".disp",
+ /*W/L*/true, /*SCALE*/1,
+ "encodePCRelImm<8>">.Value;
+}
+
+class MxEncAddrMode_q<string opnd_name> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b111,
+ /*REGISTER*/0b010);
+
+ // 16-bit Displacement
+ let Supplement = (operand "$"#opnd_name, 16,
+ (encoder "encodePCRelImm<16>"));
+}
+
+class MxEncAddrMode_p<string opnd_name> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b101,
+ /*REGISTER*/(operand "$"#opnd_name#".reg", 3));
+
+ // 16-bit Displacement
+ let Supplement = (operand "$"#opnd_name#".disp", 16,
+ (encoder "encodeRelocImm<16>"));
+}
+
+class MxEncAddrMode_f<string opnd_name> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b110,
+ /*REGISTER*/(operand "$"#opnd_name#".reg", 3));
+
+ let Supplement = MxEncBriefExt<opnd_name#".index", opnd_name#".disp",
+ /*W/L*/true, /*SCALE*/1,
+ "encodeRelocImm<8>">.Value;
+}
+
+class MxEncAddrMode_j<string reg_opnd> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b010,
+ /*REGISTER*/(operand "$"#reg_opnd, 3));
+}
+
+class MxEncAddrMode_i<string opnd_name, int size> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b111,
+ /*REGISTER*/0b100);
+
+ // Immediate
+ let Supplement =
+ !cond(
+ !eq(size, 8) : (descend 0b00000000, (operand "$"#opnd_name, 8)),
+ !eq(size, 16) : (operand "$"#opnd_name, 16),
+ !eq(size, 32) : (ascend (slice "$"#opnd_name, 31, 16),
+ (slice "$"#opnd_name, 15, 0))
+ );
+}
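
`slice` extracts a bit range from an operand's encoded value; the 32-bit case above uses it to split a large immediate into two 16-bit halves so they can be placed as separate words in the M68k instruction stream. A stand-alone sketch (hypothetical operand name `$imm`; operator defs come from Target.td in-tree):

```
def ascend;
def slice;

// Split a 32-bit operand into its high and low 16-bit slices; the same
// shape as the 32-bit immediate Supplement above.
def Imm32WordsSketch {
  dag Value = (ascend (slice "$imm", 31, 16), (slice "$imm", 15, 0));
}
```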
+
+// abs.W -> size_w_l = false
+// abs.L -> size_w_l = true
+class MxEncAddrMode_abs<string opnd_name, bit size_w_l = false> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b111,
+                      // Wrap the REGISTER part in another dag so that the
+                      // dag assigned to EA always has exactly two arguments;
+                      // this makes it easier for MOV instructions to reverse
+                      // the field order of their destination part.
+ /*REGISTER*/(descend 0b00, size_w_l));
+
+ // Absolute address
+ let Supplement = !if(size_w_l,
+ // abs.L
+ (operand "$"#opnd_name, 32, (encoder "encodeRelocImm<32>")),
+ // abs.W
+ (operand "$"#opnd_name, 16, (encoder "encodeRelocImm<16>"))
+ );
+}
+
+class MxEncAddrMode_o<string reg_opnd> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b011,
+ /*REGISTER*/(operand "$"#reg_opnd, 3));
+}
+
+class MxEncAddrMode_e<string reg_opnd> : MxEncMemOp {
+ let EA = (descend /*MODE*/0b100,
+ /*REGISTER*/(operand "$"#reg_opnd, 3));
+}
+
// Allows you to specify each bit of opcode
class MxEncOpMode<MxBead b0, MxBead b1 = MxBeadIgnore, MxBead b2 = MxBeadIgnore> {
MxBead B0 = b0;
@@ -332,6 +457,16 @@ def MxEncSize16 : MxEncSize<0b01>;
def MxEncSize32 : MxEncSize<0b10>;
def MxEncSize64 : MxEncSize<0b11>;
+// TODO: Remove "New" in the name after the codebead-based
+// representation is deprecated.
+class MxNewEncSize<bits<2> value> {
+ bits<2> Value = value;
+}
+def MxNewEncSize8 : MxNewEncSize<0b00>;
+def MxNewEncSize16 : MxNewEncSize<0b01>;
+def MxNewEncSize32 : MxNewEncSize<0b10>;
+def MxNewEncSize64 : MxNewEncSize<0b11>;
+
// M68k INSTRUCTION. Most instructions specify the location of an operand by
// using the effective address field in the operation word. The effective address
// is composed of two 3-bit fields: the mode field and the register field. The
@@ -357,6 +492,7 @@ class MxInst<dag outs, dag ins,
// Byte stream
field bits<192> Beads = beads.Value;
+ dag Inst = (ascend);
// Number of bytes
let Size = 0;
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 105c816f9885..b33469529ca5 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Regex.h"
#include <functional>
@@ -601,40 +602,26 @@ bool M68kInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool M68kInstrInfo::isPCRelRegisterOperandLegal(
const MachineOperand &MO) const {
assert(MO.isReg());
- const auto *MI = MO.getParent();
- const uint8_t *Beads = M68k::getMCInstrBeads(MI->getOpcode());
- assert(*Beads);
-
- // Only addressing mode k has (non-pc) register with PCRel
- // So we're looking for EA Beads equal to
- // `3Bits<011>_1Bit<1>_2Bits<11>`
- // FIXME: There is an important caveat and two assumptions
- // here: The caveat is that EA encoding always sit on the LSB.
- // Where the assumptions are that if there are more than one
- // operands, the EA encoding for the source operand always sit
- // on the LSB. At the same time, k addressing mode can not be used
- // on destination operand.
- // The last assumption is kinda dirty so we need to find a way around
- // it
- const uint8_t EncEAk[3] = {0b011, 0b1, 0b11};
- for (const uint8_t Pat : EncEAk) {
- uint8_t Bead = *(Beads++);
- if (!Bead)
- return false;
- switch (Bead & 0xF) {
- default:
- return false;
- case M68kBeads::Bits1:
- case M68kBeads::Bits2:
- case M68kBeads::Bits3: {
- uint8_t Val = (Bead & 0xF0) >> 4;
- if (Val != Pat)
- return false;
- }
- }
- }
- return true;
+ // Check whether this MO belongs to an instruction with addressing mode 'k',
+ // Refer to TargetInstrInfo.h for more information about this function.
+
+ const MachineInstr *MI = MO.getParent();
+ const unsigned NameIndices = M68kInstrNameIndices[MI->getOpcode()];
+ StringRef InstrName(&M68kInstrNameData[NameIndices]);
+ const unsigned OperandNo = MI->getOperandNo(&MO);
+
+ // If this machine operand is the 2nd operand, then check
+ // whether the instruction has destination addressing mode 'k'.
+ if (OperandNo == 1)
+ return Regex("[A-Z]+(8|16|32)k[a-z](_TC)?$").match(InstrName);
+
+ // If this machine operand is the last one, then check
+ // whether the instruction has source addressing mode 'k'.
+ if (OperandNo == MI->getNumExplicitOperands() - 1)
+ return Regex("[A-Z]+(8|16|32)[a-z]k(_TC)?$").match(InstrName);
+
+ return false;
}
void M68kInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td
index c581dd91eaaa..67500af6bfb2 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -291,13 +291,13 @@ def MxARIPD32_TC : MxMemOp<(ops AR32_TC), MxSize32, "e", "printARIPD32Mem", MxA
// extension word. The reference is classified as a data reference with the
// exception of the jump and jump-to-subroutine instructions.
def MxARID : MxOpClass<"ARID">;
-def MxARID8 : MxMemOp<(ops i16imm, AR32), MxSize8, "p", "printARID8Mem", MxARID>;
-def MxARID16 : MxMemOp<(ops i16imm, AR32), MxSize16, "p", "printARID16Mem", MxARID>;
-def MxARID32 : MxMemOp<(ops i16imm, AR32), MxSize32, "p", "printARID32Mem", MxARID>;
+def MxARID8 : MxMemOp<(ops i16imm:$disp, AR32:$reg), MxSize8, "p", "printARID8Mem", MxARID>;
+def MxARID16 : MxMemOp<(ops i16imm:$disp, AR32:$reg), MxSize16, "p", "printARID16Mem", MxARID>;
+def MxARID32 : MxMemOp<(ops i16imm:$disp, AR32:$reg), MxSize32, "p", "printARID32Mem", MxARID>;
-def MxARID8_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize8, "p", "printARID8Mem", MxARID>;
-def MxARID16_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize16, "p", "printARID16Mem", MxARID>;
-def MxARID32_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize32, "p", "printARID32Mem", MxARID>;
+def MxARID8_TC : MxMemOp<(ops i16imm:$disp, AR32_TC:$reg), MxSize8, "p", "printARID8Mem", MxARID>;
+def MxARID16_TC : MxMemOp<(ops i16imm:$disp, AR32_TC:$reg), MxSize16, "p", "printARID16Mem", MxARID>;
+def MxARID32_TC : MxMemOp<(ops i16imm:$disp, AR32_TC:$reg), MxSize32, "p", "printARID32Mem", MxARID>;
// ADDRESS REGISTER INDIRECT WITH INDEX. This addressing mode requires one word
// of extension. The address of the operand is the sum of the address in the
@@ -306,13 +306,19 @@ def MxARID32_TC : MxMemOp<(ops i16imm, AR32_TC), MxSize32, "p", "printARID32Me
// The reference is classified as a data reference with the exception of the
// jump and jump-to-subroutine instructions
def MxARII : MxOpClass<"ARII">;
-def MxARII8 : MxMemOp<(ops i8imm, AR32, XR32), MxSize8, "f", "printARII8Mem", MxARII>;
-def MxARII16 : MxMemOp<(ops i8imm, AR32, XR32), MxSize16, "f", "printARII16Mem", MxARII>;
-def MxARII32 : MxMemOp<(ops i8imm, AR32, XR32), MxSize32, "f", "printARII32Mem", MxARII>;
-
-def MxARII8_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize8, "f", "printARII8Mem", MxARII>;
-def MxARII16_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize16, "f", "printARII16Mem", MxARII>;
-def MxARII32_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize32, "f", "printARII32Mem", MxARII>;
+def MxARII8 : MxMemOp<(ops i8imm:$disp, AR32:$reg, XR32:$index),
+ MxSize8, "f", "printARII8Mem", MxARII>;
+def MxARII16 : MxMemOp<(ops i8imm:$disp, AR32:$reg, XR32:$index),
+ MxSize16, "f", "printARII16Mem", MxARII>;
+def MxARII32 : MxMemOp<(ops i8imm:$disp, AR32:$reg, XR32:$index),
+ MxSize32, "f", "printARII32Mem", MxARII>;
+
+def MxARII8_TC : MxMemOp<(ops i8imm:$disp, AR32_TC:$reg, XR32_TC:$index),
+ MxSize8, "f", "printARII8Mem", MxARII>;
+def MxARII16_TC : MxMemOp<(ops i8imm:$disp, AR32_TC:$reg, XR32_TC:$index),
+ MxSize16, "f", "printARII16Mem", MxARII>;
+def MxARII32_TC : MxMemOp<(ops i8imm:$disp, AR32_TC:$reg, XR32_TC:$index),
+ MxSize32, "f", "printARII32Mem", MxARII>;
// ABSOLUTE SHORT ADDRESS. This addressing mode requires one word of extension.
// The address of the operand is the extension word. The 16-bit address is sign
@@ -360,9 +366,9 @@ def MxPCD32 : MxMemOp<(ops i16imm), MxSize32, "q", "printPCD32Mem", MxPCD>;
// word, and the contents of the index register. The value in the program
// counter is the address of the extension word. This reference is classified as
// a program reference.
-def MxPCI8 : MxMemOp<(ops i8imm, XR32), MxSize8, "k", "printPCI8Mem", MxPCI>;
-def MxPCI16 : MxMemOp<(ops i8imm, XR32), MxSize16, "k", "printPCI16Mem", MxPCI>;
-def MxPCI32 : MxMemOp<(ops i8imm, XR32), MxSize32, "k", "printPCI32Mem", MxPCI>;
+def MxPCI8 : MxMemOp<(ops i8imm:$disp, XR32:$index), MxSize8, "k", "printPCI8Mem", MxPCI>;
+def MxPCI16 : MxMemOp<(ops i8imm:$disp, XR32:$index), MxSize16, "k", "printPCI16Mem", MxPCI>;
+def MxPCI32 : MxMemOp<(ops i8imm:$disp, XR32:$index), MxSize32, "k", "printPCI32Mem", MxPCI>;
} // OPERAND_PCREL
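
Naming the sub-operands (`$disp`, `$reg`, `$index`) in these `(ops ...)` lists is what lets the new encoding classes address each piece individually via `"$<operand>.disp"` and friends. A sketch mirroring the shape of MxEncAddrMode_p above (hypothetical names; operator defs come from Target.td in-tree):

```
def descend;
def operand;

// (d16,An)-style encoding that picks the named sub-operands out of a
// memory operand called $dst.
class EncPSketch<string opnd> {
  dag EA   = (descend 0b101, (operand "$" # opnd # ".reg", 3));
  dag Disp = (operand "$" # opnd # ".disp", 16);
}
def EncPSketchDst : EncPSketch<"dst">;
```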
def MxImm : AsmOperandClass {
@@ -633,6 +639,74 @@ class MxType<ValueType vt, string prefix, string postfix,
PatFrag Load = load;
}
+// Provides an alternative way to access the MxOperand and
+// patterns w.r.t a specific addressing mode.
+class MxOpBundle<int size, MxOperand op, ComplexPattern pat> {
+ int Size = size;
+ MxOperand Op = op;
+ ComplexPattern Pat = pat;
+}
+
+class MxImmOpBundle<int size, MxOperand op, PatFrag pat>
+ : MxOpBundle<size, op, ?> {
+ PatFrag ImmPat = pat;
+}
+
+// TODO: We can use MxOp<S>AddrMode_<AM> in more places to
+// replace MxType-based operand factoring.
+foreach size = [8, 16, 32] in {
+ // Dn
+ def MxOp#size#AddrMode_d
+ : MxOpBundle<size, !cast<MxOperand>("MxDRD"#size), ?>;
+
+ // (An)
+ def MxOp#size#AddrMode_j
+ : MxOpBundle<size, !cast<MxOperand>("MxARI"#size), MxCP_ARI>;
+
+ // (An)+
+ def MxOp#size#AddrMode_o
+ : MxOpBundle<size, !cast<MxOperand>("MxARIPI"#size), MxCP_ARIPI>;
+
+ // -(An)
+ def MxOp#size#AddrMode_e
+ : MxOpBundle<size, !cast<MxOperand>("MxARIPD"#size), MxCP_ARIPD>;
+
+ // (i,An)
+ def MxOp#size#AddrMode_p
+ : MxOpBundle<size, !cast<MxOperand>("MxARID"#size), MxCP_ARID>;
+
+ // (i,An,Xn)
+ def MxOp#size#AddrMode_f
+ : MxOpBundle<size, !cast<MxOperand>("MxARII"#size), MxCP_ARII>;
+
+ // (ABS).L
+ def MxOp#size#AddrMode_b
+ : MxOpBundle<size, !cast<MxOperand>("MxAL"#size), MxCP_AL>;
+
+ // (i,PC)
+ def MxOp#size#AddrMode_q
+ : MxOpBundle<size, !cast<MxOperand>("MxPCD"#size), MxCP_PCD>;
+
+ // (i,PC,Xn)
+ def MxOp#size#AddrMode_k
+ : MxOpBundle<size, !cast<MxOperand>("MxPCI"#size), MxCP_PCI>;
+
+ // #imm
+ def MxOp#size#AddrMode_i
+ : MxImmOpBundle<size, !cast<MxOperand>("Mxi"#size#"imm"),
+ !cast<PatFrag>("MximmSExt"#size)>;
+} // foreach size = [8, 16, 32]
+
+foreach size = [16, 32] in {
+ // An
+ def MxOp#size#AddrMode_a
+ : MxOpBundle<size, !cast<MxOperand>("MxARD"#size), ?>;
+
+ // Xn
+ def MxOp#size#AddrMode_r
+ : MxOpBundle<size, !cast<MxOperand>("MxXRD"#size), ?>;
+} // foreach size = [16, 32]
+
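
Instruction classes consume these bundles through defaulted template arguments that are resolved by name, so each definition only has to spell out its size and addressing-mode letter. A stand-alone sketch of the idiom (all names hypothetical):

```
class OpBundle<int size, string op> {
  int Size = size;
  string Op = op;
}

foreach size = [8, 16, 32] in
  def MyOp # size # AddrMode_j : OpBundle<size, "ARI" # size>;

// The default argument is computed from the earlier template parameter.
class LoadSketch<int SZ,
                 OpBundle B = !cast<OpBundle>("MyOp" # SZ # "AddrMode_j")> {
  string Operand = B.Op;
}
def Load32Sketch : LoadSketch<32>;  // Operand = "ARI32"
```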
class MxType8Class<string rLet, MxOperand reg>
: MxType<i8, "b", "", rLet, reg,
MxARI8, MxCP_ARI,
diff --git a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
index f1967ec11928..b50354597a49 100644
--- a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
+++ b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
@@ -24,49 +24,55 @@
///
//===----------------------------------------------------------------------===//
-def MxRODI_R : MxBead1Bit<0>;
-def MxRODI_L : MxBead1Bit<1>;
+defvar MxROKind_R = true;
+defvar MxROKind_I = false;
-def MxROOP_AS : MxBead2Bits<0b00>;
-def MxROOP_LS : MxBead2Bits<0b01>;
-def MxROOP_ROX : MxBead2Bits<0b10>;
-def MxROOP_RO : MxBead2Bits<0b11>;
+defvar MxRODI_R = false;
+defvar MxRODI_L = true;
+
+defvar MxROOP_AS = 0b00;
+defvar MxROOP_LS = 0b01;
+defvar MxROOP_ROX = 0b10;
+defvar MxROOP_RO = 0b11;
/// ------------+---------+---+------+---+------+---------
/// F E D C | B A 9 | 8 | 7 6 | 5 | 4 3 | 2 1 0
/// ------------+---------+---+------+---+------+---------
/// 1 1 1 0 | REG/IMM | D | SIZE |R/I| OP | REG
/// ------------+---------+---+------+---+------+---------
-class MxSREncoding_R<MxBead1Bit DIRECTION, MxBead2Bits ROOP, MxEncSize SIZE>
- : MxEncoding<MxBeadDReg<0>, ROOP, MxBead1Bit<1>, SIZE, DIRECTION,
- MxBeadDReg<2>, MxBead4Bits<0b1110>>;
-
-class MxSREncoding_I<MxBead1Bit DIRECTION, MxBead2Bits ROOP, MxEncSize SIZE>
- : MxEncoding<MxBeadDReg<0>, ROOP, MxBead1Bit<0>, SIZE, DIRECTION,
- MxBead3Imm<2, 1>, MxBead4Bits<0b1110>>;
+class MxSREncoding<bit kind, string src_opnd, string dst_opnd,
+ bit direction, bits<2> ro_op, MxNewEncSize size> {
+ dag Value = (descend 0b1110,
+ // REG/IMM
+ (operand "$"#src_opnd, 3),
+ direction, size.Value, kind, ro_op,
+ // REG
+ (operand "$"#dst_opnd, 3)
+ );
+}
// $reg <- $reg op $reg
-class MxSR_DD<string MN, MxType TYPE, SDNode NODE,
- MxBead1Bit RODI, MxBead2Bits ROOP>
+class MxSR_DD<string MN, MxType TYPE, SDNode NODE, bit RODI, bits<2> ROOP>
: MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd),
MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, (NODE TYPE.VT:$src, TYPE.VT:$opd))],
- MxSREncoding_R<RODI, ROOP,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size)>>;
+ [(set TYPE.VT:$dst, (NODE TYPE.VT:$src, TYPE.VT:$opd))]> {
+ let Inst = MxSREncoding<MxROKind_R, "opd", "dst", RODI, ROOP,
+ !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size)>.Value;
+}
// $reg <- $reg op $imm
-class MxSR_DI<string MN, MxType TYPE, SDNode NODE,
- MxBead1Bit RODI, MxBead2Bits ROOP>
+class MxSR_DI<string MN, MxType TYPE, SDNode NODE, bit RODI, bits<2> ROOP>
: MxInst<(outs TYPE.ROp:$dst),
(ins TYPE.ROp:$src, !cast<Operand>("Mxi"#TYPE.Size#"imm"):$opd),
MN#"."#TYPE.Prefix#"\t$opd, $dst",
[(set TYPE.VT:$dst,
(NODE TYPE.VT:$src,
- !cast<ImmLeaf>("Mximm"#TYPE.Size#"_1to8"):$opd))],
- MxSREncoding_I<RODI, ROOP,
- !cast<MxEncSize>("MxEncSize"#TYPE.Size)>>;
+ !cast<ImmLeaf>("Mximm"#TYPE.Size#"_1to8"):$opd))]> {
+ let Inst = MxSREncoding<MxROKind_I, "opd", "dst", RODI, ROOP,
+ !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size)>.Value;
+}
-multiclass MxSROp<string MN, SDNode NODE, MxBead1Bit RODI, MxBead2Bits ROOP> {
+multiclass MxSROp<string MN, SDNode NODE, bit RODI, bits<2> ROOP> {
let Defs = [CCR] in {
let Constraints = "$src = $dst" in {
diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.cpp b/llvm/lib/Target/M68k/M68kMachineFunction.cpp
index b1e7369116d7..ccc8f87db502 100644
--- a/llvm/lib/Target/M68k/M68kMachineFunction.cpp
+++ b/llvm/lib/Target/M68k/M68kMachineFunction.cpp
@@ -18,3 +18,10 @@
using namespace llvm;
void M68kMachineFunctionInfo::anchor() {}
+
+MachineFunctionInfo *M68kMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<M68kMachineFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.h b/llvm/lib/Target/M68k/M68kMachineFunction.h
index 93c5255199d4..6ddf53d7d693 100644
--- a/llvm/lib/Target/M68k/M68kMachineFunction.h
+++ b/llvm/lib/Target/M68k/M68kMachineFunction.h
@@ -21,8 +21,6 @@
namespace llvm {
class M68kMachineFunctionInfo : public MachineFunctionInfo {
- MachineFunction &MF;
-
/// Non-zero if the function has base pointer and makes call to
/// llvm.eh.sjlj.setjmp. When non-zero, the value is a displacement from the
/// frame pointer to a slot where the base pointer is stashed.
@@ -68,7 +66,12 @@ class M68kMachineFunctionInfo : public MachineFunctionInfo {
unsigned ArgumentStackSize = 0;
public:
- explicit M68kMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ explicit M68kMachineFunctionInfo(const MachineFunction &MF) {}
+
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
bool getRestoreBasePointer() const { return RestoreBasePointerOffset != 0; }
void setRestoreBasePointer(const MachineFunction *MF);
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
index 0cae7ac4e312..5b632299fa4c 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
@@ -19,6 +19,7 @@
#include "MCTargetDesc/M68kMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.h b/llvm/lib/Target/M68k/M68kRegisterInfo.h
index 7f822e1cb34f..fc55e19a958b 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.h
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.h
@@ -97,6 +97,14 @@ public:
bool canRealignStack(const MachineFunction &MF) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
+
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const override {
+ if (RC == &M68k::CCRCRegClass)
+ return &M68k::DR32RegClass;
+ return RC;
+ }
+
unsigned getStackRegister() const { return StackPtr; }
unsigned getBaseRegister() const { return BasePtr; }
unsigned getGlobalBaseRegister() const { return GlobalBasePtr; }
diff --git a/llvm/lib/Target/M68k/M68kSubtarget.h b/llvm/lib/Target/M68k/M68kSubtarget.h
index 9bf2984983a1..9dd52095959e 100644
--- a/llvm/lib/Target/M68k/M68kSubtarget.h
+++ b/llvm/lib/Target/M68k/M68kSubtarget.h
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
index 9227bd6c3a78..6b093623a106 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
+#include <type_traits>
using namespace llvm;
@@ -39,31 +40,30 @@ class M68kMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
MCContext &Ctx;
-public:
- M68kMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : MCII(mcii), Ctx(ctx) {}
+ void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ APInt &Inst, APInt &Scratch,
+ const MCSubtargetInfo &STI) const;
- ~M68kMCCodeEmitter() override {}
+ void getMachineOpValue(const MCInst &MI, const MCOperand &Op,
+ unsigned InsertPos, APInt &Value,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- // TableGen'erated function
- const uint8_t *getGenInstrBeads(const MCInst &MI) const {
- return M68k::getMCInstrBeads(MI.getOpcode());
- }
+ template <unsigned Size>
+ void encodeRelocImm(const MCInst &MI, unsigned OpIdx, unsigned InsertPos,
+ APInt &Value, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned encodeBits(unsigned ThisByte, uint8_t Bead, const MCInst &MI,
- const MCInstrDesc &Desc, uint64_t &Buffer,
- unsigned Offset, SmallVectorImpl<MCFixup> &Fixups,
+ template <unsigned Size>
+ void encodePCRelImm(const MCInst &MI, unsigned OpIdx, unsigned InsertPos,
+ APInt &Value, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned encodeReg(unsigned ThisByte, uint8_t Bead, const MCInst &MI,
- const MCInstrDesc &Desc, uint64_t &Buffer, unsigned Offset,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+public:
+ M68kMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {}
- unsigned encodeImm(unsigned ThisByte, uint8_t Bead, const MCInst &MI,
- const MCInstrDesc &Desc, uint64_t &Buffer, unsigned Offset,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ ~M68kMCCodeEmitter() override {}
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -72,316 +72,176 @@ public:
} // end anonymous namespace
-unsigned M68kMCCodeEmitter::encodeBits(unsigned ThisByte, uint8_t Bead,
- const MCInst &MI,
- const MCInstrDesc &Desc,
- uint64_t &Buffer, unsigned Offset,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- unsigned Num = 0;
- switch (Bead & 0xF) {
- case M68kBeads::Bits1:
- Num = 1;
- break;
- case M68kBeads::Bits2:
- Num = 2;
- break;
- case M68kBeads::Bits3:
- Num = 3;
- break;
- case M68kBeads::Bits4:
- Num = 4;
- break;
- }
- unsigned char Val = (Bead & 0xF0) >> 4;
-
- LLVM_DEBUG(dbgs() << "\tEncodeBits"
- << " Num: " << Num << " Val: 0x");
- LLVM_DEBUG(dbgs().write_hex(Val) << "\n");
+#include "M68kGenMCCodeEmitter.inc"
- Buffer |= (Val << Offset);
-
- return Num;
-}
+// Select the proper unsigned integer type from a bit size.
+template <unsigned Size> struct select_uint_t {
+ using type = typename std::conditional<
+ Size == 8, uint8_t,
+ typename std::conditional<
+ Size == 16, uint16_t,
+ typename std::conditional<Size == 32, uint32_t,
+ uint64_t>::type>::type>::type;
+};
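The trait above is pure compile-time dispatch; a minimal sketch of how it resolves (illustrative only, relying on the `<type_traits>` include added earlier in this file):
```
// Illustrative only: compile-time resolution of select_uint_t.
static_assert(std::is_same<select_uint_t<8>::type, uint8_t>::value, "");
static_assert(std::is_same<select_uint_t<16>::type, uint16_t>::value, "");
static_assert(std::is_same<select_uint_t<32>::type, uint32_t>::value, "");
// Any size not matched above falls through to the widest type:
static_assert(std::is_same<select_uint_t<48>::type, uint64_t>::value, "");
```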
-unsigned M68kMCCodeEmitter::encodeReg(unsigned ThisByte, uint8_t Bead,
- const MCInst &MI, const MCInstrDesc &Desc,
- uint64_t &Buffer, unsigned Offset,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- bool DA, Reg;
- switch (Bead & 0xF) {
- default:
- llvm_unreachable("Unrecognized Bead code for register type");
- case M68kBeads::DAReg:
- Reg = true;
- DA = true;
- break;
- case M68kBeads::DA:
- Reg = false;
- DA = true;
- break;
- case M68kBeads::DReg:
- case M68kBeads::Reg:
- Reg = true;
- DA = false;
- break;
+// On an LE host:
+//    MSB     LSB     MSB     LSB
+// | 0x12 0x34 | 0xAB 0xCD |  ->  | 0xAB 0xCD | 0x12 0x34 |
+// (On a BE host nothing changes)
+template <typename value_t> static value_t swapWord(value_t Val) {
+ const unsigned NumWords = sizeof(Val) / 2;
+ if (NumWords <= 1)
+ return Val;
+ Val = support::endian::byte_swap(Val, support::big);
+ value_t NewVal = 0;
+ for (unsigned i = 0U; i != NumWords; ++i) {
+ uint16_t Part = (Val >> (i * 16)) & 0xFFFF;
+ Part = support::endian::byte_swap(Part, support::big);
+ NewVal |= (Part << (i * 16));
}
+ return NewVal;
+}
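To see why the pairwise byte_swap calls amount to a 16-bit word exchange, here is a trace of `swapWord<uint32_t>(0x1234ABCD)` on a little-endian host (a sketch, not part of the patch):
```
// byte_swap(Val, big)   : 0x1234ABCD -> 0xCDAB3412 (full byte reversal)
// word 0: 0x3412 -> byte_swap -> 0x1234 (bytes within the word restored)
// word 1: 0xCDAB -> byte_swap -> 0xABCD
// result: 0xABCD1234 -- the 16-bit words are exchanged, bytes kept intact.
uint32_t Swapped = swapWord<uint32_t>(0x1234ABCDu);
assert(Swapped == 0xABCD1234u);
// On a BE host both byte_swap calls are no-ops and the value round-trips.
```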
- unsigned Op = (Bead & 0x70) >> 4;
- bool Alt = (Bead & 0x80);
- LLVM_DEBUG(dbgs() << "\tEncodeReg"
- << " Op: " << Op << ", DA: " << DA << ", Reg: " << Reg
- << ", Alt: " << Alt << "\n");
-
- auto MIOpIdx = M68k::getLogicalOperandIdx(MI.getOpcode(), Op);
- bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL;
-
- MCOperand MCO;
- if (M68kII::hasMultiMIOperands(MI.getOpcode(), Op)) {
- if (IsPCRel) {
- assert(Alt &&
- "PCRel addresses use Alt bead register encoding by default");
- MCO = MI.getOperand(MIOpIdx + M68k::PCRelIndex);
- } else {
- MCO = MI.getOperand(MIOpIdx + (Alt ? M68k::MemIndex : M68k::MemBase));
- }
+// Figure out which byte we're at in big-endian mode.
+template <unsigned Size> static unsigned getBytePosition(unsigned BitPos) {
+ if (Size % 16) {
+ return static_cast<unsigned>(BitPos / 8 + ((BitPos & 0b1111) < 8 ? 1 : -1));
} else {
- assert(!Alt && "You cannot use Alt register with a simple operand");
- MCO = MI.getOperand(MIOpIdx);
+ assert(!(BitPos & 0b1111) && "Not aligned to word boundary?");
+ return BitPos / 8;
}
-
- unsigned RegNum = MCO.getReg();
- auto RI = Ctx.getRegisterInfo();
-
- unsigned Written = 0;
- if (Reg) {
- uint32_t Val = RI->getEncodingValue(RegNum);
- Buffer |= (Val & 7) << Offset;
- Offset += 3;
- Written += 3;
- }
-
- if (DA) {
- Buffer |= (uint64_t)M68kII::isAddressRegister(RegNum) << Offset;
- Written++;
- }
-
- return Written;
-}
-
-static unsigned EmitConstant(uint64_t Val, unsigned Size, unsigned Pad,
- uint64_t &Buffer, unsigned Offset) {
- assert(Size + Offset <= 64 && isUIntN(Size, Val) && "Value does not fit");
-
- // Writing Value in host's endianness
- Buffer |= (Val & ((1ULL << Size) - 1)) << Offset;
- return Size + Pad;
}
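The +1/-1 flip exists because, within each big-endian 16-bit word, the low-numbered bits end up at the higher byte address. A trace for Size = 8, mechanically following the code above:
```
// getBytePosition<8>(BitPos) -- word-unaligned case:
//   BitPos = 0  -> 0/8  + 1 = byte 1  (low half of word 0)
//   BitPos = 8  -> 8/8  - 1 = byte 0  (high half of word 0)
//   BitPos = 16 -> 16/8 + 1 = byte 3  (low half of word 1)
//   BitPos = 24 -> 24/8 - 1 = byte 2  (high half of word 1)
// The word-aligned case (Size % 16 == 0) is a plain BitPos / 8.
```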
-unsigned M68kMCCodeEmitter::encodeImm(unsigned ThisByte, uint8_t Bead,
- const MCInst &MI, const MCInstrDesc &Desc,
- uint64_t &Buffer, unsigned Offset,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- unsigned ThisWord = ThisByte / 2;
- unsigned Size = 0;
- unsigned Pad = 0;
- unsigned FixOffset = 0;
- int64_t Addendum = 0;
- bool NoExpr = false;
-
- unsigned Type = Bead & 0xF;
- unsigned Op = (Bead & 0x70) >> 4;
- bool Alt = (Bead & 0x80);
-
- auto MIOpIdx = M68k::getLogicalOperandIdx(MI.getOpcode(), Op);
- bool IsPCRel = Desc.OpInfo[MIOpIdx].OperandType == MCOI::OPERAND_PCREL;
-
- // The PC value upon instruction reading of a short jump will point to the
- // next instruction, thus we need to compensate 2 bytes, which is the diff
- // between the patch point and the PC.
- if (IsPCRel && ThisWord == 0)
- Addendum -= 2;
-
- switch (Type) {
- // ??? what happens if it is not byte aligned
- // ??? is it even possible
- case M68kBeads::Disp8:
- Size = 8;
- Pad = 0;
- FixOffset = ThisByte + 1;
- Addendum += 1;
- break;
- case M68kBeads::Imm8:
- Size = 8;
- Pad = 8;
- FixOffset = ThisByte;
- break;
- case M68kBeads::Imm16:
- Size = 16;
- Pad = 0;
- FixOffset = ThisByte;
- break;
- case M68kBeads::Imm32:
- Size = 32;
- Pad = 0;
- FixOffset = ThisByte;
- break;
- case M68kBeads::Imm3:
- Size = 3;
- Pad = 0;
- NoExpr = true;
- break;
- }
-
- LLVM_DEBUG(dbgs() << "\tEncodeImm"
- << " Op: " << Op << ", Size: " << Size << ", Alt: " << Alt
- << "\n");
-
- MCOperand MCO;
- if (M68kII::hasMultiMIOperands(MI.getOpcode(), Op)) {
-
- if (IsPCRel) {
- assert(!Alt && "You cannot use ALT operand with PCRel");
- MCO = MI.getOperand(MIOpIdx + M68k::PCRelDisp);
- } else {
- MCO = MI.getOperand(MIOpIdx + (Alt ? M68k::MemOuter : M68k::MemDisp));
+// We need special handling for relocatable & pc-relative operands that are
+// larger than a word.
+// An M68k instruction is aligned by word (16 bits). That means 32-bit
+// (& 64-bit) immediate values are split into hi & lo words and placed
+// at the lower & higher addresses, respectively. For immediate values that
+// can be easily expressed in TableGen, we explicitly rotate the word
+// ordering like this:
+// ```
+// (ascend (slice "$imm", 31, 16), (slice "$imm", 15, 0))
+// ```
+// For operands that call into encoder functions, we need to use the
+// `swapWord` function to ensure the correct word ordering on an LE host.
+// Note that M68kMCCodeEmitter does massage the _byte_ ordering of the final
+// encoded instruction, but it assumes everything aligns on word boundaries.
+// So things will go wrong if we don't take care of the _word_ ordering here.
+template <unsigned Size>
+void M68kMCCodeEmitter::encodeRelocImm(const MCInst &MI, unsigned OpIdx,
+ unsigned InsertPos, APInt &Value,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ using value_t = typename select_uint_t<Size>::type;
+ const MCOperand &MCO = MI.getOperand(OpIdx);
+ if (MCO.isImm()) {
+ Value |= swapWord<value_t>(static_cast<value_t>(MCO.getImm()));
+ } else if (MCO.isExpr()) {
+ const MCExpr *Expr = MCO.getExpr();
+
+ // Absolute address
+ int64_t Addr;
+ if (Expr->evaluateAsAbsolute(Addr)) {
+ Value |= swapWord<value_t>(static_cast<value_t>(Addr));
+ return;
}
- if (MCO.isExpr()) {
- assert(!NoExpr && "Cannot use expression here");
- const MCExpr *Expr = MCO.getExpr();
+ // Relocatable address
+ unsigned InsertByte = getBytePosition<Size>(InsertPos);
+ Fixups.push_back(MCFixup::create(InsertByte, Expr,
+ getFixupForSize(Size, /*IsPCRel=*/false),
+ MI.getLoc()));
+ }
+}
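Concretely, for a 32-bit immediate 0x12345678 the pre-rotation combines with the word-wise big-endian writer in `encodeInstruction` so the hi word lands at the lower address (a sketch, assuming the operand occupies the low 32 bits of `Value`):
```
// Illustrative only:
uint32_t Imm = 0x12345678u;
uint32_t Rotated = swapWord<uint32_t>(Imm); // 0x56781234 on an LE host
// encodeInstruction later emits 16 bits at a time, low chunk first,
// each chunk written big-endian:
//   chunk 0: 0x1234 -> bytes 0x12 0x34   (hi word, lower address)
//   chunk 1: 0x5678 -> bytes 0x56 0x78   (lo word, higher address)
```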
- // This only makes sense for PCRel instructions since PC points to the
- // extension word and Disp8 for example is right justified and requires
- // correction. E.g. R_68K_PC32 is calculated as S + A - P, P for Disp8
- // will be EXTENSION_WORD + 1 thus we need to have A equal to 1 to
- // compensate.
- // TODO count extension words
- if (IsPCRel && Addendum != 0) {
+template <unsigned Size>
+void M68kMCCodeEmitter::encodePCRelImm(const MCInst &MI, unsigned OpIdx,
+ unsigned InsertPos, APInt &Value,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MCO = MI.getOperand(OpIdx);
+ if (MCO.isImm()) {
+ using value_t = typename select_uint_t<Size>::type;
+ Value |= swapWord<value_t>(static_cast<value_t>(MCO.getImm()));
+ } else if (MCO.isExpr()) {
+ const MCExpr *Expr = MCO.getExpr();
+ unsigned InsertByte = getBytePosition<Size>(InsertPos);
+
+    // Special handling for sizes smaller than a word.
+ if (Size < 16) {
+ int LabelOffset = 0;
+ if (InsertPos < 16)
+ // If the patch point is at the first word, PC is pointing at the
+ // next word.
+ LabelOffset = InsertByte - 2;
+ else if (InsertByte % 2)
+        // Otherwise the PC points at the first byte of this word, so we
+        // need to account for the offset between the PC and the fixup byte.
+ LabelOffset = 1;
+
+ if (LabelOffset)
Expr = MCBinaryExpr::createAdd(
- Expr, MCConstantExpr::create(Addendum, Ctx), Ctx);
- }
-
- Fixups.push_back(MCFixup::create(
- FixOffset, Expr, getFixupForSize(Size, IsPCRel), MI.getLoc()));
- // Write zeros
- return EmitConstant(0, Size, Pad, Buffer, Offset);
+ Expr, MCConstantExpr::create(LabelOffset, Ctx), Ctx);
}
- } else {
- MCO = MI.getOperand(MIOpIdx);
- if (MCO.isExpr()) {
- assert(!NoExpr && "Cannot use expression here");
- const MCExpr *Expr = MCO.getExpr();
-
- if (Addendum != 0) {
- Expr = MCBinaryExpr::createAdd(
- Expr, MCConstantExpr::create(Addendum, Ctx), Ctx);
- }
-
- Fixups.push_back(MCFixup::create(
- FixOffset, Expr, getFixupForSize(Size, IsPCRel), MI.getLoc()));
- // Write zeros
- return EmitConstant(0, Size, Pad, Buffer, Offset);
- }
+ Fixups.push_back(MCFixup::create(InsertByte, Expr,
+ getFixupForSize(Size, /*IsPCRel=*/true),
+ MI.getLoc()));
}
+}
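The LabelOffset cases follow mechanically from `getBytePosition` above; for an 8-bit pc-relative operand (Size = 8), the traces look like this (illustrative):
```
// Illustrative traces, Size = 8:
//   InsertPos = 0  -> InsertByte = 1; first word     -> LabelOffset = -1
//   InsertPos = 16 -> InsertByte = 3; odd fixup byte -> LabelOffset = +1
//   InsertPos = 24 -> InsertByte = 2; even byte      -> LabelOffset =  0
```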
- int64_t I = MCO.getImm();
-
- // Store 8 as 0, thus making range 1-8
- if (Type == M68kBeads::Imm3 && Alt) {
- assert(I && "Cannot encode Alt Imm3 zero value");
- I %= 8;
+void M68kMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &Op,
+ unsigned InsertPos, APInt &Value,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Register
+ if (Op.isReg()) {
+ unsigned RegNum = Op.getReg();
+ const auto *RI = Ctx.getRegisterInfo();
+ Value |= RI->getEncodingValue(RegNum);
+    // Set up the D/A bit
+ if (M68kII::isAddressRegister(RegNum))
+ Value |= 0b1000;
+ } else if (Op.isImm()) {
+ // Immediate
+ Value |= static_cast<uint64_t>(Op.getImm());
+ } else if (Op.isExpr()) {
+ // Absolute address
+ int64_t Addr;
+ if (!Op.getExpr()->evaluateAsAbsolute(Addr))
+      report_fatal_error("Unsupported asm expression. Only an absolute "
+                         "address can be placed here.");
+ Value |= static_cast<uint64_t>(Addr);
} else {
- assert(isIntN(Size, I));
+ llvm_unreachable("Unsupported operand type");
}
-
- uint64_t Imm = I;
-
- // 32 bit Imm requires HI16 first then LO16
- if (Size == 32) {
- Offset += EmitConstant((Imm >> 16) & 0xFFFF, 16, Pad, Buffer, Offset);
- EmitConstant(Imm & 0xFFFF, 16, Pad, Buffer, Offset);
- return Size;
- }
-
- return EmitConstant(Imm & ((1ULL << Size) - 1), Size, Pad, Buffer, Offset);
}
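For register operands this yields the usual M68k 4-bit field, with the D/A bit selecting between data and address registers; assuming the standard encoding values (D0-D7 and A0-A7 each encode 0-7 within their class):
```
// Illustrative only:
//   D3: getEncodingValue = 3, data register    -> Value |= 0b0011
//   A3: getEncodingValue = 3, address register -> Value |= 0b1011
```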
-#include "M68kGenMCCodeBeads.inc"
-
void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MCII.get(Opcode);
LLVM_DEBUG(dbgs() << "EncodeInstruction: " << MCII.getName(Opcode) << "("
<< Opcode << ")\n");
- const uint8_t *Beads = getGenInstrBeads(MI);
- if (!Beads || !*Beads) {
- llvm_unreachable("*** Instruction does not have Beads defined");
- }
-
- uint64_t Buffer = 0;
- unsigned Offset = 0;
- unsigned ThisByte = 0;
-
- for (uint8_t Bead = *Beads; Bead; Bead = *++Beads) {
- // Check for control beads
- if (!(Bead & 0xF)) {
- switch (Bead >> 4) {
- case M68kBeads::Ignore:
- continue;
- }
- }
-
- switch (Bead & 0xF) {
- default:
- llvm_unreachable("Unknown Bead code");
- break;
- case M68kBeads::Bits1:
- case M68kBeads::Bits2:
- case M68kBeads::Bits3:
- case M68kBeads::Bits4:
- Offset +=
- encodeBits(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI);
- break;
- case M68kBeads::DAReg:
- case M68kBeads::DA:
- case M68kBeads::DReg:
- case M68kBeads::Reg:
- Offset +=
- encodeReg(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI);
- break;
- case M68kBeads::Disp8:
- case M68kBeads::Imm8:
- case M68kBeads::Imm16:
- case M68kBeads::Imm32:
- case M68kBeads::Imm3:
- Offset +=
- encodeImm(ThisByte, Bead, MI, Desc, Buffer, Offset, Fixups, STI);
- break;
- }
-
- // Since M68k is Big Endian we need to rotate each instruction word
- while (Offset / 16) {
- support::endian::write<uint16_t>(OS, Buffer, support::big);
- Buffer >>= 16;
- Offset -= 16;
- ThisByte += 2;
+  // Encode the instruction with the TableGen'erated code emitter.
+ APInt EncodedInst(16, 0U);
+ APInt Scratch(16, 0U);
+ getBinaryCodeForInstr(MI, Fixups, EncodedInst, Scratch, STI);
+
+ ArrayRef<uint64_t> Data(EncodedInst.getRawData(), EncodedInst.getNumWords());
+ int64_t InstSize = EncodedInst.getBitWidth();
+ for (uint64_t Word : Data) {
+ for (int i = 0; i < 4 && InstSize > 0; ++i, InstSize -= 16) {
+ support::endian::write<uint16_t>(OS, static_cast<uint16_t>(Word),
+ support::big);
+ Word >>= 16;
}
}
-
- assert(Offset == 0 && "M68k Instructions are % 2 bytes");
- assert((ThisByte && !(ThisByte % 2)) && "M68k Instructions are % 2 bytes");
}
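So a 32-bit encoding goes out low 16-bit chunk first, each chunk written big-endian, matching the word pre-rotation done in the encoder helpers above. A trace (illustrative):
```
// Illustrative only: emitting a 32-bit encoding (InstSize = 32).
uint64_t Word = 0xABCD1234u;
// i = 0: write uint16(0x1234) big-endian -> bytes 0x12 0x34
// i = 1: write uint16(0xABCD) big-endian -> bytes 0xAB 0xCD
// Byte stream: 12 34 AB CD -- word-wise big-endian, as the CPU expects.
```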
MCCodeEmitter *llvm::createM68kMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new M68kMCCodeEmitter(MCII, Ctx);
}
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
index aa53e13af4fc..0dc601ad876b 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
@@ -38,7 +38,6 @@ MCAsmBackend *createM68kAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCTargetOptions &Options);
MCCodeEmitter *createM68kMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
/// Construct an M68k ELF object writer.
diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 13cba8b079a9..196e492046b9 100644
--- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
diff --git a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index 9bbb2938ab75..a4d63a62f6aa 100644
--- a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -14,8 +14,8 @@
#include "MSP430.h"
#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -72,7 +72,7 @@ static const unsigned GR8DecoderTable[] = {
static DecodeStatus DecodeGR8RegisterClass(MCInst &MI, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -90,7 +90,7 @@ static const unsigned GR16DecoderTable[] = {
static DecodeStatus DecodeGR16RegisterClass(MCInst &MI, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -100,16 +100,16 @@ static DecodeStatus DecodeGR16RegisterClass(MCInst &MI, uint64_t RegNo,
}
static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
#include "MSP430GenDisassemblerTables.inc"
static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int64_t Imm;
switch (Bits) {
default:
@@ -127,7 +127,7 @@ static DecodeStatus DecodeCGImm(MCInst &MI, uint64_t Bits, uint64_t Address,
static DecodeStatus DecodeMemOperand(MCInst &MI, uint64_t Bits,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Reg = Bits & 15;
unsigned Imm = Bits >> 4;
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 953916776c57..23af7d1149ed 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -35,7 +35,7 @@ class MSP430AsmBackend : public MCAsmBackend {
public:
MSP430AsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI)
: MCAsmBackend(support::little), OSABI(OSABI) {}
- ~MSP430AsmBackend() override {}
+ ~MSP430AsmBackend() override = default;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
index bb5351af6523..aa097ccb9de6 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -24,7 +24,7 @@ public:
: MCELFObjectTargetWriter(false, OSABI, ELF::EM_MSP430,
/*HasRelocationAddend*/ true) {}
- ~MSP430ELFObjectWriter() override {}
+ ~MSP430ELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
index 087045ccb1df..0cdb3a595f71 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -12,6 +12,7 @@
#include "MSP430MCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCSectionELF.h"
@@ -42,7 +43,7 @@ MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S,
// MSP430 EABI (slaa534.pdf, part 13).
MCSection *AttributeSection = getStreamer().getContext().getELFSection(
".MSP430.attributes", ELF::SHT_MSP430_ATTRIBUTES, 0);
- Streamer.SwitchSection(AttributeSection);
+ Streamer.switchSection(AttributeSection);
// Format version.
Streamer.emitInt8(0x41);
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
index cf57e87a073d..2b16c6234a51 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -167,7 +167,7 @@ unsigned MSP430MCCodeEmitter::getCGImmOpValue(const MCInst &MI, unsigned Op,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(Op);
assert(MO.isImm() && "Expr operand expected");
-
+
int64_t Imm = MO.getImm();
switch (Imm) {
default:
@@ -200,7 +200,6 @@ unsigned MSP430MCCodeEmitter::getCCOpValue(const MCInst &MI, unsigned Op,
}
MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new MSP430MCCodeEmitter(Ctx, MCII);
}
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 02bfbe40c6bf..24b0b3298592 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -31,7 +31,6 @@ class MCTargetStreamer;
/// Creates a machine code emitter for MSP430.
MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createMSP430MCAsmBackend(const Target &T,
diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 8eb3fbd58328..85c59d5b14b5 100644
--- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -166,11 +166,11 @@ void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
MCSection *IV = OutStreamer->getContext().getELFSection(
"__interrupt_vector_" + IVIdx,
ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
- OutStreamer->SwitchSection(IV);
+ OutStreamer->switchSection(IV);
const MCSymbol *FunctionSymbol = getSymbol(F);
OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
- OutStreamer->SwitchSection(Cur);
+ OutStreamer->switchSection(Cur);
}
bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index abd48dfd5139..b623730e1574 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -255,7 +254,7 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase)
? CurDAG->getTargetFrameIndex(
AM.Base.FrameIndex,
- getTargetLowering()->getPointerTy(CurDAG->getDataLayout()))
+ N.getValueType())
: AM.Base.Reg;
if (AM.GV)
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index aebfc6b0ae2e..73ab3b52e907 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -670,16 +670,17 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
InVals.push_back(ArgValue);
}
} else {
- // Only arguments passed on the stack should make it here.
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
SDValue InVal;
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Flags.isByVal()) {
+ MVT PtrVT = VA.getLocVT();
int FI = MFI.CreateFixedObject(Flags.getByValSize(),
VA.getLocMemOffset(), true);
- InVal = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ InVal = DAG.getFrameIndex(FI, PtrVT);
} else {
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
@@ -777,13 +778,14 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (!Reg)
llvm_unreachable("sret virtual register not created in entry block");
+ MVT PtrVT = getFrameIndexTy(DAG.getDataLayout());
SDValue Val =
- DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy(DAG.getDataLayout()));
+ DAG.getCopyFromReg(Chain, dl, Reg, PtrVT);
unsigned R12 = MSP430::R12;
Chain = DAG.getCopyToReg(Chain, dl, R12, Val, Flag);
Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(R12, getPointerTy(DAG.getDataLayout())));
+ RetOps.push_back(DAG.getRegister(R12, PtrVT));
}
unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
@@ -814,7 +816,7 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT PtrVT = getFrameIndexTy(DAG.getDataLayout());
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
@@ -1010,7 +1012,7 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ EVT PtrVT = Op.getValueType();
// Create the TargetGlobalAddress node, folding in the constant offset.
SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), PtrVT, Offset);
@@ -1021,7 +1023,7 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ EVT PtrVT = Op.getValueType();
SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT);
return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result);
@@ -1030,8 +1032,8 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ EVT PtrVT = Op.getValueType();
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT);
return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result);
@@ -1248,11 +1250,11 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
- auto PtrVT = getPointerTy(MF.getDataLayout());
+ MVT PtrVT = getFrameIndexTy(MF.getDataLayout());
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = MF.getDataLayout().getPointerSize();
+ uint64_t SlotSize = PtrVT.getStoreSize();
ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, -SlotSize,
true);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -1271,12 +1273,12 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ EVT PtrVT = Op.getValueType();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(DAG.getDataLayout().getPointerSize(), dl, MVT::i16);
+ DAG.getConstant(PtrVT.getStoreSize(), dl, MVT::i16);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo());
@@ -1308,7 +1310,9 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ SDValue Ptr = Op.getOperand(1);
+ EVT PtrVT = Ptr.getValueType();
// Frame index of first vararg argument
SDValue FrameIndex =
@@ -1316,14 +1320,14 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
// Create a store of the frame index to the location operand
- return DAG.getStore(Op.getOperand(0), SDLoc(Op), FrameIndex, Op.getOperand(1),
+ return DAG.getStore(Op.getOperand(0), SDLoc(Op), FrameIndex, Ptr,
MachinePointerInfo(SV));
}
SDValue MSP430TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ EVT PtrVT = Op.getValueType();
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), PtrVT, Result);
}
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index e9e26e295fd5..0646d6faebed 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -197,8 +197,7 @@ bool MSP430InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (std::next(I) != MBB.end())
- std::next(I)->eraseFromParent();
+ MBB.erase(std::next(I), MBB.end());
Cond.clear();
FBB = nullptr;
diff --git a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
index 1d3a6d118bd6..93b37b523a71 100644
--- a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -11,3 +11,10 @@
using namespace llvm;
void MSP430MachineFunctionInfo::anchor() { }
+
+MachineFunctionInfo *MSP430MachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<MSP430MachineFunctionInfo>(*this);
+}
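This is the boilerplate every target's MachineFunctionInfo now needs so that whole MachineFunctions can be cloned; a hedged sketch of the call site (the names `SrcMF`, `DestMF`, `Allocator`, and `Src2DstMBB` are illustrative, not from the patch):
```
// Illustrative only -- what the generic cloning machinery does with it:
MachineFunctionInfo *NewMFI =
    SrcMF.getInfo<MSP430MachineFunctionInfo>()->clone(Allocator, DestMF,
                                                      Src2DstMBB);
// DestMF now owns an independent copy of the per-function info
// (callee-saved frame size, return-address index, sret register).
```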
diff --git a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 261db9e288f5..93b388255877 100644
--- a/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -43,6 +43,11 @@ public:
explicit MSP430MachineFunctionInfo(MachineFunction &MF)
: CalleeSavedFrameSize(0), ReturnAddrIndex(0), SRetReturnReg(0) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index a33146ce2239..6bba224aab8b 100644
--- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -27,9 +27,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430Target() {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.value_or(Reloc::Static);
}
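`Optional::value_or` collapses the old has-value check into one expression; behaviourally (illustrative):
```
Optional<Reloc::Model> RM;                            // empty
assert(getEffectiveRelocModel(RM) == Reloc::Static);  // default kicks in
RM = Reloc::PIC_;
assert(getEffectiveRelocModel(RM) == Reloc::PIC_);    // wrapped value wins
```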
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -51,7 +49,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-MSP430TargetMachine::~MSP430TargetMachine() {}
+MSP430TargetMachine::~MSP430TargetMachine() = default;
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 736c41f8ac03..b5817d9ae700 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -3412,10 +3413,10 @@ bool MipsAsmParser::expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc,
const MipsMCExpr *LoExpr =
MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
- getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().switchSection(ReadOnlySection);
getStreamer().emitLabel(Sym, IDLoc);
getStreamer().emitInt32(ImmOp32);
- getStreamer().SwitchSection(CS);
+ getStreamer().switchSection(CS);
if (emitPartialAddress(TOut, IDLoc, Sym))
return true;
@@ -3464,11 +3465,11 @@ bool MipsAsmParser::expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc,
const MipsMCExpr *LoExpr =
MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
- getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().switchSection(ReadOnlySection);
getStreamer().emitLabel(Sym, IDLoc);
getStreamer().emitValueToAlignment(8);
getStreamer().emitIntValue(ImmOp64, 8);
- getStreamer().SwitchSection(CS);
+ getStreamer().switchSection(CS);
unsigned TmpReg = getATReg(IDLoc);
if (!TmpReg)
@@ -3547,11 +3548,11 @@ bool MipsAsmParser::expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU,
const MipsMCExpr *LoExpr =
MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
- getStreamer().SwitchSection(ReadOnlySection);
+ getStreamer().switchSection(ReadOnlySection);
getStreamer().emitLabel(Sym, IDLoc);
getStreamer().emitValueToAlignment(8);
getStreamer().emitIntValue(ImmOp64, 8);
- getStreamer().SwitchSection(CS);
+ getStreamer().switchSection(CS);
if (emitPartialAddress(TOut, IDLoc, Sym))
return true;
@@ -8179,7 +8180,7 @@ bool MipsAsmParser::parseRSectionDirective(StringRef Section) {
MCSection *ELFSection = getContext().getELFSection(
Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
- getParser().getStreamer().SwitchSection(ELFSection);
+ getParser().getStreamer().switchSection(ELFSection);
getParser().Lex(); // Eat EndOfStatement token.
return false;
@@ -8197,7 +8198,7 @@ bool MipsAsmParser::parseSSectionDirective(StringRef Section, unsigned Type) {
MCSection *ELFSection = getContext().getELFSection(
Section, Type, ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_MIPS_GPREL);
- getParser().getStreamer().SwitchSection(ELFSection);
+ getParser().getStreamer().switchSection(ELFSection);
getParser().Lex(); // Eat EndOfStatement token.
return false;
diff --git a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 9a66dd77c0d3..4e40a84ecfd0 100644
--- a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -15,8 +15,8 @@
#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -79,338 +79,279 @@ public:
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
-static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus
+DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus
+DecodeGPRMM16MovePRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeBranchTarget(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeJumpTarget(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeJumpTarget(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget21(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget26(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
// DecodeBranchTarget7MM - Decode microMIPS branch offset, which is
// shifted left by 1 bit.
-static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
// DecodeBranchTarget10MM - Decode microMIPS branch offset, which is
// shifted left by 1 bit.
-static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
// DecodeBranchTargetMM - Decode microMIPS branch offset, which is
// shifted left by 1 bit.
-static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
// DecodeBranchTarget26MM - Decode microMIPS branch offset, which is
// shifted left by 1 bit.
-static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
// DecodeJumpTargetMM - Decode microMIPS jump target, which is
// shifted left by 1 bit.
-static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
// DecodeJumpTargetXMM - Decode microMIPS jump and link exchange target,
// which is shifted left by 2 bits.
-static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMem(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemEVA(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeMemEVA(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeLoadByte15(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeLoadByte15(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCacheOp(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodePrefeOpMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeSyncI(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeSyncI_MM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeSynciR6(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeSynciR6(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm4(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMMImm9(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm9(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm16(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeFMemCop2MMR6(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst,
- unsigned Value,
+static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeLi16Imm(MCInst &Inst,
- unsigned Value,
+static DecodeStatus DecodeLi16Imm(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst,
- unsigned Value,
+static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <unsigned Bits, int Offset, int Scale>
static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <unsigned Bits, int Offset>
static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeUImmWithOffsetAndScale<Bits, Offset, 1>(Inst, Value, Address,
Decoder);
}
@@ -418,128 +359,132 @@ static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value,
template <unsigned Bits, int Offset = 0, int ScaleBy = 1>
static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeInsSize(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeInsSize(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't
/// handle.
template <typename InsnType>
static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <typename InsnType>
static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeBlezGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
-static DecodeStatus
-DecodeBlezGroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeBlezGroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
template <typename InsnType>
static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <typename InsnType>
static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
template <typename InsnType>
static DecodeStatus DecodeCRC(MCInst &MI, InsnType Insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static MCDisassembler *createMipsDisassembler(
const Target &T,
@@ -569,16 +514,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsDisassembler() {
#include "MipsGenDisassemblerTables.inc"
-static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
- const MipsDisassembler *Dis = static_cast<const MipsDisassembler*>(D);
- const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo();
+static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) {
+ const MCRegisterInfo *RegInfo = D->getContext().getRegisterInfo();
return *(RegInfo->getRegClass(RC).begin() + RegNo);
}
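// Minimal usage sketch for getReg (the register class and number here are
// illustrative, not from a specific hunk): resolve encoded GPR32 number 5
// and append it as an operand; with the standard MIPS numbering this is $a1.
//   unsigned Reg = getReg(Decoder, Mips::GPR32RegClassID, 5);
//   Inst.addOperand(MCOperand::createReg(Reg));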
template <typename InsnType>
static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder) {
- using DecodeFN = DecodeStatus (*)(MCInst &, unsigned, uint64_t, const void *);
+ const MCDisassembler *Decoder) {
+ using DecodeFN =
+ DecodeStatus (*)(MCInst &, unsigned, uint64_t, const MCDisassembler *);
// The size of the n field depends on the element size.
// The register class also depends on this.
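// Sketch of how the updated DecodeFN type is used by the body elided from
// this hunk (the register class and field position below are illustrative):
//   DecodeFN RegDecoder = DecodeMSA128BRegisterClass;
//   unsigned Wd = fieldFromInstruction(insn, 6, 5);
//   if (RegDecoder(MI, Wd, Address, Decoder) == MCDisassembler::Fail)
//     return MCDisassembler::Fail;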
@@ -624,7 +569,8 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
template <typename InsnType>
static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
InsnType Rs = fieldFromInstruction(insn, 16, 5);
InsnType Imm = fieldFromInstruction(insn, 0, 16);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID,
@@ -638,7 +584,7 @@ static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Imm = fieldFromInstruction(insn, 0, 16);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID,
@@ -653,7 +599,7 @@ static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
template <typename InsnType>
static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
// (otherwise we would have matched the ADDI instruction from the earlier
// ISAs instead).
@@ -692,7 +638,7 @@ static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
InsnType Rt = fieldFromInstruction(insn, 21, 5);
InsnType Rs = fieldFromInstruction(insn, 16, 5);
int64_t Imm = 0;
@@ -726,7 +672,7 @@ static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
// (otherwise we would have matched the DADDI instruction from the earlier
// ISAs instead).
@@ -765,7 +711,7 @@ static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
InsnType Rt = fieldFromInstruction(insn, 21, 5);
InsnType Rs = fieldFromInstruction(insn, 16, 5);
int64_t Imm = 0;
@@ -799,7 +745,7 @@ static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// We have:
// 0b110101 ttttt sssss iiiiiiiiiiiiiiii
// Invalid if rt == 0
@@ -838,7 +784,7 @@ static DecodeStatus DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// We have:
// 0b111101 ttttt sssss iiiiiiiiiiiiiiii
// Invalid if rt == 0
@@ -877,7 +823,7 @@ static DecodeStatus DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
// (otherwise we would have matched the BLEZL instruction from the earlier
// ISAs instead).
@@ -920,7 +866,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
// (otherwise we would have matched the BGTZL instruction from the earlier
// ISAs instead).
@@ -964,7 +910,7 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
// (otherwise we would have matched the BGTZ instruction from the earlier
// ISAs instead).
@@ -1012,8 +958,8 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
// (otherwise we would have matched the BLEZ instruction from the earlier
// ISAs instead).
@@ -1056,7 +1002,7 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
// for feature / behaviour parity with binutils.
template <typename InsnType>
static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Msbd = fieldFromInstruction(Insn, 11, 5);
unsigned Lsb = fieldFromInstruction(Insn, 6, 5);
unsigned Size = 0;
@@ -1098,7 +1044,7 @@ static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address,
// for feature / behaviour parity with binutils.
template <typename InsnType>
static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Msbd = fieldFromInstruction(Insn, 11, 5);
unsigned Lsb = fieldFromInstruction(Insn, 6, 5);
unsigned Size = 0;
@@ -1140,7 +1086,7 @@ static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address,
// The auto-generated decoder wouldn't add the third operand for CRC32*.
template <typename InsnType>
static DecodeStatus DecodeCRC(MCInst &MI, InsnType Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
InsnType Rs = fieldFromInstruction(Insn, 21, 5);
InsnType Rt = fieldFromInstruction(Insn, 16, 5);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
@@ -1384,17 +1330,15 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return MCDisassembler::Fail;
}
-static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeCPU16RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
return MCDisassembler::Fail;
}
-static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1403,10 +1347,9 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPRMM16RegClassID, RegNo);
@@ -1414,10 +1357,9 @@ static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeGPRMM16ZeroRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPRMM16ZeroRegClassID, RegNo);
@@ -1425,10 +1367,9 @@ static DecodeStatus DecodeGPRMM16ZeroRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeGPRMM16MovePRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPRMM16MovePRegClassID, RegNo);
@@ -1436,10 +1377,9 @@ static DecodeStatus DecodeGPRMM16MovePRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::GPR32RegClassID, RegNo);
@@ -1447,27 +1387,24 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (static_cast<const MipsDisassembler *>(Decoder)->isGP64())
return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder);
return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1476,10 +1413,9 @@ static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1488,10 +1424,9 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::CCRRegClassID, RegNo);
@@ -1499,10 +1434,9 @@ static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, Mips::FCCRegClassID, RegNo);
@@ -1512,7 +1446,7 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1521,10 +1455,8 @@ static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMem(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1543,10 +1475,8 @@ static DecodeStatus DecodeMem(MCInst &Inst,
return MCDisassembler::Success;
}
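// Worked decode of the fields above for `lw $a1, 8($v0)`
// (Insn = 0x8c450008 under the standard MIPS32 I-type layout):
//   Offset = SignExtend32<16>(0x0008) = 8
//   Reg    = bits 16..20 = 5  ($a1)
//   Base   = bits 21..25 = 2  ($v0)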
-static DecodeStatus DecodeMemEVA(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMemEVA(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<9>(Insn >> 7);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1564,10 +1494,9 @@ static DecodeStatus DecodeMemEVA(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeLoadByte15(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeLoadByte15(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
unsigned Reg = fieldFromInstruction(Insn, 21, 5);
@@ -1582,10 +1511,8 @@ static DecodeStatus DecodeLoadByte15(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCacheOp(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeCacheOp(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Hint = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1599,10 +1526,9 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<12>(Insn & 0xfff);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
unsigned Hint = fieldFromInstruction(Insn, 21, 5);
@@ -1616,10 +1542,9 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePrefeOpMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<9>(Insn & 0x1ff);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
unsigned Hint = fieldFromInstruction(Insn, 21, 5);
@@ -1633,10 +1558,9 @@ static DecodeStatus DecodePrefeOpMM(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<9>(Insn >> 7);
unsigned Hint = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1650,10 +1574,8 @@ static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSyncI(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1666,7 +1588,8 @@ static DecodeStatus DecodeSyncI(MCInst &Inst,
}
static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
@@ -1678,10 +1601,8 @@ static DecodeStatus DecodeSyncI_MM(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSynciR6(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeSynciR6(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Immediate = SignExtend32<16>(Insn & 0xffff);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
@@ -1694,7 +1615,8 @@ static DecodeStatus DecodeSynciR6(MCInst &Inst,
}
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
unsigned Reg = fieldFromInstruction(Insn, 6, 5);
unsigned Base = fieldFromInstruction(Insn, 11, 5);
@@ -1739,10 +1661,9 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm4(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Offset = Insn & 0xf;
unsigned Reg = fieldFromInstruction(Insn, 7, 3);
unsigned Base = fieldFromInstruction(Insn, 4, 3);
@@ -1797,10 +1718,9 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Offset = Insn & 0x1F;
unsigned Reg = fieldFromInstruction(Insn, 5, 5);
@@ -1813,10 +1733,9 @@ static DecodeStatus DecodeMemMMSPImm5Lsl2(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Offset = Insn & 0x7F;
unsigned Reg = fieldFromInstruction(Insn, 7, 3);
@@ -1829,10 +1748,9 @@ static DecodeStatus DecodeMemMMGPImm7Lsl2(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset;
switch (Inst.getOpcode()) {
case Mips::LWM16_MMR6:
@@ -1854,10 +1772,9 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemMMImm9(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm9(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<9>(Insn & 0x1ff);
unsigned Reg = fieldFromInstruction(Insn, 21, 5);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
@@ -1875,10 +1792,9 @@ static DecodeStatus DecodeMemMMImm9(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<12>(Insn & 0x0fff);
unsigned Reg = fieldFromInstruction(Insn, 21, 5);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
@@ -1910,10 +1826,9 @@ static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeMemMMImm16(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Reg = fieldFromInstruction(Insn, 21, 5);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
@@ -1928,10 +1843,8 @@ static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFMem(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1947,7 +1860,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
}
static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// This function is the same as DecodeFMem but with the Reg and Base fields
// swapped according to the microMIPS spec.
int Offset = SignExtend32<16>(Insn & 0xffff);
@@ -1964,10 +1878,8 @@ static DecodeStatus DecodeFMemMMR2(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFMem2(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1982,10 +1894,8 @@ static DecodeStatus DecodeFMem2(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFMem3(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<16>(Insn & 0xffff);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -2000,10 +1910,9 @@ static DecodeStatus DecodeFMem3(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFMemCop2R6(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<11>(Insn & 0x07ff);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 11, 5);
@@ -2019,7 +1928,8 @@ static DecodeStatus DecodeFMemCop2R6(MCInst &Inst,
}
static DecodeStatus DecodeFMemCop2MMR6(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
int Offset = SignExtend32<11>(Insn & 0x07ff);
unsigned Reg = fieldFromInstruction(Insn, 21, 5);
unsigned Base = fieldFromInstruction(Insn, 16, 5);
@@ -2034,10 +1944,9 @@ static DecodeStatus DecodeFMemCop2MMR6(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int64_t Offset = SignExtend64<9>((Insn >> 7) & 0x1ff);
unsigned Rt = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -2056,10 +1965,9 @@ static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// Currently only hardware register 29 is supported.
if (RegNo != 29)
return MCDisassembler::Fail;
@@ -2067,10 +1975,9 @@ static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 30 || RegNo % 2)
return MCDisassembler::Fail;
@@ -2079,10 +1986,9 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
@@ -2091,10 +1997,9 @@ static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
@@ -2103,10 +2008,9 @@ static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
@@ -2115,10 +2019,9 @@ static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -2127,10 +2030,9 @@ static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -2139,10 +2041,9 @@ static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -2151,10 +2052,9 @@ static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -2163,10 +2063,9 @@ static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
@@ -2175,10 +2074,9 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -2187,10 +2085,9 @@ static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -2199,122 +2096,109 @@ static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTarget(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = (SignExtend32<16>(Offset) * 4) + 4;
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
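// Worked example of the arithmetic above: offsets count 4-byte instructions
// and are relative to the instruction after the branch (the delay slot), so
// an offset field of 0xfffe decodes as
//   (SignExtend32<16>(0xfffe) * 4) + 4 = (-2 * 4) + 4 = -4,
// i.e. the instruction immediately before the branch.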
-static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget1SImm16(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = (SignExtend32<16>(Offset) * 2);
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeJumpTarget(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeJumpTarget(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
Inst.addOperand(MCOperand::createImm(JumpOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget21(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = SignExtend32<21>(Offset) * 4 + 4;
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = SignExtend32<21>(Offset) * 4 + 4;
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget26(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = SignExtend32<26>(Offset) * 4 + 4;
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget7MM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = SignExtend32<8>(Offset << 1);
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTarget10MM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = SignExtend32<11>(Offset << 1);
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
- unsigned Offset,
+static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, unsigned Offset,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = SignExtend32<16>(Offset) * 2 + 4;
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst,
- unsigned Offset,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, unsigned Offset,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
int32_t BranchOffset = SignExtend32<27>(Offset << 1);
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 1;
Inst.addOperand(MCOperand::createImm(JumpOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
Inst.addOperand(MCOperand::createImm(JumpOffset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst,
- unsigned Value,
+static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Value == 0)
Inst.addOperand(MCOperand::createImm(1));
else if (Value == 0x7)
@@ -2324,10 +2208,9 @@ static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeLi16Imm(MCInst &Inst,
- unsigned Value,
+static DecodeStatus DecodeLi16Imm(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Value == 0x7F)
Inst.addOperand(MCOperand::createImm(-1));
else
@@ -2335,18 +2218,17 @@ static DecodeStatus DecodeLi16Imm(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst,
- unsigned Value,
+static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, unsigned Value,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(Value == 0x0 ? 8 : Value));
return MCDisassembler::Success;
}
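// The encoding quirk above in one line: a field value of 0 denotes 8, every
// other value is taken literally, e.g. Value = 0 -> 8, Value = 3 -> 3.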
template <unsigned Bits, int Offset, int Scale>
-static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, uint64_t Address,
+ const MCDisassembler *Decoder) {
Value &= ((1 << Bits) - 1);
Value *= Scale;
Inst.addOperand(MCOperand::createImm(Value + Offset));
@@ -2354,18 +2236,16 @@ static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value,
}
template <unsigned Bits, int Offset, int ScaleBy>
-static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, uint64_t Address,
+ const MCDisassembler *Decoder) {
int32_t Imm = SignExtend32<Bits>(Value) * ScaleBy;
Inst.addOperand(MCOperand::createImm(Imm + Offset));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeInsSize(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeInsSize(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
// First we need to grab the pos(lsb) from MCInst.
// This function only handles the 32-bit variants of ins, as the dins
// variants are handled differently.
@@ -2376,19 +2256,21 @@ static DecodeStatus DecodeInsSize(MCInst &Inst,
}
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(SignExtend32<19>(Insn) * 4));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(SignExtend32<18>(Insn) * 8));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
int32_t DecodedValue;
switch (Insn) {
case 0: DecodedValue = 256; break;
@@ -2402,7 +2284,8 @@ static DecodeStatus DecodeSimm9SP(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// Insn must be >= 0; since it is unsigned, that condition is always true.
assert(Insn < 16);
int32_t DecodedValues[] = {128, 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64,
@@ -2411,10 +2294,9 @@ static DecodeStatus DecodeANDI16Imm(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRegListOperand(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
Mips::S6, Mips::S7, Mips::FP};
unsigned RegNum;
@@ -2442,7 +2324,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst,
static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3};
unsigned RegLst;
switch(Inst.getOpcode()) {
@@ -2465,8 +2347,8 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
if (DecodeMovePRegPair(Inst, RegPair, Address, Decoder) ==
MCDisassembler::Fail)
@@ -2491,7 +2373,8 @@ static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
switch (RegPair) {
default:
return MCDisassembler::Fail;
@@ -2533,15 +2416,16 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
}
static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(SignExtend32<25>(Insn << 2)));
return MCDisassembler::Success;
}
template <typename InsnType>
static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// We have:
// 0b000111 ttttt sssss iiiiiiiiiiiiiiii
// Invalid if rt == 0
@@ -2589,8 +2473,8 @@ static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn,
template <typename InsnType>
static DecodeStatus DecodeBlezGroupBranchMMR6(MCInst &MI, InsnType insn,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// We have:
// 0b000110 ttttt sssss iiiiiiiiiiiiiiii
// Invalid if rt == 0
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index 6091ee24b04d..1a5bb64863ee 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -9,7 +9,6 @@
#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MipsABIFlags.h"
#include <cstdint>
@@ -17,6 +16,7 @@
namespace llvm {
class MCStreamer;
+class StringRef;
struct MipsABIFlagsSection {
// Internal representation of the fp_abi related values used in .module.
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 3315a8ba18d6..227947d2766e 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -9,8 +9,10 @@
#include "MipsABIInfo.h"
#include "MipsRegisterInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index a3dbe6f84a1e..8050f9b8cae0 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -301,6 +301,15 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
}
Optional<MCFixupKind> MipsAsmBackend::getFixupKind(StringRef Name) const {
+ unsigned Type = llvm::StringSwitch<unsigned>(Name)
+ .Case("BFD_RELOC_NONE", ELF::R_MIPS_NONE)
+ .Case("BFD_RELOC_16", ELF::R_MIPS_16)
+ .Case("BFD_RELOC_32", ELF::R_MIPS_32)
+ .Case("BFD_RELOC_64", ELF::R_MIPS_64)
+ .Default(-1u);
+ if (Type != -1u)
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
+
return StringSwitch<Optional<MCFixupKind>>(Name)
.Case("R_MIPS_NONE", FK_NONE)
.Case("R_MIPS_32", FK_Data_4)
@@ -502,6 +511,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds,
"Not all MIPS big endian fixup kinds added!");
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -534,6 +545,8 @@ bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target) {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return true;
const unsigned FixupKind = Fixup.getKind();
switch (FixupKind) {
default:
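// Taken together with getRelocType() in MipsELFObjectWriter.cpp below, these
// hunks let a .reloc directive name a BFD relocation directly: the fixup kind
// is encoded as an offset from FirstLiteralRelocationKind and decoded back to
// the raw ELF type later. Round-trip sketch (names illustrative):
//   Optional<MCFixupKind> K = Backend.getFixupKind("BFD_RELOC_32");
//   // *K == FirstLiteralRelocationKind + ELF::R_MIPS_32
//   unsigned Type = unsigned(*K) - FirstLiteralRelocationKind; // R_MIPS_32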
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 9c317e3f8840..4990696fcfe0 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -220,6 +220,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
bool IsPCRel) const {
// Determine the type of the relocation.
unsigned Kind = Fixup.getTargetKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
switch (Kind) {
case FK_NONE:
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index e6e32ec7f27c..9843b6144343 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -90,9 +90,9 @@ void MipsELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
Labels.push_back(Symbol);
}
-void MipsELFStreamer::SwitchSection(MCSection *Section,
+void MipsELFStreamer::switchSection(MCSection *Section,
const MCExpr *Subsection) {
- MCELFStreamer::SwitchSection(Section, Subsection);
+ MCELFStreamer::switchSection(Section, Subsection);
Labels.clear();
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index f6a2c039c0c3..ac70e40d4dfe 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -50,7 +50,7 @@ public:
/// Overriding this function allows us to dismiss all labels that are
/// candidates for marking as microMIPS when a .section directive is processed.
- void SwitchSection(MCSection *Section,
+ void switchSection(MCSection *Section,
const MCExpr *Subsection = nullptr) override;
/// Overriding these functions allows us to dismiss all labels that are
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
index 3700d6309e1a..632192103d38 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -88,29 +88,30 @@ void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address,
break;
case Mips::Save16:
O << "\tsave\t";
- printSaveRestore(MI, O);
+ printSaveRestore(MI, STI, O);
O << " # 16 bit inst\n";
return;
case Mips::SaveX16:
O << "\tsave\t";
- printSaveRestore(MI, O);
+ printSaveRestore(MI, STI, O);
O << "\n";
return;
case Mips::Restore16:
O << "\trestore\t";
- printSaveRestore(MI, O);
+ printSaveRestore(MI, STI, O);
O << " # 16 bit inst\n";
return;
case Mips::RestoreX16:
O << "\trestore\t";
- printSaveRestore(MI, O);
+ printSaveRestore(MI, STI, O);
O << "\n";
return;
}
// Try to print any aliases first.
- if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O))
- printInstruction(MI, Address, O);
+ if (!printAliasInstr(MI, Address, STI, O) &&
+ !printAlias(*MI, Address, STI, O))
+ printInstruction(MI, Address, STI, O);
printAnnotation(O, Annot);
switch (MI->getOpcode()) {
@@ -123,7 +124,7 @@ void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
printRegName(O, Op.getReg());
@@ -139,8 +140,42 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
Op.getExpr()->print(O, &MAI, true);
}
+void MipsInstPrinter::printJumpOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (!Op.isImm())
+ return printOperand(MI, OpNo, STI, O);
+
+ if (PrintBranchImmAsAddress)
+ O << formatHex(Op.getImm());
+ else
+ O << formatImm(Op.getImm());
+}
+
+void MipsInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (!Op.isImm())
+ return printOperand(MI, OpNo, STI, O);
+
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + Op.getImm();
+ if (STI.hasFeature(Mips::FeatureMips32))
+ Target &= 0xffffffff;
+ else if (STI.hasFeature(Mips::FeatureMips16))
+ Target &= 0xffff;
+ O << formatHex(Target);
+ } else {
+ O << formatImm(Op.getImm());
+ }
+}
+
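+  // Worked example of the masking above (values illustrative): a branch at
+  // Address = 0x0 with Imm = -16, printed with PrintBranchImmAsAddress set
+  // on a subtarget that has FeatureMips32:
+  //   Target = 0x0 + (-16) = 0xfffffffffffffff0 (uint64_t wraparound)
+  //   Target &= 0xffffffff -> 0xfffffff0, printed via formatHex()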
template <unsigned Bits, unsigned Offset>
-void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
+void MipsInstPrinter::printUImm(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &MO = MI->getOperand(opNum);
if (MO.isImm()) {
uint64_t Imm = MO.getImm();
@@ -151,11 +186,12 @@ void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
return;
}
- printOperand(MI, opNum, O);
+ printOperand(MI, opNum, STI, O);
}
-void MipsInstPrinter::
-printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+void MipsInstPrinter::printMemOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
// For PIC targets, the call target is loaded using the
// pattern lw $25, %call16($28).
@@ -175,24 +211,26 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
break;
}
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, STI, O);
O << "(";
- printOperand(MI, opNum, O);
+ printOperand(MI, opNum, STI, O);
O << ")";
}
-void MipsInstPrinter::
-printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+void MipsInstPrinter::printMemOperandEA(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// When stack locations are used by instructions that are not loads or
// stores, print them the same way as normal 3-operand instructions.
- printOperand(MI, opNum, O);
+ printOperand(MI, opNum, STI, O);
O << ", ";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, STI, O);
}
-void MipsInstPrinter::
-printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
- const MCOperand& MO = MI->getOperand(opNum);
+void MipsInstPrinter::printFCCOperand(const MCInst *MI, int opNum,
+ const MCSubtargetInfo & /* STI */,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
O << MipsFCCToString((Mips::CondCode)MO.getImm());
}
@@ -202,82 +240,116 @@ printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) {
}
bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
- unsigned OpNo, raw_ostream &OS) {
+ uint64_t Address, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &OS,
+ bool IsBranch) {
OS << "\t" << Str << "\t";
- printOperand(&MI, OpNo, OS);
+ if (IsBranch)
+ printBranchOperand(&MI, Address, OpNo, STI, OS);
+ else
+ printOperand(&MI, OpNo, STI, OS);
return true;
}
bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
- unsigned OpNo0, unsigned OpNo1,
- raw_ostream &OS) {
- printAlias(Str, MI, OpNo0, OS);
+ uint64_t Address, unsigned OpNo0,
+ unsigned OpNo1, const MCSubtargetInfo &STI,
+ raw_ostream &OS, bool IsBranch) {
+ printAlias(Str, MI, Address, OpNo0, STI, OS, IsBranch);
OS << ", ";
- printOperand(&MI, OpNo1, OS);
+ if (IsBranch)
+ printBranchOperand(&MI, Address, OpNo1, STI, OS);
+ else
+ printOperand(&MI, OpNo1, STI, OS);
return true;
}
-bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
+bool MipsInstPrinter::printAlias(const MCInst &MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &OS) {
switch (MI.getOpcode()) {
case Mips::BEQ:
case Mips::BEQ_MM:
// beq $zero, $zero, $L2 => b $L2
// beq $r0, $zero, $L2 => beqz $r0, $L2
return (isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
- printAlias("b", MI, 2, OS)) ||
- (isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS));
+ printAlias("b", MI, Address, 2, STI, OS, true)) ||
+ (isReg<Mips::ZERO>(MI, 1) &&
+ printAlias("beqz", MI, Address, 0, 2, STI, OS, true));
case Mips::BEQ64:
// beq $r0, $zero, $L2 => beqz $r0, $L2
- return isReg<Mips::ZERO_64>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
+ return isReg<Mips::ZERO_64>(MI, 1) &&
+ printAlias("beqz", MI, Address, 0, 2, STI, OS, true);
case Mips::BNE:
case Mips::BNE_MM:
// bne $r0, $zero, $L2 => bnez $r0, $L2
- return isReg<Mips::ZERO>(MI, 1) && printAlias("bnez", MI, 0, 2, OS);
+ return isReg<Mips::ZERO>(MI, 1) &&
+ printAlias("bnez", MI, Address, 0, 2, STI, OS, true);
case Mips::BNE64:
// bne $r0, $zero, $L2 => bnez $r0, $L2
- return isReg<Mips::ZERO_64>(MI, 1) && printAlias("bnez", MI, 0, 2, OS);
+ return isReg<Mips::ZERO_64>(MI, 1) &&
+ printAlias("bnez", MI, Address, 0, 2, STI, OS, true);
case Mips::BGEZAL:
// bgezal $zero, $L1 => bal $L1
- return isReg<Mips::ZERO>(MI, 0) && printAlias("bal", MI, 1, OS);
+ return isReg<Mips::ZERO>(MI, 0) &&
+ printAlias("bal", MI, Address, 1, STI, OS, true);
case Mips::BC1T:
// bc1t $fcc0, $L1 => bc1t $L1
- return isReg<Mips::FCC0>(MI, 0) && printAlias("bc1t", MI, 1, OS);
+ return isReg<Mips::FCC0>(MI, 0) &&
+ printAlias("bc1t", MI, Address, 1, STI, OS, true);
case Mips::BC1F:
// bc1f $fcc0, $L1 => bc1f $L1
- return isReg<Mips::FCC0>(MI, 0) && printAlias("bc1f", MI, 1, OS);
+ return isReg<Mips::FCC0>(MI, 0) &&
+ printAlias("bc1f", MI, Address, 1, STI, OS, true);
case Mips::JALR:
+ // jalr $zero, $r1 => jr $r1
// jalr $ra, $r1 => jalr $r1
- return isReg<Mips::RA>(MI, 0) && printAlias("jalr", MI, 1, OS);
+ return (isReg<Mips::ZERO>(MI, 0) &&
+ printAlias("jr", MI, Address, 1, STI, OS)) ||
+ (isReg<Mips::RA>(MI, 0) &&
+ printAlias("jalr", MI, Address, 1, STI, OS));
case Mips::JALR64:
+ // jalr $zero, $r1 => jr $r1
// jalr $ra, $r1 => jalr $r1
- return isReg<Mips::RA_64>(MI, 0) && printAlias("jalr", MI, 1, OS);
+ return (isReg<Mips::ZERO_64>(MI, 0) &&
+ printAlias("jr", MI, Address, 1, STI, OS)) ||
+ (isReg<Mips::RA_64>(MI, 0) &&
+ printAlias("jalr", MI, Address, 1, STI, OS));
case Mips::NOR:
case Mips::NOR_MM:
case Mips::NOR_MMR6:
// nor $r0, $r1, $zero => not $r0, $r1
- return isReg<Mips::ZERO>(MI, 2) && printAlias("not", MI, 0, 1, OS);
+ return isReg<Mips::ZERO>(MI, 2) &&
+ printAlias("not", MI, Address, 0, 1, STI, OS);
case Mips::NOR64:
// nor $r0, $r1, $zero => not $r0, $r1
- return isReg<Mips::ZERO_64>(MI, 2) && printAlias("not", MI, 0, 1, OS);
+ return isReg<Mips::ZERO_64>(MI, 2) &&
+ printAlias("not", MI, Address, 0, 1, STI, OS);
case Mips::OR:
+ case Mips::ADDu:
// or $r0, $r1, $zero => move $r0, $r1
- return isReg<Mips::ZERO>(MI, 2) && printAlias("move", MI, 0, 1, OS);
- default: return false;
+ // addu $r0, $r1, $zero => move $r0, $r1
+ return isReg<Mips::ZERO>(MI, 2) &&
+ printAlias("move", MI, Address, 0, 1, STI, OS);
+ default:
+ return false;
}
}
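// Examples of the aliases handled above:
//   jalr $zero, $t9       ->  jr $t9
//   jalr $ra, $t9         ->  jalr $t9
//   addu $v0, $a0, $zero  ->  move $v0, $a0
//   nor  $v0, $a0, $zero  ->  not $v0, $a0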
-void MipsInstPrinter::printSaveRestore(const MCInst *MI, raw_ostream &O) {
+void MipsInstPrinter::printSaveRestore(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (i != 0) O << ", ";
if (MI->getOperand(i).isReg())
printRegName(O, MI->getOperand(i).getReg());
else
- printUImm<16>(MI, i, O);
+ printUImm<16>(MI, i, STI, O);
}
}
-void MipsInstPrinter::
-printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) {
+void MipsInstPrinter::printRegisterList(const MCInst *MI, int opNum,
+ const MCSubtargetInfo & /* STI */,
+ raw_ostream &O) {
// - 2 because the register list is always the first operand of the
// instruction and is always followed by the memory operand (base + offset).
for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) {
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
index 68b13bf1fcc3..d91612b15a1a 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
@@ -80,38 +80,50 @@ public:
// Autogenerated by tblgen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
- void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ void printInstruction(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override;
- bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS);
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
unsigned OpIdx, unsigned PrintMethodIdx,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
private:
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum,
- raw_ostream &O) {
- printOperand(MI, OpNum, O);
- }
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printJumpOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <unsigned Bits, unsigned Offset = 0>
- void printUImm(const MCInst *MI, int opNum, raw_ostream &O);
- void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
- void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
- void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printUImm(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printMemOperandEA(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
- bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
- raw_ostream &OS);
- bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0,
- unsigned OpNo1, raw_ostream &OS);
- bool printAlias(const MCInst &MI, raw_ostream &OS);
- void printSaveRestore(const MCInst *MI, raw_ostream &O);
- void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O);
+ bool printAlias(const char *Str, const MCInst &MI, uint64_t Address,
+ unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &OS,
+ bool IsBranch = false);
+ bool printAlias(const char *Str, const MCInst &MI, uint64_t Address,
+ unsigned OpNo0, unsigned OpNo1, const MCSubtargetInfo &STI,
+ raw_ostream &OS, bool IsBranch = false);
+ bool printAlias(const MCInst &MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &OS);
+ void printSaveRestore(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printRegisterList(const MCInst *MI, int opNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index b81ebedfb9c7..cf311337d5eb 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -42,13 +42,11 @@ using namespace llvm;
namespace llvm {
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, false);
}
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, true);
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index b7ecb0fdca5e..8531177ee924 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -31,10 +31,8 @@ class Target;
class Triple;
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createMipsAsmBackend(const Target &T, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index befa883d5877..f1aa90d24023 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -24,7 +24,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
MipsTargetStreamer *MTS =
static_cast<MipsTargetStreamer *>(Streamer->getTargetStreamer());
- Streamer->PushSection();
+ Streamer->pushSection();
// We need to distinguish between N64 and the rest because at the moment
// we don't emit .Mips.options for ELFs other than N64.
@@ -38,7 +38,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, 1);
MCA.registerSection(*Sec);
Sec->setAlignment(Align(8));
- Streamer->SwitchSection(Sec);
+ Streamer->switchSection(Sec);
Streamer->emitInt8(ELF::ODK_REGINFO); // kind
Streamer->emitInt8(40); // size
@@ -56,7 +56,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
ELF::SHF_ALLOC, 24);
MCA.registerSection(*Sec);
Sec->setAlignment(MTS->getABI().IsN32() ? Align(8) : Align(4));
- Streamer->SwitchSection(Sec);
+ Streamer->switchSection(Sec);
Streamer->emitInt32(ri_gprmask);
Streamer->emitInt32(ri_cprmask[0]);
@@ -67,7 +67,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() {
Streamer->emitInt32(ri_gp_value);
}
- Streamer->PopSection();
+ Streamer->popSection();
}
void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg,
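The PushSection/SwitchSection/PopSection renames throughout this patch track
an upstream MCStreamer API move to camelCase; the save/emit/restore idiom
itself is unchanged. A minimal sketch of that idiom, assuming an MCStreamer &S
and a target MCSection *Sec are in scope:

    S.pushSection();        // remember the current section
    S.switchSection(Sec);   // redirect emission to Sec
    S.emitInt32(0);         // ... section payload ...
    S.popSection();         // restore the previous section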
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 57cd016da4dc..caae5890fae1 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -11,17 +11,19 @@
//===----------------------------------------------------------------------===//
#include "MipsTargetStreamer.h"
-#include "MipsInstPrinter.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsELFStreamer.h"
+#include "MipsInstPrinter.h"
#include "MipsMCExpr.h"
#include "MipsMCTargetDesc.h"
#include "MipsTargetObjectFile.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -38,6 +40,10 @@ static bool isMicroMips(const MCSubtargetInfo *STI) {
return STI->getFeatureBits()[Mips::FeatureMicroMips];
}
+static bool isMips32r6(const MCSubtargetInfo *STI) {
+ return STI->getFeatureBits()[Mips::FeatureMips32r6];
+}
+
MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) {
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
@@ -277,10 +283,18 @@ void MipsTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg,
void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
- if (hasShortDelaySlot)
- emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI);
- else
- emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ // The default case of `nop` is `sll $zero, $zero, 0`.
+ unsigned Opc = Mips::SLL;
+ if (isMicroMips(STI) && hasShortDelaySlot) {
+ Opc = isMips32r6(STI) ? Mips::MOVE16_MMR6 : Mips::MOVE16_MM;
+ emitRR(Opc, Mips::ZERO, Mips::ZERO, IDLoc, STI);
+ return;
+ }
+
+ if (isMicroMips(STI))
+ Opc = isMips32r6(STI) ? Mips::SLL_MMR6 : Mips::SLL_MM;
+
+ emitRRI(Opc, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
}
void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
@@ -900,8 +914,8 @@ void MipsTargetELFStreamer::finish() {
unsigned Alignment = Section.getAlignment();
if (Alignment) {
- OS.SwitchSection(&Section);
- if (Section.UseCodeAlign())
+ OS.switchSection(&Section);
+ if (Section.useCodeAlign())
OS.emitCodeAlignment(Alignment, &STI, Alignment);
else
OS.emitValueToAlignment(Alignment, 0, 1, Alignment);
@@ -1012,9 +1026,9 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
MCA.registerSection(*Sec);
Sec->setAlignment(Align(4));
- OS.PushSection();
+ OS.pushSection();
- OS.SwitchSection(Sec);
+ OS.switchSection(Sec);
OS.emitValueImpl(ExprRef, 4);
@@ -1032,7 +1046,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
// the information gathered up until this point.
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
- OS.PopSection();
+ OS.popSection();
// .end also implicitly sets the size.
MCSymbol *CurPCSym = Context.createTempSymbol();
@@ -1312,7 +1326,7 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() {
".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, ELF::SHF_ALLOC, 24);
MCA.registerSection(*Sec);
Sec->setAlignment(Align(8));
- OS.SwitchSection(Sec);
+ OS.switchSection(Sec);
OS << ABIFlagsSection;
}
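For reference, the nop encodings emitEmptyDelaySlot now selects, with the
assembly each opcode prints (a summary of the logic above, not new behavior):

    // Mips::SLL                       -> sll $zero, $zero, 0  (classic nop)
    // Mips::SLL_MM / Mips::SLL_MMR6   -> the 32-bit microMIPS equivalent
    // Mips::MOVE16_MM / MOVE16_MMR6   -> 16-bit move $zero, $zero, used only
    //                                    for short delay slots in microMIPS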
diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index b1a05388884b..26cc6ac4dd38 100644
--- a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -15,6 +15,7 @@ def brtarget21_mm : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget21MM";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def brtarget26_mm : Operand<OtherVT> {
@@ -22,6 +23,7 @@ def brtarget26_mm : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget26MM";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def brtargetr6 : Operand<OtherVT> {
@@ -29,6 +31,7 @@ def brtargetr6 : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTargetMM";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def brtarget_lsl2_mm : Operand<OtherVT> {
@@ -38,6 +41,7 @@ def brtarget_lsl2_mm : Operand<OtherVT> {
// set with DecodeDisambiguates
let DecoderMethod = "";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
//===----------------------------------------------------------------------===//
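All of these PC-relative operands now name printBranchOperand as their
PrintMethod, which lets the disassembler print branch targets as resolved
addresses rather than raw offsets. A rough sketch of such a method
(illustrative shape only, not the verbatim implementation; the real
declaration appears in MipsInstPrinter.h above):

    void MipsInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
                                             unsigned OpNo,
                                             const MCSubtargetInfo &STI,
                                             raw_ostream &O) {
      const MCOperand &Op = MI->getOperand(OpNo);
      if (!Op.isImm())
        return printOperand(MI, OpNo, STI, O); // symbols, expressions, etc.
      // Branch offsets are encoded relative to the instruction address.
      O << formatHex(Address + Op.getImm());
    }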
diff --git a/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index eea4d7746fa6..d5fc30cef695 100644
--- a/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -278,18 +278,32 @@ let DecoderNamespace = "MicroMips" in {
}
let DecoderNamespace = "MicroMips", DecoderMethod = "DecodeFMemMMR2" in {
- def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>,
- LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_32 {
+ def LDC1_MM_D32 : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>,
+ LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_32 {
let BaseOpcode = "LDC132";
}
- def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_mm_16, II_SDC1, store>,
- LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_32;
+ def SDC1_MM_D32 : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_mm_16, II_SDC1, store>,
+ LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_32 {
+ let BaseOpcode = "SDC164";
+ }
def LWC1_MM : MMRel, LW_FT<"lwc1", FGR32Opnd, mem_mm_16, II_LWC1, load>,
LW_FM_MM<0x27>, ISA_MICROMIPS;
def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>,
LW_FM_MM<0x26>, ISA_MICROMIPS;
}
+let DecoderNamespace = "Mips64", DecoderMethod = "DecodeFMemMMR2" in {
+ def LDC1_MM_D64 : MMRel, LW_FT<"ldc1", FGR64Opnd, mem_mm_16, II_LDC1, load>,
+ LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_64 {
+ let BaseOpcode = "LDC164";
+ }
+ def SDC1_MM_D64 : MMRel, SW_FT<"sdc1", FGR64Opnd, mem_mm_16, II_SDC1, store>,
+ LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_64 {
+ let BaseOpcode = "SDC164";
+ }
+}
+
+
multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt,
InstrItinClass itin> {
def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>,
@@ -400,8 +414,10 @@ let AdditionalPredicates = [NoNaNsFPMath, HasMadd4,
// Patterns for loads/stores with a reg+imm operand.
let AddedComplexity = 40 in {
- def : LoadRegImmPat<LDC1_MM, f64, load>, ISA_MICROMIPS, FGR_32;
- def : StoreRegImmPat<SDC1_MM, f64>, ISA_MICROMIPS, FGR_32;
+ def : LoadRegImmPat<LDC1_MM_D32, f64, load>, ISA_MICROMIPS, FGR_32;
+ def : StoreRegImmPat<SDC1_MM_D32, f64>, ISA_MICROMIPS, FGR_32;
+ def : LoadRegImmPat<LDC1_MM_D64, f64, load>, ISA_MICROMIPS, FGR_64;
+ def : StoreRegImmPat<SDC1_MM_D64, f64>, ISA_MICROMIPS, FGR_64;
def : LoadRegImmPat<LWC1_MM, f32, load>, ISA_MICROMIPS;
def : StoreRegImmPat<SWC1_MM, f32>, ISA_MICROMIPS;
}
diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index 5f6354e19ebc..43b8eb7faf0e 100644
--- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -163,10 +163,12 @@ def mem_mm_4sp : Operand<i32> {
def jmptarget_mm : Operand<OtherVT> {
let EncoderMethod = "getJumpTargetOpValueMM";
+ let PrintMethod = "printJumpOperand";
}
def calltarget_mm : Operand<iPTR> {
let EncoderMethod = "getJumpTargetOpValueMM";
+ let PrintMethod = "printJumpOperand";
}
def brtarget7_mm : Operand<OtherVT> {
@@ -174,6 +176,7 @@ def brtarget7_mm : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget7MM";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def brtarget10_mm : Operand<OtherVT> {
@@ -181,6 +184,7 @@ def brtarget10_mm : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget10MM";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def brtarget_mm : Operand<OtherVT> {
@@ -188,6 +192,7 @@ def brtarget_mm : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTargetMM";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def simm23_lsl2 : Operand<i32> {
diff --git a/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp b/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
index 55d3c59cbf03..b0de8dacf691 100644
--- a/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/llvm/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -774,7 +774,7 @@ bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI,
bool MicroMipsSizeReduce::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<MipsSubtarget>();
// TODO: Add support for the subtarget microMIPS32R6.
if (!Subtarget->inMicroMipsMode() || !Subtarget->hasMips32r2() ||
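This static_cast-to-getSubtarget<T>() cleanup recurs throughout the patch. It
relies on the templated accessor in llvm/CodeGen/MachineFunction.h, which
performs the downcast internally:

    template <typename STC> const STC &getSubtarget() const {
      return *static_cast<const STC *>(STI);
    }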
diff --git a/llvm/lib/Target/Mips/Mips.h b/llvm/lib/Target/Mips/Mips.h
index faf58545db62..12dc29bbfe85 100644
--- a/llvm/lib/Target/Mips/Mips.h
+++ b/llvm/lib/Target/Mips/Mips.h
@@ -38,6 +38,7 @@ namespace llvm {
FunctionPass *createMicroMipsSizeReducePass();
FunctionPass *createMipsExpandPseudoPass();
FunctionPass *createMipsPreLegalizeCombiner();
+ FunctionPass *createMipsPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createMipsMulMulBugPass();
InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
@@ -48,6 +49,7 @@ namespace llvm {
void initializeMipsBranchExpansionPass(PassRegistry &);
void initializeMicroMipsSizeReducePass(PassRegistry &);
void initializeMipsPreLegalizerCombinerPass(PassRegistry&);
+ void initializeMipsPostLegalizerCombinerPass(PassRegistry &);
void initializeMipsMulMulBugFixPass(PassRegistry&);
} // end namespace llvm;
diff --git a/llvm/lib/Target/Mips/Mips.td b/llvm/lib/Target/Mips/Mips.td
index 792960332bcc..398c38e678ba 100644
--- a/llvm/lib/Target/Mips/Mips.td
+++ b/llvm/lib/Target/Mips/Mips.td
@@ -217,6 +217,7 @@ include "MipsSchedule.td"
include "MipsInstrInfo.td"
include "MipsCallingConv.td"
include "MipsRegisterBanks.td"
+include "MipsCombine.td"
// Avoid forward declaration issues.
include "MipsScheduleP5600.td"
@@ -267,8 +268,13 @@ def MipsAsmParserVariant : AsmParserVariant {
string RegisterPrefix = "$";
}
+def MipsAsmWriter : AsmWriter {
+ int PassSubtarget = 1;
+}
+
def Mips : Target {
let InstructionSet = MipsInstrInfo;
+ let AssemblyWriters = [MipsAsmWriter];
let AssemblyParsers = [MipsAsmParser];
let AssemblyParserVariants = [MipsAsmParserVariant];
let AllowRegisterRenaming = 1;
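Setting PassSubtarget here is what drives the MCSubtargetInfo threading seen
in MipsInstPrinter.{h,cpp} earlier in this patch: with it set, TableGen emits
the printer entry points with an extra STI parameter, matching the
declarations in the header diff, and every custom PrintMethod must accept the
same argument:

    void MipsInstPrinter::printInstruction(const MCInst *MI, uint64_t Address,
                                           const MCSubtargetInfo &STI,
                                           raw_ostream &O);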
diff --git a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 50147c019bfd..ce04124a7b00 100644
--- a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<MipsSubtarget>();
if (!Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
index 563118dfe627..b7b1d74e66ed 100644
--- a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-registerinfo"
-Mips16RegisterInfo::Mips16RegisterInfo() {}
+Mips16RegisterInfo::Mips16RegisterInfo() = default;
bool Mips16RegisterInfo::requiresRegisterScavenging
(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 192d0013d89c..0ae946160477 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -39,6 +39,7 @@ def brtarget21 : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget21";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def brtarget26 : Operand<OtherVT> {
@@ -46,6 +47,7 @@ def brtarget26 : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget26";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def jmpoffset16 : Operand<OtherVT> {
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 4bd8845e9cb9..9330a791a7cc 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -522,27 +522,27 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// See if this is a generic print operand
return AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
case 'X': // hex const int
- if ((MO.getType()) != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << "0x" << Twine::utohexstr(MO.getImm());
return false;
case 'x': // hex const int (low 16 bits)
- if ((MO.getType()) != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff);
return false;
case 'd': // decimal const int
- if ((MO.getType()) != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << MO.getImm();
return false;
case 'm': // decimal const int minus 1
- if ((MO.getType()) != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << MO.getImm() - 1;
return false;
case 'y': // exact log2
- if ((MO.getType()) != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
if (!isPowerOf2_64(MO.getImm()))
return true;
@@ -550,7 +550,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
case 'z':
// $0 if zero, regular printing otherwise
- if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) {
+ if (MO.isImm() && MO.getImm() == 0) {
O << "$0";
return false;
}
@@ -798,7 +798,7 @@ void MipsAsmPrinter::emitStartOfAsmFile(Module &M) {
// Tell the assembler which ABI we are using
std::string SectionName = std::string(".mdebug.") + getCurrentABIString();
- OutStreamer->SwitchSection(
+ OutStreamer->switchSection(
OutContext.getELFSection(SectionName, ELF::SHT_PROGBITS, 0));
// NaN: At the moment we only support:
@@ -825,7 +825,7 @@ void MipsAsmPrinter::emitStartOfAsmFile(Module &M) {
TS.emitDirectiveModuleOddSPReg();
// Switch to the .text section.
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->switchSection(getObjFileLowering().getTextSection());
}
void MipsAsmPrinter::emitInlineAsmStart() const {
@@ -841,12 +841,12 @@ void MipsAsmPrinter::emitInlineAsmStart() const {
TS.emitDirectiveSetAt();
TS.emitDirectiveSetMacro();
TS.emitDirectiveSetReorder();
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
void MipsAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const {
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
getTargetStreamer().emitDirectiveSetPop();
}
@@ -1038,14 +1038,14 @@ void MipsAsmPrinter::EmitFPCallStub(
//
// probably not necessary but we save and restore the current section state
//
- OutStreamer->PushSection();
+ OutStreamer->pushSection();
//
// .section mips16.call.fpxxxx,"ax",@progbits
//
MCSectionELF *M = OutContext.getELFSection(
".mips16.call.fp." + std::string(Symbol), ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
- OutStreamer->SwitchSection(M, nullptr);
+ OutStreamer->switchSection(M, nullptr);
//
// .align 2
//
@@ -1114,7 +1114,7 @@ void MipsAsmPrinter::EmitFPCallStub(
const MCExpr *T_min_E = MCBinaryExpr::createSub(T, E, OutContext);
OutStreamer->emitELFSize(Stub, T_min_E);
TS.emitDirectiveEnd(x);
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
void MipsAsmPrinter::emitEndOfAsmFile(Module &M) {
@@ -1130,7 +1130,7 @@ void MipsAsmPrinter::emitEndOfAsmFile(Module &M) {
EmitFPCallStub(Symbol, Signature);
}
// return to the text section
- OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
+ OutStreamer->switchSection(OutContext.getObjectFileInfo()->getTextSection());
}
void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
diff --git a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
index 4e9a23d077da..a4fa0792a998 100644
--- a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -36,7 +36,8 @@
///
/// Regarding compact branch hazard prevention:
///
-/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below.
+/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below,
+/// load delay slots for MIPS1.
///
/// A forbidden slot hazard occurs when a compact branch instruction is executed
/// and the adjacent instruction in memory is a control transfer instruction
@@ -164,6 +165,7 @@ private:
bool handleSlot(Pred Predicate, Safe SafeInSlot);
bool handleForbiddenSlot();
bool handleFPUDelaySlot();
+ bool handleLoadDelaySlot();
bool handlePossibleLongBranch();
const MipsSubtarget *STI;
@@ -532,7 +534,7 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
}
if (hasDelaySlot) {
if (STI->isTargetNaCl()) {
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
+ TII->insertNop(*BalTgtMBB, Pos, DL);
} else {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
.addReg(Mips::SP)
@@ -675,9 +677,8 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
// nop
// $fallthrough:
//
- MIBundleBuilder(*LongBrMBB, Pos)
- .append(BuildMI(*MFp, DL, TII->get(Mips::J)).addMBB(TgtMBB))
- .append(BuildMI(*MFp, DL, TII->get(Mips::NOP)));
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB);
+ TII->insertNop(*LongBrMBB, Pos, DL)->bundleWithPred();
} else {
// At this point, offset where we need to branch does not fit into
// immediate field of the branch instruction and is not in the same
@@ -722,7 +723,7 @@ void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) {
if (I.Br->isUnconditionalBranch()) {
// Change branch destination.
assert(I.Br->getDesc().getNumOperands() == 1);
- I.Br->RemoveOperand(0);
+ I.Br->removeOperand(0);
I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB));
} else
// Change branch destination and reverse condition.
@@ -762,13 +763,12 @@ bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) {
}
if (LastInstInFunction || !SafeInSlot(*IInSlot, *I)) {
-
MachineBasicBlock::instr_iterator Iit = I->getIterator();
if (std::next(Iit) == FI->end() ||
std::next(Iit)->getOpcode() != Mips::NOP) {
Changed = true;
- MIBundleBuilder(&*I).append(
- BuildMI(*MFp, I->getDebugLoc(), TII->get(Mips::NOP)));
+ TII->insertNop(*(I->getParent()), std::next(I), I->getDebugLoc())
+ ->bundleWithPred();
NumInsertedNops++;
}
}
@@ -801,6 +801,18 @@ bool MipsBranchExpansion::handleFPUDelaySlot() {
});
}
+bool MipsBranchExpansion::handleLoadDelaySlot() {
+ // Load delay slot hazards are only for MIPS1.
+ if (STI->hasMips2())
+ return false;
+
+ return handleSlot(
+ [this](auto &I) -> bool { return TII->HasLoadDelaySlot(I); },
+ [this](auto &IInSlot, auto &I) -> bool {
+ return TII->SafeInLoadDelaySlot(IInSlot, I);
+ });
+}
+
bool MipsBranchExpansion::handlePossibleLongBranch() {
if (STI->inMips16Mode() || !STI->enableLongBranchPass())
return false;
@@ -867,7 +879,7 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
IsPIC = TM.isPositionIndependent();
ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
- STI = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<MipsSubtarget>();
TII = static_cast<const MipsInstrInfo *>(STI->getInstrInfo());
if (IsPIC && ABI.IsO32() &&
@@ -877,19 +889,21 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
MFp = &MF;
ForceLongBranchFirstPass = ForceLongBranch;
- // Run these two at least once
+ // Run these at least once.
bool longBranchChanged = handlePossibleLongBranch();
bool forbiddenSlotChanged = handleForbiddenSlot();
bool fpuDelaySlotChanged = handleFPUDelaySlot();
+ bool loadDelaySlotChanged = handleLoadDelaySlot();
- bool Changed =
- longBranchChanged || forbiddenSlotChanged || fpuDelaySlotChanged;
+ bool Changed = longBranchChanged || forbiddenSlotChanged ||
+ fpuDelaySlotChanged || loadDelaySlotChanged;
- // Then run them alternatively while there are changes
+ // Then run them alternately while there are changes.
while (forbiddenSlotChanged) {
longBranchChanged = handlePossibleLongBranch();
fpuDelaySlotChanged = handleFPUDelaySlot();
- if (!longBranchChanged && !fpuDelaySlotChanged)
+ loadDelaySlotChanged = handleLoadDelaySlot();
+ if (!longBranchChanged && !fpuDelaySlotChanged && !loadDelaySlotChanged)
break;
forbiddenSlotChanged = handleForbiddenSlot();
}
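For context on handleLoadDelaySlot(): on MIPS I the register written by a load
is not available to the instruction in the load delay slot, so a dependent
instruction there must be separated from the load by a nop. A hand-written
illustration (register names are arbitrary):

    // before:                     after the pass:
    //   lw   $t0, 0($a0)            lw   $t0, 0($a0)
    //   addu $v0, $t0, $t1          nop                  // inserted, bundled
    //                               addu $v0, $t0, $t1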
diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp
index f6ec34c7f403..3c1c2bcd7a1b 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -18,6 +18,7 @@
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;
@@ -540,8 +541,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
}
MIRBuilder.insertInstr(MIB);
if (MIB->getOpcode() == Mips::JALRPseudo) {
- const MipsSubtarget &STI =
- static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+ const MipsSubtarget &STI = MIRBuilder.getMF().getSubtarget<MipsSubtarget>();
MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
*STI.getRegBankInfo());
}
diff --git a/llvm/lib/Target/Mips/MipsCombine.td b/llvm/lib/Target/Mips/MipsCombine.td
new file mode 100644
index 000000000000..29550a15d38d
--- /dev/null
+++ b/llvm/lib/Target/Mips/MipsCombine.td
@@ -0,0 +1,15 @@
+//=- MipsCombine.td - Define Mips Combine Rules --------------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/GlobalISel/Combine.td"
+
+def MipsPostLegalizerCombinerHelper: GICombinerHelper<
+ "MipsGenPostLegalizerCombinerHelper", []> {
+ let DisableRuleOption = "mipspostlegalizercombiner-disable-rule";
+}
+
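The empty rule list means the helper carries no Mips-specific generated rules
yet; it exists so rules can be added incrementally. DisableRuleOption hooks
the generated rule table up to a command-line filter, so individual rules can
be switched off for debugging (assumed usage, following the convention of the
other GICombiner-based passes):

    // llc -mtriple=mipsel-linux-gnu -global-isel \
    //     -mipspostlegalizercombiner-disable-rule=<rule name or index> ...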
diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 1efbf5570287..0341af0caac4 100644
--- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -436,7 +436,7 @@ bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// FIXME:
MF = &mf;
MCP = mf.getConstantPool();
- STI = &static_cast<const MipsSubtarget &>(mf.getSubtarget());
+ STI = &mf.getSubtarget<MipsSubtarget>();
LLVM_DEBUG(dbgs() << "constant island machine function "
<< "\n");
if (!STI->inMips16Mode() || !MipsSubtarget::useConstantIslands()) {
@@ -1653,8 +1653,8 @@ void MipsConstantIslands::prescanForConstants() {
I->getOperand(2).ChangeToImmediate(index);
LLVM_DEBUG(dbgs() << "constant island constant " << *I << "\n");
I->setDesc(TII->get(Mips::LwRxPcTcp16));
- I->RemoveOperand(1);
- I->RemoveOperand(1);
+ I->removeOperand(1);
+ I->removeOperand(1);
I->addOperand(MachineOperand::CreateCPI(index, 0));
I->addOperand(MachineOperand::CreateImm(4));
}
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index cf6cec22308c..94053fa2eb7a 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -677,7 +677,7 @@ bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// Bundle the NOP to the instruction with the delay slot.
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for ";
I->dump());
- BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+ TII->insertNop(MBB, std::next(I), I->getDebugLoc());
MIBundleBuilder(MBB, I, std::next(I, 2));
++FilledSlots;
Changed = true;
diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
index 31180d5a23ef..d242083f958b 100644
--- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -892,7 +892,7 @@ bool MipsExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
}
bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- STI = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<MipsSubtarget>();
TII = STI->getInstrInfo();
bool Modified = false;
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 6ddfec5d0f79..c1b8af70d8b0 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -178,12 +178,8 @@ private:
// Emit helper routines.
bool emitCmp(unsigned DestReg, const CmpInst *CI);
- bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
- unsigned Alignment = 0);
- bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
- MachineMemOperand *MMO = nullptr);
- bool emitStore(MVT VT, unsigned SrcReg, Address &Addr,
- unsigned Alignment = 0);
+ bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr);
+ bool emitStore(MVT VT, unsigned SrcReg, Address &Addr);
unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
bool emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg,
@@ -753,8 +749,7 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
return true;
}
-bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
- unsigned Alignment) {
+bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr) {
//
// more cases will be handled here in following patches.
//
@@ -808,8 +803,7 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
return false;
}
-bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
- unsigned Alignment) {
+bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr) {
//
// more cases will be handled here in following patches.
//
@@ -902,7 +896,7 @@ bool MipsFastISel::selectLoad(const Instruction *I) {
return false;
unsigned ResultReg;
- if (!emitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ if (!emitLoad(VT, ResultReg, Addr))
return false;
updateValueMap(I, ResultReg);
return true;
@@ -931,7 +925,7 @@ bool MipsFastISel::selectStore(const Instruction *I) {
if (!computeAddress(I->getOperand(1), Addr))
return false;
- if (!emitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ if (!emitStore(VT, SrcReg, Addr))
return false;
return true;
}
diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index d88696525e9e..c4bb3d90b4d5 100644
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -54,7 +54,7 @@ void MipsDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<MipsSubtarget>();
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
processFunctionAfterISel(MF);
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0c2e129b8f1f..b98be4ae4b75 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -94,18 +94,6 @@ static const MCPhysReg Mips64DPRegs[8] = {
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
};
-// If I is a shifted mask, set the size (Size) and the first bit of the
-// mask (Pos), and return true.
-// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
-static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
- if (!isShiftedMask_64(I))
- return false;
-
- Size = countPopulation(I);
- Pos = countTrailingZeros(I);
- return true;
-}
-
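The deleted helper is redundant because MathExtras.h provides an
isShiftedMask_64 overload that reports the mask geometry through
out-parameters. Reusing the deleted comment's example:

    unsigned MaskIdx, MaskLen;
    // 0x003ff800 has 11 trailing zeros and 11 contiguous set bits, so:
    bool IsMask = isShiftedMask_64(0x003ff800, MaskIdx, MaskLen);
    // IsMask == true, MaskIdx == 11, MaskLen == 11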
// The MIPS MSA ABI passes vector arguments in the integer register set.
// The number of integer registers used depends on the ABI.
MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
@@ -192,6 +180,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Ret: return "MipsISD::Ret";
case MipsISD::ERet: return "MipsISD::ERet";
case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
+ case MipsISD::FAbs: return "MipsISD::FAbs";
case MipsISD::FMS: return "MipsISD::FMS";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
case MipsISD::FPCmp: return "MipsISD::FPCmp";
@@ -353,15 +342,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::FABS, MVT::f32, Custom);
+ setOperationAction(ISD::FABS, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- if (!(TM.Options.NoNaNsFPMath || Subtarget.inAbs2008Mode())) {
- setOperationAction(ISD::FABS, MVT::f32, Custom);
- setOperationAction(ISD::FABS, MVT::f64, Custom);
- }
-
if (Subtarget.isGP64bit()) {
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
@@ -494,15 +480,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- setTargetDAGCombine(ISD::SDIVREM);
- setTargetDAGCombine(ISD::UDIVREM);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::AssertZext);
- setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine({ISD::SDIVREM, ISD::UDIVREM, ISD::SELECT, ISD::AND,
+ ISD::OR, ISD::ADD, ISD::SUB, ISD::AssertZext, ISD::SHL});
if (ABI.IsO32()) {
// These libcalls are not available in 32-bit.
@@ -794,14 +773,15 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
EVT ValTy = N->getValueType(0);
SDLoc DL(N);
- uint64_t Pos = 0, SMPos, SMSize;
+ uint64_t Pos = 0;
+ unsigned SMPos, SMSize;
ConstantSDNode *CN;
SDValue NewOperand;
unsigned Opc;
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
- !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ !isShiftedMask_64(CN->getZExtValue(), SMPos, SMSize))
return SDValue();
if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
@@ -875,7 +855,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
- uint64_t SMPos0, SMSize0, SMPos1, SMSize1;
+ unsigned SMPos0, SMSize0, SMPos1, SMSize1;
ConstantSDNode *CN, *CN1;
// See if Op's first operand matches (and $src1 , mask0).
@@ -883,7 +863,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
- !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ !isShiftedMask_64(~CN->getSExtValue(), SMPos0, SMSize0))
return SDValue();
// See if Op's second operand matches (and (shl $src, pos), mask1).
@@ -891,7 +871,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
And1.getOperand(0).getOpcode() == ISD::SHL) {
if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
- !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ !isShiftedMask_64(CN->getZExtValue(), SMPos1, SMSize1))
return SDValue();
// The shift masks must have the same position and size.
@@ -970,6 +950,14 @@ static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
ROOTNode->getOperand(1).getOpcode() != ISD::MUL)
return SDValue();
+ // In the case where we have a multiplication as the left operand of a
+ // subtraction, we can't combine into a MipsISD::MSub node, as the
+ // instruction definition of msub(u) places the multiplication on the
+ // right.
+ if (ROOTNode->getOpcode() == ISD::SUB &&
+ ROOTNode->getOperand(0).getOpcode() == ISD::MUL)
+ return SDValue();
+
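To see the asymmetry this bail-out guards against: msub(u) always subtracts
the product from the accumulator, never the reverse (a sketch of the assumed
instruction semantics):

    // msub rs, rt   computes   (HI:LO) = (HI:LO) - rs * rt
    //   c - (a * b)  ->  mthi/mtlo c; msub a, b      // expressible
    //   (a * b) - c  ->  would need (a * b) - acc    // not expressible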
// We don't handle vector types here.
if (ROOTNode->getValueType(0).isVector())
return SDValue();
@@ -1118,7 +1106,8 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
EVT ValTy = N->getValueType(0);
SDLoc DL(N);
- uint64_t Pos = 0, SMPos, SMSize;
+ uint64_t Pos = 0;
+ unsigned SMPos, SMSize;
ConstantSDNode *CN;
SDValue NewOperand;
@@ -1136,7 +1125,7 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
// AND's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
- !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ !isShiftedMask_64(CN->getZExtValue(), SMPos, SMSize))
return SDValue();
// Return if the shifted mask does not start at bit 0 or the sum of its size
@@ -1191,6 +1180,16 @@ bool MipsTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasMips32();
}
+bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ // We can use ANDI+SLTIU as a bit test. Y contains the bit position.
+ // For MIPSR2 or later, we may be able to use the `ext` instruction or its'
+ // double-word variants.
+ if (auto *C = dyn_cast<ConstantSDNode>(Y))
+ return C->getAPIntValue().ule(15);
+
+ return false;
+}
+
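The ule(15) bound exists because andi takes a 16-bit zero-extended immediate,
so a single-bit mask (1 << C) is encodable only for C <= 15. A bit test then
lowers to, for example:

    // test bit C of $a0, with C <= 15:
    //   andi  $t0, $a0, 1 << C     // isolate the bit
    //   sltiu $v0, $t0, 1          // $v0 = 1 iff the bit was clear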
bool MipsTargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
if (N->getOperand(0).getValueType().isVector())
@@ -2421,11 +2420,14 @@ MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
return lowerFCOPYSIGN32(Op, DAG, Subtarget.hasExtractInsert());
}
-static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
- bool HasExtractInsert) {
+SDValue MipsTargetLowering::lowerFABS32(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) const {
SDLoc DL(Op);
SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);
+ if (DAG.getTarget().Options.NoNaNsFPMath || Subtarget.inAbs2008Mode())
+ return DAG.getNode(MipsISD::FAbs, DL, Op.getValueType(), Op.getOperand(0));
+
// If the operand is of type f64, extract the upper 32 bits. Otherwise, bitcast it
// to i32.
SDValue X = (Op.getValueType() == MVT::f32)
@@ -2458,11 +2460,14 @@ static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
- bool HasExtractInsert) {
+SDValue MipsTargetLowering::lowerFABS64(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) const {
SDLoc DL(Op);
SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);
+ if (DAG.getTarget().Options.NoNaNsFPMath || Subtarget.inAbs2008Mode())
+ return DAG.getNode(MipsISD::FAbs, DL, Op.getValueType(), Op.getOperand(0));
+
// Bitcast to integer node.
SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
@@ -2673,7 +2678,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return Op;
// Return if load is aligned or if MemVT is neither i32 nor i64.
- if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
+ if ((LD->getAlign().value() >= (MemVT.getSizeInBits() / 8)) ||
((MemVT != MVT::i32) && (MemVT != MVT::i64)))
return SDValue();
@@ -2787,7 +2792,7 @@ static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG,
SDValue Tr = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Val), FPTy,
Val.getOperand(0));
return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(),
- SD->getPointerInfo(), SD->getAlignment(),
+ SD->getPointerInfo(), SD->getAlign(),
SD->getMemOperand()->getFlags());
}
@@ -2797,7 +2802,7 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Lower unaligned integer stores.
if (!Subtarget.systemSupportsUnalignedAccess() &&
- (SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
+ (SD->getAlign().value() < (MemVT.getSizeInBits() / 8)) &&
((MemVT == MVT::i32) || (MemVT == MVT::i64)))
return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle());
@@ -4732,18 +4737,19 @@ MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
Register
MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const {
- // Named registers is expected to be fairly rare. For now, just support $28
- // since the linux kernel uses it.
+ // The Linux kernel uses $28 and sp.
if (Subtarget.isGP64bit()) {
Register Reg = StringSwitch<Register>(RegName)
- .Case("$28", Mips::GP_64)
- .Default(Register());
+ .Case("$28", Mips::GP_64)
+ .Case("sp", Mips::SP_64)
+ .Default(Register());
if (Reg)
return Reg;
} else {
Register Reg = StringSwitch<Register>(RegName)
- .Case("$28", Mips::GP)
- .Default(Register());
+ .Case("$28", Mips::GP)
+ .Case("sp", Mips::SP)
+ .Default(Register());
if (Reg)
return Reg;
}
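getRegisterByName() backs the llvm.read_register / llvm.write_register
intrinsics, so IR like the following (shown in comments) can now name the
stack pointer as well as $28:

    // %sp = call i32 @llvm.read_register.i32(metadata !0)
    // ...
    // !0 = !{!"sp"}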
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 3905a18895de..1f921fbe9491 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -99,6 +99,9 @@ class TargetRegisterClass;
// Floating Point Compare
FPCmp,
+ // Floating point Abs
+ FAbs,
+
// Floating point select
FSELECT,
@@ -157,7 +160,7 @@ class TargetRegisterClass;
Ins,
CIns,
- // EXTR.W instrinsic nodes.
+ // EXTR.W intrinsic nodes.
EXTP,
EXTPDP,
EXTR_S_H,
@@ -282,6 +285,7 @@ class TargetRegisterClass;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool hasBitTest(SDValue X, SDValue Y) const override;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
@@ -540,6 +544,10 @@ class TargetRegisterClass;
SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) const;
+ SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) const;
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 2bf8562895d7..5cb7a0a1804d 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -54,7 +54,6 @@ bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const {
/// insertNoop - If a data hazard condition is found, insert the target nop
/// instruction.
-// FIXME: This appears to be dead code.
void MipsInstrInfo::
insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
{
@@ -62,6 +61,19 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
+MachineInstrBuilder MipsInstrInfo::insertNop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ assert(!Subtarget.inMips16Mode() &&
+ "insertNop does not support MIPS16e mode at this time");
+ const unsigned MMOpc =
+ Subtarget.hasMips32r6() ? Mips::SLL_MMR6 : Mips::SLL_MM;
+ const unsigned Opc = Subtarget.inMicroMipsMode() ? MMOpc : Mips::SLL;
+ return BuildMI(MBB, MI, DL, get(Opc), Mips::ZERO)
+ .addReg(Mips::ZERO)
+ .addImm(0);
+}
+
MachineMemOperand *
MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
MachineMemOperand::Flags Flags) const {
@@ -598,6 +610,18 @@ bool MipsInstrInfo::SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
return true;
}
+/// Predicate for distinguishing instructions that are hazardous in a load delay
+/// slot. Inline assembly is considered unsafe as well.
+bool MipsInstrInfo::SafeInLoadDelaySlot(const MachineInstr &MIInSlot,
+ const MachineInstr &LoadMI) const {
+ if (MIInSlot.isInlineAsm())
+ return false;
+
+ return !llvm::any_of(LoadMI.defs(), [&](const MachineOperand &Op) {
+ return Op.isReg() && MIInSlot.readsRegister(Op.getReg());
+ });
+}
+
/// Predicate for distinguishing instructions that have forbidden slots.
bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
return (MI.getDesc().TSFlags & MipsII::HasForbiddenSlot) != 0;
@@ -622,6 +646,22 @@ bool MipsInstrInfo::HasFPUDelaySlot(const MachineInstr &MI) const {
}
}
+/// Predicate for distinguishing instructions that have load delay slots.
+bool MipsInstrInfo::HasLoadDelaySlot(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case Mips::LB:
+ case Mips::LBu:
+ case Mips::LH:
+ case Mips::LHu:
+ case Mips::LW:
+ case Mips::LWR:
+ case Mips::LWL:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Return the number of bytes of code the specified instruction may be.
unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -695,7 +735,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
NewOpc == Mips::JIALC64) {
if (NewOpc == Mips::JIALC || NewOpc == Mips::JIALC64)
- MIB->RemoveOperand(0);
+ MIB->removeOperand(0);
for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
MIB.add(I->getOperand(J));
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h
index 46c1b73d512f..8b98ad3dceea 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -96,16 +96,29 @@ public:
bool SafeInFPUDelaySlot(const MachineInstr &MIInSlot,
const MachineInstr &FPUMI) const;
+ /// Predicate to determine if an instruction can go in a load delay slot.
+ bool SafeInLoadDelaySlot(const MachineInstr &MIInSlot,
+ const MachineInstr &LoadMI) const;
+
/// Predicate to determine if an instruction has a forbidden slot.
bool HasForbiddenSlot(const MachineInstr &MI) const;
/// Predicate to determine if an instruction has an FPU delay slot.
bool HasFPUDelaySlot(const MachineInstr &MI) const;
+ /// Predicate to determine if an instruction has a load delay slot.
+ bool HasLoadDelaySlot(const MachineInstr &MI) const;
+
/// Insert nop instruction when hazard condition is found
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ /// Insert an ISA-appropriate `nop`.
+ // FIXME: Add support for MIPS16e.
+ MachineInstrBuilder insertNop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
index 089fed9ec0bf..973f40a21dee 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -833,22 +833,26 @@ def MipsJumpTargetAsmOperand : AsmOperandClass {
def jmptarget : Operand<OtherVT> {
let EncoderMethod = "getJumpTargetOpValue";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printJumpOperand";
}
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def brtarget1SImm16 : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue1SImm16";
let OperandType = "OPERAND_PCREL";
let DecoderMethod = "DecodeBranchTarget1SImm16";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printBranchOperand";
}
def calltarget : Operand<iPTR> {
let EncoderMethod = "getJumpTargetOpValue";
let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let PrintMethod = "printJumpOperand";
}
def imm64: Operand<i64>;
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index 588b7e85c94c..35b0fe218d8f 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/IntrinsicsMips.h"
using namespace llvm;
@@ -502,8 +503,7 @@ static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- const MipsSubtarget &ST =
- static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget());
+ const MipsSubtarget &ST = MI.getMF()->getSubtarget<MipsSubtarget>();
const MipsInstrInfo &TII = *ST.getInstrInfo();
const MipsRegisterInfo &TRI = *ST.getRegisterInfo();
const RegisterBankInfo &RBI = *ST.getRegBankInfo();
diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 411a26e42713..7d9824aaf8ec 100644
--- a/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -22,6 +22,13 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
+MachineFunctionInfo *
+MipsFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *>
+ &Src2DstMBB) const {
+ return DestMF.cloneInfo<MipsFunctionInfo>(*this);
+}
+
MipsFunctionInfo::~MipsFunctionInfo() = default;
bool MipsFunctionInfo::globalBaseRegSet() const {
@@ -29,7 +36,7 @@ bool MipsFunctionInfo::globalBaseRegSet() const {
}
static const TargetRegisterClass &getGlobalBaseRegClass(MachineFunction &MF) {
- auto &STI = static_cast<const MipsSubtarget &>(MF.getSubtarget());
+ auto &STI = MF.getSubtarget<MipsSubtarget>();
auto &TM = static_cast<const MipsTargetMachine &>(MF.getTarget());
if (STI.inMips16Mode())
diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.h b/llvm/lib/Target/Mips/MipsMachineFunction.h
index 786d210e2aaa..7b17fd3ed0cd 100644
--- a/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -26,6 +26,11 @@ class MipsFunctionInfo : public MachineFunctionInfo {
public:
MipsFunctionInfo(MachineFunction &MF) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
~MipsFunctionInfo() override;
unsigned getSRetReturnReg() const { return SRetReturnReg; }
diff --git a/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index a2b55e8bddcd..2c23d3b72dc6 100644
--- a/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -10,8 +10,9 @@
#include "Mips.h"
#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
index 2823d300dc6e..204c42ae5e5f 100644
--- a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -170,7 +170,7 @@ static void eraseGPOpnd(MachineInstr &MI) {
for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
MachineOperand &MO = MI.getOperand(I);
if (MO.isReg() && MO.getReg() == Reg) {
- MI.RemoveOperand(I);
+ MI.removeOperand(I);
return;
}
}
@@ -194,7 +194,7 @@ void MBBInfo::postVisit() {
// OptimizePICCall methods.
bool OptimizePICCall::runOnMachineFunction(MachineFunction &F) {
- if (static_cast<const MipsSubtarget &>(F.getSubtarget()).inMips16Mode())
+ if (F.getSubtarget<MipsSubtarget>().inMips16Mode())
return false;
// Do a pre-order traversal of the dominator tree.
diff --git a/llvm/lib/Target/Mips/MipsOs16.cpp b/llvm/lib/Target/Mips/MipsOs16.cpp
index ac4e55f8a1f5..f6346a8bbc8b 100644
--- a/llvm/lib/Target/Mips/MipsOs16.cpp
+++ b/llvm/lib/Target/Mips/MipsOs16.cpp
@@ -13,6 +13,7 @@
#include "Mips.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
new file mode 100644
index 000000000000..7723a10af2d7
--- /dev/null
+++ b/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
@@ -0,0 +1,148 @@
+//=== lib/CodeGen/GlobalISel/MipsPostLegalizerCombiner.cpp ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// after the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "Mips.h"
+#include "MipsLegalizerInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "mips-postlegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "MipsGenPostLegalizeGICombiner.inc"
+#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "MipsGenPostLegalizeGICombiner.inc"
+#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
+class MipsPostLegalizerCombinerInfo final : public CombinerInfo {
+ GISelKnownBits *KB;
+
+public:
+ MipsGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+
+ MipsPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+ GISelKnownBits *KB, const MipsLegalizerInfo *LI)
+ : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
+ /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
+ KB(KB) {
+ if (!GeneratedRuleCfg.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool MipsPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+
+ CombinerHelper Helper(Observer, B, KB,
+ /*DominatorTree*/ nullptr, LInfo);
+ MipsGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
+ return Generated.tryCombineAll(Observer, MI, B, Helper);
+}
+
+#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "MipsGenPostLegalizeGICombiner.inc"
+#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+// Pass boilerplate
+// ================
+
+class MipsPostLegalizerCombiner : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MipsPostLegalizerCombiner(bool IsOptNone = false);
+
+ StringRef getPassName() const override {
+ return "MipsPostLegalizerCombiner";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool IsOptNone;
+};
+} // end anonymous namespace
+
+void MipsPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MipsPostLegalizerCombiner::MipsPostLegalizerCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ initializeMipsPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool MipsPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ bool EnableOpt =
+ MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+
+ const MipsSubtarget &ST = MF.getSubtarget<MipsSubtarget>();
+ const MipsLegalizerInfo *LI =
+ static_cast<const MipsLegalizerInfo *>(ST.getLegalizerInfo());
+
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MipsPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
+ F.hasMinSize(), KB, LI);
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char MipsPostLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(MipsPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs after legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(MipsPostLegalizerCombiner, DEBUG_TYPE,
+ "Combine Mips machine instrs after legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createMipsPostLegalizeCombiner(bool IsOptNone) {
+ return new MipsPostLegalizerCombiner(IsOptNone);
+}
+} // end namespace llvm
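For orientation, the driver that invokes the CombinerInfo::combine() override above is llvm::Combiner::combineMachineInstrs. A minimal sketch of its fixpoint loop, heavily simplified from the real implementation (which also manages a worklist and optional CSE info); MF, Observer, Builder, and PCInfo stand in for the objects constructed in runOnMachineFunction:

  // Simplified, illustrative sketch only: every instruction is offered to
  // MipsPostLegalizerCombinerInfo::combine() until no rule fires anymore.
  bool Changed = false, MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (MachineBasicBlock &MBB : MF)
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
        MadeChange |= PCInfo.combine(Observer, MI, Builder);
    Changed |= MadeChange;
  }
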
diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index 2ad9ffe4eb77..cb6d53ec0a12 100644
--- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
@@ -50,8 +51,7 @@ bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
// Don't attempt to combine non power of 2 loads or unaligned loads when
// subtarget doesn't support them.
auto MMO = *MI.memoperands_begin();
- const MipsSubtarget &STI =
- static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget());
+ const MipsSubtarget &STI = MI.getMF()->getSubtarget<MipsSubtarget>();
if (!isPowerOf2_64(MMO->getSize()))
return false;
bool isUnaligned = MMO->getAlign() < MMO->getSize();
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 04b69c66bc0d..2544d9d9b76d 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -73,8 +73,7 @@ RegisterBankInfo::ValueMapping ValueMappings[] = {
using namespace llvm;
-MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI)
- : MipsGenRegisterBankInfo() {}
+MipsRegisterBankInfo::MipsRegisterBankInfo(const TargetRegisterInfo &TRI) {}
const RegisterBank &
MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
@@ -154,8 +153,7 @@ static bool isGprbTwoInstrUnalignedLoadOrStore(const MachineInstr *MI) {
if (MI->getOpcode() == TargetOpcode::G_LOAD ||
MI->getOpcode() == TargetOpcode::G_STORE) {
auto MMO = *MI->memoperands_begin();
- const MipsSubtarget &STI =
- static_cast<const MipsSubtarget &>(MI->getMF()->getSubtarget());
+ const MipsSubtarget &STI = MI->getMF()->getSubtarget<MipsSubtarget>();
if (MMO->getSize() == 4 && (!STI.systemSupportsUnalignedAccess() &&
MMO->getAlign() < MMO->getSize()))
return true;
@@ -399,7 +397,7 @@ void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction(
static const MipsRegisterBankInfo::ValueMapping *
getMSAMapping(const MachineFunction &MF) {
- assert(static_cast<const MipsSubtarget &>(MF.getSubtarget()).hasMSA() &&
+ assert(MF.getSubtarget<MipsSubtarget>().hasMSA() &&
"MSA mapping not available on target without MSA.");
return &Mips::ValueMappings[Mips::MSAIdx];
}
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.h b/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
index df51606e1e8a..9eca4fdab3d6 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_MIPS_MIPSREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_MIPS_MIPSREGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "MipsGenRegisterBank.inc"
diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 7ee2ddf3605f..7729d9cf92da 100644
--- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -97,7 +97,7 @@ private:
ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
: MF(MF_), MRI(MF.getRegInfo()),
- Subtarget(static_cast<const MipsSubtarget &>(MF.getSubtarget())),
+ Subtarget(MF.getSubtarget<MipsSubtarget>()),
TII(*static_cast<const MipsSEInstrInfo *>(Subtarget.getInstrInfo())),
RegInfo(*Subtarget.getRegisterInfo()) {}
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 03a545605fe1..1124111c1a6e 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<MipsSubtarget>();
if (Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
@@ -282,7 +282,7 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(
SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits,
unsigned ShiftAmount = 0) const {
if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ auto *CN = cast<ConstantSDNode>(Addr.getOperand(1));
if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) {
EVT ValTy = Addr.getValueType();
@@ -956,6 +956,38 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
break;
}
+ case MipsISD::FAbs: {
+ MVT ResTy = Node->getSimpleValueType(0);
+ assert((ResTy == MVT::f64 || ResTy == MVT::f32) &&
+ "Unsupported float type!");
+ unsigned Opc = 0;
+ if (ResTy == MVT::f64)
+ Opc = (Subtarget->isFP64bit() ? Mips::FABS_D64 : Mips::FABS_D32);
+ else
+ Opc = Mips::FABS_S;
+
+ if (Subtarget->inMicroMipsMode()) {
+ switch (Opc) {
+ case Mips::FABS_D64:
+ Opc = Mips::FABS_D64_MM;
+ break;
+ case Mips::FABS_D32:
+ Opc = Mips::FABS_D32_MM;
+ break;
+ case Mips::FABS_S:
+ Opc = Mips::FABS_S_MM;
+ break;
+ default:
+ llvm_unreachable("Unknown opcode for MIPS floating point abs!");
+ }
+ }
+
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(Opc, DL, ResTy, Node->getOperand(0)));
+
+ return true;
+ }
+
// Manually match MipsISD::Ins nodes to get the correct instruction. It has
// to be done in this fashion so that we respect the differences between
// dins and dinsm, as the difference is that the size operand has the range
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 346ebe9664fc..f8bde3816fde 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -99,11 +99,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::BITCAST, VecTy, Legal);
}
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(
+ {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});
if (Subtarget.hasMips32r2()) {
setOperationAction(ISD::ADDC, MVT::i32, Legal);
@@ -161,11 +158,7 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::VSELECT);
- setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR});
}
if (!Subtarget.useSoftFloat()) {
@@ -1184,13 +1177,13 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// i32 load from lower address.
SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
- Nd.getAlignment(), Nd.getMemOperand()->getFlags());
+ Nd.getAlign(), Nd.getMemOperand()->getFlags());
// i32 load from higher address.
Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
SDValue Hi = DAG.getLoad(
MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
- std::min(Nd.getAlignment(), 4U), Nd.getMemOperand()->getFlags());
+ commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags());
if (!Subtarget.isLittle())
std::swap(Lo, Hi);
@@ -1219,14 +1212,13 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
std::swap(Lo, Hi);
// i32 store to lower address.
- Chain =
- DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlignment(),
- Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
+ Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(),
+ Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
// i32 store to higher address.
Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
- std::min(Nd.getAlignment(), 4U),
+ commonAlignment(Nd.getAlign(), 4),
Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}
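A note on the std::min to commonAlignment changes above: commonAlignment(A, Offset) from llvm/Support/Alignment.h returns the largest power-of-two alignment that divides both the base alignment and the byte offset, i.e. the alignment actually guaranteed at Ptr + 4. A small illustration (values assumed, not taken from this patch):

  #include <cassert>
  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  void alignmentExamples() {
    assert(commonAlignment(Align(16), 4) == Align(4)); // 16-aligned + 4 is only 4-aligned
    assert(commonAlignment(Align(2), 4) == Align(2));  // never exceeds the base alignment
    assert(commonAlignment(Align(4), 4) == Align(4));  // agrees with the old std::min(x, 4U) here
  }
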
diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index d6481793ef49..c86666cc40b6 100644
--- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-reg-info"
-MipsSERegisterInfo::MipsSERegisterInfo() {}
+MipsSERegisterInfo::MipsSERegisterInfo() = default;
bool MipsSERegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/llvm/lib/Target/Mips/MipsScheduleGeneric.td
index f076f2f9cf10..931412cb261e 100644
--- a/llvm/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -957,13 +957,13 @@ def : InstRW<[GenericWriteFPURcpS], (instrs RECIP_S_MM, RSQRT_S_MM)>;
def : InstRW<[GenericWriteFPURcpD], (instrs RECIP_D32_MM, RECIP_D64_MM,
RSQRT_D32_MM, RSQRT_D64_MM)>;
-def : InstRW<[GenericWriteFPUStore], (instrs SDC1_MM, SWC1_MM, SUXC1_MM,
- SWXC1_MM)>;
+def : InstRW<[GenericWriteFPUStore], (instrs SDC1_MM_D32, SDC1_MM_D64, SWC1_MM,
+ SUXC1_MM, SWXC1_MM)>;
def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs CFC1_MM, CTC1_MM)>;
-def : InstRW<[GenericWriteFPULoad], (instrs LDC1_MM, LUXC1_MM, LWC1_MM,
- LWXC1_MM)>;
+def : InstRW<[GenericWriteFPULoad], (instrs LDC1_MM_D32, LDC1_MM_D64, LUXC1_MM,
+ LWC1_MM, LWXC1_MM)>;
// microMIPS32r6
// =============
diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp
index c285385a19dd..10530cdafeed 100644
--- a/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -64,6 +64,7 @@ bool MipsSubtarget::MSAWarningPrinted = false;
bool MipsSubtarget::VirtWarningPrinted = false;
bool MipsSubtarget::CRCWarningPrinted = false;
bool MipsSubtarget::GINVWarningPrinted = false;
+bool MipsSubtarget::MIPS1WarningPrinted = false;
void MipsSubtarget::anchor() {}
@@ -91,10 +92,14 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
if (MipsArchVersion == MipsDefault)
MipsArchVersion = Mips32;
- // Don't even attempt to generate code for MIPS-I and MIPS-V. They have not
- // been tested and currently exist for the integrated assembler only.
- if (MipsArchVersion == Mips1)
- report_fatal_error("Code generation for MIPS-I is not implemented", false);
+ // MIPS-I has not been tested.
+ if (MipsArchVersion == Mips1 && !MIPS1WarningPrinted) {
+ errs() << "warning: MIPS-I support is experimental\n";
+ MIPS1WarningPrinted = true;
+ }
+
+ // Don't even attempt to generate code for MIPS-V. It has not
+ // been tested and currently exists for the integrated assembler only.
if (MipsArchVersion == Mips5)
report_fatal_error("Code generation for MIPS-V is not implemented", false);
@@ -111,7 +116,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
if (isFP64bit() && !hasMips64() && hasMips32() && !hasMips32r2())
report_fatal_error(
"FPU with 64-bit registers is not available on MIPS32 pre revision 2. "
- "Use -mcpu=mips32r2 or greater.");
+ "Use -mcpu=mips32r2 or greater.", false);
if (!isABI_O32() && !useOddSPReg())
report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false);
diff --git a/llvm/lib/Target/Mips/MipsSubtarget.h b/llvm/lib/Target/Mips/MipsSubtarget.h
index 2b4c2b19a95d..ec8ca64c8ce8 100644
--- a/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -17,12 +17,12 @@
#include "MipsFrameLowering.h"
#include "MipsISelLowering.h"
#include "MipsInstrInfo.h"
-#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
@@ -59,6 +59,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// Used to avoid printing ginv warnings multiple times.
static bool GINVWarningPrinted;
+ // Used to avoid printing Mips1 warnings multiple times.
+ static bool MIPS1WarningPrinted;
+
// Used to avoid printing virt warnings multiple times.
static bool VirtWarningPrinted;
diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index f9f662a00117..fb0aa397d393 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -18,12 +18,14 @@
#include "MipsSEISelDAGToDAG.h"
#include "MipsSubtarget.h"
#include "MipsTargetObjectFile.h"
+#include "MipsTargetTransformInfo.h"
#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -62,6 +64,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
initializeMipsBranchExpansionPass(*PR);
initializeMicroMipsSizeReducePass(*PR);
initializeMipsPreLegalizerCombinerPass(*PR);
+ initializeMipsPostLegalizerCombinerPass(*PR);
initializeMipsMulMulBugFixPass(*PR);
}
@@ -103,7 +106,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
static Reloc::Model getEffectiveRelocModel(bool JIT,
Optional<Reloc::Model> RM) {
- if (!RM.hasValue() || JIT)
+ if (!RM || JIT)
return Reloc::Static;
return *RM;
}
@@ -238,6 +241,7 @@ public:
bool addIRTranslator() override;
void addPreLegalizeMachineIR() override;
bool addLegalizeMachineIR() override;
+ void addPreRegBankSelect() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
@@ -276,7 +280,7 @@ void MipsPassConfig::addPreRegAlloc() {
}
TargetTransformInfo
-MipsTargetMachine::getTargetTransformInfo(const Function &F) {
+MipsTargetMachine::getTargetTransformInfo(const Function &F) const {
if (Subtarget->allowMixed16_32()) {
LLVM_DEBUG(errs() << "No Target Transform Info Pass Added\n");
// FIXME: This is no longer necessary as the TTI returned is per-function.
@@ -284,7 +288,7 @@ MipsTargetMachine::getTargetTransformInfo(const Function &F) {
}
LLVM_DEBUG(errs() << "Target Transform Info Pass Added\n");
- return TargetTransformInfo(BasicTTIImpl(this, F));
+ return TargetTransformInfo(MipsTTIImpl(this, F));
}
// Implemented by targets that want to run passes immediately before
@@ -333,6 +337,11 @@ bool MipsPassConfig::addLegalizeMachineIR() {
return false;
}
+void MipsPassConfig::addPreRegBankSelect() {
+ bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ addPass(createMipsPostLegalizeCombiner(IsOptNone));
+}
+
bool MipsPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
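For context, addPreRegBankSelect() is the TargetPassConfig hook that runs between legalization and register-bank selection, so the Mips GlobalISel pipeline becomes IRTranslator, pre-legalizer combiner, Legalizer, post-legalizer combiner, RegBankSelect, InstructionSelect. The hook being overridden is, paraphrased from llvm/CodeGen/TargetPassConfig.h:

  // Paraphrased: targets override this to insert passes after machine IR is
  // legalized and before register-bank selection runs.
  virtual void addPreRegBankSelect() {}
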
diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.h b/llvm/lib/Target/Mips/MipsTargetMachine.h
index e0de924be4fd..46ffc11738df 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -43,7 +43,7 @@ public:
CodeGenOpt::Level OL, bool JIT, bool isLittle);
~MipsTargetMachine() override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
const MipsSubtarget *getSubtargetImpl() const {
if (Subtarget)
diff --git a/llvm/lib/Target/Mips/MipsTargetStreamer.h b/llvm/lib/Target/Mips/MipsTargetStreamer.h
index 44615b987e3c..2f4b6eb37aa1 100644
--- a/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -178,7 +178,7 @@ public:
MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
const MipsABIInfo &getABI() const {
- assert(ABI.hasValue() && "ABI hasn't been set!");
+ assert(ABI && "ABI hasn't been set!");
return *ABI;
}
diff --git a/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp b/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp
new file mode 100644
index 000000000000..bd88a0af0ecf
--- /dev/null
+++ b/llvm/lib/Target/Mips/MipsTargetTransformInfo.cpp
@@ -0,0 +1,17 @@
+//===-- MipsTargetTransformInfo.cpp - Mips specific TTI -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetTransformInfo.h"
+
+using namespace llvm;
+
+bool MipsTTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
+ EVT VT = TLI->getValueType(DL, DataType);
+ return TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
+ VT);
+}
diff --git a/llvm/lib/Target/Mips/MipsTargetTransformInfo.h b/llvm/lib/Target/Mips/MipsTargetTransformInfo.h
new file mode 100644
index 000000000000..6f52eaa2f833
--- /dev/null
+++ b/llvm/lib/Target/Mips/MipsTargetTransformInfo.h
@@ -0,0 +1,40 @@
+//===-- MipsTargetTransformInfo.h - Mips specific TTI -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSTARGETTRANSFORMINFO_H
+
+#include "MipsTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class MipsTTIImpl : public BasicTTIImplBase<MipsTTIImpl> {
+ using BaseT = BasicTTIImplBase<MipsTTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const MipsSubtarget *ST;
+ const MipsTargetLowering *TLI;
+
+ const MipsSubtarget *getST() const { return ST; }
+ const MipsTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit MipsTTIImpl(const MipsTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ bool hasDivRemOp(Type *DataType, bool IsSigned);
+};
+
+} // end namespace llvm
+
+#endif
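The hasDivRemOp hook declared above is consumed by generic transforms such as the DivRemPairs pass, which keeps matched div/rem pairs intact when the target reports combined support. A hedged sketch of such a query (illustrative helper, not part of this patch; TTI and I come from the usual pass plumbing):

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // If the target has a legal or custom [SU]DIVREM for this type, a remainder
  // need not be decomposed into mul+sub: one divide can serve both results.
  static bool shouldKeepDivRemPair(const TargetTransformInfo &TTI,
                                   const Instruction &I) {
    bool IsSigned = I.getOpcode() == Instruction::SDiv;
    return TTI.hasDivRemOp(I.getType(), IsSigned);
  }
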
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index f275011018a3..85ace96eeeaf 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -49,9 +49,20 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
SupportsExtendedDwarfLocDirective = false;
SupportsSignedData = false;
+ PrivateGlobalPrefix = "$L__";
+ PrivateLabelPrefix = PrivateGlobalPrefix;
+
// @TODO: Can we just disable this?
WeakDirective = "\t// .weak\t";
GlobalDirective = "\t// .globl\t";
UseIntegratedAssembler = false;
+
+ // Avoid using parens for identifiers starting with $ - ptxas does
+ // not expect them.
+ UseParensForDollarSignNames = false;
+
+ // ptxas does not support DWARF `.file fileno directory filename'
+ // syntax as of v11.X.
+ EnableDwarfFileDirectoryDefault = false;
}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index 1cbd650bdf06..b72cea5d03f1 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -93,7 +93,7 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
// Emit DWARF .file directives in the outermost scope.
outputDwarfFileDirectives();
OS << "\t.section";
- Section->PrintSwitchToSection(*getStreamer().getContext().getAsmInfo(),
+ Section->printSwitchToSection(*getStreamer().getContext().getAsmInfo(),
getStreamer().getContext().getTargetTriple(),
OS, SubSection);
// DWARF sections are enclosed into braces - emit the open one.
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3a59306c4998..b1d842122060 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -45,7 +45,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
@@ -329,7 +328,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const TargetLowering *TLI = STI.getTargetLowering();
+ const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
Type *Ty = F->getReturnType();
@@ -363,7 +362,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
unsigned totalsz = DL.getTypeAllocSize(Ty);
unsigned retAlignment = 0;
if (!getAlign(*F, 0, retAlignment))
- retAlignment = DL.getABITypeAlignment(Ty);
+ retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
<< "]";
} else
@@ -513,7 +512,7 @@ void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
OutStreamer->AddComment(Twine("implicit-def: ") +
STI.getRegisterInfo()->getName(RegNo));
}
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
@@ -818,9 +817,13 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
"Missed a global variable");
assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget &STI =
+ *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
+
// Print out module-level global variables in proper order
for (unsigned i = 0, e = Globals.size(); i != e; ++i)
- printModuleLevelGV(Globals[i], OS2);
+ printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
OS2 << '\n';
@@ -888,17 +891,18 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
clearAnnotationCache(&M);
- // Close the last emitted section
- if (HasDebugInfo) {
- static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
- ->closeLastSection();
- // Emit empty .debug_loc section for better support of the empty files.
- OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
- }
+ if (auto *TS = static_cast<NVPTXTargetStreamer *>(
+ OutStreamer->getTargetStreamer())) {
+ // Close the last emitted section
+ if (HasDebugInfo) {
+ TS->closeLastSection();
+ // Emit empty .debug_loc section for better support of the empty files.
+ OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
+ }
- // Output last DWARF .file directives, if any.
- static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
- ->outputDwarfFileDirectives();
+ // Output last DWARF .file directives, if any.
+ TS->outputDwarfFileDirectives();
+ }
return ret;
@@ -957,8 +961,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
}
void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
- raw_ostream &O,
- bool processDemoted) {
+ raw_ostream &O, bool processDemoted,
+ const NVPTXSubtarget &STI) {
// Skip meta data
if (GVar->hasSection()) {
if (GVar->getSection() == "llvm.metadata")
@@ -1001,7 +1005,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// (extern) declarations, no definition or initializer
// Currently the only known declaration is for an automatic __local
// (.shared) promoted to global.
- emitPTXGlobalVariable(GVar, O);
+ emitPTXGlobalVariable(GVar, O, STI);
O << ";\n";
return;
}
@@ -1095,6 +1099,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
emitPTXAddressSpace(PTy->getAddressSpace(), O);
if (isManaged(*GVar)) {
+ if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
+ report_fatal_error(
+ ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
+ }
O << " .attribute(.managed)";
}
@@ -1214,9 +1222,13 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
std::vector<const GlobalVariable *> &gvars = localDecls[f];
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget &STI =
+ *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
+
for (const GlobalVariable *GV : gvars) {
O << "\t// demoted variable\n\t";
- printModuleLevelGV(GV, O, true);
+ printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
}
}
@@ -1282,7 +1294,8 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
}
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
- raw_ostream &O) {
+ raw_ostream &O,
+ const NVPTXSubtarget &STI) {
const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
@@ -1290,6 +1303,13 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << ".";
emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
+ if (isManaged(*GVar)) {
+ if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
+ report_fatal_error(
+ ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
+ }
+ O << " .attribute(.managed)";
+ }
if (MaybeAlign A = GVar->getAlign())
O << " .align " << A->value();
else
@@ -1335,34 +1355,6 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
}
}
-static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
- if (Ty->isSingleValueType())
- return DL.getPrefTypeAlignment(Ty);
-
- auto *ATy = dyn_cast<ArrayType>(Ty);
- if (ATy)
- return getOpenCLAlignment(DL, ATy->getElementType());
-
- auto *STy = dyn_cast<StructType>(Ty);
- if (STy) {
- unsigned int alignStruct = 1;
- // Go through each element of the struct and find the
- // largest alignment.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
- Type *ETy = STy->getElementType(i);
- unsigned int align = getOpenCLAlignment(DL, ETy);
- if (align > alignStruct)
- alignStruct = align;
- }
- return alignStruct;
- }
-
- auto *FTy = dyn_cast<FunctionType>(Ty);
- if (FTy)
- return DL.getPointerPrefAlignment().value();
- return DL.getPrefTypeAlignment(Ty);
-}
-
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
getSymbol(I->getParent())->print(O, MAI);
@@ -1373,7 +1365,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const AttributeList &PAL = F->getAttributes();
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const TargetLowering *TLI = STI.getTargetLowering();
+ const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
+
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
@@ -1430,18 +1423,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
+ auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
+ paramIndex](Type *Ty) -> Align {
+ Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
+ MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
+ return std::max(TypeAlign, ParamAlign.valueOrOne());
+ };
+
if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// Just print .param .align <a> .b8 .param[size];
- // <a> = PAL.getparamalignment
+ // <a> = optimal alignment for the element type; always a multiple of
+ // PAL.getParamAlignment
// size = typeallocsize of element type
- const Align align = DL.getValueOrABITypeAlignment(
- PAL.getParamAlignment(paramIndex), Ty);
+ Align OptimalAlign = getOptimalAlignForParam(Ty);
- unsigned sz = DL.getTypeAllocSize(Ty);
- O << "\t.param .align " << align.value() << " .b8 ";
+ O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
printParamName(I, paramIndex, O);
- O << "[" << sz << "]";
+ O << "[" << DL.getTypeAllocSize(Ty) << "]";
continue;
}
@@ -1454,7 +1453,6 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
NVPTX::CUDA) {
- Type *ETy = PTy->getPointerElementType();
int addrSpace = PTy->getAddressSpace();
switch (addrSpace) {
default:
@@ -1470,7 +1468,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ".ptr .global ";
break;
}
- O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
+ Align ParamAlign = I->getParamAlign().valueOrOne();
+ O << ".align " << ParamAlign.value() << " ";
}
printParamName(I, paramIndex, O);
continue;
@@ -1511,17 +1510,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
continue;
}
- // param has byVal attribute. So should be a pointer
- auto *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy && "Param with byval attribute should be a pointer type");
- Type *ETy = PTy->getPointerElementType();
+ // param has byVal attribute.
+ Type *ETy = PAL.getParamByValType(paramIndex);
+ assert(ETy && "Param should have byval type");
if (isABI || isKernelFunc) {
// Just print .param .align <a> .b8 .param[size];
- // <a> = PAL.getparamalignment
+ // <a> = optimal alignment for the element type; always a multiple of
+ // PAL.getParamAlignment
// size = typeallocsize of element type
- Align align =
- DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
+ Align OptimalAlign = getOptimalAlignForParam(ETy);
+
// Work around a bug in ptxas. When PTX code takes address of
// byval parameter with alignment < 4, ptxas generates code to
// spill argument into memory. Alas on sm_50+ ptxas generates
@@ -1533,10 +1532,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// TODO: this will need to be undone when we get to support multi-TU
// device-side compilation as it breaks ABI compatibility with nvcc.
// Hopefully ptxas bug is fixed by then.
- if (!isKernelFunc && align < Align(4))
- align = Align(4);
+ if (!isKernelFunc && OptimalAlign < Align(4))
+ OptimalAlign = Align(4);
unsigned sz = DL.getTypeAllocSize(ETy);
- O << "\t.param .align " << align.value() << " .b8 ";
+ O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
continue;
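A worked example of the getOptimalAlignForParam lambda introduced above (all values assumed for illustration):

  #include <algorithm>
  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  void paramAlignExample() {
    // The emitted .param alignment is the max of the TLI-optimized type
    // alignment and any explicit align attribute on the parameter.
    Align TypeAlign(8);                // e.g. from getFunctionParamOptimizedAlign
    MaybeAlign ParamAlign = Align(16); // e.g. from an explicit 'align 16' attribute
    Align Emitted = std::max(TypeAlign, ParamAlign.valueOrOne()); // Align(16)
    (void)Emitted; // with no attribute, valueOrOne() is Align(1) and TypeAlign wins
  }
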
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 2a3a38d7b2f1..cd61e99a103a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -218,7 +218,7 @@ private:
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
- bool = false);
+ bool processDemoted, const NVPTXSubtarget &STI);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
@@ -258,7 +258,8 @@ private:
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<const GlobalVariable *>> localDecls;
- void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
+ void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O,
+ const NVPTXSubtarget &STI);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const;
void printScalarConstant(const Constant *CPV, raw_ostream &O);
diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp
index 10bf56fd9a91..9661dffd3dae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp
@@ -17,7 +17,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
using namespace llvm;
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 888fc8ffac2c..2201eb19c80f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -83,6 +83,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
GV.hasInitializer() ? GV.getInitializer() : nullptr, "", &GV,
GV.getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
NewGV->copyAttributesFrom(&GV);
+ NewGV->copyMetadata(&GV, /*Offset=*/0);
GVMap[&GV] = NewGV;
}
}
@@ -269,24 +270,16 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
// ShuffleVector
return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1],
NewOperands[2]);
- case Instruction::ExtractValue:
- // ExtractValueConstantExpr
- return Builder.CreateExtractValue(NewOperands[0], C->getIndices());
case Instruction::InsertValue:
// InsertValueConstantExpr
return Builder.CreateInsertValue(NewOperands[0], NewOperands[1],
C->getIndices());
case Instruction::GetElementPtr:
// GetElementPtrConstantExpr
- return cast<GEPOperator>(C)->isInBounds()
- ? Builder.CreateGEP(
- cast<GEPOperator>(C)->getSourceElementType(),
- NewOperands[0],
- makeArrayRef(&NewOperands[1], NumOperands - 1))
- : Builder.CreateInBoundsGEP(
- cast<GEPOperator>(C)->getSourceElementType(),
- NewOperands[0],
- makeArrayRef(&NewOperands[1], NumOperands - 1));
+ return Builder.CreateGEP(cast<GEPOperator>(C)->getSourceElementType(),
+ NewOperands[0],
+ makeArrayRef(&NewOperands[1], NumOperands - 1), "",
+ cast<GEPOperator>(C)->isInBounds());
case Instruction::Select:
// SelectConstantExpr
return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index dd4290a605a9..48fa387e563a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -42,7 +42,7 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
}
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<NVPTXSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -923,8 +923,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Addr, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
- MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
@@ -936,8 +935,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
- MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
if (PointerSize == 64)
@@ -955,8 +953,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
- MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
Opcode = pickOpcodeForVT(
@@ -974,8 +971,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), N1, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
- MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
}
if (!NVPTXLD)
@@ -1092,7 +1088,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
@@ -1119,7 +1115,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else if (PointerSize == 64
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
@@ -1169,7 +1165,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
} else {
if (PointerSize == 64) {
switch (N->getOpcode()) {
@@ -1217,7 +1213,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(*Opcode, DL, N->getVTList(), Ops);
}
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
@@ -1361,7 +1357,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = { Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
+ LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (TM.is64Bit()) {
@@ -1508,7 +1504,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = {Base, Offset, Chain};
- LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
+ LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
} else {
if (TM.is64Bit()) {
switch (N->getOpcode()) {
@@ -1654,7 +1650,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
if (!Opcode)
return false;
SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
+ LD = CurDAG->getMachineNode(*Opcode, DL, InstVTList, Ops);
}
MachineMemOperand *MemRef = Mem->getMemOperand();
@@ -1787,7 +1783,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
getI32Imm(toTypeWidth, dl),
Addr,
Chain};
- NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
} else if (PointerSize == 64
? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
: SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
@@ -1806,7 +1802,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Base,
Offset,
Chain};
- NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
} else if (PointerSize == 64
? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
: SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
@@ -1832,7 +1828,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Base,
Offset,
Chain};
- NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
} else {
if (PointerSize == 64)
Opcode =
@@ -1855,7 +1851,7 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
getI32Imm(toTypeWidth, dl),
BasePtr,
Chain};
- NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(*Opcode, dl, MVT::Other, Ops);
}
if (!NVPTXST)
@@ -2082,7 +2078,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
StOps.push_back(Chain);
- ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
+ ST = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, StOps);
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
@@ -2164,7 +2160,7 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Ops.push_back(Chain);
Ops.push_back(Flag);
- ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
+ ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
return true;
}
@@ -2230,7 +2226,7 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
if (!Opcode)
return false;
- SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
+ SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops);
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
@@ -2333,8 +2329,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
}
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
- SDNode *Ret =
- CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
+ SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, RetVTs, Ops);
MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7b5248906b56..746f652bfa36 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
@@ -48,7 +49,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -71,14 +71,14 @@ static cl::opt<bool> sched4reg(
"nvptx-sched4reg",
cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
-static cl::opt<unsigned>
-FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
- " 1: do it 2: do it aggressively"),
- cl::init(2));
+static cl::opt<unsigned> FMAContractLevelOpt(
+ "nvptx-fma-level", cl::Hidden,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
static cl::opt<int> UsePrecDivF32(
- "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
+ "nvptx-prec-divf32", cl::Hidden,
cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
" IEEE Compliant F32 div.rnd if available."),
cl::init(2));
@@ -487,6 +487,17 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::CTLZ, Ty, Legal);
}
+ setOperationAction(ISD::ADDC, MVT::i32, Legal);
+ setOperationAction(ISD::ADDE, MVT::i32, Legal);
+ setOperationAction(ISD::SUBC, MVT::i32, Legal);
+ setOperationAction(ISD::SUBE, MVT::i32, Legal);
+ if (STI.getPTXVersion() >= 43) {
+ setOperationAction(ISD::ADDC, MVT::i64, Legal);
+ setOperationAction(ISD::ADDE, MVT::i64, Legal);
+ setOperationAction(ISD::SUBC, MVT::i64, Legal);
+ setOperationAction(ISD::SUBE, MVT::i64, Legal);
+ }
+
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
@@ -499,13 +510,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
// We have some custom DAG combine patterns for these nodes
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::FADD);
- setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SREM);
- setTargetDAGCombine(ISD::UREM);
+ setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL,
+ ISD::SREM, ISD::UREM});
// setcc for f16x2 needs special handling to prevent legalizer's
// attempt to scalarize it due to v2i1 not being legal.
@@ -583,6 +589,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// Now deduce the information based on the above mentioned
// actions
computeRegisterProperties(STI.getRegisterInfo());
+
+ setMinCmpXchgSizeInBits(32);
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
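The setMinCmpXchgSizeInBits(32) call added above tells the generic AtomicExpandPass to widen narrower compare-exchange operations: an i8 or i16 cmpxchg is rewritten onto its containing 32-bit word. Schematically (the exact expansion is produced by the generic pass; shown here only as comments):

  // word  = addr & ~3;        // aligned 32-bit container
  // shift = (addr & 3) * 8;   // bit position of the narrow lane
  // loop: load the word, splice the expected/desired bytes in at 'shift',
  //       cmpxchg i32 on the word, retry if only the untouched bytes changed.
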
@@ -1302,8 +1310,8 @@ std::string NVPTXTargetLowering::getPrototype(
bool first = true;
- unsigned OIdx = 0;
- for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
+ const Function *F = CB.getFunction();
+ for (unsigned i = 0, e = Args.size(), OIdx = 0; i != e; ++i, ++OIdx) {
Type *Ty = Args[i].Ty;
if (!first) {
O << ", ";
@@ -1312,15 +1320,14 @@ std::string NVPTXTargetLowering::getPrototype(
if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
- unsigned align = 0;
+ unsigned ParamAlign = 0;
const CallInst *CallI = cast<CallInst>(&CB);
// +1 because index 0 is reserved for return type alignment
- if (!getAlign(*CallI, i + 1, align))
- align = DL.getABITypeAlignment(Ty);
- unsigned sz = DL.getTypeAllocSize(Ty);
- O << ".param .align " << align << " .b8 ";
+ if (!getAlign(*CallI, i + 1, ParamAlign))
+ ParamAlign = getFunctionParamOptimizedAlign(F, Ty, DL).value();
+ O << ".param .align " << ParamAlign << " .b8 ";
O << "_";
- O << "[" << sz << "]";
+ O << "[" << DL.getTypeAllocSize(Ty) << "]";
// update the index for Outs
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, DL, Ty, vtparts);
@@ -1351,15 +1358,18 @@ std::string NVPTXTargetLowering::getPrototype(
O << "_";
continue;
}
- auto *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy && "Param with byval attribute should be a pointer type");
- Type *ETy = PTy->getPointerElementType();
- Align align = Outs[OIdx].Flags.getNonZeroByValAlign();
- unsigned sz = DL.getTypeAllocSize(ETy);
- O << ".param .align " << align.value() << " .b8 ";
+ Align ParamByValAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
+
+ // Try to increase alignment. This matches the logic in LowerCall, where
+ // alignment is raised to improve vectorization options.
+ Type *ETy = Args[i].IndirectType;
+ Align AlignCandidate = getFunctionParamOptimizedAlign(F, ETy, DL);
+ ParamByValAlign = std::max(ParamByValAlign, AlignCandidate);
+
+ O << ".param .align " << ParamByValAlign.value() << " .b8 ";
O << "_";
- O << "[" << sz << "]";
+ O << "[" << Outs[OIdx].Flags.getByValSize() << "]";
}
O << ");";
return O.str();
@@ -1406,12 +1416,15 @@ Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
// Check for function alignment information if we found that the
// ultimate target is a Function
- if (DirectCallee)
+ if (DirectCallee) {
if (getAlign(*DirectCallee, Idx, Alignment))
return Align(Alignment);
+ // If alignment information is not available, fall back to the
+ // default optimized alignment for the function parameter type.
+ return getFunctionParamOptimizedAlign(DirectCallee, Ty, DL);
+ }
- // Call is indirect or alignment information is not available, fall back to
- // the ABI type alignment
+ // Call is indirect, fall back to the ABI type alignment
return DL.getABITypeAlign(Ty);
}
@@ -1436,11 +1449,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return Chain;
unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
- SDValue tempChain = Chain;
+ SDValue TempChain = Chain;
Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
SDValue InFlag = Chain.getValue(1);
- unsigned paramCount = 0;
+ unsigned ParamCount = 0;
// Args.size() and Outs.size() need not match.
// Outs.size() will be larger
// * if there is an aggregate argument with multiple fields (each field
@@ -1456,173 +1469,155 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
EVT VT = Outs[OIdx].VT;
Type *Ty = Args[i].Ty;
+ bool IsByVal = Outs[OIdx].Flags.isByVal();
- if (!Outs[OIdx].Flags.isByVal()) {
- SmallVector<EVT, 16> VTs;
- SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
- Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL);
- unsigned AllocSize = DL.getTypeAllocSize(Ty);
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- bool NeedAlign; // Does argument declaration specify alignment?
- if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
- // declare .param .align <align> .b8 .param<n>[<size>];
- SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
- Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps);
- NeedAlign = true;
- } else {
- // declare .param .b<size> .param<n>;
- if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) {
- // PTX ABI requires integral types to be at least 32 bits in
- // size. FP16 is loaded/stored using i16, so it's handled
- // here as well.
- AllocSize = 4;
- }
- SDValue DeclareScalarParamOps[] = {
- Chain, DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(AllocSize * 8, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag};
- Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
- DeclareScalarParamOps);
- NeedAlign = false;
- }
- InFlag = Chain.getValue(1);
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
- // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
- // than 32-bits are sign extended or zero extended, depending on
- // whether they are signed or unsigned types. This case applies
- // only to scalar parameters and not to aggregate values.
- bool ExtendIntegerParam =
- Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
-
- auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
- SmallVector<SDValue, 6> StoreOperands;
- for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
- // New store.
- if (VectorInfo[j] & PVF_FIRST) {
- assert(StoreOperands.empty() && "Unfinished preceding store.");
- StoreOperands.push_back(Chain);
- StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
- StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32));
- }
+ assert((!IsByVal || Args[i].IndirectType) &&
+ "byval arg must have indirect type");
+ Type *ETy = (IsByVal ? Args[i].IndirectType : Ty);
+ ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets);
+
+ Align ArgAlign;
+ if (IsByVal) {
+ // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
+ // so we don't need to worry about whether it's naturally aligned or not.
+ // See TargetLowering::LowerCallTo().
+ ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
+
+ // Try to increase alignment to enhance vectorization options.
+ ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(
+ CB->getCalledFunction(), ETy, DL));
+
+ // Enforce minimum alignment of 4 to work around ptxas miscompile
+ // for sm_50+. See corresponding alignment adjustment in
+ // emitFunctionParamList() for details.
+ ArgAlign = std::max(ArgAlign, Align(4));
+ } else {
+ ArgAlign = getArgumentAlignment(Callee, CB, Ty, ParamCount + 1, DL);
+ }
- EVT EltVT = VTs[j];
- SDValue StVal = OutVals[OIdx];
- if (ExtendIntegerParam) {
- assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
- // zext/sext to i32
- StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND,
- dl, MVT::i32, StVal);
- } else if (EltVT.getSizeInBits() < 16) {
- // Use 16-bit registers for small stores as it's the
- // smallest general purpose register size supported by NVPTX.
- StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
- }
+ unsigned TypeSize =
+ (IsByVal ? Outs[OIdx].Flags.getByValSize() : DL.getTypeAllocSize(Ty));
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- // Record the value to store.
- StoreOperands.push_back(StVal);
-
- if (VectorInfo[j] & PVF_LAST) {
- unsigned NumElts = StoreOperands.size() - 3;
- NVPTXISD::NodeType Op;
- switch (NumElts) {
- case 1:
- Op = NVPTXISD::StoreParam;
- break;
- case 2:
- Op = NVPTXISD::StoreParamV2;
- break;
- case 4:
- Op = NVPTXISD::StoreParamV4;
- break;
- default:
- llvm_unreachable("Invalid vector info.");
- }
+ bool NeedAlign; // Does argument declaration specify alignment?
+ if (IsByVal ||
+ (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128))) {
+ // declare .param .align <align> .b8 .param<n>[<size>];
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
+ DAG.getConstant(ParamCount, dl, MVT::i32),
+ DAG.getConstant(TypeSize, dl, MVT::i32), InFlag};
+ Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
+ DeclareParamOps);
+ NeedAlign = true;
+ } else {
+ // declare .param .b<size> .param<n>;
+ if ((VT.isInteger() || VT.isFloatingPoint()) && TypeSize < 4) {
+ // PTX ABI requires integral types to be at least 32 bits in
+ // size. FP16 is loaded/stored using i16, so it's handled
+ // here as well.
+ TypeSize = 4;
+ }
+ SDValue DeclareScalarParamOps[] = {
+ Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
+ DAG.getConstant(TypeSize * 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareScalarParamOps);
+ NeedAlign = false;
+ }
+ InFlag = Chain.getValue(1);
- StoreOperands.push_back(InFlag);
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
+ // than 32-bits are sign extended or zero extended, depending on
+ // whether they are signed or unsigned types. This case applies
+ // only to scalar parameters and not to aggregate values.
+ bool ExtendIntegerParam =
+ Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
- // Adjust type of the store op if we've extended the scalar
- // return value.
- EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
- MaybeAlign EltAlign;
- if (NeedAlign)
- EltAlign = commonAlignment(ArgAlign, Offsets[j]);
+ auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
+ SmallVector<SDValue, 6> StoreOperands;
+ for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
+ EVT EltVT = VTs[j];
+ int CurOffset = Offsets[j];
+ MaybeAlign PartAlign;
+ if (NeedAlign)
+ PartAlign = commonAlignment(ArgAlign, CurOffset);
+
+ // New store.
+ if (VectorInfo[j] & PVF_FIRST) {
+ assert(StoreOperands.empty() && "Unfinished preceding store.");
+ StoreOperands.push_back(Chain);
+ StoreOperands.push_back(DAG.getConstant(ParamCount, dl, MVT::i32));
+ StoreOperands.push_back(DAG.getConstant(CurOffset, dl, MVT::i32));
+ }
- Chain = DAG.getMemIntrinsicNode(
- Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
- TheStoreType, MachinePointerInfo(), EltAlign,
- MachineMemOperand::MOStore);
- InFlag = Chain.getValue(1);
+ SDValue StVal = OutVals[OIdx];
+ if (IsByVal) {
+ auto PtrVT = getPointerTy(DL);
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal,
+ DAG.getConstant(CurOffset, dl, PtrVT));
+ StVal = DAG.getLoad(EltVT, dl, TempChain, srcAddr, MachinePointerInfo(),
+ PartAlign);
+ } else if (ExtendIntegerParam) {
+ assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
+ // zext/sext to i32
+ StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND,
+ dl, MVT::i32, StVal);
+ }
- // Cleanup.
- StoreOperands.clear();
- }
- ++OIdx;
+ if (!ExtendIntegerParam && EltVT.getSizeInBits() < 16) {
+ // Use 16-bit registers for small stores as it's the
+ // smallest general purpose register size supported by NVPTX.
+ StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
}
- assert(StoreOperands.empty() && "Unfinished parameter store.");
- if (VTs.size() > 0)
- --OIdx;
- ++paramCount;
- continue;
- }
- // ByVal arguments
- SmallVector<EVT, 16> VTs;
- SmallVector<uint64_t, 16> Offsets;
- auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
- assert(PTy && "Type of a byval parameter should be pointer");
- ComputePTXValueVTs(*this, DL, PTy->getPointerElementType(), VTs, &Offsets,
- 0);
+ // Record the value to store.
+ StoreOperands.push_back(StVal);
- // declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = Outs[OIdx].Flags.getByValSize();
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
- // The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
- // so we don't need to worry about natural alignment or not.
- // See TargetLowering::LowerCallTo().
-
- // Enforce minumum alignment of 4 to work around ptxas miscompile
- // for sm_50+. See corresponding alignment adjustment in
- // emitFunctionParamList() for details.
- if (ArgAlign < Align(4))
- ArgAlign = Align(4);
- SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32), InFlag};
- Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps);
- InFlag = Chain.getValue(1);
- for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
- EVT elemtype = VTs[j];
- int curOffset = Offsets[j];
- unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset);
- auto PtrVT = getPointerTy(DL);
- SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
- DAG.getConstant(curOffset, dl, PtrVT));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), PartAlign);
- if (elemtype.getSizeInBits() < 16) {
- theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(curOffset, dl, MVT::i32),
- theVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(
- NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype,
- MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore);
+ if (VectorInfo[j] & PVF_LAST) {
+ unsigned NumElts = StoreOperands.size() - 3;
+ NVPTXISD::NodeType Op;
+ switch (NumElts) {
+ case 1:
+ Op = NVPTXISD::StoreParam;
+ break;
+ case 2:
+ Op = NVPTXISD::StoreParamV2;
+ break;
+ case 4:
+ Op = NVPTXISD::StoreParamV4;
+ break;
+ default:
+ llvm_unreachable("Invalid vector info.");
+ }
- InFlag = Chain.getValue(1);
+ StoreOperands.push_back(InFlag);
+
+      // Adjust the type of the store op if we've extended the scalar
+      // parameter value.
+ EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
+
+ Chain = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
+ TheStoreType, MachinePointerInfo(), PartAlign,
+ MachineMemOperand::MOStore);
+ InFlag = Chain.getValue(1);
+
+ // Cleanup.
+ StoreOperands.clear();
+ }
+ if (!IsByVal)
+ ++OIdx;
}
- ++paramCount;
+ assert(StoreOperands.empty() && "Unfinished parameter store.");
+ if (!IsByVal && VTs.size() > 0)
+ --OIdx;
+ ++ParamCount;
}
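The parameter-lowering loop above boils down to two declaration shapes: aggregates, vectors, i128 and byval arguments become aligned .b8 arrays, while scalars become .b<N> values widened to at least 32 bits per the PTX ABI. A minimal standalone sketch of that rule — emitParamDecl is a hypothetical name, and none of the LLVM API is used:

    #include <algorithm>
    #include <cstdio>

    // Prints the PTX .param declaration for call parameter `n`, mirroring the
    // two branches above. This is an illustration, not the actual lowering.
    void emitParamDecl(unsigned n, bool isAggregate, unsigned sizeBytes,
                       unsigned alignBytes) {
      if (isAggregate) {
        // declare .param .align <align> .b8 param<n>[<size>];
        std::printf(".param .align %u .b8 param%u[%u];\n", alignBytes, n,
                    sizeBytes);
      } else {
        // PTX ABI: integral/FP scalars are widened to at least 32 bits
        // (FP16 travels as i16, so it is widened here as well).
        unsigned declBytes = std::max(sizeBytes, 4u);
        std::printf(".param .b%u param%u;\n", declBytes * 8, n);
      }
    }

    int main() {
      emitParamDecl(0, /*isAggregate=*/false, 2, 2);  // .param .b32 param0;
      emitParamDecl(1, /*isAggregate=*/true, 24, 8);  // .param .align 8 .b8 param1[24];
      return 0;
    }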
GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
@@ -1729,7 +1724,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallArgBeginOps);
InFlag = Chain.getValue(1);
- for (unsigned i = 0, e = paramCount; i != e; ++i) {
+ for (unsigned i = 0, e = ParamCount; i != e; ++i) {
unsigned opcode;
if (i == (e - 1))
opcode = NVPTXISD::LastCallArg;
@@ -1865,7 +1860,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = Ret.getValue(1);
InFlag = Ret.getValue(2);
- if (ProxyRegTruncates[i].hasValue()) {
+ if (ProxyRegTruncates[i]) {
Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
}
@@ -2249,7 +2244,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LD->getAlignment(),
+ LD->getPointerInfo(), LD->getAlign(),
LD->getMemOperand()->getFlags());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
// The legalizer (the caller) is expecting two values from the legalized
@@ -2414,7 +2409,7 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
SDValue Result =
DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
- ST->getAlignment(), ST->getMemOperand()->getFlags());
+ ST->getAlign(), ST->getMemOperand()->getFlags());
return Result;
}
@@ -2431,29 +2426,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
}
-// Check to see if the kernel argument is image*_t or sampler_t
-
-static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
- static const char *const specialTypes[] = { "struct._image2d_t",
- "struct._image3d_t",
- "struct._sampler_t" };
-
- Type *Ty = arg->getType();
- auto *PTy = dyn_cast<PointerType>(Ty);
-
- if (!PTy)
- return false;
-
- if (!context)
- return false;
-
- auto *STy = dyn_cast<StructType>(PTy->getPointerElementType());
- if (!STy || STy->isLiteral())
- return false;
-
- return llvm::is_contained(specialTypes, STy->getName());
-}
-
SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -2495,19 +2467,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
Type *Ty = argTypes[i];
- // If the kernel argument is image*_t or sampler_t, convert it to
- // a i32 constant holding the parameter position. This can later
- // matched in the AsmPrinter to output the correct mangled name.
- if (isImageOrSamplerVal(
- theArgs[i],
- (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
- : nullptr))) {
- assert(isKernelFunction(*F) &&
- "Only kernels can have image/sampler params");
- InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
- continue;
- }
-
if (theArgs[i]->use_empty()) {
// argument is dead
if (Ty->isAggregateType() || Ty->isIntegerTy(128)) {
@@ -2658,7 +2617,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
Type *RetTy = MF.getFunction().getReturnType();
bool isABI = (STI.getSmVersion() >= 20);
@@ -2673,7 +2633,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
auto VectorInfo = VectorizePTXValueVTs(
- VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1));
+ VTs, Offsets,
+ RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
+ : Align(1));
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
// 32-bits are sign extended or zero extended, depending on whether
@@ -4293,6 +4255,26 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
return false;
}
+/// getFunctionParamOptimizedAlign - since function arguments are passed via
+/// .param space, we may want to increase their alignment in a way that
+/// ensures that we can effectively vectorize their loads & stores. We can
+/// increase alignment only if the function has internal or private
+/// linkage, as for other linkage types callers may already rely on the
+/// default alignment. To allow 128-bit vectorized loads/stores, this
+/// function ensures that alignment is 16 or greater.
+Align NVPTXTargetLowering::getFunctionParamOptimizedAlign(
+ const Function *F, Type *ArgTy, const DataLayout &DL) const {
+ const uint64_t ABITypeAlign = DL.getABITypeAlign(ArgTy).value();
+
+ // If a function has linkage different from internal or private, we
+ // must use default ABI alignment as external users rely on it.
+ if (!F->hasLocalLinkage())
+ return Align(ABITypeAlign);
+
+ assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage");
+ return Align(std::max(uint64_t(16), ABITypeAlign));
+}
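As a rough model of the policy this function implements — assuming only a linkage flag and the ABI alignment as inputs; paramOptimizedAlign is a hypothetical name, not the LLVM interface:

    #include <algorithm>
    #include <cstdint>

    // Locally visible functions (internal/private linkage) may have their
    // .param alignment raised to enable 128-bit vectorized loads/stores;
    // for any other linkage, external callers rely on the ABI alignment.
    uint64_t paramOptimizedAlign(bool hasLocalLinkage, uint64_t abiTypeAlign) {
      if (!hasLocalLinkage)
        return abiTypeAlign;
      return std::max<uint64_t>(16, abiTypeAlign);
    }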
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target specific optimizations, like loop strength reduction
@@ -4516,6 +4498,17 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue PerformStoreRetvalCombine(SDNode *N) {
+  // Operands from the 2nd to the last are the values to be stored.
+ for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
+ if (!N->getOperand(I).isUndef())
+ return SDValue();
+
+ // Operand 0 is the previous value in the chain. Cannot return EntryToken
+ // as the previous value will become unused and eliminated later.
+ return N->getOperand(0);
+}
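The combine above is dead-store elimination at the DAG level: if every value the StoreRetval node would write is undef, the whole node folds to its incoming chain. A toy model, with Node and combineStoreRetval as hypothetical stand-ins for the SelectionDAG types:

    #include <cstddef>
    #include <optional>
    #include <vector>

    struct Node {
      // op 0 = chain, op 1 = offset, ops 2.. = values to store
      std::vector<const Node *> ops;
      bool undef = false;
    };

    // Returns the replacement (the incoming chain) if the store is dead,
    // or nullopt if at least one stored value is live.
    std::optional<const Node *> combineStoreRetval(const Node &n) {
      for (std::size_t i = 2; i < n.ops.size(); ++i)
        if (!n.ops[i]->undef)
          return std::nullopt;
      return n.ops[0];
    }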
+
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
@@ -4844,6 +4837,10 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformREMCombine(N, DCI, OptLevel);
case ISD::SETCC:
return PerformSETCCCombine(N, DCI);
+ case NVPTXISD::StoreRetval:
+ case NVPTXISD::StoreRetvalV2:
+ case NVPTXISD::StoreRetvalV4:
+ return PerformStoreRetvalCombine(N);
}
return SDValue();
}
@@ -5130,8 +5127,69 @@ void NVPTXTargetLowering::ReplaceNodeResults(
}
}
+NVPTXTargetLowering::AtomicExpansionKind
+NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ Type *Ty = AI->getValOperand()->getType();
+
+ if (AI->isFloatingPointOperation()) {
+ if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
+ if (Ty->isFloatTy())
+ return AtomicExpansionKind::None;
+ if (Ty->isDoubleTy() && STI.hasAtomAddF64())
+ return AtomicExpansionKind::None;
+ }
+ return AtomicExpansionKind::CmpXChg;
+ }
+
+ assert(Ty->isIntegerTy() && "Ty should be integer at this point");
+ auto ITy = cast<llvm::IntegerType>(Ty);
+
+ switch (AI->getOperation()) {
+ default:
+ return AtomicExpansionKind::CmpXChg;
+ case AtomicRMWInst::BinOp::And:
+ case AtomicRMWInst::BinOp::Or:
+ case AtomicRMWInst::BinOp::Xor:
+ case AtomicRMWInst::BinOp::Xchg:
+ switch (ITy->getBitWidth()) {
+ case 8:
+ case 16:
+ return AtomicExpansionKind::CmpXChg;
+ case 32:
+ return AtomicExpansionKind::None;
+ case 64:
+ if (STI.hasAtomBitwise64())
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::CmpXChg;
+ default:
+ llvm_unreachable("unsupported width encountered");
+ }
+ case AtomicRMWInst::BinOp::Add:
+ case AtomicRMWInst::BinOp::Sub:
+ case AtomicRMWInst::BinOp::Max:
+ case AtomicRMWInst::BinOp::Min:
+ case AtomicRMWInst::BinOp::UMax:
+ case AtomicRMWInst::BinOp::UMin:
+ switch (ITy->getBitWidth()) {
+ case 8:
+ case 16:
+ return AtomicExpansionKind::CmpXChg;
+ case 32:
+ return AtomicExpansionKind::None;
+ case 64:
+ if (STI.hasAtomMinMax64())
+ return AtomicExpansionKind::None;
+ return AtomicExpansionKind::CmpXChg;
+ default:
+ llvm_unreachable("unsupported width encountered");
+ }
+ }
+
+ return AtomicExpansionKind::CmpXChg;
+}
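The decision table reads more easily when flattened: a sketch under the assumption that the subtarget queries reduce to three booleans (classifyAtomicRMW and the enums are illustrative names, not the LLVM interface):

    enum class ExpandKind { None, CmpXChg };
    enum class RMWOp { Add, Sub, Max, Min, UMax, UMin,
                       And, Or, Xor, Xchg, FAdd, Other };

    // Mirrors shouldExpandAtomicRMWInIR above: 32-bit ops are native, 64-bit
    // ops are native only with the matching subtarget feature, and 8/16-bit
    // ops (plus anything else) fall back to a compare-exchange loop.
    ExpandKind classifyAtomicRMW(RMWOp op, unsigned bits, bool isFP,
                                 bool hasAtomAddF64, bool hasAtomBitwise64,
                                 bool hasAtomMinMax64) {
      if (isFP) {
        if (op == RMWOp::FAdd &&
            (bits == 32 || (bits == 64 && hasAtomAddF64)))
          return ExpandKind::None;
        return ExpandKind::CmpXChg;
      }
      switch (op) {
      case RMWOp::And: case RMWOp::Or: case RMWOp::Xor: case RMWOp::Xchg:
        return (bits == 32 || (bits == 64 && hasAtomBitwise64))
                   ? ExpandKind::None : ExpandKind::CmpXChg;
      case RMWOp::Add: case RMWOp::Sub: case RMWOp::Max: case RMWOp::Min:
      case RMWOp::UMax: case RMWOp::UMin:
        return (bits == 32 || (bits == 64 && hasAtomMinMax64))
                   ? ExpandKind::None : ExpandKind::CmpXChg;
      default:
        return ExpandKind::CmpXChg;
      }
    }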
+
// Pin NVPTXTargetObjectFile's vtables to this file.
-NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {}
+NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 13829b924d4b..fb09f99a019d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -451,6 +451,16 @@ public:
MachineFunction &MF,
unsigned Intrinsic) const override;
+  /// getFunctionParamOptimizedAlign - since function arguments are passed via
+  /// .param space, we may want to increase their alignment in a way that
+  /// ensures that we can effectively vectorize their loads & stores. We can
+  /// increase alignment only if the function has internal or private
+  /// linkage, as for other linkage types callers may already rely on the
+  /// default alignment. To allow 128-bit vectorized loads/stores, this
+  /// function ensures that alignment is 16 or greater.
+ Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy,
+ const DataLayout &DL) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type
/// Used to guide target specific optimizations, like loop strength
@@ -551,6 +561,17 @@ public:
// instruction, so we say that ctlz is cheap to speculate.
bool isCheapToSpeculateCtlz() const override { return true; }
+ AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
+ return AtomicExpansionKind::None;
+ }
+
+ AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
+ return AtomicExpansionKind::None;
+ }
+
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
private:
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 953d95e55f65..8df6f13aa68e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXInstrInfo::anchor() {}
-NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {}
+NVPTXInstrInfo::NVPTXInstrInfo() : RegInfo() {}
void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 22084cddc092..6f9c40feb10e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -145,6 +145,8 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def True : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
+def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">;
+def hasPTX43 : Predicate<"Subtarget->getPTXVersion() >= 43">;
def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">;
@@ -152,12 +154,16 @@ def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">;
def hasPTX65 : Predicate<"Subtarget->getPTXVersion() >= 65">;
def hasPTX70 : Predicate<"Subtarget->getPTXVersion() >= 70">;
def hasPTX71 : Predicate<"Subtarget->getPTXVersion() >= 71">;
+def hasPTX72 : Predicate<"Subtarget->getPTXVersion() >= 72">;
def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
+def hasSM32 : Predicate<"Subtarget->getSmVersion() >= 32">;
+def hasSM53 : Predicate<"Subtarget->getSmVersion() >= 53">;
def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">;
def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">;
def hasSM80 : Predicate<"Subtarget->getSmVersion() >= 80">;
+def hasSM86 : Predicate<"Subtarget->getSmVersion() >= 86">;
// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
@@ -199,17 +205,29 @@ multiclass I3<string OpcStr, SDNode OpNode> {
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
-// Template for instructions which take 3 int32 args. The instructions are
+// Template for instructions which take 3 int args. The instructions are
// named "<OpcStr>.s32" (e.g. "addc.cc.s32").
-multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
- def i32rr :
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
- def i32ri :
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
+ let hasSideEffects = 1 in {
+ def i32rr :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ def i32ri :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ def i64rr :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+ !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>,
+ Requires<[hasPTX43]>;
+ def i64ri :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+ !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>,
+ Requires<[hasPTX43]>;
+ }
}
// Template for instructions which take three fp64 or fp32 args. The
@@ -579,14 +597,13 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;
-// int32 addition and subtraction with carry-out.
-// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?).
-defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
-defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
+// int32 and int64 addition and subtraction with carry-out.
+defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
+defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
-// int32 addition and subtraction with carry-in and carry-out.
-defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
-defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
+// int32 and int64 addition and subtraction with carry-in and carry-out.
+defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
+defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
defm MULT : I3<"mul.lo.s", mul>;
@@ -2653,6 +2670,8 @@ def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", Int32Regs, Float16x2Regs>;
def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", Float16x2Regs, Int32Regs>;
+def BITCONVERT_32_F2F16x2 : F_BITCONVERT<"32", Float32Regs, Float16x2Regs>;
+def BITCONVERT_32_F16x22F : F_BITCONVERT<"32", Float16x2Regs, Float32Regs>;
// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns. Therefore, we
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index ec069a0a02ae..1192cc078408 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -182,7 +182,7 @@ foreach sync = [false, true] in {
foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
def : SHFL_INSTR<sync, mode, regclass, return_pred,
offset_imm, mask_imm, threadmask_imm>,
- Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
+ Requires<!if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL])>;
}
}
}
@@ -223,21 +223,21 @@ defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
Operand ImmOp> {
- def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
+ def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
+ [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
Requires<[hasPTX60, hasSM70]>;
- def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
+ def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
+ [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
Requires<[hasPTX60, hasSM70]>;
- def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
+ def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
+ [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
Requires<[hasPTX60, hasSM70]>;
- def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
+ def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
- [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
+ [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
Requires<[hasPTX60, hasSM70]>;
}
@@ -248,25 +248,25 @@ defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_syn
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
Operand ImmOp> {
- def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
+ def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins i32imm:$mask, ImmOp:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
+ [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
Requires<[hasPTX60, hasSM70]>;
- def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
+ def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins Int32Regs:$mask, ImmOp:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
+ [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
Requires<[hasPTX60, hasSM70]>;
- def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
+ def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins i32imm:$mask, regclass:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
+ [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
Requires<[hasPTX60, hasSM70]>;
- def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
+ def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins Int32Regs:$mask, regclass:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
- [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
+ [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
@@ -549,28 +549,32 @@ def : Pat<(int_nvvm_fmin_d
// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
- NVPTXRegClass src_regclass, Intrinsic IntOP>
+ NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
: NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
OpcStr,
- [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
+ [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
+ Requires<Preds>;
// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
- NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
+ NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
+ list<Predicate> Preds = []>
: NVPTXInst<(outs t_regclass:$dst),
(ins s0_regclass:$src0, s1_regclass:$src1),
OpcStr,
- [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
+ [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
+ Requires<Preds>;
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
- NVPTXRegClass s2_regclass, Intrinsic IntOP>
+ NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
: NVPTXInst<(outs t_regclass:$dst),
(ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
OpcStr,
[(set t_regclass:$dst,
- (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
+ (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
+ Requires<Preds>;
//
// MISC
@@ -587,17 +591,145 @@ def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
+def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
+ [hasPTX70, hasSM80]>;
+def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
+ [hasPTX70, hasSM80]>;
+def INT_NVVM_FMIN_XORSIGN_ABS_F :
+ F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
+def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
+ F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
+def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
+ F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
+def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
+ F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
+def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
+ [hasPTX70, hasSM80]>;
+def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
+ [hasPTX70, hasSM80]>;
+def INT_NVVM_FMAX_XORSIGN_ABS_F :
+ F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
+def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
+ F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
+def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
+ F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
+def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
+ F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
+ Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
+ [hasPTX72, hasSM86]>;
def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
Float64Regs, Float64Regs, int_nvvm_fmax_d>;
+//
+// Min Max f16, f16x2, bf16, bf16x2
+//
+
+class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
+ list<Predicate> Preds = [hasPTX70, hasSM80]> {
+ string Variant = V;
+ Intrinsic Intr = I;
+ NVPTXRegClass RegClass = RC;
+ list<Predicate> Predicates = Preds;
+}
+
+multiclass MIN_MAX<string IntName> {
+ foreach P = [
+ MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
+ int_nvvm_fmax_f16), Float16Regs>,
+ MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
+ int_nvvm_fmax_ftz_f16), Float16Regs>,
+ MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
+ int_nvvm_fmax_nan_f16), Float16Regs>,
+ MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>,
+ MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
+ Float16Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
+ Float16Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
+ Float16Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
+ int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
+ int_nvvm_fmax_f16x2), Float16x2Regs>,
+ MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Float16x2Regs>,
+ MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Float16x2Regs>,
+ MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Float16x2Regs>,
+ MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
+ Float16x2Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
+ Float16x2Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
+ Float16x2Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
+ int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
+ Float16x2Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
+ MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
+ int_nvvm_fmax_nan_bf16), Int16Regs>,
+ MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
+ Int16Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
+ Int16Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
+ int_nvvm_fmax_bf16x2), Int32Regs>,
+ MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
+ MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
+ Int32Regs, [hasPTX72, hasSM86]>,
+ MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
+ int_nvvm_fmin_nan_xorsign_abs_bf16x2,
+ int_nvvm_fmax_nan_xorsign_abs_bf16x2),
+ Int32Regs, [hasPTX72, hasSM86]>] in {
+ def P.Variant : F_MATH_2<!strconcat(
+ IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
+ P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
+ }
+}
+
+defm INT_NVVM_FMIN : MIN_MAX<"min">;
+defm INT_NVVM_FMAX : MIN_MAX<"max">;
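Each record name in the MIN_MAX multiclass doubles as the PTX mnemonic after !subst swaps underscores for dots; a small sketch of that string transform (ptxMnemonic is a hypothetical name):

    #include <cstdio>
    #include <string>

    // "_ftz_NaN_f16x2" + "min" -> "min.ftz.NaN.f16x2", mirroring
    // !strconcat(IntName, !subst("_", ".", P.Variant), ...) above.
    std::string ptxMnemonic(const std::string &intName, std::string variant) {
      for (char &c : variant)
        if (c == '_')
          c = '.';
      return intName + variant;
    }

    int main() {
      std::printf("%s\n", ptxMnemonic("min", "_ftz_NaN_f16x2").c_str());
      std::printf("%s\n", ptxMnemonic("max", "_xorsign_abs_bf16").c_str());
      return 0;
    }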
//
// Multiplication
@@ -720,6 +852,19 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_fabs_d>;
//
+// Abs, Neg bf16, bf16x2
+//
+
+def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
+ Int16Regs, int_nvvm_abs_bf16, [hasPTX70, hasSM80]>;
+def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
+ Int32Regs, int_nvvm_abs_bf16x2, [hasPTX70, hasSM80]>;
+def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
+ Int16Regs, int_nvvm_neg_bf16, [hasPTX70, hasSM80]>;
+def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
+ Int32Regs, int_nvvm_neg_bf16x2, [hasPTX70, hasSM80]>;
+
+//
// Round
//
@@ -762,6 +907,10 @@ def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
+def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
+ Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX70, hasSM75]>;
+def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
+ Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX70, hasSM75]>;
def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
@@ -788,35 +937,72 @@ def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
// Fma
//
-def INT_NVVM_FMA_RN_FTZ_F
- : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
-def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
- Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
-def INT_NVVM_FMA_RZ_FTZ_F
- : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
-def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
- Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
-def INT_NVVM_FMA_RM_FTZ_F
- : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
-def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
- Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
-def INT_NVVM_FMA_RP_FTZ_F
- : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
-def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
- Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
-
-def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
- Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
-def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
- Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
-def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
- Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
-def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
- Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
+class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
+ list<Predicate> Preds = []> {
+ string Variant = V;
+ Intrinsic Intr = I;
+ NVPTXRegClass RegClass = RC;
+ list<Predicate> Predicates = Preds;
+}
+
+multiclass FMA_INST {
+ foreach P = [
+ FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
+ FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
+ FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
+ FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
+
+ FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
+ FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
+ FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
+ FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
+ FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
+ FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
+ FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
+ FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
+
+ FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs,
+ [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs,
+ [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs,
+ [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs,
+ [hasPTX70, hasSM80]>,
+ FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs,
+ [hasPTX70, hasSM80]>,
+
+ FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs,
+ [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs,
+ [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs,
+ [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
+ Float16x2Regs, [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs,
+ [hasPTX70, hasSM80]>,
+ FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
+ Float16x2Regs, [hasPTX70, hasSM80]>,
+
+ FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX70, hasSM80]>,
+ FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
+ [hasPTX70, hasSM80]>,
+
+ FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
+ [hasPTX70, hasSM80]>,
+ FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
+ [hasPTX70, hasSM80]>
+ ] in {
+ def P.Variant :
+ F_MATH_3<!strconcat("fma",
+ !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
+ P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
+ }
+}
+
+defm INT_NVVM_FMA : FMA_INST;
//
// Rcp
@@ -848,6 +1034,8 @@ def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_rcp_rp_d>;
+def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
+ Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
@@ -1472,13 +1660,13 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
- ".max", atomic_load_max_64_g, i64imm, imm>;
+ ".max", atomic_load_max_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
- ".max", atomic_load_max_64_s, i64imm, imm>;
+ ".max", atomic_load_max_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
- atomic_load_max_64_gen, i64imm, imm>;
+ atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
+ ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1488,13 +1676,13 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
- ".max", atomic_load_umax_64_g, i64imm, imm>;
+ ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
- ".max", atomic_load_umax_64_s, i64imm, imm>;
+ ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
- atomic_load_umax_64_gen, i64imm, imm>;
+ atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
+ ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
// atom_min
@@ -1532,13 +1720,13 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
- ".min", atomic_load_min_64_g, i64imm, imm>;
+ ".min", atomic_load_min_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
- ".min", atomic_load_min_64_s, i64imm, imm>;
+ ".min", atomic_load_min_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
- atomic_load_min_64_gen, i64imm, imm>;
+ atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
+ ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1548,13 +1736,13 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
- ".min", atomic_load_umin_64_g, i64imm, imm>;
+ ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
- ".min", atomic_load_umin_64_s, i64imm, imm>;
+ ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
- atomic_load_umin_64_gen, i64imm, imm>;
+ atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
+ ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
// atom_inc atom_dec
@@ -1612,13 +1800,13 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
- atomic_load_and_64_g, i64imm, imm>;
+ atomic_load_and_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
- atomic_load_and_64_s, i64imm, imm>;
+ atomic_load_and_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
- atomic_load_and_64_gen, i64imm, imm>;
+ atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".and", atomic_load_and_64_gen, i64imm, imm>;
+ ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
// atom_or
@@ -1644,13 +1832,13 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
- atomic_load_or_64_g, i64imm, imm>;
+ atomic_load_or_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
- atomic_load_or_64_gen, i64imm, imm>;
+ atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".or", atomic_load_or_64_gen, i64imm, imm>;
+ ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
- atomic_load_or_64_s, i64imm, imm>;
+ atomic_load_or_64_s, i64imm, imm, [hasSM32]>;
// atom_xor
@@ -1676,13 +1864,13 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
- atomic_load_xor_64_g, i64imm, imm>;
+ atomic_load_xor_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
- atomic_load_xor_64_s, i64imm, imm>;
+ atomic_load_xor_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
- atomic_load_xor_64_gen, i64imm, imm>;
+ atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".xor", atomic_load_xor_64_gen, i64imm, imm>;
+ ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
// atom_cas
@@ -1788,7 +1976,7 @@ multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
(Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
-// Constructs instrinsic name and instruction asm strings.
+// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
string ScopeStr, string SpaceStr,
NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
@@ -2473,7 +2661,7 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// SW version of rotate 64
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
- (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+ (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
(ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f655f25602bc..f57c2920449b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -115,7 +115,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
/* SrcAlign */ LI->getAlign(),
/* DestAlign */ SI->getAlign(),
/* SrcIsVolatile */ LI->isVolatile(),
- /* DstIsVolatile */ SI->isVolatile(), TTI);
+ /* DstIsVolatile */ SI->isVolatile(),
+ /* CanOverlap */ true, TTI);
SI->eraseFromParent();
LI->eraseFromParent();
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 67aa49132016..53812d7552a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -88,16 +88,17 @@
// cancel the addrspacecast pair this pass emits.
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include <queue>
#define DEBUG_TYPE "nvptx-lower-args"
@@ -206,10 +207,8 @@ static void convertToParamAS(Value *OldUser, Value *Param) {
// We've created a new instruction. Queue users of the old instruction to
// be converted and the instruction itself to be deleted. We can't delete
// the old instruction yet, because it's still in use by a load somewhere.
- llvm::for_each(
- I.OldInstruction->users(), [NewInst, &ItemsToConvert](Value *V) {
- ItemsToConvert.push_back({cast<Instruction>(V), NewInst});
- });
+ for (Value *V : I.OldInstruction->users())
+ ItemsToConvert.push_back({cast<Instruction>(V), NewInst});
InstructionsToDelete.push_back(I.OldInstruction);
}
@@ -222,18 +221,99 @@ static void convertToParamAS(Value *OldUser, Value *Param) {
// E.g if we have Value = Load(BitCast(GEP(arg))), InstructionsToDelete will
// have {GEP,BitCast}. GEP can't be deleted first, because it's still used by
// the BitCast.
- llvm::for_each(reverse(InstructionsToDelete),
- [](Instruction *I) { I->eraseFromParent(); });
+ for (Instruction *I : llvm::reverse(InstructionsToDelete))
+ I->eraseFromParent();
}
-void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
+// Adjust the alignment of arguments passed byval in the .param address space.
+// We can increase the alignment of such arguments in a way that ensures we
+// can effectively vectorize their loads. We should also traverse all loads
+// from the byval pointer and adjust their alignment if they use a known
+// offset. Such alignment changes must stay consistent with the parameter
+// stores and loads in NVPTXTargetLowering::LowerCall.
+static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
+ const NVPTXTargetLowering *TLI) {
Function *Func = Arg->getParent();
- Instruction *FirstInst = &(Func->getEntryBlock().front());
- PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+ Type *StructType = Arg->getParamByValType();
+ const DataLayout DL(Func->getParent());
+
+ uint64_t NewArgAlign =
+ TLI->getFunctionParamOptimizedAlign(Func, StructType, DL).value();
+ uint64_t CurArgAlign =
+ Arg->getAttribute(Attribute::Alignment).getValueAsInt();
+
+ if (CurArgAlign >= NewArgAlign)
+ return;
+
+ LLVM_DEBUG(dbgs() << "Try to use alignment " << NewArgAlign << " instead of "
+ << CurArgAlign << " for " << *Arg << '\n');
+
+ auto NewAlignAttr =
+ Attribute::get(Func->getContext(), Attribute::Alignment, NewArgAlign);
+ Arg->removeAttr(Attribute::Alignment);
+ Arg->addAttr(NewAlignAttr);
+
+ struct Load {
+ LoadInst *Inst;
+ uint64_t Offset;
+ };
+
+ struct LoadContext {
+ Value *InitialVal;
+ uint64_t Offset;
+ };
+
+ SmallVector<Load> Loads;
+ std::queue<LoadContext> Worklist;
+ Worklist.push({ArgInParamAS, 0});
+
+ while (!Worklist.empty()) {
+ LoadContext Ctx = Worklist.front();
+ Worklist.pop();
+
+ for (User *CurUser : Ctx.InitialVal->users()) {
+ if (auto *I = dyn_cast<LoadInst>(CurUser)) {
+ Loads.push_back({I, Ctx.Offset});
+ continue;
+ }
+
+ if (auto *I = dyn_cast<BitCastInst>(CurUser)) {
+ Worklist.push({I, Ctx.Offset});
+ continue;
+ }
+
+ if (auto *I = dyn_cast<GetElementPtrInst>(CurUser)) {
+ APInt OffsetAccumulated =
+ APInt::getZero(DL.getIndexSizeInBits(ADDRESS_SPACE_PARAM));
+
+ if (!I->accumulateConstantOffset(DL, OffsetAccumulated))
+ continue;
+
+ uint64_t OffsetLimit = -1;
+ uint64_t Offset = OffsetAccumulated.getLimitedValue(OffsetLimit);
+ assert(Offset != OffsetLimit && "Expect Offset less than UINT64_MAX");
+
+ Worklist.push({I, Ctx.Offset + Offset});
+ continue;
+ }
+
+ llvm_unreachable("All users must be one of: load, "
+ "bitcast, getelementptr.");
+ }
+ }
- assert(PType && "Expecting pointer type in handleByValParam");
+ for (Load &CurLoad : Loads) {
+ Align NewLoadAlign(greatestCommonDivisor(NewArgAlign, CurLoad.Offset));
+ Align CurLoadAlign(CurLoad.Inst->getAlign());
+ CurLoad.Inst->setAlignment(std::max(NewLoadAlign, CurLoadAlign));
+ }
+}
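The load-alignment update above relies on a simple fact: a load at byte offset o into an object aligned to a is provably aligned to gcd(a, o), and the pass only ever raises an existing alignment. A standalone sketch (loadAlignAtOffset and adjustedAlign are hypothetical names):

    #include <algorithm>
    #include <cstdint>
    #include <numeric>

    // Provable alignment of a load `offset` bytes into an object whose base
    // is `argAlign`-aligned; e.g. argAlign=16 gives 16 at offset 0, 8 at
    // offset 8, and 4 at offset 4.
    uint64_t loadAlignAtOffset(uint64_t argAlign, uint64_t offset) {
      return offset == 0 ? argAlign : std::gcd(argAlign, offset);
    }

    // Mirrors the final loop above: never lower an alignment that is
    // already known to be stronger.
    uint64_t adjustedAlign(uint64_t curAlign, uint64_t argAlign,
                           uint64_t offset) {
      return std::max(curAlign, loadAlignAtOffset(argAlign, offset));
    }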
- Type *StructType = PType->getPointerElementType();
+void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
+ Function *Func = Arg->getParent();
+ Instruction *FirstInst = &(Func->getEntryBlock().front());
+ Type *StructType = Arg->getParamByValType();
+ assert(StructType && "Missing byval type");
auto IsALoadChain = [&](Value *Start) {
SmallVector<Value *, 16> ValuesToCheck = {Start};
@@ -269,10 +349,19 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
Value *ArgInParamAS = new AddrSpaceCastInst(
Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
FirstInst);
- llvm::for_each(UsersToUpdate, [ArgInParamAS](Value *V) {
+ for (Value *V : UsersToUpdate)
convertToParamAS(V, ArgInParamAS);
- });
LLVM_DEBUG(dbgs() << "No need to copy " << *Arg << "\n");
+
+ // Further optimizations require target lowering info.
+ if (!TM)
+ return;
+
+ const auto *TLI =
+ cast<NVPTXTargetLowering>(TM->getSubtargetImpl()->getTargetLowering());
+
+ adjustByValArgAlignment(Arg, ArgInParamAS, TLI);
+
return;
}
@@ -284,7 +373,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
// later load/stores assume that alignment, and we are going to replace
// the use of the byval parameter with this alloca instruction.
AllocA->setAlignment(Func->getParamAlign(Arg->getArgNo())
- .getValueOr(DL.getPrefTypeAlign(StructType)));
+ .value_or(DL.getPrefTypeAlign(StructType)));
Arg->replaceAllUsesWith(AllocA);
Value *ArgInParam = new AddrSpaceCastInst(
diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index cf63fc33e621..0a7b9cf468a6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -26,6 +26,13 @@ private:
public:
NVPTXMachineFunctionInfo(MachineFunction &MF) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override {
+ return DestMF.cloneInfo<NVPTXMachineFunctionInfo>(*this);
+ }
+
  /// Returns the index for the symbol \p Symbol. If the symbol was previously
/// added, the same index is returned. Otherwise, the symbol is added and the
/// new index is returned.
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index f4934f0bc20b..4bd820e98f05 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -64,8 +64,12 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
// This is needed in debug mode when code cleanup passes are not executed,
// but we need the handle access to be eliminated because they are not
// valid instructions when image handles are disabled.
- for (MachineInstr *MI : InstrsToRemove)
- MI->eraseFromParent();
+ for (MachineInstr *MI : InstrsToRemove) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+      // Only those that are unused can be removed.
+ if (MF.getRegInfo().use_nodbg_empty(DefReg))
+ MI->eraseFromParent();
+ }
return Changed;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 5a6440c91fca..a03492a92bac 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
#include "NVPTXGenSubtargetInfo.inc"
static cl::opt<bool>
- NoF16Math("nvptx-no-f16-math", cl::ZeroOrMore, cl::Hidden,
+ NoF16Math("nvptx-no-f16-math", cl::Hidden,
cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
cl::init(false));
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 0a1c61a35795..597b8af176a2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -237,7 +237,7 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
}
TargetTransformInfo
-NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
+NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
}
@@ -330,6 +330,8 @@ void NVPTXPassConfig::addIRPasses() {
addStraightLineScalarOptimizationPasses();
}
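+  // AtomicExpand lowers atomic operations the target cannot handle natively,
+  // e.g. by expanding them into compare-and-swap loops.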
+ addPass(createAtomicExpandPass());
+
// === LSR and other generic IR passes ===
TargetPassConfig::addIRPasses();
// EarlyCSE is not always strong enough to clean up what LSR produces. For
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 7a69197abcff..491e721479d3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -65,7 +65,7 @@ public:
void adjustPassManager(PassManagerBuilder &) override;
void registerPassBuilderCallbacks(PassBuilder &PB) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
bool isMachineVerifierClean() const override {
return false;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 4645671a0cd8..37b0a44243cb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -17,7 +17,7 @@ namespace llvm {
class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
public:
- NVPTXTargetObjectFile() {}
+ NVPTXTargetObjectFile() = default;
~NVPTXTargetObjectFile() override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 466aa7130216..fc4bc6b3cbf7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -96,7 +96,7 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
// Instructions that read threadIdx are obviously divergent.
if (readsThreadIndex(II) || readsLaneId(II))
return true;
- // Handle the NVPTX atomic instrinsics that cannot be represented as an
+ // Handle the NVPTX atomic intrinsics that cannot be represented as an
// atomic IR instruction.
if (isNVVMAtomic(II))
return true;
@@ -145,11 +145,15 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
Optional<SpecialCase> Special;
FtzRequirementTy FtzRequirement = FTZ_Any;
+  // Denormal handling is guarded by different attributes depending on the
+  // type (denormal-fp-math vs. denormal-fp-math-f32), so take note of halves.
+ bool IsHalfTy = false;
SimplifyAction() = default;
- SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
- : IID(IID), FtzRequirement(FtzReq) {}
+ SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq,
+ bool IsHalfTy = false)
+ : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}
// Cast operations don't have anything to do with FTZ, so we skip that
// argument.
@@ -191,18 +195,66 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
return {Intrinsic::fma, FTZ_MustBeOff};
case Intrinsic::nvvm_fma_rn_ftz_f:
return {Intrinsic::fma, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fma_rn_f16:
+ return {Intrinsic::fma, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fma_rn_ftz_f16:
+ return {Intrinsic::fma, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fma_rn_f16x2:
+ return {Intrinsic::fma, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fma_rn_ftz_f16x2:
+ return {Intrinsic::fma, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmax_d:
return {Intrinsic::maxnum, FTZ_Any};
case Intrinsic::nvvm_fmax_f:
return {Intrinsic::maxnum, FTZ_MustBeOff};
case Intrinsic::nvvm_fmax_ftz_f:
return {Intrinsic::maxnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmax_nan_f:
+ return {Intrinsic::maximum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmax_ftz_nan_f:
+ return {Intrinsic::maximum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmax_f16:
+ return {Intrinsic::maxnum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmax_ftz_f16:
+ return {Intrinsic::maxnum, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fmax_f16x2:
+ return {Intrinsic::maxnum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmax_ftz_f16x2:
+ return {Intrinsic::maxnum, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fmax_nan_f16:
+ return {Intrinsic::maximum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmax_ftz_nan_f16:
+ return {Intrinsic::maximum, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fmax_nan_f16x2:
+ return {Intrinsic::maximum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
+ return {Intrinsic::maximum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmin_d:
return {Intrinsic::minnum, FTZ_Any};
case Intrinsic::nvvm_fmin_f:
return {Intrinsic::minnum, FTZ_MustBeOff};
case Intrinsic::nvvm_fmin_ftz_f:
return {Intrinsic::minnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmin_nan_f:
+ return {Intrinsic::minimum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmin_ftz_nan_f:
+ return {Intrinsic::minimum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmin_f16:
+ return {Intrinsic::minnum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmin_ftz_f16:
+ return {Intrinsic::minnum, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fmin_f16x2:
+ return {Intrinsic::minnum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmin_ftz_f16x2:
+ return {Intrinsic::minnum, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fmin_nan_f16:
+ return {Intrinsic::minimum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmin_ftz_nan_f16:
+ return {Intrinsic::minimum, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fmin_nan_f16x2:
+ return {Intrinsic::minimum, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
+ return {Intrinsic::minimum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_round_d:
return {Intrinsic::round, FTZ_Any};
case Intrinsic::nvvm_round_f:
@@ -316,9 +368,10 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
// intrinsic, we don't have to look up any module metadata, as
// FtzRequirementTy will be FTZ_Any.)
if (Action.FtzRequirement != FTZ_Any) {
- StringRef Attr = II->getFunction()
- ->getFnAttribute("denormal-fp-math-f32")
- .getValueAsString();
+ const char *AttrName =
+ Action.IsHalfTy ? "denormal-fp-math" : "denormal-fp-math-f32";
+ StringRef Attr =
+ II->getFunction()->getFnAttribute(AttrName).getValueAsString();
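+      // For example, nvvm_fma_rn_ftz_f16 consults "denormal-fp-math", while
+      // nvvm_fma_rn_ftz_f consults "denormal-fp-math-f32".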
DenormalMode Mode = parseDenormalFPAttribute(Attr);
bool FtzEnabled = Mode.Output != DenormalMode::IEEE;
diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 339f51d21087..3f3c4967609a 100644
--- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -133,15 +133,13 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) {
// FIXME: Add assertions about ConvCall.
Str = ConvCall->getArgOperand(0);
}
- assert(isa<ConstantExpr>(Str) &&
- "Format of __nvvm__reflect function not recognized");
- const ConstantExpr *GEP = cast<ConstantExpr>(Str);
-
- const Value *Sym = GEP->getOperand(0);
- assert(isa<Constant>(Sym) &&
+  // Before opaque pointers, we have a constant expression wrapping the
+  // constant string.
+ Str = Str->stripPointerCasts();
+ assert(isa<Constant>(Str) &&
"Format of __nvvm_reflect function not recognized");
- const Value *Operand = cast<Constant>(Sym)->getOperand(0);
+ const Value *Operand = cast<Constant>(Str)->getOperand(0);
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
// For CUDA-7.0 style __nvvm_reflect calls, we need to find the operand's
// initializer.
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 715cff72dcab..7113fe33b5d7 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -341,31 +341,11 @@ public:
bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); }
bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); }
- bool isU16Imm() const {
- switch (Kind) {
- case Expression:
- return true;
- case Immediate:
- case ContextImmediate:
- return isUInt<16>(getImmU16Context());
- default:
- return false;
- }
- }
- bool isS16Imm() const {
- switch (Kind) {
- case Expression:
- return true;
- case Immediate:
- case ContextImmediate:
- return isInt<16>(getImmS16Context());
- default:
- return false;
- }
- }
- bool isS16ImmX4() const { return Kind == Expression ||
- (Kind == Immediate && isInt<16>(getImm()) &&
- (getImm() & 3) == 0); }
+ bool isU16Imm() const { return isExtImm<16>(/*Signed*/ false, 1); }
+ bool isS16Imm() const { return isExtImm<16>(/*Signed*/ true, 1); }
+ bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); }
+ bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); }
+ bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); }
bool isHashImmX8() const {
// The Hash Imm form is used for instructions that check or store a hash.
@@ -375,9 +355,6 @@ public:
(getImm() & 7) == 0);
}
- bool isS16ImmX16() const { return Kind == Expression ||
- (Kind == Immediate && isInt<16>(getImm()) &&
- (getImm() & 15) == 0); }
bool isS34ImmX16() const {
return Kind == Expression ||
(Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0);
@@ -388,17 +365,6 @@ public:
return Kind == Expression || (Kind == Immediate && isInt<34>(getImm()));
}
- bool isS17Imm() const {
- switch (Kind) {
- case Expression:
- return true;
- case Immediate:
- case ContextImmediate:
- return isInt<17>(getImmS16Context());
- default:
- return false;
- }
- }
bool isTLSReg() const { return Kind == TLSRegister; }
bool isDirectBr() const {
if (Kind == Expression)
@@ -712,6 +678,25 @@ public:
return CreateExpr(Val, S, E, IsPPC64);
}
+
+private:
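+  // Common implementation of the is{U,S}16Imm*/isS17Imm predicates above: an
+  // Expression is always accepted, while an Immediate/ContextImmediate must
+  // fit in Width bits and be a multiple of Multiple (e.g. isS16ImmX16()
+  // requires the low four bits of the value to be clear).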
+ template <unsigned Width>
+ bool isExtImm(bool Signed, unsigned Multiple) const {
+ switch (Kind) {
+ default:
+ return false;
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ if (Signed)
+ return isInt<Width>(getImmS16Context()) &&
+ (getImmS16Context() & (Multiple - 1)) == 0;
+ else
+ return isUInt<Width>(getImmU16Context()) &&
+ (getImmU16Context() & (Multiple - 1)) == 0;
+ }
+ }
};
} // end anonymous namespace.
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 5a12c3f22dee..d3d720054f16 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -8,8 +8,8 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "TargetInfo/PowerPCTargetInfo.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
@@ -64,14 +64,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() {
static DecodeStatus decodeCondBrTarget(MCInst &Inst, unsigned Imm,
uint64_t /*Address*/,
- const void * /*Decoder*/) {
+ const MCDisassembler * /*Decoder*/) {
Inst.addOperand(MCOperand::createImm(SignExtend32<14>(Imm)));
return MCDisassembler::Success;
}
static DecodeStatus decodeDirectBrTarget(MCInst &Inst, unsigned Imm,
uint64_t /*Address*/,
- const void * /*Decoder*/) {
+ const MCDisassembler * /*Decoder*/) {
int32_t Offset = SignExtend32<24>(Imm);
Inst.addOperand(MCOperand::createImm(Offset));
return MCDisassembler::Success;
@@ -90,85 +90,85 @@ static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, CRBITRegs);
}
static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, FRegs);
}
static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, FRegs);
}
static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, VFRegs);
}
static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, VRegs);
}
static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, VSRegs);
}
static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, VSFRegs);
}
static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, VSSRegs);
}
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, RRegs);
}
-static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, RRegsNoR0);
}
static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, XRegs);
}
static DecodeStatus DecodeG8pRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, XRegs);
}
-static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, XRegsNoX0);
}
@@ -176,44 +176,47 @@ static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SPERegs);
}
static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, ACCRegs);
}
static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, VSRpRegs);
}
#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
-template<unsigned N>
+template <unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
-template<unsigned N>
+template <unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
return MCDisassembler::Success;
}
static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
if (Imm != 0)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(Imm));
@@ -222,7 +225,7 @@ static DecodeStatus decodeImmZeroOperand(MCInst &Inst, uint64_t Imm,
static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo & 1)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(VSRpRegs[RegNo >> 1]));
@@ -230,7 +233,8 @@ static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo,
}
static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
// Decode the memri field (imm, reg), which has the low 16-bits as the
// displacement and the next 5 bits as the register #.
@@ -265,7 +269,8 @@ static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
}
static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
// Decode the memrix field (imm, reg), which has the low 14-bits as the
// displacement and the next 5 bits as the register #.
@@ -287,7 +292,7 @@ static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
static DecodeStatus decodeMemRIHashOperands(MCInst &Inst, uint64_t Imm,
int64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// Decode the memrix field for a hash store or hash check operation.
// The field is composed of a register and an immediate value that is 6 bits
// and covers the range -8 to -512. The immediate is always negative and 2s
@@ -303,7 +308,8 @@ static DecodeStatus decodeMemRIHashOperands(MCInst &Inst, uint64_t Imm,
}
static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
// Decode the memrix16 field (imm, reg), which has the low 12-bits as the
// displacement with 16-byte aligned, and the next 5 bits as the register #.
@@ -319,7 +325,7 @@ static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
static DecodeStatus decodeMemRI34PCRelOperands(MCInst &Inst, uint64_t Imm,
int64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// Decode the memri34_pcrel field (imm, reg), which has the low 34-bits as the
// displacement, and the next 5 bits as an immediate 0.
uint64_t Base = Imm >> 34;
@@ -333,7 +339,7 @@ static DecodeStatus decodeMemRI34PCRelOperands(MCInst &Inst, uint64_t Imm,
static DecodeStatus decodeMemRI34Operands(MCInst &Inst, uint64_t Imm,
int64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// Decode the memri34 field (imm, reg), which has the low 34-bits as the
// displacement, and the next 5 bits as the register #.
uint64_t Base = Imm >> 34;
@@ -347,7 +353,8 @@ static DecodeStatus decodeMemRI34Operands(MCInst &Inst, uint64_t Imm,
}
static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
// Decode the spe8disp field (imm, reg), which has the low 5-bits as the
// displacement with 8-byte aligned, and the next 5 bits as the register #.
@@ -362,7 +369,8 @@ static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
}
static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
// Decode the spe4disp field (imm, reg), which has the low 5-bits as the
// displacement with 4-byte aligned, and the next 5 bits as the register #.
@@ -377,7 +385,8 @@ static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm,
}
static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
// Decode the spe2disp field (imm, reg), which has the low 5-bits as the
// displacement with 2-byte aligned, and the next 5 bits as the register #.
@@ -392,7 +401,8 @@ static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm,
}
static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
// The cr bit encoding is 0x80 >> cr_reg_num.
unsigned Zeros = countTrailingZeros(Imm);
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
index 6b16af293244..b71d59ed79ed 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 6af79324919c..58165fcaac03 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -23,5 +23,4 @@
using namespace llvm;
-PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI)
- : PPCGenRegisterBankInfo() {}
+PPCRegisterBankInfo::PPCRegisterBankInfo(const TargetRegisterInfo &TRI) {}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
index 358d5ed3cf14..31a4c528751f 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -14,8 +14,8 @@
#ifndef LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_PPC_GISEL_PPCREGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_REGBANK_DECLARATIONS
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 9df94edc8cdf..2e678ffd58c2 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -44,6 +44,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
case PPC::fixup_ppc_half16:
return Value & 0xffff;
case PPC::fixup_ppc_half16ds:
+ case PPC::fixup_ppc_half16dq:
return Value & 0xfffc;
case PPC::fixup_ppc_pcrel34:
case PPC::fixup_ppc_imm34:
@@ -60,6 +61,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case FK_Data_2:
case PPC::fixup_ppc_half16:
case PPC::fixup_ppc_half16ds:
+ case PPC::fixup_ppc_half16dq:
return 2;
case FK_Data_4:
case PPC::fixup_ppc_brcond14:
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 94ef7b45434f..1e58039582c2 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -125,6 +125,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
}
break;
case PPC::fixup_ppc_half16ds:
+ case PPC::fixup_ppc_half16dq:
Target.print(errs());
errs() << '\n';
report_fatal_error("Invalid PC-relative half16ds relocation");
@@ -349,6 +350,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
}
break;
case PPC::fixup_ppc_half16ds:
+ case PPC::fixup_ppc_half16dq:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_None:
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index b92b0fc342ec..b020635f4209 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -77,7 +77,7 @@ void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst,
// label to the top of the fragment containing the aligned instruction that
// was just added.
if (InstLine == LabelLine) {
- AssignFragment(LastLabel, InstructionFragment);
+ assignFragment(LastLabel, InstructionFragment);
LastLabel->setOffset(0);
}
}
@@ -98,7 +98,7 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst,
// For example, the load that will get the relocation as follows:
// .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
// lwa 3, 4(3)
- if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue())
+ if (IsPartOfGOTToPCRelPair && !*IsPartOfGOTToPCRelPair)
emitGOTToPCRelReloc(Inst);
// Special handling is only for prefixed instructions.
@@ -113,7 +113,7 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst,
// follows:
// pld 3, vec@got@pcrel(0), 1
// .Lpcrel1:
- if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue())
+ if (IsPartOfGOTToPCRelPair && *IsPartOfGOTToPCRelPair)
emitGOTToPCRelLabel(Inst);
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 73292f7b7938..df0c666f5b11 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -51,6 +51,10 @@ enum Fixups {
/// register number.
fixup_ppc_nofixup,
+ /// A 16-bit fixup corresponding to lo16(_foo) with implied 3 zero bits for
+ /// instrs like 'lxv'. Produces the same relocation as fixup_ppc_half16ds.
+ fixup_ppc_half16dq,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 4dfa7d5e600c..46bbc44e1681 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -34,7 +34,6 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new PPCMCCodeEmitter(MCII, Ctx);
}
@@ -47,10 +46,12 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
+
+ const PPCInstrInfo *InstrInfo = static_cast<const PPCInstrInfo *>(&MCII);
+ unsigned Opcode = MI.getOpcode();
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- ((MI.getOpcode() == PPC::BL8_NOTOC ||
- MI.getOpcode() == PPC::BL8_NOTOC_TLS)
+ (InstrInfo->isNoTOCCallInstr(Opcode)
? (MCFixupKind)PPC::fixup_ppc_br24_notoc
: (MCFixupKind)PPC::fixup_ppc_br24)));
return 0;
@@ -198,8 +199,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
}
// Otherwise add a fixup for the displacement field.
- Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_half16ds));
+ Fixups.push_back(MCFixup::create(IsLittleEndian ? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16dq));
return RegBits;
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index abff44449131..6cd04ee018fd 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -110,9 +110,18 @@ PPCMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
if (Value.isAbsolute()) {
int64_t Result = evaluateAsInt64(Value.getConstant());
- if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) &&
- (Result >= 0x8000))
+ bool IsHalf16 = Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16;
+ bool IsHalf16DS =
+ Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16ds;
+ bool IsHalf16DQ =
+ Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16dq;
+ bool IsHalf = IsHalf16 || IsHalf16DS || IsHalf16DQ;
+
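+    // Only the half16 family of fixups may evaluate to an absolute value of
+    // 0x8000 or more. In addition, half16ds values must be 4-byte aligned
+    // (low 2 bits clear) and half16dq values 16-byte aligned (low 4 bits
+    // clear), as checked below.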
+ if (!IsHalf && Result >= 0x8000)
return false;
+ if ((IsHalf16DS && (Result & 0x3)) || (IsHalf16DQ && (Result & 0xf)))
+ return false;
+
Res = MCValue::get(Result);
} else {
if (!Layout)
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 03b316341717..acb860e16518 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -34,7 +34,6 @@ class MCTargetOptions;
class Target;
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 64e11dbc1efc..729cb35cbebc 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -71,6 +71,19 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
}
} break;
+ case PPC::fixup_ppc_half16ds:
+ case PPC::fixup_ppc_half16dq: {
+ if (IsPCRel)
+ report_fatal_error("Invalid PC-relative relocation.");
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ return {XCOFF::RelocationType::R_TOC, 15};
+ case MCSymbolRefExpr::VK_PPC_L:
+ return {XCOFF::RelocationType::R_TOCL, 15};
+ }
+ } break;
case PPC::fixup_ppc_br24:
// Branches are 4 byte aligned, so the 24 bits we encode in
// the instruction actually represents a 26 bit offset.
@@ -78,15 +91,19 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
case PPC::fixup_ppc_br24abs:
return {XCOFF::RelocationType::R_RBA, EncodedSignednessIndicator | 25};
case FK_Data_4:
+ case FK_Data_8:
+ const uint8_t SignAndSizeForFKData =
+ EncodedSignednessIndicator |
+ ((unsigned)Fixup.getKind() == FK_Data_4 ? 31 : 63);
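+      // The low bits of the sign-and-size byte hold the relocation bit length
+      // minus one: 31 for 4-byte data, 63 for 8-byte data.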
switch (Modifier) {
default:
report_fatal_error("Unsupported modifier");
case MCSymbolRefExpr::VK_PPC_AIX_TLSGD:
- return {XCOFF::RelocationType::R_TLS, EncodedSignednessIndicator | 31};
+ return {XCOFF::RelocationType::R_TLS, SignAndSizeForFKData};
case MCSymbolRefExpr::VK_PPC_AIX_TLSGDM:
- return {XCOFF::RelocationType::R_TLSM, EncodedSignednessIndicator | 31};
+ return {XCOFF::RelocationType::R_TLSM, SignAndSizeForFKData};
case MCSymbolRefExpr::VK_None:
- return {XCOFF::RelocationType::R_POS, EncodedSignednessIndicator | 31};
+ return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData};
}
}
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
index 79db03b0331b..f8b1914bd520 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
@@ -21,6 +21,7 @@
#include "PPCMCCodeEmitter.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCObjectWriter.h"
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index edd3b42d47e1..a6ba5adda839 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -956,7 +956,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
WAIT,
XSABSDP,
XSABSQP,
- XSNABSDP,
+ XSNABSDP, XSNABSDPs,
XSNABSQP,
XSNEGDP,
XSNEGQP,
@@ -1372,7 +1372,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read]
LDCIX,
LHZCIX,
LWZCIX,
- MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv
+ MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv
)>;
// Expand instructions
@@ -1469,7 +1469,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
// 13 Cycles Unknown operations, 1 input operands
def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY],
(instrs
- MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv
+ MFSPR, MFSPR8, MFSR, MFTB8, MFUDSCR, MFVRSAVE, MFVRSAVEv
)>;
// 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands
@@ -1625,6 +1625,7 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
(instrs
LVSL,
LVSR,
+ LXVKQ,
MFVSRLD,
MTVSRWS,
VCLZLSBB,
@@ -1979,7 +1980,6 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY],
ICBTLS,
ICCCI,
LA, LA8,
- LDMX,
MFDCR,
MFPMR,
MFSRIN,
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index c088d7847ce4..2bbab64ce0da 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -156,6 +156,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
MCRF,
MCRXRX,
XSNABSDP,
+ XSNABSDPs,
XSXEXPDP,
XSABSDP,
XSNEGDP,
@@ -807,14 +808,6 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
(instregex "ST(B|H|W|D)CX$")
)>;
-// Cracked Load Instruction.
-// Two consecutive load operations for a total of 8 cycles.
-def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C],
- (instrs
- LDMX
-)>;
-
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
@@ -940,6 +933,7 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
(instregex "M(T|F)TB(8)?$"),
(instregex "MF(SPR|CTR|LR)(8)?$"),
(instregex "M(T|F)MSR(D)?$"),
+ (instregex "M(T|F)(U)?DSCR$"),
(instregex "MTSPR(8)?$")
)>;
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 7235a878e38b..4eceb3afc70f 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -33,7 +33,6 @@ class MCInst;
class MCOperand;
class ModulePass;
-FunctionPass *createPPCCTRLoops();
#ifndef NDEBUG
FunctionPass *createPPCCTRLoopsVerify();
#endif
@@ -53,12 +52,12 @@ FunctionPass *createPPCCTRLoops();
FunctionPass *createPPCExpandISELPass();
FunctionPass *createPPCPreEmitPeepholePass();
FunctionPass *createPPCExpandAtomicPseudoPass();
+ FunctionPass *createPPCCTRLoopsPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP);
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &OutMO, AsmPrinter &AP);
- void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
@@ -77,6 +76,7 @@ FunctionPass *createPPCCTRLoops();
void initializePPCTLSDynamicCallPass(PassRegistry &);
void initializePPCMIPeepholePass(PassRegistry&);
void initializePPCExpandAtomicPseudoPass(PassRegistry &);
+ void initializePPCCTRLoopsPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
@@ -84,6 +84,10 @@ FunctionPass *createPPCCTRLoops();
void initializePPCLowerMASSVEntriesPass(PassRegistry &);
extern char &PPCLowerMASSVEntriesID;
+ ModulePass *createPPCGenScalarMASSEntriesPass();
+ void initializePPCGenScalarMASSEntriesPass(PassRegistry &);
+ extern char &PPCGenScalarMASSEntriesID;
+
InstructionSelector *
createPPCInstructionSelector(const PPCTargetMachine &, const PPCSubtarget &,
const PPCRegisterBankInfo &);
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index bbd5f5fd1941..310bf8125f1c 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -263,6 +263,10 @@ def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1",
"true",
"Enable instructions in ISA 3.1.",
[FeatureISA3_0]>;
+def FeatureISAFuture : SubtargetFeature<"isa-future-instructions",
+ "IsISAFuture", "true",
+ "Enable instructions for Future ISA.",
+ [FeatureISA3_1]>;
def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
"Enable POWER9 Altivec instructions",
[FeatureISA3_0, FeatureP8Altivec]>;
@@ -376,7 +380,8 @@ def ProcessorFeatures {
FeaturePartwordAtomic,
FeatureQuadwordAtomic,
FeaturePredictableSelectIsExpensive,
- FeatureISA2_07
+ FeatureISA2_07,
+ FeatureCRBits
];
list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
@@ -429,7 +434,7 @@ def ProcessorFeatures {
// Future
// For future CPU we assume that all of the existing features from Power10
// still exist with the exception of those we know are Power10 specific.
- list<SubtargetFeature> FutureAdditionalFeatures = [];
+ list<SubtargetFeature> FutureAdditionalFeatures = [FeatureISAFuture];
list<SubtargetFeature> FutureSpecificFeatures = [];
list<SubtargetFeature> FutureInheritableFeatures =
!listconcat(P10InheritableFeatures, FutureAdditionalFeatures);
@@ -591,7 +596,8 @@ def : ProcessorModel<"a2", PPCA2Model,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>;
+ Feature64Bit /*, Feature64BitRegs */, FeatureMFTB,
+ FeatureISA2_06]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 780981806996..22f35c8fa8d3 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -34,6 +34,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -254,6 +255,8 @@ public:
void emitFunctionBodyEnd() override;
+ void emitPGORefs();
+
void emitEndOfAsmFile(Module &) override;
void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override;
@@ -879,7 +882,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Print MO for better readability
if (isVerbose())
- OutStreamer->GetCommentOS() << MO << '\n';
+ OutStreamer->getCommentOS() << MO << '\n';
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -950,7 +953,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Print MO for better readability
if (isVerbose() && IsAIX)
- OutStreamer->GetCommentOS() << MO << '\n';
+ OutStreamer->getCommentOS() << MO << '\n';
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
@@ -1582,7 +1585,7 @@ void PPCLinuxAsmPrinter::emitStartOfAsmFile(Module &M) {
if (M.getPICLevel() == PICLevel::SmallPIC)
return AsmPrinter::emitStartOfAsmFile(M);
- OutStreamer->SwitchSection(OutContext.getELFSection(
+ OutStreamer->switchSection(OutContext.getELFSection(
".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC));
MCSymbol *TOCSym = OutContext.getOrCreateSymbol(Twine(".LTOC"));
@@ -1599,7 +1602,7 @@ void PPCLinuxAsmPrinter::emitStartOfAsmFile(Module &M) {
OutStreamer->emitAssignment(TOCSym, tocExpr);
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->switchSection(getObjFileLowering().getTextSection());
}
void PPCLinuxAsmPrinter::emitFunctionEntryLabel() {
@@ -1657,7 +1660,7 @@ void PPCLinuxAsmPrinter::emitFunctionEntryLabel() {
MCSectionSubPair Current = OutStreamer->getCurrentSection();
MCSectionELF *Section = OutStreamer->getContext().getELFSection(
".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- OutStreamer->SwitchSection(Section);
+ OutStreamer->switchSection(Section);
OutStreamer->emitLabel(CurrentFnSym);
OutStreamer->emitValueToAlignment(8);
MCSymbol *Symbol1 = CurrentFnSymForSize;
@@ -1672,7 +1675,7 @@ void PPCLinuxAsmPrinter::emitFunctionEntryLabel() {
8/*size*/);
// Emit a null environment pointer.
OutStreamer->emitIntValue(0, 8 /* size */);
- OutStreamer->SwitchSection(Current.first, Current.second);
+ OutStreamer->switchSection(Current.first, Current.second);
}
void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) {
@@ -1689,7 +1692,7 @@ void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) {
const char *Name = isPPC64 ? ".toc" : ".got2";
MCSectionELF *Section = OutContext.getELFSection(
Name, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- OutStreamer->SwitchSection(Section);
+ OutStreamer->switchSection(Section);
if (!isPPC64)
OutStreamer->emitValueToAlignment(4);
@@ -1895,10 +1898,15 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
MCSymbolAttr VisibilityAttr = MCSA_Invalid;
if (!TM.getIgnoreXCOFFVisibility()) {
+ if (GV->hasDLLExportStorageClass() && !GV->hasDefaultVisibility())
+ report_fatal_error(
+        "Cannot be both dllexport and non-default visibility");
switch (GV->getVisibility()) {
- // TODO: "exported" and "internal" Visibility needs to go here.
+ // TODO: "internal" Visibility needs to go here.
case GlobalValue::DefaultVisibility:
+ if (GV->hasDLLExportStorageClass())
+ VisibilityAttr = MAI->getExportedVisibilityAttr();
break;
case GlobalValue::HiddenVisibility:
VisibilityAttr = MAI->getHiddenVisibilityAttr();
@@ -1956,7 +1964,7 @@ void PPCAIXAsmPrinter::emitFunctionBodyEnd() {
if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF) &&
(getNumberOfVRSaved() > 0)) {
// Emit dummy EH Info Table.
- OutStreamer->SwitchSection(getObjFileLowering().getCompactUnwindSection());
+ OutStreamer->switchSection(getObjFileLowering().getCompactUnwindSection());
MCSymbol *EHInfoLabel =
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF);
OutStreamer->emitLabel(EHInfoLabel);
@@ -1971,7 +1979,7 @@ void PPCAIXAsmPrinter::emitFunctionBodyEnd() {
OutStreamer->emitIntValue(0, PointerSize);
OutStreamer->emitIntValue(0, PointerSize);
- OutStreamer->SwitchSection(MF->getSection());
+ OutStreamer->switchSection(MF->getSection());
}
}
@@ -2382,9 +2390,9 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
// Print GV in verbose mode
if (isVerbose()) {
if (GV->hasInitializer()) {
- GV->printAsOperand(OutStreamer->GetCommentOS(),
+ GV->printAsOperand(OutStreamer->getCommentOS(),
/*PrintType=*/false, GV->getParent());
- OutStreamer->GetCommentOS() << '\n';
+ OutStreamer->getCommentOS() << '\n';
}
}
@@ -2392,14 +2400,14 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
// Switch to the containing csect.
- OutStreamer->SwitchSection(Csect);
+ OutStreamer->switchSection(Csect);
const DataLayout &DL = GV->getParent()->getDataLayout();
// Handle common and zero-initialized local symbols.
if (GV->hasCommonLinkage() || GVKind.isBSSLocal() ||
GVKind.isThreadBSSLocal()) {
- Align Alignment = GV->getAlign().getValueOr(DL.getPreferredAlign(GV));
+ Align Alignment = GV->getAlign().value_or(DL.getPreferredAlign(GV));
uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
GVSym->setStorageClass(
TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
@@ -2424,9 +2432,8 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
}
// Emit aliasing label for global variable.
- llvm::for_each(GOAliasMap[GV], [this](const GlobalAlias *Alias) {
+ for (const GlobalAlias *Alias : GOAliasMap[GV])
OutStreamer->emitLabel(getSymbol(Alias));
- });
emitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
}
@@ -2437,14 +2444,12 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
MCSectionSubPair Current = OutStreamer->getCurrentSection();
// Emit function descriptor.
- OutStreamer->SwitchSection(
+ OutStreamer->switchSection(
cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect());
// Emit aliasing label for function descriptor csect.
- llvm::for_each(GOAliasMap[&MF->getFunction()],
- [this](const GlobalAlias *Alias) {
- OutStreamer->emitLabel(getSymbol(Alias));
- });
+ for (const GlobalAlias *Alias : GOAliasMap[&MF->getFunction()])
+ OutStreamer->emitLabel(getSymbol(Alias));
// Emit function entry point address.
OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
@@ -2458,7 +2463,7 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
// Emit a null environment pointer.
OutStreamer->emitIntValue(0, PointerSize);
- OutStreamer->SwitchSection(Current.first, Current.second);
+ OutStreamer->switchSection(Current.first, Current.second);
}
void PPCAIXAsmPrinter::emitFunctionEntryLabel() {
@@ -2468,11 +2473,34 @@ void PPCAIXAsmPrinter::emitFunctionEntryLabel() {
PPCAsmPrinter::emitFunctionEntryLabel();
// Emit aliasing label for function entry point label.
- llvm::for_each(
- GOAliasMap[&MF->getFunction()], [this](const GlobalAlias *Alias) {
- OutStreamer->emitLabel(
- getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM));
- });
+ for (const GlobalAlias *Alias : GOAliasMap[&MF->getFunction()])
+ OutStreamer->emitLabel(
+ getObjFileLowering().getFunctionEntryPointSymbol(Alias, TM));
+}
+
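+// Emit .ref directives from the counters csect to the other PGO csects when
+// they are present (presumably so the AIX linker keeps them live together
+// with the counters).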
+void PPCAIXAsmPrinter::emitPGORefs() {
+ if (OutContext.hasXCOFFSection(
+ "__llvm_prf_cnts",
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) {
+ MCSection *CntsSection = OutContext.getXCOFFSection(
+ "__llvm_prf_cnts", SectionKind::getData(),
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD),
+ /*MultiSymbolsAllowed*/ true);
+
+ OutStreamer->switchSection(CntsSection);
+ if (OutContext.hasXCOFFSection(
+ "__llvm_prf_data",
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
+ OutStreamer->emitXCOFFRefDirective("__llvm_prf_data[RW]");
+ if (OutContext.hasXCOFFSection(
+ "__llvm_prf_names",
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)))
+ OutStreamer->emitXCOFFRefDirective("__llvm_prf_names[RO]");
+ if (OutContext.hasXCOFFSection(
+ "__llvm_prf_vnds",
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
+ OutStreamer->emitXCOFFRefDirective("__llvm_prf_vnds[RW]");
+ }
}
void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
@@ -2481,8 +2509,10 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
if (M.empty() && TOCDataGlobalVars.empty())
return;
+ emitPGORefs();
+
// Switch to section to emit TOC base.
- OutStreamer->SwitchSection(getObjFileLowering().getTOCBaseSection());
+ OutStreamer->switchSection(getObjFileLowering().getTOCBaseSection());
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
@@ -2504,7 +2534,7 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
TCEntry = cast<MCSectionXCOFF>(
getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
}
- OutStreamer->SwitchSection(TCEntry);
+ OutStreamer->switchSection(TCEntry);
OutStreamer->emitLabel(I.second);
if (TS != nullptr)
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
index 38ed5f2e78e3..f1eecfea5a5e 100644
--- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -434,6 +434,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
XSMINDP,
XSMINJDP,
XSNABSDP,
+ XSNABSDPs,
XSNABSQP,
XSNEGDP,
XSNEGQP,
@@ -978,6 +979,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
XSMINDP,
XSMINJDP,
XSNABSDP,
+ XSNABSDPs,
XSNABSQP,
XSNEGDP,
XSNEGQP,
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index b1f5bdd885cd..48167c3dc9ca 100644
--- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,4 +1,4 @@
-//===-- PPCCTRLoops.cpp - Verify CTR loops -----------------===//
+//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,31 +6,38 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass verifies that all bdnz/bdz instructions are dominated by a loop
-// mtctr before any other instructions that might clobber the ctr register.
+// This pass generates machine instructions for the CTR-loop-related pseudos:
+// 1: MTCTRPseudo/DecreaseCTRPseudo
+// 2: MTCTR8Pseudo/DecreaseCTR8Pseudo
+//
+// If a CTR loop can be generated:
+// 1: MTCTRPseudo/MTCTR8Pseudo will be converted to "mtctr"
+// 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "bdnz/bdz" and
+// its user branch instruction can be deleted.
+//
+// If a CTR loop cannot be generated due to a clobber of the CTR register:
+// 1: MTCTRPseudo/MTCTR8Pseudo can be deleted.
+// 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to an "addi -1"
+//    and a "cmplwi/cmpldi".
+//
+// This pass runs just before register allocation because we don't want the
+// register allocator to allocate a register for DecreaseCTRPseudo if a CTR
+// loop can be generated, and because, if a CTR loop cannot be generated, we
+// don't yet have a condition register for the newly added "cmplwi/cmpldi".
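+//
+// A rough sketch of the two expansions (illustrative instruction shapes, not
+// verbatim output of this pass):
+//
+//   CTR loop form:          Fallback form:
+//     mtctr rN              loop:
+//   loop:                     ...
+//     ...                     addi rM, rM, -1
+//     bdnz loop               cmplwi rM, 0
+//                             bne loop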
//
//===----------------------------------------------------------------------===//
-// CTR loops are produced by the HardwareLoops pass and this pass is simply a
-// verification that no invalid CTR loops are produced. As such, it isn't
-// something that needs to be run (or even defined) for Release builds so the
-// entire file is guarded by NDEBUG.
-#ifndef NDEBUG
-#include <vector>
-
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/ilist_iterator.h"
+#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -38,148 +45,314 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GenericDomTreeConstruction.h"
-#include "llvm/Support/Printable.h"
-#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
-#define DEBUG_TYPE "ppc-ctrloops-verify"
+#define DEBUG_TYPE "ppc-ctrloops"
+
+STATISTIC(NumCTRLoops, "Number of CTR loops generated");
+STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
namespace {
+class PPCCTRLoops : public MachineFunctionPass {
+public:
+ static char ID;
- struct PPCCTRLoopsVerify : public MachineFunctionPass {
- public:
- static char ID;
+ PPCCTRLoops() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
- PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
- initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ bool runOnMachineFunction(MachineFunction &MF) override;
- bool runOnMachineFunction(MachineFunction &MF) override;
+private:
+ const PPCInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
- private:
- MachineDominatorTree *MDT;
- };
+ bool processLoop(MachineLoop *ML);
+ bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
+ void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *Dec);
+ void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
+};
+} // namespace
+
+char PPCCTRLoops::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
+ false, false)
- char PPCCTRLoopsVerify::ID = 0;
-} // end anonymous namespace
+FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
-INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
- "PowerPC CTR Loops Verify", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
- "PowerPC CTR Loops Verify", false, false)
+bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
-FunctionPass *llvm::createPPCCTRLoopsVerify() {
- return new PPCCTRLoopsVerify();
+ auto &MLI = getAnalysis<MachineLoopInfo>();
+ TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ MRI = &MF.getRegInfo();
+
+  for (auto *ML : MLI) {
+ if (ML->isOutermost())
+ Changed |= processLoop(ML);
+ }
+
+ return Changed;
}
-static bool clobbersCTR(const MachineInstr &MI) {
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg()) {
- if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
- return true;
- } else if (MO.isRegMask()) {
- if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
- return true;
- }
+bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
+ if (!CheckReads) {
+    // If we are only checking for defs, i.e. we are going to look for
+    // definitions before the MTCTRloop: a CTR definition inside the callee of
+    // a call instruction will not affect the definition of the MTCTRloop, so
+    // we can use definesRegister() for the check and there is no need to
+    // check the regmask.
+ return (MI->definesRegister(PPC::CTR) &&
+ !MI->registerDefIsDead(PPC::CTR)) ||
+ (MI->definesRegister(PPC::CTR8) &&
+ !MI->registerDefIsDead(PPC::CTR8));
}
+ if ((MI->modifiesRegister(PPC::CTR) && !MI->registerDefIsDead(PPC::CTR)) ||
+ (MI->modifiesRegister(PPC::CTR8) && !MI->registerDefIsDead(PPC::CTR8)))
+ return true;
+
+ if (MI->getDesc().isCall())
+ return true;
+
+ // We define the CTR in the loop preheader, so if there is any CTR reader in
+  // the loop, we also cannot use the CTR loop form.
+ if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
+ return true;
+
return false;
}
-static bool verifyCTRBranch(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I) {
- MachineBasicBlock::iterator BI = I;
- SmallSet<MachineBasicBlock *, 16> Visited;
- SmallVector<MachineBasicBlock *, 8> Preds;
- bool CheckPreds;
-
- if (I == MBB->begin()) {
- Visited.insert(MBB);
- goto queue_preds;
- } else
- --I;
-
-check_block:
- Visited.insert(MBB);
- if (I == MBB->end())
- goto queue_preds;
-
- CheckPreds = true;
- for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
- unsigned Opc = I->getOpcode();
- if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
- CheckPreds = false;
+bool PPCCTRLoops::processLoop(MachineLoop *ML) {
+ bool Changed = false;
+
+  // Align with the HardwareLoop pass: process inner loops first.
+ for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
+ Changed |= processLoop(*I);
+
+  // If any inner loop was changed, the outer loop must not contain the
+  // hardware loop intrinsics.
+ if (Changed)
+ return true;
+
+ auto IsLoopStart = [](MachineInstr &MI) {
+ return MI.getOpcode() == PPC::MTCTRPseudo ||
+ MI.getOpcode() == PPC::MTCTR8Pseudo;
+ };
+
+ auto SearchForStart =
+ [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
+ for (auto &MI : *MBB) {
+ if (IsLoopStart(MI))
+ return &MI;
+ }
+ return nullptr;
+ };
+
+ MachineInstr *Start = nullptr;
+ MachineInstr *Dec = nullptr;
+ bool InvalidCTRLoop = false;
+
+ MachineBasicBlock *Preheader = ML->getLoopPreheader();
+ // If there is no preheader for this loop, there must be no MTCTRPseudo
+ // either.
+ if (!Preheader)
+ return false;
+
+ Start = SearchForStart(Preheader);
+ // This is not a CTR loop candidate.
+ if (!Start)
+ return false;
+
+ // If CTR is live to the preheader, we can not redefine the CTR register.
+ if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
+ InvalidCTRLoop = true;
+
+  // Make sure there is also no CTR clobber in the preheader between the
+  // beginning of the block and the MTCTR.
+ for (MachineBasicBlock::reverse_instr_iterator I =
+ std::next(Start->getReverseIterator());
+ I != Preheader->instr_rend(); ++I)
+    // Only check the definitions of the CTR. If there is a non-dead
+    // definition of the CTR, we conservatively don't generate a CTR loop.
+ if (isCTRClobber(&*I, /* CheckReads */ false)) {
+ InvalidCTRLoop = true;
break;
}
- if (I != BI && clobbersCTR(*I)) {
- LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
- << ") instruction " << *I
- << " clobbers CTR, invalidating "
- << printMBBReference(*BI->getParent()) << " ("
- << BI->getParent()->getFullName() << ") instruction "
- << *BI << "\n");
- return false;
+ // Make sure there is also no CTR clobber or CTR user in the preheader
+ // between the MTCTR and the end of the block.
+ for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
+ I != Preheader->instr_end(); ++I)
+ if (isCTRClobber(&*I, /* CheckReads */ true)) {
+ InvalidCTRLoop = true;
+ break;
}
- if (I == IE)
+ // Find the CTR loop components and decide whether or not to fall back to a
+ // normal loop.
+ for (auto *MBB : reverse(ML->getBlocks())) {
+ for (auto &MI : *MBB) {
+ if (MI.getOpcode() == PPC::DecreaseCTRPseudo ||
+ MI.getOpcode() == PPC::DecreaseCTR8Pseudo)
+ Dec = &MI;
+ else if (!InvalidCTRLoop)
+ // If any instruction clobbers the CTR, we cannot generate a CTR loop.
+ InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
+ }
+ if (Dec && InvalidCTRLoop)
break;
}
- if (!CheckPreds && Preds.empty())
- return true;
-
- if (CheckPreds) {
-queue_preds:
- if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
- LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
- << printMBBReference(*BI->getParent()) << " ("
- << BI->getParent()->getFullName() << ") instruction "
- << *BI << "\n");
- return false;
- }
+ assert(Dec && "CTR loop is not complete!");
- append_range(Preds, MBB->predecessors());
+ if (InvalidCTRLoop) {
+ expandNormalLoops(ML, Start, Dec);
+ ++NumNormalLoops;
}
+ else {
+ expandCTRLoops(ML, Start, Dec);
+ ++NumCTRLoops;
+ }
+ return true;
+}
+
+void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *Dec) {
+ bool Is64Bit =
+ Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+
+ MachineBasicBlock *Preheader = Start->getParent();
+ MachineBasicBlock *Exiting = Dec->getParent();
+ assert((Preheader && Exiting) &&
+ "Preheader and exiting should exist for CTR loop!");
+
+ assert(Dec->getOperand(1).getImm() == 1 &&
+ "Loop decrement stride must be 1");
+
+ unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
+
+ Register PHIDef =
+ MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
+ : &PPC::GPRC_and_GPRC_NOR0RegClass);
- do {
- MBB = Preds.pop_back_val();
- if (!Visited.count(MBB)) {
- I = MBB->getLastNonDebugInstr();
- goto check_block;
+ Start->getParent()->getParent()->getProperties().reset(
+ MachineFunctionProperties::Property::NoPHIs);
+
+ // Generate "PHI" in the header block.
+ auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
+ DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
+ PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
+
+ Register ADDIDef =
+ MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
+ : &PPC::GPRC_and_GPRC_NOR0RegClass);
+ // Generate "addi -1" in the exiting block.
+ BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
+ .addReg(PHIDef)
+ .addImm(-1);
+
+ // Add other inputs for the PHI node.
+ if (ML->isLoopLatch(Exiting)) {
+ // The loop header must have exactly two predecessors: the preheader and
+ // the loop latch Exiting. In the hardware loop insertion pass, the block
+ // containing DecreaseCTRloop must dominate all loop latches, so there can
+ // be only one latch.
+ assert(ML->getHeader()->pred_size() == 2 &&
+ "Loop header predecessor is not right!");
+ PHIMIB.addReg(ADDIDef).addMBB(Exiting);
+ } else {
+ // If the block containing DecreaseCTRloop is not a loop latch, we can use
+ // ADDIDef as the incoming PHI value for all other in-loop predecessors,
+ // because in the hardware loop insertion pass the block containing
+ // DecreaseCTRloop must dominate all loop latches.
+ for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
+ if (ML->contains(P)) {
+ assert(ML->isLoopLatch(P) &&
+ "Loop's header in-loop predecessor is not loop latch!");
+ PHIMIB.addReg(ADDIDef).addMBB(P);
+ } else
+ assert(P == Preheader &&
+ "CTR loop should not be generated for irreducible loop!");
}
- } while (!Preds.empty());
+ }
- return true;
+ // Generate the compare in the exiting block.
+ Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
+ auto CMPMIB =
+ BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
+ .addReg(ADDIDef)
+ .addImm(0);
+
+ BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ Dec->getOperand(0).getReg())
+ .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
+
+ // Remove the pseudo instructions.
+ Start->eraseFromParent();
+ Dec->eraseFromParent();
}
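As a sanity check on the fallback's semantics: the sequence emitted above (a
PHI seeded from the MTCTRPseudo operand, an addi -1 in the exiting block, an
unsigned compare against 0, and a branch on the gt bit) behaves like an
explicit down-counting do-while. A self-contained C++ analogy, assuming a
trip count N >= 1 (sketch only, not part of the patch):

    // What the expanded "normal loop" computes for a trip count N >= 1.
    unsigned long long runLoopNTimes(unsigned long long N) {
      unsigned long long Counter = N; // PHI incoming value from the preheader
      unsigned long long Work = 0;
      do {
        ++Work;                // stand-in for the loop body
        Counter -= 1;          // the ADDI -1 in the exiting block
      } while (Counter > 0);   // CMPL(W|D)I Counter, 0; branch on "gt"
      return Work;             // == N
    }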
-bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
- MDT = &getAnalysis<MachineDominatorTree>();
-
- // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
- // any other instructions that might clobber the ctr register.
- for (MachineBasicBlock &MBB : MF) {
- if (!MDT->isReachableFromEntry(&MBB))
- continue;
-
- for (MachineBasicBlock::iterator MII = MBB.getFirstTerminator(),
- MIIE = MBB.end(); MII != MIIE; ++MII) {
- unsigned Opc = MII->getOpcode();
- if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
- Opc == PPC::BDZ8 || Opc == PPC::BDZ)
- if (!verifyCTRBranch(&MBB, MII))
- llvm_unreachable("Invalid PPC CTR loop!");
- }
+void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *Dec) {
+ bool Is64Bit =
+ Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
+
+ MachineBasicBlock *Preheader = Start->getParent();
+ MachineBasicBlock *Exiting = Dec->getParent();
+ assert((Preheader && Exiting) &&
+ "Preheader and exiting should exist for CTR loop!");
+
+ assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
+
+ unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
+ unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
+ auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
+ assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
+ "There should be only one user for loop decrement pseudo!");
+
+ unsigned Opcode = 0;
+ switch (BrInstr->getOpcode()) {
+ case PPC::BC:
+ Opcode = BDNZOpcode;
+ (void) ML;
+ assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
+ "Invalid ctr loop!");
+ break;
+ case PPC::BCn:
+ Opcode = BDZOpcode;
+ assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
+ "Invalid ctr loop!");
+ break;
+ default:
+ llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
}
- return false;
+ unsigned MTCTROpcode = Is64Bit ? PPC::MTCTR8 : PPC::MTCTR;
+
+ // Generate "mtctr" in the loop preheader.
+ BuildMI(*Preheader, Start, Start->getDebugLoc(), TII->get(MTCTROpcode))
+ .addReg(Start->getOperand(0).getReg());
+
+ // Generate "bdnz/bdz" in the exiting block just before the terminator.
+ BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
+ .addMBB(BrInstr->getOperand(1).getMBB());
+
+ // Remove the pseudo instructions.
+ Start->eraseFromParent();
+ BrInstr->eraseFromParent();
+ Dec->eraseFromParent();
}
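For reference, the branch semantics relied on above, with the CTR modeled as
a plain integer (sketch only):

    // bdnz target: decrement CTR, branch to target if the result is nonzero.
    // bdz  target: decrement CTR, branch to target if the result is zero.
    bool bdnzTaken(unsigned long long &CTR) { return --CTR != 0; }
    bool bdzTaken(unsigned long long &CTR) { return --CTR == 0; }

This is why a BC whose target is inside the loop maps to bdnz, while a BCn
whose target is outside the loop maps to bdz.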
-#endif // NDEBUG
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp
new file mode 100644
index 000000000000..b1f5bdd885cd
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp
@@ -0,0 +1,185 @@
+//===-- PPCCTRLoopsVerify.cpp - Verify CTR loops --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass verifies that all bdnz/bdz instructions are dominated by a loop
+// mtctr before any other instructions that might clobber the ctr register.
+//
+//===----------------------------------------------------------------------===//
+
+// CTR loops are produced by the HardwareLoops pass and this pass is simply a
+// verification that no invalid CTR loops are produced. As such, it isn't
+// something that needs to be run (or even defined) for Release builds so the
+// entire file is guarded by NDEBUG.
+#ifndef NDEBUG
+#include <vector>
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "PPC.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-ctrloops-verify"
+
+namespace {
+
+ struct PPCCTRLoopsVerify : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ MachineDominatorTree *MDT;
+ };
+
+ char PPCCTRLoopsVerify::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
+
+FunctionPass *llvm::createPPCCTRLoopsVerify() {
+ return new PPCCTRLoopsVerify();
+}
+
+static bool clobbersCTR(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg()) {
+ if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
+ return true;
+ } else if (MO.isRegMask()) {
+ if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool verifyCTRBranch(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator BI = I;
+ SmallSet<MachineBasicBlock *, 16> Visited;
+ SmallVector<MachineBasicBlock *, 8> Preds;
+ bool CheckPreds;
+
+ if (I == MBB->begin()) {
+ Visited.insert(MBB);
+ goto queue_preds;
+ } else
+ --I;
+
+check_block:
+ Visited.insert(MBB);
+ if (I == MBB->end())
+ goto queue_preds;
+
+ CheckPreds = true;
+ for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
+ unsigned Opc = I->getOpcode();
+ if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
+ CheckPreds = false;
+ break;
+ }
+
+ if (I != BI && clobbersCTR(*I)) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName()
+ << ") instruction " << *I
+ << " clobbers CTR, invalidating "
+ << printMBBReference(*BI->getParent()) << " ("
+ << BI->getParent()->getFullName() << ") instruction "
+ << *BI << "\n");
+ return false;
+ }
+
+ if (I == IE)
+ break;
+ }
+
+ if (!CheckPreds && Preds.empty())
+ return true;
+
+ if (CheckPreds) {
+queue_preds:
+ if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
+ LLVM_DEBUG(dbgs() << "Unable to find a MTCTR instruction for "
+ << printMBBReference(*BI->getParent()) << " ("
+ << BI->getParent()->getFullName() << ") instruction "
+ << *BI << "\n");
+ return false;
+ }
+
+ append_range(Preds, MBB->predecessors());
+ }
+
+ do {
+ MBB = Preds.pop_back_val();
+ if (!Visited.count(MBB)) {
+ I = MBB->getLastNonDebugInstr();
+ goto check_block;
+ }
+ } while (!Preds.empty());
+
+ return true;
+}
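The goto-based control flow above implements a backward search over the CFG:
walk up from the bdnz/bdz, failing if CTR is clobbered before an mtctr is
found on any path. An equivalent goto-free worklist formulation, as a sketch
(assuming the clobbersCTR helper above; not part of the patch):

    static bool hasDominatingMTCTR(MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator BI) {
      SmallSet<MachineBasicBlock *, 16> Visited;
      SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock::iterator>,
                  8> Work = {{MBB, BI}};
      while (!Work.empty()) {
        auto [Block, I] = Work.pop_back_val();
        if (!Visited.insert(Block).second)
          continue;
        bool FoundMTCTR = false;
        while (I != Block->begin()) {
          --I;
          unsigned Opc = I->getOpcode();
          if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
            FoundMTCTR = true;
            break;
          }
          if (clobbersCTR(*I))
            return false; // Clobbered before any mtctr on this path.
        }
        if (FoundMTCTR)
          continue;
        if (MachineFunction::iterator(Block) == Block->getParent()->begin())
          return false; // Hit the entry block without finding an mtctr.
        for (MachineBasicBlock *Pred : Block->predecessors())
          Work.push_back({Pred, Pred->end()});
      }
      return true;
    }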
+
+bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
+ MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
+ // any other instructions that might clobber the ctr register.
+ for (MachineBasicBlock &MBB : MF) {
+ if (!MDT->isReachableFromEntry(&MBB))
+ continue;
+
+ for (MachineBasicBlock::iterator MII = MBB.getFirstTerminator(),
+ MIIE = MBB.end(); MII != MIIE; ++MII) {
+ unsigned Opc = MII->getOpcode();
+ if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
+ Opc == PPC::BDZ8 || Opc == PPC::BDZ)
+ if (!verifyCTRBranch(&MBB, MII))
+ llvm_unreachable("Invalid PPC CTR loop!");
+ }
+ }
+
+ return false;
+}
+#endif // NDEBUG
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 1e81276f1de3..1901e8d1ebf1 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -363,3 +363,25 @@ def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
def CSR_64_AllRegs_AIX_Dflt_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
(sequence "VSL%u", 0, 19))>;
+
+def CSR_ALL_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 0, 31)>;
+
+def CSR_VSRP :
+ CalleeSavedRegs<(add VSRp26, VSRp27, VSRp28, VSRp29, VSRp30, VSRp31)>;
+
+def CSR_SVR432_VSRP : CalleeSavedRegs<(add CSR_SVR432_Altivec, CSR_VSRP)>;
+
+def CSR_SVR464_VSRP : CalleeSavedRegs<(add CSR_PPC64_Altivec, CSR_VSRP)>;
+
+def CSR_SVR464_R2_VSRP : CalleeSavedRegs<(add CSR_SVR464_VSRP, X2)>;
+
+def CSR_SVR32_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Altivec,
+ (sub CSR_ALL_VSRP, VSRp17))>;
+
+def CSR_SVR64_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC,
+ (sub CSR_ALL_VSRP, VSRp17))>;
+
+def CSR_SVR64_ColdCC_R2_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC_VSRP, X2)>;
+
+def CSR_64_AllRegs_VSRP :
+ CalleeSavedRegs<(add CSR_64_AllRegs_VSX, CSR_ALL_VSRP)>;
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index e7cd107c5046..5c7f0619161c 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -775,7 +775,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
if (!OptPPCPred)
return false;
- PPC::Predicate PPCPred = OptPPCPred.getValue();
+ PPC::Predicate PPCPred = *OptPPCPred;
// Take advantage of fall-through opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 65c969c196e1..0f70ec576af1 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -10,14 +10,15 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -625,7 +626,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Work out frame sizes.
uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
int64_t NegFrameSize = -FrameSize;
- if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))
+ if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
llvm_unreachable("Unhandled stack size!");
if (MFI.isFrameAddressTaken())
@@ -660,10 +661,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
: PPC::STWU );
const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
: PPC::STWUX);
- const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
- : PPC::LIS );
- const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
- : PPC::ORI );
const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
: PPC::OR );
const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
@@ -934,11 +931,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(NegFrameSize);
} else {
assert(!SingleScratchReg && "Only a single scratch reg available");
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
- .addReg(TempReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
+ TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
.addReg(ScratchReg, RegState::Kill)
.addReg(TempReg, RegState::Kill);
@@ -957,11 +950,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SPReg);
} else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
+ TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
.addReg(SPReg, RegState::Kill)
.addReg(SPReg)
@@ -1668,7 +1657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// values from the stack, and set SPAdd to the value that needs to be added
// to the SP at the end. The default values are as if red zone was present.
unsigned RBReg = SPReg;
- unsigned SPAdd = 0;
+ uint64_t SPAdd = 0;
// Check if we can move the stack update instruction up the epilogue
// past the callee saves. This will allow the move to LR instruction
@@ -1726,11 +1715,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
.addReg(FPReg).addImm(FrameSize);
} else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
- .addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(FrameSize & 0xFFFF);
+ TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
BuildMI(MBB, MBBI, dl, AddInst)
.addReg(RBReg)
.addReg(FPReg)
@@ -1974,6 +1959,15 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ // Do not explicitly save the callee saved VSRp registers.
+ // The individual VSR subregisters will be saved instead.
+ SavedRegs.reset(PPC::VSRp26);
+ SavedRegs.reset(PPC::VSRp27);
+ SavedRegs.reset(PPC::VSRp28);
+ SavedRegs.reset(PPC::VSRp29);
+ SavedRegs.reset(PPC::VSRp30);
+ SavedRegs.reset(PPC::VSRp31);
+
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
@@ -2383,7 +2377,7 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
// Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
// or two GPRs, so we need table to record information for later save/restore.
- llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
+ for (const CalleeSavedInfo &Info : CSI) {
if (Info.isSpilledToReg()) {
auto &SpilledVSR =
VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
@@ -2394,7 +2388,7 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
else
SpilledVSR.second = Info.getReg();
}
- });
+ }
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
diff --git a/llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp b/llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp
new file mode 100644
index 000000000000..00931b1f63b2
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp
@@ -0,0 +1,149 @@
+//===-- PPCGenScalarMASSEntries.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation converts standard math functions into their
+// corresponding MASS (scalar) entries for PowerPC targets.
+// The following is an example of such a conversion:
+// tanh ---> __xl_tanh_finite
+// Such lowering is legal under the fast-math option.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "ppc-gen-scalar-mass"
+
+using namespace llvm;
+
+namespace {
+
+class PPCGenScalarMASSEntries : public ModulePass {
+public:
+ static char ID;
+
+ PPCGenScalarMASSEntries() : ModulePass(ID) {
+ ScalarMASSFuncs = {
+#define TLI_DEFINE_SCALAR_MASS_FUNCS
+#include "llvm/Analysis/ScalarFuncs.def"
+ };
+ }
+
+ bool runOnModule(Module &M) override;
+
+ StringRef getPassName() const override {
+ return "PPC Generate Scalar MASS Entries";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+private:
+ std::map<StringRef, StringRef> ScalarMASSFuncs;
+ bool isCandidateSafeToLower(const CallInst &CI) const;
+ bool isFiniteCallSafe(const CallInst &CI) const;
+ bool createScalarMASSCall(StringRef MASSEntry, CallInst &CI,
+ Function &Func) const;
+};
+
+} // namespace
+
+// Returns true if the 'afn' flag is present on the call instruction to the
+// math function.
+bool PPCGenScalarMASSEntries::isCandidateSafeToLower(const CallInst &CI) const {
+ // Skip functions without a scalar or vector FP type (like cosisin).
+ if (!isa<FPMathOperator>(CI))
+ return false;
+
+ return CI.hasApproxFunc();
+}
+
+// Returns true if the 'nnan', 'ninf' and 'nsz' flags are present on the call
+// instruction to the math function.
+bool PPCGenScalarMASSEntries::isFiniteCallSafe(const CallInst &CI) const {
+ // Skip functions without a scalar or vector FP type (like cosisin).
+ if (!isa<FPMathOperator>(CI))
+ return false;
+
+ // FIXME: no-errno and trapping-math need to be set for the MASS conversion
+ // but have no IR representation.
+ return CI.hasNoNaNs() && CI.hasNoInfs() && CI.hasNoSignedZeros();
+}
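For orientation, the flag combination these two predicates test can be
attached via IRBuilder as follows (a sketch; Builder, TanhFn and Arg are
assumed to exist and are purely illustrative):

    FastMathFlags FMF;
    FMF.setApproxFunc();    // 'afn'  -> accepted by isCandidateSafeToLower
    FMF.setNoNaNs();        // 'nnan' \
    FMF.setNoInfs();        // 'ninf'  > accepted by isFiniteCallSafe,
    FMF.setNoSignedZeros(); // 'nsz'  /  selecting the _finite MASS entry
    Builder.setFastMathFlags(FMF);
    CallInst *Call = Builder.CreateCall(TanhFn, {Arg});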
+
+/// Lowers scalar math functions to scalar MASS functions.
+/// e.g.: tanh --> __xl_tanh_finite or __xl_tanh
+/// Both the function prototype and its call site are updated during lowering.
+bool PPCGenScalarMASSEntries::createScalarMASSCall(StringRef MASSEntry,
+ CallInst &CI,
+ Function &Func) const {
+ if (CI.use_empty())
+ return false;
+
+ Module *M = Func.getParent();
+ assert(M && "Expecting a valid Module");
+
+ std::string MASSEntryStr = MASSEntry.str();
+ if (isFiniteCallSafe(CI))
+ MASSEntryStr += "_finite";
+
+ FunctionCallee FCache = M->getOrInsertFunction(
+ MASSEntryStr, Func.getFunctionType(), Func.getAttributes());
+
+ CI.setCalledFunction(FCache);
+
+ return true;
+}
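Condensed to its essence, the redirection performed by this function is
(sketch only; names illustrative, error handling elided):

    static void redirectToMASS(CallInst &CI, StringRef MASSName) {
      Function *Old = CI.getCalledFunction();
      Module *M = Old->getParent();
      // Same signature and attributes, new (MASS) symbol name.
      FunctionCallee New = M->getOrInsertFunction(
          MASSName, Old->getFunctionType(), Old->getAttributes());
      CI.setCalledFunction(New);
    }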
+
+bool PPCGenScalarMASSEntries::runOnModule(Module &M) {
+ bool Changed = false;
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC || skipModule(M))
+ return false;
+
+ for (Function &Func : M) {
+ if (!Func.isDeclaration())
+ continue;
+
+ auto Iter = ScalarMASSFuncs.find(Func.getName());
+ if (Iter == ScalarMASSFuncs.end())
+ continue;
+
+ // The call to createScalarMASSCall() invalidates the iterator over users
+ // upon replacing the users. Precomputing the current list of users allows
+ // us to replace all the call sites.
+ SmallVector<User *, 4> TheUsers;
+ for (auto *User : Func.users())
+ TheUsers.push_back(User);
+
+ for (auto *User : TheUsers)
+ if (auto *CI = dyn_cast_or_null<CallInst>(User)) {
+ if (isCandidateSafeToLower(*CI))
+ Changed |= createScalarMASSCall(Iter->second, *CI, Func);
+ }
+ }
+
+ return Changed;
+}
+
+char PPCGenScalarMASSEntries::ID = 0;
+
+char &llvm::PPCGenScalarMASSEntriesID = PPCGenScalarMASSEntries::ID;
+
+INITIALIZE_PASS(PPCGenScalarMASSEntries, DEBUG_TYPE,
+ "Generate Scalar MASS entries", false, false)
+
+ModulePass *llvm::createPPCGenScalarMASSEntriesPass() {
+ return new PPCGenScalarMASSEntries();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index fdcf6e7e80f2..4247cf557c2a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -188,7 +189,7 @@ namespace {
}
/// getSmallIPtrImm - Return a target constant of pointer type.
- inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
+ inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
return CurDAG->getTargetConstant(
Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
}
@@ -202,7 +203,7 @@ namespace {
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
- void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
+ void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
@@ -639,7 +640,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
&& isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
-void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
+void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
SDLoc dl(SN);
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
@@ -4645,7 +4646,8 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
static bool isSWTestOp(SDValue N) {
if (N.getOpcode() == PPCISD::FTSQRT)
return true;
- if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
+ if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
+ N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
return false;
switch (N.getConstantOperandVal(0)) {
case Intrinsic::ppc_vsx_xvtdivdp:
@@ -5377,7 +5379,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// If this is equivalent to an add, then we can fold it with the
// FrameIndex calculation.
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
- selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
+ selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
return;
}
}
@@ -5435,7 +5437,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
int16_t Imm;
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
isIntS16Immediate(N->getOperand(1), Imm)) {
- selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
+ selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
return;
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cbeae0ab03b8..5b9d1e66b04e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -126,6 +126,16 @@ static cl::opt<bool> EnableQuadwordAtomics(
cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
cl::Hidden);
+static cl::opt<bool>
+ DisablePerfectShuffle("ppc-disable-perfect-shuffle",
+ cl::desc("disable vector permute decomposition"),
+ cl::init(true), cl::Hidden);
+
+cl::opt<bool> DisableAutoPairedVecSt(
+ "disable-auto-paired-vec-st",
+ cl::desc("disable automatically generated 32byte paired vector stores"),
+ cl::init(true), cl::Hidden);
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
@@ -379,6 +389,25 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ // MASS transformation for LLVM intrinsics, with fast-math flag handling
+ // kept consistent with the PPCGenScalarMASSEntries pass.
+ if (TM.getOptLevel() == CodeGenOpt::Aggressive &&
+ TM.Options.PPCGenScalarMASSEntries) {
+ setOperationAction(ISD::FSIN , MVT::f64, Custom);
+ setOperationAction(ISD::FCOS , MVT::f64, Custom);
+ setOperationAction(ISD::FPOW , MVT::f64, Custom);
+ setOperationAction(ISD::FLOG, MVT::f64, Custom);
+ setOperationAction(ISD::FLOG10, MVT::f64, Custom);
+ setOperationAction(ISD::FEXP, MVT::f64, Custom);
+ setOperationAction(ISD::FSIN , MVT::f32, Custom);
+ setOperationAction(ISD::FCOS , MVT::f32, Custom);
+ setOperationAction(ISD::FPOW , MVT::f32, Custom);
+ setOperationAction(ISD::FLOG, MVT::f32, Custom);
+ setOperationAction(ISD::FLOG10, MVT::f32, Custom);
+ setOperationAction(ISD::FEXP, MVT::f32, Custom);
+ }
+
if (Subtarget.hasSPE()) {
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
@@ -603,6 +632,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
@@ -1000,7 +1031,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::STORE, MVT::v2f64, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
if (Subtarget.hasP8Vector())
addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
@@ -1048,7 +1079,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v2i64, Promote);
AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
@@ -1264,6 +1295,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
+
+ if (Subtarget.hasP10Vector()) {
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+ }
}
if (Subtarget.pairedVectorMemops()) {
@@ -1291,8 +1326,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
- setMaxAtomicSizeInBitsSupported(128);
+ if (shouldInlineQuadwordAtomics()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
@@ -1305,57 +1339,46 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
}
+ setLibcallName(RTLIB::MULO_I128, nullptr);
if (!isPPC64) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!isPPC64)
setMaxAtomicSizeInBitsSupported(32);
+ else if (shouldInlineQuadwordAtomics())
+ setMaxAtomicSizeInBitsSupported(128);
+ else
+ setMaxAtomicSizeInBitsSupported(64);
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::FMA);
- setTargetDAGCombine(ISD::SINT_TO_FP);
- setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL,
+ ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
- setTargetDAGCombine(ISD::LOAD);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
if (Subtarget.useCRBits())
setTargetDAGCombine(ISD::BRCOND);
- setTargetDAGCombine(ISD::BSWAP);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
-
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
+ ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
- setTargetDAGCombine(ISD::TRUNCATE);
- setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
+ setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
if (Subtarget.useCRBits()) {
- setTargetDAGCombine(ISD::TRUNCATE);
- setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
}
if (Subtarget.hasP9Altivec()) {
- setTargetDAGCombine(ISD::ABS);
- setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine({ISD::ABS, ISD::VSELECT});
}
setLibcallName(RTLIB::LOG_F128, "logf128");
@@ -1586,8 +1609,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
- case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
- case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
+ case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
+ case PPCISD::XSMINC: return "PPCISD::XSMINC";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
@@ -1865,8 +1888,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- const PPCSubtarget& Subtarget =
- static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
if (!Subtarget.hasP8Vector())
return false;
@@ -2120,7 +2142,11 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
- assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
+
+ assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
// The consecutive indices need to specify an element, not part of two
@@ -2421,6 +2447,12 @@ unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
+ EVT VT = SVOp->getValueType(0);
+
+ if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
+ : SVOp->getMaskElt(0);
+
if (DAG.getDataLayout().isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
@@ -2957,15 +2989,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
bool isLoad = true;
SDValue Ptr;
EVT VT;
- unsigned Alignment;
+ Align Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
- Alignment = LD->getAlignment();
+ Alignment = LD->getAlign();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
- Alignment = ST->getAlignment();
+ Alignment = ST->getAlign();
isLoad = false;
} else
return false;
@@ -3009,7 +3041,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
- if (Alignment < 4)
+ if (Alignment < Align(4))
return false;
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
@@ -4416,8 +4448,11 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Off = DAG.getConstant(j, dl, PtrVT);
Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
- MachinePointerInfo(&*FuncArg, j));
+ unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
+ EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
+ SDValue Store =
+ DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(&*FuncArg, j), ObjType);
MemOps.push_back(Store);
++GPR_idx;
}
@@ -6254,8 +6289,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
- SDValue Load =
- DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
+ unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
+ EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
+ MachinePointerInfo(), ObjType);
+
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
@@ -6888,8 +6926,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (useSoftFloat())
report_fatal_error("Soft float support is unimplemented on AIX.");
- const PPCSubtarget &Subtarget =
- static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+ const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
const bool IsPPC64 = Subtarget.isPPC64();
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
@@ -7194,8 +7231,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
if (CFlags.IsPatchPoint)
report_fatal_error("This call type is unimplemented on AIX.");
- const PPCSubtarget& Subtarget =
- static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<CCValAssign, 16> ArgLocs;
@@ -7879,7 +7915,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDNodeFlags Flags = Op.getNode()->getFlags();
- // We have xsmaxcdp/xsmincdp which are OK to emit even in the
+ // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
// presence of infinities.
if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
switch (CC) {
@@ -7887,10 +7923,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
break;
case ISD::SETOGT:
case ISD::SETGT:
- return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+ return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
- return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+ return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
}
}
@@ -9037,7 +9073,7 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
const SDValue *InputLoad = &Op;
- if (InputLoad->getOpcode() == ISD::BITCAST)
+ while (InputLoad->getOpcode() == ISD::BITCAST)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
@@ -9801,7 +9837,7 @@ SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
unsigned SHLAmt = N1.getConstantOperandVal(0);
if (SHLAmt % 8 == 0) {
- SmallVector<int, 16> Mask(16, 0);
+ std::array<int, 16> Mask;
std::iota(Mask.begin(), Mask.end(), 0);
std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
if (SDValue Shuffle =
@@ -9903,6 +9939,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return LdSplt;
}
}
+
+ // All v2i64 and v2f64 shuffles are legal
+ if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return Op;
+
if (Subtarget.hasP9Vector() &&
PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
isLittleEndian)) {
@@ -10048,56 +10089,59 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// perfect shuffle table to emit an optimal matching sequence.
ArrayRef<int> PermMask = SVOp->getMask();
- unsigned PFIndexes[4];
- bool isFourElementShuffle = true;
- for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
- unsigned EltNo = 8; // Start out undef.
- for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
- if (PermMask[i*4+j] < 0)
- continue; // Undef, ignore it.
-
- unsigned ByteSource = PermMask[i*4+j];
- if ((ByteSource & 3) != j) {
- isFourElementShuffle = false;
- break;
- }
+ if (!DisablePerfectShuffle && !isLittleEndian) {
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle;
+ ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i * 4 + j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i * 4 + j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
- if (EltNo == 8) {
- EltNo = ByteSource/4;
- } else if (EltNo != ByteSource/4) {
- isFourElementShuffle = false;
- break;
+ if (EltNo == 8) {
+ EltNo = ByteSource / 4;
+ } else if (EltNo != ByteSource / 4) {
+ isFourElementShuffle = false;
+ break;
+ }
}
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
+ PFIndexes[2] * 9 + PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be
+ // computed. For example, if the perm mask can be hoisted out of a loop or
+ // is already used (perhaps because there are multiple permutes with the
+ // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
+ // permute mask out of the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can
+ // be generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
- PFIndexes[i] = EltNo;
- }
-
- // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
- // perfect shuffle vector to determine if it is cost effective to do this as
- // discrete instructions, or whether we should use a vperm.
- // For now, we skip this for little endian until such time as we have a
- // little-endian perfect shuffle table.
- if (isFourElementShuffle && !isLittleEndian) {
- // Compute the index in the perfect shuffle table.
- unsigned PFTableIndex =
- PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
-
- unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
- unsigned Cost = (PFEntry >> 30);
-
- // Determining when to avoid vperm is tricky. Many things affect the cost
- // of vperm, particularly how many times the perm mask needs to be computed.
- // For example, if the perm mask can be hoisted out of a loop or is already
- // used (perhaps because there are multiple permutes with the same shuffle
- // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
- // the loop requires an extra register.
- //
- // As a compromise, we only emit discrete instructions if the shuffle can be
- // generated in 3 or fewer operations. When we have loop information
- // available, if this block is within a loop, we should avoid using vperm
- // for 3-operation perms and use a constant pool load instead.
- if (Cost < 3)
- return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
@@ -10518,6 +10562,16 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
0);
}
+ case Intrinsic::ppc_fnmsub: {
+ EVT VT = Op.getOperand(1).getValueType();
+ if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
+ return DAG.getNode(
+ ISD::FNEG, dl, VT,
+ DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
+ DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
+ return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ }
case Intrinsic::ppc_convert_f128_to_ppcf128:
case Intrinsic::ppc_convert_ppcf128_to_f128: {
RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
@@ -10529,6 +10583,31 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
dl, SDValue());
return Result.first;
}
+ case Intrinsic::ppc_maxfe:
+ case Intrinsic::ppc_maxfl:
+ case Intrinsic::ppc_maxfs:
+ case Intrinsic::ppc_minfe:
+ case Intrinsic::ppc_minfl:
+ case Intrinsic::ppc_minfs: {
+ EVT VT = Op.getValueType();
+ assert(
+ all_of(Op->ops().drop_front(4),
+ [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
+ "ppc_[max|min]f[e|l|s] must have uniform type arguments");
+ (void)VT;
+ ISD::CondCode CC = ISD::SETGT;
+ if (IntrinsicID == Intrinsic::ppc_minfe ||
+ IntrinsicID == Intrinsic::ppc_minfl ||
+ IntrinsicID == Intrinsic::ppc_minfs)
+ CC = ISD::SETLT;
+ unsigned I = Op.getNumOperands() - 2, Cnt = I;
+ SDValue Res = Op.getOperand(I);
+ for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
+ Res =
+ DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
+ }
+ return Res;
+ }
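The wrap-around induction in the loop above folds every argument into the
select chain exactly once: it seeds Res with operand NumOperands - 2, folds
the earlier operands down to operand 1, and finally folds the last operand.
A plain C++ analogy of that order for the max case, modulo NaN semantics
(sketch; assumes Args holds the node's FP arguments in order, at least two):

    double maxfeOrder(const std::vector<double> &Args) {
      size_t K = Args.size();
      double Res = Args[K - 2];        // seed: second-to-last argument
      for (size_t I = K - 2; I-- > 0;) // then Args[K-3] .. Args[0]
        Res = Res > Args[I] ? Res : Args[I];
      return Res > Args[K - 1] ? Res : Args[K - 1]; // last argument last
    }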
}
// If this is a lowered altivec predicate compare, CompareOpc is set to the
@@ -11055,6 +11134,12 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::FPOW: return lowerPow(Op, DAG);
+ case ISD::FSIN: return lowerSin(Op, DAG);
+ case ISD::FCOS: return lowerCos(Op, DAG);
+ case ISD::FLOG: return lowerLog(Op, DAG);
+ case ISD::FLOG10: return lowerLog10(Op, DAG);
+ case ISD::FEXP: return lowerExp(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -11183,6 +11268,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
N->getOperand(2), N->getOperand(1)));
break;
+ case Intrinsic::ppc_maxfe:
+ case Intrinsic::ppc_minfe:
+ case Intrinsic::ppc_fnmsub:
case Intrinsic::ppc_convert_f128_to_ppcf128:
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
break;
@@ -14075,13 +14163,13 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
assert(LD1 && "Input needs to be a LoadSDNode.");
return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
LD1->getBasePtr(), LD1->getPointerInfo(),
- LD1->getAlignment());
+ LD1->getAlign());
}
if (InputsAreReverseConsecutive) {
assert(LDL && "Input needs to be a LoadSDNode.");
- SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
- LDL->getBasePtr(), LDL->getPointerInfo(),
- LDL->getAlignment());
+ SDValue Load =
+ DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
+ LDL->getPointerInfo(), LDL->getAlign());
SmallVector<int, 16> Ops;
for (int i = N->getNumOperands() - 1; i >= 0; i--)
Ops.push_back(i);
@@ -14469,6 +14557,11 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
DAGCombinerInfo &DCI) const {
+ // Delay VSX load for LE combine until after LegalizeOps to prioritize other
+ // load combines.
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue Chain;
@@ -14503,13 +14596,6 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
MVT VecTy = N->getValueType(0).getSimpleVT();
- // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
- // aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
- VecTy.getScalarSizeInBits() <= 32) {
- return SDValue();
- }
-
SDValue LoadOps[] = { Chain, Base };
SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
DAG.getVTList(MVT::v2f64, MVT::Other),
@@ -14537,6 +14623,11 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
DAGCombinerInfo &DCI) const {
+ // Delay VSX store for LE combine until after LegalizeOps to prioritize other
+ // store combines.
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue Chain;
@@ -14574,13 +14665,6 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
SDValue Src = N->getOperand(SrcOpnd);
MVT VecTy = Src.getValueType().getSimpleVT();
- // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
- // aligned and the type is a vector with elements up to 4 bytes
- if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
- VecTy.getScalarSizeInBits() <= 32) {
- return SDValue();
- }
-
// All stores are done as v2f64 and possible bit cast.
if (VecTy != MVT::v2f64) {
Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
@@ -14806,6 +14890,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
SDValue SToVLHS = isScalarToVec(LHS);
SDValue SToVRHS = isScalarToVec(RHS);
if (SToVLHS || SToVRHS) {
+ // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
+ // same type and have differing element sizes, then do not perform
+ // the following transformation. The current transformation for
+ // SCALAR_TO_VECTOR assumes that both input vectors have the same
+ // element size. This will be updated in the future to account for
+ // differing sizes of the LHS and RHS.
+ if (SToVLHS && SToVRHS &&
+ (SToVLHS.getValueType().getScalarSizeInBits() !=
+ SToVRHS.getValueType().getScalarSizeInBits()))
+ return Res;
+
int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
: SToVRHS.getValueType().getVectorNumElements();
int NumEltsOut = ShuffV.size();
@@ -14889,24 +14984,36 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
// Example (even elements from first vector):
// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
if (Mask[0] < NumElts)
- for (int i = 1, e = Mask.size(); i < e; i += 2)
+ for (int i = 1, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = (ShuffV[i - 1] + NumElts);
+ }
// Example (odd elements from first vector):
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
else
- for (int i = 0, e = Mask.size(); i < e; i += 2)
+ for (int i = 0, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = (ShuffV[i + 1] + NumElts);
+ }
} else {
// Example (even elements from first vector):
// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
if (Mask[0] < NumElts)
- for (int i = 0, e = Mask.size(); i < e; i += 2)
+ for (int i = 0, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = ShuffV[i + 1] - NumElts;
+ }
// Example (odd elements from first vector):
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
else
- for (int i = 1, e = Mask.size(); i < e; i += 2)
+ for (int i = 1, e = Mask.size(); i < e; i += 2) {
+ if (ShuffV[i] < 0)
+ continue;
ShuffV[i] = ShuffV[i - 1] - NumElts;
+ }
}
// If the RHS has undefs, we need to remove them since we may have created
@@ -15223,7 +15330,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
auto MMOFlags =
LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
- LD->getPointerInfo(), LD->getAlignment(),
+ LD->getPointerInfo(), LD->getAlign(),
MMOFlags, LD->getAAInfo());
SDValue AddPtr =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
@@ -15231,7 +15338,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue FloatLoad2 = DAG.getLoad(
MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
LD->getPointerInfo().getWithOffset(4),
- MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
+ commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
if (LD->isIndexed()) {
// Note that DAGCombine should re-form any pre-increment load(s) from
@@ -15544,7 +15651,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
SDValue BasePtr = LD->getBasePtr();
SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
- LD->getPointerInfo(), LD->getAlignment());
+ LD->getPointerInfo(), LD->getAlign());
Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(4, dl));
@@ -17718,6 +17825,114 @@ bool PPCTargetLowering::splitValueIntoRegisterParts(
return false;
}
+SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
+ SelectionDAG &DAG) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ EVT RetVT = Op.getValueType();
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ SDValue Callee =
+ DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
+ bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &N : Op->op_values()) {
+ EVT ArgVT = N.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = N;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
+ Entry.IsZExt = !Entry.IsSExt;
+ Args.push_back(Entry);
+ }
+
+ SDValue InChain = DAG.getEntryNode();
+ SDValue TCChain = InChain;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ bool isTailCall =
+ TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
+ (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
+ if (isTailCall)
+ InChain = TCChain;
+ CLI.setDebugLoc(SDLoc(Op))
+ .setChain(InChain)
+ .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
+ .setTailCall(isTailCall)
+ .setSExtResult(SignExtend)
+ .setZExtResult(!SignExtend)
+ .setIsPostTypeLegalization(true);
+ return TLI.LowerCallTo(CLI).first;
+}
+
+SDValue PPCTargetLowering::lowerLibCallBasedOnType(
+ const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::f32)
+ return lowerToLibCall(LibCallFloatName, Op, DAG);
+
+ if (Op.getValueType() == MVT::f64)
+ return lowerToLibCall(LibCallDoubleName, Op, DAG);
+
+ return SDValue();
+}
+
+bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
+ Flags.hasNoNaNs() && Flags.hasNoInfs();
+}
+
+bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
+ return Op.getNode()->getFlags().hasApproximateFuncs();
+}
+
+SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
+ const char *LibCallFloatName,
+ const char *LibCallDoubleNameFinite,
+ const char *LibCallFloatNameFinite,
+ SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!isLoweringToMASSSafe(Op))
+ return SDValue();
+
+ if (!isLoweringToMASSFiniteSafe(Op))
+ return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
+ DAG);
+
+ return lowerLibCallBasedOnType(LibCallFloatNameFinite,
+ LibCallDoubleNameFinite, Op, DAG);
+}
+
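+// Illustrative: with the mappings below, an FPOW node carrying only the
+// 'afn' fast-math flag is lowered to __xl_pow/__xl_powf, while one that also
+// carries 'nsz', 'nnan' and 'ninf' uses __xl_pow_finite/__xl_powf_finite.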
+SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
+ return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
+ "__xl_powf_finite", Op, DAG);
+}
+
+SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
+ return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
+ "__xl_sinf_finite", Op, DAG);
+}
+
+SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
+ return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
+ "__xl_cosf_finite", Op, DAG);
+}
+
+SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
+ return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
+ "__xl_logf_finite", Op, DAG);
+}
+
+SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
+ return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
+ "__xl_log10f_finite", Op, DAG);
+}
+
+SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
+ return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
+ "__xl_expf_finite", Op, DAG);
+}
+
// If we happen to match to an aligned D-Form, check if the Frame Index is
// adequately aligned. If it is not, reset the mode to match to X-Form.
static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
@@ -17878,10 +18093,18 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
}
}
+bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
+ // TODO: 16-byte atomic type support for AIX is in progress; we should be able
+ // to inline 16-byte atomic ops on AIX too in the future.
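+ // Illustrative: as a result, 128-bit atomic operations are inlined on any
+ // 64-bit subtarget that reports hasQuadwordAtomics(), except on AIX where
+ // the EnableQuadwordAtomics override is still required.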
+ return Subtarget.isPPC64() &&
+ (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
+ Subtarget.hasQuadwordAtomics();
+}
+
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
+ if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
}
@@ -17889,7 +18112,7 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
+ if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}
@@ -17919,10 +18142,9 @@ getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
- assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
- "Only support quadword now");
+ assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Type *ValTy = AlignedAddr->getType()->getPointerElementType();
+ Type *ValTy = Incr->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
Function *RMW = Intrinsic::getDeclaration(
M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
@@ -17944,10 +18166,9 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
- assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
- "Only support quadword now");
+ assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Type *ValTy = AlignedAddr->getType()->getPointerElementType();
+ Type *ValTy = CmpVal->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
Function *IntCmpXchg =
Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index eb52e4aa6273..f92a117fe27f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -51,9 +51,9 @@ namespace llvm {
///
FSEL,
- /// XSMAXCDP, XSMINCDP - C-type min/max instructions.
- XSMAXCDP,
- XSMINCDP,
+ /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
+ XSMAXC,
+ XSMINC,
/// FCFID - The FCFID instruction, taking an f64 operand and producing
/// and f64 value containing the FP representation of the integer that
@@ -77,7 +77,7 @@ namespace llvm {
FCTIDUZ,
FCTIWUZ,
- /// Floating-point-to-interger conversion instructions
+ /// Floating-point-to-integer conversion instructions
FP_TO_UINT_IN_VSR,
FP_TO_SINT_IN_VSR,
@@ -765,8 +765,19 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
- if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
- VT.getScalarSizeInBits() % 8 == 0)
+ // Default handling for scalable and single-element vectors.
+ if (VT.isScalableVector() || VT.getVectorNumElements() == 1)
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+
+ // Split and promote vNi1 vectors so we don't produce v256i1/v512i1
+ // types as those are only for MMA instructions.
+ if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)
+ return TypeSplitVector;
+ if (VT.getScalarSizeInBits() == 1)
+ return TypePromoteInteger;
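+ // Illustrative: v32i1 (32 bits) is split, while v8i1 is promoted to an
+ // integer type.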
+
+ // Widen vectors that have reasonably sized elements.
+ if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
@@ -899,6 +910,8 @@ namespace llvm {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
+ bool shouldInlineQuadwordAtomics() const;
+
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
@@ -1273,6 +1286,24 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
+ SelectionDAG &DAG) const;
+ SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
+ const char *LibCallDoubleName, SDValue Op,
+ SelectionDAG &DAG) const;
+ bool isLoweringToMASSFiniteSafe(SDValue Op) const;
+ bool isLoweringToMASSSafe(SDValue Op) const;
+ SDValue lowerLibCallBase(const char *LibCallDoubleName,
+ const char *LibCallFloatName,
+ const char *LibCallDoubleNameFinite,
+ const char *LibCallFloatNameFinite, SDValue Op,
+ SelectionDAG &DAG) const;
+ SDValue lowerPow(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCos(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLog(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLog10(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerExp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index eae8e36e475e..dbe7a7805c61 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -580,6 +580,14 @@ def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+
+let hasSideEffects = 1, Defs = [CTR8] in
+def MTCTR8Pseudo : PPCEmitTimePseudo<(outs), (ins g8rc:$rS), "#MTCTR8Pseudo", []>;
+
+let hasSideEffects = 1, Uses = [CTR8], Defs = [CTR8] in
+def DecreaseCTR8Pseudo : PPCEmitTimePseudo<(outs crbitrc:$rT), (ins i64imm:$stride),
+ "#DecreaseCTR8Pseudo", []>;
+
let Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
"mfspr $rT, 268", IIC_SprMFTB>,
@@ -1014,8 +1022,6 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
}
-def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L),
- "darn $RT, $L", IIC_LdStLD>, isPPC64;
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
"addpcis $RT, $D", IIC_BrB, []>, isPPC64;
def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
@@ -1040,6 +1046,11 @@ def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
[(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
}
+let hasSideEffects = 1 in {
+def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L),
+ "darn $RT, $L", IIC_LdStLD>, isPPC64;
+}
+
let hasSideEffects = 0 in {
defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
(ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
@@ -1396,10 +1407,6 @@ def LDUX : XForm_1_memOp<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
"ldux $rD, $addr", IIC_LdStLDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
-
-def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
- "ldmx $rD, $src", IIC_LdStLD, []>, isPPC64,
- Requires<[IsISA3_0]>;
}
let mayLoad = 1, hasNoSchedulingInfo = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index eada872c2a7d..59486c323567 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2218,7 +2218,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
.addReg(Pred[1].getReg(), RegState::ImplicitDefine);
} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
- MI.RemoveOperand(0);
+ MI.removeOperand(0);
MI.setDesc(get(PPC::BC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
@@ -2226,7 +2226,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
.addMBB(MBB);
} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
- MI.RemoveOperand(0);
+ MI.removeOperand(0);
MI.setDesc(get(PPC::BCn));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
@@ -2234,7 +2234,7 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
.addMBB(MBB);
} else {
MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
- MI.RemoveOperand(0);
+ MI.removeOperand(0);
MI.setDesc(get(PPC::BCC));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
@@ -2714,8 +2714,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
}
// If we've set the mask, we can transform.
if (Mask != ~0LLU) {
- MI->RemoveOperand(4);
- MI->RemoveOperand(3);
+ MI->removeOperand(4);
+ MI->removeOperand(3);
MI->getOperand(2).setImm(Mask);
NumRcRotatesConvertedToRcAnd++;
}
@@ -2724,7 +2724,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (MB >= 48) {
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
NewOpC = PPC::ANDI8_rec;
- MI->RemoveOperand(3);
+ MI->removeOperand(3);
MI->getOperand(2).setImm(Mask);
NumRcRotatesConvertedToRcAnd++;
}
@@ -3026,8 +3026,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case PPC::KILL_PAIR: {
MI.setDesc(get(PPC::UNENCODED_NOP));
- MI.RemoveOperand(1);
- MI.RemoveOperand(0);
+ MI.removeOperand(1);
+ MI.removeOperand(0);
return true;
}
case TargetOpcode::LOAD_STACK_GUARD: {
@@ -3122,7 +3122,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(PPC::CR7)
.addImm(1);
MI.setDesc(get(PPC::ISYNC));
- MI.RemoveOperand(0);
+ MI.removeOperand(0);
return true;
}
}
@@ -3188,7 +3188,7 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
// - implicit reg uses
// Therefore, removing the implicit operand won't change the explicit
// operands layout.
- MI.RemoveOperand(UseOpIdx);
+ MI.removeOperand(UseOpIdx);
}
}
@@ -3199,7 +3199,7 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
// Remove existing operands.
int OperandToKeep = LII.SetCR ? 1 : 0;
for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
// Replace the instruction.
if (LII.SetCR) {
@@ -3234,6 +3234,47 @@ MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,
return nullptr;
}
+void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register Reg,
+ int64_t Imm) const {
+ assert(!MBB.getParent()->getRegInfo().isSSA() &&
+ "Register should be in non-SSA form after RA");
+ bool isPPC64 = Subtarget.isPPC64();
+ // FIXME: Materialization here is not optimal.
+ // For some special bit patterns we can use fewer instructions.
+ // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
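+ // Illustrative: 0x123456789ABCDEF0 is currently materialized as
+ // lis 0x1234; ori 0x5678; rldicr 32, 31; oris 0x9ABC; ori 0xDEF0.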
+ if (isInt<16>(Imm)) {
+ BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
+ } else if (isInt<32>(Imm)) {
+ BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
+ .addImm(Imm >> 16);
+ if (Imm & 0xFFFF)
+ BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(Imm & 0xFFFF);
+ } else {
+ assert(isPPC64 && "Materializing a 64-bit immediate into a single register "
+ "is only supported on PPC64");
+ BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
+ if ((Imm >> 32) & 0xFFFF)
+ BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm((Imm >> 32) & 0xFFFF);
+ BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(32)
+ .addImm(31);
+ BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm((Imm >> 16) & 0xFFFF);
+ if (Imm & 0xFFFF)
+ BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(Imm & 0xFFFF);
+ }
+}
+
MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineInstr &MI,
unsigned &OpNoForForwarding,
@@ -3790,15 +3831,15 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
// Replace MI with "LI 0"
- MI.RemoveOperand(4);
- MI.RemoveOperand(3);
- MI.RemoveOperand(2);
+ MI.removeOperand(4);
+ MI.removeOperand(3);
+ MI.removeOperand(2);
MI.getOperand(1).ChangeToImmediate(0);
MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
} else {
// Replace MI with "ANDI_rec reg, 0"
- MI.RemoveOperand(4);
- MI.RemoveOperand(3);
+ MI.removeOperand(4);
+ MI.removeOperand(3);
MI.getOperand(2).setImm(0);
MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
@@ -4282,8 +4323,8 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
unsigned MinOp = std::min(Op1, Op2);
MachineOperand MOp1 = MI.getOperand(MinOp);
MachineOperand MOp2 = MI.getOperand(MaxOp);
- MI.RemoveOperand(std::max(Op1, Op2));
- MI.RemoveOperand(std::min(Op1, Op2));
+ MI.removeOperand(std::max(Op1, Op2));
+ MI.removeOperand(std::min(Op1, Op2));
// If the operands we are swapping are the two at the end (the common case)
// we can just remove both and add them in the opposite order.
@@ -4297,7 +4338,7 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
MOps.push_back(MI.getOperand(i));
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
}
// MOp2 needs to be added next.
MI.addOperand(MOp2);
@@ -4532,8 +4573,8 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
replaceInstrOperandWithImm(CompareUseMI, 1, 0);
- CompareUseMI.RemoveOperand(3);
- CompareUseMI.RemoveOperand(2);
+ CompareUseMI.removeOperand(3);
+ CompareUseMI.removeOperand(2);
continue;
}
LLVM_DEBUG(
@@ -4542,8 +4583,8 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
LLVM_DEBUG(dbgs() << "Is converted to:\n");
// Convert to copy and remove unneeded operands.
CompareUseMI.setDesc(get(PPC::COPY));
- CompareUseMI.RemoveOperand(3);
- CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 2 : 1);
+ CompareUseMI.removeOperand(3);
+ CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
CmpIselsConverted++;
Changed = true;
LLVM_DEBUG(CompareUseMI.dump());
@@ -4887,7 +4928,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
SmallVector<MachineOperand, 2> MOps;
for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
MOps.push_back(MI.getOperand(i));
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
}
// Remove the last MO in the list, which is ZERO operand in fact.
@@ -5010,7 +5051,7 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
// just convert this to a COPY. Can't do this post-RA since we've already
// cleaned up the copies.
else if (!SetCR && ShAmt == 0 && !PostRA) {
- MI.RemoveOperand(2);
+ MI.removeOperand(2);
MI.setDesc(get(PPC::COPY));
} else {
// The 32 bit and 64 bit instructions are quite different.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index c16e146da247..e22b0086bde8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -295,6 +295,99 @@ public:
return get(Opcode).TSFlags & PPCII::Prefixed;
}
+ /// Check if Opcode corresponds to a call instruction that should be marked
+ /// with the NOTOC relocation.
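+ /// For example, BL8_NOTOC is such a call, while a plain BL8 is not.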
+ bool isNoTOCCallInstr(unsigned Opcode) const {
+ if (!get(Opcode).isCall())
+ return false;
+
+ switch (Opcode) {
+ default:
+#ifndef NDEBUG
+ llvm_unreachable("Unknown call opcode");
+#endif
+ return false;
+ case PPC::BL8_NOTOC:
+ case PPC::BL8_NOTOC_TLS:
+ case PPC::BL8_NOTOC_RM:
+ return true;
+#ifndef NDEBUG
+ case PPC::BL8:
+ case PPC::BL:
+ case PPC::BL8_TLS:
+ case PPC::BL_TLS:
+ case PPC::BLA8:
+ case PPC::BLA:
+ case PPC::BCCL:
+ case PPC::BCCLA:
+ case PPC::BCL:
+ case PPC::BCLn:
+ case PPC::BL8_NOP:
+ case PPC::BL_NOP:
+ case PPC::BL8_NOP_TLS:
+ case PPC::BLA8_NOP:
+ case PPC::BCTRL8:
+ case PPC::BCTRL:
+ case PPC::BCCCTRL8:
+ case PPC::BCCCTRL:
+ case PPC::BCCTRL8:
+ case PPC::BCCTRL:
+ case PPC::BCCTRL8n:
+ case PPC::BCCTRLn:
+ case PPC::BL8_RM:
+ case PPC::BLA8_RM:
+ case PPC::BL8_NOP_RM:
+ case PPC::BLA8_NOP_RM:
+ case PPC::BCTRL8_RM:
+ case PPC::BCTRL8_LDinto_toc:
+ case PPC::BCTRL8_LDinto_toc_RM:
+ case PPC::BL8_TLS_:
+ case PPC::TCRETURNdi8:
+ case PPC::TCRETURNai8:
+ case PPC::TCRETURNri8:
+ case PPC::TAILBCTR8:
+ case PPC::TAILB8:
+ case PPC::TAILBA8:
+ case PPC::BCLalways:
+ case PPC::BLRL:
+ case PPC::BCCLRL:
+ case PPC::BCLRL:
+ case PPC::BCLRLn:
+ case PPC::BDZL:
+ case PPC::BDNZL:
+ case PPC::BDZLA:
+ case PPC::BDNZLA:
+ case PPC::BDZLp:
+ case PPC::BDNZLp:
+ case PPC::BDZLAp:
+ case PPC::BDNZLAp:
+ case PPC::BDZLm:
+ case PPC::BDNZLm:
+ case PPC::BDZLAm:
+ case PPC::BDNZLAm:
+ case PPC::BDZLRL:
+ case PPC::BDNZLRL:
+ case PPC::BDZLRLp:
+ case PPC::BDNZLRLp:
+ case PPC::BDZLRLm:
+ case PPC::BDNZLRLm:
+ case PPC::BL_RM:
+ case PPC::BLA_RM:
+ case PPC::BL_NOP_RM:
+ case PPC::BCTRL_RM:
+ case PPC::TCRETURNdi:
+ case PPC::TCRETURNai:
+ case PPC::TCRETURNri:
+ case PPC::BCTRL_LWZinto_toc:
+ case PPC::BCTRL_LWZinto_toc_RM:
+ case PPC::TAILBCTR:
+ case PPC::TAILB:
+ case PPC::TAILBA:
+ return false;
+#endif
+ }
+ }
+
static bool isSameClassPhysRegCopy(unsigned Opcode) {
unsigned CopyOpcodes[] = {PPC::OR, PPC::OR8, PPC::FMR,
PPC::VOR, PPC::XXLOR, PPC::XXLORf,
@@ -653,6 +746,12 @@ public:
MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI,
bool &SeenIntermediateUse) const;
+ // Materialize immediate after RA.
+ void materializeImmPostRA(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register Reg,
+ int64_t Imm) const;
+
/// getRegNumForOperand - some operands use different numbering schemes
/// for the same registers. For example, a VSX instruction may have any of
/// vs0-vs63 allocated whereas an Altivec instruction could only have
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index c26b4f6ceb7d..f651b51d2684 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -198,8 +198,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
-def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
-def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
+def PPCxsmaxc : SDNode<"PPCISD::XSMAXC", SDT_PPCFPMinMax, []>;
+def PPCxsminc : SDNode<"PPCISD::XSMINC", SDT_PPCFPMinMax, []>;
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
@@ -633,514 +633,6 @@ class NoEncode<string E> {
}
-//===----------------------------------------------------------------------===//
-// PowerPC Operand Definitions.
-
-// In the default PowerPC assembler syntax, registers are specified simply
-// by number, so they cannot be distinguished from immediate values (without
-// looking at the opcode). This means that the default operand matching logic
-// for the asm parser does not work, and we need to specify custom matchers.
-// Since those can only be specified with RegisterOperand classes and not
-// directly on the RegisterClass, all instructions patterns used by the asm
-// parser need to use a RegisterOperand (instead of a RegisterClass) for
-// all their register operands.
-// For this purpose, we define one RegisterOperand for each RegisterClass,
-// using the same name as the class, just in lower case.
-
-def PPCRegGPRCAsmOperand : AsmOperandClass {
- let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
-}
-def gprc : RegisterOperand<GPRC> {
- let ParserMatchClass = PPCRegGPRCAsmOperand;
-}
-def PPCRegG8RCAsmOperand : AsmOperandClass {
- let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
-}
-def g8rc : RegisterOperand<G8RC> {
- let ParserMatchClass = PPCRegG8RCAsmOperand;
-}
-def PPCRegG8pRCAsmOperand : AsmOperandClass {
- let Name = "RegG8pRC"; let PredicateMethod = "isEvenRegNumber";
-}
-def g8prc : RegisterOperand<G8pRC> {
- let ParserMatchClass = PPCRegG8pRCAsmOperand;
-}
-def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
- let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
-}
-def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
- let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
-}
-def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
- let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
-}
-def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
- let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
-}
-def PPCRegF8RCAsmOperand : AsmOperandClass {
- let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
-}
-def f8rc : RegisterOperand<F8RC> {
- let ParserMatchClass = PPCRegF8RCAsmOperand;
-}
-def PPCRegF4RCAsmOperand : AsmOperandClass {
- let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
-}
-def f4rc : RegisterOperand<F4RC> {
- let ParserMatchClass = PPCRegF4RCAsmOperand;
-}
-def PPCRegVRRCAsmOperand : AsmOperandClass {
- let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
-}
-def vrrc : RegisterOperand<VRRC> {
- let ParserMatchClass = PPCRegVRRCAsmOperand;
-}
-def PPCRegVFRCAsmOperand : AsmOperandClass {
- let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
-}
-def vfrc : RegisterOperand<VFRC> {
- let ParserMatchClass = PPCRegVFRCAsmOperand;
-}
-def PPCRegCRBITRCAsmOperand : AsmOperandClass {
- let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
-}
-def crbitrc : RegisterOperand<CRBITRC> {
- let ParserMatchClass = PPCRegCRBITRCAsmOperand;
-}
-def PPCRegCRRCAsmOperand : AsmOperandClass {
- let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
-}
-def crrc : RegisterOperand<CRRC> {
- let ParserMatchClass = PPCRegCRRCAsmOperand;
-}
-def PPCRegSPERCAsmOperand : AsmOperandClass {
- let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
-}
-def sperc : RegisterOperand<SPERC> {
- let ParserMatchClass = PPCRegSPERCAsmOperand;
-}
-def PPCRegSPE4RCAsmOperand : AsmOperandClass {
- let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber";
-}
-def spe4rc : RegisterOperand<GPRC> {
- let ParserMatchClass = PPCRegSPE4RCAsmOperand;
-}
-
-def PPCU1ImmAsmOperand : AsmOperandClass {
- let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
- let RenderMethod = "addImmOperands";
-}
-def u1imm : Operand<i32> {
- let PrintMethod = "printU1ImmOperand";
- let ParserMatchClass = PPCU1ImmAsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCU2ImmAsmOperand : AsmOperandClass {
- let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
- let RenderMethod = "addImmOperands";
-}
-def u2imm : Operand<i32> {
- let PrintMethod = "printU2ImmOperand";
- let ParserMatchClass = PPCU2ImmAsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCATBitsAsHintAsmOperand : AsmOperandClass {
- let Name = "ATBitsAsHint"; let PredicateMethod = "isATBitsAsHint";
- let RenderMethod = "addImmOperands"; // Irrelevant, predicate always fails.
-}
-def atimm : Operand<i32> {
- let PrintMethod = "printATBitsAsHint";
- let ParserMatchClass = PPCATBitsAsHintAsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCU3ImmAsmOperand : AsmOperandClass {
- let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
- let RenderMethod = "addImmOperands";
-}
-def u3imm : Operand<i32> {
- let PrintMethod = "printU3ImmOperand";
- let ParserMatchClass = PPCU3ImmAsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def PPCU4ImmAsmOperand : AsmOperandClass {
- let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
- let RenderMethod = "addImmOperands";
-}
-def u4imm : Operand<i32> {
- let PrintMethod = "printU4ImmOperand";
- let ParserMatchClass = PPCU4ImmAsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS5ImmAsmOperand : AsmOperandClass {
- let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
- let RenderMethod = "addImmOperands";
-}
-def s5imm : Operand<i32> {
- let PrintMethod = "printS5ImmOperand";
- let ParserMatchClass = PPCS5ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<5>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU5ImmAsmOperand : AsmOperandClass {
- let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
- let RenderMethod = "addImmOperands";
-}
-def u5imm : Operand<i32> {
- let PrintMethod = "printU5ImmOperand";
- let ParserMatchClass = PPCU5ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<5>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU6ImmAsmOperand : AsmOperandClass {
- let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
- let RenderMethod = "addImmOperands";
-}
-def u6imm : Operand<i32> {
- let PrintMethod = "printU6ImmOperand";
- let ParserMatchClass = PPCU6ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<6>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU7ImmAsmOperand : AsmOperandClass {
- let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
- let RenderMethod = "addImmOperands";
-}
-def u7imm : Operand<i32> {
- let PrintMethod = "printU7ImmOperand";
- let ParserMatchClass = PPCU7ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<7>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU8ImmAsmOperand : AsmOperandClass {
- let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
- let RenderMethod = "addImmOperands";
-}
-def u8imm : Operand<i32> {
- let PrintMethod = "printU8ImmOperand";
- let ParserMatchClass = PPCU8ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<8>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU10ImmAsmOperand : AsmOperandClass {
- let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
- let RenderMethod = "addImmOperands";
-}
-def u10imm : Operand<i32> {
- let PrintMethod = "printU10ImmOperand";
- let ParserMatchClass = PPCU10ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<10>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU12ImmAsmOperand : AsmOperandClass {
- let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
- let RenderMethod = "addImmOperands";
-}
-def u12imm : Operand<i32> {
- let PrintMethod = "printU12ImmOperand";
- let ParserMatchClass = PPCU12ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<12>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS16ImmAsmOperand : AsmOperandClass {
- let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addS16ImmOperands";
-}
-def s16imm : Operand<i32> {
- let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCS16ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCU16ImmAsmOperand : AsmOperandClass {
- let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
- let RenderMethod = "addU16ImmOperands";
-}
-def u16imm : Operand<i32> {
- let PrintMethod = "printU16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCU16ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS17ImmAsmOperand : AsmOperandClass {
- let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
- let RenderMethod = "addS16ImmOperands";
-}
-def s17imm : Operand<i32> {
- // This operand type is used for addis/lis to allow the assembler parser
- // to accept immediates in the range -65536..65535 for compatibility with
- // the GNU assembler. The operand is treated as 16-bit otherwise.
- let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCS17ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCS34ImmAsmOperand : AsmOperandClass {
- let Name = "S34Imm";
- let PredicateMethod = "isS34Imm";
- let RenderMethod = "addImmOperands";
-}
-def s34imm : Operand<i64> {
- let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34EncodingNoPCRel";
- let ParserMatchClass = PPCS34ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<34>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def s34imm_pcrel : Operand<i64> {
- let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34EncodingPCRel";
- let ParserMatchClass = PPCS34ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<34>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def PPCImmZeroAsmOperand : AsmOperandClass {
- let Name = "ImmZero";
- let PredicateMethod = "isImmZero";
- let RenderMethod = "addImmOperands";
-}
-def immZero : Operand<i32> {
- let PrintMethod = "printImmZeroOperand";
- let ParserMatchClass = PPCImmZeroAsmOperand;
- let DecoderMethod = "decodeImmZeroOperand";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
-
-def PPCDirectBrAsmOperand : AsmOperandClass {
- let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
- let RenderMethod = "addBranchTargetOperands";
-}
-def directbrtarget : Operand<OtherVT> {
- let PrintMethod = "printBranchOperand";
- let EncoderMethod = "getDirectBrEncoding";
- let DecoderMethod = "decodeDirectBrTarget";
- let ParserMatchClass = PPCDirectBrAsmOperand;
- let OperandType = "OPERAND_PCREL";
-}
-def absdirectbrtarget : Operand<OtherVT> {
- let PrintMethod = "printAbsBranchOperand";
- let EncoderMethod = "getAbsDirectBrEncoding";
- let ParserMatchClass = PPCDirectBrAsmOperand;
-}
-def PPCCondBrAsmOperand : AsmOperandClass {
- let Name = "CondBr"; let PredicateMethod = "isCondBr";
- let RenderMethod = "addBranchTargetOperands";
-}
-def condbrtarget : Operand<OtherVT> {
- let PrintMethod = "printBranchOperand";
- let EncoderMethod = "getCondBrEncoding";
- let DecoderMethod = "decodeCondBrTarget";
- let ParserMatchClass = PPCCondBrAsmOperand;
- let OperandType = "OPERAND_PCREL";
-}
-def abscondbrtarget : Operand<OtherVT> {
- let PrintMethod = "printAbsBranchOperand";
- let EncoderMethod = "getAbsCondBrEncoding";
- let ParserMatchClass = PPCCondBrAsmOperand;
-}
-def calltarget : Operand<iPTR> {
- let PrintMethod = "printBranchOperand";
- let EncoderMethod = "getDirectBrEncoding";
- let DecoderMethod = "decodeDirectBrTarget";
- let ParserMatchClass = PPCDirectBrAsmOperand;
- let OperandType = "OPERAND_PCREL";
-}
-def abscalltarget : Operand<iPTR> {
- let PrintMethod = "printAbsBranchOperand";
- let EncoderMethod = "getAbsDirectBrEncoding";
- let ParserMatchClass = PPCDirectBrAsmOperand;
-}
-def PPCCRBitMaskOperand : AsmOperandClass {
- let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
-}
-def crbitm: Operand<i8> {
- let PrintMethod = "printcrbitm";
- let EncoderMethod = "get_crbitm_encoding";
- let DecoderMethod = "decodeCRBitMOperand";
- let ParserMatchClass = PPCCRBitMaskOperand;
-}
-// Address operands
-// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
-def PPCRegGxRCNoR0Operand : AsmOperandClass {
- let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
-}
-def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
- let ParserMatchClass = PPCRegGxRCNoR0Operand;
-}
-
-// New addressing modes with 34 bit immediates.
-def PPCDispRI34Operand : AsmOperandClass {
- let Name = "DispRI34"; let PredicateMethod = "isS34Imm";
- let RenderMethod = "addImmOperands";
-}
-def dispRI34 : Operand<iPTR> {
- let ParserMatchClass = PPCDispRI34Operand;
-}
-def memri34 : Operand<iPTR> { // memri, imm is a 34-bit value.
- let PrintMethod = "printMemRegImm34";
- let MIOperandInfo = (ops dispRI34:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRI34Encoding";
- let DecoderMethod = "decodeMemRI34Operands";
-}
-// memri, imm is a 34-bit value for pc-relative instructions where
-// base register is set to zero.
-def memri34_pcrel : Operand<iPTR> { // memri, imm is a 34-bit value.
- let PrintMethod = "printMemRegImm34PCRel";
- let MIOperandInfo = (ops dispRI34:$imm, immZero:$reg);
- let EncoderMethod = "getMemRI34PCRelEncoding";
- let DecoderMethod = "decodeMemRI34PCRelOperands";
-}
-
-// A version of ptr_rc usable with the asm parser.
-def PPCRegGxRCOperand : AsmOperandClass {
- let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
-}
-def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
- let ParserMatchClass = PPCRegGxRCOperand;
-}
-
-def PPCDispRIOperand : AsmOperandClass {
- let Name = "DispRI"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addS16ImmOperands";
-}
-def dispRI : Operand<iPTR> {
- let ParserMatchClass = PPCDispRIOperand;
-}
-def PPCDispRIXOperand : AsmOperandClass {
- let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
- let RenderMethod = "addImmOperands";
-}
-def dispRIX : Operand<iPTR> {
- let ParserMatchClass = PPCDispRIXOperand;
-}
-def PPCDispRIHashOperand : AsmOperandClass {
- let Name = "DispRIHash"; let PredicateMethod = "isHashImmX8";
- let RenderMethod = "addImmOperands";
-}
-def dispRIHash : Operand<iPTR> {
- let ParserMatchClass = PPCDispRIHashOperand;
-}
-def PPCDispRIX16Operand : AsmOperandClass {
- let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16";
- let RenderMethod = "addImmOperands";
-}
-def dispRIX16 : Operand<iPTR> {
- let ParserMatchClass = PPCDispRIX16Operand;
-}
-def PPCDispSPE8Operand : AsmOperandClass {
- let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
- let RenderMethod = "addImmOperands";
-}
-def dispSPE8 : Operand<iPTR> {
- let ParserMatchClass = PPCDispSPE8Operand;
-}
-def PPCDispSPE4Operand : AsmOperandClass {
- let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
- let RenderMethod = "addImmOperands";
-}
-def dispSPE4 : Operand<iPTR> {
- let ParserMatchClass = PPCDispSPE4Operand;
-}
-def PPCDispSPE2Operand : AsmOperandClass {
- let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
- let RenderMethod = "addImmOperands";
-}
-def dispSPE2 : Operand<iPTR> {
- let ParserMatchClass = PPCDispSPE2Operand;
-}
-
-def memri : Operand<iPTR> {
- let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIEncoding";
- let DecoderMethod = "decodeMemRIOperands";
- let OperandType = "OPERAND_MEMORY";
-}
-def memrr : Operand<iPTR> {
- let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
- let OperandType = "OPERAND_MEMORY";
-}
-def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
- let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIXEncoding";
- let DecoderMethod = "decodeMemRIXOperands";
- let OperandType = "OPERAND_MEMORY";
-}
-def memrihash : Operand<iPTR> {
- // memrihash 8-aligned for ROP Protection Instructions.
- let PrintMethod = "printMemRegImmHash";
- let MIOperandInfo = (ops dispRIHash:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIHashEncoding";
- let DecoderMethod = "decodeMemRIHashOperands";
- let OperandType = "OPERAND_MEMORY";
-}
-def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27}
- let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIX16Encoding";
- let DecoderMethod = "decodeMemRIX16Operands";
- let OperandType = "OPERAND_MEMORY";
-}
-def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
- let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getSPE8DisEncoding";
- let DecoderMethod = "decodeSPE8Operands";
- let OperandType = "OPERAND_MEMORY";
-}
-def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
- let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getSPE4DisEncoding";
- let DecoderMethod = "decodeSPE4Operands";
- let OperandType = "OPERAND_MEMORY";
-}
-def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
- let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getSPE2DisEncoding";
- let DecoderMethod = "decodeSPE2Operands";
- let OperandType = "OPERAND_MEMORY";
-}
-
-// A single-register address. This is used with the SjLj
-// pseudo-instructions which translates to LD/LWZ. These instructions requires
-// G8RC_NOX0 registers.
-def memr : Operand<iPTR> {
- let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
- let OperandType = "OPERAND_MEMORY";
-}
-def PPCTLSRegOperand : AsmOperandClass {
- let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
- let RenderMethod = "addTLSRegOperands";
-}
-def tlsreg32 : Operand<i32> {
- let EncoderMethod = "getTLSRegEncoding";
- let ParserMatchClass = PPCTLSRegOperand;
-}
-def tlsgd32 : Operand<i32> {}
-def tlscall32 : Operand<i32> {
- let PrintMethod = "printTLSCall";
- let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
- let EncoderMethod = "getTLSCallEncoding";
-}
-
-// PowerPC Predicate operand.
-def pred : Operand<OtherVT> {
- let PrintMethod = "printPredicateOperand";
- let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
-}
-
// Define PowerPC specific addressing mode.
// d-form
@@ -1212,6 +704,7 @@ def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">,
AssemblerPredicate<(any_of (not AIXOS), FeatureModernAIXAs)>;
def IsAIX : Predicate<"Subtarget->isAIXABI()">;
def NotAIX : Predicate<"!Subtarget->isAIXABI()">;
+def IsISAFuture : Predicate<"Subtarget->isISAFuture()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -3056,6 +2549,13 @@ def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let hasSideEffects = 1, Defs = [CTR] in
+def MTCTRPseudo : PPCEmitTimePseudo<(outs), (ins gprc:$rS), "#MTCTRPseudo", []>;
+
+let hasSideEffects = 1, Uses = [CTR], Defs = [CTR] in
+def DecreaseCTRPseudo : PPCEmitTimePseudo<(outs crbitrc:$rT), (ins i32imm:$stride),
+ "#DecreaseCTRPseudo", []>;
+
let hasSideEffects = 0 in {
let Defs = [LR] in {
def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
@@ -3069,6 +2569,22 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
}
}
+let hasSideEffects = 1 in {
+ def MTUDSCR : XFXForm_7_ext<31, 467, 3, (outs), (ins gprc:$rX),
+ "mtspr 3, $rX", IIC_SprMTSPR>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFUDSCR : XFXForm_1_ext<31, 339, 3, (outs gprc:$rX), (ins),
+ "mfspr $rX, 3", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Disable these aliases on AIX since they are not supported there.
+let Predicates = [ModernAs] in {
+// Aliases for moving to/from the UDSCR via mtspr/mfspr.
+def : InstAlias<"mtudscr $Rx", (MTUDSCR gprc:$Rx)>;
+def : InstAlias<"mfudscr $Rx", (MFUDSCR gprc:$Rx)>;
+}
+
let isCodeGenOnly = 1 in {
// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed
// like a GPR on the PPC970. As such, copies in and out have the same
@@ -3728,12 +3244,12 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
// XL Compat intrinsics.
def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (FMSUB $A, $B, $C)>;
def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (FMSUBS $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (FNMSUB $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (FNMSUBS $A, $B, $C)>;
def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (FNMADD $A, $B, $C)>;
def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (FNMADDS $A, $B, $C)>;
def : Pat<(int_ppc_fre f64:$A), (FRE $A)>;
def : Pat<(int_ppc_fres f32:$A), (FRES $A)>;
+def : Pat<(int_ppc_fnabs f64:$A), (FNABSD $A)>;
+def : Pat<(int_ppc_fnabss f32:$A), (FNABSS $A)>;
include "PPCInstrAltivec.td"
include "PPCInstrSPE.td"
@@ -3748,7 +3264,8 @@ def : Pat<(not i1:$in),
// Prefixed instructions may require access to the above defs at a later
// time so we include this after the def.
-include "PPCInstrPrefix.td"
+include "PPCInstrP10.td"
+include "PPCInstrMMA.td"
// Patterns for arithmetic i1 operations.
def : Pat<(add i1:$a, i1:$b),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
new file mode 100644
index 000000000000..a7e85cda781f
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -0,0 +1,628 @@
+
+// Mask immediates for MMA instructions (2, 4 and 8 bits).
+def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
+def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
+def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
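+// e.g. Msk2Imm accepts immediates 0-3, Msk4Imm 0-15, and Msk8Imm 0-255.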
+
+def MMA : Predicate<"Subtarget->hasMMA()">;
+
+
+// Multiclass definitions for MMA accumulator instructions.
+// ----------------------------------------------------------------------------
+
+// Defines 2 unmasked instructions where the XO field for the acc/non-acc
+// version is even/odd.
+multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ let Predicates = [MMA] in {
+ def NAME :
+ XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
+ !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PP :
+ XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
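+// Illustrative naming ('FOO' is a placeholder): 'defm FOO : ACC_UM_XOEO<...>'
+// defines FOO with XO !or(xo, 0x01) and the accumulating FOOPP with XO xo.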
+
+// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P8_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P8_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XYP4_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XYP4_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// The XO field for acc/non-acc version is even/odd.
+multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
+// The upper nibble of the XO field is 0x4 for the non-accumulating and 0x6
+// for the accumulating version.
+multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
+ string asmstr> {
+ let Predicates = [MMA] in {
+ def NAME :
+ XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
+ !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PP :
+ XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x20), (outs acc:$AT),
+ !con((ins acc:$ATi),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions: operand-negating, unmasked, and masked with 2, 4,
+// 4 bits. The upper nibble of the XO field is ORed with 0x8, 0x4 or 0xC for
+// the negating variants.
+multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA] in {
+ def PN : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NP : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NN : XX3Form_AT3_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME#PN :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_XY4P2_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
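+// Illustrative: the 'defm XVBF16GER2' and 'defm XVF16GER2' instantiations
+// below each expand to the base, PP, PN, NP and NN forms plus their
+// PM-prefixed masked counterparts.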
+
+// Defines 5 instructions: unmasked and operand-negating.
+// The upper nibble of the XO field is ORed with 0x8, 0x4 or 0xC for the
+// negating variants.
+multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA] in {
+ def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), IOL),
+ !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions: operand-negating, unmasked, and masked with 4, 4
+// bits. The upper nibble of the XO field is ORed with 0x8, 0x4 or 0xC for
+// the negating variants.
+multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#PN :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_XY4_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// Defines 10 instructions: operand-negating, unmasked, and masked with 4, 2
+// bits. The upper nibble of the XO field is ORed with 0x8, 0x4 or 0xC for
+// the negating variants.
+multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
+ string asmbase, string asmstr> {
+ defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
+ let Predicates = [MMA, PrefixInstrs] in {
+ def PM#NAME :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x01), (outs acc:$AT),
+ !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
+ !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"@earlyclobber $AT">;
+ def PM#NAME#PP :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, xo, (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#PN :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x80), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NP :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0x40), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def PM#NAME#NN :
+ MMIRR_XX3Form_X4Y2_XAB6<
+ opcode, !or(xo, 0xC0), (outs acc:$AT),
+ !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
+ !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
+ IIC_VecFP, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ }
+}
+
+// End of class definitions.
+//-----------------------------------------------------------------------------
+
+let Predicates = [MMA] in {
+ def XXMFACC :
+ XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
+ IIC_VecGeneral,
+ [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
+ RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
+ def XXMTACC :
+ XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
+ "#KILL_PAIR", []>,
+ RegConstraint<"$XTp = $XSp">;
+ def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
+ "#BUILD_UACC $AT, $AS", []>;
+ // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
+ // the backend. We avoid CSE here because it generates a copy of the acc
+ // register and this copy is more expensive than calling the intrinsic again.
+ let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
+ def XXSETACCZ :
+ XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
+ }
+ def XVI8GER4SPP :
+ XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
+ "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+ let mayStore = 1 in {
+ def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
+ "#SPILL_ACC", []>;
+ def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
+ "#SPILL_UACC", []>;
+ }
+ let mayLoad = 1, hasSideEffects = 0 in {
+ def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
+ "#RESTORE_ACC", []>;
+ def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
+ "#RESTORE_UACC", []>;
+ }
+}
+
+let Predicates = [MMA, PrefixInstrs] in {
+ def PMXVI8GER4SPP :
+ MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
+ (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK,
+ u4imm:$YMSK, u4imm:$PMSK),
+ "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
+ IIC_VecGeneral, []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+}
+
+// MMA accumulating/non-accumulating instructions.
+//------------------------------------------------------------------------------
+
+// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
+// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
+defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
+ "xvbf16ger2", "$AT, $XA, $XB">;
+
+// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
+defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
+ "xvi4ger8", "$AT, $XA, $XB">;
+
+// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
+defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
+ "xvi8ger4", "$AT, $XA, $XB">;
+
+// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
+defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
+ "xvi16ger2", "$AT, $XA, $XB">;
+
+// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
+defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
+ "xvi16ger2s", "$AT, $XA, $XB">;
+
+// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
+// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
+defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
+ "xvf16ger2", "$AT, $XA, $XB">;
+
+// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERNN
+// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERNN
+defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
+ "xvf32ger", "$AT, $XA, $XB">;
+
+// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
+// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
+defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
+ "xvf64ger", "$AT, $XA, $XB">;
+//------------------------------------------------------------------------------
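
As a usage-level illustration of what one of these defm expansions provides: assuming Clang's PowerPC MMA builtins (compiled with -mcpu=power10), the XVF32GER family defined above is reachable from C as sketched below. The two-letter suffix gives the sign applied to the product and to the prior accumulator value. This sketch is illustrative only and not part of the patch.

    // Hedged sketch: exercises the xvf32ger family via Clang's MMA builtins.
    typedef __vector unsigned char vec_t;

    void f32ger_family(__vector_quad *acc, vec_t a, vec_t b) {
      __builtin_mma_xvf32ger(acc, a, b);    // acc =  (a x b)       (no accumulate)
      __builtin_mma_xvf32gerpp(acc, a, b);  // acc =  (a x b) + acc
      __builtin_mma_xvf32gerpn(acc, a, b);  // acc =  (a x b) - acc
      __builtin_mma_xvf32gernp(acc, a, b);  // acc = -(a x b) + acc
      __builtin_mma_xvf32gernn(acc, a, b);  // acc = -(a x b) - acc
    }
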
+
+// MMA Intrinsics
+let Predicates = [MMA] in {
+ def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
+ (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
+ (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
+ (XVF64GER $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
+ (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+ def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
+ (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
+}
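
These patterns are the selection targets for a typical outer-product micro-kernel. A minimal end-to-end sketch, again assuming the Clang MMA builtins; the function and parameter names (sgemm_tile, n) are illustrative, not from this patch:

    typedef __vector unsigned char vec_t;

    // Zero the accumulator (XXSETACCZ), apply rank-1 updates (XVF32GERPP),
    // then move the accumulator back to VSRs and store the four result rows
    // (__builtin_mma_disassemble_acc lowers via XXMFACC plus the extraction
    // patterns defined later in this file).
    void sgemm_tile(float *c, const vec_t *a, const vec_t *b, int n) {
      __vector_quad acc;
      __builtin_mma_xxsetaccz(&acc);
      for (int k = 0; k < n; ++k)
        __builtin_mma_xvf32gerpp(&acc, a[k], b[k]);
      vec_t rows[4];
      __builtin_mma_disassemble_acc(rows, &acc);
      __builtin_memcpy(c, rows, sizeof(rows));
    }
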
+
+// MMA Intrinsics (prefixed, masked forms)
+let Predicates = [MMA, PrefixInstrs] in {
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
+ (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk8Imm:$PMSK)),
+ (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
+ (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk4Imm:$PMSK)),
+ (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)),
+ (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
+ (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)),
+ (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
+ (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk2Imm:$YMSK)>;
+
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
+ (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+ def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
+ Msk4Imm:$XMSK, Msk4Imm:$YMSK,
+ Msk2Imm:$PMSK)),
+ (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
+ Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
+}
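
The Msk2Imm/Msk4Imm/Msk8Imm leaves match only compile-time constants, which mirrors the source-level contract: the masks end up in the prefix word, so Clang rejects non-constant mask arguments to the pm* builtins. A sketch under the same assumptions as above:

    typedef __vector unsigned char vec_t;

    // Accumulate only part of the 4x4 f32 tile: two of the four rows (XMSK)
    // and three of the four columns (YMSK). The masks must be integer
    // constant expressions.
    void partial_tile(__vector_quad *acc, vec_t a, vec_t b) {
      __builtin_mma_pmxvf32gerpp(acc, a, b, 0x3 /* XMSK */, 0x7 /* YMSK */);
    }
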
+
+def ConcatsMMA {
+ dag VecsToVecPair0 =
+ (v256i1 (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
+ $vs1, sub_vsx0));
+ dag VecsToVecPair1 =
+ (v256i1 (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
+ $vs3, sub_vsx0));
+ dag VecsToVecQuad =
+ (BUILD_UACC (INSERT_SUBREG
+ (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
+ (KILL_PAIR VecsToVecPair0), sub_pair0),
+ (KILL_PAIR VecsToVecPair1), sub_pair1));
+}
+
+def Extracts {
+ dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
+ dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
+ dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
+ dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
+ dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
+ dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
+}
+
+let Predicates = [MMA] in {
+ def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
+ (XXMTACC ConcatsMMA.VecsToVecQuad)>;
+ def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
+ v16i8:$vs3, v16i8:$vs2)),
+ (XXMTACC ConcatsMMA.VecsToVecQuad)>;
+ def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)),
+ Extracts.Vec0>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)),
+ Extracts.Vec1>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)),
+ Extracts.Vec2>;
+ def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)),
+ Extracts.Vec3>;
+}
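
These are the selection targets for the accumulator pack/unpack builtins; the swapped $vs1/$vs0 (and $vs3/$vs2) operand order in ConcatsMMA reflects the in-register layout of VSR pairs. A round-trip sketch under the same assumptions:

    typedef __vector unsigned char vec_t;

    void round_trip(vec_t v0, vec_t v1, vec_t v2, vec_t v3, vec_t *out) {
      __vector_quad acc;
      // Lowers to the INSERT_SUBREG chain above followed by XXMTACC.
      __builtin_mma_assemble_acc(&acc, v0, v1, v2, v3);
      // Lowers to XXMFACC plus the EXTRACT_SUBREG patterns above.
      __builtin_mma_disassemble_acc(out, &acc);
    }
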
+
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index ff43426dd1ef..6cf3f1d3341e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -1,10 +1,59 @@
+//===-- PPCInstrP10.td - Power10 Instruction Set -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions introduced for the Power10 CPU.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Naming convention for future instruction formats
+//
+// <INSTR_FORM>{_<OP_TYPE><OP_LENGTH>}+
+//
+// Where:
+// <INSTR_FORM> - name of instruction format as per the ISA
+// (X-Form, VX-Form, etc.)
+// <OP_TYPE> - operand type
+// * FRT/RT/VT/XT/BT - target register
+// (FPR, GPR, VR, VSR, CR-bit respectively)
+// In some situations, the 'T' is replaced by
+// 'D' when describing the target register.
+//                   * [FR|R|V|X|B][A-Z]   - register source (e.g. FRA, RA, XB)
+// * IMM - immediate (where signedness matters,
+// this is SI/UI for signed/unsigned)
+//                   * [R|X|FR]Tp          - register pair target (e.g. FRTp, RTp)
+// * R - PC-Relative bit
+// (denotes that the address is computed pc-relative)
+// * VRM - Masked Registers
+// * AT - target accumulator
+// * N - the Nth bit in a VSR
+// * Additional 1-bit operands may be required for certain
+// instruction formats such as: MC, P, MP
+// * X / Y / P - mask values. In the instruction encoding, this is
+// represented as XMSK, YMSK and PMSK.
+// * MEM - indicates if the instruction format requires any memory
+// accesses. This does not have <OP_LENGTH> attached to it.
+// <OP_LENGTH> - the length of each operand in bits.
+// For operands that are 1 bit, the '1' is omitted from the name.
+//
+// Example: 8RR_XX4Form_IMM8_XTAB6
+// 8RR_XX4Form is the instruction format.
+//                  The operands are an 8-bit immediate (IMM) and the
+//                  destination (XT) and sources (XA, XB), each 6 bits wide.
+//                  Operands of the same length are combined in the name
+//                  (here, XTAB6), and the order of operands in the name
+//                  reflects their order in the encoding.
+
//-------------------------- Predicate definitions ---------------------------//
def IsPPC32 : Predicate<"!Subtarget->isPPC64()">;
-// Mask immediates for MMA instructions (2, 4 and 8 bits).
-def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
-def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
-def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
//===----------------------------------------------------------------------===//
// PowerPC ISA 3.1 specific type constraints.
@@ -205,8 +254,8 @@ multiclass MLS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
!strconcat(asmstr, ", 1"), itin, []>, isPCRel;
}
-class 8LS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
+class 8LS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {
bits<5> RT;
bits<39> D_RA;
@@ -227,8 +276,9 @@ class 8LS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
// 8LS:D-Form: [ 1 0 0 // R // d0
// PO TX T RA d1 ]
-class 8LS_DForm_R_SI34_XT6_RA5<bits<5> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
+class 8LS_DForm_R_SI34_XT6_RA5_MEM<bits<5> opcode, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
: PI<1, { opcode, ? }, OOL, IOL, asmstr, itin> {
bits<6> XT;
bits<39> D_RA;
@@ -529,41 +579,177 @@ multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
isPCRel;
}
-multiclass 8LS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
- dag PCRel_IOL, string asmstr,
- InstrItinClass itin> {
- def NAME : 8LS_DForm_R_SI34_RTA5<opcode, OOL, IOL,
- !strconcat(asmstr, ", 0"), itin, []>;
- def pc : 8LS_DForm_R_SI34_RTA5<opcode, OOL, PCRel_IOL,
- !strconcat(asmstr, ", 1"), itin, []>, isPCRel;
+multiclass 8LS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_SI34_RTA5_MEM<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
}
-multiclass 8LS_DForm_R_SI34_XT6_RA5_p<bits<5> opcode, dag OOL, dag IOL,
- dag PCRel_IOL, string asmstr,
- InstrItinClass itin> {
- def NAME : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, IOL,
- !strconcat(asmstr, ", 0"), itin, []>;
- def pc : 8LS_DForm_R_SI34_XT6_RA5<opcode, OOL, PCRel_IOL,
- !strconcat(asmstr, ", 1"), itin, []>,
- isPCRel;
+multiclass 8LS_DForm_R_SI34_XT6_RA5_MEM_p<bits<5> opcode, dag OOL, dag IOL,
+ dag PCRel_IOL, string asmstr,
+ InstrItinClass itin> {
+ def NAME : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, IOL,
+ !strconcat(asmstr, ", 0"), itin, []>;
+ def pc : 8LS_DForm_R_SI34_XT6_RA5_MEM<opcode, OOL, PCRel_IOL,
+ !strconcat(asmstr, ", 1"), itin, []>,
+ isPCRel;
}
-def PPCRegVSRpRCAsmOperand : AsmOperandClass {
- let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
+def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
+def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">;
+def RCCp {
+ dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
+ dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
}
-def vsrprc : RegisterOperand<VSRpRC> {
- let ParserMatchClass = PPCRegVSRpRCAsmOperand;
-}
+let Predicates = [PrefixInstrs] in {
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PADDI8 :
+ MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm_pcrel:$SI),
+ "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
+ (ins s34imm:$SI),
+ "pli $RT, $SI", IIC_IntSimple, []>;
+ }
+ }
+ defm PADDI :
+ MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
+ (ins immZero:$RA, s34imm_pcrel:$SI),
+ "paddi $RT, $RA, $SI", IIC_LdStLFD>;
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
+ (ins s34imm:$SI),
+ "pli $RT, $SI", IIC_IntSimple, []>;
+ }
-def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass {
- let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber";
-}
+ let mayLoad = 1, mayStore = 0 in {
+ defm PLXV :
+ 8LS_DForm_R_SI34_XT6_RA5_MEM_p<25, (outs vsrc:$XT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA),
+ "plxv $XT, $D_RA", IIC_LdStLFD>;
+ defm PLFS :
+ MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$FRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plfs $FRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLFD :
+ MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$FRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plfd $FRT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLXSSP :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<43, (outs vfrc:$VRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA),
+ "plxssp $VRT, $D_RA", IIC_LdStLFD>;
+ defm PLXSD :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<42, (outs vfrc:$VRT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA),
+ "plxsd $VRT, $D_RA", IIC_LdStLFD>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PLBZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHA8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWA8 :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA),
+ "plwa $RT, $D_RA", IIC_LdStLFD>;
+ defm PLWZ8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ IIC_LdStLFD>;
+ }
+ defm PLBZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLHA :
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWZ :
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLWA :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs gprc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
+ IIC_LdStLFD>;
+ defm PLD :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<57, (outs g8rc:$RT), (ins memri34:$D_RA),
+ (ins memri34_pcrel:$D_RA), "pld $RT, $D_RA",
+ IIC_LdStLFD>;
+ }
-def vsrpevenrc : RegisterOperand<VSRpRC> {
- let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand;
- let EncoderMethod = "getVSRpEvenEncoding";
- let DecoderMethod = "decodeVSRpEvenOperands";
+ let mayStore = 1, mayLoad = 0 in {
+ defm PSTXV :
+ 8LS_DForm_R_SI34_XT6_RA5_MEM_p<27, (outs), (ins vsrc:$XS, memri34:$D_RA),
+ (ins vsrc:$XS, memri34_pcrel:$D_RA),
+ "pstxv $XS, $D_RA", IIC_LdStLFD>;
+ defm PSTFS :
+ MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$FRS, memri34:$D_RA),
+ (ins f4rc:$FRS, memri34_pcrel:$D_RA),
+ "pstfs $FRS, $D_RA", IIC_LdStLFD>;
+ defm PSTFD :
+ MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$FRS, memri34:$D_RA),
+ (ins f8rc:$FRS, memri34_pcrel:$D_RA),
+ "pstfd $FRS, $D_RA", IIC_LdStLFD>;
+ defm PSTXSSP :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<47, (outs), (ins vfrc:$VRS, memri34:$D_RA),
+ (ins vfrc:$VRS, memri34_pcrel:$D_RA),
+ "pstxssp $VRS, $D_RA", IIC_LdStLFD>;
+ defm PSTXSD :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<46, (outs), (ins vfrc:$VRS, memri34:$D_RA),
+ (ins vfrc:$VRS, memri34_pcrel:$D_RA),
+ "pstxsd $VRS, $D_RA", IIC_LdStLFD>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ defm PSTB8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstb $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTH8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "psth $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTW8 :
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstw $RS, $D_RA", IIC_LdStLFD>;
+ }
+ defm PSTB :
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "pstb $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTH :
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "psth $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTW :
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RS, memri34:$D_RA),
+ (ins gprc:$RS, memri34_pcrel:$D_RA),
+ "pstw $RS, $D_RA", IIC_LdStLFD>;
+ defm PSTD :
+ 8LS_DForm_R_SI34_RTA5_MEM_p<61, (outs), (ins g8rc:$RS, memri34:$D_RA),
+ (ins g8rc:$RS, memri34_pcrel:$D_RA),
+ "pstd $RS, $D_RA", IIC_LdStLFD>;
+ }
}
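
Each defm in this block yields a register-based form (printed with a trailing ", 0") and a pc-relative form (", 1", marked isPCRel). The pc-relative forms are what allow TOC-free access to globals on Power10. A hedged sketch of the expected codegen; the exact relocations depend on the code model and on whether the symbol is dso-local:

    // Compile with: clang -O2 -mcpu=power10 -S
    long global_counter;

    long bump(void) {
      // Expected to select PLD/PSTD with R=1, e.g.
      //   pld 3, global_counter@PCREL(0), 1
      //   pstd 3, global_counter@PCREL(0), 1
      // instead of the older addis/ld TOC-based sequence.
      return ++global_counter;
    }
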
class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
@@ -627,17 +813,7 @@ multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> opcode, dag OOL,
isPCRel;
}
-def PPCRegACCRCAsmOperand : AsmOperandClass {
- let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber";
-}
-def acc : RegisterOperand<ACCRC> {
- let ParserMatchClass = PPCRegACCRCAsmOperand;
-}
-
-def uacc : RegisterOperand<UACCRC> {
- let ParserMatchClass = PPCRegACCRCAsmOperand;
-}
// [PO AS XO2 XO]
class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
@@ -655,6 +831,22 @@ class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
let Inst{31} = 0;
}
+// X-Form: [ PO T EO UIM XO TX ]
+class XForm_XT6_IMM5<bits<6> opcode, bits<5> eo, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> UIM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = eo;
+ let Inst{16-20} = UIM;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
class XX3Form_AT3_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin,
list<dag> pattern>
@@ -834,746 +1026,7 @@ class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
let Inst{63} = 0;
}
-def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
-def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
-def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">;
-def MMA : Predicate<"Subtarget->hasMMA()">;
-
-def RCCp {
- dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
- dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
-}
-
-let Predicates = [PrefixInstrs] in {
- let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
- defm PADDI8 :
- MLS_DForm_R_SI34_RTA5_p<14, (outs g8rc:$RT), (ins g8rc:$RA, s34imm:$SI),
- (ins immZero:$RA, s34imm_pcrel:$SI),
- "paddi $RT, $RA, $SI", IIC_LdStLFD>;
- let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def PLI8 : MLS_DForm_SI34_RT5<14, (outs g8rc:$RT),
- (ins s34imm:$SI),
- "pli $RT, $SI", IIC_IntSimple, []>;
- }
- }
- defm PADDI :
- MLS_DForm_R_SI34_RTA5_p<14, (outs gprc:$RT), (ins gprc:$RA, s34imm:$SI),
- (ins immZero:$RA, s34imm_pcrel:$SI),
- "paddi $RT, $RA, $SI", IIC_LdStLFD>;
- let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def PLI : MLS_DForm_SI34_RT5<14, (outs gprc:$RT),
- (ins s34imm:$SI),
- "pli $RT, $SI", IIC_IntSimple, []>;
- }
- let mayLoad = 1, mayStore = 0 in {
- defm PLXV :
- 8LS_DForm_R_SI34_XT6_RA5_p<25, (outs vsrc:$XT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plxv $XT, $D_RA",
- IIC_LdStLFD>;
- defm PLFS :
- MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$FRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plfs $FRT, $D_RA",
- IIC_LdStLFD>;
- defm PLFD :
- MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$FRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plfd $FRT, $D_RA",
- IIC_LdStLFD>;
- defm PLXSSP :
- 8LS_DForm_R_SI34_RTA5_p<43, (outs vfrc:$VRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plxssp $VRT, $D_RA",
- IIC_LdStLFD>;
- defm PLXSD :
- 8LS_DForm_R_SI34_RTA5_p<42, (outs vfrc:$VRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plxsd $VRT, $D_RA",
- IIC_LdStLFD>;
- let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
- defm PLBZ8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLHZ8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLHA8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLWA8 :
- 8LS_DForm_R_SI34_RTA5_p<41, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLWZ8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
- IIC_LdStLFD>;
- }
- defm PLBZ :
- MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLHZ :
- MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLHA :
- MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLWZ :
- MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLWA :
- 8LS_DForm_R_SI34_RTA5_p<41, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
- IIC_LdStLFD>;
- defm PLD :
- 8LS_DForm_R_SI34_RTA5_p<57, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "pld $RT, $D_RA",
- IIC_LdStLFD>;
- }
-
- let mayStore = 1, mayLoad = 0 in {
- defm PSTXV :
- 8LS_DForm_R_SI34_XT6_RA5_p<27, (outs), (ins vsrc:$XS, memri34:$D_RA),
- (ins vsrc:$XS, memri34_pcrel:$D_RA),
- "pstxv $XS, $D_RA", IIC_LdStLFD>;
- defm PSTFS :
- MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$FRS, memri34:$D_RA),
- (ins f4rc:$FRS, memri34_pcrel:$D_RA),
- "pstfs $FRS, $D_RA", IIC_LdStLFD>;
- defm PSTFD :
- MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$FRS, memri34:$D_RA),
- (ins f8rc:$FRS, memri34_pcrel:$D_RA),
- "pstfd $FRS, $D_RA", IIC_LdStLFD>;
- defm PSTXSSP :
- 8LS_DForm_R_SI34_RTA5_p<47, (outs), (ins vfrc:$VRS, memri34:$D_RA),
- (ins vfrc:$VRS, memri34_pcrel:$D_RA),
- "pstxssp $VRS, $D_RA", IIC_LdStLFD>;
- defm PSTXSD :
- 8LS_DForm_R_SI34_RTA5_p<46, (outs), (ins vfrc:$VRS, memri34:$D_RA),
- (ins vfrc:$VRS, memri34_pcrel:$D_RA),
- "pstxsd $VRS, $D_RA", IIC_LdStLFD>;
- let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
- defm PSTB8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "pstb $RS, $D_RA", IIC_LdStLFD>;
- defm PSTH8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "psth $RS, $D_RA", IIC_LdStLFD>;
- defm PSTW8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "pstw $RS, $D_RA", IIC_LdStLFD>;
- }
- defm PSTB :
- MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RS, memri34:$D_RA),
- (ins gprc:$RS, memri34_pcrel:$D_RA),
- "pstb $RS, $D_RA", IIC_LdStLFD>;
- defm PSTH :
- MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RS, memri34:$D_RA),
- (ins gprc:$RS, memri34_pcrel:$D_RA),
- "psth $RS, $D_RA", IIC_LdStLFD>;
- defm PSTW :
- MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RS, memri34:$D_RA),
- (ins gprc:$RS, memri34_pcrel:$D_RA),
- "pstw $RS, $D_RA", IIC_LdStLFD>;
- defm PSTD :
- 8LS_DForm_R_SI34_RTA5_p<61, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "pstd $RS, $D_RA", IIC_LdStLFD>;
- }
-}
-
-// Multiclass definitions for MMA accumulator instructions.
-// ----------------------------------------------------------------------------
-
-// Defines 2 unmasked instructions where the xo field for acc/non-acc version
-// is even/odd.
-multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
- string asmstr> {
- let Predicates = [MMA] in {
- def NAME :
- XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
- !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PP :
- XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
-// The XO field for acc/non-acc version is even/odd.
-multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
- string asmstr> {
- defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
- let Predicates = [MMA, PrefixInstrs] in {
- def PM#NAME :
- MMIRR_XX3Form_XY4P8_XAB6<
- opcode, !or(xo, 0x01), (outs acc:$AT),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
- !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PM#NAME#PP :
- MMIRR_XX3Form_XY4P8_XAB6<
- opcode, xo, (outs acc:$AT),
- !con((ins acc:$ATi),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
- !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
-// The XO field for acc/non-acc version is even/odd.
-multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
- string asmstr> {
- defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
- let Predicates = [MMA, PrefixInstrs] in {
- def PM#NAME :
- MMIRR_XX3Form_XYP4_XAB6<
- opcode, !or(xo, 0x01), (outs acc:$AT),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
- !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PM#NAME#PP :
- MMIRR_XX3Form_XYP4_XAB6<
- opcode, xo, (outs acc:$AT),
- !con((ins acc:$ATi),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
- !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
-// The XO field for acc/non-acc version is even/odd.
-multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
- string asmstr> {
- defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
- let Predicates = [MMA, PrefixInstrs] in {
- def PM#NAME :
- MMIRR_XX3Form_XY4P2_XAB6<
- opcode, !or(xo, 0x01), (outs acc:$AT),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
- !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PM#NAME#PP :
- MMIRR_XX3Form_XY4P2_XAB6<
- opcode, xo, (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
- !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
-// Upper nibble of XO field for acc/non-acc version is 0x4/0x6.
-multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
- string asmstr> {
- let Predicates = [MMA] in {
- def NAME :
- XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
- !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PP :
- XX3Form_AT3_XAB6<
- opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
- let Predicates = [MMA, PrefixInstrs] in {
- def PM#NAME :
- MMIRR_XX3Form_XY4P2_XAB6<
- opcode, xo, (outs acc:$AT),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
- !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PM#NAME#PP :
- MMIRR_XX3Form_XY4P2_XAB6<
- opcode, !or(xo, 0x20), (outs acc:$AT),
- !con((ins acc:$ATi),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
- !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4
-// bits. The upper nibble is masked with 0x8, 0x4, or 0xC for negating operands.
-multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
- string asmbase, string asmstr> {
- defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
- let Predicates = [MMA] in {
- def PN : XX3Form_AT3_XAB6<
- opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def NP : XX3Form_AT3_XAB6<
- opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def NN : XX3Form_AT3_XAB6<
- opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
- let Predicates = [MMA, PrefixInstrs] in {
- def PM#NAME#PN :
- MMIRR_XX3Form_XY4P2_XAB6<
- opcode, !or(xo, 0x80), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
- !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#NP :
- MMIRR_XX3Form_XY4P2_XAB6<
- opcode, !or(xo, 0x40), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
- !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#NN :
- MMIRR_XX3Form_XY4P2_XAB6<
- opcode, !or(xo, 0xC0), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
- !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 5 instructions, unmasked, operand negating.
-// The upper nibble is masked with 0x8, 0x4, or 0xC for negating operands.
-multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
- string asmbase, string asmstr> {
- defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
- let Predicates = [MMA] in {
- def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
- !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT),
- !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT),
- !con((ins acc:$ATi), IOL),
- !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits.
-// The upper nibble is masked with 0x8, 0x4, or 0xC for negating operands.
-multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
- string asmbase, string asmstr> {
- defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
- let Predicates = [MMA, PrefixInstrs] in {
- def PM#NAME :
- MMIRR_XX3Form_XY4_XAB6<
- opcode, !or(xo, 0x01), (outs acc:$AT),
- !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
- !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PM#NAME#PP :
- MMIRR_XX3Form_XY4_XAB6<
- opcode, xo, (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
- !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#PN :
- MMIRR_XX3Form_XY4_XAB6<
- opcode, !or(xo, 0x80), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
- !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#NP :
- MMIRR_XX3Form_XY4_XAB6<
- opcode, !or(xo, 0x40), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
- !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#NN :
- MMIRR_XX3Form_XY4_XAB6<
- opcode, !or(xo, 0xC0), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
- !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits.
-// The upper nibble is masked with 0x8, 0x4, or 0xC for negating operands.
-multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
- string asmbase, string asmstr> {
- defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
- let Predicates = [MMA, PrefixInstrs] in {
- def PM#NAME :
- MMIRR_XX3Form_X4Y2_XAB6<
- opcode, !or(xo, 0x01), (outs acc:$AT),
- !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
- !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"@earlyclobber $AT">;
- def PM#NAME#PP :
- MMIRR_XX3Form_X4Y2_XAB6<
- opcode, xo, (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
- !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#PN :
- MMIRR_XX3Form_X4Y2_XAB6<
- opcode, !or(xo, 0x80), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
- !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#NP :
- MMIRR_XX3Form_X4Y2_XAB6<
- opcode, !or(xo, 0x40), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
- !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def PM#NAME#NN :
- MMIRR_XX3Form_X4Y2_XAB6<
- opcode, !or(xo, 0xC0), (outs acc:$AT),
- !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
- !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
- IIC_VecFP, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- }
-}
-
-// End of class definitions.
-//-----------------------------------------------------------------------------
-
-let Predicates = [MMA] in {
- def XXMFACC :
- XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
- IIC_VecGeneral,
- [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
- RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
- def XXMTACC :
- XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
- IIC_VecGeneral,
- [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
- "#KILL_PAIR", []>,
- RegConstraint<"$XTp = $XSp">;
- def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
- "#BUILD_UACC $AT, $AS", []>;
- // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
- // the backend. We avoid CSE here because it generates a copy of the acc
- // register and this copy is more expensive than calling the intrinsic again.
- let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
- def XXSETACCZ :
- XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
- [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
- }
- def XVI8GER4SPP :
- XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
- "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
- let mayStore = 1 in {
- def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
- "#SPILL_ACC", []>;
- def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
- "#SPILL_UACC", []>;
- }
- let mayLoad = 1, hasSideEffects = 0 in {
- def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
- "#RESTORE_ACC", []>;
- def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
- "#RESTORE_UACC", []>;
- }
-}
-
-let Predicates = [MMA, PrefixInstrs] in {
- def PMXVI8GER4SPP :
- MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
-                          (ins acc:$ATi, vsrc:$XA, vsrc:$XB, u4imm:$XMSK,
- u4imm:$YMSK, u4imm:$PMSK),
- "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
- IIC_VecGeneral, []>,
- RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
-}
-
-// MMA accumulating/non-accumulating instructions.
-//------------------------------------------------------------------------------
-
-// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
-// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
-defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
- "xvbf16ger2", "$AT, $XA, $XB">;
-
-// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
-defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
- "xvi4ger8", "$AT, $XA, $XB">;
-
-// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
-defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
- "xvi8ger4", "$AT, $XA, $XB">;
-
-// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
-defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
- "xvi16ger2", "$AT, $XA, $XB">;
-
-// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
-defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
- "xvi16ger2s", "$AT, $XA, $XB">;
-
-// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
-// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
-defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
- "xvf16ger2", "$AT, $XA, $XB">;
-
-// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERNN
-// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERNN
-defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
- "xvf32ger", "$AT, $XA, $XB">;
-
-// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
-// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
-defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
- "xvf64ger", "$AT, $XA, $XB">;
-//------------------------------------------------------------------------------
-
-// MMA Intrinsics
-let Predicates = [MMA] in {
- def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
- (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
- def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
- (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
- def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
- (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
- def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
- (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-
- def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
- (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
- (XVF64GER $XA, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
- (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
- (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
- (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
- (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
-
- def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
- (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
- (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
- def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
- (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
-}
-
-// MMA Intrinsics
-let Predicates = [MMA, PrefixInstrs] in {
- def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
- (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk8Imm:$PMSK)),
- (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
-
- def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
- (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk4Imm:$PMSK)),
- (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
-
- def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
- (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
- (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
-
- def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK)),
- (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
- (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
- (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
- (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
- (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK)>;
-
- def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk2Imm:$YMSK)),
- (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
- (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk2Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
- (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk2Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
- (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk2Imm:$YMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
- (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk2Imm:$YMSK)>;
-
- def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
- (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
- (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
- def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
- Msk4Imm:$XMSK, Msk4Imm:$YMSK,
- Msk2Imm:$PMSK)),
- (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
- Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
-}
def Concats {
dag VecsToVecPair0 =
@@ -1584,37 +1037,6 @@ def Concats {
(v256i1 (INSERT_SUBREG
(INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
$vs3, sub_vsx0));
- dag VecsToVecQuad =
- (BUILD_UACC (INSERT_SUBREG
- (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
- (KILL_PAIR VecsToVecPair0), sub_pair0),
- (KILL_PAIR VecsToVecPair1), sub_pair1));
-}
-
-def Extracts {
- dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
- dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
- dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
- dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
- dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
- dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
-}
-
-let Predicates = [MMA] in {
- def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
- (XXMTACC Concats.VecsToVecQuad)>;
- def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
- v16i8:$vs3, v16i8:$vs2)),
- (XXMTACC Concats.VecsToVecQuad)>;
- def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
- def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)),
- Extracts.Vec0>;
- def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)),
- Extracts.Vec1>;
- def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)),
- Extracts.Vec2>;
- def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)),
- Extracts.Vec3>;
}
let Predicates = [PairedVectorMemops] in {
@@ -1919,7 +1341,7 @@ let Predicates = [IsISA3_1] in {
def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT),
(ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
"vsldbi $VRT, $VRA, $VRB, $SH",
- IIC_VecGeneral,
+ IIC_VecGeneral,
[(set v16i8:$VRT,
(int_ppc_altivec_vsldbi v16i8:$VRA,
v16i8:$VRB,
@@ -2393,13 +1815,17 @@ let Predicates = [IsISA3_1] in {
def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+ def LXVKQ : XForm_XT6_IMM5<60, 31, 360, (outs vsrc:$XT), (ins u5imm:$UIM),
+ "lxvkq $XT, $UIM", IIC_VecGeneral, []>;
}
let Predicates = [IsISA3_1, HasVSX] in {
def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
- def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp", []>;
- def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp", []>;
+ def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp",
+ [(set f128:$vT, (PPCxsmaxc f128:$vA, f128:$vB))]>;
+ def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp",
+ [(set f128:$vT, (PPCxsminc f128:$vA, f128:$vB))]>;
}
// Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 110f7d79fbc5..6e562498dcf9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -51,35 +51,6 @@
// ** printing (for example: xxswapd for xxpermdi with 0x2 as the imm). **
// ****************************************************************************
-def PPCRegVSRCAsmOperand : AsmOperandClass {
- let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
-}
-def vsrc : RegisterOperand<VSRC> {
- let ParserMatchClass = PPCRegVSRCAsmOperand;
-}
-
-def PPCRegVSFRCAsmOperand : AsmOperandClass {
- let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
-}
-def vsfrc : RegisterOperand<VSFRC> {
- let ParserMatchClass = PPCRegVSFRCAsmOperand;
-}
-
-def PPCRegVSSRCAsmOperand : AsmOperandClass {
- let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber";
-}
-def vssrc : RegisterOperand<VSSRC> {
- let ParserMatchClass = PPCRegVSSRCAsmOperand;
-}
-
-def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
- let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber";
-}
-
-def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
- let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
-}
-
def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
]>;
@@ -732,6 +703,11 @@ let hasSideEffects = 0 in {
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsnabsdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fneg (fabs f64:$XB)))]>;
+ let isCodeGenOnly = 1 in
+ def XSNABSDPs : XX2Form<60, 361,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsnabsdp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fabs f32:$XB)))]>;
def XSNEGDP : XX2Form<60, 377,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsnegdp $XT, $XB", IIC_VecFP,
@@ -2897,10 +2873,32 @@ def : Pat<(v2i64 (PPCvcmp_rec v2i64:$vA, v2i64:$vB, 199)),
// XL Compat builtins.
def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (XSMSUBMDP $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (XSNMSUBMDP $A, $B, $C)>;
def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>;
def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>;
def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>;
+def : Pat<(int_ppc_fnabs f64:$A), (XSNABSDP $A)>;
+def : Pat<(int_ppc_fnabss f32:$A), (XSNABSDPs $A)>;
+
+// XXMRGLW and XXMRGHW are direct replacements for VMRGLW and VMRGHW,
+// respectively. Prefer the VSX form for greater register range.
+def : Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+          (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC),
+                                     (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+          (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC),
+                                     (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vmrglw_shuffle v16i8:$vA, v16i8:$vB),
+          (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vA, VSRC),
+                                     (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>;
+def : Pat<(vmrghw_shuffle v16i8:$vA, v16i8:$vB),
+          (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vA, VSRC),
+                                     (COPY_TO_REGCLASS $vB, VSRC)), VRRC)>;
+def : Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+          (COPY_TO_REGCLASS (XXMRGLW (COPY_TO_REGCLASS $vB, VSRC),
+                                     (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+          (COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vB, VSRC),
+                                     (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
} // HasVSX
// Any big endian VSX subtarget.
@@ -3311,7 +3309,6 @@ def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
// XL Compat builtins.
def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (XSMSUBMSP $A, $B, $C)>;
-def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (XSNMSUBMSP $A, $B, $C)>;
def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (XSNMADDMSP $A, $B, $C)>;
def : Pat<(int_ppc_fres f32:$A), (XSRESP $A)>;
def : Pat<(i32 (int_ppc_extract_exp f64:$A)),
@@ -3370,6 +3367,15 @@ def : Pat<(f32 (vector_extract v4f32:$S, i32:$Idx)),
def : Pat<(f64 (vector_extract v2f64:$S, i32:$Idx)),
(f64 VectorExtractions.BE_32B_VARIABLE_DOUBLE)>;
+
+defm : ScalToVecWPermute<
+ v4i32, (i32 (load ForceXForm:$src)),
+ (XXSLDWIs (LIWZX ForceXForm:$src), 1),
+ (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
+defm : ScalToVecWPermute<
+ v4f32, (f32 (load ForceXForm:$src)),
+ (XXSLDWIs (LIWZX ForceXForm:$src), 1),
+ (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
} // HasVSX, HasP8Vector, IsBigEndian
// Big endian Power8 64Bit VSX subtarget.
@@ -3384,14 +3390,6 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 ForceXForm:$src)))),
(v2i64 (SUBREG_TO_REG (i64 1), (LIWAX ForceXForm:$src), sub_64))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 ForceXForm:$src)))),
(v2i64 (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64))>;
-defm : ScalToVecWPermute<
- v4i32, (i32 (load ForceXForm:$src)),
- (XXSLDWIs (LIWZX ForceXForm:$src), 1),
- (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
-defm : ScalToVecWPermute<
- v4f32, (f32 (load ForceXForm:$src)),
- (XXSLDWIs (LIWZX ForceXForm:$src), 1),
- (SUBREG_TO_REG (i64 1), (LIWZX ForceXForm:$src), sub_64)>;
def : Pat<DWToSPExtractConv.BVU,
(v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 0c7be96a0595..4689c0638ca6 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -117,7 +117,6 @@ using namespace llvm;
static cl::opt<unsigned>
MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24),
- cl::ZeroOrMore,
cl::desc("Potential common base number threshold per function "
"for PPC loop prep"));
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 22c5b6c11289..976effb96adc 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -107,7 +107,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
if (Subtarget->isUsingPCRelativeCalls()) {
if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 ||
MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 ||
- MIOpcode == PPC::BL8_NOTOC) {
+ MIOpcode == PPC::BL8_NOTOC || MIOpcode == PPC::BL8_NOTOC_RM) {
RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
}
if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG)
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index e5fa02bc8ccf..67d91d23962c 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachinePostDominators.h"
@@ -985,7 +986,7 @@ bool PPCMIPeephole::simplifyCode() {
LiMI->getOpcode() == PPC::LI8) &&
"Invalid Opcode!");
auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
- LiMI->RemoveOperand(1); // remove the imm of LI
+ LiMI->removeOperand(1); // remove the imm of LI
LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
: PPC::ADDI8));
MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 782d41f93ae5..9d6dfd16ff9d 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -23,6 +23,13 @@ void PPCFunctionInfo::anchor() {}
PPCFunctionInfo::PPCFunctionInfo(const MachineFunction &MF)
: DisableNonVolatileCR(PPCDisableNonVolatileCR) {}
+MachineFunctionInfo *
+PPCFunctionInfo::clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *>
+ &Src2DstMBB) const {
+ return DestMF.cloneInfo<PPCFunctionInfo>(*this);
+}
+
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol(MachineFunction &MF) const {
const DataLayout &DL = MF.getDataLayout();
return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 07c503d47e98..b918e723de00 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -153,6 +153,11 @@ private:
public:
explicit PPCFunctionInfo(const MachineFunction &MF);
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index 9d5206f8fd43..58b74c6b8c7a 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -15,6 +15,7 @@
#include "PPCSubtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
using namespace llvm;
namespace {
@@ -266,13 +267,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
continue;
auto DepOpIdx = Feature.depOpIdx();
- if (DepOpIdx.hasValue()) {
+ if (DepOpIdx) {
// Checking if the result of the FirstMI is the desired operand of the
// SecondMI if the DepOpIdx is set. Otherwise, ignore it.
if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
return false;
}
-
+
// Checking more on the instruction operands.
if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI))
return true;
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index a8853609a7c8..82c150b988ab 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -46,6 +47,10 @@ static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
+static cl::opt<uint64_t>
+DSCRValue("ppc-set-dscr", cl::Hidden,
+ cl::desc("Set the Data Stream Control Register."));
+
namespace {
static bool hasPCRelativeForm(MachineInstr &Use) {
@@ -407,6 +412,38 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ // If the user wants to set the DSCR using command-line options,
+ // load in the specified value at the start of main.
+ if (DSCRValue.getNumOccurrences() > 0 && MF.getName().equals("main") &&
+ MF.getFunction().hasExternalLinkage()) {
+ DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask
+ RegScavenger RS;
+ MachineBasicBlock &MBB = MF.front();
+ // Find an unused GPR according to register liveness
+ RS.enterBasicBlock(MBB);
+ unsigned InDSCR = RS.FindUnusedReg(&PPC::GPRCRegClass);
+ if (InDSCR) {
+ const PPCInstrInfo *TII =
+ MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+ DebugLoc dl;
+ MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point
+ // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and
+ // ORI, then move to DSCR. If the requested DSCR value is contained
+ // in a 16-bit signed number, we can emit a single `LI`, but the
+ // impact of saving one instruction in one function does not warrant
+ // any additional complexity in the logic here.
+ BuildMI(MBB, IP, dl, TII->get(PPC::LIS), InDSCR)
+ .addImm(DSCRValue >> 16);
+ BuildMI(MBB, IP, dl, TII->get(PPC::ORI), InDSCR)
+ .addReg(InDSCR)
+ .addImm(DSCRValue & 0xFFFF);
+ BuildMI(MBB, IP, dl, TII->get(PPC::MTUDSCR))
+ .addReg(InDSCR, RegState::Kill);
+ } else
+ errs() << "Warning: Ran out of registers - Unable to set DSCR as "
+ "requested";
+ }
+
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
// Remove UNENCODED_NOP even when this pass is disabled.
// This needs to be done unconditionally so we don't emit zeros
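For reference, a minimal standalone sketch (not LLVM code; the struct and
function names are illustrative) of the halfword split the LIS/ORI sequence
above performs on the masked DSCR value: LIS loads the high halfword shifted
left by 16, and ORI then ors in the low halfword. Because the 25-bit mask
clears everything above bit 24, the high halfword is at most 0x01FF and
sign extension is not a concern.

#include <cassert>
#include <cstdint>

struct HalfwordSplit {
  uint16_t Hi; // immediate for LIS
  uint16_t Lo; // immediate for ORI
};

static HalfwordSplit splitForLisOri(uint64_t DSCRValue) {
  uint32_t Masked = uint32_t(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask
  return {uint16_t(Masked >> 16), uint16_t(Masked & 0xFFFF)};
}

int main() {
  HalfwordSplit S = splitForLisOri(0x12345678);
  // Reassemble the value the way LIS followed by ORI would in the GPR.
  assert(((uint32_t(S.Hi) << 16) | S.Lo) == 0x00345678);
  return 0;
}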
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 76b016c0ee79..7349eb8addc9 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -90,6 +91,8 @@ ReportAccMoves("ppc-report-acc-moves",
cl::Hidden, cl::init(false));
#endif
+extern cl::opt<bool> DisableAutoPairedVecSt;
+
static unsigned offsetMinAlignForOpcode(unsigned OpC);
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
@@ -113,6 +116,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+ ImmToIdxMap[PPC::LQ] = PPC::LQX_PSEUDO;
+ ImmToIdxMap[PPC::STQ] = PPC::STQX_PSEUDO;
// VSX
ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
@@ -183,6 +188,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (!TM.isPPC64() && Subtarget.isAIXABI())
report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
if (Subtarget.hasVSX()) {
+ if (Subtarget.pairedVectorMemops())
+ return CSR_64_AllRegs_VSRP_SaveList;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_SaveList;
return CSR_64_AllRegs_VSX_SaveList;
@@ -210,6 +217,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isAIXABI())
report_fatal_error("Cold calling unimplemented on AIX.");
if (TM.isPPC64()) {
+ if (Subtarget.pairedVectorMemops())
+ return SaveR2 ? CSR_SVR64_ColdCC_R2_VSRP_SaveList
+ : CSR_SVR64_ColdCC_VSRP_SaveList;
if (Subtarget.hasAltivec())
return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
: CSR_SVR64_ColdCC_Altivec_SaveList;
@@ -217,7 +227,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_SVR64_ColdCC_SaveList;
}
// 32-bit targets.
- if (Subtarget.hasAltivec())
+ if (Subtarget.pairedVectorMemops())
+ return CSR_SVR32_ColdCC_VSRP_SaveList;
+ else if (Subtarget.hasAltivec())
return CSR_SVR32_ColdCC_Altivec_SaveList;
else if (Subtarget.hasSPE())
return CSR_SVR32_ColdCC_SPE_SaveList;
@@ -225,6 +237,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
// Standard calling convention CSRs.
if (TM.isPPC64()) {
+ if (Subtarget.pairedVectorMemops())
+ return SaveR2 ? CSR_SVR464_R2_VSRP_SaveList : CSR_SVR464_VSRP_SaveList;
if (Subtarget.hasAltivec() &&
(!Subtarget.isAIXABI() || TM.getAIXExtendedAltivecABI())) {
return SaveR2 ? CSR_PPC64_R2_Altivec_SaveList
@@ -239,6 +253,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_AIX32_SaveList;
return CSR_AIX32_SaveList;
}
+ if (Subtarget.pairedVectorMemops())
+ return CSR_SVR432_VSRP_SaveList;
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
@@ -252,6 +268,8 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX()) {
+ if (Subtarget.pairedVectorMemops())
+ return CSR_64_AllRegs_VSRP_RegMask;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_RegMask;
return CSR_64_AllRegs_VSX_RegMask;
@@ -275,20 +293,32 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}
if (CC == CallingConv::Cold) {
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
- : CSR_SVR64_ColdCC_RegMask)
- : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
- : (Subtarget.hasSPE()
- ? CSR_SVR32_ColdCC_SPE_RegMask
- : CSR_SVR32_ColdCC_RegMask));
+ if (TM.isPPC64())
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR64_ColdCC_VSRP_RegMask
+ : (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
+ : CSR_SVR64_ColdCC_RegMask);
+ else
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR32_ColdCC_VSRP_RegMask
+ : (Subtarget.hasAltivec()
+ ? CSR_SVR32_ColdCC_Altivec_RegMask
+ : (Subtarget.hasSPE() ? CSR_SVR32_ColdCC_SPE_RegMask
+ : CSR_SVR32_ColdCC_RegMask));
}
- return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
- : CSR_PPC64_RegMask)
- : (Subtarget.hasAltivec()
- ? CSR_SVR432_Altivec_RegMask
- : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
- : CSR_SVR432_RegMask));
+ if (TM.isPPC64())
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR464_VSRP_RegMask
+ : (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
+ : CSR_PPC64_RegMask);
+ else
+ return Subtarget.pairedVectorMemops()
+ ? CSR_SVR432_VSRP_RegMask
+ : (Subtarget.hasAltivec()
+ ? CSR_SVR432_Altivec_RegMask
+ : (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
+ : CSR_SVR432_RegMask));
}
const uint32_t*
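A minimal standalone model (not LLVM code; only the mask names are taken from
the hunk above, the function itself is illustrative) of the cold-calling-
convention mask selection as rewritten above: paired vector memops take
precedence over Altivec, and SPE is only considered for 32-bit targets.

#include <cstdio>

static const char *coldCCMask(bool IsPPC64, bool PairedVecMemops,
                              bool Altivec, bool SPE) {
  if (IsPPC64)
    return PairedVecMemops ? "CSR_SVR64_ColdCC_VSRP_RegMask"
           : Altivec       ? "CSR_SVR64_ColdCC_Altivec_RegMask"
                           : "CSR_SVR64_ColdCC_RegMask";
  return PairedVecMemops ? "CSR_SVR32_ColdCC_VSRP_RegMask"
         : Altivec       ? "CSR_SVR32_ColdCC_Altivec_RegMask"
         : SPE           ? "CSR_SVR32_ColdCC_SPE_RegMask"
                         : "CSR_SVR32_ColdCC_RegMask";
}

int main() {
  std::printf("%s\n", coldCCMask(true, true, true, false));
  std::printf("%s\n", coldCCMask(false, false, false, true));
  return 0;
}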
@@ -463,6 +493,14 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
LLVM_DEBUG(dbgs() << "TRUE - Memory operand is X-Form.\n");
return true;
}
+
+ // This is a spill/restore of a quadword.
+ if ((Opcode == PPC::RESTORE_QUADWORD) || (Opcode == PPC::SPILL_QUADWORD)) {
+ LLVM_DEBUG(dbgs() << "Memory Operand: " << InstrInfo->getName(Opcode)
+ << " for register " << printReg(Reg, this) << ".\n");
+ LLVM_DEBUG(dbgs() << "TRUE - Memory operand is a quadword.\n");
+ return true;
+ }
}
LLVM_DEBUG(dbgs() << "FALSE - Scavenging is not required.\n");
return false;
@@ -1082,7 +1120,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
if (SpillsKnownBit && KillsCRBit && !SeenUse) {
Ins->setDesc(TII.get(PPC::UNENCODED_NOP));
- Ins->RemoveOperand(0);
+ Ins->removeOperand(0);
}
}
@@ -1163,6 +1201,59 @@ static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed,
#endif
}
+static void spillRegPairs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ const TargetInstrInfo &TII, Register SrcReg,
+ unsigned FrameIndex, bool IsLittleEndian,
+ bool IsKilled, bool TwoPairs) {
+ unsigned Offset = 0;
+ if (TwoPairs)
+ Offset = IsLittleEndian ? 48 : 0;
+ else
+ Offset = IsLittleEndian ? 16 : 0;
+ Register Reg = (SrcReg > PPC::VSRp15) ? PPC::V0 + (SrcReg - PPC::VSRp16) * 2
+ : PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV))
+ .addReg(Reg, getKillRegState(IsKilled)),
+ FrameIndex, Offset);
+ Offset += IsLittleEndian ? -16 : 16;
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV))
+ .addReg(Reg + 1, getKillRegState(IsKilled)),
+ FrameIndex, Offset);
+ if (TwoPairs) {
+ Offset += IsLittleEndian ? -16 : 16;
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV))
+ .addReg(Reg + 2, getKillRegState(IsKilled)),
+ FrameIndex, Offset);
+ Offset += IsLittleEndian ? -16 : 16;
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV))
+ .addReg(Reg + 3, getKillRegState(IsKilled)),
+ FrameIndex, Offset);
+ }
+}
+
+/// Remove any STXVP[X] instructions and split them out into a pair of
+/// STXV[X] instructions if --disable-auto-paired-vec-st is specified on
+/// the command line.
+void PPCRegisterInfo::lowerOctWordSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ assert(DisableAutoPairedVecSt &&
+ "Expecting to do this only if paired vector stores are disabled.");
+ MachineInstr &MI = *II; // STXVP <SrcReg>, <offset>
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ Register SrcReg = MI.getOperand(0).getReg();
+ bool IsLittleEndian = Subtarget.isLittleEndian();
+ bool IsKilled = MI.getOperand(0).isKill();
+ spillRegPairs(MBB, II, DL, TII, SrcReg, FrameIndex, IsLittleEndian, IsKilled,
+ /* TwoPairs */ false);
+ // Discard the original instruction.
+ MBB.erase(II);
+}
+
/// lowerACCSpilling - Generate the code for spilling the accumulator register.
/// Similarly to other spills/reloads that use pseudo-ops, we do not actually
/// eliminate the FrameIndex here nor compute the stack offset. We simply
@@ -1192,12 +1283,17 @@ void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II,
// adjust the offset of the store that is within the 64-byte stack slot.
if (IsPrimed)
BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(Reg, getKillRegState(IsKilled)),
- FrameIndex, IsLittleEndian ? 32 : 0);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(Reg + 1, getKillRegState(IsKilled)),
- FrameIndex, IsLittleEndian ? 0 : 32);
+ if (DisableAutoPairedVecSt)
+ spillRegPairs(MBB, II, DL, TII, Reg, FrameIndex, IsLittleEndian, IsKilled,
+ /* TwoPairs */ true);
+ else {
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 32 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg + 1, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 0 : 32);
+ }
if (IsPrimed && !IsKilled)
BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
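A minimal standalone model (not LLVM code; everything below is illustrative)
of the frame offsets used by spillRegPairs above when paired stores are
disabled: little-endian begins at the highest 16-byte slot (48 for a full
accumulator, 16 for a single pair) and walks down, big-endian begins at 0 and
walks up, emitting one STXV per 128-bit half of the spilled value.

#include <cstdio>

static void printSpillOffsets(bool IsLittleEndian, bool TwoPairs) {
  int NumStores = TwoPairs ? 4 : 2;
  int Offset = IsLittleEndian ? (TwoPairs ? 48 : 16) : 0;
  for (int I = 0; I < NumStores; ++I) {
    std::printf("STXV %d at frame offset %d\n", I, Offset);
    Offset += IsLittleEndian ? -16 : 16;
  }
}

int main() {
  printSpillOffsets(/*IsLittleEndian=*/true, /*TwoPairs=*/true);   // 48 32 16 0
  printSpillOffsets(/*IsLittleEndian=*/false, /*TwoPairs=*/false); // 0 16
  return 0;
}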
@@ -1433,6 +1529,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) {
lowerACCRestore(II, FrameIndex);
return;
+ } else if (OpC == PPC::STXVP && DisableAutoPairedVecSt) {
+ lowerOctWordSpilling(II, FrameIndex);
+ return;
} else if (OpC == PPC::SPILL_QUADWORD) {
lowerQuadwordSpilling(II, FrameIndex);
return;
@@ -1451,7 +1550,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC);
// Now add the frame object offset to the offset from r1.
- int Offset = MFI.getObjectOffset(FrameIndex);
+ int64_t Offset = MFI.getObjectOffset(FrameIndex);
Offset += MI.getOperand(OffsetOperandNo).getImm();
// If we're not using a Frame Pointer that has been set to the value of the
@@ -1507,17 +1606,21 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
Register SRegHi = MF.getRegInfo().createVirtualRegister(RC),
SReg = MF.getRegInfo().createVirtualRegister(RC);
+ unsigned NewOpcode = 0u;
// Insert a set of rA with the full offset value before the ld, st, or add
if (isInt<16>(Offset))
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg)
- .addImm(Offset);
- else {
+ .addImm(Offset);
+ else if (isInt<32>(Offset)) {
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
- .addImm(Offset >> 16);
+ .addImm(Offset >> 16);
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
- .addReg(SRegHi, RegState::Kill)
- .addImm(Offset);
+ .addReg(SRegHi, RegState::Kill)
+ .addImm(Offset);
+ } else {
+ assert(is64Bit && "Huge stack is only supported on PPC64");
+ TII.materializeImmPostRA(MBB, II, dl, SReg, Offset);
}
// Convert into indexed form of the instruction:
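A minimal standalone model (not LLVM code; the helper below is illustrative)
of the three-tier offset materialization introduced above: a 16-bit signed
offset needs a single LI, a 32-bit signed offset needs a LIS/ORI pair, and
anything larger falls back to a full post-RA immediate-materialization
sequence, which is only expected on 64-bit targets.

#include <cstdint>
#include <cstdio>

static const char *offsetLoweringFor(int64_t Offset, bool Is64Bit) {
  // Mirrors isInt<16> / isInt<32> style range checks.
  auto fitsInSigned = [](int64_t V, unsigned Bits) {
    return V >= -(int64_t(1) << (Bits - 1)) && V < (int64_t(1) << (Bits - 1));
  };
  if (fitsInSigned(Offset, 16))
    return "LI";
  if (fitsInSigned(Offset, 32))
    return "LIS + ORI";
  return Is64Bit ? "materializeImmPostRA" : "unsupported (huge stack)";
}

int main() {
  std::printf("%s\n", offsetLoweringFor(32000, true));    // LI
  std::printf("%s\n", offsetLoweringFor(1 << 20, true));  // LIS + ORI
  std::printf("%s\n", offsetLoweringFor(1LL << 40, true)); // materializeImmPostRA
  return 0;
}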
@@ -1532,7 +1635,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OpC != TargetOpcode::INLINEASM_BR) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
- unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ NewOpcode = ImmToIdxMap.find(OpC)->second;
MI.setDesc(TII.get(NewOpcode));
OperandBase = 1;
} else {
@@ -1542,6 +1645,20 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
+
+ // Since these are not real X-Form instructions, we must
+ // add the registers and access 0(NewReg) rather than
+ // emitting the X-Form pseudo.
+ if (NewOpcode == PPC::LQX_PSEUDO || NewOpcode == PPC::STQX_PSEUDO) {
+ assert(is64Bit && "Quadword loads/stores only supported in 64-bit mode");
+ Register NewReg = MF.getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
+ BuildMI(MBB, II, dl, TII.get(PPC::ADD8), NewReg)
+ .addReg(SReg, RegState::Kill)
+ .addReg(StackReg);
+ MI.setDesc(TII.get(NewOpcode == PPC::LQX_PSEUDO ? PPC::LQ : PPC::STQ));
+ MI.getOperand(OperandBase + 1).ChangeToRegister(NewReg, false);
+ MI.getOperand(OperandBase).ChangeToImmediate(0);
+ }
}
Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
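A minimal standalone model (not LLVM code; names are illustrative) of the
LQX_PSEUDO/STQX_PSEUDO rewrite above: because lq/stq have no true X-form,
the base and index registers are first summed with ADD8 into a fresh
register, and the quadword access then becomes a D-form access at
displacement 0 off that sum.

#include <cstdint>
#include <cstdio>

struct DFormAccess {
  const char *Opcode; // "lq" or "stq"
  int64_t Disp;       // always 0 after the rewrite
  uint64_t BaseAddr;  // contents of the register holding base + index
};

static DFormAccess lowerQuadXForm(bool IsLoad, uint64_t Base, uint64_t Index) {
  uint64_t Sum = Base + Index; // what ADD8 NewReg, SReg, StackReg computes
  return {IsLoad ? "lq" : "stq", 0, Sum};
}

int main() {
  DFormAccess A = lowerQuadXForm(/*IsLoad=*/true, 0x1000, 0x40);
  std::printf("%s rt, %lld(reg holding 0x%llx)\n", A.Opcode,
              (long long)A.Disp, (unsigned long long)A.BaseAddr);
  return 0;
}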
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 114f6d0f4c66..aaa841fffa1b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -130,6 +130,8 @@ public:
void lowerCRBitRestore(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
+ void lowerOctWordSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
void lowerACCSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
void lowerACCRestore(MachineBasicBlock::iterator II,
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 044035e0ef29..7892b0d12d01 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -18,8 +18,6 @@ def sub_32 : SubRegIndex<32>;
def sub_64 : SubRegIndex<64>;
def sub_vsx0 : SubRegIndex<128>;
def sub_vsx1 : SubRegIndex<128, 128>;
-def sub_pair0 : SubRegIndex<256>;
-def sub_pair1 : SubRegIndex<256, 256>;
def sub_gp8_x0 : SubRegIndex<64>;
def sub_gp8_x1 : SubRegIndex<64, 64>;
}
@@ -100,21 +98,6 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
-// ACC - One of the 8 512-bit VSX accumulators.
-class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
- let HWEncoding{2-0} = num;
- let SubRegs = subregs;
-}
-
-// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
-// Without using this register class, the register allocator has no way to
-// differentiate a primed accumulator from an unprimed accumulator.
-// This may result in invalid copies between primed and unprimed accumulators.
-class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
- let HWEncoding{2-0} = num;
- let SubRegs = subregs;
-}
-
// VSR Pairs - One of the 32 paired even-odd consecutive VSRs.
class VSRPair<bits<5> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{4-0} = num;
@@ -272,9 +255,6 @@ def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
// SPE extra registers
-// SPE Accumulator for multiply-accumulate SPE operations. Never directly
-// accessed, so there's no real encoding for it.
-def SPEACC: DwarfRegNum<[99, 111]>;
def SPEFSCR: SPR<512, "spefscr">, DwarfRegNum<[612, 112]>;
def XER: SPR<1, "xer">, DwarfRegNum<[76]>;
@@ -448,72 +428,6 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
let CopyCost = -1;
}
-let SubRegIndices = [sub_pair0, sub_pair1] in {
- def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
- def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
- def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
- def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
- def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
- def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
- def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
- def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
-}
-def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
- ACC4, ACC5, ACC6, ACC7)> {
- // The AllocationPriority is in the range [0, 63]. Assigned the ACC registers
- // the highest possible priority in this range to force the register allocator
- // to assign these registers first. This is done because the ACC registers
- // must represent 4 advacent vector registers. For example ACC1 must be
- // VS4 - VS7. The value here must be at least 32 as we want to allocate
- // these registers even before we allocate global ranges.
- let AllocationPriority = 63;
- let Size = 512;
-}
-
-let SubRegIndices = [sub_pair0, sub_pair1] in {
- def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
- def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
- def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
- def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
- def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
- def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
- def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
- def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
-}
-def UACCRC : RegisterClass<"PPC", [v512i1], 128,
- (add UACC0, UACC1, UACC2, UACC3,
- UACC4, UACC5, UACC6, UACC7)> {
- // The AllocationPriority for the UACC registers is still high and must be at
- // least 32 as we want to allocate these registers before we allocate other
- // global ranges. The value must be less than the AllocationPriority of the
- // ACC registers.
- let AllocationPriority = 36;
- let Size = 512;
-}
-
-// FIXME: This allocation order may increase stack frame size when allocating
-// non-volatile registers.
-//
-// Placing Altivec registers first and allocate the rest as underlying VSX
-// ones, to reduce interference with accumulator registers (lower 32 VSRs).
-// This reduces copies when loading for accumulators, which is common use for
-// paired VSX registers.
-def VSRpRC :
- RegisterClass<"PPC", [v256i1], 128,
- (add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21,
- VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30,
- VSRp29, VSRp28, VSRp27, VSRp26,
- (sequence "VSRp%u", 0, 6),
- (sequence "VSRp%u", 15, 7))> {
- // Give the VSRp registers a non-zero AllocationPriority. The value is less
- // than 32 as these registers should not always be allocated before global
- // ranges and the value should be less than the AllocationPriority - 32 for
- // the UACC registers. Even global VSRp registers should be allocated after
- // the UACC registers have been chosen.
- let AllocationPriority = 2;
- let Size = 256;
-}
-
// Make AllocationOrder as similar as G8RC's to avoid potential spilling.
// Similarly, we have an AltOrder for 64-bit ELF ABI which r2 is allocated
// at last.
@@ -528,3 +442,572 @@ def G8pRC :
}];
let Size = 128;
}
+
+include "PPCRegisterInfoMMA.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+// In the default PowerPC assembler syntax, registers are specified simply
+// by number, so they cannot be distinguished from immediate values (without
+// looking at the opcode). This means that the default operand matching logic
+// for the asm parser does not work, and we need to specify custom matchers.
+// Since those can only be specified with RegisterOperand classes and not
+// directly on the RegisterClass, all instructions patterns used by the asm
+// parser need to use a RegisterOperand (instead of a RegisterClass) for
+// all their register operands.
+// For this purpose, we define one RegisterOperand for each RegisterClass,
+// using the same name as the class, just in lower case.
+
+def PPCRegGPRCAsmOperand : AsmOperandClass {
+ let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
+}
+def gprc : RegisterOperand<GPRC> {
+ let ParserMatchClass = PPCRegGPRCAsmOperand;
+}
+def PPCRegG8RCAsmOperand : AsmOperandClass {
+ let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
+}
+def g8rc : RegisterOperand<G8RC> {
+ let ParserMatchClass = PPCRegG8RCAsmOperand;
+}
+def PPCRegG8pRCAsmOperand : AsmOperandClass {
+ let Name = "RegG8pRC"; let PredicateMethod = "isEvenRegNumber";
+}
+def g8prc : RegisterOperand<G8pRC> {
+ let ParserMatchClass = PPCRegG8pRCAsmOperand;
+}
+def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
+ let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
+ let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
+}
+def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
+ let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
+}
+def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
+ let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
+}
+def PPCRegF8RCAsmOperand : AsmOperandClass {
+ let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
+}
+def f8rc : RegisterOperand<F8RC> {
+ let ParserMatchClass = PPCRegF8RCAsmOperand;
+}
+def PPCRegF4RCAsmOperand : AsmOperandClass {
+ let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
+}
+def f4rc : RegisterOperand<F4RC> {
+ let ParserMatchClass = PPCRegF4RCAsmOperand;
+}
+def PPCRegVRRCAsmOperand : AsmOperandClass {
+ let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
+}
+def vrrc : RegisterOperand<VRRC> {
+ let ParserMatchClass = PPCRegVRRCAsmOperand;
+}
+def PPCRegVFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
+}
+def vfrc : RegisterOperand<VFRC> {
+ let ParserMatchClass = PPCRegVFRCAsmOperand;
+}
+def PPCRegCRBITRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
+}
+def crbitrc : RegisterOperand<CRBITRC> {
+ let ParserMatchClass = PPCRegCRBITRCAsmOperand;
+}
+def PPCRegCRRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
+}
+def crrc : RegisterOperand<CRRC> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+def PPCRegSPERCAsmOperand : AsmOperandClass {
+ let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
+}
+def sperc : RegisterOperand<SPERC> {
+ let ParserMatchClass = PPCRegSPERCAsmOperand;
+}
+def PPCRegSPE4RCAsmOperand : AsmOperandClass {
+ let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber";
+}
+def spe4rc : RegisterOperand<GPRC> {
+ let ParserMatchClass = PPCRegSPE4RCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+ let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+ let PrintMethod = "printU1ImmOperand";
+ let ParserMatchClass = PPCU1ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCU2ImmAsmOperand : AsmOperandClass {
+ let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u2imm : Operand<i32> {
+ let PrintMethod = "printU2ImmOperand";
+ let ParserMatchClass = PPCU2ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCATBitsAsHintAsmOperand : AsmOperandClass {
+ let Name = "ATBitsAsHint"; let PredicateMethod = "isATBitsAsHint";
+ let RenderMethod = "addImmOperands"; // Irrelevant, predicate always fails.
+}
+def atimm : Operand<i32> {
+ let PrintMethod = "printATBitsAsHint";
+ let ParserMatchClass = PPCATBitsAsHintAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCU3ImmAsmOperand : AsmOperandClass {
+ let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+ let PrintMethod = "printU3ImmOperand";
+ let ParserMatchClass = PPCU3ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def PPCU4ImmAsmOperand : AsmOperandClass {
+ let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u4imm : Operand<i32> {
+ let PrintMethod = "printU4ImmOperand";
+ let ParserMatchClass = PPCU4ImmAsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS5ImmAsmOperand : AsmOperandClass {
+ let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+ let ParserMatchClass = PPCS5ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<5>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU5ImmAsmOperand : AsmOperandClass {
+ let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+ let ParserMatchClass = PPCU5ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU6ImmAsmOperand : AsmOperandClass {
+ let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+ let ParserMatchClass = PPCU6ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<6>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU7ImmAsmOperand : AsmOperandClass {
+ let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u7imm : Operand<i32> {
+ let PrintMethod = "printU7ImmOperand";
+ let ParserMatchClass = PPCU7ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU8ImmAsmOperand : AsmOperandClass {
+ let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u8imm : Operand<i32> {
+ let PrintMethod = "printU8ImmOperand";
+ let ParserMatchClass = PPCU8ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU10ImmAsmOperand : AsmOperandClass {
+ let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u10imm : Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+ let ParserMatchClass = PPCU10ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<10>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU12ImmAsmOperand : AsmOperandClass {
+ let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+ let ParserMatchClass = PPCU12ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<12>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS16ImmAsmOperand : AsmOperandClass {
+ let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addS16ImmOperands";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCU16ImmAsmOperand : AsmOperandClass {
+ let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
+ let RenderMethod = "addU16ImmOperands";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS17ImmAsmOperand : AsmOperandClass {
+ let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
+ let RenderMethod = "addS16ImmOperands";
+}
+def s17imm : Operand<i32> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCS34ImmAsmOperand : AsmOperandClass {
+ let Name = "S34Imm";
+ let PredicateMethod = "isS34Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s34imm : Operand<i64> {
+ let PrintMethod = "printS34ImmOperand";
+ let EncoderMethod = "getImm34EncodingNoPCRel";
+ let ParserMatchClass = PPCS34ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<34>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def s34imm_pcrel : Operand<i64> {
+ let PrintMethod = "printS34ImmOperand";
+ let EncoderMethod = "getImm34EncodingPCRel";
+ let ParserMatchClass = PPCS34ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<34>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def PPCImmZeroAsmOperand : AsmOperandClass {
+ let Name = "ImmZero";
+ let PredicateMethod = "isImmZero";
+ let RenderMethod = "addImmOperands";
+}
+def immZero : Operand<i32> {
+ let PrintMethod = "printImmZeroOperand";
+ let ParserMatchClass = PPCImmZeroAsmOperand;
+ let DecoderMethod = "decodeImmZeroOperand";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
+def PPCDirectBrAsmOperand : AsmOperandClass {
+ let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
+ let RenderMethod = "addBranchTargetOperands";
+}
+def directbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+ let DecoderMethod = "decodeDirectBrTarget";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
+}
+def absdirectbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCondBrAsmOperand : AsmOperandClass {
+ let Name = "CondBr"; let PredicateMethod = "isCondBr";
+ let RenderMethod = "addBranchTargetOperands";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
+ let DecoderMethod = "decodeCondBrTarget";
+ let ParserMatchClass = PPCCondBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
+}
+def abscondbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsCondBrEncoding";
+ let ParserMatchClass = PPCCondBrAsmOperand;
+}
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+ let DecoderMethod = "decodeDirectBrTarget";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
+}
+def abscalltarget : Operand<iPTR> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCRBitMaskOperand : AsmOperandClass {
+ let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
+ let DecoderMethod = "decodeCRBitMOperand";
+ let ParserMatchClass = PPCCRBitMaskOperand;
+}
+// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def PPCRegGxRCNoR0Operand : AsmOperandClass {
+ let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
+ let ParserMatchClass = PPCRegGxRCNoR0Operand;
+}
+
+// New addressing modes with 34 bit immediates.
+def PPCDispRI34Operand : AsmOperandClass {
+ let Name = "DispRI34"; let PredicateMethod = "isS34Imm";
+ let RenderMethod = "addImmOperands";
+}
+def dispRI34 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRI34Operand;
+}
+def memri34 : Operand<iPTR> { // memri, imm is a 34-bit value.
+ let PrintMethod = "printMemRegImm34";
+ let MIOperandInfo = (ops dispRI34:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRI34Encoding";
+ let DecoderMethod = "decodeMemRI34Operands";
+}
+// memri, imm is a 34-bit value for pc-relative instructions where the
+// base register is set to zero.
+def memri34_pcrel : Operand<iPTR> { // memri, imm is a 34-bit value.
+ let PrintMethod = "printMemRegImm34PCRel";
+ let MIOperandInfo = (ops dispRI34:$imm, immZero:$reg);
+ let EncoderMethod = "getMemRI34PCRelEncoding";
+ let DecoderMethod = "decodeMemRI34PCRelOperands";
+}
+
+// A version of ptr_rc usable with the asm parser.
+def PPCRegGxRCOperand : AsmOperandClass {
+ let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
+ let ParserMatchClass = PPCRegGxRCOperand;
+}
+
+def PPCDispRIOperand : AsmOperandClass {
+ let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addS16ImmOperands";
+}
+def dispRI : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIOperand;
+}
+def PPCDispRIXOperand : AsmOperandClass {
+ let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+ let RenderMethod = "addS16ImmOperands";
+}
+def dispRIX : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIXOperand;
+}
+def PPCDispRIHashOperand : AsmOperandClass {
+ let Name = "DispRIHash"; let PredicateMethod = "isHashImmX8";
+ let RenderMethod = "addImmOperands";
+}
+def dispRIHash : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIHashOperand;
+}
+def PPCDispRIX16Operand : AsmOperandClass {
+ let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16";
+ let RenderMethod = "addS16ImmOperands";
+}
+def dispRIX16 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIX16Operand;
+}
+def PPCDispSPE8Operand : AsmOperandClass {
+ let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE8 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE8Operand;
+}
+def PPCDispSPE4Operand : AsmOperandClass {
+ let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE4 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE4Operand;
+}
+def PPCDispSPE2Operand : AsmOperandClass {
+ let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE2 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE2Operand;
+}
+
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIEncoding";
+ let DecoderMethod = "decodeMemRIOperands";
+ let OperandType = "OPERAND_MEMORY";
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
+ let OperandType = "OPERAND_MEMORY";
+}
+def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
+ let DecoderMethod = "decodeMemRIXOperands";
+ let OperandType = "OPERAND_MEMORY";
+}
+def memrihash : Operand<iPTR> {
+ // memrihash 8-aligned for ROP Protection Instructions.
+ let PrintMethod = "printMemRegImmHash";
+ let MIOperandInfo = (ops dispRIHash:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIHashEncoding";
+ let DecoderMethod = "decodeMemRIHashOperands";
+ let OperandType = "OPERAND_MEMORY";
+}
+def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27}
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIX16Encoding";
+ let DecoderMethod = "decodeMemRIX16Operands";
+ let OperandType = "OPERAND_MEMORY";
+}
+def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE8DisEncoding";
+ let DecoderMethod = "decodeSPE8Operands";
+ let OperandType = "OPERAND_MEMORY";
+}
+def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE4DisEncoding";
+ let DecoderMethod = "decodeSPE4Operands";
+ let OperandType = "OPERAND_MEMORY";
+}
+def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE2DisEncoding";
+ let DecoderMethod = "decodeSPE2Operands";
+ let OperandType = "OPERAND_MEMORY";
+}
+
+// A single-register address. This is used with the SjLj
+// pseudo-instructions, which translate to LD/LWZ. These instructions
+// require G8RC_NOX0 registers.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
+ let OperandType = "OPERAND_MEMORY";
+}
+def PPCTLSRegOperand : AsmOperandClass {
+ let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
+ let RenderMethod = "addTLSRegOperands";
+}
+def tlsreg32 : Operand<i32> {
+ let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
+}
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
+ let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
+}
+
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsrc : RegisterOperand<VSRC> {
+ let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+ let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
+def PPCRegVSSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vssrc : RegisterOperand<VSSRC> {
+ let ParserMatchClass = PPCRegVSSRCAsmOperand;
+}
+
+def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
+ let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber";
+}
+
+def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
+ let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
+}
+
+def PPCRegVSRpRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRpRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrprc : RegisterOperand<VSRpRC> {
+ let ParserMatchClass = PPCRegVSRpRCAsmOperand;
+}
+
+def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber";
+}
+
+def vsrpevenrc : RegisterOperand<VSRpRC> {
+ let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand;
+ let EncoderMethod = "getVSRpEvenEncoding";
+ let DecoderMethod = "decodeVSRpEvenOperands";
+}
+
+def PPCRegACCRCAsmOperand : AsmOperandClass {
+ let Name = "RegACCRC"; let PredicateMethod = "isACCRegNumber";
+}
+
+def acc : RegisterOperand<ACCRC> {
+ let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
+
+def uacc : RegisterOperand<UACCRC> {
+ let ParserMatchClass = PPCRegACCRCAsmOperand;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td
new file mode 100644
index 000000000000..0b6305f95a0a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td
@@ -0,0 +1,106 @@
+//===-- PPCRegisterInfoMMA.td - The PowerPC Register File --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register info for registers related to MMA. These are the ACC and UACC
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_pair0 : SubRegIndex<256>;
+def sub_pair1 : SubRegIndex<256, 256>;
+}
+
+// ACC - One of the 8 512-bit VSX accumulators.
+class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
+// Without using this register class, the register allocator has no way to
+// differentiate a primed accumulator from an unprimed accumulator.
+// This may result in invalid copies between primed and unprimed accumulators.
+class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// SPE Accumulator for multiply-accumulate SPE operations. Never directly
+// accessed, so there's no real encoding for it.
+def SPEACC: DwarfRegNum<[99, 111]>;
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+ def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+ def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+ def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+ def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+ def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+ def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+ def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+ def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
+ ACC4, ACC5, ACC6, ACC7)> {
+  // The AllocationPriority is in the range [0, 63]. Assign the ACC registers
+ // the highest possible priority in this range to force the register allocator
+ // to assign these registers first. This is done because the ACC registers
+  // must represent 4 adjacent vector registers. For example ACC1 must be
+ // VS4 - VS7. The value here must be at least 32 as we want to allocate
+ // these registers even before we allocate global ranges.
+ let AllocationPriority = 63;
+ let Size = 512;
+}
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+ def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+ def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+ def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+ def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+ def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+ def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+ def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+ def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def UACCRC : RegisterClass<"PPC", [v512i1], 128,
+ (add UACC0, UACC1, UACC2, UACC3,
+ UACC4, UACC5, UACC6, UACC7)> {
+ // The AllocationPriority for the UACC registers is still high and must be at
+ // least 32 as we want to allocate these registers before we allocate other
+ // global ranges. The value must be less than the AllocationPriority of the
+ // ACC registers.
+ let AllocationPriority = 36;
+ let Size = 512;
+}
+
+// FIXME: This allocation order may increase stack frame size when allocating
+// non-volatile registers.
+//
+// Place Altivec registers first and allocate the rest as underlying VSX
+// ones, to reduce interference with accumulator registers (lower 32 VSRs).
+// This reduces copies when loading accumulators, which is a common use for
+// paired VSX registers.
+def VSRpRC :
+ RegisterClass<"PPC", [v256i1], 128,
+ (add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21,
+ VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30,
+ VSRp29, VSRp28, VSRp27, VSRp26,
+ (sequence "VSRp%u", 0, 6),
+ (sequence "VSRp%u", 15, 7))> {
+ // Give the VSRp registers a non-zero AllocationPriority. The value is less
+ // than 32 as these registers should not always be allocated before global
+ // ranges, and the value should be less than the AllocationPriority of the
+ // UACC registers minus 32. Even global VSRp registers should be allocated
+ // after the UACC registers have been chosen.
+ let AllocationPriority = 2;
+ let Size = 256;
+}
+
+
+
+
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
index bf56491f373a..f89ef735a367 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP10.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -36,7 +36,7 @@ def P10Model : SchedMachineModel {
let CompleteModel = 1;
// Do not support SPE (Signal Processing Engine) on Power 10.
- let UnsupportedFeatures = [HasSPE, IsE500, IsBookE];
+ let UnsupportedFeatures = [HasSPE, IsE500, IsBookE, IsISAFuture];
}
let SchedModel = P10Model in {
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 3dc069ecad8a..d35011171715 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -42,7 +42,7 @@ def P9Model : SchedMachineModel {
// Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions
// introduced in ISA 3.1.
let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA,
- PCRelativeMemops, IsISA3_1];
+ PCRelativeMemops, IsISA3_1, IsISAFuture];
}
let SchedModel = P9Model in {
@@ -404,7 +404,6 @@ let SchedModel = P9Model in {
def P9_LoadAndALU2Op_7C : WriteSequence<[P9_LS_4C, P9_ALU_3C]>;
def P9_LoadAndALU2Op_8C : WriteSequence<[P9_LS_5C, P9_ALU_3C]>;
def P9_LoadAndPMOp_8C : WriteSequence<[P9_LS_5C, P9_PM_3C]>;
- def P9_LoadAndLoadOp_8C : WriteSequence<[P9_LS_4C, P9_LS_4C]>;
def P9_IntDivAndALUOp_18C_8 : WriteSequence<[P9_DIV_16C_8, P9_ALU_2C]>;
def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index f11b4e14073e..98424234a592 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -18,6 +18,7 @@
#include "PPCRegisterInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
@@ -140,6 +141,7 @@ void PPCSubtarget::initializeEnvironment() {
IsISA2_07 = false;
IsISA3_0 = false;
IsISA3_1 = false;
+ IsISAFuture = false;
UseLongCalls = false;
SecurePlt = false;
VectorsUseTwoUnits = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 1300b62b623a..3281816eab4a 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -19,7 +19,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -160,6 +160,7 @@ protected:
bool IsISA2_07;
bool IsISA3_0;
bool IsISA3_1;
+ bool IsISAFuture;
bool UseLongCalls;
bool SecurePlt;
bool VectorsUseTwoUnits;
@@ -336,6 +337,7 @@ public:
bool isISA2_07() const { return IsISA2_07; }
bool isISA3_0() const { return IsISA3_0; }
bool isISA3_1() const { return IsISA3_1; }
+ bool isISAFuture() const { return IsISAFuture; }
bool useLongCalls() const { return UseLongCalls; }
bool hasFusion() const { return HasFusion; }
bool hasStoreFusion() const { return HasStoreFusion; }
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3eff00fc3c05..fe396cbfc011 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
@@ -97,6 +98,13 @@ static cl::opt<bool>
ReduceCRLogical("ppc-reduce-cr-logicals",
cl::desc("Expand eligible cr-logical binary ops to branches"),
cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnablePPCGenScalarMASSEntries(
+ "enable-ppc-gen-scalar-mass", cl::init(false),
+ cl::desc("Enable lowering math functions to their corresponding MASS "
+ "(scalar) entries"),
+ cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
@@ -123,8 +131,10 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
initializePPCTLSDynamicCallPass(PR);
initializePPCMIPeepholePass(PR);
initializePPCLowerMASSVEntriesPass(PR);
+ initializePPCGenScalarMASSEntriesPass(PR);
initializePPCExpandAtomicPseudoPass(PR);
initializeGlobalISel(PR);
+ initializePPCCTRLoopsPass(PR);
}
static bool isLittleEndianTriple(const Triple &T) {
@@ -236,10 +246,10 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- assert((!TT.isOSAIX() || !RM.hasValue() || *RM == Reloc::PIC_) &&
+ assert((!TT.isOSAIX() || !RM || *RM == Reloc::PIC_) &&
"Invalid relocation model for AIX.");
- if (RM.hasValue())
+ if (RM)
return *RM;
// Big Endian PPC and AIX default to PIC.
@@ -429,6 +439,14 @@ void PPCPassConfig::addIRPasses() {
// Lower generic MASSV routines to PowerPC subtarget-specific entries.
addPass(createPPCLowerMASSVEntriesPass());
+ // Generate PowerPC target-specific entries for scalar math functions
+ // that are available in IBM MASS (scalar) library.
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ EnablePPCGenScalarMASSEntries) {
+ TM->Options.PPCGenScalarMASSEntries = EnablePPCGenScalarMASSEntries;
+ addPass(createPPCGenScalarMASSEntriesPass());
+ }
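
Usage note (an inference from the guard above, not taken from the patch
itself): the new pass only fires when both conditions hold, e.g. via
"llc -mtriple=powerpc64le-unknown-linux-gnu -O3 -enable-ppc-gen-scalar-mass foo.ll",
since CodeGenOpt::Aggressive corresponds to -O3 and the hidden flag defaults
to off.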
+
// If explicitly requested, add explicit data prefetch intrinsics.
if (EnablePrefetch.getNumOccurrences() > 0)
addPass(createLoopDataPrefetchPass());
@@ -522,6 +540,16 @@ void PPCPassConfig::addPreRegAlloc() {
if (EnableExtraTOCRegDeps)
addPass(createPPCTOCRegDepsPass());
+ // Run the CTR loops pass before the MachinePipeliner pass. MachinePipeliner
+ // will pipeline all instructions before the terminator, but we don't want
+ // DecreaseCTRPseudo to be pipelined.
+ // Note that we may lose some MachinePipeliner opportunities if we run the CTR
+ // loops generation pass first and the loop is converted back to a normal
+ // loop. We can revisit this later, running PPCCTRLoops after MachinePipeliner
+ // and handling DecreaseCTRPseudo in the MachinePipeliner pass.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoopsPass());
+
if (getOptLevel() != CodeGenOpt::None)
addPass(&MachinePipelinerID);
}
@@ -549,7 +577,7 @@ void PPCPassConfig::addPreEmitPass2() {
}
TargetTransformInfo
-PPCTargetMachine::getTargetTransformInfo(const Function &F) {
+PPCTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(PPCTTIImpl(this, F));
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index d3fe5362ccdc..bafb79c84942 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -51,7 +51,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
index 82fcd9e1c2bc..e3fc6285494c 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
namespace llvm {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index cc5738a5d7b6..cf728933c08d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -28,11 +28,6 @@ using namespace llvm;
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
-// This is currently only used for the data prefetch pass
-static cl::opt<unsigned>
-CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64),
- cl::desc("The loop prefetch cache line size"));
-
static cl::opt<bool>
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
cl::desc("Enable using coldcc calling conv for cold "
@@ -491,15 +486,13 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
return true;
- // There is no corresponding FMA instruction for PPC double double.
- // Thus, we need to disable CTR loop generation for this type.
- case Intrinsic::fmuladd:
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::fmuladd:
case Intrinsic::fma: Opcode = ISD::FMA; break;
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
@@ -903,10 +896,6 @@ PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
}
unsigned PPCTTIImpl::getCacheLineSize() const {
- // Check first if the user specified a custom line size.
- if (CacheLineSize.getNumOccurrences() > 0)
- return CacheLineSize;
-
// Starting with P7 we have a cache line size of 128.
unsigned Directive = ST->getCPUDirective();
// Assume that Future CPU has the same cache line size as the others.
@@ -1015,7 +1004,8 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask, int Index,
- Type *SubTp) {
+ Type *SubTp,
+ ArrayRef<const Value *> Args) {
InstructionCost CostFactor =
vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
@@ -1319,8 +1309,8 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
return true;
}
-bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2) {
+bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) {
// PowerPC default behaviour here is "instruction number 1st priority".
// If LsrNoInsnsCost is set, call default implementation.
if (!LsrNoInsnsCost)
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 0af6f2a308d9..790eb0b42afa 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -76,8 +76,8 @@ public:
OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
- bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2);
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2);
bool isNumRegsMajorCostOfLSR();
bool shouldBuildRelLookupTables() const;
/// @}
@@ -111,7 +111,8 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
- ArrayRef<int> Mask, int Index, Type *SubTp);
+ ArrayRef<int> Mask, int Index, Type *SubTp,
+ ArrayRef<const Value *> Args = None);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index ff251f55afff..04fc7667257e 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -519,6 +519,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
case PPC::XXSLDWI:
case PPC::XSCVDPSPN:
case PPC::XSCVSPDPN:
+ case PPC::MTVSCR:
+ case PPC::MFVSCR:
break;
}
}
diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt
index c9984b7604bd..ee1ea735acad 100644
--- a/llvm/lib/Target/PowerPC/README_P9.txt
+++ b/llvm/lib/Target/PowerPC/README_P9.txt
@@ -310,7 +310,7 @@ VSX:
. I checked the existing instruction "XSCMPUDP". They differ in the target
register: "XSCMPUDP" writes to a CR field, xscmp*dp writes to a VSX register
- . Use instrinsic:
+ . Use intrinsic:
(set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
(set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
(set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
@@ -322,7 +322,7 @@ VSX:
"xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
- . So we should use "XX3Form_Rcr" to implement instrinsic
+ . So we should use "XX3Form_Rcr" to implement intrinsic
- Convert DP -> QP: xscvdpqp
. Similar to XSCVDPSP:
@@ -579,11 +579,6 @@ Atomic operations (l[dw]at, st[dw]at):
- Provide builtins since not all FC's necessarily have an existing LLVM
atomic operation
-Load Doubleword Monitored (ldmx):
-- Investigate whether there are any uses for this. It seems to be related to
- Garbage Collection so it isn't likely to be all that useful for most
- languages we deal with.
-
Move to CR from XER Extended (mcrxrx):
- Is there a use for this in LLVM?
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 01f36e6dcdd2..69fb9d2844d3 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -161,7 +162,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseRegister(OperandVector &Operands,
bool AllowParens = false);
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
- OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands);
+ OperandMatchResultTy parseZeroOffsetMemOp(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
OperandMatchResultTy parseCallSymbol(OperandVector &Operands);
@@ -170,6 +171,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseVTypeI(OperandVector &Operands);
OperandMatchResultTy parseMaskReg(OperandVector &Operands);
OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands);
+ OperandMatchResultTy parseGPRAsFPR(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -254,6 +256,11 @@ public:
"target-abi)\n";
}
+ // Use computeTargetABI to check whether ABIName is valid; if it is not,
+ // an error message is emitted.
+ RISCVABI::computeTargetABI(STI.getTargetTriple(), STI.getFeatureBits(),
+ ABIName);
+
const MCObjectFileInfo *MOFI = Parser.getContext().getObjectFileInfo();
ParserOptions.IsPicEnabled = MOFI->isPositionIndependent();
}
@@ -273,6 +280,8 @@ struct RISCVOperand : public MCParsedAsmOperand {
bool IsRV64;
+ bool IsGPRAsFPR;
+
struct RegOp {
MCRegister RegNum;
};
@@ -343,6 +352,14 @@ public:
RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
}
+ bool isGPRAsFPR() const { return isGPR() && IsGPRAsFPR; }
+
+ bool isGPRF64AsFPR() const { return isGPR() && IsGPRAsFPR && IsRV64; }
+
+ bool isGPRPF64AsFPR() const {
+ return isGPR() && IsGPRAsFPR && !IsRV64 && !((Reg.RegNum - RISCV::X0) & 1);
+ }
+
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
@@ -447,8 +464,16 @@ public:
bool isFenceArg() const {
if (!isImm())
return false;
- const MCExpr *Val = getImm();
- auto *SVal = dyn_cast<MCSymbolRefExpr>(Val);
+
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (evaluateConstantImm(getImm(), Imm, VK)) {
+ // Only accept 0 as a constant immediate.
+ return VK == RISCVMCExpr::VK_RISCV_None && Imm == 0;
+ }
+
+ auto *SVal = dyn_cast<MCSymbolRefExpr>(getImm());
+
if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None)
return false;
@@ -530,41 +555,19 @@ public:
return (isRV64() && isUInt<5>(Imm)) || isUInt<4>(Imm);
}
- bool isUImm2() const {
+ template <unsigned N> bool IsUImm() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isUInt<2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ return IsConstantImm && isUInt<N>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
- bool isUImm3() const {
- int64_t Imm;
- RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
- if (!isImm())
- return false;
- bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isUInt<3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
- }
-
- bool isUImm5() const {
- int64_t Imm;
- RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
- if (!isImm())
- return false;
- bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
- }
-
- bool isUImm7() const {
- int64_t Imm;
- RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
- if (!isImm())
- return false;
- bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
- }
+ bool isUImm2() { return IsUImm<2>(); }
+ bool isUImm3() { return IsUImm<3>(); }
+ bool isUImm5() { return IsUImm<5>(); }
+ bool isUImm7() { return IsUImm<7>(); }
bool isRnumArg() const {
int64_t Imm;
@@ -686,6 +689,16 @@ public:
bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
+ bool isSImm12Lsb00000() const {
+ if (!isImm())
+ return false;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ int64_t Imm;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isShiftedInt<7, 5>(Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
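
For reference, isShiftedInt<7, 5> accepts exactly the multiples of 32 in
[-2048, 2016], matching the diagnostic added later in this patch. A minimal
standalone equivalent (a sketch, not the LLVM helper itself):

    // Same range check as isShiftedInt<7, 5>, spelled out.
    bool isSImm12Lsb00000Range(int64_t Imm) {
      return Imm % 32 == 0 && Imm >= -2048 && Imm <= 2016;
    }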
bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
bool isSImm10Lsb0000NonZero() const {
@@ -831,12 +844,14 @@ public:
}
static std::unique_ptr<RISCVOperand> createReg(unsigned RegNo, SMLoc S,
- SMLoc E, bool IsRV64) {
+ SMLoc E, bool IsRV64,
+ bool IsGPRAsFPR = false) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Register);
Op->Reg.RegNum = RegNo;
Op->StartLoc = S;
Op->EndLoc = E;
Op->IsRV64 = IsRV64;
+ Op->IsGPRAsFPR = IsGPRAsFPR;
return Op;
}
@@ -897,6 +912,17 @@ public:
void addFenceArgOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
+
+ int64_t Constant = 0;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (evaluateConstantImm(getImm(), Constant, VK)) {
+ if (Constant == 0) {
+ Inst.addOperand(MCOperand::createImm(Constant));
+ return;
+ }
+ llvm_unreachable("FenceArg must contain only [iorw] or be 0");
+ }
+
// isFenceArg has validated the operand, meaning this cast is safe
auto SE = cast<MCSymbolRefExpr>(getImm());
@@ -904,7 +930,7 @@ public:
for (char c : SE->getSymbol().getName()) {
switch (c) {
default:
- llvm_unreachable("FenceArg must contain only [iorw]");
+ llvm_unreachable("FenceArg must contain only [iorw] or be 0");
case 'i':
Imm |= RISCVFenceField::I;
break;
@@ -1182,6 +1208,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
"immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidSImm12Lsb00000:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, -(1 << 11), (1 << 11) - 32,
+ "immediate must be a multiple of 32 bytes in the range");
case Match_InvalidSImm13Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2,
@@ -1208,9 +1238,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_InvalidFenceArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
- return Error(
- ErrorLoc,
- "operand must be formed of letters selected in-order from 'iorw'");
+ return Error(ErrorLoc, "operand must be formed of letters selected "
+ "in-order from 'iorw' or be 0");
}
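
After this change the accepted operand forms are either the constant 0 or a
non-empty sequence of letters chosen in-order from "iorw". A self-contained
sketch of that acceptance rule (an assumed helper, not the parser's actual
code path):

    #include <string_view>

    // Accepts "0" or in-order letters from "iorw" (e.g. "rw", "iorw").
    bool isValidFenceArgSketch(std::string_view S) {
      if (S == "0")
        return true;
      if (S.empty())
        return false;
      size_t Pos = 0;
      for (char C : S) {
        Pos = std::string_view("iorw").find(C, Pos);
        if (Pos == std::string_view::npos)
          return false;
        ++Pos; // each letter appears at most once, in order
      }
      return true;
    }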
case Match_InvalidFRMArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
@@ -1594,9 +1623,11 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
return MatchOperand_Success;
case AsmToken::Plus:
Opcode = MCBinaryExpr::Add;
+ getLexer().Lex();
break;
case AsmToken::Minus:
Opcode = MCBinaryExpr::Sub;
+ getLexer().Lex();
break;
}
@@ -1737,9 +1768,7 @@ OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
else
goto MatchFail;
- unsigned LmulLog2 = Log2_32(Lmul);
- RISCVII::VLMUL VLMUL =
- static_cast<RISCVII::VLMUL>(Fractional ? 8 - LmulLog2 : LmulLog2);
+ RISCVII::VLMUL VLMUL = RISCVVType::encodeLMUL(Lmul, Fractional);
unsigned VTypeI =
RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic);
@@ -1780,6 +1809,26 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) {
+ switch (getLexer().getKind()) {
+ default:
+ return MatchOperand_NoMatch;
+ case AsmToken::Identifier:
+ StringRef Name = getLexer().getTok().getIdentifier();
+ MCRegister RegNo;
+ matchRegisterNameHelper(isRV32E(), RegNo, Name);
+
+ if (RegNo == RISCV::NoRegister)
+ return MatchOperand_NoMatch;
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ getLexer().Lex();
+ Operands.push_back(RISCVOperand::createReg(
+ RegNo, S, E, isRV64(), !getSTI().hasFeature(RISCV::FeatureStdExtF)));
+ }
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::LParen)) {
@@ -1806,7 +1855,8 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
return MatchOperand_Success;
}
-OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) {
+OperandMatchResultTy
+RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
// Atomic operations such as lr.w, sc.w, and amo*.w accept a "memory operand"
// as one of their register operands, such as `(a0)`. This just denotes that
// the register (in this case `a0`) contains a memory address.
@@ -1822,9 +1872,9 @@ OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) {
// offset if it is zero; require (and discard) parentheses; and add only the
// parsed register operand to `Operands`.
//
- // These operands are printed with RISCVInstPrinter::printAtomicMemOp, which
- // will only print the register surrounded by parentheses (which GNU as also
- // uses as its canonical representation for these operands).
+ // These operands are printed with RISCVInstPrinter::printZeroOffsetMemOp,
+ // which will only print the register surrounded by parentheses (which GNU as
+ // also uses as its canonical representation for these operands).
std::unique_ptr<RISCVOperand> OptionalImmOp;
if (getLexer().isNot(AsmToken::LParen)) {
@@ -1935,7 +1985,6 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return true;
// Parse until end of statement, consuming commas between operands
- unsigned OperandIdx = 1;
while (getLexer().is(AsmToken::Comma)) {
// Consume comma token
getLexer().Lex();
@@ -1943,8 +1992,6 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Parse next operand
if (parseOperand(Operands, Name))
return true;
-
- ++OperandIdx;
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -2120,11 +2167,11 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
StringRef Name = Parser.getTok().getIdentifier();
Optional<unsigned> Ret =
ELFAttrs::attrTypeFromString(Name, RISCVAttrs::getRISCVAttributeTags());
- if (!Ret.hasValue()) {
+ if (!Ret) {
Error(TagLoc, "attribute name not recognised: " + Name);
return false;
}
- Tag = Ret.getValue();
+ Tag = *Ret;
Parser.Lex();
} else {
const MCExpr *AttrExpr;
@@ -2170,8 +2217,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
Parser.Lex();
}
- if (Parser.parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.attribute' directive"))
+ if (Parser.parseEOL())
return true;
if (IsIntegerValue)
@@ -2263,23 +2309,26 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
MCRegister SrcReg = RISCV::X0;
for (RISCVMatInt::Inst &Inst : Seq) {
- if (Inst.Opc == RISCV::LUI) {
+ switch (Inst.getOpndKind()) {
+ case RISCVMatInt::Imm:
+ emitToStreamer(Out,
+ MCInstBuilder(Inst.Opc).addReg(DestReg).addImm(Inst.Imm));
+ break;
+ case RISCVMatInt::RegX0:
emitToStreamer(
- Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
- } else if (Inst.Opc == RISCV::ADD_UW) {
- emitToStreamer(Out, MCInstBuilder(RISCV::ADD_UW)
- .addReg(DestReg)
- .addReg(SrcReg)
- .addReg(RISCV::X0));
- } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
- Inst.Opc == RISCV::SH3ADD) {
+ Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg(
+ RISCV::X0));
+ break;
+ case RISCVMatInt::RegReg:
emitToStreamer(
Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg(
SrcReg));
- } else {
+ break;
+ case RISCVMatInt::RegImm:
emitToStreamer(
Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
Inst.Imm));
+ break;
}
// Only the first instruction has X0 as its source.
@@ -2541,8 +2590,7 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
}
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
- RISCVII::VConstraintType Constraints =
- RISCVII::getConstraint(MCID.TSFlags);
+ RISCVII::VConstraintType Constraints = RISCVII::getConstraint(MCID.TSFlags);
if (Constraints == RISCVII::NoConstraint)
return false;
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index ff96b2b254ca..1c732a15de2f 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -14,8 +14,8 @@
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -60,11 +60,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() {
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
const FeatureBitset &FeatureBits =
- static_cast<const MCDisassembler *>(Decoder)
- ->getSubtargetInfo()
- .getFeatureBits();
+ Decoder->getSubtargetInfo().getFeatureBits();
bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
if (RegNo >= 32 || (IsRV32E && RegNo >= 16))
@@ -77,7 +75,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32)
return MCDisassembler::Fail;
@@ -88,7 +86,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32)
return MCDisassembler::Fail;
@@ -99,7 +97,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 8) {
return MCDisassembler::Fail;
}
@@ -110,7 +108,7 @@ static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32)
return MCDisassembler::Fail;
@@ -121,7 +119,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 8) {
return MCDisassembler::Fail;
}
@@ -132,7 +130,7 @@ static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo == 0) {
return MCDisassembler::Fail;
}
@@ -140,9 +138,9 @@ static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint64_t RegNo,
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo == 2) {
return MCDisassembler::Fail;
}
@@ -152,7 +150,7 @@ static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 8)
return MCDisassembler::Fail;
@@ -161,9 +159,20 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32 || RegNo & 1)
+ return MCDisassembler::Fail;
+
+ MCRegister Reg = RISCV::X0 + RegNo;
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
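
A minimal mirror of the even-pair constraint enforced above (sketch): on RV32
with Zdinx, a 64-bit value lives in an even/odd GPR pair, so only even
register numbers decode. For example, RegNo 6 yields X6 (pair X6/X7) while
RegNo 7 fails.

    // Sketch of the decoder's validity check.
    bool isValidGPRPF64(uint64_t RegNo) {
      return RegNo < 32 && (RegNo & 1) == 0;
    }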
+
static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32)
return MCDisassembler::Fail;
@@ -174,7 +183,7 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32)
return MCDisassembler::Fail;
@@ -194,7 +203,7 @@ static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32)
return MCDisassembler::Fail;
@@ -214,7 +223,7 @@ static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo >= 32)
return MCDisassembler::Fail;
@@ -233,7 +242,8 @@ static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint64_t RegNo,
}
static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
MCRegister Reg = RISCV::NoRegister;
switch (RegNo) {
default:
@@ -250,7 +260,8 @@ static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
// Add implied SP operand for *SP compressed instructions. The SP
// operand isn't explicitly encoded in the instruction.
-static void addImplySP(MCInst &Inst, int64_t Address, const void *Decoder) {
+static void addImplySP(MCInst &Inst, int64_t Address,
+ const MCDisassembler *Decoder) {
if (Inst.getOpcode() == RISCV::C_LWSP || Inst.getOpcode() == RISCV::C_SWSP ||
Inst.getOpcode() == RISCV::C_LDSP || Inst.getOpcode() == RISCV::C_SDSP ||
Inst.getOpcode() == RISCV::C_FLWSP ||
@@ -268,7 +279,8 @@ static void addImplySP(MCInst &Inst, int64_t Address, const void *Decoder) {
template <unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
addImplySP(Inst, Address, Decoder);
Inst.addOperand(MCOperand::createImm(Imm));
@@ -278,7 +290,7 @@ static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
template <unsigned N>
static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Imm == 0)
return MCDisassembler::Fail;
return decodeUImmOperand<N>(Inst, Imm, Address, Decoder);
@@ -286,7 +298,8 @@ static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
template <unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
- int64_t Address, const void *Decoder) {
+ int64_t Address,
+ const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
addImplySP(Inst, Address, Decoder);
// Sign-extend the number in the bottom N bits of Imm
@@ -297,7 +310,7 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
template <unsigned N>
static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (Imm == 0)
return MCDisassembler::Fail;
return decodeSImmOperand<N>(Inst, Imm, Address, Decoder);
@@ -306,7 +319,7 @@ static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
template <unsigned N>
static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
int64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
// Sign-extend the number in the bottom N bits of Imm after accounting for
// the fact that the N bit immediate is stored in N-1 bits (the LSB is
@@ -317,7 +330,7 @@ static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
assert(isUInt<6>(Imm) && "Invalid immediate");
if (Imm > 31) {
Imm = (SignExtend64<6>(Imm) & 0xfffff);
@@ -326,9 +339,8 @@ static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
-static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
- int64_t Address,
- const void *Decoder) {
+static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm, int64_t Address,
+ const MCDisassembler *Decoder) {
assert(isUInt<3>(Imm) && "Invalid immediate");
if (!llvm::RISCVFPRndMode::isValidRoundingMode(Imm))
return MCDisassembler::Fail;
@@ -338,26 +350,30 @@ static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm,
}
static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
#include "RISCVGenDisassemblerTables.inc"
static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
uint64_t SImm6 =
fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder);
@@ -368,7 +384,7 @@ static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeGPRRegisterClass(Inst, 0, Address, Decoder);
uint64_t SImm6 =
fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
@@ -380,7 +396,7 @@ static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
DecodeGPRRegisterClass(Inst, 0, Address, Decoder);
Inst.addOperand(Inst.getOperand(0));
uint64_t UImm6 =
@@ -392,7 +408,8 @@ static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
}
static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(Insn, 7, 5);
unsigned Rs2 = fieldFromInstruction(Insn, 2, 5);
DecodeGPRRegisterClass(Inst, Rd, Address, Decoder);
@@ -402,7 +419,7 @@ static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn,
static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Rd = fieldFromInstruction(Insn, 7, 5);
unsigned Rs2 = fieldFromInstruction(Insn, 2, 5);
DecodeGPRRegisterClass(Inst, Rd, Address, Decoder);
@@ -427,6 +444,27 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
Insn = support::endian::read32le(Bytes.data());
+ if (STI.getFeatureBits()[RISCV::FeatureStdExtZdinx] &&
+ !STI.getFeatureBits()[RISCV::Feature64Bit]) {
+ LLVM_DEBUG(dbgs() << "Trying RV32Zdinx table (Double in Integer and "
+ "rv32)\n");
+ Result = decodeInstruction(DecoderTableRV32Zdinx32, MI, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
+
+ if (STI.getFeatureBits()[RISCV::FeatureStdExtZfinx]) {
+ LLVM_DEBUG(dbgs() << "Trying RVZfinx table (Float in Integer):\n");
+ Result = decodeInstruction(DecoderTableRVZfinx32, MI, Insn, Address, this,
+ STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
LLVM_DEBUG(dbgs() << "Trying RISCV32 table :\n");
Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
Size = 4;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 514789b3f645..a494adf8e210 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -583,16 +583,17 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
const MCAlignFragment &AF, unsigned &Size) {
// Calculate Nops Size only when linker relaxation enabled.
- if (!STI.getFeatureBits()[RISCV::FeatureRelax])
+ const MCSubtargetInfo *STI = AF.getSubtargetInfo();
+ if (!STI->getFeatureBits()[RISCV::FeatureRelax])
return false;
- bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC];
+ bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC];
unsigned MinNopLen = HasStdExtC ? 2 : 4;
if (AF.getAlignment() <= MinNopLen) {
return false;
} else {
- Size = AF.getAlignment() - MinNopLen;
+ Size = AF.getAlignment().value() - MinNopLen;
return true;
}
}
@@ -606,7 +607,8 @@ bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
const MCAsmLayout &Layout,
MCAlignFragment &AF) {
// Insert the fixup only when linker relaxation enabled.
- if (!STI.getFeatureBits()[RISCV::FeatureRelax])
+ const MCSubtargetInfo *STI = AF.getSubtargetInfo();
+ if (!STI->getFeatureBits()[RISCV::FeatureRelax])
return false;
// Calculate total Nops we need to insert. If there are none to insert
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index f04d2912f09d..5d62c3a8b0df 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -27,18 +27,15 @@ class RISCVAsmBackend : public MCAsmBackend {
bool Is64Bit;
bool ForceRelocs = false;
const MCTargetOptions &TargetOptions;
- RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
public:
RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
const MCTargetOptions &Options)
: MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
TargetOptions(Options) {
- TargetABI = RISCVABI::computeTargetABI(
- STI.getTargetTriple(), STI.getFeatureBits(), Options.getABIName());
RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
}
- ~RISCVAsmBackend() override {}
+ ~RISCVAsmBackend() override = default;
void setForceRelocs() { ForceRelocs = true; }
@@ -103,7 +100,6 @@ public:
const MCSubtargetInfo *STI) const override;
const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
- RISCVABI::ABI getTargetABI() const { return TargetABI; }
};
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 144e761f002d..9b69170d1c4a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/RISCVISAInfo.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -61,15 +62,11 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
if (TargetABI != ABI_Unknown)
return TargetABI;
- // For now, default to the ilp32/ilp32e/lp64 ABI if no explicit ABI is given
- // or an invalid/unrecognised string is given. In the future, it might be
- // worth changing this to default to ilp32f/lp64f and ilp32d/lp64d when
- // hardware support for floating point is present.
- if (IsRV32E)
- return ABI_ILP32E;
- if (IsRV64)
- return ABI_LP64;
- return ABI_ILP32;
+ // If no explicit ABI is given, try to compute the default ABI.
+ auto ISAInfo = RISCVFeatures::parseFeatureBits(IsRV64, FeatureBits);
+ if (!ISAInfo)
+ report_fatal_error(ISAInfo.takeError());
+ return getTargetABI((*ISAInfo)->computeDefaultABI());
}
ABI getTargetABI(StringRef ABIName) {
@@ -106,13 +103,17 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
report_fatal_error("RV32E can't be enabled for an RV64 target");
}
-void toFeatureVector(std::vector<std::string> &FeatureVector,
- const FeatureBitset &FeatureBits) {
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) {
+ unsigned XLen = IsRV64 ? 64 : 32;
+ std::vector<std::string> FeatureVector;
+ // Convert FeatureBitset to FeatureVector.
for (auto Feature : RISCVFeatureKV) {
if (FeatureBits[Feature.Value] &&
llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
FeatureVector.push_back(std::string("+") + Feature.Key);
}
+ return llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector);
}
} // namespace RISCVFeatures
@@ -130,7 +131,7 @@ unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
bool TailAgnostic, bool MaskAgnostic) {
assert(isValidSEW(SEW) && "Invalid SEW");
unsigned VLMULBits = static_cast<unsigned>(VLMUL);
- unsigned VSEWBits = Log2_32(SEW) - 3;
+ unsigned VSEWBits = encodeSEW(SEW);
unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7);
if (TailAgnostic)
VTypeI |= 0x40;
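
A worked instance of the encoding above (not new code in the patch): SEW=32
gives VSEWBits=2 and LMUL=1 gives VLMULBits=0, so vtype = (2 << 3) | 0 = 0x10;
setting tail-agnostic ORs in 0x40, yielding 0x50.

    static_assert(((2u << 3) | 0u | 0x40u) == 0x50, "vtype for e32, m1, ta");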
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 01c6bd90ea58..fa408f7fc5d7 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/RISCVISAInfo.h"
namespace llvm {
@@ -87,9 +88,16 @@ enum {
// Pseudos.
IsRVVWideningReductionShift = HasVecPolicyOpShift + 1,
IsRVVWideningReductionMask = 1 << IsRVVWideningReductionShift,
+
+ // Does this instruction care about mask policy? If it does not, the mask
+ // policy could be either agnostic or undisturbed. For example, the results
+ // of unmasked, store, and reduction operations are not affected by the mask
+ // policy, so the compiler is free to select either one.
+ UsesMaskPolicyShift = IsRVVWideningReductionShift + 1,
+ UsesMaskPolicyMask = 1 << UsesMaskPolicyShift,
};
-// Match with the definitions in RISCVInstrFormatsV.td
+// Match with the definitions in RISCVInstrFormats.td
enum VConstraintType {
NoConstraint = 0,
VS2Constraint = 0b001,
@@ -109,8 +117,8 @@ enum VLMUL : uint8_t {
};
enum {
- TAIL_UNDISTURBED = 0,
TAIL_AGNOSTIC = 1,
+ MASK_AGNOSTIC = 2,
};
// Helper functions to read TSFlags.
@@ -120,8 +128,8 @@ static inline unsigned getFormat(uint64_t TSFlags) {
}
/// \returns the constraint for the instruction.
static inline VConstraintType getConstraint(uint64_t TSFlags) {
- return static_cast<VConstraintType>
- ((TSFlags & ConstraintMask) >> ConstraintShift);
+ return static_cast<VConstraintType>((TSFlags & ConstraintMask) >>
+ ConstraintShift);
}
/// \returns the LMUL for the instruction.
static inline VLMUL getLMul(uint64_t TSFlags) {
@@ -155,6 +163,30 @@ static inline bool hasVecPolicyOp(uint64_t TSFlags) {
static inline bool isRVVWideningReduction(uint64_t TSFlags) {
return TSFlags & IsRVVWideningReductionMask;
}
+/// \returns true if mask policy is valid for the instruction.
+static inline bool usesMaskPolicy(uint64_t TSFlags) {
+ return TSFlags & UsesMaskPolicyMask;
+}
+
+static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ // This method is only called if we expect to have a VL operand, and all
+ // instructions with VL also have SEW.
+ assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
+ unsigned Offset = 2;
+ if (hasVecPolicyOp(TSFlags))
+ Offset = 3;
+ return Desc.getNumOperands() - Offset;
+}
+
+static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
+ const uint64_t TSFlags = Desc.TSFlags;
+ assert(hasSEWOp(TSFlags));
+ unsigned Offset = 1;
+ if (hasVecPolicyOp(TSFlags))
+ Offset = 2;
+ return Desc.getNumOperands() - Offset;
+}
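
The offsets above imply the following operand tail layout (a reading of the
code, not an addition to it):

    // Without a policy operand:  ..., VL, SEW           (SEW last, VL before it)
    // With a policy operand:     ..., VL, SEW, Policy   (Policy last)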
// RISC-V Specific Machine Operand Flags
enum {
@@ -189,6 +221,7 @@ enum OperandType : unsigned {
OPERAND_UIMM7,
OPERAND_UIMM12,
OPERAND_SIMM12,
+ OPERAND_SIMM12_LSB00000,
OPERAND_UIMM20,
OPERAND_UIMMLOG2XLEN,
OPERAND_RVKRNUM,
@@ -344,9 +377,8 @@ namespace RISCVFeatures {
// triple. Exits with report_fatal_error if not.
void validate(const Triple &TT, const FeatureBitset &FeatureBits);
-// Convert FeatureBitset to FeatureVector.
-void toFeatureVector(std::vector<std::string> &FeatureVector,
- const FeatureBitset &FeatureBits);
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits);
} // namespace RISCVFeatures
@@ -372,11 +404,22 @@ inline static RISCVII::VLMUL getVLMUL(unsigned VType) {
// Decode VLMUL into 1,2,4,8 and fractional indicator.
std::pair<unsigned, bool> decodeVLMUL(RISCVII::VLMUL VLMUL);
+inline static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional) {
+ assert(isValidLMUL(LMUL, Fractional) && "Unsupported LMUL");
+ unsigned LmulLog2 = Log2_32(LMUL);
+ return static_cast<RISCVII::VLMUL>(Fractional ? 8 - LmulLog2 : LmulLog2);
+}
+
inline static unsigned decodeVSEW(unsigned VSEW) {
assert(VSEW < 8 && "Unexpected VSEW value");
return 1 << (VSEW + 3);
}
+inline static unsigned encodeSEW(unsigned SEW) {
+ assert(isValidSEW(SEW) && "Unexpected SEW value");
+ return Log2_32(SEW) - 3;
+}
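
encodeSEW is the inverse of decodeVSEW above: for every valid SEW,
decodeVSEW(encodeSEW(SEW)) == SEW. A quick compile-time spot check (sketch):

    // encodeSEW(32) == Log2_32(32) - 3 == 2, and decodeVSEW(2) == 1 << (2 + 3).
    static_assert((1u << (2 + 3)) == 32, "SEW=32 round-trips through VSEW=2");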
+
inline static unsigned getSEW(unsigned VType) {
unsigned VSEW = (VType >> 3) & 0x7;
return decodeVSEW(VSEW);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index fb1ce19d73bc..0c362c57e5c0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -43,7 +43,7 @@ RISCVELFObjectWriter::RISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_RISCV,
/*HasRelocationAddend*/ true) {}
-RISCVELFObjectWriter::~RISCVELFObjectWriter() {}
+RISCVELFObjectWriter::~RISCVELFObjectWriter() = default;
unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index d88ba9e4ac72..c5f8a42bab6a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -16,6 +16,7 @@
#include "RISCVMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -30,38 +31,12 @@ using namespace llvm;
// This part is for ELF object output.
RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
- : RISCVTargetStreamer(S), CurrentVendor("riscv") {
+ : RISCVTargetStreamer(S), CurrentVendor("riscv"), STI(STI) {
MCAssembler &MCA = getStreamer().getAssembler();
const FeatureBitset &Features = STI.getFeatureBits();
auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
- RISCVABI::ABI ABI = MAB.getTargetABI();
- assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
-
- unsigned EFlags = MCA.getELFHeaderEFlags();
-
- if (Features[RISCV::FeatureStdExtC])
- EFlags |= ELF::EF_RISCV_RVC;
-
- switch (ABI) {
- case RISCVABI::ABI_ILP32:
- case RISCVABI::ABI_LP64:
- break;
- case RISCVABI::ABI_ILP32F:
- case RISCVABI::ABI_LP64F:
- EFlags |= ELF::EF_RISCV_FLOAT_ABI_SINGLE;
- break;
- case RISCVABI::ABI_ILP32D:
- case RISCVABI::ABI_LP64D:
- EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE;
- break;
- case RISCVABI::ABI_ILP32E:
- EFlags |= ELF::EF_RISCV_RVE;
- break;
- case RISCVABI::ABI_Unknown:
- llvm_unreachable("Improperly initialised target ABI");
- }
-
- MCA.setELFHeaderEFlags(EFlags);
+ setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features,
+ MAB.getTargetOptions().getABIName()));
}
MCELFStreamer &RISCVTargetELFStreamer::getStreamer() {
@@ -98,12 +73,12 @@ void RISCVTargetELFStreamer::finishAttributeSection() {
return;
if (AttributeSection) {
- Streamer.SwitchSection(AttributeSection);
+ Streamer.switchSection(AttributeSection);
} else {
MCAssembler &MCA = getStreamer().getAssembler();
AttributeSection = MCA.getContext().getELFSection(
".riscv.attributes", ELF::SHT_RISCV_ATTRIBUTES, 0);
- Streamer.SwitchSection(AttributeSection);
+ Streamer.switchSection(AttributeSection);
Streamer.emitInt8(ELFAttrs::Format_Version);
}
@@ -172,6 +147,44 @@ size_t RISCVTargetELFStreamer::calculateContentSize() const {
return Result;
}
+void RISCVTargetELFStreamer::finish() {
+ RISCVTargetStreamer::finish();
+ MCAssembler &MCA = getStreamer().getAssembler();
+ const FeatureBitset &Features = STI.getFeatureBits();
+ RISCVABI::ABI ABI = getTargetABI();
+
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+
+ if (Features[RISCV::FeatureStdExtC])
+ EFlags |= ELF::EF_RISCV_RVC;
+
+ switch (ABI) {
+ case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_LP64:
+ break;
+ case RISCVABI::ABI_ILP32F:
+ case RISCVABI::ABI_LP64F:
+ EFlags |= ELF::EF_RISCV_FLOAT_ABI_SINGLE;
+ break;
+ case RISCVABI::ABI_ILP32D:
+ case RISCVABI::ABI_LP64D:
+ EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE;
+ break;
+ case RISCVABI::ABI_ILP32E:
+ EFlags |= ELF::EF_RISCV_RVE;
+ break;
+ case RISCVABI::ABI_Unknown:
+ llvm_unreachable("Improperly initialised target ABI");
+ }
+
+ MCA.setELFHeaderEFlags(EFlags);
+}
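
For example (a worked instance of the switch above, using the standard ELF
flag values): an ilp32d target with the C extension ends up with
EFlags = EF_RISCV_RVC | EF_RISCV_FLOAT_ABI_DOUBLE = 0x0001 | 0x0004 = 0x0005.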
+
+void RISCVTargetELFStreamer::reset() {
+ AttributeSection = nullptr;
+ Contents.clear();
+}
+
namespace {
class RISCVELFStreamer : public MCELFStreamer {
static std::pair<unsigned, unsigned> getRelocPairForSize(unsigned Size) {
@@ -194,6 +207,14 @@ class RISCVELFStreamer : public MCELFStreamer {
static bool requiresFixups(MCContext &C, const MCExpr *Value,
const MCExpr *&LHS, const MCExpr *&RHS) {
+ auto IsMetadataOrEHFrameSection = [](const MCSection &S) -> bool {
+ // Additionally check .apple_names/.apple_types. They are fixed-size and
+ // do not need fixups. llvm-dwarfdump --apple-names does not process
+ // R_RISCV_{ADD,SUB}32 in them.
+ return S.getKind().isMetadata() || S.getName() == ".eh_frame" ||
+ S.getName() == ".apple_names" || S.getName() == ".apple_types";
+ };
+
const auto *MBE = dyn_cast<MCBinaryExpr>(Value);
if (MBE == nullptr)
return false;
@@ -212,10 +233,20 @@ class RISCVELFStreamer : public MCELFStreamer {
MCConstantExpr::create(E.getConstant(), C), C);
RHS = E.getSymB();
- return (A.isInSection() ? A.getSection().hasInstructions()
- : !A.getName().empty()) ||
- (B.isInSection() ? B.getSection().hasInstructions()
- : !B.getName().empty());
+ // TODO: when available, R_RISCV_n_PCREL should be preferred.
+
+ // Avoid pairwise relocations for symbolic difference in debug and .eh_frame
+ if (A.isInSection())
+ return !IsMetadataOrEHFrameSection(A.getSection());
+ if (B.isInSection())
+ return !IsMetadataOrEHFrameSection(B.getSection());
+ // as well as for absolute symbols.
+ return !A.getName().empty() || !B.getName().empty();
+ }
+
+ void reset() override {
+ static_cast<RISCVTargetStreamer *>(getTargetStreamer())->reset();
+ MCELFStreamer::reset();
}
public:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 7ce7dafb8ca1..7ca2f5ab5623 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -29,6 +29,7 @@ private:
SmallVector<AttributeItem, 64> Contents;
MCSection *AttributeSection = nullptr;
+ const MCSubtargetInfo &STI;
AttributeItem *getAttributeItem(unsigned Attribute) {
for (size_t i = 0; i < Contents.size(); ++i)
@@ -91,6 +92,8 @@ private:
void finishAttributeSection() override;
size_t calculateContentSize() const;
+ void reset() override;
+
public:
MCELFStreamer &getStreamer();
RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
@@ -103,6 +106,8 @@ public:
void emitDirectiveOptionNoRVC() override;
void emitDirectiveOptionRelax() override;
void emitDirectiveOptionNoRelax() override;
+
+ void finish() override;
};
MCELFStreamer *createRISCVELFStreamer(MCContext &C,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 3268740849f0..7f88589374dd 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -146,7 +146,7 @@ void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo,
if ((FenceArg & RISCVFenceField::W) != 0)
O << 'w';
if (FenceArg == 0)
- O << "unknown";
+ O << "0";
}
void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
@@ -156,12 +156,12 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
O << RISCVFPRndMode::roundingModeToString(FRMArg);
}
-void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void RISCVInstPrinter::printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNo);
- assert(MO.isReg() && "printAtomicMemOp can only print register operands");
+ assert(MO.isReg() && "printZeroOffsetMemOp can only print register operands");
O << "(";
printRegName(O, MO.getReg());
O << ")";
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index d078ead2c8ad..763ce9c95d73 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -40,8 +40,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printAtomicMemOp(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printVMaskReg(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 1078403a3fd2..7c062387fecd 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/EndianStream.h"
@@ -46,7 +47,7 @@ public:
RISCVMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
: Ctx(ctx), MCII(MCII) {}
- ~RISCVMCCodeEmitter() override {}
+ ~RISCVMCCodeEmitter() override = default;
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -93,7 +94,6 @@ private:
} // end anonymous namespace
MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new RISCVMCCodeEmitter(Ctx, MCII);
}
@@ -132,9 +132,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
const MCExpr *CallExpr = Func.getExpr();
// Emit AUIPC Ra, Func with R_RISCV_CALL relocation type.
- TmpInst = MCInstBuilder(RISCV::AUIPC)
- .addReg(Ra)
- .addOperand(MCOperand::createExpr(CallExpr));
+ TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
support::endian::write(OS, Binary, support::little);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 65714b914c60..336289cf107b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
index 9c9d9221578c..554711e87521 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
@@ -13,6 +13,7 @@
#include "RISCVMCObjectFileInfo.h"
#include "RISCVMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 07c2be624932..917d93479f18 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -77,11 +77,9 @@ createRISCVMCObjectFileInfo(MCContext &Ctx, bool PIC,
static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
- if (CPU.empty())
+ if (CPU.empty() || CPU == "generic")
CPU = TT.isArch64Bit() ? "generic-rv64" : "generic-rv32";
- if (CPU == "generic")
- report_fatal_error(Twine("CPU 'generic' is not supported. Use ") +
- (TT.isArch64Bit() ? "generic-rv64" : "generic-rv32"));
+
return createRISCVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
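
The practical effect is that an explicit -mcpu=generic now degrades gracefully to the per-XLEN generic model instead of dying with a fatal error. Roughly (a sketch of the assumed mapping, not an upstream API):

    // Hypothetical helper capturing the normalization above.
    StringRef normalizeRISCVCPU(StringRef CPU, bool IsRV64) {
      if (CPU.empty() || CPU == "generic")
        return IsRV64 ? "generic-rv64" : "generic-rv32";
      return CPU;
    }
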
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index 5216a689715a..276fc9efb6c0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -29,7 +29,6 @@ class MCSubtargetInfo;
class Target;
MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index e935179e5f9b..d19da6bd3664 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -18,10 +18,9 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
int Cost = 0;
for (auto Instr : Res) {
- bool Compressed;
+ // Assume instructions that aren't listed aren't compressible.
+ bool Compressed = false;
switch (Instr.Opc) {
- default:
- llvm_unreachable("Unexpected opcode");
case RISCV::SLLI:
case RISCV::SRLI:
Compressed = true;
@@ -31,9 +30,6 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
case RISCV::LUI:
Compressed = isInt<6>(Instr.Imm);
break;
- case RISCV::ADD_UW:
- Compressed = false;
- break;
}
// Two RVC instructions take the same space as one RVI instruction, but
// can take longer to execute than the single RVI instruction. Thus, we
@@ -77,6 +73,12 @@ static void generateInstSeqImpl(int64_t Val,
assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
+ // Use BSETI for a single bit.
+ if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val)) {
+ Res.push_back(RISCVMatInt::Inst(RISCV::BSETI, Log2_64(Val)));
+ return;
+ }
+
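
For a constant with a single set bit this saves an instruction; for example (assuming Zbs, illustrative register choice):

    // Materializing 1 << 42:
    //   without Zbs: ADDI a0, zero, 1 ; SLLI a0, a0, 42   (two instructions)
    //   with Zbs:    BSETI a0, zero, 42                   (one instruction)
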
// In the worst case, for a full 64-bit constant, a sequence of 8 instructions
// (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
// that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
@@ -101,43 +103,53 @@ static void generateInstSeqImpl(int64_t Val,
// performed when the recursion returns.
int64_t Lo12 = SignExtend64<12>(Val);
- int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
- int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
- Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
+ Val = (uint64_t)Val - (uint64_t)Lo12;
- // If the remaining bits don't fit in 12 bits, we might be able to reduce the
- // shift amount in order to use LUI which will zero the lower 12 bits.
+ int ShiftAmount = 0;
bool Unsigned = false;
- if (ShiftAmount > 12 && !isInt<12>(Hi52)) {
- if (isInt<32>((uint64_t)Hi52 << 12)) {
- // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
- ShiftAmount -= 12;
- Hi52 = (uint64_t)Hi52 << 12;
- } else if (isUInt<32>((uint64_t)Hi52 << 12) &&
- ActiveFeatures[RISCV::FeatureStdExtZba]) {
- // Reduce the shift amount and add zeros to the LSBs so it will match
- // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
- ShiftAmount -= 12;
- Hi52 = ((uint64_t)Hi52 << 12) | (0xffffffffull << 32);
+
+ // Val might now be valid for LUI without needing a shift.
+ if (!isInt<32>(Val)) {
+ ShiftAmount = findFirstSet((uint64_t)Val);
+ Val >>= ShiftAmount;
+
+ // If the remaining bits don't fit in 12 bits, we might be able to reduce the
+ // shift amount in order to use LUI which will zero the lower 12 bits.
+ if (ShiftAmount > 12 && !isInt<12>(Val)) {
+ if (isInt<32>((uint64_t)Val << 12)) {
+ // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
+ ShiftAmount -= 12;
+ Val = (uint64_t)Val << 12;
+ } else if (isUInt<32>((uint64_t)Val << 12) &&
+ ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ // Reduce the shift amount and add zeros to the LSBs so it will match
+ // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
+ ShiftAmount -= 12;
+ Val = ((uint64_t)Val << 12) | (0xffffffffull << 32);
+ Unsigned = true;
+ }
+ }
+
+ // Try to use SLLI_UW for Val when it is uint32 but not int32.
+ if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
+ ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
+ // SLLI_UW.
+ Val = ((uint64_t)Val) | (0xffffffffull << 32);
Unsigned = true;
}
}
- // Try to use SLLI_UW for Hi52 when it is uint32 but not int32.
- if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) &&
- ActiveFeatures[RISCV::FeatureStdExtZba]) {
- // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
- // SLLI_UW.
- Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32);
- Unsigned = true;
- }
+ generateInstSeqImpl(Val, ActiveFeatures, Res);
- generateInstSeqImpl(Hi52, ActiveFeatures, Res);
+ // Skip shift if we were able to use LUI directly.
+ if (ShiftAmount) {
+ if (Unsigned)
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount));
+ else
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
+ }
- if (Unsigned)
- Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount));
- else
- Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}
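
A worked example of the restructured flow (illustrative register choice), for Val = 0xABCD00000000: Lo12 is 0, so Val is unchanged; it is not isInt<32>, so ShiftAmount becomes 32 and Val becomes 0xABCD; that does not fit in 12 bits, but (0xABCD << 12) is isInt<32>, so the shift shrinks to 20 and Val becomes 0xABCD000, which a single LUI produces:

    // LUI  a0, 0xABCD    ; a0 = 0x000000000ABCD000
    // SLLI a0, a0, 20    ; a0 = 0x0000ABCD00000000
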
@@ -166,6 +178,24 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
RISCVMatInt::InstSeq Res;
generateInstSeqImpl(Val, ActiveFeatures, Res);
+ // If there are trailing zeros, try generating a sign extended constant with
+ // no trailing zeros and use a final SLLI to restore them.
+ if ((Val & 1) == 0 && Res.size() > 2) {
+ unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
+ int64_t ShiftedVal = Val >> TrailingZeros;
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SLLI, TrailingZeros));
+
+ // Keep the new sequence if it is an improvement.
+ if (TmpSeq.size() < Res.size()) {
+ Res = TmpSeq;
+ // A 2 instruction sequence is the best we can do.
+ if (Res.size() <= 2)
+ return Res;
+ }
+ }
+
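
This mirrors the leading-zero transform that follows it: strip the zeros, materialize the smaller constant, then restore the zeros with one shift, keeping whichever sequence is shorter. In sketch form:

    // TmpSeq = materialize(Val >> ctz(Val)); TmpSeq += SLLI ctz(Val);
    // Res = shorter of (Res, TmpSeq); once Res is down to 2 instructions the
    // search stops, since 2 is the best any sequence here can achieve.
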
// If the constant is positive we might be able to generate a shifted constant
// with no leading zeros and use a final SRLI to restore them.
if (Val > 0 && Res.size() > 2) {
@@ -302,32 +332,34 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
if (TmpSeq.size() < Res.size())
Res = TmpSeq;
- }
- // Try to use LUI+SH*ADD+ADDI.
- int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
- int64_t Lo12 = SignExtend64<12>(Val);
- Div = 0;
- if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
- Div = 3;
- Opc = RISCV::SH1ADD;
- } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
- Div = 5;
- Opc = RISCV::SH2ADD;
- } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
- Div = 9;
- Opc = RISCV::SH3ADD;
- }
- // Build the new instruction sequence.
- if (Div > 0) {
- // For Val that has zero Lo12 (implies Val equals to Hi52) should has
- // already been processed to LUI+SH*ADD by previous optimization.
- assert(Lo12 != 0 &&
- "unexpected instruction sequence for immediate materialisation");
- generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
- TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
- TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
- if (TmpSeq.size() < Res.size())
- Res = TmpSeq;
+ } else {
+ // Try to use LUI+SH*ADD+ADDI.
+ int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
+ int64_t Lo12 = SignExtend64<12>(Val);
+ Div = 0;
+ if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
+ Div = 3;
+ Opc = RISCV::SH1ADD;
+ } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
+ Div = 5;
+ Opc = RISCV::SH2ADD;
+ } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
+ Div = 9;
+ Opc = RISCV::SH3ADD;
+ }
+ // Build the new instruction sequence.
+ if (Div > 0) {
+      // A Val with a zero Lo12 (which implies Val equals Hi52) should already
+      // have been folded into LUI+SH*ADD by the previous optimization.
+ assert(Lo12 != 0 &&
+ "unexpected instruction sequence for immediate materialisation");
+ assert(TmpSeq.empty() && "Expected empty TmpSeq");
+ generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
}
}
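
For instance (assuming Zba, illustrative register choice), Val = 0xC0000123 has Hi52 = 0xC0000000, which is divisible by 3 with an int32 quotient of 0x40000000:

    // LUI    a0, 0x40000    ; a0 = 0x40000000 (Hi52 / 3)
    // SH1ADD a0, a0, a0     ; a0 = (a0 << 1) + a0 = 0xC0000000
    // ADDI   a0, a0, 0x123  ; a0 = 0xC0000123
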
@@ -362,5 +394,30 @@ int getIntMatCost(const APInt &Val, unsigned Size,
}
return std::max(1, Cost);
}
+
+OpndKind Inst::getOpndKind() const {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case RISCV::LUI:
+ return RISCVMatInt::Imm;
+ case RISCV::ADD_UW:
+ return RISCVMatInt::RegX0;
+ case RISCV::SH1ADD:
+ case RISCV::SH2ADD:
+ case RISCV::SH3ADD:
+ return RISCVMatInt::RegReg;
+ case RISCV::ADDI:
+ case RISCV::ADDIW:
+ case RISCV::SLLI:
+ case RISCV::SRLI:
+ case RISCV::SLLI_UW:
+ case RISCV::RORI:
+ case RISCV::BSETI:
+ case RISCV::BCLRI:
+ return RISCVMatInt::RegImm;
+ }
+}
+
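
The enum records how each step of a materialization sequence consumes its operands; a hypothetical consumer (not part of this patch; Seq is an InstSeq) could dispatch on it like so:

    for (const RISCVMatInt::Inst &I : Seq) {
      switch (I.getOpndKind()) {
      case RISCVMatInt::Imm:    /* LUI    rd, Imm     */ break;
      case RISCVMatInt::RegImm: /* OPC    rd, rs, Imm */ break;
      case RISCVMatInt::RegReg: /* SHxADD rd, rs, rs  */ break;
      case RISCVMatInt::RegX0:  /* ADD.UW rd, rs, x0  */ break;
      }
    }
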
} // namespace RISCVMatInt
} // namespace llvm
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index 6a8e0c640001..90c29f01c43d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -17,11 +17,21 @@ namespace llvm {
class APInt;
namespace RISCVMatInt {
+
+enum OpndKind {
+ RegImm, // ADDI/ADDIW/SLLI/SRLI/BSETI/BCLRI
+ Imm, // LUI
+ RegReg, // SH1ADD/SH2ADD/SH3ADD
+ RegX0, // ADD_UW
+};
+
struct Inst {
unsigned Opc;
int64_t Imm;
Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {}
+
+ OpndKind getOpndKind() const;
};
using InstSeq = SmallVector<Inst, 8>;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 2f016374e6a2..5f9ed77d07cf 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -22,6 +22,7 @@ using namespace llvm;
RISCVTargetStreamer::RISCVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
void RISCVTargetStreamer::finish() { finishAttributeSection(); }
+void RISCVTargetStreamer::reset() {}
void RISCVTargetStreamer::emitDirectiveOptionPush() {}
void RISCVTargetStreamer::emitDirectiveOptionPop() {}
@@ -38,6 +39,10 @@ void RISCVTargetStreamer::emitTextAttribute(unsigned Attribute,
void RISCVTargetStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {}
+void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) {
+ assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialized target ABI");
+ TargetABI = ABI;
+}
void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
if (STI.hasFeature(RISCV::FeatureRV32E))
@@ -45,15 +50,10 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
else
emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16);
- unsigned XLen = STI.hasFeature(RISCV::Feature64Bit) ? 64 : 32;
- std::vector<std::string> FeatureVector;
- RISCVFeatures::toFeatureVector(FeatureVector, STI.getFeatureBits());
-
- auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector);
+ auto ParseResult = RISCVFeatures::parseFeatureBits(
+ STI.hasFeature(RISCV::Feature64Bit), STI.getFeatureBits());
if (!ParseResult) {
- /* Assume any error about features should handled earlier. */
- consumeError(ParseResult.takeError());
- llvm_unreachable("Parsing feature error when emitTargetAttributes?");
+ report_fatal_error(ParseResult.takeError());
} else {
auto &ISAInfo = *ParseResult;
emitTextAttribute(RISCVAttrs::ARCH, ISAInfo->toString());
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 171780d94ce7..0d35d0b698a9 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVTARGETSTREAMER_H
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVTARGETSTREAMER_H
+#include "RISCV.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -17,9 +18,12 @@ namespace llvm {
class formatted_raw_ostream;
class RISCVTargetStreamer : public MCTargetStreamer {
+ RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
+
public:
RISCVTargetStreamer(MCStreamer &S);
void finish() override;
+ virtual void reset();
virtual void emitDirectiveOptionPush();
virtual void emitDirectiveOptionPop();
@@ -36,6 +40,8 @@ public:
StringRef StringValue);
void emitTargetAttributes(const MCSubtargetInfo &STI);
+ void setTargetABI(RISCVABI::ABI ABI);
+ RISCVABI::ABI getTargetABI() const { return TargetABI; }
};
// This part is for ascii assembly output
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 03462240fd93..917837a307ad 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -32,10 +32,14 @@ class PassRegistry;
bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP);
-bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
+bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &MCOp, const AsmPrinter &AP);
-FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
+FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createRISCVMakeCompressibleOptPass();
+void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
@@ -55,6 +59,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
+FunctionPass *createRISCVRedundantCopyEliminationPass();
+void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
+
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index e32a8fb010de..e783ef38b448 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -41,6 +41,13 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">,
AssemblerPredicate<(all_of FeatureStdExtD),
"'D' (Double-Precision Floating-Point)">;
+def FeatureStdExtZihintpause
+ : SubtargetFeature<"zihintpause", "HasStdExtZihintpause", "true",
+ "'zihintpause' (Pause Hint)">;
+def HasStdExtZihintpause : Predicate<"Subtarget->hasStdExtZihintpause()">,
+ AssemblerPredicate<(all_of FeatureStdExtZihintpause),
+ "'Zihintpause' (Pause Hint)">;
+
def FeatureStdExtZfhmin
: SubtargetFeature<"zfhmin", "HasStdExtZfhmin", "true",
"'Zfhmin' (Half-Precision Floating-Point Minimal)",
@@ -63,6 +70,43 @@ def HasStdExtZfhOrZfhmin
"'Zfh' (Half-Precision Floating-Point) or "
"'Zfhmin' (Half-Precision Floating-Point Minimal)">;
+def FeatureStdExtZfinx
+ : SubtargetFeature<"zfinx", "HasStdExtZfinx", "true",
+ "'Zfinx' (Float in Integer)">;
+def HasStdExtZfinx : Predicate<"Subtarget->hasStdExtZfinx()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfinx),
+ "'Zfinx' (Float in Integer)">;
+
+def FeatureStdExtZdinx
+ : SubtargetFeature<"zdinx", "HasStdExtZdinx", "true",
+ "'Zdinx' (Double in Integer)",
+ [FeatureStdExtZfinx]>;
+def HasStdExtZdinx : Predicate<"Subtarget->hasStdExtZdinx()">,
+ AssemblerPredicate<(all_of FeatureStdExtZdinx),
+ "'Zdinx' (Double in Integer)">;
+
+def FeatureStdExtZhinxmin
+ : SubtargetFeature<"zhinxmin", "HasStdExtZhinxmin", "true",
+ "'Zhinxmin' (Half Float in Integer Minimal)",
+ [FeatureStdExtZfinx]>;
+def HasStdExtZhinxmin : Predicate<"Subtarget->hasStdExtZhinxmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZhinxmin),
+ "'Zhinxmin' (Half Float in Integer Minimal)">;
+
+def FeatureStdExtZhinx
+ : SubtargetFeature<"zhinx", "HasStdExtZhinx", "true",
+ "'Zhinx' (Half Float in Integer)",
+ [FeatureStdExtZfinx]>;
+def HasStdExtZhinx : Predicate<"Subtarget->hasStdExtZhinx()">,
+ AssemblerPredicate<(all_of FeatureStdExtZhinx),
+ "'Zhinx' (Half Float in Integer)">;
+
+def HasStdExtZhinxOrZhinxmin
+ : Predicate<"Subtarget->hasStdExtZhinx() || Subtarget->hasStdExtZhinxmin()">,
+ AssemblerPredicate<(any_of FeatureStdExtZhinx, FeatureStdExtZhinxmin),
+ "'Zhinx' (Half Float in Integer) or "
+ "'Zhinxmin' (Half Float in Integer Minimal)">;
+
def FeatureStdExtC
: SubtargetFeature<"c", "HasStdExtC", "true",
"'C' (Compressed Instructions)">;
@@ -290,13 +334,13 @@ def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">,
AssemblerPredicate<(all_of(not FeatureNoRVCHints)),
"RVC Hint Instructions">;
-def FeatureStdExtZvl32b : SubtargetFeature<"zvl32b", "ZvlLen", "ExtZvl::Zvl32b",
+def FeatureStdExtZvl32b : SubtargetFeature<"zvl32b", "ZvlLen", "32",
"'Zvl' (Minimum Vector Length) 32">;
foreach i = { 6-15 } in {
defvar I = !shl(1, i);
def FeatureStdExtZvl#I#b :
- SubtargetFeature<"zvl"#I#"b", "ZvlLen", "ExtZvl::Zvl"#I#"b",
+ SubtargetFeature<"zvl"#I#"b", "ZvlLen", !cast<string>(I),
"'Zvl' (Minimum Vector Length) "#I,
[!cast<SubtargetFeature>("FeatureStdExtZvl"#!srl(I, 1)#"b")]>;
}
@@ -333,24 +377,50 @@ def FeatureStdExtZve64d
def FeatureStdExtV
: SubtargetFeature<"v", "HasStdExtV", "true",
"'V' (Vector Extension for Application Processors)",
- [FeatureStdExtZvl128b, FeatureStdExtF, FeatureStdExtD]>;
+ [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>;
def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">,
AssemblerPredicate<
- (any_of FeatureStdExtZve32x, FeatureStdExtV),
+ (any_of FeatureStdExtZve32x),
"'V' (Vector Extension for Application Processors), 'Zve32x' or "
"'Zve64x' (Vector Extensions for Embedded Processors)">;
def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
AssemblerPredicate<
- (any_of FeatureStdExtZve64x, FeatureStdExtV),
+ (any_of FeatureStdExtZve64x),
"'V' (Vector Extension for Application Processors) or 'Zve64x' "
"(Vector Extensions for Embedded Processors)">;
def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
AssemblerPredicate<
- (any_of FeatureStdExtZve32f, FeatureStdExtV),
+ (any_of FeatureStdExtZve32f),
"'V' (Vector Extension for Application Processors), 'Zve32f', "
"'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;
+def FeatureStdExtZvfh
+ : SubtargetFeature<"experimental-zvfh", "HasStdExtZvfh", "true",
+ "'Zvfh' (Vector Half-Precision Floating-Point)",
+ [FeatureStdExtZve32f]>;
+
+def FeatureStdExtZicbom
+ : SubtargetFeature<"zicbom", "HasStdExtZicbom", "true",
+ "'Zicbom' (Cache-Block Management Instructions)">;
+def HasStdExtZicbom : Predicate<"Subtarget->hasStdExtZicbom()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicbom),
+ "'Zicbom' (Cache-Block Management Instructions)">;
+
+def FeatureStdExtZicboz
+ : SubtargetFeature<"zicboz", "HasStdExtZicboz", "true",
+ "'Zicboz' (Cache-Block Zero Instructions)">;
+def HasStdExtZicboz : Predicate<"Subtarget->hasStdExtZicboz()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicboz),
+ "'Zicboz' (Cache-Block Zero Instructions)">;
+
+def FeatureStdExtZicbop
+ : SubtargetFeature<"zicbop", "HasStdExtZicbop", "true",
+ "'Zicbop' (Cache-Block Prefetch Instructions)">;
+def HasStdExtZicbop : Predicate<"Subtarget->hasStdExtZicbop()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicbop),
+ "'Zicbop' (Cache-Block Prefetch Instructions)">;
+
def Feature64Bit
: SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
@@ -381,6 +451,19 @@ foreach i = {1-31} in
def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore",
"true", "Enable save/restore.">;
+def FeatureUnalignedScalarMem
+ : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
+ "true", "Has reasonably performant unaligned scalar "
+ "loads and stores">;
+
+def TuneLUIADDIFusion
+ : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
+ "true", "Enable LUI+ADDI macrofusion">;
+
+def TuneNoDefaultUnroll
+ : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll", "false",
+ "Disable default unroll preference.">;
+
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
"SiFive 7-Series processors">;
@@ -408,14 +491,17 @@ include "RISCVSchedSiFive7.td"
def : ProcessorModel<"generic-rv32", NoSchedModel, []>;
def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>;
+// Support "generic" for compatibility with other targets. The triple is used
+// to select the appropriate generic-rv32/generic-rv64 model.
+def : ProcessorModel<"generic", NoSchedModel, []>;
def : ProcessorModel<"rocket-rv32", RocketModel, []>;
def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>;
def : ProcessorModel<"sifive-7-rv32", SiFive7Model, [],
- [TuneSiFive7]>;
+ [TuneSiFive7, TuneNoDefaultUnroll]>;
def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit],
- [TuneSiFive7]>;
+ [TuneSiFive7, TuneNoDefaultUnroll]>;
def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM,
FeatureStdExtC]>;
@@ -442,7 +528,7 @@ def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
FeatureStdExtC],
- [TuneSiFive7]>;
+ [TuneSiFive7, TuneNoDefaultUnroll]>;
def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit,
FeatureStdExtM,
@@ -467,7 +553,7 @@ def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit,
FeatureStdExtF,
FeatureStdExtD,
FeatureStdExtC],
- [TuneSiFive7]>;
+ [TuneSiFive7, TuneNoDefaultUnroll]>;
def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
FeatureStdExtM,
@@ -482,7 +568,7 @@ def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit,
FeatureStdExtF,
FeatureStdExtD,
FeatureStdExtC],
- [TuneSiFive7]>;
+ [TuneSiFive7, TuneNoDefaultUnroll]>;
//===----------------------------------------------------------------------===//
// Define the RISC-V target.
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 9fed6e7baadc..5b2a247ebda0 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -38,12 +38,13 @@ STATISTIC(RISCVNumInstrsCompressed,
namespace {
class RISCVAsmPrinter : public AsmPrinter {
- const MCSubtargetInfo *STI;
+ const MCSubtargetInfo *MCSTI;
+ const RISCVSubtarget *STI;
public:
explicit RISCVAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {}
+ : AsmPrinter(TM, std::move(Streamer)), MCSTI(TM.getMCSubtargetInfo()) {}
StringRef getPassName() const override { return "RISCV Assembly Printer"; }
@@ -62,12 +63,14 @@ public:
// Wrapper needed for tblgenned pseudo lowering.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
- return LowerRISCVMachineOperandToMCOperand(MO, MCOp, *this);
+ return lowerRISCVMachineOperandToMCOperand(MO, MCOp, *this);
}
void emitStartOfAsmFile(Module &M) override;
void emitEndOfAsmFile(Module &M) override;
+ void emitFunctionEntryLabel() override;
+
private:
void emitAttributes();
};
@@ -170,7 +173,8 @@ bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
MCSubtargetInfo &NewSTI =
OutStreamer->getContext().getSubtargetCopy(*TM.getMCSubtargetInfo());
NewSTI.setFeatureBits(MF.getSubtarget().getFeatureBits());
- STI = &NewSTI;
+ MCSTI = &NewSTI;
+ STI = &MF.getSubtarget<RISCVSubtarget>();
SetupMachineFunction(MF);
emitFunctionBody();
@@ -193,7 +197,14 @@ void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) {
void RISCVAsmPrinter::emitAttributes() {
RISCVTargetStreamer &RTS =
static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
- RTS.emitTargetAttributes(*STI);
+ RTS.emitTargetAttributes(*MCSTI);
+}
+
+void RISCVAsmPrinter::emitFunctionEntryLabel() {
+ AsmPrinter::emitFunctionEntryLabel();
+ RISCVTargetStreamer &RTS =
+ static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
+ RTS.setTargetABI(STI->getTargetABI());
}
// Force static initialization.
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 0c5c13db7112..e4e01d9f6f2f 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -290,7 +290,7 @@ bool RISCVExpandPseudo::expandVSPILL(MachineBasicBlock &MBB,
Register SrcReg = MBBI->getOperand(0).getReg();
Register Base = MBBI->getOperand(1).getReg();
Register VL = MBBI->getOperand(2).getReg();
- auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode());
+ auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MBBI->getOpcode());
if (!ZvlssegInfo)
return false;
unsigned NF = ZvlssegInfo->first;
@@ -314,10 +314,15 @@ bool RISCVExpandPseudo::expandVSPILL(MachineBasicBlock &MBB,
assert(LMUL == 1 && "LMUL must be 1, 2, or 4.");
for (unsigned I = 0; I < NF; ++I) {
+      // Add an implicit use of the super register to indicate that only part
+      // of it is used. This prevents the machine verifier from complaining
+      // when part of the register is undef; see the comment in
+      // MachineVerifier::checkLiveness for details.
BuildMI(MBB, MBBI, DL, TII->get(Opcode))
.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I))
.addReg(Base)
- .addMemOperand(*(MBBI->memoperands_begin()));
+ .addMemOperand(*(MBBI->memoperands_begin()))
+ .addReg(SrcReg, RegState::Implicit);
if (I != NF - 1)
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base)
.addReg(Base)
@@ -335,7 +340,7 @@ bool RISCVExpandPseudo::expandVRELOAD(MachineBasicBlock &MBB,
Register DestReg = MBBI->getOperand(0).getReg();
Register Base = MBBI->getOperand(1).getReg();
Register VL = MBBI->getOperand(2).getReg();
- auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode());
+ auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MBBI->getOpcode());
if (!ZvlssegInfo)
return false;
unsigned NF = ZvlssegInfo->first;
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index ad003404d793..57d8ba6f0161 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -21,6 +21,8 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCDwarf.h"
+#include <algorithm>
+
using namespace llvm;
// For now we use x18, a.k.a s2, as pointer to shadow call stack.
@@ -250,6 +252,7 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
// Determines the size of the frame and maximum call frame size.
void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
// Get the number of bytes to allocate from the FrameInfo.
uint64_t FrameSize = MFI.getStackSize();
@@ -262,6 +265,28 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// Update frame info.
MFI.setStackSize(FrameSize);
+
+ // When using SP or BP to access stack objects, we may require extra padding
+ // to ensure the bottom of the RVV stack is correctly aligned within the main
+ // stack. We calculate this as the amount required to align the scalar local
+ // variable section up to the RVV alignment.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) {
+ int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() -
+ RVFI->getVarArgsSaveSize();
+ if (auto RVVPadding =
+ offsetToAlignment(ScalarLocalVarSize, RVFI->getRVVStackAlign()))
+ RVFI->setRVVPadding(RVVPadding);
+ }
+}
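
Numerically (a made-up layout): with a 40-byte frame, a 16-byte callee-saved area, no varargs, and a 32-byte RVV alignment, the scalar locals occupy 24 bytes and 8 bytes of padding are recorded:

    // ScalarLocalVarSize = 40 - 16 - 0 = 24
    // RVVPadding = offsetToAlignment(24, Align(32)) = 8
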
+
+// Returns the stack size including RVV padding (when required), rounded back
+// up to the required stack alignment.
+uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding(
+ const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign());
}
void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB,
@@ -280,21 +305,43 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB,
.addReg(SrcReg)
.addImm(Val)
.setMIFlag(Flag);
- } else {
- unsigned Opc = RISCV::ADD;
- bool isSub = Val < 0;
- if (isSub) {
- Val = -Val;
- Opc = RISCV::SUB;
- }
+ return;
+ }
- Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag);
- BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+ // Try to split the offset across two ADDIs. We need to keep the stack pointer
+ // aligned after each ADDI. We need to determine the maximum value we can put
+ // in each ADDI. In the negative direction, we can use -2048 which is always
+ // sufficiently aligned. In the positive direction, we need to find the
+ // largest 12-bit immediate that is aligned. Exclude -4096 since it can be
+ // created with LUI.
+ assert(getStackAlign().value() < 2048 && "Stack alignment too large");
+ int64_t MaxPosAdjStep = 2048 - getStackAlign().value();
+ if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) {
+ int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep;
+ Val -= FirstAdj;
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg)
.addReg(SrcReg)
- .addReg(ScratchReg, RegState::Kill)
+ .addImm(FirstAdj)
+ .setMIFlag(Flag);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(Val)
.setMIFlag(Flag);
+ return;
+ }
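
With the default 16-byte stack alignment, MaxPosAdjStep is 2032, so an adjustment of +3000 or -3000 becomes two ADDIs that each keep sp aligned:

    // +3000: ADDI sp, sp, 2032  ; ADDI sp, sp, 968
    // -3000: ADDI sp, sp, -2048 ; ADDI sp, sp, -952
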
+
+ unsigned Opc = RISCV::ADD;
+ if (Val < 0) {
+ Val = -Val;
+ Opc = RISCV::SUB;
}
+
+ Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag);
+ BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+ .addReg(SrcReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .setMIFlag(Flag);
}
// Returns the register used to hold the frame pointer.
@@ -401,7 +448,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
// investigation. Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding();
+ uint64_t StackSize = getStackSizeWithRVVPadding(MF);
uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
@@ -482,7 +529,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Emit the second SP adjustment after saving callee saved registers.
if (FirstSPAdjustAmount) {
- uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
+ uint64_t SecondSPAdjustAmount =
+ getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
@@ -492,8 +540,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// don't emit an sp-based .cfi_def_cfa_offset
if (!hasFP(MF)) {
// Emit ".cfi_def_cfa_offset StackSize"
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(
+ nullptr, getStackSizeWithRVVPadding(MF)));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
@@ -561,15 +609,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.end();
DebugLoc DL;
if (!MBB.empty()) {
- MBBI = MBB.getFirstTerminator();
- if (MBBI == MBB.end())
- MBBI = MBB.getLastNonDebugInstr();
- DL = MBBI->getDebugLoc();
+ MBBI = MBB.getLastNonDebugInstr();
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
- // If this is not a terminator, the actual insert location should be after the
- // last instruction.
- if (!MBBI->isTerminator())
- MBBI = std::next(MBBI);
+ MBBI = MBB.getFirstTerminator();
// If callee-saved registers are saved via libcall, place stack adjustment
// before this call.
@@ -587,7 +631,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
if (!CSI.empty())
LastFrameDestroy = std::prev(MBBI, CSI.size());
- uint64_t StackSize = MFI.getStackSize() + RVFI->getRVVPadding();
+ uint64_t StackSize = getStackSizeWithRVVPadding(MF);
uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
@@ -595,7 +639,15 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// Restore the stack pointer using the value of the frame pointer. Only
// necessary if the stack pointer was modified, meaning the stack size is
// unknown.
- if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
+ //
+  // To keep the stack pointer correct throughout the exception-handling
+  // region, we must also restore it from the frame pointer when we do not
+  // reserve stack space for outgoing arguments in the prologue/epilogue.
+  // Normally, checking for variable-sized objects would be enough, but that
+  // reservation is also skipped when the stack holds vector objects.
+ if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
+ !hasReservedCallFrame(MF)) {
assert(hasFP(MF) && "frame pointer should not have been eliminated");
adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, -FPOffset,
MachineInstr::FrameDestroy);
@@ -607,7 +659,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
if (FirstSPAdjustAmount) {
- uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
+ uint64_t SecondSPAdjustAmount =
+ getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");
@@ -665,134 +718,138 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
if (FirstSPAdjustAmount)
Offset += StackOffset::getFixed(FirstSPAdjustAmount);
else
- Offset +=
- StackOffset::getFixed(MFI.getStackSize() + RVFI->getRVVPadding());
- } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
+ Offset += StackOffset::getFixed(getStackSizeWithRVVPadding(MF));
+ return Offset;
+ }
+
+ if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
// If the stack was realigned, the frame pointer is set in order to allow
// SP to be restored, so we need another base register to record the stack
// after realignment.
+ // |--------------------------| -- <-- FP
+ // | callee-allocated save | | <----|
+ // | area for register varargs| | |
+ // |--------------------------| | |
+ // | callee-saved registers | | |
+ // |--------------------------| -- |
+ // | realignment (the size of | | |
+ // | this area is not counted | | |
+ // | in MFI.getStackSize()) | | |
+ // |--------------------------| -- |-- MFI.getStackSize()
+ // | RVV alignment padding | | |
+ // | (not counted in | | |
+ // | MFI.getStackSize() but | | |
+ // | counted in | | |
+ // | RVFI.getRVVStackSize()) | | |
+ // |--------------------------| -- |
+ // | RVV objects | | |
+ // | (not counted in | | |
+ // | MFI.getStackSize()) | | |
+ // |--------------------------| -- |
+ // | padding before RVV | | |
+ // | (not counted in | | |
+ // | MFI.getStackSize() or in | | |
+ // | RVFI.getRVVStackSize()) | | |
+ // |--------------------------| -- |
+ // | scalar local variables | | <----'
+ // |--------------------------| -- <-- BP (if var sized objects present)
+ // | VarSize objects | |
+ // |--------------------------| -- <-- SP
if (hasBP(MF)) {
FrameReg = RISCVABI::getBPReg();
- // |--------------------------| -- <-- FP
- // | callee-saved registers | | <----.
- // |--------------------------| -- |
- // | realignment (the size of | | |
- // | this area is not counted | | |
- // | in MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | Padding after RVV | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |-- MFI.getStackSize()
- // | RVV objects | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | Padding before RVV | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | scalar local variables | | <----'
- // |--------------------------| -- <-- BP
- // | VarSize objects | |
- // |--------------------------| -- <-- SP
} else {
+ // VarSize objects must be empty in this case!
+ assert(!MFI.hasVarSizedObjects());
FrameReg = RISCV::X2;
- // |--------------------------| -- <-- FP
- // | callee-saved registers | | <----.
- // |--------------------------| -- |
- // | realignment (the size of | | |
- // | this area is not counted | | |
- // | in MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | Padding after RVV | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |-- MFI.getStackSize()
- // | RVV objects | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | Padding before RVV | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | scalar local variables | | <----'
- // |--------------------------| -- <-- SP
- }
- // The total amount of padding surrounding RVV objects is described by
- // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV
- // objects to 8 bytes.
- if (MFI.getStackID(FI) == TargetStackID::Default) {
- Offset += StackOffset::getFixed(MFI.getStackSize());
- if (FI < 0)
- Offset += StackOffset::getFixed(RVFI->getLibCallStackSize());
- } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
- Offset += StackOffset::get(
- alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8),
- RVFI->getRVVStackSize());
}
} else {
FrameReg = RI->getFrameRegister(MF);
- if (hasFP(MF)) {
- Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
- if (FI >= 0)
- Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
- // When using FP to access scalable vector objects, we need to minus
- // the frame size.
- //
- // |--------------------------| -- <-- FP
- // | callee-saved registers | |
- // |--------------------------| | MFI.getStackSize()
- // | scalar local variables | |
- // |--------------------------| -- (Offset of RVV objects is from here.)
- // | RVV objects |
- // |--------------------------|
- // | VarSize objects |
- // |--------------------------| <-- SP
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
- Offset -= StackOffset::getFixed(MFI.getStackSize());
- } else {
- // When using SP to access frame objects, we need to add RVV stack size.
- //
- // |--------------------------| -- <-- FP
- // | callee-saved registers | | <----.
- // |--------------------------| -- |
- // | Padding after RVV | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | RVV objects | | |-- MFI.getStackSize()
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | Padding before RVV | | |
- // | (not counted in | | |
- // | MFI.getStackSize()) | | |
- // |--------------------------| -- |
- // | scalar local variables | | <----'
- // |--------------------------| -- <-- SP
- //
- // The total amount of padding surrounding RVV objects is described by
- // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV
- // objects to 8 bytes.
- if (MFI.getStackID(FI) == TargetStackID::Default) {
- if (MFI.isFixedObjectIndex(FI)) {
- Offset +=
- StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding() +
- RVFI->getLibCallStackSize(),
- RVFI->getRVVStackSize());
- } else {
- Offset += StackOffset::getFixed(MFI.getStackSize());
- }
- } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
- Offset += StackOffset::get(
- alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8),
- RVFI->getRVVStackSize());
- }
+ }
+
+ if (FrameReg == getFPReg(STI)) {
+ Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
+ if (FI >= 0)
+ Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
+ // When using FP to access scalable vector objects, we need to minus
+ // the frame size.
+ //
+ // |--------------------------| -- <-- FP
+ // | callee-allocated save | |
+ // | area for register varargs| |
+ // |--------------------------| |
+ // | callee-saved registers | |
+ // |--------------------------| | MFI.getStackSize()
+ // | scalar local variables | |
+ // |--------------------------| -- (Offset of RVV objects is from here.)
+ // | RVV objects |
+ // |--------------------------|
+ // | VarSize objects |
+ // |--------------------------| <-- SP
+ if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ assert(!RI->hasStackRealignment(MF) &&
+ "Can't index across variable sized realign");
+      // We don't expect any extra RVV alignment padding, as the stack size
+      // and the RVV object sections should each be correctly aligned in
+      // their own right.
+ assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) &&
+ "Inconsistent stack layout");
+ Offset -= StackOffset::getFixed(MFI.getStackSize());
}
+ return Offset;
}
+ // This case handles indexing off both SP and BP.
+  // If indexing off SP, there must not be any variable-sized objects.
+ assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects());
+
+ // When using SP to access frame objects, we need to add RVV stack size.
+ //
+ // |--------------------------| -- <-- FP
+ // | callee-allocated save | | <----|
+ // | area for register varargs| | |
+ // |--------------------------| | |
+ // | callee-saved registers | | |
+ // |--------------------------| -- |
+ // | RVV alignment padding | | |
+ // | (not counted in | | |
+ // | MFI.getStackSize() but | | |
+ // | counted in | | |
+ // | RVFI.getRVVStackSize()) | | |
+ // |--------------------------| -- |
+ // | RVV objects | | |-- MFI.getStackSize()
+ // | (not counted in | | |
+ // | MFI.getStackSize()) | | |
+ // |--------------------------| -- |
+ // | padding before RVV | | |
+ // | (not counted in | | |
+ // | MFI.getStackSize()) | | |
+ // |--------------------------| -- |
+ // | scalar local variables | | <----'
+ // |--------------------------| -- <-- BP (if var sized objects present)
+ // | VarSize objects | |
+ // |--------------------------| -- <-- SP
+ //
+  // The total amount of padding surrounding the RVV objects is described by
+  // RVFI->getRVVPadding() and may be zero. It allows us to align the RVV
+  // objects to the required alignment.
+ if (MFI.getStackID(FI) == TargetStackID::Default) {
+ if (MFI.isFixedObjectIndex(FI)) {
+ assert(!RI->hasStackRealignment(MF) &&
+ "Can't index across variable sized realign");
+ Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) +
+ RVFI->getLibCallStackSize(),
+ RVFI->getRVVStackSize());
+ } else {
+ Offset += StackOffset::getFixed(MFI.getStackSize());
+ }
+ } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ // Ensure the base of the RVV stack is correctly aligned: add on the
+ // alignment padding.
+ int ScalarLocalVarSize =
+ MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
+ RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
+ Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize());
+ }
return Offset;
}
@@ -841,9 +898,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
-int64_t
+std::pair<int64_t, Align>
RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
- int64_t Offset = 0;
// Create a buffer of RVV objects to allocate.
SmallVector<int, 8> ObjectsToAllocate;
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
@@ -857,29 +913,78 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
}
// Allocate all RVV locals and spills
+ int64_t Offset = 0;
+ // The minimum alignment is 16 bytes.
+ Align RVVStackAlign(16);
for (int FI : ObjectsToAllocate) {
// ObjectSize in bytes.
int64_t ObjectSize = MFI.getObjectSize(FI);
+ auto ObjectAlign = std::max(Align(8), MFI.getObjectAlign(FI));
// If the data type is the fractional vector type, reserve one vector
// register for it.
if (ObjectSize < 8)
ObjectSize = 8;
- // Currently, all scalable vector types are aligned to 8 bytes.
- Offset = alignTo(Offset + ObjectSize, 8);
+ Offset = alignTo(Offset + ObjectSize, ObjectAlign);
MFI.setObjectOffset(FI, -Offset);
+ // Update the maximum alignment of the RVV stack section
+ RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
}
- return Offset;
+ // Ensure the alignment of the RVV stack. Since we want the most-aligned
+ // object right at the bottom (i.e., any padding at the top of the frame),
+ // readjust all RVV objects down by the alignment padding.
+ uint64_t StackSize = Offset;
+ if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
+ StackSize += AlignmentPadding;
+ for (int FI : ObjectsToAllocate)
+ MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+ }
+
+ return std::make_pair(StackSize, RVVStackAlign);
}
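
A numeric sketch of the bottom-up layout: two RVV objects of sizes 16 and 8 bytes with alignments 8 and 32 respectively:

    // FI0: Offset = alignTo(0 + 16, 8)  = 16 -> placed at offset -16
    // FI1: Offset = alignTo(16 + 8, 32) = 32 -> placed at offset -32
    // RVVStackAlign = 32; StackSize = 32 is already a multiple of 32, so no
    // downward readjustment of the objects is required.
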
-static bool hasRVVSpillWithFIs(MachineFunction &MF, const RISCVInstrInfo &TII) {
+static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
+  // For an RVV spill, computing a scalable stack offset requires up to two
+  // scratch registers.
+ static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;
+
+  // For an RVV spill, computing a non-scalable stack offset requires up to
+  // one scratch register.
+ static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;
+
+  // An ADDI instruction's destination register can itself be used to compute
+  // the offset, so a scalable stack offset requires up to one scratch register.
+ static constexpr unsigned ScavSlotsADDIScalableObject = 1;
+
+ static constexpr unsigned MaxScavSlotsNumKnown =
+ std::max({ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject,
+ ScavSlotsNumRVVSpillNonScalableObject});
+
+ unsigned MaxScavSlotsNum = 0;
if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
return false;
- return any_of(MF, [&TII](const MachineBasicBlock &MBB) {
- return any_of(MBB, [&TII](const MachineInstr &MI) {
- return TII.isRVVSpill(MI, /*CheckFIs*/ true);
- });
- });
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB) {
+ bool IsRVVSpill = RISCV::isRVVSpill(MI);
+ for (auto &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ bool IsScalableVectorID = MF.getFrameInfo().getStackID(MO.getIndex()) ==
+ TargetStackID::ScalableVector;
+ if (IsRVVSpill) {
+ MaxScavSlotsNum = std::max(
+ MaxScavSlotsNum, IsScalableVectorID
+ ? ScavSlotsNumRVVSpillScalableObject
+ : ScavSlotsNumRVVSpillNonScalableObject);
+ } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) {
+ MaxScavSlotsNum =
+ std::max(MaxScavSlotsNum, ScavSlotsADDIScalableObject);
+ }
+ }
+ if (MaxScavSlotsNum == MaxScavSlotsNumKnown)
+ return MaxScavSlotsNumKnown;
+ }
+ return MaxScavSlotsNum;
}
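
In summary, the walk returns the worst-case number of emergency slots demanded by any frame-index use in the function, stopping early once the known maximum of 2 is reached:

    // RVV spill of a scalable-vector FI  -> 2 scratch registers
    // RVV spill of a non-scalable FI     -> 1 scratch register
    // ADDI addressing a scalable FI      -> 1 (its destination is reusable)
    // no RVV frame-index uses            -> 0
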
void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
@@ -890,9 +995,17 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
- int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI);
+ int64_t RVVStackSize;
+ Align RVVStackAlign;
+ std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MFI);
+
RVFI->setRVVStackSize(RVVStackSize);
- const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+ RVFI->setRVVStackAlign(RVVStackAlign);
+
+ // Ensure the entire stack is aligned to at least the RVV requirement: some
+ // scalable-vector object alignments are not considered by the
+ // target-independent code.
+ MFI.ensureMaxAlignment(RVVStackAlign);
// estimateStackSize has been observed to under-estimate the final stack
// size, so give ourselves wiggle-room by checking for stack size
@@ -903,17 +1016,14 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
// RVV loads & stores have no capacity to hold the immediate address offsets
// so we must always reserve an emergency spill slot if the MachineFunction
// contains any RVV spills.
- if (!isInt<11>(MFI.estimateStackSize(MF)) || hasRVVSpillWithFIs(MF, TII)) {
- int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
- RegInfo->getSpillAlign(*RC), false);
- RS->addScavengingFrameIndex(RegScavFI);
- // For RVV, scalable stack offsets require up to two scratch registers to
- // compute the final offset. Reserve an additional emergency spill slot.
- if (RVVStackSize != 0) {
- int RVVRegScavFI = MFI.CreateStackObject(
- RegInfo->getSpillSize(*RC), RegInfo->getSpillAlign(*RC), false);
- RS->addScavengingFrameIndex(RVVRegScavFI);
- }
+ unsigned ScavSlotsNum = 0;
+ if (!isInt<11>(MFI.estimateStackSize(MF)))
+ ScavSlotsNum = 1;
+
+ ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF));
+ for (unsigned i = 0; i < ScavSlotsNum; i++) {
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(
+ RegInfo->getSpillSize(*RC), RegInfo->getSpillAlign(*RC), false));
}
if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) {
@@ -930,14 +1040,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
Size += MFI.getObjectSize(FrameIdx);
}
RVFI->setCalleeSavedStackSize(Size);
-
- // Padding required to keep the RVV stack aligned to 8 bytes
- // within the main stack. We only need this when not using FP.
- if (RVVStackSize && !hasFP(MF) && Size % 8 != 0) {
- // Because we add the padding to the size of the stack, adding
- // getStackAlign() will keep it aligned.
- RVFI->setRVVPadding(getStackAlign().value());
- }
}
static bool hasRVVFrameObject(const MachineFunction &MF) {
@@ -1012,23 +1114,23 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- uint64_t StackSize = MFI.getStackSize();
+ uint64_t StackSize = getStackSizeWithRVVPadding(MF);
- // Disable SplitSPAdjust if save-restore libcall used. The callee saved
+ // Disable SplitSPAdjust if save-restore libcall is used. The callee-saved
// registers will be pushed by the save-restore libcalls, so we don't have to
// split the SP adjustment in this case.
if (RVFI->getLibCallStackSize())
return 0;
- // Return the FirstSPAdjustAmount if the StackSize can not fit in signed
- // 12-bit and there exists a callee saved register need to be pushed.
+ // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
+ // 12-bit and there exists a callee-saved register needing to be pushed.
if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
- // FirstSPAdjustAmount is choosed as (2048 - StackAlign)
- // because 2048 will cause sp = sp + 2048 in epilogue split into
- // multi-instructions. The offset smaller than 2048 can fit in signle
- // load/store instruction and we have to stick with the stack alignment.
- // 2048 is 16-byte alignment. The stack alignment for RV32 and RV64 is 16,
- // for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment.
+ // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
+ // cause sp = sp + 2048 in the epilogue to be split into multiple
+ // instructions. Offsets smaller than 2048 can fit in a single load/store
+ // instruction, and we have to stick with the stack alignment. 2048 has
+ // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
+ // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
return 2048 - getStackAlign().value();
}
return 0;
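Plugging numbers into the rewritten comment: with 16-byte stack alignment the split amount is 2048 - 16 = 2032, and with RV32E's 4-byte alignment it is 2048 - 4 = 2044; both stay within a single simm12 sp adjustment and remain multiples of the alignment. As a one-line sketch (illustrative):

    // 2048 itself is outside the simm12 addi range (-2048..2047), so back
    // off by one alignment unit: 2048 - 16 = 2032 (RV32/RV64), 2044 (RV32E).
    uint64_t firstSPAdjust(uint64_t StackAlign) { return 2048 - StackAlign; }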
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 1e94e34acf2f..466cd059b749 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -30,6 +30,8 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ uint64_t getStackSizeWithRVVPadding(const MachineFunction &MF) const;
+
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
@@ -81,7 +83,8 @@ private:
void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
int64_t Amount, MachineInstr::MIFlag Flag) const;
- int64_t assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const;
+ std::pair<int64_t, Align>
+ assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const;
};
}
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index ba91b16661a4..2410cc1f8859 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -37,6 +37,11 @@ class RISCVGatherScatterLowering : public FunctionPass {
SmallVector<WeakTrackingVH> MaybeDeadPHIs;
+ // Cache of the BasePtr and Stride determined for each GEP. When a GEP is
+ // used by multiple gathers/scatters, this allows us to reuse the scalar
+ // instructions created for the first gather/scatter for the others.
+ DenseMap<GetElementPtrInst *, std::pair<Value *, Value *>> StridedAddrs;
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -323,15 +328,19 @@ std::pair<Value *, Value *>
RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
IRBuilder<> &Builder) {
+ auto I = StridedAddrs.find(GEP);
+ if (I != StridedAddrs.end())
+ return I->second;
+
SmallVector<Value *, 2> Ops(GEP->operands());
// Base pointer needs to be a scalar.
if (Ops[0]->getType()->isVectorTy())
return std::make_pair(nullptr, nullptr);
- // Make sure we're in a loop and it is in loop simplify form.
+ // Make sure we're in a loop that has a preheader and a single latch.
Loop *L = LI->getLoopFor(GEP->getParent());
- if (!L || !L->isLoopSimplifyForm())
+ if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
return std::make_pair(nullptr, nullptr);
Optional<unsigned> VecOperand;
@@ -387,13 +396,6 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
Value *BasePtr =
Builder.CreateGEP(SourceTy, Ops[0], makeArrayRef(Ops).drop_front());
- // Cast the GEP to an i8*.
- LLVMContext &Ctx = GEP->getContext();
- Type *I8PtrTy =
- Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
- if (BasePtr->getType() != I8PtrTy)
- BasePtr = Builder.CreatePointerCast(BasePtr, I8PtrTy);
-
// Final adjustments to stride should go in the start block.
Builder.SetInsertPoint(
BasePhi->getIncomingBlock(1 - IncrementingBlock)->getTerminator());
@@ -406,7 +408,9 @@ RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
if (TypeScale != 1)
Stride = Builder.CreateMul(Stride, ConstantInt::get(IntPtrTy, TypeScale));
- return std::make_pair(BasePtr, Stride);
+ auto P = std::make_pair(BasePtr, Stride);
+ StridedAddrs[GEP] = P;
+ return P;
}
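The StridedAddrs map turns determineBaseAndStride into a memoized query keyed on the GEP. The general shape, with an illustrative helper name standing in for the body above:

    // Illustrative memoization pattern: compute once per GEP, then reuse
    // the scalar instructions built on the first query.
    std::pair<Value *, Value *> getOrCompute(GetElementPtrInst *GEP) {
      auto It = StridedAddrs.find(GEP);
      if (It != StridedAddrs.end())
        return It->second;
      auto P = computeBaseAndStride(GEP); // hypothetical stand-in
      StridedAddrs[GEP] = P; // the code above caches successes; its early
                             // failure returns bypass the cache entirely
      return P;
    }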
bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
@@ -468,6 +472,8 @@ bool RISCVGatherScatterLowering::runOnFunction(Function &F) {
DL = &F.getParent()->getDataLayout();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ StridedAddrs.clear();
+
SmallVector<IntrinsicInst *, 4> Gathers;
SmallVector<IntrinsicInst *, 4> Scatters;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 6f77428ae721..cfaafc7b53d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -37,6 +37,7 @@ namespace RISCV {
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
+#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm
@@ -47,17 +48,36 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
I != E;) {
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+ // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
+ // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ MVT VT = N->getSimpleValueType(0);
+ unsigned Opc =
+ VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
+ SDLoc DL(N);
+ SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
+ SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
+ N->getOperand(0), VL);
+
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
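One plausible reading of the iterator discipline in the block above, restated with comments (the loop header pre-increments I past N before this point):

    --I;                                  // park I on N itself; RAUW can
                                          // CSE away the node I pointed at,
                                          // but never deletes N
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    ++I;                                  // step to N's current successor
    CurDAG->DeleteNode(N);                // only now does N go away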
+
// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
// load. Done after lowering and combining so that we have a chance to
// optimize this to VMV_V_X_VL when the upper bits aren't needed.
if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
continue;
- assert(N->getNumOperands() == 3 && "Unexpected number of operands");
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands");
MVT VT = N->getSimpleValueType(0);
- SDValue Lo = N->getOperand(0);
- SDValue Hi = N->getOperand(1);
- SDValue VL = N->getOperand(2);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Lo = N->getOperand(1);
+ SDValue Hi = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
"Unexpected VTs!");
@@ -88,7 +108,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
SDValue Ops[] = {Chain,
IntID,
- CurDAG->getUNDEF(VT),
+ Passthru,
StackSlot,
CurDAG->getRegister(RISCV::X0, MVT::i64),
VL};
@@ -112,6 +132,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
+ HandleSDNode Dummy(CurDAG->getRoot());
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
bool MadeChange = false;
@@ -123,57 +144,70 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
MadeChange |= doPeepholeSExtW(N);
MadeChange |= doPeepholeLoadStoreADDI(N);
+ MadeChange |= doPeepholeMaskedRVV(N);
}
+ CurDAG->setRoot(Dummy.getValue());
+
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
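The HandleSDNode added at the top of this function deserves a note: a peephole may replace the DAG root itself, and HandleSDNode is a tracked use that follows such replacements. A minimal usage sketch (the middle call is a hypothetical stand-in for the peephole loop above):

    HandleSDNode Dummy(CurDAG->getRoot()); // tracked reference to the root
    runPeepholesThatMayReplaceRoot();      // hypothetical stand-in
    CurDAG->setRoot(Dummy.getValue());     // re-read the possibly-new root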
-static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
- const MVT VT, int64_t Imm,
- const RISCVSubtarget &Subtarget) {
- assert(VT == MVT::i64 && "Expecting MVT::i64");
- const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
- ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
- SDValue Addr = TLI->getAddr(CP, *CurDAG);
- SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
- // Since there is no data race, the chain can be the entry node.
- SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
- CurDAG->getEntryNode());
- MachineFunction &MF = CurDAG->getMachineFunction();
- MachineMemOperand *MemOp = MF.getMachineMemOperand(
- MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- LLT(VT), CP->getAlign());
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
- return Load;
-}
-
-static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
- int64_t Imm, const RISCVSubtarget &Subtarget) {
- MVT XLenVT = Subtarget.getXLenVT();
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+// Returns true if N is a MachineSDNode with a reg+simm12 memory operand.
+// The indices of the base pointer and the offset operand are returned in
+// BaseOpIdx and OffsetOpIdx.
+static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
+ unsigned &OffsetOpIdx) {
+ switch (N->getMachineOpcode()) {
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ BaseOpIdx = 0;
+ OffsetOpIdx = 1;
+ return true;
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ BaseOpIdx = 1;
+ OffsetOpIdx = 2;
+ return true;
+ }
- // If Imm is expensive to build, then we put it into constant pool.
- if (Subtarget.useConstantPoolForLargeInts() &&
- Seq.size() > Subtarget.getMaxBuildIntsCost())
- return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
+ return false;
+}
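hasMemOffset is the classifier now shared by the new ISD::ADD handling and by doPeepholeLoadStoreADDI further down. A typical call site looks like this (sketch; callers must still verify the offset operand is a constant):

    unsigned BaseOpIdx, OffsetOpIdx;
    if (N->isMachineOpcode() && hasMemOffset(N, BaseOpIdx, OffsetOpIdx) &&
        isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) {
      SDValue Base = N->getOperand(BaseOpIdx);
      int64_t Off = N->getConstantOperandVal(OffsetOpIdx);
      // Base/Off now describe the address of the memory access.
    }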
+static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+ RISCVMatInt::InstSeq &Seq) {
SDNode *Result = nullptr;
- SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
+ SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
for (RISCVMatInt::Inst &Inst : Seq) {
- SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
- if (Inst.Opc == RISCV::LUI)
- Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
- else if (Inst.Opc == RISCV::ADD_UW)
- Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg,
- CurDAG->getRegister(RISCV::X0, XLenVT));
- else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
- Inst.Opc == RISCV::SH3ADD)
- Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
- else
- Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
+ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT);
+ switch (Inst.getOpndKind()) {
+ case RISCVMatInt::Imm:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm);
+ break;
+ case RISCVMatInt::RegX0:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg,
+ CurDAG->getRegister(RISCV::X0, VT));
+ break;
+ case RISCVMatInt::RegReg:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg);
+ break;
+ case RISCVMatInt::RegImm:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm);
+ break;
+ }
// Only the first instruction has X0 as its source.
SrcReg = SDValue(Result, 0);
@@ -182,51 +216,28 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
return Result;
}
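For intuition, one plausible sequence selectImmSeq might walk (actual sequences depend on XLEN and the enabled extensions):

    RISCVMatInt::InstSeq Seq =
        RISCVMatInt::generateInstSeq(0x12345678, Subtarget->getFeatureBits());
    // On RV32 this typically yields: LUI 0x12345; ADDI 0x678, since
    // (0x12345 << 12) + 0x678 == 0x12345678 and 0x678 < 0x800 means the
    // LUI constant needs no carry adjustment.
    SDNode *Imm = selectImmSeq(CurDAG, DL, Subtarget->getXLenVT(), Seq);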
-static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned RegClassID, unsigned SubReg0) {
- assert(Regs.size() >= 2 && Regs.size() <= 8);
-
- SDLoc DL(Regs[0]);
- SmallVector<SDValue, 8> Ops;
-
- Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+ int64_t Imm, const RISCVSubtarget &Subtarget) {
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
- for (unsigned I = 0; I < Regs.size(); ++I) {
- Ops.push_back(Regs[I]);
- Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
- }
- SDNode *N =
- CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
- return SDValue(N, 0);
+ return selectImmSeq(CurDAG, DL, VT, Seq);
}
-static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- static const unsigned RegClassIDs[] = {
+static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned NF, RISCVII::VLMUL LMUL) {
+ static const unsigned M1TupleRegClassIDs[] = {
RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
RISCV::VRN8M1RegClassID};
+ static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
+ RISCV::VRN3M2RegClassID,
+ RISCV::VRN4M2RegClassID};
- return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
-}
-
-static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
- RISCV::VRN3M2RegClassID,
- RISCV::VRN4M2RegClassID};
-
- return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
-}
-
-static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
- RISCV::sub_vrm4_0);
-}
+ assert(Regs.size() >= 2 && Regs.size() <= 8);
-static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF, RISCVII::VLMUL LMUL) {
+ unsigned RegClassID;
+ unsigned SubReg0;
switch (LMUL) {
default:
llvm_unreachable("Invalid LMUL.");
@@ -234,12 +245,37 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
case RISCVII::VLMUL::LMUL_F4:
case RISCVII::VLMUL::LMUL_F2:
case RISCVII::VLMUL::LMUL_1:
- return createM1Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm1_0;
+ RegClassID = M1TupleRegClassIDs[NF - 2];
+ break;
case RISCVII::VLMUL::LMUL_2:
- return createM2Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm2_0;
+ RegClassID = M2TupleRegClassIDs[NF - 2];
+ break;
case RISCVII::VLMUL::LMUL_4:
- return createM4Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm4_0;
+ RegClassID = RISCV::VRN2M4RegClassID;
+ break;
+ }
+
+ SDLoc DL(Regs[0]);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
+
+ for (unsigned I = 0; I < Regs.size(); ++I) {
+ Ops.push_back(Regs[I]);
+ Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
}
+ SDNode *N =
+ CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
+ return SDValue(N, 0);
}
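The merged createTuple assembles the same REG_SEQUENCE operand list the three removed helpers did. For NF = 2 at LMUL = 1 the list looks like this (sketch):

    // Ops = { VRN2M1RegClassID,
    //          Regs[0], sub_vrm1_0,
    //          Regs[1], sub_vrm1_1 }  // == sub_vrm1_0 + 1, as the
    //                                 // static_asserts above guarantee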
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
@@ -287,6 +323,10 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
Operands.push_back(Glue);
}
+static bool isAllUndef(ArrayRef<SDValue> Values) {
+ return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
+}
+
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
bool IsStrided) {
SDLoc DL(Node);
@@ -297,19 +337,21 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
unsigned CurOp = 2;
SmallVector<SDValue, 8> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
- SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
- Operands.push_back(MaskedOff);
- CurOp += NF;
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
+ SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
+ Operands.push_back(Merge);
}
+ CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
Operands, /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
+ RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -338,25 +380,25 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
unsigned CurOp = 2;
SmallVector<SDValue, 7> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
Operands.push_back(MaskedOff);
- CurOp += NF;
}
+ CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ false, Operands,
/*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
+ RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
- MVT::Other, MVT::Glue, Operands);
- SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
- /*Glue*/ SDValue(Load, 2));
+ XLenVT, MVT::Other, Operands);
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
@@ -368,8 +410,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
}
- ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL
- ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
+ ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
CurDAG->RemoveDeadNode(Node);
}
@@ -383,13 +425,15 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
unsigned CurOp = 2;
SmallVector<SDValue, 8> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
Operands.push_back(MaskedOff);
- CurOp += NF;
}
+ CurOp += NF;
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
@@ -406,7 +450,7 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
"values when XLEN=32");
}
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
- NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -596,32 +640,125 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
int64_t Imm = ConstNode->getSExtValue();
// If the upper XLen-16 bits are not used, try to convert this to a simm12
// by sign extending bit 15.
- if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
+ if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
hasAllHUsers(Node))
- Imm = SignExtend64(Imm, 16);
+ Imm = SignExtend64<16>(Imm);
// If the upper 32-bits are not used try to convert this into a simm32 by
// sign extending bit 32.
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
- Imm = SignExtend64(Imm, 32);
+ Imm = SignExtend64<32>(Imm);
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
return;
}
- case ISD::FrameIndex: {
- SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
- int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
- ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
+ case ISD::ADD: {
+ // Try to select an ADD with an immediate that is used as a memory address
+ // into (ADDI (ADD X, Imm-Lo12), Lo12), so that the ADDI can later be
+ // removed by doPeepholeLoadStoreADDI.
+
+ // The RHS (operand 1) should be an immediate.
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+
+ int64_t Offset = N1C->getSExtValue();
+ int64_t Lo12 = SignExtend64<12>(Offset);
+
+ // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
+ if (Lo12 == 0 || isInt<12>(Offset))
+ break;
+
+ // Don't do this if we can use a pair of ADDIs.
+ if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
+ break;
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits());
+
+ Offset -= Lo12;
+ // Restore sign bits for RV32.
+ if (!Subtarget->is64Bit())
+ Offset = SignExtend64<32>(Offset);
+
+ // We can fold if the last operation is an ADDI or it's an ADDIW that
+ // could be treated as an ADDI.
+ if (Seq.back().Opc != RISCV::ADDI &&
+ !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset)))
+ break;
+ assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12");
+ // Drop the last operation.
+ Seq.pop_back();
+ assert(!Seq.empty() && "Expected more instructions in sequence");
+
+ bool AllPointerUses = true;
+ for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ // Is this user a reg+simm12 memory instruction that uses this ADD as
+ // its base pointer?
+ unsigned BaseOpIdx, OffsetOpIdx;
+ if (!User->isMachineOpcode() ||
+ !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
+ UI.getOperandNo() != BaseOpIdx) {
+ AllPointerUses = false;
+ break;
+ }
+
+ // If the memory instruction already has an offset, make sure the combined
+ // offset is foldable.
+ int64_t MemOffs =
+ cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
+ MemOffs += Lo12;
+ if (!isInt<12>(MemOffs)) {
+ AllPointerUses = false;
+ break;
+ }
+ }
+
+ if (!AllPointerUses)
+ break;
+
+ // Emit (ADDI (ADD X, Hi), Lo)
+ SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq);
+ SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
+ Node->getOperand(0), SDValue(Imm, 0));
+ SDNode *ADDI =
+ CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
+ CurDAG->getTargetConstant(Lo12, DL, VT));
+ ReplaceNode(Node, ADDI);
return;
}
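A worked instance of this path (illustrative values):

    // Offset = 4100 (0x1004):
    //   Lo12 = SignExtend64<12>(4100) = 4   // low 12 bits, sign bit clear
    //   isInt<12>(4100) -> false            // no single ADDI
    //   isInt<12>(2050) -> false            // no ADDI pair either
    //   Seq (RV32)      = { LUI 1, ADDI 4 } // one plausible materialization
    // The trailing ADDI's immediate equals Lo12, so it is dropped and the
    // emitted form is (ADDI (ADD X, (LUI 1)), 4); each memory user can then
    // fold the 4 into its own offset, e.g. lw rd, 4(tmp).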
+ case ISD::SHL: {
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N0.getOperand(1)))
+ break;
+ unsigned ShAmt = N1C->getZExtValue();
+ uint64_t Mask = N0.getConstantOperandVal(1);
+
+ // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
+ // 32 leading zeros and C3 trailing zeros.
+ if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ if (TrailingZeros > 0 && LeadingZeros == 32) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+ break;
+ }
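Concrete numbers for the fold above (RV64, illustrative):

    // Mask = 0x00000000FFFF0000: isShiftedMask_64 -> true
    //   LeadingZeros  = 64 - (64 - countLeadingZeros(Mask)) = 32
    //   TrailingZeros = 16
    // With ShAmt = 4, (shl (and X, 0xFFFF0000), 4) becomes
    //   (slli (srliw X, 16), 20)
    // SRLIW both shifts and clears the upper bits, so the AND constant is
    // never materialized in a register.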
case ISD::SRL: {
- // Optimize (srl (and X, C2), C) ->
- // (srli (slli X, (XLen-C3), (XLen-C3) + C)
- // Where C2 is a mask with C3 trailing ones.
- // Taking into account that the C2 may have had lower bits unset by
- // SimplifyDemandedBits. This avoids materializing the C2 immediate.
- // This pattern occurs when type legalizing right shifts for types with
- // less than XLen bits.
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
break;
@@ -631,6 +768,32 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
unsigned ShAmt = N1C->getZExtValue();
uint64_t Mask = N0.getConstantOperandVal(1);
+
+ // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
+ // 32 leading zeros and C3 trailing zeros.
+ if (isShiftedMask_64(Mask)) {
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+
+ // Optimize (srl (and X, C2), C) ->
+ // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
+ // where C2 is a mask with C3 trailing ones.
+ // Taking into account that the C2 may have had lower bits unset by
+ // SimplifyDemandedBits. This avoids materializing the C2 immediate.
+ // This pattern occurs when type legalizing right shifts for types with
+ // less than XLen bits.
Mask |= maskTrailingOnes<uint64_t>(ShAmt);
if (!isMask_64(Mask))
break;
@@ -700,13 +863,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t C1 = N1C->getZExtValue();
- // Keep track of whether this is a andi, zext.h, or zext.w.
- bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
- if (C1 == UINT64_C(0xFFFF) &&
- (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
- ZExtOrANDI = true;
- if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
- ZExtOrANDI = true;
+ // Keep track of whether this is a c.andi. If we can't use c.andi, the
+ // shift pair might offer more compression opportunities.
+ // TODO: We could check for the C extension here, but we don't have many
+ // lit tests with the C extension enabled, so not checking gets better
+ // coverage.
+ // TODO: What if ANDI is faster than the shift pair?
+ bool IsCANDI = isInt<6>(N1C->getSExtValue());
// Clear irrelevant bits in the mask.
if (LeftShift)
@@ -727,9 +889,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (C2 < C3) {
// If the number of leading zeros is C2+32 this can be SRLIW.
if (C2 + 32 == C3) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2, DL, XLenVT));
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
@@ -739,27 +900,33 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
//
// This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
// legalized and goes through DAG combine.
- SDValue Y;
if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
- selectSExti32(X, Y)) {
+ X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
SDNode *SRAIW =
- CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
- CurDAG->getTargetConstant(31, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
+ CurDAG->getTargetConstant(31, DL, VT));
SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
- CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
+ RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
+ CurDAG->getTargetConstant(C3 - 32, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
// (srli (slli x, c3-c2), c3).
- if (OneUseOrZExtW && !ZExtOrANDI) {
+ // Skip if we could use (zext.w (sraiw X, C2)).
+ bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
+ X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
+ // Also skip if we can use bexti.
+ Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
+ if (OneUseOrZExtW && !Skip) {
SDNode *SLLI = CurDAG->getMachineNode(
- RISCV::SLLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ RISCV::SLLI, DL, VT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -775,21 +942,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
// Use slli.uw when possible.
if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
- SDNode *SLLI_UW =
- CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2, DL, XLenVT));
+ SDNode *SLLI_UW = CurDAG->getMachineNode(
+ RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SLLI_UW);
return;
}
// (srli (slli c2+c3), c3)
- if (OneUseOrZExtW && !ZExtOrANDI) {
+ if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
- RISCV::SLLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ RISCV::SLLI, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -801,25 +967,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
- if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) {
+ if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
+ unsigned SrliOpc = RISCV::SRLI;
+ // If the input is zexti32 we should use SRLIW.
+ if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
+ X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
+ SrliOpc = RISCV::SRLIW;
+ X = X.getOperand(0);
+ }
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
- OneUseOrZExtW && !ZExtOrANDI) {
- SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW =
+ CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -830,24 +1002,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
- if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) {
+ if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
- if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
- SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW =
+ CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -908,7 +1079,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t ShiftedC1 = C1 << ConstantShift;
// If this is RV32, we need to sign extend the constant.
if (XLen == 32)
- ShiftedC1 = SignExtend64(ShiftedC1, 32);
+ ShiftedC1 = SignExtend64<32>(ShiftedC1);
// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
@@ -1005,45 +1176,44 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
MVT Src1VT = Src1.getSimpleValueType();
unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
- VMSetOpcode, VMANDOpcode;
+ VMOROpcode;
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
-#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
+#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
case RISCVII::VLMUL::lmulenum: \
VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
: RISCV::PseudoVMSLT_VX_##suffix; \
VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
: RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
- VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
break;
- CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1)
- CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2)
- CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4)
- CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8)
- CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16)
- CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32)
- CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64)
-#undef CASE_VMSLT_VMSET_OPCODES
+ CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
+ CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
+ CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
+ CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
+ CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
+ CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
+ CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
+#undef CASE_VMSLT_OPCODES
}
// Mask operations use the LMUL from the mask type.
switch (RISCVTargetLowering::getLMUL(VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
-#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \
+#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
case RISCVII::VLMUL::lmulenum: \
VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
- VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \
+ VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
break;
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8)
-#undef CASE_VMXOR_VMANDN_VMAND_OPCODES
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
+#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
}
SDValue SEW = CurDAG->getTargetConstant(
Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
@@ -1053,12 +1223,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue MaskedOff = Node->getOperand(1);
SDValue Mask = Node->getOperand(4);
- // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}.
+ // If this is vmsgeu_mask with a 0 immediate, expand it to
+ // (vmor mask, maskedoff).
if (IsCmpUnsignedZero) {
- SDValue VMSet =
- SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0);
- ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT,
- {Mask, VMSet, VL, MaskSEW}));
+ // We don't need vmor if the MaskedOff and the Mask are the same
+ // value.
+ if (Mask == MaskedOff) {
+ ReplaceUses(Node, Mask.getNode());
+ return;
+ }
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(VMOROpcode, DL, VT,
+ {Mask, MaskedOff, VL, MaskSEW}));
return;
}
@@ -1082,10 +1257,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Otherwise use
// vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
+ // The result is mask-undisturbed. We use the same instructions to emulate
+ // mask-agnostic behavior, because an agnostic result may be either
+ // undisturbed or all ones.
SDValue Cmp = SDValue(
CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
{MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
0);
+ // vmxor.mm vd, vd, v0 is used to update the active elements.
ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
{Cmp, Mask, VL, MaskSEW}));
return;
@@ -1215,7 +1394,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned CurOp = 2;
// Masked intrinsics only have TU-version pseudo instructions.
- bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
+ bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 8> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1267,9 +1446,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// The riscv_vlm intrinsic is always tail agnostic and has no passthru operand.
bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
// Masked intrinsics only have TU-version pseudo instructions.
- bool IsTU =
- HasPassthruOperand &&
- ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
+ bool IsTU = HasPassthruOperand &&
+ (IsMasked || !Node->getOperand(CurOp).isUndef());
SmallVector<SDValue, 8> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1302,7 +1480,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned CurOp = 2;
// Masked intrinsics only have TU-version pseudo instructions.
- bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
+ bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 7> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1318,19 +1496,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
const RISCV::VLEPseudo *P =
RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
- MachineSDNode *Load =
- CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
- MVT::Other, MVT::Glue, Operands);
- SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
- /*Glue*/ SDValue(Load, 2));
-
+ MachineSDNode *Load = CurDAG->getMachineNode(
+ P->Pseudo, DL, Node->getVTList(), Operands);
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
- ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
- ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
- ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain
- CurDAG->RemoveDeadNode(Node);
+ ReplaceNode(Node, Load);
return;
}
}
@@ -1610,9 +1781,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Try to match splat of a scalar load to a strided load with stride of x0.
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
- if (IsScalarMove && !Node->getOperand(0).isUndef())
+ bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR;
+ if (HasPassthruOperand && !Node->getOperand(0).isUndef())
break;
- SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0);
+ SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0);
auto *Ld = dyn_cast<LoadSDNode>(Src);
if (!Ld)
break;
@@ -1634,7 +1806,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
selectVLOp(Node->getOperand(2), VL);
} else
- selectVLOp(Node->getOperand(1), VL);
+ selectVLOp(Node->getOperand(2), VL);
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
@@ -1650,8 +1822,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
- if (auto *MemOp = dyn_cast<MemSDNode>(Node))
- CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
+ CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
ReplaceNode(Node, Load);
return;
@@ -1680,11 +1851,37 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
return true;
}
-bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
+bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
return true;
}
+
+ return false;
+}
+
+// Select a frame index and an optional immediate offset from an ADD or OR.
+bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (!CurDAG->isBaseWithConstantOffset(Addr))
+ return false;
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+ Subtarget->getXLenVT());
+ Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
+ Subtarget->getXLenVT());
+ return true;
+ }
+ }
+
return false;
}
@@ -1698,6 +1895,76 @@ bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
return true;
}
+bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ SDLoc DL(Addr);
+ MVT VT = Addr.getSimpleValueType();
+
+ if (Addr.getOpcode() == RISCVISD::ADD_LO) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = Addr.getOperand(0);
+ if (Base.getOpcode() == RISCVISD::ADD_LO) {
+ SDValue LoOperand = Base.getOperand(1);
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
+ // If the Lo in (ADD_LO hi, lo) is a global variable's address (its low
+ // part, really), then we can rely on the alignment of that variable to
+ // provide a margin of safety before the low part can overflow the 12
+ // bits of the load/store offset. Check if CVal falls within that
+ // margin; if so, (low part + CVal) can't overflow.
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = commonAlignment(
+ GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+ if (CVal == 0 || Alignment > CVal) {
+ int64_t CombinedOffset = CVal + GA->getOffset();
+ Base = Base.getOperand(0);
+ Offset = CurDAG->getTargetGlobalAddress(
+ GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
+ CombinedOffset, GA->getTargetFlags());
+ return true;
+ }
+ }
+ }
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+ Offset = CurDAG->getTargetConstant(CVal, DL, VT);
+ return true;
+ }
+ }
+
+ // Handle ADD with large immediates.
+ if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ assert(!isInt<12>(CVal) && "simm12 not already handled?");
+
+ if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
+ // We can use an ADDI for part of the offset and fold the rest into the
+ // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
+ int64_t Adj = CVal < 0 ? -2048 : 2047;
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
+ CurDAG->getTargetConstant(Adj, DL, VT)),
+ 0);
+ Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
+ return true;
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+}
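Numbers for the alignment-margin check inside SelectAddrRegImm (illustrative):

    // Let @g be 8-byte aligned with GA->getOffset() == 0. Then
    // commonAlignment(8, 0) == 8, and for CVal = 4 the test
    // (CVal == 0 || Alignment > CVal) passes, so
    //   (load (add (ADD_LO %hi(g), %lo(g)), 4))
    // can be selected with the 4 folded into the relocation:
    //   lw rd, %lo(g+4)(rbase)
    // The alignment guarantees %lo(g) + 4 cannot carry out of 12 bits.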
+
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
// Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
@@ -1723,6 +1990,21 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
ShAmt = N.getOperand(0);
return true;
}
+ } else if (N.getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(N.getOperand(0))) {
+ uint64_t Imm = N.getConstantOperandVal(0);
+ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+ // generate a NEG instead of a SUB of a constant.
+ if (Imm != 0 && Imm % ShiftWidth == 0) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
+ unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
+ MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
+ N.getOperand(1));
+ ShAmt = SDValue(Neg, 0);
+ return true;
+ }
}
ShAmt = N;
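Example of the new SUB-of-constant handling (illustrative):

    // On RV64 (ShiftWidth = 64), for (srl X, (sub 64, Y)):
    //   Imm = 64 and 64 % 64 == 0, and SRL reads only the low 6 bits of
    //   the amount, so shifting by (64 - Y) equals shifting by -Y.
    //   Emit subw t, x0, y (a NEG) as the shift amount instead of
    //   materializing the constant 64 and subtracting.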
@@ -1778,6 +2060,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
Node->getOpcode() == ISD::SRL ||
Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ Node->getOpcode() == RISCVISD::GREV ||
+ Node->getOpcode() == RISCVISD::GORC ||
isa<ConstantSDNode>(Node)) &&
"Unexpected opcode");
@@ -1812,6 +2096,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_WU:
case RISCV::FCVT_S_W:
@@ -1835,6 +2120,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
return false;
break;
case RISCV::SEXT_H:
+ case RISCV::FMV_H_X:
case RISCV::ZEXT_H_RV32:
case RISCV::ZEXT_H_RV64:
if (Bits < 16)
@@ -1871,22 +2157,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
auto *C = dyn_cast<ConstantSDNode>(N);
- if (C && (isUInt<5>(C->getZExtValue()) ||
- C->getSExtValue() == RISCV::VLMaxSentinel))
+ if (C && isUInt<5>(C->getZExtValue())) {
VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
N->getValueType(0));
- else
+ } else if (C && C->isAllOnesValue()) {
+ // Treat all ones as VLMax.
+ VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+ N->getValueType(0));
+ } else if (isa<RegisterSDNode>(N) &&
+ cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
+ // All our VL operands use a register class that allows either GPRNoX0 or
+ // an immediate. Convert X0 to a special immediate to satisfy the
+ // MachineVerifier; the vsetvli insertion pass recognizes this sentinel
+ // specially.
+ VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+ N->getValueType(0));
+ } else {
VL = N;
+ }
return true;
}
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
- if (N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL)
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
return false;
- SplatVal = N.getOperand(0);
+ SplatVal = N.getOperand(1);
return true;
}
@@ -1896,23 +2192,22 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget,
ValidateFn ValidateImm) {
- if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
- !isa<ConstantSDNode>(N.getOperand(0)))
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+ !isa<ConstantSDNode>(N.getOperand(1)))
return false;
- int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+ int64_t SplatImm =
+ cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
- // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL
- // share semantics when the operand type is wider than the resulting vector
- // element type: an implicit truncation first takes place. Therefore, perform
- // a manual truncation/sign-extension in order to ignore any truncated bits
- // and catch any zero-extended immediate.
+ // The semantics of RISCVISD::VMV_V_X_VL are that when the operand type is
+ // wider than the resulting vector element type, an implicit truncation
+ // first takes place. Therefore, perform a manual truncation/sign-extension
+ // in order to ignore any truncated bits and catch any zero-extended
+ // immediate.
// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
// sign-extending to (XLenVT -1).
MVT XLenVT = Subtarget.getXLenVT();
- assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
+ assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
"Unexpected splat operand type");
MVT EltVT = N.getSimpleValueType().getVectorElementType();
if (EltVT.bitsLT(XLenVT))
@@ -1945,13 +2240,12 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
}
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
- if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
- !isa<ConstantSDNode>(N.getOperand(0)))
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+ !isa<ConstantSDNode>(N.getOperand(1)))
return false;
- int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+ int64_t SplatImm =
+ cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
if (!isUInt<5>(SplatImm))
return false;
@@ -1980,49 +2274,42 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
+// (load (add base, (addi src, off1)), off2)
+// -> (load (add base, src), off1+off2)
+// (store val, (add base, (addi src, off1)), off2)
+// -> (store val, (add base, src), off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
- int OffsetOpIdx;
- int BaseOpIdx;
-
- // Only attempt this optimisation for I-type loads and S-type stores.
- switch (N->getMachineOpcode()) {
- default:
+ unsigned OffsetOpIdx, BaseOpIdx;
+ if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
return false;
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- BaseOpIdx = 0;
- OffsetOpIdx = 1;
- break;
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD:
- BaseOpIdx = 1;
- OffsetOpIdx = 2;
- break;
- }
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
return false;
SDValue Base = N->getOperand(BaseOpIdx);
- // If the base is an ADDI, we can merge it in to the load/store.
- if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
+ if (!Base.isMachineOpcode())
return false;
+ if (Base.getMachineOpcode() == RISCV::ADDI) {
+ // If the base is an ADDI, we can merge it in to the load/store.
+ } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
+ isa<ConstantSDNode>(Base.getOperand(1)) &&
+ Base.getOperand(0).isMachineOpcode() &&
+ Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
+ isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
+ // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
+ // and LUI+ADDI would have produced the same result. This is true for all
+ // simm32 values except 0x7ffff800-0x7fffffff.
+ int64_t Offset =
+ SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
+ Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
+ if (!isInt<32>(Offset))
+ return false;
+ } else
+ return false;
+
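A worked check of the ADDIW-merge condition above (illustrative):

    // Base = (ADDIW (LUI 0x12345), 0x678):
    //   Offset = SignExtend64<32>(0x12345 << 12) + 0x678 = 0x12345678
    //   isInt<32> -> true, so LUI+ADDI would produce the same bits and
    //   the ADDIW may be folded like an ADDI.
    // By contrast (ADDIW (LUI 0x80000), -16), i.e. 0x7ffffff0, gives
    //   Offset = -0x80000000 - 16, which fails isInt<32>: this is exactly
    //   the 0x7ffff800-0x7fffffff range called out in the comment.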
SDValue ImmOperand = Base.getOperand(1);
uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
@@ -2039,7 +2326,8 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
// to provide a margin of safety before off1 can overflow the 12 bits.
// Check if off2 falls within that margin; if so off1+off2 can't overflow.
const DataLayout &DL = CurDAG->getDataLayout();
- Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
+ Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL),
+ GA->getOffset());
if (Offset2 != 0 && Alignment <= Offset2)
return false;
int64_t Offset1 = GA->getOffset();
@@ -2049,7 +2337,7 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
CombinedOffset, GA->getTargetFlags());
} else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
// Ditto.
- Align Alignment = CP->getAlign();
+ Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
if (Offset2 != 0 && Alignment <= Offset2)
return false;
int64_t Offset1 = CP->getOffset();
@@ -2068,12 +2356,13 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
LLVM_DEBUG(dbgs() << "\n");
// Modify the offset operand of the load/store.
- if (BaseOpIdx == 0) // Load
- CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
- N->getOperand(2));
- else // Store
- CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
- ImmOperand, N->getOperand(3));
+ if (BaseOpIdx == 0) { // Load
+ N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
+ N->getOperand(2));
+ } else { // Store
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
+ ImmOperand, N->getOperand(3));
+ }
return true;
}
@@ -2130,6 +2419,8 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
case RISCV::SUBW:
case RISCV::MULW:
case RISCV::SLLIW:
+ case RISCV::GREVIW:
+ case RISCV::GORCIW:
// Result is already sign extended just remove the sext.w.
// NOTE: We only handle the nodes that are selected with hasAllWUsers.
ReplaceUses(N, N0.getNode());
@@ -2139,8 +2430,113 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
return false;
}
+// Optimize masked RVV pseudo instructions with a known all-ones mask to their
+// corresponding "unmasked" pseudo versions. The mask we're interested in will
+// take the form of a V0 physical register operand, with a glued
+// register-setting instruction.
+bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
+ const RISCV::RISCVMaskedPseudoInfo *I =
+ RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
+ if (!I)
+ return false;
+
+ unsigned MaskOpIdx = I->MaskOpIdx;
+
+ // Check that we're using V0 as a mask register.
+ if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
+ cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
+ return false;
+
+ // The glued user defines V0.
+ const auto *Glued = N->getGluedNode();
+
+ if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
+ return false;
+
+ // Check that we're defining V0 as a mask register.
+ if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
+ cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
+ return false;
+
+ // Check the instruction defining V0; it needs to be a VMSET pseudo.
+ SDValue MaskSetter = Glued->getOperand(2);
+
+ const auto IsVMSet = [](unsigned Opc) {
+ return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
+ Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
+ Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
+ Opc == RISCV::PseudoVMSET_M_B8;
+ };
+
+ // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
+ // undefined behaviour if it's the wrong bitwidth, so we could choose to
+ // assume that it's all-ones? Same applies to its VL.
+ if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
+ return false;
+
+ // Retrieve the tail policy operand index, if any.
+ Optional<unsigned> TailPolicyOpIdx;
+ const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
+ const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
+
+ bool IsTA = true;
+ if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
+ // The last operand of the pseudo is the policy op, but we might have a
+ // Glue operand last. We might also have a chain.
+ TailPolicyOpIdx = N->getNumOperands() - 1;
+ if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
+ (*TailPolicyOpIdx)--;
+ if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
+ (*TailPolicyOpIdx)--;
+
+ if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
+ RISCVII::TAIL_AGNOSTIC)) {
+ // Keep the true-masked instruction when there is no unmasked TU
+ // instruction
+ if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
+ return false;
+ // We can't use TA if the tie-operand is not IMPLICIT_DEF
+ if (!N->getOperand(0).isUndef())
+ IsTA = false;
+ }
+ }
+
+ unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
+
+ // Check that we're dropping the mask operand and any policy operand
+ // when we transform to this unmasked pseudo. Additionally, if this
+ // instruction is tail agnostic, the unmasked instruction should not have
+ // a merge operand.
+ uint64_t TSFlags = TII.get(Opc).TSFlags;
+ assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
+ RISCVII::hasDummyMaskOp(TSFlags) &&
+ !RISCVII::hasVecPolicyOp(TSFlags) &&
+ "Unexpected pseudo to transform to");
+ (void)TSFlags;
+
+ SmallVector<SDValue, 8> Ops;
+ // Skip the merge operand at index 0 when IsTA is set.
+ for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
+ // Skip the mask, the policy, and the Glue.
+ SDValue Op = N->getOperand(I);
+ if (I == MaskOpIdx || I == TailPolicyOpIdx ||
+ Op.getValueType() == MVT::Glue)
+ continue;
+ Ops.push_back(Op);
+ }
+
+ // Transitively apply any node glued to our new node.
+ if (auto *TGlued = Glued->getGluedNode())
+ Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
+
+ SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ ReplaceUses(N, Result);
+
+ return true;
+}
+
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
-FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
- return new RISCVDAGToDAGISel(TM);
+FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new RISCVDAGToDAGISel(TM, OptLevel);
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index c429a9298739..b50927cfcca5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -24,8 +24,9 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
const RISCVSubtarget *Subtarget = nullptr;
public:
- explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
- : SelectionDAGISel(TargetMachine) {}
+ explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TargetMachine, OptLevel) {}
StringRef getPassName() const override {
return "RISCV DAG->DAG Pattern Instruction Selection";
@@ -44,8 +45,10 @@ public:
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- bool SelectAddrFI(SDValue Addr, SDValue &Base);
+ bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectBaseAddr(SDValue Addr, SDValue &Base);
+ bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) {
@@ -117,12 +120,14 @@ public:
private:
bool doPeepholeLoadStoreADDI(SDNode *Node);
bool doPeepholeSExtW(SDNode *Node);
+ bool doPeepholeMaskedRVV(SDNode *Node);
};
namespace RISCV {
struct VLSEGPseudo {
uint16_t NF : 4;
uint16_t Masked : 1;
+ uint16_t IsTU : 1;
uint16_t Strided : 1;
uint16_t FF : 1;
uint16_t Log2SEW : 3;
@@ -133,6 +138,7 @@ struct VLSEGPseudo {
struct VLXSEGPseudo {
uint16_t NF : 4;
uint16_t Masked : 1;
+ uint16_t IsTU : 1;
uint16_t Ordered : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
@@ -187,6 +193,13 @@ struct VLX_VSXPseudo {
uint16_t Pseudo;
};
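+
+// Each row maps a masked pseudo to its unmasked tail-agnostic and
+// tail-undisturbed equivalents, e.g. (illustrative values only; the real
+// rows are TableGen-generated):
+//   {RISCV::PseudoVADD_VV_M1_MASK, RISCV::PseudoVADD_VV_M1,
+//    RISCV::PseudoVADD_VV_M1_TU, /*MaskOpIdx=*/3}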
+struct RISCVMaskedPseudoInfo {
+ uint16_t MaskedPseudo;
+ uint16_t UnmaskedPseudo;
+ uint16_t UnmaskedTUPseudo;
+ uint8_t MaskOpIdx;
+};
+
#define GET_RISCVVSSEGTable_DECL
#define GET_RISCVVLSEGTable_DECL
#define GET_RISCVVLXSEGTable_DECL
@@ -195,6 +208,7 @@ struct VLX_VSXPseudo {
#define GET_RISCVVSETable_DECL
#define GET_RISCVVLXTable_DECL
#define GET_RISCVVSXTable_DECL
+#define GET_RISCVMaskedPseudosTable_DECL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 97d24c8e9c0b..ff645dea4e7a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -112,17 +112,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
+ // Disable the smallest fractional LMUL types if ELEN is less than
+ // RVVBitsPerBlock.
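+      // E.g. with ELEN = 32, MinElts = 64 / 32 = 2, so nxv1i8 (LMUL = 1/8)
+      // gets no register class and stays illegal.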
+ unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ if (VT.getVectorMinNumElements() < MinElts)
+ return;
+
unsigned Size = VT.getSizeInBits().getKnownMinValue();
- assert(Size <= 512 && isPowerOf2_32(Size));
const TargetRegisterClass *RC;
- if (Size <= 64)
+ if (Size <= RISCV::RVVBitsPerBlock)
RC = &RISCV::VRRegClass;
- else if (Size == 128)
+ else if (Size == 2 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM2RegClass;
- else if (Size == 256)
+ else if (Size == 4 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM4RegClass;
- else
+ else if (Size == 8 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM8RegClass;
+ else
+ llvm_unreachable("Unexpected size");
addRegisterClass(VT, RC);
};
@@ -170,8 +177,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setStackPointerRegisterToSaveRestore(RISCV::X2);
- for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
- setLoadExtAction(N, XLenVT, MVT::i1, Promote);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
+ MVT::i1, Promote);
// TODO: add all necessary setOperationAction calls.
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
@@ -181,100 +188,75 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!Subtarget.hasStdExtZbb()) {
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- }
+
+ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
+
+ if (!Subtarget.hasStdExtZbb())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::ADD, MVT::i32, Custom);
- setOperationAction(ISD::SUB, MVT::i32, Custom);
- setOperationAction(ISD::SHL, MVT::i32, Custom);
- setOperationAction(ISD::SRA, MVT::i32, Custom);
- setOperationAction(ISD::SRL, MVT::i32, Custom);
-
- setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
- setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
+ setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
+
+ setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
+ MVT::i32, Custom);
+
+ setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
+ MVT::i32, Custom);
} else {
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(
+ {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
+ nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!Subtarget.hasStdExtM()) {
- setOperationAction(ISD::MUL, XLenVT, Expand);
- setOperationAction(ISD::MULHS, XLenVT, Expand);
- setOperationAction(ISD::MULHU, XLenVT, Expand);
- setOperationAction(ISD::SDIV, XLenVT, Expand);
- setOperationAction(ISD::UDIV, XLenVT, Expand);
- setOperationAction(ISD::SREM, XLenVT, Expand);
- setOperationAction(ISD::UREM, XLenVT, Expand);
+ setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM},
+ XLenVT, Expand);
} else {
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::MUL, MVT::i32, Custom);
- setOperationAction(ISD::MUL, MVT::i128, Custom);
-
- setOperationAction(ISD::SDIV, MVT::i8, Custom);
- setOperationAction(ISD::UDIV, MVT::i8, Custom);
- setOperationAction(ISD::UREM, MVT::i8, Custom);
- setOperationAction(ISD::SDIV, MVT::i16, Custom);
- setOperationAction(ISD::UDIV, MVT::i16, Custom);
- setOperationAction(ISD::UREM, MVT::i16, Custom);
- setOperationAction(ISD::SDIV, MVT::i32, Custom);
- setOperationAction(ISD::UDIV, MVT::i32, Custom);
- setOperationAction(ISD::UREM, MVT::i32, Custom);
+ setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
+
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+ {MVT::i8, MVT::i16, MVT::i32}, Custom);
} else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
}
}
- setOperationAction(ISD::SDIVREM, XLenVT, Expand);
- setOperationAction(ISD::UDIVREM, XLenVT, Expand);
- setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
- setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
+ setOperationAction(
+ {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
+ Expand);
- setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
- setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
- setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
+ setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
+ Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
Subtarget.hasStdExtZbkb()) {
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
- setOperationAction(ISD::ROTR, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
} else {
- setOperationAction(ISD::ROTL, XLenVT, Expand);
- setOperationAction(ISD::ROTR, XLenVT, Expand);
+ setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbp()) {
// Custom lower bswap/bitreverse so we can convert them to GREVI to enable
// more combining.
- setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
- setOperationAction(ISD::BSWAP, XLenVT, Custom);
- setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, XLenVT, Custom);
+
// BSWAP i8 doesn't exist.
- setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
- setOperationAction(ISD::BSWAP, MVT::i16, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
- setOperationAction(ISD::BSWAP, MVT::i32, Custom);
- }
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i16, Custom);
+
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i32, Custom);
} else {
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
@@ -288,36 +270,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtZbb()) {
- setOperationAction(ISD::SMIN, XLenVT, Legal);
- setOperationAction(ISD::SMAX, XLenVT, Legal);
- setOperationAction(ISD::UMIN, XLenVT, Legal);
- setOperationAction(ISD::UMAX, XLenVT, Legal);
+ setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
+ Legal);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CTTZ, MVT::i32, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
- setOperationAction(ISD::CTLZ, MVT::i32, Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction(
+ {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
+ MVT::i32, Custom);
} else {
- setOperationAction(ISD::CTTZ, XLenVT, Expand);
- setOperationAction(ISD::CTLZ, XLenVT, Expand);
- setOperationAction(ISD::CTPOP, XLenVT, Expand);
+ setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
+
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
}
if (Subtarget.hasStdExtZbt()) {
- setOperationAction(ISD::FSHL, XLenVT, Custom);
- setOperationAction(ISD::FSHR, XLenVT, Custom);
+ setOperationAction({ISD::FSHL, ISD::FSHR}, XLenVT, Custom);
setOperationAction(ISD::SELECT, XLenVT, Legal);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::FSHL, MVT::i32, Custom);
- setOperationAction(ISD::FSHR, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Custom);
} else {
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
+ static constexpr ISD::NodeType FPLegalNodeTypes[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
+ ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
+ ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
+ ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
+
static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
@@ -331,50 +315,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
if (Subtarget.hasStdExtZfh()) {
- setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::LRINT, MVT::f16, Legal);
- setOperationAction(ISD::LLRINT, MVT::f16, Legal);
- setOperationAction(ISD::LROUND, MVT::f16, Legal);
- setOperationAction(ISD::LLROUND, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
+ for (auto NT : FPLegalNodeTypes)
+ setOperationAction(NT, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
- for (auto CC : FPCCToExpand)
- setCondCodeAction(CC, MVT::f16, Expand);
+ setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOWI, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction({ISD::FREM, ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC,
+ ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN,
+ ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FLOG,
+ ISD::FLOG2, ISD::FLOG10},
+ MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
// complete support for all operations in LegalizeDAG.
@@ -385,26 +340,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtF()) {
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::LRINT, MVT::f32, Legal);
- setOperationAction(ISD::LLRINT, MVT::f32, Legal);
- setOperationAction(ISD::LROUND, MVT::f32, Legal);
- setOperationAction(ISD::LLROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
- for (auto CC : FPCCToExpand)
- setCondCodeAction(CC, MVT::f32, Expand);
+ for (auto NT : FPLegalNodeTypes)
+ setOperationAction(NT, MVT::f32, Legal);
+ setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
@@ -418,28 +356,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
if (Subtarget.hasStdExtD()) {
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- setOperationAction(ISD::LRINT, MVT::f64, Legal);
- setOperationAction(ISD::LLRINT, MVT::f64, Legal);
- setOperationAction(ISD::LROUND, MVT::f64, Legal);
- setOperationAction(ISD::LLROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ for (auto NT : FPLegalNodeTypes)
+ setOperationAction(NT, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
- for (auto CC : FPCCToExpand)
- setCondCodeAction(CC, MVT::f64, Expand);
+ setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
@@ -451,40 +372,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
+ ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
+ MVT::i32, Custom);
if (Subtarget.hasStdExtF()) {
- setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
- setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
+ setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
+ Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
- setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
+ setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
+ ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
+ XLenVT, Legal);
setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
- setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
- setOperationAction(ISD::BlockAddress, XLenVT, Custom);
- setOperationAction(ISD::ConstantPool, XLenVT, Custom);
- setOperationAction(ISD::JumpTable, XLenVT, Custom);
+ setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
+ ISD::JumpTable},
+ XLenVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::Constant, MVT::i64, Custom);
+
// TODO: On M-mode only targets, the cycle[h] CSR may not be present.
// Unfortunately this can't be determined just from the ISA naming string.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
@@ -505,19 +424,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
- if (Subtarget.is64Bit()) {
+ setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
+ {MVT::i8, MVT::i16}, Custom);
+ if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
- } else {
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
- }
+ else
+ setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
+ MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
+ MVT::Other, Custom);
static const unsigned IntegerVPOps[] = {
ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
@@ -527,191 +443,175 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
- ISD::VP_MERGE, ISD::VP_SELECT};
+ ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FPTOSI,
+ ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
+ ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE};
static const unsigned FloatingPointVPOps[] = {
- ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
- ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
- ISD::VP_SELECT};
+ ISD::VP_FADD, ISD::VP_FSUB,
+ ISD::VP_FMUL, ISD::VP_FDIV,
+ ISD::VP_FNEG, ISD::VP_FMA,
+ ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
+ ISD::VP_MERGE, ISD::VP_SELECT,
+ ISD::VP_SITOFP, ISD::VP_UITOFP,
+ ISD::VP_SETCC, ISD::VP_FP_ROUND,
+ ISD::VP_FP_EXTEND};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
-
- setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
-
- setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
+ MVT::i64, Custom);
+
+ setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
+ ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
+ ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
+ MVT::i64, Custom);
+
+ setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
+ ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
+ ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
+ ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
+ MVT::i64, Custom);
}
for (MVT VT : BoolVecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
+ Custom);
setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::VP_MERGE, VT, Expand);
- setOperationAction(ISD::VP_SELECT, VT, Expand);
+ setOperationAction(
+ {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
+ Expand);
- setOperationAction(ISD::VP_AND, VT, Custom);
- setOperationAction(ISD::VP_OR, VT, Custom);
- setOperationAction(ISD::VP_XOR, VT, Custom);
+ setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
+ Custom);
- setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
+ Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(
+ {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
+ VT, Custom);
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(OtherVT, VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
+ VT, Expand);
}
+
+ setOperationAction(
+ {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT,
+ Custom);
+ setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
}
for (MVT VT : IntVecVTs) {
- if (VT.getVectorElementType() == MVT::i64 &&
- !Subtarget.hasVInstructionsI64())
+ if (!isTypeLegal(VT))
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
// Vectors implement MULHS/MULHU.
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
// nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
- if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
- setOperationAction(ISD::MULHU, VT, Expand);
- setOperationAction(ISD::MULHS, VT, Expand);
- }
+ if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
+ setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
+ Legal);
- setOperationAction(ISD::ROTL, VT, Expand);
- setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP, ISD::BSWAP}, VT,
+ Expand);
-    setOperationAction(ISD::BSWAP, VT, Expand);
// Custom-lower extensions and truncations from/to mask types.
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
+ VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(
+ {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
+ VT, Custom);
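+    // E.g. nxv1i8 -> nxv1f64 is more than one power-of-two apart, so it is
+    // lowered in stages, roughly i8 -> i32 -> f64 (the exact intermediate
+    // types are picked by the lowering code).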
- setOperationAction(ISD::SADDSAT, VT, Legal);
- setOperationAction(ISD::UADDSAT, VT, Legal);
- setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(
+ {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
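+    // E.g. nxv2i32 -> nxv2i8 becomes i32 -> i16, then i16 -> i8.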
setOperationAction(ISD::TRUNCATE, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
+ Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
-
- for (unsigned VPOpc : IntegerVPOps)
- setOperationAction(VPOpc, VT, Custom);
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
-
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
-
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
+ ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
+ ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
+ VT, Custom);
+
+ setOperationAction(IntegerVPOps, VT, Custom);
+
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+
+ setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
+
+ setOperationAction(
+ {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::STEP_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
+ VT, Expand);
}
+ // Splice
+ setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
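+    // The trick: convert to the FP type and read floor(log2(x)) from the
+    // exponent field; for CTTZ the lowest set bit is isolated first with
+    // (x & -x). (A sketch of the approach, not the exact sequence emitted.)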
if (VT.getVectorElementType() != MVT::i64) {
@@ -719,8 +619,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
}
}
}
@@ -745,21 +645,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
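+    // E.g. nxv1f16 -> nxv1f64 becomes nxv1f16 -> nxv1f32 -> nxv1f64.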
// Custom-lower insert/extract operations to simplify patterns.
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
+ Custom);
// Expand various condition codes (explained above).
- for (auto CC : VFPCCToExpand)
- setCondCodeAction(CC, VT, Expand);
-
- setOperationAction(ISD::FMINNUM, VT, Legal);
- setOperationAction(ISD::FMAXNUM, VT, Legal);
-
- setOperationAction(ISD::FTRUNC, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
+ setCondCodeAction(VFPCCToExpand, VT, Expand);
+
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
+
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
+ VT, Custom);
+
+ setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
+ VT, Custom);
+
+    // Expand FP operations that need libcalls.
+    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN,
+                        ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FLOG,
+                        ISD::FLOG2, ISD::FLOG10, ISD::FRINT, ISD::FNEARBYINT},
+                       VT, Expand);
-    setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
-    setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
@@ -768,30 +682,25 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(
+ {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ VT, Custom);
- setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
- for (unsigned VPOpc : FloatingPointVPOps)
- setOperationAction(VPOpc, VT, Custom);
+ setOperationAction(FloatingPointVPOps, VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
@@ -804,21 +713,31 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
- if (Subtarget.hasVInstructionsF16())
- for (MVT VT : F16VecVTs)
+ if (Subtarget.hasVInstructionsF16()) {
+ for (MVT VT : F16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
+ }
+ }
- for (MVT VT : F32VecVTs) {
- if (Subtarget.hasVInstructionsF32())
+ if (Subtarget.hasVInstructionsF32()) {
+ for (MVT VT : F32VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ }
}
- for (MVT VT : F64VecVTs) {
- if (Subtarget.hasVInstructionsF64())
+ if (Subtarget.hasVInstructionsF64()) {
+ for (MVT VT : F64VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
- SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ }
}
if (Subtarget.useRVVForFixedLengthVectors()) {
@@ -831,23 +750,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
+ OtherVT, VT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
+ Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
+ Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -857,100 +774,80 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
+ Custom);
- setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
+ Custom);
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT},
+ VT, Custom);
      // Operations below are different between masks and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
- setOperationAction(ISD::VP_AND, VT, Custom);
- setOperationAction(ISD::VP_OR, VT, Custom);
- setOperationAction(ISD::VP_XOR, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
- setOperationAction(ISD::OR, VT, Custom);
- setOperationAction(ISD::XOR, VT, Custom);
+ setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
+ ISD::OR, ISD::XOR},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_TRUNCATE},
+ VT, Custom);
continue;
}
- // Use SPLAT_VECTOR to prevent type legalization from destroying the
- // splats when type legalizing i64 scalar on RV32.
+ // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
+ // it before type legalization for i64 vectors on RV32. It will then be
+ // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
// FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
// improvements first.
if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
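+        // E.g. (splat_vector (i64 C)) is then type-legalized on RV32 into
+        // (splat_vector_parts (i32 lo(C)), (i32 hi(C))).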
}
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
-
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
- setOperationAction(ISD::ADD, VT, Custom);
- setOperationAction(ISD::MUL, VT, Custom);
- setOperationAction(ISD::SUB, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
- setOperationAction(ISD::OR, VT, Custom);
- setOperationAction(ISD::XOR, VT, Custom);
- setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::SREM, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
- setOperationAction(ISD::UREM, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
-
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::ABS, VT, Custom);
+ setOperationAction(
+ {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
+
+ setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
+ ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
+ ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
- if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
- }
+ if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction(ISD::SADDSAT, VT, Custom);
- setOperationAction(ISD::UADDSAT, VT, Custom);
- setOperationAction(ISD::SSUBSAT, VT, Custom);
- setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(
+ {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
+ Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction(
+ {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
+ ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
+ ISD::VECREDUCE_UMIN},
+ VT, Custom);
- for (unsigned VPOpc : IntegerVPOps)
- setOperationAction(VPOpc, VT, Custom);
+ setOperationAction(IntegerVPOps, VT, Custom);
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
@@ -959,10 +856,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT =
MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
- if (isTypeLegal(FloatVT)) {
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
- }
+ if (isTypeLegal(FloatVT))
+ setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
}
}
@@ -979,69 +875,50 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
+ Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
-
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
- setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FSUB, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FDIV, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FSQRT, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- setOperationAction(ISD::FMINNUM, VT, Custom);
- setOperationAction(ISD::FMAXNUM, VT, Custom);
-
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
-
- setOperationAction(ISD::FTRUNC, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
+ setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
+ ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
+ ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
+
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+ ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
+
+ setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
+ ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM},
+ VT, Custom);
+
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
+ VT, Custom);
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BITCAST, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
+ VT, Custom);
- for (unsigned VPOpc : FloatingPointVPOps)
- setOperationAction(VPOpc, VT, Custom);
+ setOperationAction(FloatingPointVPOps, VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
- setOperationAction(ISD::BITCAST, MVT::i8, Custom);
- setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
+ Custom);
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
if (Subtarget.hasStdExtF())
@@ -1061,30 +938,33 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Jumps are expensive, compared to logic
setJumpIsExpensive();
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
- setTargetDAGCombine(ISD::ANY_EXTEND);
- if (Subtarget.hasStdExtF()) {
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::FP_TO_SINT);
- setTargetDAGCombine(ISD::FP_TO_UINT);
- setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
- setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
- }
- if (Subtarget.hasVInstructions()) {
- setTargetDAGCombine(ISD::FCOPYSIGN);
- setTargetDAGCombine(ISD::MGATHER);
- setTargetDAGCombine(ISD::MSCATTER);
- setTargetDAGCombine(ISD::VP_GATHER);
- setTargetDAGCombine(ISD::VP_SCATTER);
+ setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
+ ISD::OR, ISD::XOR});
+ if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::STORE);
- }
+
+ if (Subtarget.hasStdExtF())
+ setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
+
+ if (Subtarget.hasStdExtZbp())
+ setTargetDAGCombine({ISD::ROTL, ISD::ROTR});
+
+ if (Subtarget.hasStdExtZbb())
+ setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
+
+ if (Subtarget.hasStdExtZbkb())
+ setTargetDAGCombine(ISD::BITREVERSE);
+ if (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZbb())
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ if (Subtarget.hasStdExtF())
+ setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
+ ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
+ if (Subtarget.hasVInstructions())
+ setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
+ ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
+ ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
+ if (Subtarget.useRVVForFixedLengthVectors())
+ setTargetDAGCombine(ISD::BITCAST);
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
@@ -1149,6 +1029,24 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.size = MemoryLocation::UnknownSize;
Info.flags |= MachineMemOperand::MOStore;
return true;
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.memVT =
+ getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
+ Info.align =
+ Align(DL.getTypeSizeInBits(
+ I.getType()->getStructElementType(0)->getScalarType()) /
+ 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
}
}
@@ -1160,6 +1058,10 @@ bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
+ // RVV instructions only support register addressing.
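+  // E.g. an RVV load is plain "vle32.v v8, (a0)"; any offset must be added
+  // to the base register beforehand with a separate ADDI.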
+ if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
+ return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
+
// Require a 12-bit signed offset.
if (!isInt<12>(AM.BaseOffs))
return false;
@@ -1225,6 +1127,10 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
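+  // On RV64, 32-bit values are materialised sign-extended anyway (LUI, ADDIW
+  // and LW all produce sign-extended results), so prefer the sign-extended
+  // form of i32 constants.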
+ return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
+}
+
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
return Subtarget.hasStdExtZbb();
}
@@ -1245,6 +1151,36 @@ bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
!isa<ConstantSDNode>(Y);
}
+bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
+ auto *C = dyn_cast<ConstantSDNode>(Y);
+ return C && C->getAPIntValue().ule(10);
+}
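+// E.g. testing bit 5 becomes "andi a0, a0, 32" plus seqz/snez; positions
+// above 10 would need a mask that no longer fits ANDI's 12-bit signed
+// immediate.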
+
+bool RISCVTargetLowering::
+ shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ // One interesting pattern that we'd want to form is 'bit extract':
+ // ((1 >> Y) & 1) ==/!= 0
+ // But we also need to be careful not to try to reverse that fold.
+
+ // Is this '((1 >> Y) & 1)'?
+ if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
+ return false; // Keep the 'bit extract' pattern.
+
+ // Will this be '((1 >> Y) & 1)' after the transform?
+ if (NewShiftOpcode == ISD::SRL && CC->isOne())
+ return true; // Do form the 'bit extract' pattern.
+
+ // If 'X' is a constant, and we transform, then we will immediately
+ // try to undo the fold, thus causing endless combine loop.
+ // So only do the transform if X is not a constant. This matches the default
+ // implementation of this function.
+ return !XC;
+}
+
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
@@ -1282,6 +1218,7 @@ bool RISCVTargetLowering::shouldSinkOperands(
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
+ case Intrinsic::vp_fma:
return Operand == 0 || Operand == 1;
// FIXME: Our patterns can only match vx/vf instructions when the splat
// it on the RHS, because TableGen doesn't recognize our VP operations
@@ -1345,6 +1282,15 @@ bool RISCVTargetLowering::shouldSinkOperands(
return true;
}
+bool RISCVTargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+  // In order to maximise the opportunity for common subexpression elimination,
+  // keep a separate ADD node for the global address offset instead of folding
+  // it into the global address node. Later peephole optimisations may choose
+  // to fold it back in when profitable.
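+  // E.g. accesses to @g+4 and @g+8 can then share a single materialisation
+  // of @g instead of each folding a different offset into the address.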
+ return false;
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
// FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
@@ -1583,7 +1529,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
if (VT.getFixedSizeInBits() > 1024 * 8)
return false;
- unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
+ unsigned MinVLen = Subtarget.getRealMinVLen();
MVT EltVT = VT.getVectorElementType();
@@ -1621,7 +1567,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
}
// Reject elements larger than ELEN.
- if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
+ if (EltVT.getSizeInBits() > Subtarget.getELEN())
return false;
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
@@ -1649,8 +1595,8 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
"Expected legal fixed length vector!");
- unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
- unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
+ unsigned MinVLen = Subtarget.getRealMinVLen();
+ unsigned MaxELen = Subtarget.getELEN();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
@@ -1710,6 +1656,23 @@ static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
+/// Return the mask type suitable for masking the provided vector type. This
+/// is simply an i1 element type vector of the same (possibly scalable) length.
+static MVT getMaskTypeFor(EVT VecVT) {
+ assert(VecVT.isVector());
+ ElementCount EC = VecVT.getVectorElementCount();
+ return MVT::getVectorVT(MVT::i1, EC);
+}
+
+/// Creates an all-ones mask suitable for masking a vector of type VecVT with
+/// vector length VL.
+static SDValue getAllOnesMask(MVT VecVT, SDValue VL, SDLoc DL,
+ SelectionDAG &DAG) {
+ MVT MaskVT = getMaskTypeFor(VecVT);
+ return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+}
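+// For example, masking an nxv4i32 vector uses an nxv4i1 mask, produced here
+// as (RISCVISD::VMSET_VL nxv4i1, VL), i.e. a vmset.m executed at vector
+// length VL.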
+
// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
@@ -1720,9 +1683,8 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
MVT XLenVT = Subtarget.getXLenVT();
SDValue VL = VecVT.isFixedLengthVector()
? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
- : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ : DAG.getRegister(RISCV::X0, XLenVT);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
return {Mask, VL};
}
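// Note that X0 used as the VL operand above is the standard encoding for
// VLMAX: a vsetvli whose rs1 is x0 (with rd != x0) sets vl to VLMAX for the
// given type.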
@@ -1747,14 +1709,6 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
return false;
}
-bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
- // Only splats are currently supported.
- if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
- return true;
-
- return false;
-}
-
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// RISCV FP-to-int conversions saturate to the destination register size, but
@@ -1796,7 +1750,7 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
// Freeze the source since we are increasing the number of uses.
- SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
+ SDValue Src = DAG.getFreeze(Op.getOperand(0));
// Truncate to integer and convert back to FP.
MVT IntVT = VT.changeVectorElementTypeToInteger();
@@ -1844,21 +1798,56 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
-static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
+// This mode isn't supported in vector hardware on RISCV. But as long as we
+// aren't compiling with trapping math, we can emulate this with
+// trunc(X + copysign(nextafter(0.5, 0.0), X)).
+// FIXME: Could be shorter by changing rounding mode, but we don't have FRM
+// dependencies modeled yet.
+// FIXME: Use masked operations to avoid final merge.
+static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- assert(VT.isFixedLengthVector() && "Unexpected vector!");
-
- MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ assert(VT.isVector() && "Unexpected type");
SDLoc DL(Op);
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- unsigned Opc =
- VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
- SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
- return convertFromScalableVector(VT, Splat, DAG, Subtarget);
+ // Freeze the source since we are increasing the number of uses.
+ SDValue Src = DAG.getFreeze(Op.getOperand(0));
+
+ // We do the conversion on the absolute value and fix the sign at the end.
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
+
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ bool Ignored;
+ APFloat Point5Pred = APFloat(0.5f);
+ Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
+ Point5Pred.next(/*nextDown*/ true);
+
+ // Add the adjustment.
+ SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
+ DAG.getConstantFP(Point5Pred, DL, VT));
+
+ // Truncate to integer and convert back to fp.
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
+ Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
+
+ // Restore the original sign.
+ Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
+
+ // Determine the largest integer that can be represented exactly. This value
+ // and anything larger don't have any fractional bits, so they don't need to
+ // be converted.
+ unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ APFloat MaxVal = APFloat(FltSem);
+ MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
+ /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
+ SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
+
+ // If abs(Src) was larger than MaxVal or NaN, keep it.
+ MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+ SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
+ return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
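// Worked example for the lowering above (illustrative): for f32,
// nextafter(0.5, 0.0) is 0x3EFFFFFF ~= 0.49999997.
//   round(2.5):  |2.5| + 0.49999997 rounds (nearest-even) to exactly 3.0f;
//                fp_to_sint gives 3, sint_to_fp gives 3.0, copysign keeps 3.0.
//   round(-1.4): |-1.4| + 0.49999997 ~= 1.8999999; truncation gives 1, and
//                copysign restores the sign, yielding -1.0.
// For f32, MaxVal is 2^23; inputs whose magnitude is >= 2^23 (and NaNs) are
// returned unchanged by the final select.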
struct VIDSequence {
@@ -1908,37 +1897,27 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
// A value difference of zero means that we're somewhere in the middle
// of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
// step change before evaluating the sequence.
- if (ValDiff != 0) {
- int64_t Remainder = ValDiff % IdxDiff;
- // Normalize the step if it's greater than 1.
- if (Remainder != ValDiff) {
- // The difference must cleanly divide the element span.
- if (Remainder != 0)
- return None;
- ValDiff /= IdxDiff;
- IdxDiff = 1;
- }
-
- if (!SeqStepNum)
- SeqStepNum = ValDiff;
- else if (ValDiff != SeqStepNum)
- return None;
+ if (ValDiff == 0)
+ continue;
- if (!SeqStepDenom)
- SeqStepDenom = IdxDiff;
- else if (IdxDiff != *SeqStepDenom)
+ int64_t Remainder = ValDiff % IdxDiff;
+ // Normalize the step if it's greater than 1.
+ if (Remainder != ValDiff) {
+ // The difference must cleanly divide the element span.
+ if (Remainder != 0)
return None;
+ ValDiff /= IdxDiff;
+ IdxDiff = 1;
}
- }
- // Record and/or check any addend.
- if (SeqStepNum && SeqStepDenom) {
- uint64_t ExpectedVal =
- (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
- int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
- if (!SeqAddend)
- SeqAddend = Addend;
- else if (SeqAddend != Addend)
+ if (!SeqStepNum)
+ SeqStepNum = ValDiff;
+ else if (ValDiff != SeqStepNum)
+ return None;
+
+ if (!SeqStepDenom)
+ SeqStepDenom = IdxDiff;
+ else if (IdxDiff != *SeqStepDenom)
return None;
}
@@ -1946,14 +1925,68 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (!PrevElt || PrevElt->first != Val)
PrevElt = std::make_pair(Val, Idx);
}
- // We need to have logged both a step and an addend for this to count as
- // a legal index sequence.
- if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
+
+ // We need to have logged a step for this to count as a legal index sequence.
+ if (!SeqStepNum || !SeqStepDenom)
return None;
+ // Loop back through the sequence and validate elements we might have skipped
+ // while waiting for a valid step. While doing this, log any sequence addend.
+ for (unsigned Idx = 0; Idx < NumElts; Idx++) {
+ if (Op.getOperand(Idx).isUndef())
+ continue;
+ uint64_t Val = Op.getConstantOperandVal(Idx) &
+ maskTrailingOnes<uint64_t>(EltSizeInBits);
+ uint64_t ExpectedVal =
+ (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
+ int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
+ if (!SeqAddend)
+ SeqAddend = Addend;
+ else if (Addend != SeqAddend)
+ return None;
+ }
+
+ assert(SeqAddend && "Must have an addend if we have a step");
+
return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
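// Illustrative VID sequences accepted above (not from the patch):
//   <0,2,4,6>     -> step 2/1, addend 0  (2 * VID)
//   <0,0,1,1,2,2> -> step 1/2, addend 0  (VID >> 1)
//   <3,4,5,6>     -> step 1/1, addend 3  (VID + 3)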
+// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
+// and lower it as a VRGATHER_VX_VL from the source vector.
+static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = SplatVal.getOperand(0);
+ // Only perform this optimization on vectors of the same size for simplicity.
+ // Don't perform this optimization for i1 vectors.
+ // FIXME: Support i1 vectors, maybe by promoting to i8?
+ if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
+ return SDValue();
+ SDValue Idx = SplatVal.getOperand(1);
+ // The index must be a legal type.
+ if (Idx.getValueType() != Subtarget.getXLenVT())
+ return SDValue();
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
+ SDValue Mask, VL;
+ std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
+ Idx, Mask, DAG.getUNDEF(ContainerVT), VL);
+
+ if (!VT.isFixedLengthVector())
+ return Gather;
+
+ return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+}
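+// For example, (splat (extractelt v, i)) becomes a single
+// 'vrgather.vx dst, v, i', avoiding a trip through a scalar register for the
+// extract followed by a separate splat.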
+
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -1989,8 +2022,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits =
std::min(std::max(NumElts, 8u), Subtarget.getXLen());
- NumViaIntegerBits = std::min(NumViaIntegerBits,
- Subtarget.getMaxELENForFixedLengthVectors());
+ NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
// If we have to use more than one INSERT_VECTOR_ELT then this
// optimization is likely to increase code size; avoid performing it in
@@ -2012,7 +2044,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// our vector and clear our accumulated data.
if (I != 0 && I % NumViaIntegerBits == 0) {
if (NumViaIntegerBits <= 32)
- Bits = SignExtend64(Bits, 32);
+ Bits = SignExtend64<32>(Bits);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
@@ -2028,7 +2060,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Insert the (remaining) scalar value into position in our integer
// vector type.
if (NumViaIntegerBits <= 32)
- Bits = SignExtend64(Bits, 32);
+ Bits = SignExtend64<32>(Bits);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
DAG.getConstant(IntegerEltIdx, DL, XLenVT));
@@ -2077,9 +2109,12 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
+ return Gather;
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
- Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
+ Splat =
+ DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
@@ -2109,7 +2144,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// a single addi instruction.
if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
(StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
- isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
+ isPowerOf2_32(StepDenominator) &&
+ (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
@@ -2118,18 +2154,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
(StepOpcode == ISD::SHL && SplatStepVal != 0)) {
- SDValue SplatStep = DAG.getSplatVector(
+ SDValue SplatStep = DAG.getSplatBuildVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
}
if (StepDenominator != 1) {
- SDValue SplatStep = DAG.getSplatVector(
+ SDValue SplatStep = DAG.getSplatBuildVector(
VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
- SDValue SplatAddend =
- DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
+ SDValue SplatAddend = DAG.getSplatBuildVector(
+ VT, DL, DAG.getConstant(Addend, DL, XLenVT));
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
}
return VID;
@@ -2172,7 +2208,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
- SplatValue = SignExtend64(SplatValue, 32);
+ SplatValue = SignExtend64<32>(SplatValue);
// Since we can't introduce illegal i64 types at this stage, we can only
// perform an i64 splat on RV32 if it is its own sign-extended value. That
@@ -2187,6 +2223,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
SDValue Splat =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
+ DAG.getUNDEF(ViaContainerVT),
DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
return DAG.getBitcast(VT, Splat);
@@ -2274,57 +2311,66 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
- SDValue Hi, SDValue VL, SelectionDAG &DAG) {
+static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
+ SDValue Lo, SDValue Hi, SDValue VL,
+ SelectionDAG &DAG) {
+ if (!Passthru)
+ Passthru = DAG.getUNDEF(VT);
if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
// If Hi constant is all the same sign bit as Lo, lower this as a custom
// node in order to try and match RVV vector/scalar instructions.
if ((LoC >> 31) == HiC)
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
- // If vl is equal to VLMax and Hi constant is equal to Lo, we could use
+ // If vl is an all-ones constant (the VLMAX sentinel) and Hi equals Lo, use
// vmv.v.x whose EEW = 32 to lower it.
auto *Const = dyn_cast<ConstantSDNode>(VL);
- if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
+ if (LoC == HiC && Const && Const->isAllOnesValue()) {
MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
// TODO: if vl <= min(VLMAX), we can also do this. But we can't
// access the subtarget here at the moment.
- auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
+ auto InterVec = DAG.getNode(
+ RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
+ DAG.getRegister(RISCV::X0, MVT::i32));
return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
}
}
// Fall back to a stack store and stride x0 vector load.
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
+ return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
+ Hi, VL);
}
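// Illustrative cases for the lowering above (not from the patch): splatting
// an i64 whose halves are equal, e.g. 0xAAAAAAAA_AAAAAAAA, with an all-ones
// (VLMAX) vl can be done as an EEW=32 vmv.v.x plus a bitcast; splatting
// 0x00000001_00000002, where Hi is not the sign-extension of Lo, falls back
// to SPLAT_VECTOR_SPLIT_I64_VL (a stack store of both halves followed by a
// zero-strided vlse64).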
// Called by type legalization to handle splat of i64 on RV32.
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
-static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
- SDValue VL, SelectionDAG &DAG) {
+static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
+ SDValue Scalar, SDValue VL,
+ SelectionDAG &DAG) {
assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(1, DL, MVT::i32));
- return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
+ return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
}
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
-static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
- SelectionDAG &DAG,
+static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
+ MVT VT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+ bool HasPassthru = Passthru && !Passthru.isUndef();
+ if (!HasPassthru && !Passthru)
+ Passthru = DAG.getUNDEF(VT);
if (VT.isFloatingPoint()) {
// If VL is 1, we could use vfmv.s.f.
if (isOneConstant(VL))
- return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
- Scalar, VL);
- return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+ return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
}
MVT XLenVT = Subtarget.getXLenVT();
@@ -2343,55 +2389,25 @@ static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
// use vmv.s.x.
if (isOneConstant(VL) &&
(!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
- return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
- VL);
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
}
assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
"Unexpected scalar for splat lowering!");
if (isOneConstant(VL) && isNullConstant(Scalar))
- return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
DAG.getConstant(0, DL, XLenVT), VL);
// Otherwise use the more complicated splatting algorithm.
- return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
-}
-
-// Is the mask a slidedown that shifts in undefs.
-static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
- int Size = Mask.size();
-
- // Elements shifted in should be undef.
- auto CheckUndefs = [&](int Shift) {
- for (int i = Size - Shift; i != Size; ++i)
- if (Mask[i] >= 0)
- return false;
- return true;
- };
-
- // Elements should be shifted or undef.
- auto MatchShift = [&](int Shift) {
- for (int i = 0; i != Size - Shift; ++i)
- if (Mask[i] >= 0 && Mask[i] != Shift + i)
- return false;
- return true;
- };
-
- // Try all possible shifts.
- for (int Shift = 1; Shift != Size; ++Shift)
- if (CheckUndefs(Shift) && MatchShift(Shift))
- return Shift;
-
- // No match.
- return -1;
+ return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
const RISCVSubtarget &Subtarget) {
// We need to be able to widen elements to the next larger integer type.
- if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
+ if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
return false;
int Size = Mask.size();
@@ -2430,6 +2446,79 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
return true;
}
+/// Match shuffles that concatenate two vectors, rotate the concatenation,
+/// and then extract the original number of elements from the rotated result.
+/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
+/// returned rotation amount is for a rotate right, where elements move from
+/// higher-numbered positions to lower-numbered positions. \p LoSrc indicates
+/// the first source vector of the rotate or -1 for undef. \p HiSrc indicates
+/// the second vector of the rotate or -1 for undef. At least one of \p LoSrc
+/// and \p HiSrc will be
+/// 0 or 1 if a rotation is found.
+///
+/// NOTE: We talk about rotate to the right which matches how bit shift and
+/// rotate instructions are described where LSBs are on the right, but LLVM IR
+/// and the examples below write vectors with the lowest elements on the left.
+static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
+ int Size = Mask.size();
+
+ // We need to detect various ways of spelling a rotation:
+ // [11, 12, 13, 14, 15, 0, 1, 2]
+ // [-1, 12, 13, 14, -1, -1, 1, -1]
+ // [-1, -1, -1, -1, -1, -1, 1, 2]
+ // [ 3, 4, 5, 6, 7, 8, 9, 10]
+ // [-1, 4, 5, 6, -1, -1, 9, -1]
+ // [-1, 4, 5, 6, -1, -1, -1, -1]
+ int Rotation = 0;
+ LoSrc = -1;
+ HiSrc = -1;
+ for (int i = 0; i != Size; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+
+ // Determine where a rotate vector would have started.
+ int StartIdx = i - (M % Size);
+ // The identity rotation isn't interesting, stop.
+ if (StartIdx == 0)
+ return -1;
+
+ // If we found the tail of a vector (StartIdx < 0), the rotation is the
+ // size of the missing front. If we found the head of a vector, the
+ // rotation is the remaining distance, Size - StartIdx.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return -1;
+
+ // Compute which value this mask is pointing at.
+ int MaskSrc = M < Size ? 0 : 1;
+
+ // Compute which of the two target values this index should be assigned to.
+ // This reflects whether the high elements are remaining or the low elements
+ // are remaining.
+ int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (TargetSrc < 0)
+ TargetSrc = MaskSrc;
+ else if (TargetSrc != MaskSrc)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return -1;
+ }
+
+ // Check that we successfully analyzed the mask, and normalize the results.
+ assert(Rotation != 0 && "Failed to locate a viable rotation!");
+ assert((LoSrc >= 0 || HiSrc >= 0) &&
+ "Failed to find a rotated input vector!");
+
+ return Rotation;
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
@@ -2506,33 +2595,59 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
unsigned Opc =
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
- SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
+ SDValue Splat =
+ DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
assert(Lane < (int)NumElts && "Unexpected lane!");
- SDValue Gather =
- DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
- DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
+ SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
+ V1, DAG.getConstant(Lane, DL, XLenVT),
+ TrueMask, DAG.getUNDEF(ContainerVT), VL);
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
}
ArrayRef<int> Mask = SVN->getMask();
- // Try to match as a slidedown.
- int SlideAmt = matchShuffleAsSlideDown(Mask);
- if (SlideAmt >= 0) {
- // TODO: Should we reduce the VL to account for the upper undef elements?
- // Requires additional vsetvlis, but might be faster to execute.
- V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
- SDValue SlideDown =
- DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), V1,
- DAG.getConstant(SlideAmt, DL, XLenVT),
- TrueMask, VL);
- return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
+ // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
+ // be undef which can be handled with a single SLIDEDOWN/UP.
+ int LoSrc, HiSrc;
+ int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
+ if (Rotation > 0) {
+ SDValue LoV, HiV;
+ if (LoSrc >= 0) {
+ LoV = LoSrc == 0 ? V1 : V2;
+ LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
+ }
+ if (HiSrc >= 0) {
+ HiV = HiSrc == 0 ? V1 : V2;
+ HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
+ }
+
+ // We found a rotation. We need to slide HiV down by Rotation. Then we need
+ // to slide LoV up by (NumElts - Rotation).
+ unsigned InvRotate = NumElts - Rotation;
+
+ SDValue Res = DAG.getUNDEF(ContainerVT);
+ if (HiV) {
+ // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
+ // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
+ // causes multiple vsetvlis in some test cases such as lowering
+ // reduce.mul
+ SDValue DownVL = VL;
+ if (LoV)
+ DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
+ Res =
+ DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
+ DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
+ }
+ if (LoV)
+ Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
+ DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
+
+ return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
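// Illustrative example for the rotation lowering above (not from the patch):
// with V1 = <a,b,c,d>, V2 = <e,f,g,h> and mask <6,7,0,1>, isElementRotate
// returns Rotation = 2 with HiSrc = 1 and LoSrc = 0. Sliding V2 down by 2
// produces <g,h,_,_>, and sliding V1 up by NumElts - 2 = 2 fills in the
// rest, giving <g,h,a,b>.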
// Detect an interleave shuffle and lower to
@@ -2576,18 +2691,17 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// Freeze V2 since we use it twice and we need to be sure that the add and
// multiply see the same value.
- V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
+ V2 = DAG.getFreeze(V2);
// Recreate TrueMask using the widened type's element count.
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
- TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
// Widen V1 and V2 with 0s and add one copy of V2 to V1.
SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
V2, TrueMask, VL);
// Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
+ DAG.getUNDEF(IntHalfVT),
DAG.getAllOnesConstant(DL, XLenVT));
SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
V2, Multiplier, TrueMask, VL);
@@ -2691,7 +2805,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// TODO: This doesn't trigger for i64 vectors on RV32, since there we
// encounter a bitcasted BUILD_VECTOR with low/high i32 values.
if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
- Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
+ Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
+ Subtarget);
} else {
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
// If only one index is used, we can use a "splat" vrgather.
@@ -2699,16 +2814,16 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// that's beneficial.
if (LHSIndexCounts.size() == 1) {
int SplatIndex = LHSIndexCounts.begin()->getFirst();
- Gather =
- DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
- DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
+ DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask,
+ DAG.getUNDEF(ContainerVT), VL);
} else {
SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
LHSIndices =
convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
- TrueMask, VL);
+ TrueMask, DAG.getUNDEF(ContainerVT), VL);
}
}
@@ -2716,45 +2831,46 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// additional vrgather.
if (!V2.isUndef()) {
V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+
+ MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+ SelectMask =
+ convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
+
// If only one index is used, we can use a "splat" vrgather.
// TODO: We can splat the most-common index and fix-up any stragglers, if
// that's beneficial.
if (RHSIndexCounts.size() == 1) {
int SplatIndex = RHSIndexCounts.begin()->getFirst();
- V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
- DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
+ DAG.getConstant(SplatIndex, DL, XLenVT), SelectMask,
+ Gather, VL);
} else {
SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
RHSIndices =
convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
- V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
- VL);
+ Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices,
+ SelectMask, Gather, VL);
}
-
- MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
- SelectMask =
- convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
-
- Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
- Gather, VL);
}
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
-static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
- SDLoc DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- if (VT.isScalableVector())
- return DAG.getFPExtendOrRound(Op, DL, VT);
- assert(VT.isFixedLengthVector() &&
- "Unexpected value type for RVV FP extend/round lowering");
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
- ? RISCVISD::FP_EXTEND_VL
- : RISCVISD::FP_ROUND_VL;
- return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
+bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
+ // Support splats for any type. These should type legalize well.
+ if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
+ return true;
+
+ // Only support legal VTs for other shuffles for now.
+ if (!isTypeLegal(VT))
+ return false;
+
+ MVT SVT = VT.getSimpleVT();
+
+ bool SwapSources;
+ int LoSrc, HiSrc;
+ return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
+ isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
}
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
@@ -2868,6 +2984,32 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
Store->getMemOperand()->getFlags());
}
+static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
+
+ int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
+
+ // All simm32 constants should be handled by isel.
+ // NOTE: The getMaxBuildIntsCost call below should return a value >= 2,
+ // making this check redundant, but small immediates are common, so checking
+ // them here first gives better compile time.
+ if (isInt<32>(Imm))
+ return Op;
+
+ // We only need to cost the immediate, if constant pool lowering is enabled.
+ if (!Subtarget.useConstantPoolForLargeInts())
+ return Op;
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
+ return Op;
+
+ // Expand to a constant pool using the default expansion code.
+ return SDValue();
+}
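+// Sketch of the trade-off (illustrative): materializing an arbitrary 64-bit
+// constant such as 0x1234567812345678 can take a chain of several
+// LUI/ADDI/SLLI instructions, while the constant pool costs one address
+// computation plus one load; the inline sequence is kept only when it is at
+// most getMaxBuildIntsCost() instructions long.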
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2883,6 +3025,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerJumpTable(Op, DAG);
case ISD::GlobalTLSAddress:
return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::Constant:
+ return lowerConstant(Op, DAG, Subtarget);
case ISD::SELECT:
return lowerSELECT(Op, DAG);
case ISD::BRCOND:
@@ -2905,6 +3049,30 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
+ if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
+ SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
+ return FPConv;
+ }
+ if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtF()) {
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ SDValue FPConv =
+ DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+ return FPConv;
+ }
+
+ // Consider other scalar<->scalar casts as legal if the types are legal.
+ // Otherwise expand them.
+ if (!VT.isVector() && !Op0VT.isVector()) {
+ if (isTypeLegal(VT) && isTypeLegal(Op0VT))
+ return Op;
+ return SDValue();
+ }
+
+ assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
+ "Unexpected types");
+
if (VT.isFixedLengthVector()) {
// We can handle fixed length vector bitcasts with a simple replacement
// in isel.
@@ -2934,18 +3102,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
DAG.getConstant(0, DL, XLenVT));
}
- if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
- SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
- SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
- return FPConv;
- }
- if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtF()) {
- SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
- SDValue FPConv =
- DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
- return FPConv;
- }
return SDValue();
}
case ISD::INTRINSIC_WO_CHAIN:
@@ -3002,55 +3158,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
}
- case ISD::TRUNCATE: {
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
+ case ISD::TRUNCATE:
// Only custom-lower vector truncates
- if (!VT.isVector())
+ if (!Op.getSimpleValueType().isVector())
return Op;
-
- // Truncates to mask types are handled differently
- if (VT.getVectorElementType() == MVT::i1)
- return lowerVectorMaskTrunc(Op, DAG);
-
- // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
- // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
- // truncate by one power of two at a time.
- MVT DstEltVT = VT.getVectorElementType();
-
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
- MVT SrcEltVT = SrcVT.getVectorElementType();
-
- assert(DstEltVT.bitsLT(SrcEltVT) &&
- isPowerOf2_64(DstEltVT.getSizeInBits()) &&
- isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
- "Unexpected vector truncate lowering");
-
- MVT ContainerVT = SrcVT;
- if (SrcVT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(SrcVT);
- Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
- }
-
- SDValue Result = Src;
- SDValue Mask, VL;
- std::tie(Mask, VL) =
- getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
- LLVMContext &Context = *DAG.getContext();
- const ElementCount Count = ContainerVT.getVectorElementCount();
- do {
- SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
- EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
- Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
- Mask, VL);
- } while (SrcEltVT != DstEltVT);
-
- if (SrcVT.isFixedLengthVector())
- Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
-
- return Result;
- }
+ return lowerVectorTruncLike(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
@@ -3076,28 +3188,26 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
- if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
+ if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
report_fatal_error("Support for VLEN==32 is incomplete.");
- if (isa<ConstantSDNode>(Op.getOperand(0))) {
- // We assume VLENB is a multiple of 8. We manually choose the best shift
- // here because SimplifyDemandedBits isn't always able to simplify it.
- uint64_t Val = Op.getConstantOperandVal(0);
- if (isPowerOf2_64(Val)) {
- uint64_t Log2 = Log2_64(Val);
- if (Log2 < 3)
- return DAG.getNode(ISD::SRL, DL, VT, VLENB,
- DAG.getConstant(3 - Log2, DL, VT));
- if (Log2 > 3)
- return DAG.getNode(ISD::SHL, DL, VT, VLENB,
- DAG.getConstant(Log2 - 3, DL, VT));
- return VLENB;
- }
- // If the multiplier is a multiple of 8, scale it down to avoid needing
- // to shift the VLENB value.
- if ((Val % 8) == 0)
- return DAG.getNode(ISD::MUL, DL, VT, VLENB,
- DAG.getConstant(Val / 8, DL, VT));
- }
+ // We assume VLENB is a multiple of 8. We manually choose the best shift
+ // here because SimplifyDemandedBits isn't always able to simplify it.
+ uint64_t Val = Op.getConstantOperandVal(0);
+ if (isPowerOf2_64(Val)) {
+ uint64_t Log2 = Log2_64(Val);
+ if (Log2 < 3)
+ return DAG.getNode(ISD::SRL, DL, VT, VLENB,
+ DAG.getConstant(3 - Log2, DL, VT));
+ if (Log2 > 3)
+ return DAG.getNode(ISD::SHL, DL, VT, VLENB,
+ DAG.getConstant(Log2 - 3, DL, VT));
+ return VLENB;
+ }
+ // If the multiplier is a multiple of 8, scale it down to avoid needing
+ // to shift the VLENB value.
+ if ((Val % 8) == 0)
+ return DAG.getNode(ISD::MUL, DL, VT, VLENB,
+ DAG.getConstant(Val / 8, DL, VT));
SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
DAG.getConstant(3, DL, VT));
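// For example, since vscale == VLENB / 8: vscale * 2 becomes VLENB >> 2,
// vscale * 8 is VLENB itself, and vscale * 24 becomes VLENB * 3.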
@@ -3117,88 +3227,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return SDValue();
}
- case ISD::FP_EXTEND: {
- // RVV can only do fp_extend to types double the size as the source. We
- // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
- // via f32.
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
-
- // Prepare any fixed-length vector operands.
- MVT ContainerVT = VT;
- if (SrcVT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- MVT SrcContainerVT =
- ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
- Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
- }
-
- if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
- SrcVT.getVectorElementType() != MVT::f16) {
- // For scalable vectors, we only need to close the gap between
- // vXf16->vXf64.
- if (!VT.isFixedLengthVector())
- return Op;
- // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
- Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
- return convertFromScalableVector(VT, Src, DAG, Subtarget);
- }
-
- MVT InterVT = VT.changeVectorElementType(MVT::f32);
- MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
- SDValue IntermediateExtend = getRVVFPExtendOrRound(
- Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
-
- SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
- DL, DAG, Subtarget);
- if (VT.isFixedLengthVector())
- return convertFromScalableVector(VT, Extend, DAG, Subtarget);
- return Extend;
- }
- case ISD::FP_ROUND: {
- // RVV can only do fp_round to types half the size as the source. We
- // custom-lower f64->f16 rounds via RVV's round-to-odd float
- // conversion instruction.
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
-
- // Prepare any fixed-length vector operands.
- MVT ContainerVT = VT;
- if (VT.isFixedLengthVector()) {
- MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
- ContainerVT =
- SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
- Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
- }
-
- if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
- SrcVT.getVectorElementType() != MVT::f64) {
- // For scalable vectors, we only need to close the gap between
- // vXf64<->vXf16.
- if (!VT.isFixedLengthVector())
- return Op;
- // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
- Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
- return convertFromScalableVector(VT, Src, DAG, Subtarget);
- }
-
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
- MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
- SDValue IntermediateRound =
- DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
- SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
- DL, DAG, Subtarget);
-
- if (VT.isFixedLengthVector())
- return convertFromScalableVector(VT, Round, DAG, Subtarget);
- return Round;
- }
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ if (!Op.getValueType().isVector())
+ return Op;
+ return lowerVectorFPExtendOrRoundLike(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
@@ -3221,10 +3254,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
bool IsInt2FP = SrcEltVT.isInteger();
// Widening conversions
- if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
+ if (EltSize > (2 * SrcEltSize)) {
if (IsInt2FP) {
// Do a regular integer sign/zero extension then convert to float.
- MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
+ MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
VT.getVectorElementCount());
unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
? ISD::ZERO_EXTEND
@@ -3242,7 +3275,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
// Narrowing conversions
- if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
+ if (SrcEltSize > (2 * EltSize)) {
if (IsInt2FP) {
// One narrowing int_to_fp, then an fp_round.
assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
@@ -3253,9 +3286,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// FP2Int
// One narrowing fp_to_int, then truncate the integer. If the float isn't
// representable by the integer, the result is poison.
- MVT IVecVT =
- MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
- VT.getVectorElementCount());
+ MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+ VT.getVectorElementCount());
SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
}
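// Illustrative cases (not from the patch): v4i8 -> v4f32 is widened by first
// extending to v4i32 and then converting; v4f32 -> v4i8 converts to v4i16
// first and then truncates. A single-step case such as v4i16 -> v4f32 needs
// no extra nodes and maps onto a widening vector convert directly.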
@@ -3309,6 +3341,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FCEIL:
case ISD::FFLOOR:
return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
+ case ISD::FROUND:
+ return lowerFROUND(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -3350,12 +3384,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerSTEP_VECTOR(Op, DAG);
case ISD::VECTOR_REVERSE:
return lowerVECTOR_REVERSE(Op, DAG);
+ case ISD::VECTOR_SPLICE:
+ return lowerVECTOR_SPLICE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
if (Op.getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskSplat(Op, DAG);
- return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
+ return SDValue();
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
case ISD::CONCAT_VECTORS: {
@@ -3455,7 +3491,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FSQRT:
return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
case ISD::FMA:
- return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
+ return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
case ISD::SMIN:
return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
case ISD::SMAX:
@@ -3487,6 +3523,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
return lowerSET_ROUNDING(Op, DAG);
+ case ISD::EH_DWARF_CFA:
+ return lowerEH_DWARF_CFA(Op, DAG);
case ISD::VP_SELECT:
return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
case ISD::VP_MERGE:
@@ -3525,6 +3563,35 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
case ISD::VP_FDIV:
return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
+ case ISD::VP_FNEG:
+ return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
+ case ISD::VP_FMA:
+ return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
+ return lowerVPExtMaskOp(Op, DAG);
+ return lowerVPOp(Op, DAG,
+ Op.getOpcode() == ISD::VP_SIGN_EXTEND
+ ? RISCVISD::VSEXT_VL
+ : RISCVISD::VZEXT_VL);
+ case ISD::VP_TRUNCATE:
+ return lowerVectorTruncLike(Op, DAG);
+ case ISD::VP_FP_EXTEND:
+ case ISD::VP_FP_ROUND:
+ return lowerVectorFPExtendOrRoundLike(Op, DAG);
+ case ISD::VP_FPTOSI:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
+ case ISD::VP_FPTOUI:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_UINT_VL);
+ case ISD::VP_SITOFP:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
+ case ISD::VP_UITOFP:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
+ case ISD::VP_SETCC:
+ if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
+ return lowerVPSetCCMaskOp(Op, DAG);
+ return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
}
}
@@ -3562,12 +3629,21 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// Use PC-relative addressing to access the symbol. This generates the
// pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
// %pcrel_lo(auipc)).
- return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+ return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
// Use PC-relative addressing to access the GOT for this symbol, then load
// the address from the GOT. This generates the pattern (PseudoLA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
- return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
+ SDValue Load =
+ DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other),
+ {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ return Load;
}
switch (getTargetMachine().getCodeModel()) {
@@ -3578,15 +3654,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
- return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
+ SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
+ return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
}
case CodeModel::Medium: {
// Generate a sequence for accessing addresses within any 2GiB range within
// the address space. This generates the pattern (PseudoLLA sym), which
// expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
- return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+ return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
}
}
}
@@ -3594,23 +3670,12 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
- int64_t Offset = N->getOffset();
- MVT XLenVT = Subtarget.getXLenVT();
+ assert(N->getOffset() == 0 && "unexpected offset in global node");
const GlobalValue *GV = N->getGlobal();
bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
- SDValue Addr = getAddr(N, DAG, IsLocal);
-
- // In order to maximise the opportunity for common subexpression elimination,
- // emit a separate ADD node for the global address offset instead of folding
- // it in the global address node. Later peephole optimisations may choose to
- // fold it back in when profitable.
- if (Offset != 0)
- return DAG.getNode(ISD::ADD, DL, Ty, Addr,
- DAG.getConstant(Offset, DL, XLenVT));
- return Addr;
+ return getAddr(N, DAG, IsLocal);
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@@ -3648,8 +3713,15 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
// the pattern (PseudoLA_TLS_IE sym), which expands to
// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
- SDValue Load =
- SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
+ SDValue Load = DAG.getMemIntrinsicNode(
+ RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
+ {DAG.getEntryNode(), Addr}, Ty, MemOp);
// Add the thread pointer.
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
@@ -3667,12 +3739,11 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SDValue AddrLo =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
+ SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
- SDValue MNAdd = SDValue(
- DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
- 0);
- return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
+ SDValue MNAdd =
+ DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
+ return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
}
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
@@ -3686,8 +3757,7 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
// This generates the pattern (PseudoLA_TLS_GD sym), which expands to
// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
- SDValue Load =
- SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
+ SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
// Prepare argument list to generate call.
ArgListTy Args;
@@ -3710,10 +3780,8 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
- int64_t Offset = N->getOffset();
- MVT XLenVT = Subtarget.getXLenVT();
+ assert(N->getOffset() == 0 && "unexpected offset in global node");
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
@@ -3735,13 +3803,6 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
break;
}
- // In order to maximise the opportunity for common subexpression elimination,
- // emit a separate ADD node for the global address offset instead of folding
- // it in the global address node. Later peephole optimisations may choose to
- // fold it back in when profitable.
- if (Offset != 0)
- return DAG.getNode(ISD::ADD, DL, Ty, Addr,
- DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}
@@ -3911,7 +3972,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = Lo << Shamt
- // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
+ // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (Shamt ^ (XLEN-1)))
// else:
// Lo = 0
// Hi = Lo << (Shamt-XLEN)
@@ -3921,7 +3982,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
- SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+ SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
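// The XOR form above is valid because this value is only consumed on the
// Shamt < XLEN path, where XLEN-1 is an all-ones mask over the meaningful
// shift bits, so Shamt ^ (XLEN-1) equals (XLEN-1) - Shamt with no borrow.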
@@ -3950,7 +4011,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// SRA expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ (XLEN-1)))
// Hi = Hi >>s Shamt
// else:
// Lo = Hi >>s (Shamt-XLEN);
@@ -3958,7 +4019,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
//
// SRL expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ (XLEN-1)))
// Hi = Hi >>u Shamt
// else:
// Lo = Hi >>u (Shamt-XLEN);
@@ -3971,7 +4032,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
- SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+ SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
@@ -4022,7 +4083,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
-// them to SPLAT_VECTOR_I64
+// them to VMV_V_X_VL.
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -4041,7 +4102,8 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
std::tie(Mask, VL) =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
+ SDValue Res =
+ splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
}
@@ -4051,18 +4113,21 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
// If Hi constant is all the same sign bit as Lo, lower this as a custom
// node in order to try and match RVV vector/scalar instructions.
if ((LoC >> 31) == HiC)
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
+ Lo, DAG.getRegister(RISCV::X0, MVT::i32));
}
// Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
isa<ConstantSDNode>(Hi.getOperand(1)) &&
Hi.getConstantOperandVal(1) == 31)
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
+ DAG.getRegister(RISCV::X0, MVT::i32));
// Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
- DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
+ return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
+ DAG.getUNDEF(VecVT), Lo, Hi,
+ DAG.getRegister(RISCV::X0, MVT::i32));
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
@@ -4078,27 +4143,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
assert(Src.getValueType().isVector() &&
Src.getValueType().getVectorElementType() == MVT::i1);
- MVT XLenVT = Subtarget.getXLenVT();
- SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
- SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
-
if (VecVT.isScalableVector()) {
- // Be careful not to introduce illegal scalar types at this stage, and be
- // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
- // illegal and must be expanded. Since we know that the constants are
- // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
- bool IsRV32E64 =
- !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
-
- if (!IsRV32E64) {
- SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
- SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
- } else {
- SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
- SplatTrueVal =
- DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
- }
-
+ SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
+ SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
}
@@ -4111,9 +4158,14 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
- SplatTrueVal =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
+ SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
+
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatZero, VL);
+ SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
SplatTrueVal, SplatZero, VL);
@@ -4151,8 +4203,9 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
-SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
SDLoc DL(Op);
EVT MaskVT = Op.getValueType();
// Only expect to custom-lower truncations to mask types
@@ -4160,34 +4213,176 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
"Unexpected type for vector mask lowering");
SDValue Src = Op.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
-
+ SDValue Mask, VL;
+ if (IsVPTrunc) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
+
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ if (IsVPTrunc) {
+ MVT MaskContainerVT =
+ getContainerForFixedLengthVector(Mask.getSimpleValueType());
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ if (!IsVPTrunc) {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
}
SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
- SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
- SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
-
- if (VecVT.isScalableVector()) {
- SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
- return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
- }
-
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatOne, VL);
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatZero, VL);
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SDValue Trunc =
DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
DAG.getCondCode(ISD::SETNE), Mask, VL);
- return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+ if (MaskVT.isFixedLengthVector())
+ Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+ return Trunc;
+}
+
+SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
+ SDLoc DL(Op);
+
+ MVT VT = Op.getSimpleValueType();
+ // Only custom-lower vector truncates
+ assert(VT.isVector() && "Unexpected type for vector truncate lowering");
+
+ // Truncates to mask types are handled differently
+ if (VT.getVectorElementType() == MVT::i1)
+ return lowerVectorMaskTruncLike(Op, DAG);
+
+ // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
+ // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
+ // truncate by one power of two at a time.
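+  // E.g. an i64 -> i8 truncate is emitted as three steps:
+  // i64 -> i32 -> i16 -> i8.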
+ MVT DstEltVT = VT.getVectorElementType();
+
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT SrcEltVT = SrcVT.getVectorElementType();
+
+ assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
+ isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
+ "Unexpected vector truncate lowering");
+
+ MVT ContainerVT = SrcVT;
+ SDValue Mask, VL;
+ if (IsVPTrunc) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
+ if (SrcVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(SrcVT);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ if (IsVPTrunc) {
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ SDValue Result = Src;
+ if (!IsVPTrunc) {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+ }
+
+ LLVMContext &Context = *DAG.getContext();
+ const ElementCount Count = ContainerVT.getVectorElementCount();
+ do {
+ SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
+ EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
+ Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
+ Mask, VL);
+ } while (SrcEltVT != DstEltVT);
+
+ if (SrcVT.isFixedLengthVector())
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+ return Result;
+}
+
+SDValue
+RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVP =
+ Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
+ bool IsExtend =
+ Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
+  // RVV can only do truncate fp to types half the size of the source. We
+ // custom-lower f64->f16 rounds via RVV's round-to-odd float
+ // conversion instruction.
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ assert(VT.isVector() && "Unexpected type for vector truncate lowering");
+
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
+ SrcVT.getVectorElementType() != MVT::f16);
+ bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
+ SrcVT.getVectorElementType() != MVT::f64);
+
+ bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
+
+ // Prepare any fixed-length vector operands.
+ MVT ContainerVT = VT;
+ SDValue Mask, VL;
+ if (IsVP) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
+ if (VT.isFixedLengthVector()) {
+ MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
+ ContainerVT =
+ SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+ if (IsVP) {
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ if (!IsVP)
+ std::tie(Mask, VL) =
+ getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+
+ unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
+
+ if (IsDirectConv) {
+ Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
+ if (VT.isFixedLengthVector())
+ Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
+ return Src;
+ }
+
+ unsigned InterConvOpc =
+ IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
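+  // Non-direct conversions go through f32: f16 -> f32 -> f64 for extends,
+  // f64 -> f32 (round-to-odd) -> f16 for rounds; round-to-odd in the first
+  // step avoids double rounding in the final f16 result.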
+
+ MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
+ SDValue IntermediateConv =
+ DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
+ SDValue Result =
+ DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
+ if (VT.isFixedLengthVector())
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+ return Result;
}
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
@@ -4268,13 +4463,15 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
// Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
// undef doesn't obey the earlyclobber constraint. Just splat a zero value.
- ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
- InsertI64VL);
+ ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT,
+ DAG.getUNDEF(I32ContainerVT), Zero, InsertI64VL);
// First slide in the hi value, then the lo in underneath it.
- ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
- ValHi, I32Mask, InsertI64VL);
- ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
- ValLo, I32Mask, InsertI64VL);
+ ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
+ DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
+ I32Mask, InsertI64VL);
+ ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
+ DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo,
+ I32Mask, InsertI64VL);
// Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
}
@@ -4310,7 +4507,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
unsigned WidenVecLen;
SDValue ExtractElementIdx;
SDValue ExtractBitIdx;
- unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors();
+ unsigned MaxEEW = Subtarget.getELEN();
MVT LargestEltVT = MVT::getIntegerVT(
std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
if (NumElts <= LargestEltVT.getSizeInBits()) {
@@ -4360,8 +4557,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
if (!isNullConstant(Idx)) {
// Use a VL of 1 to avoid processing more elements than we need.
SDValue VL = DAG.getConstant(1, DL, XLenVT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
}
@@ -4378,8 +4574,8 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
-static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
@@ -4393,10 +4589,10 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
- if (!II || !II->hasSplatOperand())
+ if (!II || !II->hasScalarOperand())
return SDValue();
- unsigned SplatOp = II->SplatOperand + 1 + HasChain;
+ unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
assert(SplatOp < Op.getNumOperands());
SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
@@ -4426,28 +4622,141 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
// that a widening operation never uses SEW=64.
// NOTE: If this fails the below assert, we can probably just find the
// element count from any operand or result and use it to construct the VT.
- assert(II->SplatOperand > 0 && "Unexpected splat operand!");
+ assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
// The more complex case is when the scalar is larger than XLenVT.
assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
- // If this is a sign-extended 32-bit constant, we can truncate it and rely
- // on the instruction to sign-extend since SEW>XLEN.
- if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
- if (isInt<32>(CVal->getSExtValue())) {
- ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
- return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
+ // If this is a sign-extended 32-bit value, we can truncate it and rely on the
+ // instruction to sign-extend since SEW>XLEN.
+ if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
+ ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
+ return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
+ }
+
+ switch (IntNo) {
+ case Intrinsic::riscv_vslide1up:
+ case Intrinsic::riscv_vslide1down:
+ case Intrinsic::riscv_vslide1up_mask:
+ case Intrinsic::riscv_vslide1down_mask: {
+ // We need to special case these when the scalar is larger than XLen.
+ unsigned NumOps = Op.getNumOperands();
+ bool IsMasked = NumOps == 7;
+
+ // Convert the vector source to the equivalent nxvXi32 vector.
+ MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+ SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
+
+ SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
+ DAG.getConstant(0, DL, XLenVT));
+ SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
+ DAG.getConstant(1, DL, XLenVT));
+
+ // Double the VL since we halved SEW.
+ SDValue AVL = getVLOperand(Op);
+ SDValue I32VL;
+
+ // Optimize for constant AVL
+ if (isa<ConstantSDNode>(AVL)) {
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
+
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+ unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ unsigned MinVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
+
+ uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
+ if (AVLInt <= MinVLMAX) {
+ I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
+ } else if (AVLInt >= 2 * MaxVLMAX) {
+ // Just set vl to VLMAX in this situation
+ RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
+ SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
+ unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
+ SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
+ SDValue SETVLMAX = DAG.getTargetConstant(
+ Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32);
+ I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
+ LMUL);
+ } else {
+        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
+        // depends on the hardware implementation, so let the vsetvli-based
+        // code below handle it.
+ }
}
+ if (!I32VL) {
+ RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
+ SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
+ unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
+ SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
+ SDValue SETVL =
+ DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32);
+      // Use a vsetvli instruction to get the actually used length, which
+      // depends on the hardware implementation.
+ SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
+ SEW, LMUL);
+ I32VL =
+ DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
+ }
+
+ SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
+
+ // Shift the two scalar parts in using SEW=32 slide1up/slide1down
+ // instructions.
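+  // For slide1up, insert the hi half first so each i64 element ends up as
+  // Hi:Lo (lo half in the lower i32 lane); slide1down mirrors this with lo
+  // first.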
+ SDValue Passthru;
+ if (IsMasked)
+ Passthru = DAG.getUNDEF(I32VT);
+ else
+ Passthru = DAG.getBitcast(I32VT, Operands[1]);
+
+ if (IntNo == Intrinsic::riscv_vslide1up ||
+ IntNo == Intrinsic::riscv_vslide1up_mask) {
+ Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
+ ScalarHi, I32Mask, I32VL);
+ Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
+ ScalarLo, I32Mask, I32VL);
+ } else {
+ Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
+ ScalarLo, I32Mask, I32VL);
+ Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
+ ScalarHi, I32Mask, I32VL);
+ }
+
+ // Convert back to nxvXi64.
+ Vec = DAG.getBitcast(VT, Vec);
+
+ if (!IsMasked)
+ return Vec;
+ // Apply mask after the operation.
+ SDValue Mask = Operands[NumOps - 3];
+ SDValue MaskedOff = Operands[1];
+ // Assume Policy operand is the last operand.
+ uint64_t Policy =
+ cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
+ // We don't need to select maskedoff if it's undef.
+ if (MaskedOff.isUndef())
+ return Vec;
+ // TAMU
+ if (Policy == RISCVII::TAIL_AGNOSTIC)
+ return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
+ AVL);
+    // TUMA or TUMU: currently we always emit the tumu policy, regardless of
+    // tuma. This is fine because vmerge does not care about the mask policy.
+ return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
+ AVL);
+ }
}
// We need to convert the scalar to a splat vector.
- // FIXME: Can we implicitly truncate the scalar if it is known to
- // be sign extended?
SDValue VL = getVLOperand(Op);
assert(VL.getValueType() == XLenVT);
- ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
+ ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
@@ -4481,7 +4790,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_zip:
case Intrinsic::riscv_unzip: {
// Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
- // For i32 the immdiate is 15. For i64 the immediate is 31.
+ // For i32 the immediate is 15. For i64 the immediate is 31.
unsigned Opc =
IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
unsigned BitWidth = Op.getValueSizeInBits();
@@ -4516,10 +4825,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1));
case Intrinsic::riscv_vmv_v_x:
return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
- Op.getSimpleValueType(), DL, DAG, Subtarget);
+ Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
+ Subtarget);
case Intrinsic::riscv_vfmv_v_f:
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::riscv_vmv_s_x: {
SDValue Scalar = Op.getOperand(2);
@@ -4533,7 +4843,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// This is an i64 value that lives in two scalar registers. We have to
// insert this in a convoluted way. First we build vXi64 splat containing
- // the/ two values that we assemble using some bit math. Next we'll use
+ // the two values that we assemble using some bit math. Next we'll use
// vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
// to merge element 0 from our splat into the source vector.
// FIXME: This is probably not the best way to do this, but it is
@@ -4550,12 +4860,15 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Vec = Op.getOperand(1);
SDValue VL = getVLOperand(Op);
- SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
- SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
- DAG.getConstant(0, DL, MVT::i32), VL);
+  SDValue SplattedVal =
+      splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
+ if (Op.getOperand(1).isUndef())
+ return SplattedVal;
+ SDValue SplattedIdx =
+ DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
+ DAG.getConstant(0, DL, MVT::i32), VL);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ MVT MaskVT = getMaskTypeFor(VT);
+ SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
SDValue SelectCond =
DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
@@ -4563,73 +4876,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
Vec, VL);
}
- case Intrinsic::riscv_vslide1up:
- case Intrinsic::riscv_vslide1down:
- case Intrinsic::riscv_vslide1up_mask:
- case Intrinsic::riscv_vslide1down_mask: {
- // We need to special case these when the scalar is larger than XLen.
- unsigned NumOps = Op.getNumOperands();
- bool IsMasked = NumOps == 7;
- unsigned OpOffset = IsMasked ? 1 : 0;
- SDValue Scalar = Op.getOperand(2 + OpOffset);
- if (Scalar.getValueType().bitsLE(XLenVT))
- break;
-
- // Splatting a sign extended constant is fine.
- if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
- if (isInt<32>(CVal->getSExtValue()))
- break;
-
- MVT VT = Op.getSimpleValueType();
- assert(VT.getVectorElementType() == MVT::i64 &&
- Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
-
- // Convert the vector source to the equivalent nxvXi32 vector.
- MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
- SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
-
- SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(0, DL, XLenVT));
- SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(1, DL, XLenVT));
-
- // Double the VL since we halved SEW.
- SDValue VL = getVLOperand(Op);
- SDValue I32VL =
- DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
-
- MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
- SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
-
- // Shift the two scalar parts in using SEW=32 slide1up/slide1down
- // instructions.
- if (IntNo == Intrinsic::riscv_vslide1up ||
- IntNo == Intrinsic::riscv_vslide1up_mask) {
- Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
- I32Mask, I32VL);
- Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
- I32Mask, I32VL);
- } else {
- Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
- I32Mask, I32VL);
- Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
- I32Mask, I32VL);
- }
-
- // Convert back to nxvXi64.
- Vec = DAG.getBitcast(VT, Vec);
-
- if (!IsMasked)
- return Vec;
-
- // Apply mask after the operation.
- SDValue Mask = Op.getOperand(NumOps - 3);
- SDValue MaskedOff = Op.getOperand(1);
- return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
- }
}
- return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
+ return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
@@ -4652,8 +4901,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue PassThru = Op.getOperand(2);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
@@ -4688,9 +4936,48 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load: {
+ SDLoc DL(Op);
+ static const Intrinsic::ID VlsegInts[7] = {
+ Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
+ Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
+ Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
+ Intrinsic::riscv_vlseg8};
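+    // Lower to the scalable vlsegN intrinsic, passing undef passthru
+    // operands and the fixed-length element count as the AVL.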
+ unsigned NF = Op->getNumValues() - 1;
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getSimpleValueType(0);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
+ auto *Load = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
+ ContainerVTs.push_back(MVT::Other);
+ SDVTList VTs = DAG.getVTList(ContainerVTs);
+ SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
+ Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
+ Ops.push_back(Op.getOperand(2));
+ Ops.push_back(VL);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ Load->getMemoryVT(), Load->getMemOperand());
+ SmallVector<SDValue, 9> Results;
+ for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
+ Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
+ DAG, Subtarget));
+ Results.push_back(Result.getValue(NF));
+ return DAG.getMergeValues(Results, DL);
+ }
}
- return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
+ return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
@@ -4714,8 +5001,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
@@ -4898,8 +5184,9 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
SDValue NeutralElem =
DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
- SDValue IdentitySplat = lowerScalarSplat(
- NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
+ SDValue IdentitySplat =
+ lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT),
+ M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
IdentitySplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
@@ -4960,8 +5247,9 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SDValue ScalarSplat = lowerScalarSplat(
- ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
+ SDValue ScalarSplat =
+ lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT),
+ M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
VectorVal, ScalarSplat, Mask, VL);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
@@ -5027,9 +5315,9 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
- SDValue StartSplat =
- lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT,
- DL, DAG, Subtarget);
+ SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0),
+ DAG.getConstant(1, DL, XLenVT), M1VT,
+ DL, DAG, Subtarget);
SDValue Reduction =
DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
@@ -5331,13 +5619,13 @@ SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
if (StepValImm != 1) {
if (isPowerOf2_64(StepValImm)) {
SDValue StepVal =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
+ DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
} else {
SDValue StepVal = lowerScalarSplat(
- DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
- DL, DAG, Subtarget);
+ SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
+ VL, VT, DL, DAG, Subtarget);
StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
}
}
@@ -5353,22 +5641,26 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
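+  // There is no mask-vector gather, so lower i1 reverses by widening to i8,
+  // reversing that, and truncating back to i1.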
+ if (VecVT.getVectorElementType() == MVT::i1) {
+ MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
+ SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
+ SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
+ return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
+ }
unsigned EltSize = VecVT.getScalarSizeInBits();
unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
-
- unsigned MaxVLMAX = 0;
- unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
- if (VectorBitsMax != 0)
- MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
MVT IntVT = VecVT.changeVectorElementTypeToInteger();
- // If this is SEW=8 and VLMAX is unknown or more than 256, we need
+ // If this is SEW=8 and VLMAX is potentially more than 256, we need
// to use vrgatherei16.vv.
// TODO: It's also possible to use vrgatherei16.vv for other types to
// decrease register width for the index calculation.
- if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
+ if (MaxVLMAX > 256 && EltSize == 8) {
// If this is LMUL=8, we have to split before can use vrgatherei16.vv.
// Reverse each half, then reassemble them in reverse order.
// NOTE: It's also possible that after splitting that VLMAX no longer
@@ -5413,13 +5705,51 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
if (!IsRV32E64)
SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
else
- SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
+ SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
+ VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
SDValue Indices =
DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
- return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
+ return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask,
+ DAG.getUNDEF(VecVT), VL);
+}
+
+SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VecVT = Op.getSimpleValueType();
+
+ unsigned MinElts = VecVT.getVectorMinNumElements();
+ SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
+ DAG.getConstant(MinElts, DL, XLenVT));
+
+ int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+ SDValue DownOffset, UpOffset;
+ if (ImmValue >= 0) {
+    // The operand is a TargetConstant; we need to rebuild it as a regular
+    // constant.
+ DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
+ UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
+ } else {
+    // The operand is a TargetConstant; we need to rebuild it as a regular
+    // constant rather than negating the original operand.
+ UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
+ DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
+ }
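+
+  // E.g. with VLMAX = 8 and immediate 2, the result is V1[2..7] : V2[0..1]:
+  // V1 slides down by 2 and V2 slides up by VLMAX - 2 = 6.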
+
+ SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
+
+ SDValue SlideDown =
+ DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1,
+ DownOffset, TrueMask, UpOffset);
+ return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset,
+ TrueMask,
+ DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
}
SDValue
@@ -5434,18 +5764,26 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
"Expecting a correctly-aligned load");
MVT VT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL =
- DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
+ SDValue IntID = DAG.getTargetConstant(
+ IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
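+  // vlm has no passthru operand, so only add the undef passthru for vle.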
+ SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
+ if (!IsMaskOp)
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ Ops.push_back(Load->getBasePtr());
+ Ops.push_back(VL);
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
- SDValue NewLoad = DAG.getMemIntrinsicNode(
- RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
- Load->getMemoryVT(), Load->getMemOperand());
+ SDValue NewLoad =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ Load->getMemoryVT(), Load->getMemOperand());
SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
- return DAG.getMergeValues({Result, Load->getChain()}, DL);
+ return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
SDValue
@@ -5461,6 +5799,7 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
SDValue StoreVal = Store->getValue();
MVT VT = StoreVal.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
// If the size is less than a byte, we need to pad with zeros to make a byte.
if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
@@ -5472,14 +5811,17 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL =
- DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
SDValue NewValue =
convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
+
+ bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
+ SDValue IntID = DAG.getTargetConstant(
+ IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
return DAG.getMemIntrinsicNode(
- RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
- {Store->getChain(), NewValue, Store->getBasePtr(), VL},
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
+ {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
Store->getMemoryVT(), Store->getMemOperand());
}
@@ -5514,8 +5856,7 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VT);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
@@ -5581,8 +5922,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
@@ -5620,8 +5960,8 @@ RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
SDValue VL =
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
Op.getOperand(2), Mask, VL);
@@ -5667,9 +6007,9 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- SDValue SplatZero =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
- DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ SDValue SplatZero = DAG.getNode(
+ RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
SDValue NegX =
DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
SDValue Max =
@@ -5787,15 +6127,260 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
}
if (!VT.isFixedLengthVector())
- return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
+ return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
+ SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
+SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ SDValue Src = Op.getOperand(0);
+ // NOTE: Mask is dropped.
+ SDValue VL = Op.getOperand(2);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
+ }
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+ SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), Zero, VL);
+
+ SDValue SplatValue = DAG.getConstant(
+ Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
+ SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatValue, VL);
+
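+  // Lane-wise this computes Src ? SplatValue : 0, i.e. 0/1 for zero-extend
+  // and 0/-1 for sign-extend.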
+ SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
+ Splat, ZeroSplat, VL);
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
+SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ SDValue Op1 = Op.getOperand(0);
+ SDValue Op2 = Op.getOperand(1);
+ ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // NOTE: Mask is dropped.
+ SDValue VL = Op.getOperand(4);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
+ }
+
+ SDValue Result;
+ SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
+
+ switch (Condition) {
+ default:
+ break;
+ // X != Y --> (X^Y)
+ case ISD::SETNE:
+ Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
+ break;
+ // X == Y --> ~(X^Y)
+ case ISD::SETEQ: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
+ Result =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
+ break;
+ }
+ // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETGT:
+ case ISD::SETULT: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
+ Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
+ break;
+ }
+ // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETLT:
+ case ISD::SETUGT: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
+ Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
+ break;
+ }
+ // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE:
+ case ISD::SETULE: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
+ Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
+ break;
+ }
+ // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE:
+ case ISD::SETUGE: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
+ Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
+ break;
+ }
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
+// Lower Floating-Point/Integer Type-Convert VP SDNodes
+SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
+ unsigned RISCVISDOpc) const {
+ SDLoc DL(Op);
+
+ SDValue Src = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue VL = Op.getOperand(2);
+
+ MVT DstVT = Op.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
+ if (DstVT.isFixedLengthVector()) {
+ DstVT = getContainerForFixedLengthVector(DstVT);
+ SrcVT = getContainerForFixedLengthVector(SrcVT);
+ Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
+ MVT MaskVT = getMaskTypeFor(DstVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ unsigned RISCVISDExtOpc = (RISCVISDOpc == RISCVISD::SINT_TO_FP_VL ||
+ RISCVISDOpc == RISCVISD::FP_TO_SINT_VL)
+ ? RISCVISD::VSEXT_VL
+ : RISCVISD::VZEXT_VL;
+
+ unsigned DstEltSize = DstVT.getScalarSizeInBits();
+ unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
+
+ SDValue Result;
+ if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
+ if (SrcVT.isInteger()) {
+ assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
+
+ // Do we need to do any pre-widening before converting?
+ if (SrcEltSize == 1) {
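+        // Pre-widen the i1 source to 0/1 (zext) or 0/-1 (sext) integer
+        // splats via a vselect on the mask source before converting to FP.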
+ MVT IntVT = DstVT.changeVectorElementTypeToInteger();
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+ SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
+ DAG.getUNDEF(IntVT), Zero, VL);
+ SDValue One = DAG.getConstant(
+ RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
+ SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
+ DAG.getUNDEF(IntVT), One, VL);
+ Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
+ ZeroSplat, VL);
+ } else if (DstEltSize > (2 * SrcEltSize)) {
+ // Widen before converting.
+ MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
+ DstVT.getVectorElementCount());
+ Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
+ }
+
+ Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
+ } else {
+ assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
+ "Wrong input/output vector types");
+
+ // Convert f16 to f32 then convert f32 to i64.
+ if (DstEltSize > (2 * SrcEltSize)) {
+ assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
+ MVT InterimFVT =
+ MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
+ Src =
+ DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
+ }
+
+ Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
+ }
+ } else { // Narrowing + Conversion
+ if (SrcVT.isInteger()) {
+ assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
+      // First do a narrowing convert to an FP type half the size, then round
+      // that FP type down to a smaller FP type if needed.
+
+ MVT InterimFVT = DstVT;
+ if (SrcEltSize > (2 * DstEltSize)) {
+ assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
+ assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
+ InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
+ }
+
+ Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
+
+ if (InterimFVT != DstVT) {
+ Src = Result;
+ Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
+ }
+ } else {
+ assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
+ "Wrong input/output vector types");
+ // First do a narrowing conversion to an integer half the size, then
+ // truncate if needed.
+
+ if (DstEltSize == 1) {
+ // First convert to the same size integer, then convert to mask using
+ // setcc.
+ assert(SrcEltSize >= 16 && "Unexpected FP type!");
+ MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
+ DstVT.getVectorElementCount());
+ Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
+
+ // Compare the integer result to 0. The integer should be 0 or 1/-1,
+ // otherwise the conversion was undefined.
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
+ DAG.getUNDEF(InterimIVT), SplatZero);
+ Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
+ DAG.getCondCode(ISD::SETNE), Mask, VL);
+ } else {
+ MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+ DstVT.getVectorElementCount());
+
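+        // E.g. vXf64 -> vXi8: one narrowing fcvt to vXi32, then two
+        // TRUNCATE_VECTOR_VL steps (vXi32 -> vXi16 -> vXi8).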
+ Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
+
+ while (InterimIVT != DstVT) {
+ SrcEltSize /= 2;
+ Src = Result;
+ InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+ DstVT.getVectorElementCount());
+ Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
+ Src, Mask, VL);
+ }
+ }
+ }
+ }
+
+ MVT VT = Op.getSimpleValueType();
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
unsigned MaskOpc,
unsigned VecOpc) const {
@@ -5876,23 +6461,14 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
- // We need to use the larger of the result and index type to determine the
- // scalable type to use so we don't increase LMUL for any operand/result.
- if (VT.bitsGE(IndexVT)) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
- ContainerVT.getVectorElementCount());
- } else {
- IndexVT = getContainerForFixedLengthVector(IndexVT);
- ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
- IndexVT.getVectorElementCount());
- }
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
+ ContainerVT.getVectorElementCount());
Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
@@ -5987,24 +6563,15 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
- // We need to use the larger of the value and index type to determine the
- // scalable type to use so we don't increase LMUL for any operand/result.
- if (VT.bitsGE(IndexVT)) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
- ContainerVT.getVectorElementCount());
- } else {
- IndexVT = getContainerForFixedLengthVector(IndexVT);
- ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
- IndexVT.getVectorElementCount());
- }
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
+ ContainerVT.getVectorElementCount());
Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
@@ -6095,14 +6662,21 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
RMValue);
}
+SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ bool isRISCV64 = Subtarget.is64Bit();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
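+  // The CFA is the value of the stack pointer on function entry, i.e. a
+  // fixed object at offset 0 in the incoming frame.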
+ int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
+ return DAG.getFrameIndex(FI, PtrVT);
+}
+
static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
switch (IntNo) {
default:
llvm_unreachable("Unexpected Intrinsic");
- case Intrinsic::riscv_grev:
- return RISCVISD::GREVW;
- case Intrinsic::riscv_gorc:
- return RISCVISD::GORCW;
case Intrinsic::riscv_bcompress:
return RISCVISD::BCOMPRESSW;
case Intrinsic::riscv_bdecompress:
@@ -6121,9 +6695,12 @@ static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
unsigned IntNo) {
SDLoc DL(N);
RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
- SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
- SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2);
+  // Promote each of the instruction's operands to i64.
+  SmallVector<SDValue, 3> NewOps;
+  for (SDValue Op : drop_begin(N->ops()))
+    NewOps.push_back(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op));
+ SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOps);
// ReplaceNodeResults requires we maintain the same type for the return value.
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
@@ -6150,10 +6727,6 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
return RISCVISD::ROLW;
case ISD::ROTR:
return RISCVISD::RORW;
- case RISCVISD::GREV:
- return RISCVISD::GREVW;
- case RISCVISD::GORC:
- return RISCVISD::GORCW;
}
}
@@ -6309,6 +6882,10 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (N->getOperand(1).getOpcode() != ISD::Constant) {
+ // If we can use a BSET instruction, allow default promotion to apply.
+ if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
+ isOneConstant(N->getOperand(0)))
+ break;
Results.push_back(customLegalizeToWOp(N, DAG));
break;
}
@@ -6388,12 +6965,23 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
DAG.getValueType(MVT::i32));
- // Sign extend the LHS and perform an unsigned compare with the ADDW result.
- // Since the inputs are sign extended from i32, this is equivalent to
- // comparing the lower 32 bits.
- LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
- SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
- IsAdd ? ISD::SETULT : ISD::SETUGT);
+ SDValue Overflow;
+ if (IsAdd && isOneConstant(RHS)) {
+    // Special case: uaddo X, 1 overflows exactly when the addition result
+    // is 0. The general form (X + C) < C is not necessarily beneficial:
+    // although it reduces the live range of X, it may require materializing
+    // the constant C, especially when the setcc result feeds a branch, since
+    // RISC-V has no compare-with-constant branch instructions.
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
+ DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
+ } else {
+ // Sign extend the LHS and perform an unsigned compare with the ADDW
+ // result. Since the inputs are sign extended from i32, this is equivalent
+ // to comparing the lower 32 bits.
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
+ IsAdd ? ISD::SETULT : ISD::SETUGT);
+ }
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
Results.push_back(Overflow);
@@ -6421,6 +7009,33 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(expandAddSubSat(N, DAG));
return;
}
+ case ISD::ABS: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+
+ // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
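+    // E.g. for X = -5: Y = -1 (all ones), xor(X, Y) = 4, and 4 - (-1) = 5.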
+
+ SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+
+ // Freeze the source so we can increase it's use count.
+ Src = DAG.getFreeze(Src);
+
+ // Copy sign bit to all bits using the sraiw pattern.
+ SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
+ DAG.getValueType(MVT::i32));
+ SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
+ DAG.getConstant(31, DL, MVT::i64));
+
+ SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
+ NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
+
+ // NOTE: The result is only required to be anyextended, but sext is
+ // consistent with type legalization of sub.
+ NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
+ DAG.getValueType(MVT::i32));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ return;
+ }
case ISD::BITCAST: {
EVT VT = N->getValueType(0);
assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
@@ -6451,37 +7066,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
case RISCVISD::GREV:
- case RISCVISD::GORC: {
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
- "Unexpected custom legalisation");
- assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
- // This is similar to customLegalizeToWOp, except that we pass the second
- // operand (a TargetConstant) straight through: it is already of type
- // XLenVT.
- RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
- SDValue NewOp0 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
- SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
- // ReplaceNodeResults requires we maintain the same type for the return
- // value.
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
- break;
- }
+ case RISCVISD::GORC:
case RISCVISD::SHFL: {
- // There is no SHFLIW instruction, but we can just promote the operation.
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ MVT VT = N->getSimpleValueType(0);
+ MVT XLenVT = Subtarget.getXLenVT();
+ assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
"Unexpected custom legalisation");
assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
- SDValue NewOp0 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ assert((Subtarget.hasStdExtZbp() ||
+ (Subtarget.hasStdExtZbkb() && N->getOpcode() == RISCVISD::GREV &&
+ N->getConstantOperandVal(1) == 7)) &&
+ "Unexpected extension");
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
+ DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
+ SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp0, NewOp1);
// ReplaceNodeResults requires we maintain the same type for the return
// value.
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
break;
}
case ISD::BSWAP:
@@ -6496,9 +7098,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
if (N->getOpcode() == ISD::BSWAP)
Imm &= ~0x7U;
- unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
- SDValue GREVI =
- DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
+ SDValue GREVI = DAG.getNode(RISCVISD::GREV, DL, XLenVT, NewOp0,
+ DAG.getConstant(Imm, DL, XLenVT));
// ReplaceNodeResults requires we maintain the same type for the return
// value.
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
@@ -6564,9 +7165,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
MVT XLenVT = Subtarget.getXLenVT();
// Use a VL of 1 to avoid processing more elements than we need.
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue VL = DAG.getConstant(1, DL, XLenVT);
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
// Unless the index is known to be 0, we must slide the vector down to get
// the desired element into index 0.
@@ -6581,6 +7181,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// To extract the upper XLEN bits of the vector element, shift the first
// element right by 32 bits and re-extract the lower XLEN bits.
SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
DAG.getConstant(32, DL, XLenVT), VL);
SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
ThirtyTwoV, Mask, VL);
@@ -6597,38 +7198,42 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
llvm_unreachable(
"Don't know how to custom type legalize this intrinsic!");
case Intrinsic::riscv_grev:
- case Intrinsic::riscv_gorc:
- case Intrinsic::riscv_bcompress:
- case Intrinsic::riscv_bdecompress:
- case Intrinsic::riscv_bfp: {
+ case Intrinsic::riscv_gorc: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp2 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
+ // If the control is a constant, promote the node by clearing any extra
+ // bits in the control. isel will form greviw/gorciw if the result is
+ // sign extended.
+ if (isa<ConstantSDNode>(NewOp2)) {
+ NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
+ DAG.getConstant(0x1f, DL, MVT::i64));
+ Opc = IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
+ }
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
break;
}
+ case Intrinsic::riscv_bcompress:
+ case Intrinsic::riscv_bdecompress:
+ case Intrinsic::riscv_bfp:
case Intrinsic::riscv_fsl:
case Intrinsic::riscv_fsr: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewOp2 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
- SDValue NewOp3 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3));
- unsigned Opc = getRISCVWOpcodeByIntr(IntNo);
- SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
break;
}
case Intrinsic::riscv_orc_b: {
// Lower to the GORCI encoding for orc.b with the operand extended.
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- // If Zbp is enabled, use GORCIW which will sign extend the result.
- unsigned Opc =
- Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
- SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
+ SDValue Res = DAG.getNode(RISCVISD::GORC, DL, MVT::i64, NewOp,
DAG.getConstant(7, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -6681,10 +7286,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// To extract the upper XLEN bits of the vector element, shift the first
// element right by 32 bits and re-extract the lower XLEN bits.
SDValue VL = DAG.getConstant(1, DL, XLenVT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
- SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
- DAG.getConstant(32, DL, XLenVT), VL);
+ SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
+
+ SDValue ThirtyTwoV =
+ DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
+ DAG.getConstant(32, DL, XLenVT), VL);
SDValue LShr32 =
DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
@@ -6840,6 +7446,110 @@ static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
return matchRISCVBitmanipPat(Op, BitmanipMasks);
}
+// Try to fold (<bop> x, (reduction.<bop> vec, start))
+static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG) {
+ auto BinOpToRVVReduce = [](unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled binary to transfrom reduction");
+ case ISD::ADD:
+ return RISCVISD::VECREDUCE_ADD_VL;
+ case ISD::UMAX:
+ return RISCVISD::VECREDUCE_UMAX_VL;
+ case ISD::SMAX:
+ return RISCVISD::VECREDUCE_SMAX_VL;
+ case ISD::UMIN:
+ return RISCVISD::VECREDUCE_UMIN_VL;
+ case ISD::SMIN:
+ return RISCVISD::VECREDUCE_SMIN_VL;
+ case ISD::AND:
+ return RISCVISD::VECREDUCE_AND_VL;
+ case ISD::OR:
+ return RISCVISD::VECREDUCE_OR_VL;
+ case ISD::XOR:
+ return RISCVISD::VECREDUCE_XOR_VL;
+ case ISD::FADD:
+ return RISCVISD::VECREDUCE_FADD_VL;
+ case ISD::FMAXNUM:
+ return RISCVISD::VECREDUCE_FMAX_VL;
+ case ISD::FMINNUM:
+ return RISCVISD::VECREDUCE_FMIN_VL;
+ }
+ };
+
+ auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
+ return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(V.getOperand(1)) &&
+ V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
+ };
+
+ unsigned Opc = N->getOpcode();
+ unsigned ReduceIdx;
+ if (IsReduction(N->getOperand(0), Opc))
+ ReduceIdx = 0;
+ else if (IsReduction(N->getOperand(1), Opc))
+ ReduceIdx = 1;
+ else
+ return SDValue();
+
+ // Skip if FADD disallows reassociation but the combiner needs it.
+ if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
+ return SDValue();
+
+ SDValue Extract = N->getOperand(ReduceIdx);
+ SDValue Reduce = Extract.getOperand(0);
+ if (!Reduce.hasOneUse())
+ return SDValue();
+
+ SDValue ScalarV = Reduce.getOperand(2);
+
+ // Make sure that ScalarV is a splat with VL=1.
+ if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
+ ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
+ ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
+ return SDValue();
+
+ if (!isOneConstant(ScalarV.getOperand(2)))
+ return SDValue();
+
+ // TODO: Deal with values other than the neutral element.
+ auto IsRVVNeutralElement = [Opc, &DAG](SDNode *N, SDValue V) {
+ if (Opc == ISD::FADD && N->getFlags().hasNoSignedZeros() &&
+ isNullFPConstant(V))
+ return true;
+ return DAG.getNeutralElement(Opc, SDLoc(V), V.getSimpleValueType(),
+ N->getFlags()) == V;
+ };
+
+ // Check that the scalar of ScalarV is the neutral element.
+ if (!IsRVVNeutralElement(N, ScalarV.getOperand(1)))
+ return SDValue();
+
+ if (!ScalarV.hasOneUse())
+ return SDValue();
+
+ EVT SplatVT = ScalarV.getValueType();
+ SDValue NewStart = N->getOperand(1 - ReduceIdx);
+ unsigned SplatOpc = RISCVISD::VFMV_S_F_VL;
+ if (SplatVT.isInteger()) {
+ auto *C = dyn_cast<ConstantSDNode>(NewStart.getNode());
+ if (!C || C->isZero() || !isInt<5>(C->getSExtValue()))
+ SplatOpc = RISCVISD::VMV_S_X_VL;
+ else
+ SplatOpc = RISCVISD::VMV_V_X_VL;
+ }
+
+ SDValue NewScalarV =
+ DAG.getNode(SplatOpc, SDLoc(N), SplatVT, ScalarV.getOperand(0), NewStart,
+ ScalarV.getOperand(2));
+ SDValue NewReduce =
+ DAG.getNode(Reduce.getOpcode(), SDLoc(Reduce), Reduce.getValueType(),
+ Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV,
+ Reduce.getOperand(3), Reduce.getOperand(4));
+ return DAG.getNode(Extract.getOpcode(), SDLoc(Extract),
+ Extract.getValueType(), NewReduce, Extract.getOperand(1));
+}
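
As a standalone scalar sketch of why this fold is sound (illustrative host code, not from the patch): when the start value of an associative reduction is the neutral element, the scalar operand of the outer binop can take its place.

    // Model of combineBinOpToReduce for ADD, with std::accumulate standing in
    // for the RVV reduction.
    #include <cassert>
    #include <numeric>
    #include <vector>

    int main() {
      std::vector<int> Vec{3, 7, 11};
      int X = 42;
      // Before: add X, (reduce.add Vec, /*start=*/0). 0 is ADD's neutral
      // element, so X can replace it.
      int Before = X + std::accumulate(Vec.begin(), Vec.end(), 0);
      // After: reduce.add Vec, /*start=*/X. The outer add disappears.
      int After = std::accumulate(Vec.begin(), Vec.end(), X);
      assert(Before == After);
      return 0;
    }
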
+
// Match the following pattern as a GREVI(W) operation
// (or (BITMANIP_SHL x), (BITMANIP_SRL x))
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
@@ -7066,11 +7776,70 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
}
+// Combine
+// ROTR ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
+// ROTL ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
+// ROTR ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
+// ROTL ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
+// RORW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
+// ROLW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
+// The grev patterns represent BSWAP.
+// FIXME: This can be generalized to any GREV. We just need to toggle the MSB
+// of the grev amount.
+static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ bool IsWInstruction =
+ N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW;
+ assert((N->getOpcode() == ISD::ROTR || N->getOpcode() == ISD::ROTL ||
+ IsWInstruction) &&
+ "Unexpected opcode!");
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (!Subtarget.hasStdExtZbp() || Src.getOpcode() != RISCVISD::GREV)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ !isa<ConstantSDNode>(Src.getOperand(1)))
+ return SDValue();
+
+ unsigned BitWidth = IsWInstruction ? 32 : VT.getSizeInBits();
+ assert(isPowerOf2_32(BitWidth) && "Expected a power of 2");
+
+ // Needs to be a rotate by half the bitwidth for ROTR/ROTL or by 16 for
+ // RORW/ROLW. And the grev should be the encoding for bswap for this width.
+ unsigned ShAmt1 = N->getConstantOperandVal(1);
+ unsigned ShAmt2 = Src.getConstantOperandVal(1);
+ if (BitWidth < 32 || ShAmt1 != (BitWidth / 2) || ShAmt2 != (BitWidth - 8))
+ return SDValue();
+
+ Src = Src.getOperand(0);
+
+ // Toggle the MSB of the shift amount.
+ unsigned CombinedShAmt = ShAmt1 ^ ShAmt2;
+ if (CombinedShAmt == 0)
+ return Src;
+
+ SDValue Res = DAG.getNode(
+ RISCVISD::GREV, DL, VT, Src,
+ DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
+ if (!IsWInstruction)
+ return Res;
+
+ // Sign extend the result to match the behavior of the rotate. This will be
+ // selected to GREVIW in isel.
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Res,
+ DAG.getValueType(MVT::i32));
+}
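
A self-contained numeric check of the RV32 case (illustrative only): bswap is grev with amount 24, rotating by half the width XORs 16 into the amount, and 24 ^ 16 == 8 swaps the bytes within each halfword.

    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t X) { // grev(X, 24) on RV32
      return (X << 24) | ((X & 0xFF00u) << 8) | ((X >> 8) & 0xFF00u) | (X >> 24);
    }
    static uint32_t rotr32(uint32_t X, unsigned N) {
      return (X >> N) | (X << (32 - N));
    }
    static uint32_t grev8(uint32_t X) { // swap bytes within each halfword
      return ((X & 0x00FF00FFu) << 8) | ((X >> 8) & 0x00FF00FFu);
    }

    int main() {
      uint32_t X = 0x12345678u;
      assert(rotr32(bswap32(X), 16) == grev8(X)); // both give 0x34127856
      return 0;
    }
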
+
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
// not undo itself, but they are redundant.
static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
+ bool IsGORC = N->getOpcode() == RISCVISD::GORC;
+ assert((IsGORC || N->getOpcode() == RISCVISD::GREV) && "Unexpected opcode");
SDValue Src = N->getOperand(0);
if (Src.getOpcode() != N->getOpcode())
@@ -7085,7 +7854,7 @@ static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
Src = Src.getOperand(0);
unsigned CombinedShAmt;
- if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
+ if (IsGORC)
CombinedShAmt = ShAmt1 | ShAmt2;
else
CombinedShAmt = ShAmt1 ^ ShAmt2;
@@ -7203,6 +7972,11 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!N0C || !N1C)
return SDValue();
+ // If N0C has multiple uses it's possible one of the cases in
+ // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
+ // in an infinite loop.
+ if (!N0C->hasOneUse())
+ return SDValue();
int64_t C0 = N0C->getSExtValue();
int64_t C1 = N1C->getSExtValue();
int64_t CA, CB;
@@ -7238,6 +8012,8 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
return V;
if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
// fold (add (select lhs, rhs, cc, 0, y), x) ->
// (select lhs, rhs, cc, x, (add x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
@@ -7251,7 +8027,30 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
}
-static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
+ // extending X. This is safe since we only need the LSB after the shift and
+ // shift amounts larger than 31 would produce poison. If we wait until
+ // type legalization, we'll create RISCVISD::SRLW and we can't recover it
+ // to use a BEXT instruction.
+ if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
+ N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
+ N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.hasOneUse()) {
+ SDLoc DL(N);
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
+ SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
+ DAG.getConstant(1, DL, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
+ }
+
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
+
// fold (and (select lhs, rhs, cc, -1, y), x) ->
// (select lhs, rhs, cc, x, (and x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
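
Why the any-extend in the Zbs pre-promotion above is safe can be checked with plain integers (a hypothetical host-code check, not part of the patch): for shift amounts up to 31, bit Y of the widened value equals bit Y of the original 32 bits, whatever the upper half holds.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xDEADBEEFu;
      for (unsigned Y = 0; Y < 32; ++Y) {
        uint32_t Narrow = (X >> Y) & 1u;
        // Any-extend: the upper 32 bits are arbitrary garbage.
        uint64_t AnyExt = 0xFFFFFFFF00000000ull | X;
        uint64_t Wide = (AnyExt >> Y) & 1u;
        assert(Wide == Narrow);
      }
      return 0;
    }
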
@@ -7268,99 +8067,197 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SHFL;
}
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
// fold (or (select cond, 0, y), x) ->
// (select cond, x, (or x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
+ // NOTE: Assumes ROL being legal means ROLW is legal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (N0.getOpcode() == RISCVISD::SLLW &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
+ TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
+ SDLoc DL(N);
+ return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
+ DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
+ }
+
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
// fold (xor (select cond, 0, y), x) ->
// (select cond, x, (xor x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
-// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
-// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
-// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
-// removed during type legalization leaving an ADD/SUB/MUL use that won't use
-// ADDW/SUBW/MULW.
-static SDValue performANY_EXTENDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const RISCVSubtarget &Subtarget) {
- if (!Subtarget.is64Bit())
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
-
+static SDValue
+performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SDValue Src = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
- return SDValue();
- // The opcode must be one that can implicitly sign_extend.
- // FIXME: Additional opcodes.
- switch (Src.getOpcode()) {
- default:
- return SDValue();
- case ISD::MUL:
- if (!Subtarget.hasStdExtM())
- return SDValue();
- LLVM_FALLTHROUGH;
- case ISD::ADD:
- case ISD::SUB:
- break;
+ // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
+ if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
+ cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
+ return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
+ Src.getOperand(0));
+
+ // Fold (i64 (sext_inreg (abs X), i32)) ->
+ // (i64 (smax (sext_inreg (neg X), i32), X)) if X has more than 32 sign bits.
+ // The (sext_inreg (neg X), i32) will be selected to negw by isel. This
+ // pattern occurs after type legalization of (i32 (abs X)) on RV64 if the user
+ // of the (i32 (abs X)) is a sext or setcc or something else that causes type
+ // legalization to add a sext_inreg after the abs. The (i32 (abs X)) will have
+ // been type legalized to (i64 (abs (sext_inreg X, i32))), but the sext_inreg
+ // may get combined into an earlier operation so we need to use
+ // ComputeNumSignBits.
+ // NOTE: (i64 (sext_inreg (abs X), i32)) can also be created for
+ // (i64 (ashr (shl (abs X), 32), 32)) without any type legalization so
+ // we can't assume that X has 33 sign bits. We must check.
+ if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit() &&
+ Src.getOpcode() == ISD::ABS && Src.hasOneUse() && VT == MVT::i64 &&
+ cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32 &&
+ DAG.ComputeNumSignBits(Src.getOperand(0)) > 32) {
+ SDLoc DL(N);
+ SDValue Freeze = DAG.getFreeze(Src.getOperand(0));
+ SDValue Neg =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i64), Freeze);
+ Neg = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Neg,
+ DAG.getValueType(MVT::i32));
+ return DAG.getNode(ISD::SMAX, DL, MVT::i64, Freeze, Neg);
}
- // Only handle cases where the result is used by a CopyToReg. That likely
- // means the value is a liveout of the basic block. This helps prevent
- // infinite combine loops like PR51206.
- if (none_of(N->uses(),
- [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
- return SDValue();
+ return SDValue();
+}
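
The abs-to-smax rewrite above can be sanity-checked with plain integers (illustrative sketch, assuming X really is sign extended from i32, i.e. has more than 32 sign bits):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t X : {int64_t(-7), int64_t(0), int64_t(123456),
                        int64_t(INT32_MIN)}) {
        int64_t Abs = X < 0 ? -X : X;            // i64 abs
        int64_t SextAbs = (int64_t)(int32_t)Abs; // sext_inreg ..., i32
        int64_t Neg = (int64_t)(int32_t)(0 - X); // what negw computes
        int64_t Smax = X > Neg ? X : Neg;        // smax
        assert(SextAbs == Smax);
      }
      return 0;
    }
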
- SmallVector<SDNode *, 4> SetCCs;
- for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
- UE = Src.getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
- if (User == N)
- continue;
- if (UI.getUse().getResNo() != Src.getResNo())
- continue;
- // All i32 setccs are legalized by sign extending operands.
- if (User->getOpcode() == ISD::SETCC) {
- SetCCs.push_back(User);
- continue;
- }
- // We don't know if we can extend this user.
- break;
+// Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
+// vwadd(u).vv/vx or vwsub(u).vv/vx.
+static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
+ bool Commute = false) {
+ assert((N->getOpcode() == RISCVISD::ADD_VL ||
+ N->getOpcode() == RISCVISD::SUB_VL) &&
+ "Unexpected opcode");
+ bool IsAdd = N->getOpcode() == RISCVISD::ADD_VL;
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Commute)
+ std::swap(Op0, Op1);
+
+ MVT VT = N->getSimpleValueType(0);
+
+ // Determine the narrow size for a widening add/sub.
+ unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+ MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
+ VT.getVectorElementCount());
+
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ SDLoc DL(N);
+
+ // If the RHS is a sext or zext, we can form a widening op.
+ if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
+ Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
+ Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
+ unsigned ExtOpc = Op1.getOpcode();
+ Op1 = Op1.getOperand(0);
+ // Re-introduce narrower extends if needed.
+ if (Op1.getValueType() != NarrowVT)
+ Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
+
+ unsigned WOpc;
+ if (ExtOpc == RISCVISD::VSEXT_VL)
+ WOpc = IsAdd ? RISCVISD::VWADD_W_VL : RISCVISD::VWSUB_W_VL;
+ else
+ WOpc = IsAdd ? RISCVISD::VWADDU_W_VL : RISCVISD::VWSUBU_W_VL;
+
+ return DAG.getNode(WOpc, DL, VT, Op0, Op1, Mask, VL);
}
- // If we don't have any SetCCs, this isn't worthwhile.
- if (SetCCs.empty())
- return SDValue();
+ // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
+ // sext/zext?
+
+ return SDValue();
+}
+
+// Try to convert vwadd(u).wv/wx or vwsub(u).wv/wx to vwadd(u).vv/vx or
+// vwsub(u).vv/vx.
+static SDValue combineVWADD_W_VL_VWSUB_W_VL(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ MVT VT = N->getSimpleValueType(0);
+ MVT NarrowVT = Op1.getSimpleValueType();
+ unsigned NarrowSize = NarrowVT.getScalarSizeInBits();
+
+ unsigned VOpc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VWADD_W_VL: VOpc = RISCVISD::VWADD_VL; break;
+ case RISCVISD::VWSUB_W_VL: VOpc = RISCVISD::VWSUB_VL; break;
+ case RISCVISD::VWADDU_W_VL: VOpc = RISCVISD::VWADDU_VL; break;
+ case RISCVISD::VWSUBU_W_VL: VOpc = RISCVISD::VWSUBU_VL; break;
+ }
+
+ bool IsSigned = N->getOpcode() == RISCVISD::VWADD_W_VL ||
+ N->getOpcode() == RISCVISD::VWSUB_W_VL;
SDLoc DL(N);
- SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
- DCI.CombineTo(N, SExt);
- // Promote all the setccs.
- for (SDNode *SetCC : SetCCs) {
- SmallVector<SDValue, 4> Ops;
+ // If the LHS is a sext or zext, we can narrow this op to the same size as
+ // the RHS.
+ if (((Op0.getOpcode() == RISCVISD::VZEXT_VL && !IsSigned) ||
+ (Op0.getOpcode() == RISCVISD::VSEXT_VL && IsSigned)) &&
+ Op0.hasOneUse() && Op0.getOperand(1) == Mask && Op0.getOperand(2) == VL) {
+ unsigned ExtOpc = Op0.getOpcode();
+ Op0 = Op0.getOperand(0);
+ // Re-introduce narrower extends if needed.
+ if (Op0.getValueType() != NarrowVT)
+ Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
+ return DAG.getNode(VOpc, DL, VT, Op0, Op1, Mask, VL);
+ }
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Src)
- Ops.push_back(SExt);
- else
- Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
+ bool IsAdd = N->getOpcode() == RISCVISD::VWADD_W_VL ||
+ N->getOpcode() == RISCVISD::VWADDU_W_VL;
+
+ // Look for splats on the left hand side of a vwadd(u).wv. We might be able
+ // to commute and use a vwadd(u).vx instead.
+ if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL &&
+ Op0.getOperand(0).isUndef() && Op0.getOperand(2) == VL) {
+ Op0 = Op0.getOperand(1);
+
+ // See if we have enough sign bits or zero bits in the scalar to use a
+ // widening add/sub by splatting to a smaller element size.
+ unsigned EltBits = VT.getScalarSizeInBits();
+ unsigned ScalarBits = Op0.getValueSizeInBits();
+ // Make sure we're getting all element bits from the scalar register.
+ // FIXME: Support implicit sign extension of vmv.v.x?
+ if (ScalarBits < EltBits)
+ return SDValue();
+
+ if (IsSigned) {
+ if (DAG.ComputeNumSignBits(Op0) <= (ScalarBits - NarrowSize))
+ return SDValue();
+ } else {
+ APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
+ if (!DAG.MaskedValueIsZero(Op0, Mask))
+ return SDValue();
}
- Ops.push_back(SetCC->getOperand(2));
- DCI.CombineTo(SetCC,
- DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+ Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
+ DAG.getUNDEF(NarrowVT), Op0, VL);
+ return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL);
}
- return SDValue(N, 0);
+
+ return SDValue();
}
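
The "enough sign bits" test above reduces to: the scalar must already fit in NarrowSize signed bits. A small helper makes that concrete (illustrative only; numSignBits is a made-up stand-in for SelectionDAG::ComputeNumSignBits on a plain integer):

    #include <cassert>
    #include <cstdint>

    static unsigned numSignBits(int64_t V) {
      unsigned N = 1;
      while (N < 64 && (uint64_t)((V >> (63 - N)) & 1) == ((uint64_t)V >> 63))
        ++N;
      return N;
    }

    int main() {
      const unsigned ScalarBits = 64, NarrowSize = 32;
      // -5 survives an i32 splat: more than 64 - 32 sign bits.
      assert(numSignBits(-5) > ScalarBits - NarrowSize);
      // 1 << 32 does not: truncating it to 32 bits changes the value.
      assert(numSignBits(int64_t(1) << 32) <= ScalarBits - NarrowSize);
      return 0;
    }
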
// Try to form VWMUL, VWMULU or VWMULSU.
@@ -7408,12 +8305,15 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
} else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
// The operand is a splat of a scalar.
+ // The passthru must be undef for tail agnostic.
+ if (!Op1.getOperand(0).isUndef())
+ return SDValue();
// The VL must be the same.
- if (Op1.getOperand(1) != VL)
+ if (Op1.getOperand(2) != VL)
return SDValue();
// Get the scalar value.
- Op1 = Op1.getOperand(0);
+ Op1 = Op1.getOperand(1);
// See if we have enough sign bits or zero bits in the scalar to use a
// widening multiply by splatting to a smaller element size.
@@ -7424,16 +8324,20 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
if (ScalarBits < EltBits)
return SDValue();
- if (IsSignExt) {
- if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
- return SDValue();
+ // If the LHS is a sign extend, try to use vwmul.
+ if (IsSignExt && DAG.ComputeNumSignBits(Op1) > (ScalarBits - NarrowSize)) {
+ // Can use vwmul.
} else {
+ // Otherwise try to use vwmulu or vwmulsu.
APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
- if (!DAG.MaskedValueIsZero(Op1, Mask))
+ if (DAG.MaskedValueIsZero(Op1, Mask))
+ IsVWMULSU = IsSignExt;
+ else
return SDValue();
}
- Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
+ Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
+ DAG.getUNDEF(NarrowVT), Op1, VL);
} else
return SDValue();
@@ -7443,6 +8347,8 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
if (Op0.getValueType() != NarrowVT)
Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
+ // vwmulsu requires the second operand to be zero extended.
+ ExtOpc = IsVWMULSU ? RISCVISD::VZEXT_VL : ExtOpc;
if (Op1.getValueType() != NarrowVT)
Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
@@ -7569,6 +8475,133 @@ static SDValue performFP_TO_INT_SATCombine(SDNode *N,
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
+// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
+// smaller than XLenVT.
+static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
+
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() != ISD::BSWAP)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
+ !isPowerOf2_32(VT.getSizeInBits()))
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(RISCVISD::GREV, DL, VT, Src.getOperand(0),
+ DAG.getConstant(7, DL, VT));
+}
+
+// Convert from one FMA opcode to another based on whether we are negating the
+// multiply result and/or the accumulator.
+// NOTE: Only supports RVV operations with VL.
+static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
+ assert((NegMul || NegAcc) && "Not negating anything?");
+
+ // Negating the multiply result changes ADD<->SUB and toggles 'N'.
+ if (NegMul) {
+ // clang-format off
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
+ case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
+ case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
+ case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+ }
+ // clang-format on
+ }
+
+ // Negating the accumulator changes ADD<->SUB.
+ if (NegAcc) {
+ // clang-format off
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
+ case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
+ case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
+ case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+ }
+ // clang-format on
+ }
+
+ return Opcode;
+}
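
The two switch stages compose exactly like scalar FMA sign algebra. A quick host-code check with std::fma (illustrative only; the values are small enough that floating-point equality is exact):

    #include <cassert>
    #include <cmath>

    int main() {
      double A = 2.0, B = 3.0, C = 5.0;
      assert(std::fma(A, B, C) == (A * B) + C);    // vfmadd
      assert(std::fma(A, B, -C) == (A * B) - C);   // vfmsub:  NegAcc
      assert(std::fma(-A, B, C) == -(A * B) + C);  // vfnmsub: NegMul
      assert(std::fma(-A, B, -C) == -(A * B) - C); // vfnmadd: NegMul + NegAcc
      return 0;
    }
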
+
+// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
+// FIXME: Should this be a generic combine? There's a similar combine on X86.
+//
+// Also try these folds where an add or sub is in the middle.
+// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
+// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
+static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
+
+ if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
+ return SDValue();
+
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShAmtC || ShAmtC->getZExtValue() > 32)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+
+ SDValue Shl;
+ ConstantSDNode *AddC = nullptr;
+
+ // We might have an ADD or SUB between the SRA and SHL.
+ bool IsAdd = N0.getOpcode() == ISD::ADD;
+ if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
+ if (!N0.hasOneUse())
+ return SDValue();
+ // Other operand needs to be a constant we can modify.
+ AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
+ if (!AddC)
+ return SDValue();
+
+ // AddC needs to have at least 32 trailing zeros.
+ if (AddC->getAPIntValue().countTrailingZeros() < 32)
+ return SDValue();
+
+ Shl = N0.getOperand(IsAdd ? 0 : 1);
+ } else {
+ // Not an ADD or SUB.
+ Shl = N0;
+ }
+
+ // Look for a shift left by 32.
+ if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse() ||
+ !isa<ConstantSDNode>(Shl.getOperand(1)) ||
+ Shl.getConstantOperandVal(1) != 32)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue In = Shl.getOperand(0);
+
+ // If we looked through an ADD or SUB, we need to rebuild it with the shifted
+ // constant.
+ if (AddC) {
+ SDValue ShiftedAddC =
+ DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
+ if (IsAdd)
+ In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
+ else
+ In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
+ }
+
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
+ DAG.getValueType(MVT::i32));
+ if (ShAmtC->getZExtValue() == 32)
+ return SExt;
+
+ return DAG.getNode(
+ ISD::SHL, DL, MVT::i64, SExt,
+ DAG.getConstant(32 - ShAmtC->getZExtValue(), DL, MVT::i64));
+}
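
The fold preserves the value because the add happens entirely in the upper 32 bits. A direct check of the rewritten expression (a sketch in plain C++, not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t X = 0x7FFFFFF3;         // only the low 32 bits of X matter
      int64_t C1 = int64_t(5) << 32;  // needs at least 32 trailing zeros
      // What the combine builds: sext_inreg (add X, C1 >> 32), i32.
      int64_t In = (int64_t)(int32_t)(X + (C1 >> 32));
      for (unsigned ShAmt = 1; ShAmt <= 32; ++ShAmt) {
        int64_t Before = (int64_t)(((uint64_t)X << 32) + (uint64_t)C1) >> ShAmt;
        int64_t After = ShAmt == 32 ? In : In << (32 - ShAmt);
        assert(Before == After);
      }
      return 0;
    }
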
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -7597,6 +8630,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (Op0->getOpcode() == RISCVISD::BuildPairF64)
return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+ if (Op0->isUndef()) {
+ SDValue Lo = DAG.getUNDEF(MVT::i32);
+ SDValue Hi = DAG.getUNDEF(MVT::i32);
+ return DCI.CombineTo(N, Lo, Hi);
+ }
+
SDLoc DL(N);
// It's cheaper to materialise two 32-bit integers than to load a double
@@ -7634,15 +8673,27 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
case RISCVISD::SLLW:
case RISCVISD::SRAW:
- case RISCVISD::SRLW:
- case RISCVISD::ROLW:
- case RISCVISD::RORW: {
+ case RISCVISD::SRLW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
+
break;
}
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case RISCVISD::RORW:
+ case RISCVISD::ROLW: {
+ if (N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW) {
+ // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 5))
+ return SDValue(N, 0);
+ }
+
+ return combineROTR_ROTL_RORW_ROLW(N, DAG, Subtarget);
+ }
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
@@ -7667,7 +8718,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
- return combineGREVI_GORCI(N, DAG);
+ break;
}
case RISCVISD::SHFL:
case RISCVISD::UNSHFL: {
@@ -7682,10 +8733,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::SHFLW:
case RISCVISD::UNSHFLW: {
// Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 4))
return SDValue(N, 0);
@@ -7701,6 +8748,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case RISCVISD::FSR:
+ case RISCVISD::FSL:
+ case RISCVISD::FSRW:
+ case RISCVISD::FSLW: {
+ bool IsWInstruction =
+ N->getOpcode() == RISCVISD::FSRW || N->getOpcode() == RISCVISD::FSLW;
+ unsigned BitWidth =
+ IsWInstruction ? 32 : N->getSimpleValueType(0).getSizeInBits();
+ assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
+ // Only the lower log2(BitWidth)+1 bits of the shift amount are read.
+ if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) + 1))
+ return SDValue(N, 0);
+
+ break;
+ }
case RISCVISD::FMV_X_ANYEXTH:
case RISCVISD::FMV_X_ANYEXTW_RV64: {
SDLoc DL(N);
@@ -7727,7 +8789,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
- APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
+ APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
if (Op0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
DAG.getConstant(SignBit, DL, VT));
@@ -7741,13 +8803,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB:
return performSUBCombine(N, DAG);
case ISD::AND:
- return performANDCombine(N, DAG);
+ return performANDCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DAG, Subtarget);
case ISD::XOR:
return performXORCombine(N, DAG);
- case ISD::ANY_EXTEND:
- return performANY_EXTENDCombine(N, DCI, Subtarget);
+ case ISD::FADD:
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ return combineBinOpToReduce(N, DAG);
+ case ISD::SIGN_EXTEND_INREG:
+ return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::ZERO_EXTEND:
// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
// type legalization. This is safe because fp_to_uint produces poison if
@@ -7879,6 +8949,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::BITREVERSE:
+ return performBITREVERSECombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return performFP_TO_INTCombine(N, DCI, Subtarget);
@@ -7952,40 +9024,41 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DL, IndexVT, Index);
}
- unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
- if (IsIndexScaled && Scale != 1) {
- // Manually scale the indices by the element size.
+ if (IsIndexScaled) {
+ // Manually scale the indices.
// TODO: Sanitize the scale operand here?
// TODO: For VP nodes, should we use VP_SHL here?
+ unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
+ ScaleOp = DAG.getTargetConstant(1, DL, ScaleOp.getValueType());
}
- ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
+ ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
{VPGN->getChain(), VPGN->getBasePtr(), Index,
- VPGN->getScale(), VPGN->getMask(),
+ ScaleOp, VPGN->getMask(),
VPGN->getVectorLength()},
VPGN->getMemOperand(), NewIndexTy);
if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
{VPSN->getChain(), VPSN->getValue(),
- VPSN->getBasePtr(), Index, VPSN->getScale(),
+ VPSN->getBasePtr(), Index, ScaleOp,
VPSN->getMask(), VPSN->getVectorLength()},
VPSN->getMemOperand(), NewIndexTy);
if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
return DAG.getMaskedGather(
N->getVTList(), MGN->getMemoryVT(), DL,
{MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
- MGN->getBasePtr(), Index, MGN->getScale()},
+ MGN->getBasePtr(), Index, ScaleOp},
MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
const auto *MSN = cast<MaskedScatterSDNode>(N);
return DAG.getMaskedScatter(
N->getVTList(), MSN->getMemoryVT(), DL,
{MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
- Index, MSN->getScale()},
+ Index, ScaleOp},
MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
}
case RISCVISD::SRA_VL:
@@ -7997,14 +9070,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc DL(N);
SDValue VL = N->getOperand(3);
EVT VT = N->getValueType(0);
- ShAmt =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
+ ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
+ ShAmt.getOperand(1), VL);
return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
N->getOperand(2), N->getOperand(3));
}
break;
}
case ISD::SRA:
+ if (SDValue V = performSRACombine(N, DAG, Subtarget))
+ return V;
+ LLVM_FALLTHROUGH;
case ISD::SRL:
case ISD::SHL: {
SDValue ShAmt = N->getOperand(1);
@@ -8012,17 +9088,63 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
SDLoc DL(N);
EVT VT = N->getValueType(0);
- ShAmt =
- DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
+ ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
+ ShAmt.getOperand(1),
+ DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
}
break;
}
+ case RISCVISD::ADD_VL:
+ if (SDValue V = combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ false))
+ return V;
+ return combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ true);
+ case RISCVISD::SUB_VL:
+ return combineADDSUB_VLToVWADDSUB_VL(N, DAG);
+ case RISCVISD::VWADD_W_VL:
+ case RISCVISD::VWADDU_W_VL:
+ case RISCVISD::VWSUB_W_VL:
+ case RISCVISD::VWSUBU_W_VL:
+ return combineVWADD_W_VL_VWSUB_W_VL(N, DAG);
case RISCVISD::MUL_VL:
if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
return V;
// Mul is commutative.
return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
+ case RISCVISD::VFMADD_VL:
+ case RISCVISD::VFNMADD_VL:
+ case RISCVISD::VFMSUB_VL:
+ case RISCVISD::VFNMSUB_VL: {
+ // Fold FNEG_VL into FMA opcodes.
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+ SDValue C = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ auto invertIfNegative = [&Mask, &VL](SDValue &V) {
+ if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
+ V.getOperand(2) == VL) {
+ // Return the negated input.
+ V = V.getOperand(0);
+ return true;
+ }
+
+ return false;
+ };
+
+ bool NegA = invertIfNegative(A);
+ bool NegB = invertIfNegative(B);
+ bool NegC = invertIfNegative(C);
+
+ // If no operands are negated, we're done.
+ if (!NegA && !NegB && !NegC)
+ return SDValue();
+
+ unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
+ VL);
+ }
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(N);
SDValue Val = Store->getValue();
@@ -8035,7 +9157,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// The memory VT and the element type must match.
if (VecVT.getVectorElementType() == MemVT) {
SDLoc DL(N);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(VecVT);
return DAG.getStoreVP(
Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
DAG.getConstant(1, DL, MaskVT),
@@ -8047,6 +9169,73 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case ISD::SPLAT_VECTOR: {
+ EVT VT = N->getValueType(0);
+ // Only perform this combine on legal MVT types.
+ if (!isTypeLegal(VT))
+ break;
+ if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
+ DAG, Subtarget))
+ return Gather;
+ break;
+ }
+ case RISCVISD::VMV_V_X_VL: {
+ // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
+ // scalar input.
+ unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
+ unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
+ if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
+ if (SimplifyDemandedLowBitsHelper(1, EltWidth))
+ return SDValue(N, 0);
+
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = N->getConstantOperandVal(0);
+ switch (IntNo) {
+ // By default we do not combine any intrinsic.
+ default:
+ return SDValue();
+ case Intrinsic::riscv_vcpop:
+ case Intrinsic::riscv_vcpop_mask:
+ case Intrinsic::riscv_vfirst:
+ case Intrinsic::riscv_vfirst_mask: {
+ SDValue VL = N->getOperand(2);
+ if (IntNo == Intrinsic::riscv_vcpop_mask ||
+ IntNo == Intrinsic::riscv_vfirst_mask)
+ VL = N->getOperand(3);
+ if (!isNullConstant(VL))
+ return SDValue();
+ // If VL is 0, vcpop -> li 0, vfirst -> li -1.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ if (IntNo == Intrinsic::riscv_vfirst ||
+ IntNo == Intrinsic::riscv_vfirst_mask)
+ return DAG.getConstant(-1, DL, VT);
+ return DAG.getConstant(0, DL, VT);
+ }
+ }
+ }
+ case ISD::BITCAST: {
+ assert(Subtarget.useRVVForFixedLengthVectors());
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+ // If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
+ // type, widen both sides to avoid a trip through memory.
+ if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
+ VT.isScalarInteger()) {
+ unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
+ Ops[0] = N0;
+ SDLoc DL(N);
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
+ N0 = DAG.getBitcast(MVT::i8, N0);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
+ }
+
+ return SDValue();
+ }
}
return SDValue();
@@ -8182,22 +9371,23 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
return UseMask(NewMask);
}
-static void computeGREV(APInt &Src, unsigned ShAmt) {
- ShAmt &= Src.getBitWidth() - 1;
- uint64_t x = Src.getZExtValue();
- if (ShAmt & 1)
- x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
- if (ShAmt & 2)
- x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
- if (ShAmt & 4)
- x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
- if (ShAmt & 8)
- x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
- if (ShAmt & 16)
- x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
- if (ShAmt & 32)
- x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
- Src = x;
+static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
+ static const uint64_t GREVMasks[] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
+ 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+
+ for (unsigned Stage = 0; Stage != 6; ++Stage) {
+ unsigned Shift = 1 << Stage;
+ if (ShAmt & Shift) {
+ uint64_t Mask = GREVMasks[Stage];
+ uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
+ if (IsGORC)
+ Res |= x;
+ x = Res;
+ }
+ }
+
+ return x;
}
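
A runnable copy of this helper demonstrates the identities the surrounding combines rely on (illustrative only): grev with amount 56 is a full byte swap, repeated GREV amounts XOR together, and repeated GORC amounts OR together.

    #include <cassert>
    #include <cstdint>

    static uint64_t grevOrGorc(uint64_t X, unsigned ShAmt, bool IsGORC) {
      static const uint64_t Masks[] = {
          0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
          0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
      for (unsigned Stage = 0; Stage != 6; ++Stage) {
        unsigned Shift = 1u << Stage;
        if (ShAmt & Shift) {
          uint64_t Res =
              ((X & Masks[Stage]) << Shift) | ((X >> Shift) & Masks[Stage]);
          X = IsGORC ? (Res | X) : Res;
        }
      }
      return X;
    }

    int main() {
      uint64_t X = 0xDEADBEEFCAFEF00DULL;
      // grev(x, 56) is bswap on a 64-bit value.
      assert(grevOrGorc(0x0123456789ABCDEFULL, 56, false) ==
             0xEFCDAB8967452301ULL);
      // (GREVI (GREVI x, C2), C1) == (GREVI x, C1 ^ C2).
      assert(grevOrGorc(grevOrGorc(X, 24, false), 16, false) ==
             grevOrGorc(X, 24 ^ 16, false));
      // (GORCI (GORCI x, C2), C1) == (GORCI x, C1 | C2).
      assert(grevOrGorc(grevOrGorc(X, 3, true), 6, true) ==
             grevOrGorc(X, 3 | 6, true));
      return 0;
    }
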
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
@@ -8263,28 +9453,28 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
case RISCVISD::GREV:
- case RISCVISD::GREVW: {
+ case RISCVISD::GORC: {
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
- if (Opc == RISCVISD::GREVW)
- Known = Known.trunc(32);
- unsigned ShAmt = C->getZExtValue();
- computeGREV(Known.Zero, ShAmt);
- computeGREV(Known.One, ShAmt);
- if (Opc == RISCVISD::GREVW)
- Known = Known.sext(BitWidth);
+ unsigned ShAmt = C->getZExtValue() & (Known.getBitWidth() - 1);
+ bool IsGORC = Op.getOpcode() == RISCVISD::GORC;
+ // To compute zeros, we need to invert the value and invert it back after.
+ Known.Zero =
+ ~computeGREVOrGORC(~Known.Zero.getZExtValue(), ShAmt, IsGORC);
+ Known.One = computeGREVOrGORC(Known.One.getZExtValue(), ShAmt, IsGORC);
}
break;
}
case RISCVISD::READ_VLENB: {
- // If we know the minimum VLen from Zvl extensions, we can use that to
- // determine the trailing zeros of VLENB.
- // FIXME: Limit to 128 bit vectors until we have more testing.
- unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8;
- if (MinVLenB > 0)
- Known.Zero.setLowBits(Log2_32(MinVLenB));
- // We assume VLENB is no more than 65536 / 8 bytes.
- Known.Zero.setBitsFrom(14);
+ // We can use the minimum and maximum VLEN values to bound VLENB. We
+ // know VLEN must be a power of two.
+ const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
+ const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
+ assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
+ Known.Zero.setLowBits(Log2_32(MinVLenB));
+ Known.Zero.setBitsFrom(Log2_32(MaxVLenB) + 1);
+ if (MaxVLenB == MinVLenB)
+ Known.One.setBit(Log2_32(MinVLenB));
break;
}
case ISD::INTRINSIC_W_CHAIN:
@@ -8381,6 +9571,51 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
+const Constant *
+RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
+ assert(Ld && "Unexpected null LoadSDNode");
+ if (!ISD::isNormalLoad(Ld))
+ return nullptr;
+
+ SDValue Ptr = Ld->getBasePtr();
+
+ // Only constant pools with no offset are supported.
+ auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
+ auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!CNode || CNode->isMachineConstantPoolEntry() ||
+ CNode->getOffset() != 0)
+ return nullptr;
+
+ return CNode;
+ };
+
+ // Simple case, LLA.
+ if (Ptr.getOpcode() == RISCVISD::LLA) {
+ auto *CNode = GetSupportedConstantPool(Ptr);
+ if (!CNode || CNode->getTargetFlags() != 0)
+ return nullptr;
+
+ return CNode->getConstVal();
+ }
+
+ // Look for a HI and ADD_LO pair.
+ if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
+ Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
+ return nullptr;
+
+ auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
+ auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
+
+ if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
+ !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
+ return nullptr;
+
+ if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
+ return nullptr;
+
+ return CNodeLo->getConstVal();
+}
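
The HI/ADD_LO pair reconstructs an address as (hi20 << 12) + sext(lo12), where hi20 uses the usual +0x800 rounding to compensate for the sign-extended low part. A quick check of that split (illustrative host code, not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Addr = 0x12345FFC;
      uint32_t Hi20 = (Addr + 0x800) >> 12;       // %hi(Addr), as LUI sees it
      int32_t Lo12 = (int32_t)(Addr << 20) >> 20; // %lo(Addr), sign extended
      assert((Hi20 << 12) + (uint32_t)Lo12 == Addr); // HI + ADD_LO
      return 0;
    }
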
+
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
@@ -8559,6 +9794,109 @@ static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
+static MachineBasicBlock *
+EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
+ MachineBasicBlock *ThisMBB,
+ const RISCVSubtarget &Subtarget) {
+ // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
+ // Without this, custom-inserter would have generated:
+ //
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ // | \
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // B: empty
+ // C: Z = PHI [X, A], [Y, B]
+ // D: empty
+ // E: PHI [X, C], [Z, D]
+ //
+ // If we lower both Select_FPRX_ in a single step, we can instead generate:
+ //
+ // A
+ // | \
+ // | C
+ // | /|
+ // |/ |
+ // | |
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // D: empty
+ // E: PHI [X, A], [X, C], [Y, D]
+
+ const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
+ const DebugLoc &DL = First.getDebugLoc();
+ const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
+ MachineFunction *F = ThisMBB->getParent();
+ MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = ++ThisMBB->getIterator();
+ F->insert(It, FirstMBB);
+ F->insert(It, SecondMBB);
+ F->insert(It, SinkMBB);
+
+ // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
+ SinkMBB->splice(SinkMBB->begin(), ThisMBB,
+ std::next(MachineBasicBlock::iterator(First)),
+ ThisMBB->end());
+ SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
+
+ // Fallthrough block for ThisMBB.
+ ThisMBB->addSuccessor(FirstMBB);
+ // Fallthrough block for FirstMBB.
+ FirstMBB->addSuccessor(SecondMBB);
+ ThisMBB->addSuccessor(SinkMBB);
+ FirstMBB->addSuccessor(SinkMBB);
+ // This is fallthrough.
+ SecondMBB->addSuccessor(SinkMBB);
+
+ auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
+ Register FLHS = First.getOperand(1).getReg();
+ Register FRHS = First.getOperand(2).getReg();
+ // Insert appropriate branch.
+ BuildMI(ThisMBB, DL, TII.getBrCond(FirstCC))
+ .addReg(FLHS)
+ .addReg(FRHS)
+ .addMBB(SinkMBB);
+
+ Register SLHS = Second.getOperand(1).getReg();
+ Register SRHS = Second.getOperand(2).getReg();
+ Register Op1Reg4 = First.getOperand(4).getReg();
+ Register Op1Reg5 = First.getOperand(5).getReg();
+
+ auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
+ // Insert appropriate branch.
+ BuildMI(FirstMBB, DL, TII.getBrCond(SecondCC))
+ .addReg(SLHS)
+ .addReg(SRHS)
+ .addMBB(SinkMBB);
+
+ Register DestReg = Second.getOperand(0).getReg();
+ Register Op2Reg4 = Second.getOperand(4).getReg();
+ BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
+ .addReg(Op1Reg4)
+ .addMBB(ThisMBB)
+ .addReg(Op2Reg4)
+ .addMBB(FirstMBB)
+ .addReg(Op1Reg5)
+ .addMBB(SecondMBB);
+
+ // Now remove the Select_FPRX_s.
+ First.eraseFromParent();
+ Second.eraseFromParent();
+ return SinkMBB;
+}
+
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
MachineBasicBlock *BB,
const RISCVSubtarget &Subtarget) {
@@ -8586,6 +9924,10 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
// previous selects in the sequence.
// These conditions could be further relaxed. See the X86 target for a
// related approach and more information.
+ //
+ // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
+ // is checked here and handled by a separate function -
+ // EmitLoweredCascadedSelect.
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
@@ -8595,12 +9937,19 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
SelectDests.insert(MI.getOperand(0).getReg());
MachineInstr *LastSelectPseudo = &MI;
+ auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
+ if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
+ Next->getOpcode() == MI.getOpcode() &&
+ Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
+ Next->getOperand(5).isKill()) {
+ return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
+ }
for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
SequenceMBBI != E; ++SequenceMBBI) {
if (SequenceMBBI->isDebugInstr())
continue;
- else if (isSelectPseudo(*SequenceMBBI)) {
+ if (isSelectPseudo(*SequenceMBBI)) {
if (SequenceMBBI->getOperand(1).getReg() != LHS ||
SequenceMBBI->getOperand(2).getReg() != RHS ||
SequenceMBBI->getOperand(3).getImm() != CC ||
@@ -8831,7 +10180,7 @@ static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
// Assign the first mask argument to V0.
// This is an interim calling convention and it may be changed in the
// future.
- if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
+ if (FirstMaskArgument && ValNo == *FirstMaskArgument)
return State.AllocateReg(RISCV::V0);
return State.AllocateReg(ArgVRs);
}
@@ -10112,6 +11461,13 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BuildPairF64)
NODE_NAME_CASE(SplitF64)
NODE_NAME_CASE(TAIL)
+ NODE_NAME_CASE(ADD_LO)
+ NODE_NAME_CASE(HI)
+ NODE_NAME_CASE(LLA)
+ NODE_NAME_CASE(ADD_TPREL)
+ NODE_NAME_CASE(LA)
+ NODE_NAME_CASE(LA_TLS_IE)
+ NODE_NAME_CASE(LA_TLS_GD)
NODE_NAME_CASE(MULHSU)
NODE_NAME_CASE(SLLW)
NODE_NAME_CASE(SRAW)
@@ -10129,6 +11485,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FSR)
NODE_NAME_CASE(FMV_H_X)
NODE_NAME_CASE(FMV_X_ANYEXTH)
+ NODE_NAME_CASE(FMV_X_SIGNEXTH)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
NODE_NAME_CASE(FCVT_X)
@@ -10157,7 +11514,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VMV_X_S)
NODE_NAME_CASE(VMV_S_X_VL)
NODE_NAME_CASE(VFMV_S_F_VL)
- NODE_NAME_CASE(SPLAT_VECTOR_I64)
NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
NODE_NAME_CASE(READ_VLENB)
NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
@@ -10203,7 +11559,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FNEG_VL)
NODE_NAME_CASE(FABS_VL)
NODE_NAME_CASE(FSQRT_VL)
- NODE_NAME_CASE(FMA_VL)
+ NODE_NAME_CASE(VFMADD_VL)
+ NODE_NAME_CASE(VFNMADD_VL)
+ NODE_NAME_CASE(VFMSUB_VL)
+ NODE_NAME_CASE(VFNMSUB_VL)
NODE_NAME_CASE(FCOPYSIGN_VL)
NODE_NAME_CASE(SMIN_VL)
NODE_NAME_CASE(SMAX_VL)
@@ -10222,7 +11581,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
NODE_NAME_CASE(VWMULSU_VL)
+ NODE_NAME_CASE(VWADD_VL)
NODE_NAME_CASE(VWADDU_VL)
+ NODE_NAME_CASE(VWSUB_VL)
+ NODE_NAME_CASE(VWSUBU_VL)
+ NODE_NAME_CASE(VWADD_W_VL)
+ NODE_NAME_CASE(VWADDU_W_VL)
+ NODE_NAME_CASE(VWSUB_W_VL)
+ NODE_NAME_CASE(VWSUBU_W_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
NODE_NAME_CASE(VP_MERGE_VL)
@@ -10237,8 +11603,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSEXT_VL)
NODE_NAME_CASE(VZEXT_VL)
NODE_NAME_CASE(VCPOP_VL)
- NODE_NAME_CASE(VLE_VL)
- NODE_NAME_CASE(VSE_VL)
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
@@ -10459,7 +11823,18 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ std::pair<Register, const TargetRegisterClass *> Res =
+ TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+
+ // If we picked one of the Zfinx register classes, remap it to the GPR class.
+ // FIXME: When Zfinx is supported in CodeGen this will need to take the
+ // Subtarget into account.
+ if (Res.second == &RISCV::GPRF16RegClass ||
+ Res.second == &RISCV::GPRF32RegClass ||
+ Res.second == &RISCV::GPRF64RegClass)
+ return std::make_pair(Res.first, &RISCV::GPRRegClass);
+
+ return Res;
}
unsigned
@@ -10681,7 +12056,8 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Result;
}
-bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
+bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+ EVT DataVT) const {
return false;
}
@@ -10797,7 +12173,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
(1 - ImmS).isPowerOf2())
- return true;
+ return true;
}
}
}
@@ -10805,8 +12181,8 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
-bool RISCVTargetLowering::isMulAddWithConstProfitable(
- const SDValue &AddNode, const SDValue &ConstNode) const {
+bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const {
// Let the DAGCombiner decide for vectors.
EVT VT = AddNode.getValueType();
if (VT.isVector())
@@ -10831,9 +12207,13 @@ bool RISCVTargetLowering::isMulAddWithConstProfitable(
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
- if (!VT.isVector())
- return false;
+ if (!VT.isVector()) {
+ if (Fast)
+ *Fast = false;
+ return Subtarget.enableUnalignedScalarMem();
+ }
+ // All vector implementations must support element alignment
EVT ElemVT = VT.getVectorElementType();
if (Alignment >= ElemVT.getStoreSize()) {
if (Fast)
@@ -10847,7 +12227,7 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
bool RISCVTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
// Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
@@ -10901,7 +12281,7 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
SDValue Val = Parts[0];
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 840a821870a7..eb013d4b6682 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -41,6 +41,27 @@ enum NodeType : unsigned {
BuildPairF64,
SplitF64,
TAIL,
+
+ // Add the Lo 12 bits from an address. Selected to ADDI.
+ ADD_LO,
+ // Get the Hi 20 bits from an address. Selected to LUI.
+ HI,
+
+ // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
+ LLA,
+
+ // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
+ ADD_TPREL,
+
+ // Load address.
+ LA,
+
+ // Load TLS IE address.
+ LA_TLS_IE,
+
+ // Load TLS GD address.
+ LA_TLS_GD,
+
// Multiply high for signed x unsigned.
MULHSU,
// RV64I shifts, directly matching the semantics of the named RISC-V
@@ -75,6 +90,7 @@ enum NodeType : unsigned {
//
// FMV_H_X matches the semantics of the FMV.H.X.
// FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
+ // FMV_X_SIGNEXTH is similar to FMV.X.H but has a sign-extended result.
// FMV_W_X_RV64 matches the semantics of the FMV.W.X.
// FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
//
@@ -82,6 +98,7 @@ enum NodeType : unsigned {
// unnecessary GPR->FPR->GPR moves.
FMV_H_X,
FMV_X_ANYEXTH,
+ FMV_X_SIGNEXTH,
FMV_W_X_RV64,
FMV_X_ANYEXTW_RV64,
// FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
@@ -129,10 +146,12 @@ enum NodeType : unsigned {
BFPW,
// Vector Extension
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
- // for the VL value to be used for the operation.
+ // for the VL value to be used for the operation. The first operand is
+ // the passthru operand.
VMV_V_X_VL,
// VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
- // for the VL value to be used for the operation.
+ // for the VL value to be used for the operation. The first operand is
+ // the passthru operand.
VFMV_V_F_VL,
// VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
// extended from the vector element size.
@@ -141,11 +160,9 @@ enum NodeType : unsigned {
VMV_S_X_VL,
// VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
VFMV_S_F_VL,
- // Splats an i64 scalar to a vector type (with element type i64) where the
- // scalar is a sign-extended i32.
- SPLAT_VECTOR_I64,
// Splats a 64-bit value that has been split into two i32 parts. This is
// expanded late to two scalar stores and a stride 0 vector load.
+ // The first operand is the passthru operand.
SPLAT_VECTOR_SPLIT_I64_VL,
// Read VLENB CSR
READ_VLENB,
@@ -158,9 +175,9 @@ enum NodeType : unsigned {
// and the fifth the VL.
VSLIDEUP_VL,
VSLIDEDOWN_VL,
- // Matches the semantics of vslide1up/slide1down. The first operand is the
- // source vector, the second is the XLenVT scalar value. The third and fourth
- // operands are the mask and VL operands.
+ // Matches the semantics of vslide1up/slide1down. The first operand is the
+ // passthru operand, the second is the source vector, and the third is the
+ // XLenVT scalar value. The fourth and fifth operands are the mask and VL.
VSLIDE1UP_VL,
VSLIDE1DOWN_VL,
// Matches the semantics of the vid.v instruction, with a mask and VL
@@ -225,7 +242,10 @@ enum NodeType : unsigned {
FNEG_VL,
FABS_VL,
FSQRT_VL,
- FMA_VL,
+ VFMADD_VL,
+ VFNMADD_VL,
+ VFMSUB_VL,
+ VFNMSUB_VL,
FCOPYSIGN_VL,
SMIN_VL,
SMAX_VL,
@@ -246,7 +266,14 @@ enum NodeType : unsigned {
VWMUL_VL,
VWMULU_VL,
VWMULSU_VL,
+ VWADD_VL,
VWADDU_VL,
+ VWSUB_VL,
+ VWSUBU_VL,
+ VWADD_W_VL,
+ VWADDU_W_VL,
+ VWSUB_W_VL,
+ VWSUBU_W_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
// operand is VL.
@@ -268,8 +295,8 @@ enum NodeType : unsigned {
VMCLR_VL,
VMSET_VL,
- // Matches the semantics of vrgather.vx and vrgather.vv with an extra operand
- // for VL.
+ // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
+ // for passthru and VL. Operands are (src, index, mask, passthru, vl).
VRGATHER_VX_VL,
VRGATHER_VV_VL,
VRGATHEREI16_VV_VL,
@@ -302,16 +329,21 @@ enum NodeType : unsigned {
STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCVT_WU_RV64,
- // Memory opcodes start here.
- VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
- VSE_VL,
-
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be treated as target memory ops!
+
+ // Load address.
+ LA = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LA_TLS_IE,
};
} // namespace RISCVISD
+namespace RISCV {
+// We use 64 bits as the known part in the scalable vector types.
+static constexpr unsigned RVVBitsPerBlock = 64;
+} // namespace RISCV
+
class RISCVTargetLowering : public TargetLowering {
const RISCVSubtarget &Subtarget;
@@ -333,11 +365,18 @@ public:
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+ bool signExtendConstant(const ConstantInt *CI) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
bool hasAndNotCompare(SDValue Y) const override;
+ bool hasBitTest(SDValue X, SDValue Y) const override;
+ bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
@@ -384,6 +423,8 @@ public:
const SelectionDAG &DAG,
unsigned Depth) const override;
+ const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
+
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
@@ -477,8 +518,6 @@ public:
SelectionDAG &DAG) const override;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- template <class NodeTy>
- SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override {
@@ -490,8 +529,8 @@ public:
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
- bool isMulAddWithConstProfitable(const SDValue &AddNode,
- const SDValue &ConstNode) const override;
+ bool isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const override;
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
@@ -526,6 +565,15 @@ public:
Optional<CallingConv::ID> CC) const override;
static RISCVII::VLMUL getLMUL(MVT VT);
+ inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
+ unsigned MinSize) {
+ // Original equation:
+ // VLMAX = (VectorBits / EltSize) * LMUL
+ // where LMUL = MinSize / RISCV::RVVBitsPerBlock
+ // The following equations have been reordered to prevent loss of precision
+ // when calculating fractional LMUL.
+ return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
+ };
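A quick numeric check of the reordered formula (illustrative values, assuming RVVBitsPerBlock = 64 as defined above): with VectorBits = 128, EltSize = 32, and MinSize = 32 (so LMUL = 32/64 = 1/2), the reordered form yields ((128 / 32) * 32) / 64 = 2, whereas computing the fractional LMUL first in integer arithmetic would truncate to zero.

    // Sketch only; the values are assumed for illustration.
    constexpr unsigned VectorBits = 128, EltSize = 32, MinSize = 32;
    static_assert(((VectorBits / EltSize) * MinSize) / 64 == 2,
                  "VLMAX = 2 for VLEN=128, SEW=32, LMUL=1/2");
    static_assert(MinSize / 64 == 0, "naive integer LMUL truncates");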
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
static unsigned getRegClassIDForVecVT(MVT VT);
@@ -535,7 +583,7 @@ public:
const RISCVRegisterInfo *TRI);
MVT getContainerForFixedLengthVector(MVT VT) const;
- bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
+ bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
bool isLegalElementTypeForRVV(Type *ScalarTy) const;
@@ -571,6 +619,8 @@ private:
bool IsRet, CallLoweringInfo *CLI,
RISCVCCAssignFn Fn) const;
+ template <class NodeTy>
+ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
bool UseGOT) const;
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
@@ -591,7 +641,9 @@ private:
SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
int64_t ExtTrueVal) const;
- SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
@@ -606,6 +658,7 @@ private:
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
@@ -627,11 +680,17 @@ private:
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const;
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
unsigned VecOpc) const;
+ SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
+ unsigned RISCVISDOpc) const;
SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
unsigned ExtendOpc) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
+
SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
@@ -665,21 +724,15 @@ private:
return false;
};
};
-
-namespace RISCV {
-// We use 64 bits as the known part in the scalable vector types.
-static constexpr unsigned RVVBitsPerBlock = 64;
-} // namespace RISCV
-
namespace RISCVVIntrinsicsTable {
struct RISCVVIntrinsicInfo {
unsigned IntrinsicID;
- uint8_t SplatOperand;
+ uint8_t ScalarOperand;
uint8_t VLOperand;
- bool hasSplatOperand() const {
- // 0xF is not valid. See NoSplatOperand in IntrinsicsRISCV.td.
- return SplatOperand != 0xF;
+ bool hasScalarOperand() const {
+ // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
+ return ScalarOperand != 0xF;
}
bool hasVLOperand() const {
// 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 649eb57b325b..fc0a983f6542 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -7,7 +7,8 @@
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
-// needed.
+// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
+// instructions.
//
// This pass consists of 3 phases:
//
@@ -37,8 +38,371 @@ static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
cl::desc("Disable looking through phis when inserting vsetvlis."));
+static cl::opt<bool> UseStrictAsserts(
+ "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
+ cl::desc("Enable strict assertion checking for the dataflow algorithm"));
+
namespace {
+static unsigned getVLOpNum(const MachineInstr &MI) {
+ return RISCVII::getVLOpNum(MI.getDesc());
+}
+
+static unsigned getSEWOpNum(const MachineInstr &MI) {
+ return RISCVII::getSEWOpNum(MI.getDesc());
+}
+
+static bool isScalarMoveInstr(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case RISCV::PseudoVMV_S_X_M1:
+ case RISCV::PseudoVMV_S_X_M2:
+ case RISCV::PseudoVMV_S_X_M4:
+ case RISCV::PseudoVMV_S_X_M8:
+ case RISCV::PseudoVMV_S_X_MF2:
+ case RISCV::PseudoVMV_S_X_MF4:
+ case RISCV::PseudoVMV_S_X_MF8:
+ case RISCV::PseudoVFMV_S_F16_M1:
+ case RISCV::PseudoVFMV_S_F16_M2:
+ case RISCV::PseudoVFMV_S_F16_M4:
+ case RISCV::PseudoVFMV_S_F16_M8:
+ case RISCV::PseudoVFMV_S_F16_MF2:
+ case RISCV::PseudoVFMV_S_F16_MF4:
+ case RISCV::PseudoVFMV_S_F32_M1:
+ case RISCV::PseudoVFMV_S_F32_M2:
+ case RISCV::PseudoVFMV_S_F32_M4:
+ case RISCV::PseudoVFMV_S_F32_M8:
+ case RISCV::PseudoVFMV_S_F32_MF2:
+ case RISCV::PseudoVFMV_S_F64_M1:
+ case RISCV::PseudoVFMV_S_F64_M2:
+ case RISCV::PseudoVFMV_S_F64_M4:
+ case RISCV::PseudoVFMV_S_F64_M8:
+ return true;
+ }
+}
+
+/// Get the EEW for a load or store instruction. Return None if MI is not
+/// a load or store that ignores SEW.
+static Optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return None;
+ case RISCV::PseudoVLE8_V_M1:
+ case RISCV::PseudoVLE8_V_M1_MASK:
+ case RISCV::PseudoVLE8_V_M2:
+ case RISCV::PseudoVLE8_V_M2_MASK:
+ case RISCV::PseudoVLE8_V_M4:
+ case RISCV::PseudoVLE8_V_M4_MASK:
+ case RISCV::PseudoVLE8_V_M8:
+ case RISCV::PseudoVLE8_V_M8_MASK:
+ case RISCV::PseudoVLE8_V_MF2:
+ case RISCV::PseudoVLE8_V_MF2_MASK:
+ case RISCV::PseudoVLE8_V_MF4:
+ case RISCV::PseudoVLE8_V_MF4_MASK:
+ case RISCV::PseudoVLE8_V_MF8:
+ case RISCV::PseudoVLE8_V_MF8_MASK:
+ case RISCV::PseudoVLSE8_V_M1:
+ case RISCV::PseudoVLSE8_V_M1_MASK:
+ case RISCV::PseudoVLSE8_V_M2:
+ case RISCV::PseudoVLSE8_V_M2_MASK:
+ case RISCV::PseudoVLSE8_V_M4:
+ case RISCV::PseudoVLSE8_V_M4_MASK:
+ case RISCV::PseudoVLSE8_V_M8:
+ case RISCV::PseudoVLSE8_V_M8_MASK:
+ case RISCV::PseudoVLSE8_V_MF2:
+ case RISCV::PseudoVLSE8_V_MF2_MASK:
+ case RISCV::PseudoVLSE8_V_MF4:
+ case RISCV::PseudoVLSE8_V_MF4_MASK:
+ case RISCV::PseudoVLSE8_V_MF8:
+ case RISCV::PseudoVLSE8_V_MF8_MASK:
+ case RISCV::PseudoVSE8_V_M1:
+ case RISCV::PseudoVSE8_V_M1_MASK:
+ case RISCV::PseudoVSE8_V_M2:
+ case RISCV::PseudoVSE8_V_M2_MASK:
+ case RISCV::PseudoVSE8_V_M4:
+ case RISCV::PseudoVSE8_V_M4_MASK:
+ case RISCV::PseudoVSE8_V_M8:
+ case RISCV::PseudoVSE8_V_M8_MASK:
+ case RISCV::PseudoVSE8_V_MF2:
+ case RISCV::PseudoVSE8_V_MF2_MASK:
+ case RISCV::PseudoVSE8_V_MF4:
+ case RISCV::PseudoVSE8_V_MF4_MASK:
+ case RISCV::PseudoVSE8_V_MF8:
+ case RISCV::PseudoVSE8_V_MF8_MASK:
+ case RISCV::PseudoVSSE8_V_M1:
+ case RISCV::PseudoVSSE8_V_M1_MASK:
+ case RISCV::PseudoVSSE8_V_M2:
+ case RISCV::PseudoVSSE8_V_M2_MASK:
+ case RISCV::PseudoVSSE8_V_M4:
+ case RISCV::PseudoVSSE8_V_M4_MASK:
+ case RISCV::PseudoVSSE8_V_M8:
+ case RISCV::PseudoVSSE8_V_M8_MASK:
+ case RISCV::PseudoVSSE8_V_MF2:
+ case RISCV::PseudoVSSE8_V_MF2_MASK:
+ case RISCV::PseudoVSSE8_V_MF4:
+ case RISCV::PseudoVSSE8_V_MF4_MASK:
+ case RISCV::PseudoVSSE8_V_MF8:
+ case RISCV::PseudoVSSE8_V_MF8_MASK:
+ return 8;
+ case RISCV::PseudoVLE16_V_M1:
+ case RISCV::PseudoVLE16_V_M1_MASK:
+ case RISCV::PseudoVLE16_V_M2:
+ case RISCV::PseudoVLE16_V_M2_MASK:
+ case RISCV::PseudoVLE16_V_M4:
+ case RISCV::PseudoVLE16_V_M4_MASK:
+ case RISCV::PseudoVLE16_V_M8:
+ case RISCV::PseudoVLE16_V_M8_MASK:
+ case RISCV::PseudoVLE16_V_MF2:
+ case RISCV::PseudoVLE16_V_MF2_MASK:
+ case RISCV::PseudoVLE16_V_MF4:
+ case RISCV::PseudoVLE16_V_MF4_MASK:
+ case RISCV::PseudoVLSE16_V_M1:
+ case RISCV::PseudoVLSE16_V_M1_MASK:
+ case RISCV::PseudoVLSE16_V_M2:
+ case RISCV::PseudoVLSE16_V_M2_MASK:
+ case RISCV::PseudoVLSE16_V_M4:
+ case RISCV::PseudoVLSE16_V_M4_MASK:
+ case RISCV::PseudoVLSE16_V_M8:
+ case RISCV::PseudoVLSE16_V_M8_MASK:
+ case RISCV::PseudoVLSE16_V_MF2:
+ case RISCV::PseudoVLSE16_V_MF2_MASK:
+ case RISCV::PseudoVLSE16_V_MF4:
+ case RISCV::PseudoVLSE16_V_MF4_MASK:
+ case RISCV::PseudoVSE16_V_M1:
+ case RISCV::PseudoVSE16_V_M1_MASK:
+ case RISCV::PseudoVSE16_V_M2:
+ case RISCV::PseudoVSE16_V_M2_MASK:
+ case RISCV::PseudoVSE16_V_M4:
+ case RISCV::PseudoVSE16_V_M4_MASK:
+ case RISCV::PseudoVSE16_V_M8:
+ case RISCV::PseudoVSE16_V_M8_MASK:
+ case RISCV::PseudoVSE16_V_MF2:
+ case RISCV::PseudoVSE16_V_MF2_MASK:
+ case RISCV::PseudoVSE16_V_MF4:
+ case RISCV::PseudoVSE16_V_MF4_MASK:
+ case RISCV::PseudoVSSE16_V_M1:
+ case RISCV::PseudoVSSE16_V_M1_MASK:
+ case RISCV::PseudoVSSE16_V_M2:
+ case RISCV::PseudoVSSE16_V_M2_MASK:
+ case RISCV::PseudoVSSE16_V_M4:
+ case RISCV::PseudoVSSE16_V_M4_MASK:
+ case RISCV::PseudoVSSE16_V_M8:
+ case RISCV::PseudoVSSE16_V_M8_MASK:
+ case RISCV::PseudoVSSE16_V_MF2:
+ case RISCV::PseudoVSSE16_V_MF2_MASK:
+ case RISCV::PseudoVSSE16_V_MF4:
+ case RISCV::PseudoVSSE16_V_MF4_MASK:
+ return 16;
+ case RISCV::PseudoVLE32_V_M1:
+ case RISCV::PseudoVLE32_V_M1_MASK:
+ case RISCV::PseudoVLE32_V_M2:
+ case RISCV::PseudoVLE32_V_M2_MASK:
+ case RISCV::PseudoVLE32_V_M4:
+ case RISCV::PseudoVLE32_V_M4_MASK:
+ case RISCV::PseudoVLE32_V_M8:
+ case RISCV::PseudoVLE32_V_M8_MASK:
+ case RISCV::PseudoVLE32_V_MF2:
+ case RISCV::PseudoVLE32_V_MF2_MASK:
+ case RISCV::PseudoVLSE32_V_M1:
+ case RISCV::PseudoVLSE32_V_M1_MASK:
+ case RISCV::PseudoVLSE32_V_M2:
+ case RISCV::PseudoVLSE32_V_M2_MASK:
+ case RISCV::PseudoVLSE32_V_M4:
+ case RISCV::PseudoVLSE32_V_M4_MASK:
+ case RISCV::PseudoVLSE32_V_M8:
+ case RISCV::PseudoVLSE32_V_M8_MASK:
+ case RISCV::PseudoVLSE32_V_MF2:
+ case RISCV::PseudoVLSE32_V_MF2_MASK:
+ case RISCV::PseudoVSE32_V_M1:
+ case RISCV::PseudoVSE32_V_M1_MASK:
+ case RISCV::PseudoVSE32_V_M2:
+ case RISCV::PseudoVSE32_V_M2_MASK:
+ case RISCV::PseudoVSE32_V_M4:
+ case RISCV::PseudoVSE32_V_M4_MASK:
+ case RISCV::PseudoVSE32_V_M8:
+ case RISCV::PseudoVSE32_V_M8_MASK:
+ case RISCV::PseudoVSE32_V_MF2:
+ case RISCV::PseudoVSE32_V_MF2_MASK:
+ case RISCV::PseudoVSSE32_V_M1:
+ case RISCV::PseudoVSSE32_V_M1_MASK:
+ case RISCV::PseudoVSSE32_V_M2:
+ case RISCV::PseudoVSSE32_V_M2_MASK:
+ case RISCV::PseudoVSSE32_V_M4:
+ case RISCV::PseudoVSSE32_V_M4_MASK:
+ case RISCV::PseudoVSSE32_V_M8:
+ case RISCV::PseudoVSSE32_V_M8_MASK:
+ case RISCV::PseudoVSSE32_V_MF2:
+ case RISCV::PseudoVSSE32_V_MF2_MASK:
+ return 32;
+ case RISCV::PseudoVLE64_V_M1:
+ case RISCV::PseudoVLE64_V_M1_MASK:
+ case RISCV::PseudoVLE64_V_M2:
+ case RISCV::PseudoVLE64_V_M2_MASK:
+ case RISCV::PseudoVLE64_V_M4:
+ case RISCV::PseudoVLE64_V_M4_MASK:
+ case RISCV::PseudoVLE64_V_M8:
+ case RISCV::PseudoVLE64_V_M8_MASK:
+ case RISCV::PseudoVLSE64_V_M1:
+ case RISCV::PseudoVLSE64_V_M1_MASK:
+ case RISCV::PseudoVLSE64_V_M2:
+ case RISCV::PseudoVLSE64_V_M2_MASK:
+ case RISCV::PseudoVLSE64_V_M4:
+ case RISCV::PseudoVLSE64_V_M4_MASK:
+ case RISCV::PseudoVLSE64_V_M8:
+ case RISCV::PseudoVLSE64_V_M8_MASK:
+ case RISCV::PseudoVSE64_V_M1:
+ case RISCV::PseudoVSE64_V_M1_MASK:
+ case RISCV::PseudoVSE64_V_M2:
+ case RISCV::PseudoVSE64_V_M2_MASK:
+ case RISCV::PseudoVSE64_V_M4:
+ case RISCV::PseudoVSE64_V_M4_MASK:
+ case RISCV::PseudoVSE64_V_M8:
+ case RISCV::PseudoVSE64_V_M8_MASK:
+ case RISCV::PseudoVSSE64_V_M1:
+ case RISCV::PseudoVSSE64_V_M1_MASK:
+ case RISCV::PseudoVSSE64_V_M2:
+ case RISCV::PseudoVSSE64_V_M2_MASK:
+ case RISCV::PseudoVSSE64_V_M4:
+ case RISCV::PseudoVSSE64_V_M4_MASK:
+ case RISCV::PseudoVSSE64_V_M8:
+ case RISCV::PseudoVSSE64_V_M8_MASK:
+ return 64;
+ }
+}
+
+/// Return true if this is an operation on mask registers. Note that
+/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
+static bool isMaskRegOp(const MachineInstr &MI) {
+ if (RISCVII::hasSEWOp(MI.getDesc().TSFlags)) {
+ const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
+ // A Log2SEW of 0 is an operation on mask registers only.
+ return Log2SEW == 0;
+ }
+ return false;
+}
+
+static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
+ unsigned LMul;
+ bool Fractional;
+ std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
+
+ // Convert LMul to a fixed point value with 3 fractional bits.
+ LMul = Fractional ? (8 / LMul) : (LMul * 8);
+
+ assert(SEW >= 8 && "Unexpected SEW value");
+ return (SEW * 8) / LMul;
+}
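For example (illustrative, following the fixed-point encoding above): SEW = 16 with LMUL = 1/2 decodes to LMul = 2 with Fractional = true, so the fixed-point value is 8 / 2 = 4, and the ratio is (16 * 8) / 4 = 32, matching SEW / LMUL = 16 / 0.5.

    // Sketch of the fixed-point math for SEW = 16, LMUL = 1/2 (assumed).
    constexpr unsigned SEW = 16;
    constexpr unsigned LMulFixed = 8 / 2; // 0.5 in 3-fractional-bit form
    static_assert((SEW * 8) / LMulFixed == 32, "SEW/LMUL ratio is 32");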
+
+/// Which subfields of VL or VTYPE have values we need to preserve?
+struct DemandedFields {
+ bool VL = false;
+ bool SEW = false;
+ bool LMUL = false;
+ bool SEWLMULRatio = false;
+ bool TailPolicy = false;
+ bool MaskPolicy = false;
+
+ // Return true if any part of VTYPE was used
+ bool usedVTYPE() {
+ return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
+ }
+
+ // Mark all VTYPE subfields and properties as demanded
+ void demandVTYPE() {
+ SEW = true;
+ LMUL = true;
+ SEWLMULRatio = true;
+ TailPolicy = true;
+ MaskPolicy = true;
+ }
+};
+
+/// Return true if the two values of the VTYPE register provided are
+/// indistinguishable from the perspective of an instruction (or set of
+/// instructions) which use only the Used subfields and properties.
+static bool areCompatibleVTYPEs(uint64_t VType1,
+ uint64_t VType2,
+ const DemandedFields &Used) {
+ if (Used.SEW &&
+ RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
+ return false;
+
+ if (Used.LMUL &&
+ RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
+ return false;
+
+ if (Used.SEWLMULRatio) {
+ auto Ratio1 = getSEWLMULRatio(RISCVVType::getSEW(VType1),
+ RISCVVType::getVLMUL(VType1));
+ auto Ratio2 = getSEWLMULRatio(RISCVVType::getSEW(VType2),
+ RISCVVType::getVLMUL(VType2));
+ if (Ratio1 != Ratio2)
+ return false;
+ }
+
+ if (Used.TailPolicy &&
+ RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
+ return false;
+ if (Used.MaskPolicy &&
+ RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
+ return false;
+ return true;
+}
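To illustrate the intent (a sketch; VTypeE8M1 and VTypeE16M2 are hypothetical encoded vtype values, not names from the patch): e8/m1 and e16/m2 differ in SEW and LMUL but share the ratio 8, so they compare as compatible when only the SEW/LMUL ratio is demanded, as for a load or store whose EEW is encoded in its opcode.

    DemandedFields Used;
    Used.SEWLMULRatio = true; // e.g. what an EEW-encoded load/store demands
    // Hypothetical vtype encodings for e8/m1 and e16/m2:
    bool Compat = areCompatibleVTYPEs(VTypeE8M1, VTypeE16M2, Used);
    // Compat is true: 8/1 == 16/2 == 8, despite differing SEW and LMUL.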
+
+/// Return the fields and properties demanded by the provided instruction.
+static DemandedFields getDemanded(const MachineInstr &MI) {
+ // Warning: This function has to work on both the lowered (i.e. post
+ // emitVSETVLIs) and pre-lowering forms. The main implication of this is
+ // that it can't use the value of a SEW, VL, or Policy operand as they might
+ // be stale after lowering.
+
+ // Most instructions don't use any of these subfields.
+ DemandedFields Res;
+ // Start conservative if registers are used
+ if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
+ Res.VL = true;
+ if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
+ Res.demandVTYPE();
+ // Start conservative on the unlowered form too
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (RISCVII::hasSEWOp(TSFlags)) {
+ Res.demandVTYPE();
+ if (RISCVII::hasVLOp(TSFlags))
+ Res.VL = true;
+ }
+
+ // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
+ // They instead demand the ratio of the two, which is used in computing
+ // EMUL but allows us the flexibility to change SEW and LMUL provided we
+ // don't change the ratio.
+ // Note: We assume that the instruction's initial SEW is the EEW encoded
+ // in the opcode. This is asserted when constructing the VSETVLIInfo.
+ if (getEEWForLoadStore(MI)) {
+ Res.SEW = false;
+ Res.LMUL = false;
+ }
+
+ // Store instructions don't use the policy fields.
+ if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ // If this is a mask reg operation, it only cares about VLMAX.
+ // TODO: Possible extensions to this logic
+ // * Probably ok if the available VLMAX is larger than demanded
+ // * The policy bits can probably be ignored.
+ if (isMaskRegOp(MI)) {
+ Res.SEW = false;
+ Res.LMUL = false;
+ }
+
+ return Res;
+}
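Putting the clauses together (a hypothetical walk-through; StoreMI stands for a PseudoVSE32_V_M1 unit-stride store): hasSEWOp first demands full VTYPE plus VL, the implicit-EEW clause then clears SEW and LMUL (leaving the ratio demanded), and the no-explicit-defs clause clears both policies.

    DemandedFields D = getDemanded(StoreMI); // StoreMI: assumed VSE32 store
    assert(D.VL && D.SEWLMULRatio && "store still pins VL and the ratio");
    assert(!D.SEW && !D.LMUL && !D.TailPolicy && !D.MaskPolicy &&
           "SEW/LMUL may float and policies are ignored");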
+
+/// Defines the abstract state with which the forward dataflow models the
+/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
union {
Register AVLReg;
@@ -57,15 +421,12 @@ class VSETVLIInfo {
uint8_t SEW = 0;
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
- uint8_t MaskRegOp : 1;
- uint8_t StoreOp : 1;
- uint8_t ScalarMovOp : 1;
uint8_t SEWLMULRatioOnly : 1;
public:
VSETVLIInfo()
- : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
- StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {}
+ : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
+ SEWLMULRatioOnly(false) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
@@ -97,11 +458,10 @@ public:
assert(hasAVLImm());
return AVLImm;
}
- bool hasZeroAVL() const {
- if (hasAVLImm())
- return getAVLImm() == 0;
- return false;
- }
+
+ unsigned getSEW() const { return SEW; }
+ RISCVII::VLMUL getVLMUL() const { return VLMul; }
+
bool hasNonZeroAVL() const {
if (hasAVLImm())
return getAVLImm() > 0;
@@ -132,17 +492,13 @@ public:
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
}
- void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
- bool IsStore, bool IsScalarMovOp) {
+ void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
SEW = S;
TailAgnostic = TA;
MaskAgnostic = MA;
- MaskRegOp = MRO;
- StoreOp = IsStore;
- ScalarMovOp = IsScalarMovOp;
}
unsigned encodeVTYPE() const {
@@ -175,25 +531,16 @@ public:
Other.MaskAgnostic);
}
- static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
- unsigned LMul;
- bool Fractional;
- std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
-
- // Convert LMul to a fixed point value with 3 fractional bits.
- LMul = Fractional ? (8 / LMul) : (LMul * 8);
-
- assert(SEW >= 8 && "Unexpected SEW value");
- return (SEW * 8) / LMul;
- }
-
unsigned getSEWLMULRatio() const {
assert(isValid() && !isUnknown() &&
"Can't use VTYPE for uninitialized or unknown");
- return getSEWLMULRatio(SEW, VLMul);
+ return ::getSEWLMULRatio(SEW, VLMul);
}
// Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
+ // Note that having the same VLMAX ensures that both share the same
+ // function from AVL to VL; that is, they must produce the same VL value
+ // for any given AVL value.
bool hasSameVLMAX(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
@@ -211,36 +558,22 @@ public:
MaskAgnostic == Other.MaskAgnostic;
}
- bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
- // Simple case, see if full VTYPE matches.
- if (hasSameVTYPE(InstrInfo))
- return true;
-
- if (Strict)
- return false;
-
- // If this is a mask reg operation, it only cares about VLMAX.
- // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
- // than "InstrInfo".
- // FIXME: The policy bits can probably be ignored for mask reg operations.
- if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
- TailAgnostic == InstrInfo.TailAgnostic &&
- MaskAgnostic == InstrInfo.MaskAgnostic)
- return true;
-
- return false;
+ bool hasCompatibleVTYPE(const MachineInstr &MI,
+ const VSETVLIInfo &Require) const {
+ const DemandedFields Used = getDemanded(MI);
+ return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
}
// Determine whether the vector instruction's requirements represented by
- // InstrInfo are compatible with the previous vsetvli instruction represented
- // by this.
- bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const {
- assert(isValid() && InstrInfo.isValid() &&
+ // Require are compatible with the previous vsetvli instruction represented
+ // by this. MI is the instruction whose requirements we're considering.
+ bool isCompatible(const MachineInstr &MI, const VSETVLIInfo &Require) const {
+ assert(isValid() && Require.isValid() &&
"Can't compare invalid VSETVLIInfos");
- assert(!InstrInfo.SEWLMULRatioOnly &&
+ assert(!Require.SEWLMULRatioOnly &&
"Expected a valid VTYPE for instruction!");
// Nothing is compatible with Unknown.
- if (isUnknown() || InstrInfo.isUnknown())
+ if (isUnknown() || Require.isUnknown())
return false;
// If only our VLMAX ratio is valid, then this isn't compatible.
@@ -249,61 +582,11 @@ public:
// If the instruction doesn't need an AVLReg and the SEW matches, consider
// it compatible.
- if (!Strict && InstrInfo.hasAVLReg() &&
- InstrInfo.AVLReg == RISCV::NoRegister) {
- if (SEW == InstrInfo.SEW)
+ if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
+ if (SEW == Require.SEW)
return true;
- }
-
- // For vmv.s.x and vfmv.s.f, there is only two behaviors, VL = 0 and VL > 0.
- // So it's compatible when we could make sure that both VL be the same
- // situation.
- if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() &&
- ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) ||
- (hasZeroAVL() && InstrInfo.hasZeroAVL())) &&
- hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo))
- return true;
-
- // The AVL must match.
- if (!hasSameAVL(InstrInfo))
- return false;
-
- if (hasCompatibleVTYPE(InstrInfo, Strict))
- return true;
-
- // Strict matches must ensure a full VTYPE match.
- if (Strict)
- return false;
-
- // Store instructions don't use the policy fields.
- // TODO: Move into hasCompatibleVTYPE?
- if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW)
- return true;
- // Anything else is not compatible.
- return false;
- }
-
- bool isCompatibleWithLoadStoreEEW(unsigned EEW,
- const VSETVLIInfo &InstrInfo) const {
- assert(isValid() && InstrInfo.isValid() &&
- "Can't compare invalid VSETVLIInfos");
- assert(!InstrInfo.SEWLMULRatioOnly &&
- "Expected a valid VTYPE for instruction!");
- assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");
-
- if (isUnknown() || hasSEWLMULRatioOnly())
- return false;
-
- if (!hasSameAVL(InstrInfo))
- return false;
-
- // Stores can ignore the tail and mask policies.
- if (!InstrInfo.StoreOp && (TailAgnostic != InstrInfo.TailAgnostic ||
- MaskAgnostic != InstrInfo.MaskAgnostic))
- return false;
-
- return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
+ return hasSameAVL(Require) && hasCompatibleVTYPE(MI, Require);
}
bool operator==(const VSETVLIInfo &Other) const {
@@ -322,16 +605,20 @@ public:
if (!hasSameAVL(Other))
return false;
+ // If the SEWLMULRatioOnly bits are different, then they aren't equal.
+ if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
+ return false;
+
// If only the VLMAX is valid, check that it is the same.
- if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly)
+ if (SEWLMULRatioOnly)
return hasSameVLMAX(Other);
// If the full VTYPE is valid, check that it is the same.
- if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly)
- return hasSameVTYPE(Other);
+ return hasSameVTYPE(Other);
+ }
- // If the SEWLMULRatioOnly bits are different, then they aren't equal.
- return false;
+ bool operator!=(const VSETVLIInfo &Other) const {
+ return !(*this == Other);
}
// Calculate the VSETVLIInfo visible to a block assuming this and Other are
@@ -365,25 +652,43 @@ public:
return VSETVLIInfo::getUnknown();
}
- // Calculate the VSETVLIInfo visible at the end of the block assuming this
- // is the predecessor value, and Other is change for this block.
- VSETVLIInfo merge(const VSETVLIInfo &Other) const {
- assert(isValid() && "Can only merge with a valid VSETVLInfo");
-
- // Nothing changed from the predecessor, keep it.
- if (!Other.isValid())
- return *this;
-
- // If the change is compatible with the input, we won't create a VSETVLI
- // and should keep the predecessor.
- if (isCompatible(Other, /*Strict*/ true))
- return *this;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Support for debugging, callable in GDB: V->dump()
+ LLVM_DUMP_METHOD void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
- // Otherwise just use whatever is in this block.
- return Other;
+ /// Implement operator<<.
+ /// @{
+ void print(raw_ostream &OS) const {
+ OS << "{";
+ if (!isValid())
+ OS << "Uninitialized";
+ if (isUnknown())
+ OS << "unknown";
+ if (hasAVLReg())
+ OS << "AVLReg=" << (unsigned)AVLReg;
+ if (hasAVLImm())
+ OS << "AVLImm=" << (unsigned)AVLImm;
+ OS << ", "
+ << "VLMul=" << (unsigned)VLMul << ", "
+ << "SEW=" << (unsigned)SEW << ", "
+ << "TailAgnostic=" << (bool)TailAgnostic << ", "
+ << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
+ << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
}
+#endif
};
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
+ V.print(OS);
+ return OS;
+}
+#endif
+
struct BlockData {
// The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
// made by this block. Calculated in Phase 1.
@@ -400,7 +705,7 @@ struct BlockData {
// Keeps track of whether the block is already in the queue.
bool InQueue = false;
- BlockData() {}
+ BlockData() = default;
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
@@ -426,14 +731,24 @@ public:
StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
private:
- bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
- bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB);
+ bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) const;
+ bool needVSETVLIPHI(const VSETVLIInfo &Require,
+ const MachineBasicBlock &MBB) const;
void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
+ void insertVSETVLI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt, DebugLoc DL,
+ const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
+ void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
+ void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
+ void doLocalPostpass(MachineBasicBlock &MBB);
+ void doPRE(MachineBasicBlock &MBB);
+ void insertReadVL(MachineBasicBlock &MBB);
};
} // end anonymous namespace
@@ -443,100 +758,76 @@ char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
-static MachineInstr *elideCopies(MachineInstr *MI,
- const MachineRegisterInfo *MRI) {
- while (true) {
- if (!MI->isFullCopy())
- return MI;
- if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
- return nullptr;
- MI = MRI->getVRegDef(MI->getOperand(1).getReg());
- if (!MI)
- return nullptr;
- }
+static bool isVectorConfigInstr(const MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
+ MI.getOpcode() == RISCV::PseudoVSETIVLI;
}
-static bool isScalarMoveInstr(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
+/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
+/// VL and only sets VTYPE.
+static bool isVLPreservingConfig(const MachineInstr &MI) {
+ if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
return false;
- case RISCV::PseudoVMV_S_X_M1:
- case RISCV::PseudoVMV_S_X_M2:
- case RISCV::PseudoVMV_S_X_M4:
- case RISCV::PseudoVMV_S_X_M8:
- case RISCV::PseudoVMV_S_X_MF2:
- case RISCV::PseudoVMV_S_X_MF4:
- case RISCV::PseudoVMV_S_X_MF8:
- case RISCV::PseudoVFMV_S_F16_M1:
- case RISCV::PseudoVFMV_S_F16_M2:
- case RISCV::PseudoVFMV_S_F16_M4:
- case RISCV::PseudoVFMV_S_F16_M8:
- case RISCV::PseudoVFMV_S_F16_MF2:
- case RISCV::PseudoVFMV_S_F16_MF4:
- case RISCV::PseudoVFMV_S_F32_M1:
- case RISCV::PseudoVFMV_S_F32_M2:
- case RISCV::PseudoVFMV_S_F32_M4:
- case RISCV::PseudoVFMV_S_F32_M8:
- case RISCV::PseudoVFMV_S_F32_MF2:
- case RISCV::PseudoVFMV_S_F64_M1:
- case RISCV::PseudoVFMV_S_F64_M2:
- case RISCV::PseudoVFMV_S_F64_M4:
- case RISCV::PseudoVFMV_S_F64_M8:
- return true;
- }
+ assert(RISCV::X0 == MI.getOperand(1).getReg());
+ return RISCV::X0 == MI.getOperand(0).getReg();
}
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
- unsigned NumOperands = MI.getNumExplicitOperands();
- bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags);
-
- // Default to tail agnostic unless the destination is tied to a source.
- // Unless the source is undef. In that case the user would have some control
- // over the tail values. Some pseudo instructions force a tail agnostic policy
- // despite having a tied def.
- bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
- bool TailAgnostic = true;
- // If the instruction has policy argument, use the argument.
- if (HasPolicy) {
- const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
- TailAgnostic = Op.getImm() & 0x1;
- }
+ // If the instruction has a policy argument, use that argument.
+ // If there is no policy argument, default to tail agnostic unless the
+ // destination is tied to a source (and that source is not undef; in that
+ // case the user retains some control over the policy values).
+ bool TailAgnostic = true;
+ bool UsesMaskPolicy = RISCVII::usesMaskPolicy(TSFlags);
+ // FIXME: Could we look at the surrounding instructions to choose a
+ // matching mask policy and reduce the number of vsetvli instructions? The
+ // default mask policy is agnostic if the instruction uses a mask policy,
+ // and undisturbed otherwise. Because most masked operations are
+ // mask-undisturbed, we could possibly reduce the vsetvlis between masked
+ // and unmasked instruction sequences.
+ bool MaskAgnostic = UsesMaskPolicy;
unsigned UseOpIdx;
- if (!(ForceTailAgnostic || (HasPolicy && TailAgnostic)) &&
- MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ if (RISCVII::hasVecPolicyOp(TSFlags)) {
+ const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ uint64_t Policy = Op.getImm();
+ assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
+ "Invalid Policy Value");
+ // In some cases a mismatched passthru/maskedoff operand and policy value
+ // do not make sense (e.g. a tied operand that is IMPLICIT_DEF with a
+ // non-TAMA policy, or one that is not IMPLICIT_DEF with a TAMA policy),
+ // but the user has set the policy value explicitly, so the compiler does
+ // not fix it.
+ TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
+ MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
+ } else if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
TailAgnostic = false;
+ if (UsesMaskPolicy)
+ MaskAgnostic = false;
// If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
- if (UseMI) {
- UseMI = elideCopies(UseMI, MRI);
- if (UseMI && UseMI->isImplicitDef())
- TailAgnostic = true;
+ if (UseMI && UseMI->isImplicitDef()) {
+ TailAgnostic = true;
+ if (UsesMaskPolicy)
+ MaskAgnostic = true;
}
+ // Some pseudo instructions force a tail agnostic policy despite having a
+ // tied def.
+ if (RISCVII::doesForceTailAgnostic(TSFlags))
+ TailAgnostic = true;
}
- // Remove the tail policy so we can find the SEW and VL.
- if (HasPolicy)
- --NumOperands;
-
RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
- unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
+ unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
// A Log2SEW of 0 is an operation on mask registers only.
- bool MaskRegOp = Log2SEW == 0;
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
- // If there are no explicit defs, this is a store instruction which can
- // ignore the tail and mask policies.
- bool StoreOp = MI.getNumExplicitDefs() == 0;
- bool ScalarMovOp = isScalarMoveInstr(MI);
-
if (RISCVII::hasVLOp(TSFlags)) {
- const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
+ const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isImm()) {
int64_t Imm = VLOp.getImm();
// Convert the VLMax sentinel to the X0 register.
@@ -547,10 +838,15 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
} else {
InstrInfo.setAVLReg(VLOp.getReg());
}
- } else
+ } else {
InstrInfo.setAVLReg(RISCV::NoRegister);
- InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
- /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp);
+ }
+#ifndef NDEBUG
+ if (Optional<unsigned> EEW = getEEWForLoadStore(MI)) {
+ assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
+ }
+#endif
+ InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
return InstrInfo;
}
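To make the policy decoding concrete (a sketch; the bit values are assumed from RISCVII's policy encoding, where TAIL_AGNOSTIC and MASK_AGNOSTIC occupy the two low bits): a policy immediate of 3 requests tail-agnostic, mask-agnostic (TAMA).

    // Assumed encoding: TAIL_AGNOSTIC = 1, MASK_AGNOSTIC = 2.
    constexpr uint64_t TailBit = 1, MaskBit = 2, Policy = 3; // TAMA
    static_assert((Policy & TailBit) && (Policy & MaskBit),
                  "policy 3 decodes to tail- and mask-agnostic");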
@@ -559,12 +855,18 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info,
const VSETVLIInfo &PrevInfo) {
DebugLoc DL = MI.getDebugLoc();
+ insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
+}
+
+void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertPt, DebugLoc DL,
+ const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
@@ -573,7 +875,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
}
if (Info.hasAVLImm()) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
+ BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addImm(Info.getAVLImm())
.addImm(Info.encodeVTYPE());
@@ -586,7 +888,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// the previous vl to become invalid.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
@@ -594,7 +896,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
return;
}
// Otherwise use an AVL of 0 to avoid depending on previous vl.
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
+ BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addImm(0)
.addImm(Info.encodeVTYPE());
@@ -613,7 +915,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
Opcode = RISCV::PseudoVSETVLIX0;
}
- BuildMI(MBB, MI, DL, TII->get(Opcode))
+ BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(AVLReg)
.addImm(Info.encodeVTYPE());
@@ -638,24 +940,44 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
return NewInfo;
}
-bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
- const VSETVLIInfo &CurInfo) {
- if (CurInfo.isCompatible(Require, /*Strict*/ false))
+/// Return true if a VSETVLI is required to transition from CurInfo to Require
+/// before MI.
+bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
+ const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) const {
+ assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));
+
+ if (CurInfo.isCompatible(MI, Require))
return false;
+ if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
+ return true;
+
+ // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
+ // VL=0 is uninteresting (as it should have been deleted already), so it is
+ // compatible if we can prove both are non-zero. Additionally, if writing
+ // to an implicit_def operand, we don't need to preserve any other bits and
+ // are thus compatible with any larger etype, and can disregard policy bits.
+ if (isScalarMoveInstr(MI) &&
+ CurInfo.hasNonZeroAVL() && Require.hasNonZeroAVL()) {
+ auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
+ if (VRegDef && VRegDef->isImplicitDef() &&
+ CurInfo.getSEW() >= Require.getSEW())
+ return false;
+ if (CurInfo.hasSameSEW(Require) && CurInfo.hasSamePolicy(Require))
+ return false;
+ }
+
// We didn't find a compatible value. If our AVL is a virtual register,
- // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
+ // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
- if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
- Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
- CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) {
+ if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
+ CurInfo.hasCompatibleVTYPE(MI, Require)) {
if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
- if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
- DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
- DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
+ if (isVectorConfigInstr(*DefMI)) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
- if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
+ if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
return false;
}
}
@@ -664,253 +986,121 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
return true;
}
-bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
- const VSETVLIInfo &Require,
- const VSETVLIInfo &CurInfo) {
- unsigned EEW;
- switch (MI.getOpcode()) {
- default:
- return false;
- case RISCV::PseudoVLE8_V_M1:
- case RISCV::PseudoVLE8_V_M1_MASK:
- case RISCV::PseudoVLE8_V_M2:
- case RISCV::PseudoVLE8_V_M2_MASK:
- case RISCV::PseudoVLE8_V_M4:
- case RISCV::PseudoVLE8_V_M4_MASK:
- case RISCV::PseudoVLE8_V_M8:
- case RISCV::PseudoVLE8_V_M8_MASK:
- case RISCV::PseudoVLE8_V_MF2:
- case RISCV::PseudoVLE8_V_MF2_MASK:
- case RISCV::PseudoVLE8_V_MF4:
- case RISCV::PseudoVLE8_V_MF4_MASK:
- case RISCV::PseudoVLE8_V_MF8:
- case RISCV::PseudoVLE8_V_MF8_MASK:
- case RISCV::PseudoVLSE8_V_M1:
- case RISCV::PseudoVLSE8_V_M1_MASK:
- case RISCV::PseudoVLSE8_V_M2:
- case RISCV::PseudoVLSE8_V_M2_MASK:
- case RISCV::PseudoVLSE8_V_M4:
- case RISCV::PseudoVLSE8_V_M4_MASK:
- case RISCV::PseudoVLSE8_V_M8:
- case RISCV::PseudoVLSE8_V_M8_MASK:
- case RISCV::PseudoVLSE8_V_MF2:
- case RISCV::PseudoVLSE8_V_MF2_MASK:
- case RISCV::PseudoVLSE8_V_MF4:
- case RISCV::PseudoVLSE8_V_MF4_MASK:
- case RISCV::PseudoVLSE8_V_MF8:
- case RISCV::PseudoVLSE8_V_MF8_MASK:
- case RISCV::PseudoVSE8_V_M1:
- case RISCV::PseudoVSE8_V_M1_MASK:
- case RISCV::PseudoVSE8_V_M2:
- case RISCV::PseudoVSE8_V_M2_MASK:
- case RISCV::PseudoVSE8_V_M4:
- case RISCV::PseudoVSE8_V_M4_MASK:
- case RISCV::PseudoVSE8_V_M8:
- case RISCV::PseudoVSE8_V_M8_MASK:
- case RISCV::PseudoVSE8_V_MF2:
- case RISCV::PseudoVSE8_V_MF2_MASK:
- case RISCV::PseudoVSE8_V_MF4:
- case RISCV::PseudoVSE8_V_MF4_MASK:
- case RISCV::PseudoVSE8_V_MF8:
- case RISCV::PseudoVSE8_V_MF8_MASK:
- case RISCV::PseudoVSSE8_V_M1:
- case RISCV::PseudoVSSE8_V_M1_MASK:
- case RISCV::PseudoVSSE8_V_M2:
- case RISCV::PseudoVSSE8_V_M2_MASK:
- case RISCV::PseudoVSSE8_V_M4:
- case RISCV::PseudoVSSE8_V_M4_MASK:
- case RISCV::PseudoVSSE8_V_M8:
- case RISCV::PseudoVSSE8_V_M8_MASK:
- case RISCV::PseudoVSSE8_V_MF2:
- case RISCV::PseudoVSSE8_V_MF2_MASK:
- case RISCV::PseudoVSSE8_V_MF4:
- case RISCV::PseudoVSSE8_V_MF4_MASK:
- case RISCV::PseudoVSSE8_V_MF8:
- case RISCV::PseudoVSSE8_V_MF8_MASK:
- EEW = 8;
- break;
- case RISCV::PseudoVLE16_V_M1:
- case RISCV::PseudoVLE16_V_M1_MASK:
- case RISCV::PseudoVLE16_V_M2:
- case RISCV::PseudoVLE16_V_M2_MASK:
- case RISCV::PseudoVLE16_V_M4:
- case RISCV::PseudoVLE16_V_M4_MASK:
- case RISCV::PseudoVLE16_V_M8:
- case RISCV::PseudoVLE16_V_M8_MASK:
- case RISCV::PseudoVLE16_V_MF2:
- case RISCV::PseudoVLE16_V_MF2_MASK:
- case RISCV::PseudoVLE16_V_MF4:
- case RISCV::PseudoVLE16_V_MF4_MASK:
- case RISCV::PseudoVLSE16_V_M1:
- case RISCV::PseudoVLSE16_V_M1_MASK:
- case RISCV::PseudoVLSE16_V_M2:
- case RISCV::PseudoVLSE16_V_M2_MASK:
- case RISCV::PseudoVLSE16_V_M4:
- case RISCV::PseudoVLSE16_V_M4_MASK:
- case RISCV::PseudoVLSE16_V_M8:
- case RISCV::PseudoVLSE16_V_M8_MASK:
- case RISCV::PseudoVLSE16_V_MF2:
- case RISCV::PseudoVLSE16_V_MF2_MASK:
- case RISCV::PseudoVLSE16_V_MF4:
- case RISCV::PseudoVLSE16_V_MF4_MASK:
- case RISCV::PseudoVSE16_V_M1:
- case RISCV::PseudoVSE16_V_M1_MASK:
- case RISCV::PseudoVSE16_V_M2:
- case RISCV::PseudoVSE16_V_M2_MASK:
- case RISCV::PseudoVSE16_V_M4:
- case RISCV::PseudoVSE16_V_M4_MASK:
- case RISCV::PseudoVSE16_V_M8:
- case RISCV::PseudoVSE16_V_M8_MASK:
- case RISCV::PseudoVSE16_V_MF2:
- case RISCV::PseudoVSE16_V_MF2_MASK:
- case RISCV::PseudoVSE16_V_MF4:
- case RISCV::PseudoVSE16_V_MF4_MASK:
- case RISCV::PseudoVSSE16_V_M1:
- case RISCV::PseudoVSSE16_V_M1_MASK:
- case RISCV::PseudoVSSE16_V_M2:
- case RISCV::PseudoVSSE16_V_M2_MASK:
- case RISCV::PseudoVSSE16_V_M4:
- case RISCV::PseudoVSSE16_V_M4_MASK:
- case RISCV::PseudoVSSE16_V_M8:
- case RISCV::PseudoVSSE16_V_M8_MASK:
- case RISCV::PseudoVSSE16_V_MF2:
- case RISCV::PseudoVSSE16_V_MF2_MASK:
- case RISCV::PseudoVSSE16_V_MF4:
- case RISCV::PseudoVSSE16_V_MF4_MASK:
- EEW = 16;
- break;
- case RISCV::PseudoVLE32_V_M1:
- case RISCV::PseudoVLE32_V_M1_MASK:
- case RISCV::PseudoVLE32_V_M2:
- case RISCV::PseudoVLE32_V_M2_MASK:
- case RISCV::PseudoVLE32_V_M4:
- case RISCV::PseudoVLE32_V_M4_MASK:
- case RISCV::PseudoVLE32_V_M8:
- case RISCV::PseudoVLE32_V_M8_MASK:
- case RISCV::PseudoVLE32_V_MF2:
- case RISCV::PseudoVLE32_V_MF2_MASK:
- case RISCV::PseudoVLSE32_V_M1:
- case RISCV::PseudoVLSE32_V_M1_MASK:
- case RISCV::PseudoVLSE32_V_M2:
- case RISCV::PseudoVLSE32_V_M2_MASK:
- case RISCV::PseudoVLSE32_V_M4:
- case RISCV::PseudoVLSE32_V_M4_MASK:
- case RISCV::PseudoVLSE32_V_M8:
- case RISCV::PseudoVLSE32_V_M8_MASK:
- case RISCV::PseudoVLSE32_V_MF2:
- case RISCV::PseudoVLSE32_V_MF2_MASK:
- case RISCV::PseudoVSE32_V_M1:
- case RISCV::PseudoVSE32_V_M1_MASK:
- case RISCV::PseudoVSE32_V_M2:
- case RISCV::PseudoVSE32_V_M2_MASK:
- case RISCV::PseudoVSE32_V_M4:
- case RISCV::PseudoVSE32_V_M4_MASK:
- case RISCV::PseudoVSE32_V_M8:
- case RISCV::PseudoVSE32_V_M8_MASK:
- case RISCV::PseudoVSE32_V_MF2:
- case RISCV::PseudoVSE32_V_MF2_MASK:
- case RISCV::PseudoVSSE32_V_M1:
- case RISCV::PseudoVSSE32_V_M1_MASK:
- case RISCV::PseudoVSSE32_V_M2:
- case RISCV::PseudoVSSE32_V_M2_MASK:
- case RISCV::PseudoVSSE32_V_M4:
- case RISCV::PseudoVSSE32_V_M4_MASK:
- case RISCV::PseudoVSSE32_V_M8:
- case RISCV::PseudoVSSE32_V_M8_MASK:
- case RISCV::PseudoVSSE32_V_MF2:
- case RISCV::PseudoVSSE32_V_MF2_MASK:
- EEW = 32;
- break;
- case RISCV::PseudoVLE64_V_M1:
- case RISCV::PseudoVLE64_V_M1_MASK:
- case RISCV::PseudoVLE64_V_M2:
- case RISCV::PseudoVLE64_V_M2_MASK:
- case RISCV::PseudoVLE64_V_M4:
- case RISCV::PseudoVLE64_V_M4_MASK:
- case RISCV::PseudoVLE64_V_M8:
- case RISCV::PseudoVLE64_V_M8_MASK:
- case RISCV::PseudoVLSE64_V_M1:
- case RISCV::PseudoVLSE64_V_M1_MASK:
- case RISCV::PseudoVLSE64_V_M2:
- case RISCV::PseudoVLSE64_V_M2_MASK:
- case RISCV::PseudoVLSE64_V_M4:
- case RISCV::PseudoVLSE64_V_M4_MASK:
- case RISCV::PseudoVLSE64_V_M8:
- case RISCV::PseudoVLSE64_V_M8_MASK:
- case RISCV::PseudoVSE64_V_M1:
- case RISCV::PseudoVSE64_V_M1_MASK:
- case RISCV::PseudoVSE64_V_M2:
- case RISCV::PseudoVSE64_V_M2_MASK:
- case RISCV::PseudoVSE64_V_M4:
- case RISCV::PseudoVSE64_V_M4_MASK:
- case RISCV::PseudoVSE64_V_M8:
- case RISCV::PseudoVSE64_V_M8_MASK:
- case RISCV::PseudoVSSE64_V_M1:
- case RISCV::PseudoVSSE64_V_M1_MASK:
- case RISCV::PseudoVSSE64_V_M2:
- case RISCV::PseudoVSSE64_V_M2_MASK:
- case RISCV::PseudoVSSE64_V_M4:
- case RISCV::PseudoVSSE64_V_M4_MASK:
- case RISCV::PseudoVSSE64_V_M8:
- case RISCV::PseudoVSSE64_V_M8_MASK:
- EEW = 64;
- break;
+// Given an incoming state reaching MI, modifies that state so that it is minimally
+// compatible with MI. The resulting state is guaranteed to be semantically legal
+// for MI, but may not be the state requested by MI.
+void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return;
+
+ const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+ if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
+ return;
+
+ const VSETVLIInfo PrevInfo = Info;
+ Info = NewInfo;
+
+ if (!RISCVII::hasVLOp(TSFlags))
+ return;
+
+ // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
+ // VL > 0. We can discard the user-requested AVL and just use the last
+ // one if we can prove they are equally zero or non-zero. This removes a
+ // vsetvli entirely if the types match, or allows use of the cheaper
+ // AVL-preserving variant if VLMAX doesn't change. If VLMAX might change,
+ // we couldn't use the 'vsetvli x0, x0, vtype' variant, so we avoid the
+ // transform to prevent extending the live range of an AVL register
+ // operand.
+ // TODO: We can probably relax this for immediates.
+ if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
+ PrevInfo.hasNonZeroAVL() && Info.hasNonZeroAVL() &&
+ Info.hasSameVLMAX(PrevInfo)) {
+ if (PrevInfo.hasAVLImm())
+ Info.setAVLImm(PrevInfo.getAVLImm());
+ else
+ Info.setAVLReg(PrevInfo.getAVLReg());
+ return;
+ }
+
+ // Two cases involving an AVL resulting from a previous vsetvli.
+ // 1) If the AVL is the result of a previous vsetvli which has the
+ // same AVL and VLMAX as our current state, we can reuse the AVL
+ // from the current state for the new one. This allows us to
+ // generate 'vsetvli x0, x0, vtype' or possibly skip the transition
+ // entirely.
+ // 2) If AVL is defined by a vsetvli with the same VLMAX, we can
+ // replace the AVL operand with the AVL of the defining vsetvli.
+ // We avoid general register AVLs to avoid extending live ranges
+ // without being sure we can kill the original source reg entirely.
+ if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
+ return;
+ MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
+ if (!DefMI || !isVectorConfigInstr(*DefMI))
+ return;
+
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+ // case 1
+ if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
+ DefInfo.hasSameAVL(PrevInfo) &&
+ DefInfo.hasSameVLMAX(PrevInfo)) {
+ if (PrevInfo.hasAVLImm())
+ Info.setAVLImm(PrevInfo.getAVLImm());
+ else
+ Info.setAVLReg(PrevInfo.getAVLReg());
+ return;
+ }
+ // case 2
+ if (DefInfo.hasSameVLMAX(Info) &&
+ (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
+ if (DefInfo.hasAVLImm())
+ Info.setAVLImm(DefInfo.getAVLImm());
+ else
+ Info.setAVLReg(DefInfo.getAVLReg());
+ return;
+ }
+}
+
+// Given a state with which we evaluated MI (see transferBefore above for why
+// this might be different from the state MI requested), modify the state to
+// reflect the changes MI might make.
+void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
+ if (isVectorConfigInstr(MI)) {
+ Info = getInfoForVSETVLI(MI);
+ return;
}
- return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
+ if (RISCV::isFaultFirstLoad(MI)) {
+ // Update AVL to the VL output of the fault-first load.
+ Info.setAVLReg(MI.getOperand(1).getReg());
+ return;
+ }
+
+ // If this is something that updates VL/VTYPE that we don't know about, set
+ // the state to unknown.
+ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
+ MI.modifiesRegister(RISCV::VTYPE))
+ Info = VSETVLIInfo::getUnknown();
}
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
bool HadVectorOp = false;
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+ BBInfo.Change = BBInfo.Pred;
for (const MachineInstr &MI : MBB) {
- // If this is an explicit VSETVLI or VSETIVLI, update our state.
- if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
- MI.getOpcode() == RISCV::PseudoVSETIVLI) {
- HadVectorOp = true;
- BBInfo.Change = getInfoForVSETVLI(MI);
- continue;
- }
+ transferBefore(BBInfo.Change, MI);
- uint64_t TSFlags = MI.getDesc().TSFlags;
- if (RISCVII::hasSEWOp(TSFlags)) {
+ if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
HadVectorOp = true;
- VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
-
- if (!BBInfo.Change.isValid()) {
- BBInfo.Change = NewInfo;
- } else {
- // If this instruction isn't compatible with the previous VL/VTYPE
- // we need to insert a VSETVLI.
- // If this is a unit-stride or strided load/store, we may be able to use
- // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
- // NOTE: We only do this if the vtype we're comparing against was
- // created in this block. We need the first and third phase to treat
- // the store the same way.
- if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
- needVSETVLI(NewInfo, BBInfo.Change))
- BBInfo.Change = NewInfo;
- }
- }
-
- // If this is something that updates VL/VTYPE that we don't know about, set
- // the state to unknown.
- if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
- MI.modifiesRegister(RISCV::VTYPE)) {
- BBInfo.Change = VSETVLIInfo::getUnknown();
- }
+ transferAfter(BBInfo.Change, MI);
}
- // Initial exit state is whatever change we found in the block.
- BBInfo.Exit = BBInfo.Change;
-
return HadVectorOp;
}
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
+
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
BBInfo.InQueue = false;
@@ -928,9 +1118,20 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
if (!InInfo.isValid())
return;
+ // If nothing changed, there is no need to rerun the block.
+ if (InInfo == BBInfo.Pred)
+ return;
+
BBInfo.Pred = InInfo;
+ LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.Pred << "\n");
- VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);
+  // Note: It's tempting to cache the state changes here, but due to the
+  // compatibility checks performed, a block's output state can change based
+  // on the input state. To cache, we'd have to add logic for finding
+  // never-compatible state changes.
+ computeVLVTYPEChanges(MBB);
+ VSETVLIInfo TmpStatus = BBInfo.Change;
// If the new exit value matches the old exit value, we don't need to revisit
// any blocks.
@@ -938,6 +1139,8 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
return;
BBInfo.Exit = TmpStatus;
+ LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
+ << " changed to " << BBInfo.Exit << "\n");
// Add the successors to the work list so we can propagate the changed exit
// status.
@@ -947,10 +1150,10 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
}
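The phase 2 loop above is a standard forward dataflow fixed point: merge the predecessor exit states, recompute the block, and requeue successors whenever the exit state changes. A minimal standalone sketch of that pattern, with a toy three-valued lattice standing in for VSETVLIInfo (all names here are illustrative stand-ins, not the pass's API):

#include <cstdio>
#include <queue>
#include <vector>

// Toy three-valued lattice standing in for VSETVLIInfo:
// Uninit (no info yet) < Known(v) < Unknown (conflicting info).
struct State {
  enum Kind { Uninit, Known, Unknown } K = Uninit;
  int V = 0;
  bool operator==(const State &O) const {
    return K == O.K && (K != Known || V == O.V);
  }
};

static State merge(State A, State B) {
  if (A.K == State::Uninit) return B;
  if (B.K == State::Uninit) return A;
  if (A == B) return A;
  return {State::Unknown, 0}; // incompatible predecessors
}

int main() {
  // CFG: 0 -> 1, 1 -> 1 (self loop), 1 -> 2.
  std::vector<std::vector<int>> Preds = {{}, {0, 1}, {1}};
  // Per-block transfer function: block 0 establishes state 3, block 1 sets
  // state 7, block 2 is transparent (passes its input through).
  auto Transfer = [](int BB, State In) {
    if (BB == 0) return State{State::Known, 3};
    if (BB == 1) return State{State::Known, 7};
    return In;
  };
  std::vector<State> Pred(3), Exit(3);
  std::queue<int> Work;
  for (int BB = 0; BB < 3; ++BB) Work.push(BB);
  while (!Work.empty()) {
    int BB = Work.front(); Work.pop();
    State In; // merge all predecessor exit states
    for (int P : Preds[BB]) In = merge(In, Exit[P]);
    State Out = Transfer(BB, In);
    if (Out == Exit[BB]) continue; // exit unchanged: nothing to propagate
    Pred[BB] = In; Exit[BB] = Out;
    for (int S = 0; S < 3; ++S) // requeue successors of BB
      for (int P : Preds[S])
        if (P == BB) Work.push(S);
  }
  std::printf("exit state of block 2: %d\n", Exit[2].V); // prints 7
}

As the comment above notes, the real pass recomputes the block transfer on every visit rather than caching it, because the compatibility checks make a block's transfer input-dependent.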
// If we weren't able to prove a vsetvli was directly unneeded, it might still
-// be/ unneeded if the AVL is a phi node where all incoming values are VL
+// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
- const MachineBasicBlock &MBB) {
+ const MachineBasicBlock &MBB) const {
if (DisableInsertVSETVLPHIOpt)
return true;
@@ -973,15 +1176,12 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
// If the exit from the predecessor has the VTYPE we are looking for
// we might be able to avoid a VSETVLI.
- if (PBBInfo.Exit.isUnknown() ||
- !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false))
+ if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
return true;
    // We need the PHI input to be the output of a VSET(I)VLI.
MachineInstr *DefMI = MRI->getVRegDef(InReg);
- if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
- DefMI->getOpcode() != RISCV::PseudoVSETVLIX0 &&
- DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
+ if (!DefMI || !isVectorConfigInstr(*DefMI))
return true;
    // We found a VSET(I)VLI; make sure it matches the output of the
@@ -998,42 +1198,42 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
}
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
- VSETVLIInfo CurInfo;
- // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was
- // calculated in computeIncomingVLVTYPE. We need this to apply
- // canSkipVSETVLIForLoadStore the same way computeIncomingVLVTYPE did. We
- // can't include predecessor information in that decision to avoid disagreeing
- // with the global analysis.
- VSETVLIInfo BBLocalInfo;
- // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
- MachineInstr *PrevVSETVLIMI = nullptr;
-
+ VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
+ // Track whether the prefix of the block we've scanned is transparent
+ // (meaning has not yet changed the abstract state).
+ bool PrefixTransparent = true;
for (MachineInstr &MI : MBB) {
+ const VSETVLIInfo PrevInfo = CurInfo;
+ transferBefore(CurInfo, MI);
+
// If this is an explicit VSETVLI or VSETIVLI, update our state.
- if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
- MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ if (isVectorConfigInstr(MI)) {
// Conservatively, mark the VL and VTYPE as live.
assert(MI.getOperand(3).getReg() == RISCV::VL &&
MI.getOperand(4).getReg() == RISCV::VTYPE &&
"Unexpected operands where VL and VTYPE should be");
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
- CurInfo = getInfoForVSETVLI(MI);
- BBLocalInfo = getInfoForVSETVLI(MI);
- PrevVSETVLIMI = &MI;
- continue;
+ PrefixTransparent = false;
}
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
- VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+ if (PrevInfo != CurInfo) {
+        // If this is the first implicit state change, and the state change
+        // requested can be proven to produce the same register contents, we
+        // can skip emitting the actual state change and continue as if we
+        // had emitted it: the GPR result of the implicit state change
+        // wouldn't be used, and the VL/VTYPE registers already hold the
+        // correct values. Note that we *do* need to model the state as if
+        // it changed; while the register contents are unchanged, the
+        // abstract model can change.
+ if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
+ insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
+ PrefixTransparent = false;
+ }
+
if (RISCVII::hasVLOp(TSFlags)) {
- unsigned Offset = 2;
- if (RISCVII::hasVecPolicyOp(TSFlags))
- Offset = 3;
- MachineOperand &VLOp =
- MI.getOperand(MI.getNumExplicitOperands() - Offset);
+ MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isReg()) {
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
@@ -1044,76 +1244,217 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
/*isImp*/ true));
+ }
- if (!CurInfo.isValid()) {
- // We haven't found any vector instructions or VL/VTYPE changes yet,
- // use the predecessor information.
- assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
- "Expected a valid predecessor state.");
- // Don't use predecessor information if there was an earlier instruction
- // in this block that allowed a vsetvli to be skipped for load/store.
- if (!(BBLocalInfo.isValid() &&
- canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) &&
- needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
- needVSETVLIPHI(NewInfo, MBB)) {
- insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
- CurInfo = NewInfo;
- BBLocalInfo = NewInfo;
- }
+ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
+ MI.modifiesRegister(RISCV::VTYPE))
+ PrefixTransparent = false;
- // We must update BBLocalInfo for every vector instruction.
- if (!BBLocalInfo.isValid())
- BBLocalInfo = NewInfo;
- } else {
- assert(BBLocalInfo.isValid());
- // If this instruction isn't compatible with the previous VL/VTYPE
- // we need to insert a VSETVLI.
- // If this is a unit-stride or strided load/store, we may be able to use
- // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
- // NOTE: We can't use predecessor information for the store. We must
- // treat it the same as the first phase so that we produce the correct
- // vl/vtype for succesor blocks.
- if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
- needVSETVLI(NewInfo, CurInfo)) {
- // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it
- // with current VL/VTYPE.
- bool NeedInsertVSETVLI = true;
- if (PrevVSETVLIMI) {
- bool HasSameAVL =
- CurInfo.hasSameAVL(NewInfo) ||
- (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
- NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
- // If these two VSETVLI have the same AVL and the same VLMAX,
- // we could merge these two VSETVLI.
- if (HasSameAVL &&
- CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
- PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
- NeedInsertVSETVLI = false;
- }
- if (isScalarMoveInstr(MI) &&
- ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
- (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
- NewInfo.hasSameVLMAX(CurInfo)) {
- PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
- NeedInsertVSETVLI = false;
- }
- }
- if (NeedInsertVSETVLI)
- insertVSETVLI(MBB, MI, NewInfo, CurInfo);
- CurInfo = NewInfo;
- BBLocalInfo = NewInfo;
- }
+ transferAfter(CurInfo, MI);
+ }
+
+ // If we reach the end of the block and our current info doesn't match the
+ // expected info, insert a vsetvli to correct.
+ if (!UseStrictAsserts) {
+ const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
+ if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
+ CurInfo != ExitInfo) {
+ // Note there's an implicit assumption here that terminators never use
+ // or modify VL or VTYPE. Also, fallthrough will return end().
+ auto InsertPt = MBB.getFirstInstrTerminator();
+ insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
+ CurInfo);
+ CurInfo = ExitInfo;
+ }
+ }
+
+ if (UseStrictAsserts && CurInfo.isValid()) {
+ const auto &Info = BlockInfo[MBB.getNumber()];
+ if (CurInfo != Info.Exit) {
+ LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
+ LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
+ LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
+ LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
+ }
+ assert(CurInfo == Info.Exit &&
+ "InsertVSETVLI dataflow invariant violated");
+ }
+}
+
+/// Return true if the VL value configured must be equal to the requested one.
+static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
+ if (!Info.hasAVLImm())
+    // An AVL register of X0 requests VLMAX, which is always the same value
+    // for a given VTYPE.
+ // TODO: Could extend to other registers by looking at the associated vreg
+ // def placement.
+ return RISCV::X0 == Info.getAVLReg();
+
+ unsigned AVL = Info.getAVLImm();
+ unsigned SEW = Info.getSEW();
+ unsigned AVLInBits = AVL * SEW;
+
+ unsigned LMul;
+ bool Fractional;
+ std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
+
+ if (Fractional)
+ return ST.getRealMinVLen() / LMul >= AVLInBits;
+ return ST.getRealMinVLen() * LMul >= AVLInBits;
+}
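The check above is the VLMAX bound with the division rearranged away: the configured VL is guaranteed to equal an immediate AVL exactly when AVL <= VLMAX = (VLEN / SEW) * LMUL, evaluated against the minimum VLEN the subtarget guarantees. A standalone restatement of the arithmetic (illustrative, not the LLVM API):

#include <cassert>
#include <cstdio>

// VL == AVL is guaranteed iff AVL <= VLMAX = (VLEN / SEW) * LMUL.
// Multiplying through by SEW avoids fractional intermediate values.
static bool hasFixedResult(unsigned AVL, unsigned SEW, unsigned LMul,
                           bool Fractional, unsigned MinVLen) {
  unsigned AVLInBits = AVL * SEW;
  return Fractional ? MinVLen / LMul >= AVLInBits
                    : MinVLen * LMul >= AVLInBits;
}

int main() {
  // VLEN=128, SEW=32, LMUL=1: VLMAX = 4, so AVL=4 is fixed but AVL=5 is not.
  assert(hasFixedResult(4, 32, 1, false, 128));
  assert(!hasFixedResult(5, 32, 1, false, 128));
  // VLEN=128, SEW=8, LMUL=1/2: VLMAX = 8.
  assert(hasFixedResult(8, 8, 2, true, 128));
  std::printf("ok\n");
}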
+
+/// Perform simple partial redundancy elimination of the VSETVLI instructions
+/// we're about to insert by looking for cases where we can PRE from the
+/// beginning of one block to the end of one of its predecessors. Specifically,
+/// this is geared to catch the common case of a fixed-length vsetvli in a
+/// single-block loop, when it could execute once in the preheader instead.
+void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+
+ if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
+ return;
+
+ MachineBasicBlock *UnavailablePred = nullptr;
+ VSETVLIInfo AvailableInfo;
+ for (MachineBasicBlock *P : MBB.predecessors()) {
+ const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
+ if (PredInfo.isUnknown()) {
+ if (UnavailablePred)
+ return;
+ UnavailablePred = P;
+ } else if (!AvailableInfo.isValid()) {
+ AvailableInfo = PredInfo;
+ } else if (AvailableInfo != PredInfo) {
+ return;
+ }
+ }
+
+ // Unreachable, single pred, or full redundancy. Note that FRE is handled by
+ // phase 3.
+ if (!UnavailablePred || !AvailableInfo.isValid())
+ return;
+
+ // Critical edge - TODO: consider splitting?
+ if (UnavailablePred->succ_size() != 1)
+ return;
+
+  // If VL can be less than AVL, then we can't reduce the frequency of
+  // execution.
+ if (!hasFixedResult(AvailableInfo, ST))
+ return;
+
+ // Does it actually let us remove an implicit transition in MBB?
+ bool Found = false;
+ for (auto &MI : MBB) {
+ if (isVectorConfigInstr(MI))
+ return;
+
+ const uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (RISCVII::hasSEWOp(TSFlags)) {
+ if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
+ return;
+ Found = true;
+ break;
+ }
+ }
+ if (!Found)
+ return;
+
+  // Finally, update the dataflow state and insert the actual vsetvli.
+  // Doing both keeps the code in sync with the dataflow results, which
+  // is critical for the correctness of phase 3.
+ auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
+ LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
+ << UnavailablePred->getName() << " with state "
+ << AvailableInfo << "\n");
+ BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
+ BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
+
+ // Note there's an implicit assumption here that terminators never use
+ // or modify VL or VTYPE. Also, fallthrough will return end().
+ auto InsertPt = UnavailablePred->getFirstInstrTerminator();
+ insertVSETVLI(*UnavailablePred, InsertPt,
+ UnavailablePred->findDebugLoc(InsertPt),
+ AvailableInfo, OldInfo);
+}
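The shape doPRE targets is a single-block loop: the loop block's predecessors are the preheader (state unavailable) and its own latch edge (state already available), so moving the configuration to the preheader's end makes it execute once rather than per iteration. A toy sketch of just the predecessor-selection rule (names and types illustrative):

#include <cstdio>
#include <optional>
#include <vector>

// Toy stand-in for a block's exit configuration; nullopt means "unknown".
using ExitState = std::optional<int>;

// Pick the single predecessor whose exit state doesn't already provide the
// required configuration; return -1 if there is no profitable choice.
static int choosePREPred(const std::vector<ExitState> &PredExits,
                         int Required) {
  int Unavailable = -1;
  for (int I = 0, E = (int)PredExits.size(); I != E; ++I) {
    if (!PredExits[I]) {                 // unknown exit state
      if (Unavailable != -1) return -1;  // more than one: give up
      Unavailable = I;
    } else if (*PredExits[I] != Required) {
      return -1;                         // available but wrong: give up
    }
  }
  return Unavailable;                    // -1 if fully redundant already
}

int main() {
  // Loop block preds: {preheader: unknown, latch: already configured as 42}.
  std::vector<ExitState> Preds = {std::nullopt, 42};
  std::printf("hoist into pred %d\n", choosePREPred(Preds, 42)); // pred 0
}

The real pass additionally rejects critical edges (the chosen predecessor must have exactly one successor) and requires hasFixedResult, so hoisting cannot change the VL that is produced.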
+
+static void doUnion(DemandedFields &A, DemandedFields B) {
+ A.VL |= B.VL;
+ A.SEW |= B.SEW;
+ A.LMUL |= B.LMUL;
+ A.SEWLMULRatio |= B.SEWLMULRatio;
+ A.TailPolicy |= B.TailPolicy;
+ A.MaskPolicy |= B.MaskPolicy;
+}
+
+// Return true if we can mutate PrevMI's VTYPE to match MI's
+// without changing any of the fields which have been used.
+// TODO: Restructure code to allow code reuse between this and isCompatible
+// above.
+static bool canMutatePriorConfig(const MachineInstr &PrevMI,
+ const MachineInstr &MI,
+ const DemandedFields &Used) {
+ // TODO: Extend this to handle cases where VL does change, but VL
+ // has not been used. (e.g. over a vmv.x.s)
+ if (!isVLPreservingConfig(MI))
+ // Note: `vsetvli x0, x0, vtype' is the canonical instruction
+ // for this case. If you find yourself wanting to add other forms
+ // to this "unused VTYPE" case, we're probably missing a
+ // canonicalization earlier.
+ return false;
+
+ if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
+ return false;
+
+ auto PriorVType = PrevMI.getOperand(2).getImm();
+ auto VType = MI.getOperand(2).getImm();
+ return areCompatibleVTYPEs(PriorVType, VType, Used);
+}
+
+void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
+ MachineInstr *PrevMI = nullptr;
+ DemandedFields Used;
+ SmallVector<MachineInstr*> ToDelete;
+ for (MachineInstr &MI : MBB) {
+ // Note: Must be *before* vsetvli handling to account for config cases
+ // which only change some subfields.
+ doUnion(Used, getDemanded(MI));
+
+ if (!isVectorConfigInstr(MI))
+ continue;
+
+ if (PrevMI) {
+ if (!Used.VL && !Used.usedVTYPE()) {
+ ToDelete.push_back(PrevMI);
+ // fallthrough
+ } else if (canMutatePriorConfig(*PrevMI, MI, Used)) {
+ PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
+ ToDelete.push_back(&MI);
+ // Leave PrevMI unchanged
+ continue;
}
- PrevVSETVLIMI = nullptr;
}
+ PrevMI = &MI;
+ Used = getDemanded(MI);
+ Register VRegDef = MI.getOperand(0).getReg();
+ if (VRegDef != RISCV::X0 &&
+ !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+ Used.VL = true;
+ }
- // If this is something updates VL/VTYPE that we don't know about, set
- // the state to unknown.
- if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
- MI.modifiesRegister(RISCV::VTYPE)) {
- CurInfo = VSETVLIInfo::getUnknown();
- BBLocalInfo = VSETVLIInfo::getUnknown();
- PrevVSETVLIMI = nullptr;
+ for (auto *MI : ToDelete)
+ MI->eraseFromParent();
+}
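Two rewrites fall out of the scan above: if nothing after the previous vsetvli demanded VL or any VTYPE field, that vsetvli is dead and is deleted; otherwise, if the current instruction is the VL-preserving `vsetvli x0, x0, vtype` form and the demanded fields agree, the previous instruction's VTYPE immediate is rewritten and the current one deleted. A toy sketch of the demanded-fields bookkeeping behind the dead-delete decision (illustrative types, not the pass's):

#include <cstdio>

// Toy stand-in for DemandedFields: which parts of VL/VTYPE later code reads.
struct Demanded {
  bool VL = false, SEW = false, LMUL = false;
  bool usedVTYPE() const { return SEW || LMUL; }
  // Field-wise union, mirroring doUnion() in the pass.
  void unite(const Demanded &O) {
    VL |= O.VL; SEW |= O.SEW; LMUL |= O.LMUL;
  }
};

int main() {
  // Between two vsetvlis we saw one instruction that only read SEW.
  Demanded Used;
  Used.unite({/*VL=*/false, /*SEW=*/true, /*LMUL=*/false});

  if (!Used.VL && !Used.usedVTYPE())
    std::printf("prior vsetvli is dead: delete it\n");
  else
    std::printf("prior vsetvli is partially used: try mutating its vtype\n");
}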
+
+void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
+ for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr &MI = *I++;
+ if (RISCV::isFaultFirstLoad(MI)) {
+ Register VLOutput = MI.getOperand(1).getReg();
+ if (!MRI->use_nodbg_empty(VLOutput))
+ BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
+ VLOutput);
+ // We don't use the vl output of the VLEFF/VLSEGFF anymore.
+ MI.getOperand(1).setReg(RISCV::X0);
}
}
}
@@ -1124,6 +1465,8 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
if (!ST.hasVInstructions())
return false;
+ LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
+
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
@@ -1133,34 +1476,77 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
bool HaveVectorOp = false;
  // Phase 1 - determine how VL/VTYPE are affected by each block.
- for (const MachineBasicBlock &MBB : MF)
+ for (const MachineBasicBlock &MBB : MF) {
HaveVectorOp |= computeVLVTYPEChanges(MBB);
+ // Initial exit state is whatever change we found in the block.
+ BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+ BBInfo.Exit = BBInfo.Change;
+ LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
+ << " is " << BBInfo.Exit << "\n");
+
+ }
// If we didn't find any instructions that need VSETVLI, we're done.
- if (HaveVectorOp) {
- // Phase 2 - determine the exit VL/VTYPE from each block. We add all
- // blocks to the list here, but will also add any that need to be revisited
- // during Phase 2 processing.
- for (const MachineBasicBlock &MBB : MF) {
- WorkList.push(&MBB);
- BlockInfo[MBB.getNumber()].InQueue = true;
- }
- while (!WorkList.empty()) {
- const MachineBasicBlock &MBB = *WorkList.front();
- WorkList.pop();
- computeIncomingVLVTYPE(MBB);
- }
+ if (!HaveVectorOp) {
+ BlockInfo.clear();
+ return false;
+ }
- // Phase 3 - add any vsetvli instructions needed in the block. Use the
- // Phase 2 information to avoid adding vsetvlis before the first vector
- // instruction in the block if the VL/VTYPE is satisfied by its
- // predecessors.
- for (MachineBasicBlock &MBB : MF)
- emitVSETVLIs(MBB);
+ // Phase 2 - determine the exit VL/VTYPE from each block. We add all
+ // blocks to the list here, but will also add any that need to be revisited
+ // during Phase 2 processing.
+ for (const MachineBasicBlock &MBB : MF) {
+ WorkList.push(&MBB);
+ BlockInfo[MBB.getNumber()].InQueue = true;
+ }
+ while (!WorkList.empty()) {
+ const MachineBasicBlock &MBB = *WorkList.front();
+ WorkList.pop();
+ computeIncomingVLVTYPE(MBB);
}
- BlockInfo.clear();
+ // Perform partial redundancy elimination of vsetvli transitions.
+ for (MachineBasicBlock &MBB : MF)
+ doPRE(MBB);
+
+ // Phase 3 - add any vsetvli instructions needed in the block. Use the
+ // Phase 2 information to avoid adding vsetvlis before the first vector
+ // instruction in the block if the VL/VTYPE is satisfied by its
+ // predecessors.
+ for (MachineBasicBlock &MBB : MF)
+ emitVSETVLIs(MBB);
+
+  // Now that all vsetvlis are explicit, go through and do block-local
+  // DSE and demanded-fields based peephole transforms. Note that
+  // this *must* be done outside the main dataflow so long as we allow
+  // any cross-block analysis within the dataflow. We can't have both
+  // demanded-fields based mutation and non-local analysis in the
+  // dataflow at the same time without introducing inconsistencies.
+ for (MachineBasicBlock &MBB : MF)
+ doLocalPostpass(MBB);
+
+  // Once we're fully done rewriting all the instructions, do a final pass
+  // through to check for VSETVLIs which write to an unused destination.
+  // For variants other than the "x0, x0" form, we can replace the dead
+  // destination register with X0 to reduce register pressure. This is
+  // really a generic optimization which can be applied to any dead def
+  // (TODO: generalize).
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ Register VRegDef = MI.getOperand(0).getReg();
+ if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
+ MI.getOperand(0).setReg(RISCV::X0);
+ }
+ }
+ }
+  // Insert a PseudoReadVL after each VLEFF/VLSEGFF, and replace uses of the
+  // vl output of the VLEFF/VLSEGFF with its result.
+ for (MachineBasicBlock &MBB : MF)
+ insertReadVL(MBB);
+
+ BlockInfo.clear();
return HaveVectorOp;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index f99d0f56c406..18b31f85bfdb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -196,7 +196,10 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
let TSFlags{16} = HasVecPolicyOp;
bit IsRVVWideningReduction = 0;
- let TSFlags{17} = IsRVVWideningReduction;
+ let TSFlags{17} = IsRVVWideningReduction;
+
+ bit UsesMaskPolicy = 0;
+ let TSFlags{18} = UsesMaskPolicy;
}
// Pseudo instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 55f4a19b79eb..685604ad9a59 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -264,6 +264,16 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // Handle copy from csr
+ if (RISCV::VCSRRegClass.contains(SrcReg) &&
+ RISCV::GPRRegClass.contains(DstReg)) {
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
+ .addImm(RISCVSysReg::lookupSysRegByName(TRI.getName(SrcReg))->Encoding)
+ .addReg(RISCV::X0);
+ return;
+ }
+
// FPR->FPR copies and VR->VR copies.
unsigned Opc;
bool IsScalableVector = true;
@@ -631,11 +641,7 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag) const {
- MachineFunction *MF = MBB.getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
Register SrcReg = RISCV::X0;
- Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- unsigned Num = 0;
if (!STI.is64Bit() && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
@@ -645,34 +651,34 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
assert(!Seq.empty());
for (RISCVMatInt::Inst &Inst : Seq) {
- // Write the final result to DstReg if it's the last instruction in the Seq.
- // Otherwise, write the result to the temp register.
- if (++Num == Seq.size())
- Result = DstReg;
-
- if (Inst.Opc == RISCV::LUI) {
- BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
+ switch (Inst.getOpndKind()) {
+ case RISCVMatInt::Imm:
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
.addImm(Inst.Imm)
.setMIFlag(Flag);
- } else if (Inst.Opc == RISCV::ADD_UW) {
- BuildMI(MBB, MBBI, DL, get(RISCV::ADD_UW), Result)
+ break;
+ case RISCVMatInt::RegX0:
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
.addReg(SrcReg, RegState::Kill)
.addReg(RISCV::X0)
.setMIFlag(Flag);
- } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
- Inst.Opc == RISCV::SH3ADD) {
- BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
+ break;
+ case RISCVMatInt::RegReg:
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
.addReg(SrcReg, RegState::Kill)
.addReg(SrcReg, RegState::Kill)
.setMIFlag(Flag);
- } else {
- BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
+ break;
+ case RISCVMatInt::RegImm:
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
.addReg(SrcReg, RegState::Kill)
.addImm(Inst.Imm)
.setMIFlag(Flag);
+ break;
}
+
// Only the first instruction has X0 as its source.
- SrcReg = Result;
+ SrcReg = DstReg;
}
}
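The rewrite above works because RISCVMatInt now classifies each instruction in the sequence by its operand kind, so every step can target DstReg directly instead of threading a scratch register through the loop. The decomposition itself, for a value reachable by an RV32-style LUI+ADDI pair, is the classic split where the low 12 bits are sign-extended and the upper 20 bits are rounded to compensate; a hedged standalone sketch (not the RISCVMatInt API):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Split a 32-bit value into LUI (upper 20 bits) and ADDI (signed low 12 bits).
// Because ADDI sign-extends its immediate, adding 0x800 first rounds the
// upper part up whenever the low 12 bits will act as a negative offset.
static void splitLuiAddi(int32_t Val, uint32_t &Hi20, int32_t &Lo12) {
  Hi20 = ((uint32_t)Val + 0x800u) >> 12;
  Lo12 = (int32_t)((uint32_t)Val << 20) >> 20; // sign-extend low 12 bits
}

int main() {
  for (int32_t Val : {0x12345678, -1234567, 0x7FFFF800}) {
    uint32_t Hi20; int32_t Lo12;
    splitLuiAddi(Val, Hi20, Lo12);
    // Reassemble in unsigned arithmetic so the wraparound cases are defined.
    int32_t Rebuilt = (int32_t)((Hi20 << 12) + (uint32_t)Lo12);
    assert(Rebuilt == Val);
    std::printf("%d = LUI 0x%x + ADDI %d\n", Val, Hi20, Lo12);
  }
}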
@@ -1052,29 +1058,25 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
switch (OpType) {
default:
llvm_unreachable("Unexpected operand type");
- case RISCVOp::OPERAND_UIMM2:
- Ok = isUInt<2>(Imm);
- break;
- case RISCVOp::OPERAND_UIMM3:
- Ok = isUInt<3>(Imm);
- break;
- case RISCVOp::OPERAND_UIMM4:
- Ok = isUInt<4>(Imm);
- break;
- case RISCVOp::OPERAND_UIMM5:
- Ok = isUInt<5>(Imm);
- break;
- case RISCVOp::OPERAND_UIMM7:
- Ok = isUInt<7>(Imm);
- break;
- case RISCVOp::OPERAND_UIMM12:
- Ok = isUInt<12>(Imm);
- break;
+
+ // clang-format off
+#define CASE_OPERAND_UIMM(NUM) \
+ case RISCVOp::OPERAND_UIMM##NUM: \
+ Ok = isUInt<NUM>(Imm); \
+ break;
+ CASE_OPERAND_UIMM(2)
+ CASE_OPERAND_UIMM(3)
+ CASE_OPERAND_UIMM(4)
+ CASE_OPERAND_UIMM(5)
+ CASE_OPERAND_UIMM(7)
+ CASE_OPERAND_UIMM(12)
+ CASE_OPERAND_UIMM(20)
+ // clang-format on
case RISCVOp::OPERAND_SIMM12:
Ok = isInt<12>(Imm);
break;
- case RISCVOp::OPERAND_UIMM20:
- Ok = isUInt<20>(Imm);
+ case RISCVOp::OPERAND_SIMM12_LSB00000:
+ Ok = isShiftedInt<7, 5>(Imm);
break;
case RISCVOp::OPERAND_UIMMLOG2XLEN:
if (STI.getTargetTriple().isArch64Bit())
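The macro collapses the repeated unsigned-immediate range checks, and the new OPERAND_SIMM12_LSB00000 case accepts signed 12-bit immediates whose low five bits are zero, i.e. multiples of 32 in [-2048, 2016]. A standalone restatement of the predicates involved (same semantics as the llvm::isUInt / llvm::isShiftedInt helpers, renamed here to stay self-contained):

#include <cstdint>

// Minimal re-statements of the predicates used by the verifier cases above.
template <unsigned N> constexpr bool isUIntN(uint64_t X) {
  return N >= 64 || X < (UINT64_C(1) << N);
}
template <unsigned N> constexpr bool isIntN(int64_t X) {
  return N >= 64 || (-(INT64_C(1) << (N - 1)) <= X &&
                     X < (INT64_C(1) << (N - 1)));
}
// Signed (N+S)-bit value whose low S bits are zero.
template <unsigned N, unsigned S> constexpr bool isShiftedIntN(int64_t X) {
  return isIntN<N + S>(X) && (X % (INT64_C(1) << S)) == 0;
}

static_assert(isUIntN<5>(31) && !isUIntN<5>(32), "uimm5 bound");
// OPERAND_SIMM12_LSB00000: multiples of 32 in [-2048, 2016].
static_assert(isShiftedIntN<7, 5>(-2048) && isShiftedIntN<7, 5>(2016), "");
static_assert(!isShiftedIntN<7, 5>(2017) && !isShiftedIntN<7, 5>(2047), "");

int main() { return 0; }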
@@ -1205,6 +1207,11 @@ enum MachineOutlinerConstructionID {
MachineOutlinerDefault
};
+bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
+ MachineFunction &MF) const {
+ return MF.getFunction().hasMinSize();
+}
+
outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
@@ -1212,10 +1219,7 @@ outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
// be used to setup the function call.
auto CannotInsertCall = [](outliner::Candidate &C) {
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
-
- C.initLRU(*TRI);
- LiveRegUnits LRU = C.LRU;
- return !LRU.available(RISCV::X5);
+ return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
};
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
@@ -1258,7 +1262,12 @@ RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
if (MI.isPosition()) {
// We can manually strip out CFI instructions later.
if (MI.isCFIInstruction())
- return outliner::InstrType::Invisible;
+      // If the current function has exception handling code, we can't
+      // outline and strip these CFI instructions, since doing so may break
+      // the .eh_frame section needed for unwinding.
+ return MI.getMF()->getFunction().needsUnwindTableEntry()
+ ? outliner::InstrType::Illegal
+ : outliner::InstrType::Invisible;
return outliner::InstrType::Illegal;
}
@@ -1325,7 +1334,7 @@ void RISCVInstrInfo::buildOutlinedFrame(
MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
- MachineFunction &MF, const outliner::Candidate &C) const {
+ MachineFunction &MF, outliner::Candidate &C) const {
// Add in a call instruction to the outlined function at the given location.
It = MBB.insert(It,
@@ -1335,6 +1344,53 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
return It;
}
+// MIR printer helper function to annotate Operands with a comment.
+std::string RISCVInstrInfo::createMIROperandComment(
+ const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
+ const TargetRegisterInfo *TRI) const {
+ // Print a generic comment for this operand if there is one.
+ std::string GenericComment =
+ TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
+ if (!GenericComment.empty())
+ return GenericComment;
+
+ // If not, we must have an immediate operand.
+ if (!Op.isImm())
+ return std::string();
+
+ std::string Comment;
+ raw_string_ostream OS(Comment);
+
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+
+ // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
+ // operand of vector codegen pseudos.
+ if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETIVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
+ OpIdx == 2) {
+ unsigned Imm = MI.getOperand(OpIdx).getImm();
+ RISCVVType::printVType(Imm, OS);
+ } else if (RISCVII::hasSEWOp(TSFlags)) {
+ unsigned NumOperands = MI.getNumExplicitOperands();
+ bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags);
+
+ // The SEW operand is before any policy operand.
+ if (OpIdx != NumOperands - HasPolicy - 1)
+ return std::string();
+
+ unsigned Log2SEW = MI.getOperand(OpIdx).getImm();
+ unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
+ assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+ OS << "e" << SEW;
+ }
+
+ OS.flush();
+ return Comment;
+}
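The SEW branch relies on the pseudo's operand holding log2(SEW), with the 0 value falling back to e8 as the `Log2SEW ? 1 << Log2SEW : 8` expression above implies. A trivial standalone decode of that convention:

#include <cassert>

// Decode the SEW operand convention used above: the operand holds log2(SEW),
// and 0 is a special encoding that prints as e8.
static unsigned decodeSEW(unsigned Log2SEW) {
  return Log2SEW ? 1u << Log2SEW : 8u;
}

int main() {
  assert(decodeSEW(0) == 8);   // special encoding
  assert(decodeSEW(3) == 8);   // e8
  assert(decodeSEW(5) == 32);  // e32
  assert(decodeSEW(6) == 64);  // e64
}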
+
// clang-format off
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
@@ -1653,6 +1709,12 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
+    // If the tail policy is undisturbed, we can't convert.
+ assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
+ MI.getNumExplicitOperands() == 6);
+ if ((MI.getOperand(5).getImm() & 1) == 0)
+ return nullptr;
+
// clang-format off
unsigned NewOpc;
switch (MI.getOpcode()) {
@@ -1722,11 +1784,10 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
"Reserve the stack by the multiple of one vector size.");
MachineRegisterInfo &MRI = MF.getRegInfo();
- const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
int64_t NumOfVReg = Amount / 8;
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL)
+ BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), VL)
.setMIFlag(Flag);
assert(isInt<32>(NumOfVReg) &&
"Expect the number of vector registers within 32-bits.");
@@ -1734,47 +1795,55 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
uint32_t ShiftAmount = Log2_32(NumOfVReg);
if (ShiftAmount == 0)
return VL;
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
+ BuildMI(MBB, II, DL, get(RISCV::SLLI), VL)
.addReg(VL, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
+ } else if ((NumOfVReg == 3 || NumOfVReg == 5 || NumOfVReg == 9) &&
+ STI.hasStdExtZba()) {
+ // We can use Zba SHXADD instructions for multiply in some cases.
+ // TODO: Generalize to SHXADD+SLLI.
+ unsigned Opc;
+ switch (NumOfVReg) {
+ default: llvm_unreachable("Unexpected number of vregs");
+ case 3: Opc = RISCV::SH1ADD; break;
+ case 5: Opc = RISCV::SH2ADD; break;
+ case 9: Opc = RISCV::SH3ADD; break;
+ }
+ BuildMI(MBB, II, DL, get(Opc), VL)
+ .addReg(VL, RegState::Kill)
+ .addReg(VL)
+ .setMIFlag(Flag);
} else if (isPowerOf2_32(NumOfVReg - 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister)
+ BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
.addReg(VL)
.addImm(ShiftAmount)
.setMIFlag(Flag);
- BuildMI(MBB, II, DL, TII->get(RISCV::ADD), VL)
+ BuildMI(MBB, II, DL, get(RISCV::ADD), VL)
.addReg(ScaledRegister, RegState::Kill)
.addReg(VL, RegState::Kill)
.setMIFlag(Flag);
} else if (isPowerOf2_32(NumOfVReg + 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg + 1);
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister)
+ BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
.addReg(VL)
.addImm(ShiftAmount)
.setMIFlag(Flag);
- BuildMI(MBB, II, DL, TII->get(RISCV::SUB), VL)
+ BuildMI(MBB, II, DL, get(RISCV::SUB), VL)
.addReg(ScaledRegister, RegState::Kill)
.addReg(VL, RegState::Kill)
.setMIFlag(Flag);
} else {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- if (!isInt<12>(NumOfVReg))
- movImm(MBB, II, DL, N, NumOfVReg);
- else {
- BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
- .addReg(RISCV::X0)
- .addImm(NumOfVReg)
- .setMIFlag(Flag);
- }
- if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtM())
+ movImm(MBB, II, DL, N, NumOfVReg, Flag);
+ if (!STI.hasStdExtM())
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),
"M-extension must be enabled to calculate the vscaled size/offset."});
- BuildMI(MBB, II, DL, TII->get(RISCV::MUL), VL)
+ BuildMI(MBB, II, DL, get(RISCV::MUL), VL)
.addReg(VL, RegState::Kill)
.addReg(N, RegState::Kill)
.setMIFlag(Flag);
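The Zba case is profitable because shNadd rd, rs1, rs2 computes (rs1 << N) + rs2, so feeding the same register to both sources multiplies it by 2^N + 1 in a single instruction. A quick standalone check of the identity:

#include <cassert>
#include <cstdint>

// shNadd rd, rs, rs computes (rs << N) + rs == rs * (2^N + 1).
static uint64_t shNadd(uint64_t Rs1, uint64_t Rs2, unsigned N) {
  return (Rs1 << N) + Rs2;
}

int main() {
  for (uint64_t X : {UINT64_C(1), UINT64_C(42), UINT64_C(0x123456789)}) {
    assert(shNadd(X, X, 1) == X * 3); // sh1add
    assert(shNadd(X, X, 2) == X * 5); // sh2add
    assert(shNadd(X, X, 3) == X * 9); // sh3add
  }
}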
@@ -1811,20 +1880,18 @@ static bool isRVVWholeLoadStore(unsigned Opcode) {
}
}
-bool RISCVInstrInfo::isRVVSpill(const MachineInstr &MI, bool CheckFIs) const {
+bool RISCV::isRVVSpill(const MachineInstr &MI) {
// RVV lacks any support for immediate addressing for stack addresses, so be
// conservative.
unsigned Opcode = MI.getOpcode();
if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
!isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
return false;
- return !CheckFIs || any_of(MI.operands(), [](const MachineOperand &MO) {
- return MO.isFI();
- });
+ return true;
}
Optional<std::pair<unsigned, unsigned>>
-RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const {
+RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
switch (Opcode) {
default:
return None;
@@ -1863,3 +1930,8 @@ RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const {
return std::make_pair(8u, 1u);
}
}
+
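+// Return true if MI is a fault-only-first vector load: such pseudos have two
+// explicit defs (the loaded value and the vl output) and write VL.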
+bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
+ return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) &&
+ !MI.isInlineAsm();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index da0877c4299a..5368437618bd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -135,6 +135,8 @@ public:
virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const override;
+ bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
+
// Calculate target-specific information for a set of outlining candidates.
outliner::OutlinedFunction getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
@@ -153,7 +155,7 @@ public:
virtual MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
- const outliner::Candidate &C) const override;
+ outliner::Candidate &C) const override;
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
@@ -164,25 +166,31 @@ public:
MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
LiveIntervals *LIS) const override;
+ // MIR printer helper function to annotate Operands with a comment.
+ std::string
+ createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op,
+ unsigned OpIdx,
+ const TargetRegisterInfo *TRI) const override;
+
Register getVLENFactoredAmount(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, const DebugLoc &DL, int64_t Amount,
MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
- // Returns true if the given MI is an RVV instruction opcode for which we may
- // expect to see a FrameIndex operand. When CheckFIs is true, the instruction
- // must contain at least one FrameIndex operand.
- bool isRVVSpill(const MachineInstr &MI, bool CheckFIs) const;
-
- Optional<std::pair<unsigned, unsigned>>
- isRVVSpillForZvlsseg(unsigned Opcode) const;
-
protected:
const RISCVSubtarget &STI;
};
namespace RISCV {
+// Returns true if the given MI is an RVV instruction opcode for which we may
+// expect to see a FrameIndex operand.
+bool isRVVSpill(const MachineInstr &MI);
+
+Optional<std::pair<unsigned, unsigned>> isRVVSpillForZvlsseg(unsigned Opcode);
+
+bool isFaultFirstLoad(const MachineInstr &MI);
+
// Implemented in RISCVGenInstrInfo.inc
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index ee6a74b7f14f..ee4c026af8f4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -83,6 +83,21 @@ def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE",
SDT_RISCVReadCycleWide,
[SDNPHasChain, SDNPSideEffect]>;
+def riscv_add_lo : SDNode<"RISCVISD::ADD_LO", SDTIntBinOp>;
+def riscv_hi : SDNode<"RISCVISD::HI", SDTIntUnaryOp>;
+def riscv_lla : SDNode<"RISCVISD::LLA", SDTIntUnaryOp>;
+def riscv_add_tprel : SDNode<"RISCVISD::ADD_TPREL",
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>]>>;
+
+def riscv_la : SDNode<"RISCVISD::LA", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def riscv_la_tls_ie : SDNode<"RISCVISD::LA_TLS_IE", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def riscv_la_tls_gd : SDNode<"RISCVISD::LA_TLS_GD", SDTIntUnaryOp>;
+
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -105,6 +120,19 @@ def ImmZeroAsmOperand : AsmOperandClass {
let DiagnosticType = !strconcat("Invalid", Name);
}
+// A parse method for (${gpr}) or 0(${gpr}), where the 0 is silently ignored.
+def ZeroOffsetMemOpOperand : AsmOperandClass {
+ let Name = "ZeroOffsetMemOpOperand";
+ let RenderMethod = "addRegOperands";
+ let PredicateMethod = "isGPR";
+ let ParserMethod = "parseZeroOffsetMemOp";
+}
+
+def GPRMemZeroOffset : RegisterOperand<GPR> {
+ let ParserMatchClass = ZeroOffsetMemOpOperand;
+ let PrintMethod = "printZeroOffsetMemOp";
+}
+
class SImmAsmOperand<int width, string suffix = "">
: ImmAsmOperand<"S", width, suffix> {
}
@@ -334,10 +362,19 @@ def ixlenimm_li : Operand<XLenVT> {
// Standalone (codegen-only) immleaf patterns.
-// A 12-bit signed immediate plus one where the imm range will be -2047~2048.
+// A 12-bit signed immediate plus one where the imm range will be [-2047, 2048].
def simm12_plus1 : ImmLeaf<XLenVT,
[{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+// A non-zero immediate whose value plus one still fits in 12 signed bits.
+def simm12_minus1_nonzero : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ // The immediate operand must be in range [-2049, 0) or (0, 2046].
+ int64_t Imm = N->getSExtValue();
+ return (Imm >= -2049 && Imm < 0) || (Imm > 0 && Imm <= 2046);
+}]>;
+
// A 6-bit constant greater than 32.
def uimm6gt32 : ImmLeaf<XLenVT, [{
return isUInt<6>(Imm) && Imm > 32;
@@ -345,8 +382,10 @@ def uimm6gt32 : ImmLeaf<XLenVT, [{
// Addressing modes.
// Necessary because a frameindex can't be matched directly in a pattern.
-def AddrFI : ComplexPattern<iPTR, 1, "SelectAddrFI", [frameindex], []>;
+def FrameAddrRegImm : ComplexPattern<iPTR, 2, "SelectFrameAddrRegImm",
+ [frameindex, or, add]>;
def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
+def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm">;
// Return the negation of an immediate value.
def NegImm : SDNodeXForm<imm, [{
@@ -360,9 +399,9 @@ def ImmSub32 : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
-// Return an immediate value plus 32.
-def ImmPlus32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue() + 32, SDLoc(N),
+// Return an immediate value plus 1.
+def ImmPlus1 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() + 1, SDLoc(N),
N->getValueType(0));
}]>;
@@ -380,7 +419,9 @@ def ImmSubFrom32 : SDNodeXForm<imm, [{
}]>;
// Check if (add r, imm) can be optimized to (ADDI (ADDI r, imm0), imm1),
-// in which imm = imm0 + imm1 and both imm0 and imm1 are simm12.
+// in which imm = imm0 + imm1 and both imm0 and imm1 are simm12. We make imm0
+// as large as possible and imm1 as small as possible so that we might be able
+// to use c.addi for the small immediate.
def AddiPair : PatLeaf<(imm), [{
if (!N->hasOneUse())
return false;
@@ -389,19 +430,27 @@ def AddiPair : PatLeaf<(imm), [{
return (-4096 <= Imm && Imm <= -2049) || (2048 <= Imm && Imm <= 4094);
}]>;
-// Return imm/2.
-def AddiPairImmA : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue() / 2, SDLoc(N),
+// Return imm - (imm < 0 ? -2048 : 2047).
+def AddiPairImmSmall : SDNodeXForm<imm, [{
+ int64_t Imm = N->getSExtValue();
+ int64_t Adj = N->getSExtValue() < 0 ? -2048 : 2047;
+ return CurDAG->getTargetConstant(Imm - Adj, SDLoc(N),
N->getValueType(0));
}]>;
-// Return imm - imm/2.
-def AddiPairImmB : SDNodeXForm<imm, [{
- int64_t Imm = N->getSExtValue();
- return CurDAG->getTargetConstant(Imm - Imm / 2, SDLoc(N),
+// Return -2048 if the immediate is negative, or 2047 if positive. These are
+// the simm12 values of greatest magnitude.
+def AddiPairImmLarge : SDNodeXForm<imm, [{
+ int64_t Imm = N->getSExtValue() < 0 ? -2048 : 2047;
+ return CurDAG->getTargetConstant(Imm, SDLoc(N),
N->getValueType(0));
}]>;
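Together the two transforms split an out-of-range addend into a saturated simm12 plus a remainder, e.g. an add of 2050 becomes addi 2047 followed by addi 3, and keeping the second immediate small is what may let it fit c.addi's 6-bit range. A standalone sketch of the split:

#include <cassert>
#include <cstdio>
#include <utility>

// Split an AddiPair-range immediate into a saturated simm12 part and a
// remainder, mirroring AddiPairImmLarge/AddiPairImmSmall above.
static std::pair<int, int> splitAddiPair(int Imm) {
  int Large = Imm < 0 ? -2048 : 2047;
  int Small = Imm - Large;
  assert(Small >= -2048 && Small <= 2047 && "remainder must be simm12");
  return {Large, Small};
}

int main() {
  for (int Imm : {3000, 2050, -2049, -4096}) {
    auto [Large, Small] = splitAddiPair(Imm);
    std::printf("addi t0, t0, %d ; addi t0, t0, %d  (total %d)\n",
                Large, Small, Large + Small);
    assert(Large + Small == Imm);
  }
}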
+def TrailingZeros : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+ SDLoc(N), N->getValueType(0));
+}]>;
+
def XLenSubTrailingOnes : SDNodeXForm<imm, [{
uint64_t XLen = Subtarget->getXLen();
uint64_t TrailingOnes = N->getAPIntValue().countTrailingOnes();
@@ -410,7 +459,13 @@ def XLenSubTrailingOnes : SDNodeXForm<imm, [{
}]>;
// Checks if this mask is a non-empty sequence of ones starting at the
-// least significant bit with the remainder zero and exceeds simm12.
+// most/least significant bit with the remainder zero, exceeding
+// simm32/simm12 respectively.
+def LeadingOnesMask : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ return !isInt<32>(N->getSExtValue()) && isMask_64(~N->getSExtValue());
+}], TrailingZeros>;
+
def TrailingOnesMask : PatLeaf<(imm), [{
if (!N->hasOneUse())
return false;
@@ -437,20 +492,35 @@ class BranchCC_rri<bits<3> funct3, string opcodestr>
let isTerminator = 1;
}
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
class Load_ri<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_LOAD, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12),
opcodestr, "$rd, ${imm12}(${rs1})">;
+class HLoad_r<bits<7> funct7, bits<5> funct5, string opcodestr>
+ : RVInstR<funct7, 0b100, OPC_SYSTEM, (outs GPR:$rd),
+ (ins GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs1"> {
+ let rs2 = funct5;
+}
+}
+
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
// encoding.
-let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
class Store_rri<bits<3> funct3, string opcodestr>
: RVInstS<funct3, OPC_STORE, (outs),
(ins GPR:$rs2, GPR:$rs1, simm12:$imm12),
opcodestr, "$rs2, ${imm12}(${rs1})">;
+class HStore_rr<bits<7> funct7, string opcodestr>
+ : RVInstR<funct7, 0b100, OPC_SYSTEM, (outs),
+ (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
+ opcodestr, "$rs2, $rs1"> {
+ let rd = 0;
+}
+}
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class ALU_ri<bits<3> funct3, string opcodestr>
: RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12),
@@ -465,9 +535,12 @@ class Shift_ri<bits<5> imm11_7, bits<3> funct3, string opcodestr>
Sched<[WriteShiftImm, ReadShiftImm]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
+class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
+ bit Commutable = 0>
: RVInstR<funct7, funct3, OPC_OP, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
- opcodestr, "$rd, $rs1, $rs2">;
+ opcodestr, "$rd, $rs1, $rs2"> {
+ let isCommutable = Commutable;
+}
let hasNoSchedulingInfo = 1,
hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
@@ -490,15 +563,25 @@ class ShiftW_ri<bits<7> imm11_5, bits<3> funct3, string opcodestr>
Sched<[WriteShiftImm32, ReadShiftImm32]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class ALUW_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
+class ALUW_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
+ bit Commutable = 0>
: RVInstR<funct7, funct3, OPC_OP_32, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+ let isCommutable = Commutable;
+}
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
class Priv<string opcodestr, bits<7> funct7>
: RVInstR<funct7, 0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1, GPR:$rs2),
opcodestr, "">;
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class Priv_rr<string opcodestr, bits<7> funct7>
+ : RVInstR<funct7, 0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1, GPR:$rs2),
+ opcodestr, "$rs1, $rs2"> {
+ let rd = 0;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -558,16 +641,26 @@ def SLLI : Shift_ri<0b00000, 0b001, "slli">;
def SRLI : Shift_ri<0b00000, 0b101, "srli">;
def SRAI : Shift_ri<0b01000, 0b101, "srai">;
-def ADD : ALU_rr<0b0000000, 0b000, "add">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def SUB : ALU_rr<0b0100000, 0b000, "sub">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def SLL : ALU_rr<0b0000000, 0b001, "sll">, Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
-def SLT : ALU_rr<0b0000000, 0b010, "slt">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def SLTU : ALU_rr<0b0000000, 0b011, "sltu">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def XOR : ALU_rr<0b0000000, 0b100, "xor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def SRL : ALU_rr<0b0000000, 0b101, "srl">, Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
-def SRA : ALU_rr<0b0100000, 0b101, "sra">, Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
-def OR : ALU_rr<0b0000000, 0b110, "or">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def AND : ALU_rr<0b0000000, 0b111, "and">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def ADD : ALU_rr<0b0000000, 0b000, "add", /*Commutable*/1>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SUB : ALU_rr<0b0100000, 0b000, "sub">,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLL : ALU_rr<0b0000000, 0b001, "sll">,
+ Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
+def SLT : ALU_rr<0b0000000, 0b010, "slt">,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLTU : ALU_rr<0b0000000, 0b011, "sltu">,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def XOR : ALU_rr<0b0000000, 0b100, "xor", /*Commutable*/1>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SRL : ALU_rr<0b0000000, 0b101, "srl">,
+ Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
+def SRA : ALU_rr<0b0100000, 0b101, "sra">,
+ Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
+def OR : ALU_rr<0b0000000, 0b110, "or", /*Commutable*/1>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def AND : ALU_rr<0b0000000, 0b111, "and", /*Commutable*/1>,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs),
@@ -642,7 +735,7 @@ def SLLIW : ShiftW_ri<0b0000000, 0b001, "slliw">;
def SRLIW : ShiftW_ri<0b0000000, 0b101, "srliw">;
def SRAIW : ShiftW_ri<0b0100000, 0b101, "sraiw">;
-def ADDW : ALUW_rr<0b0000000, 0b000, "addw">,
+def ADDW : ALUW_rr<0b0000000, 0b000, "addw", /*Commutable*/1>,
Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
def SUBW : ALUW_rr<0b0100000, 0b000, "subw">,
Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
@@ -684,11 +777,40 @@ def WFI : Priv<"wfi", 0b0001000>, Sched<[]> {
let rs2 = 0b00101;
}
-let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
-def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs),
- (ins GPR:$rs1, GPR:$rs2),
- "sfence.vma", "$rs1, $rs2">, Sched<[]> {
+def SFENCE_W_INVAL : Priv<"sfence.w.inval", 0b0001100>, Sched<[]> {
let rd = 0;
+ let rs1 = 0;
+ let rs2 = 0;
+}
+
+def SFENCE_INVAL_IR : Priv<"sfence.inval.ir", 0b0001100>, Sched<[]> {
+ let rd = 0;
+ let rs1 = 0;
+ let rs2 = 0b00001;
+}
+
+def SFENCE_VMA : Priv_rr<"sfence.vma", 0b0001001>, Sched<[]>;
+def SINVAL_VMA : Priv_rr<"sinval.vma", 0b0001011>, Sched<[]>;
+def HFENCE_VVMA : Priv_rr<"hfence.vvma", 0b0010001>, Sched<[]>;
+def HFENCE_GVMA : Priv_rr<"hfence.gvma", 0b0110001>, Sched<[]>;
+def HINVAL_VVMA : Priv_rr<"hinval.vvma", 0b0010011>, Sched<[]>;
+def HINVAL_GVMA : Priv_rr<"hinval.gvma", 0b0110011>, Sched<[]>;
+
+def HLV_B : HLoad_r<0b0110000, 0b00000, "hlv.b">, Sched<[]>;
+def HLV_BU : HLoad_r<0b0110000, 0b00001, "hlv.bu">, Sched<[]>;
+def HLV_H : HLoad_r<0b0110010, 0b00000, "hlv.h">, Sched<[]>;
+def HLV_HU : HLoad_r<0b0110010, 0b00001, "hlv.hu">, Sched<[]>;
+def HLVX_HU : HLoad_r<0b0110010, 0b00011, "hlvx.hu">, Sched<[]>;
+def HLV_W : HLoad_r<0b0110100, 0b00000, "hlv.w">, Sched<[]>;
+def HLVX_WU : HLoad_r<0b0110100, 0b00011, "hlvx.wu">, Sched<[]>;
+def HSV_B : HStore_rr<0b0110001, "hsv.b">, Sched<[]>;
+def HSV_H : HStore_rr<0b0110011, "hsv.h">, Sched<[]>;
+def HSV_W : HStore_rr<0b0110101, "hsv.w">, Sched<[]>;
+
+let Predicates = [IsRV64] in {
+def HLV_WU : HLoad_r<0b0110100, 0b00001, "hlv.wu">, Sched<[]>;
+def HLV_D : HLoad_r<0b0110110, 0b00000, "hlv.d">, Sched<[]>;
+def HSV_D : HStore_rr<0b0110111, "hsv.d">, Sched<[]>;
}
//===----------------------------------------------------------------------===//
@@ -799,6 +921,9 @@ def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset
def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw
+let Predicates = [HasStdExtZihintpause] in
+def : InstAlias<"pause", (FENCE 0x1, 0x0)>; // 0x1 == w
+
def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0)>;
def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, CYCLE.Encoding, X0)>;
def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, TIME.Encoding, X0)>;
@@ -831,6 +956,12 @@ def : InstAlias<"csrrc $rd, $csr, $imm", (CSRRCI GPR:$rd, csr_sysreg:$csr, uimm5
def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>;
def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
+def : InstAlias<"hfence.gvma", (HFENCE_GVMA X0, X0)>;
+def : InstAlias<"hfence.gvma $rs", (HFENCE_GVMA GPR:$rs, X0)>;
+
+def : InstAlias<"hfence.vvma", (HFENCE_VVMA X0, X0)>;
+def : InstAlias<"hfence.vvma $rs", (HFENCE_VVMA GPR:$rs, X0)>;
+
let EmitPriority = 0 in {
def : InstAlias<"lb $rd, (${rs1})",
(LB GPR:$rd, GPR:$rs1, 0)>;
@@ -1006,9 +1137,6 @@ class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
/// Predicates
-def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
- return isOrEquivalentToAdd(N);
-}]>;
def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
}]>;
@@ -1018,13 +1146,14 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
}]>;
def zexti32 : ComplexPattern<i64, 1, "selectZExti32">;
-def add_oneuse : PatFrag<(ops node:$A, node:$B), (add node:$A, node:$B), [{
+class binop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A, node:$B),
+ (operator node:$A, node:$B), [{
return N->hasOneUse();
}]>;
-def mul_oneuse : PatFrag<(ops node:$A, node:$B), (mul node:$A, node:$B), [{
- return N->hasOneUse();
-}]>;
+def add_oneuse : binop_oneuse<add>;
+def mul_oneuse : binop_oneuse<mul>;
def mul_const_oneuse : PatFrag<(ops node:$A, node:$B),
(mul node:$A, node:$B), [{
@@ -1034,22 +1163,16 @@ def mul_const_oneuse : PatFrag<(ops node:$A, node:$B),
return false;
}]>;
-def sext_oneuse : PatFrag<(ops node:$A), (sext node:$A), [{
- return N->hasOneUse();
-}]>;
-
-def zext_oneuse : PatFrag<(ops node:$A), (zext node:$A), [{
+class unop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A),
+ (operator node:$A), [{
return N->hasOneUse();
}]>;
-def anyext_oneuse : PatFrag<(ops node:$A), (anyext node:$A), [{
- return N->hasOneUse();
-}]>;
-
-def fpext_oneuse : PatFrag<(ops node:$A),
- (any_fpextend node:$A), [{
- return N->hasOneUse();
-}]>;
+def sext_oneuse : unop_oneuse<sext>;
+def zext_oneuse : unop_oneuse<zext>;
+def anyext_oneuse : unop_oneuse<anyext>;
+def fpext_oneuse : unop_oneuse<any_fpextend>;
/// Simple arithmetic operations
@@ -1066,7 +1189,9 @@ def : PatGprUimmLog2XLen<shl, SLLI>;
def : PatGprUimmLog2XLen<srl, SRLI>;
def : PatGprUimmLog2XLen<sra, SRAI>;
-// AND with trailing ones mask exceeding simm12.
+// AND with leading/trailing ones mask exceeding simm32/simm12.
+def : Pat<(i64 (and GPR:$rs, LeadingOnesMask:$mask)),
+ (SLLI (SRLI $rs, LeadingOnesMask:$mask), LeadingOnesMask:$mask)>;
def : Pat<(XLenVT (and GPR:$rs, TrailingOnesMask:$mask)),
(SRLI (SLLI $rs, TrailingOnesMask:$mask), TrailingOnesMask:$mask)>;
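The new leading-ones pattern works because a mask of solid ones from bit 63 down to bit K equals ~((1 << K) - 1), so (x >> K) << K clears exactly the low K bits; this trades a 64-bit AND constant that fits neither simm12 nor simm32 for two shifts. A quick standalone check (TrailingZeros above computes K; std::countr_zero from the C++20 <bit> header plays that role here):

#include <bit>
#include <cassert>
#include <cstdint>

// For Mask = ~((1 << K) - 1) (ones from bit 63 down to bit K),
// (X >> K) << K == X & Mask, so SRLI+SLLI replaces the AND.
int main() {
  const uint64_t Mask = 0xFFFFFFFF00000000ULL; // K = 32 trailing zeros
  const unsigned K = std::countr_zero(Mask);
  assert(K == 32);
  for (uint64_t X : {0x0123456789ABCDEFULL, ~0ULL, 0x80000000ULL})
    assert(((X >> K) << K) == (X & Mask));
}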
@@ -1099,10 +1224,32 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
/// FrameIndex calculations
-def : Pat<(add (XLenVT AddrFI:$Rs), simm12:$imm12),
- (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>;
-def : Pat<(IsOrAdd (XLenVT AddrFI:$Rs), simm12:$imm12),
- (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>;
+def : Pat<(FrameAddrRegImm GPR:$rs1, simm12:$imm12),
+ (ADDI GPR:$rs1, simm12:$imm12)>;
+
+/// HI and ADD_LO address nodes.
+
+def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
+def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
+def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;
+def : Pat<(riscv_hi tconstpool:$in), (LUI tconstpool:$in)>;
+
+def : Pat<(riscv_add_lo GPR:$hi, tglobaladdr:$lo),
+ (ADDI GPR:$hi, tglobaladdr:$lo)>;
+def : Pat<(riscv_add_lo GPR:$hi, tblockaddress:$lo),
+ (ADDI GPR:$hi, tblockaddress:$lo)>;
+def : Pat<(riscv_add_lo GPR:$hi, tjumptable:$lo),
+ (ADDI GPR:$hi, tjumptable:$lo)>;
+def : Pat<(riscv_add_lo GPR:$hi, tconstpool:$lo),
+ (ADDI GPR:$hi, tconstpool:$lo)>;
+
+/// TLS address nodes.
+
+def : Pat<(riscv_hi tglobaltlsaddr:$in), (LUI tglobaltlsaddr:$in)>;
+def : Pat<(riscv_add_tprel GPR:$rs1, GPR:$rs2, tglobaltlsaddr:$src),
+ (PseudoAddTPRel GPR:$rs1, GPR:$rs2, tglobaltlsaddr:$src)>;
+def : Pat<(riscv_add_lo GPR:$src, tglobaltlsaddr:$lo),
+ (ADDI GPR:$src, tglobaltlsaddr:$lo)>;
/// Setcc
@@ -1127,6 +1274,10 @@ def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
def : Pat<(setgt GPR:$rs1, GPR:$rs2), (SLT GPR:$rs2, GPR:$rs1)>;
def : Pat<(setge GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setle GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;
+def : Pat<(setgt GPR:$rs1, simm12_minus1_nonzero:$imm),
+ (XORI (SLTI GPR:$rs1, (ImmPlus1 simm12_minus1_nonzero:$imm)), 1)>;
+def : Pat<(setugt GPR:$rs1, simm12_minus1_nonzero:$imm),
+ (XORI (SLTIU GPR:$rs1, (ImmPlus1 simm12_minus1_nonzero:$imm)), 1)>;
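These two patterns use the integer identity x > c <=> !(x < c + 1), lowering setgt/setugt against an immediate to SLTI/SLTIU plus XORI 1; the simm12_minus1_nonzero restriction is what guarantees c + 1 still fits the compare immediate. A standalone check of the identity, signed and unsigned:

#include <cassert>
#include <cstdint>

// x > c  <=>  !(x < c + 1): lowers setgt/setugt to SLTI/SLTIU + XORI 1.
int main() {
  const int64_t C = 5; // must satisfy: C + 1 is still a simm12
  for (int64_t X : {-3000LL, -1LL, 0LL, 5LL, 6LL, 3000LL}) {
    bool Slti = X < C + 1;            // SLTI  rd, x, C+1
    assert((X > C) == !Slti);         // XORI  rd, rd, 1
    uint64_t UX = (uint64_t)X, UC = (uint64_t)C;
    bool Sltiu = UX < UC + 1;         // SLTIU rd, x, C+1
    assert((UX > UC) == !Sltiu);
  }
}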
def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
@@ -1185,7 +1336,8 @@ def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)),
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, Size = 8, hasSideEffects = 0,
mayStore = 0, mayLoad = 0 in
-def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
+def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []>,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]> {
let AsmString = "call\t$rd, $func";
}
@@ -1196,7 +1348,8 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
// Define AsmString to print "call" when compile with -S flag.
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
let isCall = 1, Defs = [X1], isCodeGenOnly = 0, Size = 8 in
-def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
+def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []>,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]> {
let AsmString = "call\t$func";
}
@@ -1221,7 +1374,8 @@ def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>,
// Define AsmString to print "tail" when compile with -S flag.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2],
Size = 8, isCodeGenOnly = 0 in
-def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []> {
+def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []>,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]> {
let AsmString = "tail\t$dst";
}
@@ -1231,13 +1385,14 @@ def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1),
PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>;
def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
- (PseudoTAIL texternalsym:$dst)>;
+ (PseudoTAIL tglobaladdr:$dst)>;
def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, Size = 8,
isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
-def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> {
+def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []>,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]> {
let AsmString = "jump\t$target, $rd";
}
@@ -1246,21 +1401,33 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lla", "$dst, $src">;
+def : Pat<(riscv_lla tglobaladdr:$in), (PseudoLLA tglobaladdr:$in)>;
+def : Pat<(riscv_lla tblockaddress:$in), (PseudoLLA tblockaddress:$in)>;
+def : Pat<(riscv_lla tjumptable:$in), (PseudoLLA tjumptable:$in)>;
+def : Pat<(riscv_lla tconstpool:$in), (PseudoLLA tconstpool:$in)>;
+
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la", "$dst, $src">;
+def : Pat<(riscv_la tglobaladdr:$in), (PseudoLA tglobaladdr:$in)>;
+
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
+def : Pat<(riscv_la_tls_ie tglobaltlsaddr:$in),
+ (PseudoLA_TLS_IE tglobaltlsaddr:$in)>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;
+def : Pat<(riscv_la_tls_gd tglobaltlsaddr:$in),
+ (PseudoLA_TLS_GD tglobaltlsaddr:$in)>;
/// Sign/Zero Extends
@@ -1283,11 +1450,8 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs
/// Loads
multiclass LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> {
- def : Pat<(vt (LoadOp BaseAddr:$rs1)), (Inst BaseAddr:$rs1, 0)>;
- def : Pat<(vt (LoadOp (add BaseAddr:$rs1, simm12:$imm12))),
- (Inst BaseAddr:$rs1, simm12:$imm12)>;
- def : Pat<(vt (LoadOp (IsOrAdd AddrFI:$rs1, simm12:$imm12))),
- (Inst AddrFI:$rs1, simm12:$imm12)>;
+ def : Pat<(vt (LoadOp (AddrRegImm GPR:$rs1, simm12:$imm12))),
+ (Inst GPR:$rs1, simm12:$imm12)>;
}
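// A note on the consolidation (sketch, assuming the usual definition):
// AddrRegImm is a ComplexPattern along the lines of
//   def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm">;
// whose C++ matcher folds the bare-register, reg+simm12 and frame-index forms
// that the three hand-written patterns above used to spell out.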
defm : LdPat<sextloadi8, LB>;
@@ -1302,12 +1466,8 @@ defm : LdPat<zextloadi16, LHU>;
multiclass StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
ValueType vt> {
- def : Pat<(StoreOp (vt StTy:$rs2), BaseAddr:$rs1),
- (Inst StTy:$rs2, BaseAddr:$rs1, 0)>;
- def : Pat<(StoreOp (vt StTy:$rs2), (add BaseAddr:$rs1, simm12:$imm12)),
- (Inst StTy:$rs2, BaseAddr:$rs1, simm12:$imm12)>;
- def : Pat<(StoreOp (vt StTy:$rs2), (IsOrAdd AddrFI:$rs1, simm12:$imm12)),
- (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm GPR:$rs1, simm12:$imm12)),
+ (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
}
defm : StPat<truncstorei8, SB, GPR, XLenVT>;
@@ -1415,7 +1575,7 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
// if only the lower 32 bits of their result are used.
class binop_allwusers<SDPatternOperator operator>
: PatFrag<(ops node:$lhs, node:$rhs),
- (operator node:$lhs, node:$rhs), [{
+ (i64 (operator node:$lhs, node:$rhs)), [{
return hasAllWUsers(Node);
}]>;
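// An illustrative use (assuming the usual RV64 ADDW definition): an i64 add
// whose users all ignore the upper 32 bits can be shrunk to the W form:
//   def : Pat<(binop_allwusers<add> GPR:$rs1, GPR:$rs2),
//             (ADDW GPR:$rs1, GPR:$rs2)>;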
@@ -1496,14 +1656,14 @@ def : Pat<(debugtrap), (EBREAK)>;
/// Simple optimization
def : Pat<(add GPR:$rs1, (AddiPair:$rs2)),
- (ADDI (ADDI GPR:$rs1, (AddiPairImmB AddiPair:$rs2)),
- (AddiPairImmA GPR:$rs2))>;
+ (ADDI (ADDI GPR:$rs1, (AddiPairImmLarge AddiPair:$rs2)),
+ (AddiPairImmSmall GPR:$rs2))>;
let Predicates = [IsRV64] in {
// Select W instructions if only the lower 32-bits of the result are used.
def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
- (ADDIW (ADDIW GPR:$rs1, (AddiPairImmB AddiPair:$rs2)),
- (AddiPairImmA AddiPair:$rs2))>;
+ (ADDIW (ADDIW GPR:$rs1, (AddiPairImmLarge AddiPair:$rs2)),
+ (AddiPairImmSmall AddiPair:$rs2))>;
}
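// Worked example (assuming AddiPair covers two-ADDI immediates, i.e. roughly
// [-4096, -2049] and [2048, 4094]): an add of 4000 splits into the largest
// simm12 plus the remainder:
//   addi rd, rs1, 2047
//   addi rd, rd, 1953    # 2047 + 1953 == 4000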
//===----------------------------------------------------------------------===//
@@ -1519,3 +1679,4 @@ include "RISCVInstrInfoZb.td"
include "RISCVInstrInfoZk.td"
include "RISCVInstrInfoV.td"
include "RISCVInstrInfoZfh.td"
+include "RISCVInstrInfoZicbo.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 7d23dafb0346..dd4b174d7e62 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -12,31 +12,13 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Operand and SDNode transformation definitions.
-//===----------------------------------------------------------------------===//
-
-// A parse method for (${gpr}) or 0(${gpr}), where the 0 is be silently ignored.
-// Used for GNU as Compatibility.
-def AtomicMemOpOperand : AsmOperandClass {
- let Name = "AtomicMemOpOperand";
- let RenderMethod = "addRegOperands";
- let PredicateMethod = "isGPR";
- let ParserMethod = "parseAtomicMemOp";
-}
-
-def GPRMemAtomic : RegisterOperand<GPR> {
- let ParserMatchClass = AtomicMemOpOperand;
- let PrintMethod = "printAtomicMemOp";
-}
-
-//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class LR_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<0b00010, aq, rl, funct3, OPC_AMO,
- (outs GPR:$rd), (ins GPRMemAtomic:$rs1),
+ (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1),
opcodestr, "$rd, $rs1"> {
let rs2 = 0;
}
@@ -51,7 +33,7 @@ multiclass LR_r_aq_rl<bits<3> funct3, string opcodestr> {
let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
class AMO_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
- (outs GPR:$rd), (ins GPRMemAtomic:$rs1, GPR:$rs2),
+ (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
opcodestr, "$rd, $rs2, $rs1">;
multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
@@ -63,12 +45,8 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
ValueType vt = XLenVT> {
- def : Pat<(StoreOp BaseAddr:$rs1, (vt StTy:$rs2)),
- (Inst StTy:$rs2, BaseAddr:$rs1, 0)>;
- def : Pat<(StoreOp (add BaseAddr:$rs1, simm12:$imm12), (vt StTy:$rs2)),
- (Inst StTy:$rs2, BaseAddr:$rs1, simm12:$imm12)>;
- def : Pat<(StoreOp (IsOrAdd AddrFI:$rs1, simm12:$imm12), (vt StTy:$rs2)),
- (Inst StTy:$rs2, AddrFI:$rs1, simm12:$imm12)>;
+ def : Pat<(StoreOp (AddrRegImm GPR:$rs1, simm12:$imm12), (vt StTy:$rs2)),
+ (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 2837b92da81f..6fb9e36d7666 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -26,6 +26,69 @@ def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>;
def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>;
//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+// Zdinx
+
+def GPRPF64AsFPR : AsmOperandClass {
+ let Name = "GPRPF64AsFPR";
+ let ParserMethod = "parseGPRAsFPR";
+ let RenderMethod = "addRegOperands";
+}
+
+def GPRF64AsFPR : AsmOperandClass {
+ let Name = "GPRF64AsFPR";
+ let ParserMethod = "parseGPRAsFPR";
+ let RenderMethod = "addRegOperands";
+}
+
+def FPR64INX : RegisterOperand<GPRF64> {
+ let ParserMatchClass = GPRF64AsFPR;
+ let DecoderMethod = "DecodeGPRRegisterClass";
+}
+
+def FPR64IN32X : RegisterOperand<GPRPF64> {
+ let ParserMatchClass = GPRPF64AsFPR;
+}
+
+def DExt : ExtInfo<0, [HasStdExtD]>;
+def D64Ext : ExtInfo<0, [HasStdExtD, IsRV64]>;
+def ZdinxExt : ExtInfo<1, [HasStdExtZdinx, IsRV64]>;
+def Zdinx32Ext : ExtInfo<2, [HasStdExtZdinx, IsRV32]>;
+
+def D : ExtInfo_r<DExt, FPR64>;
+def D_INX : ExtInfo_r<ZdinxExt, FPR64INX>;
+def D_IN32X : ExtInfo_r<Zdinx32Ext, FPR64IN32X>;
+
+def DD : ExtInfo_rr<DExt, FPR64, FPR64>;
+def DD_INX : ExtInfo_rr<ZdinxExt, FPR64INX, FPR64INX>;
+def DD_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, FPR64IN32X>;
+def DF : ExtInfo_rr<DExt, FPR64, FPR32>;
+def DF_INX : ExtInfo_rr<ZdinxExt, FPR64INX, FPR32INX>;
+def DF_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, FPR32INX>;
+def DX : ExtInfo_rr<DExt, FPR64, GPR>;
+def DX_INX : ExtInfo_rr<ZdinxExt, FPR64INX, GPR>;
+def DX_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, GPR>;
+def DX_64 : ExtInfo_rr<D64Ext, FPR64, GPR>;
+def FD : ExtInfo_rr<DExt, FPR32, FPR64>;
+def FD_INX : ExtInfo_rr<ZdinxExt, FPR32INX, FPR64INX>;
+def FD_IN32X : ExtInfo_rr<Zdinx32Ext, FPR32INX, FPR64IN32X>;
+def XD : ExtInfo_rr<DExt, GPR, FPR64>;
+def XD_INX : ExtInfo_rr<ZdinxExt, GPR, FPR64INX>;
+def XD_IN32X : ExtInfo_rr<Zdinx32Ext, GPR, FPR64IN32X>;
+def XD_64 : ExtInfo_rr<D64Ext, GPR, FPR64>;
+
+defvar DINX = [D, D_INX, D_IN32X];
+defvar DDINX = [DD, DD_INX, DD_IN32X];
+defvar DXINX = [DX, DX_INX, DX_IN32X];
+defvar DFINX = [DF, DF_INX, DF_IN32X];
+defvar FDINX = [FD, FD_INX, FD_IN32X];
+defvar XDINX = [XD, XD_INX, XD_IN32X];
+defvar DXIN64X = [DX_64, DX_INX];
+defvar XDIN64X = [XD_64, XD_INX];
+
+//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -36,106 +99,104 @@ def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>;
// reflecting the order these fields are specified in the instruction
// encoding.
def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>;
+} // Predicates = [HasStdExtD]
let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
-def FMADD_D : FPFMA_rrr_frm<OPC_MADD, 0b01, "fmadd.d", FPR64>;
-def FMSUB_D : FPFMA_rrr_frm<OPC_MSUB, 0b01, "fmsub.d", FPR64>;
-def FNMSUB_D : FPFMA_rrr_frm<OPC_NMSUB, 0b01, "fnmsub.d", FPR64>;
-def FNMADD_D : FPFMA_rrr_frm<OPC_NMADD, 0b01, "fnmadd.d", FPR64>;
+defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", DINX>;
+defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", DINX>;
+defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", DINX>;
+defm FNMADD_D : FPFMA_rrr_frm_m<OPC_NMADD, 0b01, "fnmadd.d", DINX>;
}
-def : FPFMADynFrmAlias<FMADD_D, "fmadd.d", FPR64>;
-def : FPFMADynFrmAlias<FMSUB_D, "fmsub.d", FPR64>;
-def : FPFMADynFrmAlias<FNMSUB_D, "fnmsub.d", FPR64>;
-def : FPFMADynFrmAlias<FNMADD_D, "fnmadd.d", FPR64>;
-
-def FADD_D : FPALU_rr_frm<0b0000001, "fadd.d", FPR64>,
- Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
-def FSUB_D : FPALU_rr_frm<0b0000101, "fsub.d", FPR64>,
- Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
-def FMUL_D : FPALU_rr_frm<0b0001001, "fmul.d", FPR64>,
- Sched<[WriteFMul64, ReadFMul64, ReadFMul64]>;
-def FDIV_D : FPALU_rr_frm<0b0001101, "fdiv.d", FPR64>,
- Sched<[WriteFDiv64, ReadFDiv64, ReadFDiv64]>;
-
-def : FPALUDynFrmAlias<FADD_D, "fadd.d", FPR64>;
-def : FPALUDynFrmAlias<FSUB_D, "fsub.d", FPR64>;
-def : FPALUDynFrmAlias<FMUL_D, "fmul.d", FPR64>;
-def : FPALUDynFrmAlias<FDIV_D, "fdiv.d", FPR64>;
-
-def FSQRT_D : FPUnaryOp_r_frm<0b0101101, 0b00000, FPR64, FPR64, "fsqrt.d">,
- Sched<[WriteFSqrt64, ReadFSqrt64]>;
-def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>;
+defm : FPFMADynFrmAlias_m<FMADD_D, "fmadd.d", DINX>;
+defm : FPFMADynFrmAlias_m<FMSUB_D, "fmsub.d", DINX>;
+defm : FPFMADynFrmAlias_m<FNMSUB_D, "fnmsub.d", DINX>;
+defm : FPFMADynFrmAlias_m<FNMADD_D, "fnmadd.d", DINX>;
+
+let SchedRW = [WriteFALU64, ReadFALU64, ReadFALU64] in {
+defm FADD_D : FPALU_rr_frm_m<0b0000001, "fadd.d", DINX, /*Commutable*/1>;
+defm FSUB_D : FPALU_rr_frm_m<0b0000101, "fsub.d", DINX>;
+}
+let SchedRW = [WriteFMul64, ReadFMul64, ReadFMul64] in
+defm FMUL_D : FPALU_rr_frm_m<0b0001001, "fmul.d", DINX, /*Commutable*/1>;
+
+let SchedRW = [WriteFDiv64, ReadFDiv64, ReadFDiv64] in
+defm FDIV_D : FPALU_rr_frm_m<0b0001101, "fdiv.d", DINX>;
+
+defm : FPALUDynFrmAlias_m<FADD_D, "fadd.d", DINX>;
+defm : FPALUDynFrmAlias_m<FSUB_D, "fsub.d", DINX>;
+defm : FPALUDynFrmAlias_m<FMUL_D, "fmul.d", DINX>;
+defm : FPALUDynFrmAlias_m<FDIV_D, "fdiv.d", DINX>;
+
+defm FSQRT_D : FPUnaryOp_r_frm_m<0b0101101, 0b00000, DDINX, "fsqrt.d">,
+ Sched<[WriteFSqrt64, ReadFSqrt64]>;
+defm : FPUnaryOpDynFrmAlias_m<FSQRT_D, "fsqrt.d", DDINX>;
let SchedRW = [WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64],
mayRaiseFPException = 0 in {
-def FSGNJ_D : FPALU_rr<0b0010001, 0b000, "fsgnj.d", FPR64>;
-def FSGNJN_D : FPALU_rr<0b0010001, 0b001, "fsgnjn.d", FPR64>;
-def FSGNJX_D : FPALU_rr<0b0010001, 0b010, "fsgnjx.d", FPR64>;
+defm FSGNJ_D : FPALU_rr_m<0b0010001, 0b000, "fsgnj.d", DINX>;
+defm FSGNJN_D : FPALU_rr_m<0b0010001, 0b001, "fsgnjn.d", DINX>;
+defm FSGNJX_D : FPALU_rr_m<0b0010001, 0b010, "fsgnjx.d", DINX>;
}
let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
-def FMIN_D : FPALU_rr<0b0010101, 0b000, "fmin.d", FPR64>;
-def FMAX_D : FPALU_rr<0b0010101, 0b001, "fmax.d", FPR64>;
+defm FMIN_D : FPALU_rr_m<0b0010101, 0b000, "fmin.d", DINX, /*Commutable*/1>;
+defm FMAX_D : FPALU_rr_m<0b0010101, 0b001, "fmax.d", DINX, /*Commutable*/1>;
}
-def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, 0b00001, FPR32, FPR64, "fcvt.s.d">,
- Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>;
+defm FCVT_S_D : FPUnaryOp_r_frm_m<0b0100000, 0b00001, FDINX, "fcvt.s.d">,
+ Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_S_D, "fcvt.s.d", FDINX>;
-def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b00000, 0b000, FPR64, FPR32, "fcvt.d.s">,
- Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
+defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, DFINX, "fcvt.d.s">,
+ Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
-def FEQ_D : FPCmp_rr<0b1010001, 0b010, "feq.d", FPR64>;
-def FLT_D : FPCmp_rr<0b1010001, 0b001, "flt.d", FPR64>;
-def FLE_D : FPCmp_rr<0b1010001, 0b000, "fle.d", FPR64>;
+defm FEQ_D : FPCmp_rr_m<0b1010001, 0b010, "feq.d", DINX, /*Commutable*/1>;
+defm FLT_D : FPCmp_rr_m<0b1010001, 0b001, "flt.d", DINX>;
+defm FLE_D : FPCmp_rr_m<0b1010001, 0b000, "fle.d", DINX>;
}
-let mayRaiseFPException = 0 in
-def FCLASS_D : FPUnaryOp_r<0b1110001, 0b00000, 0b001, GPR, FPR64, "fclass.d">,
- Sched<[WriteFClass64, ReadFClass64]>;
+defm FCLASS_D : FPUnaryOp_r_m<0b1110001, 0b00000, 0b001, XDINX, "fclass.d">,
+ Sched<[WriteFClass64, ReadFClass64]>;
-def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, 0b00000, GPR, FPR64, "fcvt.w.d">,
+defm FCVT_W_D : FPUnaryOp_r_frm_m<0b1100001, 0b00000, XDINX, "fcvt.w.d">,
Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_W_D, "fcvt.w.d", GPR, FPR64>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_W_D, "fcvt.w.d", XDINX>;
-def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, 0b00001, GPR, FPR64, "fcvt.wu.d">,
- Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_WU_D, "fcvt.wu.d", GPR, FPR64>;
+defm FCVT_WU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00001, XDINX, "fcvt.wu.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_D, "fcvt.wu.d", XDINX>;
-def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b00000, 0b000, FPR64, GPR, "fcvt.d.w">,
- Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
-
-def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b00001, 0b000, FPR64, GPR, "fcvt.d.wu">,
+defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, DXINX, "fcvt.d.w">,
Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
-} // Predicates = [HasStdExtD]
-let Predicates = [HasStdExtD, IsRV64] in {
-def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, 0b00010, GPR, FPR64, "fcvt.l.d">,
- Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_L_D, "fcvt.l.d", GPR, FPR64>;
+defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, DXINX, "fcvt.d.wu">,
+ Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
-def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, 0b00011, GPR, FPR64, "fcvt.lu.d">,
+defm FCVT_L_D : FPUnaryOp_r_frm_m<0b1100001, 0b00010, XDIN64X, "fcvt.l.d">,
Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_LU_D, "fcvt.lu.d", GPR, FPR64>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_L_D, "fcvt.l.d", XDIN64X>;
-let mayRaiseFPException = 0 in
+defm FCVT_LU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00011, XDIN64X, "fcvt.lu.d">,
+ Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_D, "fcvt.lu.d", XDIN64X>;
+
+let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in
def FMV_X_D : FPUnaryOp_r<0b1110001, 0b00000, 0b000, GPR, FPR64, "fmv.x.d">,
Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>;
-def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, 0b00010, FPR64, GPR, "fcvt.d.l">,
- Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_D_L, "fcvt.d.l", FPR64, GPR>;
-
-def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, 0b00011, FPR64, GPR, "fcvt.d.lu">,
+defm FCVT_D_L : FPUnaryOp_r_frm_m<0b1101001, 0b00010, DXIN64X, "fcvt.d.l">,
Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_D_LU, "fcvt.d.lu", FPR64, GPR>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_D_L, "fcvt.d.l", DXIN64X>;
-let mayRaiseFPException = 0 in
+defm FCVT_D_LU : FPUnaryOp_r_frm_m<0b1101001, 0b00011, DXIN64X, "fcvt.d.lu">,
+ Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_D_LU, "fcvt.d.lu", DXIN64X>;
+
+let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in
def FMV_D_X : FPUnaryOp_r<0b1111001, 0b00000, 0b000, FPR64, GPR, "fmv.d.x">,
Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]>;
-} // Predicates = [HasStdExtD, IsRV64]
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
@@ -164,16 +225,30 @@ def PseudoQuietFLT_D : PseudoQuietFCMP<FPR64>;
}
} // Predicates = [HasStdExtD]
+let Predicates = [HasStdExtZdinx, IsRV64] in {
+def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D_INX FPR64INX:$rd, FPR64INX:$rs, FPR64INX:$rs)>;
+def : InstAlias<"fneg.d $rd, $rs", (FSGNJN_D_INX FPR64INX:$rd, FPR64INX:$rs, FPR64INX:$rs)>;
+
+def : InstAlias<"fgt.d $rd, $rs, $rt",
+ (FLT_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>;
+def : InstAlias<"fge.d $rd, $rs, $rt",
+ (FLE_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>;
+} // Predicates = [HasStdExtZdinx, IsRV64]
+
+let Predicates = [HasStdExtZdinx, IsRV32] in {
+def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D_IN32X FPR64IN32X:$rd, FPR64IN32X:$rs, FPR64IN32X:$rs)>;
+def : InstAlias<"fneg.d $rd, $rs", (FSGNJN_D_IN32X FPR64IN32X:$rd, FPR64IN32X:$rs, FPR64IN32X:$rs)>;
+
+def : InstAlias<"fgt.d $rd, $rs, $rt",
+ (FLT_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>;
+def : InstAlias<"fge.d $rd, $rs, $rt",
+ (FLE_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>;
+} // Predicates = [HasStdExtZdinx, IsRV32]
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-class PatFpr64Fpr64<SDPatternOperator OpNode, RVInstR Inst>
- : Pat<(OpNode FPR64:$rs1, FPR64:$rs2), (Inst $rs1, $rs2)>;
-
-class PatFpr64Fpr64DynFrm<SDPatternOperator OpNode, RVInstRFrm Inst>
- : Pat<(OpNode FPR64:$rs1, FPR64:$rs2), (Inst $rs1, $rs2, 0b111)>;
-
let Predicates = [HasStdExtD] in {
/// Float conversion operations
@@ -187,17 +262,17 @@ def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
/// Float arithmetic operations
-def : PatFpr64Fpr64DynFrm<any_fadd, FADD_D>;
-def : PatFpr64Fpr64DynFrm<any_fsub, FSUB_D>;
-def : PatFpr64Fpr64DynFrm<any_fmul, FMUL_D>;
-def : PatFpr64Fpr64DynFrm<any_fdiv, FDIV_D>;
+def : PatFprFprDynFrm<any_fadd, FADD_D, FPR64>;
+def : PatFprFprDynFrm<any_fsub, FSUB_D, FPR64>;
+def : PatFprFprDynFrm<any_fmul, FMUL_D, FPR64>;
+def : PatFprFprDynFrm<any_fdiv, FDIV_D, FPR64>;
def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, 0b111)>;
def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>;
def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
-def : PatFpr64Fpr64<fcopysign, FSGNJ_D>;
+def : PatFprFpr<fcopysign, FSGNJ_D, FPR64>;
def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>;
def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2,
@@ -219,11 +294,15 @@ def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
(FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+// fnmadd: -(rs1 * rs2 + rs3) (requires the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR64:$rs1, FPR64:$rs2, FPR64:$rs3)),
+ (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
// LLVM's fminnum and fmaxnum.
// <https://github.com/riscv/riscv-isa-manual/commit/cd20cee7efd9bac7c5aa127ec3b451749d2b3cce>.
-def : PatFpr64Fpr64<fminnum, FMIN_D>;
-def : PatFpr64Fpr64<fmaxnum, FMAX_D>;
+def : PatFprFpr<fminnum, FMIN_D, FPR64>;
+def : PatFprFpr<fmaxnum, FMAX_D, FPR64>;
/// Setcc
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index a8ac06ba8da3..a71d5b4737c3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -53,10 +53,81 @@ def riscv_any_fcvt_wu_rv64 : PatFrags<(ops node:$src, node:$frm),
[(riscv_strict_fcvt_wu_rv64 node:$src, node:$frm),
(riscv_fcvt_wu_rv64 node:$src, node:$frm)]>;
+def any_fma_nsz : PatFrag<(ops node:$rs1, node:$rs2, node:$rs3),
+ (any_fma node:$rs1, node:$rs2, node:$rs3), [{
+ return N->getFlags().hasNoSignedZeros();
+}]>;
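+// Why the nsz gate (sketch): fnmadd negates the product rather than the final
+// sum, so with signed zeros -(a*b) - c can yield a zero of the opposite sign
+// from fneg (fma a, b, c); the fneg(fma) folds in this and the D file
+// therefore require the no-signed-zeros flag on the FMA node.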
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
+// Zfinx
+
+def GPRAsFPR : AsmOperandClass {
+ let Name = "GPRAsFPR";
+ let ParserMethod = "parseGPRAsFPR";
+ let RenderMethod = "addRegOperands";
+}
+
+def FPR32INX : RegisterOperand<GPRF32> {
+ let ParserMatchClass = GPRAsFPR;
+ let DecoderMethod = "DecodeGPRRegisterClass";
+}
+
+// inx = 0 : f, d, zfh, zfhmin
+// = 1 : zfinx, zdinx, zhinx, zhinxmin
+// = 2 : zdinx_rv32
+class ExtInfo<bits<2> inx, list<Predicate> pres> {
+ string Suffix = !cond(!eq(inx, 0): "",
+ !eq(inx, 1): "_INX",
+ !eq(inx, 2): "_IN32X");
+ list<Predicate> Predicates = pres;
+ string Space = !cond(!eq(inx, 0): "",
+ !eq(inx, 1): "RVZfinx",
+ !eq(inx, 2): "RV32Zdinx");
+}
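+// Illustrative expansion (assumption): a multiclass instantiated with
+// ZfinxExt (inx = 1) emits a "_INX"-suffixed variant operating on GPRs that
+// decodes in the RVZfinx namespace, e.g. FADD_S_INX alongside FADD_S.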
+
+class ExtInfo_r<ExtInfo ext, DAGOperand reg> {
+ string Suffix = ext.Suffix;
+ list<Predicate> Predicates = ext.Predicates;
+ string Space = ext.Space;
+ DAGOperand Reg = reg;
+}
+
+class ExtInfo_rr<ExtInfo ext, DAGOperand rdty, DAGOperand rs1ty> {
+ string Suffix = ext.Suffix;
+ list<Predicate> Predicates = ext.Predicates;
+ string Space = ext.Space;
+ DAGOperand RdTy = rdty;
+ DAGOperand Rs1Ty = rs1ty;
+}
+
+def FExt : ExtInfo<0, [HasStdExtF]>;
+def F64Ext : ExtInfo<0, [HasStdExtF, IsRV64]>;
+def ZfinxExt : ExtInfo<1, [HasStdExtZfinx]>;
+def Zfinx64Ext : ExtInfo<1, [HasStdExtZfinx, IsRV64]>;
+
+def F : ExtInfo_r<FExt, FPR32>;
+def F_INX : ExtInfo_r<ZfinxExt, FPR32INX>;
+
+def FF : ExtInfo_rr<FExt, FPR32, FPR32>;
+def FF_INX : ExtInfo_rr<ZfinxExt, FPR32INX, FPR32INX>;
+def FX : ExtInfo_rr<FExt, FPR32, GPR>;
+def FX_INX : ExtInfo_rr<ZfinxExt, FPR32INX, GPR>;
+def FX_64 : ExtInfo_rr<F64Ext, FPR32, GPR>;
+def FX_INX_64 : ExtInfo_rr<Zfinx64Ext, FPR32INX, GPR>;
+def XF : ExtInfo_rr<FExt, GPR, FPR32>;
+def XF_64 : ExtInfo_rr<F64Ext, GPR, FPR32>;
+def XF_INX : ExtInfo_rr<ZfinxExt, GPR, FPR32INX>;
+def XF_INX_64 : ExtInfo_rr<Zfinx64Ext, GPR, FPR32INX>;
+
+defvar FINX = [F, F_INX];
+defvar FFINX = [FF, FF_INX];
+defvar FXINX = [FX, FX_INX];
+defvar XFINX = [XF, XF_INX];
+defvar XFIN64X = [XF_64, XF_INX_64];
+defvar FXIN64X = [FX_64, FX_INX_64];
+
// Floating-point rounding mode
def FRMArg : AsmOperandClass {
@@ -92,64 +163,131 @@ class FPStore_r<bits<3> funct3, string opcodestr, RegisterClass rty,
Sched<[sw, ReadStoreData, ReadFMemBase]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
- UseNamedOperandTable = 1, hasPostISelHook = 1 in
+ UseNamedOperandTable = 1, hasPostISelHook = 1, isCommutable = 1 in
class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr,
- RegisterClass rty>
+ DAGOperand rty>
: RVInstR4Frm<funct2, opcode, (outs rty:$rd),
(ins rty:$rs1, rty:$rs2, rty:$rs3, frmarg:$frm),
opcodestr, "$rd, $rs1, $rs2, $rs3, $frm">;
+multiclass FPFMA_rrr_frm_m<RISCVOpcode opcode, bits<2> funct2,
+ string opcodestr, list<ExtInfo_r> Exts> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPFMA_rrr_frm<opcode, funct2, opcodestr, Ext.Reg>;
+}
+
class FPFMADynFrmAlias<FPFMA_rrr_frm Inst, string OpcodeStr,
- RegisterClass rty>
+ DAGOperand rty>
: InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
(Inst rty:$rd, rty:$rs1, rty:$rs2, rty:$rs3, 0b111)>;
+multiclass FPFMADynFrmAlias_m<FPFMA_rrr_frm Inst, string OpcodeStr,
+ list<ExtInfo_r> Exts> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates in
+ def : FPFMADynFrmAlias<!cast<FPFMA_rrr_frm>(Inst#Ext.Suffix), OpcodeStr,
+ Ext.Reg>;
+}
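+// Example (sketch): with this alias, "fmadd.s fa0, fa1, fa2, fa3" written
+// without an explicit rounding mode assembles as FMADD_S with frm = 0b111
+// (DYN), and likewise for each enabled "_INX" variant.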
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
class FPALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
- RegisterClass rty>
+ DAGOperand rty, bit Commutable>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs rty:$rd),
- (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+ (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+ let isCommutable = Commutable;
+}
+multiclass FPALU_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr,
+ list<ExtInfo_r> Exts, bit Commutable = 0> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPALU_rr<funct7, funct3, opcodestr, Ext.Reg, Commutable>;
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
UseNamedOperandTable = 1, hasPostISelHook = 1 in
-class FPALU_rr_frm<bits<7> funct7, string opcodestr, RegisterClass rty>
+class FPALU_rr_frm<bits<7> funct7, string opcodestr, DAGOperand rty,
+ bit Commutable>
: RVInstRFrm<funct7, OPC_OP_FP, (outs rty:$rd),
(ins rty:$rs1, rty:$rs2, frmarg:$frm), opcodestr,
- "$rd, $rs1, $rs2, $frm">;
+ "$rd, $rs1, $rs2, $frm"> {
+ let isCommutable = Commutable;
+}
+multiclass FPALU_rr_frm_m<bits<7> funct7, string opcodestr,
+ list<ExtInfo_r> Exts, bit Commutable = 0> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPALU_rr_frm<funct7, opcodestr, Ext.Reg, Commutable>;
+}
class FPALUDynFrmAlias<FPALU_rr_frm Inst, string OpcodeStr,
- RegisterClass rty>
+ DAGOperand rty>
: InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
(Inst rty:$rd, rty:$rs1, rty:$rs2, 0b111)>;
+multiclass FPALUDynFrmAlias_m<FPALU_rr_frm Inst, string OpcodeStr,
+ list<ExtInfo_r> Exts> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates in
+ def : FPALUDynFrmAlias<!cast<FPALU_rr_frm>(Inst#Ext.Suffix), OpcodeStr,
+ Ext.Reg>;
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
class FPUnaryOp_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
- RegisterClass rdty, RegisterClass rs1ty, string opcodestr>
+ DAGOperand rdty, DAGOperand rs1ty, string opcodestr>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd), (ins rs1ty:$rs1),
opcodestr, "$rd, $rs1"> {
let rs2 = rs2val;
}
+multiclass FPUnaryOp_r_m<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
+ list<ExtInfo_rr> Exts, string opcodestr> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPUnaryOp_r<funct7, rs2val, funct3, Ext.RdTy, Ext.Rs1Ty,
+ opcodestr>;
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
UseNamedOperandTable = 1, hasPostISelHook = 1 in
-class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, RegisterClass rdty,
- RegisterClass rs1ty, string opcodestr>
+class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, DAGOperand rdty,
+ DAGOperand rs1ty, string opcodestr>
: RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd),
(ins rs1ty:$rs1, frmarg:$frm), opcodestr,
"$rd, $rs1, $frm"> {
let rs2 = rs2val;
}
+multiclass FPUnaryOp_r_frm_m<bits<7> funct7, bits<5> rs2val,
+ list<ExtInfo_rr> Exts, string opcodestr> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPUnaryOp_r_frm<funct7, rs2val, Ext.RdTy, Ext.Rs1Ty,
+ opcodestr>;
+}
class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr,
- RegisterClass rdty, RegisterClass rs1ty>
+ DAGOperand rdty, DAGOperand rs1ty>
: InstAlias<OpcodeStr#" $rd, $rs1",
(Inst rdty:$rd, rs1ty:$rs1, 0b111)>;
+multiclass FPUnaryOpDynFrmAlias_m<FPUnaryOp_r_frm Inst, string OpcodeStr,
+ list<ExtInfo_rr> Exts> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates in
+ def : FPUnaryOpDynFrmAlias<!cast<FPUnaryOp_r_frm>(Inst#Ext.Suffix),
+ OpcodeStr, Ext.RdTy, Ext.Rs1Ty>;
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
- RegisterClass rty>
+ DAGOperand rty, bit Commutable>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs GPR:$rd),
- (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+ (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
+ let isCommutable = Commutable;
+}
+multiclass FPCmp_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr,
+ list<ExtInfo_r> Exts, bit Commutable = 0> {
+ foreach Ext = Exts in
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.Reg, Commutable>;
+}
//===----------------------------------------------------------------------===//
// Instructions
@@ -162,101 +300,100 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
// reflecting the order these fields are specified in the instruction
// encoding.
def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
+} // Predicates = [HasStdExtF]
let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
-def FMADD_S : FPFMA_rrr_frm<OPC_MADD, 0b00, "fmadd.s", FPR32>;
-def FMSUB_S : FPFMA_rrr_frm<OPC_MSUB, 0b00, "fmsub.s", FPR32>;
-def FNMSUB_S : FPFMA_rrr_frm<OPC_NMSUB, 0b00, "fnmsub.s", FPR32>;
-def FNMADD_S : FPFMA_rrr_frm<OPC_NMADD, 0b00, "fnmadd.s", FPR32>;
+defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", FINX>;
+defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", FINX>;
+defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", FINX>;
+defm FNMADD_S : FPFMA_rrr_frm_m<OPC_NMADD, 0b00, "fnmadd.s", FINX>;
+}
+
+defm : FPFMADynFrmAlias_m<FMADD_S, "fmadd.s", FINX>;
+defm : FPFMADynFrmAlias_m<FMSUB_S, "fmsub.s", FINX>;
+defm : FPFMADynFrmAlias_m<FNMSUB_S, "fnmsub.s", FINX>;
+defm : FPFMADynFrmAlias_m<FNMADD_S, "fnmadd.s", FINX>;
+
+let SchedRW = [WriteFALU32, ReadFALU32, ReadFALU32] in {
+defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", FINX, /*Commutable*/1>;
+defm FSUB_S : FPALU_rr_frm_m<0b0000100, "fsub.s", FINX>;
}
+let SchedRW = [WriteFMul32, ReadFMul32, ReadFMul32] in
+defm FMUL_S : FPALU_rr_frm_m<0b0001000, "fmul.s", FINX, /*Commutable*/1>;
-def : FPFMADynFrmAlias<FMADD_S, "fmadd.s", FPR32>;
-def : FPFMADynFrmAlias<FMSUB_S, "fmsub.s", FPR32>;
-def : FPFMADynFrmAlias<FNMSUB_S, "fnmsub.s", FPR32>;
-def : FPFMADynFrmAlias<FNMADD_S, "fnmadd.s", FPR32>;
-
-def FADD_S : FPALU_rr_frm<0b0000000, "fadd.s", FPR32>,
- Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
-def FSUB_S : FPALU_rr_frm<0b0000100, "fsub.s", FPR32>,
- Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
-def FMUL_S : FPALU_rr_frm<0b0001000, "fmul.s", FPR32>,
- Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>;
-def FDIV_S : FPALU_rr_frm<0b0001100, "fdiv.s", FPR32>,
- Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>;
-
-def : FPALUDynFrmAlias<FADD_S, "fadd.s", FPR32>;
-def : FPALUDynFrmAlias<FSUB_S, "fsub.s", FPR32>;
-def : FPALUDynFrmAlias<FMUL_S, "fmul.s", FPR32>;
-def : FPALUDynFrmAlias<FDIV_S, "fdiv.s", FPR32>;
-
-def FSQRT_S : FPUnaryOp_r_frm<0b0101100, 0b00000, FPR32, FPR32, "fsqrt.s">,
- Sched<[WriteFSqrt32, ReadFSqrt32]>;
-def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>;
+let SchedRW = [WriteFDiv32, ReadFDiv32, ReadFDiv32] in
+defm FDIV_S : FPALU_rr_frm_m<0b0001100, "fdiv.s", FINX>;
+
+defm : FPALUDynFrmAlias_m<FADD_S, "fadd.s", FINX>;
+defm : FPALUDynFrmAlias_m<FSUB_S, "fsub.s", FINX>;
+defm : FPALUDynFrmAlias_m<FMUL_S, "fmul.s", FINX>;
+defm : FPALUDynFrmAlias_m<FDIV_S, "fdiv.s", FINX>;
+
+defm FSQRT_S : FPUnaryOp_r_frm_m<0b0101100, 0b00000, FFINX, "fsqrt.s">,
+ Sched<[WriteFSqrt32, ReadFSqrt32]>;
+defm : FPUnaryOpDynFrmAlias_m<FSQRT_S, "fsqrt.s", FFINX>;
let SchedRW = [WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32],
mayRaiseFPException = 0 in {
-def FSGNJ_S : FPALU_rr<0b0010000, 0b000, "fsgnj.s", FPR32>;
-def FSGNJN_S : FPALU_rr<0b0010000, 0b001, "fsgnjn.s", FPR32>;
-def FSGNJX_S : FPALU_rr<0b0010000, 0b010, "fsgnjx.s", FPR32>;
+defm FSGNJ_S : FPALU_rr_m<0b0010000, 0b000, "fsgnj.s", FINX>;
+defm FSGNJN_S : FPALU_rr_m<0b0010000, 0b001, "fsgnjn.s", FINX>;
+defm FSGNJX_S : FPALU_rr_m<0b0010000, 0b010, "fsgnjx.s", FINX>;
}
let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
-def FMIN_S : FPALU_rr<0b0010100, 0b000, "fmin.s", FPR32>;
-def FMAX_S : FPALU_rr<0b0010100, 0b001, "fmax.s", FPR32>;
+defm FMIN_S : FPALU_rr_m<0b0010100, 0b000, "fmin.s", FINX, /*Commutable*/1>;
+defm FMAX_S : FPALU_rr_m<0b0010100, 0b001, "fmax.s", FINX, /*Commutable*/1>;
}
-def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, 0b00000, GPR, FPR32, "fcvt.w.s">,
- Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>;
-
-def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, 0b00001, GPR, FPR32, "fcvt.wu.s">,
+defm FCVT_W_S : FPUnaryOp_r_frm_m<0b1100000, 0b00000, XFINX, "fcvt.w.s">,
Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_W_S, "fcvt.w.s", XFINX>;
+
+defm FCVT_WU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00001, XFINX, "fcvt.wu.s">,
+ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_S, "fcvt.wu.s", XFINX>;
let mayRaiseFPException = 0 in
def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32, "fmv.x.w">,
Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in {
-def FEQ_S : FPCmp_rr<0b1010000, 0b010, "feq.s", FPR32>;
-def FLT_S : FPCmp_rr<0b1010000, 0b001, "flt.s", FPR32>;
-def FLE_S : FPCmp_rr<0b1010000, 0b000, "fle.s", FPR32>;
+defm FEQ_S : FPCmp_rr_m<0b1010000, 0b010, "feq.s", FINX, /*Commutable*/1>;
+defm FLT_S : FPCmp_rr_m<0b1010000, 0b001, "flt.s", FINX>;
+defm FLE_S : FPCmp_rr_m<0b1010000, 0b000, "fle.s", FINX>;
}
let mayRaiseFPException = 0 in
-def FCLASS_S : FPUnaryOp_r<0b1110000, 0b00000, 0b001, GPR, FPR32, "fclass.s">,
- Sched<[WriteFClass32, ReadFClass32]>;
-
-def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, 0b00000, FPR32, GPR, "fcvt.s.w">,
- Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_S_W, "fcvt.s.w", FPR32, GPR>;
+defm FCLASS_S : FPUnaryOp_r_m<0b1110000, 0b00000, 0b001, XFINX, "fclass.s">,
+ Sched<[WriteFClass32, ReadFClass32]>;
-def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, 0b00001, FPR32, GPR, "fcvt.s.wu">,
+defm FCVT_S_W : FPUnaryOp_r_frm_m<0b1101000, 0b00000, FXINX, "fcvt.s.w">,
Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_S_WU, "fcvt.s.wu", FPR32, GPR>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_S_W, "fcvt.s.w", FXINX>;
+
+defm FCVT_S_WU : FPUnaryOp_r_frm_m<0b1101000, 0b00001, FXINX, "fcvt.s.wu">,
+ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_S_WU, "fcvt.s.wu", FXINX>;
let mayRaiseFPException = 0 in
def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">,
Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>;
-} // Predicates = [HasStdExtF]
-
-let Predicates = [HasStdExtF, IsRV64] in {
-def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, 0b00010, GPR, FPR32, "fcvt.l.s">,
- Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_L_S, "fcvt.l.s", GPR, FPR32>;
-def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, 0b00011, GPR, FPR32, "fcvt.lu.s">,
+defm FCVT_L_S : FPUnaryOp_r_frm_m<0b1100000, 0b00010, XFIN64X, "fcvt.l.s">,
Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_LU_S, "fcvt.lu.s", GPR, FPR32>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_L_S, "fcvt.l.s", XFIN64X>;
-def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, 0b00010, FPR32, GPR, "fcvt.s.l">,
- Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_S_L, "fcvt.s.l", FPR32, GPR>;
+defm FCVT_LU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00011, XFIN64X, "fcvt.lu.s">,
+ Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_S, "fcvt.lu.s", XFIN64X>;
-def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, 0b00011, FPR32, GPR, "fcvt.s.lu">,
+defm FCVT_S_L : FPUnaryOp_r_frm_m<0b1101000, 0b00010, FXIN64X, "fcvt.s.l">,
Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>;
-} // Predicates = [HasStdExtF, IsRV64]
+defm : FPUnaryOpDynFrmAlias_m<FCVT_S_L, "fcvt.s.l", FXIN64X>;
+
+defm FCVT_S_LU : FPUnaryOp_r_frm_m<0b1101000, 0b00011, FXIN64X, "fcvt.s.lu">,
+ Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_S_LU, "fcvt.s.lu", FXIN64X>;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
@@ -315,6 +452,16 @@ def PseudoQuietFLT_S : PseudoQuietFCMP<FPR32>;
}
} // Predicates = [HasStdExtF]
+let Predicates = [HasStdExtZfinx] in {
+def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S_INX FPR32INX:$rd, FPR32INX:$rs, FPR32INX:$rs)>;
+def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S_INX FPR32INX:$rd, FPR32INX:$rs, FPR32INX:$rs)>;
+
+def : InstAlias<"fgt.s $rd, $rs, $rt",
+ (FLT_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>;
+def : InstAlias<"fge.s $rd, $rs, $rt",
+ (FLE_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>;
+} // Predicates = [HasStdExtZfinx]
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
@@ -327,11 +474,13 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
class PatSetCC<RegisterClass Ty, SDPatternOperator OpNode, CondCode Cond, RVInst Inst>
: Pat<(OpNode Ty:$rs1, Ty:$rs2, Cond), (Inst $rs1, $rs2)>;
-class PatFpr32Fpr32<SDPatternOperator OpNode, RVInstR Inst>
- : Pat<(OpNode FPR32:$rs1, FPR32:$rs2), (Inst $rs1, $rs2)>;
+class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst,
+ RegisterClass RegTy>
+ : Pat<(OpNode RegTy:$rs1, RegTy:$rs2), (Inst $rs1, $rs2)>;
-class PatFpr32Fpr32DynFrm<SDPatternOperator OpNode, RVInstRFrm Inst>
- : Pat<(OpNode FPR32:$rs1, FPR32:$rs2), (Inst $rs1, $rs2, 0b111)>;
+class PatFprFprDynFrm<SDPatternOperator OpNode, RVInstRFrm Inst,
+ RegisterClass RegTy>
+ : Pat<(OpNode RegTy:$rs1, RegTy:$rs2), (Inst $rs1, $rs2, 0b111)>;
let Predicates = [HasStdExtF] in {
@@ -346,17 +495,17 @@ def : Pat<(f32 (fpimmneg0)), (FSGNJN_S (FMV_W_X X0), (FMV_W_X X0))>;
/// Float arithmetic operations
-def : PatFpr32Fpr32DynFrm<any_fadd, FADD_S>;
-def : PatFpr32Fpr32DynFrm<any_fsub, FSUB_S>;
-def : PatFpr32Fpr32DynFrm<any_fmul, FMUL_S>;
-def : PatFpr32Fpr32DynFrm<any_fdiv, FDIV_S>;
+def : PatFprFprDynFrm<any_fadd, FADD_S, FPR32>;
+def : PatFprFprDynFrm<any_fsub, FSUB_S, FPR32>;
+def : PatFprFprDynFrm<any_fmul, FMUL_S, FPR32>;
+def : PatFprFprDynFrm<any_fdiv, FDIV_S, FPR32>;
def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, 0b111)>;
def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>;
def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
-def : PatFpr32Fpr32<fcopysign, FSGNJ_S>;
+def : PatFprFpr<fcopysign, FSGNJ_S, FPR32>;
def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
// fmadd: rs1 * rs2 + rs3
@@ -375,11 +524,15 @@ def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
(FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+// fnmadd: -(rs1 * rs2 + rs3) (requires the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR32:$rs1, FPR32:$rs2, FPR32:$rs3)),
+ (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
// LLVM's fminnum and fmaxnum.
// <https://github.com/riscv/riscv-isa-manual/commit/cd20cee7efd9bac7c5aa127ec3b451749d2b3cce>.
-def : PatFpr32Fpr32<fminnum, FMIN_S>;
-def : PatFpr32Fpr32<fmaxnum, FMAX_S>;
+def : PatFprFpr<fminnum, FMIN_S, FPR32>;
+def : PatFprFpr<fmaxnum, FMAX_S, FPR32>;
/// Setcc
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index b62e23d3b0fa..72ba8460116f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -25,13 +25,13 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDT_RISCVIntBinOpW>;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtM] in {
-def MUL : ALU_rr<0b0000001, 0b000, "mul">,
+def MUL : ALU_rr<0b0000001, 0b000, "mul", /*Commutable*/1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
-def MULH : ALU_rr<0b0000001, 0b001, "mulh">,
+def MULH : ALU_rr<0b0000001, 0b001, "mulh", /*Commutable*/1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
-def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">,
+def MULHU : ALU_rr<0b0000001, 0b011, "mulhu", /*Commutable*/1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
def DIV : ALU_rr<0b0000001, 0b100, "div">,
Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
@@ -44,7 +44,7 @@ def REMU : ALU_rr<0b0000001, 0b111, "remu">,
} // Predicates = [HasStdExtM]
let Predicates = [HasStdExtM, IsRV64] in {
-def MULW : ALUW_rr<0b0000001, 0b000, "mulw">,
+def MULW : ALUW_rr<0b0000001, 0b000, "mulw", /*Commutable*/1>,
Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>;
def DIVW : ALUW_rr<0b0000001, 0b100, "divw">,
Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 306024a3e4fd..f8bc241039f8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// This file describes the RISC-V instructions from the standard 'V' Vector
-/// extension, version 0.10.
-/// This version is still experimental as the 'V' extension hasn't been
-/// ratified yet.
+/// extension, version 1.0.
///
//===----------------------------------------------------------------------===//
@@ -895,6 +893,7 @@ defm VSUB_V : VALU_IV_V_X<"vsub", 0b000010>;
defm VRSUB_V : VALU_IV_X_I<"vrsub", 0b000011>;
def : InstAlias<"vneg.v $vd, $vs$vm", (VRSUB_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
+def : InstAlias<"vneg.v $vd, $vs", (VRSUB_VX VR:$vd, VR:$vs, X0, zero_reg)>;
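+// (Sketch: zero_reg in the mask slot selects the unmasked encoding, so the
+// alias also accepts "vneg.v $vd, $vs" with no trailing mask operand.)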
// Vector Widening Integer Add/Subtract
// Refer to 11.2 Widening Vector Arithmetic Instructions
@@ -922,8 +921,12 @@ defm VWSUB_W : VALU_MV_V_X<"vwsub", 0b110111, "w">;
def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm",
(VWADD_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
+def : InstAlias<"vwcvt.x.x.v $vd, $vs",
+ (VWADD_VX VR:$vd, VR:$vs, X0, zero_reg)>;
def : InstAlias<"vwcvtu.x.x.v $vd, $vs$vm",
(VWADDU_VX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
+def : InstAlias<"vwcvtu.x.x.v $vd, $vs",
+ (VWADDU_VX VR:$vd, VR:$vs, X0, zero_reg)>;
// Vector Integer Extension
defm VZEXT_VF8 : VALU_MV_VS2<"vzext.vf8", 0b010010, 0b00010>;
@@ -952,6 +955,8 @@ defm VXOR_V : VALU_IV_V_X_I<"vxor", 0b001011>;
def : InstAlias<"vnot.v $vd, $vs$vm",
(VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>;
+def : InstAlias<"vnot.v $vd, $vs",
+ (VXOR_VI VR:$vd, VR:$vs, -1, zero_reg)>;
// Vector Single-Width Bit Shift Instructions
defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>;
@@ -970,6 +975,8 @@ defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
(VNSRL_WX VR:$vd, VR:$vs, X0, VMaskOp:$vm)>;
+def : InstAlias<"vncvt.x.x.w $vd, $vs",
+ (VNSRL_WX VR:$vd, VR:$vs, X0, zero_reg)>;
// Vector Integer Comparison Instructions
let RVVConstraint = NoConstraint in {
@@ -1124,12 +1131,16 @@ defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
let Predicates = [HasVInstructionsAnyF] in {
// Vector Single-Width Floating-Point Add/Subtract Instructions
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>;
defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>;
defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>;
+}
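// Rationale (sketch): these ops read the dynamic rounding mode, so modelling
// FRM as an implicit use keeps them ordered against fsrm/fsrmi writes, while
// mayRaiseFPException accounts for fflags updates.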
// Vector Widening Floating-Point Add/Subtract Instructions
-let Constraints = "@earlyclobber $vd" in {
+let Constraints = "@earlyclobber $vd",
+ Uses = [FRM],
+ mayRaiseFPException = true in {
let RVVConstraint = WidenV in {
defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>;
defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>;
@@ -1142,19 +1153,23 @@ let RVVConstraint = WidenW in {
defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">;
defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">;
} // RVVConstraint = WidenW
-} // Constraints = "@earlyclobber $vd"
+} // Constraints = "@earlyclobber $vd", Uses = [FRM], mayRaiseFPException = true
// Vector Single-Width Floating-Point Multiply/Divide Instructions
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>;
defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>;
defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>;
+}
// Vector Widening Floating-Point Multiply
-let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
+ Uses = [FRM], mayRaiseFPException = true in {
defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
@@ -1163,23 +1178,31 @@ defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>;
defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>;
defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>;
defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
+}
// Vector Widening Floating-Point Fused Multiply-Add Instructions
-let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
+let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
+ Uses = [FRM], mayRaiseFPException = true in {
defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;
defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
+} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true
// Vector Floating-Point Square-Root Instruction
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
-defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
+}
+
+let mayRaiseFPException = true in
+defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
// Vector Floating-Point MIN/MAX Instructions
+let mayRaiseFPException = true in {
defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>;
defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>;
+}
// Vector Floating-Point Sign-Injection Instructions
defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>;
@@ -1188,18 +1211,22 @@ defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>;
def : InstAlias<"vfneg.v $vd, $vs$vm",
(VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>;
+def : InstAlias<"vfneg.v $vd, $vs",
+ (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, zero_reg)>;
def : InstAlias<"vfabs.v $vd, $vs$vm",
(VFSGNJX_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>;
+def : InstAlias<"vfabs.v $vd, $vs",
+ (VFSGNJX_VV VR:$vd, VR:$vs, VR:$vs, zero_reg)>;
// Vector Floating-Point Compare Instructions
-let RVVConstraint = NoConstraint in {
+let RVVConstraint = NoConstraint, mayRaiseFPException = true in {
defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>;
defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>;
defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>;
defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>;
defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>;
defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>;
-} // RVVConstraint = NoConstraint
+} // RVVConstraint = NoConstraint, mayRaiseFPException = true
def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm",
(VMFLT_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
@@ -1288,10 +1315,14 @@ defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>;
let Predicates = [HasVInstructionsAnyF] in {
// Vector Single-Width Floating-Point Reduction Instructions
let RVVConstraint = NoConstraint in {
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>;
defm VFREDUSUM : VRED_FV_V<"vfredusum", 0b000001>;
+}
+let mayRaiseFPException = true in {
defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>;
defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>;
+}
} // RVVConstraint = NoConstraint
def : InstAlias<"vfredsum.vs $vd, $vs2, $vs1$vm",
@@ -1303,8 +1334,10 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
+let Uses = [FRM], mayRaiseFPException = true in {
defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>;
defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>;
+}
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm",
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 9087ed50f9fc..fbe396d278b4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -7,8 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// This file contains the required infrastructure to support code generation
-/// for the standard 'V' (Vector) extension, version 0.10. This version is still
-/// experimental as the 'V' extension hasn't been ratified yet.
+/// for the standard 'V' (Vector) extension, version 1.0.
///
/// This file is included from RISCVInstrInfoV.td
///
@@ -40,13 +39,37 @@ def DecImm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
-defvar TAIL_UNDISTURBED = 0;
+defvar TAIL_UNDISTURBED_MASK_UNDISTURBED = 0;
defvar TAIL_AGNOSTIC = 1;
//===----------------------------------------------------------------------===//
// Utilities.
//===----------------------------------------------------------------------===//
+class PseudoToVInst<string PseudoInst> {
+ string VInst = !subst("_M8", "",
+ !subst("_M4", "",
+ !subst("_M2", "",
+ !subst("_M1", "",
+ !subst("_MF2", "",
+ !subst("_MF4", "",
+ !subst("_MF8", "",
+ !subst("_B1", "",
+ !subst("_B2", "",
+ !subst("_B4", "",
+ !subst("_B8", "",
+ !subst("_B16", "",
+ !subst("_B32", "",
+ !subst("_B64", "",
+ !subst("_MASK", "",
+ !subst("_TIED", "",
+ !subst("_TU", "",
+ !subst("F16", "F",
+ !subst("F32", "F",
+ !subst("F64", "F",
+ !subst("Pseudo", "", PseudoInst)))))))))))))))))))));
+}
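+// For example (sketch): PseudoToVInst<"PseudoVADD_VX_M2_MASK">.VInst strips
+// the LMUL and _MASK suffixes, yielding "VADD_VX" as the base-instruction
+// name.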
+
// This class describes information associated to the LMUL.
class LMULInfo<int lmul, int oct, VReg regclass, VReg wregclass,
VReg f2regclass, VReg f4regclass, VReg f8regclass, string mx> {
@@ -403,7 +426,7 @@ class CONST8b<bits<8> val> {
def InvalidIndex : CONST8b<0x80>;
class RISCVVPseudo {
Pseudo Pseudo = !cast<Pseudo>(NAME); // Used as a key.
- Instruction BaseInstr;
+ Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
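// Deriving BaseInstr from the record name via PseudoToVInst makes the
// explicit per-class "let BaseInstr = ..." overrides unnecessary; they are
// dropped from the pseudo classes that follow.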
// The actual table.
@@ -419,11 +442,26 @@ def RISCVVPseudosTable : GenericTable {
def RISCVVIntrinsicsTable : GenericTable {
let FilterClass = "RISCVVIntrinsic";
let CppTypeName = "RISCVVIntrinsicInfo";
- let Fields = ["IntrinsicID", "SplatOperand", "VLOperand"];
+ let Fields = ["IntrinsicID", "ScalarOperand", "VLOperand"];
let PrimaryKey = ["IntrinsicID"];
let PrimaryKeyName = "getRISCVVIntrinsicInfo";
}
+class RISCVMaskedPseudo<bits<4> MaskIdx, bit HasTU = true> {
+ Pseudo MaskedPseudo = !cast<Pseudo>(NAME);
+ Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME));
+ Pseudo UnmaskedTUPseudo = !if(HasTU, !cast<Pseudo>(!subst("_MASK", "", NAME # "_TU")), MaskedPseudo);
+ bits<4> MaskOpIdx = MaskIdx;
+}
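+// Sketch of use (assumption): the table below lets post-isel code rewrite a
+// masked pseudo whose mask is known all-ones into its unmasked or
+// tail-undisturbed counterpart; MaskOpIdx records which operand is the mask.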
+
+def RISCVMaskedPseudosTable : GenericTable {
+ let FilterClass = "RISCVMaskedPseudo";
+ let CppTypeName = "RISCVMaskedPseudoInfo";
+ let Fields = ["MaskedPseudo", "UnmaskedPseudo", "UnmaskedTUPseudo", "MaskOpIdx"];
+ let PrimaryKey = ["MaskedPseudo"];
+ let PrimaryKeyName = "getMaskedPseudoInfo";
+}
+
class RISCVVLE<bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> {
bits<1> Masked = M;
bits<1> IsTU = TU;
@@ -489,9 +527,10 @@ def RISCVVSXTable : RISCVVLX_VSXTable {
let PrimaryKeyName = "getVSXPseudo";
}
-class RISCVVLSEG<bits<4> N, bit M, bit Str, bit F, bits<3> S, bits<3> L> {
+class RISCVVLSEG<bits<4> N, bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> {
bits<4> NF = N;
bits<1> Masked = M;
+ bits<1> IsTU = TU;
bits<1> Strided = Str;
bits<1> FF = F;
bits<3> Log2SEW = S;
@@ -502,14 +541,15 @@ class RISCVVLSEG<bits<4> N, bit M, bit Str, bit F, bits<3> S, bits<3> L> {
def RISCVVLSEGTable : GenericTable {
let FilterClass = "RISCVVLSEG";
let CppTypeName = "VLSEGPseudo";
- let Fields = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
- let PrimaryKey = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL"];
+ let Fields = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
+ let PrimaryKey = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"];
let PrimaryKeyName = "getVLSEGPseudo";
}
-class RISCVVLXSEG<bits<4> N, bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
+class RISCVVLXSEG<bits<4> N, bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> {
bits<4> NF = N;
bits<1> Masked = M;
+ bits<1> IsTU = TU;
bits<1> Ordered = O;
bits<3> Log2SEW = S;
bits<3> LMUL = L;
@@ -520,8 +560,8 @@ class RISCVVLXSEG<bits<4> N, bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
def RISCVVLXSEGTable : GenericTable {
let FilterClass = "RISCVVLXSEG";
let CppTypeName = "VLXSEGPseudo";
- let Fields = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
- let PrimaryKey = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
+ let Fields = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
+ let PrimaryKey = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
let PrimaryKeyName = "getVLXSEGPseudo";
}
@@ -564,30 +604,6 @@ def RISCVVSXSEGTable : GenericTable {
// Helpers to define the different pseudo instructions.
//===----------------------------------------------------------------------===//
-class PseudoToVInst<string PseudoInst> {
- string VInst = !subst("_M8", "",
- !subst("_M4", "",
- !subst("_M2", "",
- !subst("_M1", "",
- !subst("_MF2", "",
- !subst("_MF4", "",
- !subst("_MF8", "",
- !subst("_B1", "",
- !subst("_B2", "",
- !subst("_B4", "",
- !subst("_B8", "",
- !subst("_B16", "",
- !subst("_B32", "",
- !subst("_B64", "",
- !subst("_MASK", "",
- !subst("_TIED", "",
- !subst("_TU", "",
- !subst("F16", "F",
- !subst("F32", "F",
- !subst("F64", "F",
- !subst("Pseudo", "", PseudoInst)))))))))))))))))))));
-}
-
// The destination vector register group for a masked vector instruction cannot
// overlap the source mask register (v0), unless the destination vector register
// is being written with a mask value (e.g., comparisons) or the scalar result
@@ -627,25 +643,24 @@ class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins> :
let VLMul = m.value;
}
-class VPseudoUSLoadNoMask<VReg RetClass, int EEW, bit isFF> :
+class VPseudoUSLoadNoMask<VReg RetClass, int EEW, bit DummyMask = 1> :
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let HasDummyMask = DummyMask;
}
-class VPseudoUSLoadNoMaskTU<VReg RetClass, int EEW, bit isFF> :
+class VPseudoUSLoadNoMaskTU<VReg RetClass, int EEW> :
Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -654,16 +669,15 @@ class VPseudoUSLoadNoMaskTU<VReg RetClass, int EEW, bit isFF> :
let HasDummyMask = 1;
let HasMergeOp = 1;
let Constraints = "$rd = $dest";
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
+class VPseudoUSLoadMask<VReg RetClass, int EEW> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -672,7 +686,53 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
+}
+
+class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW, bit DummyMask = 1> :
+ Pseudo<(outs RetClass:$rd, GPR:$vl),
+ (ins GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = DummyMask;
+}
+
+class VPseudoUSLoadFFNoMaskTU<VReg RetClass, int EEW> :
+ Pseudo<(outs RetClass:$rd, GPR:$vl),
+ (ins RetClass:$dest, GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
+ let Constraints = "$rd = $dest";
+}
+
+class VPseudoUSLoadFFMask<VReg RetClass, int EEW> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ GPR:$rs1,
+ VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ RISCVVPseudo,
+ RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = "$rd = $merge";
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let UsesMaskPolicy = 1;
}
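A minimal sketch of instantiating the new fault-only-first load class; the record name and the use of V_M1/VR mirror what VPseudoFFLoad expands to further down, but this standalone def is illustrative only:

// Illustrative: an EEW=32, LMUL=1 vle32ff.v pseudo. Note the second
// result, GPR:$vl, which carries the VL value trimmed by the fault.
let VLMul = V_M1.value in
def PseudoVLE32FF_V_M1_Sketch : VPseudoUSLoadFFNoMask<VR, /*EEW*/32>;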
class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
@@ -686,7 +746,6 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoSLoadNoMaskTU<VReg RetClass, int EEW>:
@@ -702,7 +761,6 @@ class VPseudoSLoadNoMaskTU<VReg RetClass, int EEW>:
let HasDummyMask = 1;
let HasMergeOp = 1;
let Constraints = "$rd = $dest";
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoSLoadMask<VReg RetClass, int EEW>:
@@ -720,7 +778,7 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -737,7 +795,6 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasSEWOp = 1;
let HasDummyMask = 1;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd", "");
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoILoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -755,7 +812,6 @@ class VPseudoILoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasDummyMask = 1;
let HasMergeOp = 1;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest");
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -774,10 +830,10 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
-class VPseudoUSStoreNoMask<VReg StClass, int EEW>:
+class VPseudoUSStoreNoMask<VReg StClass, int EEW, bit DummyMask = 1>:
Pseudo<(outs),
(ins StClass:$rd, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
@@ -787,8 +843,7 @@ class VPseudoUSStoreNoMask<VReg StClass, int EEW>:
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let HasDummyMask = DummyMask;
}
class VPseudoUSStoreMask<VReg StClass, int EEW>:
@@ -801,7 +856,6 @@ class VPseudoUSStoreMask<VReg StClass, int EEW>:
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoSStoreNoMask<VReg StClass, int EEW>:
@@ -815,7 +869,6 @@ class VPseudoSStoreNoMask<VReg StClass, int EEW>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoSStoreMask<VReg StClass, int EEW>:
@@ -828,7 +881,6 @@ class VPseudoSStoreMask<VReg StClass, int EEW>:
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
// Unary instruction that is never masked so HasDummyMask=0.
@@ -842,7 +894,20 @@ class VPseudoUnaryNoDummyMask<VReg RetClass,
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoUnaryNoDummyMaskTU<VReg RetClass,
+ DAGOperand Op2Class> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$dest, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let Constraints = "$rd = $dest";
}
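The vmv.v.v/vmv.v.x/vmv.v.i pseudos below instantiate this pair; a schematic instantiation, with names suffixed to mark them as illustrative:

// Illustrative: plain vs. tail-undisturbed vmv.v.v at LMUL=1. The TU
// form adds a $dest operand and ties $rd to it.
let VLMul = V_M1.value in {
  def PseudoVMV_V_V_M1_Sketch    : VPseudoUnaryNoDummyMask<VR, VR>;
  def PseudoVMV_V_V_M1_TU_Sketch : VPseudoUnaryNoDummyMaskTU<VR, VR>;
}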
class VPseudoNullaryNoMask<VReg RegClass>:
@@ -855,13 +920,26 @@ class VPseudoNullaryNoMask<VReg RegClass>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoNullaryNoMaskTU<VReg RegClass>:
+ Pseudo<(outs RegClass:$rd),
+ (ins RegClass:$merge, AVL:$vl, ixlenimm:$sew),
+ []>, RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = "$rd = $merge";
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
}
class VPseudoNullaryMask<VReg RegClass>:
Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
(ins GetVRegNoV0<RegClass>.R:$merge, VMaskOp:$vm, AVL:$vl,
- ixlenimm:$sew), []>, RISCVVPseudo {
+ ixlenimm:$sew, ixlenimm:$policy), []>, RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -869,7 +947,8 @@ class VPseudoNullaryMask<VReg RegClass>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
+ let HasVecPolicyOp = 1;
}
// Nullary for pseudo instructions. They are expanded in
@@ -899,7 +978,21 @@ class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint =
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+// RetClass could be GPR or VReg.
+class VPseudoUnaryNoMaskTU<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
}
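Here Join splices the caller's constraint onto the merge tie; assuming, as its uses in this file require, that Join drops empty strings, the resulting constraint strings look like:

//   Join<["@earlyclobber $rd", "$rd = $merge"], ",">.ret
//     -> "@earlyclobber $rd,$rd = $merge"
//   Join<["", "$rd = $merge"], ",">.ret
//     -> "$rd = $merge"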
class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
@@ -914,7 +1007,7 @@ class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
class VPseudoUnaryMaskTA<VReg RetClass, VReg OpClass, string Constraint = ""> :
@@ -930,7 +1023,7 @@ class VPseudoUnaryMaskTA<VReg RetClass, VReg OpClass, string Constraint = ""> :
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
// mask unary operation without maskedoff
@@ -943,7 +1036,6 @@ class VPseudoMaskUnarySOutMask:
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
// Mask can be V0~V31
@@ -962,13 +1054,13 @@ class VPseudoUnaryAnyMask<VReg RetClass,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoBinaryNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint> :
+ string Constraint,
+ int DummyMask = 1> :
Pseudo<(outs RetClass:$rd),
(ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
@@ -978,8 +1070,24 @@ class VPseudoBinaryNoMask<VReg RetClass,
let Constraints = Constraint;
let HasVLOp = 1;
let HasSEWOp = 1;
+ let HasDummyMask = DummyMask;
+}
+
+class VPseudoBinaryNoMaskTU<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let HasMergeOp = 1;
}
// Special version of VPseudoBinaryNoMask where we pretend the first source is
@@ -989,7 +1097,8 @@ class VPseudoTiedBinaryNoMask<VReg RetClass,
DAGOperand Op2Class,
string Constraint> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@@ -998,9 +1107,8 @@ class VPseudoTiedBinaryNoMask<VReg RetClass,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let ForceTailAgnostic = 1;
+ let HasVecPolicyOp = 1;
let isConvertibleToThreeAddress = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1015,7 +1123,6 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1029,7 +1136,6 @@ class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoBinaryMask<VReg RetClass,
@@ -1048,13 +1154,12 @@ class VPseudoBinaryMask<VReg RetClass,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-class VPseudoBinaryMaskTA<VReg RetClass,
- RegisterClass Op1Class,
- DAGOperand Op2Class,
- string Constraint> :
+class VPseudoBinaryMaskPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
Op1Class:$rs2, Op2Class:$rs1,
@@ -1068,7 +1173,7 @@ class VPseudoBinaryMaskTA<VReg RetClass,
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
// Like VPseudoBinaryMask, but output can be V0.
@@ -1088,7 +1193,7 @@ class VPseudoBinaryMOutMask<VReg RetClass,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
// Special version of VPseudoBinaryMask where we pretend the first source is
@@ -1110,7 +1215,7 @@ class VPseudoTiedBinaryMask<VReg RetClass,
let HasSEWOp = 1;
let HasMergeOp = 0; // Merge is also rs2.
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
class VPseudoBinaryCarryIn<VReg RetClass,
@@ -1132,7 +1237,6 @@ class VPseudoBinaryCarryIn<VReg RetClass,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 0;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
let VLMul = MInfo.value;
}
@@ -1156,7 +1260,6 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 0;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
let VLMul = MInfo.value;
}
@@ -1177,7 +1280,6 @@ class VPseudoTernaryNoMask<VReg RetClass,
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
@@ -1198,29 +1300,87 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
+class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
+class VPseudoUSSegLoadNoMaskTU<VReg RetClass, int EEW, bits<4> NF>:
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$dest, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLSEG<NF, /*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
+ let Constraints = "$rd = $dest";
+}
+
+class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = "$rd = $merge";
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let UsesMaskPolicy = 1;
+}
+
+class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>:
+ Pseudo<(outs RetClass:$rd, GPR:$vl),
+ (ins GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLSEG<NF, /*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+}
+
+class VPseudoUSSegLoadFFNoMaskTU<VReg RetClass, int EEW, bits<4> NF>:
+ Pseudo<(outs RetClass:$rd, GPR:$vl),
+ (ins RetClass:$dest, GPR:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLSEG<NF, /*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
+ let Constraints = "$rd = $dest";
+}
+
+class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>:
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
+ (ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
+ VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
+ RISCVVPseudo,
+ RISCVVLSEG<NF, /*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1229,14 +1389,14 @@ class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/0, /*TU*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
@@ -1244,7 +1404,22 @@ class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSSegLoadNoMaskTU<VReg RetClass, int EEW, bits<4> NF>:
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, GPR:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLSEG<NF, /*Masked*/0, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
+ let Constraints = "$rd = $merge";
}
class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
@@ -1253,7 +1428,7 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/1, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1262,7 +1437,7 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1270,7 +1445,7 @@ class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLXSEG<NF, /*Masked*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLXSEG<NF, /*Masked*/0, /*TU*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1280,7 +1455,24 @@ class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoISegLoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
+ bits<4> NF, bit Ordered>:
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, GPR:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLXSEG<NF, /*Masked*/0, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ // For vector indexed segment loads, the destination vector register groups
+ // cannot overlap the source vector register group.
+ let Constraints = "@earlyclobber $rd, $rd = $merge";
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
}
class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1290,7 +1482,7 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLXSEG<NF, /*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLXSEG<NF, /*Masked*/1, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1301,7 +1493,7 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasSEWOp = 1;
let HasMergeOp = 1;
let HasVecPolicyOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let UsesMaskPolicy = 1;
}
class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
@@ -1315,7 +1507,6 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
@@ -1329,7 +1520,6 @@ class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
@@ -1343,7 +1533,6 @@ class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
@@ -1357,7 +1546,6 @@ class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1373,7 +1561,6 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL
let HasVLOp = 1;
let HasSEWOp = 1;
let HasDummyMask = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1388,7 +1575,6 @@ class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
multiclass VPseudoUSLoad {
@@ -1398,13 +1584,13 @@ multiclass VPseudoUSLoad {
defvar vreg = lmul.vrclass;
let VLMul = lmul.value in {
def "E" # eew # "_V_" # LInfo :
- VPseudoUSLoadNoMask<vreg, eew, false>,
+ VPseudoUSLoadNoMask<vreg, eew>,
VLESched<eew>;
def "E" # eew # "_V_" # LInfo # "_TU":
- VPseudoUSLoadNoMaskTU<vreg, eew, false>,
+ VPseudoUSLoadNoMaskTU<vreg, eew>,
VLESched<eew>;
def "E" # eew # "_V_" # LInfo # "_MASK" :
- VPseudoUSLoadMask<vreg, eew, false>,
+ VPseudoUSLoadMask<vreg, eew>,
VLESched<eew>;
}
}
@@ -1417,14 +1603,14 @@ multiclass VPseudoFFLoad {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
let VLMul = lmul.value in {
- def "E" # eew # "FF_V_" # LInfo :
- VPseudoUSLoadNoMask<vreg, eew, true>,
+ def "E" # eew # "FF_V_" # LInfo:
+ VPseudoUSLoadFFNoMask<vreg, eew>,
VLFSched<eew>;
def "E" # eew # "FF_V_" # LInfo # "_TU":
- VPseudoUSLoadNoMaskTU<vreg, eew, true>,
+ VPseudoUSLoadFFNoMaskTU<vreg, eew>,
VLFSched<eew>;
- def "E" # eew # "FF_V_" # LInfo # "_MASK" :
- VPseudoUSLoadMask<vreg, eew, true>,
+ def "E" # eew # "FF_V_" # LInfo # "_MASK":
+ VPseudoUSLoadFFMask<vreg, eew>,
VLFSched<eew>;
}
}
@@ -1434,7 +1620,7 @@ multiclass VPseudoFFLoad {
multiclass VPseudoLoadMask {
foreach mti = AllMasks in {
let VLMul = mti.LMul.value in {
- def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR, /*EEW*/1, /*isFF*/0>;
+ def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR, /*EEW*/1, /*DummyMask*/0>;
}
}
}
@@ -1506,7 +1692,7 @@ multiclass VPseudoUSStore {
multiclass VPseudoStoreMask {
foreach mti = AllMasks in {
let VLMul = mti.LMul.value in {
- def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR, /*EEW*/1>;
+ def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR, /*EEW*/1, /*DummyMask*/0>;
}
}
}
@@ -1596,6 +1782,8 @@ multiclass VPseudoVID_V {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>,
Sched<[WriteVMIdxV, ReadVMask]>;
+ def "_V_" # m.MX # "_TU": VPseudoNullaryNoMaskTU<m.vrclass>,
+ Sched<[WriteVMIdxV, ReadVMask]>;
def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>,
Sched<[WriteVMIdxV, ReadVMask]>;
}
@@ -1616,7 +1804,9 @@ multiclass VPseudoVIOT_M {
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
+ def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU<m.vrclass, VR, constraint>,
+ Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, VR, constraint>,
Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
}
}
@@ -1638,8 +1828,11 @@ multiclass VPseudoBinary<VReg RetClass,
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskTA<RetClass, Op1Class, Op2Class,
+ def "_" # MInfo.MX # "_TU" : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
+ Constraint>,
+ RISCVMaskedPseudo</*MaskOpIdx*/ 3>;
}
}
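RISCVMaskedPseudo, defined elsewhere in this file, is what makes the unmasked/TU/_MASK triple discoverable at ISel time; schematically, the generated table row ties the variants together:

//   _MASK pseudo            MaskOpIdx  unmasked twin      TU twin
//   PseudoVADD_VV_M1_MASK       3      PseudoVADD_VV_M1   PseudoVADD_VV_M1_TU
// so a peephole that proves the mask is all-ones can swap pseudos by
// table lookup instead of by name mangling.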
@@ -1653,7 +1846,8 @@ multiclass VPseudoBinaryM<VReg RetClass,
Constraint>;
let ForceTailAgnostic = true in
def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask<RetClass, Op1Class,
- Op2Class, Constraint>;
+ Op2Class, Constraint>,
+ RISCVMaskedPseudo</*MaskOpIdx*/ 3, /*HasTU*/ false>;
}
}
@@ -1666,8 +1860,11 @@ multiclass VPseudoBinaryEmul<VReg RetClass,
let VLMul = lmul.value in {
def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
Constraint>;
- def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskTA<RetClass, Op1Class, Op2Class,
- Constraint>;
+ def "_" # lmul.MX # "_" # emul.MX # "_TU": VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
+ Constraint>,
+ RISCVMaskedPseudo</*MaskOpIdx*/ 3>;
}
}
@@ -1744,7 +1941,7 @@ multiclass VPseudoBinaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
multiclass VPseudoVALU_MM {
foreach m = MxList in
let VLMul = m.value in {
- def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">,
+ def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "", /*DummyMask*/0>,
Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
}
}
@@ -1907,6 +2104,12 @@ multiclass VPseudoUnaryVMV_V_X_I {
Sched<[WriteVIMovX, ReadVIMovX]>;
def "_I_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, simm5>,
Sched<[WriteVIMovI]>;
+ def "_V_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU<m.vrclass, m.vrclass>,
+ Sched<[WriteVIMovV, ReadVIMovV]>;
+ def "_X_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU<m.vrclass, GPR>,
+ Sched<[WriteVIMovX, ReadVIMovX]>;
+ def "_I_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU<m.vrclass, simm5>,
+ Sched<[WriteVIMovI]>;
}
}
}
@@ -1918,6 +2121,9 @@ multiclass VPseudoVMV_F {
def "_" # f.FX # "_" # m.MX :
VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>,
Sched<[WriteVFMovV, ReadVFMovF]>;
+ def "_" # f.FX # "_" # m.MX # "_TU":
+ VPseudoUnaryNoDummyMaskTU<m.vrclass, f.fprclass>,
+ Sched<[WriteVFMovV, ReadVFMovF]>;
}
}
}
@@ -1928,7 +2134,9 @@ multiclass VPseudoVCLS_V {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
- def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.vrclass>,
+ Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
}
}
@@ -1939,6 +2147,8 @@ multiclass VPseudoVSQR_V {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
+ def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.vrclass>,
+ Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
}
@@ -1950,6 +2160,8 @@ multiclass VPseudoVRCP_V {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
+ def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.vrclass>,
+ Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
}
@@ -1963,8 +2175,11 @@ multiclass PseudoVEXT_VF2 {
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>,
Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
+ def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.f2vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
def "_" # m.MX # "_MASK" :
VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass, constraints>,
+ RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
}
@@ -1977,8 +2192,11 @@ multiclass PseudoVEXT_VF4 {
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>,
Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
+ def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.f4vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
def "_" # m.MX # "_MASK" :
VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass, constraints>,
+ RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
}
@@ -1991,8 +2209,11 @@ multiclass PseudoVEXT_VF8 {
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>,
Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
+ def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.f8vrclass, constraints>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
def "_" # m.MX # "_MASK" :
VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass, constraints>,
+ RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
}
@@ -2248,6 +2469,13 @@ multiclass VPseudoVCALU_VM_XM_IM {
Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
defm "" : VPseudoBinaryV_IM,
Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
+ // Tied versions to allow codegen control over the tail elements
+ defm "" : VPseudoTiedBinaryV_VM,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ defm "" : VPseudoTiedBinaryV_XM,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+ defm "" : VPseudoTiedBinaryV_IM,
+ Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
}
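The tied carry-in variants select the same vadc/vmadc-style encodings as the untied ones; the difference is only the merge tie, sketched here with operand lists abbreviated and the _TU spelling matching the convention used elsewhere in this patch:

//   PseudoVADC_VVM_M1     $rd, $rs2, $rs1, $carry(v0), $vl, $sew
//       merge is undef; tail elements are unconstrained
//   PseudoVADC_VVM_M1_TU  $rd, $merge, $rs2, $rs1, $carry(v0), $vl, $sew
//       "$rd = $merge"; tail elements come from $merge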
multiclass VPseudoVCALU_VM_XM {
@@ -2255,6 +2483,11 @@ multiclass VPseudoVCALU_VM_XM {
Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
defm "" : VPseudoBinaryV_XM,
Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+ // Tied versions to allow codegen control over the tail elements
+ defm "" : VPseudoTiedBinaryV_VM,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ defm "" : VPseudoTiedBinaryV_XM,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
}
multiclass VPseudoVCALUM_VM_XM_IM<string Constraint> {
@@ -2318,6 +2551,19 @@ multiclass VPseudoTernary<VReg RetClass,
}
}
+multiclass VPseudoTernaryNoMaskNoPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX : VPseudoTernaryNoMask<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ }
+}
+
multiclass VPseudoTernaryWithPolicy<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
@@ -2327,7 +2573,7 @@ multiclass VPseudoTernaryWithPolicy<VReg RetClass,
let VLMul = MInfo.value in {
let isCommutable = Commutable in
def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>;
}
}
@@ -2339,9 +2585,9 @@ multiclass VPseudoTernaryV_VV_AAXA<string Constraint = "",
}
}
-multiclass VPseudoTernaryV_VX<string Constraint = ""> {
+multiclass VPseudoVSLDV_VX<string Constraint = ""> {
foreach m = MxList in
- defm _VX : VPseudoTernary<m.vrclass, m.vrclass, GPR, m, Constraint>;
+ defm _VX : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, GPR, m, Constraint>;
}
multiclass VPseudoTernaryV_VX_AAXA<string Constraint = ""> {
@@ -2380,9 +2626,9 @@ multiclass VPseudoTernaryW_VF {
m.vrclass, m, constraint>;
}
-multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
+multiclass VPseudoVSLDV_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in
- defm _VI : VPseudoTernary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
+ defm _VI : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
@@ -2400,9 +2646,9 @@ multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
}
multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
- defm "" : VPseudoTernaryV_VX<Constraint>,
+ defm "" : VPseudoVSLDV_VX<Constraint>,
Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideV, ReadVISlideX, ReadVMask]>;
- defm "" : VPseudoTernaryV_VI<ImmType, Constraint>,
+ defm "" : VPseudoVSLDV_VI<ImmType, Constraint>,
Sched<[WriteVISlideI, ReadVISlideV, ReadVISlideV, ReadVMask]>;
}
@@ -2501,8 +2747,10 @@ multiclass VPseudoConversion<VReg RetClass,
string Constraint = ""> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX # "_TU": VPseudoUnaryNoMaskTU<RetClass, Op1Class, Constraint>;
def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA<RetClass, Op1Class,
- Constraint>;
+ Constraint>,
+ RISCVMaskedPseudo</*MaskOpIdx*/ 2>;
}
}
@@ -2566,18 +2814,38 @@ multiclass VPseudoVNCVTD_W {
Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>;
}
-multiclass VPseudoUSSegLoad<bit isFF> {
+multiclass VPseudoUSSegLoad {
foreach eew = EEWList in {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
let VLMul = lmul.value in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
- defvar FFStr = !if(isFF, "FF", "");
- def nf # "E" # eew # FFStr # "_V_" # LInfo :
- VPseudoUSSegLoadNoMask<vreg, eew, nf, isFF>;
- def nf # "E" # eew # FFStr # "_V_" # LInfo # "_MASK" :
- VPseudoUSSegLoadMask<vreg, eew, nf, isFF>;
+ def nf # "E" # eew # "_V_" # LInfo :
+ VPseudoUSSegLoadNoMask<vreg, eew, nf>;
+ def nf # "E" # eew # "_V_" # LInfo # "_TU" :
+ VPseudoUSSegLoadNoMaskTU<vreg, eew, nf>;
+ def nf # "E" # eew # "_V_" # LInfo # "_MASK" :
+ VPseudoUSSegLoadMask<vreg, eew, nf>;
+ }
+ }
+ }
+ }
+}
+
+multiclass VPseudoUSSegLoadFF {
+ foreach eew = EEWList in {
+ foreach lmul = MxSet<eew>.m in {
+ defvar LInfo = lmul.MX;
+ let VLMul = lmul.value in {
+ foreach nf = NFSet<lmul>.L in {
+ defvar vreg = SegRegClass<lmul, nf>.RC;
+ def nf # "E" # eew # "FF_V_" # LInfo :
+ VPseudoUSSegLoadFFNoMask<vreg, eew, nf>;
+ def nf # "E" # eew # "FF_V_" # LInfo # "_TU" :
+ VPseudoUSSegLoadFFNoMaskTU<vreg, eew, nf>;
+ def nf # "E" # eew # "FF_V_" # LInfo # "_MASK" :
+ VPseudoUSSegLoadFFMask<vreg, eew, nf>;
}
}
}
@@ -2592,6 +2860,7 @@ multiclass VPseudoSSegLoad {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask<vreg, eew, nf>;
+ def nf # "E" # eew # "_V_" # LInfo # "_TU" : VPseudoSSegLoadNoMaskTU<vreg, eew, nf>;
def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask<vreg, eew, nf>;
}
}
@@ -2618,6 +2887,9 @@ multiclass VPseudoISegLoad<bit Ordered> {
def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo :
VPseudoISegLoadNoMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
nf, Ordered>;
+ def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_TU" :
+ VPseudoISegLoadNoMaskTU<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
+ nf, Ordered>;
def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" :
VPseudoISegLoadMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
nf, Ordered>;
@@ -2702,12 +2974,31 @@ class VPatUnaryNoMask<string intrinsic_name,
LMULInfo vlmul,
VReg op2_reg_class> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type undef),
(op2_type op2_reg_class:$rs2),
VLOpFrag)),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(op2_type op2_reg_class:$rs2),
GPR:$vl, sew)>;
+class VPatUnaryNoMaskTU<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_TU")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ GPR:$vl, sew)>;
+
class VPatUnaryMask<string intrinsic_name,
string inst,
string kind,
@@ -2792,15 +3083,33 @@ class VPatUnaryAnyMask<string intrinsic,
(mask_type VR:$rs2),
GPR:$vl, sew)>;
-class VPatBinaryNoMask<string intrinsic_name,
- string inst,
- ValueType result_type,
- ValueType op1_type,
- ValueType op2_type,
- int sew,
- VReg op1_reg_class,
- DAGOperand op2_kind> :
+class VPatBinaryM<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew)>;
+
+class VPatBinaryNoMaskTA<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type (undef)),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
VLOpFrag)),
@@ -2809,6 +3118,26 @@ class VPatBinaryNoMask<string intrinsic_name,
(op2_type op2_kind:$rs2),
GPR:$vl, sew)>;
+class VPatBinaryNoMaskTU<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_TU")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew)>;
+
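Together the TA and TU patterns split the intrinsic on its new leading merge operand; schematically:

//   intrinsic(undef, %a, %b, vl)  -> Pseudo..._<LMUL>     (tail agnostic)
//   intrinsic(%pt,   %a, %b, vl)  -> Pseudo..._<LMUL>_TU  ($rd tied to %pt)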
// Same as above but source operands are swapped.
class VPatBinaryNoMaskSwapped<string intrinsic_name,
string inst,
@@ -2902,13 +3231,31 @@ class VPatTiedBinaryNoMask<string intrinsic_name,
VReg result_reg_class,
DAGOperand op2_kind> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type (undef)),
(result_type result_reg_class:$rs1),
(op2_type op2_kind:$rs2),
VLOpFrag)),
(!cast<Instruction>(inst#"_TIED")
(result_type result_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- GPR:$vl, sew)>;
+ GPR:$vl, sew, TAIL_AGNOSTIC)>;
+
+class VPatTiedBinaryNoMaskTU<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ VReg result_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (result_type result_reg_class:$merge),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_TIED")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
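The trailing operand of the TIED selections above is the vector policy immediate; the concrete encodings are assumed here to be the usual RISC-V vector policy values:

// Assumed policy immediates (tail bit 0, mask bit 1):
//   TAIL_UNDISTURBED_MASK_UNDISTURBED = 0
//   TAIL_AGNOSTIC                     = 1
// i.e. after expansion the undef-merge pattern ends "...GPR:$vl, sew, 1"
// and the tied-merge (TU) pattern ends "...GPR:$vl, sew, 0".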
class VPatTiedBinaryMask<string intrinsic_name,
string inst,
@@ -2966,12 +3313,12 @@ class VPatTernaryNoMaskWithPolicy<string intrinsic,
(result_type result_reg_class:$rs3),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- VLOpFrag)),
+ VLOpFrag, (XLenVT timm:$policy))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
result_reg_class:$rs3,
(op1_type op1_reg_class:$rs1),
op2_kind:$rs2,
- GPR:$vl, sew, TAIL_UNDISTURBED)>;
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
class VPatTernaryMask<string intrinsic,
string inst,
@@ -2998,6 +3345,31 @@ class VPatTernaryMask<string intrinsic,
(mask_type V0),
GPR:$vl, sew)>;
+class VPatTernaryMaskPolicy<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ (mask_type V0),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
multiclass VPatUnaryS_M<string intrinsic_name,
string inst>
{
@@ -3037,8 +3409,10 @@ multiclass VPatUnaryV_M<string intrinsic, string instruction>
foreach vti = AllIntegerVectors in {
def : VPatUnaryNoMask<intrinsic, instruction, "M", vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, VR>;
- def : VPatUnaryMask<intrinsic, instruction, "M", vti.Vector, vti.Mask,
- vti.Mask, vti.Log2SEW, vti.LMul, vti.RegClass, VR>;
+ def : VPatUnaryNoMaskTU<intrinsic, instruction, "M", vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, VR>;
+ def : VPatUnaryMaskTA<intrinsic, instruction, "M", vti.Vector, vti.Mask,
+ vti.Mask, vti.Log2SEW, vti.LMul, vti.RegClass, VR>;
}
}
@@ -3052,6 +3426,9 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
def : VPatUnaryNoMask<intrinsic, instruction, suffix,
vti.Vector, fti.Vector,
vti.Log2SEW, vti.LMul, fti.RegClass>;
+ def : VPatUnaryNoMaskTU<intrinsic, instruction, suffix,
+ vti.Vector, fti.Vector,
+ vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
def : VPatUnaryMaskTA<intrinsic, instruction, suffix,
vti.Vector, fti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
@@ -3064,6 +3441,9 @@ multiclass VPatUnaryV_V<string intrinsic, string instruction,
def : VPatUnaryNoMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector,
vti.Log2SEW, vti.LMul, vti.RegClass>;
+ def : VPatUnaryNoMaskTU<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
def : VPatUnaryMaskTA<intrinsic, instruction, "V",
vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
@@ -3074,27 +3454,33 @@ multiclass VPatNullaryV<string intrinsic, string instruction>
{
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
+ (vti.Vector undef),
VLOpFrag)),
(!cast<Instruction>(instruction#"_V_" # vti.LMul.MX)
GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
+ (vti.Vector vti.RegClass:$merge),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_TU")
+ vti.RegClass:$merge, GPR:$vl, vti.Log2SEW)>;
def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask")
(vti.Vector vti.RegClass:$merge),
- (vti.Mask V0), VLOpFrag)),
+ (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))),
(!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK")
vti.RegClass:$merge, (vti.Mask V0),
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
}
}
multiclass VPatNullaryM<string intrinsic, string inst> {
foreach mti = AllMasks in
def : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic)
- (XLenVT (VLOp (XLenVT (XLenVT GPR:$vl)))))),
+ VLOpFrag)),
(!cast<Instruction>(inst#"_M_"#mti.BX)
GPR:$vl, mti.Log2SEW)>;
}
-multiclass VPatBinary<string intrinsic,
+multiclass VPatBinaryM<string intrinsic,
string inst,
ValueType result_type,
ValueType op1_type,
@@ -3105,8 +3491,8 @@ multiclass VPatBinary<string intrinsic,
VReg op1_reg_class,
DAGOperand op2_kind>
{
- def : VPatBinaryNoMask<intrinsic, inst, result_type, op1_type, op2_type,
- sew, op1_reg_class, op2_kind>;
+ def : VPatBinaryM<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, op1_reg_class, op2_kind>;
def : VPatBinaryMask<intrinsic, inst, result_type, op1_type, op2_type,
mask_type, sew, result_reg_class, op1_reg_class,
op2_kind>;
@@ -3123,8 +3509,10 @@ multiclass VPatBinaryTA<string intrinsic,
VReg op1_reg_class,
DAGOperand op2_kind>
{
- def : VPatBinaryNoMask<intrinsic, inst, result_type, op1_type, op2_type,
- sew, op1_reg_class, op2_kind>;
+ def : VPatBinaryNoMaskTA<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, op1_reg_class, op2_kind>;
+ def : VPatBinaryNoMaskTU<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, result_reg_class, op1_reg_class, op2_kind>;
def : VPatBinaryMaskTA<intrinsic, inst, result_type, op1_type, op2_type,
mask_type, sew, result_reg_class, op1_reg_class,
op2_kind>;
@@ -3148,6 +3536,42 @@ multiclass VPatBinarySwapped<string intrinsic,
op2_kind>;
}
+multiclass VPatBinaryCarryInTAIL<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind>
+{
+ def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type undef),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0), GPR:$vl, sew)>;
+ def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_TU")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0), GPR:$vl, sew)>;
+}
+
multiclass VPatBinaryCarryIn<string intrinsic,
string inst,
string kind,
@@ -3192,23 +3616,6 @@ multiclass VPatBinaryMaskOut<string intrinsic,
GPR:$vl, sew)>;
}
-multiclass VPatConversion<string intrinsic,
- string inst,
- string kind,
- ValueType result_type,
- ValueType op1_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg result_reg_class,
- VReg op1_reg_class>
-{
- def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
- sew, vlmul, op1_reg_class>;
- def : VPatUnaryMask<intrinsic, inst, kind, result_type, op1_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
-}
-
multiclass VPatConversionTA<string intrinsic,
string inst,
string kind,
@@ -3222,6 +3629,8 @@ multiclass VPatConversionTA<string intrinsic,
{
def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
sew, vlmul, op1_reg_class>;
+ def : VPatUnaryNoMaskTU<intrinsic, inst, kind, result_type, op1_type,
+ sew, vlmul, result_reg_class, op1_reg_class>;
def : VPatUnaryMaskTA<intrinsic, inst, kind, result_type, op1_type,
mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
}
@@ -3296,9 +3705,9 @@ multiclass VPatBinaryV_VI<string intrinsic, string instruction,
multiclass VPatBinaryM_MM<string intrinsic, string instruction> {
foreach mti = AllMasks in
- def : VPatBinaryNoMask<intrinsic, instruction # "_MM_" # mti.LMul.MX,
- mti.Mask, mti.Mask, mti.Mask,
- mti.Log2SEW, VR, VR>;
+ def : VPatBinaryM<intrinsic, instruction # "_MM_" # mti.LMul.MX,
+ mti.Mask, mti.Mask, mti.Mask,
+ mti.Log2SEW, VR, VR>;
}
multiclass VPatBinaryW_VV<string intrinsic, string instruction,
@@ -3334,10 +3743,17 @@ multiclass VPatBinaryW_WV<string intrinsic, string instruction,
def : VPatTiedBinaryNoMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Vti.Vector,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- let AddedComplexity = 1 in
+ def : VPatBinaryNoMaskTU<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Log2SEW,
+ Wti.RegClass, Wti.RegClass, Vti.RegClass>;
+ let AddedComplexity = 1 in {
+ def : VPatTiedBinaryNoMaskTU<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
+ }
def : VPatBinaryMaskTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass,
@@ -3428,6 +3844,39 @@ multiclass VPatBinaryV_IM<string intrinsic, string instruction,
vti.RegClass, simm5>;
}
+multiclass VPatBinaryV_VM_TAIL<string intrinsic, string instruction,
+ bit CarryOut = 0,
+ list<VTypeInfo> vtilist = AllIntegerVectors> {
+ foreach vti = vtilist in
+ defm : VPatBinaryCarryInTAIL<intrinsic, instruction, "VVM",
+ !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_XM_TAIL<string intrinsic, string instruction,
+ bit CarryOut = 0,
+ list<VTypeInfo> vtilist = AllIntegerVectors> {
+ foreach vti = vtilist in
+ defm : VPatBinaryCarryInTAIL<intrinsic, instruction,
+ "V"#vti.ScalarSuffix#"M",
+ !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+}
+
+multiclass VPatBinaryV_IM_TAIL<string intrinsic, string instruction,
+ bit CarryOut = 0> {
+ foreach vti = AllIntegerVectors in
+ defm : VPatBinaryCarryInTAIL<intrinsic, instruction, "VIM",
+ !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.LMul,
+ vti.RegClass, vti.RegClass, simm5>;
+}
+
multiclass VPatBinaryV_V<string intrinsic, string instruction> {
foreach vti = AllIntegerVectors in
defm : VPatBinaryMaskOut<intrinsic, instruction, "VV",
@@ -3455,10 +3904,10 @@ multiclass VPatBinaryV_I<string intrinsic, string instruction> {
multiclass VPatBinaryM_VV<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
- vti.Mask, vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, VR,
- vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryM<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Mask, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, VR,
+ vti.RegClass, vti.RegClass>;
}
multiclass VPatBinarySwappedM_VV<string intrinsic, string instruction,
@@ -3474,20 +3923,20 @@ multiclass VPatBinaryM_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
defvar kind = "V"#vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
- vti.Mask, vti.Vector, vti.Scalar, vti.Mask,
- vti.Log2SEW, VR,
- vti.RegClass, vti.ScalarRegClass>;
+ defm : VPatBinaryM<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
+ vti.Mask, vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, VR,
+ vti.RegClass, vti.ScalarRegClass>;
}
}
multiclass VPatBinaryM_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX,
- vti.Mask, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, VR,
- vti.RegClass, simm5>;
+ defm : VPatBinaryM<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Mask, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, VR,
+ vti.RegClass, simm5>;
}
multiclass VPatBinaryV_VV_VX_VI<string intrinsic, string instruction,
@@ -3523,9 +3972,9 @@ multiclass VPatBinaryV_WV_WX_WI<string intrinsic, string instruction,
VPatBinaryV_WI<intrinsic, instruction, vtilist>;
multiclass VPatBinaryV_VM_XM_IM<string intrinsic, string instruction>
- : VPatBinaryV_VM<intrinsic, instruction>,
- VPatBinaryV_XM<intrinsic, instruction>,
- VPatBinaryV_IM<intrinsic, instruction>;
+ : VPatBinaryV_VM_TAIL<intrinsic, instruction>,
+ VPatBinaryV_XM_TAIL<intrinsic, instruction>,
+ VPatBinaryV_IM_TAIL<intrinsic, instruction>;
multiclass VPatBinaryM_VM_XM_IM<string intrinsic, string instruction>
: VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>,
@@ -3538,8 +3987,8 @@ multiclass VPatBinaryM_V_X_I<string intrinsic, string instruction>
VPatBinaryV_I<intrinsic, instruction>;
multiclass VPatBinaryV_VM_XM<string intrinsic, string instruction>
- : VPatBinaryV_VM<intrinsic, instruction>,
- VPatBinaryV_XM<intrinsic, instruction>;
+ : VPatBinaryV_VM_TAIL<intrinsic, instruction>,
+ VPatBinaryV_XM_TAIL<intrinsic, instruction>;
multiclass VPatBinaryM_VM_XM<string intrinsic, string instruction>
: VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>,
@@ -3569,6 +4018,26 @@ multiclass VPatTernary<string intrinsic,
op2_kind>;
}
+multiclass VPatTernaryNoMaskNoPolicy<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatTernaryNoMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
+ sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
+ def : VPatTernaryMaskPolicy<intrinsic, inst, kind, result_type, op1_type, op2_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
multiclass VPatTernaryWithPolicy<string intrinsic,
string inst,
string kind,
@@ -3584,9 +4053,9 @@ multiclass VPatTernaryWithPolicy<string intrinsic,
def : VPatTernaryNoMaskWithPolicy<intrinsic, inst, kind, result_type, op1_type,
op2_type, sew, vlmul, result_reg_class,
op1_reg_class, op2_kind>;
- def : VPatTernaryMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class,
- op2_kind>;
+ def : VPatTernaryMaskPolicy<intrinsic, inst, kind, result_type, op1_type, op2_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
}
multiclass VPatTernaryV_VV_AAXA<string intrinsic, string instruction,
@@ -3601,10 +4070,10 @@ multiclass VPatTernaryV_VV_AAXA<string intrinsic, string instruction,
multiclass VPatTernaryV_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatTernary<intrinsic, instruction, "VX",
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass, GPR>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VX",
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, GPR>;
}
multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
@@ -3620,10 +4089,10 @@ multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
multiclass VPatTernaryV_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist, Operand Imm_type> {
foreach vti = vtilist in
- defm : VPatTernary<intrinsic, instruction, "VI",
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass, Imm_type>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VI",
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, Imm_type>;
}
multiclass VPatTernaryW_VV<string intrinsic, string instruction,
@@ -3661,6 +4130,7 @@ multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction,
: VPatTernaryV_VX<intrinsic, instruction, vtilist>,
VPatTernaryV_VI<intrinsic, instruction, vtilist, Imm_type>;
+
multiclass VPatBinaryM_VV_VX_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist>
: VPatBinaryM_VV<intrinsic, instruction, vtilist>,
@@ -3724,19 +4194,6 @@ multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat =
}
}
-multiclass VPatClassifyVI_VF<string intrinsic,
- string instruction>
-{
- foreach fvti = AllFloatVectors in
- {
- defvar ivti = GetIntVTypeInfo<fvti>.Vti;
-
- defm : VPatConversion<intrinsic, instruction, "V",
- ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
- fvti.LMul, ivti.RegClass, fvti.RegClass>;
- }
-}
-
multiclass VPatConversionVI_VF<string intrinsic,
string instruction>
{
@@ -3973,7 +4430,7 @@ defm PseudoVL : VPseudoFFLoad;
//===----------------------------------------------------------------------===//
// 7.8. Vector Load/Store Segment Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVLSEG : VPseudoUSSegLoad</*isFF=*/false>;
+defm PseudoVLSEG : VPseudoUSSegLoad;
defm PseudoVLSSEG : VPseudoSSegLoad;
defm PseudoVLOXSEG : VPseudoISegLoad</*Ordered=*/true>;
defm PseudoVLUXSEG : VPseudoISegLoad</*Ordered=*/false>;
@@ -3983,8 +4440,9 @@ defm PseudoVSOXSEG : VPseudoISegStore</*Ordered=*/true>;
defm PseudoVSUXSEG : VPseudoISegStore</*Ordered=*/false>;
// vlseg<nf>e<eew>ff.v may update VL register
-let hasSideEffects = 1, Defs = [VL] in
-defm PseudoVLSEG : VPseudoUSSegLoad</*isFF=*/true>;
+let hasSideEffects = 1, Defs = [VL] in {
+defm PseudoVLSEG : VPseudoUSSegLoadFF;
+}
//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Instructions
@@ -4002,13 +4460,24 @@ foreach vti = AllIntegerVectors in {
// Occurs when legalizing vrsub.vx intrinsics for i64 on RV32 since we need
// to use a more complex splat sequence. Add the pattern for all VTs for
// consistency.
- def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$rs2),
+ def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector (undef)),
+ (vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
VLOpFrag)),
(!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
vti.RegClass:$rs2,
GPR:$vl,
vti.Log2SEW)>;
+ def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ GPR:$vl,
+ vti.Log2SEW)>;
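  // Illustration of the case above: splatting an i64 scalar on RV32 takes a
  // multi-instruction sequence, so after legalization both operands of vrsub
  // are vectors, and the intrinsic is matched to vsub.vv with the operand
  // order swapped.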
def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
@@ -4025,7 +4494,8 @@ foreach vti = AllIntegerVectors in {
(XLenVT timm:$policy))>;
// Match VSUB with a small immediate to vadd.vi by negating the immediate.
- def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector vti.RegClass:$rs1),
+ def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)),
+ (vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
VLOpFrag)),
(!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
@@ -4219,33 +4689,42 @@ let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in {
defm PseudoVFADD : VPseudoVALU_VV_VF;
defm PseudoVFSUB : VPseudoVALU_VV_VF;
defm PseudoVFRSUB : VPseudoVALU_VF;
+}
//===----------------------------------------------------------------------===//
// 14.3. Vector Widening Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in {
defm PseudoVFWADD : VPseudoVFWALU_VV_VF;
defm PseudoVFWSUB : VPseudoVFWALU_VV_VF;
defm PseudoVFWADD : VPseudoVFWALU_WV_WF;
defm PseudoVFWSUB : VPseudoVFWALU_WV_WF;
+}
//===----------------------------------------------------------------------===//
// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in {
defm PseudoVFMUL : VPseudoVFMUL_VV_VF;
defm PseudoVFDIV : VPseudoVFDIV_VV_VF;
defm PseudoVFRDIV : VPseudoVFRDIV_VF;
+}
//===----------------------------------------------------------------------===//
// 14.5. Vector Widening Floating-Point Multiply
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in {
defm PseudoVFWMUL : VPseudoVWMUL_VV_VF;
+}
//===----------------------------------------------------------------------===//
// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in {
defm PseudoVFMACC : VPseudoVMAC_VV_VF_AAXA;
defm PseudoVFNMACC : VPseudoVMAC_VV_VF_AAXA;
defm PseudoVFMSAC : VPseudoVMAC_VV_VF_AAXA;
@@ -4254,35 +4733,43 @@ defm PseudoVFMADD : VPseudoVMAC_VV_VF_AAXA;
defm PseudoVFNMADD : VPseudoVMAC_VV_VF_AAXA;
defm PseudoVFMSUB : VPseudoVMAC_VV_VF_AAXA;
defm PseudoVFNMSUB : VPseudoVMAC_VV_VF_AAXA;
+}
//===----------------------------------------------------------------------===//
// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in {
defm PseudoVFWMACC : VPseudoVWMAC_VV_VF;
defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF;
defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF;
defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF;
+}
//===----------------------------------------------------------------------===//
// 14.8. Vector Floating-Point Square-Root Instruction
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in
defm PseudoVFSQRT : VPseudoVSQR_V;
//===----------------------------------------------------------------------===//
// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
//===----------------------------------------------------------------------===//
+let mayRaiseFPException = true in
defm PseudoVFRSQRT7 : VPseudoVRCP_V;
//===----------------------------------------------------------------------===//
// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in
defm PseudoVFREC7 : VPseudoVRCP_V;
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
//===----------------------------------------------------------------------===//
+let mayRaiseFPException = true in {
defm PseudoVFMIN : VPseudoVMAX_VV_VF;
defm PseudoVFMAX : VPseudoVMAX_VV_VF;
+}
//===----------------------------------------------------------------------===//
// 14.12. Vector Floating-Point Sign-Injection Instructions
@@ -4294,12 +4781,14 @@ defm PseudoVFSGNJX : VPseudoVSGNJ_VV_VF;
//===----------------------------------------------------------------------===//
// 14.13. Vector Floating-Point Compare Instructions
//===----------------------------------------------------------------------===//
+let mayRaiseFPException = true in {
defm PseudoVMFEQ : VPseudoVCMPM_VV_VF;
defm PseudoVMFNE : VPseudoVCMPM_VV_VF;
defm PseudoVMFLT : VPseudoVCMPM_VV_VF;
defm PseudoVMFLE : VPseudoVCMPM_VV_VF;
defm PseudoVMFGT : VPseudoVCMPM_VF;
defm PseudoVMFGE : VPseudoVCMPM_VF;
+}
//===----------------------------------------------------------------------===//
// 14.14. Vector Floating-Point Classify Instruction
@@ -4376,15 +4865,21 @@ let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
+let Uses = [FRM], mayRaiseFPException = true in {
defm PseudoVFREDOSUM : VPseudoVFREDO_VS;
defm PseudoVFREDUSUM : VPseudoVFRED_VS;
+}
+let mayRaiseFPException = true in {
defm PseudoVFREDMIN : VPseudoVFRED_VS;
defm PseudoVFREDMAX : VPseudoVFRED_VS;
+}
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-let IsRVVWideningReduction = 1 in {
+let IsRVVWideningReduction = 1,
+ Uses = [FRM],
+ mayRaiseFPException = true in {
defm PseudoVFWREDUSUM : VPseudoVFWRED_VS;
defm PseudoVFWREDOSUM : VPseudoVFWRED_VS;
}
@@ -4611,7 +5106,8 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
- def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$rs1),
+ def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
(XLenVT 1), VLOpFrag)),
(!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
vti.RegClass:$rs1,
@@ -4726,10 +5222,16 @@ defm : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">;
// 12.16. Vector Integer Move Instructions
//===----------------------------------------------------------------------===//
foreach vti = AllVectors in {
- def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1),
+ def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
$rs1, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$passthru),
+ (vti.Vector vti.RegClass:$rs1),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX#"_TU")
+ $passthru, $rs1, GPR:$vl, vti.Log2SEW)>;
  // vmv.v.x/vmv.v.i are handled in RISCVInstrInfoVVLPatterns.td
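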
}
@@ -4862,7 +5364,7 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 14.14. Vector Floating-Point Classify Instruction
//===----------------------------------------------------------------------===//
-defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
+defm : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
//===----------------------------------------------------------------------===//
// 14.15. Vector Floating-Point Merge Instruction
@@ -4870,19 +5372,27 @@ defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
// We can use vmerge.vvm to support vector-vector vfmerge.
// NOTE: Clang previously used int_riscv_vfmerge for vector-vector, but now uses
// int_riscv_vmerge. Support both for compatibility.
-defm : VPatBinaryV_VM<"int_riscv_vmerge", "PseudoVMERGE",
- /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
-defm : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE",
- /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
-defm : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE",
- /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+defm : VPatBinaryV_VM_TAIL<"int_riscv_vmerge", "PseudoVMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+defm : VPatBinaryV_VM_TAIL<"int_riscv_vfmerge", "PseudoVMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+defm : VPatBinaryV_XM_TAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
foreach fvti = AllFloatVectors in {
defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
- def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2),
+ def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector undef),
+ (fvti.Vector fvti.RegClass:$rs2),
(fvti.Scalar (fpimm0)),
(fvti.Mask V0), VLOpFrag)),
(instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+ defvar instr_tu = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU");
+ def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$merge),
+ (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Scalar (fpimm0)),
+ (fvti.Mask V0), VLOpFrag)),
+ (instr_tu fvti.RegClass:$merge, fvti.RegClass:$rs2, 0,
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
}
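// Illustration: there is no vector-vector vfmerge encoding, so the merge is
// done through the integer vmerge.vvm, which only selects element bit
// patterns and is therefore safe for floating-point payloads.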
//===----------------------------------------------------------------------===//
@@ -5048,6 +5558,11 @@ foreach fvti = AllFloatVectors in {
(fvti.Vector $rs1),
(fvti.Scalar fvti.ScalarRegClass:$rs2),
GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
+ (fvti.Vector $rs1), X0, GPR:$vl, fvti.Log2SEW)>;
}
} // Predicates = [HasVInstructionsAnyF]
@@ -5097,5 +5612,5 @@ let Predicates = [HasVInstructionsAnyF] in {
} // Predicates = [HasVInstructionsAnyF]
// Include the non-intrinsic ISel patterns
-include "RISCVInstrInfoVSDPatterns.td"
include "RISCVInstrInfoVVLPatterns.td"
+include "RISCVInstrInfoVSDPatterns.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 2b920d29ab81..06d4c4d0a9e6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -8,8 +8,7 @@
///
/// This file contains the required infrastructure and SDNode patterns to
/// support code generation for the standard 'V' (Vector) extension, version
-/// 0.10. This version is still experimental as the 'V' extension hasn't been
-/// ratified yet.
+/// 1.0.
///
/// This file is included from and depends upon RISCVInstrInfoVPseudos.td
///
@@ -22,35 +21,9 @@
// Helpers to define the SDNode patterns.
//===----------------------------------------------------------------------===//
-def SDTSplatI64 : SDTypeProfile<1, 1, [
- SDTCVecEltisVT<0, i64>, SDTCisVT<1, i32>
-]>;
-
-def rv32_splat_i64 : SDNode<"RISCVISD::SPLAT_VECTOR_I64", SDTSplatI64>;
-
-def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>,
- SDTCisVT<1, XLenVT>]>;
-def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
-def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
-
def rvv_vnot : PatFrag<(ops node:$in),
(xor node:$in, (riscv_vmset_vl (XLenVT srcvalue)))>;
-// Give explicit Complexity to prefer simm5/uimm5.
-def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [splat_vector, rv32_splat_i64], [], 1>;
-def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", [splat_vector, rv32_splat_i64], [], 2>;
-def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimm5", [splat_vector, rv32_splat_i64], [], 2>;
-def SplatPat_simm5_plus1
- : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1",
- [splat_vector, rv32_splat_i64], [], 2>;
-def SplatPat_simm5_plus1_nonzero
- : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero",
- [splat_vector, rv32_splat_i64], [], 2>;
-
-class SwapHelper<dag Prefix, dag A, dag B, dag Suffix, bit swap> {
- dag Value = !con(Prefix, !if(swap, B, A), !if(swap, A, B), Suffix);
-}
-
multiclass VPatUSLoadStoreSDNode<ValueType type,
int log2sew,
LMULInfo vlmul,
@@ -169,7 +142,7 @@ class VPatBinarySDNode_VF<SDNode vop,
VReg vop_reg_class,
DAGOperand xop_kind> :
Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
- (vop_type (splat_vector xop_kind:$rs2)))),
+ (vop_type (SplatFPOp xop_kind:$rs2)))),
(!cast<Instruction>(instruction_name#"_"#vlmul.MX)
vop_reg_class:$rs1,
(xop_type xop_kind:$rs2),
@@ -189,7 +162,7 @@ multiclass VPatBinaryFPSDNode_VV_VF<SDNode vop, string instruction_name> {
multiclass VPatBinaryFPSDNode_R_VF<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in
- def : Pat<(fvti.Vector (vop (fvti.Vector (splat_vector fvti.Scalar:$rs2)),
+ def : Pat<(fvti.Vector (vop (fvti.Vector (SplatFPOp fvti.Scalar:$rs2)),
(fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
fvti.RegClass:$rs1,
@@ -197,67 +170,70 @@ multiclass VPatBinaryFPSDNode_R_VF<SDNode vop, string instruction_name> {
fvti.AVL, fvti.Log2SEW)>;
}
-multiclass VPatIntegerSetCCSDNode_VV<CondCode cc,
- string instruction_name,
- bit swap = 0> {
+multiclass VPatIntegerSetCCSDNode_VV<string instruction_name,
+ CondCode cc> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX);
def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2), cc)),
- SwapHelper<(instruction),
- (instruction vti.RegClass:$rs1),
- (instruction vti.RegClass:$rs2),
- (instruction vti.AVL, vti.Log2SEW),
- swap>.Value>;
+ (instruction vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL,
+ vti.Log2SEW)>;
}
}
-multiclass VPatIntegerSetCCSDNode_XI<CondCode cc,
+multiclass VPatIntegerSetCCSDNode_VV_Swappable<string instruction_name,
+ CondCode cc, CondCode invcc>
+ : VPatIntegerSetCCSDNode_VV<instruction_name, cc> {
+ foreach vti = AllIntegerVectors in {
+ defvar instruction = !cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX);
+ def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1), invcc)),
+ (instruction vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL,
+ vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatIntegerSetCCSDNode_XI<
string instruction_name,
+ CondCode cc,
string kind,
ComplexPattern SplatPatKind,
- DAGOperand xop_kind,
- bit swap = 0> {
+ DAGOperand xop_kind> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#_#kind#_#vti.LMul.MX);
def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
(vti.Vector (SplatPatKind xop_kind:$rs2)), cc)),
- SwapHelper<(instruction),
- (instruction vti.RegClass:$rs1),
- (instruction xop_kind:$rs2),
- (instruction vti.AVL, vti.Log2SEW),
- swap>.Value>;
+ (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
}
}
-multiclass VPatIntegerSetCCSDNode_VV_VX_VI<CondCode cc,
- string instruction_name,
- bit swap = 0> {
- defm : VPatIntegerSetCCSDNode_VV<cc, instruction_name, swap>;
- defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX",
- SplatPat, GPR, swap>;
- defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VI",
- SplatPat_simm5, simm5, swap>;
+multiclass VPatIntegerSetCCSDNode_XI_Swappable<string instruction_name,
+ CondCode cc, CondCode invcc,
+ string kind,
+ ComplexPattern SplatPatKind,
+ DAGOperand xop_kind>
+ : VPatIntegerSetCCSDNode_XI<instruction_name, cc, kind, SplatPatKind,
+ xop_kind> {
+ foreach vti = AllIntegerVectors in {
+ defvar instruction = !cast<Instruction>(instruction_name#_#kind#_#vti.LMul.MX);
+ def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPatKind xop_kind:$rs2)), cc)),
+ (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(vti.Mask (setcc (vti.Vector (SplatPatKind xop_kind:$rs2)),
+ (vti.Vector vti.RegClass:$rs1), invcc)),
+ (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
+ }
}
-multiclass VPatIntegerSetCCSDNode_VV_VX<CondCode cc,
- string instruction_name,
- bit swap = 0> {
- defm : VPatIntegerSetCCSDNode_VV<cc, instruction_name, swap>;
- defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX",
- SplatPat, GPR, swap>;
-}
+multiclass VPatIntegerSetCCSDNode_VX_Swappable<string instruction_name,
+ CondCode cc, CondCode invcc>
+ : VPatIntegerSetCCSDNode_XI_Swappable<instruction_name, cc, invcc, "VX",
+ SplatPat, GPR>;
-multiclass VPatIntegerSetCCSDNode_VX_VI<CondCode cc,
- string instruction_name,
- bit swap = 0> {
- defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VX",
- SplatPat, GPR, swap>;
- defm : VPatIntegerSetCCSDNode_XI<cc, instruction_name, "VI",
- SplatPat_simm5, simm5, swap>;
-}
+multiclass VPatIntegerSetCCSDNode_VI<string instruction_name, CondCode cc>
+ : VPatIntegerSetCCSDNode_XI<instruction_name, cc, "VI", SplatPat_simm5, simm5>;
-multiclass VPatIntegerSetCCSDNode_VIPlus1<CondCode cc, string instruction_name,
+multiclass VPatIntegerSetCCSDNode_VIPlus1<string instruction_name, CondCode cc,
ComplexPattern splatpat_kind> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX);
@@ -279,12 +255,12 @@ multiclass VPatFPSetCCSDNode_VV_VF_FV<CondCode cc,
(!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX)
fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW)>;
def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
- (splat_vector fvti.ScalarRegClass:$rs2),
+ (SplatFPOp fvti.ScalarRegClass:$rs2),
cc)),
(!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Mask (setcc (splat_vector fvti.ScalarRegClass:$rs2),
+ def : Pat<(fvti.Mask (setcc (SplatFPOp fvti.ScalarRegClass:$rs2),
(fvti.Vector fvti.RegClass:$rs1),
cc)),
(!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
@@ -363,83 +339,122 @@ multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
}
}
-multiclass VPatWidenBinarySDNode_VV_VX_WV_WX<SDNode op, PatFrags extop, string instruction_name> {
- foreach vti = AllWidenableIntVectors in {
- def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
- (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
- vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
- def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
- (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
- (!cast<Instruction>(instruction_name#"_VX_"#vti.Vti.LMul.MX)
- vti.Vti.RegClass:$rs2, GPR:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
- def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
- (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
- def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
- (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
- (!cast<Instruction>(instruction_name#"_WX_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rs2, GPR:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
+multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, PatFrags extop2,
+ string instruction_name> {
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (extop2 (vti.Vector (SplatPat GPR:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
+ vti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenBinarySDNode_WV_WX<SDNode op, PatFrags extop,
+ string instruction_name> {
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
+ wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
+ wti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>;
}
}
+multiclass VPatWidenBinarySDNode_VV_VX_WV_WX<SDNode op, PatFrags extop,
+ string instruction_name> {
+ defm : VPatWidenBinarySDNode_VV_VX<op, extop, extop, instruction_name>;
+ defm : VPatWidenBinarySDNode_WV_WX<op, extop, instruction_name>;
+}
+
multiclass VPatWidenMulAddSDNode_VV<PatFrags extop1, PatFrags extop2, string instruction_name> {
- foreach vti = AllWidenableIntVectors in {
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
def : Pat<
- (add (vti.Wti.Vector vti.Wti.RegClass:$rd),
- (mul_oneuse (vti.Wti.Vector (extop1 (vti.Vti.Vector vti.Vti.RegClass:$rs1))),
- (vti.Wti.Vector (extop2 (vti.Vti.Vector vti.Vti.RegClass:$rs2))))),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rd, vti.Vti.RegClass:$rs1, vti.Vti.RegClass:$rs2,
- vti.Vti.AVL, vti.Vti.Log2SEW, TAIL_AGNOSTIC
+ (add (wti.Vector wti.RegClass:$rd),
+ (mul_oneuse (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs2))))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC
)>;
}
}
multiclass VPatWidenMulAddSDNode_VX<PatFrags extop1, PatFrags extop2, string instruction_name> {
- foreach vti = AllWidenableIntVectors in {
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
def : Pat<
- (add (vti.Wti.Vector vti.Wti.RegClass:$rd),
- (mul_oneuse (vti.Wti.Vector (extop1 (vti.Vti.Vector (SplatPat GPR:$rs1)))),
- (vti.Wti.Vector (extop2 (vti.Vti.Vector vti.Vti.RegClass:$rs2))))),
- (!cast<Instruction>(instruction_name#"_VX_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rd, GPR:$rs1, vti.Vti.RegClass:$rs2,
- vti.Vti.AVL, vti.Vti.Log2SEW, TAIL_AGNOSTIC
+ (add (wti.Vector wti.RegClass:$rd),
+ (mul_oneuse (wti.Vector (extop1 (vti.Vector (SplatPat GPR:$rs1)))),
+ (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs2))))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
+ wti.RegClass:$rd, GPR:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC
)>;
}
}
multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
- foreach vti = AllWidenableFloatVectors in {
- def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
- (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
- vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
- def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
- (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))),
- (!cast<Instruction>(instruction_name#"_V"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX)
- vti.Vti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
}
}
multiclass VPatWidenBinaryFPSDNode_WV_WF<SDNode op, string instruction_name> {
- foreach vti = AllWidenableFloatVectors in {
- def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
- (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
- def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
- (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))),
- (!cast<Instruction>(instruction_name#"_W"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
+ wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
}
}
@@ -448,6 +463,148 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_WV_WF<SDNode op, string instruction_nam
defm : VPatWidenBinaryFPSDNode_WV_WF<op, instruction_name>;
}
+multiclass VPatWidenFPMulAccSDNode_VV_VF<string instruction_name> {
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
+multiclass VPatWidenFPNegMulAccSDNode_VV_VF<string instruction_name> {
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
+multiclass VPatWidenFPMulSacSDNode_VV_VF<string instruction_name> {
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
+multiclass VPatWidenFPNegMulSacSDNode_VV_VF<string instruction_name> {
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ wti.RegClass:$rd),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ wti.RegClass:$rd),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ wti.RegClass:$rd),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
+multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
+ foreach vti = AllIntegerVectors in {
+ defvar suffix = vti.LMul.MX;
+ // NOTE: We choose VMADD because it has the most commuting freedom. So it
+ // works best with how TwoAddressInstructionPass tries commuting.
+ def : Pat<(vti.Vector (op vti.RegClass:$rs2,
+ (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix)
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
+ // commutable.
+ def : Pat<(vti.Vector (op vti.RegClass:$rs2,
+ (mul_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rd))),
+ (!cast<Instruction>(instruction_name#"_VX_" # suffix)
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
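// For reference: vmacc.vv vd, vs1, vs2 computes (vs1 * vs2) + vd, while
// vmadd.vv vd, vs1, vs2 computes (vd * vs1) + vs2, so between the two forms
// every operand position can be reached by commuting, which is what the
// notes above rely on.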
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -520,42 +677,45 @@ defm : VPatBinarySDNode_VV_VX_VI<sra, "PseudoVSRA", uimm5>;
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
def : Pat<(shl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (splat_vector (XLenVT 1)))),
- (!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
-}
-foreach vti = [VI64M1, VI64M2, VI64M4, VI64M8] in {
- def : Pat<(shl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (rv32_splat_i64 (XLenVT 1)))),
+ (vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
}
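// e.g. vsll.vi v8, v9, 1 becomes vadd.vv v8, v9, v9, which some
// implementations execute with lower latency.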
// 12.8. Vector Integer Comparison Instructions
-defm : VPatIntegerSetCCSDNode_VV_VX_VI<SETEQ, "PseudoVMSEQ">;
-defm : VPatIntegerSetCCSDNode_VV_VX_VI<SETNE, "PseudoVMSNE">;
-
-defm : VPatIntegerSetCCSDNode_VV_VX<SETLT, "PseudoVMSLT">;
-defm : VPatIntegerSetCCSDNode_VV_VX<SETULT, "PseudoVMSLTU">;
-defm : VPatIntegerSetCCSDNode_VIPlus1<SETLT, "PseudoVMSLE",
+defm : VPatIntegerSetCCSDNode_VV<"PseudoVMSEQ", SETEQ>;
+defm : VPatIntegerSetCCSDNode_VV<"PseudoVMSNE", SETNE>;
+
+defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLT", SETLT, SETGT>;
+defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLTU", SETULT, SETUGT>;
+defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLE", SETLE, SETGE>;
+defm : VPatIntegerSetCCSDNode_VV_Swappable<"PseudoVMSLEU", SETULE, SETUGE>;
+
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSEQ", SETEQ, SETEQ>;
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSNE", SETNE, SETNE>;
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLT", SETLT, SETGT>;
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLTU", SETULT, SETUGT>;
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLE", SETLE, SETGE>;
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSLEU", SETULE, SETUGE>;
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSGT", SETGT, SETLT>;
+defm : VPatIntegerSetCCSDNode_VX_Swappable<"PseudoVMSGTU", SETUGT, SETULT>;
+// There is no VMSGE(U)_VX instruction
+
+defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSEQ", SETEQ>;
+defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSNE", SETNE>;
+defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSLE", SETLE>;
+defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSLEU", SETULE>;
+defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSGT", SETGT>;
+defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSGTU", SETUGT>;
+
+defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSLE", SETLT,
SplatPat_simm5_plus1_nonzero>;
-defm : VPatIntegerSetCCSDNode_VIPlus1<SETULT, "PseudoVMSLEU",
+defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSLEU", SETULT,
SplatPat_simm5_plus1_nonzero>;
-
-defm : VPatIntegerSetCCSDNode_VV<SETGT, "PseudoVMSLT", /*swap*/1>;
-defm : VPatIntegerSetCCSDNode_VV<SETUGT, "PseudoVMSLTU", /*swap*/1>;
-defm : VPatIntegerSetCCSDNode_VX_VI<SETGT, "PseudoVMSGT">;
-defm : VPatIntegerSetCCSDNode_VX_VI<SETUGT, "PseudoVMSGTU">;
-
-defm : VPatIntegerSetCCSDNode_VV_VX_VI<SETLE, "PseudoVMSLE">;
-defm : VPatIntegerSetCCSDNode_VV_VX_VI<SETULE, "PseudoVMSLEU">;
-
-defm : VPatIntegerSetCCSDNode_VV<SETGE, "PseudoVMSLE", /*swap*/1>;
-defm : VPatIntegerSetCCSDNode_VV<SETUGE, "PseudoVMSLEU", /*swap*/1>;
-defm : VPatIntegerSetCCSDNode_VIPlus1<SETGE, "PseudoVMSGT",
+defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSGT", SETGE,
SplatPat_simm5_plus1>;
-defm : VPatIntegerSetCCSDNode_VIPlus1<SETUGE, "PseudoVMSGTU",
+defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSGTU", SETUGE,
SplatPat_simm5_plus1_nonzero>;
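// The VIPlus1 forms adjust the constant instead of the condition, e.g.
// (setlt x, (splat c)) becomes vmsle.vi x, c-1, assuming c-1 still fits in
// simm5 (which is what the SplatPat_simm5_plus1* patterns check).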
// 12.9. Vector Integer Min/Max Instructions
@@ -575,37 +735,23 @@ defm : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">;
defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
-// 12.13 Vector Single-Width Integer Multiply-Add Instructions.
-foreach vti = AllIntegerVectors in {
- // NOTE: We choose VMADD because it has the most commuting freedom. So it
- // works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX;
- def : Pat<(vti.Vector (add vti.RegClass:$rs2,
- (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
- (!cast<Instruction>("PseudoVMADD_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
- (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
- (!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+// 12.12. Vector Widening Integer Multiply Instructions
+defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, sext_oneuse,
+ "PseudoVWMUL">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, zext_oneuse, zext_oneuse,
+ "PseudoVWMULU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, anyext_oneuse, anyext_oneuse,
+ "PseudoVWMULU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, zext_oneuse, anyext_oneuse,
+ "PseudoVWMULU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, zext_oneuse,
+ "PseudoVWMULSU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, anyext_oneuse,
+ "PseudoVWMULSU">;
- // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
- // commutable.
- def : Pat<(vti.Vector (add vti.RegClass:$rs2,
- (mul_oneuse (SplatPat XLenVT:$rs1),
- vti.RegClass:$rd))),
- (!cast<Instruction>("PseudoVMADD_VX_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
- (mul_oneuse (SplatPat XLenVT:$rs1),
- vti.RegClass:$rd))),
- (!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
-}
+// 12.13. Vector Single-Width Integer Multiply-Add Instructions.
+defm : VPatMultiplyAddSDNode_VV_VX<add, "PseudoVMADD">;
+defm : VPatMultiplyAddSDNode_VV_VX<sub, "PseudoVNMSUB">;
// 12.14 Vector Widening Integer Multiply-Add Instructions
defm : VPatWidenMulAddSDNode_VV<sext_oneuse, sext_oneuse, "PseudoVWMACC">;
@@ -725,41 +871,47 @@ foreach fvti = AllFloatVectors in {
// The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
// commutable.
- def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+ def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rd, fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+ def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+ def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
(fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+ def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
(fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
// The splat might be negated.
- def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
+ def : Pat<(fvti.Vector (fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)),
fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
+ def : Pat<(fvti.Vector (fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)),
fvti.RegClass:$rd, fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
+// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+defm : VPatWidenFPMulAccSDNode_VV_VF<"PseudoVFWMACC">;
+defm : VPatWidenFPNegMulAccSDNode_VV_VF<"PseudoVFWNMACC">;
+defm : VPatWidenFPMulSacSDNode_VV_VF<"PseudoVFWMSAC">;
+defm : VPatWidenFPNegMulSacSDNode_VV_VF<"PseudoVFWNMSAC">;
+
foreach vti = AllFloatVectors in {
// 14.8. Vector Floating-Point Square-Root Instruction
def : Pat<(fsqrt (vti.Vector vti.RegClass:$rs2)),
@@ -780,7 +932,7 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (splat_vector vti.ScalarRegClass:$rs2)))),
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
(!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>;
@@ -789,7 +941,7 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>;
def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (fneg (splat_vector vti.ScalarRegClass:$rs2))))),
+ (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
(!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>;
}
@@ -822,7 +974,7 @@ foreach fvti = AllFloatVectors in {
fvti.AVL, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
- (splat_vector fvti.ScalarRegClass:$rs1),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
fvti.RegClass:$rs2,
@@ -830,7 +982,7 @@ foreach fvti = AllFloatVectors in {
(fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
- (splat_vector (fvti.Scalar fpimm0)),
+ (SplatFPOp (fvti.Scalar fpimm0)),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
@@ -847,13 +999,6 @@ defm : VPatWConvertFP2ISDNode_V<fp_to_sint, "PseudoVFWCVT_RTZ_X_F_V">;
defm : VPatWConvertFP2ISDNode_V<fp_to_uint, "PseudoVFWCVT_RTZ_XU_F_V">;
defm : VPatWConvertI2FPSDNode_V<sint_to_fp, "PseudoVFWCVT_F_X_V">;
defm : VPatWConvertI2FPSDNode_V<uint_to_fp, "PseudoVFWCVT_F_XU_V">;
-foreach fvtiToFWti = AllWidenableFloatVectors in {
- defvar fvti = fvtiToFWti.Vti;
- defvar fwti = fvtiToFWti.Wti;
- def : Pat<(fwti.Vector (fpextend (fvti.Vector fvti.RegClass:$rs1))),
- (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>;
-}
// 14.19. Narrowing Floating-Point/Integer Type-Convert Instructions
defm : VPatNConvertFP2ISDNode_V<fp_to_sint, "PseudoVFNCVT_RTZ_X_F_W">;
@@ -873,25 +1018,14 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
// Vector Splats
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
-foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (SplatPat GPR:$rs1)),
- (!cast<Instruction>("PseudoVMV_V_X_" # vti.LMul.MX)
- GPR:$rs1, vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Vector (SplatPat_simm5 simm5:$rs1)),
- (!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX)
- simm5:$rs1, vti.AVL, vti.Log2SEW)>;
-}
-} // Predicates = [HasVInstructions]
-
let Predicates = [HasVInstructionsAnyF] in {
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)),
+ def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
(fvti.Scalar fvti.ScalarRegClass:$rs1),
fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (splat_vector (fvti.Scalar fpimm0))),
+ def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))),
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
0, fvti.AVL, fvti.Log2SEW)>;
}
@@ -902,6 +1036,13 @@ foreach fvti = AllFloatVectors in {
//===----------------------------------------------------------------------===//
let Predicates = [HasVInstructionsAnyF] in
foreach vti = AllFloatVectors in {
+ // Fold store of vmv.f.s to a vse with VL=1.
+ defvar store_instr = !cast<Instruction>("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX);
+ def : Pat<(store (vti.Scalar (int_riscv_vfmv_f_s (vti.Vector vti.RegClass:$rs2))), BaseAddr:$rs1),
+ (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>;
+ def : Pat<(store (extractelt (vti.Vector vti.RegClass:$rs2), 0), BaseAddr:$rs1),
+ (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, 1, vti.Log2SEW)>;
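+  // e.g. an f64 store of element 0 can be emitted as vse64.v with VL forced
+  // to 1 instead of a vfmv.f.s followed by a scalar fsd.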
+
defvar vmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
vti.ScalarSuffix,
"_S_", vti.LMul.MX));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index e71c498fd5f4..081f61617d59 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -8,8 +8,7 @@
///
/// This file contains the required infrastructure and VL patterns to
/// support code generation for the standard 'V' (Vector) extension, version
-/// 0.10. This version is still experimental as the 'V' extension hasn't been
-/// ratified yet.
+/// 1.0.
///
/// This file is included from and depends upon RISCVInstrInfoVPseudos.td
///
@@ -22,11 +21,6 @@
// Helpers to define the VL patterns.
//===----------------------------------------------------------------------===//
-def SDT_RISCVVLE_VL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
- SDTCisVT<2, XLenVT>]>;
-def SDT_RISCVVSE_VL : SDTypeProfile<0, 3, [SDTCisVec<0>, SDTCisPtrTy<1>,
- SDTCisVT<2, XLenVT>]>;
-
def SDT_RISCVIntBinOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisVec<0>, SDTCisInt<0>,
@@ -47,13 +41,15 @@ def SDT_RISCVFPBinOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisVT<4, XLenVT>]>;
def riscv_vmv_v_x_vl : SDNode<"RISCVISD::VMV_V_X_VL",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
- SDTCisVT<1, XLenVT>,
- SDTCisVT<2, XLenVT>]>>;
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, XLenVT>,
+ SDTCisVT<3, XLenVT>]>>;
def riscv_vfmv_v_f_vl : SDNode<"RISCVISD::VFMV_V_F_VL",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
- SDTCisEltOfVec<1, 0>,
- SDTCisVT<2, XLenVT>]>>;
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisEltOfVec<2, 0>,
+ SDTCisVT<3, XLenVT>]>>;
def riscv_vmv_s_x_vl : SDNode<"RISCVISD::VMV_S_X_VL",
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisInt<0>,
@@ -65,11 +61,6 @@ def riscv_vfmv_s_f_vl : SDNode<"RISCVISD::VFMV_S_F_VL",
SDTCisEltOfVec<2, 0>,
SDTCisVT<3, XLenVT>]>>;
-def riscv_vle_vl : SDNode<"RISCVISD::VLE_VL", SDT_RISCVVLE_VL,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def riscv_vse_vl : SDNode<"RISCVISD::VSE_VL", SDT_RISCVVSE_VL,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
def riscv_add_vl : SDNode<"RISCVISD::ADD_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_sub_vl : SDNode<"RISCVISD::SUB_VL", SDT_RISCVIntBinOp_VL>;
def riscv_mul_vl : SDNode<"RISCVISD::MUL_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
@@ -113,7 +104,10 @@ def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<0, 4>,
SDTCisVT<5, XLenVT>]>;
-def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmadd_vl : SDNode<"RISCVISD::VFMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfnmadd_vl : SDNode<"RISCVISD::VFNMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmsub_vl : SDNode<"RISCVISD::VFMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfnmsub_vl : SDNode<"RISCVISD::VFNMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
def SDT_RISCVFPRoundOp_VL : SDTypeProfile<1, 3, [
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>,
@@ -152,30 +146,33 @@ def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
SDTCisVT<5, XLenVT>]>>;
def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
- SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTypeProfile<1, 5, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisVT<2, XLenVT>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<0, 3>,
- SDTCisVT<4, XLenVT>]>>;
+ SDTCisSameAs<0, 4>,
+ SDTCisVT<5, XLenVT>]>>;
def riscv_vrgather_vv_vl : SDNode<"RISCVISD::VRGATHER_VV_VL",
- SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTypeProfile<1, 5, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisInt<2>,
SDTCisSameNumEltsAs<0, 2>,
SDTCisSameSizeAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<0, 3>,
- SDTCisVT<4, XLenVT>]>>;
+ SDTCisSameAs<0, 4>,
+ SDTCisVT<5, XLenVT>]>>;
def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL",
- SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTypeProfile<1, 5, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisInt<2>,
SDTCVecEltisVT<2, i16>,
SDTCisSameNumEltsAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<0, 3>,
- SDTCisVT<4, XLenVT>]>>;
+ SDTCisSameAs<0, 4>,
+ SDTCisVT<5, XLenVT>]>>;
def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>,
@@ -185,6 +182,11 @@ def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [
def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>;
def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>;
+def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>,
+ SDTCisVT<1, XLenVT>]>;
+def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
+def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
+
def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<0, i1>,
@@ -229,7 +231,22 @@ def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWBinOp_VL>;
+def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+
+def SDT_RISCVVWBinOpW_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameNumEltsAs<1, 2>,
+ SDTCisOpSmallerThanOp<2, 1>,
+ SDTCisSameNumEltsAs<1, 3>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisVT<4, XLenVT>]>;
+def riscv_vwadd_w_vl : SDNode<"RISCVISD::VWADD_W_VL", SDT_RISCVVWBinOpW_VL>;
+def riscv_vwaddu_w_vl : SDNode<"RISCVISD::VWADDU_W_VL", SDT_RISCVVWBinOpW_VL>;
+def riscv_vwsub_w_vl : SDNode<"RISCVISD::VWSUB_W_VL", SDT_RISCVVWBinOpW_VL>;
+def riscv_vwsubu_w_vl : SDNode<"RISCVISD::VWSUBU_W_VL", SDT_RISCVVWBinOpW_VL>;
def SDTRVVVecReduce : SDTypeProfile<1, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
@@ -254,45 +271,69 @@ def riscv_vwmulu_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D),
return N->hasOneUse();
}]>;
+def riscv_vwmulsu_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D),
+ (riscv_vwmulsu_vl node:$A, node:$B, node:$C,
+ node:$D), [{
+ return N->hasOneUse();
+}]>;
+
+def riscv_sext_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C),
+ (riscv_sext_vl node:$A, node:$B, node:$C), [{
+ return N->hasOneUse();
+}]>;
+
+def riscv_zext_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C),
+ (riscv_zext_vl node:$A, node:$B, node:$C), [{
+ return N->hasOneUse();
+}]>;
+
+def riscv_fpextend_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C),
+ (riscv_fpextend_vl node:$A, node:$B, node:$C), [{
+ return N->hasOneUse();
+}]>;
+
foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR",
"FADD", "SEQ_FADD", "FMIN", "FMAX"] in
def rvv_vecreduce_#kind#_vl : SDNode<"RISCVISD::VECREDUCE_"#kind#"_VL", SDTRVVVecReduce>;
+// Give explicit Complexity to prefer simm5/uimm5.
+def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [], [], 1>;
+def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", [], [], 2>;
+def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimm5", [], [], 2>;
+def SplatPat_simm5_plus1
+ : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1", [], [], 2>;
+def SplatPat_simm5_plus1_nonzero
+ : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 2>;
+
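// e.g. for a splat of the constant 3, the higher Complexity makes ISel pick
// the vadd.vi form rather than materializing 3 in a GPR for vadd.vx.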
// Ignore the vl operand.
def SplatFPOp : PatFrag<(ops node:$op),
- (riscv_vfmv_v_f_vl node:$op, srcvalue)>;
+ (riscv_vfmv_v_f_vl undef, node:$op, srcvalue)>;
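// Note that SplatFPOp now only matches the undef-passthru form of
// riscv_vfmv_v_f_vl, so a splat that merges into an existing destination is
// not treated as a plain scalar operand.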
def sew8simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<8>", []>;
def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>;
def sew32simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<32>", []>;
def sew64simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<64>", []>;
-multiclass VPatBinaryVL_VV<SDNode vop,
- string instruction_name,
- ValueType result_type,
- ValueType op_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg op_reg_class> {
- def : Pat<(result_type (vop
- (op_type op_reg_class:$rs1),
- (op_type op_reg_class:$rs2),
- (mask_type true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
- op_reg_class:$rs1,
- op_reg_class:$rs2,
- GPR:$vl, sew)>;
+multiclass VPatBinaryVL_V<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg op1_reg_class,
+ VReg op2_reg_class> {
def : Pat<(result_type (vop
- (op_type op_reg_class:$rs1),
- (op_type op_reg_class:$rs2),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_reg_class:$rs2),
(mask_type V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_MASK")
(result_type (IMPLICIT_DEF)),
- op_reg_class:$rs1,
- op_reg_class:$rs2,
+ op1_reg_class:$rs1,
+ op2_reg_class:$rs2,
(mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
}
@@ -300,7 +341,8 @@ multiclass VPatBinaryVL_XI<SDNode vop,
string instruction_name,
string suffix,
ValueType result_type,
- ValueType vop_type,
+ ValueType vop1_type,
+ ValueType vop2_type,
ValueType mask_type,
int sew,
LMULInfo vlmul,
@@ -308,17 +350,8 @@ multiclass VPatBinaryVL_XI<SDNode vop,
ComplexPattern SplatPatKind,
DAGOperand xop_kind> {
def : Pat<(result_type (vop
- (vop_type vop_reg_class:$rs1),
- (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
- (mask_type true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX)
- vop_reg_class:$rs1,
- xop_kind:$rs2,
- GPR:$vl, sew)>;
- def : Pat<(result_type (vop
- (vop_type vop_reg_class:$rs1),
- (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
+ (vop1_type vop_reg_class:$rs1),
+ (vop2_type (SplatPatKind (XLenVT xop_kind:$rs2))),
(mask_type V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX#"_MASK")
@@ -330,12 +363,12 @@ multiclass VPatBinaryVL_XI<SDNode vop,
multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
- defm : VPatBinaryVL_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass>;
+ defm : VPatBinaryVL_V<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, SplatPat, GPR>;
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, SplatPat, GPR>;
}
}
@@ -344,8 +377,8 @@ multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
: VPatBinaryVL_VV_VX<vop, instruction_name> {
foreach vti = AllIntegerVectors in {
defm : VPatBinaryVL_XI<vop, instruction_name, "VI",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
!cast<ComplexPattern>(SplatPat#_#ImmType),
ImmType>;
}
@@ -355,12 +388,26 @@ multiclass VPatBinaryWVL_VV_VX<SDNode vop, string instruction_name> {
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
- defm : VPatBinaryVL_VV<vop, instruction_name,
- wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass>;
+ defm : VPatBinaryVL_V<vop, instruction_name, "VV",
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
- wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, SplatPat, GPR>;
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, SplatPat, GPR>;
+ }
+}
+multiclass VPatBinaryWVL_VV_VX_WV_WX<SDNode vop, SDNode vop_w,
+ string instruction_name>
+ : VPatBinaryWVL_VV_VX<vop, instruction_name> {
+ foreach VtiToWti = AllWidenableIntVectors in {
+ defvar vti = VtiToWti.Vti;
+ defvar wti = VtiToWti.Wti;
+ defm : VPatBinaryVL_V<vop_w, instruction_name, "WV",
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass>;
+ defm : VPatBinaryVL_XI<vop_w, instruction_name, "WX",
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, SplatPat, GPR>;
}
}
@@ -375,14 +422,6 @@ multiclass VPatBinaryVL_VF<SDNode vop,
RegisterClass scalar_reg_class> {
def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
(vop_type (SplatFPOp scalar_reg_class:$rs2)),
- (mask_type true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#vlmul.MX)
- vop_reg_class:$rs1,
- scalar_reg_class:$rs2,
- GPR:$vl, sew)>;
- def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
- (vop_type (SplatFPOp scalar_reg_class:$rs2)),
(mask_type V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_"#vlmul.MX#"_MASK")
@@ -394,9 +433,9 @@ multiclass VPatBinaryVL_VF<SDNode vop,
multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
- defm : VPatBinaryVL_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass>;
+ defm : VPatBinaryVL_V<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
defm : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
vti.LMul, vti.RegClass, vti.ScalarRegClass>;
@@ -407,13 +446,6 @@ multiclass VPatBinaryFPVL_R_VF<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
fvti.RegClass:$rs1,
- (fvti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
- GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
- fvti.RegClass:$rs1,
(fvti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
@@ -427,65 +459,87 @@ multiclass VPatIntegerSetCCVL_VV<VTypeInfo vti, string instruction_name,
CondCode cc> {
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
vti.RegClass:$rs2, cc,
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl,
- vti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
+ (vti.Mask (IMPLICIT_DEF)),
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
// Inherits from VPatIntegerSetCCVL_VV and adds a pattern with operands swapped.
multiclass VPatIntegerSetCCVL_VV_Swappable<VTypeInfo vti, string instruction_name,
- CondCode cc, CondCode invcc> :
- VPatIntegerSetCCVL_VV<vti, instruction_name, cc> {
+ CondCode cc, CondCode invcc>
+ : VPatIntegerSetCCVL_VV<vti, instruction_name, cc> {
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs2),
vti.RegClass:$rs1, invcc,
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl,
- vti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
+ (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
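+
+// For example, when instantiated as <vti, "PseudoVMSLT", SETLT, SETGT>, both
+// (setlt a, b) and the operand-swapped (setgt b, a) select the same
+// vmslt.vv, keeping a in $rs1 and b in $rs2.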
multiclass VPatIntegerSetCCVL_VX_Swappable<VTypeInfo vti, string instruction_name,
CondCode cc, CondCode invcc> {
- defvar instruction = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX);
+ defvar instruction_masked = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK");
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
(SplatPat (XLenVT GPR:$rs2)), cc,
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)),
(vti.Vector vti.RegClass:$rs1), invcc,
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
multiclass VPatIntegerSetCCVL_VI_Swappable<VTypeInfo vti, string instruction_name,
CondCode cc, CondCode invcc> {
- defvar instruction = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX);
+ defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
(SplatPat_simm5 simm5:$rs2), cc,
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (instruction vti.RegClass:$rs1, XLenVT:$rs2, GPR:$vl, vti.Log2SEW)>;
+ (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ XLenVT:$rs2, (vti.Mask V0), GPR:$vl,
+ vti.Log2SEW)>;
+
+  // FIXME: We could do some canonicalization to remove these patterns.
def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2),
(vti.Vector vti.RegClass:$rs1), invcc,
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (instruction vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>;
+ (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ simm5:$rs2, (vti.Mask V0), GPR:$vl,
+ vti.Log2SEW)>;
}
-multiclass VPatIntegerSetCCVL_VIPlus1<VTypeInfo vti, string instruction_name,
- CondCode cc, ComplexPattern splatpat_kind> {
- defvar instruction = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX);
+multiclass VPatIntegerSetCCVL_VIPlus1_Swappable<VTypeInfo vti,
+ string instruction_name,
+ CondCode cc, CondCode invcc,
+ ComplexPattern splatpat_kind> {
+ defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
(splatpat_kind simm5:$rs2), cc,
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (instruction vti.RegClass:$rs1, (DecImm simm5:$rs2),
- GPR:$vl, vti.Log2SEW)>;
+ (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
+ vti.Log2SEW)>;
+
+  // FIXME: We could do some canonicalization to remove these patterns.
+ def : Pat<(vti.Mask (riscv_setcc_vl (splatpat_kind simm5:$rs2),
+ (vti.Vector vti.RegClass:$rs1), invcc,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
+ vti.Log2SEW)>;
}
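+
+// There is no vmslt.vi, so e.g. (setlt x, (splat 8)) is matched through
+// SplatPat_simm5_plus1 and selected as vmsle.vi with the immediate
+// decremented by DecImm, i.e. vmsle.vi vd, x, 7. The NonZero variant guards
+// the unsigned cases, where the immediate minus one would wrap for 0.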
multiclass VPatFPSetCCVL_VV_VF_FV<CondCode cc,
@@ -495,25 +549,29 @@ multiclass VPatFPSetCCVL_VV_VF_FV<CondCode cc,
def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
fvti.RegClass:$rs2,
cc,
- (fvti.Mask true_mask),
+ (fvti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.RegClass:$rs2, GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
+ (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
(SplatFPOp fvti.ScalarRegClass:$rs2),
cc,
- (fvti.Mask true_mask),
+ (fvti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+ (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
GPR:$vl, fvti.Log2SEW)>;
def : Pat<(fvti.Mask (riscv_setcc_vl (SplatFPOp fvti.ScalarRegClass:$rs2),
(fvti.Vector fvti.RegClass:$rs1),
cc,
- (fvti.Mask true_mask),
+ (fvti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+ (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (fvti.Mask (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
GPR:$vl, fvti.Log2SEW)>;
}
}
@@ -524,9 +582,11 @@ multiclass VPatExtendSDNode_V_VL<SDNode vop, string inst_name, string suffix,
defvar vti = vtiTofti.Vti;
defvar fti = vtiTofti.Fti;
def : Pat<(vti.Vector (vop (fti.Vector fti.RegClass:$rs2),
- true_mask, VLOpFrag)),
- (!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX)
- fti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+ (fti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)),
+ fti.RegClass:$rs2,
+ (fti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -534,10 +594,11 @@ multiclass VPatConvertFP2ISDNode_V_VL<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Mask true_mask),
+ (fvti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
- fvti.RegClass:$rs1, GPR:$vl, ivti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+ (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0), GPR:$vl, ivti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -545,10 +606,11 @@ multiclass VPatConvertI2FPSDNode_V_VL<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
- (ivti.Mask true_mask),
+ (ivti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
- ivti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
+ (ivti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -557,10 +619,11 @@ multiclass VPatWConvertFP2ISDNode_V_VL<SDNode vop, string instruction_name> {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Mask true_mask),
+ (fvti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -569,10 +632,11 @@ multiclass VPatWConvertI2FPSDNode_V_VL<SDNode vop, string instruction_name> {
defvar ivti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
- (ivti.Mask true_mask),
+ (ivti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
- ivti.RegClass:$rs1, GPR:$vl, ivti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+ (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
+ (ivti.Mask V0), GPR:$vl, ivti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -581,10 +645,11 @@ multiclass VPatNConvertFP2ISDNode_V_VL<SDNode vop, string instruction_name> {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1),
- (fwti.Mask true_mask),
+ (fwti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#vti.LMul.MX)
- fwti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -593,10 +658,11 @@ multiclass VPatNConvertI2FPSDNode_V_VL<SDNode vop, string instruction_name> {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
- (iwti.Mask true_mask),
+ (iwti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
- iwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1,
+ (iwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -622,45 +688,286 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
}
}
-multiclass VPatBinarySDNodeExt_V_WV<SDNode op, PatFrags extop, string instruction_name> {
- foreach vti = AllWidenableIntVectors in {
+multiclass VPatBinarySDNodeExt_V_WV_WX<SDNode op, PatFrags extop, string instruction_name> {
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))),
+ (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
+ wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))),
+ (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatBinarySDNode_V_WV_WX_WI<SDNode op, string instruction_name> {
+ defm : VPatBinarySDNodeExt_V_WV_WX<op, sext_oneuse, instruction_name>;
+ defm : VPatBinarySDNodeExt_V_WV_WX<op, zext_oneuse, instruction_name>;
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
def : Pat<
- (vti.Vti.Vector
+ (vti.Vector
(riscv_trunc_vector_vl
- (op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
- (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
- (riscv_vmset_vl VLMax),
- VLMax)),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (SplatPat_uimm5 uimm5:$rs1))), (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
+ wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenReductionVL<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
+ foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ defvar wti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # wti.SEW # "M1");
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
+ VR:$rs2, (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW)>;
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
+ VR:$rs2, (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_MASK")
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
+ foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ defvar wti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # wti.SEW # "M1");
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+ VR:$rs2, (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW)>;
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+ VR:$rs2, (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_MASK")
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
}
-multiclass VPatBinarySDNodeExt_V_WX<SDNode op, PatFrags extop, string instruction_name> {
- foreach vti = AllWidenableIntVectors in {
+multiclass VPatWidenBinaryFPVL_VV_VF<SDNode op, PatFrags extop, string instruction_name> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ def : Pat<(fwti.Vector (op (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Mask true_mask), VLOpFrag)),
+ (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask true_mask), VLOpFrag)),
+ (fwti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1,
+ GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fwti.Vector (op (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs2),
+ (fvti.Mask true_mask), VLOpFrag)),
+ (fwti.Vector (extop (fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
+ (fvti.Mask true_mask), VLOpFrag)),
+ (fwti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.ScalarRegClass:$rs1,
+ GPR:$vl, fvti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenBinaryFPVL_WV_WF<SDNode op, PatFrags extop, string instruction_name> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ def : Pat<(fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2),
+ (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask true_mask), VLOpFrag)),
+ (fwti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WV_"#fvti.LMul.MX)
+ fwti.RegClass:$rs2, fvti.RegClass:$rs1,
+ GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2),
+ (fwti.Vector (extop (fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
+ (fvti.Mask true_mask), VLOpFrag)),
+ (fwti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_W"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ fwti.RegClass:$rs2, fvti.ScalarRegClass:$rs1,
+ GPR:$vl, fvti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenBinaryFPVL_VV_VF_WV_WF<SDNode op, string instruction_name> {
+ defm : VPatWidenBinaryFPVL_VV_VF<op, riscv_fpextend_vl_oneuse, instruction_name>;
+ defm : VPatWidenBinaryFPVL_WV_WF<op, riscv_fpextend_vl_oneuse, instruction_name>;
+}
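+
+// As an illustration, with op = riscv_fadd_vl this covers both widening
+// forms: (fadd (fpext a), (fpext b)) -> vfwadd.vv and
+// (fadd w, (fpext b)) -> vfwadd.wv, plus the .vf/.wf splat variants. The
+// oneuse guard keeps the fold from duplicating an fpextend with other users.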
+
+multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruction_name> {
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
def : Pat<
- (vti.Vti.Vector
+ (vti.Vector
(riscv_trunc_vector_vl
- (op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
- (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
- (riscv_vmset_vl VLMax),
- VLMax)),
- (!cast<Instruction>(instruction_name#"_WX_"#vti.Vti.LMul.MX)
- vti.Wti.RegClass:$rs2, GPR:$rs1,
- vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1)),
+ (vti.Mask true_mask), VLOpFrag)),
+ (wti.Mask true_mask), VLOpFrag),
+ (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatMultiplyAddVL_VV_VX<SDNode op, string instruction_name> {
+ foreach vti = AllIntegerVectors in {
+ defvar suffix = vti.LMul.MX;
+    // NOTE: We choose VMADD because it has the most commuting freedom, so it
+ // works best with how TwoAddressInstructionPass tries commuting.
+ def : Pat<(vti.Vector
+ (op vti.RegClass:$rs2,
+ (riscv_mul_vl_oneuse vti.RegClass:$rs1,
+ vti.RegClass:$rd,
+ (vti.Mask true_mask), VLOpFrag),
+ (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix)
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+    // The choice of VMADD here is arbitrary; vmadd.vx and vmacc.vx are equally
+ // commutable.
+ def : Pat<(vti.Vector
+ (op vti.RegClass:$rs2,
+ (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rd,
+ (vti.Mask true_mask), VLOpFrag),
+ (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VX_" # suffix)
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
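+
+// Roughly: vmadd.vv vd, vs1, vs2 computes vd = vs1*vd + vs2, while
+// vmacc.vv vd, vs1, vs2 computes vd = vs1*vs2 + vd. Both fold the add, but
+// the tied destination differs, so leaving TwoAddressInstructionPass free to
+// commute avoids an extra vector copy.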
+
+multiclass VPatWidenMultiplyAddVL_VV_VX<PatFrag op1, string instruction_name> {
+ foreach vtiTowti = AllWidenableIntVectors in {
+ defvar vti = vtiTowti.Vti;
+ defvar wti = vtiTowti.Wti;
+ def : Pat<(wti.Vector
+ (riscv_add_vl wti.RegClass:$rd,
+ (op1 vti.RegClass:$rs1,
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), VLOpFrag),
+ (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_" # vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(wti.Vector
+ (riscv_add_vl wti.RegClass:$rd,
+ (op1 (SplatPat XLenVT:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), VLOpFrag),
+ (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VX_" # vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
+multiclass VPatNarrowShiftSplat_WX_WI<SDNode op, string instruction_name> {
+ foreach vtiTowti = AllWidenableIntVectors in {
+ defvar vti = vtiTowti.Vti;
+ defvar wti = vtiTowti.Wti;
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (op wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
+ true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
+ wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
+ true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
+ wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;
+ }
+}
-multiclass VPatBinarySDNode_V_WV<SDNode op, string instruction_name> {
- defm : VPatBinarySDNodeExt_V_WV<op, sext_oneuse, instruction_name>;
- defm : VPatBinarySDNodeExt_V_WV<op, zext_oneuse, instruction_name>;
+multiclass VPatFPMulAddVL_VV_VF<SDNode vop, string instruction_name> {
+ foreach vti = AllFloatVectors in {
+ defvar suffix = vti.LMul.MX;
+ def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
+ vti.RegClass:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix)
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
+ vti.RegClass:$rs2, (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
+ vti.RegClass:$rd, vti.RegClass:$rs2,
+ (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix)
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
+ vti.RegClass:$rd, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
-multiclass VPatBinarySDNode_V_WX<SDNode op, string instruction_name> {
- defm : VPatBinarySDNodeExt_V_WX<op, sext_oneuse, instruction_name>;
- defm : VPatBinarySDNodeExt_V_WX<op, zext_oneuse, instruction_name>;
+multiclass VPatWidenFPMulAccVL_VV_VF<SDNode vop, string instruction_name> {
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ def : Pat<(vop
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), VLOpFrag)),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), VLOpFrag)),
+ (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask),
+ VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vop
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), VLOpFrag)),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), VLOpFrag)),
+ (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask),
+ VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -669,29 +976,6 @@ multiclass VPatBinarySDNode_V_WX<SDNode op, string instruction_name> {
let Predicates = [HasVInstructions] in {
-// 7.4. Vector Unit-Stride Instructions
-foreach vti = AllVectors in {
- defvar load_instr = !cast<Instruction>("PseudoVLE"#vti.SEW#"_V_"#vti.LMul.MX);
- defvar store_instr = !cast<Instruction>("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX);
- // Load
- def : Pat<(vti.Vector (riscv_vle_vl BaseAddr:$rs1, VLOpFrag)),
- (load_instr BaseAddr:$rs1, GPR:$vl, vti.Log2SEW)>;
- // Store
- def : Pat<(riscv_vse_vl (vti.Vector vti.RegClass:$rs2), BaseAddr:$rs1,
- VLOpFrag),
- (store_instr vti.RegClass:$rs2, BaseAddr:$rs1, GPR:$vl, vti.Log2SEW)>;
-}
-
-foreach mti = AllMasks in {
- defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#mti.BX);
- defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#mti.BX);
- def : Pat<(mti.Mask (riscv_vle_vl BaseAddr:$rs1, VLOpFrag)),
- (load_instr BaseAddr:$rs1, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(riscv_vse_vl (mti.Mask VR:$rs2), BaseAddr:$rs1,
- VLOpFrag),
- (store_instr VR:$rs2, BaseAddr:$rs1, GPR:$vl, mti.Log2SEW)>;
-}
-
// 12.1. Vector Single-Width Integer Add and Subtract
defm : VPatBinaryVL_VV_VX_VI<riscv_add_vl, "PseudoVADD">;
defm : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;
@@ -699,22 +983,12 @@ defm : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;
// pattern operands
foreach vti = AllIntegerVectors in {
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
- VLOpFrag),
- (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
- vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
(vti.Vector vti.RegClass:$rs1), (vti.Mask V0),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
- (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
- VLOpFrag),
- (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
- vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
(vti.Vector vti.RegClass:$rs1), (vti.Mask V0),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK")
@@ -723,7 +997,10 @@ foreach vti = AllIntegerVectors in {
}
// 12.2. Vector Widening Integer Add/Subtract
-defm : VPatBinaryWVL_VV_VX<riscv_vwaddu_vl, "PseudoVWADDU">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwadd_vl, riscv_vwadd_w_vl, "PseudoVWADD">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwaddu_vl, riscv_vwaddu_w_vl, "PseudoVWADDU">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwsub_vl, riscv_vwsub_w_vl, "PseudoVWSUB">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwsubu_vl, riscv_vwsubu_w_vl, "PseudoVWSUBU">;
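+// Each covers four forms, e.g. for PseudoVWADDU: vwaddu.vv/.vx extend both
+// operands (via the VV/VX patterns), while vwaddu.wv/.wx take an already
+// wide first operand, so vwaddu.wv vd, vs2, vs1 adds a zero-extended narrow
+// vs1 into the wide vs2.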
// 12.3. Vector Integer Extension
defm : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF2",
@@ -737,7 +1014,7 @@ defm : VPatExtendSDNode_V_VL<riscv_sext_vl, "PseudoVSEXT", "VF4",
defm : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF8",
AllFractionableVF8IntVectors>;
defm : VPatExtendSDNode_V_VL<riscv_sext_vl, "PseudoVSEXT", "VF8",
- AllFractionableVF8IntVectors>;
+ AllFractionableVF8IntVectors>;
// 12.5. Vector Bitwise Logical Instructions
defm : VPatBinaryVL_VV_VX_VI<riscv_and_vl, "PseudoVAND">;
@@ -752,7 +1029,7 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", uimm5>;
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
def : Pat<(riscv_shl_vl (vti.Vector vti.RegClass:$rs1),
- (riscv_vmv_v_x_vl 1, (XLenVT srcvalue)),
+ (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)),
(vti.Mask true_mask),
VLOpFrag),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
@@ -760,49 +1037,25 @@ foreach vti = AllIntegerVectors in {
}
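+
+// For instance, (shl v8, (splat 1)) selects vadd.vv v8, v8, v8 rather than
+// vsll.vi v8, v8, 1, since the add might be faster on some implementations.
+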
// 12.7. Vector Narrowing Integer Right Shift Instructions
-defm : VPatBinarySDNode_V_WV<srl, "PseudoVNSRL">;
-defm : VPatBinarySDNode_V_WX<srl, "PseudoVNSRL">;
-defm : VPatBinarySDNode_V_WV<sra, "PseudoVNSRA">;
-defm : VPatBinarySDNode_V_WX<sra, "PseudoVNSRA">;
+defm : VPatBinarySDNode_V_WV_WX_WI<srl, "PseudoVNSRL">;
+defm : VPatBinarySDNode_V_WV_WX_WI<sra, "PseudoVNSRA">;
+
+defm : VPatNarrowShiftSplat_WX_WI<riscv_sra_vl, "PseudoVNSRA">;
+defm : VPatNarrowShiftSplat_WX_WI<riscv_srl_vl, "PseudoVNSRL">;
+defm : VPatNarrowShiftSplatExt_WX<riscv_sra_vl, riscv_sext_vl_oneuse, "PseudoVNSRA">;
+defm : VPatNarrowShiftSplatExt_WX<riscv_sra_vl, riscv_zext_vl_oneuse, "PseudoVNSRA">;
+defm : VPatNarrowShiftSplatExt_WX<riscv_srl_vl, riscv_sext_vl_oneuse, "PseudoVNSRL">;
+defm : VPatNarrowShiftSplatExt_WX<riscv_srl_vl, riscv_zext_vl_oneuse, "PseudoVNSRL">;
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1),
- (vti.Mask true_mask),
+ (vti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRL_WX_"#vti.LMul.MX)
- wti.RegClass:$rs1, X0, GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector
- (riscv_trunc_vector_vl
- (wti.Vector
- (riscv_sra_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
- true_mask, VLOpFrag)), true_mask, VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRA_WX_"#vti.LMul.MX)
- wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector
- (riscv_trunc_vector_vl
- (wti.Vector
- (riscv_sra_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
- true_mask, VLOpFrag)), true_mask, VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRA_WI_"#vti.LMul.MX)
- wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector
- (riscv_trunc_vector_vl
- (wti.Vector
- (riscv_srl_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
- true_mask, VLOpFrag)), true_mask, VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRL_WX_"#vti.LMul.MX)
- wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector
- (riscv_trunc_vector_vl
- (wti.Vector
- (riscv_srl_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
- true_mask, VLOpFrag)), true_mask, VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX)
- wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVNSRL_WX_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, X0,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
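+
+// A bare truncation has no dedicated instruction: it is selected as a
+// narrowing logical shift right with register x0, i.e. a shift amount of
+// zero, now in its masked pseudo form.
+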
// 12.8. Vector Integer Comparison Instructions
@@ -832,14 +1085,14 @@ foreach vti = AllIntegerVectors in {
defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGT", SETGT, SETLT>;
defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
- defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLE", SETLT,
- SplatPat_simm5_plus1_nonzero>;
- defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLEU", SETULT,
- SplatPat_simm5_plus1_nonzero>;
- defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSGT", SETGE,
- SplatPat_simm5_plus1>;
- defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSGTU", SETUGE,
- SplatPat_simm5_plus1_nonzero>;
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSLE", SETLT, SETGT,
+ SplatPat_simm5_plus1_nonzero>;
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSLEU", SETULT, SETUGT,
+ SplatPat_simm5_plus1_nonzero>;
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSGT", SETGE, SETLE,
+ SplatPat_simm5_plus1>;
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSGTU", SETUGE, SETULE,
+ SplatPat_simm5_plus1_nonzero>;
} // foreach vti = AllIntegerVectors
// 12.9. Vector Integer Min/Max Instructions
@@ -865,92 +1118,24 @@ defm : VPatBinaryWVL_VV_VX<riscv_vwmulu_vl, "PseudoVWMULU">;
defm : VPatBinaryWVL_VV_VX<riscv_vwmulsu_vl, "PseudoVWMULSU">;
// 12.13. Vector Single-Width Integer Multiply-Add Instructions
-foreach vti = AllIntegerVectors in {
- // NOTE: We choose VMADD because it has the most commuting freedom. So it
- // works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX;
- def : Pat<(vti.Vector
- (riscv_add_vl vti.RegClass:$rs2,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1,
- vti.RegClass:$rd,
- (vti.Mask true_mask), VLOpFrag),
- (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVMADD_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector
- (riscv_sub_vl vti.RegClass:$rs2,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1,
- vti.RegClass:$rd,
- (vti.Mask true_mask), VLOpFrag),
- (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
- // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
- // commutable.
- def : Pat<(vti.Vector
- (riscv_add_vl vti.RegClass:$rs2,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
- vti.RegClass:$rd,
- (vti.Mask true_mask), VLOpFrag),
- (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVMADD_VX_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector
- (riscv_sub_vl vti.RegClass:$rs2,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
- vti.RegClass:$rd,
- (vti.Mask true_mask),
- VLOpFrag),
- (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-}
+defm : VPatMultiplyAddVL_VV_VX<riscv_add_vl, "PseudoVMADD">;
+defm : VPatMultiplyAddVL_VV_VX<riscv_sub_vl, "PseudoVNMSUB">;
// 12.14. Vector Widening Integer Multiply-Add Instructions
+defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmul_vl_oneuse, "PseudoVWMACC">;
+defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmulu_vl_oneuse, "PseudoVWMACCU">;
+defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmulsu_vl_oneuse, "PseudoVWMACCSU">;
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
def : Pat<(wti.Vector
(riscv_add_vl wti.RegClass:$rd,
- (riscv_vwmul_vl_oneuse vti.RegClass:$rs1,
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag),
- (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACC_VV_" # vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(wti.Vector
- (riscv_add_vl wti.RegClass:$rd,
- (riscv_vwmulu_vl_oneuse vti.RegClass:$rs1,
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag),
+ (riscv_vwmulsu_vl_oneuse (vti.Vector vti.RegClass:$rs1),
+ (SplatPat XLenVT:$rs2),
+ (vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACCU_VV_" # vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
- def : Pat<(wti.Vector
- (riscv_add_vl wti.RegClass:$rd,
- (riscv_vwmul_vl_oneuse (SplatPat XLenVT:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag),
- (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACC_VX_" # vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(wti.Vector
- (riscv_add_vl wti.RegClass:$rd,
- (riscv_vwmulu_vl_oneuse (SplatPat XLenVT:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag),
- (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACCU_VX_" # vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (!cast<Instruction>("PseudoVWMACCUS_VX_" # vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs2, vti.RegClass:$rs1,
GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
@@ -1005,14 +1190,21 @@ foreach vti = AllIntegerVectors in {
// 12.16. Vector Integer Move Instructions
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, VLOpFrag)),
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), GPR:$rs2, VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.Vector:$passthru, GPR:$rs2, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX#"_TU")
+ $passthru, $rs2, GPR:$vl, vti.Log2SEW)>;
defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl (ImmPat XLenVT:$imm5),
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), (ImmPat XLenVT:$imm5),
VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.Vector:$passthru, (ImmPat XLenVT:$imm5),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX#"_TU")
+ $passthru, XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>;
}
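+
+// In these pairs, an undef passthru selects the plain (tail-agnostic)
+// pseudo, while a real passthru selects the _TU flavor, which leaves the
+// elements past vl untouched in $passthru.
+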
// 12.1. Vector Single-Width Saturating Add and Subtract
@@ -1033,6 +1225,13 @@ defm : VPatReductionVL<rvv_vecreduce_SMIN_vl, "PseudoVREDMIN", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_AND_vl, "PseudoVREDAND", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_OR_vl, "PseudoVREDOR", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_XOR_vl, "PseudoVREDXOR", /*is_float*/0>;
+
+// 15.2. Vector Widening Integer Reduction Instructions
+defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, anyext_oneuse, "PseudoVWREDSUMU", /*is_float*/0>;
+defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, zext_oneuse, "PseudoVWREDSUMU", /*is_float*/0>;
+defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_ADD_vl, riscv_zext_vl_oneuse, "PseudoVWREDSUMU", /*is_float*/0>;
+defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, sext_oneuse, "PseudoVWREDSUM", /*is_float*/0>;
+defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_ADD_vl, riscv_sext_vl_oneuse, "PseudoVWREDSUM", /*is_float*/0>;
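+// e.g. vwredsumu.vs vd, vs2, vs1 zero-extends each SEW-wide element of vs2
+// to 2*SEW and sums them into the 2*SEW scalar in vs1[0]; the plain and
+// _Ext_VL multiclasses catch the extend as a target-independent node and as
+// a riscv_*ext_vl node, respectively.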
} // Predicates = [HasVInstructions]
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
@@ -1041,6 +1240,12 @@ defm : VPatReductionVL<rvv_vecreduce_SEQ_FADD_vl, "PseudoVFREDOSUM", /*is_float*
defm : VPatReductionVL<rvv_vecreduce_FADD_vl, "PseudoVFREDUSUM", /*is_float*/1>;
defm : VPatReductionVL<rvv_vecreduce_FMIN_vl, "PseudoVFREDMIN", /*is_float*/1>;
defm : VPatReductionVL<rvv_vecreduce_FMAX_vl, "PseudoVFREDMAX", /*is_float*/1>;
+
+// 15.4. Vector Widening Floating-Point Reduction Instructions
+defm : VPatWidenReductionVL<rvv_vecreduce_SEQ_FADD_vl, fpext_oneuse, "PseudoVFWREDOSUM", /*is_float*/1>;
+defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_SEQ_FADD_vl, riscv_fpextend_vl_oneuse, "PseudoVFWREDOSUM", /*is_float*/1>;
+defm : VPatWidenReductionVL<rvv_vecreduce_FADD_vl, fpext_oneuse, "PseudoVFWREDUSUM", /*is_float*/1>;
+defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_FADD_vl, riscv_fpextend_vl_oneuse, "PseudoVFWREDUSUM", /*is_float*/1>;
} // Predicates = [HasVInstructionsAnyF]
let Predicates = [HasVInstructionsAnyF] in {
@@ -1050,118 +1255,29 @@ defm : VPatBinaryFPVL_VV_VF<riscv_fadd_vl, "PseudoVFADD">;
defm : VPatBinaryFPVL_VV_VF<riscv_fsub_vl, "PseudoVFSUB">;
defm : VPatBinaryFPVL_R_VF<riscv_fsub_vl, "PseudoVFRSUB">;
+// 14.3. Vector Widening Floating-Point Add/Subtract Instructions
+defm : VPatWidenBinaryFPVL_VV_VF_WV_WF<riscv_fadd_vl, "PseudoVFWADD">;
+defm : VPatWidenBinaryFPVL_VV_VF_WV_WF<riscv_fsub_vl, "PseudoVFWSUB">;
+
// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm : VPatBinaryFPVL_VV_VF<riscv_fmul_vl, "PseudoVFMUL">;
defm : VPatBinaryFPVL_VV_VF<riscv_fdiv_vl, "PseudoVFDIV">;
defm : VPatBinaryFPVL_R_VF<riscv_fdiv_vl, "PseudoVFRDIV">;
-// 14.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions.
-foreach vti = AllFloatVectors in {
- // NOTE: We choose VFMADD because it has the most commuting freedom. So it
- // works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX;
- def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
- vti.RegClass:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
- (riscv_fneg_vl vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$rd,
- (riscv_fneg_vl vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$rd, vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+// 14.5. Vector Widening Floating-Point Multiply Instructions
+defm : VPatWidenBinaryFPVL_VV_VF<riscv_fmul_vl, riscv_fpextend_vl_oneuse, "PseudoVFWMUL">;
- // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
- // commutable.
- def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
- vti.RegClass:$rd, vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
- vti.RegClass:$rd,
- (riscv_fneg_vl vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
- (riscv_fneg_vl vti.RegClass:$rd,
- (vti.Mask true_mask),
- VLOpFrag),
- (riscv_fneg_vl vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
- (riscv_fneg_vl vti.RegClass:$rd,
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+defm : VPatFPMulAddVL_VV_VF<riscv_vfmadd_vl, "PseudoVFMADD">;
+defm : VPatFPMulAddVL_VV_VF<riscv_vfmsub_vl, "PseudoVFMSUB">;
+defm : VPatFPMulAddVL_VV_VF<riscv_vfnmadd_vl, "PseudoVFNMADD">;
+defm : VPatFPMulAddVL_VV_VF<riscv_vfnmsub_vl, "PseudoVFNMSUB">;
- // The splat might be negated.
- def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$rd,
- (riscv_fneg_vl vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$rd, vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-}
+// 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfmadd_vl, "PseudoVFWMACC">;
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfnmadd_vl, "PseudoVFWNMACC">;
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfmsub_vl, "PseudoVFWMSAC">;
+defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfnmsub_vl, "PseudoVFWNMSAC">;
// 14.11. Vector Floating-Point MIN/MAX Instructions
defm : VPatBinaryFPVL_VV_VF<riscv_fminnum_vl, "PseudoVFMIN">;
@@ -1193,10 +1309,13 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
vti.RegClass:$rs, vti.RegClass:$rs, GPR:$vl, vti.Log2SEW)>;
// Handle fneg with VFSGNJN using the same input for both operands.
- def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask true_mask),
+ def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0),
VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
- vti.RegClass:$rs, vti.RegClass:$rs, GPR:$vl, vti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
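+  // fneg is thus a pure sign-bit flip: vfsgnjn.vv vd, vs, vs rewrites vs
+  // with its own sign negated, so no floating-point arithmetic is involved.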
+
def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask),
@@ -1276,16 +1395,26 @@ foreach fvti = AllFloatVectors in {
// 14.16. Vector Floating-Point Move Instruction
// If we're splatting fpimm0, use vmv.v.i vd, 0.
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
- (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (fvti.Vector undef), (fvti.Scalar (fpimm0)), VLOpFrag)),
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
0, GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX#"_TU")
+ $passthru, 0, GPR:$vl, fvti.Log2SEW)>;
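+  // Either way, no FP register is read: +0.0 is the all-zeros bit pattern,
+  // which the integer vmv.v.i vd, 0 already provides.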
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
- (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (fvti.Vector undef), (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
(!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
fvti.LMul.MX)
(fvti.Scalar fvti.ScalarRegClass:$rs2),
GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
+ fvti.LMul.MX # "_TU")
+ $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.Log2SEW)>;
// 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
defm : VPatConvertFP2ISDNode_V_VL<riscv_fp_to_sint_vl, "PseudoVFCVT_RTZ_X_F_V">;
@@ -1302,10 +1431,11 @@ foreach fvti = AllFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
def : Pat<(fwti.Vector (riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Mask true_mask),
+ (fvti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_MASK")
+ (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 14.19 Narrowing Floating-Point/Integer Type-Convert Instructions
@@ -1317,16 +1447,18 @@ foreach fvti = AllFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
def : Pat<(fvti.Vector (riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1),
- (fwti.Mask true_mask),
+ (fwti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
- fwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1),
- (fwti.Mask true_mask),
+ (fwti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX)
- fwti.RegClass:$rs1, GPR:$vl, fvti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
@@ -1412,43 +1544,27 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
vti.RegClass:$merge,
(vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+
def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
- (vti.Mask true_mask),
+ vti.RegClass:$rs1,
+ (vti.Mask V0),
+ vti.RegClass:$merge,
VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
- vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (riscv_vrgather_vv_vl
- vti.RegClass:$rs2,
- vti.RegClass:$rs1,
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$merge,
- VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (riscv_vrgather_vx_vl
- vti.RegClass:$rs2,
- uimm5:$imm,
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$merge,
- VLOpFrag)),
+ def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0),
+ vti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2,
+ uimm5:$imm,
+ (vti.Mask V0),
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
@@ -1461,21 +1577,13 @@ foreach vti = AllIntegerVectors in {
defvar emul_str = octuple_to_str<octuple_emul>.ret;
defvar ivti = !cast<VTypeInfo>("VI16" # emul_str);
defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str;
- def : Pat<(vti.Vector (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2,
- (ivti.Vector ivti.RegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(inst)
- vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (riscv_vrgatherei16_vv_vl
- vti.RegClass:$rs2,
- (ivti.Vector ivti.RegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$merge,
- VLOpFrag)),
+
+ def : Pat<(vti.Vector
+ (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2,
+ (ivti.Vector ivti.RegClass:$rs1),
+ (vti.Mask V0),
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>(inst#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
@@ -1500,43 +1608,29 @@ foreach vti = AllFloatVectors in {
vti.RegClass:$merge,
(vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
defvar ivti = GetIntVTypeInfo<vti>.Vti;
- def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
- (ivti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
- vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (riscv_vrgather_vv_vl
- vti.RegClass:$rs2,
- (ivti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$merge,
- VLOpFrag)),
+ def : Pat<(vti.Vector
+ (riscv_vrgather_vv_vl vti.RegClass:$rs2,
+ (ivti.Vector vti.RegClass:$rs1),
+ (vti.Mask V0),
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (riscv_vrgather_vx_vl
- vti.RegClass:$rs2,
- uimm5:$imm,
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$merge,
- VLOpFrag)),
+ def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0),
+ vti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector
+ (riscv_vrgather_vx_vl vti.RegClass:$rs2,
+ uimm5:$imm,
+ (vti.Mask V0),
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
@@ -1548,21 +1642,13 @@ foreach vti = AllFloatVectors in {
defvar emul_str = octuple_to_str<octuple_emul>.ret;
defvar ivti = !cast<VTypeInfo>("VI16" # emul_str);
defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str;
- def : Pat<(vti.Vector (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2,
- (ivti.Vector ivti.RegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(inst)
- vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (riscv_vrgatherei16_vv_vl
- vti.RegClass:$rs2,
- (ivti.Vector ivti.RegClass:$rs1),
- (vti.Mask true_mask),
- VLOpFrag),
- vti.RegClass:$merge,
- VLOpFrag)),
+
+ def : Pat<(vti.Vector
+ (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2,
+ (ivti.Vector ivti.RegClass:$rs1),
+ (vti.Mask V0),
+ vti.RegClass:$merge,
+ VLOpFrag)),
(!cast<Instruction>(inst#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1,
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
@@ -1583,9 +1669,10 @@ def SDTRVVSlide : SDTypeProfile<1, 5, [
SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisVT<3, XLenVT>,
SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT>
]>;
-def SDTRVVSlide1 : SDTypeProfile<1, 4, [
- SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisInt<0>, SDTCisVT<2, XLenVT>,
- SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>
+def SDTRVVSlide1 : SDTypeProfile<1, 5, [
+ SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisInt<0>,
+ SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
+ SDTCisVT<5, XLenVT>
]>;
def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
@@ -1600,16 +1687,30 @@ foreach vti = AllIntegerVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX) GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rs1),
+ def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
GPR:$rs2, (vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rs1),
+ def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
GPR:$rs2, (vti.Mask true_mask),
VLOpFrag)),
+ (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
(!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX)
vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
}
foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
@@ -1619,7 +1720,7 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
VLOpFrag)),
(!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX)
vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
(vti.Vector vti.RegClass:$rs1),
@@ -1627,7 +1728,7 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
VLOpFrag)),
(!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX)
vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
(vti.Vector vti.RegClass:$rs1),
@@ -1635,7 +1736,14 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
VLOpFrag)),
(!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
+ def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
+ uimm5:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, uimm5:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
(vti.Vector vti.RegClass:$rs1),
@@ -1643,7 +1751,14 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
VLOpFrag)),
(!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
+ def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
} // Predicates = [HasVInstructions]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 07884d35f63c..9532d1dd3dd2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -211,15 +211,16 @@ def CSImm12MulBy4 : PatLeaf<(imm), [{
return false;
int64_t C = N->getSExtValue();
// Skip if C is simm12 or can be optimized by the PatLeaf AddiPair.
- return !isInt<13>(C) && isInt<14>(C) && (C & 3) == 0;
+ return !isInt<13>(C) && isShiftedInt<12, 2>(C);
}]>;
def CSImm12MulBy8 : PatLeaf<(imm), [{
if (!N->hasOneUse())
return false;
int64_t C = N->getSExtValue();
- // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair.
- return !isInt<13>(C) && isInt<15>(C) && (C & 7) == 0;
+ // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair or
+ // CSImm12MulBy4.
+ return !isInt<14>(C) && isShiftedInt<12, 3>(C);
}]>;
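+// For example, C = 8100 fails isInt<13> but satisfies isShiftedInt<12, 2>
+// (8100 == 2025 << 2 with 2025 a simm12), so CSImm12MulBy4 accepts it. A
+// sketch of the intended lowering, assuming the usual Zba expansion:
+//   addi t0, zero, 2025
+//   sh2add rd, t0, rs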
def SimmShiftRightBy2XForm : SDNodeXForm<imm, [{
@@ -232,6 +233,12 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+// Pattern to exclude simm12 immediates from matching.
+def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
+ auto *C = dyn_cast<ConstantSDNode>(N);
+ return !C || !isInt<12>(C->getSExtValue());
+}]>;
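+// For example, this keeps (add (shl X, 1), 100) selecting SLLI+ADDI rather
+// than forcing the simm12 constant 100 into a register to feed SH1ADD.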
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -348,7 +355,7 @@ def SH2ADD_UW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">,
Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
def SH3ADD_UW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+} // Predicates = [HasStdExtZba, IsRV64]
let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def ROL : ALU_rr<0b0110000, 0b001, "rol">,
@@ -368,7 +375,7 @@ def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
Sched<[WriteRotateImm32, ReadRotateImm32]>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
let Predicates = [HasStdExtZbs] in {
def BCLR : ALU_rr<0b0100100, 0b001, "bclr">,
@@ -391,32 +398,48 @@ def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">,
} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbp] in {
-def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
-def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
-
-def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">, Sched<[]>;
-def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">, Sched<[]>;
-
-def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
-def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>;
-
-def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
-def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>;
-
-def XPERM_H : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
+def GORC : ALU_rr<0b0010100, 0b101, "gorc">,
+ Sched<[WriteORC, ReadORC, ReadORC]>;
+def GREV : ALU_rr<0b0110100, 0b101, "grev">,
+ Sched<[WriteREV, ReadREV, ReadREV]>;
+
+def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">,
+ Sched<[WriteREVImm, ReadREVImm]>;
+def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">,
+ Sched<[WriteORCImm, ReadORCImm]>;
+
+def SHFL : ALU_rr<0b0000100, 0b001, "shfl">,
+ Sched<[WriteSHFL, ReadSHFL, ReadSHFL]>;
+def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">,
+ Sched<[WriteUNSHFL, ReadUNSHFL, ReadUNSHFL]>;
+
+def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">,
+ Sched<[WriteSHFLImm, ReadSHFLImm]>;
+def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">,
+ Sched<[WriteUNSHFLImm, ReadUNSHFLImm]>;
+
+def XPERM_H : ALU_rr<0b0010100, 0b110, "xperm.h">,
+ Sched<[WriteXPERMH, ReadXPERMH, ReadXPERMH]>;
} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbp, IsRV64] in {
-def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
-def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
-
-def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
-def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
-
-def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>;
-def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>;
-
-def XPERM_W : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
+def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">,
+ Sched<[WriteORC32, ReadORC32, ReadORC32]>;
+def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">,
+ Sched<[WriteREV32, ReadREV32, ReadREV32]>;
+
+def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">,
+ Sched<[WriteREVImm32, ReadREVImm32]>;
+def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">,
+ Sched<[WriteORCImm32, ReadORCImm32]>;
+
+def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">,
+ Sched<[WriteSHFL32, ReadSHFL32, ReadSHFL32]>;
+def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">,
+ Sched<[WriteUNSHFL32, ReadUNSHFL32, ReadUNSHFL32]>;
+
+def XPERM_W : ALU_rr<0b0010100, 0b000, "xperm.w">,
+ Sched<[WriteXPERMW, ReadXPERMW, ReadXPERMW]>;
} // Predicates = [HasStdExtZbp, IsRV64]
// These instructions were named xperm.n and xperm.b in the last version of
@@ -429,24 +452,28 @@ def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>;
let Predicates = [HasStdExtZbt] in {
def CMIX : RVBTernaryR<0b11, 0b001, OPC_OP, "cmix", "$rd, $rs2, $rs1, $rs3">,
- Sched<[]>;
+ Sched<[WriteCMix, ReadCMix, ReadCMix, ReadCMix]>;
def CMOV : RVBTernaryR<0b11, 0b101, OPC_OP, "cmov", "$rd, $rs2, $rs1, $rs3">,
- Sched<[]>;
+ Sched<[WriteCMov, ReadCMov, ReadCMov, ReadCMov]>;
def FSL : RVBTernaryR<0b10, 0b001, OPC_OP, "fsl", "$rd, $rs1, $rs3, $rs2">,
- Sched<[]>;
+ Sched<[WriteFSReg, ReadFSReg, ReadFSReg, ReadFSReg]>;
def FSR : RVBTernaryR<0b10, 0b101, OPC_OP, "fsr", "$rd, $rs1, $rs3, $rs2">,
- Sched<[]>;
+ Sched<[WriteFSReg, ReadFSReg, ReadFSReg, ReadFSReg]>;
def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri",
- "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
+ "$rd, $rs1, $rs3, $shamt">,
+ Sched<[WriteFSRImm, ReadFSRImm, ReadFSRImm]>;
} // Predicates = [HasStdExtZbt]
let Predicates = [HasStdExtZbt, IsRV64] in {
def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32,
- "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+ "fslw", "$rd, $rs1, $rs3, $rs2">,
+ Sched<[WriteFSReg32, ReadFSReg32, ReadFSReg32, ReadFSReg32]>;
def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw",
- "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+ "$rd, $rs1, $rs3, $rs2">,
+ Sched<[WriteFSReg32, ReadFSReg32, ReadFSReg32, ReadFSReg32]>;
def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
- "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
+ "fsriw", "$rd, $rs1, $rs3, $shamt">,
+ Sched<[WriteFSRImm32, ReadFSRImm32, ReadFSRImm32]>;
} // Predicates = [HasStdExtZbt, IsRV64]
let Predicates = [HasStdExtZbb] in {
@@ -476,88 +503,96 @@ def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
let Predicates = [HasStdExtZbr] in {
def CRC32_B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">,
- Sched<[]>;
+ Sched<[WriteCRCB, ReadCRCB]>;
def CRC32_H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">,
- Sched<[]>;
+ Sched<[WriteCRCH, ReadCRCH]>;
def CRC32_W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">,
- Sched<[]>;
+ Sched<[WriteCRCW, ReadCRCW]>;
def CRC32C_B : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">,
- Sched<[]>;
+ Sched<[WriteCRCCB, ReadCRCCB]>;
def CRC32C_H : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">,
- Sched<[]>;
+ Sched<[WriteCRCCH, ReadCRCCH]>;
def CRC32C_W : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">,
- Sched<[]>;
+ Sched<[WriteCRCCW, ReadCRCCW]>;
} // Predicates = [HasStdExtZbr]
let Predicates = [HasStdExtZbr, IsRV64] in {
def CRC32_D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">,
- Sched<[]>;
+ Sched<[WriteCRCD, ReadCRCD]>;
def CRC32C_D : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
- Sched<[]>;
+ Sched<[WriteCRCCD, ReadCRCCD]>;
} // Predicates = [HasStdExtZbr, IsRV64]
let Predicates = [HasStdExtZbc] in {
-def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">,
+def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr", /*Commutable*/1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
} // Predicates = [HasStdExtZbc]
let Predicates = [HasStdExtZbcOrZbkc] in {
-def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">,
+def CLMUL : ALU_rr<0b0000101, 0b001, "clmul", /*Commutable*/1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
-def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">,
+def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh", /*Commutable*/1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
} // Predicates = [HasStdExtZbcOrZbkc]
let Predicates = [HasStdExtZbb] in {
-def MIN : ALU_rr<0b0000101, 0b100, "min">,
+def MIN : ALU_rr<0b0000101, 0b100, "min", /*Commutable*/1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def MINU : ALU_rr<0b0000101, 0b101, "minu">,
+def MINU : ALU_rr<0b0000101, 0b101, "minu", /*Commutable*/1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def MAX : ALU_rr<0b0000101, 0b110, "max">,
+def MAX : ALU_rr<0b0000101, 0b110, "max", /*Commutable*/1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def MAXU : ALU_rr<0b0000101, 0b111, "maxu">,
+def MAXU : ALU_rr<0b0000101, 0b111, "maxu", /*Commutable*/1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbp] in {
-} // Predicates = [HasStdExtZbp]
-
let Predicates = [HasStdExtZbe] in {
// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
// bext in the 0.93 spec.
-def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>;
-def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>;
+def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">,
+ Sched<[WriteDecompress, ReadDecompress, ReadDecompress]>;
+def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">,
+ Sched<[WriteCompress, ReadCompress, ReadCompress]>;
} // Predicates = [HasStdExtZbe]
let Predicates = [HasStdExtZbe, IsRV64] in {
// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
// bextw in the 0.93 spec.
-def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
-def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
+def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">,
+ Sched<[WriteDecompress32, ReadDecompress32, ReadDecompress32]>;
+def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">,
+ Sched<[WriteCompress32, ReadCompress32, ReadCompress32]>;
} // Predicates = [HasStdExtZbe, IsRV64]
let Predicates = [HasStdExtZbpOrZbkb] in {
-def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>;
-def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
+def PACK : ALU_rr<0b0000100, 0b100, "pack">,
+ Sched<[WritePACK, ReadPACK, ReadPACK]>;
+def PACKH : ALU_rr<0b0000100, 0b111, "packh">,
+ Sched<[WritePACK, ReadPACK, ReadPACK]>;
} // Predicates = [HasStdExtZbpOrZbkb]
let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
-def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
+def PACKW : ALUW_rr<0b0000100, 0b100, "packw">,
+ Sched<[WritePACK32, ReadPACK32, ReadPACK32]>;
let Predicates = [HasStdExtZbp] in
-def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
+def PACKU : ALU_rr<0b0100100, 0b100, "packu">,
+ Sched<[WritePACKU, ReadPACKU, ReadPACKU]>;
let Predicates = [HasStdExtZbp, IsRV64] in
-def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
+def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">,
+ Sched<[WritePACKU32, ReadPACKU32, ReadPACKU32]>;
let Predicates = [HasStdExtZbm, IsRV64] in {
def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">,
- Sched<[]>;
+ Sched<[WriteBMatrix, ReadBMatrix]>;
-def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>;
-def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>;
+def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">,
+ Sched<[WriteBMatrix, ReadBMatrix, ReadBMatrix]>;
+def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">,
+ Sched<[WriteBMatrix, ReadBMatrix, ReadBMatrix]>;
} // Predicates = [HasStdExtZbm, IsRV64]
let Predicates = [HasStdExtZbf] in
@@ -601,12 +636,15 @@ def ORC_B : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
} // Predicates = [HasStdExtZbbOrZbp]
let Predicates = [HasStdExtZbpOrZbkb] in
-def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">;
+def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">,
+ Sched<[]>;
let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in {
-def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">;
-def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">;
-} // Predicates = [HasStdExtZbkb, IsRV32]
+def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">,
+ Sched<[]>;
+def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">,
+ Sched<[]>;
+} // Predicates = [HasStdExtZbpOrZbkb, IsRV32]
//===----------------------------------------------------------------------===//
@@ -615,7 +653,7 @@ def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">;
let Predicates = [HasStdExtZba, IsRV64] in {
def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>;
-}
+} // Predicates = [HasStdExtZba, IsRV64]
let Predicates = [HasStdExtZbp] in {
def : InstAlias<"rev.p $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00001)>;
@@ -780,8 +818,8 @@ def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
-def : PatGprGpr<rotl, ROL>;
-def : PatGprGpr<rotr, ROR>;
+def : PatGprGpr<shiftop<rotl>, ROL>;
+def : PatGprGpr<shiftop<rotr>, ROR>;
def : PatGprImm<rotr, RORI, uimmlog2xlen>;
// There's no encoding for roli in the 'B' extension as it can be
@@ -791,8 +829,8 @@ def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
-def : PatGprGpr<riscv_rolw, ROLW>;
-def : PatGprGpr<riscv_rorw, RORW>;
+def : PatGprGpr<shiftopw<riscv_rolw>, ROLW>;
+def : PatGprGpr<shiftopw<riscv_rorw>, RORW>;
def : PatGprImm<riscv_rorw, RORIW, uimm5>;
def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
(RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
@@ -843,23 +881,25 @@ def : Pat<(and GPR:$r, BCLRITwoBitsMask:$i),
def : Pat<(and GPR:$r, BCLRIANDIMask:$i),
(BCLRI (ANDI GPR:$r, (BCLRIANDIMaskLow BCLRIANDIMask:$i)),
(BCLRITwoBitsMaskHigh BCLRIANDIMask:$i))>;
-}
+} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbbOrZbp] in {
// We treat orc.b as a separate instruction, so match it directly. We also
// lower the Zbb orc.b intrinsic to this.
def : Pat<(riscv_gorc GPR:$rs1, 7), (ORC_B GPR:$rs1)>;
-}
+} // Predicates = [HasStdExtZbbOrZbp]
let Predicates = [HasStdExtZbpOrZbkb] in {
// We treat brev8 as a separate instruction, so match it directly. We also
// use this for brev8 when lowering bitreverse with Zbkb.
def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>;
+} // Predicates = [HasStdExtZbpOrZbkb]
+let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in {
// We treat zip and unzip as separate instructions, so match them directly.
def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>;
def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>;
-}
+} // Predicates = [HasStdExtZbpOrZbkb, IsRV32]
let Predicates = [HasStdExtZbp] in {
def : PatGprGpr<riscv_grev, GREV>;
@@ -880,12 +920,16 @@ def : PatGprGpr<int_riscv_xperm_h, XPERM_H>;
let Predicates = [HasStdExtZbp, IsRV64] in {
def : PatGprGpr<riscv_grevw, GREVW>;
def : PatGprGpr<riscv_gorcw, GORCW>;
-def : PatGprImm<riscv_grevw, GREVIW, uimm5>;
-def : PatGprImm<riscv_gorcw, GORCIW, uimm5>;
-// FIXME: Move to DAG combine.
-def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
-def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
+// Select GREVIW/GORCIW when the immediate doesn't have bit 5 set and the result
+// is sign extended.
+// FIXME: Two special patterns are kept when Imm is 7.
+def : Pat<(i64 (sext_inreg (binop_oneuse<riscv_grev> GPR:$rs1, 7), i32)),
+ (GREVIW GPR:$rs1, 7)>;
+def : Pat<(i64 (sext_inreg (binop_oneuse<riscv_gorc> GPR:$rs1, 7), i32)),
+ (GORCIW GPR:$rs1, 7)>;
+def : PatGprImm<binop_allwusers<riscv_grev>, GREVIW, uimm5>;
+def : PatGprImm<binop_allwusers<riscv_gorc>, GORCIW, uimm5>;
def : PatGprGpr<riscv_shflw, SHFLW>;
def : PatGprGpr<riscv_unshflw, UNSHFLW>;
@@ -895,10 +939,6 @@ let Predicates = [HasStdExtZbp, IsRV64] in
def : PatGprGpr<int_riscv_xperm_w, XPERM_W>;
let Predicates = [HasStdExtZbp, IsRV32] in {
-// FIXME : Move to DAG combine.
-def : Pat<(i32 (rotr (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
-def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
-
// We treat rev8 as a separate instruction, so match it directly.
def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>;
} // Predicates = [HasStdExtZbp, IsRV32]
@@ -911,6 +951,8 @@ def : Pat<(i64 (riscv_grev GPR:$rs1, 56)), (REV8_RV64 GPR:$rs1)>;
let Predicates = [HasStdExtZbt] in {
def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
(CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(xor (and (xor GPR:$rs1, GPR:$rs3), GPR:$rs2), GPR:$rs3),
+ (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
def : Pat<(select (XLenVT (setne GPR:$rs2, 0)), GPR:$rs1, GPR:$rs3),
(CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
@@ -932,6 +974,13 @@ def : Pat<(select (XLenVT (setge GPR:$x, GPR:$y)), GPR:$rs3, GPR:$rs1),
(CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
def : Pat<(select (XLenVT (setle GPR:$y, GPR:$x)), GPR:$rs3, GPR:$rs1),
(CMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>;
+
+// setge X, Imm is canonicalized to setgt X, (Imm - 1).
+def : Pat<(select (XLenVT (setgt GPR:$x, simm12_minus1_nonzero:$imm)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (SLTI GPR:$x, (ImmPlus1 simm12_minus1_nonzero:$imm)), GPR:$rs3)>;
+def : Pat<(select (XLenVT (setugt GPR:$x, simm12_minus1_nonzero:$imm)), GPR:$rs3, GPR:$rs1),
+ (CMOV GPR:$rs1, (SLTIU GPR:$x, (ImmPlus1 simm12_minus1_nonzero:$imm)), GPR:$rs3)>;
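+// For example, (select (setge X, 5), T, F) reaches the first pattern above
+// as (select (setgt X, 4), T, F); ImmPlus1 restores the original bound, so
+// it lowers to (CMOV F, (SLTI X, 5), T), selecting F while X < 5.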
+
def : Pat<(select GPR:$rs2, GPR:$rs1, GPR:$rs3),
(CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
} // Predicates = [HasStdExtZbt]
@@ -977,7 +1026,7 @@ def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
let Predicates = [HasStdExtZbb] in {
def : Pat<(sext_inreg GPR:$rs1, i8), (SEXT_B GPR:$rs1)>;
def : Pat<(sext_inreg GPR:$rs1, i16), (SEXT_H GPR:$rs1)>;
-}
+} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbb] in {
def : PatGprGpr<smin, MIN>;
@@ -1018,7 +1067,7 @@ def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
(and GPR:$rs1, 0x000000000000FFFF))),
(PACKW GPR:$rs1, GPR:$rs2)>;
-}
+} // Predicates = [HasStdExtZbpOrZbkb, IsRV64]
let Predicates = [HasStdExtZbp, IsRV32] in
def : Pat<(i32 (or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16)))),
@@ -1031,19 +1080,13 @@ def : Pat<(i64 (or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32)))
def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
(srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
(PACKUW GPR:$rs1, GPR:$rs2)>;
-}
+} // Predicates = [HasStdExtZbp, IsRV64]
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV32 GPR:$rs)>;
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>;
-// Pattern to exclude simm12 immediates from matching.
-def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
- auto *C = dyn_cast<ConstantSDNode>(N);
- return !C || !isInt<12>(C->getSExtValue());
-}]>;
-
let Predicates = [HasStdExtZba] in {
def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), non_imm12:$rs2),
(SH1ADD GPR:$rs1, GPR:$rs2)>;
@@ -1132,6 +1175,33 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2))
(SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)),
+ (SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)),
+ (SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
+def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), non_imm12:$rs2)),
+ (SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>;
+
+// Use SRLI to clear the LSBs and SHXADD_UW to mask and shift.
+def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)),
+ (SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
+ (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>;
+def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
+ (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
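+// For example, ((X & 0x1FFFFFFFE) + Y) becomes (SH1ADD_UW (SRLI X, 1), Y):
+// SRLI drops bit 0, and SH1ADD_UW zero-extends the low 32 bits of the
+// shifted value before shifting it back up by one, recreating the mask.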
+
+// Use SRLIW to shift out the LSBs and zero the upper 32 bits. Use SHXADD to
+// shift zeros back into the LSBs and apply the addition's shl amount.
+def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFE), (i64 1)),
+ non_imm12:$rs2)),
+ (SH2ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFE), (i64 2)),
+ non_imm12:$rs2)),
+ (SH3ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
+def : Pat<(i64 (add (shl (binop_oneuse<and> GPR:$rs1, 0xFFFFFFFC), (i64 1)),
+ non_imm12:$rs2)),
+ (SH3ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>;
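+// For example, (((X & 0xFFFFFFFE) << 1) + Y) becomes
+// (SH2ADD (SRLIW X, 1), Y): SRLIW leaves X[31:1] zero-extended, and the
+// wider SHXADD shift reapplies both the cleared low bit and the shl.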
} // Predicates = [HasStdExtZba, IsRV64]
let Predicates = [HasStdExtZbcOrZbkc] in {
@@ -1175,4 +1245,4 @@ def : PatGprGpr<riscv_bfpw, BFPW>;
let Predicates = [HasStdExtZbkx] in {
def : PatGprGpr<int_riscv_xperm4, XPERM4>;
def : PatGprGpr<int_riscv_xperm8, XPERM8>;
-}
+} // Predicates = [HasStdExtZbkx]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index a2753c132354..5a4366b0908c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -17,13 +17,71 @@
def SDT_RISCVFMV_H_X
: SDTypeProfile<1, 1, [SDTCisVT<0, f16>, SDTCisVT<1, XLenVT>]>;
-def SDT_RISCVFMV_X_ANYEXTH
+def SDT_RISCVFMV_X_EXTH
: SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, f16>]>;
def riscv_fmv_h_x
: SDNode<"RISCVISD::FMV_H_X", SDT_RISCVFMV_H_X>;
def riscv_fmv_x_anyexth
- : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_ANYEXTH>;
+ : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_EXTH>;
+def riscv_fmv_x_signexth
+ : SDNode<"RISCVISD::FMV_X_SIGNEXTH", SDT_RISCVFMV_X_EXTH>;
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+// Zhinxmin and Zhinx
+
+def FPR16INX : RegisterOperand<GPRF16> {
+ let ParserMatchClass = GPRAsFPR;
+ let DecoderMethod = "DecodeGPRRegisterClass";
+}
+
+def ZfhExt : ExtInfo<0, [HasStdExtZfh]>;
+def Zfh64Ext : ExtInfo<0, [HasStdExtZfh, IsRV64]>;
+def ZfhminExt : ExtInfo<0, [HasStdExtZfhOrZfhmin]>;
+def ZhinxExt : ExtInfo<1, [HasStdExtZhinx]>;
+def ZhinxminExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin]>;
+def Zhinx64Ext : ExtInfo<1, [HasStdExtZhinx, IsRV64]>;
+
+def ZfhminDExt : ExtInfo<0, [HasStdExtZfhOrZfhmin, HasStdExtD]>;
+def ZhinxminZdinxExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx]>;
+
+def H : ExtInfo_r<ZfhExt, FPR16>;
+def H_INX : ExtInfo_r<ZhinxExt, FPR16INX>;
+
+def HH : ExtInfo_rr<ZfhExt, FPR16, FPR16>;
+def HH_INX : ExtInfo_rr<ZhinxExt, FPR16INX, FPR16INX>;
+def XH : ExtInfo_rr<ZfhExt, GPR, FPR16>;
+def XH_INX : ExtInfo_rr<ZhinxExt, GPR, FPR16INX>;
+def HX : ExtInfo_rr<ZfhExt, FPR16, GPR>;
+def HX_INX : ExtInfo_rr<ZhinxExt, FPR16INX, GPR>;
+def XH_64 : ExtInfo_rr<Zfh64Ext, GPR, FPR16>;
+def HX_64 : ExtInfo_rr<Zfh64Ext, FPR16, GPR>;
+def XH_INX_64 : ExtInfo_rr<Zhinx64Ext, GPR, FPR16INX>;
+def HX_INX_64 : ExtInfo_rr<Zhinx64Ext, FPR16INX, GPR>;
+def HFmin : ExtInfo_rr<ZfhminExt, FPR16, FPR32>;
+def HF_INXmin : ExtInfo_rr<ZhinxminExt, FPR16INX, FPR32INX>;
+def HF_INX : ExtInfo_rr<ZhinxExt, FPR16INX, FPR32INX>;
+def FHmin : ExtInfo_rr<ZfhminExt, FPR32, FPR16>;
+def FH_INXmin : ExtInfo_rr<ZhinxminExt, FPR32INX, FPR16INX>;
+def FH_INX : ExtInfo_rr<ZhinxExt, FPR32INX, FPR16INX>;
+def DHmin : ExtInfo_rr<ZfhminDExt, FPR64, FPR16>;
+def DH_INXmin : ExtInfo_rr<ZhinxminZdinxExt, FPR64INX, FPR16INX>;
+def HDmin : ExtInfo_rr<ZfhminDExt, FPR16, FPR64>;
+def HD_INXmin : ExtInfo_rr<ZhinxminZdinxExt, FPR16INX, FPR64INX>;
+
+defvar HINX = [H, H_INX];
+defvar HHINX = [HH, HH_INX];
+defvar XHINX = [XH, XH_INX];
+defvar HXINX = [HX, HX_INX];
+defvar XHIN64X = [XH_64, XH_INX_64];
+defvar HXIN64X = [HX_64, HX_INX_64];
+defvar HFINXmin = [HFmin, HF_INXmin];
+defvar FHINXmin = [FHmin, FH_INXmin];
+defvar DHINXmin = [DHmin, DH_INXmin];
+defvar HDINXmin = [HDmin, HD_INXmin];
//===----------------------------------------------------------------------===//
// Instructions
@@ -38,74 +96,73 @@ def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
} // Predicates = [HasStdExtZfhOrZfhmin]
-let Predicates = [HasStdExtZfh] in {
let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
-def FMADD_H : FPFMA_rrr_frm<OPC_MADD, 0b10, "fmadd.h", FPR16>;
-def FMSUB_H : FPFMA_rrr_frm<OPC_MSUB, 0b10, "fmsub.h", FPR16>;
-def FNMSUB_H : FPFMA_rrr_frm<OPC_NMSUB, 0b10, "fnmsub.h", FPR16>;
-def FNMADD_H : FPFMA_rrr_frm<OPC_NMADD, 0b10, "fnmadd.h", FPR16>;
+defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", HINX>;
+defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", HINX>;
+defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", HINX>;
+defm FNMADD_H : FPFMA_rrr_frm_m<OPC_NMADD, 0b10, "fnmadd.h", HINX>;
+}
+
+defm : FPFMADynFrmAlias_m<FMADD_H, "fmadd.h", HINX>;
+defm : FPFMADynFrmAlias_m<FMSUB_H, "fmsub.h", HINX>;
+defm : FPFMADynFrmAlias_m<FNMSUB_H, "fnmsub.h", HINX>;
+defm : FPFMADynFrmAlias_m<FNMADD_H, "fnmadd.h", HINX>;
+
+let SchedRW = [WriteFALU16, ReadFALU16, ReadFALU16] in {
+defm FADD_H : FPALU_rr_frm_m<0b0000010, "fadd.h", HINX, /*Commutable*/1>;
+defm FSUB_H : FPALU_rr_frm_m<0b0000110, "fsub.h", HINX>;
}
+let SchedRW = [WriteFMul16, ReadFMul16, ReadFMul16] in
+defm FMUL_H : FPALU_rr_frm_m<0b0001010, "fmul.h", HINX, /*Commutable*/1>;
+
+let SchedRW = [WriteFDiv16, ReadFDiv16, ReadFDiv16] in
+defm FDIV_H : FPALU_rr_frm_m<0b0001110, "fdiv.h", HINX>;
-def : FPFMADynFrmAlias<FMADD_H, "fmadd.h", FPR16>;
-def : FPFMADynFrmAlias<FMSUB_H, "fmsub.h", FPR16>;
-def : FPFMADynFrmAlias<FNMSUB_H, "fnmsub.h", FPR16>;
-def : FPFMADynFrmAlias<FNMADD_H, "fnmadd.h", FPR16>;
-
-def FADD_H : FPALU_rr_frm<0b0000010, "fadd.h", FPR16>,
- Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>;
-def FSUB_H : FPALU_rr_frm<0b0000110, "fsub.h", FPR16>,
- Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>;
-def FMUL_H : FPALU_rr_frm<0b0001010, "fmul.h", FPR16>,
- Sched<[WriteFMul16, ReadFMul16, ReadFMul16]>;
-def FDIV_H : FPALU_rr_frm<0b0001110, "fdiv.h", FPR16>,
- Sched<[WriteFDiv16, ReadFDiv16, ReadFDiv16]>;
-
-def : FPALUDynFrmAlias<FADD_H, "fadd.h", FPR16>;
-def : FPALUDynFrmAlias<FSUB_H, "fsub.h", FPR16>;
-def : FPALUDynFrmAlias<FMUL_H, "fmul.h", FPR16>;
-def : FPALUDynFrmAlias<FDIV_H, "fdiv.h", FPR16>;
-
-def FSQRT_H : FPUnaryOp_r_frm<0b0101110, 0b00000, FPR16, FPR16, "fsqrt.h">,
- Sched<[WriteFSqrt16, ReadFSqrt16]>;
-def : FPUnaryOpDynFrmAlias<FSQRT_H, "fsqrt.h", FPR16, FPR16>;
+defm : FPALUDynFrmAlias_m<FADD_H, "fadd.h", HINX>;
+defm : FPALUDynFrmAlias_m<FSUB_H, "fsub.h", HINX>;
+defm : FPALUDynFrmAlias_m<FMUL_H, "fmul.h", HINX>;
+defm : FPALUDynFrmAlias_m<FDIV_H, "fdiv.h", HINX>;
+
+defm FSQRT_H : FPUnaryOp_r_frm_m<0b0101110, 0b00000, HHINX, "fsqrt.h">,
+ Sched<[WriteFSqrt16, ReadFSqrt16]>;
+defm : FPUnaryOpDynFrmAlias_m<FSQRT_H, "fsqrt.h", HHINX>;
let SchedRW = [WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16],
mayRaiseFPException = 0 in {
-def FSGNJ_H : FPALU_rr<0b0010010, 0b000, "fsgnj.h", FPR16>;
-def FSGNJN_H : FPALU_rr<0b0010010, 0b001, "fsgnjn.h", FPR16>;
-def FSGNJX_H : FPALU_rr<0b0010010, 0b010, "fsgnjx.h", FPR16>;
+defm FSGNJ_H : FPALU_rr_m<0b0010010, 0b000, "fsgnj.h", HINX>;
+defm FSGNJN_H : FPALU_rr_m<0b0010010, 0b001, "fsgnjn.h", HINX>;
+defm FSGNJX_H : FPALU_rr_m<0b0010010, 0b010, "fsgnjx.h", HINX>;
}
let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in {
-def FMIN_H : FPALU_rr<0b0010110, 0b000, "fmin.h", FPR16>;
-def FMAX_H : FPALU_rr<0b0010110, 0b001, "fmax.h", FPR16>;
+defm FMIN_H : FPALU_rr_m<0b0010110, 0b000, "fmin.h", HINX, /*Commutable*/1>;
+defm FMAX_H : FPALU_rr_m<0b0010110, 0b001, "fmax.h", HINX, /*Commutable*/1>;
}
-def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, 0b00000, GPR, FPR16, "fcvt.w.h">,
- Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>;
-
-def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, 0b00001, GPR, FPR16, "fcvt.wu.h">,
+defm FCVT_W_H : FPUnaryOp_r_frm_m<0b1100010, 0b00000, XHINX, "fcvt.w.h">,
Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
-def : FPUnaryOpDynFrmAlias<FCVT_WU_H, "fcvt.wu.h", GPR, FPR16>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_W_H, "fcvt.w.h", XHINX>;
-def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, 0b00000, FPR16, GPR, "fcvt.h.w">,
- Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
-def : FPUnaryOpDynFrmAlias<FCVT_H_W, "fcvt.h.w", FPR16, GPR>;
+defm FCVT_WU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00001, XHINX, "fcvt.wu.h">,
+ Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_H, "fcvt.wu.h", XHINX>;
-def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">,
+defm FCVT_H_W : FPUnaryOp_r_frm_m<0b1101010, 0b00000, HXINX, "fcvt.h.w">,
Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
-def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
-} // Predicates = [HasStdExtZfh]
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_W, "fcvt.h.w", HXINX>;
-let Predicates = [HasStdExtZfhOrZfhmin] in {
-def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">,
- Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
-def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>;
+defm FCVT_H_WU : FPUnaryOp_r_frm_m<0b1101010, 0b00001, HXINX, "fcvt.h.wu">,
+ Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_WU, "fcvt.h.wu", HXINX>;
-def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b00010, 0b000, FPR32, FPR16, "fcvt.s.h">,
+defm FCVT_H_S : FPUnaryOp_r_frm_m<0b0100010, 0b00000, HFINXmin, "fcvt.h.s">,
+ Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_S, "fcvt.h.s", HFINXmin>;
+
+defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, FHINXmin, "fcvt.s.h">,
Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>;
+let Predicates = [HasStdExtZfhOrZfhmin] in {
let mayRaiseFPException = 0 in
def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">,
Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]>;
@@ -115,45 +172,38 @@ def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">,
Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>;
} // Predicates = [HasStdExtZfhOrZfhmin]
-let Predicates = [HasStdExtZfh] in {
-
let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in {
-def FEQ_H : FPCmp_rr<0b1010010, 0b010, "feq.h", FPR16>;
-def FLT_H : FPCmp_rr<0b1010010, 0b001, "flt.h", FPR16>;
-def FLE_H : FPCmp_rr<0b1010010, 0b000, "fle.h", FPR16>;
+defm FEQ_H : FPCmp_rr_m<0b1010010, 0b010, "feq.h", HINX, /*Commutable*/1>;
+defm FLT_H : FPCmp_rr_m<0b1010010, 0b001, "flt.h", HINX>;
+defm FLE_H : FPCmp_rr_m<0b1010010, 0b000, "fle.h", HINX>;
}
let mayRaiseFPException = 0 in
-def FCLASS_H : FPUnaryOp_r<0b1110010, 0b00000, 0b001, GPR, FPR16, "fclass.h">,
- Sched<[WriteFClass16, ReadFClass16]>;
-} // Predicates = [HasStdExtZfh]
-
-let Predicates = [HasStdExtZfh, IsRV64] in {
-def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, 0b00010, GPR, FPR16, "fcvt.l.h">,
- Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_L_H, "fcvt.l.h", GPR, FPR16>;
+defm FCLASS_H : FPUnaryOp_r_m<0b1110010, 0b00000, 0b001, XHINX, "fclass.h">,
+ Sched<[WriteFClass16, ReadFClass16]>;
-def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, 0b00011, GPR, FPR16, "fcvt.lu.h">,
+defm FCVT_L_H : FPUnaryOp_r_frm_m<0b1100010, 0b00010, XHIN64X, "fcvt.l.h">,
Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
-def : FPUnaryOpDynFrmAlias<FCVT_LU_H, "fcvt.lu.h", GPR, FPR16>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_L_H, "fcvt.l.h", XHIN64X>;
-def FCVT_H_L : FPUnaryOp_r_frm<0b1101010, 0b00010, FPR16, GPR, "fcvt.h.l">,
- Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
-def : FPUnaryOpDynFrmAlias<FCVT_H_L, "fcvt.h.l", FPR16, GPR>;
+defm FCVT_LU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00011, XHIN64X, "fcvt.lu.h">,
+ Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_H, "fcvt.lu.h", XHIN64X>;
-def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">,
+defm FCVT_H_L : FPUnaryOp_r_frm_m<0b1101010, 0b00010, HXIN64X, "fcvt.h.l">,
Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
-def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
-} // Predicates = [HasStdExtZfh, IsRV64]
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_L, "fcvt.h.l", HXIN64X>;
-let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
-def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">,
- Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
-def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
+defm FCVT_H_LU : FPUnaryOp_r_frm_m<0b1101010, 0b00011, HXIN64X, "fcvt.h.lu">,
+ Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_LU, "fcvt.h.lu", HXIN64X>;
-def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">,
- Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
-} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
+defm FCVT_H_D : FPUnaryOp_r_frm_m<0b0100010, 0b00001, HDINXmin, "fcvt.h.d">,
+ Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_D, "fcvt.h.d", HDINXmin>;
+
+defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, DHINXmin, "fcvt.d.h">,
+ Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
@@ -186,17 +236,21 @@ def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>;
}
} // Predicates = [HasStdExtZfhOrZfhmin]
+let Predicates = [HasStdExtZhinx] in {
+def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>;
+def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>;
+def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>;
+
+def : InstAlias<"fgt.h $rd, $rs, $rt",
+ (FLT_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>;
+def : InstAlias<"fge.h $rd, $rs, $rt",
+ (FLE_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>;
+} // Predicates = [HasStdExtZhinx]
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-/// Generic pattern classes
-class PatFpr16Fpr16<SDPatternOperator OpNode, RVInstR Inst>
- : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2)>;
-
-class PatFpr16Fpr16DynFrm<SDPatternOperator OpNode, RVInstRFrm Inst>
- : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2, 0b111)>;
-
let Predicates = [HasStdExtZfh] in {
/// Float constants
@@ -210,17 +264,17 @@ def : Pat<(f16 (fpimmneg0)), (FSGNJN_H (FMV_H_X X0), (FMV_H_X X0))>;
/// Float arithmetic operations
-def : PatFpr16Fpr16DynFrm<any_fadd, FADD_H>;
-def : PatFpr16Fpr16DynFrm<any_fsub, FSUB_H>;
-def : PatFpr16Fpr16DynFrm<any_fmul, FMUL_H>;
-def : PatFpr16Fpr16DynFrm<any_fdiv, FDIV_H>;
+def : PatFprFprDynFrm<any_fadd, FADD_H, FPR16>;
+def : PatFprFprDynFrm<any_fsub, FSUB_H, FPR16>;
+def : PatFprFprDynFrm<any_fmul, FMUL_H, FPR16>;
+def : PatFprFprDynFrm<any_fdiv, FDIV_H, FPR16>;
def : Pat<(any_fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>;
def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>;
-def : PatFpr16Fpr16<fcopysign, FSGNJ_H>;
+def : PatFprFpr<fcopysign, FSGNJ_H, FPR16>;
def : Pat<(fcopysign FPR16:$rs1, (fneg FPR16:$rs2)), (FSGNJN_H $rs1, $rs2)>;
def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2),
(FSGNJ_H $rs1, (FCVT_H_S $rs2, 0b111))>;
@@ -242,11 +296,15 @@ def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
(FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+// fnmadd: -(rs1 * rs2 + rs3) (requires the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR16:$rs1, FPR16:$rs2, FPR16:$rs3)),
+ (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
// LLVM's fminnum and fmaxnum
// <https://github.com/riscv/riscv-isa-manual/commit/cd20cee7efd9bac7c5aa127ec3b451749d2b3cce>.
-def : PatFpr16Fpr16<fminnum, FMIN_H>;
-def : PatFpr16Fpr16<fmaxnum, FMAX_H>;
+def : PatFprFpr<fminnum, FMIN_H, FPR16>;
+def : PatFprFpr<fmaxnum, FMAX_H, FPR16>;
/// Setcc
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
@@ -299,6 +357,7 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
// Moves (no conversion)
def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
+def : Pat<(riscv_fmv_x_signexth FPR16:$src), (FMV_X_H FPR16:$src)>;
} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh, IsRV32] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
new file mode 100644
index 000000000000..57fd74b0c0fe
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
@@ -0,0 +1,71 @@
+//===-- RISCVInstrInfoZicbo.td - RISC-V CMO instructions ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard Base Cache
+// Management Operation ISA Extensions document (Zicbom, Zicboz, and Zicbop).
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand definitions.
+//===----------------------------------------------------------------------===//
+
+// A 12-bit signed immediate where the least significant five bits are zero.
+def simm12_lsb00000 : Operand<XLenVT>,
+ ImmLeaf<XLenVT, [{return isShiftedInt<7, 5>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<12, "Lsb00000">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmOperand<12>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedInt<7, 5>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+ let OperandType = "OPERAND_SIMM12_LSB00000";
+ let OperandNamespace = "RISCVOp";
+}
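+// The accepted immediates are therefore exactly the multiples of 32 in the
+// range [-2048, 2016] (a signed 7-bit quantity scaled by 32).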
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+class CBO_r<bits<12> optype, string opcodestr>
+ : RVInstI<0b010, OPC_MISC_MEM, (outs), (ins GPRMemZeroOffset:$rs1),
+ opcodestr, "$rs1"> {
+ let imm12 = optype;
+ let rd = 0b00000;
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class Prefetch_ri<bits<5> optype, string opcodestr>
+ : RVInstS<0b110, OPC_OP_IMM, (outs), (ins GPR:$rs1, simm12_lsb00000:$imm12),
+ opcodestr, "${imm12}(${rs1})"> {
+ let Inst{11-7} = 0b00000;
+ let rs2 = optype;
+}
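+// Note that the prefetch instructions below sit in the ORI encoding space
+// with the rd field fixed to zero; the optype in the rs2 slot selects the
+// operation.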
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZicbom] in {
+def CBO_CLEAN : CBO_r<0b000000000001, "cbo.clean">, Sched<[]>;
+def CBO_FLUSH : CBO_r<0b000000000010, "cbo.flush">, Sched<[]>;
+def CBO_INVAL : CBO_r<0b000000000000, "cbo.inval">, Sched<[]>;
+} // Predicates = [HasStdExtZicbom]
+
+let Predicates = [HasStdExtZicboz] in {
+def CBO_ZERO : CBO_r<0b000000000100, "cbo.zero">, Sched<[]>;
+} // Predicates = [HasStdExtZicboz]
+
+let Predicates = [HasStdExtZicbop] in {
+def PREFETCH_I : Prefetch_ri<0b00000, "prefetch.i">, Sched<[]>;
+def PREFETCH_R : Prefetch_ri<0b00001, "prefetch.r">, Sched<[]>;
+def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
+} // Predicates = [HasStdExtZicbop]
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index c167c095521a..c457a95544cf 100644
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -87,7 +87,7 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
return MCOperand::createExpr(ME);
}
-bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
+bool llvm::lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
MCOperand &MCOp,
const AsmPrinter &AP) {
switch (MO.getType()) {
@@ -145,6 +145,7 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
const TargetRegisterInfo *TRI =
MF->getSubtarget<RISCVSubtarget>().getRegisterInfo();
+
assert(TRI && "TargetRegisterInfo expected");
uint64_t TSFlags = MI->getDesc().TSFlags;
@@ -158,12 +159,16 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
if (RISCVII::hasSEWOp(TSFlags))
--NumOps;
+ bool hasVLOutput = RISCV::isFaultFirstLoad(*MI);
for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
const MachineOperand &MO = MI->getOperand(OpNo);
+    // Skip vl output. It should be the second output.
+ if (hasVLOutput && OpNo == 1)
+ continue;
// Skip merge op. It should be the first operand after the result.
- if (RISCVII::hasMergeOp(TSFlags) && OpNo == 1) {
- assert(MI->getNumExplicitDefs() == 1);
+ if (RISCVII::hasMergeOp(TSFlags) && OpNo == 1U + hasVLOutput) {
+ assert(MI->getNumExplicitDefs() == 1U + hasVLOutput);
continue;
}
@@ -214,7 +219,7 @@ bool llvm::lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
- if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
+ if (lowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
OutMI.addOperand(MCOp);
}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..8cb046bcfbb6
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -0,0 +1,37 @@
+//=- RISCVMachineFunctionInfo.cpp - RISCV machine function info ---*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RISCV-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+yaml::RISCVMachineFunctionInfo::RISCVMachineFunctionInfo(
+ const llvm::RISCVMachineFunctionInfo &MFI)
+ : VarArgsFrameIndex(MFI.getVarArgsFrameIndex()),
+ VarArgsSaveSize(MFI.getVarArgsSaveSize()) {}
+
+MachineFunctionInfo *RISCVMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<RISCVMachineFunctionInfo>(*this);
+}
+
+void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+ MappingTraits<RISCVMachineFunctionInfo>::mapping(YamlIO, *this);
+}
+
+void RISCVMachineFunctionInfo::initializeBaseYamlFields(
+ const yaml::RISCVMachineFunctionInfo &YamlMFI) {
+ VarArgsFrameIndex = YamlMFI.VarArgsFrameIndex;
+ VarArgsSaveSize = YamlMFI.VarArgsSaveSize;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index b5609e9a3890..622767540d99 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -14,11 +14,34 @@
#define LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H
#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
+class RISCVMachineFunctionInfo;
+
+namespace yaml {
+struct RISCVMachineFunctionInfo final : public yaml::MachineFunctionInfo {
+ int VarArgsFrameIndex;
+ int VarArgsSaveSize;
+
+ RISCVMachineFunctionInfo() = default;
+ RISCVMachineFunctionInfo(const llvm::RISCVMachineFunctionInfo &MFI);
+
+ void mappingImpl(yaml::IO &YamlIO) override;
+ ~RISCVMachineFunctionInfo() = default;
+};
+
+template <> struct MappingTraits<RISCVMachineFunctionInfo> {
+ static void mapping(IO &YamlIO, RISCVMachineFunctionInfo &MFI) {
+ YamlIO.mapOptional("varArgsFrameIndex", MFI.VarArgsFrameIndex);
+ YamlIO.mapOptional("varArgsSaveSize", MFI.VarArgsSaveSize);
+ }
+};
+} // end namespace yaml
+
/// RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo
/// and contains private RISCV-specific information for each MachineFunction.
class RISCVMachineFunctionInfo : public MachineFunctionInfo {
@@ -34,6 +57,8 @@ private:
unsigned LibCallStackSize = 0;
/// Size of RVV stack.
uint64_t RVVStackSize = 0;
+ /// Alignment of RVV stack.
+ Align RVVStackAlign;
/// Padding required to keep RVV stack aligned within the main stack.
uint64_t RVVPadding = 0;
/// Size of stack frame to save callee saved registers
@@ -42,6 +67,11 @@ private:
public:
RISCVMachineFunctionInfo(const MachineFunction &MF) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
@@ -69,11 +99,16 @@ public:
uint64_t getRVVStackSize() const { return RVVStackSize; }
void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; }
+ Align getRVVStackAlign() const { return RVVStackAlign; }
+ void setRVVStackAlign(Align StackAlign) { RVVStackAlign = StackAlign; }
+
uint64_t getRVVPadding() const { return RVVPadding; }
void setRVVPadding(uint64_t Padding) { RVVPadding = Padding; }
unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
+
+ void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
new file mode 100644
index 000000000000..3b9177bc1635
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -0,0 +1,67 @@
+//===- RISCVMacroFusion.cpp - RISCV Macro Fusion --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the RISCV implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+//
+#include "RISCVMacroFusion.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+using namespace llvm;
+
+// Fuse LUI followed by ADDI or ADDIW.
+// rd = imm[31:0] which decomposes to
+// lui rd, imm[31:12]
+// addi(w) rd, rd, imm[11:0]
+static bool isLUIADDI(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::ADDI &&
+ SecondMI.getOpcode() != RISCV::ADDIW)
+ return false;
+
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::LUI)
+ return false;
+
+ // The first operand of ADDI might be a frame index.
+ if (!SecondMI.getOperand(1).isReg())
+ return false;
+
+ Register FirstDest = FirstMI->getOperand(0).getReg();
+
+ // Destination of LUI should be the ADDI(W) source register.
+ if (SecondMI.getOperand(1).getReg() != FirstDest)
+ return false;
+
+ // If the FirstMI destination is non-virtual, it should match the SecondMI
+ // destination.
+ return FirstDest.isVirtual() || SecondMI.getOperand(0).getReg() == FirstDest;
+}
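+// For example, materializing the constant 0x12345678 produces the pair
+//   lui  a0, 0x12345
+//   addi a0, a0, 0x678
+// which isLUIADDI accepts, keeping the two adjacent in the schedule.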
+
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const RISCVSubtarget &ST = static_cast<const RISCVSubtarget &>(TSI);
+
+ if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
+ return true;
+
+ return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.h b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
new file mode 100644
index 000000000000..c238dacc37f6
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
@@ -0,0 +1,28 @@
+//===- RISCVMacroFusion.h - RISCV Macro Fusion ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the RISCV definition of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createRISCVMacroFusionDAGMutation());
+/// to RISCVPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createRISCVMacroFusionDAGMutation();
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
new file mode 100644
index 000000000000..1fc424411c12
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -0,0 +1,382 @@
+//===-- RISCVMakeCompressible.cpp - Make more instructions compressible ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass searches for instructions that are prevented from being compressed
+// by one of the following:
+//
+// 1. The use of a single uncompressed register.
+// 2. A base register + offset where the offset is too large to be compressed
+// and the base register may or may not be compressed.
+//
+//
+// For case 1, if a compressed register is available, then the uncompressed
+// register is copied to the compressed register and its uses are replaced.
+//
+// For example, storing zero uses the uncompressible zero register:
+// sw zero, 0(a0) # if zero
+// sw zero, 8(a0) # if zero
+// sw zero, 4(a0) # if zero
+// sw zero, 24(a0) # if zero
+//
+// If a compressed register (e.g. a1) is available, the above can be transformed
+// to the following to improve code size:
+// li a1, 0
+// c.sw a1, 0(a0)
+// c.sw a1, 8(a0)
+// c.sw a1, 4(a0)
+// c.sw a1, 24(a0)
+//
+//
+// For case 2, if a compressed register is available, then the original base
+// is copied and adjusted such that:
+//
+// new_base_register = base_register + adjustment
+// base_register + large_offset = new_base_register + small_offset
+//
+// For example, the following offsets are too large for c.sw:
+// lui a2, 983065
+// sw a1, -236(a2)
+// sw a1, -240(a2)
+// sw a1, -244(a2)
+// sw a1, -248(a2)
+// sw a1, -252(a2)
+// sw a0, -256(a2)
+//
+// If a compressed register is available (e.g. a3), a new base could be created
+// such that the addresses can be accessed with a compressible offset, thus
+// improving code size:
+// lui a2, 983065
+// addi a3, a2, -256
+// c.sw a1, 20(a3)
+// c.sw a1, 16(a3)
+// c.sw a1, 12(a3)
+// c.sw a1, 8(a3)
+// c.sw a1, 4(a3)
+// c.sw a0, 0(a3)
+//
+//
+// This optimization is only applied if there are enough uses of the copied
+// register for code size to be reduced.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-make-compressible"
+#define RISCV_COMPRESS_INSTRS_NAME "RISCV Make Compressible"
+
+namespace {
+
+struct RISCVMakeCompressibleOpt : public MachineFunctionPass {
+ static char ID;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ RISCVMakeCompressibleOpt() : MachineFunctionPass(ID) {
+ initializeRISCVMakeCompressibleOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
+};
+} // namespace
+
+char RISCVMakeCompressibleOpt::ID = 0;
+INITIALIZE_PASS(RISCVMakeCompressibleOpt, "riscv-make-compressible",
+ RISCV_COMPRESS_INSTRS_NAME, false, false)
+
+// Return log2(widthInBytes) of load/store done by Opcode.
+static unsigned log2LdstWidth(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case RISCV::LW:
+ case RISCV::SW:
+ case RISCV::FLW:
+ case RISCV::FSW:
+ return 2;
+ case RISCV::LD:
+ case RISCV::SD:
+ case RISCV::FLD:
+ case RISCV::FSD:
+ return 3;
+ }
+}
+
+// Return a mask for the offset bits of a non-stack-pointer based compressed
+// load/store.
+static uint8_t compressedLDSTOffsetMask(unsigned Opcode) {
+ return 0x1f << log2LdstWidth(Opcode);
+}
+
+// Return true if Offset fits within a compressed stack-pointer based
+// load/store.
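+// E.g. c.lwsp/c.swsp encode offsets 0-252 in multiples of 4, and
+// c.ldsp/c.sdsp encode offsets 0-504 in multiples of 8.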
+static bool compressibleSPOffset(int64_t Offset, unsigned Opcode) {
+ return log2LdstWidth(Opcode) == 2 ? isShiftedUInt<6, 2>(Offset)
+ : isShiftedUInt<6, 3>(Offset);
+}
+
+// Given an offset for a load/store, return the adjustment required to the base
+// register such that the address can be accessed with a compressible offset.
+// This will return 0 if the offset is already compressible.
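+//
+// E.g. for a word store (offset mask 0x7c) with Offset = -236, the
+// adjustment is -236 & ~0x7c = -256, leaving a compressible offset of
+// -236 - (-256) = 20.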
+static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
+ // Return the excess bits that do not fit in a compressible offset.
+ return Offset & ~compressedLDSTOffsetMask(Opcode);
+}
+
+// Return true if Reg is in a compressed register class.
+static bool isCompressedReg(Register Reg) {
+ return RISCV::GPRCRegClass.contains(Reg) ||
+ RISCV::FPR32CRegClass.contains(Reg) ||
+ RISCV::FPR64CRegClass.contains(Reg);
+}
+
+// Return true if MI is a load for which there exists a compressed version.
+static bool isCompressibleLoad(const MachineInstr &MI) {
+ const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
+ const unsigned Opcode = MI.getOpcode();
+
+ return Opcode == RISCV::LW || (!STI.is64Bit() && Opcode == RISCV::FLW) ||
+ Opcode == RISCV::LD || Opcode == RISCV::FLD;
+}
+
+// Return true if MI is a store for which there exists a compressed version.
+static bool isCompressibleStore(const MachineInstr &MI) {
+ const RISCVSubtarget &STI = MI.getMF()->getSubtarget<RISCVSubtarget>();
+ const unsigned Opcode = MI.getOpcode();
+
+ return Opcode == RISCV::SW || (!STI.is64Bit() && Opcode == RISCV::FSW) ||
+ Opcode == RISCV::SD || Opcode == RISCV::FSD;
+}
+
+// Find a single register and/or large offset which, if compressible, would
+// allow the given instruction to be compressed.
+//
+// Possible return values:
+//
+// {Reg, 0} - Uncompressed Reg needs replacing with a compressed
+// register.
+// {Reg, N} - Reg needs replacing with a compressed register and
+// N needs adding to the new register. (Reg may be
+// compressed or uncompressed).
+// {RISCV::NoRegister, 0} - No suitable optimization found for this
+// instruction.
+static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
+ const unsigned Opcode = MI.getOpcode();
+
+ if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
+ const MachineOperand &MOImm = MI.getOperand(2);
+ if (!MOImm.isImm())
+ return RegImmPair(RISCV::NoRegister, 0);
+
+ int64_t Offset = MOImm.getImm();
+ int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
+ Register Base = MI.getOperand(1).getReg();
+
+ // Memory accesses via the stack pointer do not have a requirement for
+ // either of the registers to be compressible and can take a larger offset.
+ if (RISCV::SPRegClass.contains(Base)) {
+ if (!compressibleSPOffset(Offset, Opcode) && NewBaseAdjust)
+ return RegImmPair(Base, NewBaseAdjust);
+ } else {
+ Register SrcDest = MI.getOperand(0).getReg();
+ bool SrcDestCompressed = isCompressedReg(SrcDest);
+ bool BaseCompressed = isCompressedReg(Base);
+
+ // If only Base and/or offset prevent compression, then return Base and
+ // any adjustment required to make the offset compressible.
+ if ((!BaseCompressed || NewBaseAdjust) && SrcDestCompressed)
+ return RegImmPair(Base, NewBaseAdjust);
+
+ // For loads, we can only change the base register since dest is defined
+ // rather than used.
+ //
+ // For stores, we can change SrcDest (and Base if SrcDest == Base) but
+ // cannot resolve an uncompressible offset in this case.
+ if (isCompressibleStore(MI)) {
+ if (!SrcDestCompressed && (BaseCompressed || SrcDest == Base) &&
+ !NewBaseAdjust)
+ return RegImmPair(SrcDest, NewBaseAdjust);
+ }
+ }
+ }
+ return RegImmPair(RISCV::NoRegister, 0);
+}
+
+// Check all uses after FirstMI of the given register, keeping a vector of
+// instructions that would be compressible if the given register (and offset if
+// applicable) were compressible.
+//
+// If there are enough uses for this optimization to improve code size and a
+// compressed register is available, return that compressed register.
+static Register analyzeCompressibleUses(MachineInstr &FirstMI,
+ RegImmPair RegImm,
+ SmallVectorImpl<MachineInstr *> &MIs) {
+ MachineBasicBlock &MBB = *FirstMI.getParent();
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+
+ RegScavenger RS;
+ RS.enterBasicBlock(MBB);
+
+ for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
+ E = MBB.instr_end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+
+ // Determine if this is an instruction which would benefit from using the
+ // new register.
+ RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
+ if (CandidateRegImm.Reg == RegImm.Reg &&
+ CandidateRegImm.Imm == RegImm.Imm) {
+ // Advance tracking since the value in the new register must be live for
+ // this instruction too.
+ RS.forward(I);
+
+ MIs.push_back(&MI);
+ }
+
+ // If RegImm.Reg is modified by this instruction, then we cannot optimize
+ // past this instruction. If the register is already compressed, then it may
+ // still be possible to optimize a large offset in the current instruction -
+ // this will have been detected by the preceding call to
+ // getRegImmPairPreventingCompression.
+ if (MI.modifiesRegister(RegImm.Reg, TRI))
+ break;
+ }
+
+ // Adjusting the base costs one new uncompressed addi and therefore three uses
+ // are required for a code size reduction. If no base adjustment is required,
+ // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
+ // the zero register) and therefore two uses are required for a code size
+ // reduction.
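+ // E.g. with a base adjustment, the new addi costs 4 bytes while each of
+ // three rewritten accesses saves 2 bytes (4 -> 2), a net saving of 2 bytes.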
+ if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3))
+ return RISCV::NoRegister;
+
+ // Find a compressible register which will be available from the first
+ // instruction we care about to the last.
+ const TargetRegisterClass *RCToScavenge;
+
+ // Work out the compressed register class from which to scavenge.
+ if (RISCV::GPRRegClass.contains(RegImm.Reg))
+ RCToScavenge = &RISCV::GPRCRegClass;
+ else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
+ RCToScavenge = &RISCV::FPR32CRegClass;
+ else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
+ RCToScavenge = &RISCV::FPR64CRegClass;
+ else
+ return RISCV::NoRegister;
+
+ return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
+ /*RestoreAfter=*/false, /*SPAdj=*/0,
+ /*AllowSpill=*/false);
+}
+
+// Update uses of the old register in the given instruction to the new register.
+static void updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
+ Register NewReg) {
+ unsigned Opcode = MI.getOpcode();
+
+ // If this pass is extended to support more instructions, the check for
+ // definedness may need to be strengthened.
+ assert((isCompressibleLoad(MI) || isCompressibleStore(MI)) &&
+ "Unsupported instruction for this optimization.");
+
+ // Update registers
+ for (MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
+ // Do not update operands that define the old register.
+ //
+ // The new register was scavenged for the range of instructions that are
+ // being updated, therefore it should not be defined within this range
+ // except possibly in the final instruction.
+ if (MO.isDef()) {
+ assert(isCompressibleLoad(MI));
+ continue;
+ }
+ // Update reg
+ MO.setReg(NewReg);
+ }
+
+ // Update offset
+ MachineOperand &MOImm = MI.getOperand(2);
+ int64_t NewOffset = MOImm.getImm() & compressedLDSTOffsetMask(Opcode);
+ MOImm.setImm(NewOffset);
+}
+
+bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
+ // This is a size optimization.
+ if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasMinSize())
+ return false;
+
+ const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
+ const RISCVInstrInfo &TII = *STI.getInstrInfo();
+
+ // This optimization only makes sense if compressed instructions are emitted.
+ if (!STI.hasStdExtC())
+ return false;
+
+ for (MachineBasicBlock &MBB : Fn) {
+ LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+ for (MachineInstr &MI : MBB) {
+ // Determine if this instruction would otherwise be compressed if not for
+ // an uncompressible register or offset.
+ RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
+ if (!RegImm.Reg && RegImm.Imm == 0)
+ continue;
+
+ // Determine if there is a set of instructions for which replacing this
+ // register with a compressed register (and compressible offset if
+ // applicable) is possible and will allow compression.
+ SmallVector<MachineInstr *, 8> MIs;
+ Register NewReg = analyzeCompressibleUses(MI, RegImm, MIs);
+ if (!NewReg)
+ continue;
+
+ // Create the appropriate copy and/or offset.
+ if (RISCV::GPRRegClass.contains(RegImm.Reg)) {
+ assert(isInt<12>(RegImm.Imm));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
+ .addReg(RegImm.Reg)
+ .addImm(RegImm.Imm);
+ } else {
+ // If we are looking at replacing an FPR register we don't expect to
+ // have any offset. The only compressible FP instructions with an offset
+ // are loads and stores, for which the offset applies to the GPR operand
+ // not the FPR operand.
+ assert(RegImm.Imm == 0);
+ unsigned Opcode = RISCV::FPR32RegClass.contains(RegImm.Reg)
+ ? RISCV::FSGNJ_S
+ : RISCV::FSGNJ_D;
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(Opcode), NewReg)
+ .addReg(RegImm.Reg)
+ .addReg(RegImm.Reg);
+ }
+
+ // Update the set of instructions to use the compressed register and
+ // compressible offset instead. These instructions should now be
+ // compressible.
+ // TODO: Update all uses if RegImm.Imm == 0? Not just those that are
+ // expected to become compressible.
+ for (MachineInstr *UpdateMI : MIs)
+ updateOperands(*UpdateMI, RegImm, NewReg);
+ }
+ }
+ return true;
+}
+
+/// Returns an instance of the Make Compressible Optimization pass.
+FunctionPass *llvm::createRISCVMakeCompressibleOptPass() {
+ return new RISCVMakeCompressibleOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 5f4022439abb..b060a73846c4 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -25,6 +25,7 @@
#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
@@ -37,6 +38,10 @@ using namespace llvm;
namespace {
struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
+private:
+ const RISCVSubtarget *ST = nullptr;
+
+public:
static char ID;
bool runOnMachineFunction(MachineFunction &Fn) override;
bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI);
@@ -45,6 +50,9 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
int64_t Offset);
bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset);
+ bool matchShiftedOffset(MachineInstr &TailShXAdd, Register GSReg,
+ int64_t &Offset);
+
RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
MachineFunctionProperties getRequiredProperties() const override {
@@ -85,17 +93,16 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
MachineInstr *&LoADDI) {
if (HiLUI.getOpcode() != RISCV::LUI ||
HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI ||
- HiLUI.getOperand(1).getType() != MachineOperand::MO_GlobalAddress ||
+ !HiLUI.getOperand(1).isGlobal() ||
HiLUI.getOperand(1).getOffset() != 0 ||
!MRI->hasOneUse(HiLUI.getOperand(0).getReg()))
return false;
Register HiLuiDestReg = HiLUI.getOperand(0).getReg();
- LoADDI = MRI->use_begin(HiLuiDestReg)->getParent();
+ LoADDI = &*MRI->use_instr_begin(HiLuiDestReg);
if (LoADDI->getOpcode() != RISCV::ADDI ||
LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO ||
- LoADDI->getOperand(2).getType() != MachineOperand::MO_GlobalAddress ||
- LoADDI->getOperand(2).getOffset() != 0 ||
- !MRI->hasOneUse(LoADDI->getOperand(0).getReg()))
+ !LoADDI->getOperand(2).isGlobal() ||
+ LoADDI->getOperand(2).getOffset() != 0)
return false;
return true;
}
@@ -106,6 +113,7 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI,
void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI,
MachineInstr &LoADDI,
MachineInstr &Tail, int64_t Offset) {
+ assert(isInt<32>(Offset) && "Unexpected offset");
// Put the offset back in HiLUI and the LoADDI
HiLUI.getOperand(1).setOffset(Offset);
LoADDI.getOperand(2).setOffset(Offset);
@@ -148,7 +156,8 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
return false;
// This can point to an ADDI or a LUI:
MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
- if (OffsetTail.getOpcode() == RISCV::ADDI) {
+ if (OffsetTail.getOpcode() == RISCV::ADDI ||
+ OffsetTail.getOpcode() == RISCV::ADDIW) {
// The offset value has non zero bits in both %hi and %lo parts.
// Detect an ADDI that feeds from a LUI instruction.
MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
@@ -162,8 +171,14 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
!MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
return false;
- int64_t OffHi = OffsetLui.getOperand(1).getImm();
- Offset = (OffHi << 12) + OffLo;
+ Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
+ Offset += OffLo;
+ // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
+ if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
+ Offset = SignExtend64<32>(Offset);
+ // We can only fold simm32 offsets.
+ if (!isInt<32>(Offset))
+ return false;
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
<< " " << OffsetLui);
DeadInstrs.insert(&OffsetTail);
@@ -173,98 +188,204 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
// The offset value has all zero bits in the lower 12 bits. Only LUI
// exists.
LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
- Offset = OffsetTail.getOperand(1).getImm() << 12;
+ Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
DeadInstrs.insert(&OffsetTail);
return true;
}
return false;
}
+// Detect patterns for offsets that are passed into a SHXADD instruction.
+// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, or
+// simm15. The constant is created with addi voff, x0, C, and shXadd is used
+// to fill in the trailing zeros and do the addition.
+//
+// HiLUI: lui vreg1, %hi(s)
+// LoADDI: addi vreg2, vreg1, %lo(s)
+// OffsetTail: addi voff, x0, C
+// TailAdd: shXadd vreg4, voff, vreg2
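+//
+// For example, (SH2ADD (ADDI X0, 20), vreg2) folds to an offset of
+// 20 << 2 = 80.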
+bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd,
+ Register GAReg,
+ int64_t &Offset) {
+ assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
+ TailShXAdd.getOpcode() == RISCV::SH2ADD ||
+ TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
+ "Expected SHXADD instruction!");
+
+ // The first source is the shifted operand.
+ Register Rs1 = TailShXAdd.getOperand(1).getReg();
+
+ if (GAReg != TailShXAdd.getOperand(2).getReg())
+ return false;
+
+ // Can't fold if the register has more than one use.
+ if (!MRI->hasOneUse(Rs1))
+ return false;
+ // This can point to an ADDI X0, C.
+ MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
+ if (OffsetTail.getOpcode() != RISCV::ADDI)
+ return false;
+ if (!OffsetTail.getOperand(1).isReg() ||
+ OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
+ !OffsetTail.getOperand(2).isImm())
+ return false;
+
+ Offset = OffsetTail.getOperand(2).getImm();
+ assert(isInt<12>(Offset) && "Unexpected offset");
+
+ unsigned ShAmt;
+ switch (TailShXAdd.getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCV::SH1ADD: ShAmt = 1; break;
+ case RISCV::SH2ADD: ShAmt = 2; break;
+ case RISCV::SH3ADD: ShAmt = 3; break;
+ }
+
+ Offset = (uint64_t)Offset << ShAmt;
+
+ LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
+ DeadInstrs.insert(&OffsetTail);
+ return true;
+}
+
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
MachineInstr &LoADDI) {
Register DestReg = LoADDI.getOperand(0).getReg();
- assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI");
- // LoADDI has only one use.
- MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent();
- switch (Tail.getOpcode()) {
- default:
- LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
- << Tail);
- return false;
- case RISCV::ADDI: {
- // Offset is simply an immediate operand.
- int64_t Offset = Tail.getOperand(2).getImm();
- LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
- foldOffset(HiLUI, LoADDI, Tail, Offset);
- return true;
+
+ // First, look for arithmetic instructions we can get an offset from.
+ // We might be able to remove the arithmetic instructions by folding the
+ // offset into the LUI+ADDI.
+ if (MRI->hasOneUse(DestReg)) {
+ // LoADDI has only one use.
+ MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
+ switch (Tail.getOpcode()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
+ << Tail);
+ break;
+ case RISCV::ADDI: {
+ // Offset is simply an immediate operand.
+ int64_t Offset = Tail.getOperand(2).getImm();
+
+ // We might have two ADDIs in a row.
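+ // E.g. an offset of 2048 may be materialized as addi rd, rs, 2047
+ // followed by addi rd, rd, 1; both immediates are summed here.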
+ Register TailDestReg = Tail.getOperand(0).getReg();
+ if (MRI->hasOneUse(TailDestReg)) {
+ MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
+ if (TailTail.getOpcode() == RISCV::ADDI) {
+ Offset += TailTail.getOperand(2).getImm();
+ LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
+ DeadInstrs.insert(&Tail);
+ foldOffset(HiLUI, LoADDI, TailTail, Offset);
+ return true;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
+ foldOffset(HiLUI, LoADDI, Tail, Offset);
+ return true;
+ }
+ case RISCV::ADD: {
+ // The offset is too large to fit in the immediate field of ADDI.
+ // This can be in two forms:
+ // 1) LUI hi_Offset followed by:
+ // ADDI lo_offset
+ // This happens in case the offset has non zero bits in
+ // both hi 20 and lo 12 bits.
+ // 2) LUI (offset20)
+ // This happens in case the lower 12 bits of the offset are zeros.
+ int64_t Offset;
+ if (!matchLargeOffset(Tail, DestReg, Offset))
+ return false;
+ foldOffset(HiLUI, LoADDI, Tail, Offset);
+ return true;
+ }
+ case RISCV::SH1ADD:
+ case RISCV::SH2ADD:
+ case RISCV::SH3ADD: {
+ // The offset is too large to fit in the immediate field of ADDI.
+ // It may be encoded as (SH1ADD/SH2ADD/SH3ADD (ADDI X0, C), DestReg).
+ int64_t Offset;
+ if (!matchShiftedOffset(Tail, DestReg, Offset))
+ return false;
+ foldOffset(HiLUI, LoADDI, Tail, Offset);
+ return true;
+ }
+ }
}
- case RISCV::ADD: {
- // The offset is too large to fit in the immediate field of ADDI.
- // This can be in two forms:
- // 1) LUI hi_Offset followed by:
- // ADDI lo_offset
- // This happens in case the offset has non zero bits in
- // both hi 20 and lo 12 bits.
- // 2) LUI (offset20)
- // This happens in case the lower 12 bits of the offset are zeros.
- int64_t Offset;
- if (!matchLargeOffset(Tail, DestReg, Offset))
+
+ // We didn't find an arithmetic instruction. If all the uses are memory ops
+ // with the same offset, we can transform
+ // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
+ // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
+ // Tail: lw vreg3, 8(vreg2)
+
+ Optional<int64_t> CommonOffset;
+ for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
+ switch (UseMI.getOpcode()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
return false;
- foldOffset(HiLUI, LoADDI, Tail, Offset);
- return true;
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD: {
+ if (UseMI.getOperand(1).isFI())
+ return false;
+ // Register defined by LoADDI should not be the value register.
+ if (DestReg == UseMI.getOperand(0).getReg())
+ return false;
+ assert(DestReg == UseMI.getOperand(1).getReg() &&
+ "Expected base address use");
+ // All load/store instructions must use the same offset.
+ int64_t Offset = UseMI.getOperand(2).getImm();
+ if (CommonOffset && Offset != CommonOffset)
+ return false;
+ CommonOffset = Offset;
+ }
+ }
}
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD: {
- // Transforms the sequence: Into:
- // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
- // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
- // Tail: lw vreg3, 8(vreg2)
- if (Tail.getOperand(1).isFI())
- return false;
- // Register defined by LoADDI should be used in the base part of the
- // load\store instruction. Otherwise, no folding possible.
- Register BaseAddrReg = Tail.getOperand(1).getReg();
- if (DestReg != BaseAddrReg)
- return false;
- MachineOperand &TailImmOp = Tail.getOperand(2);
- int64_t Offset = TailImmOp.getImm();
- // Update the offsets in global address lowering.
- HiLUI.getOperand(1).setOffset(Offset);
- // Update the immediate in the Tail instruction to add the offset.
- Tail.RemoveOperand(2);
- MachineOperand &ImmOp = LoADDI.getOperand(2);
- ImmOp.setOffset(Offset);
- Tail.addOperand(ImmOp);
+
+ // We found a common offset.
+ // Update the offsets in global address lowering.
+ HiLUI.getOperand(1).setOffset(*CommonOffset);
+ MachineOperand &ImmOp = LoADDI.getOperand(2);
+ ImmOp.setOffset(*CommonOffset);
+
+ // Update the immediate in the load/store instructions to add the offset.
+ for (MachineInstr &UseMI :
+ llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
+ UseMI.removeOperand(2);
+ UseMI.addOperand(ImmOp);
// Update the base reg in the Tail instruction to feed from LUI.
// Output of HiLUI is only used in LoADDI, no need to use
// MRI->replaceRegWith().
- Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
- DeadInstrs.insert(&LoADDI);
- return true;
+ UseMI.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
}
- }
- return false;
+
+ DeadInstrs.insert(&LoADDI);
+ return true;
}
bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
+ ST = &Fn.getSubtarget<RISCVSubtarget>();
+
bool MadeChange = false;
DeadInstrs.clear();
MRI = &Fn.getRegInfo();
@@ -274,9 +395,8 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *LoADDI = nullptr;
if (!detectLuiAddiGlobal(HiLUI, LoADDI))
continue;
- LLVM_DEBUG(dbgs() << " Found lowered global address with one use: "
+ LLVM_DEBUG(dbgs() << " Found lowered global address: "
<< *LoADDI->getOperand(2).getGlobal() << "\n");
- // If the use count is only one, merge the offset
MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI);
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
new file mode 100644
index 000000000000..3c4a60b81d8e
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
@@ -0,0 +1,179 @@
+//=- RISCVRedundantCopyElimination.cpp - Remove useless copy for RISCV ------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes unnecessary zero copies in BBs that are targets of
+// beqz/bnez instructions. For instance, the copy instruction in the code below
+// can be removed because the beqz jumps to BB#2 when a0 is zero.
+// BB#1:
+// beqz %a0, <BB#2>
+// BB#2:
+// %a0 = COPY %x0
+// This pass should be run after register allocation.
+//
+// This pass is based on the earliest versions of
+// AArch64RedundantCopyElimination.
+//
+// FIXME: Support compares with constants other than zero? This is harder to
+// do on RISC-V since branches can't have immediates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-copyelim"
+
+STATISTIC(NumCopiesRemoved, "Number of copies removed.");
+
+namespace {
+class RISCVRedundantCopyElimination : public MachineFunctionPass {
+ const MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+
+public:
+ static char ID;
+ RISCVRedundantCopyElimination() : MachineFunctionPass(ID) {
+ initializeRISCVRedundantCopyEliminationPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override {
+ return "RISCV Redundant Copy Elimination";
+ }
+
+private:
+ bool optimizeBlock(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVRedundantCopyElimination::ID = 0;
+
+INITIALIZE_PASS(RISCVRedundantCopyElimination, "riscv-copyelim",
+ "RISCV redundant copy elimination pass", false, false)
+
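+// Return true if reaching MBB along MI's edge guarantees that the register
+// compared against x0 is zero on entry to MBB. E.g. both of:
+//   beq a0, x0, <MBB>     ; taken edge into MBB
+//   bne a0, x0, <other>   ; fall-through edge into MBB
+// guarantee a0 == 0 in MBB.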
+static bool guaranteesZeroRegInBlock(const MachineInstr &MI,
+ const MachineBasicBlock &MBB) {
+ unsigned Opc = MI.getOpcode();
+ if (Opc == RISCV::BEQ && MI.getOperand(1).getReg() == RISCV::X0 &&
+ &MBB == MI.getOperand(2).getMBB())
+ return true;
+ if (Opc == RISCV::BNE && MI.getOperand(1).getReg() == RISCV::X0 &&
+ &MBB != MI.getOperand(2).getMBB())
+ return true;
+
+ return false;
+}
+
+bool RISCVRedundantCopyElimination::optimizeBlock(MachineBasicBlock &MBB) {
+ // Check if the current basic block has a single predecessor.
+ if (MBB.pred_size() != 1)
+ return false;
+
+ // Check if the predecessor has two successors, implying the block ends in a
+ // conditional branch.
+ MachineBasicBlock *PredMBB = *MBB.pred_begin();
+ if (PredMBB->succ_size() != 2)
+ return false;
+
+ MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr();
+ if (CondBr == PredMBB->end())
+ return false;
+
+ while (true) {
+ // If we run out of terminators, give up.
+ if (!CondBr->isTerminator())
+ return false;
+ // If we found a branch with X0, stop searching and try to remove copies.
+ // TODO: Handle multiple branches with different registers.
+ if (guaranteesZeroRegInBlock(*CondBr, MBB))
+ break;
+ // If we reached the beginning of the basic block, give up.
+ if (CondBr == PredMBB->begin())
+ return false;
+ --CondBr;
+ }
+
+ Register TargetReg = CondBr->getOperand(0).getReg();
+ if (!TargetReg)
+ return false;
+
+ bool Changed = false;
+ MachineBasicBlock::iterator LastChange = MBB.begin();
+ // Remove redundant Copy instructions unless TargetReg is modified.
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isCopy() && MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg()) {
+ Register DefReg = MI->getOperand(0).getReg();
+ Register SrcReg = MI->getOperand(1).getReg();
+
+ if (SrcReg == RISCV::X0 && !MRI->isReserved(DefReg) &&
+ TargetReg == DefReg) {
+ LLVM_DEBUG(dbgs() << "Remove redundant Copy : ");
+ LLVM_DEBUG(MI->print(dbgs()));
+
+ MI->eraseFromParent();
+ Changed = true;
+ LastChange = I;
+ ++NumCopiesRemoved;
+ continue;
+ }
+ }
+
+ if (MI->modifiesRegister(TargetReg, TRI))
+ break;
+ }
+
+ if (!Changed)
+ return false;
+
+ // Otherwise, we have to fix up the use-def chain, starting with the
+ // BEQ/BNE. Conservatively mark as much as we can as live.
+ CondBr->clearRegisterKills(TargetReg, TRI);
+
+ // Add newly used reg to the block's live-in list if it isn't there already.
+ if (!MBB.isLiveIn(TargetReg))
+ MBB.addLiveIn(TargetReg);
+
+ // Clear any kills of TargetReg between CondBr and the last removed COPY.
+ for (MachineInstr &MMI : make_range(MBB.begin(), LastChange))
+ MMI.clearRegisterKills(TargetReg, TRI);
+
+ return true;
+}
+
+bool RISCVRedundantCopyElimination::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF)
+ Changed |= optimizeBlock(MBB);
+
+ return Changed;
+}
+
+FunctionPass *llvm::createRISCVRedundantCopyEliminationPass() {
+ return new RISCVRedundantCopyElimination();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp
index bd3b95a98b9f..5371b790a148 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp
@@ -12,9 +12,9 @@
#include "RISCVRegisterBankInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_TARGET_REGBANK_IMPL
@@ -22,5 +22,4 @@
using namespace llvm;
-RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI)
- : RISCVGenRegisterBankInfo() {}
+RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) {}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h
index 05fac992734d..194a1548af24 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "RISCVGenRegisterBank.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 35363bf37c0d..0c9219076498 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -14,6 +14,7 @@
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -101,6 +102,7 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, RISCV::VTYPE);
markSuperRegs(Reserved, RISCV::VXSAT);
markSuperRegs(Reserved, RISCV::VXRM);
+ markSuperRegs(Reserved, RISCV::VLENB); // vlenb (constant)
// Floating point environment registers.
markSuperRegs(Reserved, RISCV::FRM);
@@ -116,7 +118,7 @@ bool RISCVRegisterInfo::isAsmClobberable(const MachineFunction &MF,
}
bool RISCVRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
- return PhysReg == RISCV::X0;
+ return PhysReg == RISCV::X0 || PhysReg == RISCV::VLENB;
}
const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const {
@@ -125,7 +127,7 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const {
// Frame indexes representing locations of CSRs which are given a fixed location
// by save/restore libcalls.
-static const std::map<unsigned, int> FixedCSRFIMap = {
+static const std::pair<unsigned, int> FixedCSRFIMap[] = {
{/*ra*/ RISCV::X1, -1},
{/*s0*/ RISCV::X8, -2},
{/*s1*/ RISCV::X9, -3},
@@ -148,8 +150,9 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
if (!RVFI->useSaveRestoreLibCalls(MF))
return false;
- auto FII = FixedCSRFIMap.find(Reg);
- if (FII == FixedCSRFIMap.end())
+ const auto *FII =
+ llvm::find_if(FixedCSRFIMap, [&](auto P) { return P.first == Reg; });
+ if (FII == std::end(FixedCSRFIMap))
return false;
FrameIdx = FII->second;
@@ -171,7 +174,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register FrameReg;
StackOffset Offset =
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
- bool IsRVVSpill = TII->isRVVSpill(MI, /*CheckFIs*/ false);
+ bool IsRVVSpill = RISCV::isRVVSpill(MI);
if (!IsRVVSpill)
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
@@ -270,7 +273,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
}
- auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MI.getOpcode());
+ auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(MI.getOpcode());
if (ZvlssegInfo) {
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 8c1c03b51c24..4ff60ebda5aa 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -66,6 +66,7 @@ def sub_vrm1_5 : ComposedSubRegIndex<sub_vrm2_2, sub_vrm1_1>;
def sub_vrm1_6 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_0>;
def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>;
+def sub_32_hi : SubRegIndex<32, 32>;
} // Namespace = "RISCV"
// Integer registers
@@ -461,6 +462,12 @@ let RegAltNameIndices = [ABIRegAltName] in {
DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>;
}
+def VCSR : RegisterClass<"RISCV", [XLenVT], 32,
+ (add VTYPE, VL, VLENB)> {
+ let RegInfos = XLenRI;
+}
+
+
foreach m = [1, 2, 4] in {
foreach n = NFList<m>.L in {
def "VN" # n # "M" # m # "NoV0": RegisterTuples<
@@ -534,6 +541,35 @@ def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
let Size = 64;
}
+let RegInfos = XLenRI in {
+def GPRF16 : RegisterClass<"RISCV", [f16], 16, (add GPR)>;
+def GPRF32 : RegisterClass<"RISCV", [f32], 32, (add GPR)>;
+def GPRF64 : RegisterClass<"RISCV", [f64], 64, (add GPR)>;
+} // RegInfos = XLenRI
+
+let RegAltNameIndices = [ABIRegAltName] in {
+ foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
+ 24, 26, 28, 30] in {
+ defvar Reg = !cast<Register>("X"#Index);
+ def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName,
+ [!cast<Register>("X"#Index),
+ !cast<Register>("X"#!add(Index, 1))],
+ Reg.AltNames> {
+ let SubRegIndices = [sub_32, sub_32_hi];
+ }
+ }
+}
+
+let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in
+def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add
+ X10_PD, X12_PD, X14_PD, X16_PD,
+ X6_PD,
+ X28_PD, X30_PD,
+ X8_PD,
+ X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+ X0_PD, X2_PD, X4_PD
+)>;
+
// The register class is added for inline assembly for vector mask types.
def VM : VReg<VMaskVTs,
(add (sequence "V%u", 8, 31),
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
index 715d92b036e3..dadf8f81a2c0 100644
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -21,6 +21,8 @@ using namespace llvm;
#define DEBUG_TYPE "riscv-sextw-removal"
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
+STATISTIC(NumTransformedToWInstrs,
+ "Number of instructions transformed to W-ops");
static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
cl::desc("Disable removal of sext.w"),
@@ -55,11 +57,143 @@ FunctionPass *llvm::createRISCVSExtWRemovalPass() {
return new RISCVSExtWRemoval();
}
+// Add all instructions that use MI's result to the worklist.
+static void addUses(const MachineInstr &MI,
+ SmallVectorImpl<const MachineInstr *> &Worklist,
+ MachineRegisterInfo &MRI) {
+ for (auto &UserOp : MRI.reg_operands(MI.getOperand(0).getReg())) {
+ const auto *User = UserOp.getParent();
+ if (User == &MI) // ignore the def, current MI
+ continue;
+ Worklist.push_back(User);
+ }
+}
+
+// Returns true if all uses of OrigMI only depend on the lower word of its
+// output, so OrigMI can be transformed to the corresponding W-version.
+// TODO: handle multiple interdependent transformations.
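+//
+// E.g. if the only user of an ADD is (ANDI x, 0xff), the upper bits of the
+// ADD result are never observed, so the ADD qualifies.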
+static bool isAllUsesReadW(const MachineInstr &OrigMI,
+ MachineRegisterInfo &MRI) {
+
+ SmallPtrSet<const MachineInstr *, 4> Visited;
+ SmallVector<const MachineInstr *, 4> Worklist;
+
+ Visited.insert(&OrigMI);
+ addUses(OrigMI, Worklist, MRI);
+
+ while (!Worklist.empty()) {
+ const MachineInstr *MI = Worklist.pop_back_val();
+
+ if (!Visited.insert(MI).second) {
+ // If we've looped back to OrigMI through a PHI cycle, we can't transform
+ // LD or LWU, because these operations use all 64 bits of input.
+ if (MI == &OrigMI) {
+ unsigned opcode = MI->getOpcode();
+ if (opcode == RISCV::LD || opcode == RISCV::LWU)
+ return false;
+ }
+ continue;
+ }
+
+ switch (MI->getOpcode()) {
+ case RISCV::ADDIW:
+ case RISCV::ADDW:
+ case RISCV::DIVUW:
+ case RISCV::DIVW:
+ case RISCV::MULW:
+ case RISCV::REMUW:
+ case RISCV::REMW:
+ case RISCV::SLLIW:
+ case RISCV::SLLW:
+ case RISCV::SRAIW:
+ case RISCV::SRAW:
+ case RISCV::SRLIW:
+ case RISCV::SRLW:
+ case RISCV::SUBW:
+ case RISCV::ROLW:
+ case RISCV::RORW:
+ case RISCV::RORIW:
+ case RISCV::CLZW:
+ case RISCV::CTZW:
+ case RISCV::CPOPW:
+ case RISCV::SLLI_UW:
+ case RISCV::FCVT_S_W:
+ case RISCV::FCVT_S_WU:
+ case RISCV::FCVT_D_W:
+ case RISCV::FCVT_D_WU:
+ continue;
+
+ // Depending on the immediate, these read only the lower word of the input;
+ // otherwise the lower word of the output depends only on the lower word of
+ // the input, so check that their uses read only the lower word.
+ case RISCV::SLLI:
+ if (MI->getOperand(2).getImm() >= 32)
+ continue;
+ addUses(*MI, Worklist, MRI);
+ continue;
+ case RISCV::ANDI:
+ if (isUInt<11>(MI->getOperand(2).getImm()))
+ continue;
+ addUses(*MI, Worklist, MRI);
+ continue;
+ case RISCV::ORI:
+ if (!isUInt<11>(MI->getOperand(2).getImm()))
+ continue;
+ addUses(*MI, Worklist, MRI);
+ continue;
+
+ case RISCV::BEXTI:
+ if (MI->getOperand(2).getImm() >= 32)
+ return false;
+ continue;
+
+ // For these operations, the lower word of the output depends only on the
+ // lower word of the input, so check that all uses read only the lower word.
+ case RISCV::COPY:
+ case RISCV::PHI:
+
+ case RISCV::ADD:
+ case RISCV::ADDI:
+ case RISCV::AND:
+ case RISCV::MUL:
+ case RISCV::OR:
+ case RISCV::SLL:
+ case RISCV::SUB:
+ case RISCV::XOR:
+ case RISCV::XORI:
+
+ case RISCV::ADD_UW:
+ case RISCV::ANDN:
+ case RISCV::CLMUL:
+ case RISCV::ORC_B:
+ case RISCV::ORN:
+ case RISCV::SEXT_B:
+ case RISCV::SEXT_H:
+ case RISCV::SH1ADD:
+ case RISCV::SH1ADD_UW:
+ case RISCV::SH2ADD:
+ case RISCV::SH2ADD_UW:
+ case RISCV::SH3ADD:
+ case RISCV::SH3ADD_UW:
+ case RISCV::XNOR:
+ case RISCV::ZEXT_H_RV64:
+ addUses(*MI, Worklist, MRI);
+ continue;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
// This function returns true if the machine instruction always outputs a value
// where bits 63:32 match bit 31.
+// Alternatively, if the instruction can be converted to a W variant
+// (e.g. ADD->ADDW) and all of its uses only use the lower word of its output,
+// then return true and add the instruction to FixableDef to be converted
+// later.
// TODO: Allocate a bit in TSFlags for the W instructions?
// TODO: Add other W instructions.
-static bool isSignExtendingOpW(const MachineInstr &MI) {
+static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI,
+ SmallPtrSetImpl<MachineInstr *> &FixableDef) {
switch (MI.getOpcode()) {
case RISCV::LUI:
case RISCV::LW:
@@ -89,8 +223,9 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
case RISCV::FCVT_WU_S:
case RISCV::FCVT_W_D:
case RISCV::FCVT_WU_D:
+ case RISCV::FMV_X_W:
// The following aren't W instructions, but are either sign extended from a
- // smaller size or put zeros in bits 63:31.
+ // smaller size, always produce a small integer, or put zeros in bits 63:31.
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LB:
@@ -102,6 +237,12 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
case RISCV::SEXT_B:
case RISCV::SEXT_H:
case RISCV::ZEXT_H_RV64:
+ case RISCV::FMV_X_H:
+ case RISCV::BEXT:
+ case RISCV::BEXTI:
+ case RISCV::CLZ:
+ case RISCV::CPOP:
+ case RISCV::CTZ:
return true;
// shifting right sufficiently makes the value 32-bit sign-extended
case RISCV::SRAI:
@@ -110,7 +251,14 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
return MI.getOperand(2).getImm() > 32;
// The LI pattern ADDI rd, X0, imm is sign extended.
case RISCV::ADDI:
- return MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0;
+ if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0)
+ return true;
+ if (isAllUsesReadW(MI, MRI)) {
+ // transform to ADDIW
+ FixableDef.insert(&MI);
+ return true;
+ }
+ return false;
// An ANDI with an 11 bit immediate will zero bits 63:11.
case RISCV::ANDI:
return isUInt<11>(MI.getOperand(2).getImm());
@@ -120,28 +268,45 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
// Copying from X0 produces zero.
case RISCV::COPY:
return MI.getOperand(1).getReg() == RISCV::X0;
+
+ // With these opcodes, we can "fix" them with the W-version if we know all
+ // users of the result rely only on bits 31:0.
+ case RISCV::SLLI:
+ // SLLIW reads the lowest 5 bits of the shift amount, while SLLI reads the
+ // lowest 6 bits.
+ if (MI.getOperand(2).getImm() >= 32)
+ return false;
+ LLVM_FALLTHROUGH;
+ case RISCV::ADD:
+ case RISCV::LD:
+ case RISCV::LWU:
+ case RISCV::MUL:
+ case RISCV::SUB:
+ if (isAllUsesReadW(MI, MRI)) {
+ FixableDef.insert(&MI);
+ return true;
+ }
}
return false;
}
-static bool isSignExtendedW(const MachineInstr &OrigMI,
- MachineRegisterInfo &MRI) {
+static bool isSignExtendedW(MachineInstr &OrigMI, MachineRegisterInfo &MRI,
+ SmallPtrSetImpl<MachineInstr *> &FixableDef) {
SmallPtrSet<const MachineInstr *, 4> Visited;
- SmallVector<const MachineInstr *, 4> Worklist;
+ SmallVector<MachineInstr *, 4> Worklist;
Worklist.push_back(&OrigMI);
while (!Worklist.empty()) {
- const MachineInstr *MI = Worklist.pop_back_val();
+ MachineInstr *MI = Worklist.pop_back_val();
// If we already visited this instruction, we don't need to check it again.
if (!Visited.insert(MI).second)
continue;
// If this is a sign extending operation we don't need to look any further.
- if (isSignExtendingOpW(*MI))
+ if (isSignExtendingOpW(*MI, MRI, FixableDef))
continue;
// Is this an instruction that propagates sign extend.
@@ -157,7 +322,7 @@ static bool isSignExtendedW(const MachineInstr &OrigMI,
// If this is a copy from another register, check its source instruction.
if (!SrcReg.isVirtual())
return false;
- const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
if (!SrcMI)
return false;
@@ -165,18 +330,25 @@ static bool isSignExtendedW(const MachineInstr &OrigMI,
Worklist.push_back(SrcMI);
break;
}
+
+ // For these, we just need to check if the 1st operand is sign extended.
+ case RISCV::BCLRI:
+ case RISCV::BINVI:
+ case RISCV::BSETI:
+ if (MI->getOperand(2).getImm() >= 31)
+ return false;
+ LLVM_FALLTHROUGH;
case RISCV::REM:
case RISCV::ANDI:
case RISCV::ORI:
case RISCV::XORI: {
// |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
- // Logical operations use a sign extended 12-bit immediate. We just need
- // to check if the other operand is sign extended.
+ // Logical operations use a sign extended 12-bit immediate.
Register SrcReg = MI->getOperand(1).getReg();
if (!SrcReg.isVirtual())
return false;
- const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
if (!SrcMI)
return false;
@@ -214,7 +386,7 @@ static bool isSignExtendedW(const MachineInstr &OrigMI,
Register SrcReg = MI->getOperand(I).getReg();
if (!SrcReg.isVirtual())
return false;
- const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
if (!SrcMI)
return false;
@@ -232,6 +404,26 @@ static bool isSignExtendedW(const MachineInstr &OrigMI,
return true;
}
+static unsigned getWOp(unsigned Opcode) {
+ switch (Opcode) {
+ case RISCV::ADDI:
+ return RISCV::ADDIW;
+ case RISCV::ADD:
+ return RISCV::ADDW;
+ case RISCV::LD:
+ case RISCV::LWU:
+ return RISCV::LW;
+ case RISCV::MUL:
+ return RISCV::MULW;
+ case RISCV::SLLI:
+ return RISCV::SLLIW;
+ case RISCV::SUB:
+ return RISCV::SUBW;
+ default:
+ llvm_unreachable("Unexpected opcode for replacement with W variant");
+ }
+}
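+
+// For example, once every user of an ADD is known to read only bits 31:0 of
+// its result, the ADD can be rewritten to ADDW; ADDW sign-extends bit 31 into
+// bits 63:32, which is what makes a following sext.w removable.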
+
bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()) || DisableSExtWRemoval)
return false;
@@ -242,7 +434,10 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
if (!ST.is64Bit())
return false;
- bool MadeChange = false;
+ SmallPtrSet<MachineInstr *, 4> SExtWRemovalCands;
+
+ // Replacing instructions invalidates the MI iterator, so we collect the
+ // candidates first and then iterate over them separately.
for (MachineBasicBlock &MBB : MF) {
for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) {
MachineInstr *MI = &*I++;
@@ -257,21 +452,49 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
if (!SrcReg.isVirtual())
continue;
- const MachineInstr &SrcMI = *MRI.getVRegDef(SrcReg);
- if (!isSignExtendedW(SrcMI, MRI))
- continue;
+ SExtWRemovalCands.insert(MI);
+ }
+ }
- Register DstReg = MI->getOperand(0).getReg();
- if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
- continue;
+ bool MadeChange = false;
+ for (auto MI : SExtWRemovalCands) {
+ SmallPtrSet<MachineInstr *, 4> FixableDef;
+ Register SrcReg = MI->getOperand(1).getReg();
+ MachineInstr &SrcMI = *MRI.getVRegDef(SrcReg);
+
+ // If all definitions reaching MI sign-extend their output, then the sext.w
+ // is redundant.
+ if (!isSignExtendedW(SrcMI, MRI, FixableDef))
+ continue;
- LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
- MRI.replaceRegWith(DstReg, SrcReg);
- MRI.clearKillFlags(SrcReg);
- MI->eraseFromParent();
- ++NumRemovedSExtW;
- MadeChange = true;
+ Register DstReg = MI->getOperand(0).getReg();
+ if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
+ continue;
+ // Replace Fixable instructions with their W versions.
+ for (MachineInstr *Fixable : FixableDef) {
+ MachineBasicBlock &MBB = *Fixable->getParent();
+ const DebugLoc &DL = Fixable->getDebugLoc();
+ unsigned Code = getWOp(Fixable->getOpcode());
+ MachineInstrBuilder Replacement =
+ BuildMI(MBB, Fixable, DL, ST.getInstrInfo()->get(Code));
+ for (auto Op : Fixable->operands())
+ Replacement.add(Op);
+ for (auto Op : Fixable->memoperands())
+ Replacement.addMemOperand(Op);
+
+ LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
+ LLVM_DEBUG(dbgs() << " with " << *Replacement);
+
+ Fixable->eraseFromParent();
+ ++NumTransformedToWInstrs;
}
+
+ LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
+ MRI.replaceRegWith(DstReg, SrcReg);
+ MRI.clearKillFlags(SrcReg);
+ MI->eraseFromParent();
+ ++NumRemovedSExtW;
+ MadeChange = true;
}
return MadeChange;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 78cf34c8c582..5a3c8deb7943 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -242,6 +242,11 @@ defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbs;
+defm : UnsupportedSchedZbe;
defm : UnsupportedSchedZbf;
+defm : UnsupportedSchedZbm;
+defm : UnsupportedSchedZbp;
+defm : UnsupportedSchedZbr;
+defm : UnsupportedSchedZbt;
defm : UnsupportedSchedZfh;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 9f5e5ff1223c..cfbd9722d7bc 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -229,6 +229,11 @@ defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbs;
+defm : UnsupportedSchedZbe;
defm : UnsupportedSchedZbf;
+defm : UnsupportedSchedZbm;
+defm : UnsupportedSchedZbp;
+defm : UnsupportedSchedZbr;
+defm : UnsupportedSchedZbt;
defm : UnsupportedSchedZfh;
}
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleB.td b/llvm/lib/Target/RISCV/RISCVScheduleB.td
index 193760e1e15b..4bfe7b316eeb 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleB.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleB.td
@@ -33,10 +33,59 @@ def WriteCLMUL : SchedWrite; // CLMUL/CLMULR/CLMULH
def WriteSingleBit : SchedWrite; // BCLR/BSET/BINV/BEXT
def WriteSingleBitImm: SchedWrite; // BCLRI/BSETI/BINVI/BEXTI
+// Zbe extension
+def WriteDecompress : SchedWrite; // bdecompress
+def WriteCompress : SchedWrite; // bcompress
+def WriteDecompress32: SchedWrite; // bdecompressw
+def WriteCompress32 : SchedWrite; // bcompressw
+
// Zbf extension
def WriteBFP : SchedWrite; // BFP
def WriteBFP32 : SchedWrite; // BFPW
+// Zbm extension
+def WriteBMatrix : SchedWrite; // bmator/bmatxor/bmatflip
+
+// Zbp extension
+def WriteORC : SchedWrite; // gorc
+def WriteREV : SchedWrite; // grev
+def WriteORC32 : SchedWrite; // gorcw
+def WriteREV32 : SchedWrite; // grevw
+def WriteREVImm : SchedWrite; // grevi
+def WriteORCImm : SchedWrite; // gorci
+def WriteREVImm32 : SchedWrite; // greviw
+def WriteORCImm32 : SchedWrite; // gorciw
+def WriteSHFL : SchedWrite; // shfl
+def WriteUNSHFL : SchedWrite; // unshfl
+def WriteSHFL32 : SchedWrite; // shflw
+def WriteUNSHFL32 : SchedWrite; // unshflw
+def WriteSHFLImm : SchedWrite; // shfli
+def WriteUNSHFLImm : SchedWrite; // unshfli
+def WriteXPERMH : SchedWrite; // xperm.h
+def WriteXPERMW : SchedWrite; // xperm.w
+def WritePACK : SchedWrite; // pack/packh
+def WritePACK32 : SchedWrite; // packw
+def WritePACKU : SchedWrite; // packu
+def WritePACKU32 : SchedWrite; // packuw
+
+// Zbr extension
+def WriteCRCB : SchedWrite; // crc32.b
+def WriteCRCH : SchedWrite; // crc32.h
+def WriteCRCW : SchedWrite; // crc32.w
+def WriteCRCD : SchedWrite; // crc32.d
+def WriteCRCCB : SchedWrite; // crc32c.b
+def WriteCRCCH : SchedWrite; // crc32c.h
+def WriteCRCCW : SchedWrite; // crc32c.w
+def WriteCRCCD : SchedWrite; // crc32c.d
+
+// Zbt extension
+def WriteCMix : SchedWrite; // cmix
+def WriteCMov : SchedWrite; // cmov
+def WriteFSReg : SchedWrite; // fsl/fsr
+def WriteFSRImm : SchedWrite; // fsri
+def WriteFSReg32 : SchedWrite; // fslw/fsrw
+def WriteFSRImm32 : SchedWrite; // fsriw
+
/// Define scheduler resources associated with use operands.
// Zba extension
@@ -64,10 +113,59 @@ def ReadCLMUL : SchedRead; // CLMUL/CLMULR/CLMULH
def ReadSingleBit : SchedRead; // BCLR/BSET/BINV/BEXT
def ReadSingleBitImm: SchedRead; // BCLRI/BSETI/BINVI/BEXTI
+// Zbe extension
+def ReadDecompress : SchedRead; // bdecompress
+def ReadCompress : SchedRead; // bcompress
+def ReadDecompress32: SchedRead; // bdecompressw
+def ReadCompress32 : SchedRead; // bcompressw
+
// Zbf extension
def ReadBFP : SchedRead; // BFP
def ReadBFP32 : SchedRead; // BFPW
+// Zbm extension
+def ReadBMatrix : SchedRead; // bmator/bmatxor/bmatflip
+
+// Zbp extension
+def ReadORC : SchedRead; // gorc
+def ReadREV : SchedRead; // grev
+def ReadORC32 : SchedRead; // gorcw
+def ReadREV32 : SchedRead; // grevw
+def ReadREVImm : SchedRead; // grevi
+def ReadORCImm : SchedRead; // gorci
+def ReadREVImm32 : SchedRead; // greviw
+def ReadORCImm32 : SchedRead; // gorciw
+def ReadSHFL : SchedRead; // shfl
+def ReadUNSHFL : SchedRead; // unshfl
+def ReadSHFL32 : SchedRead; // shflw
+def ReadUNSHFL32 : SchedRead; // unshflw
+def ReadSHFLImm : SchedRead; // shfli
+def ReadUNSHFLImm : SchedRead; // unshfli
+def ReadXPERMH : SchedRead; // xperm.h
+def ReadXPERMW : SchedRead; // xperm.w
+def ReadPACK : SchedRead; // pack/packh
+def ReadPACK32 : SchedRead; // packw
+def ReadPACKU : SchedRead; // packu
+def ReadPACKU32 : SchedRead; // packuw
+
+// Zbr extension
+def ReadCRCB : SchedRead; // crc32.b
+def ReadCRCH : SchedRead; // crc32.h
+def ReadCRCW : SchedRead; // crc32.w
+def ReadCRCD : SchedRead; // crc32.d
+def ReadCRCCB : SchedRead; // crc32c.b
+def ReadCRCCH : SchedRead; // crc32c.h
+def ReadCRCCW : SchedRead; // crc32c.w
+def ReadCRCCD : SchedRead; // crc32c.d
+
+// Zbt extension
+def ReadCMix : SchedRead; // cmix
+def ReadCMov : SchedRead; // cmov
+def ReadFSReg : SchedRead; // fsl/fsr
+def ReadFSRImm : SchedRead; // fsri
+def ReadFSReg32 : SchedRead; // fslw/fsrw
+def ReadFSRImm32 : SchedRead; // fsriw
+
/// Define default scheduler resources for B.
multiclass UnsupportedSchedZba {
@@ -128,6 +226,20 @@ def : ReadAdvance<ReadSingleBitImm, 0>;
}
}
+multiclass UnsupportedSchedZbe {
+let Unsupported = true in {
+def : WriteRes<WriteDecompress, []>;
+def : WriteRes<WriteCompress, []>;
+def : WriteRes<WriteDecompress32, []>;
+def : WriteRes<WriteCompress32, []>;
+
+def : ReadAdvance<ReadDecompress, 0>;
+def : ReadAdvance<ReadCompress, 0>;
+def : ReadAdvance<ReadDecompress32, 0>;
+def : ReadAdvance<ReadCompress32, 0>;
+}
+}
+
multiclass UnsupportedSchedZbf {
let Unsupported = true in {
def : WriteRes<WriteBFP, []>;
@@ -137,3 +249,97 @@ def : ReadAdvance<ReadBFP, 0>;
def : ReadAdvance<ReadBFP32, 0>;
}
}
+
+multiclass UnsupportedSchedZbm {
+let Unsupported = true in {
+def : WriteRes<WriteBMatrix, []>;
+
+def : ReadAdvance<ReadBMatrix, 0>;
+}
+}
+
+multiclass UnsupportedSchedZbp {
+let Unsupported = true in {
+def : WriteRes<WriteORC, []>;
+def : WriteRes<WriteREV, []>;
+def : WriteRes<WriteORC32, []>;
+def : WriteRes<WriteREV32, []>;
+def : WriteRes<WriteREVImm, []>;
+def : WriteRes<WriteORCImm, []>;
+def : WriteRes<WriteREVImm32, []>;
+def : WriteRes<WriteORCImm32, []>;
+def : WriteRes<WriteSHFL, []>;
+def : WriteRes<WriteUNSHFL, []>;
+def : WriteRes<WriteSHFL32, []>;
+def : WriteRes<WriteUNSHFL32, []>;
+def : WriteRes<WriteSHFLImm, []>;
+def : WriteRes<WriteUNSHFLImm, []>;
+def : WriteRes<WriteXPERMH, []>;
+def : WriteRes<WriteXPERMW, []>;
+def : WriteRes<WritePACK, []>;
+def : WriteRes<WritePACK32, []>;
+def : WriteRes<WritePACKU, []>;
+def : WriteRes<WritePACKU32, []>;
+
+def : ReadAdvance<ReadORC, 0>;
+def : ReadAdvance<ReadREV, 0>;
+def : ReadAdvance<ReadORC32, 0>;
+def : ReadAdvance<ReadREV32, 0>;
+def : ReadAdvance<ReadREVImm, 0>;
+def : ReadAdvance<ReadORCImm, 0>;
+def : ReadAdvance<ReadREVImm32, 0>;
+def : ReadAdvance<ReadORCImm32, 0>;
+def : ReadAdvance<ReadSHFL, 0>;
+def : ReadAdvance<ReadUNSHFL, 0>;
+def : ReadAdvance<ReadSHFL32, 0>;
+def : ReadAdvance<ReadUNSHFL32, 0>;
+def : ReadAdvance<ReadSHFLImm, 0>;
+def : ReadAdvance<ReadUNSHFLImm, 0>;
+def : ReadAdvance<ReadXPERMH, 0>;
+def : ReadAdvance<ReadXPERMW, 0>;
+def : ReadAdvance<ReadPACK, 0>;
+def : ReadAdvance<ReadPACK32, 0>;
+def : ReadAdvance<ReadPACKU, 0>;
+def : ReadAdvance<ReadPACKU32, 0>;
+}
+}
+
+multiclass UnsupportedSchedZbr {
+let Unsupported = true in {
+def : WriteRes<WriteCRCB, []>;
+def : WriteRes<WriteCRCH, []>;
+def : WriteRes<WriteCRCW, []>;
+def : WriteRes<WriteCRCD, []>;
+def : WriteRes<WriteCRCCB, []>;
+def : WriteRes<WriteCRCCH, []>;
+def : WriteRes<WriteCRCCW, []>;
+def : WriteRes<WriteCRCCD, []>;
+
+def : ReadAdvance<ReadCRCB, 0>;
+def : ReadAdvance<ReadCRCH, 0>;
+def : ReadAdvance<ReadCRCW, 0>;
+def : ReadAdvance<ReadCRCD, 0>;
+def : ReadAdvance<ReadCRCCB, 0>;
+def : ReadAdvance<ReadCRCCH, 0>;
+def : ReadAdvance<ReadCRCCW, 0>;
+def : ReadAdvance<ReadCRCCD, 0>;
+}
+}
+
+multiclass UnsupportedSchedZbt {
+let Unsupported = true in {
+def : WriteRes<WriteCMix, []>;
+def : WriteRes<WriteCMov, []>;
+def : WriteRes<WriteFSReg, []>;
+def : WriteRes<WriteFSRImm, []>;
+def : WriteRes<WriteFSReg32, []>;
+def : WriteRes<WriteFSRImm32, []>;
+
+def : ReadAdvance<ReadCMix, 0>;
+def : ReadAdvance<ReadCMov, 0>;
+def : ReadAdvance<ReadFSReg, 0>;
+def : ReadAdvance<ReadFSRImm, 0>;
+def : ReadAdvance<ReadFSReg32, 0>;
+def : ReadAdvance<ReadFSRImm32, 0>;
+}
+}
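
The `UnsupportedSched*` multiclasses above let a per-CPU scheduling model stay complete even when the CPU lacks an extension: every scheduling class is still covered, just explicitly marked unsupported. A rough standalone C++ analogue of that idea (the map-based model and all names are illustrative; the real mechanism is TableGen's `let Unsupported = true`):

#include <cassert>
#include <initializer_list>
#include <map>
#include <optional>
#include <string>

// A CPU model must say something about every scheduling class. Classes for
// extensions the CPU does not implement are filled in with an explicit
// "unsupported" marker (nullopt) rather than left undefined.
struct SchedModel {
  std::map<std::string, std::optional<unsigned>> Latency;
  void markUnsupportedZbt() {
    for (const char *C : {"WriteCMix", "WriteCMov", "WriteFSReg",
                          "WriteFSRImm", "WriteFSReg32", "WriteFSRImm32"})
      Latency[C] = std::nullopt;
  }
};

int main() {
  SchedModel Model;
  Model.Latency["WriteIALU"] = 1; // a class the core actually implements
  Model.markUnsupportedZbt();     // Zbt absent, but every class still covered
  assert(!Model.Latency["WriteCMov"].has_value());
}
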
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 976e4ccb1422..7589b44b81d3 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -15,6 +15,7 @@
#include "RISCVCallLowering.h"
#include "RISCVFrameLowering.h"
#include "RISCVLegalizerInfo.h"
+#include "RISCVMacroFusion.h"
#include "RISCVRegisterBankInfo.h"
#include "RISCVTargetMachine.h"
#include "llvm/MC/TargetRegistry.h"
@@ -28,16 +29,21 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"
-static cl::opt<unsigned> RVVVectorBitsMax(
+static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
+ cl::init(false), cl::Hidden);
+
+static cl::opt<int> RVVVectorBitsMax(
"riscv-v-vector-bits-max",
cl::desc("Assume V extension vector registers are at most this big, "
"with zero meaning no maximum size is assumed."),
cl::init(0), cl::Hidden);
-static cl::opt<unsigned> RVVVectorBitsMin(
+static cl::opt<int> RVVVectorBitsMin(
"riscv-v-vector-bits-min",
cl::desc("Assume V extension vector registers are at least this big, "
- "with zero meaning no minimum size is assumed."),
+ "with zero meaning no minimum size is assumed. A value of -1 "
+ "means use Zvl*b extension. This is primarily used to enable "
+ "autovectorization with fixed width vectors."),
cl::init(0), cl::Hidden);
static cl::opt<unsigned> RVVVectorLMULMax(
@@ -46,11 +52,6 @@ static cl::opt<unsigned> RVVVectorLMULMax(
"Fractional LMUL values are not supported."),
cl::init(8), cl::Hidden);
-static cl::opt<unsigned> RVVVectorELENMax(
- "riscv-v-fixed-length-vector-elen-max",
- cl::desc("The maximum ELEN value to use for fixed length vectors."),
- cl::init(64), cl::Hidden);
-
static cl::opt<bool> RISCVDisableUsingConstantPoolForLargeInts(
"riscv-disable-using-constant-pool-for-large-ints",
cl::desc("Disable using constant pool for large integers."),
@@ -69,11 +70,8 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU,
StringRef ABIName) {
// Determine default and user-specified characteristics
bool Is64Bit = TT.isArch64Bit();
- if (CPU.empty())
+ if (CPU.empty() || CPU == "generic")
CPU = Is64Bit ? "generic-rv64" : "generic-rv32";
- if (CPU == "generic")
- report_fatal_error(Twine("CPU 'generic' is not supported. Use ") +
- (Is64Bit ? "generic-rv64" : "generic-rv32"));
if (TuneCPU.empty())
TuneCPU = CPU;
@@ -144,7 +142,7 @@ unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
// ZvlLen specifies the minimum required vlen. The upper bound provided by
// riscv-v-vector-bits-max should be no less than it.
- if (RVVVectorBitsMax < ZvlLen)
+ if (RVVVectorBitsMax < (int)ZvlLen)
report_fatal_error("riscv-v-vector-bits-max specified is lower "
"than the Zvl*b limitation");
@@ -162,14 +160,18 @@ unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
}
unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
+ assert(hasVInstructions() &&
+ "Tried to get vector length without Zve or V extension support!");
+
+ if (RVVVectorBitsMin == -1)
+ return ZvlLen;
+
// ZvlLen specifies the minimum required vlen. The lower bound provided by
// riscv-v-vector-bits-min should be no less than it.
- if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < ZvlLen)
+ if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < (int)ZvlLen)
report_fatal_error("riscv-v-vector-bits-min specified is lower "
"than the Zvl*b limitation");
- assert(hasVInstructions() &&
- "Tried to get vector length without Zve or V extension support!");
// FIXME: Change to >= 32 when VLEN = 32 is supported
assert(
(RVVVectorBitsMin == 0 ||
@@ -195,17 +197,19 @@ unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
std::max<unsigned>(std::min<unsigned>(RVVVectorLMULMax, 8), 1));
}
-unsigned RISCVSubtarget::getMaxELENForFixedLengthVectors() const {
- assert(hasVInstructions() &&
- "Tried to get maximum ELEN without Zve or V extension support!");
- assert(RVVVectorELENMax <= 64 && RVVVectorELENMax >= 8 &&
- isPowerOf2_32(RVVVectorELENMax) &&
- "V extension requires a ELEN to be a power of 2 between 8 and 64!");
- unsigned ELEN = hasVInstructionsI64() ? 64 : 32;
- return PowerOf2Floor(
- std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, ELEN), 8));
-}
-
bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
}
+
+bool RISCVSubtarget::enableSubRegLiveness() const {
+ if (EnableSubRegLiveness.getNumOccurrences())
+ return EnableSubRegLiveness;
+ // Enable subregister liveness for RVV to better handle LMUL>1 and segment
+ // load/store.
+ return hasVInstructions();
+}
+
+void RISCVSubtarget::getPostRAMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ Mutations.push_back(createRISCVMacroFusionDAGMutation());
+}
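
The new `enableSubRegLiveness` hook follows a common cl::opt pattern: an explicitly set flag always wins, and only an unset flag falls back to a feature-derived default (here, on whenever V instructions are present). A minimal standalone sketch of that pattern, with illustrative names in place of the LLVM types:

#include <cassert>
#include <iostream>

// Stand-in for the cl::opt pattern used above: a flag remembers whether it
// was explicitly set, so an unset flag can fall back to a derived default.
struct BoolFlag {
  bool Value = false;
  int NumOccurrences = 0;
  void set(bool V) { Value = V; ++NumOccurrences; }
};

BoolFlag EnableSubRegLiveness;

bool enableSubRegLiveness(bool HasVInstructions) {
  // An explicit -riscv-enable-subreg-liveness={0,1} always wins.
  if (EnableSubRegLiveness.NumOccurrences)
    return EnableSubRegLiveness.Value;
  // Otherwise default to on whenever the V extension is present.
  return HasVInstructions;
}

int main() {
  assert(enableSubRegLiveness(/*HasVInstructions=*/true));
  EnableSubRegLiveness.set(false); // user explicitly disabled it
  assert(!enableSubRegLiveness(/*HasVInstructions=*/true));
  std::cout << "explicit flag overrides the derived default\n";
}
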
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 34c6e8e684ac..831f7fadaa62 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -34,22 +34,6 @@ class StringRef;
class RISCVSubtarget : public RISCVGenSubtargetInfo {
public:
- enum ExtZvl : unsigned {
- NotSet = 0,
- Zvl32b = 32,
- Zvl64b = 64,
- Zvl128b = 128,
- Zvl256b = 256,
- Zvl512b = 512,
- Zvl1024b = 1024,
- Zvl2048b = 2048,
- Zvl4096b = 4096,
- Zvl8192b = 8192,
- Zvl16384b = 16384,
- Zvl32768b = 32768,
- Zvl65536b = 65536
- };
-
enum RISCVProcFamilyEnum : uint8_t {
Others,
SiFive7,
@@ -65,6 +49,7 @@ private:
bool HasStdExtF = false;
bool HasStdExtD = false;
bool HasStdExtC = false;
+ bool HasStdExtZihintpause = false;
bool HasStdExtZba = false;
bool HasStdExtZbb = false;
bool HasStdExtZbc = false;
@@ -81,8 +66,13 @@ private:
bool HasStdExtZve64x = false;
bool HasStdExtZve64f = false;
bool HasStdExtZve64d = false;
+ bool HasStdExtZvfh = false;
bool HasStdExtZfhmin = false;
bool HasStdExtZfh = false;
+ bool HasStdExtZfinx = false;
+ bool HasStdExtZdinx = false;
+ bool HasStdExtZhinxmin = false;
+ bool HasStdExtZhinx = false;
bool HasStdExtZbkb = false;
bool HasStdExtZbkc = false;
bool HasStdExtZbkx = false;
@@ -96,13 +86,19 @@ private:
bool HasStdExtZks = false;
bool HasStdExtZkt = false;
bool HasStdExtZk = false;
+ bool HasStdExtZicbom = false;
+ bool HasStdExtZicboz = false;
+ bool HasStdExtZicbop = false;
bool HasRV64 = false;
bool IsRV32E = false;
bool EnableLinkerRelax = false;
bool EnableRVCHintInstrs = true;
+ bool EnableDefaultUnroll = true;
bool EnableSaveRestore = false;
+ bool EnableUnalignedScalarMem = false;
+ bool HasLUIADDIFusion = false;
unsigned XLen = 32;
- ExtZvl ZvlLen = ExtZvl::NotSet;
+ unsigned ZvlLen = 0;
MVT XLenVT = MVT::i32;
uint8_t MaxInterleaveFactor = 2;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
@@ -157,6 +153,7 @@ public:
bool hasStdExtD() const { return HasStdExtD; }
bool hasStdExtC() const { return HasStdExtC; }
bool hasStdExtV() const { return HasStdExtV; }
+ bool hasStdExtZihintpause() const { return HasStdExtZihintpause; }
bool hasStdExtZba() const { return HasStdExtZba; }
bool hasStdExtZbb() const { return HasStdExtZbb; }
bool hasStdExtZbc() const { return HasStdExtZbc; }
@@ -167,9 +164,14 @@ public:
bool hasStdExtZbr() const { return HasStdExtZbr; }
bool hasStdExtZbs() const { return HasStdExtZbs; }
bool hasStdExtZbt() const { return HasStdExtZbt; }
- bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
+ bool hasStdExtZvl() const { return ZvlLen != 0; }
+ bool hasStdExtZvfh() const { return HasStdExtZvfh; }
bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
bool hasStdExtZfh() const { return HasStdExtZfh; }
+ bool hasStdExtZfinx() const { return HasStdExtZfinx; }
+ bool hasStdExtZdinx() const { return HasStdExtZdinx; }
+ bool hasStdExtZhinxmin() const { return HasStdExtZhinxmin; }
+ bool hasStdExtZhinx() const { return HasStdExtZhinx; }
bool hasStdExtZbkb() const { return HasStdExtZbkb; }
bool hasStdExtZbkc() const { return HasStdExtZbkc; }
bool hasStdExtZbkx() const { return HasStdExtZbkx; }
@@ -179,11 +181,17 @@ public:
bool hasStdExtZksed() const { return HasStdExtZksed; }
bool hasStdExtZksh() const { return HasStdExtZksh; }
bool hasStdExtZkr() const { return HasStdExtZkr; }
+ bool hasStdExtZicbom() const { return HasStdExtZicbom; }
+ bool hasStdExtZicboz() const { return HasStdExtZicboz; }
+ bool hasStdExtZicbop() const { return HasStdExtZicbop; }
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
bool enableLinkerRelax() const { return EnableLinkerRelax; }
bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; }
+ bool enableDefaultUnroll() const { return EnableDefaultUnroll; }
bool enableSaveRestore() const { return EnableSaveRestore; }
+ bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
+ bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
unsigned getFLen() const {
@@ -195,27 +203,34 @@ public:
return 0;
}
- unsigned getMinVLen() const { return ZvlLen; }
+ unsigned getELEN() const {
+ assert(hasVInstructions() && "Expected V extension");
+ return hasVInstructionsI64() ? 64 : 32;
+ }
+ unsigned getRealMinVLen() const {
+ unsigned VLen = getMinRVVVectorSizeInBits();
+ return VLen == 0 ? getArchMinVLen() : VLen;
+ }
+ unsigned getRealMaxVLen() const {
+ unsigned VLen = getMaxRVVVectorSizeInBits();
+ return VLen == 0 ? getArchMaxVLen() : VLen;
+ }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isRegisterReservedByUser(Register i) const {
assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
return UserReservedRegister[i];
}
+ bool hasMacroFusion() const { return hasLUIADDIFusion(); }
+
// Vector codegen related methods.
- bool hasVInstructions() const { return HasStdExtV || HasStdExtZve32x; }
- bool hasVInstructionsI64() const { return HasStdExtV || HasStdExtZve64x; }
- bool hasVInstructionsF16() const {
- return (HasStdExtV || HasStdExtZve32f) && HasStdExtZfh;
- }
+ bool hasVInstructions() const { return HasStdExtZve32x; }
+ bool hasVInstructionsI64() const { return HasStdExtZve64x; }
+ bool hasVInstructionsF16() const { return HasStdExtZvfh && HasStdExtZfh; }
// FIXME: Consider Zfinx in the future
- bool hasVInstructionsF32() const {
- return HasStdExtV || (HasStdExtZve32f && HasStdExtF);
- }
+ bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
// FIXME: Consider Zdinx in the future
- bool hasVInstructionsF64() const {
- return HasStdExtV || (HasStdExtZve64d && HasStdExtD);
- }
+ bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
unsigned getMaxInterleaveFactor() const {
@@ -229,6 +244,18 @@ protected:
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ // Return the known range for the bit length of RVV data registers as set
+ // at the command line. A value of 0 means nothing is known about that
+ // particular limit beyond what's implied by the architecture.
+ // NOTE: Please use getRealMinVLen and getRealMaxVLen instead!
+ unsigned getMaxRVVVectorSizeInBits() const;
+ unsigned getMinRVVVectorSizeInBits() const;
+
+ // Return the known range for the bit length of RVV data registers as
+ // indicated by -march and -mattr.
+ unsigned getArchMinVLen() const { return ZvlLen; }
+ unsigned getArchMaxVLen() const { return 65536; }
+
public:
const CallLowering *getCallLowering() const override;
InstructionSelector *getInstructionSelector() const override;
@@ -241,14 +268,13 @@ public:
// pool if exceeded.
unsigned getMaxBuildIntsCost() const;
- // Return the known range for the bit length of RVV data registers. A value
- // of 0 means nothing is known about that particular limit beyond what's
- // implied by the architecture.
- unsigned getMaxRVVVectorSizeInBits() const;
- unsigned getMinRVVVectorSizeInBits() const;
unsigned getMaxLMULForFixedLengthVectors() const;
- unsigned getMaxELENForFixedLengthVectors() const;
bool useRVVForFixedLengthVectors() const;
+
+ bool enableSubRegLiveness() const override;
+
+ void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
+ &Mutations) const override;
};
} // End llvm namespace
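
`getRealMinVLen`/`getRealMaxVLen` resolve the usable VLEN range by preferring the command-line overrides and otherwise falling back to what -march/-mattr imply: `ZvlLen` as the architectural floor, 65536 as the architectural ceiling. A standalone sketch of that resolution; the example assumes a Zvl128b configuration (e.g. rv64gcv), and all names are illustrative:

#include <cassert>
#include <iostream>

// A command-line override of 0 means "unset", in which case the bound
// falls back to the architecturally implied value.
unsigned realMinVLen(unsigned CmdLineMin, unsigned ZvlLen) {
  return CmdLineMin == 0 ? ZvlLen : CmdLineMin;
}
unsigned realMaxVLen(unsigned CmdLineMax) {
  return CmdLineMax == 0 ? 65536u : CmdLineMax;
}

int main() {
  // Zvl128b with no overrides: the known range is [128, 65536] bits.
  assert(realMinVLen(0, 128) == 128);
  assert(realMaxVLen(0) == 65536);
  // -riscv-v-vector-bits-min=256 narrows the lower bound.
  assert(realMinVLen(256, 128) == 256);
  std::cout << "VLEN range resolved\n";
}
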
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index db5e2f1eeb6f..b2707b753e87 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -13,6 +13,8 @@
#include "RISCVTargetMachine.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h"
+#include "RISCVMacroFusion.h"
#include "RISCVTargetObjectFile.h"
#include "RISCVTargetTransformInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
@@ -22,6 +24,8 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,13 +34,20 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO.h"
using namespace llvm;
+static cl::opt<bool> EnableRedundantCopyElimination(
+ "riscv-enable-copyelim",
+ cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
+ cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVSExtWRemovalPass(*PR);
@@ -53,9 +64,7 @@ static StringRef computeDataLayout(const Triple &TT) {
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.value_or(Reloc::Static);
}
RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
@@ -72,6 +81,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
// RISC-V supports the MachineOutliner.
setMachineOutliner(true);
+ setSupportsDefaultOutlining(true);
}
const RISCVSubtarget *
@@ -109,7 +119,7 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetTransformInfo
-RISCVTargetMachine::getTargetTransformInfo(const Function &F) {
+RISCVTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(RISCVTTIImpl(this, F));
}
@@ -132,7 +142,30 @@ public:
return getTM<RISCVTargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+ if (ST.hasMacroFusion()) {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createRISCVMacroFusionDAGMutation());
+ return DAG;
+ }
+ return nullptr;
+ }
+
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+ if (ST.hasMacroFusion()) {
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createRISCVMacroFusionDAGMutation());
+ return DAG;
+ }
+ return nullptr;
+ }
+
void addIRPasses() override;
+ bool addPreISel() override;
bool addInstSelector() override;
bool addIRTranslator() override;
bool addLegalizeMachineIR() override;
@@ -143,6 +176,7 @@ public:
void addPreSched2() override;
void addMachineSSAOptimization() override;
void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
};
} // namespace
@@ -158,8 +192,18 @@ void RISCVPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
}
+bool RISCVPassConfig::addPreISel() {
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ // Add a barrier before instruction selection so that we do not end up
+ // with deleted block addresses after enabling default outlining. See
+ // D99707 for more details.
+ addPass(createBarrierNoopPass());
+ }
+ return false;
+}
+
bool RISCVPassConfig::addInstSelector() {
- addPass(createRISCVISelDag(getRISCVTargetMachine()));
+ addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel()));
return false;
}
@@ -186,7 +230,10 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
void RISCVPassConfig::addPreSched2() {}
-void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void RISCVPassConfig::addPreEmitPass() {
+ addPass(&BranchRelaxationPassID);
+ addPass(createRISCVMakeCompressibleOptPass());
+}
void RISCVPassConfig::addPreEmitPass2() {
addPass(createRISCVExpandPseudoPass());
@@ -208,3 +255,28 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVInsertVSETVLIPass());
}
+
+void RISCVPassConfig::addPostRegAlloc() {
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+ addPass(createRISCVRedundantCopyEliminationPass());
+}
+
+yaml::MachineFunctionInfo *
+RISCVTargetMachine::createDefaultFuncInfoYAML() const {
+ return new yaml::RISCVMachineFunctionInfo();
+}
+
+yaml::MachineFunctionInfo *
+RISCVTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ return new yaml::RISCVMachineFunctionInfo(*MFI);
+}
+
+bool RISCVTargetMachine::parseMachineFunctionInfo(
+ const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error, SMRange &SourceRange) const {
+ const auto &YamlMFI =
+ static_cast<const yaml::RISCVMachineFunctionInfo &>(MFI);
+ PFS.MF.getInfo<RISCVMachineFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
+ return false;
+}
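
One small cleanup in this file replaces the explicit `hasValue()`/dereference pair in `getEffectiveRelocModel` with a single `value_or` call. The same idiom, shown standalone with `std::optional` (LLVM's `Optional` mirrors this API):

#include <cassert>
#include <optional>

enum class RelocModel { Static, PIC };

// Before: if (!RM.hasValue()) return RelocModel::Static; return *RM;
// After: value_or expresses "use the default when unset" in one call.
RelocModel getEffectiveRelocModel(std::optional<RelocModel> RM) {
  return RM.value_or(RelocModel::Static);
}

int main() {
  assert(getEffectiveRelocModel(std::nullopt) == RelocModel::Static);
  assert(getEffectiveRelocModel(RelocModel::PIC) == RelocModel::PIC);
}
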
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index 3156333f7ee1..087646fb5ed9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -42,10 +42,18 @@ public:
return TLOF.get();
}
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
virtual bool isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DstAS) const override;
+
+ yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+ yaml::MachineFunctionInfo *
+ convertFuncInfoToYAML(const MachineFunction &MF) const override;
+ bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+ PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error,
+ SMRange &SourceRange) const override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 99e6774a02e4..29d3c5e491de 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -11,6 +11,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include <cmath>
using namespace llvm;
#define DEBUG_TYPE "riscvtti"
@@ -131,19 +132,17 @@ bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
}
Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
- // There is no assumption of the maximum vector length in V specification.
- // We use the value specified by users as the maximum vector length.
- // This function will use the assumed maximum vector length to get the
- // maximum vscale for LoopVectorizer.
- // If users do not specify the maximum vector length, we have no way to
- // know whether the LoopVectorizer is safe to do or not.
- // We only consider to use single vector register (LMUL = 1) to vectorize.
- unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
- if (ST->hasVInstructions() && MaxVectorSizeInBits != 0)
- return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
+ if (ST->hasVInstructions())
+ return ST->getRealMaxVLen() / RISCV::RVVBitsPerBlock;
return BaseT::getMaxVScale();
}
+Optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const {
+ if (ST->hasVInstructions())
+ return ST->getRealMinVLen() / RISCV::RVVBitsPerBlock;
+ return BaseT::getVScaleForTuning();
+}
+
TypeSize
RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
unsigned LMUL = PowerOf2Floor(
@@ -153,7 +152,7 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
return TypeSize::getFixed(ST->getXLen());
case TargetTransformInfo::RGK_FixedWidthVector:
return TypeSize::getFixed(
- ST->hasVInstructions() ? LMUL * ST->getMinRVVVectorSizeInBits() : 0);
+ ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
case TargetTransformInfo::RGK_ScalableVector:
return TypeSize::getScalable(
ST->hasVInstructions() ? LMUL * RISCV::RVVBitsPerBlock : 0);
@@ -162,6 +161,61 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
llvm_unreachable("Unsupported register kind");
}
+InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ unsigned Cost = 2; // vslidedown+vslideup.
+ // TODO: LMUL should increase cost.
+ // TODO: Multiplying by LT.first implies this legalizes into multiple copies
+ // of similar code, but I think we expand through memory.
+ return Cost * LT.first;
+}
+
+InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
+ VectorType *Tp, ArrayRef<int> Mask,
+ int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
+ if (isa<ScalableVectorType>(Tp)) {
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ switch (Kind) {
+ default:
+ // Fallthrough to generic handling.
+ // TODO: Most of these cases will return getInvalid in generic code, and
+ // must be implemented here.
+ break;
+ case TTI::SK_Broadcast: {
+ return LT.first * 1;
+ }
+ case TTI::SK_Splice:
+ return getSpliceCost(Tp, Index);
+ case TTI::SK_Reverse:
+ // Most of the cost here is producing the vrgather index register
+ // Example sequence:
+ // csrr a0, vlenb
+ // srli a0, a0, 3
+ // addi a0, a0, -1
+ // vsetvli a1, zero, e8, mf8, ta, mu (ignored)
+ // vid.v v9
+ // vrsub.vx v10, v9, a0
+ // vrgather.vv v9, v8, v10
+ return LT.first * 6;
+ }
+ }
+
+ return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+}
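
To make the scalable shuffle costs concrete: a reverse pays for the six-instruction vrgather sequence per legalized register, and a splice pays for one vslidedown/vslideup pair. A standalone sketch of the arithmetic, with `LegalizationFactor` standing in for `LT.first`:

#include <cassert>
#include <iostream>

// Illustrative stand-in for the scalable-shuffle cost model above.
unsigned reverseCost(unsigned LegalizationFactor) {
  return LegalizationFactor * 6; // csrr/srli/addi/vid/vrsub/vrgather
}
unsigned spliceCost(unsigned LegalizationFactor) {
  return LegalizationFactor * 2; // vslidedown + vslideup
}

int main() {
  // A type that fits in one vector register: 6 instructions to reverse.
  assert(reverseCost(1) == 6);
  // A type legalized into 4 registers: 4x the sequence.
  assert(reverseCost(4) == 24);
  assert(spliceCost(1) == 2);
  std::cout << "shuffle cost model sketch ok\n";
}
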
+
+InstructionCost
+RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<ScalableVectorType>(Src))
+ return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
+
+ return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
+}
+
InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
@@ -176,31 +230,152 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
- // FIXME: Only supporting fixed vectors for now.
- if (!isa<FixedVectorType>(DataTy))
- return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
-
- auto *VTy = cast<FixedVectorType>(DataTy);
- unsigned NumLoads = VTy->getNumElements();
- InstructionCost MemOpCost =
- getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
+ // Cost is proportional to the number of memory operations implied. For
+ // scalable vectors, we use an upper bound on that number since we don't
+ // know exactly what VL will be.
+ auto &VTy = *cast<VectorType>(DataTy);
+ InstructionCost MemOpCost = getMemoryOpCost(Opcode, VTy.getElementType(),
+ Alignment, 0, CostKind, I);
+ unsigned NumLoads = getMaxVLFor(&VTy);
return NumLoads * MemOpCost;
}
+InstructionCost
+RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ auto *RetTy = ICA.getReturnType();
+ switch (ICA.getID()) {
+ // TODO: Add more intrinsics.
+ case Intrinsic::experimental_stepvector: {
+ unsigned Cost = 1; // vid
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ return Cost + (LT.first - 1);
+ }
+ default:
+ break;
+ }
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
+
+InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (isa<VectorType>(Dst) && isa<VectorType>(Src)) {
+ // FIXME: Need to compute legalizing cost for illegal types.
+ if (!isTypeLegal(Src) || !isTypeLegal(Dst))
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+
+ // Skip if element size of Dst or Src is bigger than ELEN.
+ if (Src->getScalarSizeInBits() > ST->getELEN() ||
+ Dst->getScalarSizeInBits() > ST->getELEN())
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // FIXME: Need to consider vsetvli and lmul.
+ int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
+ (int)Log2_32(Src->getScalarSizeInBits());
+ switch (ISD) {
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return 1;
+ case ISD::TRUNCATE:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ // Counts of narrow/widen instructions.
+ return std::abs(PowDiff);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (std::abs(PowDiff) <= 1)
+ return 1;
+ // The backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
+ // so it only needs two conversions.
+ if (Src->isIntOrIntVectorTy())
+ return 2;
+ // Counts of narrow/widen instructions.
+ return std::abs(PowDiff);
+ }
+ }
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+}
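
The `PowDiff` heuristic in `getCastInstrCost` charges roughly one instruction per halving/doubling step between the element widths, with the int-to-FP shortcut described in the comment above. A standalone sketch of the arithmetic (illustrative names; the real code dispatches on ISD opcodes and legalized types):

#include <cassert>
#include <cmath>
#include <cstdlib>

enum class CastKind { Extend, Resize, IntFp };

// PowDiff counts the halving/doubling steps between element widths.
int castCost(CastKind K, unsigned SrcBits, unsigned DstBits, bool SrcIsInt) {
  int PowDiff = (int)std::lround(std::log2(DstBits)) -
                (int)std::lround(std::log2(SrcBits));
  switch (K) {
  case CastKind::Extend:
    return 1;                   // one vsext/vzext
  case CastKind::Resize:
    return std::abs(PowDiff);   // trunc / fpext / fpround chain
  case CastKind::IntFp:
    if (std::abs(PowDiff) <= 1)
      return 1;                 // same or adjacent width: one vfcvt
    if (SrcIsInt)
      return 2;                 // e.g. ext i8 first, then a single vfcvt
    return std::abs(PowDiff);   // FP source: chain of narrow/widen steps
  }
  return 0;
}

int main() {
  assert(castCost(CastKind::Resize, 64, 16, false) == 2); // f64->f16: 2 rounds
  assert(castCost(CastKind::IntFp, 8, 64, true) == 2);    // i8->f64: ext+vfcvt
  assert(castCost(CastKind::IntFp, 32, 64, true) == 1);   // i32->f64: 1 widen
}
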
+
+unsigned RISCVTTIImpl::getMaxVLFor(VectorType *Ty) {
+ if (isa<ScalableVectorType>(Ty)) {
+ const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
+ const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
+ const unsigned VectorBitsMax = ST->getRealMaxVLen();
+ return RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+ }
+ return cast<FixedVectorType>(Ty)->getNumElements();
+}
+
+InstructionCost
+RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsUnsigned,
+ TTI::TargetCostKind CostKind) {
+ if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
+ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+ // Skip if scalar size of Ty is bigger than ELEN.
+ if (Ty->getScalarSizeInBits() > ST->getELEN())
+ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ if (Ty->getElementType()->isIntegerTy(1))
+ // vcpop sequences, see vreduction-mask.ll. umax and smin actually only
+ // cost 2, but we don't have enough info here, so we slightly overestimate.
+ return (LT.first - 1) + 3;
+
+ // IR Reduction is composed by two vmv and one rvv reduction instruction.
+ InstructionCost BaseCost = 2;
+ unsigned VL = getMaxVLFor(Ty);
+ return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+}
+
+InstructionCost
+RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind) {
+ if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
+ return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
+
+ // Skip if scalar size of Ty is bigger than ELEN.
+ if (Ty->getScalarSizeInBits() > ST->getELEN())
+ return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
+ ISD != ISD::FADD)
+ return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
+
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ if (Ty->getElementType()->isIntegerTy(1))
+ // vcpop sequences, see vreduction-mask.ll
+ return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
+
+ // IR Reduction is composed by two vmv and one rvv reduction instruction.
+ InstructionCost BaseCost = 2;
+ unsigned VL = getMaxVLFor(Ty);
+ if (TTI::requiresOrderedReduction(FMF))
+ return (LT.first - 1) + BaseCost + VL;
+ return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
+}
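
The two reduction costs above share one shape: two `vmv` instructions of glue around the reduction itself, plus a logarithmic tree of steps when the operation is reassociable, or a linear chain of VL steps when ordered FP semantics are required; `LT.first - 1` covers types split across several registers. A standalone sketch with illustrative names:

#include <cassert>

unsigned log2Ceil(unsigned V) {
  unsigned L = 0;
  while ((1u << L) < V)
    ++L;
  return L;
}

// Shape of getArithmeticReductionCost / getMinMaxReductionCost above.
unsigned reductionCost(unsigned LegalizationFactor, unsigned VL, bool Ordered) {
  unsigned BaseCost = 2; // vmv glue around the vred* instruction
  unsigned Steps = Ordered ? VL : log2Ceil(VL);
  return (LegalizationFactor - 1) + BaseCost + Steps;
}

int main() {
  // Reducing 4 lanes reassociably: 2 + ceil(log2(4)) = 4.
  assert(reductionCost(1, 4, /*Ordered=*/false) == 4);
  // Ordered (strict FP) over the same 4 lanes: 2 + 4 chained adds = 6.
  assert(reductionCost(1, 4, /*Ordered=*/true) == 6);
}
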
+
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) {
// TODO: More tuning on benchmarks and metrics, with changes as needed,
// would apply to all settings below to improve performance.
- // Support explicit targets enabled for SiFive with the unrolling preferences
- // below
- bool UseDefaultPreferences = true;
- if (ST->getProcFamily() == RISCVSubtarget::SiFive7)
- UseDefaultPreferences = false;
- if (UseDefaultPreferences)
+ if (ST->enableDefaultUnroll())
return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
// Enable upper-bound unrolling universally, not dependent upon the conditions
@@ -276,14 +451,14 @@ void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getPeelingPreferences(L, SE, PP);
}
-InstructionCost RISCVTTIImpl::getRegUsageForType(Type *Ty) {
+unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
TypeSize Size = Ty->getPrimitiveSizeInBits();
if (Ty->isVectorTy()) {
if (Size.isScalable() && ST->hasVInstructions())
return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
if (ST->useRVVForFixedLengthVectors())
- return divideCeil(Size, ST->getMinRVVVectorSizeInBits());
+ return divideCeil(Size, ST->getRealMinVLen());
}
return BaseT::getRegUsageForType(Ty);
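
`getRegUsageForType` now measures fixed-width vectors against `getRealMinVLen()` rather than the raw command-line value. A standalone sketch of both branches (RVVBitsPerBlock is 64; the example types are illustrative):

#include <cassert>

unsigned divideCeil(unsigned N, unsigned D) { return (N + D - 1) / D; }

// Register pressure for a vector type is how many vector registers it
// occupies: scalable types are measured in RVV blocks of 64 bits, fixed
// types against the smallest VLEN we may assume.
unsigned regUsage(bool Scalable, unsigned KnownMinBits, unsigned RealMinVLen) {
  const unsigned RVVBitsPerBlock = 64;
  if (Scalable)
    return divideCeil(KnownMinBits, RVVBitsPerBlock);
  return divideCeil(KnownMinBits, RealMinVLen);
}

int main() {
  // <vscale x 4 x i64>: known minimum of 256 bits -> 4 blocks (LMUL=4).
  assert(regUsage(true, 256, 0) == 4);
  // Fixed <32 x i32> (1024 bits) with a real min VLEN of 256 -> 4 registers.
  assert(regUsage(false, 1024, 256) == 4);
}
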
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index e79c4f75712b..7caf0fedb2ca 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -37,6 +37,7 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
const RISCVSubtarget *getST() const { return ST; }
const RISCVTargetLowering *getTLI() const { return TLI; }
+ unsigned getMaxVLFor(VectorType *Ty);
public:
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -57,10 +58,15 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const;
bool supportsScalableVectors() const { return ST->hasVInstructions(); }
Optional<unsigned> getMaxVScale() const;
+ Optional<unsigned> getVScaleForTuning() const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
- InstructionCost getRegUsageForType(Type *Ty);
+ unsigned getRegUsageForType(Type *Ty);
+
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
@@ -73,24 +79,50 @@ public:
return ST->useRVVForFixedLengthVectors() ? 16 : 0;
}
+ InstructionCost getSpliceCost(VectorType *Tp, int Index);
+ InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ ArrayRef<int> Mask, int Index,
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None);
+
+ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I);
+ InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
+ InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsUnsigned,
+ TTI::TargetCostKind CostKind);
+
+ InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind);
+
+ bool isElementTypeLegalForScalableVector(Type *Ty) const {
+ return TLI->isLegalElementTypeForRVV(Ty);
+ }
+
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
if (!ST->hasVInstructions())
return false;
// Only support fixed vectors if we know the minimum vector size.
- if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
+ if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
return false;
// Don't allow elements larger than the ELEN.
// FIXME: How to limit for scalable vectors?
if (isa<FixedVectorType>(DataType) &&
- DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ DataType->getScalarSizeInBits() > ST->getELEN())
return false;
if (Alignment <
@@ -112,13 +144,13 @@ public:
return false;
// Only support fixed vectors if we know the minimum vector size.
- if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
+ if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
return false;
// Don't allow elements larger than the ELEN.
// FIXME: How to limit for scalable vectors?
if (isa<FixedVectorType>(DataType) &&
- DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ DataType->getScalarSizeInBits() > ST->getELEN())
return false;
if (Alignment <
@@ -135,6 +167,16 @@ public:
return isLegalMaskedGatherScatter(DataType, Alignment);
}
+ bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
+ // Scalarize masked gather for RV64 if EEW=64 indices aren't supported.
+ return ST->is64Bit() && !ST->hasVInstructionsI64();
+ }
+
+ bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
+ // Scalarize masked scatter for RV64 if EEW=64 indices aren't supported.
+ return ST->is64Bit() && !ST->hasVInstructionsI64();
+ }
+
/// \returns How the target needs this vector-predicated operation to be
/// transformed.
TargetTransformInfo::VPLegalization
@@ -145,9 +187,6 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const {
- if (!ST->hasVInstructions())
- return false;
-
if (!VF.isScalable())
return true;
@@ -179,18 +218,53 @@ public:
return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
}
- // TODO: We should define RISC-V's own register classes.
- // e.g. register class for FPR.
+ enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
unsigned getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
- if (Vector) {
- if (ST->hasVInstructions())
+ switch (ClassID) {
+ case RISCVRegisterClass::GPRRC:
+ // 31 = 32 GPR - x0 (zero register)
+ // FIXME: Should we exclude fixed registers like SP, TP or GP?
+ return 31;
+ case RISCVRegisterClass::FPRRC:
+ if (ST->hasStdExtF())
return 32;
return 0;
+ case RISCVRegisterClass::VRRC:
+ // Although there are 32 vector registers, v0 is special in that it is the
+ // only register that can be used to hold a mask.
+ // FIXME: Should we conservatively return 31 as the number of usable
+ // vector registers?
+ return ST->hasVInstructions() ? 32 : 0;
+ }
+ llvm_unreachable("unknown register class");
+ }
+
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
+ if (Vector)
+ return RISCVRegisterClass::VRRC;
+ if (!Ty)
+ return RISCVRegisterClass::GPRRC;
+
+ Type *ScalarTy = Ty->getScalarType();
+ if ((ScalarTy->isHalfTy() && ST->hasStdExtZfh()) ||
+ (ScalarTy->isFloatTy() && ST->hasStdExtF()) ||
+ (ScalarTy->isDoubleTy() && ST->hasStdExtD())) {
+ return RISCVRegisterClass::FPRRC;
+ }
+
+ return RISCVRegisterClass::GPRRC;
+ }
+
+ const char *getRegisterClassName(unsigned ClassID) const {
+ switch (ClassID) {
+ case RISCVRegisterClass::GPRRC:
+ return "RISCV::GPRRC";
+ case RISCVRegisterClass::FPRRC:
+ return "RISCV::FPRRC";
+ case RISCVRegisterClass::VRRC:
+ return "RISCV::VRRC";
}
- // 31 = 32 GPR - x0 (zero register)
- // FIXME: Should we exclude fixed registers like SP, TP or GP?
- return 31;
+ llvm_unreachable("unknown register class");
}
};
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
new file mode 100644
index 000000000000..4156a0026411
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVAsmBackend.cpp
@@ -0,0 +1,63 @@
+//===-- SPIRVAsmBackend.cpp - SPIR-V Assembler Backend ---------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+namespace {
+
+class SPIRVAsmBackend : public MCAsmBackend {
+public:
+ SPIRVAsmBackend(support::endianness Endian) : MCAsmBackend(Endian) {}
+
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsResolved,
+ const MCSubtargetInfo *STI) const override {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createSPIRVObjectTargetWriter();
+ }
+
+ // No instruction requires relaxation.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override { return 1; }
+
+ bool mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {
+ return false;
+ }
+
+ void relaxInstruction(MCInst &Inst,
+ const MCSubtargetInfo &STI) const override {}
+
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createSPIRVAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &) {
+ return new SPIRVAsmBackend(support::little);
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
new file mode 100644
index 000000000000..1a3e35a5f901
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
@@ -0,0 +1,1072 @@
+//===-- SPIRVBaseInfo.cpp - Top level definitions for SPIRV ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the SPIRV target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVBaseInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+namespace SPIRV {
+
+#define CASE(CLASS, ATTR) \
+ case CLASS::ATTR: \
+ return #ATTR;
+#define CASE_SUF(CLASS, SF, ATTR) \
+ case CLASS::SF##_##ATTR: \
+ return #ATTR;
+
+// Implement getEnumName(Enum e) helper functions.
+// TODO: re-implement all the functions using TableGen.
+StringRef getCapabilityName(Capability e) {
+ switch (e) {
+ CASE(Capability, Matrix)
+ CASE(Capability, Shader)
+ CASE(Capability, Geometry)
+ CASE(Capability, Tessellation)
+ CASE(Capability, Addresses)
+ CASE(Capability, Linkage)
+ CASE(Capability, Kernel)
+ CASE(Capability, Vector16)
+ CASE(Capability, Float16Buffer)
+ CASE(Capability, Float16)
+ CASE(Capability, Float64)
+ CASE(Capability, Int64)
+ CASE(Capability, Int64Atomics)
+ CASE(Capability, ImageBasic)
+ CASE(Capability, ImageReadWrite)
+ CASE(Capability, ImageMipmap)
+ CASE(Capability, Pipes)
+ CASE(Capability, Groups)
+ CASE(Capability, DeviceEnqueue)
+ CASE(Capability, LiteralSampler)
+ CASE(Capability, AtomicStorage)
+ CASE(Capability, Int16)
+ CASE(Capability, TessellationPointSize)
+ CASE(Capability, GeometryPointSize)
+ CASE(Capability, ImageGatherExtended)
+ CASE(Capability, StorageImageMultisample)
+ CASE(Capability, UniformBufferArrayDynamicIndexing)
+ CASE(Capability, SampledImageArrayDymnamicIndexing)
+ CASE(Capability, ClipDistance)
+ CASE(Capability, CullDistance)
+ CASE(Capability, ImageCubeArray)
+ CASE(Capability, SampleRateShading)
+ CASE(Capability, ImageRect)
+ CASE(Capability, SampledRect)
+ CASE(Capability, GenericPointer)
+ CASE(Capability, Int8)
+ CASE(Capability, InputAttachment)
+ CASE(Capability, SparseResidency)
+ CASE(Capability, MinLod)
+ CASE(Capability, Sampled1D)
+ CASE(Capability, Image1D)
+ CASE(Capability, SampledCubeArray)
+ CASE(Capability, SampledBuffer)
+ CASE(Capability, ImageBuffer)
+ CASE(Capability, ImageMSArray)
+ CASE(Capability, StorageImageExtendedFormats)
+ CASE(Capability, ImageQuery)
+ CASE(Capability, DerivativeControl)
+ CASE(Capability, InterpolationFunction)
+ CASE(Capability, TransformFeedback)
+ CASE(Capability, GeometryStreams)
+ CASE(Capability, StorageImageReadWithoutFormat)
+ CASE(Capability, StorageImageWriteWithoutFormat)
+ CASE(Capability, MultiViewport)
+ CASE(Capability, SubgroupDispatch)
+ CASE(Capability, NamedBarrier)
+ CASE(Capability, PipeStorage)
+ CASE(Capability, GroupNonUniform)
+ CASE(Capability, GroupNonUniformVote)
+ CASE(Capability, GroupNonUniformArithmetic)
+ CASE(Capability, GroupNonUniformBallot)
+ CASE(Capability, GroupNonUniformShuffle)
+ CASE(Capability, GroupNonUniformShuffleRelative)
+ CASE(Capability, GroupNonUniformClustered)
+ CASE(Capability, GroupNonUniformQuad)
+ CASE(Capability, SubgroupBallotKHR)
+ CASE(Capability, DrawParameters)
+ CASE(Capability, SubgroupVoteKHR)
+ CASE(Capability, StorageBuffer16BitAccess)
+ CASE(Capability, StorageUniform16)
+ CASE(Capability, StoragePushConstant16)
+ CASE(Capability, StorageInputOutput16)
+ CASE(Capability, DeviceGroup)
+ CASE(Capability, MultiView)
+ CASE(Capability, VariablePointersStorageBuffer)
+ CASE(Capability, VariablePointers)
+ CASE(Capability, AtomicStorageOps)
+ CASE(Capability, SampleMaskPostDepthCoverage)
+ CASE(Capability, StorageBuffer8BitAccess)
+ CASE(Capability, UniformAndStorageBuffer8BitAccess)
+ CASE(Capability, StoragePushConstant8)
+ CASE(Capability, DenormPreserve)
+ CASE(Capability, DenormFlushToZero)
+ CASE(Capability, SignedZeroInfNanPreserve)
+ CASE(Capability, RoundingModeRTE)
+ CASE(Capability, RoundingModeRTZ)
+ CASE(Capability, Float16ImageAMD)
+ CASE(Capability, ImageGatherBiasLodAMD)
+ CASE(Capability, FragmentMaskAMD)
+ CASE(Capability, StencilExportEXT)
+ CASE(Capability, ImageReadWriteLodAMD)
+ CASE(Capability, SampleMaskOverrideCoverageNV)
+ CASE(Capability, GeometryShaderPassthroughNV)
+ CASE(Capability, ShaderViewportIndexLayerEXT)
+ CASE(Capability, ShaderViewportMaskNV)
+ CASE(Capability, ShaderStereoViewNV)
+ CASE(Capability, PerViewAttributesNV)
+ CASE(Capability, FragmentFullyCoveredEXT)
+ CASE(Capability, MeshShadingNV)
+ CASE(Capability, ShaderNonUniformEXT)
+ CASE(Capability, RuntimeDescriptorArrayEXT)
+ CASE(Capability, InputAttachmentArrayDynamicIndexingEXT)
+ CASE(Capability, UniformTexelBufferArrayDynamicIndexingEXT)
+ CASE(Capability, StorageTexelBufferArrayDynamicIndexingEXT)
+ CASE(Capability, UniformBufferArrayNonUniformIndexingEXT)
+ CASE(Capability, SampledImageArrayNonUniformIndexingEXT)
+ CASE(Capability, StorageBufferArrayNonUniformIndexingEXT)
+ CASE(Capability, StorageImageArrayNonUniformIndexingEXT)
+ CASE(Capability, InputAttachmentArrayNonUniformIndexingEXT)
+ CASE(Capability, UniformTexelBufferArrayNonUniformIndexingEXT)
+ CASE(Capability, StorageTexelBufferArrayNonUniformIndexingEXT)
+ CASE(Capability, RayTracingNV)
+ CASE(Capability, SubgroupShuffleINTEL)
+ CASE(Capability, SubgroupBufferBlockIOINTEL)
+ CASE(Capability, SubgroupImageBlockIOINTEL)
+ CASE(Capability, SubgroupImageMediaBlockIOINTEL)
+ CASE(Capability, SubgroupAvcMotionEstimationINTEL)
+ CASE(Capability, SubgroupAvcMotionEstimationIntraINTEL)
+ CASE(Capability, SubgroupAvcMotionEstimationChromaINTEL)
+ CASE(Capability, GroupNonUniformPartitionedNV)
+ CASE(Capability, VulkanMemoryModelKHR)
+ CASE(Capability, VulkanMemoryModelDeviceScopeKHR)
+ CASE(Capability, ImageFootprintNV)
+ CASE(Capability, FragmentBarycentricNV)
+ CASE(Capability, ComputeDerivativeGroupQuadsNV)
+ CASE(Capability, ComputeDerivativeGroupLinearNV)
+ CASE(Capability, FragmentDensityEXT)
+ CASE(Capability, PhysicalStorageBufferAddressesEXT)
+ CASE(Capability, CooperativeMatrixNV)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getSourceLanguageName(SourceLanguage e) {
+ switch (e) {
+ CASE(SourceLanguage, Unknown)
+ CASE(SourceLanguage, ESSL)
+ CASE(SourceLanguage, GLSL)
+ CASE(SourceLanguage, OpenCL_C)
+ CASE(SourceLanguage, OpenCL_CPP)
+ CASE(SourceLanguage, HLSL)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getExecutionModelName(ExecutionModel e) {
+ switch (e) {
+ CASE(ExecutionModel, Vertex)
+ CASE(ExecutionModel, TessellationControl)
+ CASE(ExecutionModel, TessellationEvaluation)
+ CASE(ExecutionModel, Geometry)
+ CASE(ExecutionModel, Fragment)
+ CASE(ExecutionModel, GLCompute)
+ CASE(ExecutionModel, Kernel)
+ CASE(ExecutionModel, TaskNV)
+ CASE(ExecutionModel, MeshNV)
+ CASE(ExecutionModel, RayGenerationNV)
+ CASE(ExecutionModel, IntersectionNV)
+ CASE(ExecutionModel, AnyHitNV)
+ CASE(ExecutionModel, ClosestHitNV)
+ CASE(ExecutionModel, MissNV)
+ CASE(ExecutionModel, CallableNV)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getAddressingModelName(AddressingModel e) {
+ switch (e) {
+ CASE(AddressingModel, Logical)
+ CASE(AddressingModel, Physical32)
+ CASE(AddressingModel, Physical64)
+ CASE(AddressingModel, PhysicalStorageBuffer64EXT)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getMemoryModelName(MemoryModel e) {
+ switch (e) {
+ CASE(MemoryModel, Simple)
+ CASE(MemoryModel, GLSL450)
+ CASE(MemoryModel, OpenCL)
+ CASE(MemoryModel, VulkanKHR)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getExecutionModeName(ExecutionMode e) {
+ switch (e) {
+ CASE(ExecutionMode, Invocations)
+ CASE(ExecutionMode, SpacingEqual)
+ CASE(ExecutionMode, SpacingFractionalEven)
+ CASE(ExecutionMode, SpacingFractionalOdd)
+ CASE(ExecutionMode, VertexOrderCw)
+ CASE(ExecutionMode, VertexOrderCcw)
+ CASE(ExecutionMode, PixelCenterInteger)
+ CASE(ExecutionMode, OriginUpperLeft)
+ CASE(ExecutionMode, OriginLowerLeft)
+ CASE(ExecutionMode, EarlyFragmentTests)
+ CASE(ExecutionMode, PointMode)
+ CASE(ExecutionMode, Xfb)
+ CASE(ExecutionMode, DepthReplacing)
+ CASE(ExecutionMode, DepthGreater)
+ CASE(ExecutionMode, DepthLess)
+ CASE(ExecutionMode, DepthUnchanged)
+ CASE(ExecutionMode, LocalSize)
+ CASE(ExecutionMode, LocalSizeHint)
+ CASE(ExecutionMode, InputPoints)
+ CASE(ExecutionMode, InputLines)
+ CASE(ExecutionMode, InputLinesAdjacency)
+ CASE(ExecutionMode, Triangles)
+ CASE(ExecutionMode, InputTrianglesAdjacency)
+ CASE(ExecutionMode, Quads)
+ CASE(ExecutionMode, Isolines)
+ CASE(ExecutionMode, OutputVertices)
+ CASE(ExecutionMode, OutputPoints)
+ CASE(ExecutionMode, OutputLineStrip)
+ CASE(ExecutionMode, OutputTriangleStrip)
+ CASE(ExecutionMode, VecTypeHint)
+ CASE(ExecutionMode, ContractionOff)
+ CASE(ExecutionMode, Initializer)
+ CASE(ExecutionMode, Finalizer)
+ CASE(ExecutionMode, SubgroupSize)
+ CASE(ExecutionMode, SubgroupsPerWorkgroup)
+ CASE(ExecutionMode, SubgroupsPerWorkgroupId)
+ CASE(ExecutionMode, LocalSizeId)
+ CASE(ExecutionMode, LocalSizeHintId)
+ CASE(ExecutionMode, PostDepthCoverage)
+ CASE(ExecutionMode, DenormPreserve)
+ CASE(ExecutionMode, DenormFlushToZero)
+ CASE(ExecutionMode, SignedZeroInfNanPreserve)
+ CASE(ExecutionMode, RoundingModeRTE)
+ CASE(ExecutionMode, RoundingModeRTZ)
+ CASE(ExecutionMode, StencilRefReplacingEXT)
+ CASE(ExecutionMode, OutputLinesNV)
+ CASE(ExecutionMode, DerivativeGroupQuadsNV)
+ CASE(ExecutionMode, DerivativeGroupLinearNV)
+ CASE(ExecutionMode, OutputTrianglesNV)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getStorageClassName(StorageClass e) {
+ switch (e) {
+ CASE(StorageClass, UniformConstant)
+ CASE(StorageClass, Input)
+ CASE(StorageClass, Uniform)
+ CASE(StorageClass, Output)
+ CASE(StorageClass, Workgroup)
+ CASE(StorageClass, CrossWorkgroup)
+ CASE(StorageClass, Private)
+ CASE(StorageClass, Function)
+ CASE(StorageClass, Generic)
+ CASE(StorageClass, PushConstant)
+ CASE(StorageClass, AtomicCounter)
+ CASE(StorageClass, Image)
+ CASE(StorageClass, StorageBuffer)
+ CASE(StorageClass, CallableDataNV)
+ CASE(StorageClass, IncomingCallableDataNV)
+ CASE(StorageClass, RayPayloadNV)
+ CASE(StorageClass, HitAttributeNV)
+ CASE(StorageClass, IncomingRayPayloadNV)
+ CASE(StorageClass, ShaderRecordBufferNV)
+ CASE(StorageClass, PhysicalStorageBufferEXT)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getDimName(Dim dim) {
+ switch (dim) {
+ CASE_SUF(Dim, DIM, 1D)
+ CASE_SUF(Dim, DIM, 2D)
+ CASE_SUF(Dim, DIM, 3D)
+ CASE_SUF(Dim, DIM, Cube)
+ CASE_SUF(Dim, DIM, Rect)
+ CASE_SUF(Dim, DIM, Buffer)
+ CASE_SUF(Dim, DIM, SubpassData)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getSamplerAddressingModeName(SamplerAddressingMode e) {
+ switch (e) {
+ CASE(SamplerAddressingMode, None)
+ CASE(SamplerAddressingMode, ClampToEdge)
+ CASE(SamplerAddressingMode, Clamp)
+ CASE(SamplerAddressingMode, Repeat)
+ CASE(SamplerAddressingMode, RepeatMirrored)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getSamplerFilterModeName(SamplerFilterMode e) {
+ switch (e) {
+ CASE(SamplerFilterMode, Nearest)
+ CASE(SamplerFilterMode, Linear)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getImageFormatName(ImageFormat e) {
+ switch (e) {
+ CASE(ImageFormat, Unknown)
+ CASE(ImageFormat, Rgba32f)
+ CASE(ImageFormat, Rgba16f)
+ CASE(ImageFormat, R32f)
+ CASE(ImageFormat, Rgba8)
+ CASE(ImageFormat, Rgba8Snorm)
+ CASE(ImageFormat, Rg32f)
+ CASE(ImageFormat, Rg16f)
+ CASE(ImageFormat, R11fG11fB10f)
+ CASE(ImageFormat, R16f)
+ CASE(ImageFormat, Rgba16)
+ CASE(ImageFormat, Rgb10A2)
+ CASE(ImageFormat, Rg16)
+ CASE(ImageFormat, Rg8)
+ CASE(ImageFormat, R16)
+ CASE(ImageFormat, R8)
+ CASE(ImageFormat, Rgba16Snorm)
+ CASE(ImageFormat, Rg16Snorm)
+ CASE(ImageFormat, Rg8Snorm)
+ CASE(ImageFormat, R16Snorm)
+ CASE(ImageFormat, R8Snorm)
+ CASE(ImageFormat, Rgba32i)
+ CASE(ImageFormat, Rgba16i)
+ CASE(ImageFormat, Rgba8i)
+ CASE(ImageFormat, R32i)
+ CASE(ImageFormat, Rg32i)
+ CASE(ImageFormat, Rg16i)
+ CASE(ImageFormat, Rg8i)
+ CASE(ImageFormat, R16i)
+ CASE(ImageFormat, R8i)
+ CASE(ImageFormat, Rgba32ui)
+ CASE(ImageFormat, Rgba16ui)
+ CASE(ImageFormat, Rgba8ui)
+ CASE(ImageFormat, R32ui)
+ CASE(ImageFormat, Rgb10a2ui)
+ CASE(ImageFormat, Rg32ui)
+ CASE(ImageFormat, Rg16ui)
+ CASE(ImageFormat, Rg8ui)
+ CASE(ImageFormat, R16ui)
+ CASE(ImageFormat, R8ui)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getImageChannelOrderName(ImageChannelOrder e) {
+ switch (e) {
+ CASE(ImageChannelOrder, R)
+ CASE(ImageChannelOrder, A)
+ CASE(ImageChannelOrder, RG)
+ CASE(ImageChannelOrder, RA)
+ CASE(ImageChannelOrder, RGB)
+ CASE(ImageChannelOrder, RGBA)
+ CASE(ImageChannelOrder, BGRA)
+ CASE(ImageChannelOrder, ARGB)
+ CASE(ImageChannelOrder, Intensity)
+ CASE(ImageChannelOrder, Luminance)
+ CASE(ImageChannelOrder, Rx)
+ CASE(ImageChannelOrder, RGx)
+ CASE(ImageChannelOrder, RGBx)
+ CASE(ImageChannelOrder, Depth)
+ CASE(ImageChannelOrder, DepthStencil)
+ CASE(ImageChannelOrder, sRGB)
+ CASE(ImageChannelOrder, sRGBx)
+ CASE(ImageChannelOrder, sRGBA)
+ CASE(ImageChannelOrder, sBGRA)
+ CASE(ImageChannelOrder, ABGR)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getImageChannelDataTypeName(ImageChannelDataType e) {
+ switch (e) {
+ CASE(ImageChannelDataType, SnormInt8)
+ CASE(ImageChannelDataType, SnormInt16)
+ CASE(ImageChannelDataType, UnormInt8)
+ CASE(ImageChannelDataType, UnormInt16)
+ CASE(ImageChannelDataType, UnormShort565)
+ CASE(ImageChannelDataType, UnormShort555)
+ CASE(ImageChannelDataType, UnormInt101010)
+ CASE(ImageChannelDataType, SignedInt8)
+ CASE(ImageChannelDataType, SignedInt16)
+ CASE(ImageChannelDataType, SignedInt32)
+ CASE(ImageChannelDataType, UnsignedInt8)
+ CASE(ImageChannelDataType, UnsignedInt16)
+ CASE(ImageChannelDataType, UnsigendInt32)
+ CASE(ImageChannelDataType, HalfFloat)
+ CASE(ImageChannelDataType, Float)
+ CASE(ImageChannelDataType, UnormInt24)
+ CASE(ImageChannelDataType, UnormInt101010_2)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+std::string getImageOperandName(uint32_t e) {
+ std::string nameString = "";
+ std::string sep = "";
+ if (e == static_cast<uint32_t>(ImageOperand::None))
+ return "None";
+ if (e == static_cast<uint32_t>(ImageOperand::Bias))
+ return "Bias";
+ if (e & static_cast<uint32_t>(ImageOperand::Bias)) {
+ nameString += sep + "Bias";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::Lod))
+ return "Lod";
+ if (e & static_cast<uint32_t>(ImageOperand::Lod)) {
+ nameString += sep + "Lod";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::Grad))
+ return "Grad";
+ if (e & static_cast<uint32_t>(ImageOperand::Grad)) {
+ nameString += sep + "Grad";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::ConstOffset))
+ return "ConstOffset";
+ if (e & static_cast<uint32_t>(ImageOperand::ConstOffset)) {
+ nameString += sep + "ConstOffset";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::Offset))
+ return "Offset";
+ if (e & static_cast<uint32_t>(ImageOperand::Offset)) {
+ nameString += sep + "Offset";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::ConstOffsets))
+ return "ConstOffsets";
+ if (e & static_cast<uint32_t>(ImageOperand::ConstOffsets)) {
+ nameString += sep + "ConstOffsets";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::Sample))
+ return "Sample";
+ if (e & static_cast<uint32_t>(ImageOperand::Sample)) {
+ nameString += sep + "Sample";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::MinLod))
+ return "MinLod";
+ if (e & static_cast<uint32_t>(ImageOperand::MinLod)) {
+ nameString += sep + "MinLod";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::MakeTexelAvailableKHR))
+ return "MakeTexelAvailableKHR";
+ if (e & static_cast<uint32_t>(ImageOperand::MakeTexelAvailableKHR)) {
+ nameString += sep + "MakeTexelAvailableKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::MakeTexelVisibleKHR))
+ return "MakeTexelVisibleKHR";
+ if (e & static_cast<uint32_t>(ImageOperand::MakeTexelVisibleKHR)) {
+ nameString += sep + "MakeTexelVisibleKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::NonPrivateTexelKHR))
+ return "NonPrivateTexelKHR";
+ if (e & static_cast<uint32_t>(ImageOperand::NonPrivateTexelKHR)) {
+ nameString += sep + "NonPrivateTexelKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::VolatileTexelKHR))
+ return "VolatileTexelKHR";
+ if (e & static_cast<uint32_t>(ImageOperand::VolatileTexelKHR)) {
+ nameString += sep + "VolatileTexelKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::SignExtend))
+ return "SignExtend";
+ if (e & static_cast<uint32_t>(ImageOperand::SignExtend)) {
+ nameString += sep + "SignExtend";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(ImageOperand::ZeroExtend))
+ return "ZeroExtend";
+ if (e & static_cast<uint32_t>(ImageOperand::ZeroExtend)) {
+ nameString += sep + "ZeroExtend";
+ sep = "|";
+  }
+ return nameString;
+}
+
+std::string getFPFastMathModeName(uint32_t e) {
+ std::string nameString = "";
+ std::string sep = "";
+ if (e == static_cast<uint32_t>(FPFastMathMode::None))
+ return "None";
+ if (e == static_cast<uint32_t>(FPFastMathMode::NotNaN))
+ return "NotNaN";
+ if (e & static_cast<uint32_t>(FPFastMathMode::NotNaN)) {
+ nameString += sep + "NotNaN";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(FPFastMathMode::NotInf))
+ return "NotInf";
+ if (e & static_cast<uint32_t>(FPFastMathMode::NotInf)) {
+ nameString += sep + "NotInf";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(FPFastMathMode::NSZ))
+ return "NSZ";
+ if (e & static_cast<uint32_t>(FPFastMathMode::NSZ)) {
+ nameString += sep + "NSZ";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(FPFastMathMode::AllowRecip))
+ return "AllowRecip";
+ if (e & static_cast<uint32_t>(FPFastMathMode::AllowRecip)) {
+ nameString += sep + "AllowRecip";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(FPFastMathMode::Fast))
+ return "Fast";
+ if (e & static_cast<uint32_t>(FPFastMathMode::Fast)) {
+ nameString += sep + "Fast";
+ sep = "|";
+  }
+ return nameString;
+}
+
+StringRef getFPRoundingModeName(FPRoundingMode e) {
+ switch (e) {
+ CASE(FPRoundingMode, RTE)
+ CASE(FPRoundingMode, RTZ)
+ CASE(FPRoundingMode, RTP)
+ CASE(FPRoundingMode, RTN)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getLinkageTypeName(LinkageType e) {
+ switch (e) {
+ CASE(LinkageType, Export)
+ CASE(LinkageType, Import)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getAccessQualifierName(AccessQualifier e) {
+ switch (e) {
+ CASE(AccessQualifier, ReadOnly)
+ CASE(AccessQualifier, WriteOnly)
+ CASE(AccessQualifier, ReadWrite)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getFunctionParameterAttributeName(FunctionParameterAttribute e) {
+ switch (e) {
+ CASE(FunctionParameterAttribute, Zext)
+ CASE(FunctionParameterAttribute, Sext)
+ CASE(FunctionParameterAttribute, ByVal)
+ CASE(FunctionParameterAttribute, Sret)
+ CASE(FunctionParameterAttribute, NoAlias)
+ CASE(FunctionParameterAttribute, NoCapture)
+ CASE(FunctionParameterAttribute, NoWrite)
+ CASE(FunctionParameterAttribute, NoReadWrite)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getDecorationName(Decoration e) {
+ switch (e) {
+ CASE(Decoration, RelaxedPrecision)
+ CASE(Decoration, SpecId)
+ CASE(Decoration, Block)
+ CASE(Decoration, BufferBlock)
+ CASE(Decoration, RowMajor)
+ CASE(Decoration, ColMajor)
+ CASE(Decoration, ArrayStride)
+ CASE(Decoration, MatrixStride)
+ CASE(Decoration, GLSLShared)
+ CASE(Decoration, GLSLPacked)
+ CASE(Decoration, CPacked)
+ CASE(Decoration, BuiltIn)
+ CASE(Decoration, NoPerspective)
+ CASE(Decoration, Flat)
+ CASE(Decoration, Patch)
+ CASE(Decoration, Centroid)
+ CASE(Decoration, Sample)
+ CASE(Decoration, Invariant)
+ CASE(Decoration, Restrict)
+ CASE(Decoration, Aliased)
+ CASE(Decoration, Volatile)
+ CASE(Decoration, Constant)
+ CASE(Decoration, Coherent)
+ CASE(Decoration, NonWritable)
+ CASE(Decoration, NonReadable)
+ CASE(Decoration, Uniform)
+ CASE(Decoration, UniformId)
+ CASE(Decoration, SaturatedConversion)
+ CASE(Decoration, Stream)
+ CASE(Decoration, Location)
+ CASE(Decoration, Component)
+ CASE(Decoration, Index)
+ CASE(Decoration, Binding)
+ CASE(Decoration, DescriptorSet)
+ CASE(Decoration, Offset)
+ CASE(Decoration, XfbBuffer)
+ CASE(Decoration, XfbStride)
+ CASE(Decoration, FuncParamAttr)
+ CASE(Decoration, FPRoundingMode)
+ CASE(Decoration, FPFastMathMode)
+ CASE(Decoration, LinkageAttributes)
+ CASE(Decoration, NoContraction)
+ CASE(Decoration, InputAttachmentIndex)
+ CASE(Decoration, Alignment)
+ CASE(Decoration, MaxByteOffset)
+ CASE(Decoration, AlignmentId)
+ CASE(Decoration, MaxByteOffsetId)
+ CASE(Decoration, NoSignedWrap)
+ CASE(Decoration, NoUnsignedWrap)
+ CASE(Decoration, ExplicitInterpAMD)
+ CASE(Decoration, OverrideCoverageNV)
+ CASE(Decoration, PassthroughNV)
+ CASE(Decoration, ViewportRelativeNV)
+ CASE(Decoration, SecondaryViewportRelativeNV)
+ CASE(Decoration, PerPrimitiveNV)
+ CASE(Decoration, PerViewNV)
+ CASE(Decoration, PerVertexNV)
+ CASE(Decoration, NonUniformEXT)
+ CASE(Decoration, CountBuffer)
+ CASE(Decoration, UserSemantic)
+ CASE(Decoration, RestrictPointerEXT)
+ CASE(Decoration, AliasedPointerEXT)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getBuiltInName(BuiltIn e) {
+ switch (e) {
+ CASE(BuiltIn, Position)
+ CASE(BuiltIn, PointSize)
+ CASE(BuiltIn, ClipDistance)
+ CASE(BuiltIn, CullDistance)
+ CASE(BuiltIn, VertexId)
+ CASE(BuiltIn, InstanceId)
+ CASE(BuiltIn, PrimitiveId)
+ CASE(BuiltIn, InvocationId)
+ CASE(BuiltIn, Layer)
+ CASE(BuiltIn, ViewportIndex)
+ CASE(BuiltIn, TessLevelOuter)
+ CASE(BuiltIn, TessLevelInner)
+ CASE(BuiltIn, TessCoord)
+ CASE(BuiltIn, PatchVertices)
+ CASE(BuiltIn, FragCoord)
+ CASE(BuiltIn, PointCoord)
+ CASE(BuiltIn, FrontFacing)
+ CASE(BuiltIn, SampleId)
+ CASE(BuiltIn, SamplePosition)
+ CASE(BuiltIn, SampleMask)
+ CASE(BuiltIn, FragDepth)
+ CASE(BuiltIn, HelperInvocation)
+ CASE(BuiltIn, NumWorkgroups)
+ CASE(BuiltIn, WorkgroupSize)
+ CASE(BuiltIn, WorkgroupId)
+ CASE(BuiltIn, LocalInvocationId)
+ CASE(BuiltIn, GlobalInvocationId)
+ CASE(BuiltIn, LocalInvocationIndex)
+ CASE(BuiltIn, WorkDim)
+ CASE(BuiltIn, GlobalSize)
+ CASE(BuiltIn, EnqueuedWorkgroupSize)
+ CASE(BuiltIn, GlobalOffset)
+ CASE(BuiltIn, GlobalLinearId)
+ CASE(BuiltIn, SubgroupSize)
+ CASE(BuiltIn, SubgroupMaxSize)
+ CASE(BuiltIn, NumSubgroups)
+ CASE(BuiltIn, NumEnqueuedSubgroups)
+ CASE(BuiltIn, SubgroupId)
+ CASE(BuiltIn, SubgroupLocalInvocationId)
+ CASE(BuiltIn, VertexIndex)
+ CASE(BuiltIn, InstanceIndex)
+ CASE(BuiltIn, SubgroupEqMask)
+ CASE(BuiltIn, SubgroupGeMask)
+ CASE(BuiltIn, SubgroupGtMask)
+ CASE(BuiltIn, SubgroupLeMask)
+ CASE(BuiltIn, SubgroupLtMask)
+ CASE(BuiltIn, BaseVertex)
+ CASE(BuiltIn, BaseInstance)
+ CASE(BuiltIn, DrawIndex)
+ CASE(BuiltIn, DeviceIndex)
+ CASE(BuiltIn, ViewIndex)
+ CASE(BuiltIn, BaryCoordNoPerspAMD)
+ CASE(BuiltIn, BaryCoordNoPerspCentroidAMD)
+ CASE(BuiltIn, BaryCoordNoPerspSampleAMD)
+ CASE(BuiltIn, BaryCoordSmoothAMD)
+ CASE(BuiltIn, BaryCoordSmoothCentroid)
+ CASE(BuiltIn, BaryCoordSmoothSample)
+ CASE(BuiltIn, BaryCoordPullModel)
+ CASE(BuiltIn, FragStencilRefEXT)
+ CASE(BuiltIn, ViewportMaskNV)
+ CASE(BuiltIn, SecondaryPositionNV)
+ CASE(BuiltIn, SecondaryViewportMaskNV)
+ CASE(BuiltIn, PositionPerViewNV)
+ CASE(BuiltIn, ViewportMaskPerViewNV)
+ CASE(BuiltIn, FullyCoveredEXT)
+ CASE(BuiltIn, TaskCountNV)
+ CASE(BuiltIn, PrimitiveCountNV)
+ CASE(BuiltIn, PrimitiveIndicesNV)
+ CASE(BuiltIn, ClipDistancePerViewNV)
+ CASE(BuiltIn, CullDistancePerViewNV)
+ CASE(BuiltIn, LayerPerViewNV)
+ CASE(BuiltIn, MeshViewCountNV)
+ CASE(BuiltIn, MeshViewIndices)
+ CASE(BuiltIn, BaryCoordNV)
+ CASE(BuiltIn, BaryCoordNoPerspNV)
+ CASE(BuiltIn, FragSizeEXT)
+ CASE(BuiltIn, FragInvocationCountEXT)
+ CASE(BuiltIn, LaunchIdNV)
+ CASE(BuiltIn, LaunchSizeNV)
+ CASE(BuiltIn, WorldRayOriginNV)
+ CASE(BuiltIn, WorldRayDirectionNV)
+ CASE(BuiltIn, ObjectRayOriginNV)
+ CASE(BuiltIn, ObjectRayDirectionNV)
+ CASE(BuiltIn, RayTminNV)
+ CASE(BuiltIn, RayTmaxNV)
+ CASE(BuiltIn, InstanceCustomIndexNV)
+ CASE(BuiltIn, ObjectToWorldNV)
+ CASE(BuiltIn, WorldToObjectNV)
+ CASE(BuiltIn, HitTNV)
+ CASE(BuiltIn, HitKindNV)
+ CASE(BuiltIn, IncomingRayFlagsNV)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+std::string getSelectionControlName(uint32_t e) {
+ std::string nameString = "";
+ std::string sep = "";
+ if (e == static_cast<uint32_t>(SelectionControl::None))
+ return "None";
+ if (e == static_cast<uint32_t>(SelectionControl::Flatten))
+ return "Flatten";
+ if (e & static_cast<uint32_t>(SelectionControl::Flatten)) {
+ nameString += sep + "Flatten";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(SelectionControl::DontFlatten))
+ return "DontFlatten";
+ if (e & static_cast<uint32_t>(SelectionControl::DontFlatten)) {
+ nameString += sep + "DontFlatten";
+ sep = "|";
+  }
+ return nameString;
+}
+
+std::string getLoopControlName(uint32_t e) {
+ std::string nameString = "";
+ std::string sep = "";
+ if (e == static_cast<uint32_t>(LoopControl::None))
+ return "None";
+ if (e == static_cast<uint32_t>(LoopControl::Unroll))
+ return "Unroll";
+ if (e & static_cast<uint32_t>(LoopControl::Unroll)) {
+ nameString += sep + "Unroll";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::DontUnroll))
+ return "DontUnroll";
+ if (e & static_cast<uint32_t>(LoopControl::DontUnroll)) {
+ nameString += sep + "DontUnroll";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::DependencyInfinite))
+ return "DependencyInfinite";
+ if (e & static_cast<uint32_t>(LoopControl::DependencyInfinite)) {
+ nameString += sep + "DependencyInfinite";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::DependencyLength))
+ return "DependencyLength";
+ if (e & static_cast<uint32_t>(LoopControl::DependencyLength)) {
+ nameString += sep + "DependencyLength";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::MinIterations))
+ return "MinIterations";
+ if (e & static_cast<uint32_t>(LoopControl::MinIterations)) {
+ nameString += sep + "MinIterations";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::MaxIterations))
+ return "MaxIterations";
+ if (e & static_cast<uint32_t>(LoopControl::MaxIterations)) {
+ nameString += sep + "MaxIterations";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::IterationMultiple))
+ return "IterationMultiple";
+ if (e & static_cast<uint32_t>(LoopControl::IterationMultiple)) {
+ nameString += sep + "IterationMultiple";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::PeelCount))
+ return "PeelCount";
+ if (e & static_cast<uint32_t>(LoopControl::PeelCount)) {
+ nameString += sep + "PeelCount";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(LoopControl::PartialCount))
+ return "PartialCount";
+ if (e & static_cast<uint32_t>(LoopControl::PartialCount)) {
+ nameString += sep + "PartialCount";
+ sep = "|";
+  }
+ return nameString;
+}
+
+std::string getFunctionControlName(uint32_t e) {
+ std::string nameString = "";
+ std::string sep = "";
+ if (e == static_cast<uint32_t>(FunctionControl::None))
+ return "None";
+ if (e == static_cast<uint32_t>(FunctionControl::Inline))
+ return "Inline";
+ if (e & static_cast<uint32_t>(FunctionControl::Inline)) {
+ nameString += sep + "Inline";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(FunctionControl::DontInline))
+ return "DontInline";
+ if (e & static_cast<uint32_t>(FunctionControl::DontInline)) {
+ nameString += sep + "DontInline";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(FunctionControl::Pure))
+ return "Pure";
+ if (e & static_cast<uint32_t>(FunctionControl::Pure)) {
+ nameString += sep + "Pure";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(FunctionControl::Const))
+ return "Const";
+ if (e & static_cast<uint32_t>(FunctionControl::Const)) {
+ nameString += sep + "Const";
+ sep = "|";
+  }
+ return nameString;
+}
+
+std::string getMemorySemanticsName(uint32_t e) {
+ std::string nameString = "";
+ std::string sep = "";
+ if (e == static_cast<uint32_t>(MemorySemantics::None))
+ return "None";
+ if (e == static_cast<uint32_t>(MemorySemantics::Acquire))
+ return "Acquire";
+ if (e & static_cast<uint32_t>(MemorySemantics::Acquire)) {
+ nameString += sep + "Acquire";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::Release))
+ return "Release";
+ if (e & static_cast<uint32_t>(MemorySemantics::Release)) {
+ nameString += sep + "Release";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::AcquireRelease))
+ return "AcquireRelease";
+ if (e & static_cast<uint32_t>(MemorySemantics::AcquireRelease)) {
+ nameString += sep + "AcquireRelease";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::SequentiallyConsistent))
+ return "SequentiallyConsistent";
+ if (e & static_cast<uint32_t>(MemorySemantics::SequentiallyConsistent)) {
+ nameString += sep + "SequentiallyConsistent";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::UniformMemory))
+ return "UniformMemory";
+ if (e & static_cast<uint32_t>(MemorySemantics::UniformMemory)) {
+ nameString += sep + "UniformMemory";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::SubgroupMemory))
+ return "SubgroupMemory";
+ if (e & static_cast<uint32_t>(MemorySemantics::SubgroupMemory)) {
+ nameString += sep + "SubgroupMemory";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::WorkgroupMemory))
+ return "WorkgroupMemory";
+ if (e & static_cast<uint32_t>(MemorySemantics::WorkgroupMemory)) {
+ nameString += sep + "WorkgroupMemory";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::CrossWorkgroupMemory))
+ return "CrossWorkgroupMemory";
+ if (e & static_cast<uint32_t>(MemorySemantics::CrossWorkgroupMemory)) {
+ nameString += sep + "CrossWorkgroupMemory";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::AtomicCounterMemory))
+ return "AtomicCounterMemory";
+ if (e & static_cast<uint32_t>(MemorySemantics::AtomicCounterMemory)) {
+ nameString += sep + "AtomicCounterMemory";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::ImageMemory))
+ return "ImageMemory";
+ if (e & static_cast<uint32_t>(MemorySemantics::ImageMemory)) {
+ nameString += sep + "ImageMemory";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::OutputMemoryKHR))
+ return "OutputMemoryKHR";
+ if (e & static_cast<uint32_t>(MemorySemantics::OutputMemoryKHR)) {
+ nameString += sep + "OutputMemoryKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::MakeAvailableKHR))
+ return "MakeAvailableKHR";
+ if (e & static_cast<uint32_t>(MemorySemantics::MakeAvailableKHR)) {
+ nameString += sep + "MakeAvailableKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemorySemantics::MakeVisibleKHR))
+ return "MakeVisibleKHR";
+ if (e & static_cast<uint32_t>(MemorySemantics::MakeVisibleKHR)) {
+ nameString += sep + "MakeVisibleKHR";
+ sep = "|";
+  }
+ return nameString;
+}
+
+std::string getMemoryOperandName(uint32_t e) {
+ std::string nameString = "";
+ std::string sep = "";
+ if (e == static_cast<uint32_t>(MemoryOperand::None))
+ return "None";
+ if (e == static_cast<uint32_t>(MemoryOperand::Volatile))
+ return "Volatile";
+ if (e & static_cast<uint32_t>(MemoryOperand::Volatile)) {
+ nameString += sep + "Volatile";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemoryOperand::Aligned))
+ return "Aligned";
+ if (e & static_cast<uint32_t>(MemoryOperand::Aligned)) {
+ nameString += sep + "Aligned";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemoryOperand::Nontemporal))
+ return "Nontemporal";
+ if (e & static_cast<uint32_t>(MemoryOperand::Nontemporal)) {
+ nameString += sep + "Nontemporal";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemoryOperand::MakePointerAvailableKHR))
+ return "MakePointerAvailableKHR";
+ if (e & static_cast<uint32_t>(MemoryOperand::MakePointerAvailableKHR)) {
+ nameString += sep + "MakePointerAvailableKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemoryOperand::MakePointerVisibleKHR))
+ return "MakePointerVisibleKHR";
+ if (e & static_cast<uint32_t>(MemoryOperand::MakePointerVisibleKHR)) {
+ nameString += sep + "MakePointerVisibleKHR";
+ sep = "|";
+ }
+ if (e == static_cast<uint32_t>(MemoryOperand::NonPrivatePointerKHR))
+ return "NonPrivatePointerKHR";
+ if (e & static_cast<uint32_t>(MemoryOperand::NonPrivatePointerKHR)) {
+ nameString += sep + "NonPrivatePointerKHR";
+ sep = "|";
+  }
+ return nameString;
+}
+
+StringRef getScopeName(Scope e) {
+ switch (e) {
+ CASE(Scope, CrossDevice)
+ CASE(Scope, Device)
+ CASE(Scope, Workgroup)
+ CASE(Scope, Subgroup)
+ CASE(Scope, Invocation)
+ CASE(Scope, QueueFamilyKHR)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getGroupOperationName(GroupOperation e) {
+ switch (e) {
+ CASE(GroupOperation, Reduce)
+ CASE(GroupOperation, InclusiveScan)
+ CASE(GroupOperation, ExclusiveScan)
+ CASE(GroupOperation, ClusteredReduce)
+ CASE(GroupOperation, PartitionedReduceNV)
+ CASE(GroupOperation, PartitionedInclusiveScanNV)
+ CASE(GroupOperation, PartitionedExclusiveScanNV)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getKernelEnqueueFlagsName(KernelEnqueueFlags e) {
+ switch (e) {
+ CASE(KernelEnqueueFlags, NoWait)
+ CASE(KernelEnqueueFlags, WaitKernel)
+ CASE(KernelEnqueueFlags, WaitWorkGroup)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+
+StringRef getKernelProfilingInfoName(KernelProfilingInfo e) {
+ switch (e) {
+ CASE(KernelProfilingInfo, None)
+ CASE(KernelProfilingInfo, CmdExecTime)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
+} // namespace SPIRV
+} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
new file mode 100644
index 000000000000..2aa9f076c78e
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
@@ -0,0 +1,739 @@
+//===-- SPIRVBaseInfo.h - Top level definitions for SPIRV ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the SPIRV target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVBASEINFO_H
+#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVBASEINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace llvm {
+namespace SPIRV {
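+// The enum values below mirror the numeric tokens assigned by the SPIR-V
+// specification; gaps in a sequence (e.g. Capability 16) correspond to values
+// that are reserved in the spec or not handled by this backend.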
+enum class Capability : uint32_t {
+ Matrix = 0,
+ Shader = 1,
+ Geometry = 2,
+ Tessellation = 3,
+ Addresses = 4,
+ Linkage = 5,
+ Kernel = 6,
+ Vector16 = 7,
+ Float16Buffer = 8,
+ Float16 = 9,
+ Float64 = 10,
+ Int64 = 11,
+ Int64Atomics = 12,
+ ImageBasic = 13,
+ ImageReadWrite = 14,
+ ImageMipmap = 15,
+ Pipes = 17,
+ Groups = 18,
+ DeviceEnqueue = 19,
+ LiteralSampler = 20,
+ AtomicStorage = 21,
+ Int16 = 22,
+ TessellationPointSize = 23,
+ GeometryPointSize = 24,
+ ImageGatherExtended = 25,
+ StorageImageMultisample = 27,
+ UniformBufferArrayDynamicIndexing = 28,
+  SampledImageArrayDynamicIndexing = 29,
+ ClipDistance = 32,
+ CullDistance = 33,
+ ImageCubeArray = 34,
+ SampleRateShading = 35,
+ ImageRect = 36,
+ SampledRect = 37,
+ GenericPointer = 38,
+ Int8 = 39,
+ InputAttachment = 40,
+ SparseResidency = 41,
+ MinLod = 42,
+ Sampled1D = 43,
+ Image1D = 44,
+ SampledCubeArray = 45,
+ SampledBuffer = 46,
+ ImageBuffer = 47,
+ ImageMSArray = 48,
+ StorageImageExtendedFormats = 49,
+ ImageQuery = 50,
+ DerivativeControl = 51,
+ InterpolationFunction = 52,
+ TransformFeedback = 53,
+ GeometryStreams = 54,
+ StorageImageReadWithoutFormat = 55,
+ StorageImageWriteWithoutFormat = 56,
+ MultiViewport = 57,
+ SubgroupDispatch = 58,
+ NamedBarrier = 59,
+ PipeStorage = 60,
+ GroupNonUniform = 61,
+ GroupNonUniformVote = 62,
+ GroupNonUniformArithmetic = 63,
+ GroupNonUniformBallot = 64,
+ GroupNonUniformShuffle = 65,
+ GroupNonUniformShuffleRelative = 66,
+ GroupNonUniformClustered = 67,
+ GroupNonUniformQuad = 68,
+ SubgroupBallotKHR = 4423,
+ DrawParameters = 4427,
+ SubgroupVoteKHR = 4431,
+ StorageBuffer16BitAccess = 4433,
+ StorageUniform16 = 4434,
+ StoragePushConstant16 = 4435,
+ StorageInputOutput16 = 4436,
+ DeviceGroup = 4437,
+ MultiView = 4439,
+ VariablePointersStorageBuffer = 4441,
+ VariablePointers = 4442,
+ AtomicStorageOps = 4445,
+ SampleMaskPostDepthCoverage = 4447,
+ StorageBuffer8BitAccess = 4448,
+ UniformAndStorageBuffer8BitAccess = 4449,
+ StoragePushConstant8 = 4450,
+ DenormPreserve = 4464,
+ DenormFlushToZero = 4465,
+ SignedZeroInfNanPreserve = 4466,
+ RoundingModeRTE = 4467,
+ RoundingModeRTZ = 4468,
+ Float16ImageAMD = 5008,
+ ImageGatherBiasLodAMD = 5009,
+ FragmentMaskAMD = 5010,
+ StencilExportEXT = 5013,
+ ImageReadWriteLodAMD = 5015,
+ SampleMaskOverrideCoverageNV = 5249,
+ GeometryShaderPassthroughNV = 5251,
+ ShaderViewportIndexLayerEXT = 5254,
+ ShaderViewportMaskNV = 5255,
+ ShaderStereoViewNV = 5259,
+ PerViewAttributesNV = 5260,
+ FragmentFullyCoveredEXT = 5265,
+ MeshShadingNV = 5266,
+ ShaderNonUniformEXT = 5301,
+ RuntimeDescriptorArrayEXT = 5302,
+ InputAttachmentArrayDynamicIndexingEXT = 5303,
+ UniformTexelBufferArrayDynamicIndexingEXT = 5304,
+ StorageTexelBufferArrayDynamicIndexingEXT = 5305,
+ UniformBufferArrayNonUniformIndexingEXT = 5306,
+ SampledImageArrayNonUniformIndexingEXT = 5307,
+ StorageBufferArrayNonUniformIndexingEXT = 5308,
+ StorageImageArrayNonUniformIndexingEXT = 5309,
+ InputAttachmentArrayNonUniformIndexingEXT = 5310,
+ UniformTexelBufferArrayNonUniformIndexingEXT = 5311,
+ StorageTexelBufferArrayNonUniformIndexingEXT = 5312,
+ RayTracingNV = 5340,
+ SubgroupShuffleINTEL = 5568,
+ SubgroupBufferBlockIOINTEL = 5569,
+ SubgroupImageBlockIOINTEL = 5570,
+ SubgroupImageMediaBlockIOINTEL = 5579,
+ SubgroupAvcMotionEstimationINTEL = 5696,
+ SubgroupAvcMotionEstimationIntraINTEL = 5697,
+ SubgroupAvcMotionEstimationChromaINTEL = 5698,
+ GroupNonUniformPartitionedNV = 5297,
+ VulkanMemoryModelKHR = 5345,
+ VulkanMemoryModelDeviceScopeKHR = 5346,
+ ImageFootprintNV = 5282,
+ FragmentBarycentricNV = 5284,
+ ComputeDerivativeGroupQuadsNV = 5288,
+ ComputeDerivativeGroupLinearNV = 5350,
+ FragmentDensityEXT = 5291,
+ PhysicalStorageBufferAddressesEXT = 5347,
+ CooperativeMatrixNV = 5357,
+};
+StringRef getCapabilityName(Capability e);
+
+enum class SourceLanguage : uint32_t {
+ Unknown = 0,
+ ESSL = 1,
+ GLSL = 2,
+ OpenCL_C = 3,
+ OpenCL_CPP = 4,
+ HLSL = 5,
+};
+StringRef getSourceLanguageName(SourceLanguage e);
+
+enum class AddressingModel : uint32_t {
+ Logical = 0,
+ Physical32 = 1,
+ Physical64 = 2,
+ PhysicalStorageBuffer64EXT = 5348,
+};
+StringRef getAddressingModelName(AddressingModel e);
+
+enum class ExecutionModel : uint32_t {
+ Vertex = 0,
+ TessellationControl = 1,
+ TessellationEvaluation = 2,
+ Geometry = 3,
+ Fragment = 4,
+ GLCompute = 5,
+ Kernel = 6,
+ TaskNV = 5267,
+ MeshNV = 5268,
+ RayGenerationNV = 5313,
+ IntersectionNV = 5314,
+ AnyHitNV = 5315,
+ ClosestHitNV = 5316,
+ MissNV = 5317,
+ CallableNV = 5318,
+};
+StringRef getExecutionModelName(ExecutionModel e);
+
+enum class MemoryModel : uint32_t {
+ Simple = 0,
+ GLSL450 = 1,
+ OpenCL = 2,
+ VulkanKHR = 3,
+};
+StringRef getMemoryModelName(MemoryModel e);
+
+enum class ExecutionMode : uint32_t {
+ Invocations = 0,
+ SpacingEqual = 1,
+ SpacingFractionalEven = 2,
+ SpacingFractionalOdd = 3,
+ VertexOrderCw = 4,
+ VertexOrderCcw = 5,
+ PixelCenterInteger = 6,
+ OriginUpperLeft = 7,
+ OriginLowerLeft = 8,
+ EarlyFragmentTests = 9,
+ PointMode = 10,
+ Xfb = 11,
+ DepthReplacing = 12,
+ DepthGreater = 14,
+ DepthLess = 15,
+ DepthUnchanged = 16,
+ LocalSize = 17,
+ LocalSizeHint = 18,
+ InputPoints = 19,
+ InputLines = 20,
+ InputLinesAdjacency = 21,
+ Triangles = 22,
+ InputTrianglesAdjacency = 23,
+ Quads = 24,
+ Isolines = 25,
+ OutputVertices = 26,
+ OutputPoints = 27,
+ OutputLineStrip = 28,
+ OutputTriangleStrip = 29,
+ VecTypeHint = 30,
+ ContractionOff = 31,
+ Initializer = 33,
+ Finalizer = 34,
+ SubgroupSize = 35,
+ SubgroupsPerWorkgroup = 36,
+ SubgroupsPerWorkgroupId = 37,
+ LocalSizeId = 38,
+ LocalSizeHintId = 39,
+ PostDepthCoverage = 4446,
+ DenormPreserve = 4459,
+ DenormFlushToZero = 4460,
+ SignedZeroInfNanPreserve = 4461,
+ RoundingModeRTE = 4462,
+ RoundingModeRTZ = 4463,
+ StencilRefReplacingEXT = 5027,
+ OutputLinesNV = 5269,
+ DerivativeGroupQuadsNV = 5289,
+ DerivativeGroupLinearNV = 5290,
+ OutputTrianglesNV = 5298,
+};
+StringRef getExecutionModeName(ExecutionMode e);
+
+enum class StorageClass : uint32_t {
+ UniformConstant = 0,
+ Input = 1,
+ Uniform = 2,
+ Output = 3,
+ Workgroup = 4,
+ CrossWorkgroup = 5,
+ Private = 6,
+ Function = 7,
+ Generic = 8,
+ PushConstant = 9,
+ AtomicCounter = 10,
+ Image = 11,
+ StorageBuffer = 12,
+ CallableDataNV = 5328,
+ IncomingCallableDataNV = 5329,
+ RayPayloadNV = 5338,
+ HitAttributeNV = 5339,
+ IncomingRayPayloadNV = 5342,
+ ShaderRecordBufferNV = 5343,
+ PhysicalStorageBufferEXT = 5349,
+};
+StringRef getStorageClassName(StorageClass e);
+
+enum class Dim : uint32_t {
+ DIM_1D = 0,
+ DIM_2D = 1,
+ DIM_3D = 2,
+ DIM_Cube = 3,
+ DIM_Rect = 4,
+ DIM_Buffer = 5,
+ DIM_SubpassData = 6,
+};
+StringRef getDimName(Dim e);
+
+enum class SamplerAddressingMode : uint32_t {
+ None = 0,
+ ClampToEdge = 1,
+ Clamp = 2,
+ Repeat = 3,
+ RepeatMirrored = 4,
+};
+StringRef getSamplerAddressingModeName(SamplerAddressingMode e);
+
+enum class SamplerFilterMode : uint32_t {
+ Nearest = 0,
+ Linear = 1,
+};
+StringRef getSamplerFilterModeName(SamplerFilterMode e);
+
+enum class ImageFormat : uint32_t {
+ Unknown = 0,
+ Rgba32f = 1,
+ Rgba16f = 2,
+ R32f = 3,
+ Rgba8 = 4,
+ Rgba8Snorm = 5,
+ Rg32f = 6,
+ Rg16f = 7,
+ R11fG11fB10f = 8,
+ R16f = 9,
+ Rgba16 = 10,
+ Rgb10A2 = 11,
+ Rg16 = 12,
+ Rg8 = 13,
+ R16 = 14,
+ R8 = 15,
+ Rgba16Snorm = 16,
+ Rg16Snorm = 17,
+ Rg8Snorm = 18,
+ R16Snorm = 19,
+ R8Snorm = 20,
+ Rgba32i = 21,
+ Rgba16i = 22,
+ Rgba8i = 23,
+ R32i = 24,
+ Rg32i = 25,
+ Rg16i = 26,
+ Rg8i = 27,
+ R16i = 28,
+ R8i = 29,
+ Rgba32ui = 30,
+ Rgba16ui = 31,
+ Rgba8ui = 32,
+ R32ui = 33,
+ Rgb10a2ui = 34,
+ Rg32ui = 35,
+ Rg16ui = 36,
+ Rg8ui = 37,
+ R16ui = 38,
+ R8ui = 39,
+};
+StringRef getImageFormatName(ImageFormat e);
+
+enum class ImageChannelOrder : uint32_t {
+ R = 0,
+ A = 1,
+ RG = 2,
+ RA = 3,
+ RGB = 4,
+ RGBA = 5,
+ BGRA = 6,
+ ARGB = 7,
+ Intensity = 8,
+ Luminance = 9,
+ Rx = 10,
+ RGx = 11,
+ RGBx = 12,
+ Depth = 13,
+ DepthStencil = 14,
+ sRGB = 15,
+ sRGBx = 16,
+ sRGBA = 17,
+ sBGRA = 18,
+ ABGR = 19,
+};
+StringRef getImageChannelOrderName(ImageChannelOrder e);
+
+enum class ImageChannelDataType : uint32_t {
+ SnormInt8 = 0,
+ SnormInt16 = 1,
+ UnormInt8 = 2,
+ UnormInt16 = 3,
+ UnormShort565 = 4,
+ UnormShort555 = 5,
+ UnormInt101010 = 6,
+ SignedInt8 = 7,
+ SignedInt16 = 8,
+ SignedInt32 = 9,
+ UnsignedInt8 = 10,
+ UnsignedInt16 = 11,
+  UnsignedInt32 = 12,
+ HalfFloat = 13,
+ Float = 14,
+ UnormInt24 = 15,
+ UnormInt101010_2 = 16,
+};
+StringRef getImageChannelDataTypeName(ImageChannelDataType e);
+
+enum class ImageOperand : uint32_t {
+ None = 0x0,
+ Bias = 0x1,
+ Lod = 0x2,
+ Grad = 0x4,
+ ConstOffset = 0x8,
+ Offset = 0x10,
+ ConstOffsets = 0x20,
+ Sample = 0x40,
+ MinLod = 0x80,
+ MakeTexelAvailableKHR = 0x100,
+ MakeTexelVisibleKHR = 0x200,
+ NonPrivateTexelKHR = 0x400,
+ VolatileTexelKHR = 0x800,
+ SignExtend = 0x1000,
+ ZeroExtend = 0x2000,
+};
+std::string getImageOperandName(uint32_t e);
+
+enum class FPFastMathMode : uint32_t {
+ None = 0x0,
+ NotNaN = 0x1,
+ NotInf = 0x2,
+ NSZ = 0x4,
+ AllowRecip = 0x8,
+ Fast = 0x10,
+};
+std::string getFPFastMathModeName(uint32_t e);
+
+enum class FPRoundingMode : uint32_t {
+ RTE = 0,
+ RTZ = 1,
+ RTP = 2,
+ RTN = 3,
+};
+StringRef getFPRoundingModeName(FPRoundingMode e);
+
+enum class LinkageType : uint32_t {
+ Export = 0,
+ Import = 1,
+};
+StringRef getLinkageTypeName(LinkageType e);
+
+enum class AccessQualifier : uint32_t {
+ ReadOnly = 0,
+ WriteOnly = 1,
+ ReadWrite = 2,
+};
+StringRef getAccessQualifierName(AccessQualifier e);
+
+enum class FunctionParameterAttribute : uint32_t {
+ Zext = 0,
+ Sext = 1,
+ ByVal = 2,
+ Sret = 3,
+ NoAlias = 4,
+ NoCapture = 5,
+ NoWrite = 6,
+ NoReadWrite = 7,
+};
+StringRef getFunctionParameterAttributeName(FunctionParameterAttribute e);
+
+enum class Decoration : uint32_t {
+ RelaxedPrecision = 0,
+ SpecId = 1,
+ Block = 2,
+ BufferBlock = 3,
+ RowMajor = 4,
+ ColMajor = 5,
+ ArrayStride = 6,
+ MatrixStride = 7,
+ GLSLShared = 8,
+ GLSLPacked = 9,
+ CPacked = 10,
+ BuiltIn = 11,
+ NoPerspective = 13,
+ Flat = 14,
+ Patch = 15,
+ Centroid = 16,
+ Sample = 17,
+ Invariant = 18,
+ Restrict = 19,
+ Aliased = 20,
+ Volatile = 21,
+ Constant = 22,
+ Coherent = 23,
+ NonWritable = 24,
+ NonReadable = 25,
+ Uniform = 26,
+ UniformId = 27,
+ SaturatedConversion = 28,
+ Stream = 29,
+ Location = 30,
+ Component = 31,
+ Index = 32,
+ Binding = 33,
+ DescriptorSet = 34,
+ Offset = 35,
+ XfbBuffer = 36,
+ XfbStride = 37,
+ FuncParamAttr = 38,
+ FPRoundingMode = 39,
+ FPFastMathMode = 40,
+ LinkageAttributes = 41,
+ NoContraction = 42,
+ InputAttachmentIndex = 43,
+ Alignment = 44,
+ MaxByteOffset = 45,
+ AlignmentId = 46,
+ MaxByteOffsetId = 47,
+ NoSignedWrap = 4469,
+ NoUnsignedWrap = 4470,
+ ExplicitInterpAMD = 4999,
+ OverrideCoverageNV = 5248,
+ PassthroughNV = 5250,
+ ViewportRelativeNV = 5252,
+ SecondaryViewportRelativeNV = 5256,
+ PerPrimitiveNV = 5271,
+ PerViewNV = 5272,
+ PerVertexNV = 5273,
+ NonUniformEXT = 5300,
+ CountBuffer = 5634,
+ UserSemantic = 5635,
+ RestrictPointerEXT = 5355,
+ AliasedPointerEXT = 5356,
+};
+StringRef getDecorationName(Decoration e);
+
+enum class BuiltIn : uint32_t {
+ Position = 0,
+ PointSize = 1,
+ ClipDistance = 3,
+ CullDistance = 4,
+ VertexId = 5,
+ InstanceId = 6,
+ PrimitiveId = 7,
+ InvocationId = 8,
+ Layer = 9,
+ ViewportIndex = 10,
+ TessLevelOuter = 11,
+ TessLevelInner = 12,
+ TessCoord = 13,
+ PatchVertices = 14,
+ FragCoord = 15,
+ PointCoord = 16,
+ FrontFacing = 17,
+ SampleId = 18,
+ SamplePosition = 19,
+ SampleMask = 20,
+ FragDepth = 22,
+ HelperInvocation = 23,
+ NumWorkgroups = 24,
+ WorkgroupSize = 25,
+ WorkgroupId = 26,
+ LocalInvocationId = 27,
+ GlobalInvocationId = 28,
+ LocalInvocationIndex = 29,
+ WorkDim = 30,
+ GlobalSize = 31,
+ EnqueuedWorkgroupSize = 32,
+ GlobalOffset = 33,
+ GlobalLinearId = 34,
+ SubgroupSize = 36,
+ SubgroupMaxSize = 37,
+ NumSubgroups = 38,
+ NumEnqueuedSubgroups = 39,
+ SubgroupId = 40,
+ SubgroupLocalInvocationId = 41,
+ VertexIndex = 42,
+ InstanceIndex = 43,
+ SubgroupEqMask = 4416,
+ SubgroupGeMask = 4417,
+ SubgroupGtMask = 4418,
+ SubgroupLeMask = 4419,
+ SubgroupLtMask = 4420,
+ BaseVertex = 4424,
+ BaseInstance = 4425,
+ DrawIndex = 4426,
+ DeviceIndex = 4438,
+ ViewIndex = 4440,
+ BaryCoordNoPerspAMD = 4492,
+ BaryCoordNoPerspCentroidAMD = 4493,
+ BaryCoordNoPerspSampleAMD = 4494,
+ BaryCoordSmoothAMD = 4495,
+ BaryCoordSmoothCentroid = 4496,
+ BaryCoordSmoothSample = 4497,
+ BaryCoordPullModel = 4498,
+ FragStencilRefEXT = 5014,
+ ViewportMaskNV = 5253,
+ SecondaryPositionNV = 5257,
+ SecondaryViewportMaskNV = 5258,
+ PositionPerViewNV = 5261,
+ ViewportMaskPerViewNV = 5262,
+ FullyCoveredEXT = 5264,
+ TaskCountNV = 5274,
+ PrimitiveCountNV = 5275,
+ PrimitiveIndicesNV = 5276,
+ ClipDistancePerViewNV = 5277,
+ CullDistancePerViewNV = 5278,
+ LayerPerViewNV = 5279,
+ MeshViewCountNV = 5280,
+ MeshViewIndices = 5281,
+ BaryCoordNV = 5286,
+ BaryCoordNoPerspNV = 5287,
+ FragSizeEXT = 5292,
+ FragInvocationCountEXT = 5293,
+ LaunchIdNV = 5319,
+ LaunchSizeNV = 5320,
+ WorldRayOriginNV = 5321,
+ WorldRayDirectionNV = 5322,
+ ObjectRayOriginNV = 5323,
+ ObjectRayDirectionNV = 5324,
+ RayTminNV = 5325,
+ RayTmaxNV = 5326,
+ InstanceCustomIndexNV = 5327,
+ ObjectToWorldNV = 5330,
+ WorldToObjectNV = 5331,
+ HitTNV = 5332,
+ HitKindNV = 5333,
+ IncomingRayFlagsNV = 5351,
+};
+StringRef getBuiltInName(BuiltIn e);
+
+enum class SelectionControl : uint32_t {
+ None = 0x0,
+ Flatten = 0x1,
+ DontFlatten = 0x2,
+};
+std::string getSelectionControlName(uint32_t e);
+
+enum class LoopControl : uint32_t {
+ None = 0x0,
+ Unroll = 0x1,
+ DontUnroll = 0x2,
+ DependencyInfinite = 0x4,
+ DependencyLength = 0x8,
+ MinIterations = 0x10,
+ MaxIterations = 0x20,
+ IterationMultiple = 0x40,
+ PeelCount = 0x80,
+ PartialCount = 0x100,
+};
+std::string getLoopControlName(uint32_t e);
+
+enum class FunctionControl : uint32_t {
+ None = 0x0,
+ Inline = 0x1,
+ DontInline = 0x2,
+ Pure = 0x4,
+ Const = 0x8,
+};
+std::string getFunctionControlName(uint32_t e);
+
+enum class MemorySemantics : uint32_t {
+ None = 0x0,
+ Acquire = 0x2,
+ Release = 0x4,
+ AcquireRelease = 0x8,
+ SequentiallyConsistent = 0x10,
+ UniformMemory = 0x40,
+ SubgroupMemory = 0x80,
+ WorkgroupMemory = 0x100,
+ CrossWorkgroupMemory = 0x200,
+ AtomicCounterMemory = 0x400,
+ ImageMemory = 0x800,
+ OutputMemoryKHR = 0x1000,
+ MakeAvailableKHR = 0x2000,
+ MakeVisibleKHR = 0x4000,
+};
+std::string getMemorySemanticsName(uint32_t e);
+
+enum class MemoryOperand : uint32_t {
+ None = 0x0,
+ Volatile = 0x1,
+ Aligned = 0x2,
+ Nontemporal = 0x4,
+ MakePointerAvailableKHR = 0x8,
+ MakePointerVisibleKHR = 0x10,
+ NonPrivatePointerKHR = 0x20,
+};
+std::string getMemoryOperandName(uint32_t e);
+
+enum class Scope : uint32_t {
+ CrossDevice = 0,
+ Device = 1,
+ Workgroup = 2,
+ Subgroup = 3,
+ Invocation = 4,
+ QueueFamilyKHR = 5,
+};
+StringRef getScopeName(Scope e);
+
+enum class GroupOperation : uint32_t {
+ Reduce = 0,
+ InclusiveScan = 1,
+ ExclusiveScan = 2,
+ ClusteredReduce = 3,
+ PartitionedReduceNV = 6,
+ PartitionedInclusiveScanNV = 7,
+ PartitionedExclusiveScanNV = 8,
+};
+StringRef getGroupOperationName(GroupOperation e);
+
+enum class KernelEnqueueFlags : uint32_t {
+ NoWait = 0,
+ WaitKernel = 1,
+ WaitWorkGroup = 2,
+};
+StringRef getKernelEnqueueFlagsName(KernelEnqueueFlags e);
+
+enum class KernelProfilingInfo : uint32_t {
+ None = 0x0,
+ CmdExecTime = 0x1,
+};
+StringRef getKernelProfilingInfoName(KernelProfilingInfo e);
+} // namespace SPIRV
+} // namespace llvm
+
+// Return a string representation of the operands from StartIndex onwards.
+// Templated to allow both MachineInstr and MCInst to use the same logic.
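+// SPIR-V packs literal strings four chars per 32-bit word, little-endian,
+// with a nul terminator. E.g. the words {0x52495053, 0x00000056} decode to
+// "SPIRV": 0x53='S', 0x50='P', 0x49='I', 0x52='R', then 0x56='V' and a nul.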
+template <class InstType>
+std::string getSPIRVStringOperand(const InstType &MI, unsigned StartIndex) {
+ std::string s; // Iteratively append to this string.
+
+ const unsigned NumOps = MI.getNumOperands();
+ bool IsFinished = false;
+ for (unsigned i = StartIndex; i < NumOps && !IsFinished; ++i) {
+ const auto &Op = MI.getOperand(i);
+ if (!Op.isImm()) // Stop if we hit a register operand.
+ break;
+ assert((Op.getImm() >> 32) == 0 && "Imm operand should be i32 word");
+ const uint32_t Imm = Op.getImm(); // Each i32 word is up to 4 characters.
+ for (unsigned ShiftAmount = 0; ShiftAmount < 32; ShiftAmount += 8) {
+ char c = (Imm >> ShiftAmount) & 0xff;
+ if (c == 0) { // Stop if we hit a null-terminator character.
+ IsFinished = true;
+ break;
+ } else {
+ s += c; // Otherwise, append the character to the result string.
+ }
+ }
+ }
+ return s;
+}
+
+#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVBASEINFO_H
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
new file mode 100644
index 000000000000..3105baa02c90
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
@@ -0,0 +1,556 @@
+//===-- SPIRVInstPrinter.cpp - Output SPIR-V MCInsts as ASM -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a SPIR-V MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVInstPrinter.h"
+#include "SPIRV.h"
+#include "SPIRVBaseInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#include "SPIRVGenAsmWriter.inc"
+
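+// Print all operands from StartIndex onwards, space-separated. SkipFirstSpace
+// suppresses the separator before the first printed operand; SkipImmediates
+// skips immediate operands (e.g. a string literal that has already been
+// printed).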
+void SPIRVInstPrinter::printRemainingVariableOps(const MCInst *MI,
+ unsigned StartIndex,
+ raw_ostream &O,
+ bool SkipFirstSpace,
+ bool SkipImmediates) {
+ const unsigned NumOps = MI->getNumOperands();
+ for (unsigned i = StartIndex; i < NumOps; ++i) {
+ if (!SkipImmediates || !MI->getOperand(i).isImm()) {
+ if (!SkipFirstSpace || i != StartIndex)
+ O << ' ';
+ printOperand(MI, i, O);
+ }
+ }
+}
+
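+// OpConstantI/OpConstantF store 64-bit literals as two i32 words, low word
+// first; e.g. the operand pair {0x2A05F200, 0x1} prints as the single
+// literal 5000000000 (0x12A05F200).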
+void SPIRVInstPrinter::printOpConstantVarOps(const MCInst *MI,
+ unsigned StartIndex,
+ raw_ostream &O) {
+ O << ' ';
+  if (MI->getNumOperands() - StartIndex == 2) { // Handle 64-bit literals.
+ uint64_t Imm = MI->getOperand(StartIndex).getImm();
+ Imm |= (MI->getOperand(StartIndex + 1).getImm() << 32);
+ O << Imm;
+ } else {
+ printRemainingVariableOps(MI, StartIndex, O, true, false);
+ }
+}
+
+void SPIRVInstPrinter::recordOpExtInstImport(const MCInst *MI) {
+ llvm_unreachable("Unimplemented recordOpExtInstImport");
+}
+
+void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
+ const unsigned OpCode = MI->getOpcode();
+ printInstruction(MI, Address, OS);
+
+ if (OpCode == SPIRV::OpDecorate) {
+ printOpDecorate(MI, OS);
+ } else if (OpCode == SPIRV::OpExtInstImport) {
+ recordOpExtInstImport(MI);
+ } else if (OpCode == SPIRV::OpExtInst) {
+ printOpExtInst(MI, OS);
+ } else {
+ // Print any extra operands for variadic instructions.
+ MCInstrDesc MCDesc = MII.get(OpCode);
+ if (MCDesc.isVariadic()) {
+ const unsigned NumFixedOps = MCDesc.getNumOperands();
+ const unsigned LastFixedIndex = NumFixedOps - 1;
+ const int FirstVariableIndex = NumFixedOps;
+ if (NumFixedOps > 0 &&
+ MCDesc.OpInfo[LastFixedIndex].OperandType == MCOI::OPERAND_UNKNOWN) {
+        // Handle instructions where a custom type (neither a register nor an
+        // immediate) is the last operand before the variable_ops; this is
+        // usually a StringImm operand, but there are a few other cases.
+ switch (OpCode) {
+ case SPIRV::OpTypeImage:
+ OS << ' ';
+ printAccessQualifier(MI, FirstVariableIndex, OS);
+ break;
+ case SPIRV::OpVariable:
+ OS << ' ';
+ printOperand(MI, FirstVariableIndex, OS);
+ break;
+ case SPIRV::OpEntryPoint: {
+ // Print the interface ID operands, skipping the name's string
+ // literal.
+ printRemainingVariableOps(MI, NumFixedOps, OS, false, true);
+ break;
+ }
+ case SPIRV::OpExecutionMode:
+ case SPIRV::OpExecutionModeId:
+ case SPIRV::OpLoopMerge: {
+ // Print any literals after the OPERAND_UNKNOWN argument normally.
+ printRemainingVariableOps(MI, NumFixedOps, OS);
+ break;
+ }
+ default:
+          break; // The StringImm operand is already printed by printStringImm.
+ }
+ } else {
+ // For instructions with no fixed ops or a reg/immediate as the final
+ // fixed operand, we can usually print the rest with "printOperand", but
+ // check for a few cases with custom types first.
+ switch (OpCode) {
+ case SPIRV::OpLoad:
+ case SPIRV::OpStore:
+ OS << ' ';
+ printMemoryOperand(MI, FirstVariableIndex, OS);
+ printRemainingVariableOps(MI, FirstVariableIndex + 1, OS);
+ break;
+ case SPIRV::OpImageSampleImplicitLod:
+ case SPIRV::OpImageSampleDrefImplicitLod:
+ case SPIRV::OpImageSampleProjImplicitLod:
+ case SPIRV::OpImageSampleProjDrefImplicitLod:
+ case SPIRV::OpImageFetch:
+ case SPIRV::OpImageGather:
+ case SPIRV::OpImageDrefGather:
+ case SPIRV::OpImageRead:
+ case SPIRV::OpImageWrite:
+ case SPIRV::OpImageSparseSampleImplicitLod:
+ case SPIRV::OpImageSparseSampleDrefImplicitLod:
+ case SPIRV::OpImageSparseSampleProjImplicitLod:
+ case SPIRV::OpImageSparseSampleProjDrefImplicitLod:
+ case SPIRV::OpImageSparseFetch:
+ case SPIRV::OpImageSparseGather:
+ case SPIRV::OpImageSparseDrefGather:
+ case SPIRV::OpImageSparseRead:
+ case SPIRV::OpImageSampleFootprintNV:
+ OS << ' ';
+ printImageOperand(MI, FirstVariableIndex, OS);
+          printRemainingVariableOps(MI, FirstVariableIndex + 1, OS);
+ break;
+ case SPIRV::OpCopyMemory:
+ case SPIRV::OpCopyMemorySized: {
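+        // OpCopyMemory/OpCopyMemorySized may carry a memory-operand mask for
+        // both target and source, each optionally followed by an alignment
+        // literal; e.g. a mask 0x2 followed by a literal 4 prints " Aligned 4".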
+ const unsigned NumOps = MI->getNumOperands();
+ for (unsigned i = NumFixedOps; i < NumOps; ++i) {
+ OS << ' ';
+ printMemoryOperand(MI, i, OS);
+ if (MI->getOperand(i).getImm() &
+ static_cast<unsigned>(SPIRV::MemoryOperand::Aligned)) {
+ assert(i + 1 < NumOps && "Missing alignment operand");
+ OS << ' ';
+ printOperand(MI, i + 1, OS);
+ i += 1;
+ }
+ }
+ break;
+ }
+ case SPIRV::OpConstantI:
+ case SPIRV::OpConstantF:
+ printOpConstantVarOps(MI, NumFixedOps, OS);
+ break;
+ default:
+ printRemainingVariableOps(MI, NumFixedOps, OS);
+ break;
+ }
+ }
+ }
+ }
+
+ printAnnotation(OS, Annot);
+}
+
+void SPIRVInstPrinter::printOpExtInst(const MCInst *MI, raw_ostream &O) {
+ llvm_unreachable("Unimplemented printOpExtInst");
+}
+
+void SPIRVInstPrinter::printOpDecorate(const MCInst *MI, raw_ostream &O) {
+  // The fixed operands have already been printed, so we just need to decide
+  // what type of decoration operands to print based on the Decoration type.
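+  // E.g. a BuiltIn decoration is followed by a BuiltIn name ("... BuiltIn
+  // Position"), while LinkageAttributes ends with a string literal and a
+  // LinkageType ("... LinkageAttributes "foo" Export").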
+ MCInstrDesc MCDesc = MII.get(MI->getOpcode());
+ unsigned NumFixedOps = MCDesc.getNumOperands();
+
+ if (NumFixedOps != MI->getNumOperands()) {
+ auto DecOp = MI->getOperand(NumFixedOps - 1);
+ auto Dec = static_cast<SPIRV::Decoration>(DecOp.getImm());
+
+ O << ' ';
+
+ switch (Dec) {
+ case SPIRV::Decoration::BuiltIn:
+ printBuiltIn(MI, NumFixedOps, O);
+ break;
+ case SPIRV::Decoration::UniformId:
+ printScope(MI, NumFixedOps, O);
+ break;
+ case SPIRV::Decoration::FuncParamAttr:
+ printFunctionParameterAttribute(MI, NumFixedOps, O);
+ break;
+ case SPIRV::Decoration::FPRoundingMode:
+ printFPRoundingMode(MI, NumFixedOps, O);
+ break;
+ case SPIRV::Decoration::FPFastMathMode:
+ printFPFastMathMode(MI, NumFixedOps, O);
+ break;
+ case SPIRV::Decoration::LinkageAttributes:
+ case SPIRV::Decoration::UserSemantic:
+ printStringImm(MI, NumFixedOps, O);
+ break;
+ default:
+ printRemainingVariableOps(MI, NumFixedOps, O, true);
+ break;
+ }
+ }
+}
+
+static void printExpr(const MCExpr *Expr, raw_ostream &O) {
+#ifndef NDEBUG
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr))
+ SRE = cast<MCSymbolRefExpr>(BE->getLHS());
+ else
+ SRE = cast<MCSymbolRefExpr>(Expr);
+
+ MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+
+ assert(Kind == MCSymbolRefExpr::VK_None);
+#endif
+ O << *Expr;
+}
+
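+// Registers are printed as SPIR-V result IDs: virtual register index 0 prints
+// as "%1", index 1 as "%2", and so on.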
+void SPIRVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+  assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
+ if (OpNo < MI->getNumOperands()) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg())
+ O << '%' << (Register::virtReg2Index(Op.getReg()) + 1);
+ else if (Op.isImm())
+ O << formatImm((int64_t)Op.getImm());
+ else if (Op.isDFPImm())
+ O << formatImm((double)Op.getDFPImm());
+ else if (Op.isExpr())
+ printExpr(Op.getExpr(), O);
+ else
+ llvm_unreachable("Unexpected operand type");
+ }
+}
+
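+// A literal string spans (Str.size() / 4) + 1 operand words: four chars per
+// word plus a nul terminator. E.g. "SPIR" (4 chars) occupies 2 words, while
+// "abc" (3 chars) fits in 1.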
+void SPIRVInstPrinter::printStringImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const unsigned NumOps = MI->getNumOperands();
+ unsigned StrStartIndex = OpNo;
+ while (StrStartIndex < NumOps) {
+ if (MI->getOperand(StrStartIndex).isReg())
+ break;
+
+    std::string Str = getSPIRVStringOperand(*MI, StrStartIndex);
+ if (StrStartIndex != OpNo)
+ O << ' '; // Add a space if we're starting a new string/argument.
+ O << '"';
+ for (char c : Str) {
+ if (c == '"')
+ O.write('\\'); // Escape " characters (might break for complex UTF-8).
+ O.write(c);
+ }
+ O << '"';
+
+ unsigned numOpsInString = (Str.size() / 4) + 1;
+ StrStartIndex += numOpsInString;
+
+ // Check for final Op of "OpDecorate %x %stringImm %linkageAttribute".
+ if (MI->getOpcode() == SPIRV::OpDecorate &&
+ MI->getOperand(1).getImm() ==
+ static_cast<unsigned>(SPIRV::Decoration::LinkageAttributes)) {
+ O << ' ';
+ printLinkageType(MI, StrStartIndex, O);
+ break;
+ }
+ }
+}
+
+void SPIRVInstPrinter::printExtInst(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ llvm_unreachable("Unimplemented printExtInst");
+}
+
+void SPIRVInstPrinter::printCapability(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::Capability e =
+ static_cast<SPIRV::Capability>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getCapabilityName(e);
+ }
+}
+
+void SPIRVInstPrinter::printSourceLanguage(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::SourceLanguage e =
+ static_cast<SPIRV::SourceLanguage>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getSourceLanguageName(e);
+ }
+}
+
+void SPIRVInstPrinter::printExecutionModel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::ExecutionModel e =
+ static_cast<SPIRV::ExecutionModel>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getExecutionModelName(e);
+ }
+}
+
+void SPIRVInstPrinter::printAddressingModel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::AddressingModel e =
+ static_cast<SPIRV::AddressingModel>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getAddressingModelName(e);
+ }
+}
+
+void SPIRVInstPrinter::printMemoryModel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::MemoryModel e =
+ static_cast<SPIRV::MemoryModel>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getMemoryModelName(e);
+ }
+}
+
+void SPIRVInstPrinter::printExecutionMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::ExecutionMode e =
+ static_cast<SPIRV::ExecutionMode>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getExecutionModeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printStorageClass(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::StorageClass e =
+ static_cast<SPIRV::StorageClass>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getStorageClassName(e);
+ }
+}
+
+void SPIRVInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::Dim e = static_cast<SPIRV::Dim>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getDimName(e);
+ }
+}
+
+void SPIRVInstPrinter::printSamplerAddressingMode(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::SamplerAddressingMode e = static_cast<SPIRV::SamplerAddressingMode>(
+ MI->getOperand(OpNo).getImm());
+ O << SPIRV::getSamplerAddressingModeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printSamplerFilterMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::SamplerFilterMode e =
+ static_cast<SPIRV::SamplerFilterMode>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getSamplerFilterModeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printImageFormat(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::ImageFormat e =
+ static_cast<SPIRV::ImageFormat>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getImageFormatName(e);
+ }
+}
+
+void SPIRVInstPrinter::printImageChannelOrder(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::ImageChannelOrder e =
+ static_cast<SPIRV::ImageChannelOrder>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getImageChannelOrderName(e);
+ }
+}
+
+void SPIRVInstPrinter::printImageChannelDataType(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::ImageChannelDataType e =
+ static_cast<SPIRV::ImageChannelDataType>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getImageChannelDataTypeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printImageOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ unsigned e = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getImageOperandName(e);
+ }
+}
+
+void SPIRVInstPrinter::printFPFastMathMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ unsigned e = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getFPFastMathModeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printFPRoundingMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::FPRoundingMode e =
+ static_cast<SPIRV::FPRoundingMode>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getFPRoundingModeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printLinkageType(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::LinkageType e =
+ static_cast<SPIRV::LinkageType>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getLinkageTypeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printAccessQualifier(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::AccessQualifier e =
+ static_cast<SPIRV::AccessQualifier>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getAccessQualifierName(e);
+ }
+}
+
+void SPIRVInstPrinter::printFunctionParameterAttribute(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::FunctionParameterAttribute e =
+ static_cast<SPIRV::FunctionParameterAttribute>(
+ MI->getOperand(OpNo).getImm());
+ O << SPIRV::getFunctionParameterAttributeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printDecoration(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::Decoration e =
+ static_cast<SPIRV::Decoration>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getDecorationName(e);
+ }
+}
+
+void SPIRVInstPrinter::printBuiltIn(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::BuiltIn e =
+ static_cast<SPIRV::BuiltIn>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getBuiltInName(e);
+ }
+}
+
+void SPIRVInstPrinter::printSelectionControl(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ unsigned e = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getSelectionControlName(e);
+ }
+}
+
+void SPIRVInstPrinter::printLoopControl(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ unsigned e = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getLoopControlName(e);
+ }
+}
+
+void SPIRVInstPrinter::printFunctionControl(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ unsigned e = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getFunctionControlName(e);
+ }
+}
+
+void SPIRVInstPrinter::printMemorySemantics(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ unsigned e = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getMemorySemanticsName(e);
+ }
+}
+
+void SPIRVInstPrinter::printMemoryOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ unsigned e = static_cast<unsigned>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getMemoryOperandName(e);
+ }
+}
+
+void SPIRVInstPrinter::printScope(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::Scope e = static_cast<SPIRV::Scope>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getScopeName(e);
+ }
+}
+
+void SPIRVInstPrinter::printGroupOperation(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::GroupOperation e =
+ static_cast<SPIRV::GroupOperation>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getGroupOperationName(e);
+ }
+}
+
+void SPIRVInstPrinter::printKernelEnqueueFlags(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::KernelEnqueueFlags e =
+ static_cast<SPIRV::KernelEnqueueFlags>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getKernelEnqueueFlagsName(e);
+ }
+}
+
+void SPIRVInstPrinter::printKernelProfilingInfo(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo < MI->getNumOperands()) {
+ SPIRV::KernelProfilingInfo e =
+ static_cast<SPIRV::KernelProfilingInfo>(MI->getOperand(OpNo).getImm());
+ O << SPIRV::getKernelProfilingInfoName(e);
+ }
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h
new file mode 100644
index 000000000000..cd3b6f1e6d66
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.h
@@ -0,0 +1,94 @@
+//===-- SPIRVInstPrinter.h - Output SPIR-V MCInsts as ASM -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a SPIR-V MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVINSTPRINTER_H
+#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+class SPIRVInstPrinter : public MCInstPrinter {
+private:
+ void recordOpExtInstImport(const MCInst *MI);
+
+public:
+ using MCInstPrinter::MCInstPrinter;
+
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &OS) override;
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = nullptr);
+
+ void printStringImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printOpDecorate(const MCInst *MI, raw_ostream &O);
+ void printOpExtInst(const MCInst *MI, raw_ostream &O);
+ void printRemainingVariableOps(const MCInst *MI, unsigned StartIndex,
+ raw_ostream &O, bool SkipFirstSpace = false,
+ bool SkipImmediates = false);
+ void printOpConstantVarOps(const MCInst *MI, unsigned StartIndex,
+ raw_ostream &O);
+
+ void printExtInst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // SPIR-V enumerations printing.
+ void printCapability(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSourceLanguage(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printExecutionModel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAddressingModel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemoryModel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printExecutionMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printStorageClass(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDim(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printSamplerAddressingMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
+ void printSamplerFilterMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printImageFormat(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printImageChannelOrder(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printImageChannelDataType(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
+ void printImageOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printFPFastMathMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printFPRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printLinkageType(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAccessQualifier(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printFunctionParameterAttribute(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
+
+ void printDecoration(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBuiltIn(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printSelectionControl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLoopControl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printFunctionControl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemorySemantics(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemoryOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printScope(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGroupOperation(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printKernelEnqueueFlags(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printKernelProfilingInfo(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
+ // Autogenerated by tblgen.
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVINSTPRINTER_H
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp
new file mode 100644
index 000000000000..2f3462f419e5
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp
@@ -0,0 +1,34 @@
+//===-- SPIRVMCAsmInfo.cpp - SPIR-V asm properties --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the SPIRVMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+SPIRVMCAsmInfo::SPIRVMCAsmInfo(const Triple &TT,
+ const MCTargetOptions &Options) {
+ IsLittleEndian = true;
+
+ HasSingleParameterDotFile = false;
+ HasDotTypeDotSizeDirective = false;
+
+ MinInstAlignment = 4;
+
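+  // SPIR-V instruction words are 32 bits wide, and SPIR-V textual assembly
+  // (e.g. the output of spirv-dis) uses ';' to introduce comments, so the
+  // settings below match that.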
+ CodePointerSize = 4;
+ CommentString = ";";
+ HasFunctionAlignment = false;
+}
+
+bool SPIRVMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
+ return true;
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h
new file mode 100644
index 000000000000..08e579e1c32c
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- SPIRVMCAsmInfo.h - SPIR-V asm properties -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPIRVMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCASMINFO_H
+#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+class Triple;
+
+class SPIRVMCAsmInfo : public MCAsmInfo {
+public:
+ explicit SPIRVMCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
+ bool shouldOmitSectionDirective(StringRef SectionName) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCASMINFO_H
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
new file mode 100644
index 000000000000..d953bc590473
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCCodeEmitter.cpp
@@ -0,0 +1,132 @@
+//===-- SPIRVMCCodeEmitter.cpp - Emit SPIR-V machine code -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIRVMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "spirv-mccodeemitter"
+
+namespace {
+
+class SPIRVMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+
+public:
+ SPIRVMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+ SPIRVMCCodeEmitter(const SPIRVMCCodeEmitter &) = delete;
+ void operator=(const SPIRVMCCodeEmitter &) = delete;
+ ~SPIRVMCCodeEmitter() override = default;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+private:
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createSPIRVMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx) {
+ return new SPIRVMCCodeEmitter(MCII);
+}
+
+using EndianWriter = support::endian::Writer;
+
+// Check if the instruction has a type argument for operand 1, and defines an ID
+// output register in operand 0. If so, we need to swap operands 0 and 1 so the
+// type comes first in the output, despite coming second in the MCInst.
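+// For example, in "%res = OpIAdd %int %a %b" the binary form encodes the
+// result type <id> %int before the result <id> %res, while the MCInst has
+// the def %res as operand 0 and the type %int as operand 1.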
+static bool hasType(const MCInst &MI, const MCInstrInfo &MII) {
+ MCInstrDesc MCDesc = MII.get(MI.getOpcode());
+ // If we define an output, and have at least one other argument.
+ if (MCDesc.getNumDefs() == 1 && MCDesc.getNumOperands() >= 2) {
+ // Check if we define an ID, and take a type as operand 1.
+ auto DefOpInfo = MCDesc.opInfo_begin();
+ auto FirstArgOpInfo = MCDesc.opInfo_begin() + 1;
+ return (DefOpInfo->RegClass == SPIRV::IDRegClassID ||
+ DefOpInfo->RegClass == SPIRV::ANYIDRegClassID) &&
+ FirstArgOpInfo->RegClass == SPIRV::TYPERegClassID;
+ }
+ return false;
+}
+
+static void emitOperand(const MCOperand &Op, EndianWriter &OSE) {
+ if (Op.isReg()) {
+ // Emit the id index starting at 1 (0 is an invalid index).
+ OSE.write<uint32_t>(Register::virtReg2Index(Op.getReg()) + 1);
+ } else if (Op.isImm()) {
+ OSE.write<uint32_t>(Op.getImm());
+ } else {
+ llvm_unreachable("Unexpected operand type in VReg");
+ }
+}
+
+// Emit the type in operand 1 before the ID in operand 0 it defines, and all
+// remaining operands in the order they come naturally.
+static void emitTypedInstrOperands(const MCInst &MI, EndianWriter &OSE) {
+ unsigned NumOps = MI.getNumOperands();
+ emitOperand(MI.getOperand(1), OSE);
+ emitOperand(MI.getOperand(0), OSE);
+ for (unsigned i = 2; i < NumOps; ++i)
+ emitOperand(MI.getOperand(i), OSE);
+}
+
+// Emit operands in the order they come naturally.
+static void emitUntypedInstrOperands(const MCInst &MI, EndianWriter &OSE) {
+ for (const auto &Op : MI)
+ emitOperand(Op, OSE);
+}
+
+void SPIRVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto Features = computeAvailableFeatures(STI.getFeatureBits());
+ verifyInstructionPredicates(MI, Features);
+
+ EndianWriter OSE(OS, support::little);
+
+  // Encode the first 32-bit word of the instruction: the high 16 bits hold
+  // the total word count (operands + 1) and the low 16 bits hold the opcode.
+ const uint64_t OpCode = getBinaryCodeForInstr(MI, Fixups, STI);
+ const uint32_t NumWords = MI.getNumOperands() + 1;
+ const uint32_t FirstWord = (NumWords << 16) | OpCode;
+ OSE.write<uint32_t>(FirstWord);
+
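+  // For instance, OpCapability (opcode 17) with its single operand occupies
+  // two words, so the first word just emitted is (2 << 16) | 17 == 0x00020011.
+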
+ // Emit the instruction arguments (emitting the output type first if present).
+ if (hasType(MI, MCII))
+ emitTypedInstrOperands(MI, OSE);
+ else
+ emitUntypedInstrOperands(MI, OSE);
+}
+
+#define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "SPIRVGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
new file mode 100644
index 000000000000..6b8b4a73af92
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp
@@ -0,0 +1,102 @@
+//===-- SPIRVMCTargetDesc.cpp - SPIR-V Target Descriptions ----*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SPIR-V specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVMCTargetDesc.h"
+#include "SPIRVInstPrinter.h"
+#include "SPIRVMCAsmInfo.h"
+#include "SPIRVTargetStreamer.h"
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPIRVGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPIRVGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPIRVGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPIRVMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSPIRVMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createSPIRVMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ return X;
+}
+
+static MCSubtargetInfo *
+createSPIRVMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ return createSPIRVMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+}
+
+static MCStreamer *
+createSPIRVMCStreamer(const Triple &T, MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll) {
+ return createSPIRVStreamer(Ctx, std::move(MAB), std::move(OW),
+ std::move(Emitter), RelaxAll);
+}
+
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &,
+ MCInstPrinter *, bool) {
+ return new SPIRVTargetStreamer(S);
+}
+
+static MCInstPrinter *createSPIRVMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ assert(SyntaxVariant == 0);
+ return new SPIRVInstPrinter(MAI, MII, MRI);
+}
+
+namespace {
+
+class SPIRVMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ explicit SPIRVMCInstrAnalysis(const MCInstrInfo *Info)
+ : MCInstrAnalysis(Info) {}
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createSPIRVInstrAnalysis(const MCInstrInfo *Info) {
+ return new SPIRVMCInstrAnalysis(Info);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetMC() {
+ for (Target *T : {&getTheSPIRV32Target(), &getTheSPIRV64Target()}) {
+ RegisterMCAsmInfo<SPIRVMCAsmInfo> X(*T);
+ TargetRegistry::RegisterMCInstrInfo(*T, createSPIRVMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createSPIRVMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createSPIRVMCSubtargetInfo);
+ TargetRegistry::RegisterSPIRVStreamer(*T, createSPIRVMCStreamer);
+ TargetRegistry::RegisterMCInstPrinter(*T, createSPIRVMCInstPrinter);
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createSPIRVInstrAnalysis);
+ TargetRegistry::RegisterMCCodeEmitter(*T, createSPIRVMCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createSPIRVAsmBackend);
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer);
+ }
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h
new file mode 100644
index 000000000000..4009fa96aa68
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.h
@@ -0,0 +1,52 @@
+//===-- SPIRVMCTargetDesc.h - SPIR-V Target Descriptions --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SPIR-V specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCTARGETDESC_H
+#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <memory>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectTargetWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class MCTargetOptions;
+class Target;
+
+MCCodeEmitter *createSPIRVMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx);
+
+MCAsmBackend *createSPIRVAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
+ const MCTargetOptions &Options);
+
+std::unique_ptr<MCObjectTargetWriter> createSPIRVObjectTargetWriter();
+} // namespace llvm
+
+// Defines symbolic names for SPIR-V registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "SPIRVGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPIR-V instructions.
+#define GET_INSTRINFO_ENUM
+#include "SPIRVGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPIRVGenSubtargetInfo.inc"
+
+#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVMCTARGETDESC_H
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp
new file mode 100644
index 000000000000..685168b4073d
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVObjectTargetWriter.cpp
@@ -0,0 +1,25 @@
+//===- SPIRVObjectTargetWriter.cpp - SPIR-V Object Writer -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVMCTargetDesc.h"
+#include "llvm/MC/MCSPIRVObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class SPIRVObjectTargetWriter : public MCSPIRVObjectTargetWriter {
+public:
+ SPIRVObjectTargetWriter() = default;
+};
+
+} // namespace
+
+std::unique_ptr<MCObjectTargetWriter> llvm::createSPIRVObjectTargetWriter() {
+ return std::make_unique<SPIRVObjectTargetWriter>();
+}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
new file mode 100644
index 000000000000..0a318e0e01e5
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.cpp
@@ -0,0 +1,18 @@
+//===-- SPIRVTargetStreamer.cpp - SPIRVTargetStreamer class -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIRVTargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVTargetStreamer.h"
+
+using namespace llvm;
+
+SPIRVTargetStreamer::SPIRVTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+SPIRVTargetStreamer::~SPIRVTargetStreamer() {}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h
new file mode 100644
index 000000000000..2cc8f50aba67
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h
@@ -0,0 +1,28 @@
+//===-- SPIRVTargetStreamer.h - SPIRV Target Streamer ----------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class MCSection;
+
+class SPIRVTargetStreamer : public MCTargetStreamer {
+public:
+ SPIRVTargetStreamer(MCStreamer &S);
+ ~SPIRVTargetStreamer() override;
+
+ void changeSection(const MCSection *CurSection, MCSection *Section,
+                     const MCExpr *SubSection, raw_ostream &OS) override {}
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_MCTARGETDESC_SPIRVTARGETSTREAMER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h
new file mode 100644
index 000000000000..8da54a5d6e61
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRV.h
@@ -0,0 +1,34 @@
+//===-- SPIRV.h - Top-level interface for SPIR-V representation -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRV_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRV_H
+
+#include "MCTargetDesc/SPIRVMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class SPIRVTargetMachine;
+class SPIRVSubtarget;
+class InstructionSelector;
+class RegisterBankInfo;
+
+FunctionPass *createSPIRVPreLegalizerPass();
+FunctionPass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM);
+InstructionSelector *
+createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+ const SPIRVSubtarget &Subtarget,
+ const RegisterBankInfo &RBI);
+
+void initializeSPIRVModuleAnalysisPass(PassRegistry &);
+void initializeSPIRVPreLegalizerPass(PassRegistry &);
+void initializeSPIRVEmitIntrinsicsPass(PassRegistry &);
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRV_H
diff --git a/llvm/lib/Target/SPIRV/SPIRV.td b/llvm/lib/Target/SPIRV/SPIRV.td
new file mode 100644
index 000000000000..27374acb8882
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRV.td
@@ -0,0 +1,43 @@
+//===-- SPIRV.td - Describe the SPIR-V Target Machine ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "SPIRVRegisterInfo.td"
+include "SPIRVRegisterBanks.td"
+include "SPIRVInstrInfo.td"
+
+def SPIRVInstrInfo : InstrInfo;
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+def SPIRV10 : SubtargetFeature<"spirv1.0", "SPIRVVersion", "10",
+ "Use SPIR-V version 1.0">;
+def SPIRV11 : SubtargetFeature<"spirv1.1", "SPIRVVersion", "11",
+ "Use SPIR-V version 1.1">;
+def SPIRV12 : SubtargetFeature<"spirv1.2", "SPIRVVersion", "12",
+ "Use SPIR-V version 1.2">;
+def SPIRV13 : SubtargetFeature<"spirv1.3", "SPIRVVersion", "13",
+ "Use SPIR-V version 1.3">;
+def SPIRV14 : SubtargetFeature<"spirv1.4", "SPIRVVersion", "14",
+ "Use SPIR-V version 1.4">;
+def SPIRV15 : SubtargetFeature<"spirv1.5", "SPIRVVersion", "15",
+ "Use SPIR-V version 1.5">;
+
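+// A usage sketch (assuming the standard -mattr plumbing): passing, e.g.,
+// -mattr=+spirv1.5 to llc would select the SPIRVVersion=15 feature above.
+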
+def SPIRVInstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+def SPIRV : Target {
+ let InstructionSet = SPIRVInstrInfo;
+ let AssemblyWriters = [SPIRVInstPrinter];
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
new file mode 100644
index 000000000000..0de232651377
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -0,0 +1,348 @@
+//===-- SPIRVAsmPrinter.cpp - SPIR-V LLVM assembly writer ------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the SPIR-V assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SPIRVInstPrinter.h"
+#include "SPIRV.h"
+#include "SPIRVInstrInfo.h"
+#include "SPIRVMCInstLower.h"
+#include "SPIRVModuleAnalysis.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+class SPIRVAsmPrinter : public AsmPrinter {
+public:
+ explicit SPIRVAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), ST(nullptr), TII(nullptr) {}
+  bool ModuleSectionsEmitted = false;
+ const SPIRVSubtarget *ST;
+ const SPIRVInstrInfo *TII;
+
+ StringRef getPassName() const override { return "SPIRV Assembly Printer"; }
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) override;
+
+ void outputMCInst(MCInst &Inst);
+ void outputInstruction(const MachineInstr *MI);
+ void outputModuleSection(SPIRV::ModuleSectionType MSType);
+ void outputEntryPoints();
+ void outputDebugSourceAndStrings(const Module &M);
+ void outputOpMemoryModel();
+ void outputOpFunctionEnd();
+ void outputExtFuncDecls();
+ void outputModuleSections();
+
+ void emitInstruction(const MachineInstr *MI) override;
+ void emitFunctionEntryLabel() override {}
+ void emitFunctionHeader() override;
+ void emitFunctionBodyStart() override {}
+ void emitFunctionBodyEnd() override;
+ void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
+ void emitBasicBlockEnd(const MachineBasicBlock &MBB) override {}
+ void emitGlobalVariable(const GlobalVariable *GV) override {}
+ void emitOpLabel(const MachineBasicBlock &MBB);
+ void emitEndOfAsmFile(Module &M) override;
+ bool doInitialization(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ SPIRV::ModuleAnalysisInfo *MAI;
+};
+} // namespace
+
+void SPIRVAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<SPIRVModuleAnalysis>();
+ AU.addPreserved<SPIRVModuleAnalysis>();
+ AsmPrinter::getAnalysisUsage(AU);
+}
+
+// If the module has no functions, we still need to output the module-level
+// info anyway.
+void SPIRVAsmPrinter::emitEndOfAsmFile(Module &M) {
+  if (!ModuleSectionsEmitted) {
+ outputModuleSections();
+ ModuleSectionsEmitted = true;
+ }
+}
+
+void SPIRVAsmPrinter::emitFunctionHeader() {
+  if (!ModuleSectionsEmitted) {
+ outputModuleSections();
+ ModuleSectionsEmitted = true;
+ }
+ // Get the subtarget from the current MachineFunction.
+ ST = &MF->getSubtarget<SPIRVSubtarget>();
+ TII = ST->getInstrInfo();
+ const Function &F = MF->getFunction();
+
+ if (isVerbose()) {
+ OutStreamer->getCommentOS()
+ << "-- Begin function "
+ << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
+ }
+
+ auto Section = getObjFileLowering().SectionForGlobal(&F, TM);
+ MF->setSection(Section);
+}
+
+void SPIRVAsmPrinter::outputOpFunctionEnd() {
+ MCInst FunctionEndInst;
+ FunctionEndInst.setOpcode(SPIRV::OpFunctionEnd);
+ outputMCInst(FunctionEndInst);
+}
+
+// Emit OpFunctionEnd at the end of MF and clear BBNumToRegMap.
+void SPIRVAsmPrinter::emitFunctionBodyEnd() {
+ outputOpFunctionEnd();
+ MAI->BBNumToRegMap.clear();
+}
+
+void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) {
+ MCInst LabelInst;
+ LabelInst.setOpcode(SPIRV::OpLabel);
+ LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB)));
+ outputMCInst(LabelInst);
+}
+
+void SPIRVAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
+ // If it's the first MBB in MF, it has OpFunction and OpFunctionParameter, so
+ // OpLabel should be output after them.
+ if (MBB.getNumber() == MF->front().getNumber()) {
+ for (const MachineInstr &MI : MBB)
+ if (MI.getOpcode() == SPIRV::OpFunction)
+ return;
+ // TODO: this case should be checked by the verifier.
+ report_fatal_error("OpFunction is expected in the front MBB of MF");
+ }
+ emitOpLabel(MBB);
+}
+
+void SPIRVAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << SPIRVInstPrinter::getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+
+ case MachineOperand::MO_FPImmediate:
+ O << MO.getFPImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ O << BA->getName();
+ break;
+ }
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+bool SPIRVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Invalid instruction - SPIR-V does not have special modifiers
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+static bool isFuncOrHeaderInstr(const MachineInstr *MI,
+ const SPIRVInstrInfo *TII) {
+ return TII->isHeaderInstr(*MI) || MI->getOpcode() == SPIRV::OpFunction ||
+ MI->getOpcode() == SPIRV::OpFunctionParameter;
+}
+
+void SPIRVAsmPrinter::outputMCInst(MCInst &Inst) {
+ OutStreamer->emitInstruction(Inst, *OutContext.getSubtargetInfo());
+}
+
+void SPIRVAsmPrinter::outputInstruction(const MachineInstr *MI) {
+ SPIRVMCInstLower MCInstLowering;
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst, MAI);
+ outputMCInst(TmpInst);
+}
+
+void SPIRVAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ if (!MAI->getSkipEmission(MI))
+ outputInstruction(MI);
+
+ // Output OpLabel after OpFunction and OpFunctionParameter in the first MBB.
+ const MachineInstr *NextMI = MI->getNextNode();
+ if (!MAI->hasMBBRegister(*MI->getParent()) && isFuncOrHeaderInstr(MI, TII) &&
+ (!NextMI || !isFuncOrHeaderInstr(NextMI, TII))) {
+ assert(MI->getParent()->getNumber() == MF->front().getNumber() &&
+ "OpFunction is not in the front MBB of MF");
+ emitOpLabel(*MI->getParent());
+ }
+}
+
+void SPIRVAsmPrinter::outputModuleSection(SPIRV::ModuleSectionType MSType) {
+ for (MachineInstr *MI : MAI->getMSInstrs(MSType))
+ outputInstruction(MI);
+}
+
+void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) {
+ // Output OpSource.
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpSource);
+ Inst.addOperand(MCOperand::createImm(static_cast<unsigned>(MAI->SrcLang)));
+ Inst.addOperand(
+ MCOperand::createImm(static_cast<unsigned>(MAI->SrcLangVersion)));
+ outputMCInst(Inst);
+}
+
+void SPIRVAsmPrinter::outputOpMemoryModel() {
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpMemoryModel);
+ Inst.addOperand(MCOperand::createImm(static_cast<unsigned>(MAI->Addr)));
+ Inst.addOperand(MCOperand::createImm(static_cast<unsigned>(MAI->Mem)));
+ outputMCInst(Inst);
+}
+
+// Before outputting the OpEntryPoint instructions, we need to add each entry
+// point's interface: a list of IDs of global OpVariable instructions. These
+// declare the set of global variables from the module that form the
+// interface of this entry point.
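+// An illustrative result in SPIR-V assembly (hypothetical IDs):
+//   OpEntryPoint Fragment %main "main" %inColor %outColor
+// where %inColor and %outColor are the interface OpVariable IDs.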
+void SPIRVAsmPrinter::outputEntryPoints() {
+ // Find all OpVariable IDs with required StorageClass.
+ DenseSet<Register> InterfaceIDs;
+ for (MachineInstr *MI : MAI->GlobalVarList) {
+ assert(MI->getOpcode() == SPIRV::OpVariable);
+ auto SC = static_cast<SPIRV::StorageClass>(MI->getOperand(2).getImm());
+ // Before version 1.4, the interface's storage classes are limited to
+ // the Input and Output storage classes. Starting with version 1.4,
+ // the interface's storage classes are all storage classes used in
+ // declaring all global variables referenced by the entry point call tree.
+ if (ST->getSPIRVVersion() >= 14 || SC == SPIRV::StorageClass::Input ||
+ SC == SPIRV::StorageClass::Output) {
+ MachineFunction *MF = MI->getMF();
+ Register Reg = MAI->getRegisterAlias(MF, MI->getOperand(0).getReg());
+ InterfaceIDs.insert(Reg);
+ }
+ }
+
+ // Output OpEntryPoints adding interface args to all of them.
+ for (MachineInstr *MI : MAI->getMSInstrs(SPIRV::MB_EntryPoints)) {
+ SPIRVMCInstLower MCInstLowering;
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst, MAI);
+ for (Register Reg : InterfaceIDs) {
+ assert(Reg.isValid());
+ TmpInst.addOperand(MCOperand::createReg(Reg));
+ }
+ outputMCInst(TmpInst);
+ }
+}
+
+void SPIRVAsmPrinter::outputExtFuncDecls() {
+ // Insert OpFunctionEnd after each declaration.
+ SmallVectorImpl<MachineInstr *>::iterator
+ I = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).begin(),
+ E = MAI->getMSInstrs(SPIRV::MB_ExtFuncDecls).end();
+ for (; I != E; ++I) {
+ outputInstruction(*I);
+ if ((I + 1) == E || (*(I + 1))->getOpcode() == SPIRV::OpFunction)
+ outputOpFunctionEnd();
+ }
+}
+
+void SPIRVAsmPrinter::outputModuleSections() {
+ const Module *M = MMI->getModule();
+ // Get the global subtarget to output module-level info.
+ ST = static_cast<const SPIRVTargetMachine &>(TM).getSubtargetImpl();
+ TII = ST->getInstrInfo();
+ MAI = &SPIRVModuleAnalysis::MAI;
+ assert(ST && TII && MAI && M && "Module analysis is required");
+ // Output instructions according to the Logical Layout of a Module:
+ // TODO: 1,2. All OpCapability instructions, then optional OpExtension
+ // instructions.
+ // TODO: 3. Optional OpExtInstImport instructions.
+ // 4. The single required OpMemoryModel instruction.
+ outputOpMemoryModel();
+ // 5. All entry point declarations, using OpEntryPoint.
+ outputEntryPoints();
+ // 6. Execution-mode declarations, using OpExecutionMode or OpExecutionModeId.
+ // TODO:
+ // 7a. Debug: all OpString, OpSourceExtension, OpSource, and
+ // OpSourceContinued, without forward references.
+ outputDebugSourceAndStrings(*M);
+ // 7b. Debug: all OpName and all OpMemberName.
+ outputModuleSection(SPIRV::MB_DebugNames);
+ // 7c. Debug: all OpModuleProcessed instructions.
+ outputModuleSection(SPIRV::MB_DebugModuleProcessed);
+ // 8. All annotation instructions (all decorations).
+ outputModuleSection(SPIRV::MB_Annotations);
+ // 9. All type declarations (OpTypeXXX instructions), all constant
+ // instructions, and all global variable declarations. This section is
+ // the first section to allow use of: OpLine and OpNoLine debug information;
+ // non-semantic instructions with OpExtInst.
+ outputModuleSection(SPIRV::MB_TypeConstVars);
+ // 10. All function declarations (functions without a body).
+ outputExtFuncDecls();
+ // 11. All function definitions (functions with a body).
+ // This is done in regular function output.
+}
+
+bool SPIRVAsmPrinter::doInitialization(Module &M) {
+ ModuleSectionsEmitted = false;
+  // We need to call the parent's implementation explicitly.
+ return AsmPrinter::doInitialization(M);
+}
+
+// Force static initialization.
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVAsmPrinter() {
+ RegisterAsmPrinter<SPIRVAsmPrinter> X(getTheSPIRV32Target());
+ RegisterAsmPrinter<SPIRVAsmPrinter> Y(getTheSPIRV64Target());
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
new file mode 100644
index 000000000000..df07a126eeea
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -0,0 +1,223 @@
+//===--- SPIRVCallLowering.cpp - Call lowering ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering of LLVM calls to machine code calls for
+// GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVCallLowering.h"
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVISelLowering.h"
+#include "SPIRVRegisterInfo.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVUtils.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+
+using namespace llvm;
+
+SPIRVCallLowering::SPIRVCallLowering(const SPIRVTargetLowering &TLI,
+ const SPIRVSubtarget &ST,
+ SPIRVGlobalRegistry *GR)
+ : CallLowering(&TLI), ST(ST), GR(GR) {}
+
+bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, ArrayRef<Register> VRegs,
+ FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const {
+ // Currently all return types should use a single register.
+ // TODO: handle the case of multiple registers.
+ if (VRegs.size() > 1)
+ return false;
+ if (Val)
+ return MIRBuilder.buildInstr(SPIRV::OpReturnValue)
+ .addUse(VRegs[0])
+ .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
+ *ST.getRegBankInfo());
+ MIRBuilder.buildInstr(SPIRV::OpReturn);
+ return true;
+}
+
+// Based on the LLVM function attributes, get a SPIR-V FunctionControl.
+static uint32_t getFunctionControl(const Function &F) {
+ uint32_t FuncControl = static_cast<uint32_t>(SPIRV::FunctionControl::None);
+ if (F.hasFnAttribute(Attribute::AttrKind::AlwaysInline)) {
+ FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::Inline);
+ }
+ if (F.hasFnAttribute(Attribute::AttrKind::ReadNone)) {
+ FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::Pure);
+ }
+ if (F.hasFnAttribute(Attribute::AttrKind::ReadOnly)) {
+ FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::Const);
+ }
+ if (F.hasFnAttribute(Attribute::AttrKind::NoInline)) {
+ FuncControl |= static_cast<uint32_t>(SPIRV::FunctionControl::DontInline);
+ }
+ return FuncControl;
+}
+
+bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const {
+ assert(GR && "Must initialize the SPIRV type registry before lowering args.");
+
+ // Assign types and names to all args, and store their types for later.
+ SmallVector<Register, 4> ArgTypeVRegs;
+ if (VRegs.size() > 0) {
+ unsigned i = 0;
+ for (const auto &Arg : F.args()) {
+ // Currently formal args should use single registers.
+ // TODO: handle the case of multiple registers.
+ if (VRegs[i].size() > 1)
+ return false;
+ auto *SpirvTy =
+ GR->assignTypeToVReg(Arg.getType(), VRegs[i][0], MIRBuilder);
+ ArgTypeVRegs.push_back(GR->getSPIRVTypeID(SpirvTy));
+
+ if (Arg.hasName())
+ buildOpName(VRegs[i][0], Arg.getName(), MIRBuilder);
+ if (Arg.getType()->isPointerTy()) {
+ auto DerefBytes = static_cast<unsigned>(Arg.getDereferenceableBytes());
+ if (DerefBytes != 0)
+ buildOpDecorate(VRegs[i][0], MIRBuilder,
+ SPIRV::Decoration::MaxByteOffset, {DerefBytes});
+ }
+ if (Arg.hasAttribute(Attribute::Alignment)) {
+ buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Alignment,
+ {static_cast<unsigned>(Arg.getParamAlignment())});
+ }
+ if (Arg.hasAttribute(Attribute::ReadOnly)) {
+ auto Attr =
+ static_cast<unsigned>(SPIRV::FunctionParameterAttribute::NoWrite);
+ buildOpDecorate(VRegs[i][0], MIRBuilder,
+ SPIRV::Decoration::FuncParamAttr, {Attr});
+ }
+ if (Arg.hasAttribute(Attribute::ZExt)) {
+ auto Attr =
+ static_cast<unsigned>(SPIRV::FunctionParameterAttribute::Zext);
+ buildOpDecorate(VRegs[i][0], MIRBuilder,
+ SPIRV::Decoration::FuncParamAttr, {Attr});
+ }
+ ++i;
+ }
+ }
+
+ // Generate a SPIR-V type for the function.
+ auto MRI = MIRBuilder.getMRI();
+ Register FuncVReg = MRI->createGenericVirtualRegister(LLT::scalar(32));
+ MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass);
+
+ auto *FTy = F.getFunctionType();
+ auto FuncTy = GR->assignTypeToVReg(FTy, FuncVReg, MIRBuilder);
+
+ // Build the OpTypeFunction declaring it.
+ Register ReturnTypeID = FuncTy->getOperand(1).getReg();
+ uint32_t FuncControl = getFunctionControl(F);
+
+ MIRBuilder.buildInstr(SPIRV::OpFunction)
+ .addDef(FuncVReg)
+ .addUse(ReturnTypeID)
+ .addImm(FuncControl)
+ .addUse(GR->getSPIRVTypeID(FuncTy));
+
+ // Add OpFunctionParameters.
+ const unsigned NumArgs = ArgTypeVRegs.size();
+ for (unsigned i = 0; i < NumArgs; ++i) {
+ assert(VRegs[i].size() == 1 && "Formal arg has multiple vregs");
+ MRI->setRegClass(VRegs[i][0], &SPIRV::IDRegClass);
+ MIRBuilder.buildInstr(SPIRV::OpFunctionParameter)
+ .addDef(VRegs[i][0])
+ .addUse(ArgTypeVRegs[i]);
+ }
+ // Name the function.
+ if (F.hasName())
+ buildOpName(FuncVReg, F.getName(), MIRBuilder);
+
+ // Handle entry points and function linkage.
+ if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpEntryPoint)
+ .addImm(static_cast<uint32_t>(SPIRV::ExecutionModel::Kernel))
+ .addUse(FuncVReg);
+ addStringImm(F.getName(), MIB);
+ } else if (F.getLinkage() == GlobalValue::LinkageTypes::ExternalLinkage ||
+ F.getLinkage() == GlobalValue::LinkOnceODRLinkage) {
+ auto LnkTy = F.isDeclaration() ? SPIRV::LinkageType::Import
+ : SPIRV::LinkageType::Export;
+ buildOpDecorate(FuncVReg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
+ {static_cast<uint32_t>(LnkTy)}, F.getGlobalIdentifier());
+ }
+
+ return true;
+}
+
+bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const {
+ // Currently call returns should have single vregs.
+ // TODO: handle the case of multiple registers.
+ if (Info.OrigRet.Regs.size() > 1)
+ return false;
+
+ Register ResVReg =
+ Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0];
+ // Emit a regular OpFunctionCall. If it's an externally declared function,
+ // be sure to emit its type and function declaration here. It will be
+ // hoisted globally later.
+ if (Info.Callee.isGlobal()) {
+ auto *CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal());
+ // TODO: support constexpr casts and indirect calls.
+ if (CF == nullptr)
+ return false;
+ if (CF->isDeclaration()) {
+ // Emit the type info and forward function declaration to the first MBB
+ // to ensure VReg definition dependencies are valid across all MBBs.
+ MachineBasicBlock::iterator OldII = MIRBuilder.getInsertPt();
+ MachineBasicBlock &OldBB = MIRBuilder.getMBB();
+ MachineBasicBlock &FirstBB = *MIRBuilder.getMF().getBlockNumbered(0);
+ MIRBuilder.setInsertPt(FirstBB, FirstBB.instr_end());
+
+ SmallVector<ArrayRef<Register>, 8> VRegArgs;
+ SmallVector<SmallVector<Register, 1>, 8> ToInsert;
+ for (const Argument &Arg : CF->args()) {
+ if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero())
+ continue; // Don't handle zero sized types.
+ ToInsert.push_back({MIRBuilder.getMRI()->createGenericVirtualRegister(
+ LLT::scalar(32))});
+ VRegArgs.push_back(ToInsert.back());
+ }
+ // TODO: Reuse FunctionLoweringInfo.
+ FunctionLoweringInfo FuncInfo;
+ lowerFormalArguments(MIRBuilder, *CF, VRegArgs, FuncInfo);
+ MIRBuilder.setInsertPt(OldBB, OldII);
+ }
+ }
+
+ // Make sure there's a valid return reg, even for functions returning void.
+ if (!ResVReg.isValid()) {
+ ResVReg = MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::IDRegClass);
+ }
+ SPIRVType *RetType =
+ GR->assignTypeToVReg(Info.OrigRet.Ty, ResVReg, MIRBuilder);
+
+ // Emit the OpFunctionCall and its args.
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpFunctionCall)
+ .addDef(ResVReg)
+ .addUse(GR->getSPIRVTypeID(RetType))
+ .add(Info.Callee);
+
+ for (const auto &Arg : Info.OrigArgs) {
+ // Currently call args should have single vregs.
+ if (Arg.Regs.size() > 1)
+ return false;
+ MIB.addUse(Arg.Regs[0]);
+ }
+ return MIB.constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
+ *ST.getRegBankInfo());
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.h b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h
new file mode 100644
index 000000000000..c179bb35154b
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h
@@ -0,0 +1,50 @@
+//===--- SPIRVCallLowering.h - Call lowering --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes how to lower LLVM calls to machine code calls.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
+
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+
+namespace llvm {
+
+class SPIRVGlobalRegistry;
+class SPIRVSubtarget;
+class SPIRVTargetLowering;
+
+class SPIRVCallLowering : public CallLowering {
+private:
+ const SPIRVSubtarget &ST;
+ // Used to create and assign function, argument, and return type information.
+ SPIRVGlobalRegistry *GR;
+
+public:
+ SPIRVCallLowering(const SPIRVTargetLowering &TLI, const SPIRVSubtarget &ST,
+ SPIRVGlobalRegistry *GR);
+
+  // Build OpReturn or OpReturnValue.
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
+ Register SwiftErrorVReg) const override;
+
+ // Build OpFunction, OpFunctionParameter, and any EntryPoint or Linkage data.
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
+
+  // Build OpFunctionCall, or replace it with a builtin function.
+ bool lowerCall(MachineIRBuilder &MIRBuilder,
+ CallLoweringInfo &Info) const override;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
new file mode 100644
index 000000000000..9624482e3622
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -0,0 +1,433 @@
+//===-- SPIRVEmitIntrinsics.cpp - emit SPIRV intrinsics ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The pass emits SPIR-V intrinsics that keep essential high-level information
+// for the translation of LLVM IR to SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRV.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+
+#include <queue>
+
+// This pass performs the following transformations at the LLVM IR level,
+// required for the subsequent translation to SPIR-V:
+// - replaces direct usages of aggregate constants with target-specific
+//   intrinsics;
+// - replaces aggregate-related instructions (extract/insert, ld/st, etc.)
+//   with target-specific intrinsics;
+// - emits intrinsics for the global variable initializers since IRTranslator
+// doesn't handle them and it's not very convenient to translate them
+// ourselves;
+// - emits intrinsics to keep track of the string names assigned to the values;
+// - emits intrinsics to keep track of constants (this is necessary to have an
+// LLVM IR constant after the IRTranslation is completed) for their further
+// deduplication;
+// - emits intrinsics to keep track of original LLVM types of the values
+// to be able to emit proper SPIR-V types eventually.
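+//
+// Roughly (an illustrative sketch), an instruction such as
+//   %v = extractvalue { i32, i32 } %agg, 0
+// becomes a call to the spv_extractv intrinsic with the index passed as an
+// explicit i32 argument, so the aggregate structure survives IRTranslator.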
+//
+// TODO: consider removing spv.track.constant in favor of spv.assign.type.
+
+using namespace llvm;
+
+namespace llvm {
+void initializeSPIRVEmitIntrinsicsPass(PassRegistry &);
+} // namespace llvm
+
+namespace {
+class SPIRVEmitIntrinsics
+ : public FunctionPass,
+ public InstVisitor<SPIRVEmitIntrinsics, Instruction *> {
+ SPIRVTargetMachine *TM = nullptr;
+ IRBuilder<> *IRB = nullptr;
+ Function *F = nullptr;
+ bool TrackConstants = true;
+ DenseMap<Instruction *, Constant *> AggrConsts;
+ DenseSet<Instruction *> AggrStores;
+ void preprocessCompositeConstants();
+ CallInst *buildIntrWithMD(Intrinsic::ID IntrID, ArrayRef<Type *> Types,
+ Value *Arg, Value *Arg2) {
+ ConstantAsMetadata *CM = ValueAsMetadata::getConstant(Arg);
+ MDTuple *TyMD = MDNode::get(F->getContext(), CM);
+ MetadataAsValue *VMD = MetadataAsValue::get(F->getContext(), TyMD);
+ return IRB->CreateIntrinsic(IntrID, {Types}, {Arg2, VMD});
+ }
+ void replaceMemInstrUses(Instruction *Old, Instruction *New);
+ void processInstrAfterVisit(Instruction *I);
+ void insertAssignTypeIntrs(Instruction *I);
+ void processGlobalValue(GlobalVariable &GV);
+
+public:
+ static char ID;
+ SPIRVEmitIntrinsics() : FunctionPass(ID) {
+ initializeSPIRVEmitIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
+ SPIRVEmitIntrinsics(SPIRVTargetMachine *_TM) : FunctionPass(ID), TM(_TM) {
+ initializeSPIRVEmitIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
+ Instruction *visitInstruction(Instruction &I) { return &I; }
+ Instruction *visitSwitchInst(SwitchInst &I);
+ Instruction *visitGetElementPtrInst(GetElementPtrInst &I);
+ Instruction *visitBitCastInst(BitCastInst &I);
+ Instruction *visitInsertElementInst(InsertElementInst &I);
+ Instruction *visitExtractElementInst(ExtractElementInst &I);
+ Instruction *visitInsertValueInst(InsertValueInst &I);
+ Instruction *visitExtractValueInst(ExtractValueInst &I);
+ Instruction *visitLoadInst(LoadInst &I);
+ Instruction *visitStoreInst(StoreInst &I);
+ Instruction *visitAllocaInst(AllocaInst &I);
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+char SPIRVEmitIntrinsics::ID = 0;
+
+INITIALIZE_PASS(SPIRVEmitIntrinsics, "emit-intrinsics", "SPIRV emit intrinsics",
+ false, false)
+
+static inline bool isAssignTypeInstr(const Instruction *I) {
+ return isa<IntrinsicInst>(I) &&
+ cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::spv_assign_type;
+}
+
+static bool isMemInstrToReplace(Instruction *I) {
+ return isa<StoreInst>(I) || isa<LoadInst>(I) || isa<InsertValueInst>(I) ||
+ isa<ExtractValueInst>(I);
+}
+
+static bool isAggrToReplace(const Value *V) {
+ return isa<ConstantAggregate>(V) || isa<ConstantDataArray>(V) ||
+ (isa<ConstantAggregateZero>(V) && !V->getType()->isVectorTy());
+}
+
+static void setInsertPointSkippingPhis(IRBuilder<> &B, Instruction *I) {
+ if (isa<PHINode>(I))
+ B.SetInsertPoint(I->getParent(), I->getParent()->getFirstInsertionPt());
+ else
+ B.SetInsertPoint(I);
+}
+
+static bool requireAssignType(Instruction *I) {
+ IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(I);
+ if (Intr) {
+ switch (Intr->getIntrinsicID()) {
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ return false;
+ }
+ }
+ return true;
+}
+
+void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
+ Instruction *New) {
+ while (!Old->user_empty()) {
+ auto *U = Old->user_back();
+ if (isMemInstrToReplace(U) || isa<ReturnInst>(U)) {
+ U->replaceUsesOfWith(Old, New);
+ } else if (isAssignTypeInstr(U)) {
+ IRB->SetInsertPoint(U);
+ SmallVector<Value *, 2> Args = {New, U->getOperand(1)};
+ IRB->CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
+ U->eraseFromParent();
+ } else {
+ llvm_unreachable("illegal aggregate intrinsic user");
+ }
+ }
+ Old->eraseFromParent();
+}
+
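+// Rewrite uses of aggregate constants into spv_const_composite intrinsic
+// calls. Illustrative sketch: an operand use of
+//   [2 x i32] [i32 1, i32 2]
+// is replaced with the result of an spv_const_composite(i32 1, i32 2) call,
+// and the original constant is remembered in AggrConsts so its type can be
+// assigned later.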
+void SPIRVEmitIntrinsics::preprocessCompositeConstants() {
+ std::queue<Instruction *> Worklist;
+ for (auto &I : instructions(F))
+ Worklist.push(&I);
+
+ while (!Worklist.empty()) {
+ auto *I = Worklist.front();
+ assert(I);
+ bool KeepInst = false;
+ for (const auto &Op : I->operands()) {
+ auto BuildCompositeIntrinsic = [&KeepInst, &Worklist, &I, &Op,
+ this](Constant *AggrC,
+ ArrayRef<Value *> Args) {
+ IRB->SetInsertPoint(I);
+ auto *CCI =
+ IRB->CreateIntrinsic(Intrinsic::spv_const_composite, {}, {Args});
+ Worklist.push(CCI);
+ I->replaceUsesOfWith(Op, CCI);
+ KeepInst = true;
+ AggrConsts[CCI] = AggrC;
+ };
+
+ if (auto *AggrC = dyn_cast<ConstantAggregate>(Op)) {
+ SmallVector<Value *> Args(AggrC->op_begin(), AggrC->op_end());
+ BuildCompositeIntrinsic(AggrC, Args);
+ } else if (auto *AggrC = dyn_cast<ConstantDataArray>(Op)) {
+ SmallVector<Value *> Args;
+ for (unsigned i = 0; i < AggrC->getNumElements(); ++i)
+ Args.push_back(AggrC->getElementAsConstant(i));
+ BuildCompositeIntrinsic(AggrC, Args);
+ } else if (isa<ConstantAggregateZero>(Op) &&
+ !Op->getType()->isVectorTy()) {
+ auto *AggrC = cast<ConstantAggregateZero>(Op);
+ SmallVector<Value *> Args(AggrC->op_begin(), AggrC->op_end());
+ BuildCompositeIntrinsic(AggrC, Args);
+ }
+ }
+ if (!KeepInst)
+ Worklist.pop();
+ }
+}
+
+Instruction *SPIRVEmitIntrinsics::visitSwitchInst(SwitchInst &I) {
+ SmallVector<Value *, 4> Args;
+ for (auto &Op : I.operands())
+ if (Op.get()->getType()->isSized())
+ Args.push_back(Op);
+ IRB->CreateIntrinsic(Intrinsic::spv_switch, {I.getOperand(0)->getType()},
+ {Args});
+ return &I;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) {
+ SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()};
+ SmallVector<Value *, 4> Args;
+ Args.push_back(IRB->getInt1(I.isInBounds()));
+ for (auto &Op : I.operands())
+ Args.push_back(Op);
+ auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args});
+ I.replaceAllUsesWith(NewI);
+ I.eraseFromParent();
+ return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
+ SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()};
+ SmallVector<Value *> Args(I.op_begin(), I.op_end());
+ auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_bitcast, {Types}, {Args});
+ std::string InstName = I.hasName() ? I.getName().str() : "";
+ I.replaceAllUsesWith(NewI);
+ I.eraseFromParent();
+ NewI->setName(InstName);
+ return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitInsertElementInst(InsertElementInst &I) {
+ SmallVector<Type *, 4> Types = {I.getType(), I.getOperand(0)->getType(),
+ I.getOperand(1)->getType(),
+ I.getOperand(2)->getType()};
+ SmallVector<Value *> Args(I.op_begin(), I.op_end());
+ auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_insertelt, {Types}, {Args});
+ std::string InstName = I.hasName() ? I.getName().str() : "";
+ I.replaceAllUsesWith(NewI);
+ I.eraseFromParent();
+ NewI->setName(InstName);
+ return NewI;
+}
+
+Instruction *
+SPIRVEmitIntrinsics::visitExtractElementInst(ExtractElementInst &I) {
+ SmallVector<Type *, 3> Types = {I.getType(), I.getVectorOperandType(),
+ I.getIndexOperand()->getType()};
+ SmallVector<Value *, 2> Args = {I.getVectorOperand(), I.getIndexOperand()};
+ auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_extractelt, {Types}, {Args});
+ std::string InstName = I.hasName() ? I.getName().str() : "";
+ I.replaceAllUsesWith(NewI);
+ I.eraseFromParent();
+ NewI->setName(InstName);
+ return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitInsertValueInst(InsertValueInst &I) {
+ SmallVector<Type *, 1> Types = {I.getInsertedValueOperand()->getType()};
+ SmallVector<Value *> Args;
+ for (auto &Op : I.operands())
+ if (isa<UndefValue>(Op))
+ Args.push_back(UndefValue::get(IRB->getInt32Ty()));
+ else
+ Args.push_back(Op);
+ for (auto &Op : I.indices())
+ Args.push_back(IRB->getInt32(Op));
+ Instruction *NewI =
+ IRB->CreateIntrinsic(Intrinsic::spv_insertv, {Types}, {Args});
+ replaceMemInstrUses(&I, NewI);
+ return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitExtractValueInst(ExtractValueInst &I) {
+ SmallVector<Value *> Args;
+ for (auto &Op : I.operands())
+ Args.push_back(Op);
+ for (auto &Op : I.indices())
+ Args.push_back(IRB->getInt32(Op));
+ auto *NewI =
+ IRB->CreateIntrinsic(Intrinsic::spv_extractv, {I.getType()}, {Args});
+ I.replaceAllUsesWith(NewI);
+ I.eraseFromParent();
+ return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitLoadInst(LoadInst &I) {
+ if (!I.getType()->isAggregateType())
+ return &I;
+ TrackConstants = false;
+ const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ MachineMemOperand::Flags Flags =
+ TLI->getLoadMemOperandFlags(I, F->getParent()->getDataLayout());
+ auto *NewI =
+ IRB->CreateIntrinsic(Intrinsic::spv_load, {I.getOperand(0)->getType()},
+ {I.getPointerOperand(), IRB->getInt16(Flags),
+ IRB->getInt8(I.getAlign().value())});
+ replaceMemInstrUses(&I, NewI);
+ return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) {
+ if (!AggrStores.contains(&I))
+ return &I;
+ TrackConstants = false;
+ const auto *TLI = TM->getSubtargetImpl()->getTargetLowering();
+ MachineMemOperand::Flags Flags =
+ TLI->getStoreMemOperandFlags(I, F->getParent()->getDataLayout());
+ auto *PtrOp = I.getPointerOperand();
+ auto *NewI =
+ IRB->CreateIntrinsic(Intrinsic::spv_store, {PtrOp->getType()},
+ {I.getValueOperand(), PtrOp, IRB->getInt16(Flags),
+ IRB->getInt8(I.getAlign().value())});
+ I.eraseFromParent();
+ return NewI;
+}
+
+Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) {
+ TrackConstants = false;
+ return &I;
+}
+
+void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV) {
+  // Skip the special artificial variable llvm.global.annotations.
+ if (GV.getName() == "llvm.global.annotations")
+ return;
+ if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
+ Constant *Init = GV.getInitializer();
+ Type *Ty = isAggrToReplace(Init) ? IRB->getInt32Ty() : Init->getType();
+ Constant *Const = isAggrToReplace(Init) ? IRB->getInt32(1) : Init;
+ auto *InitInst = IRB->CreateIntrinsic(Intrinsic::spv_init_global,
+ {GV.getType(), Ty}, {&GV, Const});
+ InitInst->setArgOperand(1, Init);
+ }
+ if ((!GV.hasInitializer() || isa<UndefValue>(GV.getInitializer())) &&
+ GV.getNumUses() == 0)
+ IRB->CreateIntrinsic(Intrinsic::spv_unref_global, GV.getType(), &GV);
+}
+
+void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
+ Type *Ty = I->getType();
+ if (!Ty->isVoidTy() && requireAssignType(I)) {
+ setInsertPointSkippingPhis(*IRB, I->getNextNode());
+ Type *TypeToAssign = Ty;
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::spv_const_composite) {
+ auto t = AggrConsts.find(II);
+ assert(t != AggrConsts.end());
+ TypeToAssign = t->second->getType();
+ }
+ }
+ Constant *Const = Constant::getNullValue(TypeToAssign);
+ buildIntrWithMD(Intrinsic::spv_assign_type, {Ty}, Const, I);
+ }
+ for (const auto &Op : I->operands()) {
+ if (isa<ConstantPointerNull>(Op) || isa<UndefValue>(Op) ||
+ // Check GetElementPtrConstantExpr case.
+ (isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) {
+ IRB->SetInsertPoint(I);
+ buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op);
+ }
+ }
+ // A StoreInst's operand type can be changed at the next stage, so we need
+ // to record such stores in the set.
+ if (isa<StoreInst>(I) &&
+ cast<StoreInst>(I)->getValueOperand()->getType()->isAggregateType())
+ AggrStores.insert(I);
+}
+
+void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (II && II->getIntrinsicID() == Intrinsic::spv_const_composite &&
+ TrackConstants) {
+ IRB->SetInsertPoint(I->getNextNode());
+ Type *Ty = IRB->getInt32Ty();
+ auto t = AggrConsts.find(I);
+ assert(t != AggrConsts.end());
+ auto *NewOp =
+ buildIntrWithMD(Intrinsic::spv_track_constant, {Ty, Ty}, t->second, I);
+ I->replaceAllUsesWith(NewOp);
+ NewOp->setArgOperand(0, I);
+ }
+ for (const auto &Op : I->operands()) {
+ if ((isa<ConstantAggregateZero>(Op) && Op->getType()->isVectorTy()) ||
+ isa<PHINode>(I) || isa<SwitchInst>(I))
+ TrackConstants = false;
+ if (isa<ConstantData>(Op) && TrackConstants) {
+ unsigned OpNo = Op.getOperandNo();
+ if (II && ((II->getIntrinsicID() == Intrinsic::spv_gep && OpNo == 0) ||
+ (II->paramHasAttr(OpNo, Attribute::ImmArg))))
+ continue;
+ IRB->SetInsertPoint(I);
+ auto *NewOp = buildIntrWithMD(Intrinsic::spv_track_constant,
+ {Op->getType(), Op->getType()}, Op, Op);
+ I->setOperand(OpNo, NewOp);
+ }
+ }
+ if (I->hasName()) {
+ setInsertPointSkippingPhis(*IRB, I->getNextNode());
+ std::vector<Value *> Args = {I};
+ addStringImm(I->getName(), *IRB, Args);
+ IRB->CreateIntrinsic(Intrinsic::spv_assign_name, {I->getType()}, Args);
+ }
+}
+
+bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
+ if (Func.isDeclaration())
+ return false;
+ F = &Func;
+ IRB = new IRBuilder<>(Func.getContext());
+ AggrConsts.clear();
+ AggrStores.clear();
+
+ IRB->SetInsertPoint(&Func.getEntryBlock().front());
+
+ for (auto &GV : Func.getParent()->globals())
+ processGlobalValue(GV);
+
+ preprocessCompositeConstants();
+ SmallVector<Instruction *> Worklist;
+ for (auto &I : instructions(Func))
+ Worklist.push_back(&I);
+
+ for (auto &I : Worklist)
+ insertAssignTypeIntrs(I);
+
+ for (auto *I : Worklist) {
+ TrackConstants = true;
+ if (!I->getType()->isVoidTy() || isa<StoreInst>(I))
+ IRB->SetInsertPoint(I->getNextNode());
+ I = visit(*I);
+ processInstrAfterVisit(I);
+ }
+ return true;
+}
+
+FunctionPass *llvm::createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM) {
+ return new SPIRVEmitIntrinsics(TM);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVEnums.td b/llvm/lib/Target/SPIRV/SPIRVEnums.td
new file mode 100644
index 000000000000..1d0c6ffd6e37
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVEnums.td
@@ -0,0 +1,51 @@
+//===-- SPIRVEnums.td - Describe SPIRV Enum Operands -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// All SPIRV enums defined in SPIRVBaseInfo.h should have a corresponding enum
+// operand here. This enables the correct PrintMethod to be defined, so that an
+// enum's name or mask bits can be printed automatically by SPIRVInstPrinter
+// when the operand is referred to in SPIRVInstrInfo.td.
+//
+//===----------------------------------------------------------------------===//
+
+class EnumOperand<string Name> : Operand<i32>{
+ let PrintMethod = "print"#Name;
+}
+
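+// For example, "def Capability : EnumOperand<"Capability">;" below sets the
+// operand's PrintMethod to "printCapability", which SPIRVInstPrinter is
+// expected to implement.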
+def ExtInst : EnumOperand<"ExtInst">;
+
+def Capability : EnumOperand<"Capability">;
+def SourceLanguage : EnumOperand<"SourceLanguage">;
+def ExecutionModel : EnumOperand<"ExecutionModel">;
+def AddressingModel : EnumOperand<"AddressingModel">;
+def MemoryModel : EnumOperand<"MemoryModel">;
+def ExecutionMode : EnumOperand<"ExecutionMode">;
+def StorageClass : EnumOperand<"StorageClass">;
+def Dim : EnumOperand<"Dim">;
+def SamplerAddressingMode : EnumOperand<"SamplerAddressingMode">;
+def SamplerFilterMode : EnumOperand<"SamplerFilterMode">;
+def ImageFormat : EnumOperand<"ImageFormat">;
+def ImageChannelOrder : EnumOperand<"ImageChannelOrder">;
+def ImageChannelDataType : EnumOperand<"ImageChannelDataType">;
+def ImageOperand : EnumOperand<"ImageOperand">;
+def FPFastMathMode : EnumOperand<"FPFastMathMode">;
+def FPRoundingMode : EnumOperand<"FPRoundingMode">;
+def LinkageType : EnumOperand<"LinkageType">;
+def AccessQualifier : EnumOperand<"AccessQualifier">;
+def FunctionParameterAttribute : EnumOperand<"FunctionParameterAttribute">;
+def Decoration : EnumOperand<"Decoration">;
+def Builtin : EnumOperand<"Builtin">;
+def SelectionControl: EnumOperand<"SelectionControl">;
+def LoopControl: EnumOperand<"LoopControl">;
+def FunctionControl : EnumOperand<"FunctionControl">;
+def MemorySemantics : EnumOperand<"MemorySemantics">;
+def MemoryOperand : EnumOperand<"MemoryOperand">;
+def Scope : EnumOperand<"Scope">;
+def GroupOperation : EnumOperand<"GroupOperation">;
+def KernelEnqueueFlags : EnumOperand<"KernelEnqueueFlags">;
+def KernelProfilingInfo : EnumOperand<"KernelProfilingInfo">;
diff --git a/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h b/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h
new file mode 100644
index 000000000000..b98f8d0928e5
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVFrameLowering.h
@@ -0,0 +1,39 @@
+//===-- SPIRVFrameLowering.h - Define frame lowering for SPIR-V -*- C++-*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the SPIRV-specific bits of the TargetFrameLowering
+// class. The target uses only virtual registers; it does not operate on an
+// explicit stack frame and does not generate function prologues/epilogues.
+// As a result, we are not required to implement the frame lowering
+// functionality in any substantial way.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVFRAMELOWERING_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/Alignment.h"
+
+namespace llvm {
+class SPIRVSubtarget;
+
+class SPIRVFrameLowering : public TargetFrameLowering {
+public:
+ explicit SPIRVFrameLowering(const SPIRVSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0) {}
+
+ void emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override {}
+ void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override {}
+
+ bool hasFP(const MachineFunction &MF) const override { return false; }
+};
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVFRAMELOWERING_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
new file mode 100644
index 000000000000..02a6905a1abc
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -0,0 +1,459 @@
+//===-- SPIRVGlobalRegistry.cpp - SPIR-V Global Registry --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the SPIRVGlobalRegistry class,
+// which is used to maintain rich type information required for SPIR-V even
+// after lowering from LLVM IR to GMIR. It can convert an llvm::Type into
+// an OpTypeXXX instruction, and map it to a virtual register. It also builds
+// constants and global variables, and keeps them consistent across uses.
+//
+//===----------------------------------------------------------------------===//
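+
+// A minimal usage sketch (hypothetical caller, for illustration only):
+//   SPIRVGlobalRegistry GR(/*PointerSize=*/32);
+//   GR.setCurrentFunc(MIRBuilder.getMF());
+//   Type *I32Ty = Type::getInt32Ty(Ctx);
+//   SPIRVType *SpvI32 = GR.getOrCreateSPIRVType(I32Ty, MIRBuilder);
+//   Register TypeReg = GR.getSPIRVTypeID(SpvI32); // vreg of the OpTypeInt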
+
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRV.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+
+using namespace llvm;
+SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize)
+ : PointerSize(PointerSize) {}
+
+SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg(
+ const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AccessQual, bool EmitIR) {
+
+ SPIRVType *SpirvType =
+ getOrCreateSPIRVType(Type, MIRBuilder, AccessQual, EmitIR);
+ assignSPIRVTypeToVReg(SpirvType, VReg, MIRBuilder.getMF());
+ return SpirvType;
+}
+
+void SPIRVGlobalRegistry::assignSPIRVTypeToVReg(SPIRVType *SpirvType,
+ Register VReg,
+ MachineFunction &MF) {
+ VRegToTypeMap[&MF][VReg] = SpirvType;
+}
+
+static Register createTypeVReg(MachineIRBuilder &MIRBuilder) {
+ auto &MRI = MIRBuilder.getMF().getRegInfo();
+ auto Res = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ MRI.setRegClass(Res, &SPIRV::TYPERegClass);
+ return Res;
+}
+
+static Register createTypeVReg(MachineRegisterInfo &MRI) {
+ auto Res = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ MRI.setRegClass(Res, &SPIRV::TYPERegClass);
+ return Res;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeBool(MachineIRBuilder &MIRBuilder) {
+ return MIRBuilder.buildInstr(SPIRV::OpTypeBool)
+ .addDef(createTypeVReg(MIRBuilder));
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeInt(uint32_t Width,
+ MachineIRBuilder &MIRBuilder,
+ bool IsSigned) {
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeInt)
+ .addDef(createTypeVReg(MIRBuilder))
+ .addImm(Width)
+ .addImm(IsSigned ? 1 : 0);
+ return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeFloat(uint32_t Width,
+ MachineIRBuilder &MIRBuilder) {
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeFloat)
+ .addDef(createTypeVReg(MIRBuilder))
+ .addImm(Width);
+ return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeVoid(MachineIRBuilder &MIRBuilder) {
+ return MIRBuilder.buildInstr(SPIRV::OpTypeVoid)
+ .addDef(createTypeVReg(MIRBuilder));
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeVector(uint32_t NumElems,
+ SPIRVType *ElemType,
+ MachineIRBuilder &MIRBuilder) {
+ auto EleOpc = ElemType->getOpcode();
+ assert((EleOpc == SPIRV::OpTypeInt || EleOpc == SPIRV::OpTypeFloat ||
+ EleOpc == SPIRV::OpTypeBool) &&
+ "Invalid vector element type");
+
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeVector)
+ .addDef(createTypeVReg(MIRBuilder))
+ .addUse(getSPIRVTypeID(ElemType))
+ .addImm(NumElems);
+ return MIB;
+}
+
+Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVType *SpvType,
+ bool EmitIR) {
+ auto &MF = MIRBuilder.getMF();
+ Register Res;
+ const IntegerType *LLVMIntTy;
+ if (SpvType)
+ LLVMIntTy = cast<IntegerType>(getTypeForSPIRVType(SpvType));
+ else
+ LLVMIntTy = IntegerType::getInt32Ty(MF.getFunction().getContext());
+ // Find a constant in DT or build a new one.
+ const auto ConstInt =
+ ConstantInt::get(const_cast<IntegerType *>(LLVMIntTy), Val);
+ unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
+ Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+ assignTypeToVReg(LLVMIntTy, Res, MIRBuilder);
+ if (EmitIR)
+ MIRBuilder.buildConstant(Res, *ConstInt);
+ else
+ MIRBuilder.buildInstr(SPIRV::OpConstantI)
+ .addDef(Res)
+ .addImm(ConstInt->getSExtValue());
+ return Res;
+}
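+
+// Note: in buildConstantInt above, EmitIR selects between a generic
+// G_CONSTANT (lowered later) and a direct OpConstantI; the latter path is
+// meant for calls made from instruction selection, where emitting extra
+// generic MIR is undesirable.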
+
+Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVType *SpvType) {
+ auto &MF = MIRBuilder.getMF();
+ Register Res;
+ const Type *LLVMFPTy;
+ if (SpvType) {
+ LLVMFPTy = getTypeForSPIRVType(SpvType);
+ assert(LLVMFPTy->isFloatingPointTy());
+ } else {
+ LLVMFPTy = Type::getFloatTy(MF.getFunction().getContext());
+ }
+ // Find a constant in DT or build a new one.
+ const auto ConstFP = ConstantFP::get(LLVMFPTy->getContext(), Val);
+ unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
+ Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+ assignTypeToVReg(LLVMFPTy, Res, MIRBuilder);
+ MIRBuilder.buildFConstant(Res, *ConstFP);
+ return Res;
+}
+
+Register SPIRVGlobalRegistry::buildGlobalVariable(
+ Register ResVReg, SPIRVType *BaseType, StringRef Name,
+ const GlobalValue *GV, SPIRV::StorageClass Storage,
+ const MachineInstr *Init, bool IsConst, bool HasLinkageTy,
+ SPIRV::LinkageType LinkageType, MachineIRBuilder &MIRBuilder,
+ bool IsInstSelector) {
+ const GlobalVariable *GVar = nullptr;
+ if (GV)
+ GVar = cast<const GlobalVariable>(GV);
+ else {
+ // If GV is not passed explicitly, use the name to find or construct
+ // the global variable.
+ Module *M = MIRBuilder.getMF().getFunction().getParent();
+ GVar = M->getGlobalVariable(Name);
+ if (GVar == nullptr) {
+ const Type *Ty = getTypeForSPIRVType(BaseType); // TODO: check type.
+ GVar = new GlobalVariable(*M, const_cast<Type *>(Ty), false,
+ GlobalValue::ExternalLinkage, nullptr,
+ Twine(Name));
+ }
+ GV = GVar;
+ }
+ Register Reg;
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpVariable)
+ .addDef(ResVReg)
+ .addUse(getSPIRVTypeID(BaseType))
+ .addImm(static_cast<uint32_t>(Storage));
+
+ if (Init)
+ MIB.addUse(Init->getOperand(0).getReg());
+
+ // ISel may introduce a new register on this step, so we need to add it to
+ // DT and correct its type to avoid failures at the next stage.
+ if (IsInstSelector) {
+ const auto &Subtarget = CurMF->getSubtarget();
+ constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(),
+ *Subtarget.getRegisterInfo(),
+ *Subtarget.getRegBankInfo());
+ }
+ Reg = MIB->getOperand(0).getReg();
+
+ // Give Reg the same type that ResVReg has.
+ auto MRI = MIRBuilder.getMRI();
+ assert(MRI->getType(ResVReg).isPointer() && "Pointer type is expected");
+ if (Reg != ResVReg) {
+ LLT RegLLTy = LLT::pointer(MRI->getType(ResVReg).getAddressSpace(), 32);
+ MRI->setType(Reg, RegLLTy);
+ assignSPIRVTypeToVReg(BaseType, Reg, MIRBuilder.getMF());
+ }
+
+ // If it's a global variable with a name, output an OpName for it.
+ if (GVar && GVar->hasName())
+ buildOpName(Reg, GVar->getName(), MIRBuilder);
+
+ // Output decorations for the GV.
+ // TODO: maybe move to GenerateDecorations pass.
+ if (IsConst)
+ buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::Constant, {});
+
+ if (GVar && GVar->getAlign().valueOrOne().value() != 1)
+ buildOpDecorate(
+ Reg, MIRBuilder, SPIRV::Decoration::Alignment,
+ {static_cast<uint32_t>(GVar->getAlign().valueOrOne().value())});
+
+ if (HasLinkageTy)
+ buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
+ {static_cast<uint32_t>(LinkageType)}, Name);
+ return Reg;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems,
+ SPIRVType *ElemType,
+ MachineIRBuilder &MIRBuilder,
+ bool EmitIR) {
+ assert((ElemType->getOpcode() != SPIRV::OpTypeVoid) &&
+ "Invalid array element type");
+ Register NumElementsVReg =
+ buildConstantInt(NumElems, MIRBuilder, nullptr, EmitIR);
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeArray)
+ .addDef(createTypeVReg(MIRBuilder))
+ .addUse(getSPIRVTypeID(ElemType))
+ .addUse(NumElementsVReg);
+ return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypePointer(SPIRV::StorageClass SC,
+ SPIRVType *ElemType,
+ MachineIRBuilder &MIRBuilder) {
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypePointer)
+ .addDef(createTypeVReg(MIRBuilder))
+ .addImm(static_cast<uint32_t>(SC))
+ .addUse(getSPIRVTypeID(ElemType));
+ return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction(
+ SPIRVType *RetType, const SmallVectorImpl<SPIRVType *> &ArgTypes,
+ MachineIRBuilder &MIRBuilder) {
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeFunction)
+ .addDef(createTypeVReg(MIRBuilder))
+ .addUse(getSPIRVTypeID(RetType));
+ for (const SPIRVType *ArgType : ArgTypes)
+ MIB.addUse(getSPIRVTypeID(ArgType));
+ return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty,
+ MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AccQual,
+ bool EmitIR) {
+ if (auto IType = dyn_cast<IntegerType>(Ty)) {
+ const unsigned Width = IType->getBitWidth();
+ return Width == 1 ? getOpTypeBool(MIRBuilder)
+ : getOpTypeInt(Width, MIRBuilder, false);
+ }
+ if (Ty->isFloatingPointTy())
+ return getOpTypeFloat(Ty->getPrimitiveSizeInBits(), MIRBuilder);
+ if (Ty->isVoidTy())
+ return getOpTypeVoid(MIRBuilder);
+ if (Ty->isVectorTy()) {
+ auto El = getOrCreateSPIRVType(cast<FixedVectorType>(Ty)->getElementType(),
+ MIRBuilder);
+ return getOpTypeVector(cast<FixedVectorType>(Ty)->getNumElements(), El,
+ MIRBuilder);
+ }
+ if (Ty->isArrayTy()) {
+ auto *El = getOrCreateSPIRVType(Ty->getArrayElementType(), MIRBuilder);
+ return getOpTypeArray(Ty->getArrayNumElements(), El, MIRBuilder, EmitIR);
+ }
+ assert(!isa<StructType>(Ty) && "Unsupported StructType");
+ if (auto FType = dyn_cast<FunctionType>(Ty)) {
+ SPIRVType *RetTy = getOrCreateSPIRVType(FType->getReturnType(), MIRBuilder);
+ SmallVector<SPIRVType *, 4> ParamTypes;
+ for (const auto &t : FType->params()) {
+ ParamTypes.push_back(getOrCreateSPIRVType(t, MIRBuilder));
+ }
+ return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder);
+ }
+ if (auto PType = dyn_cast<PointerType>(Ty)) {
+ SPIRVType *SpvElementType;
+ // At the moment, all opaque pointers correspond to i8 element type.
+ // TODO: change the implementation once opaque pointers are supported
+ // in the SPIR-V specification.
+ if (PType->isOpaque()) {
+ SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
+ } else {
+ Type *ElemType = PType->getNonOpaquePointerElementType();
+ // TODO: support OpenCL and SPIRV builtins like image2d_t that are passed
+ // as pointers, but should be treated as custom types like OpTypeImage.
+ assert(!isa<StructType>(ElemType) && "Unsupported StructType pointer");
+
+ // Otherwise, treat it as a regular pointer type.
+ SpvElementType = getOrCreateSPIRVType(
+ ElemType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, EmitIR);
+ }
+ auto SC = addressSpaceToStorageClass(PType->getAddressSpace());
+ return getOpTypePointer(SC, SpvElementType, MIRBuilder);
+ }
+ llvm_unreachable("Unable to convert LLVM type to SPIRVType");
+}
+
+SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const {
+ auto t = VRegToTypeMap.find(CurMF);
+ if (t != VRegToTypeMap.end()) {
+ auto tt = t->second.find(VReg);
+ if (tt != t->second.end())
+ return tt->second;
+ }
+ return nullptr;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
+ const Type *Type, MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AccessQual, bool EmitIR) {
+ SPIRVType *SpirvType = createSPIRVType(Type, MIRBuilder, AccessQual, EmitIR);
+ VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType;
+ SPIRVToLLVMType[SpirvType] = Type;
+ return SpirvType;
+}
+
+bool SPIRVGlobalRegistry::isScalarOfType(Register VReg,
+ unsigned TypeOpcode) const {
+ SPIRVType *Type = getSPIRVTypeForVReg(VReg);
+ assert(Type && "isScalarOfType VReg has no type assigned");
+ return Type->getOpcode() == TypeOpcode;
+}
+
+bool SPIRVGlobalRegistry::isScalarOrVectorOfType(Register VReg,
+ unsigned TypeOpcode) const {
+ SPIRVType *Type = getSPIRVTypeForVReg(VReg);
+ assert(Type && "isScalarOrVectorOfType VReg has no type assigned");
+ if (Type->getOpcode() == TypeOpcode)
+ return true;
+ if (Type->getOpcode() == SPIRV::OpTypeVector) {
+ Register ScalarTypeVReg = Type->getOperand(1).getReg();
+ SPIRVType *ScalarType = getSPIRVTypeForVReg(ScalarTypeVReg);
+ return ScalarType->getOpcode() == TypeOpcode;
+ }
+ return false;
+}
+
+unsigned
+SPIRVGlobalRegistry::getScalarOrVectorBitWidth(const SPIRVType *Type) const {
+ assert(Type && "Invalid Type pointer");
+ if (Type->getOpcode() == SPIRV::OpTypeVector) {
+ auto EleTypeReg = Type->getOperand(1).getReg();
+ Type = getSPIRVTypeForVReg(EleTypeReg);
+ }
+ if (Type->getOpcode() == SPIRV::OpTypeInt ||
+ Type->getOpcode() == SPIRV::OpTypeFloat)
+ return Type->getOperand(1).getImm();
+ if (Type->getOpcode() == SPIRV::OpTypeBool)
+ return 1;
+ llvm_unreachable("Attempting to get bit width of non-integer/float type.");
+}
+
+bool SPIRVGlobalRegistry::isScalarOrVectorSigned(const SPIRVType *Type) const {
+ assert(Type && "Invalid Type pointer");
+ if (Type->getOpcode() == SPIRV::OpTypeVector) {
+ auto EleTypeReg = Type->getOperand(1).getReg();
+ Type = getSPIRVTypeForVReg(EleTypeReg);
+ }
+ if (Type->getOpcode() == SPIRV::OpTypeInt)
+ return Type->getOperand(2).getImm() != 0;
+ llvm_unreachable("Attempting to get sign of non-integer type.");
+}
+
+SPIRV::StorageClass
+SPIRVGlobalRegistry::getPointerStorageClass(Register VReg) const {
+ SPIRVType *Type = getSPIRVTypeForVReg(VReg);
+ assert(Type && Type->getOpcode() == SPIRV::OpTypePointer &&
+ Type->getOperand(1).isImm() && "Pointer type is expected");
+ return static_cast<SPIRV::StorageClass>(Type->getOperand(1).getImm());
+}
+
+SPIRVType *
+SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth,
+ MachineIRBuilder &MIRBuilder) {
+ return getOrCreateSPIRVType(
+ IntegerType::get(MIRBuilder.getMF().getFunction().getContext(), BitWidth),
+ MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(Type *LLVMTy,
+ MachineInstrBuilder MIB) {
+ SPIRVType *SpirvType = MIB;
+ VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType;
+ SPIRVToLLVMType[SpirvType] = LLVMTy;
+ return SpirvType;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(
+ unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII) {
+ Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), BitWidth);
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeInt))
+ .addDef(createTypeVReg(CurMF->getRegInfo()))
+ .addImm(BitWidth)
+ .addImm(0);
+ return restOfCreateSPIRVType(LLVMTy, MIB);
+}
+
+SPIRVType *
+SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder) {
+ return getOrCreateSPIRVType(
+ IntegerType::get(MIRBuilder.getMF().getFunction().getContext(), 1),
+ MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
+ SPIRVType *BaseType, unsigned NumElements, MachineIRBuilder &MIRBuilder) {
+ return getOrCreateSPIRVType(
+ FixedVectorType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
+ NumElements),
+ MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
+ SPIRVType *BaseType, unsigned NumElements, MachineInstr &I,
+ const SPIRVInstrInfo &TII) {
+ Type *LLVMTy = FixedVectorType::get(
+ const_cast<Type *>(getTypeForSPIRVType(BaseType)), NumElements);
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeVector))
+ .addDef(createTypeVReg(CurMF->getRegInfo()))
+ .addUse(getSPIRVTypeID(BaseType))
+ .addImm(NumElements);
+ return restOfCreateSPIRVType(LLVMTy, MIB);
+}
+
+SPIRVType *
+SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(SPIRVType *BaseType,
+ MachineIRBuilder &MIRBuilder,
+ SPIRV::StorageClass SClass) {
+ return getOrCreateSPIRVType(
+ PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
+ storageClassToAddressSpace(SClass)),
+ MIRBuilder);
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
+ SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII,
+ SPIRV::StorageClass SC) {
+ Type *LLVMTy =
+ PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
+ storageClassToAddressSpace(SC));
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypePointer))
+ .addDef(createTypeVReg(CurMF->getRegInfo()))
+ .addImm(static_cast<uint32_t>(SC))
+ .addUse(getSPIRVTypeID(BaseType));
+ return restOfCreateSPIRVType(LLVMTy, MIB);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
new file mode 100644
index 000000000000..952ab4c13e29
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -0,0 +1,174 @@
+//===-- SPIRVGlobalRegistry.h - SPIR-V Global Registry ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// SPIRVGlobalRegistry is used to maintain rich type information required for
+// SPIR-V even after lowering from LLVM IR to GMIR. It can convert an llvm::Type
+// into an OpTypeXXX instruction, and map it to a virtual register. It also
+// builds constants and global variables, and keeps them consistent across
+// uses.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
+
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRVInstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+
+namespace llvm {
+using SPIRVType = const MachineInstr;
+
+class SPIRVGlobalRegistry {
+ // Registers holding values which have types associated with them.
+ // Initialized upon VReg definition in IRTranslator.
+ // Do not confuse this with DuplicatesTracker: DT maps a Type* to <MF, Reg>
+ // where Reg is the result of an OpTypeXXX instruction, while VRegToTypeMap
+ // tracks the SPIR-V type assigned to other (i.e. non-type-declaring)
+ // registers.
+ DenseMap<MachineFunction *, DenseMap<Register, SPIRVType *>> VRegToTypeMap;
+
+ DenseMap<SPIRVType *, const Type *> SPIRVToLLVMType;
+
+ // Number of bits pointers and size_t integers require.
+ const unsigned PointerSize;
+
+ // Add a new OpTypeXXX instruction without checking for duplicates.
+ SPIRVType *
+ createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
+ bool EmitIR = true);
+
+public:
+ SPIRVGlobalRegistry(unsigned PointerSize);
+
+ MachineFunction *CurMF;
+
+ // Get or create a SPIR-V type corresponding to the given LLVM IR type,
+ // and map it to the given VReg by creating an ASSIGN_TYPE instruction.
+ SPIRVType *assignTypeToVReg(
+ const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
+ bool EmitIR = true);
+
+ // In cases where the SPIR-V type is already known, this function can be
+ // used to map it to the given VReg via an ASSIGN_TYPE instruction.
+ void assignSPIRVTypeToVReg(SPIRVType *Type, Register VReg,
+ MachineFunction &MF);
+
+ // Either generate a new OpTypeXXX instruction or return an existing one
+ // corresponding to the given LLVM IR type.
+ // EmitIR controls whether we emit GMIR or SPIR-V constants (e.g. for array
+ // sizes), because this method may be called from the InstructionSelector,
+ // where we don't want to emit extra IR instructions.
+ SPIRVType *getOrCreateSPIRVType(
+ const Type *Type, MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
+ bool EmitIR = true);
+
+ const Type *getTypeForSPIRVType(const SPIRVType *Ty) const {
+ auto Res = SPIRVToLLVMType.find(Ty);
+ assert(Res != SPIRVToLLVMType.end());
+ return Res->second;
+ }
+
+ // Return the SPIR-V type instruction corresponding to the given VReg, or
+ // nullptr if no such type instruction exists.
+ SPIRVType *getSPIRVTypeForVReg(Register VReg) const;
+
+ // Whether the given VReg has a SPIR-V type mapped to it yet.
+ bool hasSPIRVTypeForVReg(Register VReg) const {
+ return getSPIRVTypeForVReg(VReg) != nullptr;
+ }
+
+ // Return the VReg holding the result of the given OpTypeXXX instruction.
+ Register getSPIRVTypeID(const SPIRVType *SpirvType) const {
+ assert(SpirvType && "Attempting to get type id for nullptr type.");
+ return SpirvType->defs().begin()->getReg();
+ }
+
+ void setCurrentFunc(MachineFunction &MF) { CurMF = &MF; }
+
+ // Whether the given VReg has an OpTypeXXX instruction mapped to it with the
+ // given opcode (e.g. OpTypeFloat).
+ bool isScalarOfType(Register VReg, unsigned TypeOpcode) const;
+
+ // Return true if the given VReg's assigned SPIR-V type is either a scalar
+ // matching the given opcode, or a vector with an element type matching that
+ // opcode (e.g. OpTypeBool, or OpTypeVector %x 4, where %x is OpTypeBool).
+ bool isScalarOrVectorOfType(Register VReg, unsigned TypeOpcode) const;
+
+ // For vectors or scalars of ints/floats, return the scalar type's bitwidth.
+ unsigned getScalarOrVectorBitWidth(const SPIRVType *Type) const;
+
+ // For integer vectors or scalars, return whether the integers are signed.
+ bool isScalarOrVectorSigned(const SPIRVType *Type) const;
+
+ // Gets the storage class of the pointer type assigned to this vreg.
+ SPIRV::StorageClass getPointerStorageClass(Register VReg) const;
+
+ // Return the number of bits SPIR-V pointers and size_t variables require.
+ unsigned getPointerSize() const { return PointerSize; }
+
+private:
+ SPIRVType *getOpTypeBool(MachineIRBuilder &MIRBuilder);
+
+ SPIRVType *getOpTypeInt(uint32_t Width, MachineIRBuilder &MIRBuilder,
+ bool IsSigned = false);
+
+ SPIRVType *getOpTypeFloat(uint32_t Width, MachineIRBuilder &MIRBuilder);
+
+ SPIRVType *getOpTypeVoid(MachineIRBuilder &MIRBuilder);
+
+ SPIRVType *getOpTypeVector(uint32_t NumElems, SPIRVType *ElemType,
+ MachineIRBuilder &MIRBuilder);
+
+ SPIRVType *getOpTypeArray(uint32_t NumElems, SPIRVType *ElemType,
+ MachineIRBuilder &MIRBuilder, bool EmitIR = true);
+
+ SPIRVType *getOpTypePointer(SPIRV::StorageClass SC, SPIRVType *ElemType,
+ MachineIRBuilder &MIRBuilder);
+
+ SPIRVType *getOpTypeFunction(SPIRVType *RetType,
+ const SmallVectorImpl<SPIRVType *> &ArgTypes,
+ MachineIRBuilder &MIRBuilder);
+ SPIRVType *restOfCreateSPIRVType(Type *LLVMTy, MachineInstrBuilder MIB);
+
+public:
+ Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder,
+ SPIRVType *SpvType = nullptr, bool EmitIR = true);
+ Register buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder,
+ SPIRVType *SpvType = nullptr);
+ Register
+ buildGlobalVariable(Register Reg, SPIRVType *BaseType, StringRef Name,
+ const GlobalValue *GV, SPIRV::StorageClass Storage,
+ const MachineInstr *Init, bool IsConst, bool HasLinkageTy,
+ SPIRV::LinkageType LinkageType,
+ MachineIRBuilder &MIRBuilder, bool IsInstSelector);
+
+ // Convenient helpers for getting types with check for duplicates.
+ SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth,
+ MachineIRBuilder &MIRBuilder);
+ SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth, MachineInstr &I,
+ const SPIRVInstrInfo &TII);
+ SPIRVType *getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder);
+ SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType,
+ unsigned NumElements,
+ MachineIRBuilder &MIRBuilder);
+ SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType,
+ unsigned NumElements, MachineInstr &I,
+ const SPIRVInstrInfo &TII);
+
+ SPIRVType *getOrCreateSPIRVPointerType(
+ SPIRVType *BaseType, MachineIRBuilder &MIRBuilder,
+ SPIRV::StorageClass SClass = SPIRV::StorageClass::Function);
+ SPIRVType *getOrCreateSPIRVPointerType(
+ SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII,
+ SPIRV::StorageClass SClass = SPIRV::StorageClass::Function);
+};
+} // end namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
new file mode 100644
index 000000000000..66ff51c912b0
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -0,0 +1,45 @@
+//===- SPIRVISelLowering.cpp - SPIR-V DAG Lowering Impl ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIRVTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVISelLowering.h"
+#include "SPIRV.h"
+
+#define DEBUG_TYPE "spirv-lower"
+
+using namespace llvm;
+
+unsigned SPIRVTargetLowering::getNumRegistersForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
+ // This code avoids a CallLowering failure inside getVectorTypeBreakdown
+ // on v3i1 arguments. Maybe we need to return 1 for all types.
+ // TODO: remove it once this case is supported by the default implementation.
+ if (VT.isVector() && VT.getVectorNumElements() == 3 &&
+ (VT.getVectorElementType() == MVT::i1 ||
+ VT.getVectorElementType() == MVT::i8))
+ return 1;
+ return getNumRegisters(Context, VT);
+}
+
+MVT SPIRVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ // This code avoids a CallLowering failure inside getVectorTypeBreakdown
+ // on v3i1 arguments. Maybe we need to return i32 for all types.
+ // TODO: remove it once this case is supported by the default implementation.
+ if (VT.isVector() && VT.getVectorNumElements() == 3) {
+ if (VT.getVectorElementType() == MVT::i1)
+ return MVT::v4i1;
+ else if (VT.getVectorElementType() == MVT::i8)
+ return MVT::v4i8;
+ }
+ return getRegisterType(Context, VT);
+}
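+
+// Taken together, the two overrides above make a 3-element i1/i8 vector
+// argument lower as a single widened register: a v3i1 parameter, for example,
+// is reported as one register of type v4i1 instead of being split up by
+// getVectorTypeBreakdown.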
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
new file mode 100644
index 000000000000..bee9220f5248
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
@@ -0,0 +1,47 @@
+//===-- SPIRVISelLowering.h - SPIR-V DAG Lowering Interface -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SPIR-V uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVISELLOWERING_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVISELLOWERING_H
+
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+class SPIRVSubtarget;
+
+class SPIRVTargetLowering : public TargetLowering {
+public:
+ explicit SPIRVTargetLowering(const TargetMachine &TM,
+ const SPIRVSubtarget &STI)
+ : TargetLowering(TM) {}
+
+ // Stop IRTranslator breaking up FMA instrs to preserve type information.
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT) const override {
+ return true;
+ }
+
+ // This is to prevent sexts of non-i64 vector indices, which are generated
+ // within the generic IRTranslator, where type generation for them is
+ // omitted.
+ MVT getVectorIdxTy(const DataLayout &DL) const override {
+ return MVT::getIntegerVT(32);
+ }
+ unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const override;
+ MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
+ EVT VT) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVISELLOWERING_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrFormats.td b/llvm/lib/Target/SPIRV/SPIRVInstrFormats.td
new file mode 100644
index 000000000000..c78c8ee11590
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrFormats.td
@@ -0,0 +1,31 @@
+//===-- SPIRVInstrFormats.td - SPIR-V Instruction Formats --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def StringImm: Operand<i32>{
+ let PrintMethod="printStringImm";
+}
+
+class Op<bits<16> Opcode, dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : Instruction {
+ field bits<16> Inst;
+
+ let Inst = Opcode;
+
+ let Namespace = "SPIRV";
+ let DecoderNamespace = "SPIRV";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins> : Op<0, outs, ins, ""> {
+ let isPseudo = 1;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
new file mode 100644
index 000000000000..754906308114
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -0,0 +1,195 @@
+//===-- SPIRVInstrInfo.cpp - SPIR-V Instruction Information ------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPIR-V implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVInstrInfo.h"
+#include "SPIRV.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "SPIRVGenInstrInfo.inc"
+
+using namespace llvm;
+
+SPIRVInstrInfo::SPIRVInstrInfo() : SPIRVGenInstrInfo() {}
+
+bool SPIRVInstrInfo::isConstantInstr(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case SPIRV::OpConstantTrue:
+ case SPIRV::OpConstantFalse:
+ case SPIRV::OpConstantI:
+ case SPIRV::OpConstantF:
+ case SPIRV::OpConstantComposite:
+ case SPIRV::OpConstantSampler:
+ case SPIRV::OpConstantNull:
+ case SPIRV::OpSpecConstantTrue:
+ case SPIRV::OpSpecConstantFalse:
+ case SPIRV::OpSpecConstant:
+ case SPIRV::OpSpecConstantComposite:
+ case SPIRV::OpSpecConstantOp:
+ case SPIRV::OpUndef:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SPIRVInstrInfo::isTypeDeclInstr(const MachineInstr &MI) const {
+ auto &MRI = MI.getMF()->getRegInfo();
+ if (MI.getNumDefs() >= 1 && MI.getOperand(0).isReg()) {
+ auto DefRegClass = MRI.getRegClassOrNull(MI.getOperand(0).getReg());
+ return DefRegClass && DefRegClass->getID() == SPIRV::TYPERegClass.getID();
+ }
+ return false;
+}
+
+bool SPIRVInstrInfo::isDecorationInstr(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case SPIRV::OpDecorate:
+ case SPIRV::OpDecorateId:
+ case SPIRV::OpDecorateString:
+ case SPIRV::OpMemberDecorate:
+ case SPIRV::OpMemberDecorateString:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SPIRVInstrInfo::isHeaderInstr(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case SPIRV::OpCapability:
+ case SPIRV::OpExtension:
+ case SPIRV::OpExtInstImport:
+ case SPIRV::OpMemoryModel:
+ case SPIRV::OpEntryPoint:
+ case SPIRV::OpExecutionMode:
+ case SPIRV::OpExecutionModeId:
+ case SPIRV::OpString:
+ case SPIRV::OpSourceExtension:
+ case SPIRV::OpSource:
+ case SPIRV::OpSourceContinued:
+ case SPIRV::OpName:
+ case SPIRV::OpMemberName:
+ case SPIRV::OpModuleProcessed:
+ return true;
+ default:
+ return isTypeDeclInstr(MI) || isConstantInstr(MI) || isDecorationInstr(MI);
+ }
+}
+
+// Analyze the branching code at the end of MBB, returning
+// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+// implemented for a target). Upon success, this returns false and returns
+// with the following information in various cases:
+//
+// 1. If this block ends with no branches (it just falls through to its succ)
+// just return false, leaving TBB/FBB null.
+// 2. If this block ends with only an unconditional branch, it sets TBB to be
+// the destination block.
+// 3. If this block ends with a conditional branch and it falls through to a
+// successor block, it sets TBB to be the branch destination block and a
+// list of operands that evaluate the condition. These operands can be
+// passed to other TargetInstrInfo methods to create new branches.
+// 4. If this block ends with a conditional branch followed by an
+// unconditional branch, it returns the 'true' destination in TBB, the
+// 'false' destination in FBB, and a list of operands that evaluate the
+// condition. These operands can be passed to other TargetInstrInfo
+// methods to create new branches.
+//
+// Note that removeBranch and insertBranch must be implemented to support
+// cases where this method returns success.
+//
+// If AllowModify is true, then this routine is allowed to modify the basic
+// block (e.g. delete instructions after the unconditional branch).
+//
+// The CFG information in MBB.Predecessors and MBB.Successors must be valid
+// before calling this function.
+bool SPIRVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ TBB = nullptr;
+ FBB = nullptr;
+ if (MBB.empty())
+ return false;
+ auto MI = MBB.getLastNonDebugInstr();
+ if (!MI.isValid())
+ return false;
+ if (MI->getOpcode() == SPIRV::OpBranch) {
+ TBB = MI->getOperand(0).getMBB();
+ return false;
+ } else if (MI->getOpcode() == SPIRV::OpBranchConditional) {
+ Cond.push_back(MI->getOperand(0));
+ TBB = MI->getOperand(1).getMBB();
+ if (MI->getNumOperands() == 3) {
+ FBB = MI->getOperand(2).getMBB();
+ }
+ return false;
+ } else {
+ return true;
+ }
+}
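+
+// Usage sketch (hypothetical client code, not part of this patch): generic
+// passes query the terminator structure roughly as follows, acting only when
+// analyzeBranch succeeds, i.e. returns false:
+//   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+//   SmallVector<MachineOperand, 4> Cond;
+//   if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
+//     // TBB/FBB/Cond now describe the branches per the contract above.
+//   }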
+
+// Remove the branching code at the end of the specific MBB.
+// This is only invoked in cases where analyzeBranch returns success. It
+// returns the number of instructions that were removed.
+// If \p BytesRemoved is non-null, report the change in code size from the
+// removed instructions.
+unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ report_fatal_error("Branch removal not supported, as MBB info not propagated"
+ " to OpPhi instructions. Try using -O0 instead.");
+}
+
+// Insert branch code into the end of the specified MachineBasicBlock. The
+// operands to this method are the same as those returned by analyzeBranch.
+// This is only invoked in cases where analyzeBranch returns success. It
+// returns the number of instructions inserted. If \p BytesAdded is non-null,
+// report the change in code size from the added instructions.
+//
+// It is also invoked by tail merging to add unconditional branches in
+// cases where analyzeBranch doesn't apply because there was no original
+// branch to analyze. At least this much must be implemented, else tail
+// merging needs to be disabled.
+//
+// The CFG information in MBB.Predecessors and MBB.Successors must be valid
+// before calling this function.
+unsigned SPIRVInstrInfo::insertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+ report_fatal_error("Branch insertion not supported, as MBB info not "
+ "propagated to OpPhi instructions. Try using "
+ "-O0 instead.");
+}
+
+void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, MCRegister DestReg,
+ MCRegister SrcReg, bool KillSrc) const {
+ // We don't actually need this COPY instruction. However, if we did nothing
+ // with it, post-RA pseudo-instruction expansion would simply remove it and we
+ // would end up with code that uses undef registers. Therefore, we replace all
+ // uses of the dst register with the src register; the COPY instruction itself
+ // is safely removed later.
+ assert(I->isCopy() && "Copy instruction is expected");
+ auto DstOp = I->getOperand(0);
+ auto SrcOp = I->getOperand(1);
+ assert(DstOp.isReg() && SrcOp.isReg() &&
+ "Register operands are expected in COPY");
+ auto &MRI = I->getMF()->getRegInfo();
+ MRI.replaceRegWith(DstOp.getReg(), SrcOp.getReg());
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
new file mode 100644
index 000000000000..2600d9cfca2e
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
@@ -0,0 +1,54 @@
+//===-- SPIRVInstrInfo.h - SPIR-V Instruction Information -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPIR-V implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVINSTRINFO_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVINSTRINFO_H
+
+#include "SPIRVRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SPIRVGenInstrInfo.inc"
+
+namespace llvm {
+
+class SPIRVInstrInfo : public SPIRVGenInstrInfo {
+ const SPIRVRegisterInfo RI;
+
+public:
+ SPIRVInstrInfo();
+
+ const SPIRVRegisterInfo &getRegisterInfo() const { return RI; }
+ bool isHeaderInstr(const MachineInstr &MI) const;
+ bool isConstantInstr(const MachineInstr &MI) const;
+ bool isTypeDeclInstr(const MachineInstr &MI) const;
+ bool isDecorationInstr(const MachineInstr &MI) const;
+
+ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const override;
+
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
+ bool KillSrc) const override;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVINSTRINFO_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
new file mode 100644
index 000000000000..d6fec5fd0785
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -0,0 +1,732 @@
+//===-- SPIRVInstrInfo.td - Target Description for SPIR-V Target ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SPIR-V instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "SPIRVInstrFormats.td"
+include "SPIRVEnums.td"
+
+// Codegen only metadata instructions
+let isCodeGenOnly=1 in {
+ def ASSIGN_TYPE: Pseudo<(outs ANYID:$dst_id), (ins ANYID:$src_id, TYPE:$src_ty)>;
+ def DECL_TYPE: Pseudo<(outs ANYID:$dst_id), (ins ANYID:$src_id, TYPE:$src_ty)>;
+ def GET_ID: Pseudo<(outs ID:$dst_id), (ins ANYID:$src)>;
+ def GET_fID: Pseudo<(outs fID:$dst_id), (ins ANYID:$src)>;
+ def GET_pID: Pseudo<(outs pID:$dst_id), (ins ANYID:$src)>;
+ def GET_vID: Pseudo<(outs vID:$dst_id), (ins ANYID:$src)>;
+ def GET_vfID: Pseudo<(outs vfID:$dst_id), (ins ANYID:$src)>;
+}
+
+def SPVTypeBin : SDTypeProfile<1, 2, []>;
+
+def assigntype : SDNode<"SPIRVISD::AssignType", SPVTypeBin>;
+
+def : GINodeEquiv<ASSIGN_TYPE, assigntype>;
+
+class BinOp<string name, bits<16> opCode, list<dag> pattern=[]>
+ : Op<opCode, (outs ANYID:$dst), (ins TYPE:$src_ty, ANYID:$src, ANYID:$src2),
+ "$dst = "#name#" $src_ty $src $src2", pattern>;
+
+class BinOpTyped<string name, bits<16> opCode, RegisterClass CID, SDNode node>
+ : Op<opCode, (outs ID:$dst), (ins TYPE:$src_ty, CID:$src, CID:$src2),
+ "$dst = "#name#" $src_ty $src $src2", [(set ID:$dst, (assigntype (node CID:$src, CID:$src2), TYPE:$src_ty))]>;
+
+class TernOpTyped<string name, bits<16> opCode, RegisterClass CCond, RegisterClass CID, SDNode node>
+ : Op<opCode, (outs ID:$dst), (ins TYPE:$src_ty, CCond:$cond, CID:$src1, CID:$src2),
+ "$dst = "#name#" $src_ty $cond $src1 $src2", [(set ID:$dst, (assigntype (node CCond:$cond, CID:$src1, CID:$src2), TYPE:$src_ty))]>;
+
+multiclass BinOpTypedGen<string name, bits<16> opCode, SDNode node, bit genF = 0, bit genV = 0> {
+ if genF then
+ def S: BinOpTyped<name, opCode, fID, node>;
+ else
+ def S: BinOpTyped<name, opCode, ID, node>;
+ if genV then {
+ if genF then
+ def V: BinOpTyped<name, opCode, vfID, node>;
+ else
+ def V: BinOpTyped<name, opCode, vID, node>;
+ }
+}
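+
+// For illustration (hypothetical instantiation, not from this file):
+//   defm OpFoo: BinOpTypedGen<"OpFoo", 12345, some_node, /*genF=*/0, /*genV=*/1>;
+// would expand to OpFooS (scalar ID operands) and OpFooV (vector vID
+// operands), each with a selection pattern wrapping some_node in assigntype.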
+
+multiclass TernOpTypedGen<string name, bits<16> opCode, SDNode node, bit genI = 1, bit genF = 0, bit genV = 0> {
+ if genF then {
+ def SFSCond: TernOpTyped<name, opCode, ID, fID, node>;
+ def SFVCond: TernOpTyped<name, opCode, vID, fID, node>;
+ }
+ if genI then {
+ def SISCond: TernOpTyped<name, opCode, ID, ID, node>;
+ def SIVCond: TernOpTyped<name, opCode, vID, ID, node>;
+ }
+ if genV then {
+ if genF then {
+ def VFSCond: TernOpTyped<name, opCode, ID, vfID, node>;
+ def VFVCond: TernOpTyped<name, opCode, vID, vfID, node>;
+ }
+ if genI then {
+ def VISCond: TernOpTyped<name, opCode, ID, vID, node>;
+ def VIVCond: TernOpTyped<name, opCode, vID, vID, node>;
+ }
+ }
+}
+
+class UnOp<string name, bits<16> opCode, list<dag> pattern=[]>
+ : Op<opCode, (outs ANYID:$dst), (ins TYPE:$type, ANYID:$src),
+ "$dst = "#name#" $type $src", pattern>;
+class UnOpTyped<string name, bits<16> opCode, RegisterClass CID, SDNode node>
+ : Op<opCode, (outs ID:$dst), (ins TYPE:$src_ty, CID:$src),
+ "$dst = "#name#" $src_ty $src", [(set ID:$dst, (assigntype (node CID:$src), TYPE:$src_ty))]>;
+
+class SimpleOp<string name, bits<16> opCode>: Op<opCode, (outs), (ins), name>;
+
+// 3.42.1 Miscellaneous Instructions
+
+def OpNop: SimpleOp<"OpNop", 0>;
+def OpUndef: Op<1, (outs ID:$res), (ins TYPE:$type), "$res = OpUndef $type">;
+def OpSizeOf: Op<321, (outs ID:$res), (ins TYPE:$ty, ID:$ptr), "$res = OpSizeOf $ty $ptr">;
+
+// 3.42.2 Debug Instructions
+
+def OpSourceContinued: Op<2, (outs), (ins StringImm:$str, variable_ops),
+ "OpSourceContinued $str">;
+def OpSource: Op<3, (outs), (ins SourceLanguage:$lang, i32imm:$version, variable_ops),
+ "OpSource $lang $version">;
+def OpSourceExtension: Op<4, (outs), (ins StringImm:$extension, variable_ops),
+ "OpSourceExtension $extension">;
+def OpName: Op<5, (outs), (ins ANY:$tar, StringImm:$name, variable_ops), "OpName $tar $name">;
+def OpMemberName: Op<6, (outs), (ins TYPE:$ty, i32imm:$mem, StringImm:$name, variable_ops),
+ "OpMemberName $ty $mem $name">;
+def OpString: Op<7, (outs ID:$r), (ins StringImm:$s, variable_ops), "$r = OpString $s">;
+def OpLine: Op<8, (outs), (ins ID:$file, i32imm:$ln, i32imm:$col), "OpLine $file $ln $col">;
+def OpNoLine: Op<317, (outs), (ins), "OpNoLine">;
+def OpModuleProcessed: Op<330, (outs), (ins StringImm:$process, variable_ops),
+ "OpModuleProcessed $process">;
+
+// 3.42.3 Annotation Instructions
+
+def OpDecorate: Op<71, (outs), (ins ANY:$target, Decoration:$dec, variable_ops),
+ "OpDecorate $target $dec">;
+def OpMemberDecorate: Op<72, (outs), (ins TYPE:$t, i32imm:$m, Decoration:$d, variable_ops),
+ "OpMemberDecorate $t $m $d">;
+
+// TODO Currently some deprecated opcodes are missing: OpDecorationGroup,
+// OpGroupDecorate and OpGroupMemberDecorate
+
+def OpDecorateId: Op<332, (outs), (ins ANY:$target, Decoration:$dec, variable_ops),
+ "OpDecorateId $target $dec">;
+def OpDecorateString: Op<5632, (outs), (ins ANY:$t, Decoration:$d, StringImm:$s, variable_ops),
+ "OpDecorateString $t $d $s">;
+def OpMemberDecorateString: Op<5633, (outs),
+ (ins TYPE:$ty, i32imm:$mem, Decoration:$dec, StringImm:$str, variable_ops),
+ "OpMemberDecorateString $ty $mem $dec $str">;
+
+// 3.42.4 Extension Instructions
+
+def OpExtension: Op<10, (outs), (ins StringImm:$name, variable_ops), "OpExtension $name">;
+def OpExtInstImport: Op<11, (outs ID:$res), (ins StringImm:$extInstsName, variable_ops),
+ "$res = OpExtInstImport $extInstsName">;
+def OpExtInst: Op<12, (outs ID:$res), (ins TYPE:$ty, ID:$set, ExtInst:$inst, variable_ops),
+ "$res = OpExtInst $ty $set $inst">;
+
+// 3.42.5 Mode-Setting Instructions
+
+def OpMemoryModel: Op<14, (outs), (ins AddressingModel:$addr, MemoryModel:$mem),
+ "OpMemoryModel $addr $mem">;
+def OpEntryPoint: Op<15, (outs),
+ (ins ExecutionModel:$model, ID:$entry, StringImm:$name, variable_ops),
+ "OpEntryPoint $model $entry $name">;
+def OpExecutionMode: Op<16, (outs), (ins ID:$entry, ExecutionMode:$mode, variable_ops),
+ "OpExecutionMode $entry $mode">;
+def OpCapability: Op<17, (outs), (ins Capability:$cap), "OpCapability $cap">;
+def OpExecutionModeId: Op<331, (outs), (ins ID:$entry, ExecutionMode:$mode, variable_ops),
+ "OpExecutionModeId $entry $mode">;
+
+// 3.42.6 Type-Declaration Instructions
+
+def OpTypeVoid: Op<19, (outs TYPE:$type), (ins), "$type = OpTypeVoid">;
+def OpTypeBool: Op<20, (outs TYPE:$type), (ins), "$type = OpTypeBool">;
+def OpTypeInt: Op<21, (outs TYPE:$type), (ins i32imm:$width, i32imm:$signedness),
+ "$type = OpTypeInt $width $signedness">;
+def OpTypeFloat: Op<22, (outs TYPE:$type), (ins i32imm:$width),
+ "$type = OpTypeFloat $width">;
+def OpTypeVector: Op<23, (outs TYPE:$type), (ins TYPE:$compType, i32imm:$compCount),
+ "$type = OpTypeVector $compType $compCount">;
+def OpTypeMatrix: Op<24, (outs TYPE:$type), (ins TYPE:$colType, i32imm:$colCount),
+ "$type = OpTypeMatrix $colType $colCount">;
+def OpTypeImage: Op<25, (outs TYPE:$res), (ins TYPE:$sampTy, Dim:$dim, i32imm:$depth,
+ i32imm:$arrayed, i32imm:$MS, i32imm:$sampled, ImageFormat:$imFormat, variable_ops),
+ "$res = OpTypeImage $sampTy $dim $depth $arrayed $MS $sampled $imFormat">;
+def OpTypeSampler: Op<26, (outs TYPE:$res), (ins), "$res = OpTypeSampler">;
+def OpTypeSampledImage: Op<27, (outs TYPE:$res), (ins TYPE:$imageType),
+ "$res = OpTypeSampledImage $imageType">;
+def OpTypeArray: Op<28, (outs TYPE:$type), (ins TYPE:$elementType, ID:$length),
+ "$type = OpTypeArray $elementType $length">;
+def OpTypeRuntimeArray: Op<29, (outs TYPE:$type), (ins TYPE:$elementType),
+ "$type = OpTypeRuntimeArray $elementType">;
+def OpTypeStruct: Op<30, (outs TYPE:$res), (ins variable_ops), "$res = OpTypeStruct">;
+def OpTypeOpaque: Op<31, (outs TYPE:$res), (ins StringImm:$name, variable_ops),
+ "$res = OpTypeOpaque $name">;
+def OpTypePointer: Op<32, (outs TYPE:$res), (ins StorageClass:$storage, TYPE:$type),
+ "$res = OpTypePointer $storage $type">;
+def OpTypeFunction: Op<33, (outs TYPE:$funcType), (ins TYPE:$returnType, variable_ops),
+ "$funcType = OpTypeFunction $returnType">;
+def OpTypeEvent: Op<34, (outs TYPE:$res), (ins), "$res = OpTypeEvent">;
+def OpTypeDeviceEvent: Op<35, (outs TYPE:$res), (ins), "$res = OpTypeDeviceEvent">;
+def OpTypeReserveId: Op<36, (outs TYPE:$res), (ins), "$res = OpTypeReserveId">;
+def OpTypeQueue: Op<37, (outs TYPE:$res), (ins), "$res = OpTypeQueue">;
+def OpTypePipe: Op<38, (outs TYPE:$res), (ins AccessQualifier:$a), "$res = OpTypePipe $a">;
+def OpTypeForwardPointer: Op<39, (outs), (ins TYPE:$ptrType, StorageClass:$storageClass),
+ "OpTypeForwardPointer $ptrType $storageClass">;
+def OpTypePipeStorage: Op<322, (outs TYPE:$res), (ins), "$res = OpTypePipeStorage">;
+def OpTypeNamedBarrier: Op<327, (outs TYPE:$res), (ins), "$res = OpTypeNamedBarrier">;
+def OpTypeAccelerationStructureNV: Op<5341, (outs TYPE:$res), (ins),
+ "$res = OpTypeAccelerationStructureNV">;
+def OpTypeCooperativeMatrixNV: Op<5358, (outs TYPE:$res),
+ (ins TYPE:$compType, ID:$scope, ID:$rows, ID:$cols),
+ "$res = OpTypeCooperativeMatrixNV $compType $scope $rows $cols">;
+
+// 3.42.7 Constant-Creation Instructions
+
+def imm_to_i32 : SDNodeXForm<imm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAP().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def fimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def gi_bitcast_fimm_to_i32 : GICustomOperandRenderer<"renderFImm32">,
+ GISDNodeXFormEquiv<fimm_to_i32>;
+
+def gi_bitcast_imm_to_i32 : GICustomOperandRenderer<"renderImm32">,
+ GISDNodeXFormEquiv<imm_to_i32>;
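+
+// Note (illustrative, not part of the emitted patterns): both transforms
+// reduce a constant to its raw 32-bit pattern. For the float constant 1.0,
+// fimm_to_i32 produces the IEEE-754 encoding 0x3F800000; renderFImm32 in
+// SPIRVInstructionSelector.cpp performs the same bitcast for GlobalISel.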
+
+def PseudoConstI: IntImmLeaf<i32, [{ return Imm.getBitWidth() <= 32; }], imm_to_i32>;
+def PseudoConstF: FPImmLeaf<f32, [{ return true; }], fimm_to_i32>;
+def ConstPseudoTrue: IntImmLeaf<i32, [{ return Imm.getBitWidth() == 1 && Imm.getZExtValue() == 1; }]>;
+def ConstPseudoFalse: IntImmLeaf<i32, [{ return Imm.getBitWidth() == 1 && Imm.getZExtValue() == 0; }]>;
+def ConstPseudoNull: IntImmLeaf<i64, [{ return Imm.isNullValue(); }]>;
+
+multiclass IntFPImm<bits<16> opCode, string name> {
+ def I: Op<opCode, (outs ID:$dst), (ins TYPE:$type, ID:$src, variable_ops),
+ "$dst = "#name#" $type $src", [(set ID:$dst, (assigntype PseudoConstI:$src, TYPE:$type))]>;
+ def F: Op<opCode, (outs ID:$dst), (ins TYPE:$type, fID:$src, variable_ops),
+ "$dst = "#name#" $type $src", [(set ID:$dst, (assigntype PseudoConstF:$src, TYPE:$type))]>;
+}
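+
+// For reference, `defm OpConstant: IntFPImm<43, "OpConstant">` below expands
+// roughly to (a sketch; the pattern lists are abbreviated):
+//   def OpConstantI: Op<43, (outs ID:$dst), (ins TYPE:$type, ID:$src, variable_ops),
+//                       "$dst = OpConstant $type $src", [...]>;
+//   def OpConstantF: Op<43, (outs ID:$dst), (ins TYPE:$type, fID:$src, variable_ops),
+//                       "$dst = OpConstant $type $src", [...]>;
+// SPIRVInstructionSelector.cpp refers to the first record as SPIRV::OpConstantI.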
+
+def OpConstantTrue: Op<41, (outs ID:$dst), (ins TYPE:$src_ty), "$dst = OpConstantTrue $src_ty",
+ [(set ID:$dst, (assigntype ConstPseudoTrue, TYPE:$src_ty))]>;
+def OpConstantFalse: Op<42, (outs ID:$dst), (ins TYPE:$src_ty), "$dst = OpConstantFalse $src_ty",
+ [(set ID:$dst, (assigntype ConstPseudoFalse, TYPE:$src_ty))]>;
+
+defm OpConstant: IntFPImm<43, "OpConstant">;
+
+def OpConstantComposite: Op<44, (outs ID:$res), (ins TYPE:$type, variable_ops),
+ "$res = OpConstantComposite $type">;
+def OpConstantSampler: Op<45, (outs ID:$res),
+ (ins TYPE:$t, SamplerAddressingMode:$s, i32imm:$p, SamplerFilterMode:$f),
+ "$res = OpConstantSampler $t $s $p $f">;
+def OpConstantNull: Op<46, (outs ID:$dst), (ins TYPE:$src_ty), "$dst = OpConstantNull $src_ty",
+ [(set ID:$dst, (assigntype ConstPseudoNull, TYPE:$src_ty))]>;
+
+def OpSpecConstantTrue: Op<48, (outs ID:$r), (ins TYPE:$t), "$r = OpSpecConstantTrue $t">;
+def OpSpecConstantFalse: Op<49, (outs ID:$r), (ins TYPE:$t), "$r = OpSpecConstantFalse $t">;
+def OpSpecConstant: Op<50, (outs ID:$res), (ins TYPE:$type, i32imm:$imm, variable_ops),
+ "$res = OpSpecConstant $type $imm">;
+def OpSpecConstantComposite: Op<51, (outs ID:$res), (ins TYPE:$type, variable_ops),
+ "$res = OpSpecConstantComposite $type">;
+def OpSpecConstantOp: Op<52, (outs ID:$res), (ins TYPE:$t, i32imm:$c, ID:$o, variable_ops),
+ "$res = OpSpecConstantOp $t $c $o">;
+
+// 3.42.8 Memory Instructions
+
+def OpVariable: Op<59, (outs ID:$res), (ins TYPE:$type, StorageClass:$sc, variable_ops),
+ "$res = OpVariable $type $sc">;
+def OpImageTexelPointer: Op<60, (outs ID:$res),
+ (ins TYPE:$resType, ID:$image, ID:$coord, ID:$sample),
+ "$res = OpImageTexelPointer $resType $image $coord $sample">;
+def OpLoad: Op<61, (outs ID:$res), (ins TYPE:$resType, ID:$pointer, variable_ops),
+ "$res = OpLoad $resType $pointer">;
+def OpStore: Op<62, (outs), (ins ID:$pointer, ID:$objectToStore, variable_ops),
+ "OpStore $pointer $objectToStore">;
+def OpCopyMemory: Op<63, (outs), (ins ID:$dest, ID:$src, variable_ops),
+ "OpCopyMemory $dest $src">;
+def OpCopyMemorySized: Op<64, (outs), (ins ID:$dest, ID:$src, ID:$size, variable_ops),
+ "OpCopyMemorySized $dest $src $size">;
+def OpAccessChain: Op<65, (outs ID:$res), (ins TYPE:$type, ID:$base, variable_ops),
+ "$res = OpAccessChain $type $base">;
+def OpInBoundsAccessChain: Op<66, (outs ID:$res),
+ (ins TYPE:$type, ID:$base, variable_ops),
+ "$res = OpInBoundsAccessChain $type $base">;
+def OpPtrAccessChain: Op<67, (outs ID:$res),
+ (ins TYPE:$type, ID:$base, ID:$element, variable_ops),
+ "$res = OpPtrAccessChain $type $base $element">;
+def OpArrayLength: Op<68, (outs ID:$res), (ins TYPE:$resTy, ID:$struct, i32imm:$arrayMember),
+ "$res = OpArrayLength $resTy $struct $arrayMember">;
+def OpGenericPtrMemSemantics: Op<69, (outs ID:$res), (ins TYPE:$resType, ID:$pointer),
+ "$res = OpGenericPtrMemSemantics $resType $pointer">;
+def OpInBoundsPtrAccessChain: Op<70, (outs ID:$res),
+ (ins TYPE:$type, ID:$base, ID:$element, variable_ops),
+ "$res = OpInBoundsPtrAccessChain $type $base $element">;
+def OpPtrEqual: Op<401, (outs ID:$res), (ins TYPE:$resType, ID:$a, ID:$b),
+ "$res = OpPtrEqual $resType $a $b">;
+def OpPtrNotEqual: Op<402, (outs ID:$res), (ins TYPE:$resType, ID:$a, ID:$b),
+ "$res = OpPtrNotEqual $resType $a $b">;
+def OpPtrDiff: Op<403, (outs ID:$res), (ins TYPE:$resType, ID:$a, ID:$b),
+ "$res = OpPtrDiff $resType $a $b">;
+
+// 3.42.9 Function Instructions
+
+def OpFunction: Op<54, (outs ID:$func),
+ (ins TYPE:$resType, FunctionControl:$funcControl, TYPE:$funcType),
+ "$func = OpFunction $resType $funcControl $funcType">;
+def OpFunctionParameter: Op<55, (outs ID:$arg), (ins TYPE:$type),
+ "$arg = OpFunctionParameter $type">;
+def OpFunctionEnd: Op<56, (outs), (ins), "OpFunctionEnd"> {
+ let isTerminator=1;
+}
+def OpFunctionCall: Op<57, (outs ID:$res), (ins TYPE:$resType, ID:$function, variable_ops),
+ "$res = OpFunctionCall $resType $function">;
+
+// 3.42.10 Image Instructions
+
+def OpSampledImage: BinOp<"OpSampledImage", 86>;
+
+def OpImageSampleImplicitLod: Op<87, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops),
+ "$res = OpImageSampleImplicitLod $type $sampledImage $coord">;
+def OpImageSampleExplicitLod: Op<88, (outs ID:$res),
+ (ins TYPE:$ty, ID:$sImage, ID:$uv, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSampleExplicitLod $ty $sImage $uv $op $i">;
+
+def OpImageSampleDrefImplicitLod: Op<89, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops),
+ "$res = OpImageSampleDrefImplicitLod $type $sampledImage $dref $coord">;
+def OpImageSampleDrefExplicitLod: Op<90, (outs ID:$res),
+ (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSampleDrefExplicitLod $ty $im $uv $d $op $i">;
+
+def OpImageSampleProjImplicitLod: Op<91, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops),
+ "$res = OpImageSampleProjImplicitLod $type $sampledImage $coord">;
+def OpImageSampleProjExplicitLod: Op<92, (outs ID:$res),
+ (ins TYPE:$ty, ID:$im, ID:$uv, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSampleProjExplicitLod $ty $im $uv $op $i">;
+
+def OpImageSampleProjDrefImplicitLod: Op<93, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops),
+ "$res = OpImageSampleProjDrefImplicitLod $type $sampledImage $dref $coord">;
+def OpImageSampleProjDrefExplicitLod: Op<94, (outs ID:$res),
+ (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSampleProjDrefExplicitLod $ty $im $uv $d $op $i">;
+
+def OpImageFetch: Op<95, (outs ID:$res),
+ (ins TYPE:$type, ID:$image, ID:$coord, variable_ops),
+ "$res = OpImageFetch $type $image $coord">;
+def OpImageGather: Op<96, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$component, variable_ops),
+ "$res = OpImageGather $type $sampledImage $coord $component">;
+def OpImageDrefGather: Op<97, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops),
+ "$res = OpImageDrefGather $type $sampledImage $coord $dref">;
+
+def OpImageRead: Op<98, (outs ID:$res),
+ (ins TYPE:$type, ID:$image, ID:$coord, variable_ops),
+ "$res = OpImageRead $type $image $coord">;
+def OpImageWrite: Op<99, (outs), (ins ID:$image, ID:$coord, ID:$texel, variable_ops),
+ "OpImageWrite $image $coord $texel">;
+
+def OpImage: UnOp<"OpImage", 100>;
+def OpImageQueryFormat: UnOp<"OpImageQueryFormat", 101>;
+def OpImageQueryOrder: UnOp<"OpImageQueryOrder", 102>;
+def OpImageQuerySizeLod: BinOp<"OpImageQuerySizeLod", 103>;
+def OpImageQuerySize: UnOp<"OpImageQuerySize", 104>;
+def OpImageQueryLod: BinOp<"OpImageQueryLod", 105>;
+def OpImageQueryLevels: UnOp<"OpImageQueryLevels", 106>;
+def OpImageQuerySamples: UnOp<"OpImageQuerySamples", 107>;
+
+def OpImageSparseSampleImplicitLod: Op<305, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops),
+ "$res = OpImageSparseSampleImplicitLod $type $sampledImage $coord">;
+def OpImageSparseSampleExplicitLod: Op<306, (outs ID:$res),
+ (ins TYPE:$ty, ID:$sImage, ID:$uv, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSparseSampleExplicitLod $ty $sImage $uv $op $i">;
+
+def OpImageSparseSampleDrefImplicitLod: Op<307, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImg, ID:$coord, ID:$dref, variable_ops),
+ "$res = OpImageSparseSampleDrefImplicitLod $type $sampledImg $dref $coord">;
+def OpImageSparseSampleDrefExplicitLod: Op<308, (outs ID:$res),
+ (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSparseSampleDrefExplicitLod $ty $im $uv $d $op $i">;
+
+def OpImageSparseSampleProjImplicitLod: Op<309, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, variable_ops),
+ "$res = OpImageSparseSampleProjImplicitLod $type $sampledImage $coord">;
+def OpImageSparseSampleProjExplicitLod: Op<310, (outs ID:$res),
+ (ins TYPE:$ty, ID:$im, ID:$uv, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSparseSampleProjExplicitLod $ty $im $uv $op $i">;
+
+def OpImageSparseSampleProjDrefImplicitLod: Op<311, (outs ID:$res),
+ (ins TYPE:$type, ID:$sImage, ID:$coord, ID:$dref, variable_ops),
+ "$res = OpImageSparseSampleProjDrefImplicitLod $type $sImage $dref $coord">;
+def OpImageSparseSampleProjDrefExplicitLod: Op<312, (outs ID:$res),
+ (ins TYPE:$ty, ID:$im, ID:$uv, ID:$d, ImageOperand:$op, ID:$i, variable_ops),
+ "$res = OpImageSparseSampleProjDrefExplicitLod $ty $im $uv $d $op $i">;
+
+def OpImageSparseFetch: Op<313, (outs ID:$res),
+ (ins TYPE:$type, ID:$image, ID:$coord, variable_ops),
+ "$res = OpImageSparseFetch $type $image $coord">;
+def OpImageSparseGather: Op<314, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$component, variable_ops),
+ "$res = OpImageSparseGather $type $sampledImage $coord $component">;
+def OpImageSparseDrefGather: Op<315, (outs ID:$res),
+ (ins TYPE:$type, ID:$sampledImage, ID:$coord, ID:$dref, variable_ops),
+ "$res = OpImageSparseDrefGather $type $sampledImage $coord $dref">;
+
+def OpImageSparseTexelsResident: UnOp<"OpImageSparseTexelsResident", 316>;
+
+def OpImageSparseRead: Op<320, (outs ID:$res),
+ (ins TYPE:$type, ID:$image, ID:$coord, variable_ops),
+ "$res = OpImageSparseRead $type $image $coord">;
+
+def OpImageSampleFootprintNV: Op<5283, (outs ID:$res),
+ (ins TYPE:$ty, ID:$sImg, ID:$uv, ID:$granularity, ID:$coarse, variable_ops),
+ "$res = OpImageSampleFootprintNV $ty $sImg $uv $granularity $coarse">;
+
+// 3.42.11 Conversion instructions
+
+def OpConvertFToU : UnOp<"OpConvertFToU", 109>;
+def OpConvertFToS : UnOp<"OpConvertFToS", 110>;
+def OpConvertSToF : UnOp<"OpConvertSToF", 111>;
+def OpConvertUToF : UnOp<"OpConvertUToF", 112>;
+
+def OpUConvert : UnOp<"OpUConvert", 113>;
+def OpSConvert : UnOp<"OpSConvert", 114>;
+def OpFConvert : UnOp<"OpFConvert", 115>;
+
+def OpQuantizeToF16 : UnOp<"OpQuantizeToF16", 116>;
+
+def OpConvertPtrToU : UnOp<"OpConvertPtrToU", 117>;
+
+def OpSatConvertSToU : UnOp<"OpSatConvertSToU", 118>;
+def OpSatConvertUToS : UnOp<"OpSatConvertUToS", 119>;
+
+def OpConvertUToPtr : UnOp<"OpConvertUToPtr", 120>;
+def OpPtrCastToGeneric : UnOp<"OpPtrCastToGeneric", 121>;
+def OpGenericCastToPtr : UnOp<"OpGenericCastToPtr", 122>;
+def OpGenericCastToPtrExplicit : Op<123, (outs ID:$r), (ins TYPE:$t, ID:$p, StorageClass:$s),
+ "$r = OpGenericCastToPtrExplicit $t $p $s">;
+def OpBitcast : UnOp<"OpBitcast", 124>;
+
+// 3.42.12 Composite Instructions
+
+def OpVectorExtractDynamic: Op<77, (outs ID:$res), (ins TYPE:$type, vID:$vec, ID:$idx),
+ "$res = OpVectorExtractDynamic $type $vec $idx", [(set ID:$res, (assigntype (extractelt vID:$vec, ID:$idx), TYPE:$type))]>;
+
+def OpVectorInsertDynamic: Op<78, (outs ID:$res), (ins TYPE:$ty, ID:$vec, ID:$comp, ID:$idx),
+ "$res = OpVectorInsertDynamic $ty $vec $comp $idx">;
+def OpVectorShuffle: Op<79, (outs ID:$res), (ins TYPE:$ty, ID:$v1, ID:$v2, variable_ops),
+ "$res = OpVectorShuffle $ty $v1 $v2">;
+def OpCompositeConstruct: Op<80, (outs ID:$res), (ins TYPE:$type, variable_ops),
+ "$res = OpCompositeConstruct $type">;
+def OpCompositeExtract: Op<81, (outs ID:$res), (ins TYPE:$type, ID:$base, variable_ops),
+ "$res = OpCompositeExtract $type $base">;
+def OpCompositeInsert: Op<82, (outs ID:$r), (ins TYPE:$ty, ID:$obj, ID:$base, variable_ops),
+ "$r = OpCompositeInsert $ty $obj $base">;
+def OpCopyObject: UnOp<"OpCopyObject", 83>;
+def OpTranspose: UnOp<"OpTranspose", 84>;
+def OpCopyLogical: UnOp<"OpCopyLogical", 400>;
+
+// 3.42.13 Arithmetic Instructions
+
+def OpSNegate: UnOp<"OpSNegate", 126>;
+def OpFNegate: UnOpTyped<"OpFNegate", 127, fID, fneg>;
+defm OpIAdd: BinOpTypedGen<"OpIAdd", 128, add, 0, 1>;
+defm OpFAdd: BinOpTypedGen<"OpFAdd", 129, fadd, 1, 1>;
+
+defm OpISub: BinOpTypedGen<"OpISub", 130, sub, 0, 1>;
+defm OpFSub: BinOpTypedGen<"OpFSub", 131, fsub, 1, 1>;
+
+defm OpIMul: BinOpTypedGen<"OpIMul", 132, mul, 0, 1>;
+defm OpFMul: BinOpTypedGen<"OpFMul", 133, fmul, 1, 1>;
+
+defm OpUDiv: BinOpTypedGen<"OpUDiv", 134, udiv, 0, 1>;
+defm OpSDiv: BinOpTypedGen<"OpSDiv", 135, sdiv, 0, 1>;
+defm OpFDiv: BinOpTypedGen<"OpFDiv", 136, fdiv, 1, 1>;
+
+defm OpUMod: BinOpTypedGen<"OpUMod", 137, urem, 0, 1>;
+defm OpSRem: BinOpTypedGen<"OpSRem", 138, srem, 0, 1>;
+
+def OpSMod: BinOp<"OpSMod", 139>;
+
+defm OpFRem: BinOpTypedGen<"OpFRem", 140, frem, 1, 1>;
+def OpFMod: BinOp<"OpFMod", 141>;
+
+def OpVectorTimesScalar: BinOp<"OpVectorTimesScalar", 142>;
+def OpMatrixTimesScalar: BinOp<"OpMatrixTimesScalar", 143>;
+def OpVectorTimesMatrix: BinOp<"OpVectorTimesMatrix", 144>;
+def OpMatrixTimesVector: BinOp<"OpMatrixTimesVector", 145>;
+def OpMatrixTimesMatrix: BinOp<"OpMatrixTimesMatrix", 146>;
+
+def OpOuterProduct: BinOp<"OpOuterProduct", 147>;
+def OpDot: BinOp<"OpDot", 148>;
+
+def OpIAddCarry: BinOpTyped<"OpIAddCarry", 149, ID, addc>;
+def OpISubBorrow: BinOpTyped<"OpISubBorrow", 150, ID, subc>;
+def OpUMulExtended: BinOp<"OpUMulExtended", 151>;
+def OpSMulExtended: BinOp<"OpSMulExtended", 152>;
+
+// 3.42.14 Bit Instructions
+
+defm OpShiftRightLogical: BinOpTypedGen<"OpShiftRightLogical", 194, srl, 0, 1>;
+defm OpShiftRightArithmetic: BinOpTypedGen<"OpShiftRightArithmetic", 195, sra, 0, 1>;
+defm OpShiftLeftLogical: BinOpTypedGen<"OpShiftLeftLogical", 196, shl, 0, 1>;
+
+defm OpBitwiseOr: BinOpTypedGen<"OpBitwiseOr", 197, or, 0, 1>;
+defm OpBitwiseXor: BinOpTypedGen<"OpBitwiseXor", 198, xor, 0, 1>;
+defm OpBitwiseAnd: BinOpTypedGen<"OpBitwiseAnd", 199, and, 0, 1>;
+def OpNot: UnOp<"OpNot", 200>;
+
+def OpBitFieldInsert: Op<201, (outs ID:$res),
+ (ins TYPE:$ty, ID:$base, ID:$insert, ID:$offset, ID:$count),
+ "$res = OpBitFieldInsert $ty $base $insert $offset $count">;
+def OpBitFieldSExtract: Op<202, (outs ID:$res),
+ (ins TYPE:$ty, ID:$base, ID:$offset, ID:$count),
+ "$res = OpBitFieldSExtract $ty $base $offset $count">;
+def OpBitFieldUExtract: Op<203, (outs ID:$res),
+ (ins TYPE:$ty, ID:$base, ID:$offset, ID:$count),
+ "$res = OpBitFieldUExtract $ty $base $offset $count">;
+def OpBitReverse: Op<204, (outs ID:$r), (ins TYPE:$ty, ID:$b), "$r = OpBitReverse $ty $b">;
+def OpBitCount: Op<205, (outs ID:$r), (ins TYPE:$ty, ID:$b), "$r = OpBitCount $ty $b">;
+
+// 3.42.15 Relational and Logical Instructions
+
+def OpAny: Op<154, (outs ID:$res), (ins TYPE:$ty, ID:$vec),
+ "$res = OpAny $ty $vec">;
+def OpAll: Op<155, (outs ID:$res), (ins TYPE:$ty, ID:$vec),
+ "$res = OpAll $ty $vec">;
+
+def OpIsNan: UnOp<"OpIsNan", 156>;
+def OpIsInf: UnOp<"OpIsInf", 157>;
+def OpIsFinite: UnOp<"OpIsFinite", 158>;
+def OpIsNormal: UnOp<"OpIsNormal", 159>;
+def OpSignBitSet: UnOp<"OpSignBitSet", 160>;
+
+def OpLessOrGreater: BinOp<"OpLessOrGreater", 161>;
+def OpOrdered: BinOp<"OpOrdered", 162>;
+def OpUnordered: BinOp<"OpUnordered", 163>;
+
+def OpLogicalEqual: BinOp<"OpLogicalEqual", 164>;
+def OpLogicalNotEqual: BinOp<"OpLogicalNotEqual", 165>;
+def OpLogicalOr: BinOp<"OpLogicalOr", 166>;
+def OpLogicalAnd: BinOp<"OpLogicalAnd", 167>;
+def OpLogicalNot: UnOp<"OpLogicalNot", 168>;
+
+defm OpSelect: TernOpTypedGen<"OpSelect", 169, select, 1, 1, 1>;
+
+def OpIEqual: BinOp<"OpIEqual", 170>;
+def OpINotEqual: BinOp<"OpINotEqual", 171>;
+
+def OpUGreaterThan: BinOp<"OpUGreaterThan", 172>;
+def OpSGreaterThan: BinOp<"OpSGreaterThan", 173>;
+def OpUGreaterThanEqual: BinOp<"OpUGreaterThanEqual", 174>;
+def OpSGreaterThanEqual: BinOp<"OpSGreaterThanEqual", 175>;
+def OpULessThan: BinOp<"OpULessThan", 176>;
+def OpSLessThan: BinOp<"OpSLessThan", 177>;
+def OpULessThanEqual: BinOp<"OpULessThanEqual", 178>;
+def OpSLessThanEqual: BinOp<"OpSLessThanEqual", 179>;
+
+def OpFOrdEqual: BinOp<"OpFOrdEqual", 180>;
+def OpFUnordEqual: BinOp<"OpFUnordEqual", 181>;
+def OpFOrdNotEqual: BinOp<"OpFOrdNotEqual", 182>;
+def OpFUnordNotEqual: BinOp<"OpFUnordNotEqual", 183>;
+
+def OpFOrdLessThan: BinOp<"OpFOrdLessThan", 184>;
+def OpFUnordLessThan: BinOp<"OpFUnordLessThan", 185>;
+def OpFOrdGreaterThan: BinOp<"OpFOrdGreaterThan", 186>;
+def OpFUnordGreaterThan: BinOp<"OpFUnordGreaterThan", 187>;
+
+def OpFOrdLessThanEqual: BinOp<"OpFOrdLessThanEqual", 188>;
+def OpFUnordLessThanEqual: BinOp<"OpFUnordLessThanEqual", 189>;
+def OpFOrdGreaterThanEqual: BinOp<"OpFOrdGreaterThanEqual", 190>;
+def OpFUnordGreaterThanEqual: BinOp<"OpFUnordGreaterThanEqual", 191>;
+
+// 3.42.16 Derivative Instructions
+
+def OpDPdx: UnOp<"OpDPdx", 207>;
+def OpDPdy: UnOp<"OpDPdy", 208>;
+def OpFwidth: UnOp<"OpFwidth", 209>;
+
+def OpDPdxFine: UnOp<"OpDPdxFine", 210>;
+def OpDPdyFine: UnOp<"OpDPdyFine", 211>;
+def OpFwidthFine: UnOp<"OpFwidthFine", 212>;
+
+def OpDPdxCoarse: UnOp<"OpDPdxCoarse", 213>;
+def OpDPdyCoarse: UnOp<"OpDPdyCoarse", 214>;
+def OpFwidthCoarse: UnOp<"OpFwidthCoarse", 215>;
+
+// 3.42.17 Control-Flow Instructions
+
+def OpPhi: Op<245, (outs ID:$res), (ins TYPE:$type, ID:$var0, ID:$block0, variable_ops),
+ "$res = OpPhi $type $var0 $block0">;
+def OpLoopMerge: Op<246, (outs), (ins ID:$merge, ID:$continue, LoopControl:$lc, variable_ops),
+ "OpLoopMerge $merge $merge $continue $lc">;
+def OpSelectionMerge: Op<247, (outs), (ins ID:$merge, SelectionControl:$sc),
+ "OpSelectionMerge $merge $sc">;
+def OpLabel: Op<248, (outs ID:$label), (ins), "$label = OpLabel">;
+let isTerminator=1 in {
+ def OpBranch: Op<249, (outs), (ins ID:$label), "OpBranch $label">;
+ def OpBranchConditional: Op<250, (outs), (ins ID:$cond, ID:$true, ID:$false, variable_ops),
+ "OpBranchConditional $cond $true $false">;
+ def OpSwitch: Op<251, (outs), (ins ID:$sel, ID:$dflt, variable_ops), "OpSwitch $sel $dflt">;
+}
+let isReturn = 1, hasDelaySlot=0, isBarrier = 0, isTerminator=1, isNotDuplicable = 1 in {
+ def OpKill: SimpleOp<"OpKill", 252>;
+ def OpReturn: SimpleOp<"OpReturn", 253>;
+ def OpReturnValue: Op<254, (outs), (ins ANYID:$ret), "OpReturnValue $ret">;
+ def OpUnreachable: SimpleOp<"OpUnreachable", 255>;
+}
+def OpLifetimeStart: Op<256, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStart $ptr $sz">;
+def OpLifetimeStop: Op<257, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStop $ptr $sz">;
+
+// 3.42.18 Atomic Instructions
+
+class AtomicOp<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+ (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$sem),
+ "$res = "#name#" $ty $ptr $sc $sem">;
+
+class AtomicOpVal<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+ (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$sem, ID:$val),
+ "$res = "#name#" $ty $ptr $sc $sem $val">;
+
+def OpAtomicLoad: AtomicOp<"OpAtomicLoad", 227>;
+
+def OpAtomicStore: Op<228, (outs), (ins ID:$ptr, ID:$sc, ID:$sem, ID:$val),
+ "OpAtomicStore $ptr $sc $sem $val">;
+def OpAtomicExchange: Op<229, (outs ID:$res),
+ (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$sem, ID:$val),
+ "$res = OpAtomicExchange $ty $ptr $sc $sem $val">;
+def OpAtomicCompareExchange: Op<230, (outs ID:$res),
+ (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq,
+ ID:$neq, ID:$val, ID:$cmp),
+ "$res = OpAtomicCompareExchange $ty $ptr $sc $eq $neq $val $cmp">;
+// TODO Currently the following deprecated opcode is missing:
+// OpAtomicCompareExchangeWeak
+
+def OpAtomicIIncrement: AtomicOp<"OpAtomicIIncrement", 232>;
+def OpAtomicIDecrement: AtomicOp<"OpAtomicIDecrement", 233>;
+
+def OpAtomicIAdd: AtomicOpVal<"OpAtomicIAdd", 234>;
+def OpAtomicISub: AtomicOpVal<"OpAtomicISub", 235>;
+
+def OpAtomicSMin: AtomicOpVal<"OpAtomicSMin", 236>;
+def OpAtomicUMin: AtomicOpVal<"OpAtomicUMin", 237>;
+def OpAtomicSMax: AtomicOpVal<"OpAtomicSMax", 238>;
+def OpAtomicUMax: AtomicOpVal<"OpAtomicUMax", 239>;
+
+def OpAtomicAnd: AtomicOpVal<"OpAtomicAnd", 240>;
+def OpAtomicOr: AtomicOpVal<"OpAtomicOr", 241>;
+def OpAtomicXor: AtomicOpVal<"OpAtomicXor", 242>;
+
+def OpAtomicFlagTestAndSet: AtomicOp<"OpAtomicFlagTestAndSet", 318>;
+def OpAtomicFlagClear: Op<319, (outs), (ins ID:$ptr, ID:$sc, ID:$sem),
+ "OpAtomicFlagClear $ptr $sc $sem">;
+
+// 3.42.19 Primitive Instructions
+
+def OpEmitVertex: SimpleOp<"OpEmitVertex", 218>;
+def OpEndPrimitive: SimpleOp<"OpEndPrimitive", 219>;
+def OpEmitStreamVertex: Op<220, (outs), (ins ID:$stream), "OpEmitStreamVertex $stream">;
+def OpEndStreamPrimitive: Op<221, (outs), (ins ID:$stream), "OpEndStreamPrimitive $stream">;
+
+// 3.42.20 Barrier Instructions
+
+def OpControlBarrier: Op<224, (outs), (ins ID:$exec, ID:$mem, ID:$sem),
+ "OpControlBarrier $exec $mem $sem">;
+def OpMemoryBarrier: Op<225, (outs), (ins ID:$mem, ID:$sem),
+ "OpMemoryBarrier $mem $sem">;
+def OpNamedBarrierInitialize: UnOp<"OpNamedBarrierInitialize", 328>;
+def OpMemoryNamedBarrier: Op<329, (outs), (ins ID:$barr, ID:$mem, ID:$sem),
+ "OpMemoryNamedBarrier $barr $mem $sem">;
+
+// 3.42.21. Group and Subgroup Instructions
+
+def OpGroupAll: Op<261, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr),
+ "$res = OpGroupAll $ty $scope $pr">;
+def OpGroupAny: Op<262, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr),
+ "$res = OpGroupAny $ty $scope $pr">;
+def OpGroupBroadcast: Op<263, (outs ID:$res), (ins TYPE:$ty, ID:$scope,
+ ID:$val, ID:$id),
+ "$res = OpGroupBroadcast $ty $scope $val $id">;
+class OpGroup<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+ (ins TYPE:$ty, ID:$scope, GroupOperation:$groupOp, ID:$x),
+ "$res = OpGroup"#name#" $ty $scope $groupOp $x">;
+def OpGroupIAdd: OpGroup<"IAdd", 264>;
+def OpGroupFAdd: OpGroup<"FAdd", 265>;
+def OpGroupFMin: OpGroup<"FMin", 266>;
+def OpGroupUMin: OpGroup<"UMin", 267>;
+def OpGroupSMin: OpGroup<"SMin", 268>;
+def OpGroupFMax: OpGroup<"FMax", 269>;
+def OpGroupUMax: OpGroup<"UMax", 270>;
+def OpGroupSMax: OpGroup<"SMax", 271>;
+
+// TODO: 3.42.22. Device-Side Enqueue Instructions
+// TODO: 3.42.23. Pipe Instructions
+
+// 3.42.24. Non-Uniform Instructions
+
+def OpGroupNonUniformElect: Op<333, (outs ID:$res), (ins TYPE:$ty, ID:$scope),
+ "$res = OpGroupNonUniformElect $ty $scope">;
+class OpGroupNU3<string name, bits<16> opCode>: Op<opCode,
+ (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pred),
+ "$res = OpGroupNonUniform"#name#" $ty $scope $pred">;
+class OpGroupNU4<string name, bits<16> opCode>: Op<opCode,
+ (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$val, ID:$id),
+ "$res = OpGroupNonUniform"#name#" $ty $scope $val $id">;
+def OpGroupNonUniformAll: OpGroupNU3<"All", 334>;
+def OpGroupNonUniformAny: OpGroupNU3<"Any", 335>;
+def OpGroupNonUniformAllEqual: OpGroupNU3<"AllEqual", 336>;
+def OpGroupNonUniformBroadcast: OpGroupNU4<"Broadcast", 337>;
+def OpGroupNonUniformBroadcastFirst: OpGroupNU3<"BroadcastFirst", 338>;
+def OpGroupNonUniformBallot: OpGroupNU3<"Ballot", 339>;
+def OpGroupNonUniformInverseBallot: OpGroupNU3<"InverseBallot", 340>;
+def OpGroupNonUniformBallotBitExtract: OpGroupNU4<"BallotBitExtract", 341>;
+def OpGroupNonUniformBallotBitCount: Op<342, (outs ID:$res),
+ (ins TYPE:$ty, ID:$scope, GroupOperation:$groupOp, ID:$val),
+ "$res = OpGroupNonUniformBallotBitCount "
+ "$ty $scope $groupOp $val">;
+def OpGroupNonUniformBallotFindLSB: OpGroupNU3<"BallotFindLSB", 343>;
+def OpGroupNonUniformBallotFindMSB: OpGroupNU3<"BallotFindMSB", 344>;
+def OpGroupNonUniformShuffle: OpGroupNU4<"Shuffle", 345>;
+def OpGroupNonUniformShuffleXor: OpGroupNU4<"ShuffleXor", 346>;
+def OpGroupNonUniformShuffleUp: OpGroupNU4<"ShuffleUp", 347>;
+def OpGroupNonUniformShuffleDown: OpGroupNU4<"ShuffleDown", 348>;
+class OpGroupNUGroup<string name, bits<16> opCode>: Op<opCode, (outs ID:$res),
+ (ins TYPE:$ty, ID:$scope, GroupOperation:$groupOp,
+ ID:$val, variable_ops),
+ "$res = OpGroupNonUniform"#name#" $ty $scope $groupOp $val">;
+def OpGroupNonUniformIAdd: OpGroupNUGroup<"IAdd", 349>;
+def OpGroupNonUniformFAdd: OpGroupNUGroup<"FAdd", 350>;
+def OpGroupNonUniformIMul: OpGroupNUGroup<"IMul", 351>;
+def OpGroupNonUniformFMul: OpGroupNUGroup<"FMul", 352>;
+def OpGroupNonUniformSMin: OpGroupNUGroup<"SMin", 353>;
+def OpGroupNonUniformUMin: OpGroupNUGroup<"UMin", 354>;
+def OpGroupNonUniformFMin: OpGroupNUGroup<"FMin", 355>;
+def OpGroupNonUniformSMax: OpGroupNUGroup<"SMax", 356>;
+def OpGroupNonUniformUMax: OpGroupNUGroup<"UMax", 357>;
+def OpGroupNonUniformFMax: OpGroupNUGroup<"FMax", 358>;
+def OpGroupNonUniformBitwiseAnd: OpGroupNUGroup<"BitwiseAnd", 359>;
+def OpGroupNonUniformBitwiseOr: OpGroupNUGroup<"BitwiseOr", 360>;
+def OpGroupNonUniformBitwiseXor: OpGroupNUGroup<"BitwiseXor", 361>;
+def OpGroupNonUniformLogicalAnd: OpGroupNUGroup<"LogicalAnd", 362>;
+def OpGroupNonUniformLogicalOr: OpGroupNUGroup<"LogicalOr", 363>;
+def OpGroupNonUniformLogicalXor: OpGroupNUGroup<"LogicalXor", 364>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
new file mode 100644
index 000000000000..9294a60506a8
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -0,0 +1,1268 @@
+//===- SPIRVInstructionSelector.cpp ------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the targeting of the InstructionSelector class for
+// SPIRV.
+// TODO: This should be generated by TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVInstrInfo.h"
+#include "SPIRVRegisterBankInfo.h"
+#include "SPIRVRegisterInfo.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "spirv-isel"
+
+using namespace llvm;
+
+namespace {
+
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
+class SPIRVInstructionSelector : public InstructionSelector {
+ const SPIRVSubtarget &STI;
+ const SPIRVInstrInfo &TII;
+ const SPIRVRegisterInfo &TRI;
+ const RegisterBankInfo &RBI;
+ SPIRVGlobalRegistry &GR;
+ MachineRegisterInfo *MRI;
+
+public:
+ SPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+ const SPIRVSubtarget &ST,
+ const RegisterBankInfo &RBI);
+ void setupMF(MachineFunction &MF, GISelKnownBits *KB,
+ CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) override;
+ // Common selection code. Instruction-specific selection occurs in spvSelect.
+ bool select(MachineInstr &I) override;
+ static const char *getName() { return DEBUG_TYPE; }
+
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+
+private:
+ // tblgen-erated 'select' implementation, used as the initial selector for
+ // the patterns that don't require complex C++.
+ bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
+ // All instruction-specific selection that didn't happen in select(). It is
+ // basically a large switch/case delegating to the other select* methods.
+ bool spvSelect(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectGlobalValue(Register ResVReg, MachineInstr &I,
+ const MachineInstr *Init = nullptr) const;
+
+ bool selectUnOpWithSrc(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, Register SrcReg,
+ unsigned Opcode) const;
+ bool selectUnOp(Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ unsigned Opcode) const;
+
+ bool selectLoad(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectStore(MachineInstr &I) const;
+
+ bool selectMemOperation(Register ResVReg, MachineInstr &I) const;
+
+ bool selectAtomicRMW(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I, unsigned NewOpcode) const;
+
+ bool selectAtomicCmpXchg(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectFence(MachineInstr &I) const;
+
+ bool selectAddrSpaceCast(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectBitreverse(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectConstVector(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectCmp(Register ResVReg, const SPIRVType *ResType,
+ unsigned comparisonOpcode, MachineInstr &I) const;
+
+ bool selectICmp(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectFCmp(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ void renderImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
+ int OpIdx) const;
+ void renderFImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
+ int OpIdx) const;
+
+ bool selectConst(Register ResVReg, const SPIRVType *ResType, const APInt &Imm,
+ MachineInstr &I) const;
+
+ bool selectSelect(Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ bool IsSigned) const;
+ bool selectIToF(Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ bool IsSigned, unsigned Opcode) const;
+ bool selectExt(Register ResVReg, const SPIRVType *ResType, MachineInstr &I,
+ bool IsSigned) const;
+
+ bool selectTrunc(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectIntToBool(Register IntReg, Register ResVReg,
+ const SPIRVType *intTy, const SPIRVType *boolTy,
+ MachineInstr &I) const;
+
+ bool selectOpUndef(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectIntrinsic(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectExtractVal(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectInsertVal(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectExtractElt(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectInsertElt(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+ bool selectGEP(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectFrameIndex(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ bool selectBranch(MachineInstr &I) const;
+ bool selectBranchCond(MachineInstr &I) const;
+
+ bool selectPhi(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
+ Register buildI32Constant(uint32_t Val, MachineInstr &I,
+ const SPIRVType *ResType = nullptr) const;
+
+ Register buildZerosVal(const SPIRVType *ResType, MachineInstr &I) const;
+ Register buildOnesVal(bool AllOnes, const SPIRVType *ResType,
+ MachineInstr &I) const;
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+SPIRVInstructionSelector::SPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+ const SPIRVSubtarget &ST,
+ const RegisterBankInfo &RBI)
+ : InstructionSelector(), STI(ST), TII(*ST.getInstrInfo()),
+ TRI(*ST.getRegisterInfo()), RBI(RBI), GR(*ST.getSPIRVGlobalRegistry()),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "SPIRVGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
+ CodeGenCoverage &CoverageInfo,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ MRI = &MF.getRegInfo();
+ GR.setCurrentFunc(MF);
+ InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
+}
+
+// Defined in SPIRVLegalizerInfo.cpp.
+extern bool isTypeFoldingSupported(unsigned Opcode);
+
+bool SPIRVInstructionSelector::select(MachineInstr &I) {
+ assert(I.getParent() && "Instruction should be in a basic block!");
+ assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+ unsigned Opcode = I.getOpcode();
+ // If it's not a GMIR instruction, we've selected it already.
+ if (!isPreISelGenericOpcode(Opcode)) {
+ if (Opcode == SPIRV::ASSIGN_TYPE) { // These pseudos aren't needed any more.
+ auto *Def = MRI->getVRegDef(I.getOperand(1).getReg());
+ if (isTypeFoldingSupported(Def->getOpcode())) {
+ auto Res = selectImpl(I, *CoverageInfo);
+ assert(Res || Def->getOpcode() == TargetOpcode::G_CONSTANT);
+ if (Res)
+ return Res;
+ }
+ MRI->replaceRegWith(I.getOperand(1).getReg(), I.getOperand(0).getReg());
+ I.removeFromParent();
+ } else if (I.getNumDefs() == 1) {
+ // Make all vregs 32 bits (for SPIR-V IDs).
+ MRI->setType(I.getOperand(0).getReg(), LLT::scalar(32));
+ }
+ return true;
+ }
+
+ if (I.getNumOperands() != I.getNumExplicitOperands()) {
+ LLVM_DEBUG(dbgs() << "Generic instr has unexpected implicit operands\n");
+ return false;
+ }
+
+ // Common code for getting the return register and type, and for removing the
+ // selected instruction from its parent, lives here. Instruction-specific
+ // selection happens in spvSelect().
+ bool HasDefs = I.getNumDefs() > 0;
+ Register ResVReg = HasDefs ? I.getOperand(0).getReg() : Register(0);
+ SPIRVType *ResType = HasDefs ? GR.getSPIRVTypeForVReg(ResVReg) : nullptr;
+ assert(!HasDefs || ResType || I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+ if (spvSelect(ResVReg, ResType, I)) {
+ if (HasDefs) // Make all vregs 32 bits (for SPIR-V IDs).
+ MRI->setType(ResVReg, LLT::scalar(32));
+ I.removeFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ assert(!isTypeFoldingSupported(I.getOpcode()) ||
+ I.getOpcode() == TargetOpcode::G_CONSTANT);
+ const unsigned Opcode = I.getOpcode();
+ switch (Opcode) {
+ case TargetOpcode::G_CONSTANT:
+ return selectConst(ResVReg, ResType, I.getOperand(1).getCImm()->getValue(),
+ I);
+ case TargetOpcode::G_GLOBAL_VALUE:
+ return selectGlobalValue(ResVReg, I);
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return selectOpUndef(ResVReg, ResType, I);
+
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ return selectIntrinsic(ResVReg, ResType, I);
+ case TargetOpcode::G_BITREVERSE:
+ return selectBitreverse(ResVReg, ResType, I);
+
+ case TargetOpcode::G_BUILD_VECTOR:
+ return selectConstVector(ResVReg, ResType, I);
+
+ case TargetOpcode::G_SHUFFLE_VECTOR: {
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpVectorShuffle))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(1).getReg())
+ .addUse(I.getOperand(2).getReg());
+ for (auto V : I.getOperand(3).getShuffleMask())
+ MIB.addImm(V);
+ return MIB.constrainAllUses(TII, TRI, RBI);
+ }
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMCPY:
+ return selectMemOperation(ResVReg, I);
+
+ case TargetOpcode::G_ICMP:
+ return selectICmp(ResVReg, ResType, I);
+ case TargetOpcode::G_FCMP:
+ return selectFCmp(ResVReg, ResType, I);
+
+ case TargetOpcode::G_FRAME_INDEX:
+ return selectFrameIndex(ResVReg, ResType, I);
+
+ case TargetOpcode::G_LOAD:
+ return selectLoad(ResVReg, ResType, I);
+ case TargetOpcode::G_STORE:
+ return selectStore(I);
+
+ case TargetOpcode::G_BR:
+ return selectBranch(I);
+ case TargetOpcode::G_BRCOND:
+ return selectBranchCond(I);
+
+ case TargetOpcode::G_PHI:
+ return selectPhi(ResVReg, ResType, I);
+
+ case TargetOpcode::G_FPTOSI:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertFToS);
+ case TargetOpcode::G_FPTOUI:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertFToU);
+
+ case TargetOpcode::G_SITOFP:
+ return selectIToF(ResVReg, ResType, I, true, SPIRV::OpConvertSToF);
+ case TargetOpcode::G_UITOFP:
+ return selectIToF(ResVReg, ResType, I, false, SPIRV::OpConvertUToF);
+
+ case TargetOpcode::G_CTPOP:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitCount);
+
+ case TargetOpcode::G_SEXT:
+ return selectExt(ResVReg, ResType, I, true);
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ZEXT:
+ return selectExt(ResVReg, ResType, I, false);
+ case TargetOpcode::G_TRUNC:
+ return selectTrunc(ResVReg, ResType, I);
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPEXT:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpFConvert);
+
+ case TargetOpcode::G_PTRTOINT:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertPtrToU);
+ case TargetOpcode::G_INTTOPTR:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpConvertUToPtr);
+ case TargetOpcode::G_BITCAST:
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast);
+ case TargetOpcode::G_ADDRSPACE_CAST:
+ return selectAddrSpaceCast(ResVReg, ResType, I);
+
+ case TargetOpcode::G_ATOMICRMW_OR:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicOr);
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicIAdd);
+ case TargetOpcode::G_ATOMICRMW_AND:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicAnd);
+ case TargetOpcode::G_ATOMICRMW_MAX:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicSMax);
+ case TargetOpcode::G_ATOMICRMW_MIN:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicSMin);
+ case TargetOpcode::G_ATOMICRMW_SUB:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicISub);
+ case TargetOpcode::G_ATOMICRMW_XOR:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicXor);
+ case TargetOpcode::G_ATOMICRMW_UMAX:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicUMax);
+ case TargetOpcode::G_ATOMICRMW_UMIN:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicUMin);
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicExchange);
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ return selectAtomicCmpXchg(ResVReg, ResType, I);
+
+ case TargetOpcode::G_FENCE:
+ return selectFence(I);
+
+ default:
+ return false;
+ }
+}
+
+bool SPIRVInstructionSelector::selectUnOpWithSrc(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ Register SrcReg,
+ unsigned Opcode) const {
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(SrcReg)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectUnOp(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ unsigned Opcode) const {
+ return selectUnOpWithSrc(ResVReg, ResType, I, I.getOperand(1).getReg(),
+ Opcode);
+}
+
+static SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
+ switch (Ord) {
+ case AtomicOrdering::Acquire:
+ return SPIRV::MemorySemantics::Acquire;
+ case AtomicOrdering::Release:
+ return SPIRV::MemorySemantics::Release;
+ case AtomicOrdering::AcquireRelease:
+ return SPIRV::MemorySemantics::AcquireRelease;
+ case AtomicOrdering::SequentiallyConsistent:
+ return SPIRV::MemorySemantics::SequentiallyConsistent;
+ case AtomicOrdering::Unordered:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::NotAtomic:
+ return SPIRV::MemorySemantics::None;
+ }
+ llvm_unreachable("Unhandled AtomicOrdering in getMemSemantics");
+}
+
+static SPIRV::Scope getScope(SyncScope::ID Ord) {
+ switch (Ord) {
+ case SyncScope::SingleThread:
+ return SPIRV::Scope::Invocation;
+ case SyncScope::System:
+ return SPIRV::Scope::Device;
+ default:
+ llvm_unreachable("Unsupported synchronization Scope ID.");
+ }
+}
+
+static void addMemoryOperands(MachineMemOperand *MemOp,
+ MachineInstrBuilder &MIB) {
+ uint32_t SpvMemOp = static_cast<uint32_t>(SPIRV::MemoryOperand::None);
+ if (MemOp->isVolatile())
+ SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Volatile);
+ if (MemOp->isNonTemporal())
+ SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Nontemporal);
+ if (MemOp->getAlign().value())
+ SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Aligned);
+
+ if (SpvMemOp != static_cast<uint32_t>(SPIRV::MemoryOperand::None)) {
+ MIB.addImm(SpvMemOp);
+ if (SpvMemOp & static_cast<uint32_t>(SPIRV::MemoryOperand::Aligned))
+ MIB.addImm(MemOp->getAlign().value());
+ }
+}
+
+static void addMemoryOperands(uint64_t Flags, MachineInstrBuilder &MIB) {
+ uint32_t SpvMemOp = static_cast<uint32_t>(SPIRV::MemoryOperand::None);
+ if (Flags & MachineMemOperand::Flags::MOVolatile)
+ SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Volatile);
+ if (Flags & MachineMemOperand::Flags::MONonTemporal)
+ SpvMemOp |= static_cast<uint32_t>(SPIRV::MemoryOperand::Nontemporal);
+
+ if (SpvMemOp != static_cast<uint32_t>(SPIRV::MemoryOperand::None))
+ MIB.addImm(SpvMemOp);
+}
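+
+// For illustration: per the SPIR-V spec, Volatile = 0x1, Aligned = 0x2 and
+// Nontemporal = 0x4, so a volatile access with 4-byte alignment is emitted by
+// the MachineMemOperand overload above roughly as:
+//   MIB.addImm(0x3); // Volatile | Aligned
+//   MIB.addImm(4);   // the alignment literal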
+
+bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ unsigned OpOffset =
+ I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+ Register Ptr = I.getOperand(1 + OpOffset).getReg();
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(Ptr);
+ if (!I.getNumMemOperands()) {
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
+ } else {
+ addMemoryOperands(*I.memoperands_begin(), MIB);
+ }
+ return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
+ unsigned OpOffset =
+ I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
+ Register StoreVal = I.getOperand(0 + OpOffset).getReg();
+ Register Ptr = I.getOperand(1 + OpOffset).getReg();
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpStore))
+ .addUse(Ptr)
+ .addUse(StoreVal);
+ if (!I.getNumMemOperands()) {
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
+ } else {
+ addMemoryOperands(*I.memoperands_begin(), MIB);
+ }
+ return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg,
+ MachineInstr &I) const {
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemorySized))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(I.getOperand(1).getReg())
+ .addUse(I.getOperand(2).getReg());
+ if (I.getNumMemOperands())
+ addMemoryOperands(*I.memoperands_begin(), MIB);
+ bool Result = MIB.constrainAllUses(TII, TRI, RBI);
+ if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) {
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), ResVReg)
+ .addUse(MIB->getOperand(0).getReg());
+ }
+ return Result;
+}
+
+bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ unsigned NewOpcode) const {
+ assert(I.hasOneMemOperand());
+ const MachineMemOperand *MemOp = *I.memoperands_begin();
+ uint32_t Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
+ Register ScopeReg = buildI32Constant(Scope, I);
+
+ Register Ptr = I.getOperand(1).getReg();
+ // TODO: This follows the translator's implementation (the storage-class
+ // semantics below stay commented out); see test/atomicrmw.ll.
+ // auto ScSem =
+ // getMemSemanticsForStorageClass(GR.getPointerStorageClass(Ptr));
+ AtomicOrdering AO = MemOp->getSuccessOrdering();
+ uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
+ Register MemSemReg = buildI32Constant(MemSem /*| ScSem*/, I);
+
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(NewOpcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(Ptr)
+ .addUse(ScopeReg)
+ .addUse(MemSemReg)
+ .addUse(I.getOperand(2).getReg())
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const {
+ AtomicOrdering AO = AtomicOrdering(I.getOperand(0).getImm());
+ uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
+ Register MemSemReg = buildI32Constant(MemSem, I);
+ SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm());
+ uint32_t Scope = static_cast<uint32_t>(getScope(Ord));
+ Register ScopeReg = buildI32Constant(Scope, I);
+ MachineBasicBlock &BB = *I.getParent();
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier))
+ .addUse(ScopeReg)
+ .addUse(MemSemReg)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ assert(I.hasOneMemOperand());
+ const MachineMemOperand *MemOp = *I.memoperands_begin();
+ uint32_t Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
+ Register ScopeReg = buildI32Constant(Scope, I);
+
+ Register Ptr = I.getOperand(2).getReg();
+ Register Cmp = I.getOperand(3).getReg();
+ Register Val = I.getOperand(4).getReg();
+
+ SPIRVType *SpvValTy = GR.getSPIRVTypeForVReg(Val);
+ SPIRV::StorageClass SC = GR.getPointerStorageClass(Ptr);
+ uint32_t ScSem = static_cast<uint32_t>(getMemSemanticsForStorageClass(SC));
+ AtomicOrdering AO = MemOp->getSuccessOrdering();
+ uint32_t MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem;
+ Register MemSemEqReg = buildI32Constant(MemSemEq, I);
+ AtomicOrdering FO = MemOp->getFailureOrdering();
+ uint32_t MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem;
+ Register MemSemNeqReg =
+ MemSemEq == MemSemNeq ? MemSemEqReg : buildI32Constant(MemSemNeq, I);
+ const DebugLoc &DL = I.getDebugLoc();
+ return BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(SpvValTy))
+ .addUse(Ptr)
+ .addUse(ScopeReg)
+ .addUse(MemSemEqReg)
+ .addUse(MemSemNeqReg)
+ .addUse(Val)
+ .addUse(Cmp)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+static bool isGenericCastablePtr(SPIRV::StorageClass SC) {
+ switch (SC) {
+ case SPIRV::StorageClass::Workgroup:
+ case SPIRV::StorageClass::CrossWorkgroup:
+ case SPIRV::StorageClass::Function:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// In SPIR-V, address-space casts can only happen to and from the Generic
+// storage class, and only Workgroup, CrossWorkgroup, or Function pointers can
+// be cast to and from Generic pointers. As such, we can convert, e.g., from
+// Workgroup to Function by going through a Generic pointer as an intermediary.
+// All other combinations can only be done with a bitcast, and are probably not
+// safe.
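+// For example, a Workgroup-to-Function pointer cast is selected as two
+// instructions, roughly (a sketch; the %names are illustrative):
+//   %tmp = OpPtrCastToGeneric %ptr_Generic_ty %src
+//   %dst = OpGenericCastToPtr %ptr_Function_ty %tmp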
+bool SPIRVInstructionSelector::selectAddrSpaceCast(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ Register SrcPtr = I.getOperand(1).getReg();
+ SPIRVType *SrcPtrTy = GR.getSPIRVTypeForVReg(SrcPtr);
+ SPIRV::StorageClass SrcSC = GR.getPointerStorageClass(SrcPtr);
+ SPIRV::StorageClass DstSC = GR.getPointerStorageClass(ResVReg);
+
+ // Casting from an eligible pointer to Generic.
+ if (DstSC == SPIRV::StorageClass::Generic && isGenericCastablePtr(SrcSC))
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpPtrCastToGeneric);
+ // Casting from Generic to an eligible pointer.
+ if (SrcSC == SPIRV::StorageClass::Generic && isGenericCastablePtr(DstSC))
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpGenericCastToPtr);
+ // Casting between two eligible pointers using Generic as an intermediary.
+ if (isGenericCastablePtr(SrcSC) && isGenericCastablePtr(DstSC)) {
+ Register Tmp = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ SPIRVType *GenericPtrTy = GR.getOrCreateSPIRVPointerType(
+ SrcPtrTy, I, TII, SPIRV::StorageClass::Generic);
+ MachineBasicBlock &BB = *I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ bool Success = BuildMI(BB, I, DL, TII.get(SPIRV::OpPtrCastToGeneric))
+ .addDef(Tmp)
+ .addUse(GR.getSPIRVTypeID(GenericPtrTy))
+ .addUse(SrcPtr)
+ .constrainAllUses(TII, TRI, RBI);
+ return Success && BuildMI(BB, I, DL, TII.get(SPIRV::OpGenericCastToPtr))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(Tmp)
+ .constrainAllUses(TII, TRI, RBI);
+ }
+ // TODO: Should this case just be disallowed completely?
+ // We are casting between two other arbitrary address spaces, so we have to
+ // bitcast.
+ return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast);
+}
+
+static unsigned getFCmpOpcode(unsigned PredNum) {
+ auto Pred = static_cast<CmpInst::Predicate>(PredNum);
+ switch (Pred) {
+ case CmpInst::FCMP_OEQ:
+ return SPIRV::OpFOrdEqual;
+ case CmpInst::FCMP_OGE:
+ return SPIRV::OpFOrdGreaterThanEqual;
+ case CmpInst::FCMP_OGT:
+ return SPIRV::OpFOrdGreaterThan;
+ case CmpInst::FCMP_OLE:
+ return SPIRV::OpFOrdLessThanEqual;
+ case CmpInst::FCMP_OLT:
+ return SPIRV::OpFOrdLessThan;
+ case CmpInst::FCMP_ONE:
+ return SPIRV::OpFOrdNotEqual;
+ case CmpInst::FCMP_ORD:
+ return SPIRV::OpOrdered;
+ case CmpInst::FCMP_UEQ:
+ return SPIRV::OpFUnordEqual;
+ case CmpInst::FCMP_UGE:
+ return SPIRV::OpFUnordGreaterThanEqual;
+ case CmpInst::FCMP_UGT:
+ return SPIRV::OpFUnordGreaterThan;
+ case CmpInst::FCMP_ULE:
+ return SPIRV::OpFUnordLessThanEqual;
+ case CmpInst::FCMP_ULT:
+ return SPIRV::OpFUnordLessThan;
+ case CmpInst::FCMP_UNE:
+ return SPIRV::OpFUnordNotEqual;
+ case CmpInst::FCMP_UNO:
+ return SPIRV::OpUnordered;
+ default:
+ llvm_unreachable("Unknown predicate type for FCmp");
+ }
+}
+
+static unsigned getICmpOpcode(unsigned PredNum) {
+ auto Pred = static_cast<CmpInst::Predicate>(PredNum);
+ switch (Pred) {
+ case CmpInst::ICMP_EQ:
+ return SPIRV::OpIEqual;
+ case CmpInst::ICMP_NE:
+ return SPIRV::OpINotEqual;
+ case CmpInst::ICMP_SGE:
+ return SPIRV::OpSGreaterThanEqual;
+ case CmpInst::ICMP_SGT:
+ return SPIRV::OpSGreaterThan;
+ case CmpInst::ICMP_SLE:
+ return SPIRV::OpSLessThanEqual;
+ case CmpInst::ICMP_SLT:
+ return SPIRV::OpSLessThan;
+ case CmpInst::ICMP_UGE:
+ return SPIRV::OpUGreaterThanEqual;
+ case CmpInst::ICMP_UGT:
+ return SPIRV::OpUGreaterThan;
+ case CmpInst::ICMP_ULE:
+ return SPIRV::OpULessThanEqual;
+ case CmpInst::ICMP_ULT:
+ return SPIRV::OpULessThan;
+ default:
+ llvm_unreachable("Unknown predicate type for ICmp");
+ }
+}
+
+static unsigned getPtrCmpOpcode(unsigned Pred) {
+ switch (static_cast<CmpInst::Predicate>(Pred)) {
+ case CmpInst::ICMP_EQ:
+ return SPIRV::OpPtrEqual;
+ case CmpInst::ICMP_NE:
+ return SPIRV::OpPtrNotEqual;
+ default:
+ llvm_unreachable("Unknown predicate type for pointer comparison");
+ }
+}
+
+// Return the logical operation, or abort if none exists.
+static unsigned getBoolCmpOpcode(unsigned PredNum) {
+ auto Pred = static_cast<CmpInst::Predicate>(PredNum);
+ switch (Pred) {
+ case CmpInst::ICMP_EQ:
+ return SPIRV::OpLogicalEqual;
+ case CmpInst::ICMP_NE:
+ return SPIRV::OpLogicalNotEqual;
+ default:
+ llvm_unreachable("Unknown predicate type for Bool comparison");
+ }
+}
+
+bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ MachineBasicBlock &BB = *I.getParent();
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpBitReverse))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(1).getReg())
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectConstVector(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ // TODO: only the constant case is supported for now.
+ assert(std::all_of(
+ I.operands_begin(), I.operands_end(), [this](const MachineOperand &MO) {
+ if (MO.isDef())
+ return true;
+ if (!MO.isReg())
+ return false;
+ SPIRVType *ConstTy = this->MRI->getVRegDef(MO.getReg());
+ assert(ConstTy && ConstTy->getOpcode() == SPIRV::ASSIGN_TYPE &&
+ ConstTy->getOperand(1).isReg());
+ Register ConstReg = ConstTy->getOperand(1).getReg();
+ const MachineInstr *Const = this->MRI->getVRegDef(ConstReg);
+ assert(Const);
+ return (Const->getOpcode() == TargetOpcode::G_CONSTANT ||
+ Const->getOpcode() == TargetOpcode::G_FCONSTANT);
+ }));
+
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(SPIRV::OpConstantComposite))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType));
+ for (unsigned i = I.getNumExplicitDefs(); i < I.getNumExplicitOperands(); ++i)
+ MIB.addUse(I.getOperand(i).getReg());
+ return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectCmp(Register ResVReg,
+ const SPIRVType *ResType,
+ unsigned CmpOpc,
+ MachineInstr &I) const {
+ Register Cmp0 = I.getOperand(2).getReg();
+ Register Cmp1 = I.getOperand(3).getReg();
+ assert(GR.getSPIRVTypeForVReg(Cmp0)->getOpcode() ==
+ GR.getSPIRVTypeForVReg(Cmp1)->getOpcode() &&
+ "CMP operands should have the same type");
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CmpOpc))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(Cmp0)
+ .addUse(Cmp1)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectICmp(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ auto Pred = I.getOperand(1).getPredicate();
+ unsigned CmpOpc;
+
+ Register CmpOperand = I.getOperand(2).getReg();
+ if (GR.isScalarOfType(CmpOperand, SPIRV::OpTypePointer))
+ CmpOpc = getPtrCmpOpcode(Pred);
+ else if (GR.isScalarOrVectorOfType(CmpOperand, SPIRV::OpTypeBool))
+ CmpOpc = getBoolCmpOpcode(Pred);
+ else
+ CmpOpc = getICmpOpcode(Pred);
+ return selectCmp(ResVReg, ResType, CmpOpc, I);
+}
+
+void SPIRVInstructionSelector::renderFImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &I,
+ int OpIdx) const {
+ assert(I.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+ "Expected G_FCONSTANT");
+ const ConstantFP *FPImm = I.getOperand(1).getFPImm();
+ addNumImm(FPImm->getValueAPF().bitcastToAPInt(), MIB);
+}
+
+void SPIRVInstructionSelector::renderImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &I,
+ int OpIdx) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
+ "Expected G_CONSTANT");
+ addNumImm(I.getOperand(1).getCImm()->getValue(), MIB);
+}
+
+Register
+SPIRVInstructionSelector::buildI32Constant(uint32_t Val, MachineInstr &I,
+ const SPIRVType *ResType) const {
+ const SPIRVType *SpvI32Ty =
+ ResType ? ResType : GR.getOrCreateSPIRVIntegerType(32, I, TII);
+ Register NewReg = MRI->createGenericVirtualRegister(LLT::scalar(32));
+ MachineInstr *MI;
+ MachineBasicBlock &BB = *I.getParent();
+ if (Val == 0) {
+ MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
+ .addDef(NewReg)
+ .addUse(GR.getSPIRVTypeID(SpvI32Ty));
+ } else {
+ MI = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
+ .addDef(NewReg)
+ .addUse(GR.getSPIRVTypeID(SpvI32Ty))
+ .addImm(APInt(32, Val).getZExtValue());
+ }
+ constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+ return NewReg;
+}
+
+bool SPIRVInstructionSelector::selectFCmp(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ unsigned CmpOp = getFCmpOpcode(I.getOperand(1).getPredicate());
+ return selectCmp(ResVReg, ResType, CmpOp, I);
+}
+
+Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType,
+ MachineInstr &I) const {
+ return buildI32Constant(0, I, ResType);
+}
+
+Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType);
+ APInt One = AllOnes ? APInt::getAllOnesValue(BitWidth)
+ : APInt::getOneBitSet(BitWidth, 0);
+ Register OneReg = buildI32Constant(One.getZExtValue(), I, ResType);
+ if (ResType->getOpcode() == SPIRV::OpTypeVector) {
+ const unsigned NumEles = ResType->getOperand(2).getImm();
+ Register OneVec = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ unsigned Opcode = SPIRV::OpConstantComposite;
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(OneVec)
+ .addUse(GR.getSPIRVTypeID(ResType));
+ for (unsigned i = 0; i < NumEles; ++i)
+ MIB.addUse(OneReg);
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ return OneVec;
+ }
+ return OneReg;
+}
+
+bool SPIRVInstructionSelector::selectSelect(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I,
+ bool IsSigned) const {
+ // To extend a bool, we need to use OpSelect between constants.
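+ // For example (illustrative SPIR-V, ids hypothetical), sext i1 %c to i32
+ // becomes:
+ //   %res = OpSelect %int32 %c %minus_one %zero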
+ Register ZeroReg = buildZerosVal(ResType, I);
+ Register OneReg = buildOnesVal(IsSigned, ResType, I);
+ bool IsScalarBool =
+ GR.isScalarOfType(I.getOperand(1).getReg(), SPIRV::OpTypeBool);
+ unsigned Opcode =
+ IsScalarBool ? SPIRV::OpSelectSISCond : SPIRV::OpSelectSIVCond;
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(1).getReg())
+ .addUse(OneReg)
+ .addUse(ZeroReg)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectIToF(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I, bool IsSigned,
+ unsigned Opcode) const {
+ Register SrcReg = I.getOperand(1).getReg();
+ // A bool value can be converted to float directly, without OpConvert*ToF;
+ // however, the translator generates OpSelect+OpConvert*ToF, so we do the same.
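+ // For example (ids hypothetical), uitofp i1 %c to float becomes roughly:
+ //   %tmp = OpSelect %int32 %c %one %zero
+ //   %res = OpConvertUToF %float %tmp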
+ if (GR.isScalarOrVectorOfType(I.getOperand(1).getReg(), SPIRV::OpTypeBool)) {
+ unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType);
+ SPIRVType *TmpType = GR.getOrCreateSPIRVIntegerType(BitWidth, I, TII);
+ if (ResType->getOpcode() == SPIRV::OpTypeVector) {
+ const unsigned NumElts = ResType->getOperand(2).getImm();
+ TmpType = GR.getOrCreateSPIRVVectorType(TmpType, NumElts, I, TII);
+ }
+ SrcReg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ selectSelect(SrcReg, TmpType, I, false);
+ }
+ return selectUnOpWithSrc(ResVReg, ResType, I, SrcReg, Opcode);
+}
+
+bool SPIRVInstructionSelector::selectExt(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I, bool IsSigned) const {
+ if (GR.isScalarOrVectorOfType(I.getOperand(1).getReg(), SPIRV::OpTypeBool))
+ return selectSelect(ResVReg, ResType, I, IsSigned);
+ unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
+ return selectUnOp(ResVReg, ResType, I, Opcode);
+}
+
+bool SPIRVInstructionSelector::selectIntToBool(Register IntReg,
+ Register ResVReg,
+ const SPIRVType *IntTy,
+ const SPIRVType *BoolTy,
+ MachineInstr &I) const {
+ // To truncate to a bool, we use OpBitwiseAnd 1 and OpINotEqual to zero.
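+ // For example (ids hypothetical), trunc i32 %v to i1 becomes roughly:
+ //   %bit = OpBitwiseAnd %int32 %v %one
+ //   %res = OpINotEqual %bool %bit %zero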
+ Register BitIntReg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ bool IsVectorTy = IntTy->getOpcode() == SPIRV::OpTypeVector;
+ unsigned Opcode = IsVectorTy ? SPIRV::OpBitwiseAndV : SPIRV::OpBitwiseAndS;
+ Register Zero = buildZerosVal(IntTy, I);
+ Register One = buildOnesVal(false, IntTy, I);
+ MachineBasicBlock &BB = *I.getParent();
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(BitIntReg)
+ .addUse(GR.getSPIRVTypeID(IntTy))
+ .addUse(IntReg)
+ .addUse(One)
+ .constrainAllUses(TII, TRI, RBI);
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpINotEqual))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(BoolTy))
+ .addUse(BitIntReg)
+ .addUse(Zero)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectTrunc(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ if (GR.isScalarOrVectorOfType(ResVReg, SPIRV::OpTypeBool)) {
+ Register IntReg = I.getOperand(1).getReg();
+ const SPIRVType *ArgType = GR.getSPIRVTypeForVReg(IntReg);
+ return selectIntToBool(IntReg, ResVReg, ArgType, ResType, I);
+ }
+ bool IsSigned = GR.isScalarOrVectorSigned(ResType);
+ unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert;
+ return selectUnOp(ResVReg, ResType, I, Opcode);
+}
+
+bool SPIRVInstructionSelector::selectConst(Register ResVReg,
+ const SPIRVType *ResType,
+ const APInt &Imm,
+ MachineInstr &I) const {
+ assert(ResType->getOpcode() != SPIRV::OpTypePointer || Imm.isNullValue());
+ MachineBasicBlock &BB = *I.getParent();
+ if (ResType->getOpcode() == SPIRV::OpTypePointer && Imm.isNullValue()) {
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .constrainAllUses(TII, TRI, RBI);
+ }
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType));
+ // <=32-bit integers should be caught by the sdag pattern.
+ assert(Imm.getBitWidth() > 32);
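+ // SPIR-V encodes literals wider than 32 bits as a sequence of 32-bit words,
+ // low-order word first; addNumImm is expected to split the APInt accordingly.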
+ addNumImm(Imm, MIB);
+ return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectOpUndef(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpUndef))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .constrainAllUses(TII, TRI, RBI);
+}
+
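+// Return true if the operand's vreg is defined by an ASSIGN_TYPE whose source
+// is a G_CONSTANT, i.e. the operand can be folded to an immediate.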
+static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ assert(MO.isReg());
+ const SPIRVType *TypeInst = MRI->getVRegDef(MO.getReg());
+ if (TypeInst->getOpcode() != SPIRV::ASSIGN_TYPE)
+ return false;
+ assert(TypeInst->getOperand(1).isReg());
+ MachineInstr *ImmInst = MRI->getVRegDef(TypeInst->getOperand(1).getReg());
+ return ImmInst->getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
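+// Return the zero-extended value of the G_CONSTANT behind the operand's
+// ASSIGN_TYPE definition. Only valid when isImm() above returned true.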
+static int64_t foldImm(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ const SPIRVType *TypeInst = MRI->getVRegDef(MO.getReg());
+ MachineInstr *ImmInst = MRI->getVRegDef(TypeInst->getOperand(1).getReg());
+ assert(ImmInst->getOpcode() == TargetOpcode::G_CONSTANT);
+ return ImmInst->getOperand(1).getCImm()->getZExtValue();
+}
+
+bool SPIRVInstructionSelector::selectInsertVal(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ MachineBasicBlock &BB = *I.getParent();
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ // object to insert
+ .addUse(I.getOperand(3).getReg())
+ // composite to insert into
+ .addUse(I.getOperand(2).getReg())
+ // TODO: support arbitrary number of indices
+ .addImm(foldImm(I.getOperand(4), MRI))
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectExtractVal(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ MachineBasicBlock &BB = *I.getParent();
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(2).getReg())
+ // TODO: support arbitrary number of indices
+ .addImm(foldImm(I.getOperand(3), MRI))
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectInsertElt(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ if (isImm(I.getOperand(4), MRI))
+ return selectInsertVal(ResVReg, ResType, I);
+ MachineBasicBlock &BB = *I.getParent();
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpVectorInsertDynamic))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg())
+ .addUse(I.getOperand(4).getReg())
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectExtractElt(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ if (isImm(I.getOperand(3), MRI))
+ return selectExtractVal(ResVReg, ResType, I);
+ MachineBasicBlock &BB = *I.getParent();
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpVectorExtractDynamic))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg())
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectGEP(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ // In general we should also support OpAccessChain instrs here (i.e. not
+ // PtrAccessChain), but the SPIRV-LLVM Translator doesn't emit them at all,
+ // and neither do we, to stay compliant with its tests and, more importantly,
+ // with its consumers.
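+ // For example (ids hypothetical), an inbounds GEP with a single index is
+ // selected as:
+ //   %res = OpInBoundsPtrAccessChain %ptr_ty %base %idx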
+ unsigned Opcode = I.getOperand(2).getImm() ? SPIRV::OpInBoundsPtrAccessChain
+ : SPIRV::OpPtrAccessChain;
+ auto Res = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ // Object to get a pointer to.
+ .addUse(I.getOperand(3).getReg());
+ // Adding indices.
+ for (unsigned i = 4; i < I.getNumExplicitOperands(); ++i)
+ Res.addUse(I.getOperand(i).getReg());
+ return Res.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ MachineBasicBlock &BB = *I.getParent();
+ switch (I.getIntrinsicID()) {
+ case Intrinsic::spv_load:
+ return selectLoad(ResVReg, ResType, I);
+ case Intrinsic::spv_store:
+ return selectStore(I);
+ case Intrinsic::spv_extractv:
+ return selectExtractVal(ResVReg, ResType, I);
+ case Intrinsic::spv_insertv:
+ return selectInsertVal(ResVReg, ResType, I);
+ case Intrinsic::spv_extractelt:
+ return selectExtractElt(ResVReg, ResType, I);
+ case Intrinsic::spv_insertelt:
+ return selectInsertElt(ResVReg, ResType, I);
+ case Intrinsic::spv_gep:
+ return selectGEP(ResVReg, ResType, I);
+ case Intrinsic::spv_unref_global:
+ case Intrinsic::spv_init_global: {
+ MachineInstr *MI = MRI->getVRegDef(I.getOperand(1).getReg());
+ MachineInstr *Init = I.getNumExplicitOperands() > 2
+ ? MRI->getVRegDef(I.getOperand(2).getReg())
+ : nullptr;
+ assert(MI);
+ return selectGlobalValue(MI->getOperand(0).getReg(), *MI, Init);
+ }
+ case Intrinsic::spv_const_composite: {
+ // If no values are attached, the composite is null constant.
+ bool IsNull = I.getNumExplicitDefs() + 1 == I.getNumExplicitOperands();
+ unsigned Opcode =
+ IsNull ? SPIRV::OpConstantNull : SPIRV::OpConstantComposite;
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType));
+ // Skip the type MD node that we already used when generating assign.type
+ // for this composite.
+ if (!IsNull) {
+ for (unsigned i = I.getNumExplicitDefs() + 1;
+ i < I.getNumExplicitOperands(); ++i) {
+ MIB.addUse(I.getOperand(i).getReg());
+ }
+ }
+ return MIB.constrainAllUses(TII, TRI, RBI);
+ }
+ case Intrinsic::spv_assign_name: {
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpName));
+ MIB.addUse(I.getOperand(I.getNumExplicitDefs() + 1).getReg());
+ for (unsigned i = I.getNumExplicitDefs() + 2;
+ i < I.getNumExplicitOperands(); ++i) {
+ MIB.addImm(I.getOperand(i).getImm());
+ }
+ return MIB.constrainAllUses(TII, TRI, RBI);
+ }
+ case Intrinsic::spv_switch: {
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSwitch));
+ for (unsigned i = 1; i < I.getNumExplicitOperands(); ++i) {
+ if (I.getOperand(i).isReg())
+ MIB.addReg(I.getOperand(i).getReg());
+ else if (I.getOperand(i).isCImm())
+ addNumImm(I.getOperand(i).getCImm()->getValue(), MIB);
+ else if (I.getOperand(i).isMBB())
+ MIB.addMBB(I.getOperand(i).getMBB());
+ else
+ llvm_unreachable("Unexpected OpSwitch operand");
+ }
+ return MIB.constrainAllUses(TII, TRI, RBI);
+ }
+ default:
+ llvm_unreachable("Intrinsic selection not implemented");
+ }
+ return true;
+}
+
+bool SPIRVInstructionSelector::selectFrameIndex(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(SPIRV::StorageClass::Function))
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectBranch(MachineInstr &I) const {
+ // InstructionSelector walks backwards through the instructions. We can use
+ // both a G_BR and a G_BRCOND to create an OpBranchConditional. We hit G_BR
+ // first, so we can generate an OpBranchConditional here. If there is no
+ // G_BRCOND, we just use OpBranch for a regular unconditional branch.
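+ // For example (names hypothetical), the pair
+ //   G_BRCOND %cond, %bb.then
+ //   G_BR %bb.else
+ // is selected as: OpBranchConditional %cond %bb.then %bb.else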
+ const MachineInstr *PrevI = I.getPrevNode();
+ MachineBasicBlock &MBB = *I.getParent();
+ if (PrevI != nullptr && PrevI->getOpcode() == TargetOpcode::G_BRCOND) {
+ return BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpBranchConditional))
+ .addUse(PrevI->getOperand(0).getReg())
+ .addMBB(PrevI->getOperand(1).getMBB())
+ .addMBB(I.getOperand(0).getMBB())
+ .constrainAllUses(TII, TRI, RBI);
+ }
+ return BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpBranch))
+ .addMBB(I.getOperand(0).getMBB())
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectBranchCond(MachineInstr &I) const {
+ // InstructionSelector walks backwards through the instructions. For an
+ // explicit conditional branch with no fallthrough, we use both a G_BR and a
+ // G_BRCOND to create an OpBranchConditional. We should hit G_BR first, and
+ // generate the OpBranchConditional in selectBranch above.
+ //
+ // If an OpBranchConditional has been generated, we simply return, as the work
+ // is already done. If there is no OpBranchConditional, LLVM must be relying on
+ // implicit fallthrough to the next basic block, so we need to create an
+ // OpBranchConditional with an explicit "false" argument pointing to the next
+ // basic block that LLVM would fall through to.
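+ // For example (names hypothetical), a lone G_BRCOND %cond, %bb.then whose
+ // block falls through to %bb.next is selected as:
+ //   OpBranchConditional %cond %bb.then %bb.next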
+ const MachineInstr *NextI = I.getNextNode();
+ // Check if this has already been successfully selected.
+ if (NextI != nullptr && NextI->getOpcode() == SPIRV::OpBranchConditional)
+ return true;
+ // Must be relying on implicit block fallthrough, so generate an
+ // OpBranchConditional with the "next" basic block as the "false" target.
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineBasicBlock *NextMBB = MBB.getNextNode();
+ return BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpBranchConditional))
+ .addUse(I.getOperand(0).getReg())
+ .addMBB(I.getOperand(1).getMBB())
+ .addMBB(NextMBB)
+ .constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectPhi(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpPhi))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType));
+ const unsigned NumOps = I.getNumOperands();
+ for (unsigned i = 1; i < NumOps; i += 2) {
+ MIB.addUse(I.getOperand(i + 0).getReg());
+ MIB.addMBB(I.getOperand(i + 1).getMBB());
+ }
+ return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectGlobalValue(
+ Register ResVReg, MachineInstr &I, const MachineInstr *Init) const {
+ // FIXME: don't use MachineIRBuilder here, replace it with BuildMI.
+ MachineIRBuilder MIRBuilder(I);
+ const GlobalValue *GV = I.getOperand(1).getGlobal();
+ SPIRVType *ResType = GR.getOrCreateSPIRVType(
+ GV->getType(), MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false);
+
+ std::string GlobalIdent = GV->getGlobalIdentifier();
+ // TODO: support @llvm.global.annotations.
+ auto GlobalVar = cast<GlobalVariable>(GV);
+
+ bool HasInit = GlobalVar->hasInitializer() &&
+ !isa<UndefValue>(GlobalVar->getInitializer());
+ // Skip the empty declaration of GVs with initializers until we get the
+ // declaration with the initializer passed in.
+ if (HasInit && !Init)
+ return true;
+
+ unsigned AddrSpace = GV->getAddressSpace();
+ SPIRV::StorageClass Storage = addressSpaceToStorageClass(AddrSpace);
+ bool HasLnkTy = GV->getLinkage() != GlobalValue::InternalLinkage &&
+ Storage != SPIRV::StorageClass::Function;
+ SPIRV::LinkageType LnkType =
+ (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ ? SPIRV::LinkageType::Import
+ : SPIRV::LinkageType::Export;
+
+ Register Reg = GR.buildGlobalVariable(ResVReg, ResType, GlobalIdent, GV,
+ Storage, Init, GlobalVar->isConstant(),
+ HasLnkTy, LnkType, MIRBuilder, true);
+ return Reg.isValid();
+}
+
+namespace llvm {
+InstructionSelector *
+createSPIRVInstructionSelector(const SPIRVTargetMachine &TM,
+ const SPIRVSubtarget &Subtarget,
+ const RegisterBankInfo &RBI) {
+ return new SPIRVInstructionSelector(TM, Subtarget, RBI);
+}
+} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
new file mode 100644
index 000000000000..87f9e9545dd3
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -0,0 +1,301 @@
+//===- SPIRVLegalizerInfo.cpp --- SPIR-V Legalization Rules ------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the targeting of the MachineLegalizer class for SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVLegalizerInfo.h"
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+using namespace llvm;
+using namespace llvm::LegalizeActions;
+using namespace llvm::LegalityPredicates;
+
+static const std::set<unsigned> TypeFoldingSupportingOpcs = {
+ TargetOpcode::G_ADD,
+ TargetOpcode::G_FADD,
+ TargetOpcode::G_SUB,
+ TargetOpcode::G_FSUB,
+ TargetOpcode::G_MUL,
+ TargetOpcode::G_FMUL,
+ TargetOpcode::G_SDIV,
+ TargetOpcode::G_UDIV,
+ TargetOpcode::G_FDIV,
+ TargetOpcode::G_SREM,
+ TargetOpcode::G_UREM,
+ TargetOpcode::G_FREM,
+ TargetOpcode::G_FNEG,
+ TargetOpcode::G_CONSTANT,
+ TargetOpcode::G_FCONSTANT,
+ TargetOpcode::G_AND,
+ TargetOpcode::G_OR,
+ TargetOpcode::G_XOR,
+ TargetOpcode::G_SHL,
+ TargetOpcode::G_ASHR,
+ TargetOpcode::G_LSHR,
+ TargetOpcode::G_SELECT,
+ TargetOpcode::G_EXTRACT_VECTOR_ELT,
+};
+
+bool isTypeFoldingSupported(unsigned Opcode) {
+ return TypeFoldingSupportingOpcs.count(Opcode) > 0;
+}
+
+SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
+ using namespace TargetOpcode;
+
+ this->ST = &ST;
+ GR = ST.getSPIRVGlobalRegistry();
+
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ const LLT v16s64 = LLT::fixed_vector(16, 64);
+ const LLT v16s32 = LLT::fixed_vector(16, 32);
+ const LLT v16s16 = LLT::fixed_vector(16, 16);
+ const LLT v16s8 = LLT::fixed_vector(16, 8);
+ const LLT v16s1 = LLT::fixed_vector(16, 1);
+
+ const LLT v8s64 = LLT::fixed_vector(8, 64);
+ const LLT v8s32 = LLT::fixed_vector(8, 32);
+ const LLT v8s16 = LLT::fixed_vector(8, 16);
+ const LLT v8s8 = LLT::fixed_vector(8, 8);
+ const LLT v8s1 = LLT::fixed_vector(8, 1);
+
+ const LLT v4s64 = LLT::fixed_vector(4, 64);
+ const LLT v4s32 = LLT::fixed_vector(4, 32);
+ const LLT v4s16 = LLT::fixed_vector(4, 16);
+ const LLT v4s8 = LLT::fixed_vector(4, 8);
+ const LLT v4s1 = LLT::fixed_vector(4, 1);
+
+ const LLT v3s64 = LLT::fixed_vector(3, 64);
+ const LLT v3s32 = LLT::fixed_vector(3, 32);
+ const LLT v3s16 = LLT::fixed_vector(3, 16);
+ const LLT v3s8 = LLT::fixed_vector(3, 8);
+ const LLT v3s1 = LLT::fixed_vector(3, 1);
+
+ const LLT v2s64 = LLT::fixed_vector(2, 64);
+ const LLT v2s32 = LLT::fixed_vector(2, 32);
+ const LLT v2s16 = LLT::fixed_vector(2, 16);
+ const LLT v2s8 = LLT::fixed_vector(2, 8);
+ const LLT v2s1 = LLT::fixed_vector(2, 1);
+
+ const unsigned PSize = ST.getPointerSize();
+ const LLT p0 = LLT::pointer(0, PSize); // Function
+ const LLT p1 = LLT::pointer(1, PSize); // CrossWorkgroup
+ const LLT p2 = LLT::pointer(2, PSize); // UniformConstant
+ const LLT p3 = LLT::pointer(3, PSize); // Workgroup
+ const LLT p4 = LLT::pointer(4, PSize); // Generic
+ const LLT p5 = LLT::pointer(5, PSize); // Input
+
+ // TODO: remove copy-pasting here by using concatenation in some way.
+ auto allPtrsScalarsAndVectors = {
+ p0, p1, p2, p3, p4, p5, s1, s8, s16,
+ s32, s64, v2s1, v2s8, v2s16, v2s32, v2s64, v3s1, v3s8,
+ v3s16, v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, v4s64, v8s1,
+ v8s8, v8s16, v8s32, v8s64, v16s1, v16s8, v16s16, v16s32, v16s64};
+
+ auto allScalarsAndVectors = {
+ s1, s8, s16, s32, s64, v2s1, v2s8, v2s16, v2s32, v2s64,
+ v3s1, v3s8, v3s16, v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, v4s64,
+ v8s1, v8s8, v8s16, v8s32, v8s64, v16s1, v16s8, v16s16, v16s32, v16s64};
+
+ auto allIntScalarsAndVectors = {s8, s16, s32, s64, v2s8, v2s16,
+ v2s32, v2s64, v3s8, v3s16, v3s32, v3s64,
+ v4s8, v4s16, v4s32, v4s64, v8s8, v8s16,
+ v8s32, v8s64, v16s8, v16s16, v16s32, v16s64};
+
+ auto allBoolScalarsAndVectors = {s1, v2s1, v3s1, v4s1, v8s1, v16s1};
+
+ auto allIntScalars = {s8, s16, s32, s64};
+
+ auto allFloatScalarsAndVectors = {
+ s16, s32, s64, v2s16, v2s32, v2s64, v3s16, v3s32, v3s64,
+ v4s16, v4s32, v4s64, v8s16, v8s32, v8s64, v16s16, v16s32, v16s64};
+
+ auto allFloatAndIntScalars = allIntScalars;
+
+ auto allPtrs = {p0, p1, p2, p3, p4, p5};
+ auto allWritablePtrs = {p0, p1, p3, p4};
+
+ for (auto Opc : TypeFoldingSupportingOpcs)
+ getActionDefinitionsBuilder(Opc).custom();
+
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).alwaysLegal();
+
+ // TODO: add proper rules for vectors legalization.
+ getActionDefinitionsBuilder({G_BUILD_VECTOR, G_SHUFFLE_VECTOR}).alwaysLegal();
+
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
+ .legalIf(all(typeInSet(0, allWritablePtrs), typeInSet(1, allPtrs)));
+
+ getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
+ .legalForCartesianProduct(allPtrs, allPtrs);
+
+ getActionDefinitionsBuilder({G_LOAD, G_STORE}).legalIf(typeInSet(1, allPtrs));
+
+ getActionDefinitionsBuilder(G_BITREVERSE).legalFor(allFloatScalarsAndVectors);
+
+ getActionDefinitionsBuilder(G_FMA).legalFor(allFloatScalarsAndVectors);
+
+ getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+ .legalForCartesianProduct(allIntScalarsAndVectors,
+ allFloatScalarsAndVectors);
+
+ getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+ .legalForCartesianProduct(allFloatScalarsAndVectors,
+ allScalarsAndVectors);
+
+ getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_ABS})
+ .legalFor(allIntScalarsAndVectors);
+
+ getActionDefinitionsBuilder(G_CTPOP).legalForCartesianProduct(
+ allIntScalarsAndVectors, allIntScalarsAndVectors);
+
+ getActionDefinitionsBuilder(G_PHI).legalFor(allPtrsScalarsAndVectors);
+
+ getActionDefinitionsBuilder(G_BITCAST).legalIf(all(
+ typeInSet(0, allPtrsScalarsAndVectors),
+ typeInSet(1, allPtrsScalarsAndVectors),
+ LegalityPredicate(([=](const LegalityQuery &Query) {
+ return Query.Types[0].getSizeInBits() == Query.Types[1].getSizeInBits();
+ }))));
+
+ getActionDefinitionsBuilder(G_IMPLICIT_DEF).alwaysLegal();
+
+ getActionDefinitionsBuilder(G_INTTOPTR)
+ .legalForCartesianProduct(allPtrs, allIntScalars);
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalForCartesianProduct(allIntScalars, allPtrs);
+ getActionDefinitionsBuilder(G_PTR_ADD).legalForCartesianProduct(
+ allPtrs, allIntScalars);
+
+ // ST.canDirectlyComparePointers() for pointer args is supported in
+ // legalizeCustom().
+ getActionDefinitionsBuilder(G_ICMP).customIf(
+ all(typeInSet(0, allBoolScalarsAndVectors),
+ typeInSet(1, allPtrsScalarsAndVectors)));
+
+ getActionDefinitionsBuilder(G_FCMP).legalIf(
+ all(typeInSet(0, allBoolScalarsAndVectors),
+ typeInSet(1, allFloatScalarsAndVectors)));
+
+ getActionDefinitionsBuilder({G_ATOMICRMW_OR, G_ATOMICRMW_ADD, G_ATOMICRMW_AND,
+ G_ATOMICRMW_MAX, G_ATOMICRMW_MIN,
+ G_ATOMICRMW_SUB, G_ATOMICRMW_XOR,
+ G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN})
+ .legalForCartesianProduct(allIntScalars, allWritablePtrs);
+
+ getActionDefinitionsBuilder(G_ATOMICRMW_XCHG)
+ .legalForCartesianProduct(allFloatAndIntScalars, allWritablePtrs);
+
+ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS).lower();
+ // TODO: add proper legalization rules.
+ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG).alwaysLegal();
+
+ getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_SMULO, G_UMULO})
+ .alwaysLegal();
+
+ // Extensions.
+ getActionDefinitionsBuilder({G_TRUNC, G_ZEXT, G_SEXT, G_ANYEXT})
+ .legalForCartesianProduct(allScalarsAndVectors);
+
+ // FP conversions.
+ getActionDefinitionsBuilder({G_FPTRUNC, G_FPEXT})
+ .legalForCartesianProduct(allFloatScalarsAndVectors);
+
+ // Pointer-handling.
+ getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+
+ // Control-flow.
+ getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
+
+ getActionDefinitionsBuilder({G_FPOW,
+ G_FEXP,
+ G_FEXP2,
+ G_FLOG,
+ G_FLOG2,
+ G_FABS,
+ G_FMINNUM,
+ G_FMAXNUM,
+ G_FCEIL,
+ G_FCOS,
+ G_FSIN,
+ G_FSQRT,
+ G_FFLOOR,
+ G_FRINT,
+ G_FNEARBYINT,
+ G_INTRINSIC_ROUND,
+ G_INTRINSIC_TRUNC,
+ G_FMINIMUM,
+ G_FMAXIMUM,
+ G_INTRINSIC_ROUNDEVEN})
+ .legalFor(allFloatScalarsAndVectors);
+
+ getActionDefinitionsBuilder(G_FCOPYSIGN)
+ .legalForCartesianProduct(allFloatScalarsAndVectors,
+ allFloatScalarsAndVectors);
+
+ getActionDefinitionsBuilder(G_FPOWI).legalForCartesianProduct(
+ allFloatScalarsAndVectors, allIntScalarsAndVectors);
+
+ getLegacyLegalizerInfo().computeTables();
+ verify(*ST.getInstrInfo());
+}
+
+static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType,
+ LegalizerHelper &Helper,
+ MachineRegisterInfo &MRI,
+ SPIRVGlobalRegistry *GR) {
+ Register ConvReg = MRI.createGenericVirtualRegister(ConvTy);
+ GR->assignSPIRVTypeToVReg(SpirvType, ConvReg, Helper.MIRBuilder.getMF());
+ Helper.MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
+ .addDef(ConvReg)
+ .addUse(Reg);
+ return ConvReg;
+}
+
+bool SPIRVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ auto Opc = MI.getOpcode();
+ MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ if (!isTypeFoldingSupported(Opc)) {
+ assert(Opc == TargetOpcode::G_ICMP);
+ assert(GR->getSPIRVTypeForVReg(MI.getOperand(0).getReg()));
+ auto &Op0 = MI.getOperand(2);
+ auto &Op1 = MI.getOperand(3);
+ Register Reg0 = Op0.getReg();
+ Register Reg1 = Op1.getReg();
+ CmpInst::Predicate Cond =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if ((!ST->canDirectlyComparePointers() ||
+ (Cond != CmpInst::ICMP_EQ && Cond != CmpInst::ICMP_NE)) &&
+ MRI.getType(Reg0).isPointer() && MRI.getType(Reg1).isPointer()) {
+ LLT ConvT = LLT::scalar(ST->getPointerSize());
+ Type *LLVMTy = IntegerType::get(MI.getMF()->getFunction().getContext(),
+ ST->getPointerSize());
+ SPIRVType *SpirvTy = GR->getOrCreateSPIRVType(LLVMTy, Helper.MIRBuilder);
+ Op0.setReg(convertPtrToInt(Reg0, ConvT, SpirvTy, Helper, MRI, GR));
+ Op1.setReg(convertPtrToInt(Reg1, ConvT, SpirvTy, Helper, MRI, GR));
+ }
+ return true;
+ }
+ // TODO: implement legalization for other opcodes.
+ return true;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h
new file mode 100644
index 000000000000..2541ff29edb0
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h
@@ -0,0 +1,36 @@
+//===- SPIRVLegalizerInfo.h --- SPIR-V Legalization Rules --------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the targeting of the MachineLegalizer class for SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVMACHINELEGALIZER_H
+
+#include "SPIRVGlobalRegistry.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+bool isTypeFoldingSupported(unsigned Opcode);
+
+namespace llvm {
+
+class LLVMContext;
+class SPIRVSubtarget;
+
+// This class provides the information for legalizing SPIR-V instructions.
+class SPIRVLegalizerInfo : public LegalizerInfo {
+ const SPIRVSubtarget *ST;
+ SPIRVGlobalRegistry *GR;
+
+public:
+ bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override;
+ SPIRVLegalizerInfo(const SPIRVSubtarget &ST);
+};
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVMACHINELEGALIZER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
new file mode 100644
index 000000000000..8e4ab973bf07
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
@@ -0,0 +1,58 @@
+//=- SPIRVMCInstLower.cpp - Convert SPIR-V MachineInstr to MCInst -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower SPIR-V MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVMCInstLower.h"
+#include "SPIRV.h"
+#include "SPIRVModuleAnalysis.h"
+#include "SPIRVUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+
+void SPIRVMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI,
+ SPIRV::ModuleAnalysisInfo *MAI) const {
+ OutMI.setOpcode(MI->getOpcode());
+ const MachineFunction *MF = MI->getMF();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_GlobalAddress: {
+ Register FuncReg = MAI->getFuncReg(MO.getGlobal()->getGlobalIdentifier());
+ assert(FuncReg.isValid() && "Cannot find function Id");
+ MCOp = MCOperand::createReg(FuncReg);
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createReg(MAI->getOrCreateMBBRegister(*MO.getMBB()));
+ break;
+ case MachineOperand::MO_Register: {
+ Register NewReg = MAI->getRegisterAlias(MF, MO.getReg());
+ MCOp = MCOperand::createReg(NewReg.isValid() ? NewReg : MO.getReg());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_FPImmediate:
+ MCOp = MCOperand::createDFPImm(
+ MO.getFPImm()->getValueAPF().convertToFloat());
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.h b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.h
new file mode 100644
index 000000000000..8392656ed067
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.h
@@ -0,0 +1,29 @@
+//=- SPIRVMCInstLower.h -- Convert SPIR-V MachineInstr to MCInst --*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVMCINSTLOWER_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVMCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCInst;
+class MachineInstr;
+namespace SPIRV {
+struct ModuleAnalysisInfo;
+} // namespace SPIRV
+
+// This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY SPIRVMCInstLower {
+public:
+ void lower(const MachineInstr *MI, MCInst &OutMI,
+ SPIRV::ModuleAnalysisInfo *MAI) const;
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVMCINSTLOWER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
new file mode 100644
index 000000000000..fa78dd7942c6
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -0,0 +1,250 @@
+//===- SPIRVModuleAnalysis.cpp - analysis of global instrs & regs - C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The analysis collects instructions that should be output at the module level
+// and performs the global register numbering.
+//
+// The results of this analysis are used in AsmPrinter to rename registers
+// globally and to output required instructions at the module level.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVModuleAnalysis.h"
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "spirv-module-analysis"
+
+char llvm::SPIRVModuleAnalysis::ID = 0;
+
+namespace llvm {
+void initializeSPIRVModuleAnalysisPass(PassRegistry &);
+} // namespace llvm
+
+INITIALIZE_PASS(SPIRVModuleAnalysis, DEBUG_TYPE, "SPIRV module analysis", true,
+ true)
+
+// Retrieve an unsigned from an MDNode with a list of them as operands.
+static unsigned getMetadataUInt(MDNode *MdNode, unsigned OpIndex,
+ unsigned DefaultVal = 0) {
+ if (MdNode && OpIndex < MdNode->getNumOperands()) {
+ const auto &Op = MdNode->getOperand(OpIndex);
+ return mdconst::extract<ConstantInt>(Op)->getZExtValue();
+ }
+ return DefaultVal;
+}
+
+void SPIRVModuleAnalysis::setBaseInfo(const Module &M) {
+ MAI.MaxID = 0;
+ for (int i = 0; i < SPIRV::NUM_MODULE_SECTIONS; i++)
+ MAI.MS[i].clear();
+ MAI.RegisterAliasTable.clear();
+ MAI.InstrsToDelete.clear();
+ MAI.FuncNameMap.clear();
+ MAI.GlobalVarList.clear();
+
+ // TODO: determine memory model and source language from the configuration.
+ MAI.Mem = SPIRV::MemoryModel::OpenCL;
+ MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C;
+ unsigned PtrSize = ST->getPointerSize();
+ MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32
+ : PtrSize == 64 ? SPIRV::AddressingModel::Physical64
+ : SPIRV::AddressingModel::Logical;
+ // Get the OpenCL version number from metadata.
+ // TODO: support other source languages.
+ MAI.SrcLangVersion = 0;
+ if (auto VerNode = M.getNamedMetadata("opencl.ocl.version")) {
+ // Construct version literal according to OpenCL 2.2 environment spec.
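+ // For example, OpenCL 2.0 yields (2 << 16) | (0 << 8) | 0 == 0x20000.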
+ auto VersionMD = VerNode->getOperand(0);
+ unsigned MajorNum = getMetadataUInt(VersionMD, 0, 2);
+ unsigned MinorNum = getMetadataUInt(VersionMD, 1);
+ unsigned RevNum = getMetadataUInt(VersionMD, 2);
+ MAI.SrcLangVersion = 0 | (MajorNum << 16) | (MinorNum << 8) | RevNum;
+ }
+}
+
+// True if there is an instruction in the MS list with all the same operands as
+// the given instruction has (after the given starting index).
+// TODO: maybe it needs to check Opcodes too.
+static bool findSameInstrInMS(const MachineInstr &A,
+ SPIRV::ModuleSectionType MSType,
+ SPIRV::ModuleAnalysisInfo &MAI,
+ bool UpdateRegAliases,
+ unsigned StartOpIndex = 0) {
+ for (const auto *B : MAI.MS[MSType]) {
+ const unsigned NumAOps = A.getNumOperands();
+ if (NumAOps == B->getNumOperands() && A.getNumDefs() == B->getNumDefs()) {
+ bool AllOpsMatch = true;
+ for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) {
+ if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) {
+ Register RegA = A.getOperand(i).getReg();
+ Register RegB = B->getOperand(i).getReg();
+ AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) ==
+ MAI.getRegisterAlias(B->getMF(), RegB);
+ } else {
+ AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i));
+ }
+ }
+ if (AllOpsMatch) {
+ if (UpdateRegAliases) {
+ assert(A.getOperand(0).isReg() && B->getOperand(0).isReg());
+ Register LocalReg = A.getOperand(0).getReg();
+ Register GlobalReg =
+ MAI.getRegisterAlias(B->getMF(), B->getOperand(0).getReg());
+ MAI.setRegisterAlias(A.getMF(), LocalReg, GlobalReg);
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+// Look for IDs declared with Import linkage, and map the imported name string
+// to the register defining that variable (which will usually be the result of
+// an OpFunction). This lets us call externally imported functions using
+// the correct ID registers.
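+// For example (ids hypothetical), given
+//   OpDecorate %f LinkageAttributes "foo" Import
+// the map records "foo" -> the global alias of %f.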
+void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
+ const Function &F) {
+ if (MI.getOpcode() == SPIRV::OpDecorate) {
+ // If it's got Import linkage.
+ auto Dec = MI.getOperand(1).getImm();
+ if (Dec == static_cast<unsigned>(SPIRV::Decoration::LinkageAttributes)) {
+ auto Lnk = MI.getOperand(MI.getNumOperands() - 1).getImm();
+ if (Lnk == static_cast<unsigned>(SPIRV::LinkageType::Import)) {
+ // Map imported function name to function ID register.
+ std::string Name = getStringImm(MI, 2);
+ Register Target = MI.getOperand(0).getReg();
+ // TODO: check defs from different MFs.
+ MAI.FuncNameMap[Name] = MAI.getRegisterAlias(MI.getMF(), Target);
+ }
+ }
+ } else if (MI.getOpcode() == SPIRV::OpFunction) {
+ // Record all internal OpFunction declarations.
+ Register Reg = MI.defs().begin()->getReg();
+ Register GlobalReg = MAI.getRegisterAlias(MI.getMF(), Reg);
+ assert(GlobalReg.isValid());
+ // TODO: check that it does not conflict with existing entries.
+ MAI.FuncNameMap[F.getGlobalIdentifier()] = GlobalReg;
+ }
+}
+
+// Collect the given instruction in the specified MS. We assume global register
+// numbering has already occurred by this point. We can directly compare reg
+// arguments when detecting duplicates.
+static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
+ SPIRV::ModuleSectionType MSType,
+ bool IsConstOrType = false) {
+ MAI.setSkipEmission(&MI);
+ if (findSameInstrInMS(MI, MSType, MAI, IsConstOrType, IsConstOrType ? 1 : 0))
+ return; // Found a duplicate, so don't add it.
+ // No duplicates, so add it.
+ MAI.MS[MSType].push_back(&MI);
+}
+
+// Some global instructions make reference to function-local ID regs, so they
+// cannot be correctly collected until these registers are globally numbered.
+void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
+ for (auto F = M.begin(), E = M.end(); F != E; ++F) {
+ if ((*F).isDeclaration())
+ continue;
+ MachineFunction *MF = MMI->getMachineFunction(*F);
+ assert(MF);
+ unsigned FCounter = 0;
+ for (MachineBasicBlock &MBB : *MF)
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() == SPIRV::OpFunction)
+ FCounter++;
+ if (MAI.getSkipEmission(&MI))
+ continue;
+ const unsigned OpCode = MI.getOpcode();
+ const bool IsFuncOrParm =
+ OpCode == SPIRV::OpFunction || OpCode == SPIRV::OpFunctionParameter;
+ const bool IsConstOrType =
+ TII->isConstantInstr(MI) || TII->isTypeDeclInstr(MI);
+ if (OpCode == SPIRV::OpName || OpCode == SPIRV::OpMemberName) {
+ collectOtherInstr(MI, MAI, SPIRV::MB_DebugNames);
+ } else if (OpCode == SPIRV::OpEntryPoint) {
+ collectOtherInstr(MI, MAI, SPIRV::MB_EntryPoints);
+ } else if (TII->isDecorationInstr(MI)) {
+ collectOtherInstr(MI, MAI, SPIRV::MB_Annotations);
+ collectFuncNames(MI, *F);
+ } else if (IsConstOrType || (FCounter > 1 && IsFuncOrParm)) {
+ // Currently, OpSpecConstant*s are not in DT,
+ // but they need to be collected anyway.
+ enum SPIRV::ModuleSectionType Type =
+ IsFuncOrParm ? SPIRV::MB_ExtFuncDecls : SPIRV::MB_TypeConstVars;
+ collectOtherInstr(MI, MAI, Type, IsConstOrType);
+ } else if (OpCode == SPIRV::OpFunction) {
+ collectFuncNames(MI, *F);
+ }
+ }
+ }
+}
+
+// Number registers in all functions globally from 0 onwards and store
+// the result in the global register alias table.
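+// For example, %0 and %1 in one function and %0 in another all receive
+// distinct entries in RegisterAliasTable.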
+void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) {
+ for (auto F = M.begin(), E = M.end(); F != E; ++F) {
+ if ((*F).isDeclaration())
+ continue;
+ MachineFunction *MF = MMI->getMachineFunction(*F);
+ assert(MF);
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+ Register Reg = Op.getReg();
+ if (MAI.hasRegisterAlias(MF, Reg))
+ continue;
+ Register NewReg = Register::index2VirtReg(MAI.getNextID());
+ MAI.setRegisterAlias(MF, Reg, NewReg);
+ }
+ }
+ }
+ }
+}
+
+struct SPIRV::ModuleAnalysisInfo SPIRVModuleAnalysis::MAI;
+
+void SPIRVModuleAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+}
+
+bool SPIRVModuleAnalysis::runOnModule(Module &M) {
+ SPIRVTargetMachine &TM =
+ getAnalysis<TargetPassConfig>().getTM<SPIRVTargetMachine>();
+ ST = TM.getSubtargetImpl();
+ GR = ST->getSPIRVGlobalRegistry();
+ TII = ST->getInstrInfo();
+
+ MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ setBaseInfo(M);
+
+ // TODO: Process type/const/global var/func decl instructions, number their
+ // destination registers from 0 to N, collect Extensions and Capabilities.
+
+ // Number rest of registers from N+1 onwards.
+ numberRegistersGlobally(M);
+
+ // Collect OpName, OpEntryPoint, OpDecorate etc, process other instructions.
+ processOtherInstrs(M);
+
+ return false;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
new file mode 100644
index 000000000000..1bef13d458c1
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -0,0 +1,137 @@
+//===- SPIRVModuleAnalysis.h - analysis of global instrs & regs -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The analysis collects instructions that should be output at the module level
+// and performs the global register numbering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H
+
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+class MachineFunction;
+class MachineModuleInfo;
+
+namespace SPIRV {
+// The enum contains logical module sections for the instruction collection.
+enum ModuleSectionType {
+ // MB_Capabilities, MB_Extensions, MB_ExtInstImports, MB_MemoryModel,
+ MB_EntryPoints, // All OpEntryPoint instructions (if any).
+ // MB_ExecutionModes, MB_DebugSourceAndStrings,
+ MB_DebugNames, // All OpName and OpMemberName instrs.
+ MB_DebugModuleProcessed, // All OpModuleProcessed instructions.
+ MB_Annotations, // OpDecorate, OpMemberDecorate etc.
+ MB_TypeConstVars, // OpTypeXXX, OpConstantXXX, and global OpVariables.
+ MB_ExtFuncDecls, // OpFunction etc. to declare for external funcs.
+ NUM_MODULE_SECTIONS // Total number of sections requiring basic blocks.
+};
+
+using InstrList = SmallVector<MachineInstr *>;
+// Maps a local register to the corresponding global alias.
+using LocalToGlobalRegTable = std::map<Register, Register>;
+using RegisterAliasMapTy =
+ std::map<const MachineFunction *, LocalToGlobalRegTable>;
+
+// The struct contains results of the module analysis and methods
+// to access them.
+struct ModuleAnalysisInfo {
+ SPIRV::MemoryModel Mem;
+ SPIRV::AddressingModel Addr;
+ SPIRV::SourceLanguage SrcLang;
+ unsigned SrcLangVersion;
+ // Contains the list of all global OpVariables in the module.
+ SmallVector<MachineInstr *, 4> GlobalVarList;
+ // Maps function names to corresponding function ID registers.
+ StringMap<Register> FuncNameMap;
+ // The set contains machine instructions which are necessary
+ // for correct MIR but will not be emitted in function bodies.
+ DenseSet<MachineInstr *> InstrsToDelete;
+ // The table contains global aliases of local registers for each machine
+ // function. The aliases are used to substitute local registers during
+ // code emission.
+ RegisterAliasMapTy RegisterAliasTable;
+ // The counter holds the maximum ID we have in the module.
+ unsigned MaxID;
+ // The array contains lists of MIs for each module section.
+ InstrList MS[NUM_MODULE_SECTIONS];
+ // The table maps MBB number to SPIR-V unique ID register.
+ DenseMap<int, Register> BBNumToRegMap;
+
+ Register getFuncReg(std::string FuncName) {
+ auto FuncReg = FuncNameMap.find(FuncName);
+ assert(FuncReg != FuncNameMap.end() && "Cannot find function Id");
+ return FuncReg->second;
+ }
+ InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; }
+ void setSkipEmission(MachineInstr *MI) { InstrsToDelete.insert(MI); }
+ bool getSkipEmission(const MachineInstr *MI) {
+ return InstrsToDelete.contains(MI);
+ }
+ void setRegisterAlias(const MachineFunction *MF, Register Reg,
+ Register AliasReg) {
+ RegisterAliasTable[MF][Reg] = AliasReg;
+ }
+ Register getRegisterAlias(const MachineFunction *MF, Register Reg) {
+ auto RI = RegisterAliasTable[MF].find(Reg);
+ if (RI == RegisterAliasTable[MF].end())
+ return Register(0);
+ return RI->second;
+ }
+ bool hasRegisterAlias(const MachineFunction *MF, Register Reg) {
+ return RegisterAliasTable.find(MF) != RegisterAliasTable.end() &&
+ RegisterAliasTable[MF].find(Reg) != RegisterAliasTable[MF].end();
+ }
+ unsigned getNextID() { return MaxID++; }
+ bool hasMBBRegister(const MachineBasicBlock &MBB) {
+ return BBNumToRegMap.find(MBB.getNumber()) != BBNumToRegMap.end();
+ }
+ // Convert MBB's number to corresponding ID register.
+ Register getOrCreateMBBRegister(const MachineBasicBlock &MBB) {
+ auto f = BBNumToRegMap.find(MBB.getNumber());
+ if (f != BBNumToRegMap.end())
+ return f->second;
+ Register NewReg = Register::index2VirtReg(getNextID());
+ BBNumToRegMap[MBB.getNumber()] = NewReg;
+ return NewReg;
+ }
+};
+} // namespace SPIRV
+
+struct SPIRVModuleAnalysis : public ModulePass {
+ static char ID;
+
+public:
+ SPIRVModuleAnalysis() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ static struct SPIRV::ModuleAnalysisInfo MAI;
+
+private:
+ void setBaseInfo(const Module &M);
+ template <typename T> void collectTypesConstsVars();
+ void processDefInstrs(const Module &M);
+ void collectFuncNames(MachineInstr &MI, const Function &F);
+ void processOtherInstrs(const Module &M);
+ void numberRegistersGlobally(const Module &M);
+
+ const SPIRVSubtarget *ST;
+ SPIRVGlobalRegistry *GR;
+ const SPIRVInstrInfo *TII;
+ MachineModuleInfo *MMI;
+};
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVMODULEANALYSIS_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
new file mode 100644
index 000000000000..687f84046650
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -0,0 +1,440 @@
+//===-- SPIRVPreLegalizer.cpp - prepare IR for legalization -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The pass prepares IR for legalization: it assigns SPIR-V types to registers
+// and removes the intrinsics that held these types during IR translation.
+// It also processes constants and registers them in GR to avoid duplication.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVSubtarget.h"
+#include "SPIRVUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+#define DEBUG_TYPE "spirv-prelegalizer"
+
+using namespace llvm;
+
+namespace {
+class SPIRVPreLegalizer : public MachineFunctionPass {
+public:
+ static char ID;
+ SPIRVPreLegalizer() : MachineFunctionPass(ID) {
+ initializeSPIRVPreLegalizerPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+static bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) {
+ return MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ MI.getIntrinsicID() == IntrinsicID;
+}
+
+static void foldConstantsIntoIntrinsics(MachineFunction &MF) {
+ SmallVector<MachineInstr *, 10> ToErase;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const unsigned AssignNameOperandShift = 2;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!isSpvIntrinsic(MI, Intrinsic::spv_assign_name))
+ continue;
+ unsigned NumOp = MI.getNumExplicitDefs() + AssignNameOperandShift;
+ while (MI.getOperand(NumOp).isReg()) {
+ MachineOperand &MOp = MI.getOperand(NumOp);
+ MachineInstr *ConstMI = MRI.getVRegDef(MOp.getReg());
+ assert(ConstMI->getOpcode() == TargetOpcode::G_CONSTANT);
+ MI.removeOperand(NumOp);
+ MI.addOperand(MachineOperand::CreateImm(
+ ConstMI->getOperand(1).getCImm()->getZExtValue()));
+ if (MRI.use_empty(ConstMI->getOperand(0).getReg()))
+ ToErase.push_back(ConstMI);
+ }
+ }
+ }
+ for (MachineInstr *MI : ToErase)
+ MI->eraseFromParent();
+}
+
+static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder MIB) {
+ SmallVector<MachineInstr *, 10> ToErase;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!isSpvIntrinsic(MI, Intrinsic::spv_bitcast))
+ continue;
+ assert(MI.getOperand(2).isReg());
+ MIB.setInsertPt(*MI.getParent(), MI);
+ MIB.buildBitcast(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
+ ToErase.push_back(&MI);
+ }
+ }
+ for (MachineInstr *MI : ToErase)
+ MI->eraseFromParent();
+}
+
+// When translating a GV, IRTranslator sometimes generates the following IR:
+// %1 = G_GLOBAL_VALUE
+// %2 = COPY %1
+// %3 = G_ADDRSPACE_CAST %2
+// New registers have no SPIRVType and no register class info.
+//
+// Set SPIRVType for GV, propagate it from GV to other instructions,
+// also set register classes.
+static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
+ SPIRVType *SpirvTy = nullptr;
+ assert(MI && "Machine instr is expected");
+ if (MI->getOperand(0).isReg()) {
+ Register Reg = MI->getOperand(0).getReg();
+ SpirvTy = GR->getSPIRVTypeForVReg(Reg);
+ if (!SpirvTy) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ MIB.setInsertPt(*MI->getParent(), MI);
+ Type *Ty = MI->getOperand(1).getCImm()->getType();
+ SpirvTy = GR->getOrCreateSPIRVType(Ty, MIB);
+ break;
+ }
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ MIB.setInsertPt(*MI->getParent(), MI);
+ Type *Ty = MI->getOperand(1).getGlobal()->getType();
+ SpirvTy = GR->getOrCreateSPIRVType(Ty, MIB);
+ break;
+ }
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ADDRSPACE_CAST:
+ case TargetOpcode::COPY: {
+ MachineOperand &Op = MI->getOperand(1);
+ MachineInstr *Def = Op.isReg() ? MRI.getVRegDef(Op.getReg()) : nullptr;
+ if (Def)
+ SpirvTy = propagateSPIRVType(Def, GR, MRI, MIB);
+ break;
+ }
+ default:
+ break;
+ }
+ if (SpirvTy)
+ GR->assignSPIRVTypeToVReg(SpirvTy, Reg, MIB.getMF());
+ if (!MRI.getRegClassOrNull(Reg))
+ MRI.setRegClass(Reg, &SPIRV::IDRegClass);
+ }
+ }
+ return SpirvTy;
+}
+
+// Insert an ASSIGN_TYPE instruction between Reg and its definition, set NewReg as
+// a dst of the definition, assign SPIRVType to both registers. If SpirvTy is
+// provided, use it as SPIRVType in ASSIGN_TYPE, otherwise create it from Ty.
+// TODO: maybe move to SPIRVUtils.
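+// For example (names hypothetical):
+//   %reg = G_ADD %a, %b
+// ===>
+//   %newreg = G_ADD %a, %b
+//   %reg = ASSIGN_TYPE %newreg, %spirv_ty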
+static Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy,
+ SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ assert((Ty || SpirvTy) && "Either LLVM or SPIRV type is expected.");
+ MIB.setInsertPt(*Def->getParent(),
+ (Def->getNextNode() ? Def->getNextNode()->getIterator()
+ : Def->getParent()->end()));
+ Register NewReg = MRI.createGenericVirtualRegister(MRI.getType(Reg));
+ if (auto *RC = MRI.getRegClassOrNull(Reg))
+ MRI.setRegClass(NewReg, RC);
+ SpirvTy = SpirvTy ? SpirvTy : GR->getOrCreateSPIRVType(Ty, MIB);
+ GR->assignSPIRVTypeToVReg(SpirvTy, Reg, MIB.getMF());
+ // This is to make it convenient for Legalizer to get the SPIRVType
+ // when processing the actual MI (i.e. not the pseudo one).
+ GR->assignSPIRVTypeToVReg(SpirvTy, NewReg, MIB.getMF());
+ MIB.buildInstr(SPIRV::ASSIGN_TYPE)
+ .addDef(Reg)
+ .addUse(NewReg)
+ .addUse(GR->getSPIRVTypeID(SpirvTy));
+ Def->getOperand(0).setReg(NewReg);
+ MRI.setRegClass(Reg, &SPIRV::ANYIDRegClass);
+ return NewReg;
+}
+
+static void generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder MIB) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<MachineInstr *, 10> ToErase;
+
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
+ if (MBB->empty())
+ continue;
+
+ bool ReachedBegin = false;
+ for (auto MII = std::prev(MBB->end()), Begin = MBB->begin();
+ !ReachedBegin;) {
+ MachineInstr &MI = *MII;
+
+ if (isSpvIntrinsic(MI, Intrinsic::spv_assign_type)) {
+ Register Reg = MI.getOperand(1).getReg();
+ Type *Ty = getMDOperandAsType(MI.getOperand(2).getMetadata(), 0);
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ assert(Def && "Expecting an instruction that defines the register");
+ // G_GLOBAL_VALUE already has type info.
+ if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE)
+ insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MF.getRegInfo());
+ ToErase.push_back(&MI);
+ } else if (MI.getOpcode() == TargetOpcode::G_CONSTANT ||
+ MI.getOpcode() == TargetOpcode::G_FCONSTANT ||
+ MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ // %rc = G_CONSTANT ty Val
+ // ===>
+ // %cty = OpType* ty
+ // %rctmp = G_CONSTANT ty Val
+ // %rc = ASSIGN_TYPE %rctmp, %cty
+ Register Reg = MI.getOperand(0).getReg();
+ if (MRI.hasOneUse(Reg)) {
+ MachineInstr &UseMI = *MRI.use_instr_begin(Reg);
+ if (isSpvIntrinsic(UseMI, Intrinsic::spv_assign_type) ||
+ isSpvIntrinsic(UseMI, Intrinsic::spv_assign_name))
+ continue;
+ }
+ Type *Ty = nullptr;
+ if (MI.getOpcode() == TargetOpcode::G_CONSTANT)
+ Ty = MI.getOperand(1).getCImm()->getType();
+ else if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
+ Ty = MI.getOperand(1).getFPImm()->getType();
+ else {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ Type *ElemTy = nullptr;
+ MachineInstr *ElemMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ assert(ElemMI);
+
+ if (ElemMI->getOpcode() == TargetOpcode::G_CONSTANT)
+ ElemTy = ElemMI->getOperand(1).getCImm()->getType();
+ else if (ElemMI->getOpcode() == TargetOpcode::G_FCONSTANT)
+ ElemTy = ElemMI->getOperand(1).getFPImm()->getType();
+ else
+ llvm_unreachable("Unexpected opcode");
+ unsigned NumElts =
+ MI.getNumExplicitOperands() - MI.getNumExplicitDefs();
+ Ty = VectorType::get(ElemTy, NumElts, false);
+ }
+ insertAssignInstr(Reg, Ty, nullptr, GR, MIB, MRI);
+ } else if (MI.getOpcode() == TargetOpcode::G_TRUNC ||
+ MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE ||
+ MI.getOpcode() == TargetOpcode::COPY ||
+ MI.getOpcode() == TargetOpcode::G_ADDRSPACE_CAST) {
+ propagateSPIRVType(&MI, GR, MRI, MIB);
+ }
+
+ if (MII == Begin)
+ ReachedBegin = true;
+ else
+ --MII;
+ }
+ }
+ for (MachineInstr *MI : ToErase)
+ MI->eraseFromParent();
+}
+
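+// Create a fresh register of the matching "ID" class and pick the GET_*
+// opcode that processInstr below uses to wrap a value, e.g. a scalar float
+// %val is rewrapped as "%id = GET_fID %val" (names hypothetical).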
+static std::pair<Register, unsigned>
+createNewIdReg(Register ValReg, unsigned Opcode, MachineRegisterInfo &MRI,
+ const SPIRVGlobalRegistry &GR) {
+ LLT NewT = LLT::scalar(32);
+ SPIRVType *SpvType = GR.getSPIRVTypeForVReg(ValReg);
+ assert(SpvType && "VReg is expected to have SPIRV type");
+ bool IsFloat = SpvType->getOpcode() == SPIRV::OpTypeFloat;
+ bool IsVectorFloat =
+ SpvType->getOpcode() == SPIRV::OpTypeVector &&
+ GR.getSPIRVTypeForVReg(SpvType->getOperand(1).getReg())->getOpcode() ==
+ SPIRV::OpTypeFloat;
+ IsFloat |= IsVectorFloat;
+ auto GetIdOp = IsFloat ? SPIRV::GET_fID : SPIRV::GET_ID;
+ auto DstClass = IsFloat ? &SPIRV::fIDRegClass : &SPIRV::IDRegClass;
+ if (MRI.getType(ValReg).isPointer()) {
+ NewT = LLT::pointer(0, 32);
+ GetIdOp = SPIRV::GET_pID;
+ DstClass = &SPIRV::pIDRegClass;
+ } else if (MRI.getType(ValReg).isVector()) {
+ NewT = LLT::fixed_vector(2, NewT);
+ GetIdOp = IsFloat ? SPIRV::GET_vfID : SPIRV::GET_vID;
+ DstClass = IsFloat ? &SPIRV::vfIDRegClass : &SPIRV::vIDRegClass;
+ }
+ Register IdReg = MRI.createGenericVirtualRegister(NewT);
+ MRI.setRegClass(IdReg, DstClass);
+ return {IdReg, GetIdOp};
+}
+
+static void processInstr(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI, SPIRVGlobalRegistry *GR) {
+ unsigned Opc = MI.getOpcode();
+ assert(MI.getNumDefs() > 0 && MRI.hasOneUse(MI.getOperand(0).getReg()));
+ MachineInstr &AssignTypeInst =
+ *(MRI.use_instr_begin(MI.getOperand(0).getReg()));
+ auto NewReg = createNewIdReg(MI.getOperand(0).getReg(), Opc, MRI, *GR).first;
+ AssignTypeInst.getOperand(1).setReg(NewReg);
+ MI.getOperand(0).setReg(NewReg);
+ MIB.setInsertPt(*MI.getParent(),
+ (MI.getNextNode() ? MI.getNextNode()->getIterator()
+ : MI.getParent()->end()));
+ for (auto &Op : MI.operands()) {
+ if (!Op.isReg() || Op.isDef())
+ continue;
+ auto IdOpInfo = createNewIdReg(Op.getReg(), Opc, MRI, *GR);
+ MIB.buildInstr(IdOpInfo.second).addDef(IdOpInfo.first).addUse(Op.getReg());
+ Op.setReg(IdOpInfo.first);
+ }
+}
+
+// Defined in SPIRVLegalizerInfo.cpp.
+extern bool isTypeFoldingSupported(unsigned Opcode);
+
+static void processInstrsWithTypeFolding(MachineFunction &MF,
+ SPIRVGlobalRegistry *GR,
+ MachineIRBuilder MIB) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (isTypeFoldingSupported(MI.getOpcode()))
+ processInstr(MI, MIB, MRI, GR);
+ }
+ }
+}
+
+static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder MIB) {
+ DenseMap<Register, SmallDenseMap<uint64_t, MachineBasicBlock *>>
+ SwitchRegToMBB;
+ DenseMap<Register, MachineBasicBlock *> DefaultMBBs;
+ DenseSet<Register> SwitchRegs;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+  // Before the IRTranslator pass, spv_switch calls are inserted before each
+  // switch instruction. IRTranslator lowers switches to ICMP+CBr+Br triples.
+  // A switch with two cases may be translated to this MIR sequence:
+ // intrinsic(@llvm.spv.switch), %CmpReg, %Const0, %Const1
+ // %Dst0 = G_ICMP intpred(eq), %CmpReg, %Const0
+ // G_BRCOND %Dst0, %bb.2
+ // G_BR %bb.5
+ // bb.5.entry:
+ // %Dst1 = G_ICMP intpred(eq), %CmpReg, %Const1
+ // G_BRCOND %Dst1, %bb.3
+ // G_BR %bb.4
+ // bb.2.sw.bb:
+ // ...
+ // bb.3.sw.bb1:
+ // ...
+ // bb.4.sw.epilog:
+ // ...
+  // Walk the MIs and collect information about destination MBBs to update the
+  // spv_switch calls. We assume that all spv_switch calls precede their
+  // corresponding ICMPs.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (isSpvIntrinsic(MI, Intrinsic::spv_switch)) {
+ assert(MI.getOperand(1).isReg());
+ Register Reg = MI.getOperand(1).getReg();
+ SwitchRegs.insert(Reg);
+ // Set the first successor as default MBB to support empty switches.
+ DefaultMBBs[Reg] = *MBB.succ_begin();
+ }
+ // Process only ICMPs that relate to spv_switches.
+ if (MI.getOpcode() == TargetOpcode::G_ICMP && MI.getOperand(2).isReg() &&
+ SwitchRegs.contains(MI.getOperand(2).getReg())) {
+ assert(MI.getOperand(0).isReg() && MI.getOperand(1).isPredicate() &&
+ MI.getOperand(3).isReg());
+ Register Dst = MI.getOperand(0).getReg();
+ // Set type info for destination register of switch's ICMP instruction.
+ if (GR->getSPIRVTypeForVReg(Dst) == nullptr) {
+ MIB.setInsertPt(*MI.getParent(), MI);
+ Type *LLVMTy = IntegerType::get(MF.getFunction().getContext(), 1);
+ SPIRVType *SpirvTy = GR->getOrCreateSPIRVType(LLVMTy, MIB);
+ MRI.setRegClass(Dst, &SPIRV::IDRegClass);
+ GR->assignSPIRVTypeToVReg(SpirvTy, Dst, MIB.getMF());
+ }
+ Register CmpReg = MI.getOperand(2).getReg();
+ MachineOperand &PredOp = MI.getOperand(1);
+ const auto CC = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
+ assert(CC == CmpInst::ICMP_EQ && MRI.hasOneUse(Dst) &&
+ MRI.hasOneDef(CmpReg));
+ uint64_t Val = getIConstVal(MI.getOperand(3).getReg(), &MRI);
+ MachineInstr *CBr = MRI.use_begin(Dst)->getParent();
+ assert(CBr->getOpcode() == SPIRV::G_BRCOND &&
+ CBr->getOperand(1).isMBB());
+ SwitchRegToMBB[CmpReg][Val] = CBr->getOperand(1).getMBB();
+        // The next MI is always a BR to either the next case or the default.
+ MachineInstr *NextMI = CBr->getNextNode();
+ assert(NextMI->getOpcode() == SPIRV::G_BR &&
+ NextMI->getOperand(0).isMBB());
+ MachineBasicBlock *NextMBB = NextMI->getOperand(0).getMBB();
+ assert(NextMBB != nullptr);
+      // The default MBB is the one that does not start with an ICMP on the
+      // switch's compare register.
+ if (NextMBB->front().getOpcode() != SPIRV::G_ICMP ||
+ (NextMBB->front().getOperand(2).isReg() &&
+ NextMBB->front().getOperand(2).getReg() != CmpReg))
+ DefaultMBBs[CmpReg] = NextMBB;
+ }
+ }
+ }
+  // Modify the spv_switch operands using the collected values. For the
+  // example above, the result looks like this:
+ // intrinsic(@llvm.spv.switch), %CmpReg, %bb.4, i32 0, %bb.2, i32 1, %bb.3
+ // Note that ICMP+CBr+Br sequences are not removed, but ModuleAnalysis marks
+ // them as skipped and AsmPrinter does not output them.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!isSpvIntrinsic(MI, Intrinsic::spv_switch))
+ continue;
+ assert(MI.getOperand(1).isReg());
+ Register Reg = MI.getOperand(1).getReg();
+ unsigned NumOp = MI.getNumExplicitOperands();
+ SmallVector<const ConstantInt *, 3> Vals;
+ SmallVector<MachineBasicBlock *, 3> MBBs;
+ for (unsigned i = 2; i < NumOp; i++) {
+ Register CReg = MI.getOperand(i).getReg();
+ uint64_t Val = getIConstVal(CReg, &MRI);
+ MachineInstr *ConstInstr = getDefInstrMaybeConstant(CReg, &MRI);
+ Vals.push_back(ConstInstr->getOperand(1).getCImm());
+ MBBs.push_back(SwitchRegToMBB[Reg][Val]);
+ }
+ for (unsigned i = MI.getNumExplicitOperands() - 1; i > 1; i--)
+ MI.removeOperand(i);
+ MI.addOperand(MachineOperand::CreateMBB(DefaultMBBs[Reg]));
+ for (unsigned i = 0; i < Vals.size(); i++) {
+ MI.addOperand(MachineOperand::CreateCImm(Vals[i]));
+ MI.addOperand(MachineOperand::CreateMBB(MBBs[i]));
+ }
+ }
+ }
+}
+
+bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) {
+ // Initialize the type registry.
+ const SPIRVSubtarget &ST = MF.getSubtarget<SPIRVSubtarget>();
+ SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry();
+ GR->setCurrentFunc(MF);
+ MachineIRBuilder MIB(MF);
+ foldConstantsIntoIntrinsics(MF);
+ insertBitcasts(MF, GR, MIB);
+ generateAssignInstrs(MF, GR, MIB);
+ processInstrsWithTypeFolding(MF, GR, MIB);
+ processSwitches(MF, GR, MIB);
+
+ return true;
+}
+
+INITIALIZE_PASS(SPIRVPreLegalizer, DEBUG_TYPE, "SPIRV pre legalizer", false,
+ false)
+
+char SPIRVPreLegalizer::ID = 0;
+
+FunctionPass *llvm::createSPIRVPreLegalizerPass() {
+ return new SPIRVPreLegalizer();
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp
new file mode 100644
index 000000000000..9bf9d7fe5b39
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.cpp
@@ -0,0 +1,47 @@
+//===- SPIRVRegisterBankInfo.cpp ------------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the targeting of the RegisterBankInfo class for SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVRegisterBankInfo.h"
+#include "SPIRVRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+
+#define GET_REGINFO_ENUM
+#include "SPIRVGenRegisterInfo.inc"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "SPIRVGenRegisterBank.inc"
+
+using namespace llvm;
+
+// This is required for .td selection patterns to work; otherwise the RegClass
+// checks would be redundant, as all the classes would be mapped to the same
+// bank.
+const RegisterBank &
+SPIRVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT Ty) const {
+ switch (RC.getID()) {
+ case SPIRV::TYPERegClassID:
+ return SPIRV::TYPERegBank;
+ case SPIRV::pIDRegClassID:
+ case SPIRV::IDRegClassID:
+ return SPIRV::IDRegBank;
+ case SPIRV::fIDRegClassID:
+ return SPIRV::fIDRegBank;
+ case SPIRV::vIDRegClassID:
+ return SPIRV::vIDRegBank;
+ case SPIRV::vfIDRegClassID:
+ return SPIRV::vfIDRegBank;
+ case SPIRV::ANYIDRegClassID:
+ case SPIRV::ANYRegClassID:
+ return SPIRV::IDRegBank;
+ }
+ llvm_unreachable("Unknown register class");
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h
new file mode 100644
index 000000000000..67ddcdefb7dd
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBankInfo.h
@@ -0,0 +1,38 @@
+//===- SPIRVRegisterBankInfo.h -----------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the targeting of the RegisterBankInfo class for SPIR-V.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/RegisterBankInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "SPIRVGenRegisterBank.inc"
+
+namespace llvm {
+
+class TargetRegisterInfo;
+
+class SPIRVGenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "SPIRVGenRegisterBank.inc"
+};
+
+// This class provides the information for the target register banks.
+class SPIRVRegisterBankInfo final : public SPIRVGenRegisterBankInfo {
+public:
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT Ty) const override;
+};
+} // namespace llvm
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERBANKINFO_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td b/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td
new file mode 100644
index 000000000000..90c7f3a6e672
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterBanks.td
@@ -0,0 +1,15 @@
+//===-- SPIRVRegisterBanks.td - Describe SPIR-V RegBanks ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Although RegisterBankSelection is disabled, we need to distinguish the banks
+// as the InstructionSelector RegClass checking code relies on them.
+def IDRegBank : RegisterBank<"IDBank", [ID]>;
+def fIDRegBank : RegisterBank<"fIDBank", [fID]>;
+def vIDRegBank : RegisterBank<"vIDBank", [vID]>;
+def vfIDRegBank : RegisterBank<"vfIDBank", [vfID]>;
+def TYPERegBank : RegisterBank<"TYPEBank", [TYPE]>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp
new file mode 100644
index 000000000000..cf8a967d59c4
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.cpp
@@ -0,0 +1,32 @@
+//===-- SPIRVRegisterInfo.cpp - SPIR-V Register Information -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPIR-V implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVRegisterInfo.h"
+#include "SPIRV.h"
+#include "SPIRVSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SPIRVGenRegisterInfo.inc"
+using namespace llvm;
+
+SPIRVRegisterInfo::SPIRVRegisterInfo() : SPIRVGenRegisterInfo(SPIRV::ID0) {}
+
+BitVector SPIRVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ return BitVector(getNumRegs());
+}
+
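+// SPIR-V has no callee-saved physical registers, so return a null-terminated
+// empty list, as the interface requires.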
+const MCPhysReg *
+SPIRVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const MCPhysReg CalleeSavedReg = {0};
+ return &CalleeSavedReg;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h
new file mode 100644
index 000000000000..f6f22b81e0bc
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.h
@@ -0,0 +1,36 @@
+//===-- SPIRVRegisterInfo.h - SPIR-V Register Information -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPIR-V implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERINFO_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERINFO_H
+
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SPIRVGenRegisterInfo.inc"
+
+namespace llvm {
+
+struct SPIRVRegisterInfo : public SPIRVGenRegisterInfo {
+ SPIRVRegisterInfo();
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override {}
+ Register getFrameRegister(const MachineFunction &MF) const override {
+ return 0;
+ }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVREGISTERINFO_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td
new file mode 100644
index 000000000000..d0b64b6895d0
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVRegisterInfo.td
@@ -0,0 +1,39 @@
+//===-- SPIRVRegisterInfo.td - SPIR-V Register defs --------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations that describe the SPIR-V register file.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "SPIRV" in {
+ def p0 : PtrValueType <i32, 0>;
+  // All registers are for 32-bit identifiers, so each class has a single
+  // dummy register.
+
+ // Class for registers that are the result of OpTypeXXX instructions
+ def TYPE0 : Register<"TYPE0">;
+ def TYPE : RegisterClass<"SPIRV", [i32], 32, (add TYPE0)>;
+
+ // Class for every other non-type ID
+ def ID0 : Register<"ID0">;
+ def ID : RegisterClass<"SPIRV", [i32], 32, (add ID0)>;
+ def fID0 : Register<"FID0">;
+ def fID : RegisterClass<"SPIRV", [f32], 32, (add fID0)>;
+ def pID0 : Register<"pID0">;
+ def pID : RegisterClass<"SPIRV", [p0], 32, (add pID0)>;
+  def vID0 : Register<"vID0">;
+ def vID : RegisterClass<"SPIRV", [v2i32], 32, (add vID0)>;
+  def vfID0 : Register<"vfID0">;
+ def vfID : RegisterClass<"SPIRV", [v2f32], 32, (add vfID0)>;
+
+  def ANYID : RegisterClass<"SPIRV", [i32, f32, p0, v2i32, v2f32], 32,
+                            (add ID, fID, pID, vID, vfID)>;
+
+ // A few instructions like OpName can take ids from both type and non-type
+ // instructions, so we need a super-class to allow for both to count as valid
+ // arguments for these instructions.
+ def ANY : RegisterClass<"SPIRV", [i32], 32, (add TYPE, ID)>;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
new file mode 100644
index 000000000000..cdf3a160f373
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -0,0 +1,68 @@
+//===-- SPIRVSubtarget.cpp - SPIR-V Subtarget Information ------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPIR-V specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVSubtarget.h"
+#include "SPIRV.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVLegalizerInfo.h"
+#include "SPIRVRegisterBankInfo.h"
+#include "SPIRVTargetMachine.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "spirv-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPIRVGenSubtargetInfo.inc"
+
+// Compare version numbers, but allow 0 to mean unspecified.
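+// E.g. isAtLeastVer(0, 14) and isAtLeastVer(14, 14) are true, while
+// isAtLeastVer(13, 14) is false.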
+static bool isAtLeastVer(uint32_t Target, uint32_t VerToCompareTo) {
+ return Target == 0 || Target >= VerToCompareTo;
+}
+
+static unsigned computePointerSize(const Triple &TT) {
+ const auto Arch = TT.getArch();
+  // TODO: unify this with pointer legalization.
+ assert(TT.isSPIRV());
+ return Arch == Triple::spirv32 ? 32 : 64;
+}
+
+SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS,
+ const SPIRVTargetMachine &TM)
+ : SPIRVGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS),
+ PointerSize(computePointerSize(TT)), SPIRVVersion(0), InstrInfo(),
+ FrameLowering(initSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
+ GR = std::make_unique<SPIRVGlobalRegistry>(PointerSize);
+ CallLoweringInfo =
+ std::make_unique<SPIRVCallLowering>(TLInfo, *this, GR.get());
+ Legalizer = std::make_unique<SPIRVLegalizerInfo>(*this);
+ RegBankInfo = std::make_unique<SPIRVRegisterBankInfo>();
+ InstSelector.reset(
+ createSPIRVInstructionSelector(TM, *this, *RegBankInfo.get()));
+}
+
+SPIRVSubtarget &SPIRVSubtarget::initSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ ParseSubtargetFeatures(CPU, /*TuneCPU=*/CPU, FS);
+ if (SPIRVVersion == 0)
+ SPIRVVersion = 14;
+ return *this;
+}
+
+// If the SPIR-V version is >= 1.4, we can call OpPtrEqual and OpPtrNotEqual.
+bool SPIRVSubtarget::canDirectlyComparePointers() const {
+ return isAtLeastVer(SPIRVVersion, 14);
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.h b/llvm/lib/Target/SPIRV/SPIRVSubtarget.h
new file mode 100644
index 000000000000..a6332cfefa8e
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.h
@@ -0,0 +1,93 @@
+//===-- SPIRVSubtarget.h - SPIR-V Subtarget Information --------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPIR-V specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVSUBTARGET_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVSUBTARGET_H
+
+#include "SPIRVCallLowering.h"
+#include "SPIRVFrameLowering.h"
+#include "SPIRVISelLowering.h"
+#include "SPIRVInstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SPIRVGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+class SPIRVGlobalRegistry;
+class SPIRVTargetMachine;
+
+class SPIRVSubtarget : public SPIRVGenSubtargetInfo {
+private:
+ const unsigned PointerSize;
+ uint32_t SPIRVVersion;
+
+ std::unique_ptr<SPIRVGlobalRegistry> GR;
+
+ SPIRVInstrInfo InstrInfo;
+ SPIRVFrameLowering FrameLowering;
+ SPIRVTargetLowering TLInfo;
+
+ // GlobalISel related APIs.
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<InstructionSelector> InstSelector;
+
+public:
+  // This constructor initializes the data members to match those of the
+  // specified triple.
+ SPIRVSubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS, const SPIRVTargetMachine &TM);
+ SPIRVSubtarget &initSubtargetDependencies(StringRef CPU, StringRef FS);
+
+ // Parses features string setting specified subtarget options.
+ // The definition of this function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+ unsigned getPointerSize() const { return PointerSize; }
+ bool canDirectlyComparePointers() const;
+  uint32_t getSPIRVVersion() const { return SPIRVVersion; }
+ SPIRVGlobalRegistry *getSPIRVGlobalRegistry() const { return GR.get(); }
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+ InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+ const SPIRVInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const SPIRVFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const SPIRVTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const SPIRVRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVSUBTARGET_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
new file mode 100644
index 000000000000..f7c88a5c6d4a
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -0,0 +1,186 @@
+//===- SPIRVTargetMachine.cpp - Define TargetMachine for SPIR-V -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about SPIR-V target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVTargetMachine.h"
+#include "SPIRV.h"
+#include "SPIRVCallLowering.h"
+#include "SPIRVGlobalRegistry.h"
+#include "SPIRVLegalizerInfo.h"
+#include "SPIRVTargetObjectFile.h"
+#include "SPIRVTargetTransformInfo.h"
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTarget() {
+ // Register the target.
+ RegisterTargetMachine<SPIRVTargetMachine> X(getTheSPIRV32Target());
+ RegisterTargetMachine<SPIRVTargetMachine> Y(getTheSPIRV64Target());
+
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeGlobalISel(PR);
+ initializeSPIRVModuleAnalysisPass(PR);
+}
+
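+// Note: the spirv64 layout string below omits an explicit pointer spec, so
+// DataLayout falls back to its default of 64-bit pointers.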
+static std::string computeDataLayout(const Triple &TT) {
+ const auto Arch = TT.getArch();
+ if (Arch == Triple::spirv32)
+ return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+ return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+}
+
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+ if (!RM)
+ return Reloc::PIC_;
+ return *RM;
+}
+
+// Pin SPIRVTargetObjectFile's vtable to this file.
+SPIRVTargetObjectFile::~SPIRVTargetObjectFile() {}
+
+SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT)
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+ getEffectiveRelocModel(RM),
+ getEffectiveCodeModel(CM, CodeModel::Small), OL),
+ TLOF(std::make_unique<TargetLoweringObjectFileELF>()),
+ Subtarget(TT, CPU.str(), FS.str(), *this) {
+ initAsmInfo();
+ setGlobalISel(true);
+ setFastISel(false);
+ setO0WantsFastISel(false);
+ setRequiresStructuredCFG(false);
+}
+
+namespace {
+// SPIR-V Code Generator Pass Configuration Options.
+class SPIRVPassConfig : public TargetPassConfig {
+public:
+ SPIRVPassConfig(SPIRVTargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SPIRVTargetMachine &getSPIRVTargetMachine() const {
+ return getTM<SPIRVTargetMachine>();
+ }
+ void addIRPasses() override;
+ void addISelPrepare() override;
+
+ bool addIRTranslator() override;
+ void addPreLegalizeMachineIR() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
+
+ FunctionPass *createTargetRegisterAllocator(bool) override;
+ void addFastRegAlloc() override {}
+ void addOptimizedRegAlloc() override {}
+
+ void addPostRegAlloc() override;
+};
+} // namespace
+
+// We do not use physical registers, and maintain virtual registers throughout
+// the entire pipeline, so return nullptr to disable register allocation.
+FunctionPass *SPIRVPassConfig::createTargetRegisterAllocator(bool) {
+ return nullptr;
+}
+
+// Disable passes that break because they assume no virtual registers remain
+// after register allocation.
+void SPIRVPassConfig::addPostRegAlloc() {
+  // These passes do not work with vregs in place of physical regs.
+ disablePass(&MachineCopyPropagationID);
+ disablePass(&PostRAMachineSinkingID);
+ disablePass(&PostRASchedulerID);
+ disablePass(&FuncletLayoutID);
+ disablePass(&StackMapLivenessID);
+ disablePass(&PatchableFunctionID);
+ disablePass(&ShrinkWrapID);
+ disablePass(&LiveDebugValuesID);
+
+  // These passes do not work with OpPhi.
+ disablePass(&BranchFolderPassID);
+ disablePass(&MachineBlockPlacementID);
+
+ TargetPassConfig::addPostRegAlloc();
+}
+
+TargetTransformInfo
+SPIRVTargetMachine::getTargetTransformInfo(const Function &F) const {
+ return TargetTransformInfo(SPIRVTTIImpl(this, F));
+}
+
+TargetPassConfig *SPIRVTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SPIRVPassConfig(*this, PM);
+}
+
+void SPIRVPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); }
+
+void SPIRVPassConfig::addISelPrepare() {
+ addPass(createSPIRVEmitIntrinsicsPass(&getTM<SPIRVTargetMachine>()));
+ TargetPassConfig::addISelPrepare();
+}
+
+bool SPIRVPassConfig::addIRTranslator() {
+ addPass(new IRTranslator(getOptLevel()));
+ return false;
+}
+
+void SPIRVPassConfig::addPreLegalizeMachineIR() {
+ addPass(createSPIRVPreLegalizerPass());
+}
+
+// Use a default legalizer.
+bool SPIRVPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
+
+// Do not add a RegBankSelect pass, as we only ever need virtual registers.
+bool SPIRVPassConfig::addRegBankSelect() {
+ disablePass(&RegBankSelect::ID);
+ return false;
+}
+
+namespace {
+// A custom subclass of InstructionSelect, which is mostly the same except that
+// it does not require RegBankSelect to have run beforehand.
+class SPIRVInstructionSelect : public InstructionSelect {
+  // We don't use register banks, so unset the requirement for them.
+ MachineFunctionProperties getRequiredProperties() const override {
+ return InstructionSelect::getRequiredProperties().reset(
+ MachineFunctionProperties::Property::RegBankSelected);
+ }
+};
+} // namespace
+
+bool SPIRVPassConfig::addGlobalInstructionSelect() {
+ addPass(new SPIRVInstructionSelect());
+ return false;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
new file mode 100644
index 000000000000..f3597971bc95
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
@@ -0,0 +1,47 @@
+//===-- SPIRVTargetMachine.h - Define TargetMachine for SPIR-V -*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPIR-V specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTARGETMACHINE_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTARGETMACHINE_H
+
+#include "SPIRVSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class SPIRVTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ SPIRVSubtarget Subtarget;
+
+public:
+ SPIRVTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT);
+
+ const SPIRVSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ const SPIRVSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ bool usesPhysRegsForValues() const override { return false; }
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTARGETMACHINE_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h b/llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h
new file mode 100644
index 000000000000..00c456971ef1
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetObjectFile.h
@@ -0,0 +1,45 @@
+//===-- SPIRVTargetObjectFile.h - SPIRV Object Info -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTARGETOBJECTFILE_H
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+class SPIRVTargetObjectFile : public TargetLoweringObjectFile {
+public:
+ ~SPIRVTargetObjectFile() override;
+
+ void Initialize(MCContext &ctx, const TargetMachine &TM) override {
+ TargetLoweringObjectFile::Initialize(ctx, TM);
+ }
+  // All words in a SPIR-V module, except the first 5 (the module header), form
+  // a linear sequence of instructions in a specific order, so we put all the
+  // instructions in a single text section.
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+ const Constant *C,
+ Align &Alignment) const override {
+ return TextSection;
+ }
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override {
+ return TextSection;
+ }
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override {
+ return TextSection;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTARGETOBJECTFILE_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
new file mode 100644
index 000000000000..ac351cf42f5c
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -0,0 +1,44 @@
+//===- SPIRVTargetTransformInfo.h - SPIR-V specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+// This file contains a TargetTransformInfo::Concept conforming object specific
+// to the SPIRV target machine. It uses the target's detailed information to
+// provide more precise answers to certain TTI queries, while letting the
+// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVTARGETTRANSFORMINFO_H
+
+#include "SPIRV.h"
+#include "SPIRVTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
+ using BaseT = BasicTTIImplBase<SPIRVTTIImpl>;
+
+ friend BaseT;
+
+ const SPIRVSubtarget *ST;
+ const SPIRVTargetLowering *TLI;
+
+ const TargetSubtargetInfo *getST() const { return ST; }
+ const SPIRVTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit SPIRVTTIImpl(const SPIRVTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVTARGETTRANSFORMINFO_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
new file mode 100644
index 000000000000..b92dc12735f8
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -0,0 +1,207 @@
+//===--- SPIRVUtils.cpp ---- SPIR-V Utility Functions -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRVUtils.h"
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "SPIRV.h"
+#include "SPIRVInstrInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
+
+using namespace llvm;
+
+// The following functions are used to add string literals as a series of
+// 32-bit integer operands with the correct format, and to unpack them if
+// necessary when making string comparisons in compiler passes. SPIR-V requires
+// null-terminated UTF-8 strings padded to 32-bit alignment.
+static uint32_t convertCharsToWord(const StringRef &Str, unsigned i) {
+ uint32_t Word = 0u; // Build up this 32-bit word from 4 8-bit chars.
+ for (unsigned WordIndex = 0; WordIndex < 4; ++WordIndex) {
+ unsigned StrIndex = i + WordIndex;
+    uint8_t CharToAdd = 0; // Initialize char as padding/null.
+ if (StrIndex < Str.size()) { // If it's within the string, get a real char.
+ CharToAdd = Str[StrIndex];
+ }
+ Word |= (CharToAdd << (WordIndex * 8));
+ }
+ return Word;
+}
+
+// Get length including padding and null terminator.
+static size_t getPaddedLen(const StringRef &Str) {
+ const size_t Len = Str.size() + 1;
+ return (Len % 4 == 0) ? Len : Len + (4 - (Len % 4));
+}
+
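+// Worked example: for Str = "abc", getPaddedLen is 4 (3 chars plus the null
+// terminator), and convertCharsToWord(Str, 0) packs the bytes little-endian
+// into the single word 0x00636261 ('a' = 0x61 in byte 0, 'b' = 0x62 in byte 1,
+// 'c' = 0x63 in byte 2, null padding in byte 3).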
+void addStringImm(const StringRef &Str, MachineInstrBuilder &MIB) {
+ const size_t PaddedLen = getPaddedLen(Str);
+ for (unsigned i = 0; i < PaddedLen; i += 4) {
+ // Add an operand for the 32-bits of chars or padding.
+ MIB.addImm(convertCharsToWord(Str, i));
+ }
+}
+
+void addStringImm(const StringRef &Str, IRBuilder<> &B,
+ std::vector<Value *> &Args) {
+ const size_t PaddedLen = getPaddedLen(Str);
+ for (unsigned i = 0; i < PaddedLen; i += 4) {
+ // Add a vector element for the 32-bits of chars or padding.
+ Args.push_back(B.getInt32(convertCharsToWord(Str, i)));
+ }
+}
+
+std::string getStringImm(const MachineInstr &MI, unsigned StartIndex) {
+ return getSPIRVStringOperand(MI, StartIndex);
+}
+
+void addNumImm(const APInt &Imm, MachineInstrBuilder &MIB) {
+ const auto Bitwidth = Imm.getBitWidth();
+ switch (Bitwidth) {
+ case 1:
+ break; // Already handled.
+ case 8:
+ case 16:
+ case 32:
+ MIB.addImm(Imm.getZExtValue());
+ break;
+ case 64: {
+ uint64_t FullImm = Imm.getZExtValue();
+ uint32_t LowBits = FullImm & 0xffffffff;
+ uint32_t HighBits = (FullImm >> 32) & 0xffffffff;
+ MIB.addImm(LowBits).addImm(HighBits);
+ break;
+ }
+ default:
+ report_fatal_error("Unsupported constant bitwidth");
+ }
+}
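+// E.g. a 64-bit immediate 0x1122334455667788 is emitted as two 32-bit
+// operands: the low word 0x55667788 first, then the high word 0x11223344.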
+
+void buildOpName(Register Target, const StringRef &Name,
+ MachineIRBuilder &MIRBuilder) {
+ if (!Name.empty()) {
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpName).addUse(Target);
+ addStringImm(Name, MIB);
+ }
+}
+
+static void finishBuildOpDecorate(MachineInstrBuilder &MIB,
+ const std::vector<uint32_t> &DecArgs,
+ StringRef StrImm) {
+ if (!StrImm.empty())
+ addStringImm(StrImm, MIB);
+ for (const auto &DecArg : DecArgs)
+ MIB.addImm(DecArg);
+}
+
+void buildOpDecorate(Register Reg, MachineIRBuilder &MIRBuilder,
+ llvm::SPIRV::Decoration Dec,
+ const std::vector<uint32_t> &DecArgs, StringRef StrImm) {
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpDecorate)
+ .addUse(Reg)
+ .addImm(static_cast<uint32_t>(Dec));
+ finishBuildOpDecorate(MIB, DecArgs, StrImm);
+}
+
+void buildOpDecorate(Register Reg, MachineInstr &I, const SPIRVInstrInfo &TII,
+ llvm::SPIRV::Decoration Dec,
+ const std::vector<uint32_t> &DecArgs, StringRef StrImm) {
+ MachineBasicBlock &MBB = *I.getParent();
+ auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(SPIRV::OpDecorate))
+ .addUse(Reg)
+ .addImm(static_cast<uint32_t>(Dec));
+ finishBuildOpDecorate(MIB, DecArgs, StrImm);
+}
+
+// TODO: maybe the following two functions should be handled in the subtarget
+// to allow for different OpenCL vs Vulkan handling.
+unsigned storageClassToAddressSpace(SPIRV::StorageClass SC) {
+ switch (SC) {
+ case SPIRV::StorageClass::Function:
+ return 0;
+ case SPIRV::StorageClass::CrossWorkgroup:
+ return 1;
+ case SPIRV::StorageClass::UniformConstant:
+ return 2;
+ case SPIRV::StorageClass::Workgroup:
+ return 3;
+ case SPIRV::StorageClass::Generic:
+ return 4;
+ case SPIRV::StorageClass::Input:
+ return 7;
+ default:
+ llvm_unreachable("Unable to get address space id");
+ }
+}
+
+SPIRV::StorageClass addressSpaceToStorageClass(unsigned AddrSpace) {
+ switch (AddrSpace) {
+ case 0:
+ return SPIRV::StorageClass::Function;
+ case 1:
+ return SPIRV::StorageClass::CrossWorkgroup;
+ case 2:
+ return SPIRV::StorageClass::UniformConstant;
+ case 3:
+ return SPIRV::StorageClass::Workgroup;
+ case 4:
+ return SPIRV::StorageClass::Generic;
+ case 7:
+ return SPIRV::StorageClass::Input;
+ default:
+ llvm_unreachable("Unknown address space");
+ }
+}
+
+SPIRV::MemorySemantics getMemSemanticsForStorageClass(SPIRV::StorageClass SC) {
+ switch (SC) {
+ case SPIRV::StorageClass::StorageBuffer:
+ case SPIRV::StorageClass::Uniform:
+ return SPIRV::MemorySemantics::UniformMemory;
+ case SPIRV::StorageClass::Workgroup:
+ return SPIRV::MemorySemantics::WorkgroupMemory;
+ case SPIRV::StorageClass::CrossWorkgroup:
+ return SPIRV::MemorySemantics::CrossWorkgroupMemory;
+ case SPIRV::StorageClass::AtomicCounter:
+ return SPIRV::MemorySemantics::AtomicCounterMemory;
+ case SPIRV::StorageClass::Image:
+ return SPIRV::MemorySemantics::ImageMemory;
+ default:
+ return SPIRV::MemorySemantics::None;
+ }
+}
+
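+// For example, given %c where
+//   %tmp = G_CONSTANT i32 5
+//   %c = ASSIGN_TYPE %tmp, %ty
+// getDefInstrMaybeConstant(%c, MRI) returns the G_CONSTANT and updates the
+// ConstReg argument to %tmp.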
+MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *ConstInstr = MRI->getVRegDef(ConstReg);
+ if (ConstInstr->getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ ConstInstr->getIntrinsicID() == Intrinsic::spv_track_constant) {
+ ConstReg = ConstInstr->getOperand(2).getReg();
+ ConstInstr = MRI->getVRegDef(ConstReg);
+ } else if (ConstInstr->getOpcode() == SPIRV::ASSIGN_TYPE) {
+ ConstReg = ConstInstr->getOperand(1).getReg();
+ ConstInstr = MRI->getVRegDef(ConstReg);
+ }
+ return ConstInstr;
+}
+
+uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {
+ const MachineInstr *MI = getDefInstrMaybeConstant(ConstReg, MRI);
+ assert(MI && MI->getOpcode() == TargetOpcode::G_CONSTANT);
+ return MI->getOperand(1).getCImm()->getValue().getZExtValue();
+}
+
+Type *getMDOperandAsType(const MDNode *N, unsigned I) {
+ return cast<ValueAsMetadata>(N->getOperand(I))->getType();
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
new file mode 100644
index 000000000000..ffa82c9c1fe4
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -0,0 +1,83 @@
+//===--- SPIRVUtils.h ---- SPIR-V Utility Functions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
+#define LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
+
+#include "MCTargetDesc/SPIRVBaseInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include <string>
+
+namespace llvm {
+class MCInst;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+class MachineIRBuilder;
+class MachineRegisterInfo;
+class Register;
+class StringRef;
+class SPIRVInstrInfo;
+} // namespace llvm
+
+// Add the given string as a series of integer operands, inserting null
+// terminators and padding so that the operands are all complete 32-bit
+// little-endian words.
+void addStringImm(const llvm::StringRef &Str, llvm::MachineInstrBuilder &MIB);
+void addStringImm(const llvm::StringRef &Str, llvm::IRBuilder<> &B,
+ std::vector<llvm::Value *> &Args);
+
+// Read the series of integer operands back as a null-terminated string using
+// the reverse of the logic in addStringImm.
+std::string getStringImm(const llvm::MachineInstr &MI, unsigned StartIndex);
+
+// Add the given numerical immediate to MIB.
+void addNumImm(const llvm::APInt &Imm, llvm::MachineInstrBuilder &MIB);
+
+// Add an OpName instruction for the given target register.
+void buildOpName(llvm::Register Target, const llvm::StringRef &Name,
+ llvm::MachineIRBuilder &MIRBuilder);
+
+// Add an OpDecorate instruction for the given Reg.
+void buildOpDecorate(llvm::Register Reg, llvm::MachineIRBuilder &MIRBuilder,
+ llvm::SPIRV::Decoration Dec,
+ const std::vector<uint32_t> &DecArgs,
+ llvm::StringRef StrImm = "");
+void buildOpDecorate(llvm::Register Reg, llvm::MachineInstr &I,
+ const llvm::SPIRVInstrInfo &TII,
+ llvm::SPIRV::Decoration Dec,
+ const std::vector<uint32_t> &DecArgs,
+ llvm::StringRef StrImm = "");
+
+// Convert a SPIR-V storage class to the corresponding LLVM IR address space.
+unsigned storageClassToAddressSpace(llvm::SPIRV::StorageClass SC);
+
+// Convert an LLVM IR address space to a SPIR-V storage class.
+llvm::SPIRV::StorageClass addressSpaceToStorageClass(unsigned AddrSpace);
+
+llvm::SPIRV::MemorySemantics
+getMemSemanticsForStorageClass(llvm::SPIRV::StorageClass SC);
+
+// Find the defining instruction for the given ConstReg, looking through
+// spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg to the
+// register defined by the underlying constant instruction.
+llvm::MachineInstr *
+getDefInstrMaybeConstant(llvm::Register &ConstReg,
+ const llvm::MachineRegisterInfo *MRI);
+
+// Get the constant integer value of the given ConstReg.
+uint64_t getIConstVal(llvm::Register ConstReg,
+ const llvm::MachineRegisterInfo *MRI);
+
+// Get the type of the i-th operand of the metadata node.
+llvm::Type *getMDOperandAsType(const llvm::MDNode *N, unsigned I);
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp
new file mode 100644
index 000000000000..fb7cab4fe779
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.cpp
@@ -0,0 +1,28 @@
+//===-- SPIRVTargetInfo.cpp - SPIR-V Target Implementation ----*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TargetInfo/SPIRVTargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+Target &llvm::getTheSPIRV32Target() {
+ static Target TheSPIRV32Target;
+ return TheSPIRV32Target;
+}
+Target &llvm::getTheSPIRV64Target() {
+ static Target TheSPIRV64Target;
+ return TheSPIRV64Target;
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSPIRVTargetInfo() {
+ RegisterTarget<Triple::spirv32> X(getTheSPIRV32Target(), "spirv32",
+ "SPIR-V 32-bit", "SPIRV");
+ RegisterTarget<Triple::spirv64> Y(getTheSPIRV64Target(), "spirv64",
+ "SPIR-V 64-bit", "SPIRV");
+}
diff --git a/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h
new file mode 100644
index 000000000000..4353258e1d1a
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/TargetInfo/SPIRVTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- SPIRVTargetInfo.h - SPIRV Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPIRV_TARGETINFO_SPIRVTARGETINFO_H
+#define LLVM_LIB_TARGET_SPIRV_TARGETINFO_SPIRVTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheSPIRV32Target();
+Target &getTheSPIRV64Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPIRV_TARGETINFO_SPIRVTARGETINFO_H
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index af3304f0907d..77e9b1d96612 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -54,6 +55,8 @@ class SparcOperand;
class SparcAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
+ enum class TailRelocKind { Load_GOT, Add_TLS, Load_TLS, Call_TLS };
+
/// @name Auto-generated Match Functions
/// {
@@ -82,6 +85,9 @@ class SparcAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseMembarTag(OperandVector &Operands);
+ template <TailRelocKind Kind>
+ OperandMatchResultTy parseTailRelocSym(OperandVector &Operands);
+
template <unsigned N>
OperandMatchResultTy parseShiftAmtImm(OperandVector &Operands);
@@ -112,6 +118,8 @@ class SparcAsmParser : public MCTargetAsmParser {
bool expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ SMLoc getLoc() const { return getParser().getTok().getLoc(); }
+
public:
SparcAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII,
@@ -266,6 +274,7 @@ public:
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
bool isMembarTag() const { return Kind == k_Immediate; }
+ bool isTailRelocSym() const { return Kind == k_Immediate; }
bool isCallTarget() const {
if (!isImm())
@@ -426,6 +435,11 @@ public:
addExpr(Inst, getImm());
}
+ void addTailRelocSymOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
auto Op = std::make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
@@ -849,6 +863,97 @@ OperandMatchResultTy SparcAsmParser::parseShiftAmtImm(OperandVector &Operands) {
return MatchOperand_Success;
}
+template <SparcAsmParser::TailRelocKind Kind>
+OperandMatchResultTy
+SparcAsmParser::parseTailRelocSym(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+
+ auto MatchesKind = [](SparcMCExpr::VariantKind VK) -> bool {
+ switch (Kind) {
+ case TailRelocKind::Load_GOT:
+ // Non-TLS relocations on ld (or ldx).
+ // ld [%rr + %rr], %rr, %rel(sym)
+ return VK == SparcMCExpr::VK_Sparc_GOTDATA_OP;
+ case TailRelocKind::Add_TLS:
+ // TLS relocations on add.
+ // add %rr, %rr, %rr, %rel(sym)
+ switch (VK) {
+ case SparcMCExpr::VK_Sparc_TLS_GD_ADD:
+ case SparcMCExpr::VK_Sparc_TLS_IE_ADD:
+ case SparcMCExpr::VK_Sparc_TLS_LDM_ADD:
+ case SparcMCExpr::VK_Sparc_TLS_LDO_ADD:
+ return true;
+ default:
+ return false;
+ }
+ case TailRelocKind::Load_TLS:
+ // TLS relocations on ld (or ldx).
+ // ld[x] %addr, %rr, %rel(sym)
+ switch (VK) {
+ case SparcMCExpr::VK_Sparc_TLS_IE_LD:
+ case SparcMCExpr::VK_Sparc_TLS_IE_LDX:
+ return true;
+ default:
+ return false;
+ }
+ case TailRelocKind::Call_TLS:
+ // TLS relocations on call.
+ // call sym, %rel(sym)
+ switch (VK) {
+ case SparcMCExpr::VK_Sparc_TLS_GD_CALL:
+ case SparcMCExpr::VK_Sparc_TLS_LDM_CALL:
+ return true;
+ default:
+ return false;
+ }
+ }
+ llvm_unreachable("Unhandled SparcAsmParser::TailRelocKind enum");
+ };
+
+ if (getLexer().getKind() != AsmToken::Percent) {
+ Error(getLoc(), "expected '%' for operand modifier");
+ return MatchOperand_ParseFail;
+ }
+
+ const AsmToken Tok = Parser.getTok();
+ getParser().Lex(); // Eat '%'
+
+ if (getLexer().getKind() != AsmToken::Identifier) {
+ Error(getLoc(), "expected valid identifier for operand modifier");
+ return MatchOperand_ParseFail;
+ }
+
+ StringRef Name = getParser().getTok().getIdentifier();
+ SparcMCExpr::VariantKind VK = SparcMCExpr::parseVariantKind(Name);
+ if (VK == SparcMCExpr::VK_Sparc_None) {
+ Error(getLoc(), "invalid operand modifier");
+ return MatchOperand_ParseFail;
+ }
+
+ if (!MatchesKind(VK)) {
+ // Did not match the specified set of relocation types, put '%' back.
+ getLexer().UnLex(Tok);
+ return MatchOperand_NoMatch;
+ }
+
+ Parser.Lex(); // Eat the identifier.
+ if (getLexer().getKind() != AsmToken::LParen) {
+ Error(getLoc(), "expected '('");
+ return MatchOperand_ParseFail;
+ }
+
+ getParser().Lex(); // Eat '('
+ const MCExpr *SubExpr;
+ if (getParser().parseParenExpression(SubExpr, E)) {
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *Val = adjustPICRelocation(VK, SubExpr);
+ Operands.push_back(SparcOperand::CreateImm(Val, S, E));
+ return MatchOperand_Success;
+}
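+// With this in place, the assembler accepts TLS tail relocations such as
+// (sketch):
+//   add %l7, %o0, %o0, %tgd_add(sym)
+//   call __tls_get_addr, %tgd_call(sym)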
+
OperandMatchResultTy SparcAsmParser::parseMembarTag(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const MCExpr *EVal;
@@ -1408,10 +1513,27 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
StringRef name = Tok.getString();
SparcMCExpr::VariantKind VK = SparcMCExpr::parseVariantKind(name);
+ switch (VK) {
+ case SparcMCExpr::VK_Sparc_None:
+ Error(getLoc(), "invalid operand modifier");
+ return false;
- if (VK == SparcMCExpr::VK_Sparc_None)
+ case SparcMCExpr::VK_Sparc_GOTDATA_OP:
+ case SparcMCExpr::VK_Sparc_TLS_GD_ADD:
+ case SparcMCExpr::VK_Sparc_TLS_GD_CALL:
+ case SparcMCExpr::VK_Sparc_TLS_IE_ADD:
+ case SparcMCExpr::VK_Sparc_TLS_IE_LD:
+ case SparcMCExpr::VK_Sparc_TLS_IE_LDX:
+ case SparcMCExpr::VK_Sparc_TLS_LDM_ADD:
+ case SparcMCExpr::VK_Sparc_TLS_LDM_CALL:
+ case SparcMCExpr::VK_Sparc_TLS_LDO_ADD:
+ // These are special-cased at tablegen level.
return false;
+ default:
+ break;
+ }
+
Parser.Lex(); // Eat the identifier.
if (Parser.getTok().getKind() != AsmToken::LParen)
return false;
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 259b37954183..cc132d46de85 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -174,17 +174,20 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (slot == MBB.begin())
return MBB.end();
- if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
+ unsigned Opc = slot->getOpcode();
+
+ if (Opc == SP::RET || Opc == SP::TLS_CALL)
return MBB.end();
- if (slot->getOpcode() == SP::RETL) {
+ if (Opc == SP::RETL || Opc == SP::TAIL_CALL || Opc == SP::TAIL_CALLri) {
MachineBasicBlock::iterator J = slot;
--J;
if (J->getOpcode() == SP::RESTORErr
|| J->getOpcode() == SP::RESTOREri) {
// change retl to ret.
- slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
+ if (Opc == SP::RETL)
+ slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET));
return J;
}
}
@@ -360,6 +363,8 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
case SP::CALLrr:
case SP::CALLri: structSizeOpNum = 2; break;
case SP::TLS_CALL: return false;
+ case SP::TAIL_CALLri:
+ case SP::TAIL_CALL: return false;
}
const MachineOperand &MO = I->getOperand(structSizeOpNum);
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 142124a8e0d9..1825b95dd6ac 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -14,8 +14,8 @@
#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/TargetRegistry.h"
@@ -32,7 +32,7 @@ class SparcDisassembler : public MCDisassembler {
public:
SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~SparcDisassembler() {}
+ virtual ~SparcDisassembler() = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -142,10 +142,9 @@ static const uint16_t CPPairDecoderTable[] = {
SP::C24_C25, SP::C26_C27, SP::C28_C29, SP::C30_C31
};
-static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = IntRegDecoderTable[RegNo];
@@ -153,10 +152,9 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeI64RegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeI64RegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = IntRegDecoderTable[RegNo];
@@ -164,11 +162,9 @@ static DecodeStatus DecodeI64RegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-
-static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = FPRegDecoderTable[RegNo];
@@ -176,11 +172,9 @@ static DecodeStatus DecodeFPRegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-
-static DecodeStatus DecodeDFPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeDFPRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = DFPRegDecoderTable[RegNo];
@@ -188,11 +182,9 @@ static DecodeStatus DecodeDFPRegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-
-static DecodeStatus DecodeQFPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeQFPRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -203,10 +195,9 @@ static DecodeStatus DecodeQFPRegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeCPRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
unsigned Reg = CPRegDecoderTable[RegNo];
@@ -216,7 +207,7 @@ static DecodeStatus DecodeCPRegsRegisterClass(MCInst &Inst,
static DecodeStatus DecodeFCCRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 3)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(FCCRegDecoderTable[RegNo]));
@@ -225,7 +216,7 @@ static DecodeStatus DecodeFCCRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(ASRRegDecoderTable[RegNo]));
@@ -233,8 +224,8 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo >= array_lengthof(PRRegDecoderTable))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo]));
@@ -242,7 +233,8 @@ static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
DecodeStatus S = MCDisassembler::Success;
if (RegNo > 31)
@@ -257,7 +249,8 @@ static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeCPPairRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -267,45 +260,52 @@ static DecodeStatus DecodeCPPairRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadCP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreQFP(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreCPPair(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCall(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSIMM13(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeCall(MCInst &Inst, unsigned insn, uint64_t Address,
+ const MCDisassembler *Decoder);
+static DecodeStatus DecodeSIMM13(MCInst &Inst, unsigned insn, uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeJMPL(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeReturn(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSWAP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTRAP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
#include "SparcGenDisassemblerTables.inc"
@@ -363,13 +363,12 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return MCDisassembler::Fail;
}
-
typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeMem(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder,
- bool isLoad, DecodeFunc DecodeRD) {
+ const MCDisassembler *Decoder, bool isLoad,
+ DecodeFunc DecodeRD) {
unsigned rd = fieldFromInstruction(insn, 25, 5);
unsigned rs1 = fieldFromInstruction(insn, 14, 5);
bool isImm = fieldFromInstruction(insn, 13, 1);
@@ -415,100 +414,106 @@ static DecodeStatus DecodeMem(MCInst &MI, unsigned insn, uint64_t Address,
}
static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeIntRegsRegisterClass);
}
-static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeIntPairRegisterClass);
}
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeFPRegsRegisterClass);
}
static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeDFPRegsRegisterClass);
}
static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeQFPRegsRegisterClass);
}
static DecodeStatus DecodeLoadCP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeCPRegsRegisterClass);
}
-static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeLoadCPPair(MCInst &Inst, unsigned insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeCPPairRegisterClass);
}
static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
DecodeIntRegsRegisterClass);
}
static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
DecodeIntPairRegisterClass);
}
static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
DecodeFPRegsRegisterClass);
}
static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
DecodeDFPRegsRegisterClass);
}
static DecodeStatus DecodeStoreQFP(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
DecodeQFPRegsRegisterClass);
}
-static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeStoreCP(MCInst &Inst, unsigned insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
DecodeCPRegsRegisterClass);
}
static DecodeStatus DecodeStoreCPPair(MCInst &Inst, unsigned insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
DecodeCPPairRegisterClass);
}
-static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
uint64_t Address, uint64_t Offset,
uint64_t Width, MCInst &MI,
- const void *Decoder) {
- const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
- return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
- Offset, Width);
+ const MCDisassembler *Decoder) {
+ return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
+ Width, /*InstSize=*/4);
}
-static DecodeStatus DecodeCall(MCInst &MI, unsigned insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned tgt = fieldFromInstruction(insn, 0, 30);
tgt <<= 2;
if (!tryAddingSymbolicOperand(tgt+Address, false, Address,
@@ -517,15 +522,15 @@ static DecodeStatus DecodeCall(MCInst &MI, unsigned insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSIMM13(MCInst &MI, unsigned insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeSIMM13(MCInst &MI, unsigned insn, uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned tgt = SignExtend32<13>(fieldFromInstruction(insn, 0, 13));
MI.addOperand(MCOperand::createImm(tgt));
return MCDisassembler::Success;
}
static DecodeStatus DecodeJMPL(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned rd = fieldFromInstruction(insn, 25, 5);
unsigned rs1 = fieldFromInstruction(insn, 14, 5);
@@ -559,7 +564,7 @@ static DecodeStatus DecodeJMPL(MCInst &MI, unsigned insn, uint64_t Address,
}
static DecodeStatus DecodeReturn(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned rs1 = fieldFromInstruction(insn, 14, 5);
unsigned isImm = fieldFromInstruction(insn, 13, 1);
@@ -587,7 +592,7 @@ static DecodeStatus DecodeReturn(MCInst &MI, unsigned insn, uint64_t Address,
}
static DecodeStatus DecodeSWAP(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned rd = fieldFromInstruction(insn, 25, 5);
unsigned rs1 = fieldFromInstruction(insn, 14, 5);
@@ -627,7 +632,7 @@ static DecodeStatus DecodeSWAP(MCInst &MI, unsigned insn, uint64_t Address,
}
static DecodeStatus DecodeTRAP(MCInst &MI, unsigned insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned rs1 = fieldFromInstruction(insn, 14, 5);
unsigned isImm = fieldFromInstruction(insn, 13, 1);
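
The change in this file is mechanical: every decoder callback, and the
tryAddingSymbolicOperand helper, now receives the disassembler as a typed
const MCDisassembler * instead of an untyped const void *, matching the
MCFixedLenDisassembler.h -> MCDecoderOps.h include rename at the top of the
hunk. A minimal standalone sketch of what the typed parameter buys (stand-in
types, not the real LLVM headers):

// Illustrative sketch, not part of this patch.
#include <cstdint>
#include <iostream>

struct MCDisassembler { // stand-in for llvm::MCDisassembler
  bool tryAddingSymbolicOperand(int64_t Value) const {
    std::cout << "symbolizing " << Value << "\n";
    return true;
  }
};

// Old shape: an untyped context pointer that every callback had to cast.
static bool symbolizeOld(int64_t Value, const void *Decoder) {
  const auto *Dis = static_cast<const MCDisassembler *>(Decoder);
  return Dis->tryAddingSymbolicOperand(Value);
}

// New shape: the cast disappears and the compiler checks the argument type.
static bool symbolizeNew(int64_t Value, const MCDisassembler *Decoder) {
  return Decoder->tryAddingSymbolicOperand(Value);
}

int main() {
  MCDisassembler Dis;
  return symbolizeOld(42, &Dis) && symbolizeNew(42, &Dis) ? 0 : 1;
}
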
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 4d69040a4508..7b2d8afd3605 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -47,6 +47,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Sparc::fixup_sparc_br16_14:
return (Value >> 2) & 0x3fff;
+ case Sparc::fixup_sparc_hix22:
+ return (~Value >> 10) & 0x3fffff;
+
case Sparc::fixup_sparc_pc22:
case Sparc::fixup_sparc_got22:
case Sparc::fixup_sparc_tls_gd_hi22:
@@ -60,6 +63,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Sparc::fixup_sparc_13:
return Value & 0x1fff;
+ case Sparc::fixup_sparc_lox10:
+ return (Value & 0x3ff) | 0x1c00;
+
case Sparc::fixup_sparc_pc10:
case Sparc::fixup_sparc_got10:
case Sparc::fixup_sparc_tls_gd_lo10:
@@ -98,6 +104,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Sparc::fixup_sparc_tls_ie_ld:
case Sparc::fixup_sparc_tls_ie_ldx:
case Sparc::fixup_sparc_tls_ie_add:
+ case Sparc::fixup_sparc_gotdata_lox10:
+ case Sparc::fixup_sparc_gotdata_hix22:
+ case Sparc::fixup_sparc_gotdata_op:
return 0;
}
}
@@ -189,7 +198,12 @@ namespace {
{ "fixup_sparc_tls_ie_ldx", 0, 0, 0 },
{ "fixup_sparc_tls_ie_add", 0, 0, 0 },
{ "fixup_sparc_tls_le_hix22", 0, 0, 0 },
- { "fixup_sparc_tls_le_lox10", 0, 0, 0 }
+ { "fixup_sparc_tls_le_lox10", 0, 0, 0 },
+ { "fixup_sparc_hix22", 10, 22, 0 },
+ { "fixup_sparc_lox10", 19, 13, 0 },
+ { "fixup_sparc_gotdata_hix22", 0, 0, 0 },
+ { "fixup_sparc_gotdata_lox10", 0, 0, 0 },
+ { "fixup_sparc_gotdata_op", 0, 0, 0 },
};
const static MCFixupKindInfo InfosLE[Sparc::NumTargetFixupKinds] = {
@@ -231,7 +245,12 @@ namespace {
{ "fixup_sparc_tls_ie_ldx", 0, 0, 0 },
{ "fixup_sparc_tls_ie_add", 0, 0, 0 },
{ "fixup_sparc_tls_le_hix22", 0, 0, 0 },
- { "fixup_sparc_tls_le_lox10", 0, 0, 0 }
+ { "fixup_sparc_tls_le_lox10", 0, 0, 0 },
+ { "fixup_sparc_hix22", 0, 22, 0 },
+ { "fixup_sparc_lox10", 0, 13, 0 },
+ { "fixup_sparc_gotdata_hix22", 0, 0, 0 },
+ { "fixup_sparc_gotdata_lox10", 0, 0, 0 },
+ { "fixup_sparc_gotdata_op", 0, 0, 0 },
};
// Fixup kinds from .reloc directive are like R_SPARC_NONE. They do
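
The two new value adjustments implement the SPARC %hix/%lox operators: %hix
stores the complemented bits 31..10 of the value and %lox ORs the low ten
bits with 0x1c00, forcing the sign bit of the 13-bit immediate field. The
pair reconstructs a sign-extended negative 32-bit value via a sethi/xor
sequence. A standalone check of that arithmetic (plain C++, no LLVM
dependencies; illustration only):

// Illustrative sketch, not part of this patch: verifies the %hix/%lox
// arithmetic added to adjustFixupValue() above.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t hix22(uint64_t V) { return (~V >> 10) & 0x3fffff; } // fixup_sparc_hix22
static uint32_t lox10(uint64_t V) { return (V & 0x3ff) | 0x1c00; }  // fixup_sparc_lox10

int main() {
  // %hix/%lox target sign-extended negative 32-bit values (~V fits in 32 bits).
  int64_t V = -0x12345;
  // sethi %hix(V), %rd : imm22 lands in bits 31..10, everything else is zero.
  uint64_t Rd = (uint64_t)hix22(V) << 10;
  // xor %rd, %lox(V), %rd : the immediate sign-extends; bit 12 of 0x1c00
  // guarantees it is negative, giving ones in bits 63..10 and V's low ten
  // bits in bits 9..0.
  int64_t Simm13 = (int64_t)lox10(V) - 0x2000; // sign-extend the 13-bit field
  Rd ^= (uint64_t)Simm13;
  assert((int64_t)Rd == V); // the complement/xor pair round-trips
  printf("0x%llx\n", (unsigned long long)Rd);
  return 0;
}
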
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 02261dc5c4cd..9c50c41f6bf2 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -26,7 +26,7 @@ namespace {
Is64Bit ? ELF::EM_SPARCV9 : ELF::EM_SPARC,
/*HasRelocationAddend*/ true) {}
- ~SparcELFObjectWriter() override {}
+ ~SparcELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
@@ -112,6 +112,11 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
case Sparc::fixup_sparc_tls_ie_add: return ELF::R_SPARC_TLS_IE_ADD;
case Sparc::fixup_sparc_tls_le_hix22: return ELF::R_SPARC_TLS_LE_HIX22;
case Sparc::fixup_sparc_tls_le_lox10: return ELF::R_SPARC_TLS_LE_LOX10;
+ case Sparc::fixup_sparc_hix22: return ELF::R_SPARC_HIX22;
+ case Sparc::fixup_sparc_lox10: return ELF::R_SPARC_LOX10;
+ case Sparc::fixup_sparc_gotdata_hix22: return ELF::R_SPARC_GOTDATA_HIX22;
+ case Sparc::fixup_sparc_gotdata_lox10: return ELF::R_SPARC_GOTDATA_LOX10;
+ case Sparc::fixup_sparc_gotdata_op: return ELF::R_SPARC_GOTDATA_OP;
}
return ELF::R_SPARC_NONE;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index e0a43095ec0b..701d8513e657 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -95,6 +95,18 @@ namespace llvm {
fixup_sparc_tls_le_hix22,
fixup_sparc_tls_le_lox10,
+ /// 22-bit fixup corresponding to %hix(foo)
+ fixup_sparc_hix22,
+ /// 13-bit fixup corresponding to %lox(foo)
+ fixup_sparc_lox10,
+
+ /// 22-bit fixup corresponding to %gdop_hix22(foo)
+ fixup_sparc_gotdata_hix22,
+ /// 13-bit fixup corresponding to %gdop_lox10(foo)
+ fixup_sparc_gotdata_lox10,
+ /// 32-bit fixup corresponding to %gdop(foo)
+ fixup_sparc_gotdata_op,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 9f8522541332..d75d41b35838 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -104,17 +104,21 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
support::endian::write(OS, Bits,
Ctx.getAsmInfo()->isLittleEndian() ? support::little
: support::big);
- unsigned tlsOpNo = 0;
+
+ // Some instructions have phantom operands that only contribute a fixup entry.
+ unsigned SymOpNo = 0;
switch (MI.getOpcode()) {
default: break;
- case SP::TLS_CALL: tlsOpNo = 1; break;
+ case SP::TLS_CALL: SymOpNo = 1; break;
+ case SP::GDOP_LDrr:
+ case SP::GDOP_LDXrr:
case SP::TLS_ADDrr:
case SP::TLS_ADDXrr:
case SP::TLS_LDrr:
- case SP::TLS_LDXrr: tlsOpNo = 3; break;
+ case SP::TLS_LDXrr: SymOpNo = 3; break;
}
- if (tlsOpNo != 0) {
- const MCOperand &MO = MI.getOperand(tlsOpNo);
+ if (SymOpNo != 0) {
+ const MCOperand &MO = MI.getOperand(SymOpNo);
uint64_t op = getMachineOpValue(MI, MO, Fixups, STI);
assert(op == 0 && "Unexpected operand value!");
(void)op; // suppress warning.
@@ -253,7 +257,6 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
#include "SparcGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new SparcMCCodeEmitter(MCII, Ctx);
}
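
The renamed SymOpNo switch generalizes the TLS special case to the new GDOP
loads: these instructions carry a trailing symbol operand that encodes as
zero and exists only so that evaluating it records a fixup, which is what
the patch's "phantom operands" comment means. A schematic sketch of the
pattern (stand-in types, not the real emitter):

// Illustrative sketch, not part of this patch.
#include <cassert>
#include <cstdint>
#include <vector>

struct Fixup { unsigned Offset; unsigned Kind; };

// Stand-in for getMachineOpValue() on a symbolic operand: the encoded value
// is 0, and a fixup describing the relocation is appended as a side effect.
static uint64_t evalPhantomOperand(unsigned FixupKind,
                                   std::vector<Fixup> &Fixups) {
  Fixups.push_back({/*Offset=*/0, FixupKind});
  return 0;
}

int main() {
  std::vector<Fixup> Fixups;
  uint64_t Op = evalPhantomOperand(/*hypothetical kind id*/ 1, Fixups);
  assert(Op == 0 && "Unexpected operand value!"); // same assert as the patch
  assert(Fixups.size() == 1); // the operand's only contribution
  return 0;
}
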
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index c2db4526ef66..cc73ea7e6120 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -80,6 +81,11 @@ bool SparcMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind)
case VK_Sparc_TLS_IE_ADD: OS << "%tie_add("; return true;
case VK_Sparc_TLS_LE_HIX22: OS << "%tle_hix22("; return true;
case VK_Sparc_TLS_LE_LOX10: OS << "%tle_lox10("; return true;
+ case VK_Sparc_HIX22: OS << "%hix("; return true;
+ case VK_Sparc_LOX10: OS << "%lox("; return true;
+ case VK_Sparc_GOTDATA_HIX22: OS << "%gdop_hix22("; return true;
+ case VK_Sparc_GOTDATA_LOX10: OS << "%gdop_lox10("; return true;
+ case VK_Sparc_GOTDATA_OP: OS << "%gdop("; return true;
}
llvm_unreachable("Unhandled SparcMCExpr::VariantKind");
}
@@ -119,6 +125,11 @@ SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name)
.Case("tie_add", VK_Sparc_TLS_IE_ADD)
.Case("tle_hix22", VK_Sparc_TLS_LE_HIX22)
.Case("tle_lox10", VK_Sparc_TLS_LE_LOX10)
+ .Case("hix", VK_Sparc_HIX22)
+ .Case("lox", VK_Sparc_LOX10)
+ .Case("gdop_hix22", VK_Sparc_GOTDATA_HIX22)
+ .Case("gdop_lox10", VK_Sparc_GOTDATA_LOX10)
+ .Case("gdop", VK_Sparc_GOTDATA_OP)
.Default(VK_Sparc_None);
}
@@ -159,6 +170,11 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) {
case VK_Sparc_TLS_IE_ADD: return Sparc::fixup_sparc_tls_ie_add;
case VK_Sparc_TLS_LE_HIX22: return Sparc::fixup_sparc_tls_le_hix22;
case VK_Sparc_TLS_LE_LOX10: return Sparc::fixup_sparc_tls_le_lox10;
+ case VK_Sparc_HIX22: return Sparc::fixup_sparc_hix22;
+ case VK_Sparc_LOX10: return Sparc::fixup_sparc_lox10;
+ case VK_Sparc_GOTDATA_HIX22: return Sparc::fixup_sparc_gotdata_hix22;
+ case VK_Sparc_GOTDATA_LOX10: return Sparc::fixup_sparc_gotdata_lox10;
+ case VK_Sparc_GOTDATA_OP: return Sparc::fixup_sparc_gotdata_op;
}
}
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index 504e959194f5..d98ad26c96a9 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -58,7 +58,12 @@ public:
VK_Sparc_TLS_IE_LDX,
VK_Sparc_TLS_IE_ADD,
VK_Sparc_TLS_LE_HIX22,
- VK_Sparc_TLS_LE_LOX10
+ VK_Sparc_TLS_LE_LOX10,
+ VK_Sparc_HIX22,
+ VK_Sparc_LOX10,
+ VK_Sparc_GOTDATA_HIX22,
+ VK_Sparc_GOTDATA_LOX10,
+ VK_Sparc_GOTDATA_OP,
};
private:
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index f360946b9a79..7ef043d9df40 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -29,7 +29,6 @@ class MCTargetOptions;
class Target;
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createSparcAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td
index db540d6f0c42..e6d23f741ea5 100644
--- a/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -134,7 +134,7 @@ def RetCC_Sparc64 : CallingConv<[
// Callee-saved registers are handled by the register window mechanism.
def CSR : CalleeSavedRegs<(add)> {
let OtherPreserved = (add (sequence "I%u", 0, 7),
- (sequence "L%u", 0, 7));
+ (sequence "L%u", 0, 7), O6);
}
// Callee-saved registers for calls with ReturnsTwice attribute.
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index a740de9123c9..000418be9a9e 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -218,8 +218,9 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
- assert(MBBI->getOpcode() == SP::RETL &&
- "Can only put epilog before 'retl' instruction!");
+ assert((MBBI->getOpcode() == SP::RETL || MBBI->getOpcode() == SP::TAIL_CALL ||
+ MBBI->getOpcode() == SP::TAIL_CALLri) &&
+ "Can only put epilog before 'retl' or 'tail_call' instruction!");
if (!FuncInfo->isLeafProc()) {
BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
.addReg(SP::G0);
@@ -228,10 +229,19 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
int NumBytes = (int) MFI.getStackSize();
- if (NumBytes == 0)
- return;
-
- emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
+ if (NumBytes != 0)
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
+
+ // Preserve return address in %o7
+ if (MBBI->getOpcode() == SP::TAIL_CALL) {
+ MBB.addLiveIn(SP::O7);
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::G1)
+ .addReg(SP::G0)
+ .addReg(SP::O7);
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::O7)
+ .addReg(SP::G0)
+ .addReg(SP::G1);
+ }
}
bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
@@ -316,10 +326,11 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
- return !(MFI.hasCalls() // has calls
- || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
- || MRI.isPhysRegUsed(SP::O6) // %sp is used
- || hasFP(MF)); // need %fp
+ return !(MFI.hasCalls() // has calls
+ || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
+ || MRI.isPhysRegUsed(SP::O6) // %sp is used
+ || hasFP(MF) // need %fp
+ || MF.hasInlineAsm()); // has inline assembly
}
void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
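
Two behavioral changes land in emitEpilogue: the early return on a
zero-sized stack is gone, since a TAIL_CALL terminator still needs the %o7
handling that follows, and for TAIL_CALL the return address is copied out to
%g1 and back, per the patch's own "Preserve return address in %o7" comment.
A flattened sketch of the resulting flow (a printing stand-in, not the real
MachineInstr builders):

// Illustrative sketch, not part of this patch: the epilogue flow after the
// change above, with instructions printed instead of built.
#include <cstdio>

enum Opcode { RETL, TAIL_CALL, TAIL_CALLri };

static void emitEpilogue(Opcode Terminator, bool LeafProc, int StackBytes) {
  if (!LeafProc)
    std::puts("restore %g0, %g0, %g0");
  if (StackBytes != 0) // no early return anymore: tail calls continue below
    std::printf("add %%sp, %d, %%sp\n", StackBytes);
  if (Terminator == TAIL_CALL) {
    // "Preserve return address in %o7" (the patch's comment): two ORrr
    // copies route %o7 through the global %g1 and back.
    std::puts("or %g0, %o7, %g1");
    std::puts("or %g0, %g1, %o7");
  }
}

int main() {
  emitEpilogue(TAIL_CALL, /*LeafProc=*/false, 96);
  return 0;
}
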
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 6d6879bc94b3..2cb74e7709c7 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -710,6 +710,36 @@ static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
}
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization.
+bool SparcTargetLowering::IsEligibleForTailCallOptimization(
+ CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF) const {
+
+ auto &Outs = CLI.Outs;
+ auto &Caller = MF.getFunction();
+
+ // Do not tail call opt functions with "disable-tail-calls" attribute.
+ if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ return false;
+
+ // Do not tail call opt if the stack is used to pass parameters.
+ if (CCInfo.getNextStackOffset() != 0)
+ return false;
+
+ // Do not tail call opt if either the callee or caller returns
+ // a struct and the other does not.
+ if (!Outs.empty() && Caller.hasStructRetAttr() != Outs[0].Flags.isSRet())
+ return false;
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call.
+ for (auto &Arg : Outs)
+ if (Arg.Flags.isByVal())
+ return false;
+
+ return true;
+}
+
// Lower a call for the 32-bit ABI.
SDValue
SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
@@ -725,15 +755,15 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
- // Sparc target does not yet support tail call optimization.
- isTailCall = false;
-
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
+ isTailCall = isTailCall && IsEligibleForTailCallOptimization(
+ CCInfo, CLI, DAG.getMachineFunction());
+
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
@@ -771,7 +801,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
}
- Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
+ assert(!isTailCall || ArgsSize == 0);
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -816,6 +849,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (Flags.isSRet()) {
assert(VA.needsCustom());
+
+ if (isTailCall)
+ continue;
+
// store SRet argument in %sp+64
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(64, dl);
@@ -825,9 +862,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
hasStructRetAttr = true;
// sret only allowed on first argument
assert(Outs[realArgIdx].OrigArgIndex == 0);
- PointerType *Ty = cast<PointerType>(CLI.getArgs()[0].Ty);
- Type *ElementTy = Ty->getPointerElementType();
- SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
+ SRetArgSize =
+ DAG.getDataLayout().getTypeAllocSize(CLI.getArgs()[0].IndirectType);
continue;
}
@@ -929,7 +965,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Register Reg = toCallerWindow(RegsToPass[i].first);
+ Register Reg = RegsToPass[i].first;
+ if (!isTailCall)
+ Reg = toCallerWindow(Reg);
Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
@@ -953,9 +991,12 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Callee);
if (hasStructRetAttr)
Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32));
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
- RegsToPass[i].second.getValueType()));
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Register Reg = RegsToPass[i].first;
+ if (!isTailCall)
+ Reg = toCallerWindow(Reg);
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+ }
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -969,6 +1010,11 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (InFlag.getNode())
Ops.push_back(InFlag);
+ if (isTailCall) {
+ DAG.getMachineFunction().getFrameInfo().setHasTailCall();
+ return DAG.getNode(SPISD::TAIL_CALL, dl, MVT::Other, Ops);
+ }
+
Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
@@ -1408,7 +1454,7 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
const SparcSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
- MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
+ MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Instructions which use registers as conditionals examine all the
// bits (as does the pseudo SELECT_CC expansion). I don't think it
@@ -1853,6 +1899,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::TLS_ADD: return "SPISD::TLS_ADD";
case SPISD::TLS_LD: return "SPISD::TLS_LD";
case SPISD::TLS_CALL: return "SPISD::TLS_CALL";
+ case SPISD::TAIL_CALL: return "SPISD::TAIL_CALL";
+ case SPISD::LOAD_GDOP: return "SPISD::LOAD_GDOP";
}
return nullptr;
}
@@ -2178,8 +2226,10 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
RetPtr = DAG.getFrameIndex(RetFI, PtrVT);
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
- if (!Subtarget->is64Bit())
+ if (!Subtarget->is64Bit()) {
Entry.IsSRet = true;
+ Entry.IndirectType = RetTy;
+ }
Entry.IsReturned = false;
Args.push_back(Entry);
RetTyABI = Type::getVoidTy(*DAG.getContext());
@@ -3126,6 +3176,11 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case SP::SELECT_CC_DFP_ICC:
case SP::SELECT_CC_QFP_ICC:
return expandSelectCC(MI, BB, SP::BCOND);
+ case SP::SELECT_CC_Int_XCC:
+ case SP::SELECT_CC_FP_XCC:
+ case SP::SELECT_CC_DFP_XCC:
+ case SP::SELECT_CC_QFP_XCC:
+ return expandSelectCC(MI, BB, SP::BPXCC);
case SP::SELECT_CC_Int_FCC:
case SP::SELECT_CC_FP_FCC:
case SP::SELECT_CC_DFP_FCC:
@@ -3276,6 +3331,9 @@ std::pair<unsigned, const TargetRegisterClass *>
SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
+ if (Constraint.empty())
+ return std::make_pair(0U, nullptr);
+
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
@@ -3304,46 +3362,60 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// This will generate an error message
return std::make_pair(0U, nullptr);
}
- } else if (!Constraint.empty() && Constraint.size() <= 5
- && Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
- // constraint = '{r<d>}'
- // Remove the braces from around the name.
- StringRef name(Constraint.data()+1, Constraint.size()-2);
- // Handle register aliases:
- // r0-r7 -> g0-g7
- // r8-r15 -> o0-o7
- // r16-r23 -> l0-l7
- // r24-r31 -> i0-i7
- uint64_t intVal = 0;
- if (name.substr(0, 1).equals("r")
- && !name.substr(1).getAsInteger(10, intVal) && intVal <= 31) {
- const char regTypes[] = { 'g', 'o', 'l', 'i' };
- char regType = regTypes[intVal/8];
- char regIdx = '0' + (intVal % 8);
- char tmp[] = { '{', regType, regIdx, '}', 0 };
- std::string newConstraint = std::string(tmp);
- return TargetLowering::getRegForInlineAsmConstraint(TRI, newConstraint,
- VT);
- }
- if (name.substr(0, 1).equals("f") &&
- !name.substr(1).getAsInteger(10, intVal) && intVal <= 63) {
- std::string newConstraint;
-
- if (VT == MVT::f32 || VT == MVT::Other) {
- newConstraint = "{f" + utostr(intVal) + "}";
- } else if (VT == MVT::f64 && (intVal % 2 == 0)) {
- newConstraint = "{d" + utostr(intVal / 2) + "}";
- } else if (VT == MVT::f128 && (intVal % 4 == 0)) {
- newConstraint = "{q" + utostr(intVal / 4) + "}";
- } else {
- return std::make_pair(0U, nullptr);
- }
- return TargetLowering::getRegForInlineAsmConstraint(TRI, newConstraint,
- VT);
+ }
+
+ if (Constraint.front() != '{')
+ return std::make_pair(0U, nullptr);
+
+ assert(Constraint.back() == '}' && "Not a brace enclosed constraint?");
+ StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
+ if (RegName.empty())
+ return std::make_pair(0U, nullptr);
+
+ unsigned long long RegNo;
+ // Handle numbered register aliases.
+ if (RegName[0] == 'r' &&
+ getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) {
+ // r0-r7 -> g0-g7
+ // r8-r15 -> o0-o7
+ // r16-r23 -> l0-l7
+ // r24-r31 -> i0-i7
+ if (RegNo > 31)
+ return std::make_pair(0U, nullptr);
+ const char RegTypes[] = {'g', 'o', 'l', 'i'};
+ char RegType = RegTypes[RegNo / 8];
+ char RegIndex = '0' + (RegNo % 8);
+ char Tmp[] = {'{', RegType, RegIndex, '}', 0};
+ return getRegForInlineAsmConstraint(TRI, Tmp, VT);
+ }
+
+ // Rewrite the fN constraint according to the value type if needed.
+ if (VT != MVT::f32 && VT != MVT::Other && RegName[0] == 'f' &&
+ getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) {
+ if (VT == MVT::f64 && (RegNo % 2 == 0)) {
+ return getRegForInlineAsmConstraint(
+ TRI, StringRef("{d" + utostr(RegNo / 2) + "}"), VT);
+ } else if (VT == MVT::f128 && (RegNo % 4 == 0)) {
+ return getRegForInlineAsmConstraint(
+ TRI, StringRef("{q" + utostr(RegNo / 4) + "}"), VT);
+ } else {
+ return std::make_pair(0U, nullptr);
}
}
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ auto ResultPair =
+ TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ if (!ResultPair.second)
+ return std::make_pair(0U, nullptr);
+
+ // Force the use of I64Regs over IntRegs for 64-bit values.
+ if (Subtarget->is64Bit() && VT == MVT::i64) {
+ assert(ResultPair.second == &SP::IntRegsRegClass &&
+ "Unexpected register class");
+ return std::make_pair(ResultPair.first, &SP::I64RegsRegClass);
+ }
+
+ return ResultPair;
}
bool
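
IsEligibleForTailCallOptimization gates the 32-bit lowering on four
conditions: no "disable-tail-calls" attribute on the caller, no stack-passed
arguments, matching sret-ness between caller and callee, and no byval
arguments. A standalone distillation of those checks (simplified argument
model, not the real CCState/CallLoweringInfo types):

// Illustrative sketch, not part of this patch: the four checks from
// IsEligibleForTailCallOptimization over a simplified argument model.
#include <cassert>
#include <vector>

struct OutArg { bool IsSRet; bool IsByVal; };

struct CallSite {
  bool CallerDisablesTailCalls; // "disable-tail-calls"="true" on the caller
  unsigned StackBytesForArgs;   // CCInfo.getNextStackOffset()
  bool CallerHasStructRet;      // Caller.hasStructRetAttr()
  std::vector<OutArg> Outs;     // CLI.Outs
};

static bool isEligibleForTailCall(const CallSite &CS) {
  if (CS.CallerDisablesTailCalls)
    return false;
  if (CS.StackBytesForArgs != 0) // every argument must travel in registers
    return false;
  if (!CS.Outs.empty() && CS.CallerHasStructRet != CS.Outs[0].IsSRet)
    return false; // caller and callee must agree on returning via sret
  for (const OutArg &A : CS.Outs)
    if (A.IsByVal) // byval points into the stack area a tail call reuses
      return false;
  return true;
}

int main() {
  assert(isEligibleForTailCall({false, 0, false, {{false, false}}}));
  assert(!isEligibleForTailCall({false, 0, false, {{false, true}}}));
  return 0;
}
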
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 5c9703823a64..2768bb20566a 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -44,9 +44,13 @@ namespace llvm {
GLOBAL_BASE_REG, // Global base reg for PIC.
FLUSHW, // FLUSH register windows to stack.
+ TAIL_CALL, // Tail call
+
TLS_ADD, // For Thread Local Storage (TLS).
TLS_LD,
- TLS_CALL
+ TLS_CALL,
+
+ LOAD_GDOP, // Load operation w/ gdop relocation.
};
}
@@ -182,6 +186,10 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ bool IsEligibleForTailCallOptimization(CCState &CCInfo,
+ CallLoweringInfo &CLI,
+ MachineFunction &MF) const;
+
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
diff --git a/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index df65c5457c1d..a471d65201c3 100644
--- a/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -163,7 +163,7 @@ defm ADDX : F3_12<"add", 0b000000, add, I64Regs, i64, i64imm>;
defm SUBX : F3_12<"sub", 0b000100, sub, I64Regs, i64, i64imm>;
def TLS_ADDXrr : F3_1<2, 0b000000, (outs I64Regs:$rd),
- (ins I64Regs:$rs1, I64Regs:$rs2, TLSSym:$sym),
+ (ins I64Regs:$rs1, I64Regs:$rs2, TailRelocSymTLSAdd:$sym),
"add $rs1, $rs2, $rd, $sym",
[(set i64:$rd,
(tlsadd i64:$rs1, i64:$rs2, tglobaltlsaddr:$sym))]>;
@@ -238,12 +238,20 @@ let Predicates = [Is64Bit] in {
let DecoderMethod = "DecodeLoadInt" in
defm LDX : Load<"ldx", 0b001011, load, I64Regs, i64>;
-let mayLoad = 1, isAsmParserOnly = 1 in
+let mayLoad = 1, isAsmParserOnly = 1 in {
def TLS_LDXrr : F3_1<3, 0b001011,
- (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
+ (outs IntRegs:$dst),
+ (ins MEMrr:$addr, TailRelocSymTLSLoad:$sym),
"ldx [$addr], $dst, $sym",
[(set i64:$dst,
(tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>;
+ def GDOP_LDXrr : F3_1<3, 0b001011,
+ (outs I64Regs:$dst),
+ (ins MEMrr:$addr, TailRelocSymGOTLoad:$sym),
+ "ldx [$addr], $dst, $sym",
+ [(set i64:$dst,
+ (load_gdop ADDRrr:$addr, tglobaladdr:$sym))]>;
+}
// Extending loads to i64.
def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
@@ -336,6 +344,7 @@ def FMOVD_XCC : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd),
"fmovd$cond %xcc, $rs2, $rd",
[(set f64:$rd,
(SPselectxcc f64:$rs2, f64:$f, imm:$cond))]>;
+let Predicates = [Is64Bit, HasHardQuad] in
def FMOVQ_XCC : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
(ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
"fmovq$cond %xcc, $rs2, $rd",
@@ -436,11 +445,11 @@ def FXTOD : F3_3u<2, 0b110100, 0b010001000,
(outs DFPRegs:$rd), (ins DFPRegs:$rs2),
"fxtod $rs2, $rd",
[(set DFPRegs:$rd, (SPxtof DFPRegs:$rs2))]>;
+let Predicates = [Is64Bit, HasHardQuad] in
def FXTOQ : F3_3u<2, 0b110100, 0b010001100,
(outs QFPRegs:$rd), (ins DFPRegs:$rs2),
"fxtoq $rs2, $rd",
- [(set QFPRegs:$rd, (SPxtof DFPRegs:$rs2))]>,
- Requires<[HasHardQuad]>;
+ [(set QFPRegs:$rd, (SPxtof DFPRegs:$rs2))]>;
def FSTOX : F3_3u<2, 0b110100, 0b010000001,
(outs DFPRegs:$rd), (ins FPRegs:$rs2),
@@ -450,11 +459,11 @@ def FDTOX : F3_3u<2, 0b110100, 0b010000010,
(outs DFPRegs:$rd), (ins DFPRegs:$rs2),
"fdtox $rs2, $rd",
[(set DFPRegs:$rd, (SPftox DFPRegs:$rs2))]>;
+let Predicates = [Is64Bit, HasHardQuad] in
def FQTOX : F3_3u<2, 0b110100, 0b010000011,
(outs DFPRegs:$rd), (ins QFPRegs:$rs2),
"fqtox $rs2, $rd",
- [(set DFPRegs:$rd, (SPftox QFPRegs:$rs2))]>,
- Requires<[HasHardQuad]>;
+ [(set DFPRegs:$rd, (SPftox QFPRegs:$rs2))]>;
} // Predicates = [Is64Bit]
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5e305fc9df71..481bd7d2f7fa 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -147,7 +147,29 @@ def MEMri : Operand<iPTR> {
let ParserMatchClass = SparcMEMriAsmOperand;
}
-def TLSSym : Operand<iPTR>;
+// Represents a tail relocation operand for instructions such as add, ld, call.
+class SparcTailRelocSymAsmOperand<string Kind> : AsmOperandClass {
+ let Name = "TailRelocSym" # Kind;
+ let RenderMethod = "addTailRelocSymOperands";
+ let PredicateMethod = "isTailRelocSym";
+ let ParserMethod = "parseTailRelocSym<TailRelocKind::" # Kind # ">";
+}
+
+def TailRelocSymGOTLoad : Operand<iPTR> {
+ let ParserMatchClass = SparcTailRelocSymAsmOperand<"Load_GOT">;
+}
+
+def TailRelocSymTLSAdd : Operand<iPTR> {
+ let ParserMatchClass = SparcTailRelocSymAsmOperand<"Add_TLS">;
+}
+
+def TailRelocSymTLSLoad : Operand<iPTR> {
+ let ParserMatchClass = SparcTailRelocSymAsmOperand<"Load_TLS">;
+}
+
+def TailRelocSymTLSCall : Operand<iPTR> {
+ let ParserMatchClass = SparcTailRelocSymAsmOperand<"Call_TLS">;
+}
def SparcMembarTagAsmOperand : AsmOperandClass {
let Name = "MembarTag";
@@ -214,6 +236,9 @@ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDTSPtlsld :
SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def SDTSPloadgdop :
+SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
@@ -248,6 +273,10 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def tailcall : SDNode<"SPISD::TAIL_CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -261,6 +290,8 @@ def tlscall : SDNode<"SPISD::TLS_CALL", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def load_gdop : SDNode<"SPISD::LOAD_GDOP", SDTSPloadgdop>;
+
def getPCX : Operand<iPTR> {
let PrintMethod = "printGetPCX";
}
@@ -484,6 +515,27 @@ let Uses = [ICC], usesCustomInserter = 1 in {
[(set f128:$dst, (SPselecticc f128:$T, f128:$F, imm:$Cond))]>;
}
+let Uses = [ICC], usesCustomInserter = 1 in {
+ def SELECT_CC_Int_XCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_XCC PSEUDO!",
+ [(set i32:$dst, (SPselectxcc i32:$T, i32:$F, imm:$Cond))]>;
+ def SELECT_CC_FP_XCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_XCC PSEUDO!",
+ [(set f32:$dst, (SPselectxcc f32:$T, f32:$F, imm:$Cond))]>;
+
+ def SELECT_CC_DFP_XCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_XCC PSEUDO!",
+ [(set f64:$dst, (SPselectxcc f64:$T, f64:$F, imm:$Cond))]>;
+
+ def SELECT_CC_QFP_XCC
+ : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_QFP_XCC PSEUDO!",
+ [(set f128:$dst, (SPselectxcc f128:$T, f128:$F, imm:$Cond))]>;
+}
+
let usesCustomInserter = 1, Uses = [FCC0] in {
def SELECT_CC_Int_FCC
@@ -562,6 +614,15 @@ let DecoderMethod = "DecodeLoadFP" in
}
}
+let mayLoad = 1, isAsmParserOnly = 1 in {
+ def GDOP_LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst),
+ (ins MEMrr:$addr, TailRelocSymGOTLoad:$sym),
+ "ld [$addr], $dst, $sym",
+ [(set i32:$dst,
+ (load_gdop ADDRrr:$addr, tglobaladdr:$sym))]>;
+}
+
// Section B.4 - Store Integer Instructions, p. 95
let DecoderMethod = "DecodeStoreInt" in {
defm STB : StoreA<"stb", 0b000101, 0b010101, truncstorei8, IntRegs, i32>;
@@ -1344,21 +1405,24 @@ let Defs = [FCC0], rd = 0, isCodeGenOnly = 1 in {
let isAsmParserOnly = 1 in {
def TLS_ADDrr : F3_1<2, 0b000000,
(outs IntRegs:$rd),
- (ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym),
+ (ins IntRegs:$rs1, IntRegs:$rs2, TailRelocSymTLSAdd:$sym),
"add $rs1, $rs2, $rd, $sym",
[(set i32:$rd,
(tlsadd i32:$rs1, i32:$rs2, tglobaltlsaddr:$sym))]>;
-let mayLoad = 1 in
+let mayLoad = 1 in {
def TLS_LDrr : F3_1<3, 0b000000,
- (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
+ (outs IntRegs:$dst),
+ (ins MEMrr:$addr, TailRelocSymTLSLoad:$sym),
"ld [$addr], $dst, $sym",
[(set i32:$dst,
(tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>;
+}
let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
def TLS_CALL : InstSP<(outs),
- (ins calltarget:$disp, TLSSym:$sym, variable_ops),
+ (ins calltarget:$disp, TailRelocSymTLSCall:$sym,
+ variable_ops),
"call $disp, $sym",
[(tlscall texternalsym:$disp, tglobaltlsaddr:$sym)],
IIC_jmp_or_call> {
@@ -1369,6 +1433,31 @@ let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
}
//===----------------------------------------------------------------------===//
+// Instructions for tail calls.
+//===----------------------------------------------------------------------===//
+let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1,
+ isTerminator = 1, isBarrier = 1 in {
+ def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops),
+ "call $disp",
+ [(tailcall tglobaladdr:$disp)]> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+}
+
+def : Pat<(tailcall (iPTR texternalsym:$dst)),
+ (TAIL_CALL texternalsym:$dst)>;
+
+let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, isTerminator = 1,
+ isBarrier = 1, rd = 0 in {
+ def TAIL_CALLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr, variable_ops),
+ "jmp $ptr",
+ [(tailcall ADDRri:$ptr)]>;
+}
+
+//===----------------------------------------------------------------------===//
// V9 Instructions
//===----------------------------------------------------------------------===//
@@ -1415,12 +1504,12 @@ let Predicates = [HasV9], Constraints = "$f = $rd" in {
(ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
"fmovd$cond %icc, $rs2, $rd",
[(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cond))]>;
+ let Predicates = [HasV9, HasHardQuad] in
def FMOVQ_ICC
: F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
(ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
"fmovq$cond %icc, $rs2, $rd",
- [(set f128:$rd, (SPselecticc f128:$rs2, f128:$f, imm:$cond))]>,
- Requires<[HasHardQuad]>;
+ [(set f128:$rd, (SPselecticc f128:$rs2, f128:$f, imm:$cond))]>;
}
let Uses = [FCC0], intcc = 0, opf_cc = 0b00 in {
@@ -1434,12 +1523,12 @@ let Predicates = [HasV9], Constraints = "$f = $rd" in {
(ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
"fmovd$cond %fcc0, $rs2, $rd",
[(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cond))]>;
+ let Predicates = [HasV9, HasHardQuad] in
def FMOVQ_FCC
: F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
(ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
"fmovq$cond %fcc0, $rs2, $rd",
- [(set f128:$rd, (SPselectfcc f128:$rs2, f128:$f, imm:$cond))]>,
- Requires<[HasHardQuad]>;
+ [(set f128:$rd, (SPselectfcc f128:$rs2, f128:$f, imm:$cond))]>;
}
}
@@ -1449,28 +1538,28 @@ let Predicates = [HasV9] in {
def FMOVD : F3_3u<2, 0b110100, 0b000000010,
(outs DFPRegs:$rd), (ins DFPRegs:$rs2),
"fmovd $rs2, $rd", []>;
+ let Predicates = [HasV9, HasHardQuad] in
def FMOVQ : F3_3u<2, 0b110100, 0b000000011,
(outs QFPRegs:$rd), (ins QFPRegs:$rs2),
- "fmovq $rs2, $rd", []>,
- Requires<[HasHardQuad]>;
+ "fmovq $rs2, $rd", []>;
def FNEGD : F3_3u<2, 0b110100, 0b000000110,
(outs DFPRegs:$rd), (ins DFPRegs:$rs2),
"fnegd $rs2, $rd",
[(set f64:$rd, (fneg f64:$rs2))]>;
+ let Predicates = [HasV9, HasHardQuad] in
def FNEGQ : F3_3u<2, 0b110100, 0b000000111,
(outs QFPRegs:$rd), (ins QFPRegs:$rs2),
"fnegq $rs2, $rd",
- [(set f128:$rd, (fneg f128:$rs2))]>,
- Requires<[HasHardQuad]>;
+ [(set f128:$rd, (fneg f128:$rs2))]>;
def FABSD : F3_3u<2, 0b110100, 0b000001010,
(outs DFPRegs:$rd), (ins DFPRegs:$rs2),
"fabsd $rs2, $rd",
[(set f64:$rd, (fabs f64:$rs2))]>;
+ let Predicates = [HasV9, HasHardQuad] in
def FABSQ : F3_3u<2, 0b110100, 0b000001011,
(outs QFPRegs:$rd), (ins QFPRegs:$rs2),
"fabsq $rs2, $rd",
- [(set f128:$rd, (fabs f128:$rs2))]>,
- Requires<[HasHardQuad]>;
+ [(set f128:$rd, (fabs f128:$rs2))]>;
}
// Floating-point compare instruction with %fcc0-%fcc3.
@@ -1517,11 +1606,11 @@ let Predicates = [HasV9] in {
: F4_3<0b110101, 0b000010, (outs DFPRegs:$rd),
(ins FCCRegs:$opf_cc, DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
"fmovd$cond $opf_cc, $rs2, $rd", []>;
+ let Predicates = [HasV9, HasHardQuad] in
def V9FMOVQ_FCC
: F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
(ins FCCRegs:$opf_cc, QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
- "fmovq$cond $opf_cc, $rs2, $rd", []>,
- Requires<[HasHardQuad]>;
+ "fmovq$cond $opf_cc, $rs2, $rd", []>;
} // Constraints = "$f = $rd", ...
} // let Predicates = [hasV9]
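
The new codegen-only instructions give tail calls their terminators:
TAIL_CALL reuses the CALL format (op field 01 with a 30-bit word
displacement) and TAIL_CALLri is a jmp through a register+immediate address;
both are returns with a delay slot. A round-trip of the direct form's
encoding, consistent with the bits<30> disp layout above and the
tgt <<= 2 step in DecodeCall (plain C++, illustration only):

// Illustrative sketch, not part of this patch: encoding round-trip for the
// direct TAIL_CALL form, which shares the CALL format (op=01, disp30).
#include <cassert>
#include <cstdint>

// Inst{31-30} = 01 (op = 1), Inst{29-0} = signed word displacement.
static uint32_t encodeCall(uint32_t Target, uint32_t Address) {
  return (1u << 30) | (((Target - Address) >> 2) & 0x3fffffff);
}

// Mirrors DecodeCall: tgt = fieldFromInstruction(insn, 0, 30); tgt <<= 2;
// the addition wraps mod 2^32, which is what makes backward calls work.
static uint32_t decodeTarget(uint32_t Insn, uint32_t Address) {
  return ((Insn & 0x3fffffff) << 2) + Address;
}

int main() {
  uint32_t Address = 0x40001000, Target = 0x40000800; // backward tail call
  assert(decodeTarget(encodeCall(Target, Address), Address) == Target);
  return 0;
}
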
diff --git a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
index 7c36c4ab865f..01db1f3747eb 100644
--- a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
@@ -11,3 +11,10 @@
using namespace llvm;
void SparcMachineFunctionInfo::anchor() { }
+
+MachineFunctionInfo *SparcMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<SparcMachineFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
index d557c8ea22e2..e1a1568d28a2 100644
--- a/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -38,6 +38,11 @@ namespace llvm {
: GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0),
IsLeafProc(false) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
Register getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 27c49a408a02..8bd51a703d47 100644
--- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -55,7 +55,7 @@ static std::string computeDataLayout(const Triple &T, bool is64Bit) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::Static);
+ return RM.value_or(Reloc::Static);
}
// Code models. Some only make sense for 64-bit code.
@@ -102,7 +102,7 @@ SparcTargetMachine::SparcTargetMachine(
initAsmInfo();
}
-SparcTargetMachine::~SparcTargetMachine() {}
+SparcTargetMachine::~SparcTargetMachine() = default;
const SparcSubtarget *
SparcTargetMachine::getSubtargetImpl(const Function &F) const {
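
Optional<T>::getValueOr is spelled value_or after LLVM's move toward the
std::optional interface; the semantics are unchanged. A one-line analogue
with the standard type (illustration; the patch itself uses llvm::Optional):

// Illustrative sketch, not part of this patch: the std::optional equivalent
// of the rename above -- getValueOr(Default) becomes value_or(Default).
#include <cassert>
#include <optional>

enum class RelocModel { Static, PIC };

static RelocModel effectiveRelocModel(std::optional<RelocModel> RM) {
  return RM.value_or(RelocModel::Static); // same spelling llvm::Optional uses now
}

int main() {
  assert(effectiveRelocModel(std::nullopt) == RelocModel::Static);
  assert(effectiveRelocModel(RelocModel::PIC) == RelocModel::PIC);
  return 0;
}
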
diff --git a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
index f30ddc7b4955..28ab13918042 100644
--- a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -18,7 +18,7 @@ class TargetMachine;
class SparcELFTargetObjectFile : public TargetLoweringObjectFileELF {
public:
- SparcELFTargetObjectFile() {}
+ SparcELFTargetObjectFile() = default;
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 40ed417d0817..60e1b05a6d1a 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
@@ -1589,9 +1590,11 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
if (getParser().parseExpression(Expr))
return MatchOperand_NoMatch;
- auto isOutOfRangeConstant = [&](const MCExpr *E) -> bool {
+ auto isOutOfRangeConstant = [&](const MCExpr *E, bool Negate) -> bool {
if (auto *CE = dyn_cast<MCConstantExpr>(E)) {
int64_t Value = CE->getValue();
+ if (Negate)
+ Value = -Value;
if ((Value & 1) || Value < MinVal || Value > MaxVal)
return true;
}
@@ -1605,7 +1608,7 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
Error(StartLoc, "Expected PC-relative expression");
return MatchOperand_ParseFail;
}
- if (isOutOfRangeConstant(CE)) {
+ if (isOutOfRangeConstant(CE, false)) {
Error(StartLoc, "offset out of range");
return MatchOperand_ParseFail;
}
@@ -1620,8 +1623,9 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
// For consistency with the GNU assembler, conservatively assume that a
// constant offset must by itself be within the given size range.
if (const auto *BE = dyn_cast<MCBinaryExpr>(Expr))
- if (isOutOfRangeConstant(BE->getLHS()) ||
- isOutOfRangeConstant(BE->getRHS())) {
+ if (isOutOfRangeConstant(BE->getLHS(), false) ||
+ isOutOfRangeConstant(BE->getRHS(),
+ BE->getOpcode() == MCBinaryExpr::Sub)) {
Error(StartLoc, "offset out of range");
return MatchOperand_ParseFail;
}
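
The new Negate parameter fixes the conservative range check on expressions
of the form sym - constant: per the comment above, a constant offset must by
itself be in range, and a subtracted constant contributes its negation. A
distilled version of the predicate (plain C++; the bounds below are
placeholders for the operand's actual MinVal/MaxVal):

// Illustrative sketch, not part of this patch: the range check with the new
// Negate flag, as applied to the RHS of an MCBinaryExpr::Sub.
#include <cassert>
#include <cstdint>

static bool isOutOfRangeConstant(int64_t Value, bool Negate, int64_t MinVal,
                                 int64_t MaxVal) {
  if (Negate)
    Value = -Value; // a subtracted constant contributes its negation
  // Odd values are rejected because these offsets are halfword-scaled.
  return (Value & 1) || Value < MinVal || Value > MaxVal;
}

int main() {
  // Placeholder bounds for a 32-bit PC-relative operand.
  const int64_t Min = -(1LL << 32), Max = (1LL << 32) - 2;
  // "sym - 0x100000000" contributes -2^32, which is in range; checking the
  // constant un-negated (the old behavior) wrongly rejected it.
  assert(!isOutOfRangeConstant(1LL << 32, /*Negate=*/true, Min, Max));
  assert(isOutOfRangeConstant(1LL << 32, /*Negate=*/false, Min, Max));
  return 0;
}
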
diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 5eba150dadc3..979141a1962a 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -9,8 +9,8 @@
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZ.h"
#include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
@@ -73,10 +73,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZDisassembler() {
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
uint64_t Address, uint64_t Offset,
uint64_t Width, MCInst &MI,
- const void *Decoder) {
- const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
- return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
- Offset, Width);
+ const MCDisassembler *Decoder) {
+ return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
+ Width, /*InstSize=*/0);
}
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
@@ -91,79 +90,79 @@ static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16);
}
static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16);
}
static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
}
static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16);
}
-static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
}
static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16);
}
static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16);
}
static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16);
}
static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32);
}
static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32);
}
static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32);
}
static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16);
}
static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16);
}
@@ -184,70 +183,81 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) {
}
static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<1>(Inst, Imm);
}
static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<2>(Inst, Imm);
}
static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<3>(Inst, Imm);
}
static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<4>(Inst, Imm);
}
static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<6>(Inst, Imm);
}
static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<8>(Inst, Imm);
}
static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<12>(Inst, Imm);
}
static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<16>(Inst, Imm);
}
static DecodeStatus decodeU32ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeUImmOperand<32>(Inst, Imm);
}
static DecodeStatus decodeS8ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeSImmOperand<8>(Inst, Imm);
}
static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeSImmOperand<16>(Inst, Imm);
}
static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeSImmOperand<32>(Inst, Imm);
}
-template<unsigned N>
+template <unsigned N>
static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address,
- bool isBranch,
- const void *Decoder) {
+ uint64_t Address, bool isBranch,
+ const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid PC-relative offset");
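// SystemZ PC-relative offsets are encoded in halfwords, hence the * 2 when
// computing the target address.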
uint64_t Value = SignExtend64<N>(Imm) * 2 + Address;
@@ -260,31 +270,31 @@ static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
static DecodeStatus decodePC12DBLBranchOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodePCDBLOperand<12>(Inst, Imm, Address, true, Decoder);
}
static DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder);
}
static DecodeStatus decodePC24DBLBranchOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodePCDBLOperand<24>(Inst, Imm, Address, true, Decoder);
}
static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodePCDBLOperand<32>(Inst, Imm, Address, true, Decoder);
}
static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder);
}
@@ -382,64 +392,61 @@ static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field,
static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs);
}
static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs);
}
static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
}
static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
}
-static DecodeStatus decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
}
-static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
}
-static DecodeStatus decodeBDLAddr64Disp12Len4Operand(MCInst &Inst,
- uint64_t Field,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+decodeBDLAddr64Disp12Len4Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeBDLAddr12Len4Operand(Inst, Field, SystemZMC::GR64Regs);
}
-static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
- uint64_t Field,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
}
-static DecodeStatus decodeBDRAddr64Disp12Operand(MCInst &Inst,
- uint64_t Field,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+decodeBDRAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeBDRAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
}
-static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus
+decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
+ const MCDisassembler *Decoder) {
return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index c83796b8579b..242f566da2c9 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -37,6 +37,8 @@ class SystemZMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
MCContext &Ctx;
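+ // Tracks how many memory-operand displacements have been emitted for the
+ // instruction currently being encoded; mutable so that the const encoding
+ // callbacks can update it.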
+ mutable unsigned MemOpsEmitted;
+
public:
SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), Ctx(ctx) {
@@ -165,6 +167,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
verifyInstructionPredicates(MI,
computeAvailableFeatures(STI.getFeatureBits()));
+ MemOpsEmitted = 0;
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
unsigned Size = MCII.get(MI.getOpcode()).getSize();
// Big-endian insertion of Size bytes.
@@ -191,12 +194,14 @@ getDispOpValue(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
SystemZ::FixupKind Kind) const {
const MCOperand &MO = MI.getOperand(OpNum);
- if (MO.isImm())
+ if (MO.isImm()) {
+ ++MemOpsEmitted;
return static_cast<uint64_t>(MO.getImm());
+ }
if (MO.isExpr()) {
// All instructions follow the pattern where the first displacement has a
// 2-byte offset and the second one a 4-byte offset.
- unsigned ByteOffs = Fixups.size() == 0 ? 2 : 4;
+ unsigned ByteOffs = MemOpsEmitted++ == 0 ? 2 : 4;
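+ // (Counting emitted memory operands rather than accumulated fixups keeps
+ // the offset correct when the first displacement was an immediate, which
+ // pushes no fixup.)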
Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind,
MI.getLoc()));
assert(Fixups.size() <= 2 && "More than two memory operands in MI?");
@@ -328,7 +333,6 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
#include "SystemZGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new SystemZMCCodeEmitter(MCII, Ctx);
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index c7b73fd3b805..03141ecf551d 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -13,6 +13,7 @@
#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -193,7 +194,7 @@ void SystemZTargetStreamer::emitConstantPools() {
return;
// Switch to the .text section.
const MCObjectFileInfo &OFI = *Streamer.getContext().getObjectFileInfo();
- Streamer.SwitchSection(OFI.getTextSection());
+ Streamer.switchSection(OFI.getTextSection());
for (auto &I : EXRLTargets2Sym) {
Streamer.emitLabel(I.second);
const MCInstSTIPair &MCI_STI = I.first;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index e76fa03af3bf..db4485423416 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -78,7 +78,6 @@ inline unsigned getRegAsVR128(unsigned Reg) {
} // end namespace SystemZMC
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index e01adcce04ab..6fb080607f51 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -88,13 +88,19 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
// an instruction with the corresponding hint set.
static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI,
unsigned Opcode) {
- if (!MI->hasOneMemOperand())
+ if (MI->memoperands_empty())
return;
- const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ Align Alignment = Align(16);
+ for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(),
+ EE = MI->memoperands_end(); MMOI != EE; ++MMOI)
+ if ((*MMOI)->getAlign() < Alignment)
+ Alignment = (*MMOI)->getAlign();
+
unsigned AlignmentHint = 0;
- if (MMO->getAlign() >= Align(16))
+ if (Alignment >= Align(16))
AlignmentHint = 4;
- else if (MMO->getAlign() >= Align(8))
+ else if (Alignment >= Align(8))
AlignmentHint = 3;
if (AlignmentHint == 0)
return;
@@ -124,17 +130,32 @@ static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
.addImm(0);
}
+// The XPLINK ABI requires that a no-op encoding the call type be emitted after
+// each call to a subroutine. This information can be used by the called
+// function to determine its entry point, e.g. for generating a backtrace. The
+// call type is encoded as a register number in the bcr instruction. See
+// enumeration CallType for the possible values.
+void SystemZAsmPrinter::emitCallInformation(CallType CT) {
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(SystemZ::BCRAsm)
+ .addImm(0)
+ .addReg(SystemZMC::GR64Regs[static_cast<unsigned>(CT)]));
+}
+
void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(MF->getContext(), *this);
- const SystemZSubtarget *Subtarget = &MF->getSubtarget<SystemZSubtarget>();
MCInst LoweredMI;
switch (MI->getOpcode()) {
case SystemZ::Return:
- if (Subtarget->isTargetXPLINK64())
- LoweredMI =
- MCInstBuilder(SystemZ::B).addReg(SystemZ::R7D).addImm(2).addReg(0);
- else
- LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D);
+ LoweredMI = MCInstBuilder(SystemZ::BR)
+ .addReg(SystemZ::R14D);
+ break;
+
+ case SystemZ::Return_XPLINK:
+ LoweredMI = MCInstBuilder(SystemZ::B)
+ .addReg(SystemZ::R7D)
+ .addImm(2)
+ .addReg(0);
break;
case SystemZ::CondReturn:
@@ -144,6 +165,15 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(SystemZ::R14D);
break;
+ case SystemZ::CondReturn_XPLINK:
+ LoweredMI = MCInstBuilder(SystemZ::BC)
+ .addImm(MI->getOperand(0).getImm())
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SystemZ::R7D)
+ .addImm(2)
+ .addReg(0);
+ break;
+
case SystemZ::CRBReturn:
LoweredMI = MCInstBuilder(SystemZ::CRB)
.addReg(MI->getOperand(0).getReg())
@@ -222,18 +252,21 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addReg(SystemZ::R7D)
.addExpr(Lower.getExpr(MI->getOperand(0),
MCSymbolRefExpr::VK_PLT)));
- EmitToStreamer(
- *OutStreamer,
- MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R3D));
+ emitCallInformation(CallType::BRASL7);
return;
case SystemZ::CallBASR_XPLINK64:
EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR)
.addReg(SystemZ::R7D)
.addReg(MI->getOperand(0).getReg()));
- EmitToStreamer(
- *OutStreamer,
- MCInstBuilder(SystemZ::BCRAsm).addImm(0).addReg(SystemZ::R0D));
+ emitCallInformation(CallType::BASR76);
+ return;
+
+ case SystemZ::CallBASR_STACKEXT:
+ EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR)
+ .addReg(SystemZ::R3D)
+ .addReg(MI->getOperand(0).getReg()));
+ emitCallInformation(CallType::BASR33);
return;
case SystemZ::CallBRASL:
@@ -608,11 +641,11 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
MCContext &Ctx = MF->getContext();
if (MF->getFunction().hasFnAttribute("mrecord-mcount")) {
MCSymbol *DotSym = OutContext.createTempSymbol();
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(
Ctx.getELFSection("__mcount_loc", ELF::SHT_PROGBITS, ELF::SHF_ALLOC));
OutStreamer->emitSymbolValue(DotSym, 8);
- OutStreamer->PopSection();
+ OutStreamer->popSection();
OutStreamer->emitLabel(DotSym);
}
@@ -630,8 +663,7 @@ void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
}
void SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ auto *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
unsigned NumNOPBytes = MI.getOperand(1).getImm();
@@ -786,13 +818,253 @@ void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
emitStackMaps(SM);
}
+void SystemZAsmPrinter::emitFunctionBodyEnd() {
+ if (TM.getTargetTriple().isOSzOS()) {
+ // Emit symbol for the end of function if the z/OS target streamer
+ // is used. This is needed to calculate the size of the function.
+ MCSymbol *FnEndSym = createTempSymbol("func_end");
+ OutStreamer->emitLabel(FnEndSym);
+
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(getObjFileLowering().getPPA1Section());
+ emitPPA1(FnEndSym);
+ OutStreamer->popSection();
+
+ CurrentFnPPA1Sym = nullptr;
+ CurrentFnEPMarkerSym = nullptr;
+ }
+}
+
+static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg,
+ bool StackProtector, bool FPRMask, bool VRMask) {
+ enum class PPA1Flag1 : uint8_t {
+ DSA64Bit = (0x80 >> 0),
+ VarArg = (0x80 >> 7),
+ LLVM_MARK_AS_BITMASK_ENUM(DSA64Bit)
+ };
+ enum class PPA1Flag2 : uint8_t {
+ ExternalProcedure = (0x80 >> 0),
+ STACKPROTECTOR = (0x80 >> 3),
+ LLVM_MARK_AS_BITMASK_ENUM(ExternalProcedure)
+ };
+ enum class PPA1Flag3 : uint8_t {
+ FPRMask = (0x80 >> 2),
+ LLVM_MARK_AS_BITMASK_ENUM(FPRMask)
+ };
+ enum class PPA1Flag4 : uint8_t {
+ EPMOffsetPresent = (0x80 >> 0),
+ VRMask = (0x80 >> 2),
+ ProcedureNamePresent = (0x80 >> 7),
+ LLVM_MARK_AS_BITMASK_ENUM(EPMOffsetPresent)
+ };
+
+ // Declare optional section flags that can be modified.
+ auto Flags1 = PPA1Flag1(0);
+ auto Flags2 = PPA1Flag2::ExternalProcedure;
+ auto Flags3 = PPA1Flag3(0);
+ auto Flags4 = PPA1Flag4::EPMOffsetPresent | PPA1Flag4::ProcedureNamePresent;
+
+ Flags1 |= PPA1Flag1::DSA64Bit;
+
+ if (VarArg)
+ Flags1 |= PPA1Flag1::VarArg;
+
+ if (StackProtector)
+ Flags2 |= PPA1Flag2::STACKPROTECTOR;
+
+ // SavedGPRMask, SavedFPRMask, and SavedVRMask are precomputed in emitPPA1().
+ if (FPRMask)
+ Flags3 |= PPA1Flag3::FPRMask; // Add emit FPR mask flag.
+
+ if (VRMask)
+ Flags4 |= PPA1Flag4::VRMask; // Add emit VR mask flag.
+
+ OutStreamer->AddComment("PPA1 Flags 1");
+ if ((Flags1 & PPA1Flag1::DSA64Bit) == PPA1Flag1::DSA64Bit)
+ OutStreamer->AddComment(" Bit 0: 1 = 64-bit DSA");
+ else
+ OutStreamer->AddComment(" Bit 0: 0 = 32-bit DSA");
+ if ((Flags1 & PPA1Flag1::VarArg) == PPA1Flag1::VarArg)
+ OutStreamer->AddComment(" Bit 7: 1 = Vararg function");
+ OutStreamer->emitInt8(static_cast<uint8_t>(Flags1)); // Flags 1.
+
+ OutStreamer->AddComment("PPA1 Flags 2");
+ if ((Flags2 & PPA1Flag2::ExternalProcedure) == PPA1Flag2::ExternalProcedure)
+ OutStreamer->AddComment(" Bit 0: 1 = External procedure");
+ if ((Flags2 & PPA1Flag2::STACKPROTECTOR) == PPA1Flag2::STACKPROTECTOR)
+ OutStreamer->AddComment(" Bit 3: 1 = STACKPROTECT is enabled");
+ else
+ OutStreamer->AddComment(" Bit 3: 0 = STACKPROTECT is not enabled");
+ OutStreamer->emitInt8(static_cast<uint8_t>(Flags2)); // Flags 2.
+
+ OutStreamer->AddComment("PPA1 Flags 3");
+ if ((Flags3 & PPA1Flag3::FPRMask) == PPA1Flag3::FPRMask)
+ OutStreamer->AddComment(" Bit 2: 1 = FP Reg Mask is in optional area");
+ OutStreamer->emitInt8(
+ static_cast<uint8_t>(Flags3)); // Flags 3 (optional sections).
+
+ OutStreamer->AddComment("PPA1 Flags 4");
+ if ((Flags4 & PPA1Flag4::VRMask) == PPA1Flag4::VRMask)
+ OutStreamer->AddComment(" Bit 2: 1 = Vector Reg Mask is in optional area");
+ OutStreamer->emitInt8(static_cast<uint8_t>(
+ Flags4)); // Flags 4 (optional sections, always emit these).
+}
+
+void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) {
+ const TargetRegisterInfo *TRI = MF->getRegInfo().getTargetRegisterInfo();
+ const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+ const auto TargetHasVector = Subtarget.hasVector();
+
+ const SystemZMachineFunctionInfo *ZFI =
+ MF->getInfo<SystemZMachineFunctionInfo>();
+ const auto *ZFL = static_cast<const SystemZXPLINKFrameLowering *>(
+ Subtarget.getFrameLowering());
+ const MachineFrameInfo &MFFrame = MF->getFrameInfo();
+
+ // Get saved GPR/FPR/VPR masks.
+ const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo();
+ uint16_t SavedGPRMask = 0;
+ uint16_t SavedFPRMask = 0;
+ uint8_t SavedVRMask = 0;
+ int64_t OffsetFPR = 0;
+ int64_t OffsetVR = 0;
+ const int64_t TopOfStack =
+ MFFrame.getOffsetAdjustment() + MFFrame.getStackSize();
+
+ // Loop over the spilled registers. The CalleeSavedInfo can't be used because
+ // it does not contain all spilled registers.
+ for (unsigned I = ZFI->getSpillGPRRegs().LowGPR,
+ E = ZFI->getSpillGPRRegs().HighGPR;
+ I && E && I <= E; ++I) {
+ unsigned V = TRI->getEncodingValue((Register)I);
+ assert(V < 16 && "GPR index out of range");
+ SavedGPRMask |= 1 << (15 - V);
+ }
+
+ for (auto &CS : CSI) {
+ unsigned Reg = CS.getReg();
+ unsigned I = TRI->getEncodingValue(Reg);
+
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ assert(I < 16 && "FPR index out of range");
+ SavedFPRMask |= 1 << (15 - I);
+ int64_t Temp = MFFrame.getObjectOffset(CS.getFrameIdx());
+ if (Temp < OffsetFPR)
+ OffsetFPR = Temp;
+ } else if (SystemZ::VR128BitRegClass.contains(Reg)) {
+ assert(I >= 16 && I <= 23 && "VPR index out of range");
+ unsigned BitNum = I - 16;
+ SavedVRMask |= 1 << (7 - BitNum);
+ int64_t Temp = MFFrame.getObjectOffset(CS.getFrameIdx());
+ if (Temp < OffsetVR)
+ OffsetVR = Temp;
+ }
+ }
+
+ // Negative frame offsets are relative to the top of the stack; rebase them
+ // to positive offsets.
+ OffsetFPR += (OffsetFPR < 0) ? TopOfStack : 0;
+ OffsetVR += (OffsetVR < 0) ? TopOfStack : 0;
+
+ // Get alloca register.
+ uint8_t FrameReg = TRI->getEncodingValue(TRI->getFrameRegister(*MF));
+ uint8_t AllocaReg = ZFL->hasFP(*MF) ? FrameReg : 0;
+ assert(AllocaReg < 16 && "Can't have alloca register larger than 15");
+ (void)AllocaReg;
+
+ // Build FPR save area offset.
+ uint32_t FrameAndFPROffset = 0;
+ if (SavedFPRMask) {
+ uint64_t FPRSaveAreaOffset = OffsetFPR;
+ assert(FPRSaveAreaOffset < 0x10000000 && "Offset out of range");
+
+ FrameAndFPROffset = FPRSaveAreaOffset & 0x0FFFFFFF; // Lose top 4 bits.
+ FrameAndFPROffset |= FrameReg << 28; // Put into top 4 bits.
+ }
+
+ // Build VR save area offset.
+ uint32_t FrameAndVROffset = 0;
+ if (TargetHasVector && SavedVRMask) {
+ uint64_t VRSaveAreaOffset = OffsetVR;
+ assert(VRSaveAreaOffset < 0x10000000 && "Offset out of range");
+
+ FrameAndVROffset = VRSaveAreaOffset & 0x0FFFFFFF; // Lose top 4 bits.
+ FrameAndVROffset |= FrameReg << 28; // Put into top 4 bits.
+ }
+
+ // Emit PPA1 section.
+ OutStreamer->AddComment("PPA1");
+ OutStreamer->emitLabel(CurrentFnPPA1Sym);
+ OutStreamer->AddComment("Version");
+ OutStreamer->emitInt8(0x02); // Version.
+ OutStreamer->AddComment("LE Signature X'CE'");
+ OutStreamer->emitInt8(0xCE); // CEL signature.
+ OutStreamer->AddComment("Saved GPR Mask");
+ OutStreamer->emitInt16(SavedGPRMask);
+
+ emitPPA1Flags(OutStreamer, MF->getFunction().isVarArg(),
+ MFFrame.hasStackProtectorIndex(), SavedFPRMask != 0,
+ TargetHasVector && SavedVRMask != 0);
+
+ OutStreamer->AddComment("Length/4 of Parms");
+ OutStreamer->emitInt16(
+ static_cast<uint16_t>(MFFrame.getMaxCallFrameSize() / 4)); // Parms/4.
+ OutStreamer->AddComment("Length of Code");
+ OutStreamer->emitAbsoluteSymbolDiff(FnEndSym, CurrentFnEPMarkerSym, 4);
+
+ // Emit saved FPR mask and offset to FPR save area (0x20 of flags 3).
+ if (SavedFPRMask) {
+ OutStreamer->AddComment("FPR mask");
+ OutStreamer->emitInt16(SavedFPRMask);
+ OutStreamer->AddComment("AR mask");
+ OutStreamer->emitInt16(0); // AR Mask, unused currently.
+ OutStreamer->AddComment("FPR Save Area Locator");
+ OutStreamer->AddComment(Twine(" Bit 0-3: Register R")
+ .concat(utostr(FrameAndFPROffset >> 28))
+ .str());
+ OutStreamer->AddComment(Twine(" Bit 4-31: Offset ")
+ .concat(utostr(FrameAndFPROffset & 0x0FFFFFFF))
+ .str());
+ OutStreamer->emitInt32(FrameAndFPROffset); // Offset to FPR save area with
+ // register to add value to
+ // (alloca reg).
+ }
+
+ // Emit saved VR mask to VR save area.
+ if (TargetHasVector && SavedVRMask) {
+ OutStreamer->AddComment("VR mask");
+ OutStreamer->emitInt8(SavedVRMask);
+ OutStreamer->emitInt8(0); // Reserved.
+ OutStreamer->emitInt16(0); // Also reserved.
+ OutStreamer->AddComment("VR Save Area Locator");
+ OutStreamer->AddComment(Twine(" Bit 0-3: Register R")
+ .concat(utostr(FrameAndVROffset >> 28))
+ .str());
+ OutStreamer->AddComment(Twine(" Bit 4-31: Offset ")
+ .concat(utostr(FrameAndVROffset & 0x0FFFFFFF))
+ .str());
+ OutStreamer->emitInt32(FrameAndVROffset);
+ }
+
+ // Emit offset to entry point optional section (0x80 of flags 4).
+ OutStreamer->emitAbsoluteSymbolDiff(CurrentFnEPMarkerSym, CurrentFnPPA1Sym,
+ 4);
+}
+
void SystemZAsmPrinter::emitFunctionEntryLabel() {
- const SystemZSubtarget &Subtarget =
- static_cast<const SystemZSubtarget &>(MF->getSubtarget());
+ const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
if (Subtarget.getTargetTriple().isOSzOS()) {
MCContext &OutContext = OutStreamer->getContext();
- MCSymbol *EPMarkerSym = OutContext.createTempSymbol("CM_", true);
+
+ // Derive the function-specific EPM/PPA1 symbol names now, for use here and
+ // later in emitFunctionBodyEnd().
+ std::string N(MF->getFunction().hasName()
+ ? Twine(MF->getFunction().getName()).concat("_").str()
+ : "");
+
+ CurrentFnEPMarkerSym =
+ OutContext.createTempSymbol(Twine("EPM_").concat(N).str(), true);
+ CurrentFnPPA1Sym =
+ OutContext.createTempSymbol(Twine("PPA1_").concat(N).str(), true);
// EntryPoint Marker
const MachineFrameInfo &MFFrame = MF->getFrameInfo();
@@ -811,11 +1083,14 @@ void SystemZAsmPrinter::emitFunctionEntryLabel() {
// Emit entry point marker section.
OutStreamer->AddComment("XPLINK Routine Layout Entry");
- OutStreamer->emitLabel(EPMarkerSym);
+ OutStreamer->emitLabel(CurrentFnEPMarkerSym);
OutStreamer->AddComment("Eyecatcher 0x00C300C500C500");
OutStreamer->emitIntValueInHex(0x00C300C500C500, 7); // Eyecatcher.
OutStreamer->AddComment("Mark Type C'1'");
OutStreamer->emitInt8(0xF1); // Mark Type.
+ OutStreamer->AddComment("Offset to PPA1");
+ OutStreamer->emitAbsoluteSymbolDiff(CurrentFnPPA1Sym, CurrentFnEPMarkerSym,
+ 4);
if (OutStreamer->isVerboseAsm()) {
OutStreamer->AddComment("DSA Size 0x" + Twine::utohexstr(DSASize));
OutStreamer->AddComment("Entry Flags");
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index 80d68d1b93ff..f14b4a184f62 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -26,6 +26,8 @@ class raw_ostream;
class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
private:
StackMaps SM;
+ MCSymbol *CurrentFnPPA1Sym; // PPA1 Symbol.
+ MCSymbol *CurrentFnEPMarkerSym; // Entry Point Marker.
SystemZTargetStreamer *getTargetStreamer() {
MCTargetStreamer *TS = OutStreamer->getTargetStreamer();
@@ -33,9 +35,24 @@ private:
return static_cast<SystemZTargetStreamer *>(TS);
}
+ /// Call type information for XPLINK.
+ enum class CallType {
+ BASR76 = 0, // b'x000' == BASR r7,r6
+ BRAS7 = 1, // b'x001' == BRAS r7,ep
+ RESVD_2 = 2, // b'x010'
+ BRASL7 = 3, // b'x011' == BRASL r7,ep
+ RESVD_4 = 4, // b'x100'
+ RESVD_5 = 5, // b'x101'
+ BALR1415 = 6, // b'x110' == BALR r14,r15
+ BASR33 = 7, // b'x111' == BASR r3,r3
+ };
+
+ void emitPPA1(MCSymbol *FnEndSym);
+
public:
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this),
+ CurrentFnPPA1Sym(nullptr), CurrentFnEPMarkerSym(nullptr) {}
// Override AsmPrinter.
StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
@@ -52,8 +69,10 @@ public:
return AsmPrinter::doInitialization(M);
}
void emitFunctionEntryLabel() override;
+ void emitFunctionBodyEnd() override;
private:
+ void emitCallInformation(CallType CT);
void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
index 763aa8c0e41f..9fc6765dbbf7 100644
--- a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
@@ -100,7 +100,7 @@ bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) {
}
bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo();
MRI = &F.getRegInfo();
bool Modified = false;
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 4893acc81335..340dba1362af 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -224,7 +224,7 @@ bool SystemZElimCompare::convertToBRCT(
// The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH.
MachineOperand Target(Branch->getOperand(2));
while (Branch->getNumOperands())
- Branch->RemoveOperand(0);
+ Branch->removeOperand(0);
Branch->setDesc(TII->get(BRCT));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target);
@@ -267,7 +267,7 @@ bool SystemZElimCompare::convertToLoadAndTrap(
// The transformation is OK. Rebuild Branch as a load-and-trap.
while (Branch->getNumOperands())
- Branch->RemoveOperand(0);
+ Branch->removeOperand(0);
Branch->setDesc(TII->get(LATOpcode));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
.add(MI.getOperand(0))
@@ -649,16 +649,16 @@ bool SystemZElimCompare::fuseCompareOperations(
// Clear out all current operands.
int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI);
assert(CCUse >= 0 && "BRC/BCR must use CC");
- Branch->RemoveOperand(CCUse);
+ Branch->removeOperand(CCUse);
// Remove regmask (sibcall).
if (Type == SystemZII::CompareAndSibcall)
- Branch->RemoveOperand(3);
+ Branch->removeOperand(3);
// Remove target (branch or sibcall).
if (Type == SystemZII::CompareAndBranch ||
Type == SystemZII::CompareAndSibcall)
- Branch->RemoveOperand(2);
- Branch->RemoveOperand(1);
- Branch->RemoveOperand(0);
+ Branch->removeOperand(2);
+ Branch->removeOperand(1);
+ Branch->removeOperand(0);
// Rebuild Branch as a fused compare and branch.
// SrcNOps is the number of MI operands of the compare instruction
@@ -735,7 +735,7 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
if (skipFunction(F.getFunction()))
return false;
- TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo();
TRI = &TII->getRegisterInfo();
bool Changed = false;
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 610627e7e3f0..43bc7426cfa8 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -13,6 +13,7 @@
#include "SystemZMachineFunctionInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -95,8 +96,7 @@ typedef std::vector<SZFrameSortingObj> SZFrameObjVec;
void SystemZELFFrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ auto *TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo();
// Make a vector of sorting objects to track all MFI objects and mark those
// to be sorted as valid.
@@ -1153,12 +1153,6 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize());
uint64_t StackSize = MFFrame.getStackSize();
- // FIXME: Implement support for large stack sizes, when the stack extension
- // routine needs to be called.
- if (StackSize > 1024 * 1024) {
- llvm_unreachable("Huge Stack Frame not yet supported on z/OS");
- }
-
if (ZFI->getSpillGPRRegs().LowGPR) {
// Skip over the GPR saves.
if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
@@ -1201,6 +1195,18 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
ZII);
+
+ // If the requested stack size is larger than the guard page, we must check
+ // whether the stack extender has to be called. This requires adding a
+ // conditional branch, but splitting the prologue block is not possible at
+ // this point since it would invalidate the SaveBlocks / RestoreBlocks sets
+ // of PEI in the single-block-function case. Build a pseudo to be handled
+ // later by inlineStackProbe().
+ const uint64_t GuardPageSize = 1024 * 1024;
+ if (StackSize > GuardPageSize) {
+ assert(StoreInstr && "Wrong insertion point");
+ BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::XPLINK_STACKALLOC));
+ }
}
if (HasFP) {
@@ -1239,6 +1245,74 @@ void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+// Emit a compare of the stack pointer against the stack floor, and a call to
+// the LE stack extender if needed.
+void SystemZXPLINKFrameLowering::inlineStackProbe(
+ MachineFunction &MF, MachineBasicBlock &PrologMBB) const {
+ auto *ZII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ MachineInstr *StackAllocMI = nullptr;
+ for (MachineInstr &MI : PrologMBB)
+ if (MI.getOpcode() == SystemZ::XPLINK_STACKALLOC) {
+ StackAllocMI = &MI;
+ break;
+ }
+ if (StackAllocMI == nullptr)
+ return;
+
+ MachineBasicBlock &MBB = PrologMBB;
+ const DebugLoc DL = StackAllocMI->getDebugLoc();
+
+ // The 2nd half of block MBB after split.
+ MachineBasicBlock *NextMBB;
+
+ // Add new basic block for the call to the stack overflow function.
+ MachineBasicBlock *StackExtMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.push_back(StackExtMBB);
+
+ // LG r3,72(,r3)
+ BuildMI(StackExtMBB, DL, ZII->get(SystemZ::LG), SystemZ::R3D)
+ .addReg(SystemZ::R3D)
+ .addImm(72)
+ .addReg(0);
+ // BASR r3,r3
+ BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT))
+ .addReg(SystemZ::R3D);
+
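+ // In the prologue itself, compare the stack pointer against the stack
+ // floor and branch to StackExtMBB if a larger stack is needed.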
+ // LLGT r3,1208
+ BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D)
+ .addReg(0)
+ .addImm(1208)
+ .addReg(0);
+ // CG r4,64(,r3)
+ BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::CG))
+ .addReg(SystemZ::R4D)
+ .addReg(SystemZ::R3D)
+ .addImm(64)
+ .addReg(0);
+ // JLL b'0100',F'37'
+ BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_LT)
+ .addMBB(StackExtMBB);
+
+ NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB);
+ MBB.addSuccessor(NextMBB);
+ MBB.addSuccessor(StackExtMBB);
+
+ // Add jump back from stack extension BB.
+ BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB);
+ StackExtMBB->addSuccessor(NextMBB);
+
+ StackAllocMI->eraseFromParent();
+
+ // Compute the live-in lists for the new blocks.
+ recomputeLiveIns(*NextMBB);
+ recomputeLiveIns(*StackExtMBB);
+}
+
bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getFrameInfo().hasVarSizedObjects());
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 2b3d7efed53b..bec83a9457e0 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -127,6 +127,9 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
+
bool hasFP(const MachineFunction &MF) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index cf55318d328d..9ac7eafd5f34 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -968,7 +968,7 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND &&
RISBG.Input.getOpcode() != ISD::TRUNCATE)
Count += 1;
- if (Count == 0)
+ if (Count == 0 || isa<ConstantSDNode>(RISBG.Input))
return false;
// Prefer to use normal shift instructions over RISBG, since they can handle
@@ -1472,7 +1472,7 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
assert(MMO && "Expected a memory operand.");
// The memory access must have a proper alignment and no index register.
- if (MemAccess->getAlignment() < StoreSize ||
+ if (MemAccess->getAlign().value() < StoreSize ||
!MemAccess->getOffset().isUndef())
return false;
@@ -1683,16 +1683,19 @@ SelectInlineAsmMemoryOperand(const SDValue &Op,
llvm_unreachable("Unexpected asm memory constraint");
case InlineAsm::Constraint_i:
case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_ZQ:
// Accept an address with a short displacement, but no index.
Form = SystemZAddressingMode::FormBD;
DispRange = SystemZAddressingMode::Disp12Only;
break;
case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_ZR:
// Accept an address with a short displacement and an index.
Form = SystemZAddressingMode::FormBDXNormal;
DispRange = SystemZAddressingMode::Disp12Only;
break;
case InlineAsm::Constraint_S:
+ case InlineAsm::Constraint_ZS:
// Accept an address with a long displacement, but no index.
Form = SystemZAddressingMode::FormBD;
DispRange = SystemZAddressingMode::Disp20Only;
@@ -1700,6 +1703,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op,
case InlineAsm::Constraint_T:
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_p:
+ case InlineAsm::Constraint_ZT:
// Accept an address with a long displacement and an index.
// m works the same as T, as this is the most general case.
// We don't really have any special handling of "offsettable"
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index f10651d5c5d7..42c1c77f14e4 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -80,7 +80,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
+ MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
auto *Regs = STI.getSpecialRegisters();
@@ -471,6 +471,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
+ // Special treatment.
+ setOperationAction(ISD::IS_FPCLASS, VT, Custom);
+
// Handle constrained floating-point operations.
setOperationAction(ISD::STRICT_FADD, VT, Legal);
setOperationAction(ISD::STRICT_FSUB, VT, Legal);
@@ -640,33 +643,33 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Codes for which we want to perform some z-specific combinations.
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::LOAD);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::FP_ROUND);
- setTargetDAGCombine(ISD::STRICT_FP_ROUND);
- setTargetDAGCombine(ISD::FP_EXTEND);
- setTargetDAGCombine(ISD::SINT_TO_FP);
- setTargetDAGCombine(ISD::UINT_TO_FP);
- setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
- setTargetDAGCombine(ISD::BSWAP);
- setTargetDAGCombine(ISD::SDIV);
- setTargetDAGCombine(ISD::UDIV);
- setTargetDAGCombine(ISD::SREM);
- setTargetDAGCombine(ISD::UREM);
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine({ISD::ZERO_EXTEND,
+ ISD::SIGN_EXTEND,
+ ISD::SIGN_EXTEND_INREG,
+ ISD::LOAD,
+ ISD::STORE,
+ ISD::VECTOR_SHUFFLE,
+ ISD::EXTRACT_VECTOR_ELT,
+ ISD::FP_ROUND,
+ ISD::STRICT_FP_ROUND,
+ ISD::FP_EXTEND,
+ ISD::SINT_TO_FP,
+ ISD::UINT_TO_FP,
+ ISD::STRICT_FP_EXTEND,
+ ISD::BSWAP,
+ ISD::SDIV,
+ ISD::UDIV,
+ ISD::SREM,
+ ISD::UREM,
+ ISD::INTRINSIC_VOID,
+ ISD::INTRINSIC_W_CHAIN});
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We want to use MVC in preference to even a single load/store pair.
- MaxStoresPerMemcpy = 0;
+ MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
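+ // (With vector support, a couple of vector stores can beat MVC for small
+ // copies; findOptimalMemOpLowering still forces MVC where it wins.)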
MaxStoresPerMemcpyOptSize = 0;
// The main memset sequence is a byte store followed by an MVC.
@@ -674,7 +677,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// generated by target-independent code don't when the byte value is
// variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
// than "STC;MVC". Handle the choice in target-specific code instead.
- MaxStoresPerMemset = 0;
+ MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
MaxStoresPerMemsetOptSize = 0;
// Default to having -disable-strictnode-mutation on
@@ -716,8 +719,7 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
const SystemZSubtarget &Subtarget) {
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
if (!Subtarget.hasVector() ||
(isFP128 && !Subtarget.hasVectorEnhancements1()))
return false;
@@ -790,14 +792,17 @@ bool SystemZVectorConstantInfo::isVectorConstantLegal(
return tryValue(SplatBitsZ | Middle);
}
-SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
- IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
- isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
- SplatBits = FPImm.bitcastToAPInt();
- unsigned Width = SplatBits.getBitWidth();
- IntBits <<= (SystemZ::VectorBits - Width);
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
+ if (IntImm.isSingleWord()) {
+ IntBits = APInt(128, IntImm.getZExtValue());
+ IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
+ } else
+ IntBits = IntImm;
+ assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
// Find the smallest splat.
+ SplatBits = IntImm;
+ unsigned Width = SplatBits.getBitWidth();
while (Width > 8) {
unsigned HalfSize = Width / 2;
APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
@@ -973,7 +978,8 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (!isInt<20>(AM.BaseOffs))
return false;
- AddressingMode SupportedAM(true, true);
+ bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
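+ // Vector load/store instructions only support a 12-bit unsigned
+ // displacement, so for vector types require D12 addressing.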
+ AddressingMode SupportedAM(!RequireD12, true);
if (I != nullptr)
SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
@@ -988,6 +994,30 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
return AM.Scale == 0 || AM.Scale == 1;
}
+bool SystemZTargetLowering::findOptimalMemOpLowering(
+ std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
+ unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ const int MVCFastLen = 16;
+
+ if (Limit != ~unsigned(0)) {
+ // Don't expand Op into scalar loads/stores in these cases:
+ if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
+ return false; // Small memcpy: Use MVC
+ if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
+ return false; // Small memset (first byte with STC/MVI): Use MVC
+ if (Op.isZeroMemset())
+ return false; // Memset zero: Use XC
+ }
+
+ return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
+ SrcAS, FuncAttributes);
+}
+
+EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const {
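+ // Prefer 128-bit vector accesses for the expansion when the target has
+ // vector support.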
+ return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
+}
+
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
return false;
@@ -1037,6 +1067,17 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
}
+ } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
+ switch (Constraint[1]) {
+ case 'Q': // Address with base and unsigned 12-bit displacement
+ case 'R': // Likewise, plus an index
+ case 'S': // Address with base and signed 20-bit displacement
+ case 'T': // Likewise, plus an index
+ return C_Address;
+
+ default:
+ break;
+ }
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -1218,12 +1259,17 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
- const MachineFunction &MF) const {
+Register
+SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
+ const MachineFunction &MF) const {
+ const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+
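+ // Only the stack pointer is exposed by name: r4 under XPLINK, r15 under
+ // ELF.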
+ Register Reg =
+ StringSwitch<Register>(RegName)
+ .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0)
+ .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0)
+ .Default(0);
- Register Reg = StringSwitch<Register>(RegName)
- .Case("r15", SystemZ::R15D)
- .Default(0);
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
@@ -1833,6 +1879,40 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
return Chain;
}
+// Generate a call taking the given operands as arguments and returning a
+// result of type RetVT.
+std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
+ SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
+ ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
+ bool DoesNotReturn, bool IsReturnValueUsed) const {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(Ops.size());
+
+ TargetLowering::ArgListEntry Entry;
+ for (SDValue Op : Ops) {
+ Entry.Node = Op;
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
+ Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
+ Args.push_back(Entry);
+ }
+
+ SDValue Callee =
+ DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
+ CLI.setDebugLoc(DL)
+ .setChain(Chain)
+ .setCallee(CallConv, RetTy, Callee, std::move(Args))
+ .setNoReturn(DoesNotReturn)
+ .setDiscardResult(!IsReturnValueUsed)
+ .setSExtResult(SignExtend)
+ .setZExtResult(!SignExtend);
+ return LowerCallTo(CLI);
+}
+
bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
@@ -2237,7 +2317,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
Load->getExtensionType() != ExtType) {
C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
Load->getBasePtr(), Load->getPointerInfo(),
- Load->getMemoryVT(), Load->getAlignment(),
+ Load->getMemoryVT(), Load->getAlign(),
Load->getMemOperand()->getFlags());
// Update the chain uses.
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
@@ -3471,6 +3551,32 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
+
+ if (Subtarget.isTargetXPLINK64())
+ return lowerVASTART_XPLINK(Op, DAG);
+ else
+ return lowerVASTART_ELF(Op, DAG);
+}
+
+SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SystemZMachineFunctionInfo *FuncInfo =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+
+ SDLoc DL(Op);
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV));
+}
+
+SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
+ SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
@@ -3514,14 +3620,90 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
- return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
+ uint32_t Sz =
+ Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
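+ // The ELF va_list is a 32-byte structure, whereas the XPLINK64 va_list is
+ // a single pointer.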
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
/*isTailCall*/ false, MachinePointerInfo(DstSV),
MachinePointerInfo(SrcSV));
}
-SDValue SystemZTargetLowering::
-lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Subtarget.isTargetXPLINK64())
+ return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
+ else
+ return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
+}
+
+SDValue
+SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
+ SelectionDAG &DAG) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+ SDLoc DL(Op);
+
+ // If the user has set the "no-realign-stack" function attribute, ignore
+ // the requested alloca alignments.
+ uint64_t AlignVal =
+ (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
+
+ uint64_t StackAlign = TFI->getStackAlignment();
+ uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
+ uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
+
+ SDValue NeededSpace = Size;
+
+ // Add extra space for alignment if needed.
+ EVT PtrVT = getPointerTy(MF.getDataLayout());
+ if (ExtraAlignSpace)
+ NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
+ DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
+
+ bool IsSigned = false;
+ bool DoesNotReturn = false;
+ bool IsReturnValueUsed = false;
+ EVT VT = Op.getValueType();
+ SDValue AllocaCall =
+ makeExternalCall(Chain, DAG, "@@ALCAXP", VT, makeArrayRef(NeededSpace),
+ CallingConv::C, IsSigned, DL, DoesNotReturn,
+ IsReturnValueUsed)
+ .first;
+
+ // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
+ // to end of call in order to ensure it isn't broken up from the call
+ // sequence.
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ Register SPReg = Regs.getStackPointerRegister();
+ Chain = AllocaCall.getValue(1);
+ SDValue Glue = AllocaCall.getValue(2);
+ SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
+ Chain = NewSPRegNode.getValue(1);
+
+ MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
+ SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
+ SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
+
+ // Dynamically realign if needed.
+ if (ExtraAlignSpace) {
+ Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
+ DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
+ Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
+ DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
+ }
+
+ SDValue Ops[2] = {Result, Chain};
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue
+SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
+ SelectionDAG &DAG) const {
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
MachineFunction &MF = DAG.getMachineFunction();
bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
@@ -5468,6 +5650,41 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
return Op;
}
+SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT ResultVT = Op.getSimpleValueType();
+ SDValue Arg = Op.getOperand(0);
+ auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned Check = CNode->getZExtValue();
+
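+ // Map each IS_FPCLASS category onto the corresponding TEST DATA CLASS
+ // (TDC) mask bits; the TDC node then sets the condition code.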
+ unsigned TDCMask = 0;
+ if (Check & fcSNan)
+ TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
+ if (Check & fcQNan)
+ TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
+ if (Check & fcPosInf)
+ TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
+ if (Check & fcNegInf)
+ TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
+ if (Check & fcPosNormal)
+ TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
+ if (Check & fcNegNormal)
+ TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
+ if (Check & fcPosSubnormal)
+ TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
+ if (Check & fcNegSubnormal)
+ TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
+ if (Check & fcPosZero)
+ TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
+ if (Check & fcNegZero)
+ TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
+ SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
+
+ SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
+ return getCCResult(DAG, Intr);
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -5585,6 +5802,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
case ISD::SRA:
return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
+ case ISD::IS_FPCLASS:
+ return lowerIS_FPCLASS(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -6142,6 +6361,23 @@ static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
return true;
}
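+// Return true if StoredVal is used only by stores of at most 16 bytes (or by
+// splat build_vectors that are themselves only stored), so that replacing it
+// with a replicated value cannot pessimize other users.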
+static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
+ for (auto *U : StoredVal->uses()) {
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
+ EVT CurrMemVT = ST->getMemoryVT().getScalarType();
+ if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
+ continue;
+ } else if (isa<BuildVectorSDNode>(U)) {
+ SDValue BuildVector = SDValue(U, 0);
+ if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
+ isOnlyUsedByStores(BuildVector, DAG))
+ continue;
+ }
+ return false;
+ }
+ return true;
+}
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6200,6 +6436,82 @@ SDValue SystemZTargetLowering::combineSTORE(
}
}
+ // Replicate a reg or immediate with VREP instead of scalar multiply or
+ // immediate load. It seems best to do this during the first DAGCombine as
+ // it is straightforward to handle the zero-extend node in the initial
+ // DAG, and there is no need to worry about keeping the new MemVT legal
+ // (e.g. when
+ // extracting an i16 element from a v16i8 vector).
+ if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
+ isOnlyUsedByStores(Op1, DAG)) {
+ SDValue Word = SDValue();
+ EVT WordVT;
+
+ // Find a replicated immediate and return it if found in Word and its
+ // type in WordVT.
+ auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
+ // Some constants are better handled with a scalar store.
+ if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
+ isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
+ return;
+ SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
+ if (VCI.isVectorConstantLegal(Subtarget) &&
+ VCI.Opcode == SystemZISD::REPLICATE) {
+ Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
+ WordVT = VCI.VecVT.getScalarType();
+ }
+ };
+
+ // Find a replicated register and return it if found in Word and its type
+ // in WordVT.
+ auto FindReplicatedReg = [&](SDValue MulOp) {
+ EVT MulVT = MulOp.getValueType();
+ if (MulOp->getOpcode() == ISD::MUL &&
+ (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
+ // Find a zero extended value and its type.
+ SDValue LHS = MulOp->getOperand(0);
+ if (LHS->getOpcode() == ISD::ZERO_EXTEND)
+ WordVT = LHS->getOperand(0).getValueType();
+ else if (LHS->getOpcode() == ISD::AssertZext)
+ WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
+ else
+ return;
+ // Find a replicating constant, e.g. 0x00010001.
+ if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
+ SystemZVectorConstantInfo VCI(
+ APInt(MulVT.getSizeInBits(), C->getZExtValue()));
+ if (VCI.isVectorConstantLegal(Subtarget) &&
+ VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
+ WordVT == VCI.VecVT.getScalarType())
+ Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
+ }
+ }
+ };
+
+ if (isa<BuildVectorSDNode>(Op1) &&
+ DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
+ SDValue SplatVal = Op1->getOperand(0);
+ if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
+ FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
+ else
+ FindReplicatedReg(SplatVal);
+ } else {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op1))
+ FindReplicatedImm(C, MemVT.getStoreSize());
+ else
+ FindReplicatedReg(Op1);
+ }
+
+ if (Word != SDValue()) {
+ assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
+ "Bad type handling");
+ unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
+ SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
+ return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
+ SN->getBasePtr(), SN->getMemOperand());
+ }
+ }
+
return SDValue();
}
@@ -6442,22 +6754,26 @@ SDValue SystemZTargetLowering::combineINT_TO_FP(
SDNode *N, DAGCombinerInfo &DCI) const {
if (DCI.Level != BeforeLegalizeTypes)
return SDValue();
+ SelectionDAG &DAG = DCI.DAG;
+ LLVMContext &Ctx = *DAG.getContext();
unsigned Opcode = N->getOpcode();
EVT OutVT = N->getValueType(0);
- SelectionDAG &DAG = DCI.DAG;
+ Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
SDValue Op = N->getOperand(0);
- unsigned OutScalarBits = OutVT.getScalarSizeInBits();
+ unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
// Insert an extension before type-legalization to avoid scalarization, e.g.:
// v2f64 = uint_to_fp v2i16
// =>
// v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
- if (OutVT.isVector() && OutScalarBits > InScalarBits) {
- MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()),
- OutVT.getVectorNumElements());
+ if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
+ OutScalarBits <= 64) {
+ unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
+ EVT ExtVT = EVT::getVectorVT(
+ Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
unsigned ExtOpcode =
- (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+ (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
}
@@ -7271,8 +7587,7 @@ MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
MachineBasicBlock *MBB) const {
assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
unsigned CCValid = MI.getOperand(3).getImm();
unsigned CCMask = MI.getOperand(4).getImm();
@@ -7368,8 +7683,7 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
unsigned StoreOpcode,
unsigned STOCOpcode,
bool Invert) const {
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
Register SrcReg = MI.getOperand(0).getReg();
MachineOperand Base = MI.getOperand(1);
@@ -7460,8 +7774,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
unsigned BitSize, bool Invert) const {
MachineFunction &MF = *MBB->getParent();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -7579,8 +7892,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
unsigned KeepOldMask, unsigned BitSize) const {
MachineFunction &MF = *MBB->getParent();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -7693,8 +8005,7 @@ MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MBB->getParent();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
@@ -7810,8 +8121,7 @@ MachineBasicBlock *
SystemZTargetLowering::emitPair128(MachineInstr &MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MBB->getParent();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI.getDebugLoc();
@@ -7838,8 +8148,7 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
MachineBasicBlock *MBB,
bool ClearEven) const {
MachineFunction &MF = *MBB->getParent();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI.getDebugLoc();
@@ -7870,8 +8179,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned Opcode, bool IsMemset) const {
MachineFunction &MF = *MBB->getParent();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI.getDebugLoc();
@@ -8225,8 +8533,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI.getDebugLoc();
@@ -8331,8 +8638,7 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo *MRI = &MF.getRegInfo();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
Register SrcReg = MI.getOperand(0).getReg();
@@ -8355,8 +8661,7 @@ MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
MachineInstr &MI, MachineBasicBlock *MBB) const {
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo *MRI = &MF.getRegInfo();
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
const unsigned ProbeSize = getStackProbeSize(MF);
Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index de446f33f5f1..b9c95274f62b 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -419,8 +419,7 @@ public:
getNumRegisters(LLVMContext &Context, EVT VT,
Optional<MVT> RegisterVT) const override {
// i128 inline assembly operand.
- if (VT == MVT::i128 &&
- RegisterVT.hasValue() && RegisterVT.getValue() == MVT::Untyped)
+ if (VT == MVT::i128 && RegisterVT && *RegisterVT == MVT::Untyped)
return 1;
return TargetLowering::getNumRegisters(Context, VT);
}
@@ -457,6 +456,12 @@ public:
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
+ bool
+ findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
+ const MemOp &Op, unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const override;
+ EVT getOptimalMemOpType(const MemOp &Op,
+ const AttributeList &FuncAttributes) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
@@ -467,6 +472,8 @@ public:
return VT == MVT::i32 || VT == MVT::i64;
}
+ bool shouldConsiderGEPOffsetSplit() const override { return true; }
+
const char *getTargetNodeName(unsigned Opcode) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
@@ -497,6 +504,19 @@ public:
case 'T':
return InlineAsm::Constraint_T;
}
+ } else if (ConstraintCode.size() == 2 && ConstraintCode[0] == 'Z') {
+ switch (ConstraintCode[1]) {
+ default:
+ break;
+ case 'Q':
+ return InlineAsm::Constraint_ZQ;
+ case 'R':
+ return InlineAsm::Constraint_ZR;
+ case 'S':
+ return InlineAsm::Constraint_ZS;
+ case 'T':
+ return InlineAsm::Constraint_ZT;
+ }
}
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
@@ -553,6 +573,12 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ std::pair<SDValue, SDValue>
+ makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName,
+ EVT RetVT, ArrayRef<SDValue> Ops, CallingConv::ID CallConv,
+ bool IsSigned, SDLoc DL, bool DoesNotReturn,
+ bool IsReturnValueUsed) const;
+
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -622,8 +648,12 @@ private:
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART_ELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART_XPLINK(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
@@ -657,6 +687,7 @@ private:
SDValue lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
+ SDValue lowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
bool canTreatAsByteVector(EVT VT) const;
SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
@@ -743,12 +774,15 @@ private:
 APInt SplatUndef; // Bits corresponding to undef operands of the BVN.
unsigned SplatBitSize = 0;
bool isFP128 = false;
-
public:
unsigned Opcode = 0;
SmallVector<unsigned, 2> OpVals;
MVT VecVT;
- SystemZVectorConstantInfo(APFloat FPImm);
+ SystemZVectorConstantInfo(APInt IntImm);
+ SystemZVectorConstantInfo(APFloat FPImm)
+ : SystemZVectorConstantInfo(FPImm.bitcastToAPInt()) {
+ isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
+ }
SystemZVectorConstantInfo(BuildVectorSDNode *BVN);
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget);
};
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 4b6aa60f5d55..1436be1e4052 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -30,6 +31,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/BranchProbability.h"
@@ -119,9 +121,11 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MBB->getParent();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
MachineOperand &OffsetMO = MI->getOperand(2);
+ SystemZCallingConventionRegisters *Regs = STI.getSpecialRegisters();
uint64_t Offset = (MFFrame.getMaxCallFrameSize() +
- SystemZMC::ELFCallFrameSize +
+ Regs->getCallFrameSize() +
+ Regs->getStackPointerBias() +
OffsetMO.getImm());
unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
assert(NewOpcode && "No support for huge argument lists yet");
@@ -393,8 +397,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (std::next(I) != MBB.end())
- std::next(I)->eraseFromParent();
+ MBB.erase(std::next(I), MBB.end());
Cond.clear();
FBB = nullptr;
@@ -674,6 +677,7 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (Opcode == SystemZ::Return ||
+ Opcode == SystemZ::Return_XPLINK ||
Opcode == SystemZ::Trap ||
Opcode == SystemZ::CallJG ||
Opcode == SystemZ::CallBR)
@@ -731,18 +735,20 @@ bool SystemZInstrInfo::PredicateInstruction(
.addReg(SystemZ::CC, RegState::Implicit);
return true;
}
- if (Opcode == SystemZ::Return) {
- MI.setDesc(get(SystemZ::CondReturn));
+ if (Opcode == SystemZ::Return || Opcode == SystemZ::Return_XPLINK) {
+ MI.setDesc(get(Opcode == SystemZ::Return ? SystemZ::CondReturn
+ : SystemZ::CondReturn_XPLINK));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid).addImm(CCMask)
- .addReg(SystemZ::CC, RegState::Implicit);
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
return true;
}
if (Opcode == SystemZ::CallJG) {
MachineOperand FirstOp = MI.getOperand(0);
const uint32_t *RegMask = MI.getOperand(1).getRegMask();
- MI.RemoveOperand(1);
- MI.RemoveOperand(0);
+ MI.removeOperand(1);
+ MI.removeOperand(0);
MI.setDesc(get(SystemZ::CallBRCL));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(CCValid)
@@ -755,8 +761,8 @@ bool SystemZInstrInfo::PredicateInstruction(
if (Opcode == SystemZ::CallBR) {
MachineOperand Target = MI.getOperand(0);
const uint32_t *RegMask = MI.getOperand(1).getRegMask();
- MI.RemoveOperand(1);
- MI.RemoveOperand(0);
+ MI.removeOperand(1);
+ MI.removeOperand(0);
MI.setDesc(get(SystemZ::CallBCR));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(CCValid).addImm(CCMask)
@@ -1626,7 +1632,8 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
}
unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
- int64_t Offset) const {
+ int64_t Offset,
+ const MachineInstr *MI) const {
const MCInstrDesc &MCID = get(Opcode);
int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
@@ -1648,6 +1655,24 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
// Check whether Opcode allows signed 20-bit displacements.
if (MCID.TSFlags & SystemZII::Has20BitOffset)
return Opcode;
+
+ // If a VR32/VR64 reg ended up in an FP register, use the FP opcode.
+ if (MI && MI->getOperand(0).isReg()) {
+ Register Reg = MI->getOperand(0).getReg();
+ if (Reg.isPhysical() && SystemZMC::getFirstReg(Reg) < 16) {
+ switch (Opcode) {
+ case SystemZ::VL32:
+ return SystemZ::LEY;
+ case SystemZ::VST32:
+ return SystemZ::STEY;
+ case SystemZ::VL64:
+ return SystemZ::LDY;
+ case SystemZ::VST64:
+ return SystemZ::STDY;
+ default: break;
+ }
+ }
+ }
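+ // Example effect (illustrative, not from the original patch): a VL64
+ // whose virtual register was assigned to %f0-%f15 is rewritten to LDY,
+ // which accepts the signed 20-bit displacement that VL64 lacks.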
}
return 0;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 9e5b2729a707..0525f5827736 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -47,8 +47,7 @@ enum {
CCMaskFirst = (1 << 18),
CCMaskLast = (1 << 19),
IsLogical = (1 << 20),
- CCIfNoSignedWrap = (1 << 21),
- MemMemOp = (1 << 22)
+ CCIfNoSignedWrap = (1 << 21)
};
static inline unsigned getAccessSize(unsigned int Flags) {
@@ -309,8 +308,10 @@ public:
// and the caller wants to perform that instruction's operation on an
// address that has displacement Offset. Return the opcode of a suitable
// instruction (which might be Opcode itself) or 0 if no such instruction
- // exists.
- unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+ // exists. MI may be passed to allow examining its physical register
+ // operands (e.g. if a VR32/64 reg ended up as an FP or vector reg).
+ unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset,
+ const MachineInstr *MI = nullptr) const;
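+ // Usage sketch (illustrative): getOpcodeForOffset(SystemZ::VL64, Offset,
+ // &MI) may yield SystemZ::LDY when MI's register operand is %f0-%f15.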
// Return true if Opcode has a mapping in 12 <-> 20 bit displacements.
bool hasDisplacementPairInsn(unsigned Opcode) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 84f1e0fb428c..ed7e3c02a10d 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -42,6 +42,10 @@ let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1,
hasSideEffects = 1 in
def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>;
+let Defs = [R3D, CC], Uses = [R3D, R4D], hasNoSchedulingInfo = 1,
+ hasSideEffects = 1 in
+ def XPLINK_STACKALLOC : Pseudo<(outs), (ins), []>;
+
//===----------------------------------------------------------------------===//
// Branch instructions
//===----------------------------------------------------------------------===//
@@ -285,6 +289,10 @@ let Predicates = [IsTargetXPLINK64] in {
def CallBASR_XPLINK64 : Alias<4, (outs), (ins ADDR64:$R2, variable_ops),
[(z_call ADDR64:$R2)]>;
}
+
+ let isCall = 1, Defs = [R3D, CC], Uses = [FPC] in {
+ def CallBASR_STACKEXT : Alias<4, (outs), (ins ADDR64:$R2), []>;
+ }
}
// Regular calls.
@@ -336,13 +344,25 @@ let isCall = 1, isTerminator = 1, isReturn = 1 in {
def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3, ADDR64:$R4), []>;
}
-// A return instruction (br %r14) for ELF and (b 2 %r7) for XPLink.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
- def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
+let Predicates = [IsTargetXPLINK64] in {
+ // A return instruction (b 2(%r7)).
+ let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+ def Return_XPLINK : Alias<4, (outs), (ins), [(z_retflag)]>;
+
+ // A conditional return instruction (bc <cond>, 2(%r7)).
+ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
+ def CondReturn_XPLINK : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
+
+let Predicates = [IsTargetELF] in {
+ // A return instruction (br %r14).
+ let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+ def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
-// A conditional return instruction (bcr <cond>, %r14).
-let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
- def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+ // A conditional return instruction (bcr <cond>, %r14).
+ let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
+ def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+}
// Fused compare and conditional returns.
let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
diff --git a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
index d6c795985448..1e6f971906e9 100644
--- a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -66,7 +66,7 @@ bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
if (skipFunction(F.getFunction()))
return false;
- TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo();
MF = &F;
SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>();
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
index 9b6aa3593ce0..cada880a82d8 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -14,3 +14,9 @@ using namespace llvm;
// pin vtable to this file
void SystemZMachineFunctionInfo::anchor() {}
+MachineFunctionInfo *SystemZMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<SystemZMachineFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index ec4b812eb0e1..de73a5d86422 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -41,6 +41,11 @@ public:
: VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
// Get and set the first and last call-saved GPR that should be saved by
// this function and the SP offset for the STMG. These are 0 if no GPRs
// need to be saved or restored.
diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
index 5a2cfc53da49..e15f9027cc20 100644
--- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -17,6 +17,7 @@
#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
@@ -253,7 +254,7 @@ bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) {
}
bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo();
bool Modified = false;
for (auto &MBB : MF)
diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td
index 4fceaa14c598..d00b94d00242 100644
--- a/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -38,5 +38,6 @@ def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
def : ProcessorModel<"arch13", Z15Model, Arch13SupportedFeatures.List>;
def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>;
-def : ProcessorModel<"arch14", Z15Model, Arch14SupportedFeatures.List>;
+def : ProcessorModel<"arch14", Z16Model, Arch14SupportedFeatures.List>;
+def : ProcessorModel<"z16", Z16Model, Arch14SupportedFeatures.List>;
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 48cec176b006..be65fe55c634 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -290,8 +290,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- auto *TII =
- static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ auto *TII = MF.getSubtarget<SystemZSubtarget>().getInstrInfo();
const SystemZFrameLowering *TFI = getFrameLowering(MF);
DebugLoc DL = MI->getDebugLoc();
@@ -321,7 +320,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// See if the offset is in range, or if an equivalent instruction that
// accepts the offset exists.
unsigned Opcode = MI->getOpcode();
- unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset);
+ unsigned OpcodeForOffset = TII->getOpcodeForOffset(Opcode, Offset, &*MI);
if (OpcodeForOffset) {
if (OpcodeForOffset == SystemZ::LE &&
MF.getSubtarget<SystemZSubtarget>().hasVector()) {
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 8ce01074873a..93ffa9847f06 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -74,7 +74,7 @@ public:
/// Destroys the object. Bogus destructor allowing derived classes
/// to override it.
- virtual ~SystemZCallingConventionRegisters(){};
+ virtual ~SystemZCallingConventionRegisters() = default;
};
/// XPLINK64 calling convention specific use registers
@@ -102,7 +102,7 @@ public:
int getStackPointerBias() override final { return 2048; }
/// Destroys the object. Bogus destructor overriding base class destructor
- ~SystemZXPLINK64Registers(){};
+ ~SystemZXPLINK64Registers() = default;
};
/// ELF calling convention specific use registers
@@ -128,7 +128,7 @@ public:
int getStackPointerBias() override final { return 0; }
/// Destroys the object. Bogus destructor overriding base class destructor
- ~SystemZELFRegisters(){};
+ ~SystemZELFRegisters() = default;
};
struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
diff --git a/llvm/lib/Target/SystemZ/SystemZSchedule.td b/llvm/lib/Target/SystemZ/SystemZSchedule.td
index 119e3ee7c22c..d683cc042e5c 100644
--- a/llvm/lib/Target/SystemZ/SystemZSchedule.td
+++ b/llvm/lib/Target/SystemZ/SystemZSchedule.td
@@ -53,12 +53,14 @@ foreach Num = ["", "2", "3", "4", "5", "6"] in {
def "DFU"#Num : SchedWrite;
}
-def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit.
+def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit (30 cycles).
+def VecFPd20 : SchedWrite; // Blocking BFP div/sqrt unit, 20 cycles.
def VBU : SchedWrite; // Virtual branching unit
def MCD : SchedWrite; // Millicode
+include "SystemZScheduleZ16.td"
include "SystemZScheduleZ15.td"
include "SystemZScheduleZ14.td"
include "SystemZScheduleZ13.td"
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
index f4777b0097f1..fd01a8a941c9 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -168,12 +168,12 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
-def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;
//===----------------------------------------------------------------------===//
// Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
index f74c0d594482..3f406736a71f 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -169,12 +169,12 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
-def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;
//===----------------------------------------------------------------------===//
// Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
index d17e58fc6318..6ae911c3f3eb 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td
@@ -169,12 +169,12 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
-def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;
//===----------------------------------------------------------------------===//
// Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
new file mode 100644
index 000000000000..ca688671a7e2
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ16.td
@@ -0,0 +1,1728 @@
+//-- SystemZScheduleZ16.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z16 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+// Pseudos expanded right after isel do not need to be modelled here.
+//
+//===----------------------------------------------------------------------===//
+
+def Z16Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch14UnsupportedFeatures.List;
+
+ let IssueWidth = 6; // Number of instructions decoded per cycle.
+ let MicroOpBufferSize = 60; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 20;
+}
+
+let SchedModel = Z16Model in {
+// These definitions need the SchedModel value. They could be put in a
+// common subtarget include file, but the TableGen include system currently
+// (2016) rejects multiple includes of the same file.
+
+// Decoder grouping rules
+let NumMicroOps = 1 in {
+ def : WriteRes<NormalGr, []>;
+ def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
+ def : WriteRes<EndGroup, []> { let EndGroup = 1; }
+}
+def : WriteRes<Cracked, []> {
+ let NumMicroOps = 2;
+ let BeginGroup = 1;
+}
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 3;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
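+// Illustrative: a GroupAlone instruction (e.g. MLGR below) both begins
+// and ends a decoder group, occupying the group alone.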
+
+// Incoming latency removed from the register operand which is used together
+// with a memory operand by the instruction.
+def : ReadAdvance<RegReadAdv, 4>;
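+// Illustrative: an instruction with a RegReadAdv operand (e.g. AG below)
+// thus reads that register 4 cycles late, hiding up to 4 cycles of the
+// producer's latency while its memory operand is loaded.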
+
+// LoadLatency (above) is not used for instructions in this file. This is
+// instead the role of LSULatency, which is the latency value added to the
+// result of loads and instructions with folded memory operands.
+def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; }
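+// Illustrative: a plain load such as L (see the Loads section below) is
+// given [LSULatency, LSU, NormalGr], i.e. a 4-cycle result latency.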
+
+let NumMicroOps = 0 in {
+ foreach L = 1-30 in
+ def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; }
+}
+
+// Execution units.
+def Z16_FXaUnit : ProcResource<2>;
+def Z16_FXbUnit : ProcResource<2>;
+def Z16_LSUnit : ProcResource<2>;
+def Z16_VecUnit : ProcResource<2>;
+def Z16_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Z16_VBUnit : ProcResource<2>;
+def Z16_MCD : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+let NumMicroOps = 0 in {
+ def : WriteRes<FXa, [Z16_FXaUnit]>;
+ def : WriteRes<FXb, [Z16_FXbUnit]>;
+ def : WriteRes<LSU, [Z16_LSUnit]>;
+ def : WriteRes<VecBF, [Z16_VecUnit]>;
+ def : WriteRes<VecDF, [Z16_VecUnit]>;
+ def : WriteRes<VecDFX, [Z16_VecUnit]>;
+ def : WriteRes<VecMul, [Z16_VecUnit]>;
+ def : WriteRes<VecStr, [Z16_VecUnit]>;
+ def : WriteRes<VecXsPm, [Z16_VecUnit]>;
+ foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+ def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Z16_FXaUnit]>;
+ def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Z16_FXbUnit]>;
+ def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Z16_LSUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Z16_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Z16_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Z16_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Z16_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Z16_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Z16_VecUnit]>;
+ }}
+
+ def : WriteRes<VecFPd, [Z16_VecFPdUnit]> { let ResourceCycles = [30]; }
+ def : WriteRes<VecFPd20, [Z16_VecFPdUnit]> { let ResourceCycles = [20]; }
+
+ def : WriteRes<VBU, [Z16_VBUnit]>; // Virtual Branching Unit
+}
+
+def : WriteRes<MCD, [Z16_MCD]> {
+  let NumMicroOps = 3;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs are used to preserve the readability of the InstrInfo
+// files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
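+// Illustrative reading of one entry (not from the original file):
+// [WLat1, FXa, NormalGr] means the result is ready after one cycle, one
+// FXa unit is consumed, and there are no decoder-grouping constraints.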
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+// Pseudo -> LA / LAY
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>;
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb2, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>;
+
+// Pseudo -> reg move
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>;
+
+// Loads
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr],
+ (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>;
+
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>;
+
+// Store multiple
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address ( -> larl )
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>;
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "IC32(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr],
+ (instregex "ICM(H|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "A(Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AL(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "ALG(F)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>;
+
+// Logical addition with carry
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+ (instregex "ALC(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "S(G|Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+ (instregex "SLB(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "N(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "O(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "X(G|Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Combined logical operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MS(GF|Y)?$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>;
+def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>;
+def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>;
+def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>;
+def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone],
+ (instregex "M(FY|L)?$")>;
+def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>;
+def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>;
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MSC$")>;
+def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MSGC$")>;
+def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>;
+def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
+def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
+ (instregex "DSG(F)?$")>;
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+ (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
+ (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "C(G|Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "CL(Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>;
+
+// Compare halfword
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>;
+def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>;
+def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
+ (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3,
+ GroupAlone3], (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[WLat30, MCD], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[WLat30, MCD], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
+ (instregex "TRT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD],
+ (instregex "KM(C|F|O|CTR|A)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "(KIMD|KLMD|KMAC|KDSA)$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "(PCC|PPNO|PRNO)$")>;
+
+//===----------------------------------------------------------------------===//
+// Guarded storage
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>;
+def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
+ (instregex "CVBG$")>;
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
+ (instregex "CVB(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
+def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
+
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
+ (instregex "(A|S|ZA)P$")>;
+def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
+def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
+def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
+def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
+
+// Transaction end
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[WLat30, MCD], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>;
+
+// String instructions
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD],
+ (instregex "UPT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "NNPA$")>;
+
+// Execute
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
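+// (How to read these entries, informally: in an entry such as
+//   def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>;
+// WLat1 gives the latency of the defined value, FXb and LSU name the
+// processor resources consumed, and the last operand (NormalGr, Cracked,
+// GroupAlone*, EndGroup or MCD) describes the decoder grouping. An optional
+// RegReadAdv marks a register operand that is read later than the address
+// operands, via a ReadAdvance, shortening the producer's effective latency.)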
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>;
+def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
+ (instregex "LTXBR(Compare)?$")>;
+
+// Copy sign
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked],
+ (instregex "C(F|G)(E|D)BR(A)?$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked],
+ (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQEBR$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDBR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "A(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "S(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MXDB$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)EB$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DEB$")>;
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DDB$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DEBR$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDBR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[WLat30, MCD], (instregex "SFASR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LFAS$")>;
+def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
+
+// Convert from fixed
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "SQER$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQDR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "A(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "S(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MXD$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[WLat20, VecDF4, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "MY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)(E|D)$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MAY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "MAY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>;
+
+// Division
+def : InstRW<[WLat20, RegReadAdv, VecFPd20, LSU, NormalGr], (instregex "DE$")>;
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "DD$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "DER$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "DDR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "C(E|D)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>;
+def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>;
+def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>;
+def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>;
+def : InstRW<[WLat20, FXb, VecDF, Cracked], (instregex "CDLGTR$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat20, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked],
+ (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked],
+ (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[WLat20, WLat20, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[WLat20, WLat20, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>;
+def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[WLat20, VecDF, NormalGr], (instregex "MDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
+def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>;
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
+ (instregex "VGE(F|G)$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+ (instregex "VLM(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
+def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "VLEBR(H|F|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// Conversion and rounding
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>;
+
+// Sign operations
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>;
+
+// Minimum / maximum
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>;
+
+// Test data class
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>;
+
+// Add / subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>;
+
+// Multiply / multiply-and-add/subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>;
+def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>;
+
+// Divide / square root
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFDSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFDSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>;
+def : InstRW<[WLat20, VecFPd20, NormalGr], (instregex "WFSQSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+ (instregex "WF(C|K)(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+ (instregex "VF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>;
+def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+ (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+ (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// NNP assist instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCFN$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLFN(L|H)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VC(R)?NF$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Packed-decimal instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
+def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>;
+def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone],
+ (instregex "VCVB(G)?(Opt)?$")>;
+def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone],
+ (instregex "VCVD(G)?$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>;
+def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
+
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCH(S|D|X)?P$")>;
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VSCSHP$")>;
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "VCSPH")>;
+def : InstRW<[WLat2, WLat2, VecXsPm, NormalGr], (instregex "VCLZDP")>;
+def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRPR")>;
+def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VPKZR")>;
+def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZH")>;
+def : InstRW<[WLat2, WLat2, VecDFX, NormalGr], (instregex "VUPKZL")>;
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?(Y)?$")>;
+def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
+def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Breaking-Event-Address-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LBEAR")>;
+def : InstRW<[WLat1, LSU2, FXb, GroupAlone], (instregex "STBEAR")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "ISKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "IVSK$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>;
+def : InstRW<[WLat30, MCD], (instregex "IRBM$")>;
+def : InstRW<[WLat30, MCD], (instregex "PFMF$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGIN$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RDP(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTLB$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STRAG$")>;
+def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2],
+ (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "LASP$")>;
+def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PC$")>;
+def : InstRW<[WLat30, MCD], (instregex "PR$")>;
+def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RP$")>;
+def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "BAKR$")>;
+def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
+def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "STAP$")>;
+def : InstRW<[WLat30, MCD], (instregex "STIDP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "ECAG$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTF$")>;
+def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "QPACI$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "SVC$")>;
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>;
+def : InstRW<[WLat30, MCD], (instregex "DIAG$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "RCHP$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCHM$")>;
+def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPI$")>;
+def : InstRW<[WLat30, MCD], (instregex "SAL$")>;
+
+}
+
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
index 0f01a4291cf7..173cf960d2bd 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -147,12 +147,12 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
-def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, LSU, EndGroup], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "CondReturn(_XPLINK)?$")>;
//===----------------------------------------------------------------------===//
// Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 096a95a82ec8..d2060471d65e 100644
--- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -152,12 +152,12 @@ def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
// Call
def : InstRW<[WLat1, FXU2, VBU, GroupAlone], (instregex "(Call)?BRAS$")>;
def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>;
-def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>;
+def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>;
def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
// Return
-def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return$")>;
-def : InstRW<[WLat1, LSU, NormalGr], (instregex "CondReturn$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "Return(_XPLINK)?$")>;
+def : InstRW<[WLat1, LSU, NormalGr], (instregex "CondReturn(_XPLINK)?$")>;
//===----------------------------------------------------------------------===//
// Move instructions
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index db4b4879b33a..ce30d8ef2cba 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -88,7 +88,7 @@ static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile,
- MachinePointerInfo DstPtrInfo) const {
+ bool AlwaysInline, MachinePointerInfo DstPtrInfo) const {
EVT PtrVT = Dst.getValueType();
if (IsVolatile)
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index da6725777e43..6ac5bf8c6c1a 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -31,7 +31,7 @@ public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Dst, SDValue Byte,
SDValue Size, Align Alignment,
- bool IsVolatile,
+ bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const override;
std::pair<SDValue, SDValue>
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 92930dad80ef..30b22fa1ce92 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -162,10 +162,10 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
MachineOperand Src(MI.getOperand(1));
MachineOperand Suppress(MI.getOperand(2));
MachineOperand Mode(MI.getOperand(3));
- MI.RemoveOperand(3);
- MI.RemoveOperand(2);
- MI.RemoveOperand(1);
- MI.RemoveOperand(0);
+ MI.removeOperand(3);
+ MI.removeOperand(2);
+ MI.removeOperand(1);
+ MI.removeOperand(0);
MI.setDesc(TII->get(Opcode));
MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
.add(Dest)
@@ -190,9 +190,9 @@ bool SystemZShortenInst::shortenFusedFPOp(MachineInstr &MI, unsigned Opcode) {
MachineOperand Lhs(LHSMO);
MachineOperand Rhs(RHSMO);
MachineOperand Src(AccMO);
- MI.RemoveOperand(3);
- MI.RemoveOperand(2);
- MI.RemoveOperand(1);
+ MI.removeOperand(3);
+ MI.removeOperand(2);
+ MI.removeOperand(1);
MI.setDesc(TII->get(Opcode));
MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
.add(Src)
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 75c0d454d904..f6889035b654 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -27,13 +27,14 @@ static cl::opt<bool> UseSubRegLiveness(
// Pin the vtable to this file.
void SystemZSubtarget::anchor() {}
-SystemZSubtarget &
-SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- StringRef CPUName = CPU;
- if (CPUName.empty())
- CPUName = "generic";
+SystemZSubtarget &SystemZSubtarget::initializeSubtargetDependencies(
+ StringRef CPU, StringRef TuneCPU, StringRef FS) {
+ if (CPU.empty())
+ CPU = "generic";
+ if (TuneCPU.empty())
+ TuneCPU = CPU;
// Parse features string.
- ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);
+ ParseSubtargetFeatures(CPU, TuneCPU, FS);
// -msoft-float implies -mno-vx.
if (HasSoftFloat)
@@ -64,9 +65,10 @@ SystemZSubtarget::initializeSpecialRegisters() {
}
SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
+ const std::string &TuneCPU,
const std::string &FS,
const TargetMachine &TM)
- : SystemZGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ : SystemZGenSubtargetInfo(TT, CPU, TuneCPU, FS),
HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false),
HasFPExtension(false), HasPopulationCount(false),
HasMessageSecurityAssist3(false), HasMessageSecurityAssist4(false),
@@ -88,8 +90,8 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasResetDATProtection(false), HasProcessorActivityInstrumentation(false),
HasSoftFloat(false), TargetTriple(TT),
SpecialRegisters(initializeSpecialRegisters()),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(SystemZFrameLowering::create(*this)) {}
+ InstrInfo(initializeSubtargetDependencies(CPU, TuneCPU, FS)),
+ TLInfo(TM, *this), FrameLowering(SystemZFrameLowering::create(*this)) {}
bool SystemZSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 98f7094fcb48..cd16c19f9bfa 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -84,12 +84,14 @@ private:
std::unique_ptr<const SystemZFrameLowering> FrameLowering;
SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef TuneCPU,
StringRef FS);
SystemZCallingConventionRegisters *initializeSpecialRegisters();
public:
SystemZSubtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM);
+ const std::string &TuneCPU, const std::string &FS,
+ const TargetMachine &TM);
SystemZCallingConventionRegisters *getSpecialRegisters() const {
assert(SpecialRegisters && "Unsupported SystemZ calling convention");
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f1469fe8f56b..31f8ee2f894d 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -118,7 +118,7 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
// Static code is suitable for use in a dynamic executable; there is no
// separate DynamicNoPIC model.
- if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
+ if (!RM || *RM == Reloc::DynamicNoPIC)
return Reloc::Static;
return *RM;
}
@@ -187,10 +187,13 @@ SystemZTargetMachine::~SystemZTargetMachine() = default;
const SystemZSubtarget *
SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU =
CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
std::string FS =
FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
@@ -202,13 +205,14 @@ SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
if (softFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
- auto &I = SubtargetMap[CPU + FS];
+ auto &I = SubtargetMap[CPU + TuneCPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, FS, *this);
+ I = std::make_unique<SystemZSubtarget>(TargetTriple, CPU, TuneCPU, FS,
+ *this);
}
return I.get();
@@ -334,6 +338,6 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetTransformInfo
-SystemZTargetMachine::getTargetTransformInfo(const Function &F) {
+SystemZTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(SystemZTTIImpl(this, F));
}
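
Taken together, the subtarget hunks above thread a separate tune CPU from function attributes down to SystemZGenSubtargetInfo. A minimal sketch of the resolution order, assuming only what the hunks show (the helper below is illustrative, not part of LLVM):

#include "llvm/IR/Function.h"
#include <string>
#include <utility>
using namespace llvm;

// Hypothetical helper mirroring the attribute fallbacks in getSubtargetImpl:
// per-function "target-cpu"/"tune-cpu" override the TargetMachine defaults,
// and an absent "tune-cpu" degrades to the selected CPU.
static std::pair<std::string, std::string>
resolveCPUs(const Function &F, const std::string &TargetCPU) {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute TuneAttr = F.getFnAttribute("tune-cpu");
  std::string CPU =
      CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
  std::string TuneCPU =
      TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
  return {CPU, TuneCPU}; // both now feed the SubtargetMap cache key
}

Note the cache-key change above (CPU + TuneCPU + FS): two functions that differ only in their "tune-cpu" attribute now get distinct subtargets.
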
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 9ea03e104fc9..2cdb33a5064b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -44,7 +44,7 @@ public:
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
index a610a90d2069..1b4e93ebe39b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
namespace llvm {
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 6d66ebfced05..69914049a00c 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -30,6 +30,42 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
+static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
+ bool UsedAsMemCpySource = false;
+ for (const User *U : V->users())
+ if (const Instruction *User = dyn_cast<Instruction>(U)) {
+ if (isa<BitCastInst>(User) || isa<GetElementPtrInst>(User)) {
+ UsedAsMemCpySource |= isUsedAsMemCpySource(User, OtherUse);
+ continue;
+ }
+ if (const MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(User)) {
+ if (Memcpy->getOperand(1) == V && !Memcpy->isVolatile()) {
+ UsedAsMemCpySource = true;
+ continue;
+ }
+ }
+ OtherUse = true;
+ }
+ return UsedAsMemCpySource;
+}
+
+unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
+ unsigned Bonus = 0;
+
+ // Increase the threshold if an incoming argument is used only as a memcpy
+ // source.
+ if (Function *Callee = CB->getCalledFunction())
+ for (Argument &Arg : Callee->args()) {
+ bool OtherUse = false;
+ if (isUsedAsMemCpySource(&Arg, OtherUse) && !OtherUse)
+ Bonus += 150;
+ }
+
+ LLVM_DEBUG(if (Bonus)
+ dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
+ return Bonus;
+}
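+
+// Illustration (ours, not part of the patch): a callee whose argument
+// qualifies for the bonus. 'Src' is only ever read as the source of a
+// non-volatile memcpy, so adjustInliningThreshold() returns 150 for calls
+// to it, and inlining exposes the caller's buffer to the memcpy lowering:
+//
+//   void copyOut(char *Dst, const char *Src) {
+//     memcpy(Dst, Src, 256); // 'Src' used solely as a memcpy source
+//   }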
+
InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
@@ -303,8 +339,8 @@ void SystemZTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getPeelingPreferences(L, SE, PP);
}
-bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2) {
+bool SystemZTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) {
// SystemZ specific: check instruction count (first), and don't care about
// ImmCost, since offsets are checked explicitly.
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
@@ -559,7 +595,8 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp) {
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasVector()) {
unsigned NumVectors = getNumVectorRegs(Tp);
@@ -781,7 +818,11 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
if (SrcScalarBits >= 8) {
- // ZExt/SExt will be handled with one unpack per doubling of width.
+ // ZExt will use either a single unpack or a vector permute.
+ if (Opcode == Instruction::ZExt)
+ return NumDstVectors;
+
+ // SExt will be handled with one unpack per doubling of width.
unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
  // For types that span multiple vector registers, some additional
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index db4ec794b3e4..33317e799eab 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -37,6 +37,7 @@ public:
/// @{
unsigned getInliningThresholdMultiplier() { return 3; }
+ unsigned adjustInliningThreshold(const CallBase *CB) const;
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind);
@@ -58,8 +59,8 @@ public:
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
- bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2);
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2);
/// @}
/// \name Vector TTI Implementations
@@ -92,7 +93,8 @@ public:
const Instruction *CxtI = nullptr);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp);
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
diff --git a/llvm/lib/Target/TargetIntrinsicInfo.cpp b/llvm/lib/Target/TargetIntrinsicInfo.cpp
index 256514c8c22d..d44a34984c42 100644
--- a/llvm/lib/Target/TargetIntrinsicInfo.cpp
+++ b/llvm/lib/Target/TargetIntrinsicInfo.cpp
@@ -11,15 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringMapEntry.h"
#include "llvm/IR/Function.h"
using namespace llvm;
-TargetIntrinsicInfo::TargetIntrinsicInfo() {
-}
+TargetIntrinsicInfo::TargetIntrinsicInfo() = default;
-TargetIntrinsicInfo::~TargetIntrinsicInfo() {
-}
+TargetIntrinsicInfo::~TargetIntrinsicInfo() = default;
unsigned TargetIntrinsicInfo::getIntrinsicID(const Function *F) const {
const ValueName *ValName = F->getValueName();
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 7954f0f09faf..8f633adbb9ef 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -24,10 +24,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 390457dbb2bc..8d1ad617889c 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -13,17 +13,14 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -63,16 +60,13 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
+ RESET_OPTION(ApproxFuncFPMath, "approx-func-fp-math");
}
/// Returns the code generation relocation model. The choices are static, PIC,
/// and dynamic-no-pic.
Reloc::Model TargetMachine::getRelocationModel() const { return RM; }
-/// Returns the code model. The choices are small, kernel, medium, large, and
-/// target default.
-CodeModel::Model TargetMachine::getCodeModel() const { return CMModel; }
-
/// Get the IR-specified TLS model for Var.
static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
switch (GV->getThreadLocalMode()) {
@@ -189,7 +183,8 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const { return OptLevel; }
void TargetMachine::setOptLevel(CodeGenOpt::Level Level) { OptLevel = Level; }
-TargetTransformInfo TargetMachine::getTargetTransformInfo(const Function &F) {
+TargetTransformInfo
+TargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(F.getParent()->getDataLayout());
}
@@ -217,7 +212,7 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const {
return TLOF->getContext().getOrCreateSymbol(NameStr);
}
-TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
+TargetIRAnalysis TargetMachine::getTargetIRAnalysis() const {
// Since Analysis can't depend on Target, use a std::function to invert the
// dependency.
return TargetIRAnalysis(
diff --git a/llvm/lib/Target/TargetMachineC.cpp b/llvm/lib/Target/TargetMachineC.cpp
index 55047a1bb3cd..b8cefbe5b6b7 100644
--- a/llvm/lib/Target/TargetMachineC.cpp
+++ b/llvm/lib/Target/TargetMachineC.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Core.h"
-#include "llvm-c/Target.h"
#include "llvm-c/TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -20,13 +19,10 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/CodeGenCWrappers.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdlib>
#include <cstring>
using namespace llvm;
@@ -217,7 +213,9 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
}
LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
- char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+ const char *Filename,
+ LLVMCodeGenFileType codegen,
+ char **ErrorMessage) {
std::error_code EC;
raw_fd_ostream dest(Filename, EC, sys::fs::OF_None);
if (EC) {
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index 4a318e493c52..f39be036d21f 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
diff --git a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
index 72c40cbe78c4..00487a1f5bb3 100644
--- a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
+++ b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -15,8 +15,8 @@
#include "VE.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/TargetRegistry.h"
@@ -33,7 +33,7 @@ class VEDisassembler : public MCDisassembler {
public:
VEDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~VEDisassembler() {}
+ virtual ~VEDisassembler() = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -126,7 +126,7 @@ static const unsigned MiscRegDecoderTable[] = {
static DecodeStatus DecodeI32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 63)
return MCDisassembler::Fail;
unsigned Reg = I32RegDecoderTable[RegNo];
@@ -136,7 +136,7 @@ static DecodeStatus DecodeI32RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeI64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 63)
return MCDisassembler::Fail;
unsigned Reg = I64RegDecoderTable[RegNo];
@@ -146,7 +146,7 @@ static DecodeStatus DecodeI64RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeF32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 63)
return MCDisassembler::Fail;
unsigned Reg = F32RegDecoderTable[RegNo];
@@ -156,7 +156,7 @@ static DecodeStatus DecodeF32RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo % 2 || RegNo > 63)
return MCDisassembler::Fail;
unsigned Reg = F128RegDecoderTable[RegNo / 2];
@@ -166,7 +166,7 @@ static DecodeStatus DecodeF128RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Reg = VE::NoRegister;
if (RegNo == 255)
Reg = VE::VIX;
@@ -180,7 +180,7 @@ static DecodeStatus DecodeV64RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
unsigned Reg = VMRegDecoderTable[RegNo];
@@ -190,7 +190,7 @@ static DecodeStatus DecodeVMRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo % 2 || RegNo > 15)
return MCDisassembler::Fail;
unsigned Reg = VM512RegDecoderTable[RegNo / 2];
@@ -200,7 +200,7 @@ static DecodeStatus DecodeVM512RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
if (RegNo > 30)
return MCDisassembler::Fail;
unsigned Reg = MiscRegDecoderTable[RegNo];
@@ -211,47 +211,56 @@ static DecodeStatus DecodeMISCRegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeASX(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTS1AMI64(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeTS1AMI32(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCASI64(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCASI32(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSIMM7(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeSIMM32(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeCCOperand(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeRDOperand(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBranchCondition(MCInst &Inst, uint64_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBranchConditionAlways(MCInst &Inst, uint64_t insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
#include "VEGenDisassemblerTables.inc"
@@ -302,10 +311,10 @@ DecodeStatus VEDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
}
typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned RegNo, uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeASX(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned sy = fieldFromInstruction(insn, 40, 7);
bool cy = fieldFromInstruction(insn, 47, 1);
unsigned sz = fieldFromInstruction(insn, 32, 7);
@@ -338,7 +347,7 @@ static DecodeStatus DecodeASX(MCInst &MI, uint64_t insn, uint64_t Address,
}
static DecodeStatus DecodeAS(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned sz = fieldFromInstruction(insn, 32, 7);
bool cz = fieldFromInstruction(insn, 39, 1);
uint64_t simm32 = SignExtend64<32>(fieldFromInstruction(insn, 0, 32));
@@ -360,7 +369,7 @@ static DecodeStatus DecodeAS(MCInst &MI, uint64_t insn, uint64_t Address,
}
static DecodeStatus DecodeMem(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder, bool isLoad,
+ const MCDisassembler *Decoder, bool isLoad,
DecodeFunc DecodeSX) {
unsigned sx = fieldFromInstruction(insn, 48, 7);
@@ -384,7 +393,7 @@ static DecodeStatus DecodeMem(MCInst &MI, uint64_t insn, uint64_t Address,
}
static DecodeStatus DecodeMemAS(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder, bool isLoad,
+ const MCDisassembler *Decoder, bool isLoad,
DecodeFunc DecodeSX) {
unsigned sx = fieldFromInstruction(insn, 48, 7);
@@ -408,50 +417,55 @@ static DecodeStatus DecodeMemAS(MCInst &MI, uint64_t insn, uint64_t Address,
}
static DecodeStatus DecodeLoadI32(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI32RegisterClass);
}
static DecodeStatus DecodeStoreI32(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI32RegisterClass);
}
static DecodeStatus DecodeLoadI64(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass);
}
static DecodeStatus DecodeStoreI64(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false, DecodeI64RegisterClass);
}
static DecodeStatus DecodeLoadF32(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true, DecodeF32RegisterClass);
}
static DecodeStatus DecodeStoreF32(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false, DecodeF32RegisterClass);
}
static DecodeStatus DecodeLoadASI64(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMemAS(Inst, insn, Address, Decoder, true,
DecodeI64RegisterClass);
}
static DecodeStatus DecodeStoreASI64(MCInst &Inst, uint64_t insn,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
return DecodeMemAS(Inst, insn, Address, Decoder, false,
DecodeI64RegisterClass);
}
static DecodeStatus DecodeCAS(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder, bool isImmOnly, bool isUImm,
- DecodeFunc DecodeSX) {
+ const MCDisassembler *Decoder, bool isImmOnly,
+ bool isUImm, DecodeFunc DecodeSX) {
unsigned sx = fieldFromInstruction(insn, 48, 7);
bool cy = fieldFromInstruction(insn, 47, 1);
unsigned sy = fieldFromInstruction(insn, 40, 7);
@@ -488,43 +502,43 @@ static DecodeStatus DecodeCAS(MCInst &MI, uint64_t insn, uint64_t Address,
}
static DecodeStatus DecodeTS1AMI64(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeCAS(MI, insn, Address, Decoder, false, true,
DecodeI64RegisterClass);
}
static DecodeStatus DecodeTS1AMI32(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeCAS(MI, insn, Address, Decoder, false, true,
DecodeI32RegisterClass);
}
static DecodeStatus DecodeCASI64(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeCAS(MI, insn, Address, Decoder, false, false,
DecodeI64RegisterClass);
}
static DecodeStatus DecodeCASI32(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeCAS(MI, insn, Address, Decoder, false, false,
DecodeI32RegisterClass);
}
static DecodeStatus DecodeCall(MCInst &Inst, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true, DecodeI64RegisterClass);
}
static DecodeStatus DecodeSIMM7(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
uint64_t tgt = SignExtend64<7>(insn);
MI.addOperand(MCOperand::createImm(tgt));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSIMM32(MCInst &MI, uint64_t insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
uint64_t tgt = SignExtend64<32>(insn);
MI.addOperand(MCOperand::createImm(tgt));
return MCDisassembler::Success;
@@ -568,14 +582,14 @@ static bool isIntegerBCKind(MCInst &MI) {
// Decode CC Operand field.
static DecodeStatus DecodeCCOperand(MCInst &MI, uint64_t cf, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
MI.addOperand(MCOperand::createImm(VEValToCondCode(cf, isIntegerBCKind(MI))));
return MCDisassembler::Success;
}
// Decode RD Operand field.
static DecodeStatus DecodeRDOperand(MCInst &MI, uint64_t cf, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
MI.addOperand(MCOperand::createImm(VEValToRD(cf)));
return MCDisassembler::Success;
}
@@ -583,7 +597,7 @@ static DecodeStatus DecodeRDOperand(MCInst &MI, uint64_t cf, uint64_t Address,
// Decode branch condition instruction and CCOperand field in it.
static DecodeStatus DecodeBranchCondition(MCInst &MI, uint64_t insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned cf = fieldFromInstruction(insn, 48, 4);
bool cy = fieldFromInstruction(insn, 47, 1);
unsigned sy = fieldFromInstruction(insn, 40, 7);
@@ -607,7 +621,7 @@ static DecodeStatus DecodeBranchCondition(MCInst &MI, uint64_t insn,
static DecodeStatus DecodeBranchConditionAlways(MCInst &MI, uint64_t insn,
uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
// Decode MEMri.
return DecodeAS(MI, insn, Address, Decoder);
}
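
All VE decode callbacks above switch from an opaque const void * to a typed const MCDisassembler *. A sketch of the resulting callback shape — the register class and decoder table are hypothetical, not from the patch:

#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;

// Assumed 64-entry table mapping encodings to target registers.
static const unsigned ExampleRegDecoderTable[64] = {};

static DecodeStatus DecodeExampleRegisterClass(MCInst &Inst, unsigned RegNo,
                                               uint64_t Address,
                                               const MCDisassembler *Decoder) {
  // The typed Decoder can now be consulted directly instead of being cast
  // from void * inside every callback.
  if (RegNo > 63)
    return MCDisassembler::Fail;
  Inst.addOperand(MCOperand::createReg(ExampleRegDecoderTable[RegNo]));
  return MCDisassembler::Success;
}
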
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
index ae065407409a..1c89d6444d11 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp
@@ -25,7 +25,7 @@ public:
: MCELFObjectTargetWriter(/* Is64Bit */ true, OSABI, ELF::EM_VE,
/* HasRelocationAddend */ true) {}
- ~VEELFObjectWriter() override {}
+ ~VEELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
index 46b995cee840..0e2d55c0182e 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
@@ -20,28 +20,28 @@ enum Fixups {
/// fixup_ve_srel32 - 32-bit fixup corresponding to foo for relative branch
fixup_ve_srel32,
- /// fixup_ve_hi32 - 32-bit fixup corresponding to foo@hi
+ /// fixup_ve_hi32 - 32-bit fixup corresponding to foo\@hi
fixup_ve_hi32,
- /// fixup_ve_lo32 - 32-bit fixup corresponding to foo@lo
+ /// fixup_ve_lo32 - 32-bit fixup corresponding to foo\@lo
fixup_ve_lo32,
- /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo@pc_hi
+ /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo\@pc_hi
fixup_ve_pc_hi32,
- /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo@pc_lo
+ /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo\@pc_lo
fixup_ve_pc_lo32,
- /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo@got_hi
+ /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo\@got_hi
fixup_ve_got_hi32,
- /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo@got_lo
+ /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo\@got_lo
fixup_ve_got_lo32,
- /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo@gotoff_hi
+ /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo\@gotoff_hi
fixup_ve_gotoff_hi32,
- /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo@gotoff_lo
+ /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo\@gotoff_lo
fixup_ve_gotoff_lo32,
/// fixup_ve_plt_hi32/lo32
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
index 65bb0cf8b0d7..3eb246f73679 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
@@ -159,7 +159,6 @@ uint64_t VEMCCodeEmitter::getRDOpValue(const MCInst &MI, unsigned OpNo,
#include "VEGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createVEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new VEMCCodeEmitter(MCII, Ctx);
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
index 4d45918ad0aa..a1045107a832 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
index f0bb6e3acdee..d8f9d0634c24 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
@@ -28,8 +28,7 @@ class MCSubtargetInfo;
class MCTargetOptions;
class Target;
-MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI, MCContext &Ctx);
+MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx);
MCAsmBackend *createVEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index 2a729a1a311c..2794d1458be7 100644
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -27,7 +27,6 @@ class MCInst;
class MachineInstr;
FunctionPass *createVEISelDag(VETargetMachine &TM);
-FunctionPass *createVEPromoteToI1Pass();
FunctionPass *createLVLGenPass();
void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
@@ -370,5 +369,8 @@ inline static uint64_t mimm2Val(uint64_t Val) {
inline unsigned M0(unsigned Val) { return Val + 64; }
inline unsigned M1(unsigned Val) { return Val; }
+static const unsigned StandardVectorWidth = 256;
+static const unsigned PackedVectorWidth = 512;
+
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index af3e4af13814..8f11eba6d5fd 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -19,17 +19,52 @@
namespace llvm {
-static const int StandardVectorWidth = 256;
-
bool isPackedVectorType(EVT SomeVT) {
if (!SomeVT.isVector())
return false;
return SomeVT.getVectorNumElements() > StandardVectorWidth;
}
+MVT splitVectorType(MVT VT) {
+ if (!VT.isVector())
+ return VT;
+ return MVT::getVectorVT(VT.getVectorElementType(), StandardVectorWidth);
+}
+
+MVT getLegalVectorType(Packing P, MVT ElemVT) {
+ return MVT::getVectorVT(ElemVT, P == Packing::Normal ? StandardVectorWidth
+ : PackedVectorWidth);
+}
+
+Packing getTypePacking(EVT VT) {
+ assert(VT.isVector());
+ return isPackedVectorType(VT) ? Packing::Dense : Packing::Normal;
+}
+
+bool isMaskType(EVT SomeVT) {
+ if (!SomeVT.isVector())
+ return false;
+ return SomeVT.getVectorElementType() == MVT::i1;
+}
+
+bool isMaskArithmetic(SDValue Op) {
+ switch (Op.getOpcode()) {
+ default:
+ return false;
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR:
+ return isMaskType(Op.getValueType());
+ }
+}
+
 /// \returns the VVP_* SDNode opcode corresponding to \p OC.
Optional<unsigned> getVVPOpcode(unsigned Opcode) {
switch (Opcode) {
+ case ISD::MLOAD:
+ return VEISD::VVP_LOAD;
+ case ISD::MSTORE:
+ return VEISD::VVP_STORE;
#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
case ISD::VPOPC: \
return VEISD::VVPNAME;
@@ -38,10 +73,76 @@ Optional<unsigned> getVVPOpcode(unsigned Opcode) {
case ISD::SDNAME: \
return VEISD::VVPNAME;
#include "VVPNodes.def"
+ // TODO: Map those in VVPNodes.def too
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ return VEISD::VVP_LOAD;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ return VEISD::VVP_STORE;
}
return None;
}
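+// Assumed rationale (not stated in the patch): masked-off lanes of a
+// division could still trap (e.g. divide by zero), and VVP_SELECT consumes
+// its mask as the selector, so these opcodes must keep their mask.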
+bool maySafelyIgnoreMask(SDValue Op) {
+ auto VVPOpc = getVVPOpcode(Op->getOpcode());
+ auto Opc = VVPOpc.value_or(Op->getOpcode());
+
+ switch (Opc) {
+ case VEISD::VVP_SDIV:
+ case VEISD::VVP_UDIV:
+ case VEISD::VVP_FDIV:
+ case VEISD::VVP_SELECT:
+ return false;
+
+ default:
+ return true;
+ }
+}
+
+bool supportsPackedMode(unsigned Opcode, EVT IdiomVT) {
+ bool IsPackedOp = isPackedVectorType(IdiomVT);
+ bool IsMaskOp = isMaskType(IdiomVT);
+ switch (Opcode) {
+ default:
+ return false;
+
+ case VEISD::VEC_BROADCAST:
+ return true;
+#define REGISTER_PACKED(VVP_NAME) case VEISD::VVP_NAME:
+#include "VVPNodes.def"
+ return IsPackedOp && !IsMaskOp;
+ }
+}
+
+bool isPackingSupportOpcode(unsigned Opc) {
+ switch (Opc) {
+ case VEISD::VEC_PACK:
+ case VEISD::VEC_UNPACK_LO:
+ case VEISD::VEC_UNPACK_HI:
+ return true;
+ }
+ return false;
+}
+
+bool isVVPOrVEC(unsigned Opcode) {
+ switch (Opcode) {
+ case VEISD::VEC_BROADCAST:
+#define ADD_VVP_OP(VVPNAME, ...) case VEISD::VVPNAME:
+#include "VVPNodes.def"
+ return true;
+ }
+ return false;
+}
+
+bool isVVPUnaryOp(unsigned VVPOpcode) {
+ switch (VVPOpcode) {
+#define ADD_UNARY_VVP_OP(VVPNAME, ...) \
+ case VEISD::VVPNAME: \
+ return true;
+#include "VVPNodes.def"
+ }
+ return false;
+}
+
bool isVVPBinaryOp(unsigned VVPOpcode) {
switch (VVPOpcode) {
#define ADD_BINARY_VVP_OP(VVPNAME, ...) \
@@ -52,16 +153,308 @@ bool isVVPBinaryOp(unsigned VVPOpcode) {
return false;
}
+bool isVVPReductionOp(unsigned Opcode) {
+ switch (Opcode) {
+#define ADD_REDUCE_VVP_OP(VVP_NAME, SDNAME) case VEISD::VVP_NAME:
+#include "VVPNodes.def"
+ return true;
+ }
+ return false;
+}
+
+// Return the AVL operand position for this VVP or VEC Op.
+Optional<int> getAVLPos(unsigned Opc) {
+ // This is only available for VP SDNodes
+ auto PosOpt = ISD::getVPExplicitVectorLengthIdx(Opc);
+ if (PosOpt)
+ return *PosOpt;
+
+ // VVP Opcodes.
+ if (isVVPBinaryOp(Opc))
+ return 3;
+
+ // VM Opcodes.
+ switch (Opc) {
+ case VEISD::VEC_BROADCAST:
+ return 1;
+ case VEISD::VVP_SELECT:
+ return 3;
+ case VEISD::VVP_LOAD:
+ return 4;
+ case VEISD::VVP_STORE:
+ return 5;
+ }
+
+ return None;
+}
+
+Optional<int> getMaskPos(unsigned Opc) {
+ // This is only available for VP SDNodes
+ auto PosOpt = ISD::getVPMaskIdx(Opc);
+ if (PosOpt)
+ return *PosOpt;
+
+ // VVP Opcodes.
+ if (isVVPBinaryOp(Opc))
+ return 2;
+
+ // Other opcodes.
+ switch (Opc) {
+ case ISD::MSTORE:
+ return 4;
+ case ISD::MLOAD:
+ return 3;
+ case VEISD::VVP_SELECT:
+ return 2;
+ }
+
+ return None;
+}
+
+bool isLegalAVL(SDValue AVL) { return AVL->getOpcode() == VEISD::LEGALAVL; }
+
+/// Node Properties {
+
+SDValue getNodeChain(SDValue Op) {
+ if (MemSDNode *MemN = dyn_cast<MemSDNode>(Op.getNode()))
+ return MemN->getChain();
+
+ switch (Op->getOpcode()) {
+ case VEISD::VVP_LOAD:
+ case VEISD::VVP_STORE:
+ return Op->getOperand(0);
+ }
+ return SDValue();
+}
+
+SDValue getMemoryPtr(SDValue Op) {
+ if (auto *MemN = dyn_cast<MemSDNode>(Op.getNode()))
+ return MemN->getBasePtr();
+
+ switch (Op->getOpcode()) {
+ case VEISD::VVP_LOAD:
+ return Op->getOperand(1);
+ case VEISD::VVP_STORE:
+ return Op->getOperand(2);
+ }
+ return SDValue();
+}
+
+Optional<EVT> getIdiomaticVectorType(SDNode *Op) {
+ unsigned OC = Op->getOpcode();
+
+  // For memory ops -> the transferred data type
+ if (auto MemN = dyn_cast<MemSDNode>(Op))
+ return MemN->getMemoryVT();
+
+ switch (OC) {
+ // Standard ISD.
+ case ISD::SELECT: // not aliased with VVP_SELECT
+ case ISD::CONCAT_VECTORS:
+ case ISD::EXTRACT_SUBVECTOR:
+ case ISD::VECTOR_SHUFFLE:
+ case ISD::BUILD_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ return Op->getValueType(0);
+ }
+
+ // Translate to VVP where possible.
+ unsigned OriginalOC = OC;
+ if (auto VVPOpc = getVVPOpcode(OC))
+ OC = *VVPOpc;
+
+ if (isVVPReductionOp(OC))
+ return Op->getOperand(hasReductionStartParam(OriginalOC) ? 1 : 0)
+ .getValueType();
+
+ switch (OC) {
+ default:
+ case VEISD::VVP_SETCC:
+ return Op->getOperand(0).getValueType();
+
+ case VEISD::VVP_SELECT:
+#define ADD_BINARY_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
+#include "VVPNodes.def"
+ return Op->getValueType(0);
+
+ case VEISD::VVP_LOAD:
+ return Op->getValueType(0);
+
+ case VEISD::VVP_STORE:
+ return Op->getOperand(1)->getValueType(0);
+
+ // VEC
+ case VEISD::VEC_BROADCAST:
+ return Op->getValueType(0);
+ }
+}
+
+SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG) {
+ switch (Op->getOpcode()) {
+ case VEISD::VVP_STORE:
+ return Op->getOperand(3);
+ case VEISD::VVP_LOAD:
+ return Op->getOperand(2);
+ }
+
+ if (auto *StoreN = dyn_cast<VPStridedStoreSDNode>(Op.getNode()))
+ return StoreN->getStride();
+ if (auto *StoreN = dyn_cast<VPStridedLoadSDNode>(Op.getNode()))
+ return StoreN->getStride();
+
+ if (isa<MemSDNode>(Op.getNode())) {
+ // Regular MLOAD/MSTORE/LOAD/STORE
+ // No stride argument -> use the contiguous element size as stride.
+ uint64_t ElemStride = getIdiomaticVectorType(Op.getNode())
+ ->getVectorElementType()
+ .getStoreSize();
+ return CDAG.getConstant(ElemStride, MVT::i64);
+ }
+ return SDValue();
+}
+
+SDValue getGatherScatterIndex(SDValue Op) {
+ if (auto *N = dyn_cast<MaskedGatherScatterSDNode>(Op.getNode()))
+ return N->getIndex();
+ if (auto *N = dyn_cast<VPGatherScatterSDNode>(Op.getNode()))
+ return N->getIndex();
+ return SDValue();
+}
+
+SDValue getGatherScatterScale(SDValue Op) {
+ if (auto *N = dyn_cast<MaskedGatherScatterSDNode>(Op.getNode()))
+ return N->getScale();
+ if (auto *N = dyn_cast<VPGatherScatterSDNode>(Op.getNode()))
+ return N->getScale();
+ return SDValue();
+}
+
+SDValue getStoredValue(SDValue Op) {
+ switch (Op->getOpcode()) {
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ case VEISD::VVP_STORE:
+ return Op->getOperand(1);
+ }
+ if (auto *StoreN = dyn_cast<StoreSDNode>(Op.getNode()))
+ return StoreN->getValue();
+ if (auto *StoreN = dyn_cast<MaskedStoreSDNode>(Op.getNode()))
+ return StoreN->getValue();
+ if (auto *StoreN = dyn_cast<VPStridedStoreSDNode>(Op.getNode()))
+ return StoreN->getValue();
+ if (auto *StoreN = dyn_cast<VPStoreSDNode>(Op.getNode()))
+ return StoreN->getValue();
+ if (auto *StoreN = dyn_cast<MaskedScatterSDNode>(Op.getNode()))
+ return StoreN->getValue();
+ if (auto *StoreN = dyn_cast<VPScatterSDNode>(Op.getNode()))
+ return StoreN->getValue();
+ return SDValue();
+}
+
+SDValue getNodePassthru(SDValue Op) {
+ if (auto *N = dyn_cast<MaskedLoadSDNode>(Op.getNode()))
+ return N->getPassThru();
+ if (auto *N = dyn_cast<MaskedGatherSDNode>(Op.getNode()))
+ return N->getPassThru();
+
+ return SDValue();
+}
+
+bool hasReductionStartParam(unsigned OPC) {
+ // TODO: Ordered reduction opcodes.
+ if (ISD::isVPReduction(OPC))
+ return true;
+ return false;
+}
+
+unsigned getScalarReductionOpcode(unsigned VVPOC, bool IsMask) {
+ assert(!IsMask && "Mask reduction isel");
+
+ switch (VVPOC) {
+#define HANDLE_VVP_REDUCE_TO_SCALAR(VVP_RED_ISD, REDUCE_ISD) \
+ case VEISD::VVP_RED_ISD: \
+ return ISD::REDUCE_ISD;
+#include "VVPNodes.def"
+ default:
+ break;
+ }
+  llvm_unreachable("Cannot scalarize this reduction Opcode!");
+}
+
+/// } Node Properties
+
+SDValue getNodeAVL(SDValue Op) {
+ auto PosOpt = getAVLPos(Op->getOpcode());
+ return PosOpt ? Op->getOperand(*PosOpt) : SDValue();
+}
+
+SDValue getNodeMask(SDValue Op) {
+ auto PosOpt = getMaskPos(Op->getOpcode());
+ return PosOpt ? Op->getOperand(*PosOpt) : SDValue();
+}
+
+std::pair<SDValue, bool> getAnnotatedNodeAVL(SDValue Op) {
+ SDValue AVL = getNodeAVL(Op);
+ if (!AVL)
+ return {SDValue(), true};
+ if (isLegalAVL(AVL))
+ return {AVL->getOperand(0), true};
+ return {AVL, false};
+}
+
SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget,
bool IsOpaque) const {
return DAG.getConstant(Val, DL, VT, IsTarget, IsOpaque);
}
+SDValue VECustomDAG::getConstantMask(Packing Packing, bool AllTrue) const {
+ auto MaskVT = getLegalVectorType(Packing, MVT::i1);
+
+ // VEISelDAGtoDAG will replace this pattern with the constant-true VM.
+ auto TrueVal = DAG.getConstant(-1, DL, MVT::i32);
+ auto AVL = getConstant(MaskVT.getVectorNumElements(), MVT::i32);
+ auto Res = getNode(VEISD::VEC_BROADCAST, MaskVT, {TrueVal, AVL});
+ if (AllTrue)
+ return Res;
+
+ return DAG.getNOT(DL, Res, Res.getValueType());
+}
+
+SDValue VECustomDAG::getMaskBroadcast(EVT ResultVT, SDValue Scalar,
+ SDValue AVL) const {
+ // Constant mask splat.
+ if (auto BcConst = dyn_cast<ConstantSDNode>(Scalar))
+ return getConstantMask(getTypePacking(ResultVT),
+ BcConst->getSExtValue() != 0);
+
+ // Expand the broadcast to a vector comparison.
+ auto ScalarBoolVT = Scalar.getSimpleValueType();
+ assert(ScalarBoolVT == MVT::i32);
+
+ // Cast to i32 ty.
+ SDValue CmpElem = DAG.getSExtOrTrunc(Scalar, DL, MVT::i32);
+ unsigned ElemCount = ResultVT.getVectorNumElements();
+ MVT CmpVecTy = MVT::getVectorVT(ScalarBoolVT, ElemCount);
+
+ // Broadcast to vector.
+ SDValue BCVec =
+ DAG.getNode(VEISD::VEC_BROADCAST, DL, CmpVecTy, {CmpElem, AVL});
+ SDValue ZeroVec =
+ getBroadcast(CmpVecTy, {DAG.getConstant(0, DL, ScalarBoolVT)}, AVL);
+
+ MVT BoolVecTy = MVT::getVectorVT(MVT::i1, ElemCount);
+
+ // Broadcast(Data) != Broadcast(0)
+ // TODO: Use a VVP operation for this.
+ return DAG.getSetCC(DL, BoolVecTy, BCVec, ZeroVec, ISD::CondCode::SETNE);
+}
+
SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
SDValue AVL) const {
assert(ResultVT.isVector());
auto ScaVT = Scalar.getValueType();
- assert(ScaVT != MVT::i1 && "TODO: Mask broadcasts");
+
+ if (isMaskType(ResultVT))
+ return getMaskBroadcast(ResultVT, Scalar, AVL);
if (isPackedVectorType(ResultVT)) {
// v512x packed mode broadcast
@@ -78,4 +471,119 @@ SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
return getNode(VEISD::VEC_BROADCAST, ResultVT, {Scalar, AVL});
}
+SDValue VECustomDAG::annotateLegalAVL(SDValue AVL) const {
+ if (isLegalAVL(AVL))
+ return AVL;
+ return getNode(VEISD::LEGALAVL, AVL.getValueType(), AVL);
+}
+
+SDValue VECustomDAG::getUnpack(EVT DestVT, SDValue Vec, PackElem Part,
+ SDValue AVL) const {
+ assert(getAnnotatedNodeAVL(AVL).second && "Expected a pack-legalized AVL");
+
+ // TODO: Peek through VEC_PACK and VEC_BROADCAST(REPL_<sth> ..) operands.
+ unsigned OC =
+ (Part == PackElem::Lo) ? VEISD::VEC_UNPACK_LO : VEISD::VEC_UNPACK_HI;
+ return DAG.getNode(OC, DL, DestVT, Vec, AVL);
+}
+
+SDValue VECustomDAG::getPack(EVT DestVT, SDValue LoVec, SDValue HiVec,
+ SDValue AVL) const {
+ assert(getAnnotatedNodeAVL(AVL).second && "Expected a pack-legalized AVL");
+
+ // TODO: Peek through VEC_UNPACK_LO|HI operands.
+ return DAG.getNode(VEISD::VEC_PACK, DL, DestVT, LoVec, HiVec, AVL);
+}
+
+VETargetMasks VECustomDAG::getTargetSplitMask(SDValue RawMask, SDValue RawAVL,
+ PackElem Part) const {
+ // Adjust AVL for this part
+ SDValue NewAVL;
+ SDValue OneV = getConstant(1, MVT::i32);
+ if (Part == PackElem::Hi)
+ NewAVL = getNode(ISD::ADD, MVT::i32, {RawAVL, OneV});
+ else
+ NewAVL = RawAVL;
+ NewAVL = getNode(ISD::SRL, MVT::i32, {NewAVL, OneV});
+
+ NewAVL = annotateLegalAVL(NewAVL);
+
+ // Legalize Mask (unpack or all-true)
+ SDValue NewMask;
+ if (!RawMask)
+ NewMask = getConstantMask(Packing::Normal, true);
+ else
+ NewMask = getUnpack(MVT::v256i1, RawMask, Part, NewAVL);
+
+ return VETargetMasks(NewMask, NewAVL);
+}
+
+SDValue VECustomDAG::getSplitPtrOffset(SDValue Ptr, SDValue ByteStride,
+ PackElem Part) const {
+ // High starts at base ptr but has more significant bits in the 64bit vector
+ // element.
+ if (Part == PackElem::Hi)
+ return Ptr;
+ return getNode(ISD::ADD, MVT::i64, {Ptr, ByteStride});
+}
+
+SDValue VECustomDAG::getSplitPtrStride(SDValue PackStride) const {
+ if (auto ConstBytes = dyn_cast<ConstantSDNode>(PackStride))
+ return getConstant(2 * ConstBytes->getSExtValue(), MVT::i64);
+ return getNode(ISD::SHL, MVT::i64, {PackStride, getConstant(1, MVT::i32)});
+}
+
+SDValue VECustomDAG::getGatherScatterAddress(SDValue BasePtr, SDValue Scale,
+ SDValue Index, SDValue Mask,
+ SDValue AVL) const {
+ EVT IndexVT = Index.getValueType();
+
+ // Apply scale.
+ SDValue ScaledIndex;
+ if (!Scale || isOneConstant(Scale))
+ ScaledIndex = Index;
+ else {
+ SDValue ScaleBroadcast = getBroadcast(IndexVT, Scale, AVL);
+ ScaledIndex =
+ getNode(VEISD::VVP_MUL, IndexVT, {Index, ScaleBroadcast, Mask, AVL});
+ }
+
+ // Add basePtr.
+ if (isNullConstant(BasePtr))
+ return ScaledIndex;
+
+ // re-constitute pointer vector (basePtr + index * scale)
+ SDValue BaseBroadcast = getBroadcast(IndexVT, BasePtr, AVL);
+ auto ResPtr =
+ getNode(VEISD::VVP_ADD, IndexVT, {BaseBroadcast, ScaledIndex, Mask, AVL});
+ return ResPtr;
+}
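+
+// Scalar model (ours) of the address computation above: each lane i of the
+// resulting pointer vector evaluates
+//   Ptr[i] = BasePtr + Index[i] * Scale
+// with the multiply and add issued as VVP_MUL/VVP_ADD under Mask and AVL;
+// the broadcasts merely splat the scalar BasePtr and Scale across lanes.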
+
+SDValue VECustomDAG::getLegalReductionOpVVP(unsigned VVPOpcode, EVT ResVT,
+ SDValue StartV, SDValue VectorV,
+ SDValue Mask, SDValue AVL,
+ SDNodeFlags Flags) const {
+
+ // Optionally attach the start param with a scalar op (where it is
+ // unsupported).
+ bool scalarizeStartParam = StartV && !hasReductionStartParam(VVPOpcode);
+ bool IsMaskReduction = isMaskType(VectorV.getValueType());
+ assert(!IsMaskReduction && "TODO Implement");
+ auto AttachStartValue = [&](SDValue ReductionResV) {
+ if (!scalarizeStartParam)
+ return ReductionResV;
+ auto ScalarOC = getScalarReductionOpcode(VVPOpcode, IsMaskReduction);
+ return getNode(ScalarOC, ResVT, {StartV, ReductionResV});
+ };
+
+  // Fixup: Always use sequential 'fmul' reduction.
+ if (!scalarizeStartParam && StartV) {
+ assert(hasReductionStartParam(VVPOpcode));
+ return AttachStartValue(
+ getNode(VVPOpcode, ResVT, {StartV, VectorV, Mask, AVL}, Flags));
+ } else
+ return AttachStartValue(
+ getNode(VVPOpcode, ResVT, {VectorV, Mask, AVL}, Flags));
+}
+
} // namespace llvm
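
getTargetSplitMask above halves the AVL when a packed operation is split into its Lo and Hi parts. A worked sketch of the ADD/SRL arithmetic it emits (the helper is ours):

// Hi rounds up, Lo rounds down, so the two parts together cover exactly
// AVL elements.
static unsigned splitAVL(unsigned AVL, bool IsHi) {
  return (AVL + (IsHi ? 1u : 0u)) >> 1;
}
// e.g. AVL == 5: Lo part handles splitAVL(5, false) == 2 elements,
//                Hi part handles splitAVL(5, true)  == 3 elements.

getSplitPtrStride doubles the byte stride for the same reason: after the split, consecutive elements of one part sit two 64-bit chunks apart in memory.
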
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index ddd6ce783366..0d35c098048e 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -23,10 +23,122 @@ namespace llvm {
Optional<unsigned> getVVPOpcode(unsigned Opcode);
+bool isVVPUnaryOp(unsigned Opcode);
bool isVVPBinaryOp(unsigned Opcode);
+bool isVVPReductionOp(unsigned Opcode);
+
+MVT splitVectorType(MVT VT);
bool isPackedVectorType(EVT SomeVT);
+bool isMaskType(EVT SomeVT);
+
+bool isMaskArithmetic(SDValue Op);
+
+bool isVVPOrVEC(unsigned);
+
+bool supportsPackedMode(unsigned Opcode, EVT IdiomVT);
+
+bool isPackingSupportOpcode(unsigned Opc);
+
+bool maySafelyIgnoreMask(SDValue Op);
+
+/// The VE backend uses a two-staged process to lower and legalize vector
+/// instructions:
+//
+/// 1. VP and standard vector SDNodes are lowered to SDNodes of the VVP_* layer.
+//
+// All VVP nodes have a mask and an Active Vector Length (AVL) parameter.
+// The AVL parameter refers to the element position in the vector the VVP
+// node operates on.
+//
+//
+// 2. The VVP SDNodes are legalized. The AVL in a legal VVP node refers to
+// chunks of 64bit. We track this by wrapping the AVL in a LEGALAVL node.
+//
+// The AVL mechanism in the VE architecture always refers to chunks of
+// 64bit, regardless of the actual element type vector instructions are
+// operating on. For vector types v256.32 or v256.64 nothing needs to be
+// legalized since each element occupies a 64bit chunk - there is no
+// difference between counting 64bit chunks or element positions. However,
+// all vector types with > 256 elements store more than one logical element
+// per 64bit chunk and need to be transformed.
+// Regardless of how legalization is performed, the resulting legal VVP
+// SDNodes will have a LEGALAVL node as their AVL operand. The LEGALAVL node
+// wraps an AVL that refers to 64 bit chunks just as the architecture
+// demands - that is, the wrapped AVL is the correct setting for the VL
+// register for this VVP operation to get the desired behavior.
+//
+/// AVL Functions {
+// The AVL operand position of this node.
+Optional<int> getAVLPos(unsigned);
+
+// Whether this is a LEGALAVL node.
+bool isLegalAVL(SDValue AVL);
+
+// The AVL operand of this node.
+SDValue getNodeAVL(SDValue);
+
+// Mask position of this node.
+Optional<int> getMaskPos(unsigned);
+
+SDValue getNodeMask(SDValue);
+
+// Return the AVL operand of this node. If it is a LEGALAVL node, unwrap it.
+// The boolean indicates whether unwrapping happened.
+std::pair<SDValue, bool> getAnnotatedNodeAVL(SDValue);
+
+/// } AVL Functions
+
+/// Node Properties {
+
+Optional<EVT> getIdiomaticVectorType(SDNode *Op);
+
+SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG);
+
+SDValue getMemoryPtr(SDValue Op);
+
+SDValue getNodeChain(SDValue Op);
+
+SDValue getStoredValue(SDValue Op);
+
+SDValue getNodePassthru(SDValue Op);
+
+SDValue getGatherScatterIndex(SDValue Op);
+
+SDValue getGatherScatterScale(SDValue Op);
+
+unsigned getScalarReductionOpcode(unsigned VVPOC, bool IsMask);
+
+// Whether this VP_REDUCE_*/VECREDUCE_*/VVP_REDUCE_* SDNode has a start
+// parameter.
+bool hasReductionStartParam(unsigned VVPOC);
+
+/// } Node Properties
+
+enum class Packing {
+ Normal = 0, // 256 element standard mode.
+ Dense = 1 // 512 element packed mode.
+};
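+// For example, v256i32 and v256f64 use Packing::Normal, while the packed
+// types v512i32 and v512f32 use Packing::Dense.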
+
+// Get the vector or mask register type for this packing and element type.
+MVT getLegalVectorType(Packing P, MVT ElemVT);
+
+// Whether this type belongs to a packed mask or vector register.
+Packing getTypePacking(EVT);
+
+enum class PackElem : int8_t {
+ Lo = 0, // Integer (63, 32]
+ Hi = 1 // Float (32, 0]
+};
+
+struct VETargetMasks {
+ SDValue Mask;
+ SDValue AVL;
+ VETargetMasks(SDValue Mask = SDValue(), SDValue AVL = SDValue())
+ : Mask(Mask), AVL(AVL) {}
+};
+
class VECustomDAG {
SelectionDAG &DAG;
SDLoc DL;
@@ -68,10 +180,42 @@ public:
SDValue getUNDEF(EVT VT) const { return DAG.getUNDEF(VT); }
/// } getNode
+ /// Legalizing getNode {
+ SDValue getLegalReductionOpVVP(unsigned VVPOpcode, EVT ResVT, SDValue StartV,
+ SDValue VectorV, SDValue Mask, SDValue AVL,
+ SDNodeFlags Flags) const;
+ /// } Legalizing getNode
+
+ /// Packing {
+ SDValue getUnpack(EVT DestVT, SDValue Vec, PackElem Part, SDValue AVL) const;
+ SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL) const;
+ /// } Packing
+
+ SDValue getMergeValues(ArrayRef<SDValue> Values) const {
+ return DAG.getMergeValues(Values, DL);
+ }
+
SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
bool IsOpaque = false) const;
+ SDValue getConstantMask(Packing Packing, bool AllTrue) const;
+ SDValue getMaskBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
+
+ // Wrap AVL in a LEGALAVL node (unless it is one already).
+ SDValue annotateLegalAVL(SDValue AVL) const;
+ VETargetMasks getTargetSplitMask(SDValue RawMask, SDValue RawAVL,
+ PackElem Part) const;
+
+ // Splitting support
+ SDValue getSplitPtrOffset(SDValue Ptr, SDValue ByteStride,
+ PackElem Part) const;
+ SDValue getSplitPtrStride(SDValue PackStride) const;
+ SDValue getGatherScatterAddress(SDValue BasePtr, SDValue Scale, SDValue Index,
+ SDValue Mask, SDValue AVL) const;
+ EVT getVectorVT(EVT ElemVT, unsigned NumElems) const {
+ return EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
};
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index e2608e82c9d4..a4319ec1c975 100644
--- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "VE.h"
#include "VETargetMachine.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -335,6 +336,42 @@ void VEDAGToDAGISel::Select(SDNode *N) {
}
switch (N->getOpcode()) {
+
+  // Late-eliminate the LEGALAVL wrapper.
+ case VEISD::LEGALAVL:
+ ReplaceNode(N, N->getOperand(0).getNode());
+ return;
+
+  // Lower a constant all-true mask broadcast to VM0 / VMP0.
+ case VEISD::VEC_BROADCAST: {
+ MVT SplatResTy = N->getSimpleValueType(0);
+ if (SplatResTy.getVectorElementType() != MVT::i1)
+ break;
+
+ // Constant non-zero broadcast.
+ auto BConst = dyn_cast<ConstantSDNode>(N->getOperand(0));
+ if (!BConst)
+ break;
+ bool BCTrueMask = (BConst->getSExtValue() != 0);
+ if (!BCTrueMask)
+ break;
+
+ // Packed or non-packed.
+ SDValue New;
+ if (SplatResTy.getVectorNumElements() == StandardVectorWidth) {
+ New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VM0,
+ MVT::v256i1);
+ } else if (SplatResTy.getVectorNumElements() == PackedVectorWidth) {
+ New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VMP0,
+ MVT::v512i1);
+ } else
+ break;
+
+ // Replace.
+ ReplaceNode(N, New.getNode());
+ return;
+ }
+
case VEISD::GLOBAL_BASE_REG:
ReplaceNode(N, getGlobalBaseReg());
return;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 9137c476777e..2eea65033870 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -76,6 +76,8 @@ bool VETargetLowering::CanLowerReturn(
static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
MVT::v256f32, MVT::v512f32, MVT::v256f64};
+static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
+
static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
void VETargetLowering::initRegisterClasses() {
@@ -294,6 +296,12 @@ void VETargetLowering::initSPUActions() {
}
void VETargetLowering::initVPUActions() {
+ for (MVT LegalMaskVT : AllMaskVTs)
+ setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
+
+ for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
+ setOperationAction(Opc, MVT::v512i1, Custom);
+
for (MVT LegalVecVT : AllVectorVTs) {
setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
@@ -307,6 +315,8 @@ void VETargetLowering::initVPUActions() {
setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
#include "VVPNodes.def"
}
@@ -314,6 +324,32 @@ void VETargetLowering::initVPUActions() {
setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
}
+
+ // vNt32, vNt64 ops (legal element types)
+ for (MVT VT : MVT::vector_valuetypes()) {
+ MVT ElemVT = VT.getVectorElementType();
+ unsigned ElemBits = ElemVT.getScalarSizeInBits();
+ if (ElemBits != 32 && ElemBits != 64)
+ continue;
+
+ for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
+ setOperationAction(MemOpc, VT, Custom);
+
+ const ISD::NodeType IntReductionOCs[] = {
+ ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
+ ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
+
+ for (unsigned IntRedOpc : IntReductionOCs)
+ setOperationAction(IntRedOpc, VT, Custom);
+ }
+
+ // v256i1 and v512i1 ops
+ for (MVT MaskVT : AllMaskVTs) {
+ // Custom lower mask ops
+ setOperationAction(ISD::STORE, MaskVT, Custom);
+ setOperationAction(ISD::LOAD, MaskVT, Custom);
+ }
}
SDValue
@@ -898,10 +934,15 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(MEMBARRIER)
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(TS1AM)
+ TARGET_NODE_CASE(VEC_UNPACK_LO)
+ TARGET_NODE_CASE(VEC_UNPACK_HI)
+ TARGET_NODE_CASE(VEC_PACK)
TARGET_NODE_CASE(VEC_BROADCAST)
TARGET_NODE_CASE(REPL_I32)
TARGET_NODE_CASE(REPL_F32)
+ TARGET_NODE_CASE(LEGALAVL)
+
// Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
@@ -1305,9 +1346,81 @@ static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues(Ops, DL);
}
+// Lower a vXi1 load into the following instruction sequence:
+// LDrii %1, (,%addr)
+// LVMxir %vm, 0, %1
+// LDrii %2, 8(,%addr)
+// LVMxir %vm, 0, %2
+// ...
+static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
+ assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue BasePtr = LdNode->getBasePtr();
+ unsigned Alignment = LdNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+
+ EVT AddrVT = BasePtr.getValueType();
+ EVT MemVT = LdNode->getMemoryVT();
+ if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
+ SDValue OutChains[4];
+ SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
+ for (int i = 0; i < 4; ++i) {
+ // Generate load dag and prepare chains.
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ SDValue Val =
+ DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ OutChains[i] = SDValue(Val.getNode(), 1);
+
+ VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
+ DAG.getTargetConstant(i, DL, MVT::i64), Val,
+ SDValue(VM, 0));
+ }
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ SDValue Ops[2] = {SDValue(VM, 0), OutChain};
+ return DAG.getMergeValues(Ops, DL);
+ } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
+ SDValue OutChains[8];
+ SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
+ for (int i = 0; i < 8; ++i) {
+ // Generate load dag and prepare chains.
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ SDValue Val =
+ DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
+ LdNode->getPointerInfo(), Alignment,
+ LdNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ OutChains[i] = SDValue(Val.getNode(), 1);
+
+ VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
+ DAG.getTargetConstant(i, DL, MVT::i64), Val,
+ SDValue(VM, 0));
+ }
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ SDValue Ops[2] = {SDValue(VM, 0), OutChain};
+ return DAG.getMergeValues(Ops, DL);
+ } else {
+ // Otherwise, ask llvm to expand it.
+ return SDValue();
+ }
+}
+
SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+ EVT MemVT = LdNode->getMemoryVT();
+
+ // Dispatch to vector isel.
+ if (MemVT.isVector() && !isMaskType(MemVT))
+ return lowerToVVP(Op, DAG);
+
SDValue BasePtr = LdNode->getBasePtr();
if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
// Do not expand store instruction with frame index here because of
@@ -1315,9 +1428,10 @@ SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
- EVT MemVT = LdNode->getMemoryVT();
if (MemVT == MVT::f128)
return lowerLoadF128(Op, DAG);
+ if (isMaskType(MemVT))
+ return lowerLoadI1(Op, DAG);
return Op;
}
@@ -1358,10 +1472,68 @@ static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}
+// Lower a vXi1 store into the following instruction sequence:
+// SVMi %1, %vm, 0
+// STrii %1, (,%addr)
+// SVMi %2, %vm, 1
+// STrii %2, 8(,%addr)
+// ...
+static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
+
+ SDValue BasePtr = StNode->getBasePtr();
+ unsigned Alignment = StNode->getAlign().value();
+ if (Alignment > 8)
+ Alignment = 8;
+ EVT AddrVT = BasePtr.getValueType();
+ EVT MemVT = StNode->getMemoryVT();
+ if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
+ SDValue OutChains[4];
+ for (int i = 0; i < 4; ++i) {
+ SDNode *V =
+ DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
+ DAG.getTargetConstant(i, DL, MVT::i64));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ OutChains[i] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
+ MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
+ SDValue OutChains[8];
+ for (int i = 0; i < 8; ++i) {
+ SDNode *V =
+ DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
+ DAG.getTargetConstant(i, DL, MVT::i64));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
+ DAG.getConstant(8 * i, DL, AddrVT));
+ OutChains[i] =
+ DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
+ MachinePointerInfo(), Alignment,
+ StNode->isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ } else {
+ // Otherwise, ask llvm to expand it.
+ return SDValue();
+ }
+}
+
SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
+  // Always dispatch non-mask vector stores to VVP lowering.
+ EVT MemVT = StNode->getMemoryVT();
+ if (MemVT.isVector() && !isMaskType(MemVT))
+ return lowerToVVP(Op, DAG);
+
SDValue BasePtr = StNode->getBasePtr();
if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
// Do not expand store instruction with frame index here because of
@@ -1369,9 +1541,10 @@ SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
- EVT MemVT = StNode->getMemoryVT();
if (MemVT == MVT::f128)
return lowerStoreF128(Op, DAG);
+ if (isMaskType(MemVT))
+ return lowerStoreI1(Op, DAG);
// Otherwise, ask llvm to expand it.
return SDValue();
@@ -1410,9 +1583,9 @@ SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue NextPtr;
if (VT == MVT::f128) {
- // VE f128 values must be stored with 16 bytes alignment. We doesn't
+    // VE f128 values must be stored with 16-byte alignment. We don't
// know the actual alignment of VAList, so we take alignment of it
- // dyanmically.
+ // dynamically.
int Align = 16;
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align - 1, DL, PtrVT));
@@ -1658,25 +1831,37 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
// Else emit a broadcast.
if (SDValue ScalarV = getSplatValue(Op.getNode())) {
unsigned NumEls = ResultVT.getVectorNumElements();
- // TODO: Legalize packed-mode AVL.
- // For now, cap the AVL at 256.
- auto CappedLength = std::min<unsigned>(256, NumEls);
- auto AVL = CDAG.getConstant(CappedLength, MVT::i32);
- return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL);
+ auto AVL = CDAG.getConstant(NumEls, MVT::i32);
+ return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
}
// Expand
return SDValue();
}
+TargetLowering::LegalizeAction
+VETargetLowering::getCustomOperationAction(SDNode &Op) const {
+  // Custom legalization on VVP_* and VEC_* opcodes is required to
+  // pack-legalize these operations (transform nodes such that their AVL
+  // parameter refers to packs of 64 bits instead of the number of elements).
+
+  // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
+  // revisit them.
+ if (isPackingSupportOpcode(Op.getOpcode()))
+ return Legal;
+
+ // Custom lower to legalize AVL for packed mode.
+ if (isVVPOrVEC(Op.getOpcode()))
+ return Custom;
+ return Legal;
+}
+
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
unsigned Opcode = Op.getOpcode();
- if (ISD::isVPOpcode(Opcode))
- return lowerToVVP(Op, DAG);
+ /// Scalar isel.
switch (Opcode) {
- default:
- llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_FENCE:
return lowerATOMIC_FENCE(Op, DAG);
case ISD::ATOMIC_SWAP:
@@ -1720,9 +1905,33 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ }
+ /// Vector isel.
+ LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
+ if (ISD::isVPOpcode(Opcode))
+ return lowerToVVP(Op, DAG);
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Should not custom lower this!");
+
+ // Legalize the AVL of this internal node.
+ case VEISD::VEC_BROADCAST:
+#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
+#include "VVPNodes.def"
+ // AVL already legalized.
+ if (getAnnotatedNodeAVL(Op).second)
+ return Op;
+ return legalizeInternalVectorOp(Op, DAG);
+
+ // Translate into a VEC_*/VVP_* layer operation.
+ case ISD::MLOAD:
+ case ISD::MSTORE:
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
+ if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
+ return splitMaskArithmetic(Op, DAG);
return lowerToVVP(Op, DAG);
}
}
@@ -2667,52 +2876,6 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
return true;
}
-SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
- // Can we represent this as a VVP node.
- const unsigned Opcode = Op->getOpcode();
- auto VVPOpcodeOpt = getVVPOpcode(Opcode);
- if (!VVPOpcodeOpt.hasValue())
- return SDValue();
- unsigned VVPOpcode = VVPOpcodeOpt.getValue();
- const bool FromVP = ISD::isVPOpcode(Opcode);
-
- // The representative and legalized vector type of this operation.
- VECustomDAG CDAG(DAG, Op);
- MVT MaskVT = MVT::v256i1; // TODO: packed mode.
- EVT OpVecVT = Op.getValueType();
- EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
-
- SDValue AVL;
- SDValue Mask;
-
- if (FromVP) {
- // All upstream VP SDNodes always have a mask and avl.
- auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
- auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
- Mask = Op->getOperand(MaskIdx);
- AVL = Op->getOperand(AVLIdx);
-
- } else {
- // Materialize the VL parameter.
- AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32);
- SDValue ConstTrue = CDAG.getConstant(1, MVT::i32);
- Mask = CDAG.getBroadcast(MaskVT, ConstTrue, AVL);
- }
-
- if (isVVPBinaryOp(VVPOpcode)) {
- assert(LegalVecVT.isSimple());
- return CDAG.getNode(VVPOpcode, LegalVecVT,
- {Op->getOperand(0), Op->getOperand(1), Mask, AVL});
- }
- if (VVPOpcode == VEISD::VVP_SELECT) {
- auto Mask = Op->getOperand(0);
- auto OnTrue = Op->getOperand(1);
- auto OnFalse = Op->getOperand(2);
- return CDAG.getNode(VVPOpcode, LegalVecVT, {OnTrue, OnFalse, Mask, AVL});
- }
- llvm_unreachable("lowerToVVP called for unexpected SDNode.");
-}
-
SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 09bd19e83717..087b0e215407 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -38,17 +38,30 @@ enum NodeType : unsigned {
MEMBARRIER, // Compiler barrier only; generate a no-op.
RET_FLAG, // Return with a flag operand.
TS1AM, // A TS1AM instruction used for 1/2 bytes swap.
- VEC_BROADCAST, // A vector broadcast instruction.
- // 0: scalar value, 1: VL
+ VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector.
+ VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector.
+ // 0: v512 vector, 1: AVL
+ VEC_PACK, // pack a lo and a hi vector into one v512 vector
+ // 0: v256 lo vector, 1: v256 hi vector, 2: AVL
+
+ VEC_BROADCAST, // A vector broadcast instruction.
+ // 0: scalar value, 1: VL
REPL_I32,
REPL_F32, // Replicate subregister to other half.
+  // Annotation as a wrapper. LEGALAVL(VL) means that VL refers to 64-bit
+  // chunks of data, whereas the raw EVL coming in from VP nodes always refers
+  // to the number of elements, regardless of their size.
+ LEGALAVL,
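+  // For example, a VP node with an EVL of 12 on a packed v512i32 operation is
+  // legalized to a VVP node carrying LEGALAVL(6), since two 32-bit elements
+  // share each 64-bit chunk (see legalizePackedAVL).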
+
// VVP_* nodes.
#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
#include "VVPNodes.def"
};
}
+class VECustomDAG;
+
class VETargetLowering : public TargetLowering {
const VESubtarget *Subtarget;
@@ -105,6 +118,9 @@ public:
}
/// Custom Lower {
+ TargetLoweringBase::LegalizeAction
+ getCustomOperationAction(SDNode &) const override;
+
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
unsigned getJumpTableEncoding() const override;
const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
@@ -170,6 +186,15 @@ public:
/// VVP Lowering {
SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVVP_LOAD_STORE(SDValue Op, VECustomDAG &) const;
+ SDValue lowerVVP_GATHER_SCATTER(SDValue Op, VECustomDAG &) const;
+
+ SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue legalizeInternalLoadStoreOp(SDValue Op, VECustomDAG &CDAG) const;
+ SDValue splitVectorOp(SDValue Op, VECustomDAG &CDAG) const;
+ SDValue splitPackedLoadStore(SDValue Op, VECustomDAG &CDAG) const;
+ SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const;
+ SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const;
/// } VVPLowering
/// Custom DAGCombine {
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 7c1bd5201867..94ebb59c4c77 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -811,7 +811,7 @@ static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) {
// replace to pvfmk.w.up and pvfmk.w.lo
// replace to pvfmk.s.up and pvfmk.s.lo
- static std::map<unsigned, std::pair<unsigned, unsigned>> VFMKMap = {
+ static const std::pair<unsigned, std::pair<unsigned, unsigned>> VFMKMap[] = {
{VE::VFMKyal, {VE::VFMKLal, VE::VFMKLal}},
{VE::VFMKynal, {VE::VFMKLnal, VE::VFMKLnal}},
{VE::VFMKWyvl, {VE::PVFMKWUPvl, VE::PVFMKWLOvl}},
@@ -822,8 +822,9 @@ static void expandPseudoVFMK(const TargetInstrInfo &TI, MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
- auto Found = VFMKMap.find(Opcode);
- if (Found == VFMKMap.end())
+ const auto *Found =
+ llvm::find_if(VFMKMap, [&](auto P) { return P.first == Opcode; });
+ if (Found == std::end(VFMKMap))
report_fatal_error("unexpected opcode for pseudo vfmk");
unsigned OpcodeUpper = (*Found).second.first;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 717427c3f48d..85285749b4fa 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -875,14 +875,14 @@ multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
// e.g. LCR
let hasSideEffects = 1 in
multiclass LOADCRm<string opcStr, bits<8>opc, RegisterClass RC> {
- def rr : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$sy),
+ def rr : RR<opc, (outs RC:$sx), (ins RC:$sy, RC:$sz),
!strconcat(opcStr, " $sx, $sy, $sz")>;
- let cy = 0 in def ri : RR<opc, (outs RC:$sx), (ins RC:$sz, simm7:$sy),
+ let cy = 0 in def ir : RR<opc, (outs RC:$sx), (ins simm7:$sy, RC:$sz),
!strconcat(opcStr, " $sx, $sy, $sz")>;
- let cz = 0 in def zr : RR<opc, (outs RC:$sx), (ins zero:$sz, RC:$sy),
+ let cz = 0 in def rz : RR<opc, (outs RC:$sx), (ins RC:$sy, zero:$sz),
!strconcat(opcStr, " $sx, $sy, $sz")>;
let cy = 0, cz = 0 in
- def zi : RR<opc, (outs RC:$sx), (ins zero:$sz, simm7:$sy),
+ def iz : RR<opc, (outs RC:$sx), (ins simm7:$sy, zero:$sz),
!strconcat(opcStr, " $sx, $sy, $sz")>;
}
@@ -890,17 +890,31 @@ multiclass LOADCRm<string opcStr, bits<8>opc, RegisterClass RC> {
// e.g. SCR
let hasSideEffects = 1 in
multiclass STORECRm<string opcStr, bits<8>opc, RegisterClass RC> {
- def rr : RR<opc, (outs), (ins RC:$sz, RC:$sy, RC:$sx),
+ def rrr : RR<opc, (outs), (ins RC:$sy, RC:$sz, RC:$sx),
!strconcat(opcStr, " $sx, $sy, $sz")>;
- let cy = 0 in def ri : RR<opc, (outs), (ins RC:$sz, simm7:$sy, RC:$sx),
- !strconcat(opcStr, " $sx, $sy, $sz")>;
- let cz = 0 in def zr : RR<opc, (outs), (ins zero:$sz, RC:$sy, RC:$sx),
- !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0 in def irr : RR<opc, (outs), (ins simm7:$sy, RC:$sz, RC:$sx),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cz = 0 in def rzr : RR<opc, (outs), (ins RC:$sy, zero:$sz, RC:$sx),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
let cy = 0, cz = 0 in
- def zi : RR<opc, (outs), (ins zero:$sz, simm7:$sy, RC:$sx),
- !strconcat(opcStr, " $sx, $sy, $sz")>;
+ def izr : RR<opc, (outs), (ins simm7:$sy, zero:$sz, RC:$sx),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+}
+
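+// Multiclass for Test & Set Communication Register instructions, which both
+// read and update $sx: the output register is tied to the extra $sx_in input
+// operand.
+// e.g. TSCR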
+let hasSideEffects = 1, Constraints = "$sx = $sx_in", DisableEncoding = "$sx_in" in
+multiclass TSCRm<string opcStr, bits<8>opc, RegisterClass RC> {
+ def rrr : RR<opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, RC:$sx_in),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0 in def irr : RR<opc, (outs RC:$sx), (ins simm7:$sy, RC:$sz, RC:$sx_in),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cz = 0 in def rzr : RR<opc, (outs RC:$sx), (ins RC:$sy, zero:$sz, RC:$sx_in),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
+ let cy = 0, cz = 0 in
+ def izr : RR<opc, (outs RC:$sx), (ins simm7:$sy, zero:$sz, RC:$sx_in),
+ !strconcat(opcStr, " $sx, $sy, $sz")>;
}
+
// Multiclass for communication register instructions.
// e.g. FIDCR
let cz = 0, hasSideEffects = 1 in
@@ -1528,7 +1542,7 @@ defm LCR : LOADCRm<"lcr", 0x40, I64>;
defm SCR : STORECRm<"scr", 0x50, I64>;
// Section 8.19.11 - TSCR (Test & Set Communication Register)
-defm TSCR : LOADCRm<"tscr", 0x41, I64>;
+defm TSCR : TSCRm<"tscr", 0x41, I64>;
// Section 8.19.12 - FIDCR (Fetch & Increment/Decrement CR)
defm FIDCR : FIDCRm<"fidcr", 0x51, I64>;
@@ -2293,6 +2307,18 @@ class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
[SDTCisVec<0>, IsVLVT<2>]>>;
+///// Packed mode Support /////
+// unpack the lo part of this vector
+def vec_unpack_lo : SDNode<"VEISD::VEC_UNPACK_LO", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>;
+// unpack the hi part of this vector
+def vec_unpack_hi : SDNode<"VEISD::VEC_UNPACK_HI", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>;
+// re-pack v256i32, v256f32 back into one v512.32 vector
+def vec_pack : SDNode<"VEISD::VEC_PACK", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
+ SDTCisSameNumEltsAs<1,2>, IsVLVT<3>]>>;
+
// replicate lower 32bit to upper 32bit (f32 scalar replication).
def repl_f32 : SDNode<"VEISD::REPL_F32",
SDTypeProfile<1, 1,
diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
index 9ec10838db05..2ef621ae7477 100644
--- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -601,6 +601,42 @@ def : Pat<(int_ve_vl_pveqv_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVEQVrvl i64:$s
def : Pat<(int_ve_vl_pveqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
def : Pat<(int_ve_vl_pveqv_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
def : Pat<(int_ve_vl_pveqv_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldz_vvl v256f64:$vz, i32:$vl), (VLDZvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vldz_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VLDZvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vldz_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VLDZvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvldzlo_vvl v256f64:$vz, i32:$vl), (PVLDZLOvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvldzlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvldzlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVLDZLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvldzup_vvl v256f64:$vz, i32:$vl), (PVLDZUPvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvldzup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvldzup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVLDZUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvldz_vvl v256f64:$vz, i32:$vl), (PVLDZvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvldz_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvldz_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVLDZvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vpcnt_vvl v256f64:$vz, i32:$vl), (VPCNTvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vpcnt_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VPCNTvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vpcnt_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VPCNTvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvpcntlo_vvl v256f64:$vz, i32:$vl), (PVPCNTLOvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvpcntlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvpcntlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvpcntup_vvl v256f64:$vz, i32:$vl), (PVPCNTUPvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvpcntup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvpcntup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvpcnt_vvl v256f64:$vz, i32:$vl), (PVPCNTvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvpcnt_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvpcnt_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrv_vvl v256f64:$vz, i32:$vl), (VBRVvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vbrv_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VBRVvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vbrv_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRVvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrvlo_vvl v256f64:$vz, i32:$vl), (PVBRVLOvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvbrvlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrvlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVBRVLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrvup_vvl v256f64:$vz, i32:$vl), (PVBRVUPvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvbrvup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrvup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVBRVUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrv_vvl v256f64:$vz, i32:$vl), (PVBRVvl v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvbrv_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvbrv_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVBRVvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
def : Pat<(int_ve_vl_vseq_vl i32:$vl), (VSEQl i32:$vl)>;
def : Pat<(int_ve_vl_vseq_vvl v256f64:$pt, i32:$vl), (VSEQl_v i32:$vl, v256f64:$pt)>;
def : Pat<(int_ve_vl_pvseqlo_vl i32:$vl), (PVSEQLOl i32:$vl)>;
@@ -1602,3 +1638,21 @@ def : Pat<(int_ve_vl_negm_MM v512i1:$vmy), (NEGMy v512i1:$vmy)>;
def : Pat<(int_ve_vl_pcvm_sml v256i1:$vmy, i32:$vl), (PCVMml v256i1:$vmy, i32:$vl)>;
def : Pat<(int_ve_vl_lzvm_sml v256i1:$vmy, i32:$vl), (LZVMml v256i1:$vmy, i32:$vl)>;
def : Pat<(int_ve_vl_tovm_sml v256i1:$vmy, i32:$vl), (TOVMml v256i1:$vmy, i32:$vl)>;
+def : Pat<(int_ve_vl_lcr_sss i64:$sy, i64:$sz), (LCRrr i64:$sy, i64:$sz)>;
+def : Pat<(int_ve_vl_lcr_sss i64:$sy, zero:$Z), (LCRrz i64:$sy, (LO7 $Z))>;
+def : Pat<(int_ve_vl_lcr_sss uimm7:$N, i64:$sz), (LCRir (ULO7 $N), i64:$sz)>;
+def : Pat<(int_ve_vl_lcr_sss uimm7:$N, zero:$Z), (LCRiz (ULO7 $N), (LO7 $Z))>;
+def : Pat<(int_ve_vl_scr_sss i64:$sx, i64:$sy, i64:$sz), (SCRrrr i64:$sy, i64:$sz, i64:$sx)>;
+def : Pat<(int_ve_vl_scr_sss i64:$sx, i64:$sy, zero:$Z), (SCRrzr i64:$sy, (LO7 $Z), i64:$sx)>;
+def : Pat<(int_ve_vl_scr_sss i64:$sx, uimm7:$N, i64:$sz), (SCRirr (ULO7 $N), i64:$sz, i64:$sx)>;
+def : Pat<(int_ve_vl_scr_sss i64:$sx, uimm7:$N, zero:$Z), (SCRizr (ULO7 $N), (LO7 $Z), i64:$sx)>;
+def : Pat<(int_ve_vl_tscr_ssss i64:$sx, i64:$sy, i64:$sz), (TSCRrrr i64:$sy, i64:$sz, i64:$sx)>;
+def : Pat<(int_ve_vl_tscr_ssss i64:$sx, i64:$sy, zero:$Z), (TSCRrzr i64:$sy, (LO7 $Z), i64:$sx)>;
+def : Pat<(int_ve_vl_tscr_ssss i64:$sx, uimm7:$N, i64:$sz), (TSCRirr (ULO7 $N), i64:$sz, i64:$sx)>;
+def : Pat<(int_ve_vl_tscr_ssss i64:$sx, uimm7:$N, zero:$Z), (TSCRizr (ULO7 $N), (LO7 $Z), i64:$sx)>;
+def : Pat<(int_ve_vl_fidcr_sss i64:$sy, uimm3:$I), (FIDCRri i64:$sy, (LO7 $I))>;
+def : Pat<(int_ve_vl_fidcr_sss uimm7:$N, uimm3:$I), (FIDCRii (ULO7 $N), (LO7 $I))>;
+def : Pat<(int_ve_vl_fencei ), (FENCEI )>;
+def : Pat<(int_ve_vl_fencem_s uimm2:$I), (FENCEM (LO7 $I))>;
+def : Pat<(int_ve_vl_fencec_s uimm3:$I), (FENCEC (LO7 $I))>;
+def : Pat<(int_ve_vl_svob ), (SVOB )>;
diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
index 69ea133ceed0..fca0572cf9b1 100644
--- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td
@@ -2,9 +2,6 @@
/// Intrinsic patterns written by hand.
-// SVOB pattern.
-def : Pat<(int_ve_vl_svob), (SVOB)>;
-
// Pack patterns.
def : Pat<(i64 (int_ve_vl_pack_f32p ADDRrii:$addr0, ADDRrii:$addr1)),
(ORrr (f2l (LDUrii MEMrii:$addr0)),
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index 6c5b80315efb..71199717a3a2 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -105,3 +105,46 @@ defm : vbrd_elem64<v512i32, i64, simm7, LO7>;
defm : vbrd_elem64<v512f32, i64, simm7, LO7>;
defm : vbrd_elem64<v512i32, f64, simm7fp, LO7FP>;
defm : vbrd_elem64<v512f32, f64, simm7fp, LO7FP>;
+
+class Mask_Binary<ValueType MaskVT, SDPatternOperator MaskOp, string InstName> :
+ Pat<(MaskVT (MaskOp MaskVT:$ma, MaskVT:$mb)), (!cast<Instruction>(InstName#"mm") $ma, $mb)>;
+
+def: Mask_Binary<v256i1, and, "ANDM">;
+def: Mask_Binary<v256i1, or, "ORM">;
+def: Mask_Binary<v256i1, xor, "XORM">;
+
+///// Packing support /////
+
+// v256i1 <> v512i1
+def : Pat<(v256i1 (vec_unpack_lo v512i1:$vm, (i32 srcvalue))),
+ (EXTRACT_SUBREG $vm, sub_vm_odd)>;
+def : Pat<(v256i1 (vec_unpack_hi v512i1:$vm, (i32 srcvalue))),
+ (EXTRACT_SUBREG $vm, sub_vm_even)>;
+def : Pat<(v512i1 (vec_pack v256i1:$vlo, v256i1:$vhi, (i32 srcvalue))),
+ (INSERT_SUBREG (INSERT_SUBREG
+ (v512i1 (IMPLICIT_DEF)),
+ $vlo, sub_vm_odd),
+ $vhi, sub_vm_even)>;
+
+// v256.32 <> v512.32
+multiclass Packing<ValueType PackVT> {
+ // no-op unpacks
+ def : Pat<(v256i32 (vec_unpack_lo PackVT:$vp, (i32 srcvalue))),
+ (COPY_TO_REGCLASS $vp, V64)>;
+ def : Pat<(v256f32 (vec_unpack_hi PackVT:$vp, (i32 srcvalue))),
+ (COPY_TO_REGCLASS $vp, V64)>;
+
+ // shuffle unpacks
+ def : Pat<(v256f32 (vec_unpack_lo PackVT:$vp, i32:$avl)),
+ (VSHFvvil $vp, $vp, 4, $avl)>; // always pick lo
+ def : Pat<(v256i32 (vec_unpack_hi PackVT:$vp, i32:$avl)),
+ (VSHFvvil $vp, $vp, 0, $avl)>; // always pick hi
+}
+
+defm : Packing<v512i32>;
+defm : Packing<v512f32>;
+
+def : Pat<(v512i32 (vec_pack v256i32:$vlo, v256i32:$vhi, i32:$avl)),
+ (VSHFvvil $vlo, $vhi, 13, $avl)>;
+def : Pat<(v512f32 (vec_pack v256f32:$vlo, v256f32:$vhi, i32:$avl)),
+ (VSHFvvil $vlo, $vhi, 8, $avl)>;
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
index 1addfc7174eb..2ada2581291d 100644
--- a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
@@ -11,3 +11,10 @@
using namespace llvm;
void VEMachineFunctionInfo::anchor() {}
+
+MachineFunctionInfo *VEMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<VEMachineFunctionInfo>(*this);
+}
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
index 3160f6a552d7..d9d30ad5b8c5 100644
--- a/llvm/lib/Target/VE/VEMachineFunctionInfo.h
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
@@ -33,6 +33,11 @@ public:
explicit VEMachineFunctionInfo(MachineFunction &MF)
: VarArgsFrameOffset(0), IsLeafProc(false) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
Register getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
diff --git a/llvm/lib/Target/VE/VERegisterInfo.td b/llvm/lib/Target/VE/VERegisterInfo.td
index 70ff104b65b7..cca0ad26b3e9 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.td
+++ b/llvm/lib/Target/VE/VERegisterInfo.td
@@ -152,8 +152,10 @@ foreach I = 0-15 in
def VM#I : VEMaskReg<I, "vm"#I, [], ["vm"#I]>, DwarfRegNum<[!add(128,I)]>;
// Aliases of VMs to use as a pair of two VM for packed instructions
+def VMP0 : VEMaskReg<0, "vm0", [], ["vm0"]>;
+
let SubRegIndices = [sub_vm_even, sub_vm_odd], CoveredBySubRegs = 1 in
-foreach I = 0-7 in
+foreach I = 1-7 in
def VMP#I : VEMaskReg<!shl(I,1), "vmp"#I,
[!cast<VEMaskReg>("VM"#!shl(I,1)),
!cast<VEMaskReg>("VM"#!add(!shl(I,1),1))],
diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp
index 9f294f15da91..d7c1457fb0a8 100644
--- a/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -61,7 +61,7 @@ static std::string computeDataLayout(const Triple &T) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::Static);
+ return RM.value_or(Reloc::Static);
}
class VEELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -90,9 +90,10 @@ VETargetMachine::VETargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-VETargetMachine::~VETargetMachine() {}
+VETargetMachine::~VETargetMachine() = default;
-TargetTransformInfo VETargetMachine::getTargetTransformInfo(const Function &F) {
+TargetTransformInfo
+VETargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(VETTIImpl(this, F));
}
diff --git a/llvm/lib/Target/VE/VETargetMachine.h b/llvm/lib/Target/VE/VETargetMachine.h
index 041d3b197ec3..9cf194444aa5 100644
--- a/llvm/lib/Target/VE/VETargetMachine.h
+++ b/llvm/lib/Target/VE/VETargetMachine.h
@@ -49,7 +49,7 @@ public:
bool isMachineVerifierClean() const override { return false; }
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
unsigned getSjLjDataSize() const override { return 64; }
};
diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.h b/llvm/lib/Target/VE/VETargetTransformInfo.h
index 0242fa1b0117..c68844708878 100644
--- a/llvm/lib/Target/VE/VETargetTransformInfo.h
+++ b/llvm/lib/Target/VE/VETargetTransformInfo.h
@@ -21,6 +21,32 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
+static llvm::Type *getVectorElementType(llvm::Type *Ty) {
+ return llvm::cast<llvm::FixedVectorType>(Ty)->getElementType();
+}
+
+static llvm::Type *getLaneType(llvm::Type *Ty) {
+ using namespace llvm;
+ if (!isa<VectorType>(Ty))
+ return Ty;
+ return getVectorElementType(Ty);
+}
+
+static bool isVectorLaneType(llvm::Type &ElemTy) {
+ // check element sizes for vregs
+ if (ElemTy.isIntegerTy()) {
+ unsigned ScaBits = ElemTy.getScalarSizeInBits();
+ return ScaBits == 1 || ScaBits == 32 || ScaBits == 64;
+ }
+ if (ElemTy.isPointerTy()) {
+ return true;
+ }
+ if (ElemTy.isFloatTy() || ElemTy.isDoubleTy()) {
+ return true;
+ }
+ return false;
+}
+
namespace llvm {
class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {
@@ -35,6 +61,25 @@ class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {
bool enableVPU() const { return getST()->enableVPU(); }
+ static bool isSupportedReduction(Intrinsic::ID ReductionID) {
+#define VEC_VP_CASE(SUFFIX) \
+ case Intrinsic::vp_reduce_##SUFFIX: \
+ case Intrinsic::vector_reduce_##SUFFIX:
+
+ switch (ReductionID) {
+ VEC_VP_CASE(add)
+ VEC_VP_CASE(and)
+ VEC_VP_CASE(or)
+ VEC_VP_CASE(xor)
+ VEC_VP_CASE(smax)
+ return true;
+
+ default:
+ return false;
+ }
+#undef VEC_VP_CASE
+ }
+
public:
explicit VETTIImpl(const VETargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -86,6 +131,27 @@ public:
// output
return false;
}
+
+ // Load & Store {
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+ return isVectorLaneType(*getLaneType(DataType));
+ }
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+ return isVectorLaneType(*getLaneType(DataType));
+ }
+ bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
+ return isVectorLaneType(*getLaneType(DataType));
+  }
+ bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
+ return isVectorLaneType(*getLaneType(DataType));
+ }
+ // } Load & Store
+
+ bool shouldExpandReduction(const IntrinsicInst *II) const {
+ if (!enableVPU())
+ return true;
+ return !isSupportedReduction(II->getIntrinsicID());
+ }
};
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp
new file mode 100644
index 000000000000..330eef4c7c2b
--- /dev/null
+++ b/llvm/lib/Target/VE/VVPISelLowering.cpp
@@ -0,0 +1,443 @@
+//===-- VVPISelLowering.cpp - VE DAG Lowering Implementation --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering and legalization of vector instructions to
+// VVP_* layer SDNodes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VECustomDAG.h"
+#include "VEISelLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-lower"
+
+SDValue VETargetLowering::splitMaskArithmetic(SDValue Op,
+ SelectionDAG &DAG) const {
+ VECustomDAG CDAG(DAG, Op);
+ SDValue AVL =
+ CDAG.getConstant(Op.getValueType().getVectorNumElements(), MVT::i32);
+ SDValue A = Op->getOperand(0);
+ SDValue B = Op->getOperand(1);
+ SDValue LoA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Lo, AVL);
+ SDValue HiA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Hi, AVL);
+ SDValue LoB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Lo, AVL);
+ SDValue HiB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Hi, AVL);
+ unsigned Opc = Op.getOpcode();
+ auto LoRes = CDAG.getNode(Opc, MVT::v256i1, {LoA, LoB});
+ auto HiRes = CDAG.getNode(Opc, MVT::v256i1, {HiA, HiB});
+ return CDAG.getPack(MVT::v512i1, LoRes, HiRes, AVL);
+}
+
+SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
+  // Check whether this operation can be represented as a VVP node.
+ const unsigned Opcode = Op->getOpcode();
+ auto VVPOpcodeOpt = getVVPOpcode(Opcode);
+ if (!VVPOpcodeOpt)
+ return SDValue();
+ unsigned VVPOpcode = VVPOpcodeOpt.getValue();
+ const bool FromVP = ISD::isVPOpcode(Opcode);
+
+ // The representative and legalized vector type of this operation.
+ VECustomDAG CDAG(DAG, Op);
+ // Dispatch to complex lowering functions.
+ switch (VVPOpcode) {
+ case VEISD::VVP_LOAD:
+ case VEISD::VVP_STORE:
+ return lowerVVP_LOAD_STORE(Op, CDAG);
+ case VEISD::VVP_GATHER:
+ case VEISD::VVP_SCATTER:
+ return lowerVVP_GATHER_SCATTER(Op, CDAG);
+ }
+
+ EVT OpVecVT = *getIdiomaticVectorType(Op.getNode());
+ EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
+ auto Packing = getTypePacking(LegalVecVT.getSimpleVT());
+
+ SDValue AVL;
+ SDValue Mask;
+
+ if (FromVP) {
+    // Upstream VP SDNodes provide a mask and AVL operand where defined.
+ auto MaskIdx = ISD::getVPMaskIdx(Opcode);
+ auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode);
+ if (MaskIdx)
+ Mask = Op->getOperand(*MaskIdx);
+ if (AVLIdx)
+ AVL = Op->getOperand(*AVLIdx);
+ }
+
+ // Materialize default mask and avl.
+ if (!AVL)
+ AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32);
+ if (!Mask)
+ Mask = CDAG.getConstantMask(Packing, true);
+
+ assert(LegalVecVT.isSimple());
+ if (isVVPUnaryOp(VVPOpcode))
+ return CDAG.getNode(VVPOpcode, LegalVecVT, {Op->getOperand(0), Mask, AVL});
+ if (isVVPBinaryOp(VVPOpcode))
+ return CDAG.getNode(VVPOpcode, LegalVecVT,
+ {Op->getOperand(0), Op->getOperand(1), Mask, AVL});
+ if (isVVPReductionOp(VVPOpcode)) {
+ auto SrcHasStart = hasReductionStartParam(Op->getOpcode());
+ SDValue StartV = SrcHasStart ? Op->getOperand(0) : SDValue();
+ SDValue VectorV = Op->getOperand(SrcHasStart ? 1 : 0);
+ return CDAG.getLegalReductionOpVVP(VVPOpcode, Op.getValueType(), StartV,
+ VectorV, Mask, AVL, Op->getFlags());
+ }
+
+ switch (VVPOpcode) {
+ default:
+ llvm_unreachable("lowerToVVP called for unexpected SDNode.");
+ case VEISD::VVP_FFMA: {
+ // VE has a swizzled operand order in FMA (compared to LLVM IR and
+ // SDNodes).
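+    // Here (fma x, y, z), which computes x * y + z, is reordered so that the
+    // original addend z comes first: (z, x, y).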
+ auto X = Op->getOperand(2);
+ auto Y = Op->getOperand(0);
+ auto Z = Op->getOperand(1);
+ return CDAG.getNode(VVPOpcode, LegalVecVT, {X, Y, Z, Mask, AVL});
+ }
+ case VEISD::VVP_SELECT: {
+ auto Mask = Op->getOperand(0);
+ auto OnTrue = Op->getOperand(1);
+ auto OnFalse = Op->getOperand(2);
+ return CDAG.getNode(VVPOpcode, LegalVecVT, {OnTrue, OnFalse, Mask, AVL});
+ }
+ case VEISD::VVP_SETCC: {
+ EVT LegalResVT = getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ auto LHS = Op->getOperand(0);
+ auto RHS = Op->getOperand(1);
+ auto Pred = Op->getOperand(2);
+ return CDAG.getNode(VVPOpcode, LegalResVT, {LHS, RHS, Pred, Mask, AVL});
+ }
+ }
+}
+
+SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op,
+ VECustomDAG &CDAG) const {
+ auto VVPOpc = *getVVPOpcode(Op->getOpcode());
+ const bool IsLoad = (VVPOpc == VEISD::VVP_LOAD);
+
+  // Operands shared by loads and stores.
+ SDValue BasePtr = getMemoryPtr(Op);
+ SDValue Mask = getNodeMask(Op);
+ SDValue Chain = getNodeChain(Op);
+ SDValue AVL = getNodeAVL(Op);
+ // Store specific.
+ SDValue Data = getStoredValue(Op);
+ // Load specific.
+ SDValue PassThru = getNodePassthru(Op);
+
+ SDValue StrideV = getLoadStoreStride(Op, CDAG);
+
+ auto DataVT = *getIdiomaticVectorType(Op.getNode());
+ auto Packing = getTypePacking(DataVT);
+
+ // TODO: Infer lower AVL from mask.
+ if (!AVL)
+ AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32);
+
+ // Default to the all-true mask.
+ if (!Mask)
+ Mask = CDAG.getConstantMask(Packing, true);
+
+ if (IsLoad) {
+ MVT LegalDataVT = getLegalVectorType(
+ Packing, DataVT.getVectorElementType().getSimpleVT());
+
+ auto NewLoadV = CDAG.getNode(VEISD::VVP_LOAD, {LegalDataVT, MVT::Other},
+ {Chain, BasePtr, StrideV, Mask, AVL});
+
+ if (!PassThru || PassThru->isUndef())
+ return NewLoadV;
+
+ // Convert passthru to an explicit select node.
+ SDValue DataV = CDAG.getNode(VEISD::VVP_SELECT, DataVT,
+ {NewLoadV, PassThru, Mask, AVL});
+ SDValue NewLoadChainV = SDValue(NewLoadV.getNode(), 1);
+
+ // Merge them back into one node.
+ return CDAG.getMergeValues({DataV, NewLoadChainV});
+ }
+
+ // VVP_STORE
+ assert(VVPOpc == VEISD::VVP_STORE);
+ return CDAG.getNode(VEISD::VVP_STORE, Op.getNode()->getVTList(),
+ {Chain, Data, BasePtr, StrideV, Mask, AVL});
+}
+
+SDValue VETargetLowering::splitPackedLoadStore(SDValue Op,
+ VECustomDAG &CDAG) const {
+ auto VVPOC = *getVVPOpcode(Op.getOpcode());
+ assert((VVPOC == VEISD::VVP_LOAD) || (VVPOC == VEISD::VVP_STORE));
+
+ MVT DataVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT();
+ assert(getTypePacking(DataVT) == Packing::Dense &&
+ "Can only split packed load/store");
+ MVT SplitDataVT = splitVectorType(DataVT);
+
+ assert(!getNodePassthru(Op) &&
+ "Should have been folded in lowering to VVP layer");
+
+ // Analyze the operation
+ SDValue PackedMask = getNodeMask(Op);
+ SDValue PackedAVL = getAnnotatedNodeAVL(Op).first;
+ SDValue PackPtr = getMemoryPtr(Op);
+ SDValue PackData = getStoredValue(Op);
+ SDValue PackStride = getLoadStoreStride(Op, CDAG);
+
+ unsigned ChainResIdx = PackData ? 0 : 1;
+
+ SDValue PartOps[2];
+
+  SDValue UpperPartAVL; // Used later to pack the parts back together.
+ for (PackElem Part : {PackElem::Hi, PackElem::Lo}) {
+ // VP ops already have an explicit mask and AVL. When expanding from non-VP
+    // nodes, attach those additional inputs here.
+ auto SplitTM = CDAG.getTargetSplitMask(PackedMask, PackedAVL, Part);
+
+    // Keep track of the (higher part's) AVL.
+ if (Part == PackElem::Hi)
+ UpperPartAVL = SplitTM.AVL;
+
+ // Attach non-predicating value operands
+ SmallVector<SDValue, 4> OpVec;
+
+ // Chain
+ OpVec.push_back(getNodeChain(Op));
+
+ // Data
+ if (PackData) {
+ SDValue PartData =
+ CDAG.getUnpack(SplitDataVT, PackData, Part, SplitTM.AVL);
+ OpVec.push_back(PartData);
+ }
+
+ // Ptr & Stride
+ // Push (ptr + ElemBytes * <Part>, 2 * ElemBytes)
+ // Stride info
+ // EVT DataVT = LegalizeVectorType(getMemoryDataVT(Op), Op, DAG, Mode);
+ OpVec.push_back(CDAG.getSplitPtrOffset(PackPtr, PackStride, Part));
+ OpVec.push_back(CDAG.getSplitPtrStride(PackStride));
+
+ // Add predicating args and generate part node
+ OpVec.push_back(SplitTM.Mask);
+ OpVec.push_back(SplitTM.AVL);
+
+ if (PackData) {
+ // Store
+ PartOps[(int)Part] = CDAG.getNode(VVPOC, MVT::Other, OpVec);
+ } else {
+ // Load
+ PartOps[(int)Part] =
+ CDAG.getNode(VVPOC, {SplitDataVT, MVT::Other}, OpVec);
+ }
+ }
+
+ // Merge the chains
+ SDValue LowChain = SDValue(PartOps[(int)PackElem::Lo].getNode(), ChainResIdx);
+ SDValue HiChain = SDValue(PartOps[(int)PackElem::Hi].getNode(), ChainResIdx);
+ SDValue FusedChains =
+ CDAG.getNode(ISD::TokenFactor, MVT::Other, {LowChain, HiChain});
+
+ // Chain only [store]
+ if (PackData)
+ return FusedChains;
+
+ // Re-pack into full packed vector result
+ MVT PackedVT =
+ getLegalVectorType(Packing::Dense, DataVT.getVectorElementType());
+ SDValue PackedVals = CDAG.getPack(PackedVT, PartOps[(int)PackElem::Lo],
+ PartOps[(int)PackElem::Hi], UpperPartAVL);
+
+ return CDAG.getMergeValues({PackedVals, FusedChains});
+}
+
+SDValue VETargetLowering::lowerVVP_GATHER_SCATTER(SDValue Op,
+ VECustomDAG &CDAG) const {
+ EVT DataVT = *getIdiomaticVectorType(Op.getNode());
+ auto Packing = getTypePacking(DataVT);
+ MVT LegalDataVT =
+ getLegalVectorType(Packing, DataVT.getVectorElementType().getSimpleVT());
+
+ SDValue AVL = getAnnotatedNodeAVL(Op).first;
+ SDValue Index = getGatherScatterIndex(Op);
+ SDValue BasePtr = getMemoryPtr(Op);
+ SDValue Mask = getNodeMask(Op);
+ SDValue Chain = getNodeChain(Op);
+ SDValue Scale = getGatherScatterScale(Op);
+ SDValue PassThru = getNodePassthru(Op);
+ SDValue StoredValue = getStoredValue(Op);
+ if (PassThru && PassThru->isUndef())
+ PassThru = SDValue();
+
+ bool IsScatter = (bool)StoredValue;
+
+ // TODO: Infer lower AVL from mask.
+ if (!AVL)
+ AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32);
+
+ // Default to the all-true mask.
+ if (!Mask)
+ Mask = CDAG.getConstantMask(Packing, true);
+
+ SDValue AddressVec =
+ CDAG.getGatherScatterAddress(BasePtr, Scale, Index, Mask, AVL);
+ if (IsScatter)
+ return CDAG.getNode(VEISD::VVP_SCATTER, MVT::Other,
+ {Chain, StoredValue, AddressVec, Mask, AVL});
+
+ // Gather.
+ SDValue NewLoadV = CDAG.getNode(VEISD::VVP_GATHER, {LegalDataVT, MVT::Other},
+ {Chain, AddressVec, Mask, AVL});
+
+ if (!PassThru)
+ return NewLoadV;
+
+  // Convert passthru to an explicit select node.
+ SDValue DataV = CDAG.getNode(VEISD::VVP_SELECT, LegalDataVT,
+ {NewLoadV, PassThru, Mask, AVL});
+ SDValue NewLoadChainV = SDValue(NewLoadV.getNode(), 1);
+ return CDAG.getMergeValues({DataV, NewLoadChainV});
+}
+
+SDValue VETargetLowering::legalizeInternalLoadStoreOp(SDValue Op,
+ VECustomDAG &CDAG) const {
+ LLVM_DEBUG(dbgs() << "::legalizeInternalLoadStoreOp\n";);
+ MVT DataVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT();
+
+  // TODO: Recognize packable load/store.
+ if (isPackedVectorType(DataVT))
+ return splitPackedLoadStore(Op, CDAG);
+
+ return legalizePackedAVL(Op, CDAG);
+}
+
+SDValue VETargetLowering::legalizeInternalVectorOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ LLVM_DEBUG(dbgs() << "::legalizeInternalVectorOp\n";);
+ VECustomDAG CDAG(DAG, Op);
+
+ // Dispatch to specialized legalization functions.
+ switch (Op->getOpcode()) {
+ case VEISD::VVP_LOAD:
+ case VEISD::VVP_STORE:
+ return legalizeInternalLoadStoreOp(Op, CDAG);
+ }
+
+ EVT IdiomVT = Op.getValueType();
+ if (isPackedVectorType(IdiomVT) &&
+ !supportsPackedMode(Op.getOpcode(), IdiomVT))
+ return splitVectorOp(Op, CDAG);
+
+ // TODO: Implement odd/even splitting.
+ return legalizePackedAVL(Op, CDAG);
+}
+
+SDValue VETargetLowering::splitVectorOp(SDValue Op, VECustomDAG &CDAG) const {
+ MVT ResVT = splitVectorType(Op.getValue(0).getSimpleValueType());
+
+ auto AVLPos = getAVLPos(Op->getOpcode());
+ auto MaskPos = getMaskPos(Op->getOpcode());
+
+ SDValue PackedMask = getNodeMask(Op);
+ auto AVLPair = getAnnotatedNodeAVL(Op);
+ SDValue PackedAVL = AVLPair.first;
+  assert(!AVLPair.second && "Expecting a non-pack-legalized operation");
+
+  // Request the parts.
+ SDValue PartOps[2];
+
+  SDValue UpperPartAVL; // Used later to pack the parts back together.
+ for (PackElem Part : {PackElem::Hi, PackElem::Lo}) {
+ // VP ops already have an explicit mask and AVL. When expanding from non-VP
+    // nodes, attach those additional inputs here.
+ auto SplitTM = CDAG.getTargetSplitMask(PackedMask, PackedAVL, Part);
+
+ if (Part == PackElem::Hi)
+ UpperPartAVL = SplitTM.AVL;
+
+ // Attach non-predicating value operands
+ SmallVector<SDValue, 4> OpVec;
+ for (unsigned i = 0; i < Op.getNumOperands(); ++i) {
+ if (AVLPos && ((int)i) == *AVLPos)
+ continue;
+ if (MaskPos && ((int)i) == *MaskPos)
+ continue;
+
+ // Value operand
+ auto PackedOperand = Op.getOperand(i);
+ auto UnpackedOpVT = splitVectorType(PackedOperand.getSimpleValueType());
+ SDValue PartV =
+ CDAG.getUnpack(UnpackedOpVT, PackedOperand, Part, SplitTM.AVL);
+ OpVec.push_back(PartV);
+ }
+
+ // Add predicating args and generate part node.
+ OpVec.push_back(SplitTM.Mask);
+ OpVec.push_back(SplitTM.AVL);
+ // Emit legal VVP nodes.
+ PartOps[(int)Part] =
+ CDAG.getNode(Op.getOpcode(), ResVT, OpVec, Op->getFlags());
+ }
+
+ // Re-package vectors.
+ return CDAG.getPack(Op.getValueType(), PartOps[(int)PackElem::Lo],
+ PartOps[(int)PackElem::Hi], UpperPartAVL);
+}
+
+SDValue VETargetLowering::legalizePackedAVL(SDValue Op,
+ VECustomDAG &CDAG) const {
+ LLVM_DEBUG(dbgs() << "::legalizePackedAVL\n";);
+ // Only required for VEC and VVP ops.
+ if (!isVVPOrVEC(Op->getOpcode()))
+ return Op;
+
+ // Operation already has a legal AVL.
+ auto AVL = getNodeAVL(Op);
+ if (isLegalAVL(AVL))
+ return Op;
+
+  // Halve and round up the EVL for 32-bit element types.
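+  // For example, an incoming EVL of 17 covers ceil(17 / 2) == 9 64-bit
+  // chunks, so the legal AVL computed below is 9.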
+ SDValue LegalAVL = AVL;
+ MVT IdiomVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT();
+ if (isPackedVectorType(IdiomVT)) {
+ assert(maySafelyIgnoreMask(Op) &&
+ "TODO Shift predication from EVL into Mask");
+
+ if (auto *ConstAVL = dyn_cast<ConstantSDNode>(AVL)) {
+ LegalAVL = CDAG.getConstant((ConstAVL->getZExtValue() + 1) / 2, MVT::i32);
+ } else {
+ auto ConstOne = CDAG.getConstant(1, MVT::i32);
+ auto PlusOne = CDAG.getNode(ISD::ADD, MVT::i32, {AVL, ConstOne});
+ LegalAVL = CDAG.getNode(ISD::SRL, MVT::i32, {PlusOne, ConstOne});
+ }
+ }
+
+ SDValue AnnotatedLegalAVL = CDAG.annotateLegalAVL(LegalAVL);
+
+ // Copy the operand list.
+ int NumOp = Op->getNumOperands();
+ auto AVLPos = getAVLPos(Op->getOpcode());
+ std::vector<SDValue> FixedOperands;
+ for (int i = 0; i < NumOp; ++i) {
+ if (AVLPos && (i == *AVLPos)) {
+ FixedOperands.push_back(AnnotatedLegalAVL);
+ continue;
+ }
+ FixedOperands.push_back(Op->getOperand(i));
+ }
+
+ // Clone the operation with fixed operands.
+ auto Flags = Op->getFlags();
+ SDValue NewN =
+ CDAG.getNode(Op->getOpcode(), Op->getVTList(), FixedOperands, Flags);
+ return NewN;
+}
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index ef9c238066c0..a4e4984e3d12 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -18,7 +18,40 @@
// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
//===----------------------------------------------------------------------===//
-// Binary Operators {
+// vvp_load(ptr, stride, mask, avl)
+def SDTLoadVVP : SDTypeProfile<1, 4, [
+ SDTCisVec<0>,
+ SDTCisPtrTy<1>,
+ SDTCisInt<2>,
+ SDTCisVec<3>,
+ IsVLVT<4>
+]>;
+
+// vvp_store(data, ptr, stride, mask, avl)
+def SDTStoreVVP : SDTypeProfile<0, 5, [
+ SDTCisVec<0>,
+ SDTCisPtrTy<1>,
+ SDTCisInt<2>,
+ SDTCisVec<3>,
+ IsVLVT<4>
+]>;
+
+// vvp_scatter(chain, data, addr, mask, avl)
+def SDTScatterVVP : SDTypeProfile<0, 4, [
+ SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisVec<2>,
+ SDTCisSameNumEltsAs<0, 2>,
+ IsVLVT<3>
+]>;
+
+// vvp_gather(chain, addr, mask, avl)
+def SDTGatherVVP : SDTypeProfile<1, 3, [
+ SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisSameNumEltsAs<0, 2>,
+ IsVLVT<3>
+]>;
// BinaryOp(x,y,mask,vl)
def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
@@ -29,6 +62,15 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
IsVLVT<4>
]>;
+// UnaryFPOp(x,mask,vl)
+def SDTFPUnaryOpVVP : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>,
+ SDTCisInt<2>,
+ SDTCisSameNumEltsAs<0, 2>,
+ IsVLVT<3>
+]>;
+
// BinaryFPOp(x,y,mask,vl)
def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
SDTCisSameAs<0, 1>,
@@ -39,6 +81,17 @@ def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
IsVLVT<4>
]>;
+// TernaryFPOp(x,y,z,mask,vl)
+def SDTFPTernaryOpVVP : SDTypeProfile<1, 5, [
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisFP<0>,
+ SDTCisInt<4>,
+ SDTCisSameNumEltsAs<0, 4>,
+ IsVLVT<5>
+]>;
+
// Select(OnTrue, OnFalse, SelMask, vl)
def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
SDTCisVec<0>,
@@ -48,6 +101,28 @@ def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
IsVLVT<4>
]>;
+// SetCC (lhs, rhs, cc, mask, vl)
+def SDTSetCCVVP : SDTypeProfile<1, 5, [ // vp_setcc
+ SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, OtherVT>,
+ SDTCisInt<4>,
+ SDTCisSameNumEltsAs<0, 4>,
+ IsVLVT<5>
+]>;
+
+// vvp_reduce(vector, mask, vl)
+def SDTReduceVVP : SDTypeProfile<1, 3, [
+ SDTCisVec<1>,
+ SDTCisInt<2>,
+ SDTCisVec<2>,
+ SDTCisSameNumEltsAs<1, 2>,
+ IsVLVT<3>
+]>;
+
// Binary operator commutative pattern.
class vvp_commutative<SDNode RootOp> :
PatFrags<
@@ -55,6 +130,12 @@ class vvp_commutative<SDNode RootOp> :
[(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
(RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
+class vvp_fma_commutative<SDNode RootOp> :
+ PatFrags<
+ (ops node:$X, node:$Y, node:$Z, node:$mask, node:$vlen),
+ [(RootOp node:$X, node:$Y, node:$Z, node:$mask, node:$vlen),
+ (RootOp node:$X, node:$Z, node:$Y, node:$mask, node:$vlen)]>;
+
// VVP node definitions.
def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
def c_vvp_add : vvp_commutative<vvp_add>;
@@ -80,6 +161,8 @@ def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>;
def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
+def vvp_fneg : SDNode<"VEISD::VVP_FNEG", SDTFPUnaryOpVVP>;
+
def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
def c_vvp_fadd : vvp_commutative<vvp_fadd>;
def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
@@ -87,6 +170,30 @@ def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
def c_vvp_fmul : vvp_commutative<vvp_fmul>;
def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
-// } Binary Operators
+def vvp_ffma : SDNode<"VEISD::VVP_FFMA", SDTFPTernaryOpVVP>;
+def c_vvp_ffma : vvp_fma_commutative<vvp_ffma>;
+
+def vvp_scatter : SDNode<"VEISD::VVP_SCATTER", SDTScatterVVP,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def vvp_gather : SDNode<"VEISD::VVP_GATHER", SDTGatherVVP,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def vvp_load : SDNode<"VEISD::VVP_LOAD", SDTLoadVVP,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand ]>;
+def vvp_store : SDNode<"VEISD::VVP_STORE", SDTStoreVVP,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+// Reductions.
+
+// Integer reductions.
+def vvp_reduce_add : SDNode<"VEISD::VVP_REDUCE_ADD", SDTReduceVVP>;
+def vvp_reduce_and : SDNode<"VEISD::VVP_REDUCE_AND", SDTReduceVVP>;
+def vvp_reduce_or : SDNode<"VEISD::VVP_REDUCE_OR", SDTReduceVVP>;
+def vvp_reduce_xor : SDNode<"VEISD::VVP_REDUCE_XOR", SDTReduceVVP>;
+def vvp_reduce_smax : SDNode<"VEISD::VVP_REDUCE_SMAX", SDTReduceVVP>;
+
def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;
+
+// setcc (lhs, rhs, cc, mask, vl)
+def vvp_setcc : SDNode<"VEISD::VVP_SETCC", SDTSetCCVVP>;
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index 74720fd1f419..33316ad054c6 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -17,6 +17,167 @@
//===----------------------------------------------------------------------===//
include "VVPInstrInfo.td"
+multiclass VectorStore<ValueType DataVT,
+ ValueType PtrVT, ValueType MaskVT,
+ string STWithMask, string STNoMask> {
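+ // The immediate-stride patterns apply when the stride fits simm7;
+ // (LO7 $stride) extracts the low 7 bits for the instruction encoding.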
+ // Unmasked (imm stride).
+ def : Pat<(vvp_store
+ DataVT:$val, PtrVT:$addr,
+ (i64 simm7:$stride), (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(STNoMask#"irvl")
+ (LO7 $stride), $addr, $val, $avl)>;
+ // Unmasked.
+ def : Pat<(vvp_store
+ DataVT:$val, PtrVT:$addr,
+ i64:$stride, (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(STNoMask#"rrvl")
+ $stride, $addr, $val, $avl)>;
+ // Masked (imm stride).
+ def : Pat<(vvp_store
+ DataVT:$val, PtrVT:$addr,
+ (i64 simm7:$stride), MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(STWithMask#"irvml")
+ (LO7 $stride), $addr, $val, $mask, $avl)>;
+ // Masked.
+ def : Pat<(vvp_store
+ DataVT:$val, PtrVT:$addr,
+ i64:$stride, MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(STWithMask#"rrvml")
+ $stride, $addr, $val, $mask, $avl)>;
+}
+
+defm : VectorStore<v256f64, i64, v256i1, "VST", "VST">;
+defm : VectorStore<v256i64, i64, v256i1, "VST", "VST">;
+defm : VectorStore<v256f32, i64, v256i1, "VSTU", "VSTU">;
+defm : VectorStore<v256i32, i64, v256i1, "VSTL", "VSTL">;
+
+multiclass VectorLoad<ValueType DataVT,
+ ValueType PtrVT, ValueType MaskVT,
+ string GTWithMask, string LDNoMask> {
+ // Unmasked (imm stride).
+ def : Pat<(DataVT (vvp_load
+ PtrVT:$addr, (i64 simm7:$stride),
+ (MaskVT true_mask), i32:$avl)),
+ (!cast<Instruction>(LDNoMask#"irl")
+ (LO7 $stride), $addr, $avl)>;
+ // Unmasked.
+ def : Pat<(DataVT (vvp_load
+ PtrVT:$addr, i64:$stride,
+ (MaskVT true_mask), i32:$avl)),
+ (!cast<Instruction>(LDNoMask#"rrl")
+ $stride, PtrVT:$addr, $avl)>;
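+ // There is no masked strided-load instruction; the masked patterns below
+ // emulate it with a gather whose addresses are addr + stride * seq(0..avl-1).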
+ // Masked (imm stride).
+ def : Pat<(DataVT (vvp_load
+ PtrVT:$addr, (i64 simm7:$stride),
+ MaskVT:$mask, i32:$avl)),
+ (!cast<Instruction>(GTWithMask#"vizml")
+ (VADDULrvml $addr,
+ (VMULULivml (LO7 $stride), (VSEQl $avl), $mask, $avl),
+ $mask, $avl),
+ 0, 0,
+ $mask,
+ $avl)>;
+ // Masked.
+ def : Pat<(DataVT (vvp_load
+ PtrVT:$addr, i64:$stride, MaskVT:$mask, i32:$avl)),
+ (!cast<Instruction>(GTWithMask#"vizml")
+ (VADDULrvml $addr,
+ (VMULULrvml $stride, (VSEQl $avl), $mask, $avl),
+ $mask, $avl),
+ 0, 0,
+ $mask,
+ $avl)>;
+}
+
+defm : VectorLoad<v256f64, i64, v256i1, "VGT", "VLD">;
+defm : VectorLoad<v256i64, i64, v256i1, "VGT", "VLD">;
+defm : VectorLoad<v256f32, i64, v256i1, "VGTU", "VLDU">;
+defm : VectorLoad<v256i32, i64, v256i1, "VGTLZX", "VLDLZX">;
+
+// Vector gather and scatter.
+multiclass VectorGather<ValueType DataVT,
+ ValueType PtrVT, ValueType MaskVT,
+ string GTPrefix> {
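+ // Addresses are fully computed in the vector operand; the two zero
+ // immediates are the unused scalar base and index operands.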
+ // Unmasked.
+ def : Pat<(DataVT (vvp_gather
+ PtrVT:$addr, (MaskVT true_mask), i32:$avl)),
+ (!cast<Instruction>(GTPrefix#"vizl") $addr, 0, 0, $avl)>;
+ // Masked.
+ def : Pat<(DataVT (vvp_gather PtrVT:$addr, MaskVT:$mask, i32:$avl)),
+ (!cast<Instruction>(GTPrefix#"vizml") $addr, 0, 0, $mask, $avl)>;
+}
+
+defm : VectorGather<v256f64, v256i64, v256i1, "VGT">;
+defm : VectorGather<v256i64, v256i64, v256i1, "VGT">;
+defm : VectorGather<v256f32, v256i64, v256i1, "VGTU">;
+defm : VectorGather<v256i32, v256i64, v256i1, "VGTLZX">;
+
+multiclass VectorScatter<ValueType DataVT,
+ ValueType PtrVT, ValueType MaskVT,
+ string SCPrefix> {
+ // Unmasked.
+ def : Pat<(vvp_scatter
+ DataVT:$data, PtrVT:$addr, (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(SCPrefix#"vizvl") $addr, 0, 0, $data, $avl)>;
+ // Masked.
+ def : Pat<(vvp_scatter
+ DataVT:$data, PtrVT:$addr, MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(SCPrefix#"vizvml") $addr, 0, 0, $data, $mask, $avl)>;
+}
+
+defm : VectorScatter<v256f64, v256i64, v256i1, "VSC">;
+defm : VectorScatter<v256i64, v256i64, v256i1, "VSC">;
+defm : VectorScatter<v256f32, v256i64, v256i1, "VSCU">;
+defm : VectorScatter<v256i32, v256i64, v256i1, "VSCL">;
+
+/// FNEG {
+// Directly modify the sign bit to flip the sign.
+
+// Set sign bits in a pack of <2 x f32>.
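+// Expands to the constant 0x8000000080000000, one sign bit per 32-bit half.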
+def packed_fneg_imm : OutPatFrag<(ins ),
+ (i64 (SLLri (i64 (ORim 1, (i32 32))), 31))>;
+
+multiclass FNeg<ValueType DataVT> {
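+ // XOR with a mask that has only the MSB set flips the IEEE sign bit; the
+ // (i32 1) operand selects that mask via the mimm encoding.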
+ // Masked with select.
+ def : Pat<(vvp_select (vvp_fneg DataVT:$vx, (v256i1 srcvalue), (i32 srcvalue)),
+ DataVT:$vfalse,
+ v256i1:$mask,
+ i32:$avl),
+ (VXORmvml_v (i32 1), $vx, $mask, $avl, $vfalse)>;
+
+ // Unmasked.
+ def : Pat<(vvp_fneg DataVT:$vx, (v256i1 true_mask), i32:$avl),
+ (VXORmvl (i32 1), $vx, $avl)>;
+
+ // Masked.
+ def : Pat<(vvp_fneg DataVT:$vx, v256i1:$mask, i32:$avl),
+ (VXORmvml (i32 1), $vx, $mask, $avl)>;
+}
+
+defm : FNeg<v256f32>;
+defm : FNeg<v256f64>;
+
+///// Packed FNeg /////
+
+// Masked with select.
+def : Pat<(vvp_select (vvp_fneg v512f32:$vx, (v512i1 srcvalue), (i32 srcvalue)),
+ v512f32:$vfalse,
+ v512i1:$mask,
+ i32:$avl),
+ (v512f32 (PVXORrvml_v (packed_fneg_imm ), $vx, $mask, $avl, $vfalse))>;
+
+// Unmasked.
+def : Pat<(vvp_fneg v512f32:$vx, (v512i1 true_mask), i32:$avl),
+ (v512f32 (PVXORrvl (packed_fneg_imm ), $vx, $avl))>;
+
+// Masked.
+def : Pat<(vvp_fneg v512f32:$vx, v512i1:$mask, i32:$avl),
+ (v512f32 (PVXORrvml (packed_fneg_imm ), $vx, $mask, $avl))>;
+
+/// } FNEG
+
multiclass Binary_rv<SDPatternOperator OpNode,
ValueType ScalarVT, ValueType DataVT,
ValueType MaskVT, string OpBaseName> {
@@ -237,6 +398,143 @@ defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
f64, v256f64, "VFDIVD",
f32, v256f32, "VFDIVS">;
+defm : Binary_rv_vv<c_vvp_and,
+ i64, v512i32, v512i1, "PVAND">;
+defm : Binary_rv_vv<c_vvp_or,
+ i64, v512i32, v512i1, "PVOR">;
+defm : Binary_rv_vv<c_vvp_xor,
+ i64, v512i32, v512i1, "PVXOR">;
+
+defm : Binary_rv_vv<c_vvp_add,
+ i64, v512i32, v512i1, "PVADDU">;
+defm : Binary_rv_vv<vvp_sub,
+ i64, v512i32, v512i1, "PVSUBU">;
+defm : Binary_vr_vv<vvp_srl,
+ i64, v512i32, v512i1, "PVSRL">;
+defm : Binary_vr_vv<vvp_sra,
+ i64, v512i32, v512i1, "PVSRA">;
+defm : Binary_vr_vv<vvp_shl,
+ i64, v512i32, v512i1, "PVSLL">;
+
+defm : Binary_rv_vv<c_vvp_fadd,
+ i64, v512f32, v512i1, "PVFADD">;
+defm : Binary_rv_vv<c_vvp_fmul,
+ i64, v512f32, v512i1, "PVFMUL">;
+defm : Binary_rv_vv<vvp_fsub,
+ i64, v512f32, v512i1, "PVFSUB">;
+
+multiclass Ternary_vvv<
+ SDPatternOperator OpNode, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with passthru.
+ def : Pat<(vvp_select
+ (OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz,
+ (MaskVT srcvalue), (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvvml_v")
+ $vx, $vy, $vz, $mask, $avl, $vfalse)>;
+
+ // Unmasked.
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz,
+ (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvvl")
+ $vx, $vy, $vz, $avl)>;
+
+ // Masked.
+ def : Pat<(OpNode DataVT:$vx, DataVT:$vy, DataVT:$vz,
+ MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvvml")
+ $vx, $vy, $vz, $mask, $avl)>;
+}
+
+multiclass Ternary_rvv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with passthru, broadcast first.
+ def : Pat<(vvp_select
+ (OpNode
+ (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz,
+ (MaskVT srcvalue), (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvvml_v")
+ $sx, $vy, $vz, $mask, $avl, $vfalse)>;
+
+ // Unmasked, broadcast first.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz,
+ (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvvl")
+ $sx, $vy, $vz, $avl)>;
+
+ // Masked, broadcast first.
+ def : Pat<(OpNode
+ (any_broadcast ScalarVT:$sx), DataVT:$vy, DataVT:$vz,
+ MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"rvvml")
+ $sx, $vy, $vz, $mask, $avl)>;
+}
+
+multiclass Ternary_vrv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ // Masked with passthru, broadcast second.
+ def : Pat<(vvp_select
+ (OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz,
+ (MaskVT srcvalue), (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrvml_v")
+ $vx, $sy, $vz,
+ $mask, $avl, $vfalse)>;
+
+ // Unmasked, broadcast second.
+ def : Pat<(OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz,
+ (MaskVT true_mask), i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrvl")
+ $vx, $sy, $vz, $avl)>;
+
+ // Masked, broadcast second.
+ def : Pat<(OpNode
+ DataVT:$vx, (any_broadcast ScalarVT:$sy), DataVT:$vz,
+ MaskVT:$mask, i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vrvml")
+ $vx, $sy, $vz, $mask, $avl)>;
+}
+
+multiclass Ternary_rvv_vrv_vvv<
+ SDPatternOperator OpNode,
+ ValueType ScalarVT, ValueType DataVT,
+ ValueType MaskVT, string OpBaseName> {
+ defm : Ternary_rvv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+ defm : Ternary_vrv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+ defm : Ternary_vvv<OpNode, DataVT, MaskVT, OpBaseName>;
+}
+
+// Expand both the 64-bit and 32-bit variants (256 elements).
+multiclass Ternary_ShortLong<
+ SDPatternOperator OpNode,
+ ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+ ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+ defm : Ternary_rvv_vrv_vvv<OpNode, LongScalarVT, LongDataVT,
+ v256i1, LongOpBaseName>;
+ defm : Ternary_rvv_vrv_vvv<OpNode, ShortScalarVT, ShortDataVT,
+ v256i1, ShortOpBaseName>;
+}
+
+defm : Ternary_ShortLong<c_vvp_ffma,
+ f64, v256f64, "VFMADD", f32, v256f32, "VFMADS">;
+defm : Ternary_rvv_vrv_vvv<c_vvp_ffma,
+ i64, v512f32, v512i1, "PVFMAD">;
+
multiclass Merge_mvv<
SDPatternOperator OpNode,
ValueType DataVT, ValueType MaskVT,
@@ -268,3 +566,63 @@ defm : Merge_mvv_ShortLong<vvp_select,
defm : Merge_mvv_ShortLong<vvp_select,
v256i64,
v256i32, "VMRG">;
+
+multiclass Set_CC<ValueType DataVT, string FmkBaseName, string CmpBaseName,
+ SDPatternOperator CCMatcher, SDNodeXForm CCConv> {
+ // Unmasked.
+ def : Pat<(v256i1 (vvp_setcc
+ DataVT:$LHS, DataVT:$RHS, CCMatcher:$cond, (v256i1 true_mask), i32:$vl)),
+ (!cast<Instruction>(FmkBaseName#"vl")
+ (CCConv $cond),
+ (!cast<Instruction>(CmpBaseName#"vvl")
+ $LHS, $RHS, $vl),
+ $vl)>;
+ // Masked.
+ def : Pat<(v256i1 (vvp_setcc
+ DataVT:$LHS, DataVT:$RHS, CCMatcher:$cond, v256i1:$vm, i32:$vl)),
+ (!cast<Instruction>(FmkBaseName#"vml")
+ (CCConv $cond),
+ (!cast<Instruction>(CmpBaseName#"vvl")
+ $LHS, $RHS, $vl),
+ $vm, $vl)>;
+}
+
+defm : Set_CC<v256i64, "VFMKL", "VCMPUL", CCUIOp, icond2cc>;
+defm : Set_CC<v256i64, "VFMKL", "VCMPSL", CCSIOp, icond2cc>;
+defm : Set_CC<v256f64, "VFMKL", "VFCMPD", cond, fcond2cc>;
+
+defm : Set_CC<v256i32, "VFMKW", "VCMPUW", CCUIOp, icond2cc>;
+defm : Set_CC<v256i32, "VFMKW", "VCMPSWZX", CCSIOp, icond2cc>;
+defm : Set_CC<v256f32, "VFMKS", "VFCMPS", cond, fcond2cc>;
+
+multiclass Reduce_GenericInt<ValueType VectorVT,
+ RegisterClass ResRC, ValueType ResVT,
+ string VVPRedOp, string RedInstName> {
+ // Unmasked.
+ def : Pat <(ResVT (!cast<SDPatternOperator>("vvp_reduce_"#VVPRedOp)
+ VectorVT:$vx, (v256i1 true_mask), i32:$vl)),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>("LVSvi")
+ (!cast<Instruction>(RedInstName#"vl") $vx, $vl), 0),
+ ResRC)>;
+
+ // Masked.
+ def : Pat <(ResVT (!cast<SDPatternOperator>("vvp_reduce_"#VVPRedOp)
+ VectorVT:$vx, v256i1:$vm, i32:$vl)),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>("LVSvi")
+ (!cast<Instruction>(RedInstName#"vml") $vx, $vm, $vl), 0),
+ ResRC)>;
+}
+
+multiclass IntReduce_ShortLong<ValueType VectorVT,
+ RegisterClass ResRC, ValueType ResVT,
+ string SumSuffix, string MinMaxSuffix> {
+ defm: Reduce_GenericInt<VectorVT, ResRC, ResVT, "or", "VROR">;
+ defm: Reduce_GenericInt<VectorVT, ResRC, ResVT, "and", "VRAND">;
+ defm: Reduce_GenericInt<VectorVT, ResRC, ResVT, "xor", "VRXOR">;
+ defm: Reduce_GenericInt<VectorVT, ResRC, ResVT, "add", "VSUM"#SumSuffix>;
+ defm: Reduce_GenericInt<VectorVT, ResRC, ResVT, "smax", "VRMAX"#MinMaxSuffix>;
+}
+
+defm : IntReduce_ShortLong<v256i64, I64, i64, "L", "SLFST">;
+defm : IntReduce_ShortLong<v256i32, I32, i32, "WSX", "SWFSTSX">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index 8000f84c5dbe..a60588672293 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -24,6 +24,14 @@
#define ADD_VVP_OP(X, Y)
#endif
+/// ADD_UNARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPName is a VVP Unary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
+#ifndef ADD_UNARY_VVP_OP
+#define ADD_UNARY_VVP_OP(VVPNAME,SDNAME) \
+ ADD_VVP_OP(VVPNAME,SDNAME)
+#endif
+
/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME)
/// \p VVPName is a VVP Binary operator.
/// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
@@ -33,38 +41,95 @@
HANDLE_VP_TO_VVP(VPNAME, VVPNAME)
#endif
+/// ADD_TERNARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPName is a VVP Ternary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
+#ifndef ADD_TERNARY_VVP_OP
+#define ADD_TERNARY_VVP_OP(VVPNAME,SDNAME) \
+ ADD_VVP_OP(VVPNAME,SDNAME)
+#endif
+
#ifndef ADD_BINARY_VVP_OP_COMPACT
#define ADD_BINARY_VVP_OP_COMPACT(NAME) \
ADD_BINARY_VVP_OP(VVP_##NAME,VP_##NAME,NAME)
#endif
+/// REGISTER_PACKED(OPC)
+/// \p OPC The VVP opcode of the operation.
+#ifndef REGISTER_PACKED
+#define REGISTER_PACKED(OPC)
+#endif
+
+/// ADD_REDUCE_VVP_OP(OPC, SDNAME)
+/// \p OPC The VVP opcode of the operation.
+/// \p SDNAME The standard opcode of the operation.
+#ifndef ADD_REDUCE_VVP_OP
+#define ADD_REDUCE_VVP_OP(OPC, SDNAME) ADD_VVP_OP(OPC, SDNAME)
+#endif
+
+/// HANDLE_VVP_REDUCE_TO_SCALAR(VVP_RED_ISD, REDUCE_ISD)
+/// \p REDUCE_ISD is the scalar standard ISD used to perform this reduction.
+#ifndef HANDLE_VVP_REDUCE_TO_SCALAR
+#define HANDLE_VVP_REDUCE_TO_SCALAR(VVP_RED_ISD, REDUCE_ISD)
+#endif
+
+/// Reductions.
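+// HELPER_REDUCTION(OPC, SCALAR_OPC) registers a VVP reduction opcode, maps
+// the matching VP opcode onto it, and records the scalar ISD that performs
+// the reduction.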
+#define HELPER_REDUCTION(OPC, SCALAR_OPC) \
+ ADD_REDUCE_VVP_OP(VVP_REDUCE_##OPC,VECREDUCE_##OPC) \
+ HANDLE_VP_TO_VVP(VP_REDUCE_##OPC, VVP_REDUCE_##OPC) \
+ HANDLE_VVP_REDUCE_TO_SCALAR(VVP_REDUCE_##OPC, SCALAR_OPC)
+
+HELPER_REDUCTION(ADD, ADD)
+HELPER_REDUCTION(AND, AND)
+HELPER_REDUCTION(OR, OR)
+HELPER_REDUCTION(XOR, XOR)
+HELPER_REDUCTION(SMAX, SMAX)
+
+#undef HELPER_REDUCTION
+
+ADD_VVP_OP(VVP_LOAD,LOAD) HANDLE_VP_TO_VVP(VP_LOAD, VVP_LOAD) REGISTER_PACKED(VVP_LOAD)
+ADD_VVP_OP(VVP_STORE,STORE) HANDLE_VP_TO_VVP(VP_STORE, VVP_STORE) REGISTER_PACKED(VVP_STORE)
+
+ADD_VVP_OP(VVP_GATHER, MGATHER) HANDLE_VP_TO_VVP(VP_GATHER, VVP_GATHER)
+ADD_VVP_OP(VVP_SCATTER, MSCATTER) HANDLE_VP_TO_VVP(VP_SCATTER, VVP_SCATTER)
+
// Integer arithmetic.
-ADD_BINARY_VVP_OP_COMPACT(ADD)
-ADD_BINARY_VVP_OP_COMPACT(SUB)
+ADD_BINARY_VVP_OP_COMPACT(ADD) REGISTER_PACKED(VVP_ADD)
+ADD_BINARY_VVP_OP_COMPACT(SUB) REGISTER_PACKED(VVP_SUB)
ADD_BINARY_VVP_OP_COMPACT(MUL)
ADD_BINARY_VVP_OP_COMPACT(UDIV)
ADD_BINARY_VVP_OP_COMPACT(SDIV)
-ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA)
-ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL)
-ADD_BINARY_VVP_OP_COMPACT(SHL)
+ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA) REGISTER_PACKED(VVP_SRA)
+ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL) REGISTER_PACKED(VVP_SRL)
+ADD_BINARY_VVP_OP_COMPACT(SHL) REGISTER_PACKED(VVP_SHL)
-ADD_BINARY_VVP_OP_COMPACT(AND)
-ADD_BINARY_VVP_OP_COMPACT(OR)
-ADD_BINARY_VVP_OP_COMPACT(XOR)
+ADD_BINARY_VVP_OP_COMPACT(AND) REGISTER_PACKED(VVP_AND)
+ADD_BINARY_VVP_OP_COMPACT(OR) REGISTER_PACKED(VVP_OR)
+ADD_BINARY_VVP_OP_COMPACT(XOR) REGISTER_PACKED(VVP_XOR)
// FP arithmetic.
-ADD_BINARY_VVP_OP_COMPACT(FADD)
-ADD_BINARY_VVP_OP_COMPACT(FSUB)
-ADD_BINARY_VVP_OP_COMPACT(FMUL)
+ADD_UNARY_VVP_OP(VVP_FNEG, FNEG) HANDLE_VP_TO_VVP(VP_FNEG, VVP_FNEG) REGISTER_PACKED(VVP_FNEG)
+ADD_BINARY_VVP_OP_COMPACT(FADD) REGISTER_PACKED(VVP_FADD)
+ADD_BINARY_VVP_OP_COMPACT(FSUB) REGISTER_PACKED(VVP_FSUB)
+ADD_BINARY_VVP_OP_COMPACT(FMUL) REGISTER_PACKED(VVP_FMUL)
ADD_BINARY_VVP_OP_COMPACT(FDIV)
+ADD_TERNARY_VVP_OP(VVP_FFMA,FMA) HANDLE_VP_TO_VVP(VP_FMA, VVP_FFMA) REGISTER_PACKED(VVP_FFMA)
+
+ADD_VVP_OP(VVP_SETCC, SETCC)
+
// Shuffles.
-ADD_VVP_OP(VVP_SELECT,VSELECT)
+ADD_VVP_OP(VVP_SELECT,VSELECT) REGISTER_PACKED(VVP_SELECT)
HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT)
HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
+
#undef ADD_BINARY_VVP_OP
+#undef ADD_TERNARY_VVP_OP
+#undef ADD_UNARY_VVP_OP
#undef ADD_BINARY_VVP_OP_COMPACT
+#undef ADD_REDUCE_VVP_OP
#undef ADD_VVP_OP
#undef HANDLE_VP_TO_VVP
+#undef HANDLE_VVP_REDUCE_TO_SCALAR
+#undef REGISTER_PACKED
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 56689d3ee06b..7bafa53af2af 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSectionWasm.h"
@@ -374,7 +375,7 @@ public:
auto Type = WebAssembly::parseType(Lexer.getTok().getString());
if (!Type)
return error("unknown type: ", Lexer.getTok());
- Types.push_back(Type.getValue());
+ Types.push_back(*Type);
Parser.Lex();
if (!isNext(AsmToken::Comma))
break;
@@ -670,11 +671,12 @@ public:
} else {
// Assume this identifier is a label.
const MCExpr *Val;
+ SMLoc Start = Id.getLoc();
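+ // Capture the start location before parseExpression advances the lexer.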
SMLoc End;
if (Parser.parseExpression(Val, End))
return error("Cannot parse symbol: ", Lexer.getTok());
Operands.push_back(std::make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
+ WebAssemblyOperand::Symbol, Start, End,
WebAssemblyOperand::SymOp{Val}));
if (checkForP2AlignIfLoadStore(Operands, Name))
return true;
@@ -815,8 +817,7 @@ public:
// Now set this symbol with the correct type.
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- WasmSym->setGlobalType(
- wasm::WasmGlobalType{uint8_t(Type.getValue()), Mutable});
+ WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(*Type), Mutable});
// And emit the directive again.
TOut.emitGlobalType(WasmSym);
return expect(AsmToken::EndOfStatement, "EOL");
@@ -846,7 +847,7 @@ public:
// symbol
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
- wasm::WasmTableType Type = {uint8_t(ElemType.getValue()), Limits};
+ wasm::WasmTableType Type = {uint8_t(*ElemType), Limits};
WasmSym->setTableType(Type);
TOut.emitTableType(WasmSym);
return expect(AsmToken::EndOfStatement, "EOL");
@@ -1016,7 +1017,7 @@ public:
Inst.setOpcode(Opc64);
}
}
- if (!SkipTypeCheck && TC.typeCheck(IDLoc, Inst))
+ if (!SkipTypeCheck && TC.typeCheck(IDLoc, Inst, Operands))
return true;
Out.emitInstruction(Inst, getSTI());
if (CurrentState == EndFunction) {
@@ -1094,14 +1095,15 @@ public:
auto *WS =
getContext().getWasmSection(SecName, SectionKind::getText(), 0, Group,
MCContext::GenericSectionID, nullptr);
- getStreamer().SwitchSection(WS);
+ getStreamer().switchSection(WS);
// Also generate DWARF for this section if requested.
if (getContext().getGenDwarfForAssembly())
getContext().addGenDwarfSection(WS);
}
void onEndOfFunction(SMLoc ErrorLoc) {
- TC.endOfFunction(ErrorLoc);
+ if (!SkipTypeCheck)
+ TC.endOfFunction(ErrorLoc);
// Reset the type checker state.
TC.Clear();
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index 128ce5c4fec0..ec72c1de0503 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -86,14 +86,12 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc,
Optional<wasm::ValType> EVT) {
if (Stack.empty()) {
return typeError(ErrorLoc,
- EVT.hasValue()
- ? StringRef("empty stack while popping ") +
- WebAssembly::typeToString(EVT.getValue())
- : StringRef(
- "empty stack while popping value"));
+ EVT ? StringRef("empty stack while popping ") +
+ WebAssembly::typeToString(EVT.getValue())
+ : StringRef("empty stack while popping value"));
}
auto PVT = Stack.pop_back_val();
- if (EVT.hasValue() && EVT.getValue() != PVT) {
+ if (EVT && EVT.getValue() != PVT) {
return typeError(
ErrorLoc, StringRef("popped ") + WebAssembly::typeToString(PVT) +
", expected " +
@@ -102,6 +100,19 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc,
return false;
}
+bool WebAssemblyAsmTypeCheck::popRefType(SMLoc ErrorLoc) {
+ if (Stack.empty()) {
+ return typeError(ErrorLoc, StringRef("empty stack while popping reftype"));
+ }
+ auto PVT = Stack.pop_back_val();
+ if (!WebAssembly::isRefType(PVT)) {
+ return typeError(ErrorLoc, StringRef("popped ") +
+ WebAssembly::typeToString(PVT) +
+ ", expected reftype");
+ }
+ return false;
+}
+
bool WebAssemblyAsmTypeCheck::getLocal(SMLoc ErrorLoc, const MCInst &Inst,
wasm::ValType &Type) {
auto Local = static_cast<size_t>(Inst.getOperand(0).getImm());
@@ -160,7 +171,7 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst,
if (getSymRef(ErrorLoc, Inst, SymRef))
return true;
auto WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
- switch (WasmSym->getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA)) {
+ switch (WasmSym->getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA)) {
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
Type = static_cast<wasm::ValType>(WasmSym->getGlobalType().Type);
break;
@@ -182,6 +193,20 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst,
return false;
}
+bool WebAssemblyAsmTypeCheck::getTable(SMLoc ErrorLoc, const MCInst &Inst,
+ wasm::ValType &Type) {
+ const MCSymbolRefExpr *SymRef;
+ if (getSymRef(ErrorLoc, Inst, SymRef))
+ return true;
+ auto WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
+ if (WasmSym->getType().value_or(wasm::WASM_SYMBOL_TYPE_DATA) !=
+ wasm::WASM_SYMBOL_TYPE_TABLE)
+ return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
+ " missing .tabletype");
+ Type = static_cast<wasm::ValType>(WasmSym->getTableType().ElemType);
+ return false;
+}
+
bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) {
// Check the return types.
for (auto RVT : llvm::reverse(ReturnTypes)) {
@@ -196,35 +221,58 @@ bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) {
return false;
}
-bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
+bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst,
+ OperandVector &Operands) {
auto Opc = Inst.getOpcode();
auto Name = GetMnemonic(Opc);
dumpTypeStack("typechecking " + Name + ": ");
wasm::ValType Type;
if (Name == "local.get") {
- if (getLocal(ErrorLoc, Inst, Type))
+ if (getLocal(Operands[1]->getStartLoc(), Inst, Type))
return true;
Stack.push_back(Type);
} else if (Name == "local.set") {
- if (getLocal(ErrorLoc, Inst, Type))
+ if (getLocal(Operands[1]->getStartLoc(), Inst, Type))
return true;
if (popType(ErrorLoc, Type))
return true;
} else if (Name == "local.tee") {
- if (getLocal(ErrorLoc, Inst, Type))
+ if (getLocal(Operands[1]->getStartLoc(), Inst, Type))
return true;
if (popType(ErrorLoc, Type))
return true;
Stack.push_back(Type);
} else if (Name == "global.get") {
- if (getGlobal(ErrorLoc, Inst, Type))
+ if (getGlobal(Operands[1]->getStartLoc(), Inst, Type))
return true;
Stack.push_back(Type);
} else if (Name == "global.set") {
- if (getGlobal(ErrorLoc, Inst, Type))
+ if (getGlobal(Operands[1]->getStartLoc(), Inst, Type))
+ return true;
+ if (popType(ErrorLoc, Type))
+ return true;
+ } else if (Name == "table.get") {
+ if (getTable(Operands[1]->getStartLoc(), Inst, Type))
+ return true;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ Stack.push_back(Type);
+ } else if (Name == "table.set") {
+ if (getTable(Operands[1]->getStartLoc(), Inst, Type))
return true;
if (popType(ErrorLoc, Type))
return true;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ } else if (Name == "table.fill") {
+ if (getTable(Operands[1]->getStartLoc(), Inst, Type))
+ return true;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ if (popType(ErrorLoc, Type))
+ return true;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
} else if (Name == "drop") {
if (popType(ErrorLoc, {}))
return true;
@@ -245,33 +293,36 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
return true;
} else if (Name == "call" || Name == "return_call") {
const MCSymbolRefExpr *SymRef;
- if (getSymRef(ErrorLoc, Inst, SymRef))
+ if (getSymRef(Operands[1]->getStartLoc(), Inst, SymRef))
return true;
auto WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
auto Sig = WasmSym->getSignature();
if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_FUNCTION)
- return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
- " missing .functype");
+ return typeError(Operands[1]->getStartLoc(), StringRef("symbol ") +
+ WasmSym->getName() +
+ " missing .functype");
if (checkSig(ErrorLoc, *Sig)) return true;
if (Name == "return_call" && endOfFunction(ErrorLoc))
return true;
} else if (Name == "catch") {
const MCSymbolRefExpr *SymRef;
- if (getSymRef(ErrorLoc, Inst, SymRef))
+ if (getSymRef(Operands[1]->getStartLoc(), Inst, SymRef))
return true;
const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
const auto *Sig = WasmSym->getSignature();
if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_TAG)
- return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
- " missing .tagtype");
+ return typeError(Operands[1]->getStartLoc(), StringRef("symbol ") +
+ WasmSym->getName() +
+ " missing .tagtype");
// catch instruction pushes values whose types are specified in the tag's
// "params" part
Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end());
- } else if (Name == "ref.null") {
- auto VT = static_cast<wasm::ValType>(Inst.getOperand(0).getImm());
- Stack.push_back(VT);
} else if (Name == "unreachable") {
Unreachable = true;
+ } else if (Name == "ref.is_null") {
+ if (popRefType(ErrorLoc))
+ return true;
+ Stack.push_back(wasm::ValType::I32);
} else {
// The current instruction is a stack instruction which doesn't have
// explicit operands that indicate push/pop types, so we get those from
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
index 2b07faf67a18..3be966b5739c 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
@@ -16,9 +16,10 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_ASMPARSER_TYPECHECK_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_ASMPARSER_TYPECHECK_H
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
namespace llvm {
@@ -38,12 +39,14 @@ class WebAssemblyAsmTypeCheck final {
void dumpTypeStack(Twine Msg);
bool typeError(SMLoc ErrorLoc, const Twine &Msg);
bool popType(SMLoc ErrorLoc, Optional<wasm::ValType> EVT);
+ bool popRefType(SMLoc ErrorLoc);
bool getLocal(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type);
bool checkEnd(SMLoc ErrorLoc, bool PopVals = false);
bool checkSig(SMLoc ErrorLoc, const wasm::WasmSignature &Sig);
bool getSymRef(SMLoc ErrorLoc, const MCInst &Inst,
const MCSymbolRefExpr *&SymRef);
bool getGlobal(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type);
+ bool getTable(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type);
public:
WebAssemblyAsmTypeCheck(MCAsmParser &Parser, const MCInstrInfo &MII, bool is64);
@@ -52,7 +55,7 @@ public:
void localDecl(const SmallVector<wasm::ValType, 4> &Locals);
void setLastSig(const wasm::WasmSignature &Sig) { LastSig = Sig; }
bool endOfFunction(SMLoc ErrorLoc);
- bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst);
+ bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst, OperandVector &Operands);
void Clear() {
Stack.clear();
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 5d38145559da..ae65a9dc2a4e 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -17,8 +17,8 @@
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "Utils/WebAssemblyTypeUtilities.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index d8122950e061..5727708a84ad 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -52,6 +52,4 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T,
// we make sure this info is set correctly.
if (WebAssembly::WasmEnableEH || WebAssembly::WasmEnableSjLj)
ExceptionsType = ExceptionHandling::Wasm;
-
- // TODO: UseIntegratedAssembler?
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 8f670ec88897..f52545a65dbb 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -62,7 +62,6 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/,
}
static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo & /*MRI*/,
MCContext &Ctx) {
return createWebAssemblyMCCodeEmitter(MCII);
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 397b9b0ee9da..2da219d54c73 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -58,8 +58,6 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
}
}
-void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
-
void WebAssemblyTargetAsmStreamer::emitFunctionType(const MCSymbolWasm *Sym) {
assert(Sym->isFunction());
OS << "\t.functype\t" << Sym->getName() << " ";
@@ -136,10 +134,6 @@ void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<wasm::ValType> Types) {
}
}
-void WebAssemblyTargetWasmStreamer::emitEndFunc() {
- llvm_unreachable(".end_func is not needed for direct wasm output");
-}
-
void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) {
llvm_unreachable(".indidx encoding not yet implemented");
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index c0ad63c8dd50..522f6356c28b 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -32,8 +32,6 @@ public:
/// .local
virtual void emitLocal(ArrayRef<wasm::ValType> Types) = 0;
- /// .endfunc
- virtual void emitEndFunc() = 0;
/// .functype
virtual void emitFunctionType(const MCSymbolWasm *Sym) = 0;
/// .indidx
@@ -66,7 +64,6 @@ public:
WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
void emitLocal(ArrayRef<wasm::ValType> Types) override;
- void emitEndFunc() override;
void emitFunctionType(const MCSymbolWasm *Sym) override;
void emitIndIdx(const MCExpr *Value) override;
void emitGlobalType(const MCSymbolWasm *Sym) override;
@@ -83,7 +80,6 @@ public:
explicit WebAssemblyTargetWasmStreamer(MCStreamer &S);
void emitLocal(ArrayRef<wasm::ValType> Types) override;
- void emitEndFunc() override;
void emitFunctionType(const MCSymbolWasm *Sym) override {}
void emitIndIdx(const MCExpr *Value) override;
void emitGlobalType(const MCSymbolWasm *Sym) override {}
@@ -104,7 +100,6 @@ public:
: WebAssemblyTargetStreamer(S) {}
void emitLocal(ArrayRef<wasm::ValType>) override {}
- void emitEndFunc() override {}
void emitFunctionType(const MCSymbolWasm *) override {}
void emitIndIdx(const MCExpr *) override {}
void emitGlobalType(const MCSymbolWasm *) override {}
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
index cdb95d48398d..8fc67d37925c 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
@@ -80,6 +80,10 @@ inline bool isRefType(const Type *Ty) {
return isFuncrefType(Ty) || isExternrefType(Ty);
}
+inline bool isRefType(wasm::ValType Type) {
+ return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF;
+}
+
// Convert StringRef to ValType / HealType / BlockType
Optional<wasm::ValType> parseType(StringRef Type);
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h
index 803786e0c9c2..aee8f160f38d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -26,7 +26,6 @@ class FunctionPass;
// LLVM IR passes.
ModulePass *createWebAssemblyLowerEmscriptenEHSjLj();
-ModulePass *createWebAssemblyLowerGlobalDtors();
ModulePass *createWebAssemblyAddMissingPrototypes();
ModulePass *createWebAssemblyFixFunctionBitcasts();
FunctionPass *createWebAssemblyOptimizeReturned();
@@ -41,7 +40,6 @@ FunctionPass *createWebAssemblySetP2AlignOperands();
// Late passes.
FunctionPass *createWebAssemblyReplacePhysRegs();
FunctionPass *createWebAssemblyNullifyDebugValueLists();
-FunctionPass *createWebAssemblyPrepareForLiveIntervals();
FunctionPass *createWebAssemblyOptimizeLiveIntervals();
FunctionPass *createWebAssemblyMemIntrinsicResults();
FunctionPass *createWebAssemblyRegStackify();
@@ -61,14 +59,12 @@ ModulePass *createWebAssemblyMCLowerPrePass();
// PassRegistry initialization declarations.
void initializeWebAssemblyAddMissingPrototypesPass(PassRegistry &);
void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &);
-void initializeLowerGlobalDtorsPass(PassRegistry &);
void initializeFixFunctionBitcastsPass(PassRegistry &);
void initializeOptimizeReturnedPass(PassRegistry &);
void initializeWebAssemblyArgumentMovePass(PassRegistry &);
void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &);
void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &);
void initializeWebAssemblyNullifyDebugValueListsPass(PassRegistry &);
-void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
void initializeWebAssemblyRegStackifyPass(PassRegistry &);
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index a529c6217189..b83dcf3a8e65 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -67,6 +67,10 @@ def FeatureReferenceTypes :
SubtargetFeature<"reference-types", "HasReferenceTypes", "true",
"Enable reference types">;
+def FeatureExtendedConst :
+ SubtargetFeature<"extended-const", "HasExtendedConst", "true",
+ "Enable extended const expressions">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index bf326e5106be..57d51634e849 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -180,30 +180,30 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
MCSymbolWasm *Sym = cast<MCSymbolWasm>(getSymbol(GV));
if (!Sym->getType()) {
- const WebAssemblyTargetLowering &TLI = *Subtarget->getTargetLowering();
SmallVector<MVT, 1> VTs;
Type *GlobalVT = GV->getValueType();
- computeLegalValueVTs(TLI, GV->getParent()->getContext(),
- GV->getParent()->getDataLayout(), GlobalVT, VTs);
+ if (Subtarget) {
+ // Subtarget is only set when a function is defined, because
+ // each function can declare a different subtarget. For example,
+ // on ARM a compilation unit might have one function on ARM and
+ // another on Thumb. Therefore, only when Subtarget is non-null
+ // can we actually calculate the legal VTs.
+ const WebAssemblyTargetLowering &TLI = *Subtarget->getTargetLowering();
+ computeLegalValueVTs(TLI, GV->getParent()->getContext(),
+ GV->getParent()->getDataLayout(), GlobalVT, VTs);
+ }
WebAssembly::wasmSymbolSetType(Sym, GlobalVT, VTs);
}
- // If the GlobalVariable refers to a table, we handle it here instead of
- // in emitExternalDecls
- if (Sym->isTable()) {
- getTargetStreamer()->emitTableType(Sym);
- return;
- }
-
emitVisibility(Sym, GV->getVisibility(), !GV->isDeclaration());
+ emitSymbolType(Sym);
if (GV->hasInitializer()) {
assert(getSymbolPreferLocal(*GV) == Sym);
emitLinkage(GV, Sym);
- getTargetStreamer()->emitGlobalType(Sym);
OutStreamer->emitLabel(Sym);
// TODO: Actually emit the initializer value. Otherwise the global has the
// default value for its type (0, ref.null, etc).
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
}
@@ -211,7 +211,7 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
auto *WasmSym = cast<MCSymbolWasm>(GetExternalSymbolSymbol(Name));
// May be called multiple times, so early out.
- if (WasmSym->getType().hasValue())
+ if (WasmSym->getType())
return WasmSym;
const WebAssemblySubtarget &Subtarget = getSubtarget();
@@ -271,31 +271,52 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
return WasmSym;
}
-void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) {
+void WebAssemblyAsmPrinter::emitSymbolType(const MCSymbolWasm *Sym) {
+ Optional<wasm::WasmSymbolType> WasmTy = Sym->getType();
+ if (!WasmTy)
+ return;
+
+ switch (*WasmTy) {
+ case wasm::WASM_SYMBOL_TYPE_GLOBAL:
+ getTargetStreamer()->emitGlobalType(Sym);
+ break;
+ case wasm::WASM_SYMBOL_TYPE_TAG:
+ getTargetStreamer()->emitTagType(Sym);
+ break;
+ case wasm::WASM_SYMBOL_TYPE_TABLE:
+ getTargetStreamer()->emitTableType(Sym);
+ break;
+ default:
+ break; // We only handle globals, tags, and tables here.
+ }
+}
+
+void WebAssemblyAsmPrinter::emitDecls(const Module &M) {
if (signaturesEmitted)
return;
signaturesEmitted = true;
// Normally symbols for globals get discovered as the MI gets lowered,
- // but we need to know about them ahead of time.
+ // but we need to know about them ahead of time. This will, however,
+ // only find symbols that have been used. Unused global symbols will
+ // not be found here.
MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>();
for (const auto &Name : MMIW.MachineSymbolsUsed) {
- getOrCreateWasmSymbol(Name.getKey());
+ auto *WasmSym = cast<MCSymbolWasm>(getOrCreateWasmSymbol(Name.getKey()));
+ if (WasmSym->isFunction()) {
+ // TODO(wvo): is there any case where this overlaps with the call to
+ // emitFunctionType in the loop below?
+ getTargetStreamer()->emitFunctionType(WasmSym);
+ }
}
for (auto &It : OutContext.getSymbols()) {
- // Emit .globaltype, .tagtype, or .tabletype declarations.
+ // Emit .globaltype, .tagtype, or .tabletype declarations for extern
+ // declarations, i.e. those that have only been declared (but not defined)
+ // in the current module.
auto Sym = cast<MCSymbolWasm>(It.getValue());
- if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_GLOBAL) {
- // .globaltype already handled by emitGlobalVariable for defined
- // variables; here we make sure the types of external wasm globals get
- // written to the file.
- if (Sym->isUndefined())
- getTargetStreamer()->emitGlobalType(Sym);
- } else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_TAG)
- getTargetStreamer()->emitTagType(Sym);
- else if (Sym->getType() == wasm::WASM_SYMBOL_TYPE_TABLE)
- getTargetStreamer()->emitTableType(Sym);
+ if (!Sym->isDefined())
+ emitSymbolType(Sym);
}
DenseSet<MCSymbol *> InvokeSymbols;
@@ -303,55 +324,56 @@ void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) {
if (F.isIntrinsic())
continue;
- // Emit function type info for all undefined functions
- if (F.isDeclarationForLinker()) {
- SmallVector<MVT, 4> Results;
- SmallVector<MVT, 4> Params;
- computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results);
- // At this point these MCSymbols may or may not have been created already
- // and thus also contain a signature, but we need to get the signature
- // anyway here in case it is an invoke that has not yet been created. We
- // will discard it later if it turns out not to be necessary.
- auto Signature = signatureFromMVTs(Results, Params);
- bool InvokeDetected = false;
- auto *Sym = getMCSymbolForFunction(
- &F, WebAssembly::WasmEnableEmEH || WebAssembly::WasmEnableEmSjLj,
- Signature.get(), InvokeDetected);
-
- // Multiple functions can be mapped to the same invoke symbol. For
- // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32'
- // are both mapped to '__invoke_vi'. We keep them in a set once we emit an
- // Emscripten EH symbol so we don't emit the same symbol twice.
- if (InvokeDetected && !InvokeSymbols.insert(Sym).second)
- continue;
+ // Emit function type info for all functions. This will emit duplicate
+ // information for defined functions (which already have function type
+ // info emitted alongside their definition), but this is necessary in
+ // order to enable the single-pass WebAssemblyAsmTypeCheck to succeed.
+ SmallVector<MVT, 4> Results;
+ SmallVector<MVT, 4> Params;
+ computeSignatureVTs(F.getFunctionType(), &F, F, TM, Params, Results);
+ // At this point these MCSymbols may or may not have been created already
+ // and thus also contain a signature, but we need to get the signature
+ // anyway here in case it is an invoke that has not yet been created. We
+ // will discard it later if it turns out not to be necessary.
+ auto Signature = signatureFromMVTs(Results, Params);
+ bool InvokeDetected = false;
+ auto *Sym = getMCSymbolForFunction(
+ &F, WebAssembly::WasmEnableEmEH || WebAssembly::WasmEnableEmSjLj,
+ Signature.get(), InvokeDetected);
+
+ // Multiple functions can be mapped to the same invoke symbol. For
+ // example, two IR functions '__invoke_void_i8*' and '__invoke_void_i32'
+ // are both mapped to '__invoke_vi'. We keep them in a set once we emit an
+ // Emscripten EH symbol so we don't emit the same symbol twice.
+ if (InvokeDetected && !InvokeSymbols.insert(Sym).second)
+ continue;
- Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
- if (!Sym->getSignature()) {
- Sym->setSignature(Signature.get());
- addSignature(std::move(Signature));
- } else {
- // This symbol has already been created and had a signature. Discard it.
- Signature.reset();
- }
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+ if (!Sym->getSignature()) {
+ Sym->setSignature(Signature.get());
+ addSignature(std::move(Signature));
+ } else {
+ // This symbol has already been created and had a signature. Discard it.
+ Signature.reset();
+ }
- getTargetStreamer()->emitFunctionType(Sym);
+ getTargetStreamer()->emitFunctionType(Sym);
- if (F.hasFnAttribute("wasm-import-module")) {
- StringRef Name =
- F.getFnAttribute("wasm-import-module").getValueAsString();
- Sym->setImportModule(storeName(Name));
- getTargetStreamer()->emitImportModule(Sym, Name);
- }
- if (F.hasFnAttribute("wasm-import-name")) {
- // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use
- // the original function name but the converted symbol name.
- StringRef Name =
- InvokeDetected
- ? Sym->getName()
- : F.getFnAttribute("wasm-import-name").getValueAsString();
- Sym->setImportName(storeName(Name));
- getTargetStreamer()->emitImportName(Sym, Name);
- }
+ if (F.hasFnAttribute("wasm-import-module")) {
+ StringRef Name =
+ F.getFnAttribute("wasm-import-module").getValueAsString();
+ Sym->setImportModule(storeName(Name));
+ getTargetStreamer()->emitImportModule(Sym, Name);
+ }
+ if (F.hasFnAttribute("wasm-import-name")) {
+ // If this is a converted Emscripten EH/SjLj symbol, we shouldn't use
+ // the original function name but the converted symbol name.
+ StringRef Name =
+ InvokeDetected
+ ? Sym->getName()
+ : F.getFnAttribute("wasm-import-name").getValueAsString();
+ Sym->setImportName(storeName(Name));
+ getTargetStreamer()->emitImportName(Sym, Name);
}
if (F.hasFnAttribute("wasm-export-name")) {
@@ -362,9 +384,12 @@ void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) {
}
}
}
-
+
void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
- emitExternalDecls(M);
+ // This is required to emit external declarations (like .functype) when
+ // no functions are defined in the compilation unit and, therefore,
+ // emitDecls() has not been called until now.
+ emitDecls(M);
// When a function's address is taken, a TABLE_INDEX relocation is emitted
// against the function symbol at the use site. However the relocation
@@ -401,13 +426,13 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
if (!Name || !Contents)
continue;
- OutStreamer->PushSection();
+ OutStreamer->pushSection();
std::string SectionName = (".custom_section." + Name->getString()).str();
MCSectionWasm *MySection =
OutContext.getWasmSection(SectionName, SectionKind::getMetadata());
- OutStreamer->SwitchSection(MySection);
+ OutStreamer->switchSection(MySection);
OutStreamer->emitBytes(Contents->getString());
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
}
@@ -445,8 +470,8 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
if (FieldCount != 0) {
MCSectionWasm *Producers = OutContext.getWasmSection(
".custom_section.producers", SectionKind::getMetadata());
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(Producers);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(Producers);
OutStreamer->emitULEB128IntValue(FieldCount);
for (auto &Producers : {std::make_pair("language", &Languages),
std::make_pair("processed-by", &Tools)}) {
@@ -462,7 +487,7 @@ void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
OutStreamer->emitBytes(Producer.second);
}
}
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
}
@@ -518,8 +543,8 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
// Emit features and linkage policies into the "target_features" section
MCSectionWasm *FeaturesSection = OutContext.getWasmSection(
".custom_section.target_features", SectionKind::getMetadata());
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(FeaturesSection);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(FeaturesSection);
OutStreamer->emitULEB128IntValue(EmittedFeatures.size());
for (auto &F : EmittedFeatures) {
@@ -528,10 +553,11 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
OutStreamer->emitBytes(F.Name);
}
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
void WebAssemblyAsmPrinter::emitConstantPool() {
+ emitDecls(*MMI->getModule());
assert(MF->getConstantPool()->getConstants().empty() &&
"WebAssembly disables constant pools");
}
@@ -540,17 +566,6 @@ void WebAssemblyAsmPrinter::emitJumpTableInfo() {
// Nothing to do; jump tables are incorporated into the instruction stream.
}
-void WebAssemblyAsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *Sym)
- const {
- AsmPrinter::emitLinkage(GV, Sym);
- // This gets called before the function label and type are emitted.
- // We use it to emit signatures of external functions.
- // FIXME casts!
- const_cast<WebAssemblyAsmPrinter *>(this)
- ->emitExternalDecls(*MMI->getModule());
-}
-
-
void WebAssemblyAsmPrinter::emitFunctionBodyStart() {
const Function &F = MF->getFunction();
SmallVector<MVT, 1> ResultVTs;
@@ -612,7 +627,7 @@ void WebAssemblyAsmPrinter::emitInstruction(const MachineInstr *MI) {
// function body.
if (isVerbose()) {
OutStreamer->AddComment("fallthrough-return");
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
break;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index 6b2f2000a0bd..65d6ee415180 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -66,10 +66,10 @@ public:
void emitEndOfAsmFile(Module &M) override;
void EmitProducerInfo(Module &M);
void EmitTargetFeatures(Module &M);
+ void emitSymbolType(const MCSymbolWasm *Sym);
void emitGlobalVariable(const GlobalVariable *GV) override;
void emitJumpTableInfo() override;
void emitConstantPool() override;
- void emitLinkage(const GlobalValue *, MCSymbol *) const override;
void emitFunctionBodyStart() override;
void emitInstruction(const MachineInstr *MI) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -84,7 +84,7 @@ public:
wasm::WasmSignature *Sig,
bool &InvokeDetected);
MCSymbol *getOrCreateWasmSymbol(StringRef Name);
- void emitExternalDecls(const Module &M);
+ void emitDecls(const Module &M);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 17e867e4c7d8..02e873a0f9a6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -1716,7 +1716,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
// Rewrite MBB operands to be depth immediates.
SmallVector<MachineOperand, 4> Ops(MI.operands());
while (MI.getNumOperands() > 0)
- MI.RemoveOperand(MI.getNumOperands() - 1);
+ MI.removeOperand(MI.getNumOperands() - 1);
for (auto MO : Ops) {
if (MO.isMBB()) {
if (MI.getOpcode() == WebAssembly::DELEGATE)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index b94981245f8b..81fe5395a6de 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -14,6 +14,7 @@
#include "WebAssemblyExceptionInfo.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "Utils/WebAssemblyUtilities.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
index 5bdec89f1125..fa5b4a508fa5 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
@@ -130,7 +130,7 @@ MachineBasicBlock *fixBrTableDefault(MachineInstr &MI, MachineBasicBlock *MBB,
return nullptr;
// Remove the dummy default target and install the real one.
- MI.RemoveOperand(MI.getNumExplicitOperands() - 1);
+ MI.removeOperand(MI.getNumExplicitOperands() - 1);
MI.addOperand(MF, MachineOperand::CreateMBB(TBB));
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 1ceae59dc993..83e71d731bfa 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -55,6 +55,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -221,10 +222,8 @@ private:
assert(!Enterers.count(MBB));
if (Blocks.insert(MBB).second) {
for (auto *Pred : MBB->predecessors()) {
- if (!AddedToWorkList.count(Pred)) {
+ if (AddedToWorkList.insert(Pred).second)
WorkList.push_back(Pred);
- AddedToWorkList.insert(Pred);
- }
}
}
}
@@ -491,6 +490,46 @@ FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
return new WebAssemblyFixIrreducibleControlFlow();
}
+// Test whether the given register has an ARGUMENT def.
+static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
+ for (const auto &Def : MRI.def_instructions(Reg))
+ if (WebAssembly::isArgument(Def.getOpcode()))
+ return true;
+ return false;
+}
+
+// Add IMPLICIT_DEF definitions for every register, to cover register uses
+// that don't have defs on every possible path.
+// TODO: This is fairly heavy-handed; find a better approach.
+static void addImplicitDefs(MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ MachineBasicBlock &Entry = *MF.begin();
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+
+ // Skip unused registers.
+ if (MRI.use_nodbg_empty(Reg))
+ continue;
+
+ // Skip registers that have an ARGUMENT definition.
+ if (hasArgumentDef(Reg, MRI))
+ continue;
+
+ BuildMI(Entry, Entry.begin(), DebugLoc(),
+ TII.get(WebAssembly::IMPLICIT_DEF), Reg);
+ }
+
+ // Move ARGUMENT_* instructions to the top of the entry block, so that their
+ // liveness reflects the fact that these really are live-in values.
+ for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
+ if (WebAssembly::isArgument(MI.getOpcode())) {
+ MI.removeFromParent();
+ Entry.insert(Entry.begin(), &MI);
+ }
+ }
+}
+
bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
@@ -505,8 +544,15 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
// We rewrote part of the function; recompute relevant things.
- MF.getRegInfo().invalidateLiveness();
MF.RenumberBlocks();
+  // Now that we've inserted dispatch blocks, some register uses can have
+  // incoming paths without a def. For example, before this pass register %a
+  // was defined in BB1 and used in BB2, and the only path to BB2 went through
+  // BB1. But if this pass inserts a dispatch block having multiple
+ // predecessors between the two BBs, now there are paths to BB2 without
+ // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
+ // IMPLICIT_DEFs to all regs is one simple way to fix it.
+ addImplicitDefs(MF);
return true;
}
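To make the new comment concrete, a hypothetical before/after (block and register names are illustrative, not from the patch):

    // Before: the only path to BB2 runs through BB1, so the use of %a in BB2
    // is dominated by its def.
    //   Entry -> BB1 (%a = ...) -> BB2 (use %a)
    //
    // After a dispatch block with multiple predecessors is inserted, BB2 can
    // be reached without executing BB1, so %a gains a def-less incoming path:
    //   Entry -> Dispatch -> BB1 (%a = ...) -> Dispatch -> BB2 (use %a)
    // An IMPLICIT_DEF of %a in the entry block restores LiveIntervals'
    // requirement that every use be dominated by a def.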
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index a221f37cfd94..2636acaf1604 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -19,6 +19,8 @@
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -159,22 +161,17 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
// Combine extends of extract_subvectors into widening ops
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});
// Combine int_to_fp or fp_extend of extract_vectors and vice versa into
// conversion ops
- setTargetDAGCombine(ISD::SINT_TO_FP);
- setTargetDAGCombine(ISD::UINT_TO_FP);
- setTargetDAGCombine(ISD::FP_EXTEND);
- setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
+ setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
+ ISD::EXTRACT_SUBVECTOR});
// Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
// into conversion ops
- setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
- setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
- setTargetDAGCombine(ISD::FP_ROUND);
- setTargetDAGCombine(ISD::CONCAT_VECTORS);
+ setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
+ ISD::FP_ROUND, ISD::CONCAT_VECTORS});
setTargetDAGCombine(ISD::TRUNCATE);
@@ -577,7 +574,7 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
// Move the function pointer to the end of the arguments for indirect calls
if (IsIndirect) {
auto FnPtr = CallParams.getOperand(0);
- CallParams.RemoveOperand(0);
+ CallParams.removeOperand(0);
// For funcrefs, call_indirect is done through __funcref_call_table and the
// funcref is always installed in slot 0 of the table, therefore instead of having
@@ -909,6 +906,30 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
+ SDValue Op, const TargetLoweringOpt &TLO) const {
+ // ISel process runs DAGCombiner after legalization; this step is called
+ // SelectionDAG optimization phase. This post-legalization combining process
+ // runs DAGCombiner on each node, and if there was a change to be made,
+ // re-runs legalization again on it and its user nodes to make sure
+ // everythiing is in a legalized state.
+ //
+ // The legalization calls lowering routines, and we do our custom lowering for
+ // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
+ // into zeros. But there is a set of routines in DAGCombiner that turns unused
+ // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
+ // turns unused vector elements into undefs. But this routine does not work
+ // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
+  // combination can result in an infinite loop, in which undefs are converted to
+ // zeros in legalization and back to undefs in combining.
+ //
+ // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
+ // running for build_vectors.
+ if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
+ return false;
+ return true;
+}
+
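The loop the comment describes, sketched abstractly (lane values are hypothetical):

    // Legalization: LowerBUILD_VECTOR zeros out undef lanes.
    //   build_vector x, undef, y, undef  ->  build_vector x, 0, y, 0
    // Combining: SimplifyDemandedVectorElts turns undemanded lanes back.
    //   build_vector x, 0, y, 0          ->  build_vector x, undef, y, undef
    // Each step reports a change, so the legalize/combine cycle never reaches
    // a fixed point; declining to simplify legal build_vectors breaks it.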
//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//
@@ -2110,8 +2131,7 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
auto GetMostCommon = [](auto &Counts) {
auto CommonIt =
- std::max_element(Counts.begin(), Counts.end(),
- [](auto A, auto B) { return A.second < B.second; });
+ std::max_element(Counts.begin(), Counts.end(), llvm::less_second());
assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
return *CommonIt;
};
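llvm::less_second (from llvm/ADT/STLExtras.h) is a function object that compares pairs by their second member, replacing the hand-written lambda. A minimal usage sketch (data and names are illustrative):

    #include "llvm/ADT/STLExtras.h"
    #include <algorithm>
    #include <utility>
    #include <vector>

    static int mostCommonKey(const std::vector<std::pair<int, unsigned>> &Counts) {
      // Picks the pair with the largest count, exactly like the removed lambda.
      auto It = std::max_element(Counts.begin(), Counts.end(), llvm::less_second());
      return It->first;
    }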
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index f7b460f61dbb..d86f2e59e3d2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -113,6 +113,10 @@ private:
report_fatal_error("llvm.clear_cache is not supported on wasm");
}
+ bool
+ shouldSimplifyDemandedVectorElts(SDValue Op,
+ const TargetLoweringOpt &TLO) const override;
+
// Custom lowering hooks.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 42183d1645e1..ed80ed39f09c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -15,7 +15,7 @@ let UseNamedOperandTable = 1 in
multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r,
string asmstr_s, bits<32> atomic_op,
- string is64 = "false"> {
+ bit is64 = false> {
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
!or(0xfe00, !and(0xff, atomic_op)), is64>,
Requires<[HasAtomics]>;
@@ -38,13 +38,13 @@ defm MEMORY_ATOMIC_NOTIFY_A32 :
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
- "memory.atomic.notify \t${off}${p2align}", 0x00, "false">;
+ "memory.atomic.notify \t${off}${p2align}", 0x00, false>;
defm MEMORY_ATOMIC_NOTIFY_A64 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$count),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
"memory.atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
- "memory.atomic.notify \t${off}${p2align}", 0x00, "true">;
+ "memory.atomic.notify \t${off}${p2align}", 0x00, true>;
let mayLoad = 1 in {
defm MEMORY_ATOMIC_WAIT32_A32 :
ATOMIC_I<(outs I32:$dst),
@@ -52,28 +52,28 @@ defm MEMORY_ATOMIC_WAIT32_A32 :
I64:$timeout),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "memory.atomic.wait32 \t${off}${p2align}", 0x01, "false">;
+ "memory.atomic.wait32 \t${off}${p2align}", 0x01, false>;
defm MEMORY_ATOMIC_WAIT32_A64 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, I32:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
"memory.atomic.wait32 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "memory.atomic.wait32 \t${off}${p2align}", 0x01, "true">;
+ "memory.atomic.wait32 \t${off}${p2align}", 0x01, true>;
defm MEMORY_ATOMIC_WAIT64_A32 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "memory.atomic.wait64 \t${off}${p2align}", 0x02, "false">;
+ "memory.atomic.wait64 \t${off}${p2align}", 0x02, false>;
defm MEMORY_ATOMIC_WAIT64_A64 :
ATOMIC_I<(outs I32:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, I64:$exp,
I64:$timeout),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
"memory.atomic.wait64 \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "memory.atomic.wait64 \t${off}${p2align}", 0x02, "true">;
+ "memory.atomic.wait64 \t${off}${p2align}", 0x02, true>;
} // mayLoad = 1
} // hasSideEffects = 1
@@ -469,13 +469,13 @@ multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string name,
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
- !strconcat(name, "\t${off}${p2align}"), atomic_op, "false">;
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, false>;
defm "_A64" :
ATOMIC_I<(outs rc:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$val),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
- !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">;
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, true>;
}
defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0x1e>;
@@ -767,14 +767,14 @@ multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name,
rc:$new_),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
- !strconcat(name, "\t${off}${p2align}"), atomic_op, "false">;
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, false>;
defm "_A64" :
ATOMIC_I<(outs rc:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr, rc:$exp,
rc:$new_),
(outs), (ins P2Align:$p2align, offset64_op:$off), [],
!strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
- !strconcat(name, "\t${off}${p2align}"), atomic_op, "true">;
+ !strconcat(name, "\t${off}${p2align}"), atomic_op, true>;
}
defm ATOMIC_RMW_CMPXCHG_I32 :
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 4dc0c9a46c38..f2e73dd19d6b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -14,12 +14,12 @@
// WebAssembly Instruction Format.
// We instantiate 2 of these for every actual instruction (register based
// and stack based), see below.
-class WebAssemblyInst<bits<32> inst, string asmstr, string stack, string is64>
+class WebAssemblyInst<bits<32> inst, string asmstr, bit stack, bit is64>
: StackRel, RegisterRel, Wasm64Rel, Instruction {
bits<32> Inst = inst; // Instruction encoding.
- string StackBased = stack;
+ bit StackBased = stack;
string BaseName = NAME;
- string IsWasm64 = is64;
+ bit IsWasm64 = is64;
string Wasm32Name = !subst("_A64", "_A32", NAME);
let Namespace = "WebAssembly";
let Pattern = [];
@@ -30,8 +30,8 @@ class WebAssemblyInst<bits<32> inst, string asmstr, string stack, string is64>
}
// Normal instructions. Default instantiation of a WebAssemblyInst.
-class NI<dag oops, dag iops, list<dag> pattern, string stack,
- string asmstr = "", bits<32> inst = -1, string is64 = "false">
+class NI<dag oops, dag iops, list<dag> pattern, bit stack,
+ string asmstr = "", bits<32> inst = -1, bit is64 = false>
: WebAssemblyInst<inst, asmstr, stack, is64> {
dag OutOperandList = oops;
dag InOperandList = iops;
@@ -54,11 +54,11 @@ class NI<dag oops, dag iops, list<dag> pattern, string stack,
// there is always an equivalent pair of instructions.
multiclass I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
list<dag> pattern_r, string asmstr_r = "", string asmstr_s = "",
- bits<32> inst = -1, string is64 = "false"> {
+ bits<32> inst = -1, bit is64 = false> {
let isCodeGenOnly = 1 in
- def "" : NI<oops_r, iops_r, pattern_r, "false", asmstr_r, inst, is64>;
+ def "" : NI<oops_r, iops_r, pattern_r, false, asmstr_r, inst, is64>;
let BaseName = NAME in
- def _S : NI<oops_s, iops_s, [], "true", asmstr_s, inst, is64>;
+ def _S : NI<oops_s, iops_s, [], true, asmstr_s, inst, is64>;
}
// For instructions that have no register ops, so both sets are the same.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 3fb0af1d47a0..134a0efc6822 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -66,6 +66,10 @@ def HasReferenceTypes :
Predicate<"Subtarget->hasReferenceTypes()">,
AssemblerPredicate<(all_of FeatureReferenceTypes), "reference-types">;
+def HasExtendedConst :
+ Predicate<"Subtarget->hasExtendedConst()">,
+ AssemblerPredicate<(all_of FeatureExtendedConst), "extended-const">;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Node Types.
//===----------------------------------------------------------------------===//
@@ -221,8 +225,8 @@ def getStackOpcode : InstrMapping {
let FilterClass = "StackRel";
let RowFields = ["BaseName"];
let ColFields = ["StackBased"];
- let KeyCol = ["false"];
- let ValueCols = [["true"]];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
}
//===----------------------------------------------------------------------===//
@@ -234,8 +238,8 @@ def getRegisterOpcode : InstrMapping {
let FilterClass = "RegisterRel";
let RowFields = ["BaseName"];
let ColFields = ["StackBased"];
- let KeyCol = ["true"];
- let ValueCols = [["false"]];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
}
//===----------------------------------------------------------------------===//
@@ -247,8 +251,8 @@ def getWasm64Opcode : InstrMapping {
let FilterClass = "Wasm64Rel";
let RowFields = ["Wasm32Name"];
let ColFields = ["IsWasm64"];
- let KeyCol = ["false"];
- let ValueCols = [["true"]];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
}
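With the columns now genuine bits, the three mappings are emitted by TableGen's InstrMapping backend as ordinary lookup functions; a usage sketch of the kind of query the MC lowering performs (the opcode choice is illustrative):

    // Returns the stack-based twin of a register-based opcode, or -1 if the
    // instruction has no mapping.
    int StackOp = WebAssembly::getStackOpcode(WebAssembly::ADD_I32);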
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index a70f62dde845..d5bb9e9e48b4 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -47,13 +47,13 @@ multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode,
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
- !strconcat(Name, "\t${off}${p2align}"), Opcode, "false">,
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, false>,
Requires<reqs>;
defm "_A64": I<(outs rc:$dst),
(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
(outs), (ins P2Align:$p2align, offset64_op:$off),
[], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"),
- !strconcat(Name, "\t${off}${p2align}"), Opcode, "true">,
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, true>,
Requires<reqs>;
}
}
@@ -244,7 +244,7 @@ multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode,
(outs),
(ins P2Align:$p2align, offset32_op:$off), [],
!strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
- !strconcat(Name, "\t${off}${p2align}"), Opcode, "false">,
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, false>,
Requires<reqs>;
let mayStore = 1, UseNamedOperandTable = 1 in
defm "_A64" : I<(outs),
@@ -252,7 +252,7 @@ multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode,
(outs),
(ins P2Align:$p2align, offset64_op:$off), [],
!strconcat(Name, "\t${off}(${addr})${p2align}, $val"),
- !strconcat(Name, "\t${off}${p2align}"), Opcode, "true">,
+ !strconcat(Name, "\t${off}${p2align}"), Opcode, true>,
Requires<reqs>;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
index 76a88caafc47..608963d58863 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -27,6 +27,12 @@ multiclass REF_I<WebAssemblyRegClass rc, ValueType vt, string ht> {
vt#".select\t$dst, $lhs, $rhs, $cond",
vt#".select", 0x1b>,
Requires<[HasReferenceTypes]>;
+ defm REF_IS_NULL_#rc
+ : I<(outs I32:$dst), (ins rc:$ref), (outs), (ins),
+ [(set I32:$dst, (!cast<Intrinsic>("int_wasm_ref_is_null_" # ht) rc:$ref))],
+ "ref.is_null\t$ref",
+ "ref.is_null", 0xd1>,
+ Requires<[HasReferenceTypes]>;
}
defm "" : REF_I<FUNCREF, funcref, "func">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 5bb12c7fbdc7..ed3cc7ed1c53 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1229,9 +1229,9 @@ def trunc_sat_zero_s :
SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>;
def trunc_sat_zero_u :
SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>;
-defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_s, "trunc_sat_zero_f64x2_s",
+defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_s, "trunc_sat_f64x2_s_zero",
0xfc>;
-defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_u, "trunc_sat_zero_f64x2_u",
+defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_u, "trunc_sat_f64x2_u_zero",
0xfd>;
// Integer to floating point: convert
@@ -1307,7 +1307,7 @@ defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
- "demote_zero_f64x2", 0x5e>;
+ "demote_f64x2_zero", 0x5e>;
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
@@ -1334,7 +1334,37 @@ defm Q15MULR_SAT_S :
SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>;
//===----------------------------------------------------------------------===//
-// Fused Multiply- Add and Subtract (FMA/FMS)
+// Relaxed swizzle
+//===----------------------------------------------------------------------===//
+
+defm RELAXED_SWIZZLE :
+ RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
+ [(set (v16i8 V128:$dst),
+ (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
+ "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 0x100>;
+
+//===----------------------------------------------------------------------===//
+// Relaxed floating-point to int conversions
+//===----------------------------------------------------------------------===//
+
+multiclass RelaxedConvert<Vec vec, Vec arg, SDPatternOperator op, string name, bits<32> simdop> {
+ defm op#_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
+ vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>;
+}
+
+defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_signed,
+ "relaxed_trunc_f32x4_s", 0x101>;
+defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_unsigned,
+ "relaxed_trunc_f32x4_u", 0x102>;
+defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_signed_zero,
+ "relaxed_trunc_f64x2_s_zero", 0x103>;
+defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
+ "relaxed_trunc_f64x2_u_zero", 0x104>;
+
+//===----------------------------------------------------------------------===//
+// Relaxed Fused Multiply-Add and Subtract (FMA/FMS)
//===----------------------------------------------------------------------===//
multiclass SIMDFM<Vec vec, bits<32> simdopA, bits<32> simdopS> {
@@ -1342,16 +1372,18 @@ multiclass SIMDFM<Vec vec, bits<32> simdopA, bits<32> simdopS> {
RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_fma
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
- vec.prefix#".fma\t$dst, $a, $b, $c", vec.prefix#".fma", simdopA>;
+ vec.prefix#".relaxed_fma\t$dst, $a, $b, $c",
+ vec.prefix#".relaxed_fma", simdopA>;
defm FMS_#vec :
RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_fms
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
- vec.prefix#".fms\t$dst, $a, $b, $c", vec.prefix#".fms", simdopS>;
+ vec.prefix#".relaxed_fms\t$dst, $a, $b, $c",
+ vec.prefix#".relaxed_fms", simdopS>;
}
-defm "" : SIMDFM<F32x4, 0xaf, 0xb0>;
-defm "" : SIMDFM<F64x2, 0xcf, 0xd0>;
+defm "" : SIMDFM<F32x4, 0x105, 0x106>;
+defm "" : SIMDFM<F64x2, 0x107, 0x108>;
//===----------------------------------------------------------------------===//
// Laneselect
@@ -1362,58 +1394,61 @@ multiclass SIMDLANESELECT<Vec vec, bits<32> op> {
RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_laneselect
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
- vec.prefix#".laneselect\t$dst, $a, $b, $c", vec.prefix#".laneselect", op>;
+ vec.prefix#".relaxed_laneselect\t$dst, $a, $b, $c",
+ vec.prefix#".relaxed_laneselect", op>;
}
-defm "" : SIMDLANESELECT<I8x16, 0xb2>;
-defm "" : SIMDLANESELECT<I16x8, 0xb3>;
-defm "" : SIMDLANESELECT<I32x4, 0xd2>;
-defm "" : SIMDLANESELECT<I64x2, 0xd3>;
-
-
-//===----------------------------------------------------------------------===//
-// Relaxed swizzle
-//===----------------------------------------------------------------------===//
-
-defm RELAXED_SWIZZLE :
- RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
- [(set (v16i8 V128:$dst),
- (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
- "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 162>;
+defm "" : SIMDLANESELECT<I8x16, 0x109>;
+defm "" : SIMDLANESELECT<I16x8, 0x10a>;
+defm "" : SIMDLANESELECT<I32x4, 0x10b>;
+defm "" : SIMDLANESELECT<I64x2, 0x10c>;
//===----------------------------------------------------------------------===//
// Relaxed floating-point min and max.
//===----------------------------------------------------------------------===//
-multiclass SIMD_RELAXED_FMINMAX<Vec vec, bits<32> simdopMin, bits<32> simdopMax> {
- defm RELAXED_FMIN_#vec :
- RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins),
- [(set (vec.vt V128:$dst), (int_wasm_relaxed_min
- (vec.vt V128:$a), (vec.vt V128:$b)))],
- vec.prefix#".relaxed_min\t$dst, $a, $b", vec.prefix#".relaxed_min", simdopMin>;
- defm RELAXED_FMAX_#vec :
- RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins),
- [(set (vec.vt V128:$dst), (int_wasm_relaxed_max
- (vec.vt V128:$a), (vec.vt V128:$b)))],
- vec.prefix#".relaxed_max\t$dst, $a, $b", vec.prefix#".relaxed_max", simdopMax>;
+multiclass RelaxedBinary<Vec vec, SDPatternOperator node, string name,
+ bits<32> simdop> {
+ defm _#vec : RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ (outs), (ins),
+ [(set (vec.vt V128:$dst),
+ (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
+ vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
+ vec.prefix#"."#name, simdop>;
}
-defm "" : SIMD_RELAXED_FMINMAX<F32x4, 0xb4, 0xe2>;
-defm "" : SIMD_RELAXED_FMINMAX<F64x2, 0xd4, 0xee>;
+defm SIMD_RELAXED_FMIN :
+ RelaxedBinary<F32x4, int_wasm_relaxed_min, "relaxed_min", 0x10d>;
+defm SIMD_RELAXED_FMAX :
+ RelaxedBinary<F32x4, int_wasm_relaxed_max, "relaxed_max", 0x10e>;
+defm SIMD_RELAXED_FMIN :
+ RelaxedBinary<F64x2, int_wasm_relaxed_min, "relaxed_min", 0x10f>;
+defm SIMD_RELAXED_FMAX :
+ RelaxedBinary<F64x2, int_wasm_relaxed_max, "relaxed_max", 0x110>;
//===----------------------------------------------------------------------===//
-// Relaxed floating-point to int conversions
+// Relaxed rounding q15 multiplication
//===----------------------------------------------------------------------===//
-multiclass SIMD_RELAXED_CONVERT<Vec vec, Vec arg, SDPatternOperator op, string name, bits<32> simdop> {
- defm op#_#vec :
- RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
- [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
- vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>;
-}
+defm RELAXED_Q15MULR_S :
+ RelaxedBinary<I16x8, int_wasm_relaxed_q15mulr_signed, "relaxed_q15mulr_s",
+ 0x111>;
-defm "" : SIMD_RELAXED_CONVERT<I32x4, F32x4, int_wasm_relaxed_trunc_signed, "relaxed_trunc_f32x4_s", 0xa5>;
-defm "" : SIMD_RELAXED_CONVERT<I32x4, F32x4, int_wasm_relaxed_trunc_unsigned, "relaxed_trunc_f32x4_u", 0xa6>;
+//===----------------------------------------------------------------------===//
+// Relaxed integer dot product
+//===----------------------------------------------------------------------===//
-defm "" : SIMD_RELAXED_CONVERT<I32x4, F64x2, int_wasm_relaxed_trunc_zero_signed, "relaxed_trunc_f64x2_s_zero", 0xc5>;
-defm "" : SIMD_RELAXED_CONVERT<I32x4, F64x2, int_wasm_relaxed_trunc_zero_unsigned, "relaxed_trunc_f64x2_u_zero", 0xc6>;
+defm RELAXED_DOT :
+ RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
+ [(set (v8i16 V128:$dst), (int_wasm_dot_i8x16_i7x16_signed
+ (v16i8 V128:$lhs), (v16i8 V128:$rhs)))],
+ "i16x8.dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
+ "i16x8.dot_i8x16_i7x16_s", 0x112>;
+
+defm RELAXED_DOT_ADD :
+ RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc),
+ (outs), (ins),
+ [(set (v4i32 V128:$dst), (int_wasm_dot_i8x16_i7x16_add_signed
+ (v16i8 V128:$lhs), (v16i8 V128:$rhs), (v4i32 V128:$acc)))],
+ "i32x4.dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
+ "i32x4.dot_i8x16_i7x16_add_s", 0x113>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 309fcaf340eb..d16bb6b6648a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -16,6 +16,7 @@
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -72,9 +73,8 @@ WebAssemblyLateEHPrepare::getMatchingEHPad(MachineInstr *MI) {
MachineBasicBlock *EHPad = nullptr;
while (!WL.empty()) {
MachineBasicBlock *MBB = WL.pop_back_val();
- if (Visited.count(MBB))
+ if (!Visited.insert(MBB).second)
continue;
- Visited.insert(MBB);
if (MBB->isEHPad()) {
if (EHPad && EHPad != MBB)
return nullptr;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index b6c43be03aba..2db4bd822349 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -406,8 +406,9 @@ static bool canThrow(const Value *V) {
return true;
}
-// Get a global variable with the given name. If it doesn't exist declare it,
-// which will generate an import and assume that it will exist at link time.
+// Get a thread-local global variable with the given name. If it doesn't
+// exist, declare it, which will generate an import and assume that it will
+// exist at link time.
static GlobalVariable *getGlobalVariable(Module &M, Type *Ty,
WebAssemblyTargetMachine &TM,
const char *Name) {
@@ -415,16 +416,11 @@ static GlobalVariable *getGlobalVariable(Module &M, Type *Ty,
if (!GV)
report_fatal_error(Twine("unable to create global: ") + Name);
- // If the target supports TLS, make this variable thread-local. We can't just
- // unconditionally make it thread-local and depend on
- // CoalesceFeaturesAndStripAtomics to downgrade it, because stripping TLS has
- // the side effect of disallowing the object from being linked into a
- // shared-memory module, which we don't want to be responsible for.
- auto *Subtarget = TM.getSubtargetImpl();
- auto TLS = Subtarget->hasAtomics() && Subtarget->hasBulkMemory()
- ? GlobalValue::LocalExecTLSModel
- : GlobalValue::NotThreadLocal;
- GV->setThreadLocalMode(TLS);
+  // Variables created by this function are thread-local. If the target does
+  // not support TLS, we depend on CoalesceFeaturesAndStripAtomics to downgrade
+  // them to non-thread-local variables, in which case we don't allow this
+  // object to be linked with other objects using shared memory.
+ GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
return GV;
}
@@ -556,7 +552,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
Optional<unsigned> NEltArg;
std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs();
SizeArg += 1;
- if (NEltArg.hasValue())
+ if (NEltArg)
NEltArg = NEltArg.getValue() + 1;
FnAttrs.addAllocSizeAttr(SizeArg, NEltArg);
}
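The hasValue() to contextual-bool change works because llvm::Optional defines an explicit operator bool; a reduced sketch (values are hypothetical):

    #include "llvm/ADT/Optional.h"

    static unsigned bumpOrZero(llvm::Optional<unsigned> NElt) {
      if (NElt) // equivalent to NElt.hasValue()
        return NElt.getValue() + 1;
      return 0;
    }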
@@ -1064,22 +1060,16 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
nullifySetjmp(F);
}
- if (!Changed) {
- // Delete unused global variables and functions
- if (ResumeF)
- ResumeF->eraseFromParent();
- if (EHTypeIDF)
- EHTypeIDF->eraseFromParent();
- if (EmLongjmpF)
- EmLongjmpF->eraseFromParent();
- if (SaveSetjmpF)
- SaveSetjmpF->eraseFromParent();
- if (TestSetjmpF)
- TestSetjmpF->eraseFromParent();
- return false;
- }
+ // Delete unused global variables and functions
+ for (auto *V : {ThrewGV, ThrewValueGV})
+ if (V && V->use_empty())
+ V->eraseFromParent();
+ for (auto *V : {GetTempRet0F, SetTempRet0F, ResumeF, EHTypeIDF, EmLongjmpF,
+ SaveSetjmpF, TestSetjmpF, WasmLongjmpF, CatchF})
+ if (V && V->use_empty())
+ V->eraseFromParent();
- return true;
+ return Changed;
}
bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
@@ -1324,9 +1314,14 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
BasicBlock *BB = CB->getParent();
if (BB->getParent() != &F) // in other function
continue;
- if (CB->getOperandBundle(LLVMContext::OB_funclet))
- report_fatal_error(
- "setjmp within a catch clause is not supported in Wasm EH");
+ if (CB->getOperandBundle(LLVMContext::OB_funclet)) {
+ std::string S;
+ raw_string_ostream SS(S);
+ SS << "In function " + F.getName() +
+ ": setjmp within a catch clause is not supported in Wasm EH:\n";
+ SS << *CB;
+ report_fatal_error(StringRef(SS.str()));
+ }
CallInst *CI = nullptr;
// setjmp cannot throw. So if it is an invoke, lower it to a call
@@ -1502,10 +1497,16 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj(
for (unsigned I = 0; I < BBs.size(); I++) {
BasicBlock *BB = BBs[I];
for (Instruction &I : *BB) {
- if (isa<InvokeInst>(&I))
- report_fatal_error("When using Wasm EH with Emscripten SjLj, there is "
- "a restriction that `setjmp` function call and "
- "exception cannot be used within the same function");
+ if (isa<InvokeInst>(&I)) {
+ std::string S;
+ raw_string_ostream SS(S);
+ SS << "In function " << F.getName()
+ << ": When using Wasm EH with Emscripten SjLj, there is a "
+ "restriction that `setjmp` function call and exception cannot be "
+ "used within the same function:\n";
+ SS << I;
+ report_fatal_error(StringRef(SS.str()));
+ }
auto *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
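Both new diagnostics follow the same pattern: stream the function name and the offending instruction into a std::string, then hand it to report_fatal_error. A reduced sketch of that pattern (the helper name is illustrative, not from the patch):

    #include "llvm/IR/Instruction.h"
    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"

    [[noreturn]] static void fatalAt(const llvm::Instruction &I,
                                     llvm::StringRef Why) {
      std::string S;
      llvm::raw_string_ostream SS(S);
      SS << Why << ":\n" << I; // Instruction streams into raw_ostream
      llvm::report_fatal_error(llvm::StringRef(SS.str()));
    }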
@@ -1829,7 +1830,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) {
UnwindDest = CPI->getCatchSwitch()->getUnwindDest();
break;
- } else if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) {
+ }
+ if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) {
// getCleanupRetUnwindDest() can return nullptr when
// 1. This cleanuppad's matching cleanupret uwninds to caller
// 2. There is no matching cleanupret because it ends with
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
index 37ac8e75f4b7..21f6fd37d402 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
@@ -65,6 +65,9 @@ ModulePass *llvm::createWebAssemblyMCLowerPrePass() {
// for all functions before AsmPrinter. If this way of doing things is ever
// suboptimal, we could opt to make it a MachineFunctionPass and instead use
// something like createBarrierNoopPass() to enforce ordering.
+//
+// The information stored here is essential for emitDecls in the Wasm
+// AsmPrinter.
bool WebAssemblyMCLowerPrePass::runOnModule(Module &M) {
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
if (!MMIWP)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index ea80e96d50de..96284687971c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -24,6 +24,16 @@ using namespace llvm;
WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() = default; // anchor.
+MachineFunctionInfo *WebAssemblyFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ WebAssemblyFunctionInfo *Clone =
+ DestMF.cloneInfo<WebAssemblyFunctionInfo>(*this);
+ Clone->MF = &DestMF;
+ return Clone;
+}
+
void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) {
assert(WARegs.empty());
unsigned Reg = UnusedReg;
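The new clone() hook copies the target-specific info into the destination function's allocator and re-points the MF backpointer, which is why the header change below replaces the MachineFunction reference with a pointer (a reference member could never be reseated). A reduced sketch of the pattern (the class name is illustrative):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Support/Allocator.h"

    class MyFunctionInfo final : public llvm::MachineFunctionInfo {
      const llvm::MachineFunction *MF;
    public:
      explicit MyFunctionInfo(llvm::MachineFunction &MF_) : MF(&MF_) {}
      llvm::MachineFunctionInfo *
      clone(llvm::BumpPtrAllocator &, llvm::MachineFunction &DestMF,
            const llvm::DenseMap<llvm::MachineBasicBlock *,
                                 llvm::MachineBasicBlock *> &) const override {
        auto *Copy = DestMF.cloneInfo<MyFunctionInfo>(*this);
        Copy->MF = &DestMF; // retarget the backpointer at the clone's owner
        return Copy;
      }
    };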
@@ -153,7 +163,7 @@ void WebAssemblyFunctionInfo::initializeBaseYamlFields(
addResult(WebAssembly::parseMVT(VT.Value));
if (WasmEHInfo) {
for (auto KV : YamlMFI.SrcToUnwindDest)
- WasmEHInfo->setUnwindDest(MF.getBlockNumbered(KV.first),
- MF.getBlockNumbered(KV.second));
+ WasmEHInfo->setUnwindDest(MF->getBlockNumbered(KV.first),
+ MF->getBlockNumbered(KV.second));
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 413d0d1dc554..619617049bb2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -31,7 +31,7 @@ struct WebAssemblyFunctionInfo;
/// This class is derived from MachineFunctionInfo and contains private
/// WebAssembly-specific information for each MachineFunction.
class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
- const MachineFunction &MF;
+ const MachineFunction *MF;
std::vector<MVT> Params;
std::vector<MVT> Results;
@@ -70,11 +70,16 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
WasmEHFuncInfo *WasmEHInfo = nullptr;
public:
- explicit WebAssemblyFunctionInfo(MachineFunction &MF)
- : MF(MF), WasmEHInfo(MF.getWasmEHFuncInfo()) {}
+ explicit WebAssemblyFunctionInfo(MachineFunction &MF_)
+ : MF(&MF_), WasmEHInfo(MF_.getWasmEHFuncInfo()) {}
~WebAssemblyFunctionInfo() override;
- const MachineFunction &getMachineFunction() const { return MF; }
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
+ const MachineFunction &getMachineFunction() const { return *MF; }
void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp
index 62fa089a94d4..5d8c58dcc334 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp
@@ -16,6 +16,7 @@
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-nullify-dbg-value-lists"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 6a6cac6d956f..d542ddb45c2e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -49,6 +49,11 @@ class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass {
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::TracksLiveness);
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override;
public:
@@ -102,7 +107,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
SplitLIs.clear();
}
- // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF
+ // In FixIrreducibleControlFlow, we conservatively inserted IMPLICIT_DEF
// instructions to satisfy LiveIntervals' requirement that all uses be
// dominated by defs. Now that LiveIntervals has computed which of these
// defs are actually needed and which are dead, remove the dead ones.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
deleted file mode 100644
index 5682cadc1a64..000000000000
--- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Fix up code to meet LiveInterval's requirements.
-///
-/// Some CodeGen passes don't preserve LiveInterval's requirements, because
-/// they run after register allocation and it isn't important. However,
-/// WebAssembly runs LiveIntervals in a late pass. This pass transforms code
-/// to meet LiveIntervals' requirements; primarily, it ensures that all
-/// virtual register uses have definitions (IMPLICIT_DEF definitions if
-/// nothing else).
-///
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
-#include "WebAssembly.h"
-#include "WebAssemblyMachineFunctionInfo.h"
-#include "WebAssemblySubtarget.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "wasm-prepare-for-live-intervals"
-
-namespace {
-class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {}
-
-private:
- StringRef getPassName() const override {
- return "WebAssembly Prepare For LiveIntervals";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // end anonymous namespace
-
-char WebAssemblyPrepareForLiveIntervals::ID = 0;
-INITIALIZE_PASS(WebAssemblyPrepareForLiveIntervals, DEBUG_TYPE,
- "Fix up code for LiveIntervals", false, false)
-
-FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() {
- return new WebAssemblyPrepareForLiveIntervals();
-}
-
-// Test whether the given register has an ARGUMENT def.
-static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
- for (const auto &Def : MRI.def_instructions(Reg))
- if (WebAssembly::isArgument(Def.getOpcode()))
- return true;
- return false;
-}
-
-bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
- MachineFunction &MF) {
- LLVM_DEBUG({
- dbgs() << "********** Prepare For LiveIntervals **********\n"
- << "********** Function: " << MF.getName() << '\n';
- });
-
- bool Changed = false;
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- MachineBasicBlock &Entry = *MF.begin();
-
- assert(!mustPreserveAnalysisID(LiveIntervalsID) &&
- "LiveIntervals shouldn't be active yet!");
-
- // We don't preserve SSA form.
- MRI.leaveSSA();
-
- // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF
- // instructions. LiveIntervals requires that all paths to virtual register
- // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to
- // conservatively satisfy this.
- //
- // TODO: This is fairly heavy-handed; find a better approach.
- //
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- Register Reg = Register::index2VirtReg(I);
-
- // Skip unused registers.
- if (MRI.use_nodbg_empty(Reg))
- continue;
-
- // Skip registers that have an ARGUMENT definition.
- if (hasArgumentDef(Reg, MRI))
- continue;
-
- BuildMI(Entry, Entry.begin(), DebugLoc(),
- TII.get(WebAssembly::IMPLICIT_DEF), Reg);
- Changed = true;
- }
-
- // Move ARGUMENT_* instructions to the top of the entry block, so that their
- // liveness reflects the fact that these really are live-in values.
- for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
- if (WebAssembly::isArgument(MI.getOpcode())) {
- MI.removeFromParent();
- Entry.insert(Entry.begin(), &MI);
- }
- }
-
- // Ok, we're now ready to run the LiveIntervals analysis again.
- MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
-
- return Changed;
-}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index 71f0bd28e1be..1e2bee7a5c73 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -72,9 +72,6 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
assert(!mustPreserveAnalysisID(LiveIntervalsID) &&
"LiveIntervals shouldn't be active yet!");
- // We don't preserve SSA or liveness.
- MRI.leaveSSA();
- MRI.invalidateLiveness();
for (unsigned PReg = WebAssembly::NoRegister + 1;
PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
index 16e05150c64e..74af4c8873f7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -44,7 +44,7 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove(
SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val,
- SDValue Size, Align Alignment, bool IsVolatile,
+ SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>();
if (!ST.hasBulkMemory())
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
index f4d2132fd3af..fd517b238715 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -37,6 +37,7 @@ public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
SDValue Chain, SDValue Op1, SDValue Op2,
SDValue Op3, Align Alignment, bool IsVolatile,
+ bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const override;
};
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index b553c8150652..780694980523 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -48,6 +48,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool HasMutableGlobals = false;
bool HasTailCall = false;
bool HasReferenceTypes = false;
+ bool HasExtendedConst = false;
/// What processor and OS we're targeting.
Triple TargetTriple;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 482837178f3d..76f036358ae8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -25,11 +25,12 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
@@ -56,13 +57,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() {
auto &PR = *PassRegistry::getPassRegistry();
initializeWebAssemblyAddMissingPrototypesPass(PR);
initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR);
- initializeLowerGlobalDtorsPass(PR);
+ initializeLowerGlobalDtorsLegacyPassPass(PR);
initializeFixFunctionBitcastsPass(PR);
initializeOptimizeReturnedPass(PR);
initializeWebAssemblyArgumentMovePass(PR);
initializeWebAssemblySetP2AlignOperandsPass(PR);
initializeWebAssemblyReplacePhysRegsPass(PR);
- initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
initializeWebAssemblyMemIntrinsicResultsPass(PR);
initializeWebAssemblyRegStackifyPass(PR);
@@ -87,7 +87,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() {
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM,
const Triple &TT) {
- if (!RM.hasValue()) {
+ if (!RM) {
// Default to static relocation model. This should always be more optimal
// than PIC since the static linker can determine all global addresses and
// assume direct function calls.
@@ -203,11 +203,12 @@ public:
bool StrippedAtomics = false;
bool StrippedTLS = false;
- if (!Features[WebAssembly::FeatureAtomics])
+ if (!Features[WebAssembly::FeatureAtomics]) {
StrippedAtomics = stripAtomics(M);
-
- if (!Features[WebAssembly::FeatureBulkMemory])
StrippedTLS = stripThreadLocals(M);
+ } else if (!Features[WebAssembly::FeatureBulkMemory]) {
+ StrippedTLS |= stripThreadLocals(M);
+ }
if (StrippedAtomics && !StrippedTLS)
stripThreadLocals(M);
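The restructured control flow, summarized as a decision table (my reading of the code above, not text from the patch):

    atomics  bulk-memory  action
    off      (any)        stripAtomics, then stripThreadLocals
    on       off          stripThreadLocals only
    on       on           nothing stripped

The trailing StrippedAtomics && !StrippedTLS check keeps the old safety net: stripping atomics without stripping TLS would leave thread-local globals in a module that can no longer synchronize.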
@@ -320,6 +321,7 @@ public:
FunctionPass *createTargetRegisterAllocator(bool) override;
void addIRPasses() override;
+ void addISelPrepare() override;
bool addInstSelector() override;
void addPostRegAlloc() override;
bool addGCPasses() override { return false; }
@@ -335,7 +337,7 @@ public:
} // end anonymous namespace
TargetTransformInfo
-WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) {
+WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
}
@@ -407,17 +409,11 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) {
//===----------------------------------------------------------------------===//
void WebAssemblyPassConfig::addIRPasses() {
- // Lower atomics and TLS if necessary
- addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
-
- // This is a no-op if atomics are not used in the module
- addPass(createAtomicExpandPass());
-
// Add signatures to prototype-less function declarations
addPass(createWebAssemblyAddMissingPrototypes());
// Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls.
- addPass(createWebAssemblyLowerGlobalDtors());
+ addPass(createLowerGlobalDtorsLegacyPass());
// Fix function bitcasts, as WebAssembly requires caller and callee signatures
// to match.
@@ -455,6 +451,16 @@ void WebAssemblyPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
}
+void WebAssemblyPassConfig::addISelPrepare() {
+ // Lower atomics and TLS if necessary
+ addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
+
+ // This is a no-op if atomics are not used in the module
+ addPass(createAtomicExpandPass());
+
+ TargetPassConfig::addISelPrepare();
+}
+
bool WebAssemblyPassConfig::addInstSelector() {
(void)TargetPassConfig::addInstSelector();
addPass(
@@ -517,9 +523,6 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Preparations and optimizations related to register stackification.
if (getOptLevel() != CodeGenOpt::None) {
- // LiveIntervals isn't commonly run this late. Re-establish preconditions.
- addPass(createWebAssemblyPrepareForLiveIntervals());
-
// Depend on LiveIntervals and perform some optimizations on it.
addPass(createWebAssemblyOptimizeLiveIntervals());
@@ -588,8 +591,7 @@ yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML(
bool WebAssemblyTargetMachine::parseMachineFunctionInfo(
const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
SMDiagnostic &Error, SMRange &SourceRange) const {
- const auto &YamlMFI =
- reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI);
+ const auto &YamlMFI = static_cast<const yaml::WebAssemblyFunctionInfo &>(MFI);
MachineFunction &MF = PFS.MF;
MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
return false;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
index 29e968bfe8eb..5d5378f76567 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -46,7 +46,7 @@ public:
return TLOF.get();
}
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
bool usesPhysRegsForValues() const override { return false; }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index f1ebcbc6fc51..62f7155e794a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -139,3 +139,7 @@ void WebAssemblyTTIImpl::getUnrollingPreferences(
// becomes "fall through" to default value of 2.
UP.BEInsns = 2;
}
+
+bool WebAssemblyTTIImpl::supportsTailCalls() const {
+ return getST()->hasTailCall();
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 50036f7f7e98..fde58a9587b6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -74,6 +74,8 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
+
+ bool supportsTailCalls() const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e9ecff3bf514..871b23f80efe 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
@@ -124,12 +125,12 @@ private:
bool matchingInlineAsm, unsigned VariantID = 0) {
// In Code16GCC mode, match as 32-bit.
if (Code16GCC)
- SwitchMode(X86::Mode32Bit);
+ SwitchMode(X86::Is32Bit);
unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MissingFeatures, matchingInlineAsm,
VariantID);
if (Code16GCC)
- SwitchMode(X86::Mode16Bit);
+ SwitchMode(X86::Is16Bit);
return rv;
}
@@ -422,16 +423,18 @@ private:
};
class IntelExprStateMachine {
- IntelExprState State, PrevState;
- unsigned BaseReg, IndexReg, TmpReg, Scale;
- int64_t Imm;
- const MCExpr *Sym;
+ IntelExprState State = IES_INIT, PrevState = IES_ERROR;
+ unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
+ int64_t Imm = 0;
+ const MCExpr *Sym = nullptr;
StringRef SymName;
InfixCalculator IC;
InlineAsmIdentifierInfo Info;
- short BracCount;
- bool MemExpr;
- bool OffsetOperator;
+ short BracCount = 0;
+ bool MemExpr = false;
+ bool OffsetOperator = false;
+ bool AttachToOperandIdx = false;
+ bool IsPIC = false;
SMLoc OffsetOperatorLoc;
AsmTypeInfo CurType;
@@ -446,10 +449,7 @@ private:
}
public:
- IntelExprStateMachine()
- : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
- TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
- MemExpr(false), OffsetOperator(false) {}
+ IntelExprStateMachine() = default;
void addImm(int64_t imm) { Imm += imm; }
short getBracCount() const { return BracCount; }
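The constructor change relies on default member initializers: the defaults move next to the declarations and the constructor becomes `= default`, so new members such as AttachToOperandIdx and IsPIC pick up initializers without touching any constructor. A reduced sketch of the same transformation (struct names are illustrative):

    // Before: every member initialized in the constructor's init list.
    struct StateOld {
      int State;
      unsigned Scale;
      StateOld() : State(0), Scale(0) {}
    };

    // After: in-class (NSDMI) defaults; the compiler-generated default
    // constructor applies them.
    struct StateNew {
      int State = 0;
      unsigned Scale = 0;
      bool IsPIC = false;
      StateNew() = default;
    };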
@@ -469,9 +469,29 @@ private:
bool isValidEndState() const {
return State == IES_RBRAC || State == IES_INTEGER;
}
+
+  // Is the Intel expression appended after an operand index?
+  // [OperandIdx][Intel Expression]
+  // This is necessary for checking whether it is an independent
+  // Intel expression in the backend when parsing inline asm.
+ void setAppendAfterOperand() { AttachToOperandIdx = true; }
+
+ bool isPIC() const { return IsPIC; }
+ void setPIC() { IsPIC = true; }
+
bool hadError() const { return State == IES_ERROR; }
const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
+ bool regsUseUpError(StringRef &ErrMsg) {
+    // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg];
+    // we cannot introduce an additional register in inline asm in the PIC
+    // model.
+ if (IsPIC && AttachToOperandIdx)
+ ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
+ else
+ ErrMsg = "BaseReg/IndexReg already set!";
+ return true;
+ }
+
void onOr() {
IntelExprState CurrState = State;
switch (State) {
@@ -655,10 +675,8 @@ private:
if (!BaseReg) {
BaseReg = TmpReg;
} else {
- if (IndexReg) {
- ErrMsg = "BaseReg/IndexReg already set!";
- return true;
- }
+ if (IndexReg)
+ return regsUseUpError(ErrMsg);
IndexReg = TmpReg;
Scale = 0;
}
@@ -716,10 +734,8 @@ private:
if (!BaseReg) {
BaseReg = TmpReg;
} else {
- if (IndexReg) {
- ErrMsg = "BaseReg/IndexReg already set!";
- return true;
- }
+ if (IndexReg)
+ return regsUseUpError(ErrMsg);
IndexReg = TmpReg;
Scale = 0;
}
@@ -777,10 +793,8 @@ private:
case IES_MULTIPLY:
// Index Register - Scale * Register
if (PrevState == IES_INTEGER) {
- if (IndexReg) {
- ErrMsg = "BaseReg/IndexReg already set!";
- return true;
- }
+ if (IndexReg)
+ return regsUseUpError(ErrMsg);
State = IES_REGISTER;
IndexReg = Reg;
// Get the scale and replace the 'Scale * Register' with '0'.
@@ -861,10 +875,8 @@ private:
State = IES_INTEGER;
if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
// Index Register - Register * Scale
- if (IndexReg) {
- ErrMsg = "BaseReg/IndexReg already set!";
- return true;
- }
+ if (IndexReg)
+ return regsUseUpError(ErrMsg);
IndexReg = TmpReg;
Scale = TmpInt;
if (checkScale(Scale, ErrMsg))
@@ -945,7 +957,7 @@ private:
BracCount++;
return false;
}
- bool onRBrac() {
+ bool onRBrac(StringRef &ErrMsg) {
IntelExprState CurrState = State;
switch (State) {
default:
@@ -955,8 +967,10 @@ private:
case IES_OFFSET:
case IES_REGISTER:
case IES_RPAREN:
- if (BracCount-- != 1)
+ if (BracCount-- != 1) {
+ ErrMsg = "unexpected bracket encountered";
return true;
+ }
State = IES_RBRAC;
if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
// If we already have a BaseReg, then assume this is the IndexReg with
@@ -964,7 +978,8 @@ private:
if (!BaseReg) {
BaseReg = TmpReg;
} else {
- assert (!IndexReg && "BaseReg/IndexReg already set!");
+ if (IndexReg)
+ return regsUseUpError(ErrMsg);
IndexReg = TmpReg;
Scale = 0;
}
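The hunks above route bracket and register mismatches through ErrMsg instead of an assert or a fixed caller-side message; the shape of the new flow, as a simplified standalone sketch (not the actual parser):

    static bool onRBrac(int &BracCount, StringRef &ErrMsg) {
      if (BracCount-- != 1) {
        ErrMsg = "unexpected bracket encountered";
        return true; // error
      }
      return false;
    }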
@@ -1089,9 +1104,9 @@ private:
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
OperandVector &FinalOperands);
- bool ParseOperand(OperandVector &Operands);
- bool ParseATTOperand(OperandVector &Operands);
- bool ParseIntelOperand(OperandVector &Operands);
+ bool parseOperand(OperandVector &Operands, StringRef Name);
+ bool parseATTOperand(OperandVector &Operands);
+ bool parseIntelOperand(OperandVector &Operands, StringRef Name);
bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
InlineAsmIdentifierInfo &Info, SMLoc &End);
bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
@@ -1111,6 +1126,8 @@ private:
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End,
bool IsParsingOffsetOperator = false);
+ void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
+ IntelExprStateMachine &SM);
bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
SMLoc EndLoc, OperandVector &Operands);
@@ -1193,19 +1210,19 @@ private:
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return getSTI().getFeatureBits()[X86::Mode64Bit];
+ return getSTI().getFeatureBits()[X86::Is64Bit];
}
bool is32BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return getSTI().getFeatureBits()[X86::Mode32Bit];
+ return getSTI().getFeatureBits()[X86::Is32Bit];
}
bool is16BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return getSTI().getFeatureBits()[X86::Mode16Bit];
+ return getSTI().getFeatureBits()[X86::Is16Bit];
}
void SwitchMode(unsigned mode) {
MCSubtargetInfo &STI = copySTI();
- FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
+ FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
FeatureBitset FB = ComputeAvailableFeatures(
STI.ToggleFeature(OldMode.flip(mode)));
@@ -1716,11 +1733,11 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
return false;
}
-bool X86AsmParser::ParseOperand(OperandVector &Operands) {
+bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
if (isParsingIntelSyntax())
- return ParseIntelOperand(Operands);
+ return parseIntelOperand(Operands, Name);
- return ParseATTOperand(Operands);
+ return parseATTOperand(Operands);
}
bool X86AsmParser::CreateMemForMSInlineAsm(
@@ -1759,8 +1776,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm(
// registers in a memory expression, even though it is inaccessible via rip/eip.
if (IsGlobalLV && (BaseReg || IndexReg)) {
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
- End, Size, Identifier, Decl,
- FrontendSize));
+ End, Size, Identifier, Decl, 0,
+ BaseReg && IndexReg));
return false;
}
// Otherwise, we set the base register to a non-zero value
@@ -1841,11 +1858,25 @@ bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
return true;
}
+// Check whether the current intel expression is appended after an operand,
+// like: [Operand][Intel Expression]
+void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
+ IntelExprStateMachine &SM) {
+ if (PrevTK != AsmToken::RBrac)
+ return;
+
+ SM.setAppendAfterOperand();
+}
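What the check catches, sketched with MS inline asm (operand and register names invented): after the frontend substitutes an operand, the opening '[' of the trailing expression directly follows the operand's closing ']':

    __asm {
      mov eax, [Arr][edx*4]  // PrevTK == AsmToken::RBrac at the second '['
    }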
+
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCAsmParser &Parser = getParser();
StringRef ErrMsg;
AsmToken::TokenKind PrevTK = AsmToken::Error;
+
+ if (getContext().getObjectFileInfo()->isPositionIndependent())
+ SM.setPIC();
+
bool Done = false;
while (!Done) {
// Get a fresh reference on each loop iteration in case the previous
@@ -2123,10 +2154,12 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
case AsmToken::LBrac:
if (SM.onLBrac())
return Error(Tok.getLoc(), "unexpected bracket encountered");
+ tryParseOperandIdx(PrevTK, SM);
break;
case AsmToken::RBrac:
- if (SM.onRBrac())
- return Error(Tok.getLoc(), "unexpected bracket encountered");
+ if (SM.onRBrac(ErrMsg)) {
+ return Error(Tok.getLoc(), ErrMsg);
+ }
break;
case AsmToken::LParen: SM.onLParen(); break;
case AsmToken::RParen: SM.onRParen(); break;
@@ -2477,7 +2510,7 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
return false;
}
-bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
+bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@@ -2552,6 +2585,8 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
StringRef ErrMsg;
unsigned BaseReg = SM.getBaseReg();
unsigned IndexReg = SM.getIndexReg();
+ if (IndexReg && BaseReg == X86::RIP)
+ BaseReg = 0;
unsigned Scale = SM.getScale();
if (!PtrInOperand)
Size = SM.getElementSize() << 3;
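Worked through on a concrete operand (register names purely illustrative): in 64-bit MASM a named variable defaults to a RIP base, but rip-relative addressing has no index field, so the two added lines drop the default base when an index register is present:

    unsigned BaseReg = X86::RIP, IndexReg = X86::RCX; // e.g. "mov eax, table[rcx*4]"
    if (IndexReg && BaseReg == X86::RIP)
      BaseReg = 0; // rip + index is not encodable; keep only the scaled index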
@@ -2597,25 +2632,49 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
// When parsing x64 MS-style assembly, all non-absolute references to a named
// variable default to RIP-relative.
- if (Parser.isParsingMasm() && is64BitMode() && SM.getElementSize() > 0) {
- Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
- BaseReg, IndexReg, Scale, Start,
- End, Size,
- /*DefaultBaseReg=*/X86::RIP));
- return false;
+ unsigned DefaultBaseReg = X86::NoRegister;
+ bool MaybeDirectBranchDest = true;
+
+ if (Parser.isParsingMasm()) {
+ bool IsUnconditionalBranch =
+ Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
+ if (is64BitMode() && SM.getElementSize() > 0) {
+ DefaultBaseReg = X86::RIP;
+ }
+ if (IsUnconditionalBranch) {
+ if (PtrInOperand) {
+ MaybeDirectBranchDest = false;
+ if (is64BitMode())
+ DefaultBaseReg = X86::RIP;
+ } else if (!BaseReg && !IndexReg && Disp &&
+ Disp->getKind() == MCExpr::SymbolRef) {
+ if (is64BitMode()) {
+ if (SM.getSize() == 8) {
+ MaybeDirectBranchDest = false;
+ DefaultBaseReg = X86::RIP;
+ }
+ } else {
+ if (SM.getSize() == 4 || SM.getSize() == 2)
+ MaybeDirectBranchDest = false;
+ }
+ }
+ }
}
- if ((BaseReg || IndexReg || RegNo))
- Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
- BaseReg, IndexReg, Scale, Start,
- End, Size));
+ if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
+ Operands.push_back(X86Operand::CreateMem(
+ getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
+ Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
+ /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
else
- Operands.push_back(
- X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size));
+ Operands.push_back(X86Operand::CreateMem(
+ getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
+ /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
+ MaybeDirectBranchDest));
return false;
}
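The MASM branch handling above, reduced to examples (the assembly lines are hedged illustrations, not test output):

    // 32-bit MASM:
    //   jmp dword ptr var   ; explicit size -> MaybeDirectBranchDest = false
    //   jmp var             ; may still match the direct pcrel form
    // 64-bit MASM additionally falls back to a RIP default base for the
    // indirect cases.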
-bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
+bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
case AsmToken::Dollar: {
@@ -2722,7 +2781,7 @@ bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
if (!getLexer().is(AsmToken::RCurly))
return Error(getLexer().getLoc(), "Expected } at this point");
Parser.Lex(); // Eat '}'
- // Assign Z with the {z} mark opernad
+ // Assign Z with the {z} mark operand
Z = X86Operand::CreateToken("{z}", StartLoc);
return false;
}
@@ -3346,7 +3405,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Name = Next;
PatchedName = Name;
- ForcedDataPrefix = X86::Mode32Bit;
+ ForcedDataPrefix = X86::Is32Bit;
IsPrefix = false;
}
}
@@ -3371,7 +3430,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Read the operands.
while (true) {
- if (ParseOperand(Operands))
+ if (parseOperand(Operands, Name))
return true;
if (HandleAVX512Operand(Operands))
return true;
@@ -3774,84 +3833,27 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
}
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
+ using namespace X86;
const MCRegisterInfo *MRI = getContext().getRegisterInfo();
-
- switch (Inst.getOpcode()) {
- case X86::VGATHERDPDYrm:
- case X86::VGATHERDPDrm:
- case X86::VGATHERDPSYrm:
- case X86::VGATHERDPSrm:
- case X86::VGATHERQPDYrm:
- case X86::VGATHERQPDrm:
- case X86::VGATHERQPSYrm:
- case X86::VGATHERQPSrm:
- case X86::VPGATHERDDYrm:
- case X86::VPGATHERDDrm:
- case X86::VPGATHERDQYrm:
- case X86::VPGATHERDQrm:
- case X86::VPGATHERQDYrm:
- case X86::VPGATHERQDrm:
- case X86::VPGATHERQQYrm:
- case X86::VPGATHERQQrm: {
- unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
- unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
- unsigned Index =
- MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
- if (Dest == Mask || Dest == Index || Mask == Index)
- return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
- "registers should be distinct");
- break;
- }
- case X86::VGATHERDPDZ128rm:
- case X86::VGATHERDPDZ256rm:
- case X86::VGATHERDPDZrm:
- case X86::VGATHERDPSZ128rm:
- case X86::VGATHERDPSZ256rm:
- case X86::VGATHERDPSZrm:
- case X86::VGATHERQPDZ128rm:
- case X86::VGATHERQPDZ256rm:
- case X86::VGATHERQPDZrm:
- case X86::VGATHERQPSZ128rm:
- case X86::VGATHERQPSZ256rm:
- case X86::VGATHERQPSZrm:
- case X86::VPGATHERDDZ128rm:
- case X86::VPGATHERDDZ256rm:
- case X86::VPGATHERDDZrm:
- case X86::VPGATHERDQZ128rm:
- case X86::VPGATHERDQZ256rm:
- case X86::VPGATHERDQZrm:
- case X86::VPGATHERQDZ128rm:
- case X86::VPGATHERQDZ256rm:
- case X86::VPGATHERQDZrm:
- case X86::VPGATHERQQZ128rm:
- case X86::VPGATHERQQZ256rm:
- case X86::VPGATHERQQZrm: {
- unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
- unsigned Index =
- MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
- if (Dest == Index)
- return Warning(Ops[0]->getStartLoc(), "index and destination registers "
- "should be distinct");
- break;
- }
- case X86::V4FMADDPSrm:
- case X86::V4FMADDPSrmk:
- case X86::V4FMADDPSrmkz:
- case X86::V4FMADDSSrm:
- case X86::V4FMADDSSrmk:
- case X86::V4FMADDSSrmkz:
- case X86::V4FNMADDPSrm:
- case X86::V4FNMADDPSrmk:
- case X86::V4FNMADDPSrmkz:
- case X86::V4FNMADDSSrm:
- case X86::V4FNMADDSSrmk:
- case X86::V4FNMADDSSrmkz:
- case X86::VP4DPWSSDSrm:
- case X86::VP4DPWSSDSrmk:
- case X86::VP4DPWSSDSrmkz:
- case X86::VP4DPWSSDrm:
- case X86::VP4DPWSSDrmk:
- case X86::VP4DPWSSDrmkz: {
+ unsigned Opcode = Inst.getOpcode();
+ uint64_t TSFlags = MII.get(Opcode).TSFlags;
+ if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
+ isVFMADDCSH(Opcode)) {
+ unsigned Dest = Inst.getOperand(0).getReg();
+ for (unsigned i = 2; i < Inst.getNumOperands(); i++)
+ if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
+ return Warning(Ops[0]->getStartLoc(), "Destination register should be "
+ "distinct from source registers");
+ } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
+ isVFMULCSH(Opcode)) {
+ unsigned Dest = Inst.getOperand(0).getReg();
+ for (unsigned i = 1; i < Inst.getNumOperands(); i++)
+ if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
+ return Warning(Ops[0]->getStartLoc(), "Destination register should be "
+ "distinct from source registers");
+ } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
+ isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
+ isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
X86::AddrNumOperands - 1).getReg();
unsigned Src2Enc = MRI->getEncodingValue(Src2);
@@ -3865,186 +3867,34 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
RegName.take_front(3) + Twine(GroupEnd) +
"' source group");
}
- break;
- }
- case X86::VFCMADDCPHZ128m:
- case X86::VFCMADDCPHZ256m:
- case X86::VFCMADDCPHZm:
- case X86::VFCMADDCPHZ128mb:
- case X86::VFCMADDCPHZ256mb:
- case X86::VFCMADDCPHZmb:
- case X86::VFCMADDCPHZ128mbk:
- case X86::VFCMADDCPHZ256mbk:
- case X86::VFCMADDCPHZmbk:
- case X86::VFCMADDCPHZ128mbkz:
- case X86::VFCMADDCPHZ256mbkz:
- case X86::VFCMADDCPHZmbkz:
- case X86::VFCMADDCPHZ128mk:
- case X86::VFCMADDCPHZ256mk:
- case X86::VFCMADDCPHZmk:
- case X86::VFCMADDCPHZ128mkz:
- case X86::VFCMADDCPHZ256mkz:
- case X86::VFCMADDCPHZmkz:
- case X86::VFCMADDCPHZ128r:
- case X86::VFCMADDCPHZ256r:
- case X86::VFCMADDCPHZr:
- case X86::VFCMADDCPHZ128rk:
- case X86::VFCMADDCPHZ256rk:
- case X86::VFCMADDCPHZrk:
- case X86::VFCMADDCPHZ128rkz:
- case X86::VFCMADDCPHZ256rkz:
- case X86::VFCMADDCPHZrkz:
- case X86::VFCMADDCPHZrb:
- case X86::VFCMADDCPHZrbk:
- case X86::VFCMADDCPHZrbkz:
- case X86::VFCMADDCSHZm:
- case X86::VFCMADDCSHZmk:
- case X86::VFCMADDCSHZmkz:
- case X86::VFCMADDCSHZr:
- case X86::VFCMADDCSHZrb:
- case X86::VFCMADDCSHZrbk:
- case X86::VFCMADDCSHZrbkz:
- case X86::VFCMADDCSHZrk:
- case X86::VFCMADDCSHZrkz:
- case X86::VFMADDCPHZ128m:
- case X86::VFMADDCPHZ256m:
- case X86::VFMADDCPHZm:
- case X86::VFMADDCPHZ128mb:
- case X86::VFMADDCPHZ256mb:
- case X86::VFMADDCPHZmb:
- case X86::VFMADDCPHZ128mbk:
- case X86::VFMADDCPHZ256mbk:
- case X86::VFMADDCPHZmbk:
- case X86::VFMADDCPHZ128mbkz:
- case X86::VFMADDCPHZ256mbkz:
- case X86::VFMADDCPHZmbkz:
- case X86::VFMADDCPHZ128mk:
- case X86::VFMADDCPHZ256mk:
- case X86::VFMADDCPHZmk:
- case X86::VFMADDCPHZ128mkz:
- case X86::VFMADDCPHZ256mkz:
- case X86::VFMADDCPHZmkz:
- case X86::VFMADDCPHZ128r:
- case X86::VFMADDCPHZ256r:
- case X86::VFMADDCPHZr:
- case X86::VFMADDCPHZ128rk:
- case X86::VFMADDCPHZ256rk:
- case X86::VFMADDCPHZrk:
- case X86::VFMADDCPHZ128rkz:
- case X86::VFMADDCPHZ256rkz:
- case X86::VFMADDCPHZrkz:
- case X86::VFMADDCPHZrb:
- case X86::VFMADDCPHZrbk:
- case X86::VFMADDCPHZrbkz:
- case X86::VFMADDCSHZm:
- case X86::VFMADDCSHZmk:
- case X86::VFMADDCSHZmkz:
- case X86::VFMADDCSHZr:
- case X86::VFMADDCSHZrb:
- case X86::VFMADDCSHZrbk:
- case X86::VFMADDCSHZrbkz:
- case X86::VFMADDCSHZrk:
- case X86::VFMADDCSHZrkz: {
- unsigned Dest = Inst.getOperand(0).getReg();
- for (unsigned i = 2; i < Inst.getNumOperands(); i++)
- if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
- return Warning(Ops[0]->getStartLoc(), "Destination register should be "
- "distinct from source registers");
- break;
- }
- case X86::VFCMULCPHZ128rm:
- case X86::VFCMULCPHZ256rm:
- case X86::VFCMULCPHZrm:
- case X86::VFCMULCPHZ128rmb:
- case X86::VFCMULCPHZ256rmb:
- case X86::VFCMULCPHZrmb:
- case X86::VFCMULCPHZ128rmbk:
- case X86::VFCMULCPHZ256rmbk:
- case X86::VFCMULCPHZrmbk:
- case X86::VFCMULCPHZ128rmbkz:
- case X86::VFCMULCPHZ256rmbkz:
- case X86::VFCMULCPHZrmbkz:
- case X86::VFCMULCPHZ128rmk:
- case X86::VFCMULCPHZ256rmk:
- case X86::VFCMULCPHZrmk:
- case X86::VFCMULCPHZ128rmkz:
- case X86::VFCMULCPHZ256rmkz:
- case X86::VFCMULCPHZrmkz:
- case X86::VFCMULCPHZ128rr:
- case X86::VFCMULCPHZ256rr:
- case X86::VFCMULCPHZrr:
- case X86::VFCMULCPHZ128rrk:
- case X86::VFCMULCPHZ256rrk:
- case X86::VFCMULCPHZrrk:
- case X86::VFCMULCPHZ128rrkz:
- case X86::VFCMULCPHZ256rrkz:
- case X86::VFCMULCPHZrrkz:
- case X86::VFCMULCPHZrrb:
- case X86::VFCMULCPHZrrbk:
- case X86::VFCMULCPHZrrbkz:
- case X86::VFCMULCSHZrm:
- case X86::VFCMULCSHZrmk:
- case X86::VFCMULCSHZrmkz:
- case X86::VFCMULCSHZrr:
- case X86::VFCMULCSHZrrb:
- case X86::VFCMULCSHZrrbk:
- case X86::VFCMULCSHZrrbkz:
- case X86::VFCMULCSHZrrk:
- case X86::VFCMULCSHZrrkz:
- case X86::VFMULCPHZ128rm:
- case X86::VFMULCPHZ256rm:
- case X86::VFMULCPHZrm:
- case X86::VFMULCPHZ128rmb:
- case X86::VFMULCPHZ256rmb:
- case X86::VFMULCPHZrmb:
- case X86::VFMULCPHZ128rmbk:
- case X86::VFMULCPHZ256rmbk:
- case X86::VFMULCPHZrmbk:
- case X86::VFMULCPHZ128rmbkz:
- case X86::VFMULCPHZ256rmbkz:
- case X86::VFMULCPHZrmbkz:
- case X86::VFMULCPHZ128rmk:
- case X86::VFMULCPHZ256rmk:
- case X86::VFMULCPHZrmk:
- case X86::VFMULCPHZ128rmkz:
- case X86::VFMULCPHZ256rmkz:
- case X86::VFMULCPHZrmkz:
- case X86::VFMULCPHZ128rr:
- case X86::VFMULCPHZ256rr:
- case X86::VFMULCPHZrr:
- case X86::VFMULCPHZ128rrk:
- case X86::VFMULCPHZ256rrk:
- case X86::VFMULCPHZrrk:
- case X86::VFMULCPHZ128rrkz:
- case X86::VFMULCPHZ256rrkz:
- case X86::VFMULCPHZrrkz:
- case X86::VFMULCPHZrrb:
- case X86::VFMULCPHZrrbk:
- case X86::VFMULCPHZrrbkz:
- case X86::VFMULCSHZrm:
- case X86::VFMULCSHZrmk:
- case X86::VFMULCSHZrmkz:
- case X86::VFMULCSHZrr:
- case X86::VFMULCSHZrrb:
- case X86::VFMULCSHZrrbk:
- case X86::VFMULCSHZrrbkz:
- case X86::VFMULCSHZrrk:
- case X86::VFMULCSHZrrkz: {
- unsigned Dest = Inst.getOperand(0).getReg();
- for (unsigned i = 1; i < Inst.getNumOperands(); i++)
- if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
- return Warning(Ops[0]->getStartLoc(), "Destination register should be "
- "distinct from source registers");
- break;
- }
+ } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
+ isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
+ isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
+ isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
+ bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
+ if (HasEVEX) {
+ unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Index = MRI->getEncodingValue(
+ Inst.getOperand(4 + X86::AddrIndexReg).getReg());
+ if (Dest == Index)
+ return Warning(Ops[0]->getStartLoc(), "index and destination registers "
+ "should be distinct");
+ } else {
+ unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ unsigned Index = MRI->getEncodingValue(
+ Inst.getOperand(3 + X86::AddrIndexReg).getReg());
+ if (Dest == Mask || Dest == Index || Mask == Index)
+ return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
+ "registers should be distinct");
+ }
}
- const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
// Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
// check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
- if ((MCID.TSFlags & X86II::EncodingMask) == 0) {
+ if ((TSFlags & X86II::EncodingMask) == 0) {
MCPhysReg HReg = X86::NoRegister;
- bool UsesRex = MCID.TSFlags & X86II::REX_W;
+ bool UsesRex = TSFlags & X86II::REX_W;
unsigned NumOps = Inst.getNumOperands();
for (unsigned i = 0; i != NumOps; ++i) {
const MCOperand &MO = Inst.getOperand(i);
@@ -4313,15 +4163,15 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
// when matching the instruction.
- if (ForcedDataPrefix == X86::Mode32Bit)
- SwitchMode(X86::Mode32Bit);
+ if (ForcedDataPrefix == X86::Is32Bit)
+ SwitchMode(X86::Is32Bit);
// First, try a direct match.
FeatureBitset MissingFeatures;
unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
MissingFeatures, MatchingInlineAsm,
isParsingIntelSyntax());
- if (ForcedDataPrefix == X86::Mode32Bit) {
- SwitchMode(X86::Mode16Bit);
+ if (ForcedDataPrefix == X86::Is32Bit) {
+ SwitchMode(X86::Is16Bit);
ForcedDataPrefix = 0;
}
switch (OriginalError) {
@@ -4840,8 +4690,7 @@ bool X86AsmParser::parseDirectiveNops(SMLoc L) {
if (getParser().parseAbsoluteExpression(Control))
return true;
}
- if (getParser().parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.nops' directive"))
+ if (getParser().parseEOL())
return true;
if (NumBytes <= 0) {
@@ -4863,7 +4712,7 @@ bool X86AsmParser::parseDirectiveNops(SMLoc L) {
/// parseDirectiveEven
/// ::= .even
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ if (parseEOL())
return false;
const MCSection *Section = getStreamer().getCurrentSectionOnly();
@@ -4871,7 +4720,7 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
getStreamer().initSections(false, getSTI());
Section = getStreamer().getCurrentSectionOnly();
}
- if (Section->UseCodeAlign())
+ if (Section->useCodeAlign())
getStreamer().emitCodeAlignment(2, &getSTI(), 0);
else
getStreamer().emitValueToAlignment(2, 0, 1, 0);
@@ -4886,7 +4735,7 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
if (IDVal == ".code16") {
Parser.Lex();
if (!is16BitMode()) {
- SwitchMode(X86::Mode16Bit);
+ SwitchMode(X86::Is16Bit);
getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
}
} else if (IDVal == ".code16gcc") {
@@ -4894,19 +4743,19 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
Parser.Lex();
Code16GCC = true;
if (!is16BitMode()) {
- SwitchMode(X86::Mode16Bit);
+ SwitchMode(X86::Is16Bit);
getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
}
} else if (IDVal == ".code32") {
Parser.Lex();
if (!is32BitMode()) {
- SwitchMode(X86::Mode32Bit);
+ SwitchMode(X86::Is32Bit);
getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
}
} else if (IDVal == ".code64") {
Parser.Lex();
if (!is64BitMode()) {
- SwitchMode(X86::Mode64Bit);
+ SwitchMode(X86::Is64Bit);
getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
}
} else {
@@ -5035,7 +4884,7 @@ bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
return TokError("unexpected token in directive");
getParser().Lex();
- getStreamer().EmitWinCFIPushReg(Reg, Loc);
+ getStreamer().emitWinCFIPushReg(Reg, Loc);
return false;
}
@@ -5055,7 +4904,7 @@ bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
return TokError("unexpected token in directive");
getParser().Lex();
- getStreamer().EmitWinCFISetFrame(Reg, Off, Loc);
+ getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
return false;
}
@@ -5075,7 +4924,7 @@ bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
return TokError("unexpected token in directive");
getParser().Lex();
- getStreamer().EmitWinCFISaveReg(Reg, Off, Loc);
+ getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
return false;
}
@@ -5095,7 +4944,7 @@ bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
return TokError("unexpected token in directive");
getParser().Lex();
- getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc);
+ getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
return false;
}
@@ -5116,7 +4965,7 @@ bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
return TokError("unexpected token in directive");
getParser().Lex();
- getStreamer().EmitWinCFIPushFrame(Code, Loc);
+ getStreamer().emitWinCFIPushFrame(Code, Loc);
return false;
}
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 67b1244708a8..075b800f9e20 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -17,6 +17,8 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
@@ -35,6 +37,10 @@ struct X86Operand final : public MCParsedAsmOperand {
void *OpDecl;
bool AddressOf;
+ /// This is used for inline asm, which may specify a base reg and an index
+ /// reg for a MemOp, e.g. ARR[eax + ecx*4], so no extra reg can be used for
+ /// the MemOp.
+ bool UseUpRegs = false;
+
struct TokOp {
const char *Data;
unsigned Length;
@@ -66,6 +72,11 @@ struct X86Operand final : public MCParsedAsmOperand {
/// If the memory operand is unsized and there are multiple instruction
/// matches, prefer the one with this size.
unsigned FrontendSize;
+
+ /// If false, then this operand must be a memory operand for an indirect
+ /// branch instruction. Otherwise, this operand may belong to either a
+ /// direct or indirect branch instruction.
+ bool MaybeDirectBranchDest;
};
union {
@@ -203,6 +214,10 @@ struct X86Operand final : public MCParsedAsmOperand {
assert(Kind == Memory && "Invalid access!");
return Mem.FrontendSize;
}
+ bool isMaybeDirectBranchDest() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.MaybeDirectBranchDest;
+ }
bool isToken() const override {return Kind == Token; }
@@ -285,12 +300,6 @@ struct X86Operand final : public MCParsedAsmOperand {
bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; }
- bool isMemPlaceholder(const MCInstrDesc &Desc) const override {
- // Only MS InlineAsm uses global variables with registers rather than
- // rip/eip.
- return isMem() && !Mem.DefaultBaseReg && Mem.FrontendSize;
- }
-
bool needAddressOf() const override { return AddressOf; }
bool isMem() const override { return Kind == Memory; }
@@ -374,8 +383,9 @@ struct X86Operand final : public MCParsedAsmOperand {
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
- !getMemIndexReg() && getMemScale() == 1;
+ !getMemIndexReg() && getMemScale() == 1 && isMaybeDirectBranchDest();
}
+
bool isAVX512RC() const{
return isImm();
}
@@ -384,6 +394,8 @@ struct X86Operand final : public MCParsedAsmOperand {
return isAbsMem() && Mem.ModeSize == 16;
}
+ bool isMemUseUpRegs() const override { return UseUpRegs; }
+
bool isSrcIdx() const {
return !getMemIndexReg() && getMemScale() == 1 &&
(getMemBaseReg() == X86::RSI || getMemBaseReg() == X86::ESI ||
@@ -669,7 +681,8 @@ struct X86Operand final : public MCParsedAsmOperand {
static std::unique_ptr<X86Operand>
CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0, StringRef SymName = StringRef(),
- void *OpDecl = nullptr, unsigned FrontendSize = 0) {
+ void *OpDecl = nullptr, unsigned FrontendSize = 0,
+ bool UseUpRegs = false, bool MaybeDirectBranchDest = true) {
auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -680,6 +693,8 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
Res->Mem.FrontendSize = FrontendSize;
+ Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
+ Res->UseUpRegs = UseUpRegs;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
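Both factory overloads keep their existing call sites valid because the new parameters are defaulted; a minimal usage sketch (operand values illustrative):

    auto Op = X86Operand::CreateMem(/*ModeSize=*/64, Disp, Start, End);
    // UseUpRegs defaults to false, MaybeDirectBranchDest to true.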
@@ -693,7 +708,8 @@ struct X86Operand final : public MCParsedAsmOperand {
SMLoc EndLoc, unsigned Size = 0,
unsigned DefaultBaseReg = X86::NoRegister,
StringRef SymName = StringRef(), void *OpDecl = nullptr,
- unsigned FrontendSize = 0) {
+ unsigned FrontendSize = 0, bool UseUpRegs = false,
+ bool MaybeDirectBranchDest = true) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg || DefaultBaseReg) &&
@@ -712,6 +728,8 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
Res->Mem.FrontendSize = FrontendSize;
+ Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
+ Res->UseUpRegs = UseUpRegs;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 908eb6d1fab1..1da6bf86397e 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -493,16 +493,15 @@ static int readPrefixes(struct InternalInstruction *insn) {
insn->displacementSize = (insn->hasAdSize ? 2 : 4);
insn->immediateSize = (insn->hasOpSize ? 2 : 4);
} else if (insn->mode == MODE_64BIT) {
+ insn->displacementSize = 4;
if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
insn->registerSize = 8;
insn->addressSize = (insn->hasAdSize ? 4 : 8);
- insn->displacementSize = 4;
insn->immediateSize = 4;
insn->hasOpSize = false;
} else {
insn->registerSize = (insn->hasOpSize ? 2 : 4);
insn->addressSize = (insn->hasAdSize ? 4 : 8);
- insn->displacementSize = (insn->hasOpSize ? 2 : 4);
insn->immediateSize = (insn->hasOpSize ? 2 : 4);
}
}
@@ -1722,13 +1721,13 @@ X86GenericDisassembler::X86GenericDisassembler(
std::unique_ptr<const MCInstrInfo> MII)
: MCDisassembler(STI, Ctx), MII(std::move(MII)) {
const FeatureBitset &FB = STI.getFeatureBits();
- if (FB[X86::Mode16Bit]) {
+ if (FB[X86::Is16Bit]) {
fMode = MODE_16BIT;
return;
- } else if (FB[X86::Mode32Bit]) {
+ } else if (FB[X86::Is32Bit]) {
fMode = MODE_32BIT;
return;
- } else if (FB[X86::Mode64Bit]) {
+ } else if (FB[X86::Is64Bit]) {
fMode = MODE_64BIT;
return;
}
@@ -1801,46 +1800,6 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
mcInst.addOperand(MCOperand::createReg(llvmRegnum));
}
-/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
-/// immediate Value in the MCInst.
-///
-/// @param Value - The immediate Value, has had any PC adjustment made by
-/// the caller.
-/// @param isBranch - If the instruction is a branch instruction
-/// @param Address - The starting address of the instruction
-/// @param Offset - The byte offset to this immediate in the instruction
-/// @param Width - The byte width of this immediate in the instruction
-///
-/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
-/// called then that function is called to get any symbolic information for the
-/// immediate in the instruction using the Address, Offset and Width. If that
-/// returns non-zero then the symbolic information it returns is used to create
-/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
-/// returns zero and isBranch is true then a symbol look up for immediate Value
-/// is done and if a symbol is found an MCExpr is created with that, else
-/// an MCExpr with the immediate Value is created. This function returns true
-/// if it adds an operand to the MCInst and false otherwise.
-static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
- uint64_t Address, uint64_t Offset,
- uint64_t Width, MCInst &MI,
- const MCDisassembler *Dis) {
- return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
- Offset, Width);
-}
-
-/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
-/// referenced by a load instruction with the base register that is the rip.
-/// These can often be addresses in a literal pool. The Address of the
-/// instruction and its immediate Value are used to determine the address
-/// being referenced in the literal pool entry. The SymbolLookUp call back will
-/// return a pointer to a literal 'C' string if the referenced address is an
-/// address into a section with 'C' string literals.
-static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
- const void *Decoder) {
- const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
- Dis->tryAddingPcLoadReferenceComment(Value, Address);
-}
-
static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
0, // SEG_OVERRIDE_NONE
X86::CS,
@@ -1914,8 +1873,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
uint64_t pcrel = 0;
if (type == TYPE_REL) {
isBranch = true;
- pcrel = insn.startLocation +
- insn.immediateOffset + insn.immediateSize;
+ pcrel = insn.startLocation + insn.length;
switch (operand.encoding) {
default:
break;
@@ -1990,9 +1948,9 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
break;
}
- if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
- insn.immediateOffset, insn.immediateSize,
- mcInst, Dis))
+ if (!Dis->tryAddingSymbolicOperand(
+ mcInst, immediate + pcrel, insn.startLocation, isBranch,
+ insn.immediateOffset, insn.immediateSize, insn.length))
mcInst.addOperand(MCOperand::createImm(immediate));
if (type == TYPE_MOFFS) {
@@ -2129,11 +2087,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
return true;
}
if (insn.mode == MODE_64BIT){
- pcrel = insn.startLocation +
- insn.displacementOffset + insn.displacementSize;
- tryAddingPcLoadReferenceComment(insn.startLocation +
- insn.displacementOffset,
- insn.displacement + pcrel, Dis);
+ pcrel = insn.startLocation + insn.length;
+ Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
+ insn.startLocation +
+ insn.displacementOffset);
// Section 2.2.1.6
baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
X86::RIP);
@@ -2193,9 +2150,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
- if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
- insn.startLocation, insn.displacementOffset,
- insn.displacementSize, mcInst, Dis))
+
+ const uint8_t dispSize =
+ (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
+
+ if (!Dis->tryAddingSymbolicOperand(
+ mcInst, insn.displacement + pcrel, insn.startLocation, false,
+ insn.displacementOffset, dispSize, insn.length))
mcInst.addOperand(displacement);
mcInst.addOperand(segmentReg);
return false;
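The new pcrel base is the end of the whole instruction rather than the end of the displacement field, which matters when an immediate follows the displacement; worked through on an invented encoding:

    // 7-byte "mov rax, [rip + 0x10]" at address 0x1000:
    //   pcrel  = 0x1000 + 7    = 0x1007   (startLocation + length)
    //   target = 0x1007 + 0x10 = 0x1017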
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
index 24d26751f0a1..61e1b6b27a85 100644
--- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
@@ -35,7 +35,7 @@ public:
X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
: InstrPostProcess(STI, MCII) {}
- ~X86InstrPostProcess() {}
+ ~X86InstrPostProcess() = default;
void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
const MCInst &MCI) override;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index baacf2f46183..6fd3db4515ec 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -46,7 +46,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (CommentStream)
HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);
- printInstFlags(MI, OS);
+ printInstFlags(MI, OS, STI);
// Output CALLpcrel32 as "callq" in 64-bit mode.
// In Intel annotation it's always emitted as "call".
@@ -55,7 +55,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// InstrInfo.td as soon as Requires clause is supported properly
// for InstAlias.
if (MI->getOpcode() == X86::CALLpcrel32 &&
- (STI.getFeatureBits()[X86::Mode64Bit])) {
+ (STI.getFeatureBits()[X86::Is64Bit])) {
OS << "\tcallq\t";
printPCRelImm(MI, Address, 0, OS);
}
@@ -65,8 +65,8 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// 0x66 to be interpreted as "data16" by the asm printer.
// Thus we add an adjustment here in order to print the "right" instruction.
else if (MI->getOpcode() == X86::DATA16_PREFIX &&
- STI.getFeatureBits()[X86::Mode16Bit]) {
- OS << "\tdata32";
+ STI.getFeatureBits()[X86::Is16Bit]) {
+ OS << "\tdata32";
}
// Try to print any aliases first.
else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS))
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 3df48b466d07..2d92b8d5b574 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -8,6 +8,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86InstrRelaxTables.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -222,87 +223,7 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
unsigned Op = Inst.getOpcode();
- switch (Op) {
- default:
- return Op;
-
- // IMUL
- case X86::IMUL16rri8: return X86::IMUL16rri;
- case X86::IMUL16rmi8: return X86::IMUL16rmi;
- case X86::IMUL32rri8: return X86::IMUL32rri;
- case X86::IMUL32rmi8: return X86::IMUL32rmi;
- case X86::IMUL64rri8: return X86::IMUL64rri32;
- case X86::IMUL64rmi8: return X86::IMUL64rmi32;
-
- // AND
- case X86::AND16ri8: return X86::AND16ri;
- case X86::AND16mi8: return X86::AND16mi;
- case X86::AND32ri8: return X86::AND32ri;
- case X86::AND32mi8: return X86::AND32mi;
- case X86::AND64ri8: return X86::AND64ri32;
- case X86::AND64mi8: return X86::AND64mi32;
-
- // OR
- case X86::OR16ri8: return X86::OR16ri;
- case X86::OR16mi8: return X86::OR16mi;
- case X86::OR32ri8: return X86::OR32ri;
- case X86::OR32mi8: return X86::OR32mi;
- case X86::OR64ri8: return X86::OR64ri32;
- case X86::OR64mi8: return X86::OR64mi32;
-
- // XOR
- case X86::XOR16ri8: return X86::XOR16ri;
- case X86::XOR16mi8: return X86::XOR16mi;
- case X86::XOR32ri8: return X86::XOR32ri;
- case X86::XOR32mi8: return X86::XOR32mi;
- case X86::XOR64ri8: return X86::XOR64ri32;
- case X86::XOR64mi8: return X86::XOR64mi32;
-
- // ADD
- case X86::ADD16ri8: return X86::ADD16ri;
- case X86::ADD16mi8: return X86::ADD16mi;
- case X86::ADD32ri8: return X86::ADD32ri;
- case X86::ADD32mi8: return X86::ADD32mi;
- case X86::ADD64ri8: return X86::ADD64ri32;
- case X86::ADD64mi8: return X86::ADD64mi32;
-
- // ADC
- case X86::ADC16ri8: return X86::ADC16ri;
- case X86::ADC16mi8: return X86::ADC16mi;
- case X86::ADC32ri8: return X86::ADC32ri;
- case X86::ADC32mi8: return X86::ADC32mi;
- case X86::ADC64ri8: return X86::ADC64ri32;
- case X86::ADC64mi8: return X86::ADC64mi32;
-
- // SUB
- case X86::SUB16ri8: return X86::SUB16ri;
- case X86::SUB16mi8: return X86::SUB16mi;
- case X86::SUB32ri8: return X86::SUB32ri;
- case X86::SUB32mi8: return X86::SUB32mi;
- case X86::SUB64ri8: return X86::SUB64ri32;
- case X86::SUB64mi8: return X86::SUB64mi32;
-
- // SBB
- case X86::SBB16ri8: return X86::SBB16ri;
- case X86::SBB16mi8: return X86::SBB16mi;
- case X86::SBB32ri8: return X86::SBB32ri;
- case X86::SBB32mi8: return X86::SBB32mi;
- case X86::SBB64ri8: return X86::SBB64ri32;
- case X86::SBB64mi8: return X86::SBB64mi32;
-
- // CMP
- case X86::CMP16ri8: return X86::CMP16ri;
- case X86::CMP16mi8: return X86::CMP16mi;
- case X86::CMP32ri8: return X86::CMP32ri;
- case X86::CMP32mi8: return X86::CMP32mi;
- case X86::CMP64ri8: return X86::CMP64ri32;
- case X86::CMP64mi8: return X86::CMP64mi32;
-
- // PUSH
- case X86::PUSH32i8: return X86::PUSHi32;
- case X86::PUSH16i8: return X86::PUSHi16;
- case X86::PUSH64i8: return X86::PUSH64i32;
- }
+ return X86::getRelaxedOpcodeArith(Op);
}
static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
@@ -372,7 +293,7 @@ static bool isFirstMacroFusibleInst(const MCInst &Inst,
/// - If the instruction has a ESP/EBP base register, use SS.
/// - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
- assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
+ assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
"Prefixes can be added only in 32-bit or 64-bit mode.");
const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
uint64_t TSFlags = Desc.TSFlags;
@@ -413,7 +334,7 @@ uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
if (SegmentReg != 0)
return X86::getSegmentOverridePrefixForReg(SegmentReg);
- if (STI.hasFeature(X86::Mode64Bit))
+ if (STI.hasFeature(X86::Is64Bit))
return X86::CS_Encoding;
if (MemoryOperand >= 0) {
@@ -572,7 +493,7 @@ bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
return false;
// Branches only need to be aligned in 32-bit or 64-bit mode.
- if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
+ if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
return false;
return true;
@@ -834,7 +755,7 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
void X86AsmBackend::relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const {
// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
- bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
if (RelaxedOp == Inst.getOpcode()) {
@@ -853,7 +774,7 @@ void X86AsmBackend::relaxInstruction(MCInst &Inst,
static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
auto &Inst = RF.getInst();
auto &STI = *RF.getSubtargetInfo();
- bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+ bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
}
@@ -1077,9 +998,9 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
}
unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
- if (STI.hasFeature(X86::Mode16Bit))
+ if (STI.hasFeature(X86::Is16Bit))
return 4;
- if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
+ if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
return 1;
if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
return 7;
@@ -1134,7 +1055,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
};
const char(*Nops)[11] =
- STI->getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
+ STI->getFeatureBits()[X86::Is16Bit] ? Nops16Bit : Nops32Bit;
uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
@@ -1449,7 +1370,6 @@ public:
unsigned InstrOffset = 0;
unsigned StackAdjust = 0;
unsigned StackSize = 0;
- unsigned NumDefCFAOffsets = 0;
int MinAbsOffset = std::numeric_limits<int>::max();
for (const MCCFIInstruction &Inst : Instrs) {
@@ -1457,7 +1377,7 @@ public:
default:
// Any other CFI directives indicate a frame that we aren't prepared
// to represent via compact unwind, so just bail out.
- return 0;
+ return CU::UNWIND_MODE_DWARF;
case MCCFIInstruction::OpDefCfaRegister: {
// Defines a frame pointer. E.g.
//
@@ -1471,7 +1391,7 @@ public:
// generate a compact unwinding representation, so bail out.
if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
(Is64Bit ? X86::RBP : X86::EBP))
- return 0;
+ return CU::UNWIND_MODE_DWARF;
// Reset the counts.
memset(SavedRegs, 0, sizeof(SavedRegs));
@@ -1497,7 +1417,6 @@ public:
// .cfi_def_cfa_offset 80
//
StackSize = Inst.getOffset() / StackDivide;
- ++NumDefCFAOffsets;
break;
}
case MCCFIInstruction::OpOffset: {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 167580ec1ed0..e78e98cfc09e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -18,10 +18,11 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
-#include <cstdint>
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -349,7 +350,8 @@ void X86InstPrinterCommon::printOptionalSegReg(const MCInst *MI, unsigned OpNo,
}
}
-void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
+void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
+ const MCSubtargetInfo &STI) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
unsigned Flags = MI->getFlags();
@@ -379,6 +381,20 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
O << "\t{disp8}";
else if (Flags & X86::IP_USE_DISP32)
O << "\t{disp32}";
+
+ // Determine where the memory operand starts, if present
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1)
+ MemoryOperand += X86II::getOperandBias(Desc);
+
+ // Address-Size override prefix
+ if (Flags & X86::IP_HAS_AD_SIZE &&
+ !X86_MC::needsAddressSizeOverride(*MI, STI, MemoryOperand, TSFlags)) {
+ if (STI.hasFeature(X86::Is16Bit) || STI.hasFeature(X86::Is64Bit))
+ O << "\taddr32\t";
+ else if (STI.hasFeature(X86::Is32Bit))
+ O << "\taddr16\t";
+ }
}
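The effect of the added branch, as a hedged example: an address-size prefix that the operands themselves do not imply is made explicit so it survives an assemble/disassemble round trip:

    // 64-bit, illustrative: an instruction flagged IP_HAS_AD_SIZE whose
    // memory operand (if any) would not otherwise require the 0x67 prefix
    // is printed as, e.g., "addr32 callq *%rax".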
void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index fd82bdcd1a23..0cb5bf014b20 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -33,7 +33,8 @@ public:
raw_ostream &O);
protected:
- void printInstFlags(const MCInst *MI, raw_ostream &O);
+ void printInstFlags(const MCInst *MI, raw_ostream &O,
+ const MCSubtargetInfo &STI);
void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
};
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
new file mode 100644
index 000000000000..901082ce6cf3
--- /dev/null
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
@@ -0,0 +1,165 @@
+//===- X86InstrRelaxTables.cpp - X86 Instruction Relaxation Tables -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 instruction relaxation tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrRelaxTables.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+// These tables are sorted by their ShortOp value, allowing them to be binary
+// searched at runtime without the need for additional storage. The enum values
+// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
+// makes sorting these tables a simple matter of alphabetizing them.
+static const X86InstrRelaxTableEntry InstrRelaxTable[] = {
+ // ADC
+ { X86::ADC16mi8, X86::ADC16mi },
+ { X86::ADC16ri8, X86::ADC16ri },
+ { X86::ADC32mi8, X86::ADC32mi },
+ { X86::ADC32ri8, X86::ADC32ri },
+ { X86::ADC64mi8, X86::ADC64mi32 },
+ { X86::ADC64ri8, X86::ADC64ri32 },
+ // ADD
+ { X86::ADD16mi8, X86::ADD16mi },
+ { X86::ADD16ri8, X86::ADD16ri },
+ { X86::ADD32mi8, X86::ADD32mi },
+ { X86::ADD32ri8, X86::ADD32ri },
+ { X86::ADD64mi8, X86::ADD64mi32 },
+ { X86::ADD64ri8, X86::ADD64ri32 },
+ // AND
+ { X86::AND16mi8, X86::AND16mi },
+ { X86::AND16ri8, X86::AND16ri },
+ { X86::AND32mi8, X86::AND32mi },
+ { X86::AND32ri8, X86::AND32ri },
+ { X86::AND64mi8, X86::AND64mi32 },
+ { X86::AND64ri8, X86::AND64ri32 },
+ // CMP
+ { X86::CMP16mi8, X86::CMP16mi },
+ { X86::CMP16ri8, X86::CMP16ri },
+ { X86::CMP32mi8, X86::CMP32mi },
+ { X86::CMP32ri8, X86::CMP32ri },
+ { X86::CMP64mi8, X86::CMP64mi32 },
+ { X86::CMP64ri8, X86::CMP64ri32 },
+ // IMUL
+ { X86::IMUL16rmi8, X86::IMUL16rmi },
+ { X86::IMUL16rri8, X86::IMUL16rri },
+ { X86::IMUL32rmi8, X86::IMUL32rmi },
+ { X86::IMUL32rri8, X86::IMUL32rri },
+ { X86::IMUL64rmi8, X86::IMUL64rmi32 },
+ { X86::IMUL64rri8, X86::IMUL64rri32 },
+ // OR
+ { X86::OR16mi8, X86::OR16mi },
+ { X86::OR16ri8, X86::OR16ri },
+ { X86::OR32mi8, X86::OR32mi },
+ { X86::OR32ri8, X86::OR32ri },
+ { X86::OR64mi8, X86::OR64mi32 },
+ { X86::OR64ri8, X86::OR64ri32 },
+ // PUSH
+ { X86::PUSH16i8, X86::PUSHi16 },
+ { X86::PUSH32i8, X86::PUSHi32 },
+ { X86::PUSH64i8, X86::PUSH64i32 },
+ // SBB
+ { X86::SBB16mi8, X86::SBB16mi },
+ { X86::SBB16ri8, X86::SBB16ri },
+ { X86::SBB32mi8, X86::SBB32mi },
+ { X86::SBB32ri8, X86::SBB32ri },
+ { X86::SBB64mi8, X86::SBB64mi32 },
+ { X86::SBB64ri8, X86::SBB64ri32 },
+ // SUB
+ { X86::SUB16mi8, X86::SUB16mi },
+ { X86::SUB16ri8, X86::SUB16ri },
+ { X86::SUB32mi8, X86::SUB32mi },
+ { X86::SUB32ri8, X86::SUB32ri },
+ { X86::SUB64mi8, X86::SUB64mi32 },
+ { X86::SUB64ri8, X86::SUB64ri32 },
+ // XOR
+ { X86::XOR16mi8, X86::XOR16mi },
+ { X86::XOR16ri8, X86::XOR16ri },
+ { X86::XOR32mi8, X86::XOR32mi },
+ { X86::XOR32ri8, X86::XOR32ri },
+ { X86::XOR64mi8, X86::XOR64mi32 },
+ { X86::XOR64ri8, X86::XOR64ri32 },
+};
+
+static const X86InstrRelaxTableEntry *
+lookupRelaxTableImpl(ArrayRef<X86InstrRelaxTableEntry> Table,
+ unsigned ShortOp) {
+#ifndef NDEBUG
+ // Make sure the tables are sorted.
+ static std::atomic<bool> RelaxTableChecked(false);
+ if (!RelaxTableChecked.load(std::memory_order_relaxed)) {
+ assert(llvm::is_sorted(InstrRelaxTable) &&
+ std::adjacent_find(std::begin(InstrRelaxTable),
+ std::end(InstrRelaxTable)) ==
+ std::end(InstrRelaxTable) &&
+ "InstrRelaxTable is not sorted and unique!");
+ RelaxTableChecked.store(true, std::memory_order_relaxed);
+ }
+#endif
+
+ const X86InstrRelaxTableEntry *Data = llvm::lower_bound(Table, ShortOp);
+ if (Data != Table.end() && Data->KeyOp == ShortOp)
+ return Data;
+ return nullptr;
+}
+
+const X86InstrRelaxTableEntry *llvm::lookupRelaxTable(unsigned ShortOp) {
+ return lookupRelaxTableImpl(InstrRelaxTable, ShortOp);
+}
+
+namespace {
+
+// This class stores the short form table. It is instantiated as a
+// ManagedStatic to lazily initialize the short form table.
+struct X86ShortFormTable {
+ // Stores relaxation table entries sorted by relaxed form opcode.
+ SmallVector<X86InstrRelaxTableEntry, 0> Table;
+
+ X86ShortFormTable() {
+ for (const X86InstrRelaxTableEntry &Entry : InstrRelaxTable)
+ Table.push_back({Entry.DstOp, Entry.KeyOp});
+
+ llvm::sort(Table);
+
+ // Now that it's sorted, ensure it's unique.
+ assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
+ "Short form table is not unique!");
+ }
+};
+} // namespace
+
+static ManagedStatic<X86ShortFormTable> ShortTable;
+
+const X86InstrRelaxTableEntry *llvm::lookupShortTable(unsigned RelaxOp) {
+ auto &Table = ShortTable->Table;
+ auto I = llvm::lower_bound(Table, RelaxOp);
+ if (I != Table.end() && I->KeyOp == RelaxOp)
+ return &*I;
+ return nullptr;
+}
+
+namespace llvm {
+
+/// Get the short instruction opcode for a given relaxed opcode.
+unsigned X86::getShortOpcodeArith(unsigned RelaxOp) {
+ if (const X86InstrRelaxTableEntry *I = lookupShortTable(RelaxOp))
+ return I->DstOp;
+ return RelaxOp;
+}
+
+/// Get the relaxed instruction opcode for a given short opcode.
+unsigned X86::getRelaxedOpcodeArith(unsigned ShortOp) {
+ if (const X86InstrRelaxTableEntry *I = lookupRelaxTable(ShortOp))
+ return I->DstOp;
+ return ShortOp;
+}
+} // namespace llvm
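A usage sketch of the new query API (opcodes taken from the table above):

    unsigned Relaxed = X86::getRelaxedOpcodeArith(X86::ADD32ri8); // X86::ADD32ri
    unsigned Short   = X86::getShortOpcodeArith(Relaxed);         // X86::ADD32ri8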
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h
new file mode 100644
index 000000000000..0551c1861a58
--- /dev/null
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h
@@ -0,0 +1,54 @@
+//===-- X86InstrRelaxTables.h - X86 Instruction Relaxation Tables -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the interface to query the X86 instruction relaxation
+// tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86INSTRRELAXTABLES_H
+#define LLVM_LIB_TARGET_X86_X86INSTRRELAXTABLES_H
+
+#include <cstdint>
+
+namespace llvm {
+
+// This struct is used for both the relaxed and short tables. The KeyOp is used
+// to determine the sorting order.
+struct X86InstrRelaxTableEntry {
+ uint16_t KeyOp;
+ uint16_t DstOp;
+
+ bool operator<(const X86InstrRelaxTableEntry &RHS) const {
+ return KeyOp < RHS.KeyOp;
+ }
+ bool operator==(const X86InstrRelaxTableEntry &RHS) const {
+ return KeyOp == RHS.KeyOp;
+ }
+ friend bool operator<(const X86InstrRelaxTableEntry &TE, unsigned Opcode) {
+ return TE.KeyOp < Opcode;
+ }
+};
+
+/// Look up the relaxed form table entry for a given \p ShortOp.
+const X86InstrRelaxTableEntry *lookupRelaxTable(unsigned ShortOp);
+
+/// Look up the short form table entry for a given \p RelaxOp.
+const X86InstrRelaxTableEntry *lookupShortTable(unsigned RelaxOp);
+
+namespace X86 {
+
+/// Get the short instruction opcode for a given relaxed opcode.
+unsigned getShortOpcodeArith(unsigned RelaxOp);
+
+/// Get the relaxed instruction opcode for a given short opcode.
+unsigned getRelaxedOpcodeArith(unsigned ShortOp);
+} // namespace X86
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index 48c335f9a777..2a2afa925a9c 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -40,11 +40,11 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &OS) {
- printInstFlags(MI, OS);
+ printInstFlags(MI, OS, STI);
// In 16-bit mode, print data16 as data32.
if (MI->getOpcode() == X86::DATA16_PREFIX &&
- STI.getFeatureBits()[X86::Mode16Bit]) {
+ STI.getFeatureBits()[X86::Is16Bit]) {
OS << "\tdata32";
} else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS))
printInstruction(MI, Address, OS);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 4fa8bc64b245..a21bb6da86de 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -155,65 +156,6 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
return MCFixup::getKindForSize(Size, isPCRel);
}
-/// \param Op operand # of the memory operand.
-///
-/// \returns true if the specified instruction has a 16-bit memory operand.
-static bool is16BitMemOperand(const MCInst &MI, unsigned Op,
- const MCSubtargetInfo &STI) {
- const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
- const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg);
-
- unsigned BaseReg = Base.getReg();
- unsigned IndexReg = Index.getReg();
-
- if (STI.hasFeature(X86::Mode16Bit) && BaseReg == 0 && IndexReg == 0)
- return true;
- if ((BaseReg != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) ||
- (IndexReg != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)))
- return true;
- return false;
-}
-
-/// \param Op operand # of the memory operand.
-///
-/// \returns true if the specified instruction has a 32-bit memory operand.
-static bool is32BitMemOperand(const MCInst &MI, unsigned Op) {
- const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg);
- const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
- return true;
- if (BaseReg.getReg() == X86::EIP) {
- assert(IndexReg.getReg() == 0 && "Invalid eip-based address.");
- return true;
- }
- if (IndexReg.getReg() == X86::EIZ)
- return true;
- return false;
-}
-
-/// \param Op operand # of the memory operand.
-///
-/// \returns true if the specified instruction has a 64-bit memory operand.
-#ifndef NDEBUG
-static bool is64BitMemOperand(const MCInst &MI, unsigned Op) {
- const MCOperand &BaseReg = MI.getOperand(Op + X86::AddrBaseReg);
- const MCOperand &IndexReg = MI.getOperand(Op + X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
-}
-#endif
-
enum GlobalOffsetTableExprKind { GOT_None, GOT_Normal, GOT_SymDiff };
/// Check if this expression starts with _GLOBAL_OFFSET_TABLE_ and if it is
@@ -391,7 +333,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// Handle %rip relative addressing.
if (BaseReg == X86::RIP ||
BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode
- assert(STI.hasFeature(X86::Mode64Bit) &&
+ assert(STI.hasFeature(X86::Is64Bit) &&
"Rip-relative addressing requires 64-bit mode");
assert(IndexReg.getReg() == 0 && !ForceSIB &&
"Invalid rip-relative address");
@@ -462,7 +404,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// 16-bit addressing forms of the ModR/M byte have a different encoding for
// the R/M field and are far more limited in which registers can be used.
- if (is16BitMemOperand(MI, Op, STI)) {
+ if (X86_MC::is16BitMemOperand(MI, Op, STI)) {
if (BaseReg) {
// For 32-bit addressing, the row and column values in Table 2-2 are
// basically the same. It's AX/CX/DX/BX/SP/BP/SI/DI in that order, with
@@ -540,7 +482,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
- (!STI.hasFeature(X86::Mode64Bit) || BaseReg != 0)) {
+ (!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
emitByte(modRMByte(0, RegOpcodeField, 5), OS);
@@ -671,75 +613,29 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
emitByte(0xF2, OS);
// Emit the address size opcode prefix as needed.
- bool NeedAddressOverride;
- uint64_t AdSize = TSFlags & X86II::AdSizeMask;
- if ((STI.hasFeature(X86::Mode16Bit) && AdSize == X86II::AdSize32) ||
- (STI.hasFeature(X86::Mode32Bit) && AdSize == X86II::AdSize16) ||
- (STI.hasFeature(X86::Mode64Bit) && AdSize == X86II::AdSize32)) {
- NeedAddressOverride = true;
- } else if (MemoryOperand < 0) {
- NeedAddressOverride = false;
- } else if (STI.hasFeature(X86::Mode64Bit)) {
- assert(!is16BitMemOperand(MI, MemoryOperand, STI));
- NeedAddressOverride = is32BitMemOperand(MI, MemoryOperand);
- } else if (STI.hasFeature(X86::Mode32Bit)) {
- assert(!is64BitMemOperand(MI, MemoryOperand));
- NeedAddressOverride = is16BitMemOperand(MI, MemoryOperand, STI);
- } else {
- assert(STI.hasFeature(X86::Mode16Bit));
- assert(!is64BitMemOperand(MI, MemoryOperand));
- NeedAddressOverride = !is16BitMemOperand(MI, MemoryOperand, STI);
- }
-
- if (NeedAddressOverride)
+ if (X86_MC::needsAddressSizeOverride(MI, STI, MemoryOperand, TSFlags) ||
+ Flags & X86::IP_HAS_AD_SIZE)
emitByte(0x67, OS);
- // Encoding type for this instruction.
- uint64_t Encoding = TSFlags & X86II::EncodingMask;
- bool HasREX = false;
- if (Encoding)
- emitVEXOpcodePrefix(MemoryOperand, MI, OS);
- else
- HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS);
-
uint64_t Form = TSFlags & X86II::FormMask;
switch (Form) {
default:
break;
case X86II::RawFrmDstSrc: {
- unsigned siReg = MI.getOperand(1).getReg();
- assert(((siReg == X86::SI && MI.getOperand(0).getReg() == X86::DI) ||
- (siReg == X86::ESI && MI.getOperand(0).getReg() == X86::EDI) ||
- (siReg == X86::RSI && MI.getOperand(0).getReg() == X86::RDI)) &&
- "SI and DI register sizes do not match");
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(2).getReg() != X86::DS)
emitSegmentOverridePrefix(2, MI, OS);
- // Emit AdSize prefix as needed.
- if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
- (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
- emitByte(0x67, OS);
CurOp += 3; // Consume operands.
break;
}
case X86II::RawFrmSrc: {
- unsigned siReg = MI.getOperand(0).getReg();
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(1).getReg() != X86::DS)
emitSegmentOverridePrefix(1, MI, OS);
- // Emit AdSize prefix as needed.
- if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::ESI) ||
- (STI.hasFeature(X86::Mode32Bit) && siReg == X86::SI))
- emitByte(0x67, OS);
CurOp += 2; // Consume operands.
break;
}
case X86II::RawFrmDst: {
- unsigned siReg = MI.getOperand(0).getReg();
- // Emit AdSize prefix as needed.
- if ((!STI.hasFeature(X86::Mode32Bit) && siReg == X86::EDI) ||
- (STI.hasFeature(X86::Mode32Bit) && siReg == X86::DI))
- emitByte(0x67, OS);
++CurOp; // Consume operand.
break;
}
@@ -750,6 +646,15 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
}
}
+ // REX prefix is optional, but if used must be immediately before the opcode
+ // Encoding type for this instruction.
+ uint64_t Encoding = TSFlags & X86II::EncodingMask;
+ bool HasREX = false;
+ if (Encoding)
+ emitVEXOpcodePrefix(MemoryOperand, MI, OS);
+ else
+ HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS);
+
return HasREX;
}
@@ -1347,7 +1252,7 @@ bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
// Emit the operand size opcode prefix as needed.
if ((TSFlags & X86II::OpSizeMask) ==
- (STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16))
+ (STI.hasFeature(X86::Is16Bit) ? X86II::OpSize32 : X86II::OpSize16))
emitByte(0x66, OS);
// Emit the LOCK opcode prefix.
@@ -1371,9 +1276,9 @@ bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
}
// Handle REX prefix.
- assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) &&
+ assert((STI.hasFeature(X86::Is64Bit) || !(TSFlags & X86II::REX_W)) &&
"REX.W requires 64bit mode.");
- bool HasREX = STI.hasFeature(X86::Mode64Bit)
+ bool HasREX = STI.hasFeature(X86::Is64Bit)
? emitREXPrefix(MemOperand, MI, STI, OS)
: false;
@@ -1472,7 +1377,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrm:
emitByte(BaseOpcode + OpcodeOffset, OS);
- if (!STI.hasFeature(X86::Mode64Bit) || !isPCRel32Branch(MI, MCII))
+ if (!STI.hasFeature(X86::Is64Bit) || !isPCRel32Branch(MI, MCII))
break;
const MCOperand &Op = MI.getOperand(CurOp++);
@@ -1842,7 +1747,6 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
return new X86MCCodeEmitter(MCII, Ctx);
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h
index 532fecd9951b..cd2baeb1c98e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h
@@ -18,6 +18,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 8913e405539e..49660883ad83 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -72,6 +72,97 @@ bool X86_MC::hasLockPrefix(const MCInst &MI) {
return MI.getFlags() & X86::IP_HAS_LOCK;
}
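+
+// Shared helper for the width predicates below: returns true if either the
+// base or the index register of the memory operand starting at \p Op is in
+// the register class \p RegClassID.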
+static bool isMemOperand(const MCInst &MI, unsigned Op, unsigned RegClassID) {
+ const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg);
+ const MCRegisterClass &RC = X86MCRegisterClasses[RegClassID];
+
+ return (Base.isReg() && Base.getReg() != 0 && RC.contains(Base.getReg())) ||
+ (Index.isReg() && Index.getReg() != 0 && RC.contains(Index.getReg()));
+}
+
+bool X86_MC::is16BitMemOperand(const MCInst &MI, unsigned Op,
+ const MCSubtargetInfo &STI) {
+ const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg);
+
+ if (STI.hasFeature(X86::Is16Bit) && Base.isReg() && Base.getReg() == 0 &&
+ Index.isReg() && Index.getReg() == 0)
+ return true;
+ return isMemOperand(MI, Op, X86::GR16RegClassID);
+}
+
+bool X86_MC::is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &Index = MI.getOperand(Op + X86::AddrIndexReg);
+ if (Base.isReg() && Base.getReg() == X86::EIP) {
+ assert(Index.isReg() && Index.getReg() == 0 && "Invalid eip-based address");
+ return true;
+ }
+ if (Index.isReg() && Index.getReg() == X86::EIZ)
+ return true;
+ return isMemOperand(MI, Op, X86::GR32RegClassID);
+}
+
+#ifndef NDEBUG
+bool X86_MC::is64BitMemOperand(const MCInst &MI, unsigned Op) {
+ return isMemOperand(MI, Op, X86::GR64RegClassID);
+}
+#endif
+
+bool X86_MC::needsAddressSizeOverride(const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ int MemoryOperand, uint64_t TSFlags) {
+ uint64_t AdSize = TSFlags & X86II::AdSizeMask;
+ bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
+ bool Is32BitMode = STI.hasFeature(X86::Is32Bit);
+ bool Is64BitMode = STI.hasFeature(X86::Is64Bit);
+ if ((Is16BitMode && AdSize == X86II::AdSize32) ||
+ (Is32BitMode && AdSize == X86II::AdSize16) ||
+ (Is64BitMode && AdSize == X86II::AdSize32))
+ return true;
+ uint64_t Form = TSFlags & X86II::FormMask;
+ switch (Form) {
+ default:
+ break;
+ case X86II::RawFrmDstSrc: {
+ unsigned siReg = MI.getOperand(1).getReg();
+ assert(((siReg == X86::SI && MI.getOperand(0).getReg() == X86::DI) ||
+ (siReg == X86::ESI && MI.getOperand(0).getReg() == X86::EDI) ||
+ (siReg == X86::RSI && MI.getOperand(0).getReg() == X86::RDI)) &&
+ "SI and DI register sizes do not match");
+ return (!Is32BitMode && siReg == X86::ESI) ||
+ (Is32BitMode && siReg == X86::SI);
+ }
+ case X86II::RawFrmSrc: {
+ unsigned siReg = MI.getOperand(0).getReg();
+ return (!Is32BitMode && siReg == X86::ESI) ||
+ (Is32BitMode && siReg == X86::SI);
+ }
+ case X86II::RawFrmDst: {
+ unsigned siReg = MI.getOperand(0).getReg();
+ return (!Is32BitMode && siReg == X86::EDI) ||
+ (Is32BitMode && siReg == X86::DI);
+ }
+ }
+
+ // Determine where the memory operand starts, if present.
+ if (MemoryOperand < 0)
+ return false;
+
+ if (STI.hasFeature(X86::Is64Bit)) {
+ assert(!is16BitMemOperand(MI, MemoryOperand, STI));
+ return is32BitMemOperand(MI, MemoryOperand);
+ }
+ if (STI.hasFeature(X86::Is32Bit)) {
+ assert(!is64BitMemOperand(MI, MemoryOperand));
+ return is16BitMemOperand(MI, MemoryOperand, STI);
+ }
+ assert(STI.hasFeature(X86::Is16Bit));
+ assert(!is64BitMemOperand(MI, MemoryOperand));
+ return !is16BitMemOperand(MI, MemoryOperand, STI);
+}
+
void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
// FIXME: TableGen these.
for (unsigned Reg = X86::NoRegister + 1; Reg < X86::NUM_TARGET_REGS; ++Reg) {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 35604cd3ec0a..d0530bd4d650 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -63,6 +63,28 @@ void initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI);
/// Returns true if this instruction has a LOCK prefix.
bool hasLockPrefix(const MCInst &MI);
+/// \param Op operand # of the memory operand.
+///
+/// \returns true if the specified instruction has a 16-bit memory operand.
+bool is16BitMemOperand(const MCInst &MI, unsigned Op,
+ const MCSubtargetInfo &STI);
+
+/// \param Op operand # of the memory operand.
+///
+/// \returns true if the specified instruction has a 32-bit memory operand.
+bool is32BitMemOperand(const MCInst &MI, unsigned Op);
+
+/// \param Op operand # of the memory operand.
+///
+/// \returns true if the specified instruction has a 64-bit memory operand.
+#ifndef NDEBUG
+bool is64BitMemOperand(const MCInst &MI, unsigned Op);
+#endif
+
+/// Returns true if this instruction needs an Address-Size override prefix.
+bool needsAddressSizeOverride(const MCInst &MI, const MCSubtargetInfo &STI,
+ int MemoryOperand, uint64_t TSFlags);
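+//
+// A minimal call-site sketch (mirroring the MCCodeEmitter, where the result
+// is combined with the explicit X86::IP_HAS_AD_SIZE instruction flag):
+//   if (X86_MC::needsAddressSizeOverride(MI, STI, MemoryOperand, TSFlags) ||
+//       (Flags & X86::IP_HAS_AD_SIZE))
+//     emitByte(0x67, OS); // 0x67 is the address-size override prefix.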
+
/// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
/// do not need to go through TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
@@ -70,7 +92,6 @@ MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
}
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T,
@@ -142,4 +163,7 @@ MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned,
#define GET_SUBTARGETINFO_ENUM
#include "X86GenSubtargetInfo.inc"
+#define GET_X86_MNEMONIC_TABLES_H
+#include "X86GenMnemonicTables.inc"
+
#endif
diff --git a/llvm/include/llvm/IR/AttributesAMDGPU.td b/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp
index e2a0f045b656..39b7f0f4160e 100644
--- a/llvm/include/llvm/IR/AttributesAMDGPU.td
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MnemonicTables.cpp
@@ -1,4 +1,4 @@
-//===- AttributesAMDGPU.td - Defines AMDGPU attributes -----*- tablegen -*-===//
+//===-- X86MnemonicTables.cpp - X86 Mnemonic Tables -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines AMDGPU specific attributes.
+// This file provides X86 mnemonic tables.
//
//===----------------------------------------------------------------------===//
-def AMDGPUUnsafeFPAtomics : StrBoolAttr<"amdgpu-unsafe-fp-atomics">;
-def : MergeRule<"setAND<AMDGPUUnsafeFPAtomicsAttr>">;
+#include "X86InstrInfo.h"
+
+#define GET_X86_MNEMONIC_TABLES_CPP
+#include "X86GenMnemonicTables.inc"
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index c29211246123..36945d1f6746 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -9,6 +9,7 @@
#include "X86MCTargetDesc.h"
#include "X86TargetStreamer.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCWin64EH.h"
@@ -25,15 +26,15 @@ public:
std::unique_ptr<MCObjectWriter> OW)
: MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {}
- void EmitWinEHHandlerData(SMLoc Loc) override;
- void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
- void EmitWindowsUnwindTables() override;
- void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override;
+ void emitWinEHHandlerData(SMLoc Loc) override;
+ void emitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
+ void emitWindowsUnwindTables() override;
+ void emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override;
void finishImpl() override;
};
-void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
- MCStreamer::EmitWinEHHandlerData(Loc);
+void X86WinCOFFStreamer::emitWinEHHandlerData(SMLoc Loc) {
+ MCStreamer::emitWinEHHandlerData(Loc);
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section.
@@ -41,17 +42,17 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
EHStreamer.EmitUnwindInfo(*this, CurFrame, /* HandlerData = */ true);
}
-void X86WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+void X86WinCOFFStreamer::emitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
}
-void X86WinCOFFStreamer::EmitWindowsUnwindTables() {
+void X86WinCOFFStreamer::emitWindowsUnwindTables() {
if (!getNumWinFrameInfos())
return;
EHStreamer.Emit(*this);
}
-void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) {
+void X86WinCOFFStreamer::emitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) {
X86TargetStreamer *XTS =
static_cast<X86TargetStreamer *>(getTargetStreamer());
XTS->emitFPOData(ProcSym, Loc);
@@ -59,7 +60,7 @@ void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) {
void X86WinCOFFStreamer::finishImpl() {
emitFrames(nullptr);
- EmitWindowsUnwindTables();
+ emitWindowsUnwindTables();
MCWinCOFFStreamer::finishImpl();
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index bf3f4e990ecc..f2827c568109 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 10e1c5d6ed38..7344900f2e31 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -79,6 +79,9 @@ FunctionPass *createX86DynAllocaExpander();
/// Return a pass that configures the tile registers.
FunctionPass *createX86TileConfigPass();
+/// Return a pass that preconfigures the tile registers before fast reg allocation.
+FunctionPass *createX86FastPreTileConfigPass();
+
/// Return a pass that configures the tile registers after fast reg allocation.
FunctionPass *createX86FastTileConfigPass();
@@ -175,6 +178,7 @@ void initializeX86PartialReductionPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
void initializeX86PreTileConfigPass(PassRegistry &);
+void initializeX86FastPreTileConfigPass(PassRegistry &);
void initializeX86FastTileConfigPass(PassRegistry &);
void initializeX86TileConfigPass(PassRegistry &);
void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 380507308c3d..a5c6b40c493c 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -18,13 +18,13 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
-
-def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
- "64-bit mode (x86_64)">;
-def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
- "32-bit mode (80386)">;
-def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
- "16-bit mode (i8086)">;
+// disregarding specific ABI / programming model
+def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
+ "64-bit mode (x86_64)">;
+def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
+ "32-bit mode (80386)">;
+def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
+ "16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
@@ -34,16 +34,16 @@ def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
"Enable X87 float instructions">;
def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
- "Enable NOPL instruction">;
+ "Enable NOPL instruction (generally pentium pro+)">;
-def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
+def FeatureCMOV : SubtargetFeature<"cmov","HasCMOV", "true",
"Enable conditional move instructions">;
-def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
- "Support CMPXCHG8B instructions">;
+def FeatureCX8 : SubtargetFeature<"cx8", "HasCX8", "true",
+ "Support CMPXCHG8B instructions">;
def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true",
- "Enable SSE 4.2 CRC32 instruction">;
+ "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
@@ -98,11 +98,11 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
-def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+def FeatureX86_64 : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions">;
-def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
- "64-bit with cmpxchg16b",
- [FeatureCMPXCHG8B]>;
+def FeatureCX16 : SubtargetFeature<"cx16", "HasCX16", "true",
+ "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
+ [FeatureCX8]>;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
@@ -119,7 +119,7 @@ def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
-def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
+def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
@@ -198,7 +198,7 @@ def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
[FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
"HasSSEUnalignedMem", "true",
- "Allow unaligned memory operands with SSE instructions">;
+ "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
@@ -228,20 +228,22 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
+// Processor supports CET SHSTK - Control-Flow Enforcement Technology
+// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
"Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
-def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
+def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
"Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
"Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
- "Enable Cache Demote">;
+ "Enable Cache Line Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
@@ -285,9 +287,9 @@ def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
- "Support movdiri instruction">;
+ "Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
- "Support movdir64b instruction">;
+ "Support movdir64b instruction (direct store 64 bytes)">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
@@ -380,6 +382,17 @@ def FeatureTaggedGlobals
"Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits.">;
+// Controls codegen mitigations against the Straight-Line Speculation vulnerability.
+def FeatureHardenSlsRet
+ : SubtargetFeature<
+ "harden-sls-ret", "HardenSlsRet", "true",
+ "Harden against straight line speculation across RET instructions.">;
+
+def FeatureHardenSlsIJmp
+ : SubtargetFeature<
+ "harden-sls-ijmp", "HardenSlsIJmp", "true",
+ "Harden against straight line speculation across indirect JMP instructions.">;
+
//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
@@ -388,7 +401,7 @@ def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
- "PMULLD instruction is slow">;
+ "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
"true",
@@ -396,27 +409,31 @@ def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
- "IsUAMem16Slow", "true",
+ "IsUnalignedMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
- "IsUAMem32Slow", "true",
+ "IsUnalignedMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
- "Use LEA for adjusting the stack pointer">;
+ "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
+// True if 8-bit divisions are significantly faster than
+// 32-bit divisions and should be used when possible.
def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
+// True if 32-bit divides are significantly faster than
+// 64-bit divisions and should be used when possible.
def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
"Use 32-bit divide for positive values less than 2^32">;
def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
- "Pad short functions">;
+ "Pad short functions (to prevent a stall when returning too early)">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
@@ -425,15 +442,21 @@ def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
-def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+// True if the LEA instruction inputs have to be ready at address generation
+// (AG) time.
+def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
+// True if the LEA instruction has all three source operands: base, index,
+// and offset, or if it uses base and index registers where the base is
+// EBP, RBP, or R13.
def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
+// True if INC and DEC instructions are slow when writing to flags
def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
@@ -445,6 +468,31 @@ def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
+def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
+ "HasMULCFalseDeps", "true",
+ "VF[C]MULCPH/SH has a false dependency on dest register">;
+
+def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
+ "HasPERMFalseDeps", "true",
+ "VPERMD/Q/PS/PD has a false dependency on dest register">;
+
+def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
+ "HasRANGEFalseDeps", "true",
+ "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
+
+def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
+ "HasGETMANTFalseDeps", "true",
+ "VGETMANTSS/SD/SH and VGETMANDPS/PD(memory version) has a"
+ " false dependency on dest register">;
+
+def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
+ "HasMULLQFalseDeps", "true",
+ "VPMULLQ has a false dependency on dest register">;
+
+def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
+ "HasSBBDepBreaking", "true",
+ "SBB with same register has no source dependency">;
+
// On recent X86 (port bound) processors, it's preferable to combine into a single shuffle
// using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle
@@ -470,9 +518,14 @@ def TuningInsertVZEROUPPER
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
+
+// True if hardware SQRTSS instruction is at least as fast (latency) as
+// RSQRTSS followed by a Newton-Raphson iteration.
def TuningFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
+// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
+// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
def TuningFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
@@ -529,7 +582,7 @@ def TuningMacroFusion
// similar to Skylake Server (AVX-512).
def TuningFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
- "Indicates if gather is reasonably fast">;
+ "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
def TuningPrefer128Bit
: SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
@@ -578,17 +631,13 @@ def TuningUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
-// Enable use of alias analysis during code generation.
-def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
- "Use alias analysis during codegen">;
-
//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
//===----------------------------------------------------------------------===//
// Bonnell
-def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
+def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -632,11 +681,11 @@ include "X86SchedIceLake.td"
def ProcessorFeatures {
// x86-64 and x86-64-v[234]
list<SubtargetFeature> X86_64V1Features = [
- FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
- FeatureFXSR, FeatureNOPL, Feature64Bit
+ FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureNOPL, FeatureX86_64,
];
list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
- FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT,
+ FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
FeatureSSE42
]);
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
@@ -862,22 +911,27 @@ def ProcessorFeatures {
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureUINTR];
- list<SubtargetFeature> SPRTuning = ICXTuning;
+ list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
+ TuningPERMFalseDeps,
+ TuningRANGEFalseDeps,
+ TuningGETMANTFalseDeps,
+ TuningMULLQFalseDeps];
+ list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
list<SubtargetFeature> SPRFeatures =
!listconcat(ICXFeatures, SPRAdditionalFeatures);
// Atom
list<SubtargetFeature> AtomFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
+ FeatureX86_64,
+ FeatureCX16,
FeatureMOVBE,
- FeatureLAHFSAHF];
+ FeatureLAHFSAHF64];
list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
TuningSlowUAMem16,
TuningLEAForSP,
@@ -968,25 +1022,26 @@ def ProcessorFeatures {
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureWAITPKG];
- list<SubtargetFeature> ADLTuning = SKLTuning;
+ list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps];
+ list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
list<SubtargetFeature> ADLFeatures =
!listconcat(TRMFeatures, ADLAdditionalFeatures);
// Knights Landing
list<SubtargetFeature> KNLFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureFXSR,
FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
+ FeatureX86_64,
+ FeatureCX16,
FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
- FeatureLAHFSAHF,
+ FeatureLAHFSAHF64,
FeatureAES,
FeatureRDRAND,
FeatureF16C,
@@ -1018,41 +1073,43 @@ def ProcessorFeatures {
// Barcelona
list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureSSE4A,
Feature3DNowA,
FeatureFXSR,
FeatureNOPL,
- FeatureCMPXCHG16B,
+ FeatureCX16,
FeaturePRFCHW,
FeatureLZCNT,
FeaturePOPCNT,
- FeatureLAHFSAHF,
+ FeatureLAHFSAHF64,
FeatureCMOV,
- Feature64Bit];
+ FeatureX86_64];
list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
TuningSlowSHLD,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// Bobcat
list<SubtargetFeature> BtVer1Features = [FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
FeatureFXSR,
FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
+ FeatureX86_64,
+ FeatureCX16,
FeaturePRFCHW,
FeatureLZCNT,
FeaturePOPCNT,
- FeatureLAHFSAHF];
+ FeatureLAHFSAHF64];
list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
TuningFastScalarShiftMasks,
TuningFastVectorShiftMasks,
TuningSlowSHLD,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// Jaguar
@@ -1072,17 +1129,18 @@ def ProcessorFeatures {
TuningFastScalarShiftMasks,
TuningFastVectorShiftMasks,
TuningFastMOVBE,
+ TuningSBBDepBreaking,
TuningSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
// Bulldozer
list<SubtargetFeature> BdVer1Features = [FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureCMOV,
FeatureXOP,
- Feature64Bit,
- FeatureCMPXCHG16B,
+ FeatureX86_64,
+ FeatureCX16,
FeatureAES,
FeatureCRC32,
FeaturePRFCHW,
@@ -1094,11 +1152,12 @@ def ProcessorFeatures {
FeaturePOPCNT,
FeatureXSAVE,
FeatureLWP,
- FeatureLAHFSAHF];
+ FeatureLAHFSAHF64];
list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
TuningFast11ByteNOP,
TuningFastScalarShiftMasks,
TuningBranchFusion,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// PileDriver
@@ -1140,15 +1199,15 @@ def ProcessorFeatures {
FeatureCLFLUSHOPT,
FeatureCLZERO,
FeatureCMOV,
- Feature64Bit,
- FeatureCMPXCHG16B,
+ FeatureX86_64,
+ FeatureCX16,
FeatureCRC32,
FeatureF16C,
FeatureFMA,
FeatureFSGSBase,
FeatureFXSR,
FeatureNOPL,
- FeatureLAHFSAHF,
+ FeatureLAHFSAHF64,
FeatureLZCNT,
FeatureMMX,
FeatureMOVBE,
@@ -1169,9 +1228,13 @@ def ProcessorFeatures {
TuningFastBEXTR,
TuningFast15ByteNOP,
TuningBranchFusion,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
TuningFastScalarShiftMasks,
+ TuningFastVariablePerLaneShuffle,
TuningFastMOVBE,
TuningSlowSHLD,
+ TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
@@ -1184,11 +1247,9 @@ def ProcessorFeatures {
FeaturePKU,
FeatureVAES,
FeatureVPCLMULQDQ];
- list<SubtargetFeature> ZN3AdditionalTuning =
- [TuningMacroFusion,
- TuningFastVariablePerLaneShuffle];
+ list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
list<SubtargetFeature> ZN3Tuning =
- !listconcat(ZNTuning, ZN3AdditionalTuning);
+ !listconcat(ZN2Tuning, ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features =
!listconcat(ZN2Features, ZN3AdditionalFeatures);
}
@@ -1209,39 +1270,43 @@ class ProcModel<string Name, SchedMachineModel Model,
// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
-// constructor checks that any CPU used in 64-bit mode has Feature64Bit enabled.
-// It has no effect on code generation.
+// constructor checks that any CPU used in 64-bit mode has FeatureX86_64
+// enabled. It has no effect on code generation.
+// NOTE: As a default tuning, "generic" aims to produce code optimized for the
+// most common X86 processors. The tunings might be changed over time. It is
+// recommended to use "x86-64" in lit tests for consistency.
def : ProcModel<"generic", SandyBridgeModel,
- [FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
+ [FeatureX87, FeatureCX8, FeatureX86_64],
[TuningSlow3OpsLEA,
TuningSlowDivide64,
- TuningSlowIncDec,
TuningMacroFusion,
+ TuningFastScalarFSQRT,
+ TuningFast15ByteNOP,
TuningInsertVZEROUPPER]>;
def : Proc<"i386", [FeatureX87],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486", [FeatureX87],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B],
+def : Proc<"i586", [FeatureX87, FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B],
+def : Proc<"pentium", [FeatureX87, FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
+def : Proc<"pentium-mmx", [FeatureX87, FeatureCX8, FeatureMMX],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
+def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
+def : Proc<"pentiumpro", [FeatureX87, FeatureCX8, FeatureCMOV,
FeatureNOPL],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
+def : Proc<"pentium2", [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
FeatureFXSR, FeatureNOPL],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium3", "pentium3m"] in {
- def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
@@ -1257,42 +1322,42 @@ foreach P = ["pentium3", "pentium3m"] in {
// changes slightly.
def : ProcModel<"pentium-m", GenericPostRAModel,
- [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
+ [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcModel<P, GenericPostRAModel,
- [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
+ [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Intel Quark.
-def : Proc<"lakemont", [FeatureCMPXCHG8B],
+def : Proc<"lakemont", [FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
- [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
+ [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel,
- [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
+ [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : ProcModel<"nocona", GenericPostRAModel, [
FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
+ FeatureX86_64,
+ FeatureCX16,
],
[
TuningSlowUAMem16,
@@ -1302,15 +1367,15 @@ def : ProcModel<"nocona", GenericPostRAModel, [
// Intel Core 2 Solo/Duo.
def : ProcModel<"core2", SandyBridgeModel, [
FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureLAHFSAHF
+ FeatureX86_64,
+ FeatureCX16,
+ FeatureLAHFSAHF64
],
[
TuningMacroFusion,
@@ -1319,15 +1384,15 @@ def : ProcModel<"core2", SandyBridgeModel, [
]>;
def : ProcModel<"penryn", SandyBridgeModel, [
FeatureX87,
- FeatureCMPXCHG8B,
+ FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureLAHFSAHF
+ FeatureX86_64,
+ FeatureCX16,
+ FeatureLAHFSAHF64
],
[
TuningMacroFusion,
@@ -1416,38 +1481,38 @@ def : ProcModel<"alderlake", SkylakeClientModel,
// AMD CPUs.
-def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
+def : Proc<"k6", [FeatureX87, FeatureCX8, FeatureMMX],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
+def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
+def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["athlon", "athlon-tbird"] in {
- def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
FeatureNOPL],
[TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
- def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
[TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
- def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
- TuningInsertVZEROUPPER]>;
+ TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
- def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
- Feature64Bit],
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
+ FeatureX86_64],
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
- TuningInsertVZEROUPPER]>;
+ TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
@@ -1482,7 +1547,7 @@ def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
-def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
+def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
@@ -1491,7 +1556,7 @@ def : Proc<"winchip2", [FeatureX87, Feature3DNow],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3", [FeatureX87, Feature3DNow],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
+def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index d48b8e458219..c205395aa084 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -60,8 +61,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SMShadowTracker.startFunction(MF);
CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
- *Subtarget->getInstrInfo(), *Subtarget->getRegisterInfo(),
- MF.getContext()));
+ *Subtarget->getInstrInfo(), MF.getContext()));
EmitFPOData =
Subtarget->isTargetWin32() && MF.getMMI().getModule()->getCodeViewFlag();
@@ -70,12 +70,12 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (Subtarget->isTargetCOFF()) {
bool Local = MF.getFunction().hasLocalLinkage();
- OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer->EmitCOFFSymbolStorageClass(
+ OutStreamer->beginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer->emitCOFFSymbolStorageClass(
Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
- << COFF::SCT_COMPLEX_TYPE_SHIFT);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer->endCOFFSymbolDef();
}
// Emit the rest of the function body.
@@ -249,7 +249,7 @@ void X86AsmPrinter::PrintOperand(const MachineInstr *MI, unsigned OpNo,
void X86AsmPrinter::PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier) {
const MachineOperand &MO = MI->getOperand(OpNo);
- if (!Modifier || MO.getType() != MachineOperand::MO_Register)
+ if (!Modifier || !MO.isReg())
return PrintOperand(MI, OpNo, O);
if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
O << '%';
@@ -336,6 +336,37 @@ void X86AsmPrinter::PrintLeaMemReference(const MachineInstr *MI, unsigned OpNo,
}
}
+static bool isSimpleReturn(const MachineInstr &MI) {
+ // We exclude all tail calls here which set both isReturn and isCall.
+ return MI.getDesc().isReturn() && !MI.getDesc().isCall();
+}
+
+static bool isIndirectBranchOrTailCall(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return MI.getDesc().isIndirectBranch() /*Keep the opcode list below in a readable shape*/ ||
+ Opc == X86::TAILJMPr || Opc == X86::TAILJMPm ||
+ Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 ||
+ Opc == X86::TCRETURNri || Opc == X86::TCRETURNmi ||
+ Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 ||
+ Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX;
+}
+
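+// Straight-line speculation (SLS) hardening: when harden-sls-ret and/or
+// harden-sls-ijmp is enabled, blocks ending in a return or an indirect
+// jump/tail call are terminated with an INT3 so that execution cannot
+// speculatively continue past the control-flow change.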
+void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
+ if (Subtarget->hardenSlsRet() || Subtarget->hardenSlsIJmp()) {
+ auto I = MBB.getLastNonDebugInstr();
+ if (I != MBB.end()) {
+ if ((Subtarget->hardenSlsRet() && isSimpleReturn(*I)) ||
+ (Subtarget->hardenSlsIJmp() && isIndirectBranchOrTailCall(*I))) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::INT3);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ }
+ }
+ }
+ AsmPrinter::emitBasicBlockEnd(MBB);
+ SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
+}
+
void X86AsmPrinter::PrintMemReference(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier) {
assert(isMem(*MI, OpNo) && "Invalid memory reference!");
@@ -363,6 +394,12 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
BaseReg.getReg() == X86::RIP)
HasBaseReg = false;
+ // Handle the case where only the displacement should be printed.
+ if (Modifier && (DispSpec.isGlobal() || DispSpec.isSymbol()) &&
+ !strcmp(Modifier, "disp-only")) {
+ HasBaseReg = false;
+ }
+
// If this has a segment register, print it.
if (SegReg.getReg()) {
PrintOperand(MI, OpNo + X86::AddrSegmentReg, O);
@@ -606,11 +643,14 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
PrintMemReference(MI, OpNo, O, "H");
}
return false;
- case 'P': // Don't print @PLT, but do print as memory.
+ // Print the memory reference with only its displacement. The modifier 'P'
+ // is used in inline asm for a call or global symbol that cannot be
+ // addressed through a base or index register.
+ case 'P':
if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
- PrintIntelMemReference(MI, OpNo, O, "no-rip");
+ PrintIntelMemReference(MI, OpNo, O, "disp-only");
} else {
- PrintMemReference(MI, OpNo, O, "no-rip");
+ PrintMemReference(MI, OpNo, O, "disp-only");
}
return false;
}
@@ -641,7 +681,7 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
MCSection *Cur = OutStreamer->getCurrentSectionOnly();
MCSection *Nt = MMI->getContext().getELFSection(
".note.gnu.property", ELF::SHT_NOTE, ELF::SHF_ALLOC);
- OutStreamer->SwitchSection(Nt);
+ OutStreamer->switchSection(Nt);
// Emitting note header.
const int WordSize = TT.isArch64Bit() && !TT.isX32() ? 8 : 4;
@@ -658,21 +698,21 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
emitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding
OutStreamer->endSection(Nt);
- OutStreamer->SwitchSection(Cur);
+ OutStreamer->switchSection(Cur);
}
}
if (TT.isOSBinFormatMachO())
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ OutStreamer->switchSection(getObjFileLowering().getTextSection());
if (TT.isOSBinFormatCOFF()) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
MCSymbol *S = MMI->getContext().getOrCreateSymbol(StringRef("@feat.00"));
- OutStreamer->BeginCOFFSymbolDef(S);
- OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->beginCOFFSymbolDef(S);
+ OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer->endCOFFSymbolDef();
int64_t Feat00Flags = 0;
if (TT.getArch() == Triple::x86) {
@@ -739,7 +779,7 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) {
// Output stubs for external and common global variables.
Stubs = MMIMacho.GetGVStubList();
if (!Stubs.empty()) {
- OutStreamer.SwitchSection(MMI->getContext().getMachOSection(
+ OutStreamer.switchSection(MMI->getContext().getMachOSection(
"__IMPORT", "__pointers", MachO::S_NON_LAZY_SYMBOL_POINTERS,
SectionKind::getMetadata()));
@@ -747,7 +787,7 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) {
emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
Stubs.clear();
- OutStreamer.AddBlankLine();
+ OutStreamer.addBlankLine();
}
}
@@ -795,6 +835,22 @@ void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
emitStackMaps(SM);
FM.serializeToFaultMapSection();
}
+
+ // Emit __morestack address if needed for indirect calls.
+ if (TT.getArch() == Triple::x86_64 && TM.getCodeModel() == CodeModel::Large) {
+ if (MCSymbol *AddrSymbol = OutContext.lookupSymbol("__morestack_addr")) {
+ Align Alignment(1);
+ MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
+ getDataLayout(), SectionKind::getReadOnly(),
+ /*C=*/nullptr, Alignment);
+ OutStreamer->switchSection(ReadOnlySection);
+ OutStreamer->emitLabel(AddrSymbol);
+
+ unsigned PtrSize = MAI->getCodePointerSize();
+ OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"),
+ PtrSize);
+ }
+ }
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h
index 94679e6e3d11..d53c26b729ef 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -131,10 +131,7 @@ public:
void emitInstruction(const MachineInstr *MI) override;
- void emitBasicBlockEnd(const MachineBasicBlock &MBB) override {
- AsmPrinter::emitBasicBlockEnd(MBB);
- SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
- }
+ void emitBasicBlockEnd(const MachineBasicBlock &MBB) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
diff --git a/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp b/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
index 0899783d5f60..2ecf49382d29 100644
--- a/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
+++ b/llvm/lib/Target/X86/X86AvoidTrailingCall.cpp
@@ -35,6 +35,7 @@
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#define AVOIDCALL_DESC "X86 avoid trailing call pass"
@@ -69,8 +70,8 @@ INITIALIZE_PASS(X86AvoidTrailingCallPass, AVOIDCALL_NAME, AVOIDCALL_DESC, false,
// A real instruction is a non-meta, non-pseudo instruction. Some pseudos
// expand to nothing, and some expand to code. This logic conservatively assumes
// they might expand to nothing.
-static bool isRealInstruction(MachineInstr &MI) {
- return !MI.isPseudo() && !MI.isMetaInstruction();
+static bool isCallOrRealInstruction(MachineInstr &MI) {
+ return MI.isCall() || (!MI.isPseudo() && !MI.isMetaInstruction());
}
// Return true if this is a call instruction, but not a tail call.
@@ -100,7 +101,7 @@ bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) {
continue;
// Find the last real instruction in this block.
- auto LastRealInstr = llvm::find_if(reverse(MBB), isRealInstruction);
+ auto LastRealInstr = llvm::find_if(reverse(MBB), isCallOrRealInstruction);
// If the block is empty or the last real instruction is a call instruction,
// insert an int3. If there is a call instruction, insert the int3 between
diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index c80a5d5bb332..ded93fdc011c 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -299,7 +299,7 @@ static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
const MachineFunction &MF = State.getMachineFunction();
size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
- bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
+ bool Is64Bit = MF.getSubtarget<X86Subtarget>().is64Bit();
unsigned SlotSize = Is64Bit ? 8 : 4;
unsigned Offset;
if (ArgCount == 1 && ValNo == 0) {
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 96d3d1390a59..f32891552a82 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -97,6 +97,11 @@ static cl::opt<bool> ForceMemOperand(
cl::desc("Convert cmovs to branches whenever they have memory operands."),
cl::init(true), cl::Hidden);
+static cl::opt<bool> ForceAll(
+ "x86-cmov-converter-force-all",
+ cl::desc("Convert all cmovs to branches."),
+ cl::init(false), cl::Hidden);
+
namespace {
/// Converts X86 cmov instructions into branches when profitable.
@@ -174,11 +179,11 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
TSchedModel.init(&STI);
// Before we handle the more subtle cases of register-register CMOVs inside
- // of potentially hot loops, we want to quickly remove all CMOVs with
- // a memory operand. The CMOV will risk a stall waiting for the load to
- // complete that speculative execution behind a branch is better suited to
- // handle on modern x86 chips.
- if (ForceMemOperand) {
+ // of potentially hot loops, we want to quickly remove all CMOVs (ForceAll) or
+ // the ones with a memory operand (ForceMemOperand option). Such a CMOV risks
+ // a stall waiting for the load to complete, which speculative execution
+ // behind a branch is better suited to handle on modern x86 chips.
+ if (ForceMemOperand || ForceAll) {
CmovGroups AllCmovGroups;
SmallVector<MachineBasicBlock *, 4> Blocks;
for (auto &MBB : MF)
@@ -186,7 +191,8 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
for (auto &Group : AllCmovGroups) {
// Skip any group that doesn't do at least one memory operand cmov.
- if (llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
+ if (ForceMemOperand && !ForceAll &&
+ llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
continue;
// For CMOV groups which we can rewrite and which contain a memory load,
@@ -196,12 +202,15 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
convertCmovInstsToBranches(Group);
}
}
+ // Early return as ForceAll converts all CmovGroups.
+ if (ForceAll)
+ return Changed;
}
//===--------------------------------------------------------------------===//
// Register-operand Conversion Algorithm
// ---------
- // For each inner most loop
+ // For each innermost loop
// collectCmovCandidates() {
// Find all CMOV-group-candidates.
// }
@@ -230,7 +239,7 @@ bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
Loops.push_back(Child);
for (MachineLoop *CurrLoop : Loops) {
- // Optimize only inner most loops.
+ // Optimize only innermost loops.
if (!CurrLoop->getSubLoops().empty())
continue;
@@ -520,7 +529,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
//===--------------------------------------------------------------------===//
// Step 3: Check for each CMOV-group-candidate if it worth to be optimized.
// Worth-Optimize-Group:
- // Iff it worths to optimize all CMOV instructions in the group.
+ // Iff it is worthwhile to optimize all CMOV instructions in the group.
//
// Worth-Optimize-CMOV:
// Predicted branch is faster than CMOV by the difference between depth of
diff --git a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
index 2ff8ee19561b..29668f4b2761 100644
--- a/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -16,6 +16,7 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/ProfileData/SampleProf.h"
@@ -159,7 +160,7 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
}
// Since we were able to encode, bump the MemOpDiscriminators.
++MemOpDiscriminators[L];
- DI = DI->cloneWithDiscriminator(EncodedDiscriminator.getValue());
+ DI = DI->cloneWithDiscriminator(*EncodedDiscriminator);
assert(DI && "DI should not be nullptr");
updateDebugInfo(&MI, DI);
Changed = true;
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 9826bf4bf861..9d4338deca35 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -15,6 +15,7 @@
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/STLExtras.h"
@@ -86,7 +87,7 @@ protected:
public:
InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {}
- virtual ~InstrConverterBase() {}
+ virtual ~InstrConverterBase() = default;
/// \returns true if \p MI is legal to convert.
virtual bool isLegal(const MachineInstr *MI,
@@ -374,7 +375,7 @@ class X86DomainReassignment : public MachineFunctionPass {
const X86InstrInfo *TII = nullptr;
/// All edges that are included in some closure
- DenseSet<unsigned> EnclosedEdges;
+ BitVector EnclosedEdges{8, false};
/// All instructions that are included in some closure.
DenseMap<MachineInstr *, unsigned> EnclosedInstrs;
@@ -429,10 +430,10 @@ char X86DomainReassignment::ID = 0;
void X86DomainReassignment::visitRegister(Closure &C, Register Reg,
RegDomain &Domain,
SmallVectorImpl<unsigned> &Worklist) {
- if (EnclosedEdges.count(Reg))
+ if (!Reg.isVirtual())
return;
- if (!Reg.isVirtual())
+ if (EnclosedEdges.test(Register::virtReg2Index(Reg)))
return;
if (!MRI->hasOneDef(Reg))
@@ -550,7 +551,7 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) {
// Register already in this closure.
if (!C.insertEdge(CurReg))
continue;
- EnclosedEdges.insert(Reg);
+ EnclosedEdges.set(Register::virtReg2Index(Reg));
MachineInstr *DefMI = MRI->getVRegDef(CurReg);
encloseInstr(C, DefMI);
@@ -742,6 +743,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
EnclosedEdges.clear();
+ EnclosedEdges.resize(MRI->getNumVirtRegs());
EnclosedInstrs.clear();
std::vector<Closure> Closures;
@@ -756,7 +758,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
continue;
// Register already in closure.
- if (EnclosedEdges.count(Reg))
+ if (EnclosedEdges.test(Idx))
continue;
// Calculate closure starting with Reg.
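The DenseSet-to-BitVector swap in this file works because virtual registers map to dense zero-based indices through Register::virtReg2Index, so membership becomes a single bit test. A stand-alone C++ sketch of the idea, with a hand-rolled index function so it compiles outside LLVM (the high-bit tag is an illustrative assumption, not the authoritative Register encoding):

#include <cassert>
#include <cstdint>
#include <vector>

namespace sketch {
constexpr uint32_t VirtRegFlag = 0x80000000u; // assumed tag bit for "virtual"
inline uint32_t virtReg2Index(uint32_t Reg) { return Reg & ~VirtRegFlag; }

// BitVector stand-in for the DenseSet<unsigned> this hunk replaces.
struct EnclosedEdges {
  std::vector<bool> Bits; // one bit per virtual register
  void resize(size_t NumVirtRegs) { Bits.assign(NumVirtRegs, false); }
  void set(uint32_t Reg) { Bits[virtReg2Index(Reg)] = true; }
  bool test(uint32_t Reg) const { return Bits[virtReg2Index(Reg)]; }
};
} // namespace sketch

int main() {
  sketch::EnclosedEdges E;
  E.resize(8);                          // sized once per function
  uint32_t R = sketch::VirtRegFlag | 3; // "virtual register #3"
  assert(!E.test(R));
  E.set(R);
  assert(E.test(R));
  return 0;
}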
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 6a047838f0b5..aebeec5a6d27 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -19,6 +19,7 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
@@ -552,7 +553,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case X86::PTILELOADDV:
case X86::PTILELOADDT1V: {
for (unsigned i = 2; i > 0; --i)
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
unsigned Opc =
Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1;
MI.setDesc(TII->get(Opc));
@@ -565,7 +566,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case X86::PTDPBF16PSV: {
MI.untieRegOperand(4);
for (unsigned i = 3; i > 0; --i)
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
unsigned Opc;
switch (Opcode) {
case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break;
@@ -581,13 +582,13 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case X86::PTILESTOREDV: {
for (int i = 1; i >= 0; --i)
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
MI.setDesc(TII->get(X86::TILESTORED));
return true;
}
case X86::PTILEZEROV: {
for (int i = 2; i > 0; --i) // Remove row, col
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
MI.setDesc(TII->get(X86::TILEZERO));
return true;
}
@@ -729,7 +730,7 @@ bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) {
}
bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- STI = &static_cast<const X86Subtarget &>(MF.getSubtarget());
+ STI = &MF.getSubtarget<X86Subtarget>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
X86FI = MF.getInfo<X86MachineFunctionInfo>();
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 1ac998b7ff7e..f2c362eeaa48 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -49,22 +49,11 @@ class X86FastISel final : public FastISel {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
- /// floating point ops.
- /// When SSE is available, use it for f32 operations.
- /// When SSE2 is available, use it for f64 operations.
- bool X86ScalarSSEf64;
- bool X86ScalarSSEf32;
- bool X86ScalarSSEf16;
-
public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo) {
Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
- X86ScalarSSEf16 = Subtarget->hasFP16();
}
bool fastSelectInstruction(const Instruction *I) override;
@@ -158,9 +147,8 @@ private:
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
- return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
- (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
+ return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
+ (VT == MVT::f32 && Subtarget->hasSSE1()) || VT == MVT::f16;
}
bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
@@ -292,6 +280,11 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
return false;
+ // Make sure there are no potentially EFLAGS-clobbering constant
+ // materializations in between.
+ if (llvm::any_of(I->operands(), [](Value *V) { return isa<Constant>(V); }))
+ return false;
+
CC = TmpCC;
return true;
}
@@ -305,9 +298,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
VT = evt.getSimpleVT();
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
- if (VT == MVT::f64 && !X86ScalarSSEf64)
+ if (VT == MVT::f64 && !Subtarget->hasSSE2())
return false;
- if (VT == MVT::f32 && !X86ScalarSSEf32)
+ if (VT == MVT::f32 && !Subtarget->hasSSE1())
return false;
// Similarly, no f80 support yet.
if (VT == MVT::f80)
@@ -325,6 +318,8 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
MachineMemOperand *MMO, unsigned &ResultReg,
unsigned Alignment) {
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
bool HasSSE41 = Subtarget->hasSSE41();
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
@@ -354,20 +349,16 @@ bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
Opc = X86::MOV64rm;
break;
case MVT::f32:
- if (X86ScalarSSEf32)
- Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
- HasAVX ? X86::VMOVSSrm_alt :
- X86::MOVSSrm_alt;
- else
- Opc = X86::LD_Fp32m;
+ Opc = HasAVX512 ? X86::VMOVSSZrm_alt
+ : HasAVX ? X86::VMOVSSrm_alt
+ : HasSSE1 ? X86::MOVSSrm_alt
+ : X86::LD_Fp32m;
break;
case MVT::f64:
- if (X86ScalarSSEf64)
- Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
- HasAVX ? X86::VMOVSDrm_alt :
- X86::MOVSDrm_alt;
- else
- Opc = X86::LD_Fp64m;
+ Opc = HasAVX512 ? X86::VMOVSDZrm_alt
+ : HasAVX ? X86::VMOVSDrm_alt
+ : HasSSE2 ? X86::MOVSDrm_alt
+ : X86::LD_Fp64m;
break;
case MVT::f80:
// No f80 support yet.
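The rewritten selection above collapses the old if/else into one conditional ladder, ordered from the strongest feature level down to the x87 fallback. A minimal compilable sketch of the pattern for the f32 case (the enum stands in for the real X86 opcode values):

#include <cstdio>

// Illustrative stand-ins for X86::VMOVSSZrm_alt, X86::VMOVSSrm_alt,
// X86::MOVSSrm_alt and X86::LD_Fp32m.
enum Opcode { VMOVSSZrm_alt, VMOVSSrm_alt, MOVSSrm_alt, LD_Fp32m };

// Strongest-first: AVX-512, then AVX, then SSE1, else the x87 load.
Opcode chooseF32LoadOpcode(bool HasAVX512, bool HasAVX, bool HasSSE1) {
  return HasAVX512 ? VMOVSSZrm_alt
         : HasAVX  ? VMOVSSrm_alt
         : HasSSE1 ? MOVSSrm_alt
                   : LD_Fp32m;
}

int main() {
  // An SSE1-only subtarget falls through to MOVSSrm_alt.
  std::printf("%d\n", static_cast<int>(chooseF32LoadOpcode(false, false, true)));
  return 0;
}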
@@ -521,7 +512,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
break;
case MVT::f32:
- if (X86ScalarSSEf32) {
+ if (HasSSE1) {
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSS;
else
@@ -531,7 +522,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, X86AddressMode &AM,
Opc = X86::ST_Fp32m;
break;
case MVT::f64:
- if (X86ScalarSSEf32) {
+ if (HasSSE2) {
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSD;
else
@@ -1362,8 +1353,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
bool HasAVX512 = Subtarget->hasAVX512();
bool HasAVX = Subtarget->hasAVX();
- bool X86ScalarSSEf32 = Subtarget->hasSSE1();
- bool X86ScalarSSEf64 = Subtarget->hasSSE2();
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
@@ -1372,15 +1363,15 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
case MVT::f32:
- return X86ScalarSSEf32
- ? (HasAVX512 ? X86::VUCOMISSZrr
- : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
- : 0;
+ return HasAVX512 ? X86::VUCOMISSZrr
+ : HasAVX ? X86::VUCOMISSrr
+ : HasSSE1 ? X86::UCOMISSrr
+ : 0;
case MVT::f64:
- return X86ScalarSSEf64
- ? (HasAVX512 ? X86::VUCOMISDZrr
- : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
- : 0;
+ return HasAVX512 ? X86::VUCOMISDZrr
+ : HasAVX ? X86::VUCOMISDrr
+ : HasSSE2 ? X86::UCOMISDrr
+ : 0;
}
}
@@ -2036,7 +2027,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
/// the select.
bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
// Check if the subtarget supports these instructions.
- if (!Subtarget->hasCMov())
+ if (!Subtarget->canUseCMOV())
return false;
// FIXME: Add support for i8.
@@ -2289,12 +2280,13 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
default: return false;
case MVT::i8: Opc = X86::CMOV_GR8; break;
case MVT::i16: Opc = X86::CMOV_GR16; break;
- case MVT::f16: Opc = X86::CMOV_FR16X; break;
case MVT::i32: Opc = X86::CMOV_GR32; break;
- case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
- : X86::CMOV_FR32; break;
- case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
- : X86::CMOV_FR64; break;
+ case MVT::f16:
+ Opc = Subtarget->hasAVX512() ? X86::CMOV_FR16X : X86::CMOV_FR16; break;
+ case MVT::f32:
+ Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X : X86::CMOV_FR32; break;
+ case MVT::f64:
+ Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X : X86::CMOV_FR64; break;
}
const Value *Cond = I->getOperand(0);
@@ -2495,7 +2487,7 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
}
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
- if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
+ if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
I->getOperand(0)->getType()->isFloatTy()) {
bool HasAVX512 = Subtarget->hasAVX512();
// fpext from float to double.
@@ -2509,7 +2501,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
}
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
- if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
+ if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
I->getOperand(0)->getType()->isDoubleTy()) {
bool HasAVX512 = Subtarget->hasAVX512();
// fptrunc from double to float.
@@ -3733,25 +3725,23 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
bool HasAVX = Subtarget->hasAVX();
bool HasAVX512 = Subtarget->hasAVX512();
switch (VT.SimpleTy) {
default: return 0;
case MVT::f32:
- if (X86ScalarSSEf32)
- Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
- HasAVX ? X86::VMOVSSrm_alt :
- X86::MOVSSrm_alt;
- else
- Opc = X86::LD_Fp32m;
+ Opc = HasAVX512 ? X86::VMOVSSZrm_alt
+ : HasAVX ? X86::VMOVSSrm_alt
+ : HasSSE1 ? X86::MOVSSrm_alt
+ : X86::LD_Fp32m;
break;
case MVT::f64:
- if (X86ScalarSSEf64)
- Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
- HasAVX ? X86::VMOVSDrm_alt :
- X86::MOVSDrm_alt;
- else
- Opc = X86::LD_Fp64m;
+ Opc = HasAVX512 ? X86::VMOVSDZrm_alt
+ : HasAVX ? X86::VMOVSDrm_alt
+ : HasSSE2 ? X86::MOVSDrm_alt
+ : X86::LD_Fp64m;
break;
case MVT::f80:
// No f80 support yet.
@@ -3852,11 +3842,11 @@ unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
default:
break;
case MVT::f32:
- if (!X86ScalarSSEf32)
+ if (!Subtarget->hasSSE1())
Opc = X86::LD_Fp032;
break;
case MVT::f64:
- if (!X86ScalarSSEf64)
+ if (!Subtarget->hasSSE2())
Opc = X86::LD_Fp064;
break;
case MVT::f80:
@@ -3907,21 +3897,24 @@ unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
return 0;
// Get opcode and regclass for the given zero.
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
bool HasAVX512 = Subtarget->hasAVX512();
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: return 0;
+ case MVT::f16:
+ Opc = HasAVX512 ? X86::AVX512_FsFLD0SH : X86::FsFLD0SH;
+ break;
case MVT::f32:
- if (X86ScalarSSEf32)
- Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
- else
- Opc = X86::LD_Fp032;
+ Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
+ : HasSSE1 ? X86::FsFLD0SS
+ : X86::LD_Fp032;
break;
case MVT::f64:
- if (X86ScalarSSEf64)
- Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
- else
- Opc = X86::LD_Fp064;
+ Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
+ : HasSSE2 ? X86::FsFLD0SD
+ : X86::LD_Fp064;
break;
case MVT::f80:
// No f80 support yet.
diff --git a/llvm/lib/Target/X86/X86FastPreTileConfig.cpp b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
new file mode 100644
index 000000000000..7e5540022cc8
--- /dev/null
+++ b/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
@@ -0,0 +1,709 @@
+//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Pass to preconfigure the shape of physical tile registers.
+/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
+/// walks each instruction of a basic block in reverse order. All the tile
+/// registers that live out of the basic block are spilled and reloaded
+/// before their users. It also checks the dependency of the shape to ensure
+/// the shape is defined before ldtilecfg.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "fastpretileconfig"
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads, "Number of loads added");
+
+namespace {
+
+class X86FastPreTileConfig : public MachineFunctionPass {
+ MachineFunction *MF = nullptr;
+ const X86Subtarget *ST = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ X86MachineFunctionInfo *X86FI = nullptr;
+ MachineFrameInfo *MFI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineBasicBlock *MBB = nullptr;
+ int CfgSS = -1;
+ struct PHIInfo {
+ Register Row;
+ Register Col;
+ Register StackAddr;
+ };
+ DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;
+
+ /// Maps virtual regs to the frame index where these values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ /// Has a bit set for tile virtual register for which it was determined
+ /// that it is alive across blocks.
+ BitVector MayLiveAcrossBlocks;
+
+ int getStackSpaceFor(Register VirtReg);
+ void InitializeTileConfigStackSpace();
+ bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
+ void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
+ void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
+ MachineOperand *RowMO, MachineOperand *ColMO);
+ void canonicalizePHIs(MachineBasicBlock &MBB);
+ void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
+ void convertPHIs(MachineBasicBlock &MBB);
+ bool configBasicBlock(MachineBasicBlock &MBB);
+
+public:
+ X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
+
+ /// Return the pass name.
+ StringRef getPassName() const override {
+ return "Fast Tile Register Preconfigure";
+ }
+
+ /// Perform tile register configure.
+ bool runOnMachineFunction(MachineFunction &MFunc) override;
+
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86FastPreTileConfig::ID = 0;
+
+INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
+ "Fast Tile Register Preconfigure", false, false)
+INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
+ "Fast Tile Register Preconfigure", false, false)
+
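+/// Return true if \p A appears at or before \p B in \p MBB, scanning
+/// linearly from the block start; \p B == MBB.end() is treated as being
+/// dominated by anything.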
+static bool dominates(MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) {
+ auto MBBEnd = MBB.end();
+ if (B == MBBEnd)
+ return true;
+
+ MachineBasicBlock::const_iterator I = MBB.begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+
+ return &*I == A;
+}
+
+/// This allocates space for the specified virtual register to be held on the
+/// stack.
+int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
+ // Find the location Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ // Already has space allocated?
+ if (SS != -1)
+ return SS;
+
+ // Allocate a new stack object for this spill location...
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ unsigned Size = TRI->getSpillSize(RC);
+ Align Alignment = TRI->getSpillAlign(RC);
+ int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+/// Returns false if \p VirtReg is known to not live out of the current config.
+/// If \p VirtReg lives out of the current MBB, it must live out of the
+/// current config.
+bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
+ if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
+ return true;
+
+ for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
+ if (UseInst.getParent() != MBB) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
+
+ // The use and def are in the same MBB. If the tile register is
+ // reconfigured, it is clobbered and we need to spill and reload the
+ // tile register.
+ if (CfgMI) {
+ if (dominates(*MBB, *CfgMI, UseInst)) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
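+/// Zero-initialize the 64-byte tile configuration area in the CfgSS stack
+/// slot using the widest available vector stores, then write palette = 1
+/// into its first byte.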
+void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
+ MachineBasicBlock &MBB = MF->front();
+ MachineInstr *MI = &*MBB.getFirstNonPHI();
+ DebugLoc DL;
+ if (ST->hasAVX512()) {
+ Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
+ BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
+ .addReg(Zmm);
+ } else if (ST->hasAVX2()) {
+ Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
+ BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
+ .addReg(Ymm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
+ 32)
+ .addReg(Ymm);
+ } else {
+ assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
+ unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
+ BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
+ .addReg(Xmm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
+ .addReg(Xmm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
+ .addReg(Xmm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
+ .addReg(Xmm);
+ }
+ // Fill in the palette first.
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
+ .addImm(1);
+}
+
+/// Insert spill instruction for \p VirtReg before \p Before.
+/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
+void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
+ Register VirtReg, bool Kill) {
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
+ int FI = getStackSpaceFor(VirtReg);
+ LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
+
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ // Don't need shape information for the tile store, because it is adjacent
+ // to the tile def instruction.
+ TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI);
+ ++NumStores;
+
+ // TODO: update DBG_VALUEs
+}
+
+/// Insert reload instruction for \p OrigReg before \p UseMI.
+void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
+ Register OrigReg, MachineOperand *RowMO,
+ MachineOperand *ColMO) {
+ int FI = getStackSpaceFor(OrigReg);
+ const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
+ Register TileReg;
+ // Fold copy to tileload
+ // BB1:
+ // spill src to s
+ //
+ // BB2:
+ // t = copy src
+ // -->
+ // t = tileload (s)
+ if (UseMI->isCopy())
+ TileReg = UseMI->getOperand(0).getReg();
+ else
+ TileReg = MRI->createVirtualRegister(&RC);
+ // Can't use TII->loadRegFromStackSlot(), because we need the shape
+ // information for reload.
+ // tileloadd (%sp, %idx), %tmm
+ unsigned Opc = X86::PTILELOADDV;
+ Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+ // FIXME: MBB is not the parent of UseMI.
+ MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
+ TII->get(X86::MOV64ri), StrideReg)
+ .addImm(64);
+ NewMI = addFrameReference(
+ BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
+ .addReg(RowMO->getReg())
+ .addReg(ColMO->getReg()),
+ FI);
+ MachineOperand &MO = NewMI->getOperand(5);
+ MO.setReg(StrideReg);
+ MO.setIsKill(true);
+ RowMO->setIsKill(false);
+ ColMO->setIsKill(false);
+ // Erase copy instruction after it is folded.
+ if (UseMI->isCopy()) {
+ UseMI->eraseFromParent();
+ } else {
+ // Replace the register in the user MI.
+ for (auto &MO : UseMI->operands()) {
+ if (MO.isReg() && MO.getReg() == OrigReg)
+ MO.setReg(TileReg);
+ }
+ }
+
+ ++NumLoads;
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
+ << printReg(TileReg, TRI) << '\n');
+}
+
+static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
+ // The instruction must have 3 operands: tile def, row, col.
+ if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
+ return false;
+ MachineOperand &MO = MI.getOperand(0);
+
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ // FIXME: it may be used after Greedy RA, when the physical
+ // register has not been rewritten yet.
+ if (Reg.isVirtual() &&
+ MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
+ return true;
+ if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
+ return true;
+ }
+
+ return false;
+}
+
+static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
+ MachineInstr *MI = MRI->getVRegDef(TileReg);
+ if (isTileDef(MRI, *MI)) {
+ MachineOperand *RowMO = &MI->getOperand(1);
+ MachineOperand *ColMO = &MI->getOperand(2);
+ return ShapeT(RowMO, ColMO, MRI);
+ } else if (MI->isCopy()) {
+ TileReg = MI->getOperand(1).getReg();
+ return getShape(MRI, TileReg);
+ }
+
+ // The def should not be a PHI node here, because we walk the MBBs in
+ // reverse post order and PHIs have already been converted.
+ assert(MI->isPHI() && "Unexpected PHI when getting shape.");
+ llvm_unreachable("Unexpected MI when getting shape.");
+}
+
+// BB0:
+// spill t0 to s0
+// BB1:
+// spill t1 to s1
+//
+// BB2:
+// t = phi [t0, bb0] [t1, bb1]
+// -->
+// row = phi [r0, bb0] [r1, bb1]
+// col = phi [c0, bb0] [c1, bb1]
+// s = phi [s0, bb0] [s1, bb1]
+// t = tileload row, col, s
+// The new instructions are inserted at the end of the phi nodes. The order
+// of the original phi nodes is not preserved.
+void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
+ MachineInstr &PHI) {
+ // 1. Create instruction to get stack slot address of each incoming block.
+ // 2. Create PHI node for the stack address.
+ // 3. Create PHI nodes for the shape. If one of the incoming shapes is an
+ // immediate, use the immediate and delete the PHI node.
+ // 4. Create tileload instruction from the stack address.
+ Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+ MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
+ TII->get(X86::PHI), StackAddrReg);
+ Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
+ MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
+ TII->get(X86::PHI), RowReg);
+ Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
+ MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
+ TII->get(X86::PHI), ColReg);
+ // Record the mapping of phi node and its row/column information.
+ VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};
+
+ for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
+ // Get the 2 incoming value of tile register and MBB.
+ Register InTileReg = PHI.getOperand(I).getReg();
+ // Mark it as live out, so that it will be spilled when visiting
+ // the incoming MBB. Otherwise, since the phi will be deleted, the
+ // spill would be missed when visiting the incoming MBB.
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
+ MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();
+
+ MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
+ MachineBasicBlock::iterator InsertPos;
+ if (TileDefMI->isPHI()) {
+ InsertPos = TileDefMI->getParent()->getFirstNonPHI();
+ if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
+ // def t1
+ // / \
+ // def t2 t3 = phi(t1, t4) <--
+ // \ / |
+ // t4 = phi(t2, t3)-------------
+ //
+ // For each of row, column and stack address, append the phi incoming value.
+ // Create r3 = phi(r1, r4)
+ // Create r4 = phi(r2, r3)
+ Register InRowReg = VisitedPHIs[TileDefMI].Row;
+ Register InColReg = VisitedPHIs[TileDefMI].Col;
+ Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
+ RowPHI.addReg(InRowReg).addMBB(InMBB);
+ ColPHI.addReg(InColReg).addMBB(InMBB);
+ AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
+ continue;
+ } else {
+ // Recursively convert the PHI to a tileload.
+ convertPHI(TileDefMI->getParent(), *TileDefMI);
+ // The PHI node is converted to a tileload instruction. Get the stack
+ // address from the tileload operands.
+ MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
+ assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
+ Register InRowReg = TileLoad->getOperand(1).getReg();
+ Register InColReg = TileLoad->getOperand(2).getReg();
+ Register InStackAddrReg = TileLoad->getOperand(3).getReg();
+ RowPHI.addReg(InRowReg).addMBB(InMBB);
+ ColPHI.addReg(InColReg).addMBB(InMBB);
+ AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
+ }
+ } else {
+ InsertPos = TileDefMI->getIterator();
+
+ // Fill the incoming operands of the row/column phi instructions.
+ ShapeT Shape = getShape(MRI, InTileReg);
+ Shape.getRow()->setIsKill(false);
+ Shape.getCol()->setIsKill(false);
+ RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
+ ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);
+
+ // The incoming tile register lives out of its def BB, so it will be
+ // spilled. Create an MI to get the spill stack slot address for it.
+ int FI = getStackSpaceFor(InTileReg);
+ Register InStackAddrReg =
+ MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+ addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
+ TII->get(X86::LEA64r), InStackAddrReg)
+ .addFrameIndex(FI),
+ 0);
+ AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
+ }
+ }
+
+ MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
+ Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
+ BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
+ .addImm(64);
+ Register TileReg = PHI.getOperand(0).getReg();
+ MachineInstr *NewMI = addDirectMem(
+ BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
+ .addReg(RowReg)
+ .addReg(ColReg),
+ StackAddrReg);
+ MachineOperand &MO = NewMI->getOperand(5);
+ MO.setReg(StrideReg);
+ MO.setIsKill(true);
+ PHI.eraseFromParent();
+ VisitedPHIs.erase(&PHI);
+}
+
+static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
+ MachineOperand &MO = MI.getOperand(0);
+ if (MO.isReg() && MO.getReg().isVirtual() &&
+ MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
+ return true;
+ return false;
+}
+
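+/// Rewrite tile PHIs whose same-MBB incoming value is itself defined by a
+/// PHI, forwarding the underlying incoming register so that convertPHI does
+/// not have to chase PHI-to-PHI dependencies (see the example in the body).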
+void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
+ SmallVector<MachineInstr *, 8> PHIs;
+
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isPHI())
+ break;
+ if (!isTileRegDef(MRI, MI))
+ continue;
+ PHIs.push_back(&MI);
+ }
+ // Canonicalize the phi nodes first. One tile phi may depend on a previous
+ // phi node. For the case below, we need to convert %t4.
+ //
+ // BB0:
+ // %t3 = phi (t1 BB1, t2 BB0)
+ // %t4 = phi (t5 BB1, t3 BB0)
+ // -->
+ // %t3 = phi (t1 BB1, t2 BB0)
+ // %t4 = phi (t5 BB1, t2 BB0)
+ //
+ while (!PHIs.empty()) {
+ MachineInstr *PHI = PHIs.pop_back_val();
+
+ // Find the operand that is incoming from the same MBB and whose def
+ // is also a phi node.
+ MachineOperand *InMO = nullptr;
+ MachineInstr *DefMI = nullptr;
+ for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
+ Register InTileReg = PHI->getOperand(I).getReg();
+ MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
+ DefMI = MRI->getVRegDef(InTileReg);
+ if (InMBB != &MBB || !DefMI->isPHI())
+ continue;
+
+ InMO = &PHI->getOperand(I);
+ break;
+ }
+ // If no such operand is found, do nothing.
+ if (!InMO)
+ continue;
+
+ // Current phi node depends on previous phi node. Break the
+ // dependency.
+ Register DefTileReg;
+ for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
+ MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
+ if (InMBB != &MBB)
+ continue;
+ DefTileReg = DefMI->getOperand(I).getReg();
+ InMO->setReg(DefTileReg);
+ break;
+ }
+ }
+}
+
+void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
+ SmallVector<MachineInstr *, 8> PHIs;
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isPHI())
+ break;
+ if (!isTileRegDef(MRI, MI))
+ continue;
+ PHIs.push_back(&MI);
+ }
+ while (!PHIs.empty()) {
+ MachineInstr *MI = PHIs.pop_back_val();
+ VisitedPHIs.clear();
+ convertPHI(&MBB, *MI);
+ }
+}
+
+// PreTileConfig configures the tile registers on a per-basic-block
+// basis.
+bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
+ this->MBB = &MBB;
+ bool Change = false;
+ MachineInstr *LastShapeMI = nullptr;
+ MachineInstr *LastTileCfg = nullptr;
+ bool HasUnconfigTile = false;
+
+ auto Config = [&](MachineInstr &Before) {
+ if (CfgSS == -1)
+ CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
+ ST->getTileConfigAlignment(), false);
+ LastTileCfg = addFrameReference(
+ BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
+ LastShapeMI = nullptr;
+ Change = true;
+ };
+ auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual() &&
+ MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
+ return true;
+ }
+ return false;
+ };
+ for (MachineInstr &MI : reverse(MBB)) {
+ // We have transformed the phi nodes before configuring the BB.
+ if (MI.isPHI())
+ break;
+ // Don't collect the shape of a used tile; the tile should be defined
+ // before the tile use. A spill and reload happens if there is only a
+ // tile use after ldtilecfg, so the shape can be collected from the
+ // reload. Take the code below for example: %t would be reloaded before
+ // the tilestore.
+ // call
+ // ....
+ // tilestore %r, %c, %t
+ // -->
+ // call
+ // ldtilecfg
+ // %t = tileload %r, %c
+ // tilestore %r, %c, %t
+ if (HasTileOperand(MRI, MI))
+ HasUnconfigTile = true;
+ // According to the AMX ABI, all tile registers, including the config
+ // register, are volatile. The caller needs to save/restore the config
+ // register.
+ if (MI.isCall() && HasUnconfigTile) {
+ MachineBasicBlock::iterator I;
+ if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
+ I = ++LastShapeMI->getIterator();
+ else
+ I = ++MI.getIterator();
+ Config(*I);
+ HasUnconfigTile = false;
+ continue;
+ }
+ if (!isTileDef(MRI, MI))
+ continue;
+ //
+ //---------------------------------------------------------------------
+ // Don't handle COPY instructions. If the src and dst of the COPY can be
+ // in the same config, as in the case below, we just check the shape of t0.
+ // def row0
+ // def col0
+ // ldtilecfg
+ // t0 = tilezero(row0, col0)
+ // t1 = copy t0
+ // ...
+ // If the src and dst of the COPY can NOT be in the same config, as in the
+ // case below, a reload is generated before the copy instruction.
+ // def row0
+ // def col0
+ // t0 = tilezero(row0, col0)
+ // spill t0
+ // ...
+ // def row1
+ // def col1
+ // ldtilecfg
+ // t1 = tilezero(row1, col1)
+ // reload t0
+ // t1 = copy t0
+ //---------------------------------------------------------------------
+ //
+ // If MI dominates the last shape def instruction, we need to insert
+ // ldtilecfg after LastShapeMI now. The config doesn't include the
+ // current MI.
+ // def row0
+ // def col0
+ // tilezero(row0, col0) <- MI
+ // def row1
+ // def col1
+ // ldtilecfg <- insert
+ // tilezero(row1, col1)
+ if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
+ Config(*(++LastShapeMI->getIterator()));
+ MachineOperand *RowMO = &MI.getOperand(1);
+ MachineOperand *ColMO = &MI.getOperand(2);
+ MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
+ MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
+ // If the shape is defined in the current MBB, check the domination.
+ // FIXME: what about loops?
+ if (RowMI->getParent() == &MBB) {
+ if (!LastShapeMI)
+ LastShapeMI = RowMI;
+ else if (dominates(MBB, LastShapeMI, RowMI))
+ LastShapeMI = RowMI;
+ }
+ if (ColMI->getParent() == &MBB) {
+ if (!LastShapeMI)
+ LastShapeMI = ColMI;
+ else if (dominates(MBB, LastShapeMI, ColMI))
+ LastShapeMI = ColMI;
+ }
+ // If there is a user that lives out of the tilecfg, spill the tile
+ // register and reload it before the user.
+ Register TileReg = MI.getOperand(0).getReg();
+ if (mayLiveOut(TileReg, LastTileCfg))
+ spill(++MI.getIterator(), TileReg, false);
+ for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
+ if (UseMI.getParent() == &MBB) {
+ // Only reload for users that cross the ldtilecfg.
+ if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
+ continue;
+ // Reload before UseMI.
+ reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
+ } else {
+ // Don't reload for phi instruction, we handle phi reload separately.
+ // TODO: merge the reload for the same user MBB.
+ if (!UseMI.isPHI())
+ reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
+ }
+ }
+ }
+
+ // Configure the tile registers at the head of the MBB.
+ if (HasUnconfigTile) {
+ MachineInstr *Before;
+ if (LastShapeMI == nullptr || LastShapeMI->isPHI())
+ Before = &*MBB.getFirstNonPHI();
+ else
+ Before = &*(++LastShapeMI->getIterator());
+
+ Config(*Before);
+ }
+
+ return Change;
+}
+
+bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
+ MF = &MFunc;
+ MRI = &MFunc.getRegInfo();
+ ST = &MFunc.getSubtarget<X86Subtarget>();
+ TII = ST->getInstrInfo();
+ X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
+ MFI = &MFunc.getFrameInfo();
+ TRI = ST->getRegisterInfo();
+ CfgSS = -1;
+
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ // Bail out early if there is no tile register to configure.
+ bool HasVirtTileReg = false;
+ for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
+ Register VirtReg = Register::index2VirtReg(I);
+ if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
+ HasVirtTileReg = true;
+ break;
+ }
+ }
+ if (!HasVirtTileReg)
+ return false;
+
+ StackSlotForVirtReg.resize(NumVirtRegs);
+ MayLiveAcrossBlocks.clear();
+ // We will create registers during config. The "* 3" is to make sure
+ // the virtual register number doesn't exceed the size of the bit vector.
+ MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
+ bool Change = false;
+ assert(MRI->isSSA());
+
+ // Canonicalize the phi node first.
+ for (MachineBasicBlock &MBB : MFunc)
+ canonicalizePHIs(MBB);
+
+ // Loop over all of the basic blocks in reverse post order and insert
+ // ldtilecfg for tile registers. The reverse post order is to facilitate
+ // PHI node conversion.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ convertPHIs(*MBB);
+ Change |= configBasicBlock(*MBB);
+ }
+
+ if (Change)
+ InitializeTileConfigStackSpace();
+
+ StackSlotForVirtReg.clear();
+ return Change;
+}
+
+FunctionPass *llvm::createX86FastPreTileConfigPass() {
+ return new X86FastPreTileConfig();
+}
diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 061fff50bcea..2a20cd13791d 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -40,40 +40,25 @@ namespace {
class X86FastTileConfig : public MachineFunctionPass {
// context
MachineFunction *MF = nullptr;
- const X86Subtarget *ST = nullptr;
- const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
X86MachineFunctionInfo *X86FI = nullptr;
- MachineInstr *getTileConfigPoint();
- void tileConfig();
+ bool configBasicBlock(MachineBasicBlock &MBB);
public:
X86FastTileConfig() : MachineFunctionPass(ID) {}
- bool fastTileConfig();
- bool isTileLoad(MachineInstr &MI);
- bool isTileStore(MachineInstr &MI);
- bool isAMXInstr(MachineInstr &MI);
-
- MachineInstr *getKeyAMXInstr(MachineInstr *MI);
- void getTileShapesCfg(MachineInstr *MI,
- SmallVector<MachineOperand *> &ShapedTiles);
- void getShapeCfgInstrs(MachineInstr *MI,
- std::map<unsigned, MachineInstr *> &RowCfgs,
- std::map<unsigned, MachineInstr *> &ColCfgs);
-
/// Return the pass name.
StringRef getPassName() const override {
return "Fast Tile Register Configure";
}
- void materializeTileCfg(MachineInstr *MI);
-
- void rewriteTileCfg(SmallVector<MachineOperand *> &ShapedTiles,
- std::map<unsigned, MachineInstr *> &RowCfgs,
- std::map<unsigned, MachineInstr *> &ColCfgs);
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &MFunc) override;
@@ -95,209 +80,107 @@ INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
"Fast Tile Register Configure", false, false)
-static bool isTilePhysReg(MachineOperand &Op) {
- if (!Op.isReg())
+static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
+ // There is no phi instruction after register allocation.
+ assert(!MI.isPHI());
+ // The instruction must have 3 operands: tile def, row, col.
+ // It should be an AMX pseudo instruction that has shape operands.
+ if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
+ !MI.isPseudo())
return false;
+ MachineOperand &MO = MI.getOperand(0);
+
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ // FIXME: it may be used after Greedy RA, when the physical
+ // register has not been rewritten yet.
+ if (Reg.isVirtual() &&
+ MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
+ return true;
+ if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
+ return true;
+ }
- Register Reg = Op.getReg();
- if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
- return true;
return false;
}
-static unsigned getTilePhysRegIdx(MachineOperand *Op) {
- assert(isTilePhysReg(*Op) && "Tile Operand is invalid");
- return Op->getReg() - X86::TMM0;
-}
-
-static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) {
- unsigned Offset = 48 + TIdx;
- MI->getOperand(3).ChangeToImmediate(Offset);
-}
-
-static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) {
- unsigned Offset = 16 + TIdx * 2;
- MI->getOperand(3).ChangeToImmediate(Offset);
-}
-
-bool X86FastTileConfig::isTileLoad(MachineInstr &MI) {
- return MI.getOpcode() == X86::PTILELOADDV ||
- MI.getOpcode() == X86::PTILELOADDT1V;
-}
-bool X86FastTileConfig::isTileStore(MachineInstr &MI) {
- return MI.getOpcode() == X86::PTILESTOREDV;
-}
-bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) {
- // TODO: May need to handle some special nontile amx instrucion.
- if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr())
- return false;
-
- return llvm::any_of(MI.operands(), isTilePhysReg);
-}
-
-MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) {
- auto Cfg = MachineBasicBlock::iterator(MI);
- MachineBasicBlock *MBB = MI->getParent();
- MachineInstr *KeyMI = nullptr;
- int KeyAMXNum = 0;
-
- for (auto II = Cfg; II != MBB->end(); II++) {
- if (isTileLoad(*II)) {
- KeyMI = &*II;
+// Configure the tile registers on a per-basic-block basis, using the shape
+// information collected while walking the block.
+bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
+ bool Change = false;
+ SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
+ for (MachineInstr &MI : reverse(MBB)) {
+ if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
continue;
+ // AMX instructions that define tile register.
+ if (MI.getOpcode() != X86::PLDTILECFGV) {
+ MachineOperand &Row = MI.getOperand(1);
+ MachineOperand &Col = MI.getOperand(2);
+ unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
+ ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
+ } else { // PLDTILECFGV
+ // Rewrite the shape information to memory. The stack slot should have
+ // been initialized to zero by the pre-config pass.
+ int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
+ for (auto &ShapeInfo : ShapeInfos) {
+ DebugLoc DL;
+ unsigned TMMIdx = ShapeInfo.first;
+ Register RowReg = ShapeInfo.second.getRow()->getReg();
+ Register ColReg = ShapeInfo.second.getCol()->getReg();
+ // Here is the data format for the tile config.
+ // 0 palette
+ // 1 start_row
+ // 2-15 reserved, must be zero
+ // 16-17 tile0.colsb Tile 0 bytes per row.
+ // 18-19 tile1.colsb Tile 1 bytes per row.
+ // 20-21 tile2.colsb Tile 2 bytes per row.
+ // ... (sequence continues)
+ // 30-31 tile7.colsb Tile 7 bytes per row.
+ // 32-47 reserved, must be zero
+ // 48 tile0.rows Tile 0 rows.
+ // 49 tile1.rows Tile 1 rows.
+ // 50 tile2.rows Tile 2 rows.
+ // ... (sequence continues)
+ // 55 tile7.rows Tile 7 rows.
+ // 56-63 reserved, must be zero
+ int RowOffset = 48 + TMMIdx;
+ int ColOffset = 16 + TMMIdx * 2;
+
+ Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
+ BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
+ MachineInstrBuilder StoreRow =
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
+ addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
+
+ MachineInstrBuilder StoreCol =
+ BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
+ addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
+ }
+ ShapeInfos.clear();
+ Change = true;
}
-
- if (isTileStore(*II)) {
- assert(KeyMI && "Key AMX Should be found before!");
- break;
- }
-
- if (isAMXInstr(*II)) {
- assert((KeyAMXNum == 0) && "Too many Key AMX instruction!");
- KeyAMXNum++;
- KeyMI = &*II;
- }
- }
- assert(KeyMI && "There must be an AMX instruction.");
- return KeyMI;
-}
-
-// Orderly get the tiles in key amx instruction, uses before defs.
-void X86FastTileConfig::getTileShapesCfg(
- MachineInstr *CfgMI, SmallVector<MachineOperand *> &ShapedTiles) {
- MachineInstr *KeyMI = getKeyAMXInstr(CfgMI);
-
- SmallVector<MachineOperand *> DefTiles;
- for (MachineOperand &MO : KeyMI->operands()) {
- if (!isTilePhysReg(MO))
- continue;
- if (MO.isDef())
- DefTiles.push_back(&MO);
- else
- ShapedTiles.push_back(&MO);
- }
- ShapedTiles.append(DefTiles);
-}
-
-// We pre-config the shapes at position named with "amx.tmm.N.shape.row* and
-// amx.shape.N.col*" at pass "Pre AMX Tile Config".
-// The 'N' implies the order of tiles in key amx intrinsic.
-void X86FastTileConfig::getShapeCfgInstrs(
- MachineInstr *MI, std::map<unsigned, MachineInstr *> &RowCfgs,
- std::map<unsigned, MachineInstr *> &ColCfgs) {
- auto Cfg = MachineBasicBlock::iterator(MI);
- MachineBasicBlock *MBB = MI->getParent();
-
- for (auto II = Cfg; II != MBB->begin(); II--) {
- if (isAMXInstr(*II) || II->isTerminator() || II->isCall())
- break;
- if (!II->mayStore() || !II->hasOneMemOperand())
- continue;
- const Value *MemPtr = II->memoperands()[0]->getValue();
- if (!MemPtr)
- continue;
-
- StringRef Name = MemPtr->getName();
- if (!Name.startswith("amx.tmm."))
- continue;
-
- // Get the 'N'th tile shape config in key amx instruction.
- auto N = Name.find(".shape");
- StringRef STileIdx = Name.slice(8, N);
- unsigned Idx;
- STileIdx.getAsInteger(10, Idx);
-
- // And related them with their store instructions.
- if (Name.contains("row"))
- RowCfgs[Idx] = &*II;
- else if (Name.contains("col"))
- ColCfgs[Idx] = &*II;
- else
- llvm_unreachable("Invalid tile shape info!");
}
- assert((RowCfgs.size() == ColCfgs.size()) &&
- "The number of tile row and col must be equal!");
-}
-
-// Here is the data format for the tile config.
-// 0 palette = 1 now.
-// 1 start_row = 0 now.
-// 2-15 reserved, must be zero
-// 16-17 tile0.colsb Tile 0 bytes per row.
-// 18-19 tile1.colsb Tile 1 bytes per row.
-// 20-21 tile2.colsb Tile 2 bytes per row.
-// ... (sequence continues)
-// 30-31 tile7.colsb Tile 7 bytes per row.
-// 32-47 reserved, must be zero
-// 48 tile0.rows Tile 0 rows.
-// 49 tile1.rows Tile 1 rows.
-// 50 tile2.rows Tile 2 rows.
-// ... (sequence continues)
-// 55 tile7.rows Tile 7 rows.
-// 56-63 reserved, must be zero
-void X86FastTileConfig::rewriteTileCfg(
- SmallVector<MachineOperand *> &ShapedTiles,
- std::map<unsigned, MachineInstr *> &RowCfgs,
- std::map<unsigned, MachineInstr *> &ColCfgs) {
- assert((RowCfgs.size() == ShapedTiles.size()) &&
- "The number of tile shapes not equal with the number of tiles!");
- // Orderly get the tiles and adjust the shape config.
- for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) {
- MachineOperand *MO = ShapedTiles[I];
- unsigned TmmIdx = getTilePhysRegIdx(MO);
- if (I == TmmIdx)
- continue;
- adjustRowCfg(TmmIdx, RowCfgs[I]);
- adjustColCfg(TmmIdx, ColCfgs[I]);
- }
-}
-
-// We have already preconfig the shapes before fast register allocation at
-// X86PreAMXConfig::preWriteTileCfg(). Now, we have done fast register
-// allocation, the shapes pre-written before may not rightly corresponding
-// to the correct tmm registers, so we need adjust them.
-void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) {
- SmallVector<MachineOperand *> ShapedTiles;
- std::map<unsigned, MachineInstr *> RowCfgs;
- std::map<unsigned, MachineInstr *> ColCfgs;
-
- // Orderly keep the tile uses and def in ShapedTiles;
- getTileShapesCfg(CfgMI, ShapedTiles);
- assert(ShapedTiles.size() && "Not find shapes config!");
-
- getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs);
-
- rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs);
-}
-
-bool X86FastTileConfig::fastTileConfig() {
- bool Changed = false;
-
- for (MachineBasicBlock &MBB : *MF) {
- SmallVector<MachineInstr *, 2> CFGs;
- for (MachineInstr &MI : MBB)
- if (MI.getOpcode() == X86::PLDTILECFGV)
- CFGs.push_back(&MI);
- for (auto *MI : CFGs)
- materializeTileCfg(MI);
- if (!CFGs.empty())
- Changed = true;
- }
- if (Changed)
+ if (Change)
X86FI->setHasVirtualTileReg(true);
- return Changed;
+
+ return Change;
}
bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
MF = &MFunc;
MRI = &MFunc.getRegInfo();
- ST = &MFunc.getSubtarget<X86Subtarget>();
+ const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
TRI = ST->getRegisterInfo();
TII = MFunc.getSubtarget().getInstrInfo();
X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
+ bool Change = false;
+
+ // Loop over all of the basic blocks and configure the tile registers.
+ for (MachineBasicBlock &MBB : MFunc)
+ Change |= configBasicBlock(MBB);
- return fastTileConfig();
+ return Change;
}
FunctionPass *llvm::createX86FastTileConfigPass() {
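The shape rewrite above stores each tile's row count and column byte width at fixed offsets inside the 64-byte config area. A self-contained sketch of just the offset arithmetic, with the constants taken from the data-format comment in this hunk:

#include <cassert>

// colsb words start at byte 16, two bytes per tile; row-count bytes start
// at byte 48, one byte per tile (per the tile-config layout comment).
int rowOffset(int TMMIdx) { return 48 + TMMIdx; }
int colOffset(int TMMIdx) { return 16 + TMMIdx * 2; }

int main() {
  assert(rowOffset(0) == 48 && colOffset(0) == 16); // tile0
  assert(rowOffset(7) == 55 && colOffset(7) == 30); // tile7
  return 0;
}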
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 4730b936ec1f..b01145809ac6 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -229,7 +229,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
bool IsSlowLEA = ST.slowLEA();
bool IsSlow3OpsLEA = ST.slow3OpsLEA();
- bool LEAUsesAG = ST.LEAusesAG();
+ bool LEAUsesAG = ST.leaUsesAG();
bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
bool UseLEAForSP = ST.useLeaForSP();
@@ -546,7 +546,6 @@ bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
if (KilledIndex)
KilledIndex->setIsKill(false);
- MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI1, 1);
MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
MBB.erase(I);
MBB.erase(AluI);
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 2f0ab4ca9de4..33f5bb365da8 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -99,17 +99,17 @@ namespace {
// but the exact mapping of FP registers to stack slots is fixed later.
struct LiveBundle {
// Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
- unsigned Mask;
+ unsigned Mask = 0;
// Number of pre-assigned live registers in FixStack. This is 0 when the
// stack order has not yet been fixed.
- unsigned FixCount;
+ unsigned FixCount = 0;
// Assigned stack order for live-in registers.
// FixStack[i] == getStackEntry(i) for all i < FixCount.
unsigned char FixStack[8];
- LiveBundle() : Mask(0), FixCount(0) {}
+ LiveBundle() = default;
// Have the live registers been assigned a stack order yet?
bool isFixed() const { return !Mask || FixCount; }
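The LiveBundle hunk is a pure modernization: the zeroing moves from a hand-written constructor into default member initializers, and the constructor becomes defaulted. A minimal stand-alone illustration of the idiom (struct names local to this sketch):

// Before: members initialized in a hand-written default constructor.
struct LiveBundleOld {
  unsigned Mask;
  unsigned FixCount;
  LiveBundleOld() : Mask(0), FixCount(0) {}
};

// After: default member initializers, defaulted constructor; same behavior.
struct LiveBundleNew {
  unsigned Mask = 0;
  unsigned FixCount = 0;
  LiveBundleNew() = default;
};

int main() { return static_cast<int>(LiveBundleNew().Mask); } // returns 0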
@@ -866,7 +866,7 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
if (Opcode != -1) {
I->setDesc(TII->get(Opcode));
if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
- I->RemoveOperand(0);
+ I->removeOperand(0);
MI.dropDebugNumber();
} else { // Insert an explicit pop
// If this instruction sets FPSW, which is read in following instruction,
@@ -1034,7 +1034,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
STReturns |= 1 << getFPReg(Op);
// Remove the operand so that later passes don't see it.
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
--i;
--e;
}
@@ -1098,7 +1098,7 @@ void FPS::handleReturn(MachineBasicBlock::iterator &I) {
LiveMask |= (1 << getFPReg(Op));
// Remove the operand so that later passes don't see it.
- MI.RemoveOperand(i);
+ MI.removeOperand(i);
--i;
--e;
}
@@ -1162,7 +1162,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
unsigned DestReg = getFPReg(MI.getOperand(0));
// Change from the pseudo instruction to the concrete instruction.
- MI.RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI.removeOperand(0); // Remove the explicit ST(0) operand
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
MI.addOperand(
MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
@@ -1210,7 +1210,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
}
// Convert from the pseudo instruction to the concrete instruction.
- MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand
+ MI.removeOperand(NumOps - 1); // Remove explicit ST(0) operand
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
MI.addOperand(
MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
@@ -1263,8 +1263,8 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
}
// Change from the pseudo instruction to the concrete instruction.
- MI.RemoveOperand(1); // Drop the source operand.
- MI.RemoveOperand(0); // Drop the destination operand.
+ MI.removeOperand(1); // Drop the source operand.
+ MI.removeOperand(0); // Drop the destination operand.
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
MI.dropDebugNumber();
}
@@ -1464,7 +1464,7 @@ void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
// Change from the pseudo instruction to the concrete instruction.
MI.getOperand(0).setReg(getSTReg(Op1));
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
MI.dropDebugNumber();
@@ -1489,8 +1489,8 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
// Change the second operand to the stack register that the operand is in.
// Change from the pseudo instruction to the concrete instruction.
- MI.RemoveOperand(0);
- MI.RemoveOperand(1);
+ MI.removeOperand(0);
+ MI.removeOperand(1);
MI.getOperand(0).setReg(getSTReg(Op1));
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
MI.dropDebugNumber();
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 51f2ced321bb..d524090f902e 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "X86FrameLowering.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
@@ -19,6 +20,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -99,7 +101,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
- MFI.hasCopyImplyingStackAdjustment());
+ (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}
static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
@@ -435,11 +437,13 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
- const MCCFIInstruction &CFIInst) const {
+ const MCCFIInstruction &CFIInst,
+ MachineInstr::MIFlag Flag) const {
MachineFunction &MF = *MBB.getParent();
unsigned CFIIndex = MF.addFrameInst(CFIInst);
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(Flag);
}
/// Emits Dwarf Info specifying offsets of callee saved registers and
@@ -492,6 +496,87 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
}
}
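+/// Zero the registers in \p RegsToZero just before the return in \p MBB:
+/// the x87 stack via LD_F0/ST_FPrr pairs, GPRs via 32-bit self-XOR, and
+/// vector/mask registers via the widest XOR the subtarget supports.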
+void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
+ MachineBasicBlock &MBB) const {
+ const MachineFunction &MF = *MBB.getParent();
+
+ // Insertion point.
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+
+ // Fake a debug loc.
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // Zero out FP stack if referenced. Do this outside of the loop below so that
+ // it's done only once.
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ for (MCRegister Reg : RegsToZero.set_bits()) {
+ if (!X86::RFP80RegClass.contains(Reg))
+ continue;
+
+ unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
+
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
+ break;
+ }
+
+ // For GPRs, we only care to clear out the 32-bit register.
+ BitVector GPRsToZero(TRI->getNumRegs());
+ for (MCRegister Reg : RegsToZero.set_bits())
+ if (TRI->isGeneralPurposeRegister(MF, Reg)) {
+ GPRsToZero.set(getX86SubSuperRegisterOrZero(Reg, 32));
+ RegsToZero.reset(Reg);
+ }
+
+ for (MCRegister Reg : GPRsToZero.set_bits())
+ BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+
+ // Zero out registers.
+ for (MCRegister Reg : RegsToZero.set_bits()) {
+ if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
+ // FIXME: Ignore MMX registers?
+ continue;
+
+ unsigned XorOp;
+ if (X86::VR128RegClass.contains(Reg)) {
+ // XMM#
+ if (!ST.hasSSE1())
+ continue;
+ XorOp = X86::PXORrr;
+ } else if (X86::VR256RegClass.contains(Reg)) {
+ // YMM#
+ if (!ST.hasAVX())
+ continue;
+ XorOp = X86::VPXORrr;
+ } else if (X86::VR512RegClass.contains(Reg)) {
+ // ZMM#
+ if (!ST.hasAVX512())
+ continue;
+ XorOp = X86::VPXORYrr;
+ } else if (X86::VK1RegClass.contains(Reg) ||
+ X86::VK2RegClass.contains(Reg) ||
+ X86::VK4RegClass.contains(Reg) ||
+ X86::VK8RegClass.contains(Reg) ||
+ X86::VK16RegClass.contains(Reg)) {
+ if (!ST.hasVLX())
+ continue;
+ XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
+ } else {
+ continue;
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ }
+}
+
void X86FrameLowering::emitStackProbe(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
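// emitZeroCallUsedRegs() implements the X86 side of the zero-call-used-regs
// hardening feature. A hedged usage sketch (assuming Clang's attribute
// spelling; the function below is illustrative only):
//
//   __attribute__((zero_call_used_regs("used-gpr")))
//   int transform(int x) { return x * 3; }
//
// Before returning, the epilogue XORs the 32-bit aliases of the
// call-clobbered GPRs the function used; vector and mask registers are
// cleared with the cheapest XOR the subtarget supports (PXOR/VPXOR/VPXORY/
// KXOR), and the x87 stack, if referenced, is cleared once via LD_F0/ST_FPrr.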
@@ -1289,6 +1374,9 @@ bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}
+/// Return true if we need to use the restricted Windows x64 prologue and
+/// epilogue code patterns that can be described with WinCFI (.seh_*
+/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}
@@ -1558,12 +1646,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Define the current CFA rule to use the provided offset.
assert(StackSize);
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth),
+ MachineInstr::FrameSetup);
// Change the rule for the FramePtr to be an "offset" rule.
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
- BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
- nullptr, DwarfFramePtr, 2 * stackGrowth));
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
+ 2 * stackGrowth),
+ MachineInstr::FrameSetup);
}
if (NeedsWinCFI) {
@@ -1630,7 +1721,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
BuildCFI(
MBB, MBBI, DL,
- MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
+ MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+ MachineInstr::FrameSetup);
}
if (NeedsWinFPO) {
@@ -1681,7 +1773,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Define the current CFA rule to use the provided offset.
assert(StackSize);
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
+ MachineInstr::FrameSetup);
StackOffset += stackGrowth;
}
@@ -1962,7 +2055,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
assert(StackSize);
BuildCFI(
MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
+ MachineInstr::FrameSetup);
}
// Emit DWARF info specifying the offsets of the callee-saved registers.
@@ -2145,11 +2239,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned DwarfStackPtr =
TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize));
+ MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+ MachineInstr::FrameDestroy);
if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
BuildCFI(MBB, AfterPop, DL,
- MCCFIInstruction::createRestore(nullptr, DwarfFramePtr));
+ MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
+ MachineInstr::FrameDestroy);
--MBBI;
--AfterPop;
}
@@ -2226,7 +2322,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Define the current CFA rule to use the provided offset.
BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::cfiDefCfaOffset(
- nullptr, CSSize + TailCallArgReserveSize + SlotSize));
+ nullptr, CSSize + TailCallArgReserveSize + SlotSize),
+ MachineInstr::FrameDestroy);
}
--MBBI;
}
@@ -2252,7 +2349,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Opc == X86::POP32r || Opc == X86::POP64r) {
Offset += SlotSize;
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
+ MachineInstr::FrameDestroy);
}
}
}
@@ -2830,17 +2928,8 @@ void X86FrameLowering::adjustForSegmentedStacks(
// prologue.
StackSize = MFI.getStackSize();
- // Do not generate a prologue for leaf functions with a stack of size zero.
- // For non-leaf functions we have to allow for the possibility that the
- // call is to a non-split function, as in PR37807. This function could also
- // take the address of a non-split function. When the linker tries to adjust
- // its non-existent prologue, it would fail with an error. Mark the object
- // file so that such failures are not errors. See this Go language bug-report
- // https://go-review.googlesource.com/c/go/+/148819/
- if (StackSize == 0 && !MFI.hasTailCall()) {
- MF.getMMI().setHasNosplitStack(true);
+ if (!MFI.needsSplitStackProlog())
return;
- }
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -3023,7 +3112,6 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addReg(0)
.addExternalSymbol("__morestack_addr")
.addReg(0);
- MF.getMMI().setUsesMorestackAddr(true);
} else {
if (Is64Bit)
BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 987facbfeae4..9b83fe77d505 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -176,7 +176,8 @@ public:
/// Wraps up getting a CFI index and building a MachineInstr for it.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, const MCCFIInstruction &CFIInst) const;
+ const DebugLoc &DL, const MCCFIInstruction &CFIInst,
+ MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
/// Sets up EBP and optionally ESI based on the incoming EBP value. Only
/// needed for 32-bit. Used in funclet prologues and at catchret destinations.
@@ -233,6 +234,10 @@ private:
const DebugLoc &DL, uint64_t Offset,
uint64_t Align) const;
+ /// Emit target zero call-used regs.
+ void emitZeroCallUsedRegs(BitVector RegsToZero,
+ MachineBasicBlock &MBB) const override;
+
void adjustFrameForMsvcCxxEh(MachineFunction &MF) const;
/// Aligns the stack pointer by ANDing it with -MaxAlign.
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5b90c67deae6..f88037e95d33 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -59,30 +59,27 @@ namespace {
enum {
RegBase,
FrameIndexBase
- } BaseType;
+ } BaseType = RegBase;
// This is really a union, discriminated by BaseType!
SDValue Base_Reg;
- int Base_FrameIndex;
+ int Base_FrameIndex = 0;
- unsigned Scale;
+ unsigned Scale = 1;
SDValue IndexReg;
- int32_t Disp;
+ int32_t Disp = 0;
SDValue Segment;
- const GlobalValue *GV;
- const Constant *CP;
- const BlockAddress *BlockAddr;
- const char *ES;
- MCSymbol *MCSym;
- int JT;
+ const GlobalValue *GV = nullptr;
+ const Constant *CP = nullptr;
+ const BlockAddress *BlockAddr = nullptr;
+ const char *ES = nullptr;
+ MCSymbol *MCSym = nullptr;
+ int JT = -1;
Align Alignment; // CP alignment.
- unsigned char SymbolFlags; // X86II::MO_*
+ unsigned char SymbolFlags = X86II::MO_NO_FLAG; // X86II::MO_*
bool NegateIndex = false;
- X86ISelAddressMode()
- : BaseType(RegBase), Base_FrameIndex(0), Scale(1), Disp(0), GV(nullptr),
- CP(nullptr), BlockAddr(nullptr), ES(nullptr), MCSym(nullptr), JT(-1),
- SymbolFlags(X86II::MO_NO_FLAG) {}
+ X86ISelAddressMode() = default;
bool hasSymbolicDisplacement() const {
return GV != nullptr || CP != nullptr || ES != nullptr ||
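// The constructor-initializer list above is replaced with C++11 in-class
// default member initializers. A minimal sketch of the idiom (type and
// member names are hypothetical):
//
//   struct AddrMode {
//     int BaseFrameIndex = 0;          // applied by every constructor
//     const GlobalValue *GV = nullptr;
//     AddrMode() = default;            // no init list to keep in sync
//   };
//
// Members added later get their default at the declaration site, avoiding
// the classic bug of forgetting to extend the constructor's init list.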
@@ -446,6 +443,43 @@ namespace {
return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
}
+ SDValue getSBBZero(SDNode *N) {
+ SDLoc dl(N);
+ MVT VT = N->getSimpleValueType(0);
+
+ // Create zero.
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
+ SDValue Zero =
+ SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
+ if (VT == MVT::i64) {
+ Zero = SDValue(
+ CurDAG->getMachineNode(
+ TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
+ CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
+ 0);
+ }
+
+ // Copy flags to the EFLAGS register and glue it to next node.
+ unsigned Opcode = N->getOpcode();
+ assert((Opcode == X86ISD::SBB || Opcode == X86ISD::SETCC_CARRY) &&
+ "Unexpected opcode for SBB materialization");
+ unsigned FlagOpIndex = Opcode == X86ISD::SBB ? 2 : 1;
+ SDValue EFLAGS =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+ N->getOperand(FlagOpIndex), SDValue());
+
+ // Create a 64-bit instruction if the result is 64-bits otherwise use the
+ // 32-bit version.
+ unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
+ MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+ VTs = CurDAG->getVTList(SBBVT, MVT::i32);
+ return SDValue(
+ CurDAG->getMachineNode(Opc, dl, VTs,
+ {Zero, Zero, EFLAGS, EFLAGS.getValue(1)}),
+ 0);
+ }
+
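// getSBBZero() materializes the carry flag as an all-zeros/all-ones value
// with explicitly zeroed inputs. Roughly the following x86 (an illustrative
// sketch; the flag-producing instruction is scheduled in between):
//
//   xor  eax, eax        ; MOV32r0 - zero the SBB inputs
//   ...                  ; instruction that defines CF
//   sbb  eax, eax        ; eax = CF ? -1 : 0
//
// Zeroing the inputs matters on subtargets without SBB dependency breaking
// (hasSBBDepBreaking(), used further down): there `sbb eax, eax` would
// otherwise carry a false dependency on the previous value of eax.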
// Helper to detect unneeded and instructions on shift amounts. Called
// from PatFrags in tablegen.
bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
@@ -476,6 +510,9 @@ namespace {
return Subtarget->getInstrInfo();
}
+ /// Return the condition code of the given SDNode
+ X86::CondCode getCondFromNode(SDNode *N) const;
+
/// Address-mode matching performs shift-of-and to and-of-shift
/// reassociation in order to expose more scaled addressing
/// opportunities.
@@ -492,7 +529,7 @@ namespace {
unsigned StoreSize = N->getMemoryVT().getStoreSize();
- if (N->getAlignment() < StoreSize)
+ if (N->getAlign().value() < StoreSize)
return false;
switch (StoreSize) {
@@ -2391,6 +2428,14 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
return false;
break;
+ case ISD::XOR:
+ // We want to look through a transform in InstCombine that
+ // turns 'add' with min_signed_val into 'xor', so we can treat this 'xor'
+ // exactly like an 'add'.
+ if (isMinSignedConstant(N.getOperand(1)) && !matchAdd(N, AM, Depth))
+ return false;
+ break;
+
case ISD::AND: {
// Perform some heroic transforms on an and of a constant-count shift
// with a constant to enable use of the scaled offset field.
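// The new ISD::XOR case rests on the identity x ^ INT_MIN == x + INT_MIN:
// modulo 2^n, flipping the sign bit and adding it are the same operation,
// since any carry out of the top bit is discarded. InstCombine
// canonicalizes such adds to xor, so matchAdd() is reused here to expose
// the value to the scaled-addressing matcher again, e.g. for i32:
//
//   (x ^ 0x80000000) + 4*i  ==  (x + 0x80000000) + 4*i   // LEA-matchable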
@@ -2745,10 +2790,10 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
case X86ISD::SUB:
case X86ISD::ADC:
case X86ISD::SBB:
- /* TODO: These opcodes can be added safely, but we may want to justify
- their inclusion for different reasons (better for reg-alloc).
case X86ISD::SMUL:
case X86ISD::UMUL:
+ /* TODO: These opcodes can be added safely, but we may want to justify
+ their inclusion for different reasons (better for reg-alloc).
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
@@ -2759,10 +2804,9 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
return false;
}
};
- // TODO: This could be an 'or' rather than 'and' to make the transform more
- // likely to happen. We might want to factor in whether there's a
- // load folding opportunity for the math op that disappears with LEA.
- if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
+ // TODO: We might want to factor in whether there's a load folding
+ // opportunity for the math op that disappears with LEA.
+ if (isMathWithFlags(N.getOperand(0)) || isMathWithFlags(N.getOperand(1)))
Complexity++;
}
@@ -2891,24 +2935,15 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
CR->getSignedMax().slt(1ull << Width);
}
-static X86::CondCode getCondFromNode(SDNode *N) {
+X86::CondCode X86DAGToDAGISel::getCondFromNode(SDNode *N) const {
assert(N->isMachineOpcode() && "Unexpected node");
- X86::CondCode CC = X86::COND_INVALID;
unsigned Opc = N->getMachineOpcode();
- if (Opc == X86::JCC_1)
- CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
- else if (Opc == X86::SETCCr)
- CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
- else if (Opc == X86::SETCCm)
- CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
- else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
- Opc == X86::CMOV64rr)
- CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
- else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
- Opc == X86::CMOV64rm)
- CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
-
- return CC;
+ const MCInstrDesc &MCID = getInstrInfo()->get(Opc);
+ int CondNo = X86::getCondSrcNoFromDesc(MCID);
+ if (CondNo < 0)
+ return X86::COND_INVALID;
+
+ return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo));
}
/// Test whether the given X86ISD::CMP node has any users that use a flag
@@ -3464,7 +3499,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
auto checkUses = [AllowExtraUsesByDefault](SDValue Op, unsigned NUses,
Optional<bool> AllowExtraUses) {
- return AllowExtraUses.getValueOr(AllowExtraUsesByDefault) ||
+ return AllowExtraUses.value_or(AllowExtraUsesByDefault) ||
Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
};
auto checkOneUse = [checkUses](SDValue Op,
@@ -5478,7 +5513,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
MVT CmpVT = N0.getSimpleValueType();
// Floating point needs special handling if we don't have FCOMI.
- if (Subtarget->hasCMov())
+ if (Subtarget->canUseCMOV())
break;
bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
@@ -5518,7 +5553,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Move AH into flags.
// Some 64-bit targets lack SAHF support, but they do support FCOMI.
- assert(Subtarget->hasLAHFSAHF() &&
+ assert(Subtarget->canUseLAHFSAHF() &&
"Target doesn't support SAHF or FCOMI?");
SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
Chain = AH;
@@ -5567,40 +5602,86 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
// Look past the truncate if CMP is the only use of it.
- if (N0.getOpcode() == ISD::AND &&
- N0.getNode()->hasOneUse() &&
+ if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
N0.getValueType() != MVT::i8) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!C) break;
- uint64_t Mask = C->getZExtValue();
+ auto *MaskC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!MaskC)
+ break;
+
// We may have looked through a truncate so mask off any bits that
// shouldn't be part of the compare.
+ uint64_t Mask = MaskC->getZExtValue();
Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
- // Check if we can replace AND+IMM64 with a shift. This is possible for
- // masks/ like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
- // flag.
- if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
+ // Check if we can replace AND+IMM{32,64} with a shift. This is possible
+ // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the
+ // zero flag.
+ if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) &&
onlyUsesZeroFlag(SDValue(Node, 0))) {
- if (isMask_64(~Mask)) {
- unsigned TrailingZeros = countTrailingZeros(Mask);
- SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
- SDValue Shift =
- SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
- N0.getOperand(0), Imm), 0);
- MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
- MVT::i32, Shift, Shift);
- ReplaceNode(Node, Test);
- return;
+ unsigned ShiftOpcode = ISD::DELETED_NODE;
+ unsigned ShiftAmt;
+ unsigned SubRegIdx;
+ MVT SubRegVT;
+ unsigned TestOpcode;
+ unsigned LeadingZeros = countLeadingZeros(Mask);
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+
+ // With leading/trailing zeros, the transform is profitable if we can
+ // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
+ // incurring any extra register moves.
+ bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse();
+ if (LeadingZeros == 0 && SavesBytes) {
+ // If the mask covers the most significant bit, then we can replace
+ // TEST+AND with a SHR and check eflags.
+ // This emits a redundant TEST which is subsequently eliminated.
+ ShiftOpcode = X86::SHR64ri;
+ ShiftAmt = TrailingZeros;
+ SubRegIdx = 0;
+ TestOpcode = X86::TEST64rr;
+ } else if (TrailingZeros == 0 && SavesBytes) {
+ // If the mask covers the least significant bit, then we can replace
+ // TEST+AND with a SHL and check eflags.
+ // This emits a redundant TEST which is subsequently eliminated.
+ ShiftOpcode = X86::SHL64ri;
+ ShiftAmt = LeadingZeros;
+ SubRegIdx = 0;
+ TestOpcode = X86::TEST64rr;
+ } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
+ // If the shifted mask extends into the high half and is 8/16/32 bits
+ // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
+ unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
+ if (PopCount == 8) {
+ ShiftOpcode = X86::SHR64ri;
+ ShiftAmt = TrailingZeros;
+ SubRegIdx = X86::sub_8bit;
+ SubRegVT = MVT::i8;
+ TestOpcode = X86::TEST8rr;
+ } else if (PopCount == 16) {
+ ShiftOpcode = X86::SHR64ri;
+ ShiftAmt = TrailingZeros;
+ SubRegIdx = X86::sub_16bit;
+ SubRegVT = MVT::i16;
+ TestOpcode = X86::TEST16rr;
+ } else if (PopCount == 32) {
+ ShiftOpcode = X86::SHR64ri;
+ ShiftAmt = TrailingZeros;
+ SubRegIdx = X86::sub_32bit;
+ SubRegVT = MVT::i32;
+ TestOpcode = X86::TEST32rr;
+ }
}
- if (isMask_64(Mask)) {
- unsigned LeadingZeros = countLeadingZeros(Mask);
- SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
- SDValue Shift =
- SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
- N0.getOperand(0), Imm), 0);
- MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
- MVT::i32, Shift, Shift);
+ if (ShiftOpcode != ISD::DELETED_NODE) {
+ SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);
+ SDValue Shift = SDValue(
+ CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,
+ N0.getOperand(0), ShiftC),
+ 0);
+ if (SubRegIdx != 0) {
+ Shift =
+ CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift);
+ }
+ MachineSDNode *Test =
+ CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift);
ReplaceNode(Node, Test);
return;
}
@@ -5769,21 +5850,28 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
case X86ISD::SETCC_CARRY: {
- // We have to do this manually because tblgen will put the eflags copy in
- // the wrong place if we use an extract_subreg in the pattern.
MVT VT = Node->getSimpleValueType(0);
+ SDValue Result;
+ if (Subtarget->hasSBBDepBreaking()) {
+ // We have to do this manually because tblgen will put the eflags copy in
+ // the wrong place if we use an extract_subreg in the pattern.
+ // Copy flags to the EFLAGS register and glue it to next node.
+ SDValue EFLAGS =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
+ Node->getOperand(1), SDValue());
- // Copy flags to the EFLAGS register and glue it to next node.
- SDValue EFLAGS =
- CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
- Node->getOperand(1), SDValue());
-
- // Create a 64-bit instruction if the result is 64-bits otherwise use the
- // 32-bit version.
- unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
- MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
- SDValue Result = SDValue(
- CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
+ // Create a 64-bit instruction if the result is 64-bits otherwise use the
+ // 32-bit version.
+ unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
+ MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
+ Result = SDValue(
+ CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)),
+ 0);
+ } else {
+ // The target does not recognize sbb with the same reg operand as a
+ // no-source idiom, so we explicitly zero the input values.
+ Result = getSBBZero(Node);
+ }
// For less than 32-bits we need to extract from the 32-bit node.
if (VT == MVT::i8 || VT == MVT::i16) {
@@ -5798,35 +5886,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::SBB: {
if (isNullConstant(Node->getOperand(0)) &&
isNullConstant(Node->getOperand(1))) {
- MVT VT = Node->getSimpleValueType(0);
-
- // Create zero.
- SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
- SDValue Zero =
- SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
- if (VT == MVT::i64) {
- Zero = SDValue(
- CurDAG->getMachineNode(
- TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
- CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
- CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
- 0);
- }
-
- // Copy flags to the EFLAGS register and glue it to next node.
- SDValue EFLAGS =
- CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
- Node->getOperand(2), SDValue());
-
- // Create a 64-bit instruction if the result is 64-bits otherwise use the
- // 32-bit version.
- unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
- MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
- VTs = CurDAG->getVTList(SBBVT, MVT::i32);
- SDValue Result =
- SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS,
- EFLAGS.getValue(1)}),
- 0);
+ SDValue Result = getSBBZero(Node);
// Replace the flag use.
ReplaceUses(SDValue(Node, 1), Result.getValue(1));
@@ -5834,6 +5894,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Replace the result use.
if (!SDValue(Node, 0).use_empty()) {
// For less than 32-bits we need to extract from the 32-bit node.
+ MVT VT = Node->getSimpleValueType(0);
if (VT == MVT::i8 || VT == MVT::i16) {
int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
@@ -6112,6 +6173,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
case InlineAsm::Constraint_v: // not offsetable ??
case InlineAsm::Constraint_m: // memory
case InlineAsm::Constraint_X:
+ case InlineAsm::Constraint_p: // address
if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
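// Constraint_p is the inline-asm "p" (valid memory address) constraint; it
// now runs through the same full addressing-mode selection as "m" and "X".
// A hedged source-level sketch that exercises it (GNU syntax; the %a
// modifier prints the operand as an address):
//
//   void touch(const void *ptr) {
//     __asm__ volatile("prefetcht0 %a0" :: "p"(ptr));
//   }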
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 90753b5b4d33..61c1fd25031d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -108,9 +108,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
- X86ScalarSSEf64 = Subtarget.hasSSE2();
- X86ScalarSSEf32 = Subtarget.hasSSE1();
- X86ScalarSSEf16 = Subtarget.hasFP16();
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Set up the TargetLowering object.
@@ -170,7 +167,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
// FIXME: Should we be limiting the atomic size on other configs? Default is
// 1024.
- if (!Subtarget.hasCmpxchg8b())
+ if (!Subtarget.canUseCMPXCHG8B())
setMaxAtomicSizeInBitsSupported(32);
// Set up the register classes.
@@ -200,7 +197,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Integer absolute.
- if (Subtarget.hasCMov()) {
+ if (Subtarget.canUseCMOV()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
if (Subtarget.is64Bit())
@@ -314,7 +311,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
+ if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget.is64Bit()) {
@@ -415,14 +412,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Op, MVT::f128, Expand);
}
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
- setTruncStoreAction(MVT::f32, MVT::f16, Expand);
- setTruncStoreAction(MVT::f64, MVT::f16, Expand);
- setTruncStoreAction(MVT::f80, MVT::f16, Expand);
- setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
+ setTruncStoreAction(VT, MVT::f16, Expand);
+ setTruncStoreAction(VT, MVT::bf16, Expand);
+
+ setOperationAction(ISD::BF16_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_BF16, VT, Expand);
+ }
setOperationAction(ISD::PARITY, MVT::i8, Custom);
setOperationAction(ISD::PARITY, MVT::i16, Custom);
@@ -497,7 +495,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, VT, Custom);
}
- if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
+ if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
@@ -516,9 +514,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.is64Bit())
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
- if (Subtarget.hasCmpxchg16b()) {
+ if (Subtarget.canUseCMPXCHG16B())
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
- }
// FIXME - use subtarget debug flags
if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
@@ -535,7 +532,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
- if (Subtarget.getTargetTriple().isPS4CPU())
+ if (Subtarget.isTargetPS())
setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
else
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
@@ -556,9 +553,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
- if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
- // f32 and f64 use SSE.
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+
+ if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
+ // f16, f32 and f64 use SSE.
// Set up the FP register classes.
+ addRegisterClass(MVT::f16, Subtarget.hasAVX512() ? &X86::FR16XRegClass
+ : &X86::FR16RegClass);
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
: &X86::FR32RegClass);
addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
@@ -590,11 +591,54 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSINCOS, VT, Expand);
}
+ // Half type will be promoted by default.
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall);
+ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
+
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+
// Lower this to MOVMSK plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
- } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
+ } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
(UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
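// In the SSE2 block above, f16 becomes a legal type whose arithmetic is
// Promote'd: extend to f32, compute, truncate back. Without F16C the two
// conversions become the libcalls registered above, so a hedged sketch of
// what `_Float16 f(_Float16 a, _Float16 b) { return a + b; }` lowers to on
// plain SSE2 is:
//
//   call  __extendhfsf2   ; a -> float
//   call  __extendhfsf2   ; b -> float
//   addss xmm0, xmm1
//   call  __truncsfhf2    ; float -> _Float16
//
// The later hasF16C() block replaces the calls with VCVTPH2PS/VCVTPS2PH.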
@@ -664,6 +708,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
} else // SSE immediates.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
}
+ // Support fp16 0 immediate.
+ if (isTypeLegal(MVT::f16))
+ addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
+
// Handle constrained floating-point operations of scalar.
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
@@ -673,7 +721,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
@@ -725,7 +772,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
+ if (isTypeLegal(MVT::f16)) {
+ setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
+ } else {
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
+ }
// FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
// as Custom.
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
@@ -877,7 +929,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
// split/scalarized right now.
- if (VT.getVectorElementType() == MVT::f16)
+ if (VT.getVectorElementType() == MVT::f16 ||
+ VT.getVectorElementType() == MVT::bf16)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
@@ -949,6 +1002,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v16i8, Legal);
+ setOperationAction(ISD::AVGCEILU, MVT::v8i16, Legal);
setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
@@ -1067,6 +1122,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v4i16, Custom);
setOperationAction(ISD::STORE, MVT::v8i8, Custom);
+ // Add 32-bit vector stores to help vectorization opportunities.
+ setOperationAction(ISD::STORE, MVT::v2i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i8, Custom);
+
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
@@ -1285,13 +1344,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (VT == MVT::v4i64) continue;
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
- setOperationAction(ISD::FSHL, MVT::v32i8, Custom);
- setOperationAction(ISD::FSHR, MVT::v32i8, Custom);
- setOperationAction(ISD::FSHL, MVT::v8i32, Custom);
- setOperationAction(ISD::FSHR, MVT::v8i32, Custom);
-
// These types need custom splitting if their input is a 128-bit vector.
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
@@ -1353,6 +1409,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
@@ -1446,6 +1504,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
+ if (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) {
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+ }
+
// This block controls legalization of the mask vector sizes that are
// available with AVX512. 512-bit vectors are in a separate block controlled
// by useAVX512Regs.
@@ -1652,6 +1717,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v32i16, HasBWI ? Legal : Custom);
+ setOperationAction(ISD::AVGCEILU, MVT::v64i8, HasBWI ? Legal : Custom);
setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
@@ -1698,6 +1765,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
+ setOperationAction(ISD::FSHL, MVT::v32i16, Custom);
+ setOperationAction(ISD::FSHR, MVT::v32i16, Custom);
setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
@@ -1970,10 +2039,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- if (isTypeLegal(MVT::f80)) {
- setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
- }
setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
@@ -2059,9 +2124,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
setOperationAction(ISD::STORE, MVT::v4f16, Custom);
}
-
- // Support fp16 0 immediate
- addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
@@ -2209,55 +2271,55 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Op, MVT::f32, Promote);
// We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
- setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::CONCAT_VECTORS);
- setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
- setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
- setTargetDAGCombine(ISD::BITCAST);
- setTargetDAGCombine(ISD::VSELECT);
- setTargetDAGCombine(ISD::SELECT);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::FADD);
- setTargetDAGCombine(ISD::FSUB);
- setTargetDAGCombine(ISD::FNEG);
- setTargetDAGCombine(ISD::FMA);
- setTargetDAGCombine(ISD::STRICT_FMA);
- setTargetDAGCombine(ISD::FMINNUM);
- setTargetDAGCombine(ISD::FMAXNUM);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::LOAD);
- setTargetDAGCombine(ISD::MLOAD);
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::MSTORE);
- setTargetDAGCombine(ISD::TRUNCATE);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
- setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
- setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
- setTargetDAGCombine(ISD::SINT_TO_FP);
- setTargetDAGCombine(ISD::UINT_TO_FP);
- setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
- setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
- setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::MUL);
- setTargetDAGCombine(ISD::XOR);
- setTargetDAGCombine(ISD::MSCATTER);
- setTargetDAGCombine(ISD::MGATHER);
- setTargetDAGCombine(ISD::FP16_TO_FP);
- setTargetDAGCombine(ISD::FP_EXTEND);
- setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
- setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
+ ISD::SCALAR_TO_VECTOR,
+ ISD::INSERT_VECTOR_ELT,
+ ISD::EXTRACT_VECTOR_ELT,
+ ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR,
+ ISD::BITCAST,
+ ISD::VSELECT,
+ ISD::SELECT,
+ ISD::SHL,
+ ISD::SRA,
+ ISD::SRL,
+ ISD::OR,
+ ISD::AND,
+ ISD::ADD,
+ ISD::FADD,
+ ISD::FSUB,
+ ISD::FNEG,
+ ISD::FMA,
+ ISD::STRICT_FMA,
+ ISD::FMINNUM,
+ ISD::FMAXNUM,
+ ISD::SUB,
+ ISD::LOAD,
+ ISD::MLOAD,
+ ISD::STORE,
+ ISD::MSTORE,
+ ISD::TRUNCATE,
+ ISD::ZERO_EXTEND,
+ ISD::ANY_EXTEND,
+ ISD::SIGN_EXTEND,
+ ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG,
+ ISD::SINT_TO_FP,
+ ISD::UINT_TO_FP,
+ ISD::STRICT_SINT_TO_FP,
+ ISD::STRICT_UINT_TO_FP,
+ ISD::SETCC,
+ ISD::MUL,
+ ISD::XOR,
+ ISD::MSCATTER,
+ ISD::MGATHER,
+ ISD::FP16_TO_FP,
+ ISD::FP_EXTEND,
+ ISD::STRICT_FP_EXTEND,
+ ISD::FP_ROUND});
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -2568,9 +2630,9 @@ EVT X86TargetLowering::getOptimalMemOpType(
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
if (VT == MVT::f32)
- return X86ScalarSSEf32;
+ return Subtarget.hasSSE1();
if (VT == MVT::f64)
- return X86ScalarSSEf64;
+ return Subtarget.hasSSE2();
return true;
}
@@ -3566,10 +3628,15 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
MFI.setObjectSExt(FI, true);
}
+ MaybeAlign Alignment;
+ if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
+ ValVT != MVT::f80)
+ Alignment = MaybeAlign(4);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ Alignment);
return ExtendedInMem
? (VA.getValVT().isVector()
? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
@@ -3906,7 +3973,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
else if (RegVT == MVT::f16)
- RC = &X86::FR16XRegClass;
+ RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
else if (RegVT == MVT::f32)
RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
else if (RegVT == MVT::f64)
@@ -4088,9 +4155,14 @@ SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
if (isByVal)
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+ MaybeAlign Alignment;
+ if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
+ Arg.getSimpleValueType() != MVT::f80)
+ Alignment = MaybeAlign(4);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ Alignment);
}
/// Emit a load of return address if tail call
@@ -5076,7 +5148,7 @@ bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
// If this is an unaligned vector, make sure the target supports folding it.
auto *Ld = cast<LoadSDNode>(Op.getNode());
if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
- Ld->getValueSizeInBits(0) == 128 && Ld->getAlignment() < 16)
+ Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))
return false;
// TODO: If this is a non-temporal load and the target has an instruction
@@ -5171,13 +5243,6 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
}
}
-static bool isTargetShuffleSplat(SDValue Op) {
- unsigned Opcode = Op.getOpcode();
- if (Opcode == ISD::EXTRACT_SUBVECTOR)
- return isTargetShuffleSplat(Op.getOperand(0));
- return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
-}
-
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -5429,6 +5494,18 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(1);
Info.flags |= MachineMemOperand::MOLoad;
return true;
+ case Intrinsic::x86_atomic_bts:
+ case Intrinsic::x86_atomic_btc:
+ case Intrinsic::x86_atomic_btr: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ unsigned Size = I.getType()->getScalarSizeInBits();
+ Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
+ Info.align = Align(Size);
+ Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
}
return false;
}
@@ -5643,6 +5720,22 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
+bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+ return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
+}
+
+bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
+ return !Subtarget.hasSSE2() || VT == MVT::f80;
+}
+
+bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
+ return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
+ (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;
+}
+
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
const SelectionDAG &DAG,
const MachineMemOperand &MMO) const {
@@ -5755,6 +5848,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
(N->getOpcode() == ISD::SRL &&
N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask");
+ // TODO: Should we always create i64 masks? Or only folded immediates?
EVT VT = N->getValueType(0);
if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
(Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
@@ -6281,7 +6375,8 @@ static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
// Helper function to collect subvector ops that are concatenated together,
// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
// The subvectors in Ops are guaranteed to be the same type.
-static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
+static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops,
+ SelectionDAG &DAG) {
assert(Ops.empty() && "Expected an empty ops vector");
if (N->getOpcode() == ISD::CONCAT_VECTORS) {
@@ -6297,21 +6392,34 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
EVT SubVT = Sub.getValueType();
// TODO - Handle more general insert_subvector chains.
- if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
- Idx == (VT.getVectorNumElements() / 2)) {
- // insert_subvector(insert_subvector(undef, x, lo), y, hi)
- if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
- Src.getOperand(1).getValueType() == SubVT &&
- isNullConstant(Src.getOperand(2))) {
- Ops.push_back(Src.getOperand(1));
+ if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2)) {
+ // insert_subvector(undef, x, lo)
+ if (Idx == 0 && Src.isUndef()) {
Ops.push_back(Sub);
+ Ops.push_back(DAG.getUNDEF(SubVT));
return true;
}
- // insert_subvector(x, extract_subvector(x, lo), hi)
- if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
- Ops.append(2, Sub);
- return true;
+ if (Idx == (VT.getVectorNumElements() / 2)) {
+ // insert_subvector(insert_subvector(undef, x, lo), y, hi)
+ if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Src.getOperand(1).getValueType() == SubVT &&
+ isNullConstant(Src.getOperand(2))) {
+ Ops.push_back(Src.getOperand(1));
+ Ops.push_back(Sub);
+ return true;
+ }
+ // insert_subvector(x, extract_subvector(x, lo), hi)
+ if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
+ Ops.append(2, Sub);
+ return true;
+ }
+ // insert_subvector(undef, x, hi)
+ if (Src.isUndef()) {
+ Ops.push_back(DAG.getUNDEF(SubVT));
+ Ops.push_back(Sub);
+ return true;
+ }
}
}
}
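// The widened matcher also accepts half-width inserts into undef, e.g.
// (sketch): insert_subvector(undef, x, 0) is collected as concat(x, undef)
// and insert_subvector(undef, x, NumElts/2) as concat(undef, x), letting
// callers such as IsNOT() below reason about each half independently.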
@@ -6770,7 +6878,7 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
}
}
SmallVector<SDValue, 2> CatOps;
- if (collectConcatOps(V.getNode(), CatOps)) {
+ if (collectConcatOps(V.getNode(), CatOps, DAG)) {
for (SDValue &CatOp : CatOps) {
SDValue NotCat = IsNOT(CatOp, DAG);
if (!NotCat) return SDValue();
@@ -7934,8 +8042,35 @@ static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask,
}
}
+// Attempt to create a shuffle mask from a VSELECT/BLENDV condition mask.
+static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
+ SDValue Cond, bool IsBLENDV = false) {
+ EVT CondVT = Cond.getValueType();
+ unsigned EltSizeInBits = CondVT.getScalarSizeInBits();
+ unsigned NumElts = CondVT.getVectorNumElements();
+
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits,
+ true, false))
+ return false;
+
+ Mask.resize(NumElts, SM_SentinelUndef);
+
+ for (int i = 0; i != (int)NumElts; ++i) {
+ Mask[i] = i;
+ // Arbitrarily choose from the 2nd operand if the select condition element
+ // is undef.
+ // TODO: Can we do better by matching patterns such as even/odd?
+ if (UndefElts[i] || (!IsBLENDV && EltBits[i].isZero()) ||
+ (IsBLENDV && EltBits[i].isNonNegative()))
+ Mask[i] += NumElts;
+ }
+
+ return true;
+}
+
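// Worked example for createShuffleMaskFromVSELECT (sketch): a v4i32 vselect
// with constant condition <-1, 0, undef, -1> yields the mask {0, 5, 6, 3} -
// true lanes pick from operand 1 (indices 0..3), false and undef lanes from
// operand 2 (indices 4..7). In IsBLENDV mode only the sign bit of each
// condition element is consulted (isNonNegative means "take operand 2"),
// matching the hardware blend semantics.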
// Forward declaration (for getFauxShuffleMask recursive check).
-// TODO: Use DemandedElts variant.
static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
const SelectionDAG &DAG, unsigned Depth,
@@ -7987,11 +8122,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
uint64_t ZeroMask = IsAndN ? 255 : 0;
if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
return false;
+ // We can't assume an undef src element gives an undef dst - the other src
+ // might be zero.
+ if (!UndefElts.isZero())
+ return false;
for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
- if (UndefElts[i]) {
- Mask.push_back(SM_SentinelUndef);
- continue;
- }
const APInt &ByteBits = EltBits[i];
if (ByteBits != 0 && ByteBits != 255)
return false;
@@ -8240,6 +8375,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
return true;
}
+ case ISD::VSELECT:
+ case X86ISD::BLENDV: {
+ SDValue Cond = N.getOperand(0);
+ if (createShuffleMaskFromVSELECT(Mask, Cond, Opcode == X86ISD::BLENDV)) {
+ Ops.push_back(N.getOperand(1));
+ Ops.push_back(N.getOperand(2));
+ return true;
+ }
+ return false;
+ }
case X86ISD::VTRUNC: {
SDValue Src = N.getOperand(0);
EVT SrcVT = Src.getValueType();
@@ -9076,7 +9221,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// Don't create 256-bit non-temporal aligned loads without AVX2 as these
// will lower to regular temporal loads and use the cache.
- if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
+ if (LDBase->isNonTemporal() && LDBase->getAlign() >= Align(32) &&
VT.is256BitVector() && !Subtarget.hasInt256())
return SDValue();
@@ -9462,7 +9607,8 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// For size optimization, also splat v2f64 and v2i64, and for size opt
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
- if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
+ if (ScalarSize == 32 ||
+ (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
(ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) ||
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
@@ -11651,33 +11797,6 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
return true;
}
-// Attempt to create a shuffle mask from a VSELECT condition mask.
-static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
- SDValue Cond) {
- EVT CondVT = Cond.getValueType();
- unsigned EltSizeInBits = CondVT.getScalarSizeInBits();
- unsigned NumElts = CondVT.getVectorNumElements();
-
- APInt UndefElts;
- SmallVector<APInt, 32> EltBits;
- if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits,
- true, false))
- return false;
-
- Mask.resize(NumElts, SM_SentinelUndef);
-
- for (int i = 0; i != (int)NumElts; ++i) {
- Mask[i] = i;
- // Arbitrarily choose from the 2nd operand if the select condition element
- // is undef.
- // TODO: Can we do better by matching patterns such as even/odd?
- if (UndefElts[i] || EltBits[i].isZero())
- Mask[i] += NumElts;
- }
-
- return true;
-}
-
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
@@ -13943,8 +14062,8 @@ static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
/// This is particularly important because the set of instructions varies
/// significantly based on whether the operand is a load or not.
static bool isShuffleFoldableLoad(SDValue V) {
- V = peekThroughBitcasts(V);
- return ISD::isNON_EXTLoad(V.getNode());
+ return V->hasOneUse() &&
+ ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode());
}
/// Try to lower insertion of a single element into a zero vector.
@@ -15796,7 +15915,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
V1 = extract128BitVector(V1V2, 0, DAG, DL);
V2 = extract128BitVector(V1V2, 4, DAG, DL);
} else {
- SmallVector<SDValue> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
+ SmallVector<SDValue, 4> DWordClearOps(4,
+ DAG.getConstant(0, DL, MVT::i32));
for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
SDValue DWordClearMask =
@@ -16615,9 +16735,7 @@ static SDValue lowerShuffleAsLanePermuteAndShuffle(
// otherwise we're (probably) better off doing a split.
if (VT == MVT::v4f64 &&
!all_of(Mask, [LaneSize](int M) { return M < LaneSize; }))
- if (SDValue V =
- lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG))
- return V;
+ return lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG);
// If there are only inputs from one 128-bit lane, splitting will in fact be
// less expensive. The flags track whether the given lane contains an element
@@ -17229,114 +17347,135 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
return SDValue();
// Bail if we already have a repeated lane shuffle mask.
- SmallVector<int, 8> RepeatedShuffleMask;
- if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask))
+ if (is128BitLaneRepeatedShuffleMask(VT, Mask))
return SDValue();
- // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
- // (with PERMQ/PERMPD), otherwise we can only permute whole 128-bit lanes.
- int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1;
- int NumSubLanes = NumLanes * SubLaneScale;
- int NumSubLaneElts = NumLaneElts / SubLaneScale;
-
- // Check that all the sources are coming from the same lane and see if we can
- // form a repeating shuffle mask (local to each sub-lane). At the same time,
- // determine the source sub-lane for each destination sub-lane.
- int TopSrcSubLane = -1;
- SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
- SmallVector<int, 8> RepeatedSubLaneMasks[2] = {
- SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef),
- SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef)};
-
- for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
- // Extract the sub-lane mask, check that it all comes from the same lane
- // and normalize the mask entries to come from the first lane.
- int SrcLane = -1;
- SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
- for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
- int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
- if (M < 0)
+ // Helper to look for repeated mask in each split sublane, and that those
+ // sublanes can then be permuted into place.
+ auto ShuffleSubLanes = [&](int SubLaneScale) {
+ int NumSubLanes = NumLanes * SubLaneScale;
+ int NumSubLaneElts = NumLaneElts / SubLaneScale;
+
+ // Check that all the sources are coming from the same lane and see if we
+ // can form a repeating shuffle mask (local to each sub-lane). At the same
+ // time, determine the source sub-lane for each destination sub-lane.
+ int TopSrcSubLane = -1;
+ SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
+ SmallVector<SmallVector<int, 8>> RepeatedSubLaneMasks(
+ SubLaneScale,
+ SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef));
+
+ for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
+ // Extract the sub-lane mask, check that it all comes from the same lane
+ // and normalize the mask entries to come from the first lane.
+ int SrcLane = -1;
+ SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
+ for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
+ int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
+ if (M < 0)
+ continue;
+ int Lane = (M % NumElts) / NumLaneElts;
+ if ((0 <= SrcLane) && (SrcLane != Lane))
+ return SDValue();
+ SrcLane = Lane;
+ int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
+ SubLaneMask[Elt] = LocalM;
+ }
+
+ // Whole sub-lane is UNDEF.
+ if (SrcLane < 0)
continue;
- int Lane = (M % NumElts) / NumLaneElts;
- if ((0 <= SrcLane) && (SrcLane != Lane))
- return SDValue();
- SrcLane = Lane;
- int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
- SubLaneMask[Elt] = LocalM;
- }
- // Whole sub-lane is UNDEF.
- if (SrcLane < 0)
- continue;
+ // Attempt to match against the candidate repeated sub-lane masks.
+ for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
+ auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
+ for (int i = 0; i != NumSubLaneElts; ++i) {
+ if (M1[i] < 0 || M2[i] < 0)
+ continue;
+ if (M1[i] != M2[i])
+ return false;
+ }
+ return true;
+ };
- // Attempt to match against the candidate repeated sub-lane masks.
- for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
- auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
+ auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
+ if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
+ continue;
+
+ // Merge the sub-lane mask into the matching repeated sub-lane mask.
for (int i = 0; i != NumSubLaneElts; ++i) {
- if (M1[i] < 0 || M2[i] < 0)
+ int M = SubLaneMask[i];
+ if (M < 0)
continue;
- if (M1[i] != M2[i])
- return false;
+ assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
+ "Unexpected mask element");
+ RepeatedSubLaneMask[i] = M;
}
- return true;
- };
- auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
- if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
- continue;
+ // Track the top most source sub-lane - by setting the remaining to
+ // UNDEF we can greatly simplify shuffle matching.
+ int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
+ TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
+ Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
+ break;
+ }
+
+ // Bail if we failed to find a matching repeated sub-lane mask.
+ if (Dst2SrcSubLanes[DstSubLane] < 0)
+ return SDValue();
+ }
+ assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
+ "Unexpected source lane");
- // Merge the sub-lane mask into the matching repeated sub-lane mask.
- for (int i = 0; i != NumSubLaneElts; ++i) {
- int M = SubLaneMask[i];
+ // Create a repeating shuffle mask for the entire vector.
+ SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
+ for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
+ int Lane = SubLane / SubLaneScale;
+ auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
+ for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
+ int M = RepeatedSubLaneMask[Elt];
if (M < 0)
continue;
- assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
- "Unexpected mask element");
- RepeatedSubLaneMask[i] = M;
+ int Idx = (SubLane * NumSubLaneElts) + Elt;
+ RepeatedMask[Idx] = M + (Lane * NumLaneElts);
}
-
- // Track the top most source sub-lane - by setting the remaining to UNDEF
- // we can greatly simplify shuffle matching.
- int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
- TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
- Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
- break;
}
+ SDValue RepeatedShuffle =
+ DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);
- // Bail if we failed to find a matching repeated sub-lane mask.
- if (Dst2SrcSubLanes[DstSubLane] < 0)
- return SDValue();
- }
- assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
- "Unexpected source lane");
-
- // Create a repeating shuffle mask for the entire vector.
- SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
- for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
- int Lane = SubLane / SubLaneScale;
- auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
- for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
- int M = RepeatedSubLaneMask[Elt];
- if (M < 0)
+ // Shuffle each source sub-lane to its destination.
+ SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
+ for (int i = 0; i != NumElts; i += NumSubLaneElts) {
+ int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
+ if (SrcSubLane < 0)
continue;
- int Idx = (SubLane * NumSubLaneElts) + Elt;
- RepeatedMask[Idx] = M + (Lane * NumLaneElts);
+ for (int j = 0; j != NumSubLaneElts; ++j)
+ SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
}
- }
- SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);
- // Shuffle each source sub-lane to its destination.
- SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
- for (int i = 0; i != NumElts; i += NumSubLaneElts) {
- int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
- if (SrcSubLane < 0)
- continue;
- for (int j = 0; j != NumSubLaneElts; ++j)
- SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
- }
+ return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
+ SubLaneMask);
+ };
- return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
- SubLaneMask);
+ // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
+ // (with PERMQ/PERMPD). On AVX2/AVX512BW targets, permuting 32-bit sub-lanes,
+ // even with a variable shuffle, can be worth it for v32i8/v64i8 vectors.
+ // Otherwise we can only permute whole 128-bit lanes.
+ int MinSubLaneScale = 1, MaxSubLaneScale = 1;
+ if (Subtarget.hasAVX2() && VT.is256BitVector()) {
+ bool OnlyLowestElts = isUndefOrInRange(Mask, 0, NumLaneElts);
+ MinSubLaneScale = 2;
+ MaxSubLaneScale =
+ (!OnlyLowestElts && V2.isUndef() && VT == MVT::v32i8) ? 4 : 2;
+ }
+ if (Subtarget.hasBWI() && VT == MVT::v64i8)
+ MinSubLaneScale = MaxSubLaneScale = 4;
+
+ for (int Scale = MinSubLaneScale; Scale <= MaxSubLaneScale; Scale *= 2)
+ if (SDValue Shuffle = ShuffleSubLanes(Scale))
+ return Shuffle;
+
+ return SDValue();
}
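
// A simplified, standalone sketch (not part of the patch) of what a
// "repeated sub-lane mask" means above: every sub-lane must apply the same
// lane-local element pattern. The real helper also tracks source sub-lanes
// and per-input normalization; all names below are hypothetical.
#include <cassert>
#include <vector>

static bool isRepeatedSubLaneMask(const std::vector<int> &Mask,
                                  int NumSubLaneElts) {
  std::vector<int> Repeated(NumSubLaneElts, -1); // -1 == still undef
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    if (Mask[i] < 0)
      continue; // undef elements match any pattern
    int Local = Mask[i] % NumSubLaneElts; // normalize to sub-lane local
    int &R = Repeated[i % NumSubLaneElts];
    if (R >= 0 && R != Local)
      return false; // sub-lanes disagree on the pattern
    R = Local;
  }
  return true;
}

int main() {
  // A v8i32-style mask repeating a {1,0} pattern in each 2-element sub-lane.
  assert(isRepeatedSubLaneMask({1, 0, 3, 2, 5, 4, 7, 6}, 2));
  // The second sub-lane uses a different local pattern - no repetition.
  assert(!isRepeatedSubLaneMask({1, 0, 2, 3, 5, 4, 7, 6}, 2));
}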
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
@@ -17513,6 +17652,9 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return Op;
+ bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask);
+ bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask);
+
// If we have lane crossing shuffles AND they don't all come from the lower
// lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
// TODO: Handle BUILD_VECTOR sources which getVectorShuffle currently
@@ -17521,13 +17663,11 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
!all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
(V1.getOpcode() != ISD::BUILD_VECTOR) &&
(V2.getOpcode() != ISD::BUILD_VECTOR))
- if (SDValue Op = lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2,
- Mask, DAG))
- return Op;
+ return lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2, Mask, DAG);
// If we have one input in place, then we can permute the other input and
// blend the result.
- if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
+ if (V1IsInPlace || V2IsInPlace)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG);
@@ -17541,8 +17681,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.
- if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
- isShuffleMaskInputInPlace(1, Mask))))
+ if (!(Subtarget.hasAVX2() && (V1IsInPlace || V2IsInPlace)))
if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
@@ -17635,9 +17774,12 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
return V;
+ bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask);
+ bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask);
+
// If we have one input in place, then we can permute the other input and
// blend the result.
- if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
+ if (V1IsInPlace || V2IsInPlace)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
Subtarget, DAG);
@@ -17647,12 +17789,16 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return V;
+ // Try to lower to PERMQ(BLENDD(V1,V2)).
+ if (SDValue V =
+ lowerShuffleAsBlendAndPermute(DL, MVT::v4i64, V1, V2, Mask, DAG))
+ return V;
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.
- if (!isShuffleMaskInputInPlace(0, Mask) &&
- !isShuffleMaskInputInPlace(1, Mask))
+ if (!V1IsInPlace && !V2IsInPlace)
if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Result;
@@ -18657,20 +18803,34 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return PSHUFB;
- // VBMI can use VPERMV/VPERMV3 byte shuffles.
- if (Subtarget.hasVBMI())
- return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);
-
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
+ if (SDValue Result = lowerShuffleAsLanePermuteAndPermute(
+ DL, MVT::v64i8, V1, V2, Mask, DAG, Subtarget))
+ return Result;
+
if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
+ if (!is128BitLaneCrossingShuffleMask(MVT::v64i8, Mask)) {
+ // Use PALIGNR+Permute if possible - permute might become PSHUFB but the
+ // PALIGNR will be cheaper than the second PSHUFB+OR.
+ if (SDValue V = lowerShuffleAsByteRotateAndPermute(DL, MVT::v64i8, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ // If we can't directly blend but can use PSHUFB, that will be better as it
+ // can both shuffle and set up the inefficient blend.
+ bool V1InUse, V2InUse;
+ return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v64i8, V1, V2, Mask, Zeroable,
+ DAG, V1InUse, V2InUse);
+ }
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (!V2.isUndef())
@@ -18678,7 +18838,10 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return Result;
- // FIXME: Implement direct support for this type!
+ // VBMI can use VPERMV/VPERMV3 byte shuffles.
+ if (Subtarget.hasVBMI())
+ return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);
+
return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
@@ -18915,7 +19078,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Offset += NumElts; // Increment for next iteration.
}
-
+ // If we're broadcasting a SETCC result, try to broadcast the ops instead.
+ // TODO: What other unary shuffles would benefit from this?
+ if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC &&
+ V1->hasOneUse()) {
+ SDValue Op0 = V1.getOperand(0);
+ SDValue Op1 = V1.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();
+ EVT OpVT = Op0.getValueType();
+ return DAG.getSetCC(
+ DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
+ DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
+ }
MVT ExtVT;
switch (VT.SimpleTy) {
@@ -19619,9 +19793,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
if (IsZeroElt || IsAllOnesElt) {
- // Lower insertion of i8 -1 as an 'OR' blend.
+ // Lower insertion of v16i8/v32i8/v16i16 -1 elts as an 'OR' blend.
// We don't deal with i8 0 since it appears to be handled elsewhere.
- if (IsAllOnesElt && EltSizeInBits == 8 && !Subtarget.hasSSE41()) {
+ if (IsAllOnesElt &&
+ ((VT == MVT::v16i8 && !Subtarget.hasSSE41()) ||
+ ((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) {
SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());
SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());
SmallVector<SDValue, 8> CstVectorElts(NumElts, ZeroCst);
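
// Sketch (illustrative, not part of the patch): inserting an all-ones
// element without a blend instruction reduces to ORing with a constant
// vector that is zero everywhere except all-ones in the target lane.
#include <cassert>
#include <cstdint>

int main() {
  uint8_t V[16], Cst[16] = {}; // Cst: build_vector of 0s ...
  for (int i = 0; i != 16; ++i)
    V[i] = (uint8_t)i;
  unsigned IdxVal = 5;
  Cst[IdxVal] = 0xFF; // ... with -1 at the insertion index
  for (int i = 0; i != 16; ++i) {
    uint8_t R = V[i] | Cst[i]; // the 'OR' blend
    assert(R == (i == (int)IdxVal ? 0xFF : V[i]));
  }
}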
@@ -19652,7 +19828,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// and incur a domain crossing penalty if that's what we'll end up
// doing anyway after extracting to a 128-bit vector.
if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
- (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
+ (Subtarget.hasAVX2() && (EltVT == MVT::i32 || EltVT == MVT::i64))) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
DAG.getTargetConstant(1, dl, MVT::i8));
@@ -19666,7 +19842,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// If we are not inserting into the low 128-bit vector chunk,
// then prefer the broadcast+blend sequence.
// FIXME: relax the profitability check iff all N1 uses are insertions.
- if (!VT.is128BitVector() && IdxVal >= NumEltsIn128 &&
+ if (IdxVal >= NumEltsIn128 &&
((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
(Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
X86::mayFoldLoad(N1, Subtarget)))) {
@@ -20617,6 +20793,35 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
return Cvt;
}
+template<typename T>
+static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) {
+ return VT == MVT::f16 && !Subtarget.hasFP16();
+}
+
+template<typename T>
+bool X86TargetLowering::isSoftFP16(T VT) const {
+ return ::isSoftFP16(VT, Subtarget);
+}
+
+static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
+ MVT VT = Op.getSimpleValueType();
+ MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
+ SDLoc dl(Op);
+
+ SDValue Rnd = DAG.getIntPtrConstant(0, dl);
+ if (IsStrict)
+ return DAG.getNode(
+ ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
+ {Chain,
+ DAG.getNode(Op.getOpcode(), dl, {NVT, MVT::Other}, {Chain, Src}),
+ Rnd});
+ return DAG.getNode(ISD::FP_ROUND, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd);
+}
+
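
// Scalar sketch (not part of the patch) of the promoted lowering above:
// with no native FP16 support, i32 -> f16 becomes a conversion at f32
// followed by an FP_ROUND. Assumes a host compiler providing the
// _Float16 extension (recent Clang/GCC on x86).
#include <cassert>

static _Float16 SoftI32ToF16(int X) {
  float Wide = (float)X; // [S|U]INT_TO_FP performed at the wider f32 type
  return (_Float16)Wide; // FP_ROUND back down to f16
}

int main() { assert((float)SoftI32ToF16(42) == 42.0f); }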
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
@@ -20627,6 +20832,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
+ if (isSoftFP16(VT))
+ return promoteXINT_TO_FP(Op, DAG);
+
if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
return LowerWin64_INT128_TO_FP(Op, DAG);
@@ -21123,9 +21331,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
MVT DstVT = Op->getSimpleValueType(0);
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+ // Bail out when we don't have native conversion instructions.
if (DstVT == MVT::f128)
return SDValue();
+ if (isSoftFP16(DstVT))
+ return promoteXINT_TO_FP(Op, DAG);
+
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
@@ -21158,9 +21370,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// The transform for i64->f64 isn't correct for 0 when rounding to negative
// infinity. It produces -0.0, so disable under strictfp.
- if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64 && !IsStrict)
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.hasSSE2() &&
+ !IsStrict)
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
- if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80)
+ // The transform for i32->f64/f32 isn't correct for 0 when rounding to
+ // negative infinity, so disable it under strictfp; FILD is used instead.
+ if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80 &&
+ !IsStrict)
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
(DstVT == MVT::f32 || DstVT == MVT::f64))
@@ -21819,27 +22035,25 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
- In = DAG.getBitcast(MVT::v8i32, In);
-
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
+ In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0, DL));
}
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
- DAG.getIntPtrConstant(4, DL));
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(2, DL));
static const int ShufMask[] = {0, 2, 4, 6};
- return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
+ return DAG.getVectorShuffle(VT, DL, DAG.getBitcast(MVT::v4i32, OpLo),
+ DAG.getBitcast(MVT::v4i32, OpHi), ShufMask);
}
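
// Sketch (illustrative, not part of the patch): on little-endian x86,
// truncating each 64-bit lane to 32 bits equals bitcasting to twice as
// many 32-bit lanes and keeping the even ones - the {0, 2, 4, 6} shuffle
// used above.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t In[4] = {0x1111111122222222ULL, 3, 0xFFFFFFFF00000004ULL, 5};
  uint32_t As32[8];
  std::memcpy(As32, In, sizeof(In)); // the v4i64 -> v8i32 bitcast
  for (int i = 0; i != 4; ++i)
    assert(As32[2 * i] == (uint32_t)In[i]); // keep elements {0,2,4,6}
}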
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
- In = DAG.getBitcast(MVT::v32i8, In);
-
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
// The PSHUFB mask:
@@ -21847,27 +22061,30 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
-1, -1, -1, -1, -1, -1, -1, -1,
16, 17, 20, 21, 24, 25, 28, 29,
-1, -1, -1, -1, -1, -1, -1, -1 };
+ In = DAG.getBitcast(MVT::v32i8, In);
In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
static const int ShufMask2[] = {0, 2, -1, -1};
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16,
- DAG.getBitcast(MVT::v16i16, In),
- DAG.getIntPtrConstant(0, DL));
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getBitcast(MVT::v8i16, In);
}
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(0, DL));
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
- DAG.getIntPtrConstant(16, DL));
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(4, DL));
// The PSHUFB mask:
- static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
- -1, -1, -1, -1, -1, -1, -1, -1};
+ static const int ShufMask1[] = {0, 2, 4, 6, -1, -1, -1, -1};
+
+ OpLo = DAG.getBitcast(MVT::v8i16, OpLo);
+ OpHi = DAG.getBitcast(MVT::v8i16, OpHi);
- OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);
+ OpLo = DAG.getVectorShuffle(MVT::v8i16, DL, OpLo, OpLo, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v8i16, DL, OpHi, OpHi, ShufMask1);
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
@@ -21941,6 +22158,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Res;
+ if (isSoftFP16(SrcVT)) {
+ MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
+ if (IsStrict)
+ return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
+ {Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+ {NVT, MVT::Other}, {Chain, Src})});
+ return DAG.getNode(Op.getOpcode(), dl, VT,
+ DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src));
+ }
+
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
@@ -22278,6 +22505,9 @@ SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op,
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
+ if (SrcVT == MVT::f16)
+ return SDValue();
+
// If the source is in an SSE register, the node is Legal.
if (isScalarFPTypeInSSEReg(SrcVT))
return Op;
@@ -22349,7 +22579,7 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
// This code is only for floats and doubles. Fall back to generic code for
// anything else.
- if (!isScalarFPTypeInSSEReg(SrcVT))
+ if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftFP16(SrcVT))
return SDValue();
EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
@@ -22381,11 +22611,11 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
// floating-point values.
APInt MinInt, MaxInt;
if (IsSigned) {
- MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
- MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
+ MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
} else {
- MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
- MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
+ MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
+ MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
}
APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
@@ -22484,28 +22714,54 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
MVT SVT = In.getSimpleValueType();
- if (VT == MVT::f128)
+ if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80))
return SDValue();
- if (VT == MVT::f80) {
- if (SVT == MVT::f16) {
- assert(Subtarget.hasFP16() && "Unexpected features!");
- RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
- MakeLibCallOptions CallOptions;
- std::pair<SDValue, SDValue> Tmp =
- makeLibCall(DAG, LC, VT, In, CallOptions, DL,
- IsStrict ? Op.getOperand(0) : SDValue());
+ if (SVT == MVT::f16) {
+ if (Subtarget.hasFP16())
+ return Op;
+
+ if (VT != MVT::f32) {
if (IsStrict)
- return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
- else
- return Tmp.first;
+ return DAG.getNode(
+ ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other},
+ {Chain, DAG.getNode(ISD::STRICT_FP_EXTEND, DL,
+ {MVT::f32, MVT::Other}, {Chain, In})});
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT,
+ DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, In));
}
- return Op;
+
+ if (!Subtarget.hasF16C())
+ return SDValue();
+
+ In = DAG.getBitcast(MVT::i16, In);
+ In = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i16,
+ getZeroVector(MVT::v8i16, Subtarget, DAG, DL), In,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue Res;
+ if (IsStrict) {
+ Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, DL, {MVT::v4f32, MVT::Other},
+ {Chain, In});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(X86ISD::CVTPH2PS, DL, MVT::v4f32, In,
+ DAG.getTargetConstant(4, DL, MVT::i32));
+ }
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Res,
+ DAG.getIntPtrConstant(0, DL));
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, DL);
+ return Res;
}
+ if (!SVT.isVector())
+ return Op;
+
if (SVT.getVectorElementType() == MVT::f16) {
assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
if (SVT == MVT::v2f16)
@@ -22531,15 +22787,65 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
+
+ SDLoc DL(Op);
+ SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Op2 = Op.getOperand(IsStrict ? 2 : 1);
MVT VT = Op.getSimpleValueType();
MVT SVT = In.getSimpleValueType();
- // It's legal except when f128 is involved or we're converting f80->f16.
- if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80))
- return Op;
+ if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80))
+ return SDValue();
- return SDValue();
+ if (VT == MVT::f16) {
+ if (Subtarget.hasFP16())
+ return Op;
+
+ if (SVT != MVT::f32) {
+ if (IsStrict)
+ return DAG.getNode(
+ ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
+ {Chain,
+ DAG.getNode(ISD::STRICT_FP_ROUND, DL, {MVT::f32, MVT::Other},
+ {Chain, In, Op2}),
+ Op2});
+
+ return DAG.getNode(ISD::FP_ROUND, DL, VT,
+ DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, In, Op2),
+ Op2);
+ }
+
+ if (!Subtarget.hasF16C())
+ return SDValue();
+
+ SDValue Res;
+ SDValue Rnd = DAG.getTargetConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, DL,
+ MVT::i32);
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4f32,
+ DAG.getConstantFP(0, DL, MVT::v4f32), In,
+ DAG.getIntPtrConstant(0, DL));
+ Res = DAG.getNode(X86ISD::STRICT_CVTPS2PH, DL, {MVT::v8i16, MVT::Other},
+ {Chain, Res, Rnd});
+ Chain = Res.getValue(1);
+ } else {
+ // FIXME: Should we use zeros for upper elements for non-strict?
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, In);
+ Res = DAG.getNode(X86ISD::CVTPS2PH, DL, MVT::v8i16, Res, Rnd);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i16, Res,
+ DAG.getIntPtrConstant(0, DL));
+ Res = DAG.getBitcast(MVT::f16, Res);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, DL);
+
+ return Res;
+ }
+
+ return Op;
}
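
// Host-side sketch (not part of the patch) of the scalar f32 <-> f16
// paths above: widen into the F16C vector converts and move the value
// through element 0. Assumes an F16C-capable CPU and a build with F16C
// enabled (e.g. -mf16c).
#include <cassert>
#include <cstdint>
#include <immintrin.h>

static uint16_t F32ToF16Bits(float F) {
  __m128i H = _mm_cvtps_ph(_mm_set_ss(F), _MM_FROUND_CUR_DIRECTION);
  return (uint16_t)_mm_extract_epi16(H, 0); // CVTPS2PH, take element 0
}

static float F16BitsToF32(uint16_t Bits) {
  __m128i H = _mm_insert_epi16(_mm_setzero_si128(), Bits, 0);
  return _mm_cvtss_f32(_mm_cvtph_ps(H)); // CVTPH2PS, take element 0
}

int main() { assert(F16BitsToF32(F32ToF16Bits(1.5f)) == 1.5f); }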
static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -22857,6 +23163,47 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
return Res;
}
+/// Helper for attempting to create a X86ISD::BT node.
+static SDValue getBT(SDValue Src, SDValue BitNo, const SDLoc &DL,
+                     SelectionDAG &DAG) {
+ // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ // Also promote i16 to i32 for performance / code size reasons.
+ if (Src.getValueType().getScalarSizeInBits() < 32)
+ Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
+
+ // No legal type found, give up.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
+ return SDValue();
+
+ // See if we can use the 32-bit instruction instead of the 64-bit one for a
+ // shorter encoding. Since the former takes the modulo 32 of BitNo and the
+ // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
+ // known to be zero.
+ if (Src.getValueType() == MVT::i64 &&
+ DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
+ Src = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (Src.getValueType() != BitNo.getValueType()) {
+ // Peek through a mask/modulo operation.
+ // TODO: DAGCombine fails to do this as it just checks isTruncateFree, but
+ // we probably need a better IsDesirableToPromoteOp to handle this as well.
+ if (BitNo.getOpcode() == ISD::AND && BitNo->hasOneUse())
+ BitNo = DAG.getNode(ISD::AND, DL, Src.getValueType(),
+ DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(),
+ BitNo.getOperand(0)),
+ DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(),
+ BitNo.getOperand(1)));
+ else
+ BitNo = DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(), BitNo);
+ }
+
+ return DAG.getNode(X86ISD::BT, DL, MVT::i32, Src, BitNo);
+}
+
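
// Standalone sketch (not part of the patch) of the two BT facts the
// helper relies on: the 32-bit form tests bit (N mod 32) and the 64-bit
// form bit (N mod 64), so the 64-bit form can be shrunk whenever bit 5
// of the index is known zero, and only the low bits of the index matter,
// which is why any-extending the shift amount is safe.
#include <cassert>
#include <cstdint>

static bool BT32(uint32_t Src, unsigned BitNo) {
  return (Src >> (BitNo & 31)) & 1; // CF written by 32-bit BT
}
static bool BT64(uint64_t Src, unsigned BitNo) {
  return (Src >> (BitNo & 63)) & 1; // CF written by 64-bit BT
}

int main() {
  uint64_t V = 0x00000000A5A5A5A5ULL;
  for (unsigned N = 0; N != 32; ++N) // bit 5 of N is zero: forms agree
    assert(BT64(V, N) == BT32((uint32_t)V, N));
}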
/// Helper for creating a X86ISD::SETCC node.
static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
SelectionDAG &DAG) {
@@ -23303,7 +23650,7 @@ bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
return true;
// We never want to use both SQRT and RSQRT instructions for the same input.
- if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
+ if (DAG.doesNodeExist(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
if (VT.isVector())
@@ -23439,7 +23786,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
// Only perform this transform if CMOV is supported otherwise the select
// below will become a branch.
- if (!Subtarget.hasCMov())
+ if (!Subtarget.canUseCMOV())
return SDValue();
// fold (sdiv X, pow2)
@@ -23485,9 +23832,8 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
-static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG,
- SDValue &X86CC) {
+static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
+ SelectionDAG &DAG, X86::CondCode &X86CC) {
assert(And.getOpcode() == ISD::AND && "Expected AND node!");
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
@@ -23538,30 +23884,24 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
if (!Src.getNode())
return SDValue();
- // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i32 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- // Also promote i16 to i32 for performance / code size reason.
- if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
- Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
+ // Remove any bit flip.
+ if (isBitwiseNot(Src)) {
+ Src = Src.getOperand(0);
+ CC = CC == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
+ }
- // See if we can use the 32-bit instruction instead of the 64-bit one for a
- // shorter encoding. Since the former takes the modulo 32 of BitNo and the
- // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
- // known to be zero.
- if (Src.getValueType() == MVT::i64 &&
- DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
- Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+ // Attempt to create the X86ISD::BT node.
+ if (SDValue BT = getBT(Src, BitNo, dl, DAG)) {
+ X86CC = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return BT;
+ }
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (Src.getValueType() != BitNo.getValueType())
- BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
+ return SDValue();
+}
- X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
- dl, MVT::i8);
- return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
+// Check if a pre-AVX condition code can be handled by a single FCMP op.
+static bool cheapX86FSETCC_SSE(ISD::CondCode SetCCOpcode) {
+ return (SetCCOpcode != ISD::SETONE) && (SetCCOpcode != ISD::SETUEQ);
}
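
// Sketch (illustrative, not part of the patch): SETUEQ and SETONE are
// the two predicates with no single SSE compare immediate; each needs an
// ordered/unordered test plus an (in)equality test tied by a logic op.
#include <cassert>
#include <cmath>

static bool UEQ(float A, float B) { // unordered or equal
  return std::isunordered(A, B) || A == B; // CMPUNORDPS | CMPEQPS
}
static bool ONE(float A, float B) { // ordered and not equal
  return !std::isunordered(A, B) && A != B; // CMPORDPS & CMPNEQPS
}

int main() {
  float QNaN = std::nanf("");
  assert(UEQ(QNaN, 1.0f) && UEQ(2.0f, 2.0f) && !UEQ(1.0f, 2.0f));
  assert(!ONE(QNaN, 1.0f) && !ONE(2.0f, 2.0f) && ONE(1.0f, 2.0f));
}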
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
@@ -23831,7 +24171,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
// emit two comparisons and a logic op to tie them together.
- if (SSECC >= 8) {
+ if (!cheapX86FSETCC_SSE(Cond)) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
@@ -23996,10 +24336,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
- if (VT == MVT::v32i16 || VT == MVT::v64i8) {
- assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!");
+ // Break 512-bit integer vector compare into smaller ones.
+ // TODO: Try harder to use VPCMPx + VPMOV2x?
+ if (VT.is512BitVector())
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
- }
// If we have a limit constant, try to form PCMPGT (signed cmp) to avoid
// not-of-PCMPEQ:
@@ -24117,12 +24457,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
- SDValue SB;
- if (FlipSigns) {
- SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64);
- } else {
- SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64);
- }
+ SDValue SB = DAG.getConstant(FlipSigns ? 0x8000000080000000ULL
+ : 0x0000000080000000ULL,
+ dl, MVT::v2i64);
+
Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);
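
// Scalar sketch (not part of the patch) of the sign-flip trick: XORing
// the sign bit into both operands turns the signed compare that PCMPGT
// provides into an unsigned one.
#include <cassert>
#include <cstdint>

static bool UGT32(uint32_t A, uint32_t B) {
  return (int32_t)(A ^ 0x80000000u) > (int32_t)(B ^ 0x80000000u);
}

int main() {
  assert(UGT32(0xFFFFFFFFu, 1u));          // unsigned max > 1
  assert(!UGT32(1u, 0xFFFFFFFFu));
  assert(UGT32(0x80000000u, 0x7FFFFFFFu)); // crosses the sign boundary
}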
@@ -24261,8 +24599,11 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
+ X86::CondCode X86CondCode;
+ if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
+ X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
return BT;
+ }
}
// Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0.
@@ -24527,6 +24868,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op1.getSimpleValueType();
SDValue CC;
+ if (isSoftFP16(VT))
+ return DAG.getBitcast(MVT::f16, DAG.getNode(ISD::SELECT, DL, MVT::i16, Cond,
+ DAG.getBitcast(MVT::i16, Op1),
+ DAG.getBitcast(MVT::i16, Op2)));
+
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available or VBLENDV if AVX is available.
// Otherwise FP cmovs get lowered into a less efficient branch sequence later.
@@ -24591,7 +24937,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
- if (Cond.getOpcode() == ISD::SETCC) {
+ if (Cond.getOpcode() == ISD::SETCC &&
+ !isSoftFP16(Cond.getOperand(0).getSimpleValueType())) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
// If the condition was updated, it's possible that the operands of the
@@ -24608,6 +24955,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
// (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y
// (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y
+ // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x
+ // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
@@ -24624,7 +24973,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() &&
Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2));
};
- if (Subtarget.hasCMov() && (VT == MVT::i32 || VT == MVT::i64) &&
+ if (Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64) &&
((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) ||
(CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) {
// Keep Cmp.
@@ -24652,7 +25001,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
Sub.getValue(1));
return DAG.getNode(ISD::OR, DL, VT, SBB, Y);
- } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
+ } else if (!Subtarget.canUseCMOV() && CondCode == X86::COND_E &&
Cmp.getOperand(0).getOpcode() == ISD::AND &&
isOneConstant(Cmp.getOperand(0).getOperand(1))) {
SDValue Src1, Src2;
@@ -24688,6 +25037,22 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // And Op y
}
+ } else if ((VT == MVT::i32 || VT == MVT::i64) && isNullConstant(Op2) &&
+ Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) &&
+ ((CondCode == X86::COND_S) || // smin(x, 0)
+ (CondCode == X86::COND_G && hasAndNot(Op1)))) { // smax(x, 0)
+ // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x
+ //
+ // If the comparison is testing for a positive value, we have to invert
+ // the sign bit mask, so only do that transform if the target has a
+ // bitwise 'and not' instruction (the invert is free).
+ // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x
+ unsigned ShCt = VT.getSizeInBits() - 1;
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, VT);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, Op1, ShiftAmt);
+ if (CondCode == X86::COND_G)
+ Shift = DAG.getNOT(DL, Shift, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Op1);
}
}
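
// Scalar sketch (not part of the patch) of the identities used above.
// Assumes arithmetic right shift of signed values, which holds on the
// targets this lowering serves.
#include <cassert>
#include <cstdint>

static int32_t Smin0(int32_t X) { // select (x < 0), x, 0
  return (X >> 31) & X; // mask is all-ones exactly when X is negative
}
static int32_t Smax0(int32_t X) { // select (x > 0), x, 0
  return ~(X >> 31) & X; // inverted mask keeps only non-negative X
}

int main() {
  assert(Smin0(-7) == -7 && Smin0(0) == 0 && Smin0(9) == 0);
  assert(Smax0(-7) == 0 && Smax0(0) == 0 && Smax0(9) == 9);
}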
@@ -24707,7 +25072,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
- !isScalarFPTypeInSSEReg(VT) && Subtarget.hasCMov()) // FPStack?
+ !isScalarFPTypeInSSEReg(VT) && Subtarget.canUseCMOV()) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
@@ -24734,9 +25099,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue BTCC;
- if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) {
- CC = BTCC;
+ X86::CondCode X86CondCode;
+ if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) {
+ CC = DAG.getTargetConstant(X86CondCode, DL, MVT::i8);
Cond = BT;
AddTest = false;
}
@@ -24788,7 +25153,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// legal, but EmitLoweredSelect() can not deal with these extensions
// being inserted between two CMOV's. (in i16 case too TBN)
// https://bugs.llvm.org/show_bug.cgi?id=40974
- if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
+ if ((Op.getValueType() == MVT::i8 && Subtarget.canUseCMOV()) ||
(Op.getValueType() == MVT::i16 && !X86::mayFoldLoad(Op1, Subtarget) &&
!X86::mayFoldLoad(Op2, Subtarget))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
@@ -25153,16 +25518,20 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
!Subtarget.hasBWI())) {
SmallVector<SDValue, 4> CatOps;
- if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
+ if (StoredVal.hasOneUse() &&
+ collectConcatOps(StoredVal.getNode(), CatOps, DAG))
return splitVectorStore(St, DAG);
return SDValue();
}
+ if (StoreVT.is32BitVector())
+ return SDValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
- "Unexpected VT");
+ assert(StoreVT.is64BitVector() && "Unexpected VT");
assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
- TargetLowering::TypeWidenVector && "Unexpected type action!");
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action!");
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
@@ -25247,8 +25616,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
+ // Bail out when we don't have native compare instructions.
if (Cond.getOpcode() == ISD::SETCC &&
- Cond.getOperand(0).getValueType() != MVT::f128) {
+ Cond.getOperand(0).getValueType() != MVT::f128 &&
+ !isSoftFP16(Cond.getOperand(0).getValueType())) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -25647,116 +26018,116 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
// Fold this packed vector shift into a build vector if SrcOp is a
// vector of Constants or UNDEFs.
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
- SmallVector<SDValue, 8> Elts;
- unsigned NumElts = SrcOp->getNumOperands();
-
+ unsigned ShiftOpc;
switch (Opc) {
default: llvm_unreachable("Unknown opcode!");
case X86ISD::VSHLI:
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue CurrentOp = SrcOp->getOperand(i);
- if (CurrentOp->isUndef()) {
- // Must produce 0s in the correct bits.
- Elts.push_back(DAG.getConstant(0, dl, ElementType));
- continue;
- }
- auto *ND = cast<ConstantSDNode>(CurrentOp);
- const APInt &C = ND->getAPIntValue();
- Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType));
- }
+ ShiftOpc = ISD::SHL;
break;
case X86ISD::VSRLI:
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue CurrentOp = SrcOp->getOperand(i);
- if (CurrentOp->isUndef()) {
- // Must produce 0s in the correct bits.
- Elts.push_back(DAG.getConstant(0, dl, ElementType));
- continue;
- }
- auto *ND = cast<ConstantSDNode>(CurrentOp);
- const APInt &C = ND->getAPIntValue();
- Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType));
- }
+ ShiftOpc = ISD::SRL;
break;
case X86ISD::VSRAI:
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue CurrentOp = SrcOp->getOperand(i);
- if (CurrentOp->isUndef()) {
- // All shifted in bits must be the same so use 0.
- Elts.push_back(DAG.getConstant(0, dl, ElementType));
- continue;
- }
- auto *ND = cast<ConstantSDNode>(CurrentOp);
- const APInt &C = ND->getAPIntValue();
- Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType));
- }
+ ShiftOpc = ISD::SRA;
break;
}
- return DAG.getBuildVector(VT, dl, Elts);
+ SDValue Amt = DAG.getConstant(ShiftAmt, dl, VT);
+ if (SDValue C = DAG.FoldConstantArithmetic(ShiftOpc, dl, VT, {SrcOp, Amt}))
+ return C;
}
return DAG.getNode(Opc, dl, VT, SrcOp,
DAG.getTargetConstant(ShiftAmt, dl, MVT::i8));
}
-/// Handle vector element shifts where the shift amount may or may not be a
-/// constant. Takes immediate version of shift as input.
-/// TODO: Replace with vector + (splat) idx to avoid extract_element nodes.
+/// Handle vector element shifts by a splat shift amount.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
- SDValue SrcOp, SDValue ShAmt,
+ SDValue SrcOp, SDValue ShAmt, int ShAmtIdx,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- MVT SVT = ShAmt.getSimpleValueType();
- assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
-
- // Change opcode to non-immediate version.
- Opc = getTargetVShiftUniformOpcode(Opc, true);
-
- // Need to build a vector containing shift amount.
- // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- // +====================+============+=======================================+
- // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
- // +====================+============+=======================================+
- // | i64 | Yes, No | Use ShAmt as lowest elt |
- // | i32 | Yes | zero-extend in-reg |
- // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg |
- // | (i32 zext(i16/i8)) | No | byte-shift-in-reg |
- // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
- // +====================+============+=======================================+
-
- if (SVT == MVT::i64)
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
- else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
+ MVT AmtVT = ShAmt.getSimpleValueType();
+ assert(AmtVT.isVector() && "Vector shift type mismatch");
+ assert(0 <= ShAmtIdx && ShAmtIdx < (int)AmtVT.getVectorNumElements() &&
+ "Illegal vector splat index");
+
+ // Move the splat element to the bottom element.
+ if (ShAmtIdx != 0) {
+ SmallVector<int> Mask(AmtVT.getVectorNumElements(), -1);
+ Mask[0] = ShAmtIdx;
+ ShAmt = DAG.getVectorShuffle(AmtVT, dl, ShAmt, DAG.getUNDEF(AmtVT), Mask);
+ }
+
+ // Peek through any zext node if we can get back to a 128-bit source.
+ if (AmtVT.getScalarSizeInBits() == 64 &&
+ (ShAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ ShAmt.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ ShAmt.getOperand(0).getValueType().isSimple() &&
+ ShAmt.getOperand(0).getValueType().is128BitVector()) {
ShAmt = ShAmt.getOperand(0);
- MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
- if (Subtarget.hasSSE41())
+ AmtVT = ShAmt.getSimpleValueType();
+ }
+
+ // See if we can mask off the upper elements using the existing source node.
+ // The shift uses the entire lower 64-bits of the amount vector, so no need to
+ // do this for vXi64 types.
+ bool IsMasked = false;
+ if (AmtVT.getScalarSizeInBits() < 64) {
+ if (ShAmt.getOpcode() == ISD::BUILD_VECTOR ||
+ ShAmt.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // If the shift amount has come from a scalar, then zero-extend the scalar
+ // before moving to the vector.
+ ShAmt = DAG.getZExtOrTrunc(ShAmt.getOperand(0), dl, MVT::i32);
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, ShAmt);
+ ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, ShAmt);
+ AmtVT = MVT::v4i32;
+ IsMasked = true;
+ } else if (ShAmt.getOpcode() == ISD::AND) {
+ // If the shift amount is already masked (e.g. for rotation modulo), we
+ // can zero-extend it by setting all the other mask elements to zero.
+ SmallVector<SDValue> MaskElts(
+ AmtVT.getVectorNumElements(),
+ DAG.getConstant(0, dl, AmtVT.getScalarType()));
+ MaskElts[0] = DAG.getAllOnesConstant(dl, AmtVT.getScalarType());
+ SDValue Mask = DAG.getBuildVector(AmtVT, dl, MaskElts);
+ if ((Mask = DAG.FoldConstantArithmetic(ISD::AND, dl, AmtVT,
+ {ShAmt.getOperand(1), Mask}))) {
+ ShAmt = DAG.getNode(ISD::AND, dl, AmtVT, ShAmt.getOperand(0), Mask);
+ IsMasked = true;
+ }
+ }
+ }
+
+ // Extract if the shift amount vector is larger than 128-bits.
+ if (AmtVT.getSizeInBits() > 128) {
+ ShAmt = extract128BitVector(ShAmt, 0, DAG, dl);
+ AmtVT = ShAmt.getSimpleValueType();
+ }
+
+ // Zero-extend bottom element to v2i64 vector type, either by extension or
+ // shuffle masking.
+ if (!IsMasked && AmtVT.getScalarSizeInBits() < 64) {
+ if (AmtVT == MVT::v4i32 && (ShAmt.getOpcode() == X86ISD::VBROADCAST ||
+ ShAmt.getOpcode() == X86ISD::VBROADCAST_LOAD)) {
+ ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+ } else if (Subtarget.hasSSE41()) {
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
MVT::v2i64, ShAmt);
- else {
+ } else {
SDValue ByteShift = DAG.getTargetConstant(
- (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
+ (128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
ByteShift);
ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
ByteShift);
}
- } else if (Subtarget.hasSSE41() &&
- ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
- ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
- MVT::v2i64, ShAmt);
- } else {
- SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
- DAG.getUNDEF(SVT)};
- ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
+ // Change opcode to non-immediate version.
+ Opc = getTargetVShiftUniformOpcode(Opc, true);
+
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
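
// Host-side sketch (not part of the patch): the uniform shifts built here
// map to instructions like PSLLD, which consume the entire low 64 bits of
// the count register - hence the effort above to zero the elements above
// the splat amount. Assumes SSE2 and <immintrin.h>.
#include <cassert>
#include <cstdint>
#include <immintrin.h>

int main() {
  __m128i V = _mm_set_epi32(8, 4, 2, 1);
  // _mm_cvtsi32_si128 zero-extends the scalar into element 0, leaving the
  // upper 96 bits of the count vector zero, as the shift requires.
  __m128i R = _mm_sll_epi32(V, _mm_cvtsi32_si128(3));
  uint32_t Out[4];
  _mm_storeu_si128((__m128i *)Out, R);
  assert(Out[0] == 8 && Out[1] == 16 && Out[2] == 32 && Out[3] == 64);
}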
@@ -25907,8 +26278,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
// Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after
// prologue to RBP in the parent function.
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
+ const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit())
return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
@@ -26444,6 +26814,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case VSHIFT: {
SDValue SrcOp = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
+ assert(ShAmt.getValueType() == MVT::i32 &&
+ "Unexpected VSHIFT amount type");
// Catch shift-by-constant.
if (auto *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
@@ -26451,8 +26823,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getSimpleValueType(), SrcOp,
CShAmt->getZExtValue(), DAG);
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, ShAmt);
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
- SrcOp, ShAmt, Subtarget, DAG);
+ SrcOp, ShAmt, 0, Subtarget, DAG);
}
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
@@ -27411,6 +27784,30 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
+ case Intrinsic::x86_atomic_bts:
+ case Intrinsic::x86_atomic_btc:
+ case Intrinsic::x86_atomic_btr: {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(2);
+ SDValue Op2 = Op.getOperand(3);
+ unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS
+ : IntNo == Intrinsic::x86_atomic_btc ? X86ISD::LBTC
+ : X86ISD::LBTR;
+ SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);
+ MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
+ SDValue Res =
+ DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
+ {Chain, Op1, Op2, Size}, VT, MMO);
+ Chain = Res.getValue(1);
+ Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+ if (Imm)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getShiftAmountConstant(Imm, VT, DL));
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
+ }
}
return SDValue();
}
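
// Sketch (illustrative, not part of the patch): LBTS/LBTR/LBTC leave the
// original bit in CF; zero-extending that flag and shifting it back to
// the tested position rebuilds the masked value the IR expects.
#include <cassert>
#include <cstdint>

static uint32_t RebuildMaskedBit(uint32_t Old, unsigned Imm) {
  uint32_t CF = (Old >> Imm) & 1; // what the bit-test leaves in CF
  return CF << Imm;               // == Old & (1u << Imm)
}

int main() {
  assert(RebuildMaskedBit(0xA /*0b1010*/, 3) == 0x8);
  assert(RebuildMaskedBit(0xA, 2) == 0);
}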
@@ -28394,11 +28791,27 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
-static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerAVG(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+
+ // For AVX1 cases, split to use legal ops.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return splitVectorIntBinary(Op, DAG);
+
+ if (VT == MVT::v32i16 || VT == MVT::v64i8)
+ return splitVectorIntBinary(Op, DAG);
+
+ // Default to expand.
+ return SDValue();
+}
+
+static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- // For AVX1 cases, split to use legal ops (everything but v4i64).
- if (VT.getScalarType() != MVT::i64 && VT.is256BitVector())
+ // For AVX1 cases, split to use legal ops.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitVectorIntBinary(Op, DAG);
if (VT == MVT::v32i16 || VT == MVT::v64i8)
@@ -29188,19 +29601,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
- unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
- if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
- if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
- MVT EltVT = VT.getVectorElementType();
- assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
- if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
- else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
-
- return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
- }
+ int BaseShAmtIdx = -1;
+ if (SDValue BaseShAmt = DAG.getSplatSourceVector(Amt, BaseShAmtIdx)) {
+ if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode))
+ return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx,
+ Subtarget, DAG);
// vXi8 shifts - shift as v8i16 + mask result.
if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
@@ -29212,13 +29618,12 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
// Create the mask using vXi16 shifts. For shift-rights we need to move
// the upper byte down before splatting the vXi8 mask.
SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
- BaseShAmt, Subtarget, DAG);
+ BaseShAmt, BaseShAmtIdx, Subtarget, DAG);
if (Opcode != ISD::SHL)
BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
8, DAG);
@@ -29228,7 +29633,7 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
DAG.getBitcast(ExtVT, R), BaseShAmt,
- Subtarget, DAG);
+ BaseShAmtIdx, Subtarget, DAG);
Res = DAG.getBitcast(VT, Res);
Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
@@ -29236,8 +29641,9 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
// ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
// SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
- SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
- BaseShAmt, Subtarget, DAG);
+ SignMask =
+ getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask, BaseShAmt,
+ BaseShAmtIdx, Subtarget, DAG);
SignMask = DAG.getBitcast(VT, SignMask);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
@@ -29247,23 +29653,6 @@ static SDValue LowerShiftByScalarVariable(SDValue Op, SelectionDAG &DAG,
}
}
- // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
- if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
- Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
- Amt = Amt.getOperand(0);
- unsigned Ratio = 64 / Amt.getScalarValueSizeInBits();
- std::vector<SDValue> Vals(Ratio);
- for (unsigned i = 0; i != Ratio; ++i)
- Vals[i] = Amt.getOperand(i);
- for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) {
- for (unsigned j = 0; j != Ratio; ++j)
- if (Vals[j] != Amt.getOperand(i + j))
- return SDValue();
- }
-
- if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
- return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
- }
return SDValue();
}
@@ -29843,8 +30232,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
{Op0, Op1, Amt}, DAG, Subtarget);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
- VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v8i32 ||
- VT == MVT::v16i32) &&
+ VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
+ VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
"Unexpected funnel shift type!");
// fshl(x,y,z) -> unpack(y,x) << (z & (bw-1))) >> bw.
@@ -29867,7 +30256,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
// Split 256-bit integers on XOP/pre-AVX2 targets.
// Split 512-bit integers on non 512-bit BWI targets.
- if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 32) ||
+ if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 16) ||
!Subtarget.hasAVX2())) ||
(VT.is512BitVector() && !Subtarget.useBWIRegs() &&
EltSizeInBits < 32)) {
@@ -29878,18 +30267,18 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to fold scalar shift as unpack(y,x) << zext(splat(z))
if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) {
- if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) {
+ int ScalarAmtIdx = -1;
+ if (SDValue ScalarAmt = DAG.getSplatSourceVector(AmtMod, ScalarAmtIdx)) {
// Uniform vXi16 funnel shifts can be efficiently handled by default.
if (EltSizeInBits == 16)
return SDValue();
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
- ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32);
- Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt, Subtarget,
- DAG);
- Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt, Subtarget,
- DAG);
+ Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt,
+ ScalarAmtIdx, Subtarget, DAG);
+ Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt,
+ ScalarAmtIdx, Subtarget, DAG);
return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);
}
}
@@ -30079,18 +30468,20 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to fold as unpack(x,x) << zext(splat(y)):
// rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
- // TODO: Handle vXi16 cases on all targets.
- if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
- (IsROTL && EltSizeInBits == 16 && !Subtarget.hasAVX())) {
- if (SDValue BaseRotAmt = DAG.getSplatValue(AmtMod)) {
+ if (EltSizeInBits == 8 || EltSizeInBits == 16 || EltSizeInBits == 32) {
+ int BaseRotAmtIdx = -1;
+ if (SDValue BaseRotAmt = DAG.getSplatSourceVector(AmtMod, BaseRotAmtIdx)) {
+ if (EltSizeInBits == 16 && Subtarget.hasSSE41()) {
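+        // A rotate is a funnel shift with both operands equal:
+        // rot(x,y) == fsh(x,x,y), so defer to the funnel-shift lowering.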
+ unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
+ return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
+ }
unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
- BaseRotAmt = DAG.getZExtOrTrunc(BaseRotAmt, DL, MVT::i32);
Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, BaseRotAmt,
- Subtarget, DAG);
+ BaseRotAmtIdx, Subtarget, DAG);
Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt,
- Subtarget, DAG);
+ BaseRotAmtIdx, Subtarget, DAG);
return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);
}
}
@@ -30273,14 +30664,15 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
- return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit();
+ return Subtarget.canUseCMPXCHG8B() && !Subtarget.is64Bit();
if (OpWidth == 128)
- return Subtarget.hasCmpxchg16b();
+ return Subtarget.canUseCMPXCHG16B();
return false;
}
-bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+TargetLoweringBase::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
Type *MemType = SI->getValueOperand()->getType();
bool NoImplicitFloatOps =
@@ -30288,9 +30680,10 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
(Subtarget.hasSSE1() || Subtarget.hasX87()))
- return false;
+ return AtomicExpansionKind::None;
- return needsCmpXchgNb(MemType);
+ return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand
+ : AtomicExpansionKind::None;
}
// Note: this turns large loads into lock cmpxchg8b/16b.
@@ -30314,6 +30707,65 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
}
TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
+ // If the atomicrmw's result isn't actually used, we can just add a "lock"
+ // prefix to a normal instruction for these operations.
+ if (AI->use_empty())
+ return AtomicExpansionKind::None;
+
+  // If the atomicrmw's result is used by a single-bit AND, we may be able to
+  // use a bts/btr/btc instruction for these operations.
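+  // Illustrative example (hypothetical IR), for Or with C1 == C2 == 8:
+  //   %old = atomicrmw or i32* %p, i32 8 seq_cst
+  //   %bit = and i32 %old, 8
+  // can lower to a single "lock bts" on bit 3 instead of a cmpxchg loop.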
+ auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+ Instruction *I = AI->user_back();
+ if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+ AI->getParent() != I->getParent())
+ return AtomicExpansionKind::CmpXChg;
+  // The following instruction must be an AND with a single-bit constant.
+ auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+ unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
+ if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+ return AtomicExpansionKind::CmpXChg;
+
+ if (AI->getOperation() == AtomicRMWInst::And)
+ return ~C1->getValue() == C2->getValue()
+ ? AtomicExpansionKind::BitTestIntrinsic
+ : AtomicExpansionKind::CmpXChg;
+
+ return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
+ : AtomicExpansionKind::CmpXChg;
+}
+
+void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
+ IRBuilder<> Builder(AI);
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ switch (AI->getOperation()) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Or:
+ IID = Intrinsic::x86_atomic_bts;
+ break;
+ case AtomicRMWInst::Xor:
+ IID = Intrinsic::x86_atomic_btc;
+ break;
+ case AtomicRMWInst::And:
+ IID = Intrinsic::x86_atomic_btr;
+ break;
+ }
+ Instruction *I = AI->user_back();
+ LLVMContext &Ctx = AI->getContext();
+ unsigned Imm =
+ countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue());
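+  // E.g. a single-bit mask of 8 (1 << 3) yields Imm = 3, the bit index
+  // passed to the bts/btr/btc intrinsic.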
+ Function *BitTest =
+ Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
+ Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
+ Type::getInt8PtrTy(Ctx));
+ Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+ I->replaceAllUsesWith(Result);
+ I->eraseFromParent();
+ AI->eraseFromParent();
+}
+
+TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
@@ -30337,10 +30789,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
- // If the atomicrmw's result isn't actually used, we can just add a "lock"
- // prefix to a normal instruction for these operations.
- return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
- : AtomicExpansionKind::None;
+ return shouldExpandLogicAtomicRMWInIR(AI);
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
@@ -31552,16 +32001,12 @@ SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op,
// require special handling for these nodes), lower them as literal NOOPs for
// the time being.
SmallVector<SDValue, 2> Ops;
-
Ops.push_back(Op.getOperand(0));
if (Op->getGluedNode())
Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
- SDLoc OpDL(Op);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
-
- return NOOP;
+ return SDValue(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
}
// Custom split CVTPS2PH with wide types.
@@ -31710,8 +32155,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
- case ISD::UMIN: return LowerMINMAX(Op, DAG);
+ case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG);
case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
+ case ISD::AVGCEILU: return LowerAVG(Op, Subtarget, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -31807,9 +32253,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res);
return;
}
- case X86ISD::VPMADDWD:
- case X86ISD::AVG: {
- // Legalize types for X86ISD::AVG/VPMADDWD by widening.
+ case X86ISD::VPMADDWD: {
+ // Legalize types for X86ISD::VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
@@ -32462,7 +32907,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
- assert((!Regs64bit || Subtarget.hasCmpxchg16b()) &&
+ assert((!Regs64bit || Subtarget.canUseCMPXCHG16B()) &&
"64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
@@ -32821,6 +33266,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(LOR)
NODE_NAME_CASE(LXOR)
NODE_NAME_CASE(LAND)
+ NODE_NAME_CASE(LBTS)
+ NODE_NAME_CASE(LBTC)
+ NODE_NAME_CASE(LBTR)
NODE_NAME_CASE(VZEXT_MOVL)
NODE_NAME_CASE(VZEXT_LOAD)
NODE_NAME_CASE(VEXTRACT_STORE)
@@ -33041,7 +33489,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SCALEF_RND)
NODE_NAME_CASE(SCALEFS)
NODE_NAME_CASE(SCALEFS_RND)
- NODE_NAME_CASE(AVG)
NODE_NAME_CASE(MULHRS)
NODE_NAME_CASE(SINT_TO_FP_RND)
NODE_NAME_CASE(UINT_TO_FP_RND)
@@ -33222,7 +33669,6 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const {
bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
switch (Opcode) {
// TODO: Add more X86ISD opcodes once we have test coverage.
- case X86ISD::AVG:
case X86ISD::PCMPEQ:
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ:
@@ -33418,6 +33864,20 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
+bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
+ EVT VT) const {
+ // TODO: This is too general. There are cases where pre-AVX512 codegen would
+ // benefit. The transform may also be profitable for scalar code.
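+  // Sketch of the motivating pattern: select(cond, binop(x, y), x) can fold
+  // the select into the operand, which on AVX512 then matches a single
+  // merge-masked instruction instead of a separate blend.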
+ if (!Subtarget.hasAVX512())
+ return false;
+ if (!Subtarget.hasVLX() && !VT.is512BitVector())
+ return false;
+ if (!VT.isVector())
+ return false;
+
+ return true;
+}
+
/// Targets can use this to indicate that they only support *some*
/// VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
@@ -33460,6 +33920,16 @@ bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
return TargetLowering::areJTsAllowed(Fn);
}
+MVT X86TargetLowering::getPreferredSwitchConditionType(LLVMContext &Context,
+ EVT ConditionVT) const {
+  // Avoid 8- and 16-bit types because they increase the chance of unnecessary
+  // zero-extensions.
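+  // E.g. switching on an i8 value would otherwise tend to zero-extend it
+  // before each comparison; widening to i32 once avoids that.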
+ if (ConditionVT.getSizeInBits() < 32)
+ return MVT::i32;
+ return TargetLoweringBase::getPreferredSwitchConditionType(Context,
+ ConditionVT);
+}
+
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
@@ -33871,6 +34341,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
// conditional jump around it.
static bool isCMOVPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
+ case X86::CMOV_FR16:
case X86::CMOV_FR16X:
case X86::CMOV_FR32:
case X86::CMOV_FR32X:
@@ -34090,7 +34561,7 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
// SinkMBB:
// %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ]
- Register DestReg = FirstCMOV.getOperand(0).getReg();
+ Register DestReg = SecondCascadedCMOV.getOperand(0).getReg();
Register Op1Reg = FirstCMOV.getOperand(1).getReg();
Register Op2Reg = FirstCMOV.getOperand(2).getReg();
MachineInstrBuilder MIB =
@@ -34103,11 +34574,6 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
// The second SecondInsertedMBB provides the same incoming value as the
// FirstInsertedMBB (the True operand of the SELECT_CC/CMOV nodes).
MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB);
- // Copy the PHI result to the register defined by the second CMOV.
- BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), DL,
- TII->get(TargetOpcode::COPY),
- SecondCascadedCMOV.getOperand(0).getReg())
- .addReg(FirstCMOV.getOperand(0).getReg());
// Now remove the CMOVs.
FirstCMOV.eraseFromParent();
@@ -35546,6 +36012,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
+ case X86::CMOV_FR16:
+ case X86::CMOV_FR16X:
case X86::CMOV_FR32:
case X86::CMOV_FR32X:
case X86::CMOV_FR64:
@@ -36116,6 +36584,15 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
+ case X86ISD::AND: {
+ if (Op.getResNo() == 0) {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known &= Known2;
+ }
+ break;
+ }
case X86ISD::ANDNP: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -36257,6 +36734,28 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.setAllZero();
break;
}
+ case X86ISD::VBROADCAST_LOAD: {
+ APInt UndefElts;
+ SmallVector<APInt, 16> EltBits;
+ if (getTargetConstantBitsFromNode(Op, BitWidth, UndefElts, EltBits,
+ /*AllowWholeUndefs*/ false,
+ /*AllowPartialUndefs*/ false)) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
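+      // Start fully known and intersect with each demanded constant element;
+      // a demanded undef element resets everything to unknown.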
+ for (unsigned I = 0; I != NumElts; ++I) {
+ if (!DemandedElts[I])
+ continue;
+ if (UndefElts[I]) {
+ Known.resetAll();
+ break;
+ }
+ KnownBits Known2 = KnownBits::makeConstant(EltBits[I]);
+ Known = KnownBits::commonBits(Known, Known2);
+ }
+ return;
+ }
+ break;
+ }
}
// Handle target shuffles.
@@ -37113,9 +37612,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned NumRootElts = RootVT.getVectorNumElements();
// Canonicalize shuffle input op to the requested type.
- // TODO: Support cases where Op is smaller than VT.
auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {
- if (VT.getSizeInBits() < Op.getValueSizeInBits())
+ if (VT.getSizeInBits() > Op.getValueSizeInBits())
+ Op = widenSubVector(Op, false, Subtarget, DAG, DL, VT.getSizeInBits());
+ else if (VT.getSizeInBits() < Op.getValueSizeInBits())
Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits());
return DAG.getBitcast(VT, Op);
};
@@ -37129,8 +37629,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MVT VT1 = V1.getSimpleValueType();
MVT VT2 = V2.getSimpleValueType();
- assert(VT1.getSizeInBits() == RootSizeInBits &&
- VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch");
+ assert((RootSizeInBits % VT1.getSizeInBits()) == 0 &&
+ (RootSizeInBits % VT2.getSizeInBits()) == 0 && "Vector size mismatch");
SDValue Res;
@@ -37157,12 +37657,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- // If we are shuffling a broadcast (and not introducing zeros) then
- // we can just use the broadcast directly. This works for smaller broadcast
- // elements as well as they already repeat across each mask element
- if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) &&
+ // If we are shuffling a splat (and not introducing zeros) then we can just
+ // use it directly. This works for smaller elements as well as they already
+ // repeat across each mask element.
+ if (UnaryShuffle && !isAnyZero(BaseMask) &&
+ V1.getValueSizeInBits() >= RootSizeInBits &&
(BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
- V1.getValueSizeInBits() >= RootSizeInBits) {
+ DAG.isSplatValue(V1, /*AllowUndefs*/ false)) {
return CanonicalizeShuffleInput(RootVT, V1);
}
@@ -37543,7 +38044,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(RootVT.is128BitVector() && Subtarget.hasVLX())) &&
(MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) {
- if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE)
+ // Bail if this was already a truncation or PACK node.
+ // We sometimes fail to match PACK if we demand known undef elements.
+ if (Depth == 0 && (Root.getOpcode() == ISD::TRUNCATE ||
+ Root.getOpcode() == X86ISD::PACKSS ||
+ Root.getOpcode() == X86ISD::PACKUS))
return SDValue(); // Nothing to do!
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2);
@@ -37852,6 +38357,12 @@ static SDValue combineX86ShuffleChainWithExtract(
unsigned RootSizeInBits = RootVT.getSizeInBits();
assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");
+ // Bail if we have any smaller inputs.
+ if (llvm::any_of(Inputs, [RootSizeInBits](SDValue Input) {
+ return Input.getValueSizeInBits() < RootSizeInBits;
+ }))
+ return SDValue();
+
SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
SmallVector<unsigned, 4> Offsets(NumInputs, 0);
@@ -37894,16 +38405,6 @@ static SDValue combineX86ShuffleChainWithExtract(
}))
return SDValue();
- for (SDValue &NewInput : WideInputs) {
- assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 &&
- "Shuffle vector size mismatch");
- if (WideSizeInBits > NewInput.getValueSizeInBits())
- NewInput = widenSubVector(NewInput, false, Subtarget, DAG,
- SDLoc(NewInput), WideSizeInBits);
- assert(WideSizeInBits == NewInput.getValueSizeInBits() &&
- "Unexpected subvector extraction");
- }
-
// Create new mask for larger type.
for (unsigned i = 1; i != NumInputs; ++i)
Offsets[i] += i * Scale * NumMaskElts;
@@ -37928,7 +38429,10 @@ static SDValue combineX86ShuffleChainWithExtract(
// Attempt to combine wider chain.
// TODO: Can we use a better Root?
- SDValue WideRoot = WideInputs[0];
+ SDValue WideRoot = WideInputs.front().getValueSizeInBits() >
+ WideInputs.back().getValueSizeInBits()
+ ? WideInputs.front()
+ : WideInputs.back();
if (SDValue WideShuffle =
combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
HasVariableMask, AllowVariableCrossLaneMask,
@@ -38267,9 +38771,9 @@ static SDValue combineX86ShufflesRecursively(
assert(RootMask.size() > 0 &&
(RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
"Illegal shuffle root mask");
- assert(Root.getSimpleValueType().isVector() &&
- "Shuffles operate on vector types!");
- unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits();
+ MVT RootVT = Root.getSimpleValueType();
+ assert(RootVT.isVector() && "Shuffles operate on vector types!");
+ unsigned RootSizeInBits = RootVT.getSizeInBits();
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
@@ -38298,16 +38802,27 @@ static SDValue combineX86ShufflesRecursively(
APInt OpUndef, OpZero;
APInt OpDemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
- if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
- OpZero, DAG, Depth, false))
- return SDValue();
-
- // Shuffle inputs must not be larger than the shuffle result.
- // TODO: Relax this for single input faux shuffles (trunc/extract_subvector).
- if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {
- return OpInput.getValueSizeInBits() > VT.getSizeInBits();
- }))
+ if (getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
+ OpZero, DAG, Depth, false)) {
+ // Shuffle inputs must not be larger than the shuffle result.
+ // TODO: Relax this for single input faux shuffles (e.g. trunc).
+ if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {
+ return OpInput.getValueSizeInBits() > VT.getSizeInBits();
+ }))
+ return SDValue();
+ } else if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
+ !isNullConstant(Op.getOperand(1))) {
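+      // Treat a non-zero-index EXTRACT_SUBVECTOR as an identity shuffle of
+      // the wider source vector, starting at the extraction index.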
+ SDValue SrcVec = Op.getOperand(0);
+ int ExtractIdx = Op.getConstantOperandVal(1);
+ unsigned NumElts = VT.getVectorNumElements();
+ OpInputs.assign({SrcVec});
+ OpMask.assign(NumElts, SM_SentinelUndef);
+ std::iota(OpMask.begin(), OpMask.end(), ExtractIdx);
+ OpZero = OpUndef = APInt::getNullValue(NumElts);
+ } else {
return SDValue();
+ }
// If the shuffle result was smaller than the root, we need to adjust the
// mask indices and pad the mask with undefs.
@@ -38467,13 +38982,12 @@ static SDValue combineX86ShufflesRecursively(
// Handle the all undef/zero/ones cases early.
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
- return DAG.getUNDEF(Root.getValueType());
+ return DAG.getUNDEF(RootVT);
if (all_of(Mask, [](int Idx) { return Idx < 0; }))
- return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
- SDLoc(Root));
+ return getZeroVector(RootVT, Subtarget, DAG, SDLoc(Root));
if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) &&
none_of(Mask, [](int M) { return M == SM_SentinelZero; }))
- return getOnesVector(Root.getValueType(), DAG, SDLoc(Root));
+ return getOnesVector(RootVT, DAG, SDLoc(Root));
assert(!Ops.empty() && "Shuffle with no inputs detected");
HasVariableMask |= IsOpVariableMask;
@@ -38533,7 +39047,7 @@ static SDValue combineX86ShufflesRecursively(
// NOTE: This will update the Ops and Mask.
if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
- return DAG.getBitcast(Root.getValueType(), HOp);
+ return DAG.getBitcast(RootVT, HOp);
// Try to refine our inputs given our knowledge of target shuffle mask.
for (auto I : enumerate(Ops)) {
@@ -38578,6 +39092,8 @@ static SDValue combineX86ShufflesRecursively(
// FIXME: should we rerun resolveTargetShuffleInputsAndMask() now?
// Widen any subvector shuffle inputs we've collected.
+  // TODO: Remove this to avoid generating temporary nodes; we should only
+  // widen once combineX86ShuffleChain has found a match.
if (any_of(Ops, [RootSizeInBits](SDValue Op) {
return Op.getValueSizeInBits() < RootSizeInBits;
})) {
@@ -38823,8 +39339,7 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
SDValue N0 = V.getOperand(0);
SDValue N1 = V.getOperand(1);
unsigned Imm = V.getConstantOperandVal(2);
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
+ const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
return SDValue();
@@ -38869,21 +39384,24 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT ShuffleVT = N.getValueType();
- auto IsMergeableWithShuffle = [](SDValue Op) {
+ auto IsMergeableWithShuffle = [&DAG](SDValue Op, bool FoldLoad = false) {
// AllZeros/AllOnes constants are freely shuffled and will peek through
// bitcasts. Other constant build vectors do not peek through bitcasts. Only
// merge with target shuffles if it has one use so shuffle combining is
- // likely to kick in.
+ // likely to kick in. Shuffles of splats are expected to be removed.
return ISD::isBuildVectorAllOnes(Op.getNode()) ||
ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
- (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse());
+ (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
+ (FoldLoad && isShuffleFoldableLoad(Op)) ||
+ DAG.isSplatValue(Op, /*AllowUndefs*/ false);
};
auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
// Ensure we only shuffle whole vector src elements, unless its a logical
// binops where we can more aggressively move shuffles from dst to src.
return BinOp == ISD::AND || BinOp == ISD::OR || BinOp == ISD::XOR ||
+ BinOp == X86ISD::ANDNP ||
(Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits());
};
@@ -38913,7 +39431,8 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
- if (IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op01)) {
+ if (IsMergeableWithShuffle(Op00, Opc != X86ISD::PSHUFB) ||
+ IsMergeableWithShuffle(Op01, Opc != X86ISD::PSHUFB)) {
SDValue LHS, RHS;
Op00 = DAG.getBitcast(ShuffleVT, Op00);
Op01 = DAG.getBitcast(ShuffleVT, Op01);
@@ -39054,6 +39573,11 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
+ // FIXME: Remove this after we support vector FP16
+ if (isSoftFP16(peekThroughBitcasts(N.getOperand(0)).getSimpleValueType(),
+ Subtarget))
+ return SDValue();
+
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
return R;
@@ -39471,7 +39995,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return SDValue();
SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1));
SmallVector<SDValue> SubOps;
- if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2)
+ if (collectConcatOps(Src.getNode(), SubOps, DAG) && SubOps.size() == 2)
return SubOps[Idx & 1];
unsigned NumElts = Src.getValueType().getVectorNumElements();
if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
@@ -39581,7 +40105,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
// No change if element is already zero or the inserted element.
continue;
- } else if (KnownUndef0[i] || KnownZero0[i]) {
+ }
+
+ if (KnownUndef0[i] || KnownZero0[i]) {
// If the target mask is undef/zero then we must zero the element.
InsertPSMask |= (1u << i);
Updated = true;
@@ -40016,16 +40542,14 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// Simplify source operands based on shuffle mask.
// TODO - merge this into combineX86ShufflesRecursively.
- APInt KnownUndef, KnownZero;
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
- if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
- DCI))
+ if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, DCI))
return SDValue(N, 0);
// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
// Perform this after other shuffle combines to allow inner shuffles to be
// combined away first.
- if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, SDLoc(N)))
+ if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, dl))
return BinOp;
}
@@ -40212,6 +40736,11 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
Depth + 1))
return true;
+ // Fold shift(0,x) -> 0
+ if (DemandedElts.isSubsetOf(KnownZero))
+ return TLO.CombineTo(
+ Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
+
// Aggressively peek through ops to get at the demanded elts.
if (!DemandedElts.isAllOnes())
if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
@@ -40232,9 +40761,16 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
Depth + 1))
return true;
+
+ // Fold shift(0,x) -> 0
+ if (DemandedElts.isSubsetOf(LHSZero))
+ return TLO.CombineTo(
+ Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
+
if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
Depth + 1))
return true;
+
KnownZero = LHSZero;
break;
}
@@ -40316,6 +40852,57 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero.setHighBits(ShiftAmt);
break;
}
+ case X86ISD::ANDNP: {
+ // ANDNP = (~LHS & RHS);
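+    // If either operand is a constant, the other operand is only demanded in
+    // elements where the constant can't force the result to zero (i.e. where
+    // the RHS element is non-zero, or the LHS element isn't all-ones).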
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ int NumElts = VT.getVectorNumElements();
+ int EltSizeInBits = VT.getScalarSizeInBits();
+ APInt OpBits = APInt::getAllOnes(EltSizeInBits);
+ APInt OpElts = DemandedElts;
+ if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
+ EltBits)) {
+ OpBits.clearAllBits();
+ OpElts.clearAllBits();
+ for (int I = 0; I != NumElts; ++I)
+ if (DemandedElts[I] && ((Invert && !EltBits[I].isAllOnes()) ||
+ (!Invert && !EltBits[I].isZero()))) {
+ OpBits |= Invert ? ~EltBits[I] : EltBits[I];
+ OpElts.setBit(I);
+ }
+ }
+ return std::make_pair(OpBits, OpElts);
+ };
+ std::pair<APInt, APInt> DemandLHS = GetDemandedMasks(RHS);
+ std::pair<APInt, APInt> DemandRHS = GetDemandedMasks(LHS, true);
+
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ if (SimplifyDemandedVectorElts(LHS, DemandLHS.second, LHSUndef, LHSZero,
+ TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandRHS.second, RHSUndef, RHSZero,
+ TLO, Depth + 1))
+ return true;
+
+ if (!DemandedElts.isAllOnes()) {
+ SDValue NewLHS = SimplifyMultipleUseDemandedBits(
+ LHS, DemandLHS.first, DemandLHS.second, TLO.DAG, Depth + 1);
+ SDValue NewRHS = SimplifyMultipleUseDemandedBits(
+ RHS, DemandRHS.first, DemandRHS.second, TLO.DAG, Depth + 1);
+ if (NewLHS || NewRHS) {
+ NewLHS = NewLHS ? NewLHS : LHS;
+ NewRHS = NewRHS ? NewRHS : RHS;
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
+ }
+ }
+ break;
+ }
case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: {
SDValue Src = Op.getOperand(0);
@@ -40620,7 +41207,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
case X86ISD::UNPCKH:
case X86ISD::BLENDI:
// Integer ops.
- case X86ISD::AVG:
case X86ISD::PACKSS:
case X86ISD::PACKUS:
// Horizontal Ops.
@@ -40651,10 +41237,10 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
}
- // For broadcasts, unless we *only* demand the 0'th element,
+ // For splats, unless we *only* demand the 0'th element,
  // stop attempts at simplification here; we aren't going to improve things,
  // and this is better than any potential shuffle.
- if (isTargetShuffleSplat(Op) && !DemandedElts.isOne())
+ if (!DemandedElts.isOne() && TLO.DAG.isSplatValue(Op, /*AllowUndefs*/false))
return false;
// Get target/faux shuffle mask.
@@ -40770,20 +41356,31 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
KnownBits KnownOp;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
+
+    // Don't mask bits on 32-bit AVX512 targets, where doing so might lose a
+    // broadcast.
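+    // (Narrowing the demanded bits of a splat operand could rewrite it into a
+    // form that no longer matches a 64-bit broadcast of a 32-bit value.)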
// FIXME: Can we bound this better?
APInt DemandedMask = APInt::getLowBitsSet(64, 32);
- if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp,
- TLO, Depth + 1))
+ APInt DemandedMaskLHS = APInt::getAllOnes(64);
+ APInt DemandedMaskRHS = APInt::getAllOnes(64);
+
+ bool Is32BitAVX512 = !Subtarget.is64Bit() && Subtarget.hasAVX512();
+ if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(LHS))
+ DemandedMaskLHS = DemandedMask;
+ if (!Is32BitAVX512 || !TLO.DAG.isSplatValue(RHS))
+ DemandedMaskRHS = DemandedMask;
+
+ if (SimplifyDemandedBits(LHS, DemandedMaskLHS, OriginalDemandedElts,
+ KnownOp, TLO, Depth + 1))
return true;
- if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp,
- TLO, Depth + 1))
+ if (SimplifyDemandedBits(RHS, DemandedMaskRHS, OriginalDemandedElts,
+ KnownOp, TLO, Depth + 1))
return true;
// Aggressively peek through ops to get at the demanded low bits.
SDValue DemandedLHS = SimplifyMultipleUseDemandedBits(
- LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ LHS, DemandedMaskLHS, OriginalDemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedRHS = SimplifyMultipleUseDemandedBits(
- RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ RHS, DemandedMaskRHS, OriginalDemandedElts, TLO.DAG, Depth + 1);
if (DemandedLHS || DemandedRHS) {
DemandedLHS = DemandedLHS ? DemandedLHS : LHS;
DemandedRHS = DemandedRHS ? DemandedRHS : RHS;
@@ -41084,7 +41681,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
TLO, Depth + 1))
return true;
- Known.Zero = KnownZero.zextOrSelf(BitWidth);
+ Known.Zero = KnownZero.zext(BitWidth);
Known.Zero.setHighBits(BitWidth - NumElts);
// MOVMSK only uses the MSB from each vector element.
@@ -41291,12 +41888,8 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
switch (Opc) {
case X86ISD::VBROADCAST:
case X86ISD::VBROADCAST_LOAD:
- // TODO: Permit vXi64 types on 32-bit targets.
- if (isTypeLegal(Op.getValueType().getVectorElementType())) {
- UndefElts = APInt::getNullValue(NumElts);
- return true;
- }
- return false;
+ UndefElts = APInt::getNullValue(NumElts);
+ return true;
}
return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts,
@@ -42840,10 +43433,29 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
return SDValue();
SDLoc DL(ExtElt);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ unsigned EltSizeInBits = VecVT.getScalarSizeInBits();
+
+  // Extend a v4i8/v8i8 vector to v16i8, with the upper 64 bits undef.
+ auto WidenToV16I8 = [&](SDValue V, bool ZeroExtend) {
+ if (V.getValueType() == MVT::v4i8) {
+ if (ZeroExtend && Subtarget.hasSSE41()) {
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
+ DAG.getConstant(0, DL, MVT::v4i32),
+ DAG.getBitcast(MVT::i32, V),
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getBitcast(MVT::v16i8, V);
+ }
+ V = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, V,
+ ZeroExtend ? DAG.getConstant(0, DL, MVT::v4i8)
+ : DAG.getUNDEF(MVT::v4i8));
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V,
+ DAG.getUNDEF(MVT::v8i8));
+ };
// vXi8 mul reduction - promote to vXi16 mul reduction.
if (Opc == ISD::MUL) {
- unsigned NumElts = VecVT.getVectorNumElements();
if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
return SDValue();
if (VecVT.getSizeInBits() >= 128) {
@@ -42858,11 +43470,7 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
}
} else {
- if (VecVT == MVT::v4i8)
- Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
- DAG.getUNDEF(MVT::v4i8));
- Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
- DAG.getUNDEF(MVT::v8i8));
+ Rdx = WidenToV16I8(Rdx, false);
Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
}
@@ -42882,24 +43490,7 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
// vXi8 add reduction - sub 128-bit vector.
if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
- if (VecVT == MVT::v4i8) {
- // Pad with zero.
- if (Subtarget.hasSSE41()) {
- Rdx = DAG.getBitcast(MVT::i32, Rdx);
- Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
- DAG.getConstant(0, DL, MVT::v4i32), Rdx,
- DAG.getIntPtrConstant(0, DL));
- Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
- } else {
- Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
- DAG.getConstant(0, DL, VecVT));
- }
- }
- if (Rdx.getValueType() == MVT::v8i8) {
- // Pad with undef.
- Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
- DAG.getUNDEF(MVT::v8i8));
- }
+ Rdx = WidenToV16I8(Rdx, true);
Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
DAG.getConstant(0, DL, MVT::v16i8));
Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
@@ -42907,8 +43498,7 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
}
// Must be a >=128-bit vector with pow2 elements.
- if ((VecVT.getSizeInBits() % 128) != 0 ||
- !isPowerOf2_32(VecVT.getVectorNumElements()))
+ if ((VecVT.getSizeInBits() % 128) != 0 || !isPowerOf2_32(NumElts))
return SDValue();
// vXi8 add reduction - sum lo/hi halves then use PSADBW.
@@ -42931,6 +43521,48 @@ static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
+ // See if we can use vXi8 PSADBW add reduction for larger zext types.
+ // If the source vector values are 0-255, then we can use PSADBW to
+ // sum+zext v8i8 subvectors to vXi64, then perform the reduction.
+  // TODO: See if it's worth avoiding vXi16/i32 truncations?
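+  // Illustrative sketch: reduce.add(zext(v8i8 x) to v8i32) can instead
+  // truncate back to v8i8 (the values are known to fit in a byte) and use a
+  // single PSADBW against zero, which sums each 8-byte half into an i64 lane.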
+ if (Opc == ISD::ADD && NumElts >= 4 && EltSizeInBits >= 16 &&
+ DAG.computeKnownBits(Rdx).getMaxValue().ule(255) &&
+ (EltSizeInBits == 16 || Rdx.getOpcode() == ISD::ZERO_EXTEND ||
+ Subtarget.hasAVX512())) {
+ EVT ByteVT = VecVT.changeVectorElementType(MVT::i8);
+ Rdx = DAG.getNode(ISD::TRUNCATE, DL, ByteVT, Rdx);
+ if (ByteVT.getSizeInBits() < 128)
+ Rdx = WidenToV16I8(Rdx, true);
+
+ // Build the PSADBW, split as 128/256/512 bits for SSE/AVX2/AVX512BW.
+ auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);
+ SDValue Zero = DAG.getConstant(0, DL, Ops[0].getValueType());
+ return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops[0], Zero);
+ };
+ MVT SadVT = MVT::getVectorVT(MVT::i64, Rdx.getValueSizeInBits() / 64);
+ Rdx = SplitOpsAndApply(DAG, Subtarget, DL, SadVT, {Rdx}, PSADBWBuilder);
+
+ // TODO: We could truncate to vXi16/vXi32 before performing the reduction.
+ while (Rdx.getValueSizeInBits() > 128) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
+ VecVT = Lo.getValueType();
+ Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi);
+ }
+ assert(Rdx.getValueType() == MVT::v2i64 && "v2i64 reduction expected");
+
+ if (NumElts > 8) {
+ SDValue RdxHi = DAG.getVectorShuffle(MVT::v2i64, DL, Rdx, Rdx, {1, -1});
+ Rdx = DAG.getNode(ISD::ADD, DL, MVT::v2i64, Rdx, RdxHi);
+ }
+
+ VecVT = MVT::getVectorVT(VT.getSimpleVT(), 128 / VT.getSizeInBits());
+ Rdx = DAG.getBitcast(VecVT, Rdx);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
+ }
+
// Only use (F)HADD opcodes if they aren't microcoded or minimizes codesize.
if (!shouldUseHorizontalOp(true, DAG, Subtarget))
return SDValue();
@@ -42994,8 +43626,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
uint64_t Idx = CIdx->getZExtValue();
if (UndefVecElts[Idx])
return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
- return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()),
- dl, VT);
+ return DAG.getConstant(EltBits[Idx].zext(VT.getScalarSizeInBits()), dl,
+ VT);
}
}
@@ -43076,29 +43708,32 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
// but not
// i1 = extract_vector_elt t0:1, Constant:i64<2>
// since the latter would need its own MOVMSK.
- if (CIdx && SrcVT.getScalarType() == MVT::i1) {
+ if (SrcVT.getScalarType() == MVT::i1) {
+ bool IsVar = !CIdx;
SmallVector<SDNode *, 16> BoolExtracts;
unsigned ResNo = InputVector.getResNo();
- auto IsBoolExtract = [&BoolExtracts, &ResNo](SDNode *Use) {
+ auto IsBoolExtract = [&BoolExtracts, &ResNo, &IsVar](SDNode *Use) {
if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(Use->getOperand(1)) &&
Use->getOperand(0).getResNo() == ResNo &&
Use->getValueType(0) == MVT::i1) {
BoolExtracts.push_back(Use);
+ IsVar |= !isa<ConstantSDNode>(Use->getOperand(1));
return true;
}
return false;
};
+ // TODO: Can we drop the oneuse check for constant extracts?
if (all_of(InputVector->uses(), IsBoolExtract) &&
- BoolExtracts.size() > 1) {
+ (IsVar || BoolExtracts.size() > 1)) {
EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
if (SDValue BC =
combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
for (SDNode *Use : BoolExtracts) {
// extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask
- unsigned MaskIdx = Use->getConstantOperandVal(1);
- APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
- SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
+ // Mask = 1 << MaskIdx
+ SDValue MaskIdx = DAG.getZExtOrTrunc(Use->getOperand(1), dl, MVT::i8);
+ SDValue MaskBit = DAG.getConstant(1, dl, BCVT);
+ SDValue Mask = DAG.getNode(ISD::SHL, dl, BCVT, MaskBit, MaskIdx);
SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
DCI.CombineTo(Use, Res);
@@ -43123,7 +43758,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
- !LikelyUsedAsVector) {
+ !LikelyUsedAsVector && LoadVec->isSimple()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue NewPtr =
TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
@@ -43133,16 +43768,111 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
SDValue Load =
DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
- SDValue Chain = Load.getValue(1);
- SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)};
- SDValue To[] = {Load, Chain};
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- return SDValue(N, 0);
+ DAG.makeEquivalentMemoryOrdering(LoadVec, Load);
+ return Load;
}
return SDValue();
}
+// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)).
+// This is more or less the reverse of combineBitcastvxi1.
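+// Illustrative sketch for v8i16 zext(v8i1 bitcast(i8 %x)): broadcast %x to
+// all 8 lanes, AND each lane with its bit mask (1, 2, 4, ...), SETEQ against
+// the same mask, then (for ZERO_EXTEND) shift the sign bits down to 0/1.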
+static SDValue combineToExtendBoolVectorInReg(
+ unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) {
+ if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
+ Opcode != ISD::ANY_EXTEND)
+ return SDValue();
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+ if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
+ return SDValue();
+
+ EVT SVT = VT.getScalarType();
+ EVT InSVT = N0.getValueType().getScalarType();
+ unsigned EltSizeInBits = SVT.getSizeInBits();
+
+  // We must be extending a bool vector (bitcast from a scalar integer) to a
+  // vector with a legal integer element type.
+ if (!VT.isVector())
+ return SDValue();
+ if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
+ return SDValue();
+ if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ EVT SclVT = N00.getValueType();
+ if (!SclVT.isScalarInteger())
+ return SDValue();
+
+ SDValue Vec;
+ SmallVector<int> ShuffleMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
+
+ // Broadcast the scalar integer to the vector elements.
+ if (NumElts > EltSizeInBits) {
+ // If the scalar integer is greater than the vector element size, then we
+ // must split it down into sub-sections for broadcasting. For example:
+ // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections.
+ // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections.
+ assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
+ unsigned Scale = NumElts / EltSizeInBits;
+ EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
+ Vec = DAG.getBitcast(VT, Vec);
+
+ for (unsigned i = 0; i != Scale; ++i)
+ ShuffleMask.append(EltSizeInBits, i);
+ Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
+ } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
+ (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
+ // If we have register broadcast instructions, use the scalar size as the
+ // element type for the shuffle. Then cast to the wider element type. The
+ // widened bits won't be used, and this might allow the use of a broadcast
+ // load.
+ assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
+ unsigned Scale = EltSizeInBits / NumElts;
+ EVT BroadcastVT =
+ EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale);
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
+ ShuffleMask.append(NumElts * Scale, 0);
+ Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask);
+ Vec = DAG.getBitcast(VT, Vec);
+ } else {
+    // For a smaller scalar integer, we can simply any-extend it to the vector
+    // element size (we don't care about the upper bits) and broadcast it to
+    // all elements.
+ SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
+ ShuffleMask.append(NumElts, 0);
+ Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
+ }
+
+ // Now, mask the relevant bit in each element.
+ SmallVector<SDValue, 32> Bits;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int BitIdx = (i % EltSizeInBits);
+ APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1);
+ Bits.push_back(DAG.getConstant(Bit, DL, SVT));
+ }
+ SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
+ Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);
+
+ // Compare against the bitmask and extend the result.
+ EVT CCVT = VT.changeVectorElementType(MVT::i1);
+ Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ);
+ Vec = DAG.getSExtOrTrunc(Vec, DL, VT);
+
+  // For SEXT we are now done; otherwise shift the result down for
+  // zero-extension.
+ if (Opcode == ISD::SIGN_EXTEND)
+ return Vec;
+ return DAG.getNode(ISD::SRL, DL, VT, Vec,
+ DAG.getConstant(EltSizeInBits - 1, DL, VT));
+}
+
/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
/// TODO: Move to DAGCombiner, possibly using TargetLowering::hasAndNot()?
@@ -43270,8 +44000,8 @@ static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG,
SDValue FVal = N->getOperand(2);
SmallVector<SDValue, 4> CatOpsT, CatOpsF;
if (!TVal.hasOneUse() || !FVal.hasOneUse() ||
- !collectConcatOps(TVal.getNode(), CatOpsT) ||
- !collectConcatOps(FVal.getNode(), CatOpsF))
+ !collectConcatOps(TVal.getNode(), CatOpsT, DAG) ||
+ !collectConcatOps(FVal.getNode(), CatOpsF, DAG))
return SDValue();
auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL,
@@ -43360,19 +44090,17 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
/// This function will also call SimplifyDemandedBits on already created
/// BLENDV to perform additional simplifications.
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
if ((N->getOpcode() != ISD::VSELECT &&
N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
- // Don't optimize before the condition has been transformed to a legal type
- // and don't ever optimize vector selects that map to AVX512 mask-registers.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned BitWidth = Cond.getScalarValueSizeInBits();
- if (BitWidth < 8 || BitWidth > 64)
- return SDValue();
+ EVT VT = N->getValueType(0);
// We can only handle the cases where VSELECT is directly legal on the
// subtarget. We custom lower VSELECT nodes with constant conditions and
@@ -43384,8 +44112,6 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
// Potentially, we should combine constant-condition vselect nodes
// pre-legalization into shuffles and not mark as many types as custom
// lowered.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = N->getValueType(0);
if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
return SDValue();
// FIXME: We don't support i16-element blends currently. We could and
@@ -43403,6 +44129,11 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
if (VT.is512BitVector())
return SDValue();
+ // Don't optimize before the condition has been transformed to a legal type
+ // and don't ever optimize vector selects that map to AVX512 mask-registers.
+ if (BitWidth < 8 || BitWidth > 64)
+ return SDValue();
+
auto OnlyUsedAsSelectCond = [](SDValue Cond) {
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
UI != UE; ++UI)
@@ -43542,9 +44273,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return V;
// Convert vselects with constant condition into shuffles.
- if (CondConstantVector && DCI.isBeforeLegalizeOps()) {
+ if (CondConstantVector && DCI.isBeforeLegalizeOps() &&
+ (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV)) {
SmallVector<int, 64> Mask;
- if (createShuffleMaskFromVSELECT(Mask, Cond))
+ if (createShuffleMaskFromVSELECT(Mask, Cond,
+ N->getOpcode() == X86ISD::BLENDV))
return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
}
@@ -43565,11 +44298,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// getConstVector sets negative shuffle mask values as undef, so ensure
// we hardcode SM_SentinelZero values to zero (0x80).
if (CondMask[i] < NumElts) {
- LHSMask[i] = (LHSMask[i] == SM_SentinelZero) ? 0x80 : LHSMask[i];
+ LHSMask[i] = isUndefOrZero(LHSMask[i]) ? 0x80 : LHSMask[i];
RHSMask[i] = 0x80;
} else {
LHSMask[i] = 0x80;
- RHSMask[i] = (RHSMask[i] == SM_SentinelZero) ? 0x80 : RHSMask[i];
+ RHSMask[i] = isUndefOrZero(RHSMask[i]) ? 0x80 : RHSMask[i];
}
}
LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),
@@ -43586,7 +44319,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// ignored in unsafe-math mode).
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
- VT != MVT::f80 && VT != MVT::f128 &&
+ VT != MVT::f80 && VT != MVT::f128 && !isSoftFP16(VT, Subtarget) &&
(TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget.hasSSE2() ||
(Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
@@ -43880,7 +44613,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// If this an avx512 target we can improve the use of zero masking by
// swapping the operands and inverting the condition.
if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
- Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
+ Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
@@ -43889,6 +44622,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
}
+ // Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might
+ // get split by legalization.
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST &&
+ CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() &&
+ TLI.isTypeLegal(VT.getScalarType())) {
+ EVT ExtCondVT = VT.changeVectorElementTypeToInteger();
+ if (SDValue ExtCond = combineToExtendBoolVectorInReg(
+ ISD::SIGN_EXTEND, DL, ExtCondVT, Cond, DAG, DCI, Subtarget)) {
+ ExtCond = DAG.getNode(ISD::TRUNCATE, DL, CondVT, ExtCond);
+ return DAG.getSelect(DL, VT, ExtCond, LHS, RHS);
+ }
+ }
+
// Early exit check
if (!TLI.isTypeLegal(VT))
return SDValue();
@@ -44301,14 +45047,15 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
if (EFLAGS.getOpcode() == X86ISD::ADD) {
if (isAllOnesConstant(EFLAGS.getOperand(1))) {
+ bool FoundAndLSB = false;
SDValue Carry = EFLAGS.getOperand(0);
while (Carry.getOpcode() == ISD::TRUNCATE ||
Carry.getOpcode() == ISD::ZERO_EXTEND ||
- Carry.getOpcode() == ISD::SIGN_EXTEND ||
- Carry.getOpcode() == ISD::ANY_EXTEND ||
(Carry.getOpcode() == ISD::AND &&
- isOneConstant(Carry.getOperand(1))))
+ isOneConstant(Carry.getOperand(1)))) {
+ FoundAndLSB |= Carry.getOpcode() == ISD::AND;
Carry = Carry.getOperand(0);
+ }
if (Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
// TODO: Merge this code with equivalent in combineAddOrSubToADCOrSBB?
@@ -44339,6 +45086,14 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
CarryOp1.getOpcode() == X86ISD::ADD &&
isOneConstant(CarryOp1.getOperand(1)))
return CarryOp1;
+ } else if (FoundAndLSB) {
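+        // Only the LSB of the AND'd chain can feed the carry, so testing that
+        // bit directly with BT reproduces the same CF value.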
+ SDLoc DL(Carry);
+ SDValue BitNo = DAG.getConstant(0, DL, Carry.getValueType());
+ if (Carry.getOpcode() == ISD::SRL) {
+ BitNo = Carry.getOperand(1);
+ Carry = Carry.getOperand(0);
+ }
+ return getBT(Carry, BitNo, DL, DAG);
}
}
}
@@ -44533,6 +45288,12 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
if (!IsAnyOf && !IsAllOf)
return SDValue();
+  // TODO: Check more cases for this combine.
+  // We use the number of uses of the compare to decide whether to combine.
+  // Currently only the "MOVMSK(CONCAT(..))" and "MOVMSK(PCMPEQ(..))" combines
+  // have test coverage and fit this one-use constraint.
+ bool IsOneUse = CmpOp.getNode()->hasOneUse();
+
// See if we can peek through to a vector with a wider element type, if the
// signbits extend down to all the sub-elements as well.
// Calling MOVMSK with the wider type, avoiding the bitcast, helps expose
@@ -44561,9 +45322,9 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
// MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
// MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
// MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
- if (VecVT.is256BitVector() && NumElts <= CmpBits) {
+ if (VecVT.is256BitVector() && NumElts <= CmpBits && IsOneUse) {
SmallVector<SDValue> Ops;
- if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
+ if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops, DAG) &&
Ops.size() == 2) {
SDLoc DL(EFLAGS);
EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
@@ -44582,7 +45343,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
// MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
// MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
// MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
- if (IsAllOf && Subtarget.hasSSE41()) {
+ if (IsAllOf && Subtarget.hasSSE41() && IsOneUse) {
MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
SDValue BC = peekThroughBitcasts(Vec);
// Ensure MOVMSK was testing every signbit of BC.
@@ -44734,7 +45495,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
if (!(FalseOp.getValueType() == MVT::f80 ||
(FalseOp.getValueType() == MVT::f64 && !Subtarget.hasSSE2()) ||
(FalseOp.getValueType() == MVT::f32 && !Subtarget.hasSSE1())) ||
- !Subtarget.hasCMov() || hasFPCMov(CC)) {
+ !Subtarget.canUseCMOV() || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8),
Flags};
return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);
@@ -45181,8 +45942,6 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
if (NumElts == 1 || !isPowerOf2_32(NumElts))
return SDValue();
- EVT WVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, 2 * NumElts);
-
// With AVX512 but without BWI, we would need to split v32i16.
if (32 <= (2 * NumElts) && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return SDValue();
@@ -45265,11 +46024,13 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
// Use SplitOpsAndApply to handle AVX splitting.
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
- MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
- return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
+ MVT ResVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
+ MVT OpVT = MVT::getVectorVT(MVT::i16, Ops[0].getValueSizeInBits() / 16);
+ return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT,
+ DAG.getBitcast(OpVT, Ops[0]),
+ DAG.getBitcast(OpVT, Ops[1]));
};
- return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
- { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) },
+ return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1},
PMADDWDBuilder);
}
@@ -45622,12 +46383,11 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
SarConst = SarConst - (Size - ShiftSize);
if (SarConst == 0)
return NN;
- else if (SarConst.isNegative())
+ if (SarConst.isNegative())
return DAG.getNode(ISD::SHL, DL, VT, NN,
DAG.getConstant(-SarConst, DL, CVT));
- else
- return DAG.getNode(ISD::SRA, DL, VT, NN,
- DAG.getConstant(SarConst, DL, CVT));
+ return DAG.getNode(ISD::SRA, DL, VT, NN,
+ DAG.getConstant(SarConst, DL, CVT));
}
return SDValue();
}
@@ -46034,11 +46794,9 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
EltBits[0].getZExtValue(), DAG);
}
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
- if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
@@ -46461,11 +47219,17 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(VT, FPLogic);
}
+ if (VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || !N0.hasOneUse() ||
+ !N1.hasOneUse())
+ return SDValue();
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+
// The vector ISA for FP predicates is incomplete before AVX, so converting
// COMIS* to CMPS* may not be a win before AVX.
- // TODO: Check types/predicates to see if they are available with SSE/SSE2.
- if (!Subtarget.hasAVX() || VT != MVT::i1 || N0.getOpcode() != ISD::SETCC ||
- !N0.hasOneUse() || !N1.hasOneUse())
+ if (!Subtarget.hasAVX() &&
+ !(cheapX86FSETCC_SSE(CC0) && cheapX86FSETCC_SSE(CC1)))
return SDValue();
// Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*)
@@ -46482,10 +47246,8 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
SDValue Vec01 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N01);
SDValue Vec10 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N10);
SDValue Vec11 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N11);
- SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01,
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11,
- cast<CondCodeSDNode>(N1.getOperand(2))->get());
+ SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01, CC0);
+ SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11, CC1);
SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex);
}
@@ -46891,6 +47653,53 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
return R;
+ // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
+  // iff each element of c2 is an all-bits or no-bits mask - i.e. a
+  // select-with-zero mask.
+ // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
+ if (VT.isVector() && getTargetConstantFromNode(N1)) {
+ unsigned Opc0 = N0.getOpcode();
+ if ((Opc0 == ISD::MUL || Opc0 == ISD::MULHU || Opc0 == ISD::MULHS) &&
+ getTargetConstantFromNode(N0.getOperand(1)) &&
+ DAG.ComputeNumSignBits(N1) == VT.getScalarSizeInBits() &&
+ N0->hasOneUse() && N0.getOperand(1)->hasOneUse()) {
+ SDValue MaskMul = DAG.getNode(ISD::AND, dl, VT, N0.getOperand(1), N1);
+ return DAG.getNode(Opc0, dl, VT, N0.getOperand(0), MaskMul);
+ }
+ }
+
+  // Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant;
+  // this avoids a slow variable shift (moving the shift amount to ECX etc.).
+ if (isOneConstant(N1) && N0->hasOneUse()) {
+ SDValue Src = N0;
+ while ((Src.getOpcode() == ISD::ZERO_EXTEND ||
+ Src.getOpcode() == ISD::TRUNCATE) &&
+ Src.getOperand(0)->hasOneUse())
+ Src = Src.getOperand(0);
+ bool ContainsNOT = false;
+ X86::CondCode X86CC = X86::COND_B;
+ // Peek through AND(NOT(SRL(X,Y)),1).
+ if (isBitwiseNot(Src)) {
+ Src = Src.getOperand(0);
+ X86CC = X86::COND_AE;
+ ContainsNOT = true;
+ }
+ if (Src.getOpcode() == ISD::SRL &&
+ !isa<ConstantSDNode>(Src.getOperand(1))) {
+ SDValue BitNo = Src.getOperand(1);
+ Src = Src.getOperand(0);
+ // Peek through AND(SRL(NOT(X),Y),1).
+ if (isBitwiseNot(Src)) {
+ Src = Src.getOperand(0);
+ X86CC = X86CC == X86::COND_AE ? X86::COND_B : X86::COND_AE;
+ ContainsNOT = true;
+ }
+ // If we have BMI2 then SHRX should be faster for i32/i64 cases.
+ if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.getSizeInBits() >= 32))
+ if (SDValue BT = getBT(Src, BitNo, dl, DAG))
+ return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT);
+ }
+ }
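
// The bit-test fold above is the DAG form of this identity (sketch, not
// part of the patch): BT places bit Y of X in CF, so AND(SRL(X,Y),1)
// becomes SETB of the BT result (or SETAE when a NOT was peeked through).
#include <cstdint>
static bool bitExtractReference(uint64_t X, unsigned Y) {
  return ((X >> Y) & 1) != 0; // == SETB(BT(X, Y)) for Y in [0, 63]
}
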
+
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
// Attempt to recursively combine a bitmask AND with shuffles.
SDValue Op(N, 0);
@@ -46899,32 +47708,44 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
// If either operand is a constant mask, then only the elements that aren't
// zero are actually demanded by the other operand.
- auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) {
+ auto GetDemandedMasks = [&](SDValue Op) {
APInt UndefElts;
SmallVector<APInt> EltBits;
int NumElts = VT.getVectorNumElements();
int EltSizeInBits = VT.getScalarSizeInBits();
- if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
- return false;
-
- APInt DemandedBits = APInt::getZero(EltSizeInBits);
- APInt DemandedElts = APInt::getZero(NumElts);
- for (int I = 0; I != NumElts; ++I)
- if (!EltBits[I].isZero()) {
- DemandedBits |= EltBits[I];
- DemandedElts.setBit(I);
- }
-
- APInt KnownUndef, KnownZero;
- return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
- KnownZero, DCI) ||
- TLI.SimplifyDemandedBits(OtherOp, DemandedBits, DemandedElts, DCI);
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
+ if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
+ EltBits)) {
+ DemandedBits.clearAllBits();
+ DemandedElts.clearAllBits();
+ for (int I = 0; I != NumElts; ++I)
+ if (!EltBits[I].isZero()) {
+ DemandedBits |= EltBits[I];
+ DemandedElts.setBit(I);
+ }
+ }
+ return std::make_pair(DemandedBits, DemandedElts);
};
- if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
+ std::pair<APInt, APInt> Demand0 = GetDemandedMasks(N1);
+ std::pair<APInt, APInt> Demand1 = GetDemandedMasks(N0);
+
+ if (TLI.SimplifyDemandedVectorElts(N0, Demand0.second, DCI) ||
+ TLI.SimplifyDemandedVectorElts(N1, Demand1.second, DCI) ||
+ TLI.SimplifyDemandedBits(N0, Demand0.first, Demand0.second, DCI) ||
+ TLI.SimplifyDemandedBits(N1, Demand1.first, Demand1.second, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
+
+ SDValue NewN0 = TLI.SimplifyMultipleUseDemandedBits(N0, Demand0.first,
+ Demand0.second, DAG);
+ SDValue NewN1 = TLI.SimplifyMultipleUseDemandedBits(N1, Demand1.first,
+ Demand1.second, DAG);
+ if (NewN0 || NewN1)
+ return DAG.getNode(ISD::AND, dl, VT, NewN0 ? NewN0 : N0,
+ NewN1 ? NewN1 : N1);
}
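
// A scalar sketch (not part of the patch) of what GetDemandedMasks above
// collects: when one AND operand is a constant vector, the other operand is
// only demanded in the lanes and bits the constant can keep, so everything
// else is fair game for SimplifyDemandedBits/VectorElts.
#include <cstdint>
#include <vector>
struct DemandedMasks { uint64_t Bits = 0; std::vector<bool> Elts; };
static DemandedMasks demandedFromConstMask(const std::vector<uint64_t> &Elt) {
  DemandedMasks D;
  D.Elts.assign(Elt.size(), false);
  for (size_t I = 0; I != Elt.size(); ++I)
    if (Elt[I] != 0) {   // all-zero lanes select nothing from the other op
      D.Bits |= Elt[I];  // union of the bits any lane can keep
      D.Elts[I] = true;
    }
  return D;
}
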
// Attempt to combine a scalar bitmask AND with an extracted shuffle.
@@ -47127,8 +47948,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
// into:
// srl(ctlz x), log2(bitsize(x))
// Input pattern is checked by caller.
-static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
- SelectionDAG &DAG) {
+static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
SDValue Cmp = Op.getOperand(1);
EVT VT = Cmp.getOperand(0).getValueType();
unsigned Log2b = Log2_32(VT.getSizeInBits());
@@ -47139,7 +47959,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
DAG.getConstant(Log2b, dl, MVT::i8));
- return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
+ return Scc;
}
// Try to transform:
@@ -47199,11 +48019,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
// or(srl(ctlz),srl(ctlz)).
// The dag combiner can then fold it into:
// srl(or(ctlz, ctlz)).
- EVT VT = OR->getValueType(0);
- SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
+ SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
SDValue Ret, NewRHS;
- if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
- Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
+ if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG)))
+ Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS);
if (!Ret)
return SDValue();
@@ -47216,21 +48035,18 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
if (RHS->getOpcode() == ISD::OR)
std::swap(LHS, RHS);
- NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
+ NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG);
if (!NewRHS)
return SDValue();
- Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
+ Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS);
}
- if (Ret)
- Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
-
- return Ret;
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
}
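
// Scalar form (sketch, not part of the patch) of the LZCNT trick used by
// lowerX86CmpEqZeroToCtlzSrl above: for a 32-bit value, ctlz(x) is 32 only
// when x == 0, and 32 >> 5 == 1 while anything in [0,31] >> 5 == 0, so the
// compare-with-zero becomes a shift and two such compares can share one OR
// ahead of the SRL. Assumes lzcnt semantics (clz of 0 is the bit width) and
// the GCC/Clang builtin.
#include <cstdint>
static uint32_t cmpEqZeroViaCtlz(uint32_t X) {
  unsigned Lz = (X == 0) ? 32u : unsigned(__builtin_clz(X)); // lzcnt
  return Lz >> 5; // 1 iff X == 0, i.e. zext(setcc(X, 0, eq))
}
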
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
- SDValue And1_L, SDValue And1_R, SDLoc DL,
- SelectionDAG &DAG) {
+ SDValue And1_L, SDValue And1_R,
+ const SDLoc &DL, SelectionDAG &DAG) {
if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
return SDValue();
SDValue NotOp = And0_L->getOperand(0);
@@ -47352,7 +48168,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
APInt UpperElts = APInt::getHighBitsSet(NumElts, HalfElts);
if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL &&
N1.getConstantOperandAPInt(1) == HalfElts &&
- DAG.MaskedValueIsZero(N0, APInt(1, 1), UpperElts)) {
+ DAG.MaskedVectorIsZero(N0, UpperElts)) {
return DAG.getNode(
ISD::CONCAT_VECTORS, dl, VT,
extractSubVector(N0, 0, DAG, dl, HalfElts),
@@ -47360,7 +48176,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL &&
N0.getConstantOperandAPInt(1) == HalfElts &&
- DAG.MaskedValueIsZero(N1, APInt(1, 1), UpperElts)) {
+ DAG.MaskedVectorIsZero(N1, UpperElts)) {
return DAG.getNode(
ISD::CONCAT_VECTORS, dl, VT,
extractSubVector(N1, 0, DAG, dl, HalfElts),
@@ -47389,9 +48205,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (!EltBits[I].isAllOnes())
DemandedElts.setBit(I);
- APInt KnownUndef, KnownZero;
- return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
- KnownZero, DCI);
+ return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, DCI);
};
if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
if (N->getOpcode() != ISD::DELETED_NODE)
@@ -47618,7 +48432,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
// clip to 0-255.
if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
InVT == MVT::v16i32 && VT == MVT::v16i8) {
- if (auto USatVal = detectSSatPattern(In, VT, true)) {
+ if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB.
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
DL, DAG, Subtarget);
@@ -47643,7 +48457,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
VT.getSizeInBits() >= 64 &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
- if (auto USatVal = detectSSatPattern(In, VT, true)) {
+ if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
// vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
        // Only do this when the result is at least 64 bits, or we'd be
        // leaving dangling PACKSSDW nodes.
@@ -47660,7 +48474,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,
Subtarget);
}
- if (auto SSatVal = detectSSatPattern(In, VT))
+ if (SDValue SSatVal = detectSSatPattern(In, VT))
return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,
Subtarget);
}
@@ -47671,10 +48485,10 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
(SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) {
unsigned TruncOpc = 0;
SDValue SatVal;
- if (auto SSatVal = detectSSatPattern(In, VT)) {
+ if (SDValue SSatVal = detectSSatPattern(In, VT)) {
SatVal = SSatVal;
TruncOpc = X86ISD::VTRUNCS;
- } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) {
+ } else if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) {
SatVal = USatVal;
TruncOpc = X86ISD::VTRUNCUS;
}
@@ -47706,7 +48520,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
-/// X86ISD::AVG instruction.
+/// ISD::AVGCEILU (AVG) instruction.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
@@ -47769,7 +48583,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
+ return DAG.getNode(ISD::AVGCEILU, DL, Ops[0].getValueType(), Ops);
};
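
// Scalar reference (sketch, not part of the patch) for the ISD::AVGCEILU
// node now emitted above: PAVGB/PAVGW compute the rounded-up average in a
// wider domain, avoiding the overflow that (a + b + 1) / 2 would hit if
// evaluated directly in the element type.
#include <cstdint>
static uint8_t avgCeilU8(uint8_t A, uint8_t B) {
  return uint8_t((uint16_t(A) + uint16_t(B) + 1) >> 1); // widen, add, halve
}
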
auto AVGSplitter = [&](std::array<SDValue, 2> Ops) {
@@ -47872,7 +48686,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
- Ld->getAlignment() >= 16) ||
+ Ld->getAlign() >= Align(16)) ||
(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
*Ld->getMemOperand(), &Fast) &&
!Fast))) {
@@ -48340,7 +49154,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Split under-aligned vector non-temporal stores.
if (St->isNonTemporal() && StVT == VT &&
- St->getAlignment() < VT.getStoreSize()) {
+ St->getAlign().value() < VT.getStoreSize()) {
// ZMM/YMM nt-stores - either it can be stored as a series of shorter
// vectors or the legalizer can scalarize it to use MOVNTI.
if (VT.is256BitVector() || VT.is512BitVector()) {
@@ -48374,9 +49188,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
// Try to fold a VTRUNCUS or VTRUNCS into a truncating store.
- if (!St->isTruncatingStore() && StoredVal.hasOneUse() &&
+ if (!St->isTruncatingStore() &&
(StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
+ StoredVal.hasOneUse() &&
TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
return EmitTruncSStore(IsSigned, St->getChain(),
@@ -48385,15 +49200,15 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
  // Try to fold an extract_element(VTRUNC) pattern into a truncating store.
- if (!St->isTruncatingStore() && StoredVal.hasOneUse()) {
+ if (!St->isTruncatingStore()) {
auto IsExtractedElement = [](SDValue V) {
- if (V.getOpcode() == ISD::TRUNCATE && V.getOperand(0).hasOneUse())
+ if (V.getOpcode() == ISD::TRUNCATE && V.hasOneUse())
V = V.getOperand(0);
unsigned Opc = V.getOpcode();
- if (Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) {
- if (V.getOperand(0).hasOneUse() && isNullConstant(V.getOperand(1)))
- return V.getOperand(0);
- }
+ if ((Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) &&
+ isNullConstant(V.getOperand(1)) && V.hasOneUse() &&
+ V.getOperand(0).hasOneUse())
+ return V.getOperand(0);
return SDValue();
};
if (SDValue Extract = IsExtractedElement(StoredVal)) {
@@ -48531,10 +49346,8 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits();
APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts);
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef,
- KnownZero, DCI)) {
+ if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
@@ -49165,7 +49978,8 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// PACK should still be worth it for 128-bit vectors if the sources were
// originally concatenated from subvectors.
SmallVector<SDValue> ConcatOps;
- if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
+ if (VT.getSizeInBits() > 128 ||
+ !collectConcatOps(In.getNode(), ConcatOps, DAG))
return SDValue();
}
@@ -49478,9 +50292,9 @@ static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
SDValue In = N->getOperand(0);
SDLoc DL(N);
- if (auto SSatVal = detectSSatPattern(In, VT))
+ if (SDValue SSatVal = detectSSatPattern(In, VT))
return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
- if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
+ if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -49567,10 +50381,14 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
if (!UndefElts[I] && !EltBits[I].isSignMask())
return SDValue();
- return peekThroughBitcasts(Op0);
+ // Only allow bitcast from correctly-sized constant.
+ Op0 = peekThroughBitcasts(Op0);
+ if (Op0.getScalarValueSizeInBits() == ScalarSize)
+ return Op0;
}
- }
- }
+ break;
+ } // case
+ } // switch
return SDValue();
}
@@ -50074,10 +50892,8 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt KnownUndef, KnownZero;
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
- if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
// Convert a full vector load into vzload when not all bits are needed.
@@ -50144,26 +50960,70 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
MVT VT = N->getSimpleValueType(0);
+ // ANDNP(undef, x) -> 0
+ // ANDNP(x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
// ANDNP(0, x) -> x
- if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
- return N->getOperand(1);
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
// ANDNP(x, 0) -> 0
- if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
// Turn ANDNP back to AND if input is inverted.
- if (SDValue Not = IsNOT(N->getOperand(0), DAG))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not),
- N->getOperand(1));
+ if (SDValue Not = IsNOT(N0, DAG))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
+
+ // TODO: Constant fold NOT(N0) to allow us to use AND.
+ // TODO: Do this in IsNOT with suitable oneuse checks?
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
+
+ // If either operand is a constant mask, then only the elements that aren't
+ // zero are actually demanded by the other operand.
+ auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ int NumElts = VT.getVectorNumElements();
+ int EltSizeInBits = VT.getScalarSizeInBits();
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
+ if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
+ EltBits)) {
+ DemandedBits.clearAllBits();
+ DemandedElts.clearAllBits();
+ for (int I = 0; I != NumElts; ++I)
+ if ((Invert && !EltBits[I].isAllOnes()) ||
+ (!Invert && !EltBits[I].isZero())) {
+ DemandedBits |= Invert ? ~EltBits[I] : EltBits[I];
+ DemandedElts.setBit(I);
+ }
+ }
+ return std::make_pair(DemandedBits, DemandedElts);
+ };
+ std::pair<APInt, APInt> Demand0 = GetDemandedMasks(N1);
+ std::pair<APInt, APInt> Demand1 = GetDemandedMasks(N0, true);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedVectorElts(N0, Demand0.second, DCI) ||
+ TLI.SimplifyDemandedVectorElts(N1, Demand1.second, DCI) ||
+ TLI.SimplifyDemandedBits(N0, Demand0.first, Demand0.second, DCI) ||
+ TLI.SimplifyDemandedBits(N1, Demand1.first, Demand1.second, DCI)) {
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
}
return SDValue();
@@ -50191,11 +51051,9 @@ static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getLowBitsSet(8, 4);
- if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
- DCI)) {
+ if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, DCI)) {
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
@@ -50453,110 +51311,6 @@ static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) {
return Res;
}
-// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)).
-// This is more or less the reverse of combineBitcastvxi1.
-static SDValue
-combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- unsigned Opcode = N->getOpcode();
- if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
- Opcode != ISD::ANY_EXTEND)
- return SDValue();
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
- if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
- EVT InSVT = N0.getValueType().getScalarType();
- unsigned EltSizeInBits = SVT.getSizeInBits();
-
- // Input type must be extending a bool vector (bit-casted from a scalar
- // integer) to legal integer types.
- if (!VT.isVector())
- return SDValue();
- if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
- return SDValue();
- if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST)
- return SDValue();
-
- SDValue N00 = N0.getOperand(0);
- EVT SclVT = N0.getOperand(0).getValueType();
- if (!SclVT.isScalarInteger())
- return SDValue();
-
- SDLoc DL(N);
- SDValue Vec;
- SmallVector<int, 32> ShuffleMask;
- unsigned NumElts = VT.getVectorNumElements();
- assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
-
- // Broadcast the scalar integer to the vector elements.
- if (NumElts > EltSizeInBits) {
- // If the scalar integer is greater than the vector element size, then we
- // must split it down into sub-sections for broadcasting. For example:
- // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections.
- // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections.
- assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
- unsigned Scale = NumElts / EltSizeInBits;
- EVT BroadcastVT =
- EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
- Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
- Vec = DAG.getBitcast(VT, Vec);
-
- for (unsigned i = 0; i != Scale; ++i)
- ShuffleMask.append(EltSizeInBits, i);
- Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
- } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
- (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
- // If we have register broadcast instructions, use the scalar size as the
- // element type for the shuffle. Then cast to the wider element type. The
- // widened bits won't be used, and this might allow the use of a broadcast
- // load.
- assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
- unsigned Scale = EltSizeInBits / NumElts;
- EVT BroadcastVT =
- EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale);
- Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
- ShuffleMask.append(NumElts * Scale, 0);
- Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask);
- Vec = DAG.getBitcast(VT, Vec);
- } else {
- // For smaller scalar integers, we can simply any-extend it to the vector
- // element size (we don't care about the upper bits) and broadcast it to all
- // elements.
- SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
- Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
- ShuffleMask.append(NumElts, 0);
- Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
- }
-
- // Now, mask the relevant bit in each element.
- SmallVector<SDValue, 32> Bits;
- for (unsigned i = 0; i != NumElts; ++i) {
- int BitIdx = (i % EltSizeInBits);
- APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1);
- Bits.push_back(DAG.getConstant(Bit, DL, SVT));
- }
- SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
- Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);
-
- // Compare against the bitmask and extend the result.
- EVT CCVT = VT.changeVectorElementType(MVT::i1);
- Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ);
- Vec = DAG.getSExtOrTrunc(Vec, DL, VT);
-
- // For SEXT, this is now done, otherwise shift the result down for
- // zero-extension.
- if (Opcode == ISD::SIGN_EXTEND)
- return Vec;
- return DAG.getNode(ISD::SRL, DL, VT, Vec,
- DAG.getConstant(EltSizeInBits - 1, DL, VT));
-}
-
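
// The function removed above survives as a helper that now takes the
// opcode, debug location and types explicitly (see the updated callers in
// combineSext/combineZext below). A scalar sketch of the trick it
// implements, for an 8-bit mask extended to 8 lanes (illustrative, not
// part of the patch):
#include <cstdint>
static void extendBoolBitsReference(uint8_t Mask, int8_t Out[8]) {
  for (unsigned I = 0; I != 8; ++I) {
    uint8_t Bit = uint8_t(1u << I);            // per-lane bit of the mask
    // broadcast + AND with the bit + compare-eq, then sign-extend; a zext
    // user would shift this -1/0 result down to 1/0.
    Out[I] = ((Mask & Bit) == Bit) ? int8_t(-1) : int8_t(0);
  }
}
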
// Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm
// result type.
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
@@ -50636,7 +51390,8 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
- if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
+ if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), DL, VT, N0,
+ DAG, DCI, Subtarget))
return V;
if (VT.isVector()) {
@@ -50790,7 +51545,8 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
return V;
- if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
+ if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), dl, VT, N0,
+ DAG, DCI, Subtarget))
return V;
if (VT.isVector())
@@ -50832,7 +51588,7 @@ static bool isOrXorXorTree(SDValue X, bool Root = true) {
/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
/// expansion.
-template<typename F>
+template <typename F>
static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
EVT VecVT, EVT CmpVT, bool HasPT, F SToV) {
SDValue Op0 = X.getOperand(0);
@@ -50845,7 +51601,8 @@ static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
if (HasPT)
return DAG.getNode(ISD::OR, DL, VecVT, A, B);
return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
- } else if (X.getOpcode() == ISD::XOR) {
+ }
+ if (X.getOpcode() == ISD::XOR) {
SDValue A = SToV(Op0);
SDValue B = SToV(Op1);
if (VecVT != CmpVT)
@@ -51134,6 +51891,16 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
LHS.getValueType() == MVT::v4f32)
return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
+ // X pred 0.0 --> X pred -X
+ // If the negation of X already exists, use it in the comparison. This removes
+ // the need to materialize 0.0 and allows matching to SSE's MIN/MAX
+ // instructions in patterns with a 'select' node.
+ if (isNullFPScalarOrVectorConst(RHS)) {
+ SDVTList FNegVT = DAG.getVTList(OpVT);
+ if (SDNode *FNeg = DAG.getNodeIfExists(ISD::FNEG, FNegVT, {LHS}))
+ return DAG.getSetCC(DL, VT, LHS, SDValue(FNeg, 0), CC);
+ }
+
return SDValue();
}
@@ -51145,16 +51912,18 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getSimpleValueType(0);
unsigned NumBits = VT.getScalarSizeInBits();
unsigned NumElts = SrcVT.getVectorNumElements();
+ unsigned NumBitsPerElt = SrcVT.getScalarSizeInBits();
+ assert(VT == MVT::i32 && NumElts <= NumBits && "Unexpected MOVMSK types");
// Perform constant folding.
- if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
- assert(VT == MVT::i32 && "Unexpected result type");
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (getTargetConstantBitsFromNode(Src, NumBitsPerElt, UndefElts, EltBits)) {
APInt Imm(32, 0);
- for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
- if (!Src.getOperand(Idx).isUndef() &&
- Src.getConstantOperandAPInt(Idx).isNegative())
+ for (unsigned Idx = 0; Idx != NumElts; ++Idx)
+ if (!UndefElts[Idx] && EltBits[Idx].isNegative())
Imm.setBit(Idx);
- }
+
return DAG.getConstant(Imm, SDLoc(N), VT);
}
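
// MOVMSK packs the sign bit of each source lane into the low bits of an
// i32, so constant folding is just collecting the negative lanes, as the
// loop above does. Scalar sketch (not part of the patch); assumes at most
// 32 lanes:
#include <cstdint>
#include <vector>
static uint32_t movmskReference(const std::vector<int64_t> &Lanes) {
  uint32_t Imm = 0;
  for (size_t Idx = 0; Idx != Lanes.size(); ++Idx)
    if (Lanes[Idx] < 0)   // isNegative(): the lane's sign bit is set
      Imm |= 1u << Idx;
  return Imm;
}
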
@@ -51713,8 +52482,6 @@ static bool needCarryOrOverflowFlag(SDValue Flags) {
CC = (X86::CondCode)User->getConstantOperandVal(0);
break;
case X86ISD::BRCOND:
- CC = (X86::CondCode)User->getConstantOperandVal(2);
- break;
case X86ISD::CMOV:
CC = (X86::CondCode)User->getConstantOperandVal(2);
break;
@@ -51743,10 +52510,14 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
default:
// Be conservative.
return false;
- case X86ISD::SETCC: CCOpNo = 0; break;
- case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
- case X86ISD::BRCOND: CCOpNo = 2; break;
- case X86ISD::CMOV: CCOpNo = 2; break;
+ case X86ISD::SETCC:
+ case X86ISD::SETCC_CARRY:
+ CCOpNo = 0;
+ break;
+ case X86ISD::BRCOND:
+ case X86ISD::CMOV:
+ CCOpNo = 2;
+ break;
}
X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);
@@ -51757,6 +52528,215 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
return true;
}
+/// If this is an add or subtract where one operand is produced by a cmp+setcc,
+/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
+/// with CMP+{ADC, SBB}.
+/// Also try the (ADD/SUB)+(AND(SRL,1)) bit-extraction pattern with BT+{ADC, SBB}.
+static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
+ SDValue X, SDValue Y,
+ SelectionDAG &DAG,
+ bool ZeroSecondOpOnly = false) {
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // Look through a one-use zext.
+ if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse())
+ Y = Y.getOperand(0);
+
+ X86::CondCode CC;
+ SDValue EFLAGS;
+ if (Y.getOpcode() == X86ISD::SETCC && Y.hasOneUse()) {
+ CC = (X86::CondCode)Y.getConstantOperandVal(0);
+ EFLAGS = Y.getOperand(1);
+ } else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1)) &&
+ Y.hasOneUse()) {
+ EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC);
+ }
+
+ if (!EFLAGS)
+ return SDValue();
+
+ // If X is -1 or 0, then we have an opportunity to avoid constants required in
+ // the general case below.
+ auto *ConstantX = dyn_cast<ConstantSDNode>(X);
+ if (ConstantX && !ZeroSecondOpOnly) {
+ if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
+ // This is a complicated way to get -1 or 0 from the carry flag:
+ // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
+ // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ EFLAGS);
+ }
+
+ if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_A && ConstantX->isZero())) {
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ // Swap the operands of a SUB, and we have the same pattern as above.
+ // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
+ // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ NewEFLAGS);
+ }
+ }
+ }
+
+ if (CC == X86::COND_B) {
+ // X + SETB Z --> adc X, 0
+ // X - SETB Z --> sbb X, 0
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), EFLAGS);
+ }
+
+ if (ZeroSecondOpOnly)
+ return SDValue();
+
+ if (CC == X86::COND_A) {
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+    // Do not flip "e > c", where "c" is a constant, because the Cmp
+    // instruction cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub =
+ DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), NewEFLAGS);
+ }
+ }
+
+ if (CC == X86::COND_AE) {
+ // X + SETAE --> sbb X, -1
+ // X - SETAE --> adc X, -1
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(-1, DL, VT), EFLAGS);
+ }
+
+ if (CC == X86::COND_BE) {
+ // X + SETBE --> sbb X, -1
+ // X - SETBE --> adc X, -1
+ // Try to convert COND_BE into COND_AE in an attempt to facilitate
+ // materializing "setae reg".
+ //
+    // Do not flip "e <= c", where "c" is a constant, because the Cmp
+    // instruction cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub =
+ DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(-1, DL, VT), NewEFLAGS);
+ }
+ }
+
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ if (EFLAGS.getOpcode() != X86ISD::CMP || !EFLAGS.hasOneUse() ||
+ !X86::isZeroNode(EFLAGS.getOperand(1)) ||
+ !EFLAGS.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue Z = EFLAGS.getOperand(0);
+ EVT ZVT = Z.getValueType();
+
+ // If X is -1 or 0, then we have an opportunity to avoid constants required in
+ // the general case below.
+ if (ConstantX) {
+ // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
+ // fake operands:
+ // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
+ // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
+ if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {
+ SDValue Zero = DAG.getConstant(0, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ }
+
+ // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
+ // with fake operands:
+ // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
+ // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
+ if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ Cmp1.getValue(1));
+ }
+ }
+
+ // (cmp Z, 1) sets the carry flag if Z is 0.
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
+
+ // Add the flags type for ADC/SBB nodes.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
+ // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
+ if (CC == X86::COND_NE)
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
+ DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));
+
+ // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
+ // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
+ DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
+}
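
// Two of the carry identities the helper above relies on, checked in
// scalar form (sketch, not part of the patch). For unsigned Z, CMP Z, 1
// sets CF exactly when Z == 0, so:
#include <cstdint>
static uint64_t addNeZ(uint64_t X, uint64_t Z) {
  bool CF = (Z < 1);              // carry out of CMP Z, 1
  return X - uint64_t(-1) - CF;   // sbb X, -1 == X + 1 - CF == X + (Z != 0)
}
static uint64_t addEqZ(uint64_t X, uint64_t Z) {
  bool CF = (Z < 1);              // carry out of CMP Z, 1
  return X + 0 + CF;              // adc X, 0 == X + (Z == 0)
}
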
+
+/// If this is an add or subtract where one operand is produced by a cmp+setcc,
+/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
+/// with CMP+{ADC, SBB}.
+static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
+ bool IsSub = N->getOpcode() == ISD::SUB;
+ SDValue X = N->getOperand(0);
+ SDValue Y = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG))
+ return ADCOrSBB;
+
+ // Commute and try again (negate the result for subtracts).
+ if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) {
+ if (IsSub)
+ ADCOrSBB =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), ADCOrSBB);
+ return ADCOrSBB;
+ }
+
+ return SDValue();
+}
+
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
// Only handle test patterns.
if (!isNullConstant(N->getOperand(1)))
@@ -51792,6 +52772,16 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
}
}
+ // Peek through any zero-extend if we're only testing for a zero result.
+ if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.getScalarSizeInBits() >= 8 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
+ DAG.getConstant(0, dl, SrcVT));
+ }
+
// Look for a truncate.
if (Op.getOpcode() != ISD::TRUNCATE)
return SDValue();
@@ -51867,7 +52857,8 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
MVT VT = LHS.getSimpleValueType();
- unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+ bool IsSub = X86ISD::SUB == N->getOpcode();
+ unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
// If we don't use the flag result, simplify back to a generic ADD/SUB.
if (!N->hasAnyUseOfValue(1)) {
@@ -51889,26 +52880,29 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
MatchGeneric(LHS, RHS, false);
MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
- return SDValue();
+ // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
+ // EFLAGS result doesn't change.
+ return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG,
+ /*ZeroSecondOpOnly*/ true);
}
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
- if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue BorrowIn = N->getOperand(2);
+
+ if (SDValue Flags = combineCarryThroughADD(BorrowIn, DAG)) {
MVT VT = N->getSimpleValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs,
- N->getOperand(0), N->getOperand(1),
- Flags);
+ return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, LHS, RHS, Flags);
}
// Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry)
// iff the flag result is dead.
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op1) &&
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(RHS) &&
!N->hasAnyUseOfValue(1))
- return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), Op0.getOperand(0),
- Op0.getOperand(1), N->getOperand(2));
+ return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), LHS.getOperand(0),
+ LHS.getOperand(1), BorrowIn);
return SDValue();
}
@@ -51916,228 +52910,60 @@ static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ auto *LHSC = dyn_cast<ConstantSDNode>(LHS);
+ auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
+
+ // Canonicalize constant to RHS.
+ if (LHSC && !RHSC)
+ return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), RHS, LHS,
+ CarryIn);
+
// If the LHS and RHS of the ADC node are zero, then it can't overflow and
// the result is either zero or one (depending on the input carry bit).
// Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
- if (X86::isZeroNode(N->getOperand(0)) &&
- X86::isZeroNode(N->getOperand(1)) &&
+ if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() &&
// We don't have a good way to replace an EFLAGS use, so only do this when
// dead right now.
SDValue(N, 1).use_empty()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
- SDValue Res1 =
- DAG.getNode(ISD::AND, DL, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- N->getOperand(2)),
- DAG.getConstant(1, DL, VT));
+ SDValue Res1 = DAG.getNode(
+ ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), CarryIn),
+ DAG.getConstant(1, DL, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
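
// With both addends zero, ADC can only produce the incoming carry, so the
// node strength-reduces to SETCC_CARRY masked to one bit, as above. Scalar
// form of the identity (sketch, not part of the patch):
#include <cstdint>
static uint64_t adcZeroZero(bool CarryIn) {
  uint64_t SetccCarry = CarryIn ? ~uint64_t(0) : 0; // sbb-style -1 / 0
  return SetccCarry & 1;                            // == 0 + 0 + CarryIn
}
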
- if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
- MVT VT = N->getSimpleValueType(0);
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs,
- N->getOperand(0), N->getOperand(1),
- Flags);
- }
-
- return SDValue();
-}
-
-/// If this is an add or subtract where one operand is produced by a cmp+setcc,
-/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
-/// with CMP+{ADC, SBB}.
-static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
- bool IsSub = N->getOpcode() == ISD::SUB;
- SDValue X = N->getOperand(0);
- SDValue Y = N->getOperand(1);
-
- // If this is an add, canonicalize a zext operand to the RHS.
- // TODO: Incomplete? What if both sides are zexts?
- if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
- Y.getOpcode() != ISD::ZERO_EXTEND)
- std::swap(X, Y);
-
- // Look through a one-use zext.
- bool PeekedThroughZext = false;
- if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
- Y = Y.getOperand(0);
- PeekedThroughZext = true;
- }
-
- // If this is an add, canonicalize a setcc operand to the RHS.
- // TODO: Incomplete? What if both sides are setcc?
- // TODO: Should we allow peeking through a zext of the other operand?
- if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
- Y.getOpcode() != X86ISD::SETCC)
- std::swap(X, Y);
-
- if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
- return SDValue();
-
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
-
- // If X is -1 or 0, then we have an opportunity to avoid constants required in
- // the general case below.
- auto *ConstantX = dyn_cast<ConstantSDNode>(X);
- if (ConstantX) {
- if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
- (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
- // This is a complicated way to get -1 or 0 from the carry flag:
- // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
- // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- Y.getOperand(1));
- }
-
- if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||
- (IsSub && CC == X86::COND_A && ConstantX->isZero())) {
- SDValue EFLAGS = Y->getOperand(1);
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- // Swap the operands of a SUB, and we have the same pattern as above.
- // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
- // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
- SDValue NewSub = DAG.getNode(
- X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- NewEFLAGS);
- }
- }
- }
-
- if (CC == X86::COND_B) {
- // X + SETB Z --> adc X, 0
- // X - SETB Z --> sbb X, 0
- return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(0, DL, VT), Y.getOperand(1));
- }
-
- if (CC == X86::COND_A) {
- SDValue EFLAGS = Y.getOperand(1);
- // Try to convert COND_A into COND_B in an attempt to facilitate
- // materializing "setb reg".
- //
- // Do not flip "e > c", where "c" is a constant, because Cmp instruction
- // cannot take an immediate as its first operand.
- //
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
- EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
- return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(0, DL, VT), NewEFLAGS);
- }
- }
-
- if (CC == X86::COND_AE) {
- // X + SETAE --> sbb X, -1
- // X - SETAE --> adc X, -1
- return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(-1, DL, VT), Y.getOperand(1));
- }
-
- if (CC == X86::COND_BE) {
- // X + SETBE --> sbb X, -1
- // X - SETBE --> adc X, -1
- SDValue EFLAGS = Y.getOperand(1);
- // Try to convert COND_BE into COND_AE in an attempt to facilitate
- // materializing "setae reg".
- //
- // Do not flip "e <= c", where "c" is a constant, because Cmp instruction
- // cannot take an immediate as its first operand.
- //
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub = DAG.getNode(
- X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
- return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(-1, DL, VT), NewEFLAGS);
- }
+ // Fold ADC(C1,C2,Carry) -> ADC(0,C1+C2,Carry)
+ // iff the flag result is dead.
+ // TODO: Allow flag result if C1+C2 doesn't signed/unsigned overflow.
+ if (LHSC && RHSC && !LHSC->isZero() && !N->hasAnyUseOfValue(1)) {
+ SDLoc DL(N);
+ APInt Sum = LHSC->getAPIntValue() + RHSC->getAPIntValue();
+ return DAG.getNode(X86ISD::ADC, DL, N->getVTList(),
+ DAG.getConstant(0, DL, LHS.getValueType()),
+ DAG.getConstant(Sum, DL, LHS.getValueType()), CarryIn);
}
- if (CC != X86::COND_E && CC != X86::COND_NE)
- return SDValue();
-
- SDValue Cmp = Y.getOperand(1);
- if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
- !X86::isZeroNode(Cmp.getOperand(1)) ||
- !Cmp.getOperand(0).getValueType().isInteger())
- return SDValue();
-
- SDValue Z = Cmp.getOperand(0);
- EVT ZVT = Z.getValueType();
-
- // If X is -1 or 0, then we have an opportunity to avoid constants required in
- // the general case below.
- if (ConstantX) {
- // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
- // fake operands:
- // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
- // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
- if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||
- (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {
- SDValue Zero = DAG.getConstant(0, DL, ZVT);
- SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- }
-
- // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
- // with fake operands:
- // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
- // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
- if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||
- (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {
- SDValue One = DAG.getConstant(1, DL, ZVT);
- SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
- SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- Cmp1.getValue(1));
- }
+ if (SDValue Flags = combineCarryThroughADD(CarryIn, DAG)) {
+ MVT VT = N->getSimpleValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs, LHS, RHS, Flags);
}
- // (cmp Z, 1) sets the carry flag if Z is 0.
- SDValue One = DAG.getConstant(1, DL, ZVT);
- SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
- SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
-
- // Add the flags type for ADC/SBB nodes.
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
-
- // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
- // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
- if (CC == X86::COND_NE)
- return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
- DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));
+ // Fold ADC(ADD(X,Y),0,Carry) -> ADC(X,Y,Carry)
+ // iff the flag result is dead.
+ if (LHS.getOpcode() == ISD::ADD && RHSC && RHSC->isZero() &&
+ !N->hasAnyUseOfValue(1))
+ return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), LHS.getOperand(0),
+ LHS.getOperand(1), CarryIn);
- // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
- // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
- return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
- DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
+ return SDValue();
}
static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
@@ -52432,7 +53258,8 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
/// Try to fold those constants into an 'add' instruction to reduce instruction
/// count. We do this with CMOV rather than the generic 'select' because there are
/// earlier folds that may be used to turn select-of-constants into logic hacks.
-static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
+static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// If an operand is zero, add-of-0 gets simplified away, so that's clearly
// better because we eliminate 1-2 instructions. This transform is still
// an improvement without zero operands because we trade 2 move constants and
@@ -52457,6 +53284,11 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
if (!isSuitableCmov(Cmov))
return SDValue();
+ // Don't remove a load folding opportunity for the add. That would neutralize
+ // any improvements from removing constant materializations.
+ if (X86::mayFoldLoad(OtherOp, Subtarget))
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FalseOp = Cmov.getOperand(0);
@@ -52499,7 +53331,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = N->getOperand(1);
SDLoc DL(N);
- if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
+ if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG, Subtarget))
return Select;
if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))
@@ -52535,6 +53367,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
}
}
+ // Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W)
+ if (Op0.getOpcode() == X86ISD::ADC && Op0->hasOneUse() &&
+ X86::isZeroNode(Op0.getOperand(1))) {
+ assert(!Op0->hasAnyUseOfValue(1) && "Overflow bit in use");
+ return DAG.getNode(X86ISD::ADC, SDLoc(Op0), Op0->getVTList(), Op1,
+ Op0.getOperand(0), Op0.getOperand(2));
+ }
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -52617,6 +53457,25 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
+ // Fold SUB(X,ADC(Y,0,W)) -> SBB(X,Y,W)
+ if (Op1.getOpcode() == X86ISD::ADC && Op1->hasOneUse() &&
+ X86::isZeroNode(Op1.getOperand(1))) {
+ assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use");
+ return DAG.getNode(X86ISD::SBB, SDLoc(Op1), Op1->getVTList(), Op0,
+ Op1.getOperand(0), Op1.getOperand(2));
+ }
+
+ // Fold SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y)
+ // Don't fold to ADC(0,0,W)/SETCC_CARRY pattern which will prevent more folds.
+ if (Op1.getOpcode() == X86ISD::SBB && Op1->hasOneUse() &&
+ !(X86::isZeroNode(Op0) && X86::isZeroNode(Op1.getOperand(1)))) {
+ assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use");
+ SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0,
+ Op1.getOperand(1), Op1.getOperand(2));
+ return DAG.getNode(ISD::SUB, SDLoc(N), Op0.getValueType(), ADC.getValue(0),
+ Op1.getOperand(0));
+ }
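
// The SBB fold above rests on the borrow identity SBB(Y,Z,W) == Y - Z - W,
// giving X - SBB(Y,Z,W) == (X + Z + W) - Y == SUB(ADC(X,Z,W), Y). Scalar
// check modulo 2^64 (sketch, not part of the patch):
#include <cstdint>
static uint64_t subOfSbbReference(uint64_t X, uint64_t Y, uint64_t Z, bool W) {
  uint64_t Adc = X + Z + W; // ADC(X, Z, W)
  return Adc - Y;           // equals X - (Y - Z - W) modulo 2^64
}
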
+
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -52745,6 +53604,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
Subs.push_back(SubOp.getOperand(I));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
};
+ auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
+ for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
+ SDValue Sub = SubOps[I].getOperand(Op);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (Sub.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Sub.getOperand(0).getValueType() != VT ||
+ Sub.getConstantOperandAPInt(1) != (I * NumSubElts))
+ return false;
+ }
+ return true;
+ };
unsigned NumOps = Ops.size();
switch (Op0.getOpcode()) {
@@ -52802,6 +53672,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
DAG.getTargetConstant(Idx, DL, MVT::i8));
}
break;
+ case X86ISD::PSHUFB:
+ if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useBWIRegs()))) {
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
+ }
+ break;
case X86ISD::VPERMV3:
if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
MVT OpVT = Op0.getSimpleValueType();
@@ -52920,6 +53798,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
}
break;
+ case ISD::VSELECT:
+ case X86ISD::BLENDV:
+ if (!IsSplat && VT.is256BitVector() && Ops.size() == 2 &&
+ (VT.getScalarSizeInBits() >= 32 || Subtarget.hasInt256()) &&
+ IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) {
+ EVT SelVT = Ops[0].getOperand(0).getValueType();
+ SelVT = SelVT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(SelVT.getSimpleVT(), Ops, 0),
+ ConcatSubOperand(VT, Ops, 1),
+ ConcatSubOperand(VT, Ops, 2));
+ }
+ break;
}
}
@@ -52937,12 +53828,29 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
+ // Attempt to fold target constant loads.
+ if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) {
+ SmallVector<APInt> EltBits;
+ APInt UndefElts = APInt::getNullValue(VT.getVectorNumElements());
+ for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
+ APInt OpUndefElts;
+ SmallVector<APInt> OpEltBits;
+ if (!getTargetConstantBitsFromNode(Ops[I], EltSizeInBits, OpUndefElts,
+ OpEltBits, true, false))
+ break;
+ EltBits.append(OpEltBits);
+ UndefElts.insertBits(OpUndefElts, I * OpUndefElts.getBitWidth());
+ }
+ if (EltBits.size() == VT.getVectorNumElements())
+ return getConstVector(EltBits, UndefElts, VT, DAG, DL);
+ }
+
return SDValue();
}
-static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
EVT SrcVT = N->getOperand(0).getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -52961,9 +53869,9 @@ static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -53044,7 +53952,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
// Match concat_vector style patterns.
SmallVector<SDValue, 2> SubVectorOps;
- if (collectConcatOps(N, SubVectorOps)) {
+ if (collectConcatOps(N, SubVectorOps, DAG)) {
if (SDValue Fold =
combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
return Fold;
@@ -53103,10 +54011,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
/// This function should only be called with legal types (otherwise, the calls
/// to get simple value types will assert).
static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
- SDValue Sel = peekThroughBitcasts(Ext->getOperand(0));
+ SDValue Sel = Ext->getOperand(0);
SmallVector<SDValue, 4> CatOps;
if (Sel.getOpcode() != ISD::VSELECT ||
- !collectConcatOps(Sel.getOperand(0).getNode(), CatOps))
+ !collectConcatOps(Sel.getOperand(0).getNode(), CatOps, DAG))
return SDValue();
// Note: We assume simple value types because this should only be called with
@@ -53154,9 +54062,9 @@ static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
return DAG.getBitcast(VT, NarrowSel);
}
-static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
// For AVX1 only, if we are extracting from a 256-bit and+not (which will
// eventually get combined/lowered into ANDNP) with a concatenated operand,
// split the 'and' into 128-bit ops to avoid the concatenate and extract.
@@ -53177,6 +54085,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
EVT InVecVT = InVec.getValueType();
unsigned SizeInBits = VT.getSizeInBits();
unsigned InSizeInBits = InVecVT.getSizeInBits();
+ unsigned NumSubElts = VT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Subtarget.hasAVX() && !Subtarget.hasAVX2() &&
@@ -53214,22 +54123,24 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
}
if (InVec.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getBuildVector(
- VT, SDLoc(N),
- InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements()));
+ return DAG.getBuildVector(VT, SDLoc(N),
+ InVec->ops().slice(IdxVal, NumSubElts));
- // If we are extracting from an insert into a zero vector, replace with a
- // smaller insert into zero if we don't access less than the original
- // subvector. Don't do this for i1 vectors.
+ // If we are extracting from an insert into a larger vector, replace with a
+ // smaller insert if we don't access less than the original subvector. Don't
+ // do this for i1 vectors.
+ // TODO: Relax the matching indices requirement?
if (VT.getVectorElementType() != MVT::i1 &&
- InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 &&
- InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) &&
- ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) &&
+ InVec.getOpcode() == ISD::INSERT_SUBVECTOR && InVec.hasOneUse() &&
+ IdxVal == InVec.getConstantOperandVal(2) &&
InVec.getOperand(1).getValueSizeInBits() <= SizeInBits) {
SDLoc DL(N);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- getZeroVector(VT, Subtarget, DAG, DL),
- InVec.getOperand(1), InVec.getOperand(2));
+ SDValue NewExt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,
+ InVec.getOperand(0), N->getOperand(1));
+ unsigned NewIdxVal = InVec.getConstantOperandVal(2) - IdxVal;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, NewExt,
+ InVec.getOperand(1),
+ DAG.getVectorIdxConstant(NewIdxVal, DL));
}
// If we're extracting an upper subvector from a broadcast we should just
@@ -53246,8 +54157,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits);
// Attempt to extract from the source of a shuffle vector.
- if ((InSizeInBits % SizeInBits) == 0 &&
- (IdxVal % VT.getVectorNumElements()) == 0) {
+ if ((InSizeInBits % SizeInBits) == 0 && (IdxVal % NumSubElts) == 0) {
SmallVector<int, 32> ShuffleMask;
SmallVector<int, 32> ScaledMask;
SmallVector<SDValue, 2> ShuffleInputs;
@@ -53255,7 +54165,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
// Decode the shuffle mask and scale it so it's shuffling subvectors.
if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) &&
scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) {
- unsigned SubVecIdx = IdxVal / VT.getVectorNumElements();
+ unsigned SubVecIdx = IdxVal / NumSubElts;
if (ScaledMask[SubVecIdx] == SM_SentinelUndef)
return DAG.getUNDEF(VT);
if (ScaledMask[SubVecIdx] == SM_SentinelZero)
@@ -53263,7 +54173,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs];
if (Src.getValueSizeInBits() == InSizeInBits) {
unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs;
- unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements();
+ unsigned SrcEltIdx = SrcSubVecIdx * NumSubElts;
return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG,
SDLoc(N), SizeInBits);
}
@@ -53273,8 +54183,8 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
unsigned InOpcode = InVec.getOpcode();
- if (IdxVal == 0 && InVec.hasOneUse()) {
- if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
+ if (InVec.hasOneUse()) {
+ if (IdxVal == 0 && VT == MVT::v2f64 && InVecVT == MVT::v4f64) {
// v2f64 CVTDQ2PD(v4i32).
if (InOpcode == ISD::SINT_TO_FP &&
InVec.getOperand(0).getValueType() == MVT::v4i32) {
@@ -53291,7 +54201,8 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0));
}
}
- if ((InOpcode == ISD::ANY_EXTEND ||
+ if (IdxVal == 0 &&
+ (InOpcode == ISD::ANY_EXTEND ||
InOpcode == ISD::ANY_EXTEND_VECTOR_INREG ||
InOpcode == ISD::ZERO_EXTEND ||
InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
@@ -53306,7 +54217,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode);
return DAG.getNode(ExtOp, DL, VT, Ext);
}
- if (InOpcode == ISD::VSELECT &&
+ if (IdxVal == 0 && InOpcode == ISD::VSELECT &&
InVec.getOperand(0).getValueType().is256BitVector() &&
InVec.getOperand(1).getValueType().is256BitVector() &&
InVec.getOperand(2).getValueType().is256BitVector()) {
@@ -53316,7 +54227,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
}
- if (InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
+ if (IdxVal == 0 && InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
(VT.is128BitVector() || VT.is256BitVector())) {
SDLoc DL(N);
SDValue InVecSrc = InVec.getOperand(0);
@@ -53324,6 +54235,13 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
return DAG.getNode(InOpcode, DL, VT, Ext);
}
+ if (InOpcode == X86ISD::MOVDDUP &&
+ (VT.is128BitVector() || VT.is256BitVector())) {
+ SDLoc DL(N);
+ SDValue Ext0 =
+ extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
+ return DAG.getNode(InOpcode, DL, VT, Ext0);
+ }
}
// Always split vXi64 logical shifts where we're extracting the upper 32-bits
@@ -53476,11 +54394,9 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
ISD::isBuildVectorAllZeros(RHS.getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
- if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
@@ -53494,6 +54410,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
unsigned Opcode = N->getOpcode();
unsigned InOpcode = In.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(N);
// Try to merge vector loads and extend_inreg to an extload.
if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
@@ -53506,10 +54423,9 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
: ISD::ZEXTLOAD;
EVT MemVT = VT.changeVectorElementType(SVT);
if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
- SDValue Load =
- DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
- Ld->getMemOperand()->getFlags());
+ SDValue Load = DAG.getExtLoad(
+ Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
return Load;
}
@@ -53518,7 +54434,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
// Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
if (Opcode == InOpcode)
- return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
+ return DAG.getNode(Opcode, DL, VT, In.getOperand(0));
// Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
// -> EXTEND_VECTOR_INREG(X).
@@ -53527,12 +54443,26 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
In.getOperand(0).getOperand(0).getValueSizeInBits() ==
In.getValueSizeInBits())
- return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
+ return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));
- // Attempt to combine as a shuffle.
- // TODO: General ZERO_EXTEND_VECTOR_INREG support.
- if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
- (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && Subtarget.hasSSE41())) {
+ // Fold EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0).
+ // TODO: Move to DAGCombine?
+ if (!DCI.isBeforeLegalizeOps() && Opcode == ISD::ZERO_EXTEND_VECTOR_INREG &&
+ In.getOpcode() == ISD::BUILD_VECTOR && In.hasOneUse() &&
+ In.getValueSizeInBits() == VT.getSizeInBits()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits();
+ EVT EltVT = In.getOperand(0).getValueType();
+ SmallVector<SDValue> Elts(Scale * NumElts, DAG.getConstant(0, DL, EltVT));
+ for (unsigned I = 0; I != NumElts; ++I)
+ Elts[I * Scale] = In.getOperand(I);
+ return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts));
+ }
+
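
For reference, the interleaving in the BUILD_VECTOR fold above places each source element Scale positions apart, leaving zero elements in between, which after the bitcast is exactly a per-lane zero extension. A minimal standalone sketch in plain C++ (widths chosen for illustration: v8i16 source, v4i32 result, so Scale = 2):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint16_t> In = {0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD};
  unsigned NumElts = 4, Scale = 2;
  // Start with all-zero lanes, then drop each source element into every
  // Scale-th slot, mirroring Elts[I * Scale] = In.getOperand(I) above.
  std::vector<uint16_t> Elts(Scale * NumElts, 0);
  for (unsigned I = 0; I != NumElts; ++I)
    Elts[I * Scale] = In[I];
  // Reinterpreting adjacent {lo, 0} pairs as little-endian i32 lanes gives
  // the zero-extended values, matching the final bitcast.
  for (unsigned I = 0; I != NumElts; ++I) {
    uint32_t Lane = uint32_t(Elts[I * Scale]) |
                    (uint32_t(Elts[I * Scale + 1]) << 16);
    assert(Lane == uint32_t(In[I]));
  }
  return 0;
}
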
+ // Attempt to combine as a shuffle on SSE41+ targets.
+ if ((Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ Subtarget.hasSSE41()) {
SDValue Op(N, 0);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
@@ -53549,11 +54479,9 @@ static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
- APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
- if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
- KnownZero, DCI))
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, DCI))
return SDValue(N, 0);
return SDValue();
@@ -53781,11 +54709,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PEXTRB:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
case ISD::CONCAT_VECTORS:
- return combineConcatVectors(N, DAG, DCI, Subtarget);
+ return combineCONCAT_VECTORS(N, DAG, DCI, Subtarget);
case ISD::INSERT_SUBVECTOR:
- return combineInsertSubvector(N, DAG, DCI, Subtarget);
+ return combineINSERT_SUBVECTOR(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_SUBVECTOR:
- return combineExtractSubvector(N, DAG, DCI, Subtarget);
+ return combineEXTRACT_SUBVECTOR(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
@@ -54397,37 +55325,37 @@ TargetLowering::ConstraintWeight
weight = CW_Register;
break;
case 'I':
- if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (C->getZExtValue() <= 31)
weight = CW_Constant;
}
break;
case 'J':
- if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 63)
weight = CW_Constant;
}
break;
case 'K':
- if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
weight = CW_Constant;
}
break;
case 'L':
- if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
weight = CW_Constant;
}
break;
case 'M':
- if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 3)
weight = CW_Constant;
}
break;
case 'N':
- if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xff)
weight = CW_Constant;
}
@@ -54439,14 +55367,14 @@ TargetLowering::ConstraintWeight
}
break;
case 'e':
- if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80000000LL) &&
(C->getSExtValue() <= 0x7fffffffLL))
weight = CW_Constant;
}
break;
case 'Z':
- if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xffffffff)
weight = CW_Constant;
}
@@ -54511,7 +55439,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
switch (ConstraintLetter) {
default: break;
case 'I':
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
@@ -54520,7 +55448,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
case 'J':
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 63) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
@@ -54529,7 +55457,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
case 'K':
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<8>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
@@ -54538,7 +55466,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
case 'L':
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
(Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
@@ -54548,7 +55476,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
case 'M':
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 3) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
@@ -54557,7 +55485,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
case 'N':
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
@@ -54566,7 +55494,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
case 'O':
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 127) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
@@ -54576,7 +55504,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'e': {
// 32-bit signed value
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
@@ -54590,7 +55518,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
case 'Z': {
// 32-bit unsigned value
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
@@ -54604,7 +55532,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
case 'i': {
// Literal immediates are always ok.
- if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
+ if (auto *CST = dyn_cast<ConstantSDNode>(Op)) {
bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
BooleanContent BCont = getBooleanContents(MVT::i64);
ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
@@ -54617,8 +55545,9 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// In any sort of PIC mode addresses need to be computed at runtime by
// adding in a register or some sort of table lookup. These can't
- // be used as immediates.
- if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
+ // be used as immediates. BlockAddresses are fine though.
+ if ((Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC()) &&
+ !isa<BlockAddressSDNode>(Op))
return;
// If we are in non-pic codegen mode, we allow the address of a global (with
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 3f6d567d3f4d..af110884049b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -249,9 +249,6 @@ namespace llvm {
SCALEFS,
SCALEFS_RND,
- // Unsigned Integer average.
- AVG,
-
/// Integer horizontal add/sub.
HADD,
HSUB,
@@ -790,6 +787,9 @@ namespace llvm {
LOR,
LXOR,
LAND,
+ LBTS,
+ LBTC,
+ LBTR,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
@@ -1039,10 +1039,7 @@ namespace llvm {
bool isCtlzFast() const override;
- bool hasBitPreservingFPLogic(EVT VT) const override {
- return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
- (VT == MVT::f16 && X86ScalarSSEf16);
- }
+ bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
@@ -1163,6 +1160,19 @@ namespace llvm {
APInt &UndefElts,
unsigned Depth) const override;
+ bool isTargetCanonicalConstantNode(SDValue Op) const override {
+ // Peek through bitcasts/extracts/inserts to see if we have a broadcast
+ // vector from memory.
+ while (Op.getOpcode() == ISD::BITCAST ||
+ Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
+ (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Op.getOperand(0).isUndef()))
+ Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
+
+ return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ TargetLowering::isTargetCanonicalConstantNode(Op);
+ }
+
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
SDValue unwrapAddress(SDValue N) const override;
@@ -1288,6 +1298,9 @@ namespace llvm {
/// from i32 to i8 but not from i32 to i16.
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+ bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
+ EVT VT) const override;
+
/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and stores the intrinsic information into the IntrinsicInfo that was
@@ -1316,15 +1329,13 @@ namespace llvm {
/// Returns true if lowering to a jump table is allowed.
bool areJTsAllowed(const Function *Fn) const override;
+ MVT getPreferredSwitchConditionType(LLVMContext &Context,
+ EVT ConditionVT) const override;
+
/// If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and / or reduce runtime.
- bool ShouldShrinkFPConstant(EVT VT) const override {
- // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
- // expensive than a straight movsd. On the other hand, it's important to
- // shrink long double fp constant since fldt is very slow.
- return !X86ScalarSSEf64 || VT == MVT::f80;
- }
+ bool ShouldShrinkFPConstant(EVT VT) const override;
/// Return true if we believe it is correct and profitable to reduce the
/// load node to a smaller type.
@@ -1333,11 +1344,7 @@ namespace llvm {
/// Return true if the specified scalar FP type is computed in an SSE
/// register, not on the X87 floating point stack.
- bool isScalarFPTypeInSSEReg(EVT VT) const {
- return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
- (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
- }
+ bool isScalarFPTypeInSSEReg(EVT VT) const;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
@@ -1491,13 +1498,6 @@ namespace llvm {
/// make the right decision when generating code for different targets.
const X86Subtarget &Subtarget;
- /// Select between SSE or x87 floating point ops.
- /// When SSE is available, use it for f32 operations.
- /// When SSE2 is available, use it for f64 operations.
- bool X86ScalarSSEf32;
- bool X86ScalarSSEf64;
- bool X86ScalarSSEf16;
-
/// A list of legal FP immediates.
std::vector<APFloat> LegalFPImmediates;
@@ -1637,9 +1637,13 @@ namespace llvm {
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
- bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
+ void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
@@ -1649,6 +1653,8 @@ namespace llvm {
bool needsCmpXchgNb(Type *MemType) const;
+ template<typename T> bool isSoftFP16(T VT) const;
+
void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp
index e08b4b7c03c6..001aa2dcb879 100644
--- a/llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -31,6 +31,7 @@
#include "X86Subtarget.h"
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 004e6fa5ebf4..08dc514a6476 100644
--- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -23,6 +23,7 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/ProfileData/SampleProf.h"
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index ff8710634e89..c098122685be 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -354,10 +354,9 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
// If the shift amount is guaranteed to be in-range we can replace it with a
// generic shift.
- APInt UpperBits =
- APInt::getHighBitsSet(BitWidth, BitWidth - Log2_32(BitWidth));
- if (llvm::MaskedValueIsZero(Amt, UpperBits,
- II.getModule()->getDataLayout())) {
+ KnownBits KnownAmt =
+ llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
+ if (KnownAmt.getMaxValue().ult(BitWidth)) {
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
: Builder.CreateLShr(Vec, Amt))
: Builder.CreateAShr(Vec, Amt));
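
The in-range check above matters because x86 variable shifts and generic IR shifts disagree on out-of-range amounts: VPSLLV/VPSRLV zero the lane (and VPSRAV fills it with the sign bit) once the amount reaches the element width, while a generic shl/lshr/ashr by an amount >= the bit width is poison. A small sketch of that difference for one unsigned 32-bit lane (plain C++, not the LLVM API):

#include <cassert>
#include <cstdint>

// Reference semantics of one lane of a variable logical left shift (VPSLLVD).
uint32_t vpsllvdLane(uint32_t X, uint32_t Amt) {
  return Amt >= 32 ? 0u : X << Amt; // hardware yields 0 for out-of-range amounts
}

int main() {
  // In range, the lane matches the generic shift, so the rewrite is safe
  // whenever known bits prove Amt < 32.
  assert(vpsllvdLane(0x0F, 3) == (0x0Fu << 3));
  // Out of range, the hardware lane is 0, but a C++ shift by 40 (and an IR
  // shift by >= the bit width) would be undefined, hence the guard.
  assert(vpsllvdLane(0x0F, 40) == 0);
  return 0;
}
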
@@ -521,11 +520,10 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
// %int = bitcast <16 x i1> %cmp to i16
// %res = zext i16 %int to i32
unsigned NumElts = ArgTy->getNumElements();
- Type *IntegerVecTy = VectorType::getInteger(ArgTy);
Type *IntegerTy = Builder.getIntNTy(NumElts);
- Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy);
- Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy));
+ Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
+ Res = Builder.CreateIsNeg(Res);
Res = Builder.CreateBitCast(Res, IntegerTy);
Res = Builder.CreateZExtOrTrunc(Res, ResTy);
return Res;
@@ -997,20 +995,18 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, II.getArgOperand(0));
}
- if (MaskC->getValue().isShiftedMask()) {
+ unsigned MaskIdx, MaskLen;
+ if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
// Any single contiguous sequence of 1s anywhere in the mask simply
// describes a subset of the input bits shifted to the appropriate
// position. Replace with the straightforward IR.
- unsigned ShiftAmount = MaskC->getValue().countTrailingZeros();
Value *Input = II.getArgOperand(0);
Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
- Value *Shifted = IC.Builder.CreateLShr(Masked,
- ConstantInt::get(II.getType(),
- ShiftAmount));
+ Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
+ Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt);
return IC.replaceInstUsesWith(II, Shifted);
}
-
if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
uint64_t Src = SrcC->getZExtValue();
uint64_t Mask = MaskC->getZExtValue();
@@ -1042,15 +1038,15 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (MaskC->isAllOnesValue()) {
return IC.replaceInstUsesWith(II, II.getArgOperand(0));
}
- if (MaskC->getValue().isShiftedMask()) {
+
+ unsigned MaskIdx, MaskLen;
+ if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
// Any single contiguous sequence of 1s anywhere in the mask simply
// describes a subset of the input bits shifted to the appropriate
// position. Replace with the straightforward IR.
- unsigned ShiftAmount = MaskC->getValue().countTrailingZeros();
Value *Input = II.getArgOperand(0);
- Value *Shifted = IC.Builder.CreateShl(Input,
- ConstantInt::get(II.getType(),
- ShiftAmount));
+ Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
+ Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
return IC.replaceInstUsesWith(II, Masked);
}
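
Both rewrites rest on the same identity: when the mask is a single contiguous run of MaskLen ones starting at bit MaskIdx, PEXT degenerates to a masked right shift and PDEP to a shifted mask. A self-contained check of both identities in plain C++, using scalar reference implementations rather than the BMI2 intrinsics:

#include <cassert>
#include <cstdint>

// Scalar reference semantics of BMI2 PEXT: gather the Src bits selected by
// Mask into the low bits of the result.
uint32_t refPext(uint32_t Src, uint32_t Mask) {
  uint32_t Res = 0;
  for (uint32_t M = Mask, K = 0; M; M &= M - 1, ++K)
    if (Src & (M & -M)) // lowest remaining set bit of the mask
      Res |= 1u << K;
  return Res;
}

// Scalar reference semantics of BMI2 PDEP: scatter the low Src bits into the
// positions selected by Mask.
uint32_t refPdep(uint32_t Src, uint32_t Mask) {
  uint32_t Res = 0;
  for (uint32_t M = Mask, K = 0; M; M &= M - 1, ++K)
    if (Src & (1u << K))
      Res |= M & -M;
  return Res;
}

int main() {
  const unsigned MaskIdx = 4, MaskLen = 6;
  const uint32_t Mask = ((1u << MaskLen) - 1) << MaskIdx; // contiguous run
  for (uint32_t Src : {0u, 0x12345678u, 0xFFFFFFFFu, 0xDEADBEEFu}) {
    assert(refPext(Src, Mask) == ((Src & Mask) >> MaskIdx)); // the pext fold
    assert(refPdep(Src, Mask) == ((Src << MaskIdx) & Mask)); // the pdep fold
  }
  return 0;
}
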
@@ -1934,6 +1930,23 @@ Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
break;
}
+ // General per-element vector operations.
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256: {
+ simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
+ simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
+ UndefElts &= UndefElts2;
+ break;
+ }
+
case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_sse2_packuswb_128:
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index d825981a6b36..5da06bc87b06 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -48,18 +48,23 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
VEX, T8XD;
// Pseudo instruction for RA.
- def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src),
- [(int_x86_ldtilecfg_internal addr:$src)]>;
+ let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
+ Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
+ def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
+ let isPseudo = true, mayLoad = 1 in
def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
GR16:$src2,
opaquemem:$src3), []>;
+ let isPseudo = true, mayLoad = 1 in
def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
GR16:$src2,
opaquemem:$src3), []>;
+ let isPseudo = true, mayStore = 1 in
def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
GR16:$src2, opaquemem:$src3,
TILE:$src4), []>;
- let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+ let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ canFoldAsLoad = 1 in
def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
[(set TILE:$dst, (int_x86_tilezero_internal
GR16:$src1, GR16:$src2))]>;
@@ -67,9 +72,12 @@ let Predicates = [HasAMXTILE, In64BitMode] in {
let usesCustomInserter = 1 in {
// Pseudo instructions, using immediates instead of tile registers.
// To be translated to the actual instructions in X86ISelLowering.cpp
+ let mayLoad = 1 in
def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
+ let mayLoad = 1 in
def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
sibmem:$src2), []>;
+ let mayStore = 1 in
def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
[(int_x86_tilezero timm:$src)]>;
@@ -99,7 +107,7 @@ let Predicates = [HasAMXINT8, In64BitMode] in {
}
// Pseudo instruction for RA.
- let Constraints = "$src4 = $dst" in {
+ let isPseudo = true, Constraints = "$src4 = $dst" in {
def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
GR16:$src2, GR16:$src3, TILE:$src4,
TILE:$src5, TILE:$src6),
@@ -158,7 +166,7 @@ let Predicates = [HasAMXBF16, In64BitMode] in {
[]>, VEX_4V, T8XS;
// Pseudo instruction for RA.
- let Constraints = "$src4 = $dst" in
+ let isPseudo = true, Constraints = "$src4 = $dst" in
def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
GR16:$src2, GR16:$src3, TILE:$src4,
TILE:$src5, TILE:$src6),
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index bc67d1f89d7f..48da7b3ac882 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -476,6 +476,7 @@ let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
@@ -508,25 +509,23 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}
-let Predicates = [HasFP16] in {
-def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
-def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
-def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
-}
-
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
+ def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
+ [(set FR16X:$dst, fp16imm0)]>;
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
@@ -535,12 +534,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
[(set VR128X:$dst, fp128imm0)]>;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
- def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
- [(set FR16X:$dst, fp16imm0)]>;
-}
-
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
@@ -678,21 +671,21 @@ defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>;
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>;
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
- vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>;
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
@@ -979,7 +972,7 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>;
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
@@ -987,14 +980,14 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>;
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
- vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>;
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
@@ -1020,6 +1013,10 @@ def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
(v8i16 (VEXTRACTI128rr
(v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
(iPTR 1)))>;
+def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
+ (v8f16 (VEXTRACTF128rr
+ (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
+ (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
(v16i8 (VEXTRACTI128rr
(v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
@@ -1049,18 +1046,16 @@ def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
(v8i16 (VEXTRACTI32x4Z256rr
(v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
(iPTR 1)))>;
+def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
+ (v8f16 (VEXTRACTF32x4Z256rr
+ (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
+ (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
(v16i8 (VEXTRACTI32x4Z256rr
(v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
(iPTR 1)))>;
}
-let Predicates = [HasFP16, HasVLX] in
-def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
- (v8f16 (VEXTRACTF32x4Z256rr
- (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
- (iPTR 1)))>;
-
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
@@ -1478,7 +1473,7 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
-let Predicates = [HasFP16] in {
+let Predicates = [HasBWI] in {
def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
(VPBROADCASTWZrm addr:$src)>;
@@ -1487,7 +1482,7 @@ let Predicates = [HasFP16] in {
def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
(VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
-let Predicates = [HasVLX, HasFP16] in {
+let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
(VPBROADCASTWZ128rm addr:$src)>;
def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
@@ -3763,6 +3758,9 @@ let Predicates = [HasBWI, NoVLX] in {
defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
+
+ defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
+ defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
}
let Predicates = [HasAVX512] in {
@@ -3852,7 +3850,7 @@ let Predicates = [HasVLX] in {
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
-let Predicates = [HasFP16] in {
+let Predicates = [HasBWI] in {
def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
(VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
@@ -3887,7 +3885,7 @@ let Predicates = [HasFP16] in {
def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
(VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
-let Predicates = [HasFP16, HasVLX] in {
+let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
(VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
@@ -4099,14 +4097,14 @@ def : Pat<(f64 (bitconvert VK64:$src)),
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
- X86VectorVTInfo _,
- list<Predicate> prd = [HasAVX512, OptForSize]> {
- let Predicates = prd in
+ X86VectorVTInfo _, Predicate prd = HasAVX512> {
+ let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
_.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
+ let Predicates = [prd] in {
def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
@@ -4159,6 +4157,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
NotMemoryFoldable;
+ }
}
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
@@ -4168,7 +4167,7 @@ defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
- [HasFP16]>,
+ HasFP16>,
VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
@@ -4338,14 +4337,9 @@ def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
addr:$srcAddr)>;
}
-defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
-defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
- (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
-defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
- (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
@@ -4353,6 +4347,12 @@ defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+let Predicates = [HasFP16] in {
+defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
+defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
(v32i1 (insert_subvector
(v32i1 immAllZerosV),
@@ -4360,6 +4360,30 @@ defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
(iPTR 0))),
(v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
GR8, sub_8bit>;
+
+defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
+defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (insert_subvector
+ (v32i1 immAllZerosV),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ GR8, sub_8bit>;
+
+def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
+ (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
+ VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
+
+def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
+ (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
+}
+
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
@@ -4385,10 +4409,6 @@ defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
-defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
- (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
-defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
- (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
@@ -4396,13 +4416,6 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
-defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
- (v32i1 (insert_subvector
- (v32i1 immAllZerosV),
- (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
- (iPTR 0))),
- (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
- GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
@@ -4428,16 +4441,6 @@ defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
-def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
- (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
- (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
- VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
- (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
-
-def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
- (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
- (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
-
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
(v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
@@ -5039,7 +5042,7 @@ defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
SchedWriteVecIMul, HasBWI, 1>, T8PD;
-defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
+defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
SchedWriteVecIMul, HasAVX512, 1>, T8PD;
@@ -11651,6 +11654,14 @@ defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
+// Always select FP16 instructions if available.
+let Predicates = [HasBWI], AddedComplexity = -10 in {
+ def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
+ def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
+ def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
+ def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
+}
+
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
@@ -12988,7 +12999,6 @@ def : Pat<(i16 (bitconvert FR16X:$src)),
sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
(i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
-}
// Allow "vmovw" to use GR64
let hasSideEffects = 0 in {
@@ -12997,6 +13007,7 @@ let hasSideEffects = 0 in {
def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}
+}
// Convert 16-bit float to i16/u16
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 8337d2b37383..f08ecdf6afc9 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -541,7 +541,7 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
Operand immoperand, SDPatternOperator immoperator,
Operand imm8operand, SDPatternOperator imm8operator,
bit hasOddOpcode, OperandSize opSize,
- bit hasREX_WPrefix> {
+ bit hasREX_W> {
/// VT - This is the value type itself.
ValueType VT = vt;
@@ -596,9 +596,9 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
/// to Opsize16. i32 sets this to OpSize32.
OperandSize OpSize = opSize;
- /// HasREX_WPrefix - This bit is set to true if the instruction should have
+ /// HasREX_W - This bit is set to true if the instruction should have
/// the 0x40 REX prefix. This is set for i64 types.
- bit HasREX_WPrefix = hasREX_WPrefix;
+ bit HasREX_W = hasREX_W;
}
def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
@@ -634,7 +634,7 @@ class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
// Infer instruction prefixes from type info.
let OpSize = typeinfo.OpSize;
- let hasREX_WPrefix = typeinfo.HasREX_WPrefix;
+ let hasREX_W = typeinfo.HasREX_W;
}
// BinOpRR - Instructions like "add reg, reg, reg".
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 330b8c7a8a43..79ac2a2d8019 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -14,7 +14,7 @@
// CMOV instructions.
let isCodeGenOnly = 1, ForceDisassemble = 1 in {
-let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+let Uses = [EFLAGS], Predicates = [HasCMOV], Constraints = "$src1 = $dst",
isCommutable = 1, SchedRW = [WriteCMOV] in {
def CMOV16rr
: I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond),
@@ -35,7 +35,7 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
(X86cmov GR64:$src1, GR64:$src2, timm:$cond, EFLAGS))]>, TB;
}
-let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+let Uses = [EFLAGS], Predicates = [HasCMOV], Constraints = "$src1 = $dst",
SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
def CMOV16rm
: I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond),
@@ -52,7 +52,7 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
"cmov${cond}{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
timm:$cond, EFLAGS))]>, TB;
-} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // Uses = [EFLAGS], Predicates = [HasCMOV], Constraints = "$src1 = $dst"
} // isCodeGenOnly = 1, ForceDisassemble = 1
def inv_cond_XFORM : SDNodeXForm<imm, [{
@@ -63,7 +63,7 @@ def inv_cond_XFORM : SDNodeXForm<imm, [{
// Conditional moves with folded loads with operands swapped and conditions
// inverted.
-let Predicates = [HasCMov] in {
+let Predicates = [HasCMOV] in {
def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, timm:$cond, EFLAGS),
(CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>;
def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, timm:$cond, EFLAGS),
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 7288ce812138..a55b95960aa6 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -544,10 +544,10 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
// i8 register pressure.
defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;
- let Predicates = [NoCMov] in {
+ let Predicates = [NoCMOV] in {
defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
- } // Predicates = [NoCMov]
+ } // Predicates = [NoCMOV]
// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
// SSE1/SSE2.
@@ -562,12 +562,14 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
let Predicates = [HasMMX] in
defm _VR64 : CMOVrr_PSEUDO<VR64, x86mmx>;
- defm _FR16X : CMOVrr_PSEUDO<FR16X, f16>;
let Predicates = [HasSSE1,NoAVX512] in
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
- let Predicates = [HasSSE2,NoAVX512] in
+ let Predicates = [HasSSE2,NoAVX512] in {
+ defm _FR16 : CMOVrr_PSEUDO<FR16, f16>;
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
+ }
let Predicates = [HasAVX512] in {
+ defm _FR16X : CMOVrr_PSEUDO<FR16X, f16>;
defm _FR32X : CMOVrr_PSEUDO<FR32X, f32>;
defm _FR64X : CMOVrr_PSEUDO<FR64X, f64>;
}
@@ -670,7 +672,7 @@ def OR32mi8Locked : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero),
Requires<[Not64BitMode]>, OpSize32, LOCK,
Sched<[WriteALURMW]>;
-let hasSideEffects = 1 in
+let hasSideEffects = 1, isMeta = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
[(X86MemBarrier)]>, Sched<[WriteLoad]>;
@@ -839,6 +841,38 @@ let Predicates = [UseIncDec] in {
def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
}
+// Atomic bit test.
+def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
+ SDTCisVT<2, i8>, SDTCisVT<3, i32>]>;
+def x86bts : SDNode<"X86ISD::LBTS", X86LBTest,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86btc : SDNode<"X86ISD::LBTC", X86LBTest,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteBitTestSetRegRMW] in {
+ def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2),
+ !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>,
+ OpSize16, TB, LOCK;
+ def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2),
+ !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>,
+ OpSize32, TB, LOCK;
+ def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2),
+ !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>,
+ TB, LOCK;
+ }
+}
+
+defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
+defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
+defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;
+
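
Taken together, the LBTS/LBTC/LBTR nodes and the LOCK_BT* definitions above let an atomic or/xor/and against a single-bit constant lower to one locked bit-test instruction, with the old bit read back from EFLAGS.CF, instead of a CMPXCHG retry loop. A hedged sketch of the source-level idiom this targets (whether the backend actually emits lock bts/btr here depends on the pattern matching in the lowering changes, not on this sketch):

#include <atomic>
#include <cstdint>

std::atomic<uint32_t> Flags;

// atomicrmw or with a one-bit mask where only the affected bit of the old
// value is consumed: a candidate for `lock btsl $5, Flags`.
bool testAndSetBit5() {
  return Flags.fetch_or(1u << 5) & (1u << 5);
}

// The btr (reset) form pairs with fetch_and of the inverted mask.
bool testAndClearBit5() {
  return Flags.fetch_and(~(1u << 5)) & (1u << 5);
}
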
// Atomic compare and swap.
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {
@@ -863,7 +897,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
+ Predicates = [HasCX8], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, usesCustomInserter = 1 in {
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"cmpxchg8b\t$ptr",
@@ -871,7 +905,7 @@ def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
}
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+ Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
"cmpxchg16b\t$ptr",
@@ -898,7 +932,7 @@ def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
- Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+ Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, isPseudo = 1,
mayLoad = 1, mayStore = 1, hasSideEffects = 0,
Constraints = "$rbx_save = $dst" in {
@@ -910,7 +944,7 @@ def LCMPXCHG16B_SAVE_RBX :
// Pseudo instruction that doesn't read/write RBX. Will be turned into either
// LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter.
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX],
- Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
+ Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, isPseudo = 1,
mayLoad = 1, mayStore = 1, hasSideEffects = 0,
usesCustomInserter = 1 in {
@@ -1235,6 +1269,21 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
return true;
}]>;
+def X86tcret_1reg : PatFrag<(ops node:$ptr, node:$off),
+ (X86tcret node:$ptr, node:$off), [{
+ // X86tcret args: (*chain, ptr, imm, regs..., glue)
+ unsigned NumRegs = 1;
+ const SDValue& BasePtr = cast<LoadSDNode>(N->getOperand(1))->getBasePtr();
+ if (isa<FrameIndexSDNode>(BasePtr))
+ NumRegs = 3;
+ else if (BasePtr->getNumOperands() && isa<GlobalAddressSDNode>(BasePtr->getOperand(0)))
+ NumRegs = 3;
+ for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
+ if (isa<RegisterSDNode>(N->getOperand(i)) && (NumRegs-- == 0))
+ return false;
+ return true;
+}]>;
+
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
@@ -1242,7 +1291,8 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
-def : Pat<(X86tcret (load addr:$dst), timm:$off),
+// Similar to X86tcret_6regs, but here we only have one register left.
+def : Pat<(X86tcret_1reg (load addr:$dst), timm:$off),
(TCRETURNmi addr:$dst, timm:$off)>,
Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
@@ -1467,6 +1517,21 @@ def ADD64ri32_DB : I<0, Pseudo,
} // AddedComplexity, SchedRW
//===----------------------------------------------------------------------===//
+// Pattern match XOR as ADD
+//===----------------------------------------------------------------------===//
+
+// Prefer to pattern match XOR with min_signed_value as ADD at isel time.
+// ADD can be 3-addressified into an LEA instruction to avoid copies.
+let AddedComplexity = 5 in {
+def : Pat<(xor GR8:$src1, -128),
+ (ADD8ri GR8:$src1, -128)>;
+def : Pat<(xor GR16:$src1, -32768),
+ (ADD16ri GR16:$src1, -32768)>;
+def : Pat<(xor GR32:$src1, -2147483648),
+ (ADD32ri GR32:$src1, -2147483648)>;
+}
+
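These patterns rest on a bit-level identity: for the minimum signed value M (the sign bit alone), x ^ M equals x + M for every x, because adding M can only flip the top bit and any carry out is discarded. A quick self-contained check of the identity, which is why the ADD (and hence LEA) form is always safe:

    #include <cassert>
    #include <cstdint>

    int main() {
      const std::uint32_t M = 0x80000000u; // i32 min_signed_value bit pattern
      for (std::uint64_t X = 0; X <= 0xFFFFFFFFull; X += 0x101ull) {
        std::uint32_t V = static_cast<std::uint32_t>(X);
        assert((V ^ M) == V + M); // adding M only flips the top bit
      }
      return 0;
    }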
+//===----------------------------------------------------------------------===//
// Pattern match SUB as XOR
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 6d969962afff..aa89a6f0ff9d 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -147,7 +147,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
// These are switched from TAILJMPr/m64_REX in MCInstLower.
- let isCodeGenOnly = 1, hasREX_WPrefix = 1 in {
+ let isCodeGenOnly = 1, hasREX_W = 1 in {
def JMP64r_REX : I<0xFF, MRM4r, (outs), (ins GR64:$dst),
"rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJump]>;
let mayLoad = 1 in
@@ -384,7 +384,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
[]>, Sched<[WriteJumpLd]>;
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
- let hasREX_WPrefix = 1 in {
+ let hasREX_W = 1 in {
def TAILJMPr64_REX : PseudoI<(outs), (ins ptr_rc_tailcall:$dst),
[]>, Sched<[WriteJump]>;
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index e310f369be08..a68d61043c5c 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -423,9 +423,9 @@ def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\t$dst">;
// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
- FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMOV]>;
class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
- FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMOV]>;
multiclass FPCMov<PatLeaf cc> {
def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
@@ -440,7 +440,7 @@ multiclass FPCMov<PatLeaf cc> {
CondMovFP,
[(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
cc, EFLAGS))]>,
- Requires<[HasCMov]>;
+ Requires<[HasCMOV]>;
}
let SchedRW = [WriteFCMOV] in {
@@ -455,7 +455,7 @@ defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
-let Predicates = [HasCMov] in {
+let Predicates = [HasCMOV] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RSTi:$op),
"fcmovb\t{$op, %st|st, $op}">;
@@ -473,7 +473,7 @@ def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RSTi:$op),
"fcmovne\t{$op, %st|st, $op}">;
def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RSTi:$op),
"fcmovnu\t{$op, %st|st, $op}">;
-} // Predicates = [HasCMov]
+} // Predicates = [HasCMOV]
} // SchedRW
let mayRaiseFPException = 1 in {
@@ -664,22 +664,22 @@ let SchedRW = [WriteFCom], mayRaiseFPException = 1 in {
let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86any_fcmp RFP32:$lhs, RFP32:$rhs))]>,
- Requires<[FPStackf32, HasCMov]>;
+ Requires<[FPStackf32, HasCMOV]>;
def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86any_fcmp RFP64:$lhs, RFP64:$rhs))]>,
- Requires<[FPStackf64, HasCMov]>;
+ Requires<[FPStackf64, HasCMOV]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86any_fcmp RFP80:$lhs, RFP80:$rhs))]>,
- Requires<[HasCMov]>;
+ Requires<[HasCMOV]>;
def COM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86strict_fcmps RFP32:$lhs, RFP32:$rhs))]>,
- Requires<[FPStackf32, HasCMov]>;
+ Requires<[FPStackf32, HasCMOV]>;
def COM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86strict_fcmps RFP64:$lhs, RFP64:$rhs))]>,
- Requires<[FPStackf64, HasCMov]>;
+ Requires<[FPStackf64, HasCMOV]>;
def COM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86strict_fcmps RFP80:$lhs, RFP80:$rhs))]>,
- Requires<[HasCMov]>;
+ Requires<[HasCMOV]>;
}
let Uses = [ST0, FPCW] in {
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 226349485238..27220a8d4d99 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -292,8 +292,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD },
{ X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
{ X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD },
- { X86::MMX_MOVD64from64rr, X86::MMX_MOVD64from64rm, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::MMX_MOVD64from64rr, X86::MMX_MOVQ64mr, TB_FOLDED_STORE },
+ { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE },
{ X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
{ X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
{ X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 0e7033fc233a..3a44b4570e9b 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -196,7 +196,7 @@ class OpSize32 { OperandSize OpSize = OpSize32; }
class AdSize16 { AddressSize AdSize = AdSize16; }
class AdSize32 { AddressSize AdSize = AdSize32; }
class AdSize64 { AddressSize AdSize = AdSize64; }
-class REX_W { bit hasREX_WPrefix = 1; }
+class REX_W { bit hasREX_W = 1; }
class LOCK { bit hasLockPrefix = 1; }
class REP { bit hasREPPrefix = 1; }
class TB { Map OpMap = TB; }
@@ -316,7 +316,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
bits<4> OpMapBits = OpMap.Value;
- bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
+ bit hasREX_W = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
Domain ExeDomain = d;
@@ -375,7 +375,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
let TSFlags{12-11} = OpPrefixBits{1-0};
let TSFlags{16-13} = OpMapBits;
- let TSFlags{17} = hasREX_WPrefix;
+ let TSFlags{17} = hasREX_W;
let TSFlags{21-18} = ImmT.Value;
let TSFlags{24-22} = FPForm.Value;
let TSFlags{25} = hasLockPrefix;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 166f1f8c3251..57ba4683c6a4 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -287,7 +287,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisSameAs<2, 1>]>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
-def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4dcd886fa3b2..ec32ac2acad1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,16 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -137,298 +140,70 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
}
bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- // By default, assume that the instruction is not data invariant.
+ if (MI.mayLoad() || MI.mayStore())
return false;
- // Some target-independent operations that trivially lower to data-invariant
- // instructions.
- case TargetOpcode::COPY:
- case TargetOpcode::INSERT_SUBREG:
- case TargetOpcode::SUBREG_TO_REG:
+ // Some target-independent operations that trivially lower to data-invariant
+ // instructions.
+ if (MI.isCopyLike() || MI.isInsertSubreg())
return true;
+ unsigned Opcode = MI.getOpcode();
+ using namespace X86;
// On x86 it is believed that imul is constant time w.r.t. the loaded data.
// However, they set flags and are perhaps the most surprisingly constant
// time operations so we call them out here separately.
- case X86::IMUL16rr:
- case X86::IMUL16rri8:
- case X86::IMUL16rri:
- case X86::IMUL32rr:
- case X86::IMUL32rri8:
- case X86::IMUL32rri:
- case X86::IMUL64rr:
- case X86::IMUL64rri32:
- case X86::IMUL64rri8:
-
+ if (isIMUL(Opcode))
+ return true;
// Bit scanning and counting instructions that are somewhat surprisingly
// constant time as they scan across bits and do other fairly complex
// operations like popcnt, but are believed to be constant time on x86.
// However, these set flags.
- case X86::BSF16rr:
- case X86::BSF32rr:
- case X86::BSF64rr:
- case X86::BSR16rr:
- case X86::BSR32rr:
- case X86::BSR64rr:
- case X86::LZCNT16rr:
- case X86::LZCNT32rr:
- case X86::LZCNT64rr:
- case X86::POPCNT16rr:
- case X86::POPCNT32rr:
- case X86::POPCNT64rr:
- case X86::TZCNT16rr:
- case X86::TZCNT32rr:
- case X86::TZCNT64rr:
-
+ if (isBSF(Opcode) || isBSR(Opcode) || isLZCNT(Opcode) || isPOPCNT(Opcode) ||
+ isTZCNT(Opcode))
+ return true;
// Bit manipulation instructions are effectively combinations of basic
// arithmetic ops, and should still execute in constant time. These also
// set flags.
- case X86::BLCFILL32rr:
- case X86::BLCFILL64rr:
- case X86::BLCI32rr:
- case X86::BLCI64rr:
- case X86::BLCIC32rr:
- case X86::BLCIC64rr:
- case X86::BLCMSK32rr:
- case X86::BLCMSK64rr:
- case X86::BLCS32rr:
- case X86::BLCS64rr:
- case X86::BLSFILL32rr:
- case X86::BLSFILL64rr:
- case X86::BLSI32rr:
- case X86::BLSI64rr:
- case X86::BLSIC32rr:
- case X86::BLSIC64rr:
- case X86::BLSMSK32rr:
- case X86::BLSMSK64rr:
- case X86::BLSR32rr:
- case X86::BLSR64rr:
- case X86::TZMSK32rr:
- case X86::TZMSK64rr:
-
+ if (isBLCFILL(Opcode) || isBLCI(Opcode) || isBLCIC(Opcode) ||
+ isBLCMSK(Opcode) || isBLCS(Opcode) || isBLSFILL(Opcode) ||
+ isBLSI(Opcode) || isBLSIC(Opcode) || isBLSMSK(Opcode) || isBLSR(Opcode) ||
+ isTZMSK(Opcode))
+ return true;
// Bit extracting and clearing instructions should execute in constant time,
// and set flags.
- case X86::BEXTR32rr:
- case X86::BEXTR64rr:
- case X86::BEXTRI32ri:
- case X86::BEXTRI64ri:
- case X86::BZHI32rr:
- case X86::BZHI64rr:
-
+ if (isBEXTR(Opcode) || isBZHI(Opcode))
+ return true;
// Shift and rotate.
- case X86::ROL8r1:
- case X86::ROL16r1:
- case X86::ROL32r1:
- case X86::ROL64r1:
- case X86::ROL8rCL:
- case X86::ROL16rCL:
- case X86::ROL32rCL:
- case X86::ROL64rCL:
- case X86::ROL8ri:
- case X86::ROL16ri:
- case X86::ROL32ri:
- case X86::ROL64ri:
- case X86::ROR8r1:
- case X86::ROR16r1:
- case X86::ROR32r1:
- case X86::ROR64r1:
- case X86::ROR8rCL:
- case X86::ROR16rCL:
- case X86::ROR32rCL:
- case X86::ROR64rCL:
- case X86::ROR8ri:
- case X86::ROR16ri:
- case X86::ROR32ri:
- case X86::ROR64ri:
- case X86::SAR8r1:
- case X86::SAR16r1:
- case X86::SAR32r1:
- case X86::SAR64r1:
- case X86::SAR8rCL:
- case X86::SAR16rCL:
- case X86::SAR32rCL:
- case X86::SAR64rCL:
- case X86::SAR8ri:
- case X86::SAR16ri:
- case X86::SAR32ri:
- case X86::SAR64ri:
- case X86::SHL8r1:
- case X86::SHL16r1:
- case X86::SHL32r1:
- case X86::SHL64r1:
- case X86::SHL8rCL:
- case X86::SHL16rCL:
- case X86::SHL32rCL:
- case X86::SHL64rCL:
- case X86::SHL8ri:
- case X86::SHL16ri:
- case X86::SHL32ri:
- case X86::SHL64ri:
- case X86::SHR8r1:
- case X86::SHR16r1:
- case X86::SHR32r1:
- case X86::SHR64r1:
- case X86::SHR8rCL:
- case X86::SHR16rCL:
- case X86::SHR32rCL:
- case X86::SHR64rCL:
- case X86::SHR8ri:
- case X86::SHR16ri:
- case X86::SHR32ri:
- case X86::SHR64ri:
- case X86::SHLD16rrCL:
- case X86::SHLD32rrCL:
- case X86::SHLD64rrCL:
- case X86::SHLD16rri8:
- case X86::SHLD32rri8:
- case X86::SHLD64rri8:
- case X86::SHRD16rrCL:
- case X86::SHRD32rrCL:
- case X86::SHRD64rrCL:
- case X86::SHRD16rri8:
- case X86::SHRD32rri8:
- case X86::SHRD64rri8:
-
+ if (isROL(Opcode) || isROR(Opcode) || isSAR(Opcode) || isSHL(Opcode) ||
+ isSHR(Opcode) || isSHLD(Opcode) || isSHRD(Opcode))
+ return true;
// Basic arithmetic is constant time on the input but does set flags.
- case X86::ADC8rr:
- case X86::ADC8ri:
- case X86::ADC16rr:
- case X86::ADC16ri:
- case X86::ADC16ri8:
- case X86::ADC32rr:
- case X86::ADC32ri:
- case X86::ADC32ri8:
- case X86::ADC64rr:
- case X86::ADC64ri8:
- case X86::ADC64ri32:
- case X86::ADD8rr:
- case X86::ADD8ri:
- case X86::ADD16rr:
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD32rr:
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD64rr:
- case X86::ADD64ri8:
- case X86::ADD64ri32:
- case X86::AND8rr:
- case X86::AND8ri:
- case X86::AND16rr:
- case X86::AND16ri:
- case X86::AND16ri8:
- case X86::AND32rr:
- case X86::AND32ri:
- case X86::AND32ri8:
- case X86::AND64rr:
- case X86::AND64ri8:
- case X86::AND64ri32:
- case X86::OR8rr:
- case X86::OR8ri:
- case X86::OR16rr:
- case X86::OR16ri:
- case X86::OR16ri8:
- case X86::OR32rr:
- case X86::OR32ri:
- case X86::OR32ri8:
- case X86::OR64rr:
- case X86::OR64ri8:
- case X86::OR64ri32:
- case X86::SBB8rr:
- case X86::SBB8ri:
- case X86::SBB16rr:
- case X86::SBB16ri:
- case X86::SBB16ri8:
- case X86::SBB32rr:
- case X86::SBB32ri:
- case X86::SBB32ri8:
- case X86::SBB64rr:
- case X86::SBB64ri8:
- case X86::SBB64ri32:
- case X86::SUB8rr:
- case X86::SUB8ri:
- case X86::SUB16rr:
- case X86::SUB16ri:
- case X86::SUB16ri8:
- case X86::SUB32rr:
- case X86::SUB32ri:
- case X86::SUB32ri8:
- case X86::SUB64rr:
- case X86::SUB64ri8:
- case X86::SUB64ri32:
- case X86::XOR8rr:
- case X86::XOR8ri:
- case X86::XOR16rr:
- case X86::XOR16ri:
- case X86::XOR16ri8:
- case X86::XOR32rr:
- case X86::XOR32ri:
- case X86::XOR32ri8:
- case X86::XOR64rr:
- case X86::XOR64ri8:
- case X86::XOR64ri32:
+ if (isADC(Opcode) || isADD(Opcode) || isAND(Opcode) || isOR(Opcode) ||
+ isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode))
+ return true;
// Arithmetic with just 32-bit and 64-bit variants and no immediates.
- case X86::ADCX32rr:
- case X86::ADCX64rr:
- case X86::ADOX32rr:
- case X86::ADOX64rr:
- case X86::ANDN32rr:
- case X86::ANDN64rr:
+ if (isADCX(Opcode) || isADOX(Opcode) || isANDN(Opcode))
+ return true;
// Unary arithmetic operations.
- case X86::DEC8r:
- case X86::DEC16r:
- case X86::DEC32r:
- case X86::DEC64r:
- case X86::INC8r:
- case X86::INC16r:
- case X86::INC32r:
- case X86::INC64r:
- case X86::NEG8r:
- case X86::NEG16r:
- case X86::NEG32r:
- case X86::NEG64r:
-
+ if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode))
+ return true;
// Unlike other arithmetic, NOT doesn't set EFLAGS.
- case X86::NOT8r:
- case X86::NOT16r:
- case X86::NOT32r:
- case X86::NOT64r:
-
+ if (isNOT(Opcode))
+ return true;
// Various move instructions used to zero or sign extend things. Note that we
// intentionally don't support the _NOREX variants as we can't handle that
// register constraint anyways.
- case X86::MOVSX16rr8:
- case X86::MOVSX32rr8:
- case X86::MOVSX32rr16:
- case X86::MOVSX64rr8:
- case X86::MOVSX64rr16:
- case X86::MOVSX64rr32:
- case X86::MOVZX16rr8:
- case X86::MOVZX32rr8:
- case X86::MOVZX32rr16:
- case X86::MOVZX64rr8:
- case X86::MOVZX64rr16:
- case X86::MOV32rr:
-
+ if (isMOVSX(Opcode) || isMOVZX(Opcode) || isMOVSXD(Opcode) || isMOV(Opcode))
+ return true;
// Arithmetic instructions that are both constant time and don't set flags.
- case X86::RORX32ri:
- case X86::RORX64ri:
- case X86::SARX32rr:
- case X86::SARX64rr:
- case X86::SHLX32rr:
- case X86::SHLX64rr:
- case X86::SHRX32rr:
- case X86::SHRX64rr:
-
+ if (isRORX(Opcode) || isSARX(Opcode) || isSHLX(Opcode) || isSHRX(Opcode))
+ return true;
// LEA doesn't actually access memory, and its arithmetic is constant time.
- case X86::LEA16r:
- case X86::LEA32r:
- case X86::LEA64_32r:
- case X86::LEA64r:
+ if (isLEA(Opcode))
return true;
- }
+ // By default, assume that the instruction is not data invariant.
+ return false;
}
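The rewrite above replaces a switch over hundreds of opcodes with per-mnemonic predicates such as isIMUL and isLEA, which are emitted into X86GenInstrInfo.inc by TableGen. A minimal sketch of the shape of such a helper (the enum values are hypothetical stand-ins; the real predicates cover every encoding of the mnemonic):

    enum Opcode : unsigned { LEA16r, LEA32r, LEA64_32r, LEA64r, ADD32rr };

    // One predicate per mnemonic, folding all its encodings together, so
    // classification code like isDataInvariant() tests intent rather than
    // maintaining opcode lists.
    constexpr bool isLEA(unsigned Opc) {
      return Opc == LEA16r || Opc == LEA32r || Opc == LEA64_32r ||
             Opc == LEA64r;
    }

    static_assert(isLEA(LEA64r) && !isLEA(ADD32rr), "predicate shape check");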
bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
@@ -990,6 +765,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::AVX_SET0:
case X86::FsFLD0SD:
case X86::FsFLD0SS:
+ case X86::FsFLD0SH:
case X86::FsFLD0F128:
case X86::KSET0D:
case X86::KSET0Q:
@@ -1192,6 +968,102 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
return ShAmt < 4 && ShAmt > 0;
}
+static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
+ MachineInstr &CmpValDefInstr,
+ const MachineRegisterInfo *MRI,
+ MachineInstr **AndInstr,
+ const TargetRegisterInfo *TRI,
+ bool &NoSignFlag, bool &ClearsOverflowFlag) {
+ if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
+ return false;
+
+ if (CmpInstr.getOpcode() != X86::TEST64rr)
+ return false;
+
+ // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
+ // guarantees that it's analyzable only if the two registers are identical.
+ assert(
+ (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+ "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
+ "requires two reg operands are the same.");
+
+ // The caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
+ // `CmpValDefInstr` defines the value used by `CmpInstr`; in that case,
+ // if `CmpValDefInstr` sets EFLAGS, `CmpInstr` is likely redundant.
+ assert(
+ (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
+ "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");
+
+ // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically
+ // 0.
+ if (CmpValDefInstr.getOperand(1).getImm() != 0)
+ return false;
+
+ // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
+ // sub_32bit or sub_xmm.
+ if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
+ return false;
+
+ MachineInstr *VregDefInstr =
+ MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
+
+ assert(VregDefInstr && "Must have a definition (SSA)");
+
+ // Require that `CmpValDefInstr` and `VregDefInstr` be in the same MBB
+ // to simplify the subsequent analysis.
+ //
+ // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
+ // `CmpValDefInstr.getParent()`, this could be handled.
+ if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
+ return false;
+
+ if (X86::isAND(VregDefInstr->getOpcode())) {
+ // Get a sequence of instructions like
+ // %reg = and* ... // Set EFLAGS
+ // ... // EFLAGS not changed
+ // %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
+ // test64rr %extended_reg, %extended_reg, implicit-def $eflags
+ //
+ // If subsequent readers use a subset of bits that don't change
+ // after `and*` instructions, it's likely that the test64rr could
+ // be optimized away.
+ for (const MachineInstr &Instr :
+ make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
+ MachineBasicBlock::iterator(CmpValDefInstr))) {
+ // Bail out if any instruction between 'VregDefInstr' and
+ // 'CmpValDefInstr' modifies EFLAGS.
+ if (Instr.modifiesRegister(X86::EFLAGS, TRI))
+ return false;
+ }
+
+ *AndInstr = VregDefInstr;
+
+ // The AND instruction updates SF and clears OF, so NoSignFlag would
+ // naturally be false in the sense that SF is modified by `AND`.
+ //
+ // However, the implementation artificially sets `NoSignFlag` to true
+ // to poison the SF bit; that is to say, if SF is looked at later, the
+ // optimization (to erase TEST64rr) will be disabled.
+ //
+ // The reason to poison the SF bit is that its value could differ between
+ // the `AND` and `TEST` operations; the sign bit is not known for `AND`,
+ // and is known to be 0 as a result of `TEST64rr`.
+ //
+ // FIXME: As opposed to poisoning the SF bit directly, consider peeking
+ // into the AND instruction and using the static information to guide
+ // peephole optimization if possible. For example, it's possible to fold
+ // a conditional move into a copy if the relevant EFLAGS bits could be
+ // deduced from an immediate operand of the AND operation.
+ //
+ NoSignFlag = true;
+ // ClearsOverflowFlag is true for the AND operation (no surprise).
+ ClearsOverflowFlag = true;
+ return true;
+ }
+ return false;
+}
+
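A plausible source-level shape behind the MIR pattern sketched in the comments above (illustrative only; codegen is not guaranteed to emit exactly this sequence): the mask leaves the upper bits zero, so testing the widened value tells the compare nothing the AND's flags did not already say.

    long long f(int X) {
      long long M = X & 5;   // and32ri sets EFLAGS; the widening is flag-neutral
      return M == 0 ? 1 : 2; // a test64rr on M would be redundant here
    }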
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
@@ -1314,8 +1186,11 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
case X86::SHL8ri:
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
- MIB.addReg(0).addImm(1ULL << ShAmt)
- .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
+ MIB.addReg(0)
+ .addImm(1LL << ShAmt)
+ .addReg(InRegLEA, RegState::Kill)
+ .addImm(0)
+ .addReg(0);
break;
}
case X86::INC8r:
@@ -1478,7 +1353,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
.add(Dest)
.addReg(0)
- .addImm(1ULL << ShAmt)
+ .addImm(1LL << ShAmt)
.add(Src)
.addImm(0)
.addReg(0);
@@ -1502,7 +1377,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
BuildMI(MF, MI.getDebugLoc(), get(Opc))
.add(Dest)
.addReg(0)
- .addImm(1ULL << ShAmt)
+ .addImm(1LL << ShAmt)
.addReg(SrcReg, getKillRegState(isKill))
.addImm(0)
.addReg(0);
@@ -1957,14 +1832,13 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
FMAForms[0] = FMA3Group.get132Opcode();
FMAForms[1] = FMA3Group.get213Opcode();
FMAForms[2] = FMA3Group.get231Opcode();
- unsigned FormIndex;
- for (FormIndex = 0; FormIndex < 3; FormIndex++)
- if (Opc == FMAForms[FormIndex])
- break;
// Everything is ready, just adjust the FMA opcode and return it.
- FormIndex = FormMapping[Case][FormIndex];
- return FMAForms[FormIndex];
+ for (unsigned FormIndex = 0; FormIndex < 3; FormIndex++)
+ if (Opc == FMAForms[FormIndex])
+ return FMAForms[FormMapping[Case][FormIndex]];
+
+ llvm_unreachable("Illegal FMA3 format");
}
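The shape of this lookup, standalone (the mapping values depend on which operand pair is commuted, so the Map parameter here is only illustrative): locate the opcode among the three FMA forms, then return the form required after commuting.

    #include <cstdint>

    unsigned commuteForm(const unsigned (&Forms)[3],
                         const std::uint8_t (&Map)[3], unsigned Opc) {
      for (unsigned I = 0; I < 3; ++I)
        if (Opc == Forms[I])
          return Forms[Map[I]]; // 132/213/231 -> commuted counterpart
      return 0; // unreachable for a valid FMA3 opcode, as asserted above
    }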
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
@@ -2141,7 +2015,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
- WorkingMI.RemoveOperand(3);
+ WorkingMI.removeOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
/*NewMI=*/false,
OpIdx1, OpIdx2);
@@ -2238,7 +2112,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(X86::MOVSDrr));
- WorkingMI.RemoveOperand(3);
+ WorkingMI.removeOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
@@ -2813,34 +2687,37 @@ bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
return false;
}
+int X86::getCondSrcNoFromDesc(const MCInstrDesc &MCID) {
+ unsigned Opcode = MCID.getOpcode();
+ if (!(X86::isJCC(Opcode) || X86::isSETCC(Opcode) || X86::isCMOVCC(Opcode)))
+ return -1;
+ // Assume that the condition code is always the last use operand.
+ unsigned NumUses = MCID.getNumOperands() - MCID.getNumDefs();
+ return NumUses - 1;
+}
+
+X86::CondCode X86::getCondFromMI(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ int CondNo = getCondSrcNoFromDesc(MCID);
+ if (CondNo < 0)
+ return X86::COND_INVALID;
+ CondNo += MCID.getNumDefs();
+ return static_cast<X86::CondCode>(MI.getOperand(CondNo).getImm());
+}
+
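The index arithmetic in the two helpers above, restated standalone: defs come first in the operand list and the condition code is the last use, so its use number is NumUses - 1 and its MI operand index adds NumDefs back. For example, assuming CMOV32rr has one def and three uses (two sources plus the condition), the condition lands at MI operand 3.

    // Model of getCondSrcNoFromDesc() plus the adjustment in getCondFromMI().
    int condOperandIndex(unsigned NumOperands, unsigned NumDefs) {
      unsigned NumUses = NumOperands - NumDefs;
      if (NumUses == 0)
        return -1; // no use operands, so no condition code
      return static_cast<int>(NumUses - 1 + NumDefs);
    }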
X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default: return X86::COND_INVALID;
- case X86::JCC_1:
- return static_cast<X86::CondCode>(
- MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
- }
+ return X86::isJCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
+ : X86::COND_INVALID;
}
-/// Return condition code of a SETCC opcode.
X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default: return X86::COND_INVALID;
- case X86::SETCCr: case X86::SETCCm:
- return static_cast<X86::CondCode>(
- MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
- }
+ return X86::isSETCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
+ : X86::COND_INVALID;
}
-/// Return condition code of a CMov opcode.
X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default: return X86::COND_INVALID;
- case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr:
- case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm:
- return static_cast<X86::CondCode>(
- MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
- }
+ return X86::isCMOVCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
+ : X86::COND_INVALID;
}
/// Return the inverse of the specified condition,
@@ -3166,8 +3043,7 @@ bool X86InstrInfo::AnalyzeBranchImpl(
}
// If the block has any instructions after a JMP, delete them.
- while (std::next(I) != MBB.end())
- std::next(I)->eraseFromParent();
+ MBB.erase(std::next(I), MBB.end());
Cond.clear();
FBB = nullptr;
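The one-line change above swaps an erase-one-at-a-time loop for a single range erase; the same simplification on a plain std::list, for reference:

    #include <iterator>
    #include <list>

    void dropTailAfter(std::list<int> &Block, std::list<int>::iterator I) {
      Block.erase(std::next(I), Block.end()); // replaces the while-loop erase
    }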
@@ -3464,7 +3340,7 @@ bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
Register FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
- if (!Subtarget.hasCMov())
+ if (!Subtarget.canUseCMOV())
return false;
if (Cond.size() != 1)
return false;
@@ -3708,10 +3584,6 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
- if (X86::FR16XRegClass.hasSubClassEq(RC)) {
- assert(STI.hasFP16());
- return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
- }
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return load ? X86::MOV16rm : X86::MOV16mr;
case 4:
@@ -3739,6 +3611,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
X86::VK16PAIRRegClass.hasSubClassEq(RC))
return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
+ if ((X86::FR16RegClass.hasSubClassEq(RC) ||
+ X86::FR16XRegClass.hasSubClassEq(RC)) &&
+ STI.hasFP16())
+ return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
llvm_unreachable("Unknown 4-byte regclass");
case 8:
if (X86::GR64RegClass.hasSubClassEq(RC))
@@ -3845,6 +3721,35 @@ X86InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
return AM;
}
+bool X86InstrInfo::verifyInstruction(const MachineInstr &MI,
+ StringRef &ErrInfo) const {
+ Optional<ExtAddrMode> AMOrNone = getAddrModeFromMemoryOp(MI, nullptr);
+ if (!AMOrNone)
+ return true;
+
+ ExtAddrMode AM = *AMOrNone;
+
+ if (AM.ScaledReg != X86::NoRegister) {
+ switch (AM.Scale) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ ErrInfo = "Scale factor in address must be 1, 2, 4 or 8";
+ return false;
+ }
+ }
+ if (!isInt<32>(AM.Displacement)) {
+ ErrInfo = "Displacement in address must fit into 32-bit signed "
+ "integer";
+ return false;
+ }
+
+ return true;
+}
+
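The two constraints the new verifier hook enforces, restated as a standalone check for the x86 address form base + index*scale + disp (scale of 1, 2, 4, or 8 and a displacement that fits in a signed 32-bit immediate):

    #include <cstdint>

    bool isValidX86AddrMode(bool HasIndexReg, unsigned Scale,
                            std::int64_t Disp) {
      if (HasIndexReg && Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8)
        return false; // SIB encoding only supports these scale factors
      return Disp >= INT32_MIN && Disp <= INT32_MAX;
    }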
bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
const Register Reg,
int64_t &ImmVal) const {
@@ -3949,12 +3854,12 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
"Stack slot too small for store");
if (RC->getID() == X86::TILERegClassID) {
unsigned Opc = X86::TILESTORED;
// tilestored %tmm, (%sp, %idx)
- MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
MachineInstr *NewMI =
@@ -3963,6 +3868,14 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineOperand &MO = NewMI->getOperand(2);
MO.setReg(VirtReg);
MO.setIsKill(true);
+ } else if ((RC->getID() == X86::FR16RegClassID ||
+ RC->getID() == X86::FR16XRegClassID) &&
+ !Subtarget.hasFP16()) {
+ unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZmr
+ : Subtarget.hasAVX() ? X86::VMOVSSmr
+ : X86::MOVSSmr;
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
} else {
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
@@ -3991,6 +3904,14 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineOperand &MO = NewMI->getOperand(3);
MO.setReg(VirtReg);
MO.setIsKill(true);
+ } else if ((RC->getID() == X86::FR16RegClassID ||
+ RC->getID() == X86::FR16XRegClassID) &&
+ !Subtarget.hasFP16()) {
+ unsigned Opc = Subtarget.hasAVX512() ? X86::VMOVSSZrm
+ : Subtarget.hasAVX() ? X86::VMOVSSrm
+ : X86::MOVSSrm;
+ addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
+ FrameIdx);
} else {
const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -4375,7 +4296,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
}
CmpInstr.setDesc(get(NewOpcode));
- CmpInstr.RemoveOperand(0);
+ CmpInstr.removeOperand(0);
// Mutating this instruction invalidates any debug data associated with it.
CmpInstr.dropDebugNumber();
// Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
@@ -4423,6 +4344,23 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
MI = &Inst;
break;
}
+
+ // Look back for the following pattern, in which case the test64rr
+ // instruction could be erased.
+ //
+ // Example:
+ // %reg = and32ri %in_reg, 5
+ // ... // EFLAGS not changed.
+ // %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
+ // test64rr %src_reg, %src_reg, implicit-def $eflags
+ MachineInstr *AndInstr = nullptr;
+ if (IsCmpZero &&
+ findRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI,
+ NoSignFlag, ClearsOverflowFlag)) {
+ assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
+ MI = AndInstr;
+ break;
+ }
// Cannot find other candidates before definition of SrcReg.
return false;
}
@@ -4524,6 +4462,11 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return false;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
+ // If SF is used, but the instruction doesn't update SF, then we
+ // can't do the optimization.
+ if (NoSignFlag)
+ return false;
+ LLVM_FALLTHROUGH;
case X86::COND_O: case X86::COND_NO:
// If OF is used, the instruction needs to clear it like CmpZero does.
if (!ClearsOverflowFlag)
@@ -4811,7 +4754,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP32r));
}
- MIB->RemoveOperand(1);
+ MIB->removeOperand(1);
MIB->addImplicitDefUseOperands(*MBB.getParent());
// Build CFI if necessary.
@@ -4918,7 +4861,7 @@ static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
MIB->setDesc(Desc);
int64_t ShiftAmt = MIB->getOperand(2).getImm();
// Temporarily remove the immediate so we can add another source register.
- MIB->RemoveOperand(2);
+ MIB->removeOperand(2);
// Add the register. Don't copy the kill flag if there is one.
MIB.addReg(MIB.getReg(1),
getUndefRegState(MIB->getOperand(1).isUndef()));
@@ -4949,6 +4892,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
+ case X86::FsFLD0SH:
case X86::FsFLD0F128:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0: {
@@ -5026,7 +4970,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned MaskState = getRegState(MIB->getOperand(1));
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
MIB->setDesc(get(Opc));
// VPTERNLOG needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
@@ -5165,6 +5109,255 @@ static bool hasPartialRegUpdate(unsigned Opcode,
case X86::SQRTSDr_Int:
case X86::SQRTSDm_Int:
return true;
+ case X86::VFCMULCPHZ128rm:
+ case X86::VFCMULCPHZ128rmb:
+ case X86::VFCMULCPHZ128rmbkz:
+ case X86::VFCMULCPHZ128rmkz:
+ case X86::VFCMULCPHZ128rr:
+ case X86::VFCMULCPHZ128rrkz:
+ case X86::VFCMULCPHZ256rm:
+ case X86::VFCMULCPHZ256rmb:
+ case X86::VFCMULCPHZ256rmbkz:
+ case X86::VFCMULCPHZ256rmkz:
+ case X86::VFCMULCPHZ256rr:
+ case X86::VFCMULCPHZ256rrkz:
+ case X86::VFCMULCPHZrm:
+ case X86::VFCMULCPHZrmb:
+ case X86::VFCMULCPHZrmbkz:
+ case X86::VFCMULCPHZrmkz:
+ case X86::VFCMULCPHZrr:
+ case X86::VFCMULCPHZrrb:
+ case X86::VFCMULCPHZrrbkz:
+ case X86::VFCMULCPHZrrkz:
+ case X86::VFMULCPHZ128rm:
+ case X86::VFMULCPHZ128rmb:
+ case X86::VFMULCPHZ128rmbkz:
+ case X86::VFMULCPHZ128rmkz:
+ case X86::VFMULCPHZ128rr:
+ case X86::VFMULCPHZ128rrkz:
+ case X86::VFMULCPHZ256rm:
+ case X86::VFMULCPHZ256rmb:
+ case X86::VFMULCPHZ256rmbkz:
+ case X86::VFMULCPHZ256rmkz:
+ case X86::VFMULCPHZ256rr:
+ case X86::VFMULCPHZ256rrkz:
+ case X86::VFMULCPHZrm:
+ case X86::VFMULCPHZrmb:
+ case X86::VFMULCPHZrmbkz:
+ case X86::VFMULCPHZrmkz:
+ case X86::VFMULCPHZrr:
+ case X86::VFMULCPHZrrb:
+ case X86::VFMULCPHZrrbkz:
+ case X86::VFMULCPHZrrkz:
+ case X86::VFCMULCSHZrm:
+ case X86::VFCMULCSHZrmkz:
+ case X86::VFCMULCSHZrr:
+ case X86::VFCMULCSHZrrb:
+ case X86::VFCMULCSHZrrbkz:
+ case X86::VFCMULCSHZrrkz:
+ case X86::VFMULCSHZrm:
+ case X86::VFMULCSHZrmkz:
+ case X86::VFMULCSHZrr:
+ case X86::VFMULCSHZrrb:
+ case X86::VFMULCSHZrrbkz:
+ case X86::VFMULCSHZrrkz:
+ return Subtarget.hasMULCFalseDeps();
+ case X86::VPERMDYrm:
+ case X86::VPERMDYrr:
+ case X86::VPERMQYmi:
+ case X86::VPERMQYri:
+ case X86::VPERMPSYrm:
+ case X86::VPERMPSYrr:
+ case X86::VPERMPDYmi:
+ case X86::VPERMPDYri:
+ case X86::VPERMDZ256rm:
+ case X86::VPERMDZ256rmb:
+ case X86::VPERMDZ256rmbkz:
+ case X86::VPERMDZ256rmkz:
+ case X86::VPERMDZ256rr:
+ case X86::VPERMDZ256rrkz:
+ case X86::VPERMDZrm:
+ case X86::VPERMDZrmb:
+ case X86::VPERMDZrmbkz:
+ case X86::VPERMDZrmkz:
+ case X86::VPERMDZrr:
+ case X86::VPERMDZrrkz:
+ case X86::VPERMQZ256mbi:
+ case X86::VPERMQZ256mbikz:
+ case X86::VPERMQZ256mi:
+ case X86::VPERMQZ256mikz:
+ case X86::VPERMQZ256ri:
+ case X86::VPERMQZ256rikz:
+ case X86::VPERMQZ256rm:
+ case X86::VPERMQZ256rmb:
+ case X86::VPERMQZ256rmbkz:
+ case X86::VPERMQZ256rmkz:
+ case X86::VPERMQZ256rr:
+ case X86::VPERMQZ256rrkz:
+ case X86::VPERMQZmbi:
+ case X86::VPERMQZmbikz:
+ case X86::VPERMQZmi:
+ case X86::VPERMQZmikz:
+ case X86::VPERMQZri:
+ case X86::VPERMQZrikz:
+ case X86::VPERMQZrm:
+ case X86::VPERMQZrmb:
+ case X86::VPERMQZrmbkz:
+ case X86::VPERMQZrmkz:
+ case X86::VPERMQZrr:
+ case X86::VPERMQZrrkz:
+ case X86::VPERMPSZ256rm:
+ case X86::VPERMPSZ256rmb:
+ case X86::VPERMPSZ256rmbkz:
+ case X86::VPERMPSZ256rmkz:
+ case X86::VPERMPSZ256rr:
+ case X86::VPERMPSZ256rrkz:
+ case X86::VPERMPSZrm:
+ case X86::VPERMPSZrmb:
+ case X86::VPERMPSZrmbkz:
+ case X86::VPERMPSZrmkz:
+ case X86::VPERMPSZrr:
+ case X86::VPERMPSZrrkz:
+ case X86::VPERMPDZ256mbi:
+ case X86::VPERMPDZ256mbikz:
+ case X86::VPERMPDZ256mi:
+ case X86::VPERMPDZ256mikz:
+ case X86::VPERMPDZ256ri:
+ case X86::VPERMPDZ256rikz:
+ case X86::VPERMPDZ256rm:
+ case X86::VPERMPDZ256rmb:
+ case X86::VPERMPDZ256rmbkz:
+ case X86::VPERMPDZ256rmkz:
+ case X86::VPERMPDZ256rr:
+ case X86::VPERMPDZ256rrkz:
+ case X86::VPERMPDZmbi:
+ case X86::VPERMPDZmbikz:
+ case X86::VPERMPDZmi:
+ case X86::VPERMPDZmikz:
+ case X86::VPERMPDZri:
+ case X86::VPERMPDZrikz:
+ case X86::VPERMPDZrm:
+ case X86::VPERMPDZrmb:
+ case X86::VPERMPDZrmbkz:
+ case X86::VPERMPDZrmkz:
+ case X86::VPERMPDZrr:
+ case X86::VPERMPDZrrkz:
+ return Subtarget.hasPERMFalseDeps();
+ case X86::VRANGEPDZ128rmbi:
+ case X86::VRANGEPDZ128rmbikz:
+ case X86::VRANGEPDZ128rmi:
+ case X86::VRANGEPDZ128rmikz:
+ case X86::VRANGEPDZ128rri:
+ case X86::VRANGEPDZ128rrikz:
+ case X86::VRANGEPDZ256rmbi:
+ case X86::VRANGEPDZ256rmbikz:
+ case X86::VRANGEPDZ256rmi:
+ case X86::VRANGEPDZ256rmikz:
+ case X86::VRANGEPDZ256rri:
+ case X86::VRANGEPDZ256rrikz:
+ case X86::VRANGEPDZrmbi:
+ case X86::VRANGEPDZrmbikz:
+ case X86::VRANGEPDZrmi:
+ case X86::VRANGEPDZrmikz:
+ case X86::VRANGEPDZrri:
+ case X86::VRANGEPDZrrib:
+ case X86::VRANGEPDZrribkz:
+ case X86::VRANGEPDZrrikz:
+ case X86::VRANGEPSZ128rmbi:
+ case X86::VRANGEPSZ128rmbikz:
+ case X86::VRANGEPSZ128rmi:
+ case X86::VRANGEPSZ128rmikz:
+ case X86::VRANGEPSZ128rri:
+ case X86::VRANGEPSZ128rrikz:
+ case X86::VRANGEPSZ256rmbi:
+ case X86::VRANGEPSZ256rmbikz:
+ case X86::VRANGEPSZ256rmi:
+ case X86::VRANGEPSZ256rmikz:
+ case X86::VRANGEPSZ256rri:
+ case X86::VRANGEPSZ256rrikz:
+ case X86::VRANGEPSZrmbi:
+ case X86::VRANGEPSZrmbikz:
+ case X86::VRANGEPSZrmi:
+ case X86::VRANGEPSZrmikz:
+ case X86::VRANGEPSZrri:
+ case X86::VRANGEPSZrrib:
+ case X86::VRANGEPSZrribkz:
+ case X86::VRANGEPSZrrikz:
+ case X86::VRANGESDZrmi:
+ case X86::VRANGESDZrmikz:
+ case X86::VRANGESDZrri:
+ case X86::VRANGESDZrrib:
+ case X86::VRANGESDZrribkz:
+ case X86::VRANGESDZrrikz:
+ case X86::VRANGESSZrmi:
+ case X86::VRANGESSZrmikz:
+ case X86::VRANGESSZrri:
+ case X86::VRANGESSZrrib:
+ case X86::VRANGESSZrribkz:
+ case X86::VRANGESSZrrikz:
+ return Subtarget.hasRANGEFalseDeps();
+ case X86::VGETMANTSSZrmi:
+ case X86::VGETMANTSSZrmikz:
+ case X86::VGETMANTSSZrri:
+ case X86::VGETMANTSSZrrib:
+ case X86::VGETMANTSSZrribkz:
+ case X86::VGETMANTSSZrrikz:
+ case X86::VGETMANTSDZrmi:
+ case X86::VGETMANTSDZrmikz:
+ case X86::VGETMANTSDZrri:
+ case X86::VGETMANTSDZrrib:
+ case X86::VGETMANTSDZrribkz:
+ case X86::VGETMANTSDZrrikz:
+ case X86::VGETMANTSHZrmi:
+ case X86::VGETMANTSHZrmikz:
+ case X86::VGETMANTSHZrri:
+ case X86::VGETMANTSHZrrib:
+ case X86::VGETMANTSHZrribkz:
+ case X86::VGETMANTSHZrrikz:
+ case X86::VGETMANTPSZ128rmbi:
+ case X86::VGETMANTPSZ128rmbikz:
+ case X86::VGETMANTPSZ128rmi:
+ case X86::VGETMANTPSZ128rmikz:
+ case X86::VGETMANTPSZ256rmbi:
+ case X86::VGETMANTPSZ256rmbikz:
+ case X86::VGETMANTPSZ256rmi:
+ case X86::VGETMANTPSZ256rmikz:
+ case X86::VGETMANTPSZrmbi:
+ case X86::VGETMANTPSZrmbikz:
+ case X86::VGETMANTPSZrmi:
+ case X86::VGETMANTPSZrmikz:
+ case X86::VGETMANTPDZ128rmbi:
+ case X86::VGETMANTPDZ128rmbikz:
+ case X86::VGETMANTPDZ128rmi:
+ case X86::VGETMANTPDZ128rmikz:
+ case X86::VGETMANTPDZ256rmbi:
+ case X86::VGETMANTPDZ256rmbikz:
+ case X86::VGETMANTPDZ256rmi:
+ case X86::VGETMANTPDZ256rmikz:
+ case X86::VGETMANTPDZrmbi:
+ case X86::VGETMANTPDZrmbikz:
+ case X86::VGETMANTPDZrmi:
+ case X86::VGETMANTPDZrmikz:
+ return Subtarget.hasGETMANTFalseDeps();
+ case X86::VPMULLQZ128rm:
+ case X86::VPMULLQZ128rmb:
+ case X86::VPMULLQZ128rmbkz:
+ case X86::VPMULLQZ128rmkz:
+ case X86::VPMULLQZ128rr:
+ case X86::VPMULLQZ128rrkz:
+ case X86::VPMULLQZ256rm:
+ case X86::VPMULLQZ256rmb:
+ case X86::VPMULLQZ256rmbkz:
+ case X86::VPMULLQZ256rmkz:
+ case X86::VPMULLQZ256rr:
+ case X86::VPMULLQZ256rrkz:
+ case X86::VPMULLQZrm:
+ case X86::VPMULLQZrmb:
+ case X86::VPMULLQZrmbkz:
+ case X86::VPMULLQZrmkz:
+ case X86::VPMULLQZrr:
+ case X86::VPMULLQZrrkz:
+ return Subtarget.hasMULLQFalseDeps();
// GPR
case X86::POPCNT32rm:
case X86::POPCNT32rr:
@@ -5591,6 +5784,28 @@ void X86InstrInfo::breakPartialRegDependency(
.addReg(XReg, RegState::Undef)
.addReg(Reg, RegState::ImplicitDefine);
MI.addRegisterKilled(Reg, TRI, true);
+ } else if (X86::VR128XRegClass.contains(Reg)) {
+ // Only handle VLX targets.
+ if (!Subtarget.hasVLX())
+ return;
+ // Since vxorps requires AVX512DQ, vpxord should be the best choice.
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MI.addRegisterKilled(Reg, TRI, true);
+ } else if (X86::VR256XRegClass.contains(Reg) ||
+ X86::VR512RegClass.contains(Reg)) {
+ // Only handle VLX targets.
+ if (!Subtarget.hasVLX())
+ return;
+ // Use vpxord to clear the full ymm/zmm register: the xor reads and
+ // writes the xmm sub-register, which zeroes the upper bits as well.
+ Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), XReg)
+ .addReg(XReg, RegState::Undef)
+ .addReg(XReg, RegState::Undef)
+ .addReg(Reg, RegState::ImplicitDefine);
+ MI.addRegisterKilled(Reg, TRI, true);
} else if (X86::GR64RegClass.contains(Reg)) {
// Using XOR32rr because it has shorter encoding and zeros up the upper bits
// as well.
@@ -6413,6 +6628,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_FsFLD0SS:
Alignment = Align(4);
break;
+ case X86::FsFLD0SH:
case X86::AVX512_FsFLD0SH:
Alignment = Align(2);
break;
@@ -6451,6 +6667,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
+ case X86::FsFLD0SH:
case X86::AVX512_FsFLD0SH:
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
@@ -6490,7 +6707,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = Type::getDoubleTy(MF.getFunction().getContext());
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
- else if (Opc == X86::AVX512_FsFLD0SH)
+ else if (Opc == X86::FsFLD0SH || Opc == X86::AVX512_FsFLD0SH)
Ty = Type::getHalfTy(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
@@ -7170,7 +7387,7 @@ bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// ENDBR instructions should not be scheduled around.
unsigned Opcode = MI.getOpcode();
if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
- Opcode == X86::LDTILECFG)
+ Opcode == X86::PLDTILECFGV)
return true;
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
@@ -9298,12 +9515,10 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
// We check to see if CFI Instructions are present, and if they are
// we find the number of CFI Instructions in the candidates.
unsigned CFICount = 0;
- MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
- for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
- Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- if (MBBI->isCFIInstruction())
+ for (auto &I : make_range(RepeatedSequenceLocs[0].front(),
+ std::next(RepeatedSequenceLocs[0].back()))) {
+ if (I.isCFIInstruction())
CFICount++;
- MBBI++;
}
// We compare the number of found CFI Instructions to the number of CFI
@@ -9440,7 +9655,7 @@ MachineBasicBlock::iterator
X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It,
MachineFunction &MF,
- const outliner::Candidate &C) const {
+ outliner::Candidate &C) const {
// Is it a tail call?
if (C.CallConstructionID == MachineOutlinerTailCall) {
// Yes, just insert a JMP.
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 33ce55bbdb2b..4943d2152fd2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -40,13 +40,21 @@ std::pair<CondCode, bool> getX86ConditionCode(CmpInst::Predicate Predicate);
/// Return a cmov opcode for the given register size in bytes, and operand type.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false);
-// Turn jCC instruction into condition code.
+/// Return the source operand # for condition code by \p MCID. If the
+/// instruction doesn't have a condition code, return -1.
+int getCondSrcNoFromDesc(const MCInstrDesc &MCID);
+
+/// Return the condition code of the instruction. If the instruction doesn't
+/// have a condition code, return X86::COND_INVALID.
+CondCode getCondFromMI(const MachineInstr &MI);
+
+// Turn JCC instruction into condition code.
CondCode getCondFromBranch(const MachineInstr &MI);
-// Turn setCC instruction into condition code.
+// Turn SETCC instruction into condition code.
CondCode getCondFromSETCC(const MachineInstr &MI);
-// Turn CMov instruction into condition code.
+// Turn CMOV instruction into condition code.
CondCode getCondFromCMov(const MachineInstr &MI);
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
@@ -552,8 +560,10 @@ public:
MachineBasicBlock::iterator
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
MachineBasicBlock::iterator &It, MachineFunction &MF,
- const outliner::Candidate &C) const override;
+ outliner::Candidate &C) const override;
+ bool verifyInstruction(const MachineInstr &MI,
+ StringRef &ErrInfo) const override;
#define GET_INSTRINFO_HELPER_DECLS
#include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index fee9939b8dfc..7f6ef3479d40 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -388,17 +388,19 @@ def X86AbsMemAsmOperand : AsmOperandClass {
}
class X86MemOperand<string printMethod,
- AsmOperandClass parserMatchClass = X86MemAsmOperand> : Operand<iPTR> {
+ AsmOperandClass parserMatchClass = X86MemAsmOperand,
+ int size = 0> : Operand<iPTR> {
let PrintMethod = printMethod;
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
let ParserMatchClass = parserMatchClass;
let OperandType = "OPERAND_MEMORY";
+ int Size = size;
}
// Gather mem operands
class X86VMemOperand<RegisterClass RC, string printMethod,
- AsmOperandClass parserMatchClass>
- : X86MemOperand<printMethod, parserMatchClass> {
+ AsmOperandClass parserMatchClass, int size = 0>
+ : X86MemOperand<printMethod, parserMatchClass, size> {
let MIOperandInfo = (ops ptr_rc, i8imm, RC, i32imm, SEGMENT_REG);
}
@@ -413,48 +415,45 @@ def opaquemem : X86MemOperand<"printMemReference">;
def sibmem: X86MemOperand<"printMemReference", X86SibMemOperand>;
-def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand>;
-def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
-def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
-def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
-def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
-def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
-def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
-def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
-def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
-def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
-def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand>;
-def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
-def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
-def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
+def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8>;
+def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>;
+def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>;
+def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>;
+def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>;
+def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>;
+def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>;
+def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand, 16>;
+def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand, 32>;
+def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64>;
+def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand, 80>;
+def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>;
+def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>;
+def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>;
// Gather mem operands
-def vx64mem : X86VMemOperand<VR128, "printqwordmem", X86Mem64_RC128Operand>;
-def vx128mem : X86VMemOperand<VR128, "printxmmwordmem", X86Mem128_RC128Operand>;
-def vx256mem : X86VMemOperand<VR128, "printymmwordmem", X86Mem256_RC128Operand>;
-def vy128mem : X86VMemOperand<VR256, "printxmmwordmem", X86Mem128_RC256Operand>;
-def vy256mem : X86VMemOperand<VR256, "printymmwordmem", X86Mem256_RC256Operand>;
-
-def vx64xmem : X86VMemOperand<VR128X, "printqwordmem", X86Mem64_RC128XOperand>;
-def vx128xmem : X86VMemOperand<VR128X, "printxmmwordmem", X86Mem128_RC128XOperand>;
-def vx256xmem : X86VMemOperand<VR128X, "printymmwordmem", X86Mem256_RC128XOperand>;
-def vy128xmem : X86VMemOperand<VR256X, "printxmmwordmem", X86Mem128_RC256XOperand>;
-def vy256xmem : X86VMemOperand<VR256X, "printymmwordmem", X86Mem256_RC256XOperand>;
-def vy512xmem : X86VMemOperand<VR256X, "printzmmwordmem", X86Mem512_RC256XOperand>;
-def vz256mem : X86VMemOperand<VR512, "printymmwordmem", X86Mem256_RC512Operand>;
-def vz512mem : X86VMemOperand<VR512, "printzmmwordmem", X86Mem512_RC512Operand>;
+def vx64mem : X86VMemOperand<VR128, "printqwordmem", X86Mem64_RC128Operand, 64>;
+def vx128mem : X86VMemOperand<VR128, "printxmmwordmem", X86Mem128_RC128Operand, 128>;
+def vx256mem : X86VMemOperand<VR128, "printymmwordmem", X86Mem256_RC128Operand, 256>;
+def vy128mem : X86VMemOperand<VR256, "printxmmwordmem", X86Mem128_RC256Operand, 128>;
+def vy256mem : X86VMemOperand<VR256, "printymmwordmem", X86Mem256_RC256Operand, 256>;
+
+def vx64xmem : X86VMemOperand<VR128X, "printqwordmem", X86Mem64_RC128XOperand, 64>;
+def vx128xmem : X86VMemOperand<VR128X, "printxmmwordmem", X86Mem128_RC128XOperand, 128>;
+def vx256xmem : X86VMemOperand<VR128X, "printymmwordmem", X86Mem256_RC128XOperand, 256>;
+def vy128xmem : X86VMemOperand<VR256X, "printxmmwordmem", X86Mem128_RC256XOperand, 128>;
+def vy256xmem : X86VMemOperand<VR256X, "printymmwordmem", X86Mem256_RC256XOperand, 256>;
+def vy512xmem : X86VMemOperand<VR256X, "printzmmwordmem", X86Mem512_RC256XOperand, 512>;
+def vz256mem : X86VMemOperand<VR512, "printymmwordmem", X86Mem256_RC512Operand, 256>;
+def vz512mem : X86VMemOperand<VR512, "printzmmwordmem", X86Mem512_RC512Operand, 512>;
// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
// of a plain GPR, so that it doesn't potentially require a REX prefix.
def ptr_rc_norex : PointerLikeRegClass<2>;
def ptr_rc_norex_nosp : PointerLikeRegClass<3>;
-def i8mem_NOREX : Operand<iPTR> {
- let PrintMethod = "printbytemem";
+def i8mem_NOREX : X86MemOperand<"printbytemem", X86Mem8AsmOperand, 8> {
let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm,
SEGMENT_REG);
- let ParserMatchClass = X86Mem8AsmOperand;
- let OperandType = "OPERAND_MEMORY";
}
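The new Size field records the operand width in bits, with 0 keeping the old width-agnostic behavior. What such an annotation enables, in standalone form (illustrative; where the field is consumed lies outside this hunk):

    bool memOperandWidthMatches(unsigned DeclaredBits, unsigned ParsedBits) {
      // Size == 0 means the operand never constrains the memory width.
      return DeclaredBits == 0 || DeclaredBits == ParsedBits;
    }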
// GPRs available for tailcall.
@@ -840,11 +839,11 @@ def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
// Define X86-specific addressing mode.
def addr : ComplexPattern<iPTR, 5, "selectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "selectLEAAddr",
- [add, sub, mul, X86mul_imm, shl, or, frameindex],
+ [add, sub, mul, X86mul_imm, shl, or, xor, frameindex],
[]>;
// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
def lea64_32addr : ComplexPattern<i32, 5, "selectLEA64_32Addr",
- [add, sub, mul, X86mul_imm, shl, or,
+ [add, sub, mul, X86mul_imm, shl, or, xor,
frameindex, X86WrapperRIP],
[]>;
@@ -855,7 +854,7 @@ def tls32baseaddr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
def lea64addr : ComplexPattern<i64, 5, "selectLEAAddr",
- [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ [add, sub, mul, X86mul_imm, shl, or, xor, frameindex,
X86WrapperRIP], []>;
def tls64addr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
@@ -875,12 +874,12 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
// X86 Instruction Predicate Definitions.
def TruePredicate : Predicate<"true">;
-def HasCMov : Predicate<"Subtarget->hasCMov()">;
-def NoCMov : Predicate<"!Subtarget->hasCMov()">;
+def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
+def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
def HasMMX : Predicate<"Subtarget->hasMMX()">;
-def Has3DNow : Predicate<"Subtarget->has3DNow()">;
-def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
+def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">;
+def Has3DNowA : Predicate<"Subtarget->hasThreeDNowA()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
@@ -981,8 +980,8 @@ def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">;
-def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">;
-def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
+def HasCX8 : Predicate<"Subtarget->hasCX8()">;
+def HasCX16 : Predicate<"Subtarget->hasCX16()">;
def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
def HasKL : Predicate<"Subtarget->hasKL()">;
@@ -996,25 +995,25 @@ def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
+ AssemblerPredicate<(all_of (not Is64Bit)), "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<(all_of Mode64Bit), "64-bit mode">;
+ AssemblerPredicate<(all_of Is64Bit), "64-bit mode">;
def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">;
def NotLP64 : Predicate<"!Subtarget->isTarget64BitLP64()">;
def In16BitMode : Predicate<"Subtarget->is16Bit()">,
- AssemblerPredicate<(all_of Mode16Bit), "16-bit mode">;
+ AssemblerPredicate<(all_of Is16Bit), "16-bit mode">;
def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
- AssemblerPredicate<(all_of (not Mode16Bit)), "Not 16-bit mode">;
+ AssemblerPredicate<(all_of (not Is16Bit)), "Not 16-bit mode">;
def In32BitMode : Predicate<"Subtarget->is32Bit()">,
- AssemblerPredicate<(all_of Mode32Bit), "32-bit mode">;
+ AssemblerPredicate<(all_of Is32Bit), "32-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
"Subtarget->getFrameLowering()->hasFP(*MF)"> {
let RecomputePerFunction = 1;
}
-def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
-def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
+def IsPS : Predicate<"Subtarget->isTargetPS()">;
+def NotPS : Predicate<"!Subtarget->isTargetPS()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -2229,13 +2228,13 @@ def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
- "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>;
+ "cmpxchg8b\t$dst", []>, TB, Requires<[HasCX8]>;
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
// NOTE: In64BitMode check needed for the AssemblerPredicate.
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
"cmpxchg16b\t$dst", []>,
- TB, Requires<[HasCmpxchg16b,In64BitMode]>;
+ TB, Requires<[HasCX16,In64BitMode]>;
} // SchedRW, mayLoad, mayStore, hasSideEffects
@@ -2851,7 +2850,7 @@ let SchedRW = [WriteSystem] in {
def TPAUSE : I<0xAE, MRM6r,
(outs), (ins GR32orGR64:$src), "tpause\t$src",
[(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>,
- PD, Requires<[HasWAITPKG]>, NotMemoryFoldable;
+ PD, Requires<[HasWAITPKG]>;
}
} // SchedRW
@@ -2939,7 +2938,7 @@ def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
let SchedRW = [WriteSystem] in {
let Uses = [EAX, EDX] in
def INVLPGB32 : I<0x01, MRM_FE, (outs), (ins),
- "invlpgb}", []>,
+ "invlpgb", []>,
PS, Requires<[Not64BitMode]>;
let Uses = [RAX, EDX] in
def INVLPGB64 : I<0x01, MRM_FE, (outs), (ins),
@@ -3124,7 +3123,7 @@ def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
let Predicates = [HasCLWB], SchedRW = [WriteLoad] in
def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src",
- [(int_x86_clwb addr:$src)]>, PD, NotMemoryFoldable;
+ [(int_x86_clwb addr:$src)]>, PD;
let Predicates = [HasCLDEMOTE], SchedRW = [WriteLoad] in
def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src",
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index aeecc25ddea2..4196aff240c4 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -211,10 +211,10 @@ def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
(MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
+def MMX_MOVD64from64mr : MMXRI<0x7E, MRMDestMem,
(outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.MMX.MR]>;
+ Sched<[SchedWriteVecMoveLS.MMX.MR]>, NotMemoryFoldable;
let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in {
let canFoldAsLoad = 1 in
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 035f139e6f33..06cb280e860a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -112,6 +112,8 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
+ def FsFLD0SH : I<0, Pseudo, (outs FR16:$dst), (ins), "",
+ [(set FR16:$dst, fp16imm0)]>, Requires<[HasSSE2, NoAVX512]>;
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
@@ -3471,9 +3473,9 @@ defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
+defm PAVGB : PDI_binop_all<0xE0, "pavgb", avgceilu, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
+defm PAVGW : PDI_binop_all<0xE3, "pavgw", avgceilu, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
SchedWriteVecIMul, 1, NoVLX>;
@@ -3965,6 +3967,20 @@ defm PINSRW : sse2_pinsrw, PD;
} // ExeDomain = SSEPackedInt
+// Always select FP16 instructions if available.
+let Predicates = [UseSSE2], AddedComplexity = -10 in {
+ def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (PINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>;
+ def : Pat<(store f16:$src, addr:$dst), (MOV16mr addr:$dst, (EXTRACT_SUBREG (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0), sub_16bit))>;
+ def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (PEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0), sub_16bit)>;
+ def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (PINSRWrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16)>;
+}
+
+let Predicates = [HasAVX, NoBWI] in {
+ def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16)>;
+ def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWrr (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0), sub_16bit)>;
+ def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16)>;
+}
+
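These fallback patterns route scalar f16 loads, stores, and bitcasts through
the integer pinsrw/pextrw path when no dedicated FP16 instructions are
available. A minimal sketch of source code that is expected to exercise them
(hypothetical standalone file; assumes a Clang new enough to enable _Float16
on plain x86-64, which is what this patch series works toward):

#include <cstdint>
#include <cstring>

uint16_t f16_bits(const _Float16 *p) {
  _Float16 h = *p;                       // f16 load: the PINSRWrm-style pattern
  uint16_t bits;
  std::memcpy(&bits, &h, sizeof(bits));  // f16 -> i16 bitcast: PEXTRW-style
  return bits;
}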
//===---------------------------------------------------------------------===//
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
@@ -3997,7 +4013,10 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
-let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
+// As VEX does not have separate instruction contexts for address size
+// overrides, VMASKMOVDQU and VMASKMOVDQU64 would have a decode conflict.
+// Prefer VMASKMOVDQU64.
+let Uses = [EDI], Predicates = [HasAVX], isAsmParserOnly = 1 in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
@@ -4008,32 +4027,16 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
- VEX, VEX_WIG, AdSize64;
-let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
-def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
- (ins VR128:$src, VR128:$mask), "",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
- VEX, VEX_WIG, AdSize32 {
- let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
- let AsmVariantName = "NonParsable";
-}
+ VEX, VEX_WIG;
-let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
+let Uses = [EDI], Predicates = [UseSSE2] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
- AdSize64;
-let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
-def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
- AdSize32 {
- let AsmVariantName = "NonParsable";
-}
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
} // ExeDomain = SSEPackedInt
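For context, the operation involved is the SSE2/AVX masked byte store, which
takes its destination pointer implicitly in RDI (64-bit mode) or EDI. With
the AdSize32/AdSize64 definitions removed, the EDI form in 64-bit code is
instead produced by attaching an address-size override during MC lowering
(see the X86MCInstLower hunk below). A hedged standalone sketch of code that
reaches this instruction:

#include <emmintrin.h>

void masked_store(__m128i data, __m128i mask, char *p) {
  // Lowers to (v)maskmovdqu: stores the bytes of `data` whose corresponding
  // mask bytes have their top bit set, at the address held in (R|E)DI.
  _mm_maskmoveu_si128(data, mask, p);
}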
@@ -5206,6 +5209,12 @@ let Predicates = [HasAVX, NoBWI] in
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+let Predicates = [UseSSE41] in
+ def : Pat<(store f16:$src, addr:$dst), (PEXTRWmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
+
+let Predicates = [HasAVX, NoBWI] in
+ def : Pat<(store f16:$src, addr:$dst), (VPEXTRWmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
+
/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
@@ -7588,6 +7597,21 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
(VPBROADCASTWYrr (VMOVDI2PDIrr
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR16:$src, sub_16bit))))>;
+
+ def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWYrm addr:$src)>;
+
+ def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128:$src))),
+ (VPBROADCASTWrr VR128:$src)>;
+ def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128:$src))),
+ (VPBROADCASTWYrr VR128:$src)>;
+
+ def : Pat<(v8f16 (X86VBroadcast (f16 FR16:$src))),
+ (VPBROADCASTWrr (COPY_TO_REGCLASS FR16:$src, VR128))>;
+ def : Pat<(v16f16 (X86VBroadcast (f16 FR16:$src))),
+ (VPBROADCASTWYrr (COPY_TO_REGCLASS FR16:$src, VR128))>;
}
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index b4dd99d08a62..3a653a56e534 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -25,18 +25,18 @@ let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
def UD1Wm : I<0xB9, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
- "ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
+ "ud1{w}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
def UD1Lm : I<0xB9, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
- "ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
+ "ud1{l}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
def UD1Qm : RI<0xB9, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
- "ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
+ "ud1{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def UD1Wr : I<0xB9, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
- "ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
+ "ud1{w}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
def UD1Lr : I<0xB9, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
- "ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
+ "ud1{l}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
def UD1Qr : RI<0xB9, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
- "ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
+ "ud1{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
}
let isTerminator = 1 in
@@ -71,9 +71,9 @@ def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
} // SchedRW
def : Pat<(debugtrap),
- (INT3)>, Requires<[NotPS4]>;
+ (INT3)>, Requires<[NotPS]>;
def : Pat<(debugtrap),
- (INT (i8 0x41))>, Requires<[IsPS4]>;
+ (INT (i8 0x41))>, Requires<[IsPS]>;
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
diff --git a/llvm/lib/Target/X86/X86InstrTSX.td b/llvm/lib/Target/X86/X86InstrTSX.td
index 28563eeb4484..7671eb4676ee 100644
--- a/llvm/lib/Target/X86/X86InstrTSX.td
+++ b/llvm/lib/Target/X86/X86InstrTSX.td
@@ -51,6 +51,8 @@ def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
// HLE prefixes
let SchedRW = [WriteSystem] in {
+// XACQUIRE and XRELEASE reuse REPNE and REP respectively.
+// For now, just prefer the REP versions.
let isAsmParserOnly = 1 in {
def XACQUIRE_PREFIX : I<0xF2, PrefixByte, (outs), (ins), "xacquire", []>;
def XRELEASE_PREFIX : I<0xF3, PrefixByte, (outs), (ins), "xrelease", []>;
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 2429aa113fb1..e6ecbb652100 100644
--- a/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -17,6 +17,8 @@
let Predicates = [NoAVX512] in {
// A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f16 (extractelt (v8f16 VR128:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v8f16 VR128:$src), FR16)>;
def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
@@ -34,8 +36,8 @@ let Predicates = [HasAVX512] in {
}
let Predicates = [NoVLX] in {
- def : Pat<(v8f16 (scalar_to_vector FR16X:$src)),
- (COPY_TO_REGCLASS FR16X:$src, VR128)>;
+ def : Pat<(v8f16 (scalar_to_vector FR16:$src)),
+ (COPY_TO_REGCLASS FR16:$src, VR128)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td
index a5976b7d2d74..d89e481f4522 100644
--- a/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/llvm/lib/Target/X86/X86InstrXOP.td
@@ -13,11 +13,11 @@
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
+ [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWriteVecALU.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (load addr:$src)))]>, XOP,
- Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>;
+ Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
}
let ExeDomain = SSEPackedInt in {
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 28d57ca9ae3c..ff701159b95e 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -21,7 +21,6 @@
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -31,6 +30,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -179,6 +179,8 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
return &X86::GR64RegClass;
}
if (RB.getID() == X86::VECRRegBankID) {
+ if (Ty.getSizeInBits() == 16)
+ return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
if (Ty.getSizeInBits() == 32)
return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
if (Ty.getSizeInBits() == 64)
@@ -516,7 +518,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
// is already on the instruction we're mutating, and thus we don't need to
// make any changes. So long as we select an opcode which is capable of
// loading or storing the appropriate size atomically, the rest of the
- // backend is required to respect the MMO state.
+ // backend is required to respect the MMO state.
if (!MemOp.isUnordered()) {
LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
return false;
@@ -537,12 +539,12 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
I.setDesc(TII.get(NewOpc));
MachineInstrBuilder MIB(MF, I);
if (Opc == TargetOpcode::G_LOAD) {
- I.RemoveOperand(1);
+ I.removeOperand(1);
addFullAddress(MIB, AM);
} else {
// G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
- I.RemoveOperand(1);
- I.RemoveOperand(0);
+ I.removeOperand(1);
+ I.removeOperand(0);
addFullAddress(MIB, AM).addUse(DefReg);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -625,7 +627,7 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
I.setDesc(TII.get(NewOpc));
MachineInstrBuilder MIB(MF, I);
- I.RemoveOperand(1);
+ I.removeOperand(1);
addFullAddress(MIB, AM);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -1412,7 +1414,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- MF.getDataLayout().getPointerSize(), Alignment);
+ LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
LoadInst =
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 1edec96bbec3..3c8be95b43e3 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -371,8 +371,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
- X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
X86_INTRINSIC_DATA(avx2_pblendvb, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -818,8 +818,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
- X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
+ X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -1281,8 +1281,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
- X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, ISD::AVGCEILU, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 4710e524931c..23976fb1a142 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -558,7 +558,7 @@ int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
}
// Find and eliminate gadget edges that have been mitigated.
- int MitigatedGadgets = 0, RemainingGadgets = 0;
+ int RemainingGadgets = 0;
NodeSet ReachableNodes{G};
for (const Node &RootN : G.nodes()) {
if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
@@ -586,7 +586,6 @@ int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
// This gadget's sink is reachable
++RemainingGadgets;
} else { // This gadget's sink is unreachable, and therefore mitigated
- ++MitigatedGadgets;
ElimEdges.insert(E);
}
}
diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
index 6b564a0356a6..70964b352b8c 100644
--- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 6206d8efb3d0..540182cb7911 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -74,6 +74,24 @@ static bool isAMXCast(Instruction *II) {
match(II, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(m_Value()));
}
+static bool isAMXIntrinsic(Value *I) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+ if (isAMXCast(II))
+ return false;
+  // Check whether the return type or a parameter is x86_amx. If so, the
+  // intrinsic must be an x86 AMX intrinsic.
+ if (II->getType()->isX86_AMXTy())
+ return true;
+ for (Value *V : II->args()) {
+ if (V->getType()->isX86_AMXTy())
+ return true;
+ }
+
+ return false;
+}
+
static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB,
Type *Ty) {
Function &F = *BB->getParent();
@@ -162,6 +180,36 @@ static std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
return std::make_pair(Row, Col);
}
+static std::pair<Value *, Value *> getShape(PHINode *Phi) {
+ Use &U = *(Phi->use_begin());
+ unsigned OpNo = U.getOperandNo();
+ User *V = U.getUser();
+  // TODO: We don't traverse all users. To keep the algorithm simple, we only
+  // walk the first user. If we can find the shape, return it; otherwise
+  // return nullptr, and the optimization for undef/zero incoming values is
+  // abandoned.
+ while (V) {
+ if (isAMXCast(dyn_cast<Instruction>(V))) {
+ if (V->use_empty())
+ break;
+ Use &U = *(V->use_begin());
+ OpNo = U.getOperandNo();
+ V = U.getUser();
+ } else if (isAMXIntrinsic(V)) {
+ return getShape(cast<IntrinsicInst>(V), OpNo);
+ } else if (isa<PHINode>(V)) {
+ if (V->use_empty())
+ break;
+ Use &U = *(V->use_begin());
+ V = U.getUser();
+ } else {
+ break;
+ }
+ }
+
+ return std::make_pair(nullptr, nullptr);
+}
+
namespace {
class X86LowerAMXType {
Function &Func;
@@ -655,6 +703,9 @@ class X86LowerAMXCast {
public:
X86LowerAMXCast(Function &F) : Func(F) {}
+ void combineCastStore(IntrinsicInst *Cast, StoreInst *ST);
+ void combineLoadCast(IntrinsicInst *Cast, LoadInst *LD);
+ bool combineLdSt(SmallVectorImpl<Instruction *> &Casts);
bool combineAMXcast(TargetLibraryInfo *TLI);
bool transformAMXCast(IntrinsicInst *AMXCast);
bool transformAllAMXCast();
@@ -720,11 +771,33 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
OldPhiNodes.insert(PN);
while (!PhiWorklist.empty()) {
auto *OldPN = PhiWorklist.pop_back_val();
- for (Value *IncValue : OldPN->incoming_values()) {
+ for (unsigned I = 0; I < OldPN->getNumOperands(); ++I) {
+ Value *IncValue = OldPN->getIncomingValue(I);
      // TODO: currently, we ignore cases where it is a const. In the future, we
      // might support const.
- if (isa<Constant>(IncValue))
- return false;
+ if (isa<Constant>(IncValue)) {
+ auto *IncConst = dyn_cast<Constant>(IncValue);
+ if (!isa<UndefValue>(IncValue) && !IncConst->isZeroValue())
+ return false;
+ Value *Row = nullptr, *Col = nullptr;
+ std::tie(Row, Col) = getShape(OldPN);
+        // TODO: If it is not constant, the Row and Col must dominate the
+        // tilezero that we are going to create.
+ if (!Row || !Col || !isa<Constant>(Row) || !isa<Constant>(Col))
+ return false;
+ // Create tilezero at the end of incoming block.
+ auto *Block = OldPN->getIncomingBlock(I);
+ BasicBlock::iterator Iter = Block->getTerminator()->getIterator();
+ Instruction *NewInst = Builder.CreateIntrinsic(
+ Intrinsic::x86_tilezero_internal, None, {Row, Col});
+ NewInst->moveBefore(&*Iter);
+ NewInst = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
+ {IncValue->getType()}, {NewInst});
+ NewInst->moveBefore(&*Iter);
+ // Replace InValue with new Value.
+ OldPN->setIncomingValue(I, NewInst);
+ IncValue = NewInst;
+ }
if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
if (OldPhiNodes.insert(PNode))
@@ -838,6 +911,99 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
return true;
}
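The constant-incoming-value case above materializes a zero tile in the
incoming block: a tilezero intrinsic with the shape recovered from the PHI's
users, cast back to the vector type the PHI expects. A hedged sketch of the
builder calls involved (hypothetical helper; Row/Col are the shape constants
from getShape, VecTy the PHI's type):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsX86.h"

llvm::Value *makeZeroTileVector(llvm::IRBuilderBase &Builder, llvm::Value *Row,
                                llvm::Value *Col, llvm::Type *VecTy) {
  // tilezero produces an x86_amx value of the given shape...
  llvm::Value *Tile = Builder.CreateIntrinsic(
      llvm::Intrinsic::x86_tilezero_internal, /*Types=*/{}, {Row, Col});
  // ...which is then cast back to the vector type expected by the PHI.
  return Builder.CreateIntrinsic(llvm::Intrinsic::x86_cast_tile_to_vector,
                                 {VecTy}, {Tile});
}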
+// %43 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %42)
+// store <256 x i32> %43, <256 x i32>* %p, align 64
+// -->
+// call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %p,
+// i64 64, x86_amx %42)
+void X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
+ Value *Tile = Cast->getOperand(0);
+  // TODO: If it is a cast intrinsic or phi node, we can propagate the
+  // shape information through the def-use chain.
+ if (!isAMXIntrinsic(Tile))
+ return;
+ auto *II = cast<IntrinsicInst>(Tile);
+  // Tile is the output of an AMX intrinsic. The first operand of the
+  // intrinsic is the row; the second is the column.
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
+ IRBuilder<> Builder(ST);
+  // Use the maximum column as the stride. It must be the same as the
+  // load stride.
+ Value *Stride = Builder.getInt64(64);
+ Value *I8Ptr =
+ Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy());
+ std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
+ Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args);
+}
+
+// %65 = load <256 x i32>, <256 x i32>* %p, align 64
+// %66 = call x86_amx @llvm.x86.cast.vector.to.tile(<256 x i32> %65)
+// -->
+// %66 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
+// i8* %p, i64 64)
+void X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
+ Value *Row = nullptr, *Col = nullptr;
+ Use &U = *(Cast->use_begin());
+ unsigned OpNo = U.getOperandNo();
+ auto *II = cast<IntrinsicInst>(U.getUser());
+  // TODO: If it is a cast intrinsic or phi node, we can propagate the
+  // shape information through the def-use chain.
+ if (!isAMXIntrinsic(II))
+ return;
+ std::tie(Row, Col) = getShape(II, OpNo);
+ IRBuilder<> Builder(LD);
+  // Use the maximum column as the stride.
+ Value *Stride = Builder.getInt64(64);
+ Value *I8Ptr =
+ Builder.CreateBitCast(LD->getOperand(0), Builder.getInt8PtrTy());
+ std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
+
+ Value *NewInst =
+ Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, None, Args);
+ Cast->replaceAllUsesWith(NewInst);
+}
+
+bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
+ bool Change = false;
+ for (auto *Cast : Casts) {
+ auto *II = cast<IntrinsicInst>(Cast);
+ // %43 = call <256 x i32> @llvm.x86.cast.tile.to.vector(x86_amx %42)
+ // store <256 x i32> %43, <256 x i32>* %p, align 64
+ // -->
+ // call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %p,
+ // i64 64, x86_amx %42)
+ if (II->getIntrinsicID() == Intrinsic::x86_cast_tile_to_vector) {
+ SmallVector<Instruction *, 2> DeadStores;
+ for (User *U : Cast->users()) {
+ StoreInst *Store = dyn_cast<StoreInst>(U);
+ if (!Store)
+ continue;
+ combineCastStore(cast<IntrinsicInst>(Cast), Store);
+ DeadStores.push_back(Store);
+ Change = true;
+ }
+ for (auto *Store : DeadStores)
+ Store->eraseFromParent();
+ } else { // x86_cast_vector_to_tile
+ SmallVector<Instruction *, 2> DeadLoads;
+ auto *Load = dyn_cast<LoadInst>(Cast->getOperand(0));
+ if (!Load || !Load->hasOneUse())
+ continue;
+ // %65 = load <256 x i32>, <256 x i32>* %p, align 64
+ // %66 = call x86_amx @llvm.x86.cast.vector.to.tile(<256 x i32> %65)
+ // -->
+ // %66 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
+ // i8* %p, i64 64)
+ combineLoadCast(cast<IntrinsicInst>(Cast), Load);
+      // Set the operand to null so that the load instruction can be erased.
+ Cast->setOperand(0, nullptr);
+ Load->eraseFromParent();
+ }
+ }
+ return Change;
+}
+
bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
bool Change = false;
// Collect tile cast instruction.
@@ -879,17 +1045,22 @@ bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);
+ SmallVector<Instruction *, 8> LiveCasts;
auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
for (auto *Inst : Insts) {
if (Inst->use_empty()) {
Inst->eraseFromParent();
Change = true;
+ } else {
+ LiveCasts.push_back(Inst);
}
}
};
EraseInst(Vec2TileInsts);
EraseInst(Tile2VecInsts);
+ Change |= combineLdSt(LiveCasts);
+ EraseInst(LiveCasts);
// Handle the A->B->A cast, and there is an intervening PHI node.
for (BasicBlock &BB : Func) {
@@ -947,6 +1118,10 @@ bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
// i64 60)
// call void @llvm.x86.tilestored64.internal(i16 15, i16 60,
// i8* %addr3, i64 60, x86_amx %2)
+ if (AMXCast->use_empty()) {
+ AMXCast->eraseFromParent();
+ return true;
+ }
Use &U = *(AMXCast->use_begin());
unsigned OpNo = U.getOperandNo();
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 9044f10ec630..b107de692365 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -501,7 +501,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
for (const MachineOperand &MO : MI->operands())
if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
- OutMI.addOperand(MaybeMCOp.getValue());
+ OutMI.addOperand(*MaybeMCOp);
// Handle a few special cases to eliminate operand modifiers.
switch (OutMI.getOpcode()) {
@@ -962,6 +962,12 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// These are not truly commutable so hide them from the default case.
break;
+ case X86::MASKMOVDQU:
+ case X86::VMASKMOVDQU:
+ if (AsmPrinter.getSubtarget().is64Bit())
+ OutMI.setFlags(X86::IP_HAS_AD_SIZE);
+ break;
+
default: {
// If the instruction is a commutable arithmetic instruction we might be
// able to commute the operands to get a 2 byte VEX prefix.
@@ -1311,7 +1317,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
E = FaultingMI.operands_end();
I != E; ++I)
if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
- MI.addOperand(MaybeOperand.getValue());
+ MI.addOperand(*MaybeOperand);
OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
OutStreamer->emitInstruction(MI, getSubtargetInfo());
@@ -1347,11 +1353,12 @@ void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
AccessInfo.CompileKernel, &ShadowBase,
&MappingScale, &OrShadowOffset);
- std::string Name = AccessInfo.IsWrite ? "store" : "load";
- std::string Op = OrShadowOffset ? "or" : "add";
- std::string SymName = "__asan_check_" + Name + "_" + Op + "_" +
- utostr(1ULL << AccessInfo.AccessSizeIndex) + "_" +
- TM.getMCRegisterInfo()->getName(Reg.asMCReg());
+ StringRef Name = AccessInfo.IsWrite ? "store" : "load";
+ StringRef Op = OrShadowOffset ? "or" : "add";
+ std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
+ Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
+ TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
+ .str();
if (OrShadowOffset)
report_fatal_error(
"OrShadowOffset is not supported with optimized callbacks");
@@ -1375,7 +1382,7 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
MCI.setOpcode(Opcode);
for (auto &MO : drop_begin(MI.operands(), 2))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
- MCI.addOperand(MaybeOperand.getValue());
+ MCI.addOperand(*MaybeOperand);
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
@@ -1751,7 +1758,7 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
Ret.setOpcode(OpCode);
for (auto &MO : drop_begin(MI.operands()))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
- Ret.addOperand(MaybeOperand.getValue());
+ Ret.addOperand(*MaybeOperand);
OutStreamer->emitInstruction(Ret, getSubtargetInfo());
emitX86Nops(*OutStreamer, 10, Subtarget);
recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
@@ -1790,7 +1797,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
OutStreamer->AddComment("TAILCALL");
for (auto &MO : drop_begin(MI.operands()))
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
- TC.addOperand(MaybeOperand.getValue());
+ TC.addOperand(*MaybeOperand);
OutStreamer->emitInstruction(TC, getSubtargetInfo());
}
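The getValue() to operator* changes in this file track llvm::Optional
converging on the std::optional interface. A minimal standalone sketch of
the idiom, written with std::optional (llvm::Optional behaves the same way
for this purpose; lowerOperand is a hypothetical stand-in):

#include <optional>
#include <vector>

std::optional<int> lowerOperand(int X) {
  return X > 0 ? std::optional<int>(X) : std::nullopt;
}

void lowerAll(const std::vector<int> &In, std::vector<int> &Out) {
  for (int X : In)
    if (auto MaybeV = lowerOperand(X))
      Out.push_back(*MaybeV);  // previously: MaybeV.getValue()
}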
@@ -1985,34 +1992,34 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
// Otherwise, use the .seh_ directives for all other Windows platforms.
switch (MI->getOpcode()) {
case X86::SEH_PushReg:
- OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
+ OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
break;
case X86::SEH_SaveReg:
- OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
+ OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
MI->getOperand(1).getImm());
break;
case X86::SEH_SaveXMM:
- OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
+ OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
MI->getOperand(1).getImm());
break;
case X86::SEH_StackAlloc:
- OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
+ OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
break;
case X86::SEH_SetFrame:
- OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
+ OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
MI->getOperand(1).getImm());
break;
case X86::SEH_PushFrame:
- OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
+ OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
break;
case X86::SEH_EndPrologue:
- OutStreamer->EmitWinCFIEndProlog();
+ OutStreamer->emitWinCFIEndProlog();
break;
default:
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
index 05f846bfb219..2e88e01ce7fd 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -13,6 +13,13 @@
using namespace llvm;
+MachineFunctionInfo *X86MachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<X86MachineFunctionInfo>(*this);
+}
+
void X86MachineFunctionInfo::anchor() { }
void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) {
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 99d1a97380dd..99cc9f525b2c 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -119,7 +119,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
Optional<int> SwiftAsyncContextFrameIdx;
- ValueMap<const Value *, size_t> PreallocatedIds;
+ // Preallocated fields are only used during isel.
+ // FIXME: Can we find somewhere else to store these?
+ DenseMap<const Value *, size_t> PreallocatedIds;
SmallVector<size_t, 0> PreallocatedStackSizes;
SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
@@ -132,6 +134,12 @@ public:
X86MachineFunctionInfo() = default;
explicit X86MachineFunctionInfo(MachineFunction &MF) {}
+ explicit X86MachineFunctionInfo(const X86MachineFunctionInfo &) = default;
+
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp
index 425054cfdd92..aa6e8645e092 100644
--- a/llvm/lib/Target/X86/X86MacroFusion.cpp
+++ b/llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp
index e92b1b002bb0..bb59cee8badb 100644
--- a/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -37,21 +37,20 @@ STATISTIC(NumBBsPadded, "Number of basic blocks padded");
namespace {
struct VisitedBBInfo {
// HasReturn - Whether the BB contains a return instruction
- bool HasReturn;
+ bool HasReturn = false;
// Cycles - Number of cycles until return if HasReturn is true, otherwise
// number of cycles until end of the BB
- unsigned int Cycles;
+ unsigned int Cycles = 0;
- VisitedBBInfo() : HasReturn(false), Cycles(0) {}
+ VisitedBBInfo() = default;
VisitedBBInfo(bool HasReturn, unsigned int Cycles)
: HasReturn(HasReturn), Cycles(Cycles) {}
};
struct PadShortFunc : public MachineFunctionPass {
static char ID;
- PadShortFunc() : MachineFunctionPass(ID)
- , Threshold(4) {}
+ PadShortFunc() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -82,7 +81,7 @@ namespace {
MachineBasicBlock::iterator &MBBI,
unsigned int NOOPsToAdd);
- const unsigned int Threshold;
+ const unsigned int Threshold = 4;
// ReturnBBs - Maps basic blocks that return to the minimum number of
// cycles until the return, starting from the entry block.
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
index 4342ac089cae..7761f7323358 100644
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -19,8 +19,10 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
@@ -220,16 +222,21 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) {
if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
return false;
- // Operand should be a select.
- auto *SI = dyn_cast<SelectInst>(Op);
- if (!SI)
- return false;
-
- // Select needs to implement absolute value.
- Value *LHS, *RHS;
- auto SPR = matchSelectPattern(SI, LHS, RHS);
- if (SPR.Flavor != SPF_ABS)
- return false;
+ Value *LHS;
+ if (match(Op, PatternMatch::m_Intrinsic<Intrinsic::abs>())) {
+ LHS = Op->getOperand(0);
+ } else {
+ // Operand should be a select.
+ auto *SI = dyn_cast<SelectInst>(Op);
+ if (!SI)
+ return false;
+
+ Value *RHS;
+ // Select needs to implement absolute value.
+ auto SPR = matchSelectPattern(SI, LHS, RHS);
+ if (SPR.Flavor != SPF_ABS)
+ return false;
+ }
// Need a subtract of two values.
auto *Sub = dyn_cast<BinaryOperator>(LHS);
@@ -253,7 +260,7 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) {
if (!Op0 || !Op1)
return false;
- IRBuilder<> Builder(SI);
+ IRBuilder<> Builder(Op);
auto *OpTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = OpTy->getNumElements();
@@ -271,7 +278,7 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) {
IntrinsicNumElts = 16;
}
- Function *PSADBWFn = Intrinsic::getDeclaration(SI->getModule(), IID);
+ Function *PSADBWFn = Intrinsic::getDeclaration(Op->getModule(), IID);
if (NumElts < 16) {
// Pad input with zeroes.
@@ -336,8 +343,8 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) {
Ops[0] = Builder.CreateShuffleVector(Ops[0], Zero, ConcatMask);
}
- SI->replaceAllUsesWith(Ops[0]);
- SI->eraseFromParent();
+ Op->replaceAllUsesWith(Ops[0]);
+ Op->eraseFromParent();
return true;
}
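The PatternMatch change earlier in this function keeps SAD formation working
after instcombine has canonicalized a select-based absolute value into the
llvm.abs intrinsic. Roughly, both spellings of the element operation below
should now be recognized (a hedged sketch; the exact IR depends on the
optimization pipeline):

#include <cstdint>

int32_t absdiff(uint8_t A, uint8_t B) {
  int32_t D = (int32_t)A - (int32_t)B;
  // The conditional negate is the select form; instcombine typically turns
  // it into @llvm.abs(D, false), which the pass previously failed to match.
  return D < 0 ? -D : D;
}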
diff --git a/llvm/lib/Target/X86/X86PreAMXConfig.cpp b/llvm/lib/Target/X86/X86PreAMXConfig.cpp
index d9c6d08ada73..cd0d448238a6 100644
--- a/llvm/lib/Target/X86/X86PreAMXConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreAMXConfig.cpp
@@ -91,16 +91,17 @@ static bool brokenVolatile(Instruction *I) {
namespace {
class X86PreAMXConfig {
+ using PosAndShapesMap = MapVector<Instruction *, SmallVector<Value *, 8>>;
+
Function &F;
public:
X86PreAMXConfig(Function &Func) : F(Func) {}
bool preTileConfig();
- bool addTileConfig(Instruction *ModelStart, SmallVector<Value *, 8> &Shapes);
- bool findConfigShapes(
- DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes);
+ void addTileConfig(Instruction *ModelStart, SmallVector<Value *, 8> &Shapes);
+ bool findConfigShapes(PosAndShapesMap &PosAndShapes);
bool getKeyAMXShapes(IntrinsicInst *KeyAMX, SmallVector<Value *, 8> &Shapes);
- bool preWriteTileCfg(Value *I8Ptr, Instruction *Pos,
+ void preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder,
SmallVector<Value *, 8> &Shapes);
BasicBlock::iterator
getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
@@ -149,10 +150,9 @@ public:
// %td = tail call x86_amx @llvm.x86.tdpbssd.internal(m, n, k, t1, t2, t3)
// call void @llvm.x86.tilestored64.internal(... td) area
// --------------------------------------------------------------------------
-bool X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, Instruction *Pos,
+void X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, IRBuilderBase &Builder,
SmallVector<Value *, 8> &Shapes) {
- bool Write = false;
- LLVMContext &Ctx = Pos->getParent()->getContext();
+ LLVMContext &Ctx = Builder.getContext();
Type *I8Ty = Type::getInt8Ty(Ctx);
Type *I16Ty = Type::getInt16Ty(Ctx);
@@ -160,30 +160,27 @@ bool X86PreAMXConfig::preWriteTileCfg(Value *I8Ptr, Instruction *Pos,
// other value in the future.
Value *PaletteOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
Value *PaletteValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
- Value *PalettePos =
- GetElementPtrInst::Create(I8Ty, I8Ptr, PaletteOffset, "", Pos);
- new StoreInst(PaletteValue, PalettePos, Pos);
+ Value *PalettePos = Builder.CreateGEP(I8Ty, I8Ptr, PaletteOffset);
+ Builder.CreateStore(PaletteValue, PalettePos);
for (int I = 0, E = Shapes.size() / 2; I < E; I++) {
Value *RowOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 48 + I);
Value *ColOffset = ConstantInt::get(Type::getInt64Ty(Ctx), 16 + I * 2);
const std::string ShapeName = "amx.tmm." + itostr(I);
- Value *RowPos = GetElementPtrInst::Create(I8Ty, I8Ptr, RowOffset,
- ShapeName + ".shape.row", Pos);
- Value *ColPos = GetElementPtrInst::Create(I8Ty, I8Ptr, ColOffset, "", Pos);
- ColPos = new BitCastInst(ColPos, PointerType::get(I16Ty, 0),
- ShapeName + ".shape.col", Pos);
+ Value *RowPos = Builder.CreateGEP(I8Ty, I8Ptr, RowOffset,
+ ShapeName + ".shape.row");
+ Value *ColPos = Builder.CreateGEP(I8Ty, I8Ptr, ColOffset);
+ ColPos = Builder.CreateBitCast(ColPos, PointerType::get(I16Ty, 0),
+ ShapeName + ".shape.col");
Value *Row = Shapes[I * 2];
Value *Col = Shapes[I * 2 + 1];
- Row = new TruncInst(Row, I8Ty, "", Pos);
- new StoreInst(Row, RowPos, Pos);
- new StoreInst(Col, ColPos, Pos);
- Write = true;
+ Row = Builder.CreateTrunc(Row, I8Ty);
+ Builder.CreateStore(Row, RowPos);
+ Builder.CreateStore(Col, ColPos);
}
- return Write;
}
-bool X86PreAMXConfig::addTileConfig(Instruction *ModelStart,
+void X86PreAMXConfig::addTileConfig(Instruction *ModelStart,
SmallVector<Value *, 8> &Shapes) {
Module *M = F.getParent();
IRBuilder<> Builder(ModelStart);
@@ -198,17 +195,11 @@ bool X86PreAMXConfig::addTileConfig(Instruction *ModelStart,
Addr->setAlignment(Alignment);
Value *I8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy());
- std::array<Value *, 1> Args = {I8Ptr};
- Instruction *Cfg =
- Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, Args);
-
- Value *Val0 = Constant::getNullValue(V512Ty);
- Instruction *Init0 = new StoreInst(Val0, Addr, false, Alignment, Cfg);
- assert(Init0 && "Not Zero initilizate the cfg mem!");
+ Builder.CreateAlignedStore(Constant::getNullValue(V512Ty), Addr, Alignment);
- preWriteTileCfg(I8Ptr, Cfg, Shapes);
+ preWriteTileCfg(I8Ptr, Builder, Shapes);
- return Init0;
+ Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, None, {I8Ptr});
}
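preWriteTileCfg now receives an IRBuilderBase instead of a raw insertion
point, so GEPs, truncs, and stores are created through the builder rather
than via the placement-style Instruction constructors. A minimal sketch of
the idiom against the LLVM IR headers (hypothetical helper):

#include "llvm/IR/IRBuilder.h"

void storeByteAt(llvm::IRBuilderBase &Builder, llvm::Value *I8Ptr,
                 llvm::Value *Offset, llvm::Value *Byte) {
  // The builder carries the insertion point; no 'Instruction *Pos' threading.
  llvm::Value *Slot = Builder.CreateGEP(Builder.getInt8Ty(), I8Ptr, Offset);
  Builder.CreateStore(Byte, Slot);
}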
// TODO: We may need to handle the "more than one store" case in the future.
@@ -315,8 +306,7 @@ X86PreAMXConfig::getShapesAndConfigPosEnd(BasicBlock::iterator Iter,
// %td = call x86_amx @llvm.x86.tdpbssd.internal(...t1, t2, t3) (m,k)(k,n)
// call void @llvm.x86.tilestored64.internal(m, n,... td) (m,n)(m,n)
// --------------------------------------------------------------------------
-bool X86PreAMXConfig::findConfigShapes(
- DenseMap<Instruction *, SmallVector<Value *, 8>> &PosAndShapes) {
+bool X86PreAMXConfig::findConfigShapes(PosAndShapesMap &PosAndShapes) {
bool Find = false;
for (BasicBlock &BB : F) {
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
@@ -365,7 +355,7 @@ bool X86PreAMXConfig::findConfigShapes(
// call void @llvm.x86.tilestored64.internal(... td) area
// --------------------------------------------------------------------------
bool X86PreAMXConfig::preTileConfig() {
- DenseMap<Instruction *, SmallVector<Value *, 8>> PosAndShapes;
+ PosAndShapesMap PosAndShapes;
bool NeedCfg = findConfigShapes(PosAndShapes);
if (!NeedCfg)
return false;
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index 5d21f8666ec6..479db8585ca0 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -40,10 +41,15 @@
using namespace llvm;
#define DEBUG_TYPE "tile-pre-config"
-#define REPORT_CONFIG_FAIL \
- report_fatal_error( \
- MF.getName() + \
- ": Failed to config tile register, please define the shape earlier");
+
+static void emitErrorMsg(MachineFunction &MF) {
+ SmallString<32> Str;
+ Twine ErrorMsg =
+ MF.getName() +
+ ": Failed to config tile register, please define the shape earlier";
+ LLVMContext &Context = MF.getMMI().getModule()->getContext();
+ Context.emitError(ErrorMsg);
+}
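Unlike report_fatal_error, LLVMContext::emitError dispatches to the
installed diagnostic handler and then returns, so the pass can give up on
the current function instead of terminating the whole compiler process. A
minimal usage sketch (hypothetical standalone helper):

#include "llvm/ADT/Twine.h"
#include "llvm/IR/LLVMContext.h"

void reportTileConfigFailure(llvm::LLVMContext &Ctx, const llvm::Twine &Fn) {
  Ctx.emitError(Fn + ": Failed to config tile register, please define the "
                     "shape earlier");
  // Control returns here; the caller decides how to recover (the pass below
  // simply returns false without configuring tile registers).
}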
namespace {
@@ -302,12 +308,19 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineBasicBlock *, 8> WorkList;
for (auto &I : ShapeBBs) {
// TODO: We can hoist shapes across BBs here.
- if (BBVisitedInfo[I.first].HasAMXRegLiveIn)
- REPORT_CONFIG_FAIL
+ if (BBVisitedInfo[I.first].HasAMXRegLiveIn) {
+      // We are unable to configure tile registers since the shape to
+      // configure is not defined yet. Emit an error message and continue;
+      // the function will not configure tile registers.
+ emitErrorMsg(MF);
+ return false;
+ }
if (BBVisitedInfo[I.first].FirstAMX &&
BBVisitedInfo[I.first].FirstAMX < I.second.back() &&
- !hoistShapesInBB(I.first, I.second))
- REPORT_CONFIG_FAIL
+ !hoistShapesInBB(I.first, I.second)) {
+ emitErrorMsg(MF);
+ return false;
+ }
WorkList.push_back(I.first);
}
while (!WorkList.empty()) {
@@ -356,7 +369,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
// multi insert.
if (VisitedOrInserted.insert(I).second) {
auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
- addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::LDTILECFG)),
+ addFrameReference(BuildMI(*I.MBB, ++II, DL, TII->get(X86::PLDTILECFGV)),
SS);
}
}
@@ -367,33 +380,27 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = &*MBB.begin();
if (ST.hasAVX512()) {
Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
- BuildMI(MBB, MI, DL, TII->get(X86::VPXORDZrr), Zmm)
- .addReg(Zmm, RegState::Undef)
- .addReg(Zmm, RegState::Undef);
+ BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), SS)
.addReg(Zmm);
} else if (ST.hasAVX2()) {
Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
- BuildMI(MBB, MI, DL, TII->get(X86::VPXORYrr), Ymm)
- .addReg(Ymm, RegState::Undef)
- .addReg(Ymm, RegState::Undef);
+ BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS)
.addReg(Ymm);
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), SS, 32)
.addReg(Ymm);
} else {
assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
+ unsigned StoreOpc = ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
- BuildMI(MBB, MI, DL, TII->get(X86::PXORrr), Xmm)
- .addReg(Xmm, RegState::Undef)
- .addReg(Xmm, RegState::Undef);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS)
- .addReg(Xmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 16)
+ BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS).addReg(Xmm);
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 16)
.addReg(Xmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 32)
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 32)
.addReg(Xmm);
- addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOVUPSmr)), SS, 48)
+ addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), SS, 48)
.addReg(Xmm);
}
// Fill in the palette first.
diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index 9c076d2d6769..c49fc458eab3 100644
--- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -12,9 +12,9 @@
#include "X86RegisterBankInfo.h"
#include "X86InstrInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_TARGET_REGBANK_IMPL
@@ -25,8 +25,7 @@ using namespace llvm;
#define GET_TARGET_REGBANK_INFO_IMPL
#include "X86GenRegisterBankInfo.def"
-X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI)
- : X86GenRegisterBankInfo() {
+X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) {
// validate RegBank initialization.
const RegisterBank &RBGPR = getRegBank(X86::GPRRegBankID);
diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.h b/llvm/lib/Target/X86/X86RegisterBankInfo.h
index d5afd2cae761..fca36a317b58 100644
--- a/llvm/lib/Target/X86/X86RegisterBankInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterBankInfo.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H
#define LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
#include "X86GenRegisterBank.inc"
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 130cb61cdde2..f2658f70434b 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -26,6 +26,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TileShapeInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
@@ -618,6 +620,66 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
+ auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
+ return TRI.isSuperOrSubRegisterEq(RegA, RegB);
+ };
+
+ if (!ST.is64Bit())
+ return llvm::any_of(
+ SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
+ [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
+ (ST.hasMMX() && X86::VR64RegClass.contains(Reg));
+
+ CallingConv::ID CC = MF.getFunction().getCallingConv();
+
+ if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
+ return true;
+
+ if (llvm::any_of(
+ SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
+ [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
+ return true;
+
+ if (CC != CallingConv::Win64 &&
+ llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
+ [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
+ return true;
+
+ if (ST.hasSSE1() &&
+ llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
+ X86::XMM3, X86::XMM4, X86::XMM5,
+ X86::XMM6, X86::XMM7},
+ [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
+ return true;
+
+ return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
+}
+
+bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
+ MCRegister PhysReg) const {
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
+
+ // Stack pointer.
+ if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
+ return true;
+
+  // Treat the frame pointer as fixed if it's being used.
+ const X86FrameLowering &TFI = *getFrameLowering(MF);
+ if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
+ return true;
+
+ return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
+}
+
+bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
+ return RC->getID() == X86::TILERegClassID;
+}
+
void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
// Check if the EFLAGS register is marked as live-out. This shouldn't happen,
// because the calling convention defines the EFLAGS register as NOT
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 7fd10ddd1a15..6f4fb405d29f 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -115,6 +115,18 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const override;
+  /// isArgumentRegister - Returns true if Reg can be used as an argument
+  /// to a function.
+ bool isArgumentRegister(const MachineFunction &MF,
+ MCRegister Reg) const override;
+
+  /// Return true if RC is the tile register class.
+ bool isTileRegisterClass(const TargetRegisterClass *RC) const;
+
+ /// Returns true if PhysReg is a fixed register.
+ bool isFixedRegister(const MachineFunction &MF,
+ MCRegister PhysReg) const override;
+
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
bool hasBasePointer(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 1b704bcb8e08..6dc51e37d3c2 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -537,6 +537,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
+def FR16 : RegisterClass<"X86", [f16], 16, (add FR32)> {let Size = 32;}
+
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
@@ -599,7 +601,7 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
-def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)>;
+def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)> {let Size = 32;}
// Extended VR128 and VR256 for AVX-512 instructions
def VR128X : RegisterClass<"X86", [v4f32, v2f64, v8f16, v16i8, v8i16, v4i32, v2i64, f128],
@@ -638,3 +640,14 @@ def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
let CopyCost = -1 in // Don't allow copying of tile registers
def TILE : RegisterClass<"X86", [x86amx], 8192,
(sequence "TMM%u", 0, 7)> {let Size = 8192;}
+
+//===----------------------------------------------------------------------===//
+// Register categories.
+//
+
+// The TILE and VK*PAIR registers may not be "fixed", but we don't want them
+// anyway.
+def FixedRegisters : RegisterCategory<[DEBUG_REG, CONTROL_REG, CCR, FPCCR,
+ DFCCR, TILE, VK1PAIR, VK2PAIR, VK4PAIR,
+ VK8PAIR, VK16PAIR]>;
+def GeneralPurposeRegisters : RegisterCategory<[GR64, GR32, GR16, GR8]>;
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 8e317dc22bd6..e4b95cb0807f 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -814,12 +814,26 @@ def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> {
def: InstRW<[BWWriteResGroup34], (instregex "CLD")>;
def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> {
- let Latency = 3;
+ let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[BWWriteResGroup35], (instregex "RCL(8|16|32|64)r(1|i)",
- "RCR(8|16|32|64)r(1|i)")>;
+def: InstRW<[BWWriteResGroup35], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
+ RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
+
+def BWWriteResGroup36 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[BWWriteResGroup36], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
+
+def BWWriteResGroup36b : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[BWWriteResGroup36b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> {
let Latency = 3;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 1cd0b3379684..7b1a31d2a4df 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -1299,12 +1299,26 @@ def HWWriteResGroup58 : SchedWriteRes<[HWPort6,HWPort0156]> {
def: InstRW<[HWWriteResGroup58], (instregex "CLD")>;
def HWWriteResGroup59 : SchedWriteRes<[HWPort06,HWPort0156]> {
- let Latency = 3;
+ let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[HWWriteResGroup59], (instregex "RCL(8|16|32|64)r(1|i)",
- "RCR(8|16|32|64)r(1|i)")>;
+def: InstRW<[HWWriteResGroup59], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
+ RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
+
+def HWWriteResGroup60 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[HWWriteResGroup60], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
+
+def HWWriteResGroup60b : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[HWWriteResGroup60b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def HWWriteResGroup61 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> {
let Latency = 4;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 9fd986e34181..b66db7e7e73a 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -923,12 +923,26 @@ def ICXWriteResGroup43 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>;
def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
- let Latency = 3;
+ let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[ICXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
- "RCR(8|16|32|64)r(1|i)")>;
+def: InstRW<[ICXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
+ RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
+
+def ICXWriteResGroup44b : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 7;
+ let ResourceCycles = [2,3,2];
+}
+def: InstRW<[ICXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
+
+def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 7;
+ let ResourceCycles = [2,3,2];
+}
+def: InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237]> {
let Latency = 3;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 7e619a3a8722..49858ca0a800 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -111,8 +111,17 @@ def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [SBPort015]>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;
-def : WriteRes<WriteVecMaskedGatherWriteback, []> { let Latency = 5; let NumMicroOps = 0; }
+
+// Model the effect of clobbering the read-write mask operand of the GATHER
+// operation. It does not cost anything by itself; it only has latency,
+// matching that of WriteLoad.
+defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
@@ -678,13 +687,27 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
-def SBWriteResGroup23 : SchedWriteRes<[SBPort05]> {
+def SBWriteResGroup23 : SchedWriteRes<[SBPort05,SBPort015]> {
let Latency = 2;
let NumMicroOps = 3;
- let ResourceCycles = [3];
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup23], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
+ RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
+
+def SBWriteResGroup24 : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
+ let Latency = 3;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,4,2];
+}
+def: InstRW<[SBWriteResGroup24], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
+
+def SBWriteResGroup24b : SchedWriteRes<[SBPort1,SBPort5,SBPort05,SBPort015]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,4,2];
}
-def: InstRW<[SBWriteResGroup23], (instregex "RCL(8|16|32|64)r1",
- "RCR(8|16|32|64)r1")>;
+def: InstRW<[SBWriteResGroup24b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23,SBPort015]> {
let Latency = 7;
@@ -727,8 +750,8 @@ def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
let NumMicroOps = 8;
let ResourceCycles = [8];
}
-def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)r(i|CL)",
- "RCR(8|16|32|64)r(i|CL)")>;
+def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)rCL",
+ "RCR(8|16|32|64)rCL")>;
def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
let Latency = 5;
@@ -802,8 +825,7 @@ def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup48], (instrs MMX_MOVD64from64rm,
- VBROADCASTSSrm)>;
+def: InstRW<[SBWriteResGroup48], (instrs VBROADCASTSSrm)>;
def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
"(V?)MOV64toPQIrm",
"(V?)MOVDDUPrm",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 0a88bac5aa66..05364e3434e4 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -836,12 +836,26 @@ def SKLWriteResGroup41 : SchedWriteRes<[SKLPort237,SKLPort0156]> {
def: InstRW<[SKLWriteResGroup41], (instrs MFENCE)>;
def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
- let Latency = 3;
+ let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKLWriteResGroup42], (instregex "RCL(8|16|32|64)r(1|i)",
- "RCR(8|16|32|64)r(1|i)")>;
+def: InstRW<[SKLWriteResGroup42], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
+ RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
+
+def SKLWriteResGroup42b : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[SKLWriteResGroup42b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
+
+def SKLWriteResGroup42c : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[SKLWriteResGroup42c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
let Latency = 3;
@@ -921,8 +935,7 @@ def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
- "MOVZX(16|32|64)rm(8|16)",
- "(V?)MOVDDUPrm")>; // TODO: Should this be SKLWriteResGroup67?
+ "MOVZX(16|32|64)rm(8|16)")>;
def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let Latency = 5;
@@ -979,7 +992,8 @@ def: InstRW<[SKLWriteResGroup67], (instrs VBROADCASTSSrm,
VPBROADCASTDrm,
VPBROADCASTQrm)>;
def: InstRW<[SKLWriteResGroup67], (instregex "(V?)MOVSHDUPrm",
- "(V?)MOVSLDUPrm")>;
+ "(V?)MOVSLDUPrm",
+ "(V?)MOVDDUPrm")>;
def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> {
let Latency = 6;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index b28a18f0dcd7..b682b51c298a 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -905,12 +905,26 @@ def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
def: InstRW<[SKXWriteResGroup43], (instrs MFENCE)>;
def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
- let Latency = 3;
+ let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[SKXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
- "RCR(8|16|32|64)r(1|i)")>;
+def: InstRW<[SKXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
+ RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
+
+def SKXWriteResGroup44b : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[SKXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
+
+def SKXWriteResGroup44c : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+ let ResourceCycles = [2,4,2];
+}
+def: InstRW<[SKXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
let Latency = 3;
@@ -1041,8 +1055,7 @@ def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
- "MOVZX(16|32|64)rm(8|16)",
- "(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71?
+ "MOVZX(16|32|64)rm(8|16)")>;
def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 5;
@@ -1145,11 +1158,10 @@ def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup71], (instrs VBROADCASTSSrm,
VPBROADCASTDrm,
- VPBROADCASTQrm,
- VMOVSHDUPrm,
- VMOVSLDUPrm,
- MOVSHDUPrm,
- MOVSLDUPrm)>;
+ VPBROADCASTQrm)>;
+def: InstRW<[SKXWriteResGroup71], (instregex "(V?)MOVSHDUPrm",
+ "(V?)MOVSLDUPrm",
+ "(V?)MOVDDUPrm")>;
def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> {
let Latency = 6;
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 4b2fa87a25b5..1e9fcf6cc8cf 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -840,8 +840,8 @@ def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JAL
let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
let NumMicroOps = 63;
}
-def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, MASKMOVDQUX32,
- VMASKMOVDQU, VMASKMOVDQU64, VMASKMOVDQUX32)>;
+def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
+ VMASKMOVDQU, VMASKMOVDQU64)>;
///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 52605c031617..de4e7dd3cb90 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -377,10 +377,8 @@ defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2]>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
-// FIXME: The below is closer to correct, but caused some perf regressions.
-//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
-defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
+defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index fe0484afd227..aada3e0bd906 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -189,15 +189,6 @@ defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>;
-//defm : ZnWriteResPair<WriteIMul16, [ZnALU1, ZnMultiplier], 4>;
-//defm : ZnWriteResPair<WriteIMul16Imm, [ZnALU1, ZnMultiplier], 4>;
-//defm : ZnWriteResPair<WriteIMul16Reg, [ZnALU1, ZnMultiplier], 4>;
-//defm : ZnWriteResPair<WriteIMul32, [ZnALU1, ZnMultiplier], 4>;
-//defm : ZnWriteResPair<WriteIMul32Imm, [ZnALU1, ZnMultiplier], 4>;
-//defm : ZnWriteResPair<WriteIMul32Reg, [ZnALU1, ZnMultiplier], 4>;
-//defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
-//defm : ZnWriteResPair<WriteIMul64Imm, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
-//defm : ZnWriteResPair<WriteIMul64Reg, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
@@ -227,12 +218,10 @@ defm : X86WriteRes<WriteBitTest, [ZnALU], 1, [1], 1>;
defm : X86WriteRes<WriteBitTestImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
defm : X86WriteRes<WriteBitTestRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet, [ZnALU], 2, [1], 2>;
-//defm : X86WriteRes<WriteBitTestSetImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
-//defm : X86WriteRes<WriteBitTestSetRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
// Bit counts.
-defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
-defm : ZnWriteResPair<WriteBSR, [ZnALU], 3>;
+defm : ZnWriteResPair<WriteBSF, [ZnALU], 3, [12], 6, 4, 2>;
+defm : ZnWriteResPair<WriteBSR, [ZnALU], 4, [16], 6, 4, 2>;
defm : ZnWriteResPair<WriteLZCNT, [ZnALU], 2>;
defm : ZnWriteResPair<WriteTZCNT, [ZnALU], 2>;
defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;
@@ -240,9 +229,8 @@ defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
-// BMI1 BEXTR/BLS, BMI2 BZHI
+// BMI1 BEXTR, BMI2 BZHI
defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
-//defm : ZnWriteResPair<WriteBLS, [ZnALU], 2>;
defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
// IDIV
@@ -271,13 +259,13 @@ defm : X86WriteRes<WriteFLoadX, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
+
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
-
defm : X86WriteRes<WriteFMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
@@ -288,24 +276,24 @@ defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
-defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU23], 3>;
+defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU23], 3>;
+defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU23], 3, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
-defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU23], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU23], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU23], 3, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
-defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU01], 1, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
-defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU01], 1, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
-defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
-defm : ZnWriteResFpuPair<WriteFComX, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU01,ZnFPU2], 3, [1,1], 2>;
+defm : ZnWriteResFpuPair<WriteFComX, [ZnFPU01,ZnFPU2], 3, [1,1], 2>;
defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
@@ -346,8 +334,8 @@ defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
-defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU], 1>;
-defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU12], 2, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU12], 4, [4], 3, 7, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
@@ -410,20 +398,23 @@ defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
-defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU2], 1>;
defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
-defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>;
+defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 1, [2], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
-defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>;
-defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
-defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU2], 1>;
+defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU2], 1>;
+defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU2], 1, [2], 2>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
+defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU1], 3, [2], 1>;
+defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU1], 3, [4], 2>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
-defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>;
-defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 2, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 4, [4], 3, 7, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>;
@@ -448,7 +439,7 @@ defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
-defm : ZnWriteResFpuPair<WriteVPMOV256, [ZnFPU12], 1, [1], 2>;
+defm : ZnWriteResFpuPair<WriteVPMOV256, [ZnFPU12], 1, [4], 3>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
@@ -456,11 +447,6 @@ defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
-// Vector Shift Operations
-defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
-defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
-defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
-
// Vector insert/extract operations.
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
@@ -623,15 +609,14 @@ def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
def : SchedAlias<WriteIMul16, ZnWriteMul16>;
def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?
-def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
-def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
// m16.
def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
-
+def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
// r32.
def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
@@ -639,14 +624,14 @@ def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
def : SchedAlias<WriteIMul32, ZnWriteMul32>;
def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?
-def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
-def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
// m32.
def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
+def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
// r64.
def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
@@ -656,8 +641,6 @@ def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
def : SchedAlias<WriteIMul64, ZnWriteMul64>;
def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?
-def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
-def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
// m64.
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
@@ -665,6 +648,8 @@ def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let NumMicroOps = 2;
}
def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
+def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
// MULX
// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
@@ -1101,12 +1086,11 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
// HADD, HSUB PS/PD
// PHADD|PHSUB (S) W/D.
-def : SchedAlias<WritePHAdd, ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddLd, ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddX, ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddY, ZnWriteMicrocoded>;
-def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>;
+defm : ZnWriteResFpuPair<WriteFHAdd, [], 7>;
+defm : ZnWriteResFpuPair<WriteFHAddY, [], 7>;
+defm : ZnWriteResFpuPair<WritePHAdd, [], 3>;
+defm : ZnWriteResFpuPair<WritePHAddX, [], 3>;
+defm : ZnWriteResFpuPair<WritePHAddY, [], 3>;
// PCMPGTQ.
def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
@@ -1446,12 +1430,6 @@ def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
//-- Arithmetic instructions --//
-// HADD, HSUB PS/PD
-def : SchedAlias<WriteFHAdd, ZnWriteMicrocoded>;
-def : SchedAlias<WriteFHAddLd, ZnWriteMicrocoded>;
-def : SchedAlias<WriteFHAddY, ZnWriteMicrocoded>;
-def : SchedAlias<WriteFHAddYLd, ZnWriteMicrocoded>;
-
// VDIVPS.
// TODO - convert to ZnWriteResFpuPair
// y,y,y.
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 38908a987595..c47d235eab9b 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -195,7 +195,7 @@ defm : X86WriteRes<WriteCMPXCHG, [Zn2ALU], 3, [1], 1>;
defm : X86WriteRes<WriteCMPXCHGRMW,[Zn2ALU,Zn2AGU], 8, [1,1], 5>;
defm : X86WriteRes<WriteXCHG, [Zn2ALU], 1, [2], 2>;
-defm : Zn2WriteResPair<WriteShift, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteShift, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteShiftCL, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteRotate, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteRotateCL, [Zn2ALU], 1>;
@@ -219,8 +219,8 @@ defm : X86WriteRes<WriteBitTestRegLd, [Zn2ALU,Zn2AGU], 5, [1,1], 2>;
defm : X86WriteRes<WriteBitTestSet, [Zn2ALU], 2, [1], 2>;
// Bit counts.
-defm : Zn2WriteResPair<WriteBSF, [Zn2ALU], 3>;
-defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 4>;
+defm : Zn2WriteResPair<WriteBSF, [Zn2ALU], 3, [12], 6, 4, 2>;
+defm : Zn2WriteResPair<WriteBSR, [Zn2ALU], 4, [16], 6, 4, 2>;
defm : Zn2WriteResPair<WriteLZCNT, [Zn2ALU], 1>;
defm : Zn2WriteResPair<WriteTZCNT, [Zn2ALU], 2>;
defm : Zn2WriteResPair<WritePOPCNT, [Zn2ALU], 1>;
@@ -230,7 +230,7 @@ def : InstRW<[WriteMove], (instrs COPY)>;
// BMI1 BEXTR, BMI2 BZHI
defm : Zn2WriteResPair<WriteBEXTR, [Zn2ALU], 1>;
-defm : Zn2WriteResPair<WriteBZHI, [Zn2ALU], 1>;
+defm : Zn2WriteResPair<WriteBZHI, [Zn2ALU], 1>;
// IDIV
defm : Zn2WriteResPair<WriteDiv8, [Zn2ALU2, Zn2Divider], 15, [1,15], 1>;
@@ -247,23 +247,17 @@ def Zn2WriteIMulH : WriteRes<WriteIMulH, [Zn2Multiplier]>{
let Latency = 3;
let NumMicroOps = 0;
}
-
def : WriteRes<WriteIMulHLd, [Zn2Multiplier]>{
let Latency = !add(Zn2WriteIMulH.Latency, Znver2Model.LoadLatency);
let NumMicroOps = Zn2WriteIMulH.NumMicroOps;
}
-
// Floating point operations
defm : X86WriteRes<WriteFLoad, [Zn2AGU], 8, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [Zn2AGU], 8, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [Zn2AGU], 8, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [Zn2AGU,Zn2FPU01], 8, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [Zn2AGU,Zn2FPU01], 8, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
-defm : X86WriteRes<WriteFMaskedStore64, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFStore, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreX, [Zn2AGU], 1, [1], 1>;
@@ -271,29 +265,34 @@ defm : X86WriteRes<WriteFStoreY, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNT, [Zn2AGU,Zn2FPU2], 8, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [Zn2AGU], 1, [1], 1>;
+defm : X86WriteRes<WriteFMaskedStore32, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
+
defm : X86WriteRes<WriteFMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [Zn2FPU], 1, [1], 1>;
defm : X86WriteResUnsupported<WriteFMoveZ>;
-defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFAddX, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFAddY, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU23], 3>;
+defm : Zn2WriteResFpuPair<WriteFAddX, [Zn2FPU23], 3>;
+defm : Zn2WriteResFpuPair<WriteFAddY, [Zn2FPU23], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
-defm : Zn2WriteResFpuPair<WriteFAdd64, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFAdd64X, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFAdd64Y, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFAdd64, [Zn2FPU23], 3>;
+defm : Zn2WriteResFpuPair<WriteFAdd64X, [Zn2FPU23], 3>;
+defm : Zn2WriteResFpuPair<WriteFAdd64Y, [Zn2FPU23], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
-defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU0], 1>;
-defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU0], 1>;
-defm : Zn2WriteResFpuPair<WriteFCmpY, [Zn2FPU0], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmp, [Zn2FPU01], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmpX, [Zn2FPU01], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmpY, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
-defm : Zn2WriteResFpuPair<WriteFCmp64, [Zn2FPU0], 1>;
-defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU0], 1>;
-defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU0], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmp64, [Zn2FPU01], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmp64X, [Zn2FPU01], 1>;
+defm : Zn2WriteResFpuPair<WriteFCmp64Y, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
-defm : Zn2WriteResFpuPair<WriteFCom, [Zn2FPU0], 3>;
-defm : Zn2WriteResFpuPair<WriteFComX, [Zn2FPU0], 3>;
+defm : Zn2WriteResFpuPair<WriteFCom, [Zn2FPU01,Zn2FPU2], 3, [1,1], 2>;
+defm : Zn2WriteResFpuPair<WriteFComX, [Zn2FPU01,Zn2FPU2], 3, [1,1], 2>;
defm : Zn2WriteResFpuPair<WriteFBlend, [Zn2FPU01], 1>;
defm : Zn2WriteResFpuPair<WriteFBlendY, [Zn2FPU01], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
@@ -332,8 +331,8 @@ defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : Zn2WriteResFpuPair<WriteFLogic, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteFLogicY, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
-defm : Zn2WriteResFpuPair<WriteFTest, [Zn2FPU], 1>;
-defm : Zn2WriteResFpuPair<WriteFTestY, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteFTest, [Zn2FPU12], 3, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFTestY, [Zn2FPU12], 3, [2], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : Zn2WriteResFpuPair<WriteFShuffle, [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
@@ -394,20 +393,23 @@ defm : X86WriteRes<WriteVecMoveToGpr, [Zn2FPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [Zn2FPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;
-defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShift, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftX, [Zn2FPU2], 1>;
defm : Zn2WriteResFpuPair<WriteVecShiftY, [Zn2FPU2], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
-defm : Zn2WriteResFpuPair<WriteVecShiftImm, [Zn2FPU], 1>;
-defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU], 1>;
-defm : Zn2WriteResFpuPair<WriteVecShiftImmY, [Zn2FPU], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShiftImm, [Zn2FPU2], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShiftImmX, [Zn2FPU2], 1>;
+defm : Zn2WriteResFpuPair<WriteVecShiftImmY, [Zn2FPU2], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
+defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU1], 3, [2], 1>;
+defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU1], 3, [2], 1>;
+defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : Zn2WriteResFpuPair<WriteVecLogic, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecLogicX, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecLogicY, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
-defm : Zn2WriteResFpuPair<WriteVecTest, [Zn2FPU12], 1, [2], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WriteVecTestY, [Zn2FPU12], 1, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteVecTest, [Zn2FPU12], 3, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteVecTestY, [Zn2FPU12], 3, [2], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : Zn2WriteResFpuPair<WriteVecALU, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecALUX, [Zn2FPU], 1>;
@@ -440,11 +442,6 @@ defm : Zn2WriteResFpuPair<WritePSADBWY, [Zn2FPU0], 3>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : Zn2WriteResFpuPair<WritePHMINPOS, [Zn2FPU0], 4>;
-// Vector Shift Operations
-defm : Zn2WriteResFpuPair<WriteVarVecShift, [Zn2FPU12], 3>;
-defm : Zn2WriteResFpuPair<WriteVarVecShiftY, [Zn2FPU12], 3>;
-defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
-
// Vector insert/extract operations.
defm : Zn2WriteResFpuPair<WriteVecInsert, [Zn2FPU], 1>;
@@ -486,12 +483,6 @@ defm : Zn2WriteResFpuPair<WriteFVarShuffle256, [Zn2FPU], 100>;
def Zn2WriteMicrocoded : SchedWriteRes<[]> {
let Latency = 100;
}
-defm : Zn2WriteResPair<WriteDPPS, [], 15>;
-defm : Zn2WriteResPair<WriteFHAdd, [], 7>;
-defm : Zn2WriteResPair<WriteFHAddY, [], 7>;
-defm : Zn2WriteResPair<WritePHAdd, [], 3>;
-defm : Zn2WriteResPair<WritePHAddX, [], 3>;
-defm : Zn2WriteResPair<WritePHAddY, [], 3>;
def : SchedAlias<WriteMicrocoded, Zn2WriteMicrocoded>;
def : SchedAlias<WriteFCMOV, Zn2WriteMicrocoded>;
@@ -1109,6 +1100,14 @@ def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
//-- Arithmetic instructions --//
+// HADD, HSUB PS/PD
+// PHADD|PHSUB (S) W/D.
+defm : Zn2WriteResFpuPair<WriteFHAdd, [], 7>;
+defm : Zn2WriteResFpuPair<WriteFHAddY, [], 7>;
+defm : Zn2WriteResFpuPair<WritePHAdd, [], 3>;
+defm : Zn2WriteResFpuPair<WritePHAddX, [], 3>;
+defm : Zn2WriteResFpuPair<WritePHAddY, [], 3>;
+
// PCMPGTQ.
def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>;
def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
@@ -1479,6 +1478,7 @@ def : SchedAlias<WriteFDiv64YLd, Zn2WriteVDIVPDYLd>;
// DPPS.
// x,x,i / v,v,v,i.
+defm : Zn2WriteResPair<WriteDPPS, [], 15>;
def : SchedAlias<WriteDPPSY, Zn2WriteMicrocoded>;
// x,m,i / v,v,m,i.
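
The Zn2WriteIMulHLd definition earlier in this file derives its latency with !add(Zn2WriteIMulH.Latency, Znver2Model.LoadLatency): the folded-load form is the register form plus the model's load latency. The same composition in plain C++, with an illustrative LoadLatency value (the real constant lives in the Znver2 scheduling model):

// Folded-load latency = register-form latency + model load latency,
// mirroring what the !add() above computes in TableGen.
#include <cstdio>

struct SchedWrite { int Latency; int NumMicroOps; };

constexpr int LoadLatency = 4;          // illustrative; see the Znver2 model
constexpr SchedWrite IMulH   = {3, 0};  // register form (Zn2WriteIMulH)
constexpr SchedWrite IMulHLd = {IMulH.Latency + LoadLatency,
                                IMulH.NumMicroOps};

int main() {
  std::printf("IMulHLd latency = %d\n", IMulHLd.Latency); // 7 with these numbers
}
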
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 5e59081c63b0..78a286ae5b28 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -46,7 +46,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
- SDValue Size, Align Alignment, bool isVolatile,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget =
@@ -67,40 +67,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
if (Alignment < Align(4) || !ConstantSize ||
- ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
- // Check to see if there is a specialized entry-point for memory zeroing.
- ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
-
- if (const char *bzeroName =
- (ValC && ValC->isZero())
- ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
- : nullptr) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
- Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Size;
- Args.push_back(Entry);
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(Chain)
- .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroName, IntPtr),
- std::move(Args))
- .setDiscardResult();
-
- std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
- return CallResult.second;
- }
-
- // Otherwise have the target-independent code call memset.
+ ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
return SDValue();
- }
uint64_t SizeVal = ConstantSize->getZExtValue();
SDValue InFlag;
@@ -175,7 +143,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
DAG.getConstant(Offset, dl, AddrVT)),
Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
- isVolatile, false, DstPtrInfo.getWithOffset(Offset));
+ isVolatile, AlwaysInline,
+ /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
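
The tail handling near the end of EmitTargetCodeForMemset above issues block stores for the bulk of the buffer and a second, smaller memset for the remainder at Dst + Offset. A standalone sketch of just that arithmetic, with illustrative values; the real code builds SelectionDAG nodes rather than doing the stores itself:

// Bulk/tail split performed by the memset lowering above.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t SizeVal = 53; // total bytes to set (illustrative)
  const uint64_t Block   = 8;  // e.g. 8 bytes per REP STOSQ iteration
  uint64_t Count     = SizeVal / Block;     // block-store iterations
  uint64_t BytesLeft = SizeVal % Block;     // tail handled by a second memset
  uint64_t Offset    = SizeVal - BytesLeft; // where the tail memset starts
  std::printf("count=%llu offset=%llu tail=%llu\n",
              (unsigned long long)Count, (unsigned long long)Offset,
              (unsigned long long)BytesLeft);
}
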
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
index dac62973636c..19136ca4f6f5 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -29,7 +29,7 @@ public:
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
- bool isVolatile,
+ bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const override;
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index dba11e8b4000..3317db891cf0 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -181,17 +181,18 @@ private:
void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
unsigned saveEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
+ MachineBasicBlock::iterator InsertPt,
+ const DebugLoc &Loc);
void restoreEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
+ MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
Register Reg);
void mergePredStateIntoSP(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
- unsigned PredStateReg);
+ MachineBasicBlock::iterator InsertPt,
+ const DebugLoc &Loc, unsigned PredStateReg);
unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
- DebugLoc Loc);
+ const DebugLoc &Loc);
void
hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
@@ -203,7 +204,7 @@ private:
bool canHardenRegister(Register Reg);
unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
- DebugLoc Loc);
+ const DebugLoc &Loc);
unsigned hardenPostLoad(MachineInstr &MI);
void hardenReturnInstr(MachineInstr &MI);
void tracePredStateThroughCall(MachineInstr &MI);
@@ -356,8 +357,8 @@ static void canonicalizePHIOperands(MachineFunction &MF) {
int OpIdx = DupIndices.pop_back_val();
// Remove both the block and value operand, again in reverse order to
// preserve indices.
- MI.RemoveOperand(OpIdx + 1);
- MI.RemoveOperand(OpIdx);
+ MI.removeOperand(OpIdx + 1);
+ MI.removeOperand(OpIdx);
}
Preds.clear();
@@ -1500,7 +1501,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
/// as the save so that no PHI nodes are inserted.
unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
- DebugLoc Loc) {
+ const DebugLoc &Loc) {
// FIXME: Hard coding this to a 32-bit register class seems weird, but matches
// what instruction selection does.
Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
@@ -1517,8 +1518,8 @@ unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
/// This must be done within the same basic block as the save in order to
/// reliably lower.
void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
- Register Reg) {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
+ const DebugLoc &Loc, Register Reg) {
BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
++NumInstsInserted;
}
@@ -1528,8 +1529,8 @@ void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
/// a way that won't form non-canonical pointers and also will be preserved
/// across normal stack adjustments.
void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
- unsigned PredStateReg) {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
+ const DebugLoc &Loc, unsigned PredStateReg) {
Register TmpReg = MRI->createVirtualRegister(PS->RC);
// FIXME: This hard codes a shift distance based on the number of bits needed
// to stay canonical on 64-bit. We should compute this somehow and support
@@ -1549,7 +1550,7 @@ void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
/// Extracts the predicate state stored in the high bits of the stack pointer.
unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
- DebugLoc Loc) {
+ const DebugLoc &Loc) {
Register PredStateReg = MRI->createVirtualRegister(PS->RC);
Register TmpReg = MRI->createVirtualRegister(PS->RC);
@@ -1907,7 +1908,7 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
/// register class as `Reg`.
unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
- DebugLoc Loc) {
+ const DebugLoc &Loc) {
assert(canHardenRegister(Reg) && "Cannot harden this register!");
assert(Reg.isVirtual() && "Cannot harden a physical register!");
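
The signature churn above is one mechanical change: DebugLoc parameters become const DebugLoc &. llvm::DebugLoc holds a tracked metadata handle, so copying it is not free, and helpers that merely forward the location into BuildMI gain nothing from taking it by value. A generic illustration with a stand-in type whose copies bump a reference count, loosely analogous to the bookkeeping the real type does:

// By-value vs. const& for a handle-like type; FakeDebugLoc is a stand-in.
#include <cstdio>
#include <memory>

struct FakeDebugLoc {
  std::shared_ptr<int> Node = std::make_shared<int>(0);
};

void emitByValue(FakeDebugLoc Loc) {      // copies on every call
  std::printf("owners inside: %ld\n", Loc.Node.use_count());
}
void emitByRef(const FakeDebugLoc &Loc) { // no copy, read-only access
  std::printf("owners inside: %ld\n", Loc.Node.use_count());
}

int main() {
  FakeDebugLoc L;
  emitByValue(L); // prints 2: the argument is a second owner
  emitByRef(L);   // prints 1: just a reference
}
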
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index a3d4d04b1e0d..0d091adc8e77 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -21,6 +21,8 @@
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
@@ -247,7 +249,7 @@ bool X86Subtarget::isLegalToCallImmediateAddr() const {
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
// the following check for Win32 should be removed.
- if (In64BitMode || isTargetWin32())
+ if (Is64Bit || isTargetWin32())
return false;
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
@@ -274,12 +276,12 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
// introduced with Intel's Nehalem/Silvermont and AMD's Family10h
// micro-architectures respectively.
if (hasSSE42() || hasSSE4A())
- IsUAMem16Slow = false;
+ IsUnalignedMem16Slow = false;
LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel << ", 64bit "
<< HasX86_64 << "\n");
- if (In64BitMode && !HasX86_64)
+ if (Is64Bit && !HasX86_64)
report_fatal_error("64-bit code requested on a subtarget that doesn't "
"support it!");
@@ -289,7 +291,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
if (StackAlignOverride)
stackAlignment = *StackAlignOverride;
else if (isTargetDarwin() || isTargetLinux() || isTargetKFreeBSD() ||
- isTargetNaCl() || In64BitMode)
+ isTargetNaCl() || Is64Bit)
stackAlignment = Align(16);
// Consume the vector width attribute or apply any target specific limit.
@@ -357,7 +359,7 @@ const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
}
bool X86Subtarget::enableEarlyIfConversion() const {
- return hasCMov() && X86EarlyIfConv;
+ return canUseCMOV() && X86EarlyIfConv;
}
void X86Subtarget::getPostRAMutations(
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 5d773f0c57df..09a8b1f1aafb 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -50,24 +50,14 @@ enum class Style {
} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
- // NOTE: Do not add anything new to this list. Coarse, CPU name based flags
- // are not a good idea. We should be migrating away from these.
- enum X86ProcFamilyEnum {
- Others,
- IntelAtom
- };
-
enum X86SSEEnum {
- NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
+ NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
};
enum X863DNowEnum {
NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
};
- /// X86 processor family: Intel Atom, and others
- X86ProcFamilyEnum X86ProcFamily = Others;
-
/// Which PIC style to use
PICStyles::Style PICStyle;
@@ -79,412 +69,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// MMX, 3DNow, 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel = NoThreeDNow;
- /// True if the processor supports X87 instructions.
- bool HasX87 = false;
-
- /// True if the processor supports CMPXCHG8B.
- bool HasCmpxchg8b = false;
-
- /// True if this processor has NOPL instruction
- /// (generally pentium pro+).
- bool HasNOPL = false;
-
- /// True if this processor has conditional move instructions
- /// (generally pentium pro+).
- bool HasCMov = false;
-
- /// True if the processor supports X86-64 instructions.
- bool HasX86_64 = false;
-
- /// True if the processor supports POPCNT.
- bool HasPOPCNT = false;
-
- /// True if the processor supports SSE4A instructions.
- bool HasSSE4A = false;
-
- /// Target has AES instructions
- bool HasAES = false;
- bool HasVAES = false;
-
- /// Target has FXSAVE/FXRESTOR instructions
- bool HasFXSR = false;
-
- /// Target has XSAVE instructions
- bool HasXSAVE = false;
-
- /// Target has XSAVEOPT instructions
- bool HasXSAVEOPT = false;
-
- /// Target has XSAVEC instructions
- bool HasXSAVEC = false;
-
- /// Target has XSAVES instructions
- bool HasXSAVES = false;
-
- /// Target has carry-less multiplication
- bool HasPCLMUL = false;
- bool HasVPCLMULQDQ = false;
-
- /// Target has Galois Field Arithmetic instructions
- bool HasGFNI = false;
-
- /// Target has 3-operand fused multiply-add
- bool HasFMA = false;
-
- /// Target has 4-operand fused multiply-add
- bool HasFMA4 = false;
-
- /// Target has XOP instructions
- bool HasXOP = false;
-
- /// Target has TBM instructions.
- bool HasTBM = false;
-
- /// Target has LWP instructions
- bool HasLWP = false;
-
- /// True if the processor has the MOVBE instruction.
- bool HasMOVBE = false;
-
- /// True if the processor has the RDRAND instruction.
- bool HasRDRAND = false;
-
- /// Processor has 16-bit floating point conversion instructions.
- bool HasF16C = false;
-
- /// Processor has FS/GS base insturctions.
- bool HasFSGSBase = false;
-
- /// Processor has LZCNT instruction.
- bool HasLZCNT = false;
-
- /// Processor has BMI1 instructions.
- bool HasBMI = false;
-
- /// Processor has BMI2 instructions.
- bool HasBMI2 = false;
-
- /// Processor has VBMI instructions.
- bool HasVBMI = false;
-
- /// Processor has VBMI2 instructions.
- bool HasVBMI2 = false;
-
- /// Processor has Integer Fused Multiply Add
- bool HasIFMA = false;
-
- /// Processor has RTM instructions.
- bool HasRTM = false;
-
- /// Processor has ADX instructions.
- bool HasADX = false;
-
- /// Processor has SHA instructions.
- bool HasSHA = false;
-
- /// Processor has PRFCHW instructions.
- bool HasPRFCHW = false;
-
- /// Processor has RDSEED instructions.
- bool HasRDSEED = false;
-
- /// Processor has LAHF/SAHF instructions in 64-bit mode.
- bool HasLAHFSAHF64 = false;
-
- /// Processor has MONITORX/MWAITX instructions.
- bool HasMWAITX = false;
-
- /// Processor has Cache Line Zero instruction
- bool HasCLZERO = false;
-
- /// Processor has Cache Line Demote instruction
- bool HasCLDEMOTE = false;
-
- /// Processor has MOVDIRI instruction (direct store integer).
- bool HasMOVDIRI = false;
-
- /// Processor has MOVDIR64B instruction (direct store 64 bytes).
- bool HasMOVDIR64B = false;
-
- /// Processor has ptwrite instruction.
- bool HasPTWRITE = false;
-
- /// Processor has Prefetch with intent to Write instruction
- bool HasPREFETCHWT1 = false;
-
- /// True if SHLD instructions are slow.
- bool IsSHLDSlow = false;
-
- /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
- // PMULUDQ.
- bool IsPMULLDSlow = false;
-
- /// True if the PMADDWD instruction is slow compared to PMULLD.
- bool IsPMADDWDSlow = false;
-
- /// True if unaligned memory accesses of 16-bytes are slow.
- bool IsUAMem16Slow = false;
-
- /// True if unaligned memory accesses of 32-bytes are slow.
- bool IsUAMem32Slow = false;
-
- /// True if SSE operations can have unaligned memory operands.
- /// This may require setting a configuration bit in the processor.
- bool HasSSEUnalignedMem = false;
-
- /// True if this processor has the CMPXCHG16B instruction;
- /// this is true for most x86-64 chips, but not the first AMD chips.
- bool HasCmpxchg16b = false;
-
- /// True if the LEA instruction should be used for adjusting
- /// the stack pointer. This is an optimization for Intel Atom processors.
- bool UseLeaForSP = false;
-
- /// True if POPCNT instruction has a false dependency on the destination register.
- bool HasPOPCNTFalseDeps = false;
-
- /// True if LZCNT/TZCNT instructions have a false dependency on the destination register.
- bool HasLZCNTFalseDeps = false;
-
- /// True if its preferable to combine to a single cross-lane shuffle
- /// using a variable mask over multiple fixed shuffles.
- bool HasFastVariableCrossLaneShuffle = false;
-
- /// True if its preferable to combine to a single per-lane shuffle
- /// using a variable mask over multiple fixed shuffles.
- bool HasFastVariablePerLaneShuffle = false;
-
- /// True if vzeroupper instructions should be inserted after code that uses
- /// ymm or zmm registers.
- bool InsertVZEROUPPER = false;
-
- /// True if there is no performance penalty for writing NOPs with up to
- /// 7 bytes.
- bool HasFast7ByteNOP = false;
-
- /// True if there is no performance penalty for writing NOPs with up to
- /// 11 bytes.
- bool HasFast11ByteNOP = false;
-
- /// True if there is no performance penalty for writing NOPs with up to
- /// 15 bytes.
- bool HasFast15ByteNOP = false;
-
- /// True if gather is reasonably fast. This is true for Skylake client and
- /// all AVX-512 CPUs.
- bool HasFastGather = false;
-
- /// True if hardware SQRTSS instruction is at least as fast (latency) as
- /// RSQRTSS followed by a Newton-Raphson iteration.
- bool HasFastScalarFSQRT = false;
-
- /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
- /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
- bool HasFastVectorFSQRT = false;
-
- /// True if 8-bit divisions are significantly faster than
- /// 32-bit divisions and should be used when possible.
- bool HasSlowDivide32 = false;
-
- /// True if 32-bit divides are significantly faster than
- /// 64-bit divisions and should be used when possible.
- bool HasSlowDivide64 = false;
-
- /// True if LZCNT instruction is fast.
- bool HasFastLZCNT = false;
-
- /// True if SHLD based rotate is fast.
- bool HasFastSHLDRotate = false;
-
- /// True if the processor supports macrofusion.
- bool HasMacroFusion = false;
-
- /// True if the processor supports branch fusion.
- bool HasBranchFusion = false;
-
- /// True if the processor has enhanced REP MOVSB/STOSB.
- bool HasERMSB = false;
-
- /// True if the processor has fast short REP MOV.
- bool HasFSRM = false;
-
- /// True if the short functions should be padded to prevent
- /// a stall when returning too early.
- bool PadShortFunctions = false;
-
- /// True if two memory operand instructions should use a temporary register
- /// instead.
- bool SlowTwoMemOps = false;
-
- /// True if the LEA instruction inputs have to be ready at address generation
- /// (AG) time.
- bool LEAUsesAG = false;
-
- /// True if the LEA instruction with certain arguments is slow
- bool SlowLEA = false;
-
- /// True if the LEA instruction has all three source operands: base, index,
- /// and offset or if the LEA instruction uses base and index registers where
- /// the base is EBP, RBP,or R13
- bool Slow3OpsLEA = false;
-
- /// True if INC and DEC instructions are slow when writing to flags
- bool SlowIncDec = false;
-
- /// Processor has AVX-512 PreFetch Instructions
- bool HasPFI = false;
-
- /// Processor has AVX-512 Exponential and Reciprocal Instructions
- bool HasERI = false;
-
- /// Processor has AVX-512 Conflict Detection Instructions
- bool HasCDI = false;
-
- /// Processor has AVX-512 population count Instructions
- bool HasVPOPCNTDQ = false;
-
- /// Processor has AVX-512 Doubleword and Quadword instructions
- bool HasDQI = false;
-
- /// Processor has AVX-512 Byte and Word instructions
- bool HasBWI = false;
-
- /// Processor has AVX-512 Vector Length eXtenstions
- bool HasVLX = false;
-
- /// Processor has AVX-512 16 bit floating-point extenstions
- bool HasFP16 = false;
-
- /// Processor has PKU extenstions
- bool HasPKU = false;
-
- /// Processor has AVX-512 Vector Neural Network Instructions
- bool HasVNNI = false;
-
- /// Processor has AVX Vector Neural Network Instructions
- bool HasAVXVNNI = false;
-
- /// Processor has AVX-512 bfloat16 floating-point extensions
- bool HasBF16 = false;
-
- /// Processor supports ENQCMD instructions
- bool HasENQCMD = false;
-
- /// Processor has AVX-512 Bit Algorithms instructions
- bool HasBITALG = false;
-
- /// Processor has AVX-512 vp2intersect instructions
- bool HasVP2INTERSECT = false;
-
- /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
- /// using Shadow Stack
- bool HasSHSTK = false;
-
- /// Processor supports Invalidate Process-Context Identifier
- bool HasINVPCID = false;
-
- /// Processor has Software Guard Extensions
- bool HasSGX = false;
-
- /// Processor supports Flush Cache Line instruction
- bool HasCLFLUSHOPT = false;
-
- /// Processor supports Cache Line Write Back instruction
- bool HasCLWB = false;
-
- /// Processor supports Write Back No Invalidate instruction
- bool HasWBNOINVD = false;
-
- /// Processor support RDPID instruction
- bool HasRDPID = false;
-
- /// Processor supports WaitPKG instructions
- bool HasWAITPKG = false;
-
- /// Processor supports PCONFIG instruction
- bool HasPCONFIG = false;
-
- /// Processor support key locker instructions
- bool HasKL = false;
-
- /// Processor support key locker wide instructions
- bool HasWIDEKL = false;
-
- /// Processor supports HRESET instruction
- bool HasHRESET = false;
-
- /// Processor supports SERIALIZE instruction
- bool HasSERIALIZE = false;
-
- /// Processor supports TSXLDTRK instruction
- bool HasTSXLDTRK = false;
-
- /// Processor has AMX support
- bool HasAMXTILE = false;
- bool HasAMXBF16 = false;
- bool HasAMXINT8 = false;
-
- /// Processor supports User Level Interrupt instructions
- bool HasUINTR = false;
-
- /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but
- /// function is GPR only)
- bool HasCRC32 = false;
-
- /// Processor has a single uop BEXTR implementation.
- bool HasFastBEXTR = false;
-
- /// Try harder to combine to horizontal vector ops if they are fast.
- bool HasFastHorizontalOps = false;
-
- /// Prefer a left/right scalar logical shifts pair over a shift+and pair.
- bool HasFastScalarShiftMasks = false;
-
- /// Prefer a left/right vector logical shifts pair over a shift+and pair.
- bool HasFastVectorShiftMasks = false;
-
- /// Prefer a movbe over a single-use load + bswap / single-use bswap + store.
- bool HasFastMOVBE = false;
-
- /// Use a retpoline thunk rather than indirect calls to block speculative
- /// execution.
- bool UseRetpolineIndirectCalls = false;
-
- /// Use a retpoline thunk or remove any indirect branch to block speculative
- /// execution.
- bool UseRetpolineIndirectBranches = false;
-
- /// Deprecated flag, query `UseRetpolineIndirectCalls` and
- /// `UseRetpolineIndirectBranches` instead.
- bool DeprecatedUseRetpoline = false;
-
- /// When using a retpoline thunk, call an externally provided thunk rather
- /// than emitting one inside the compiler.
- bool UseRetpolineExternalThunk = false;
-
- /// Prevent generation of indirect call/branch instructions from memory,
- /// and force all indirect call/branch instructions from a register to be
- /// preceded by an LFENCE. Also decompose RET instructions into a
- /// POP+LFENCE+JMP sequence.
- bool UseLVIControlFlowIntegrity = false;
-
- /// Enable Speculative Execution Side Effect Suppression
- bool UseSpeculativeExecutionSideEffectSuppression = false;
-
- /// Insert LFENCE instructions to prevent data speculatively injected into
- /// loads from being used maliciously.
- bool UseLVILoadHardening = false;
-
- /// Use an instruction sequence for taking the address of a global that allows
- /// a memory tag in the upper address bits.
- bool AllowTaggedGlobals = false;
-
- /// Use software floating point for code generation.
- bool UseSoftFloat = false;
-
- /// Use alias analysis during code generation.
- bool UseAA = false;
-
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "X86GenSubtargetInfo.inc"
/// The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
Align stackAlignment = Align(4);
@@ -496,21 +83,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
// FIXME: this is a known good value for Yonah. How about others?
unsigned MaxInlineSizeThreshold = 128;
- /// Indicates target prefers 128 bit instructions.
- bool Prefer128Bit = false;
-
- /// Indicates target prefers 256 bit instructions.
- bool Prefer256Bit = false;
-
- /// Indicates target prefers AVX512 mask registers.
- bool PreferMaskRegisters = false;
-
- /// Use Silvermont specific arithmetic costs.
- bool UseSLMArithCosts = false;
-
- /// Use Goldmont specific floating point div/sqrt costs.
- bool UseGLMDivSqrtCosts = false;
-
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -520,7 +92,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
std::unique_ptr<RegisterBankInfo> RegBankInfo;
std::unique_ptr<InstructionSelector> InstSelector;
-private:
/// Override the stack alignment.
MaybeAlign StackAlignOverride;
@@ -534,15 +105,6 @@ private:
/// Required vector width from function attribute.
unsigned RequiredVectorWidth;
- /// True if compiling for 64-bit, false for 16-bit or 32-bit.
- bool In64BitMode = false;
-
- /// True if compiling for 32-bit, false for 16-bit or 64-bit.
- bool In32BitMode = false;
-
- /// True if compiling for 16-bit, false for 32-bit or 64-bit.
- bool In16BitMode = false;
-
X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
// X86TargetLowering needs.
@@ -608,38 +170,32 @@ private:
void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
public:
- /// Is this x86_64? (disregarding specific ABI / programming model)
- bool is64Bit() const {
- return In64BitMode;
- }
- bool is32Bit() const {
- return In32BitMode;
- }
-
- bool is16Bit() const {
- return In16BitMode;
- }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const { return ATTRIBUTE; }
+#include "X86GenSubtargetInfo.inc"
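The two GET_SUBTARGETINFO_MACRO blocks above replace the hand-maintained feature fields and getters: TableGen emits one macro invocation per subtarget feature into X86GenSubtargetInfo.inc, and each #define/#include pair expands those invocations, first into bool members and then into trivial getters. A minimal standalone sketch of the X-macro technique, with a hypothetical two-feature list standing in for the generated .inc:

#define FEATURE_LIST(X)                                                        \
  X(HasAVXVNNI, false, hasAVXVNNI)                                             \
  X(IsSHLDSlow, false, isSHLDSlow)

struct SubtargetSketch {
  // First expansion: one default-initialized flag per feature.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
  FEATURE_LIST(GET_SUBTARGETINFO_MACRO)
#undef GET_SUBTARGETINFO_MACRO
  // Second expansion: one trivial getter per feature.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
  FEATURE_LIST(GET_SUBTARGETINFO_MACRO)
#undef GET_SUBTARGETINFO_MACRO
};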
/// Is this x86_64 with the ILP32 programming model (x32 ABI)?
bool isTarget64BitILP32() const {
- return In64BitMode && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
+ return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
}
/// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
bool isTarget64BitLP64() const {
- return In64BitMode && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
+ return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
}
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
- bool hasX87() const { return HasX87; }
- bool hasCmpxchg8b() const { return HasCmpxchg8b; }
- bool hasNOPL() const { return HasNOPL; }
+ bool canUseCMPXCHG8B() const { return hasCX8(); }
+ bool canUseCMPXCHG16B() const {
+ // CX16 is just the CPUID bit; the instruction also requires 64-bit mode.
+ return hasCX16() && is64Bit();
+ }
// SSE codegen depends on cmovs, and all SSE1+ processors support them.
// All 64-bit processors support cmov.
- bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
+ bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
@@ -648,146 +204,26 @@ public:
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
- bool hasAVX512() const { return X86SSELevel >= AVX512F; }
+ bool hasAVX512() const { return X86SSELevel >= AVX512; }
bool hasInt256() const { return hasAVX2(); }
- bool hasSSE4A() const { return HasSSE4A; }
bool hasMMX() const { return X863DNowLevel >= MMX; }
- bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
- bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
- bool hasPOPCNT() const { return HasPOPCNT; }
- bool hasAES() const { return HasAES; }
- bool hasVAES() const { return HasVAES; }
- bool hasFXSR() const { return HasFXSR; }
- bool hasXSAVE() const { return HasXSAVE; }
- bool hasXSAVEOPT() const { return HasXSAVEOPT; }
- bool hasXSAVEC() const { return HasXSAVEC; }
- bool hasXSAVES() const { return HasXSAVES; }
- bool hasPCLMUL() const { return HasPCLMUL; }
- bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
- bool hasGFNI() const { return HasGFNI; }
- // Prefer FMA4 to FMA - it's better for commutation/memory folding and
- // has equal or better performance on all supported targets.
- bool hasFMA() const { return HasFMA; }
- bool hasFMA4() const { return HasFMA4; }
+ bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
- bool hasXOP() const { return HasXOP; }
- bool hasTBM() const { return HasTBM; }
- bool hasLWP() const { return HasLWP; }
- bool hasMOVBE() const { return HasMOVBE; }
- bool hasRDRAND() const { return HasRDRAND; }
- bool hasF16C() const { return HasF16C; }
- bool hasFSGSBase() const { return HasFSGSBase; }
- bool hasLZCNT() const { return HasLZCNT; }
- bool hasBMI() const { return HasBMI; }
- bool hasBMI2() const { return HasBMI2; }
- bool hasVBMI() const { return HasVBMI; }
- bool hasVBMI2() const { return HasVBMI2; }
- bool hasIFMA() const { return HasIFMA; }
- bool hasRTM() const { return HasRTM; }
- bool hasADX() const { return HasADX; }
- bool hasSHA() const { return HasSHA; }
- bool hasPRFCHW() const { return HasPRFCHW; }
- bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
bool hasPrefetchW() const {
// The PREFETCHW instruction was added with 3DNow but later CPUs gave it
// its own CPUID bit as part of deprecating 3DNow. Intel eventually added
// it and KNL has another that prefetches to L2 cache. We assume the
// L1 version exists if the L2 version does.
- return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
+ return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
}
bool hasSSEPrefetch() const {
// We implicitly enable these when we have a write prefetch supporting cache
// level OR if we have prfchw, but don't already have a read prefetch from
// 3dnow.
- return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
- }
- bool hasRDSEED() const { return HasRDSEED; }
- bool hasLAHFSAHF() const { return HasLAHFSAHF64 || !is64Bit(); }
- bool hasMWAITX() const { return HasMWAITX; }
- bool hasCLZERO() const { return HasCLZERO; }
- bool hasCLDEMOTE() const { return HasCLDEMOTE; }
- bool hasMOVDIRI() const { return HasMOVDIRI; }
- bool hasMOVDIR64B() const { return HasMOVDIR64B; }
- bool hasPTWRITE() const { return HasPTWRITE; }
- bool isSHLDSlow() const { return IsSHLDSlow; }
- bool isPMULLDSlow() const { return IsPMULLDSlow; }
- bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
- bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
- bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
- bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
- bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
- bool useLeaForSP() const { return UseLeaForSP; }
- bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
- bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
- bool hasFastVariableCrossLaneShuffle() const {
- return HasFastVariableCrossLaneShuffle;
- }
- bool hasFastVariablePerLaneShuffle() const {
- return HasFastVariablePerLaneShuffle;
+ return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1();
}
- bool insertVZEROUPPER() const { return InsertVZEROUPPER; }
- bool hasFastGather() const { return HasFastGather; }
- bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
- bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
- bool hasFastLZCNT() const { return HasFastLZCNT; }
- bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
- bool hasFastBEXTR() const { return HasFastBEXTR; }
- bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
- bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
- bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
- bool hasFastMOVBE() const { return HasFastMOVBE; }
- bool hasMacroFusion() const { return HasMacroFusion; }
- bool hasBranchFusion() const { return HasBranchFusion; }
- bool hasERMSB() const { return HasERMSB; }
- bool hasFSRM() const { return HasFSRM; }
- bool hasSlowDivide32() const { return HasSlowDivide32; }
- bool hasSlowDivide64() const { return HasSlowDivide64; }
- bool padShortFunctions() const { return PadShortFunctions; }
- bool slowTwoMemOps() const { return SlowTwoMemOps; }
- bool LEAusesAG() const { return LEAUsesAG; }
- bool slowLEA() const { return SlowLEA; }
- bool slow3OpsLEA() const { return Slow3OpsLEA; }
- bool slowIncDec() const { return SlowIncDec; }
- bool hasCDI() const { return HasCDI; }
- bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
- bool hasPFI() const { return HasPFI; }
- bool hasERI() const { return HasERI; }
- bool hasDQI() const { return HasDQI; }
- bool hasBWI() const { return HasBWI; }
- bool hasVLX() const { return HasVLX; }
- bool hasFP16() const { return HasFP16; }
- bool hasPKU() const { return HasPKU; }
- bool hasVNNI() const { return HasVNNI; }
- bool hasBF16() const { return HasBF16; }
- bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
- bool hasBITALG() const { return HasBITALG; }
- bool hasSHSTK() const { return HasSHSTK; }
- bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
- bool hasCLWB() const { return HasCLWB; }
- bool hasWBNOINVD() const { return HasWBNOINVD; }
- bool hasRDPID() const { return HasRDPID; }
- bool hasWAITPKG() const { return HasWAITPKG; }
- bool hasPCONFIG() const { return HasPCONFIG; }
- bool hasSGX() const { return HasSGX; }
- bool hasINVPCID() const { return HasINVPCID; }
- bool hasENQCMD() const { return HasENQCMD; }
- bool hasKL() const { return HasKL; }
- bool hasWIDEKL() const { return HasWIDEKL; }
- bool hasHRESET() const { return HasHRESET; }
- bool hasSERIALIZE() const { return HasSERIALIZE; }
- bool hasTSXLDTRK() const { return HasTSXLDTRK; }
- bool hasUINTR() const { return HasUINTR; }
- bool hasCRC32() const { return HasCRC32; }
- bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
- bool useRetpolineIndirectBranches() const {
- return UseRetpolineIndirectBranches;
- }
- bool hasAVXVNNI() const { return HasAVXVNNI; }
- bool hasAMXTILE() const { return HasAMXTILE; }
- bool hasAMXBF16() const { return HasAMXBF16; }
- bool hasAMXINT8() const { return HasAMXINT8; }
- bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
-
+ bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
// These are generic getters that OR together all of the thunk types
// supported by the subtarget. Therefore useIndirectThunk*() will return true
// if any respective thunk feature is enabled.
@@ -798,16 +234,6 @@ public:
return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
}
- bool preferMaskRegisters() const { return PreferMaskRegisters; }
- bool useSLMArithCosts() const { return UseSLMArithCosts; }
- bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
- bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
- bool allowTaggedGlobals() const { return AllowTaggedGlobals; }
- bool useLVILoadHardening() const { return UseLVILoadHardening; }
- bool useSpeculativeExecutionSideEffectSuppression() const {
- return UseSpeculativeExecutionSideEffectSuppression;
- }
-
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -834,11 +260,6 @@ public:
bool isXRaySupported() const override { return is64Bit(); }
- /// TODO: to be removed later and replaced with suitable properties
- bool isAtom() const { return X86ProcFamily == IntelAtom; }
- bool useSoftFloat() const { return UseSoftFloat; }
- bool useAA() const override { return UseAA; }
-
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
/// no-sse2). There isn't any reason to disable it if the target processor
/// supports it.
@@ -850,7 +271,7 @@ public:
bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
- bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
+ bool isTargetPS() const { return TargetTriple.isPS(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
@@ -890,9 +311,9 @@ public:
bool isOSWindows() const { return TargetTriple.isOSWindows(); }
- bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
+ bool isTargetWin64() const { return Is64Bit && isOSWindows(); }
- bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
+ bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }
bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
@@ -990,8 +411,6 @@ public:
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
-
- bool enableAdvancedRASplitCost() const override { return false; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index e3d0128dd73d..4249788e3540 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -27,13 +27,16 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDomainFix.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
@@ -56,6 +59,11 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableTileRAPass("x86-tile-ra",
+ cl::desc("Enable the tile register allocation pass"),
+ cl::init(true), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target());
@@ -65,6 +73,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86LowerAMXIntrinsicsLegacyPassPass(PR);
initializeX86LowerAMXTypeLegacyPassPass(PR);
initializeX86PreAMXConfigPassPass(PR);
+ initializeX86PreTileConfigPass(PR);
initializeGlobalISel(PR);
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
@@ -75,6 +84,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86CallFrameOptimizationPass(PR);
initializeX86CmovConverterPassPass(PR);
initializeX86TileConfigPass(PR);
+ initializeX86FastPreTileConfigPass(PR);
initializeX86FastTileConfigPass(PR);
initializeX86LowerTileCopyPass(PR);
initializeX86ExpandPseudoPass(PR);
@@ -154,7 +164,7 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
bool JIT,
Optional<Reloc::Model> RM) {
bool is64Bit = TT.getArch() == Triple::x86_64;
- if (!RM.hasValue()) {
+ if (!RM) {
// JIT codegen should use static relocations by default, since it's
// typically executed in process and not relocatable.
if (JIT)
@@ -218,9 +228,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
OL),
TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) {
- // On PS4, the "return address" of a 'noreturn' call must still be within
+ // On PS4/PS5, the "return address" of a 'noreturn' call must still be within
// the calling function, and TrapUnreachable is an easy way to get that.
- if (TT.isPS4() || TT.isOSBinFormatMachO()) {
+ if (TT.isPS() || TT.isOSBinFormatMachO()) {
this->Options.TrapUnreachable = true;
this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO();
}
@@ -333,7 +343,7 @@ bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
//===----------------------------------------------------------------------===//
TargetTransformInfo
-X86TargetMachine::getTargetTransformInfo(const Function &F) {
+X86TargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(X86TTIImpl(this, F));
}
@@ -382,7 +392,7 @@ public:
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreSched2() override;
- bool addPreRewrite() override;
+ bool addRegAssignAndRewriteOptimized() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
@@ -417,9 +427,6 @@ void X86PassConfig::addIRPasses() {
addPass(createX86LowerAMXIntrinsicsPass());
addPass(createX86LowerAMXTypePass());
- if (TM->getOptLevel() == CodeGenOpt::None)
- addPass(createX86PreAMXConfigPass());
-
TargetPassConfig::addIRPasses();
if (TM->getOptLevel() != CodeGenOpt::None) {
@@ -441,6 +448,9 @@ void X86PassConfig::addIRPasses() {
addPass(createCFGuardCheckPass());
}
}
+
+ if (TM->Options.JMCInstrument)
+ addPass(createJMCInstrumenterPass());
}
bool X86PassConfig::addInstSelector() {
@@ -505,9 +515,10 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86FlagsCopyLoweringPass());
addPass(createX86DynAllocaExpander());
- if (getOptLevel() != CodeGenOpt::None) {
+ if (getOptLevel() != CodeGenOpt::None)
addPass(createX86PreTileConfigPass());
- }
+ else
+ addPass(createX86FastPreTileConfigPass());
}
void X86PassConfig::addMachineSSAOptimization() {
@@ -607,11 +618,21 @@ bool X86PassConfig::addPostFastRegAllocRewrite() {
return true;
}
-bool X86PassConfig::addPreRewrite() {
- addPass(createX86TileConfigPass());
- return true;
-}
-
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
return getStandardCSEConfigForOpt(TM->getOptLevel());
}
+
+static bool onlyAllocateTileRegisters(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC) {
+ return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(&RC);
+}
+
+bool X86PassConfig::addRegAssignAndRewriteOptimized() {
+ // Don't support tile RA when the allocator is chosen on the command line
+ // via "-regalloc".
+ if (!isCustomizedRegAlloc() && EnableTileRAPass) {
+ // Allocate tile registers first.
+ addPass(createGreedyRegisterAllocator(onlyAllocateTileRegisters));
+ addPass(createX86TileConfigPass());
+ }
+ return TargetPassConfig::addRegAssignAndRewriteOptimized();
+}
diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h
index 69d7e48b8977..70df8da77641 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/llvm/lib/Target/X86/X86TargetMachine.h
@@ -44,7 +44,7 @@ public:
// attributes of each function.
const X86Subtarget *getSubtargetImpl() const = delete;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
// Set up the pass pipeline.
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 5b95c10332dc..b36f8a3d06d0 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1085,7 +1085,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *BaseTp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp) {
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
@@ -1223,6 +1224,63 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
LegalVT.getVectorNumElements());
+ if (!Mask.empty() && NumOfDests.isValid()) {
+ // Try to perform a better estimation of the permutation:
+ // 1. Split the source/destination vectors into real registers.
+ // 2. Analyze the mask to identify which real registers are
+ // permuted. If more than one source register is used to build a
+ // destination register, the cost for that destination register is
+ // (Number_of_source_registers - 1) * Cost_PermuteTwoSrc. If only one
+ // source register is used, build the mask and calculate the cost as
+ // the cost of PermuteSingleSrc.
+ // Also, for the single-register permute we try to identify whether the
+ // destination register is just a copy of the source register or a copy
+ // of the previous destination register (the cost is TTI::TCC_Basic).
+ // If the source register is simply reused, the cost for this operation
+ // is 0. A standalone sketch of this per-register analysis follows this
+ // block.
+ unsigned E = *NumOfDests.getValue();
+ unsigned NormalizedVF =
+ LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
+ unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
+ unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
+ SmallVector<int> NormalizedMask(NormalizedVF, UndefMaskElem);
+ copy(Mask, NormalizedMask.begin());
+ unsigned PrevSrcReg = 0;
+ ArrayRef<int> PrevRegMask;
+ InstructionCost Cost = 0;
+ processShuffleMasks(
+ NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
+ [this, SingleOpTy, &PrevSrcReg, &PrevRegMask,
+ &Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
+ if (!ShuffleVectorInst::isIdentityMask(RegMask)) {
+ // Check if the previous register can be just copied to the next
+ // one.
+ if (PrevRegMask.empty() || PrevSrcReg != SrcReg ||
+ PrevRegMask != RegMask)
+ Cost += getShuffleCost(TTI::SK_PermuteSingleSrc, SingleOpTy,
+ RegMask, 0, nullptr);
+ else
+ // Just a copy of previous destination register.
+ Cost += TTI::TCC_Basic;
+ return;
+ }
+ if (SrcReg != DestReg &&
+ any_of(RegMask, [](int I) { return I != UndefMaskElem; })) {
+ // Just a copy of the source register.
+ Cost += TTI::TCC_Basic;
+ }
+ PrevSrcReg = SrcReg;
+ PrevRegMask = RegMask;
+ },
+ [this, SingleOpTy, &Cost](ArrayRef<int> RegMask,
+ unsigned /*Unused*/,
+ unsigned /*Unused*/) {
+ Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, RegMask,
+ 0, nullptr);
+ });
+ return Cost;
+ }
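A minimal standalone sketch of the per-register mask analysis described in the comment above, assuming a flat shuffle mask where element i selects source element Mask[i] (UndefMaskElem modeled as -1) and every legal register holds RegNumElts elements; processShuffleMasks performs the real traversal, this only counts how many distinct source registers feed each destination register:

#include <set>
#include <vector>

std::vector<unsigned> countSrcRegsPerDstReg(const std::vector<int> &Mask,
                                            unsigned RegNumElts) {
  unsigned NumDstRegs = (Mask.size() + RegNumElts - 1) / RegNumElts;
  std::vector<unsigned> Counts(NumDstRegs, 0);
  for (unsigned Dst = 0; Dst != NumDstRegs; ++Dst) {
    std::set<unsigned> SrcRegs;
    for (unsigned I = 0; I != RegNumElts; ++I) {
      unsigned Elt = Dst * RegNumElts + I;
      if (Elt < Mask.size() && Mask[Elt] >= 0)
        SrcRegs.insert(Mask[Elt] / RegNumElts); // source register of this lane
    }
    // 1 source -> PermuteSingleSrc (or a free copy); N -> (N-1) * PermuteTwoSrc.
    Counts[Dst] = SrcRegs.size();
  }
  return Counts;
}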
+
InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy,
None, 0, nullptr);
@@ -1545,9 +1603,25 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute
};
- if (ST->hasSSE2())
+ static const CostTblEntry SSE3BroadcastLoadTbl[] = {
+ {TTI::SK_Broadcast, MVT::v2f64, 0}, // broadcast handled by movddup
+ };
+
+ if (ST->hasSSE2()) {
+ bool IsLoad =
+ llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
+ if (ST->hasSSE3() && IsLoad)
+ if (const auto *Entry =
+ CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
+ assert(isLegalBroadcastLoad(BaseTp->getElementType(),
+ LT.second.getVectorElementCount()) &&
+ "Table entry missing from isLegalBroadcastLoad()");
+ return LT.first * Entry->Cost;
+ }
+
if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
+ }
static const CostTblEntry SSE1ShuffleTbl[] = {
{ TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
@@ -2444,6 +2518,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
std::pair<InstructionCost, MVT> LTDest =
TLI->getTypeLegalizationCost(DL, Dst);
+ // If we're truncating to the same legalized type, just assume it's free.
+ if (ISD == ISD::TRUNCATE && LTSrc.second == LTDest.second)
+ return TTI::TCC_Free;
+
if (ST->useAVX512Regs()) {
if (ST->hasBWI())
if (const auto *Entry = ConvertCostTableLookup(
@@ -2545,7 +2623,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- unsigned ExtraCost = 0;
+ InstructionCost ExtraCost = 0;
if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
// Some vector comparison predicates cost extra instructions.
// TODO: Should we invert this and assume worst case cmp costs
@@ -2619,15 +2697,29 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16f32, 1 },
{ ISD::SELECT, MVT::v8i64, 1 },
+ { ISD::SELECT, MVT::v4i64, 1 },
+ { ISD::SELECT, MVT::v2i64, 1 },
{ ISD::SELECT, MVT::v16i32, 1 },
+ { ISD::SELECT, MVT::v8i32, 1 },
+ { ISD::SELECT, MVT::v4i32, 1 },
{ ISD::SELECT, MVT::v8f64, 1 },
+ { ISD::SELECT, MVT::v4f64, 1 },
+ { ISD::SELECT, MVT::v2f64, 1 },
+ { ISD::SELECT, MVT::f64, 1 },
{ ISD::SELECT, MVT::v16f32, 1 },
+ { ISD::SELECT, MVT::v8f32 , 1 },
+ { ISD::SELECT, MVT::v4f32, 1 },
+ { ISD::SELECT, MVT::f32 , 1 },
{ ISD::SETCC, MVT::v32i16, 2 }, // FIXME: should probably be 4
{ ISD::SETCC, MVT::v64i8, 2 }, // FIXME: should probably be 4
- { ISD::SELECT, MVT::v32i16, 2 }, // FIXME: should be 3
- { ISD::SELECT, MVT::v64i8, 2 }, // FIXME: should be 3
+ { ISD::SELECT, MVT::v32i16, 2 },
+ { ISD::SELECT, MVT::v16i16, 1 },
+ { ISD::SELECT, MVT::v8i16, 1 },
+ { ISD::SELECT, MVT::v64i8, 2 },
+ { ISD::SELECT, MVT::v32i8, 1 },
+ { ISD::SELECT, MVT::v16i8, 1 },
};
static const CostTblEntry AVX2CostTbl[] = {
@@ -2636,10 +2728,12 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16i16, 1 },
{ ISD::SETCC, MVT::v32i8, 1 },
- { ISD::SELECT, MVT::v4i64, 1 }, // pblendvb
- { ISD::SELECT, MVT::v8i32, 1 }, // pblendvb
- { ISD::SELECT, MVT::v16i16, 1 }, // pblendvb
- { ISD::SELECT, MVT::v32i8, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v4f64, 2 }, // vblendvpd
+ { ISD::SELECT, MVT::v8f32, 2 }, // vblendvps
+ { ISD::SELECT, MVT::v4i64, 2 }, // pblendvb
+ { ISD::SELECT, MVT::v8i32, 2 }, // pblendvb
+ { ISD::SELECT, MVT::v16i16, 2 }, // pblendvb
+ { ISD::SELECT, MVT::v32i8, 2 }, // pblendvb
};
static const CostTblEntry AVX1CostTbl[] = {
@@ -2651,49 +2745,54 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16i16, 4 },
{ ISD::SETCC, MVT::v32i8, 4 },
- { ISD::SELECT, MVT::v4f64, 1 }, // vblendvpd
- { ISD::SELECT, MVT::v8f32, 1 }, // vblendvps
- { ISD::SELECT, MVT::v4i64, 1 }, // vblendvpd
- { ISD::SELECT, MVT::v8i32, 1 }, // vblendvps
+ { ISD::SELECT, MVT::v4f64, 3 }, // vblendvpd
+ { ISD::SELECT, MVT::v8f32, 3 }, // vblendvps
+ { ISD::SELECT, MVT::v4i64, 3 }, // vblendvpd
+ { ISD::SELECT, MVT::v8i32, 3 }, // vblendvps
{ ISD::SELECT, MVT::v16i16, 3 }, // vandps + vandnps + vorps
{ ISD::SELECT, MVT::v32i8, 3 }, // vandps + vandnps + vorps
};
static const CostTblEntry SSE42CostTbl[] = {
- { ISD::SETCC, MVT::v2f64, 1 },
- { ISD::SETCC, MVT::v4f32, 1 },
{ ISD::SETCC, MVT::v2i64, 1 },
};
static const CostTblEntry SSE41CostTbl[] = {
- { ISD::SELECT, MVT::v2f64, 1 }, // blendvpd
- { ISD::SELECT, MVT::v4f32, 1 }, // blendvps
- { ISD::SELECT, MVT::v2i64, 1 }, // pblendvb
- { ISD::SELECT, MVT::v4i32, 1 }, // pblendvb
- { ISD::SELECT, MVT::v8i16, 1 }, // pblendvb
- { ISD::SELECT, MVT::v16i8, 1 }, // pblendvb
+ { ISD::SETCC, MVT::v2f64, 1 },
+ { ISD::SETCC, MVT::v4f32, 1 },
+
+ { ISD::SELECT, MVT::v2f64, 2 }, // blendvpd
+ { ISD::SELECT, MVT::f64, 2 }, // blendvpd
+ { ISD::SELECT, MVT::v4f32, 2 }, // blendvps
+ { ISD::SELECT, MVT::f32 , 2 }, // blendvps
+ { ISD::SELECT, MVT::v2i64, 2 }, // pblendvb
+ { ISD::SELECT, MVT::v4i32, 2 }, // pblendvb
+ { ISD::SELECT, MVT::v8i16, 2 }, // pblendvb
+ { ISD::SELECT, MVT::v16i8, 2 }, // pblendvb
};
static const CostTblEntry SSE2CostTbl[] = {
{ ISD::SETCC, MVT::v2f64, 2 },
{ ISD::SETCC, MVT::f64, 1 },
- { ISD::SETCC, MVT::v2i64, 8 },
+ { ISD::SETCC, MVT::v2i64, 5 }, // pcmpeqd/pcmpgtd expansion
{ ISD::SETCC, MVT::v4i32, 1 },
{ ISD::SETCC, MVT::v8i16, 1 },
{ ISD::SETCC, MVT::v16i8, 1 },
- { ISD::SELECT, MVT::v2f64, 3 }, // andpd + andnpd + orpd
- { ISD::SELECT, MVT::v2i64, 3 }, // pand + pandn + por
- { ISD::SELECT, MVT::v4i32, 3 }, // pand + pandn + por
- { ISD::SELECT, MVT::v8i16, 3 }, // pand + pandn + por
- { ISD::SELECT, MVT::v16i8, 3 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v2f64, 2 }, // andpd + andnpd + orpd
+ { ISD::SELECT, MVT::f64, 2 }, // andpd + andnpd + orpd
+ { ISD::SELECT, MVT::v2i64, 2 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v4i32, 2 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v8i16, 2 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v16i8, 2 }, // pand + pandn + por
};
static const CostTblEntry SSE1CostTbl[] = {
{ ISD::SETCC, MVT::v4f32, 2 },
{ ISD::SETCC, MVT::f32, 1 },
- { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
+ { ISD::SELECT, MVT::v4f32, 2 }, // andps + andnps + orps
+ { ISD::SELECT, MVT::f32, 2 }, // andps + andnps + orps
};
if (ST->useSLMArithCosts())
@@ -3555,7 +3654,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
assert(Val->isVectorTy() && "This must be a vector type");
Type *ScalarType = Val->getScalarType();
- int RegisterFileMoveCost = 0;
+ InstructionCost RegisterFileMoveCost = 0;
// Non-immediate extraction/insertion can be handled as a sequence of
// aliased loads+stores via the stack.
@@ -3589,6 +3688,12 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index != -1U && (Opcode == Instruction::ExtractElement ||
Opcode == Instruction::InsertElement)) {
+ // Extraction of vXi1 elements is now efficiently handled by MOVMSK.
+ if (Opcode == Instruction::ExtractElement &&
+ ScalarType->getScalarSizeInBits() == 1 &&
+ cast<FixedVectorType>(Val)->getNumElements() > 1)
+ return 1;
+
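The early return above reflects that an i1 element, typically a compare result living in a vector register, can be read with a single MOVMSK plus a shift/and instead of a lane extract. A sketch with SSE2 intrinsics (assumes the mask vector is a byte-lane compare result and Lane < 16):

#include <emmintrin.h>

int extractBoolLane(__m128i CmpResult, unsigned Lane) {
  int Bits = _mm_movemask_epi8(CmpResult); // gather all 16 sign bits into a GPR
  return (Bits >> Lane) & 1;               // any lane is then a shift+and away
}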
// Legalize the type.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
@@ -3597,15 +3702,16 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return 0;
// The type may be split. Normalize the index to the new type.
+ unsigned SizeInBits = LT.second.getSizeInBits();
unsigned NumElts = LT.second.getVectorNumElements();
unsigned SubNumElts = NumElts;
Index = Index % NumElts;
// For >128-bit vectors, we need to extract higher 128-bit subvectors.
// For inserts, we also need to insert the subvector back.
- if (LT.second.getSizeInBits() > 128) {
- assert((LT.second.getSizeInBits() % 128) == 0 && "Illegal vector");
- unsigned NumSubVecs = LT.second.getSizeInBits() / 128;
+ if (SizeInBits > 128) {
+ assert((SizeInBits % 128) == 0 && "Illegal vector");
+ unsigned NumSubVecs = SizeInBits / 128;
SubNumElts = NumElts / NumSubVecs;
if (SubNumElts <= Index) {
RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
@@ -3673,20 +3779,25 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert,
bool Extract) {
+ assert(DemandedElts.getBitWidth() ==
+ cast<FixedVectorType>(Ty)->getNumElements() &&
+ "Vector size mismatch");
+
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ MVT MScalarTy = LT.second.getScalarType();
+ unsigned SizeInBits = LT.second.getSizeInBits();
+
InstructionCost Cost = 0;
// For insertions, an ISD::BUILD_VECTOR style vector initialization can be much
// cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
if (Insert) {
- std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
- MVT MScalarTy = LT.second.getScalarType();
-
if ((MScalarTy == MVT::i16 && ST->hasSSE2()) ||
(MScalarTy.isInteger() && ST->hasSSE41()) ||
(MScalarTy == MVT::f32 && ST->hasSSE41())) {
// For types we can insert directly, insertion into 128-bit subvectors is
// cheap, followed by a cheap chain of concatenations.
- if (LT.second.getSizeInBits() <= 128) {
+ if (SizeInBits <= 128) {
Cost +=
BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false);
} else {
@@ -3704,9 +3815,9 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
// Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128.
const int CostValue = *LT.first.getValue();
assert(CostValue >= 0 && "Negative cost!");
- unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue;
+ unsigned Num128Lanes = SizeInBits / 128 * CostValue;
unsigned NumElts = LT.second.getVectorNumElements() * CostValue;
- APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts);
+ APInt WidenedDemandedElts = DemandedElts.zext(NumElts);
unsigned Scale = NumElts / Num128Lanes;
// We iterate over each 128-bit lane and check if we need an
// extracti128/inserti128 for this lane.
@@ -3747,10 +3858,59 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
}
}
- // TODO: Use default extraction for now, but we should investigate extending this
- // to handle repeated subvector extraction.
- if (Extract)
+ if (Extract) {
+ // vXi1 can be efficiently extracted with MOVMSK.
+ // TODO: AVX512 predicate mask handling.
+ // NOTE: This doesn't work well for roundtrip scalarization.
+ if (!Insert && Ty->getScalarSizeInBits() == 1 && !ST->hasAVX512()) {
+ unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
+ unsigned MaxElts = ST->hasAVX2() ? 32 : 16;
+ unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
+ return MOVMSKCost;
+ }
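The MOVMSK count above is a ceiling division over the widest available mask-move: 32 lanes per VPMOVMSKB on AVX2, 16 per PMOVMSKB otherwise. A worked check of the arithmetic for a v64i8 mask (these values follow directly from the formula, not from a cost table):

static_assert((64 + 32 - 1) / 32 == 2, "AVX2: two 256-bit MOVMSKs");
static_assert((64 + 16 - 1) / 16 == 4, "SSE2: four 128-bit MOVMSKs");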
+
+ if (LT.second.isVector()) {
+ int CostValue = *LT.first.getValue();
+ assert(CostValue >= 0 && "Negative cost!");
+
+ unsigned NumElts = LT.second.getVectorNumElements() * CostValue;
+ assert(NumElts >= DemandedElts.getBitWidth() &&
+ "Vector has been legalized to smaller element count");
+
+ // If we're extracting elements from a 128-bit subvector lane, we only need
+ // to extract each lane once, not for every element.
+ if (SizeInBits > 128) {
+ assert((SizeInBits % 128) == 0 && "Illegal vector");
+ unsigned NumLegal128Lanes = SizeInBits / 128;
+ unsigned Num128Lanes = NumLegal128Lanes * CostValue;
+ APInt WidenedDemandedElts = DemandedElts.zext(NumElts);
+ unsigned Scale = NumElts / Num128Lanes;
+
+ // Add cost for each demanded 128-bit subvector extraction.
+ // Luckily this is a lot easier than for insertion.
+ APInt DemandedUpper128Lanes =
+ APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes);
+ auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale);
+ for (unsigned I = 0; I != Num128Lanes; ++I)
+ if (DemandedUpper128Lanes[I])
+ Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
+ I * Scale, Ty128);
+
+ // Add all the demanded element extractions together, but adjust the
+ // index to use the equivalent of the bottom 128-bit lane.
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (WidenedDemandedElts[I]) {
+ unsigned Idx = I % Scale;
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, Idx);
+ }
+
+ return Cost;
+ }
+ }
+
+ // Fallback to default extraction.
Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract);
+ }
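A worked trace of the demanded-lane narrowing above, assuming an AVX target with Ty = <8 x i32> and only element 7 demanded: v8i32 is legal (SizeInBits = 256), so Num128Lanes = 2 and Scale = 4; ScaleBitMask ORs each group of four demanded bits into one lane bit, and the model charges one subvector extract for the upper lane plus one extractelement at index 7 % 4 == 3. A self-contained sketch of that OR-reduction (ScaleBitMask generalizes this to arbitrary APInt widths):

#include <cstdint>

uint64_t scaleMaskDown(uint64_t Demanded, unsigned NumElts, unsigned Scale) {
  uint64_t Lanes = 0;
  for (unsigned I = 0; I != NumElts; ++I)
    if (Demanded & (uint64_t(1) << I))
      Lanes |= uint64_t(1) << (I / Scale); // OR each group into one lane bit
  return Lanes;
}
// scaleMaskDown(0b10000000, 8, 4) == 0b10: only the upper 128-bit lane is
// demanded, so one extract-subvector plus one in-lane extract suffices.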
return Cost;
}
@@ -3855,8 +4015,7 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
// if all elements that will form a single Dst vector aren't demanded,
// then we won't need to do that shuffle, so adjust the cost accordingly.
APInt DemandedDstVectors = APIntOps::ScaleBitMask(
- DemandedDstElts.zextOrSelf(NumDstVectors * NumEltsPerDstVec),
- NumDstVectors);
+ DemandedDstElts.zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation();
InstructionCost SingleShuffleCost =
@@ -5029,8 +5188,8 @@ InstructionCost X86TTIImpl::getGatherScatterOpCost(
return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
}
-bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2) {
+bool X86TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) {
// X86-specific here: instruction count takes first priority.
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
C1.NumIVMuls, C1.NumBaseAdds,
@@ -5110,6 +5269,14 @@ bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
return true;
}
+bool X86TTIImpl::isLegalBroadcastLoad(Type *ElementTy,
+ ElementCount NumElements) const {
+ // movddup
+ return ST->hasSSE3() && !NumElements.isScalable() &&
+ NumElements.getFixedValue() == 2 &&
+ ElementTy == Type::getDoubleTy(ElementTy->getContext());
+}
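The predicate encodes exactly the MOVDDUP case: with SSE3, splatting a double loaded from memory into both lanes of a <2 x double> is a single instruction. A sketch with the SSE3 intrinsic (requires compiling with SSE3 enabled, e.g. -msse3):

#include <pmmintrin.h>

__m128d splatLoadedDouble(const double *P) {
  return _mm_loaddup_pd(P); // movddup: load + broadcast in one instruction
}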
+
bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
if (!isa<VectorType>(DataTy))
return false;
@@ -5174,6 +5341,39 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
return IntWidth == 32 || IntWidth == 64;
}
+bool X86TTIImpl::isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
+ unsigned Opcode1,
+ const SmallBitVector &OpcodeMask) const {
+ // ADDSUBPS 4xf32 SSE3
+ // VADDSUBPS 4xf32 AVX
+ // VADDSUBPS 8xf32 AVX2
+ // ADDSUBPD 2xf64 SSE3
+ // VADDSUBPD 2xf64 AVX
+ // VADDSUBPD 4xf64 AVX2
+
+ unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
+ assert(OpcodeMask.size() == NumElements && "Mask and VecTy are incompatible");
+ if (!isPowerOf2_32(NumElements))
+ return false;
+ // Check the opcode pattern. We apply the mask to the opcode arguments and
+ // then check if it is what we expect.
+ for (int Lane : seq<int>(0, NumElements)) {
+ unsigned Opc = OpcodeMask.test(Lane) ? Opcode1 : Opcode0;
+ // We expect FSub for even lanes and FAdd for odd lanes.
+ if (Lane % 2 == 0 && Opc != Instruction::FSub)
+ return false;
+ if (Lane % 2 == 1 && Opc != Instruction::FAdd)
+ return false;
+ }
+ // Now check that the pattern is supported by the target ISA.
+ Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
+ if (ElemTy->isFloatTy())
+ return ST->hasSSE3() && NumElements % 4 == 0;
+ if (ElemTy->isDoubleTy())
+ return ST->hasSSE3() && NumElements % 2 == 0;
+ return false;
+}
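A hypothetical caller-side sketch, not part of this patch: bit i of OpcodeMask is set when lane i uses Opcode1, so a 4-lane fsub/fadd alternation is expressed as mask 0b1010 and accepted above as ADDSUBPS on SSE3. This assumes the matching isLegalAltInstr hook is reachable through the public TargetTransformInfo wrapper:

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

bool isAddSubLegal(const TargetTransformInfo &TTI, VectorType *VecTy) {
  SmallBitVector OpcodeMask(4, false);
  OpcodeMask.set(1); // lane 1: FAdd
  OpcodeMask.set(3); // lane 3: FAdd
  return TTI.isLegalAltInstr(VecTy, Instruction::FSub, Instruction::FAdd,
                             OpcodeMask);
}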
+
bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) {
// AVX2 doesn't support scatter
if (!ST->hasAVX512())
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 69715072426f..bd3c3fb1bb2f 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -38,12 +38,12 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
const FeatureBitset InlineFeatureIgnoreList = {
// This indicates the CPU is 64-bit capable, not that we are in 64-bit
// mode.
- X86::Feature64Bit,
+ X86::FeatureX86_64,
// These features don't have any intrinsics or ABI effect.
X86::FeatureNOPL,
- X86::FeatureCMPXCHG16B,
- X86::FeatureLAHFSAHF,
+ X86::FeatureCX16,
+ X86::FeatureLAHFSAHF64,
// Some older targets can be set up to fold unaligned loads.
X86::FeatureSSEUnalignedMem,
@@ -68,6 +68,11 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::TuningMacroFusion,
X86::TuningPadShortFunctions,
X86::TuningPOPCNTFalseDeps,
+ X86::TuningMULCFalseDeps,
+ X86::TuningPERMFalseDeps,
+ X86::TuningRANGEFalseDeps,
+ X86::TuningGETMANTFalseDeps,
+ X86::TuningMULLQFalseDeps,
X86::TuningSlow3OpsLEA,
X86::TuningSlowDivide32,
X86::TuningSlowDivide64,
@@ -131,7 +136,8 @@ public:
const Instruction *CxtI = nullptr);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
- VectorType *SubTp);
+ VectorType *SubTp,
+ ArrayRef<const Value *> Args = None);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
@@ -219,13 +225,14 @@ public:
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind);
- bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
- TargetTransformInfo::LSRCost &C2);
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2);
bool canMacroFuseCmp();
bool isLegalMaskedLoad(Type *DataType, Align Alignment);
bool isLegalMaskedStore(Type *DataType, Align Alignment);
bool isLegalNTLoad(Type *DataType, Align Alignment);
bool isLegalNTStore(Type *DataType, Align Alignment);
+ bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
return forceScalarizeMaskedGather(VTy, Alignment);
@@ -234,6 +241,8 @@ public:
bool isLegalMaskedScatter(Type *DataType, Align Alignment);
bool isLegalMaskedExpandLoad(Type *DataType);
bool isLegalMaskedCompressStore(Type *DataType);
+ bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+ const SmallBitVector &OpcodeMask) const;
bool hasDivRemOp(Type *DataType, bool IsSigned);
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
bool areInlineCompatible(const Function *Caller,
diff --git a/llvm/lib/Target/X86/X86TileConfig.cpp b/llvm/lib/Target/X86/X86TileConfig.cpp
index 8114a0b2d423..5cada924e006 100644
--- a/llvm/lib/Target/X86/X86TileConfig.cpp
+++ b/llvm/lib/Target/X86/X86TileConfig.cpp
@@ -36,7 +36,7 @@
using namespace llvm;
-#define DEBUG_TYPE "tile-config"
+#define DEBUG_TYPE "tileconfig"
namespace {
@@ -70,11 +70,11 @@ struct X86TileConfig : public MachineFunctionPass {
char X86TileConfig::ID = 0;
-INITIALIZE_PASS_BEGIN(X86TileConfig, "tileconfig", "Tile Register Configure",
+INITIALIZE_PASS_BEGIN(X86TileConfig, DEBUG_TYPE, "Tile Register Configure",
false, false)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_PASS_END(X86TileConfig, "tileconfig", "Tile Register Configure",
- false, false)
+INITIALIZE_PASS_END(X86TileConfig, DEBUG_TYPE, "Tile Register Configure", false,
+ false)
bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
@@ -90,7 +90,7 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
int SS = INT_MAX;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == X86::LDTILECFG) {
+ if (MI.getOpcode() == X86::PLDTILECFGV) {
SS = MI.getOperand(0).getIndex();
break;
}
@@ -98,6 +98,9 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
if (SS != INT_MAX)
break;
}
+ // Didn't find PLDTILECFGV; just return false.
+ if (SS == INT_MAX)
+ return false;
// Try to find a point to insert MIs for constant shapes.
// Here we are leveraging the palette id inserted in the PreRA pass.
@@ -120,6 +123,8 @@ bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
continue;
if (MRI.getRegClass(VirtReg)->getID() != X86::TILERegClassID)
continue;
+ if (VRM.getPhys(VirtReg) == VirtRegMap::NO_PHYS_REG)
+ continue;
unsigned Index = VRM.getPhys(VirtReg) - X86::TMM0;
if (!Phys2Virt[Index])
Phys2Virt[Index] = VirtReg;
diff --git a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index f6b97e9e84b3..57801752f170 100644
--- a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -15,8 +15,8 @@
#include "XCore.h"
#include "XCoreRegisterInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
@@ -66,140 +66,116 @@ static bool readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
return true;
}
-static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
- const XCoreDisassembler *Dis = static_cast<const XCoreDisassembler*>(D);
- const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo();
+static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo) {
+ const MCRegisterInfo *RegInfo = D->getContext().getRegisterInfo();
return *(RegInfo->getRegClass(RC).begin() + RegNo);
}
-static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus Decode2RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus Decode2RImmInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus Decode2RImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeR2RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeR2RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeRUSInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeRUSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus
+DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL2RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL2RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeLR2RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus Decode3RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus Decode3RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus Decode3RImmInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus Decode3RImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus Decode2RUSInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus Decode2RUSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL3RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL3RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL6RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL6RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL5RInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL5RInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address,
- const void *Decoder);
+ const MCDisassembler *Decoder);
-static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus
+DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const MCDisassembler *Decoder);
#include "XCoreGenDisassemblerTables.inc"
-static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder)
-{
+ const MCDisassembler *Decoder) {
if (RegNo > 11)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
@@ -207,11 +183,9 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- const void *Decoder)
-{
+ const MCDisassembler *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
@@ -220,7 +194,8 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
}
static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
if (Val > 11)
return MCDisassembler::Fail;
static const unsigned Values[] = {
@@ -231,7 +206,8 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
Inst.addOperand(MCOperand::createImm(-(int64_t)Val));
return MCDisassembler::Success;
}
@@ -270,9 +246,9 @@ Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2,
return MCDisassembler::Success;
}
-static DecodeStatus
-Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode2OpInstructionFail(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// Try and decode as a 3R instruction.
unsigned Opcode = fieldFromInstruction(Insn, 11, 5);
switch (Opcode) {
@@ -340,9 +316,9 @@ Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
}
-static DecodeStatus
-Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
if (S != MCDisassembler::Success)
@@ -353,9 +329,9 @@ Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode2RImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
if (S != MCDisassembler::Success)
@@ -366,9 +342,9 @@ Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeR2RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1);
if (S != MCDisassembler::Success)
@@ -379,9 +355,9 @@ DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
if (S != MCDisassembler::Success)
@@ -393,9 +369,9 @@ Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeRUSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
if (S != MCDisassembler::Success)
@@ -406,9 +382,9 @@ DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
if (S != MCDisassembler::Success)
@@ -421,7 +397,7 @@ DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
static DecodeStatus
DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
if (S != MCDisassembler::Success)
@@ -433,9 +409,9 @@ DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
// Try and decode as a L3R / L2RUS instruction.
unsigned Opcode = fieldFromInstruction(Insn, 16, 4) |
fieldFromInstruction(Insn, 27, 5) << 4;
@@ -504,9 +480,9 @@ DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
}
-static DecodeStatus
-DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL2RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
Op1, Op2);
@@ -518,9 +494,9 @@ DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
Op1, Op2);
@@ -532,9 +508,9 @@ DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode3RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
if (S == MCDisassembler::Success) {
@@ -545,9 +521,9 @@ Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode3RImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
if (S == MCDisassembler::Success) {
@@ -558,9 +534,9 @@ Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode2RUSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
if (S == MCDisassembler::Success) {
@@ -571,9 +547,9 @@ Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
if (S == MCDisassembler::Success) {
@@ -584,9 +560,9 @@ Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL3RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S =
Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
@@ -598,9 +574,9 @@ DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S =
Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
@@ -613,9 +589,9 @@ DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S =
Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
@@ -627,9 +603,9 @@ DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
DecodeStatus S =
Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
@@ -641,9 +617,9 @@ DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL6RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3, Op4, Op5, Op6;
DecodeStatus S =
Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
@@ -661,9 +637,9 @@ DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
  // Try to decode as an L6R instruction.
Inst.clear();
unsigned Opcode = fieldFromInstruction(Insn, 27, 5);
@@ -675,9 +651,9 @@ DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Fail;
}
-static DecodeStatus
-DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL5RInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3, Op4, Op5;
DecodeStatus S =
Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
@@ -695,9 +671,9 @@ DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-static DecodeStatus
-DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
DecodeStatus S =
@@ -716,7 +692,7 @@ DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
static DecodeStatus
DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder) {
+ const MCDisassembler *Decoder) {
unsigned Op1, Op2, Op3;
unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
DecodeStatus S =
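The hunks above mechanically retype the decoders' opaque `const void *Decoder` parameter as `const MCDisassembler *`. A minimal standalone sketch of why the typed parameter is preferable — `Disassembler`, `decodeOld`, and `decodeNew` are hypothetical stand-ins, not the LLVM API:

#include <cstdint>

struct Disassembler { uint64_t Mode; }; // stand-in for MCDisassembler

// Before: every callee must trust the caller and cast the opaque pointer.
static uint64_t decodeOld(unsigned Insn, const void *Decoder) {
  return static_cast<const Disassembler *>(Decoder)->Mode ^ Insn;
}

// After: the pointee type is part of the contract and checked by the compiler.
static uint64_t decodeNew(unsigned Insn, const Disassembler *Decoder) {
  return Decoder->Mode ^ Insn;
}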
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
index 0ea47106434c..a8801fc2c5bc 100644
--- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
+++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
@@ -15,10 +15,10 @@
#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
+class StringRef;
class XCoreInstPrinter : public MCInstPrinter {
public:
@@ -39,7 +39,6 @@ private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/XCore/XCore.h b/llvm/lib/Target/XCore/XCore.h
index d31c34910ef6..6118775d16fe 100644
--- a/llvm/lib/Target/XCore/XCore.h
+++ b/llvm/lib/Target/XCore/XCore.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_XCORE_XCORE_H
#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 38b613700674..8fea61d125d2 100644
--- a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -110,7 +110,7 @@ void XCoreAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
return;
const DataLayout &DL = getDataLayout();
- OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(GV, TM));
+ OutStreamer->switchSection(getObjFileLowering().SectionForGlobal(GV, TM));
MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 7c86262269fc..70a1901bb04f 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -167,10 +167,8 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
= MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2;
// We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(
+ {ISD::STORE, ISD::ADD, ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN});
setMinFunctionAlignment(Align(2));
setPrefFunctionAlignment(Align(4));
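The hunk above relies on a `setTargetDAGCombine` overload that accepts a list of opcodes. A hedged sketch of that API shape — the `Combiner` type below is hypothetical, not the TargetLowering interface:

#include <initializer_list>
#include <set>

struct Combiner {
  std::set<int> CombineTargets;
  // One call registers several opcodes instead of N repeated calls.
  void setTargetDAGCombine(std::initializer_list<int> Opcodes) {
    CombineTargets.insert(Opcodes.begin(), Opcodes.end());
  }
};
// Usage mirroring the patch: C.setTargetDAGCombine({Store, Add, IVoid, IChain});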
@@ -442,7 +440,7 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (LD->getAlignment() == 2) {
+ if (LD->getAlign() == Align(2)) {
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr,
LD->getPointerInfo(), MVT::i16, Align(2),
LD->getMemOperand()->getFlags());
@@ -497,7 +495,7 @@ SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Value = ST->getValue();
SDLoc dl(Op);
- if (ST->getAlignment() == 2) {
+ if (ST->getAlign() == Align(2)) {
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
DAG.getConstant(16, dl, MVT::i32));
@@ -941,25 +939,25 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
N->getSuccessOrdering() == AtomicOrdering::Monotonic) &&
"setInsertFencesForAtomic(true) expects unordered / monotonic");
if (N->getMemoryVT() == MVT::i32) {
- if (N->getAlignment() < 4)
+ if (N->getAlign() < Align(4))
report_fatal_error("atomic load must be aligned");
return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op),
N->getChain(), N->getBasePtr(), N->getPointerInfo(),
- N->getAlignment(), N->getMemOperand()->getFlags(),
+ N->getAlign(), N->getMemOperand()->getFlags(),
N->getAAInfo(), N->getRanges());
}
if (N->getMemoryVT() == MVT::i16) {
- if (N->getAlignment() < 2)
+ if (N->getAlign() < Align(2))
report_fatal_error("atomic load must be aligned");
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
N->getBasePtr(), N->getPointerInfo(), MVT::i16,
- N->getAlignment(), N->getMemOperand()->getFlags(),
+ N->getAlign(), N->getMemOperand()->getFlags(),
N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i8)
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
N->getBasePtr(), N->getPointerInfo(), MVT::i8,
- N->getAlignment(), N->getMemOperand()->getFlags(),
+ N->getAlign(), N->getMemOperand()->getFlags(),
N->getAAInfo());
return SDValue();
}
@@ -972,24 +970,24 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
N->getSuccessOrdering() == AtomicOrdering::Monotonic) &&
"setInsertFencesForAtomic(true) expects unordered / monotonic");
if (N->getMemoryVT() == MVT::i32) {
- if (N->getAlignment() < 4)
+ if (N->getAlign() < Align(4))
report_fatal_error("atomic store must be aligned");
return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(),
- N->getPointerInfo(), N->getAlignment(),
+ N->getPointerInfo(), N->getAlign(),
N->getMemOperand()->getFlags(), N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i16) {
- if (N->getAlignment() < 2)
+ if (N->getAlign() < Align(2))
report_fatal_error("atomic store must be aligned");
return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(), MVT::i16,
- N->getAlignment(), N->getMemOperand()->getFlags(),
+ N->getAlign(), N->getMemOperand()->getFlags(),
N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i8)
return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(), MVT::i8,
- N->getAlignment(), N->getMemOperand()->getFlags(),
+ N->getAlign(), N->getMemOperand()->getFlags(),
N->getAAInfo());
return SDValue();
}
@@ -1791,17 +1789,17 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
assert((StoreBits % 8) == 0 &&
"Store size in bits must be a multiple of 8");
- unsigned Alignment = ST->getAlignment();
+ Align Alignment = ST->getAlign();
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
- LD->getAlignment() == Alignment &&
+ LD->getAlign() == Alignment &&
!LD->isVolatile() && !LD->isIndexed() &&
Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
bool isTail = isInTailCallPosition(DAG, ST, Chain);
return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(),
DAG.getConstant(StoreBits / 8, dl, MVT::i32),
- Align(Alignment), false, isTail,
+ Alignment, false, isTail,
ST->getPointerInfo(), LD->getPointerInfo());
}
}
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.td b/llvm/lib/Target/XCore/XCoreInstrInfo.td
index aa3739d0335e..23f80b126404 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.td
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -363,7 +363,7 @@ let usesCustomInserter = 1 in {
(select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
}
-let hasSideEffects = 1 in
+let hasSideEffects = 1, isMeta = 1 in
def Int_MemBarrier : PseudoInstXCore<(outs), (ins), "#MEMBARRIER",
[(XCoreMemBarrier)]>;
diff --git a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index ec44d2899dd5..f039f4f67955 100644
--- a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -15,6 +15,13 @@ using namespace llvm;
void XCoreFunctionInfo::anchor() { }
+MachineFunctionInfo *XCoreFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<XCoreFunctionInfo>(*this);
+}
+
bool XCoreFunctionInfo::isLargeFrame(const MachineFunction &MF) const {
if (CachedEStackSize == -1) {
CachedEStackSize = MF.getFrameInfo().estimateStackSize(MF);
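The new `clone` override defers to a templated `cloneInfo` helper. A minimal sketch of the pattern with plain ownership — the names below are hypothetical, and the real helper allocates from the destination function's BumpPtrAllocator rather than using new:

struct MachineFunctionInfoBase {
  virtual ~MachineFunctionInfoBase() = default;
  virtual MachineFunctionInfoBase *clone() const = 0;
};

// The helper copy-constructs the *derived* type, so each subclass's
// override is a one-liner.
template <typename T>
static MachineFunctionInfoBase *cloneInfo(const T &Src) {
  return new T(Src);
}

struct XCoreLikeInfo : MachineFunctionInfoBase {
  int CachedEStackSize = -1;
  MachineFunctionInfoBase *clone() const override { return cloneInfo(*this); }
};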
diff --git a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
index aebe11b15b54..6cdb1239750a 100644
--- a/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -45,6 +45,11 @@ public:
explicit XCoreFunctionInfo(MachineFunction &MF) {}
+ MachineFunctionInfo *
+ clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const override;
+
~XCoreFunctionInfo() override = default;
void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 2e49627a19bf..3c27fcd9ba53 100644
--- a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -26,7 +26,7 @@
using namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- return RM.getValueOr(Reloc::Static);
+ return RM.value_or(Reloc::Static);
}
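`value_or` is the std::optional-style spelling of the old `getValueOr`. A minimal sketch of the semantics using std::optional, which behaves the same way for this call:

#include <cassert>
#include <optional>

int main() {
  std::optional<int> RM;           // no relocation model specified
  assert(RM.value_or(42) == 42);   // empty: the fallback is returned
  RM = 7;
  assert(RM.value_or(42) == 7);    // engaged: the contained value wins
  return 0;
}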
static CodeModel::Model
@@ -108,6 +108,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXCoreTarget() {
}
TargetTransformInfo
-XCoreTargetMachine::getTargetTransformInfo(const Function &F) {
+XCoreTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(XCoreTTIImpl(this, F));
}
diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.h b/llvm/lib/Target/XCore/XCoreTargetMachine.h
index 9c3bdcf78f9c..a4754fd77e65 100644
--- a/llvm/lib/Target/XCore/XCoreTargetMachine.h
+++ b/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,13 +15,13 @@
#include "XCoreSubtarget.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>
namespace llvm {
+class StringRef;
class XCoreTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
@@ -42,7 +42,7 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
diff --git a/llvm/lib/Testing/Support/Annotations.cpp b/llvm/lib/Testing/Support/Annotations.cpp
index 44d3acccfdb2..557b6cdf98ce 100644
--- a/llvm/lib/Testing/Support/Annotations.cpp
+++ b/llvm/lib/Testing/Support/Annotations.cpp
@@ -33,12 +33,12 @@ Annotations::Annotations(llvm::StringRef Text) {
Code.reserve(Text.size());
while (!Text.empty()) {
if (Text.consume_front("^")) {
- Points[Name.getValueOr("")].push_back(Code.size());
+ Points[Name.value_or("")].push_back(Code.size());
Name = llvm::None;
continue;
}
if (Text.consume_front("[[")) {
- OpenRanges.emplace_back(Name.getValueOr(""), Code.size());
+ OpenRanges.emplace_back(Name.value_or(""), Code.size());
Name = llvm::None;
continue;
}
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 8f69282d3443..5f4d0cdf2b57 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -77,7 +77,7 @@ static std::vector<StringRef> getSearchPaths(opt::InputArgList *Args,
// Add $LIB.
Optional<std::string> EnvOpt = sys::Process::GetEnv("LIB");
- if (!EnvOpt.hasValue())
+ if (!EnvOpt)
return Ret;
StringRef Env = Saver.save(*EnvOpt);
while (!Env.empty()) {
@@ -229,10 +229,11 @@ static void appendFile(std::vector<NewArchiveMember> &Members,
(Magic == file_magic::coff_object) ? getCOFFFileMachine(MB)
: getBitcodeFileMachine(MB);
if (!MaybeFileMachine) {
- handleAllErrors(MaybeFileMachine.takeError(), [&](const ErrorInfoBase &EIB) {
- llvm::errs() << MB.getBufferIdentifier() << ": " << EIB.message()
- << "\n";
- });
+ handleAllErrors(MaybeFileMachine.takeError(),
+ [&](const ErrorInfoBase &EIB) {
+ llvm::errs() << MB.getBufferIdentifier() << ": "
+ << EIB.message() << "\n";
+ });
exit(1);
}
COFF::MachineTypes FileMachine = *MaybeFileMachine;
@@ -291,10 +292,25 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
return 0;
}
+ // Parse /ignore:
+ llvm::StringSet<> IgnoredWarnings;
+ for (auto *Arg : Args.filtered(OPT_ignore))
+ IgnoredWarnings.insert(Arg->getValue());
+
// If no input files and not told otherwise, silently do nothing to match
// lib.exe
- if (!Args.hasArgNoClaim(OPT_INPUT) && !Args.hasArg(OPT_llvmlibempty))
+ if (!Args.hasArgNoClaim(OPT_INPUT) && !Args.hasArg(OPT_llvmlibempty)) {
+ if (!IgnoredWarnings.contains("emptyoutput")) {
+ llvm::errs() << "warning: no input files, not writing output file\n";
+ llvm::errs() << " pass /llvmlibempty to write empty .lib file,\n";
+ llvm::errs() << " pass /ignore:emptyoutput to suppress warning\n";
+ if (Args.hasFlag(OPT_WX, OPT_WX_no, false)) {
+ llvm::errs() << "treating warning as error due to /WX\n";
+ return 1;
+ }
+ }
return 0;
+ }
if (Args.hasArg(OPT_lst)) {
doList(Args);
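A hedged sketch of the warning policy the hunk above introduces — the helper below is hypothetical, not the driver's code: a warning named in /ignore: is suppressed entirely; otherwise it is printed, and /WX upgrades it to a hard error.

#include <cstdio>
#include <set>
#include <string>

// Returns 1 if the warning should terminate the tool (as with /WX), else 0.
static int emitWarning(const std::set<std::string> &Ignored,
                       const std::string &Code, bool TreatWarningsAsErrors) {
  if (Ignored.count(Code))
    return 0;                           // suppressed via /ignore:<code>
  std::fprintf(stderr, "warning: %s\n", Code.c_str());
  return TreatWarningsAsErrors ? 1 : 0; // /WX turns the warning into an error
}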
diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td
index 5891e238a328..0d97f77e525f 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/Options.td
+++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td
@@ -9,6 +9,14 @@ class F<string name> : Flag<["/", "-", "/?", "-?"], name>;
class P<string name, string help> :
Joined<["/", "-", "/?", "-?"], name#":">, HelpText<help>;
+// Boolean flag which can be suffixed by ":no". Using it unsuffixed turns the
+// flag on and using it suffixed by ":no" turns it off.
+multiclass B<string name, string help_on, string help_off> {
+ def "" : F<name>, HelpText<help_on>;
+ def _no : F<name#":no">, HelpText<help_off>;
+}
+
+def ignore : P<"ignore", "Specify warning codes to ignore">;
def libpath: P<"libpath", "Object file search path">;
// Can't be called "list" since that's a keyword.
@@ -23,6 +31,9 @@ def llvmlibempty : F<"llvmlibempty">,
def machine: P<"machine", "Specify target platform">;
+defm WX : B<"WX", "Treat warnings as errors",
+ "Don't treat warnings as errors (default)">;
+
def help : F<"help">;
// /?? and -?? must be before /? and -? to not confuse lib/Options.
@@ -32,7 +43,6 @@ def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias<help>;
// The flags below do nothing. They are defined only for lib.exe compatibility.
//==============================================================================
-class QF<string name> : Joined<["/", "-", "/?", "-?"], name#":">;
-
-def ignore : QF<"ignore">;
+def ltcg : F<"ltcg">;
def nologo : F<"nologo">;
+def subsystem : P<"subsystem", "">;
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7243e39c9029..1fd8b88dd776 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -22,8 +22,8 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -36,6 +36,10 @@
using namespace llvm;
using namespace PatternMatch;
+namespace llvm {
+class DataLayout;
+}
+
#define DEBUG_TYPE "aggressive-instcombine"
STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
@@ -200,14 +204,13 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
/// of 'and' ops, then we also need to capture the fact that we saw an
/// "and X, 1", so that's an extra return value for that case.
struct MaskOps {
- Value *Root;
+ Value *Root = nullptr;
APInt Mask;
bool MatchAndChain;
- bool FoundAnd1;
+ bool FoundAnd1 = false;
MaskOps(unsigned BitWidth, bool MatchAnds)
- : Root(nullptr), Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds),
- FoundAnd1(false) {}
+ : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
};
/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
@@ -363,10 +366,72 @@ static bool tryToRecognizePopCount(Instruction &I) {
return false;
}
+/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and
+/// C2 saturate the value of the fp conversion. The transform is not reversible
+/// as the fptosi.sat is more defined than the input - all inputs produce a
+/// valid value for the fptosi.sat, whereas some inputs that were out of range
+/// of the integer conversion produce poison in the original. The reversed
+/// pattern may use fmax and fmin instead. As we cannot directly reverse the
+/// transform, and it is not always profitable, we make it conditional on the
+/// cost being reported as lower by TTI.
+static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
+ // Look for min(max(fptosi, converting to fptosi_sat.
+ Value *In;
+ const APInt *MinC, *MaxC;
+ if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
+ m_APInt(MinC))),
+ m_APInt(MaxC))) &&
+ !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
+ m_APInt(MaxC))),
+ m_APInt(MinC))))
+ return false;
+
+ // Check that the constants clamp a saturate.
+ if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
+ return false;
+
+ Type *IntTy = I.getType();
+ Type *FpTy = In->getType();
+ Type *SatTy =
+ IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
+ if (auto *VecTy = dyn_cast<VectorType>(IntTy))
+ SatTy = VectorType::get(SatTy, VecTy->getElementCount());
+
+ // Get the cost of the intrinsic, and check that against the cost of
+ // fptosi+smin+smax
+ InstructionCost SatCost = TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
+ TTI::TCK_RecipThroughput);
+ SatCost += TTI.getCastInstrCost(Instruction::SExt, SatTy, IntTy,
+ TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+
+ InstructionCost MinMaxCost = TTI.getCastInstrCost(
+ Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+ MinMaxCost += TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
+ TTI::TCK_RecipThroughput);
+ MinMaxCost += TTI.getIntrinsicInstrCost(
+ IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
+ TTI::TCK_RecipThroughput);
+
+ if (SatCost >= MinMaxCost)
+ return false;
+
+ IRBuilder<> Builder(&I);
+ Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat,
+ {SatTy, FpTy});
+ Value *Sat = Builder.CreateCall(Fn, In);
+ I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
+ return true;
+}
+
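A standalone sketch of when a smin/smax constant pair forms a signed saturate, using the same APInt arithmetic as the check above — `getSatWidth` is a hypothetical helper, not part of the patch. For a target width N, the smin bound must be 2^(N-1)-1 and the smax bound -2^(N-1), e.g. 127/-128 for an i8 saturate.

#include "llvm/ADT/APInt.h"
using llvm::APInt;

// Returns the saturated width N if (SMinC, SMaxC) clamp to
// [-2^(N-1), 2^(N-1)-1], or 0 if the pair is not a signed saturate.
static unsigned getSatWidth(const APInt &SMinC, const APInt &SMaxC) {
  if (!(SMinC + 1).isPowerOf2() || -SMaxC != SMinC + 1)
    return 0;
  return (SMinC + 1).exactLogBase2() + 1; // 127 -> log2(128) + 1 == 8
}
// getSatWidth(APInt(32, 127), APInt(32, -128, /*isSigned=*/true)) == 8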
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
-static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
+static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
+ TargetTransformInfo &TTI) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -382,6 +447,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
+ MadeChange |= tryToFPToSat(I, TTI);
}
}
@@ -395,13 +461,13 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
-static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI,
- DominatorTree &DT) {
+static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, DominatorTree &DT) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI);
return MadeChange;
}
@@ -411,6 +477,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -421,7 +488,8 @@ bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return runImpl(F, AC, TLI, DT);
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return runImpl(F, AC, TTI, TLI, DT);
}
PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
@@ -429,7 +497,8 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- if (!runImpl(F, AC, TLI, DT)) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!runImpl(F, AC, TTI, TLI, DT)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
@@ -446,6 +515,7 @@ INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
"Combine pattern based expressions", false, false)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index 5d69e26d6ecc..9fc103d45d98 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -23,14 +23,14 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-// TruncInstCombine - looks for expression dags dominated by trunc instructions
-// and for each eligible dag, it will create a reduced bit-width expression and
-// replace the old expression with this new one and remove the old one.
-// Eligible expression dag is such that:
+// TruncInstCombine - looks for expression graphs dominated by trunc
+// instructions; for each eligible graph, it creates a reduced bit-width
+// expression, replaces the old expression with the new one, and removes the
+// old one. An eligible expression graph is one that:
// 1. Contains only supported instructions.
// 2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value.
// 3. Can be evaluated into type with reduced legal bit-width (or Trunc type).
-// 4. All instructions in the dag must not have users outside the dag.
+// 4. No instruction in the graph may have users outside the graph. The only
//    exception is for {ZExt, SExt}Inst with operand type equal to the
// new reduced type chosen in (3).
//
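A hedged C-level illustration of the reduction this comment describes: when every node feeding a 16-bit truncate can be evaluated in 16 bits, the whole graph is rewritten in the narrow type. This is sound because add and mul are congruent modulo 2^16.

#include <cstdint>

// 32-bit expression graph post-dominated by a truncate...
uint16_t wide(uint32_t a, uint32_t b) {
  uint32_t t = (a + b) * 3u;
  return (uint16_t)t;
}

// ...and the equivalent reduced bit-width form the combine produces.
uint16_t narrow(uint16_t a, uint16_t b) {
  return (uint16_t)((a + b) * 3u);
}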
@@ -61,9 +61,9 @@ class TruncInstCombine {
SmallVector<TruncInst *, 4> Worklist;
/// Current processed TruncInst instruction.
- TruncInst *CurrentTruncInst;
+ TruncInst *CurrentTruncInst = nullptr;
- /// Information per each instruction in the expression dag.
+ /// Information per each instruction in the expression graph.
struct Info {
/// Number of LSBs that are needed to generate a valid expression.
unsigned ValidBitWidth = 0;
@@ -72,26 +72,26 @@ class TruncInstCombine {
/// The reduced value generated to replace the old instruction.
Value *NewValue = nullptr;
};
- /// An ordered map representing expression dag post-dominated by current
- /// processed TruncInst. It maps each instruction in the dag to its Info
+ /// An ordered map representing the expression graph post-dominated by the
+ /// currently processed TruncInst. It maps each instruction in the graph to its Info
/// structure. The map is ordered such that each instruction appears before
- /// all other instructions in the dag that uses it.
+ /// all other instructions in the graph that use it.
MapVector<Instruction *, Info> InstInfoMap;
public:
TruncInstCombine(AssumptionCache &AC, TargetLibraryInfo &TLI,
const DataLayout &DL, const DominatorTree &DT)
- : AC(AC), TLI(TLI), DL(DL), DT(DT), CurrentTruncInst(nullptr) {}
+ : AC(AC), TLI(TLI), DL(DL), DT(DT) {}
/// Perform TruncInst pattern optimization on given function.
bool run(Function &F);
private:
- /// Build expression dag dominated by the /p CurrentTruncInst and append it to
- /// the InstInfoMap container.
+ /// Build the expression graph dominated by the \p CurrentTruncInst and
+ /// append it to the InstInfoMap container.
///
- /// \return true only if succeed to generate an eligible sub expression dag.
- bool buildTruncExpressionDag();
+ /// \return true only if we succeed in generating an eligible sub-expression graph.
+ bool buildTruncExpressionGraph();
/// Calculate the minimal allowed bit-width of the chain ending with the
/// currently visited truncate's operand.
@@ -100,12 +100,12 @@ private:
/// truncate's operand can be shrunk to.
unsigned getMinBitWidth();
- /// Build an expression dag dominated by the current processed TruncInst and
+ /// Build an expression graph dominated by the currently processed TruncInst and
/// Check if it is eligible to be reduced to a smaller type.
///
/// \return the scalar version of the new type to be used for the reduced
- /// expression dag, or nullptr if the expression dag is not eligible
- /// to be reduced.
+ /// expression graph, or nullptr if the expression graph is not
+ /// eligible to be reduced.
Type *getBestTruncatedType();
KnownBits computeKnownBits(const Value *V) const {
@@ -128,12 +128,12 @@ private:
/// \return the new reduced value.
Value *getReducedOperand(Value *V, Type *SclTy);
- /// Create a new expression dag using the reduced /p SclTy type and replace
- /// the old expression dag with it. Also erase all instructions in the old
- /// dag, except those that are still needed outside the dag.
+ /// Create a new expression graph using the reduced \p SclTy type and replace
+ /// the old expression graph with it. Also erase all instructions in the old
+ /// graph, except those that are still needed outside the graph.
///
- /// \param SclTy scalar version of new type to reduce expression dag into.
- void ReduceExpressionDag(Type *SclTy);
+ /// \param SclTy scalar version of new type to reduce expression graph into.
+ void ReduceExpressionGraph(Type *SclTy);
};
} // end namespace llvm.
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 4624b735bef8..70ea68587b8e 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -6,14 +6,14 @@
//
//===----------------------------------------------------------------------===//
//
-// TruncInstCombine - looks for expression dags post-dominated by TruncInst and
-// for each eligible dag, it will create a reduced bit-width expression, replace
-// the old expression with this new one and remove the old expression.
-// Eligible expression dag is such that:
+// TruncInstCombine - looks for expression graphs post-dominated by TruncInst;
+// for each eligible graph, it creates a reduced bit-width expression, replaces
+// the old expression with the new one, and removes the old expression.
+// An eligible expression graph is one that:
// 1. Contains only supported instructions.
// 2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value.
// 3. Can be evaluated into type with reduced legal bit-width.
-// 4. All instructions in the dag must not have users outside the dag.
+// 4. No instruction in the graph may have users outside the graph.
// The only exception is for {ZExt, SExt}Inst with operand type equal to
// the new reduced type evaluated in (3).
//
@@ -28,7 +28,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -39,14 +38,13 @@ using namespace llvm;
#define DEBUG_TYPE "aggressive-instcombine"
-STATISTIC(
- NumDAGsReduced,
- "Number of truncations eliminated by reducing bit width of expression DAG");
+STATISTIC(NumExprsReduced, "Number of truncations eliminated by reducing bit "
+ "width of expression graph");
STATISTIC(NumInstrsReduced,
"Number of instructions whose bit width was reduced");
/// Given an instruction and a container, it fills all the relevant operands of
-/// that instruction, with respect to the Trunc expression dag optimizaton.
-/// that instruction, with respect to the Trunc expression dag optimizaton.
+/// that instruction, with respect to the Trunc expression graph optimization.
static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
unsigned Opc = I->getOpcode();
switch (Opc) {
@@ -78,15 +76,19 @@ static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
Ops.push_back(I->getOperand(1));
Ops.push_back(I->getOperand(2));
break;
+ case Instruction::PHI:
+ for (Value *V : cast<PHINode>(I)->incoming_values())
+ Ops.push_back(V);
+ break;
default:
llvm_unreachable("Unreachable!");
}
}
-bool TruncInstCombine::buildTruncExpressionDag() {
+bool TruncInstCombine::buildTruncExpressionGraph() {
SmallVector<Value *, 8> Worklist;
SmallVector<Instruction *, 8> Stack;
- // Clear old expression dag.
+ // Clear old instruction info.
InstInfoMap.clear();
Worklist.push_back(CurrentTruncInst->getOperand(0));
@@ -150,11 +152,19 @@ bool TruncInstCombine::buildTruncExpressionDag() {
append_range(Worklist, Operands);
break;
}
+ case Instruction::PHI: {
+ SmallVector<Value *, 2> Operands;
+ getRelevantOperands(I, Operands);
+ // Add only operands not already on the Stack, to prevent cycles.
+ for (auto *Op : Operands)
+ if (all_of(Stack, [Op](Value *V) { return Op != V; }))
+ Worklist.push_back(Op);
+ break;
+ }
default:
// TODO: Can handle more cases here:
// 1. shufflevector
// 2. sdiv, srem
- // 3. phi node(and loop handling)
// ...
return false;
}
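A hedged sketch of the cycle guard added for PHIs above, with generic containers and hypothetical names: an incoming value is enqueued only if it is not already on the current DFS stack, so loop back-edges through a PHI are skipped instead of recursing forever.

#include <algorithm>
#include <vector>

template <typename Node>
static void enqueuePhiOperands(const std::vector<Node *> &Stack,
                               const std::vector<Node *> &Incoming,
                               std::vector<Node *> &Worklist) {
  for (Node *Op : Incoming)
    // Operands already on the stack are back-edges of a cycle: skip them.
    if (std::find(Stack.begin(), Stack.end(), Op) == Stack.end())
      Worklist.push_back(Op);
}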
@@ -254,7 +264,7 @@ unsigned TruncInstCombine::getMinBitWidth() {
}
Type *TruncInstCombine::getBestTruncatedType() {
- if (!buildTruncExpressionDag())
+ if (!buildTruncExpressionGraph())
return nullptr;
// We don't want to duplicate instructions, which isn't profitable. Thus, we
@@ -367,8 +377,10 @@ Value *TruncInstCombine::getReducedOperand(Value *V, Type *SclTy) {
return Entry.NewValue;
}
-void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
+void TruncInstCombine::ReduceExpressionGraph(Type *SclTy) {
NumInstrsReduced += InstInfoMap.size();
+ // Pairs of old and new phi-nodes
+ SmallVector<std::pair<PHINode *, PHINode *>, 2> OldNewPHINodes;
for (auto &Itr : InstInfoMap) { // Forward
Instruction *I = Itr.first;
TruncInstCombine::Info &NodeInfo = Itr.second;
@@ -451,6 +463,12 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
Res = Builder.CreateSelect(Op0, LHS, RHS);
break;
}
+ case Instruction::PHI: {
+ Res = Builder.CreatePHI(getReducedType(I, SclTy), I->getNumOperands());
+ OldNewPHINodes.push_back(
+ std::make_pair(cast<PHINode>(I), cast<PHINode>(Res)));
+ break;
+ }
default:
llvm_unreachable("Unhandled instruction");
}
@@ -460,6 +478,14 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
ResI->takeName(I);
}
+ for (auto &Node : OldNewPHINodes) {
+ PHINode *OldPN = Node.first;
+ PHINode *NewPN = Node.second;
+ for (auto Incoming : zip(OldPN->incoming_values(), OldPN->blocks()))
+ NewPN->addIncoming(getReducedOperand(std::get<0>(Incoming), SclTy),
+ std::get<1>(Incoming));
+ }
+
Value *Res = getReducedOperand(CurrentTruncInst->getOperand(0), SclTy);
Type *DstTy = CurrentTruncInst->getType();
if (Res->getType() != DstTy) {
@@ -470,17 +496,29 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
}
CurrentTruncInst->replaceAllUsesWith(Res);
- // Erase old expression dag, which was replaced by the reduced expression dag.
- // We iterate backward, which means we visit the instruction before we visit
- // any of its operands, this way, when we get to the operand, we already
- // removed the instructions (from the expression dag) that uses it.
+ // Erase old expression graph, which was replaced by the reduced expression
+ // graph.
CurrentTruncInst->eraseFromParent();
+ // First, erase the old phi-nodes and their uses.
+ for (auto &Node : OldNewPHINodes) {
+ PHINode *OldPN = Node.first;
+ OldPN->replaceAllUsesWith(PoisonValue::get(OldPN->getType()));
+ InstInfoMap.erase(OldPN);
+ OldPN->eraseFromParent();
+ }
+ // Now the expression graph has been turned into a DAG.
+ // We iterate backward, which means we visit an instruction before we visit
+ // any of its operands; this way, when we get to an operand, we have already
+ // removed the instructions (from the expression DAG) that use it.
for (auto &I : llvm::reverse(InstInfoMap)) {
// We still need to check that the instruction has no users before we erase
// it, because a {SExt, ZExt}Inst instruction might have other users that were
// not reduced; in that case, we need to keep that instruction.
if (I.first->use_empty())
I.first->eraseFromParent();
+ else
+ assert((isa<SExtInst>(I.first) || isa<ZExtInst>(I.first)) &&
+ "Only {SExt, ZExt}Inst might have unreduced users");
}
}
@@ -498,18 +536,18 @@ bool TruncInstCombine::run(Function &F) {
}
// Process all TruncInst in the Worklist, for each instruction:
- // 1. Check if it dominates an eligible expression dag to be reduced.
- // 2. Create a reduced expression dag and replace the old one with it.
+ // 1. Check if it dominates an eligible expression graph to be reduced.
+ // 2. Create a reduced expression graph and replace the old one with it.
while (!Worklist.empty()) {
CurrentTruncInst = Worklist.pop_back_val();
if (Type *NewDstSclTy = getBestTruncatedType()) {
LLVM_DEBUG(
- dbgs() << "ICE: TruncInstCombine reducing type of expression dag "
+ dbgs() << "ICE: TruncInstCombine reducing type of expression graph "
"dominated by: "
<< CurrentTruncInst << '\n');
- ReduceExpressionDag(NewDstSclTy);
- ++NumDAGsReduced;
+ ReduceExpressionGraph(NewDstSclTy);
+ ++NumExprsReduced;
MadeIRChange = true;
}
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 67f8828e4c75..f7bbdcffd2ec 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -10,9 +10,9 @@
#include "CoroInternal.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
using namespace llvm;
@@ -23,19 +23,10 @@ namespace {
struct Lowerer : coro::LowererBase {
IRBuilder<> Builder;
Lowerer(Module &M) : LowererBase(M), Builder(Context) {}
- bool lowerRemainingCoroIntrinsics(Function &F);
+ bool lower(Function &F);
};
}
-static void simplifyCFG(Function &F) {
- llvm::legacy::FunctionPassManager FPM(F.getParent());
- FPM.add(createCFGSimplificationPass());
-
- FPM.doInitialization();
- FPM.run(F);
- FPM.doFinalization();
-}
-
static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
Builder.SetInsertPoint(SubFn);
Value *FrameRaw = SubFn->getFrame();
@@ -53,12 +44,10 @@ static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
SubFn->replaceAllUsesWith(Load);
}
-bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
+bool Lowerer::lower(Function &F) {
+ bool IsPrivateAndUnprocessed = F.isPresplitCoroutine() && F.hasLocalLinkage();
bool Changed = false;
- bool IsPrivateAndUnprocessed =
- F.hasFnAttribute(CORO_PRESPLIT_ATTR) && F.hasLocalLinkage();
-
for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
@@ -116,11 +105,6 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
}
}
- if (Changed) {
- // After replacement were made we can cleanup the function body a little.
- simplifyCFG(F);
- }
-
return Changed;
}
@@ -132,50 +116,21 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) {
"llvm.coro.async.resume"});
}
-PreservedAnalyses CoroCleanupPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- auto &M = *F.getParent();
- if (!declaresCoroCleanupIntrinsics(M) ||
- !Lowerer(M).lowerRemainingCoroIntrinsics(F))
+PreservedAnalyses CoroCleanupPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ if (!declaresCoroCleanupIntrinsics(M))
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
-}
-
-namespace {
-
-struct CoroCleanupLegacy : FunctionPass {
- static char ID; // Pass identification, replacement for typeid
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- CoroCleanupLegacy() : FunctionPass(ID) {
- initializeCoroCleanupLegacyPass(*PassRegistry::getPassRegistry());
- }
+ FunctionPassManager FPM;
+ FPM.addPass(SimplifyCFGPass());
- std::unique_ptr<Lowerer> L;
+ Lowerer L(M);
+ for (auto &F : M)
+ if (L.lower(F))
+ FPM.run(F, FAM);
- // This pass has work to do only if we find intrinsics we are going to lower
- // in the module.
- bool doInitialization(Module &M) override {
- if (declaresCoroCleanupIntrinsics(M))
- L = std::make_unique<Lowerer>(M);
- return false;
- }
-
- bool runOnFunction(Function &F) override {
- if (L)
- return L->lowerRemainingCoroIntrinsics(F);
- return false;
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (!L)
- AU.setPreservesAll();
- }
- StringRef getPassName() const override { return "Coroutine Cleanup"; }
-};
+ return PreservedAnalyses::none();
}
-
-char CoroCleanupLegacy::ID = 0;
-INITIALIZE_PASS(CoroCleanupLegacy, "coro-cleanup",
- "Lower all coroutine related intrinsics", false, false)
-
-Pass *llvm::createCoroCleanupLegacyPass() { return new CoroCleanupLegacy(); }
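A hedged sketch of the control flow the new module pass uses — hypothetical callables, not the PassManager API: lower intrinsics per function, and run the follow-up cleanup pipeline only on functions the lowering actually changed.

#include <functional>
#include <vector>

struct Function; // opaque stand-in

static void lowerThenCleanup(std::vector<Function *> &Funcs,
                             const std::function<bool(Function &)> &Lower,
                             const std::function<void(Function &)> &Cleanup) {
  for (Function *F : Funcs)
    if (Lower(*F)) // only pay for SimplifyCFG-style cleanup when IR changed
      Cleanup(*F);
}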
diff --git a/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp b/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp
new file mode 100644
index 000000000000..3d26a43ceba7
--- /dev/null
+++ b/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp
@@ -0,0 +1,24 @@
+//===- CoroConditionalWrapper.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
+#include "CoroInternal.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+CoroConditionalWrapper::CoroConditionalWrapper(ModulePassManager &&PM)
+ : PM(std::move(PM)) {}
+
+PreservedAnalyses CoroConditionalWrapper::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!coro::declaresAnyIntrinsic(M))
+ return PreservedAnalyses::all();
+
+ return PM.run(M, AM);
+}
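A hedged sketch of the wrapper's idea in isolation, with hypothetical types: evaluate a cheap module-level predicate first, and only run the wrapped pipeline when it says there is coroutine work to do.

#include <functional>

struct Module; // opaque stand-in
struct Analyses { bool AllPreserved; };

struct ConditionalWrapper {
  std::function<bool(Module &)> Predicate;    // e.g. "declares any coro intrinsic"
  std::function<Analyses(Module &)> Pipeline; // the wrapped passes

  Analyses run(Module &M) {
    if (!Predicate(M))
      return {/*AllPreserved=*/true};         // skip the whole pipeline
    return Pipeline(M);
  }
};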
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 1533e1805f17..dd7cb23f3f3d 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -8,10 +8,10 @@
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "CoroInternal.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
using namespace llvm;
@@ -35,7 +35,7 @@ public:
AnyResumeFnPtrTy(FunctionType::get(Type::getVoidTy(Context), Int8Ptr,
/*isVarArg=*/false)
->getPointerTo()) {}
- bool lowerEarlyIntrinsics(Function &F);
+ void lowerEarlyIntrinsics(Function &F);
};
}
@@ -145,14 +145,16 @@ static void setCannotDuplicate(CoroIdInst *CoroId) {
CB->setCannotDuplicate();
}
-bool Lowerer::lowerEarlyIntrinsics(Function &F) {
- bool Changed = false;
+void Lowerer::lowerEarlyIntrinsics(Function &F) {
CoroIdInst *CoroId = nullptr;
SmallVector<CoroFreeInst *, 4> CoroFrees;
bool HasCoroSuspend = false;
for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
- if (auto *CB = dyn_cast<CallBase>(&I)) {
- switch (CB->getIntrinsicID()) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ switch (CB->getIntrinsicID()) {
default:
continue;
case Intrinsic::coro_free:
@@ -178,12 +180,9 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
case Intrinsic::coro_id:
if (auto *CII = cast<CoroIdInst>(&I)) {
if (CII->getInfo().isPreSplit()) {
- assert(F.hasFnAttribute(CORO_PRESPLIT_ATTR) &&
- F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString() ==
- UNPREPARED_FOR_SPLIT &&
+ assert(F.isPresplitCoroutine() &&
"The frontend uses Swtich-Resumed ABI should emit "
- "\"coroutine.presplit\" attribute with value \"0\" for the "
- "coroutine.");
+ "\"coroutine.presplit\" attribute for the coroutine.");
setCannotDuplicate(CII);
CII->setCoroutineSelf();
CoroId = cast<CoroIdInst>(&I);
@@ -193,9 +192,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
case Intrinsic::coro_id_retcon:
case Intrinsic::coro_id_retcon_once:
case Intrinsic::coro_id_async:
- // TODO: Remove the line once we support it in the corresponding
- // frontend.
- F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
+ F.setPresplitCoroutine();
break;
case Intrinsic::coro_resume:
lowerResumeOrDestroy(*CB, CoroSubFnInst::ResumeIndex);
@@ -209,16 +206,16 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
case Intrinsic::coro_done:
lowerCoroDone(cast<IntrinsicInst>(&I));
break;
- }
- Changed = true;
}
}
+
// Make sure that all CoroFree reference the coro.id intrinsic.
// Token type is not exposed through coroutine C/C++ builtins to plain C, so
// we allow specifying none and fixing it up here.
if (CoroId)
for (CoroFreeInst *CF : CoroFrees)
CF->setArgOperand(0, CoroId);
+
// Coroutine suspension could potentially lead to any argument being modified
// outside of the function, hence arguments should not have noalias
// attributes.
@@ -226,7 +223,6 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
for (Argument &A : F.args())
if (A.hasNoAliasAttr())
A.removeAttr(Attribute::NoAlias);
- return Changed;
}
static bool declaresCoroEarlyIntrinsics(const Module &M) {
@@ -238,52 +234,15 @@ static bool declaresCoroEarlyIntrinsics(const Module &M) {
"llvm.coro.suspend"});
}
-PreservedAnalyses CoroEarlyPass::run(Function &F, FunctionAnalysisManager &) {
- Module &M = *F.getParent();
- if (!declaresCoroEarlyIntrinsics(M) || !Lowerer(M).lowerEarlyIntrinsics(F))
+PreservedAnalyses CoroEarlyPass::run(Module &M, ModuleAnalysisManager &) {
+ if (!declaresCoroEarlyIntrinsics(M))
return PreservedAnalyses::all();
+ Lowerer L(M);
+ for (auto &F : M)
+ L.lowerEarlyIntrinsics(F);
+
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
}
-
-namespace {
-
-struct CoroEarlyLegacy : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid.
- CoroEarlyLegacy() : FunctionPass(ID) {
- initializeCoroEarlyLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- std::unique_ptr<Lowerer> L;
-
- // This pass has work to do only if we find intrinsics we are going to lower
- // in the module.
- bool doInitialization(Module &M) override {
- if (declaresCoroEarlyIntrinsics(M))
- L = std::make_unique<Lowerer>(M);
- return false;
- }
-
- bool runOnFunction(Function &F) override {
- if (!L)
- return false;
-
- return L->lowerEarlyIntrinsics(F);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- }
- StringRef getPassName() const override {
- return "Lower early coroutine intrinsics";
- }
-};
-}
-
-char CoroEarlyLegacy::ID = 0;
-INITIALIZE_PASS(CoroEarlyLegacy, "coro-early",
- "Lower early coroutine intrinsics", false, false)
-
-Pass *llvm::createCoroEarlyLegacyPass() { return new CoroEarlyLegacy(); }
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index 84bebb7bf42d..6f78fc8db311 100644
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -14,8 +14,6 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -103,21 +101,12 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
// Given a resume function @f.resume(%f.frame* %frame), returns the size
// and expected alignment of %f.frame type.
-static std::pair<uint64_t, Align> getFrameLayout(Function *Resume) {
- // Prefer to pull information from the function attributes.
+static Optional<std::pair<uint64_t, Align>> getFrameLayout(Function *Resume) {
+ // Pull information from the function attributes.
auto Size = Resume->getParamDereferenceableBytes(0);
- auto Align = Resume->getParamAlign(0);
-
- // If those aren't given, extract them from the type.
- if (Size == 0 || !Align) {
- auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType();
-
- const DataLayout &DL = Resume->getParent()->getDataLayout();
- if (!Size) Size = DL.getTypeAllocSize(FrameTy);
- if (!Align) Align = DL.getABITypeAlign(FrameTy);
- }
-
- return std::make_pair(Size, *Align);
+ if (!Size)
+ return None;
+ return std::make_pair(Size, Resume->getParamAlign(0).valueOrOne());
}
// Finds first non alloca instruction in the entry block of a function.
@@ -347,56 +336,37 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
assert(Resumers && "PostSplit coro.id Info argument must refer to an array"
"of coroutine subfunctions");
auto *ResumeAddrConstant =
- ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex);
+ Resumers->getAggregateElement(CoroSubFnInst::ResumeIndex);
replaceWithConstant(ResumeAddrConstant, ResumeAddr);
bool ShouldElide = shouldElide(CoroId->getFunction(), DT);
- auto *DestroyAddrConstant = ConstantExpr::getExtractValue(
- Resumers,
+ auto *DestroyAddrConstant = Resumers->getAggregateElement(
ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex);
for (auto &It : DestroyAddr)
replaceWithConstant(DestroyAddrConstant, It.second);
if (ShouldElide) {
- auto FrameSizeAndAlign = getFrameLayout(cast<Function>(ResumeAddrConstant));
- elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first,
- FrameSizeAndAlign.second, AA);
- coro::replaceCoroFree(CoroId, /*Elide=*/true);
- NumOfCoroElided++;
+ if (auto FrameSizeAndAlign =
+ getFrameLayout(cast<Function>(ResumeAddrConstant))) {
+ elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign->first,
+ FrameSizeAndAlign->second, AA);
+ coro::replaceCoroFree(CoroId, /*Elide=*/true);
+ NumOfCoroElided++;
#ifndef NDEBUG
- if (!CoroElideInfoOutputFilename.empty())
- *getOrCreateLogFile()
- << "Elide " << CoroId->getCoroutine()->getName() << " in "
- << CoroId->getFunction()->getName() << "\n";
+ if (!CoroElideInfoOutputFilename.empty())
+ *getOrCreateLogFile()
+ << "Elide " << CoroId->getCoroutine()->getName() << " in "
+ << CoroId->getFunction()->getName() << "\n";
#endif
+ }
}
return true;
}
-// See if there are any coro.subfn.addr instructions referring to coro.devirt
-// trigger, if so, replace them with a direct call to devirt trigger function.
-static bool replaceDevirtTrigger(Function &F) {
- SmallVector<CoroSubFnInst *, 1> DevirtAddr;
- for (auto &I : instructions(F))
- if (auto *SubFn = dyn_cast<CoroSubFnInst>(&I))
- if (SubFn->getIndex() == CoroSubFnInst::RestartTrigger)
- DevirtAddr.push_back(SubFn);
-
- if (DevirtAddr.empty())
- return false;
-
- Module &M = *F.getParent();
- Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
- assert(DevirtFn && "coro.devirt.fn not found");
- replaceWithConstant(DevirtFn, DevirtAddr);
-
- return true;
-}
-
static bool declaresCoroElideIntrinsics(Module &M) {
return coro::declaresIntrinsics(M, {"llvm.coro.id", "llvm.coro.id.async"});
}
@@ -422,62 +392,3 @@ PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) {
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
-
-namespace {
-struct CoroElideLegacy : FunctionPass {
- static char ID;
- CoroElideLegacy() : FunctionPass(ID) {
- initializeCoroElideLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- std::unique_ptr<Lowerer> L;
-
- bool doInitialization(Module &M) override {
- if (declaresCoroElideIntrinsics(M))
- L = std::make_unique<Lowerer>(M);
- return false;
- }
-
- bool runOnFunction(Function &F) override {
- if (!L)
- return false;
-
- bool Changed = false;
-
- if (F.hasFnAttribute(CORO_PRESPLIT_ATTR))
- Changed = replaceDevirtTrigger(F);
-
- L->CoroIds.clear();
- L->collectPostSplitCoroIds(&F);
- // If we did not find any coro.id, there is nothing to do.
- if (L->CoroIds.empty())
- return Changed;
-
- AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- for (auto *CII : L->CoroIds)
- Changed |= L->processCoroId(CII, AA, DT);
-
- return Changed;
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- }
- StringRef getPassName() const override { return "Coroutine Elision"; }
-};
-}
-
-char CoroElideLegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(
- CoroElideLegacy, "coro-elide",
- "Coroutine frame allocation elision and indirect calls replacement", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(
- CoroElideLegacy, "coro-elide",
- "Coroutine frame allocation elision and indirect calls replacement", false,
- false)
-
-Pass *llvm::createCoroElideLegacyPass() { return new CoroElideLegacy(); }
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 9c16d3750998..d09607bb1c4c 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -27,7 +27,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/OptimizedStructLayout.h"
@@ -44,13 +44,6 @@ using namespace llvm;
// "coro-frame", which results in leaner debug spew.
#define DEBUG_TYPE "coro-suspend-crossing"
-static cl::opt<bool> EnableReuseStorageInFrame(
- "reuse-storage-in-coroutine-frame", cl::Hidden,
- cl::desc(
- "Enable the optimization which would reuse the storage in the coroutine \
- frame for allocas whose liferanges are not overlapped, for testing purposes"),
- llvm::cl::init(false));
-
enum { SmallVectorThreshold = 32 };
// Provides two way mapping between the blocks and numbers.
@@ -347,15 +340,26 @@ struct FrameDataInfo {
FieldIndexMap[V] = Index;
}
- uint64_t getAlign(Value *V) const {
+ Align getAlign(Value *V) const {
auto Iter = FieldAlignMap.find(V);
assert(Iter != FieldAlignMap.end());
return Iter->second;
}
- void setAlign(Value *V, uint64_t Align) {
+ void setAlign(Value *V, Align AL) {
assert(FieldAlignMap.count(V) == 0);
- FieldAlignMap.insert({V, Align});
+ FieldAlignMap.insert({V, AL});
+ }
+
+ uint64_t getDynamicAlign(Value *V) const {
+ auto Iter = FieldDynamicAlignMap.find(V);
+ assert(Iter != FieldDynamicAlignMap.end());
+ return Iter->second;
+ }
+
+ void setDynamicAlign(Value *V, uint64_t Align) {
+ assert(FieldDynamicAlignMap.count(V) == 0);
+ FieldDynamicAlignMap.insert({V, Align});
}
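
    The switch above from a raw uint64_t to llvm::Align for FieldAlignMap is
    more than cosmetic: Align can only represent power-of-two values, so an
    invalid alignment is unrepresentable by construction. A minimal standalone
    sketch (not part of the patch):

        #include "llvm/Support/Alignment.h"

        // Align asserts on non-powers-of-two; e.g. llvm::Align(12) would trap.
        uint64_t toRawAlign() {
          llvm::Align A(16);   // fine: 16 is a power of two
          return A.value();    // convert back to a raw byte count when needed
        }
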
uint64_t getOffset(Value *V) const {
@@ -382,7 +386,8 @@ private:
DenseMap<Value *, uint32_t> FieldIndexMap;
// Map from values to their alignment on the frame. They would be set after
// the frame is built.
- DenseMap<Value *, uint64_t> FieldAlignMap;
+ DenseMap<Value *, Align> FieldAlignMap;
+ DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
// Map from values to their offset on the frame. They would be set after
// the frame is built.
DenseMap<Value *, uint64_t> FieldOffsetMap;
@@ -423,6 +428,7 @@ private:
FieldIDType LayoutFieldIndex;
Align Alignment;
Align TyAlignment;
+ uint64_t DynamicAlignBuffer;
};
const DataLayout &DL;
@@ -489,7 +495,7 @@ public:
coro::Shape &Shape);
/// Add a field to this structure.
- LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
+ LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
bool IsHeader = false,
bool IsSpillOfValue = false) {
assert(!IsFinished && "adding fields to a finished builder");
@@ -508,13 +514,21 @@ public:
// to remember the type alignment anyway to build the type.
// If we are spilling values we don't need to worry about ABI alignment
// concerns.
- auto ABIAlign = DL.getABITypeAlign(Ty);
- Align TyAlignment =
- (IsSpillOfValue && MaxFrameAlignment)
- ? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
- : ABIAlign;
- if (!FieldAlignment) {
- FieldAlignment = TyAlignment;
+ Align ABIAlign = DL.getABITypeAlign(Ty);
+ Align TyAlignment = ABIAlign;
+ if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
+ TyAlignment = *MaxFrameAlignment;
+ Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);
+
+  // The field alignment could be bigger than the max frame alignment. In
+  // that case we request additional storage to be able to dynamically align
+  // the pointer at runtime.
+ uint64_t DynamicAlignBuffer = 0;
+ if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
+ DynamicAlignBuffer =
+ offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
+ FieldAlignment = *MaxFrameAlignment;
+ FieldSize = FieldSize + DynamicAlignBuffer;
}
// Lay out header fields immediately.
@@ -523,12 +537,13 @@ public:
Offset = alignTo(StructSize, FieldAlignment);
StructSize = Offset + FieldSize;
- // Everything else has a flexible offset.
+ // Everything else has a flexible offset.
} else {
Offset = OptimizedStructLayoutField::FlexibleOffset;
}
- Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment});
+ Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
+ DynamicAlignBuffer});
return Fields.size() - 1;
}
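
    For the over-aligned case handled above, offsetToAlignment(Value, Align)
    returns how many bytes must be added to Value to reach the next
    Align-aligned value; with both quantities powers of two and the field
    alignment strictly larger, this is the worst-case slack needed to realign
    a frame-aligned pointer at runtime. A hedged standalone equivalent (the
    helper name is illustrative):

        #include <cstdint>

        // Worst-case padding to realign from MaxFrame to Field at runtime.
        // Assumes both are powers of two with Field > MaxFrame; e.g. a
        // 32-byte-aligned field in a 16-byte-aligned frame needs 16 spare bytes.
        uint64_t dynamicAlignBuffer(uint64_t MaxFrame, uint64_t Field) {
          return Field - MaxFrame;
        }
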
@@ -561,7 +576,12 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
auto Updater = [&](Value *I) {
auto Field = B.getLayoutField(getFieldIndex(I));
setFieldIndex(I, Field.LayoutFieldIndex);
- setAlign(I, Field.Alignment.value());
+ setAlign(I, Field.Alignment);
+ uint64_t dynamicAlign =
+ Field.DynamicAlignBuffer
+ ? Field.DynamicAlignBuffer + Field.Alignment.value()
+ : 0;
+ setDynamicAlign(I, dynamicAlign);
setOffset(I, Field.Offset);
};
LayoutIndexUpdateStarted = true;
@@ -588,7 +608,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
}
});
- if (!Shape.OptimizeFrame && !EnableReuseStorageInFrame) {
+ if (!Shape.OptimizeFrame) {
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
@@ -755,6 +775,10 @@ void FrameTypeBuilder::finish(StructType *Ty) {
F.LayoutFieldIndex = FieldTypes.size();
FieldTypes.push_back(F.Ty);
+ if (F.DynamicAlignBuffer) {
+ FieldTypes.push_back(
+ ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
+ }
LastOffset = Offset + F.Size;
}
@@ -807,9 +831,10 @@ static StringRef solveTypeName(Type *Ty) {
return "__floating_type_";
}
- if (Ty->isPointerTy()) {
- auto *PtrTy = cast<PointerType>(Ty);
- Type *PointeeTy = PtrTy->getPointerElementType();
+ if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
+ if (PtrTy->isOpaque())
+ return "PointerType";
+ Type *PointeeTy = PtrTy->getNonOpaquePointerElementType();
auto Name = solveTypeName(PointeeTy);
if (Name == "UnknownType")
return "PointerType";
@@ -826,10 +851,9 @@ static StringRef solveTypeName(Type *Ty) {
auto Name = Ty->getStructName();
SmallString<16> Buffer(Name);
- for_each(Buffer, [](auto &Iter) {
+ for (auto &Iter : Buffer)
if (Iter == '.' || Iter == ':')
Iter = '_';
- });
auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
return MDName->getString();
}
@@ -1012,7 +1036,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
auto Index = FrameData.getFieldIndex(V);
OffsetCache.insert(
- {Index, {FrameData.getAlign(V), FrameData.getOffset(V)}});
+ {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
}
DenseMap<Type *, DIType *> DITypeCache;
@@ -1078,7 +1102,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
DBuilder.createExpression(), DILoc,
- Shape.FramePtr->getNextNode());
+ Shape.getInsertPtAfterFramePtr());
}
// Build a struct that will keep state for an active coroutine.
@@ -1367,7 +1391,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
bool getShouldLiveOnFrame() const {
if (!ShouldLiveOnFrame)
ShouldLiveOnFrame = computeShouldLiveOnFrame();
- return ShouldLiveOnFrame.getValue();
+ return *ShouldLiveOnFrame;
}
bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }
@@ -1455,7 +1479,7 @@ private:
auto Itr = AliasOffetMap.find(&I);
if (Itr == AliasOffetMap.end()) {
AliasOffetMap[&I] = Offset;
- } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) {
+ } else if (Itr->second && *Itr->second != Offset) {
// If we have seen two different possible values for this alias, we set
// it to empty.
AliasOffetMap[&I].reset();
@@ -1517,13 +1541,12 @@ static void createFramePtr(coro::Shape &Shape) {
// whatever
//
//
-static Instruction *insertSpills(const FrameDataInfo &FrameData,
- coro::Shape &Shape) {
+static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
LLVMContext &C = CB->getContext();
IRBuilder<> Builder(C);
StructType *FrameTy = Shape.FrameTy;
- Instruction *FramePtr = Shape.FramePtr;
+ Value *FramePtr = Shape.FramePtr;
DominatorTree DT(*CB->getFunction());
SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
@@ -1550,7 +1573,18 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto GEP = cast<GetElementPtrInst>(
Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
- if (isa<AllocaInst>(Orig)) {
+ if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
+ if (FrameData.getDynamicAlign(Orig) != 0) {
+ assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
+ auto *M = AI->getModule();
+ auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
+ auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
+ auto *AlignMask =
+ ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
+ PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
+ PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
+ return Builder.CreateIntToPtr(PtrValue, AI->getType());
+ }
// If the type of GEP is not equal to the type of AllocaInst, it implies
// that the AllocaInst may be reused in the Frame slot of other
// AllocaInst. So we cast GEP to the AllocaInst here to re-use
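
    The ptrtoint/add/and/inttoptr sequence added above is the classic align-up
    idiom. The same rounding in plain C++ (a sketch, not from the patch):

        #include <cstdint>

        // Round Ptr up to the next multiple of Align (a power of two): add the
        // mask, then clear the low bits, exactly as the generated IR does.
        uintptr_t alignUp(uintptr_t Ptr, uintptr_t Align) {
          uintptr_t Mask = Align - 1;
          return (Ptr + Mask) & ~Mask;
        }
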
@@ -1571,20 +1605,19 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
// Create a store instruction storing the value into the
// coroutine frame.
Instruction *InsertPt = nullptr;
- bool NeedToCopyArgPtrValue = false;
+ Type *ByValTy = nullptr;
if (auto *Arg = dyn_cast<Argument>(Def)) {
// For arguments, we will place the store instruction right after
// the coroutine frame pointer instruction, i.e. bitcast of
// coro.begin from i8* to %f.frame*.
- InsertPt = FramePtr->getNextNode();
+ InsertPt = Shape.getInsertPtAfterFramePtr();
// If we're spilling an Argument, make sure we clear 'nocapture'
// from the coroutine function.
Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
if (Arg->hasByValAttr())
- NeedToCopyArgPtrValue = true;
-
+ ByValTy = Arg->getParamByValType();
} else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
// Don't spill immediately after a suspend; splitting assumes
// that the suspend will be followed by a branch.
@@ -1594,7 +1627,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
if (!DT.dominates(CB, I)) {
// If it is not dominated by CoroBegin, then spill should be
// inserted immediately after CoroFrame is computed.
- InsertPt = FramePtr->getNextNode();
+ InsertPt = Shape.getInsertPtAfterFramePtr();
} else if (auto *II = dyn_cast<InvokeInst>(I)) {
// If we are spilling the result of the invoke instruction, split
// the normal edge and insert the spill in the new block.
@@ -1619,11 +1652,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
Builder.SetInsertPoint(InsertPt);
auto *G = Builder.CreateConstInBoundsGEP2_32(
FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
- if (NeedToCopyArgPtrValue) {
+ if (ByValTy) {
// For byval arguments, we need to store the pointed-to value in the frame,
// instead of the pointer itself.
- auto *Value =
- Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
+ auto *Value = Builder.CreateLoad(ByValTy, Def);
Builder.CreateAlignedStore(Value, G, SpillAlignment);
} else {
Builder.CreateAlignedStore(Def, G, SpillAlignment);
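
    The ByValTy path above exists because a byval pointer argument is
    semantically a by-copy aggregate: the frame must capture the pointee, not
    the pointer. A minimal sketch of that store, with names mirroring the
    surrounding code (assumed, not verbatim):

        #include "llvm/IR/IRBuilder.h"

        // Spill a byval argument: load the aggregate behind Def and store the
        // value itself into the frame slot G, rather than the pointer Def.
        void spillByVal(llvm::IRBuilderBase &B, llvm::Type *ByValTy,
                        llvm::Value *Def, llvm::Value *G, llvm::Align A) {
          llvm::Value *V = B.CreateLoad(ByValTy, Def);
          B.CreateAlignedStore(V, G, A);
        }
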
@@ -1641,7 +1673,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto *GEP = GetFramePointer(E.first);
GEP->setName(E.first->getName() + Twine(".reload.addr"));
- if (NeedToCopyArgPtrValue)
+ if (ByValTy)
CurrentReload = GEP;
else
CurrentReload = Builder.CreateAlignedLoad(
@@ -1664,6 +1696,12 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
}
}
+  // Salvage debug info on any dbg.addr that we see. We do not, however,
+  // insert it into each block where there is a use.
+ if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) {
+ coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame);
+ }
+
// If we have a single edge PHINode, remove it and replace it with a
// reload from the coroutine frame. (We already took care of multi edge
// PHINodes by rewriting them in the rewritePHIs function).
@@ -1682,10 +1720,10 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
}
}
- BasicBlock *FramePtrBB = FramePtr->getParent();
+ BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
- auto SpillBlock =
- FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
+ auto SpillBlock = FramePtrBB->splitBasicBlock(
+ Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
Shape.AllocaSpillBlock = SpillBlock;
@@ -1704,7 +1742,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
Alloca->replaceAllUsesWith(G);
Alloca->eraseFromParent();
}
- return FramePtr;
+ return;
}
// If we found any alloca, replace all of their remaining uses with GEP
@@ -1735,7 +1773,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
for (Instruction *I : UsersToUpdate)
I->replaceUsesOfWith(Alloca, G);
}
- Builder.SetInsertPoint(FramePtr->getNextNode());
+ Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr());
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
if (A.MayWriteBeforeCoroBegin) {
@@ -1755,16 +1793,16 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
auto *FramePtr = GetFramePointer(Alloca);
auto *FramePtrRaw =
Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
- auto *AliasPtr = Builder.CreateGEP(
- Type::getInt8Ty(C), FramePtrRaw,
- ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue()));
+ auto &Value = *Alias.second;
+ auto ITy = IntegerType::get(C, Value.getBitWidth());
+ auto *AliasPtr = Builder.CreateGEP(Type::getInt8Ty(C), FramePtrRaw,
+ ConstantInt::get(ITy, Value));
auto *AliasPtrTyped =
Builder.CreateBitCast(AliasPtr, Alias.first->getType());
Alias.first->replaceUsesWithIf(
AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
}
}
- return FramePtr;
}
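
    The alias-pointer GEP above rebuilds its index type from the APInt's own
    width because a ConstantInt must match its APInt exactly; hard-coding i64
    would assert whenever the recorded offset was computed at another width.
    An equivalent sketch (helper name illustrative):

        #include "llvm/IR/Constants.h"

        // Build a constant whose width is taken from the APInt itself, so no
        // particular pointer-index width is baked in.
        llvm::ConstantInt *offsetConst(llvm::LLVMContext &C,
                                       const llvm::APInt &V) {
          return llvm::ConstantInt::get(C, V);
        }
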
// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
@@ -2130,7 +2168,7 @@ static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
// Allocate memory.
auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
- Alloca->setAlignment(Align(AI->getAlignment()));
+ Alloca->setAlignment(AI->getAlignment());
for (auto U : AI->users()) {
// Replace gets with the allocation.
@@ -2279,7 +2317,10 @@ static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
auto ArgTy = cast<PointerType>(Arg.getType());
- auto ValueTy = ArgTy->getPointerElementType();
+ // swifterror arguments are required to have pointer-to-pointer type,
+ // so create a pointer-typed alloca with opaque pointers.
+ auto ValueTy = ArgTy->isOpaque() ? PointerType::getUnqual(F.getContext())
+ : ArgTy->getNonOpaquePointerElementType();
// Reduce to the alloca case:
@@ -2520,6 +2561,7 @@ void coro::salvageDebugInfo(
bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
Value *Storage = DVI->getVariableLocationOp(0);
Value *OriginalStorage = Storage;
+
while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
Storage = LdInst->getOperand(0);
@@ -2559,7 +2601,7 @@ void coro::salvageDebugInfo(
//
// Avoid creating an alloca that would be eliminated by optimization
// passes, which would leave the corresponding dbg.declares invalid.
- if (!OptimizeFrame && !EnableReuseStorageInFrame)
+ if (!OptimizeFrame)
if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
auto &Cached = DbgPtrAllocaCache[Storage];
if (!Cached) {
@@ -2575,14 +2617,15 @@ void coro::salvageDebugInfo(
// expression, we need to add a DW_OP_deref at the *start* of the
// expression to first load the contents of the alloca before
// adjusting it with the expression.
- if (Expr && Expr->isComplex())
- Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
}
DVI->replaceVariableLocationOp(OriginalStorage, Storage);
DVI->setExpression(Expr);
- /// It makes no sense to move the dbg.value intrinsic.
- if (!isa<DbgValueInst>(DVI)) {
+  // We only hoist dbg.declare today; hoisting dbg.value or dbg.addr makes no
+  // sense since they do not have the same function-wide guarantees that
+  // dbg.declare does.
+ if (!isa<DbgValueInst>(DVI) && !isa<DbgAddrIntrinsic>(DVI)) {
if (auto *II = dyn_cast<InvokeInst>(Storage))
DVI->moveBefore(II->getNormalDest()->getFirstNonPHI());
else if (auto *CBI = dyn_cast<CallBrInst>(Storage))
@@ -2661,13 +2704,6 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
for (User *U : I.users())
if (Checker.isDefinitionAcrossSuspend(I, U))
Spills[&I].push_back(cast<Instruction>(U));
-
- // Manually add dbg.value metadata uses of I.
- SmallVector<DbgValueInst *, 16> DVIs;
- findDbgValues(DVIs, &I);
- for (auto *DVI : DVIs)
- if (Checker.isDefinitionAcrossSuspend(I, DVI))
- Spills[&I].push_back(DVI);
}
if (Spills.empty())
@@ -2754,10 +2790,9 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
auto *V = Iter.first;
SmallVector<DbgValueInst *, 16> DVIs;
findDbgValues(DVIs, V);
- llvm::for_each(DVIs, [&](DbgValueInst *DVI) {
+ for (DbgValueInst *DVI : DVIs)
if (Checker.isDefinitionAcrossSuspend(*V, DVI))
FrameData.Spills[V].push_back(DVI);
- });
}
LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 9a17068df3a9..5557370c82ba 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -13,7 +13,6 @@
#include "CoroInstr.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/Transforms/Coroutines.h"
namespace llvm {
@@ -21,40 +20,13 @@ class CallGraph;
class CallGraphSCC;
class PassRegistry;
-void initializeCoroEarlyLegacyPass(PassRegistry &);
-void initializeCoroSplitLegacyPass(PassRegistry &);
-void initializeCoroElideLegacyPass(PassRegistry &);
-void initializeCoroCleanupLegacyPass(PassRegistry &);
-
-// CoroEarly pass marks every function that has coro.begin with a string
-// attribute "coroutine.presplit"="0". CoroSplit pass processes the coroutine
-// twice. First, it lets it go through complete IPO optimization pipeline as a
-// single function. It forces restart of the pipeline by inserting an indirect
-// call to an empty function "coro.devirt.trigger" which is devirtualized by
-// CoroElide pass that triggers a restart of the pipeline by CGPassManager.
-// When CoroSplit pass sees the same coroutine the second time, it splits it up,
-// adds coroutine subfunctions to the SCC to be processed by IPO pipeline.
-// Async lowering similarily triggers a restart of the pipeline after it has
-// split the coroutine.
-//
-// FIXME: Refactor these attributes as LLVM attributes instead of string
-// attributes since these attributes are already used outside LLVM's
-// coroutine module.
-// FIXME: Remove these values once we remove the Legacy PM.
-#define CORO_PRESPLIT_ATTR "coroutine.presplit"
-#define UNPREPARED_FOR_SPLIT "0"
-#define PREPARED_FOR_SPLIT "1"
-#define ASYNC_RESTART_AFTER_SPLIT "2"
-
-#define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger"
-
namespace coro {
+bool declaresAnyIntrinsic(const Module &M);
bool declaresIntrinsics(const Module &M,
const std::initializer_list<StringRef>);
void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
-void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
- CallGraph &CG, CallGraphSCC &SCC);
+
/// Recover a dbg.declare prepared by the frontend and emit an alloca
/// holding a pointer to the coroutine frame.
void salvageDebugInfo(
@@ -128,7 +100,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
StructType *FrameTy;
Align FrameAlign;
uint64_t FrameSize;
- Instruction *FramePtr;
+ Value *FramePtr;
BasicBlock *AllocaSpillBlock;
/// This would only be true if optimizations are enabled.
@@ -210,10 +182,9 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
FunctionType *getResumeFunctionType() const {
switch (ABI) {
- case coro::ABI::Switch: {
- auto *FnPtrTy = getSwitchResumePointerType();
- return cast<FunctionType>(FnPtrTy->getPointerElementType());
- }
+ case coro::ABI::Switch:
+ return FunctionType::get(Type::getVoidTy(FrameTy->getContext()),
+ FrameTy->getPointerTo(), /*IsVarArg*/false);
case coro::ABI::Retcon:
case coro::ABI::RetconOnce:
return RetconLowering.ResumePrototype->getFunctionType();
@@ -267,6 +238,12 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
return nullptr;
}
+ Instruction *getInsertPtAfterFramePtr() const {
+ if (auto *I = dyn_cast<Instruction>(FramePtr))
+ return I->getNextNode();
+ return &cast<Argument>(FramePtr)->getParent()->getEntryBlock().front();
+ }
+
/// Allocate memory according to the rules of the active lowering.
///
/// \param CG - if non-null, will be updated for the new call
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b5129809c6a6..ead552d9be4e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -22,15 +22,17 @@
#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -50,13 +52,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -869,11 +868,16 @@ void CoroCloner::create() {
OrigF.getParent()->end(), ActiveSuspend);
}
- // Replace all args with undefs. The buildCoroutineFrame algorithm already
- // rewritten access to the args that occurs after suspend points with loads
- // and stores to/from the coroutine frame.
- for (Argument &A : OrigF.args())
- VMap[&A] = UndefValue::get(A.getType());
+ // Replace all args with dummy instructions. If an argument is the old frame
+ // pointer, the dummy will be replaced by the new frame pointer once it is
+ // computed below. Uses of all other arguments should have already been
+ // rewritten by buildCoroutineFrame() to use loads/stores on the coroutine
+ // frame.
+ SmallVector<Instruction *> DummyArgs;
+ for (Argument &A : OrigF.args()) {
+ DummyArgs.push_back(new FreezeInst(UndefValue::get(A.getType())));
+ VMap[&A] = DummyArgs.back();
+ }
SmallVector<ReturnInst *, 4> Returns;
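
    The dummy-argument trick above is a general cloning pattern: create
    detached placeholder instructions for the value map, then retire them once
    the real values exist. A hedged sketch of the two halves (helper names are
    illustrative):

        #include "llvm/IR/Constants.h"
        #include "llvm/IR/Instructions.h"

        // A detached freeze(undef) works as a placeholder: a legal Value of
        // the right type that lives in no basic block.
        llvm::Instruction *makePlaceholder(llvm::Type *Ty) {
          return new llvm::FreezeInst(llvm::UndefValue::get(Ty));
        }

        // Once all uses are remapped, drop the placeholder. deleteValue() is
        // used instead of eraseFromParent() because it was never inserted.
        void retirePlaceholder(llvm::Instruction *Dummy) {
          Dummy->replaceAllUsesWith(llvm::UndefValue::get(Dummy->getType()));
          Dummy->deleteValue();
        }
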
@@ -923,6 +927,12 @@ void CoroCloner::create() {
NewF->setVisibility(savedVisibility);
NewF->setUnnamedAddr(savedUnnamedAddr);
NewF->setDLLStorageClass(savedDLLStorageClass);
+  // The function sanitizer metadata needs to match the signature of the
+  // function it is being attached to. However, this does not hold for the
+  // split functions here, so remove the metadata from them.
+ if (Shape.ABI == coro::ABI::Switch &&
+ NewF->hasMetadata(LLVMContext::MD_func_sanitize))
+ NewF->eraseMetadata(LLVMContext::MD_func_sanitize);
// Replace the attributes of the new function:
auto OrigAttrs = NewF->getAttributes();
@@ -932,7 +942,8 @@ void CoroCloner::create() {
case coro::ABI::Switch:
// Bootstrap attributes by copying function attributes from the
// original function. This should include optimization settings and so on.
- NewAttrs = NewAttrs.addFnAttributes(Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));
+ NewAttrs = NewAttrs.addFnAttributes(
+ Context, AttrBuilder(Context, OrigAttrs.getFnAttrs()));
addFramePointerAttrs(NewAttrs, Context, 0,
Shape.FrameSize, Shape.FrameAlign);
@@ -1013,7 +1024,15 @@ void CoroCloner::create() {
auto *NewVFrame = Builder.CreateBitCast(
NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
- OldVFrame->replaceAllUsesWith(NewVFrame);
+ if (OldVFrame != NewVFrame)
+ OldVFrame->replaceAllUsesWith(NewVFrame);
+
+ // All uses of the arguments should have been resolved by this point,
+ // so we can safely remove the dummy values.
+ for (Instruction *DummyArg : DummyArgs) {
+ DummyArg->replaceAllUsesWith(UndefValue::get(DummyArg->getType()));
+ DummyArg->deleteValue();
+ }
switch (Shape.ABI) {
case coro::ABI::Switch:
@@ -1063,13 +1082,6 @@ static Function *createClone(Function &F, const Twine &Suffix,
return Cloner.getFunction();
}
-/// Remove calls to llvm.coro.end in the original function.
-static void removeCoroEnds(const coro::Shape &Shape, CallGraph *CG) {
- for (auto End : Shape.CoroEnds) {
- replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG);
- }
-}
-
static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
assert(Shape.ABI == coro::ABI::Async);
@@ -1150,7 +1162,8 @@ static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
Function *DestroyFn, Function *CleanupFn) {
assert(Shape.ABI == coro::ABI::Switch);
- IRBuilder<> Builder(Shape.FramePtr->getNextNode());
+ IRBuilder<> Builder(Shape.getInsertPtAfterFramePtr());
+
auto *ResumeAddr = Builder.CreateStructGEP(
Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume,
"resume.addr");
@@ -1559,7 +1572,8 @@ static void simplifySuspendPoints(coro::Shape &Shape) {
}
static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
- SmallVectorImpl<Function *> &Clones) {
+ SmallVectorImpl<Function *> &Clones,
+ TargetTransformInfo &TTI) {
assert(Shape.ABI == coro::ABI::Switch);
createResumeEntryBlock(F, Shape);
@@ -1574,7 +1588,13 @@ static void splitSwitchCoroutine(Function &F, coro::Shape &Shape,
postSplitCleanup(*DestroyClone);
postSplitCleanup(*CleanupClone);
- addMustTailToCoroResumes(*ResumeClone);
+  // Add musttail calls to support symmetric transfer.
+  // Skip targets which don't support tail calls.
+ //
+ // FIXME: Could we support symmetric transfer effectively without musttail
+ // call?
+ if (TTI.supportsTailCalls())
+ addMustTailToCoroResumes(*ResumeClone);
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
@@ -1661,7 +1681,7 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
// Map all uses of llvm.coro.begin to the allocated frame pointer.
{
// Make sure we don't invalidate Shape.FramePtr.
- TrackingVH<Instruction> Handle(Shape.FramePtr);
+ TrackingVH<Value> Handle(Shape.FramePtr);
Shape.CoroBegin->replaceAllUsesWith(FramePtr);
Shape.FramePtr = Handle.getValPtr();
}
@@ -1773,7 +1793,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
// Map all uses of llvm.coro.begin to the allocated frame pointer.
{
// Make sure we don't invalidate Shape.FramePtr.
- TrackingVH<Instruction> Handle(Shape.FramePtr);
+ TrackingVH<Value> Handle(Shape.FramePtr);
Shape.CoroBegin->replaceAllUsesWith(RawFramePtr);
Shape.FramePtr = Handle.getValPtr();
}
@@ -1879,6 +1899,7 @@ namespace {
static coro::Shape splitCoroutine(Function &F,
SmallVectorImpl<Function *> &Clones,
+ TargetTransformInfo &TTI,
bool OptimizeFrame) {
PrettyStackTraceFunction prettyStackTrace(F);
@@ -1901,7 +1922,7 @@ static coro::Shape splitCoroutine(Function &F,
} else {
switch (Shape.ABI) {
case coro::ABI::Switch:
- splitSwitchCoroutine(F, Shape, Clones);
+ splitSwitchCoroutine(F, Shape, Clones, TTI);
break;
case coro::ABI::Async:
splitAsyncCoroutine(F, Shape, Clones);
@@ -1917,21 +1938,27 @@ static coro::Shape splitCoroutine(Function &F,
// This invalidates SwiftErrorOps in the Shape.
replaceSwiftErrorOps(F, Shape, nullptr);
- return Shape;
-}
-
-static void
-updateCallGraphAfterCoroutineSplit(Function &F, const coro::Shape &Shape,
- const SmallVectorImpl<Function *> &Clones,
- CallGraph &CG, CallGraphSCC &SCC) {
- if (!Shape.CoroBegin)
- return;
-
- removeCoroEnds(Shape, &CG);
- postSplitCleanup(F);
+  // Finally, salvage the llvm.dbg.{declare,addr} intrinsics in our original
+  // function that point into the coroutine frame. We only do this for the
+  // current function, since the Cloner already salvaged debug info for us in
+  // the new coroutine funclets.
+ SmallVector<DbgVariableIntrinsic *, 8> Worklist;
+ SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) {
+ Worklist.push_back(DDI);
+ continue;
+ }
+ if (auto *DDI = dyn_cast<DbgAddrIntrinsic>(&I)) {
+ Worklist.push_back(DDI);
+ continue;
+ }
+ }
+ }
+ for (auto *DDI : Worklist)
+ coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
- // Update call graph and add the functions we created to the SCC.
- coro::updateCallGraph(F, Clones, CG, SCC);
+ return Shape;
}
static void updateCallGraphAfterCoroutineSplit(
@@ -1976,70 +2003,6 @@ static void updateCallGraphAfterCoroutineSplit(
updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM);
}
-// When we see the coroutine the first time, we insert an indirect call to a
-// devirt trigger function and mark the coroutine that it is now ready for
-// split.
-// Async lowering uses this after it has split the function to restart the
-// pipeline.
-static void prepareForSplit(Function &F, CallGraph &CG,
- bool MarkForAsyncRestart = false) {
- Module &M = *F.getParent();
- LLVMContext &Context = F.getContext();
-#ifndef NDEBUG
- Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
- assert(DevirtFn && "coro.devirt.trigger function not found");
-#endif
-
- F.addFnAttr(CORO_PRESPLIT_ATTR, MarkForAsyncRestart
- ? ASYNC_RESTART_AFTER_SPLIT
- : PREPARED_FOR_SPLIT);
-
- // Insert an indirect call sequence that will be devirtualized by CoroElide
- // pass:
- // %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
- // %1 = bitcast i8* %0 to void(i8*)*
- // call void %1(i8* null)
- coro::LowererBase Lowerer(M);
- Instruction *InsertPt =
- MarkForAsyncRestart ? F.getEntryBlock().getFirstNonPHIOrDbgOrLifetime()
- : F.getEntryBlock().getTerminator();
- auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
- auto *DevirtFnAddr =
- Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
- FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
- {Type::getInt8PtrTy(Context)}, false);
- auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
-
- // Update CG graph with an indirect call we just added.
- CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
-}
-
-// Make sure that there is a devirtualization trigger function that the
-// coro-split pass uses to force a restart of the CGSCC pipeline. If the devirt
-// trigger function is not found, we will create one and add it to the current
-// SCC.
-static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
- Module &M = CG.getModule();
- if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
- return;
-
- LLVMContext &C = M.getContext();
- auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
- /*isVarArg=*/false);
- Function *DevirtFn =
- Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
- CORO_DEVIRT_TRIGGER_FN, &M);
- DevirtFn->addFnAttr(Attribute::AlwaysInline);
- auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
- ReturnInst::Create(C, Entry);
-
- auto *Node = CG.getOrInsertFunction(DevirtFn);
-
- SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
- Nodes.push_back(Node);
- SCC.initialize(Nodes);
-}
-
/// Replace a call to llvm.coro.prepare.retcon.
static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
LazyCallGraph::SCC &C) {
@@ -2076,59 +2039,6 @@ static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
Cast->eraseFromParent();
}
}
-/// Replace a call to llvm.coro.prepare.retcon.
-static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
- auto CastFn = Prepare->getArgOperand(0); // as an i8*
- auto Fn = CastFn->stripPointerCasts(); // as its original type
-
- // Find call graph nodes for the preparation.
- CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr;
- if (auto ConcreteFn = dyn_cast<Function>(Fn)) {
- PrepareUserNode = CG[Prepare->getFunction()];
- FnNode = CG[ConcreteFn];
- }
-
- // Attempt to peephole this pattern:
- // %0 = bitcast [[TYPE]] @some_function to i8*
- // %1 = call @llvm.coro.prepare.retcon(i8* %0)
- // %2 = bitcast %1 to [[TYPE]]
- // ==>
- // %2 = @some_function
- for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
- // Look for bitcasts back to the original function type.
- auto *Cast = dyn_cast<BitCastInst>(U.getUser());
- if (!Cast || Cast->getType() != Fn->getType()) continue;
-
- // Check whether the replacement will introduce new direct calls.
- // If so, we'll need to update the call graph.
- if (PrepareUserNode) {
- for (auto &Use : Cast->uses()) {
- if (auto *CB = dyn_cast<CallBase>(Use.getUser())) {
- if (!CB->isCallee(&Use))
- continue;
- PrepareUserNode->removeCallEdgeFor(*CB);
- PrepareUserNode->addCalledFunction(CB, FnNode);
- }
- }
- }
-
- // Replace and remove the cast.
- Cast->replaceAllUsesWith(Fn);
- Cast->eraseFromParent();
- }
-
- // Replace any remaining uses with the function as an i8*.
- // This can never directly be a callee, so we don't need to update CG.
- Prepare->replaceAllUsesWith(CastFn);
- Prepare->eraseFromParent();
-
- // Kill dead bitcasts.
- while (auto *Cast = dyn_cast<BitCastInst>(CastFn)) {
- if (!Cast->use_empty()) break;
- CastFn = Cast->getOperand(0);
- Cast->eraseFromParent();
- }
-}
static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
LazyCallGraph::SCC &C) {
@@ -2143,30 +2053,6 @@ static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
return Changed;
}
-/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent
-/// IPO from operating on calls to a retcon coroutine before it's been
-/// split. This is only safe to do after we've split all retcon
-/// coroutines in the module. We can do that this in this pass because
-/// this pass does promise to split all retcon coroutines (as opposed to
-/// switch coroutines, which are lowered in multiple stages).
-static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
- bool Changed = false;
- for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
- // Intrinsics can only be used in calls.
- auto *Prepare = cast<CallInst>(P.getUser());
- replacePrepare(Prepare, CG);
- Changed = true;
- }
-
- return Changed;
-}
-
-static bool declaresCoroSplitIntrinsics(const Module &M) {
- return coro::declaresIntrinsics(M, {"llvm.coro.begin",
- "llvm.coro.prepare.retcon",
- "llvm.coro.prepare.async"});
-}
-
static void addPrepareFunction(const Module &M,
SmallVectorImpl<Function *> &Fns,
StringRef Name) {
@@ -2185,18 +2071,15 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
auto &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
- if (!declaresCoroSplitIntrinsics(M))
- return PreservedAnalyses::all();
-
// Check for uses of llvm.coro.prepare.retcon/async.
SmallVector<Function *, 2> PrepareFns;
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
// Find coroutines for processing.
- SmallVector<LazyCallGraph::Node *, 4> Coroutines;
+ SmallVector<LazyCallGraph::Node *> Coroutines;
for (LazyCallGraph::Node &N : C)
- if (N.getFunction().hasFnAttribute(CORO_PRESPLIT_ATTR))
+ if (N.getFunction().isPresplitCoroutine())
Coroutines.push_back(&N);
if (Coroutines.empty() && PrepareFns.empty())
@@ -2212,13 +2095,12 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
for (LazyCallGraph::Node *N : Coroutines) {
Function &F = N->getFunction();
LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
- << "' state: "
- << F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString()
<< "\n");
- F.removeFnAttr(CORO_PRESPLIT_ATTR);
+ F.setSplittedCoroutine();
SmallVector<Function *, 4> Clones;
- const coro::Shape Shape = splitCoroutine(F, Clones, OptimizeFrame);
+ const coro::Shape Shape = splitCoroutine(
+ F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame);
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
if (!Shape.CoroSuspends.empty()) {
@@ -2237,122 +2119,3 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
return PreservedAnalyses::none();
}
-
-namespace {
-
-// We present a coroutine to LLVM as an ordinary function with suspension
-// points marked up with intrinsics. We let the optimizer party on the coroutine
-// as a single function for as long as possible. Shortly before the coroutine is
-// eligible to be inlined into its callers, we split up the coroutine into parts
-// corresponding to initial, resume and destroy invocations of the coroutine,
-// add them to the current SCC and restart the IPO pipeline to optimize the
-// coroutine subfunctions we extracted before proceeding to the caller of the
-// coroutine.
-struct CoroSplitLegacy : public CallGraphSCCPass {
- static char ID; // Pass identification, replacement for typeid
-
- CoroSplitLegacy(bool OptimizeFrame = false)
- : CallGraphSCCPass(ID), OptimizeFrame(OptimizeFrame) {
- initializeCoroSplitLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- bool Run = false;
- bool OptimizeFrame;
-
- // A coroutine is identified by the presence of coro.begin intrinsic, if
- // we don't have any, this pass has nothing to do.
- bool doInitialization(CallGraph &CG) override {
- Run = declaresCoroSplitIntrinsics(CG.getModule());
- return CallGraphSCCPass::doInitialization(CG);
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override {
- if (!Run)
- return false;
-
- // Check for uses of llvm.coro.prepare.retcon.
- SmallVector<Function *, 2> PrepareFns;
- auto &M = SCC.getCallGraph().getModule();
- addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
- addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.async");
-
- // Find coroutines for processing.
- SmallVector<Function *, 4> Coroutines;
- for (CallGraphNode *CGN : SCC)
- if (auto *F = CGN->getFunction())
- if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
- Coroutines.push_back(F);
-
- if (Coroutines.empty() && PrepareFns.empty())
- return false;
-
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
-
- if (Coroutines.empty()) {
- bool Changed = false;
- for (auto *PrepareFn : PrepareFns)
- Changed |= replaceAllPrepares(PrepareFn, CG);
- return Changed;
- }
-
- createDevirtTriggerFunc(CG, SCC);
-
- // Split all the coroutines.
- for (Function *F : Coroutines) {
- Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
- StringRef Value = Attr.getValueAsString();
- LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
- << "' state: " << Value << "\n");
- // Async lowering marks coroutines to trigger a restart of the pipeline
- // after it has split them.
- if (Value == ASYNC_RESTART_AFTER_SPLIT) {
- F->removeFnAttr(CORO_PRESPLIT_ATTR);
- continue;
- }
- if (Value == UNPREPARED_FOR_SPLIT) {
- prepareForSplit(*F, CG);
- continue;
- }
- F->removeFnAttr(CORO_PRESPLIT_ATTR);
-
- SmallVector<Function *, 4> Clones;
- const coro::Shape Shape = splitCoroutine(*F, Clones, OptimizeFrame);
- updateCallGraphAfterCoroutineSplit(*F, Shape, Clones, CG, SCC);
- if (Shape.ABI == coro::ABI::Async) {
- // Restart SCC passes.
- // Mark function for CoroElide pass. It will devirtualize causing a
- // restart of the SCC pipeline.
- prepareForSplit(*F, CG, true /*MarkForAsyncRestart*/);
- }
- }
-
- for (auto *PrepareFn : PrepareFns)
- replaceAllPrepares(PrepareFn, CG);
-
- return true;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
-
- StringRef getPassName() const override { return "Coroutine Splitting"; }
-};
-
-} // end anonymous namespace
-
-char CoroSplitLegacy::ID = 0;
-
-INITIALIZE_PASS_BEGIN(
- CoroSplitLegacy, "coro-split",
- "Split coroutine into a set of functions driving its state machine", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(
- CoroSplitLegacy, "coro-split",
- "Split coroutine into a set of functions driving its state machine", false,
- false)
-
-Pass *llvm::createCoroSplitLegacyPass(bool OptimizeFrame) {
- return new CoroSplitLegacy(OptimizeFrame);
-}
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 965a146c143f..1742e9319c3b 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -10,14 +10,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Coroutines.h"
#include "CoroInstr.h"
#include "CoroInternal.h"
-#include "llvm-c/Transforms/Coroutines.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -26,14 +23,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstddef>
@@ -41,55 +34,6 @@
using namespace llvm;
-void llvm::initializeCoroutines(PassRegistry &Registry) {
- initializeCoroEarlyLegacyPass(Registry);
- initializeCoroSplitLegacyPass(Registry);
- initializeCoroElideLegacyPass(Registry);
- initializeCoroCleanupLegacyPass(Registry);
-}
-
-static void addCoroutineOpt0Passes(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
- PM.add(createCoroSplitLegacyPass());
- PM.add(createCoroElideLegacyPass());
-
- PM.add(createBarrierNoopPass());
- PM.add(createCoroCleanupLegacyPass());
-}
-
-static void addCoroutineEarlyPasses(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
- PM.add(createCoroEarlyLegacyPass());
-}
-
-static void addCoroutineScalarOptimizerPasses(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
- PM.add(createCoroElideLegacyPass());
-}
-
-static void addCoroutineSCCPasses(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
- PM.add(createCoroSplitLegacyPass(Builder.OptLevel != 0));
-}
-
-static void addCoroutineOptimizerLastPasses(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
- PM.add(createCoroCleanupLegacyPass());
-}
-
-void llvm::addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder) {
- Builder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
- addCoroutineEarlyPasses);
- Builder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
- addCoroutineOpt0Passes);
- Builder.addExtension(PassManagerBuilder::EP_CGSCCOptimizerLate,
- addCoroutineSCCPasses);
- Builder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate,
- addCoroutineScalarOptimizerPasses);
- Builder.addExtension(PassManagerBuilder::EP_OptimizerLast,
- addCoroutineOptimizerLastPasses);
-}
-
// Construct the lowerer base class and initialize its members.
coro::LowererBase::LowererBase(Module &M)
: TheModule(M), Context(M.getContext()),
@@ -119,44 +63,55 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
return Bitcast;
}
+// NOTE: Must be sorted!
+static const char *const CoroIntrinsics[] = {
+ "llvm.coro.align",
+ "llvm.coro.alloc",
+ "llvm.coro.async.context.alloc",
+ "llvm.coro.async.context.dealloc",
+ "llvm.coro.async.resume",
+ "llvm.coro.async.size.replace",
+ "llvm.coro.async.store_resume",
+ "llvm.coro.begin",
+ "llvm.coro.destroy",
+ "llvm.coro.done",
+ "llvm.coro.end",
+ "llvm.coro.end.async",
+ "llvm.coro.frame",
+ "llvm.coro.free",
+ "llvm.coro.id",
+ "llvm.coro.id.async",
+ "llvm.coro.id.retcon",
+ "llvm.coro.id.retcon.once",
+ "llvm.coro.noop",
+ "llvm.coro.prepare.async",
+ "llvm.coro.prepare.retcon",
+ "llvm.coro.promise",
+ "llvm.coro.resume",
+ "llvm.coro.save",
+ "llvm.coro.size",
+ "llvm.coro.subfn.addr",
+ "llvm.coro.suspend",
+ "llvm.coro.suspend.async",
+ "llvm.coro.suspend.retcon",
+};
+
#ifndef NDEBUG
static bool isCoroutineIntrinsicName(StringRef Name) {
- // NOTE: Must be sorted!
- static const char *const CoroIntrinsics[] = {
- "llvm.coro.align",
- "llvm.coro.alloc",
- "llvm.coro.async.context.alloc",
- "llvm.coro.async.context.dealloc",
- "llvm.coro.async.resume",
- "llvm.coro.async.size.replace",
- "llvm.coro.async.store_resume",
- "llvm.coro.begin",
- "llvm.coro.destroy",
- "llvm.coro.done",
- "llvm.coro.end",
- "llvm.coro.end.async",
- "llvm.coro.frame",
- "llvm.coro.free",
- "llvm.coro.id",
- "llvm.coro.id.async",
- "llvm.coro.id.retcon",
- "llvm.coro.id.retcon.once",
- "llvm.coro.noop",
- "llvm.coro.prepare.async",
- "llvm.coro.prepare.retcon",
- "llvm.coro.promise",
- "llvm.coro.resume",
- "llvm.coro.save",
- "llvm.coro.size",
- "llvm.coro.subfn.addr",
- "llvm.coro.suspend",
- "llvm.coro.suspend.async",
- "llvm.coro.suspend.retcon",
- };
return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1;
}
#endif
+bool coro::declaresAnyIntrinsic(const Module &M) {
+ for (StringRef Name : CoroIntrinsics) {
+ assert(isCoroutineIntrinsicName(Name) && "not a coroutine intrinsic");
+ if (M.getNamedValue(Name))
+ return true;
+ }
+
+ return false;
+}
+
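
    The intrinsic table is hoisted to file scope so that both the debug-mode
    name check and the new declaresAnyIntrinsic() can share it, and the
    "must be sorted" note matters because lookupLLVMIntrinsicByName
    binary-searches the array. A rough standalone equivalent of such a
    membership test (the real helper also understands overloaded-name
    prefixes):

        #include <algorithm>
        #include <cstring>

        // Membership test over a sorted array of C strings via binary search.
        bool isKnownName(const char *const *First, const char *const *Last,
                         const char *Name) {
          return std::binary_search(First, Last, Name,
                                    [](const char *A, const char *B) {
                                      return std::strcmp(A, B) < 0;
                                    });
        }
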
// Verifies if a module declares any of the listed named values. Also, in
// debug mode, verifies that the names are intrinsic names.
bool coro::declaresIntrinsics(const Module &M,
@@ -191,46 +146,6 @@ void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
}
}
-// FIXME: This code is stolen from CallGraph::addToCallGraph(Function *F), which
-// happens to be private. It is better for this functionality exposed by the
-// CallGraph.
-static void buildCGN(CallGraph &CG, CallGraphNode *Node) {
- Function *F = Node->getFunction();
-
- // Look for calls by this function.
- for (Instruction &I : instructions(F))
- if (auto *Call = dyn_cast<CallBase>(&I)) {
- const Function *Callee = Call->getCalledFunction();
- if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
- // Indirect calls of intrinsics are not allowed so no need to check.
- // We can be more precise here by using TargetArg returned by
- // Intrinsic::isLeaf.
- Node->addCalledFunction(Call, CG.getCallsExternalNode());
- else if (!Callee->isIntrinsic())
- Node->addCalledFunction(Call, CG.getOrInsertFunction(Callee));
- }
-}
-
-// Rebuild CGN after we extracted parts of the code from ParentFunc into
-// NewFuncs. Builds CGNs for the NewFuncs and adds them to the current SCC.
-void coro::updateCallGraph(Function &ParentFunc, ArrayRef<Function *> NewFuncs,
- CallGraph &CG, CallGraphSCC &SCC) {
- // Rebuild CGN from scratch for the ParentFunc
- auto *ParentNode = CG[&ParentFunc];
- ParentNode->removeAllCalledFunctions();
- buildCGN(CG, ParentNode);
-
- SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
-
- for (Function *F : NewFuncs) {
- CallGraphNode *Callee = CG.getOrInsertFunction(F);
- Nodes.push_back(Callee);
- buildCGN(CG, Callee);
- }
-
- SCC.initialize(Nodes);
-}
-
static void clear(coro::Shape &Shape) {
Shape.CoroBegin = nullptr;
Shape.CoroEnds.clear();
@@ -735,25 +650,3 @@ void CoroAsyncEndInst::checkWellFormed() const {
"match the tail arguments",
MustTailCallFunc);
}
-
-void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroEarlyLegacyPass());
-}
-
-void LLVMAddCoroSplitPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroSplitLegacyPass());
-}
-
-void LLVMAddCoroElidePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroElideLegacyPass());
-}
-
-void LLVMAddCoroCleanupPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCoroCleanupLegacyPass());
-}
-
-void
-LLVMPassManagerBuilderAddCoroutinePassesToExtensionPoints(LLVMPassManagerBuilderRef PMB) {
- PassManagerBuilder *Builder = unwrap(PMB);
- addCoroutinePassesToExtensionPoints(*Builder);
-}
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index a6d9ce1033f3..58cea7ebb749 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -1,4 +1,4 @@
-//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
+//===- AlwaysInliner.cpp - Code to inline always_inline functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -16,15 +16,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -60,31 +55,38 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
for (User *U : F.users())
if (auto *CB = dyn_cast<CallBase>(U))
if (CB->getCalledFunction() == &F &&
- CB->hasFnAttr(Attribute::AlwaysInline))
- Calls.insert(CB);
+ CB->hasFnAttr(Attribute::AlwaysInline) &&
+ !CB->getAttributes().hasFnAttr(Attribute::NoInline))
+ Calls.insert(CB);
for (CallBase *CB : Calls) {
Function *Caller = CB->getCaller();
OptimizationRemarkEmitter ORE(Caller);
- auto OIC = shouldInline(
- *CB,
- [&](CallBase &CB) {
- return InlineCost::getAlways("always inline attribute");
- },
- ORE);
- assert(OIC);
- emitInlinedIntoBasedOnCost(ORE, CB->getDebugLoc(), CB->getParent(), F,
- *Caller, *OIC, false, DEBUG_TYPE);
+ DebugLoc DLoc = CB->getDebugLoc();
+ BasicBlock *Block = CB->getParent();
InlineFunctionInfo IFI(
/*cg=*/nullptr, GetAssumptionCache, &PSI,
- &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(*Caller),
&FAM.getResult<BlockFrequencyAnalysis>(F));
InlineResult Res = InlineFunction(
*CB, IFI, &FAM.getResult<AAManager>(F), InsertLifetime);
- assert(Res.isSuccess() && "unexpected failure to inline");
- (void)Res;
+ if (!Res.isSuccess()) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
+ Block)
+ << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
+ << ore::NV("Caller", Caller)
+ << "': " << ore::NV("Reason", Res.getFailureReason());
+ });
+ continue;
+ }
+
+ emitInlinedIntoBasedOnCost(
+ ORE, DLoc, Block, F, *Caller,
+ InlineCost::getAlways("always inline attribute"),
+ /*ForProfileContext=*/false, DEBUG_TYPE);
// Merge the attributes based on the inlining.
AttributeFuncs::mergeAttributesForInlining(*Caller, F);
@@ -210,6 +212,9 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) {
if (!CB.hasFnAttr(Attribute::AlwaysInline))
return InlineCost::getNever("no alwaysinline attribute");
+ if (Callee->hasFnAttribute(Attribute::AlwaysInline) && CB.isNoInline())
+ return InlineCost::getNever("noinline call site attribute");
+
auto IsViable = isInlineViable(*Callee);
if (!IsViable.isSuccess())
return InlineCost::getNever(IsViable.getFailureReason());
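
    Both the new-PM path and the legacy path above implement the same
    precedence rule: a call-site noinline now beats alwaysinline. Isolated as
    a predicate (a sketch; the helper name is not from the patch):

        #include "llvm/IR/InstrTypes.h"

        // A call is force-inlined only if it carries alwaysinline AND the
        // call site itself is not marked noinline.
        bool shouldForceInline(const llvm::CallBase &CB) {
          return CB.hasFnAttr(llvm::Attribute::AlwaysInline) &&
                 !CB.getAttributes().hasFnAttr(llvm::Attribute::NoInline);
        }
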
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index e6a542385662..62cfc3294968 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,9 +29,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -40,15 +39,11 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -56,33 +51,26 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <functional>
-#include <iterator>
-#include <map>
-#include <set>
#include <utility>
#include <vector>
@@ -91,43 +79,81 @@ using namespace llvm;
#define DEBUG_TYPE "argpromotion"
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
-STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
-STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
-/// A vector used to hold the indices of a single GEP instruction
-using IndicesVector = std::vector<uint64_t>;
+namespace {
+
+struct ArgPart {
+ Type *Ty;
+ Align Alignment;
+ /// A representative guaranteed-executed load or store instruction for use by
+ /// metadata transfer.
+ Instruction *MustExecInstr;
+};
+
+using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
+
+} // end anonymous namespace
+
+static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
+ Value *Ptr, Type *ResElemTy, int64_t Offset) {
+ // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise
+ // fall back to an i8 GEP to a specific offset.
+ unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
+ APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
+ if (!Ptr->getType()->isOpaquePointerTy()) {
+ Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
+ if (OrigOffset == 0 && OrigElemTy == ResElemTy)
+ return Ptr;
+
+ if (OrigElemTy->isSized()) {
+ APInt TmpOffset = OrigOffset;
+ Type *TmpTy = OrigElemTy;
+ SmallVector<APInt> IntIndices =
+ DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
+ if (TmpOffset == 0) {
+ // Try to add trailing zero indices to reach the right type.
+ while (TmpTy != ResElemTy) {
+ Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
+ if (!NextTy)
+ break;
+
+ IntIndices.push_back(APInt::getZero(
+ isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
+ TmpTy = NextTy;
+ }
+
+ SmallVector<Value *> Indices;
+ for (const APInt &Index : IntIndices)
+ Indices.push_back(IRB.getInt(Index));
+
+ if (OrigOffset != 0 || TmpTy == ResElemTy) {
+ Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
+ return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
+ }
+ }
+ }
+ }
+
+ if (OrigOffset != 0) {
+ Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
+ Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
+ }
+ return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
+}
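
    A hypothetical use of the helper above, assuming an in-scope builder, data
    layout, and pointer (none of these names are from the patch): materialize
    an i32* pointing at byte offset 8 of Ptr.

        // Illustrative only: createByteGEP is file-local, so this would live
        // in the same translation unit.
        llvm::Value *partPtrAt8(llvm::IRBuilderBase &IRB,
                                const llvm::DataLayout &DL, llvm::Value *Ptr,
                                llvm::LLVMContext &Ctx) {
          return createByteGEP(IRB, DL, Ptr, llvm::Type::getInt32Ty(Ctx),
                               /*Offset=*/8);
        }
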
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
-doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
- Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
- ReplaceCallSite) {
+doPromotion(Function *F, FunctionAnalysisManager &FAM,
+ const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
+ &ArgsToPromote) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
FunctionType *FTy = F->getFunctionType();
std::vector<Type *> Params;
- using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;
-
- // ScalarizedElements - If we are promoting a pointer that has elements
- // accessed out of it, keep track of which elements are accessed so that we
- // can add one argument for each.
- //
- // Arguments that are directly loaded will have a zero element value here, to
- // handle cases where there are both a direct load and GEP accesses.
- std::map<Argument *, ScalarizeTable> ScalarizedElements;
-
- // OriginalLoads - Keep track of a representative load instruction from the
- // original function so that we can tell the alias analysis implementation
- // what the new GEP/Load instructions we are inserting look like.
- // We need to keep the original loads for each argument and the elements
- // of the argument that are accessed.
- std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;
-
// Attribute - Keep track of the parameter attributes for the arguments
// that we are *not* promoting. For the ones that we do promote, the parameter
// attributes are lost.
@@ -138,15 +164,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
unsigned ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgNo) {
- if (ByValArgsToTransform.count(&*I)) {
- // Simple byval argument? Just add all the struct element types.
- Type *AgTy = I->getParamByValType();
- StructType *STy = cast<StructType>(AgTy);
- llvm::append_range(Params, STy->elements());
- ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
- AttributeSet());
- ++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(&*I)) {
+ if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
@@ -154,58 +172,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
} else {
- // Okay, this is being promoted. This means that the only uses are loads
- // or GEPs which are only used by loads
-
- // In this table, we will track which indices are loaded from the argument
- // (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- for (User *U : make_early_inc_range(I->users())) {
- Instruction *UI = cast<Instruction>(U);
- Type *SrcTy;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- SrcTy = L->getType();
- else
- SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
- // Skip dead GEPs and remove them.
- if (isa<GetElementPtrInst>(UI) && UI->use_empty()) {
- UI->eraseFromParent();
- continue;
- }
-
- IndicesVector Indices;
- Indices.reserve(UI->getNumOperands() - 1);
- // Since loads will only have a single operand, and GEPs only a single
- // non-index operand, this will record direct loads without any indices,
- // and gep+loads with the GEP indices.
- for (const Use &I : llvm::drop_begin(UI->operands()))
- Indices.push_back(cast<ConstantInt>(I)->getSExtValue());
- // GEPs with a single 0 index can be merged with direct loads
- if (Indices.size() == 1 && Indices.front() == 0)
- Indices.clear();
- ArgIndices.insert(std::make_pair(SrcTy, Indices));
- LoadInst *OrigLoad;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- OrigLoad = L;
- else
- // Take any load, we will use it only to update Alias Analysis
- OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
- }
-
- // Add a parameter to the function for each element passed in.
- for (const auto &ArgIndex : ArgIndices) {
- // not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(
- I->getType()->getPointerElementType(), ArgIndex.second));
+ const auto &ArgParts = ArgsToPromote.find(&*I)->second;
+ for (const auto &Pair : ArgParts) {
+ Params.push_back(Pair.second.Ty);
ArgAttrVec.push_back(AttributeSet());
- assert(Params.back());
}
-
- if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
- ++NumArgumentsPromoted;
- else
- ++NumAggregatesPromoted;
+ ++NumArgumentsPromoted;
}
}
@@ -222,24 +194,30 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// The new function will have the !dbg metadata copied from the original
// function. The original function may not be deleted, and dbg metadata need
- // to be unique so we need to drop it.
+ // to be unique, so we need to drop it.
F->setSubprogram(nullptr);
LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
+ uint64_t LargestVectorWidth = 0;
+ for (auto *I : Params)
+ if (auto *VT = dyn_cast<llvm::VectorType>(I))
+ LargestVectorWidth = std::max(
+ LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
+
// Recompute the parameter attributes list based on the new arguments for
// the function.
NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
PAL.getRetAttrs(), ArgAttrVec));
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
ArgAttrVec.clear();
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Loop over all of the callers of the function, transforming the call sites
- // to pass in the loaded pointers.
- //
+ // Loop over all the callers of the function, transforming the call sites to
+ // pass in the loaded pointers.
SmallVector<Value *, 16> Args;
const DataLayout &DL = F->getParent()->getDataLayout();
while (!F->use_empty()) {
@@ -250,74 +228,34 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
- auto AI = CB.arg_begin();
+ auto *AI = CB.arg_begin();
ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++AI, ++ArgNo)
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ ++I, ++AI, ++ArgNo) {
+ if (!ArgsToPromote.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
- } else if (ByValArgsToTransform.count(&*I)) {
- // Emit a GEP and load for each element of the struct.
- Type *AgTy = I->getParamByValType();
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
- const StructLayout *SL = DL.getStructLayout(STy);
- Align StructAlign = *I->getParamAlign();
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- auto *Idx =
- IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
- // TODO: Tell AA about the new values?
- Align Alignment =
- commonAlignment(StructAlign, SL->getElementOffset(i));
- Args.push_back(IRB.CreateAlignedLoad(
- STy->getElementType(i), Idx, Alignment, Idx->getName() + ".val"));
- ArgAttrVec.push_back(AttributeSet());
- }
} else if (!I->use_empty()) {
- // Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- // Store the Value* version of the indices in here, but declare it now
- // for reuse.
- std::vector<Value *> Ops;
- for (const auto &ArgIndex : ArgIndices) {
- Value *V = *AI;
- LoadInst *OrigLoad =
- OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
- if (!ArgIndex.second.empty()) {
- Ops.reserve(ArgIndex.second.size());
- Type *ElTy = V->getType();
- for (auto II : ArgIndex.second) {
- // Use i32 to index structs, and i64 for others (pointers/arrays).
- // This satisfies GEP constraints.
- Type *IdxTy =
- (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
- : Type::getInt64Ty(F->getContext()));
- Ops.push_back(ConstantInt::get(IdxTy, II));
- // Keep track of the type we're currently indexing.
- if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
- ElTy = ElPTy->getPointerElementType();
- else
- ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II);
- }
- // And create a GEP to extract those indices.
- V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx");
- Ops.clear();
+ Value *V = *AI;
+ const auto &ArgParts = ArgsToPromote.find(&*I)->second;
+ for (const auto &Pair : ArgParts) {
+ LoadInst *LI = IRB.CreateAlignedLoad(
+ Pair.second.Ty,
+ createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
+ Pair.second.Alignment, V->getName() + ".val");
+ if (Pair.second.MustExecInstr) {
+ LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
+ LI->copyMetadata(*Pair.second.MustExecInstr,
+ {LLVMContext::MD_range, LLVMContext::MD_nonnull,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_align, LLVMContext::MD_noundef});
}
- // Since we're replacing a load make sure we take the alignment
- // of the previous load.
- LoadInst *newLoad =
- IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
- newLoad->setAlignment(OrigLoad->getAlign());
- // Transfer the AA info too.
- newLoad->setAAMetadata(OrigLoad->getAAMetadata());
-
- Args.push_back(newLoad);
+ Args.push_back(LI);
ArgAttrVec.push_back(AttributeSet());
}
}
+ }
// Push any varargs arguments on the list.
for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
@@ -345,9 +283,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Args.clear();
ArgAttrVec.clear();
- // Update the callgraph to know that the callsite has been transformed.
- if (ReplaceCallSite)
- (*ReplaceCallSite)(CB, *NewCS);
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
+ LargestVectorWidth);
if (!CB.use_empty()) {
CB.replaceAllUsesWith(NewCS);
@@ -364,11 +301,15 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// function empty.
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+ // We will collect all the newly created allocas to promote them into
+ // registers after the following loop.
+ SmallVector<AllocaInst *, 4> Allocas;
+
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
Function::arg_iterator I2 = NF->arg_begin();
for (Argument &Arg : F->args()) {
- if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
+ if (!ArgsToPromote.count(&Arg)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
Arg.replaceAllUsesWith(&*I2);
@@ -377,37 +318,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
continue;
}
- if (ByValArgsToTransform.count(&Arg)) {
- // In the callee, we create an alloca, and store each of the new incoming
- // arguments into the alloca.
- Instruction *InsertPt = &NF->begin()->front();
-
- // Just add all the struct element types.
- Type *AgTy = Arg.getParamByValType();
- Align StructAlign = *Arg.getParamAlign();
- Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
- StructAlign, "", InsertPt);
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
- nullptr};
- const StructLayout *SL = DL.getStructLayout(STy);
-
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
- InsertPt);
- I2->setName(Arg.getName() + "." + Twine(i));
- Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
- new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
- }
-
- // Anything that used the arg should now use the alloca.
- Arg.replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&Arg);
- continue;
- }
-
// There potentially are metadata uses for things like llvm.dbg.value.
// Replace them with undef, after handling the other regular uses.
auto RauwUndefMetadata = make_scope_exit(
@@ -416,57 +326,95 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
if (Arg.use_empty())
continue;
- // Otherwise, if we promoted this argument, then all users are load
- // instructions (or GEPs with only load users), and all loads should be
- // using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&Arg];
-
- while (!Arg.use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) {
- assert(ArgIndices.begin()->second.empty() &&
- "Load element should sort to front!");
- I2->setName(Arg.getName() + ".val");
- LI->replaceAllUsesWith(&*I2);
- LI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName()
- << "' in function '" << F->getName() << "'\n");
- } else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back());
- assert(!GEP->use_empty() &&
- "GEPs without uses should be cleaned up already");
- IndicesVector Operands;
- Operands.reserve(GEP->getNumIndices());
- for (const Use &Idx : GEP->indices())
- Operands.push_back(cast<ConstantInt>(Idx)->getSExtValue());
-
- // GEPs with a single 0 index can be merged with direct loads
- if (Operands.size() == 1 && Operands.front() == 0)
- Operands.clear();
-
- Function::arg_iterator TheArg = I2;
- for (ScalarizeTable::iterator It = ArgIndices.begin();
- It->second != Operands; ++It, ++TheArg) {
- assert(It != ArgIndices.end() && "GEP not handled??");
- }
+ // Otherwise, if we promoted this argument, we have to create an alloca in
+ // the callee for every promotable part and store each of the new incoming
+ // arguments into the corresponding alloca, which gives the old code
+ // (especially the store instructions, if they are allowed) a chance to
+ // work as before.
+ assert(Arg.getType()->isPointerTy() &&
+ "Only arguments with a pointer type are promotable");
- TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(),
- make_range(Operands.begin(), Operands.end())));
+ IRBuilder<NoFolder> IRB(&NF->begin()->front());
- LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
- << "' of function '" << NF->getName() << "'\n");
+ // Add only the promoted elements, i.e. the parts from ArgsToPromote.
+ SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
+ for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
+ int64_t Offset = Pair.first;
+ const ArgPart &Part = Pair.second;
- // All of the uses must be load instructions. Replace them all with
- // the argument specified by ArgNo.
- while (!GEP->use_empty()) {
- LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(&*TheArg);
- L->eraseFromParent();
- }
- GEP->eraseFromParent();
+ Argument *NewArg = I2++;
+ NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
+
+ AllocaInst *NewAlloca = IRB.CreateAlloca(
+ Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
+ NewAlloca->setAlignment(Pair.second.Alignment);
+ IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);
+
+ // Remember the alloca so we can retarget the users to it.
+ OffsetToAlloca.insert({Offset, NewAlloca});
+ }
+
+ auto GetAlloca = [&](Value *Ptr) {
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+ /* AllowNonInbounds */ true);
+ assert(Ptr == &Arg && "Not constant offset from arg?");
+ return OffsetToAlloca.lookup(Offset.getSExtValue());
+ };
+
+ // Clean up the dead instructions, i.e. the GEPs and BitCasts between the
+ // original argument and its users (loads and stores), and retarget every
+ // user to the newly created alloca.
+ SmallVector<Value *, 16> Worklist;
+ SmallVector<Instruction *, 16> DeadInsts;
+ append_range(Worklist, Arg.users());
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
+ DeadInsts.push_back(cast<Instruction>(V));
+ append_range(Worklist, V->users());
+ continue;
+ }
+
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ Value *Ptr = LI->getPointerOperand();
+ LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
+ continue;
}
+
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
+ Value *Ptr = SI->getPointerOperand();
+ SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
+ continue;
+ }
+
+ llvm_unreachable("Unexpected user");
+ }
+
+ for (Instruction *I : DeadInsts) {
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
+ I->eraseFromParent();
}
- // Increment I2 past all of the arguments added for this promoted pointer.
- std::advance(I2, ArgIndices.size());
+
+ // Collect the allocas for promotion
+ for (const auto &Pair : OffsetToAlloca) {
+ assert(isAllocaPromotable(Pair.second) &&
+ "By design, only promotable allocas should be produced.");
+ Allocas.push_back(Pair.second);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
+ << " alloca(s) are promotable by Mem2Reg\n");
+
+ if (!Allocas.empty()) {
+ // Promote the collected allocas into registers; our earlier checks have
+ // ensured that PromoteMemToReg() will succeed.
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
+ auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
+ PromoteMemToReg(Allocas, DT, &AC);
}
return NF;
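
The net effect of doPromotion, as a hedged before/after IR sketch (the promoted function keeps the old name in reality; it is renamed here only so the sketch stays a single valid module, and the `.0.val` suffix follows the naming used above):

    ; Before: the callee receives a pointer and loads from it.
    define internal i32 @callee(i32* %p) {
      %v = load i32, i32* %p
      ret i32 %v
    }

    ; After: each call site performs the load and passes the value:
    ;   %p.val = load i32, i32* %p
    ;   %r = call i32 @callee.promoted(i32 %p.val)
    define internal i32 @callee.promoted(i32 %p.0.val) {
      ret i32 %p.0.val
    }
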
@@ -474,100 +422,37 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
/// Return true if we can prove that all callers pass in a valid pointer for the
/// specified function argument.
-static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
+static bool allCallersPassValidPointerForArgument(Argument *Arg,
+ Align NeededAlign,
+ uint64_t NeededDerefBytes) {
Function *Callee = Arg->getParent();
const DataLayout &DL = Callee->getParent()->getDataLayout();
+ APInt Bytes(64, NeededDerefBytes);
- unsigned ArgNo = Arg->getArgNo();
+ // Check if the argument itself is marked dereferenceable and aligned.
+ if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
+ return true;
// Look at all call sites of the function. At this point we know we only have
// direct callees.
- for (User *U : Callee->users()) {
+ return all_of(Callee->users(), [&](User *U) {
CallBase &CB = cast<CallBase>(*U);
-
- if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL))
- return false;
- }
- return true;
+ return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
+ NeededAlign, Bytes, DL);
+ });
}
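
A call-site shape this check accepts, sketched in IR (the attribute values are illustrative and correspond to NeededAlign = 8 and NeededDerefBytes = 16):

    declare i64 @callee(i64* %p)

    define i64 @caller(i64* dereferenceable(16) align 8 %q) {
      ; The parameter attributes prove 16 dereferenceable bytes at alignment 8.
      %r = call i64 @callee(i64* %q)
      ret i64 %r
    }
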
-/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
-/// that is greater than or equal to the size of prefix, and each of the
-/// elements in Prefix is the same as the corresponding elements in Longer.
-///
-/// This means it also returns true when Prefix and Longer are equal!
-static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
- if (Prefix.size() > Longer.size())
- return false;
- return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
-}
-
-/// Checks if Indices, or a prefix of Indices, is in Set.
-static bool prefixIn(const IndicesVector &Indices,
- std::set<IndicesVector> &Set) {
- std::set<IndicesVector>::iterator Low;
- Low = Set.upper_bound(Indices);
- if (Low != Set.begin())
- Low--;
- // Low is now the last element smaller than or equal to Indices. This means
- // it points to a prefix of Indices (possibly Indices itself), if such
- // prefix exists.
- //
- // This load is safe if any prefix of its operands is safe to load.
- return Low != Set.end() && isPrefix(*Low, Indices);
-}
-
-/// Mark the given indices (ToMark) as safe in the given set of indices
-/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there
-/// is already a prefix of Indices in Safe, Indices are implicitely marked safe
-/// already. Furthermore, any indices that Indices is itself a prefix of, are
-/// removed from Safe (since they are implicitely safe because of Indices now).
-static void markIndicesSafe(const IndicesVector &ToMark,
- std::set<IndicesVector> &Safe) {
- std::set<IndicesVector>::iterator Low;
- Low = Safe.upper_bound(ToMark);
- // Guard against the case where Safe is empty
- if (Low != Safe.begin())
- Low--;
- // Low is now the last element smaller than or equal to Indices. This
- // means it points to a prefix of Indices (possibly Indices itself), if
- // such prefix exists.
- if (Low != Safe.end()) {
- if (isPrefix(*Low, ToMark))
- // If there is already a prefix of these indices (or exactly these
- // indices) marked a safe, don't bother adding these indices
- return;
-
- // Increment Low, so we can use it as a "insert before" hint
- ++Low;
- }
- // Insert
- Low = Safe.insert(Low, ToMark);
- ++Low;
- // If there we're a prefix of longer index list(s), remove those
- std::set<IndicesVector>::iterator End = Safe.end();
- while (Low != End && isPrefix(ToMark, *Low)) {
- std::set<IndicesVector>::iterator Remove = Low;
- ++Low;
- Safe.erase(Remove);
- }
-}
-
-/// isSafeToPromoteArgument - As you might guess from the name of this method,
-/// it checks to see if it is both safe and useful to promote the argument.
-/// This method limits promotion of aggregates to only promote up to three
-/// elements of the aggregate in order to avoid exploding the number of
-/// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
- unsigned MaxElements) {
- using GEPIndicesSet = std::set<IndicesVector>;
-
+/// Determine whether this argument is safe to promote, and find the argument
+/// parts it can be promoted into.
+static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
+ unsigned MaxElements, bool IsRecursive,
+ SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
// Quick exit for unused arguments
if (Arg->use_empty())
return true;
- // We can only promote this argument if all of the uses are loads, or are GEP
- // instructions (with constant indices) that are subsequently loaded.
+ // We can only promote this argument if all the uses are loads at known
+ // offsets.
//
// Promoting the argument causes it to be loaded in the caller
// unconditionally. This is only safe if we can prove that either the load
@@ -578,157 +463,193 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
// anyway, in the latter case, invalid loads won't happen. This prevents us
// from introducing an invalid load that wouldn't have happened in the
// original code.
- //
- // This set will contain all sets of indices that are loaded in the entry
- // block, and thus are safe to unconditionally load in the caller.
- GEPIndicesSet SafeToUnconditionallyLoad;
-
- // This set contains all the sets of indices that we are planning to promote.
- // This makes it possible to limit the number of arguments added.
- GEPIndicesSet ToPromote;
-
- // If the pointer is always valid, any load with first index 0 is valid.
-
- if (ByValTy)
- SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
-
- // Whenever a new underlying type for the operand is found, make sure it's
- // consistent with the GEPs and loads we've already seen and, if necessary,
- // use it to see if all incoming pointers are valid (which implies the 0-index
- // is safe).
- Type *BaseTy = ByValTy;
- auto UpdateBaseTy = [&](Type *NewBaseTy) {
- if (BaseTy)
- return BaseTy == NewBaseTy;
-
- BaseTy = NewBaseTy;
- if (allCallersPassValidPointerForArgument(Arg, BaseTy)) {
- assert(SafeToUnconditionallyLoad.empty());
- SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+
+ SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
+ Align NeededAlign(1);
+ uint64_t NeededDerefBytes = 0;
+
+ // If this is a byval argument, we also allow store instructions. Only
+ // handle arguments this way when their alignment is specified; if it's
+ // unspecified, the actual alignment of the argument is target-specific.
+ bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
+
+ // An end user of a pointer argument is a load or store instruction.
+ // Returns None if this load or store is not based on the argument;
+ // otherwise returns true if we can promote the instruction, false if not.
+ auto HandleEndUser = [&](auto *I, Type *Ty,
+ bool GuaranteedToExecute) -> Optional<bool> {
+ // Don't promote volatile or atomic instructions.
+ if (!I->isSimple())
+ return false;
+
+ Value *Ptr = I->getPointerOperand();
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+ /* AllowNonInbounds */ true);
+ if (Ptr != Arg)
+ return None;
+
+ if (Offset.getSignificantBits() >= 64)
+ return false;
+
+ TypeSize Size = DL.getTypeStoreSize(Ty);
+ // Don't try to promote scalable types.
+ if (Size.isScalable())
+ return false;
+
+ // If this is a recursive function and one of the types is a pointer,
+ // then promoting it might lead to recursive promotion.
+ if (IsRecursive && Ty->isPointerTy())
+ return false;
+
+ int64_t Off = Offset.getSExtValue();
+ auto Pair = ArgParts.try_emplace(
+ Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
+ ArgPart &Part = Pair.first->second;
+ bool OffsetNotSeenBefore = Pair.second;
+
+ // We limit promotion to only promoting up to a fixed number of elements of
+ // the aggregate.
+ if (MaxElements > 0 && ArgParts.size() > MaxElements) {
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "more than " << MaxElements << " parts\n");
+ return false;
}
- return true;
- };
+ // For now, we only support loading/storing one specific type at a given
+ // offset.
+ if (Part.Ty != Ty) {
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "accessed as both " << *Part.Ty << " and " << *Ty
+ << " at offset " << Off << "\n");
+ return false;
+ }
- // First, iterate functions that are guaranteed to execution on function
- // entry and mark loads of (geps of) arguments as safe.
- BasicBlock &EntryBlock = Arg->getParent()->front();
- // Declare this here so we can reuse it
- IndicesVector Indices;
- for (Instruction &I : EntryBlock) {
- if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- Value *V = LI->getPointerOperand();
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
- V = GEP->getPointerOperand();
- if (V == Arg) {
- // This load actually loads (part of) Arg? Check the indices then.
- Indices.reserve(GEP->getNumIndices());
- for (Use &Idx : GEP->indices())
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
- Indices.push_back(CI->getSExtValue());
- else
- // We found a non-constant GEP index for this argument? Bail out
- // right away, can't promote this argument at all.
- return false;
-
- if (!UpdateBaseTy(GEP->getSourceElementType()))
- return false;
-
- // Indices checked out, mark them as safe
- markIndicesSafe(Indices, SafeToUnconditionallyLoad);
- Indices.clear();
- }
- } else if (V == Arg) {
- // Direct loads are equivalent to a GEP with a single 0 index.
- markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ // If this instruction is not guaranteed to execute, and we haven't seen a
+ // load or store at this offset before (or it had lower alignment), then we
+ // need to remember that requirement.
+ // Note that skipping instructions of previously seen offsets is only
+ // correct because we only allow a single type for a given offset, which
+ // also means that the number of accessed bytes will be the same.
+ if (!GuaranteedToExecute &&
+ (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
+ // We won't be able to prove dereferenceability for negative offsets.
+ if (Off < 0)
+ return false;
- if (BaseTy && LI->getType() != BaseTy)
- return false;
+ // If the offset is not aligned, an aligned base pointer won't help.
+ if (!isAligned(I->getAlign(), Off))
+ return false;
- BaseTy = LI->getType();
- }
+ NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
+ NeededAlign = std::max(NeededAlign, I->getAlign());
}
+ Part.Alignment = std::max(Part.Alignment, I->getAlign());
+ return true;
+ };
+
+ // Look for loads and stores that are guaranteed to execute on entry.
+ for (Instruction &I : Arg->getParent()->getEntryBlock()) {
+ Optional<bool> Res{};
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+ Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
+ /* GuaranteedToExecute */ true);
+ if (Res && !*Res)
+ return false;
+
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
}
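
For instance, in the sketch below both loads sit in the entry block before any instruction that might not transfer execution, so they are recorded with GuaranteedToExecute set and need no caller-side dereferenceability proof (parts at offsets 0 and 4, both of type i32):

    define internal i32 @callee(i32* %p) {
    entry:
      %a = load i32, i32* %p
      %g = getelementptr i32, i32* %p, i64 1
      %b = load i32, i32* %g
      %s = add i32 %a, %b
      ret i32 %s
    }
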
- // Now, iterate all uses of the argument to see if there are any uses that are
- // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
+ // Now look at all loads of the argument. Remember the load instructions
+ // for the aliasing check below.
+ SmallVector<const Use *, 16> Worklist;
+ SmallPtrSet<const Use *, 16> Visited;
SmallVector<LoadInst *, 16> Loads;
- IndicesVector Operands;
- for (Use &U : Arg->uses()) {
- User *UR = U.getUser();
- Operands.clear();
- if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
- // Don't hack volatile/atomic loads
- if (!LI->isSimple())
- return false;
- Loads.push_back(LI);
- // Direct loads are equivalent to a GEP with a zero index and then a load.
- Operands.push_back(0);
+ auto AppendUses = [&](const Value *V) {
+ for (const Use &U : V->uses())
+ if (Visited.insert(&U).second)
+ Worklist.push_back(&U);
+ };
+ AppendUses(Arg);
+ while (!Worklist.empty()) {
+ const Use *U = Worklist.pop_back_val();
+ Value *V = U->getUser();
+ if (isa<BitCastInst>(V)) {
+ AppendUses(V);
+ continue;
+ }
- if (!UpdateBaseTy(LI->getType()))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ if (!GEP->hasAllConstantIndices())
return false;
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
- if (GEP->use_empty()) {
- // Dead GEP's cause trouble later. Just remove them if we run into
- // them.
- continue;
- }
+ AppendUses(V);
+ continue;
+ }
- if (!UpdateBaseTy(GEP->getSourceElementType()))
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
return false;
+ Loads.push_back(LI);
+ continue;
+ }
- // Ensure that all of the indices are constants.
- for (Use &Idx : GEP->indices())
- if (ConstantInt *C = dyn_cast<ConstantInt>(Idx))
- Operands.push_back(C->getSExtValue());
- else
- return false; // Not a constant operand GEP!
-
- // Ensure that the only users of the GEP are load instructions.
- for (User *GEPU : GEP->users())
- if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
- // Don't hack volatile/atomic loads
- if (!LI->isSimple())
- return false;
- Loads.push_back(LI);
- } else {
- // Other uses than load?
- return false;
- }
- } else {
- return false; // Not a load or a GEP.
+ // Stores are allowed for byval arguments
+ auto *SI = dyn_cast<StoreInst>(V);
+ if (AreStoresAllowed && SI &&
+ U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
+ if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
+ /* GuaranteedToExecute */ false))
+ return false;
+ continue;
+ // Only stores TO the argument are allowed; all the other stores are
+ // unknown users.
}
- // Now, see if it is safe to promote this load / loads of this GEP. Loading
- // is safe if Operands, or a prefix of Operands, is marked as safe.
- if (!prefixIn(Operands, SafeToUnconditionallyLoad))
- return false;
+ // Unknown user.
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "unknown user " << *V << "\n");
+ return false;
+ }
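
Any other use escapes the pointer and defeats promotion. A minimal sketch of such an unknown user (the @sink callee is hypothetical):

    declare void @sink(i32*)

    define internal i32 @callee(i32* %p) {
      call void @sink(i32* %p)  ; neither bitcast/GEP nor load/store: unknown user
      %v = load i32, i32* %p
      ret i32 %v
    }
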
- // See if we are already promoting a load with these indices. If not, check
- // to make sure that we aren't promoting too many elements. If so, nothing
- // to do.
- if (ToPromote.find(Operands) == ToPromote.end()) {
- if (MaxElements > 0 && ToPromote.size() == MaxElements) {
- LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '"
- << Arg->getName()
- << "' because it would require adding more "
- << "than " << MaxElements
- << " arguments to the function.\n");
- // We limit aggregate promotion to only promoting up to a fixed number
- // of elements of the aggregate.
- return false;
- }
- ToPromote.insert(std::move(Operands));
+ if (NeededDerefBytes || NeededAlign > 1) {
+ // Try to prove the required dereferenceability and alignment.
+ if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
+ NeededDerefBytes)) {
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+ << "not dereferenceable or aligned\n");
+ return false;
}
}
- if (Loads.empty())
+ if (ArgParts.empty())
return true; // No users, this is a dead argument.
- // Okay, now we know that the argument is only used by load instructions and
+ // Sort parts by offset.
+ append_range(ArgPartsVec, ArgParts);
+ sort(ArgPartsVec,
+ [](const auto &A, const auto &B) { return A.first < B.first; });
+
+ // Make sure the parts are non-overlapping.
+ int64_t Offset = ArgPartsVec[0].first;
+ for (const auto &Pair : ArgPartsVec) {
+ if (Pair.first < Offset)
+ return false; // Overlap with previous part.
+
+ Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
+ }
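
As a worked example of the overlap rule (hypothetical types): an i64 part at offset 0 stores 8 bytes, so the running end offset becomes 8, and a second part at offset 4 then fails the `Pair.first < Offset` test:

    define internal void @overlapping(i64* %p) {
      %v64 = load i64, i64* %p          ; part {0, i64}: covers bytes [0, 8)
      %c   = bitcast i64* %p to i8*
      %g   = getelementptr i8, i8* %c, i64 4
      %q   = bitcast i8* %g to i32*
      %v32 = load i32, i32* %q          ; part {4, i32}: overlaps, rejected
      ret void
    }
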
+
+ // If store instructions are allowed, the path from the entry of the
+ // function to each load may contain instructions that potentially
+ // invalidate the load; this is an admissible situation.
+ if (AreStoresAllowed)
+ return true;
+
+ // Okay, now we know that the argument is only used by load instructions, and
// it is safe to unconditionally perform all of them. Use alias analysis to
// check to see if the pointer is guaranteed to not be modified from entry of
// the function to each of the load instructions.
@@ -762,118 +683,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
return true;
}
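
Tying the byval special case together, here is a sketch of an argument that findArgParts accepts with AreStoresAllowed set (the byval struct type and alignment are illustrative):

    define internal void @callee({ i32, i32 }* byval({ i32, i32 }) align 4 %p) {
      %f1 = getelementptr { i32, i32 }, { i32, i32 }* %p, i64 0, i32 1
      store i32 7, i32* %f1    ; a store TO the byval copy is tolerated
      %f0 = getelementptr { i32, i32 }, { i32, i32 }* %p, i64 0, i32 0
      %v  = load i32, i32* %f0
      ret void
    }
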
-bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
- // There is no size information, so be conservative.
- if (!type->isSized())
- return false;
-
- // If the alloc size is not equal to the storage size, then there are padding
- // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
- return false;
-
- // FIXME: This isn't the right way to check for padding in vectors with
- // non-byte-size elements.
- if (VectorType *seqTy = dyn_cast<VectorType>(type))
- return isDenselyPacked(seqTy->getElementType(), DL);
-
- // For array types, check for padding within members.
- if (ArrayType *seqTy = dyn_cast<ArrayType>(type))
- return isDenselyPacked(seqTy->getElementType(), DL);
-
- if (!isa<StructType>(type))
- return true;
-
- // Check for padding within and between elements of a struct.
- StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL.getStructLayout(StructTy);
- uint64_t StartPos = 0;
- for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
- Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy, DL))
- return false;
- if (StartPos != Layout->getElementOffsetInBits(i))
- return false;
- StartPos += DL.getTypeAllocSizeInBits(ElTy);
- }
-
- return true;
-}
-
-/// Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *arg) {
- assert(arg->hasByValAttr());
-
- // Track all the pointers to the argument to make sure they are not captured.
- SmallPtrSet<Value *, 16> PtrValues;
- PtrValues.insert(arg);
-
- // Track all of the stores.
- SmallVector<StoreInst *, 16> Stores;
-
- // Scan through the uses recursively to make sure the pointer is always used
- // sanely.
- SmallVector<Value *, 16> WorkList(arg->users());
- while (!WorkList.empty()) {
- Value *V = WorkList.pop_back_val();
- if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
- if (PtrValues.insert(V).second)
- llvm::append_range(WorkList, V->users());
- } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
- Stores.push_back(Store);
- } else if (!isa<LoadInst>(V)) {
- return true;
- }
- }
-
- // Check to make sure the pointers aren't captured
- for (StoreInst *Store : Stores)
- if (PtrValues.count(Store->getValueOperand()))
- return true;
-
- return false;
-}
-
-/// Check if callers and the callee \p F agree how promoted arguments would be
-/// passed. The ones that they do not agree on are eliminated from the sets but
-/// the return value has to be observed as well.
-static bool areFunctionArgsABICompatible(
- const Function &F, const TargetTransformInfo &TTI,
- SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
- // TODO: Check individual arguments so we can promote a subset?
- SmallVector<Type *, 32> Types;
- for (Argument *Arg : ArgsToPromote)
- Types.push_back(Arg->getType()->getPointerElementType());
- for (Argument *Arg : ByValArgsToTransform)
- Types.push_back(Arg->getParamByValType());
-
- for (const Use &U : F.uses()) {
+/// Check if callers and callee agree on how promoted arguments would be
+/// passed.
+static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
+ const TargetTransformInfo &TTI) {
+ return all_of(F.uses(), [&](const Use &U) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
if (!CB)
return false;
+
const Function *Caller = CB->getCaller();
const Function *Callee = CB->getCalledFunction();
- if (!TTI.areTypesABICompatible(Caller, Callee, Types))
- return false;
- }
- return true;
+ return TTI.areTypesABICompatible(Caller, Callee, Types);
+ });
}
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
-static Function *
-promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements,
- Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
- ReplaceCallSite,
- const TargetTransformInfo &TTI) {
+static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
+ unsigned MaxElements, bool IsRecursive) {
// Don't perform argument promotion for naked functions; otherwise we can end
// up removing parameters that are seemingly 'not used' as they are referred
// to in the assembly.
- if(F->hasFnAttribute(Attribute::Naked))
+ if (F->hasFnAttribute(Attribute::Naked))
return nullptr;
// Make sure that it is local to this module.
@@ -903,20 +737,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers. Also see if the function
- // is self-recursive and check that target features are compatible.
- bool isSelfRecursive = false;
+ // is self-recursive.
for (Use &U : F->uses()) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
// Must be a direct call.
- if (CB == nullptr || !CB->isCallee(&U))
+ if (CB == nullptr || !CB->isCallee(&U) ||
+ CB->getFunctionType() != F->getFunctionType())
return nullptr;
// Can't change signature of musttail callee
if (CB->isMustTailCall())
return nullptr;
- if (CB->getParent()->getParent() == F)
- isSelfRecursive = true;
+ if (CB->getFunction() == F)
+ IsRecursive = true;
}
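
For example, a musttail use like the following pins the callee's signature and makes promoteArguments bail out (sketch; @callee stands for the function under inspection):

    declare i32 @callee(i32*)

    define i32 @wrapper(i32* %p) {
      %r = musttail call i32 @callee(i32* %p)   ; signature must stay fixed
      ret i32 %r
    }
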
// Can't change signature of musttail caller
@@ -926,16 +760,13 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
return nullptr;
const DataLayout &DL = F->getParent()->getDataLayout();
-
- AAResults &AAR = AARGetter(*F);
+ auto &AAR = FAM.getResult<AAManager>(*F);
+ const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
- SmallPtrSet<Argument *, 8> ArgsToPromote;
- SmallPtrSet<Argument *, 8> ByValArgsToTransform;
+ DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
for (Argument *PtrArg : PointerArgs) {
- Type *AgTy = PtrArg->getType()->getPointerElementType();
-
// Replace sret attribute with noalias. This reduces register pressure by
// avoiding a register copy.
if (PtrArg->hasStructRetAttr()) {
@@ -949,72 +780,25 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
}
}
- // If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe, if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed.
- //
- // Only handle arguments with specified alignment; if it's unspecified, the
- // actual alignment of the argument is target-specific.
- bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() &&
- (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
- !canPaddingBeAccessed(PtrArg));
- if (isSafeToPromote) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
- LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '"
- << PtrArg->getName()
- << "' because it would require adding more"
- << " than " << MaxElements
- << " arguments to the function.\n");
- continue;
- }
-
- // If all the elements are single-value types, we can promote it.
- bool AllSimple = true;
- for (const auto *EltTy : STy->elements()) {
- if (!EltTy->isSingleValueType()) {
- AllSimple = false;
- break;
- }
- }
+ // See if we can promote the pointer to its value.
+ SmallVector<OffsetAndArgPart, 4> ArgParts;
- // Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow sroa to hack on
- // the new alloca we introduce.
- if (AllSimple) {
- ByValArgsToTransform.insert(PtrArg);
- continue;
- }
- }
- }
+ if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
+ SmallVector<Type *, 4> Types;
+ for (const auto &Pair : ArgParts)
+ Types.push_back(Pair.second.Ty);
- // If the argument is a recursive type and we're in a recursive
- // function, we could end up infinitely peeling the function argument.
- if (isSelfRecursive) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- bool RecursiveType =
- llvm::is_contained(STy->elements(), PtrArg->getType());
- if (RecursiveType)
- continue;
+ if (areTypesABICompatible(Types, *F, TTI)) {
+ ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
}
}
-
- // Otherwise, see if we can promote the pointer to its value.
- Type *ByValTy =
- PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
- if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
- ArgsToPromote.insert(PtrArg);
}
// No promotable pointer arguments.
- if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ if (ArgsToPromote.empty())
return nullptr;
- if (!areFunctionArgsABICompatible(
- *F, TTI, ArgsToPromote, ByValArgsToTransform))
- return nullptr;
-
- return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
+ return doPromotion(F, FAM, ArgsToPromote);
}
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
@@ -1030,19 +814,10 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ bool IsRecursive = C.size() > 1;
for (LazyCallGraph::Node &N : C) {
Function &OldF = N.getFunction();
-
- // FIXME: This lambda must only be used with this function. We should
- // skip the lambda and just get the AA results directly.
- auto AARGetter = [&](Function &F) -> AAResults & {
- assert(&F == &OldF && "Called with an unexpected function!");
- return FAM.getResult<AAManager>(F);
- };
-
- const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
- Function *NewF =
- promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
+ Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
if (!NewF)
continue;
LocalChange = true;
@@ -1077,111 +852,3 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
PA.preserveSet<AllAnalysesOn<Function>>();
return PA;
}
-
-namespace {
-
-/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
-struct ArgPromotion : public CallGraphSCCPass {
- // Pass identification, replacement for typeid
- static char ID;
-
- explicit ArgPromotion(unsigned MaxElements = 3)
- : CallGraphSCCPass(ID), MaxElements(MaxElements) {
- initializeArgPromotionPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- getAAResultsAnalysisUsage(AU);
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override;
-
-private:
- using llvm::Pass::doInitialization;
-
- bool doInitialization(CallGraph &CG) override;
-
- /// The maximum number of elements to expand, or 0 for unlimited.
- unsigned MaxElements;
-};
-
-} // end anonymous namespace
-
-char ArgPromotion::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
-
-Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
- return new ArgPromotion(MaxElements);
-}
-
-bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
- if (skipSCC(SCC))
- return false;
-
- // Get the callgraph information that we need to update to reflect our
- // changes.
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
-
- LegacyAARGetter AARGetter(*this);
-
- bool Changed = false, LocalChange;
-
- // Iterate until we stop promoting from this SCC.
- do {
- LocalChange = false;
- // Attempt to promote arguments from all functions in this SCC.
- for (CallGraphNode *OldNode : SCC) {
- Function *OldF = OldNode->getFunction();
- if (!OldF)
- continue;
-
- auto ReplaceCallSite = [&](CallBase &OldCS, CallBase &NewCS) {
- Function *Caller = OldCS.getParent()->getParent();
- CallGraphNode *NewCalleeNode =
- CG.getOrInsertFunction(NewCS.getCalledFunction());
- CallGraphNode *CallerNode = CG[Caller];
- CallerNode->replaceCallEdge(cast<CallBase>(OldCS),
- cast<CallBase>(NewCS), NewCalleeNode);
- };
-
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);
- if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
- {ReplaceCallSite}, TTI)) {
- LocalChange = true;
-
- // Update the call graph for the newly promoted function.
- CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
- NewNode->stealCalledFunctionsFrom(OldNode);
- if (OldNode->getNumReferences() == 0)
- delete CG.removeFunctionFromModule(OldNode);
- else
- OldF->setLinkage(Function::ExternalLinkage);
-
- // And updat ethe SCC we're iterating as well.
- SCC.ReplaceNode(OldNode, NewNode);
- }
- }
- // Remember that we changed something.
- Changed |= LocalChange;
- } while (LocalChange);
-
- return Changed;
-}
-
-bool ArgPromotion::doInitialization(CallGraph &CG) {
- return CallGraphSCCPass::doInitialization(CG);
-}
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index d66140a726f6..b05b7990e3f0 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -15,29 +15,25 @@
#include "llvm/Transforms/IPO/Attributor.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -50,6 +46,10 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Verifier.h"
+#endif
+
#include <cassert>
#include <string>
@@ -123,13 +123,13 @@ static cl::list<std::string>
SeedAllowList("attributor-seed-allow-list", cl::Hidden,
cl::desc("Comma seperated list of attribute names that are "
"allowed to be seeded."),
- cl::ZeroOrMore, cl::CommaSeparated);
+ cl::CommaSeparated);
static cl::list<std::string> FunctionSeedAllowList(
"attributor-function-seed-allow-list", cl::Hidden,
cl::desc("Comma seperated list of function names that are "
"allowed to be seeded."),
- cl::ZeroOrMore, cl::CommaSeparated);
+ cl::CommaSeparated);
#endif
static cl::opt<bool>
@@ -209,33 +209,25 @@ bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
}
bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
- const Value &V) {
- if (auto *C = dyn_cast<Constant>(&V))
- return !C->isThreadDependent();
- // TODO: Inspect and cache more complex instructions.
- if (auto *CB = dyn_cast<CallBase>(&V))
- return CB->getNumOperands() == 0 && !CB->mayHaveSideEffects() &&
- !CB->mayReadFromMemory();
- const Function *Scope = nullptr;
- if (auto *I = dyn_cast<Instruction>(&V))
- Scope = I->getFunction();
- if (auto *A = dyn_cast<Argument>(&V))
- Scope = A->getParent();
- if (!Scope)
+ const Value &V, bool ForAnalysisOnly) {
+ // TODO: See the AAInstanceInfo class comment.
+ if (!ForAnalysisOnly)
return false;
- auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- QueryingAA, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
- return NoRecurseAA.isAssumedNoRecurse();
+ auto &InstanceInfoAA = A.getAAFor<AAInstanceInfo>(
+ QueryingAA, IRPosition::value(V), DepClassTy::OPTIONAL);
+ return InstanceInfoAA.isAssumedUniqueForAnalysis();
}
Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty,
const TargetLibraryInfo *TLI) {
if (isa<AllocaInst>(Obj))
return UndefValue::get(&Ty);
- if (isAllocationFn(&Obj, TLI))
- return getInitialValueOfAllocation(&cast<CallBase>(Obj), TLI, &Ty);
+ if (Constant *Init = getInitialValueOfAllocation(&Obj, TLI, &Ty))
+ return Init;
auto *GV = dyn_cast<GlobalVariable>(&Obj);
- if (!GV || !GV->hasLocalLinkage())
+ if (!GV)
+ return nullptr;
+ if (!GV->hasLocalLinkage() && !(GV->isConstant() && GV->hasInitializer()))
return nullptr;
if (!GV->hasInitializer())
return UndefValue::get(&Ty);
@@ -252,19 +244,29 @@ bool AA::isValidInScope(const Value &V, const Function *Scope) {
return false;
}
-bool AA::isValidAtPosition(const Value &V, const Instruction &CtxI,
+bool AA::isValidAtPosition(const AA::ValueAndContext &VAC,
InformationCache &InfoCache) {
- if (isa<Constant>(V))
+ if (isa<Constant>(VAC.getValue()) || VAC.getValue() == VAC.getCtxI())
return true;
- const Function *Scope = CtxI.getFunction();
- if (auto *A = dyn_cast<Argument>(&V))
+ const Function *Scope = nullptr;
+ const Instruction *CtxI = VAC.getCtxI();
+ if (CtxI)
+ Scope = CtxI->getFunction();
+ if (auto *A = dyn_cast<Argument>(VAC.getValue()))
return A->getParent() == Scope;
- if (auto *I = dyn_cast<Instruction>(&V))
+ if (auto *I = dyn_cast<Instruction>(VAC.getValue())) {
if (I->getFunction() == Scope) {
- const DominatorTree *DT =
- InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Scope);
- return DT && DT->dominates(I, &CtxI);
+ if (const DominatorTree *DT =
+ InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
+ *Scope))
+ return DT->dominates(I, CtxI);
+ // Local dominance check mostly for the old PM passes.
+ if (CtxI && I->getParent() == CtxI->getParent())
+ return llvm::any_of(
+ make_range(I->getIterator(), I->getParent()->end()),
+ [&](const Instruction &AfterI) { return &AfterI == CtxI; });
}
+ }
return false;
}
@@ -295,11 +297,11 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
const Optional<Value *> &B, Type *Ty) {
if (A == B)
return A;
- if (!B.hasValue())
+ if (!B)
return A;
if (*B == nullptr)
return nullptr;
- if (!A.hasValue())
+ if (!A)
return Ty ? getWithType(**B, *Ty) : nullptr;
if (*A == nullptr)
return nullptr;
@@ -314,21 +316,33 @@ AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
return nullptr;
}
-bool AA::getPotentialCopiesOfStoredValue(
- Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
- const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) {
+template <bool IsLoad, typename Ty>
+static bool getPotentialCopiesOfMemoryValue(
+ Attributor &A, Ty &I, SmallSetVector<Value *, 4> &PotentialCopies,
+ SmallSetVector<Instruction *, 4> &PotentialValueOrigins,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I
+ << " (only exact: " << OnlyExact << ")\n";);
- Value &Ptr = *SI.getPointerOperand();
+ Value &Ptr = *I.getPointerOperand();
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI)) {
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs() << "Underlying objects stored into could not be determined\n";);
return false;
}
+ // Containers to remember the pointer infos and new copies while we are
+ // not yet sure that we can find all of them. If we abort, we want to
+ // avoid spurious dependences and potential copies in the provided
+ // container.
SmallVector<const AAPointerInfo *> PIs;
SmallVector<Value *> NewCopies;
+ SmallVector<Instruction *> NewCopyOrigins;
+ const auto *TLI =
+ A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction());
for (Value *Obj : Objects) {
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
if (isa<UndefValue>(Obj))
@@ -336,7 +350,7 @@ bool AA::getPotentialCopiesOfStoredValue(
if (isa<ConstantPointerNull>(Obj)) {
// A null pointer access can be undefined but any offset from null may
// be OK. We do not try to optimize the latter.
- if (!NullPointerIsDefined(SI.getFunction(),
+ if (!NullPointerIsDefined(I.getFunction(),
Ptr.getType()->getPointerAddressSpace()) &&
A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) ==
Obj)
@@ -345,37 +359,74 @@ bool AA::getPotentialCopiesOfStoredValue(
dbgs() << "Underlying object is a valid nullptr, giving up.\n";);
return false;
}
+ // TODO: Use assumed noalias return.
if (!isa<AllocaInst>(Obj) && !isa<GlobalVariable>(Obj) &&
- !isNoAliasCall(Obj)) {
+ !(IsLoad ? isAllocationFn(Obj, TLI) : isNoAliasCall(Obj))) {
LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj
<< "\n";);
return false;
}
if (auto *GV = dyn_cast<GlobalVariable>(Obj))
- if (!GV->hasLocalLinkage()) {
+ if (!GV->hasLocalLinkage() &&
+ !(GV->isConstant() && GV->hasInitializer())) {
LLVM_DEBUG(dbgs() << "Underlying object is global with external "
"linkage, not supported yet: "
<< *Obj << "\n";);
return false;
}
+ if (IsLoad) {
+ Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI);
+ if (!InitialValue)
+ return false;
+ NewCopies.push_back(InitialValue);
+ NewCopyOrigins.push_back(nullptr);
+ }
+
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
- if (!Acc.isRead())
+ if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead()))
+ return true;
+ if (IsLoad && Acc.isWrittenValueYetUndetermined())
return true;
- auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
- if (!LI) {
- LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
- "instruction not supported yet: "
- << *Acc.getRemoteInst() << "\n";);
+ if (OnlyExact && !IsExact &&
+ !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) {
+ LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst()
+ << ", abort!\n");
return false;
}
- NewCopies.push_back(LI);
+ if (IsLoad) {
+ assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
+ if (!Acc.isWrittenValueUnknown()) {
+ NewCopies.push_back(Acc.getWrittenValue());
+ NewCopyOrigins.push_back(Acc.getRemoteInst());
+ return true;
+ }
+ auto *SI = dyn_cast<StoreInst>(Acc.getRemoteInst());
+ if (!SI) {
+ LLVM_DEBUG(dbgs() << "Underlying object written through a non-store "
+ "instruction not supported yet: "
+ << *Acc.getRemoteInst() << "\n";);
+ return false;
+ }
+ NewCopies.push_back(SI->getValueOperand());
+ NewCopyOrigins.push_back(SI);
+ } else {
+ assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
+ auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
+ if (!LI && OnlyExact) {
+ LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
+ "instruction not supported yet: "
+ << *Acc.getRemoteInst() << "\n";);
+ return false;
+ }
+ NewCopies.push_back(Acc.getRemoteInst());
+ }
return true;
};
auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj),
DepClassTy::NONE);
- if (!PI.forallInterferingAccesses(SI, CheckAccess)) {
+ if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) {
LLVM_DEBUG(
dbgs()
<< "Failed to verify all interfering accesses for underlying object: "
@@ -385,16 +436,40 @@ bool AA::getPotentialCopiesOfStoredValue(
PIs.push_back(&PI);
}
+ // Only if we were successful in collecting all potential copies do we
+ // record dependences (on AAPointerInfo AAs that are not at a fixpoint).
+ // Also, only then do we modify the given PotentialCopies container.
for (auto *PI : PIs) {
if (!PI->getState().isAtFixpoint())
UsedAssumedInformation = true;
A.recordDependence(*PI, QueryingAA, DepClassTy::OPTIONAL);
}
PotentialCopies.insert(NewCopies.begin(), NewCopies.end());
+ PotentialValueOrigins.insert(NewCopyOrigins.begin(), NewCopyOrigins.end());
return true;
}
+bool AA::getPotentiallyLoadedValues(
+ Attributor &A, LoadInst &LI, SmallSetVector<Value *, 4> &PotentialValues,
+ SmallSetVector<Instruction *, 4> &PotentialValueOrigins,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ return getPotentialCopiesOfMemoryValue</* IsLoad */ true>(
+ A, LI, PotentialValues, PotentialValueOrigins, QueryingAA,
+ UsedAssumedInformation, OnlyExact);
+}
+
+bool AA::getPotentialCopiesOfStoredValue(
+ Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
+ const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
+ bool OnlyExact) {
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ return getPotentialCopiesOfMemoryValue</* IsLoad */ false>(
+ A, SI, PotentialCopies, PotentialValueOrigins, QueryingAA,
+ UsedAssumedInformation, OnlyExact);
+}
+
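// (Sketch of the shared helper both wrappers dispatch to; the shape is
// inferred from the calls above:
//   template <bool IsLoad>
//   static bool getPotentialCopiesOfMemoryValue(
//       Attributor &A, Instruction &I, SmallSetVector<Value *, 4> &Copies,
//       SmallSetVector<Instruction *, 4> &Origins,
//       const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation,
//       bool OnlyExact);
// Loads collect potentially written values; stores collect reading
// instructions.)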
static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA,
bool RequireReadNone, bool &IsKnown) {
@@ -449,6 +524,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
SmallVector<const Instruction *> Worklist;
Worklist.push_back(&FromI);
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
while (!Worklist.empty()) {
const Instruction *CurFromI = Worklist.pop_back_val();
if (!Visited.insert(CurFromI).second)
@@ -468,7 +545,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
<< *ToI << " [Intra]\n");
if (Result)
return true;
- continue;
+ if (NoRecurseAA.isAssumedNoRecurse())
+ continue;
}
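// (Why the norecurse guard matters, as a sketch: an intra-procedural
// "ToI not reachable from CurFromI" answer is only final if the function
// cannot re-enter itself, e.g. in
//   void fn() { ...ToI...; ...FromI...; fn(); }
// FromI still reaches ToI through the recursive call.)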
// TODO: If we can go arbitrarily backwards we will eventually reach an
@@ -514,10 +592,10 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
return true;
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
Result = !A.checkForAllCallSites(CheckCallSite, *FromFn,
/* RequireAllCallSites */ true,
- &QueryingAA, AllCallSitesKnown);
+ &QueryingAA, UsedAssumedInformation);
if (Result) {
LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI
<< " in @" << FromFn->getName()
@@ -631,7 +709,7 @@ Argument *IRPosition::getAssociatedArgument() const {
assert(ACS.getCalledFunction()->arg_size() > u &&
"ACS mapped into var-args arguments!");
- if (CBCandidateArg.hasValue()) {
+ if (CBCandidateArg) {
CBCandidateArg = nullptr;
break;
}
@@ -640,7 +718,7 @@ Argument *IRPosition::getAssociatedArgument() const {
}
// If we found a unique callback candidate argument, return it.
- if (CBCandidateArg.hasValue() && CBCandidateArg.getValue())
+ if (CBCandidateArg && CBCandidateArg.getValue())
return CBCandidateArg.getValue();
// If no callbacks were found, or none used the underlying call site operand
@@ -949,22 +1027,24 @@ Attributor::getAssumedConstant(const IRPosition &IRP,
bool &UsedAssumedInformation) {
// First check all callbacks provided by outside AAs. If any of them returns
// a non-null value that is different from the associated value, or None, we
- // assume it's simpliied.
+ // assume it's simplified.
for (auto &CB : SimplificationCallbacks.lookup(IRP)) {
Optional<Value *> SimplifiedV = CB(IRP, &AA, UsedAssumedInformation);
- if (!SimplifiedV.hasValue())
+ if (!SimplifiedV)
return llvm::None;
if (isa_and_nonnull<Constant>(*SimplifiedV))
return cast<Constant>(*SimplifiedV);
return nullptr;
}
+ if (auto *C = dyn_cast<Constant>(&IRP.getAssociatedValue()))
+ return C;
const auto &ValueSimplifyAA =
getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE);
Optional<Value *> SimplifiedV =
ValueSimplifyAA.getAssumedSimplifiedValue(*this);
bool IsKnown = ValueSimplifyAA.isAtFixpoint();
UsedAssumedInformation |= !IsKnown;
- if (!SimplifiedV.hasValue()) {
+ if (!SimplifiedV) {
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return llvm::None;
}
@@ -987,18 +1067,18 @@ Attributor::getAssumedSimplified(const IRPosition &IRP,
bool &UsedAssumedInformation) {
// First check all callbacks provided by outside AAs. If any of them returns
// a non-null value that is different from the associated value, or None, we
- // assume it's simpliied.
+ // assume it's simplified.
for (auto &CB : SimplificationCallbacks.lookup(IRP))
return CB(IRP, AA, UsedAssumedInformation);
- // If no high-level/outside simplification occured, use AAValueSimplify.
+ // If no high-level/outside simplification occurred, use AAValueSimplify.
const auto &ValueSimplifyAA =
getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE);
Optional<Value *> SimplifiedV =
ValueSimplifyAA.getAssumedSimplifiedValue(*this);
bool IsKnown = ValueSimplifyAA.isAtFixpoint();
UsedAssumedInformation |= !IsKnown;
- if (!SimplifiedV.hasValue()) {
+ if (!SimplifiedV) {
if (AA)
recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL);
return llvm::None;
@@ -1017,7 +1097,7 @@ Attributor::getAssumedSimplified(const IRPosition &IRP,
Optional<Value *> Attributor::translateArgumentToCallSiteContent(
Optional<Value *> V, CallBase &CB, const AbstractAttribute &AA,
bool &UsedAssumedInformation) {
- if (!V.hasValue())
+ if (!V)
return V;
if (*V == nullptr || isa<Constant>(*V))
return V;
@@ -1078,6 +1158,19 @@ bool Attributor::isAssumedDead(const Use &U,
BasicBlock *IncomingBB = PHI->getIncomingBlock(U);
return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
+ if (!CheckBBLivenessOnly && SI->getPointerOperand() != U.get()) {
+ const IRPosition IRP = IRPosition::inst(*SI);
+ const AAIsDead &IsDeadAA =
+ getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
+ if (IsDeadAA.isRemovableStore()) {
+ if (QueryingAA)
+ recordDependence(IsDeadAA, *QueryingAA, DepClass);
+ if (!IsDeadAA.isKnown(AAIsDead::IS_REMOVABLE))
+ UsedAssumedInformation = true;
+ return true;
+ }
+ }
}
return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA,
@@ -1191,6 +1284,7 @@ bool Attributor::checkForAllUses(
function_ref<bool(const Use &, bool &)> Pred,
const AbstractAttribute &QueryingAA, const Value &V,
bool CheckBBLivenessOnly, DepClassTy LivenessDepClass,
+ bool IgnoreDroppableUses,
function_ref<bool(const Use &OldU, const Use &NewU)> EquivalentUseCB) {
// Check the trivial case first as it catches void values.
@@ -1231,7 +1325,7 @@ bool Attributor::checkForAllUses(
LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
continue;
}
- if (U->getUser()->isDroppable()) {
+ if (IgnoreDroppableUses && U->getUser()->isDroppable()) {
LLVM_DEBUG(dbgs() << "[Attributor] Droppable user, skip!\n");
continue;
}
@@ -1241,9 +1335,9 @@ bool Attributor::checkForAllUses(
if (!Visited.insert(U).second)
continue;
SmallSetVector<Value *, 4> PotentialCopies;
- if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies,
- QueryingAA,
- UsedAssumedInformation)) {
+ if (AA::getPotentialCopiesOfStoredValue(
+ *this, *SI, PotentialCopies, QueryingAA, UsedAssumedInformation,
+ /* OnlyExact */ true)) {
LLVM_DEBUG(dbgs() << "[Attributor] Value is stored, continue with "
<< PotentialCopies.size()
<< " potential copies instead!\n");
@@ -1277,7 +1371,7 @@ bool Attributor::checkForAllUses(
bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const AbstractAttribute &QueryingAA,
bool RequireAllCallSites,
- bool &AllCallSitesKnown) {
+ bool &UsedAssumedInformation) {
// We can try to determine information from
// the call sites. However, this is only possible if all call sites are known,
// hence the function has internal linkage.
@@ -1286,31 +1380,26 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
if (!AssociatedFunction) {
LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP
<< "\n");
- AllCallSitesKnown = false;
return false;
}
return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites,
- &QueryingAA, AllCallSitesKnown);
+ &QueryingAA, UsedAssumedInformation);
}
bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Function &Fn,
bool RequireAllCallSites,
const AbstractAttribute *QueryingAA,
- bool &AllCallSitesKnown) {
+ bool &UsedAssumedInformation) {
if (RequireAllCallSites && !Fn.hasLocalLinkage()) {
LLVM_DEBUG(
dbgs()
<< "[Attributor] Function " << Fn.getName()
<< " has no internal linkage, hence not all call sites are known\n");
- AllCallSitesKnown = false;
return false;
}
- // If we do not require all call sites we might not see all.
- AllCallSitesKnown = RequireAllCallSites;
-
SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
for (unsigned u = 0; u < Uses.size(); ++u) {
const Use &U = *Uses[u];
@@ -1322,15 +1411,13 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser()
<< "\n";
});
- bool UsedAssumedInformation = false;
if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true)) {
LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
continue;
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
- if (CE->isCast() && CE->getType()->isPointerTy() &&
- CE->getType()->getPointerElementType()->isFunctionTy()) {
+ if (CE->isCast() && CE->getType()->isPointerTy()) {
LLVM_DEBUG(
dbgs() << "[Attributor] Use, is constant cast expression, add "
<< CE->getNumUses()
@@ -1477,30 +1564,24 @@ static bool checkForAllInstructionsImpl(
}
bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
+ const Function *Fn,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly,
bool CheckPotentiallyDead) {
-
- const IRPosition &IRP = QueryingAA.getIRPosition();
// Since we need to provide instructions we have to have an exact definition.
- const Function *AssociatedFunction = IRP.getAssociatedFunction();
- if (!AssociatedFunction)
- return false;
-
- if (AssociatedFunction->isDeclaration())
+ if (!Fn || Fn->isDeclaration())
return false;
// TODO: use the function scope once we have call site AAReturnedValues.
- const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
+ const IRPosition &QueryIRP = IRPosition::function(*Fn);
const auto *LivenessAA =
(CheckBBLivenessOnly || CheckPotentiallyDead)
? nullptr
: &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
- auto &OpcodeInstMap =
- InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
LivenessAA, Opcodes, UsedAssumedInformation,
CheckBBLivenessOnly, CheckPotentiallyDead))
@@ -1509,6 +1590,19 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
return true;
}
+bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
+ const AbstractAttribute &QueryingAA,
+ const ArrayRef<unsigned> &Opcodes,
+ bool &UsedAssumedInformation,
+ bool CheckBBLivenessOnly,
+ bool CheckPotentiallyDead) {
+ const IRPosition &IRP = QueryingAA.getIRPosition();
+ const Function *AssociatedFunction = IRP.getAssociatedFunction();
+ return checkForAllInstructions(Pred, AssociatedFunction, QueryingAA, Opcodes,
+ UsedAssumedInformation, CheckBBLivenessOnly,
+ CheckPotentiallyDead);
+}
+
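// (Illustrative use of the new overload: callers that already know the
// function can pass it directly, e.g.
//   bool UsedAssumedInformation = false;
//   A.checkForAllInstructions(Pred, &F, QueryingAA, {Instruction::Store},
//                             UsedAssumedInformation);)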
bool Attributor::checkForAllReadWriteInstructions(
function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA,
bool &UsedAssumedInformation) {
@@ -1547,11 +1641,8 @@ void Attributor::runTillFixpoint() {
// the abstract analysis.
unsigned IterationCounter = 1;
- unsigned MaxFixedPointIterations;
- if (MaxFixpointIterations)
- MaxFixedPointIterations = MaxFixpointIterations.getValue();
- else
- MaxFixedPointIterations = SetFixpointIterations;
+ unsigned MaxIterations =
+ Configuration.MaxFixpointIterations.value_or(SetFixpointIterations);
SmallVector<AbstractAttribute *, 32> ChangedAAs;
SetVector<AbstractAttribute *> Worklist, InvalidAAs;
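// (The value_or form above is equivalent to the removed branch, roughly:
//   unsigned MaxIterations = Configuration.MaxFixpointIterations
//                                ? *Configuration.MaxFixpointIterations
//                                : SetFixpointIterations;)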
@@ -1636,21 +1727,20 @@ void Attributor::runTillFixpoint() {
QueryAAsAwaitingUpdate.end());
QueryAAsAwaitingUpdate.clear();
- } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
- VerifyMaxFixpointIterations));
+ } while (!Worklist.empty() &&
+ (IterationCounter++ < MaxIterations || VerifyMaxFixpointIterations));
- if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
+ if (IterationCounter > MaxIterations && !Functions.empty()) {
auto Remark = [&](OptimizationRemarkMissed ORM) {
return ORM << "Attributor did not reach a fixpoint after "
- << ore::NV("Iterations", MaxFixedPointIterations)
- << " iterations.";
+ << ore::NV("Iterations", MaxIterations) << " iterations.";
};
- Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
+ Function *F = Functions.front();
emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
}
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
- << IterationCounter << "/" << MaxFixpointIterations
+ << IterationCounter << "/" << MaxIterations
<< " iterations\n");
// Reset abstract arguments not settled in a sound fixpoint by now. This
@@ -1684,11 +1774,9 @@ void Attributor::runTillFixpoint() {
<< " abstract attributes.\n";
});
- if (VerifyMaxFixpointIterations &&
- IterationCounter != MaxFixedPointIterations) {
+ if (VerifyMaxFixpointIterations && IterationCounter != MaxIterations) {
errs() << "\n[Attributor] Fixpoint iteration done after: "
- << IterationCounter << "/" << MaxFixedPointIterations
- << " iterations\n";
+ << IterationCounter << "/" << MaxIterations << " iterations\n";
llvm_unreachable("The fixpoint was not reached with exactly the number of "
"specified iterations!");
}
@@ -1725,6 +1813,9 @@ ChangeStatus Attributor::manifestAttributes() {
if (!State.isValidState())
continue;
+ if (AA->getCtxI() && !isRunOn(*AA->getAnchorScope()))
+ continue;
+
// Skip dead code.
bool UsedAssumedInformation = false;
if (isAssumedDead(*AA, nullptr, UsedAssumedInformation,
@@ -1774,7 +1865,7 @@ ChangeStatus Attributor::manifestAttributes() {
void Attributor::identifyDeadInternalFunctions() {
// Early exit if we don't intend to delete functions.
- if (!DeleteFns)
+ if (!Configuration.DeleteFns)
return;
// Identify dead internal functions and delete them. This happens outside
@@ -1795,7 +1886,7 @@ void Attributor::identifyDeadInternalFunctions() {
if (!F)
continue;
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (checkForAllCallSites(
[&](AbstractCallSite ACS) {
Function *Callee = ACS.getInstruction()->getFunction();
@@ -1803,7 +1894,7 @@ void Attributor::identifyDeadInternalFunctions() {
(Functions.count(Callee) && Callee->hasLocalLinkage() &&
!LiveInternalFns.count(Callee));
},
- *F, true, nullptr, AllCallSitesKnown)) {
+ *F, true, nullptr, UsedAssumedInformation)) {
continue;
}
@@ -1826,7 +1917,8 @@ ChangeStatus Attributor::cleanupIR() {
<< ToBeDeletedBlocks.size() << " blocks and "
<< ToBeDeletedInsts.size() << " instructions and "
<< ToBeChangedValues.size() << " values and "
- << ToBeChangedUses.size() << " uses. "
+ << ToBeChangedUses.size() << " uses. To insert "
+ << ToBeChangedToUnreachableInsts.size() << " unreachables."
<< "Preserve manifest added " << ManifestAddedBlocks.size()
<< " blocks\n");
@@ -1844,12 +1936,15 @@ ChangeStatus Attributor::cleanupIR() {
NewV = Entry.first;
} while (true);
+ Instruction *I = dyn_cast<Instruction>(U->getUser());
+ assert((!I || isRunOn(*I->getFunction())) &&
+ "Cannot replace an instruction outside the current SCC!");
+
// Do not replace uses in returns if the value is a must-tail call we will
// not delete.
- if (auto *RI = dyn_cast<ReturnInst>(U->getUser())) {
+ if (auto *RI = dyn_cast_or_null<ReturnInst>(I)) {
if (auto *CI = dyn_cast<CallInst>(OldV->stripPointerCasts()))
- if (CI->isMustTailCall() &&
- (!ToBeDeletedInsts.count(CI) || !isRunOn(*CI->getCaller())))
+ if (CI->isMustTailCall() && !ToBeDeletedInsts.count(CI))
return;
// If we rewrite a return and the new value is not an argument, strip the
// `returned` attribute as it is wrong now.
@@ -1859,8 +1954,8 @@ ChangeStatus Attributor::cleanupIR() {
}
// Do not perform call graph altering changes outside the SCC.
- if (auto *CB = dyn_cast<CallBase>(U->getUser()))
- if (CB->isCallee(U) && !isRunOn(*CB->getCaller()))
+ if (auto *CB = dyn_cast_or_null<CallBase>(I))
+ if (CB->isCallee(U))
return;
LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser()
@@ -1908,8 +2003,12 @@ ChangeStatus Attributor::cleanupIR() {
for (auto &U : OldV->uses())
if (Entry.second || !U.getUser()->isDroppable())
Uses.push_back(&U);
- for (Use *U : Uses)
+ for (Use *U : Uses) {
+ if (auto *I = dyn_cast<Instruction>(U->getUser()))
+ if (!isRunOn(*I->getFunction()))
+ continue;
ReplaceUse(U, NewV);
+ }
}
for (auto &V : InvokeWithDeadSuccessor)
@@ -1940,15 +2039,15 @@ ChangeStatus Attributor::cleanupIR() {
}
}
for (Instruction *I : TerminatorsToFold) {
- if (!isRunOn(*I->getFunction()))
- continue;
+ assert(isRunOn(*I->getFunction()) &&
+ "Cannot replace a terminator outside the current SCC!");
CGModifiedFunctions.insert(I->getFunction());
ConstantFoldTerminator(I->getParent());
}
for (auto &V : ToBeChangedToUnreachableInsts)
if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
- if (!isRunOn(*I->getFunction()))
- continue;
+ assert(isRunOn(*I->getFunction()) &&
+ "Cannot replace an instruction outside the current SCC!");
CGModifiedFunctions.insert(I->getFunction());
changeToUnreachable(I);
}
@@ -1956,10 +2055,10 @@ ChangeStatus Attributor::cleanupIR() {
for (auto &V : ToBeDeletedInsts) {
if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
if (auto *CB = dyn_cast<CallBase>(I)) {
- if (!isRunOn(*I->getFunction()))
- continue;
+ assert(isRunOn(*I->getFunction()) &&
+ "Cannot delete an instruction outside the current SCC!");
if (!isa<IntrinsicInst>(CB))
- CGUpdater.removeCallSite(*CB);
+ Configuration.CGUpdater.removeCallSite(*CB);
}
I->dropDroppableUses();
CGModifiedFunctions.insert(I->getFunction());
@@ -1972,9 +2071,7 @@ ChangeStatus Attributor::cleanupIR() {
}
}
- llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) {
- return !I || !isRunOn(*cast<Instruction>(I)->getFunction());
- });
+ llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) { return !I; });
LLVM_DEBUG({
dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() << "\n";
@@ -2010,12 +2107,12 @@ ChangeStatus Attributor::cleanupIR() {
for (Function *Fn : CGModifiedFunctions)
if (!ToBeDeletedFunctions.count(Fn) && Functions.count(Fn))
- CGUpdater.reanalyzeFunction(*Fn);
+ Configuration.CGUpdater.reanalyzeFunction(*Fn);
for (Function *Fn : ToBeDeletedFunctions) {
if (!Functions.count(Fn))
continue;
- CGUpdater.removeFunction(*Fn);
+ Configuration.CGUpdater.removeFunction(*Fn);
}
if (!ToBeChangedUses.empty())
@@ -2254,7 +2351,7 @@ bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
bool Attributor::isValidFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
- if (!RewriteSignatures)
+ if (!Configuration.RewriteSignatures)
return false;
Function *Fn = Arg.getParent();
@@ -2290,9 +2387,9 @@ bool Attributor::isValidFunctionSignatureRewrite(
}
// Avoid callbacks for now.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr,
- AllCallSitesKnown)) {
+ UsedAssumedInformation)) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n");
return false;
}
@@ -2305,7 +2402,6 @@ bool Attributor::isValidFunctionSignatureRewrite(
// Forbid must-tail calls for now.
// TODO:
- bool UsedAssumedInformation = false;
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr,
nullptr, {Instruction::Call},
@@ -2370,7 +2466,7 @@ bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
}
ChangeStatus Attributor::rewriteFunctionSignatures(
- SmallPtrSetImpl<Function *> &ModifiedFns) {
+ SmallSetVector<Function *, 8> &ModifiedFns) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (auto &It : ArgumentReplacementMap) {
@@ -2403,6 +2499,12 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
}
}
+ uint64_t LargestVectorWidth = 0;
+ for (auto *I : NewArgumentTypes)
+ if (auto *VT = dyn_cast<llvm::VectorType>(I))
+ LargestVectorWidth = std::max(
+ LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
+
FunctionType *OldFnTy = OldFn->getFunctionType();
Type *RetTy = OldFnTy->getReturnType();
@@ -2432,6 +2534,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
NewFn->setAttributes(AttributeList::get(
Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(),
NewArgumentAttributes));
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*NewFn, LargestVectorWidth);
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
@@ -2509,14 +2612,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
Ctx, OldCallAttributeList.getFnAttrs(),
OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes));
+ AttributeFuncs::updateMinLegalVectorWidthAttr(*NewCB->getCaller(),
+ LargestVectorWidth);
+
CallSitePairs.push_back({OldCB, NewCB});
return true;
};
// Use the CallSiteReplacementCreator to create replacement call sites.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
bool Success = checkForAllCallSites(CallSiteReplacementCreator, *OldFn,
- true, nullptr, AllCallSitesKnown);
+ true, nullptr, UsedAssumedInformation);
(void)Success;
assert(Success && "Assumed call site replacement to succeed!");
@@ -2529,6 +2635,9 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
ARIs[OldArgNum]) {
if (ARI->CalleeRepairCB)
ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt);
+ if (ARI->ReplacementTypes.empty())
+ OldFnArgIt->replaceAllUsesWith(
+ PoisonValue::get(OldFnArgIt->getType()));
NewFnArgIt += ARI->ReplacementTypes.size();
} else {
NewFnArgIt->takeName(&*OldFnArgIt);
@@ -2544,17 +2653,17 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
assert(OldCB.getType() == NewCB.getType() &&
"Cannot handle call sites with different types!");
ModifiedFns.insert(OldCB.getFunction());
- CGUpdater.replaceCallSite(OldCB, NewCB);
+ Configuration.CGUpdater.replaceCallSite(OldCB, NewCB);
OldCB.replaceAllUsesWith(&NewCB);
OldCB.eraseFromParent();
}
// Replace the function in the call graph (if any).
- CGUpdater.replaceFunctionWith(*OldFn, *NewFn);
+ Configuration.CGUpdater.replaceFunctionWith(*OldFn, *NewFn);
// If the old function was modified and needed to be reanalyzed, the new one
// does now.
- if (ModifiedFns.erase(OldFn))
+ if (ModifiedFns.remove(OldFn))
ModifiedFns.insert(NewFn);
Changed = ChangeStatus::CHANGED;
@@ -2574,6 +2683,30 @@ void InformationCache::initializeInformationCache(const Function &CF,
// queried by abstract attributes during their initialization or update.
// This has to happen before we create attributes.
+ DenseMap<const Value *, Optional<short>> AssumeUsesMap;
+
+ // Add \p V to the assume uses map which tracks the number of uses outside
+ // of "visited" assumes. If no outside uses are left, the value is added to
+ // the assume-only-use vector.
+ auto AddToAssumeUsesMap = [&](const Value &V) -> void {
+ SmallVector<const Instruction *> Worklist;
+ if (auto *I = dyn_cast<Instruction>(&V))
+ Worklist.push_back(I);
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+ Optional<short> &NumUses = AssumeUsesMap[I];
+ if (!NumUses)
+ NumUses = I->getNumUses();
+ NumUses = NumUses.getValue() - /* this assume */ 1;
+ if (NumUses.getValue() != 0)
+ continue;
+ AssumeOnlyValues.insert(I);
+ for (const Value *Op : I->operands())
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ Worklist.push_back(OpI);
+ }
+ };
+
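// (Worked example, IR assumed: for
//   %c = icmp eq i32 %x, 0
//   call void @llvm.assume(i1 %c)
// the walk decrements %c's use count by one for this assume; if no other
// uses remain, %c is recorded in AssumeOnlyValues and its instruction
// operands are visited in turn.)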
for (Instruction &I : instructions(&F)) {
bool IsInterestingOpcode = false;
@@ -2594,6 +2727,7 @@ void InformationCache::initializeInformationCache(const Function &CF,
// For `must-tail` calls we remember the caller and callee.
if (auto *Assume = dyn_cast<AssumeInst>(&I)) {
fillMapFromAssume(*Assume, KnowledgeMap);
+ AddToAssumeUsesMap(*Assume->getArgOperand(0));
} else if (cast<CallInst>(I).isMustTailCall()) {
FI.ContainsMustTailCall = true;
if (const Function *Callee = cast<CallInst>(I).getCalledFunction())
@@ -2742,7 +2876,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAIsDead>(RetPos);
// Every function might be simplified.
- getOrCreateAAFor<AAValueSimplify>(RetPos);
+ bool UsedAssumedInformation = false;
+ getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation);
// Every returned value might be marked noundef.
getOrCreateAAFor<AANoUndef>(RetPos);
@@ -2834,7 +2969,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) {
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
- getOrCreateAAFor<AAValueSimplify>(CBRetPos);
+ bool UsedAssumedInformation = false;
+ getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation);
}
for (int I = 0, E = CB.arg_size(); I < E; ++I) {
@@ -2897,10 +3033,15 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAAlign>(
IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
if (SimplifyAllLoads)
- getOrCreateAAFor<AAValueSimplify>(IRPosition::value(I));
- } else
- getOrCreateAAFor<AAAlign>(
- IRPosition::value(*cast<StoreInst>(I).getPointerOperand()));
+ getAssumedSimplified(IRPosition::value(I), nullptr,
+ UsedAssumedInformation);
+ } else {
+ auto &SI = cast<StoreInst>(I);
+ getOrCreateAAFor<AAIsDead>(IRPosition::inst(I));
+ getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr,
+ UsedAssumedInformation);
+ getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand()));
+ }
return true;
};
Success = checkForAllInstructionsImpl(
@@ -2975,8 +3116,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS,
if (!S.isValidState())
OS << "full-set";
else {
- for (auto &it : S.getAssumedSet())
- OS << it << ", ";
+ for (auto &It : S.getAssumedSet())
+ OS << It << ", ";
if (S.undefIsContained())
OS << "undef ";
}
@@ -3018,8 +3159,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS,
OS << " [" << Acc.getKind() << "] " << *Acc.getRemoteInst();
if (Acc.getLocalInst() != Acc.getRemoteInst())
OS << " via " << *Acc.getLocalInst();
- if (Acc.getContent().hasValue())
- OS << " [" << *Acc.getContent() << "]";
+ if (Acc.getContent()) {
+ if (*Acc.getContent())
+ OS << " [" << **Acc.getContent() << "]";
+ else
+ OS << " [ <unknown> ]";
+ }
return OS;
}
///}
@@ -3032,7 +3177,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
SetVector<Function *> &Functions,
AnalysisGetter &AG,
CallGraphUpdater &CGUpdater,
- bool DeleteFns) {
+ bool DeleteFns, bool IsModulePass) {
if (Functions.empty())
return false;
@@ -3045,8 +3190,10 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
// Create an Attributor and initially empty information cache that is filled
// while we identify default attribute opportunities.
- Attributor A(Functions, InfoCache, CGUpdater, /* Allowed */ nullptr,
- DeleteFns);
+ AttributorConfig AC(CGUpdater);
+ AC.IsModulePass = IsModulePass;
+ AC.DeleteFns = DeleteFns;
+ Attributor A(Functions, InfoCache, AC);
// Create shallow wrappers for all functions that are not IPO amendable
if (AllowShallowWrappers)
@@ -3151,7 +3298,7 @@ PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns */ true)) {
+ /* DeleteFns */ true, /* IsModulePass */ true)) {
// FIXME: Think about passes we will preserve and add them here.
return PreservedAnalyses::none();
}
@@ -3179,7 +3326,8 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns */ false)) {
+ /* DeleteFns */ false,
+ /* IsModulePass */ false)) {
// FIXME: Think about passes we will preserve and add them here.
PreservedAnalyses PA;
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
@@ -3255,7 +3403,8 @@ struct AttributorLegacyPass : public ModulePass {
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns*/ true);
+ /* DeleteFns*/ true,
+ /* IsModulePass */ true);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -3292,7 +3441,8 @@ struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns */ false);
+ /* DeleteFns */ false,
+ /* IsModulePass */ false);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 2d88e329e093..4d99ce7e3175 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -14,9 +14,11 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -30,21 +32,29 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
using namespace llvm;
@@ -69,11 +79,11 @@ static cl::opt<unsigned, true> MaxPotentialValues(
cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
cl::init(7));
-static cl::opt<unsigned>
- MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden,
- cl::desc("Maximum number of interfering writes to "
- "check before assuming all might interfere."),
- cl::init(6));
+static cl::opt<unsigned> MaxInterferingAccesses(
+ "attributor-max-interfering-accesses", cl::Hidden,
+ cl::desc("Maximum number of interfering accesses to "
+ "check before assuming all might interfere."),
+ cl::init(6));
STATISTIC(NumAAs, "Number of abstract attributes created");
@@ -140,6 +150,7 @@ PIPE_OPERATOR(AANonNull)
PIPE_OPERATOR(AANoAlias)
PIPE_OPERATOR(AADereferenceable)
PIPE_OPERATOR(AAAlign)
+PIPE_OPERATOR(AAInstanceInfo)
PIPE_OPERATOR(AANoCapture)
PIPE_OPERATOR(AAValueSimplify)
PIPE_OPERATOR(AANoFree)
@@ -150,7 +161,7 @@ PIPE_OPERATOR(AAMemoryLocation)
PIPE_OPERATOR(AAValueConstantRange)
PIPE_OPERATOR(AAPrivatizablePtr)
PIPE_OPERATOR(AAUndefinedBehavior)
-PIPE_OPERATOR(AAPotentialValues)
+PIPE_OPERATOR(AAPotentialConstantValues)
PIPE_OPERATOR(AANoUndef)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAFunctionReachability)
@@ -170,6 +181,45 @@ ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
} // namespace llvm
+/// Checks if a type could have padding bytes.
+static bool isDenselyPacked(Type *Ty, const DataLayout &DL) {
+ // There is no size information, so be conservative.
+ if (!Ty->isSized())
+ return false;
+
+ // If the alloc size is not equal to the storage size, then there are padding
+ // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+ if (DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty))
+ return false;
+
+ // FIXME: This isn't the right way to check for padding in vectors with
+ // non-byte-size elements.
+ if (VectorType *SeqTy = dyn_cast<VectorType>(Ty))
+ return isDenselyPacked(SeqTy->getElementType(), DL);
+
+ // For array types, check for padding within members.
+ if (ArrayType *SeqTy = dyn_cast<ArrayType>(Ty))
+ return isDenselyPacked(SeqTy->getElementType(), DL);
+
+ if (!isa<StructType>(Ty))
+ return true;
+
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(Ty);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned I = 0, E = StructTy->getNumElements(); I < E; ++I) {
+ Type *ElTy = StructTy->getElementType(I);
+ if (!isDenselyPacked(ElTy, DL))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(I))
+ return false;
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
+ }
+
+ return true;
+}
+
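// (Examples under a typical 64-bit data layout, assumed: { i32, i32 } is
// densely packed; { i8, i32 } is not, since 3 padding bytes follow the i8;
// x86_fp80 is not, since its alloc size (128 bits) exceeds its size (80).)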
/// Get pointer operand of memory accessing instruction. If \p I is
/// not a memory accessing instruction, return nullptr. If \p AllowVolatile,
/// is set to false and the instruction is volatile, return nullptr.
@@ -236,7 +286,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
}
// Ensure the result has the requested type.
- Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");
+ Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy,
+ Ptr->getName() + ".cast");
LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
return Ptr;
@@ -251,25 +302,32 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
/// once. Note that the value used for the callback may still be the value
/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
/// we will never visit more values than specified by \p MaxValues.
-/// If \p Intraprocedural is set to true only values valid in the scope of
-/// \p CtxI will be visited and simplification into other scopes is prevented.
+/// If \p VS does not contain the Interprocedural bit, only values valid in the
+/// scope of \p CtxI will be visited and simplification into other scopes is
+/// prevented.
template <typename StateTy>
static bool genericValueTraversal(
Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
StateTy &State,
function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
VisitValueCB,
- const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
+ const Instruction *CtxI, bool &UsedAssumedInformation,
+ bool UseValueSimplify = true, int MaxValues = 16,
function_ref<Value *(Value *)> StripCB = nullptr,
- bool Intraprocedural = false) {
+ AA::ValueScope VS = AA::Interprocedural) {
- const AAIsDead *LivenessAA = nullptr;
- if (IRP.getAnchorScope())
- LivenessAA = &A.getAAFor<AAIsDead>(
- QueryingAA,
- IRPosition::function(*IRP.getAnchorScope(), IRP.getCallBaseContext()),
- DepClassTy::NONE);
- bool AnyDead = false;
+ struct LivenessInfo {
+ const AAIsDead *LivenessAA = nullptr;
+ bool AnyDead = false;
+ };
+ SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs;
+ auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & {
+ LivenessInfo &LI = LivenessAAs[&F];
+ if (!LI.LivenessAA)
+ LI.LivenessAA = &A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F),
+ DepClassTy::NONE);
+ return LI;
+ };
Value *InitialV = &IRP.getAssociatedValue();
using Item = std::pair<Value *, const Instruction *>;
@@ -319,10 +377,9 @@ static bool genericValueTraversal(
// Look through select instructions, visit assumed potential values.
if (auto *SI = dyn_cast<SelectInst>(V)) {
- bool UsedAssumedInformation = false;
Optional<Constant *> C = A.getAssumedConstant(
*SI->getCondition(), QueryingAA, UsedAssumedInformation);
- bool NoValueYet = !C.hasValue();
+ bool NoValueYet = !C;
if (NoValueYet || isa_and_nonnull<UndefValue>(*C))
continue;
if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) {
@@ -340,12 +397,12 @@ static bool genericValueTraversal(
// Look through phi nodes, visit all live operands.
if (auto *PHI = dyn_cast<PHINode>(V)) {
- assert(LivenessAA &&
- "Expected liveness in the presence of instructions!");
+ LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction());
for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
- AnyDead = true;
+ if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
+ LI.AnyDead = true;
+ UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint();
continue;
}
Worklist.push_back(
@@ -355,9 +412,9 @@ static bool genericValueTraversal(
}
if (auto *Arg = dyn_cast<Argument>(V)) {
- if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) {
+ if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) {
SmallVector<Item> CallSiteValues;
- bool AllCallSitesKnown = true;
+ bool UsedAssumedInformation = false;
if (A.checkForAllCallSites(
[&](AbstractCallSite ACS) {
// Callbacks might not have a corresponding call site operand,
@@ -368,7 +425,7 @@ static bool genericValueTraversal(
CallSiteValues.push_back({CSOp, ACS.getInstruction()});
return true;
},
- *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) {
+ *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) {
Worklist.append(CallSiteValues);
continue;
}
@@ -376,14 +433,13 @@ static bool genericValueTraversal(
}
if (UseValueSimplify && !isa<Constant>(V)) {
- bool UsedAssumedInformation = false;
Optional<Value *> SimpleV =
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
- if (!SimpleV.hasValue())
+ if (!SimpleV)
continue;
Value *NewV = SimpleV.getValue();
if (NewV && NewV != V) {
- if (!Intraprocedural || !CtxI ||
+ if ((VS & AA::Interprocedural) || !CtxI ||
AA::isValidInScope(*NewV, CtxI->getFunction())) {
Worklist.push_back({NewV, CtxI});
continue;
@@ -391,6 +447,37 @@ static bool genericValueTraversal(
}
}
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ bool UsedAssumedInformation = false;
+ // If we ask for the potentially loaded values from the initial pointer we
+ // will simply end up here again. The load itself is as far as we can go.
+ if (LI->getPointerOperand() != InitialV) {
+ SmallSetVector<Value *, 4> PotentialCopies;
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies,
+ PotentialValueOrigins, QueryingAA,
+ UsedAssumedInformation,
+ /* OnlyExact */ true)) {
+ // Values have to be dynamically unique or we lose the fact that a
+ // single llvm::Value might represent two runtime values (e.g., stack
+ // locations in different recursive calls).
+ bool DynamicallyUnique =
+ llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) {
+ return AA::isDynamicallyUnique(A, QueryingAA, *PC);
+ });
+ if (DynamicallyUnique &&
+ ((VS & AA::Interprocedural) || !CtxI ||
+ llvm::all_of(PotentialCopies, [CtxI](Value *PC) {
+ return AA::isValidInScope(*PC, CtxI->getFunction());
+ }))) {
+ for (auto *PotentialCopy : PotentialCopies)
+ Worklist.push_back({PotentialCopy, CtxI});
+ continue;
+ }
+ }
+ }
+ }
+
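// (Sketch of the dynamic-uniqueness requirement above: in
//   void f(int n) { int x = n; if (n) f(n - 1); use(x); }
// the single alloca for `x` names a different stack slot in every recursive
// frame, so its loaded value must not be treated as one runtime value.)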
// Once a leaf is reached we inform the user through the callback.
if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
@@ -400,8 +487,10 @@ static bool genericValueTraversal(
} while (!Worklist.empty());
// If we actually used liveness information, we have to record a dependence.
- if (AnyDead)
- A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
+ for (auto &It : LivenessAAs)
+ if (It.second.AnyDead)
+ A.recordDependence(*It.second.LivenessAA, QueryingAA,
+ DepClassTy::OPTIONAL);
// All values have been visited.
return true;
@@ -411,7 +500,8 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
const Instruction *CtxI,
- bool Intraprocedural) {
+ bool &UsedAssumedInformation,
+ AA::ValueScope VS) {
auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
SmallPtrSet<Value *, 8> SeenObjects;
auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
@@ -423,15 +513,16 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
};
if (!genericValueTraversal<decltype(Objects)>(
A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
- true, 32, StripCB, Intraprocedural))
+ UsedAssumedInformation, true, 32, StripCB, VS))
return false;
return true;
}
-const Value *stripAndAccumulateMinimalOffsets(
- Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
- const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
- bool UseAssumed = false) {
+static const Value *
+stripAndAccumulateOffsets(Attributor &A, const AbstractAttribute &QueryingAA,
+ const Value *Val, const DataLayout &DL, APInt &Offset,
+ bool GetMinOffset, bool AllowNonInbounds,
+ bool UseAssumed = false) {
auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool {
const IRPosition &Pos = IRPosition::value(V);
@@ -442,14 +533,20 @@ const Value *stripAndAccumulateMinimalOffsets(
: DepClassTy::NONE);
ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed()
: ValueConstantRangeAA.getKnown();
+ if (Range.isFullSet())
+ return false;
+
// We can only use the lower part of the range because the upper part can
// be higher than what the value can really be.
- ROffset = Range.getSignedMin();
+ if (GetMinOffset)
+ ROffset = Range.getSignedMin();
+ else
+ ROffset = Range.getSignedMax();
return true;
};
return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
- /* AllowInvariant */ false,
+ /* AllowInvariant */ true,
AttributorAnalysis);
}
@@ -458,8 +555,9 @@ getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA,
const Value *Ptr, int64_t &BytesOffset,
const DataLayout &DL, bool AllowNonInbounds = false) {
APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
- const Value *Base = stripAndAccumulateMinimalOffsets(
- A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
+ const Value *Base =
+ stripAndAccumulateOffsets(A, QueryingAA, Ptr, DL, OffsetAPInt,
+ /* GetMinOffset */ true, AllowNonInbounds);
BytesOffset = OffsetAPInt.getSExtValue();
return Base;
@@ -493,10 +591,9 @@ static void clampReturnedValueStates(
LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
<< " @ " << RVPos << "\n");
const StateType &AAS = AA.getState();
- if (T.hasValue())
- *T &= AAS;
- else
- T = AAS;
+ if (!T)
+ T = StateType::getBestState(AAS);
+ *T &= AAS;
LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
<< "\n");
return T->isValidState();
@@ -504,7 +601,7 @@ static void clampReturnedValueStates(
if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
S.indicatePessimisticFixpoint();
- else if (T.hasValue())
+ else if (T)
S ^= *T;
}
@@ -560,20 +657,19 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
<< " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
const StateType &AAS = AA.getState();
- if (T.hasValue())
- *T &= AAS;
- else
- T = AAS;
+ if (!T)
+ T = StateType::getBestState(AAS);
+ *T &= AAS;
LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
<< "\n");
return T->isValidState();
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
S.indicatePessimisticFixpoint();
- else if (T.hasValue())
+ else if (T)
S ^= *T;
}
@@ -667,7 +763,6 @@ struct AACallSiteReturnedFromReturned : public BaseType {
return clampStateAndIndicateChange(S, AA.getState());
}
};
-} // namespace
/// Helper function to accumulate uses.
template <class AAType, typename StateType = typename AAType::StateType>
@@ -779,6 +874,7 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
S += ParentState;
}
}
+} // namespace
/// ------------------------ PointerInfo ---------------------------------------
@@ -786,9 +882,6 @@ namespace llvm {
namespace AA {
namespace PointerInfo {
-/// An access kind description as used by AAPointerInfo.
-struct OffsetAndSize;
-
struct State;
} // namespace PointerInfo
@@ -806,7 +899,7 @@ struct DenseMapInfo<AAPointerInfo::Access> : DenseMapInfo<Instruction *> {
/// Helper that allows OffsetAndSize as a key in a DenseMap.
template <>
-struct DenseMapInfo<AA::PointerInfo ::OffsetAndSize>
+struct DenseMapInfo<AAPointerInfo ::OffsetAndSize>
: DenseMapInfo<std::pair<int64_t, int64_t>> {};
/// Helper for AA::PointerInfo::Access DenseMap/Set usage ignoring everything
@@ -822,90 +915,15 @@ struct AccessAsInstructionInfo : DenseMapInfo<Instruction *> {
} // namespace llvm
-/// Helper to represent an access offset and size, with logic to deal with
-/// uncertainty and check for overlapping accesses.
-struct AA::PointerInfo::OffsetAndSize : public std::pair<int64_t, int64_t> {
- using BaseTy = std::pair<int64_t, int64_t>;
- OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
- OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
- int64_t getOffset() const { return first; }
- int64_t getSize() const { return second; }
- static OffsetAndSize getUnknown() { return OffsetAndSize(Unknown, Unknown); }
-
- /// Return true if offset or size are unknown.
- bool offsetOrSizeAreUnknown() const {
- return getOffset() == OffsetAndSize::Unknown ||
- getSize() == OffsetAndSize::Unknown;
- }
-
- /// Return true if this offset and size pair might describe an address that
- /// overlaps with \p OAS.
- bool mayOverlap(const OffsetAndSize &OAS) const {
- // Any unknown value and we are giving up -> overlap.
- if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
- return true;
-
- // Check if one offset point is in the other interval [offset, offset+size].
- return OAS.getOffset() + OAS.getSize() > getOffset() &&
- OAS.getOffset() < getOffset() + getSize();
- }
-
- /// Constant used to represent unknown offset or sizes.
- static constexpr int64_t Unknown = 1 << 31;
-};
-
-/// Implementation of the DenseMapInfo.
-///
-///{
-inline llvm::AccessAsInstructionInfo::Access
-llvm::AccessAsInstructionInfo::getEmptyKey() {
- return Access(Base::getEmptyKey(), nullptr, AAPointerInfo::AK_READ, nullptr);
-}
-inline llvm::AccessAsInstructionInfo::Access
-llvm::AccessAsInstructionInfo::getTombstoneKey() {
- return Access(Base::getTombstoneKey(), nullptr, AAPointerInfo::AK_READ,
- nullptr);
-}
-unsigned llvm::AccessAsInstructionInfo::getHashValue(
- const llvm::AccessAsInstructionInfo::Access &A) {
- return Base::getHashValue(A.getRemoteInst());
-}
-bool llvm::AccessAsInstructionInfo::isEqual(
- const llvm::AccessAsInstructionInfo::Access &LHS,
- const llvm::AccessAsInstructionInfo::Access &RHS) {
- return LHS.getRemoteInst() == RHS.getRemoteInst();
-}
-inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
-llvm::DenseMapInfo<AAPointerInfo::Access>::getEmptyKey() {
- return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_READ,
- nullptr);
-}
-inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
-llvm::DenseMapInfo<AAPointerInfo::Access>::getTombstoneKey() {
- return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_WRITE,
- nullptr);
-}
-
-unsigned llvm::DenseMapInfo<AAPointerInfo::Access>::getHashValue(
- const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &A) {
- return detail::combineHashValue(
- DenseMapInfo<Instruction *>::getHashValue(A.getRemoteInst()),
- (A.isWrittenValueYetUndetermined()
- ? ~0
- : DenseMapInfo<Value *>::getHashValue(A.getWrittenValue()))) +
- A.getKind();
-}
-
-bool llvm::DenseMapInfo<AAPointerInfo::Access>::isEqual(
- const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &LHS,
- const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &RHS) {
- return LHS == RHS;
-}
-///}
-
/// A type to track pointer/struct usage and accesses for AAPointerInfo.
struct AA::PointerInfo::State : public AbstractState {
+ ~State() {
+ // The Accesses objects are bump-allocated and never deleted; we still
+ // need to run their destructors here.
+ for (auto &It : AccessBins)
+ It.second->~Accesses();
+ }
+
/// Return the best possible representable state.
static State getBestState(const State &SIS) { return State(); }
@@ -916,9 +934,10 @@ struct AA::PointerInfo::State : public AbstractState {
return R;
}
- State() {}
- State(const State &SIS) : AccessBins(SIS.AccessBins) {}
- State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {}
+ State() = default;
+ State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {
+ SIS.AccessBins.clear();
+ }
const State &getAssumed() const { return *this; }
@@ -967,15 +986,11 @@ struct AA::PointerInfo::State : public AbstractState {
return false;
auto &Accs = It->getSecond();
auto &RAccs = RIt->getSecond();
- if (Accs.size() != RAccs.size())
+ if (Accs->size() != RAccs->size())
return false;
- auto AccIt = Accs.begin(), RAccIt = RAccs.begin(), AccE = Accs.end();
- while (AccIt != AccE) {
- if (*AccIt != *RAccIt)
+ for (const auto &ZipIt : llvm::zip(*Accs, *RAccs))
+ if (std::get<0>(ZipIt) != std::get<1>(ZipIt))
return false;
- ++AccIt;
- ++RAccIt;
- }
++It;
++RIt;
}
@@ -984,42 +999,88 @@ struct AA::PointerInfo::State : public AbstractState {
bool operator!=(const State &R) const { return !(*this == R); }
/// We store accesses in a container keyed by the instruction.
- using Accesses = DenseSet<AAPointerInfo::Access, AccessAsInstructionInfo>;
+ struct Accesses {
+ SmallVector<AAPointerInfo::Access, 4> Accesses;
+ DenseMap<const Instruction *, unsigned> Map;
+
+ unsigned size() const { return Accesses.size(); }
+
+ using vec_iterator = decltype(Accesses)::iterator;
+ vec_iterator begin() { return Accesses.begin(); }
+ vec_iterator end() { return Accesses.end(); }
+
+ using iterator = decltype(Map)::const_iterator;
+ iterator find(AAPointerInfo::Access &Acc) {
+ return Map.find(Acc.getRemoteInst());
+ }
+ iterator find_end() { return Map.end(); }
+
+ AAPointerInfo::Access &get(iterator &It) {
+ return Accesses[It->getSecond()];
+ }
+
+ void insert(AAPointerInfo::Access &Acc) {
+ Map[Acc.getRemoteInst()] = Accesses.size();
+ Accesses.push_back(Acc);
+ }
+ };
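// (Illustrative use of the container above: the vector preserves insertion
// order for deterministic iteration, the map gives O(1) lookup by remote
// instruction, mirroring addAccess below:
//   auto It = Bin->find(Acc);
//   if (It == Bin->find_end())
//     Bin->insert(Acc);        // first access for this instruction
//   else
//     Bin->get(It) &= Acc;     // merge into the existing access)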
/// We store all accesses in bins denoted by their offset and size.
- using AccessBinsTy = DenseMap<OffsetAndSize, Accesses>;
+ using AccessBinsTy = DenseMap<AAPointerInfo::OffsetAndSize, Accesses *>;
AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); }
AccessBinsTy::const_iterator end() const { return AccessBins.end(); }
protected:
/// The bins with all the accesses for the associated pointer.
- DenseMap<OffsetAndSize, Accesses> AccessBins;
+ AccessBinsTy AccessBins;
/// Add a new access to the state at offset \p Offset and with size \p Size.
/// The access is associated with \p I, writes \p Content (if anything), and
/// is of kind \p Kind.
/// \Returns CHANGED, if the state changed, UNCHANGED otherwise.
- ChangeStatus addAccess(int64_t Offset, int64_t Size, Instruction &I,
- Optional<Value *> Content,
+ ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size,
+ Instruction &I, Optional<Value *> Content,
AAPointerInfo::AccessKind Kind, Type *Ty,
Instruction *RemoteI = nullptr,
Accesses *BinPtr = nullptr) {
- OffsetAndSize Key{Offset, Size};
- Accesses &Bin = BinPtr ? *BinPtr : AccessBins[Key];
+ AAPointerInfo::OffsetAndSize Key{Offset, Size};
+ Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key];
+ if (!Bin)
+ Bin = new (A.Allocator) Accesses;
AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty);
// Check if we have an access for this instruction in this bin, if not,
// simply add it.
- auto It = Bin.find(Acc);
- if (It == Bin.end()) {
- Bin.insert(Acc);
+ auto It = Bin->find(Acc);
+ if (It == Bin->find_end()) {
+ Bin->insert(Acc);
return ChangeStatus::CHANGED;
}
// If the existing access is the same as the new one, nothing changed.
- AAPointerInfo::Access Before = *It;
+ AAPointerInfo::Access &Current = Bin->get(It);
+ AAPointerInfo::Access Before = Current;
// The new one will be combined with the existing one.
- *It &= Acc;
- return *It == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+ Current &= Acc;
+ return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+ }
+
+ /// See AAPointerInfo::forallInterferingAccesses.
+ bool forallInterferingAccesses(
+ AAPointerInfo::OffsetAndSize OAS,
+ function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
+ if (!isValidState())
+ return false;
+
+ for (auto &It : AccessBins) {
+ AAPointerInfo::OffsetAndSize ItOAS = It.getFirst();
+ if (!OAS.mayOverlap(ItOAS))
+ continue;
+ bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown();
+ for (auto &Access : *It.getSecond())
+ if (!CB(Access, IsExact))
+ return false;
+ }
+ return true;
}
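// (Worked example for the overlap test used above: bin (offset 0, size 4)
// overlaps bin (offset 2, size 6) since 2 + 6 > 0 and 2 < 0 + 4, but not
// bin (offset 4, size 4) since 4 is not < 0 + 4; an unknown offset or size
// conservatively overlaps everything.)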
/// See AAPointerInfo::forallInterferingAccesses.
@@ -1028,10 +1089,11 @@ protected:
function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
if (!isValidState())
return false;
+
// First find the offset and size of I.
- OffsetAndSize OAS(-1, -1);
+ AAPointerInfo::OffsetAndSize OAS(-1, -1);
for (auto &It : AccessBins) {
- for (auto &Access : It.getSecond()) {
+ for (auto &Access : *It.getSecond()) {
if (Access.getRemoteInst() == &I) {
OAS = It.getFirst();
break;
@@ -1040,21 +1102,13 @@ protected:
if (OAS.getSize() != -1)
break;
}
+ // No access for I was found, we are done.
if (OAS.getSize() == -1)
return true;
// Now that we have an offset and size, find all overlapping ones and use
// the callback on the accesses.
- for (auto &It : AccessBins) {
- OffsetAndSize ItOAS = It.getFirst();
- if (!OAS.mayOverlap(ItOAS))
- continue;
- bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown();
- for (auto &Access : It.getSecond())
- if (!CB(Access, IsExact))
- return false;
- }
- return true;
+ return forallInterferingAccesses(OAS, CB);
}
private:
@@ -1062,6 +1116,7 @@ private:
BooleanState BS;
};
+namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1084,22 +1139,18 @@ struct AAPointerInfoImpl
}
bool forallInterferingAccesses(
- LoadInst &LI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
+ OffsetAndSize OAS,
+ function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
const override {
- return State::forallInterferingAccesses(LI, CB);
+ return State::forallInterferingAccesses(OAS, CB);
}
bool forallInterferingAccesses(
- StoreInst &SI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
- const override {
- return State::forallInterferingAccesses(SI, CB);
- }
- bool forallInterferingWrites(
- Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
function_ref<bool(const Access &, bool)> UserCB) const override {
SmallPtrSet<const Access *, 8> DominatingWrites;
- SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites;
+ SmallVector<std::pair<const Access *, bool>, 8> InterferingAccesses;
- Function &Scope = *LI.getFunction();
+ Function &Scope = *I.getFunction();
const auto &NoSyncAA = A.getAAFor<AANoSync>(
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
@@ -1127,13 +1178,15 @@ struct AAPointerInfoImpl
// TODO: Use inter-procedural reachability and dominance.
const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- QueryingAA, IRPosition::function(*LI.getFunction()),
- DepClassTy::OPTIONAL);
+ QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
- const bool CanUseCFGResoning = CanIgnoreThreading(LI);
+ const bool FindInterferingWrites = I.mayReadFromMemory();
+ const bool FindInterferingReads = I.mayWriteToMemory();
+ const bool UseDominanceReasoning = FindInterferingWrites;
+ const bool CanUseCFGResoning = CanIgnoreThreading(I);
InformationCache &InfoCache = A.getInfoCache();
const DominatorTree *DT =
- NoRecurseAA.isKnownNoRecurse()
+ NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning
? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
Scope)
: nullptr;
@@ -1189,33 +1242,37 @@ struct AAPointerInfoImpl
}
auto AccessCB = [&](const Access &Acc, bool Exact) {
- if (!Acc.isWrite())
+ if ((!FindInterferingWrites || !Acc.isWrite()) &&
+ (!FindInterferingReads || !Acc.isRead()))
return true;
// For now we only filter accesses based on CFG reasoning which does not
// work yet if we have threading effects, or the access is complicated.
      if (CanUseCFGReasoning) {
- if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA,
- IsLiveInCalleeCB))
+ if ((!Acc.isWrite() ||
+ !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA,
+ IsLiveInCalleeCB)) &&
+ (!Acc.isRead() ||
+ !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA,
+ IsLiveInCalleeCB)))
return true;
- if (DT && Exact &&
- (Acc.getLocalInst()->getFunction() == LI.getFunction()) &&
+ if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) &&
IsSameThreadAsLoad(Acc)) {
- if (DT->dominates(Acc.getLocalInst(), &LI))
+ if (DT->dominates(Acc.getLocalInst(), &I))
DominatingWrites.insert(&Acc);
}
}
- InterferingWrites.push_back({&Acc, Exact});
+ InterferingAccesses.push_back({&Acc, Exact});
return true;
};
- if (!State::forallInterferingAccesses(LI, AccessCB))
+ if (!State::forallInterferingAccesses(I, AccessCB))
return false;
// If we cannot use CFG reasoning we only filter the non-write accesses
// and are done here.
    if (!CanUseCFGReasoning) {
- for (auto &It : InterferingWrites)
+ for (auto &It : InterferingAccesses)
if (!UserCB(*It.first, It.second))
return false;
return true;
@@ -1242,47 +1299,52 @@ struct AAPointerInfoImpl
return false;
};
- // Run the user callback on all writes we cannot skip and return if that
+ // Run the user callback on all accesses we cannot skip and return if that
// succeeded for all or not.
- unsigned NumInterferingWrites = InterferingWrites.size();
- for (auto &It : InterferingWrites)
- if (!DT || NumInterferingWrites > MaxInterferingWrites ||
- !CanSkipAccess(*It.first, It.second))
+ unsigned NumInterferingAccesses = InterferingAccesses.size();
+ for (auto &It : InterferingAccesses) {
+ if (!DT || NumInterferingAccesses > MaxInterferingAccesses ||
+ !CanSkipAccess(*It.first, It.second)) {
if (!UserCB(*It.first, It.second))
return false;
+ }
+ }
return true;
}
- ChangeStatus translateAndAddCalleeState(Attributor &A,
- const AAPointerInfo &CalleeAA,
- int64_t CallArgOffset, CallBase &CB) {
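+  /// Translate the access bins of \p OtherAA into this state, shifting all
+  /// offsets by \p Offset. If \p FromCallee is set, the access contents are
+  /// additionally translated from the callee into the call site context \p CB.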
+ ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA,
+ int64_t Offset, CallBase &CB,
+ bool FromCallee = false) {
using namespace AA::PointerInfo;
- if (!CalleeAA.getState().isValidState() || !isValidState())
+ if (!OtherAA.getState().isValidState() || !isValidState())
return indicatePessimisticFixpoint();
- const auto &CalleeImplAA = static_cast<const AAPointerInfoImpl &>(CalleeAA);
- bool IsByval = CalleeImplAA.getAssociatedArgument()->hasByValAttr();
+ const auto &OtherAAImpl = static_cast<const AAPointerInfoImpl &>(OtherAA);
+ bool IsByval =
+ FromCallee && OtherAAImpl.getAssociatedArgument()->hasByValAttr();
// Combine the accesses bin by bin.
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- for (auto &It : CalleeImplAA.getState()) {
+ for (auto &It : OtherAAImpl.getState()) {
OffsetAndSize OAS = OffsetAndSize::getUnknown();
- if (CallArgOffset != OffsetAndSize::Unknown)
- OAS = OffsetAndSize(It.first.getOffset() + CallArgOffset,
- It.first.getSize());
- Accesses &Bin = AccessBins[OAS];
- for (const AAPointerInfo::Access &RAcc : It.second) {
+ if (Offset != OffsetAndSize::Unknown)
+ OAS = OffsetAndSize(It.first.getOffset() + Offset, It.first.getSize());
+ Accesses *Bin = AccessBins.lookup(OAS);
+ for (const AAPointerInfo::Access &RAcc : *It.second) {
if (IsByval && !RAcc.isRead())
continue;
bool UsedAssumedInformation = false;
- Optional<Value *> Content = A.translateArgumentToCallSiteContent(
- RAcc.getContent(), CB, *this, UsedAssumedInformation);
- AccessKind AK =
- AccessKind(RAcc.getKind() & (IsByval ? AccessKind::AK_READ
- : AccessKind::AK_READ_WRITE));
+ AccessKind AK = RAcc.getKind();
+ Optional<Value *> Content = RAcc.getContent();
+ if (FromCallee) {
+ Content = A.translateArgumentToCallSiteContent(
+ RAcc.getContent(), CB, *this, UsedAssumedInformation);
+ AK = AccessKind(
+ AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE));
+ }
Changed =
- Changed | addAccess(OAS.getOffset(), OAS.getSize(), CB, Content, AK,
- RAcc.getType(), RAcc.getRemoteInst(), &Bin);
+ Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content,
+ AK, RAcc.getType(), RAcc.getRemoteInst(), Bin);
}
}
return Changed;
@@ -1305,7 +1367,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
bool handleAccess(Attributor &A, Instruction &I, Value &Ptr,
Optional<Value *> Content, AccessKind Kind, int64_t Offset,
ChangeStatus &Changed, Type *Ty,
- int64_t Size = AA::PointerInfo::OffsetAndSize::Unknown) {
+ int64_t Size = OffsetAndSize::Unknown) {
using namespace AA::PointerInfo;
// No need to find a size if one is given or the offset is unknown.
if (Offset != OffsetAndSize::Unknown && Size == OffsetAndSize::Unknown &&
@@ -1315,13 +1377,13 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
if (!AccessSize.isScalable())
Size = AccessSize.getFixedSize();
}
- Changed = Changed | addAccess(Offset, Size, I, Content, Kind, Ty);
+ Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, Ty);
return true;
};
  /// Helper struct; will support ranges eventually.
struct OffsetInfo {
- int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;
+ int64_t Offset = OffsetAndSize::Unknown;
bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
};
@@ -1329,7 +1391,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
using namespace AA::PointerInfo;
- State S = getState();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Value &AssociatedValue = getAssociatedValue();
@@ -1337,7 +1398,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
DenseMap<Value *, OffsetInfo> OffsetInfoMap;
OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
- auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
+ auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI,
bool &Follow) {
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
UsrOI = PtrOI;
@@ -1475,8 +1536,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::REQUIRED);
- Changed = translateAndAddCalleeState(
- A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) |
+ Changed = translateAndAddState(A, CSArgPI,
+ OffsetInfoMap[CurPtr].Offset, *CB) |
Changed;
return true;
}
@@ -1497,7 +1558,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
};
if (!A.checkForAllUses(UsePred, *this, AssociatedValue,
/* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL,
- EquivalentUseCB))
+ /* IgnoreDroppableUses */ true, EquivalentUseCB))
return indicatePessimisticFixpoint();
LLVM_DEBUG({
@@ -1505,15 +1566,19 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
for (auto &It : AccessBins) {
dbgs() << "[" << It.first.getOffset() << "-"
<< It.first.getOffset() + It.first.getSize()
- << "] : " << It.getSecond().size() << "\n";
- for (auto &Acc : It.getSecond()) {
+ << "] : " << It.getSecond()->size() << "\n";
+ for (auto &Acc : *It.getSecond()) {
dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst()
<< "\n";
if (Acc.getLocalInst() != Acc.getRemoteInst())
dbgs() << " --> "
<< *Acc.getRemoteInst() << "\n";
- if (!Acc.isWrittenValueYetUndetermined())
- dbgs() << " - " << Acc.getWrittenValue() << "\n";
+ if (!Acc.isWrittenValueYetUndetermined()) {
+ if (Acc.getWrittenValue())
+ dbgs() << " - c: " << *Acc.getWrittenValue() << "\n";
+ else
+ dbgs() << " - c: <unknown>\n";
+ }
}
}
});
@@ -1576,7 +1641,7 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
LengthVal = Length->getSExtValue();
Value &Ptr = getAssociatedValue();
unsigned ArgNo = getIRPosition().getCallSiteArgNo();
- ChangeStatus Changed;
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (ArgNo == 0) {
handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed,
nullptr, LengthVal);
@@ -1601,7 +1666,8 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA =
A.getAAFor<AAPointerInfo>(*this, ArgPos, DepClassTy::REQUIRED);
- return translateAndAddCalleeState(A, ArgAA, 0, *cast<CallBase>(getCtxI()));
+ return translateAndAddState(A, ArgAA, 0, *cast<CallBase>(getCtxI()),
+ /* FromCallee */ true);
}
/// See AbstractAttribute::trackStatistics()
@@ -1619,9 +1685,11 @@ struct AAPointerInfoCallSiteReturned final : AAPointerInfoFloating {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
+} // namespace
/// -----------------------NoUnwind Function Attribute--------------------------
+namespace {
struct AANoUnwindImpl : AANoUnwind {
AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {}
@@ -1693,9 +1761,11 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
};
+} // namespace
/// --------------------- Function Return Values -------------------------------
+namespace {
/// "Attribute" that collects all potential returned values and the return
/// instructions that they arise from.
///
@@ -1821,7 +1891,7 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
// Check if we have an assumed unique return value that we could manifest.
Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
- if (!UniqueRV.hasValue() || !UniqueRV.getValue())
+ if (!UniqueRV || !UniqueRV.getValue())
return Changed;
// Bookkeeping.
@@ -1893,17 +1963,18 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
return true;
};
+ bool UsedAssumedInformation = false;
auto ReturnInstCB = [&](Instruction &I) {
ReturnInst &Ret = cast<ReturnInst>(I);
return genericValueTraversal<ReturnInst>(
A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
- &I, /* UseValueSimplify */ true, /* MaxValues */ 16,
- /* StripCB */ nullptr, /* Intraprocedural */ true);
+ &I, UsedAssumedInformation, /* UseValueSimplify */ true,
+ /* MaxValues */ 16,
+ /* StripCB */ nullptr, AA::Intraprocedural);
};
// Discover returned values from all live returned instructions in the
// associated function.
- bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret},
UsedAssumedInformation))
return indicatePessimisticFixpoint();
@@ -1941,20 +2012,10 @@ struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
+} // namespace
/// ------------------------ NoSync Function Attribute -------------------------
-struct AANoSyncImpl : AANoSync {
- AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
-
- const std::string getAsStr() const override {
- return getAssumed() ? "nosync" : "may-sync";
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override;
-};
-
bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
if (!I->isAtomic())
return false;
@@ -1997,6 +2058,18 @@ bool AANoSync::isNoSyncIntrinsic(const Instruction *I) {
return false;
}
+namespace {
+struct AANoSyncImpl : AANoSync {
+ AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nosync" : "may-sync";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
auto CheckRWInstForNoSync = [&](Instruction &I) {
@@ -2059,9 +2132,11 @@ struct AANoSyncCallSite final : AANoSyncImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
};
+} // namespace
/// ------------------------ No-Free Attributes ----------------------------
+namespace {
struct AANoFreeImpl : public AANoFree {
AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {}
@@ -2243,8 +2318,10 @@ struct AANoFreeCallSiteReturned final : AANoFreeFloating {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) }
};
+} // namespace
/// ------------------------ NonNull Argument Attribute ------------------------
+namespace {
static int64_t getKnownNonNullAndDerefBytesForUse(
Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue,
const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) {
@@ -2332,7 +2409,7 @@ struct AANonNullImpl : AANonNull {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- Value &V = getAssociatedValue();
+ Value &V = *getAssociatedValue().stripPointerCasts();
if (!NullIsDefined &&
hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
/* IgnoreSubsumingPositions */ false, &A)) {
@@ -2356,7 +2433,7 @@ struct AANonNullImpl : AANonNull {
}
}
- if (isa<GlobalValue>(&getAssociatedValue())) {
+ if (isa<GlobalValue>(V)) {
indicatePessimisticFixpoint();
return;
}
@@ -2419,8 +2496,10 @@ struct AANonNullFloating : public AANonNullImpl {
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
@@ -2472,9 +2551,11 @@ struct AANonNullCallSiteReturned final
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
};
+} // namespace
/// ------------------------ No-Recurse Attributes ----------------------------
+namespace {
struct AANoRecurseImpl : public AANoRecurse {
AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {}
@@ -2498,14 +2579,15 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
DepClassTy::NONE);
return NoRecurseAA.isKnownNoRecurse();
};
- bool AllCallSitesKnown;
- if (A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) {
+ bool UsedAssumedInformation = false;
+ if (A.checkForAllCallSites(CallSitePred, *this, true,
+ UsedAssumedInformation)) {
// If we know all call sites and all are known no-recurse, we are done.
// If all known call sites, which might not be all that exist, are known
// to be no-recurse, we are not done but we can continue to assume
// no-recurse. If one of the call sites we have not visited will become
// live, another update is triggered.
- if (AllCallSitesKnown)
+ if (!UsedAssumedInformation)
indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -2549,9 +2631,11 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
};
+} // namespace
/// -------------------- Undefined-Behavior Attributes ------------------------
+namespace {
struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A)
: AAUndefinedBehavior(IRP, A) {}
@@ -2582,7 +2666,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// Either we stopped and the appropriate action was taken,
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I);
- if (!SimplifiedPtrOp.hasValue() || !SimplifiedPtrOp.getValue())
+ if (!SimplifiedPtrOp || !SimplifiedPtrOp.getValue())
return true;
const Value *PtrOpVal = SimplifiedPtrOp.getValue();
@@ -2627,7 +2711,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedCond =
stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst);
- if (!SimplifiedCond.hasValue() || !SimplifiedCond.getValue())
+ if (!SimplifiedCond || !*SimplifiedCond)
return true;
AssumedNoUBInsts.insert(&I);
return true;
@@ -2673,10 +2757,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
IRPosition::value(*ArgVal), *this, UsedAssumedInformation);
if (UsedAssumedInformation)
continue;
- if (SimplifiedVal.hasValue() && !SimplifiedVal.getValue())
+ if (SimplifiedVal && !SimplifiedVal.getValue())
return true;
- if (!SimplifiedVal.hasValue() ||
- isa<UndefValue>(*SimplifiedVal.getValue())) {
+ if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.getValue())) {
KnownUBInsts.insert(&I);
continue;
}
@@ -2691,40 +2774,38 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
return true;
};
- auto InspectReturnInstForUB =
- [&](Value &V, const SmallSetVector<ReturnInst *, 4> RetInsts) {
- // Check if a return instruction always cause UB or not
- // Note: It is guaranteed that the returned position of the anchor
- // scope has noundef attribute when this is called.
- // We also ensure the return position is not "assumed dead"
- // because the returned value was then potentially simplified to
- // `undef` in AAReturnedValues without removing the `noundef`
- // attribute yet.
-
- // When the returned position has noundef attriubte, UB occur in the
- // following cases.
- // (1) Returned value is known to be undef.
- // (2) The value is known to be a null pointer and the returned
- // position has nonnull attribute (because the returned value is
- // poison).
- bool FoundUB = false;
- if (isa<UndefValue>(V)) {
- FoundUB = true;
- } else {
- if (isa<ConstantPointerNull>(V)) {
- auto &NonNullAA = A.getAAFor<AANonNull>(
- *this, IRPosition::returned(*getAnchorScope()),
- DepClassTy::NONE);
- if (NonNullAA.isKnownNonNull())
- FoundUB = true;
- }
- }
+ auto InspectReturnInstForUB = [&](Instruction &I) {
+ auto &RI = cast<ReturnInst>(I);
+ // Either we stopped and the appropriate action was taken,
+ // or we got back a simplified return value to continue.
+ Optional<Value *> SimplifiedRetValue =
+ stopOnUndefOrAssumed(A, RI.getReturnValue(), &I);
+ if (!SimplifiedRetValue || !*SimplifiedRetValue)
+ return true;
- if (FoundUB)
- for (ReturnInst *RI : RetInsts)
- KnownUBInsts.insert(RI);
- return true;
- };
+      // Check if a return instruction always causes UB or not
+ // Note: It is guaranteed that the returned position of the anchor
+ // scope has noundef attribute when this is called.
+ // We also ensure the return position is not "assumed dead"
+ // because the returned value was then potentially simplified to
+ // `undef` in AAReturnedValues without removing the `noundef`
+ // attribute yet.
+
+      // When the returned position has noundef attribute, UB occurs in the
+ // following cases.
+ // (1) Returned value is known to be undef.
+ // (2) The value is known to be a null pointer and the returned
+ // position has nonnull attribute (because the returned value is
+ // poison).
+ if (isa<ConstantPointerNull>(*SimplifiedRetValue)) {
+ auto &NonNullAA = A.getAAFor<AANonNull>(
+ *this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE);
+ if (NonNullAA.isKnownNonNull())
+ KnownUBInsts.insert(&I);
+ }
+
+ return true;
+ };
bool UsedAssumedInformation = false;
A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
@@ -2747,8 +2828,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
auto &RetPosNoUndefAA =
A.getAAFor<AANoUndef>(*this, ReturnIRP, DepClassTy::NONE);
if (RetPosNoUndefAA.isKnownNoUndef())
- A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB,
- *this);
+ A.checkForAllInstructions(InspectReturnInstForUB, *this,
+ {Instruction::Ret}, UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true);
}
}
@@ -2776,7 +2858,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
case Instruction::AtomicRMW:
return !AssumedNoUBInsts.count(I);
case Instruction::Br: {
- auto BrInst = cast<BranchInst>(I);
+ auto *BrInst = cast<BranchInst>(I);
if (BrInst->isUnconditional())
return false;
return !AssumedNoUBInsts.count(I);
@@ -2847,13 +2929,13 @@ private:
IRPosition::value(*V), *this, UsedAssumedInformation);
if (!UsedAssumedInformation) {
// Don't depend on assumed values.
- if (!SimplifiedV.hasValue()) {
+ if (!SimplifiedV) {
// If it is known (which we tested above) but it doesn't have a value,
// then we can assume `undef` and hence the instruction is UB.
KnownUBInsts.insert(I);
return llvm::None;
}
- if (!SimplifiedV.getValue())
+ if (!*SimplifiedV)
return nullptr;
V = *SimplifiedV;
}
@@ -2877,9 +2959,11 @@ struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
KnownUBInsts.size();
}
};
+} // namespace
/// ------------------------ Will-Return Attributes ----------------------------
+namespace {
// Helper function that checks whether a function has any cycle which we don't
// know if it is bounded or not.
// Loops with maximum trip count are considered bounded, any other cycle not.
@@ -3018,9 +3102,11 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
};
+} // namespace
/// -------------------AAReachability Attribute--------------------------
+namespace {
struct AAReachabilityImpl : AAReachability {
AAReachabilityImpl(const IRPosition &IRP, Attributor &A)
: AAReachability(IRP, A) {}
@@ -3032,10 +3118,6 @@ struct AAReachabilityImpl : AAReachability {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
- if (!NoRecurseAA.isAssumedNoRecurse())
- return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
@@ -3047,9 +3129,11 @@ struct AAReachabilityFunction final : public AAReachabilityImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); }
};
+} // namespace
/// ------------------------ NoAlias Argument Attribute ------------------------
+namespace {
struct AANoAliasImpl : AANoAlias {
AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) {
assert(getAssociatedType()->isPointerTy() &&
@@ -3146,10 +3230,10 @@ struct AANoAliasArgument final
// If the argument is never passed through callbacks, no-alias cannot break
// synchronization.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (A.checkForAllCallSites(
[](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this,
- true, AllCallSitesKnown))
+ true, UsedAssumedInformation))
return Base::updateImpl(A);
// TODO: add no-alias but make sure it doesn't break synchronization by
@@ -3246,14 +3330,20 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return false;
}
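+    // Pointers known to be dereferenceable (or null) need not be treated as
+    // captured by comparisons against null; let the capture classification
+    // below query AADereferenceable for that.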
+ auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) {
+ const auto &DerefAA = A.getAAFor<AADereferenceable>(
+ *this, IRPosition::value(*O), DepClassTy::OPTIONAL);
+ return DerefAA.getAssumedDereferenceableBytes();
+ };
+
A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL);
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
const Function *ScopeFn = VIRP.getAnchorScope();
auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, VIRP, DepClassTy::NONE);
// Check whether the value is captured in the scope using AANoCapture.
- // Look at CFG and check only uses possibly executed before this
- // callsite.
+ // Look at CFG and check only uses possibly executed before this
+ // callsite.
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
@@ -3265,12 +3355,6 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return true;
if (ScopeFn) {
- const auto &ReachabilityAA = A.getAAFor<AAReachability>(
- *this, IRPosition::function(*ScopeFn), DepClassTy::OPTIONAL);
-
- if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI()))
- return true;
-
if (auto *CB = dyn_cast<CallBase>(UserI)) {
if (CB->isArgOperand(&U)) {
@@ -3284,17 +3368,26 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return true;
}
}
+
+ if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this))
+ return true;
}
- // For cases which can potentially have more users
- if (isa<GetElementPtrInst>(U) || isa<BitCastInst>(U) || isa<PHINode>(U) ||
- isa<SelectInst>(U)) {
+      // TODO: We should track the capturing uses in AANoCapture, but the
+      // problem is CGSCC runs: for those we would need to "allow" AANoCapture
+      // for a value in the module slice.
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::NO_CAPTURE:
+ return true;
+ case UseCaptureKind::MAY_CAPTURE:
+ LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *UserI
+ << "\n");
+ return false;
+ case UseCaptureKind::PASSTHROUGH:
Follow = true;
return true;
}
-
- LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *U << "\n");
- return false;
+ llvm_unreachable("unknown UseCaptureKind");
};
if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
@@ -3423,12 +3516,21 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
};
+} // namespace
/// -------------------AAIsDead Function Attribute-----------------------
+namespace {
struct AAIsDeadValueImpl : public AAIsDead {
AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ if (auto *Scope = getAnchorScope())
+ if (!A.isRunOn(*Scope))
+ indicatePessimisticFixpoint();
+ }
+
/// See AAIsDead::isAssumedDead().
bool isAssumedDead() const override { return isAssumed(IS_DEAD); }
@@ -3452,22 +3554,25 @@ struct AAIsDeadValueImpl : public AAIsDead {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ virtual const std::string getAsStr() const override {
return isAssumedDead() ? "assumed-dead" : "assumed-live";
}
/// Check if all uses are assumed dead.
bool areAllUsesAssumedDead(Attributor &A, Value &V) {
// Callers might not check the type, void has no uses.
- if (V.getType()->isVoidTy())
+ if (V.getType()->isVoidTy() || V.use_empty())
return true;
// If we replace a value with a constant there are no uses left afterwards.
if (!isa<Constant>(V)) {
+ if (auto *I = dyn_cast<Instruction>(&V))
+ if (!A.isRunOn(*I->getFunction()))
+ return false;
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(V, *this, UsedAssumedInformation);
- if (!C.hasValue() || *C)
+ if (!C || *C)
return true;
}
@@ -3477,7 +3582,8 @@ struct AAIsDeadValueImpl : public AAIsDead {
// without going through N update cycles. This is not required for
// correctness.
return A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ false,
- DepClassTy::REQUIRED);
+ DepClassTy::REQUIRED,
+ /* IgnoreDroppableUses */ false);
}
/// Determine if \p I is assumed to be side-effect free.
@@ -3508,6 +3614,8 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadValueImpl::initialize(A);
+
if (isa<UndefValue>(getAssociatedValue())) {
indicatePessimisticFixpoint();
return;
@@ -3538,6 +3646,15 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
});
}
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
+ if (isa_and_nonnull<StoreInst>(I))
+ if (isValidState())
+ return "assumed-dead-store";
+ return AAIsDeadValueImpl::getAsStr();
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
@@ -3553,6 +3670,10 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
return ChangeStatus::UNCHANGED;
}
+ bool isRemovableStore() const override {
+ return isAssumed(IS_REMOVABLE) && isa<StoreInst>(&getAssociatedValue());
+ }
+
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
Value &V = getAssociatedValue();
@@ -3567,21 +3688,7 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
return ChangeStatus::CHANGED;
}
}
- if (V.use_empty())
- return ChangeStatus::UNCHANGED;
-
- bool UsedAssumedInformation = false;
- Optional<Constant *> C =
- A.getAssumedConstant(V, *this, UsedAssumedInformation);
- if (C.hasValue() && C.getValue())
- return ChangeStatus::UNCHANGED;
-
- // Replace the value with undef as it is dead but keep droppable uses around
- // as they provide information we don't want to give up on just yet.
- UndefValue &UV = *UndefValue::get(V.getType());
- bool AnyChange =
- A.changeValueAfterManifest(V, UV, /* ChangeDropppable */ false);
- return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -3596,23 +3703,22 @@ struct AAIsDeadArgument : public AAIsDeadFloating {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadFloating::initialize(A);
if (!A.isFunctionIPOAmendable(*getAnchorScope()))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = AAIsDeadFloating::manifest(A);
Argument &Arg = *getAssociatedArgument();
if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {}))
if (A.registerFunctionSignatureRewrite(
Arg, /* ReplacementTypes */ {},
Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) {
- Arg.dropDroppableUses();
return ChangeStatus::CHANGED;
}
- return Changed;
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -3625,6 +3731,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadValueImpl::initialize(A);
if (isa<UndefValue>(getAssociatedValue()))
indicatePessimisticFixpoint();
}
@@ -3661,7 +3768,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
AAIsDeadCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AAIsDeadFloating(IRP, A), IsAssumedSideEffectFree(true) {}
+ : AAIsDeadFloating(IRP, A) {}
/// See AAIsDead::isAssumedDead().
bool isAssumedDead() const override {
@@ -3670,6 +3777,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ AAIsDeadFloating::initialize(A);
if (isa<UndefValue>(getAssociatedValue())) {
indicatePessimisticFixpoint();
return;
@@ -3707,7 +3815,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
}
private:
- bool IsAssumedSideEffectFree;
+ bool IsAssumedSideEffectFree = true;
};
struct AAIsDeadReturned : public AAIsDeadValueImpl {
@@ -3727,9 +3835,8 @@ struct AAIsDeadReturned : public AAIsDeadValueImpl {
return areAllUsesAssumedDead(A, *ACS.getInstruction());
};
- bool AllCallSitesKnown;
if (!A.checkForAllCallSites(PredForCallSite, *this, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
@@ -3761,17 +3868,13 @@ struct AAIsDeadFunction : public AAIsDead {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- const Function *F = getAnchorScope();
- if (F && !F->isDeclaration()) {
- // We only want to compute liveness once. If the function is not part of
- // the SCC, skip it.
- if (A.isRunOn(*const_cast<Function *>(F))) {
- ToBeExploredFrom.insert(&F->getEntryBlock().front());
- assumeLive(A, F->getEntryBlock());
- } else {
- indicatePessimisticFixpoint();
- }
+ Function *F = getAnchorScope();
+ if (!F || F->isDeclaration() || !A.isRunOn(*F)) {
+ indicatePessimisticFixpoint();
+ return;
}
+ ToBeExploredFrom.insert(&F->getEntryBlock().front());
+ assumeLive(A, F->getEntryBlock());
}
/// See AbstractAttribute::getAsStr().
@@ -3834,6 +3937,9 @@ struct AAIsDeadFunction : public AAIsDead {
ChangeStatus updateImpl(Attributor &A) override;
bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
+ assert(From->getParent() == getAnchorScope() &&
+ To->getParent() == getAnchorScope() &&
+ "Used AAIsDead of the wrong function");
return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
}
@@ -3973,7 +4079,7 @@ identifyAliveSuccessors(Attributor &A, const BranchInst &BI,
} else {
Optional<Constant *> C =
A.getAssumedConstant(*BI.getCondition(), AA, UsedAssumedInformation);
- if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
+ if (!C || isa_and_nonnull<UndefValue>(*C)) {
// No value yet, assume both edges are dead.
} else if (isa_and_nonnull<ConstantInt>(*C)) {
const BasicBlock *SuccBB =
@@ -3995,7 +4101,7 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation);
- if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
+ if (!C || isa_and_nonnull<UndefValue>(C.getValue())) {
// No value yet, assume all edges are dead.
} else if (isa_and_nonnull<ConstantInt>(C.getValue())) {
for (auto &CaseIt : SI.cases()) {
@@ -4142,9 +4248,11 @@ struct AAIsDeadCallSite final : AAIsDeadFunction {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
+} // namespace
/// -------------------- Dereferenceable Argument Attribute --------------------
+namespace {
struct AADereferenceableImpl : AADereferenceable {
AADereferenceableImpl(const IRPosition &IRP, Attributor &A)
: AADereferenceable(IRP, A) {}
@@ -4152,6 +4260,7 @@ struct AADereferenceableImpl : AADereferenceable {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
+ Value &V = *getAssociatedValue().stripPointerCasts();
SmallVector<Attribute, 4> Attrs;
getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
Attrs, /* IgnoreSubsumingPositions */ false, &A);
@@ -4162,9 +4271,8 @@ struct AADereferenceableImpl : AADereferenceable {
NonNullAA = &A.getAAFor<AANonNull>(*this, IRP, DepClassTy::NONE);
bool CanBeNull, CanBeFreed;
- takeKnownDerefBytesMaximum(
- IRP.getAssociatedValue().getPointerDereferenceableBytes(
- A.getDataLayout(), CanBeNull, CanBeFreed));
+ takeKnownDerefBytesMaximum(V.getPointerDereferenceableBytes(
+ A.getDataLayout(), CanBeNull, CanBeFreed));
bool IsFnInterface = IRP.isFnInterfaceKind();
Function *FnScope = IRP.getAnchorScope();
@@ -4263,8 +4371,9 @@ struct AADereferenceableFloating : AADereferenceableImpl {
unsigned IdxWidth =
DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
APInt Offset(IdxWidth, 0);
- const Value *Base =
- stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);
+ const Value *Base = stripAndAccumulateOffsets(
+ A, *this, &V, DL, Offset, /* GetMinOffset */ false,
+ /* AllowNonInbounds */ true);
const auto &AA = A.getAAFor<AADereferenceable>(
*this, IRPosition::value(*Base), DepClassTy::REQUIRED);
@@ -4312,8 +4421,10 @@ struct AADereferenceableFloating : AADereferenceableImpl {
};
DerefState T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
@@ -4377,9 +4488,11 @@ struct AADereferenceableCallSiteReturned final
STATS_DECLTRACK_CS_ATTR(dereferenceable);
}
};
+} // namespace
// ------------------------ Align Argument Attribute ------------------------
+namespace {
static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
Value &AssociatedValue, const Use *U,
const Instruction *I, bool &TrackUse) {
@@ -4450,14 +4563,8 @@ struct AAAlignImpl : AAAlign {
for (const Attribute &Attr : Attrs)
takeKnownMaximum(Attr.getValueAsInt());
- Value &V = getAssociatedValue();
- // TODO: This is a HACK to avoid getPointerAlignment to introduce a ptr2int
- // use of the function pointer. This was caused by D73131. We want to
- // avoid this for function pointers especially because we iterate
- // their uses and int2ptr is not handled. It is not a correctness
- // problem though!
- if (!V.getType()->getPointerElementType()->isFunctionTy())
- takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value());
+ Value &V = *getAssociatedValue().stripPointerCasts();
+ takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value());
if (getIRPosition().isFnInterfaceKind() &&
(!getAnchorScope() ||
@@ -4479,16 +4586,16 @@ struct AAAlignImpl : AAAlign {
for (const Use &U : AssociatedValue.uses()) {
if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
if (SI->getPointerOperand() == &AssociatedValue)
- if (SI->getAlignment() < getAssumedAlign()) {
+ if (SI->getAlign() < getAssumedAlign()) {
STATS_DECLTRACK(AAAlign, Store,
"Number of times alignment added to a store");
- SI->setAlignment(Align(getAssumedAlign()));
+ SI->setAlignment(getAssumedAlign());
LoadStoreChanged = ChangeStatus::CHANGED;
}
} else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
if (LI->getPointerOperand() == &AssociatedValue)
- if (LI->getAlignment() < getAssumedAlign()) {
- LI->setAlignment(Align(getAssumedAlign()));
+ if (LI->getAlign() < getAssumedAlign()) {
+ LI->setAlignment(getAssumedAlign());
STATS_DECLTRACK(AAAlign, Load,
"Number of times alignment added to a load");
LoadStoreChanged = ChangeStatus::CHANGED;
@@ -4532,9 +4639,8 @@ struct AAAlignImpl : AAAlign {
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
- return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
- "-" + std::to_string(getAssumedAlign()) + ">")
- : "unknown-align";
+ return "align<" + std::to_string(getKnownAlign().value()) + "-" +
+ std::to_string(getAssumedAlign().value()) + ">";
}
};
@@ -4548,6 +4654,8 @@ struct AAAlignFloating : AAAlignImpl {
auto VisitValueCB = [&](Value &V, const Instruction *,
AAAlign::StateType &T, bool Stripped) -> bool {
+ if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V))
+ return true;
const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
if (!Stripped && this == &AA) {
@@ -4555,6 +4663,7 @@ struct AAAlignFloating : AAAlignImpl {
unsigned Alignment = 1;
if (const Value *Base =
GetPointerBaseWithConstantOffset(&V, Offset, DL)) {
+ // TODO: Use AAAlign for the base too.
Align PA = Base->getPointerAlignment(DL);
// BasePointerAddr + Offset = Alignment * Q for some integer Q.
// So we can say that the maximum power of two which is a divisor of
@@ -4578,8 +4687,10 @@ struct AAAlignFloating : AAAlignImpl {
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
// TODO: If we know we visited all incoming values, thus no are assumed
@@ -4657,7 +4768,7 @@ struct AAAlignCallSiteArgument final : AAAlignFloating {
// so we do not need to track a dependence.
const auto &ArgAlignAA = A.getAAFor<AAAlign>(
*this, IRPosition::argument(*Arg), DepClassTy::NONE);
- takeKnownMaximum(ArgAlignAA.getKnownAlign());
+ takeKnownMaximum(ArgAlignAA.getKnownAlign().value());
}
return Changed;
}
@@ -4684,8 +4795,10 @@ struct AAAlignCallSiteReturned final
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
+} // namespace
/// ------------------ Function No-Return Attribute ----------------------------
+namespace {
struct AANoReturnImpl : public AANoReturn {
AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {}
@@ -4753,9 +4866,179 @@ struct AANoReturnCallSite final : AANoReturnImpl {
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
};
+} // namespace
+
+/// ----------------------- Instance Info ---------------------------------
+
+namespace {
+/// A class to hold the state of instance-info attributes.
+struct AAInstanceInfoImpl : public AAInstanceInfo {
+ AAInstanceInfoImpl(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfo(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Value &V = getAssociatedValue();
+ if (auto *C = dyn_cast<Constant>(&V)) {
+ if (C->isThreadDependent())
+ indicatePessimisticFixpoint();
+ else
+ indicateOptimisticFixpoint();
+ return;
+ }
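+    // A call that takes no arguments and neither reads nor affects memory
+    // behaves like a constant and is therefore unique for the analysis.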
+ if (auto *CB = dyn_cast<CallBase>(&V))
+ if (CB->arg_size() == 0 && !CB->mayHaveSideEffects() &&
+ !CB->mayReadFromMemory()) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ Value &V = getAssociatedValue();
+ const Function *Scope = nullptr;
+ if (auto *I = dyn_cast<Instruction>(&V))
+ Scope = I->getFunction();
+ if (auto *A = dyn_cast<Argument>(&V)) {
+ Scope = A->getParent();
+ if (!Scope->hasLocalLinkage())
+ return Changed;
+ }
+ if (!Scope)
+ return indicateOptimisticFixpoint();
+
+ auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
+ if (NoRecurseAA.isAssumedNoRecurse())
+ return Changed;
+
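+    // Walk the uses: follow pass-through instructions, accept plain reads and
+    // stores of other operands, and be conservative about calls that could
+    // route the value back into \p Scope.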
+ auto UsePred = [&](const Use &U, bool &Follow) {
+ const Instruction *UserI = dyn_cast<Instruction>(U.getUser());
+ if (!UserI || isa<GetElementPtrInst>(UserI) || isa<CastInst>(UserI) ||
+ isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+ Follow = true;
+ return true;
+ }
+ if (isa<LoadInst>(UserI) || isa<CmpInst>(UserI) ||
+ (isa<StoreInst>(UserI) &&
+ cast<StoreInst>(UserI)->getValueOperand() != U.get()))
+ return true;
+ if (auto *CB = dyn_cast<CallBase>(UserI)) {
+        // This check does not guarantee uniqueness, but for now it ensures we
+        // cannot end up with two versions of \p U each thinking it is unique.
+ if (!CB->getCalledFunction() ||
+ !CB->getCalledFunction()->hasLocalLinkage())
+ return true;
+ if (!CB->isArgOperand(&U))
+ return false;
+ const auto &ArgInstanceInfoAA = A.getAAFor<AAInstanceInfo>(
+ *this, IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)),
+ DepClassTy::OPTIONAL);
+ if (!ArgInstanceInfoAA.isAssumedUniqueForAnalysis())
+ return false;
+ // If this call base might reach the scope again we might forward the
+ // argument back here. This is very conservative.
+ if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr))
+ return false;
+ return true;
+ }
+ return false;
+ };
+
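+    // Uses reached through a store into a dynamically unique alloca or
+    // allocation are considered equivalent to the original use.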
+ auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) {
+ if (auto *SI = dyn_cast<StoreInst>(OldU.getUser())) {
+ auto *Ptr = SI->getPointerOperand()->stripPointerCasts();
+ if (isa<AllocaInst>(Ptr) && AA::isDynamicallyUnique(A, *this, *Ptr))
+ return true;
+ auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(
+ *SI->getFunction());
+ if (isAllocationFn(Ptr, TLI) && AA::isDynamicallyUnique(A, *this, *Ptr))
+ return true;
+ }
+ return false;
+ };
+
+ if (!A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ true,
+ DepClassTy::OPTIONAL,
+ /* IgnoreDroppableUses */ true, EquivalentUseCB))
+ return indicatePessimisticFixpoint();
+
+ return Changed;
+ }
+
+ /// See AbstractState::getAsStr().
+ const std::string getAsStr() const override {
+ return isAssumedUniqueForAnalysis() ? "<unique [fAa]>" : "<unknown>";
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+};
+
+/// InstanceInfo attribute for floating values.
+struct AAInstanceInfoFloating : AAInstanceInfoImpl {
+ AAInstanceInfoFloating(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoImpl(IRP, A) {}
+};
+
+/// InstanceInfo attribute for function arguments.
+struct AAInstanceInfoArgument final : AAInstanceInfoFloating {
+ AAInstanceInfoArgument(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoFloating(IRP, A) {}
+};
+
+/// InstanceInfo attribute for call site arguments.
+struct AAInstanceInfoCallSiteArgument final : AAInstanceInfoImpl {
+ AAInstanceInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call sites arguments instead of
+ // redirecting requests to the callee argument.
+ Argument *Arg = getAssociatedArgument();
+ if (!Arg)
+ return indicatePessimisticFixpoint();
+ const IRPosition &ArgPos = IRPosition::argument(*Arg);
+ auto &ArgAA =
+ A.getAAFor<AAInstanceInfo>(*this, ArgPos, DepClassTy::REQUIRED);
+ return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ }
+};
+
+/// InstanceInfo attribute for function return value.
+struct AAInstanceInfoReturned final : AAInstanceInfoImpl {
+ AAInstanceInfoReturned(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoImpl(IRP, A) {
+ llvm_unreachable("InstanceInfo is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ llvm_unreachable("InstanceInfo is not applicable to function returns!");
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("InstanceInfo is not applicable to function returns!");
+ }
+};
+
+/// InstanceInfo attribute deduction for a call site return value.
+struct AAInstanceInfoCallSiteReturned final : AAInstanceInfoFloating {
+ AAInstanceInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAInstanceInfoFloating(IRP, A) {}
+};
+} // namespace
/// ----------------------- Variable Capturing ---------------------------------
+namespace {
/// A class to hold the state of no-capture attributes.
struct AANoCaptureImpl : public AANoCapture {
AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {}
@@ -4863,143 +5146,69 @@ struct AANoCaptureImpl : public AANoCapture {
return "assumed not-captured-maybe-returned";
return "assumed-captured";
}
-};
-
-/// Attributor-aware capture tracker.
-struct AACaptureUseTracker final : public CaptureTracker {
-
- /// Create a capture tracker that can lookup in-flight abstract attributes
- /// through the Attributor \p A.
- ///
- /// If a use leads to a potential capture, \p CapturedInMemory is set and the
- /// search is stopped. If a use leads to a return instruction,
- /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
- /// If a use leads to a ptr2int which may capture the value,
- /// \p CapturedInInteger is set. If a use is found that is currently assumed
- /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
- /// set. All values in \p PotentialCopies are later tracked as well. For every
- /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
- /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
- /// conservatively set to true.
- AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
- const AAIsDead &IsDeadAA, AANoCapture::StateType &State,
- SmallSetVector<Value *, 4> &PotentialCopies,
- unsigned &RemainingUsesToExplore)
- : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
- PotentialCopies(PotentialCopies),
- RemainingUsesToExplore(RemainingUsesToExplore) {}
-
- /// Determine if \p V maybe captured. *Also updates the state!*
- bool valueMayBeCaptured(const Value *V) {
- if (V->getType()->isPointerTy()) {
- PointerMayBeCaptured(V, this);
- } else {
- State.indicatePessimisticFixpoint();
- }
- return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
- }
-
- /// See CaptureTracker::tooManyUses().
- void tooManyUses() override {
- State.removeAssumedBits(AANoCapture::NO_CAPTURE);
- }
- bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
- if (CaptureTracker::isDereferenceableOrNull(O, DL))
- return true;
- const auto &DerefAA = A.getAAFor<AADereferenceable>(
- NoCaptureAA, IRPosition::value(*O), DepClassTy::OPTIONAL);
- return DerefAA.getAssumedDereferenceableBytes();
- }
-
- /// See CaptureTracker::captured(...).
- bool captured(const Use *U) override {
- Instruction *UInst = cast<Instruction>(U->getUser());
- LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
- << "\n");
-
- // Because we may reuse the tracker multiple times we keep track of the
- // number of explored uses ourselves as well.
- if (RemainingUsesToExplore-- == 0) {
- LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
- /* Return */ true);
- }
+ /// Check the use \p U and update \p State accordingly. Return true if we
+ /// should continue to update the state.
+ bool checkUse(Attributor &A, AANoCapture::StateType &State, const Use &U,
+ bool &Follow) {
+ Instruction *UInst = cast<Instruction>(U.getUser());
+ LLVM_DEBUG(dbgs() << "[AANoCapture] Check use: " << *U.get() << " in "
+ << *UInst << "\n");
// Deal with ptr2int by following uses.
if (isa<PtrToIntInst>(UInst)) {
LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
- return valueMayBeCaptured(UInst);
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
+ /* Return */ true);
}
- // For stores we check if we can follow the value through memory or not.
- if (auto *SI = dyn_cast<StoreInst>(UInst)) {
- if (SI->isVolatile())
- return isCapturedIn(/* Memory */ true, /* Integer */ false,
- /* Return */ false);
- bool UsedAssumedInformation = false;
- if (!AA::getPotentialCopiesOfStoredValue(
- A, *SI, PotentialCopies, NoCaptureAA, UsedAssumedInformation))
- return isCapturedIn(/* Memory */ true, /* Integer */ false,
- /* Return */ false);
- // Not captured directly, potential copies will be checked.
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+    // For stores we already checked if we can follow them; if they make it
+ // here we give up.
+ if (isa<StoreInst>(UInst))
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ false,
/* Return */ false);
- }
// Explicitly catch return instructions.
if (isa<ReturnInst>(UInst)) {
- if (UInst->getFunction() == NoCaptureAA.getAnchorScope())
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ if (UInst->getFunction() == getAnchorScope())
+ return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ true);
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
/* Return */ true);
}
// For now we only use special logic for call sites. However, the tracker
// itself knows about a lot of other non-capturing cases already.
auto *CB = dyn_cast<CallBase>(UInst);
- if (!CB || !CB->isArgOperand(U))
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ if (!CB || !CB->isArgOperand(&U))
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
/* Return */ true);
- unsigned ArgNo = CB->getArgOperandNo(U);
+ unsigned ArgNo = CB->getArgOperandNo(&U);
const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo);
// If we have a abstract no-capture attribute for the argument we can use
// it to justify a non-capture attribute here. This allows recursion!
auto &ArgNoCaptureAA =
- A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos, DepClassTy::REQUIRED);
+ A.getAAFor<AANoCapture>(*this, CSArgPos, DepClassTy::REQUIRED);
if (ArgNoCaptureAA.isAssumedNoCapture())
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ false);
if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
- addPotentialCopy(*CB);
- return isCapturedIn(/* Memory */ false, /* Integer */ false,
+ Follow = true;
+ return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ false);
}
// Lastly, we could not find a reason no-capture can be assumed so we don't.
- return isCapturedIn(/* Memory */ true, /* Integer */ true,
+ return isCapturedIn(State, /* Memory */ true, /* Integer */ true,
/* Return */ true);
}
- /// Register \p CS as potential copy of the value we are checking.
- void addPotentialCopy(CallBase &CB) { PotentialCopies.insert(&CB); }
-
- /// See CaptureTracker::shouldExplore(...).
- bool shouldExplore(const Use *U) override {
- // Check liveness and ignore droppable users.
- bool UsedAssumedInformation = false;
- return !U->getUser()->isDroppable() &&
- !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA,
- UsedAssumedInformation);
- }
-
- /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
- /// \p CapturedInRet, then return the appropriate value for use in the
- /// CaptureTracker::captured() interface.
- bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
- bool CapturedInRet) {
+ /// Update \p State according to \p CapturedInMem, \p CapturedInInt, and
+ /// \p CapturedInRet, then return true if we should continue updating the
+ /// state.
+ static bool isCapturedIn(AANoCapture::StateType &State, bool CapturedInMem,
+ bool CapturedInInt, bool CapturedInRet) {
LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
<< CapturedInInt << "|Ret " << CapturedInRet << "]\n");
if (CapturedInMem)
@@ -5008,27 +5217,8 @@ struct AACaptureUseTracker final : public CaptureTracker {
State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
if (CapturedInRet)
State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
- return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
+ return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
}
-
-private:
- /// The attributor providing in-flight abstract attributes.
- Attributor &A;
-
- /// The abstract attribute currently updated.
- AANoCapture &NoCaptureAA;
-
- /// The abstract liveness state.
- const AAIsDead &IsDeadAA;
-
- /// The state currently updated.
- AANoCapture::StateType &State;
-
- /// Set of potential copies of the tracked value.
- SmallSetVector<Value *, 4> &PotentialCopies;
-
- /// Global counter to limit the number of explored uses.
- unsigned &RemainingUsesToExplore;
};
ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
@@ -5042,7 +5232,6 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
assert(F && "Expected a function!");
const IRPosition &FnPos = IRPosition::function(*F);
- const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos, DepClassTy::NONE);
AANoCapture::StateType T;
@@ -5059,6 +5248,8 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
// AAReturnedValues, e.g., track all values that escape through returns
// directly somehow.
auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
+ if (!RVAA.getState().isValidState())
+ return false;
bool SeenConstant = false;
for (auto &It : RVAA.returned_values()) {
if (isa<Constant>(It.first)) {
@@ -5094,21 +5285,27 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
}
}
- // Use the CaptureTracker interface and logic with the specialized tracker,
- // defined in AACaptureUseTracker, that can look at in-flight abstract
- // attributes and directly updates the assumed state.
- SmallSetVector<Value *, 4> PotentialCopies;
- unsigned RemainingUsesToExplore =
- getDefaultMaxUsesToExploreForCaptureTracking();
- AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
- RemainingUsesToExplore);
+ auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) {
+ const auto &DerefAA = A.getAAFor<AADereferenceable>(
+ *this, IRPosition::value(*O), DepClassTy::OPTIONAL);
+ return DerefAA.getAssumedDereferenceableBytes();
+ };
+
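+  // Classify every use: pass-through uses are followed, non-capturing uses
+  // are fine, and potentially capturing ones are refined via checkUse.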
+ auto UseCheck = [&](const Use &U, bool &Follow) -> bool {
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::NO_CAPTURE:
+ return true;
+ case UseCaptureKind::MAY_CAPTURE:
+ return checkUse(A, T, U, Follow);
+ case UseCaptureKind::PASSTHROUGH:
+ Follow = true;
+ return true;
+ }
+ llvm_unreachable("Unexpected use capture kind!");
+ };
- // Check all potential copies of the associated value until we can assume
- // none will be captured or we have to assume at least one might be.
- unsigned Idx = 0;
- PotentialCopies.insert(V);
- while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
- Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
+ if (!A.checkForAllUses(UseCheck, *this, *V))
+ return indicatePessimisticFixpoint();
AANoCapture::StateType &S = getState();
auto Assumed = S.getAssumed();
@@ -5208,6 +5405,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
+} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5219,7 +5417,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return false;
LLVM_DEBUG({
- if (SimplifiedAssociatedValue.hasValue())
+ if (SimplifiedAssociatedValue)
dbgs() << "[ValueSimplify] is assumed to be "
<< **SimplifiedAssociatedValue << "\n";
else
@@ -5228,6 +5426,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
+namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -5243,9 +5442,9 @@ struct AAValueSimplifyImpl : AAValueSimplify {
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
LLVM_DEBUG({
- errs() << "SAV: " << SimplifiedAssociatedValue << " ";
+ dbgs() << "SAV: " << (bool)SimplifiedAssociatedValue << " ";
if (SimplifiedAssociatedValue && *SimplifiedAssociatedValue)
- errs() << "SAV: " << **SimplifiedAssociatedValue << " ";
+ dbgs() << "SAV: " << **SimplifiedAssociatedValue << " ";
});
return isValidState() ? (isAtFixpoint() ? "simplified" : "maybe-simple")
: "not-simple";
@@ -5259,24 +5458,101 @@ struct AAValueSimplifyImpl : AAValueSimplify {
return SimplifiedAssociatedValue;
}
+ /// Ensure the return value is \p V with type \p Ty; if that is not possible,
+ /// return nullptr. If \p Check is true we will only verify such an operation
+ /// would succeed and return a non-nullptr value if that is the case. No IR
+ /// is generated or modified.
+ static Value *ensureType(Attributor &A, Value &V, Type &Ty, Instruction *CtxI,
+ bool Check) {
+ if (auto *TypedV = AA::getWithType(V, Ty))
+ return TypedV;
+ if (CtxI && V.getType()->canLosslesslyBitCastTo(&Ty))
+ return Check ? &V
+ : BitCastInst::CreatePointerBitCastOrAddrSpaceCast(&V, &Ty,
+ "", CtxI);
+ return nullptr;
+ }
+
+ /// Reproduce \p I with type \p Ty or return nullptr if that is not possible.
+ /// If \p Check is true we will only verify such an operation would succeed
+ /// and return a non-nullptr value if that is the case. No IR is generated
+ /// or modified.
+ static Value *reproduceInst(Attributor &A,
+ const AbstractAttribute &QueryingAA,
+ Instruction &I, Type &Ty, Instruction *CtxI,
+ bool Check, ValueToValueMapTy &VMap) {
+ assert(CtxI && "Cannot reproduce an instruction without context!");
+ if (Check && (I.mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(&I, CtxI, /* DT */ nullptr,
+ /* TLI */ nullptr)))
+ return nullptr;
+ for (Value *Op : I.operands()) {
+ Value *NewOp = reproduceValue(A, QueryingAA, *Op, Ty, CtxI, Check, VMap);
+ if (!NewOp) {
+ assert(Check && "Manifest of new value unexpectedly failed!");
+ return nullptr;
+ }
+ if (!Check)
+ VMap[Op] = NewOp;
+ }
+ if (Check)
+ return &I;
+
+ Instruction *CloneI = I.clone();
+ // TODO: Try to salvage debug information here.
+ CloneI->setDebugLoc(DebugLoc());
+ VMap[&I] = CloneI;
+ CloneI->insertBefore(CtxI);
+ RemapInstruction(CloneI, VMap);
+ return CloneI;
+ }
+
+ /// Reproduce \p V with type \p Ty or return nullptr if that is not possible.
+ /// If \p Check is true we will only verify such an operation would succeed
+ /// and return a non-nullptr value if that is the case. No IR is generated
+ /// or modified.
+ static Value *reproduceValue(Attributor &A,
+ const AbstractAttribute &QueryingAA, Value &V,
+ Type &Ty, Instruction *CtxI, bool Check,
+ ValueToValueMapTy &VMap) {
+ if (const auto &NewV = VMap.lookup(&V))
+ return NewV;
+ bool UsedAssumedInformation = false;
+ Optional<Value *> SimpleV =
+ A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation);
+ if (!SimpleV)
+ return PoisonValue::get(&Ty);
+ Value *EffectiveV = &V;
+ if (SimpleV.getValue())
+ EffectiveV = SimpleV.getValue();
+ if (auto *C = dyn_cast<Constant>(EffectiveV))
+ if (!C->canTrap())
+ return C;
+ if (CtxI && AA::isValidAtPosition(AA::ValueAndContext(*EffectiveV, *CtxI),
+ A.getInfoCache()))
+ return ensureType(A, *EffectiveV, Ty, CtxI, Check);
+ if (auto *I = dyn_cast<Instruction>(EffectiveV))
+ if (Value *NewV = reproduceInst(A, QueryingAA, *I, Ty, CtxI, Check, VMap))
+ return ensureType(A, *NewV, Ty, CtxI, Check);
+ return nullptr;
+ }
+
/// Return a value we can use as replacement for the associated one, or
/// nullptr if we don't have one that makes sense.
- Value *getReplacementValue(Attributor &A) const {
- Value *NewV;
- NewV = SimplifiedAssociatedValue.hasValue()
- ? SimplifiedAssociatedValue.getValue()
- : UndefValue::get(getAssociatedType());
- if (!NewV)
- return nullptr;
- NewV = AA::getWithType(*NewV, *getAssociatedType());
- if (!NewV || NewV == &getAssociatedValue())
- return nullptr;
- const Instruction *CtxI = getCtxI();
- if (CtxI && !AA::isValidAtPosition(*NewV, *CtxI, A.getInfoCache()))
- return nullptr;
- if (!CtxI && !AA::isValidInScope(*NewV, getAnchorScope()))
- return nullptr;
- return NewV;
+ Value *manifestReplacementValue(Attributor &A, Instruction *CtxI) const {
+ Value *NewV = SimplifiedAssociatedValue
+ ? SimplifiedAssociatedValue.getValue()
+ : UndefValue::get(getAssociatedType());
+ if (NewV && NewV != &getAssociatedValue()) {
+ ValueToValueMapTy VMap;
+ // First verify we can reproduce the value with the required type at the
+ // context location before we actually start modifying the IR.
+ if (reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI,
+ /* CheckOnly */ true, VMap))
+ return reproduceValue(A, *this, *NewV, *getAssociatedType(), CtxI,
+ /* CheckOnly */ false, VMap);
+ }
+ return nullptr;
}
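manifestReplacementValue runs reproduceValue twice on purpose: once in check-only mode to prove the whole operand tree can be rebuilt at the insertion point, and once to actually emit it, so a late failure can never leave half-created IR behind. A small sketch of this check-then-materialize pattern, with std::optional standing in for the LLVM types:

    #include <cstdio>
    #include <optional>
    #include <string>

    // The same routine runs once with Check=true (feasibility only, no side
    // effects) and once with Check=false (actually builds the result).
    static std::optional<std::string> reproduce(const std::string &In,
                                                bool Check) {
      if (In.empty())
        return std::nullopt;   // cannot reproduce at all
      if (Check)
        return In;             // feasible; nothing materialized yet
      return "copy-of-" + In;  // second run really constructs the value
    }

    int main() {
      std::string V = "v";
      if (reproduce(V, /*Check=*/true))                 // verify first ...
        printf("%s\n", reproduce(V, false)->c_str());   // ... then build
    }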
/// Helper function for querying AAValueSimplify and updating the candidate.
@@ -5300,14 +5576,14 @@ struct AAValueSimplifyImpl : AAValueSimplify {
const auto &AA =
A.getAAFor<AAType>(*this, getIRPosition(), DepClassTy::NONE);
- Optional<ConstantInt *> COpt = AA.getAssumedConstantInt(A);
+ Optional<Constant *> COpt = AA.getAssumedConstant(A);
- if (!COpt.hasValue()) {
+ if (!COpt) {
SimplifiedAssociatedValue = llvm::None;
A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
return true;
}
- if (auto *C = COpt.getValue()) {
+ if (auto *C = *COpt) {
SimplifiedAssociatedValue = C;
A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
return true;
@@ -5318,7 +5594,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
bool askSimplifiedValueForOtherAAs(Attributor &A) {
if (askSimplifiedValueFor<AAValueConstantRange>(A))
return true;
- if (askSimplifiedValueFor<AAPotentialValues>(A))
+ if (askSimplifiedValueFor<AAPotentialConstantValues>(A))
return true;
return false;
}
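Many hunks in this patch drop Optional::hasValue()/getValue() in favor of the contextual bool conversion and operator*, and the logic above leans on the resulting tri-state: an empty Optional means "no answer yet, keep waiting", a present null pointer means "known to be unsimplifiable", and a present non-null pointer is the simplified value. A self-contained sketch with std::optional, whose API llvm::Optional mirrors:

    #include <cstdio>
    #include <optional>

    int main() {
      std::optional<int *> COpt; // tri-state: unset, set-to-null, set
      if (!COpt)
        puts("no answer yet: keep waiting");   // Optional is empty
      int X = 42;
      COpt = &X;
      if (COpt && *COpt)                       // present and non-null
        printf("simplified to %d\n", **COpt);
      COpt = nullptr;                          // present, but null
      if (COpt && !*COpt)
        puts("known: not simplifiable to a single value");
    }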
@@ -5326,14 +5602,18 @@ struct AAValueSimplifyImpl : AAValueSimplify {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- if (getAssociatedValue().user_empty())
- return Changed;
-
- if (auto *NewV = getReplacementValue(A)) {
- LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() << " -> "
- << *NewV << " :: " << *this << "\n");
- if (A.changeValueAfterManifest(getAssociatedValue(), *NewV))
- Changed = ChangeStatus::CHANGED;
+ for (auto &U : getAssociatedValue().uses()) {
+ // Check if we need to adjust the insertion point to make sure the IR is
+ // valid.
+ Instruction *IP = dyn_cast<Instruction>(U.getUser());
+ if (auto *PHI = dyn_cast_or_null<PHINode>(IP))
+ IP = PHI->getIncomingBlock(U)->getTerminator();
+ if (auto *NewV = manifestReplacementValue(A, IP)) {
+ LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue()
+ << " -> " << *NewV << " :: " << *this << "\n");
+ if (A.changeUseAfterManifest(U, *NewV))
+ Changed = ChangeStatus::CHANGED;
+ }
}
return Changed | AAValueSimplify::manifest(A);
@@ -5344,73 +5624,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
SimplifiedAssociatedValue = &getAssociatedValue();
return AAValueSimplify::indicatePessimisticFixpoint();
}
-
- static bool handleLoad(Attributor &A, const AbstractAttribute &AA,
- LoadInst &L, function_ref<bool(Value &)> Union) {
- auto UnionWrapper = [&](Value &V, Value &Obj) {
- if (isa<AllocaInst>(Obj))
- return Union(V);
- if (!AA::isDynamicallyUnique(A, AA, V))
- return false;
- if (!AA::isValidAtPosition(V, L, A.getInfoCache()))
- return false;
- return Union(V);
- };
-
- Value &Ptr = *L.getPointerOperand();
- SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L))
- return false;
-
- const auto *TLI =
- A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction());
- for (Value *Obj : Objects) {
- LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
- if (isa<UndefValue>(Obj))
- continue;
- if (isa<ConstantPointerNull>(Obj)) {
- // A null pointer access can be undefined but any offset from null may
- // be OK. We do not try to optimize the latter.
- bool UsedAssumedInformation = false;
- if (!NullPointerIsDefined(L.getFunction(),
- Ptr.getType()->getPointerAddressSpace()) &&
- A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj)
- continue;
- return false;
- }
- Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI);
- if (!InitialVal || !Union(*InitialVal))
- return false;
-
- LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store "
- "propagation, checking accesses next.\n");
-
- auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
- LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
- if (Acc.isWrittenValueYetUndetermined())
- return true;
- Value *Content = Acc.getWrittenValue();
- if (!Content)
- return false;
- Value *CastedContent =
- AA::getWithType(*Content, *AA.getAssociatedType());
- if (!CastedContent)
- return false;
- if (IsExact)
- return UnionWrapper(*CastedContent, *Obj);
- if (auto *C = dyn_cast<Constant>(CastedContent))
- if (C->isNullValue() || C->isAllOnesValue() || isa<UndefValue>(C))
- return UnionWrapper(*CastedContent, *Obj);
- return false;
- };
-
- auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
- DepClassTy::REQUIRED);
- if (!PI.forallInterferingWrites(A, AA, L, CheckAccess))
- return false;
- }
- return true;
- }
};
struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
@@ -5425,15 +5638,6 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
Attribute::StructRet, Attribute::Nest, Attribute::ByVal},
/* IgnoreSubsumingPositions */ true))
indicatePessimisticFixpoint();
-
- // FIXME: This is a hack to prevent us from propagating function poiner in
- // the new pass manager CGSCC pass as it creates call edges the
- // CallGraphUpdater cannot handle yet.
- Value &V = getAssociatedValue();
- if (V.getType()->isPointerTy() &&
- V.getType()->getPointerElementType()->isFunctionTy() &&
- !A.isModulePass())
- indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
@@ -5466,7 +5670,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
bool UsedAssumedInformation = false;
Optional<Constant *> SimpleArgOp =
A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation);
- if (!SimpleArgOp.hasValue())
+ if (!SimpleArgOp)
return true;
if (!SimpleArgOp.getValue())
return false;
@@ -5477,14 +5681,14 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
// Generate an answer specific to a call site context.
bool Success;
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (hasCallBaseContext() &&
getCallBaseContext()->getCalledFunction() == Arg->getParent())
Success = PredForCallSite(
AbstractCallSite(&getCallBaseContext()->getCalledOperandUse()));
else
Success = A.checkForAllCallSites(PredForCallSite, *this, true,
- AllCallSitesKnown);
+ UsedAssumedInformation);
if (!Success)
if (!askSimplifiedValueForOtherAAs(A))
@@ -5516,12 +5720,16 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
- auto PredForReturned = [&](Value &V) {
- return checkAndUpdate(A, *this,
- IRPosition::value(V, getCallBaseContext()));
+ auto ReturnInstCB = [&](Instruction &I) {
+ auto &RI = cast<ReturnInst>(I);
+ return checkAndUpdate(
+ A, *this,
+ IRPosition::value(*RI.getReturnValue(), getCallBaseContext()));
};
- if (!A.checkForAllReturnedValues(PredForReturned, *this))
+ bool UsedAssumedInformation = false;
+ if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret},
+ UsedAssumedInformation))
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
@@ -5531,29 +5739,9 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
}
ChangeStatus manifest(Attributor &A) override {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- if (auto *NewV = getReplacementValue(A)) {
- auto PredForReturned =
- [&](Value &, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
- for (ReturnInst *RI : RetInsts) {
- Value *ReturnedVal = RI->getReturnValue();
- if (ReturnedVal == NewV || isa<UndefValue>(ReturnedVal))
- return true;
- assert(RI->getFunction() == getAnchorScope() &&
- "ReturnInst in wrong function!");
- LLVM_DEBUG(dbgs()
- << "[ValueSimplify] " << *ReturnedVal << " -> "
- << *NewV << " in " << *RI << " :: " << *this << "\n");
- if (A.changeUseAfterManifest(RI->getOperandUse(0), *NewV))
- Changed = ChangeStatus::CHANGED;
- }
- return true;
- };
- A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this);
- }
-
- return Changed | AAValueSimplify::manifest(A);
+ // We queried AAValueSimplify for the returned values so they will be
+ // replaced if a simplified form was found. Nothing to do here.
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -5597,7 +5785,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return true;
if (!SimplifiedLHS.getValue())
return false;
@@ -5606,7 +5794,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return true;
if (!SimplifiedRHS.getValue())
return false;
@@ -5662,15 +5850,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
return true;
}
- bool updateWithLoad(Attributor &A, LoadInst &L) {
- auto Union = [&](Value &V) {
- SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
- SimplifiedAssociatedValue, &V, L.getType());
- return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
- };
- return handleLoad(A, *this, L, Union);
- }
-
/// Use the generic, non-optimistic InstSimplify functionality if we managed to
/// simplify any operand of the instruction \p I. Return true if successful,
/// in that case SimplifiedAssociatedValue will be updated.
@@ -5686,7 +5865,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
*this, UsedAssumedInformation);
// If we are not sure about any operand we are not sure about the entire
// instruction, we'll wait.
- if (!SimplifiedOp.hasValue())
+ if (!SimplifiedOp)
return true;
if (SimplifiedOp.getValue())
@@ -5714,7 +5893,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
const DataLayout &DL = I.getModule()->getDataLayout();
SimplifyQuery Q(DL, TLI, DT, AC, &I);
if (Value *SimplifiedI =
- SimplifyInstructionWithOperands(&I, NewOps, Q, ORE)) {
+ simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) {
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, SimplifiedI, I.getType());
return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
@@ -5726,6 +5905,36 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
+ // Do not simplify loads that are only used in llvm.assume if we cannot also
+ // remove all stores that may feed into the load. The reason is that the
+ // assume is probably worth something as long as the stores are around.
+ if (auto *LI = dyn_cast<LoadInst>(&getAssociatedValue())) {
+ InformationCache &InfoCache = A.getInfoCache();
+ if (InfoCache.isOnlyUsedByAssume(*LI)) {
+ SmallSetVector<Value *, 4> PotentialCopies;
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ bool UsedAssumedInformation = false;
+ if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies,
+ PotentialValueOrigins, *this,
+ UsedAssumedInformation,
+ /* OnlyExact */ true)) {
+ if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
+ if (!I)
+ return true;
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return A.isAssumedDead(SI->getOperandUse(0), this,
+ /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ }))
+ return indicatePessimisticFixpoint();
+ }
+ }
+ }
+
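The guard above only lets the load be simplified away when every recorded value origin is already assumed dead; a null origin means there is no concrete store that needs to stay alive for the assume's sake. A minimal analogue of that all_of check, with invented types:

    #include <algorithm>
    #include <vector>

    struct Inst { bool AssumedDead; };

    // A null origin is tolerated (nothing to keep alive); any live origin
    // blocks removing the load that feeds the assume.
    static bool allOriginsRemovable(const std::vector<Inst *> &Origins) {
      return std::all_of(Origins.begin(), Origins.end(), [](const Inst *I) {
        if (!I)
          return true;
        return I->AssumedDead;
      });
    }

    int main() {
      Inst LiveStore{false};
      std::vector<Inst *> Origins = {nullptr, &LiveStore};
      return allOriginsRemovable(Origins) ? 1 : 0; // 0: a store is live
    }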
auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
bool Stripped) -> bool {
auto &AA = A.getAAFor<AAValueSimplify>(
@@ -5734,9 +5943,6 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!Stripped && this == &AA) {
if (auto *I = dyn_cast<Instruction>(&V)) {
- if (auto *LI = dyn_cast<LoadInst>(&V))
- if (updateWithLoad(A, *LI))
- return true;
if (auto *Cmp = dyn_cast<CmpInst>(&V))
if (handleCmp(A, *Cmp))
return true;
@@ -5754,8 +5960,10 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
};
bool Dummy = false;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy,
VisitValueCB, getCtxI(),
+ UsedAssumedInformation,
/* UseValueSimplify */ false))
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
@@ -5806,8 +6014,23 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
void initialize(Attributor &A) override {
AAValueSimplifyImpl::initialize(A);
- if (!getAssociatedFunction())
+ Function *Fn = getAssociatedFunction();
+ if (!Fn) {
indicatePessimisticFixpoint();
+ return;
+ }
+ for (Argument &Arg : Fn->args()) {
+ if (Arg.hasReturnedAttr()) {
+ auto IRP = IRPosition::callsite_argument(*cast<CallBase>(getCtxI()),
+ Arg.getArgNo());
+ if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_ARGUMENT &&
+ checkAndUpdate(A, *this, IRP))
+ indicateOptimisticFixpoint();
+ else
+ indicatePessimisticFixpoint();
+ return;
+ }
+ }
}
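The loop above shortcuts call-site-return simplification through the callee's returned argument attribute: when a parameter is marked returned, the call result is simply that argument at every call site. A conceptual illustration in plain C++, not LLVM API:

    // A callee that always returns one of its parameters unchanged lets
    // every call site forward the argument directly.
    static int *identityLike(int *P /* think: parameter marked `returned` */) {
      return P;
    }

    int main() {
      int X = 5;
      int *R = identityLike(&X);
      // After the shortcut, uses of the call result become uses of &X.
      return R == &X ? 0 : 1; // always 0
    }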
/// See AbstractAttribute::updateImpl(...).
@@ -5845,8 +6068,13 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ // TODO: We should avoid simplification duplication to begin with.
+ auto *FloatAA = A.lookupAAFor<AAValueSimplify>(
+ IRPosition::value(getAssociatedValue()), this, DepClassTy::NONE);
+ if (FloatAA && FloatAA->getState().isValidState())
+ return Changed;
- if (auto *NewV = getReplacementValue(A)) {
+ if (auto *NewV = manifestReplacementValue(A, getCtxI())) {
Use &U = cast<CallBase>(&getAnchorValue())
->getArgOperandUse(getCallSiteArgNo());
if (A.changeUseAfterManifest(U, *NewV))
@@ -5860,8 +6088,10 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
STATS_DECLTRACK_CSARG_ATTR(value_simplify)
}
};
+} // namespace
/// ----------------------- Heap-To-Stack Conversion ---------------------------
+namespace {
struct AAHeapToStackFunction final : public AAHeapToStack {
struct AllocationInfo {
@@ -5883,7 +6113,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool HasPotentiallyFreeingUnknownUses = false;
/// The set of free calls that use this allocation.
- SmallPtrSet<CallBase *, 1> PotentialFreeCalls{};
+ SmallSetVector<CallBase *, 1> PotentialFreeCalls{};
};
struct DeallocationInfo {
@@ -5895,7 +6125,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool MightFreeUnknownObjects = false;
/// The set of allocation calls that are potentially freed.
- SmallPtrSet<CallBase *, 1> PotentialAllocationCalls{};
+ SmallSetVector<CallBase *, 1> PotentialAllocationCalls{};
};
AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
@@ -5905,9 +6135,9 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
// Ensure we call the destructor so we release any memory allocated in the
// sets.
for (auto &It : AllocationInfos)
- It.getSecond()->~AllocationInfo();
+ It.second->~AllocationInfo();
for (auto &It : DeallocationInfos)
- It.getSecond()->~DeallocationInfo();
+ It.second->~DeallocationInfo();
}
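The explicit destructor calls are needed because the info objects are placement-new'ed into the Attributor's bump allocator, which hands back raw memory in bulk and never runs destructors itself. A self-contained sketch of the same ownership pattern using std::pmr; the LLVM allocator behaves analogously:

    #include <memory_resource>
    #include <new>
    #include <vector>

    struct Info { std::vector<int> Calls; }; // owns heap memory of its own

    int main() {
      std::pmr::monotonic_buffer_resource Arena;
      void *Mem = Arena.allocate(sizeof(Info), alignof(Info));
      Info *I = new (Mem) Info{}; // placement-new into the arena
      I->Calls.push_back(1);
      I->~Info(); // run the destructor by hand; the arena only reclaims raw
                  // bytes on destruction, it never calls ~Info()
    }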
void initialize(Attributor &A) override {
@@ -5932,7 +6162,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) {
AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB};
AllocationInfos[CB] = AI;
- TLI->getLibFunc(*CB, AI->LibraryFunctionId);
+ if (TLI)
+ TLI->getLibFunc(*CB, AI->LibraryFunctionId);
}
}
return true;
@@ -5945,6 +6176,16 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
/* CheckPotentiallyDead */ true);
(void)Success;
assert(Success && "Did not expect the call base visit callback to fail!");
+
+ Attributor::SimplifictionCallbackTy SCB =
+ [](const IRPosition &, const AbstractAttribute *,
+ bool &) -> Optional<Value *> { return nullptr; };
+ for (const auto &It : AllocationInfos)
+ A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first),
+ SCB);
+ for (const auto &It : DeallocationInfos)
+ A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first),
+ SCB);
}
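Registering a callback for each allocation and deallocation site reserves those positions: simplification queries are answered by the callback rather than the generic machinery, and the present-but-null result is read, roughly, as "do not replace this call", keeping other AAs from rewriting calls heap-to-stack may still transform. A toy registry in that spirit; all names and types here are invented:

    #include <functional>
    #include <map>
    #include <optional>

    using Value = int;
    using SimplifyCB = std::function<std::optional<Value *>()>;

    int main() {
      std::map<int, SimplifyCB> Registry; // keyed by a fake position id
      Registry[7] = []() -> std::optional<Value *> { return nullptr; };

      auto It = Registry.find(7);
      std::optional<Value *> R = It->second();
      // Present but null: the query is answered, and the answer is
      // "leave the value as it is".
      return (R.has_value() && *R == nullptr) ? 0 : 1;
    }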
const std::string getAsStr() const override {
@@ -5971,7 +6212,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool isAssumedHeapToStack(const CallBase &CB) const override {
if (isValidState())
- if (AllocationInfo *AI = AllocationInfos.lookup(&CB))
+ if (AllocationInfo *AI =
+ AllocationInfos.lookup(const_cast<CallBase *>(&CB)))
return AI->Status != AllocationInfo::INVALID;
return false;
}
@@ -6000,6 +6242,17 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+ LoopInfo *LI =
+ A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
+ Optional<bool> MayContainIrreducibleControl;
+ auto IsInLoop = [&](BasicBlock &BB) {
+ if (!MayContainIrreducibleControl.has_value())
+ MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
+ if (MayContainIrreducibleControl.value())
+ return true;
+ return LI->getLoopFor(&BB) != nullptr;
+ };
+
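IsInLoop memoizes the expensive whole-function irreducibility query in an Optional so it runs at most once, then falls back to a cheap per-block loop lookup. The same lazy-caching shape with std::optional:

    #include <cstdio>
    #include <optional>

    static bool expensiveWholeFunctionCheck() {
      puts("computed once");
      return false;
    }

    int main() {
      std::optional<bool> Cached;
      auto InLoop = [&](int BB) {
        if (!Cached.has_value())
          Cached = expensiveWholeFunctionCheck(); // runs at most once
        if (*Cached)
          return true;      // conservatively treat every block as looping
        return BB % 2 == 0; // stand-in for LI->getLoopFor(&BB) != nullptr
      };
      printf("%d\n", InLoop(0)); // prints "computed once", then 1
      printf("%d\n", InLoop(1)); // cached: prints only 0
    }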
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
@@ -6026,13 +6279,13 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
else
A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark);
+ const DataLayout &DL = A.getInfoCache().getDL();
Value *Size;
Optional<APInt> SizeAPI = getSize(A, *this, AI);
- if (SizeAPI.hasValue()) {
+ if (SizeAPI) {
Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
} else {
LLVMContext &Ctx = AI.CB->getContext();
- auto &DL = A.getInfoCache().getDL();
ObjectSizeOpts Opts;
ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, Opts);
SizeOffsetEvalType SizeOffsetPair = Eval.compute(AI.CB);
@@ -6041,32 +6294,36 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Size = SizeOffsetPair.first;
}
+ Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent()))
+ ? AI.CB
+ : &F->getEntryBlock().front();
+
Align Alignment(1);
if (MaybeAlign RetAlign = AI.CB->getRetAlign())
- Alignment = max(Alignment, RetAlign);
+ Alignment = std::max(Alignment, *RetAlign);
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
Optional<APInt> AlignmentAPI = getAPInt(A, *this, *Align);
- assert(AlignmentAPI.hasValue() &&
+ assert(AlignmentAPI && AlignmentAPI.getValue().getZExtValue() > 0 &&
"Expected an alignment during manifest!");
- Alignment =
- max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue()));
+ Alignment = std::max(
+ Alignment, assumeAligned(AlignmentAPI.getValue().getZExtValue()));
}
- unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace();
- Instruction *Alloca =
- new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
- "", AI.CB->getNextNode());
+ // TODO: Hoist the alloca towards the function entry.
+ unsigned AS = DL.getAllocaAddrSpace();
+ Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
+ Size, Alignment, "", IP);
if (Alloca->getType() != AI.CB->getType())
- Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc",
- Alloca->getNextNode());
+ Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+ Alloca, AI.CB->getType(), "malloc_cast", AI.CB);
auto *I8Ty = Type::getInt8Ty(F->getContext());
auto *InitVal = getInitialValueOfAllocation(AI.CB, TLI, I8Ty);
assert(InitVal &&
"Must be able to materialize initial memory state of allocation");
- A.changeValueAfterManifest(*AI.CB, *Alloca);
+ A.changeAfterManifest(IRPosition::inst(*AI.CB), *Alloca);
if (auto *II = dyn_cast<InvokeInst>(AI.CB)) {
auto *NBB = II->getNormalDest();
@@ -6095,7 +6352,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
bool UsedAssumedInformation = false;
Optional<Constant *> SimpleV =
A.getAssumedConstant(V, AA, UsedAssumedInformation);
- if (!SimpleV.hasValue())
+ if (!SimpleV)
return APInt(64, 0);
if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
return CI->getValue();
@@ -6120,11 +6377,11 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
/// Collection of all malloc-like calls in a function with associated
/// information.
- DenseMap<CallBase *, AllocationInfo *> AllocationInfos;
+ MapVector<CallBase *, AllocationInfo *> AllocationInfos;
/// Collection of all free-like calls in a function with associated
/// information.
- DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos;
+ MapVector<CallBase *, DeallocationInfo *> DeallocationInfos;
ChangeStatus updateImpl(Attributor &A) override;
};
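Switching these maps from DenseMap to MapVector buys deterministic, insertion-ordered iteration, which matters because the manifest loop over AllocationInfos creates IR; hash-order iteration over pointer keys would make the output vary from run to run. A minimal container with the same layout idea (llvm::MapVector likewise pairs a map for lookup with a vector for ordered traversal):

    #include <cstdio>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    template <typename K, typename V> struct OrderedMap {
      std::unordered_map<K, size_t> Index;       // fast lookup
      std::vector<std::pair<K, V>> Entries;      // stable iteration order
      V &operator[](const K &Key) {
        auto It = Index.find(Key);
        if (It == Index.end()) {
          Index.emplace(Key, Entries.size());
          Entries.emplace_back(Key, V{});
          return Entries.back().second;
        }
        return Entries[It->second].second;
      }
    };

    int main() {
      OrderedMap<int, int> M;
      M[30] = 1; M[10] = 2; M[20] = 3;
      for (auto &KV : M.Entries) // always 30 10 20: insertion order
        printf("%d ", KV.first);
    }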
@@ -6167,7 +6424,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
// branches etc.
SmallVector<Value *, 8> Objects;
if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
- *this, DI.CB)) {
+ *this, DI.CB,
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
@@ -6239,6 +6497,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
dbgs() << "[H2S] unique free call might free unknown allocations\n");
return false;
}
+ if (DI->PotentialAllocationCalls.empty())
+ return true;
if (DI->PotentialAllocationCalls.size() > 1) {
LLVM_DEBUG(dbgs() << "[H2S] unique free call might free "
<< DI->PotentialAllocationCalls.size()
@@ -6316,7 +6576,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
if (ValidUsesOnly &&
AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
- A.emitRemark<OptimizationRemarkMissed>(AI.CB, "OMP113", Remark);
+ A.emitRemark<OptimizationRemarkMissed>(CB, "OMP113", Remark);
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
ValidUsesOnly = false;
@@ -6348,7 +6608,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
continue;
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
- if (!getAPInt(A, *this, *Align)) {
+ Optional<APInt> APAlign = getAPInt(A, *this, *Align);
+ if (!APAlign) {
// Can't generate an alloca which respects the required alignment
// on the allocation.
LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB
@@ -6356,14 +6617,23 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
+ } else {
+ if (APAlign->ugt(llvm::Value::MaximumAlignment) ||
+ !APAlign->isPowerOf2()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign
+ << "\n");
+ AI.Status = AllocationInfo::INVALID;
+ Changed = ChangeStatus::CHANGED;
+ continue;
+ }
}
}
if (MaxHeapToStackSize != -1) {
Optional<APInt> Size = getSize(A, *this, AI);
- if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
+ if (!Size || Size.getValue().ugt(MaxHeapToStackSize)) {
LLVM_DEBUG({
- if (!Size.hasValue())
+ if (!Size)
dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n";
else
dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
@@ -6395,8 +6665,10 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
return Changed;
}
+} // namespace
/// ----------------------- Privatizable Pointers ------------------------------
+namespace {
struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {}
@@ -6414,9 +6686,9 @@ struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
/// Return a privatizable type that encloses both T0 and T1.
/// TODO: This is merely a stub for now as we should manage a mapping as well.
Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
- if (!T0.hasValue())
+ if (!T0)
return T1;
- if (!T1.hasValue())
+ if (!T1)
return T0;
if (T0 == T1)
return T0;
@@ -6445,11 +6717,13 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Optional<Type *> identifyPrivatizableType(Attributor &A) override {
// If this is a byval argument and we know all the call sites (so we can
// rewrite them), there is no need to check them explicitly.
- bool AllCallSitesKnown;
- if (getIRPosition().hasAttr(Attribute::ByVal) &&
+ bool UsedAssumedInformation = false;
+ SmallVector<Attribute, 1> Attrs;
+ getAttrs({Attribute::ByVal}, Attrs, /* IgnoreSubsumingPositions */ true);
+ if (!Attrs.empty() &&
A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
- true, AllCallSitesKnown))
- return getAssociatedValue().getType()->getPointerElementType();
+ true, UsedAssumedInformation))
+ return Attrs[0].getValueAsType();
Optional<Type *> Ty;
unsigned ArgNo = getIRPosition().getCallSiteArgNo();
@@ -6474,9 +6748,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
- if (CSTy.hasValue() && CSTy.getValue())
+ if (CSTy && CSTy.getValue())
CSTy.getValue()->print(dbgs());
- else if (CSTy.hasValue())
+ else if (CSTy)
dbgs() << "<nullptr>";
else
dbgs() << "<none>";
@@ -6486,19 +6760,20 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << " : New Type: ";
- if (Ty.hasValue() && Ty.getValue())
+ if (Ty && Ty.getValue())
Ty.getValue()->print(dbgs());
- else if (Ty.hasValue())
+ else if (Ty)
dbgs() << "<nullptr>";
else
dbgs() << "<none>";
dbgs() << "\n";
});
- return !Ty.hasValue() || Ty.getValue();
+ return !Ty || Ty.getValue();
};
- if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown))
+ if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
+ UsedAssumedInformation))
return nullptr;
return Ty;
}
@@ -6506,7 +6781,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
PrivatizableType = identifyPrivatizableType(A);
- if (!PrivatizableType.hasValue())
+ if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
if (!PrivatizableType.getValue())
return indicatePessimisticFixpoint();
@@ -6518,8 +6793,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// Avoid arguments with padding for now.
if (!getIRPosition().hasAttr(Attribute::ByVal) &&
- !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
- A.getInfoCache().getDL())) {
+ !isDenselyPacked(*PrivatizableType, A.getInfoCache().getDL())) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
return indicatePessimisticFixpoint();
}
@@ -6527,7 +6801,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// Collect the types that will replace the privatizable type in the function
// signature.
SmallVector<Type *, 16> ReplacementTypes;
- identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+ identifyReplacementTypes(*PrivatizableType, ReplacementTypes);
// Verify callee and caller agree on how the promoted argument would be
// passed.
@@ -6545,9 +6819,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
return TTI->areTypesABICompatible(
CB->getCaller(), CB->getCalledFunction(), ReplacementTypes);
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
- AllCallSitesKnown)) {
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for "
<< Fn.getName() << "\n");
@@ -6595,7 +6869,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
*this, IRPosition::argument(CBArg), DepClassTy::REQUIRED);
if (CBArgPrivAA.isValidState()) {
auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
- if (!CBArgPrivTy.hasValue())
+ if (!CBArgPrivTy)
continue;
if (CBArgPrivTy.getValue() == PrivatizableType)
continue;
@@ -6642,7 +6916,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
DepClassTy::REQUIRED);
if (DCArgPrivAA.isValidState()) {
auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
- if (!DCArgPrivTy.hasValue())
+ if (!DCArgPrivTy)
return true;
if (DCArgPrivTy.getValue() == PrivatizableType)
return true;
@@ -6674,7 +6948,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
};
if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
@@ -6749,8 +7023,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Type *PrivPtrType = PrivType->getPointerTo();
if (Base->getType() != PrivPtrType)
- Base = BitCastInst::CreateBitOrPointerCast(Base, PrivPtrType, "",
- ACS.getInstruction());
+ Base = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+ Base, PrivPtrType, "", ACS.getInstruction());
// Traverse the type, build GEPs and loads.
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
@@ -6784,7 +7058,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
/// See AbstractAttribute::manifest(...)
ChangeStatus manifest(Attributor &A) override {
- if (!PrivatizableType.hasValue())
+ if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
assert(PrivatizableType.getValue() && "Expected privatizable type!");
@@ -6817,14 +7091,16 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Function &ReplacementFn, Function::arg_iterator ArgIt) {
BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
Instruction *IP = &*EntryBB.getFirstInsertionPt();
- Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+ const DataLayout &DL = IP->getModule()->getDataLayout();
+ unsigned AS = DL.getAllocaAddrSpace();
+ Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS,
Arg->getName() + ".priv", IP);
createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
ArgIt->getArgNo(), *IP);
if (AI->getType() != Arg->getType())
- AI =
- BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP);
+ AI = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+ AI, Arg->getType(), "", IP);
Arg->replaceAllUsesWith(AI);
for (CallInst *CI : TailCalls)
@@ -6841,8 +7117,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// When no alignment is specified for the load instruction,
// natural alignment is assumed.
createReplacementValues(
- assumeAligned(AlignAA.getAssumedAlign()),
- PrivatizableType.getValue(), ACS,
+ AlignAA.getAssumedAlign(), *PrivatizableType, ACS,
ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
NewArgOperands);
};
@@ -6850,7 +7125,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// Collect the types that will replace the privatizable type in the function
// signature.
SmallVector<Type *, 16> ReplacementTypes;
- identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+ identifyReplacementTypes(*PrivatizableType, ReplacementTypes);
// Register a rewrite of the argument.
if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes,
@@ -6897,7 +7172,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
auto &PrivArgAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(*Arg), DepClassTy::REQUIRED);
if (PrivArgAA.isAssumedPrivatizablePtr())
- return Obj->getType()->getPointerElementType();
+ return PrivArgAA.getPrivatizableType();
}
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
@@ -6926,7 +7201,7 @@ struct AAPrivatizablePtrCallSiteArgument final
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
PrivatizableType = identifyPrivatizableType(A);
- if (!PrivatizableType.hasValue())
+ if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
if (!PrivatizableType.getValue())
return indicatePessimisticFixpoint();
@@ -6992,10 +7267,12 @@ struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
}
};
+} // namespace
/// -------------------- Memory Behavior Attributes ----------------------------
/// Includes read-none, read-only, and write-only.
/// ----------------------------------------------------------------------------
+namespace {
struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A)
: AAMemoryBehavior(IRP, A) {}
@@ -7495,6 +7772,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
+} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7528,6 +7806,7 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr(
return S;
}
+namespace {
struct AAMemoryLocationImpl : public AAMemoryLocation {
AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
@@ -7772,8 +8051,10 @@ void AAMemoryLocationImpl::categorizePtrValue(
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
SmallVector<Value *, 8> Objects;
+ bool UsedAssumedInformation = false;
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
- /* Intraprocedural */ true)) {
+ UsedAssumedInformation,
+ AA::Intraprocedural)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
@@ -8042,9 +8323,11 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
STATS_DECLTRACK_CS_ATTR(readnone)
}
};
+} // namespace
/// ------------------ Value Constant Range Attribute -------------------------
+namespace {
struct AAValueConstantRangeImpl : AAValueConstantRange {
using StateType = IntegerRangeState;
AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A)
@@ -8379,7 +8662,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return true;
if (!SimplifiedLHS.getValue())
return false;
@@ -8388,7 +8671,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return true;
if (!SimplifiedRHS.getValue())
return false;
@@ -8432,7 +8715,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedOpV =
A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedOpV.hasValue())
+ if (!SimplifiedOpV)
return true;
if (!SimplifiedOpV.getValue())
return false;
@@ -8462,7 +8745,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return true;
if (!SimplifiedLHS.getValue())
return false;
@@ -8471,7 +8754,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return true;
if (!SimplifiedRHS.getValue())
return false;
@@ -8536,7 +8819,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
const auto &SimplifiedOpV =
A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedOpV.hasValue())
+ if (!SimplifiedOpV)
return true;
if (!SimplifiedOpV.getValue())
return false;
@@ -8588,8 +8871,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
IntegerRangeState T(getBitWidth());
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T,
VisitValueCB, getCtxI(),
+ UsedAssumedInformation,
/* UseValueSimplify */ false))
return indicatePessimisticFixpoint();
@@ -8683,21 +8968,23 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
STATS_DECLTRACK_CSARG_ATTR(value_range)
}
};
+} // namespace
/// ------------------ Potential Values Attribute -------------------------
-struct AAPotentialValuesImpl : AAPotentialValues {
+namespace {
+struct AAPotentialConstantValuesImpl : AAPotentialConstantValues {
using StateType = PotentialConstantIntValuesState;
- AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
- : AAPotentialValues(IRP, A) {}
+ AAPotentialConstantValuesImpl(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValues(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
if (A.hasSimplificationCallback(getIRPosition()))
indicatePessimisticFixpoint();
else
- AAPotentialValues::initialize(A);
+ AAPotentialConstantValues::initialize(A);
}
/// See AbstractAttribute::getAsStr().
@@ -8714,13 +9001,14 @@ struct AAPotentialValuesImpl : AAPotentialValues {
}
};
-struct AAPotentialValuesArgument final
- : AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+struct AAPotentialConstantValuesArgument final
+ : AAArgumentFromCallSiteArguments<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl,
PotentialConstantIntValuesState> {
- using Base =
- AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
- PotentialConstantIntValuesState>;
- AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
+ using Base = AAArgumentFromCallSiteArguments<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl,
+ PotentialConstantIntValuesState>;
+ AAPotentialConstantValuesArgument(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(..).
@@ -8738,11 +9026,12 @@ struct AAPotentialValuesArgument final
}
};
-struct AAPotentialValuesReturned
- : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
- using Base =
- AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
- AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
+struct AAPotentialConstantValuesReturned
+ : AAReturnedFromReturnedValues<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl> {
+ using Base = AAReturnedFromReturnedValues<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl>;
+ AAPotentialConstantValuesReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
@@ -8751,13 +9040,13 @@ struct AAPotentialValuesReturned
}
};
-struct AAPotentialValuesFloating : AAPotentialValuesImpl {
- AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesImpl(IRP, A) {}
+struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
+ AAPotentialConstantValuesFloating(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValuesImpl(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
- AAPotentialValuesImpl::initialize(A);
+ AAPotentialConstantValuesImpl::initialize(A);
if (isAtFixpoint())
return;
@@ -8783,7 +9072,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
indicatePessimisticFixpoint();
- LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: "
+ LLVM_DEBUG(dbgs() << "[AAPotentialConstantValues] We give up: "
<< getAssociatedValue() << "\n");
}
@@ -8891,7 +9180,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
@@ -8900,7 +9189,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
@@ -8909,18 +9198,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return indicatePessimisticFixpoint();
- auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
- DepClassTy::REQUIRED);
+ auto &LHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
if (!LHSAA.isValidState())
return indicatePessimisticFixpoint();
- auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
- DepClassTy::REQUIRED);
+ auto &RHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
if (!RHSAA.isValidState())
return indicatePessimisticFixpoint();
- const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
- const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+ const SetTy &LHSAAPVS = LHSAA.getAssumedSet();
+ const SetTy &RHSAAPVS = RHSAA.getAssumedSet();
// TODO: make use of undef flag to limit potential values aggressively.
bool MaybeTrue = false, MaybeFalse = false;
@@ -8974,7 +9263,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
@@ -8983,7 +9272,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
@@ -8997,21 +9286,21 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
// Check if we only need one operand.
bool OnlyLeft = false, OnlyRight = false;
- if (C.hasValue() && *C && (*C)->isOneValue())
+ if (C && *C && (*C)->isOneValue())
OnlyLeft = true;
- else if (C.hasValue() && *C && (*C)->isZeroValue())
+ else if (C && *C && (*C)->isZeroValue())
OnlyRight = true;
- const AAPotentialValues *LHSAA = nullptr, *RHSAA = nullptr;
+ const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr;
if (!OnlyRight) {
- LHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
- DepClassTy::REQUIRED);
+ LHSAA = &A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
if (!LHSAA->isValidState())
return indicatePessimisticFixpoint();
}
if (!OnlyLeft) {
- RHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
- DepClassTy::REQUIRED);
+ RHSAA = &A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
if (!RHSAA->isValidState())
return indicatePessimisticFixpoint();
}
@@ -9049,17 +9338,17 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedSrc =
A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedSrc.hasValue())
+ if (!SimplifiedSrc)
return ChangeStatus::UNCHANGED;
if (!SimplifiedSrc.getValue())
return indicatePessimisticFixpoint();
Src = *SimplifiedSrc;
- auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src),
- DepClassTy::REQUIRED);
+ auto &SrcAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*Src), DepClassTy::REQUIRED);
if (!SrcAA.isValidState())
return indicatePessimisticFixpoint();
- const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet();
+ const SetTy &SrcAAPVS = SrcAA.getAssumedSet();
if (SrcAA.undefIsContained())
unionAssumedWithUndef();
else {
@@ -9082,7 +9371,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedLHS.hasValue())
+ if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
@@ -9091,7 +9380,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
- if (!SimplifiedRHS.hasValue())
+ if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
@@ -9100,18 +9389,18 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return indicatePessimisticFixpoint();
- auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
- DepClassTy::REQUIRED);
+ auto &LHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
if (!LHSAA.isValidState())
return indicatePessimisticFixpoint();
- auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
- DepClassTy::REQUIRED);
+ auto &RHSAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
if (!RHSAA.isValidState())
return indicatePessimisticFixpoint();
- const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
- const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
+ const SetTy &LHSAAPVS = LHSAA.getAssumedSet();
+ const SetTy &RHSAAPVS = RHSAA.getAssumedSet();
const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0);
// TODO: make use of undef flag to limit potential values aggressively.
@@ -9150,13 +9439,13 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
const auto &SimplifiedIncomingValue = A.getAssumedSimplified(
IRPosition::value(*IncomingValue, getCallBaseContext()), *this,
UsedAssumedInformation);
- if (!SimplifiedIncomingValue.hasValue())
+ if (!SimplifiedIncomingValue)
continue;
if (!SimplifiedIncomingValue.getValue())
return indicatePessimisticFixpoint();
IncomingValue = *SimplifiedIncomingValue;
- auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>(
+ auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>(
*this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED);
if (!PotentialValuesAA.isValidState())
return indicatePessimisticFixpoint();
@@ -9169,30 +9458,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
: ChangeStatus::CHANGED;
}
- ChangeStatus updateWithLoad(Attributor &A, LoadInst &L) {
- if (!L.getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
- auto Union = [&](Value &V) {
- if (isa<UndefValue>(V)) {
- unionAssumedWithUndef();
- return true;
- }
- if (ConstantInt *CI = dyn_cast<ConstantInt>(&V)) {
- unionAssumed(CI->getValue());
- return true;
- }
- return false;
- };
- auto AssumedBefore = getAssumed();
-
- if (!AAValueSimplifyImpl::handleLoad(A, *this, L, Union))
- return indicatePessimisticFixpoint();
-
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
@@ -9213,9 +9478,6 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (auto *PHI = dyn_cast<PHINode>(I))
return updateWithPHINode(A, PHI);
- if (auto *L = dyn_cast<LoadInst>(I))
- return updateWithLoad(A, *L);
-
return indicatePessimisticFixpoint();
}
@@ -9225,14 +9487,15 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
}
};
-struct AAPotentialValuesFunction : AAPotentialValuesImpl {
- AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesImpl(IRP, A) {}
+struct AAPotentialConstantValuesFunction : AAPotentialConstantValuesImpl {
+ AAPotentialConstantValuesFunction(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValuesImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
ChangeStatus updateImpl(Attributor &A) override {
- llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
- "not be called");
+ llvm_unreachable(
+ "AAPotentialConstantValues(Function|CallSite)::updateImpl will "
+ "not be called");
}
/// See AbstractAttribute::trackStatistics()
@@ -9241,9 +9504,9 @@ struct AAPotentialValuesFunction : AAPotentialValuesImpl {
}
};
-struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
- AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesFunction(IRP, A) {}
+struct AAPotentialConstantValuesCallSite : AAPotentialConstantValuesFunction {
+ AAPotentialConstantValuesCallSite(const IRPosition &IRP, Attributor &A)
+ : AAPotentialConstantValuesFunction(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -9251,11 +9514,13 @@ struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
}
};
-struct AAPotentialValuesCallSiteReturned
- : AACallSiteReturnedFromReturned<AAPotentialValues, AAPotentialValuesImpl> {
- AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AAPotentialValues,
- AAPotentialValuesImpl>(IRP, A) {}
+struct AAPotentialConstantValuesCallSiteReturned
+ : AACallSiteReturnedFromReturned<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl> {
+ AAPotentialConstantValuesCallSiteReturned(const IRPosition &IRP,
+ Attributor &A)
+ : AACallSiteReturnedFromReturned<AAPotentialConstantValues,
+ AAPotentialConstantValuesImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -9263,13 +9528,15 @@ struct AAPotentialValuesCallSiteReturned
}
};
-struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
- AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
- : AAPotentialValuesFloating(IRP, A) {}
+struct AAPotentialConstantValuesCallSiteArgument
+ : AAPotentialConstantValuesFloating {
+ AAPotentialConstantValuesCallSiteArgument(const IRPosition &IRP,
+ Attributor &A)
+ : AAPotentialConstantValuesFloating(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
- AAPotentialValuesImpl::initialize(A);
+ AAPotentialConstantValuesImpl::initialize(A);
if (isAtFixpoint())
return;
@@ -9292,8 +9559,8 @@ struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
auto AssumedBefore = getAssumed();
- auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V),
- DepClassTy::REQUIRED);
+ auto &AA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(V), DepClassTy::REQUIRED);
const auto &S = AA.getAssumed();
unionAssumed(S);
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
@@ -9365,7 +9632,7 @@ struct AANoUndefImpl : AANoUndef {
// considered to be dead. We don't manifest noundef in such positions for
// the same reason above.
if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation)
- .hasValue())
+ .has_value())
return ChangeStatus::UNCHANGED;
return AANoUndef::manifest(A);
}
@@ -9400,8 +9667,10 @@ struct AANoUndefFloating : public AANoUndefImpl {
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
@@ -9518,9 +9787,10 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
// Process any value that we might call.
auto ProcessCalledOperand = [&](Value *V) {
bool DummyValue = false;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
DummyValue, VisitValue, nullptr,
- false)) {
+ UsedAssumedInformation, false)) {
// If we haven't gone through all values, assume that there are unknown
// callees.
setHasUnknownCallee(true, Change);
@@ -9530,7 +9800,9 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
CallBase *CB = cast<CallBase>(getCtxI());
if (CB->isInlineAsm()) {
- setHasUnknownCallee(false, Change);
+ if (!hasAssumption(*CB->getCaller(), "ompx_no_call_asm") &&
+ !hasAssumption(*CB, "ompx_no_call_asm"))
+ setHasUnknownCallee(false, Change);
return Change;
}
@@ -9584,7 +9856,8 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
// Visit all callable instructions.
bool UsedAssumedInformation = false;
if (!A.checkForAllCallLikeInstructions(ProcessCallInst, *this,
- UsedAssumedInformation)) {
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true)) {
// If we haven't looked at all call like instructions, assume that there
// are unknown callees.
setHasUnknownCallee(true, Change);
@@ -9656,7 +9929,7 @@ private:
ArrayRef<const AACallEdges *> AAEdgesList,
const Function &Fn) {
Optional<bool> Cached = isCachedReachable(Fn);
- if (Cached.hasValue())
+ if (Cached)
return Cached.getValue();
// The query was not cached, thus it is new. We need to request an update
@@ -9691,6 +9964,10 @@ private:
const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
for (Function *Edge : Edges) {
+ // Functions that do not call back into the module can be ignored.
+ if (Edge->hasFnAttribute(Attribute::NoCallback))
+ continue;
+
// We don't need a dependency if the result is reachable.
const AAFunctionReachability &EdgeReachability =
A.getAAFor<AAFunctionReachability>(
@@ -9820,22 +10097,21 @@ public:
}
// Update the Instruction queries.
- const AAReachability *Reachability;
if (!InstQueries.empty()) {
- Reachability = &A.getAAFor<AAReachability>(
+ const AAReachability *Reachability = &A.getAAFor<AAReachability>(
*this, IRPosition::function(*getAssociatedFunction()),
DepClassTy::REQUIRED);
- }
- // Check for local callbases first.
- for (auto &InstPair : InstQueries) {
- SmallVector<const AACallEdges *> CallEdges;
- bool AllKnown =
- getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
- // Update will return change if we this effects any queries.
- if (!AllKnown)
- InstPair.second.CanReachUnknownCallee = true;
- Change |= InstPair.second.update(A, *this, CallEdges);
+ // Check for local callbases first.
+ for (auto &InstPair : InstQueries) {
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown =
+ getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
+ // Update will return change if this affects any queries.
+ if (!AllKnown)
+ InstPair.second.CanReachUnknownCallee = true;
+ Change |= InstPair.second.update(A, *this, CallEdges);
+ }
}
return Change;
@@ -9862,13 +10138,15 @@ private:
/// Used to answer if a call base inside this function can reach a specific
/// function.
- DenseMap<const CallBase *, QueryResolver> CBQueries;
+ MapVector<const CallBase *, QueryResolver> CBQueries;
/// This is for instruction queries that scan "forward".
- DenseMap<const Instruction *, QueryResolver> InstQueries;
+ MapVector<const Instruction *, QueryResolver> InstQueries;
};
+} // namespace
/// ---------------------- Assumption Propagation ------------------------------
+namespace {
struct AAAssumptionInfoImpl : public AAAssumptionInfo {
AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A,
const DenseSet<StringRef> &Known)
@@ -9938,12 +10216,13 @@ struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl {
return !getAssumed().empty() || !getKnown().empty();
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
// Get the intersection of all assumptions held by this node's predecessors.
// If we don't know all the call sites then this is either an entry into the
// call graph or an empty node. This node is known to only contain its own
// assumptions and can be propagated to its successors.
- if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown))
+ if (!A.checkForAllCallSites(CallSitePred, *this, true,
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
@@ -10001,6 +10280,7 @@ private:
return Assumptions;
}
};
+} // namespace
AACallGraphNode *AACallEdgeIterator::operator*() const {
return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
@@ -10023,6 +10303,7 @@ const char AANoReturn::ID = 0;
const char AAIsDead::ID = 0;
const char AADereferenceable::ID = 0;
const char AAAlign::ID = 0;
+const char AAInstanceInfo::ID = 0;
const char AANoCapture::ID = 0;
const char AAValueSimplify::ID = 0;
const char AAHeapToStack::ID = 0;
@@ -10030,7 +10311,7 @@ const char AAPrivatizablePtr::ID = 0;
const char AAMemoryBehavior::ID = 0;
const char AAMemoryLocation::ID = 0;
const char AAValueConstantRange::ID = 0;
-const char AAPotentialValues::ID = 0;
+const char AAPotentialConstantValues::ID = 0;
const char AANoUndef::ID = 0;
const char AACallEdges::ID = 0;
const char AAFunctionReachability::ID = 0;
@@ -10145,9 +10426,10 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
-CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
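For reference, a minimal sketch of how a client attribute queries the renamed AAPotentialConstantValues, following the getAAFor call in the call-site-argument hunk above; the surrounding updateImpl scaffolding and the final unionAssumed step are assumptions for illustration, not part of the patch:

    // Inside some AbstractAttribute::updateImpl(Attributor &A); V is the
    // llvm::Value whose potential constant values are being queried.
    const auto &PCV = A.getAAFor<AAPotentialConstantValues>(
        *this, IRPosition::value(V), DepClassTy::REQUIRED);
    // Merge the assumed constant set into this attribute's own state, as
    // AAPotentialConstantValuesCallSiteArgument::updateImpl does above.
    unionAssumed(PCV.getAssumed());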
diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
index 7c178f9a9834..9e27ae49a901 100644
--- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
@@ -135,7 +135,8 @@ void BlockExtractor::loadFile() {
if (LineSplit.empty())
continue;
if (LineSplit.size()!=2)
- report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'");
+ report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'",
+ /*GenCrashDiag=*/false);
SmallVector<StringRef, 4> BBNames;
LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
@@ -194,13 +195,15 @@ bool BlockExtractor::runOnModule(Module &M) {
for (const auto &BInfo : BlocksByName) {
Function *F = M.getFunction(BInfo.first);
if (!F)
- report_fatal_error("Invalid function name specified in the input file");
+ report_fatal_error("Invalid function name specified in the input file",
+ /*GenCrashDiag=*/false);
for (const auto &BBInfo : BInfo.second) {
auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
return BB.getName().equals(BBInfo);
});
if (Res == F->end())
- report_fatal_error("Invalid block name specified in the input file");
+ report_fatal_error("Invalid block name specified in the input file",
+ /*GenCrashDiag=*/false);
GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
}
++NextGroupIdx;
@@ -212,7 +215,7 @@ bool BlockExtractor::runOnModule(Module &M) {
for (BasicBlock *BB : BBs) {
// Check if the module contains BB.
if (BB->getParent()->getParent() != &M)
- report_fatal_error("Invalid basic block");
+ report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
<< BB->getParent()->getName() << ":" << BB->getName()
<< "\n");
diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 927dceec8865..64bfcb2a9a9f 100644
--- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -19,11 +19,13 @@
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
#include "llvm/Analysis/SparsePropagation.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
+
using namespace llvm;
#define DEBUG_TYPE "called-value-propagation"
@@ -68,7 +70,7 @@ public:
}
};
- CVPLatticeVal() : LatticeState(Undefined) {}
+ CVPLatticeVal() = default;
CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {}
CVPLatticeVal(std::vector<Function *> &&Functions)
: LatticeState(FunctionSet), Functions(std::move(Functions)) {
@@ -94,7 +96,7 @@ public:
private:
/// Holds the state this lattice value is in.
- CVPLatticeStateTy LatticeState;
+ CVPLatticeStateTy LatticeState = Undefined;
/// Holds functions indicating the possible targets of call sites. This set
/// is empty for lattice values in the undefined, overdefined, and untracked
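The CVPLatticeVal change is the usual modernization of moving member initialization from a hand-written default constructor into an in-class initializer so the constructor can be defaulted. A standalone C++ sketch of the pattern, with illustrative names rather than LLVM code:

    // Before: Lattice() : S(Undefined) {}
    // After: initialize at the declaration and default the constructor.
    struct Lattice {
      enum State { Undefined, FunctionSet, Overdefined, Untracked };
      State S = Undefined; // in-class default member initializer
      Lattice() = default; // no hand-written body needed
      Lattice(State InitialState) : S(InitialState) {}
    };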
diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 178d3f41963e..73af30ece47c 100644
--- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -85,7 +85,7 @@ static void copyDebugLocMetadata(const GlobalVariable *From,
}
static Align getAlign(GlobalVariable *GV) {
- return GV->getAlign().getValueOr(
+ return GV->getAlign().value_or(
GV->getParent()->getDataLayout().getPreferredAlign(GV));
}
diff --git a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
index 2fe9a59ad210..dfe33ac9da0d 100644
--- a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -15,21 +15,16 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 2a6e38b0437f..99fa4baf355d 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -16,18 +16,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -44,9 +43,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
-#include <cstdint>
#include <utility>
#include <vector>
@@ -55,36 +54,36 @@ using namespace llvm;
#define DEBUG_TYPE "deadargelim"
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
-STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
-STATISTIC(NumArgumentsReplacedWithUndef,
- "Number of unread args replaced with undef");
+STATISTIC(NumRetValsEliminated, "Number of unused return values removed");
+STATISTIC(NumArgumentsReplacedWithPoison,
+ "Number of unread args replaced with poison");
namespace {
- /// DAE - The dead argument elimination pass.
- class DAE : public ModulePass {
- protected:
- // DAH uses this to specify a different ID.
- explicit DAE(char &ID) : ModulePass(ID) {}
+/// The dead argument elimination pass.
+class DAE : public ModulePass {
+protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(char &ID) : ModulePass(ID) {}
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- DAE() : ModulePass(ID) {
- initializeDAEPass(*PassRegistry::getPassRegistry());
- }
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
- DeadArgumentEliminationPass DAEP(ShouldHackArguments());
- ModuleAnalysisManager DummyMAM;
- PreservedAnalyses PA = DAEP.run(M, DummyMAM);
- return !PA.areAllPreserved();
- }
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ DeadArgumentEliminationPass DAEP(shouldHackArguments());
+ ModuleAnalysisManager DummyMAM;
+ PreservedAnalyses PA = DAEP.run(M, DummyMAM);
+ return !PA.areAllPreserved();
+ }
- virtual bool ShouldHackArguments() const { return false; }
- };
+ virtual bool shouldHackArguments() const { return false; }
+};
} // end anonymous namespace
@@ -94,51 +93,51 @@ INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
namespace {
- /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
- /// deletes arguments to functions which are external. This is only for use
- /// by bugpoint.
- struct DAH : public DAE {
- static char ID;
+/// The DeadArgumentHacking pass, same as dead argument elimination, but deletes
+/// arguments to functions which are external. This is only for use by bugpoint.
+struct DAH : public DAE {
+ static char ID;
- DAH() : DAE(ID) {}
+ DAH() : DAE(ID) {}
- bool ShouldHackArguments() const override { return true; }
- };
+ bool shouldHackArguments() const override { return true; }
+};
} // end anonymous namespace
char DAH::ID = 0;
INITIALIZE_PASS(DAH, "deadarghaX0r",
- "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
- false, false)
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", false,
+ false)
-/// createDeadArgEliminationPass - This pass removes arguments from functions
-/// which are not used by the body of the function.
+/// This pass removes arguments from functions which are not used by the body of
+/// the function.
ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
-/// DeleteDeadVarargs - If this is an function that takes a ... list, and if
-/// llvm.vastart is never called, the varargs list is dead for the function.
-bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
- assert(Fn.getFunctionType()->isVarArg() && "Function isn't varargs!");
- if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false;
+/// If this is a function that takes a ... list, and if llvm.vastart is never
+/// called, the varargs list is dead for the function.
+bool DeadArgumentEliminationPass::deleteDeadVarargs(Function &F) {
+ assert(F.getFunctionType()->isVarArg() && "Function isn't varargs!");
+ if (F.isDeclaration() || !F.hasLocalLinkage())
+ return false;
// Ensure that the function is only directly called.
- if (Fn.hasAddressTaken())
+ if (F.hasAddressTaken())
return false;
// Don't touch naked functions. The assembly might be using an argument, or
// otherwise rely on the frame layout in a way that this analysis will not
// see.
- if (Fn.hasFnAttribute(Attribute::Naked)) {
+ if (F.hasFnAttribute(Attribute::Naked)) {
return false;
}
// Okay, we know we can transform this function if safe. Scan its body
// looking for calls marked musttail or calls to llvm.vastart.
- for (BasicBlock &BB : Fn) {
+ for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
@@ -157,25 +156,24 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but doesn't have isVarArg set.
- FunctionType *FTy = Fn.getFunctionType();
+ FunctionType *FTy = F.getFunctionType();
std::vector<Type *> Params(FTy->param_begin(), FTy->param_end());
- FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
- Params, false);
+ FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false);
unsigned NumArgs = Params.size();
// Create the new function body and insert it into the module...
- Function *NF = Function::Create(NFTy, Fn.getLinkage(), Fn.getAddressSpace());
- NF->copyAttributesFrom(&Fn);
- NF->setComdat(Fn.getComdat());
- Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF);
- NF->takeName(&Fn);
+ Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+ NF->copyAttributesFrom(&F);
+ NF->setComdat(F.getComdat());
+ F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+ NF->takeName(&F);
- // Loop over all of the callers of the function, transforming the call sites
+ // Loop over all the callers of the function, transforming the call sites
// to pass in a smaller number of arguments into the new function.
//
std::vector<Value *> Args;
- for (User *U : llvm::make_early_inc_range(Fn.users())) {
+ for (User *U : llvm::make_early_inc_range(F.users())) {
CallBase *CB = dyn_cast<CallBase>(U);
if (!CB)
continue;
@@ -189,7 +187,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
SmallVector<AttributeSet, 8> ArgAttrs;
for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
- PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttrs(),
+ PAL = AttributeList::get(F.getContext(), PAL.getFnAttrs(),
PAL.getRetAttrs(), ArgAttrs);
}
@@ -224,64 +222,67 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
// function empty.
- NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList());
+ NF->getBasicBlockList().splice(NF->begin(), F.getBasicBlockList());
// Loop over the argument list, transferring uses of the old arguments over to
- // the new arguments, also transferring over the names as well. While we're at
- // it, remove the dead arguments from the DeadArguments list.
- for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(),
- I2 = NF->arg_begin(); I != E; ++I, ++I2) {
+ // the new arguments, also transferring over the names as well. While we're
+ // at it, remove the dead arguments from the DeadArguments list.
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(),
+ I2 = NF->arg_begin();
+ I != E; ++I, ++I2) {
// Move the name and users over to the new version.
I->replaceAllUsesWith(&*I2);
I2->takeName(&*I);
}
- // Clone metadatas from the old function, including debug info descriptor.
+ // Clone metadata from the old function, including debug info descriptor.
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- Fn.getAllMetadata(MDs);
+ F.getAllMetadata(MDs);
for (auto MD : MDs)
NF->addMetadata(MD.first, *MD.second);
// Fix up any BlockAddresses that refer to the function.
- Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
+ F.replaceAllUsesWith(ConstantExpr::getBitCast(NF, F.getType()));
// Delete the bitcast that we just created, so that NF does not
// appear to be address-taken.
NF->removeDeadConstantUsers();
// Finally, nuke the old function.
- Fn.eraseFromParent();
+ F.eraseFromParent();
return true;
}
-/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
-/// arguments that are unused, and changes the caller parameters to be undefined
-/// instead.
-bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
+/// Checks if the given function has any arguments that are unused, and changes
+/// the caller parameters to be poison instead.
+bool DeadArgumentEliminationPass::removeDeadArgumentsFromCallers(Function &F) {
// We cannot change the arguments if this TU does not define the function or
// if the linker may choose a function body from another TU, even if the
// nominal linkage indicates that other copies of the function have the same
// semantics. In the below example, the dead load from %p may not have been
- // eliminated from the linker-chosen copy of f, so replacing %p with undef
+ // eliminated from the linker-chosen copy of f, so replacing %p with poison
// in callers may introduce undefined behavior.
//
// define linkonce_odr void @f(i32* %p) {
// %v = load i32 %p
// ret void
// }
- if (!Fn.hasExactDefinition())
+ if (!F.hasExactDefinition())
return false;
- // Functions with local linkage should already have been handled, except the
- // fragile (variadic) ones which we can improve here.
- if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
+ // Functions with local linkage should already have been handled, except if
+ // they are fully alive (e.g., called indirectly) and except for the fragile
+ // (variadic) ones. In these cases, we may still be able to improve their
+ // statically known call sites.
+ if ((F.hasLocalLinkage() && !LiveFunctions.count(&F)) &&
+ !F.getFunctionType()->isVarArg())
return false;
// Don't touch naked functions. The assembly might be using an argument, or
// otherwise rely on the frame layout in a way that this analysis will not
// see.
- if (Fn.hasFnAttribute(Attribute::Naked))
+ if (F.hasFnAttribute(Attribute::Naked))
return false;
- if (Fn.use_empty())
+ if (F.use_empty())
return false;
SmallVector<unsigned, 8> UnusedArgs;
@@ -289,35 +290,36 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
AttributeMask UBImplyingAttributes =
AttributeFuncs::getUBImplyingAttributes();
- for (Argument &Arg : Fn.args()) {
+ for (Argument &Arg : F.args()) {
if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() &&
!Arg.hasPassPointeeByValueCopyAttr()) {
if (Arg.isUsedByMetadata()) {
- Arg.replaceAllUsesWith(UndefValue::get(Arg.getType()));
+ Arg.replaceAllUsesWith(PoisonValue::get(Arg.getType()));
Changed = true;
}
UnusedArgs.push_back(Arg.getArgNo());
- Fn.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes);
+ F.removeParamAttrs(Arg.getArgNo(), UBImplyingAttributes);
}
}
if (UnusedArgs.empty())
return false;
- for (Use &U : Fn.uses()) {
+ for (Use &U : F.uses()) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
- if (!CB || !CB->isCallee(&U))
+ if (!CB || !CB->isCallee(&U) ||
+ CB->getFunctionType() != F.getFunctionType())
continue;
- // Now go through all unused args and replace them with "undef".
+ // Now go through all unused args and replace them with poison.
for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
unsigned ArgNo = UnusedArgs[I];
Value *Arg = CB->getArgOperand(ArgNo);
- CB->setArgOperand(ArgNo, UndefValue::get(Arg->getType()));
+ CB->setArgOperand(ArgNo, PoisonValue::get(Arg->getType()));
CB->removeParamAttrs(ArgNo, UBImplyingAttributes);
- ++NumArgumentsReplacedWithUndef;
+ ++NumArgumentsReplacedWithPoison;
Changed = true;
}
}
@@ -328,16 +330,15 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
/// Convenience function that returns the number of return values. It returns 0
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
-static unsigned NumRetVals(const Function *F) {
+static unsigned numRetVals(const Function *F) {
Type *RetTy = F->getReturnType();
if (RetTy->isVoidTy())
return 0;
- else if (StructType *STy = dyn_cast<StructType>(RetTy))
+ if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getNumElements();
- else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
return ATy->getNumElements();
- else
- return 1;
+ return 1;
}
/// Returns the sub-type a function will return at a given Idx. Should
@@ -349,20 +350,18 @@ static Type *getRetComponentType(const Function *F, unsigned Idx) {
if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getElementType(Idx);
- else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
return ATy->getElementType();
- else
- return RetTy;
+ return RetTy;
}
-/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
-/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
-/// liveness of Use.
+/// Checks Use for liveness in LiveValues. If Use is not live, it adds Use to
+/// the MaybeLiveUses argument. Returns the determined liveness of Use.
DeadArgumentEliminationPass::Liveness
-DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
+DeadArgumentEliminationPass::markIfNotLive(RetOrArg Use,
UseVector &MaybeLiveUses) {
// We're live if our use or its Function is already marked as live.
- if (IsLive(Use))
+ if (isLive(Use))
return Live;
// We're maybe live otherwise, but remember that we must become live if
@@ -371,127 +370,127 @@ DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use,
return MaybeLive;
}
-/// SurveyUse - This looks at a single use of an argument or return value
-/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
-/// if it causes the used value to become MaybeLive.
+/// Looks at a single use of an argument or return value and determines if it
+/// should be alive or not. Adds this use to MaybeLiveUses if it causes the
+/// used value to become MaybeLive.
///
/// RetValNum is the return value number to use when this use is used in a
/// return instruction. This is used in the recursion; you should always leave
/// it at 0.
DeadArgumentEliminationPass::Liveness
-DeadArgumentEliminationPass::SurveyUse(const Use *U, UseVector &MaybeLiveUses,
+DeadArgumentEliminationPass::surveyUse(const Use *U, UseVector &MaybeLiveUses,
unsigned RetValNum) {
- const User *V = U->getUser();
- if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
- // The value is returned from a function. It's only live when the
- // function's return value is live. We use RetValNum here, for the case
- // that U is really a use of an insertvalue instruction that uses the
- // original Use.
- const Function *F = RI->getParent()->getParent();
- if (RetValNum != -1U) {
- RetOrArg Use = CreateRet(F, RetValNum);
- // We might be live, depending on the liveness of Use.
- return MarkIfNotLive(Use, MaybeLiveUses);
- } else {
- DeadArgumentEliminationPass::Liveness Result = MaybeLive;
- for (unsigned Ri = 0; Ri < NumRetVals(F); ++Ri) {
- RetOrArg Use = CreateRet(F, Ri);
- // We might be live, depending on the liveness of Use. If any
- // sub-value is live, then the entire value is considered live. This
- // is a conservative choice, and better tracking is possible.
- DeadArgumentEliminationPass::Liveness SubResult =
- MarkIfNotLive(Use, MaybeLiveUses);
- if (Result != Live)
- Result = SubResult;
- }
- return Result;
- }
+ const User *V = U->getUser();
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+ // The value is returned from a function. It's only live when the
+ // function's return value is live. We use RetValNum here, for the case
+ // that U is really a use of an insertvalue instruction that uses the
+ // original Use.
+ const Function *F = RI->getParent()->getParent();
+ if (RetValNum != -1U) {
+ RetOrArg Use = createRet(F, RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return markIfNotLive(Use, MaybeLiveUses);
}
- if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
- if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
- && IV->hasIndices())
- // The use we are examining is inserted into an aggregate. Our liveness
- // depends on all uses of that aggregate, but if it is used as a return
- // value, only index at which we were inserted counts.
- RetValNum = *IV->idx_begin();
-
- // Note that if we are used as the aggregate operand to the insertvalue,
- // we don't change RetValNum, but do survey all our uses.
-
- Liveness Result = MaybeLive;
- for (const Use &UU : IV->uses()) {
- Result = SurveyUse(&UU, MaybeLiveUses, RetValNum);
- if (Result == Live)
- break;
- }
- return Result;
+
+ DeadArgumentEliminationPass::Liveness Result = MaybeLive;
+ for (unsigned Ri = 0; Ri < numRetVals(F); ++Ri) {
+ RetOrArg Use = createRet(F, Ri);
+ // We might be live, depending on the liveness of Use. If any
+ // sub-value is live, then the entire value is considered live. This
+ // is a conservative choice, and better tracking is possible.
+ DeadArgumentEliminationPass::Liveness SubResult =
+ markIfNotLive(Use, MaybeLiveUses);
+ if (Result != Live)
+ Result = SubResult;
+ }
+ return Result;
+ }
+
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex() &&
+ IV->hasIndices())
+ // The use we are examining is inserted into an aggregate. Our liveness
+ // depends on all uses of that aggregate, but if it is used as a return
+ // value, only the index at which we were inserted counts.
+ RetValNum = *IV->idx_begin();
+
+ // Note that if we are used as the aggregate operand to the insertvalue,
+ // we don't change RetValNum, but do survey all our uses.
+
+ Liveness Result = MaybeLive;
+ for (const Use &UU : IV->uses()) {
+ Result = surveyUse(&UU, MaybeLiveUses, RetValNum);
+ if (Result == Live)
+ break;
}
+ return Result;
+ }
- if (const auto *CB = dyn_cast<CallBase>(V)) {
- const Function *F = CB->getCalledFunction();
- if (F) {
- // Used in a direct call.
+ if (const auto *CB = dyn_cast<CallBase>(V)) {
+ const Function *F = CB->getCalledFunction();
+ if (F) {
+ // Used in a direct call.
- // The function argument is live if it is used as a bundle operand.
- if (CB->isBundleOperand(U))
- return Live;
+ // The function argument is live if it is used as a bundle operand.
+ if (CB->isBundleOperand(U))
+ return Live;
- // Find the argument number. We know for sure that this use is an
- // argument, since if it was the function argument this would be an
- // indirect call and the we know can't be looking at a value of the
- // label type (for the invoke instruction).
- unsigned ArgNo = CB->getArgOperandNo(U);
+ // Find the argument number. We know for sure that this use is an
+ // argument, since if it was the function argument this would be an
+ // indirect call and that we know can't be looking at a value of the
+ // label type (for the invoke instruction).
+ unsigned ArgNo = CB->getArgOperandNo(U);
- if (ArgNo >= F->getFunctionType()->getNumParams())
- // The value is passed in through a vararg! Must be live.
- return Live;
+ if (ArgNo >= F->getFunctionType()->getNumParams())
+ // The value is passed in through a vararg! Must be live.
+ return Live;
- assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) &&
- "Argument is not where we expected it");
+ assert(CB->getArgOperand(ArgNo) == CB->getOperand(U->getOperandNo()) &&
+ "Argument is not where we expected it");
- // Value passed to a normal call. It's only live when the corresponding
- // argument to the called function turns out live.
- RetOrArg Use = CreateArg(F, ArgNo);
- return MarkIfNotLive(Use, MaybeLiveUses);
- }
+ // Value passed to a normal call. It's only live when the corresponding
+ // argument to the called function turns out live.
+ RetOrArg Use = createArg(F, ArgNo);
+ return markIfNotLive(Use, MaybeLiveUses);
}
- // Used in any other way? Value must be live.
- return Live;
+ }
+ // Used in any other way? Value must be live.
+ return Live;
}
-/// SurveyUses - This looks at all the uses of the given value
+/// Looks at all the uses of the given value.
/// Returns the Liveness deduced from the uses of this value.
///
/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
/// the result is Live, MaybeLiveUses might be modified but its content should
/// be ignored (since it might not be complete).
DeadArgumentEliminationPass::Liveness
-DeadArgumentEliminationPass::SurveyUses(const Value *V,
+DeadArgumentEliminationPass::surveyUses(const Value *V,
UseVector &MaybeLiveUses) {
// Assume it's dead (which will only hold if there are no uses at all).
Liveness Result = MaybeLive;
// Check each use.
for (const Use &U : V->uses()) {
- Result = SurveyUse(&U, MaybeLiveUses);
+ Result = surveyUse(&U, MaybeLiveUses);
if (Result == Live)
break;
}
return Result;
}
-// SurveyFunction - This performs the initial survey of the specified function,
-// checking out whether or not it uses any of its incoming arguments or whether
-// any callers use the return value. This fills in the LiveValues set and Uses
-// map.
-//
-// We consider arguments of non-internal functions to be intrinsically alive as
-// well as arguments to functions which have their "address taken".
-void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
+/// Performs the initial survey of the specified function, checking out whether
+/// it uses any of its incoming arguments or whether any callers use the return
+/// value. This fills in the LiveValues set and Uses map.
+///
+/// We consider arguments of non-internal functions to be intrinsically alive as
+/// well as arguments to functions which have their "address taken".
+void DeadArgumentEliminationPass::surveyFunction(const Function &F) {
// Functions with inalloca/preallocated parameters are expecting args in a
// particular register and memory layout.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
- MarkLive(F);
+ markLive(F);
return;
}
@@ -499,11 +498,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// otherwise rely on the frame layout in a way that this analysis will not
// see.
if (F.hasFnAttribute(Attribute::Naked)) {
- MarkLive(F);
+ markLive(F);
return;
}
- unsigned RetCount = NumRetVals(&F);
+ unsigned RetCount = numRetVals(&F);
// Assume all return values are dead
using RetVals = SmallVector<Liveness, 5>;
@@ -518,20 +517,10 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
RetUses MaybeLiveRetUses(RetCount);
bool HasMustTailCalls = false;
-
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
- != F.getFunctionType()->getReturnType()) {
- // We don't support old style multiple return values.
- MarkLive(F);
- return;
- }
- }
-
+ for (const BasicBlock &BB : F) {
// If we have any returns of `musttail` results - the signature can't
// change
- if (BB->getTerminatingMustTailCall() != nullptr)
+ if (BB.getTerminatingMustTailCall() != nullptr)
HasMustTailCalls = true;
}
@@ -541,7 +530,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
}
if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) {
- MarkLive(F);
+ markLive(F);
return;
}
@@ -559,8 +548,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// If the function is PASSED IN as an argument, its address has been
// taken.
const auto *CB = dyn_cast<CallBase>(U.getUser());
- if (!CB || !CB->isCallee(&U)) {
- MarkLive(F);
+ if (!CB || !CB->isCallee(&U) ||
+ CB->getFunctionType() != F.getFunctionType()) {
+ markLive(F);
return;
}
@@ -577,13 +567,13 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
continue;
// Check all uses of the return value.
- for (const Use &U : CB->uses()) {
- if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+ for (const Use &UU : CB->uses()) {
+ if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(UU.getUser())) {
// This use uses a part of our return value, survey the uses of
// that part and store the results for this index only.
unsigned Idx = *Ext->idx_begin();
if (RetValLiveness[Idx] != Live) {
- RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ RetValLiveness[Idx] = surveyUses(Ext, MaybeLiveRetUses[Idx]);
if (RetValLiveness[Idx] == Live)
NumLiveRetVals++;
}
@@ -591,16 +581,16 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Used by something else than extractvalue. Survey, but assume that the
// result applies to all sub-values.
UseVector MaybeLiveAggregateUses;
- if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
+ if (surveyUse(&UU, MaybeLiveAggregateUses) == Live) {
NumLiveRetVals = RetCount;
RetValLiveness.assign(RetCount, Live);
break;
- } else {
- for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
- if (RetValLiveness[Ri] != Live)
- MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(),
- MaybeLiveAggregateUses.end());
- }
+ }
+
+ for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
+ if (RetValLiveness[Ri] != Live)
+ MaybeLiveRetUses[Ri].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
}
}
}
@@ -613,7 +603,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Now we've inspected all callers, record the liveness of our return values.
for (unsigned Ri = 0; Ri != RetCount; ++Ri)
- MarkValue(CreateRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]);
+ markValue(createRet(&F, Ri), RetValLiveness[Ri], MaybeLiveRetUses[Ri]);
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Inspecting args for fn: "
<< F.getName() << "\n");
@@ -641,81 +631,77 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
} else {
// See what the effect of this use is (recording any uses that cause
// MaybeLive in MaybeLiveArgUses).
- Result = SurveyUses(&*AI, MaybeLiveArgUses);
+ Result = surveyUses(&*AI, MaybeLiveArgUses);
}
// Mark the result.
- MarkValue(CreateArg(&F, ArgI), Result, MaybeLiveArgUses);
+ markValue(createArg(&F, ArgI), Result, MaybeLiveArgUses);
// Clear the vector again for the next iteration.
MaybeLiveArgUses.clear();
}
}
-/// MarkValue - This function marks the liveness of RA depending on L. If L is
-/// MaybeLive, it also takes all uses in MaybeLiveUses and records them in Uses,
-/// such that RA will be marked live if any use in MaybeLiveUses gets marked
-/// live later on.
-void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L,
+/// Marks the liveness of RA depending on L. If L is MaybeLive, it also takes
+/// all uses in MaybeLiveUses and records them in Uses, such that RA will be
+/// marked live if any use in MaybeLiveUses gets marked live later on.
+void DeadArgumentEliminationPass::markValue(const RetOrArg &RA, Liveness L,
const UseVector &MaybeLiveUses) {
switch (L) {
- case Live:
- MarkLive(RA);
- break;
- case MaybeLive:
- assert(!IsLive(RA) && "Use is already live!");
- for (const auto &MaybeLiveUse : MaybeLiveUses) {
- if (IsLive(MaybeLiveUse)) {
- // A use is live, so this value is live.
- MarkLive(RA);
- break;
- } else {
- // Note any uses of this value, so this value can be
- // marked live whenever one of the uses becomes live.
- Uses.insert(std::make_pair(MaybeLiveUse, RA));
- }
+ case Live:
+ markLive(RA);
+ break;
+ case MaybeLive:
+ assert(!isLive(RA) && "Use is already live!");
+ for (const auto &MaybeLiveUse : MaybeLiveUses) {
+ if (isLive(MaybeLiveUse)) {
+ // A use is live, so this value is live.
+ markLive(RA);
+ break;
}
- break;
+ // Note any uses of this value, so this value can be
+ // marked live whenever one of the uses becomes live.
+ Uses.emplace(MaybeLiveUse, RA);
+ }
+ break;
}
}
-/// MarkLive - Mark the given Function as alive, meaning that it cannot be
-/// changed in any way. Additionally,
-/// mark any values that are used as this function's parameters or by its return
-/// values (according to Uses) live as well.
-void DeadArgumentEliminationPass::MarkLive(const Function &F) {
+/// Mark the given Function as alive, meaning that it cannot be changed in any
+/// way. Additionally, mark any values that are used as this function's
+/// parameters or by its return values (according to Uses) live as well.
+void DeadArgumentEliminationPass::markLive(const Function &F) {
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Intrinsically live fn: "
<< F.getName() << "\n");
// Mark the function as live.
LiveFunctions.insert(&F);
// Mark all arguments as live.
for (unsigned ArgI = 0, E = F.arg_size(); ArgI != E; ++ArgI)
- PropagateLiveness(CreateArg(&F, ArgI));
+ propagateLiveness(createArg(&F, ArgI));
// Mark all return values as live.
- for (unsigned Ri = 0, E = NumRetVals(&F); Ri != E; ++Ri)
- PropagateLiveness(CreateRet(&F, Ri));
+ for (unsigned Ri = 0, E = numRetVals(&F); Ri != E; ++Ri)
+ propagateLiveness(createRet(&F, Ri));
}
-/// MarkLive - Mark the given return value or argument as live. Additionally,
-/// mark any values that are used by this value (according to Uses) live as
-/// well.
-void DeadArgumentEliminationPass::MarkLive(const RetOrArg &RA) {
- if (IsLive(RA))
+/// Mark the given return value or argument as live. Additionally, mark any
+/// values that are used by this value (according to Uses) live as well.
+void DeadArgumentEliminationPass::markLive(const RetOrArg &RA) {
+ if (isLive(RA))
return; // Already marked Live.
LiveValues.insert(RA);
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Marking "
<< RA.getDescription() << " live\n");
- PropagateLiveness(RA);
+ propagateLiveness(RA);
}
-bool DeadArgumentEliminationPass::IsLive(const RetOrArg &RA) {
+bool DeadArgumentEliminationPass::isLive(const RetOrArg &RA) {
return LiveFunctions.count(RA.F) || LiveValues.count(RA);
}
-/// PropagateLiveness - Given that RA is a live value, propagate it's liveness
-/// to any other values it uses (according to Uses).
-void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) {
+/// Given that RA is a live value, propagate its liveness to any other values
+/// it uses (according to Uses).
+void DeadArgumentEliminationPass::propagateLiveness(const RetOrArg &RA) {
// We don't use upper_bound (or equal_range) here, because our recursive call
// to ourselves is likely to cause the upper_bound (which is the first value
// not belonging to RA) to become erased and the iterator invalidated.
@@ -723,18 +709,17 @@ void DeadArgumentEliminationPass::PropagateLiveness(const RetOrArg &RA) {
UseMap::iterator E = Uses.end();
UseMap::iterator I;
for (I = Begin; I != E && I->first == RA; ++I)
- MarkLive(I->second);
+ markLive(I->second);
// Erase RA from the Uses map (from the lower bound to wherever we ended up
// after the loop).
Uses.erase(Begin, I);
}
-// RemoveDeadStuffFromFunction - Remove any arguments and return values from F
-// that are not in LiveValues. Transform the function and all of the callees of
-// the function to not have these arguments and return values.
-//
-bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
+/// Remove any arguments and return values from F that are not in LiveValues.
+/// Transform the function and all the callees of the function to not have these
+/// arguments and return values.
+bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
// Don't modify fully live functions
if (LiveFunctions.count(F))
return false;
@@ -742,7 +727,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has fewer arguments and a different return type.
FunctionType *FTy = F->getFunctionType();
- std::vector<Type*> Params;
+ std::vector<Type *> Params;
// Keep track of if we have a live 'returned' argument
bool HasLiveReturnedArg = false;
@@ -759,7 +744,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
unsigned ArgI = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgI) {
- RetOrArg Arg = CreateArg(F, ArgI);
+ RetOrArg Arg = createArg(F, ArgI);
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[ArgI] = true;
@@ -776,11 +761,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Find out the new return value.
Type *RetTy = FTy->getReturnType();
Type *NRetTy = nullptr;
- unsigned RetCount = NumRetVals(F);
+ unsigned RetCount = numRetVals(F);
// -1 means unused, other numbers are the new index
SmallVector<int, 5> NewRetIdxs(RetCount, -1);
- std::vector<Type*> RetTypes;
+ std::vector<Type *> RetTypes;
// If there is a function with a live 'returned' argument but a dead return
// value, then there are two possible actions:
@@ -792,9 +777,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// It's not clear in the general case which option is more profitable because,
// even in the absence of explicit uses of the return value, code generation
// is free to use the 'returned' attribute to do things like eliding
- // save/restores of registers across calls. Whether or not this happens is
- // target and ABI-specific as well as depending on the amount of register
- // pressure, so there's no good way for an IR-level pass to figure this out.
+ // save/restores of registers across calls. Whether this happens is target and
+ // ABI-specific as well as depending on the amount of register pressure, so
+ // there's no good way for an IR-level pass to figure this out.
//
// Fortunately, the only places where 'returned' is currently generated by
// the FE are places where 'returned' is basically free and almost always a
@@ -806,7 +791,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
} else {
// Look at each of the original return values individually.
for (unsigned Ri = 0; Ri != RetCount; ++Ri) {
- RetOrArg Ret = CreateRet(F, Ri);
+ RetOrArg Ret = createRet(F, Ri);
if (LiveValues.erase(Ret)) {
RetTypes.push_back(getRetComponentType(F, Ri));
NewRetIdxs[Ri] = RetTypes.size() - 1;
@@ -879,9 +864,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
- // Loop over all of the callers of the function, transforming the call sites
- // to pass in a smaller number of arguments into the new function.
- std::vector<Value*> Args;
+ // Loop over all the callers of the function, transforming the call sites to
+ // pass in a smaller number of arguments into the new function.
+ std::vector<Value *> Args;
while (!F->use_empty()) {
CallBase &CB = cast<CallBase>(*F->user_back());
@@ -896,7 +881,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
- auto I = CB.arg_begin();
+ auto *I = CB.arg_begin();
unsigned Pi = 0;
// Loop over those operands, corresponding to the normal arguments to the
// original function, and add those that are still alive.
@@ -909,11 +894,11 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
// call sites keep the return value alive just like 'returned'
- // attributes on function declaration but it's less clearly a win and
+ // attributes on function declaration, but it's less clearly a win and
// this is not an expected case anyway
ArgAttrVec.push_back(AttributeSet::get(
- F->getContext(),
- AttrBuilder(F->getContext(), Attrs).removeAttribute(Attribute::Returned)));
+ F->getContext(), AttrBuilder(F->getContext(), Attrs)
+ .removeAttribute(Attribute::Returned)));
} else {
// Otherwise, use the original attributes.
ArgAttrVec.push_back(Attrs);
@@ -921,7 +906,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
}
// Push any varargs arguments on the list. Don't forget their attributes.
- for (auto E = CB.arg_end(); I != E; ++I, ++Pi) {
+ for (auto *E = CB.arg_end(); I != E; ++I, ++Pi) {
Args.push_back(*I);
ArgAttrVec.push_back(CallPAL.getParamAttrs(Pi));
}
@@ -934,8 +919,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet FnAttrs = CallPAL.getFnAttrs().removeAttribute(
F->getContext(), Attribute::AllocSize);
- AttributeList NewCallPAL = AttributeList::get(
- F->getContext(), FnAttrs, RetAttrs, ArgAttrVec);
+ AttributeList NewCallPAL =
+ AttributeList::get(F->getContext(), FnAttrs, RetAttrs, ArgAttrVec);
SmallVector<OperandBundleDef, 1> OpBundles;
CB.getOperandBundlesAsDefs(OpBundles);
@@ -961,10 +946,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
CB.replaceAllUsesWith(NewCB);
NewCB->takeName(&CB);
} else if (NewCB->getType()->isVoidTy()) {
- // If the return value is dead, replace any uses of it with undef
+ // If the return value is dead, replace any uses of it with poison
// (any non-debug value uses will get removed later on).
if (!CB.getType()->isX86_MMXTy())
- CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
+ CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
} else {
assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
@@ -980,8 +965,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// with all the uses, we will just rebuild it using extract/insertvalue
// chaining and let instcombine clean that up.
//
- // Start out building up our return value from undef
- Value *RetVal = UndefValue::get(RetTy);
+ // Start out building up our return value from poison
+ Value *RetVal = PoisonValue::get(RetTy);
for (unsigned Ri = 0; Ri != RetCount; ++Ri)
if (NewRetIdxs[Ri] != -1) {
Value *V;
@@ -1026,10 +1011,10 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
I2->takeName(&*I);
++I2;
} else {
- // If this argument is dead, replace any uses of it with undef
+ // If this argument is dead, replace any uses of it with poison
// (any non-debug value uses will get removed later on).
if (!I->getType()->isX86_MMXTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
}
// If we change the return value of the function we must rewrite any return
@@ -1048,8 +1033,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// This does generate messy code, but we'll leave it to instcombine to
// clean that up.
Value *OldRet = RI->getOperand(0);
- // Start out building up our return value from undef
- RetVal = UndefValue::get(NRetTy);
+ // Start out building up our return value from poison
+ RetVal = PoisonValue::get(NRetTy);
for (unsigned RetI = 0; RetI != RetCount; ++RetI)
if (NewRetIdxs[RetI] != -1) {
Value *EV = IRB.CreateExtractValue(OldRet, RetI, "oldret");
@@ -1074,12 +1059,22 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
BB.getInstList().erase(RI);
}
- // Clone metadatas from the old function, including debug info descriptor.
+ // Clone metadata from the old function, including debug info descriptor.
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
F->getAllMetadata(MDs);
for (auto MD : MDs)
NF->addMetadata(MD.first, *MD.second);
+ // If either the return value(s) or argument(s) are removed, then probably the
+ // function does not follow standard calling conventions anymore. Hence, add
+ // DW_CC_nocall to DISubroutineType to inform debugger that it may not be safe
+ // to call this function or try to interpret the return value.
+ if (NFTy != FTy && NF->getSubprogram()) {
+ DISubprogram *SP = NF->getSubprogram();
+ auto Temp = SP->getType()->cloneWithCC(llvm::dwarf::DW_CC_nocall);
+ SP->replaceType(MDNode::replaceWithPermanent(std::move(Temp)));
+ }
+
// Now that the old function is dead, delete it.
F->eraseFromParent();
@@ -1097,26 +1092,25 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n");
for (Function &F : llvm::make_early_inc_range(M))
if (F.getFunctionType()->isVarArg())
- Changed |= DeleteDeadVarargs(F);
+ Changed |= deleteDeadVarargs(F);
- // Second phase:loop through the module, determining which arguments are live.
- // We assume all arguments are dead unless proven otherwise (allowing us to
- // determine that dead arguments passed into recursive functions are dead).
- //
+ // Second phase: Loop through the module, determining which arguments are
+ // live. We assume all arguments are dead unless proven otherwise (allowing us
+ // to determine that dead arguments passed into recursive functions are dead).
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Determining liveness\n");
for (auto &F : M)
- SurveyFunction(F);
+ surveyFunction(F);
// Now, remove all dead arguments and return values from each function in
// turn. We use make_early_inc_range here because functions will probably get
// removed (i.e. replaced by new ones).
for (Function &F : llvm::make_early_inc_range(M))
- Changed |= RemoveDeadStuffFromFunction(&F);
+ Changed |= removeDeadStuffFromFunction(&F);
// Finally, look for any unused parameters in functions with non-local
- // linkage and replace the passed in parameters with undef.
+ // linkage and replace the passed in parameters with poison.
for (auto &F : M)
- Changed |= RemoveDeadArgumentsFromCallers(F);
+ Changed |= removeDeadArgumentsFromCallers(F);
if (!Changed)
return PreservedAnalyses::all();
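Dead argument elimination now rewrites uses of removed arguments and return values to poison instead of undef; poison gives the optimizer strictly more freedom and avoids claiming a defined-but-arbitrary value. A sketch of the replacement pattern used above, where the helper is illustrative and the X86_MMX guard from the pass is kept for accuracy:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instruction.h"

    // Rewrite all remaining uses of a known-dead, non-void value to poison,
    // mirroring CB.replaceAllUsesWith(PoisonValue::get(CB.getType())) above.
    static void replaceDeadWithPoison(llvm::Instruction &I) {
      if (!I.getType()->isVoidTy() && !I.getType()->isX86_MMXTy())
        I.replaceAllUsesWith(llvm::PoisonValue::get(I.getType()));
    }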
diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 387f114f6ffa..84280781ee70 100644
--- a/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 16d00a0c89e1..b10c2ea13469 100644
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -8,9 +8,9 @@
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 213a998d5bba..49077f92884f 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -30,7 +30,6 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -45,6 +44,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -69,6 +69,7 @@ using namespace llvm;
#define DEBUG_TYPE "function-attrs"
+STATISTIC(NumArgMemOnly, "Number of functions marked argmemonly");
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
@@ -121,28 +122,28 @@ using SCCNodeSet = SmallSetVector<Function *, 8>;
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
-static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
- AAResults &AAR,
- const SCCNodeSet &SCCNodes) {
+static FunctionModRefBehavior
+checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
+ const SCCNodeSet &SCCNodes) {
FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
if (MRB == FMRB_DoesNotAccessMemory)
// Already perfect!
- return MAK_ReadNone;
+ return MRB;
- if (!ThisBody) {
- if (AliasAnalysis::onlyReadsMemory(MRB))
- return MAK_ReadOnly;
-
- if (AliasAnalysis::onlyWritesMemory(MRB))
- return MAK_WriteOnly;
-
- // Conservatively assume it reads and writes to memory.
- return MAK_MayWrite;
- }
+ if (!ThisBody)
+ return MRB;
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
bool WritesMemory = false;
+ // Track if the function accesses memory not based on pointer arguments or
+ // allocas.
+ bool AccessesNonArgsOrAlloca = false;
+ // Returns true if Ptr is based on a function argument or an alloca.
+ auto IsArgumentOrAlloca = [](const Value *Ptr) {
+ const Value *UO = getUnderlyingObject(Ptr);
+ return isa<Argument>(UO) || isa<AllocaInst>(UO);
+ };
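+ // Example (sketch, not exhaustive): in
+ //   void f(int *P) { int Local; *P = 0; Local = 1; }
+ // both stores go through an argument- or alloca-based pointer, so
+ // IsArgumentOrAlloca returns true for each and AccessesNonArgsOrAlloca
+ // stays false; a store through a global pointer would set it to true.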
for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
@@ -175,6 +176,7 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// If it reads, note it.
if (isRefSet(MRI))
ReadsMemory = true;
+ AccessesNonArgsOrAlloca = true;
continue;
}
@@ -187,12 +189,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
MemoryLocation Loc =
MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata());
-
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
+
if (isModSet(MRI))
// Writes non-local memory.
WritesMemory = true;
@@ -202,24 +205,29 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
}
continue;
} else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ MemoryLocation Loc = MemoryLocation::get(LI);
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
- if (!LI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(LI);
- if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
+ if (!LI->isVolatile() &&
+ AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ MemoryLocation Loc = MemoryLocation::get(SI);
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
- if (!SI->isVolatile()) {
- MemoryLocation Loc = MemoryLocation::get(SI);
- if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
- continue;
- }
+ if (!SI->isVolatile() &&
+ AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
} else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) {
// Ignore vaargs on local memory.
MemoryLocation Loc = MemoryLocation::get(VI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
+ AccessesNonArgsOrAlloca |= !IsArgumentOrAlloca(Loc.Ptr);
+ } else {
+ // If AccessesNonArgsOrAlloca has not been updated above, set it
+ // conservatively.
+ AccessesNonArgsOrAlloca |= I.mayReadOrWriteMemory();
}
// Any remaining instructions need to be taken seriously! Check if they
@@ -232,61 +240,74 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
ReadsMemory |= I.mayReadFromMemory();
}
- if (WritesMemory) {
- if (!ReadsMemory)
- return MAK_WriteOnly;
- else
- return MAK_MayWrite;
- }
-
- return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
+ if (!WritesMemory && !ReadsMemory)
+ return FMRB_DoesNotAccessMemory;
+
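+ // Sketch of the composition below (bit layout per this version's
+ // AliasAnalysis.h): a function that reads and writes only through its
+ // pointer arguments or allocas ends up as
+ //   FMRL_ArgumentPointees | ModRefInfo::Mod | ModRefInfo::Ref
+ // which is FMRB_OnlyAccessesArgumentPointees; one that also touches
+ // other memory keeps the FMRL_Anywhere location bits instead.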
+ FunctionModRefBehavior Result = FunctionModRefBehavior(FMRL_Anywhere);
+ if (!AccessesNonArgsOrAlloca)
+ Result = FunctionModRefBehavior(FMRL_ArgumentPointees);
+ if (WritesMemory)
+ Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Mod));
+ if (ReadsMemory)
+ Result = FunctionModRefBehavior(Result | static_cast<int>(ModRefInfo::Ref));
+ return Result;
}
-MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
- AAResults &AAR) {
+FunctionModRefBehavior llvm::computeFunctionBodyMemoryAccess(Function &F,
+ AAResults &AAR) {
return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
}
-/// Deduce readonly/readnone attributes for the SCC.
+/// Deduce readonly/readnone/writeonly attributes for the SCC.
template <typename AARGetterT>
-static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
- SmallSet<Function *, 8> &Changed) {
+static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+ SmallSet<Function *, 8> &Changed) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
bool WritesMemory = false;
+ // Check if all functions only access memory through their arguments.
+ bool ArgMemOnly = true;
for (Function *F : SCCNodes) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
-
// Non-exact function definitions may not be selected at link time, and an
// alternative version that writes to memory may be selected. See the
// comment on GlobalValue::isDefinitionExact for more details.
- switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
- AAR, SCCNodes)) {
- case MAK_MayWrite:
+ FunctionModRefBehavior FMRB =
+ checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
+ if (FMRB == FMRB_DoesNotAccessMemory)
+ continue;
+ ModRefInfo MR = createModRefInfo(FMRB);
+ ReadsMemory |= isRefSet(MR);
+ WritesMemory |= isModSet(MR);
+ ArgMemOnly &= AliasAnalysis::onlyAccessesArgPointees(FMRB);
+ // Reached a state where neither readnone, readonly, writeonly nor
+ // argmemonly can be inferred. Exit early.
+ if (ReadsMemory && WritesMemory && !ArgMemOnly)
return;
- case MAK_ReadOnly:
- ReadsMemory = true;
- break;
- case MAK_WriteOnly:
- WritesMemory = true;
- break;
- case MAK_ReadNone:
- // Nothing to do!
- break;
- }
}
- // If the SCC contains both functions that read and functions that write, then
- // we cannot add readonly attributes.
- if (ReadsMemory && WritesMemory)
- return;
-
- // Success! Functions in this SCC do not access memory, or only read memory.
- // Give them the appropriate attribute.
+ assert((!ReadsMemory || !WritesMemory || ArgMemOnly) &&
+ "no memory attributes can be added for this SCC, should have exited "
+ "earlier");
+ // Success! Functions in this SCC do not access memory, only read memory,
+ // only write memory, or only access memory through their arguments. Give them
+ // the appropriate attribute.
for (Function *F : SCCNodes) {
+ // If possible, add the argmemonly attribute to F if it accesses memory.
+ if (ArgMemOnly && !F->onlyAccessesArgMemory() &&
+ (ReadsMemory || WritesMemory)) {
+ NumArgMemOnly++;
+ F->addFnAttr(Attribute::ArgMemOnly);
+ Changed.insert(F);
+ }
+
+ // The SCC contains functions that both read from and write to memory. We
+ // cannot add readonly or writeonly attributes.
+ if (ReadsMemory && WritesMemory)
+ continue;
if (F->doesNotAccessMemory())
// Already perfect!
continue;
@@ -1614,6 +1635,26 @@ static bool basicBlockCanReturn(BasicBlock &BB) {
return none_of(BB, instructionDoesNotReturn);
}
+// FIXME: this doesn't handle recursion.
+static bool canReturn(Function &F) {
+ SmallVector<BasicBlock *, 16> Worklist;
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ Visited.insert(&F.front());
+ Worklist.push_back(&F.front());
+
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (basicBlockCanReturn(*BB))
+ return true;
+ for (BasicBlock *Succ : successors(BB))
+ if (Visited.insert(Succ).second)
+ Worklist.push_back(Succ);
+ } while (!Worklist.empty());
+
+ return false;
+}
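+// Example (sketch): for a function whose only 'ret' sits in a block that is
+// unreachable from the entry, e.g.
+//   entry: call void @abort()   ; noreturn
+//          unreachable
+//   bb:    ret void             ; no path from entry
+// canReturn(F) is false, whereas scanning every block with
+// basicBlockCanReturn would have found the dead 'ret' and given up.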
+
// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
@@ -1622,9 +1663,7 @@ static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
F->doesNotReturn())
continue;
- // The function can return if any basic blocks can return.
- // FIXME: this doesn't handle recursion or unreachable blocks.
- if (none_of(*F, basicBlockCanReturn)) {
+ if (!canReturn(*F)) {
F->setDoesNotReturn();
Changed.insert(F);
}
@@ -1792,7 +1831,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
SmallSet<Function *, 8> Changed;
addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
- addReadAttrs(Nodes.SCCNodes, AARGetter, Changed);
+ addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
addArgumentAttrs(Nodes.SCCNodes, Changed);
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
@@ -1896,6 +1935,7 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
@@ -1975,12 +2015,13 @@ static bool addNoRecurseAttrsTopDown(Function &F) {
// this function could be recursively (indirectly) called. Note that this
// also detects if F is directly recursive as F is not yet marked as
// a norecurse function.
- for (auto *U : F.users()) {
- auto *I = dyn_cast<Instruction>(U);
+ for (auto &U : F.uses()) {
+ auto *I = dyn_cast<Instruction>(U.getUser());
if (!I)
return false;
CallBase *CB = dyn_cast<CallBase>(I);
- if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
+ if (!CB || !CB->isCallee(&U) ||
+ !CB->getParent()->getParent()->doesNotRecurse())
return false;
}
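+ // Example (sketch): if F escapes as a callback, e.g. qsort(..., F), the
+ // user is a CallBase but isCallee(&U) is false for that use, so the loop
+ // above now bails out instead of treating it like a direct call to F.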
F.setDoesNotRecurse();
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index d9b43109f629..56e2df14ff38 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
@@ -33,8 +32,6 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
#include "llvm/Linker/IRMover.h"
-#include "llvm/Object/ModuleSymbolTable.h"
-#include "llvm/Object/SymbolicFile.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1112,12 +1109,13 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule,
llvm_unreachable("Expected GV to be converted");
} else {
// If all copies of the original symbol had global unnamed addr and
- // linkonce_odr linkage, it should be an auto hide symbol. In that case
- // the thin link would have marked it as CanAutoHide. Add hidden visibility
- // to the symbol to preserve the property.
+ // linkonce_odr linkage, or if all of them had local unnamed addr linkage
+ // and are constants, then it should be an auto hide symbol. In that case
+ // the thin link would have marked it as CanAutoHide. Add hidden
+ // visibility to the symbol to preserve the property.
if (NewLinkage == GlobalValue::WeakODRLinkage &&
GS->second->canAutoHide()) {
- assert(GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr());
+ assert(GV.canBeOmittedFromSymbolTable());
GV.setVisibility(GlobalValue::HiddenVisibility);
}
@@ -1330,10 +1328,9 @@ Expected<bool> FunctionImporter::importFunctions(
<< " from " << SrcModule->getSourceFileName() << "\n";
}
- if (Error Err = Mover.move(
- std::move(SrcModule), GlobalsToImport.getArrayRef(),
- [](GlobalValue &, IRMover::ValueAdder) {},
- /*IsPerformingImport=*/true))
+ if (Error Err = Mover.move(std::move(SrcModule),
+ GlobalsToImport.getArrayRef(), nullptr,
+ /*IsPerformingImport=*/true))
report_fatal_error(Twine("Function Import: link error: ") +
toString(std::move(Err)));
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 6c3cc3914337..dafd0dc865a2 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -19,11 +19,8 @@
// Current limitations:
// - It does not yet handle integer ranges. We do support "literal constants",
// but that's off by default under an option.
-// - Only 1 argument per function is specialised,
// - The cost-model could be refined further (it mainly focuses on inlining
// benefits),
-// - We are not yet caching analysis results, but profiling and checking where
-// extra compile time is spent didn't suggest this to be a problem.
//
// Ideas:
// - With a function specialization attribute for arguments, we could have
@@ -49,15 +46,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueLattice.h"
+#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <cmath>
@@ -98,8 +96,13 @@ static cl::opt<bool> SpecializeOnAddresses(
"func-specialization-on-address", cl::init(false), cl::Hidden,
cl::desc("Enable function specialization on the address of global values"));
-// TODO: This needs checking to see the impact on compile-times, which is why
-// this is off by default for now.
+// Disabled by default as it can significantly increase compilation times.
+// Running nikic's compile-time tracker on x86 with instruction count as the
+// metric shows a 3-4% regression for SPASS while being neutral for all other
+// benchmarks of the LLVM test suite.
+//
+// https://llvm-compile-time-tracker.com
+// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> EnableSpecializationForLiteralConstant(
"function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
cl::desc("Enable specialization of functions that take a literal constant "
@@ -108,24 +111,18 @@ static cl::opt<bool> EnableSpecializationForLiteralConstant(
namespace {
// Bookkeeping struct to pass data from the analysis and profitability phase
// to the actual transform helper functions.
-struct ArgInfo {
- Function *Fn; // The function to perform specialisation on.
- Argument *Arg; // The Formal argument being analysed.
- Constant *Const; // A corresponding actual constant argument.
- InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
-
- // Flag if this will be a partial specialization, in which case we will need
- // to keep the original function around in addition to the added
- // specializations.
- bool Partial = false;
-
- ArgInfo(Function *F, Argument *A, Constant *C, InstructionCost G)
- : Fn(F), Arg(A), Const(C), Gain(G){};
+struct SpecializationInfo {
+ SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
+ InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
};
} // Anonymous namespace
using FuncList = SmallVectorImpl<Function *>;
-using ConstList = SmallVectorImpl<Constant *>;
+using CallArgBinding = std::pair<CallBase *, Constant *>;
+using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
+// We are using MapVector because it guarantees deterministic iteration
+// order across executions.
+using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
// Helper to check if \p LV is either a constant or a constant
// range with a single element. This should cover exactly the same cases as the
@@ -204,41 +201,45 @@ static Constant *getConstantStackValue(CallInst *Call, Value *Val,
// ret void
// }
//
-static void constantArgPropagation(FuncList &WorkList,
- Module &M, SCCPSolver &Solver) {
+static void constantArgPropagation(FuncList &WorkList, Module &M,
+ SCCPSolver &Solver) {
// Iterate over the argument tracked functions see if there
// are any new constant values for the call instruction via
// stack variables.
for (auto *F : WorkList) {
- // TODO: Generalize for any read only arguments.
- if (F->arg_size() != 1)
- continue;
-
- auto &Arg = *F->arg_begin();
- if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy())
- continue;
for (auto *User : F->users()) {
+
auto *Call = dyn_cast<CallInst>(User);
if (!Call)
- break;
- auto *ArgOp = Call->getArgOperand(0);
- auto *ArgOpType = ArgOp->getType();
- auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
- if (!ConstVal)
- break;
+ continue;
- Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
- GlobalValue::InternalLinkage, ConstVal,
- "funcspec.arg");
+ bool Changed = false;
+ for (const Use &U : Call->args()) {
+ unsigned Idx = Call->getArgOperandNo(&U);
+ Value *ArgOp = Call->getArgOperand(Idx);
+ Type *ArgOpType = ArgOp->getType();
- if (ArgOpType != ConstVal->getType())
- GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType());
+ if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
+ continue;
- Call->setArgOperand(0, GV);
+ auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
+ if (!ConstVal)
+ continue;
+
+ Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
+ GlobalValue::InternalLinkage, ConstVal,
+ "funcspec.arg");
+ if (ArgOpType != ConstVal->getType())
+ GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
+
+ Call->setArgOperand(Idx, GV);
+ Changed = true;
+ }
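+ // Example (sketch): for a call foo(%buf1, %buf2) where both operands are
+ // read-only pointer arguments referring to constant stack values, both
+ // are now promoted to "funcspec.arg" globals; previously only operand 0
+ // of single-argument functions was considered.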
// Add the changed CallInst to Solver Worklist
- Solver.visitCall(*Call);
+ if (Changed)
+ Solver.visitCall(*Call);
}
}
}
@@ -275,7 +276,10 @@ class FunctionSpecializer {
std::function<TargetTransformInfo &(Function &)> GetTTI;
std::function<TargetLibraryInfo &(Function &)> GetTLI;
- SmallPtrSet<Function *, 2> SpecializedFuncs;
+ SmallPtrSet<Function *, 4> SpecializedFuncs;
+ SmallPtrSet<Function *, 4> FullySpecialized;
+ SmallVector<Instruction *> ReplacedWithConstant;
+ DenseMap<Function *, CodeMetrics> FunctionMetrics;
public:
FunctionSpecializer(SCCPSolver &Solver,
@@ -284,42 +288,66 @@ public:
std::function<TargetLibraryInfo &(Function &)> GetTLI)
: Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {}
+ ~FunctionSpecializer() {
+ // Eliminate dead code.
+ removeDeadInstructions();
+ removeDeadFunctions();
+ }
+
/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
/// \returns true if at least one function is specialized.
- bool
- specializeFunctions(FuncList &FuncDecls,
- FuncList &CurrentSpecializations) {
+ bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) {
bool Changed = false;
- for (auto *F : FuncDecls) {
- if (!isCandidateFunction(F, CurrentSpecializations))
+ for (auto *F : Candidates) {
+ if (!isCandidateFunction(F))
continue;
auto Cost = getSpecializationCost(F);
if (!Cost.isValid()) {
LLVM_DEBUG(
- dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
+ dbgs() << "FnSpecialization: Invalid specialization cost.\n");
continue;
}
- auto ConstArgs = calculateGains(F, Cost);
- if (ConstArgs.empty()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
+ << F->getName() << " is " << Cost << "\n");
+
+ SmallVector<CallSpecBinding, 8> Specializations;
+ if (!calculateGains(F, Cost, Specializations)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n");
continue;
}
- for (auto &CA : ConstArgs) {
- specializeFunction(CA, CurrentSpecializations);
- Changed = true;
- }
+ Changed = true;
+ for (auto &Entry : Specializations)
+ specializeFunction(F, Entry.second, WorkList);
}
- updateSpecializedFuncs(FuncDecls, CurrentSpecializations);
+ updateSpecializedFuncs(Candidates, WorkList);
NumFuncSpecialized += NbFunctionsSpecialized;
return Changed;
}
+ void removeDeadInstructions() {
+ for (auto *I : ReplacedWithConstant) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I
+ << "\n");
+ I->eraseFromParent();
+ }
+ ReplacedWithConstant.clear();
+ }
+
+ void removeDeadFunctions() {
+ for (auto *F : FullySpecialized) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
+ << F->getName() << "\n");
+ F->eraseFromParent();
+ }
+ FullySpecialized.clear();
+ }
+
bool tryToReplaceWithConstant(Value *V) {
if (!V->getType()->isSingleValueType() || isa<CallBase>(V) ||
V->user_empty())
@@ -330,17 +358,26 @@ public:
return false;
auto *Const =
isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
- V->replaceAllUsesWith(Const);
- for (auto *U : Const->users())
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V
+ << "\nFnSpecialization: with " << *Const << "\n");
+
+ // Record uses of V to avoid visiting irrelevant uses of Const later.
+ SmallVector<Instruction *> UseInsts;
+ for (auto *U : V->users())
if (auto *I = dyn_cast<Instruction>(U))
if (Solver.isBlockExecutable(I->getParent()))
- Solver.visit(I);
+ UseInsts.push_back(I);
+
+ V->replaceAllUsesWith(Const);
+
+ for (auto *I : UseInsts)
+ Solver.visit(I);
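+ // Sketch of why the snapshot matters: if Const is, say, i32 0 and already
+ // has many unrelated users in the module, iterating Const->users() after
+ // the RAUW (as the old code did) would re-visit all of them; UseInsts
+ // limits the solver to the instructions that actually used V.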
// Remove the instruction from Block and Solver.
if (auto *I = dyn_cast<Instruction>(V)) {
if (I->isSafeToRemove()) {
- I->eraseFromParent();
+ ReplacedWithConstant.push_back(I);
Solver.removeLatticeValueFor(I);
}
}
@@ -352,92 +389,108 @@ private:
// also in the cost model.
unsigned NbFunctionsSpecialized = 0;
+ // Compute the code metrics for function \p F.
+ CodeMetrics &analyzeFunction(Function *F) {
+ auto I = FunctionMetrics.insert({F, CodeMetrics()});
+ CodeMetrics &Metrics = I.first->second;
+ if (I.second) {
+ // The code metrics were not cached.
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
+ for (BasicBlock &BB : *F)
+ Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
+ << F->getName() << " is " << Metrics.NumInsts
+ << " instructions\n");
+ }
+ return Metrics;
+ }
+
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
- Function *cloneCandidateFunction(Function *F) {
- ValueToValueMapTy EmptyMap;
- Function *Clone = CloneFunction(F, EmptyMap);
+ Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) {
+ Function *Clone = CloneFunction(F, Mappings);
removeSSACopy(*Clone);
return Clone;
}
- /// This function decides whether it's worthwhile to specialize function \p F
- /// based on the known constant values its arguments can take on, i.e. it
- /// calculates a gain and returns a list of actual arguments that are deemed
- /// profitable to specialize. Specialization is performed on the first
- /// interesting argument. Specializations based on additional arguments will
- /// be evaluated on following iterations of the main IPSCCP solve loop.
- SmallVector<ArgInfo> calculateGains(Function *F, InstructionCost Cost) {
- SmallVector<ArgInfo> Worklist;
+ /// This function decides whether it's worthwhile to specialize function
+ /// \p F based on the known constant values its arguments can take on. It
+ /// only discovers potential specialization opportunities without actually
+ /// applying them.
+ ///
+ /// \returns true if any specializations have been found.
+ bool calculateGains(Function *F, InstructionCost Cost,
+ SmallVectorImpl<CallSpecBinding> &WorkList) {
+ SpecializationMap Specializations;
// Determine if we should specialize the function based on the values the
// argument can take on. If specialization is not profitable, we continue
// on to the next argument.
for (Argument &FormalArg : F->args()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing arg: "
- << FormalArg.getName() << "\n");
// Determine if this argument is interesting. If we know the argument can
- // take on any constant values, they are collected in Constants. If the
- // argument can only ever equal a constant value in Constants, the
- // function will be completely specialized, and the IsPartial flag will
- // be set to false by isArgumentInteresting (that function only adds
- // values to the Constants list that are deemed profitable).
- bool IsPartial = true;
- SmallVector<Constant *> ActualConstArg;
- if (!isArgumentInteresting(&FormalArg, ActualConstArg, IsPartial)) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
+ // take on constant values, those values are collected in Constants.
+ SmallVector<CallArgBinding, 8> ActualArgs;
+ if (!isArgumentInteresting(&FormalArg, ActualArgs)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Argument "
+ << FormalArg.getNameOrAsOperand()
+ << " is not interesting\n");
continue;
}
- for (auto *ActualArg : ActualConstArg) {
- InstructionCost Gain =
- ForceFunctionSpecialization
- ? 1
- : getSpecializationBonus(&FormalArg, ActualArg) - Cost;
-
- if (Gain <= 0)
- continue;
- Worklist.push_back({F, &FormalArg, ActualArg, Gain});
- }
+ for (const auto &Entry : ActualArgs) {
+ CallBase *Call = Entry.first;
+ Constant *ActualArg = Entry.second;
- if (Worklist.empty())
- continue;
+ auto I = Specializations.insert({Call, SpecializationInfo()});
+ SpecializationInfo &S = I.first->second;
- // Sort the candidates in descending order.
- llvm::stable_sort(Worklist, [](const ArgInfo &L, const ArgInfo &R) {
- return L.Gain > R.Gain;
- });
-
- // Truncate the worklist to 'MaxClonesThreshold' candidates if
- // necessary.
- if (Worklist.size() > MaxClonesThreshold) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: number of candidates exceed "
- << "the maximum number of clones threshold.\n"
- << "Truncating worklist to " << MaxClonesThreshold
- << " candidates.\n");
- Worklist.erase(Worklist.begin() + MaxClonesThreshold,
- Worklist.end());
+ if (I.second)
+ S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost;
+ if (!ForceFunctionSpecialization)
+ S.Gain += getSpecializationBonus(&FormalArg, ActualArg);
+ S.Args.push_back({&FormalArg, ActualArg});
}
+ }
- if (IsPartial || Worklist.size() < ActualConstArg.size())
- for (auto &ActualArg : Worklist)
- ActualArg.Partial = true;
-
- LLVM_DEBUG(dbgs() << "Sorted list of candidates by gain:\n";
- for (auto &C
- : Worklist) {
- dbgs() << "- Function = " << C.Fn->getName() << ", ";
- dbgs() << "FormalArg = " << C.Arg->getName() << ", ";
- dbgs() << "ActualArg = " << C.Const->getName() << ", ";
- dbgs() << "Gain = " << C.Gain << "\n";
- });
-
- // FIXME: Only one argument per function.
- break;
+ // Remove unprofitable specializations.
+ Specializations.remove_if(
+ [](const auto &Entry) { return Entry.second.Gain <= 0; });
+
+ // Clear the MapVector and return the underlying vector.
+ WorkList = Specializations.takeVector();
+
+ // Sort the candidates in descending order.
+ llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
+ return L.second.Gain > R.second.Gain;
+ });
+
+ // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
+ if (WorkList.size() > MaxClonesThreshold) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
+ << "the maximum number of clones threshold.\n"
+ << "FnSpecialization: Truncating worklist to "
+ << MaxClonesThreshold << " candidates.\n");
+ WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
}
- return Worklist;
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
+ << F->getName() << "\n";
+ for (const auto &Entry
+ : WorkList) {
+ dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain
+ << "\n";
+ for (const ArgInfo &Arg : Entry.second.Args)
+ dbgs() << "FnSpecialization: FormalArg = "
+ << Arg.Formal->getNameOrAsOperand()
+ << ", ActualArg = "
+ << Arg.Actual->getNameOrAsOperand() << "\n";
+ });
+
+ return !WorkList.empty();
}
- bool isCandidateFunction(Function *F, FuncList &Specializations) {
+ bool isCandidateFunction(Function *F) {
// Do not specialize the cloned function again.
if (SpecializedFuncs.contains(F))
return false;
@@ -461,44 +514,45 @@ private:
return true;
}
- void specializeFunction(ArgInfo &AI, FuncList &Specializations) {
- Function *Clone = cloneCandidateFunction(AI.Fn);
- Argument *ClonedArg = Clone->getArg(AI.Arg->getArgNo());
+ void specializeFunction(Function *F, SpecializationInfo &S,
+ FuncList &WorkList) {
+ ValueToValueMapTy Mappings;
+ Function *Clone = cloneCandidateFunction(F, Mappings);
// Rewrite calls to the function so that they call the clone instead.
- rewriteCallSites(AI.Fn, Clone, *ClonedArg, AI.Const);
+ rewriteCallSites(Clone, S.Args, Mappings);
// Initialize the lattice state of the arguments of the function clone,
// marking the argument on which we specialized the function constant
// with the given value.
- Solver.markArgInFuncSpecialization(AI.Fn, ClonedArg, AI.Const);
+ Solver.markArgInFuncSpecialization(Clone, S.Args);
// Mark all the specialized functions
- Specializations.push_back(Clone);
+ WorkList.push_back(Clone);
NbFunctionsSpecialized++;
// If the function has been completely specialized, the original function
// is no longer needed. Mark it unreachable.
- if (!AI.Partial)
- Solver.markFunctionUnreachable(AI.Fn);
+ if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) {
+ if (auto *CS = dyn_cast<CallBase>(U))
+ return CS->getFunction() == F;
+ return false;
+ })) {
+ Solver.markFunctionUnreachable(F);
+ FullySpecialized.insert(F);
+ }
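+ // I.e. the original F is treated as fully specialized only when every
+ // remaining user is a call site inside F itself (a recursive call).
+ // Example (sketch): after cloning a recursive fact() for its constant
+ // argument, the only leftover call to the original fact() is the one in
+ // its own body, so the original is marked unreachable and later removed.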
}
/// Compute and return the cost of specializing function \p F.
InstructionCost getSpecializationCost(Function *F) {
- // Compute the code metrics for the function.
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
- CodeMetrics Metrics;
- for (BasicBlock &BB : *F)
- Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
-
+ CodeMetrics &Metrics = analyzeFunction(F);
// If the code metrics reveal that we shouldn't duplicate the function, we
// shouldn't specialize it. Set the specialization cost to Invalid.
// Or if the number of lines of code implies that this function is easy
// to inline, in which case we shouldn't specialize it.
- if (Metrics.notDuplicatable ||
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
(!ForceFunctionSpecialization &&
- Metrics.NumInsts < SmallFunctionThreshold)) {
+ *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
@@ -539,31 +593,20 @@ private:
DominatorTree DT(*F);
LoopInfo LI(DT);
auto &TTI = (GetTTI)(*F);
- LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for: " << *A
- << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
+ << C->getNameOrAsOperand() << "\n");
InstructionCost TotalCost = 0;
for (auto *U : A->users()) {
TotalCost += getUserBonus(U, TTI, LI);
- LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
+ LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
}
// The below heuristic is only concerned with exposing inlining
// opportunities via indirect call promotion. If the argument is not a
- // function pointer, give up.
- if (!isa<PointerType>(A->getType()) ||
- !isa<FunctionType>(A->getType()->getPointerElementType()))
- return TotalCost;
-
- // Since the argument is a function pointer, its incoming constant values
- // should be functions or constant expressions. The code below attempts to
- // look through cast expressions to find the function that will be called.
- Value *CalledValue = C;
- while (isa<ConstantExpr>(CalledValue) &&
- cast<ConstantExpr>(CalledValue)->isCast())
- CalledValue = cast<User>(CalledValue)->getOperand(0);
- Function *CalledFunction = dyn_cast<Function>(CalledValue);
+ // (potentially casted) function pointer, give up.
+ Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
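+ // Example (sketch): C may be a cast such as
+ //   bitcast (void (i32*)* @callee to i8*)
+ // stripPointerCasts() looks through it to recover @callee, replacing the
+ // old manual walk over cast ConstantExprs, and gives up cleanly when the
+ // argument is not a function pointer at all.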
if (!CalledFunction)
return TotalCost;
@@ -603,6 +646,9 @@ private:
Bonus += Params.DefaultThreshold;
else if (IC.isVariable() && IC.getCostDelta() > 0)
Bonus += IC.getCostDelta();
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
+ << " for user " << *U << "\n");
}
return TotalCost + Bonus;
@@ -615,15 +661,12 @@ private:
/// specializing the function based on the incoming values of argument \p A
/// would result in any significant optimization opportunities. If
/// optimization opportunities exist, the constant values of \p A on which to
- /// specialize the function are collected in \p Constants. If the values in
- /// \p Constants represent the complete set of values that \p A can take on,
- /// the function will be completely specialized, and the \p IsPartial flag is
- /// set to false.
+ /// specialize the function are collected in \p Constants.
///
/// \returns true if the function should be specialized on the given
/// argument.
- bool isArgumentInteresting(Argument *A, ConstList &Constants,
- bool &IsPartial) {
+ bool isArgumentInteresting(Argument *A,
+ SmallVectorImpl<CallArgBinding> &Constants) {
// For now, don't attempt to specialize functions based on the values of
// composite types.
if (!A->getType()->isSingleValueType() || A->user_empty())
@@ -632,8 +675,9 @@ private:
// If the argument isn't overdefined, there's nothing to do. It should
// already be constant.
if (!Solver.getLatticeValueFor(A).isOverdefined()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: nothing to do, arg is already "
- << "constant?\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument "
+ << A->getNameOrAsOperand()
+ << " is already constant?\n");
return false;
}
@@ -650,20 +694,26 @@ private:
//
// TODO 2: this currently does not support constants, i.e. integer ranges.
//
- IsPartial = !getPossibleConstants(A, Constants);
- LLVM_DEBUG(dbgs() << "FnSpecialization: interesting arg: " << *A << "\n");
+ getPossibleConstants(A, Constants);
+
+ if (Constants.empty())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
+ << A->getNameOrAsOperand() << "\n");
return true;
}
/// Collect in \p Constants all the constant values that argument \p A can
/// take on.
- ///
- /// \returns true if all of the values the argument can take on are constant
- /// (e.g., the argument's parent function cannot be called with an
- /// overdefined value).
- bool getPossibleConstants(Argument *A, ConstList &Constants) {
+ void getPossibleConstants(Argument *A,
+ SmallVectorImpl<CallArgBinding> &Constants) {
Function *F = A->getParent();
- bool AllConstant = true;
+
+ // SCCP solver does not record an argument that will be constructed on
+ // stack.
+ if (A->hasByValAttr() && !F->onlyReadsMemory())
+ return;
// Iterate over all the call sites of the argument's parent function.
for (User *U : F->users()) {
@@ -672,10 +722,8 @@ private:
auto &CS = *cast<CallBase>(U);
// If the call site has attribute minsize set, that callsite won't be
// specialized.
- if (CS.hasFnAttr(Attribute::MinSize)) {
- AllConstant = false;
+ if (CS.hasFnAttr(Attribute::MinSize))
continue;
- }
// If the parent of the call site will never be executed, we don't need
// to worry about the passed value.
@@ -684,13 +732,7 @@ private:
auto *V = CS.getArgOperand(A->getArgNo());
if (isa<PoisonValue>(V))
- return false;
-
- // For now, constant expressions are fine but only if they are function
- // calls.
- if (auto *CE = dyn_cast<ConstantExpr>(V))
- if (!isa<Function>(CE->getOperand(0)))
- return false;
+ return;
// TrackValueOfGlobalVariable only tracks scalar global variables.
if (auto *GV = dyn_cast<GlobalVariable>(V)) {
@@ -698,36 +740,32 @@ private:
// global values.
if (!GV->isConstant())
if (!SpecializeOnAddresses)
- return false;
+ return;
if (!GV->getValueType()->isSingleValueType())
- return false;
+ return;
}
if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
EnableSpecializationForLiteralConstant))
- Constants.push_back(cast<Constant>(V));
- else
- AllConstant = false;
+ Constants.push_back({&CS, cast<Constant>(V)});
}
-
- // If the argument can only take on constant values, AllConstant will be
- // true.
- return AllConstant;
}
/// Rewrite calls to function \p F to call function \p Clone instead.
///
- /// This function modifies calls to function \p F whose argument at index \p
- /// ArgNo is equal to constant \p C. The calls are rewritten to call function
- /// \p Clone instead.
+ /// This function modifies calls to function \p F as long as the actual
+ /// arguments match those in \p Args. Note that for recursive calls we
+ /// need to compare against the cloned formal arguments.
///
/// Callsites that have been marked with the MinSize function attribute won't
/// be specialized and rewritten.
- void rewriteCallSites(Function *F, Function *Clone, Argument &Arg,
- Constant *C) {
- unsigned ArgNo = Arg.getArgNo();
- SmallVector<CallBase *, 4> CallSitesToRewrite;
+ void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args,
+ ValueToValueMapTy &Mappings) {
+ assert(!Args.empty() && "Specialization without arguments");
+ Function *F = Args[0].Formal->getParent();
+
+ SmallVector<CallBase *, 8> CallSitesToRewrite;
for (auto *U : F->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -736,35 +774,50 @@ private:
continue;
CallSitesToRewrite.push_back(&CS);
}
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of "
+ << F->getName() << " with " << Clone->getName() << "\n");
+
for (auto *CS : CallSitesToRewrite) {
- if ((CS->getFunction() == Clone && CS->getArgOperand(ArgNo) == &Arg) ||
- CS->getArgOperand(ArgNo) == C) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: "
+ << CS->getFunction()->getName() << " ->" << *CS
+ << "\n");
+ if (/* recursive call */
+ (CS->getFunction() == Clone &&
+ all_of(Args,
+ [CS, &Mappings](const ArgInfo &Arg) {
+ unsigned ArgNo = Arg.Formal->getArgNo();
+ return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal];
+ })) ||
+ /* normal call */
+ all_of(Args, [CS](const ArgInfo &Arg) {
+ unsigned ArgNo = Arg.Formal->getArgNo();
+ return CS->getArgOperand(ArgNo) == Arg.Actual;
+ })) {
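+ // Example (sketch): a recursive call inside Clone still passes the
+ // cloned formal (Mappings[Arg.Formal]) rather than the constant actual,
+ // so it is matched by the first predicate; ordinary call sites must
+ // pass exactly the constants recorded in Args to be rewritten.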
CS->setCalledFunction(Clone);
Solver.markOverdefined(CS);
}
}
}
- void updateSpecializedFuncs(FuncList &FuncDecls,
- FuncList &CurrentSpecializations) {
- for (auto *SpecializedFunc : CurrentSpecializations) {
- SpecializedFuncs.insert(SpecializedFunc);
+ void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) {
+ for (auto *F : WorkList) {
+ SpecializedFuncs.insert(F);
// Initialize the state of the newly created functions, marking them
// argument-tracked and executable.
- if (SpecializedFunc->hasExactDefinition() &&
- !SpecializedFunc->hasFnAttribute(Attribute::Naked))
- Solver.addTrackedFunction(SpecializedFunc);
+ if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked))
+ Solver.addTrackedFunction(F);
- Solver.addArgumentTrackedFunction(SpecializedFunc);
- FuncDecls.push_back(SpecializedFunc);
- Solver.markBlockExecutable(&SpecializedFunc->front());
+ Solver.addArgumentTrackedFunction(F);
+ Candidates.push_back(F);
+ Solver.markBlockExecutable(&F->front());
// Replace the function arguments for the specialized functions.
- for (Argument &Arg : SpecializedFunc->args())
+ for (Argument &Arg : F->args())
if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg))
LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: "
- << Arg.getName() << "\n");
+ << Arg.getNameOrAsOperand() << "\n");
}
}
};
@@ -871,22 +924,26 @@ bool llvm::runFunctionSpecialization(
// Initially resolve the constants in all the argument tracked functions.
RunSCCPSolver(FuncDecls);
- SmallVector<Function *, 2> CurrentSpecializations;
+ SmallVector<Function *, 8> WorkList;
unsigned I = 0;
while (FuncSpecializationMaxIters != I++ &&
- FS.specializeFunctions(FuncDecls, CurrentSpecializations)) {
+ FS.specializeFunctions(FuncDecls, WorkList)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n");
// Run the solver for the specialized functions.
- RunSCCPSolver(CurrentSpecializations);
+ RunSCCPSolver(WorkList);
// Replace some unresolved constant arguments.
constantArgPropagation(FuncDecls, M, Solver);
- CurrentSpecializations.clear();
+ WorkList.clear();
Changed = true;
}
- // Clean up the IR by removing ssa_copy intrinsics.
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = "
+ << NumFuncSpecialized << "\n");
+
+ // Remove any ssa_copy intrinsics that may have been introduced.
removeSSACopy(M);
return Changed;
}
diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 5e5d2086adc2..f35827220bb6 100644
--- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -34,7 +33,7 @@ using namespace llvm;
#define DEBUG_TYPE "globaldce"
static cl::opt<bool>
- ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true),
cl::desc("Enable virtual function elimination"));
STATISTIC(NumAliases , "Number of global aliases removed");
@@ -86,6 +85,9 @@ ModulePass *llvm::createGlobalDCEPass() {
/// Returns true if F is effectively empty.
static bool isEmptyFunction(Function *F) {
+ // Skip external functions.
+ if (F->isDeclaration())
+ return false;
BasicBlock &Entry = F->getEntryBlock();
for (auto &I : Entry) {
if (I.isDebugOrPseudoInst())
@@ -214,14 +216,14 @@ void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
if (!Ptr) {
LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
VFESafeVTables.erase(VTable);
- return;
+ continue;
}
auto Callee = dyn_cast<Function>(Ptr->stripPointerCasts());
if (!Callee) {
LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n");
VFESafeVTables.erase(VTable);
- return;
+ continue;
}
LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> "
@@ -298,7 +300,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// marked as alive are discarded.
// Remove empty functions from the global ctors list.
- Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+ Changed |= optimizeGlobalCtorsList(
+ M, [](uint32_t, Function *F) { return isEmptyFunction(F); });
// Collect the set of members for each comdat.
for (Function &F : M)
@@ -317,7 +320,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Loop over the module, adding globals which are obviously necessary.
for (GlobalObject &GO : M.global_objects()) {
- Changed |= RemoveUnusedGlobalValue(GO);
+ GO.removeDeadConstantUsers();
// Functions with external linkage are needed if they have a body.
// Externally visible & appending globals are needed, if they have an
// initializer.
@@ -330,7 +333,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Compute direct dependencies of aliases.
for (GlobalAlias &GA : M.aliases()) {
- Changed |= RemoveUnusedGlobalValue(GA);
+ GA.removeDeadConstantUsers();
// Externally visible aliases are needed.
if (!GA.isDiscardableIfUnused())
MarkLive(GA);
@@ -340,7 +343,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Compute direct dependencies of ifuncs.
for (GlobalIFunc &GIF : M.ifuncs()) {
- Changed |= RemoveUnusedGlobalValue(GIF);
+ GIF.removeDeadConstantUsers();
// Externally visible ifuncs are needed.
if (!GIF.isDiscardableIfUnused())
MarkLive(GIF);
@@ -403,7 +406,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// Now that all interferences have been dropped, delete the actual objects
// themselves.
auto EraseUnusedGlobalValue = [&](GlobalValue *GV) {
- RemoveUnusedGlobalValue(*GV);
+ GV->removeDeadConstantUsers();
GV->eraseFromParent();
Changed = true;
};
@@ -455,16 +458,3 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
-
-// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
-// GlobalValue, looking for the constant pointer ref that may be pointing to it.
-// If found, check to see if the constant pointer ref is safe to destroy, and if
-// so, nuke it. This will reduce the reference count on the global value, which
-// might make it deader.
-//
-bool GlobalDCEPass::RemoveUnusedGlobalValue(GlobalValue &GV) {
- if (GV.use_empty())
- return false;
- GV.removeDeadConstantUsers();
- return GV.use_empty();
-}
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1cb32e32c895..1a1bde4f0668 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
@@ -37,7 +38,6 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -60,7 +60,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
@@ -100,7 +99,7 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
static cl::opt<int> ColdCCRelFreq(
- "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ "coldcc-rel-freq", cl::Hidden, cl::init(2),
cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
"entry frequency, for a call site to be considered cold for enabling"
@@ -232,7 +231,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
if (MemSrc && MemSrc->isConstant()) {
Changed = true;
MTI->eraseFromParent();
- } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
+ } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, MTI));
}
@@ -405,9 +404,37 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
for (auto *GVE : GVs) {
DIVariable *Var = GVE->getVariable();
DIExpression *Expr = GVE->getExpression();
+ int64_t CurVarOffsetInBytes = 0;
+ uint64_t CurVarOffsetInBits = 0;
+
+ // Calculate the offset in bytes; continue if it is unknown.
+ if (!Expr->extractIfOffset(CurVarOffsetInBytes))
+ continue;
+
+ // Ignore negative offset.
+ if (CurVarOffsetInBytes < 0)
+ continue;
+
+ // Convert offset to bits.
+ CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;
+
+ // The current variable starts after the fragment; ignore it.
+ if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits))
+ continue;
+
+ uint64_t CurVarSize = Var->getType()->getSizeInBits();
+ // The current variable ends before the fragment starts; ignore it.
+ if (CurVarSize != 0 &&
+ (CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits)
+ continue;
+
+ // Current variable fits in the fragment.
+ if (CurVarOffsetInBits == FragmentOffsetInBits &&
+ CurVarSize == FragmentSizeInBits)
+ Expr = DIExpression::get(Expr->getContext(), {});
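+ // Example (sketch): a 32-bit variable at byte offset 4 covered exactly
+ // by a fragment at bits [32, 64) needs no offset or fragment expression,
+ // so the expression is reset to an empty DIExpression here.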
// If the FragmentSize is smaller than the variable,
// emit a fragment expression.
- if (FragmentSizeInBits < VarSize) {
+ else if (FragmentSizeInBits < VarSize) {
if (auto E = DIExpression::createFragmentExpression(
Expr, FragmentOffsetInBits, FragmentSizeInBits))
Expr = *E;
@@ -581,17 +608,14 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
// Will trap.
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == V) {
- //cerr << "NONTRAPPING USE: " << *U;
return false; // Storing the value.
}
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
if (CI->getCalledOperand() != V) {
- //cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
if (II->getCalledOperand() != V) {
- //cerr << "NONTRAPPING USE: " << *U;
return false; // Not calling the ptr
}
} else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
@@ -615,7 +639,6 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
// the comparing of the value of the created global init bool later in
// optimizeGlobalAddressOfAllocation for the global variable.
} else {
- //cerr << "NONTRAPPING USE: " << *U;
return false;
}
}
@@ -878,7 +901,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
}
}
- SmallPtrSet<Constant *, 1> RepValues;
+ SmallSetVector<Constant *, 1> RepValues;
RepValues.insert(NewGV);
// If there is a comparison against null, we will insert a global bool to
@@ -1015,7 +1038,6 @@ valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
/// accessing the data, and exposes the resultant global to further GlobalOpt.
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
CallInst *CI,
- AtomicOrdering Ordering,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
if (!isAllocRemovable(CI, TLI))
@@ -1062,7 +1084,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
- AtomicOrdering Ordering, const DataLayout &DL,
+ const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1087,7 +1109,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
} else if (isAllocationFn(StoredOnceVal, GetTLI)) {
if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
auto *TLI = &GetTLI(*CI->getFunction());
- if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, Ordering, DL, TLI))
+ if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
return true;
}
}
@@ -1257,8 +1279,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
return true;
}
-static bool deleteIfDead(
- GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
+static bool
+deleteIfDead(GlobalValue &GV,
+ SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
+ function_ref<void(Function &)> DeleteFnCallback = nullptr) {
GV.removeDeadConstantUsers();
if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
@@ -1277,6 +1301,10 @@ static bool deleteIfDead(
return false;
LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
+ if (auto *F = dyn_cast<Function>(&GV)) {
+ if (DeleteFnCallback)
+ DeleteFnCallback(*F);
+ }
GV.eraseFromParent();
++NumDeleted;
return true;
@@ -1416,6 +1444,42 @@ static void makeAllConstantUsesInstructions(Constant *C) {
}
}
+// For a global variable with one store, if the store dominates any loads,
+// those loads will always load the stored value (as opposed to the
+// initializer), even in the presence of recursion.
+static bool forwardStoredOnceStore(
+ GlobalVariable *GV, const StoreInst *StoredOnceStore,
+ function_ref<DominatorTree &(Function &)> LookupDomTree) {
+ const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
+ // We can do this optimization for non-constants in nosync + norecurse
+ // functions, but globals used in exactly one norecurse function are already
+ // promoted to an alloca.
+ if (!isa<Constant>(StoredOnceValue))
+ return false;
+ const Function *F = StoredOnceStore->getFunction();
+ SmallVector<LoadInst *> Loads;
+ for (User *U : GV->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->getFunction() == F &&
+ LI->getType() == StoredOnceValue->getType() && LI->isSimple())
+ Loads.push_back(LI);
+ }
+ }
+ // Only compute DT if we have any loads to examine.
+ bool MadeChange = false;
+ if (!Loads.empty()) {
+ auto &DT = LookupDomTree(*const_cast<Function *>(F));
+ for (auto *LI : Loads) {
+ if (DT.dominates(StoredOnceStore, LI)) {
+ LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
+ LI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
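+// Example (sketch): given
+//   @g = internal global i32 0
+// with a single store 'store i32 7, i32* @g' in @f that dominates a
+// 'load i32, i32* @g' in the same function, the load always observes 7
+// (never the initializer), so it is replaced with 7 and erased.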
+
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
static bool
@@ -1572,9 +1636,15 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
+ if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
return true;
+ // Try to forward the store to any loads. If we have more than one store, we
+ // may have a store of the initializer between StoredOnceStore and a load.
+ if (GS.NumStores == 1)
+ if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
+ return true;
+
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean. Skip this optimization for AS that doesn't allow an initializer.
if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
@@ -1755,7 +1825,7 @@ hasOnlyColdCalls(Function &F,
return false;
if (!CalledFn->hasLocalLinkage())
return false;
- // Skip over instrinsics since they won't remain as function calls.
+ // Skip over intrinsics since they won't remain as function calls.
if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
continue;
// Check if it's valid to use coldcc calling convention.
@@ -1884,7 +1954,9 @@ OptimizeFunctions(Module &M,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
- SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
+ SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
+ function_ref<void(Function &F)> ChangedCFGCallback,
+ function_ref<void(Function &F)> DeleteFnCallback) {
bool Changed = false;
@@ -1904,7 +1976,7 @@ OptimizeFunctions(Module &M,
if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
F.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(F, NotDiscardableComdats)) {
+ if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
Changed = true;
continue;
}
@@ -1917,13 +1989,11 @@ OptimizeFunctions(Module &M,
// So, remove unreachable blocks from the function, because a) there's
// no point in analyzing them and b) GlobalOpt should otherwise grow
// some more complicated logic to break these cycles.
- // Removing unreachable blocks might invalidate the dominator so we
- // recalculate it.
+ // Notify the analysis manager that we've modified the function's CFG.
if (!F.isDeclaration()) {
if (removeUnreachableBlocks(F)) {
- auto &DT = LookupDomTree(F);
- DT.recalculate(F);
Changed = true;
+ ChangedCFGCallback(F);
}
}
@@ -2031,6 +2101,9 @@ OptimizeGlobalVars(Module &M,
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
TargetLibraryInfo *TLI) {
+ // Skip external functions.
+ if (F->isDeclaration())
+ return false;
// Call the function.
Evaluator Eval(DL, TLI);
Constant *RetValDummy;
@@ -2383,15 +2456,19 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
return Changed;
}
-static bool optimizeGlobalsInModule(
- Module &M, const DataLayout &DL,
- function_ref<TargetLibraryInfo &(Function &)> GetTLI,
- function_ref<TargetTransformInfo &(Function &)> GetTTI,
- function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
- function_ref<DominatorTree &(Function &)> LookupDomTree) {
+static bool
+optimizeGlobalsInModule(Module &M, const DataLayout &DL,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+ function_ref<DominatorTree &(Function &)> LookupDomTree,
+ function_ref<void(Function &F)> ChangedCFGCallback,
+ function_ref<void(Function &F)> DeleteFnCallback) {
SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
bool Changed = false;
bool LocalChange = true;
+ Optional<uint32_t> FirstNotFullyEvaluatedPriority;
+
while (LocalChange) {
LocalChange = false;
@@ -2411,12 +2488,20 @@ static bool optimizeGlobalsInModule(
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
- NotDiscardableComdats);
+ NotDiscardableComdats, ChangedCFGCallback,
+ DeleteFnCallback);
// Optimize global_ctors list.
- LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
- return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
- });
+ LocalChange |=
+ optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
+ if (FirstNotFullyEvaluatedPriority &&
+ *FirstNotFullyEvaluatedPriority != Priority)
+ return false;
+ bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
+ if (!Evaluated)
+ FirstNotFullyEvaluatedPriority = Priority;
+ return Evaluated;
+ });
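A standalone sketch of the gating in the callback above, with an illustrative helper name (not from the patch): once a constructor at some priority fails to fully evaluate, constructors at every other priority must be kept, since they could observe its side effects; constructors at the same priority are mutually unordered and may still be tried.

// Assumes llvm::Optional and the FirstNotFullyEvaluatedPriority state
// threaded through optimizeGlobalsInModule above.
static bool mayStillEvaluate(const Optional<uint32_t> &FirstNotFullyEvaluated,
                             uint32_t Priority) {
  // No failure yet, or the failure happened within this same priority level.
  return !FirstNotFullyEvaluated || *FirstNotFullyEvaluated == Priority;
}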
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
@@ -2457,10 +2542,23 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
+ auto ChangedCFGCallback = [&FAM](Function &F) {
+ FAM.invalidate(F, PreservedAnalyses::none());
+ };
+ auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };
- if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
+ if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
+ ChangedCFGCallback, DeleteFnCallback))
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ // We made sure to clear analyses for deleted functions.
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ // The only place we modify the CFG is when calling
+ // removeUnreachableBlocks(), but there we make sure to invalidate analyses
+ // for modified functions.
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
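Condensed, the new-pass-manager contract adopted above reads as follows; a sketch assuming the usual FunctionAnalysisManagerModuleProxy plumbing, not verbatim from the patch:

// Eagerly invalidate one function's analyses whenever its CFG changes...
auto ChangedCFGCallback = [&FAM](Function &F) {
  FAM.invalidate(F, PreservedAnalyses::none());
};
// ...so the pass as a whole may then claim CFG analyses preserved:
PreservedAnalyses PA = PreservedAnalyses::none();
PA.preserve<FunctionAnalysisManagerModuleProxy>(); // deleted fns were cleared
PA.preserveSet<CFGAnalyses>(); // modified fns were invalidated above
return PA;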
namespace {
@@ -2491,8 +2589,13 @@ struct GlobalOptLegacyPass : public ModulePass {
return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
- LookupDomTree);
+ auto ChangedCFGCallback = [&LookupDomTree](Function &F) {
+ auto &DT = LookupDomTree(F);
+ DT.recalculate(F);
+ };
+
+ return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
+ ChangedCFGCallback, nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index e7d698c42fcf..7d9e6135b2eb 100644
--- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -134,9 +134,9 @@ static bool splitGlobal(GlobalVariable &GV) {
}
// Finally, remove the original global. Any remaining uses refer to invalid
- // elements of the global, so replace with undef.
+ // elements of the global, so replace with poison.
if (!GV.use_empty())
- GV.replaceAllUsesWith(UndefValue::get(GV.getType()));
+ GV.replaceAllUsesWith(PoisonValue::get(GV.getType()));
GV.eraseFromParent();
return true;
}
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index a964fcde0396..95e8ae0fd22f 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -29,46 +29,33 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
-#include <limits>
#include <cassert>
+#include <limits>
#include <string>
#define DEBUG_TYPE "hotcoldsplit"
@@ -126,7 +113,8 @@ bool unlikelyExecuted(BasicBlock &BB) {
// mark sanitizer traps as cold.
for (Instruction &I : BB)
if (auto *CB = dyn_cast<CallBase>(&I))
- if (CB->hasFnAttr(Attribute::Cold) && !CB->getMetadata("nosanitize"))
+ if (CB->hasFnAttr(Attribute::Cold) &&
+ !CB->getMetadata(LLVMContext::MD_nosanitize))
return true;
// The block is cold if it has an unreachable terminator, unless it's
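Read in isolation, the rewritten predicate is the following; a sketch with an assumed helper name, where the only change is replacing the per-call string lookup of "nosanitize" with the fixed metadata kind ID:

static bool isColdNonSanitizerCall(const CallBase &CB) {
  // Sanitizer-inserted traps also carry the cold attribute but are tagged
  // !nosanitize; those should not mark the block as unlikely executed.
  return CB.hasFnAttr(Attribute::Cold) &&
         !CB.getMetadata(LLVMContext::MD_nosanitize);
}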
@@ -352,7 +340,7 @@ Function *HotColdSplitting::extractColdRegion(
// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
/* BPI */ nullptr, AC, /* AllowVarArgs */ false,
- /* AllowAlloca */ false,
+ /* AllowAlloca */ false, /* AllocaBlock */ nullptr,
/* Suffix */ "cold." + std::to_string(Count));
// Perform a simple cost/benefit analysis to decide whether or not to permit
@@ -740,7 +728,7 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
auto LookupAC = [this](Function &F) -> AssumptionCache * {
if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
@@ -772,7 +760,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index de1c1d379502..ec2b80012ed6 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -24,7 +24,6 @@ using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeOpenMPOptCGSCCLegacyPassPass(Registry);
- initializeArgPromotionPass(Registry);
initializeAnnotation2MetadataLegacyPass(Registry);
initializeCalledValuePropagationLegacyPassPass(Registry);
initializeConstantMergeLegacyPassPass(Registry);
@@ -70,10 +69,6 @@ void LLVMInitializeIPO(LLVMPassRegistryRef R) {
initializeIPO(*unwrap(R));
}
-void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createArgumentPromotionPass());
-}
-
void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCalledValuePropagationPass());
}
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index faf7cb7d566a..d75d99e307fd 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -16,8 +16,9 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassManager.h"
@@ -25,8 +26,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
-#include <map>
-#include <set>
#include <vector>
#define DEBUG_TYPE "iroutliner"
@@ -183,11 +182,24 @@ static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
Value *V) {
Optional<unsigned> GVN = Candidate->getGVN(V);
- assert(GVN.hasValue() && "No GVN for incoming value");
+ assert(GVN && "No GVN for incoming value");
Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
- return FoundValueOpt.getValueOr(nullptr);
+ return FoundValueOpt.value_or(nullptr);
+}
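The assert and return above are the first instances of the llvm::Optional API migration that recurs through the rest of this diff; the equivalences, for reference:

Optional<unsigned> O = Candidate->getGVN(V);
if (O.hasValue()) { } // old: explicit query
if (O) { }            // new: operator bool, as in the assert above
unsigned G1 = O.getValue(); // old accessor
unsigned G2 = *O;           // new accessor
O.getValueOr(0U);           // old default-taking form
O.value_or(0U);             // new name, matching std::optional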
+
+BasicBlock *
+OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
+ BasicBlock *BB) {
+ Instruction *FirstNonPHI = BB->getFirstNonPHI();
+ assert(FirstNonPHI && "block is empty?");
+ Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
+ if (!CorrespondingVal)
+ return nullptr;
+ BasicBlock *CorrespondingBlock =
+ cast<Instruction>(CorrespondingVal)->getParent();
+ return CorrespondingBlock;
}
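A usage sketch of the new helper, matching how the findOrCreatePHIInBlock hunk below calls it (variable names assumed): it maps a block of this region onto its twin in another region of the same group via the GVN of the block's first non-PHI instruction, and returns nullptr when no counterpart exists.

BasicBlock *BlockToUse =
    Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
assert(BlockToUse && "No corresponding block in the other region?");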
/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
@@ -264,13 +276,33 @@ void OutlinableRegion::splitCandidate() {
// We iterate over the instructions in the region, if we find a PHINode, we
// check if there are predecessors outside of the region, if there are,
// we ignore this region since we are unable to handle the severing of the
- // phi node right now.
+ // phi node right now.
+
+ // TODO: Handle extraneous inputs for PHINodes through variable number of
+ // inputs, similar to how outputs are handled.
BasicBlock::iterator It = StartInst->getIterator();
+ EndBB = BackInst->getParent();
+ BasicBlock *IBlock;
+ BasicBlock *PHIPredBlock = nullptr;
+ bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
unsigned NumPredsOutsideRegion = 0;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!BBSet.contains(PN->getIncomingBlock(i)))
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!BBSet.contains(PN->getIncomingBlock(i))) {
+ PHIPredBlock = PN->getIncomingBlock(i);
+ ++NumPredsOutsideRegion;
+ continue;
+ }
+
+ // We must consider the case where the incoming block to the PHINode is
+ // the same as the final block of the OutlinableRegion. If this is the
+ // case, the branch from this block must also be outlined to be valid.
+ IBlock = PN->getIncomingBlock(i);
+ if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
+ PHIPredBlock = PN->getIncomingBlock(i);
++NumPredsOutsideRegion;
+ }
+ }
if (NumPredsOutsideRegion > 1)
return;
@@ -285,11 +317,9 @@ void OutlinableRegion::splitCandidate() {
// If the region ends with a PHINode, but does not contain all of the phi node
// instructions of the region, we ignore it for now.
- if (isa<PHINode>(BackInst)) {
- EndBB = BackInst->getParent();
- if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
- return;
- }
+ if (isa<PHINode>(BackInst) &&
+ BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+ return;
// The basic block gets split like so:
// block: block:
@@ -310,6 +340,10 @@ void OutlinableRegion::splitCandidate() {
StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
+ // If there was a PHINode with an incoming block outside the region,
+ // make sure it is correctly updated in the newly split block.
+ if (PHIPredBlock)
+ PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB);
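For reference, the documented behavior of the BasicBlock helper leaned on here (a fact about the existing API, not something this patch adds):

// BB->replaceSuccessorsPhiUsesWith(Old, New) visits the PHI nodes of every
// successor of BB and rewrites incoming-block entries naming Old so they
// name New; here, entries naming the out-of-region predecessor are
// redirected to the block that now actually branches into StartBB.
PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB);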
CandidateSplit = true;
if (!BackInst->isTerminator()) {
@@ -353,6 +387,25 @@ void OutlinableRegion::reattachCandidate() {
assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
+ // Make sure PHINode references to the block we are merging into are
+ // updated to be incoming blocks from the predecessor to the current block.
+
+ // NOTE: If this is updated such that the outlined block can have more than
+ // one incoming block to a PHINode, this logic will have to be updated
+ // to handle multiple predecessors instead.
+
+ // We only need to update this if the outlined section contains a PHINode; if
+ // it does not, then the incoming block was never changed in the first place.
+ // On the other hand, if PrevBB has no predecessors, it means that all
+ // incoming blocks to the first block are contained in the region, and there
+ // will be nothing to update.
+ Instruction *StartInst = (*Candidate->begin()).Inst;
+ if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
+ assert(!PrevBB->hasNPredecessorsOrMore(2) &&
+ "PrevBB has more than one predecessor. Should be 0 or 1.");
+ BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
+ PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, BeforePrevBB);
+ }
PrevBB->getTerminator()->eraseFromParent();
// If we are reattaching after outlining, we iterate over the phi nodes to
@@ -501,7 +554,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// the number has been found to not be the same value in each instance.
for (Value *V : ID.OperVals) {
Optional<unsigned> GVNOpt = C.getGVN(V);
- assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
+ assert(GVNOpt && "Expected a GVN for operand?");
unsigned GVN = GVNOpt.getValue();
// Check if this global value has been found to not be the same already.
@@ -516,7 +569,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// global value number. If the global value does not map to a Constant,
// it is considered to not be the same value.
Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
- if (ConstantMatches.hasValue()) {
+ if (ConstantMatches) {
if (ConstantMatches.getValue())
continue;
else
@@ -597,7 +650,7 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
"outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
// Transfer the swifterr attribute to the correct function parameter.
- if (Group.SwiftErrorArgument.hasValue())
+ if (Group.SwiftErrorArgument)
Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
Attribute::SwiftError);
@@ -666,6 +719,18 @@ static void moveFunctionData(Function &Old, Function &New,
if (!isa<CallInst>(&Val)) {
// Remove the debug information for outlined functions.
Val.setDebugLoc(DebugLoc());
+
+ // Loop info metadata may contain line locations. Update them to have no
+ // value in the new subprogram since the outlined code could be from
+ // several locations.
+ auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
+ if (DISubprogram *SP = New.getSubprogram())
+ if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
+ return DILocation::get(New.getContext(), Loc->getLine(),
+ Loc->getColumn(), SP, nullptr);
+ return MD;
+ };
+ updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
continue;
}
@@ -691,8 +756,6 @@ static void moveFunctionData(Function &Old, Function &New,
for (Instruction *I : DebugInsts)
I->eraseFromParent();
}
-
- assert(NewEnds.size() > 0 && "No return instruction for new function?");
}
/// Find the constants that will need to be lifted into arguments
@@ -714,7 +777,7 @@ static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
for (Value *V : (*IDIt).OperVals) {
// Since these are stored before any outlining, they will be in the
// global value numbering.
- unsigned GVN = C.getGVN(V).getValue();
+ unsigned GVN = *C.getGVN(V);
if (isa<Constant>(V))
if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
Inputs.push_back(GVN);
@@ -745,8 +808,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C,
assert(Input && "Have a nullptr as an input");
if (OutputMappings.find(Input) != OutputMappings.end())
Input = OutputMappings.find(Input)->second;
- assert(C.getGVN(Input).hasValue() &&
- "Could not find a numbering for the given input");
+ assert(C.getGVN(Input) && "Could not find a numbering for the given input");
EndInputNumbers.push_back(C.getGVN(Input).getValue());
}
}
@@ -885,11 +947,11 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// numbering overrides any discovered location for the extracted code.
for (unsigned InputVal : InputGVNs) {
Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
- assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
+ assert(CanonicalNumberOpt && "Canonical number not found?");
unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
Optional<Value *> InputOpt = C.fromGVN(InputVal);
- assert(InputOpt.hasValue() && "Global value number not found?");
+ assert(InputOpt && "Global value number not found?");
Value *Input = InputOpt.getValue();
DenseMap<unsigned, unsigned>::iterator AggArgIt =
@@ -901,7 +963,7 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// argument in the overall function.
if (Input->isSwiftError()) {
assert(
- !Group.SwiftErrorArgument.hasValue() &&
+ !Group.SwiftErrorArgument &&
"Argument already marked with swifterr for this OutlinableGroup!");
Group.SwiftErrorArgument = TypeIndex;
}
@@ -969,12 +1031,11 @@ static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
// We check to see if the value is used by the PHINode from some other
// predecessor not included in the region. If it is, we make sure
// to keep it as an output.
- SmallVector<unsigned, 2> IncomingNumbers(PN.getNumIncomingValues());
- std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0);
- if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
- return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
- !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
- }))
+ if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
+ [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
+ return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
+ !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
+ }))
return true;
// Check if the value is used by any other instructions outside the region.
@@ -1098,30 +1159,72 @@ static hash_code encodePHINodeData(PHINodeData &PND) {
///
/// \param Region - The region that \p PN is an output for.
/// \param PN - The PHINode we are analyzing.
+/// \param Blocks - The blocks for the region we are analyzing.
/// \param AggArgIdx - The argument \p PN will be stored into.
/// \returns An optional holding the assigned canonical number, or None if
/// there is some attribute of the PHINode blocking it from being used.
static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
- PHINode *PN, unsigned AggArgIdx) {
+ PHINode *PN,
+ DenseSet<BasicBlock *> &Blocks,
+ unsigned AggArgIdx) {
OutlinableGroup &Group = *Region.Parent;
IRSimilarityCandidate &Cand = *Region.Candidate;
BasicBlock *PHIBB = PN->getParent();
CanonList PHIGVNs;
- for (Value *Incoming : PN->incoming_values()) {
- // If we cannot find a GVN, this means that the input to the PHINode is
- // not included in the region we are trying to analyze, meaning, that if
- // it was outlined, we would be adding an extra input. We ignore this
- // case for now, and so ignore the region.
+ Value *Incoming;
+ BasicBlock *IncomingBlock;
+ for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
+ Incoming = PN->getIncomingValue(Idx);
+ IncomingBlock = PN->getIncomingBlock(Idx);
+ // If we cannot find a GVN and the incoming block is included in the region,
+ // this means that the input to the PHINode is not included in the region we
+ // are trying to analyze, meaning that if it was outlined, we would be
+ // adding an extra input. We ignore this case for now, and so ignore the
+ // region.
Optional<unsigned> OGVN = Cand.getGVN(Incoming);
- if (!OGVN.hasValue()) {
+ if (!OGVN && Blocks.contains(IncomingBlock)) {
Region.IgnoreRegion = true;
return None;
}
+ // If the incoming block isn't in the region, we don't have to worry about
+ // this incoming value.
+ if (!Blocks.contains(IncomingBlock))
+ continue;
+
// Collect the canonical numbers of the values in the PHINode.
- unsigned GVN = OGVN.getValue();
+ unsigned GVN = *OGVN;
OGVN = Cand.getCanonicalNum(GVN);
- assert(OGVN.hasValue() && "No GVN found for incoming value?");
+ assert(OGVN && "No GVN found for incoming value?");
+ PHIGVNs.push_back(*OGVN);
+
+ // Find the incoming block and use the canonical numbering as well to define
+ // the hash for the PHINode.
+ OGVN = Cand.getGVN(IncomingBlock);
+
+ // If there is no number for the incoming block, it is because we have
+ // split the candidate basic blocks. So we use the previous block that it
+ // was split from to find the valid global value numbering for the PHINode.
+ if (!OGVN) {
+ assert(Cand.getStartBB() == IncomingBlock &&
+ "Unknown basic block used in exit path PHINode.");
+
+ BasicBlock *PrevBlock = nullptr;
+ // Iterate over the predecessors to the incoming block of the
+ // PHINode; when we find a block that is not contained in the region,
+ // we know that this is the first block that we split from, and it should
+ // have a valid global value numbering.
+ for (BasicBlock *Pred : predecessors(IncomingBlock))
+ if (!Blocks.contains(Pred)) {
+ PrevBlock = Pred;
+ break;
+ }
+ assert(PrevBlock && "Expected a predecessor not in the region!");
+ OGVN = Cand.getGVN(PrevBlock);
+ }
+ GVN = *OGVN;
+ OGVN = Cand.getCanonicalNum(GVN);
+ assert(OGVN && "No GVN found for incoming block?");
PHIGVNs.push_back(*OGVN);
}
@@ -1131,11 +1234,10 @@ static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
- assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
+ assert(BBGVN && "Could not find GVN for the incoming block!");
BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
- assert(BBGVN.hasValue() &&
- "Could not find canonical number for the incoming block!");
+ assert(BBGVN && "Could not find canonical number for the incoming block!");
// Create a pair of the exit block canonical value, and the aggregate
// argument location, connected to the canonical numbers stored in the
// PHINode.
@@ -1262,9 +1364,9 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
// If two PHINodes have the same canonical values, but different aggregate
// argument locations, then they will have distinct Canonical Values.
- GVN = getGVNForPHINode(Region, PN, AggArgIdx);
- if (!GVN.hasValue())
- return;
+ GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
+ if (!GVN)
+ return;
} else {
// If we do not have a PHINode we use the global value numbering for the
// output value, to find the canonical number to add to the set of stored
@@ -1413,7 +1515,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// Make sure that the argument in the new function has the SwiftError
// argument.
- if (Group.SwiftErrorArgument.hasValue())
+ if (Group.SwiftErrorArgument)
Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
Attribute::SwiftError);
@@ -1520,17 +1622,18 @@ getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
/// \param OutputMappings [in] - The mapping of output values from outlined
/// region to their original values.
/// \param CanonNums [out] - The canonical numbering for the incoming values to
-/// \p PN.
+/// \p PN paired with their incoming block.
/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
/// of \p Region rather than the overall function's call.
-static void
-findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
- const DenseMap<Value *, Value *> &OutputMappings,
- DenseSet<unsigned> &CanonNums,
- bool ReplacedWithOutlinedCall = true) {
+static void findCanonNumsForPHI(
+ PHINode *PN, OutlinableRegion &Region,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
+ bool ReplacedWithOutlinedCall = true) {
// Iterate over the incoming values.
for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
Value *IVal = PN->getIncomingValue(Idx);
+ BasicBlock *IBlock = PN->getIncomingBlock(Idx);
// If we have an argument as incoming value, we need to grab the passed
// value from the call itself.
if (Argument *A = dyn_cast<Argument>(IVal)) {
@@ -1545,10 +1648,10 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
// Find and add the canonical number for the incoming value.
Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
- assert(GVN.hasValue() && "No GVN for incoming value");
+ assert(GVN && "No GVN for incoming value");
Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
- assert(CanonNum.hasValue() && "No Canonical Number for GVN");
- CanonNums.insert(*CanonNum);
+ assert(CanonNum && "No Canonical Number for GVN");
+ CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
}
}
@@ -1557,19 +1660,26 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
/// function.
///
/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
-/// \param Region [in] - The OutlinableRegion containing \p PN.
+/// \param Region [in] - The OutlinableRegion containing \p PN.
/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
/// \p PN in.
/// \param OutputMappings [in] - The mapping of output values from outlined
/// region to their original values.
+/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
+/// matched.
/// \return the newly found or created PHINode in \p OverallPhiBlock.
static PHINode*
findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
BasicBlock *OverallPhiBlock,
- const DenseMap<Value *, Value *> &OutputMappings) {
+ const DenseMap<Value *, Value *> &OutputMappings,
+ DenseSet<PHINode *> &UsedPHIs) {
OutlinableGroup &Group = *Region.Parent;
- DenseSet<unsigned> PNCanonNums;
+
+ // A list of the canonical numbering assigned to each incoming value, paired
+ // with the incoming block for the PHINode passed into this function.
+ SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums;
+
// We have to use the extracted function since we have not merged this region
// into the overall function yet. We make sure to reassign the argument numbering
// since it is possible that the argument ordering is different between the
@@ -1578,18 +1688,61 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
/* ReplacedWithOutlinedCall = */ false);
OutlinableRegion *FirstRegion = Group.Regions[0];
- DenseSet<unsigned> CurrentCanonNums;
+
+ // A list of the canonical numbering assigned to each incoming value, paired
+ // with the incoming block for the PHINode that we are currently comparing
+ // the passed PHINode to.
+ SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums;
+
// Find the Canonical Numbering for each PHINode, if it matches, we replace
// the uses of the PHINode we are searching for, with the found PHINode.
for (PHINode &CurrPN : OverallPhiBlock->phis()) {
+ // If this PHINode has already been matched to another PHINode to be merged,
+ // we skip it.
+ if (UsedPHIs.contains(&CurrPN))
+ continue;
+
CurrentCanonNums.clear();
findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
/* ReplacedWithOutlinedCall = */ true);
- if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) {
- return CurrentCanonNums.contains(CanonNum);
- }))
+ // If the lists of incoming values are not the same length, then they cannot
+ // match, since there is no analogue for each incoming value.
+ if (PNCanonNums.size() != CurrentCanonNums.size())
+ continue;
+
+ bool FoundMatch = true;
+
+ // We compare the canonical value for each incoming value in the passed
+ // in PHINode to one already present in the outlined region. If the
+ // incoming values do not match, then the PHINodes do not match.
+
+ // We also check that the incoming block matches by
+ // finding the corresponding incoming block in the combined outlined region
+ // for the current outlined region.
+ for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
+ std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
+ std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
+ if (ToCompareTo.first != ToAdd.first) {
+ FoundMatch = false;
+ break;
+ }
+
+ BasicBlock *CorrespondingBlock =
+ Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
+ assert(CorrespondingBlock && "Found block is nullptr");
+ if (CorrespondingBlock != ToCompareTo.second) {
+ FoundMatch = false;
+ break;
+ }
+ }
+
+ // If all incoming values and branches matched, then we can merge
+ // into the found PHINode.
+ if (FoundMatch) {
+ UsedPHIs.insert(&CurrPN);
return &CurrPN;
+ }
}
// If we've made it here, it means we weren't able to replace the PHINode, so
@@ -1603,12 +1756,8 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
// Find corresponding basic block in the overall function for the incoming
// block.
- Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI();
- assert(FirstNonPHI && "Incoming block is empty?");
- Value *CorrespondingVal =
- Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI);
- assert(CorrespondingVal && "Value is nullptr?");
- BasicBlock *BlockToUse = cast<Instruction>(CorrespondingVal)->getParent();
+ BasicBlock *BlockToUse =
+ Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
NewPN->setIncomingBlock(Idx, BlockToUse);
// If we have an argument we make sure we replace using the argument from
@@ -1623,6 +1772,10 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
assert(Val && "Value is nullptr?");
+ DenseMap<Value *, Value *>::iterator RemappedIt =
+ FirstRegion->RemappedArguments.find(Val);
+ if (RemappedIt != FirstRegion->RemappedArguments.end())
+ Val = RemappedIt->second;
NewPN->setIncomingValue(Idx, Val);
}
return NewPN;
@@ -1649,6 +1802,7 @@ replaceArgumentUses(OutlinableRegion &Region,
if (FirstFunction)
DominatingFunction = Group.OutlinedFunction;
DominatorTree DT(*DominatingFunction);
+ DenseSet<PHINode *> UsedPHIs;
for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
ArgIdx++) {
@@ -1665,6 +1819,8 @@ replaceArgumentUses(OutlinableRegion &Region,
<< *Region.ExtractedFunction << " with " << *AggArg
<< " in function " << *Group.OutlinedFunction << "\n");
Arg->replaceAllUsesWith(AggArg);
+ Value *V = Region.Call->getArgOperand(ArgIdx);
+ Region.RemappedArguments.insert(std::make_pair(V, AggArg));
continue;
}
@@ -1713,7 +1869,7 @@ replaceArgumentUses(OutlinableRegion &Region,
// If this is storing a PHINode, we must make sure it is included in the
// overall function.
if (!isa<PHINode>(ValueOperand) ||
- Region.Candidate->getGVN(ValueOperand).hasValue()) {
+ Region.Candidate->getGVN(ValueOperand).has_value()) {
if (FirstFunction)
continue;
Value *CorrVal =
@@ -1725,7 +1881,7 @@ replaceArgumentUses(OutlinableRegion &Region,
PHINode *PN = cast<PHINode>(SI->getValueOperand());
// If it has a value, it was not split by the code extractor, which
// is what we are looking for.
- if (Region.Candidate->getGVN(PN).hasValue())
+ if (Region.Candidate->getGVN(PN))
continue;
// We record the parent block for the PHINode in the Region so that
@@ -1748,8 +1904,8 @@ replaceArgumentUses(OutlinableRegion &Region,
// For our PHINode, we find the combined canonical numbering, and
// attempt to find a matching PHINode in the overall PHIBlock. If we
// cannot, we copy the PHINode and move it into this new block.
- PHINode *NewPN =
- findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings);
+ PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
+ OutputMappings, UsedPHIs);
NewI->setOperand(0, NewPN);
}
@@ -1923,7 +2079,7 @@ static void alignOutputBlockWithAggFunc(
// If there is, we remove the new output blocks. If it does not,
// we add it to our list of sets of output blocks.
- if (MatchingBB.hasValue()) {
+ if (MatchingBB) {
LLVM_DEBUG(dbgs() << "Set output block for region in function"
<< Region.ExtractedFunction << " to "
<< MatchingBB.getValue());
@@ -2279,6 +2435,9 @@ void IROutliner::pruneIncompatibleRegions(
if (BBHasAddressTaken)
continue;
+ if (IRSC.getFunction()->hasOptNone())
+ continue;
+
if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
!OutlineFromLinkODRs)
continue;
@@ -2343,9 +2502,9 @@ static Value *findOutputValueInRegion(OutlinableRegion &Region,
OutputCanon = *It->second.second.begin();
}
Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon);
- assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?");
+ assert(OGVN && "Could not find GVN for Canonical Number?");
Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
- assert(OV.hasValue() && "Could not find value for GVN?");
+ assert(OV && "Could not find value for GVN?");
return *OV;
}
@@ -2400,11 +2559,8 @@ static InstructionCost findCostForOutputBlocks(Module &M,
for (Value *V : ID.OperVals) {
BasicBlock *BB = static_cast<BasicBlock *>(V);
- DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
- if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
- continue;
- FoundBlocks.insert(BB);
- NumOutputBranches++;
+ if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
+ NumOutputBranches++;
}
}
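The replacement folds a find, a contains check, and an insert into a single probe; a minimal sketch of the insert().second idiom (toy shape, not from the patch):

SmallPtrSet<BasicBlock *, 8> FoundBlocks;
// insert() returns an {iterator, inserted} pair; .second is true only the
// first time BB is added, so NumOutputBranches advances once per distinct
// block that lies outside the candidate region.
if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
  ++NumOutputBranches;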
@@ -2520,7 +2676,7 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
// If we found an output register, place a mapping of the new value
// to the original in the mapping.
- if (!OutputIdx.hasValue())
+ if (!OutputIdx)
return;
if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
@@ -2680,7 +2836,7 @@ unsigned IROutliner::doOutline(Module &M) {
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
- false, "outlined");
+ false, nullptr, "outlined");
findAddInputsOutputs(M, *OS, NotSame);
if (!OS->IgnoreRegion)
OutlinedRegions.push_back(OS);
@@ -2791,7 +2947,7 @@ unsigned IROutliner::doOutline(Module &M) {
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
- false, "outlined");
+ false, nullptr, "outlined");
bool FunctionOutlined = extractSection(*OS);
if (FunctionOutlined) {
unsigned StartIdx = OS->Candidate->getStartIdx();
@@ -2874,7 +3030,7 @@ bool IROutlinerLegacyPass::runOnModule(Module &M) {
std::unique_ptr<OptimizationRemarkEmitter> ORE;
auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
auto GTTI = [this](Function &F) -> TargetTransformInfo & {
@@ -2905,7 +3061,7 @@ PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
std::function<OptimizationRemarkEmitter &(Function &)> GORE =
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE.get();
+ return *ORE;
};
if (IROutliner(GTTI, GIRSI, GORE).run(M))
diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index c32e09875a12..76f8f1a7a482 100644
--- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -9,11 +9,8 @@
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -32,7 +29,7 @@ static bool inferAllPrototypeAttributes(
// explicitly visited by CGSCC passes in the new pass manager.)
if (F.isDeclaration() && !F.hasOptNone()) {
if (!F.hasFnAttribute(Attribute::NoBuiltin))
- Changed |= inferLibFuncAttributes(F, GetTLI(F));
+ Changed |= inferNonMandatoryLibFuncAttrs(F, GetTLI(F));
Changed |= inferAttributesFromOthers(F);
}
diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 76f1d0c54d08..2143e39d488d 100644
--- a/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,14 +12,8 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 49babc24cb82..4d32266eb9ea 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -14,21 +14,21 @@
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineOrder.h"
@@ -37,11 +37,9 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -67,8 +65,6 @@
#include <algorithm>
#include <cassert>
#include <functional>
-#include <sstream>
-#include <tuple>
#include <utility>
#include <vector>
@@ -92,11 +88,28 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);
+static cl::opt<int> IntraSCCCostMultiplier(
+ "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
+ cl::desc(
+ "Cost multiplier to multiply onto inlined call sites where the "
+ "new call was previously an intra-SCC call (not relevant when the "
+ "original call was already intra-SCC). This can accumulate over "
+ "multiple inlinings (e.g. if a call site already had a cost "
+ "multiplier and one of its inlined calls was also subject to "
+ "this, the inlined call would have the original multiplier "
+ "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
+ "inlining through a child SCC which can cause terrible compile times"));
+
/// A flag for test, so we can print the content of the advisor when running it
/// as part of the default (e.g. -O3) pipeline.
static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
cl::init(false), cl::Hidden);
+/// Allows printing the contents of the advisor after each SCC inliner pass.
+static cl::opt<bool>
+ EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing",
+ cl::init(false), cl::Hidden);
+
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
static cl::opt<std::string> CGSCCInlineReplayFile(
@@ -150,10 +163,6 @@ static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
"<Line Number>:<Column Number>.<Discriminator> (default)")),
cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
-static cl::opt<bool> InlineEnablePriorityOrder(
- "inline-enable-priority-order", cl::Hidden, cl::init(false),
- cl::desc("Enable the priority inline order for the inliner"));
-
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -708,8 +717,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
// duration of the inliner pass, and thus the lifetime of the owned advisor.
// The one we would get from the MAM can be invalidated as a result of the
// inliner's activity.
- OwnedAdvisor =
- std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(
+ M, FAM, getInlineParams(),
+ InlineContext{LTOPhase, InlinePass::CGSCCInliner});
if (!CGSCCInlineReplayFile.empty())
OwnedAdvisor = getReplayInlineAdvisor(
@@ -718,7 +728,9 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
CGSCCInlineReplayScope,
CGSCCInlineReplayFallback,
{CGSCCInlineReplayFormat}},
- /*EmitRemarks=*/true);
+ /*EmitRemarks=*/true,
+ InlineContext{LTOPhase,
+ InlinePass::ReplayCGSCCInliner});
return *OwnedAdvisor;
}
@@ -744,7 +756,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
.getManager();
InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
- Advisor.onPassEntry();
+ Advisor.onPassEntry(&InitialC);
auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); });
@@ -773,12 +785,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally maknig a single function grow in a super linear fashion.
- std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
- if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
- else
- Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
- assert(Calls != nullptr && "Expected an initialized InlineOrder");
+ DefaultInlineOrder<std::pair<CallBase *, int>> Calls;
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@@ -793,7 +800,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
- Calls->push({CB, -1});
+ Calls.push({CB, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
setInlineRemark(*CB, "unavailable definition");
@@ -807,7 +814,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
}
- if (Calls->empty())
+ if (Calls.empty())
return PreservedAnalyses::all();
// Capture updatable variable for the current SCC.
@@ -833,15 +840,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
SmallVector<Function *, 4> DeadFunctionsInComdats;
// Loop forward over all of the calls.
- while (!Calls->empty()) {
+ while (!Calls.empty()) {
// We expect the calls to typically be batched with sequences of calls that
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
- Function &F = *Calls->front().first->getCaller();
+ Function &F = *Calls.front().first->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C) {
- Calls->pop();
+ Calls.pop();
continue;
}
@@ -857,8 +864,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
- auto P = Calls->pop();
+ while (!Calls.empty() && Calls.front().first->getCaller() == &F) {
+ auto P = Calls.pop();
CallBase *CB = P.first;
const int InlineHistoryID = P.second;
Function &Callee = *CB->getCalledFunction();
@@ -876,8 +883,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// trigger infinite inlining, much like is prevented within the inliner
// itself by the InlineHistory above, but spread across CGSCC iterations
// and thus hidden from the full inline history.
- if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
- UR.InlinedInternalEdges.count({&N, C})) {
+ LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+ if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
@@ -897,6 +904,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
+ int CBCostMult =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+ .value_or(1);
+
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
@@ -935,9 +947,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (tryPromoteCall(*ICB))
NewCallee = ICB->getCalledFunction();
}
- if (NewCallee)
- if (!NewCallee->isDeclaration())
- Calls->push({ICB, NewHistoryID});
+ if (NewCallee) {
+ if (!NewCallee->isDeclaration()) {
+ Calls.push({ICB, NewHistoryID});
+ // Continually inlining through an SCC can result in huge compile
+ // times and bloated code since we arbitrarily stop at some point
+ // when the inliner decides it's not profitable to inline anymore.
+ // We attempt to mitigate this by making these calls exponentially
+ // more expensive.
+ // This doesn't apply to calls in the same SCC since if we do
+ // inline through the SCC the function will end up being
+ // self-recursive which the inliner bails out on, and inlining
+ // within an SCC is necessary for performance.
+ if (CalleeSCC != C &&
+ CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
+ Attribute NewCBCostMult = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineCostMultiplierAttributeName,
+ itostr(CBCostMult * IntraSCCCostMultiplier));
+ ICB->addFnAttr(NewCBCostMult);
+ }
+ }
+ }
}
}
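Worked through with the default intra-scc-cost-multiplier of 2: a call site with no attribute reads as 1 via value_or(1); inlining it through a child SCC stamps the new calls with 1 * 2 = 2, a second such inlining stamps 2 * 2 = 4, and so on, so the cost of repeatedly flattened call chains grows geometrically. The two ends of that loop, condensed from the hunks above:

int CBCostMult =
    getStringFnAttrAsInt(
        *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
        .value_or(1); // absent attribute -> neutral multiplier of 1
// ...after inlining, stamp calls that newly cross into a child SCC:
ICB->addFnAttr(Attribute::get(
    M.getContext(),
    InlineConstants::FunctionInlineCostMultiplierAttributeName,
    itostr(CBCostMult * IntraSCCCostMultiplier)));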
@@ -953,7 +984,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() &&
!CG.isLibFunction(Callee)) {
if (Callee.hasLocalLinkage() || !Callee.hasComdat()) {
- Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+ Calls.erase_if([&](const std::pair<CallBase *, int> &Call) {
return Call.first->getCaller() == &Callee;
});
// Clear the body and queue the function itself for deletion when we
@@ -1083,17 +1114,24 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
bool MandatoryFirst,
+ InlineContext IC,
InliningAdvisorMode Mode,
unsigned MaxDevirtIterations)
- : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) {
+ : Params(Params), IC(IC), Mode(Mode),
+ MaxDevirtIterations(MaxDevirtIterations) {
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
// because it makes profile annotation in the backend inaccurate.
- if (MandatoryFirst)
+ if (MandatoryFirst) {
PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
+ if (EnablePostSCCAdvisorPrinting)
+ PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs()));
+ }
PM.addPass(InlinerPass());
+ if (EnablePostSCCAdvisorPrinting)
+ PM.addPass(InlineAdvisorAnalysisPrinterPass(dbgs()));
}
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
@@ -1103,7 +1141,8 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
{CGSCCInlineReplayFile,
CGSCCInlineReplayScope,
CGSCCInlineReplayFallback,
- {CGSCCInlineReplayFormat}})) {
+ {CGSCCInlineReplayFormat}},
+ IC)) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp
index 692e445cb7cb..5aa5b905f06c 100644
--- a/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -19,7 +19,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Internalize.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
@@ -33,8 +32,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "internalize"
diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index d9a59dd35fde..ad1927c09803 100644
--- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -23,14 +23,9 @@
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include <fstream>
-#include <set>
using namespace llvm;
#define DEBUG_TYPE "loop-extract"
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 8e83d7bcb6c2..d5f1d291f41f 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1223,6 +1223,7 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
static const unsigned kX86JumpTableEntrySize = 8;
static const unsigned kARMJumpTableEntrySize = 4;
static const unsigned kARMBTIJumpTableEntrySize = 8;
+static const unsigned kRISCVJumpTableEntrySize = 8;
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
switch (Arch) {
@@ -1238,6 +1239,9 @@ unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
if (BTE->getZExtValue())
return kARMBTIJumpTableEntrySize;
return kARMJumpTableEntrySize;
+ case Triple::riscv32:
+ case Triple::riscv64:
+ return kRISCVJumpTableEntrySize;
default:
report_fatal_error("Unsupported architecture for jump tables");
}
@@ -1265,6 +1269,9 @@ void LowerTypeTestsModule::createJumpTableEntry(
AsmOS << "b $" << ArgIndex << "\n";
} else if (JumpTableArch == Triple::thumb) {
AsmOS << "b.w $" << ArgIndex << "\n";
+ } else if (JumpTableArch == Triple::riscv32 ||
+ JumpTableArch == Triple::riscv64) {
+ AsmOS << "tail $" << ArgIndex << "@plt\n";
} else {
report_fatal_error("Unsupported architecture for jump tables");
}
@@ -1282,7 +1289,8 @@ Type *LowerTypeTestsModule::getJumpTableEntryType() {
void LowerTypeTestsModule::buildBitSetsFromFunctions(
ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
- Arch == Triple::thumb || Arch == Triple::aarch64)
+ Arch == Triple::thumb || Arch == Triple::aarch64 ||
+ Arch == Triple::riscv32 || Arch == Triple::riscv64)
buildBitSetsFromFunctionsNative(TypeIds, Functions);
else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
buildBitSetsFromFunctionsWASM(TypeIds, Functions);
@@ -1427,6 +1435,11 @@ void LowerTypeTestsModule::createJumpTable(
F->addFnAttr("branch-target-enforcement", "false");
F->addFnAttr("sign-return-address", "none");
}
+ if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) {
+ // Make sure the jump table assembly is not modified by the assembler or
+ // the linker.
+ F->addFnAttr("target-features", "-c,-relax");
+ }
// Make sure we don't emit .eh_frame for this function.
F->addFnAttr(Attribute::NoUnwind);
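Tying the three RISC-V hunks together (the size reasoning here is inferred; the patch itself only states the intent to keep the assembly unmodified): each jump-table entry is a single `tail $N@plt`, which assembles to an auipc/jalr pair of 8 bytes, matching kRISCVJumpTableEntrySize. Linker relaxation could shrink that pair to one 4-byte jal, and compressed encodings could change sizes elsewhere, either of which would break the table's constant stride, hence:

F->addFnAttr("target-features", "-c,-relax"); // keep every entry at 8 bytes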
@@ -2187,11 +2200,7 @@ bool LowerTypeTestsModule::lower() {
}
Sets.emplace_back(I, MaxUniqueId);
}
- llvm::sort(Sets,
- [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
- const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
- return S1.second < S2.second;
- });
+ llvm::sort(Sets, llvm::less_second());
// For each disjoint set we found...
for (const auto &S : Sets) {
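llvm::less_second (from STLExtras.h) compares pair-like values by their .second member, so the removed lambda collapses to a named functor; a self-contained sketch:

#include "llvm/ADT/STLExtras.h"
#include <utility>
#include <vector>

std::vector<std::pair<int, unsigned>> Sets = {{7, 2}, {9, 1}};
// Same ordering the lambda expressed: ascending by the unsigned second.
llvm::sort(Sets, llvm::less_second()); // -> {9, 1}, {7, 2}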
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 97ef872c5499..b850591b4aa6 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -88,12 +88,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -113,7 +112,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/ValueMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -121,8 +119,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -139,10 +137,10 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
-static cl::opt<unsigned> NumFunctionsForSanityCheck(
- "mergefunc-sanity",
- cl::desc("How many functions in module could be used for "
- "MergeFunctions pass sanity check. "
+static cl::opt<unsigned> NumFunctionsForVerificationCheck(
+ "mergefunc-verify",
+ cl::desc("How many functions in a module could be used for "
+ "MergeFunctions to pass a basic correctness check. "
"'0' disables this check. Works only with '-debug' key."),
cl::init(0), cl::Hidden);
@@ -228,10 +226,13 @@ private:
/// analyzed again.
std::vector<WeakTrackingVH> Deferred;
+ /// Set of values marked as used in llvm.used and llvm.compiler.used.
+ SmallPtrSet<GlobalValue *, 4> Used;
+
#ifndef NDEBUG
/// Checks the rules of order relation introduced among functions set.
- /// Returns true, if sanity check has been passed, and false if failed.
- bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist);
+ /// Returns true if the check passed, and false if it failed.
+ bool doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist);
#endif
/// Insert a ComparableFunction into the FnTree, or merge it away if it's
@@ -330,12 +331,12 @@ PreservedAnalyses MergeFunctionsPass::run(Module &M,
}
#ifndef NDEBUG
-bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
- if (const unsigned Max = NumFunctionsForSanityCheck) {
+bool MergeFunctions::doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist) {
+ if (const unsigned Max = NumFunctionsForVerificationCheck) {
unsigned TripleNumber = 0;
bool Valid = true;
- dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
+ dbgs() << "MERGEFUNC-VERIFY: Started for first " << Max << " functions.\n";
unsigned i = 0;
for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
@@ -351,7 +352,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
- dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
+ dbgs() << "MERGEFUNC-VERIFY: Non-symmetric; triple: " << TripleNumber
<< "\n";
dbgs() << *F1 << '\n' << *F2 << '\n';
Valid = false;
@@ -384,7 +385,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
}
if (!Transitive) {
- dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: "
+ dbgs() << "MERGEFUNC-VERIFY: Non-transitive; triple: "
<< TripleNumber << "\n";
dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
<< Res4 << "\n";
@@ -395,7 +396,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
}
}
- dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n";
+ dbgs() << "MERGEFUNC-VERIFY: " << (Valid ? "Passed." : "Failed.") << "\n";
return Valid;
}
return true;
@@ -410,6 +411,11 @@ static bool isEligibleForMerging(Function &F) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
+ SmallVector<GlobalValue *, 4> UsedV;
+ collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false);
+ collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true);
+ Used.insert(UsedV.begin(), UsedV.end());
+
// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
@@ -436,7 +442,7 @@ bool MergeFunctions::runOnModule(Module &M) {
std::vector<WeakTrackingVH> Worklist;
Deferred.swap(Worklist);
- LLVM_DEBUG(doSanityCheck(Worklist));
+ LLVM_DEBUG(doFunctionalCheck(Worklist));
LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n');
LLVM_DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
@@ -456,6 +462,7 @@ bool MergeFunctions::runOnModule(Module &M) {
FnTree.clear();
FNodesInTree.clear();
GlobalNumbers.clear();
+ Used.clear();
return Changed;
}
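
The llvm.used collection added to runOnModule above can be read as one small helper; this sketch merely rearranges the patch's own calls (header placement as in the patch) and is illustrative, not part of the commit:

  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Transforms/Utils/ModuleUtils.h"
  using namespace llvm;

  static void collectUsedSet(Module &M, SmallPtrSetImpl<GlobalValue *> &Used) {
    SmallVector<GlobalValue *, 4> UsedV;
    // Gather llvm.used (CompilerUsed=false) and llvm.compiler.used into one set.
    collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false);
    collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true);
    Used.insert(UsedV.begin(), UsedV.end());
  }
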
@@ -484,7 +491,7 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
if (SrcTy->isStructTy()) {
assert(DestTy->isStructTy());
assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
- Value *Result = UndefValue::get(DestTy);
+ Value *Result = PoisonValue::get(DestTy);
for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
Value *Element = createCast(
Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
@@ -828,7 +835,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// For better debuggability, under MergeFunctionsPDI, we do not modify G's
// call sites to point to F even when within the same translation unit.
if (!G->isInterposable() && !MergeFunctionsPDI) {
- if (G->hasGlobalUnnamedAddr()) {
+ // Functions referred to by llvm.used/llvm.compiler.used are special:
+ // there are uses of the symbol name that are not visible to LLVM,
+ // usually from inline asm.
+ if (G->hasGlobalUnnamedAddr() && !Used.contains(G)) {
// G might have been a key in our GlobalNumberState, and it's illegal
// to replace a key in ValueMap<GlobalValue *> with a non-global.
GlobalNumbers.erase(G);
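
The guard added above distills to a single predicate: G may lose its identity only when its address is provably insignificant and no uses invisible to LLVM (inline asm reaching the symbol via llvm.used / llvm.compiler.used) exist. A hedged restatement, with the helper name invented:

  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/IR/Function.h"
  using namespace llvm;

  // Hypothetical helper mirroring the patched condition.
  static bool canFoldAway(Function *G,
                          const SmallPtrSetImpl<GlobalValue *> &Used) {
    return G->hasGlobalUnnamedAddr() && !Used.contains(G);
  }
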
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index d515303e4911..143715006512 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -14,43 +14,33 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ModuleInliner.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineOrder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <cassert>
-#include <functional>
using namespace llvm;
@@ -94,7 +84,9 @@ InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
// inliner pass, and thus the lifetime of the owned advisor. The one we
// would get from the MAM can be invalidated as a result of the inliner's
// activity.
- OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params);
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(
+ M, FAM, Params,
+ InlineContext{LTOPhase, InlinePass::ModuleInliner});
return *OwnedAdvisor;
}
@@ -119,7 +111,9 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
- if (!IAA.tryCreate(Params, Mode, {})) {
+ if (!IAA.tryCreate(
+ Params, Mode, {},
+ InlineContext{LTOPhase, InlinePass::ModuleInliner})) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
@@ -153,7 +147,8 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
// the SCC inliner, which need some refactoring.
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
+ Calls = std::make_unique<PriorityInlineOrder>(
+ std::make_unique<SizePriority>());
else
Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
assert(Calls != nullptr && "Expected an initialized InlineOrder");
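
The PriorityInlineOrder change above swaps a template parameter (InlineSizePriority) for a priority object passed at construction, so a new policy no longer needs a new instantiation. A toy stand-in for that design, with all names invented:

  #include <memory>

  struct Priority {                       // stand-in policy interface
    virtual ~Priority() = default;
    virtual int score(int CallSize) const = 0;
  };

  struct SizeOnly : Priority {            // stand-in for SizePriority
    int score(int CallSize) const override { return CallSize; }
  };

  class PriorityOrder {                   // stand-in for PriorityInlineOrder
    std::unique_ptr<Priority> P;
  public:
    explicit PriorityOrder(std::unique_ptr<Priority> Policy)
        : P(std::move(Policy)) {}
    int rank(int CallSize) const { return P->score(CallSize); }
  };

  // Mirrors: std::make_unique<PriorityInlineOrder>(std::make_unique<SizePriority>())
  static PriorityOrder makeDefaultOrder() {
    return PriorityOrder(std::make_unique<SizeOnly>());
  }
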
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 2d765fb6ce6d..227ad8501f25 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -49,7 +49,6 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
-#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <algorithm>
@@ -59,17 +58,16 @@ using namespace omp;
#define DEBUG_TYPE "openmp-opt"
static cl::opt<bool> DisableOpenMPOptimizations(
- "openmp-opt-disable", cl::ZeroOrMore,
- cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
- cl::init(false));
+ "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
+ cl::Hidden, cl::init(false));
static cl::opt<bool> EnableParallelRegionMerging(
- "openmp-opt-enable-merging", cl::ZeroOrMore,
+ "openmp-opt-enable-merging",
cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
- DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
+ DisableInternalization("openmp-opt-disable-internalization",
cl::desc("Disable function internalization."),
cl::Hidden, cl::init(false));
@@ -85,42 +83,47 @@ static cl::opt<bool> HideMemoryTransferLatency(
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptDeglobalization(
- "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
+ "openmp-opt-disable-deglobalization",
cl::desc("Disable OpenMP optimizations involving deglobalization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptSPMDization(
- "openmp-opt-disable-spmdization", cl::ZeroOrMore,
+ "openmp-opt-disable-spmdization",
cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptFolding(
- "openmp-opt-disable-folding", cl::ZeroOrMore,
+ "openmp-opt-disable-folding",
cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
cl::init(false));
static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
- "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
+ "openmp-opt-disable-state-machine-rewrite",
cl::desc("Disable OpenMP optimizations that replace the state machine."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptBarrierElimination(
- "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
+ "openmp-opt-disable-barrier-elimination",
cl::desc("Disable OpenMP optimizations that eliminate barriers."),
cl::Hidden, cl::init(false));
static cl::opt<bool> PrintModuleAfterOptimizations(
- "openmp-opt-print-module", cl::ZeroOrMore,
+ "openmp-opt-print-module-after",
cl::desc("Print the current module after OpenMP optimizations."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintModuleBeforeOptimizations(
+ "openmp-opt-print-module-before",
+ cl::desc("Print the current module before OpenMP optimizations."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> AlwaysInlineDeviceFunctions(
- "openmp-opt-inline-device", cl::ZeroOrMore,
+ "openmp-opt-inline-device",
cl::desc("Inline all applicible functions on the device."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
- EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
+ EnableVerboseRemarks("openmp-opt-verbose-remarks",
cl::desc("Enables more verbose remarks."), cl::Hidden,
cl::init(false));
@@ -129,6 +132,11 @@ static cl::opt<unsigned>
cl::desc("Maximal number of attributor iterations."),
cl::init(256));
+static cl::opt<unsigned>
+ SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
+ cl::desc("Maximum amount of shared memory to use."),
+ cl::init(std::numeric_limits<unsigned>::max()));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -493,11 +501,14 @@ struct OMPInformationCache : public InformationCache {
// Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
// functions, except if `optnone` is present.
- for (Function &F : M) {
- for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
- if (F.getName().startswith(Prefix) &&
- !F.hasFnAttribute(Attribute::OptimizeNone))
- F.removeFnAttr(Attribute::NoInline);
+ if (isOpenMPDevice(M)) {
+ for (Function &F : M) {
+ for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
+ if (F.hasFnAttribute(Attribute::NoInline) &&
+ F.getName().startswith(Prefix) &&
+ !F.hasFnAttribute(Attribute::OptimizeNone))
+ F.removeFnAttr(Attribute::NoInline);
+ }
}
// TODO: We should attach the attributes defined in OMPKinds.def.
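
The rewritten loop above adds two filters: it runs only for device modules, and it checks that noinline is actually present before erasing it, still skipping optnone functions. A plain-C++ approximation of that filter (toy types, not the LLVM API):

  #include <set>
  #include <string>

  struct Fn {                     // toy stand-in for llvm::Function
    std::string Name;
    std::set<std::string> Attrs;  // e.g. "noinline", "optnone"
  };

  static void stripNoInlineByPrefix(Fn &F) {
    for (const char *Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
      if (F.Attrs.count("noinline") && !F.Attrs.count("optnone") &&
          F.Name.rfind(Prefix, 0) == 0)   // startswith(Prefix)
        F.Attrs.erase("noinline");
  }
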
@@ -591,7 +602,7 @@ struct KernelInfoState : AbstractState {
/// Abstract State interface
///{
- KernelInfoState() {}
+ KernelInfoState() = default;
KernelInfoState(bool BestState) {
if (!BestState)
indicatePessimisticFixpoint();
@@ -926,8 +937,7 @@ private:
SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
BasicBlock *StartBB = nullptr, *EndBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -966,8 +976,7 @@ private:
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
ParentBB->getTerminator()->eraseFromParent();
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -1107,10 +1116,8 @@ private:
// callbacks.
SmallVector<Value *, 8> Args;
for (auto *CI : MergableCIs) {
- Value *Callee =
- CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
- FunctionType *FT =
- cast<FunctionType>(Callee->getType()->getPointerElementType());
+ Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
+ FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
Args.clear();
Args.push_back(OutlinedFn->getArg(0));
Args.push_back(OutlinedFn->getArg(1));
@@ -1458,7 +1465,6 @@ private:
case Intrinsic::nvvm_barrier0_and:
case Intrinsic::nvvm_barrier0_or:
case Intrinsic::nvvm_barrier0_popc:
- case Intrinsic::amdgcn_s_barrier:
return true;
default:
break;
@@ -2120,6 +2126,8 @@ private:
OMPRTL___kmpc_barrier_simple_generic);
ExternalizationRAII ThreadId(OMPInfoCache,
OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ ExternalizationRAII NumThreads(
+ OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block);
ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
registerAAs(IsModulePass);
@@ -2407,8 +2415,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
auto CallCheck = [&](Instruction &I) {
Optional<Value *> ReplVal = getValueForCall(A, I, ICV);
- if (ReplVal.hasValue() &&
- ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
+ if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
HasChanged = ChangeStatus::CHANGED;
return true;
@@ -2468,7 +2475,8 @@ struct AAICVTrackerFunction : public AAICVTracker {
if (ICVTrackingAA.isAssumedTracked()) {
Optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV);
- if (!URV || (*URV && AA::isValidAtPosition(**URV, I, OMPInfoCache)))
+ if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
+ OMPInfoCache)))
return URV;
}
@@ -2509,13 +2517,13 @@ struct AAICVTrackerFunction : public AAICVTracker {
if (ValuesMap.count(CurrInst)) {
Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
// Unknown value, track new.
- if (!ReplVal.hasValue()) {
+ if (!ReplVal) {
ReplVal = NewReplVal;
break;
}
// If we found a new value, we can't know the icv value anymore.
- if (NewReplVal.hasValue())
+ if (NewReplVal)
if (ReplVal != NewReplVal)
return nullptr;
@@ -2523,11 +2531,11 @@ struct AAICVTrackerFunction : public AAICVTracker {
}
Optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
- if (!NewReplVal.hasValue())
+ if (!NewReplVal)
continue;
// Unknown value, track new.
- if (!ReplVal.hasValue()) {
+ if (!ReplVal) {
ReplVal = NewReplVal;
break;
}
@@ -2539,7 +2547,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
}
// If we are in the same BB and we have a value, we are done.
- if (CurrBB == I->getParent() && ReplVal.hasValue())
+ if (CurrBB == I->getParent() && ReplVal)
return ReplVal;
// Go through all predecessors and add terminators for analysis.
@@ -2597,7 +2605,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
ICVTrackingAA.getReplacementValue(ICV, &I, A);
// If we found a second ICV value there is no unique returned value.
- if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
+ if (UniqueICVValue && UniqueICVValue != NewReplVal)
return false;
UniqueICVValue = NewReplVal;
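
The recurring mechanical change in this file replaces llvm::Optional's hasValue()/getValue() with the contextual-bool and dereference operators. std::optional follows the same contract, so the migration reads like this standalone sketch:

  #include <optional>

  static int pickValue(std::optional<int> Repl, int Default) {
    if (!Repl)      // previously: !Repl.hasValue()
      return Default;
    return *Repl;   // previously: Repl.getValue()
  }
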
@@ -2648,10 +2656,10 @@ struct AAICVTrackerCallSite : AAICVTracker {
}
ChangeStatus manifest(Attributor &A) override {
- if (!ReplVal.hasValue() || !ReplVal.getValue())
+ if (!ReplVal || !*ReplVal)
return ChangeStatus::UNCHANGED;
- A.changeValueAfterManifest(*getCtxI(), **ReplVal);
+ A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal);
A.deleteAfterManifest(*getCtxI());
return ChangeStatus::CHANGED;
@@ -2789,7 +2797,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;
/// Total number of basic blocks in this function.
- long unsigned NumBBs;
+ long unsigned NumBBs = 0;
};
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
@@ -2952,12 +2960,23 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
}
void initialize(Attributor &A) override {
+ if (DisableOpenMPOptDeglobalization) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
+ Attributor::SimplifictionCallbackTy SCB =
+ [](const IRPosition &, const AbstractAttribute *,
+ bool &) -> Optional<Value *> { return nullptr; };
for (User *U : RFI.Declaration->users())
- if (CallBase *CB = dyn_cast<CallBase>(U))
+ if (CallBase *CB = dyn_cast<CallBase>(U)) {
MallocCalls.insert(CB);
+ A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
+ SCB);
+ }
findPotentialRemovedFreeCalls(A);
}
@@ -2999,6 +3018,14 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));
+ if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) {
+ LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB
+ << " with shared memory."
+ << " Shared memory usage is limited to "
+ << SharedMemoryLimit << " bytes\n");
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
<< " with " << AllocSize->getZExtValue()
<< " bytes of shared memory\n");
@@ -3029,11 +3056,12 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
"HeapToShared on allocation without alignment attribute");
SharedMem->setAlignment(MaybeAlign(Alignment));
- A.changeValueAfterManifest(*CB, *NewBuffer);
+ A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
A.deleteAfterManifest(*CB);
A.deleteAfterManifest(*FreeCalls.front());
- NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
+ SharedMemoryUsed += AllocSize->getZExtValue();
+ NumBytesMovedToSharedMemory = SharedMemoryUsed;
Changed = ChangeStatus::CHANGED;
}
@@ -3069,6 +3097,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
SmallSetVector<CallBase *, 4> MallocCalls;
/// Collection of potentially removed free calls in a function.
SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
+ /// The total amount of shared memory that has been used for HeapToShared.
+ unsigned SharedMemoryUsed = 0;
};
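
SharedMemoryUsed turns the per-call size check added above into a running budget against -openmp-opt-shared-limit. The accounting, reduced to a standalone sketch with invented names:

  #include <cstdint>

  struct SharedMemBudget {
    uint64_t Limit;    // e.g. the -openmp-opt-shared-limit value
    uint64_t Used = 0; // bytes already promoted to shared memory

    // Charge Size bytes if they still fit; otherwise leave the heap call alone.
    bool tryCharge(uint64_t Size) {
      if (Size + Used > Limit)
        return false;
      Used += Size;
      return true;
    }
  };
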
struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
@@ -3137,12 +3167,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
Function *Fn = getAnchorScope();
- if (!OMPInfoCache.Kernels.count(Fn))
- return;
-
- // Add itself to the reaching kernel and set IsKernelEntry.
- ReachingKernelEntries.insert(Fn);
- IsKernelEntry = true;
OMPInformationCache::RuntimeFunctionInfo &InitRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
@@ -3176,10 +3200,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
Fn);
// Ignore kernels without initializers such as global constructors.
- if (!KernelInitCB || !KernelDeinitCB) {
- indicateOptimisticFixpoint();
+ if (!KernelInitCB || !KernelDeinitCB)
return;
- }
+
+ // Add itself to the reaching kernel and set IsKernelEntry.
+ ReachingKernelEntries.insert(Fn);
+ IsKernelEntry = true;
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
@@ -3345,8 +3371,17 @@ struct AAKernelInfoFunction : AAKernelInfo {
return false;
}
- // Check if the kernel is already in SPMD mode, if so, return success.
+ // Get the actual kernel, could be the caller of the anchor scope if we have
+ // a debug wrapper.
Function *Kernel = getAnchorScope();
+ if (Kernel->hasLocalLinkage()) {
+ assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
+ auto *CB = cast<CallBase>(Kernel->user_back());
+ Kernel = CB->getCaller();
+ }
+ assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
+
+ // Check if the kernel is already in SPMD mode, if so, return success.
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
@@ -3711,9 +3746,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
// __kmpc_get_hardware_num_threads_in_block();
// WarpSize = __kmpc_get_warp_size();
// BlockSize = BlockHwSize - WarpSize;
- // if (InitCB >= BlockSize) return;
- // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
+ // IsWorkerCheckBB: bool IsWorker = InitCB != -1;
// if (IsWorker) {
+ // if (InitCB >= BlockSize) return;
// SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
@@ -3770,6 +3805,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
InitBB->getTerminator()->eraseFromParent();
+ Instruction *IsWorker =
+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
+ ConstantInt::get(KernelInitCB->getType(), -1),
+ "thread.is_worker", InitBB);
+ IsWorker->setDebugLoc(DLoc);
+ BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
+
Module &M = *Kernel->getParent();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
FunctionCallee BlockHwSizeFn =
@@ -3779,29 +3821,22 @@ struct AAKernelInfoFunction : AAKernelInfo {
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_warp_size);
CallInst *BlockHwSize =
- CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+ CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
BlockHwSize->setDebugLoc(DLoc);
- CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+ CallInst *WarpSize =
+ CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
WarpSize->setDebugLoc(DLoc);
- Instruction *BlockSize =
- BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+ Instruction *BlockSize = BinaryOperator::CreateSub(
+ BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
BlockSize->setDebugLoc(DLoc);
- Instruction *IsMainOrWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
- BlockSize, "thread.is_main_or_worker", InitBB);
+ Instruction *IsMainOrWorker = ICmpInst::Create(
+ ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
+ "thread.is_main_or_worker", IsWorkerCheckBB);
IsMainOrWorker->setDebugLoc(DLoc);
- BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
- InitBB);
-
- Instruction *IsWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
- ConstantInt::get(KernelInitCB->getType(), -1),
- "thread.is_worker", IsWorkerCheckBB);
- IsWorker->setDebugLoc(DLoc);
- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
- IsWorkerCheckBB);
+ BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
+ IsMainOrWorker, IsWorkerCheckBB);
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
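
The block reshuffle above keeps reusing the same two building blocks: a compare appended to a basic block and a conditional branch terminating it. A minimal sketch of that construction, with invented block and value names:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static void emitIsWorkerBranch(BasicBlock *InitBB, BasicBlock *WorkerBB,
                                 BasicBlock *UserBB, Value *ThreadId) {
    // %thread.is_worker = icmp ne <ty> %ThreadId, -1
    Instruction *IsWorker = ICmpInst::Create(
        ICmpInst::ICmp, CmpInst::ICMP_NE, ThreadId,
        ConstantInt::get(ThreadId->getType(), -1), "thread.is_worker", InitBB);
    // br i1 %thread.is_worker, label %WorkerBB, label %UserBB
    BranchInst::Create(WorkerBB, UserBB, IsWorker, InitBB);
  }
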
@@ -4241,10 +4276,10 @@ struct AAKernelInfoCallSite : AAKernelInfo {
unsigned ScheduleTypeVal =
ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
switch (OMPScheduleType(ScheduleTypeVal)) {
- case OMPScheduleType::Static:
- case OMPScheduleType::StaticChunked:
- case OMPScheduleType::Distribute:
- case OMPScheduleType::DistributeChunked:
+ case OMPScheduleType::UnorderedStatic:
+ case OMPScheduleType::UnorderedStaticChunked:
+ case OMPScheduleType::OrderedDistribute:
+ case OMPScheduleType::OrderedDistributeChunked:
break;
default:
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
@@ -4390,7 +4425,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
std::string Str("simplified value: ");
- if (!SimplifiedValue.hasValue())
+ if (!SimplifiedValue)
return Str + std::string("none");
if (!SimplifiedValue.getValue())
@@ -4420,8 +4455,8 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
IRPosition::callsite_returned(CB),
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
- assert((isValidState() || (SimplifiedValue.hasValue() &&
- SimplifiedValue.getValue() == nullptr)) &&
+ assert((isValidState() ||
+ (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) &&
"Unexpected invalid state!");
if (!isAtFixpoint()) {
@@ -4461,9 +4496,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
+ if (SimplifiedValue && *SimplifiedValue) {
Instruction &I = *getCtxI();
- A.changeValueAfterManifest(I, **SimplifiedValue);
+ A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue);
A.deleteAfterManifest(I);
CallBase *CB = dyn_cast<CallBase>(&I);
@@ -4549,7 +4584,7 @@ private:
// We have empty reaching kernels, therefore we cannot tell if the
// associated call site can be folded. At this moment, SimplifiedValue
// must be none.
- assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none");
+ assert(!SimplifiedValue && "SimplifiedValue should be none");
}
return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
@@ -4592,7 +4627,7 @@ private:
return indicatePessimisticFixpoint();
if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
- assert(!SimplifiedValue.hasValue() &&
+ assert(!SimplifiedValue &&
"SimplifiedValue should keep none at this point");
return ChangeStatus::UNCHANGED;
}
@@ -4700,18 +4735,23 @@ void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
void OpenMPOpt::registerAAs(bool IsModulePass) {
if (SCC.empty())
-
return;
+
if (IsModulePass) {
// Ensure we create the AAKernelInfo AAs first and without triggering an
// update. This will make sure we register all value simplification
// callbacks before any other AA has the chance to create an AAValueSimplify
// or similar.
- for (Function *Kernel : OMPInfoCache.Kernels)
+ auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
A.getOrCreateAAFor<AAKernelInfo>(
- IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
+ IRPosition::function(Kernel), /* QueryingAA */ nullptr,
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
+ return false;
+ };
+ OMPInformationCache::RuntimeFunctionInfo &InitRFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
+ InitRFI.foreachUse(SCC, CreateKernelInfoCB);
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
@@ -4899,6 +4939,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
KernelSet Kernels = getDeviceKernels(M);
+ if (PrintModuleBeforeOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M);
+
auto IsCalled = [&](Function &F) {
if (Kernels.contains(&F))
return true;
@@ -4958,8 +5001,15 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
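
All three Attributor construction sites in this file move from a long positional argument list to the AttributorConfig aggregate, which keeps each option named at the call site. A toy version of that named-options idea (Config is a stand-in, not the real AttributorConfig):

  #include <string>

  struct Config {                      // stand-in for AttributorConfig
    bool DefaultInitializeLiveInternals = true;
    bool RewriteSignatures = true;
    bool IsModulePass = true;
    unsigned MaxFixpointIterations = 32;
    std::string PassName;
  };

  static Config makeOpenMPOptConfig(unsigned MaxIters, bool ModulePass) {
    Config AC;
    AC.DefaultInitializeLiveInternals = false;
    AC.RewriteSignatures = false;
    AC.IsModulePass = ModulePass;
    AC.MaxFixpointIterations = MaxIters;
    AC.PassName = "openmp-opt";
    return AC;
  }
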
@@ -5001,6 +5051,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
Module &M = *C.begin()->getFunction().getParent();
+ if (PrintModuleBeforeOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);
+
KernelSet Kernels = getDeviceKernels(M);
FunctionAnalysisManager &FAM =
@@ -5022,8 +5075,16 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IsModulePass = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
@@ -5093,8 +5154,16 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IsModulePass = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Result = OMPOpt.run(false);
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 5f2223e4047e..54c72bdbb203 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -14,7 +14,6 @@
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -40,6 +39,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -55,8 +55,6 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <functional>
-#include <iterator>
#include <memory>
#include <tuple>
#include <vector>
@@ -99,7 +97,7 @@ static cl::opt<bool>
// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
- cl::init(false), cl::ZeroOrMore,
+
cl::ReallyHidden,
cl::desc("Skip Cost Analysis"));
// Used to determine if a cold region is worth outlining based on
@@ -129,7 +127,7 @@ static cl::opt<unsigned> MaxNumInlineBlocks(
// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
static cl::opt<int> MaxNumPartialInlining(
- "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
+ "max-partial-inlining", cl::init(-1), cl::Hidden,
cl::desc("Max number of partial inlining. The default is unlimited"));
// Used only when PGO or user annotated branch data is absent. It is
@@ -137,7 +135,7 @@ static cl::opt<int> MaxNumPartialInlining(
// produces larger value, the BFI value will be used.
static cl::opt<int>
OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
- cl::Hidden, cl::ZeroOrMore,
+ cl::Hidden,
cl::desc("Relative frequency of outline region to "
"the entry block"));
@@ -169,7 +167,7 @@ struct FunctionOutliningInfo {
};
struct FunctionOutliningMultiRegionInfo {
- FunctionOutliningMultiRegionInfo() {}
+ FunctionOutliningMultiRegionInfo() = default;
// Container for outline regions
struct OutlineRegionInfo {
@@ -440,7 +438,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
};
auto BBProfileCount = [BFI](BasicBlock *BB) {
- return BFI->getBlockProfileCount(BB).getValueOr(0);
+ return BFI->getBlockProfileCount(BB).value_or(0);
};
// Use the same computeBBInlineCost function to compute the cost savings of
@@ -741,7 +739,7 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
- if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
+ if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
return OutlineRegionRelFreq;
// When profile data is not available, we need to be conservative in
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 74f68531b89a..ae787be40c55 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -15,19 +15,13 @@
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Target/CGPassBuilderOption.h"
@@ -41,22 +35,16 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
-#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
-#include "llvm/Transforms/Vectorize/LoopVectorize.h"
-#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
-#include "llvm/Transforms/Vectorize/VectorCombine.h"
using namespace llvm;
namespace llvm {
-cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
+cl::opt<bool> RunPartialInlining("enable-partial-inlining", cl::Hidden,
cl::desc("Run Partial inlinining pass"));
static cl::opt<bool>
@@ -111,8 +99,8 @@ static cl::opt<bool>
EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable performing ThinLTO."));
-cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
- cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
+cl::opt<bool> EnableHotColdSplit("hot-cold-split",
+ cl::desc("Enable hot-cold splitting pass"));
cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
cl::desc("Enable ir outliner pass"));
@@ -126,12 +114,12 @@ cl::opt<bool>
cl::desc("Disable pre-instrumentation inliner"));
cl::opt<int> PreInlineThreshold(
- "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore,
+ "preinline-threshold", cl::Hidden, cl::init(75),
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
cl::opt<bool>
- EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore,
+ EnableGVNHoist("enable-gvn-hoist",
cl::desc("Enable the GVN hoisting pass (default = off)"));
static cl::opt<bool>
@@ -139,13 +127,8 @@ static cl::opt<bool>
cl::Hidden,
cl::desc("Disable shrink-wrap library calls"));
-static cl::opt<bool> EnableSimpleLoopUnswitch(
- "enable-simple-loop-unswitch", cl::init(false), cl::Hidden,
- cl::desc("Enable the simple loop unswitch pass. Also enables independent "
- "cleanup passes integrated into the loop pass manager pipeline."));
-
cl::opt<bool>
- EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore,
+ EnableGVNSink("enable-gvn-sink",
cl::desc("Enable the GVN sinking pass (default = off)"));
// This option is used in simplifying testing SampleFDO optimizations for
@@ -336,59 +319,6 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createEarlyCSEPass());
}
-// Do PGO instrumentation generation or use pass as the option specified.
-void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
- bool IsCS = false) {
- if (IsCS) {
- if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse)
- return;
- } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
- return;
-
- // Perform the preinline and cleanup passes for O1 and above.
- // We will not do this inline for context sensitive PGO (when IsCS is true).
- if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) {
- // Create preinline pass. We construct an InlineParams object and specify
- // the threshold here to avoid the command line options of the regular
- // inliner to influence pre-inlining. The only fields of InlineParams we
- // care about are DefaultThreshold and HintThreshold.
- InlineParams IP;
- IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner
- // when not optimzing for size. This should probably be lowered after
- // performance testing.
- // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes
- // the instrumented binary unusably large. Even if PreInlineThreshold is not
- // correct thresold for -Oz, it is better than not running preinliner.
- IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325;
-
- MPM.add(createFunctionInliningPass(IP));
- MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
- MPM.add(createInstructionCombiningPass()); // Combine silly seq's
- addExtensionsToPM(EP_Peephole, MPM);
- }
- if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) {
- MPM.add(createPGOInstrumentationGenLegacyPass(IsCS));
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!PGOInstrGen.empty())
- Options.InstrProfileOutput = PGOInstrGen;
- Options.DoCounterPromotion = true;
- Options.UseBFIInPromotion = IsCS;
- MPM.add(createLoopRotatePass());
- MPM.add(createInstrProfilingLegacyPass(Options, IsCS));
- }
- if (!PGOInstrUse.empty())
- MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS));
- // Indirect call promotion that promotes intra-module targets only.
- // For ThinLTO this is done earlier due to interactions with globalopt
- // for imported functions. We don't run this at -O0.
- if (OptLevel > 0 && !IsCS)
- MPM.add(
- createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
-}
void PassManagerBuilder::addFunctionSimplificationPasses(
legacy::PassManagerBase &MPM) {
// Start of function pass.
@@ -404,7 +334,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
MPM.add(createGVNSinkPass());
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
}
}
@@ -418,7 +349,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createJumpThreadingPass()); // Thread jumps.
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
}
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove BBs
// Combine silly seq's
if (OptLevel > 2)
MPM.add(createAggressiveInstCombinerPass());
@@ -427,14 +360,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
- // Optimize memory intrinsic calls based on the profiled size information.
- if (SizeLevel == 0)
- MPM.add(createPGOMemOPSizeOptLegacyPass());
-
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
if (OptLevel > 1)
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// The matrix extension can introduce large vector operations early, which can
@@ -443,29 +374,32 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createVectorCombinePass());
// Begin the loop pass pipeline.
- if (EnableSimpleLoopUnswitch) {
- // The simple loop unswitch pass relies on separate cleanup passes. Schedule
- // them first so when we re-process a loop they run before other loop
- // passes.
- MPM.add(createLoopInstSimplifyPass());
- MPM.add(createLoopSimplifyCFGPass());
- }
+
+ // The simple loop unswitch pass relies on separate cleanup passes. Schedule
+ // them first so when we re-process a loop they run before other loop
+ // passes.
+ MPM.add(createLoopInstSimplifyPass());
+ MPM.add(createLoopSimplifyCFGPass());
+
// Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated.
+ // to reduce amount of IR that will have to be duplicated. However,
+ // do not perform speculative hoisting the first time as LICM
+ // will destroy metadata that may not need to be destroyed if run
+ // after loop rotation.
// TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/false));
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
// TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- if (EnableSimpleLoopUnswitch)
- MPM.add(createSimpleLoopUnswitchLegacyPass());
- else
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
+ MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3));
// FIXME: We break the loop pass pipeline here in order to do full
// simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
// need for this.
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
MPM.add(createInstructionCombiningPass());
// We resume loop passes creating a second loop pipeline here.
if (EnableLoopFlatten) {
@@ -521,7 +455,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// TODO: Investigate if this is too expensive at O1.
if (OptLevel > 1) {
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -580,9 +515,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
PM.add(createEarlyCSEPass());
PM.add(createCorrelatedValuePropagationPass());
PM.add(createInstructionCombiningPass());
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
- PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
- PM.add(createCFGSimplificationPass());
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
+ PM.add(createSimpleLoopUnswitchLegacyPass());
+ PM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
PM.add(createInstructionCombiningPass());
}
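
Several hunks in this file thread a SimplifyCFGOptions object through createCFGSimplificationPass; that options type is a fluent builder whose setters return *this. A toy mini-builder showing the idiom (names shortened, not the real class):

  struct CFGOpts {
    bool SwitchRangeToICmp = false;
    bool SwitchToLookupTable = false;
    bool HoistCommonInsts = false;
    CFGOpts &convertSwitchRangeToICmp(bool B) { SwitchRangeToICmp = B; return *this; }
    CFGOpts &convertSwitchToLookupTable(bool B) { SwitchToLookupTable = B; return *this; }
    CFGOpts &hoistCommonInsts(bool B) { HoistCommonInsts = B; return *this; }
  };

  // Usage mirrors the pipeline code above:
  static CFGOpts lateCleanupOpts() {
    return CFGOpts()
        .convertSwitchRangeToICmp(true)
        .convertSwitchToLookupTable(true)
        .hoistCommonInsts(true);
  }
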
@@ -597,6 +534,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
// before SLP vectorization.
PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
+ .convertSwitchRangeToICmp(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
@@ -641,7 +579,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
// unrolled loop is an inner loop, then the prologue will be inside the
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
PM.add(createWarnMissedTransformationsPass());
@@ -657,10 +596,6 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
- // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
- // is handled separately, so just check this is not the ThinLTO post-link.
- bool DefaultOrPreLinkPipeline = !PerformThinLTO;
-
MPM.add(createAnnotation2MetadataLegacyPass());
if (!PGOSampleUse.empty()) {
@@ -678,7 +613,6 @@ void PassManagerBuilder::populateModulePassManager(
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
- addPGOInstrPasses(MPM);
if (Inliner) {
MPM.add(Inliner);
Inliner = nullptr;
@@ -732,8 +666,6 @@ void PassManagerBuilder::populateModulePassManager(
// earlier in the pass pipeline, here before globalopt. Otherwise imported
// available_externally functions look unreferenced and are removed.
if (PerformThinLTO) {
- MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
- !PGOSampleUse.empty()));
MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
}
@@ -772,20 +704,9 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
-
- // For SamplePGO in ThinLTO compile phase, we do not want to do indirect
- // call promotion as it will change the CFG too much to make the 2nd
- // profile annotation in backend more difficult.
- // PGO instrumentation is added during the compile phase for ThinLTO, do
- // not run it a second time
- if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
- addPGOInstrPasses(MPM);
-
- // Create profile COMDAT variables. Lld linker wants to see all variables
- // before the LTO/ThinLTO link since it needs to resolve symbols/comdats.
- if (!PerformThinLTO && EnablePGOCSInstrGen)
- MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen));
+ MPM.add(
+ createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Clean up after IPCP & DAE
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive
@@ -811,8 +732,6 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createOpenMPOptCGSCCLegacyPass());
MPM.add(createPostOrderFunctionAttrsLegacyPass());
- if (OptLevel > 2)
- MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
addFunctionSimplificationPasses(MPM);
@@ -837,14 +756,6 @@ void PassManagerBuilder::populateModulePassManager(
// and saves running remaining passes on the eliminated functions.
MPM.add(createEliminateAvailableExternallyPass());
- // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass
- // for LTO and ThinLTO -- The actual pass will be called after all inlines
- // are performed.
- // Need to do this after COMDAT variables have been eliminated,
- // (i.e. after EliminateAvailableExternallyPass).
- if (!(PrepareForLTO || PrepareForThinLTO))
- addPGOInstrPasses(MPM, /* IsCS */ true);
-
if (EnableOrderFileInstrumentation)
MPM.add(createInstrOrderFilePass());
@@ -886,7 +797,8 @@ void PassManagerBuilder::populateModulePassManager(
// later might get benefit of no-alias assumption in clone loop.
if (UseLoopVersioningLICM) {
MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
}
// We add a fresh GlobalsModRef run at this point. This is particularly
@@ -972,7 +884,8 @@ void PassManagerBuilder::populateModulePassManager(
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createCFGSimplificationPass(
+ SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
addExtensionsToPM(EP_OptimizerLast, MPM);
@@ -1009,13 +922,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Split call-site with more constrained arguments.
PM.add(createCallSiteSplittingPass());
- // Indirect call promotion. This should promote all the targets that are
- // left by the earlier promotion pass that promotes intra-module targets.
- // This two-step promotion is to save the compile time. For LTO, it should
- // produce the same result as if we only do promotion here.
- PM.add(
- createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
-
// Propagate constant function arguments by specializing the functions.
if (EnableFunctionSpecialization && OptLevel > 2)
PM.add(createFunctionSpecializationPass());
@@ -1081,9 +987,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createPruneEHPass()); // Remove dead EH info.
- // CSFDO instrumentation and use pass.
- addPGOInstrPasses(PM, /* IsCS */ true);
-
// Infer attributes on declarations, call sites, arguments, etc. for an SCC.
if (AttributorRun & AttributorRunOption::CGSCC)
PM.add(createAttributorCGSCCLegacyPass());
@@ -1098,14 +1001,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalOptimizerPass());
PM.add(createGlobalDCEPass()); // Remove dead functions.
- // If we didn't decide to inline a function, check to see if we can
- // transform it to pass arguments by value instead of by reference.
- PM.add(createArgumentPromotionPass());
-
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
+ PM.add(createJumpThreadingPass());
// Break up allocas
PM.add(createSROAPass());
@@ -1120,7 +1019,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true));
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -1149,7 +1049,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true));
+ PM.add(createJumpThreadingPass());
}
void PassManagerBuilder::addLateLTOOptimizationPasses(
@@ -1175,80 +1075,6 @@ void PassManagerBuilder::addLateLTOOptimizationPasses(
PM.add(createMergeFunctionsPass());
}
-void PassManagerBuilder::populateThinLTOPassManager(
- legacy::PassManagerBase &PM) {
- PerformThinLTO = true;
- if (LibraryInfo)
- PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
-
- if (VerifyInput)
- PM.add(createVerifierPass());
-
- if (ImportSummary) {
- // This pass imports type identifier resolutions for whole-program
- // devirtualization and CFI. It must run early because other passes may
- // disturb the specific instruction patterns that these passes look for,
- // creating dependencies on resolutions that may not appear in the summary.
- //
- // For example, GVN may transform the pattern assume(type.test) appearing in
- // two basic blocks into assume(phi(type.test, type.test)), which would
- // transform a dependency on a WPD resolution into a dependency on a type
- // identifier resolution for CFI.
- //
- // Also, WPD has access to more precise information than ICP and can
- // devirtualize more effectively, so it should operate on the IR first.
- PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary));
- PM.add(createLowerTypeTestsPass(nullptr, ImportSummary));
- }
-
- populateModulePassManager(PM);
-
- if (VerifyOutput)
- PM.add(createVerifierPass());
- PerformThinLTO = false;
-}
-
-void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
- if (LibraryInfo)
- PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
-
- if (VerifyInput)
- PM.add(createVerifierPass());
-
- addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM);
-
- if (OptLevel != 0)
- addLTOOptimizationPasses(PM);
- else {
- // The whole-program-devirt pass needs to run at -O0 because only it knows
- // about the llvm.type.checked.load intrinsic: it needs to both lower the
- // intrinsic itself and handle it in the summary.
- PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
- }
-
- // Create a function that performs CFI checks for cross-DSO calls with targets
- // in the current module.
- PM.add(createCrossDSOCFIPass());
-
- // Lower type metadata and the type.test intrinsic. This pass supports Clang's
- // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
- // link time if CFI is enabled. The pass does nothing if CFI is disabled.
- PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO pipeline).
- PM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
-
- if (OptLevel != 0)
- addLateLTOOptimizationPasses(PM);
-
- addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
-
- PM.add(createAnnotationRemarksLegacyPass());
-
- if (VerifyOutput)
- PM.add(createVerifierPass());
-}
-
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
@@ -1314,18 +1140,3 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
legacy::PassManagerBase *MPM = unwrap(PM);
Builder->populateModulePassManager(*MPM);
}
-
-void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
- LLVMPassManagerRef PM,
- LLVMBool Internalize,
- LLVMBool RunInliner) {
- PassManagerBuilder *Builder = unwrap(PMB);
- legacy::PassManagerBase *LPM = unwrap(PM);
-
- // A small backwards compatibility hack. populateLTOPassManager used to take
- // an RunInliner option.
- if (RunInliner && !Builder->Inliner)
- Builder->Inliner = createFunctionInliningPass();
-
- Builder->populateLTOPassManager(*LPM);
-}
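
With the LTO shim removed, the llvm-c PassManagerBuilder surface that survives in this file is create/populate/dispose. A minimal, untested illustration of driving it from C++, assuming only functions shown above or long present in the llvm-c headers:

  #include "llvm-c/Core.h"
  #include "llvm-c/Transforms/PassManagerBuilder.h"

  static void buildModulePipeline(LLVMPassManagerRef PM) {
    LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
    LLVMPassManagerBuilderSetOptLevel(PMB, 2); // roughly -O2
    LLVMPassManagerBuilderPopulateModulePassManager(PMB, PM);
    LLVMPassManagerBuilderDispose(PMB);
  }
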
diff --git a/llvm/lib/Transforms/IPO/PruneEH.cpp b/llvm/lib/Transforms/IPO/PruneEH.cpp
index 39de19ca9e9d..e0836a9fd699 100644
--- a/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
@@ -24,9 +23,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -246,7 +243,7 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraphUpdater &CGU) {
}
if (!I->use_empty())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
}
if (TokenInst) {
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index 5779553ee732..26fb7d676429 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -18,6 +18,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 7334bf695b67..6859953de962 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -14,7 +14,8 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/ProfileData/SampleProf.h"
#include <map>
#include <queue>
@@ -62,23 +63,24 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
return ChildNodeRet;
}
-ContextTrieNode &ContextTrieNode::moveToChildContext(
- const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
- uint32_t ContextFramesToRemove, bool DeleteNode) {
+ContextTrieNode &
+SampleContextTracker::moveContextSamples(ContextTrieNode &ToNodeParent,
+ const LineLocation &CallSite,
+ ContextTrieNode &&NodeToMove) {
uint64_t Hash =
FunctionSamples::getCallSiteHash(NodeToMove.getFuncName(), CallSite);
+ std::map<uint64_t, ContextTrieNode> &AllChildContext =
+ ToNodeParent.getAllChildContext();
assert(!AllChildContext.count(Hash) && "Node to remove must exist");
- LineLocation OldCallSite = NodeToMove.CallSiteLoc;
- ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
AllChildContext[Hash] = NodeToMove;
ContextTrieNode &NewNode = AllChildContext[Hash];
- NewNode.CallSiteLoc = CallSite;
+ NewNode.setCallSiteLoc(CallSite);
// Walk through nodes in the moved subtree, and update
// FunctionSamples' context for the context promotion.
// We also need to set the new parent link for all children.
std::queue<ContextTrieNode *> NodeToUpdate;
- NewNode.setParentContext(this);
+ NewNode.setParentContext(&ToNodeParent);
NodeToUpdate.push(&NewNode);
while (!NodeToUpdate.empty()) {
@@ -87,10 +89,8 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
FunctionSamples *FSamples = Node->getFunctionSamples();
if (FSamples) {
- FSamples->getContext().promoteOnPath(ContextFramesToRemove);
+ setContextNode(FSamples, Node);
FSamples->getContext().setState(SyntheticContext);
- LLVM_DEBUG(dbgs() << " Context promoted to: "
- << FSamples->getContext().toString() << "\n");
}
for (auto &It : Node->getAllChildContext()) {
@@ -100,10 +100,6 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
}
}
- // Original context no longer needed, destroy if requested.
- if (DeleteNode)
- OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName());
-
return NewNode;
}
@@ -131,7 +127,7 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; }
void ContextTrieNode::addFunctionSize(uint32_t FSize) {
- if (!FuncSize.hasValue())
+ if (!FuncSize)
FuncSize = 0;
FuncSize = FuncSize.getValue() + FSize;
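The hasValue/getValue pair above reflects this commit's wider migration of llvm::Optional toward the std::optional-style API. The mapping, assuming an Optional<T> O (the same pattern recurs in SampleProfile.cpp and SampleProfileProbe.cpp below):

    // O.hasValue()       ->  static_cast<bool>(O), or O.has_value()
    // O.getValue()       ->  *O, or O.value()
    // O.getValueOr(Def)  ->  O.value_or(Def)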
@@ -147,6 +143,10 @@ void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
ParentContext = Parent;
}
+void ContextTrieNode::setCallSiteLoc(const LineLocation &Loc) {
+ CallSiteLoc = Loc;
+}
+
void ContextTrieNode::dumpNode() {
dbgs() << "Node: " << FuncName << "\n"
<< " Callsite: " << CallSiteLoc << "\n"
@@ -202,13 +202,23 @@ SampleContextTracker::SampleContextTracker(
SampleContext Context = FuncSample.first;
LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
<< "\n");
- if (!Context.isBaseContext())
- FuncToCtxtProfiles[Context.getName()].insert(FSamples);
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
assert(!NewNode->getFunctionSamples() &&
"New node can't have sample profile");
NewNode->setFunctionSamples(FSamples);
}
+ populateFuncToCtxtMap();
+}
+
+void SampleContextTracker::populateFuncToCtxtMap() {
+ for (auto *Node : *this) {
+ FunctionSamples *FSamples = Node->getFunctionSamples();
+ if (FSamples) {
+ FSamples->getContext().setState(RawContext);
+ setContextNode(FSamples, Node);
+ FuncToCtxtProfiles[Node->getFuncName()].push_back(FSamples);
+ }
+ }
}
FunctionSamples *
@@ -231,7 +241,7 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
if (CalleeContext) {
FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
LLVM_DEBUG(if (FSamples) {
- dbgs() << " Callee context found: " << FSamples->getContext().toString()
+ dbgs() << " Callee context found: " << getContextString(CalleeContext)
<< "\n";
});
return FSamples;
@@ -333,7 +343,7 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
continue;
- ContextTrieNode *FromNode = getContextFor(Context);
+ ContextTrieNode *FromNode = getContextNodeForProfile(CSamples);
if (FromNode == Node)
continue;
@@ -354,7 +364,7 @@ void SampleContextTracker::markContextSamplesInlined(
const FunctionSamples *InlinedSamples) {
assert(InlinedSamples && "Expect non-null inlined samples");
LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
- << InlinedSamples->getContext().toString() << "\n");
+ << getContextString(*InlinedSamples) << "\n");
InlinedSamples->getContext().setState(InlinedContext);
}
@@ -405,17 +415,43 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
// the context profile in the base (context-less) profile.
FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
assert(FromSamples && "Shouldn't promote a context without profile");
+ (void)FromSamples; // Unused in release build.
+
LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
- << FromSamples->getContext().toString() << "\n");
+ << getContextString(&NodeToPromo) << "\n");
assert(!FromSamples->getContext().hasState(InlinedContext) &&
"Shouldn't promote inlined context profile");
- uint32_t ContextFramesToRemove =
- FromSamples->getContext().getContextFrames().size() - 1;
- return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
- ContextFramesToRemove);
+ return promoteMergeContextSamplesTree(NodeToPromo, RootContext);
+}
+
+#ifndef NDEBUG
+std::string
+SampleContextTracker::getContextString(const FunctionSamples &FSamples) const {
+ return getContextString(getContextNodeForProfile(&FSamples));
}
+std::string
+SampleContextTracker::getContextString(ContextTrieNode *Node) const {
+ SampleContextFrameVector Res;
+ if (Node == &RootContext)
+ return std::string();
+ Res.emplace_back(Node->getFuncName(), LineLocation(0, 0));
+
+ ContextTrieNode *PreNode = Node;
+ Node = Node->getParentContext();
+ while (Node && Node != &RootContext) {
+ Res.emplace_back(Node->getFuncName(), PreNode->getCallSiteLoc());
+ PreNode = Node;
+ Node = Node->getParentContext();
+ }
+
+ std::reverse(Res.begin(), Res.end());
+
+ return SampleContext::getContextString(Res);
+}
+#endif
+
void SampleContextTracker::dump() { RootContext.dumpTree(); }
StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const {
@@ -526,8 +562,7 @@ ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
}
void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
- ContextTrieNode &ToNode,
- uint32_t ContextFramesToRemove) {
+ ContextTrieNode &ToNode) {
FunctionSamples *FromSamples = FromNode.getFunctionSamples();
FunctionSamples *ToSamples = ToNode.getFunctionSamples();
if (FromSamples && ToSamples) {
@@ -540,16 +575,13 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
} else if (FromSamples) {
// Transfer FromSamples from FromNode to ToNode
ToNode.setFunctionSamples(FromSamples);
+ setContextNode(FromSamples, &ToNode);
FromSamples->getContext().setState(SyntheticContext);
- FromSamples->getContext().promoteOnPath(ContextFramesToRemove);
- FromNode.setFunctionSamples(nullptr);
}
}
ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
- ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
- uint32_t ContextFramesToRemove) {
- assert(ContextFramesToRemove && "Context to remove can't be empty");
+ ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent) {
// Ignore call site location if destination is top level under root
LineLocation NewCallSiteLoc = LineLocation(0, 0);
@@ -566,22 +598,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
if (!ToNode) {
// Do not delete node to move from its parent here because
// caller is iterating over children of that parent node.
- ToNode = &ToNodeParent.moveToChildContext(
- NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false);
+ ToNode =
+ &moveContextSamples(ToNodeParent, NewCallSiteLoc, std::move(FromNode));
+ LLVM_DEBUG({
+ dbgs() << " Context promoted and merged to: " << getContextString(ToNode)
+ << "\n";
+ });
} else {
// Destination node exists, merge samples for the context tree
- mergeContextNode(FromNode, *ToNode, ContextFramesToRemove);
+ mergeContextNode(FromNode, *ToNode);
LLVM_DEBUG({
if (ToNode->getFunctionSamples())
dbgs() << " Context promoted and merged to: "
- << ToNode->getFunctionSamples()->getContext().toString() << "\n";
+ << getContextString(ToNode) << "\n";
});
// Recursively promote and merge children
for (auto &It : FromNode.getAllChildContext()) {
ContextTrieNode &FromChildNode = It.second;
- promoteMergeContextSamplesTree(FromChildNode, *ToNode,
- ContextFramesToRemove);
+ promoteMergeContextSamplesTree(FromChildNode, *ToNode);
}
// Remove children once they're all merged
@@ -594,4 +629,14 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
return *ToNode;
}
+
+void SampleContextTracker::createContextLessProfileMap(
+ SampleProfileMap &ContextLessProfiles) {
+ for (auto *Node : *this) {
+ FunctionSamples *FProfile = Node->getFunctionSamples();
+ // The profile's context can be empty; use the ContextNode's func name.
+ if (FProfile)
+ ContextLessProfiles[Node->getFuncName()].merge(*FProfile);
+ }
+}
} // namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bc6051de90c4..40de69bbf2cf 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -25,11 +25,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
@@ -38,22 +35,16 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
@@ -64,6 +55,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -73,9 +65,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
@@ -84,7 +74,6 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
@@ -151,8 +140,7 @@ static cl::opt<bool> ProfileSampleBlockAccurate(
"them conservatively as unknown. "));
static cl::opt<bool> ProfileAccurateForSymsInList(
- "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
- cl::init(true),
+ "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
cl::desc("For symbols in profile symbol list, regard their profiles to "
"be accurate. It may be overriden by profile-sample-accurate. "));
@@ -183,6 +171,15 @@ static cl::opt<bool> ProfileSizeInline(
cl::desc("Inline cold call sites in profile loader if it's beneficial "
"for code size."));
+// Since profiles are consumed by many passes, turning on this option has
+// side effects. For instance, the pre-link SCC inliner would see merged
+// profiles and inline hot functions that are skipped in this pass.
+static cl::opt<bool> DisableSampleLoaderInlining(
+ "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
+ cl::desc("If true, artifically skip inline transformation in sample-loader "
+ "pass, and merge (or scale) profiles (as configured by "
+ "--sample-profile-merge-inlinee)."));
+
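A hypothetical invocation tying the new knob to the merge behavior it references (input and profile file names are placeholders; the flags themselves are defined in this file):

    opt -passes=sample-profile -sample-profile-file=foo.afdo \
        -disable-sample-loader-inlining -sample-profile-merge-inlinee \
        input.ll -S -o annotated.ll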
cl::opt<int> ProfileInlineGrowthLimit(
"sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
cl::desc("The size growth ratio limit for proirity-based sample profile "
@@ -219,19 +216,19 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
"Skip relative hotness check for ICP up to given number of targets."));
static cl::opt<bool> CallsitePrioritizedInline(
- "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "sample-profile-prioritized-inline", cl::Hidden,
cl::desc("Use call site prioritized inlining for sample profile loader."
"Currently only CSSPGO is supported."));
static cl::opt<bool> UsePreInlinerDecision(
- "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "sample-profile-use-preinliner", cl::Hidden,
cl::desc("Use the preinliner decisions stored in profile context."));
static cl::opt<bool> AllowRecursiveInline(
- "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "sample-profile-recursive-inline", cl::Hidden,
cl::desc("Allow sample loader inliner to inline recursive calls."));
static cl::opt<std::string> ProfileInlineReplayFile(
@@ -287,7 +284,6 @@ static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat(
static cl::opt<unsigned>
MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
- cl::ZeroOrMore,
cl::desc("Max number of promotions for a single indirect "
"call callsite in sample profile loader"));
@@ -295,6 +291,13 @@ static cl::opt<bool> OverwriteExistingWeights(
"overwrite-existing-weights", cl::Hidden, cl::init(false),
cl::desc("Ignore existing branch weights on IR and always overwrite."));
+static cl::opt<bool> AnnotateSampleProfileInlinePhase(
+ "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
+ cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
+ "sample-profile inline pass name."));
+
+extern cl::opt<bool> EnableExtTspBlockPlacement;
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -425,7 +428,11 @@ public:
: SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
- LTOPhase(LTOPhase) {}
+ LTOPhase(LTOPhase),
+ AnnotatedPassName(AnnotateSampleProfileInlinePhase
+ ? llvm::AnnotateInlinePassName(InlineContext{
+ LTOPhase, InlinePass::SampleProfileInliner})
+ : CSINLINE_DEBUG) {}
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -487,15 +494,13 @@ protected:
/// Profile tracker for different context.
std::unique_ptr<SampleContextTracker> ContextTracker;
- /// Flag indicating whether input profile is context-sensitive
- bool ProfileIsCSFlat = false;
-
/// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
///
/// We need to know the LTO phase because for example in ThinLTOPrelink
/// phase, in annotation, we should not promote indirect calls. Instead,
/// we will mark GUIDs that need to be annotated to the function.
- ThinOrFullLTOPhase LTOPhase;
+ const ThinOrFullLTOPhase LTOPhase;
+ const std::string AnnotatedPassName;
/// Profile symbol list tells whether a function name appears in the binary
/// used to generate the current profile.
@@ -535,6 +540,11 @@ protected:
// A pseudo probe helper to correlate the imported sample counts.
std::unique_ptr<PseudoProbeManager> ProbeManager;
+
+private:
+ const char *getAnnotatedRemarkPassName() const {
+ return AnnotatedPassName.c_str();
+ }
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -605,7 +615,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// call instruction should have 0 count.
// For CS profile, the callsite count of previously inlined callees is
// populated with the entry count of the callees.
- if (!ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
if (const auto *CB = dyn_cast<CallBase>(&Inst))
if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
return 0;
@@ -644,7 +654,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
// call instruction should have 0 count.
// For CS profile, the callsite count of previously inlined callees is
// populated with the entry count of the callees.
- if (!ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
if (const auto *CB = dyn_cast<CallBase>(&Inst))
if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
return 0;
@@ -698,7 +708,7 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
if (Function *Callee = Inst.getCalledFunction())
CalleeName = Callee->getName();
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
const FunctionSamples *FS = findFunctionSamples(Inst);
@@ -730,7 +740,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
FunctionSamples::getGUID(R->getName());
};
- if (ProfileIsCSFlat) {
+ if (FunctionSamples::ProfileIsCS) {
auto CalleeSamples =
ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
if (CalleeSamples.empty())
@@ -783,7 +793,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
if (it.second) {
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
it.first->second = ContextTracker->getContextSamplesFor(DIL);
else
it.first->second =
@@ -839,6 +849,13 @@ static void
updateIDTMetaData(Instruction &Inst,
const SmallVectorImpl<InstrProfValueData> &CallTargets,
uint64_t Sum) {
+ // Bail out early if MaxNumPromotions is zero.
+ // This prevents allocating an array of zero length below.
+ //
+ // Note `updateIDTMetaData` is called in two places so check
+ // `MaxNumPromotions` inside it.
+ if (MaxNumPromotions == 0)
+ return;
uint32_t NumVals = 0;
// OldSum is the existing total count in the value profile data.
uint64_t OldSum = 0;
@@ -922,6 +939,14 @@ updateIDTMetaData(Instruction &Inst,
bool SampleProfileLoader::tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
SmallVector<CallBase *, 8> *InlinedCallSite) {
+ // Bail out early if sample-loader inliner is disabled.
+ if (DisableSampleLoaderInlining)
+ return false;
+
+ // Bail out early if MaxNumPromotions is zero.
+ // This prevents allocating an array of zero length in callees below.
+ if (MaxNumPromotions == 0)
+ return false;
auto CalleeFunctionName = Candidate.CalleeSamples->getFuncName();
auto R = SymbolMap.find(CalleeFunctionName);
if (R == SymbolMap.end() || !R->getValue())
@@ -1009,8 +1034,9 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
for (auto I : Candidates) {
Function *CalledFunction = I->getCalledFunction();
if (CalledFunction) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt",
- I->getDebugLoc(), I->getParent())
+ ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
+ "InlineAttempt", I->getDebugLoc(),
+ I->getParent())
<< "previous inlining reattempted for "
<< (Hot ? "hotness: '" : "size: '")
<< ore::NV("Callee", CalledFunction) << "' into '"
@@ -1042,13 +1068,12 @@ void SampleProfileLoader::findExternalInlineCandidate(
// For AutoFDO profile, retrieve candidate profiles by walking over
// the nested inlinee profiles.
- if (!ProfileIsCSFlat) {
+ if (!FunctionSamples::ProfileIsCS) {
Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
return;
}
- ContextTrieNode *Caller =
- ContextTracker->getContextFor(Samples->getContext());
+ ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
std::queue<ContextTrieNode *> CalleeList;
CalleeList.push(Caller);
while (!CalleeList.empty()) {
@@ -1098,11 +1123,20 @@ void SampleProfileLoader::findExternalInlineCandidate(
/// Iteratively inline hot callsites of a function.
///
-/// Iteratively traverse all callsites of the function \p F, and find if
-/// the corresponding inlined instance exists and is hot in profile. If
-/// it is hot enough, inline the callsites and adds new callsites of the
-/// callee into the caller. If the call is an indirect call, first promote
-/// it to direct call. Each indirect call is limited with a single target.
+/// Iteratively traverse all callsites of the function \p F, so as to
+/// find out callsites with corresponding inline instances.
+///
+/// For such callsites,
+/// - If it is hot enough, inline the callsite and add the callee's callsites
+///   into the caller. If the call is an indirect call, first promote
+///   it to a direct call. Each indirect call is limited to a single target.
+///
+/// - If a callsite is not inlined, merge its profile into the outline
+///   version (if --sample-profile-merge-inlinee is true), or scale the
+///   counters of the standalone function based on the profile of inlined
+///   instances (if --sample-profile-merge-inlinee is false).
+///
+/// Later passes may consume the updated profiles; the merge-or-scale step
+/// is sketched after this comment.
///
/// \param F function to perform iterative inlining.
/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
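A condensed sketch of the merge-or-scale step referenced in the comment above; ProfileMergeInlinee is the option defined in this file, while OutlineFS, FS, and Callee are illustrative names for the outline profile, the callsite's context profile, and the callee function:

    // For a hot but not-inlined callsite with callee profile FS:
    if (ProfileMergeInlinee) {
      // Fold the context profile into the callee's outline profile so
      // later passes see the combined counts exactly once.
      OutlineFS->merge(*FS);
    } else {
      // Keep profiles separate and scale the callee's entry count by the
      // samples attributed to this not-inlined callsite.
      updateProfileCallee(Callee, FS->getEntrySamples());
    }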
@@ -1137,7 +1171,7 @@ bool SampleProfileLoader::inlineHotFunctions(
assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
"GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || ProfileIsCSFlat)
+ if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS)
LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
Hot = true;
@@ -1200,13 +1234,17 @@ bool SampleProfileLoader::inlineHotFunctions(
// For CS profile, profile for not inlined context will be merged when
// base profile is being retrieved.
- if (!FunctionSamples::ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
return Changed;
}
bool SampleProfileLoader::tryInlineCandidate(
InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
+ // Do not attempt to inline a candidate if
+ // --disable-sample-loader-inlining is true.
+ if (DisableSampleLoaderInlining)
+ return false;
CallBase &CB = *Candidate.CallInstr;
Function *CalledFunction = CB.getCalledFunction();
@@ -1216,7 +1254,8 @@ bool SampleProfileLoader::tryInlineCandidate(
InlineCost Cost = shouldInlineCandidate(Candidate);
if (Cost.isNever()) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
+ ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
+ "InlineFail", DLoc, BB)
<< "incompatible inlining");
return false;
}
@@ -1226,45 +1265,45 @@ bool SampleProfileLoader::tryInlineCandidate(
InlineFunctionInfo IFI(nullptr, GetAC);
IFI.UpdateProfile = false;
- if (InlineFunction(CB, IFI).isSuccess()) {
- // Merge the attributes based on the inlining.
- AttributeFuncs::mergeAttributesForInlining(*BB->getParent(),
- *CalledFunction);
-
- // The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
- *BB->getParent(), Cost, true, CSINLINE_DEBUG);
-
- // Now populate the list of newly exposed call sites.
- if (InlinedCallSites) {
- InlinedCallSites->clear();
- for (auto &I : IFI.InlinedCallSites)
- InlinedCallSites->push_back(I);
- }
+ if (!InlineFunction(CB, IFI).isSuccess())
+ return false;
- if (ProfileIsCSFlat)
- ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
- ++NumCSInlined;
-
- // Prorate inlined probes for a duplicated inlining callsite which probably
- // has a distribution less than 100%. Samples for an inlinee should be
- // distributed among the copies of the original callsite based on each
- // callsite's distribution factor for counts accuracy. Note that an inlined
- // probe may come with its own distribution factor if it has been duplicated
- // in the inlinee body. The two factor are multiplied to reflect the
- // aggregation of duplication.
- if (Candidate.CallsiteDistribution < 1) {
- for (auto &I : IFI.InlinedCallSites) {
- if (Optional<PseudoProbe> Probe = extractProbe(*I))
- setProbeDistributionFactor(*I, Probe->Factor *
- Candidate.CallsiteDistribution);
- }
- NumDuplicatedInlinesite++;
- }
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(*BB->getParent(),
+ *CalledFunction);
- return true;
+ // The call to InlineFunction erases I, so we can't pass it here.
+ emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
+ Cost, true, getAnnotatedRemarkPassName());
+
+ // Now populate the list of newly exposed call sites.
+ if (InlinedCallSites) {
+ InlinedCallSites->clear();
+ for (auto &I : IFI.InlinedCallSites)
+ InlinedCallSites->push_back(I);
}
- return false;
+
+ if (FunctionSamples::ProfileIsCS)
+ ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
+ ++NumCSInlined;
+
+ // Prorate inlined probes for a duplicated inlining callsite which probably
+ // has a distribution less than 100%. Samples for an inlinee should be
+ // distributed among the copies of the original callsite based on each
+ // callsite's distribution factor for counts accuracy. Note that an inlined
+ // probe may come with its own distribution factor if it has been duplicated
+ // in the inlinee body. The two factor are multiplied to reflect the
+ // aggregation of duplication.
+ if (Candidate.CallsiteDistribution < 1) {
+ for (auto &I : IFI.InlinedCallSites) {
+ if (Optional<PseudoProbe> Probe = extractProbe(*I))
+ setProbeDistributionFactor(*I, Probe->Factor *
+ Candidate.CallsiteDistribution);
+ }
+ NumDuplicatedInlinesite++;
+ }
+
+ return true;
}
bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
@@ -1285,14 +1324,8 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
if (Optional<PseudoProbe> Probe = extractProbe(*CB))
Factor = Probe->Factor;
- uint64_t CallsiteCount = 0;
- ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
- if (Weight)
- CallsiteCount = Weight.get();
- if (CalleeSamples)
- CallsiteCount = std::max(
- CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
-
+ uint64_t CallsiteCount =
+ CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0;
*NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
return true;
}
@@ -1387,7 +1420,6 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
-
// ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
// Profile symbol list is ignored when profile-sample-accurate is on.
assert((!ProfAccForSymsInList ||
@@ -1513,7 +1545,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
// For CS profile, profile for not inlined context will be merged when
// base profile is being retrieved.
- if (!FunctionSamples::ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
return Changed;
}
@@ -1528,11 +1560,11 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
if (!Callee || Callee->isDeclaration())
continue;
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "NotInline",
- I->getDebugLoc(), I->getParent())
- << "previous inlining not repeated: '"
- << ore::NV("Callee", Callee) << "' into '"
- << ore::NV("Caller", &F) << "'");
+ ORE->emit(
+ OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
+ I->getDebugLoc(), I->getParent())
+ << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
+ << "' into '" << ore::NV("Caller", &F) << "'");
++NumCSNotInlined;
const FunctionSamples *FS = Pair.getSecond();
@@ -1540,6 +1572,10 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
continue;
}
+ // Do not merge a context that is already duplicated into the base profile.
+ if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
+ continue;
+
if (ProfileMergeInlinee) {
// A function call can be replicated by optimizations like callsite
// splitting or jump threading and the replicates end up sharing the
@@ -1623,7 +1659,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
// With CSSPGO all indirect call targets are counted towards the
// original indirect call site in the profile, including both
// inlined and non-inlined targets.
- if (!FunctionSamples::ProfileIsCSFlat) {
+ if (!FunctionSamples::ProfileIsCS) {
if (const FunctionSamplesMap *M =
FS->findFunctionSamplesMapAt(CallSite)) {
for (const auto &NameFS : *M)
@@ -1714,6 +1750,11 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
}
}
+ // FIXME: Re-enable for sample profiling after investigating why the sum
+ // of branch weights can be 0
+ //
+ // misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
+
uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
@@ -1798,7 +1839,7 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
std::unique_ptr<ProfiledCallGraph>
SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG;
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
else
ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
@@ -1843,8 +1884,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
assert(&CG->getModule() == &M);
- if (UseProfiledCallGraph ||
- (ProfileIsCSFlat && !UseProfiledCallGraph.getNumOccurrences())) {
+ if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS &&
+ !UseProfiledCallGraph.getNumOccurrences())) {
// Use profiled call edges to augment the top-down order. There are cases
// that the top-down order computed based on the static call graph doesn't
// reflect real execution order. For example
@@ -1973,40 +2014,50 @@ bool SampleProfileLoader::doInitialization(Module &M,
ProfileInlineReplayScope,
ProfileInlineReplayFallback,
{ProfileInlineReplayFormat}},
- /*EmitRemarks=*/false);
+ /*EmitRemarks=*/false,
+ InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
}
- // Apply tweaks if context-sensitive profile is available.
- if (Reader->profileIsCSFlat() || Reader->profileIsCSNested()) {
- ProfileIsCSFlat = Reader->profileIsCSFlat();
+ // Apply tweaks if context-sensitive or probe-based profile is available.
+ if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
+ Reader->profileIsProbeBased()) {
+ if (!UseIterativeBFIInference.getNumOccurrences())
+ UseIterativeBFIInference = true;
+ if (!SampleProfileUseProfi.getNumOccurrences())
+ SampleProfileUseProfi = true;
+ if (!EnableExtTspBlockPlacement.getNumOccurrences())
+ EnableExtTspBlockPlacement = true;
// Enable priority-base inliner and size inline by default for CSSPGO.
if (!ProfileSizeInline.getNumOccurrences())
ProfileSizeInline = true;
if (!CallsitePrioritizedInline.getNumOccurrences())
CallsitePrioritizedInline = true;
-
- // For CSSPGO, use preinliner decision by default when available.
- if (!UsePreInlinerDecision.getNumOccurrences())
- UsePreInlinerDecision = true;
-
// For CSSPGO, we also allow recursive inline to best use context profile.
if (!AllowRecursiveInline.getNumOccurrences())
AllowRecursiveInline = true;
- // Enable iterative-BFI by default for CSSPGO.
- if (!UseIterativeBFIInference.getNumOccurrences())
- UseIterativeBFIInference = true;
- // Enable Profi by default for CSSPGO.
- if (!SampleProfileUseProfi.getNumOccurrences())
- SampleProfileUseProfi = true;
+ if (Reader->profileIsPreInlined()) {
+ if (!UsePreInlinerDecision.getNumOccurrences())
+ UsePreInlinerDecision = true;
+ }
- if (FunctionSamples::ProfileIsCSFlat) {
- // Tracker for profiles under different context
- ContextTracker = std::make_unique<SampleContextTracker>(
- Reader->getProfiles(), &GUIDToFuncNameMap);
+ if (!Reader->profileIsCS()) {
+ // Non-CS profile should be fine without a function size budget for the
+ // inliner since the contexts in the profile are either all from inlining
+ // in the previous build or pre-computed by the preinliner with a size
+ // cap, thus they are bounded.
+ if (!ProfileInlineLimitMin.getNumOccurrences())
+ ProfileInlineLimitMin = std::numeric_limits<unsigned>::max();
+ if (!ProfileInlineLimitMax.getNumOccurrences())
+ ProfileInlineLimitMax = std::numeric_limits<unsigned>::max();
}
}
+ if (Reader->profileIsCS()) {
+ // Tracker for profiles under different context
+ ContextTracker = std::make_unique<SampleContextTracker>(
+ Reader->getProfiles(), &GUIDToFuncNameMap);
+ }
+
// Load pseudo probe descriptors for probe-based function samples.
if (Reader->profileIsProbeBased()) {
ProbeManager = std::make_unique<PseudoProbeManager>(M);
@@ -2082,7 +2133,7 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
}
// Account for cold calls not inlined.
- if (!ProfileIsCSFlat)
+ if (!FunctionSamples::ProfileIsCS)
for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
notInlinedCallInfo)
updateProfileCallee(pair.first, pair.second.entryCount);
@@ -2145,7 +2196,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
// Initialize entry count when the function has no existing entry
// count value.
- if (!F.getEntryCount().hasValue())
+ if (!F.getEntryCount())
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
@@ -2158,7 +2209,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
ORE = OwnedORE.get();
}
- if (ProfileIsCSFlat)
+ if (FunctionSamples::ProfileIsCS)
Samples = ContextTracker->getBaseSamplesFor(F);
else
Samples = Reader->getSamplesFor(F);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index e104ae00e916..d1ab2649ee2e 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -13,21 +13,19 @@
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <unordered_set>
@@ -416,7 +414,7 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
FunctionAnalysisManager &FAM) {
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto BBProfileCount = [&BFI](BasicBlock *BB) {
- return BFI.getBlockProfileCount(BB).getValueOr(0);
+ return BFI.getBlockProfileCount(BB).value_or(0);
};
// Collect the sum of execution weight for each probe.
diff --git a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index 95393d9476e0..c7d54b8cdeb0 100644
--- a/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/llvm/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -25,18 +25,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using Scaled64 = ScaledNumber<uint64_t>;
@@ -47,18 +42,17 @@ using ProfileCount = Function::ProfileCount;
namespace llvm {
cl::opt<int>
InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
- cl::ZeroOrMore,
cl::desc("Initial value of synthetic entry count"));
} // namespace llvm
/// Initial synthetic count assigned to inline functions.
static cl::opt<int> InlineSyntheticCount(
- "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
+ "inline-synthetic-count", cl::Hidden, cl::init(15),
cl::desc("Initial synthetic entry count for inline functions."));
/// Initial synthetic count assigned to cold functions.
static cl::opt<int> ColdSyntheticCount(
- "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
+ "cold-synthetic-count", cl::Hidden, cl::init(5),
cl::desc("Initial synthetic entry count for cold functions."));
// Assign initial synthetic entry counts to functions.
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 52708ff2f226..a360a768a2bc 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -21,7 +21,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Pass.h"
-#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
@@ -311,7 +310,8 @@ void splitAndWriteThinLTOBitcode(
return;
}
if (!F->isDeclaration() &&
- computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
+ computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) ==
+ FMRB_DoesNotAccessMemory)
EligibleVirtualFns.insert(F);
});
}
@@ -542,11 +542,11 @@ class WriteThinLTOBitcode : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
// The output stream on which to emit a minimized module for use
// just in the thin link, if requested.
- raw_ostream *ThinLinkOS;
+ raw_ostream *ThinLinkOS = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
- WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
+ WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 8b30f0e989a1..898a213d0849 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -57,6 +57,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -79,6 +80,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
@@ -95,6 +97,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
@@ -107,6 +110,15 @@ using namespace wholeprogramdevirt;
#define DEBUG_TYPE "wholeprogramdevirt"
+STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
+STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
+STATISTIC(NumBranchFunnel, "Number of branch funnels");
+STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
+STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
+STATISTIC(NumVirtConstProp1Bit,
+ "Number of 1 bit virtual constant propagations");
+STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
+
static cl::opt<PassSummaryAction> ClSummaryAction(
"wholeprogramdevirt-summary-action",
cl::desc("What to do with the summary when running this pass"),
@@ -132,13 +144,12 @@ static cl::opt<std::string> ClWriteSummary(
static cl::opt<unsigned>
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
- cl::init(10), cl::ZeroOrMore,
+ cl::init(10),
cl::desc("Maximum number of call targets per "
"call site to enable branch funnels"));
static cl::opt<bool>
PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
- cl::init(false), cl::ZeroOrMore,
cl::desc("Print index-based devirtualization messages"));
/// Provide a way to force enable whole program visibility in tests.
@@ -146,30 +157,34 @@ static cl::opt<bool>
/// !vcall_visibility metadata (the mere presence of type tests
/// previously implied hidden visibility).
static cl::opt<bool>
- WholeProgramVisibility("whole-program-visibility", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
+ WholeProgramVisibility("whole-program-visibility", cl::Hidden,
cl::desc("Enable whole program visibility"));
/// Provide a way to force disable whole program for debugging or workarounds,
/// when enabled via the linker.
static cl::opt<bool> DisableWholeProgramVisibility(
- "disable-whole-program-visibility", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
+ "disable-whole-program-visibility", cl::Hidden,
cl::desc("Disable whole program visibility (overrides enabling options)"));
/// Provide a way to prevent certain functions from being devirtualized
static cl::list<std::string>
SkipFunctionNames("wholeprogramdevirt-skip",
cl::desc("Prevent function(s) from being devirtualized"),
- cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated);
-
-/// Mechanism to add runtime checking of devirtualization decisions, trapping on
-/// any that are not correct. Useful for debugging undefined behavior leading to
-/// failures with WPD.
-static cl::opt<bool>
- CheckDevirt("wholeprogramdevirt-check", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
- cl::desc("Add code to trap on incorrect devirtualizations"));
+ cl::Hidden, cl::CommaSeparated);
+
+/// Mechanism to add runtime checking of devirtualization decisions, optionally
+/// trapping or falling back to indirect call on any that are not correct.
+/// Trapping mode is useful for debugging undefined behavior leading to failures
+/// with WPD. Fallback mode is useful for ensuring safety when whole program
+/// visibility may be compromised.
+enum WPDCheckMode { None, Trap, Fallback };
+static cl::opt<WPDCheckMode> DevirtCheckMode(
+ "wholeprogramdevirt-check", cl::Hidden,
+ cl::desc("Type of checking for incorrect devirtualizations"),
+ cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
+ clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
+ clEnumValN(WPDCheckMode::Fallback, "fallback",
+ "Fallback to indirect when incorrect")));
namespace {
struct PatternList {
@@ -866,13 +881,14 @@ void updateVCallVisibilityInIndex(
if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
return;
for (auto &P : Index) {
+ // Don't upgrade the visibility for symbols exported to the dynamic
+ // linker, as we have no information on their eventual use.
+ if (DynamicExportSymbols.count(P.first))
+ continue;
for (auto &S : P.second.SummaryList) {
auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
if (!GVar ||
- GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic ||
- // Don't upgrade the visibility for symbols exported to the dynamic
- // linker, as we have no information on their eventual use.
- DynamicExportSymbols.count(P.first))
+ GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
continue;
GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
}
@@ -1133,16 +1149,17 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
if (RemarksEnabled)
VCallSite.emitRemark("single-impl",
TheFn->stripPointerCasts()->getName(), OREGetter);
+ NumSingleImpl++;
auto &CB = VCallSite.CB;
assert(!CB.getCalledFunction() && "devirtualizing direct call?");
IRBuilder<> Builder(&CB);
Value *Callee =
Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());
- // If checking is enabled, add support to compare the virtual function
- // pointer to the devirtualized target. In case of a mismatch, perform a
- // debug trap.
- if (CheckDevirt) {
+ // If trap checking is enabled, add support to compare the virtual
+ // function pointer to the devirtualized target. In case of a mismatch,
+ // perform a debug trap.
+ if (DevirtCheckMode == WPDCheckMode::Trap) {
auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
Instruction *ThenTerm =
SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
@@ -1152,8 +1169,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
CallTrap->setDebugLoc(CB.getDebugLoc());
}
- // Devirtualize.
- CB.setCalledOperand(Callee);
+ // If fallback checking is enabled, add support to compare the virtual
+ // function pointer to the devirtualized target. In case of a mismatch,
+ // fall back to indirect call.
+ if (DevirtCheckMode == WPDCheckMode::Fallback) {
+ MDNode *Weights =
+ MDBuilder(M.getContext()).createBranchWeights((1U << 20) - 1, 1);
+ // Version the indirect call site. If the called value is equal to the
+ // given callee, 'NewInst' will be executed; otherwise the original call
+ // site will be executed.
+ CallBase &NewInst = versionCallSite(CB, Callee, Weights);
+ NewInst.setCalledOperand(Callee);
+ // Since the new call site is direct, we must clear metadata that
+ // is only appropriate for indirect calls. This includes !prof and
+ // !callees metadata.
+ NewInst.setMetadata(LLVMContext::MD_prof, nullptr);
+ NewInst.setMetadata(LLVMContext::MD_callees, nullptr);
+ // Additionally, we should remove them from the fallback indirect call,
+ // so that we don't attempt to perform indirect call promotion later.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ CB.setMetadata(LLVMContext::MD_callees, nullptr);
+ }
+
+ // In either trapping or non-checking mode, devirtualize the original call.
+ else {
+ // Devirtualize unconditionally.
+ CB.setCalledOperand(Callee);
+ // Since the call site is now direct, we must clear metadata that
+ // is only appropriate for indirect calls. This includes !prof and
+ // !callees metadata.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ CB.setMetadata(LLVMContext::MD_callees, nullptr);
+ }
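One detail of the fallback arm worth spelling out, assuming the usual interpretation of !prof branch weights:

    // createBranchWeights((1U << 20) - 1, 1) annotates the versioned site
    // so the direct (devirtualized) edge is ~(2^20 - 1) : 1 hotter than the
    // indirect fallback, which is reached only when the loaded function
    // pointer does not match the expected single implementation, i.e. when
    // whole-program visibility was violated at runtime.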
// This use is no longer unsafe.
if (VCallSite.NumUnsafeUses)
@@ -1208,7 +1255,7 @@ bool DevirtModule::trySingleImplDevirt(
return false;
// If so, update each call site to call that implementation directly.
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
TargetsForSlot[0].WasDevirt = true;
bool IsExported = false;
@@ -1279,7 +1326,7 @@ bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
return false;
// Collect functions devirtualized at least for one call site for stats.
- if (PrintSummaryDevirt)
+ if (PrintSummaryDevirt || AreStatisticsEnabled())
DevirtTargets.insert(TheFn);
auto &S = TheFn.getSummaryList()[0];
@@ -1385,6 +1432,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
!FSAttr.getValueAsString().contains("+retpoline"))
continue;
+ NumBranchFunnel++;
if (RemarksEnabled)
VCallSite.emitRemark("branch-funnel",
JT->stripPointerCasts()->getName(), OREGetter);
@@ -1476,6 +1524,7 @@ void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
for (auto Call : CSInfo.CallSites) {
if (!OptimizedCalls.insert(&Call.CB).second)
continue;
+ NumUniformRetVal++;
Call.replaceAndErase(
"uniform-ret-val", FnName, RemarksEnabled, OREGetter,
ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
@@ -1499,7 +1548,7 @@ bool DevirtModule::tryUniformRetValOpt(
}
applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
return true;
@@ -1592,6 +1641,7 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
Cmp = B.CreateZExt(Cmp, Call.CB.getType());
+ NumUniqueRetVal++;
Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
Cmp);
}
@@ -1636,7 +1686,7 @@ bool DevirtModule::tryUniqueRetValOpt(
UniqueMemberAddr);
// Update devirtualization statistics for targets.
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
@@ -1665,11 +1715,13 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Value *Bits = B.CreateLoad(Int8Ty, Addr);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
+ NumVirtConstProp1Bit++;
Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
OREGetter, IsBitSet);
} else {
Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
Value *Val = B.CreateLoad(RetType, ValAddr);
+ NumVirtConstProp++;
Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
OREGetter, Val);
}
@@ -1701,7 +1753,7 @@ bool DevirtModule::tryVirtualConstProp(
for (VirtualCallTarget &Target : TargetsForSlot) {
if (Target.Fn->isDeclaration() ||
computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
- MAK_ReadNone ||
+ FMRB_DoesNotAccessMemory ||
Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
Target.Fn->getReturnType() != RetType)
return false;
@@ -1755,7 +1807,7 @@ bool DevirtModule::tryVirtualConstProp(
setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
OffsetBit);
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
@@ -1963,7 +2015,7 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
// (although this is unlikely). In that case, explicitly build a pair and
// RAUW it.
if (!CI->use_empty()) {
- Value *Pair = UndefValue::get(CI->getType());
+ Value *Pair = PoisonValue::get(CI->getType());
IRBuilder<> B(CI);
Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
@@ -2151,9 +2203,9 @@ bool DevirtModule::run() {
removeRedundantTypeTests();
- // We have lowered or deleted the type instrinsics, so we will no
- // longer have enough information to reason about the liveness of virtual
- // function pointers in GlobalDCE.
+ // We have lowered or deleted the type intrinsics, so we will no longer have
+ // enough information to reason about the liveness of virtual function
+ // pointers in GlobalDCE.
for (GlobalVariable &GV : M.globals())
GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
@@ -2243,7 +2295,7 @@ bool DevirtModule::run() {
}
// Collect functions devirtualized at least for one call site for stats.
- if (RemarksEnabled)
+ if (RemarksEnabled || AreStatisticsEnabled())
for (const auto &T : TargetsForSlot)
if (T.WasDevirt)
DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
@@ -2276,6 +2328,8 @@ bool DevirtModule::run() {
}
}
+ NumDevirtTargets += DevirtTargets.size();
+
removeRedundantTypeTests();
// Rebuild each global we touched as part of virtual constant propagation to
@@ -2284,9 +2338,9 @@ bool DevirtModule::run() {
for (VTableBits &B : Bits)
rebuildGlobal(B);
- // We have lowered or deleted the type instrinsics, so we will no
- // longer have enough information to reason about the liveness of virtual
- // function pointers in GlobalDCE.
+ // We have lowered or deleted the type intrinsics, so we will no longer have
+ // enough information to reason about the liveness of virtual function
+ // pointers in GlobalDCE.
for (GlobalVariable &GV : M.globals())
GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
@@ -2367,4 +2421,6 @@ void DevirtIndex::run() {
if (PrintSummaryDevirt)
for (const auto &DT : DevirtTargets)
errs() << "Devirtualized call to " << DT << "\n";
+
+ NumDevirtTargets += DevirtTargets.size();
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0598f751febe..f4d8b79a5311 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -693,9 +693,6 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
unsigned OpndNum = Opnds.size();
unsigned InstrNeeded = OpndNum - 1;
- // The number of addends in the form of "(-1)*x".
- unsigned NegOpndNum = 0;
-
// Adjust the number of instructions needed to emit the N-ary add.
for (const FAddend *Opnd : Opnds) {
if (Opnd->isConstant())
@@ -707,9 +704,6 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
continue;
const FAddendCoef &CE = Opnd->getCoef();
- if (CE.isMinusOne() || CE.isMinusTwo())
- NegOpndNum++;
-
// Let the addend be "c * x". If "c == +/-1", the value of the addend
// is immediately available; otherwise, it needs exactly one instruction
// to evaluate the value.
@@ -1277,7 +1271,7 @@ static Instruction *factorizeMathWithShlOps(BinaryOperator &I,
}
Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
- if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyAddInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1375,6 +1369,13 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
}
}
+ // (A & 2^C1) + A => A & (2^C1 - 1) iff bit C1 in A is a sign bit
+ if (match(&I, m_c_Add(m_And(m_Value(A), m_APInt(C1)), m_Deferred(A))) &&
+ C1->isPowerOf2() && (ComputeNumSignBits(A) > C1->countLeadingZeros())) {
+ Constant *NewMask = ConstantInt::get(RHS->getType(), *C1 - 1);
+ return BinaryOperator::CreateAnd(A, NewMask);
+ }
+
// A+B --> A|B iff A and B have no bits set in common.
if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
return BinaryOperator::CreateOr(LHS, RHS);
@@ -1528,7 +1529,7 @@ static Instruction *factorizeFAddFSub(BinaryOperator &I,
}
Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
- if (Value *V = SimplifyFAddInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyFAddInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1687,7 +1688,8 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
// Require at least one GEP with a common base pointer on both sides.
if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
// (gep X, ...) - X
- if (LHSGEP->getOperand(0) == RHS) {
+ if (LHSGEP->getOperand(0)->stripPointerCasts() ==
+ RHS->stripPointerCasts()) {
GEP1 = LHSGEP;
} else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
// (gep X, ...) - (gep X, ...)
@@ -1749,7 +1751,7 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
}
Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
- if (Value *V = SimplifySubInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifySubInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -2014,6 +2016,37 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
}
}
+ if (auto *II = dyn_cast<MinMaxIntrinsic>(Op1)) {
+ {
+ // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
+ // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
+ Value *X = II->getLHS();
+ Value *Y = II->getRHS();
+ if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
+ return replaceInstUsesWith(I, InvMaxMin);
+ }
+ }
+
+ {
+ // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
+ // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
+ Value *X, *Y, *Z;
+ if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X)))))
+ return BinaryOperator::CreateAdd(
+ X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
+ {Y, Z}));
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X)))))
+ return BinaryOperator::CreateAdd(
+ X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
+ {Z, Y}));
+ }
+ }
+ }
+
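// Sketch of why the min/max folds above hold: add(X,Y) == min(X,Y) +
// max(X,Y) for any X, Y (the sum just permutes the operands), so
// subtracting one extreme from the sum yields the other; e.g. X = 3,
// Y = 7 gives (3 + 7) - umin(3,7) = 7 = umax(3,7). The usub.sat form
// follows because Y - umin(Y,Z) equals usub.sat(Y,Z) and never wraps.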
{
// If we have a subtraction between some value and a select between
// said value and something else, sink subtraction into select hands, i.e.:
@@ -2089,36 +2122,6 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateSub(X, Not);
}
- // TODO: This is the same logic as above but handles the cmp-select idioms
- // for min/max, so the use checks are increased to account for the
- // extra instructions. If we canonicalize to intrinsics, this block
- // can likely be removed.
- {
- Value *LHS, *RHS, *A;
- Value *NotA = Op0, *MinMax = Op1;
- SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
- if (!SelectPatternResult::isMinOrMax(SPF)) {
- NotA = Op1;
- MinMax = Op0;
- SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
- }
- if (SelectPatternResult::isMinOrMax(SPF) &&
- match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) {
- if (NotA == LHS)
- std::swap(LHS, RHS);
- // LHS is now Y above and expected to have at least 2 uses (the min/max)
- // NotA is expected to have 2 uses from the min/max and 1 from the sub.
- if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
- !NotA->hasNUsesOrMore(4)) {
- Value *Not = Builder.CreateNot(MinMax);
- if (NotA == Op0)
- return BinaryOperator::CreateSub(Not, A);
- else
- return BinaryOperator::CreateSub(A, Not);
- }
- }
- }
-
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
Value *LHSOp, *RHSOp;
@@ -2149,11 +2152,11 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// B = ashr i32 A, 31 ; smear the sign bit
// sub (xor A, B), B ; flip bits if negative and subtract -1 (add 1)
// --> (A < 0) ? -A : A
- Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
+ Value *IsNeg = Builder.CreateIsNeg(A);
// Copy the nuw/nsw flags from the sub to the negate.
- Value *Neg = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(),
- I.hasNoSignedWrap());
- return SelectInst::Create(Cmp, Neg, A);
+ Value *NegA = Builder.CreateNeg(A, "", I.hasNoUnsignedWrap(),
+ I.hasNoSignedWrap());
+ return SelectInst::Create(IsNeg, NegA, A);
}
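// Worked example of the idiom (i8, A = -5): B = ashr(-5, 7) = -1,
// xor(-5, -1) = 4, and 4 - (-1) = 5 = |A|; for non-negative A, B is 0 and
// the xor/sub leave A unchanged, matching (A < 0) ? -A : A.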
// If we are subtracting a low-bit masked subset of some value from an add
@@ -2187,12 +2190,23 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(
I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
+ // Op0 - umin(X, Op0) --> usub.sat(Op0, X)
+ if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0)))))
+ return replaceInstUsesWith(
+ I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X}));
+
// Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0});
return BinaryOperator::CreateNeg(USub);
}
+ // umin(X, Op1) - Op1 --> 0 - usub.sat(Op1, X)
+ if (match(Op0, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op1))))) {
+ Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op1, X});
+ return BinaryOperator::CreateNeg(USub);
+ }
+
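// E.g. Op0 = 5: 5 - umin(9, 5) = 0 = usub.sat(5, 9) and 5 - umin(3, 5) =
// 2 = usub.sat(5, 3); the umax form mirrors this with a negation:
// 5 - umax(9, 5) = -4 = 0 - usub.sat(9, 5).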
// C - ctpop(X) => ctpop(~X) if C is bitwidth
if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) &&
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))))
@@ -2264,7 +2278,7 @@ static Instruction *hoistFNegAboveFMulFDiv(Instruction &I,
Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
Value *Op = I.getOperand(0);
- if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(),
+ if (Value *V = simplifyFNegInst(Op, I.getFastMathFlags(),
getSimplifyQuery().getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -2287,10 +2301,11 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
// Unlike most transforms, this one is not safe to propagate nsz unless
// it is present on the original select. (We are conservatively intersecting
// the nsz flags from the select and root fneg instruction.)
- auto propagateSelectFMF = [&](SelectInst *S) {
+ auto propagateSelectFMF = [&](SelectInst *S, bool CommonOperand) {
S->copyFastMathFlags(&I);
if (auto *OldSel = dyn_cast<SelectInst>(Op))
- if (!OldSel->hasNoSignedZeros())
+ if (!OldSel->hasNoSignedZeros() && !CommonOperand &&
+ !isGuaranteedNotToBeUndefOrPoison(OldSel->getCondition()))
S->setHasNoSignedZeros(false);
};
// -(Cond ? -P : Y) --> Cond ? P : -Y
@@ -2298,14 +2313,14 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
if (match(X, m_FNeg(m_Value(P)))) {
Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
SelectInst *NewSel = SelectInst::Create(Cond, P, NegY);
- propagateSelectFMF(NewSel);
+ propagateSelectFMF(NewSel, P == Y);
return NewSel;
}
// -(Cond ? X : -P) --> Cond ? -X : P
if (match(Y, m_FNeg(m_Value(P)))) {
Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
SelectInst *NewSel = SelectInst::Create(Cond, NegX, P);
- propagateSelectFMF(NewSel);
+ propagateSelectFMF(NewSel, P == X);
return NewSel;
}
}
@@ -2314,7 +2329,7 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
}
Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
- if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyFSubInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
getSimplifyQuery().getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 6bbb0251f2bc..ae8865651ece 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -24,32 +24,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
-/// a four bit mask.
-static unsigned getFCmpCode(FCmpInst::Predicate CC) {
- assert(FCmpInst::FCMP_FALSE <= CC && CC <= FCmpInst::FCMP_TRUE &&
- "Unexpected FCmp predicate!");
- // Take advantage of the bit pattern of FCmpInst::Predicate here.
- // U L G E
- static_assert(FCmpInst::FCMP_FALSE == 0, ""); // 0 0 0 0
- static_assert(FCmpInst::FCMP_OEQ == 1, ""); // 0 0 0 1
- static_assert(FCmpInst::FCMP_OGT == 2, ""); // 0 0 1 0
- static_assert(FCmpInst::FCMP_OGE == 3, ""); // 0 0 1 1
- static_assert(FCmpInst::FCMP_OLT == 4, ""); // 0 1 0 0
- static_assert(FCmpInst::FCMP_OLE == 5, ""); // 0 1 0 1
- static_assert(FCmpInst::FCMP_ONE == 6, ""); // 0 1 1 0
- static_assert(FCmpInst::FCMP_ORD == 7, ""); // 0 1 1 1
- static_assert(FCmpInst::FCMP_UNO == 8, ""); // 1 0 0 0
- static_assert(FCmpInst::FCMP_UEQ == 9, ""); // 1 0 0 1
- static_assert(FCmpInst::FCMP_UGT == 10, ""); // 1 0 1 0
- static_assert(FCmpInst::FCMP_UGE == 11, ""); // 1 0 1 1
- static_assert(FCmpInst::FCMP_ULT == 12, ""); // 1 1 0 0
- static_assert(FCmpInst::FCMP_ULE == 13, ""); // 1 1 0 1
- static_assert(FCmpInst::FCMP_UNE == 14, ""); // 1 1 1 0
- static_assert(FCmpInst::FCMP_TRUE == 15, ""); // 1 1 1 1
- return CC;
-}
-
/// This is the complement of getICmpCode, which turns an opcode and two
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
@@ -66,14 +40,10 @@ static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS,
/// operands into either a FCmp instruction, or a true/false constant.
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
InstCombiner::BuilderTy &Builder) {
- const auto Pred = static_cast<FCmpInst::Predicate>(Code);
- assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
- "Unexpected FCmp predicate!");
- if (Pred == FCmpInst::FCMP_FALSE)
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- if (Pred == FCmpInst::FCMP_TRUE)
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
- return Builder.CreateFCmp(Pred, LHS, RHS);
+ FCmpInst::Predicate NewPred;
+ if (Constant *TorF = getPredForFCmpCode(Code, LHS->getType(), NewPred))
+ return TorF;
+ return Builder.CreateFCmp(NewPred, LHS, RHS);
}
/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
@@ -395,6 +365,7 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
/// and the right hand side is of type BMask_Mixed. For example,
/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
+/// Also used for logical and/or, must be poison safe.
static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
@@ -409,9 +380,9 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
//
// We currently handle the case where B, C, D and E are constants.
//
- ConstantInt *BCst, *CCst, *DCst, *ECst;
- if (!match(B, m_ConstantInt(BCst)) || !match(C, m_ConstantInt(CCst)) ||
- !match(D, m_ConstantInt(DCst)) || !match(E, m_ConstantInt(ECst)))
+ const APInt *BCst, *CCst, *DCst, *OrigECst;
+ if (!match(B, m_APInt(BCst)) || !match(C, m_APInt(CCst)) ||
+ !match(D, m_APInt(DCst)) || !match(E, m_APInt(OrigECst)))
return nullptr;
ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
@@ -420,19 +391,20 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
// canonicalized as,
// (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
// (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
+ APInt ECst = *OrigECst;
if (PredR != NewCC)
- ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+ ECst ^= *DCst;
// If B or D is zero, skip: LHS or RHS can then be trivially folded by other
// folding rules and this pattern won't apply any more.
- if (BCst->getValue() == 0 || DCst->getValue() == 0)
+ if (*BCst == 0 || *DCst == 0)
return nullptr;
// If B and D don't intersect, i.e. (B & D) == 0, there is no folding because
// we can't deduce anything from it.
// For example,
// (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
- if ((BCst->getValue() & DCst->getValue()) == 0)
+ if ((*BCst & *DCst) == 0)
return nullptr;
// If the following two conditions are met:
@@ -451,22 +423,21 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
// For example,
// (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
// (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
- if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) &&
- (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) {
- APInt BorD = BCst->getValue() | DCst->getValue();
- APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) |
- ECst->getValue();
- Value *NewMask = ConstantInt::get(BCst->getType(), BorD);
- Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE);
+ if ((((*BCst & *DCst) & ECst) == 0) &&
+ (*BCst & (*BCst ^ *DCst)).isPowerOf2()) {
+ APInt BorD = *BCst | *DCst;
+ APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst;
+ Value *NewMask = ConstantInt::get(A->getType(), BorD);
+ Value *NewMaskedValue = ConstantInt::get(A->getType(), BandBxorDorE);
Value *NewAnd = Builder.CreateAnd(A, NewMask);
return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue);
}
- auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
- return (C1->getValue() & C2->getValue()) == C1->getValue();
+ auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) {
+ return (*C1 & *C2) == *C1;
};
- auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
- return (C1->getValue() & C2->getValue()) == C2->getValue();
+ auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) {
+ return (*C1 & *C2) == *C2;
};
// In the following, we consider only the cases where B is a superset of D, B
@@ -486,7 +457,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
// For example,
// (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
// (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
- if (ECst->isZero()) {
+ if (ECst.isZero()) {
if (IsSubSetOrEqual(BCst, DCst))
return ConstantInt::get(LHS->getType(), !IsAnd);
return nullptr;
@@ -504,7 +475,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
// ie. (B & E) != 0, then LHS is subsumed by RHS. For example.
// (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code");
- if ((BCst->getValue() & ECst->getValue()) != 0)
+ if ((*BCst & ECst) != 0)
return RHS;
// Otherwise, LHS and RHS contradict and the whole expression becomes false
// (or true if negated.) For example,
@@ -516,6 +487,7 @@ static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
/// aren't of the common mask pattern type.
+/// Also used for logical and/or, must be poison safe.
static Value *foldLogOpOfMaskedICmpsAsymmetric(
ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
@@ -550,6 +522,7 @@ static Value *foldLogOpOfMaskedICmpsAsymmetric(
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y).
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+ bool IsLogical,
InstCombiner::BuilderTy &Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
@@ -594,6 +567,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
if (Mask & Mask_AllZeros) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
// -> (icmp eq (A & (B|D)), 0)
+ if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
+ return nullptr; // TODO: Use freeze?
Value *NewOr = Builder.CreateOr(B, D);
Value *NewAnd = Builder.CreateAnd(A, NewOr);
// We can't use C as zero because we might actually handle
@@ -605,6 +580,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
if (Mask & BMask_AllOnes) {
// (icmp eq (A & B), B) & (icmp eq (A & D), D)
// -> (icmp eq (A & (B|D)), (B|D))
+ if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
+ return nullptr; // TODO: Use freeze?
Value *NewOr = Builder.CreateOr(B, D);
Value *NewAnd = Builder.CreateAnd(A, NewOr);
return Builder.CreateICmp(NewCC, NewAnd, NewOr);
@@ -612,6 +589,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
if (Mask & AMask_AllOnes) {
// (icmp eq (A & B), A) & (icmp eq (A & D), A)
// -> (icmp eq (A & (B&D)), A)
+ if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
+ return nullptr; // TODO: Use freeze?
Value *NewAnd1 = Builder.CreateAnd(B, D);
Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1);
return Builder.CreateICmp(NewCC, NewAnd2, A);
@@ -736,47 +715,6 @@ Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
return Builder.CreateICmp(NewPred, Input, RangeEnd);
}
-static Value *
-foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
- bool JoinedByAnd,
- InstCombiner::BuilderTy &Builder) {
- Value *X = LHS->getOperand(0);
- if (X != RHS->getOperand(0))
- return nullptr;
-
- const APInt *C1, *C2;
- if (!match(LHS->getOperand(1), m_APInt(C1)) ||
- !match(RHS->getOperand(1), m_APInt(C2)))
- return nullptr;
-
- // We only handle (X != C1 && X != C2) and (X == C1 || X == C2).
- ICmpInst::Predicate Pred = LHS->getPredicate();
- if (Pred != RHS->getPredicate())
- return nullptr;
- if (JoinedByAnd && Pred != ICmpInst::ICMP_NE)
- return nullptr;
- if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ)
- return nullptr;
-
- // The larger unsigned constant goes on the right.
- if (C1->ugt(*C2))
- std::swap(C1, C2);
-
- APInt Xor = *C1 ^ *C2;
- if (Xor.isPowerOf2()) {
- // If LHSC and RHSC differ by only one bit, then set that bit in X and
- // compare against the larger constant:
- // (X == C1 || X == C2) --> (X | (C1 ^ C2)) == C2
- // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2
- // We choose an 'or' with a Pow2 constant rather than the inverse mask with
- // 'and' because that may lead to smaller codegen from a smaller constant.
- Value *Or = Builder.CreateOr(X, ConstantInt::get(X->getType(), Xor));
- return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
- }
-
- return nullptr;
-}
-
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
@@ -941,7 +879,29 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
CxtI.getName() + ".simplified");
}
+/// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and
+/// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1).
+/// Also used for logical and/or, must be poison safe.
+static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
+ InstCombiner::BuilderTy &Builder) {
+ CmpInst::Predicate Pred0, Pred1;
+ Value *X;
+ if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
+ m_SpecificInt(1))) ||
+ !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt())))
+ return nullptr;
+
+ Value *CtPop = Cmp0->getOperand(0);
+ if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_NE)
+ return Builder.CreateICmpUGT(CtPop, ConstantInt::get(CtPop->getType(), 1));
+ if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ)
+ return Builder.CreateICmpULT(CtPop, ConstantInt::get(CtPop->getType(), 2));
+
+ return nullptr;
+}
+
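// Rationale: ctpop(X) == 0 exactly when X == 0, so "ctpop(X) == 1 or
// X == 0" collapses to ctpop(X) <u 2, and the negated pair to
// ctpop(X) >u 1.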
/// Reduce a pair of compares that check if a value has exactly 1 bit set.
+/// Also used for logical and/or, must be poison safe.
static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
InstCombiner::BuilderTy &Builder) {
// Handle 'and' / 'or' commutation: make the equality check the first operand.
@@ -1001,22 +961,13 @@ static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
};
// Given ZeroCmpOp = (A + B)
- // ZeroCmpOp <= A && ZeroCmpOp != 0 --> (0-B) < A
- // ZeroCmpOp > A || ZeroCmpOp == 0 --> (0-B) >= A
- //
// ZeroCmpOp <  A && ZeroCmpOp != 0  -->  (0-X) <  Y
// ZeroCmpOp >= A || ZeroCmpOp == 0  -->  (0-X) >= Y
// where X is whichever of A/B is known to be non-zero and Y is the
// remaining value.
- if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
- IsAnd)
- return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE &&
IsAnd && GetKnownNonZeroAndOther(B, A))
return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
- if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
- !IsAnd)
- return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ &&
!IsAnd && GetKnownNonZeroAndOther(B, A))
return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
@@ -1143,12 +1094,9 @@ Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
/// common operand with the constant. Callers are expected to call this with
/// Cmp0/Cmp1 switched to handle logic op commutativity.
static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
- BinaryOperator &Logic,
+ bool IsAnd,
InstCombiner::BuilderTy &Builder,
const SimplifyQuery &Q) {
- bool IsAnd = Logic.getOpcode() == Instruction::And;
- assert((IsAnd || Logic.getOpcode() == Instruction::Or) && "Wrong logic op");
-
// Match an equality compare with a non-poison constant as Cmp0.
// Also, give up if the compare can be constant-folded to avoid looping.
ICmpInst::Predicate Pred0;
@@ -1174,7 +1122,7 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
// (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C)
// Can think of the 'or' substitution with the 'and' bool equivalent:
// A || B --> A || (!A && B)
- Value *SubstituteCmp = SimplifyICmpInst(Pred1, Y, C, Q);
+ Value *SubstituteCmp = simplifyICmpInst(Pred1, Y, C, Q);
if (!SubstituteCmp) {
// If we need to create a new instruction, require that the old compare can
// be removed.
@@ -1182,16 +1130,24 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
SubstituteCmp = Builder.CreateICmp(Pred1, Y, C);
}
- return Builder.CreateBinOp(Logic.getOpcode(), Cmp0, SubstituteCmp);
+ return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0,
+ SubstituteCmp);
}
/// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2)
/// or (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2)
/// into a single comparison using range-based reasoning.
-static Value *foldAndOrOfICmpsUsingRanges(
- ICmpInst::Predicate Pred1, Value *V1, const APInt &C1,
- ICmpInst::Predicate Pred2, Value *V2, const APInt &C2,
- IRBuilderBase &Builder, bool IsAnd) {
+/// NOTE: This is also used for logical and/or, must be poison-safe!
+Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
+ ICmpInst *ICmp2,
+ bool IsAnd) {
+ ICmpInst::Predicate Pred1, Pred2;
+ Value *V1, *V2;
+ const APInt *C1, *C2;
+ if (!match(ICmp1, m_ICmp(Pred1, m_Value(V1), m_APInt(C1))) ||
+ !match(ICmp2, m_ICmp(Pred2, m_Value(V2), m_APInt(C2))))
+ return nullptr;
+
// Look through add of a constant offset on V1, V2, or both operands. This
// allows us to turn the V + C' < C'' range idiom into a proper range.
const APInt *Offset1 = nullptr, *Offset2 = nullptr;
@@ -1206,152 +1162,51 @@ static Value *foldAndOrOfICmpsUsingRanges(
if (V1 != V2)
return nullptr;
- ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, C1);
+ ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, *C1);
if (Offset1)
CR1 = CR1.subtract(*Offset1);
- ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred2, C2);
+ ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, *C2);
if (Offset2)
CR2 = CR2.subtract(*Offset2);
- Optional<ConstantRange> CR =
- IsAnd ? CR1.exactIntersectWith(CR2) : CR1.exactUnionWith(CR2);
- if (!CR)
- return nullptr;
-
- CmpInst::Predicate NewPred;
- APInt NewC, Offset;
- CR->getEquivalentICmp(NewPred, NewC, Offset);
-
Type *Ty = V1->getType();
Value *NewV = V1;
- if (Offset != 0)
- NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
- return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
-}
-
-/// Fold (icmp)&(icmp) if possible.
-Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
- BinaryOperator &And) {
- const SimplifyQuery Q = SQ.getWithInstruction(&And);
-
- // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
- // if K1 and K2 are a one-bit mask.
- if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &And,
- /* IsAnd */ true))
- return V;
-
- ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
-
- // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
- if (predicatesFoldable(PredL, PredR)) {
- if (LHS->getOperand(0) == RHS->getOperand(1) &&
- LHS->getOperand(1) == RHS->getOperand(0))
- LHS->swapOperands();
- if (LHS->getOperand(0) == RHS->getOperand(0) &&
- LHS->getOperand(1) == RHS->getOperand(1)) {
- Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
- unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
- bool IsSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
- }
- }
-
- // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
- return V;
-
- if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, And, Builder, Q))
- return V;
- if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, And, Builder, Q))
- return V;
-
- // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
- if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/false))
- return V;
-
- // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n
- if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false))
- return V;
-
- if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
- return V;
-
- if (Value *V = foldSignedTruncationCheck(LHS, RHS, And, Builder))
- return V;
-
- if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder))
- return V;
-
- if (Value *X =
- foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/true, Q, Builder))
- return X;
- if (Value *X =
- foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder))
- return X;
-
- if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true))
- return X;
+ Optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
+ if (!CR) {
+ if (!(ICmp1->hasOneUse() && ICmp2->hasOneUse()) || CR1.isWrappedSet() ||
+ CR2.isWrappedSet())
+ return nullptr;
- // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
- Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
+ // Check whether we have equal-size ranges that only differ by one bit.
+ // In that case we can apply a mask to map one range onto the other.
+ APInt LowerDiff = CR1.getLower() ^ CR2.getLower();
+ APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
+ APInt CR1Size = CR1.getUpper() - CR1.getLower();
+ if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
+ CR1Size != CR2.getUpper() - CR2.getLower())
+ return nullptr;
- // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
- // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs.
- if (PredL == ICmpInst::ICMP_EQ && match(LHS->getOperand(1), m_ZeroInt()) &&
- PredR == ICmpInst::ICMP_EQ && match(RHS->getOperand(1), m_ZeroInt()) &&
- LHS0->getType() == RHS0->getType()) {
- Value *NewOr = Builder.CreateOr(LHS0, RHS0);
- return Builder.CreateICmp(PredL, NewOr,
- Constant::getNullValue(NewOr->getType()));
+ CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
+ NewV = Builder.CreateAnd(NewV, ConstantInt::get(Ty, ~LowerDiff));
}
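// Illustrative example of the one-bit mapping: CR1 = [4, 6) and CR2 =
// [12, 14) have no exact union, but 4 ^ 12 = 8 = (6-1) ^ (14-1) and both
// ranges have size 2, so clearing bit 3 maps CR2 onto CR1 (e.g. V = 13:
// (13 & ~8) = 5 lies in [4, 6)).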
- const APInt *LHSC, *RHSC;
- if (!match(LHS->getOperand(1), m_APInt(LHSC)) ||
- !match(RHS->getOperand(1), m_APInt(RHSC)))
- return nullptr;
-
- // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
- // where CMAX is the all ones value for the truncated type,
- // iff the lower bits of C2 and CA are zero.
- if (PredL == ICmpInst::ICMP_EQ && PredL == PredR && LHS->hasOneUse() &&
- RHS->hasOneUse()) {
- Value *V;
- const APInt *AndC, *SmallC = nullptr, *BigC = nullptr;
-
- // (trunc x) == C1 & (and x, CA) == C2
- // (and x, CA) == C2 & (trunc x) == C1
- if (match(RHS0, m_Trunc(m_Value(V))) &&
- match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
- SmallC = RHSC;
- BigC = LHSC;
- } else if (match(LHS0, m_Trunc(m_Value(V))) &&
- match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
- SmallC = LHSC;
- BigC = RHSC;
- }
-
- if (SmallC && BigC) {
- unsigned BigBitSize = BigC->getBitWidth();
- unsigned SmallBitSize = SmallC->getBitWidth();
+ if (IsAnd)
+ CR = CR->inverse();
- // Check that the low bits are zero.
- APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
- if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) {
- Value *NewAnd = Builder.CreateAnd(V, Low | *AndC);
- APInt N = SmallC->zext(BigBitSize) | *BigC;
- Value *NewVal = ConstantInt::get(NewAnd->getType(), N);
- return Builder.CreateICmp(PredL, NewAnd, NewVal);
- }
- }
- }
+ CmpInst::Predicate NewPred;
+ APInt NewC, Offset;
+ CR->getEquivalentICmp(NewPred, NewC, Offset);
- return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC,
- Builder, /* IsAnd */ true);
+ if (Offset != 0)
+ NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
+ return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
}
Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
- bool IsAnd) {
+ bool IsAnd, bool IsLogicalSelect) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
@@ -1380,11 +1235,22 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
unsigned FCmpCodeL = getFCmpCode(PredL);
unsigned FCmpCodeR = getFCmpCode(PredR);
unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR;
+
+ // Intersect the fast math flags.
+ // TODO: We can union the fast math flags unless this is a logical select.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ FastMathFlags FMF = LHS->getFastMathFlags();
+ FMF &= RHS->getFastMathFlags();
+ Builder.setFastMathFlags(FMF);
+
return getFCmpValue(NewPred, LHS0, LHS1, Builder);
}
- if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
- (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) {
+ // This transform is not valid for a logical select.
+ if (!IsLogicalSelect &&
+ ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
+ (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO &&
+ !IsAnd))) {
if (LHS0->getType() != RHS0->getType())
return nullptr;
@@ -1574,9 +1440,10 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
Value *Cast1Src = Cast1->getOperand(0);
// fold logic(cast(A), cast(B)) -> cast(logic(A, B))
- if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
+ if ((Cast0->hasOneUse() || Cast1->hasOneUse()) &&
+ shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
- I.getName());
+ I.getName());
return CastInst::Create(CastOpcode, NewOp, DestTy);
}
@@ -1589,9 +1456,8 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
if (ICmp0 && ICmp1) {
- Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1, I)
- : foldOrOfICmps(ICmp0, ICmp1, I);
- if (Res)
+ if (Value *Res =
+ foldAndOrOfICmps(ICmp0, ICmp1, I, LogicOpc == Instruction::And))
return CastInst::Create(CastOpcode, Res, DestTy);
return nullptr;
}
@@ -1862,7 +1728,7 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
Type *Ty = I.getType();
- if (Value *V = SimplifyAndInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyAndInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1930,25 +1796,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together));
}
- // If the mask is only needed on one incoming arm, push the 'and' op up.
- if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
- match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
- APInt NotAndMask(~(*C));
- BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode();
- if (MaskedValueIsZero(X, NotAndMask, 0, &I)) {
- // Not masking anything out for the LHS, move mask to RHS.
- // and ({x}or X, Y), C --> {x}or X, (and Y, C)
- Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked");
- return BinaryOperator::Create(BinOp, X, NewRHS);
- }
- if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) {
- // Not masking anything out for the RHS, move mask to LHS.
- // and ({x}or X, Y), C --> {x}or (and X, C), Y
- Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked");
- return BinaryOperator::Create(BinOp, NewLHS, Y);
- }
- }
-
unsigned Width = Ty->getScalarSizeInBits();
const APInt *ShiftC;
if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) {
@@ -1961,6 +1808,12 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
}
}
+ // If this 'and' clears the sign-bits added by ashr, replace with lshr:
+ // and (ashr X, ShiftC), C --> lshr X, ShiftC
+ if (match(Op0, m_AShr(m_Value(X), m_APInt(ShiftC))) && ShiftC->ult(Width) &&
+ C->isMask(Width - ShiftC->getZExtValue()))
+ return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, *ShiftC));
+
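// E.g. i8: (ashr X, 3) & 0x1F. The ashr fills the top three bits with
// copies of the sign bit, and 0x1F (a mask of 8 - 3 = 5 bits) clears
// exactly those bits, which is what lshr X, 3 produces directly.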
const APInt *AddC;
if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) {
// If we add zeros to every bit below a mask, the add has no effect:
@@ -1983,7 +1836,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
// ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the
// bitwidth of X and OP behaves well when given trunc(C1) and X.
- auto isSuitableBinOpcode = [](BinaryOperator *B) {
+ auto isNarrowableBinOpcode = [](BinaryOperator *B) {
switch (B->getOpcode()) {
case Instruction::Xor:
case Instruction::Or:
@@ -1996,22 +1849,125 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
}
};
BinaryOperator *BO;
- if (match(Op0, m_OneUse(m_BinOp(BO))) && isSuitableBinOpcode(BO)) {
+ if (match(Op0, m_OneUse(m_BinOp(BO))) && isNarrowableBinOpcode(BO)) {
+ Instruction::BinaryOps BOpcode = BO->getOpcode();
Value *X;
const APInt *C1;
// TODO: The one-use restrictions could be relaxed a little if the AND
// is going to be removed.
+ // Try to narrow the 'and' and a binop with constant operand:
+ // and (bo (zext X), C1), C --> zext (and (bo X, TruncC1), TruncC)
if (match(BO, m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))), m_APInt(C1))) &&
C->isIntN(X->getType()->getScalarSizeInBits())) {
unsigned XWidth = X->getType()->getScalarSizeInBits();
Constant *TruncC1 = ConstantInt::get(X->getType(), C1->trunc(XWidth));
Value *BinOp = isa<ZExtInst>(BO->getOperand(0))
- ? Builder.CreateBinOp(BO->getOpcode(), X, TruncC1)
- : Builder.CreateBinOp(BO->getOpcode(), TruncC1, X);
+ ? Builder.CreateBinOp(BOpcode, X, TruncC1)
+ : Builder.CreateBinOp(BOpcode, TruncC1, X);
Constant *TruncC = ConstantInt::get(X->getType(), C->trunc(XWidth));
Value *And = Builder.CreateAnd(BinOp, TruncC);
return new ZExtInst(And, Ty);
}
+
+ // Similar to above: if the mask matches the zext input width, then the
+ // 'and' can be eliminated, so we can truncate the other variable op:
+ // and (bo (zext X), Y), C --> zext (bo X, (trunc Y))
+ if (isa<Instruction>(BO->getOperand(0)) &&
+ match(BO->getOperand(0), m_OneUse(m_ZExt(m_Value(X)))) &&
+ C->isMask(X->getType()->getScalarSizeInBits())) {
+ Y = BO->getOperand(1);
+ Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr");
+ Value *NewBO =
+ Builder.CreateBinOp(BOpcode, X, TrY, BO->getName() + ".narrow");
+ return new ZExtInst(NewBO, Ty);
+ }
+ // and (bo Y, (zext X)), C --> zext (bo (trunc Y), X)
+ if (isa<Instruction>(BO->getOperand(1)) &&
+ match(BO->getOperand(1), m_OneUse(m_ZExt(m_Value(X)))) &&
+ C->isMask(X->getType()->getScalarSizeInBits())) {
+ Y = BO->getOperand(0);
+ Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr");
+ Value *NewBO =
+ Builder.CreateBinOp(BOpcode, TrY, X, BO->getName() + ".narrow");
+ return new ZExtInst(NewBO, Ty);
+ }
+ }
+
+ // This is intentionally placed after the narrowing transforms for
+ // efficiency (transform directly to the narrow logic op if possible).
+ // If the mask is only needed on one incoming arm, push the 'and' op up.
+ if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
+ match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+ APInt NotAndMask(~(*C));
+ BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode();
+ if (MaskedValueIsZero(X, NotAndMask, 0, &I)) {
+ // Not masking anything out for the LHS, move mask to RHS.
+ // and ({x}or X, Y), C --> {x}or X, (and Y, C)
+ Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked");
+ return BinaryOperator::Create(BinOp, X, NewRHS);
+ }
+ if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) {
+ // Not masking anything out for the RHS, move mask to LHS.
+ // and ({x}or X, Y), C --> {x}or (and X, C), Y
+ Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked");
+ return BinaryOperator::Create(BinOp, NewLHS, Y);
+ }
+ }
+
+ // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2
+ // constant, test if the shift amount equals the offset bit index:
+ // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0
+ // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0
+ if (C->isPowerOf2() &&
+ match(Op0, m_OneUse(m_LogicalShift(m_Power2(ShiftC), m_Value(X))))) {
+ int Log2ShiftC = ShiftC->exactLogBase2();
+ int Log2C = C->exactLogBase2();
+ bool IsShiftLeft =
+ cast<BinaryOperator>(Op0)->getOpcode() == Instruction::Shl;
+ int BitNum = IsShiftLeft ? Log2C - Log2ShiftC : Log2ShiftC - Log2C;
+ assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask");
+ Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, BitNum));
+ return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C),
+ ConstantInt::getNullValue(Ty));
+ }
+
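// E.g. (4 << X) & 16: the single set bit of 4 lands on bit 4 only when
// X == log2(16) - log2(4) == 2, so this becomes X == 2 ? 16 : 0
// (X == 3 gives 32 & 16 == 0, as expected).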
+ Constant *C1, *C2;
+ const APInt *C3 = C;
+ Value *X;
+ if (C3->isPowerOf2()) {
+ Constant *Log2C3 = ConstantInt::get(Ty, C3->countTrailingZeros());
+ if (match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)),
+ m_ImmConstant(C2)))) &&
+ match(C1, m_Power2())) {
+ Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1);
+ Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3);
+ KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr);
+ if (KnownLShrc.getMaxValue().ult(Width)) {
+ // iff C1,C3 is pow2 and C2 + cttz(C3) < BitWidth:
+ // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0
+ Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1);
+ Value *Cmp = Builder.CreateICmpEQ(X, CmpC);
+ return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3),
+ ConstantInt::getNullValue(Ty));
+ }
+ }
+
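// E.g. ((4 << X) >> 1) & 2 on i8: cttz(2) + 1 - cttz(4) = 0, so this
// becomes X == 0 ? 2 : 0 (X == 0: (4 >> 1) & 2 == 2; X == 1:
// (8 >> 1) & 2 == 0).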
+ if (match(Op0, m_OneUse(m_Shl(m_LShr(m_ImmConstant(C1), m_Value(X)),
+ m_ImmConstant(C2)))) &&
+ match(C1, m_Power2())) {
+ Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1);
+ Constant *Cmp =
+ ConstantExpr::getCompare(ICmpInst::ICMP_ULT, Log2C3, C2);
+ if (Cmp->isZeroValue()) {
+ // iff C1,C3 is pow2 and Log2(C3) >= C2:
+ // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0
+ Constant *ShlC = ConstantExpr::getAdd(C2, Log2C1);
+ Constant *CmpC = ConstantExpr::getSub(ShlC, Log2C3);
+ Value *Cmp = Builder.CreateICmpEQ(X, CmpC);
+ return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3),
+ ConstantInt::getNullValue(Ty));
+ }
+ }
}
}
@@ -2121,32 +2077,50 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = foldAndOfICmps(LHS, RHS, I))
+ if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ true))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
// number of 'and' instructions might have to be created.
- if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
+ if (LHS && match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) {
+ bool IsLogical = isa<SelectInst>(Op1);
+ // LHS & (X && Y) --> (LHS && X) && Y
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y));
+ if (Value *Res =
+ foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, IsLogical))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalAnd(Res, Y)
+ : Builder.CreateAnd(Res, Y));
+ // LHS & (X && Y) --> X && (LHS & Y)
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder.CreateAnd(Res, X));
- }
- if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
+ if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true,
+ /* IsLogical */ false))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalAnd(X, Res)
+ : Builder.CreateAnd(X, Res));
+ }
+ if (RHS && match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) {
+ bool IsLogical = isa<SelectInst>(Op0);
+ // (X && Y) & RHS --> (X && RHS) && Y
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y));
+ if (Value *Res =
+ foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, IsLogical))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalAnd(Res, Y)
+ : Builder.CreateAnd(Res, Y));
+ // (X && Y) & RHS --> X && (Y & RHS)
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder.CreateAnd(Res, X));
+ if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true,
+ /* IsLogical */ false))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalAnd(X, Res)
+ : Builder.CreateAnd(X, Res));
}
}
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Value *Res = foldLogicOfFCmps(LHS, RHS, true))
+ if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true))
return replaceInstUsesWith(I, Res);
if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
@@ -2175,18 +2149,16 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
unsigned FullShift = Ty->getScalarSizeInBits() - 1;
if (match(&I, m_c_And(m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))),
m_Value(Y)))) {
- Constant *Zero = ConstantInt::getNullValue(Ty);
- Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
- return SelectInst::Create(Cmp, Y, Zero);
+ Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
+ return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
}
// If there's a 'not' of the shifted value, swap the select operands:
// ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y
if (match(&I, m_c_And(m_OneUse(m_Not(
m_AShr(m_Value(X), m_SpecificInt(FullShift)))),
m_Value(Y)))) {
- Constant *Zero = ConstantInt::getNullValue(Ty);
- Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
- return SelectInst::Create(Cmp, Zero, Y);
+ Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
+ return SelectInst::Create(IsNeg, ConstantInt::getNullValue(Ty), Y);
}
// (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
@@ -2482,8 +2454,12 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
// not create unnecessary casts if the types already match.
Type *SelTy = A->getType();
if (auto *VecTy = dyn_cast<VectorType>(Cond->getType())) {
+ // For a fixed or scalable vector, get N from <{vscale x} N x iM>
unsigned Elts = VecTy->getElementCount().getKnownMinValue();
- Type *EltTy = Builder.getIntNTy(SelTy->getPrimitiveSizeInBits() / Elts);
+ // For a fixed or scalable vector, get the size in bits of N x iM; for a
+ // scalar this is just M.
+ unsigned SelEltSize = SelTy->getPrimitiveSizeInBits().getKnownMinSize();
+ Type *EltTy = Builder.getIntNTy(SelEltSize / Elts);
SelTy = VectorType::get(EltTy, VecTy->getElementCount());
}
Value *BitcastC = Builder.CreateBitCast(C, SelTy);
@@ -2495,15 +2471,46 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
return nullptr;
}
-/// Fold (icmp)|(icmp) if possible.
-Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
- BinaryOperator &Or) {
- const SimplifyQuery Q = SQ.getWithInstruction(&Or);
+// (icmp eq X, 0) | (icmp ult Other, X) -> (icmp ule Other, X-1)
+// (icmp ne X, 0) & (icmp uge Other, X) -> (icmp ugt Other, X-1)
+Value *foldAndOrOfICmpEqZeroAndICmp(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+ IRBuilderBase &Builder) {
+ ICmpInst::Predicate LPred =
+ IsAnd ? LHS->getInversePredicate() : LHS->getPredicate();
+ ICmpInst::Predicate RPred =
+ IsAnd ? RHS->getInversePredicate() : RHS->getPredicate();
+ Value *LHS0 = LHS->getOperand(0);
+ if (LPred != ICmpInst::ICMP_EQ || !match(LHS->getOperand(1), m_Zero()) ||
+ !LHS0->getType()->isIntOrIntVectorTy() ||
+ !(LHS->hasOneUse() || RHS->hasOneUse()))
+ return nullptr;
+
+ Value *Other;
+ if (RPred == ICmpInst::ICMP_ULT && RHS->getOperand(1) == LHS0)
+ Other = RHS->getOperand(0);
+ else if (RPred == ICmpInst::ICMP_UGT && RHS->getOperand(0) == LHS0)
+ Other = RHS->getOperand(1);
+ else
+ return nullptr;
+
+ return Builder.CreateICmp(
+ IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE,
+ Builder.CreateAdd(LHS0, Constant::getAllOnesValue(LHS0->getType())),
+ Other);
+}
+
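// Boundary sketch for the 'or' form: the result is (X - 1) uge Other,
// i.e. Other <=u X - 1. When X == 0, X - 1 wraps to the unsigned maximum
// and the compare is always true, matching the eq-zero arm; when X != 0,
// Other <u X and Other <=u X - 1 agree. The 'and' form is the negation.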
+/// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible.
+/// If IsLogical is true, then the and/or is in select form and the transform
+/// must be poison-safe.
+Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ Instruction &I, bool IsAnd,
+ bool IsLogical) {
+ const SimplifyQuery Q = SQ.getWithInstruction(&I);
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
+ // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
// if K1 and K2 are a one-bit mask.
- if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &Or,
- /* IsAnd */ false))
+ if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &I, IsAnd, IsLogical))
return V;
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
@@ -2513,64 +2520,16 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
match(LHS1, m_APInt(LHSC));
match(RHS1, m_APInt(RHSC));
- // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
- // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
- // The original condition actually refers to the following two ranges:
- // [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3]
- // We can fold these two ranges if:
- // 1) C1 and C2 is unsigned greater than C3.
- // 2) The two ranges are separated.
- // 3) C1 ^ C2 is one-bit mask.
- // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask.
- // This implies all values in the two ranges differ by exactly one bit.
- if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) &&
- PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() &&
- LHSC->getBitWidth() == RHSC->getBitWidth() && *LHSC == *RHSC) {
-
- Value *AddOpnd;
- const APInt *LAddC, *RAddC;
- if (match(LHS0, m_Add(m_Value(AddOpnd), m_APInt(LAddC))) &&
- match(RHS0, m_Add(m_Specific(AddOpnd), m_APInt(RAddC))) &&
- LAddC->ugt(*LHSC) && RAddC->ugt(*LHSC)) {
-
- APInt DiffC = *LAddC ^ *RAddC;
- if (DiffC.isPowerOf2()) {
- const APInt *MaxAddC = nullptr;
- if (LAddC->ult(*RAddC))
- MaxAddC = RAddC;
- else
- MaxAddC = LAddC;
-
- APInt RRangeLow = -*RAddC;
- APInt RRangeHigh = RRangeLow + *LHSC;
- APInt LRangeLow = -*LAddC;
- APInt LRangeHigh = LRangeLow + *LHSC;
- APInt LowRangeDiff = RRangeLow ^ LRangeLow;
- APInt HighRangeDiff = RRangeHigh ^ LRangeHigh;
- APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow
- : RRangeLow - LRangeLow;
-
- if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff &&
- RangeDiff.ugt(*LHSC)) {
- Type *Ty = AddOpnd->getType();
- Value *MaskC = ConstantInt::get(Ty, ~DiffC);
-
- Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC);
- Value *NewAdd = Builder.CreateAdd(NewAnd,
- ConstantInt::get(Ty, *MaxAddC));
- return Builder.CreateICmp(LHS->getPredicate(), NewAdd,
- ConstantInt::get(Ty, *LHSC));
- }
- }
- }
- }
-
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
+ // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
if (predicatesFoldable(PredL, PredR)) {
- if (LHS0 == RHS1 && LHS1 == RHS0)
- LHS->swapOperands();
+ if (LHS0 == RHS1 && LHS1 == RHS0) {
+ PredL = ICmpInst::getSwappedPredicate(PredL);
+ std::swap(LHS0, LHS1);
+ }
if (LHS0 == RHS0 && LHS1 == RHS1) {
- unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
+ unsigned Code = IsAnd ? getICmpCode(PredL) & getICmpCode(PredR)
+ : getICmpCode(PredL) | getICmpCode(PredR);
bool IsSigned = LHS->isSigned() || RHS->isSigned();
return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder);
}
@@ -2578,68 +2537,70 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// handle (roughly):
// (icmp ne (A & B), C) | (icmp ne (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder))
return V;
- if (LHS->hasOneUse() || RHS->hasOneUse()) {
- // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
- // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
- Value *A = nullptr, *B = nullptr;
- if (PredL == ICmpInst::ICMP_EQ && match(LHS1, m_Zero())) {
- B = LHS0;
- if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS1)
- A = RHS0;
- else if (PredR == ICmpInst::ICMP_UGT && LHS0 == RHS0)
- A = RHS1;
- }
- // (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1)
- // (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1)
- else if (PredR == ICmpInst::ICMP_EQ && match(RHS1, m_Zero())) {
- B = RHS0;
- if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS1)
- A = LHS0;
- else if (PredL == ICmpInst::ICMP_UGT && RHS0 == LHS0)
- A = LHS1;
- }
- if (A && B && B->getType()->isIntOrIntVectorTy())
- return Builder.CreateICmp(
- ICmpInst::ICMP_UGE,
- Builder.CreateAdd(B, Constant::getAllOnesValue(B->getType())), A);
- }
-
- if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, Or, Builder, Q))
+ // TODO: One of these directions is fine with logical and/or, the other could
+ // be supported by inserting freeze.
+ if (!IsLogical) {
+ if (Value *V = foldAndOrOfICmpEqZeroAndICmp(LHS, RHS, IsAnd, Builder))
+ return V;
+ if (Value *V = foldAndOrOfICmpEqZeroAndICmp(RHS, LHS, IsAnd, Builder))
+ return V;
+ }
+
+ // TODO: Verify whether this is safe for logical and/or.
+ if (!IsLogical) {
+ if (Value *V = foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, Builder, Q))
+ return V;
+ if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd, Builder, Q))
+ return V;
+ }
+
+ if (Value *V = foldIsPowerOf2OrZero(LHS, RHS, IsAnd, Builder))
return V;
- if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, Or, Builder, Q))
+ if (Value *V = foldIsPowerOf2OrZero(RHS, LHS, IsAnd, Builder))
return V;
- // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
- if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/true))
- return V;
+ // TODO: One of these directions is fine with logical and/or, the other could
+ // be supported by inserting freeze.
+ if (!IsLogical) {
+ // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
+ // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
+ if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/!IsAnd))
+ return V;
- // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
- if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true))
- return V;
+ // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
+ // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n
+ if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/!IsAnd))
+ return V;
+ }
- if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
- return V;
+ // TODO: Add conjugated or fold, check whether it is safe for logical and/or.
+ if (IsAnd && !IsLogical)
+ if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder))
+ return V;
- if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder))
+ if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder))
return V;
- if (Value *X =
- foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/false, Q, Builder))
- return X;
- if (Value *X =
- foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
- return X;
+ // TODO: Verify whether this is safe for logical and/or.
+ if (!IsLogical) {
+ if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder))
+ return X;
+ if (Value *X = foldUnsignedUnderflowCheck(RHS, LHS, IsAnd, Q, Builder))
+ return X;
+ }
- if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false))
+ if (Value *X = foldEqOfParts(LHS, RHS, IsAnd))
return X;
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
// TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs.
- if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_ZeroInt()) &&
- PredR == ICmpInst::ICMP_NE && match(RHS1, m_ZeroInt()) &&
+ if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
+ PredL == PredR && match(LHS1, m_ZeroInt()) && match(RHS1, m_ZeroInt()) &&
LHS0->getType() == RHS0->getType()) {
Value *NewOr = Builder.CreateOr(LHS0, RHS0);
return Builder.CreateICmp(PredL, NewOr,
@@ -2650,15 +2611,83 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (!LHSC || !RHSC)
return nullptr;
- return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC,
- Builder, /* IsAnd */ false);
+ // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
+ // (trunc x) != C1 | (and x, CA) != C2 -> (and x, CA|CMAX) != C1|C2
+ // where CMAX is the all ones value for the truncated type,
+ // iff the lower bits of C2 and CA are zero.
+ if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
+ PredL == PredR && LHS->hasOneUse() && RHS->hasOneUse()) {
+ Value *V;
+ const APInt *AndC, *SmallC = nullptr, *BigC = nullptr;
+
+ // (trunc x) == C1 & (and x, CA) == C2
+ // (and x, CA) == C2 & (trunc x) == C1
+ if (match(RHS0, m_Trunc(m_Value(V))) &&
+ match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
+ SmallC = RHSC;
+ BigC = LHSC;
+ } else if (match(LHS0, m_Trunc(m_Value(V))) &&
+ match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
+ SmallC = LHSC;
+ BigC = RHSC;
+ }
+
+ if (SmallC && BigC) {
+ unsigned BigBitSize = BigC->getBitWidth();
+ unsigned SmallBitSize = SmallC->getBitWidth();
+
+ // Check that the low bits are zero.
+ APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
+ if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) {
+ Value *NewAnd = Builder.CreateAnd(V, Low | *AndC);
+ APInt N = SmallC->zext(BigBitSize) | *BigC;
+ Value *NewVal = ConstantInt::get(NewAnd->getType(), N);
+ return Builder.CreateICmp(PredL, NewAnd, NewVal);
+ }
+ }
+ }
+
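// Illustrative instance (i32 x truncated to i8): (trunc x) == 0x12 checks
// the low byte and (x & 0xFF00) == 0x3400 checks the next byte; the low 8
// bits of CA = 0xFF00 and C2 = 0x3400 are zero, so both merge into
// (x & 0xFFFF) == 0x3412.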
+ // Match naive pattern (and its inverted form) for checking if two values
+ // share the same sign. An example of the pattern:
+ // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1) -> (icmp sgt (X ^ Y), -1)
+ // Inverted form (example):
+ // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0)
+ bool TrueIfSignedL, TrueIfSignedR;
+ if (InstCombiner::isSignBitCheck(PredL, *LHSC, TrueIfSignedL) &&
+ InstCombiner::isSignBitCheck(PredR, *RHSC, TrueIfSignedR) &&
+ (RHS->hasOneUse() || LHS->hasOneUse())) {
+ Value *X, *Y;
+ if (IsAnd) {
+ if ((TrueIfSignedL && !TrueIfSignedR &&
+ match(LHS0, m_Or(m_Value(X), m_Value(Y))) &&
+ match(RHS0, m_c_And(m_Specific(X), m_Specific(Y)))) ||
+ (!TrueIfSignedL && TrueIfSignedR &&
+ match(LHS0, m_And(m_Value(X), m_Value(Y))) &&
+ match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y))))) {
+ Value *NewXor = Builder.CreateXor(X, Y);
+ return Builder.CreateIsNeg(NewXor);
+ }
+ } else {
+ if ((TrueIfSignedL && !TrueIfSignedR &&
+ match(LHS0, m_And(m_Value(X), m_Value(Y))) &&
+ match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y)))) ||
+ (!TrueIfSignedL && TrueIfSignedR &&
+ match(LHS0, m_Or(m_Value(X), m_Value(Y))) &&
+ match(RHS0, m_c_And(m_Specific(X), m_Specific(Y))))) {
+ Value *NewXor = Builder.CreateXor(X, Y);
+ return Builder.CreateIsNotNeg(NewXor);
+ }
+ }
+ }
+
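// Sketch of the sign reasoning: the sign bit of (X & Y) is set only when
// both signs are set, and the sign bit of (X | Y) is clear only when both
// are clear, so "both negative or both non-negative" is precisely
// sign(X ^ Y) == 0, i.e. (X ^ Y) s> -1; the inverted form tests the
// opposite sign bit.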
+ return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
}
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
- if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyOrInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -2824,6 +2853,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
return BinaryOperator::CreateOr(Op1, C);
+ // ((A & B) ^ C) | B -> C | B
+ if (match(Op0, m_c_Xor(m_c_And(m_Value(A), m_Specific(Op1)), m_Value(C))))
+ return BinaryOperator::CreateOr(C, Op1);
+
+ // B | ((A & B) ^ C) -> B | C
+ if (match(Op1, m_c_Xor(m_c_And(m_Value(A), m_Specific(Op0)), m_Value(C))))
+ return BinaryOperator::CreateOr(Op0, C);
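// Why this holds: on bits where B is set the 'or' forces a 1 regardless
// of the xor, and on bits where B is clear A & B is 0, so the xor passes
// C through unchanged; the whole expression is therefore C | B.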
+
// ((B | C) & A) | B -> B | (A & C)
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));
@@ -2885,33 +2922,51 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
if (LHS && RHS)
- if (Value *Res = foldOrOfICmps(LHS, RHS, I))
+ if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ false))
return replaceInstUsesWith(I, Res);
// TODO: Make this recursive; it's a little tricky because an arbitrary
// number of 'or' instructions might have to be created.
Value *X, *Y;
- if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+ if (LHS && match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) {
+ bool IsLogical = isa<SelectInst>(Op1);
+ // LHS | (X || Y) --> (LHS || X) || Y
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
+ if (Value *Res =
+ foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, IsLogical))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalOr(Res, Y)
+ : Builder.CreateOr(Res, Y));
+ // LHS | (X || Y) --> X || (LHS | Y)
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
- return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
- }
- if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
+ if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false,
+ /* IsLogical */ false))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalOr(X, Res)
+ : Builder.CreateOr(X, Res));
+ }
+ if (RHS && match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) {
+ bool IsLogical = isa<SelectInst>(Op0);
+ // (X || Y) | RHS --> (X || RHS) || Y
if (auto *Cmp = dyn_cast<ICmpInst>(X))
- if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
+ if (Value *Res =
+ foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, IsLogical))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalOr(Res, Y)
+ : Builder.CreateOr(Res, Y));
+ // (X || Y) | RHS --> X || (Y | RHS)
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
- if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
- return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
+ if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false,
+ /* IsLogical */ false))
+ return replaceInstUsesWith(I, IsLogical
+ ? Builder.CreateLogicalOr(X, Res)
+ : Builder.CreateOr(X, Res));
}
}
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
- if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
+ if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false))
return replaceInstUsesWith(I, Res);
if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
@@ -3025,6 +3080,36 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
return replaceInstUsesWith(I, Builder.CreateOr(Start, Step));
+ // (A & B) | (C | D) or (C | D) | (A & B)
+ // Can be combined if C or D is of type (A/B & X)
+ if (match(&I, m_c_Or(m_OneUse(m_And(m_Value(A), m_Value(B))),
+ m_OneUse(m_Or(m_Value(C), m_Value(D)))))) {
+    // (A & B) | (C | ?) -> C | (? | (A & B))
+    // (C | ?) | (A & B) -> C | (? | (A & B))
+    // ...where '?' is an 'and' that has A or B as one operand, in either
+    // operand order.
+ if (match(D, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
+ match(D, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
+ return BinaryOperator::CreateOr(
+ C, Builder.CreateOr(D, Builder.CreateAnd(A, B)));
+    // (A & B) | (? | D) -> (? | (A & B)) | D
+    // (? | D) | (A & B) -> (? | (A & B)) | D
+    // ...where '?' is an 'and' that has A or B as one operand, in either
+    // operand order.
+ if (match(C, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
+ match(C, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
+ return BinaryOperator::CreateOr(
+ Builder.CreateOr(C, Builder.CreateAnd(A, B)), D);
+ }
+
return nullptr;
}
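
The new (A & B) | (C | D) reassociation is pure associativity and commutativity of 'or'; its payoff is that once (A & B) sits next to the 'and' that shares an operand with it, a later fold can factor out the common operand. A small exhaustive check over 4-bit values (C++ sketch; the final factored form is assumed to be what a follow-on fold produces, it is not part of this hunk):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 16; ++a)
    for (uint32_t b = 0; b < 16; ++b)
      for (uint32_t c = 0; c < 16; ++c)
        for (uint32_t x = 0; x < 16; ++x) {
          uint32_t d = a & x;                    // D shares operand A with (A & B)
          uint32_t before = (a & b) | (c | d);
          uint32_t after = c | (d | (a & b));    // the reassociated form
          uint32_t factored = c | (a & (x | b)); // enabled follow-on fold
          assert(before == after && after == factored);
        }
}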
@@ -3086,26 +3171,26 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
I.getOperand(1) == RHS && "Should be 'xor' with these operands");
- if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
- if (LHS->getOperand(0) == RHS->getOperand(1) &&
- LHS->getOperand(1) == RHS->getOperand(0))
- LHS->swapOperands();
- if (LHS->getOperand(0) == RHS->getOperand(0) &&
- LHS->getOperand(1) == RHS->getOperand(1)) {
+ ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
+ Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
+ Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
+
+ if (predicatesFoldable(PredL, PredR)) {
+ if (LHS0 == RHS1 && LHS1 == RHS0) {
+ std::swap(LHS0, LHS1);
+ PredL = ICmpInst::getSwappedPredicate(PredL);
+ }
+ if (LHS0 == RHS0 && LHS1 == RHS1) {
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
- Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
- unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
+ unsigned Code = getICmpCode(PredL) ^ getICmpCode(PredR);
bool IsSigned = LHS->isSigned() || RHS->isSigned();
- return getNewICmpValue(Code, IsSigned, Op0, Op1, Builder);
+ return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder);
}
}
// TODO: This can be generalized to compares of non-signbits using
// decomposeBitTestICmp(). It could be enhanced more by using (something like)
// foldLogOpOfMaskedICmps().
- ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
- Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
- Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
LHS0->getType() == RHS0->getType() &&
LHS0->getType()->isIntOrIntVectorTy()) {
@@ -3114,19 +3199,17 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) ||
(PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
- PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) {
- Value *Zero = ConstantInt::getNullValue(LHS0->getType());
- return Builder.CreateICmpSLT(Builder.CreateXor(LHS0, RHS0), Zero);
- }
+ PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())))
+ return Builder.CreateIsNeg(Builder.CreateXor(LHS0, RHS0));
+
// (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
// (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) ||
(PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
- PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) {
- Value *MinusOne = ConstantInt::getAllOnesValue(LHS0->getType());
- return Builder.CreateICmpSGT(Builder.CreateXor(LHS0, RHS0), MinusOne);
- }
+ PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())))
+ return Builder.CreateIsNotNeg(Builder.CreateXor(LHS0, RHS0));
+
}
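
The two xor-of-sign-test folds above are the "signs differ" and "signs agree" identities expressed on the xor of the operands. A short plain-C++ check:

#include <cassert>
#include <cstdint>

int main() {
  const int32_t samples[] = {0, 1, -1, 7, -7, INT32_MAX, INT32_MIN};
  for (int32_t x : samples)
    for (int32_t y : samples) {
      assert(((x > -1) ^ (y > -1)) == ((x ^ y) < 0)); // signs differ
      assert(((x > -1) ^ (y < 0)) == ((x ^ y) > -1)); // signs agree
    }
}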
// Instead of trying to imitate the folds for and/or, decompose this 'xor'
@@ -3135,10 +3218,10 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
//
// This is based on a truth table definition of xor:
// X ^ Y --> (X | Y) & !(X & Y)
- if (Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
+ if (Value *OrICmp = simplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
// TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
// TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
- if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
+ if (Value *AndICmp = simplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
// TODO: Independently handle cases where the 'and' side is a constant.
ICmpInst *X = nullptr, *Y = nullptr;
if (OrICmp == LHS && AndICmp == RHS) {
@@ -3274,12 +3357,12 @@ static Instruction *canonicalizeAbs(BinaryOperator &Xor,
// Op1 = ashr i32 A, 31 ; smear the sign bit
// xor (add A, Op1), Op1 ; add -1 and flip bits if negative
// --> (A < 0) ? -A : A
- Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
+ Value *IsNeg = Builder.CreateIsNeg(A);
// Copy the nuw/nsw flags from the add to the negate.
auto *Add = cast<BinaryOperator>(Op0);
- Value *Neg = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
+ Value *NegA = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
Add->hasNoSignedWrap());
- return SelectInst::Create(Cmp, Neg, A);
+ return SelectInst::Create(IsNeg, NegA, A);
}
return nullptr;
}
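
The pattern canonicalized above is the classic branchless absolute value. A standalone check (C++20 arithmetic-shift semantics assumed; INT32_MIN is excluded because its negation overflows):

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  const int32_t samples[] = {0, 1, -1, 123, -123, INT32_MAX, INT32_MIN + 1};
  for (int32_t a : samples) {
    int32_t m = a >> 31;          // smear the sign bit: 0 or -1
    int32_t viaXor = (a + m) ^ m; // add -1 and flip the bits if negative
    assert(viaXor == std::abs(a));
  }
}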
@@ -3465,51 +3548,7 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
}
}
- // TODO: Remove folds if we canonicalize to intrinsics (see above).
- // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
- //
- // %notx = xor i32 %x, -1
- // %cmp1 = icmp sgt i32 %notx, %y
- // %smax = select i1 %cmp1, i32 %notx, i32 %y
- // %res = xor i32 %smax, -1
- // =>
- // %noty = xor i32 %y, -1
- // %cmp2 = icmp slt %x, %noty
- // %res = select i1 %cmp2, i32 %x, i32 %noty
- //
- // Same is applicable for smin/umax/umin.
if (NotOp->hasOneUse()) {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(NotOp, LHS, RHS).Flavor;
- if (SelectPatternResult::isMinOrMax(SPF)) {
- // It's possible we get here before the not has been simplified, so make
- // sure the input to the not isn't freely invertible.
- if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) {
- Value *NotY = Builder.CreateNot(RHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
- }
-
- // It's possible we get here before the not has been simplified, so make
- // sure the input to the not isn't freely invertible.
- if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) {
- Value *NotX = Builder.CreateNot(LHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
- }
-
- // If both sides are freely invertible, then we can get rid of the xor
- // completely.
- if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
- isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
- Value *NotLHS = Builder.CreateNot(LHS);
- Value *NotRHS = Builder.CreateNot(RHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
- NotLHS, NotRHS);
- }
- }
-
// Pull 'not' into operands of select if both operands are one-use compares
// or one is one-use compare and the other one is a constant.
// Inverting the predicates eliminates the 'not' operation.
@@ -3549,7 +3588,7 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
- if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyXorInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -3596,8 +3635,20 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
Value *X, *Y;
Constant *C1;
if (match(Op1, m_Constant(C1))) {
- // Use DeMorgan and reassociation to eliminate a 'not' op.
Constant *C2;
+
+ if (match(Op0, m_OneUse(m_Or(m_Value(X), m_ImmConstant(C2)))) &&
+ match(C1, m_ImmConstant())) {
+ // (X | C2) ^ C1 --> (X & ~C2) ^ (C1^C2)
+ C2 = Constant::replaceUndefsWith(
+ C2, Constant::getAllOnesValue(C2->getType()->getScalarType()));
+ Value *And = Builder.CreateAnd(
+ X, Constant::mergeUndefsWith(ConstantExpr::getNot(C2), C1));
+ return BinaryOperator::CreateXor(
+ And, Constant::mergeUndefsWith(ConstantExpr::getXor(C1, C2), C1));
+ }
+
+ // Use DeMorgan and reassociation to eliminate a 'not' op.
if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
// (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
@@ -3619,9 +3670,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
*CA == X->getType()->getScalarSizeInBits() - 1 &&
!match(C1, m_AllOnes())) {
assert(!C1->isZeroValue() && "Unexpected xor with 0");
- Value *ICmp =
- Builder.CreateICmpSGT(X, Constant::getAllOnesValue(X->getType()));
- return SelectInst::Create(ICmp, Op1, Builder.CreateNot(Op1));
+ Value *IsNotNeg = Builder.CreateIsNotNeg(X);
+ return SelectInst::Create(IsNotNeg, Op1, Builder.CreateNot(Op1));
}
}
@@ -3677,9 +3727,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
APInt FoldConst = C1->getValue().lshr(C2->getValue());
FoldConst ^= C3->getValue();
// Prepare the two operands.
- auto *Opnd0 = cast<Instruction>(Builder.CreateLShr(X, C2));
- Opnd0->takeName(cast<Instruction>(Op0));
- Opnd0->setDebugLoc(I.getDebugLoc());
+ auto *Opnd0 = Builder.CreateLShr(X, C2);
+ Opnd0->takeName(Op0);
return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst));
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index 495493aab4b5..2540e545ae4d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -12,7 +12,6 @@
#include "InstCombineInternal.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 05b28328afbf..67ef2e895b6c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -15,21 +15,18 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
@@ -74,7 +71,6 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <cstring>
#include <utility>
#include <vector>
@@ -108,6 +104,19 @@ static Type *getPromotedType(Type *Ty) {
return Ty;
}
+/// Recognize a memcpy/memmove from an otherwise trivially unused alloca.
+/// TODO: This should probably be integrated with visitAllocSites, but that
+/// requires a deeper change to allow either unread or unwritten objects.
+static bool hasUndefSource(AnyMemTransferInst *MI) {
+ auto *Src = MI->getRawSource();
+ while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
+ if (!Src->hasOneUse())
+ return false;
+ Src = cast<Instruction>(Src)->getOperand(0);
+ }
+ return isa<AllocaInst>(Src) && Src->hasOneUse();
+}
+
Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
MaybeAlign CopyDstAlign = MI->getDestAlign();
@@ -132,6 +141,14 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
return MI;
}
+ // If the source is provably undef, the memcpy/memmove doesn't do anything
+ // (unless the transfer is volatile).
+ if (hasUndefSource(MI) && !MI->isVolatile()) {
+    // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
@@ -241,6 +258,15 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return MI;
}
+ // Remove memset with an undef value.
+ // FIXME: This is technically incorrect because it might overwrite a poison
+ // value. Change to PoisonValue once #52930 is resolved.
+ if (isa<UndefValue>(MI->getValue())) {
+    // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -248,7 +274,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return nullptr;
const uint64_t Len = LenC->getLimitedValue();
assert(Len && "0-sized memory setting should be removed already.");
- const Align Alignment = assumeAligned(MI->getDestAlignment());
+ const Align Alignment = MI->getDestAlign().valueOrOne();
// If it is an atomic and alignment is less than the size then we will
// introduce the unaligned memory access which will be later transformed
@@ -769,7 +795,7 @@ static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
Constant *Overflow) {
- Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
+ Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
@@ -795,6 +821,10 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
if (Known.isNegative())
return true;
+ Value *X, *Y;
+ if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
+ return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);
+
return isImpliedByDomCondition(
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
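
The new getKnownSign clause leans on the nsw flag: with no signed wrap, X <s Y implies X - Y <s 0. Wrapping subtraction does not give that guarantee, as a one-line counterexample shows (plain C++, with the two's-complement wrap done in unsigned arithmetic):

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = -2, y = INT32_MAX;
  int32_t wrapped = int32_t(uint32_t(x) - uint32_t(y)); // wrapping x - y
  assert(x < y && wrapped > 0); // the wrap flips the sign, so nsw is required
}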
@@ -837,6 +867,67 @@ static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
: BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
+/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
+Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
+ Type *Ty = MinMax1.getType();
+
+ // We are looking for a tree of:
+ // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
+ // Where the min and max could be reversed
+ Instruction *MinMax2;
+ BinaryOperator *AddSub;
+ const APInt *MinValue, *MaxValue;
+ if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
+ if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
+ return nullptr;
+ } else if (match(&MinMax1,
+ m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
+ if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
+ return nullptr;
+ } else
+ return nullptr;
+
+ // Check that the constants clamp a saturate, and that the new type would be
+ // sensible to convert to.
+ if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
+ return nullptr;
+  // In what bitwidth can this be treated as saturating arithmetic?
+ unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
+ // FIXME: This isn't quite right for vectors, but using the scalar type is a
+ // good first approximation for what should be done there.
+ if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
+ return nullptr;
+
+ // Also make sure that the inner min/max and the add/sub have one use.
+ if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
+ return nullptr;
+
+ // Create the new type (which can be a vector type)
+ Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
+
+ Intrinsic::ID IntrinsicID;
+ if (AddSub->getOpcode() == Instruction::Add)
+ IntrinsicID = Intrinsic::sadd_sat;
+ else if (AddSub->getOpcode() == Instruction::Sub)
+ IntrinsicID = Intrinsic::ssub_sat;
+ else
+ return nullptr;
+
+ // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
+ // is usually achieved via a sext from a smaller type.
+ if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
+ NewBitWidth ||
+ ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
+ return nullptr;
+
+ // Finally create and return the sat intrinsic, truncated to the new type
+  Function *F =
+      Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
+ Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
+ Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
+ Value *Sat = Builder.CreateCall(F, {AT, BT});
+ return CastInst::Create(Instruction::SExt, Sat, Ty);
+}
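
The clamp tree matched above is exactly saturating addition once the add is done in a wider type. A brute-force 8-bit check of that equivalence (C++ sketch; clampAdd is an illustrative stand-in for the matched IR):

#include <algorithm>
#include <cassert>
#include <cstdint>

static int8_t clampAdd(int8_t a, int8_t b) {
  int32_t wide = int32_t(a) + int32_t(b);             // add(sext(A), sext(B))
  return int8_t(std::max(-128, std::min(127, wide))); // smax(smin(...))
}

int main() {
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b) {
      int s = a + b;
      int sat = s < -128 ? -128 : (s > 127 ? 127 : s); // reference sadd_sat
      assert(clampAdd(int8_t(a), int8_t(b)) == int8_t(sat));
    }
}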
+
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
@@ -879,6 +970,59 @@ static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
+/// If this min/max has a constant operand and an operand that is a matching
+/// min/max with a constant operand, constant-fold the 2 constant operands.
+static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+ if (!LHS || LHS->getIntrinsicID() != MinMaxID)
+ return nullptr;
+
+ Constant *C0, *C1;
+ if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
+ !match(II->getArgOperand(1), m_ImmConstant(C1)))
+ return nullptr;
+
+ // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
+ ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
+ Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
+ Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
+
+ Module *Mod = II->getModule();
+ Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
+ return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
+}
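
The reassociation above is valid because min/max are associative and commutative, so the two constants can always be folded into one. A quick check with arbitrary constants (plain C++, signed flavors; the unsigned ones work the same way):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int32_t c0 = 10, c1 = 20;
  for (int32_t x = -50; x <= 50; ++x) {
    // max (max X, C0), C1 --> max X, (max C0, C1)
    assert(std::max(std::max(x, c0), c1) == std::max(x, std::max(c0, c1)));
    assert(std::min(std::min(x, c0), c1) == std::min(x, std::min(c0, c1)));
  }
}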
+
+/// If this min/max has a matching min/max operand with a constant, try to push
+/// the constant operand into this instruction. This can enable more folds.
+static Instruction *
+reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ // Match and capture a min/max operand candidate.
+ Value *X, *Y;
+ Constant *C;
+ Instruction *Inner;
+ if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
+ m_Instruction(Inner),
+ m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
+ m_Value(Y))))
+ return nullptr;
+
+ // The inner op must match. Check for constants to avoid infinite loops.
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
+ if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
+ match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
+ return nullptr;
+
+ // max (max X, C), Y --> max (max X, Y), C
+ Function *MinMax =
+ Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
+ Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
+ NewInner->takeName(Inner);
+ return CallInst::Create(MinMax, {NewInner, C});
+}
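
Pushing the constant outward is the same associativity argument in the other direction: the result is unchanged, but the non-constant operands end up adjacent where other folds can see them. A small check (plain C++):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int32_t c = 42;
  for (int32_t x = -60; x <= 60; ++x)
    for (int32_t y = -60; y <= 60; ++y) {
      // max (max X, C), Y --> max (max X, Y), C
      assert(std::max(std::max(x, c), y) == std::max(std::max(x, y), c));
      assert(std::min(std::min(x, c), y) == std::min(std::min(x, y), c));
    }
}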
+
/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
// Match 3 of the same min/max ops. Example: umin(umin(), umin()).
@@ -936,6 +1080,56 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
+/// If all arguments of the intrinsic are unary shuffles with the same mask,
+/// try to shuffle after the intrinsic.
+static Instruction *
+foldShuffledIntrinsicOperands(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
+ // etc. Use llvm::isTriviallyVectorizable() and related to determine
+ // which intrinsics are safe to shuffle?
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ case Intrinsic::fma:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ break;
+ default:
+ return nullptr;
+ }
+
+ Value *X;
+ ArrayRef<int> Mask;
+ if (!match(II->getArgOperand(0),
+ m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+ return nullptr;
+
+ // At least 1 operand must have 1 use because we are creating 2 instructions.
+ if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
+ return nullptr;
+
+ // See if all arguments are shuffled with the same mask.
+ SmallVector<Value *, 4> NewArgs(II->arg_size());
+ NewArgs[0] = X;
+ Type *SrcTy = X->getType();
+ for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
+ if (!match(II->getArgOperand(i),
+ m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
+ X->getType() != SrcTy)
+ return nullptr;
+ NewArgs[i] = X;
+ }
+
+ // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
+ Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
+ Value *NewIntrinsic =
+ Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
+ return new ShuffleVectorInst(NewIntrinsic, Mask);
+}
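
The shuffle fold is sound because a unary shuffle is a pure lane permutation, and a lane-wise intrinsic commutes with any permutation applied to all of its operands. A scalar model of that argument using min as the intrinsic (C++ sketch; arrays stand in for vectors):

#include <algorithm>
#include <array>
#include <cassert>

int main() {
  std::array<int, 4> x{3, 1, 4, 1}, y{2, 7, 1, 8};
  std::array<int, 4> mask{2, 0, 3, 1}; // the common shuffle mask
  std::array<int, 4> lhs, rhs, minXY;
  for (int i = 0; i < 4; ++i)
    minXY[i] = std::min(x[i], y[i]);
  for (int i = 0; i < 4; ++i) {
    lhs[i] = std::min(x[mask[i]], y[mask[i]]); // min(shuf X, shuf Y)
    rhs[i] = minXY[mask[i]];                   // shuf(min(X, Y))
  }
  assert(lhs == rhs);
}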
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -943,14 +1137,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Don't try to simplify calls without uses. It will not do anything useful,
// but will result in the following folds being skipped.
if (!CI.use_empty())
- if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
+ if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
if (isFreeCall(&CI, &TLI))
return visitFree(CI);
- // If the caller function is nounwind, mark the call as nounwind, even if the
- // callee isn't.
+ // If the caller function (i.e. us, the function that contains this CallInst)
+ // is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
CI.setDoesNotThrow();
return &CI;
@@ -980,13 +1174,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
if (NumBytes->isNullValue())
return eraseInstFromFunction(CI);
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
- if (CI->getZExtValue() == 1) {
- // Replace the instruction with just byte operations. We would
- // transform other cases to loads/stores, but we don't know if
- // alignment is sufficient.
- }
}
// No other transformations apply to volatile transfers.
@@ -1050,10 +1237,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return NewCall;
}
+  // Unused constrained FP intrinsic calls may have a declared side effect,
+  // which prevents them from being removed. In some cases, however, the side
+  // effect is actually absent. To detect this, call simplifyConstrainedFPCall;
+  // if it returns a replacement, the call can be removed.
+ if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
+ if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
+ return eraseInstFromFunction(CI);
+ }
+
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
case Intrinsic::objectsize:
- if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
+ if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
return replaceInstUsesWith(CI, V);
return nullptr;
case Intrinsic::abs: {
@@ -1224,6 +1420,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
+ if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
+ return NewMinMax;
+
+ if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
+ return R;
+
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
return NewMinMax;
@@ -1231,14 +1433,35 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
- Value *X = nullptr;
+
+ // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+ // inverse-shift-of-bswap:
+ // bswap (shl X, Y) --> lshr (bswap X), Y
+ // bswap (lshr X, Y) --> shl (bswap X), Y
+ Value *X, *Y;
+ if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
+ // The transform allows undef vector elements, so try a constant match
+ // first. If knownbits can handle that case, that clause could be removed.
+ unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
+ const APInt *C;
+ if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
+ MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
+ Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
+ BinaryOperator::BinaryOps InverseShift =
+ cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
+ ? Instruction::LShr
+ : Instruction::Shl;
+ return BinaryOperator::Create(InverseShift, NewSwap, Y);
+ }
+ }
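
The inverse-shift-of-bswap canonicalization works because shifting by a whole number of bytes before a byte swap is the same as swapping first and shifting the opposite way. A concrete check with a hand-rolled 32-bit byte swap (the bswap32 helper is illustrative, not LLVM's intrinsic):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t v) {
  return (v << 24) | ((v & 0xFF00u) << 8) | ((v >> 8) & 0xFF00u) | (v >> 24);
}

int main() {
  uint32_t x = 0x12345678u;
  assert(bswap32(x << 8) == (bswap32(x) >> 8));   // bswap (shl X, 8)
  assert(bswap32(x >> 16) == (bswap32(x) << 16)); // bswap (lshr X, 16)
}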
KnownBits Known = computeKnownBits(IIOperand, 0, II);
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
+ unsigned BW = Known.getBitWidth();
// bswap(x) -> shift(x) if x has exactly one "active byte"
- if (Known.getBitWidth() - LZ - TZ == 8) {
+ if (BW - LZ - TZ == 8) {
assert(LZ != TZ && "active byte cannot be in the middle");
if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
return BinaryOperator::CreateNUWShl(
@@ -1250,8 +1473,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
- unsigned C = X->getType()->getScalarSizeInBits() -
- IIOperand->getType()->getScalarSizeInBits();
+ unsigned C = X->getType()->getScalarSizeInBits() - BW;
Value *CV = ConstantInt::get(X->getType(), C);
Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
@@ -1618,7 +1840,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
// Try to simplify the underlying FMul.
- if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
+ if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
@@ -1649,7 +1871,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Try to simplify the underlying FMul. We can only apply simplifications
// that do not require rounding.
- if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
+ if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
@@ -2135,7 +2357,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
- case Intrinsic::experimental_vector_insert: {
+ case Intrinsic::vector_insert: {
Value *Vec = II->getArgOperand(0);
Value *SubVec = II->getArgOperand(1);
Value *Idx = II->getArgOperand(2);
@@ -2181,7 +2403,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
- case Intrinsic::experimental_vector_extract: {
+ case Intrinsic::vector_extract: {
Value *Vec = II->getArgOperand(0);
Value *Idx = II->getArgOperand(1);
@@ -2456,11 +2678,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
default: {
// Handle target specific intrinsics
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
- if (V.hasValue())
+ if (V)
return V.getValue();
break;
}
}
+
+ if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+ return Shuf;
+
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
@@ -2648,47 +2874,56 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
return nullptr;
}
-void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
+bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
+ const TargetLibraryInfo *TLI) {
// Note: We only handle cases which can't be driven from generic attributes
// here. So, for example, nonnull and noalias (which are common properties
// of some allocation functions) are expected to be handled via annotation
// of the respective allocator declaration with generic attributes.
+ bool Changed = false;
- uint64_t Size;
- ObjectSizeOpts Opts;
- if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
- // TODO: We really should just emit deref_or_null here and then
- // let the generic inference code combine that with nonnull.
- if (Call.hasRetAttr(Attribute::NonNull))
- Call.addRetAttr(Attribute::getWithDereferenceableBytes(
- Call.getContext(), Size));
- else
- Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Size));
+ if (isAllocationFn(&Call, TLI)) {
+ uint64_t Size;
+ ObjectSizeOpts Opts;
+ if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
+ // TODO: We really should just emit deref_or_null here and then
+ // let the generic inference code combine that with nonnull.
+ if (Call.hasRetAttr(Attribute::NonNull)) {
+ Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
+ Call.addRetAttr(
+ Attribute::getWithDereferenceableBytes(Call.getContext(), Size));
+ } else {
+ Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Size));
+ }
+ }
}
// Add alignment attribute if alignment is a power of two constant.
Value *Alignment = getAllocAlignment(&Call, TLI);
if (!Alignment)
- return;
+ return Changed;
ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
uint64_t AlignmentVal = AlignOpC->getZExtValue();
if (llvm::isPowerOf2_64(AlignmentVal)) {
- Call.removeRetAttr(Attribute::Alignment);
- Call.addRetAttr(Attribute::getWithAlignment(Call.getContext(),
- Align(AlignmentVal)));
+ Align ExistingAlign = Call.getRetAlign().valueOrOne();
+ Align NewAlign = Align(AlignmentVal);
+ if (NewAlign > ExistingAlign) {
+ Call.addRetAttr(
+ Attribute::getWithAlignment(Call.getContext(), NewAlign));
+ Changed = true;
+ }
}
}
+ return Changed;
}
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
- if (isAllocationFn(&Call, &TLI))
- annotateAnyAllocSite(Call, &TLI);
-
- bool Changed = false;
+ bool Changed = annotateAnyAllocSite(Call, &TLI);
// Mark any parameters that are known to be non-null with the nonnull
// attribute. This is helpful for inlining calls to functions with null
@@ -2718,10 +2953,12 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/callbr/invoke.
Value *Callee = Call.getCalledOperand();
- if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
+ Function *CalleeF = dyn_cast<Function>(Callee);
+ if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
+ transformConstExprCastCall(Call))
return nullptr;
- if (Function *CalleeF = dyn_cast<Function>(Callee)) {
+ if (CalleeF) {
// Remove the convergent attr on calls when the callee is not convergent.
if (Call.isConvergent() && !CalleeF->isConvergent() &&
!CalleeF->isIntrinsic()) {
@@ -2905,7 +3142,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
Optional<OperandBundleUse> Bundle =
GCSP.getOperandBundle(LLVMContext::OB_gc_live);
unsigned NumOfGCLives = LiveGcValues.size();
- if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
+ if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
break;
// We can reduce the size of gc live bundle.
DenseMap<Value *, unsigned> Val2Idx;
@@ -3026,8 +3263,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
//
// Similarly, avoid folding away bitcasts of byval calls.
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
- Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
- Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
+ Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
return false;
auto AI = Call.arg_begin();
@@ -3038,12 +3274,15 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
+ // Check if there are any incompatible attributes we cannot drop safely.
if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
- .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
+ .overlaps(AttributeFuncs::typeIncompatible(
+ ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
return false; // Attribute not compatible with transformed value.
- if (Call.isInAllocaArgument(i))
- return false; // Cannot transform to and from inalloca.
+ if (Call.isInAllocaArgument(i) ||
+ CallerPAL.hasParamAttr(i, Attribute::Preallocated))
+ return false; // Cannot transform to and from inalloca/preallocated.
if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
return false;
@@ -3052,13 +3291,18 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getPointerElementType()->isSized())
+ if (!ParamPTy)
return false;
- Type *CurElTy = Call.getParamByValType(i);
- if (DL.getTypeAllocSize(CurElTy) !=
- DL.getTypeAllocSize(ParamPTy->getPointerElementType()))
- return false;
+ if (!ParamPTy->isOpaque()) {
+ Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
+ if (!ParamElTy->isSized())
+ return false;
+
+ Type *CurElTy = Call.getParamByValType(i);
+ if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
+ return false;
+ }
}
}
@@ -3116,13 +3360,20 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
Args.push_back(NewArg);
- // Add any parameter attributes.
- if (CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
- AttrBuilder AB(FT->getContext(), CallerPAL.getParamAttrs(i));
- AB.addByValAttr(NewArg->getType()->getPointerElementType());
+ // Add any parameter attributes except the ones incompatible with the new
+ // type. Note that we made sure all incompatible ones are safe to drop.
+ AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
+ ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
+ if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
+ !ParamTy->isOpaquePointerTy()) {
+ AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
+ Ctx, IncompatibleAttrs));
+ AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
- } else
- ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
+ } else {
+ ArgAttrs.push_back(
+ CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
+ }
}
// If the function takes more arguments than the call was taking, add them
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f11ba8772f3c..e9e779b8619b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -13,13 +13,10 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
-#include <numeric>
using namespace llvm;
using namespace PatternMatch;
@@ -39,8 +36,10 @@ static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
// Cannot look past anything that might overflow.
+ // We specifically require nuw because we store the Scale in an unsigned
+ // and perform an unsigned divide on it.
OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
- if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
+ if (OBI && !OBI->hasNoUnsignedWrap()) {
Scale = 1;
Offset = 0;
return Val;
@@ -639,10 +638,12 @@ Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
-/// TODO: Transforms for truncated shifts should be moved into here.
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
Type *SrcTy = Trunc.getSrcTy();
Type *DestTy = Trunc.getType();
+ unsigned SrcWidth = SrcTy->getScalarSizeInBits();
+ unsigned DestWidth = DestTy->getScalarSizeInBits();
+
if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
return nullptr;
@@ -685,7 +686,30 @@ Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
}
break;
}
-
+ case Instruction::LShr:
+ case Instruction::AShr: {
+ // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
+ Value *A;
+ Constant *C;
+ if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
+ unsigned MaxShiftAmt = SrcWidth - DestWidth;
+ // If the shift is small enough, all zero/sign bits created by the shift
+ // are removed by the trunc.
+ if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
+ APInt(SrcWidth, MaxShiftAmt)))) {
+ auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
+ bool IsExact = OldShift->isExact();
+ auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
+ ShAmt = Constant::mergeUndefsWith(ShAmt, C);
+ Value *Shift =
+ OldShift->getOpcode() == Instruction::AShr
+ ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
+ : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
+ return CastInst::CreateTruncOrBitCast(Shift, DestTy);
+ }
+ }
+ break;
+ }
default: break;
}
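
The lshr/ashr case moved into narrowBinOp above is safe whenever the shift amount is no larger than the number of bits the outer trunc discards, since every surviving bit then comes from the low part that the inner trunc kept. A plain-C++ check of that bound for lshr (widths chosen for illustration):

#include <cassert>
#include <cstdint>

int main() {
  // trunc (lshr (trunc A to i32), C) to i16 == trunc (lshr A, C) to i16
  // for C <= 32 - 16.
  uint64_t a = 0xDEADBEEFCAFEF00Dull;
  for (unsigned c = 0; c <= 16; ++c) {
    uint16_t narrow = uint16_t(uint32_t(a) >> c);
    uint16_t wide = uint16_t(a >> c);
    assert(narrow == wide);
  }
}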
@@ -873,26 +897,6 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// TODO: Mask high bits with 'and'.
}
- // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
- if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
- unsigned MaxShiftAmt = SrcWidth - DestWidth;
-
- // If the shift is small enough, all zero/sign bits created by the shift are
- // removed by the trunc.
- if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
- APInt(SrcWidth, MaxShiftAmt)))) {
- auto *OldShift = cast<Instruction>(Src);
- bool IsExact = OldShift->isExact();
- auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
- ShAmt = Constant::mergeUndefsWith(ShAmt, C);
- Value *Shift =
- OldShift->getOpcode() == Instruction::AShr
- ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
- : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
- return CastInst::CreateTruncOrBitCast(Shift, DestTy);
- }
- }
-
if (Instruction *I = narrowBinOp(Trunc))
return I;
@@ -971,7 +975,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
Attribute Attr =
Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
- if (Log2_32(MaxVScale.getValue()) < DestWidth) {
+ if (Log2_32(*MaxVScale) < DestWidth) {
Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
return replaceInstUsesWith(Trunc, VScale);
}
@@ -986,13 +990,18 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
// If we are just checking for a icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
+
+  // FIXME: This set of transforms does not check for extra uses and may
+  //        create an extra instruction (an optional final cast is not
+  //        included in the transform comments). We may also want to favor
+  //        icmp over shifts when the instruction counts are equal, because
+  //        icmp generally has better analysis (i.e. invert the transform).
+
const APInt *Op1CV;
if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
- // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
- if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) ||
- (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnes())) {
+ if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
Value *In = Cmp->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits() - 1);
@@ -1000,11 +1009,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
if (In->getType() != Zext.getType())
In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
- if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
- Constant *One = ConstantInt::get(In->getType(), 1);
- In = Builder.CreateXor(In, One, In->getName() + ".not");
- }
-
return replaceInstUsesWith(Zext, In);
}
@@ -1080,7 +1084,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
- if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
+ if (KnownLHS == KnownRHS) {
APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
@@ -1343,7 +1347,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
- if (Log2_32(MaxVScale.getValue()) < TypeWidth) {
+ if (Log2_32(*MaxVScale) < TypeWidth) {
Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
return replaceInstUsesWith(CI, VScale);
}
@@ -1506,10 +1510,8 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- // If we know that the value being extended is positive, we can use a zext
- // instead.
- KnownBits Known = computeKnownBits(Src, 0, &CI);
- if (Known.isNonNegative())
+ // If the value being extended is zero or positive, use a zext instead.
+ if (isKnownNonNegative(Src, DL, 0, &AC, &CI, &DT))
return CastInst::Create(Instruction::ZExt, Src, DestTy);
// Try to extend the entire expression tree to the wide destination type.
@@ -1597,14 +1599,20 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
// Splatting a bit of constant-index across a value:
// sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
- // TODO: If the dest type is different, use a cast (adjust use check).
+ // If the dest type is different, use a cast (adjust use check).
if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
- m_SpecificInt(SrcBitSize - 1)))) &&
- X->getType() == DestTy) {
- Constant *ShlAmtC = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
- Constant *AshrAmtC = ConstantInt::get(DestTy, DestBitSize - 1);
- Value *Shl = Builder.CreateShl(X, ShlAmtC);
- return BinaryOperator::CreateAShr(Shl, AshrAmtC);
+ m_SpecificInt(SrcBitSize - 1))))) {
+ Type *XTy = X->getType();
+ unsigned XBitSize = XTy->getScalarSizeInBits();
+ Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
+ Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
+ if (XTy == DestTy)
+ return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
+ AshrAmtC);
+ if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
+ Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
+ return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
+ }
}
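
The generalized splat fold replicates one bit of X across the whole width: shifting the chosen bit up to the sign position and arithmetic-shifting it back down matches truncating, sign-splatting, and extending. A check for bit 7 in 32 bits (C++20 conversion and shift semantics assumed):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t samples[] = {0, 0x7F, 0x80, 0xFF, 0x1234567F, -1};
  for (int32_t x : samples) {
    int32_t viaTrunc = int32_t(int8_t(x) >> 7);        // sext(ashr(trunc X))
    int32_t viaShl = int32_t(uint32_t(x) << 24) >> 31; // ashr(shl X, 24), 31
    assert(viaTrunc == viaShl); // both are 0 or -1, splatting bit 7 of X
  }
}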
if (match(Src, m_VScale(DL))) {
@@ -1612,7 +1620,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
- if (Log2_32(MaxVScale.getValue()) < (SrcBitSize - 1)) {
+ if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
return replaceInstUsesWith(CI, VScale);
}
@@ -1712,7 +1720,7 @@ static Type *getMinimumFPType(Value *V) {
/// Return true if the cast from integer to FP can be proven to be exact for all
/// possible inputs (the conversion does not lose any precision).
-static bool isKnownExactCastIntToFP(CastInst &I) {
+static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
CastInst::CastOps Opcode = I.getOpcode();
assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
"Unexpected cast");
@@ -1749,6 +1757,12 @@ static bool isKnownExactCastIntToFP(CastInst &I) {
// TODO:
// Try harder to find if the source integer type has less significant bits.
// For example, compute number of sign bits or compute low bit mask.
+ KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
+ int LowBits =
+ (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
+ if (LowBits <= DestNumSigBits)
+ return true;
+
return false;
}
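
The added KnownBits clause generalizes the width test: if the value's set bits provably fit in the destination's significand, the conversion is exact. The float boundary at 24 significant bits shows both sides of that line (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t exact = (1u << 24) - 1;   // fits in float's 24-bit significand
  uint32_t inexact = (1u << 24) + 1; // needs 25 significant bits
  assert(uint32_t(float(exact)) == exact);
  assert(uint32_t(float(inexact)) != inexact); // rounds to 1 << 24
}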
@@ -1929,7 +1943,7 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
Value *Src = FPT.getOperand(0);
if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
auto *FPCast = cast<CastInst>(Src);
- if (isKnownExactCastIntToFP(*FPCast))
+ if (isKnownExactCastIntToFP(*FPCast, *this))
return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
}
@@ -1943,7 +1957,7 @@ Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
Value *Src = FPExt.getOperand(0);
if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
auto *FPCast = cast<CastInst>(Src);
- if (isKnownExactCastIntToFP(*FPCast))
+ if (isKnownExactCastIntToFP(*FPCast, *this))
return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
}
@@ -1970,13 +1984,13 @@ Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
// This means this is also safe for a signed input and unsigned output, since
// a negative input would lead to undefined behavior.
- if (!isKnownExactCastIntToFP(*OpI)) {
+ if (!isKnownExactCastIntToFP(*OpI, *this)) {
// The first cast may not round exactly based on the source integer width
// and FP width, but the overflow UB rules can still allow this to fold.
// If the destination type is narrow, that means the intermediate FP value
// must be large enough to hold the source value exactly.
// For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
- int OutputSize = (int)DestType->getScalarSizeInBits() - IsOutputSigned;
+ int OutputSize = (int)DestType->getScalarSizeInBits();
if (OutputSize > OpI->getType()->getFPMantissaWidth())
return nullptr;
}
@@ -2150,14 +2164,10 @@ optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
- SmallVector<int, 16> ShuffleMaskStorage;
+ auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
ArrayRef<int> ShuffleMask;
Value *V2;
- // Produce an identify shuffle mask for the src vector.
- ShuffleMaskStorage.resize(SrcElts);
- std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
-
if (SrcElts > DestElts) {
// If we're shrinking the number of elements (rewriting an integer
// truncate), just shuffle in the elements corresponding to the least
@@ -2278,6 +2288,8 @@ static bool collectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
+ if (I->getOperand(0)->getType()->isVectorTy())
+ return false;
return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
isBigEndian);
case Instruction::ZExt:
@@ -2351,21 +2363,28 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
/// usually not type-specific like scalar integer or scalar floating-point.
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
InstCombinerImpl &IC) {
- // TODO: Create and use a pattern matcher for ExtractElementInst.
- auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
- if (!ExtElt || !ExtElt->hasOneUse())
+ Value *VecOp, *Index;
+ if (!match(BitCast.getOperand(0),
+ m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
return nullptr;
// The bitcast must be to a vectorizable type, otherwise we can't make a new
// type to extract from.
Type *DestType = BitCast.getType();
- if (!VectorType::isValidElementType(DestType))
- return nullptr;
+ VectorType *VecType = cast<VectorType>(VecOp->getType());
+ if (VectorType::isValidElementType(DestType)) {
+ auto *NewVecType = VectorType::get(DestType, VecType);
+ auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
+ return ExtractElementInst::Create(NewBC, Index);
+ }
+
+  // Only handle the case where DestType is a vector, to avoid fighting the
+  // inverse transform in visitBitCast.
+ // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
+ auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
+ if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
+ return CastInst::Create(Instruction::BitCast, VecOp, DestType);
- auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
- auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
- NewVecType, "bc");
- return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
+ return nullptr;
}
/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
@@ -2373,8 +2392,8 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
InstCombiner::BuilderTy &Builder) {
Type *DestTy = BitCast.getType();
BinaryOperator *BO;
- if (!DestTy->isIntOrIntVectorTy() ||
- !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
+
+ if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
!BO->isBitwiseLogicOp())
return nullptr;
@@ -2384,6 +2403,32 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
return nullptr;
+ if (DestTy->isFPOrFPVectorTy()) {
+ Value *X, *Y;
+ // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
+ if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
+ match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
+ if (X->getType()->isFPOrFPVectorTy() &&
+ Y->getType()->isIntOrIntVectorTy()) {
+ Value *CastedOp =
+ Builder.CreateBitCast(BO->getOperand(0), Y->getType());
+ Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
+ return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
+ }
+ if (X->getType()->isIntOrIntVectorTy() &&
+ Y->getType()->isFPOrFPVectorTy()) {
+ Value *CastedOp =
+ Builder.CreateBitCast(BO->getOperand(1), X->getType());
+ Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
+ return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
+ }
+ }
+ return nullptr;
+ }
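
Keeping the logic op next to one FP bitcast instead of two is what lets idioms built on FP bit patterns fold cleanly; the classic instance is fabs as an 'and' that clears the sign bit. A sketch using C++20 std::bit_cast as a stand-in for the IR bitcasts:

#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  const float samples[] = {-1.5f, 2.25f, -0.0f, 3.0f};
  for (float f : samples) {
    uint32_t bits = std::bit_cast<uint32_t>(f);           // bitcast f32 -> i32
    float abs = std::bit_cast<float>(bits & 0x7FFFFFFFu); // clear the sign bit
    assert(abs == std::fabs(f));
  }
}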
+
+ if (!DestTy->isIntOrIntVectorTy())
+ return nullptr;
+
Value *X;
if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
X->getType() == DestTy && !isa<Constant>(X)) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index e45be5745fcc..d1f89973caa1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,13 +17,11 @@
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -105,10 +103,14 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
///
/// If AndCst is non-null, then the loaded value is masked with that constant
/// before doing the comparison. This handles cases like "A[i]&4 == 0".
-Instruction *
-InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
- GlobalVariable *GV, CmpInst &ICI,
- ConstantInt *AndCst) {
+Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
+ LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst) {
+ if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
+ GV->getValueType() != GEP->getSourceElementType() ||
+ !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return nullptr;
+
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -188,8 +190,11 @@ InstCombinerImpl::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
if (!Elt) return nullptr;
// If this is indexing an array of structures, get the structure element.
- if (!LaterIndices.empty())
- Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
+ if (!LaterIndices.empty()) {
+ Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
+ if (!Elt)
+ return nullptr;
+ }
// If the element is masked, handle it.
if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
@@ -757,7 +762,7 @@ getAsConstantIndexedAddress(Type *ElemTy, Value *V, const DataLayout &DL) {
V = GEP->getOperand(0);
Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
Index = ConstantExpr::getAdd(
- Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
+ Index, ConstantExpr::getSExtOrTrunc(GEPIndex, IndexType));
continue;
}
break;
@@ -887,7 +892,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (PtrBase != GEPRHS->getOperand(0)) {
bool IndicesTheSame =
GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
- GEPLHS->getType() == GEPRHS->getType() &&
+ GEPLHS->getPointerOperand()->getType() ==
+ GEPRHS->getPointerOperand()->getType() &&
GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType();
if (IndicesTheSame)
for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
@@ -950,7 +956,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
- if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
+ if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
+ GEPLHS->getSourceElementType() == GEPRHS->getSourceElementType()) {
// If the GEPs only differ by one index, compare it.
unsigned NumDifferences = 0; // Keep track of # differences.
unsigned DiffOperand = 0; // The operand that differs.
@@ -1001,8 +1008,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
- const AllocaInst *Alloca,
- const Value *Other) {
+ const AllocaInst *Alloca) {
assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
// It would be tempting to fold away comparisons between allocas and any
@@ -1071,10 +1077,9 @@ Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
}
}
- Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
- return replaceInstUsesWith(
- ICI,
- ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
+ auto *Res = ConstantInt::get(ICI.getType(),
+ !CmpInst::isTrueWhenEqual(ICI.getPredicate()));
+ return replaceInstUsesWith(ICI, Res);
}
/// Fold "icmp pred (X+C), X".
@@ -1376,8 +1381,7 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
// (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
if (Pred == ICmpInst::ICMP_SGT) {
Value *A, *B;
- SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
- if (SPR.Flavor == SPF_SMIN) {
+ if (match(Cmp.getOperand(0), m_SMin(m_Value(A), m_Value(B)))) {
if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, B, Cmp.getOperand(1));
if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
@@ -1530,7 +1534,7 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
return nullptr;
}
-/// Fold icmp (trunc X, Y), C.
+/// Fold icmp (trunc X), C.
Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
TruncInst *Trunc,
const APInt &C) {
@@ -1547,6 +1551,16 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
SrcBits = X->getType()->getScalarSizeInBits();
if (Cmp.isEquality() && Trunc->hasOneUse()) {
+ // Canonicalize to a mask and wider compare if the wide type is suitable:
+ // (trunc X to i8) == C --> (X & 0xff) == (zext C)
+ if (!X->getType()->isVectorTy() && shouldChangeType(DstBits, SrcBits)) {
+ Constant *Mask = ConstantInt::get(X->getType(),
+ APInt::getLowBitsSet(SrcBits, DstBits));
+ Value *And = Builder.CreateAnd(X, Mask);
+ Constant *WideC = ConstantInt::get(X->getType(), C.zext(SrcBits));
+ return new ICmpInst(Pred, And, WideC);
+ }
+
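A standalone C++ sketch of the equivalence behind this canonicalization
(illustration only, not part of the patch; uint32_t/uint8_t stand in for
i32/i8 and C = 42 is an arbitrary instance):

    #include <cassert>
    #include <cstdint>

    int main() {
      // (trunc X to i8) == 42  <=>  (X & 255) == 42, for any i32 X.
      for (uint32_t X : {0x2Au, 0x1002Au, 0xFFFFFF2Au, 0x7Fu, 0u}) {
        bool Narrow = static_cast<uint8_t>(X) == 42; // icmp eq (trunc X), 42
        bool Wide = (X & 0xFFu) == 42u;              // icmp eq (and X, 255), 42
        assert(Narrow == Wide);
      }
      return 0;
    }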
// Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
// of the high bits truncated out of x are known.
KnownBits Known = computeKnownBits(X, 0, &Cmp);
@@ -1865,15 +1879,13 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
// Try to optimize things like "A[i] & 42 == 0" to index computations.
Value *X = And->getOperand(0);
Value *Y = And->getOperand(1);
- if (auto *LI = dyn_cast<LoadInst>(X))
- if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
- if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !LI->isVolatile() && isa<ConstantInt>(Y)) {
- ConstantInt *C2 = cast<ConstantInt>(Y);
- if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
+ if (auto *C2 = dyn_cast<ConstantInt>(Y))
+ if (auto *LI = dyn_cast<LoadInst>(X))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (Instruction *Res =
+ foldCmpLoadFromIndexedGlobal(LI, GEP, GV, Cmp, C2))
return Res;
- }
if (!Cmp.isEquality())
return nullptr;
@@ -2216,22 +2228,41 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
if (Cmp.isEquality() && Shr->isExact() && C.isZero())
return new ICmpInst(Pred, X, Cmp.getOperand(1));
- const APInt *ShiftVal;
- if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
- return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
-
- const APInt *ShiftAmt;
- if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
+ bool IsAShr = Shr->getOpcode() == Instruction::AShr;
+ const APInt *ShiftValC;
+ if (match(Shr->getOperand(0), m_APInt(ShiftValC))) {
+ if (Cmp.isEquality())
+ return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC);
+
+ // If the shifted constant is a power-of-2, test the shift amount directly:
+ // (ShiftValC >> X) >u C --> X <u (LZ(C) - LZ(ShiftValC))
+ // (ShiftValC >> X) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC))
+ if (!IsAShr && ShiftValC->isPowerOf2() &&
+ (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) {
+ bool IsUGT = Pred == CmpInst::ICMP_UGT;
+ assert(ShiftValC->uge(C) && "Expected simplify of compare");
+ assert((IsUGT || !C.isZero()) && "Expected X u< 0 to simplify");
+
+ unsigned CmpLZ =
+ IsUGT ? C.countLeadingZeros() : (C - 1).countLeadingZeros();
+ unsigned ShiftLZ = ShiftValC->countLeadingZeros();
+ Constant *NewC = ConstantInt::get(Shr->getType(), CmpLZ - ShiftLZ);
+ auto NewPred = IsUGT ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+ return new ICmpInst(NewPred, Shr->getOperand(1), NewC);
+ }
+ }
+
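The leading-zero arithmetic above can be spot-checked with a worked instance
in standalone C++ (illustration only, not part of the patch; ShiftValC = 128
and C = 3 are arbitrary, so LZ8(3) = LZ8(2) = 6 and LZ8(128) = 0):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 8; ++X) { // valid i8 shift amounts
        uint32_t Shr = 128u >> X;        // lshr i8 128, X
        assert((Shr > 3u) == (X < 6u));  // ugt: X <u LZ(C) - LZ(ShiftValC)
        assert((Shr < 3u) == (X >= 6u)); // ult: X >=u LZ(C-1) - LZ(ShiftValC)
      }
      return 0;
    }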
+ const APInt *ShiftAmtC;
+ if (!match(Shr->getOperand(1), m_APInt(ShiftAmtC)))
return nullptr;
// Check that the shift amount is in range. If not, don't perform undefined
// shifts. When the shift is visited it will be simplified.
unsigned TypeBits = C.getBitWidth();
- unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
+ unsigned ShAmtVal = ShiftAmtC->getLimitedValue(TypeBits);
if (ShAmtVal >= TypeBits || ShAmtVal == 0)
return nullptr;
- bool IsAShr = Shr->getOpcode() == Instruction::AShr;
bool IsExact = Shr->isExact();
Type *ShrTy = Shr->getType();
// TODO: If we could guarantee that InstSimplify would handle all of the
@@ -2256,8 +2287,11 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
}
if (Pred == CmpInst::ICMP_UGT) {
// icmp ugt (ashr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
+ // '(C + 1) << ShAmtC' can overflow as a signed number, so the 2nd
+ // clause of the 'if' below accounts for that case.
APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
- if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
+ if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1) ||
+ (C + 1).shl(ShAmtVal).isMinSignedValue())
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
}
@@ -2337,7 +2371,8 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
// constant power-of-2 value:
// (X % pow2C) sgt/slt 0
const ICmpInst::Predicate Pred = Cmp.getPredicate();
- if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT)
+ if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT &&
+ Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
return nullptr;
// TODO: The one-use check is standard because we do not typically want to
@@ -2347,7 +2382,15 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
return nullptr;
const APInt *DivisorC;
- if (!C.isZero() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
+ if (!match(SRem->getOperand(1), m_Power2(DivisorC)))
+ return nullptr;
+
+ // For cmp_sgt/cmp_slt, only a zero-valued C is handled.
+ // For cmp_eq/cmp_ne, only a strictly positive C is handled.
+ if (((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT) &&
+ !C.isZero()) ||
+ ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ !C.isStrictlyPositive()))
return nullptr;
// Mask off the sign bit and the modulo bits (low-bits).
@@ -2356,6 +2399,9 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
+ if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)
+ return new ICmpInst(Pred, And, ConstantInt::get(Ty, C));
+
// For 'is positive?' check that the sign-bit is clear and at least 1 masked
// bit is set. Example:
// (i8 X % 32) s> 0 --> (X & 159) s> 0
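
The mask trick in this hunk can be verified exhaustively at i8 width; a
standalone C++ sketch (illustration only, not part of the patch), covering
the 's> 0' form above and the new eq form with a positive C:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int X = -128; X <= 127; ++X) {
        int Rem = static_cast<int8_t>(X) % 32;         // srem X, 32
        int8_t Masked = static_cast<int8_t>(X & 0x9F); // and X, 159
        assert((Rem > 0) == (Masked > 0));             // (X % 32) s> 0 form
        assert((Rem == 1) == (Masked == 1));           // eq form, C = 1
      }
      return 0;
    }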
@@ -2372,26 +2418,30 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
BinaryOperator *UDiv,
const APInt &C) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = UDiv->getOperand(0);
+ Value *Y = UDiv->getOperand(1);
+ Type *Ty = UDiv->getType();
+
const APInt *C2;
- if (!match(UDiv->getOperand(0), m_APInt(C2)))
+ if (!match(X, m_APInt(C2)))
return nullptr;
assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
// (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
- Value *Y = UDiv->getOperand(1);
- if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
+ if (Pred == ICmpInst::ICMP_UGT) {
assert(!C.isMaxValue() &&
"icmp ugt X, UINT_MAX should have been simplified already.");
return new ICmpInst(ICmpInst::ICMP_ULE, Y,
- ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
+ ConstantInt::get(Ty, C2->udiv(C + 1)));
}
// (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
- if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
+ if (Pred == ICmpInst::ICMP_ULT) {
assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
return new ICmpInst(ICmpInst::ICMP_UGT, Y,
- ConstantInt::get(Y->getType(), C2->udiv(C)));
+ ConstantInt::get(Ty, C2->udiv(C)));
}
return nullptr;
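
Both rewrites in this function are exhaustively checkable at i8 width; a
standalone C++ sketch (illustration only; C2 = 100 and C = 9 are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t Y = 1; Y < 256; ++Y) {     // udiv by 0 is UB; skip it
        uint32_t Q = 100u / Y;                 // udiv 100, Y
        assert((Q > 9u) == (Y <= 100u / 10u)); // ugt: Y <=u C2/(C+1)
        assert((Q < 9u) == (Y > 100u / 9u));   // ult: Y >u C2/C
      }
      return 0;
    }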
@@ -2401,6 +2451,28 @@ Instruction *InstCombinerImpl::foldICmpUDivConstant(ICmpInst &Cmp,
Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
BinaryOperator *Div,
const APInt &C) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = Div->getOperand(0);
+ Value *Y = Div->getOperand(1);
+ Type *Ty = Div->getType();
+ bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
+
+ // If this is unsigned division and the compare constant is bigger than
+ // UMAX/2 (i.e., negative when interpreted as signed), there's only one
+ // pair of values that satisfies an equality check, so eliminate the
+ // division:
+ // (X u/ Y) == C --> (X == C) && (Y == 1)
+ // (X u/ Y) != C --> (X != C) || (Y != 1)
+ // Similarly, if signed division and the compare constant is exactly SMIN:
+ // (X s/ Y) == SMIN --> (X == SMIN) && (Y == 1)
+ // (X s/ Y) != SMIN --> (X != SMIN) || (Y != 1)
+ if (Cmp.isEquality() && Div->hasOneUse() && C.isSignBitSet() &&
+ (!DivIsSigned || C.isMinSignedValue())) {
+ Value *XBig = Builder.CreateICmp(Pred, X, ConstantInt::get(Ty, C));
+ Value *YOne = Builder.CreateICmp(Pred, Y, ConstantInt::get(Ty, 1));
+ auto Logic = Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ return BinaryOperator::Create(Logic, XBig, YOne);
+ }
+
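The 'only one pair of values' claim is easy to confirm exhaustively at i8
width; a standalone C++ sketch (illustration only; C = 200 > UMAX/2):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 256; ++X)
        for (uint32_t Y = 1; Y < 256; ++Y) {  // skip udiv by zero
          bool Div = (X / Y) == 200u;         // icmp eq (udiv X, Y), 200
          bool Pair = (X == 200u) && (Y == 1u);
          assert(Div == Pair);                // quotient >= 200 forces Y == 1
        }
      return 0;
    }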
// Fold: icmp pred ([us]div X, C2), C -> range test
// Fold this div into the comparison, producing a range check.
// Determine, based on the divide type, what the range is being
@@ -2408,7 +2480,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
// it, otherwise compute the range [low, hi) bounding the new value.
// See: InsertRangeTest above for the kinds of replacements possible.
const APInt *C2;
- if (!match(Div->getOperand(1), m_APInt(C2)))
+ if (!match(Y, m_APInt(C2)))
return nullptr;
// FIXME: If the operand types don't match the type of the divide
@@ -2419,7 +2491,6 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
// (x /u C2) <u C. Simply casting the operands and result won't
// work. :( The if statement below tests that condition and bails
// if it finds it.
- bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
return nullptr;
@@ -2441,8 +2512,6 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
// instruction that we're folding.
bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
- ICmpInst::Predicate Pred = Cmp.getPredicate();
-
// If the division is known to be exact, then there is no remainder from the
// divide, so the covered range size is unit, otherwise it is the divisor.
APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
@@ -2457,7 +2526,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
int LoOverflow = 0, HiOverflow = 0;
APInt LoBound, HiBound;
- if (!DivIsSigned) { // udiv
+ if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
LoBound = Prod;
HiOverflow = LoOverflow = ProdOV;
@@ -2472,7 +2541,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
LoBound = -(RangeSize - 1);
HiBound = RangeSize;
} else if (C.isStrictlyPositive()) { // (X / pos) op pos
- LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
+ LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
@@ -2492,18 +2561,19 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
// e.g. X/-5 op 0 --> [-4, 5)
LoBound = RangeSize + 1;
HiBound = -RangeSize;
- if (HiBound == *C2) { // -INTMIN = INTMIN
- HiOverflow = 1; // [INTMIN+1, overflow)
- HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN
+ if (HiBound == *C2) { // -INTMIN = INTMIN
+ HiOverflow = 1; // [INTMIN+1, overflow)
+ HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN
}
} else if (C.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
HiBound = Prod + 1;
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
- LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
- } else { // (X / neg) op neg
- LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
+ LoOverflow =
+ addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1 : 0;
+ } else { // (X / neg) op neg
+ LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
LoOverflow = HiOverflow = ProdOV;
if (!HiOverflow)
HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
@@ -2513,54 +2583,47 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
Pred = ICmpInst::getSwappedPredicate(Pred);
}
- Value *X = Div->getOperand(0);
switch (Pred) {
- default: llvm_unreachable("Unhandled icmp opcode!");
- case ICmpInst::ICMP_EQ:
- if (LoOverflow && HiOverflow)
- return replaceInstUsesWith(Cmp, Builder.getFalse());
- if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
- ICmpInst::ICMP_UGE, X,
- ConstantInt::get(Div->getType(), LoBound));
- if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT, X,
- ConstantInt::get(Div->getType(), HiBound));
- return replaceInstUsesWith(
- Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
- case ICmpInst::ICMP_NE:
- if (LoOverflow && HiOverflow)
- return replaceInstUsesWith(Cmp, Builder.getTrue());
- if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT, X,
- ConstantInt::get(Div->getType(), LoBound));
- if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
- ICmpInst::ICMP_UGE, X,
- ConstantInt::get(Div->getType(), HiBound));
- return replaceInstUsesWith(Cmp,
- insertRangeTest(X, LoBound, HiBound,
- DivIsSigned, false));
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- if (LoOverflow == +1) // Low bound is greater than input range.
- return replaceInstUsesWith(Cmp, Builder.getTrue());
- if (LoOverflow == -1) // Low bound is less than input range.
- return replaceInstUsesWith(Cmp, Builder.getFalse());
- return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound));
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- if (HiOverflow == +1) // High bound greater than input range.
- return replaceInstUsesWith(Cmp, Builder.getFalse());
- if (HiOverflow == -1) // High bound less than input range.
- return replaceInstUsesWith(Cmp, Builder.getTrue());
- if (Pred == ICmpInst::ICMP_UGT)
- return new ICmpInst(ICmpInst::ICMP_UGE, X,
- ConstantInt::get(Div->getType(), HiBound));
- return new ICmpInst(ICmpInst::ICMP_SGE, X,
- ConstantInt::get(Div->getType(), HiBound));
+ default:
+ llvm_unreachable("Unhandled icmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
+ X, ConstantInt::get(Ty, LoBound));
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ X, ConstantInt::get(Ty, HiBound));
+ return replaceInstUsesWith(
+ Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
+ X, ConstantInt::get(Ty, LoBound));
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
+ X, ConstantInt::get(Ty, HiBound));
+ return replaceInstUsesWith(
+ Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
+ return new ICmpInst(Pred, X, ConstantInt::get(Ty, LoBound));
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return replaceInstUsesWith(Cmp, Builder.getFalse());
+ if (HiOverflow == -1) // High bound less than input range.
+ return replaceInstUsesWith(Cmp, Builder.getTrue());
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, HiBound));
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, HiBound));
}
return nullptr;
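
In the common non-overflow path, the range test is plain interval arithmetic;
a standalone C++ sketch of the 'X/5 op 3 --> [15, 20)' example from the
comments above (illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 256; ++X) {
        bool DivEq = (X / 5u) == 3u;        // icmp eq (udiv X, 5), 3
        bool InRange = X >= 15u && X < 20u; // range test [15, 20)
        assert(DivEq == InRange);
      }
      return 0;
    }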
@@ -2593,18 +2656,24 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
!subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
return new ICmpInst(SwappedPred, Y, ConstantInt::get(Ty, SubResult));
+ // X - Y == 0 --> X == Y.
+ // X - Y != 0 --> X != Y.
+ // TODO: We allow this with multiple uses as long as the other uses are not
+ // in phis. The phi use check is guarding against a codegen regression
+ // for a loop test. If the backend could undo this (and possibly
+ // subsequent transforms), we would not need this hack.
+ if (Cmp.isEquality() && C.isZero() &&
+ none_of((Sub->users()), [](const User *U) { return isa<PHINode>(U); }))
+ return new ICmpInst(Pred, X, Y);
+
// The following transforms are only worth it if the only user of the subtract
// is the icmp.
// TODO: This is an artificial restriction for all of the transforms below
- // that only need a single replacement icmp.
+ // that only need a single replacement icmp. Can these use the phi test
+ // like the transform just above?
if (!Sub->hasOneUse())
return nullptr;
- // X - Y == 0 --> X == Y.
- // X - Y != 0 --> X != Y.
- if (Cmp.isEquality() && C.isZero())
- return new ICmpInst(Pred, X, Y);
-
if (Sub->hasNoSignedWrap()) {
// (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
@@ -2855,10 +2924,13 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *Op1 = Cmp.getOperand(1);
Value *BCSrcOp = Bitcast->getOperand(0);
+ Type *SrcType = Bitcast->getSrcTy();
+ Type *DstType = Bitcast->getType();
- // Make sure the bitcast doesn't change the number of vector elements.
- if (Bitcast->getSrcTy()->getScalarSizeInBits() ==
- Bitcast->getDestTy()->getScalarSizeInBits()) {
+ // Make sure the bitcast doesn't change between scalar and vector and
+ // doesn't change the number of vector elements.
+ if (SrcType->isVectorTy() == DstType->isVectorTy() &&
+ SrcType->getScalarSizeInBits() == DstType->getScalarSizeInBits()) {
// Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
Value *X;
if (match(BCSrcOp, m_SIToFP(m_Value(X)))) {
@@ -2903,8 +2975,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
Type *XType = X->getType();
// We can't currently handle Power style floating point operations here.
- if (!(XType->isPPC_FP128Ty() || BCSrcOp->getType()->isPPC_FP128Ty())) {
-
+ if (!(XType->isPPC_FP128Ty() || SrcType->isPPC_FP128Ty())) {
Type *NewType = Builder.getIntNTy(XType->getScalarSizeInBits());
if (auto *XVTy = dyn_cast<VectorType>(XType))
NewType = VectorType::get(NewType, XVTy->getElementCount());
@@ -2922,21 +2993,19 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
// Test to see if the operands of the icmp are casted versions of other
// values. If the ptr->ptr cast can be stripped off both arguments, do so.
- if (Bitcast->getType()->isPointerTy() &&
- (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ if (DstType->isPointerTy() && (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
// If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
// so eliminate it as well.
if (auto *BC2 = dyn_cast<BitCastInst>(Op1))
Op1 = BC2->getOperand(0);
- Op1 = Builder.CreateBitCast(Op1, BCSrcOp->getType());
+ Op1 = Builder.CreateBitCast(Op1, SrcType);
return new ICmpInst(Pred, BCSrcOp, Op1);
}
const APInt *C;
- if (!match(Cmp.getOperand(1), m_APInt(C)) ||
- !Bitcast->getType()->isIntegerTy() ||
- !Bitcast->getSrcTy()->isIntOrIntVectorTy())
+ if (!match(Cmp.getOperand(1), m_APInt(C)) || !DstType->isIntegerTy() ||
+ !SrcType->isIntOrIntVectorTy())
return nullptr;
// If this is checking if all elements of a vector compare are set or not,
@@ -2948,9 +3017,8 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
// TODO: Try harder to reduce compare of 2 freely invertible operands?
if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse() &&
isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) {
- Type *ScalarTy = Bitcast->getType();
- Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), ScalarTy);
- return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
+ Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), DstType);
+ return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(DstType));
}
// If this is checking if all elements of an extended vector are clear or not,
@@ -2978,7 +3046,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) {
// Check whether every element of Mask is the same constant
if (is_splat(Mask)) {
- auto *VecTy = cast<VectorType>(BCSrcOp->getType());
+ auto *VecTy = cast<VectorType>(SrcType);
auto *EltTy = cast<IntegerType>(VecTy->getElementType());
if (C->isSplat(EltTy->getBitWidth())) {
// Fold the icmp based on the value of C
@@ -3000,83 +3068,31 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
/// where X is some kind of instruction.
Instruction *InstCombinerImpl::foldICmpInstWithConstant(ICmpInst &Cmp) {
const APInt *C;
- if (!match(Cmp.getOperand(1), m_APInt(C)))
- return nullptr;
- if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) {
- switch (BO->getOpcode()) {
- case Instruction::Xor:
- if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::And:
- if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::Or:
- if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::Mul:
- if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::Shl:
- if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::LShr:
- case Instruction::AShr:
- if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::SRem:
- if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::UDiv:
- if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
- return I;
- LLVM_FALLTHROUGH;
- case Instruction::SDiv:
- if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C))
+ if (match(Cmp.getOperand(1), m_APInt(C))) {
+ if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0)))
+ if (Instruction *I = foldICmpBinOpWithConstant(Cmp, BO, *C))
return I;
- break;
- case Instruction::Sub:
- if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C))
- return I;
- break;
- case Instruction::Add:
- if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C))
- return I;
- break;
- default:
- break;
- }
- // TODO: These folds could be refactored to be part of the above calls.
- if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C))
- return I;
- }
- // Match against CmpInst LHS being instructions other than binary operators.
+ if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0)))
+ // For now, we only support constant integers while folding the
+ // ICMP(SELECT) pattern. We can extend this to support vectors of integers
+ // similar to the cases handled by binary ops above.
+ if (auto *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
+ if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
+ return I;
- if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) {
- // For now, we only support constant integers while folding the
- // ICMP(SELECT)) pattern. We can extend this to support vector of integers
- // similar to the cases handled by binary ops above.
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
- if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
+ if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0)))
+ if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
return I;
- }
- if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) {
- if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
- return I;
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
+ if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
+ return I;
}
- if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
- if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
- return I;
+ if (match(Cmp.getOperand(1), m_APIntAllowUndef(C)))
+ return foldICmpInstWithConstantAllowUndef(Cmp, *C);
return nullptr;
}
@@ -3233,12 +3249,6 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
case Intrinsic::fshl:
case Intrinsic::fshr:
if (II->getArgOperand(0) == II->getArgOperand(1)) {
- // (rot X, ?) == 0/-1 --> X == 0/-1
- // TODO: This transform is safe to re-use undef elts in a vector, but
- // the constant value passed in by the caller doesn't allow that.
- if (C.isZero() || C.isAllOnes())
- return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
-
const APInt *RotAmtC;
// ror(X, RotAmtC) == C --> X == rol(C, RotAmtC)
// rol(X, RotAmtC) == C --> X == ror(C, RotAmtC)
@@ -3311,6 +3321,89 @@ static Instruction *foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp) {
return nullptr;
}
+/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
+/// where X is some kind of instruction and C is AllowUndef.
+/// TODO: Move more folds which allow undef to this function.
+Instruction *
+InstCombinerImpl::foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
+ const APInt &C) {
+ const ICmpInst::Predicate Pred = Cmp.getPredicate();
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0))) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ if (Cmp.isEquality() && II->getArgOperand(0) == II->getArgOperand(1)) {
+ // (rot X, ?) == 0/-1 --> X == 0/-1
+ if (C.isZero() || C.isAllOnes())
+ return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
+ }
+ break;
+ }
+ }
+
+ return nullptr;
+}
+
+/// Fold an icmp with BinaryOp and constant operand: icmp Pred BO, C.
+Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp,
+ BinaryOperator *BO,
+ const APInt &C) {
+ switch (BO->getOpcode()) {
+ case Instruction::Xor:
+ if (Instruction *I = foldICmpXorConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::And:
+ if (Instruction *I = foldICmpAndConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Or:
+ if (Instruction *I = foldICmpOrConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Mul:
+ if (Instruction *I = foldICmpMulConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Shl:
+ if (Instruction *I = foldICmpShlConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (Instruction *I = foldICmpShrConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::SRem:
+ if (Instruction *I = foldICmpSRemConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::UDiv:
+ if (Instruction *I = foldICmpUDivConstant(Cmp, BO, C))
+ return I;
+ LLVM_FALLTHROUGH;
+ case Instruction::SDiv:
+ if (Instruction *I = foldICmpDivConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Sub:
+ if (Instruction *I = foldICmpSubConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Add:
+ if (Instruction *I = foldICmpAddConstant(Cmp, BO, C))
+ return I;
+ break;
+ default:
+ break;
+ }
+
+ // TODO: These folds could be refactored to be part of the above calls.
+ return foldICmpBinOpEqualityWithConstant(Cmp, BO, C);
+}
+
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
IntrinsicInst *II,
@@ -3406,64 +3499,6 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
return NV;
break;
- case Instruction::Select: {
- // If either operand of the select is a constant, we can fold the
- // comparison into the select arms, which will cause one to be
- // constant folded and the select turned into a bitwise or.
- Value *Op1 = nullptr, *Op2 = nullptr;
- ConstantInt *CI = nullptr;
-
- auto SimplifyOp = [&](Value *V) {
- Value *Op = nullptr;
- if (Constant *C = dyn_cast<Constant>(V)) {
- Op = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
- } else if (RHSC->isNullValue()) {
- // If null is being compared, check if it can be further simplified.
- Op = SimplifyICmpInst(I.getPredicate(), V, RHSC, SQ);
- }
- return Op;
- };
- Op1 = SimplifyOp(LHSI->getOperand(1));
- if (Op1)
- CI = dyn_cast<ConstantInt>(Op1);
-
- Op2 = SimplifyOp(LHSI->getOperand(2));
- if (Op2)
- CI = dyn_cast<ConstantInt>(Op2);
-
- // We only want to perform this transformation if it will not lead to
- // additional code. This is true if either both sides of the select
- // fold to a constant (in which case the icmp is replaced with a select
- // which will usually simplify) or this is the only user of the
- // select (in which case we are trading a select+icmp for a simpler
- // select+icmp) or all uses of the select can be replaced based on
- // dominance information ("Global cases").
- bool Transform = false;
- if (Op1 && Op2)
- Transform = true;
- else if (Op1 || Op2) {
- // Local case
- if (LHSI->hasOneUse())
- Transform = true;
- // Global cases
- else if (CI && !CI->isZero())
- // When Op1 is constant try replacing select with second operand.
- // Otherwise Op2 is constant and try replacing select with first
- // operand.
- Transform =
- replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
- }
- if (Transform) {
- if (!Op1)
- Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
- I.getName());
- if (!Op2)
- Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
- I.getName());
- return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
- }
- break;
- }
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() &&
@@ -3476,19 +3511,72 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
case Instruction::Load:
// Try to optimize things like "A[i] > 4" to index computations.
if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
- }
+ if (Instruction *Res =
+ foldCmpLoadFromIndexedGlobal(cast<LoadInst>(LHSI), GEP, GV, I))
+ return Res;
break;
}
return nullptr;
}
+Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
+ SelectInst *SI, Value *RHS,
+ const ICmpInst &I) {
+ // Try to fold the comparison into the select arms, which will cause the
+ // select to be converted into a logical and/or.
+ auto SimplifyOp = [&](Value *Op, bool SelectCondIsTrue) -> Value * {
+ if (Value *Res = simplifyICmpInst(Pred, Op, RHS, SQ))
+ return Res;
+ if (Optional<bool> Impl = isImpliedCondition(SI->getCondition(), Pred, Op,
+ RHS, DL, SelectCondIsTrue))
+ return ConstantInt::get(I.getType(), *Impl);
+ return nullptr;
+ };
+
+ ConstantInt *CI = nullptr;
+ Value *Op1 = SimplifyOp(SI->getOperand(1), true);
+ if (Op1)
+ CI = dyn_cast<ConstantInt>(Op1);
+
+ Value *Op2 = SimplifyOp(SI->getOperand(2), false);
+ if (Op2)
+ CI = dyn_cast<ConstantInt>(Op2);
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp) or all uses of the select can be replaced based on
+ // dominance information ("Global cases").
+ bool Transform = false;
+ if (Op1 && Op2)
+ Transform = true;
+ else if (Op1 || Op2) {
+ // Local case
+ if (SI->hasOneUse())
+ Transform = true;
+ // Global cases
+ else if (CI && !CI->isZero())
+ // When Op1 is constant, try replacing the select with its second
+ // operand. Otherwise Op2 is constant; try replacing the select with
+ // its first operand.
+ Transform = replacedSelectWithOperand(SI, &I, Op1 ? 2 : 1);
+ }
+ if (Transform) {
+ if (!Op1)
+ Op1 = Builder.CreateICmp(Pred, SI->getOperand(1), RHS, I.getName());
+ if (!Op2)
+ Op2 = Builder.CreateICmp(Pred, SI->getOperand(2), RHS, I.getName());
+ return SelectInst::Create(SI->getOperand(0), Op1, Op2);
+ }
+
+ return nullptr;
+}
+
/// Some comparisons can be simplified.
/// In this case, we are looking for comparisons that look like
/// a check for a lossy truncation.
@@ -3756,7 +3844,7 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
// Can we fold (XShAmt+YShAmt) ?
auto *NewShAmt = dyn_cast_or_null<Constant>(
- SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
+ simplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
/*isNUW=*/false, SQ.getWithInstruction(&I)));
if (!NewShAmt)
return nullptr;
@@ -3957,6 +4045,24 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
{
+ // (Op1 + X) + C u</u>= Op1 --> ~C - X u</u>= Op1
+ Constant *C;
+ if (match(Op0, m_OneUse(m_Add(m_c_Add(m_Specific(Op1), m_Value(X)),
+ m_ImmConstant(C)))) &&
+ (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
+ Constant *C2 = ConstantExpr::getNot(C);
+ return new ICmpInst(Pred, Builder.CreateSub(C2, X), Op1);
+ }
+ // Op0 u>/u<= (Op0 + X) + C --> Op0 u>/u<= ~C - X
+ if (match(Op1, m_OneUse(m_Add(m_c_Add(m_Specific(Op0), m_Value(X)),
+ m_ImmConstant(C)))) &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) {
+ Constant *C2 = ConstantExpr::getNot(C);
+ return new ICmpInst(Pred, Op0, Builder.CreateSub(C2, X));
+ }
+ }
+
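The '~C - X' rewrite is an exact modular identity; an exhaustive i8-sized C++
check with an arbitrary C = 0xC3 (illustration only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C = 0xC3;
      for (uint32_t Op1 = 0; Op1 < 256; ++Op1)
        for (uint32_t X = 0; X < 256; ++X) {
          uint8_t L = uint8_t(Op1 + X + C); // (Op1 + X) + C
          uint8_t R = uint8_t(~C - X);      // ~C - X
          assert((L < Op1) == (R < Op1));   // u< Op1 agrees on both sides
        }
      return 0;
    }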
+ {
// Similar to above: an unsigned overflow comparison may use offset + mask:
// ((Op1 + C) & C) u< Op1 --> Op1 != 0
// ((Op1 + C) & C) u>= Op1 --> Op1 == 0
@@ -4114,29 +4220,38 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
// icmp (A + C1), (C + C2) -> icmp A, (C + C3)
// s.t. C3 = C2 - C1
if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
- (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
- if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
- const APInt &AP1 = C1->getValue();
- const APInt &AP2 = C2->getValue();
- if (AP1.isNegative() == AP2.isNegative()) {
- APInt AP1Abs = C1->getValue().abs();
- APInt AP2Abs = C2->getValue().abs();
- if (AP1Abs.uge(AP2Abs)) {
- ConstantInt *C3 = Builder.getInt(AP1 - AP2);
- bool HasNUW = BO0->hasNoUnsignedWrap() && C3->getValue().ule(AP1);
- bool HasNSW = BO0->hasNoSignedWrap();
- Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
- return new ICmpInst(Pred, NewAdd, C);
- } else {
- ConstantInt *C3 = Builder.getInt(AP2 - AP1);
- bool HasNUW = BO1->hasNoUnsignedWrap() && C3->getValue().ule(AP2);
- bool HasNSW = BO1->hasNoSignedWrap();
- Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
- return new ICmpInst(Pred, A, NewAdd);
- }
- }
+ (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) {
+ const APInt *AP1, *AP2;
+ // TODO: Support non-uniform vectors.
+ // TODO: Allow undef passthrough if both B's and D's elements are undef.
+ if (match(B, m_APIntAllowUndef(AP1)) && match(D, m_APIntAllowUndef(AP2)) &&
+ AP1->isNegative() == AP2->isNegative()) {
+ APInt AP1Abs = AP1->abs();
+ APInt AP2Abs = AP2->abs();
+ if (AP1Abs.uge(AP2Abs)) {
+ APInt Diff = *AP1 - *AP2;
+ bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
+ bool HasNSW = BO0->hasNoSignedWrap();
+ Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
+ Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+ return new ICmpInst(Pred, NewAdd, C);
+ } else {
+ APInt Diff = *AP2 - *AP1;
+ bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
+ bool HasNSW = BO1->hasNoSignedWrap();
+ Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
+ Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+ return new ICmpInst(Pred, A, NewAdd);
}
+ }
+ Constant *Cst1, *Cst2;
+ if (match(B, m_ImmConstant(Cst1)) && match(D, m_ImmConstant(Cst2)) &&
+ ICmpInst::isEquality(Pred)) {
+ Constant *Diff = ConstantExpr::getSub(Cst2, Cst1);
+ Value *NewAdd = Builder.CreateAdd(C, Diff);
+ return new ICmpInst(Pred, A, NewAdd);
+ }
+ }
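
The new equality-only branch needs no nsw/nuw because modular arithmetic
preserves equality; an exhaustive i8-sized C++ check (illustration only;
C1 = 200, C2 = 100, and C3 = C2 - C1 = 156 (mod 256) are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A < 256; ++A)
        for (uint32_t C = 0; C < 256; ++C) {
          bool Orig = uint8_t(A + 200) == uint8_t(C + 100);
          bool Fold = uint8_t(A) == uint8_t(C + 156); // icmp eq A, (C + C3)
          assert(Orig == Fold);
        }
      return 0;
    }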
// Analyze the case when either Op0 or Op1 is a sub instruction.
// Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
@@ -4524,18 +4639,21 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
// (A >> C) == (B >> C) --> (A^B) u< (1 << C)
// For lshr and ashr pairs.
- if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
- match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
- (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
- match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
- unsigned TypeBits = Cst1->getBitWidth();
- unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ const APInt *AP1, *AP2;
+ if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
+ match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowUndef(AP2))))) ||
+ (match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
+ match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowUndef(AP2)))))) {
+ if (AP1 != AP2)
+ return nullptr;
+ unsigned TypeBits = AP1->getBitWidth();
+ unsigned ShAmt = AP1->getLimitedValue(TypeBits);
if (ShAmt < TypeBits && ShAmt != 0) {
ICmpInst::Predicate NewPred =
Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
- return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
+ return new ICmpInst(NewPred, Xor, ConstantInt::get(A->getType(), CmpVal));
}
}
@@ -4665,8 +4783,7 @@ static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
return nullptr;
}
-static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
- InstCombiner::BuilderTy &Builder) {
+Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
assert(isa<CastInst>(ICmp.getOperand(0)) && "Expected cast for operand 0");
auto *CastOp0 = cast<CastInst>(ICmp.getOperand(0));
Value *X;
@@ -4675,25 +4792,37 @@ static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt;
bool IsSignedCmp = ICmp.isSigned();
- if (auto *CastOp1 = dyn_cast<CastInst>(ICmp.getOperand(1))) {
- // If the signedness of the two casts doesn't agree (i.e. one is a sext
- // and the other is a zext), then we can't handle this.
- // TODO: This is too strict. We can handle some predicates (equality?).
- if (CastOp0->getOpcode() != CastOp1->getOpcode())
- return nullptr;
+
+ // icmp Pred (ext X), (ext Y)
+ Value *Y;
+ if (match(ICmp.getOperand(1), m_ZExtOrSExt(m_Value(Y)))) {
+ bool IsZext0 = isa<ZExtOperator>(ICmp.getOperand(0));
+ bool IsZext1 = isa<ZExtOperator>(ICmp.getOperand(1));
+
+ // If we have mismatched casts, treat the zext of a non-negative source as
+ // a sext to simulate matching casts. Otherwise, we are done.
+ // TODO: Can we handle some predicates (equality) without non-negative?
+ if (IsZext0 != IsZext1) {
+ if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
+ (IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
+ IsSignedExt = true;
+ else
+ return nullptr;
+ }
// Not an extension from the same type?
- Value *Y = CastOp1->getOperand(0);
Type *XTy = X->getType(), *YTy = Y->getType();
if (XTy != YTy) {
// One of the casts must have one use because we are creating a new cast.
- if (!CastOp0->hasOneUse() && !CastOp1->hasOneUse())
+ if (!ICmp.getOperand(0)->hasOneUse() && !ICmp.getOperand(1)->hasOneUse())
return nullptr;
// Extend the narrower operand to the type of the wider operand.
+ CastInst::CastOps CastOpcode =
+ IsSignedExt ? Instruction::SExt : Instruction::ZExt;
if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits())
- X = Builder.CreateCast(CastOp0->getOpcode(), X, YTy);
+ X = Builder.CreateCast(CastOpcode, X, YTy);
else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits())
- Y = Builder.CreateCast(CastOp0->getOpcode(), Y, XTy);
+ Y = Builder.CreateCast(CastOpcode, Y, XTy);
else
return nullptr;
}
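
The mismatched-cast handling leans on one fact: zext of a value whose sign
bit is known clear produces the same bits as sext. A standalone C++ sketch at
i8 -> i16 (illustration only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int X = 0; X <= 127; ++X) { // non-negative i8 sources only
        uint16_t ZExt = static_cast<uint8_t>(X); // zext i8 -> i16
        uint16_t SExt = static_cast<uint16_t>(
            static_cast<int16_t>(static_cast<int8_t>(X))); // sext i8 -> i16
        assert(ZExt == SExt); // so the zext may be treated as a sext
      }
      return 0;
    }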
@@ -4742,7 +4871,7 @@ static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp,
// or could not be determined to be equal (in the case of a constant
// expression), so the constant cannot be represented in the shorter type.
// All the cases that fold to true or false will have already been handled
- // by SimplifyICmpInst, so only deal with the tricky case.
+ // by simplifyICmpInst, so only deal with the tricky case.
if (IsSignedCmp || !IsSignedExt || !isa<ConstantInt>(C))
return nullptr;
@@ -4811,7 +4940,7 @@ Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
if (Instruction *R = foldICmpWithTrunc(ICmp, Builder))
return R;
- return foldICmpWithZextOrSext(ICmp, Builder);
+ return foldICmpWithZextOrSext(ICmp);
}
static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
@@ -5449,35 +5578,23 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
LHS = Op0;
Value *X;
- if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
- APInt ValToCheck = Op0KnownZeroInverted;
+ const APInt *C1;
+ if (match(LHS, m_Shl(m_Power2(C1), m_Value(X)))) {
Type *XTy = X->getType();
- if (ValToCheck.isPowerOf2()) {
- // ((1 << X) & 8) == 0 -> X != 3
- // ((1 << X) & 8) != 0 -> X == 3
- auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
- auto NewPred = ICmpInst::getInversePredicate(Pred);
- return new ICmpInst(NewPred, X, CmpC);
- } else if ((++ValToCheck).isPowerOf2()) {
- // ((1 << X) & 7) == 0 -> X >= 3
- // ((1 << X) & 7) != 0 -> X < 3
- auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
+ unsigned Log2C1 = C1->countTrailingZeros();
+ APInt C2 = Op0KnownZeroInverted;
+ APInt C2Pow2 = (C2 & ~(*C1 - 1)) + *C1;
+ if (C2Pow2.isPowerOf2()) {
+ // iff C1 is a pow2 and ((C2 & ~(C1-1)) + C1) is a pow2:
+ // ((C1 << X) & C2) == 0 -> X >= (Log2(C2+C1) - Log2(C1))
+ // ((C1 << X) & C2) != 0 -> X < (Log2(C2+C1) - Log2(C1))
+ unsigned Log2C2 = C2Pow2.countTrailingZeros();
+ auto *CmpC = ConstantInt::get(XTy, Log2C2 - Log2C1);
auto NewPred =
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
return new ICmpInst(NewPred, X, CmpC);
}
}
-
- // Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
- const APInt *CI;
- if (Op0KnownZeroInverted.isOne() &&
- match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
- // ((8 >>u X) & 1) == 0 -> X != 3
- // ((8 >>u X) & 1) != 0 -> X == 3
- unsigned CmpVal = CI->countTrailingZeros();
- auto NewPred = ICmpInst::getInversePredicate(Pred);
- return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
- }
}
break;
}
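
A worked instance of the generalized shift test in the hunk above, as a
standalone C++ check (illustration only): with C1 = 4 and C2 = 60,
(C2 & ~(C1-1)) + C1 = 64 is a power of 2, so the bound is
Log2(64) - Log2(4) = 4:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 8; ++X) {             // valid i8 shift amounts
        uint8_t Shl = static_cast<uint8_t>(4u << X); // shl i8 4, X
        assert(((Shl & 60u) == 0u) == (X >= 4u));    // ((C1 << X) & C2) == 0
      }
      return 0;
    }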
@@ -5557,6 +5674,28 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return nullptr;
}
+/// If one operand of an icmp is effectively a bool (value range of {0,1}),
+/// then try to reduce patterns based on that limit.
+static Instruction *foldICmpUsingBoolRange(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X, *Y;
+ ICmpInst::Predicate Pred;
+
+ // X must be 0 and bool must be true for "ULT":
+ // X <u (zext i1 Y) --> (X == 0) & Y
+ if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_ZExt(m_Value(Y))))) &&
+ Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULT)
+ return BinaryOperator::CreateAnd(Builder.CreateIsNull(X), Y);
+
+ // X must be 0 or bool must be true for "ULE":
+ // X <=u (sext i1 Y) --> (X == 0) | Y
+ if (match(&I, m_c_ICmp(Pred, m_Value(X), m_OneUse(m_SExt(m_Value(Y))))) &&
+ Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
+ return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
+
+ return nullptr;
+}
+
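Both reductions check out exhaustively at i8 width; a standalone C++ sketch
(illustration only) with the i1 extensions modeled explicitly:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 256; ++X)
        for (bool Y : {false, true}) {
          uint8_t Z = Y ? 1 : 0;    // zext i1 Y to i8
          uint8_t S = Y ? 0xFF : 0; // sext i1 Y to i8
          assert((uint8_t(X) < Z) == ((X == 0u) && Y));  // ult form
          assert((uint8_t(X) <= S) == ((X == 0u) || Y)); // ule form
        }
      return 0;
    }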
llvm::Optional<std::pair<CmpInst::Predicate, Constant *>>
InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
Constant *C) {
@@ -5948,7 +6087,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
Changed = true;
}
- if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
+ if (Value *V = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
return replaceInstUsesWith(I, V);
// Comparing -val or val with non-zero is the same as just comparing val
@@ -5984,6 +6123,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithDominatingICmp(I))
return Res;
+ if (Instruction *Res = foldICmpUsingBoolRange(I, Builder))
+ return Res;
+
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
@@ -6057,14 +6199,21 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
return NI;
+ if (auto *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I))
+ return NI;
+ if (auto *SI = dyn_cast<SelectInst>(Op1))
+ if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I))
+ return NI;
+
// Try to optimize equality comparisons against alloca-based pointers.
if (Op0->getType()->isPointerTy() && I.isEquality()) {
assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0)))
- if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
+ if (Instruction *New = foldAllocaCmp(I, Alloca))
return New;
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1)))
- if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
+ if (Instruction *New = foldAllocaCmp(I, Alloca))
return New;
}
@@ -6529,6 +6678,25 @@ static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
}
}
+static Instruction *foldFCmpFNegCommonOp(FCmpInst &I) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // Canonicalize fneg as Op1.
+ if (match(Op0, m_FNeg(m_Value())) && !match(Op1, m_FNeg(m_Value()))) {
+ std::swap(Op0, Op1);
+ Pred = I.getSwappedPredicate();
+ }
+
+ if (!match(Op1, m_FNeg(m_Specific(Op0))))
+ return nullptr;
+
+ // Replace the negated operand with 0.0:
+ // fcmp Pred Op0, -Op0 --> fcmp Pred Op0, 0.0
+ Constant *Zero = ConstantFP::getNullValue(Op0->getType());
+ return new FCmpInst(Pred, Op0, Zero, "", &I);
+}
+
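X and -X share a magnitude, so only the sign (or NaN-ness) of X decides the
compare; a standalone C++ spot check over the interesting values
(illustration only, assuming IEEE-754 doubles):

    #include <cassert>
    #include <limits>

    int main() {
      const double Inf = std::numeric_limits<double>::infinity();
      const double NaN = std::numeric_limits<double>::quiet_NaN();
      for (double X : {0.0, -0.0, 1.5, -1.5, Inf, -Inf, NaN}) {
        assert((X > -X) == (X > 0.0));   // fcmp ogt X, -X --> fcmp ogt X, 0.0
        assert((X < -X) == (X < 0.0));   // fcmp olt X, -X --> fcmp olt X, 0.0
        assert((X == -X) == (X == 0.0)); // fcmp oeq X, -X --> fcmp oeq X, 0.0
      }
      return 0;
    }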
Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
bool Changed = false;
@@ -6542,7 +6710,7 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
const CmpInst::Predicate Pred = I.getPredicate();
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
+ if (Value *V = simplifyFCmpInst(Pred, Op0, Op1, I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -6587,6 +6755,9 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+ if (Instruction *R = foldFCmpFNegCommonOp(I))
+ return R;
+
// Test if the FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
@@ -6632,10 +6803,9 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
case Instruction::Load:
if (auto *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
+ if (Instruction *Res = foldCmpLoadFromIndexedGlobal(
+ cast<LoadInst>(LHSI), GEP, GV, I))
+ return Res;
break;
}
}
@@ -6657,7 +6827,6 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
if (match(Op1, m_FPExt(m_Value(Y))) && X->getType() == Y->getType())
return new FCmpInst(Pred, X, Y, "", &I);
- // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
const APFloat *C;
if (match(Op1, m_APFloat(C))) {
const fltSemantics &FPSem =
@@ -6666,6 +6835,31 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
APFloat TruncC = *C;
TruncC.convert(FPSem, APFloat::rmNearestTiesToEven, &Lossy);
+ if (Lossy) {
+ // X can't possibly equal the higher-precision constant, so reduce any
+ // equality comparison.
+ // TODO: Other predicates can be handled via getFCmpCode().
+ switch (Pred) {
+ case FCmpInst::FCMP_OEQ:
+ // X is ordered and equal to an impossible constant --> false
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ case FCmpInst::FCMP_ONE:
+ // X is ordered and not equal to an impossible constant --> ordered
+ return new FCmpInst(FCmpInst::FCMP_ORD, X,
+ ConstantFP::getNullValue(X->getType()));
+ case FCmpInst::FCMP_UEQ:
+ // X is unordered or equal to an impossible constant --> unordered
+ return new FCmpInst(FCmpInst::FCMP_UNO, X,
+ ConstantFP::getNullValue(X->getType()));
+ case FCmpInst::FCMP_UNE:
+ // X is unordered or not equal to an impossible constant --> true
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ default:
+ break;
+ }
+ }
+
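The underlying fact: fpext is exact, so 'fpext(X) == C' can only be satisfied
if C survives a round-trip through the narrower type. A minimal C++ check with
the classic non-representable constant (illustration only, assuming IEEE-754
float/double):

    #include <cassert>

    int main() {
      const double C = 0.1;                  // no exact float equivalent
      const float F = static_cast<float>(C); // nearest float to C
      // Even the closest float widens to something other than C, so
      // 'fcmp oeq (fpext X), 0.1' is false for every float X.
      assert(static_cast<double>(F) != C);
      return 0;
    }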
+ // fcmp (fpext X), C -> fcmp X, (fptrunc C) if fptrunc is lossless
// Avoid lossy conversions and denormals.
// Zero is a special case that's OK to convert.
APFloat Fabs = TruncC;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 7743b4c41555..271154bb3f5a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -71,7 +71,7 @@ public:
: InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE,
BFI, PSI, DL, LI) {}
- virtual ~InstCombinerImpl() {}
+ virtual ~InstCombinerImpl() = default;
/// Run the combiner over the entire worklist until it is empty.
///
@@ -172,7 +172,8 @@ public:
Instruction *visitLandingPadInst(LandingPadInst &LI);
Instruction *visitVAEndInst(VAEndInst &I);
Value *pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI);
- bool freezeDominatedUses(FreezeInst &FI);
+ bool freezeOtherUses(FreezeInst &FI);
+ Instruction *foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN);
Instruction *visitFreeze(FreezeInst &I);
/// Specify what to return for unhandled instructions.
@@ -192,7 +193,7 @@ public:
const Twine &Suffix = "");
private:
- void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
+ bool annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
bool isDesirableIntType(unsigned BitWidth) const;
bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
bool shouldChangeType(Type *From, Type *To) const;
@@ -325,7 +326,7 @@ private:
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
Instruction *narrowFunnelShift(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
- Instruction *matchSAddSubSat(Instruction &MinMax1);
+ Instruction *matchSAddSubSat(IntrinsicInst &MinMax1);
Instruction *foldNot(BinaryOperator &I);
void freelyInvertAllUsersOf(Value *V);
@@ -344,16 +345,20 @@ private:
const CastInst *CI2);
Value *simplifyIntToPtrRoundTripCast(Value *Val);
- Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &And);
- Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Or);
+ Value *foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &I,
+ bool IsAnd, bool IsLogical = false);
Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor);
Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd);
+ Value *foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1, ICmpInst *ICmp2,
+ bool IsAnd);
+
/// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
/// NOTE: Unlike most of instcombine, this returns a Value which should
/// already be inserted into the function.
- Value *foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd);
+ Value *foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd,
+ bool IsLogicalSelect = false);
Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
Instruction *CxtI, bool IsAnd,
@@ -407,7 +412,7 @@ public:
// If we are replacing the instruction with itself, this must be in a
// segment of unreachable code, so just clobber the instruction.
if (&I == V)
- V = UndefValue::get(I.getType());
+ V = PoisonValue::get(I.getType());
LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n"
<< " with " << *V << '\n');
@@ -435,7 +440,7 @@ public:
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
auto &Ctx = InsertAt->getContext();
new StoreInst(ConstantInt::getTrue(Ctx),
- UndefValue::get(Type::getInt1PtrTy(Ctx)),
+ PoisonValue::get(Type::getInt1PtrTy(Ctx)),
InsertAt);
}
@@ -621,7 +626,8 @@ public:
/// other operand, try to fold the binary operator into the select arguments.
/// This also works for Cast instructions, which obviously do not have a
/// second operand.
- Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
+ Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
+ bool FoldWithMultiUse = false);
/// This is a convenience wrapper function for the above two functions.
Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I);
@@ -650,22 +656,27 @@ public:
Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
- Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca,
- const Value *Other);
- Instruction *foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ Instruction *foldSelectICmp(ICmpInst::Predicate Pred, SelectInst *SI,
+ Value *RHS, const ICmpInst &I);
+ Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca);
+ Instruction *foldCmpLoadFromIndexedGlobal(LoadInst *LI,
+ GetElementPtrInst *GEP,
GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst = nullptr);
Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
Constant *RHSC);
Instruction *foldICmpAddOpConst(Value *X, const APInt &C,
ICmpInst::Predicate Pred);
- Instruction *foldICmpWithCastOp(ICmpInst &ICI);
+ Instruction *foldICmpWithCastOp(ICmpInst &ICmp);
+ Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp);
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
+ Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
+ const APInt &C);
Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ);
Instruction *foldICmpEquality(ICmpInst &Cmp);
Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I);
@@ -674,6 +685,8 @@ public:
Value *foldMultiplicationOverflowCheck(ICmpInst &Cmp);
+ Instruction *foldICmpBinOpWithConstant(ICmpInst &Cmp, BinaryOperator *BO,
+ const APInt &C);
Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
ConstantInt *C);
Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 756792918dba..e03b7026f802 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -16,15 +16,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
@@ -775,7 +772,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Make sure that, even if the multiplication below would wrap as a
// uint64_t, we still do the right thing.
- if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+ if ((CS->getValue().zext(128) * APInt(128, TypeSize)).ugt(MaxSize))
return false;
continue;
}
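// A standalone sketch (not part of the patch; assumes __int128 support) of
// the widening trick in the hunk above: promoting both factors before the
// multiply means the product cannot wrap, so the comparison against MaxSize
// stays truthful even for huge allocation counts.
#include <cstdint>
constexpr bool sizeExceeds(uint64_t Count, uint64_t TypeSize, uint64_t MaxSize) {
  return (unsigned __int128)Count * TypeSize > MaxSize; // no 64-bit wraparound
}
static_assert(!sizeExceeds(4, 8, 32), "4 * 8 == 32 still fits");
static_assert(sizeExceeds(UINT64_MAX, 2, UINT64_MAX), "would wrap at 64 bits");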
@@ -1395,8 +1392,10 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
- if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
- SI.getOperand(1))) {
+ if (PrevSI->isUnordered() &&
+ equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
+ PrevSI->getValueOperand()->getType() ==
+ SI.getValueOperand()->getType()) {
++NumDeadStore;
// Manually add back the original store to the worklist now, so it will
// be processed after the operands of the removed store, as this may
@@ -1436,6 +1435,8 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
}
// store undef, Ptr -> noop
+ // FIXME: This is technically incorrect because it might overwrite a poison
+ // value. Change to PoisonValue once #52930 is resolved.
if (isa<UndefValue>(Val))
return eraseInstFromFunction(SI);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 1aa10b550fc4..2a34edbf6cb8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -30,13 +29,9 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <utility>
#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
@@ -145,7 +140,7 @@ static Value *foldMulSelectToNegate(BinaryOperator &I,
}
Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
- if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyMulInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -297,15 +292,24 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
: Instruction::SRem;
- Value *Rem = Builder.CreateBinOp(RemOpc, X, DivOp1);
+ // X must be frozen because we are increasing its number of uses.
+ Value *XFreeze = Builder.CreateFreeze(X, X->getName() + ".fr");
+ Value *Rem = Builder.CreateBinOp(RemOpc, XFreeze, DivOp1);
if (DivOp1 == Y)
- return BinaryOperator::CreateSub(X, Rem);
- return BinaryOperator::CreateSub(Rem, X);
+ return BinaryOperator::CreateSub(XFreeze, Rem);
+ return BinaryOperator::CreateSub(Rem, XFreeze);
}
}
- /// i1 mul -> i1 and.
- if (I.getType()->isIntOrIntVectorTy(1))
+ // Fold the following two scenarios:
+ // 1) i1 mul -> i1 and.
+ // 2) X * Y --> X & Y, iff X, Y can be only {0,1}.
+ // Note: We could use known bits to generalize this and related patterns with
+ // shifts/truncs
+ Type *Ty = I.getType();
+ if (Ty->isIntOrIntVectorTy(1) ||
+ (match(Op0, m_And(m_Value(), m_One())) &&
+ match(Op1, m_And(m_Value(), m_One()))))
return BinaryOperator::CreateAnd(Op0, Op1);
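// A small sketch (32-bit unsigned) of why the second scenario is sound:
// once both operands are masked down to {0,1}, multiply and bitwise AND
// compute the same function.
#include <cstdint>
constexpr bool mulMatchesAnd(uint32_t X, uint32_t Y) {
  uint32_t A = X & 1, B = Y & 1; // operands provably in {0,1}
  return A * B == (A & B);
}
static_assert(mulMatchesAnd(0, 0) && mulMatchesAnd(0, 1) &&
              mulMatchesAnd(7, 1) && mulMatchesAnd(3, 6), "");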
// X*(1 << Y) --> X << Y
@@ -338,7 +342,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
(Op0->hasOneUse() || Op1->hasOneUse() || X == Y)) {
Value *And = Builder.CreateAnd(X, Y, "mulbool");
- return CastInst::Create(Instruction::ZExt, And, I.getType());
+ return CastInst::Create(Instruction::ZExt, And, Ty);
}
// (sext bool X) * (zext bool Y) --> sext (and X, Y)
// (zext bool X) * (sext bool Y) --> sext (and X, Y)
@@ -348,42 +352,56 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
(Op0->hasOneUse() || Op1->hasOneUse())) {
Value *And = Builder.CreateAnd(X, Y, "mulbool");
- return CastInst::Create(Instruction::SExt, And, I.getType());
+ return CastInst::Create(Instruction::SExt, And, Ty);
}
// (zext bool X) * Y --> X ? Y : 0
// Y * (zext bool X) --> X ? Y : 0
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(X, Op1, ConstantInt::get(I.getType(), 0));
+ return SelectInst::Create(X, Op1, ConstantInt::getNullValue(Ty));
if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(X, Op0, ConstantInt::get(I.getType(), 0));
+ return SelectInst::Create(X, Op0, ConstantInt::getNullValue(Ty));
- // (sext bool X) * C --> X ? -C : 0
Constant *ImmC;
- if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1) &&
- match(Op1, m_ImmConstant(ImmC))) {
- Constant *NegC = ConstantExpr::getNeg(ImmC);
- return SelectInst::Create(X, NegC, ConstantInt::getNullValue(I.getType()));
+ if (match(Op1, m_ImmConstant(ImmC))) {
+ // (sext bool X) * C --> X ? -C : 0
+ if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
+ Constant *NegC = ConstantExpr::getNeg(ImmC);
+ return SelectInst::Create(X, NegC, ConstantInt::getNullValue(Ty));
+ }
+
+ // (ashr i32 X, 31) * C --> (X < 0) ? -C : 0
+ const APInt *C;
+ if (match(Op0, m_OneUse(m_AShr(m_Value(X), m_APInt(C)))) &&
+ *C == C->getBitWidth() - 1) {
+ Constant *NegC = ConstantExpr::getNeg(ImmC);
+ Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
+ return SelectInst::Create(IsNeg, NegC, ConstantInt::getNullValue(Ty));
+ }
}
- // (lshr X, 31) * Y --> (ashr X, 31) & Y
- // Y * (lshr X, 31) --> (ashr X, 31) & Y
+ // (lshr X, 31) * Y --> (X < 0) ? Y : 0
// TODO: We are not checking one-use because the elimination of the multiply
// is better for analysis?
- // TODO: Should we canonicalize to '(X < 0) ? Y : 0' instead? That would be
- // more similar to what we're doing above.
const APInt *C;
- if (match(Op0, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1)
- return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op1);
- if (match(Op1, m_LShr(m_Value(X), m_APInt(C))) && *C == C->getBitWidth() - 1)
- return BinaryOperator::CreateAnd(Builder.CreateAShr(X, *C), Op0);
+ if (match(&I, m_c_BinOp(m_LShr(m_Value(X), m_APInt(C)), m_Value(Y))) &&
+ *C == C->getBitWidth() - 1) {
+ Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
+ return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
+ }
+
+ // (and X, 1) * Y --> (trunc X) ? Y : 0
+ if (match(&I, m_c_BinOp(m_OneUse(m_And(m_Value(X), m_One())), m_Value(Y)))) {
+ Value *Tr = Builder.CreateTrunc(X, CmpInst::makeCmpResultType(Ty));
+ return SelectInst::Create(Tr, Y, ConstantInt::getNullValue(Ty));
+ }
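// A sketch (32-bit; logical shift for lshr, two's-complement conversions)
// of the two select folds introduced above: a sign bit or a low bit used
// as a multiplier merely selects between Y and 0.
#include <cstdint>
constexpr bool mulToSelectHolds(int32_t X, uint32_t Y) {
  uint32_t SignBit = (uint32_t)X >> 31;                  // (lshr X, 31)
  bool SignCase = SignBit * Y == (X < 0 ? Y : 0u);       // (X < 0) ? Y : 0
  bool LowBitCase = ((uint32_t)X & 1) * Y == ((X & 1) ? Y : 0u);
  return SignCase && LowBitCase;
}
static_assert(mulToSelectHolds(-5, 7) && mulToSelectHolds(4, 9) &&
              mulToSelectHolds(1, 3), "");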
// ((ashr X, 31) | 1) * X --> abs(X)
// X * ((ashr X, 31) | 1) --> abs(X)
if (match(&I, m_c_BinOp(m_Or(m_AShr(m_Value(X),
- m_SpecificIntAllowUndef(BitWidth - 1)),
- m_One()),
- m_Deferred(X)))) {
+ m_SpecificIntAllowUndef(BitWidth - 1)),
+ m_One()),
+ m_Deferred(X)))) {
Value *Abs = Builder.CreateBinaryIntrinsic(
Intrinsic::abs, X,
ConstantInt::getBool(I.getContext(), I.hasNoSignedWrap()));
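// A sketch (32-bit; arithmetic right shift; X != INT32_MIN so the negation
// is defined) of the abs fold: (ashr X, 31) | 1 evaluates to -1 for
// negative X and +1 otherwise, so the product is |X|.
#include <cstdint>
constexpr bool absFoldHolds(int32_t X) {
  int32_t Mul = (X >> 31) | 1;
  return Mul * X == (X < 0 ? -X : X);
}
static_assert(absFoldHolds(-9) && absFoldHolds(0) && absFoldHolds(42), "");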
@@ -442,7 +460,7 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
}
Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
- if (Value *V = SimplifyFMulInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyFMulInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -532,9 +550,8 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
// sqrt(X) * sqrt(Y) -> sqrt(X * Y)
// nnan disallows the possibility of returning a number if both operands are
// negative (in that case, we should return NaN).
- if (I.hasNoNaNs() &&
- match(Op0, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(X)))) &&
- match(Op1, m_OneUse(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
+ if (I.hasNoNaNs() && match(Op0, m_OneUse(m_Sqrt(m_Value(X)))) &&
+ match(Op1, m_OneUse(m_Sqrt(m_Value(Y))))) {
Value *XY = Builder.CreateFMulFMF(X, Y, &I);
Value *Sqrt = Builder.CreateUnaryIntrinsic(Intrinsic::sqrt, XY, &I);
return replaceInstUsesWith(I, Sqrt);
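// A numerical sketch of sqrt(X) * sqrt(Y) == sqrt(X * Y); the inputs are
// chosen so every intermediate value is exact in binary floating point,
// since general FP equality would only hold approximately. The nnan
// requirement above rules out the two-negative-operands case.
#include <cassert>
#include <cmath>
int main() {
  double X = 2.25, Y = 4.0;
  assert(std::sqrt(X) * std::sqrt(Y) == std::sqrt(X * Y)); // 1.5 * 2.0 == 3.0
  return 0;
}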
@@ -548,11 +565,11 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
// has the necessary (reassoc) fast-math-flags.
if (I.hasNoSignedZeros() &&
match(Op0, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
- match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op1 == X)
+ match(Y, m_Sqrt(m_Value(X))) && Op1 == X)
return BinaryOperator::CreateFDivFMF(X, Y, &I);
if (I.hasNoSignedZeros() &&
match(Op1, (m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) &&
- match(Y, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) && Op0 == X)
+ match(Y, m_Sqrt(m_Value(X))) && Op0 == X)
return BinaryOperator::CreateFDivFMF(X, Y, &I);
// Like the similar transform in instsimplify, this requires 'nsz' because
@@ -561,14 +578,12 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
Op0->hasNUses(2)) {
// Peek through fdiv to find squaring of square root:
// (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
- if (match(Op0, m_FDiv(m_Value(X),
- m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
+ if (match(Op0, m_FDiv(m_Value(X), m_Sqrt(m_Value(Y))))) {
Value *XX = Builder.CreateFMulFMF(X, X, &I);
return BinaryOperator::CreateFDivFMF(XX, Y, &I);
}
// (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
- if (match(Op0, m_FDiv(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y)),
- m_Value(X)))) {
+ if (match(Op0, m_FDiv(m_Sqrt(m_Value(Y)), m_Value(X)))) {
Value *XX = Builder.CreateFMulFMF(X, X, &I);
return BinaryOperator::CreateFDivFMF(Y, XX, &I);
}
@@ -777,7 +792,8 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
// TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
if (match(Op0, m_ImmConstant()) &&
match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
- if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1)))
+ if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1),
+ /*FoldWithMultiUse*/ true))
return R;
}
@@ -853,12 +869,13 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
if (match(Op0, m_One())) {
assert(!Ty->isIntOrIntVectorTy(1) && "i1 divide not removed?");
if (IsSigned) {
- // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the
- // result is one, if Op1 is -1 then the result is minus one, otherwise
- // it's zero.
- Value *Inc = Builder.CreateAdd(Op1, Op0);
+ // 1 / 0 --> undef ; 1 / 1 --> 1 ; 1 / -1 --> -1 ; 1 / anything else --> 0
+ // (Op1 + 1) u< 3 ? Op1 : 0
+ // Op1 must be frozen because we are increasing its number of uses.
+ Value *F1 = Builder.CreateFreeze(Op1, Op1->getName() + ".fr");
+ Value *Inc = Builder.CreateAdd(F1, Op0);
Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(Ty, 3));
- return SelectInst::Create(Cmp, Op1, ConstantInt::get(Ty, 0));
+ return SelectInst::Create(Cmp, F1, ConstantInt::get(Ty, 0));
} else {
// If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
// result is one, otherwise it's zero.
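// A standalone check (32-bit; D != 0, matching the UB caveat above) of the
// select that now replaces 1 / D; the add is done in unsigned arithmetic to
// mirror the wrapping IR add.
#include <cstdint>
constexpr bool oneDivFoldHolds(int32_t D) {
  uint32_t Inc = (uint32_t)D + 1u;        // (D + 1), wrapping
  int32_t Sel = Inc < 3u ? D : 0;         // (D + 1) u< 3 ? D : 0
  return Sel == 1 / D;
}
static_assert(oneDivFoldHolds(1) && oneDivFoldHolds(-1) &&
              oneDivFoldHolds(2) && oneDivFoldHolds(-7), "");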
@@ -900,113 +917,69 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
static const unsigned MaxDepth = 6;
-namespace {
-
-using FoldUDivOperandCb = Instruction *(*)(Value *Op0, Value *Op1,
- const BinaryOperator &I,
- InstCombinerImpl &IC);
-
-/// Used to maintain state for visitUDivOperand().
-struct UDivFoldAction {
- /// Informs visitUDiv() how to fold this operand. This can be zero if this
- /// action joins two actions together.
- FoldUDivOperandCb FoldAction;
-
- /// Which operand to fold.
- Value *OperandToFold;
-
- union {
- /// The instruction returned when FoldAction is invoked.
- Instruction *FoldResult;
-
- /// Stores the LHS action index if this action joins two actions together.
- size_t SelectLHSIdx;
+// Take the exact integer log2 of the value. If DoFold is true, create the
+// actual instructions; otherwise, return a non-null dummy value. Return
+// nullptr on failure.
+static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
+ bool DoFold) {
+ auto IfFold = [DoFold](function_ref<Value *()> Fn) {
+ if (!DoFold)
+ return reinterpret_cast<Value *>(-1);
+ return Fn();
};
- UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand)
- : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {}
- UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS)
- : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {}
-};
-
-} // end anonymous namespace
-
-// X udiv 2^C -> X >> C
-static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
- const BinaryOperator &I,
- InstCombinerImpl &IC) {
- Constant *C1 = ConstantExpr::getExactLogBase2(cast<Constant>(Op1));
- if (!C1)
- llvm_unreachable("Failed to constant fold udiv -> logbase2");
- BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, C1);
- if (I.isExact())
- LShr->setIsExact();
- return LShr;
-}
-
-// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
-// X udiv (zext (C1 << N)), where C1 is "1<<C2" --> X >> (N+C2)
-static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
- InstCombinerImpl &IC) {
- Value *ShiftLeft;
- if (!match(Op1, m_ZExt(m_Value(ShiftLeft))))
- ShiftLeft = Op1;
-
- Constant *CI;
- Value *N;
- if (!match(ShiftLeft, m_Shl(m_Constant(CI), m_Value(N))))
- llvm_unreachable("match should never fail here!");
- Constant *Log2Base = ConstantExpr::getExactLogBase2(CI);
- if (!Log2Base)
- llvm_unreachable("getLogBase2 should never fail here!");
- N = IC.Builder.CreateAdd(N, Log2Base);
- if (Op1 != ShiftLeft)
- N = IC.Builder.CreateZExt(N, Op1->getType());
- BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
- if (I.isExact())
- LShr->setIsExact();
- return LShr;
-}
-
-// Recursively visits the possible right hand operands of a udiv
-// instruction, seeing through select instructions, to determine if we can
-// replace the udiv with something simpler. If we find that an operand is not
-// able to simplify the udiv, we abort the entire transformation.
-static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
- SmallVectorImpl<UDivFoldAction> &Actions,
- unsigned Depth = 0) {
// FIXME: assert that Op1 isn't/doesn't contain undef.
- // Check to see if this is an unsigned division with an exact power of 2,
- // if so, convert to a right shift.
- if (match(Op1, m_Power2())) {
- Actions.push_back(UDivFoldAction(foldUDivPow2Cst, Op1));
- return Actions.size();
- }
-
- // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
- if (match(Op1, m_Shl(m_Power2(), m_Value())) ||
- match(Op1, m_ZExt(m_Shl(m_Power2(), m_Value())))) {
- Actions.push_back(UDivFoldAction(foldUDivShl, Op1));
- return Actions.size();
- }
+ // log2(2^C) -> C
+ if (match(Op, m_Power2()))
+ return IfFold([&]() {
+ Constant *C = ConstantExpr::getExactLogBase2(cast<Constant>(Op));
+ if (!C)
+ llvm_unreachable("Failed to constant fold udiv -> logbase2");
+ return C;
+ });
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ == MaxDepth)
- return 0;
-
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- // FIXME: missed optimization: if one of the hands of select is/contains
- // undef, just directly pick the other one.
- // FIXME: can both hands contain undef?
- if (size_t LHSIdx =
- visitUDivOperand(Op0, SI->getOperand(1), I, Actions, Depth))
- if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions, Depth)) {
- Actions.push_back(UDivFoldAction(nullptr, Op1, LHSIdx - 1));
- return Actions.size();
- }
+ return nullptr;
+
+ // log2(zext X) -> zext log2(X)
+ // FIXME: Require one use?
+ Value *X, *Y;
+ if (match(Op, m_ZExt(m_Value(X))))
+ if (Value *LogX = takeLog2(Builder, X, Depth, DoFold))
+ return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); });
+
+ // log2(X << Y) -> log2(X) + Y
+ // FIXME: Require one use unless X is 1?
+ if (match(Op, m_Shl(m_Value(X), m_Value(Y))))
+ if (Value *LogX = takeLog2(Builder, X, Depth, DoFold))
+ return IfFold([&]() { return Builder.CreateAdd(LogX, Y); });
+
+ // log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y)
+ // FIXME: missed optimization: if one of the hands of select is/contains
+ // undef, just directly pick the other one.
+ // FIXME: can both hands contain undef?
+ // FIXME: Require one use?
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op))
+ if (Value *LogX = takeLog2(Builder, SI->getOperand(1), Depth, DoFold))
+ if (Value *LogY = takeLog2(Builder, SI->getOperand(2), Depth, DoFold))
+ return IfFold([&]() {
+ return Builder.CreateSelect(SI->getOperand(0), LogX, LogY);
+ });
+
+ // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
+ // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
+ auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op);
+ if (MinMax && MinMax->hasOneUse() && !MinMax->isSigned())
+ if (Value *LogX = takeLog2(Builder, MinMax->getLHS(), Depth, DoFold))
+ if (Value *LogY = takeLog2(Builder, MinMax->getRHS(), Depth, DoFold))
+ return IfFold([&]() {
+ return Builder.CreateBinaryIntrinsic(
+ MinMax->getIntrinsicID(), LogX, LogY);
+ });
- return 0;
+ return nullptr;
}
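// A standalone sketch (not part of the patch; unsigned 32-bit, every leaf
// an exact power of two) of the identities takeLog2 recurses on. Note that
// the caller probes with DoFold=false first and only builds IR on a second,
// DoFold=true pass.
#include <cstdint>
constexpr uint32_t log2Exact(uint32_t P) { // P must be a power of two
  uint32_t L = 0;
  while (P > 1) { P >>= 1; ++L; }
  return L;
}
static_assert(log2Exact(8u << 3) == log2Exact(8u) + 3,
              "log2(X << Y) == log2(X) + Y");
static_assert(log2Exact(true ? 4u : 16u) ==
              (true ? log2Exact(4u) : log2Exact(16u)),
              "log2 commutes with select");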
/// If we have zero-extended operands of an unsigned div or rem, we may be able
@@ -1047,7 +1020,7 @@ static Instruction *narrowUDivURem(BinaryOperator &I,
}
Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
- if (Value *V = SimplifyUDivInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyUDivInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1106,42 +1079,18 @@ Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
return BinaryOperator::CreateUDiv(A, X);
}
- // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
- SmallVector<UDivFoldAction, 6> UDivActions;
- if (visitUDivOperand(Op0, Op1, I, UDivActions))
- for (unsigned i = 0, e = UDivActions.size(); i != e; ++i) {
- FoldUDivOperandCb Action = UDivActions[i].FoldAction;
- Value *ActionOp1 = UDivActions[i].OperandToFold;
- Instruction *Inst;
- if (Action)
- Inst = Action(Op0, ActionOp1, I, *this);
- else {
- // This action joins two actions together. The RHS of this action is
- // simply the last action we processed, we saved the LHS action index in
- // the joining action.
- size_t SelectRHSIdx = i - 1;
- Value *SelectRHS = UDivActions[SelectRHSIdx].FoldResult;
- size_t SelectLHSIdx = UDivActions[i].SelectLHSIdx;
- Value *SelectLHS = UDivActions[SelectLHSIdx].FoldResult;
- Inst = SelectInst::Create(cast<SelectInst>(ActionOp1)->getCondition(),
- SelectLHS, SelectRHS);
- }
-
- // If this is the last action to process, return it to the InstCombiner.
- // Otherwise, we insert it before the UDiv and record it so that we may
- // use it as part of a joining action (i.e., a SelectInst).
- if (e - i != 1) {
- Inst->insertBefore(&I);
- UDivActions[i].FoldResult = Inst;
- } else
- return Inst;
- }
+ // Op1 udiv Op2 -> Op1 lshr log2(Op2), if log2() folds away.
+ if (takeLog2(Builder, Op1, /*Depth*/0, /*DoFold*/false)) {
+ Value *Res = takeLog2(Builder, Op1, /*Depth*/0, /*DoFold*/true);
+ return replaceInstUsesWith(
+ I, Builder.CreateLShr(Op0, Res, I.getName(), I.isExact()));
+ }
return nullptr;
}
Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
- if (Value *V = SimplifySDivInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifySDivInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1223,9 +1172,9 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
if (match(&I, m_c_BinOp(
m_OneUse(m_Intrinsic<Intrinsic::abs>(m_Value(X), m_One())),
m_Deferred(X)))) {
- Constant *NegOne = ConstantInt::getAllOnesValue(Ty);
- Value *Cond = Builder.CreateICmpSGT(X, NegOne);
- return SelectInst::Create(Cond, ConstantInt::get(Ty, 1), NegOne);
+ Value *Cond = Builder.CreateIsNotNeg(X);
+ return SelectInst::Create(Cond, ConstantInt::get(Ty, 1),
+ ConstantInt::getAllOnesValue(Ty));
}
// If the sign bits of both operands are zero (i.e. we can prove they are
@@ -1242,8 +1191,10 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
if (match(Op1, m_NegatedPower2())) {
// X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) ->
// -> -(X udiv (1 << C)) -> -(X u>> C)
- return BinaryOperator::CreateNeg(Builder.Insert(foldUDivPow2Cst(
- Op0, ConstantExpr::getNeg(cast<Constant>(Op1)), I, *this)));
+ Constant *CNegLog2 = ConstantExpr::getExactLogBase2(
+ ConstantExpr::getNeg(cast<Constant>(Op1)));
+ Value *Shr = Builder.CreateLShr(Op0, CNegLog2, I.getName(), I.isExact());
+ return BinaryOperator::CreateNeg(Shr);
}
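// A sketch of X sdiv -(1 << C) --> -(X u>> C), assuming X >= 0 in line with
// the known-zero-sign-bits comment above (32-bit, C < 31).
#include <cstdint>
constexpr bool sdivNegPow2Holds(int32_t X, unsigned C) {
  return X / -(1 << C) == -(int32_t)((uint32_t)X >> C);
}
static_assert(sdivNegPow2Holds(40, 3) && sdivNegPow2Holds(7, 3) &&
              sdivNegPow2Holds(0, 5), "");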
if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
@@ -1368,7 +1319,9 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
}
Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
- if (Value *V = SimplifyFDivInst(I.getOperand(0), I.getOperand(1),
+ Module *M = I.getModule();
+
+ if (Value *V = simplifyFDivInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1433,8 +1386,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
!IsTan && match(Op0, m_Intrinsic<Intrinsic::cos>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::sin>(m_Specific(X)));
- if ((IsTan || IsCot) &&
- hasFloatFn(&TLI, I.getType(), LibFunc_tan, LibFunc_tanf, LibFunc_tanl)) {
+ if ((IsTan || IsCot) && hasFloatFn(M, &TLI, I.getType(), LibFunc_tan,
+ LibFunc_tanf, LibFunc_tanl)) {
IRBuilder<> B(&I);
IRBuilder<>::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(I.getFastMathFlags());
@@ -1498,7 +1451,8 @@ Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
// TODO: Adapt simplifyDivRemOfSelectWithZeroOp to allow this and other folds.
if (match(Op0, m_ImmConstant()) &&
match(Op1, m_Select(m_Value(), m_ImmConstant(), m_ImmConstant()))) {
- if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1)))
+ if (Instruction *R = FoldOpIntoSelect(I, cast<SelectInst>(Op1),
+ /*FoldWithMultiUse*/ true))
return R;
}
@@ -1530,7 +1484,7 @@ Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
}
Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
- if (Value *V = SimplifyURemInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyURemInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1560,11 +1514,13 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
return CastInst::CreateZExtOrBitCast(Cmp, Ty);
}
- // X urem C -> X < C ? X : X - C, where C >= signbit.
+ // Op0 urem C -> Op0 < C ? Op0 : Op0 - C, where C >= signbit.
+ // Op0 must be frozen because we are increasing its number of uses.
if (match(Op1, m_Negative())) {
- Value *Cmp = Builder.CreateICmpULT(Op0, Op1);
- Value *Sub = Builder.CreateSub(Op0, Op1);
- return SelectInst::Create(Cmp, Op0, Sub);
+ Value *F0 = Builder.CreateFreeze(Op0, Op0->getName() + ".fr");
+ Value *Cmp = Builder.CreateICmpULT(F0, Op1);
+ Value *Sub = Builder.CreateSub(F0, Op1);
+ return SelectInst::Create(Cmp, F0, Sub);
}
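// A sketch (unsigned 32-bit; C has its sign bit set, so C >= 2^31) of the
// fold above: the quotient of X / C can only be 0 or 1, so one conditional
// subtraction reproduces the remainder.
#include <cstdint>
constexpr bool uremFoldHolds(uint32_t X, uint32_t C) {
  return X % C == (X < C ? X : X - C);
}
static_assert(uremFoldHolds(5u, 0x80000000u) &&
              uremFoldHolds(0xFFFFFFFFu, 0x80000000u) &&
              uremFoldHolds(0x90000000u, 0x80000001u), "");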
// If the divisor is a sext of a boolean, then the divisor must be max
@@ -1581,7 +1537,7 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
}
Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) {
- if (Value *V = SimplifySRemInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifySRemInst(I.getOperand(0), I.getOperand(1),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1653,7 +1609,7 @@ Instruction *InstCombinerImpl::visitSRem(BinaryOperator &I) {
}
Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) {
- if (Value *V = SimplifyFRemInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyFRemInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index 42ba4a34a5a9..c573b03f31a6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -248,6 +248,20 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
return nullptr;
switch (I->getOpcode()) {
+ case Instruction::And: {
+ Constant *ShAmt;
+ // sub(y,and(lshr(x,C),1)) --> add(ashr(shl(x,(BW-1)-C),BW-1),y)
+ if (match(I, m_c_And(m_OneUse(m_TruncOrSelf(
+ m_LShr(m_Value(X), m_ImmConstant(ShAmt)))),
+ m_One()))) {
+ unsigned BW = X->getType()->getScalarSizeInBits();
+ Constant *BWMinusOne = ConstantInt::get(X->getType(), BW - 1);
+ Value *R = Builder.CreateShl(X, Builder.CreateSub(BWMinusOne, ShAmt));
+ R = Builder.CreateAShr(R, BWMinusOne);
+ return Builder.CreateTruncOrBitCast(R, I->getType());
+ }
+ break;
+ }
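// A sketch (32-bit; C < 32; unsigned shl to sidestep UB, then an arithmetic
// shr on the reinterpreted signed value) of the negation rewrite added
// above: shifting bit C into the sign position and smearing it across the
// word yields 0 or -1, which is exactly the negated bit.
#include <cstdint>
constexpr bool negatedBitHolds(uint32_t X, unsigned C, int32_t Y) {
  int32_t NegBit = (int32_t)(X << (31 - C)) >> 31; // == -((X >> C) & 1)
  return Y - (int32_t)((X >> C) & 1) == NegBit + Y;
}
static_assert(negatedBitHolds(0b1010u, 1, 100) &&
              negatedBitHolds(0b1010u, 2, 100), "");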
case Instruction::SDiv:
// `sdiv` is negatible if divisor is not undef/INT_MIN/1.
// While this is normally not behind a use-check,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 09694d50468f..90a796a0939e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -511,7 +511,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
// Scan to see if all operands are the same opcode, and all have one user.
for (Value *V : drop_begin(PN.incoming_values())) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
- if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() ||
+ if (!GEP || !GEP->hasOneUser() ||
+ GEP->getSourceElementType() != FirstInst->getSourceElementType() ||
GEP->getNumOperands() != FirstInst->getNumOperands())
return nullptr;
@@ -657,6 +658,10 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));
+ // Can't forward swifterror through a phi.
+ if (FirstLI->getOperand(0)->isSwiftError())
+ return nullptr;
+
// FIXME: This is overconservative; this transform is allowed in some cases
// for atomic operations.
if (FirstLI->isAtomic())
@@ -693,6 +698,10 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
LI->getPointerAddressSpace() != LoadAddrSpace)
return nullptr;
+ // Can't forward swifterror through a phi.
+ if (LI->getOperand(0)->isSwiftError())
+ return nullptr;
+
// We can't sink the load if the loaded value could be modified between
// the load and the PHI.
if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI))
@@ -1112,6 +1121,13 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
return nullptr;
}
+ // If the incoming value is a PHI node before a catchswitch, we cannot
+ // extract the value within that BB because we cannot insert any non-PHI
+ // instructions in the BB.
+ for (auto *Pred : PN->blocks())
+ if (Pred->getFirstInsertionPt() == Pred->end())
+ return nullptr;
+
for (User *U : PN->users()) {
Instruction *UserI = cast<Instruction>(U);
@@ -1260,12 +1276,12 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
// ... ...
// \ /
// phi [true] [false]
- if (!PN.getType()->isIntegerTy(1))
- return nullptr;
-
- if (PN.getNumOperands() != 2)
- return nullptr;
-
+ // and
+ // switch (cond)
+ // case v1: / \ case v2:
+ // ... ...
+ // \ /
+ // phi [v1] [v2]
// Make sure all inputs are constants.
if (!all_of(PN.operands(), [](Value *V) { return isa<ConstantInt>(V); }))
return nullptr;
@@ -1275,50 +1291,77 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
if (!DT.isReachableFromEntry(BB))
return nullptr;
- // Same inputs.
- if (PN.getOperand(0) == PN.getOperand(1))
- return PN.getOperand(0);
+ // Determine which value the condition of the idom has for which successor.
+ LLVMContext &Context = PN.getContext();
+ auto *IDom = DT.getNode(BB)->getIDom()->getBlock();
+ Value *Cond;
+ SmallDenseMap<ConstantInt *, BasicBlock *, 8> SuccForValue;
+ SmallDenseMap<BasicBlock *, unsigned, 8> SuccCount;
+ auto AddSucc = [&](ConstantInt *C, BasicBlock *Succ) {
+ SuccForValue[C] = Succ;
+ ++SuccCount[Succ];
+ };
+ if (auto *BI = dyn_cast<BranchInst>(IDom->getTerminator())) {
+ if (BI->isUnconditional())
+ return nullptr;
- BasicBlock *TruePred = nullptr, *FalsePred = nullptr;
- for (auto *Pred : predecessors(BB)) {
- auto *Input = cast<ConstantInt>(PN.getIncomingValueForBlock(Pred));
- if (Input->isAllOnesValue())
- TruePred = Pred;
- else
- FalsePred = Pred;
+ Cond = BI->getCondition();
+ AddSucc(ConstantInt::getTrue(Context), BI->getSuccessor(0));
+ AddSucc(ConstantInt::getFalse(Context), BI->getSuccessor(1));
+ } else if (auto *SI = dyn_cast<SwitchInst>(IDom->getTerminator())) {
+ Cond = SI->getCondition();
+ ++SuccCount[SI->getDefaultDest()];
+ for (auto Case : SI->cases())
+ AddSucc(Case.getCaseValue(), Case.getCaseSuccessor());
+ } else {
+ return nullptr;
}
- assert(TruePred && FalsePred && "Must be!");
- // Check which edge of the dominator dominates the true input. If it is the
- // false edge, we should invert the condition.
- auto *IDom = DT.getNode(BB)->getIDom()->getBlock();
- auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
- if (!BI || BI->isUnconditional())
+ if (Cond->getType() != PN.getType())
return nullptr;
// Check that edges outgoing from the idom's terminators dominate respective
// inputs of the Phi.
- BasicBlockEdge TrueOutEdge(IDom, BI->getSuccessor(0));
- BasicBlockEdge FalseOutEdge(IDom, BI->getSuccessor(1));
+ Optional<bool> Invert;
+ for (auto Pair : zip(PN.incoming_values(), PN.blocks())) {
+ auto *Input = cast<ConstantInt>(std::get<0>(Pair));
+ BasicBlock *Pred = std::get<1>(Pair);
+ auto IsCorrectInput = [&](ConstantInt *Input) {
+ // The input needs to be dominated by the corresponding edge of the idom.
+ // This edge cannot be a multi-edge, as that would imply that multiple
+ // different condition values follow the same edge.
+ auto It = SuccForValue.find(Input);
+ return It != SuccForValue.end() && SuccCount[It->second] == 1 &&
+ DT.dominates(BasicBlockEdge(IDom, It->second),
+ BasicBlockEdge(Pred, BB));
+ };
+
+ // Depending on the constant, the condition may need to be inverted.
+ bool NeedsInvert;
+ if (IsCorrectInput(Input))
+ NeedsInvert = false;
+ else if (IsCorrectInput(cast<ConstantInt>(ConstantExpr::getNot(Input))))
+ NeedsInvert = true;
+ else
+ return nullptr;
+
+ // Make sure the inversion requirement is always the same.
+ if (Invert && *Invert != NeedsInvert)
+ return nullptr;
- BasicBlockEdge TrueIncEdge(TruePred, BB);
- BasicBlockEdge FalseIncEdge(FalsePred, BB);
+ Invert = NeedsInvert;
+ }
- auto *Cond = BI->getCondition();
- if (DT.dominates(TrueOutEdge, TrueIncEdge) &&
- DT.dominates(FalseOutEdge, FalseIncEdge))
- // This Phi is actually equivalent to branching condition of IDom.
+ if (!*Invert)
return Cond;
- if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
- DT.dominates(FalseOutEdge, TrueIncEdge)) {
- // This Phi is actually opposite to branching condition of IDom. We invert
- // the condition that will potentially open up some opportunities for
- // sinking.
- auto InsertPt = BB->getFirstInsertionPt();
- if (InsertPt != BB->end()) {
- Self.Builder.SetInsertPoint(&*InsertPt);
- return Self.Builder.CreateNot(Cond);
- }
+
+ // This Phi is actually the opposite of the branching condition of IDom.
+ // We invert the condition, which will potentially open up some
+ // opportunities for sinking.
+ auto InsertPt = BB->getFirstInsertionPt();
+ if (InsertPt != BB->end()) {
+ Self.Builder.SetInsertPoint(&*InsertPt);
+ return Self.Builder.CreateNot(Cond);
}
return nullptr;
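// A minimal C++ analogue (hypothetical case values 1 and 2) of the switch
// shape sketched in the comment above: when each phi input is pinned to a
// unique case edge, the phi simply re-materializes the switch condition.
constexpr int phiOverSwitch(int X) {
  int V = 0;
  switch (X) {
  case 1: V = 1; break; // edge for case 1 carries the value 1
  case 2: V = 2; break; // edge for case 2 carries the value 2
  default: V = 0; break;
  }
  return V; // for X in {1, 2}, V == X: the phi equals the condition
}
static_assert(phiOverSwitch(1) == 1 && phiOverSwitch(2) == 2, "");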
@@ -1327,7 +1370,7 @@ static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
// PHINode simplification
//
Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN)))
+ if (Value *V = simplifyInstruction(&PN, SQ.getWithInstruction(&PN)))
return replaceInstUsesWith(PN, V);
if (Instruction *Result = foldPHIArgZextsIntoPHI(PN))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 65e60498ff95..ad96a5f475f1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
@@ -49,13 +50,6 @@ using namespace llvm;
using namespace PatternMatch;
-static Value *createMinMax(InstCombiner::BuilderTy &Builder,
- SelectPatternFlavor SPF, Value *A, Value *B) {
- CmpInst::Predicate Pred = getMinMaxPred(SPF);
- assert(CmpInst::isIntPredicate(Pred) && "Expected integer predicate");
- return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
-}
-
/// Replace a select operand based on an equality comparison with the identity
/// constant of a binop.
static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
@@ -370,6 +364,7 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// one-use constraint, but that needs be examined carefully since it may not
// reduce the total number of instructions.
if (TI->getNumOperands() != 2 || FI->getNumOperands() != 2 ||
+ !TI->isSameOperationAs(FI) ||
(!isa<BinaryOperator>(TI) && !isa<GetElementPtrInst>(TI)) ||
!TI->hasOneUse() || !FI->hasOneUse())
return nullptr;
@@ -444,69 +439,56 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *FalseVal) {
// See the comment above GetSelectFoldableOperands for a description of the
// transformation we are doing here.
- if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) {
- if (TVI->hasOneUse() && !isa<Constant>(FalseVal)) {
- if (unsigned SFO = getSelectFoldableOperands(TVI)) {
- unsigned OpToFold = 0;
- if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
- OpToFold = 1;
- } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
- OpToFold = 2;
- }
-
- if (OpToFold) {
- Constant *C = ConstantExpr::getBinOpIdentity(TVI->getOpcode(),
- TVI->getType(), true);
- Value *OOp = TVI->getOperand(2-OpToFold);
- // Avoid creating select between 2 constants unless it's selecting
- // between 0, 1 and -1.
- const APInt *OOpC;
- bool OOpIsAPInt = match(OOp, m_APInt(OOpC));
- if (!isa<Constant>(OOp) ||
- (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) {
- Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C);
- NewSel->takeName(TVI);
- BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(),
- FalseVal, NewSel);
- BO->copyIRFlags(TVI);
- return BO;
+ auto TryFoldSelectIntoOp = [&](SelectInst &SI, Value *TrueVal,
+ Value *FalseVal,
+ bool Swapped) -> Instruction * {
+ if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) {
+ if (TVI->hasOneUse() && !isa<Constant>(FalseVal)) {
+ if (unsigned SFO = getSelectFoldableOperands(TVI)) {
+ unsigned OpToFold = 0;
+ if ((SFO & 1) && FalseVal == TVI->getOperand(0))
+ OpToFold = 1;
+ else if ((SFO & 2) && FalseVal == TVI->getOperand(1))
+ OpToFold = 2;
+
+ if (OpToFold) {
+ FastMathFlags FMF;
+ // TODO: We probably ought to revisit cases where the select and FP
+ // instructions have different flags and add tests to ensure the
+ // behaviour is correct.
+ if (isa<FPMathOperator>(&SI))
+ FMF = SI.getFastMathFlags();
+ Constant *C = ConstantExpr::getBinOpIdentity(
+ TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros());
+ Value *OOp = TVI->getOperand(2 - OpToFold);
+ // Avoid creating select between 2 constants unless it's selecting
+ // between 0, 1 and -1.
+ const APInt *OOpC;
+ bool OOpIsAPInt = match(OOp, m_APInt(OOpC));
+ if (!isa<Constant>(OOp) ||
+ (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) {
+ Value *NewSel = Builder.CreateSelect(
+ SI.getCondition(), Swapped ? C : OOp, Swapped ? OOp : C);
+ if (isa<FPMathOperator>(&SI))
+ cast<Instruction>(NewSel)->setFastMathFlags(FMF);
+ NewSel->takeName(TVI);
+ BinaryOperator *BO =
+ BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
+ BO->copyIRFlags(TVI);
+ return BO;
+ }
}
}
}
}
- }
+ return nullptr;
+ };
- if (auto *FVI = dyn_cast<BinaryOperator>(FalseVal)) {
- if (FVI->hasOneUse() && !isa<Constant>(TrueVal)) {
- if (unsigned SFO = getSelectFoldableOperands(FVI)) {
- unsigned OpToFold = 0;
- if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
- OpToFold = 1;
- } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
- OpToFold = 2;
- }
+ if (Instruction *R = TryFoldSelectIntoOp(SI, TrueVal, FalseVal, false))
+ return R;
- if (OpToFold) {
- Constant *C = ConstantExpr::getBinOpIdentity(FVI->getOpcode(),
- FVI->getType(), true);
- Value *OOp = FVI->getOperand(2-OpToFold);
- // Avoid creating select between 2 constants unless it's selecting
- // between 0, 1 and -1.
- const APInt *OOpC;
- bool OOpIsAPInt = match(OOp, m_APInt(OOpC));
- if (!isa<Constant>(OOp) ||
- (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) {
- Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp);
- NewSel->takeName(FVI);
- BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(),
- TrueVal, NewSel);
- BO->copyIRFlags(FVI);
- return BO;
- }
- }
- }
- }
- }
+ if (Instruction *R = TryFoldSelectIntoOp(SI, FalseVal, TrueVal, true))
+ return R;
return nullptr;
}
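// A sketch (32-bit int, add with identity element 0) of the transform the
// new TryFoldSelectIntoOp lambda applies in both orientations via Swapped:
//   select(Cond, X + Y, X) --> X + select(Cond, Y, 0)
//   select(Cond, X, X + Y) --> X + select(Cond, 0, Y)
#include <cstdint>
constexpr bool selectIntoOpHolds(bool Cond, int32_t X, int32_t Y) {
  bool Fwd = (Cond ? X + Y : X) == X + (Cond ? Y : 0);
  bool Swp = (Cond ? X : X + Y) == X + (Cond ? 0 : Y);
  return Fwd && Swp;
}
static_assert(selectIntoOpHolds(true, 10, 3) &&
              selectIntoOpHolds(false, 10, 3), "");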
@@ -535,6 +517,16 @@ static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,
// Where %B may be optionally shifted: lshr %X, %Z.
Value *X, *Z;
const bool HasShift = match(B, m_OneUse(m_LShr(m_Value(X), m_Value(Z))));
+
+ // The shift must be valid.
+ // TODO: This restricts the fold to constant shift amounts. Is there a way to
+ // handle variable shifts safely? PR47012
+ if (HasShift &&
+ !match(Z, m_SpecificInt_ICMP(CmpInst::ICMP_ULT,
+ APInt(SelType->getScalarSizeInBits(),
+ SelType->getScalarSizeInBits()))))
+ return nullptr;
+
if (!HasShift)
X = B;
@@ -1096,74 +1088,55 @@ static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) {
return true;
}
-/// If this is an integer min/max (icmp + select) with a constant operand,
-/// create the canonical icmp for the min/max operation and canonicalize the
-/// constant to the 'false' operand of the select:
-/// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2
-/// Note: if C1 != C2, this will change the icmp constant to the existing
-/// constant operand of the select.
-static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel,
- ICmpInst &Cmp,
- InstCombinerImpl &IC) {
- if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
- return nullptr;
-
- // Canonicalize the compare predicate based on whether we have min or max.
+static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
+ InstCombinerImpl &IC) {
Value *LHS, *RHS;
- SelectPatternResult SPR = matchSelectPattern(&Sel, LHS, RHS);
- if (!SelectPatternResult::isMinOrMax(SPR.Flavor))
+ // TODO: What to do with pointer min/max patterns?
+ if (!Sel.getType()->isIntOrIntVectorTy())
return nullptr;
- // Is this already canonical?
- ICmpInst::Predicate CanonicalPred = getMinMaxPred(SPR.Flavor);
- if (Cmp.getOperand(0) == LHS && Cmp.getOperand(1) == RHS &&
- Cmp.getPredicate() == CanonicalPred)
- return nullptr;
-
- // Bail out on unsimplified X-0 operand (due to some worklist management bug),
- // as this may cause an infinite combine loop. Let the sub be folded first.
- if (match(LHS, m_Sub(m_Value(), m_Zero())) ||
- match(RHS, m_Sub(m_Value(), m_Zero())))
- return nullptr;
-
- // Create the canonical compare and plug it into the select.
- IC.replaceOperand(Sel, 0, IC.Builder.CreateICmp(CanonicalPred, LHS, RHS));
-
- // If the select operands did not change, we're done.
- if (Sel.getTrueValue() == LHS && Sel.getFalseValue() == RHS)
- return &Sel;
-
- // If we are swapping the select operands, swap the metadata too.
- assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS &&
- "Unexpected results from matchSelectPattern");
- Sel.swapValues();
- Sel.swapProfMetadata();
- return &Sel;
-}
-
-static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
- InstCombinerImpl &IC) {
- if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
- return nullptr;
-
- Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor;
- if (SPF != SelectPatternFlavor::SPF_ABS &&
- SPF != SelectPatternFlavor::SPF_NABS)
- return nullptr;
-
- // Note that NSW flag can only be propagated for normal, non-negated abs!
- bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&
- match(RHS, m_NSWNeg(m_Specific(LHS)));
- Constant *IntMinIsPoisonC =
- ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison);
- Instruction *Abs =
- IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
-
- if (SPF == SelectPatternFlavor::SPF_NABS)
- return BinaryOperator::CreateNeg(Abs); // Always without NSW flag!
+ if (SPF == SelectPatternFlavor::SPF_ABS ||
+ SPF == SelectPatternFlavor::SPF_NABS) {
+ if (!Cmp.hasOneUse() && !RHS->hasOneUse())
+ return nullptr; // TODO: Relax this restriction.
+
+ // Note that NSW flag can only be propagated for normal, non-negated abs!
+ bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&
+ match(RHS, m_NSWNeg(m_Specific(LHS)));
+ Constant *IntMinIsPoisonC =
+ ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison);
+ Instruction *Abs =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);
+
+ if (SPF == SelectPatternFlavor::SPF_NABS)
+ return BinaryOperator::CreateNeg(Abs); // Always without NSW flag!
+ return IC.replaceInstUsesWith(Sel, Abs);
+ }
+
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ Intrinsic::ID IntrinsicID;
+ switch (SPF) {
+ case SelectPatternFlavor::SPF_UMIN:
+ IntrinsicID = Intrinsic::umin;
+ break;
+ case SelectPatternFlavor::SPF_UMAX:
+ IntrinsicID = Intrinsic::umax;
+ break;
+ case SelectPatternFlavor::SPF_SMIN:
+ IntrinsicID = Intrinsic::smin;
+ break;
+ case SelectPatternFlavor::SPF_SMAX:
+ IntrinsicID = Intrinsic::smax;
+ break;
+ default:
+ llvm_unreachable("Unexpected SPF");
+ }
+ return IC.replaceInstUsesWith(
+ Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS));
+ }
- return IC.replaceInstUsesWith(Sel, Abs);
+ return nullptr;
}
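// A sketch (32-bit int) of the icmp+select patterns canonicalizeSPF now
// rewrites into intrinsics: the select form computes the same value as the
// corresponding min/max operation.
#include <algorithm>
#include <cstdint>
constexpr bool spfMatchesMinMax(int32_t A, int32_t B) {
  bool SMin = (A < B ? A : B) == std::min(A, B); // SPF_SMIN ~ llvm.smin
  bool SMax = (A > B ? A : B) == std::max(A, B); // SPF_SMAX ~ llvm.smax
  return SMin && SMax;
}
static_assert(spfMatchesMinMax(-3, 7) && spfMatchesMinMax(9, 2), "");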
/// If we have a select with an equality comparison, then we know the value in
@@ -1336,6 +1309,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
ICmpInst::Predicate::ICMP_NE,
APInt::getAllOnes(C0->getType()->getScalarSizeInBits()))))
return nullptr; // Can't do, have all-ones element[s].
+ Pred0 = ICmpInst::getFlippedStrictnessPredicate(Pred0);
C0 = InstCombiner::AddOne(C0);
break;
default:
@@ -1401,15 +1375,22 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
case ICmpInst::Predicate::ICMP_SGE:
// Also non-canonical, but here we don't need to change C2,
// so we don't have any restrictions on C2, so we can just handle it.
+ Pred1 = ICmpInst::Predicate::ICMP_SLT;
std::swap(ReplacementLow, ReplacementHigh);
break;
default:
return nullptr; // Unknown predicate.
}
+ assert(Pred1 == ICmpInst::Predicate::ICMP_SLT &&
+ "Unexpected predicate type.");
// The thresholds of this clamp-like pattern.
auto *ThresholdLowIncl = ConstantExpr::getNeg(C1);
auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1);
+
+ assert((Pred0 == ICmpInst::Predicate::ICMP_ULT ||
+ Pred0 == ICmpInst::Predicate::ICMP_UGE) &&
+ "Unexpected predicate type.");
if (Pred0 == ICmpInst::Predicate::ICMP_UGE)
std::swap(ThresholdLowIncl, ThresholdHighExcl);
@@ -1530,17 +1511,71 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,
return &Sel;
}
+static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal,
+ Value *FVal,
+ InstCombiner::BuilderTy &Builder) {
+ if (!Cmp->hasOneUse())
+ return nullptr;
+
+ const APInt *CmpC;
+ if (!match(Cmp->getOperand(1), m_APIntAllowUndef(CmpC)))
+ return nullptr;
+
+ // (X u< 2) ? -X : -1 --> sext (X != 0)
+ Value *X = Cmp->getOperand(0);
+ if (Cmp->getPredicate() == ICmpInst::ICMP_ULT && *CmpC == 2 &&
+ match(TVal, m_Neg(m_Specific(X))) && match(FVal, m_AllOnes()))
+ return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType());
+
+ // (X u> 1) ? -1 : -X --> sext (X != 0)
+ if (Cmp->getPredicate() == ICmpInst::ICMP_UGT && *CmpC == 1 &&
+ match(FVal, m_Neg(m_Specific(X))) && match(TVal, m_AllOnes()))
+ return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType());
+
+ return nullptr;
+}
+
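// A sketch (unsigned 32-bit; sext i1 materialized as 0 or all-ones) of
// (X u< 2) ? -X : -1 --> sext(X != 0): for X == 0 both sides are 0, for
// X == 1 the negation is already all-ones, and for X u> 1 both sides are -1.
#include <cstdint>
constexpr bool zeroOrOnesFoldHolds(uint32_t X) {
  uint32_t Sel = X < 2u ? 0u - X : 0xFFFFFFFFu;
  uint32_t Sext = X != 0 ? 0xFFFFFFFFu : 0u;
  return Sel == Sext;
}
static_assert(zeroOrOnesFoldHolds(0) && zeroOrOnesFoldHolds(1) &&
              zeroOrOnesFoldHolds(7), "");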
+static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI) {
+ const APInt *CmpC;
+ Value *V;
+ CmpInst::Predicate Pred;
+ if (!match(ICI, m_ICmp(Pred, m_Value(V), m_APInt(CmpC))))
+ return nullptr;
+
+ BinaryOperator *BO;
+ const APInt *C;
+ CmpInst::Predicate CPred;
+ if (match(&SI, m_Select(m_Specific(ICI), m_APInt(C), m_BinOp(BO))))
+ CPred = ICI->getPredicate();
+ else if (match(&SI, m_Select(m_Specific(ICI), m_BinOp(BO), m_APInt(C))))
+ CPred = ICI->getInversePredicate();
+ else
+ return nullptr;
+
+ const APInt *BinOpC;
+ if (!match(BO, m_BinOp(m_Specific(V), m_APInt(BinOpC))))
+ return nullptr;
+
+ ConstantRange R = ConstantRange::makeExactICmpRegion(CPred, *CmpC)
+ .binaryOp(BO->getOpcode(), *BinOpC);
+ if (R == *C) {
+ BO->dropPoisonGeneratingFlags();
+ return BO;
+ }
+ return nullptr;
+}
+
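// A worked sketch (32-bit; hypothetical constants) of the ConstantRange
// reasoning above: in select(X == 0, 16, X | 16) the true-arm constant
// equals the exact icmp region {0} put through the binop, so the whole
// select collapses to the binop itself.
#include <cstdint>
constexpr bool rangeFoldHolds(uint32_t X) {
  return (X == 0 ? 16u : (X | 16u)) == (X | 16u);
}
static_assert(rangeFoldHolds(0) && rangeFoldHolds(5) && rangeFoldHolds(16), "");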
/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI))
return NewSel;
- if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this))
- return NewSel;
+ if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this))
+ return NewSPF;
- if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this))
- return NewAbs;
+ if (Value *V = foldSelectInstWithICmpConst(SI, ICI))
+ return replaceInstUsesWith(SI, V);
if (Value *V = canonicalizeClampLike(SI, *ICI, Builder))
return replaceInstUsesWith(SI, V);
@@ -1572,6 +1607,22 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
}
}
+ // Canonicalize a signbit condition to use zero constant by swapping:
+ // (CmpLHS > -1) ? TV : FV --> (CmpLHS < 0) ? FV : TV
+ // To avoid conflicts (infinite loops) with other canonicalizations, this is
+ // not applied with any constant select arm.
+ if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes()) &&
+ !match(TrueVal, m_Constant()) && !match(FalseVal, m_Constant()) &&
+ ICI->hasOneUse()) {
+ InstCombiner::BuilderTy::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(&SI);
+ Value *IsNeg = Builder.CreateIsNeg(CmpLHS, ICI->getName());
+ replaceOperand(SI, 0, IsNeg);
+ SI.swapValues();
+ SI.swapProfMetadata();
+ return &SI;
+ }
+
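// A trivial sketch of the canonicalization above: testing "greater than
// minus one" is the inverted sign-bit test, so swapping the select arms
// lets the compare use the canonical zero constant.
#include <cstdint>
constexpr bool signbitSwapHolds(int32_t X, int32_t TV, int32_t FV) {
  return (X > -1 ? TV : FV) == (X < 0 ? FV : TV);
}
static_assert(signbitSwapHolds(-2, 1, 9) && signbitSwapHolds(3, 1, 9), "");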
// FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
// decomposeBitTestICmp() might help.
{
@@ -1629,6 +1680,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder))
return V;
+ if (Instruction *V = foldSelectZeroOrOnes(ICI, TrueVal, FalseVal, Builder))
+ return V;
+
if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
@@ -1698,114 +1752,6 @@ Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner,
// TODO: This could be done in instsimplify.
if (SPF1 == SPF2 && SelectPatternResult::isMinOrMax(SPF1))
return replaceInstUsesWith(Outer, Inner);
-
- // MAX(MIN(a, b), a) -> a
- // MIN(MAX(a, b), a) -> a
- // TODO: This could be done in instsimplify.
- if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
- (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) ||
- (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) ||
- (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
- return replaceInstUsesWith(Outer, C);
- }
-
- if (SPF1 == SPF2) {
- const APInt *CB, *CC;
- if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) {
- // MIN(MIN(A, 23), 97) -> MIN(A, 23)
- // MAX(MAX(A, 97), 23) -> MAX(A, 97)
- // TODO: This could be done in instsimplify.
- if ((SPF1 == SPF_UMIN && CB->ule(*CC)) ||
- (SPF1 == SPF_SMIN && CB->sle(*CC)) ||
- (SPF1 == SPF_UMAX && CB->uge(*CC)) ||
- (SPF1 == SPF_SMAX && CB->sge(*CC)))
- return replaceInstUsesWith(Outer, Inner);
-
- // MIN(MIN(A, 97), 23) -> MIN(A, 23)
- // MAX(MAX(A, 23), 97) -> MAX(A, 97)
- if ((SPF1 == SPF_UMIN && CB->ugt(*CC)) ||
- (SPF1 == SPF_SMIN && CB->sgt(*CC)) ||
- (SPF1 == SPF_UMAX && CB->ult(*CC)) ||
- (SPF1 == SPF_SMAX && CB->slt(*CC))) {
- Outer.replaceUsesOfWith(Inner, A);
- return &Outer;
- }
- }
- }
-
- // max(max(A, B), min(A, B)) --> max(A, B)
- // min(min(A, B), max(A, B)) --> min(A, B)
- // TODO: This could be done in instsimplify.
- if (SPF1 == SPF2 &&
- ((SPF1 == SPF_UMIN && match(C, m_c_UMax(m_Specific(A), m_Specific(B)))) ||
- (SPF1 == SPF_SMIN && match(C, m_c_SMax(m_Specific(A), m_Specific(B)))) ||
- (SPF1 == SPF_UMAX && match(C, m_c_UMin(m_Specific(A), m_Specific(B)))) ||
- (SPF1 == SPF_SMAX && match(C, m_c_SMin(m_Specific(A), m_Specific(B))))))
- return replaceInstUsesWith(Outer, Inner);
-
- // ABS(ABS(X)) -> ABS(X)
- // NABS(NABS(X)) -> NABS(X)
- // TODO: This could be done in instsimplify.
- if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) {
- return replaceInstUsesWith(Outer, Inner);
- }
-
- // ABS(NABS(X)) -> ABS(X)
- // NABS(ABS(X)) -> NABS(X)
- if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) ||
- (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) {
- SelectInst *SI = cast<SelectInst>(Inner);
- Value *NewSI =
- Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(),
- SI->getTrueValue(), SI->getName(), SI);
- return replaceInstUsesWith(Outer, NewSI);
- }
-
- auto IsFreeOrProfitableToInvert =
- [&](Value *V, Value *&NotV, bool &ElidesXor) {
- if (match(V, m_Not(m_Value(NotV)))) {
- // If V has at most 2 uses then we can get rid of the xor operation
- // entirely.
- ElidesXor |= !V->hasNUsesOrMore(3);
- return true;
- }
-
- if (isFreeToInvert(V, !V->hasNUsesOrMore(3))) {
- NotV = nullptr;
- return true;
- }
-
- return false;
- };
-
- Value *NotA, *NotB, *NotC;
- bool ElidesXor = false;
-
- // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C)
- // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C)
- // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C)
- // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C)
- //
- // This transform is performance neutral if we can elide at least one xor from
- // the set of three operands, since we'll be tacking on an xor at the very
- // end.
- if (SelectPatternResult::isMinOrMax(SPF1) &&
- SelectPatternResult::isMinOrMax(SPF2) &&
- IsFreeOrProfitableToInvert(A, NotA, ElidesXor) &&
- IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
- IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
- if (!NotA)
- NotA = Builder.CreateNot(A);
- if (!NotB)
- NotB = Builder.CreateNot(B);
- if (!NotC)
- NotC = Builder.CreateNot(C);
-
- Value *NewInner = createMinMax(Builder, getInverseMinMaxFlavor(SPF1), NotA,
- NotB);
- Value *NewOuter = Builder.CreateNot(
- createMinMax(Builder, getInverseMinMaxFlavor(SPF2), NewInner, NotC));
- return replaceInstUsesWith(Outer, NewOuter);
}
return nullptr;
@@ -2238,163 +2184,6 @@ static Value *foldSelectCmpXchg(SelectInst &SI) {
return nullptr;
}
-static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
- Value *Y,
- InstCombiner::BuilderTy &Builder) {
- assert(SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern");
- bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN ||
- SPF == SelectPatternFlavor::SPF_UMAX;
- // TODO: If InstSimplify could fold all cases where C2 <= C1, we could change
- // the constant value check to an assert.
- Value *A;
- const APInt *C1, *C2;
- if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) &&
- match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) {
- // umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1
- // umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1
- Value *NewMinMax = createMinMax(Builder, SPF, A,
- ConstantInt::get(X->getType(), *C2 - *C1));
- return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax,
- ConstantInt::get(X->getType(), *C1));
- }
-
- if (!IsUnsigned && match(X, m_NSWAdd(m_Value(A), m_APInt(C1))) &&
- match(Y, m_APInt(C2)) && X->hasNUses(2)) {
- bool Overflow;
- APInt Diff = C2->ssub_ov(*C1, Overflow);
- if (!Overflow) {
- // smin (add nsw A, C1), C2 --> add nsw (smin A, C2 - C1), C1
- // smax (add nsw A, C1), C2 --> add nsw (smax A, C2 - C1), C1
- Value *NewMinMax = createMinMax(Builder, SPF, A,
- ConstantInt::get(X->getType(), Diff));
- return BinaryOperator::CreateNSW(BinaryOperator::Add, NewMinMax,
- ConstantInt::get(X->getType(), *C1));
- }
- }
-
- return nullptr;
-}
-
-/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
-Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) {
- Type *Ty = MinMax1.getType();
-
- // We are looking for a tree of:
- // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
- // Where the min and max could be reversed
- Instruction *MinMax2;
- BinaryOperator *AddSub;
- const APInt *MinValue, *MaxValue;
- if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
- if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
- return nullptr;
- } else if (match(&MinMax1,
- m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
- if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
- return nullptr;
- } else
- return nullptr;
-
- // Check that the constants clamp a saturate, and that the new type would be
- // sensible to convert to.
- if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
- return nullptr;
- // In what bitwidth can this be treated as saturating arithmetics?
- unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
- // FIXME: This isn't quite right for vectors, but using the scalar type is a
- // good first approximation for what should be done there.
- if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
- return nullptr;
-
-  // Also make sure that the number of uses is as expected. The 3 is for the
-  // two items of the compare and the select, or 2 from a min/max.
- unsigned ExpUses = isa<IntrinsicInst>(MinMax1) ? 2 : 3;
- if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses))
- return nullptr;
-
- // Create the new type (which can be a vector type)
- Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
-
- Intrinsic::ID IntrinsicID;
- if (AddSub->getOpcode() == Instruction::Add)
- IntrinsicID = Intrinsic::sadd_sat;
- else if (AddSub->getOpcode() == Instruction::Sub)
- IntrinsicID = Intrinsic::ssub_sat;
- else
- return nullptr;
-
- // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
- // is usually achieved via a sext from a smaller type.
- if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
- NewBitWidth ||
- ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
- return nullptr;
-
- // Finally create and return the sat intrinsic, truncated to the new type
- Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
- Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
- Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
- Value *Sat = Builder.CreateCall(F, {AT, BT});
- return CastInst::Create(Instruction::SExt, Sat, Ty);
-}
-
-/// Reduce a sequence of min/max with a common operand.
-static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
- Value *RHS,
- InstCombiner::BuilderTy &Builder) {
- assert(SelectPatternResult::isMinOrMax(SPF) && "Expected a min/max");
- // TODO: Allow FP min/max with nnan/nsz.
- if (!LHS->getType()->isIntOrIntVectorTy())
- return nullptr;
-
- // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
- Value *A, *B, *C, *D;
- SelectPatternResult L = matchSelectPattern(LHS, A, B);
- SelectPatternResult R = matchSelectPattern(RHS, C, D);
- if (SPF != L.Flavor || L.Flavor != R.Flavor)
- return nullptr;
-
- // Look for a common operand. The use checks are different than usual because
- // a min/max pattern typically has 2 uses of each op: 1 by the cmp and 1 by
- // the select.
- Value *MinMaxOp = nullptr;
- Value *ThirdOp = nullptr;
- if (!LHS->hasNUsesOrMore(3) && RHS->hasNUsesOrMore(3)) {
- // If the LHS is only used in this chain and the RHS is used outside of it,
- // reuse the RHS min/max because that will eliminate the LHS.
- if (D == A || C == A) {
- // min(min(a, b), min(c, a)) --> min(min(c, a), b)
- // min(min(a, b), min(a, d)) --> min(min(a, d), b)
- MinMaxOp = RHS;
- ThirdOp = B;
- } else if (D == B || C == B) {
- // min(min(a, b), min(c, b)) --> min(min(c, b), a)
- // min(min(a, b), min(b, d)) --> min(min(b, d), a)
- MinMaxOp = RHS;
- ThirdOp = A;
- }
- } else if (!RHS->hasNUsesOrMore(3)) {
- // Reuse the LHS. This will eliminate the RHS.
- if (D == A || D == B) {
- // min(min(a, b), min(c, a)) --> min(min(a, b), c)
- // min(min(a, b), min(c, b)) --> min(min(a, b), c)
- MinMaxOp = LHS;
- ThirdOp = C;
- } else if (C == A || C == B) {
- // min(min(a, b), min(b, d)) --> min(min(a, b), d)
-      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
- MinMaxOp = LHS;
- ThirdOp = D;
- }
- }
- if (!MinMaxOp || !ThirdOp)
- return nullptr;
-
- CmpInst::Predicate P = getMinMaxPred(SPF);
- Value *CmpABC = Builder.CreateICmp(P, MinMaxOp, ThirdOp);
- return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp);
-}
-
/// Try to reduce a funnel/rotate pattern that includes a compare and select
/// into a funnel shift intrinsic. Example:
/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
@@ -2484,7 +2273,8 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel,
// Match select ?, TC, FC where the constants are equal but negated.
// TODO: Generalize to handle a negated variable operand?
const APFloat *TC, *FC;
- if (!match(TVal, m_APFloat(TC)) || !match(FVal, m_APFloat(FC)) ||
+ if (!match(TVal, m_APFloatAllowUndef(TC)) ||
+ !match(FVal, m_APFloatAllowUndef(FC)) ||
!abs(*TC).bitwiseIsEqual(abs(*FC)))
return nullptr;
@@ -2504,17 +2294,16 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel,
// (bitcast X) < 0 ? TC : -TC --> copysign(TC, -X)
// (bitcast X) >= 0 ? -TC : TC --> copysign(TC, -X)
// (bitcast X) >= 0 ? TC : -TC --> copysign(TC, X)
+  // Note: FMF from the select cannot be propagated to the new instructions.
if (IsTrueIfSignSet ^ TC->isNegative())
- X = Builder.CreateFNegFMF(X, &Sel);
+ X = Builder.CreateFNeg(X);
// Canonicalize the magnitude argument as the positive constant since we do
// not care about its sign.
- Value *MagArg = TC->isNegative() ? FVal : TVal;
+ Value *MagArg = ConstantFP::get(SelType, abs(*TC));
Function *F = Intrinsic::getDeclaration(Sel.getModule(), Intrinsic::copysign,
Sel.getType());
- Instruction *CopySign = CallInst::Create(F, { MagArg, X });
- CopySign->setFastMathFlags(Sel.getFastMathFlags());
- return CopySign;
+ return CallInst::Create(F, { MagArg, X });
}
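For illustration, a minimal IR sketch of the revised copysign fold (function and value names are invented; the "after" line is the form instcombine would be expected to produce, with the magnitude canonicalized to the positive constant and no FMF copied from the select):
  define float @sel_to_copysign(float %x) {
    %bits = bitcast float %x to i32
    %isneg = icmp slt i32 %bits, 0
    %r = select i1 %isneg, float -42.0, float 42.0
    ret float %r
  }
  ; after: %r = call float @llvm.copysign.f32(float 42.0, float %x)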
Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
@@ -2715,29 +2504,144 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
}
}
-Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
+// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
+// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
+static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
+ InstCombinerImpl &IC) {
Value *CondVal = SI.getCondition();
- Value *TrueVal = SI.getTrueValue();
- Value *FalseVal = SI.getFalseValue();
- Type *SelType = SI.getType();
- // FIXME: Remove this workaround when freeze related patches are done.
- // For select with undef operand which feeds into an equality comparison,
- // don't simplify it so loop unswitch can know the equality comparison
- // may have an undef operand. This is a workaround for PR31652 caused by
-  // discrepancy about branch on undef between LoopUnswitch and GVN.
- if (match(TrueVal, m_Undef()) || match(FalseVal, m_Undef())) {
- if (llvm::any_of(SI.users(), [&](User *U) {
- ICmpInst *CI = dyn_cast<ICmpInst>(U);
- if (CI && CI->isEquality())
- return true;
- return false;
- })) {
+ for (bool Swap : {false, true}) {
+ Value *TrueVal = SI.getTrueValue();
+ Value *X = SI.getFalseValue();
+ CmpInst::Predicate Pred;
+
+ if (Swap)
+ std::swap(TrueVal, X);
+
+ if (!match(CondVal, m_FCmp(Pred, m_Specific(X), m_AnyZeroFP())))
+ continue;
+
+ // fold (X <= +/-0.0) ? (0.0 - X) : X to fabs(X), when 'Swap' is false
+ // fold (X > +/-0.0) ? X : (0.0 - X) to fabs(X), when 'Swap' is true
+ if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) {
+ if (!Swap && (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ return IC.replaceInstUsesWith(SI, Fabs);
+ }
+ if (Swap && (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ return IC.replaceInstUsesWith(SI, Fabs);
+ }
+ }
+
+ // With nsz, when 'Swap' is false:
+ // fold (X < +/-0.0) ? -X : X or (X <= +/-0.0) ? -X : X to fabs(X)
+  // fold (X > +/-0.0) ? -X : X or (X >= +/-0.0) ? -X : X to -fabs(X)
+ // when 'Swap' is true:
+ // fold (X > +/-0.0) ? X : -X or (X >= +/-0.0) ? X : -X to fabs(X)
+ // fold (X < +/-0.0) ? X : -X or (X <= +/-0.0) ? X : -X to -fabs(X)
+ if (!match(TrueVal, m_FNeg(m_Specific(X))) || !SI.hasNoSignedZeros())
return nullptr;
+
+ if (Swap)
+ Pred = FCmpInst::getSwappedPredicate(Pred);
+
+ bool IsLTOrLE = Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
+ Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE;
+ bool IsGTOrGE = Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
+ Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE;
+
+ if (IsLTOrLE) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ return IC.replaceInstUsesWith(SI, Fabs);
+ }
+ if (IsGTOrGE) {
+ Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);
+ Instruction *NewFNeg = UnaryOperator::CreateFNeg(Fabs);
+ NewFNeg->setFastMathFlags(SI.getFastMathFlags());
+ return NewFNeg;
}
}
- if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal,
+ return nullptr;
+}
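A small IR sketch of one case handled by the new helper (illustrative names; this is the un-swapped OLE form with an fsub from +0.0, so no nsz flag is needed):
  define float @sel_to_fabs(float %x) {
    %cmp = fcmp ole float %x, 0.0
    %sub = fsub float 0.0, %x
    %r = select i1 %cmp, float %sub, float %x
    ret float %r
  }
  ; after: %r = call float @llvm.fabs.f32(float %x)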
+
+// Match the following IR pattern:
+// %x.lowbits = and i8 %x, %lowbitmask
+// %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0
+// %x.biased = add i8 %x, %bias
+// %x.biased.highbits = and i8 %x.biased, %highbitmask
+// %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits
+// Define:
+// %alignment = add i8 %lowbitmask, 1
+// Iff 1. %alignment is a power of two (aka, %lowbitmask is a low-bit mask)
+// and 2. %bias is equal to either %lowbitmask or %alignment,
+// and 3. %highbitmask is equal to ~%lowbitmask (aka, to -%alignment)
+// then this pattern can be transformed into:
+// %x.offset = add i8 %x, %lowbitmask
+// %x.roundedup = and i8 %x.offset, %highbitmask
+static Value *
+foldRoundUpIntegerWithPow2Alignment(SelectInst &SI,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Cond = SI.getCondition();
+ Value *X = SI.getTrueValue();
+ Value *XBiasedHighBits = SI.getFalseValue();
+
+ ICmpInst::Predicate Pred;
+ Value *XLowBits;
+ if (!match(Cond, m_ICmp(Pred, m_Value(XLowBits), m_ZeroInt())) ||
+ !ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ if (Pred == ICmpInst::Predicate::ICMP_NE)
+ std::swap(X, XBiasedHighBits);
+
+  // FIXME: we could support non-splats here.
+
+ const APInt *LowBitMaskCst;
+ if (!match(XLowBits, m_And(m_Specific(X), m_APIntAllowUndef(LowBitMaskCst))))
+ return nullptr;
+
+ const APInt *BiasCst, *HighBitMaskCst;
+ if (!match(XBiasedHighBits,
+ m_And(m_Add(m_Specific(X), m_APIntAllowUndef(BiasCst)),
+ m_APIntAllowUndef(HighBitMaskCst))))
+ return nullptr;
+
+ if (!LowBitMaskCst->isMask())
+ return nullptr;
+
+ APInt InvertedLowBitMaskCst = ~*LowBitMaskCst;
+ if (InvertedLowBitMaskCst != *HighBitMaskCst)
+ return nullptr;
+
+ APInt AlignmentCst = *LowBitMaskCst + 1;
+
+ if (*BiasCst != AlignmentCst && *BiasCst != *LowBitMaskCst)
+ return nullptr;
+
+ if (!XBiasedHighBits->hasOneUse()) {
+ if (*BiasCst == *LowBitMaskCst)
+ return XBiasedHighBits;
+ return nullptr;
+ }
+
+  // FIXME: could we preserve undefs here?
+ Type *Ty = X->getType();
+ Value *XOffset = Builder.CreateAdd(X, ConstantInt::get(Ty, *LowBitMaskCst),
+ X->getName() + ".biased");
+ Value *R = Builder.CreateAnd(XOffset, ConstantInt::get(Ty, *HighBitMaskCst));
+ R->takeName(&SI);
+ return R;
+}
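A concrete instance of the pattern for alignment 16, so %lowbitmask = 15, %bias = 15, and %highbitmask = -16 (names mirror the comment above; the "after" lines are the expected output):
  define i8 @round_up_to_16(i8 %x) {
    %x.lowbits = and i8 %x, 15
    %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0
    %x.biased = add i8 %x, 15
    %x.biased.highbits = and i8 %x.biased, -16
    %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits
    ret i8 %x.roundedup
  }
  ; after: %x.biased = add i8 %x, 15
  ;        %x.roundedup = and i8 %x.biased, -16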
+
+Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+ Type *SelType = SI.getType();
+
+ if (Value *V = simplifySelectInst(CondVal, TrueVal, FalseVal,
SQ.getWithInstruction(&SI)))
return replaceInstUsesWith(SI, V);
@@ -2747,8 +2651,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this))
return I;
- CmpInst::Predicate Pred;
-
// Avoid potential infinite loops by checking for non-constant condition.
// TODO: Can we assert instead by improving canonicalizeSelectToShuffle()?
// Scalar select must have simplified?
@@ -2757,13 +2659,29 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// Folding select to and/or i1 isn't poison safe in general. impliesPoison
// checks whether folding it does not convert a well-defined value into
// poison.
- if (match(TrueVal, m_One()) && impliesPoison(FalseVal, CondVal)) {
- // Change: A = select B, true, C --> A = or B, C
- return BinaryOperator::CreateOr(CondVal, FalseVal);
+ if (match(TrueVal, m_One())) {
+ if (impliesPoison(FalseVal, CondVal)) {
+ // Change: A = select B, true, C --> A = or B, C
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
+ }
+
+ if (auto *LHS = dyn_cast<FCmpInst>(CondVal))
+ if (auto *RHS = dyn_cast<FCmpInst>(FalseVal))
+ if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false,
+ /*IsSelectLogical*/ true))
+ return replaceInstUsesWith(SI, V);
}
- if (match(FalseVal, m_Zero()) && impliesPoison(TrueVal, CondVal)) {
- // Change: A = select B, C, false --> A = and B, C
- return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ if (match(FalseVal, m_Zero())) {
+ if (impliesPoison(TrueVal, CondVal)) {
+ // Change: A = select B, C, false --> A = and B, C
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
+ }
+
+ if (auto *LHS = dyn_cast<FCmpInst>(CondVal))
+ if (auto *RHS = dyn_cast<FCmpInst>(TrueVal))
+ if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true,
+ /*IsSelectLogical*/ true))
+ return replaceInstUsesWith(SI, V);
}
auto *One = ConstantInt::getTrue(SelType);
@@ -2821,6 +2739,20 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
match(TrueVal, m_Specific(B)) && match(FalseVal, m_Zero()))
return replaceOperand(SI, 0, A);
+ Value *C;
+ // select (~a | c), a, b -> and a, (or c, freeze(b))
+ if (match(CondVal, m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))) &&
+ CondVal->hasOneUse()) {
+ FalseVal = Builder.CreateFreeze(FalseVal);
+ return BinaryOperator::CreateAnd(TrueVal, Builder.CreateOr(C, FalseVal));
+ }
+ // select (~c & b), a, b -> and b, (or freeze(a), c)
+ if (match(CondVal, m_c_And(m_Not(m_Value(C)), m_Specific(FalseVal))) &&
+ CondVal->hasOneUse()) {
+ TrueVal = Builder.CreateFreeze(TrueVal);
+ return BinaryOperator::CreateAnd(FalseVal, Builder.CreateOr(C, TrueVal));
+ }
+
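An illustrative i1 example of the first of these folds (names invented; the freeze is what keeps a poison %b from turning the well-defined select result into poison):
  define i1 @sel_implied(i1 %a, i1 %b, i1 %c) {
    %nota = xor i1 %a, true
    %cond = or i1 %nota, %c
    %r = select i1 %cond, i1 %a, i1 %b
    ret i1 %r
  }
  ; after: %b.fr = freeze i1 %b
  ;        %or = or i1 %c, %b.fr
  ;        %r = and i1 %a, %or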
if (!SelType->isVectorTy()) {
if (Value *S = simplifyWithOpReplaced(TrueVal, CondVal, One, SQ,
/* AllowRefinement */ true))
@@ -2846,16 +2778,11 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
/* IsAnd */ IsAnd))
return I;
- if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) {
- if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) {
- if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd,
- /* IsLogical */ true))
+ if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal))
+ if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1))
+ if (auto *V = foldAndOrOfICmps(ICmp0, ICmp1, SI, IsAnd,
+ /* IsLogical */ true))
return replaceInstUsesWith(SI, V);
-
- if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd))
- return replaceInstUsesWith(SI, V);
- }
- }
}
// select (select a, true, b), c, false -> select a, c, false
@@ -2959,42 +2886,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
}
}
- // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
- // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
- // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
- if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
- match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) &&
- (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI);
- return replaceInstUsesWith(SI, Fabs);
- }
- // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X)
- if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
- match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) &&
- (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI);
- return replaceInstUsesWith(SI, Fabs);
- }
- // With nnan and nsz:
- // (X < +/-0.0) ? -X : X --> fabs(X)
- // (X <= +/-0.0) ? -X : X --> fabs(X)
- if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
- match(TrueVal, m_FNeg(m_Specific(FalseVal))) && SI.hasNoSignedZeros() &&
- (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
- Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI);
- return replaceInstUsesWith(SI, Fabs);
- }
- // With nnan and nsz:
- // (X > +/-0.0) ? X : -X --> fabs(X)
- // (X >= +/-0.0) ? X : -X --> fabs(X)
- if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
- match(FalseVal, m_FNeg(m_Specific(TrueVal))) && SI.hasNoSignedZeros() &&
- (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
- Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI);
- return replaceInstUsesWith(SI, Fabs);
- }
+ // Fold selecting to fabs.
+ if (Instruction *Fabs = foldSelectWithFCmpToFabs(SI, *this))
+ return Fabs;
// See if we are selecting two values based on a comparison of the two values.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
@@ -3066,8 +2960,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS), SPF2, LHS2,
RHS2, SI, SPF, LHS))
return R;
- // TODO.
- // ABS(-X) -> ABS(X)
}
if (SelectPatternResult::isMinOrMax(SPF)) {
@@ -3102,46 +2994,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType);
return replaceInstUsesWith(SI, NewCast);
}
-
- // MAX(~a, ~b) -> ~MIN(a, b)
- // MAX(~a, C) -> ~MIN(a, ~C)
- // MIN(~a, ~b) -> ~MAX(a, b)
- // MIN(~a, C) -> ~MAX(a, ~C)
- auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
- Value *A;
- if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) &&
- !isFreeToInvert(A, A->hasOneUse()) &&
- // Passing false to only consider m_Not and constants.
- isFreeToInvert(Y, false)) {
- Value *B = Builder.CreateNot(Y);
- Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF),
- A, B);
- // Copy the profile metadata.
- if (MDNode *MD = SI.getMetadata(LLVMContext::MD_prof)) {
- cast<SelectInst>(NewMinMax)->setMetadata(LLVMContext::MD_prof, MD);
- // Swap the metadata if the operands are swapped.
- if (X == SI.getFalseValue() && Y == SI.getTrueValue())
- cast<SelectInst>(NewMinMax)->swapProfMetadata();
- }
-
- return BinaryOperator::CreateNot(NewMinMax);
- }
-
- return nullptr;
- };
-
- if (Instruction *I = moveNotAfterMinMax(LHS, RHS))
- return I;
- if (Instruction *I = moveNotAfterMinMax(RHS, LHS))
- return I;
-
- if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder))
- return I;
-
- if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
- return I;
- if (Instruction *I = matchSAddSubSat(SI))
- return I;
}
}
@@ -3307,35 +3159,42 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
return replaceInstUsesWith(SI, Fr);
+ if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder))
+ return replaceInstUsesWith(SI, V);
+
// select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
// Load inst is intentionally not checked for hasOneUse()
if (match(FalseVal, m_Zero()) &&
- match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal),
- m_CombineOr(m_Undef(), m_Zero())))) {
- auto *MaskedLoad = cast<IntrinsicInst>(TrueVal);
- if (isa<UndefValue>(MaskedLoad->getArgOperand(3)))
- MaskedLoad->setArgOperand(3, FalseVal /* Zero */);
- return replaceInstUsesWith(SI, MaskedLoad);
+ (match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal),
+ m_CombineOr(m_Undef(), m_Zero()))) ||
+ match(TrueVal, m_MaskedGather(m_Value(), m_Value(), m_Specific(CondVal),
+ m_CombineOr(m_Undef(), m_Zero()))))) {
+ auto *MaskedInst = cast<IntrinsicInst>(TrueVal);
+ if (isa<UndefValue>(MaskedInst->getArgOperand(3)))
+ MaskedInst->setArgOperand(3, FalseVal /* Zero */);
+ return replaceInstUsesWith(SI, MaskedInst);
}
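A sketch of the masked-load case (typed-pointer syntax as accepted at this revision; names invented):
  declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
  define <4 x i32> @sel_masked_load(<4 x i32>* %p, <4 x i1> %mask) {
    %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %p, i32 4, <4 x i1> %mask, <4 x i32> zeroinitializer)
    %r = select <4 x i1> %mask, <4 x i32> %l, <4 x i32> zeroinitializer
    ret <4 x i32> %r
  }
  ; after: the select is dropped; masked-off lanes are already zero, so %r
  ; becomes the masked load itself (and likewise for llvm.masked.gather).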
Value *Mask;
if (match(TrueVal, m_Zero()) &&
- match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
- m_CombineOr(m_Undef(), m_Zero()))) &&
+ (match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
+ m_CombineOr(m_Undef(), m_Zero()))) ||
+ match(FalseVal, m_MaskedGather(m_Value(), m_Value(), m_Value(Mask),
+ m_CombineOr(m_Undef(), m_Zero())))) &&
(CondVal->getType() == Mask->getType())) {
      // We can remove the select by ensuring the load zeros all lanes the
      // select would have zeroed. We determine this by proving there is no overlap
// between the load and select masks.
// (i.e (load_mask & select_mask) == 0 == no overlap)
bool CanMergeSelectIntoLoad = false;
- if (Value *V = SimplifyAndInst(CondVal, Mask, SQ.getWithInstruction(&SI)))
+ if (Value *V = simplifyAndInst(CondVal, Mask, SQ.getWithInstruction(&SI)))
CanMergeSelectIntoLoad = match(V, m_Zero());
if (CanMergeSelectIntoLoad) {
- auto *MaskedLoad = cast<IntrinsicInst>(FalseVal);
- if (isa<UndefValue>(MaskedLoad->getArgOperand(3)))
- MaskedLoad->setArgOperand(3, TrueVal /* Zero */);
- return replaceInstUsesWith(SI, MaskedLoad);
+ auto *MaskedInst = cast<IntrinsicInst>(FalseVal);
+ if (isa<UndefValue>(MaskedInst->getArgOperand(3)))
+ MaskedInst->setArgOperand(3, TrueVal /* Zero */);
+ return replaceInstUsesWith(SI, MaskedInst);
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 17f0c5c4cff0..f4e2d1239f0f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -108,7 +107,7 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
// Can we fold (ShAmt0+ShAmt1) ?
auto *NewShAmt = dyn_cast_or_null<Constant>(
- SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
+ simplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false,
SQ.getWithInstruction(Sh0)));
if (!NewShAmt)
return nullptr; // Did not simplify.
@@ -232,7 +231,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
return nullptr;
// Can we simplify (MaskShAmt+ShiftShAmt) ?
- auto *SumOfShAmts = dyn_cast_or_null<Constant>(SimplifyAddInst(
+ auto *SumOfShAmts = dyn_cast_or_null<Constant>(simplifyAddInst(
MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
if (!SumOfShAmts)
return nullptr; // Did not simplify.
@@ -264,7 +263,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
return nullptr;
// Can we simplify (ShiftShAmt-MaskShAmt) ?
- auto *ShAmtsDiff = dyn_cast_or_null<Constant>(SimplifySubInst(
+ auto *ShAmtsDiff = dyn_cast_or_null<Constant>(simplifySubInst(
ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
if (!ShAmtsDiff)
return nullptr; // Did not simplify.
@@ -374,11 +373,12 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
assert(Op0->getType() == Op1->getType());
+ Type *Ty = I.getType();
// If the shift amount is a one-use `sext`, we can demote it to `zext`.
Value *Y;
if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) {
- Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName());
+ Value *NewExt = Builder.CreateZExt(Y, Ty, Op1->getName());
return BinaryOperator::Create(I.getOpcode(), Op0, NewExt);
}
@@ -400,15 +400,56 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)))
return NewShift;
- // (C1 shift (A add C2)) -> (C1 shift C2) shift A)
- // iff A and C2 are both positive.
+ // Pre-shift a constant shifted by a variable amount with constant offset:
+ // C shift (A add nuw C1) --> (C shift C1) shift A
Value *A;
- Constant *C;
- if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C))))
- if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) &&
- isKnownNonNegative(C, DL, 0, &AC, &I, &DT))
- return BinaryOperator::Create(
- I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A);
+ Constant *C, *C1;
+ if (match(Op0, m_Constant(C)) &&
+ match(Op1, m_NUWAdd(m_Value(A), m_Constant(C1)))) {
+ Value *NewC = Builder.CreateBinOp(I.getOpcode(), C, C1);
+ return BinaryOperator::Create(I.getOpcode(), NewC, A);
+ }
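For example (the fold applies to any shift opcode; shown here for shl, with invented names):
  define i8 @preshift_nuw_add(i8 %a) {
    %amt = add nuw i8 %a, 2
    %r = shl i8 3, %amt
    ret i8 %r
  }
  ; after: %r = shl i8 12, %a    ; 12 == (3 << 2)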
+
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+
+ const APInt *AC, *AddC;
+  // Try to pre-shift a constant shifted by a variable amount with a
+  // negative constant offset:
+ // C << (X - AddC) --> (C >> AddC) << X
+ // and
+ // C >> (X - AddC) --> (C << AddC) >> X
+ if (match(Op0, m_APInt(AC)) && match(Op1, m_Add(m_Value(A), m_APInt(AddC))) &&
+ AddC->isNegative() && (-*AddC).ult(BitWidth)) {
+ assert(!AC->isZero() && "Expected simplify of shifted zero");
+ unsigned PosOffset = (-*AddC).getZExtValue();
+
+ auto isSuitableForPreShift = [PosOffset, &I, AC]() {
+ switch (I.getOpcode()) {
+ default:
+ return false;
+ case Instruction::Shl:
+ return (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) &&
+ AC->eq(AC->lshr(PosOffset).shl(PosOffset));
+ case Instruction::LShr:
+ return I.isExact() && AC->eq(AC->shl(PosOffset).lshr(PosOffset));
+ case Instruction::AShr:
+ return I.isExact() && AC->eq(AC->shl(PosOffset).ashr(PosOffset));
+ }
+ };
+ if (isSuitableForPreShift()) {
+ Constant *NewC = ConstantInt::get(Ty, I.getOpcode() == Instruction::Shl
+ ? AC->lshr(PosOffset)
+ : AC->shl(PosOffset));
+ BinaryOperator *NewShiftOp =
+ BinaryOperator::Create(I.getOpcode(), NewC, A);
+ if (I.getOpcode() == Instruction::Shl) {
+ NewShiftOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ } else {
+ NewShiftOp->setIsExact();
+ }
+ return NewShiftOp;
+ }
+ }
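A sketch of the shl case (the nuw flag satisfies isSuitableForPreShift, and 64 survives the round trip 64 >> 3 << 3; names invented):
  define i8 @preshift_neg_offset(i8 %x) {
    %amt = add i8 %x, -3
    %r = shl nuw i8 64, %amt
    ret i8 %r
  }
  ; after: %r = shl nuw i8 8, %x    ; 8 == (64 >> 3)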
// X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2.
// Because shifts by negative values (which could occur if A were negative)
@@ -417,7 +458,7 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
match(C, m_Power2())) {
// FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
// demand the sign bit (and many others) here??
- Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(I.getType(), 1));
+ Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(Ty, 1));
Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName());
return replaceOperand(I, 1, Rem);
}
@@ -661,10 +702,18 @@ static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
}
}
-Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
+Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *C1,
BinaryOperator &I) {
+ // (C2 << X) << C1 --> (C2 << C1) << X
+ // (C2 >> X) >> C1 --> (C2 >> C1) >> X
+ Constant *C2;
+ Value *X;
+ if (match(Op0, m_BinOp(I.getOpcode(), m_Constant(C2), m_Value(X))))
+ return BinaryOperator::Create(
+ I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), C2, C1), X);
+
const APInt *Op1C;
- if (!match(Op1, m_APInt(Op1C)))
+ if (!match(C1, m_APInt(Op1C)))
return nullptr;
// See if we can propagate this shift into the input, this covers the trivial
@@ -701,11 +750,11 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
const APInt *Op0C;
if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
- Constant *NewRHS = ConstantExpr::get(
- I.getOpcode(), cast<Constant>(Op0BO->getOperand(1)), Op1);
+ Value *NewRHS =
+ Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(1), C1);
Value *NewShift =
- Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), C1);
NewShift->takeName(Op0BO);
return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS);
@@ -730,10 +779,10 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
match(TBO->getOperand(1), m_APInt(C)) &&
canShiftBinOpWithConstantRHS(I, TBO)) {
- Constant *NewRHS = ConstantExpr::get(
- I.getOpcode(), cast<Constant>(TBO->getOperand(1)), Op1);
+ Value *NewRHS =
+ Builder.CreateBinOp(I.getOpcode(), TBO->getOperand(1), C1);
- Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1);
+ Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, C1);
Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS);
return SelectInst::Create(Cond, NewOp, NewShift);
}
@@ -747,10 +796,10 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
match(FBO->getOperand(1), m_APInt(C)) &&
canShiftBinOpWithConstantRHS(I, FBO)) {
- Constant *NewRHS = ConstantExpr::get(
- I.getOpcode(), cast<Constant>(FBO->getOperand(1)), Op1);
+ Value *NewRHS =
+ Builder.CreateBinOp(I.getOpcode(), FBO->getOperand(1), C1);
- Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1);
+ Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, C1);
Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS);
return SelectInst::Create(Cond, NewShift, NewOp);
}
@@ -762,7 +811,7 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
const SimplifyQuery Q = SQ.getWithInstruction(&I);
- if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
+ if (Value *V = simplifyShlInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q))
return replaceInstUsesWith(I, V);
@@ -968,10 +1017,6 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
if (match(Op1, m_Constant(C1))) {
Constant *C2;
Value *X;
- // (C2 << X) << C1 --> (C2 << C1) << X
- if (match(Op0, m_OneUse(m_Shl(m_Constant(C2), m_Value(X)))))
- return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X);
-
// (X * C2) << C1 --> X * (C2 << C1)
if (match(Op0, m_Mul(m_Value(X), m_Constant(C2))))
return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
@@ -993,7 +1038,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
}
Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
- if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
+ if (Value *V = simplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
@@ -1164,15 +1209,54 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
}
}
- // Look for a "splat" mul pattern - it replicates bits across each half of
- // a value, so a right shift is just a mask of the low bits:
- // lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1
- // TODO: Generalize to allow more than just half-width shifts?
const APInt *MulC;
- if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC))) &&
- ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
- MulC->logBase2() == ShAmtC)
- return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+ if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
+ // Look for a "splat" mul pattern - it replicates bits across each half of
+ // a value, so a right shift is just a mask of the low bits:
+    // lshr i[2N] (mul nuw X, (2^N)+1), N --> and i[2N] X, (2^N)-1
+ // TODO: Generalize to allow more than just half-width shifts?
+ if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
+ MulC->logBase2() == ShAmtC)
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+
+ // The one-use check is not strictly necessary, but codegen may not be
+ // able to invert the transform and perf may suffer with an extra mul
+ // instruction.
+ if (Op0->hasOneUse()) {
+ APInt NewMulC = MulC->lshr(ShAmtC);
+      // if MulC is divisible by (1 << ShAmtC):
+ // lshr (mul nuw x, MulC), ShAmtC -> mul nuw x, (MulC >> ShAmtC)
+ if (MulC->eq(NewMulC.shl(ShAmtC))) {
+ auto *NewMul =
+ BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
+ BinaryOperator *OrigMul = cast<BinaryOperator>(Op0);
+ NewMul->setHasNoSignedWrap(OrigMul->hasNoSignedWrap());
+ return NewMul;
+ }
+ }
+ }
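An illustrative case of the divisibility fold (48 == 3 << 4, and the mul has a single use):
  define i16 @shr_of_mul(i16 %x) {
    %m = mul nuw i16 %x, 48
    %r = lshr i16 %m, 4
    ret i16 %r
  }
  ; after: %r = mul nuw i16 %x, 3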
+
+ // Try to narrow bswap.
+ // In the case where the shift amount equals the bitwidth difference, the
+ // shift is eliminated.
+ if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::bswap>(
+ m_OneUse(m_ZExt(m_Value(X))))))) {
+ unsigned SrcWidth = X->getType()->getScalarSizeInBits();
+ unsigned WidthDiff = BitWidth - SrcWidth;
+ if (SrcWidth % 16 == 0) {
+ Value *NarrowSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
+ if (ShAmtC >= WidthDiff) {
+ // (bswap (zext X)) >> C --> zext (bswap X >> C')
+ Value *NewShift = Builder.CreateLShr(NarrowSwap, ShAmtC - WidthDiff);
+ return new ZExtInst(NewShift, Ty);
+ } else {
+ // (bswap (zext X)) >> C --> (zext (bswap X)) << C'
+ Value *NewZExt = Builder.CreateZExt(NarrowSwap, Ty);
+ Constant *ShiftDiff = ConstantInt::get(Ty, WidthDiff - ShAmtC);
+ return BinaryOperator::CreateShl(NewZExt, ShiftDiff);
+ }
+ }
+ }
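A sketch of the width-difference case mentioned above, where the shift disappears entirely (names invented):
  declare i32 @llvm.bswap.i32(i32)
  define i32 @narrow_bswap(i16 %x) {
    %z = zext i16 %x to i32
    %b = call i32 @llvm.bswap.i32(i32 %z)
    %r = lshr i32 %b, 16
    ret i32 %r
  }
  ; after: %nb = call i16 @llvm.bswap.i16(i16 %x)
  ;        %r = zext i16 %nb to i32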
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
@@ -1263,7 +1347,7 @@ InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract(
}
Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
- if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
+ if (Value *V = simplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 3f064cfda712..9d4c01ac03e2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
@@ -154,6 +154,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (Depth == 0 && !V->hasOneUse())
DemandedMask.setAllBits();
+ // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
+ // about the high bits of the operands.
+ auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ // Right fill the mask of bits for the operands to demand the most
+ // significant bit and all those below it.
+ DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
+ if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
+ SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
+ ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
+ SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
+ if (NLZ > 0) {
+ // Disable the nsw and nuw flags here: We can no longer guarantee that
+ // we won't wrap after simplification. Removing the nsw/nuw flags is
+ // legal here because the top bit is not demanded.
+ I->setHasNoSignedWrap(false);
+ I->setHasNoUnsignedWrap(false);
+ }
+ return true;
+ }
+ return false;
+ };
+
switch (I->getOpcode()) {
default:
computeKnownBits(I, Known, Depth, CxtI);
@@ -297,13 +320,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
(LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
- Constant *AndC =
- ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
+ Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue());
Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
InsertNewInstWith(NewAnd, *I);
- Constant *XorC =
- ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
+ Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue());
Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
return InsertNewInstWith(NewXor, *I);
}
@@ -311,33 +332,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Select: {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
- if (SPF == SPF_UMAX) {
- // UMax(A, C) == A if ...
- // The lowest non-zero bit of DemandMask is higher than the highest
- // non-zero bit of C.
- const APInt *C;
- unsigned CTZ = DemandedMask.countTrailingZeros();
- if (match(RHS, m_APInt(C)) && CTZ >= C->getActiveBits())
- return LHS;
- } else if (SPF == SPF_UMIN) {
- // UMin(A, C) == A if ...
- // The lowest non-zero bit of DemandMask is higher than the highest
- // non-one bit of C.
- // This comes from using DeMorgans on the above umax example.
- const APInt *C;
- unsigned CTZ = DemandedMask.countTrailingZeros();
- if (match(RHS, m_APInt(C)) &&
- CTZ >= C->getBitWidth() - C->countLeadingOnes())
- return LHS;
- }
-
- // If this is a select as part of any other min/max pattern, don't simplify
- // any further in case we break the structure.
- if (SPF != SPF_UNKNOWN)
- return nullptr;
-
if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
return I;
@@ -393,12 +387,12 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
// The shift amount must be valid (not poison) in the narrow type, and
// it must not be greater than the high bits demanded of the result.
- if (C->ult(I->getType()->getScalarSizeInBits()) &&
+ if (C->ult(VTy->getScalarSizeInBits()) &&
C->ule(DemandedMask.countLeadingZeros())) {
// trunc (lshr X, C) --> lshr (trunc X), C
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(I);
- Value *Trunc = Builder.CreateTrunc(X, I->getType());
+ Value *Trunc = Builder.CreateTrunc(X, VTy);
return Builder.CreateLShr(Trunc, C->getZExtValue());
}
}
@@ -420,9 +414,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
return nullptr; // vector->int or fp->int?
- if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
- if (VectorType *SrcVTy =
- dyn_cast<VectorType>(I->getOperand(0)->getType())) {
+ if (auto *DstVTy = dyn_cast<VectorType>(VTy)) {
+ if (auto *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) {
if (cast<FixedVectorType>(DstVTy)->getNumElements() !=
cast<FixedVectorType>(SrcVTy)->getNumElements())
// Don't touch a bitcast between vectors of different element counts.
@@ -507,26 +500,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
LLVM_FALLTHROUGH;
case Instruction::Sub: {
- /// If the high-bits of an ADD/SUB are not demanded, then we do not care
- /// about the high bits of the operands.
- unsigned NLZ = DemandedMask.countLeadingZeros();
- // Right fill the mask of bits for this ADD/SUB to demand the most
- // significant bit and all those below it.
- APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
- if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
- SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
- ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
- SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
- if (NLZ > 0) {
- // Disable the nsw and nuw flags here: We can no longer guarantee that
- // we won't wrap after simplification. Removing the nsw/nuw flags is
- // legal here because the top bit is not demanded.
- BinaryOperator &BinOP = *cast<BinaryOperator>(I);
- BinOP.setHasNoSignedWrap(false);
- BinOP.setHasNoUnsignedWrap(false);
- }
+ APInt DemandedFromOps;
+ if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
return I;
- }
// If we are known to be adding/subtracting zeros to every bit below
// the highest demanded bit, we just return the other side.
@@ -544,6 +520,36 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
NSW, LHSKnown, RHSKnown);
break;
}
+ case Instruction::Mul: {
+ APInt DemandedFromOps;
+ if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
+ return I;
+
+ if (DemandedMask.isPowerOf2()) {
+ // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
+ // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
+ // odd (has LSB set), then the left-shifted low bit of X is the answer.
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ const APInt *C;
+ if (match(I->getOperand(1), m_APInt(C)) &&
+ C->countTrailingZeros() == CTZ) {
+ Constant *ShiftC = ConstantInt::get(VTy, CTZ);
+ Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
+ return InsertNewInstWith(Shl, *I);
+ }
+ }
+    // For a squared value "X * X", bit 1 is always 0 and bit 0 equals X[0]:
+    // X * X is odd iff X is odd, and writing X = 2a + b (with b = X[0])
+    // gives X * X = 4a^2 + 4ab + b^2, so bit 1 of the product is always 0.
+ if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
+ Constant *One = ConstantInt::get(VTy, 1);
+ Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
+ return InsertNewInstWith(And1, *I);
+ }
+
+ computeKnownBits(I, Known, Depth, CxtI);
+ break;
+ }
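An illustrative demanded-bits example for the squared-value rule (only the low two bits of the product are demanded):
  define i8 @square_low_bits(i8 %x) {
    %sq = mul i8 %x, %x
    %r = and i8 %sq, 3
    ret i8 %r
  }
  ; after: %r = and i8 %x, 1    ; bit 1 of a square is always zero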
case Instruction::Shl: {
const APInt *SA;
if (match(I->getOperand(1), m_APInt(SA))) {
@@ -554,7 +560,26 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask, Known))
return R;
+ // TODO: If we only want bits that already match the signbit then we don't
+ // need to shift.
+
+ // If we can pre-shift a right-shifted constant to the left without
+    // losing any high bits and we don't demand the low bits, then eliminate
+ // the left-shift:
+ // (C >> X) << LeftShiftAmtC --> (C << RightShiftAmtC) >> X
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ Value *X;
+ Constant *C;
+ if (DemandedMask.countTrailingZeros() >= ShiftAmt &&
+ match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
+ Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
+ Constant *NewC = ConstantExpr::getShl(C, LeftShiftAmtC);
+ if (ConstantExpr::getLShr(NewC, LeftShiftAmtC) == C) {
+ Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
+ return InsertNewInstWith(Lshr, *I);
+ }
+ }
+
APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
// If the shift is NUW/NSW, then it does demand the high bits.
@@ -584,7 +609,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (SignBitOne)
Known.One.setSignBit();
if (Known.hasConflict())
- return UndefValue::get(I->getType());
+ return UndefValue::get(VTy);
}
} else {
// This is a variable shift, so we can't shift the demand mask by a known
@@ -607,6 +632,34 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (match(I->getOperand(1), m_APInt(SA))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+ // If we are just demanding the shifted sign bit and below, then this can
+ // be treated as an ASHR in disguise.
+ if (DemandedMask.countLeadingZeros() >= ShiftAmt) {
+ // If we only want bits that already match the signbit then we don't
+ // need to shift.
+ unsigned NumHiDemandedBits =
+ BitWidth - DemandedMask.countTrailingZeros();
+ unsigned SignBits =
+ ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
+ if (SignBits >= NumHiDemandedBits)
+ return I->getOperand(0);
+
+ // If we can pre-shift a left-shifted constant to the right without
+ // losing any low bits (we already know we don't demand the high bits),
+ // then eliminate the right-shift:
+ // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
+ Value *X;
+ Constant *C;
+ if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
+ Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
+ Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC);
+ if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) {
+ Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
+ return InsertNewInstWith(Shl, *I);
+ }
+ }
+ }
+
// Unsigned shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
@@ -628,6 +681,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::AShr: {
+ unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
+
+ // If we only want bits that already match the signbit then we don't need
+ // to shift.
+ unsigned NumHiDemandedBits = BitWidth - DemandedMask.countTrailingZeros();
+ if (SignBits >= NumHiDemandedBits)
+ return I->getOperand(0);
+
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
@@ -639,11 +700,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return InsertNewInstWith(NewVal, *I);
}
- // If the sign bit is the only bit demanded by this ashr, then there is no
- // need to do it, the shift doesn't change the high bit.
- if (DemandedMask.isSignMask())
- return I->getOperand(0);
-
const APInt *SA;
if (match(I->getOperand(1), m_APInt(SA))) {
uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
@@ -663,8 +719,6 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
-
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now plus sign bits.
APInt HighBits(APInt::getHighBitsSet(
@@ -713,13 +767,13 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::SRem: {
- ConstantInt *Rem;
- if (match(I->getOperand(1), m_ConstantInt(Rem))) {
+ const APInt *Rem;
+ if (match(I->getOperand(1), m_APInt(Rem))) {
// X % -1 demands all the bits because we don't want to introduce
// INT_MIN % -1 (== undef) by accident.
- if (Rem->isMinusOne())
+ if (Rem->isAllOnes())
break;
- APInt RA = Rem->getValue().abs();
+ APInt RA = Rem->abs();
if (RA.isPowerOf2()) {
if (DemandedMask.ult(RA)) // srem won't affect demanded bits
return I->getOperand(0);
@@ -786,7 +840,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 &&
match(II->getArgOperand(0), m_Not(m_Value(X)))) {
Function *Ctpop = Intrinsic::getDeclaration(
- II->getModule(), Intrinsic::ctpop, II->getType());
+ II->getModule(), Intrinsic::ctpop, VTy);
return InsertNewInstWith(CallInst::Create(Ctpop, {X}), *I);
}
break;
@@ -809,12 +863,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *NewVal;
if (NLZ > NTZ)
NewVal = BinaryOperator::CreateLShr(
- II->getArgOperand(0),
- ConstantInt::get(I->getType(), NLZ - NTZ));
+ II->getArgOperand(0), ConstantInt::get(VTy, NLZ - NTZ));
else
NewVal = BinaryOperator::CreateShl(
- II->getArgOperand(0),
- ConstantInt::get(I->getType(), NTZ - NLZ));
+ II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ));
NewVal->takeName(I);
return InsertNewInstWith(NewVal, *I);
}
@@ -872,7 +924,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Handle target specific intrinsics
Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
*II, DemandedMask, Known, KnownBitsComputed);
- if (V.hasValue())
+ if (V)
return V.getValue();
break;
}
@@ -1583,7 +1635,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
*II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
simplifyAndSetOp);
- if (V.hasValue())
+ if (V)
return V.getValue();
break;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 736cf9c825d5..22659a8e4951 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -42,7 +42,6 @@
#include <utility>
#define DEBUG_TYPE "instcombine"
-#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm;
using namespace PatternMatch;
@@ -378,7 +377,7 @@ ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
Value *SrcVec = EI.getVectorOperand();
Value *Index = EI.getIndexOperand();
- if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
+ if (Value *V = simplifyExtractElementInst(SrcVec, Index,
SQ.getWithInstruction(&EI)))
return replaceInstUsesWith(EI, V);
@@ -879,7 +878,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
// of an aggregate. If we did, that means the CurrIVI will later be
// overwritten with the already-recorded value. But if not, let's record it!
Optional<Instruction *> &Elt = AggElts[Indices.front()];
- Elt = Elt.getValueOr(InsertedValue);
+ Elt = Elt.value_or(InsertedValue);
// FIXME: should we handle chain-terminating undef base operand?
}
@@ -1489,7 +1488,7 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
Value *ScalarOp = IE.getOperand(1);
Value *IdxOp = IE.getOperand(2);
- if (auto *V = SimplifyInsertElementInst(
+ if (auto *V = simplifyInsertElementInst(
VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
return replaceInstUsesWith(IE, V);
@@ -1919,24 +1918,29 @@ static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
Type *Ty = BO->getType();
switch (BO->getOpcode()) {
- case Instruction::Shl: {
- // shl X, C --> mul X, (1 << C)
- Constant *C;
- if (match(BO1, m_Constant(C))) {
- Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
- return { Instruction::Mul, BO0, ShlOne };
- }
- break;
- }
- case Instruction::Or: {
- // or X, C --> add X, C (when X and C have no common bits set)
- const APInt *C;
- if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
- return { Instruction::Add, BO0, BO1 };
- break;
+ case Instruction::Shl: {
+ // shl X, C --> mul X, (1 << C)
+ Constant *C;
+ if (match(BO1, m_Constant(C))) {
+ Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
+ return {Instruction::Mul, BO0, ShlOne};
}
- default:
- break;
+ break;
+ }
+ case Instruction::Or: {
+ // or X, C --> add X, C (when X and C have no common bits set)
+ const APInt *C;
+ if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
+ return {Instruction::Add, BO0, BO1};
+ break;
+ }
+ case Instruction::Sub:
+ // sub 0, X --> mul X, -1
+ if (match(BO0, m_ZeroInt()))
+ return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
+ break;
+ default:
+ break;
}
return {};
}
@@ -2053,15 +2057,20 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
!match(Shuf.getOperand(1), m_BinOp(B1)))
return nullptr;
+ // If one operand is "0 - X", allow that to be viewed as "X * -1"
+ // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
+ // with a multiply, we will exit because C0/C1 will not be set.
Value *X, *Y;
- Constant *C0, *C1;
+ Constant *C0 = nullptr, *C1 = nullptr;
bool ConstantsAreOp1;
- if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
- match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
- ConstantsAreOp1 = true;
- else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
- match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
+ if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
+ match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
ConstantsAreOp1 = false;
+ else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
+ m_Neg(m_Value(X)))) &&
+ match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
+ m_Neg(m_Value(Y)))))
+ ConstantsAreOp1 = true;
else
return nullptr;
@@ -2086,7 +2095,7 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
}
}
- if (Opc0 != Opc1)
+ if (Opc0 != Opc1 || !C0 || !C1)
return nullptr;
// The opcodes must be the same. Use a new name to make that clear.
@@ -2233,6 +2242,88 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}
+/// Canonicalize FP negate after shuffle.
+static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ Instruction *FNeg0;
+ Value *X;
+ if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
+ m_FNeg(m_Value(X)))))
+ return nullptr;
+
+ // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
+ if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
+ Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
+ return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
+ }
+
+ Instruction *FNeg1;
+ Value *Y;
+ if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
+ m_FNeg(m_Value(Y)))))
+ return nullptr;
+
+ // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
+ if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
+ Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+ Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
+ NewFNeg->copyIRFlags(FNeg0);
+ NewFNeg->andIRFlags(FNeg1);
+ return NewFNeg;
+ }
+
+ return nullptr;
+}
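A sketch of the two-operand form (both fnegs have one use, so the one-use check is satisfied; names invented):
  define <4 x float> @shuf_of_fnegs(<4 x float> %x, <4 x float> %y) {
    %nx = fneg <4 x float> %x
    %ny = fneg <4 x float> %y
    %s = shufflevector <4 x float> %nx, <4 x float> %ny, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    ret <4 x float> %s
  }
  ; after: %s1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ;        %s = fneg <4 x float> %s1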
+
+/// Canonicalize casts after shuffle.
+static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ // Do we have 2 matching cast operands?
+ auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
+ auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
+ if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
+ Cast0->getSrcTy() != Cast1->getSrcTy())
+ return nullptr;
+
+  // TODO: Allow other opcodes? That would require easing the type
+  // restrictions below.
+ CastInst::CastOps CastOpcode = Cast0->getOpcode();
+ switch (CastOpcode) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ break;
+ default:
+ return nullptr;
+ }
+
+ VectorType *ShufTy = Shuf.getType();
+ VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
+ VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
+
+ // TODO: Allow length-increasing shuffles?
+ if (ShufTy->getElementCount().getKnownMinValue() >
+ ShufOpTy->getElementCount().getKnownMinValue())
+ return nullptr;
+
+ // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
+ assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
+ "Expected fixed vector operands for casts and binary shuffle");
+ if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
+ return nullptr;
+
+ // At least one of the operands must have only one use (the shuffle).
+ if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
+ return nullptr;
+
+ // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
+ Value *X = Cast0->getOperand(0);
+ Value *Y = Cast1->getOperand(0);
+ Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+ return CastInst::Create(CastOpcode, NewShuf, ShufTy);
+}
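For illustration, a same-size sitofp case that satisfies the restrictions above (equal source types, no length increase, one-use casts; names invented):
  define <4 x float> @shuf_of_casts(<4 x i32> %x, <4 x i32> %y) {
    %cx = sitofp <4 x i32> %x to <4 x float>
    %cy = sitofp <4 x i32> %y to <4 x float>
    %s = shufflevector <4 x float> %cx, <4 x float> %cy, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    ret <4 x float> %s
  }
  ; after: %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ;        %s = sitofp <4 x i32> %s1 to <4 x float>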
+
/// Try to fold an extract subvector operation.
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
@@ -2442,7 +2533,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
- if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
+ if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
SVI.getType(), ShufQuery))
return replaceInstUsesWith(SVI, V);
@@ -2497,7 +2588,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (!ScaledMask.empty()) {
// If the shuffled source vector simplifies, cast that value to this
// shuffle's type.
- if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
+ if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
ScaledMask, XType, ShufQuery))
return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
}
@@ -2528,6 +2619,12 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = narrowVectorSelect(SVI, Builder))
return I;
+ if (Instruction *I = foldFNegShuffle(SVI, Builder))
+ return I;
+
+ if (Instruction *I = foldCastShuffle(SVI, Builder))
+ return I;
+
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3091905ca534..0816a4a575d9 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -42,7 +42,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -60,6 +59,7 @@
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
@@ -90,8 +90,6 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -140,6 +138,10 @@ static cl::opt<bool>
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
cl::init(true));
+static cl::opt<unsigned> MaxSinkNumUsers(
+ "instcombine-max-sink-users", cl::init(32),
+ cl::desc("Maximum number of undroppable users for instruction sinking"));
+
static cl::opt<unsigned> LimitMaxIterations(
"instcombine-max-iterations",
cl::desc("Limit the maximum number of instruction combining iterations"),
@@ -424,7 +426,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = I.getOperand(1);
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
+ if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "A op V".
replaceOperand(I, 0, A);
replaceOperand(I, 1, V);
@@ -457,7 +459,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
+ if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "V op C".
replaceOperand(I, 0, V);
replaceOperand(I, 1, C);
@@ -485,7 +487,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = I.getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
+ if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "V op B".
replaceOperand(I, 0, V);
replaceOperand(I, 1, B);
@@ -505,7 +507,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
+ if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
// It simplifies to V. Form "B op V".
replaceOperand(I, 0, B);
replaceOperand(I, 1, V);
@@ -652,7 +654,7 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
std::swap(C, D);
// Consider forming "A op' (B op D)".
// If "B op D" simplifies then it can be formed with no cost.
- V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
+ V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
// If "B op D" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && LHS->hasOneUse() && RHS->hasOneUse())
@@ -671,7 +673,7 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I,
std::swap(C, D);
// Consider forming "(A op C) op' B".
// If "A op C" simplifies then it can be formed with no cost.
- V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
+ V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
// If "A op C" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
@@ -780,8 +782,8 @@ Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
// Disable the use of undef because it's not safe to distribute undef.
auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
- Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
- Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
+ Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
+ Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
// Do "A op C" and "B op C" both simplify?
if (L && R) {
@@ -819,8 +821,8 @@ Value *InstCombinerImpl::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
// Disable the use of undef because it's not safe to distribute undef.
auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
- Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
- Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
+ Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
+ Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
// Do "A op B" and "A op C" both simplify?
if (L && R) {
@@ -876,8 +878,8 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
if (LHSIsSelect && RHSIsSelect && A == D) {
// (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
Cond = A;
- True = SimplifyBinOp(Opcode, B, E, FMF, Q);
- False = SimplifyBinOp(Opcode, C, F, FMF, Q);
+ True = simplifyBinOp(Opcode, B, E, FMF, Q);
+ False = simplifyBinOp(Opcode, C, F, FMF, Q);
if (LHS->hasOneUse() && RHS->hasOneUse()) {
if (False && !True)
@@ -888,13 +890,13 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
} else if (LHSIsSelect && LHS->hasOneUse()) {
// (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
Cond = A;
- True = SimplifyBinOp(Opcode, B, RHS, FMF, Q);
- False = SimplifyBinOp(Opcode, C, RHS, FMF, Q);
+ True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
+ False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
} else if (RHSIsSelect && RHS->hasOneUse()) {
// X op (D ? E : F) -> D ? (X op E) : (X op F)
Cond = D;
- True = SimplifyBinOp(Opcode, LHS, E, FMF, Q);
- False = SimplifyBinOp(Opcode, LHS, F, FMF, Q);
+ True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
+ False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
}
if (!True || !False)
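
The transform described by the comments above can be seen end-to-end in a small hand-written example (constant select arms are used so that both folded binops simplify; names are illustrative):

define i32 @before(i1 %c) {
  %s1 = select i1 %c, i32 1, i32 2
  %s2 = select i1 %c, i32 10, i32 20
  %r = add i32 %s1, %s2              ; (c ? 1 : 2) + (c ? 10 : 20)
  ret i32 %r
}

define i32 @after(i1 %c) {
  %r = select i1 %c, i32 11, i32 22  ; c ? (1 + 10) : (2 + 20)
  ret i32 %r
}
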
@@ -986,8 +988,8 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
// bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
Constant *Zero = ConstantInt::getNullValue(BO.getType());
- Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C);
- Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C);
+ Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
+ Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
return SelectInst::Create(X, TVal, FVal);
}
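
Concretely, for the "bo (sext i1 X), C" pattern above, a minimal sketch (opcode and constants chosen only for illustration) looks like:

define i32 @before(i1 %x) {
  %s = sext i1 %x to i32           ; -1 when %x is true, 0 when false
  %r = add i32 %s, 7
  ret i32 %r
}

define i32 @after(i1 %x) {
  %r = select i1 %x, i32 6, i32 7  ; the add is folded into each arm
  ret i32 %r
}
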
@@ -1018,12 +1020,6 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
bool ConstIsRHS = isa<Constant>(I.getOperand(1));
Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
- if (auto *SOC = dyn_cast<Constant>(SO)) {
- if (ConstIsRHS)
- return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
- return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
- }
-
Value *Op0 = SO, *Op1 = ConstOperand;
if (!ConstIsRHS)
std::swap(Op0, Op1);
@@ -1035,10 +1031,10 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
return NewBO;
}
-Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
- SelectInst *SI) {
- // Don't modify shared select instructions.
- if (!SI->hasOneUse())
+Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
+ bool FoldWithMultiUse) {
+ // Don't modify shared select instructions unless FoldWithMultiUse is set.
+ if (!SI->hasOneUse() && !FoldWithMultiUse)
return nullptr;
Value *TV = SI->getTrueValue();
@@ -1114,12 +1110,6 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
bool ConstIsRHS = isa<Constant>(I->getOperand(1));
Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
- if (auto *InC = dyn_cast<Constant>(InV)) {
- if (ConstIsRHS)
- return ConstantExpr::get(I->getOpcode(), InC, C);
- return ConstantExpr::get(I->getOpcode(), C, InC);
- }
-
Value *Op0 = InV, *Op1 = C;
if (!ConstIsRHS)
std::swap(Op0, Op1);
@@ -1175,10 +1165,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
if (cast<Instruction>(InVal)->getParent() == NonConstBB)
return nullptr;
- // If the incoming non-constant value is in I's block, we will remove one
- // instruction, but insert another equivalent one, leading to infinite
- // instcombine.
- if (isPotentiallyReachable(I.getParent(), NonConstBB, nullptr, &DT, LI))
+ // If the incoming non-constant value is reachable from the phi's block,
+ // we'll push the operation across a loop backedge. This could result in
+ // an infinite combine loop, and is generally unprofitable (especially
+ // if the operation was originally outside the loop).
+ if (isPotentiallyReachable(PN->getParent(), NonConstBB, nullptr, &DT, LI))
return nullptr;
}
@@ -1941,10 +1932,8 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
SmallVector<Value *, 4> IndexC(GEP.indices());
bool IsInBounds = GEP.isInBounds();
Type *Ty = GEP.getSourceElementType();
- Value *NewTrueC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, TrueC, IndexC)
- : Builder.CreateGEP(Ty, TrueC, IndexC);
- Value *NewFalseC = IsInBounds ? Builder.CreateInBoundsGEP(Ty, FalseC, IndexC)
- : Builder.CreateGEP(Ty, FalseC, IndexC);
+ Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", IsInBounds);
+ Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", IsInBounds);
return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
}
@@ -1953,13 +1942,11 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
// Combine Indices - If the source pointer to this getelementptr instruction
// is a getelementptr instruction with matching element type, combine the
// indices of the two getelementptr instructions into a single instruction.
- if (Src->getResultElementType() != GEP.getSourceElementType())
- return nullptr;
-
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
return nullptr;
- if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
+ if (Src->getResultElementType() == GEP.getSourceElementType() &&
+ Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
Src->hasOneUse()) {
Value *GO1 = GEP.getOperand(1);
Value *SO1 = Src->getOperand(1);
@@ -1971,45 +1958,21 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
// invariant: this breaks the dependence between GEPs and allows LICM
// to hoist the invariant part out of the loop.
if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
- // We have to be careful here.
- // We have something like:
- // %src = getelementptr <ty>, <ty>* %base, <ty> %idx
- // %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
- // If we just swap idx & idx2 then we could inadvertently
- // change %src from a vector to a scalar, or vice versa.
- // Cases:
- // 1) %base a scalar & idx a scalar & idx2 a vector
- // => Swapping idx & idx2 turns %src into a vector type.
- // 2) %base a scalar & idx a vector & idx2 a scalar
- // => Swapping idx & idx2 turns %src into a scalar type
- // 3) %base, %idx, and %idx2 are scalars
- // => %src & %gep are scalars
- // => swapping idx & idx2 is safe
- // 4) %base a vector
- // => %src is a vector
- // => swapping idx & idx2 is safe.
- auto *SO0 = Src->getOperand(0);
- auto *SO0Ty = SO0->getType();
- if (!isa<VectorType>(GEP.getType()) || // case 3
- isa<VectorType>(SO0Ty)) { // case 4
- Src->setOperand(1, GO1);
- GEP.setOperand(1, SO1);
- return &GEP;
- } else {
- // Case 1 or 2
- // -- have to recreate %src & %gep
- // put NewSrc at same location as %src
- Builder.SetInsertPoint(cast<Instruction>(Src));
- Value *NewSrc = Builder.CreateGEP(
- GEP.getSourceElementType(), SO0, GO1, Src->getName());
- // Propagate 'inbounds' if the new source was not constant-folded.
- if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
- NewSrcGEPI->setIsInBounds(Src->isInBounds());
- GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
- GEP.getSourceElementType(), NewSrc, {SO1});
- NewGEP->setIsInBounds(GEP.isInBounds());
- return NewGEP;
- }
+ // The swapped GEPs are inbounds if both original GEPs are inbounds
+ // and the sign of the offsets is the same. For simplicity, we only
+ // handle the case where both offsets are non-negative.
+ bool IsInBounds = Src->isInBounds() && GEP.isInBounds() &&
+ isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) &&
+ isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT);
+ // Put NewSrc at same location as %src.
+ Builder.SetInsertPoint(cast<Instruction>(Src));
+ Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(),
+ Src->getPointerOperand(), GO1,
+ Src->getName(), IsInBounds);
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+ GEP.getSourceElementType(), NewSrc, {SO1});
+ NewGEP->setIsInBounds(IsInBounds);
+ return NewGEP;
}
}
}
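
A hand-written sketch of the swap (zexts are used only so that both offsets are provably non-negative and inbounds can be kept; all names are illustrative):

define void @before(ptr %base, i32 %idx, i32 %n) {
entry:
  %inv = zext i32 %idx to i64             ; loop-invariant offset
  br label %loop
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %var = zext i32 %i to i64               ; varies per iteration
  %src = getelementptr inbounds i8, ptr %base, i64 %var
  %gep = getelementptr inbounds i8, ptr %src, i64 %inv
  store i8 0, ptr %gep
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i.next, %n
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; After the swap, the first GEP no longer depends on the loop and LICM can
; hoist it:
;   %src.new = getelementptr inbounds i8, ptr %base, i64 %inv
;   %gep.new = getelementptr inbounds i8, ptr %src.new, i64 %var
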
@@ -2022,6 +1985,87 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
return nullptr; // Wait until our source is folded to completion.
+ // For constant GEPs, use a more general offset-based folding approach.
+ // Only do this for opaque pointers, as the result element type may change.
+ Type *PtrTy = Src->getType()->getScalarType();
+ if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
+ (Src->hasOneUse() || Src->hasAllConstantIndices())) {
+ // Split Src into a variable part and a constant suffix.
+ gep_type_iterator GTI = gep_type_begin(*Src);
+ Type *BaseType = GTI.getIndexedType();
+ bool IsFirstType = true;
+ unsigned NumVarIndices = 0;
+ for (auto Pair : enumerate(Src->indices())) {
+ if (!isa<ConstantInt>(Pair.value())) {
+ BaseType = GTI.getIndexedType();
+ IsFirstType = false;
+ NumVarIndices = Pair.index() + 1;
+ }
+ ++GTI;
+ }
+
+ // Determine the offset for the constant suffix of Src.
+ APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
+ if (NumVarIndices != Src->getNumIndices()) {
+ // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
+ if (isa<ScalableVectorType>(BaseType))
+ return nullptr;
+
+ SmallVector<Value *> ConstantIndices;
+ if (!IsFirstType)
+ ConstantIndices.push_back(
+ Constant::getNullValue(Type::getInt32Ty(GEP.getContext())));
+ append_range(ConstantIndices, drop_begin(Src->indices(), NumVarIndices));
+ Offset += DL.getIndexedOffsetInType(BaseType, ConstantIndices);
+ }
+
+ // Add the offset for GEP (which is fully constant).
+ if (!GEP.accumulateConstantOffset(DL, Offset))
+ return nullptr;
+
+ APInt OffsetOld = Offset;
+ // Convert the total offset back into indices.
+ SmallVector<APInt> ConstIndices =
+ DL.getGEPIndicesForOffset(BaseType, Offset);
+ if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) {
+ // If both GEP are constant-indexed, and cannot be merged in either way,
+ // convert them to a GEP of i8.
+ if (Src->hasAllConstantIndices())
+ return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
+ ? GetElementPtrInst::CreateInBounds(
+ Builder.getInt8Ty(), Src->getOperand(0),
+ Builder.getInt(OffsetOld), GEP.getName())
+ : GetElementPtrInst::Create(
+ Builder.getInt8Ty(), Src->getOperand(0),
+ Builder.getInt(OffsetOld), GEP.getName());
+ return nullptr;
+ }
+
+ bool IsInBounds = isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP));
+ SmallVector<Value *> Indices;
+ append_range(Indices, drop_end(Src->indices(),
+ Src->getNumIndices() - NumVarIndices));
+ for (const APInt &Idx : drop_begin(ConstIndices, !IsFirstType)) {
+ Indices.push_back(ConstantInt::get(GEP.getContext(), Idx));
+ // Even if the total offset is inbounds, we may end up representing it
+ // by first performing a larger negative offset, and then a smaller
+ // positive one. The large negative offset might go out of bounds. Only
+ // preserve inbounds if all signs are the same.
+ IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
+ }
+
+ return IsInBounds
+ ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName())
+ : GetElementPtrInst::Create(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName());
+ }
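
To make the offset-based path concrete: two constant-indexed GEPs over an opaque pointer that cannot be merged index-wise collapse into a single i8 GEP over their summed byte offset. A hand-written sketch (4 + 2 = 6 bytes; inbounds is kept here assuming the merged GEP is still provably inbounds):

define ptr @before(ptr %p) {
  %g1 = getelementptr inbounds i32, ptr %p, i64 1   ; +4 bytes
  %g2 = getelementptr inbounds i16, ptr %g1, i64 1  ; +2 bytes
  ret ptr %g2
}

define ptr @after(ptr %p) {
  %g = getelementptr inbounds i8, ptr %p, i64 6
  ret ptr %g
}
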
+
+ if (Src->getResultElementType() != GEP.getSourceElementType())
+ return nullptr;
+
SmallVector<Value*, 8> Indices;
// Find out whether the last index in the source GEP is a sequential idx.
@@ -2045,7 +2089,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
return nullptr;
Value *Sum =
- SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
+ simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
// Only do the combine when we are sure the cost after the
// merge is never more than that before the merge.
if (Sum == nullptr)
@@ -2116,9 +2160,8 @@ Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
// existing GEP Value. Causing issues if this Value is accessed when
// constructing an AddrSpaceCastInst
SmallVector<Value *, 8> Indices(GEP.indices());
- Value *NGEP = GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, Indices)
- : Builder.CreateGEP(SrcEltType, SrcOp, Indices);
+ Value *NGEP =
+ Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds());
NGEP->takeName(&GEP);
// Preserve GEP address space to satisfy users
@@ -2169,12 +2212,10 @@ Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
// Otherwise, if the offset is non-zero, we need to find out if there is a
// field at Offset in 'A's type. If so, we can pull the cast through the
// GEP.
- SmallVector<Value*, 8> NewIndices;
+ SmallVector<Value *, 8> NewIndices;
if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
- Value *NGEP =
- GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
- : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
+ Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "",
+ GEP.isInBounds());
if (NGEP->getType() == GEP.getType())
return replaceInstUsesWith(GEP, NGEP);
@@ -2195,7 +2236,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
- if (Value *V = SimplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
+ if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
@@ -2280,7 +2321,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
- if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
+ if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
+ Op1->getSourceElementType() != Op2->getSourceElementType())
return nullptr;
// As for Op1 above, don't try to fold a GEP into itself.
@@ -2476,11 +2518,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value *, 8> Idx(GEP.indices());
Value *NewGEP =
- GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
- Idx, GEP.getName())
- : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
- GEP.getName());
+ Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+ GEP.getName(), GEP.isInBounds());
return new AddrSpaceCastInst(NewGEP, GEPType);
}
}
@@ -2495,13 +2534,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
DL.getTypeAllocSize(GEPEltType)) {
Type *IdxType = DL.getIndexType(GEPType);
- Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
- Value *NewGEP =
- GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
- GEP.getName())
- : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
- GEP.getName());
+ Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)};
+ Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+ GEP.getName(), GEP.isInBounds());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
@@ -2533,11 +2568,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
Value *NewGEP =
- GEP.isInBounds() && NSW
- ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
- NewIdx, GEP.getName())
- : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
- GEP.getName());
+ Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
+ GEP.getName(), GEP.isInBounds() && NSW);
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -2578,11 +2610,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
Value *NewGEP =
- GEP.isInBounds() && NSW
- ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
- Off, GEP.getName())
- : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
- GEP.getName());
+ Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
+ GEP.getName(), GEP.isInBounds() && NSW);
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEPType);
@@ -2672,6 +2701,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
SmallVectorImpl<WeakTrackingVH> &Users,
const TargetLibraryInfo &TLI) {
SmallVector<Instruction*, 4> Worklist;
+ const Optional<StringRef> Family = getAllocationFamily(AI, &TLI);
Worklist.push_back(AI);
do {
@@ -2740,12 +2770,15 @@ static bool isAllocSiteRemovable(Instruction *AI,
continue;
}
- if (isFreeCall(I, &TLI)) {
+ if (isFreeCall(I, &TLI) && getAllocationFamily(I, &TLI) == Family) {
+ assert(Family);
Users.emplace_back(I);
continue;
}
- if (isReallocLikeFn(I, &TLI)) {
+ if (isReallocLikeFn(I, &TLI) &&
+ getAllocationFamily(I, &TLI) == Family) {
+ assert(Family);
Users.emplace_back(I);
Worklist.push_back(I);
continue;
@@ -2803,7 +2836,7 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == Intrinsic::objectsize) {
Value *Result =
- lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
+ lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
replaceInstUsesWith(*I, Result);
eraseInstFromFunction(*I);
Users[i] = nullptr; // Skip examining in the next loop.
@@ -3192,7 +3225,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
if (!EV.hasIndices())
return replaceInstUsesWith(EV, Agg);
- if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
+ if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
SQ.getWithInstruction(&EV)))
return replaceInstUsesWith(EV, V);
@@ -3248,6 +3281,15 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
makeArrayRef(exti, exte));
}
if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
+ // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
+ Intrinsic::ID OvID = WO->getIntrinsicID();
+ if (*EV.idx_begin() == 0 &&
+ (OvID == Intrinsic::smul_with_overflow ||
+ OvID == Intrinsic::umul_with_overflow) &&
+ match(WO->getArgOperand(1), m_AllOnes())) {
+ return BinaryOperator::CreateNeg(WO->getArgOperand(0));
+ }
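
A minimal sketch of that new fold (the overflow bit is simply unused here; multiplying by -1 always leaves -X in the low bits):

declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)

define i32 @before(i32 %x) {
  %m = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 -1)
  %v = extractvalue { i32, i1 } %m, 0   ; low bits of x * -1
  ret i32 %v
}

define i32 @after(i32 %x) {
  %v = sub i32 0, %x                    ; --> -X
  ret i32 %v
}
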
+
// We're extracting from an overflow intrinsic, see if we're the only user,
// which allows us to simplify multiple result intrinsics to simpler
// things that just get one value.
@@ -3723,21 +3765,116 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
if (!MaybePoisonOperand)
return OrigOp;
- auto *FrozenMaybePoisonOperand = new FreezeInst(
+ Builder.SetInsertPoint(OrigOpInst);
+ auto *FrozenMaybePoisonOperand = Builder.CreateFreeze(
MaybePoisonOperand->get(), MaybePoisonOperand->get()->getName() + ".fr");
replaceUse(*MaybePoisonOperand, FrozenMaybePoisonOperand);
- FrozenMaybePoisonOperand->insertBefore(OrigOpInst);
return OrigOp;
}
-bool InstCombinerImpl::freezeDominatedUses(FreezeInst &FI) {
+Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
+ PHINode *PN) {
+ // Detect whether this is a recurrence with a start value and some number of
+ // backedge values. We'll check whether we can push the freeze through the
+ // backedge values (possibly dropping poison flags along the way) until we
+ // reach the phi again. In that case, we can move the freeze to the start
+ // value.
+ Use *StartU = nullptr;
+ SmallVector<Value *> Worklist;
+ for (Use &U : PN->incoming_values()) {
+ if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
+ // Add backedge value to worklist.
+ Worklist.push_back(U.get());
+ continue;
+ }
+
+ // Don't bother handling multiple start values.
+ if (StartU)
+ return nullptr;
+ StartU = &U;
+ }
+
+ if (!StartU || Worklist.empty())
+ return nullptr; // Not a recurrence.
+
+ Value *StartV = StartU->get();
+ BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
+ bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
+ // We can't insert freeze if the start value is the result of the
+ // terminator (e.g. an invoke).
+ if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
+ return nullptr;
+
+ SmallPtrSet<Value *, 32> Visited;
+ SmallVector<Instruction *> DropFlags;
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ if (Visited.size() > 32)
+ return nullptr; // Limit the total number of values we inspect.
+
+ // Assume that PN is non-poison, because it will be after the transform.
+ if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
+ continue;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || canCreateUndefOrPoison(cast<Operator>(I),
+ /*ConsiderFlags*/ false))
+ return nullptr;
+
+ DropFlags.push_back(I);
+ append_range(Worklist, I->operands());
+ }
+
+ for (Instruction *I : DropFlags)
+ I->dropPoisonGeneratingFlags();
+
+ if (StartNeedsFreeze) {
+ Builder.SetInsertPoint(StartBB->getTerminator());
+ Value *FrozenStartV = Builder.CreateFreeze(StartV,
+ StartV->getName() + ".fr");
+ replaceUse(*StartU, FrozenStartV);
+ }
+ return replaceInstUsesWith(FI, PN);
+}
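
A hand-written sketch of what foldFreezeIntoRecurrence enables: the start value gets frozen, poison-generating flags on the backedge computation are dropped, and the freeze of the phi disappears.

define i32 @before(i32 %n) {
entry:
  br label %loop
loop:
  %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
  %iv.next = add nsw i32 %iv, 1
  %iv.fr = freeze i32 %iv
  %cmp = icmp slt i32 %iv.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret i32 %iv.fr
}

define i32 @after(i32 %n) {
entry:
  %n.fr = freeze i32 %n
  br label %loop
loop:
  %iv = phi i32 [ %n.fr, %entry ], [ %iv.next, %loop ]
  %iv.next = add i32 %iv, 1        ; nsw dropped
  %cmp = icmp slt i32 %iv.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret i32 %iv                      ; the freeze is gone
}
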
+
+bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
Value *Op = FI.getOperand(0);
- if (isa<Constant>(Op))
+ if (isa<Constant>(Op) || Op->hasOneUse())
return false;
+ // Move the freeze directly after the definition of its operand, so that
+ // it dominates the maximum number of uses. Note that it may not dominate
+ // *all* uses if the operand is an invoke/callbr and the use is in a phi on
+ // the normal/default destination. This is why the domination check in the
+ // replacement below is still necessary.
+ Instruction *MoveBefore = nullptr;
+ if (isa<Argument>(Op)) {
+ MoveBefore = &FI.getFunction()->getEntryBlock().front();
+ while (isa<AllocaInst>(MoveBefore))
+ MoveBefore = MoveBefore->getNextNode();
+ } else if (auto *PN = dyn_cast<PHINode>(Op)) {
+ MoveBefore = PN->getParent()->getFirstNonPHI();
+ } else if (auto *II = dyn_cast<InvokeInst>(Op)) {
+ MoveBefore = II->getNormalDest()->getFirstNonPHI();
+ } else if (auto *CB = dyn_cast<CallBrInst>(Op)) {
+ MoveBefore = CB->getDefaultDest()->getFirstNonPHI();
+ } else {
+ auto *I = cast<Instruction>(Op);
+ assert(!I->isTerminator() && "Cannot be a terminator");
+ MoveBefore = I->getNextNode();
+ }
+
bool Changed = false;
+ if (&FI != MoveBefore) {
+ FI.moveBefore(MoveBefore);
+ Changed = true;
+ }
+
Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
bool Dominates = DT.dominates(&FI, U);
Changed |= Dominates;
@@ -3750,48 +3887,63 @@ bool InstCombinerImpl::freezeDominatedUses(FreezeInst &FI) {
Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
Value *Op0 = I.getOperand(0);
- if (Value *V = SimplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
+ if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
// freeze (phi const, x) --> phi const, (freeze x)
if (auto *PN = dyn_cast<PHINode>(Op0)) {
if (Instruction *NV = foldOpIntoPhi(I, PN))
return NV;
+ if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
+ return NV;
}
if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I))
return replaceInstUsesWith(I, NI);
- if (match(Op0, m_Undef())) {
- // If I is freeze(undef), see its uses and fold it to the best constant.
- // - or: pick -1
- // - select's condition: pick the value that leads to choosing a constant
- // - other ops: pick 0
+ // If I is freeze(undef), check its uses and fold it to a fixed constant.
+ // - or: pick -1
+ // - select's condition: if the true value is constant, choose it by making
+ // the condition true.
+ // - default: pick 0
+ //
+ // Note that this transform is intentionally done here rather than
+ // via an analysis in InstSimplify or at individual user sites. That is
+ // because we must produce the same value for all uses of the freeze -
+ // it's the reason "freeze" exists!
+ //
+ // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
+ // duplicating logic for binops at least.
+ auto getUndefReplacement = [&I](Type *Ty) {
Constant *BestValue = nullptr;
- Constant *NullValue = Constant::getNullValue(I.getType());
+ Constant *NullValue = Constant::getNullValue(Ty);
for (const auto *U : I.users()) {
Constant *C = NullValue;
-
if (match(U, m_Or(m_Value(), m_Value())))
- C = Constant::getAllOnesValue(I.getType());
- else if (const auto *SI = dyn_cast<SelectInst>(U)) {
- if (SI->getCondition() == &I) {
- APInt CondVal(1, isa<Constant>(SI->getFalseValue()) ? 0 : 1);
- C = Constant::getIntegerValue(I.getType(), CondVal);
- }
- }
+ C = ConstantInt::getAllOnesValue(Ty);
+ else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
+ C = ConstantInt::getTrue(Ty);
if (!BestValue)
BestValue = C;
else if (BestValue != C)
BestValue = NullValue;
}
+ assert(BestValue && "Must have at least one use");
+ return BestValue;
+ };
- return replaceInstUsesWith(I, BestValue);
+ if (match(Op0, m_Undef()))
+ return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
+
+ Constant *C;
+ if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
+ Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType());
+ return replaceInstUsesWith(I, Constant::replaceUndefsWith(C, ReplaceC));
}
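
For example, with a single 'or' user, freeze(undef) is chosen to be all-ones and the 'or' then folds away entirely (a hand-written sketch):

define i32 @before(i32 %x) {
  %f = freeze i32 undef
  %r = or i32 %x, %f
  ret i32 %r
}

define i32 @after(i32 %x) {
  ret i32 -1   ; freeze(undef) became -1, and x | -1 == -1
}
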
- // Replace all dominated uses of Op to freeze(Op).
- if (freezeDominatedUses(I))
+ // Replace uses of Op with freeze(Op).
+ if (freezeOtherUses(I))
return &I;
return nullptr;
@@ -3847,7 +3999,6 @@ static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
TargetLibraryInfo &TLI) {
- assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
BasicBlock *SrcBlock = I->getParent();
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -4014,48 +4165,68 @@ bool InstCombinerImpl::run() {
[this](Instruction *I) -> Optional<BasicBlock *> {
if (!EnableCodeSinking)
return None;
- auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
- if (!UserInst)
- return None;
BasicBlock *BB = I->getParent();
BasicBlock *UserParent = nullptr;
+ unsigned NumUsers = 0;
- // Special handling for Phi nodes - get the block the use occurs in.
- if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
- for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
- if (PN->getIncomingValue(i) == I) {
- // Bail out if we have uses in different blocks. We don't do any
- // sophisticated analysis (i.e finding NearestCommonDominator of these
- // use blocks).
- if (UserParent && UserParent != PN->getIncomingBlock(i))
- return None;
- UserParent = PN->getIncomingBlock(i);
+ for (auto *U : I->users()) {
+ if (U->isDroppable())
+ continue;
+ if (NumUsers > MaxSinkNumUsers)
+ return None;
+
+ Instruction *UserInst = cast<Instruction>(U);
+ // Special handling for Phi nodes - get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (PN->getIncomingValue(i) == I) {
+ // Bail out if we have uses in different blocks. We don't do any
+ // sophisticated analysis (i.e., finding the NearestCommonDominator of
+ // these use blocks).
+ if (UserParent && UserParent != PN->getIncomingBlock(i))
+ return None;
+ UserParent = PN->getIncomingBlock(i);
+ }
}
+ assert(UserParent && "expected to find user block!");
+ } else {
+ if (UserParent && UserParent != UserInst->getParent())
+ return None;
+ UserParent = UserInst->getParent();
}
- assert(UserParent && "expected to find user block!");
- } else
- UserParent = UserInst->getParent();
- // Try sinking to another block. If that block is unreachable, then do
- // not bother. SimplifyCFG should handle it.
- if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
- return None;
+ // Make sure these checks are done only once. Naturally, we do them the
+ // first time we compute UserParent; this saves compile time.
+ if (NumUsers == 0) {
+ // Try sinking to another block. If that block is unreachable, then do
+ // not bother. SimplifyCFG should handle it.
+ if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
+ return None;
+
+ auto *Term = UserParent->getTerminator();
+ // See if the user is one of our successors that has only one
+ // predecessor, so that we don't have to split the critical edge.
+ // Another option where we can sink is a block that ends with a
+ // terminator that does not pass control to other block (such as
+ // return or unreachable or resume). In this case:
+ // - I dominates the User (by SSA form);
+ // - the User will be executed at most once.
+ // So sinking I down to User is always profitable or neutral.
+ if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
+ return None;
+
+ assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
+ }
- auto *Term = UserParent->getTerminator();
- // See if the user is one of our successors that has only one
- // predecessor, so that we don't have to split the critical edge.
- // Another option where we can sink is a block that ends with a
- // terminator that does not pass control to other block (such as
- // return or unreachable or resume). In this case:
- // - I dominates the User (by SSA form);
- // - the User will be executed at most once.
- // So sinking I down to User is always profitable or neutral.
- if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) {
- assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
- return UserParent;
+ NumUsers++;
}
- return None;
+
+ // No users, or only droppable users.
+ if (!UserParent)
+ return None;
+
+ return UserParent;
};
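
A sketch of a case the relaxed heuristic newly handles: the old code required a unique undroppable user, while here %a has two users, both in the same successor block, and can now sink into %then (the example is hand-written; the user count only has to stay under the new cap):

define i32 @before(i32 %x, i1 %c) {
entry:
  %a = mul i32 %x, %x          ; now sinkable into %then
  br i1 %c, label %then, label %else
then:
  %u1 = add i32 %a, 1
  %u2 = add i32 %a, 2
  %r = add i32 %u1, %u2
  ret i32 %r
else:
  ret i32 0
}
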
auto OptBB = getOptionalSinkBlockForInst(I);
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8f94172a6402..7a5a74aa4fff 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -42,14 +43,12 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -63,15 +62,12 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
@@ -87,7 +83,6 @@
#include <cstdint>
#include <iomanip>
#include <limits>
-#include <memory>
#include <sstream>
#include <string>
#include <tuple>
@@ -116,7 +111,7 @@ static const uint64_t kFreeBSDKasan_ShadowOffset64 = 0xdffff7c000000000;
static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
-static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
+static const uint64_t kPS_ShadowOffset64 = 1ULL << 40;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
static const uint64_t kEmscriptenShadowOffset = 0;
@@ -335,6 +330,11 @@ static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
cl::desc("Prefix for memory access callbacks"), cl::Hidden,
cl::init("__asan_"));
+static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
+ "asan-kernel-mem-intrinsic-prefix",
+ cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
+ cl::init(false));
+
static cl::opt<bool>
ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas",
cl::desc("instrument dynamic allocas"),
@@ -465,11 +465,12 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
bool IsKasan) {
bool IsAndroid = TargetTriple.isAndroid();
- bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
+ bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS() ||
+ TargetTriple.isDriverKit();
bool IsMacOS = TargetTriple.isMacOSX();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
bool IsNetBSD = TargetTriple.isOSNetBSD();
- bool IsPS4CPU = TargetTriple.isPS4CPU();
+ bool IsPS = TargetTriple.isPS();
bool IsLinux = TargetTriple.isOSLinux();
bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le;
@@ -528,8 +529,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
Mapping.Offset = kNetBSDKasan_ShadowOffset64;
else
Mapping.Offset = kNetBSD_ShadowOffset64;
- } else if (IsPS4CPU)
- Mapping.Offset = kPS4CPU_ShadowOffset64;
+ } else if (IsPS)
+ Mapping.Offset = kPS_ShadowOffset64;
else if (IsLinux && IsX86_64) {
if (IsKasan)
Mapping.Offset = kLinuxKasan_ShadowOffset64;
@@ -568,7 +569,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
// offset is not necessarily 1/8-th of the address space. On SystemZ,
// we could OR the constant in a single instruction, but it's more
// efficient to load it once and use indexed addressing.
- Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU &&
+ Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS &&
!IsRISCV64 &&
!(Mapping.Offset & (Mapping.Offset - 1)) &&
Mapping.Offset != kDynamicShadowSentinel;
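
For reference, the mapping this code configures feeds the usual ASan shadow computation. A sketch with illustrative Linux x86-64 parameters (scale 3, offset 0x7fff8000); actual values vary by target as selected above:

define i64 @mem_to_shadow(i64 %addr) {
  %shifted = lshr i64 %addr, 3            ; Mapping.Scale
  ; With OrShadowOffset this would be an 'or' instead of an 'add'.
  %shadow = add i64 %shifted, 2147450880  ; Mapping.Offset (0x7fff8000)
  ret i64 %shadow
}
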
@@ -621,41 +622,9 @@ static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
namespace {
-/// Module analysis for getting various metadata about the module.
-class ASanGlobalsMetadataWrapperPass : public ModulePass {
-public:
- static char ID;
-
- ASanGlobalsMetadataWrapperPass() : ModulePass(ID) {
- initializeASanGlobalsMetadataWrapperPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- GlobalsMD = GlobalsMetadata(M);
- return false;
- }
-
- StringRef getPassName() const override {
- return "ASanGlobalsMetadataWrapperPass";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- GlobalsMetadata &getGlobalsMD() { return GlobalsMD; }
-
-private:
- GlobalsMetadata GlobalsMD;
-};
-
-char ASanGlobalsMetadataWrapperPass::ID = 0;
-
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer {
- AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
- const StackSafetyGlobalInfo *SSGI,
+ AddressSanitizer(Module &M, const StackSafetyGlobalInfo *SSGI,
bool CompileKernel = false, bool Recover = false,
bool UseAfterScope = false,
AsanDetectStackUseAfterReturnMode UseAfterReturn =
@@ -666,7 +635,7 @@ struct AddressSanitizer {
UseAfterScope(UseAfterScope || ClUseAfterScope),
UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? ClUseAfterReturn
: UseAfterReturn),
- GlobalsMD(*GlobalsMD), SSGI(SSGI) {
+ SSGI(SSGI) {
C = &(M.getContext());
LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
@@ -779,7 +748,6 @@ private:
FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
Value *LocalDynamicShadow = nullptr;
- const GlobalsMetadata &GlobalsMD;
const StackSafetyGlobalInfo *SSGI;
DenseMap<const AllocaInst *, bool> ProcessedAllocas;
@@ -787,60 +755,13 @@ private:
FunctionCallee AMDGPUAddressPrivate;
};
-class AddressSanitizerLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- explicit AddressSanitizerLegacyPass(
- bool CompileKernel = false, bool Recover = false,
- bool UseAfterScope = false,
- AsanDetectStackUseAfterReturnMode UseAfterReturn =
- AsanDetectStackUseAfterReturnMode::Runtime)
- : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
- UseAfterScope(UseAfterScope), UseAfterReturn(UseAfterReturn) {
- initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override {
- return "AddressSanitizerFunctionPass";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ASanGlobalsMetadataWrapperPass>();
- if (ClUseStackSafety)
- AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-
- bool runOnFunction(Function &F) override {
- GlobalsMetadata &GlobalsMD =
- getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
- const StackSafetyGlobalInfo *const SSGI =
- ClUseStackSafety
- ? &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult()
- : nullptr;
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- AddressSanitizer ASan(*F.getParent(), &GlobalsMD, SSGI, CompileKernel,
- Recover, UseAfterScope, UseAfterReturn);
- return ASan.instrumentFunction(F, TLI);
- }
-
-private:
- bool CompileKernel;
- bool Recover;
- bool UseAfterScope;
- AsanDetectStackUseAfterReturnMode UseAfterReturn;
-};
-
class ModuleAddressSanitizer {
public:
- ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
- bool CompileKernel = false, bool Recover = false,
- bool UseGlobalsGC = true, bool UseOdrIndicator = false,
+ ModuleAddressSanitizer(Module &M, bool CompileKernel = false,
+ bool Recover = false, bool UseGlobalsGC = true,
+ bool UseOdrIndicator = false,
AsanDtorKind DestructorKind = AsanDtorKind::Global)
- : GlobalsMD(*GlobalsMD),
- CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan
+ : CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan
: CompileKernel),
Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover),
UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel),
@@ -906,7 +827,6 @@ private:
uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const;
int GetAsanVersion(const Module &M) const;
- const GlobalsMetadata &GlobalsMD;
bool CompileKernel;
bool Recover;
bool UseGlobalsGC;
@@ -931,44 +851,6 @@ private:
Function *AsanDtorFunction = nullptr;
};
-class ModuleAddressSanitizerLegacyPass : public ModulePass {
-public:
- static char ID;
-
- explicit ModuleAddressSanitizerLegacyPass(
- bool CompileKernel = false, bool Recover = false, bool UseGlobalGC = true,
- bool UseOdrIndicator = false,
- AsanDtorKind DestructorKind = AsanDtorKind::Global)
- : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover),
- UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator),
- DestructorKind(DestructorKind) {
- initializeModuleAddressSanitizerLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override { return "ModuleAddressSanitizer"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ASanGlobalsMetadataWrapperPass>();
- }
-
- bool runOnModule(Module &M) override {
- GlobalsMetadata &GlobalsMD =
- getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
- ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover,
- UseGlobalGC, UseOdrIndicator,
- DestructorKind);
- return ASanModule.instrumentModule(M);
- }
-
-private:
- bool CompileKernel;
- bool Recover;
- bool UseGlobalGC;
- bool UseOdrIndicator;
- AsanDtorKind DestructorKind;
-};
-
// Stack poisoning does not play well with exception handling.
// When an exception is thrown, we essentially bypass the code
// that unpoisons the stack. This is why the run-time library has
@@ -1221,85 +1103,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
} // end anonymous namespace
-void LocationMetadata::parse(MDNode *MDN) {
- assert(MDN->getNumOperands() == 3);
- MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
- Filename = DIFilename->getString();
- LineNo = mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
- ColumnNo =
- mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
-}
-
-// FIXME: It would be cleaner to attach relevant metadata directly to the
-// globals we want to sanitize and read it on each pass over a function,
-// instead of reading module-level metadata up front.
-GlobalsMetadata::GlobalsMetadata(Module &M) {
- NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
- if (!Globals)
- return;
- for (auto MDN : Globals->operands()) {
- // Metadata node contains the global and the fields of "Entry".
- assert(MDN->getNumOperands() == 5);
- auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
- // The optimizer may optimize away a global entirely.
- if (!V)
- continue;
- auto *StrippedV = V->stripPointerCasts();
- auto *GV = dyn_cast<GlobalVariable>(StrippedV);
- if (!GV)
- continue;
- // We can already have an entry for GV if it was merged with another
- // global.
- Entry &E = Entries[GV];
- if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
- E.SourceLoc.parse(Loc);
- if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
- E.Name = Name->getString();
- ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3));
- E.IsDynInit |= IsDynInit->isOne();
- ConstantInt *IsExcluded =
- mdconst::extract<ConstantInt>(MDN->getOperand(4));
- E.IsExcluded |= IsExcluded->isOne();
- }
-}
-
-AnalysisKey ASanGlobalsMetadataAnalysis::Key;
-
-GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
- ModuleAnalysisManager &AM) {
- return GlobalsMetadata(M);
-}
-
-PreservedAnalyses AddressSanitizerPass::run(Function &F,
- AnalysisManager<Function> &AM) {
- auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
- Module &M = *F.getParent();
- if (auto *R = MAMProxy.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
- const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
- AddressSanitizer Sanitizer(M, R, nullptr, Options.CompileKernel,
- Options.Recover, Options.UseAfterScope,
- Options.UseAfterReturn);
- if (Sanitizer.instrumentFunction(F, TLI))
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
- }
-
- report_fatal_error(
- "The ASanGlobalsMetadataAnalysis is required to run before "
- "AddressSanitizer can run");
- return PreservedAnalyses::all();
-}
-
-void AddressSanitizerPass::printPipeline(
- raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
- static_cast<PassInfoMixin<AddressSanitizerPass> *>(this)->printPipeline(
- OS, MapClassName2PassName);
- OS << "<";
- if (Options.CompileKernel)
- OS << "kernel";
- OS << ">";
-}
-
void ModuleAddressSanitizerPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<ModuleAddressSanitizerPass> *>(this)->printPipeline(
@@ -1318,8 +1121,7 @@ ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(
PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
- GlobalsMetadata &GlobalsMD = MAM.getResult<ASanGlobalsMetadataAnalysis>(M);
- ModuleAddressSanitizer ModuleSanitizer(M, &GlobalsMD, Options.CompileKernel,
+ ModuleAddressSanitizer ModuleSanitizer(M, Options.CompileKernel,
Options.Recover, UseGlobalGC,
UseOdrIndicator, DestructorKind);
bool Modified = false;
@@ -1327,9 +1129,9 @@ PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
const StackSafetyGlobalInfo *const SSGI =
ClUseStackSafety ? &MAM.getResult<StackSafetyGlobalAnalysis>(M) : nullptr;
for (Function &F : M) {
- AddressSanitizer FunctionSanitizer(
- M, &GlobalsMD, SSGI, Options.CompileKernel, Options.Recover,
- Options.UseAfterScope, Options.UseAfterReturn);
+ AddressSanitizer FunctionSanitizer(M, SSGI, Options.CompileKernel,
+ Options.Recover, Options.UseAfterScope,
+ Options.UseAfterReturn);
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
Modified |= FunctionSanitizer.instrumentFunction(F, &TLI);
}
@@ -1337,75 +1139,20 @@ PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
-INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
- "Read metadata to mark which globals should be instrumented "
- "when running ASan.",
- false, true)
-
-char AddressSanitizerLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(
- AddressSanitizerLegacyPass, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- AddressSanitizerLegacyPass, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
- false)
-
-FunctionPass *llvm::createAddressSanitizerFunctionPass(
- bool CompileKernel, bool Recover, bool UseAfterScope,
- AsanDetectStackUseAfterReturnMode UseAfterReturn) {
- assert(!CompileKernel || Recover);
- return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope,
- UseAfterReturn);
-}
-
-char ModuleAddressSanitizerLegacyPass::ID = 0;
-
-INITIALIZE_PASS(
- ModuleAddressSanitizerLegacyPass, "asan-module",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
- "ModulePass",
- false, false)
-
-ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass(
- bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator,
- AsanDtorKind Destructor) {
- assert(!CompileKernel || Recover);
- return new ModuleAddressSanitizerLegacyPass(
- CompileKernel, Recover, UseGlobalsGC, UseOdrIndicator, Destructor);
-}
-
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
size_t Res = countTrailingZeros(TypeSize / 8);
assert(Res < kNumberOfAccessSizes);
return Res;
}
-/// Create a global describing a source location.
-static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
- LocationMetadata MD) {
- Constant *LocData[] = {
- createPrivateGlobalForString(M, MD.Filename, true, kAsanGenPrefix),
- ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo),
- ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo),
- };
- auto LocStruct = ConstantStruct::getAnon(LocData);
- auto GV = new GlobalVariable(M, LocStruct->getType(), true,
- GlobalValue::PrivateLinkage, LocStruct,
- kAsanGenPrefix);
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- return GV;
-}
-
/// Check if \p G has been created by a trusted compiler pass.
static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
// Do not instrument @llvm.global_ctors, @llvm.used, etc.
- if (G->getName().startswith("llvm."))
+ if (G->getName().startswith("llvm.") ||
+ // Do not instrument gcov counter arrays.
+ G->getName().startswith("__llvm_gcov_ctr") ||
+ // Do not instrument rtti proxy symbols for function sanitizer.
+ G->getName().startswith("__llvm_rtti_proxy"))
return true;
// Do not instrument asan globals.
@@ -1414,10 +1161,6 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
G->getName().startswith(kODRGenPrefix))
return true;
- // Do not instrument gcov counter arrays.
- if (G->getName() == "__llvm_gcov_ctr")
- return true;
-
return false;
}
@@ -1518,10 +1261,6 @@ bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
void AddressSanitizer::getInterestingMemoryOperands(
Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
- // Skip memory accesses inserted by another instrumentation.
- if (I->hasMetadata("nosanitize"))
- return;
-
// Do not instrument the load fetching the dynamic shadow address.
if (LocalDynamicShadow == I)
return;
@@ -1613,10 +1352,13 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global does not have an initializer
// at all, we assume it has a dynamic initializer (in another TU).
- //
- // FIXME: Metadata should be attached directly to the global instead
- // of being added to llvm.asan.globals.
- return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
+ if (!G->hasInitializer())
+ return false;
+
+ if (G->hasSanitizerMetadata() && G->getSanitizerMetadata().IsDynInit)
+ return false;
+
+ return true;
}
void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
@@ -1977,9 +1719,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
Type *Ty = G->getValueType();
LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
- // FIXME: Metadata should be attached directly to the global instead
- // of being added to llvm.asan.globals.
- if (GlobalsMD.get(G).IsExcluded) return false;
+ if (G->hasSanitizerMetadata() && G->getSanitizerMetadata().NoAddress)
+ return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
// Globals in address space 1 and 4 are supported for AMDGPU.
@@ -2125,6 +1866,8 @@ bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const {
return true;
if (TargetTriple.isWatchOS() && !TargetTriple.isOSVersionLT(2))
return true;
+ if (TargetTriple.isDriverKit())
+ return true;
return false;
}
@@ -2136,7 +1879,9 @@ StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
case Triple::MachO: return "__DATA,__asan_globals,regular";
case Triple::Wasm:
case Triple::GOFF:
+ case Triple::SPIRV:
case Triple::XCOFF:
+ case Triple::DXContainer:
report_fatal_error(
"ModuleAddressSanitizer not implemented for object file format");
case Triple::UnknownObjectFormat:
@@ -2470,7 +2215,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
// const char *name;
// const char *module_name;
// size_t has_dynamic_init;
- // void *source_location;
+ // size_t padding_for_windows_msvc_incremental_link;
// size_t odr_indicator;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy =
@@ -2489,15 +2234,16 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
for (size_t i = 0; i < n; i++) {
GlobalVariable *G = GlobalsToChange[i];
- // FIXME: Metadata should be attached directly to the global instead
- // of being added to llvm.asan.globals.
- auto MD = GlobalsMD.get(G);
- StringRef NameForGlobal = G->getName();
- // Create string holding the global name (use global name from metadata
- // if it's available, otherwise just write the name of global variable).
- GlobalVariable *Name = createPrivateGlobalForString(
- M, MD.Name.empty() ? NameForGlobal : MD.Name,
- /*AllowMerging*/ true, kAsanGenPrefix);
+ GlobalValue::SanitizerMetadata MD;
+ if (G->hasSanitizerMetadata())
+ MD = G->getSanitizerMetadata();
+
+ // TODO: Symbol names in the descriptor can be demangled by the runtime
+ // library. This could save ~0.4% of VM size for a large private binary.
+ std::string NameForGlobal = llvm::demangle(G->getName().str());
+ GlobalVariable *Name =
+ createPrivateGlobalForString(M, NameForGlobal,
+ /*AllowMerging*/ true, kAsanGenPrefix);
Type *Ty = G->getValueType();
const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
@@ -2545,14 +2291,6 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
G->eraseFromParent();
NewGlobals[i] = NewGlobal;
- Constant *SourceLoc;
- if (!MD.SourceLoc.empty()) {
- auto SourceLocGlobal = createPrivateGlobalForSourceLoc(M, MD.SourceLoc);
- SourceLoc = ConstantExpr::getPointerCast(SourceLocGlobal, IntptrTy);
- } else {
- SourceLoc = ConstantInt::get(IntptrTy, 0);
- }
-
Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy());
GlobalValue *InstrumentedGlobal = NewGlobal;
@@ -2593,10 +2331,12 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
ConstantExpr::getPointerCast(ModuleName, IntptrTy),
- ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc,
+ ConstantInt::get(IntptrTy, MD.IsDynInit),
+ Constant::getNullValue(IntptrTy),
ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
- if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true;
+ if (ClInitializers && MD.IsDynInit)
+ HasDynamicallyInitializedGlobals = true;
LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
@@ -2759,7 +2499,9 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
}
const std::string MemIntrinCallbackPrefix =
- CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+ (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
+ ? std::string("")
+ : ClMemoryAccessCallbackPrefix;
AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy);
@@ -2888,6 +2630,9 @@ bool AddressSanitizer::instrumentFunction(Function &F,
// Leave if the function doesn't need instrumentation.
if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
+ if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+ return FunctionModified;
+
LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
@@ -2908,7 +2653,6 @@ bool AddressSanitizer::instrumentFunction(Function &F,
SmallVector<Instruction *, 8> NoReturnCalls;
SmallVector<BasicBlock *, 16> AllBlocks;
SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts;
- int NumAllocas = 0;
// Fill the set of memory operations to instrument.
for (auto &BB : F) {
@@ -2917,6 +2661,9 @@ bool AddressSanitizer::instrumentFunction(Function &F,
int NumInsnsPerBB = 0;
for (auto &Inst : BB) {
if (LooksLikeCodeInBug11395(&Inst)) return false;
+ // Skip instructions inserted by another instrumentation.
+ if (Inst.hasMetadata(LLVMContext::MD_nosanitize))
+ continue;
SmallVector<InterestingMemoryOperand, 1> InterestingOperands;
getInterestingMemoryOperands(&Inst, InterestingOperands);
@@ -2948,11 +2695,10 @@ bool AddressSanitizer::instrumentFunction(Function &F,
IntrinToInstrument.push_back(MI);
NumInsnsPerBB++;
} else {
- if (isa<AllocaInst>(Inst)) NumAllocas++;
if (auto *CB = dyn_cast<CallBase>(&Inst)) {
// A call inside BB.
TempsToInstrument.clear();
- if (CB->doesNotReturn() && !CB->hasMetadata("nosanitize"))
+ if (CB->doesNotReturn())
NoReturnCalls.push_back(CB);
}
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
@@ -3347,7 +3093,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
ASanStackVariableDescription D = {AI->getName().data(),
ASan.getAllocaSizeInBytes(*AI),
0,
- AI->getAlignment(),
+ AI->getAlign().value(),
AI,
0,
0};
@@ -3611,7 +3357,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
IRBuilder<> IRB(AI);
- const uint64_t Alignment = std::max(kAllocaRzSize, AI->getAlignment());
+ const Align Alignment = std::max(Align(kAllocaRzSize), AI->getAlign());
const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1;
Value *Zero = Constant::getNullValue(IntptrTy);
@@ -3642,17 +3388,19 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
  // Alignment is added to locate the left redzone, PartialPadding for a
  // possible partial redzone, and kAllocaRzSize for the right redzone.
Value *AdditionalChunkSize = IRB.CreateAdd(
- ConstantInt::get(IntptrTy, Alignment + kAllocaRzSize), PartialPadding);
+ ConstantInt::get(IntptrTy, Alignment.value() + kAllocaRzSize),
+ PartialPadding);
Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize);
// Insert new alloca with new NewSize and Alignment params.
AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize);
- NewAlloca->setAlignment(Align(Alignment));
+ NewAlloca->setAlignment(Alignment);
// NewAddress = Address + Alignment
- Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy),
- ConstantInt::get(IntptrTy, Alignment));
+ Value *NewAddress =
+ IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy),
+ ConstantInt::get(IntptrTy, Alignment.value()));
  // Insert an __asan_alloca_poison call for the newly created alloca.
IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize});
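Worked numbers for the size computation above, as a standalone sketch. It assumes kAllocaRzSize = 32 (the pass's constant) and reproduces the padding mask math that sits just outside this hunk:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t kAllocaRzSize = 32;
  const uint64_t RedzoneMask = kAllocaRzSize - 1;
  uint64_t OldSize = 100, Alignment = 32; // max(kAllocaRzSize, alloca align).

  // Pad the object tail out to a redzone-granule boundary.
  uint64_t PartialPadding =
      (kAllocaRzSize - (OldSize & RedzoneMask)) & RedzoneMask;
  // Left redzone (Alignment) + partial padding + right redzone.
  uint64_t NewSize = OldSize + Alignment + PartialPadding + kAllocaRzSize;
  uint64_t UserOffset = Alignment; // NewAddress = Address + Alignment.

  // For OldSize=100: padding=28, newsize=192, user data starts 32 bytes in.
  printf("padding=%llu newsize=%llu user-offset=%llu\n",
         (unsigned long long)PartialPadding, (unsigned long long)NewSize,
         (unsigned long long)UserOffset);
}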
diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 4ad07cab001a..1eadafb4e4b4 100644
--- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -19,7 +19,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
@@ -29,7 +28,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <utility>
@@ -142,6 +140,9 @@ static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
ScalarEvolution &SE) {
+ if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
+ return false;
+
const DataLayout &DL = F.getParent()->getDataLayout();
ObjectSizeOpts EvalOpts;
EvalOpts.RoundToAlign = true;
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 1a7f7a365ce4..b11b84d65d23 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -13,15 +13,12 @@
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Transforms/Instrumentation.h"
-#include <array>
-
using namespace llvm;
static bool
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 497aac30c3f6..e5c0705b916e 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
@@ -145,27 +146,27 @@ FunctionPass *llvm::createControlHeightReductionLegacyPass() {
namespace {
struct CHRStats {
- CHRStats() : NumBranches(0), NumBranchesDelta(0),
- WeightedNumBranchesDelta(0) {}
+ CHRStats() = default;
void print(raw_ostream &OS) const {
OS << "CHRStats: NumBranches " << NumBranches
<< " NumBranchesDelta " << NumBranchesDelta
<< " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
}
- uint64_t NumBranches; // The original number of conditional branches /
- // selects
- uint64_t NumBranchesDelta; // The decrease of the number of conditional
- // branches / selects in the hot paths due to CHR.
- uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
- // count at the scope entry.
+ // The original number of conditional branches / selects
+ uint64_t NumBranches = 0;
+ // The decrease of the number of conditional branches / selects in the hot
+ // paths due to CHR.
+ uint64_t NumBranchesDelta = 0;
+ // NumBranchesDelta weighted by the profile count at the scope entry.
+ uint64_t WeightedNumBranchesDelta = 0;
};
// RegInfo - some properties of a Region.
struct RegInfo {
- RegInfo() : R(nullptr), HasBranch(false) {}
- RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
- Region *R;
- bool HasBranch;
+ RegInfo() = default;
+ RegInfo(Region *RegionIn) : R(RegionIn) {}
+ Region *R = nullptr;
+ bool HasBranch = false;
SmallVector<SelectInst *, 8> Selects;
};
@@ -769,9 +770,21 @@ CHRScope * CHR::findScope(Region *R) {
return nullptr;
  // If any of the basic blocks have their address taken, we must skip this
  // region because we cannot clone basic blocks whose address is taken.
- for (BasicBlock *BB : R->blocks())
+ for (BasicBlock *BB : R->blocks()) {
if (BB->hasAddressTaken())
return nullptr;
+ // If we encounter llvm.coro.id, skip this region, because if the basic
+ // block is cloned, we end up inserting a token-typed PHI node into the
+ // block containing llvm.coro.begin.
+ // FIXME: This could lead to less optimal codegen: excluding the region
+ // can prevent CHR from merging adjacent regions into a bigger scope and
+ // hoisting more branches.
+ for (Instruction &I : *BB)
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::coro_id)
+ return nullptr;
+ }
+
if (Exit) {
// Try to find an if-then block (check if R is an if-then).
// if (cond) {
@@ -1752,7 +1765,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
// Create the combined branch condition and constant-fold the branches/selects
// in the hot path.
fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
- ProfileCount.getValueOr(0));
+ ProfileCount.value_or(0));
}
// A helper for transformScopes. Clone the blocks in the scope (excluding the
@@ -1949,28 +1962,27 @@ void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
// condition.
void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
- Instruction *BranchOrSelect,
- CHRScope *Scope,
- IRBuilder<> &IRB,
- Value *&MergedCondition) {
- if (IsTrueBiased) {
- MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
- } else {
+ Instruction *BranchOrSelect, CHRScope *Scope,
+ IRBuilder<> &IRB, Value *&MergedCondition) {
+ if (!IsTrueBiased) {
    // If Cond is an icmp and all users of Cond except for BranchOrSelect are
    // branches, negate the icmp predicate and swap the branch targets to
    // avoid inserting an Xor to negate Cond.
- bool Done = false;
- if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
- if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
- MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
- Done = true;
- }
- if (!Done) {
- Value *Negate = IRB.CreateXor(
- ConstantInt::getTrue(F.getContext()), Cond);
- MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
- }
+ auto *ICmp = dyn_cast<ICmpInst>(Cond);
+ if (!ICmp ||
+ !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
+ Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
}
+
+ // Select conditions can be poison, while branching on poison is immediate
+ // undefined behavior. As such, we need to freeze potentially poisonous
+ // conditions derived from selects.
+ if (isa<SelectInst>(BranchOrSelect) &&
+ !isGuaranteedNotToBeUndefOrPoison(Cond))
+ Cond = IRB.CreateFreeze(Cond);
+
+ // Use logical and to avoid propagating poison from later conditions.
+ MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
}
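The comment block above is easier to see with the IRBuilder calls in isolation. A self-contained sketch (compiles against LLVM core; the function name merge and its i1 arguments are illustrative):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("chr_demo", Ctx);
  auto *FnTy = FunctionType::get(
      Type::getInt1Ty(Ctx), {Type::getInt1Ty(Ctx), Type::getInt1Ty(Ctx)},
      /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "merge", M);
  IRBuilder<> IRB(BasicBlock::Create(Ctx, "entry", F));

  Value *Merged = F->getArg(0);
  Value *Cond = F->getArg(1);
  // Freeze stops poison from propagating out of a select-derived condition.
  Cond = IRB.CreateFreeze(Cond);
  // CreateLogicalAnd emits `select i1 %merged, i1 %cond, i1 false`, which,
  // unlike a bitwise `and`, does not propagate poison from %cond when
  // %merged is already false.
  Merged = IRB.CreateLogicalAnd(Merged, Cond);
  IRB.CreateRet(Merged);
  M.print(outs(), nullptr);
}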
void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
@@ -2080,7 +2092,7 @@ bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
std::make_unique<OptimizationRemarkEmitter>(&F);
- return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
+ return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run();
}
namespace llvm {
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index ff3aa14a2a83..6815688827d2 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -66,8 +66,8 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -84,13 +84,11 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -112,7 +110,6 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <iterator>
#include <memory>
#include <set>
#include <string>
@@ -187,6 +184,15 @@ static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
"doing pointer arithmetic."),
cl::Hidden, cl::init(true));
+static cl::list<std::string> ClCombineTaintLookupTables(
+ "dfsan-combine-taint-lookup-table",
+ cl::desc(
+ "When dfsan-combine-offset-labels-on-gep and/or "
+ "dfsan-combine-pointer-labels-on-load are false, this flag can "
+ "be used to re-enable combining offset and/or pointer taint when "
+ "loading specific constant global variables (i.e. lookup tables)."),
+ cl::Hidden);
+
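The flag matters because, with pointer/offset combining off, a load from a constant table would otherwise drop the taint carried by the index. A standalone model of the propagation rule this re-enables (labels as a bitmask; all names are illustrative, not DFSan's API):

#include <cstdint>
#include <iostream>

using Label = uint8_t; // DFSan shadow labels modeled as a small bitmask.

struct Tainted {
  int Value;
  Label Shadow;
};

// With combining enabled for this table, the loaded value's label is the
// union of the cell's label and the index's label, so taint on the index
// survives the table lookup.
Tainted lookup(const int (&Table)[4], Tainted Index, bool CombineForTable) {
  Label Out = 0; // Constant table cells themselves carry no taint.
  if (CombineForTable)
    Out |= Index.Shadow;
  return {Table[Index.Value & 3], Out};
}

int main() {
  const int Table[4] = {10, 20, 30, 40};
  Tainted Index{2, /*Shadow=*/0x1};
  std::cout << static_cast<int>(lookup(Table, Index, true).Shadow)
            << "\n"; // 1: taint kept
  std::cout << static_cast<int>(lookup(Table, Index, false).Shadow)
            << "\n"; // 0: taint lost
}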
static cl::opt<bool> ClDebugNonzeroLabels(
"dfsan-debug-nonzero-labels",
cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
@@ -433,6 +439,7 @@ class DataFlowSanitizer {
FunctionType *DFSanUnionLoadFnTy;
FunctionType *DFSanLoadLabelAndOriginFnTy;
FunctionType *DFSanUnimplementedFnTy;
+ FunctionType *DFSanWrapperExternWeakNullFnTy;
FunctionType *DFSanSetLabelFnTy;
FunctionType *DFSanNonzeroLabelFnTy;
FunctionType *DFSanVarargWrapperFnTy;
@@ -448,6 +455,7 @@ class DataFlowSanitizer {
FunctionCallee DFSanUnionLoadFn;
FunctionCallee DFSanLoadLabelAndOriginFn;
FunctionCallee DFSanUnimplementedFn;
+ FunctionCallee DFSanWrapperExternWeakNullFn;
FunctionCallee DFSanSetLabelFn;
FunctionCallee DFSanNonzeroLabelFn;
FunctionCallee DFSanVarargWrapperFn;
@@ -467,6 +475,7 @@ class DataFlowSanitizer {
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeMask ReadOnlyNoneAttrs;
+ StringSet<> CombineTaintLookupTableNames;
/// Memory map parameters used in calculation mapping application addresses
/// to shadow addresses and origin addresses.
@@ -480,14 +489,13 @@ class DataFlowSanitizer {
bool isInstrumented(const Function *F);
bool isInstrumented(const GlobalAlias *GA);
bool isForceZeroLabels(const Function *F);
- FunctionType *getTrampolineFunctionType(FunctionType *T);
TransformedFunction getCustomFunctionType(FunctionType *T);
WrapperKind getWrapperKind(Function *F);
void addGlobalNameSuffix(GlobalValue *GV);
+ void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
Function *buildWrapperFunction(Function *F, StringRef NewFName,
GlobalValue::LinkageTypes NewFLink,
FunctionType *NewFT);
- Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
void initializeCallbackFunctions(Module &M);
void initializeRuntimeFunctions(Module &M);
void injectMetadataGlobals(Module &M);
@@ -658,6 +666,8 @@ struct DFSanFunction {
// branch instruction using the given conditional expression.
void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
+ bool isLookupTableConstant(Value *P);
+
private:
/// Collapses the shadow with aggregate type into a single primitive shadow
/// value.
@@ -792,25 +802,9 @@ DataFlowSanitizer::DataFlowSanitizer(
// FIXME: should we propagate vfs::FileSystem to this constructor?
ABIList.set(
SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
-}
-FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
- assert(!T->isVarArg());
- SmallVector<Type *, 4> ArgTypes;
- ArgTypes.push_back(T->getPointerTo());
- ArgTypes.append(T->param_begin(), T->param_end());
- ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
- Type *RetType = T->getReturnType();
- if (!RetType->isVoidTy())
- ArgTypes.push_back(PrimitiveShadowPtrTy);
-
- if (shouldTrackOrigins()) {
- ArgTypes.append(T->getNumParams(), OriginTy);
- if (!RetType->isVoidTy())
- ArgTypes.push_back(OriginPtrTy);
- }
-
- return FunctionType::get(T->getReturnType(), ArgTypes, false);
+ for (StringRef v : ClCombineTaintLookupTables)
+ CombineTaintLookupTableNames.insert(v);
}
TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
@@ -823,16 +817,8 @@ TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
std::vector<unsigned> ArgumentIndexMapping;
for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
Type *ParamType = T->getParamType(I);
- FunctionType *FT;
- if (isa<PointerType>(ParamType) &&
- (FT = dyn_cast<FunctionType>(ParamType->getPointerElementType()))) {
- ArgumentIndexMapping.push_back(ArgTypes.size());
- ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
- ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
- } else {
- ArgumentIndexMapping.push_back(ArgTypes.size());
- ArgTypes.push_back(ParamType);
- }
+ ArgumentIndexMapping.push_back(ArgTypes.size());
+ ArgTypes.push_back(ParamType);
}
for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
ArgTypes.push_back(PrimitiveShadowTy);
@@ -1058,6 +1044,10 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
/*isVarArg=*/false);
DFSanUnimplementedFnTy = FunctionType::get(
Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
+ DFSanWrapperExternWeakNullFnTy =
+ FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
+ /*isVarArg=*/false);
Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
Type::getInt8PtrTy(*Ctx), IntptrTy};
DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
@@ -1149,6 +1139,23 @@ void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
}
}
+void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
+ Function *F) {
+ // If the function we are wrapping was ExternWeak, it may be null.
+ // The original code before calling this wrapper may have checked for null,
+ // but replacing with a known-to-not-be-null wrapper can break this check.
+ // When replacing uses of the extern weak function with the wrapper we try
+ // to avoid replacing uses in conditionals, but this is not perfect.
+ // In the case where we fail and accidentally optimize out a null check
+ // for an extern weak function, add a check here to help identify the issue.
+ if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
+ std::vector<Value *> Args;
+ Args.push_back(IRB.CreatePointerCast(F, IRB.getInt8PtrTy()));
+ Args.push_back(IRB.CreateGlobalStringPtr(F->getName()));
+ IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
+ }
+}
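The runtime half of this check is __dfsan_wrapper_extern_weak_null, declared above with an (i8*, i8*) signature. What such a hook could plausibly do is sketched below; this is an illustration of the contract only, not compiler-rt's actual implementation:

#include <cstdio>
#include <cstdlib>

// Hypothetical sketch: the instrumentation passes the (possibly null)
// original extern_weak function and its name; a null value at this point
// suggests a null check was lost when uses were redirected to the wrapper.
extern "C" void __dfsan_wrapper_extern_weak_null(const void *Fn,
                                                 const char *FnName) {
  if (!Fn) {
    fprintf(stderr, "ERROR: dfsan: extern_weak function %s is null\n", FnName);
    abort();
  }
}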
+
Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
GlobalValue::LinkageTypes NewFLink,
@@ -1181,61 +1188,6 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
return NewF;
}
-Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
- StringRef FName) {
- FunctionType *FTT = getTrampolineFunctionType(FT);
- FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
- Function *F = dyn_cast<Function>(C.getCallee());
- if (F && F->isDeclaration()) {
- F->setLinkage(GlobalValue::LinkOnceODRLinkage);
- BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
- std::vector<Value *> Args;
- Function::arg_iterator AI = F->arg_begin() + 1;
- for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
- Args.push_back(&*AI);
- CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
- Type *RetType = FT->getReturnType();
- ReturnInst *RI = RetType->isVoidTy() ? ReturnInst::Create(*Ctx, BB)
- : ReturnInst::Create(*Ctx, CI, BB);
-
- // F is called by a wrapped custom function with primitive shadows. So
- // its arguments and return value need conversion.
- DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true,
- /*IsForceZeroLabels=*/false);
- Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI;
- ++ValAI;
- for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
- Value *Shadow =
- DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI);
- DFSF.ValShadowMap[&*ValAI] = Shadow;
- }
- Function::arg_iterator RetShadowAI = ShadowAI;
- const bool ShouldTrackOrigins = shouldTrackOrigins();
- if (ShouldTrackOrigins) {
- ValAI = F->arg_begin();
- ++ValAI;
- Function::arg_iterator OriginAI = ShadowAI;
- if (!RetType->isVoidTy())
- ++OriginAI;
- for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) {
- DFSF.ValOriginMap[&*ValAI] = &*OriginAI;
- }
- }
- DFSanVisitor(DFSF).visitCallInst(*CI);
- if (!RetType->isVoidTy()) {
- Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(
- DFSF.getShadow(RI->getReturnValue()), RI);
- new StoreInst(PrimitiveShadow, &*RetShadowAI, RI);
- if (ShouldTrackOrigins) {
- Value *Origin = DFSF.getOrigin(RI->getReturnValue());
- new StoreInst(Origin, &*std::prev(F->arg_end()), RI);
- }
- }
- }
-
- return cast<Constant>(C.getCallee());
-}
-
// Initialize DataFlowSanitizer runtime functions and declare them in the module
void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
{
@@ -1256,6 +1208,8 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
}
DFSanUnimplementedFn =
Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
+ DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(
+ "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);
{
AttributeList AL;
AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
@@ -1300,6 +1254,8 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
DFSanRuntimeFunctions.insert(
DFSanUnimplementedFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
+ DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());
+ DFSanRuntimeFunctions.insert(
DFSanSetLabelFn.getCallee()->stripPointerCasts());
DFSanRuntimeFunctions.insert(
DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
@@ -1500,7 +1456,40 @@ bool DataFlowSanitizer::runImpl(Module &M) {
Value *WrappedFnCst =
ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
- F.replaceAllUsesWith(WrappedFnCst);
+
+ // Extern weak functions can sometimes be null at execution time.
+ // Code will sometimes check if an extern weak function is null.
+ // This could look something like:
+ // declare extern_weak i8 @my_func(i8)
+ // br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,
+ // label %avoid_my_func
+ // The @"dfsw$my_func" wrapper is never null, so if we replace this use
+ // in the comparison, the icmp will simplify to false and we have
+ // accidentally optimized away a null check that is necessary.
+ // This can lead to a crash when the null extern_weak my_func is called.
+ //
+ // To prevent (the most common pattern of) this problem,
+ // do not replace, with the wrapper, those uses that appear in comparisons.
+ // We definitely want to replace uses in call instructions.
+ // Other uses (e.g. storing the function address somewhere) might be
+ // called or compared or both - this case may not be handled correctly.
+ // We default to replacing with the wrapper in cases where we are unsure.
+ auto IsNotCmpUse = [](Use &U) -> bool {
+ User *Usr = U.getUser();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
+ // This is the most common case for icmp ne null
+ if (CE->getOpcode() == Instruction::ICmp) {
+ return false;
+ }
+ }
+ if (Instruction *I = dyn_cast<Instruction>(Usr)) {
+ if (I->getOpcode() == Instruction::ICmp) {
+ return false;
+ }
+ }
+ return true;
+ };
+ F.replaceUsesWithIf(WrappedFnCst, IsNotCmpUse);
UnwrappedFnMap[WrappedFnCst] = &F;
*FI = NewF;
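The guarded pattern is ordinary weak-symbol probing, as in this standalone sketch (GCC/Clang on ELF or Mach-O). If instrumentation replaced the my_func use inside the condition with the never-null wrapper, the guard would fold away and a null function could be called:

#include <cstdio>

// Weak declaration: the symbol may be absent at link/run time, in which
// case `my_func` evaluates to a null pointer.
extern "C" int my_func(int) __attribute__((weak));

int main() {
  if (my_func) // The null check DFSan must not optimize away.
    printf("my_func(1) = %d\n", my_func(1));
  else
    printf("my_func is not linked in; skipping\n");
}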
@@ -1919,6 +1908,14 @@ Align DFSanFunction::getOriginAlign(Align InstAlignment) {
return Align(std::max(MinOriginAlignment, Alignment));
}
+bool DFSanFunction::isLookupTableConstant(Value *P) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
+ if (GV->isConstant() && GV->hasName())
+ return DFS.CombineTaintLookupTableNames.count(GV->getName());
+
+ return false;
+}
+
bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
Align InstAlignment) {
// When enabling tracking load instructions, we always use
@@ -2172,6 +2169,29 @@ static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
llvm_unreachable("Unknown ordering");
}
+Value *StripPointerGEPsAndCasts(Value *V) {
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ // The DFSan pass should be running on valid IR, but we keep a visited set
+ // anyway to guard against pointer-chasing cycles.
+ SmallPtrSet<const Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (auto *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ if (!V->getType()->isPointerTy())
+ return V;
+ } else if (isa<GlobalAlias>(V)) {
+ V = cast<GlobalAlias>(V)->getAliasee();
+ }
+ } while (Visited.insert(V).second);
+
+ return V;
+}
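The Visited.insert(V).second loop condition is the standard terminate-on-repeat idiom: the walk stops as soon as it revisits a value, so even a malformed cyclic chain cannot hang the pass. A standalone sketch of the same idiom on a toy node type:

#include <cstdio>
#include <set>

struct Node {
  int Id;
  Node *Next; // May form a cycle in malformed input.
};

// Same defensive walk as StripPointerGEPsAndCasts: advance while the node is
// newly seen; a repeated node (a cycle) ends the walk instead of looping.
Node *stripToLast(Node *N) {
  std::set<Node *> Visited;
  Visited.insert(N);
  do {
    if (N->Next)
      N = N->Next;
  } while (Visited.insert(N).second);
  return N;
}

int main() {
  Node C{3, nullptr}, B{2, &C}, A{1, &B};
  C.Next = &A; // Cycle A -> B -> C -> A.
  printf("stopped at node %d\n", stripToLast(&A)->Id); // stops at 1
}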
+
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
auto &DL = LI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(LI.getType());
@@ -2200,7 +2220,9 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
Shadows.push_back(PrimitiveShadow);
Origins.push_back(Origin);
}
- if (ClCombinePointerLabelsOnLoad) {
+ if (ClCombinePointerLabelsOnLoad ||
+ DFSF.isLookupTableConstant(
+ StripPointerGEPsAndCasts(LI.getPointerOperand()))) {
Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
if (ShouldTrackOrigins) {
@@ -2562,7 +2584,9 @@ void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
}
void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- if (ClCombineOffsetLabelsOnGEP) {
+ if (ClCombineOffsetLabelsOnGEP ||
+ DFSF.isLookupTableConstant(
+ StripPointerGEPsAndCasts(GEPI.getPointerOperand()))) {
visitInstOperands(GEPI);
return;
}
@@ -2722,13 +2746,8 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
auto *MTI = cast<MemTransferInst>(
IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
{DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
- if (ClPreserveAlignment) {
- MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
- MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
- } else {
- MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
- MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
- }
+ MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
+ MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
if (ClEventCallbacks) {
IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
{RawDestShadow,
@@ -2864,16 +2883,19 @@ bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
CB.setCalledFunction(&F);
IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
IRB.CreateGlobalStringPtr(F.getName()));
+ DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
return true;
case DataFlowSanitizer::WK_Discard:
CB.setCalledFunction(&F);
+ DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
return true;
case DataFlowSanitizer::WK_Functional:
CB.setCalledFunction(&F);
+ DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
visitInstOperands(CB);
return true;
case DataFlowSanitizer::WK_Custom:
@@ -2905,22 +2927,7 @@ bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
// Adds non-variable arguments.
auto *I = CB.arg_begin();
for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
- Type *T = (*I)->getType();
- FunctionType *ParamFT;
- if (isa<PointerType>(T) &&
- (ParamFT = dyn_cast<FunctionType>(T->getPointerElementType()))) {
- std::string TName = "dfst";
- TName += utostr(FT->getNumParams() - N);
- TName += "$";
- TName += F.getName();
- Constant *Trampoline =
- DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
- Args.push_back(Trampoline);
- Args.push_back(
- IRB.CreateBitCast(*I, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
- } else {
- Args.push_back(*I);
- }
+ Args.push_back(*I);
}
// Adds shadow arguments.
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 325089fc4402..ac4a1fd6bb7e 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -14,19 +14,15 @@
//===----------------------------------------------------------------------===//
#include "CFGMST.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
@@ -34,8 +30,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -87,7 +81,7 @@ GCOVOptions GCOVOptions::getDefault() {
if (DefaultGCOVVersion.size() != 4) {
llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") +
- DefaultGCOVVersion);
+ DefaultGCOVVersion, /*GenCrashDiag=*/false);
}
memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
return Options;
@@ -169,39 +163,6 @@ private:
StringMap<bool> InstrumentedFiles;
};
-class GCOVProfilerLegacyPass : public ModulePass {
-public:
- static char ID;
- GCOVProfilerLegacyPass()
- : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {}
- GCOVProfilerLegacyPass(const GCOVOptions &Opts)
- : ModulePass(ID), Profiler(Opts) {
- initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- StringRef getPassName() const override { return "GCOV Profiler"; }
-
- bool runOnModule(Module &M) override {
- auto GetBFI = [this](Function &F) {
- return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
- };
- auto GetBPI = [this](Function &F) {
- return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
- };
- auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-
-private:
- GCOVProfiler Profiler;
-};
-
struct BBInfo {
BBInfo *Group;
uint32_t Index;
@@ -237,21 +198,6 @@ struct Edge {
};
}
-char GCOVProfilerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(
- GCOVProfilerLegacyPass, "insert-gcov-profiling",
- "Insert instrumentation for GCOV profiling", false, false)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- GCOVProfilerLegacyPass, "insert-gcov-profiling",
- "Insert instrumentation for GCOV profiling", false, false)
-
-ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
- return new GCOVProfilerLegacyPass(Options);
-}
-
static StringRef getFunctionName(const DISubprogram *SP) {
if (!SP->getLinkageName().empty())
return SP->getLinkageName();
@@ -862,7 +808,8 @@ bool GCOVProfiler::emitProfileNotes(
// Split indirectbr critical edges here before computing the MST rather
// than later in getInstrBB() to avoid invalidating it.
- SplitIndirectBrCriticalEdges(F, BPI, BFI);
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
+ BFI);
CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 7b3741d19a1b..218b4bbfb6c0 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -13,14 +13,15 @@
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -33,7 +34,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -43,19 +44,15 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include <sstream>
using namespace llvm;
@@ -83,6 +80,11 @@ static cl::opt<std::string>
cl::desc("Prefix for memory access callbacks"),
cl::Hidden, cl::init("__hwasan_"));
+static cl::opt<bool> ClKasanMemIntrinCallbackPrefix(
+ "hwasan-kernel-mem-intrinsic-prefix",
+ cl::desc("Use prefix for memory intrinsics in KASAN mode"), cl::Hidden,
+ cl::init(false));
+
static cl::opt<bool> ClInstrumentWithCalls(
"hwasan-instrument-with-calls",
cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
@@ -145,7 +147,7 @@ static cl::opt<bool> ClGenerateTagsWithCalls(
cl::init(false));
static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
- cl::Hidden, cl::init(false), cl::ZeroOrMore);
+ cl::Hidden, cl::init(false));
static cl::opt<int> ClMatchAllTag(
"hwasan-match-all-tag",
@@ -191,17 +193,16 @@ static cl::opt<bool>
static cl::opt<bool>
ClInstrumentLandingPads("hwasan-instrument-landing-pads",
cl::desc("instrument landing pads"), cl::Hidden,
- cl::init(false), cl::ZeroOrMore);
+ cl::init(false));
static cl::opt<bool> ClUseShortGranules(
"hwasan-use-short-granules",
cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
- cl::init(false), cl::ZeroOrMore);
+ cl::init(false));
static cl::opt<bool> ClInstrumentPersonalityFunctions(
"hwasan-instrument-personality-functions",
- cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
- cl::ZeroOrMore);
+ cl::desc("instrument personality functions"), cl::Hidden);
static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
cl::desc("inline all checks"),
@@ -244,13 +245,6 @@ bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
-private:
- struct AllocaInfo {
- AllocaInst *AI;
- SmallVector<IntrinsicInst *, 2> LifetimeStart;
- SmallVector<IntrinsicInst *, 2> LifetimeEnd;
- };
-
public:
HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
const StackSafetyGlobalInfo *SSI)
@@ -265,11 +259,7 @@ public:
void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
- DenseMap<AllocaInst *, AllocaInst *> padInterestingAllocas(
- const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument);
- bool sanitizeFunction(Function &F,
- llvm::function_ref<const DominatorTree &()> GetDT,
- llvm::function_ref<const PostDominatorTree &()> GetPDT);
+ bool sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
void initializeModule();
void createHwasanCtorComdat();
@@ -301,16 +291,9 @@ public:
void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
- static bool isStandardLifetime(const AllocaInfo &AllocaInfo,
- const DominatorTree &DT);
- bool instrumentStack(
- bool ShouldDetectUseAfterScope,
- MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
- SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
- DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
- llvm::function_ref<const DominatorTree &()> GetDT,
- llvm::function_ref<const PostDominatorTree &()> GetPDT);
+ bool instrumentStack(memtag::StackInfo &Info, Value *StackTag,
+ const DominatorTree &DT, const PostDominatorTree &PDT,
+ const LoopInfo &LI);
Value *readRegister(IRBuilder<> &IRB, StringRef Name);
bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
Value *getNextTagWithCall(IRBuilder<> &IRB);
@@ -328,6 +311,9 @@ public:
void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
void instrumentGlobals();
+ Value *getPC(IRBuilder<> &IRB);
+ Value *getSP(IRBuilder<> &IRB);
+
void instrumentPersonalityFunctions();
private:
@@ -397,96 +383,12 @@ private:
Value *ShadowBase = nullptr;
Value *StackBaseTag = nullptr;
+ Value *CachedSP = nullptr;
GlobalValue *ThreadPtrGlobal = nullptr;
};
-class HWAddressSanitizerLegacyPass : public FunctionPass {
-public:
- // Pass identification, replacement for typeid.
- static char ID;
-
- explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
- bool Recover = false,
- bool DisableOptimization = false)
- : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
- DisableOptimization(DisableOptimization) {
- initializeHWAddressSanitizerLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override { return "HWAddressSanitizer"; }
-
- bool doInitialization(Module &M) override {
- HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover,
- /*SSI=*/nullptr);
- return true;
- }
-
- bool runOnFunction(Function &F) override {
- auto TargetTriple = Triple(F.getParent()->getTargetTriple());
- if (shouldUseStackSafetyAnalysis(TargetTriple, DisableOptimization)) {
- // We cannot call getAnalysis in doInitialization, that would cause a
- // crash as the required analyses are not initialized yet.
- HWASan->setSSI(
- &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult());
- }
- return HWASan->sanitizeFunction(
- F,
- [&]() -> const DominatorTree & {
- return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- },
- [&]() -> const PostDominatorTree & {
- return getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- });
- }
-
- bool doFinalization(Module &M) override {
- HWASan.reset();
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // This is an over-estimation: in case we are building for an
- // architecture that doesn't allow stack tagging, we will still load the
- // analysis.
- // This is so we don't need to plumb TargetTriple all the way here.
- if (mightUseStackSafetyAnalysis(DisableOptimization))
- AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- }
-
-private:
- std::unique_ptr<HWAddressSanitizer> HWASan;
- bool CompileKernel;
- bool Recover;
- bool DisableOptimization;
-};
-
} // end anonymous namespace
-char HWAddressSanitizerLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(
- HWAddressSanitizerLegacyPass, "hwasan",
- "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(
- HWAddressSanitizerLegacyPass, "hwasan",
- "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
- false)
-
-FunctionPass *
-llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, bool Recover,
- bool DisableOptimization) {
- assert(!CompileKernel || Recover);
- return new HWAddressSanitizerLegacyPass(CompileKernel, Recover,
- DisableOptimization);
-}
-
PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
const StackSafetyGlobalInfo *SSI = nullptr;
@@ -497,16 +399,8 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
bool Modified = false;
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- for (Function &F : M) {
- Modified |= HWASan.sanitizeFunction(
- F,
- [&]() -> const DominatorTree & {
- return FAM.getResult<DominatorTreeAnalysis>(F);
- },
- [&]() -> const PostDominatorTree & {
- return FAM.getResult<PostDominatorTreeAnalysis>(F);
- });
- }
+ for (Function &F : M)
+ Modified |= HWASan.sanitizeFunction(F, FAM);
if (Modified)
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -739,7 +633,9 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
ArrayType::get(IRB.getInt8Ty(), 0));
const std::string MemIntrinCallbackPrefix =
- CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+ (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
+ ? std::string("")
+ : ClMemoryAccessCallbackPrefix;
HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy);
@@ -812,7 +708,7 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
void HWAddressSanitizer::getInterestingMemoryOperands(
Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
// Skip memory accesses inserted by another instrumentation.
- if (I->hasMetadata("nosanitize"))
+ if (I->hasMetadata(LLVMContext::MD_nosanitize))
return;
// Do not instrument the load fetching the dynamic shadow address.
@@ -1056,18 +952,6 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
return true;
}
-static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
- uint64_t ArraySize = 1;
- if (AI.isArrayAllocation()) {
- const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
- assert(CI && "non-constant array size");
- ArraySize = CI->getZExtValue();
- }
- Type *Ty = AI.getAllocatedType();
- uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
- return SizeInBytes * ArraySize;
-}
-
void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
size_t Size) {
size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
@@ -1141,19 +1025,10 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
return getNextTagWithCall(IRB);
if (StackBaseTag)
return StackBaseTag;
- // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
- // first).
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- auto GetStackPointerFn = Intrinsic::getDeclaration(
- M, Intrinsic::frameaddress,
- IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
- Value *StackPointer = IRB.CreateCall(
- GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
-
// Extract some entropy from the stack pointer for the tags.
// Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
// between functions).
- Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
+ Value *StackPointerLong = getSP(IRB);
Value *StackTag =
applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
IRB.CreateLShr(StackPointerLong, 20)));
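A worked instance of the xor above, as a standalone sketch; the sample stack pointer is made up, and the final 8-bit mask stands in for applyTagMask:

#include <cstdint>
#include <cstdio>

int main() {
  // Mix ASLR entropy (bits 20..28 of SP) into the low bits (0..8), which
  // differ between functions, to derive the stack base tag.
  uint64_t SP = 0x0000ffffbeefc2d0ULL;
  uint64_t Mixed = SP ^ (SP >> 20);
  // applyTagMask then reduces this to the tag width (8 bits assumed here).
  printf("stack base tag = 0x%02llx\n", (unsigned long long)(Mixed & 0xff));
}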
@@ -1233,6 +1108,30 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
return nullptr;
}
+Value *HWAddressSanitizer::getPC(IRBuilder<> &IRB) {
+ if (TargetTriple.getArch() == Triple::aarch64)
+ return readRegister(IRB, "pc");
+ else
+ return IRB.CreatePtrToInt(IRB.GetInsertBlock()->getParent(), IntptrTy);
+}
+
+Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
+ if (!CachedSP) {
+ // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
+ // first).
+ Function *F = IRB.GetInsertBlock()->getParent();
+ Module *M = F->getParent();
+ auto GetStackPointerFn = Intrinsic::getDeclaration(
+ M, Intrinsic::frameaddress,
+ IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+ CachedSP = IRB.CreatePtrToInt(
+ IRB.CreateCall(GetStackPointerFn,
+ {Constant::getNullValue(IRB.getInt32Ty())}),
+ IntptrTy);
+ }
+ return CachedSP;
+}
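Outside the pass, the value getSP caches is what the frameaddress builtin exposes directly; a tiny sketch of the analogue:

#include <cstdint>
#include <cstdio>

int main() {
  // Clang/GCC builtin behind llvm.frameaddress(0): the current frame's
  // address, which HWASan reuses (cached once per function) for tag
  // entropy and the frame record.
  uintptr_t SP = (uintptr_t)__builtin_frame_address(0);
  printf("frame address: 0x%llx\n", (unsigned long long)SP);
}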
+
void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
if (!Mapping.InTls)
ShadowBase = getShadowNonTls(IRB);
@@ -1251,23 +1150,12 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
if (WithFrameRecord) {
- Function *F = IRB.GetInsertBlock()->getParent();
StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
// Prepare ring buffer data.
- Value *PC;
- if (TargetTriple.getArch() == Triple::aarch64)
- PC = readRegister(IRB, "pc");
- else
- PC = IRB.CreatePtrToInt(F, IntptrTy);
- Module *M = F->getParent();
- auto GetStackPointerFn = Intrinsic::getDeclaration(
- M, Intrinsic::frameaddress,
- IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
- Value *SP = IRB.CreatePtrToInt(
- IRB.CreateCall(GetStackPointerFn,
- {Constant::getNullValue(IRB.getInt32Ty())}),
- IntptrTy);
+ Value *PC = getPC(IRB);
+ Value *SP = getSP(IRB);
+
// Mix SP and PC.
// Assumptions:
// PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
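The hunk cuts the assumption list short; in the in-tree pass the record packs the interesting SP bits above the 48 PC bits as 0xSSSSPPPPPPPPPPPP via PC | (SP << 44), i.e. (SP >> 4) << 48. A worked example with made-up values (the shift amount is the one assumption here):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t PC = 0x0000123456789abcULL; // 48 meaningful bits, top 16 zero.
  uint64_t SP = 0x0000ffffbeefc2d0ULL; // Low 4 bits zero.
  // SP << 44 places SP's bits 4..19 into the record's top 16 bits.
  uint64_t Record = PC | (SP << 44);
  printf("record = 0x%016llx\n", (unsigned long long)Record);
  // Prints 0xfc2d123456789abc: 0xSSSS from SP, then the 48 PC bits.
}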
@@ -1330,43 +1218,16 @@ bool HWAddressSanitizer::instrumentLandingPads(
return true;
}
-static bool
-maybeReachableFromEachOther(const SmallVectorImpl<IntrinsicInst *> &Insts,
- const DominatorTree &DT) {
- // If we have too many lifetime ends, give up, as the algorithm below is N^2.
- if (Insts.size() > ClMaxLifetimes)
- return true;
- for (size_t I = 0; I < Insts.size(); ++I) {
- for (size_t J = 0; J < Insts.size(); ++J) {
- if (I == J)
- continue;
- if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, &DT))
- return true;
- }
- }
- return false;
-}
-
-// static
-bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo,
- const DominatorTree &DT) {
- // An alloca that has exactly one start and end in every possible execution.
- // If it has multiple ends, they have to be unreachable from each other, so
- // at most one of them is actually used for each execution of the function.
- return AllocaInfo.LifetimeStart.size() == 1 &&
- (AllocaInfo.LifetimeEnd.size() == 1 ||
- (AllocaInfo.LifetimeEnd.size() > 0 &&
- !maybeReachableFromEachOther(AllocaInfo.LifetimeEnd, DT)));
+static bool isLifetimeIntrinsic(Value *V) {
+ auto *II = dyn_cast<IntrinsicInst>(V);
+ return II && II->isLifetimeStartOrEnd();
}
-bool HWAddressSanitizer::instrumentStack(
- bool ShouldDetectUseAfterScope,
- MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
- SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
- DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
- llvm::function_ref<const DominatorTree &()> GetDT,
- llvm::function_ref<const PostDominatorTree &()> GetPDT) {
+bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
+ Value *StackTag,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
// (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
@@ -1374,10 +1235,10 @@ bool HWAddressSanitizer::instrumentStack(
// This generates one extra instruction per alloca use.
unsigned int I = 0;
- for (auto &KV : AllocasToInstrument) {
+ for (auto &KV : SInfo.AllocasToInstrument) {
auto N = I++;
auto *AI = KV.first;
- AllocaInfo &Info = KV.second;
+ memtag::AllocaInfo &Info = KV.second;
IRBuilder<> IRB(AI->getNextNode());
// Replace uses of the alloca with tagged address.
@@ -1388,10 +1249,34 @@ bool HWAddressSanitizer::instrumentStack(
AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
Replacement->setName(Name + ".hwasan");
- AI->replaceUsesWithIf(Replacement,
- [AILong](Use &U) { return U.getUser() != AILong; });
+ size_t Size = memtag::getAllocaSizeInBytes(*AI);
+ size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+
+ Value *AICast = IRB.CreatePointerCast(AI, Int8PtrTy);
+
+ auto HandleLifetime = [&](IntrinsicInst *II) {
+ // Set the lifetime intrinsic to cover the whole alloca. This reduces the
+ // set of assumptions we need to make about the lifetime. Without this we
+ // would need to ensure that we can track the lifetime pointer to a
+ // constant offset from the alloca, and would still need to change the
+ // size to include the extra alignment we use for the untagging to make
+ // the size consistent.
+ //
+ // The check for standard lifetime below makes sure that we have exactly
+ // one set of start / end in any execution (i.e. the ends are not
+ // reachable from each other), so this will not cause any problems.
+ II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
+ II->setArgOperand(1, AICast);
+ };
+ llvm::for_each(Info.LifetimeStart, HandleLifetime);
+ llvm::for_each(Info.LifetimeEnd, HandleLifetime);
- for (auto *DDI : AllocaDbgMap.lookup(AI)) {
+ AI->replaceUsesWithIf(Replacement, [AICast, AILong](Use &U) {
+ auto *User = U.getUser();
+ return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
+ });
+
+ for (auto *DDI : Info.DbgVariableIntrinsics) {
// Prepend "tag_offset, N" to the dwarf expression.
// Tag offset logically applies to the alloca pointer, and it makes sense
// to put it at the beginning of the expression.
@@ -1403,37 +1288,47 @@ bool HWAddressSanitizer::instrumentStack(
NewOps, LocNo));
}
- size_t Size = getAllocaSizeInBytes(*AI);
- size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
auto TagEnd = [&](Instruction *Node) {
IRB.SetInsertPoint(Node);
Value *UARTag = getUARTag(IRB, StackTag);
+ // When untagging, use the `AlignedSize` because we need to set the tags
+ // for the entire alloca to zero. If we used `Size` here, we would
+ // keep the last granule tagged, and store zero in the last byte of the
+ // last granule, due to how short granules are implemented.
tagAlloca(IRB, AI, UARTag, AlignedSize);
};
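Why AlignedSize: with HWASan's 16-byte granules, an object of Size = 13 ends in a short granule whose shadow stores the used byte count while the real tag lives in the granule's last byte. Untagging only Size bytes would leave that short-granule state behind; a small sketch of the arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Granule = 16; // HWASan object alignment / granule size.
  uint64_t Size = 13;
  uint64_t AlignedSize = (Size + Granule - 1) / Granule * Granule;

  // While live: full granules carry the pointer tag in shadow; the trailing
  // short granule's shadow holds the used byte count (13) and the real tag
  // sits in that granule's last byte.
  printf("live object: %llu byte(s), granule coverage: %llu\n",
         (unsigned long long)Size, (unsigned long long)AlignedSize);
  // On return/untag: clear all AlignedSize bytes (16 here), so neither the
  // short-granule count nor the stashed tag byte survives.
  printf("untag range: %llu bytes\n", (unsigned long long)AlignedSize);
}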
+ // Calls to functions that may return twice (e.g. setjmp) confuse the
+ // postdominator analysis, and would leave memory tagged after function
+ // return. Work around this by always untagging at every return statement
+ // if return_twice functions are called.
bool StandardLifetime =
- UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
- if (ShouldDetectUseAfterScope && StandardLifetime) {
+ SInfo.UnrecognizedLifetimes.empty() &&
+ memtag::isStandardLifetime(Info.LifetimeStart, Info.LifetimeEnd, &DT,
+ &LI, ClMaxLifetimes) &&
+ !SInfo.CallsReturnTwice;
+ if (DetectUseAfterScope && StandardLifetime) {
IntrinsicInst *Start = Info.LifetimeStart[0];
IRB.SetInsertPoint(Start->getNextNode());
tagAlloca(IRB, AI, Tag, Size);
- if (!forAllReachableExits(GetDT(), GetPDT(), Start, Info.LifetimeEnd,
- RetVec, TagEnd)) {
+ if (!memtag::forAllReachableExits(DT, PDT, LI, Start, Info.LifetimeEnd,
+ SInfo.RetVec, TagEnd)) {
for (auto *End : Info.LifetimeEnd)
End->eraseFromParent();
}
} else {
tagAlloca(IRB, AI, Tag, Size);
- for (auto *RI : RetVec)
+ for (auto *RI : SInfo.RetVec)
TagEnd(RI);
- if (!StandardLifetime) {
- for (auto &II : Info.LifetimeStart)
- II->eraseFromParent();
- for (auto &II : Info.LifetimeEnd)
- II->eraseFromParent();
- }
+ // We inserted tagging outside of the lifetimes, so we have to remove
+ // them.
+ for (auto &II : Info.LifetimeStart)
+ II->eraseFromParent();
+ for (auto &II : Info.LifetimeEnd)
+ II->eraseFromParent();
}
+ memtag::alignAndPadAlloca(Info, Align(Mapping.getObjectAlignment()));
}
- for (auto &I : UnrecognizedLifetimes)
+ for (auto &I : SInfo.UnrecognizedLifetimes)
I->eraseFromParent();
return true;
}
@@ -1443,7 +1338,7 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
// FIXME: instrument dynamic allocas, too
AI.isStaticAlloca() &&
// alloca() may be called with 0 size, ignore it.
- getAllocaSizeInBytes(AI) > 0 &&
+ memtag::getAllocaSizeInBytes(AI) > 0 &&
// We are only interested in allocas not promotable to registers.
// Promotable allocas are common under -O0.
!isAllocaPromotable(&AI) &&
@@ -1456,42 +1351,8 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
!(SSI && SSI->isSafe(AI));
}
-DenseMap<AllocaInst *, AllocaInst *> HWAddressSanitizer::padInterestingAllocas(
- const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument) {
- DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
- for (auto &KV : AllocasToInstrument) {
- AllocaInst *AI = KV.first;
- uint64_t Size = getAllocaSizeInBytes(*AI);
- uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
- AI->setAlignment(
- Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
- if (Size != AlignedSize) {
- Type *AllocatedType = AI->getAllocatedType();
- if (AI->isArrayAllocation()) {
- uint64_t ArraySize =
- cast<ConstantInt>(AI->getArraySize())->getZExtValue();
- AllocatedType = ArrayType::get(AllocatedType, ArraySize);
- }
- Type *TypeWithPadding = StructType::get(
- AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
- auto *NewAI = new AllocaInst(
- TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
- NewAI->takeName(AI);
- NewAI->setAlignment(AI->getAlign());
- NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
- NewAI->setSwiftError(AI->isSwiftError());
- NewAI->copyMetadata(*AI);
- auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
- AI->replaceAllUsesWith(Bitcast);
- AllocaToPaddedAllocaMap[AI] = NewAI;
- }
- }
- return AllocaToPaddedAllocaMap;
-}
-
-bool HWAddressSanitizer::sanitizeFunction(
- Function &F, llvm::function_ref<const DominatorTree &()> GetDT,
- llvm::function_ref<const PostDominatorTree &()> GetPDT) {
+bool HWAddressSanitizer::sanitizeFunction(Function &F,
+ FunctionAnalysisManager &FAM) {
if (&F == HwasanCtorFunction)
return false;
@@ -1502,72 +1363,27 @@ bool HWAddressSanitizer::sanitizeFunction(
SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
- MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
- SmallVector<Instruction *, 8> RetVec;
SmallVector<Instruction *, 8> LandingPadVec;
- SmallVector<Instruction *, 4> UnrecognizedLifetimes;
- DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
- bool CallsReturnTwice = false;
- for (auto &BB : F) {
- for (auto &Inst : BB) {
- if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
- if (CI->canReturnTwice()) {
- CallsReturnTwice = true;
- }
- }
- if (InstrumentStack) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
- if (isInterestingAlloca(*AI))
- AllocasToInstrument.insert({AI, {}});
- continue;
- }
- auto *II = dyn_cast<IntrinsicInst>(&Inst);
- if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end)) {
- AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
- if (!AI) {
- UnrecognizedLifetimes.push_back(&Inst);
- continue;
- }
- if (!isInterestingAlloca(*AI))
- continue;
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- AllocasToInstrument[AI].LifetimeStart.push_back(II);
- else
- AllocasToInstrument[AI].LifetimeEnd.push_back(II);
- continue;
- }
- }
- if (isa<ReturnInst>(Inst)) {
- if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall())
- RetVec.push_back(CI);
- else
- RetVec.push_back(&Inst);
- } else if (isa<ResumeInst, CleanupReturnInst>(Inst)) {
- RetVec.push_back(&Inst);
- }
-
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
- for (Value *V : DVI->location_ops()) {
- if (auto *Alloca = dyn_cast_or_null<AllocaInst>(V))
- if (!AllocaDbgMap.count(Alloca) ||
- AllocaDbgMap[Alloca].back() != DVI)
- AllocaDbgMap[Alloca].push_back(DVI);
- }
- }
+ memtag::StackInfoBuilder SIB(
+ [this](const AllocaInst &AI) { return isInterestingAlloca(AI); });
+ for (auto &Inst : instructions(F)) {
+ if (InstrumentStack) {
+ SIB.visit(Inst);
+ }
- if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
- LandingPadVec.push_back(&Inst);
+ if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
+ LandingPadVec.push_back(&Inst);
- getInterestingMemoryOperands(&Inst, OperandsToInstrument);
+ getInterestingMemoryOperands(&Inst, OperandsToInstrument);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
- if (!ignoreMemIntrinsic(MI))
- IntrinToInstrument.push_back(MI);
- }
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
+ if (!ignoreMemIntrinsic(MI))
+ IntrinToInstrument.push_back(MI);
}
+ memtag::StackInfo &SInfo = SIB.get();
+
initializeCallbacks(*F.getParent());
bool Changed = false;
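
The scan above replaces HWASan's hand-rolled per-function bookkeeping with memtag::StackInfoBuilder, shared with the stack-tagging passes. A plausible shape for the object it accumulates, inferred from the deleted locals (AllocasToInstrument, RetVec, UnrecognizedLifetimes, CallsReturnTwice) rather than from the header itself:

    // Sketch only; the real definitions live in
    // llvm/Transforms/Utils/MemoryTaggingSupport.h and may differ in detail.
    #include "llvm/ADT/MapVector.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    struct AllocaInfo {
      SmallVector<IntrinsicInst *, 2> LifetimeStart, LifetimeEnd;
      SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
    };
    struct StackInfo {
      MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
      SmallVector<Instruction *, 4> UnrecognizedLifetimes;
      SmallVector<Instruction *, 8> RetVec; // returns, resumes, cleanuprets
      bool CallsReturnTwice = false;        // a setjmp-like callee was seen
    };
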
@@ -1575,7 +1391,7 @@ bool HWAddressSanitizer::sanitizeFunction(
if (!LandingPadVec.empty())
Changed |= instrumentLandingPads(LandingPadVec);
- if (AllocasToInstrument.empty() && F.hasPersonalityFn() &&
+ if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
// __hwasan_personality_thunk is a no-op for functions without an
// instrumented stack, so we can drop it.
@@ -1583,7 +1399,7 @@ bool HWAddressSanitizer::sanitizeFunction(
Changed = true;
}
- if (AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
+ if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
IntrinToInstrument.empty())
return Changed;
@@ -1593,42 +1409,16 @@ bool HWAddressSanitizer::sanitizeFunction(
IRBuilder<> EntryIRB(InsertPt);
emitPrologue(EntryIRB,
/*WithFrameRecord*/ ClRecordStackHistory &&
- Mapping.WithFrameRecord && !AllocasToInstrument.empty());
+ Mapping.WithFrameRecord &&
+ !SInfo.AllocasToInstrument.empty());
- if (!AllocasToInstrument.empty()) {
+ if (!SInfo.AllocasToInstrument.empty()) {
+ const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
+ const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- // Calls to functions that may return twice (e.g. setjmp) confuse the
- // postdominator analysis, and will leave us to keep memory tagged after
- // function return. Work around this by always untagging at every return
- // statement if return_twice functions are called.
- instrumentStack(DetectUseAfterScope && !CallsReturnTwice,
- AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
- RetVec, StackTag, GetDT, GetPDT);
- }
- // Pad and align each of the allocas that we instrumented to stop small
- // uninteresting allocas from hiding in instrumented alloca's padding and so
- // that we have enough space to store real tags for short granules.
- DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap =
- padInterestingAllocas(AllocasToInstrument);
-
- if (!AllocaToPaddedAllocaMap.empty()) {
- for (auto &BB : F) {
- for (auto &Inst : BB) {
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
- SmallDenseSet<Value *> LocationOps(DVI->location_ops().begin(),
- DVI->location_ops().end());
- for (Value *V : LocationOps) {
- if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) {
- if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
- DVI->replaceVariableLocationOp(V, NewAI);
- }
- }
- }
- }
- }
- for (auto &P : AllocaToPaddedAllocaMap)
- P.first->eraseFromParent();
+ instrumentStack(SInfo, StackTag, DT, PDT, LI);
}
// If we split the entry block, move any allocas that were originally in the
@@ -1654,6 +1444,7 @@ bool HWAddressSanitizer::sanitizeFunction(
ShadowBase = nullptr;
StackBaseTag = nullptr;
+ CachedSP = nullptr;
return true;
}
@@ -1735,34 +1526,10 @@ void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
GV->eraseFromParent();
}
-static DenseSet<GlobalVariable *> getExcludedGlobals(Module &M) {
- NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
- if (!Globals)
- return DenseSet<GlobalVariable *>();
- DenseSet<GlobalVariable *> Excluded(Globals->getNumOperands());
- for (auto MDN : Globals->operands()) {
- // Metadata node contains the global and the fields of "Entry".
- assert(MDN->getNumOperands() == 5);
- auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
- // The optimizer may optimize away a global entirely.
- if (!V)
- continue;
- auto *StrippedV = V->stripPointerCasts();
- auto *GV = dyn_cast<GlobalVariable>(StrippedV);
- if (!GV)
- continue;
- ConstantInt *IsExcluded = mdconst::extract<ConstantInt>(MDN->getOperand(4));
- if (IsExcluded->isOne())
- Excluded.insert(GV);
- }
- return Excluded;
-}
-
void HWAddressSanitizer::instrumentGlobals() {
std::vector<GlobalVariable *> Globals;
- auto ExcludedGlobals = getExcludedGlobals(M);
for (GlobalVariable &GV : M.globals()) {
- if (ExcludedGlobals.count(&GV))
+ if (GV.hasSanitizerMetadata() && GV.getSanitizerMetadata().NoHWAddress)
continue;
if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
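
Exclusion no longer round-trips through the llvm.asan.globals named metadata parsed by the deleted getExcludedGlobals(); producers attach sanitizer metadata directly to the global. A minimal producer-side sketch, assuming the GlobalValue::SanitizerMetadata API that the NoHWAddress check above relies on:

    #include "llvm/IR/GlobalVariable.h"
    using namespace llvm;

    // Hypothetical helper: mark one global as exempt from HWASan.
    void excludeFromHWASan(GlobalVariable &GV) {
      GlobalValue::SanitizerMetadata Meta; // all flags default to false
      Meta.NoHWAddress = true;             // matches the check in the loop above
      GV.setSanitizerMetadata(Meta);
    }
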
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 9a3afa9cc924..3ef06907dfee 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -13,30 +13,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -45,7 +35,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include <cassert>
#include <cstdint>
@@ -71,13 +60,13 @@ static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
// value.
// For debug use only.
static cl::opt<unsigned>
- ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+ ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
cl::desc("Max number of promotions for this compilation"));
// If ICPCSSkip is non-zero, the first ICPCSSkip callsites will be skipped.
// For debug use only.
static cl::opt<unsigned>
- ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+ ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
cl::desc("Skip Callsite up to this number for this compilation"));
// Set if the pass is called in LTO optimization. The difference for LTO mode
@@ -115,55 +104,6 @@ static cl::opt<bool>
namespace {
-class PGOIndirectCallPromotionLegacyPass : public ModulePass {
-public:
- static char ID;
-
- PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
- : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
- initializePGOIndirectCallPromotionLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- }
-
- StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
-
-private:
- bool runOnModule(Module &M) override;
-
- // If this pass is called in LTO. We need to special handling the PGOFuncName
- // for the static variables due to LTO's internalization.
- bool InLTO;
-
- // If this pass is called in SamplePGO. We need to add the prof metadata to
- // the promoted direct call.
- bool SamplePGO;
-};
-
-} // end anonymous namespace
-
-char PGOIndirectCallPromotionLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
- "Use PGO instrumentation profile to promote indirect "
- "calls to direct calls.",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
- "Use PGO instrumentation profile to promote indirect "
- "calls to direct calls.",
- false, false)
-
-ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
- bool SamplePGO) {
- return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
-}
-
-namespace {
-
// The main data structure used to promote indirect calls to conditional
// direct calls.
class ICallPromotionFunc {
@@ -428,15 +368,6 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
return Changed;
}
-bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
- ProfileSummaryInfo *PSI =
- &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-
- // Command-line option has the priority for InLTO.
- return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
- SamplePGO | ICPSamplePGOMode);
-}
-
PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
ModuleAnalysisManager &AM) {
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index 3ea314329079..2091881c29fe 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -9,29 +9,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include <fstream>
-#include <map>
#include <mutex>
-#include <set>
#include <sstream>
using namespace llvm;
@@ -61,7 +54,7 @@ private:
ArrayType *MapTy;
public:
- InstrOrderFile() {}
+ InstrOrderFile() = default;
void createOrderFileData(Module &M) {
LLVMContext &Ctx = M.getContext();
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 6868408ef5f5..7843b1522830 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -47,12 +47,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <string>
@@ -62,7 +60,7 @@ using namespace llvm;
namespace llvm {
cl::opt<bool>
- DebugInfoCorrelate("debug-info-correlate", cl::ZeroOrMore,
+ DebugInfoCorrelate("debug-info-correlate",
cl::desc("Use debug info to correlate profiles."),
cl::init(false));
} // namespace llvm
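
This deletion, and the long run of identical ones in the hunks below, follows from cl::ZeroOrMore becoming the default behavior for cl::opt, which makes the explicit modifier redundant. A sketch with hypothetical option names:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Before the cleanup, repeated occurrences had to be allowed explicitly:
    static cl::opt<bool> OldStyle("example-old", cl::ZeroOrMore,
                                  cl::init(false), cl::desc("hypothetical"));
    // Now a cl::opt accepts zero or more occurrences by default (the last
    // occurrence wins), so this declaration behaves the same way:
    static cl::opt<bool> NewStyle("example-new", cl::init(false),
                                  cl::desc("hypothetical"));
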
@@ -95,18 +93,18 @@ cl::opt<double> NumCountersPerValueSite(
cl::init(1.0));
cl::opt<bool> AtomicCounterUpdateAll(
- "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
+ "instrprof-atomic-counter-update-all",
cl::desc("Make all profile counter updates atomic (for testing only)"),
cl::init(false));
cl::opt<bool> AtomicCounterUpdatePromoted(
- "atomic-counter-update-promoted", cl::ZeroOrMore,
+ "atomic-counter-update-promoted",
cl::desc("Do counter update using atomic fetch add "
" for promoted counters only"),
cl::init(false));
cl::opt<bool> AtomicFirstCounter(
- "atomic-first-counter", cl::ZeroOrMore,
+ "atomic-first-counter",
cl::desc("Use atomic fetch add for first counter in a function (usually "
"the entry counter)"),
cl::init(false));
@@ -116,37 +114,37 @@ cl::opt<bool> AtomicFirstCounter(
// pipeline is set up, i.e., the default value of true for this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
-cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
+cl::opt<bool> DoCounterPromotion("do-counter-promotion",
cl::desc("Do counter register promotion"),
cl::init(false));
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
- cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
+ "max-counter-promotions-per-loop", cl::init(20),
cl::desc("Max number counter promotions per loop to avoid"
" increasing register pressure too much"));
// A debug option
cl::opt<int>
- MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
+ MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
cl::desc("Max number of allowed counter promotions"));
cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
- cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
+ "speculative-counter-promotion-max-exiting", cl::init(3),
cl::desc("The max number of exiting blocks of a loop to allow "
" speculative counter promotion"));
cl::opt<bool> SpeculativeCounterPromotionToLoop(
- cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
+ "speculative-counter-promotion-to-loop",
cl::desc("When the option is false, if the target block is in a loop, "
"the promotion will be disallowed unless the promoted counter "
" update can be further/iteratively promoted into an acyclic "
" region."));
cl::opt<bool> IterativeCounterPromotion(
- cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
+ "iterative-counter-promotion", cl::init(true),
cl::desc("Allow counter promotion across the whole loop nest."));
cl::opt<bool> SkipRetExitBlock(
- cl::ZeroOrMore, "skip-ret-exit-block", cl::init(true),
+ "skip-ret-exit-block", cl::init(true),
cl::desc("Suppress counter promotion if exit blocks contain ret."));
class InstrProfilingLegacyPass : public ModulePass {
@@ -211,6 +209,18 @@ public:
Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
Type *Ty = LiveInValue->getType();
IRBuilder<> Builder(InsertPos);
+ if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
+ // If isRuntimeCounterRelocationEnabled() is true then the address of
+ // the store instruction is computed with two instructions in
+ // InstrProfiling::getCounterAddress(). We need to copy those
+ // instructions to this block to compute Addr correctly.
+ // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
+ // %Addr = inttoptr i64 %BiasAdd to i64*
+ auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
+ assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
+ Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
+ Addr = Builder.CreateIntToPtr(BiasInst, Ty->getPointerTo());
+ }
if (AtomicCounterUpdatePromoted)
      // atomic update currently can only be promoted across the current
// loop, not the whole loop nest.
@@ -303,8 +313,7 @@ public:
auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
// If the average loop trip count is not greater than 1.5, we skip
// promotion.
- if (PreheaderCount &&
- (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
+ if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
continue;
}
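
The rewritten comparison is the integer form of the 1.5 trip-count guard: the average trip count is roughly *InstrCount / *PreheaderCount, and InstrCount <= 1.5 * PreheaderCount becomes 2 * InstrCount <= 3 * PreheaderCount once multiplied through to stay in integer arithmetic. A self-contained check with made-up counts:

    #include <cstdint>

    // Hypothetical profile: loop body runs 140 times, preheader 100 times,
    // so the average trip count is 1.4 and promotion is skipped.
    bool shouldSkipPromotion(uint64_t PreheaderCount, uint64_t InstrCount) {
      return (PreheaderCount * 3) >= (InstrCount * 2); // 300 >= 280 -> true
    }
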
@@ -705,10 +714,9 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Function *Fn = I->getParent()->getParent();
- Instruction &EntryI = Fn->getEntryBlock().front();
- LoadInst *LI = dyn_cast<LoadInst>(&EntryI);
- if (!LI) {
- IRBuilder<> EntryBuilder(&EntryI);
+ LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
+ if (!BiasLI) {
+ IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
@@ -725,9 +733,9 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
if (TT.supportsCOMDAT())
Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
}
- LI = EntryBuilder.CreateLoad(Int64Ty, Bias);
+ BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias);
}
- auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
+ auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
return Builder.CreateIntToPtr(Add, Addr->getType());
}
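
Rather than assuming the bias load is the first instruction of the entry block, the pass now memoizes one load per function in FunctionToProfileBiasMap, probing and filling the map slot through a single LoadInst *& reference. The idiom in isolation, restated with standard containers:

    #include <map>
    #include <string>

    // Binding a reference to the slot created by operator[] lets us test
    // and populate the cache with a single lookup, mirroring BiasLI above.
    const std::string &getOrCompute(std::map<int, std::string> &Cache, int Key) {
      std::string &Slot = Cache[Key];
      if (Slot.empty())
        Slot = "value-" + std::to_string(Key); // computed once per key
      return Slot;
    }

With llvm::DenseMap the reference is only safe because nothing is inserted while it is live; DenseMap may rehash and invalidate references on insertion.
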
@@ -769,7 +777,8 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
Name->setLinkage(GlobalValue::PrivateLinkage);
ReferencedNames.push_back(Name);
- NC->dropAllReferences();
+ if (isa<ConstantExpr>(NC))
+ NC->dropAllReferences();
}
CoverageNamesVar->eraseFromParent();
}
@@ -856,8 +865,8 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
if (TT.isOSDarwin())
return false;
// Use linker script magic to get data/cnts/name start/end.
- if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
- TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows())
+ if (TT.isOSAIX() || TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
+ TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS() || TT.isOSWindows())
return false;
return true;
@@ -1236,7 +1245,7 @@ bool InstrProfiling::emitRuntimeHook() {
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
- if (TT.isOSBinFormatELF()) {
+ if (TT.isOSBinFormatELF() && !TT.isPS()) {
// Mark the user variable as used so that it isn't stripped out.
CompilerUsedVars.push_back(Var);
} else {
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index dda242492391..9ff0e632bd7f 100644
--- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -91,23 +91,13 @@ Comdat *llvm::getOrCreateFunctionComdat(Function &F, Triple &T) {
/// initializeInstrumentation - Initialize all passes in the Instrumentation
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
- initializeAddressSanitizerLegacyPassPass(Registry);
- initializeModuleAddressSanitizerLegacyPassPass(Registry);
initializeMemProfilerLegacyPassPass(Registry);
initializeModuleMemProfilerLegacyPassPass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
initializeControlHeightReductionLegacyPassPass(Registry);
- initializeGCOVProfilerLegacyPassPass(Registry);
- initializePGOInstrumentationGenLegacyPassPass(Registry);
- initializePGOInstrumentationUseLegacyPassPass(Registry);
- initializePGOIndirectCallPromotionLegacyPassPass(Registry);
- initializePGOMemOPSizeOptLegacyPassPass(Registry);
initializeCGProfileLegacyPassPass(Registry);
initializeInstrOrderFileLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
- initializeMemorySanitizerLegacyPassPass(Registry);
- initializeHWAddressSanitizerLegacyPassPass(Registry);
- initializeThreadSanitizerLegacyPassPass(Registry);
initializeModuleSanitizerCoverageLegacyPassPass(Registry);
initializeDataFlowSanitizerLegacyPassPass(Registry);
}
diff --git a/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
deleted file mode 100644
index 892a6a26da91..000000000000
--- a/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ /dev/null
@@ -1,109 +0,0 @@
-//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This module provides means for calculating a maximum spanning tree for a
-// given set of weighted edges. The type parameter T is the type of a node.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
-#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
-
-#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/IR/BasicBlock.h"
-#include <algorithm>
-#include <vector>
-
-namespace llvm {
-
- /// MaximumSpanningTree - A MST implementation.
- /// The type parameter T determines the type of the nodes of the graph.
- template <typename T>
- class MaximumSpanningTree {
- public:
- typedef std::pair<const T*, const T*> Edge;
- typedef std::pair<Edge, double> EdgeWeight;
- typedef std::vector<EdgeWeight> EdgeWeights;
- protected:
- typedef std::vector<Edge> MaxSpanTree;
-
- MaxSpanTree MST;
-
- private:
- // A comparing class for comparing weighted edges.
- struct EdgeWeightCompare {
- static bool getBlockSize(const T *X) {
- const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
- return BB ? BB->size() : 0;
- }
-
- bool operator()(EdgeWeight X, EdgeWeight Y) const {
- if (X.second > Y.second) return true;
- if (X.second < Y.second) return false;
-
- // Equal edge weights: break ties by comparing block sizes.
- size_t XSizeA = getBlockSize(X.first.first);
- size_t YSizeA = getBlockSize(Y.first.first);
- if (XSizeA > YSizeA) return true;
- if (XSizeA < YSizeA) return false;
-
- size_t XSizeB = getBlockSize(X.first.second);
- size_t YSizeB = getBlockSize(Y.first.second);
- if (XSizeB > YSizeB) return true;
- if (XSizeB < YSizeB) return false;
-
- return false;
- }
- };
-
- public:
- static char ID; // Class identification, replacement for typeinfo
-
- /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
- /// spanning tree.
- MaximumSpanningTree(EdgeWeights &EdgeVector) {
- llvm::stable_sort(EdgeVector, EdgeWeightCompare());
-
- // Create spanning tree, Forest contains a special data structure
- // that makes checking if two nodes are already in a common (sub-)tree
- // fast and cheap.
- EquivalenceClasses<const T*> Forest;
- for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
- EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
- Edge e = (*EWi).first;
-
- Forest.insert(e.first);
- Forest.insert(e.second);
- }
-
- // Iterate over the sorted edges, biggest first.
- for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
- EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
- Edge e = (*EWi).first;
-
- if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
- Forest.unionSets(e.first, e.second);
- // So we know now that the edge is not already in a subtree, so we push
- // the edge to the MST.
- MST.push_back(e);
- }
- }
- }
-
- typename MaxSpanTree::iterator begin() {
- return MST.begin();
- }
-
- typename MaxSpanTree::iterator end() {
- return MST.end();
- }
- };
-
-} // End llvm namespace
-
-#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
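
Algorithmically, the deleted header was Kruskal's method run over edges sorted by descending weight, with EquivalenceClasses acting as the union-find that rejects cycle-forming edges. A compact, self-contained restatement of the same procedure (the deleted tie-breaking on basic-block size is omitted):

    #include <algorithm>
    #include <numeric>
    #include <utility>
    #include <vector>

    struct UnionFind { // stands in for llvm::EquivalenceClasses
      std::vector<int> Parent;
      explicit UnionFind(int N) : Parent(N) {
        std::iota(Parent.begin(), Parent.end(), 0);
      }
      int find(int X) { return Parent[X] == X ? X : Parent[X] = find(Parent[X]); }
      bool unite(int A, int B) { // false if already in one subtree
        A = find(A);
        B = find(B);
        if (A == B)
          return false;
        Parent[A] = B;
        return true;
      }
    };

    // Nodes are 0..N-1; edges are ((U, V), Weight).
    std::vector<std::pair<int, int>> maximumSpanningTree(
        int N, std::vector<std::pair<std::pair<int, int>, double>> Edges) {
      std::stable_sort(Edges.begin(), Edges.end(),
                       [](const auto &X, const auto &Y) {
                         return X.second > Y.second; // biggest weight first
                       });
      UnionFind Forest(N);
      std::vector<std::pair<int, int>> MST;
      for (const auto &EW : Edges)
        if (Forest.unite(EW.first.first, EW.first.second))
          MST.push_back(EW.first); // edge joins two distinct subtrees
      return MST;
    }
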
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 5e078f2c4212..01e3b2c20218 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -27,15 +27,14 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -156,7 +155,6 @@ static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
struct InterestingMemoryAccess {
Value *Addr = nullptr;
bool IsWrite;
- unsigned Alignment;
Type *AccessTy;
uint64_t TypeSize;
Value *MaybeMask = nullptr;
@@ -182,8 +180,7 @@ public:
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite);
void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
- Instruction *I, Value *Addr,
- unsigned Alignment, Type *AccessTy,
+ Instruction *I, Value *Addr, Type *AccessTy,
bool IsWrite);
void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
@@ -255,7 +252,7 @@ public:
} // end anonymous namespace
-MemProfilerPass::MemProfilerPass() {}
+MemProfilerPass::MemProfilerPass() = default;
PreservedAnalyses MemProfilerPass::run(Function &F,
AnalysisManager<Function> &AM) {
@@ -266,7 +263,7 @@ PreservedAnalyses MemProfilerPass::run(Function &F,
return PreservedAnalyses::all();
}
-ModuleMemProfilerPass::ModuleMemProfilerPass() {}
+ModuleMemProfilerPass::ModuleMemProfilerPass() = default;
PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
AnalysisManager<Module> &AM) {
@@ -341,28 +338,24 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
return None;
Access.IsWrite = false;
Access.AccessTy = LI->getType();
- Access.Alignment = LI->getAlignment();
Access.Addr = LI->getPointerOperand();
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (!ClInstrumentWrites)
return None;
Access.IsWrite = true;
Access.AccessTy = SI->getValueOperand()->getType();
- Access.Alignment = SI->getAlignment();
Access.Addr = SI->getPointerOperand();
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
if (!ClInstrumentAtomics)
return None;
Access.IsWrite = true;
Access.AccessTy = RMW->getValOperand()->getType();
- Access.Alignment = 0;
Access.Addr = RMW->getPointerOperand();
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
if (!ClInstrumentAtomics)
return None;
Access.IsWrite = true;
Access.AccessTy = XCHG->getCompareOperand()->getType();
- Access.Alignment = 0;
Access.Addr = XCHG->getPointerOperand();
} else if (auto *CI = dyn_cast<CallInst>(I)) {
auto *F = CI->getCalledFunction();
@@ -384,11 +377,6 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
}
auto *BasePtr = CI->getOperand(0 + OpOffset);
- if (auto *AlignmentConstant =
- dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
- Access.Alignment = (unsigned)AlignmentConstant->getZExtValue();
- else
- Access.Alignment = 1; // No alignment guarantees. We probably got Undef
Access.MaybeMask = CI->getOperand(2 + OpOffset);
Access.Addr = BasePtr;
}
@@ -410,6 +398,25 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
if (Access.Addr->isSwiftError())
return None;
+ // Peel off GEPs and BitCasts.
+ auto *Addr = Access.Addr->stripInBoundsOffsets();
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ // Do not instrument PGO counter updates.
+ if (GV->hasSection()) {
+ StringRef SectionName = GV->getSection();
+ // Check if the global is in the PGO counters section.
+ auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
+ if (SectionName.endswith(
+ getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
+ return None;
+ }
+
+ // Do not instrument accesses to LLVM internal variables.
+ if (GV->getName().startswith("__llvm"))
+ return None;
+ }
+
const DataLayout &DL = I->getModule()->getDataLayout();
Access.TypeSize = DL.getTypeStoreSizeInBits(Access.AccessTy);
return Access;
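
The new filter keeps the profiler from counting accesses to its own and PGO's bookkeeping globals. For illustration, on an ELF target the section name compared against resolves to "__llvm_prf_cnts"; a sketch of the predicate in isolation, assuming that triple:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Triple.h"
    #include "llvm/ProfileData/InstrProf.h"
    using namespace llvm;

    // Does a global's section mark it as PGO counter data? (ELF assumed.)
    bool isPGOCounterSection(StringRef SectionName) {
      return SectionName.endswith(getInstrProfSectionName(
          IPSK_cnts, Triple::ELF, /*AddSegmentInfo=*/false));
    }
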
@@ -417,7 +424,6 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
Instruction *I, Value *Addr,
- unsigned Alignment,
Type *AccessTy, bool IsWrite) {
auto *VTy = cast<FixedVectorType>(AccessTy);
uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
@@ -468,8 +474,7 @@ void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
if (Access.MaybeMask) {
instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
- Access.Alignment, Access.AccessTy,
- Access.IsWrite);
+ Access.AccessTy, Access.IsWrite);
} else {
// Since the access counts will be accumulated across the entire allocation,
// we only update the shadow access count for the first location and thus
@@ -615,8 +620,6 @@ bool MemProfiler::instrumentFunction(Function &F) {
initializeCallbacks(*F.getParent());
- FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
-
SmallVector<Instruction *, 16> ToInstrument;
// Fill the set of memory operations to instrument.
@@ -627,6 +630,15 @@ bool MemProfiler::instrumentFunction(Function &F) {
}
}
+ if (ToInstrument.empty()) {
+ LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
+ << " " << F << "\n");
+
+ return FunctionModified;
+ }
+
+ FunctionModified |= insertDynamicShadowAtFunctionEntry(F);
+
int NumInstrumented = 0;
for (auto *Inst : ToInstrument) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index c51acdf52f14..4d72f6c3d1a9 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -174,24 +174,19 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueMap.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -634,33 +629,6 @@ void insertModuleCtor(Module &M) {
});
}
-/// A legacy function pass for msan instrumentation.
-///
-/// Instruments functions to detect uninitialized reads.
-struct MemorySanitizerLegacyPass : public FunctionPass {
- // Pass identification, replacement for typeid.
- static char ID;
-
- MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
- : FunctionPass(ID), Options(Options) {
- initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-
- bool runOnFunction(Function &F) override {
- return MSan->sanitizeFunction(
- F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
- }
- bool doInitialization(Module &M) override;
-
- Optional<MemorySanitizer> MSan;
- MemorySanitizerOptions Options;
-};
-
template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
return (Opt.getNumOccurrences() > 0) ? Opt : Default;
}
@@ -705,21 +673,6 @@ void MemorySanitizerPass::printPipeline(
OS << ">";
}
-char MemorySanitizerLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
- "MemorySanitizer: detects uninitialized reads.", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
- "MemorySanitizer: detects uninitialized reads.", false,
- false)
-
-FunctionPass *
-llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
- return new MemorySanitizerLegacyPass(Options);
-}
-
/// Create a non-const global initialized with the given string.
///
/// Creates a writable global for Str so that we can pass it to the
@@ -1017,13 +970,6 @@ void MemorySanitizer::initializeModule(Module &M) {
}
}
-bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
- if (!Options.Kernel)
- insertModuleCtor(M);
- MSan.emplace(M, Options);
- return true;
-}
-
namespace {
/// A helper class that handles instrumentation of VarArg
@@ -1674,7 +1620,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// or extracts it from ParamTLS (for function arguments).
Value *getShadow(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (!PropagateShadow || I->getMetadata("nosanitize"))
+ if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize))
return getCleanShadow(V);
// For instructions the shadow is already stored in the map.
Value *Shadow = ShadowMap[V];
@@ -1694,9 +1640,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (Argument *A = dyn_cast<Argument>(V)) {
// For arguments we compute the shadow on demand and store it in the map.
- Value **ShadowPtr = &ShadowMap[V];
- if (*ShadowPtr)
- return *ShadowPtr;
+ Value *&ShadowPtr = ShadowMap[V];
+ if (ShadowPtr)
+ return ShadowPtr;
Function *F = A->getParent();
IRBuilder<> EntryIRB(FnPrologueEnd);
unsigned ArgOffset = 0;
@@ -1753,12 +1699,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
(MS.EagerChecks && FArg.hasAttribute(Attribute::NoUndef))) {
- *ShadowPtr = getCleanShadow(V);
+ ShadowPtr = getCleanShadow(V);
setOrigin(A, getCleanOrigin());
} else {
// Shadow over TLS
Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
- *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
+ ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
kShadowTLSAlignment);
if (MS.TrackOrigins) {
Value *OriginPtr =
@@ -1767,14 +1713,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
LLVM_DEBUG(dbgs()
- << " ARG: " << FArg << " ==> " << **ShadowPtr << "\n");
+ << " ARG: " << FArg << " ==> " << *ShadowPtr << "\n");
break;
}
ArgOffset += alignTo(Size, kShadowTLSAlignment);
}
- assert(*ShadowPtr && "Could not find shadow for an argument");
- return *ShadowPtr;
+ assert(ShadowPtr && "Could not find shadow for an argument");
+ return ShadowPtr;
}
// For everything else the shadow is zero.
return getCleanShadow(V);
@@ -1793,7 +1739,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
assert((isa<Instruction>(V) || isa<Argument>(V)) &&
"Unexpected value type in getOrigin()");
if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (I->getMetadata("nosanitize"))
+ if (I->getMetadata(LLVMContext::MD_nosanitize))
return getCleanOrigin();
}
Value *Origin = OriginMap[V];
@@ -1916,7 +1862,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// ------------------- Visitors.
using InstVisitor<MemorySanitizerVisitor>::visit;
void visit(Instruction &I) {
- if (I.getMetadata("nosanitize"))
+ if (I.getMetadata(LLVMContext::MD_nosanitize))
return;
// Don't want to visit if we're in the prologue
if (isInPrologue(I))
@@ -1930,12 +1876,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Optionally, checks that the load address is fully defined.
void visitLoadInst(LoadInst &I) {
assert(I.getType()->isSized() && "Load type must have size");
- assert(!I.getMetadata("nosanitize"));
+ assert(!I.getMetadata(LLVMContext::MD_nosanitize));
IRBuilder<> IRB(I.getNextNode());
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
- const Align Alignment = assumeAligned(I.getAlignment());
+ const Align Alignment = I.getAlign();
if (PropagateShadow) {
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
@@ -2573,6 +2519,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Similar situation exists for memcpy and memset.
void visitMemMoveInst(MemMoveInst &I) {
+ getShadow(I.getArgOperand(1)); // Ensure shadow initialized
IRBuilder<> IRB(&I);
IRB.CreateCall(
MS.MemmoveFn,
@@ -2587,6 +2534,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// FIXME: consider doing manual inline for small constant sizes and proper
// alignment.
void visitMemCpyInst(MemCpyInst &I) {
+ getShadow(I.getArgOperand(1)); // Ensure shadow initialized
IRBuilder<> IRB(&I);
IRB.CreateCall(
MS.MemcpyFn,
@@ -3252,27 +3200,37 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SOC.Done(&I);
}
- // Instrument _mm_*_sd intrinsics
- void handleUnarySdIntrinsic(IntrinsicInst &I) {
+ // Instrument _mm_*_sd|ss intrinsics
+ void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
+ unsigned Width =
+ cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
Value *First = getShadow(&I, 0);
Value *Second = getShadow(&I, 1);
- // High word of first operand, low word of second
- Value *Shadow =
- IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
+ // First element of second operand, remaining elements of first operand
+ SmallVector<int, 16> Mask;
+ Mask.push_back(Width);
+ for (unsigned i = 1; i < Width; i++)
+ Mask.push_back(i);
+ Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
setShadow(&I, Shadow);
setOriginForNaryOp(I);
}
- void handleBinarySdIntrinsic(IntrinsicInst &I) {
+ void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
+ unsigned Width =
+ cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
Value *First = getShadow(&I, 0);
Value *Second = getShadow(&I, 1);
Value *OrShadow = IRB.CreateOr(First, Second);
- // High word of first operand, low word of both OR'd together
- Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
- llvm::makeArrayRef<int>({2, 1}));
+ // First element of both OR'd together, remaining elements of first operand
+ SmallVector<int, 16> Mask;
+ Mask.push_back(Width);
+ for (unsigned i = 1; i < Width; i++)
+ Mask.push_back(i);
+ Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
setShadow(&I, Shadow);
setOriginForNaryOp(I);
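
The generalized mask reproduces the old hard-coded sd behavior and extends it to the four-lane ss intrinsics: in a shufflevector mask, indices Width..2*Width-1 address the second operand, so lane 0 of the result shadow comes from it and lanes 1..Width-1 from the first. The construction in isolation:

    #include <vector>

    // Width == 2 (sd) -> {2, 1}        (the old makeArrayRef<int>({2, 1}))
    // Width == 4 (ss) -> {4, 1, 2, 3}
    std::vector<int> makeSdSsShuffleMask(unsigned Width) {
      std::vector<int> Mask;
      Mask.push_back(Width);             // lane 0: from the second vector
      for (unsigned i = 1; i < Width; i++)
        Mask.push_back(i);               // remaining lanes: from the first
      return Mask;
    }
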
@@ -3547,11 +3505,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
case Intrinsic::x86_sse41_round_sd:
- handleUnarySdIntrinsic(I);
+ case Intrinsic::x86_sse41_round_ss:
+ handleUnarySdSsIntrinsic(I);
break;
case Intrinsic::x86_sse2_max_sd:
+ case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse2_min_sd:
- handleBinarySdIntrinsic(I);
+ case Intrinsic::x86_sse_min_ss:
+ handleBinarySdSsIntrinsic(I);
break;
case Intrinsic::fshl:
@@ -3630,7 +3591,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitCallBase(CallBase &CB) {
- assert(!CB.getMetadata("nosanitize"));
+ assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
if (CB.isInlineAsm()) {
// For inline asm (either a call to asm function, or callbr instruction),
// do the usual thing: check argument shadow and mark all outputs as
@@ -4083,8 +4044,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Nothing to do here.
}
- void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
- const DataLayout &DL, bool isOutput) {
+ void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
+ IRBuilder<> &IRB, const DataLayout &DL,
+ bool isOutput) {
// For each assembly argument, we check its value for being initialized.
// If the argument is a pointer, we assume it points to a single element
  // of the corresponding type (or to an 8-byte word, if the type is unsized).
@@ -4096,10 +4058,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
assert(!isOutput);
return;
}
- Type *ElType = OpType->getPointerElementType();
- if (!ElType->isSized())
+ if (!ElemTy->isSized())
return;
- int Size = DL.getTypeStoreSize(ElType);
+ int Size = DL.getTypeStoreSize(ElemTy);
Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
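
Threading ElemTy through explicitly is part of the opaque-pointer migration: a bare ptr value no longer encodes its pointee, so the type travels on the call site as an elementtype attribute and is read back from there. A hedged sketch of the caller side, assuming CallBase::getParamElementType:

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Pointee type of an inline-asm operand, e.g. from IR such as
    //   call void asm "...", "=*m"(ptr elementtype(i32) %p)
    // Returns null when no elementtype attribute is present (treated as
    // unsized by the instrumentation above).
    static Type *pointeeOfAsmOperand(const CallBase &CB, unsigned OpNo) {
      return CB.getParamElementType(OpNo);
    }
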
@@ -4159,14 +4120,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// that we won't overwrite uninit values before checking them.
for (int i = OutputArgs; i < NumOperands; i++) {
Value *Operand = CB->getOperand(i);
- instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
+ instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
+ /*isOutput*/ false);
}
// Unpoison output arguments. This must happen before the actual InlineAsm
// call, so that the shadow for memory published in the asm() statement
// remains valid.
for (int i = 0; i < OutputArgs; i++) {
Value *Operand = CB->getOperand(i);
- instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
+ instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
+ /*isOutput*/ true);
}
setShadow(&I, getCleanShadow(&I));
@@ -4885,8 +4848,8 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
assert(A->getType()->isPointerTy());
Type *RealTy = CB.getParamByValType(ArgNo);
uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
- MaybeAlign ArgAlign = CB.getParamAlign(ArgNo);
- if (!ArgAlign || *ArgAlign < Align(8))
+ Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(8));
+ if (ArgAlign < 8)
ArgAlign = Align(8);
VAArgOffset = alignTo(VAArgOffset, ArgAlign);
if (!IsFixed) {
@@ -4902,27 +4865,27 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
kShadowTLSAlignment, ArgSize);
}
}
- VAArgOffset += alignTo(ArgSize, 8);
+ VAArgOffset += alignTo(ArgSize, Align(8));
} else {
Value *Base;
uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
- uint64_t ArgAlign = 8;
+ Align ArgAlign = Align(8);
if (A->getType()->isArrayTy()) {
// Arrays are aligned to element size, except for long double
// arrays, which are aligned to 8 bytes.
Type *ElementTy = A->getType()->getArrayElementType();
if (!ElementTy->isPPC_FP128Ty())
- ArgAlign = DL.getTypeAllocSize(ElementTy);
+ ArgAlign = Align(DL.getTypeAllocSize(ElementTy));
} else if (A->getType()->isVectorTy()) {
// Vectors are naturally aligned.
- ArgAlign = DL.getTypeAllocSize(A->getType());
+ ArgAlign = Align(ArgSize);
}
if (ArgAlign < 8)
- ArgAlign = 8;
+ ArgAlign = Align(8);
VAArgOffset = alignTo(VAArgOffset, ArgAlign);
if (DL.isBigEndian()) {
- // Adjusting the shadow for argument with size < 8 to match the placement
- // of bits in big endian system
+        // Adjust the shadow for arguments with size < 8 to match the
+        // placement of bits in a big-endian system.
if (ArgSize < 8)
VAArgOffset += (8 - ArgSize);
}
@@ -4933,7 +4896,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
VAArgOffset += ArgSize;
- VAArgOffset = alignTo(VAArgOffset, 8);
+ VAArgOffset = alignTo(VAArgOffset, Align(8));
}
if (IsFixed)
VAArgBase = VAArgOffset;
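
These hunks complete the helper's move from raw integers to the llvm::Align type: MaybeAlign::value_or supplies the default, the clamp enforces the ABI's 8-byte minimum, and alignTo now takes an Align rather than a bare 8. The same shape with standard types:

    #include <algorithm>
    #include <cstdint>
    #include <optional>

    // Stand-in for the MaybeAlign handling above: use the declared
    // alignment when present, then clamp to the 8-byte ABI minimum.
    uint64_t effectiveArgAlign(std::optional<uint64_t> DeclaredAlign) {
      return std::max<uint64_t>(DeclaredAlign.value_or(8), 8);
    }
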
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 0902a94452e3..3a29cd70e42e 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -52,7 +52,6 @@
#include "ValueProfileCollector.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -68,6 +67,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -94,8 +94,6 @@
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BranchProbability.h"
@@ -110,6 +108,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
@@ -173,14 +172,14 @@ static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
// Command line option to set the maximum number of VP annotations to write to
// the metadata for a single indirect call callsite.
static cl::opt<unsigned> MaxNumAnnotations(
- "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
+ "icp-max-annotations", cl::init(3), cl::Hidden,
cl::desc("Max number of annotations for a single indirect "
"call callsite"));
// Command line option to set the maximum number of value annotations
// to write to the metadata for a single memop intrinsic.
static cl::opt<unsigned> MaxNumMemOPAnnotations(
- "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
+ "memop-max-annotations", cl::init(4), cl::Hidden,
cl::desc("Max number of preicise value annotations for a single memop"
"intrinsic"));
@@ -256,7 +255,7 @@ static cl::opt<bool> PGOInstrumentEntry(
cl::desc("Force to instrument function entry basicblock."));
static cl::opt<bool> PGOFunctionEntryCoverage(
- "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ "pgo-function-entry-coverage", cl::Hidden,
cl::desc(
"Use this option to enable function entry coverage instrumentation."));
@@ -431,125 +430,8 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
unsigned getNumOfSelectInsts() const { return NSIs; }
};
-
-class PGOInstrumentationGenLegacyPass : public ModulePass {
-public:
- static char ID;
-
- PGOInstrumentationGenLegacyPass(bool IsCS = false)
- : ModulePass(ID), IsCS(IsCS) {
- initializePGOInstrumentationGenLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
-
-private:
- // Is this is context-sensitive instrumentation.
- bool IsCS;
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-};
-
-class PGOInstrumentationUseLegacyPass : public ModulePass {
-public:
- static char ID;
-
- // Provide the profile filename as the parameter.
- PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
- : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
- if (!PGOTestProfileFile.empty())
- ProfileFileName = PGOTestProfileFile;
- initializePGOInstrumentationUseLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
-
-private:
- std::string ProfileFileName;
- // Is this is context-sensitive instrumentation use.
- bool IsCS;
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-};
-
-class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
-public:
- static char ID;
- StringRef getPassName() const override {
- return "PGOInstrumentationGenCreateVarPass";
- }
- PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
- : ModulePass(ID), InstrProfileOutput(CSInstrName) {
- initializePGOInstrumentationGenCreateVarLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
-private:
- bool runOnModule(Module &M) override {
- createProfileFileNameVar(M, InstrProfileOutput);
- // The variable in a comdat may be discarded by LTO. Ensure the
- // declaration will be retained.
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
- return false;
- }
- std::string InstrProfileOutput;
-};
-
} // end anonymous namespace
-char PGOInstrumentationGenLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
- "PGO instrumentation.", false, false)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
- "PGO instrumentation.", false, false)
-
-ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
- return new PGOInstrumentationGenLegacyPass(IsCS);
-}
-
-char PGOInstrumentationUseLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
- "Read PGO instrumentation profile.", false, false)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
- "Read PGO instrumentation profile.", false, false)
-
-ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
- bool IsCS) {
- return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
-}
-
-char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
-
-INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
- "pgo-instr-gen-create-var",
- "Create PGO instrumentation version variable for CSPGO.", false,
- false)
-
-ModulePass *
-llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
- return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName));
-}
-
namespace {
/// An MST based instrumentation for PGO
@@ -940,7 +822,7 @@ static void instrumentOneFunc(
bool IsCS) {
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
- SplitIndirectBrCriticalEdges(F, BPI, BFI);
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
@@ -1457,6 +1339,7 @@ void PGOUseFunc::populateCounters() {
}
LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+ (void) NumPasses;
#ifndef NDEBUG
// Assert every BB has a valid counter.
for (auto &BB : F) {
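
The added (void)NumPasses exists because the variable's only consumer sits inside LLVM_DEBUG, which expands to nothing in NDEBUG builds and would otherwise leave release compiles warning about an unused variable. In miniature (LLVM_DEBUG/dbgs from llvm/Support/Debug.h and a DEBUG_TYPE definition assumed):

    unsigned NumPasses = 3; // hypothetical value
    LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
    (void)NumPasses; // silences unused-variable warnings once NDEBUG strips the macro
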
@@ -1697,22 +1580,6 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::all();
}
-bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- auto LookupBPI = [this](Function &F) {
- return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
- };
- auto LookupBFI = [this](Function &F) {
- return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
- };
- return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS);
-}
-
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -1740,7 +1607,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
BlockFrequencyInfo NBFI(F, NBPI, LI);
#ifndef NDEBUG
auto BFIEntryCount = F.getEntryCount();
- assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) &&
+ assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
"Invalid BFI Entrycount");
#endif
auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
@@ -1752,7 +1619,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
continue;
auto BFICount = NBFI.getBlockProfileCount(&BBI);
CountValue = Func.getBBInfo(&BBI).CountValue;
- BFICountValue = BFICount.getValue();
+ BFICountValue = *BFICount;
SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
}
@@ -1805,7 +1672,7 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
NonZeroBBNum++;
auto BFICount = NBFI.getBlockProfileCount(&BBI);
if (BFICount)
- BFICountValue = BFICount.getValue();
+ BFICountValue = *BFICount;
if (HotBBOnly) {
bool rawIsHot = CountValue >= HotCountThreshold;
@@ -1929,7 +1796,7 @@ static bool annotateAllFunctions(
auto *BFI = LookupBFI(F);
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
- SplitIndirectBrCriticalEdges(F, BPI, BFI);
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
InstrumentFuncEntry);
// When AllMinusOnes is true, it means the profile for the function
@@ -2073,25 +1940,6 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
return PreservedAnalyses::none();
}
-bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- auto LookupBPI = [this](Function &F) {
- return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
- };
- auto LookupBFI = [this](Function &F) {
- return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
- };
-
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
- LookupBFI, PSI, IsCS);
-}
-
static std::string getSimpleNodeName(const BasicBlock *Node) {
if (!Node->getName().empty())
return std::string(Node->getName());
@@ -2117,6 +1965,8 @@ void llvm::setProfMetadata(Module *M, Instruction *TI,
dbgs() << W << " ";
} dbgs() << "\n";);
+ misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
+
TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
if (EmitBranchProbability) {
std::string BrCondStr = getBranchCondString(TI);
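
checkExpectAnnotations hooks the new MisExpect diagnostics into PGO metadata emission: when the weights measured from the profile contradict a llvm.expect-derived annotation, the mismatch can be diagnosed instead of silently winning. Hypothetical user code that would carry such an annotation:

    // The branch is hinted almost-always-taken via __builtin_expect; if the
    // collected profile says otherwise, the MisExpect check above flags it.
    int fastPath(int X) {
      if (__builtin_expect(X > 0, 1)) // lowered to llvm.expect
        return X * 2;
      return -X;
    }
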
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index d4b78f2c14b0..b11f16894669 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -20,7 +20,6 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -29,15 +28,11 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
#define INSTR_PROF_VALUE_PROF_MEMOP_API
#include "llvm/ProfileData/InstrProfData.inc"
@@ -46,8 +41,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/WithColor.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
@@ -63,8 +56,7 @@ STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
// The minimum call count to optimize memory intrinsic calls.
static cl::opt<unsigned>
- MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
- cl::init(1000),
+ MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000),
cl::desc("The minimum count to optimize memory "
"intrinsic calls"));
@@ -76,14 +68,13 @@ static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
// The percent threshold to optimize memory intrinsic calls.
static cl::opt<unsigned>
MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
- cl::Hidden, cl::ZeroOrMore,
+ cl::Hidden,
cl::desc("The percentage threshold for the "
"memory intrinsic calls optimization"));
// Maximum number of versions for optimizing memory intrinsic call.
static cl::opt<unsigned>
MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
- cl::ZeroOrMore,
cl::desc("The max version for the optimized memory "
" intrinsic calls"));
@@ -103,43 +94,6 @@ static cl::opt<unsigned>
cl::desc("Optimize the memop size <= this value"));
namespace {
-class PGOMemOPSizeOptLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
- initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override { return "PGOMemOPSize"; }
-
-private:
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-};
-} // end anonymous namespace
-
-char PGOMemOPSizeOptLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
- "Optimize memory intrinsic using its size value profile",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
- "Optimize memory intrinsic using its size value profile",
- false, false)
-
-FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
- return new PGOMemOPSizeOptLegacyPass();
-}
-
-namespace {
static const char *getMIName(const MemIntrinsic *MI) {
switch (MI->getIntrinsicID()) {
@@ -517,20 +471,6 @@ static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
return MemOPSizeOpt.isChanged();
}
-bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
- BlockFrequencyInfo &BFI =
- getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
-}
-
-namespace llvm {
-char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
-
PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
FunctionAnalysisManager &FAM) {
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
@@ -544,4 +484,3 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
index fc5267261851..0e39fe266369 100644
--- a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -60,15 +60,9 @@
#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index d3b60c7add34..d9d11cc90d3d 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -13,30 +13,24 @@
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -247,8 +241,7 @@ private:
Type *Ty);
void SetNoSanitizeMetadata(Instruction *I) {
- I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
- MDNode::get(*C, None));
+ I->setMetadata(LLVMContext::MD_nosanitize, MDNode::get(*C, None));
}
std::string getSectionName(const std::string &Section) const;
@@ -694,7 +687,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
for (auto &Inst : BB) {
if (Options.IndirectCalls) {
CallBase *CB = dyn_cast<CallBase>(&Inst);
- if (CB && !CB->getCalledFunction())
+ if (CB && CB->isIndirectCall())
IndirCalls.push_back(&Inst);
}
if (Options.TraceCmp) {
@@ -996,15 +989,11 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
// if we aren't splitting the block, it's nice for allocas to be before
// calls.
IP = PrepareToSplitEntryBlock(BB, IP);
- } else {
- EntryLoc = IP->getDebugLoc();
- if (!EntryLoc)
- if (auto *SP = F.getSubprogram())
- EntryLoc = DILocation::get(SP->getContext(), 0, 0, SP);
}
- IRBuilder<> IRB(&*IP);
- IRB.SetCurrentDebugLocation(EntryLoc);
+ InstrumentationIRBuilder IRB(&*IP);
+ if (EntryLoc)
+ IRB.SetCurrentDebugLocation(EntryLoc);
if (Options.TracePC) {
IRB.CreateCall(SanCovTracePC)
->setCannotMerge(); // gets the PC using GET_CALLER_PC.
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 180012198c42..c33b1b3b1a5c 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -38,7 +38,6 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -174,19 +173,6 @@ private:
FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
};
-struct ThreadSanitizerLegacyPass : FunctionPass {
- ThreadSanitizerLegacyPass() : FunctionPass(ID) {
- initializeThreadSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- StringRef getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
- static char ID; // Pass identification, replacement for typeid.
-private:
- Optional<ThreadSanitizer> TSan;
-};
-
void insertModuleCtor(Module &M) {
getOrCreateSanitizerCtorAndInitFunctions(
M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
@@ -195,7 +181,6 @@ void insertModuleCtor(Module &M) {
// time. Hook them into the global ctors list in that case:
[&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
}
-
} // namespace
PreservedAnalyses ThreadSanitizerPass::run(Function &F,
@@ -211,38 +196,6 @@ PreservedAnalyses ModuleThreadSanitizerPass::run(Module &M,
insertModuleCtor(M);
return PreservedAnalyses::none();
}
-
-char ThreadSanitizerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan",
- "ThreadSanitizer: detects data races.", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ThreadSanitizerLegacyPass, "tsan",
- "ThreadSanitizer: detects data races.", false, false)
-
-StringRef ThreadSanitizerLegacyPass::getPassName() const {
- return "ThreadSanitizerLegacyPass";
-}
-
-void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
-bool ThreadSanitizerLegacyPass::doInitialization(Module &M) {
- insertModuleCtor(M);
- TSan.emplace();
- return true;
-}
-
-bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- TSan->sanitizeFunction(F, TLI);
- return true;
-}
-
-FunctionPass *llvm::createThreadSanitizerLegacyPassPass() {
- return new ThreadSanitizerLegacyPass();
-}
-
void ThreadSanitizer::initialize(Module &M) {
const DataLayout &DL = M.getDataLayout();
IntptrTy = DL.getIntPtrType(M.getContext());
@@ -527,26 +480,22 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
Local.clear();
}
-static bool isAtomic(Instruction *I) {
+static bool isTsanAtomic(const Instruction *I) {
// TODO: Ask TTI whether synchronization scope is between threads.
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread;
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread;
- if (isa<AtomicRMWInst>(I))
- return true;
- if (isa<AtomicCmpXchgInst>(I))
- return true;
- if (isa<FenceInst>(I))
- return true;
- return false;
+ auto SSID = getAtomicSyncScopeID(I);
+ if (!SSID)
+ return false;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ return SSID.getValue() != SyncScope::SingleThread;
+ return true;
}
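// Editorial sketch (not part of the patch): with getAtomicSyncScopeID() the
// classification behaves roughly as below, assuming standard textual IR:
//   load atomic i32, i32* %p monotonic, align 4
//     ; cross-thread scope -> treated as atomic, instrumented
//   store atomic i32 0, i32* %p syncscope("singlethread") seq_cst, align 4
//     ; single-thread load/store -> skipped
//   fence seq_cst
//     ; ordering instruction other than load/store -> treated as atomic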
void ThreadSanitizer::InsertRuntimeIgnores(Function &F) {
- IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI());
IRB.CreateCall(TsanIgnoreBegin);
EscapeEnumerator EE(F, "tsan_ignore_cleanup", ClHandleCxxExceptions);
while (IRBuilder<> *AtExit = EE.Next()) {
+ InstrumentationIRBuilder::ensureDebugInfo(*AtExit, F);
AtExit->CreateCall(TsanIgnoreEnd);
}
}
@@ -581,7 +530,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
for (auto &Inst : BB) {
- if (isAtomic(&Inst))
+ if (isTsanAtomic(&Inst))
AtomicAccesses.push_back(&Inst);
else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
LocalLoadsAndStores.push_back(&Inst);
@@ -629,7 +578,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
// Instrument function entry/exit points if there were instrumented accesses.
if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
- IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI());
Value *ReturnAddress = IRB.CreateCall(
Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
IRB.getInt32(0));
@@ -637,6 +586,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions);
while (IRBuilder<> *AtExit = EE.Next()) {
+ InstrumentationIRBuilder::ensureDebugInfo(*AtExit, F);
AtExit->CreateCall(TsanFuncExit, {});
}
Res = true;
@@ -646,7 +596,7 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
const DataLayout &DL) {
- IRBuilder<> IRB(II.Inst);
+ InstrumentationIRBuilder IRB(II.Inst);
const bool IsWrite = isa<StoreInst>(*II.Inst);
Value *Addr = IsWrite ? cast<StoreInst>(II.Inst)->getPointerOperand()
: cast<LoadInst>(II.Inst)->getPointerOperand();
@@ -686,8 +636,8 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
return true;
}
- const unsigned Alignment = IsWrite ? cast<StoreInst>(II.Inst)->getAlignment()
- : cast<LoadInst>(II.Inst)->getAlignment();
+ const Align Alignment = IsWrite ? cast<StoreInst>(II.Inst)->getAlign()
+ : cast<LoadInst>(II.Inst)->getAlign();
const bool IsCompoundRW =
ClCompoundReadBeforeWrite && (II.Flags & InstructionInfo::kCompoundRW);
const bool IsVolatile = ClDistinguishVolatile &&
@@ -697,7 +647,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
FunctionCallee OnAccessFunc = nullptr;
- if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) {
+ if (Alignment >= Align(8) || (Alignment.value() % (TypeSize / 8)) == 0) {
if (IsCompoundRW)
OnAccessFunc = TsanCompoundRW[Idx];
else if (IsVolatile)
@@ -775,7 +725,7 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
- IRBuilder<> IRB(I);
+ InstrumentationIRBuilder IRB(I);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Value *Addr = LI->getPointerOperand();
Type *OrigTy = LI->getType();
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
index fb6216bb2177..32633bbc941b 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
@@ -10,12 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ValueProfileCollector.h"
#include "ValueProfilePlugins.inc"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/InitializePasses.h"
-#include <cassert>
+#include "llvm/ProfileData/InstrProf.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
index 584a60ab451e..10e5e4d128b1 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
@@ -16,7 +16,6 @@
#ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ProfileData/InstrProf.h"
#include <memory>
#include <vector>
@@ -25,6 +24,7 @@ namespace llvm {
class Function;
class Instruction;
+class TargetLibraryInfo;
class Value;
/// Utility analysis that determines what values are worth profiling.
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index 6a2c473a596a..3a129de1acd0 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -15,6 +15,7 @@
#include "ValueProfileCollector.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/InstVisitor.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 126845bb3308..70f150c9461a 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -16,7 +16,6 @@
#include "llvm-c/Initialization.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 62f88a8cc02b..2bc0c8f87d77 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -22,7 +22,6 @@
#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
-#include "ARCRuntimeEntryPoints.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 210ec60f2f87..03e5fb18d5ac 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -23,11 +23,14 @@
///
//===----------------------------------------------------------------------===//
-#include "ObjCARC.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/ObjCARC.h"
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 2985ae004d3c..f64c26ef2bed 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -102,11 +102,8 @@ public:
};
class ObjCARCContractLegacyPass : public FunctionPass {
- ObjCARCContract OCARCC;
-
public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
static char ID;
@@ -737,11 +734,9 @@ Pass *llvm::createObjCARCContractPass() {
return new ObjCARCContractLegacyPass();
}
-bool ObjCARCContractLegacyPass::doInitialization(Module &M) {
- return OCARCC.init(M);
-}
-
bool ObjCARCContractLegacyPass::runOnFunction(Function &F) {
+ ObjCARCContract OCARCC;
+ OCARCC.init(*F.getParent());
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return OCARCC.run(F, AA, DT);
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 6b074ac5adab..efcdc51ef5e3 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -22,7 +22,7 @@
///
//===----------------------------------------------------------------------===//
-#include "ObjCARC.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 1cda206a7e14..cdf9de8d78d5 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -35,7 +35,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index e4ec5f266eb8..9571e99dfb19 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -15,8 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Instructions.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -26,12 +24,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
index a5e65ffc45fe..155f47b49357 100644
--- a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
+++ b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
@@ -16,11 +16,8 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 95de59fa8262..cc12033fb677 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -57,6 +57,7 @@
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -65,7 +66,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -123,8 +123,8 @@ static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallBase &CB) {
return false;
}
-typedef std::pair<ICmpInst *, unsigned> ConditionTy;
-typedef SmallVector<ConditionTy, 2> ConditionsTy;
+using ConditionTy = std::pair<ICmpInst *, unsigned>;
+using ConditionsTy = SmallVector<ConditionTy, 2>;
/// If From has a conditional jump to To, add the condition to Conditions,
/// if it is relevant to any argument at CB.
@@ -301,10 +301,9 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
/// Note that in case any arguments at the call-site are constrained by its
/// predecessors, new call-sites with more constrained arguments will be
/// created in createCallSitesOnPredicatedArgument().
-static void splitCallSite(
- CallBase &CB,
- const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
- DomTreeUpdater &DTU) {
+static void splitCallSite(CallBase &CB,
+ ArrayRef<std::pair<BasicBlock *, ConditionsTy>> Preds,
+ DomTreeUpdater &DTU) {
BasicBlock *TailBB = CB.getParent();
bool IsMustTailCall = CB.isMustTailCall();
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 25e8c3ef3b48..8a1761505d59 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 13963657d183..6dfa2440023f 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -19,15 +19,16 @@
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Scalar.h"
#include <string>
@@ -42,48 +43,129 @@ DEBUG_COUNTER(EliminatedCounter, "conds-eliminated",
"Controls which conditions are eliminated");
static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
+static int64_t MinSignedConstraintValue = std::numeric_limits<int64_t>::min();
namespace {
-struct ConstraintTy {
- SmallVector<int64_t, 8> Coefficients;
- ConstraintTy(SmallVector<int64_t, 8> Coefficients)
- : Coefficients(Coefficients) {}
+class ConstraintInfo;
- unsigned size() const { return Coefficients.size(); }
+struct StackEntry {
+ unsigned NumIn;
+ unsigned NumOut;
+ bool IsNot;
+ bool IsSigned = false;
+ /// Variables that can be removed from the system once the stack entry gets
+ /// removed.
+ SmallVector<Value *, 2> ValuesToRelease;
+
+ StackEntry(unsigned NumIn, unsigned NumOut, bool IsNot, bool IsSigned,
+ SmallVector<Value *, 2> ValuesToRelease)
+ : NumIn(NumIn), NumOut(NumOut), IsNot(IsNot), IsSigned(IsSigned),
+ ValuesToRelease(ValuesToRelease) {}
};
-/// Struct to manage a list of constraints.
-struct ConstraintListTy {
- SmallVector<ConstraintTy, 4> Constraints;
+/// Struct to express a pre-condition of the form %Op0 Pred %Op1.
+struct PreconditionTy {
+ CmpInst::Predicate Pred;
+ Value *Op0;
+ Value *Op1;
- ConstraintListTy() {}
+ PreconditionTy(CmpInst::Predicate Pred, Value *Op0, Value *Op1)
+ : Pred(Pred), Op0(Op0), Op1(Op1) {}
+};
- ConstraintListTy(const SmallVector<ConstraintTy, 4> &Constraints)
- : Constraints(Constraints) {}
+struct ConstraintTy {
+ SmallVector<int64_t, 8> Coefficients;
+ SmallVector<PreconditionTy, 2> Preconditions;
- void mergeIn(const ConstraintListTy &Other) {
- append_range(Constraints, Other.Constraints);
- }
+ bool IsSigned = false;
+ bool IsEq = false;
+
+ ConstraintTy() = default;
- unsigned size() const { return Constraints.size(); }
+ ConstraintTy(SmallVector<int64_t, 8> Coefficients, bool IsSigned)
+ : Coefficients(Coefficients), IsSigned(IsSigned) {}
+
+ unsigned size() const { return Coefficients.size(); }
- unsigned empty() const { return Constraints.empty(); }
+ bool empty() const { return Coefficients.empty(); }
/// Returns true if any constraint has a non-zero coefficient for any of the
/// newly added indices. Zero coefficients for new indices are removed. If it
/// returns false, no new variables need to be added to the system.
bool needsNewIndices(const DenseMap<Value *, unsigned> &NewIndices) {
- assert(size() == 1);
for (unsigned I = 0; I < NewIndices.size(); ++I) {
- int64_t Last = get(0).Coefficients.pop_back_val();
+ int64_t Last = Coefficients.pop_back_val();
if (Last != 0)
return true;
}
return false;
}
- ConstraintTy &get(unsigned I) { return Constraints[I]; }
+ /// Returns true if all preconditions for this list of constraints are
+ /// satisfied given \p Info and its corresponding value-to-index mapping.
+ bool isValid(const ConstraintInfo &Info) const;
+};
+
+/// Wrapper encapsulating separate constraint systems and corresponding value
+/// mappings for both unsigned and signed information. Facts are added to and
+/// conditions are checked against the corresponding system depending on the
+/// signed-ness of their predicates. While the information is kept separate
+/// based on signed-ness, certain conditions can be transferred between the two
+/// systems.
+class ConstraintInfo {
+ DenseMap<Value *, unsigned> UnsignedValue2Index;
+ DenseMap<Value *, unsigned> SignedValue2Index;
+
+ ConstraintSystem UnsignedCS;
+ ConstraintSystem SignedCS;
+
+public:
+ DenseMap<Value *, unsigned> &getValue2Index(bool Signed) {
+ return Signed ? SignedValue2Index : UnsignedValue2Index;
+ }
+ const DenseMap<Value *, unsigned> &getValue2Index(bool Signed) const {
+ return Signed ? SignedValue2Index : UnsignedValue2Index;
+ }
+
+ ConstraintSystem &getCS(bool Signed) {
+ return Signed ? SignedCS : UnsignedCS;
+ }
+ const ConstraintSystem &getCS(bool Signed) const {
+ return Signed ? SignedCS : UnsignedCS;
+ }
+
+ void popLastConstraint(bool Signed) { getCS(Signed).popLastConstraint(); }
+ void popLastNVariables(bool Signed, unsigned N) {
+ getCS(Signed).popLastNVariables(N);
+ }
+
+ bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const;
+
+ void addFact(CmpInst::Predicate Pred, Value *A, Value *B, bool IsNegated,
+ unsigned NumIn, unsigned NumOut,
+ SmallVectorImpl<StackEntry> &DFSInStack);
+
+ /// Turn a comparison of the form \p Op0 \p Pred \p Op1 into a constraint,
+ /// using indices from the corresponding constraint system.
+ /// Additional indices for newly discovered values are added to \p NewIndices.
+ ConstraintTy getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+ DenseMap<Value *, unsigned> &NewIndices) const;
+
+ /// Turn a condition \p CmpI into a constraint, using indices from
+ /// the corresponding constraint system. Additional indices for newly
+ /// discovered values are added to \p NewIndices.
+ ConstraintTy getConstraint(CmpInst *Cmp,
+ DenseMap<Value *, unsigned> &NewIndices) const {
+ return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
+ Cmp->getOperand(1), NewIndices);
+ }
+
+ /// Try to add information from \p A \p Pred \p B to the unsigned/signed
+ /// system if \p Pred is signed/unsigned.
+ void transferToOtherSystem(CmpInst::Predicate Pred, Value *A, Value *B,
+ bool IsNegated, unsigned NumIn, unsigned NumOut,
+ SmallVectorImpl<StackEntry> &DFSInStack);
};
} // namespace
@@ -92,11 +174,28 @@ struct ConstraintListTy {
// sum of the pairs equals \p V. The first pair is the constant-factor and X
// must be nullptr. If the expression cannot be decomposed, returns an empty
// vector.
-static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
+static SmallVector<std::pair<int64_t, Value *>, 4>
+decompose(Value *V, SmallVector<PreconditionTy, 4> &Preconditions,
+ bool IsSigned) {
+
+ auto CanUseSExt = [](ConstantInt *CI) {
+ const APInt &Val = CI->getValue();
+ return Val.sgt(MinSignedConstraintValue) && Val.slt(MaxConstraintValue);
+ };
+ // Decompose \p V used with a signed predicate.
+ if (IsSigned) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CanUseSExt(CI))
+ return {{CI->getSExtValue(), nullptr}};
+ }
+
+ return {{0, nullptr}, {1, V}};
+ }
+
if (auto *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->isNegative() || CI->uge(MaxConstraintValue))
+ if (CI->uge(MaxConstraintValue))
return {};
- return {{CI->getSExtValue(), nullptr}};
+ return {{CI->getZExtValue(), nullptr}};
}
auto *GEP = dyn_cast<GetElementPtrInst>(V);
if (GEP && GEP->getNumOperands() == 2 && GEP->isInBounds()) {
@@ -106,11 +205,13 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
// If the index is zero-extended, it is guaranteed to be positive.
if (match(GEP->getOperand(GEP->getNumOperands() - 1),
m_ZExt(m_Value(Op0)))) {
- if (match(Op0, m_NUWShl(m_Value(Op1), m_ConstantInt(CI))))
+ if (match(Op0, m_NUWShl(m_Value(Op1), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
return {{0, nullptr},
{1, GEP->getPointerOperand()},
{std::pow(int64_t(2), CI->getSExtValue()), Op1}};
- if (match(Op0, m_NSWAdd(m_Value(Op1), m_ConstantInt(CI))))
+ if (match(Op0, m_NSWAdd(m_Value(Op1), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
return {{CI->getSExtValue(), nullptr},
{1, GEP->getPointerOperand()},
{1, Op1}};
@@ -118,17 +219,19 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
}
if (match(GEP->getOperand(GEP->getNumOperands() - 1), m_ConstantInt(CI)) &&
- !CI->isNegative())
+ !CI->isNegative() && CanUseSExt(CI))
return {{CI->getSExtValue(), nullptr}, {1, GEP->getPointerOperand()}};
SmallVector<std::pair<int64_t, Value *>, 4> Result;
if (match(GEP->getOperand(GEP->getNumOperands() - 1),
- m_NUWShl(m_Value(Op0), m_ConstantInt(CI))))
+ m_NUWShl(m_Value(Op0), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
Result = {{0, nullptr},
{1, GEP->getPointerOperand()},
{std::pow(int64_t(2), CI->getSExtValue()), Op0}};
else if (match(GEP->getOperand(GEP->getNumOperands() - 1),
- m_NSWAdd(m_Value(Op0), m_ConstantInt(CI))))
+ m_NSWAdd(m_Value(Op0), m_ConstantInt(CI))) &&
+ CanUseSExt(CI))
Result = {{CI->getSExtValue(), nullptr},
{1, GEP->getPointerOperand()},
{1, Op0}};
@@ -136,6 +239,10 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
Op0 = GEP->getOperand(GEP->getNumOperands() - 1);
Result = {{0, nullptr}, {1, GEP->getPointerOperand()}, {1, Op0}};
}
+ // If Op0 is signed non-negative, the GEP is increasing monotonically and
+ // can be de-composed.
+ Preconditions.emplace_back(CmpInst::ICMP_SGE, Op0,
+ ConstantInt::get(Op0->getType(), 0));
return Result;
}
@@ -145,12 +252,20 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
Value *Op1;
ConstantInt *CI;
- if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))))
+ if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))) &&
+ !CI->uge(MaxConstraintValue))
+ return {{CI->getZExtValue(), nullptr}, {1, Op0}};
+ if (match(V, m_Add(m_Value(Op0), m_ConstantInt(CI))) && CI->isNegative() &&
+ CanUseSExt(CI)) {
+ Preconditions.emplace_back(
+ CmpInst::ICMP_UGE, Op0,
+ ConstantInt::get(Op0->getType(), CI->getSExtValue() * -1));
return {{CI->getSExtValue(), nullptr}, {1, Op0}};
+ }
if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1))))
return {{0, nullptr}, {1, Op0}, {1, Op1}};
- if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))))
+ if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && CanUseSExt(CI))
return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}};
if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
return {{0, nullptr}, {1, Op0}, {-1, Op1}};
@@ -158,73 +273,73 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
return {{0, nullptr}, {1, V}};
}
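// Editorial examples (not part of the patch), assuming the {Coefficient,
// Value} pair format above with the leading pair carrying the constant:
//   %x                  -> {0, nullptr}, {1, %x}
//   add nuw i64 %x, 4   -> {4, nullptr}, {1, %x}
//   add i64 %x, -4      -> {-4, nullptr}, {1, %x}
//                          plus the precondition %x u>= 4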
-/// Turn a condition \p CmpI into a vector of constraints, using indices from \p
-/// Value2Index. Additional indices for newly discovered values are added to \p
-/// NewIndices.
-static ConstraintListTy
-getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
- const DenseMap<Value *, unsigned> &Value2Index,
- DenseMap<Value *, unsigned> &NewIndices) {
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
-
- // First try to look up \p V in Value2Index and NewIndices. Otherwise add a
- // new entry to NewIndices.
- auto GetOrAddIndex = [&Value2Index, &NewIndices](Value *V) -> unsigned {
- auto V2I = Value2Index.find(V);
- if (V2I != Value2Index.end())
- return V2I->second;
- auto NewI = NewIndices.find(V);
- if (NewI != NewIndices.end())
- return NewI->second;
- auto Insert =
- NewIndices.insert({V, Value2Index.size() + NewIndices.size() + 1});
- return Insert.first->second;
- };
-
- if (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE)
- return getConstraint(CmpInst::getSwappedPredicate(Pred), Op1, Op0,
- Value2Index, NewIndices);
-
- if (Pred == CmpInst::ICMP_EQ) {
- if (match(Op1, m_Zero()))
- return getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index,
- NewIndices);
-
- auto A =
- getConstraint(CmpInst::ICMP_UGE, Op0, Op1, Value2Index, NewIndices);
- auto B =
- getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index, NewIndices);
- A.mergeIn(B);
- return A;
+ConstraintTy
+ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
+ DenseMap<Value *, unsigned> &NewIndices) const {
+ bool IsEq = false;
+ // Try to convert Pred to one of ULE/ULT/SLE/SLT.
+ switch (Pred) {
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE: {
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ std::swap(Op0, Op1);
+ break;
}
-
- if (Pred == CmpInst::ICMP_NE && match(Op1, m_Zero())) {
- return getConstraint(CmpInst::ICMP_UGT, Op0, Op1, Value2Index, NewIndices);
+ case CmpInst::ICMP_EQ:
+ if (match(Op1, m_Zero())) {
+ Pred = CmpInst::ICMP_ULE;
+ } else {
+ IsEq = true;
+ Pred = CmpInst::ICMP_ULE;
+ }
+ break;
+ case CmpInst::ICMP_NE:
+ if (!match(Op1, m_Zero()))
+ return {};
+ Pred = CmpInst::getSwappedPredicate(CmpInst::ICMP_UGT);
+ std::swap(Op0, Op1);
+ break;
+ default:
+ break;
}
// Only the ULE/ULT and SLE/SLT predicates are supported at the moment.
- if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT)
+ if (Pred != CmpInst::ICMP_ULE && Pred != CmpInst::ICMP_ULT &&
+ Pred != CmpInst::ICMP_SLE && Pred != CmpInst::ICMP_SLT)
return {};
- auto ADec = decompose(Op0->stripPointerCastsSameRepresentation());
- auto BDec = decompose(Op1->stripPointerCastsSameRepresentation());
+ SmallVector<PreconditionTy, 4> Preconditions;
+ bool IsSigned = CmpInst::isSigned(Pred);
+ auto &Value2Index = getValue2Index(IsSigned);
+ auto ADec = decompose(Op0->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned);
+ auto BDec = decompose(Op1->stripPointerCastsSameRepresentation(),
+ Preconditions, IsSigned);
// Skip if decomposing either of the values failed.
if (ADec.empty() || BDec.empty())
return {};
- // Skip trivial constraints without any variables.
- if (ADec.size() == 1 && BDec.size() == 1)
- return {};
-
- Offset1 = ADec[0].first;
- Offset2 = BDec[0].first;
+ int64_t Offset1 = ADec[0].first;
+ int64_t Offset2 = BDec[0].first;
Offset1 *= -1;
// Create iterator ranges that skip the constant-factor.
auto VariablesA = llvm::drop_begin(ADec);
auto VariablesB = llvm::drop_begin(BDec);
+ // First try to look up \p V in Value2Index and NewIndices. Otherwise add a
+ // new entry to NewIndices.
+ auto GetOrAddIndex = [&Value2Index, &NewIndices](Value *V) -> unsigned {
+ auto V2I = Value2Index.find(V);
+ if (V2I != Value2Index.end())
+ return V2I->second;
+ auto Insert =
+ NewIndices.insert({V, Value2Index.size() + NewIndices.size() + 1});
+ return Insert.first->second;
+ };
+
// Make sure all variables have entries in Value2Index or NewIndices.
for (const auto &KV :
concat<std::pair<int64_t, Value *>>(VariablesA, VariablesB))
@@ -232,22 +347,85 @@ getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
// Build result constraint, by first adding all coefficients from A and then
// subtracting all coefficients from B.
- SmallVector<int64_t, 8> R(Value2Index.size() + NewIndices.size() + 1, 0);
+ ConstraintTy Res(
+ SmallVector<int64_t, 8>(Value2Index.size() + NewIndices.size() + 1, 0),
+ IsSigned);
+ Res.IsEq = IsEq;
+ auto &R = Res.Coefficients;
for (const auto &KV : VariablesA)
R[GetOrAddIndex(KV.second)] += KV.first;
for (const auto &KV : VariablesB)
R[GetOrAddIndex(KV.second)] -= KV.first;
- R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0);
- return {{R}};
+ int64_t OffsetSum;
+ if (AddOverflow(Offset1, Offset2, OffsetSum))
+ return {};
+ if (Pred == (IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT))
+ if (AddOverflow(OffsetSum, int64_t(-1), OffsetSum))
+ return {};
+ R[0] = OffsetSum;
+ Res.Preconditions = std::move(Preconditions);
+ return Res;
+}
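// Editorial sketch (not part of the patch) of the resulting coefficient row,
// assuming %a maps to index 1 and %b to index 2, with R[0] the constant:
//   %a u<= %b  ->  R = { 0, 1, -1 },  i.e. 1*%a - 1*%b <= 0
//   %a u<  %b  ->  R = { -1, 1, -1 }, i.e. 1*%a - 1*%b <= -1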
+
+bool ConstraintTy::isValid(const ConstraintInfo &Info) const {
+ return Coefficients.size() > 0 &&
+ all_of(Preconditions, [&Info](const PreconditionTy &C) {
+ return Info.doesHold(C.Pred, C.Op0, C.Op1);
+ });
+}
+
+bool ConstraintInfo::doesHold(CmpInst::Predicate Pred, Value *A,
+ Value *B) const {
+ DenseMap<Value *, unsigned> NewIndices;
+ auto R = getConstraint(Pred, A, B, NewIndices);
+
+ if (!NewIndices.empty())
+ return false;
+
+ // TODO: properly check NewIndices.
+ return NewIndices.empty() && R.Preconditions.empty() && !R.IsEq &&
+ !R.empty() &&
+ getCS(CmpInst::isSigned(Pred)).isConditionImplied(R.Coefficients);
}
-static ConstraintListTy
-getConstraint(CmpInst *Cmp, const DenseMap<Value *, unsigned> &Value2Index,
- DenseMap<Value *, unsigned> &NewIndices) {
- return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
- Cmp->getOperand(1), Value2Index, NewIndices);
+void ConstraintInfo::transferToOtherSystem(
+ CmpInst::Predicate Pred, Value *A, Value *B, bool IsNegated, unsigned NumIn,
+ unsigned NumOut, SmallVectorImpl<StackEntry> &DFSInStack) {
+ // Check if we can combine facts from the signed and unsigned systems to
+ // derive additional facts.
+ if (!A->getType()->isIntegerTy())
+ return;
+ // FIXME: This currently depends on the order we add facts. Ideally we
+ // would first add all known facts and only then try to add additional
+ // facts.
+ switch (Pred) {
+ default:
+ break;
+ case CmpInst::ICMP_ULT:
+ // If B is known signed non-negative, A <u B implies A >=s 0 and A <s B.
+ if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) {
+ addFact(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0),
+ IsNegated, NumIn, NumOut, DFSInStack);
+ addFact(CmpInst::ICMP_SLT, A, B, IsNegated, NumIn, NumOut, DFSInStack);
+ }
+ break;
+ case CmpInst::ICMP_SLT:
+ if (doesHold(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)))
+ addFact(CmpInst::ICMP_ULT, A, B, IsNegated, NumIn, NumOut, DFSInStack);
+ break;
+ case CmpInst::ICMP_SGT:
+ if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
+ addFact(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0),
+ IsNegated, NumIn, NumOut, DFSInStack);
+ break;
+ case CmpInst::ICMP_SGE:
+ if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) {
+ addFact(CmpInst::ICMP_UGE, A, B, IsNegated, NumIn, NumOut, DFSInStack);
+ }
+ break;
+ }
}
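// Editorial example (not part of the patch): given %a u< %b in the unsigned
// system, once %b s>= 0 is known the signed system also learns %a s>= 0 and
// %a s< %b; conversely, %a s< %b together with %a s>= 0 yields %a u< %b.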
namespace {
@@ -271,134 +449,253 @@ struct ConstraintOrBlock {
Not(Not), Condition(Condition) {}
};
-struct StackEntry {
- unsigned NumIn;
- unsigned NumOut;
- CmpInst *Condition;
- bool IsNot;
+/// Keep state required to build worklist.
+struct State {
+ DominatorTree &DT;
+ SmallVector<ConstraintOrBlock, 64> WorkList;
- StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot)
- : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot) {}
+ State(DominatorTree &DT) : DT(DT) {}
+
+ /// Process block \p BB and add known facts to work-list.
+ void addInfoFor(BasicBlock &BB);
+
+ /// Returns true if we can add a known condition from BB to its successor
+ /// block Succ. Each predecessor of Succ can either be BB or be dominated
+ /// by Succ (e.g. the case when adding a condition from a pre-header to a
+ /// loop header).
+ bool canAddSuccessor(BasicBlock &BB, BasicBlock *Succ) const {
+ if (BB.getSingleSuccessor()) {
+ assert(BB.getSingleSuccessor() == Succ);
+ return DT.properlyDominates(&BB, Succ);
+ }
+ return any_of(successors(&BB),
+ [Succ](const BasicBlock *S) { return S != Succ; }) &&
+ all_of(predecessors(Succ), [&BB, Succ, this](BasicBlock *Pred) {
+ return Pred == &BB || DT.dominates(Succ, Pred);
+ });
+ }
};
+
} // namespace
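// Editorial example (not part of the patch): for a loop pre-header P ending
// in "br i1 %c, label %header, label %exit", a condition may be added to
// %header even though %header also has a back-edge predecessor L, because
// %header dominates L.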
#ifndef NDEBUG
-static void dumpWithNames(ConstraintTy &C,
+static void dumpWithNames(const ConstraintSystem &CS,
DenseMap<Value *, unsigned> &Value2Index) {
SmallVector<std::string> Names(Value2Index.size(), "");
for (auto &KV : Value2Index) {
Names[KV.second - 1] = std::string("%") + KV.first->getName().str();
}
- ConstraintSystem CS;
- CS.addVariableRowFill(C.Coefficients);
CS.dump(Names);
}
-#endif
-static bool eliminateConstraints(Function &F, DominatorTree &DT) {
- bool Changed = false;
- DT.updateDFSNumbers();
+static void dumpWithNames(ArrayRef<int64_t> C,
+ DenseMap<Value *, unsigned> &Value2Index) {
ConstraintSystem CS;
+ CS.addVariableRowFill(C);
+ dumpWithNames(CS, Value2Index);
+}
+#endif
- SmallVector<ConstraintOrBlock, 64> WorkList;
-
- // First, collect conditions implied by branches and blocks with their
- // Dominator DFS in and out numbers.
- for (BasicBlock &BB : F) {
- if (!DT.getNode(&BB))
- continue;
- WorkList.emplace_back(DT.getNode(&BB));
-
- // True as long as the current instruction is guaranteed to execute.
- bool GuaranteedToExecute = true;
- // Scan BB for assume calls.
- // TODO: also use this scan to queue conditions to simplify, so we can
- // interleave facts from assumes and conditions to simplify in a single
- // basic block. And to skip another traversal of each basic block when
- // simplifying.
- for (Instruction &I : BB) {
- Value *Cond;
- // For now, just handle assumes with a single compare as condition.
- if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
- isa<CmpInst>(Cond)) {
- if (GuaranteedToExecute) {
- // The assume is guaranteed to execute when BB is entered, hence Cond
- // holds on entry to BB.
- WorkList.emplace_back(DT.getNode(&BB), cast<CmpInst>(Cond), false);
- } else {
- // Otherwise the condition only holds in the successors.
- for (BasicBlock *Succ : successors(&BB))
- WorkList.emplace_back(DT.getNode(Succ), cast<CmpInst>(Cond), false);
+void State::addInfoFor(BasicBlock &BB) {
+ WorkList.emplace_back(DT.getNode(&BB));
+
+ // True as long as the current instruction is guaranteed to execute.
+ bool GuaranteedToExecute = true;
+ // Scan BB for assume calls.
+ // TODO: also use this scan to queue conditions to simplify, so we can
+ // interleave facts from assumes and conditions to simplify in a single
+ // basic block. And to skip another traversal of each basic block when
+ // simplifying.
+ for (Instruction &I : BB) {
+ Value *Cond;
+ // For now, just handle assumes with a single compare as condition.
+ if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
+ isa<ICmpInst>(Cond)) {
+ if (GuaranteedToExecute) {
+ // The assume is guaranteed to execute when BB is entered, hence Cond
+ // holds on entry to BB.
+ WorkList.emplace_back(DT.getNode(&BB), cast<ICmpInst>(Cond), false);
+ } else {
+ // Otherwise the condition only holds in the successors.
+ for (BasicBlock *Succ : successors(&BB)) {
+ if (!canAddSuccessor(BB, Succ))
+ continue;
+ WorkList.emplace_back(DT.getNode(Succ), cast<ICmpInst>(Cond), false);
}
}
- GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
}
+ GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
+ }
- auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
- if (!Br || !Br->isConditional())
- continue;
+ auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
+ if (!Br || !Br->isConditional())
+ return;
+
+ // If the condition is an OR of 2 compares and the false successor only has
+ // the current block as predecessor, queue both negated conditions for the
+ // false successor.
+ Value *Op0, *Op1;
+ if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) &&
+ isa<ICmpInst>(Op0) && isa<ICmpInst>(Op1)) {
+ BasicBlock *FalseSuccessor = Br->getSuccessor(1);
+ if (canAddSuccessor(BB, FalseSuccessor)) {
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<ICmpInst>(Op0),
+ true);
+ WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<ICmpInst>(Op1),
+ true);
+ }
+ return;
+ }
- // Returns true if we can add a known condition from BB to its successor
- // block Succ. Each predecessor of Succ can either be BB or be dominated by
- // Succ (e.g. the case when adding a condition from a pre-header to a loop
- // header).
- auto CanAdd = [&BB, &DT](BasicBlock *Succ) {
- return all_of(predecessors(Succ), [&BB, &DT, Succ](BasicBlock *Pred) {
- return Pred == &BB || DT.dominates(Succ, Pred);
- });
- };
- // If the condition is an OR of 2 compares and the false successor only has
- // the current block as predecessor, queue both negated conditions for the
- // false successor.
- Value *Op0, *Op1;
- if (match(Br->getCondition(), m_LogicalOr(m_Value(Op0), m_Value(Op1))) &&
- match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
- BasicBlock *FalseSuccessor = Br->getSuccessor(1);
- if (CanAdd(FalseSuccessor)) {
- WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op0),
- true);
- WorkList.emplace_back(DT.getNode(FalseSuccessor), cast<CmpInst>(Op1),
- true);
- }
- continue;
+ // If the condition is an AND of 2 compares and the true successor only has
+ // the current block as predecessor, queue both conditions for the true
+ // successor.
+ if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) &&
+ isa<ICmpInst>(Op0) && isa<ICmpInst>(Op1)) {
+ BasicBlock *TrueSuccessor = Br->getSuccessor(0);
+ if (canAddSuccessor(BB, TrueSuccessor)) {
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<ICmpInst>(Op0),
+ false);
+ WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<ICmpInst>(Op1),
+ false);
}
+ return;
+ }
- // If the condition is an AND of 2 compares and the true successor only has
- // the current block as predecessor, queue both conditions for the true
- // successor.
- if (match(Br->getCondition(), m_LogicalAnd(m_Value(Op0), m_Value(Op1))) &&
- match(Op0, m_Cmp()) && match(Op1, m_Cmp())) {
- BasicBlock *TrueSuccessor = Br->getSuccessor(0);
- if (CanAdd(TrueSuccessor)) {
- WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op0),
- false);
- WorkList.emplace_back(DT.getNode(TrueSuccessor), cast<CmpInst>(Op1),
- false);
+ auto *CmpI = dyn_cast<ICmpInst>(Br->getCondition());
+ if (!CmpI)
+ return;
+ if (canAddSuccessor(BB, Br->getSuccessor(0)))
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
+ if (canAddSuccessor(BB, Br->getSuccessor(1)))
+ WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
+}
+
+void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
+ bool IsNegated, unsigned NumIn, unsigned NumOut,
+ SmallVectorImpl<StackEntry> &DFSInStack) {
+ // If the constraint has a pre-condition, skip the constraint if it does not
+ // hold.
+ DenseMap<Value *, unsigned> NewIndices;
+ auto R = getConstraint(Pred, A, B, NewIndices);
+ if (!R.isValid(*this))
+ return;
+
+ // LLVM_DEBUG(dbgs() << "Adding " << CmpInst::getPredicateName(Pred) << " "
+ //                   << *A << ", " << *B << " negated " << IsNegated << "\n");
+ bool Added = false;
+ assert(CmpInst::isSigned(Pred) == R.IsSigned &&
+ "condition and constraint signs must match");
+ auto &CSToUse = getCS(R.IsSigned);
+ if (R.Coefficients.empty())
+ return;
+
+ Added |= CSToUse.addVariableRowFill(R.Coefficients);
+
+ // If R has been added to the system, queue it for removal once it goes
+ // out-of-scope.
+ if (Added) {
+ SmallVector<Value *, 2> ValuesToRelease;
+ for (auto &KV : NewIndices) {
+ getValue2Index(R.IsSigned).insert(KV);
+ ValuesToRelease.push_back(KV.first);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " constraint: ";
+ dumpWithNames(R.Coefficients, getValue2Index(R.IsSigned));
+ });
+
+ DFSInStack.emplace_back(NumIn, NumOut, IsNegated, R.IsSigned,
+ ValuesToRelease);
+
+ if (R.IsEq) {
+ // Also add the inverted constraint for equality constraints.
+ for (auto &Coeff : R.Coefficients)
+ Coeff *= -1;
+ CSToUse.addVariableRowFill(R.Coefficients);
+
+ DFSInStack.emplace_back(NumIn, NumOut, IsNegated, R.IsSigned,
+ SmallVector<Value *, 2>());
+ }
+ }
+}
+
+static void
+tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
+ SmallVectorImpl<Instruction *> &ToRemove) {
+ auto DoesConditionHold = [](CmpInst::Predicate Pred, Value *A, Value *B,
+ ConstraintInfo &Info) {
+ DenseMap<Value *, unsigned> NewIndices;
+ auto R = Info.getConstraint(Pred, A, B, NewIndices);
+ if (R.size() < 2 || R.needsNewIndices(NewIndices) || !R.isValid(Info))
+ return false;
+
+ auto &CSToUse = Info.getCS(CmpInst::isSigned(Pred));
+ return CSToUse.isConditionImplied(R.Coefficients);
+ };
+
+ if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
+ // If A s>= B && B s>= 0, ssub.with.overflow(a, b) should not overflow and
+ // can be simplified to a regular sub.
+ Value *A = II->getArgOperand(0);
+ Value *B = II->getArgOperand(1);
+ if (!DoesConditionHold(CmpInst::ICMP_SGE, A, B, Info) ||
+ !DoesConditionHold(CmpInst::ICMP_SGE, B,
+ ConstantInt::get(A->getType(), 0), Info))
+ return;
+
+ IRBuilder<> Builder(II->getParent(), II->getIterator());
+ Value *Sub = nullptr;
+ for (User *U : make_early_inc_range(II->users())) {
+ if (match(U, m_ExtractValue<0>(m_Value()))) {
+ if (!Sub)
+ Sub = Builder.CreateSub(A, B);
+ U->replaceAllUsesWith(Sub);
+ } else if (match(U, m_ExtractValue<1>(m_Value())))
+ U->replaceAllUsesWith(Builder.getFalse());
+ else
+ continue;
+
+ if (U->use_empty()) {
+ auto *I = cast<Instruction>(U);
+ ToRemove.push_back(I);
+ I->setOperand(0, PoisonValue::get(II->getType()));
}
- continue;
}
- auto *CmpI = dyn_cast<CmpInst>(Br->getCondition());
- if (!CmpI)
+ if (II->use_empty())
+ II->eraseFromParent();
+ }
+}
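// Editorial sketch (not part of the patch), assuming %a s>= %b and %b s>= 0
// are already implied by the signed system:
//   %r = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
//   %v = extractvalue { i32, i1 } %r, 0   ; replaced by: %v = sub i32 %a, %b
//   %o = extractvalue { i32, i1 } %r, 1   ; replaced by: i1 false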
+
+static bool eliminateConstraints(Function &F, DominatorTree &DT) {
+ bool Changed = false;
+ DT.updateDFSNumbers();
+
+ ConstraintInfo Info;
+ State S(DT);
+
+ // First, collect conditions implied by branches and blocks with their
+ // Dominator DFS in and out numbers.
+ for (BasicBlock &BB : F) {
+ if (!DT.getNode(&BB))
continue;
- if (CanAdd(Br->getSuccessor(0)))
- WorkList.emplace_back(DT.getNode(Br->getSuccessor(0)), CmpI, false);
- if (CanAdd(Br->getSuccessor(1)))
- WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true);
+ S.addInfoFor(BB);
}
// Next, sort worklist by dominance, so that dominating blocks and conditions
// come before blocks and conditions dominated by them. If a block and a
// condition have the same numbers, the condition comes before the block, as
// it holds on entry to the block.
- sort(WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
+ stable_sort(S.WorkList, [](const ConstraintOrBlock &A, const ConstraintOrBlock &B) {
return std::tie(A.NumIn, A.IsBlock) < std::tie(B.NumIn, B.IsBlock);
});
+ SmallVector<Instruction *> ToRemove;
+
// Finally, process ordered worklist and eliminate implied conditions.
SmallVector<StackEntry, 16> DFSInStack;
- DenseMap<Value *, unsigned> Value2Index;
- for (ConstraintOrBlock &CB : WorkList) {
+ for (ConstraintOrBlock &CB : S.WorkList) {
// First, pop entries from the stack that are out-of-scope for CB. Remove
// the corresponding entry from the constraint system.
while (!DFSInStack.empty()) {
@@ -409,10 +706,20 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
assert(E.NumIn <= CB.NumIn);
if (CB.NumOut <= E.NumOut)
break;
- LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot
- << "\n");
+ LLVM_DEBUG({
+ dbgs() << "Removing ";
+ dumpWithNames(Info.getCS(E.IsSigned).getLastConstraint(),
+ Info.getValue2Index(E.IsSigned));
+ dbgs() << "\n";
+ });
+
+ Info.popLastConstraint(E.IsSigned);
+ // Remove variables in the system that went out of scope.
+ auto &Mapping = Info.getValue2Index(E.IsSigned);
+ for (Value *V : E.ValuesToRelease)
+ Mapping.erase(V);
+ Info.popLastNVariables(E.IsSigned, E.ValuesToRelease.size());
DFSInStack.pop_back();
- CS.popLastConstraint();
}
LLVM_DEBUG({
@@ -427,28 +734,30 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
// For a block, check if any CmpInsts become known based on the current set
// of constraints.
if (CB.IsBlock) {
- for (Instruction &I : *CB.BB) {
- auto *Cmp = dyn_cast<CmpInst>(&I);
+ for (Instruction &I : make_early_inc_range(*CB.BB)) {
+ if (auto *II = dyn_cast<WithOverflowInst>(&I)) {
+ tryToSimplifyOverflowMath(II, Info, ToRemove);
+ continue;
+ }
+ auto *Cmp = dyn_cast<ICmpInst>(&I);
if (!Cmp)
continue;
DenseMap<Value *, unsigned> NewIndices;
- auto R = getConstraint(Cmp, Value2Index, NewIndices);
- if (R.size() != 1)
- continue;
-
- if (R.needsNewIndices(NewIndices))
+ auto R = Info.getConstraint(Cmp, NewIndices);
+ if (R.IsEq || R.empty() || R.needsNewIndices(NewIndices) ||
+ !R.isValid(Info))
continue;
- if (CS.isConditionImplied(R.get(0).Coefficients)) {
+ auto &CSToUse = Info.getCS(R.IsSigned);
+ if (CSToUse.isConditionImplied(R.Coefficients)) {
if (!DebugCounter::shouldExecute(EliminatedCounter))
continue;
- LLVM_DEBUG(dbgs() << "Condition " << *Cmp
- << " implied by dominating constraints\n");
LLVM_DEBUG({
- for (auto &E : reverse(DFSInStack))
- dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ dbgs() << "Condition " << *Cmp
+ << " implied by dominating constraints\n";
+ dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned));
});
Cmp->replaceUsesWithIf(
ConstantInt::getTrue(F.getParent()->getContext()), [](Use &U) {
@@ -460,16 +769,15 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
NumCondsRemoved++;
Changed = true;
}
- if (CS.isConditionImplied(
- ConstraintSystem::negate(R.get(0).Coefficients))) {
+ if (CSToUse.isConditionImplied(
+ ConstraintSystem::negate(R.Coefficients))) {
if (!DebugCounter::shouldExecute(EliminatedCounter))
continue;
- LLVM_DEBUG(dbgs() << "Condition !" << *Cmp
- << " implied by dominating constraints\n");
LLVM_DEBUG({
- for (auto &E : reverse(DFSInStack))
- dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
+ dbgs() << "Condition !" << *Cmp
+ << " implied by dominating constraints\n";
+ dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned));
});
Cmp->replaceAllUsesWith(
ConstantInt::getFalse(F.getParent()->getContext()));
@@ -482,7 +790,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
// Set up a function to restore the predicate at the end of the scope if it
// has been negated. Negate the predicate in-place, if required.
- auto *CI = dyn_cast<CmpInst>(CB.Condition);
+ auto *CI = dyn_cast<ICmpInst>(CB.Condition);
auto PredicateRestorer = make_scope_exit([CI, &CB]() {
if (CB.Not && CI)
CI->setPredicate(CI->getInversePredicate());
@@ -496,34 +804,28 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
}
}
- // Otherwise, add the condition to the system and stack, if we can transform
- // it into a constraint.
- DenseMap<Value *, unsigned> NewIndices;
- auto R = getConstraint(CB.Condition, Value2Index, NewIndices);
- if (R.empty())
- continue;
-
- for (auto &KV : NewIndices)
- Value2Index.insert(KV);
-
- LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n");
- bool Added = false;
- for (auto &C : R.Constraints) {
- auto Coeffs = C.Coefficients;
- LLVM_DEBUG({
- dbgs() << " constraint: ";
- dumpWithNames(C, Value2Index);
- });
- Added |= CS.addVariableRowFill(Coeffs);
- // If R has been added to the system, queue it for removal once it goes
- // out-of-scope.
- if (Added)
- DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not);
+ ICmpInst::Predicate Pred;
+ Value *A, *B;
+ if (match(CB.Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+ // Otherwise, add the condition to the system and stack, if we can
+ // transform it into a constraint.
+ Info.addFact(Pred, A, B, CB.Not, CB.NumIn, CB.NumOut, DFSInStack);
+ Info.transferToOtherSystem(Pred, A, B, CB.Not, CB.NumIn, CB.NumOut,
+ DFSInStack);
}
}
- assert(CS.size() == DFSInStack.size() &&
+#ifndef NDEBUG
+ unsigned SignedEntries =
+ count_if(DFSInStack, [](const StackEntry &E) { return E.IsSigned; });
+ assert(Info.getCS(false).size() == DFSInStack.size() - SignedEntries &&
+ "updates to CS and DFSInStack are out of sync");
+ assert(Info.getCS(true).size() == SignedEntries &&
"updates to CS and DFSInStack are out of sync");
+#endif
+
+ for (Instruction *I : ToRemove)
+ I->eraseFromParent();
return Changed;
}
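
A minimal standalone sketch of the scoping discipline this hunk relies on, using hypothetical names rather than the LLVM API: each fact records the DFS in/out numbers of the block that introduced it, and facts are popped as soon as the walk reaches a block outside that interval, mirroring the E.NumIn <= CB.NumIn / CB.NumOut <= E.NumOut checks above.

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

struct Fact {
  unsigned NumIn, NumOut; // DFS interval of the introducing block
  std::string Text;
};

struct Block {
  unsigned NumIn, NumOut;
  std::string Fact; // empty if the block introduces no fact
};

int main() {
  // Blocks in DFS pre-order of the dominator tree.
  std::vector<Block> WorkList = {
      {1, 8, "x < 10"}, {2, 5, "y > 0"}, {3, 4, ""}, {6, 7, ""}};
  std::vector<Fact> DFSInStack;
  for (const Block &CB : WorkList) {
    // Pop facts whose introducing block does not dominate CB.
    while (!DFSInStack.empty()) {
      const Fact &E = DFSInStack.back();
      assert(E.NumIn <= CB.NumIn && "worklist must be in DFS order");
      if (CB.NumOut <= E.NumOut)
        break; // E still dominates CB; everything below it does too
      std::cout << "popping " << E.Text << "\n"; // prints: popping y > 0
      DFSInStack.pop_back();
    }
    if (!CB.Fact.empty())
      DFSInStack.push_back({CB.NumIn, CB.NumOut, CB.Fact});
  }
}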
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index a3fd97079b1d..64bd4241f37c 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -41,8 +41,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
@@ -215,6 +213,53 @@ static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
return true;
}
+static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming,
+ BasicBlock *From, BasicBlock *To,
+ Instruction *CxtI) {
+ if (Constant *C = LVI->getConstantOnEdge(Incoming, From, To, CxtI))
+ return C;
+
+ // Check if the incoming value is a select with a scalar condition for which
+ // LVI can tell us the value. In that case, replace the incoming value with
+ // the appropriate value of the select. This often allows us to remove the
+ // select later.
+ auto *SI = dyn_cast<SelectInst>(Incoming);
+ if (!SI)
+ return nullptr;
+
+ // Once LVI learns to handle vector types, we could also add support
+ // for vector type constants that are not all zeroes or all ones.
+ Value *Condition = SI->getCondition();
+ if (!Condition->getType()->isVectorTy()) {
+ if (Constant *C = LVI->getConstantOnEdge(Condition, From, To, CxtI)) {
+ if (C->isOneValue())
+ return SI->getTrueValue();
+ if (C->isZeroValue())
+ return SI->getFalseValue();
+ }
+ }
+
+ // Check if the select has a constant arm but LVI tells us that the incoming
+ // value can never be that constant. In that case replace the incoming
+ // value with the other value of the select. This often allows us to
+ // remove the select later.
+
+ // The "false" case
+ if (auto *C = dyn_cast<Constant>(SI->getFalseValue()))
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
+ LazyValueInfo::False)
+ return SI->getTrueValue();
+
+ // The "true" case,
+ // similar to the select "false" case, but try the select "true" value
+ if (auto *C = dyn_cast<Constant>(SI->getTrueValue()))
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
+ LazyValueInfo::False)
+ return SI->getFalseValue();
+
+ return nullptr;
+}
+
static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
const SimplifyQuery &SQ) {
bool Changed = false;
@@ -224,53 +269,14 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
- Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);
-
- // Look if the incoming value is a select with a scalar condition for which
- // LVI can tells us the value. In that case replace the incoming value with
- // the appropriate value of the select. This often allows us to remove the
- // select later.
- if (!V) {
- SelectInst *SI = dyn_cast<SelectInst>(Incoming);
- if (!SI) continue;
-
- Value *Condition = SI->getCondition();
- if (!Condition->getType()->isVectorTy()) {
- if (Constant *C = LVI->getConstantOnEdge(
- Condition, P->getIncomingBlock(i), BB, P)) {
- if (C->isOneValue()) {
- V = SI->getTrueValue();
- } else if (C->isZeroValue()) {
- V = SI->getFalseValue();
- }
- // Once LVI learns to handle vector types, we could also add support
- // for vector type constants that are not all zeroes or all ones.
- }
- }
-
- // Look if the select has a constant but LVI tells us that the incoming
- // value can never be that constant. In that case replace the incoming
- // value with the other value of the select. This often allows us to
- // remove the select later.
- if (!V) {
- Constant *C = dyn_cast<Constant>(SI->getFalseValue());
- if (!C) continue;
-
- if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
- P->getIncomingBlock(i), BB, P) !=
- LazyValueInfo::False)
- continue;
- V = SI->getTrueValue();
- }
-
- LLVM_DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
+ Value *V = getValueOnEdge(LVI, Incoming, P->getIncomingBlock(i), BB, P);
+ if (V) {
+ P->setIncomingValue(i, V);
+ Changed = true;
}
-
- P->setIncomingValue(i, V);
- Changed = true;
}
- if (Value *V = SimplifyInstruction(P, SQ)) {
+ if (Value *V = simplifyInstruction(P, SQ)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
@@ -575,7 +581,7 @@ static bool processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
StructType *ST = cast<StructType>(WO->getType());
Constant *Struct = ConstantStruct::get(ST,
- { UndefValue::get(ST->getElementType(0)),
+ { PoisonValue::get(ST->getElementType(0)),
ConstantInt::getFalse(ST->getElementType(1)) });
Value *NewI = B.CreateInsertValue(Struct, NewOp, 0);
WO->replaceAllUsesWith(NewI);
@@ -735,8 +741,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// sdiv/srem is UB if divisor is -1 and dividend is INT_MIN, so unless we can
// prove that such a combination is impossible, we need to bump the bitwidth.
if (CRs[1]->contains(APInt::getAllOnes(OrigWidth)) &&
- CRs[0]->contains(
- APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
+ CRs[0]->contains(APInt::getSignedMinValue(MinSignedBits).sext(OrigWidth)))
++MinSignedBits;
// Don't shrink below 8 bits wide.
@@ -955,7 +960,8 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumAShrsConverted;
auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
- SDI->getName(), SDI);
+ "", SDI);
+ BO->takeName(SDI);
BO->setDebugLoc(SDI->getDebugLoc());
BO->setIsExact(SDI->isExact());
SDI->replaceAllUsesWith(BO);
@@ -974,8 +980,8 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
return false;
++NumSExt;
- auto *ZExt =
- CastInst::CreateZExtOrBitCast(Base, SDI->getType(), SDI->getName(), SDI);
+ auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", SDI);
+ ZExt->takeName(SDI);
ZExt->setDebugLoc(SDI->getDebugLoc());
SDI->replaceAllUsesWith(ZExt);
SDI->eraseFromParent();
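
The extracted getValueOnEdge helper boils down to one decision per incoming edge; here is a small self-contained sketch of that decision with stand-in types (none of this is the LLVM API): when the analysis can prove the select's condition on the edge, the select collapses to the corresponding arm.

#include <iostream>
#include <optional>
#include <string>

// Stand-in for a select instruction: Cond ? TrueV : FalseV.
struct Select { std::string Cond, TrueV, FalseV; };

// CondOnEdge models what LazyValueInfo might prove about Cond on one
// CFG edge: true, false, or unknown.
std::optional<std::string> valueOnEdge(const Select &SI,
                                       std::optional<bool> CondOnEdge) {
  if (CondOnEdge)
    return *CondOnEdge ? SI.TrueV : SI.FalseV;
  return std::nullopt; // nothing provable; keep the select
}

int main() {
  Select SI{"%c", "%a", "%b"};
  // On this edge the condition is known true, so the phi's incoming
  // value can be replaced with the select's true arm.
  if (auto V = valueOnEdge(SI, true))
    std::cout << "incoming value becomes " << *V << "\n"; // %a
}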
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 143a78f604fc..5667eefabad5 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -60,30 +60,31 @@
#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <deque>
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Verifier.h"
+#endif
+
using namespace llvm;
#define DEBUG_TYPE "dfa-jump-threading"
@@ -102,6 +103,11 @@ static cl::opt<unsigned> MaxPathLength(
cl::desc("Max number of blocks searched to find a threading path"),
cl::Hidden, cl::init(20));
+static cl::opt<unsigned> MaxNumPaths(
+ "dfa-max-num-paths",
+ cl::desc("Max number of paths enumerated around a switch"),
+ cl::Hidden, cl::init(200));
+
static cl::opt<unsigned>
CostThreshold("dfa-cost-threshold",
cl::desc("Maximum cost accepted for the transformation"),
@@ -414,7 +420,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
struct MainSwitch {
MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
- if (isPredictable(SI)) {
+ if (isCandidate(SI)) {
Instr = SI;
} else {
ORE->emit([&]() {
@@ -432,83 +438,60 @@ struct MainSwitch {
}
private:
- /// Do a use-def chain traversal. Make sure the value of the switch variable
- /// is always a known constant. This means that all conditional jumps based on
- /// switch variable can be converted to unconditional jumps.
- bool isPredictable(const SwitchInst *SI) {
- std::deque<Instruction *> Q;
+ /// Do a use-def chain traversal starting from the switch condition to see if
+ /// \p SI is a potential candidate.
+ ///
+ /// Also, collect select instructions to unfold.
+ bool isCandidate(const SwitchInst *SI) {
+ std::deque<Value *> Q;
SmallSet<Value *, 16> SeenValues;
SelectInsts.clear();
- Value *FirstDef = SI->getOperand(0);
- auto *Inst = dyn_cast<Instruction>(FirstDef);
-
- // If this is a function argument or another non-instruction, then give up.
- // We are interested in loop local variables.
- if (!Inst)
- return false;
-
- // Require the first definition to be a PHINode
- if (!isa<PHINode>(Inst))
+ Value *SICond = SI->getCondition();
+ LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
+ if (!isa<PHINode>(SICond))
return false;
- LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n");
-
- Q.push_back(Inst);
- SeenValues.insert(FirstDef);
+ addToQueue(SICond, Q, SeenValues);
while (!Q.empty()) {
- Instruction *Current = Q.front();
+ Value *Current = Q.front();
Q.pop_front();
if (auto *Phi = dyn_cast<PHINode>(Current)) {
for (Value *Incoming : Phi->incoming_values()) {
- if (!isPredictableValue(Incoming, SeenValues))
- return false;
- addInstToQueue(Incoming, Q, SeenValues);
+ addToQueue(Incoming, Q, SeenValues);
}
- LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n");
+ LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
} else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
if (!isValidSelectInst(SelI))
return false;
- if (!isPredictableValue(SelI->getTrueValue(), SeenValues) ||
- !isPredictableValue(SelI->getFalseValue(), SeenValues)) {
- return false;
- }
- addInstToQueue(SelI->getTrueValue(), Q, SeenValues);
- addInstToQueue(SelI->getFalseValue(), Q, SeenValues);
- LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n");
+ addToQueue(SelI->getTrueValue(), Q, SeenValues);
+ addToQueue(SelI->getFalseValue(), Q, SeenValues);
+ LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
+ } else if (isa<Constant>(Current)) {
+ LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
+ continue;
} else {
- // If it is neither a phi nor a select, then we give up.
- return false;
+ LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
+ // Allow unpredictable values. The hope is that those will be the
+ // initial switch values that can be ignored (they will hit the
+ // unthreaded switch), but this assumption is checked later, after paths
+ // have been enumerated (in getStateDefMap).
+ continue;
}
}
return true;
}
- bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.contains(InpVal))
- return true;
-
- if (isa<ConstantInt>(InpVal))
- return true;
-
- // If this is a function argument or another non-instruction, then give up.
- if (!isa<Instruction>(InpVal))
- return false;
-
- return true;
- }
-
- void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
- SmallSet<Value *, 16> &SeenValues) {
+ void addToQueue(Value *Val, std::deque<Value *> &Q,
+ SmallSet<Value *, 16> &SeenValues) {
if (SeenValues.contains(Val))
return;
- if (Instruction *I = dyn_cast<Instruction>(Val))
- Q.push_back(I);
+ Q.push_back(Val);
SeenValues.insert(Val);
}
@@ -562,7 +545,16 @@ struct AllSwitchPaths {
void run() {
VisitedBlocks Visited;
PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);
- StateDefMap StateDef = getStateDefMap();
+ StateDefMap StateDef = getStateDefMap(LoopPaths);
+
+ if (StateDef.empty()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
+ Switch)
+ << "Switch instruction is not predictable.";
+ });
+ return;
+ }
for (PathType Path : LoopPaths) {
ThreadingPath TPath;
@@ -637,6 +629,9 @@ private:
PathType NewPath(Path);
NewPath.push_front(BB);
Res.push_back(NewPath);
+ if (Res.size() >= MaxNumPaths) {
+ return Res;
+ }
}
}
// This block could now be visited again from a different predecessor. Note
@@ -647,14 +642,22 @@ private:
}
/// Walk the use-def chain and collect all the state-defining instructions.
- StateDefMap getStateDefMap() const {
+ ///
+ /// Return an empty map if unpredictable values are encountered inside the
+ /// basic blocks of \p LoopPaths.
+ StateDefMap getStateDefMap(const PathsType &LoopPaths) const {
StateDefMap Res;
+ // Basic blocks belonging to any of the loops around the switch statement.
+ SmallPtrSet<BasicBlock *, 16> LoopBBs;
+ for (const PathType &Path : LoopPaths) {
+ for (BasicBlock *BB : Path)
+ LoopBBs.insert(BB);
+ }
+
Value *FirstDef = Switch->getOperand(0);
- assert(isa<PHINode>(FirstDef) && "After select unfolding, all state "
- "definitions are expected to be phi "
- "nodes.");
+ assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");
SmallVector<PHINode *, 8> Stack;
Stack.push_back(dyn_cast<PHINode>(FirstDef));
@@ -666,15 +669,17 @@ private:
Res[CurPhi->getParent()] = CurPhi;
SeenValues.insert(CurPhi);
- for (Value *Incoming : CurPhi->incoming_values()) {
+ for (BasicBlock *IncomingBB : CurPhi->blocks()) {
+ Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB);
+ bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0;
if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
- SeenValues.contains(Incoming)) {
+ SeenValues.contains(Incoming) || IsOutsideLoops) {
continue;
}
- assert(isa<PHINode>(Incoming) && "After select unfolding, all state "
- "definitions are expected to be phi "
- "nodes.");
+ // Any unpredictable value inside the loops means we must bail out.
+ if (!isa<PHINode>(Incoming))
+ return StateDefMap();
Stack.push_back(cast<PHINode>(Incoming));
}
@@ -823,6 +828,16 @@ private:
});
return false;
}
+
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
+ << "instructions with invalid cost.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
+ << "Contains instructions with invalid cost.";
+ });
+ return false;
+ }
}
unsigned DuplicationCost = 0;
@@ -836,7 +851,7 @@ private:
// using binary search, hence the LogBase2().
unsigned CondBranches =
APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
- DuplicationCost = Metrics.NumInsts / CondBranches;
+ DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
} else {
// Compared with jump tables, the DFA optimizer removes an indirect branch
// on each loop iteration, thus making branch prediction more precise. The
@@ -844,7 +859,7 @@ private:
// predictor to make a mistake, and the more benefit there is in the DFA
// optimizer. Thus, the more branch targets there are, the lower is the
// cost of the DFA opt.
- DuplicationCost = Metrics.NumInsts / JumpTableSize;
+ DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
}
LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
@@ -1197,7 +1212,7 @@ private:
PhiToRemove.push_back(Phi);
}
for (PHINode *PN : PhiToRemove) {
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
PN->eraseFromParent();
}
return;
@@ -1246,7 +1261,7 @@ private:
/// Returns true if IncomingBB is a predecessor of BB.
bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
- return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB);
+ return llvm::is_contained(predecessors(BB), IncomingBB);
}
AllSwitchPaths *SwitchPaths;
@@ -1278,7 +1293,7 @@ bool DFAJumpThreading::run(Function &F) {
continue;
LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
- << " is predictable\n");
+ << " is a candidate\n");
MainSwitch Switch(SI, ORE);
if (!Switch.getInstr())
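
The new dfa-max-num-paths cap is a plain guard inside the path enumeration; a standalone sketch of that shape (hypothetical graph encoding, not the pass's real data structures) shows how the recursion stops growing the result list once the cap is hit:

#include <cstdio>
#include <vector>

using Path = std::vector<int>;

// Enumerate paths from BB back to an entry block by walking
// predecessors, giving up once MaxNumPaths or MaxDepth is reached.
static void paths(int BB, const std::vector<std::vector<int>> &Preds,
                  Path &Cur, std::vector<Path> &Res, unsigned Depth,
                  unsigned MaxDepth, unsigned MaxNumPaths) {
  if (Res.size() >= MaxNumPaths || Depth > MaxDepth)
    return; // bound the search, like MaxNumPaths/MaxPathLength above
  Cur.push_back(BB);
  if (Preds[BB].empty()) // reached an entry: record one complete path
    Res.push_back(Path(Cur.rbegin(), Cur.rend()));
  else
    for (int P : Preds[BB])
      paths(P, Preds, Cur, Res, Depth + 1, MaxDepth, MaxNumPaths);
  Cur.pop_back();
}

int main() {
  std::vector<std::vector<int>> Preds = {{}, {0}, {0, 1}}; // block -> preds
  Path Cur;
  std::vector<Path> Res;
  paths(2, Preds, Cur, Res, /*Depth=*/1, /*MaxDepth=*/20, /*MaxNumPaths=*/200);
  printf("%zu paths\n", Res.size()); // 2 paths: 0->2 and 0->1->2
}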
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index ae636e7b61f7..4c42869dbd58 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -38,7 +38,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -62,8 +64,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -75,7 +75,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
@@ -83,7 +82,6 @@
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
@@ -766,20 +764,27 @@ struct DSEState {
// Post-order numbers for each basic block. Used to figure out if memory
// accesses are executed before another access.
DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
+ // Values that are only used with assumes. Used to refine pointer escape
+ // analysis.
+ SmallPtrSet<const Value *, 32> EphValues;
/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
/// basic block.
MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ // Check if there are root nodes that are terminated by UnreachableInst.
+ // Those roots pessimize post-dominance queries. If there are such roots,
+ // fall back to CFG scan starting from all non-unreachable roots.
+ bool AnyUnreachableExit;
// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
DSEState &operator=(const DSEState &) = delete;
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
- PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
- const LoopInfo &LI)
- : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
- PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
+ PostDominatorTree &PDT, AssumptionCache &AC,
+ const TargetLibraryInfo &TLI, const LoopInfo &LI)
+ : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
+ DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
@@ -805,6 +810,12 @@ struct DSEState {
// Collect whether there is any irreducible control flow in the function.
ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
+
+ AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
+ return isa<UnreachableInst>(E->getTerminator());
+ });
+
+ CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
}
/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
@@ -951,7 +962,7 @@ struct DSEState {
if (!isInvisibleToCallerOnUnwind(V)) {
I.first->second = false;
} else if (isNoAliasCall(V)) {
- I.first->second = !PointerMayBeCaptured(V, true, false);
+ I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
}
}
return I.first->second;
@@ -970,7 +981,7 @@ struct DSEState {
// with the killing MemoryDef. But we refrain from doing so for now to
// limit compile-time and this does not cause any changes to the number
// of stores removed on a large test set in practice.
- I.first->second = PointerMayBeCaptured(V, false, true);
+ I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
return !I.first->second;
}
@@ -1003,7 +1014,8 @@ struct DSEState {
if (CB->isLifetimeStartOrEnd())
return false;
- return CB->use_empty() && CB->willReturn() && CB->doesNotThrow();
+ return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
+ !CB->isTerminator();
}
return false;
@@ -1233,6 +1245,9 @@ struct DSEState {
// Reached TOP.
if (MSSA.isLiveOnEntryDef(Current)) {
LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
+ if (CanOptimize && Current != KillingDef->getDefiningAccess())
+ // The first clobbering def is... none.
+ KillingDef->setOptimized(Current);
return None;
}
@@ -1309,7 +1324,6 @@ struct DSEState {
// memory location and not located in different loops.
if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
- WalkerStepLimit -= 1;
CanOptimize = false;
continue;
}
@@ -1508,54 +1522,56 @@ struct DSEState {
CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
}
- // If CommonPred is in the set of killing blocks, just check if it
- // post-dominates MaybeDeadAccess.
- if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
- return {MaybeDeadAccess};
- return None;
- }
-
// If the common post-dominator does not post-dominate MaybeDeadAccess,
// there is a path from MaybeDeadAccess to an exit not going through a
// killing block.
- if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
- SetVector<BasicBlock *> WorkList;
-
- // If CommonPred is null, there are multiple exits from the function.
- // They all have to be added to the worklist.
- if (CommonPred)
- WorkList.insert(CommonPred);
- else
- for (BasicBlock *R : PDT.roots())
+ if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
+ if (!AnyUnreachableExit)
+ return None;
+
+ // Fall back to CFG scan starting at all non-unreachable roots if not
+ // all paths to the exit go through CommonPred.
+ CommonPred = nullptr;
+ }
+
+ // If CommonPred itself is in the set of killing blocks, we're done.
+ if (KillingBlocks.count(CommonPred))
+ return {MaybeDeadAccess};
+
+ SetVector<BasicBlock *> WorkList;
+ // If CommonPred is null, there are multiple exits from the function.
+ // They all have to be added to the worklist.
+ if (CommonPred)
+ WorkList.insert(CommonPred);
+ else
+ for (BasicBlock *R : PDT.roots()) {
+ if (!isa<UnreachableInst>(R->getTerminator()))
WorkList.insert(R);
+ }
- NumCFGTries++;
- // Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching MaybeDeadAccess.
- for (unsigned I = 0; I < WorkList.size(); I++) {
- NumCFGChecks++;
- BasicBlock *Current = WorkList[I];
- if (KillingBlocks.count(Current))
- continue;
- if (Current == MaybeDeadAccess->getBlock())
- return None;
+ NumCFGTries++;
+ // Check if all paths starting from an exit node go through one of the
+ // killing blocks before reaching MaybeDeadAccess.
+ for (unsigned I = 0; I < WorkList.size(); I++) {
+ NumCFGChecks++;
+ BasicBlock *Current = WorkList[I];
+ if (KillingBlocks.count(Current))
+ continue;
+ if (Current == MaybeDeadAccess->getBlock())
+ return None;
- // MaybeDeadAccess is reachable from the entry, so we don't have to
- // explore unreachable blocks further.
- if (!DT.isReachableFromEntry(Current))
- continue;
+ // MaybeDeadAccess is reachable from the entry, so we don't have to
+ // explore unreachable blocks further.
+ if (!DT.isReachableFromEntry(Current))
+ continue;
- for (BasicBlock *Pred : predecessors(Current))
- WorkList.insert(Pred);
+ for (BasicBlock *Pred : predecessors(Current))
+ WorkList.insert(Pred);
- if (WorkList.size() >= MemorySSAPathCheckLimit)
- return None;
- }
- NumCFGSuccess++;
- return {MaybeDeadAccess};
+ if (WorkList.size() >= MemorySSAPathCheckLimit)
+ return None;
}
- return None;
+ NumCFGSuccess++;
}
// No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
@@ -1780,10 +1796,9 @@ struct DSEState {
if (!isRemovable(DefI))
return false;
- if (StoredConstant && isAllocationFn(DefUO, &TLI)) {
- auto *CB = cast<CallBase>(DefUO);
- auto *InitC = getInitialValueOfAllocation(CB, &TLI,
- StoredConstant->getType());
+ if (StoredConstant) {
+ Constant *InitC =
+ getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
// If the clobbering access is LiveOnEntry, no instructions between them
// can modify the memory location.
if (InitC && InitC == StoredConstant)
@@ -1921,11 +1936,13 @@ struct DSEState {
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
+ AssumptionCache &AC,
const TargetLibraryInfo &TLI,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ MSSA.ensureOptimizedUses();
+ DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
@@ -2105,9 +2122,10 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@@ -2147,9 +2165,11 @@ public:
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ AssumptionCache &AC =
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@@ -2173,6 +2193,7 @@ public:
AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
}
};
@@ -2190,6 +2211,7 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
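
The restructured control flow above still answers one question: does every path from an exit back to the candidate dead store pass through a killing block first? A compact standalone model of that scan (integer block ids instead of BasicBlock*, and without the reachable-from-entry filtering):

#include <cstdio>
#include <set>
#include <vector>

static bool killedOnAllExitPaths(int DeadBB,
                                 const std::set<int> &KillingBlocks,
                                 const std::vector<std::vector<int>> &Preds,
                                 const std::vector<int> &ExitBlocks,
                                 unsigned Limit = 50) {
  std::vector<int> WorkList(ExitBlocks.begin(), ExitBlocks.end());
  std::set<int> Seen(ExitBlocks.begin(), ExitBlocks.end());
  for (unsigned I = 0; I < WorkList.size(); ++I) {
    int Current = WorkList[I];
    if (KillingBlocks.count(Current))
      continue; // this path is covered by a killer; stop exploring it
    if (Current == DeadBB)
      return false; // an uncovered path reaches the candidate store
    for (int P : Preds[Current])
      if (Seen.insert(P).second)
        WorkList.push_back(P);
    if (WorkList.size() >= Limit)
      return false; // give up, like MemorySSAPathCheckLimit
  }
  return true;
}

int main() {
  // 0 -> 1 -> 3 and 0 -> 2 -> 3; the candidate store is in block 0.
  std::vector<std::vector<int>> Preds = {{}, {0}, {0}, {1, 2}};
  printf("%d\n", killedOnAllExitPaths(0, {1}, Preds, {3}));    // 0: 0->2->3 escapes
  printf("%d\n", killedOnAllExitPaths(0, {1, 2}, Preds, {3})); // 1: all paths killed
}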
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 59b934c16c8a..cf2824954122 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -30,19 +29,16 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -55,7 +51,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
-#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
@@ -781,6 +776,21 @@ private:
return getLoadStorePointerOperand(Inst);
}
+ Type *getValueType() const {
+ // TODO: handle target-specific intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ return II->getType();
+ case Intrinsic::masked_store:
+ return II->getArgOperand(0)->getType();
+ default:
+ return nullptr;
+ }
+ }
+ return getLoadStoreType(Inst);
+ }
+
bool mayReadFromMemory() const {
if (IntrID != 0)
return Info.ReadMem;
@@ -1162,6 +1172,9 @@ bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
"Violated invariant");
if (Earlier.getPointerOperand() != Later.getPointerOperand())
return false;
+ if (!Earlier.getValueType() || !Later.getValueType() ||
+ Earlier.getValueType() != Later.getValueType())
+ return false;
if (Earlier.getMatchingId() != Later.getMatchingId())
return false;
// At the moment, we don't remove ordered stores, but do remove
@@ -1334,7 +1347,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(&Inst, SQ)) {
+ if (Value *V = simplifyInstruction(&Inst, SQ)) {
LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
<< '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
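
The new getValueType() guard in overridingStores() exists because, with opaque pointers, the pointer operand alone no longer implies the stored type. A small stand-alone model of the check (plain addresses stand in for llvm::Type* identities; this is not the EarlyCSE API):

#include <cstdio>

struct MemOp {
  const void *Ptr;     // pointer operand
  const void *ValueTy; // stand-in for llvm::Type*; null means unknown
  int MatchingId;      // intrinsic matching id (0 = plain load/store)
};

static bool mayOverride(const MemOp &Earlier, const MemOp &Later) {
  if (Earlier.Ptr != Later.Ptr)
    return false;
  // The new guard: unknown or mismatched value types keep both stores.
  if (!Earlier.ValueTy || !Later.ValueTy || Earlier.ValueTy != Later.ValueTy)
    return false;
  return Earlier.MatchingId == Later.MatchingId;
}

int main() {
  int Slot, I32Ty, I64Ty; // distinct addresses model distinct types
  MemOp A{&Slot, &I32Ty, 0}, B{&Slot, &I32Ty, 0}, C{&Slot, &I64Ty, 0};
  printf("%d %d\n", mayOverride(A, B), mayOverride(A, C)); // prints: 1 0
}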
diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 44017b555769..ad2041cd4253 100644
--- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,8 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index a98bb8358aef..56f2a3b3004d 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -11,24 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <deque>
-#include <functional> // For std::function
#define DEBUG_TYPE "float2int"
@@ -236,116 +234,111 @@ void Float2IntPass::walkBackwards() {
}
}
-// Walk forwards down the list of seen instructions, so we visit defs before
-// uses.
-void Float2IntPass::walkForwards() {
- for (auto &It : reverse(SeenInsts)) {
- if (It.second != unknownRange())
- continue;
+// Calculate result range from operand ranges.
+// Return None if the range cannot be calculated yet.
+Optional<ConstantRange> Float2IntPass::calcRange(Instruction *I) {
+ SmallVector<ConstantRange, 4> OpRanges;
+ for (Value *O : I->operands()) {
+ if (Instruction *OI = dyn_cast<Instruction>(O)) {
+ auto OpIt = SeenInsts.find(OI);
+ assert(OpIt != SeenInsts.end() && "def not seen before use!");
+ if (OpIt->second == unknownRange())
+ return None; // Wait until operand range has been calculated.
+ OpRanges.push_back(OpIt->second);
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
+ // Work out if the floating point number can be losslessly represented
+ // as an integer.
+ // APFloat::convertToInteger(&Exact) purports to do what we want, but
+ // the exactness can be too precise. For example, negative zero can
+ // never be exactly converted to an integer.
+ //
+ // Instead, we ask APFloat to round itself to an integral value - this
+ // preserves sign-of-zero - then compare the result with the original.
+ //
+ const APFloat &F = CF->getValueAPF();
+
+ // First, weed out obviously incorrect values. Non-finite numbers
+ // can't be represented and neither can negative zero, unless
+ // we're in fast math mode.
+ if (!F.isFinite() ||
+ (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
+ !I->hasNoSignedZeros()))
+ return badRange();
+
+ APFloat NewF = F;
+ auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
+ if (Res != APFloat::opOK || NewF != F)
+ return badRange();
+
+ // OK, it's representable. Now get it.
+ APSInt Int(MaxIntegerBW+1, false);
+ bool Exact;
+ CF->getValueAPF().convertToInteger(Int,
+ APFloat::rmNearestTiesToEven,
+ &Exact);
+ OpRanges.push_back(ConstantRange(Int));
+ } else {
+ llvm_unreachable("Should have already marked this as badRange!");
+ }
+ }
- Instruction *I = It.first;
- std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
- switch (I->getOpcode()) {
- // FIXME: Handle select and phi nodes.
- default:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- llvm_unreachable("Should have been handled in walkForwards!");
+ switch (I->getOpcode()) {
+ // FIXME: Handle select and phi nodes.
+ default:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ llvm_unreachable("Should have been handled in walkForwards!");
- case Instruction::FNeg:
- Op = [](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 1 && "FNeg is a unary operator!");
- unsigned Size = Ops[0].getBitWidth();
- auto Zero = ConstantRange(APInt::getZero(Size));
- return Zero.sub(Ops[0]);
- };
- break;
+ case Instruction::FNeg: {
+ assert(OpRanges.size() == 1 && "FNeg is a unary operator!");
+ unsigned Size = OpRanges[0].getBitWidth();
+ auto Zero = ConstantRange(APInt::getZero(Size));
+ return Zero.sub(OpRanges[0]);
+ }
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- Op = [I](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 2 && "its a binary operator!");
- auto BinOp = (Instruction::BinaryOps) I->getOpcode();
- return Ops[0].binaryOp(BinOp, Ops[1]);
- };
- break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul: {
+ assert(OpRanges.size() == 2 && "it's a binary operator!");
+ auto BinOp = (Instruction::BinaryOps) I->getOpcode();
+ return OpRanges[0].binaryOp(BinOp, OpRanges[1]);
+ }
- //
- // Root-only instructions - we'll only see these if they're the
- // first node in a walk.
- //
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- Op = [I](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
- // Note: We're ignoring the casts output size here as that's what the
- // caller expects.
- auto CastOp = (Instruction::CastOps)I->getOpcode();
- return Ops[0].castOp(CastOp, MaxIntegerBW+1);
- };
- break;
+ //
+ // Root-only instructions - we'll only see these if they're the
+ // first node in a walk.
+ //
+ case Instruction::FPToUI:
+ case Instruction::FPToSI: {
+ assert(OpRanges.size() == 1 && "FPTo[US]I is a unary operator!");
+ // Note: We're ignoring the cast's output size here as that's what the
+ // caller expects.
+ auto CastOp = (Instruction::CastOps)I->getOpcode();
+ return OpRanges[0].castOp(CastOp, MaxIntegerBW+1);
+ }
- case Instruction::FCmp:
- Op = [](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 2 && "FCmp is a binary operator!");
- return Ops[0].unionWith(Ops[1]);
- };
- break;
- }
+ case Instruction::FCmp:
+ assert(OpRanges.size() == 2 && "FCmp is a binary operator!");
+ return OpRanges[0].unionWith(OpRanges[1]);
+ }
+}
- bool Abort = false;
- SmallVector<ConstantRange,4> OpRanges;
- for (Value *O : I->operands()) {
- if (Instruction *OI = dyn_cast<Instruction>(O)) {
- assert(SeenInsts.find(OI) != SeenInsts.end() &&
- "def not seen before use!");
- OpRanges.push_back(SeenInsts.find(OI)->second);
- } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
- // Work out if the floating point number can be losslessly represented
- // as an integer.
- // APFloat::convertToInteger(&Exact) purports to do what we want, but
- // the exactness can be too precise. For example, negative zero can
- // never be exactly converted to an integer.
- //
- // Instead, we ask APFloat to round itself to an integral value - this
- // preserves sign-of-zero - then compare the result with the original.
- //
- const APFloat &F = CF->getValueAPF();
-
- // First, weed out obviously incorrect values. Non-finite numbers
- // can't be represented and neither can negative zero, unless
- // we're in fast math mode.
- if (!F.isFinite() ||
- (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
- !I->hasNoSignedZeros())) {
- seen(I, badRange());
- Abort = true;
- break;
- }
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2IntPass::walkForwards() {
+ std::deque<Instruction *> Worklist;
+ for (const auto &Pair : SeenInsts)
+ if (Pair.second == unknownRange())
+ Worklist.push_back(Pair.first);
- APFloat NewF = F;
- auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
- if (Res != APFloat::opOK || NewF != F) {
- seen(I, badRange());
- Abort = true;
- break;
- }
- // OK, it's representable. Now get it.
- APSInt Int(MaxIntegerBW+1, false);
- bool Exact;
- CF->getValueAPF().convertToInteger(Int,
- APFloat::rmNearestTiesToEven,
- &Exact);
- OpRanges.push_back(ConstantRange(Int));
- } else {
- llvm_unreachable("Should have already marked this as badRange!");
- }
- }
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
- // Reduce the operands' ranges to a single range and return.
- if (!Abort)
- seen(I, Op(OpRanges));
+ if (Optional<ConstantRange> Range = calcRange(I))
+ seen(I, *Range);
+ else
+ Worklist.push_front(I); // Reprocess later.
}
}
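
The walkForwards() rewrite replaces the per-opcode lambdas with a calcRange() that can simply say "not yet"; deferred instructions go to the front of a deque and are retried after their operands. A toy version of that scheduling, with an invented transfer function (range = 1 + sum of operand ranges) standing in for the real ConstantRange math:

#include <cstdio>
#include <deque>
#include <map>
#include <optional>
#include <vector>

struct Inst { int Id; std::vector<int> Ops; };

static std::optional<int>
calcRange(const Inst &I, const std::map<int, std::optional<int>> &Known) {
  int Sum = 1;
  for (int Op : I.Ops) {
    const std::optional<int> &V = Known.at(Op);
    if (!V)
      return std::nullopt; // wait until the operand has been calculated
    Sum += *V;
  }
  return Sum;
}

int main() {
  // 1 is a leaf, 2 uses 1, 3 uses 2; the worklist starts in the worst
  // order, so uses are popped before their defs and get deferred. This
  // terminates as long as every instruction eventually becomes computable.
  std::map<int, Inst> Insts = {{1, {1, {}}}, {2, {2, {1}}}, {3, {3, {2}}}};
  std::map<int, std::optional<int>> Known = {{1, {}}, {2, {}}, {3, {}}};
  std::deque<int> Worklist = {1, 2, 3};
  while (!Worklist.empty()) {
    int Id = Worklist.back();
    Worklist.pop_back();
    if (std::optional<int> R = calcRange(Insts.at(Id), Known))
      Known[Id] = *R;
    else
      Worklist.push_front(Id); // reprocess later, as in the patch
  }
  printf("range(3) = %d\n", *Known.at(3)); // range(3) = 3
}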
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 398c93e8758c..783301fe589e 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -32,6 +31,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -42,12 +42,10 @@
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -55,11 +53,9 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -72,7 +68,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -112,16 +107,16 @@ static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
cl::init(true));
static cl::opt<bool>
GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
- cl::init(true));
+ cl::init(false));
static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
static cl::opt<uint32_t> MaxNumDeps(
- "gvn-max-num-deps", cl::Hidden, cl::init(100), cl::ZeroOrMore,
+ "gvn-max-num-deps", cl::Hidden, cl::init(100),
cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
static cl::opt<uint32_t> MaxBBSpeculations(
- "gvn-max-block-speculations", cl::Hidden, cl::init(600), cl::ZeroOrMore,
+ "gvn-max-block-speculations", cl::Hidden, cl::init(600),
cl::desc("Max number of blocks we're willing to speculate on (and recurse "
"into) when deducing if a value is fully available or not in GVN "
"(default = 600)"));
@@ -129,6 +124,8 @@ static cl::opt<uint32_t> MaxBBSpeculations(
struct llvm::GVNPass::Expression {
uint32_t opcode;
bool commutative = false;
+ // The type is not necessarily the result type of the expression; it may be
+ // any additional type needed to disambiguate the expression.
Type *type = nullptr;
SmallVector<uint32_t, 4> varargs;
@@ -178,70 +175,88 @@ template <> struct DenseMapInfo<GVNPass::Expression> {
/// implicitly associated with a rematerialization point which is the
/// location of the instruction from which it was formed.
struct llvm::gvn::AvailableValue {
- enum ValType {
+ enum class ValType {
SimpleVal, // A simple offsetted value that is accessed.
LoadVal, // A value produced by a load.
MemIntrin, // A memory intrinsic which is loaded from.
- UndefVal // A UndefValue representing a value from dead block (which
+ UndefVal, // A UndefValue representing a value from dead block (which
// is not yet physically removed from the CFG).
+ SelectVal, // A pointer select which is loaded from and for which the load
+ // can be replaced by a value select.
};
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
+ /// Val - The value that is live out of the block.
+ Value *Val;
+ /// Kind of the live-out value.
+ ValType Kind;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset = 0;
static AvailableValue get(Value *V, unsigned Offset = 0) {
AvailableValue Res;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
+ Res.Val = V;
+ Res.Kind = ValType::SimpleVal;
Res.Offset = Offset;
return Res;
}
static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
AvailableValue Res;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
+ Res.Val = MI;
+ Res.Kind = ValType::MemIntrin;
Res.Offset = Offset;
return Res;
}
static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
AvailableValue Res;
- Res.Val.setPointer(Load);
- Res.Val.setInt(LoadVal);
+ Res.Val = Load;
+ Res.Kind = ValType::LoadVal;
Res.Offset = Offset;
return Res;
}
static AvailableValue getUndef() {
AvailableValue Res;
- Res.Val.setPointer(nullptr);
- Res.Val.setInt(UndefVal);
+ Res.Val = nullptr;
+ Res.Kind = ValType::UndefVal;
Res.Offset = 0;
return Res;
}
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
- bool isUndefValue() const { return Val.getInt() == UndefVal; }
+ static AvailableValue getSelect(SelectInst *Sel) {
+ AvailableValue Res;
+ Res.Val = Sel;
+ Res.Kind = ValType::SelectVal;
+ Res.Offset = 0;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
+ bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
+ bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
+ bool isUndefValue() const { return Kind == ValType::UndefVal; }
+ bool isSelectValue() const { return Kind == ValType::SelectVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
+ return Val;
}
LoadInst *getCoercedLoadValue() const {
assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
+ return cast<LoadInst>(Val);
}
MemIntrinsic *getMemIntrinValue() const {
assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
+ return cast<MemIntrinsic>(Val);
+ }
+
+ SelectInst *getSelectValue() const {
+ assert(isSelectValue() && "Wrong accessor");
+ return cast<SelectInst>(Val);
}
/// Emit code at the specified insertion point to adjust the value defined
@@ -275,6 +290,10 @@ struct llvm::gvn::AvailableValueInBlock {
return get(BB, AvailableValue::getUndef());
}
+ static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel) {
+ return get(BB, AvailableValue::getSelect(Sel));
+ }
+
/// Emit code at the end of this block to adjust the value defined here to
/// the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
@@ -379,6 +398,39 @@ GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
return e;
}
+GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
+ Expression E;
+ Type *PtrTy = GEP->getType()->getScalarType();
+ const DataLayout &DL = GEP->getModule()->getDataLayout();
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(BitWidth, 0);
+ if (PtrTy->isOpaquePointerTy() &&
+ GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
+ // For opaque pointers, convert into offset representation, to recognize
+ // equivalent address calculations that use different type encoding.
+ LLVMContext &Context = GEP->getContext();
+ E.opcode = GEP->getOpcode();
+ E.type = nullptr;
+ E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand()));
+ for (const auto &Pair : VariableOffsets) {
+ E.varargs.push_back(lookupOrAdd(Pair.first));
+ E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second)));
+ }
+ if (!ConstantOffset.isZero())
+ E.varargs.push_back(
+ lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
+ } else {
+ // If converting to offset representation fails (for typed pointers and
+ // scalable vectors), fall back to type-based implementation:
+ E.opcode = GEP->getOpcode();
+ E.type = GEP->getSourceElementType();
+ for (Use &Op : GEP->operands())
+ E.varargs.push_back(lookupOrAdd(Op));
+ }
+ return E;
+}
+
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
@@ -562,9 +614,11 @@ uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
case Instruction::InsertElement:
case Instruction::ShuffleVector:
case Instruction::InsertValue:
- case Instruction::GetElementPtr:
exp = createExpr(I);
break;
+ case Instruction::GetElementPtr:
+ exp = createGEPExpr(cast<GetElementPtrInst>(I));
+ break;
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
@@ -639,24 +693,24 @@ void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
bool GVNPass::isPREEnabled() const {
- return Options.AllowPRE.getValueOr(GVNEnablePRE);
+ return Options.AllowPRE.value_or(GVNEnablePRE);
}
bool GVNPass::isLoadPREEnabled() const {
- return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE);
+ return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
}
bool GVNPass::isLoadInLoopPREEnabled() const {
- return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
+ return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
}
bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
- return Options.AllowLoadPRESplitBackedge.getValueOr(
+ return Options.AllowLoadPRESplitBackedge.value_or(
GVNEnableSplitBackedgeInLoadPRE);
}
bool GVNPass::isMemDepEnabled() const {
- return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
+ return Options.AllowMemDep.value_or(GVNEnableMemDep);
}
PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
@@ -897,6 +951,17 @@ ConstructSSAForLoadSet(LoadInst *Load,
return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
}
+static LoadInst *findDominatingLoad(Value *Ptr, Type *LoadTy, SelectInst *Sel,
+ DominatorTree &DT) {
+ for (Value *U : Ptr->users()) {
+ auto *LI = dyn_cast<LoadInst>(U);
+ if (LI && LI->getType() == LoadTy && LI->getParent() == Sel->getParent() &&
+ DT.dominates(LI, Sel))
+ return LI;
+ }
+ return nullptr;
+}
+
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
Instruction *InsertPt,
GVNPass &gvn) const {
@@ -937,6 +1002,17 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n'
<< "\n\n\n");
+ } else if (isSelectValue()) {
+ // Introduce a new value select for a load from an eligible pointer select.
+ SelectInst *Sel = getSelectValue();
+ LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel,
+ gvn.getDominatorTree());
+ LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel,
+ gvn.getDominatorTree());
+ assert(L1 && L2 &&
+ "must be able to obtain dominating loads for both value operands of "
+ "the select");
+ Res = SelectInst::Create(Sel->getCondition(), L1, L2, "", Sel);
} else {
llvm_unreachable("Should not materialize value from dead block");
}
@@ -1023,8 +1099,54 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
ORE->emit(R);
}
+/// Check if a load from pointer-select \p Address in \p DepBB can be converted
+/// to a value select. The following conditions need to be satisfied:
+/// 1. The pointer select (\p Address) must be defined in \p DepBB.
+/// 2. Both value operands of the pointer select must be loaded in the same
+/// basic block, before the pointer select.
+/// 3. There must be no instructions between the found loads and \p End that may
+/// clobber the loads.
+static Optional<AvailableValue>
+tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End,
+ Value *Address, Type *LoadTy, DominatorTree &DT,
+ AAResults *AA) {
+
+ auto *Sel = dyn_cast_or_null<SelectInst>(Address);
+ if (!Sel || DepBB != Sel->getParent())
+ return None;
+
+ LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), LoadTy, Sel, DT);
+ LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), LoadTy, Sel, DT);
+ if (!L1 || !L2)
+ return None;
+
+ // Ensure there are no accesses that may modify the locations referenced by
+ // either L1 or L2 between L1, L2 and the specified End iterator.
+ Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2;
+ MemoryLocation L1Loc = MemoryLocation::get(L1);
+ MemoryLocation L2Loc = MemoryLocation::get(L2);
+ if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) {
+ return isModSet(AA->getModRefInfo(&I, L1Loc)) ||
+ isModSet(AA->getModRefInfo(&I, L2Loc));
+ }))
+ return None;
+
+ return AvailableValue::getSelect(Sel);
+}
+
bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
Value *Address, AvailableValue &Res) {
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
+ assert(isa<SelectInst>(Address));
+ if (auto R = tryToConvertLoadOfPtrSelect(
+ Load->getParent(), Load->getIterator(), Address, Load->getType(),
+ getDominatorTree(), getAliasAnalysis())) {
+ Res = *R;
+ return true;
+ }
+ return false;
+ }
+
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1066,9 +1188,7 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
const auto ClobberOff = MD->getClobberOffset(DepLoad);
// GVN cannot handle a negative offset.
- Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
- ? -1
- : ClobberOff.getValue();
+ Offset = (ClobberOff == None || *ClobberOff < 0) ? -1 : *ClobberOff;
}
if (Offset == -1)
Offset =
@@ -1092,6 +1212,7 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
}
}
}
+
// Nothing known about this clobber, have to be conservative
LLVM_DEBUG(
// fast print dep, using operator<< on instruction is too slow.
@@ -1111,12 +1232,11 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
return true;
}
- if (isAllocationFn(DepInst, TLI))
- if (auto *InitVal = getInitialValueOfAllocation(cast<CallBase>(DepInst),
- TLI, Load->getType())) {
- Res = AvailableValue::get(InitVal);
- return true;
- }
+ if (Constant *InitVal =
+ getInitialValueOfAllocation(DepInst, TLI, Load->getType())) {
+ Res = AvailableValue::get(InitVal);
+ return true;
+ }
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
@@ -1176,16 +1296,23 @@ void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
continue;
}
- if (!DepInfo.isDef() && !DepInfo.isClobber()) {
- UnavailableBlocks.push_back(DepBB);
- continue;
- }
-
// The address being loaded in this non-local block may not be the same as
// the pointer operand of the load if PHI translation occurs. Make sure
// to consider the right address.
Value *Address = Deps[i].getAddress();
+ if (!DepInfo.isDef() && !DepInfo.isClobber()) {
+ if (auto R = tryToConvertLoadOfPtrSelect(
+ DepBB, DepBB->end(), Address, Load->getType(), getDominatorTree(),
+ getAliasAnalysis())) {
+ ValuesPerBlock.push_back(
+ AvailableValueInBlock::get(DepBB, std::move(*R)));
+ continue;
+ }
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
AvailableValue AV;
if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
// subtlety: because we know this was a non-local dependency, we know
@@ -1923,8 +2050,9 @@ bool GVNPass::processLoad(LoadInst *L) {
if (Dep.isNonLocal())
return processNonLocalLoad(L);
+ Value *Address = L->getPointerOperand();
// Only handle the local case below
- if (!Dep.isDef() && !Dep.isClobber()) {
+ if (!Dep.isDef() && !Dep.isClobber() && !isa<SelectInst>(Address)) {
// This might be a NonFuncLocal or an Unknown
LLVM_DEBUG(
// fast print dep, using operator<< on instruction is too slow.
@@ -1934,7 +2062,7 @@ bool GVNPass::processLoad(LoadInst *L) {
}
AvailableValue AV;
- if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
+ if (AnalyzeLoadAvailability(L, Dep, Address, AV)) {
Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
// Replace the load!
@@ -2324,7 +2452,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
const DataLayout &DL = I->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
+ if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
bool Changed = false;
if (!I->use_empty()) {
// Simplification can cause a special instruction to become not special.
@@ -2491,6 +2619,7 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
unsigned Iteration = 0;
while (ShouldContinue) {
LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
+ (void) Iteration;
ShouldContinue = iterateOnFunction(F);
Changed |= ShouldContinue;
++Iteration;
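The ClobberOff change above is one instance of a migration that recurs throughout this commit: llvm::Optional's hasValue()/getValue() giving way to contextual bool conversion and operator*. A minimal self-contained sketch of the target idiom, with std::optional standing in for llvm::Optional and clampOffset a hypothetical helper:

    #include <optional>

    // Contextual conversion to bool replaces hasValue(); unary * replaces
    // getValue(). Missing or negative offsets collapse to the -1 sentinel,
    // mirroring the GVN clobber-offset logic above.
    int clampOffset(std::optional<int> ClobberOff) {
      return (!ClobberOff || *ClobberOff < 0) ? -1 : *ClobberOff;
    }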
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index fdc3afd9348a..6cdc671ddb64 100644
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -54,11 +54,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
@@ -126,7 +124,7 @@ using HoistingPointInfo = std::pair<BasicBlock *, SmallVecInsn>;
using HoistingPointList = SmallVector<HoistingPointInfo, 4>;
// A map from a pair of VNs to all the instructions with those VNs.
-using VNType = std::pair<unsigned, unsigned>;
+using VNType = std::pair<unsigned, uintptr_t>;
using VNtoInsns = DenseMap<VNType, SmallVector<Instruction *, 4>>;
@@ -161,7 +159,7 @@ using InValuesType =
// An invalid value number, used when inserting a single value number into
// VNtoInsns.
-enum : unsigned { InvalidVN = ~2U };
+enum : uintptr_t { InvalidVN = ~(uintptr_t)2 };
// Records all scalar instructions candidate for code hoisting.
class InsnInfo {
@@ -187,7 +185,9 @@ public:
void insert(LoadInst *Load, GVNPass::ValueTable &VN) {
if (Load->isSimple()) {
unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
- VNtoLoads[{V, InvalidVN}].push_back(Load);
+ // With opaque pointers we may have loads from the same pointer with
+ // different result types, which should be disambiguated.
+ VNtoLoads[{V, (uintptr_t)Load->getType()}].push_back(Load);
}
}
@@ -261,7 +261,9 @@ public:
GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA,
MemoryDependenceResults *MD, MemorySSA *MSSA)
: DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA),
- MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
+ MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {
+ MSSA->ensureOptimizedUses();
+ }
bool run(Function &F);
@@ -1147,6 +1149,8 @@ std::pair<unsigned, unsigned> GVNHoist::hoist(HoistingPointList &HPL) {
DFSNumber[Repl] = DFSNumber[Last]++;
}
+  // Drop the debug location, as per the debug info update guide.
+ Repl->dropLocation();
NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess);
if (isa<LoadInst>(Repl))
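The widened VNType key matters because, with opaque pointers, two loads can share the pointer's value number while producing different types. A self-contained sketch of the disambiguation with stand-in types (not the real LLVM classes):

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    using VNType = std::pair<unsigned, std::uintptr_t>;

    int main() {
      struct Type {} I32, I64;          // stand-ins for llvm::Type instances
      unsigned PtrVN = 7;               // value number shared by both loads
      std::map<VNType, std::vector<int>> VNtoLoads;
      VNtoLoads[{PtrVN, reinterpret_cast<std::uintptr_t>(&I32)}].push_back(1);
      VNtoLoads[{PtrVN, reinterpret_cast<std::uintptr_t>(&I64)}].push_back(2);
      // Distinct keys: an i32 and an i64 load of %p are no longer merged.
      return VNtoLoads.size() == 2 ? 0 : 1;
    }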
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index e612a82fc89a..720b8e71fd56 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -35,7 +35,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
@@ -45,7 +44,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -383,6 +381,8 @@ public:
}
};
+using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
+
class ValueTable {
DenseMap<Value *, uint32_t> ValueNumbering;
DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
@@ -390,6 +390,7 @@ class ValueTable {
BumpPtrAllocator Allocator;
ArrayRecycler<Value *> Recycler;
uint32_t nextValueNumber = 1;
+ BasicBlocksSet ReachableBBs;
/// Create an expression for I based on its opcode and its uses. If I
/// touches or reads memory, the expression is also based upon its memory
@@ -421,6 +422,11 @@ class ValueTable {
public:
ValueTable() = default;
+ /// Set basic blocks reachable from entry block.
+ void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
+ this->ReachableBBs = ReachableBBs;
+ }
+
/// Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t lookupOrAdd(Value *V) {
@@ -434,6 +440,9 @@ public:
}
Instruction *I = cast<Instruction>(V);
+ if (!ReachableBBs.contains(I->getParent()))
+ return ~0U;
+
InstructionUseExpr *exp = nullptr;
switch (I->getOpcode()) {
case Instruction::Load:
@@ -570,6 +579,7 @@ public:
unsigned NumSunk = 0;
ReversePostOrderTraversal<Function*> RPOT(&F);
+ VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
for (auto *N : RPOT)
NumSunk += sinkBB(N);
@@ -648,12 +658,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
VNums[N]++;
}
unsigned VNumToSink =
- std::max_element(VNums.begin(), VNums.end(),
- [](const std::pair<uint32_t, unsigned> &I,
- const std::pair<uint32_t, unsigned> &J) {
- return I.second < J.second;
- })
- ->first;
+ std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;
if (VNums[VNumToSink] == 1)
// Can't sink anything!
@@ -776,12 +781,9 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
unsigned NumOrigPreds = Preds.size();
// We can only sink instructions through unconditional branches.
- for (auto I = Preds.begin(); I != Preds.end();) {
- if ((*I)->getTerminator()->getNumSuccessors() != 1)
- I = Preds.erase(I);
- else
- ++I;
- }
+ llvm::erase_if(Preds, [](BasicBlock *BB) {
+ return BB->getTerminator()->getNumSuccessors() != 1;
+ });
LockstepReverseIterator LRI(Preds);
SmallVector<SinkingInstructionCandidate, 4> Candidates;
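Both cleanups in GVNSink swap hand-rolled comparators and erase loops for STLExtras helpers. A self-contained C++20 sketch of the same idioms, with LessSecond mimicking llvm::less_second:

    #include <algorithm>
    #include <utility>
    #include <vector>

    struct LessSecond {
      template <typename P> bool operator()(const P &L, const P &R) const {
        return L.second < R.second;     // order pairs by their counts
      }
    };

    int main() {
      std::vector<std::pair<unsigned, unsigned>> VNums{{3, 1}, {9, 4}, {5, 2}};
      // max_element + less_second picks the most frequent value number.
      unsigned VNumToSink =
          std::max_element(VNums.begin(), VNums.end(), LessSecond())->first;
      // erase_if replaces the manual iterator-erase loop over predecessors.
      std::erase_if(VNums, [](const auto &P) { return P.second < 2; });
      return (VNumToSink == 9 && VNums.size() == 2) ? 0 : 1;
    }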
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 82b81003ef21..af6062d142f0 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -42,7 +42,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -496,6 +495,8 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
makeAvailableAt(Op, Loc);
Inst->moveBefore(Loc);
+  // If we moved the instruction before the guard, we must drop its
+  // poison-generating flags.
+ Inst->dropPoisonGeneratingFlags();
}
bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
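The flag drop is needed because hoisting past a guard enlarges the set of executions the instruction sees: an add nuw nsw that was only reached when the guard held may wrap once moved above it. An illustrative helper (compiles only against LLVM headers):

    #include "llvm/IR/Instruction.h"

    // Hoist Inst above the widened guard at Loc, then clear nuw/nsw/exact/
    // inbounds etc., since those flags were only justified under the guard.
    void hoistAboveGuard(llvm::Instruction *Inst, llvm::Instruction *Loc) {
      Inst->moveBefore(Loc);
      Inst->dropPoisonGeneratingFlags();
    }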
diff --git a/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp b/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp
index e2022aba97c4..26f2db183fbf 100644
--- a/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp
+++ b/llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp
@@ -8,7 +8,6 @@
#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
#include "llvm/Analysis/IVUsers.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "iv-users"
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index ceb03eb17f6d..e977dd18be9f 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -25,10 +25,7 @@
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -74,11 +71,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -387,7 +382,7 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
// Delete the old floating point increment.
- Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
// If the FP induction variable still has uses, this is because something else
@@ -605,10 +600,10 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
Intrinsic::getName(Intrinsic::experimental_guard));
bool HasGuards = GuardDecl && !GuardDecl->use_empty();
- SmallVector<PHINode*, 8> LoopPhis;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- LoopPhis.push_back(cast<PHINode>(I));
- }
+ SmallVector<PHINode *, 8> LoopPhis;
+ for (PHINode &PN : L->getHeader()->phis())
+ LoopPhis.push_back(&PN);
+
// Each round of simplification iterates through the SimplifyIVUsers worklist
// for all current phis, then determines whether any IVs can be
// widened. Widening adds new phis to LoopPhis, inducing another round of
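The phi collection now uses BasicBlock::phis() instead of walking from begin() with isa<PHINode>. A minimal sketch of the range idiom (compiles only against LLVM headers):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // phis() yields exactly the block's leading PHI nodes, so no manual
    // isa<PHINode> check or cast is needed.
    void collectPhis(llvm::BasicBlock *Header,
                     llvm::SmallVectorImpl<llvm::PHINode *> &Out) {
      for (llvm::PHINode &PN : Header->phis())
        Out.push_back(&PN);
    }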
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 0e5653eeb7d5..799669a19796 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -56,8 +56,6 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
@@ -1411,12 +1409,12 @@ bool LoopConstrainer::run() {
bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate;
Optional<SubRanges> MaybeSR = calculateSubRanges(IsSignedPredicate);
- if (!MaybeSR.hasValue()) {
+ if (!MaybeSR) {
LLVM_DEBUG(dbgs() << "irce: could not compute subranges\n");
return false;
}
- SubRanges SR = MaybeSR.getValue();
+ SubRanges SR = *MaybeSR;
bool Increasing = MainLoopStructure.IndVarIncreasing;
IntegerType *IVTy =
cast<IntegerType>(Range.getBegin()->getType());
@@ -1429,9 +1427,9 @@ bool LoopConstrainer::run() {
// constructor.
ClonedLoop PreLoop, PostLoop;
bool NeedsPreLoop =
- Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue();
+ Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value();
bool NeedsPostLoop =
- Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue();
+ Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value();
Value *ExitPreLoopAt = nullptr;
Value *ExitMainLoopAt = nullptr;
@@ -1710,7 +1708,7 @@ IntersectSignedRange(ScalarEvolution &SE,
const InductiveRangeCheck::Range &R2) {
if (R2.isEmpty(SE, /* IsSigned */ true))
return None;
- if (!R1.hasValue())
+ if (!R1)
return R2;
auto &R1Value = R1.getValue();
// We never return empty ranges from this function, and R1 is supposed to be
@@ -1739,7 +1737,7 @@ IntersectUnsignedRange(ScalarEvolution &SE,
const InductiveRangeCheck::Range &R2) {
if (R2.isEmpty(SE, /* IsSigned */ false))
return None;
- if (!R1.hasValue())
+ if (!R1)
return R2;
auto &R1Value = R1.getValue();
// We never return empty ranges from this function, and R1 is supposed to be
@@ -1763,10 +1761,14 @@ IntersectUnsignedRange(ScalarEvolution &SE,
}
PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ // There are no loops in the function. Return before computing other expensive
+ // analyses.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
// Get BFI analysis result on demand. Please note that modification of
// CFG invalidates this analysis and we should handle it.
@@ -1854,7 +1856,7 @@ InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
LoopStructure &LS) {
if (SkipProfitabilityChecks)
return true;
- if (GetBFI.hasValue()) {
+ if (GetBFI) {
BlockFrequencyInfo &BFI = (*GetBFI)();
uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
@@ -1920,12 +1922,12 @@ bool InductiveRangeCheckElimination::run(
const char *FailureReason = nullptr;
Optional<LoopStructure> MaybeLoopStructure =
LoopStructure::parseLoopStructure(SE, *L, FailureReason);
- if (!MaybeLoopStructure.hasValue()) {
+ if (!MaybeLoopStructure) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
return false;
}
- LoopStructure LS = MaybeLoopStructure.getValue();
+ LoopStructure LS = *MaybeLoopStructure;
if (!isProfitableToTransform(*L, LS))
return false;
const SCEVAddRecExpr *IndVar =
@@ -1946,10 +1948,10 @@ bool InductiveRangeCheckElimination::run(
for (InductiveRangeCheck &IRC : RangeChecks) {
auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
LS.IsSignedPredicate);
- if (Result.hasValue()) {
+ if (Result) {
auto MaybeSafeIterRange =
IntersectRange(SE, SafeIterRange, Result.getValue());
- if (MaybeSafeIterRange.hasValue()) {
+ if (MaybeSafeIterRange) {
assert(
!MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) &&
"We should never return empty ranges!");
@@ -1959,7 +1961,7 @@ bool InductiveRangeCheckElimination::run(
}
}
- if (!SafeIterRange.hasValue())
+ if (!SafeIterRange)
return false;
LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT,
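The reordering in IRCEPass::run is a general new-pass-manager pattern: request the cheap analysis first and bail before forcing the expensive ones. A sketch with a hypothetical pass (compiles only against LLVM headers):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/PassManager.h"

    struct ExamplePass : llvm::PassInfoMixin<ExamplePass> {
      llvm::PreservedAnalyses run(llvm::Function &F,
                                  llvm::FunctionAnalysisManager &AM) {
        auto &LI = AM.getResult<llvm::LoopAnalysis>(F);
        if (LI.empty())                 // loop-free: never compute SCEV/BPI
          return llvm::PreservedAnalyses::all();
        auto &SE = AM.getResult<llvm::ScalarEvolutionAnalysis>(F);
        (void)SE;                       // ... transform the loops here ...
        return llvm::PreservedAnalyses::none();
      }
    };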
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 8f5933b7bd71..5eefde2e37a1 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -92,8 +92,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -182,7 +180,7 @@ public:
class InferAddressSpacesImpl {
AssumptionCache &AC;
- DominatorTree *DT = nullptr;
+ const DominatorTree *DT = nullptr;
const TargetTransformInfo *TTI = nullptr;
const DataLayout *DL = nullptr;
@@ -213,10 +211,11 @@ class InferAddressSpacesImpl {
// Changes the flat address expressions in function F to point to specific
// address spaces if InferredAddrSpace says so. Postorder is the postorder of
// all flat expressions in the use-def graph of function F.
- bool rewriteWithNewAddressSpaces(
- const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace,
- const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const;
+ bool
+ rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
@@ -240,7 +239,7 @@ class InferAddressSpacesImpl {
unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const;
public:
- InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT,
+ InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT,
const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
: AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
bool run(Function &F);
@@ -280,15 +279,15 @@ static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
// arithmetic may also be undefined after invalid pointer reinterpret cast.
// However, as we confirm through the target hooks that it's a no-op
// addrspacecast, it doesn't matter since the bits should be the same.
+ unsigned P2IOp0AS = P2I->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned I2PAS = I2P->getType()->getPointerAddressSpace();
return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
I2P->getOperand(0)->getType(), I2P->getType(),
DL) &&
CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
P2I->getOperand(0)->getType(), P2I->getType(),
DL) &&
- TTI->isNoopAddrSpaceCast(
- P2I->getOperand(0)->getType()->getPointerAddressSpace(),
- I2P->getType()->getPointerAddressSpace());
+ (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(P2IOp0AS, I2PAS));
}
// Returns true if V is an address expression.
@@ -332,8 +331,7 @@ getPointerOperands(const Value &V, const DataLayout &DL,
switch (Op.getOpcode()) {
case Instruction::PHI: {
auto IncomingValues = cast<PHINode>(Op).incoming_values();
- return SmallVector<Value *, 2>(IncomingValues.begin(),
- IncomingValues.end());
+ return {IncomingValues.begin(), IncomingValues.end()};
}
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
@@ -655,10 +653,13 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
case Instruction::IntToPtr: {
assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
- assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
- if (Src->getType() != NewPtrType)
- return new BitCastInst(Src, NewPtrType);
- return Src;
+ if (Src->getType() == NewPtrType)
+ return Src;
+
+  // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
+  // source address space from a generic pointer source and need to insert a
+  // cast back.
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(Src, NewPtrType);
}
default:
llvm_unreachable("Unexpected opcode");
@@ -726,7 +727,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
NewOperands.push_back(cast<Constant>(NewOperand));
continue;
}
- if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
+ if (auto *CExpr = dyn_cast<ConstantExpr>(Operand))
if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
IsNew = true;
@@ -738,7 +739,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
}
// If !IsNew, we will replace the Value with itself. However, replaced values
- // are assumed to wrapped in a addrspace cast later so drop it now.
+  // are assumed to be wrapped in an addrspacecast later so drop it now.
if (!IsNew)
return nullptr;
@@ -821,8 +822,8 @@ bool InferAddressSpacesImpl::run(Function &F) {
// Changes the address spaces of the flat address expressions that are inferred
// to point to a specific address space.
- return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace,
- PredicatedAS, &F);
+ return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS,
+ &F);
}
// Constants need to be tracked through RAUW to handle cases with nested
@@ -1010,7 +1011,7 @@ static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
}
/// Update memory intrinsic uses that require more complex processing than
-/// simple memory instructions. Thse require re-mangling and may have multiple
+/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
Value *NewV) {
@@ -1020,8 +1021,7 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
- B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(),
- MaybeAlign(MSI->getDestAlignment()),
+ B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(), MSI->getDestAlign(),
false, // isVolatile
TBAA, ScopeMD, NoAliasMD);
} else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
@@ -1104,7 +1104,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
}
bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
- const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
+ ArrayRef<WeakTrackingVH> Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
@@ -1178,7 +1178,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
I = skipToNextUser(I, E);
if (isSimplePointerUseValidToReplace(
- TTI, U, V->getType()->getPointerAddressSpace())) {
+ *TTI, U, V->getType()->getPointerAddressSpace())) {
// If V is used as the pointer operand of a compatible memory operation,
// sets the pointer operand to NewV. This replacement does not change
// the element type, so the resultant load/store is still valid.
@@ -1239,8 +1239,16 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (!cast<PointerType>(ASC->getType())
->hasSameElementTypeAs(
cast<PointerType>(NewV->getType()))) {
+ BasicBlock::iterator InsertPos;
+ if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
+ InsertPos = std::next(NewVInst->getIterator());
+ else if (Instruction *VInst = dyn_cast<Instruction>(V))
+ InsertPos = std::next(VInst->getIterator());
+ else
+ InsertPos = ASC->getIterator();
+
NewV = CastInst::Create(Instruction::BitCast, NewV,
- ASC->getType(), "", ASC);
+ ASC->getType(), "", &*InsertPos);
}
ASC->replaceAllUsesWith(NewV);
DeadInstructions.push_back(ASC);
@@ -1249,12 +1257,18 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
}
// Otherwise, replaces the use with flat(NewV).
- if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Instruction *VInst = dyn_cast<Instruction>(V)) {
// Don't create a copy of the original addrspacecast.
if (U == V && isa<AddrSpaceCastInst>(V))
continue;
- BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
+ // Insert the addrspacecast after NewV.
+ BasicBlock::iterator InsertPos;
+ if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
+ InsertPos = std::next(NewVInst->getIterator());
+ else
+ InsertPos = std::next(VInst->getIterator());
+
while (isa<PHINode>(InsertPos))
++InsertPos;
U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
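The P2IOp0AS/I2PAS locals above also make the trivial case explicit: identical address spaces are a no-op cast by definition, so the target hook is consulted only on a mismatch. A self-contained sketch of just that predicate, with TTIIsNoop standing in for the TTI callback:

    // Equal source/destination address spaces short-circuit before asking
    // the target whether the cast is a no-op.
    bool isNoopASCast(unsigned SrcAS, unsigned DstAS,
                      bool (*TTIIsNoop)(unsigned, unsigned)) {
      return SrcAS == DstAS || TTIIsNoop(SrcAS, DstAS);
    }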
diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
index c11d2e4c1d6b..4644905adba3 100644
--- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -7,21 +7,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -55,7 +51,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
DeadInstsInBB.push_back(&I);
Changed = true;
} else if (!I.use_empty()) {
- if (Value *V = SimplifyInstruction(&I, SQ, ORE)) {
+ if (Value *V = simplifyInstruction(&I, SQ, ORE)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I.users())
Next->insert(cast<Instruction>(U));
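The SimplifyInstruction to simplifyInstruction spelling change recurs across this commit as InstructionSimplify's entry points move to lower-case initials. A sketch of the renamed call (compiles only against LLVM headers):

    #include "llvm/Analysis/InstructionSimplify.h"

    // Returns the simplified value, or nullptr when I cannot be simplified.
    llvm::Value *trySimplify(llvm::Instruction *I,
                             const llvm::SimplifyQuery &SQ) {
      return llvm::simplifyInstruction(I, SQ);
    }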
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index a3efad104ca6..5caefc422921 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -56,7 +56,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -74,7 +73,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
@@ -106,11 +104,6 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
cl::Hidden);
-static cl::opt<bool> JumpThreadingFreezeSelectCond(
- "jump-threading-freeze-select-cond",
- cl::desc("Freeze the condition when unfolding select"), cl::init(false),
- cl::Hidden);
-
static cl::opt<bool> ThreadAcrossLoopHeaders(
"jump-threading-across-loop-headers",
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
@@ -140,8 +133,7 @@ namespace {
public:
static char ID; // Pass identification
- JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1)
- : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) {
+ JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@@ -175,12 +167,11 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
// Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) {
- return new JumpThreading(InsertFr, Threshold);
+FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
+ return new JumpThreading(Threshold);
}
-JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) {
- InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr;
+JumpThreadingPass::JumpThreadingPass(int T) {
DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
@@ -326,7 +317,7 @@ bool JumpThreading::runOnFunction(Function &F) {
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
if (F.hasProfileData()) {
- LoopInfo LI{DominatorTree(F)};
+ LoopInfo LI{*DT};
BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
@@ -491,14 +482,16 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// at the end of block. RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
-static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
+static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,
+ BasicBlock *KnownAtEndOfBB) {
+ bool Changed = false;
assert(Cond->getType() == ToVal->getType());
- auto *BB = Cond->getParent();
// We can unconditionally replace all uses in non-local blocks (i.e. uses
// strictly dominated by BB), since LVI information is true from the
// terminator of BB.
- replaceNonLocalUsesWith(Cond, ToVal);
- for (Instruction &I : reverse(*BB)) {
+ if (Cond->getParent() == KnownAtEndOfBB)
+ Changed |= replaceNonLocalUsesWith(Cond, ToVal);
+ for (Instruction &I : reverse(*KnownAtEndOfBB)) {
// Reached the Cond whose uses we are trying to replace, so there are no
// more uses.
if (&I == Cond)
@@ -507,10 +500,13 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
// of BB, where we know Cond is ToVal.
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
- I.replaceUsesOfWith(Cond, ToVal);
+ Changed |= I.replaceUsesOfWith(Cond, ToVal);
}
- if (Cond->use_empty() && !Cond->mayHaveSideEffects())
+ if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {
Cond->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
}
/// Return the cost of duplicating a piece of this block from first non-phi
@@ -792,6 +788,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
if (Preference != WantInteger)
return false;
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ const DataLayout &DL = BO->getModule()->getDataLayout();
PredValueInfoTy LHSVals;
computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,
WantInteger, RecursionSet, CxtI);
@@ -799,7 +796,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
// Try to use constant folding to simplify the binary operator.
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
- Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+ Constant *Folded =
+ ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
Result.emplace_back(KC, LHSVal.second);
@@ -835,7 +833,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
LHS = CmpLHS->DoPHITranslation(BB, PredBB);
RHS = PN->getIncomingValue(i);
}
- Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
+ Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});
if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -1135,34 +1133,21 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
return ConstantFolded;
}
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ // Some of the following optimization can safely work on the unfrozen cond.
+ Value *CondWithoutFreeze = CondInst;
+ if (auto *FI = dyn_cast<FreezeInst>(CondInst))
+ CondWithoutFreeze = FI->getOperand(0);
+
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {
// If we're branching on a conditional, LVI might be able to determine
// it's value at the branch instruction. We only handle comparisons
// against a constant at this time.
- // TODO: This should be extended to handle switches as well.
- BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
- Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
- if (CondBr && CondConst) {
- // We should have returned as soon as we turn a conditional branch to
- // unconditional. Because its no longer interesting as far as jump
- // threading is concerned.
- assert(CondBr->isConditional() && "Threading on unconditional terminator");
-
+ if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
LazyValueInfo::Tristate Ret =
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
- CondConst, CondBr, /*UseBlockValue=*/false);
+ CondConst, BB->getTerminator(),
+ /*UseBlockValue=*/false);
if (Ret != LazyValueInfo::Unknown) {
- unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
- unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
- BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
- ToRemoveSucc->removePredecessor(BB, true);
- BranchInst *UncondBr =
- BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
- UncondBr->setDebugLoc(CondBr->getDebugLoc());
- ++NumFolds;
- CondBr->eraseFromParent();
- if (CondCmp->use_empty())
- CondCmp->eraseFromParent();
// We can safely replace *some* uses of the CondInst if it has
// exactly one value as returned by LVI. RAUW is incorrect in the
// presence of guards and assumes, that have the `Cond` as the use. This
@@ -1170,17 +1155,11 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
- else if (CondCmp->getParent() == BB) {
- auto *CI = Ret == LazyValueInfo::True ?
- ConstantInt::getTrue(CondCmp->getType()) :
- ConstantInt::getFalse(CondCmp->getType());
- replaceFoldableUses(CondCmp, CI);
- }
- DTU->applyUpdatesPermissive(
- {{DominatorTree::Delete, BB, ToRemoveSucc}});
- if (HasProfileData)
- BPI->eraseBlock(BB);
- return true;
+ auto *CI = Ret == LazyValueInfo::True ?
+ ConstantInt::getTrue(CondCmp->getType()) :
+ ConstantInt::getFalse(CondCmp->getType());
+ if (replaceFoldableUses(CondCmp, CI, BB))
+ return true;
}
// We did not manage to simplify this branch, try to see whether
@@ -1198,11 +1177,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// for loads that are used by a switch or by the condition for the branch. If
// we see one, check to see if it's partially redundant. If so, insert a PHI
// which can then be used to thread the values.
- Value *SimplifyValue = CondInst;
-
- if (auto *FI = dyn_cast<FreezeInst>(SimplifyValue))
- // Look into freeze's operand
- SimplifyValue = FI->getOperand(0);
+ Value *SimplifyValue = CondWithoutFreeze;
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
@@ -1227,10 +1202,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in
// the current block, see if we can simplify.
- PHINode *PN = dyn_cast<PHINode>(
- isa<FreezeInst>(CondInst) ? cast<FreezeInst>(CondInst)->getOperand(0)
- : CondInst);
-
+ PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);
if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return processBranchOnPHI(PN);
@@ -1253,6 +1225,17 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
return false;
Value *Cond = BI->getCondition();
+ // Assuming that predecessor's branch was taken, if pred's branch condition
+ // (V) implies Cond, Cond can be either true, undef, or poison. In this case,
+ // freeze(Cond) is either true or a nondeterministic value.
+ // If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true
+ // without affecting other instructions.
+ auto *FICond = dyn_cast<FreezeInst>(Cond);
+ if (FICond && FICond->hasOneUse())
+ Cond = FICond->getOperand(0);
+ else
+ FICond = nullptr;
+
BasicBlock *CurrentBB = BB;
BasicBlock *CurrentPred = BB->getSinglePredecessor();
unsigned Iter = 0;
@@ -1269,6 +1252,15 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;
Optional<bool> Implication =
isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);
+
+ // If the branch condition of BB (which is Cond) and CurrentPred are
+ // exactly the same freeze instruction, Cond can be folded into CondIsTrue.
+ if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {
+ if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==
+ FICond->getOperand(0))
+ Implication = CondIsTrue;
+ }
+
if (Implication) {
BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
@@ -1277,6 +1269,9 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
UncondBI->setDebugLoc(BI->getDebugLoc());
++NumFolds;
BI->eraseFromParent();
+ if (FICond)
+ FICond->eraseFromParent();
+
DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
if (HasProfileData)
BPI->eraseBlock(BB);
@@ -1338,10 +1333,10 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
combineMetadataForCSE(NLoadI, LoadI, false);
};
- // If the returned value is the load itself, replace with an undef. This can
+ // If the returned value is the load itself, replace with poison. This can
// only happen in dead loops.
if (AvailableVal == LoadI)
- AvailableVal = UndefValue::get(LoadI->getType());
+ AvailableVal = PoisonValue::get(LoadI->getType());
if (AvailableVal->getType() != LoadI->getType())
AvailableVal = CastInst::CreateBitOrPointerCast(
AvailableVal, LoadI->getType(), "", LoadI);
@@ -1566,10 +1561,8 @@ findMostPopularDest(BasicBlock *BB,
DestPopularity[PredToDest.second]++;
// Find the most popular dest.
- using VT = decltype(DestPopularity)::value_type;
auto MostPopular = std::max_element(
- DestPopularity.begin(), DestPopularity.end(),
- [](const VT &L, const VT &R) { return L.second < R.second; });
+ DestPopularity.begin(), DestPopularity.end(), llvm::less_second());
// Okay, we have finally picked the most popular destination.
return MostPopular->first;
@@ -1742,9 +1735,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
- else if (OnlyVal && OnlyVal != MultipleVal &&
- CondInst->getParent() == BB)
- replaceFoldableUses(CondInst, OnlyVal);
+ else if (OnlyVal && OnlyVal != MultipleVal)
+ replaceFoldableUses(CondInst, OnlyVal, BB);
}
return true;
}
@@ -2672,7 +2664,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV = SimplifyInstruction(
+ if (Value *IV = simplifyInstruction(
New,
{BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
@@ -2912,9 +2904,7 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
continue;
// Expand the select.
Value *Cond = SI->getCondition();
- if (InsertFreezeWhenUnfoldingSelect &&
- !isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI,
- &DTU->getDomTree()))
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
Cond = new FreezeInst(Cond, "cond.fr", SI);
Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false);
BasicBlock *SplitBB = SI->getParent();
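JumpThreading now peels a one-use freeze off the branch condition before the implication checks, since folding such a freeze to a concrete bool affects no other users. A minimal sketch of the peel (compiles only against LLVM headers):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"

    // A single-use freeze feeds only this branch, so the pass may reason
    // about (and later fold) the unfrozen condition directly.
    llvm::Value *stripSingleUseFreeze(llvm::Value *Cond) {
      if (auto *FI = llvm::dyn_cast<llvm::FreezeInst>(Cond))
        if (FI->hasOneUse())
          return FI->getOperand(0);
      return Cond;
    }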
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 7fb1a25bdf13..492f4e40395a 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -37,29 +37,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -78,7 +76,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -88,6 +85,11 @@
#include <utility>
using namespace llvm;
+namespace llvm {
+class BlockFrequencyInfo;
+class LPMUpdater;
+} // namespace llvm
+
#define DEBUG_TYPE "licm"
STATISTIC(NumCreatedBlocks, "Number of blocks created");
@@ -114,8 +116,7 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
-// address the same issue. This flag applies only when LICM uses MemorySSA
-// instead on AliasSetTracker. LICM calls MemorySSAWalker's
+// address the same issue. LICM calls MemorySSAWalker's
// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
// which may not be precise, since optimizeUses is capped. The result is
@@ -143,37 +144,32 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
OptimizationRemarkEmitter *ORE);
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE,
- const Instruction *CtxI = nullptr);
-static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
- AliasSetTracker *CurAST, Loop *CurLoop,
- AAResults *AA);
-static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop, Instruction &I,
- SinkAndHoistLICMFlags &Flags);
-static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
- MemoryUse &MU);
+static bool isSafeToExecuteUnconditionally(
+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+ bool AllowSpeculation);
+static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop, Instruction &I,
+ SinkAndHoistLICMFlags &Flags);
+static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
+ MemoryUse &MU);
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU);
+ MemorySSAUpdater &MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE);
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
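The prototype churn above follows one rule applied across LICM in this commit: MemorySSA is now mandatory, so MemorySSAUpdater is passed by reference rather than by nullable pointer, and the AliasSetTracker alternatives disappear. Schematically (signatures abbreviated):

    // before: the updater could be null when LICM ran on an AliasSetTracker
    //   static bool sink(..., MemorySSAUpdater *MSSAU, ...);
    // after: MemorySSA always exists, so callees take a reference
    //   static bool sink(..., MemorySSAUpdater &MSSAU, ...);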
@@ -188,21 +184,26 @@ struct LoopInvariantCodeMotion {
OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap)
+ unsigned LicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation)
: LicmMssaOptCap(LicmMssaOptCap),
- LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+ LicmAllowSpeculation(LicmAllowSpeculation) {}
private:
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
+ bool LicmAllowSpeculation;
};
struct LegacyLICMPass : public LoopPass {
static char ID; // Pass identification, replacement for typeid
LegacyLICMPass(
unsigned LicmMssaOptCap = SetLicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
- : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
+ unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation = true)
+ : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation) {
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
}
@@ -265,7 +266,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
+ Opts.AllowSpeculation);
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
&AR.SE, AR.MSSA, &ORE))
return PreservedAnalyses::all();
@@ -279,6 +281,16 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
return PA;
}
+void LICMPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
+ OS << ">";
+}
+
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
@@ -290,7 +302,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(LN.getParent());
- LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
+ Opts.AllowSpeculation);
Loop &OutermostLoop = LN.getOutermostLoop();
bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
@@ -308,6 +321,16 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
return PA;
}
+void LNICMPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
+ OS << ">";
+}
+
char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
false, false)
@@ -321,8 +344,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap) {
- return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+ unsigned LicmMssaNoAccForPromotionCap,
+ bool LicmAllowSpeculation) {
+ return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ LicmAllowSpeculation);
}
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
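For reference, the option spellings the new printPipeline overloads emit, assuming the standard textual-pipeline framing around the pass name (licm or lnicm):

    // licm<allowspeculation>      when Opts.AllowSpeculation is true
    // licm<no-allowspeculation>   when Opts.AllowSpeculation is false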
@@ -365,6 +390,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
+ MSSA->ensureOptimizedUses();
// If this loop has metadata indicating that LICM is not to be performed then
// just exit.
@@ -411,14 +437,15 @@ bool LoopInvariantCodeMotion::runOnLoop(
if (L->hasDedicatedExits())
Changed |= LoopNestMode
? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
- DT, BFI, TLI, TTI, L, &MSSAU,
+ DT, BFI, TLI, TTI, L, MSSAU,
&SafetyInfo, Flags, ORE)
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
- TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
+ TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE);
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
- &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
+ MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
+ LicmAllowSpeculation);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -451,8 +478,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
PredIteratorCache PIC;
// Promoting one set of accesses may make the pointers for another set
- // loop invariant, so run this in a loop (with the MaybePromotable set
- // decreasing in size over time).
+ // loop invariant, so run this in a loop.
bool Promoted = false;
bool LocalPromoted;
do {
@@ -460,8 +486,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
for (const SmallSetVector<Value *, 8> &PointerMustAliases :
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
- LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+ DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
@@ -502,17 +528,17 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, MemorySSAUpdater *MSSAU,
+ Loop *CurLoop, MemorySSAUpdater &MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion.");
- // We want to visit children before parents. We will enque all the parents
+ // We want to visit children before parents. We will enqueue all the parents
// before their children in the worklist and process the worklist in reverse
// order.
SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
@@ -550,8 +576,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
if (!I.mayHaveSideEffects() &&
isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true,
- &Flags, ORE)) {
+ canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
@@ -564,14 +589,14 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
return Changed;
}
bool llvm::sinkRegionForLoopNest(
DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
bool Changed = false;
@@ -600,7 +625,7 @@ private:
LoopInfo *LI;
DominatorTree *DT;
Loop *CurLoop;
- MemorySSAUpdater *MSSAU;
+ MemorySSAUpdater &MSSAU;
// A map of blocks in the loop to the block their instructions will be hoisted
// to.
@@ -612,7 +637,7 @@ private:
public:
ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
- MemorySSAUpdater *MSSAU)
+ MemorySSAUpdater &MSSAU)
: LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
void registerPossiblyHoistableBranch(BranchInst *BI) {
@@ -788,7 +813,7 @@ public:
if (HoistTarget == InitialPreheader) {
// Phis in the loop header now need to use the new preheader.
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
// The new preheader dominates the loop header.
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
@@ -822,13 +847,14 @@ public:
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, Loop *CurLoop,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
- OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
+ OptimizationRemarkEmitter *ORE, bool LoopNestMode,
+ bool AllowSpeculation) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion.");
ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
@@ -873,11 +899,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
- true, &Flags, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
- CurLoop->getLoopPreheader()->getTerminator())) {
+ CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
@@ -982,7 +1007,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
// Now that we've finished hoisting make sure that LI and DT are still
// valid.
@@ -1083,30 +1108,19 @@ bool isHoistableAndSinkableInst(Instruction &I) {
isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
isa<InsertValueInst>(I) || isa<FreezeInst>(I));
}
-/// Return true if all of the alias sets within this AST are known not to
-/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
-bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU,
- const Loop *L) {
- if (CurAST) {
- for (AliasSet &AS : *CurAST) {
- if (!AS.isForwardingAliasSet() && AS.isMod()) {
- return false;
- }
- }
- return true;
- } else { /*MSSAU*/
- for (auto *BB : L->getBlocks())
- if (MSSAU->getMemorySSA()->getBlockDefs(BB))
- return false;
- return true;
- }
+/// Return true if MSSA knows there are no MemoryDefs in the loop.
+bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
+ for (auto *BB : L->getBlocks())
+ if (MSSAU.getMemorySSA()->getBlockDefs(BB))
+ return false;
+ return true;
}
/// Return true if I is the only Instruction with a MemoryAccess in L.
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
- const MemorySSAUpdater *MSSAU) {
+ const MemorySSAUpdater &MSSAU) {
for (auto *BB : L->getBlocks())
- if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) {
+ if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
for (const auto &Acc : *Accs) {
if (isa<MemoryPhi>(&Acc))
@@ -1121,22 +1135,15 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
}
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU,
+ Loop *CurLoop, MemorySSAUpdater &MSSAU,
bool TargetExecutesOncePerLoop,
- SinkAndHoistLICMFlags *Flags,
+ SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
-
// If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I))
return false;
- MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
- if (MSSA)
- assert(Flags != nullptr && "Flags cannot be null.");
-
+ MemorySSA *MSSA = MSSAU.getMemorySSA();
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -1156,13 +1163,8 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true;
- bool Invalidated;
- if (CurAST)
- Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST,
- CurLoop, AA);
- else
- Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
+ bool Invalidated = pointerInvalidatedByLoop(
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1210,24 +1212,17 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (AAResults::onlyAccessesArgPointees(Behavior)) {
// TODO: expand to writeable arguments
for (Value *Op : CI->args())
- if (Op->getType()->isPointerTy()) {
- bool Invalidated;
- if (CurAST)
- Invalidated = pointerInvalidatedByLoop(
- MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA);
- else
- Invalidated = pointerInvalidatedByLoopWithMSSA(
+ if (Op->getType()->isPointerTy() &&
+ pointerInvalidatedByLoop(
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
- *Flags);
- if (Invalidated)
- return false;
- }
+ Flags))
+ return false;
return true;
}
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
- if (isReadOnly(CurAST, MSSAU, CurLoop))
+ if (isReadOnly(MSSAU, CurLoop))
return true;
}
@@ -1238,21 +1233,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
} else if (auto *FI = dyn_cast<FenceInst>(&I)) {
// Fences alias (most) everything to provide ordering. For the moment,
// just give up if there are any other memory operations in the loop.
- if (CurAST) {
- auto Begin = CurAST->begin();
- assert(Begin != CurAST->end() && "must contain FI");
- if (std::next(Begin) != CurAST->end())
- // constant memory for instance, TODO: handle better
- return false;
- auto *UniqueI = Begin->getUniqueInstruction();
- if (!UniqueI)
- // other memory op, give up
- return false;
- (void)FI; // suppress unused variable warning
- assert(UniqueI == FI && "AS must contain FI");
- return true;
- } else // MSSAU
- return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
+ return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isUnordered())
return false; // Don't sink/hoist volatile or ordered atomic store!
@@ -1262,68 +1243,54 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// load store promotion instead. TODO: We can extend this to cases where
// there is exactly one write to the location and that write dominates an
// arbitrary number of reads in the loop.
- if (CurAST) {
- auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI));
-
- if (AS.isRef() || !AS.isMustAlias())
- // Quick exit test, handled by the full path below as well.
- return false;
- auto *UniqueI = AS.getUniqueInstruction();
- if (!UniqueI)
- // other memory op, give up
- return false;
- assert(UniqueI == SI && "AS must contain SI");
+ if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- } else { // MSSAU
- if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
- return true;
- // If there are more accesses than the Promotion cap or no "quota" to
- // check clobber, then give up as we're not walking a list that long.
- if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls())
- return false;
- // If there are interfering Uses (i.e. their defining access is in the
- // loop), or ordered loads (stored as Defs!), don't move this store.
- // Could do better here, but this is conservatively correct.
- // TODO: Cache set of Uses on the first walk in runOnLoop, update when
- // moving accesses. Can also extend to dominating uses.
- auto *SIMD = MSSA->getMemoryAccess(SI);
- for (auto *BB : CurLoop->getBlocks())
- if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
- for (const auto &MA : *Accesses)
- if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
- auto *MD = MU->getDefiningAccess();
- if (!MSSA->isLiveOnEntryDef(MD) &&
- CurLoop->contains(MD->getBlock()))
- return false;
- // Disable hoisting past potentially interfering loads. Optimized
- // Uses may point to an access outside the loop, as getClobbering
- // checks the previous iteration when walking the backedge.
- // FIXME: More precise: no Uses that alias SI.
- if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU))
- return false;
- } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
- if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
- (void)LI; // Silence warning.
- assert(!LI->isUnordered() && "Expected unordered load");
+ // If there are more accesses than the Promotion cap or no "quota" to
+ // check clobber, then give up as we're not walking a list that long.
+ if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls())
+ return false;
+ // If there are interfering Uses (i.e. their defining access is in the
+ // loop), or ordered loads (stored as Defs!), don't move this store.
+ // Could do better here, but this is conservatively correct.
+ // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+ // moving accesses. Can also extend to dominating uses.
+ auto *SIMD = MSSA->getMemoryAccess(SI);
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+ for (const auto &MA : *Accesses)
+ if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ auto *MD = MU->getDefiningAccess();
+ if (!MSSA->isLiveOnEntryDef(MD) &&
+ CurLoop->contains(MD->getBlock()))
+ return false;
+ // Disable hoisting past potentially interfering loads. Optimized
+ // Uses may point to an access outside the loop, as getClobbering
+ // checks the previous iteration when walking the backedge.
+ // FIXME: More precise: no Uses that alias SI.
+ if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
+ return false;
+ } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
+ if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+ (void)LI; // Silence warning.
+ assert(!LI->isUnordered() && "Expected unordered load");
+ return false;
+ }
+            // Any call, while it may not be clobbering SI, may still be a use.
+ if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
+ // Check if the call may read from the memory location written
+ // to by SI. Check CI's attributes and arguments; the number of
+ // such checks performed is limited above by NoOfMemAccTooLarge.
+ ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
+ if (isModOrRefSet(MRI))
return false;
- }
- // Any call, while it may not be clobbering SI, it may be a use.
- if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
- // Check if the call may read from the memory location written
- // to by SI. Check CI's attributes and arguments; the number of
- // such checks performed is limited above by NoOfMemAccTooLarge.
- ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
- if (isModOrRefSet(MRI))
- return false;
- }
}
- }
- auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- Flags->incrementClobberingCalls();
- // If there are no clobbering Defs in the loop, store is safe to hoist.
- return MSSA->isLiveOnEntryDef(Source) ||
- !CurLoop->contains(Source->getBlock());
- }
+ }
+ }
+ auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+ Flags.incrementClobberingCalls();
+ // If there are no clobbering Defs in the loop, store is safe to hoist.
+ return MSSA->isLiveOnEntryDef(Source) ||
+ !CurLoop->contains(Source->getBlock());
}
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
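With the AliasSetTracker path gone, the store case ends in a single MemorySSA walker query: a store is hoistable only if its clobbering access is loop-invariant. A sketch of that final step, mirroring the calls in the hunk above (not a public LLVM entry point):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool storeHasNoClobberInLoop(MemorySSA *MSSA, StoreInst *SI,
                                    const Loop *CurLoop) {
  // Ask the skip-self walker for the nearest access clobbering SI.
  MemoryAccess *Source =
      MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
  // Safe iff that clobber is the live-on-entry placeholder or sits
  // outside the loop; anything inside the loop may change the location.
  return MSSA->isLiveOnEntryDef(Source) ||
         !CurLoop->contains(Source->getBlock());
}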
@@ -1421,7 +1388,7 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
- const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) {
+ const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
Instruction *New;
if (auto *CI = dyn_cast<CallInst>(&I)) {
const auto &BlockColors = SafetyInfo->getBlockColors();
@@ -1457,16 +1424,16 @@ static Instruction *cloneInstructionInExitBlock(
if (!I.getName().empty())
New->setName(I.getName() + ".le");
- if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
// Create a new MemoryAccess and let MemorySSA set its defining access.
- MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
+ MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
New, nullptr, New->getParent(), MemorySSA::Beginning);
if (NewMemAcc) {
if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
- MSSAU->insertDef(MemDef, /*RenameUses=*/true);
+ MSSAU.insertDef(MemDef, /*RenameUses=*/true);
else {
auto *MemUse = cast<MemoryUse>(NewMemAcc);
- MSSAU->insertUse(MemUse, /*RenameUses=*/true);
+ MSSAU.insertUse(MemUse, /*RenameUses=*/true);
}
}
}
@@ -1492,25 +1459,22 @@ static Instruction *cloneInstructionInExitBlock(
}
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(&I);
+ MemorySSAUpdater &MSSAU) {
+ MSSAU.removeMemoryAccess(&I);
SafetyInfo.removeInstruction(&I);
I.eraseFromParent();
}
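Most of the churn in this file is one mechanical pattern: MemorySSAUpdater and SinkAndHoistLICMFlags move from nullable pointers to references, now that the AliasSetTracker alternative is gone, so callees stop re-checking availability. A generic, self-contained sketch of the idiom (toy type, hypothetical names):

// Before: every callee guards against null.
struct Updater {
  int Invalidations = 0;
  void invalidate() { ++Invalidations; }
};

static void eraseBefore(Updater *U) {
  if (U) // guard repeated in every callee
    U->invalidate();
}

// After: the caller proves availability once; callees just use it.
static void eraseAfter(Updater &U) { U.invalidate(); }

int main() {
  Updater U;
  eraseBefore(&U);
  eraseAfter(U);
  return U.Invalidations == 2 ? 0 : 1;
}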
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater *MSSAU,
+ MemorySSAUpdater &MSSAU,
ScalarEvolution *SE) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest.getParent());
I.moveBefore(&Dest);
- if (MSSAU)
- if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&I)))
- MSSAU->moveToPlace(OldMemAcc, Dest.getParent(),
- MemorySSA::BeforeTerminator);
+ if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU.getMemorySSA()->getMemoryAccess(&I)))
+ MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
if (SE)
SE->forgetValue(&I);
}
@@ -1519,7 +1483,7 @@ static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater &MSSAU) {
assert(isTriviallyReplaceablePHI(*TPN, *I) &&
"Expect only trivially replaceable PHI");
BasicBlock *ExitBlock = TPN->getParent();
@@ -1625,7 +1589,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
OptimizationRemarkEmitter *ORE) {
bool Changed = false;
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
@@ -1642,7 +1606,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
continue;
if (!DT->isReachableFromEntry(User->getParent())) {
- U = UndefValue::get(I.getType());
+ U = PoisonValue::get(I.getType());
Changed = true;
continue;
}
@@ -1655,7 +1619,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// unreachable.
BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {
- U = UndefValue::get(I.getType());
+ U = PoisonValue::get(I.getType());
Changed = true;
continue;
}
@@ -1669,7 +1633,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// Split predecessors of the PHI so that we can make users trivially
// replaceable.
- splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU);
+ splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
// Should rebuild the iterators, as they may be invalidated by
// splitPredecessorsOfLoopExit().
@@ -1720,7 +1684,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
- eraseInstruction(*PN, *SafetyInfo, nullptr);
+ eraseInstruction(*PN, *SafetyInfo, MSSAU);
Changed = true;
}
return Changed;
@@ -1731,7 +1695,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
<< I << "\n");
@@ -1774,14 +1738,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader,
/// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
- const DominatorTree *DT,
- const TargetLibraryInfo *TLI,
- const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE,
- const Instruction *CtxI) {
- if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
+static bool isSafeToExecuteUnconditionally(
+ Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+ bool AllowSpeculation) {
+ if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;
bool GuaranteedToExecute =
@@ -1809,7 +1771,7 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
- MemorySSAUpdater *MSSAU;
+ MemorySSAUpdater &MSSAU;
LoopInfo &LI;
DebugLoc DL;
Align Alignment;
@@ -1841,7 +1803,7 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
- MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
+ MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
@@ -1883,14 +1845,14 @@ public:
MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
MemoryAccess *NewMemAcc;
if (!MSSAInsertPoint) {
- NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewMemAcc = MSSAU.createMemoryAccessInBB(
NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
} else {
NewMemAcc =
- MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
+ MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
}
MSSAInsertPts[i] = NewMemAcc;
- MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
// FIXME: true for safety, false may still be correct.
}
}
@@ -1902,7 +1864,7 @@ public:
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
- MSSAU->removeMemoryAccess(I);
+ MSSAU.removeMemoryAccess(I);
}
bool shouldDelete(Instruction *I) const override {
@@ -1948,8 +1910,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
+ Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -1997,6 +1959,7 @@ bool llvm::promoteLoopAccessesToScalars(
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
+  bool StoreIsGuaranteedToExecute = false;
bool FoundLoadToPromote = false;
SmallVector<Instruction *, 64> LoopUses;
@@ -2031,9 +1994,9 @@ bool llvm::promoteLoopAccessesToScalars(
// different sizes. While we are at it, collect alignment and AA info.
Type *AccessTy = nullptr;
for (Value *ASIV : PointerMustAliases) {
- for (User *U : ASIV->users()) {
+ for (Use &U : ASIV->uses()) {
// Ignore instructions that are outside the loop.
- Instruction *UI = dyn_cast<Instruction>(U);
+ Instruction *UI = dyn_cast<Instruction>(U.getUser());
if (!UI || !CurLoop->contains(UI))
continue;
@@ -2054,16 +2017,16 @@ bool llvm::promoteLoopAccessesToScalars(
// to execute does as well. Thus we can increase our guaranteed
// alignment as well.
if (!DereferenceableInPH || (InstAlignment > Alignment))
- if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
- SafetyInfo, ORE,
- Preheader->getTerminator())) {
+ if (isSafeToExecuteUnconditionally(
+ *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
+ Preheader->getTerminator(), AllowSpeculation)) {
DereferenceableInPH = true;
Alignment = std::max(Alignment, InstAlignment);
}
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
- if (UI->getOperand(1) != ASIV)
+ if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
continue;
if (!Store->isUnordered())
return false;
@@ -2077,10 +2040,12 @@ bool llvm::promoteLoopAccessesToScalars(
// alignment than any other guaranteed stores, in which case we can
// raise the alignment on the promoted store.
Align InstAlignment = Store->getAlign();
-
+ bool GuaranteedToExecute =
+ SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
+        StoreIsGuaranteedToExecute |= GuaranteedToExecute;
if (!DereferenceableInPH || !SafeToInsertStore ||
(InstAlignment > Alignment)) {
- if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) {
+ if (GuaranteedToExecute) {
DereferenceableInPH = true;
SafeToInsertStore = true;
Alignment = std::max(Alignment, InstAlignment);
@@ -2194,32 +2159,37 @@ bool llvm::promoteLoopAccessesToScalars(
// Set up the preheader to have a definition of the value. It is the live-out
  // value from the preheader that the uses in the loop will use.
- LoadInst *PreheaderLoad = new LoadInst(
- AccessTy, SomePtr, SomePtr->getName() + ".promoted",
- Preheader->getTerminator());
- if (SawUnorderedAtomic)
- PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
- PreheaderLoad->setAlignment(Alignment);
- PreheaderLoad->setDebugLoc(DebugLoc());
- if (AATags)
- PreheaderLoad->setAAMetadata(AATags);
- SSA.AddAvailableValue(Preheader, PreheaderLoad);
-
- MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
- PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
- MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
- MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
+ LoadInst *PreheaderLoad = nullptr;
+  if (FoundLoadToPromote || !StoreIsGuaranteedToExecute) {
+ PreheaderLoad =
+ new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
+ Preheader->getTerminator());
+ if (SawUnorderedAtomic)
+ PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
+ PreheaderLoad->setAlignment(Alignment);
+ PreheaderLoad->setDebugLoc(DebugLoc());
+ if (AATags)
+ PreheaderLoad->setAAMetadata(AATags);
+
+ MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
+ PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+ MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+ MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+ } else {
+ SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
+ }
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
if (VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ MSSAU.getMemorySSA()->verifyMemorySSA();
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
- if (PreheaderLoad->use_empty())
+ if (PreheaderLoad && PreheaderLoad->use_empty())
eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
return true;
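The promotion rewrite above skips the preheader load entirely when nothing loads the location and some store to it is guaranteed to execute: the value flowing in from the preheader can never be observed, so poison is a sound seed for the SSAUpdater. A sketch of just that seeding decision, assuming the SSAUpdater and PoisonValue APIs used above:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

static void seedPromotedValue(SSAUpdater &SSA, BasicBlock *Preheader,
                              Type *AccessTy, LoadInst *PreheaderLoad) {
  if (PreheaderLoad) // some use may observe the initial value: load it
    SSA.AddAvailableValue(Preheader, PreheaderLoad);
  else               // initial value provably unobservable: poison suffices
    SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
}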
@@ -2246,8 +2216,7 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return false;
};
- // Populate AST with potentially promotable accesses and remove them from
- // MaybePromotable, so they will not be checked again on the next iteration.
+ // Populate AST with potentially promotable accesses.
SmallPtrSet<Value *, 16> AttemptingPromotion;
foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
if (IsPotentiallyPromotable(I)) {
@@ -2286,15 +2255,9 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return Result;
}
-static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
- AliasSetTracker *CurAST, Loop *CurLoop,
- AAResults *AA) {
- return CurAST->getAliasSetFor(MemLoc).isMod();
-}
-
-bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop, Instruction &I,
- SinkAndHoistLICMFlags &Flags) {
+static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
+ Loop *CurLoop, Instruction &I,
+ SinkAndHoistLICMFlags &Flags) {
// For hoisting, use the walker to determine safety
if (!Flags.getIsSink()) {
MemoryAccess *Source;
@@ -2329,17 +2292,16 @@ bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
if (Flags.tooManyMemoryAccesses())
return true;
for (auto *BB : CurLoop->getBlocks())
- if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
+ if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
return true;
// When sinking, the source block may not be part of the loop so check it.
if (!CurLoop->contains(&I))
- return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
+ return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
return false;
}
-bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
- MemoryUse &MU) {
+bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
if (const auto *Accesses = MSSA.getBlockDefs(&BB))
for (const auto &MA : *Accesses)
if (const auto *MD = dyn_cast<MemoryDef>(&MA))
diff --git a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
index 1c3ff1a61b7e..c063c0d3c88a 100644
--- a/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
@@ -8,6 +8,7 @@
#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
#define DEBUG_TYPE "loop-accesses"
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index d438d56e38ca..2b9800f11912 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -8,20 +8,15 @@
#include "llvm/Transforms/Scalar/LoopBoundSplit.h"
#include "llvm/ADT/Sequence.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#define DEBUG_TYPE "loop-bound-split"
@@ -33,26 +28,23 @@ using namespace PatternMatch;
namespace {
struct ConditionInfo {
/// Branch instruction with this condition
- BranchInst *BI;
+ BranchInst *BI = nullptr;
/// ICmp instruction with this condition
- ICmpInst *ICmp;
+ ICmpInst *ICmp = nullptr;
  /// Predicate info
- ICmpInst::Predicate Pred;
+ ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
/// AddRec llvm value
- Value *AddRecValue;
+ Value *AddRecValue = nullptr;
/// Non PHI AddRec llvm value
Value *NonPHIAddRecValue;
/// Bound llvm value
- Value *BoundValue;
+ Value *BoundValue = nullptr;
/// AddRec SCEV
- const SCEVAddRecExpr *AddRecSCEV;
+ const SCEVAddRecExpr *AddRecSCEV = nullptr;
/// Bound SCEV
- const SCEV *BoundSCEV;
+ const SCEV *BoundSCEV = nullptr;
- ConditionInfo()
- : BI(nullptr), ICmp(nullptr), Pred(ICmpInst::BAD_ICMP_PREDICATE),
- AddRecValue(nullptr), BoundValue(nullptr), AddRecSCEV(nullptr),
- BoundSCEV(nullptr) {}
+ ConditionInfo() = default;
};
} // namespace
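The ConditionInfo change is a pure C++11 cleanup: in-class default member initializers replace the hand-written constructor, so a newly added field cannot be forgotten in a mem-initializer list. (Prefetch and MemmoveVerifier below get the same treatment.) A toy sketch of the idiom, not LLVM's types:

// Before: defaults live far from the members they initialize.
struct InfoBefore {
  int *BI;
  unsigned Pred;
  InfoBefore() : BI(nullptr), Pred(0) {}
};

// After: each member carries its own default; the constructor is defaulted.
struct InfoAfter {
  int *BI = nullptr;
  unsigned Pred = 0;
  InfoAfter() = default;
};

int main() {
  InfoAfter I;
  return (I.BI == nullptr && I.Pred == 0) ? 0 : 1;
}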
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 57e36e5b9b90..9590fbbb1994 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -30,9 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "loop-data-prefetch"
@@ -236,15 +233,14 @@ struct Prefetch {
/// The address formula for this prefetch as returned by ScalarEvolution.
const SCEVAddRecExpr *LSCEVAddRec;
/// The point of insertion for the prefetch instruction.
- Instruction *InsertPt;
+ Instruction *InsertPt = nullptr;
/// True if targeting a write memory access.
- bool Writes;
+ bool Writes = false;
/// The (first seen) prefetched instruction.
- Instruction *MemI;
+ Instruction *MemI = nullptr;
/// Constructor to create a new Prefetch for \p I.
- Prefetch(const SCEVAddRecExpr *L, Instruction *I)
- : LSCEVAddRec(L), InsertPt(nullptr), Writes(false), MemI(nullptr) {
+ Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
addInstruction(I);
};
@@ -303,7 +299,11 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
}
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
}
- unsigned LoopSize = Metrics.NumInsts;
+
+ if (!Metrics.NumInsts.isValid())
+ return MadeChange;
+
+ unsigned LoopSize = *Metrics.NumInsts.getValue();
if (!LoopSize)
LoopSize = 1;
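Metrics.NumInsts is now an InstructionCost rather than a plain unsigned, so an invalid (unknowable) cost must be rejected before its numeric value is read. A standard-C++ analogue of the guarded read, with std::optional standing in for InstructionCost:

#include <algorithm>
#include <optional>

static unsigned loopSizeSketch(std::optional<unsigned> NumInsts) {
  if (!NumInsts)
    return 0;                     // invalid cost: caller bails, as above
  return std::max(1u, *NumInsts); // clamp zero to one, as above
}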
diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 361d6c0d9381..93f3cd704196 100644
--- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,12 +17,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
@@ -192,13 +192,13 @@ getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
getValueOnFirstIteration(BO->getOperand(0), FirstIterValue, SQ);
Value *RHS =
getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
- FirstIterV = SimplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
+ FirstIterV = simplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
} else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
Value *LHS =
getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
Value *RHS =
getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
- FirstIterV = SimplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
+ FirstIterV = simplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
} else if (auto *Select = dyn_cast<SelectInst>(V)) {
Value *Cond =
getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
@@ -458,13 +458,13 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
if (ExitBlock && isLoopNeverExecuted(L)) {
LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
// We need to forget the loop before setting the incoming values of the exit
- // phis to undef, so we properly invalidate the SCEV expressions for those
+ // phis to poison, so we properly invalidate the SCEV expressions for those
// phis.
SE.forgetLoop(L);
- // Set incoming value to undef for phi nodes in the exit block.
+ // Set incoming value to poison for phi nodes in the exit block.
for (PHINode &P : ExitBlock->phis()) {
std::fill(P.incoming_values().begin(), P.incoming_values().end(),
- UndefValue::get(P.getType()));
+ PoisonValue::get(P.getType()));
}
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(),
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 0f4c767c1e4c..03a10cb36bb6 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -47,7 +47,6 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
@@ -231,7 +230,7 @@ public:
// having to update as many def-use and use-def chains.
for (auto *Inst : reverse(Unused)) {
if (!Inst->use_empty())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
Inst->eraseFromParent();
}
}
@@ -601,7 +600,7 @@ private:
{LLVMLoopDistributeFollowupAll,
Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
: LLVMLoopDistributeFollowupCoincident});
- if (PartitionID.hasValue()) {
+ if (PartitionID) {
Loop *NewLoop = Part->getDistributedLoop();
NewLoop->setLoopID(PartitionID.getValue());
}
@@ -770,19 +769,19 @@ public:
// Don't distribute the loop if we need too many SCEV run-time checks, or
// any if it's illegal.
- const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
+ const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
return fail("RuntimeCheckWithConvergent",
"may not insert runtime check with convergent operation");
}
- if (Pred.getComplexity() > (IsForced.getValueOr(false)
+ if (Pred.getComplexity() > (IsForced.value_or(false)
? PragmaDistributeSCEVCheckThreshold
: DistributeSCEVCheckThreshold))
return fail("TooManySCEVRuntimeChecks",
"too many SCEV run-time checks needed.\n");
- if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
+ if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
return fail("HeuristicDisabled", "distribution heuristic disabled");
LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
@@ -859,7 +858,7 @@ public:
/// Provide diagnostics then \return with false.
bool fail(StringRef RemarkName, StringRef Message) {
LLVMContext &Ctx = F->getContext();
- bool Forced = isForced().getValueOr(false);
+ bool Forced = isForced().value_or(false);
LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
@@ -991,7 +990,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
// If distribution was forced for the specific loop to be
// enabled/disabled, follow that. Otherwise use the global flag.
- if (LDL.isForced().getValueOr(EnableLoopDistribute))
+ if (LDL.isForced().value_or(EnableLoopDistribute))
Changed |= LDL.processLoop(GetLAA);
}
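The Optional changes here track LLVM's migration toward std::optional spellings: hasValue() becomes a plain boolean test and getValueOr() becomes value_or(). The same pattern in standard C++:

#include <cstdio>
#include <optional>

int main() {
  std::optional<int> PartitionID; // parallels the loop-metadata Optional above
  if (PartitionID)                // was: PartitionID.hasValue()
    std::printf("id = %d\n", *PartitionID);
  std::optional<bool> IsForced;   // parallels isForced()
  bool Forced = IsForced.value_or(false); // was: getValueOr(false)
  return Forced ? 1 : 0;
}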
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index c46db4e63bfe..f36193fc468e 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -54,6 +54,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -64,12 +65,12 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -210,8 +211,9 @@ struct FlattenInfo {
if (!MatchedItCount)
return false;
- // Look through extends if the IV has been widened.
- if (Widened &&
+ // Look through extends if the IV has been widened. Don't look through
+ // extends if we already looked through a trunc.
+ if (Widened && IsAdd &&
(isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
"Unexpected type mismatch in types after widening");
@@ -410,7 +412,7 @@ static bool findLoopComponents(
// pre-header and one from the latch. The incoming latch value is the
// increment variable.
Increment =
- dyn_cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
+ cast<BinaryOperator>(InductionPHI->getIncomingValueForBlock(Latch));
if (Increment->hasNUsesOrMore(3)) {
LLVM_DEBUG(dbgs() << "Could not find valid increment\n");
return false;
@@ -921,7 +923,7 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ MSSAU ? MSSAU.getPointer() : nullptr);
if (!Changed)
return PreservedAnalyses::all();
@@ -987,7 +989,7 @@ bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
for (Loop *L : *LI) {
auto LN = LoopNest::getLoopNest(*L, *SE);
Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ MSSAU ? MSSAU.getPointer() : nullptr);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index bf4d275e04ba..d94b767c7b63 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -117,7 +117,7 @@ static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
"Use the dependence analysis interface"),
clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
"Use all available analyses")),
- cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
+ cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL));
static cl::opt<unsigned> FusionPeelMaxCount(
"loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
@@ -128,7 +128,7 @@ static cl::opt<unsigned> FusionPeelMaxCount(
static cl::opt<bool>
VerboseFusionDebugging("loop-fusion-verbose-debug",
cl::desc("Enable verbose debugging for Loop Fusion"),
- cl::Hidden, cl::init(false), cl::ZeroOrMore);
+ cl::Hidden, cl::init(false));
#endif
namespace {
@@ -178,12 +178,12 @@ struct FusionCandidate {
/// FusionCandidateCompare function, required by FusionCandidateSet to
/// determine where the FusionCandidate should be inserted into the set. These
/// are used to establish ordering of the FusionCandidates based on dominance.
- const DominatorTree *DT;
+ DominatorTree &DT;
const PostDominatorTree *PDT;
OptimizationRemarkEmitter &ORE;
- FusionCandidate(Loop *L, const DominatorTree *DT,
+ FusionCandidate(Loop *L, DominatorTree &DT,
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
TTI::PeelingPreferences PP)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
@@ -192,7 +192,6 @@ struct FusionCandidate {
GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
- assert(DT && "Expected non-null DT!");
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
// the memory reads and writes If any instructions that prevent fusion are
@@ -391,7 +390,7 @@ struct FusionCandidateCompare {
/// IF RHS dominates LHS and LHS post-dominates RHS, return false;
bool operator()(const FusionCandidate &LHS,
const FusionCandidate &RHS) const {
- const DominatorTree *DT = LHS.DT;
+ const DominatorTree *DT = &(LHS.DT);
BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
@@ -646,7 +645,7 @@ private:
for (Loop *L : LV) {
TTI::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
- FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP);
+ FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);
if (!CurrCand.isEligibleForFusion(SE))
continue;
@@ -991,7 +990,7 @@ private:
FuseCounter);
FusionCandidate FusedCand(
- performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE,
+ performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
FC0Copy.PP);
FusedCand.verify();
assert(FusedCand.isEligibleForFusion(SE) &&
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 318c4c06f0f7..88d6a7aff3c9 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -61,7 +61,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -346,7 +345,7 @@ INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
static void deleteDeadInstruction(Instruction *I) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
@@ -798,7 +797,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
}
/// processLoopMemIntrinsic - Template function for calling different processor
-/// functions based on mem instrinsic type.
+/// functions based on mem intrinsic type.
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
BasicBlock *BB,
@@ -995,9 +994,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
- MaybeAlign(MSI->getDestAlignment()),
- SplatValue, MSI, MSIs, Ev, BECount,
- IsNegStride, /*IsLoopMemset=*/true);
+ MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev,
+ BECount, IsNegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -1101,6 +1099,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
+ Module *M = TheStore->getModule();
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
@@ -1173,6 +1172,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
CallInst *NewCall;
if (SplatValue) {
AAMDNodes AATags = TheStore->getAAMetadata();
+ for (Instruction *Store : Stores)
+ AATags = AATags.merge(Store->getAAMetadata());
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
AATags = AATags.extendTo(CI->getZExtValue());
else
@@ -1181,15 +1182,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
NewCall = Builder.CreateMemSet(
BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
/*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
- } else {
+ } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
- Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
- FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntIdxTy);
- inferLibFuncAttributes(M, FuncName, *TLI);
+ FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
+ Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
+ inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
    // a constant array of 16 bytes. Plop the value into a mergeable global.
@@ -1200,7 +1200,9 @@ bool LoopIdiomRecognize::processLoopStridedStore(
GV->setAlignment(Align(16));
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
- }
+ } else
+ return Changed;
+
NewCall->setDebugLoc(TheStore->getDebugLoc());
if (MSSAU) {
@@ -1275,9 +1277,8 @@ class MemmoveVerifier {
public:
explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
const DataLayout &DL)
- : DL(DL), LoadOff(0), StoreOff(0),
- BP1(llvm::GetPointerBaseWithConstantOffset(
- LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+ : DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset(
+ LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
BP2(llvm::GetPointerBaseWithConstantOffset(
StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
IsSameObject(BP1 == BP2) {}
@@ -1307,8 +1308,8 @@ public:
private:
const DataLayout &DL;
- int64_t LoadOff;
- int64_t StoreOff;
+ int64_t LoadOff = 0;
+ int64_t StoreOff = 0;
const Value *BP1;
const Value *BP2;
@@ -1420,26 +1421,19 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
- if (IsMemCpy)
- IgnoredInsts.erase(TheStore);
MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
+ if (IsMemCpy && !Verifier.IsSameObject)
+ IgnoredInsts.erase(TheStore);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSizeSCEV, *AA, IgnoredInsts)) {
- if (!IsMemCpy) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
- TheLoad)
- << ore::NV("Inst", InstRemark) << " in "
- << ore::NV("Function", TheStore->getFunction())
- << " function will not be hoisted: "
- << ore::NV("Reason", "The loop may access load location");
- });
- return Changed;
- }
- // At this point loop may access load only for memcpy in same underlying
- // object. If that's not the case bail out.
- if (!Verifier.IsSameObject)
- return Changed;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
+ << ore::NV("Inst", InstRemark) << " in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function will not be hoisted: "
+ << ore::NV("Reason", "The loop may access load location");
+ });
+ return Changed;
}
bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
@@ -1487,7 +1481,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
return Changed;
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
- assert((StoreAlign.hasValue() && LoadAlign.hasValue()) &&
+ assert((StoreAlign && LoadAlign) &&
"Expect unordered load/store to have align.");
if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize)
return Changed;
diff --git a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index b9e63a4bc06f..4249512ea0f8 100644
--- a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,21 +24,17 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include <algorithm>
#include <utility>
using namespace llvm;
@@ -101,7 +96,7 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (!IsFirstIteration && !ToSimplify->count(&I))
continue;
- Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I));
+ Value *V = simplifyInstruction(&I, SQ.getWithInstruction(&I));
if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
continue;
@@ -109,6 +104,10 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
auto *UserI = cast<Instruction>(U.getUser());
U.set(V);
+ // Do not bother dealing with unreachable code.
+ if (!DT.isReachableFromEntry(UserI->getParent()))
+ continue;
+
// If the instruction is used by a PHI node we have already processed
// we'll need to iterate on the loop body to converge, so add it to
// the next set.
@@ -222,7 +221,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!simplifyLoopInst(L, AR.DT, AR.LI, AR.AC, AR.TLI,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+ MSSAU ? MSSAU.getPointer() : nullptr))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index c2b065c4eb31..1d3023d04463 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
@@ -33,7 +34,6 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -44,7 +44,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
@@ -120,8 +119,6 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
std::vector<char> Dep;
Instruction *Src = cast<Instruction>(*I);
Instruction *Dst = cast<Instruction>(*J);
- if (Src == Dst)
- continue;
// Ignore Input dependencies.
if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
continue;
@@ -270,26 +267,28 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
return true;
}
-static LoopVector populateWorklist(Loop &L) {
+static void populateWorklist(Loop &L, LoopVector &LoopList) {
LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() << " Loop: %"
<< L.getHeader()->getName() << '\n');
- LoopVector LoopList;
+ assert(LoopList.empty() && "LoopList should initially be empty!");
Loop *CurrentLoop = &L;
const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
while (!Vec->empty()) {
    // The current loop has multiple subloops in it, hence it is not tightly
    // nested. Discard all loops above it that were added into the worklist.
- if (Vec->size() != 1)
- return {};
+ if (Vec->size() != 1) {
+ LoopList = {};
+ return;
+ }
LoopList.push_back(CurrentLoop);
CurrentLoop = Vec->front();
Vec = &CurrentLoop->getSubLoops();
}
LoopList.push_back(CurrentLoop);
- return LoopList;
+ return;
}
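populateWorklist now fills a caller-owned vector and keeps only perfectly nested loops: the moment any level has more than one subloop, the list is reset. A self-contained toy sketch of the same walk (ToyLoop stands in for llvm::Loop):

#include <vector>

struct ToyLoop { std::vector<ToyLoop *> Subs; };

static void populateSketch(ToyLoop &L, std::vector<ToyLoop *> &List) {
  ToyLoop *Cur = &L;
  while (!Cur->Subs.empty()) {
    if (Cur->Subs.size() != 1) { // not tightly nested: discard everything
      List.clear();
      return;
    }
    List.push_back(Cur);
    Cur = Cur->Subs.front();
  }
  List.push_back(Cur); // the innermost loop
}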
namespace {
@@ -360,8 +359,10 @@ public:
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
/// Check if the loop interchange is profitable.
- bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
- CharMatrix &DepMatrix);
+ bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
+ unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap);
private:
int getInstrOrderCost();
@@ -412,23 +413,26 @@ struct LoopInterchange {
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;
+ std::unique_ptr<CacheCost> CC = nullptr;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
- DominatorTree *DT, OptimizationRemarkEmitter *ORE)
- : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
+ DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
+ OptimizationRemarkEmitter *ORE)
+ : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
bool run(Loop *L) {
if (L->getParentLoop())
return false;
-
- return processLoopList(populateWorklist(*L));
+ SmallVector<Loop *, 8> LoopList;
+ populateWorklist(*L, LoopList);
+ return processLoopList(LoopList);
}
bool run(LoopNest &LN) {
- const auto &LoopList = LN.getLoops();
+ SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
for (unsigned I = 1; I < LoopList.size(); ++I)
if (LoopList[I]->getParentLoop() != LoopList[I - 1])
return false;
@@ -460,7 +464,7 @@ struct LoopInterchange {
return LoopList.size() - 1;
}
- bool processLoopList(ArrayRef<Loop *> LoopList) {
+ bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
bool Changed = false;
unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth < 2) {
@@ -500,27 +504,55 @@ struct LoopInterchange {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
- // Move the selected loop outwards to the best possible position.
- Loop *LoopToBeInterchanged = LoopList[SelecLoopId];
- for (unsigned i = SelecLoopId; i > 0; i--) {
- bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i,
- i - 1, DependencyMatrix);
- if (!Interchanged)
- return Changed;
- // Update the DependencyMatrix
- interChangeDependencies(DependencyMatrix, i, i - 1);
+ // Obtain the loop vector returned from loop cache analysis beforehand,
+ // and put each <Loop, index> pair into a map for constant time query
+    // later. Indices in the loop vector represent the optimal order of the
+ // corresponding loop, e.g., given a loopnest with depth N, index 0
+ // indicates the loop should be placed as the outermost loop and index N
+ // indicates the loop should be placed as the innermost loop.
+ //
+ // For the old pass manager CacheCost would be null.
+ DenseMap<const Loop *, unsigned> CostMap;
+ if (CC != nullptr) {
+ const auto &LoopCosts = CC->getLoopCosts();
+ for (unsigned i = 0; i < LoopCosts.size(); i++) {
+ CostMap[LoopCosts[i].first] = i;
+ }
+ }
+ // We try to achieve the globally optimal memory access for the loopnest,
+    // and do interchange in a bubble-sort fashion. We start from
+ // the innermost loop, move it outwards to the best possible position
+ // and repeat this process.
+ for (unsigned j = SelecLoopId; j > 0; j--) {
+ bool ChangedPerIter = false;
+ for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
+ bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
+ DependencyMatrix, CostMap);
+ if (!Interchanged)
+ continue;
+ // Loops interchanged, update LoopList accordingly.
+ std::swap(LoopList[i - 1], LoopList[i]);
+ // Update the DependencyMatrix
+ interChangeDependencies(DependencyMatrix, i, i - 1);
#ifdef DUMP_DEP_MATRICIES
- LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
- printDepMatrix(DependencyMatrix);
+ LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
+ printDepMatrix(DependencyMatrix);
#endif
- Changed |= Interchanged;
+ ChangedPerIter |= Interchanged;
+ Changed |= Interchanged;
+ }
+ // Early abort if there was no interchange during an entire round of
+ // moving loops outwards.
+ if (!ChangedPerIter)
+ break;
}
return Changed;
}
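The new driver repeatedly interchanges adjacent loops, bubbling each candidate outward, and stops after a full round with no swap; profitability prefers the loop whose cache-cost index is smaller (it wants to be further out). A self-contained sketch of the control flow, with ints standing in for Loop* and a std::map for CostMap (shapes assumed for illustration):

#include <cstddef>
#include <map>
#include <utility>
#include <vector>

static bool reorderSketch(std::vector<int> &Loops,
                          const std::map<int, unsigned> &CostMap) {
  if (Loops.size() < 2)
    return false;
  bool Changed = false;
  size_t Sel = Loops.size() - 1; // start from the innermost position
  for (size_t J = Sel; J > 0; --J) {
    bool ChangedPerIter = false;
    for (size_t I = Sel; I > Sel - J; --I) {
      auto In = CostMap.find(Loops[I]);
      auto Out = CostMap.find(Loops[I - 1]);
      // Profitable iff the inner loop's optimal index is strictly
      // smaller, i.e. it belongs further out than its neighbor.
      if (In == CostMap.end() || Out == CostMap.end() ||
          In->second >= Out->second)
        continue;
      std::swap(Loops[I - 1], Loops[I]); // the "interchange"
      ChangedPerIter = Changed = true;
    }
    if (!ChangedPerIter)
      break; // a whole round without progress: done
  }
  return Changed;
}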
bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId,
- std::vector<std::vector<char>> &DependencyMatrix) {
+ std::vector<std::vector<char>> &DependencyMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
@@ -530,7 +562,8 @@ struct LoopInterchange {
}
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
- if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
+ DependencyMatrix, CostMap)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
@@ -733,8 +766,12 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
if (PHI->getNumIncomingValues() == 1)
continue;
RecurrenceDescriptor RD;
- if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
+ if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
+ // Detect floating point reduction only when it can be reordered.
+ if (RD.getExactFPMathInst() != nullptr)
+ return nullptr;
return PHI;
+ }
return nullptr;
}
}
@@ -893,28 +930,23 @@ areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
for (PHINode &PHI : LoopNestExit->phis()) {
- // FIXME: We currently are not able to detect floating point reductions
- // and have to use floating point PHIs as a proxy to prevent
- // interchanging in the presence of floating point reductions.
- if (PHI.getType()->isFloatingPointTy())
- return false;
for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
- Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
- if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
- continue;
-
- // The incoming value is defined in the outer loop latch. Currently we
- // only support that in case the outer loop latch has a single predecessor.
- // This guarantees that the outer loop latch is executed if and only if
- // the inner loop is executed (because tightlyNested() guarantees that the
- // outer loop header only branches to the inner loop or the outer loop
- // latch).
- // FIXME: We could weaken this logic and allow multiple predecessors,
- // if the values are produced outside the loop latch. We would need
- // additional logic to update the PHI nodes in the exit block as
- // well.
- if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
- return false;
+ Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
+ if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
+ continue;
+
+ // The incoming value is defined in the outer loop latch. Currently we
+ // only support that in case the outer loop latch has a single predecessor.
+ // This guarantees that the outer loop latch is executed if and only if
+ // the inner loop is executed (because tightlyNested() guarantees that the
+ // outer loop header only branches to the inner loop or the outer loop
+ // latch).
+ // FIXME: We could weaken this logic and allow multiple predecessors,
+ // if the values are produced outside the loop latch. We would need
+ // additional logic to update the PHI nodes in the exit block as
+ // well.
+ if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
+ return false;
}
}
return true;
@@ -1125,21 +1157,33 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
return !DepMatrix.empty();
}
-bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
- unsigned OuterLoopId,
- CharMatrix &DepMatrix) {
- // TODO: Add better profitability checks.
- // e.g
- // 1) Construct dependency matrix and move the one with no loop carried dep
- // inside to enable vectorization.
-
- // This is rough cost estimation algorithm. It counts the good and bad order
- // of induction variables in the instruction and allows reordering if number
- // of bad orders is more than good.
- int Cost = getInstrOrderCost();
- LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
- if (Cost < -LoopInterchangeCostThreshold)
- return true;
+bool LoopInterchangeProfitability::isProfitable(
+ const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
+ unsigned OuterLoopId, CharMatrix &DepMatrix,
+ const DenseMap<const Loop *, unsigned> &CostMap) {
+ // TODO: Remove the legacy cost model.
+
+ // This is the new cost model returned from loop cache analysis.
+  // A smaller index means the loop should be placed as an outer loop, and
+  // vice versa.
+ if (CostMap.find(InnerLoop) != CostMap.end() &&
+ CostMap.find(OuterLoop) != CostMap.end()) {
+ unsigned InnerIndex = 0, OuterIndex = 0;
+ InnerIndex = CostMap.find(InnerLoop)->second;
+ OuterIndex = CostMap.find(OuterLoop)->second;
+ LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
+ << ", OuterIndex = " << OuterIndex << "\n");
+ if (InnerIndex < OuterIndex)
+ return true;
+ } else {
+    // Legacy cost model: this is a rough cost estimation algorithm. It counts
+    // the good and bad orderings of induction variables in the instructions
+    // and allows reordering if the number of bad orderings exceeds the good.
+ int Cost = getInstrOrderCost();
+ LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
+ if (Cost < -LoopInterchangeCostThreshold)
+ return true;
+ }
  // It is not profitable as per the current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
@@ -1150,10 +1194,8 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
InnerLoop->getStartLoc(),
InnerLoop->getHeader())
- << "Interchanging loops is too costly (cost="
- << ore::NV("Cost", Cost) << ", threshold="
- << ore::NV("Threshold", LoopInterchangeCostThreshold)
- << ") and it does not improve parallelism.";
+ << "Interchanging loops is too costly and it does not improve "
+ "parallelism.";
});
return false;
}
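The hunk above introduces a two-tier decision that is easy to misread, so here is a minimal stand-alone sketch of the same control flow. It is not LLVM's code: std::unordered_map stands in for DenseMap, and getInstrOrderCost, CostThreshold, and isProfitableSketch are hypothetical names.

    #include <unordered_map>

    struct Loop; // opaque stand-in for llvm::Loop

    int getInstrOrderCost() { return -5; } // toy legacy cost, illustrative only
    constexpr int CostThreshold = 3;       // plays LoopInterchangeCostThreshold

    bool isProfitableSketch(
        const Loop *Inner, const Loop *Outer,
        const std::unordered_map<const Loop *, unsigned> &CostMap) {
      auto InnerIt = CostMap.find(Inner);
      auto OuterIt = CostMap.find(Outer);
      if (InnerIt != CostMap.end() && OuterIt != CostMap.end())
        // Cache model: a smaller index belongs further out, so interchange
        // pays off when the inner loop's index is the smaller one.
        return InnerIt->second < OuterIt->second;
      // Legacy model: net count of badly ordered induction-variable uses.
      return getInstrOrderCost() < -CostThreshold;
    }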
@@ -1424,9 +1466,13 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
// Incoming values are guaranteed to be instructions currently.
auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
+ // In case of multi-level nested loops, follow LCSSA to find the incoming
+ // value defined from the innermost loop.
+ auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
// Skip phis with incoming values from the inner loop body, excluding the
// header and latch.
- if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
+ if (IncIInnerMost->getParent() != InnerLatch &&
+ IncIInnerMost->getParent() != InnerHeader)
continue;
assert(all_of(P.users(),
@@ -1695,8 +1741,8 @@ struct LoopInterchangeLegacyPass : public LoopPass {
auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
+ std::unique_ptr<CacheCost> CC = nullptr;
+ return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
}
};
} // namespace
@@ -1723,8 +1769,10 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
Function &F = *LN.getParent();
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+ std::unique_ptr<CacheCost> CC =
+ CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
OptimizationRemarkEmitter ORE(&F);
- if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
+ if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 21d59936616b..1877ac1dfd08 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -61,7 +61,6 @@
#include <algorithm>
#include <cassert>
#include <forward_list>
-#include <set>
#include <tuple>
#include <utility>
@@ -213,7 +212,8 @@ public:
continue;
// Only propagate the value if they are of the same type.
- if (Store->getPointerOperandType() != Load->getPointerOperandType())
+ if (Store->getPointerOperandType() != Load->getPointerOperandType() ||
+ getLoadStoreType(Store) != getLoadStoreType(Load))
continue;
Candidates.emplace_front(Load, Store);
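The added getLoadStoreType comparison matters because the pointer operand's type alone no longer identifies the accessed value type (most notably once opaque pointers are involved, which is an assumption about the motivation here). A small stand-alone model of the filter; Type, MemOp, and addCandidate are illustrative stand-ins, not LLVM's API:

    #include <forward_list>

    struct Type; // opaque stand-in for llvm::Type
    struct MemOp {
      Type *PointerOperandType; // type of the address operand
      Type *AccessType;         // type of the value actually loaded or stored
    };
    struct Candidate { const MemOp *Load, *Store; };

    // Keep a (load, store) pair only when address type and access type agree;
    // forwarding a value of a different type would reinterpret its bits.
    void addCandidate(const MemOp &Load, const MemOp &Store,
                      std::forward_list<Candidate> &Candidates) {
      if (Store.PointerOperandType != Load.PointerOperandType ||
          Store.AccessType != Load.AccessType)
        return;
      Candidates.push_front({&Load, &Store});
    }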
@@ -528,7 +528,7 @@ public:
return false;
}
- if (LAI.getPSE().getUnionPredicate().getComplexity() >
+ if (LAI.getPSE().getPredicate().getComplexity() >
LoadElimSCEVCheckThreshold) {
LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
return false;
@@ -539,7 +539,7 @@ public:
return false;
}
- if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
+ if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) {
if (LAI.hasConvergentOp()) {
LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
"convergent calls\n");
@@ -706,8 +706,12 @@ FunctionPass *llvm::createLoopLoadEliminationPass() {
PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
FunctionAnalysisManager &AM) {
- auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
+ // There are no loops in the function. Return before computing other expensive
+ // analyses.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
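The reordering in this hunk is a general pattern worth noting: fetch the cheap analysis first, return early when there is no work, and only then request the expensive ones. A minimal sketch of that shape, with all names hypothetical and trivial stub bodies:

    #include <vector>

    struct LoopInfoStub {
      std::vector<int> Loops;
      bool empty() const { return Loops.empty(); }
    };

    LoopInfoStub computeLoopInfo() { return {}; } // cheap analysis
    void computeScalarEvolution() {}              // expensive stand-ins
    void computeDominatorTree() {}

    bool runPass() {
      LoopInfoStub LI = computeLoopInfo();
      if (LI.empty())
        return false; // no loops: never pay for the expensive analyses
      computeScalarEvolution();
      computeDominatorTree();
      return true;
    }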
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 6c783848432b..d20d275ea60c 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,14 +8,12 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/TimeProfiler.h"
using namespace llvm;
@@ -311,12 +309,12 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
#ifndef NDEBUG
// LoopAnalysisResults should always be valid.
- // Note that we don't LAR.SE.verify() because that can change observed SE
- // queries. See PR44815.
if (VerifyDomInfo)
LAR.DT.verify();
if (VerifyLoopInfo)
LAR.LI.verify(LAR.DT);
+ if (VerifySCEV)
+ LAR.SE.verify();
if (LAR.MSSA && VerifyMemorySSA)
LAR.MSSA->verifyMemorySSA();
#endif
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index aa7e79a589f2..d0ee5b47a8ca 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -188,7 +188,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
@@ -244,7 +243,7 @@ struct LoopICmp {
LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
const SCEV *Limit)
: Pred(Pred), IV(IV), Limit(Limit) {}
- LoopICmp() {}
+ LoopICmp() = default;
void dump() {
dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
<< ", Limit = " << *Limit << "\n";
@@ -778,7 +777,7 @@ unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
Guard)) {
- Checks.push_back(NewRangeCheck.getValue());
+ Checks.push_back(*NewRangeCheck);
NumWidened++;
continue;
}
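This is one instance of a tree-wide migration visible throughout the diff: llvm::Optional now follows std::optional, so contextual conversion to bool replaces hasValue() and unary * replaces getValue(). The same idiom expressed with the standard type (widenCheck is a made-up stand-in for widenICmpRangeCheck):

    #include <optional>
    #include <vector>

    std::optional<int> widenCheck(int C) {
      return C > 0 ? std::optional<int>(C * 2) : std::nullopt;
    }

    int main() {
      std::vector<int> Checks;
      for (int C : {3, -1, 5})
        if (auto Widened = widenCheck(C)) // test via operator bool, not hasValue()
          Checks.push_back(*Widened);     // dereference, not getValue()
      return static_cast<int>(Checks.size()); // 2
    }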
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index 9d22eceb987f..f4ef22562341 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -29,15 +29,11 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -59,7 +55,6 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <cstdlib>
#include <iterator>
#include <map>
#include <utility>
@@ -559,12 +554,12 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
}
// Must be a CMP or an ext (of a value with nsw) then CMP
else {
- Instruction *UUser = dyn_cast<Instruction>(UU);
+ auto *UUser = cast<Instruction>(UU);
// Skip SExt if we are extending an nsw value
// TODO: Allow ZExt too
- if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() &&
+ if (BO->hasNoSignedWrap() && UUser->hasOneUse() &&
isa<SExtInst>(UUser))
- UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
+ UUser = cast<Instruction>(*(UUser->user_begin()));
if (!isCompareUsedByBranch(UUser))
return false;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 5ba137b1c85f..d9c33b5f335a 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopRotation.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -22,9 +22,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -62,8 +60,8 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
MSSAU = MemorySSAUpdater(AR.MSSA);
bool Changed =
LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, false,
- Threshold, false, PrepareForLTO || PrepareForLTOOption);
+ MSSAU ? MSSAU.getPointer() : nullptr, SQ, false, Threshold,
+ false, PrepareForLTO || PrepareForLTOOption);
if (!Changed)
return PreservedAnalyses::all();
@@ -133,9 +131,8 @@ public:
: MaxHeaderSize;
return LoopRotation(L, LI, TTI, AC, &DT, &SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ,
- false, Threshold, false,
- PrepareForLTO || PrepareForLTOOption);
+ MSSAU ? MSSAU.getPointer() : nullptr, SQ, false,
+ Threshold, false, PrepareForLTO || PrepareForLTOOption);
}
};
} // end namespace
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index d3fcba10c275..b7e0e32780b4 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -16,28 +16,21 @@
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -261,13 +254,17 @@ private:
assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() &&
"Malformed block sets?");
- // Now, all exit blocks that are not marked as live are dead.
+ // Now, all exit blocks that are not marked as live are dead, provided all
+ // their predecessors are in the loop. This may not be the case, as the
+ // input loop may not be in loop-simplify/canonical form.
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
SmallPtrSet<BasicBlock *, 8> UniqueDeadExits;
for (auto *ExitBlock : ExitBlocks)
if (!LiveExitBlocks.count(ExitBlock) &&
- UniqueDeadExits.insert(ExitBlock).second)
+ UniqueDeadExits.insert(ExitBlock).second &&
+ all_of(predecessors(ExitBlock),
+ [this](BasicBlock *Pred) { return L.contains(Pred); }))
DeadExitBlocks.push_back(ExitBlock);
// Whether or not the edge From->To will still be present in graph after the
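The strengthened condition above deserves a worked form: an exit block is only provably dead when every path into it goes through the loop, because a non-canonical loop may have exit blocks that are also reachable from outside. A stand-alone model of the refined test (Block, isDeadExit, and the set types are illustrative):

    #include <algorithm>
    #include <set>
    #include <vector>

    struct Block { std::vector<Block *> Preds; };

    bool isDeadExit(Block *Exit, const std::set<Block *> &LiveExits,
                    const std::set<Block *> &LoopBlocks) {
      if (LiveExits.count(Exit))
        return false; // still reached along a live in-loop path
      // Dead only if *every* predecessor is inside the loop; an edge from
      // outside the loop keeps the exit block alive.
      return std::all_of(Exit->Preds.begin(), Exit->Preds.end(),
                         [&](Block *P) { return LoopBlocks.count(P) != 0; });
    }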
@@ -374,7 +371,7 @@ private:
DeadInstructions.emplace_back(LandingPad);
for (Instruction *I : DeadInstructions) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
@@ -704,8 +701,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
MSSAU = MemorySSAUpdater(AR.MSSA);
bool DeleteCurrentLoop = false;
if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
- DeleteCurrentLoop))
+ MSSAU ? MSSAU.getPointer() : nullptr, DeleteCurrentLoop))
return PreservedAnalyses::all();
if (DeleteCurrentLoop)
@@ -739,9 +735,9 @@ public:
if (MSSAA && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
bool DeleteCurrentLoop = false;
- bool Changed = simplifyLoopCFG(
- *L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
- DeleteCurrentLoop);
+ bool Changed =
+ simplifyLoopCFG(*L, DT, LI, SE, MSSAU ? MSSAU.getPointer() : nullptr,
+ DeleteCurrentLoop);
if (DeleteCurrentLoop)
LPM.markLoopAsDeleted(*L);
return Changed;
diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
index c9c9e60d0921..dce1af475fb1 100644
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -34,24 +34,18 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -70,14 +64,6 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
"max-uses-for-sinking", cl::Hidden, cl::init(30),
cl::desc("Do not sink instructions that have too many uses."));
-static cl::opt<bool> EnableMSSAInLoopSink(
- "enable-mssa-in-loop-sink", cl::Hidden, cl::init(true),
- cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
-
-static cl::opt<bool> EnableMSSAInLegacyLoopSink(
- "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false),
- cl::desc("Enable MemorySSA for LoopSink in legacy pass manager"));
-
/// Return adjusted total frequency of \p BBs.
///
/// * If there is only one BB, sinking instruction will not introduce code
@@ -279,9 +265,8 @@ static bool sinkInstruction(
static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
DominatorTree &DT,
BlockFrequencyInfo &BFI,
- ScalarEvolution *SE,
- AliasSetTracker *CurAST,
- MemorySSA *MSSA) {
+ MemorySSA &MSSA,
+ ScalarEvolution *SE) {
BasicBlock *Preheader = L.getLoopPreheader();
assert(Preheader && "Expected loop to have preheader");
@@ -297,13 +282,8 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
}))
return false;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags;
- if (MSSA) {
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- LICMFlags =
- std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA);
- }
+ MemorySSAUpdater MSSAU(&MSSA);
+ SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA);
bool Changed = false;
@@ -324,14 +304,15 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// on B (A appears after B), A needs to be sunk first before B can be
// sunk.
for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
+ if (isa<PHINode>(&I))
+ continue;
// No need to check that the instruction's operands are loop invariant.
assert(L.hasLoopInvariantOperands(&I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
- LICMFlags.get()))
+ if (!canSinkOrHoistInst(I, &AA, &DT, &L, MSSAU, false, LICMFlags))
continue;
if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
- MSSAU.get()))
+ &MSSAU))
Changed = true;
}
@@ -340,13 +321,6 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
return Changed;
}
-static void computeAliasSet(Loop &L, BasicBlock &Preheader,
- AliasSetTracker &CurAST) {
- for (BasicBlock *BB : L.blocks())
- CurAST.add(*BB);
- CurAST.add(Preheader);
-}
-
PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
// Nothing to do if there are no loops.
@@ -356,10 +330,7 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
AAResults &AA = FAM.getResult<AAManager>(F);
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
-
- MemorySSA *MSSA = EnableMSSAInLoopSink
- ? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
+ MemorySSA &MSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA();
// We want to do a postorder walk over the loops. Since loops are a tree this
// is equivalent to a reversed preorder walk and preorder is easy to compute
@@ -381,18 +352,11 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
if (!Preheader->getParent()->hasProfileData())
continue;
- std::unique_ptr<AliasSetTracker> CurAST;
- if (!EnableMSSAInLoopSink) {
- CurAST = std::make_unique<AliasSetTracker>(AA);
- computeAliasSet(L, *Preheader, *CurAST.get());
- }
-
// Note that we don't pass SCEV here because it is only used to invalidate
// loops in SCEV and we don't preserve (or request) SCEV at all making that
// unnecessary.
- Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
- /*ScalarEvolution*/ nullptr,
- CurAST.get(), MSSA);
+ Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, MSSA,
+ /*ScalarEvolution*/ nullptr);
} while (!PreorderLoops.empty());
if (!Changed)
@@ -400,13 +364,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
- if (MSSA) {
- PA.preserve<MemorySSAAnalysis>();
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
+ if (VerifyMemorySSA)
+ MSSA.verifyMemorySSA();
return PA;
}
@@ -432,24 +393,16 @@ struct LegacyLoopSinkPass : public LoopPass {
return false;
AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- std::unique_ptr<AliasSetTracker> CurAST;
- MemorySSA *MSSA = nullptr;
- if (EnableMSSAInLegacyLoopSink)
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- else {
- CurAST = std::make_unique<AliasSetTracker>(AA);
- computeAliasSet(*L, *Preheader, *CurAST.get());
- }
-
bool Changed = sinkLoopInvariantInstructions(
*L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
- SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA);
+ MSSA, SE ? &SE->getSE() : nullptr);
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
+ if (VerifyMemorySSA)
+ MSSA.verifyMemorySSA();
return Changed;
}
@@ -458,10 +411,8 @@ struct LegacyLoopSinkPass : public LoopPass {
AU.setPreservesCFG();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
getLoopAnalysisUsage(AU);
- if (EnableMSSAInLegacyLoopSink) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
}
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 654f0d2a03a8..9959e408e2e2 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -78,6 +78,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -91,9 +92,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
@@ -114,12 +113,12 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <cstdlib>
#include <iterator>
#include <limits>
#include <map>
@@ -142,10 +141,7 @@ static const unsigned MaxIVUsers = 200;
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;
-// Temporary flag to cleanup congruent phis after LSR phi expansion.
-// It's currently disabled until we can determine whether it's truly useful or
-// not. The flag should be removed after the v3.0 release.
-// This is now needed for ivchains.
+// Cleanup congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim(
"enable-lsr-phielim", cl::Hidden, cl::init(true),
cl::desc("Enable LSR phi elimination"));
@@ -481,6 +477,12 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
canonicalize(*L);
}
+static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
+ return SCEVExprContains(S, [&L](const SCEV *S) {
+ return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
+}
+
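containsAddRecDependentOnLoop is deliberately stronger than the top-level dyn_cast it replaces in the hunks below: SCEVExprContains walks the whole expression tree, so an add-rec nested inside a larger expression is still detected. A toy version of the recursive predicate, with Expr and the helper names invented for illustration:

    #include <functional>
    #include <vector>

    struct Expr {
      bool IsAddRec = false;
      const void *Loop = nullptr; // loop an add-rec belongs to, if any
      std::vector<const Expr *> Operands;
    };

    // Depth-first search over the tree, as SCEVExprContains does for SCEVs.
    bool exprContains(const Expr *E,
                      const std::function<bool(const Expr *)> &Pred) {
      if (Pred(E))
        return true;
      for (const Expr *Op : E->Operands)
        if (exprContains(Op, Pred))
          return true;
      return false;
    }

    bool containsAddRecOf(const Expr *E, const void *L) {
      return exprContains(
          E, [L](const Expr *S) { return S->IsAddRec && S->Loop == L; });
    }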
/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
@@ -494,18 +496,15 @@ bool Formula::isCanonical(const Loop &L) const {
if (Scale == 1 && BaseRegs.empty())
return false;
- const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
- if (SAR && SAR->getLoop() == &L)
+ if (containsAddRecDependentOnLoop(ScaledReg, L))
return true;
// If ScaledReg is not a recurrent expr, or it is but its loop is not current
// loop, meanwhile BaseRegs contains a recurrent expr reg related with current
// loop, we want to swap the reg in BaseRegs with ScaledReg.
- auto I = find_if(BaseRegs, [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ return none_of(BaseRegs, [&L](const SCEV *S) {
+ return containsAddRecDependentOnLoop(S, L);
});
- return I == BaseRegs.end();
}
/// Helper method to morph a formula into its canonical representation.
@@ -537,11 +536,9 @@ void Formula::canonicalize(const Loop &L) {
// If ScaledReg is an invariant with respect to L, find the reg from
// BaseRegs containing the recurrent expr related with Loop L. Swap the
// reg with ScaledReg.
- const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
- if (!SAR || SAR->getLoop() != &L) {
- auto I = find_if(BaseRegs, [&](const SCEV *S) {
- return isa<const SCEVAddRecExpr>(S) &&
- (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
+ auto I = find_if(BaseRegs, [&L](const SCEV *S) {
+ return containsAddRecDependentOnLoop(S, L);
});
if (I != BaseRegs.end())
std::swap(ScaledReg, *I);
@@ -1070,7 +1067,7 @@ public:
C.ScaleCost = 0;
}
- bool isLess(Cost &Other);
+ bool isLess(const Cost &Other);
void Lose();
@@ -1358,6 +1355,8 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+ if (isLoser())
+ return;
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
// Tally up the registers.
unsigned PrevAddRecCost = C.AddRecCost;
@@ -1467,7 +1466,7 @@ void Cost::Lose() {
}
/// Choose the lower cost.
-bool Cost::isLess(Cost &Other) {
+bool Cost::isLess(const Cost &Other) {
if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
C.Insns != Other.C.Insns)
return C.Insns < Other.C.Insns;
@@ -4081,23 +4080,24 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
continue;
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
- if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
- // TODO: This could be optimized to avoid all the copying.
- Formula F = Base;
- F.ScaledReg = Quotient;
- F.deleteBaseReg(F.BaseRegs[i]);
- // The canonical representation of 1*reg is reg, which is already in
- // Base. In that case, do not try to insert the formula, it will be
- // rejected anyway.
- if (F.Scale == 1 && (F.BaseRegs.empty() ||
- (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
- continue;
- // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
- // non canonical Formula with ScaledReg's loop not being L.
- if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
- F.canonicalize(*L);
- (void)InsertFormula(LU, LUIdx, F);
- }
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
+ if (!Quotient->isZero()) {
+ // TODO: This could be optimized to avoid all the copying.
+ Formula F = Base;
+ F.ScaledReg = Quotient;
+ F.deleteBaseReg(F.BaseRegs[i]);
+ // The canonical representation of 1*reg is reg, which is already in
+ // Base. In that case, do not try to insert the formula, it will be
+ // rejected anyway.
+ if (F.Scale == 1 && (F.BaseRegs.empty() ||
+ (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
+ continue;
+ // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
+ // non canonical Formula with ScaledReg's loop not being L.
+ if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
+ F.canonicalize(*L);
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
}
}
@@ -5601,6 +5601,27 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
DeadInsts.emplace_back(OperandIsInstr);
}
+// Check if there are any loop exit values that are used only once within the
+// loop and may potentially be optimized with a call to rewriteLoopExitValues.
+static bool LoopExitValHasSingleUse(Loop *L) {
+ BasicBlock *ExitBB = L->getExitBlock();
+ if (!ExitBB)
+ return false;
+
+ for (PHINode &ExitPhi : ExitBB->phis()) {
+ if (ExitPhi.getNumIncomingValues() != 1)
+ break;
+
+ BasicBlock *Pred = ExitPhi.getIncomingBlock(0);
+ Value *IVNext = ExitPhi.getIncomingValueForBlock(Pred);
+ // One use would be the exit phi node, and there should be only one other
+ // use for this to be considered.
+ if (IVNext->getNumUses() == 2)
+ return true;
+ }
+ return false;
+}
+
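The use-count test above encodes a shape check: for a single-predecessor exit PHI, the incoming IV value having exactly two uses means one use is the exit PHI itself and only one other remains in the loop, which is the pattern rewriteLoopExitValues can profitably remove. A toy restatement of the counting logic (plain structs replace the IR types):

    #include <vector>

    struct Value { std::vector<const void *> Uses; };
    struct ExitPhi { const Value *Incoming; }; // single-incoming exit PHI

    // True when some incoming value's only uses are the exit PHI plus one
    // other, so rewriting the exit value can delete the in-loop update.
    bool exitValHasSingleUse(const std::vector<ExitPhi> &Phis) {
      for (const ExitPhi &P : Phis)
        if (P.Incoming->Uses.size() == 2)
          return true;
      return false;
    }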
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
@@ -5894,40 +5915,57 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
}
namespace {
+
+/// Enables more convenient iteration over a DWARF expression vector.
+static iterator_range<llvm::DIExpression::expr_op_iterator>
+ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
+ llvm::DIExpression::expr_op_iterator Begin =
+ llvm::DIExpression::expr_op_iterator(Expr.begin());
+ llvm::DIExpression::expr_op_iterator End =
+ llvm::DIExpression::expr_op_iterator(Expr.end());
+ return {Begin, End};
+}
+
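ToDwarfOpIter exists because a DWARF expression is not element-wise: each opcode carries zero or more inline operands, so iterating the raw uint64_t vector directly would misread operands as opcodes. A minimal stand-alone walker over an opcode/arity encoding shows the idea; the opcode values and arity table here are placeholders, not the real DWARF constants:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    constexpr uint64_t OP_ARG = 1;  // takes one inline operand (placeholder)
    constexpr uint64_t OP_PLUS = 2; // takes none (placeholder)
    unsigned numOperands(uint64_t Op) { return Op == OP_ARG ? 1 : 0; }

    int main() {
      std::vector<uint64_t> Expr = {OP_ARG, 0, OP_PLUS};
      // Step opcode-by-opcode, skipping each opcode's inline operands.
      for (size_t I = 0; I < Expr.size(); I += 1 + numOperands(Expr[I]))
        std::printf("op %llu with %u operand(s)\n",
                    (unsigned long long)Expr[I], numOperands(Expr[I]));
      return 0;
    }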
struct SCEVDbgValueBuilder {
SCEVDbgValueBuilder() = default;
- SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
- Values = Base.Values;
+ SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
+
+ void clone(const SCEVDbgValueBuilder &Base) {
+ LocationOps = Base.LocationOps;
Expr = Base.Expr;
}
+ void clear() {
+ LocationOps.clear();
+ Expr.clear();
+ }
+
/// The DIExpression as we translate the SCEV.
SmallVector<uint64_t, 6> Expr;
/// The location ops of the DIExpression.
- SmallVector<llvm::ValueAsMetadata *, 2> Values;
+ SmallVector<Value *, 2> LocationOps;
void pushOperator(uint64_t Op) { Expr.push_back(Op); }
void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
/// in the set of values referenced by the expression.
- void pushValue(llvm::Value *V) {
+ void pushLocation(llvm::Value *V) {
Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
- auto *It =
- std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V));
+ auto *It = std::find(LocationOps.begin(), LocationOps.end(), V);
unsigned ArgIndex = 0;
- if (It != Values.end()) {
- ArgIndex = std::distance(Values.begin(), It);
+ if (It != LocationOps.end()) {
+ ArgIndex = std::distance(LocationOps.begin(), It);
} else {
- ArgIndex = Values.size();
- Values.push_back(llvm::ValueAsMetadata::get(V));
+ ArgIndex = LocationOps.size();
+ LocationOps.push_back(V);
}
Expr.push_back(ArgIndex);
}
void pushValue(const SCEVUnknown *U) {
llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
- pushValue(V);
+ pushLocation(V);
}
bool pushConst(const SCEVConstant *C) {
@@ -5938,6 +5976,12 @@ struct SCEVDbgValueBuilder {
return true;
}
+ // Iterating the expression as DWARF ops is convenient when updating
+ // DWARF_OP_LLVM_args.
+ iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
+ return ToDwarfOpIter(Expr);
+ }
+
/// Several SCEV types are sequences of the same arithmetic operator applied
/// to constants and values that may be extended or truncated.
bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
@@ -5979,7 +6023,7 @@ struct SCEVDbgValueBuilder {
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!U->getValue())
return false;
- pushValue(U->getValue());
+ pushLocation(U->getValue());
} else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
@@ -6010,52 +6054,6 @@ struct SCEVDbgValueBuilder {
return Success;
}
- void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) {
- // Re-state assumption that this dbg.value is not variadic. Any remaining
- // opcodes in its expression operate on a single value already on the
- // expression stack. Prepend our operations, which will re-compute and
- // place that value on the expression stack.
- assert(!DI.hasArgList());
- auto *NewExpr =
- DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true);
- DI.setExpression(NewExpr);
-
- auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(Values);
- DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef));
- }
-
- /// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the
- /// location op index 0.
- void setShortFinalExpression(llvm::DbgValueInst &DI,
- const DIExpression *OldExpr) {
- assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) &&
- "Expected DW_OP_llvm_arg and 0.");
- DI.replaceVariableLocationOp(
- 0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0]));
-
- // See setFinalExpression: prepend our opcodes on the start of any old
- // expression opcodes.
- assert(!DI.hasArgList());
- llvm::SmallVector<uint64_t, 6> FinalExpr(llvm::drop_begin(Expr, 2));
- auto *NewExpr =
- DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
- DI.setExpression(NewExpr);
- }
-
- /// Once the IV and variable SCEV translation is complete, write it to the
- /// source DVI.
- void applyExprToDbgValue(llvm::DbgValueInst &DI,
- const DIExpression *OldExpr) {
- assert(!Expr.empty() && "Unexpected empty expression.");
- // Emit a simpler form if only a single location is referenced.
- if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg &&
- Expr[1] == 0) {
- setShortFinalExpression(DI, OldExpr);
- } else {
- setFinalExpression(DI, OldExpr);
- }
- }
-
/// Return true if the combination of arithmetic operator and underlying
/// SCEV constant value is an identity function.
bool isIdentityFunction(uint64_t Op, const SCEV *S) {
@@ -6104,6 +6102,48 @@ struct SCEVDbgValueBuilder {
return true;
}
+ /// Create an expression that is an offset from a value (usually the IV).
+ void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
+ pushLocation(OffsetValue);
+ DIExpression::appendOffset(Expr, Offset);
+ LLVM_DEBUG(
+ dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
+ << std::to_string(Offset) << "\n");
+ }
+
+ /// Combine a translation of the SCEV and the IV to create an expression that
+ /// recovers a location's value.
+ /// Returns true if an expression was created.
+ bool createIterCountExpr(const SCEV *S,
+ const SCEVDbgValueBuilder &IterationCount,
+ ScalarEvolution &SE) {
+ // SCEVs for SSA values are most frequently of the form
+ // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
+ // This is because %a is a PHI node that is not the IV. However, these
+ // SCEVs have not been observed to result in debuginfo-lossy optimisations,
+ // so it's not expected that this point will be reached.
+ if (!isa<SCEVAddRecExpr>(S))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
+ << '\n');
+
+ const auto *Rec = cast<SCEVAddRecExpr>(S);
+ if (!Rec->isAffine())
+ return false;
+
+ if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return false;
+
+ // Initialise a new builder with the iteration count expression. In
+ // combination with the value's SCEV this enables recovery.
+ clone(IterationCount);
+ if (!SCEVToValueExpr(*Rec, SE))
+ return false;
+
+ return true;
+ }
+
/// Convert a SCEV of a value to a DIExpression that is pushed onto the
/// builder's expression stack. The stack should already contain an
/// expression for the iteration count, so that it can be multiplied by
@@ -6133,74 +6173,294 @@ struct SCEVDbgValueBuilder {
}
return true;
}
+
+ // Append the current expression and locations to a location list and an
+ // expression list. Modify the DW_OP_LLVM_arg indexes to account for
+ // the locations already present in the destination list.
+ void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
+ SmallVectorImpl<Value *> &DestLocations) {
+ assert(!DestLocations.empty() &&
+ "Expected the locations vector to contain the IV");
+ // The DWARF_OP_LLVM_arg arguments of the expression being appended must be
+ // modified to account for the locations already in the destination vector.
+ // All builders contain the IV as the first location op.
+ assert(!LocationOps.empty() &&
+ "Expected the location ops to contain the IV.");
+ // DestIndexMap[n] contains the index in DestLocations for the nth
+ // location in this SCEVDbgValueBuilder.
+ SmallVector<uint64_t, 2> DestIndexMap;
+ for (const auto &Op : LocationOps) {
+ auto It = find(DestLocations, Op);
+ if (It != DestLocations.end()) {
+ // Location already exists in DestLocations, reuse existing ArgIndex.
+ DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
+ continue;
+ }
+ // Location is not in DestLocations, add it.
+ DestIndexMap.push_back(DestLocations.size());
+ DestLocations.push_back(Op);
+ }
+
+ for (const auto &Op : expr_ops()) {
+ if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
+ Op.appendToVector(DestExpr);
+ continue;
+ }
+
+ DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
+ // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
+ // DestIndexMap[n] contains its new index in DestLocations.
+ uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
+ DestExpr.push_back(NewIndex);
+ }
+ }
};
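appendToVectors above solves a merge problem: each builder numbers its locations from zero, so when several expressions share one destination list, every DW_OP_LLVM_arg operand must be rewritten through a local-index-to-merged-index map, deduplicating locations along the way. The core remap in isolation, as a sketch with invented names:

    #include <algorithm>
    #include <vector>

    // Merge Src's locations into Dest; returns, for each source index, its
    // index in the merged list. Existing entries are reused, new ones appended.
    std::vector<size_t> mergeLocations(const std::vector<const void *> &Src,
                                       std::vector<const void *> &Dest) {
      std::vector<size_t> IndexMap;
      for (const void *Loc : Src) {
        auto It = std::find(Dest.begin(), Dest.end(), Loc);
        if (It != Dest.end()) {
          IndexMap.push_back(It - Dest.begin()); // duplicate: reuse its slot
        } else {
          IndexMap.push_back(Dest.size());       // new: append and record index
          Dest.push_back(Loc);
        }
      }
      return IndexMap; // apply to each DW_OP_LLVM_arg operand afterwards
    }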
+/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
+/// and DIExpression.
struct DVIRecoveryRec {
+ DVIRecoveryRec(DbgValueInst *DbgValue)
+ : DVI(DbgValue), Expr(DbgValue->getExpression()),
+ HadLocationArgList(false) {}
+
DbgValueInst *DVI;
DIExpression *Expr;
- Metadata *LocationOp;
- const llvm::SCEV *SCEV;
+ bool HadLocationArgList;
+ SmallVector<WeakVH, 2> LocationOps;
+ SmallVector<const llvm::SCEV *, 2> SCEVs;
+ SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;
+
+ void clear() {
+ for (auto &RE : RecoveryExprs)
+ RE.reset();
+ RecoveryExprs.clear();
+ }
+
+ ~DVIRecoveryRec() { clear(); }
};
} // namespace
-static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
- const SCEVDbgValueBuilder &IterationCount,
- ScalarEvolution &SE) {
- // LSR may add locations to previously single location-op DVIs which
- // are currently not supported.
- if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
- return;
+/// Returns the total number of DW_OP_llvm_arg operands in the expression.
+/// This helps in determining if a DIArglist is necessary or can be omitted from
+/// the dbg.value.
+static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
+ auto expr_ops = ToDwarfOpIter(Expr);
+ unsigned Count = 0;
+ for (auto Op : expr_ops)
+ if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
+ Count++;
+ return Count;
+}
+
+/// Overwrites DVI with the location and Ops as the DIExpression. This will
+/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
+/// because a DIArglist is not created for the first argument of the dbg.value.
+static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location,
+ SmallVectorImpl<uint64_t> &Ops) {
+ assert(
+ numLLVMArgOps(Ops) == 0 &&
+ "Expected expression that does not contain any DW_OP_llvm_arg operands.");
+ DVI.setRawLocation(ValueAsMetadata::get(Location));
+ DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
+}
+
+/// Overwrite DVI with locations placed into a DIArglist.
+static void updateDVIWithLocations(DbgValueInst &DVI,
+ SmallVectorImpl<Value *> &Locations,
+ SmallVectorImpl<uint64_t> &Ops) {
+ assert(numLLVMArgOps(Ops) != 0 &&
+ "Expected expression that references DIArglist locations using "
+ "DW_OP_llvm_arg operands.");
+ SmallVector<ValueAsMetadata *, 3> MetadataLocs;
+ for (Value *V : Locations)
+ MetadataLocs.push_back(ValueAsMetadata::get(V));
+ auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
+ DVI.setRawLocation(llvm::DIArgList::get(DVI.getContext(), ValArrayRef));
+ DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
+}
+
+/// Write the new expression and new location ops for the dbg.value. If
+/// possible, reduce the size of the dbg.value intrinsic by omitting the
+/// DIArglist, which can be done if:
+/// 1. There is only a single location, referenced by a single DW_OP_llvm_arg.
+/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
+static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
+ SmallVectorImpl<Value *> &NewLocationOps,
+ SmallVectorImpl<uint64_t> &NewExpr) {
+ unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
+ if (NumLLVMArgs == 0) {
+ // Location assumed to be on the stack.
+ updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], NewExpr);
+ } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
+ // There is only a single DW_OP_llvm_arg at the start of the expression,
+ // so it can be omitted along with DIArglist.
+ assert(NewExpr[1] == 0 &&
+ "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
+ llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
+ updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], ShortenedOps);
+ } else {
+ // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
+ updateDVIWithLocations(*DVIRec.DVI, NewLocationOps, NewExpr);
+ }
+
+ // If the DIExpression was previously empty then add the stack terminator.
+ // Non-empty expressions have only had elements inserted into them and so the
+ // terminator should already be present e.g. stack_value or fragment.
+ DIExpression *SalvageExpr = DVIRec.DVI->getExpression();
+ if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
+ SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
+ DVIRec.DVI->setExpression(SalvageExpr);
+ }
+}
+
+/// Cached location ops may be erased during LSR, in which case an undef is
+/// required when restoring from the cache. The type of that location is no
+/// longer available, so just use int8. The undef will be replaced by one or
+/// more locations later when a SCEVDbgValueBuilder selects alternative
+/// locations to use for the salvage.
+static Value *getValueOrUndef(WeakVH &VH, LLVMContext &C) {
+ return (VH) ? VH : UndefValue::get(llvm::Type::getInt8Ty(C));
+}
+
+/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
+static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
+ LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
+ << "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n');
+ assert(DVIRec.Expr && "Expected an expression");
+ DVIRec.DVI->setExpression(DVIRec.Expr);
+
+ // Even a single location-op may be inside a DIArgList and referenced with
+ // DW_OP_LLVM_arg, which is valid only with a DIArgList.
+ if (!DVIRec.HadLocationArgList) {
+ assert(DVIRec.LocationOps.size() == 1 &&
+ "Unexpected number of location ops.");
+ // LSR's unsuccessful salvage attempt may have added DIArgList, which in
+ // this case was not present before, so force the location back to a single
+ // uncontained Value.
+ Value *CachedValue =
+ getValueOrUndef(DVIRec.LocationOps[0], DVIRec.DVI->getContext());
+ DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue));
+ } else {
+ SmallVector<ValueAsMetadata *, 3> MetadataLocs;
+ for (WeakVH VH : DVIRec.LocationOps) {
+ Value *CachedValue = getValueOrUndef(VH, DVIRec.DVI->getContext());
+ MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
+ }
+ auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
+ DVIRec.DVI->setRawLocation(
+ llvm::DIArgList::get(DVIRec.DVI->getContext(), ValArrayRef));
+ }
+ LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n');
+}
- // SCEVs for SSA values are most frquently of the form
- // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
- // This is because %a is a PHI node that is not the IV. However, these
- // SCEVs have not been observed to result in debuginfo-lossy optimisations,
- // so its not expected this point will be reached.
- if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
- return;
+static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
+ llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
+ const SCEV *SCEVInductionVar,
+ SCEVDbgValueBuilder IterCountExpr) {
+ if (!DVIRec.DVI->isUndef())
+ return false;
- LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
- << *CachedDVI.SCEV << '\n');
+ // LSR may have caused several changes to the dbg.value in the failed salvage
+ // attempt. So restore the DIExpression, the location ops and also the
+ // location ops format, which is always DIArglist for multiple ops, but only
+ // sometimes for a single op.
+ restorePreTransformState(DVIRec);
+
+ // LocationOpIndexMap[i] will store the post-LSR location index of
+ // the non-optimised out location at pre-LSR index i.
+ SmallVector<int64_t, 2> LocationOpIndexMap;
+ LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
+ SmallVector<Value *, 2> NewLocationOps;
+ NewLocationOps.push_back(LSRInductionVar);
+
+ for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
+ WeakVH VH = DVIRec.LocationOps[i];
+ // Place the locations not optimised out in the list first, avoiding
+ // inserts later. The map is used to update the DIExpression's
+ // DW_OP_LLVM_arg arguments as the expression is updated.
+ if (VH && !isa<UndefValue>(VH)) {
+ NewLocationOps.push_back(VH);
+ LocationOpIndexMap[i] = NewLocationOps.size() - 1;
+ LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
+ << " now at index " << LocationOpIndexMap[i] << "\n");
+ continue;
+ }
- const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
- if (!Rec->isAffine())
- return;
+ // It's possible that a value referred to in the SCEV may have been
+ // optimised out by LSR.
+ if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
+ SE.containsUndefs(DVIRec.SCEVs[i])) {
+ LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
+ << " refers to a location that is now undef or erased. "
+ "Salvage abandoned.\n");
+ return false;
+ }
- if (CachedDVI.SCEV->getExpressionSize() > MaxSCEVSalvageExpressionSize)
- return;
+ LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
+ << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
+
+ DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
+ SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
+
+ // Create an offset-based salvage expression if possible, as it requires
+ // less DWARF ops than an iteration count-based expression.
+ if (Optional<APInt> Offset =
+ SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
+ if (Offset.getValue().getMinSignedBits() <= 64)
+ SalvageExpr->createOffsetExpr(Offset.getValue().getSExtValue(),
+ LSRInductionVar);
+ } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
+ SE))
+ return false;
+ }
- // Initialise a new builder with the iteration count expression. In
- // combination with the value's SCEV this enables recovery.
- SCEVDbgValueBuilder RecoverValue(IterationCount);
- if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
- return;
+ // Merge the DbgValueBuilder generated expressions and the original
+ // DIExpression, place the result into an new vector.
+ SmallVector<uint64_t, 3> NewExpr;
+ if (DVIRec.Expr->getNumElements() == 0) {
+ assert(DVIRec.RecoveryExprs.size() == 1 &&
+ "Expected only a single recovery expression for an empty "
+ "DIExpression.");
+ assert(DVIRec.RecoveryExprs[0] &&
+ "Expected a SCEVDbgSalvageBuilder for location 0");
+ SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
+ B->appendToVectors(NewExpr, NewLocationOps);
+ }
+ for (const auto &Op : DVIRec.Expr->expr_ops()) {
+ // Most Ops needn't be updated.
+ if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
+ Op.appendToVector(NewExpr);
+ continue;
+ }
- LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
- RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
- LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
-}
+ uint64_t LocationArgIndex = Op.getArg(0);
+ SCEVDbgValueBuilder *DbgBuilder =
+ DVIRec.RecoveryExprs[LocationArgIndex].get();
+ // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
+ // optimise it away. So just translate the argument to the updated
+ // location index.
+ if (!DbgBuilder) {
+ NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
+ assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
+ "Expected a positive index for the location-op position.");
+ NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
+ continue;
+ }
+ // The location has a recovery expression.
+ DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
+ }
-static void RewriteDVIUsingOffset(DVIRecoveryRec &DVIRec, llvm::PHINode &IV,
- int64_t Offset) {
- assert(!DVIRec.DVI->hasArgList() && "Expected single location-op dbg.value.");
- DbgValueInst *DVI = DVIRec.DVI;
- SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
- DIExpression *Expr = DIExpression::prependOpcodes(DVIRec.Expr, Ops, true);
- LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *DVIRec.DVI << '\n');
- DVI->setExpression(Expr);
- llvm::Value *ValIV = dyn_cast<llvm::Value>(&IV);
- DVI->replaceVariableLocationOp(
- 0u, llvm::MetadataAsValue::get(DVI->getContext(),
- llvm::ValueAsMetadata::get(ValIV)));
- LLVM_DEBUG(dbgs() << "scev-salvage: updated with offset to IV: "
- << *DVIRec.DVI << '\n');
+ UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
+ LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n");
+ return true;
}
+/// Obtain an expression for the iteration count, then attempt to salvage the
+/// dbg.value intrinsics.
static void
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
llvm::PHINode *LSRInductionVar,
- SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
+ SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
if (DVIToUpdate.empty())
return;
@@ -6213,49 +6473,22 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
if (!IVAddRec->isAffine())
return;
+ // Prevent translation using excessive resources.
if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
return;
// The iteration count is required to recover location values.
SCEVDbgValueBuilder IterCountExpr;
- IterCountExpr.pushValue(LSRInductionVar);
+ IterCountExpr.pushLocation(LSRInductionVar);
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
return;
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
<< '\n');
- // Needn't salvage if the location op hasn't been undef'd by LSR.
for (auto &DVIRec : DVIToUpdate) {
- if (!DVIRec.DVI->isUndef())
- continue;
-
- // Some DVIs that were single location-op when cached are now multi-op,
- // due to LSR optimisations. However, multi-op salvaging is not yet
- // supported by SCEV salvaging. But, we can attempt a salvage by restoring
- // the pre-LSR single-op expression.
- if (DVIRec.DVI->hasArgList()) {
- if (!DVIRec.DVI->getVariableLocationOp(0))
- continue;
- llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
- DVIRec.DVI->setRawLocation(
- llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
- DVIRec.DVI->setExpression(DVIRec.Expr);
- }
-
- LLVM_DEBUG(dbgs() << "scev-salvage: value to recover SCEV: "
- << *DVIRec.SCEV << '\n');
-
- // Create a simple expression if the IV and value to salvage SCEVs
- // start values differ by only a constant value.
- if (Optional<APInt> Offset =
- SE.computeConstantDifference(DVIRec.SCEV, SCEVInductionVar)) {
- if (Offset.getValue().getMinSignedBits() <= 64)
- RewriteDVIUsingOffset(DVIRec, *LSRInductionVar,
- Offset.getValue().getSExtValue());
- } else {
- RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
- }
+ SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
+ IterCountExpr);
}
}
}
@@ -6263,39 +6496,53 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
/// Identify and cache salvageable DVI locations and expressions along with the
/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and salvaging.
-static void
-DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
- SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
- SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
+static void DbgGatherSalvagableDVI(
+ Loop *L, ScalarEvolution &SE,
+ SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
+ SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
for (auto &B : L->getBlocks()) {
for (auto &I : *B) {
auto DVI = dyn_cast<DbgValueInst>(&I);
if (!DVI)
continue;
-
+ // Ensure that the dbg.value is not cached if any location op is undef.
if (DVI->isUndef())
continue;
- if (DVI->hasArgList())
- continue;
+ // Check that the location op SCEVs are suitable for translation to
+ // DIExpression.
+ const auto &HasTranslatableLocationOps =
+ [&](const DbgValueInst *DVI) -> bool {
+ for (const auto LocOp : DVI->location_ops()) {
+ if (!LocOp)
+ return false;
- if (!DVI->getVariableLocationOp(0) ||
- !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
- continue;
+ if (!SE.isSCEVable(LocOp->getType()))
+ return false;
- // SCEVUnknown wraps an llvm::Value, it does not have a start and stride.
- // Therefore no translation to DIExpression is performed.
- const SCEV *S = SE.getSCEV(DVI->getVariableLocationOp(0));
- if (isa<SCEVUnknown>(S))
- continue;
+ const SCEV *S = SE.getSCEV(LocOp);
+ if (SE.containsUndefs(S))
+ return false;
+ }
+ return true;
+ };
- // Avoid wasting resources generating an expression containing undef.
- if (SE.containsUndefs(S))
+ if (!HasTranslatableLocationOps(DVI))
continue;
- SalvageableDVISCEVs.push_back(
- {DVI, DVI->getExpression(), DVI->getRawLocation(),
- SE.getSCEV(DVI->getVariableLocationOp(0))});
+ std::unique_ptr<DVIRecoveryRec> NewRec =
+ std::make_unique<DVIRecoveryRec>(DVI);
+ // Each location Op may need a SCEVDbgValueBuilder in order to recover it.
+ // Pre-allocating a vector will enable quick lookups of the builder later
+ // during the salvage.
+ NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps());
+ for (const auto LocOp : DVI->location_ops()) {
+ NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
+ NewRec->LocationOps.push_back(LocOp);
+ NewRec->HadLocationArgList = DVI->hasArgList();
+ }
+ SalvageableDVISCEVs.push_back(std::move(NewRec));
DVIHandles.insert(DVI);
}
}
@@ -6344,9 +6591,9 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// Debug preservation - before we start removing anything identify which DVI
// meet the salvageable criteria and store their DIExpression and SCEVs.
- SmallVector<DVIRecoveryRec, 2> SalvageableDVI;
+ SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
- DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles);
+ DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);
bool Changed = false;
std::unique_ptr<MemorySSAUpdater> MSSAU;
@@ -6375,8 +6622,26 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
}
}
+ // LSR may at times remove all uses of an induction variable from a loop.
+ // The only remaining use is the PHI in the exit block.
+ // When this is the case, if the exit value of the IV can be calculated using
+ // SCEV, we can replace the exit block PHI with the final value of the IV and
+ // skip the updates in each loop iteration.
+ if (L->isRecursivelyLCSSAForm(DT, LI) && LoopExitValHasSingleUse(L)) {
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Rewriter(SE, DL, "lsr", false);
+ int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
+ OnlyCheapRepl, DeadInsts);
+ if (Rewrites) {
+ Changed = true;
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
+ MSSAU.get());
+ DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
+ }
+ }
- if (SalvageableDVI.empty())
+ if (SalvageableDVIRecords.empty())
return Changed;
// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
@@ -6384,13 +6649,16 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// TODO: Allow for multiple IV references for nested AddRecSCEVs
for (auto &L : LI) {
if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
- DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI);
+ DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
else {
LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
"could not be identified.\n");
}
}
+ for (auto &Rec : SalvageableDVIRecords)
+ Rec->clear();
+ SalvageableDVIRecords.clear();
DVIHandles.clear();
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 1ecbb86724e1..8c2868563227 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -42,10 +43,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
-#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <cassert>
@@ -331,14 +330,23 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
Loop *SubLoop = L->getSubLoops()[0];
- unsigned InnerLoopSize =
+ InstructionCost InnerLoopSizeIC =
ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
Convergent, TTI, EphValues, UP.BEInsns);
- unsigned OuterLoopSize =
+ InstructionCost OuterLoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
- LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
+ LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n");
+ LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n");
+
+ if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
+ << " with invalid cost.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
+ unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
+
if (NotDuplicatable) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
"instructions.\n");
@@ -364,7 +372,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderInner});
- if (NewInnerEpilogueLoopID.hasValue())
+ if (NewInnerEpilogueLoopID)
SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
// Find trip count and trip multiple
@@ -394,14 +402,14 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderOuter});
- if (NewOuterEpilogueLoopID.hasValue())
+ if (NewOuterEpilogueLoopID)
EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
}
Optional<MDNode *> NewInnerLoopID =
makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupInner});
- if (NewInnerLoopID.hasValue())
+ if (NewInnerLoopID)
SubLoop->setLoopID(NewInnerLoopID.getValue());
else
SubLoop->setLoopID(OrigSubLoopID);
@@ -410,7 +418,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
OrigOuterLoopID,
{LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
- if (NewOuterLoopID.hasValue()) {
+ if (NewOuterLoopID) {
L->setLoopID(NewOuterLoopID.getValue());
// Do not setLoopAlreadyUnrolled if a followup was given.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 9beb2281cf0f..fda86afe5f9d 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -25,7 +25,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -133,7 +132,7 @@ static cl::opt<bool> UnrollAllowRemainder(
"when unrolling a loop."));
static cl::opt<bool>
- UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
+ UnrollRuntime("unroll-runtime", cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
static cl::opt<unsigned> UnrollMaxUpperBound(
@@ -254,19 +253,19 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
// Apply user values provided by argument
- if (UserThreshold.hasValue()) {
+ if (UserThreshold) {
UP.Threshold = *UserThreshold;
UP.PartialThreshold = *UserThreshold;
}
- if (UserCount.hasValue())
+ if (UserCount)
UP.Count = *UserCount;
- if (UserAllowPartial.hasValue())
+ if (UserAllowPartial)
UP.Partial = *UserAllowPartial;
- if (UserRuntime.hasValue())
+ if (UserRuntime)
UP.Runtime = *UserRuntime;
- if (UserUpperBound.hasValue())
+ if (UserUpperBound)
UP.UpperBound = *UserUpperBound;
- if (UserFullUnrollMaxCount.hasValue())
+ if (UserFullUnrollMaxCount)
UP.FullUnrollMaxCount = *UserFullUnrollMaxCount;
return UP;
@@ -664,7 +663,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
}
/// ApproximateLoopSize - Approximate the size of the loop.
-unsigned llvm::ApproximateLoopSize(
+InstructionCost llvm::ApproximateLoopSize(
const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
@@ -675,7 +674,7 @@ unsigned llvm::ApproximateLoopSize(
NotDuplicatable = Metrics.notDuplicatable;
Convergent = Metrics.convergent;
- unsigned LoopSize = Metrics.NumInsts;
+ InstructionCost LoopSize = Metrics.NumInsts;
 // Don't allow an estimate of size zero. This would allow unrolling of loops
 // with huge iteration counts, which is a compile time problem even if it's
@@ -683,7 +682,9 @@ unsigned llvm::ApproximateLoopSize(
// that each loop has at least three instructions (likely a conditional
// branch, a comparison feeding that branch, and some kind of loop increment
// feeding that comparison instruction).
- LoopSize = std::max(LoopSize, BEInsns + 1);
+ if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+ // This is an open-coded max() on InstructionCost.
+ LoopSize = BEInsns + 1;
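+ // (Illustrative note, hypothetical values: a cost of 2 is clamped up to
+ // BEInsns + 1 here, while InstructionCost::getInvalid() passes through
+ // unchanged, so callers can still detect it via isValid() and refuse to
+ // unroll.)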
return LoopSize;
}
@@ -788,15 +789,13 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
// 2nd priority is unroll count set by pragma.
if (PInfo.PragmaCount > 0) {
- if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
- UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
+ if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)))
return PInfo.PragmaCount;
}
- if (PInfo.PragmaFullUnroll && TripCount != 0) {
- if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
- return TripCount;
- }
+ if (PInfo.PragmaFullUnroll && TripCount != 0)
+ return TripCount;
+
 // If we didn't return until here, continue on to the other priorities.
return None;
}
@@ -912,7 +911,7 @@ bool llvm::computeUnrollCount(
if (PP.PeelCount) {
if (UnrollCount.getNumOccurrences() > 0) {
report_fatal_error("Cannot specify both explicit peel count and "
- "explicit unroll count");
+ "explicit unroll count", /*GenCrashDiag=*/false);
}
UP.Count = 1;
UP.Runtime = false;
@@ -1192,10 +1191,18 @@ static LoopUnrollResult tryToUnrollLoop(
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
- unsigned LoopSize =
+ InstructionCost LoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSizeIC << "\n");
+
+ if (!LoopSizeIC.isValid()) {
+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
+ << " with invalid cost.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ unsigned LoopSize = *LoopSizeIC.getValue();
+
if (NotDuplicatable) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");
@@ -1316,7 +1323,7 @@ static LoopUnrollResult tryToUnrollLoop(
Optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupRemainder});
- if (RemainderLoopID.hasValue())
+ if (RemainderLoopID)
RemainderLoop->setLoopID(RemainderLoopID.getValue());
}
@@ -1324,7 +1331,7 @@ static LoopUnrollResult tryToUnrollLoop(
Optional<MDNode *> NewLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupUnrolled});
- if (NewLoopID.hasValue()) {
+ if (NewLoopID) {
L->setLoopID(NewLoopID.getValue());
// Do not setLoopAlreadyUnrolled if loop attributes have been specified
@@ -1548,8 +1555,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
PreservedAnalyses LoopUnrollPass::run(Function &F,
FunctionAnalysisManager &AM) {
- auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
+ // If there are no loops in the function, return before computing other
+ // expensive analyses.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
deleted file mode 100644
index 76bb5497c2c2..000000000000
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ /dev/null
@@ -1,1774 +0,0 @@
-//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass transforms loops that contain branches on loop-invariant conditions
-// to multiple loops. For example, it turns the left into the right code:
-//
-// for (...) if (lic)
-// A for (...)
-// if (lic) A; B; C
-// B else
-// C for (...)
-// A; C
-//
-// This can increase the size of the code exponentially (doubling it every time
-// a loop is unswitched) so we only unswitch if the resultant code will be
-// smaller than a threshold.
-//
-// This pass expects LICM to be run before it to hoist invariant conditions out
-// of the loop, to make the unswitching opportunity obvious.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <algorithm>
-#include <cassert>
-#include <map>
-#include <set>
-#include <tuple>
-#include <utility>
-#include <vector>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-unswitch"
-
-STATISTIC(NumBranches, "Number of branches unswitched");
-STATISTIC(NumSwitches, "Number of switches unswitched");
-STATISTIC(NumGuards, "Number of guards unswitched");
-STATISTIC(NumSelects , "Number of selects unswitched");
-STATISTIC(NumTrivial , "Number of unswitches that are trivial");
-STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
-STATISTIC(TotalInsts, "Total number of instructions analyzed");
-
-// The specific value of 100 here was chosen based only on intuition and a
-// few specific examples.
-static cl::opt<unsigned>
-Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
- cl::init(100), cl::Hidden);
-
-static cl::opt<unsigned>
- MSSAThreshold("loop-unswitch-memoryssa-threshold",
- cl::desc("Max number of memory uses to explore during "
- "partial unswitching analysis"),
- cl::init(100), cl::Hidden);
-
-namespace {
-
- class LUAnalysisCache {
- using UnswitchedValsMap =
- DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>;
- using UnswitchedValsIt = UnswitchedValsMap::iterator;
-
- struct LoopProperties {
- unsigned CanBeUnswitchedCount;
- unsigned WasUnswitchedCount;
- unsigned SizeEstimation;
- UnswitchedValsMap UnswitchedVals;
- };
-
- // Here we use std::map instead of DenseMap, since we need to keep valid
- // LoopProperties pointer for current loop for better performance.
- using LoopPropsMap = std::map<const Loop *, LoopProperties>;
- using LoopPropsMapIt = LoopPropsMap::iterator;
-
- LoopPropsMap LoopsProperties;
- UnswitchedValsMap *CurLoopInstructions = nullptr;
- LoopProperties *CurrentLoopProperties = nullptr;
-
- // A loop unswitching with an estimated cost above this threshold
- // is not performed. MaxSize is turned into unswitching quota for
- // the current loop, and reduced correspondingly, though note that
- // the quota is returned by releaseMemory() when the loop has been
- // processed, so that MaxSize will return to its previous
- // value. So in most cases MaxSize will equal the Threshold flag
- // when a new loop is processed. An exception to that is that
- // MaxSize will have a smaller value while processing nested loops
- // that were introduced due to loop unswitching of an outer loop.
- //
- // FIXME: The way that MaxSize works is subtle and depends on the
- // pass manager processing loops and calling releaseMemory() in a
- // specific order. It would be good to find a more straightforward
- // way of doing what MaxSize does.
- unsigned MaxSize;
-
- public:
- LUAnalysisCache() : MaxSize(Threshold) {}
-
- // Analyze the loop. Check its size and determine whether it is possible
- // to unswitch it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
- AssumptionCache *AC);
-
- // Clean all data related to the given loop.
- void forgetLoop(const Loop *L);
-
- // Mark case value as unswitched.
- // Since a SwitchInst can be partly unswitched, keep track of all
- // unswitched values in order to avoid extra unswitching in cloned loops.
- void setUnswitched(const SwitchInst *SI, const Value *V);
-
- // Check whether this case value was unswitched before or not.
- bool isUnswitched(const SwitchInst *SI, const Value *V);
-
- // Returns true if another unswitching could be done within the cost
- // threshold.
- bool costAllowsUnswitching();
-
- // Clone all loop-unswitch related loop properties.
- // Redistribute unswitching quotas.
- // Note that the new loop data is stored inside the VMap.
- void cloneData(const Loop *NewLoop, const Loop *OldLoop,
- const ValueToValueMapTy &VMap);
- };
-
- class LoopUnswitch : public LoopPass {
- LoopInfo *LI; // Loop information
- LPPassManager *LPM;
- AssumptionCache *AC;
-
- // Used to check if the second loop needs processing after
- // rewriteLoopBodyWithConditionConstant rewrites the first loop.
- std::vector<Loop*> LoopProcessWorklist;
-
- LUAnalysisCache BranchesInfo;
-
- bool OptimizeForSize;
- bool RedoLoop = false;
-
- Loop *CurrentLoop = nullptr;
- DominatorTree *DT = nullptr;
- MemorySSA *MSSA = nullptr;
- AAResults *AA = nullptr;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- BasicBlock *LoopHeader = nullptr;
- BasicBlock *LoopPreheader = nullptr;
-
- bool SanitizeMemory;
- SimpleLoopSafetyInfo SafetyInfo;
-
- // LoopBlocks contains all of the basic blocks of the loop, including the
- // preheader of the loop, the body of the loop, and the exit blocks of the
- // loop, in that order.
- std::vector<BasicBlock*> LoopBlocks;
- // NewBlocks contains cloned copies of the basic blocks in LoopBlocks.
- std::vector<BasicBlock*> NewBlocks;
-
- bool HasBranchDivergence;
-
- public:
- static char ID; // Pass ID, replacement for typeid
-
- explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false)
- : LoopPass(ID), OptimizeForSize(Os),
- HasBranchDivergence(HasBranchDivergence) {
- initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- bool processCurrentLoop();
- bool isUnreachableDueToPreviousUnswitching(BasicBlock *);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // Lazy BFI and BPI are marked as preserved here so that Loop Unswitching
- // can remain part of the same loop pass manager as LICM.
- AU.addPreserved<LazyBlockFrequencyInfoPass>();
- AU.addPreserved<LazyBranchProbabilityInfoPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- if (HasBranchDivergence)
- AU.addRequired<LegacyDivergenceAnalysis>();
- getLoopAnalysisUsage(AU);
- }
-
- private:
- void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); }
-
- void initLoopData() {
- LoopHeader = CurrentLoop->getHeader();
- LoopPreheader = CurrentLoop->getLoopPreheader();
- }
-
- /// Split all of the edges from inside the loop to their exit blocks.
- /// Update the appropriate Phi nodes as we do so.
- void splitExitEdges(Loop *L,
- const SmallVectorImpl<BasicBlock *> &ExitBlocks);
-
- bool tryTrivialLoopUnswitch(bool &Changed);
-
- bool unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI = nullptr,
- ArrayRef<Instruction *> ToDuplicate = {});
- void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
- BasicBlock *ExitBlock, Instruction *TI);
- void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
- Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate = {});
-
- void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
- Constant *Val, bool IsEqual);
-
- void
- emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest, BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate = {});
-
- void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L);
-
- /// Given that the Invariant is not equal to Val, simplify instructions
- /// in the loop.
- Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,
- Constant *Val);
- };
-
-} // end anonymous namespace
-
-// Analyze the loop. Check its size and determine whether it is possible to
-// unswitch it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
- AssumptionCache *AC) {
- LoopPropsMapIt PropsIt;
- bool Inserted;
- std::tie(PropsIt, Inserted) =
- LoopsProperties.insert(std::make_pair(L, LoopProperties()));
-
- LoopProperties &Props = PropsIt->second;
-
- if (Inserted) {
- // New loop.
-
- // Limit the number of instructions to avoid causing significant code
- // expansion, and the number of basic blocks, to avoid loops with
- // large numbers of branches which cause loop unswitching to go crazy.
- // This is a very ad-hoc heuristic.
-
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
-
- // FIXME: This is overly conservative because it does not take into
- // consideration code simplification opportunities and code that can
- // be shared by the resultant unswitched loops.
- CodeMetrics Metrics;
- for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, TTI, EphValues);
-
- Props.SizeEstimation = Metrics.NumInsts;
- Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
- Props.WasUnswitchedCount = 0;
- MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
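-
- // For illustration (hypothetical numbers): with MaxSize = 100 and a
- // SizeEstimation of 20, CanBeUnswitchedCount becomes 100 / 20 = 5 and
- // MaxSize drops to 0; the quota is handed back via releaseMemory() once
- // this loop has been processed.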
-
- if (Metrics.notDuplicatable) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName()
- << ", contents cannot be "
- << "duplicated!\n");
- return false;
- }
- }
-
- // Be careful: these links are valid only before a new loop is added.
- CurrentLoopProperties = &Props;
- CurLoopInstructions = &Props.UnswitchedVals;
-
- return true;
-}
-
-// Clean all data related to the given loop.
-void LUAnalysisCache::forgetLoop(const Loop *L) {
- LoopPropsMapIt LIt = LoopsProperties.find(L);
-
- if (LIt != LoopsProperties.end()) {
- LoopProperties &Props = LIt->second;
- MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) *
- Props.SizeEstimation;
- LoopsProperties.erase(LIt);
- }
-
- CurrentLoopProperties = nullptr;
- CurLoopInstructions = nullptr;
-}
-
-// Mark case value as unswitched.
-// Since a SwitchInst can be partly unswitched, keep track of all unswitched
-// values in order to avoid extra unswitching in cloned loops.
-void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) {
- (*CurLoopInstructions)[SI].insert(V);
-}
-
-// Check whether this case value was unswitched before or not.
-bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
- return (*CurLoopInstructions)[SI].count(V);
-}
-
-bool LUAnalysisCache::costAllowsUnswitching() {
- return CurrentLoopProperties->CanBeUnswitchedCount > 0;
-}
-
-// Clone all loop-unswitch related loop properties.
-// Redistribute unswitching quotas.
-// Note that the new loop data is stored inside the VMap.
-void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
- const ValueToValueMapTy &VMap) {
- LoopProperties &NewLoopProps = LoopsProperties[NewLoop];
- LoopProperties &OldLoopProps = *CurrentLoopProperties;
- UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals;
-
- // Reallocate "can-be-unswitched quota"
-
- --OldLoopProps.CanBeUnswitchedCount;
- ++OldLoopProps.WasUnswitchedCount;
- NewLoopProps.WasUnswitchedCount = 0;
- unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
- NewLoopProps.CanBeUnswitchedCount = Quota / 2;
- OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
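-
- // For illustration (hypothetical numbers): if CanBeUnswitchedCount is 5
- // after the decrement above, the new loop receives 5 / 2 = 2 and the old
- // loop keeps the remaining 3.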
-
- NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
-
- // Clone unswitched values info:
-// for the new loop's switches we clone info about values that were
-// already unswitched and have redundant successors.
- for (const auto &I : Insts) {
- const SwitchInst *OldInst = I.first;
- Value *NewI = VMap.lookup(OldInst);
- const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI);
- assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
-
- NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
- }
-}
-
-char LoopUnswitch::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
- false, false)
-
-Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) {
- return new LoopUnswitch(Os, HasBranchDivergence);
-}
-
-/// Operator chain lattice.
-enum OperatorChain {
- OC_OpChainNone, ///< There is no operator.
- OC_OpChainOr, ///< There are only ORs.
- OC_OpChainAnd, ///< There are only ANDs.
- OC_OpChainMixed ///< There are ANDs and ORs.
-};
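-
-// For illustration (hypothetical conditions): "(a && b) && c" is an AND-only
-// chain, which a constant 0 (false) folds entirely; "a || (b || c)" is an
-// OR-only chain, which ~0 (true) folds entirely; "(a && b) || c" is mixed,
-// and no single constant simplifies the whole chain.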
-
-/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
-/// an invariant piece, return the invariant. Otherwise, return null.
-///
-/// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a
-/// mixed operator chain, as we can not reliably find a value which will
-/// simplify the operator chain. If the chain is AND-only or OR-only, we can use
-/// 0 or ~0 to simplify the chain.
-///
-/// NOTE: In case of a partial LIV and a mixed operator chain, we may be able to
-/// simplify the condition itself to a loop variant condition, but at the
-/// cost of creating an entirely new loop.
-static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
- OperatorChain &ParentChain,
- DenseMap<Value *, Value *> &Cache,
- MemorySSAUpdater *MSSAU) {
- auto CacheIt = Cache.find(Cond);
- if (CacheIt != Cache.end())
- return CacheIt->second;
-
- // We started analyzing a new instruction; increment the scanned-instructions
- // counter.
- ++TotalInsts;
-
- // We can never unswitch on vector conditions.
- if (Cond->getType()->isVectorTy())
- return nullptr;
-
- // Constants should be folded, not unswitched on!
- if (isa<Constant>(Cond)) return nullptr;
-
- // TODO: Handle: br (VARIANT|INVARIANT).
-
- // Hoist simple values out.
- if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) {
- Cache[Cond] = Cond;
- return Cond;
- }
-
- // Walk up the operator chain to find partial invariant conditions.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
- if (BO->getOpcode() == Instruction::And ||
- BO->getOpcode() == Instruction::Or) {
- // Given the previous operator, compute the current operator chain status.
- OperatorChain NewChain;
- switch (ParentChain) {
- case OC_OpChainNone:
- NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
- OC_OpChainOr;
- break;
- case OC_OpChainOr:
- NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr :
- OC_OpChainMixed;
- break;
- case OC_OpChainAnd:
- NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
- OC_OpChainMixed;
- break;
- case OC_OpChainMixed:
- NewChain = OC_OpChainMixed;
- break;
- }
-
- // If we reach a Mixed state, we do not want to keep walking up, as we
- // cannot reliably find a value that will simplify the chain. With this
- // check, we return null at the first sight of a mixed chain, and the
- // caller will either backtrack to find a partial LIV in the other operand
- // or return null.
- if (NewChain != OC_OpChainMixed) {
- // Update the current operator chain type before we search up the chain.
- ParentChain = NewChain;
- // If either the left or right side is invariant, we can unswitch on this,
- // which will cause the branch to go away in one loop and the condition to
- // simplify in the other one.
- if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed,
- ParentChain, Cache, MSSAU)) {
- Cache[Cond] = LHS;
- return LHS;
- }
- // We did not manage to find a partial LIV in operand(0). Backtrack and try
- // operand(1).
- ParentChain = NewChain;
- if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed,
- ParentChain, Cache, MSSAU)) {
- Cache[Cond] = RHS;
- return RHS;
- }
- }
- }
-
- Cache[Cond] = nullptr;
- return nullptr;
-}
-
-/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
-/// an invariant piece, return the invariant along with the operator chain type.
-/// Otherwise, return null.
-static std::pair<Value *, OperatorChain>
-findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
- MemorySSAUpdater *MSSAU) {
- DenseMap<Value *, Value *> Cache;
- OperatorChain OpChain = OC_OpChainNone;
- Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU);
-
- // In case we do find a LIV, it cannot be obtained by walking up a mixed
- // operator chain.
- assert((!FCond || OpChain != OC_OpChainMixed) &&
- "Do not expect a partial LIV with mixed operator chain");
- return {FCond, OpChain};
-}
-
-bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
- if (skipLoop(L))
- return false;
-
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LPM = &LPMRef;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- CurrentLoop = L;
- Function *F = CurrentLoop->getHeader()->getParent();
-
- SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory);
- if (SanitizeMemory)
- SafetyInfo.computeLoopSafetyInfo(L);
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- bool Changed = false;
- do {
- assert(CurrentLoop->isLCSSAForm(*DT));
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- RedoLoop = false;
- Changed |= processCurrentLoop();
- } while (RedoLoop);
-
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- return Changed;
-}
-
-// Return true if the BasicBlock BB is unreachable from the loop header.
-// Otherwise, return false.
-bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {
- auto *Node = DT->getNode(BB)->getIDom();
- BasicBlock *DomBB = Node->getBlock();
- while (CurrentLoop->contains(DomBB)) {
- BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator());
-
- Node = DT->getNode(DomBB)->getIDom();
- DomBB = Node->getBlock();
-
- if (!BInst || !BInst->isConditional())
- continue;
-
- Value *Cond = BInst->getCondition();
- if (!isa<ConstantInt>(Cond))
- continue;
-
- BasicBlock *UnreachableSucc =
- Cond == ConstantInt::getTrue(Cond->getContext())
- ? BInst->getSuccessor(1)
- : BInst->getSuccessor(0);
-
- if (DT->dominates(UnreachableSucc, BB))
- return true;
- }
- return false;
-}
-
-/// FIXME: Remove this workaround when the freeze-related patches are done.
-/// LoopUnswitch and equality propagation in GVN disagree about whether a
-/// branch on undef/poison has undefined behavior. This function rules out
-/// some common cases in which we have already seen that discrepancy cause
-/// problems; details can be found in PR31652. Note that if this function
-/// returns true, unswitching is unsafe, but a false return does not mean
-/// it is necessarily safe.
-static bool equalityPropUnSafe(Value &LoopCond) {
- ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond);
- if (!CI || !CI->isEquality())
- return false;
-
- Value *LHS = CI->getOperand(0);
- Value *RHS = CI->getOperand(1);
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return true;
-
- auto HasUndefInPHI = [](PHINode &PN) {
- for (Value *Opd : PN.incoming_values()) {
- if (isa<UndefValue>(Opd))
- return true;
- }
- return false;
- };
- PHINode *LPHI = dyn_cast<PHINode>(LHS);
- PHINode *RPHI = dyn_cast<PHINode>(RHS);
- if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI)))
- return true;
-
- auto HasUndefInSelect = [](SelectInst &SI) {
- if (isa<UndefValue>(SI.getTrueValue()) ||
- isa<UndefValue>(SI.getFalseValue()))
- return true;
- return false;
- };
- SelectInst *LSI = dyn_cast<SelectInst>(LHS);
- SelectInst *RSI = dyn_cast<SelectInst>(RHS);
- if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI)))
- return true;
- return false;
-}
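-
-// For illustration (hypothetical IR): given "br i1 (%x == %p)" where %p is a
-// PHI with an undef incoming value, the two unswitched loop copies may each
-// assume a different concrete value for the undef, so equality propagation in
-// one copy can contradict the branch that was taken; see PR31652.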
-
-/// Do actual work and unswitch loop if possible and profitable.
-bool LoopUnswitch::processCurrentLoop() {
- bool Changed = false;
-
- initLoopData();
-
- // If LoopSimplify was unable to form a preheader, don't do any unswitching.
- if (!LoopPreheader)
- return false;
-
- // Loops with indirectbr cannot be cloned.
- if (!CurrentLoop->isSafeToClone())
- return false;
-
- // Without dedicated exits, splitting the exit edge may fail.
- if (!CurrentLoop->hasDedicatedExits())
- return false;
-
- LLVMContext &Context = LoopHeader->getContext();
-
- // Analyze the loop cost, and stop unswitching if the loop contents cannot
- // be duplicated.
- if (!BranchesInfo.countLoop(
- CurrentLoop,
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *CurrentLoop->getHeader()->getParent()),
- AC))
- return false;
-
- // Try trivial unswitching first, before looping over the other basic blocks
- // in the loop.
- if (tryTrivialLoopUnswitch(Changed)) {
- return true;
- }
-
- // Do not do non-trivial unswitch while optimizing for size.
- // FIXME: Use Function::hasOptSize().
- if (OptimizeForSize ||
- LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
- return Changed;
-
- // Run through the instructions in the loop, keeping track of three things:
- //
- // - That we do not unswitch loops containing convergent operations, as we
- // might be making them control dependent on the unswitch value when they
- // were not before.
- // FIXME: This could be refined to only bail if the convergent operation is
- // not already control-dependent on the unswitch value.
- //
- // - That basic blocks in the loop contain invokes whose predecessor edges we
- // cannot split.
- //
- // - The set of guard intrinsics encountered (these are non-terminator
- // instructions that are also profitable to unswitch).
-
- SmallVector<IntrinsicInst *, 4> Guards;
-
- for (const auto BB : CurrentLoop->blocks()) {
- for (auto &I : *BB) {
- auto *CB = dyn_cast<CallBase>(&I);
- if (!CB)
- continue;
- if (CB->isConvergent())
- return Changed;
- if (auto *II = dyn_cast<InvokeInst>(&I))
- if (!II->getUnwindDest()->canSplitPredecessors())
- return Changed;
- if (auto *II = dyn_cast<IntrinsicInst>(&I))
- if (II->getIntrinsicID() == Intrinsic::experimental_guard)
- Guards.push_back(II);
- }
- }
-
- for (IntrinsicInst *Guard : Guards) {
- Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop,
- Changed, MSSAU.get())
- .first;
- if (LoopCond &&
- unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
- // NB! Unswitching (if successful) could have erased some of the
- // instructions in Guards leaving dangling pointers there. This is fine
- // because we're returning now, and won't look at Guards again.
- ++NumGuards;
- return true;
- }
- }
-
- // Loop over all of the basic blocks in the loop. If we find an interior
- // block that is branching on a loop-invariant condition, we can unswitch this
- // loop.
- for (Loop::block_iterator I = CurrentLoop->block_begin(),
- E = CurrentLoop->block_end();
- I != E; ++I) {
- Instruction *TI = (*I)->getTerminator();
-
- // Unswitching on a potentially uninitialized predicate is not
- // MSan-friendly. Limit this to the cases when the original predicate is
- // guaranteed to execute, to avoid creating a use-of-uninitialized-value
- // in the code that did not have one.
- // This is a workaround for the discrepancy between LLVM IR and MSan
- // semantics. See PR28054 for more details.
- if (SanitizeMemory &&
- !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop))
- continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- // Some branches may be rendered unreachable because of previous
- // unswitching.
- // Unswitch only those branches that are reachable.
- if (isUnreachableDueToPreviousUnswitching(*I))
- continue;
-
- // If this isn't branching on an invariant condition, we can't unswitch
- // it.
- if (BI->isConditional()) {
- // See if this, or some part of it, is loop invariant. If so, we can
- // unswitch on it if we desire.
- Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
- if (LoopCond && !equalityPropUnSafe(*LoopCond) &&
- unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
- ++NumBranches;
- return true;
- }
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *SC = SI->getCondition();
- Value *LoopCond;
- OperatorChain OpChain;
- std::tie(LoopCond, OpChain) =
- findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get());
-
- unsigned NumCases = SI->getNumCases();
- if (LoopCond && NumCases) {
- // Find a value to unswitch on:
- // FIXME: this should choose the most expensive case!
- // FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = nullptr;
- // Find a case value such that at least one case value is unswitched
- // out.
- if (OpChain == OC_OpChainAnd) {
- // If the chain only has ANDs and the switch has a case value of 0,
- // dropping a 0 into the chain will unswitch out the 0 case value.
- auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType()));
- if (BranchesInfo.isUnswitched(SI, AllZero))
- continue;
- // We are unswitching 0 out.
- UnswitchVal = AllZero;
- } else if (OpChain == OC_OpChainOr) {
- // If the chain only has ORs and the switch has a case value of ~0,
- // dropping a ~0 into the chain will unswitch out the ~0 case value.
- auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
- if (BranchesInfo.isUnswitched(SI, AllOne))
- continue;
- // We are unswitching ~0 out.
- UnswitchVal = AllOne;
- } else {
- assert(OpChain == OC_OpChainNone &&
- "Expect to unswitch on trivial chain");
- // Do not process the same value again and again.
- // At this point we have some cases already unswitched and
- // some not yet unswitched. Let's find the first not yet unswitched one.
- for (auto Case : SI->cases()) {
- Constant *UnswitchValCandidate = Case.getCaseValue();
- if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
- UnswitchVal = UnswitchValCandidate;
- break;
- }
- }
- }
-
- if (!UnswitchVal)
- continue;
-
- if (unswitchIfProfitable(LoopCond, UnswitchVal)) {
- ++NumSwitches;
- // In case of a full LIV, UnswitchVal is the value we unswitched out.
- // In case of a partial LIV, we only unswitch when it is an AND-chain
- // or OR-chain. In both cases, the switch input value simplifies to
- // UnswitchVal.
- BranchesInfo.setUnswitched(SI, UnswitchVal);
- return true;
- }
- }
- }
-
- // Scan the instructions to check for unswitchable values.
- for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
- BBI != E; ++BBI)
- if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
- Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
- if (LoopCond &&
- unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
- ++NumSelects;
- return true;
- }
- }
- }
-
- // Check if there is a header condition that is invariant along the path from
- // either the true or false successors to the header. This allows unswitching
- // conditions depending on memory accesses, if there's a path not clobbering
- // the memory locations. Check if this transform has been disabled using
- // metadata, to avoid unswitching the same loop multiple times.
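- //
- // For illustration (hypothetical): with "if (*p == 0)" in the header, where
- // no block on the path back to the header through one successor stores to
- // *p, the condition is invariant along that path and can be partially
- // unswitched even though the load itself is not loop-invariant.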
- if (MSSA &&
- !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
- if (auto Info =
- hasPartialIVCondition(*CurrentLoop, MSSAThreshold, *MSSA, *AA)) {
- assert(!Info->InstToDuplicate.empty() &&
- "need at least a partially invariant condition");
- LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
- << *Info->InstToDuplicate[0] << "\n");
-
- Instruction *TI = CurrentLoop->getHeader()->getTerminator();
- Value *LoopCond = Info->InstToDuplicate[0];
-
- // If the partially unswitched path is a no-op and has a single exit
- // block, we do not need to do full unswitching. Instead, we can directly
- // branch to the exit.
- // TODO: Instead of duplicating the checks, we could also just directly
- // branch to the exit from the conditional branch in the loop.
- if (Info->PathIsNoop) {
- if (HasBranchDivergence &&
- getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
- << CurrentLoop->getHeader()->getName()
- << " at non-trivial condition '"
- << *Info->KnownValue << "' == " << *LoopCond << "\n"
- << ". Condition is divergent.\n");
- return false;
- }
-
- ++NumBranches;
-
- BasicBlock *TrueDest = LoopHeader;
- BasicBlock *FalseDest = Info->ExitForPath;
- if (Info->KnownValue->isOneValue())
- std::swap(TrueDest, FalseDest);
-
- auto *OldBr =
- cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
- emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
- FalseDest, OldBr, TI,
- Info->InstToDuplicate);
- delete OldBr;
- RedoLoop = false;
- return true;
- }
-
- // Otherwise, the path is not a no-op. Run regular unswitching.
- if (unswitchIfProfitable(LoopCond, Info->KnownValue,
- CurrentLoop->getHeader()->getTerminator(),
- Info->InstToDuplicate)) {
- ++NumBranches;
- RedoLoop = false;
- return true;
- }
- }
- }
-
- return Changed;
-}
-
-/// Check to see if all paths from BB exit the loop with no side effects
-/// (including infinite loops).
-///
-/// If so, we return true and set ExitBB to the block we
-/// exit through.
-///
-static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
- BasicBlock *&ExitBB,
- std::set<BasicBlock*> &Visited) {
- if (!Visited.insert(BB).second) {
- // Already visited. Without more analysis, this could indicate an infinite
- // loop.
- return false;
- }
- if (!L->contains(BB)) {
- // Otherwise, this is a loop exit; this is fine so long as it is the
- // first exit.
- if (ExitBB) return false;
- ExitBB = BB;
- return true;
- }
-
- // Otherwise, this is an unvisited intra-loop node. Check all successors.
- for (BasicBlock *Succ : successors(BB)) {
- // Check to see if the successor is a trivial loop exit.
- if (!isTrivialLoopExitBlockHelper(L, Succ, ExitBB, Visited))
- return false;
- }
-
- // Okay, everything after this looks good, check to make sure that this block
- // doesn't include any side effects.
- for (Instruction &I : *BB)
- if (I.mayHaveSideEffects())
- return false;
-
- return true;
-}
-
-/// Return true if the specified block unconditionally leads to an exit from
-/// the specified loop, and has no side-effects in the process. If so, return
-/// the block that is exited to, otherwise return null.
-static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
- std::set<BasicBlock*> Visited;
- Visited.insert(L->getHeader()); // Branches to header make infinite loops.
- BasicBlock *ExitBB = nullptr;
- if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
- return ExitBB;
- return nullptr;
-}
-
-/// We have found that we can unswitch CurrentLoop when LoopCond == Val to
-/// simplify the loop. If we decide that this is profitable,
-/// unswitch the loop, reprocess the pieces, then return true.
-bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
- // Check to see if it would be profitable to unswitch current loop.
- if (!BranchesInfo.costAllowsUnswitching()) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
- << CurrentLoop->getHeader()->getName()
- << " at non-trivial condition '" << *Val
- << "' == " << *LoopCond << "\n"
- << ". Cost too high.\n");
- return false;
- }
- if (HasBranchDivergence &&
- getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
- LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
- << CurrentLoop->getHeader()->getName()
- << " at non-trivial condition '" << *Val
- << "' == " << *LoopCond << "\n"
- << ". Condition is divergent.\n");
- return false;
- }
-
- unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
- return true;
-}
-
-/// Emit a conditional branch on two values: if LIC == Val, branch to TrueDest,
-/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
-/// and remove (but not erase!) it from the function.
-void LoopUnswitch::emitPreheaderBranchOnCondition(
- Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
- assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
- assert(TrueDest != FalseDest && "Branch targets should be different");
-
- // Insert a conditional branch on LIC to the two preheaders. The original
- // code is the true version and the new code is the false version.
- Value *BranchVal = LIC;
- bool Swapped = false;
-
- if (!ToDuplicate.empty()) {
- ValueToValueMapTy Old2New;
- for (Instruction *I : reverse(ToDuplicate)) {
- auto *New = I->clone();
- New->insertBefore(OldBranch);
- RemapInstruction(New, Old2New,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- Old2New[I] = New;
-
- if (MSSAU) {
- MemorySSA *MSSA = MSSAU->getMemorySSA();
- auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
- if (!MemA)
- continue;
-
- Loop *L = LI->getLoopFor(I->getParent());
- auto *DefiningAccess = MemA->getDefiningAccess();
- // Get the first defining access before the loop.
- while (L->contains(DefiningAccess->getBlock())) {
- // If the defining access is a MemoryPhi, get the incoming
- // value for the pre-header as defining access.
- if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
- DefiningAccess =
- MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
- } else {
- DefiningAccess =
- cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
- }
- }
- MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
- MemorySSA::BeforeTerminator);
- }
- }
- BranchVal = Old2New[ToDuplicate[0]];
- } else {
- if (!isa<ConstantInt>(Val) ||
- Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
- else if (Val != ConstantInt::getTrue(Val->getContext())) {
- // We want to enter the new loop when the condition is true.
- std::swap(TrueDest, FalseDest);
- Swapped = true;
- }
- }
-
- // Old branch will be removed, so save its parent and successor to update the
- // DomTree.
- auto *OldBranchSucc = OldBranch->getSuccessor(0);
- auto *OldBranchParent = OldBranch->getParent();
-
- // Insert the new branch.
- BranchInst *BI =
- IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);
- if (Swapped)
- BI->swapProfMetadata();
-
- // Remove the old branch so there is only one branch at the end. This is
- // needed to perform DomTree's internal DFS walk on the function's CFG.
- OldBranch->removeFromParent();
-
- // Inform the DT about the new branch.
- if (DT) {
- // First, add both successors.
- SmallVector<DominatorTree::UpdateType, 3> Updates;
- if (TrueDest != OldBranchSucc)
- Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest});
- if (FalseDest != OldBranchSucc)
- Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest});
- // If both of the new successors are different from the old one, inform the
- // DT that the edge was deleted.
- if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
- Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
- }
-
- if (MSSAU)
- MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
- else
- DT->applyUpdates(Updates);
- }
-
- // If either edge is critical, split it. This helps preserve LoopSimplify
- // form for enclosing loops.
- auto Options =
- CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA();
- SplitCriticalEdge(BI, 0, Options);
- SplitCriticalEdge(BI, 1, Options);
-}
-
-/// Given a loop that has a trivial unswitchable condition in it (a cond branch
-/// from its header block to its latch block, where the path through the loop
-/// that doesn't execute its body has no side-effects), unswitch it. This
-/// doesn't involve any code duplication, just moving the conditional branch
-/// outside of the loop and updating loop info.
-void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
- BasicBlock *ExitBlock,
- Instruction *TI) {
- LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
- << LoopHeader->getName() << " [" << L->getBlocks().size()
- << " blocks] in Function "
- << L->getHeader()->getParent()->getName()
- << " on cond: " << *Val << " == " << *Cond << "\n");
- // We are going to make essential changes to CFG. This may invalidate cached
- // information for L or one of its parent loops in SCEV.
- if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
- SEWP->getSE().forgetTopmostLoop(L);
-
- // First step, split the preheader, so that we know that there is a safe place
- // to insert the conditional branch. We will change LoopPreheader to have a
- // conditional branch on Cond.
- BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
-
- // Now that we have a place to insert the conditional branch, create a place
- // to branch to: this is the exit block out of the loop that we should
- // short-circuit to.
-
- // Split this block now, so that the loop maintains its exit block, and so
- // that the jump from the preheader can execute the contents of the exit block
- // without actually branching to it (the exit block should be dominated by the
- // loop header, not the preheader).
- assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit =
- SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get());
-
- // Okay, now we have a position to branch from and a position to branch to,
- // insert the new conditional branch.
- auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator());
- assert(OldBranch && "Failed to split the preheader");
- emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI);
-
- // emitPreheaderBranchOnCondition removed the OldBranch from the function.
- // Delete it, as it is no longer needed.
- delete OldBranch;
-
- // We need to reprocess this loop, it could be unswitched again.
- RedoLoop = true;
-
- // Now that we know that the loop is never entered when this condition is a
- // particular value, rewrite the loop with this info. We know that this will
- // at least eliminate the old branch.
- rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false);
-
- ++NumTrivial;
-}
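-
-// For illustration (hypothetical source): in
-//   for (...) { if (Inv) break; Body(); }
-// a header branch on the loop-invariant 'Inv' exits the loop without side
-// effects, so trivially unswitching it just hoists that branch above the loop
-// and requires no cloning of the loop body.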
-
-/// Check if the first non-constant condition starting from the loop header is
-/// a trivial unswitch condition: that is, a condition that controls whether or
-/// not the loop does anything at all. If it is a trivial condition, unswitching
-/// produces no code duplication (equivalently, it produces a simpler loop and a
-/// new empty loop, which gets deleted). Therefore, always unswitch a trivial
-/// condition.
-bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) {
- BasicBlock *CurrentBB = CurrentLoop->getHeader();
- Instruction *CurrentTerm = CurrentBB->getTerminator();
- LLVMContext &Context = CurrentBB->getContext();
-
- // If the loop header has only one reachable successor (currently via an
- // unconditional branch or a constant-foldable conditional branch; in the
- // future we should also consider handling a constant-foldable switch
- // instruction), we should keep looking for trivial condition candidates in
- // the successor as well. An alternative is to constant fold conditions
- // and merge successors into loop header (then we only need to check header's
- // terminator). The reason for not doing this in LoopUnswitch pass is that
- // it could potentially break LoopPassManager's invariants. Folding dead
- // branches could either eliminate the current loop or make other loops
- // unreachable. LCSSA form might also not be preserved after deleting
- // branches. The following code keeps traversing the loop header's successors
- // until it finds a trivial condition candidate (a condition that is not a
- // constant). Since unswitching generates branches with constant conditions,
- // this scenario could be very common in practice.
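- //
- // For illustration (hypothetical): after one unswitch the header might end
- // in "br i1 true, label %bb1, label %bb2"; the loop below folds through it
- // into %bb1 and keeps searching there for a non-constant branch or switch.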
- SmallPtrSet<BasicBlock*, 8> Visited;
-
- while (true) {
- // If we exit the loop or reach a previously visited block, then we
- // cannot reach any trivial condition candidates (unfoldable branch
- // instructions or switch instructions) and no unswitching can happen.
- // Exit and return false.
- if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
- return false;
-
- // Check if this loop will execute any side-effecting instructions (e.g.
- // stores, calls, volatile loads) in the part of the loop that the code
- // *would* execute. Check the header first.
- for (Instruction &I : *CurrentBB)
- if (I.mayHaveSideEffects())
- return false;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
- if (BI->isUnconditional()) {
- CurrentBB = BI->getSuccessor(0);
- } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
- CurrentBB = BI->getSuccessor(0);
- } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
- CurrentBB = BI->getSuccessor(1);
- } else {
- // Found a trivial condition candidate: non-foldable conditional branch.
- break;
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
- // At this point, any constant-foldable instructions should probably
- // have been folded.
- ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (!Cond)
- break;
- // Find the target block we are definitely going to.
- CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
- } else {
- // We do not understand these terminator instructions.
- break;
- }
-
- CurrentTerm = CurrentBB->getTerminator();
- }
-
- // CondVal is the condition value that triggers the trivial exit.
- // LoopExitBB is the BasicBlock the loop exits to when the trivial condition
- // is met.
- Constant *CondVal = nullptr;
- BasicBlock *LoopExitBB = nullptr;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
- // If this isn't branching on an invariant condition, we can't unswitch it.
- if (!BI->isConditional())
- return false;
-
- Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
-
- // Unswitch only if the trivial condition itself is an LIV (not a
- // partial LIV, which could occur in and/or).
- if (!LoopCond || LoopCond != BI->getCondition())
- return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
- // side-effects. If so, determine the value of Cond that causes
- // it to do this.
- if ((LoopExitBB =
- isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) {
- CondVal = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB =
- isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) {
- CondVal = ConstantInt::getFalse(Context);
- }
-
- // If we didn't find a single unique LoopExit block, or if the loop exit
- // block contains phi nodes, this isn't trivial.
- if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
- return false; // Can't handle this.
-
- if (equalityPropUnSafe(*LoopCond))
- return false;
-
- unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
- CurrentTerm);
- ++NumBranches;
- return true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
- // If this isn't switching on an invariant condition, we can't unswitch it.
- Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop,
- Changed, MSSAU.get())
- .first;
-
- // Unswitch only if the trivial condition itself is an LIV (not a
- // partial LIV, which could occur in and/or).
- if (!LoopCond || LoopCond != SI->getCondition())
- return false;
-
- // Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a single exit block without having any
- // side-effects. If so, determine the value of Cond that causes it to do
- // this.
- // Note that we can't trivially unswitch on the default case or
- // on already unswitched cases.
- for (auto Case : SI->cases()) {
- BasicBlock *LoopExitCandidate;
- if ((LoopExitCandidate =
- isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) {
- // Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt *CaseVal = Case.getCaseValue();
-
- // Check that it was not unswitched before, since already unswitched
- // trivial values look trivial too.
- if (BranchesInfo.isUnswitched(SI, CaseVal))
- continue;
- LoopExitBB = LoopExitCandidate;
- CondVal = CaseVal;
- break;
- }
- }
-
- // If we didn't find a single unique LoopExit block, or if the loop exit
- // block contains phi nodes, this isn't trivial.
- if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
- return false; // Can't handle this.
-
- unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB,
- nullptr);
-
- // We are only unswitching full LIV.
- BranchesInfo.setUnswitched(SI, CondVal);
- ++NumSwitches;
- return true;
- }
- return false;
-}
-
-/// Split all of the edges from inside the loop to their exit blocks.
-/// Update the appropriate Phi nodes as we do so.
-void LoopUnswitch::splitExitEdges(
- Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
-
- for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) {
- BasicBlock *ExitBlock = ExitBlocks[I];
- SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBlock));
-
- // Although SplitBlockPredecessors doesn't preserve loop-simplify in
- // general, if we call it on all predecessors of all exits then it does.
- SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(),
- /*PreserveLCSSA*/ true);
- }
-}
-
-/// We determined that the loop is profitable to unswitch when LIC equals Val.
-/// Split it into loop versions and test the condition outside of either loop.
-/// Return the loops created as Out1/Out2.
-void LoopUnswitch::unswitchNontrivialCondition(
- Value *LIC, Constant *Val, Loop *L, Instruction *TI,
- ArrayRef<Instruction *> ToDuplicate) {
- Function *F = LoopHeader->getParent();
- LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
- << LoopHeader->getName() << " [" << L->getBlocks().size()
- << " blocks] in Function " << F->getName() << " when '"
- << *Val << "' == " << *LIC << "\n");
-
- // We are going to make essential changes to the CFG. This may invalidate
- // cached information for L or one of its parent loops in SCEV.
- if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>())
- SEWP->getSE().forgetTopmostLoop(L);
-
- LoopBlocks.clear();
- NewBlocks.clear();
-
- if (MSSAU && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-
- // First step, split the preheader and exit blocks, and add these blocks to
- // the LoopBlocks list.
- BasicBlock *NewPreheader =
- SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get());
- LoopBlocks.push_back(NewPreheader);
-
- // We want the loop to come after the preheader, but before the exit blocks.
- llvm::append_range(LoopBlocks, L->blocks());
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getUniqueExitBlocks(ExitBlocks);
-
- // Split all of the edges from inside the loop to their exit blocks. Update
- // the appropriate Phi nodes as we do so.
- splitExitEdges(L, ExitBlocks);
-
- // The exit blocks may have changed due to edge splitting; recompute them.
- ExitBlocks.clear();
- L->getUniqueExitBlocks(ExitBlocks);
-
- // Add exit blocks to the loop blocks.
- llvm::append_range(LoopBlocks, ExitBlocks);
-
- // Next step, clone all of the basic blocks that make up the loop (including
- // the loop preheader and exit blocks), keeping track of the mapping between
- // the instructions and blocks.
- NewBlocks.reserve(LoopBlocks.size());
- ValueToValueMapTy VMap;
- for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) {
- BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F);
-
- NewBlocks.push_back(NewBB);
- VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping.
- }
-
- // Splice the newly inserted blocks into the function right before the
- // original preheader.
- F->getBasicBlockList().splice(NewPreheader->getIterator(),
- F->getBasicBlockList(),
- NewBlocks[0]->getIterator(), F->end());
-
- // Now we create the new Loop object for the versioned loop.
- Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
-
- // Recalculate the unswitching quota and inherit the simplified-switch info
- // for the new loop; possibly clone more loop-unswitch-related properties.
- BranchesInfo.cloneData(NewLoop, L, VMap);
-
- Loop *ParentLoop = L->getParentLoop();
- if (ParentLoop) {
- // Make sure to add the cloned preheader and exit blocks to the parent loop
- // as well.
- ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
- }
-
- for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) {
- BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]);
- // The new exit block should be in the same loop as the old one.
- if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI]))
- ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
-
- assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
- "Exit block should have been split to have one successor!");
- BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
-
- // If the successor of the exit block had PHI nodes, add an entry for
- // NewExit.
- for (PHINode &PN : ExitSucc->phis()) {
- Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]);
- ValueToValueMapTy::iterator It = VMap.find(V);
- if (It != VMap.end()) V = It->second;
- PN.addIncoming(V, NewExit);
- }
-
- if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
- PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
- &*ExitSucc->getFirstInsertionPt());
-
- for (BasicBlock *BB : predecessors(ExitSucc)) {
- LandingPadInst *LPI = BB->getLandingPadInst();
- LPI->replaceAllUsesWith(PN);
- PN->addIncoming(LPI, BB);
- }
- }
- }
-
- // Rewrite the code to refer to itself.
- for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) {
- for (Instruction &I : *NewBlocks[NBI]) {
- RemapInstruction(&I, VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
- if (auto *II = dyn_cast<AssumeInst>(&I))
- AC->registerAssumption(II);
- }
- }
-
- // Rewrite the original preheader to select between versions of the loop.
- BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator());
- assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
- "Preheader splitting did not work correctly!");
-
- if (MSSAU) {
- // Update MemorySSA after cloning, and before splitting to unreachables,
- // since that invalidates the 1:1 mapping of clones in VMap.
- LoopBlocksRPO LBRPO(L);
- LBRPO.perform(LI);
- MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap);
- }
-
- // Emit the new branch that selects between the two versions of this loop.
- emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
- TI, ToDuplicate);
- if (MSSAU) {
- // Update MemoryPhis in Exit blocks.
- MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
-
- // The OldBr was replaced by a new one and removed (but not erased) by
- // emitPreheaderBranchOnCondition. It is no longer needed, so delete it.
- delete OldBR;
-
- LoopProcessWorklist.push_back(NewLoop);
- RedoLoop = true;
-
- // Keep a WeakTrackingVH holding onto LIC. If the first call to
- // rewriteLoopBodyWithConditionConstant deletes the instruction (for
- // example by simplifying a PHI that feeds into the condition that we're
- // unswitching on), we don't rewrite the second iteration.
- WeakTrackingVH LICHandle(LIC);
-
- if (ToDuplicate.empty()) {
- // Now we rewrite the original code to know that the condition is true and
- // the new code to know that the condition is false.
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
-
- // It's possible that simplifying one loop could cause the other to be
- // changed to another value or a constant. If it's a constant, don't
- // simplify it.
- if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
- LICHandle && !isa<Constant>(LICHandle))
- rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
- /*IsEqual=*/true);
- } else {
- // Partial unswitching. Update the condition in the right loop with the
- // constant.
- auto *CC = cast<ConstantInt>(Val);
- if (CC->isOneValue()) {
- rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
- /*IsEqual=*/true);
- } else
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
-
- // Mark the new loop as partially unswitched, to avoid unswitching on the
- // same condition again.
- auto &Context = NewLoop->getHeader()->getContext();
- MDNode *DisableUnswitchMD = MDNode::get(
- Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
- MDNode *NewLoopID = makePostTransformationMetadata(
- Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
- {DisableUnswitchMD});
- NewLoop->setLoopID(NewLoopID);
- }
-
- if (MSSA && VerifyMemorySSA)
- MSSA->verifyMemorySSA();
-}
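Non-trivial unswitching, by contrast, clones the entire loop and selects
between the two specializations outside of either copy, as implemented above.
A source-level sketch of the result (illustrative only; work is a
hypothetical helper):

void work(int I, bool Inv);

// Each copy of the loop body can now be simplified under the known value of
// Inv; rewriteLoopBodyWithConditionConstant performs that specialization.
void unswitched(bool Inv, int N) {
  if (Inv) {
    for (int I = 0; I < N; ++I)
      work(I, /*Inv=*/true);   // loop version where Inv is known true
  } else {
    for (int I = 0; I < N; ++I)
      work(I, /*Inv=*/false);  // loop version where Inv is known false
  }
}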
-
-/// Remove all instances of I from the worklist vector specified.
-static void removeFromWorklist(Instruction *I,
- std::vector<Instruction *> &Worklist) {
- llvm::erase_value(Worklist, I);
-}
-
-/// When we find that I really equals V, remove I from the
-/// program, replacing all uses with V and updating the worklist.
-static void replaceUsesOfWith(Instruction *I, Value *V,
- std::vector<Instruction *> &Worklist, Loop *L,
- LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
- LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
-
- // Add I's operands to the worklist; they may be dead now.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
- Worklist.push_back(Use);
-
- // Add I's users to the worklist; they may be simplifiable now.
- for (User *U : I->users())
- Worklist.push_back(cast<Instruction>(U));
- removeFromWorklist(I, Worklist);
- I->replaceAllUsesWith(V);
- if (!I->mayHaveSideEffects()) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- I->eraseFromParent();
- }
- ++NumSimplify;
-}
-
-/// We know either that LIC has the value specified by Val in the given loop,
-/// or that it definitely does NOT have that value.
-/// Rewrite any uses of LIC, or of properties correlated to it, accordingly.
-void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
- Constant *Val,
- bool IsEqual) {
- assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
-
- // FIXME: Support correlated properties, like:
- // for (...)
- // if (li1 < li2)
- // ...
- // if (li1 > li2)
- // ...
-
- // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
- // selects, switches.
- std::vector<Instruction*> Worklist;
- LLVMContext &Context = Val->getContext();
-
- // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
- // in the loop with the appropriate one directly.
- if (IsEqual || (isa<ConstantInt>(Val) &&
- Val->getType()->isIntegerTy(1))) {
- Value *Replacement;
- if (IsEqual)
- Replacement = Val;
- else
- Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
- !cast<ConstantInt>(Val)->getZExtValue());
-
- for (User *U : LIC->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI || !L->contains(UI))
- continue;
- Worklist.push_back(UI);
- }
-
- for (Instruction *UI : Worklist)
- UI->replaceUsesOfWith(LIC, Replacement);
-
- simplifyCode(Worklist, L);
- return;
- }
-
- // Otherwise, we don't know the precise value of LIC, but we do know that it
- // is certainly NOT "Val". As such, simplify any uses in the loop that we
- // can. This case occurs when we unswitch switch statements.
- for (User *U : LIC->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI || !L->contains(UI))
- continue;
-
- // At this point, we know LIC is definitely not Val. Try to use some simple
- // logic to simplify the user w.r.t. the context.
- if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) {
- if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
- // This in-loop instruction has been simplified w.r.t. its context,
- // i.e. LIC != Val; make sure we propagate its replacement value to
- // all its users.
- //
- // We cannot delete UI, the LIC user, yet, because that would invalidate
- // the LIC->users() iterator. However, we can make this instruction
- // dead by replacing all its uses and pushing it onto the worklist so
- // that it can be properly deleted and its operands simplified.
- UI->replaceAllUsesWith(Replacement);
- }
- }
-
- // This is a LIC user, push it into the worklist so that simplifyCode can
- // attempt to simplify it.
- Worklist.push_back(UI);
-
- // If we know that LIC is not Val, use this info to simplify code.
- SwitchInst *SI = dyn_cast<SwitchInst>(UI);
- if (!SI || !isa<ConstantInt>(Val)) continue;
-
- // NOTE: if a case value for the switch is unswitched out, we record it
- // after the unswitch finishes. We cannot record it here, as the switch
- // is not a direct user of the partial LIV.
- SwitchInst::CaseHandle DeadCase =
- *SI->findCaseValue(cast<ConstantInt>(Val));
- // Default case is live for multiple values.
- if (DeadCase == *SI->case_default())
- continue;
-
- // Found a dead case value. Don't remove PHI nodes in the
- // successor if they become single-entry; those PHI nodes may
- // be in the Users list.
-
- BasicBlock *Switch = SI->getParent();
- BasicBlock *SISucc = DeadCase.getCaseSuccessor();
- BasicBlock *Latch = L->getLoopLatch();
-
- if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
- // If the DeadCase successor dominates the loop latch, then the
- // transformation isn't safe since it will delete the sole predecessor edge
- // to the latch.
- if (Latch && DT->dominates(SISucc, Latch))
- continue;
-
- // FIXME: This is a hack. We need to keep the successor around
- // and hooked up so as to preserve the loop structure, because
- // trying to update it is complicated. So instead we preserve the
- // loop structure and put the block on a dead code path.
- SplitEdge(Switch, SISucc, DT, LI, MSSAU.get());
- // Compute the successors instead of relying on the return value
- // of SplitEdge, since it may have split the switch successor
- // after PHI nodes.
- BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
- BasicBlock *OldSISucc = *succ_begin(NewSISucc);
- // Create an "unreachable" destination.
- BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
- Switch->getParent(),
- OldSISucc);
- new UnreachableInst(Context, Abort);
- // Force the new case destination to branch to the "unreachable"
- // block while maintaining a (dead) CFG edge to the old block.
- NewSISucc->getTerminator()->eraseFromParent();
- BranchInst::Create(Abort, OldSISucc,
- ConstantInt::getTrue(Context), NewSISucc);
- // Release the PHI operands for this edge.
- for (PHINode &PN : NewSISucc->phis())
- PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
- // Tell the domtree about the new block. We don't fully update the
- // domtree here -- instead we force it to do a full recomputation
- // after the pass is complete -- but we do need to inform it of
- // new blocks.
- DT->addNewBlock(Abort, NewSISucc);
- }
-
- simplifyCode(Worklist, L);
-}
-
-/// Now that we have simplified some instructions in the loop, walk over it
-/// and constant-propagate, DCE, and fold control flow where possible. Note
-/// that this is effectively a very simple loop-structure-aware optimizer.
-/// During processing of this loop, L could well be deleted, so do not use it.
-///
-/// FIXME: When the loop optimizer is more mature, separate this out to a new
-/// pass.
-///
-void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) {
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- while (!Worklist.empty()) {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
-
- // Simple DCE.
- if (isInstructionTriviallyDead(I)) {
- LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "'\n");
-
- // Add I's operands to the worklist; they may be dead now.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
- Worklist.push_back(Use);
- removeFromWorklist(I, Worklist);
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- I->eraseFromParent();
- ++NumSimplify;
- continue;
- }
-
- // See if instruction simplification can hack this up. This is common for
- // things like "select false, X, Y" after unswitching made the condition be
- // 'false'. TODO: update the domtree properly so we can pass it here.
- if (Value *V = SimplifyInstruction(I, DL))
- if (LI->replacementPreservesLCSSAForm(I, V)) {
- replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
- continue;
- }
-
- // Special case hacks that appear commonly in unswitched code.
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (BI->isUnconditional()) {
- // If BI's parent is the only pred of the successor, fold the two blocks
- // together.
- BasicBlock *Pred = BI->getParent();
- (void)Pred;
- BasicBlock *Succ = BI->getSuccessor(0);
- BasicBlock *SinglePred = Succ->getSinglePredecessor();
- if (!SinglePred) continue; // Nothing to do.
- assert(SinglePred == Pred && "CFG broken");
-
- // Make the LPM and Worklist updates specific to LoopUnswitch.
- removeFromWorklist(BI, Worklist);
- auto SuccIt = Succ->begin();
- while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) {
- for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It)
- if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It)))
- Worklist.push_back(Use);
- for (User *U : PN->users())
- Worklist.push_back(cast<Instruction>(U));
- removeFromWorklist(PN, Worklist);
- ++NumSimplify;
- }
- // Merge the block and make the remaining analyses updates.
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get());
- ++NumSimplify;
- continue;
- }
-
- continue;
- }
- }
-}
-
-/// Simple simplifications we can do given the information that the invariant
-/// is definitely not equal to Val.
-Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst,
- Value *Invariant,
- Constant *Val) {
- // icmp eq cond, val -> false
- ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
- if (CI && CI->isEquality()) {
- Value *Op0 = CI->getOperand(0);
- Value *Op1 = CI->getOperand(1);
- if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) {
- LLVMContext &Ctx = Inst->getContext();
- if (CI->getPredicate() == CmpInst::ICMP_EQ)
- return ConstantInt::getFalse(Ctx);
- else
- return ConstantInt::getTrue(Ctx);
- }
- }
-
- // FIXME: there may be other opportunities, e.g. comparison with floating
- // point, or Invariant - Val != 0, etc.
- return nullptr;
-}
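A standalone model of the fold above (hypothetical names): on a path where
the invariant X is known not to equal C, only the equality predicates have
constant results.

#include <optional>

// Fold (X pred C) given the precondition X != C; returns std::nullopt for
// predicates that cannot be folded from inequality alone.
std::optional<bool> foldGivenNotEqual(bool IsEqualityPred, bool IsEQ) {
  if (!IsEqualityPred)
    return std::nullopt;
  return !IsEQ; // icmp eq X, C -> false; icmp ne X, C -> true
}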
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 2ff1e8480749..c733aa4701ed 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -70,14 +70,12 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
index 4063e4fe0472..6aba913005d0 100644
--- a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
@@ -11,95 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
using namespace llvm;
#define DEBUG_TYPE "loweratomic"
-static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
- IRBuilder<> Builder(CXI);
- Value *Ptr = CXI->getPointerOperand();
- Value *Cmp = CXI->getCompareOperand();
- Value *Val = CXI->getNewValOperand();
-
- LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
- Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
- Value *Res = Builder.CreateSelect(Equal, Val, Orig);
- Builder.CreateStore(Res, Ptr);
-
- Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
- Res = Builder.CreateInsertValue(Res, Equal, 1);
-
- CXI->replaceAllUsesWith(Res);
- CXI->eraseFromParent();
- return true;
-}
-
-bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
- IRBuilder<> Builder(RMWI);
- Value *Ptr = RMWI->getPointerOperand();
- Value *Val = RMWI->getValOperand();
-
- LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
- Value *Res = nullptr;
-
- switch (RMWI->getOperation()) {
- default: llvm_unreachable("Unexpected RMW operation");
- case AtomicRMWInst::Xchg:
- Res = Val;
- break;
- case AtomicRMWInst::Add:
- Res = Builder.CreateAdd(Orig, Val);
- break;
- case AtomicRMWInst::Sub:
- Res = Builder.CreateSub(Orig, Val);
- break;
- case AtomicRMWInst::And:
- Res = Builder.CreateAnd(Orig, Val);
- break;
- case AtomicRMWInst::Nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
- break;
- case AtomicRMWInst::Or:
- Res = Builder.CreateOr(Orig, Val);
- break;
- case AtomicRMWInst::Xor:
- Res = Builder.CreateXor(Orig, Val);
- break;
- case AtomicRMWInst::Max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
- Val, Orig);
- break;
- case AtomicRMWInst::Min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
- Orig, Val);
- break;
- case AtomicRMWInst::UMax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
- Val, Orig);
- break;
- case AtomicRMWInst::UMin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
- Orig, Val);
- break;
- case AtomicRMWInst::FAdd:
- Res = Builder.CreateFAdd(Orig, Val);
- break;
- case AtomicRMWInst::FSub:
- Res = Builder.CreateFSub(Orig, Val);
- break;
- }
- Builder.CreateStore(Res, Ptr);
- RMWI->replaceAllUsesWith(Orig);
- RMWI->eraseFromParent();
- return true;
-}
-
static bool LowerFenceInst(FenceInst *FI) {
FI->eraseFromParent();
return true;
@@ -121,7 +43,7 @@ static bool runOnBasicBlock(BasicBlock &BB) {
if (FenceInst *FI = dyn_cast<FenceInst>(&Inst))
Changed |= LowerFenceInst(FI);
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&Inst))
- Changed |= LowerAtomicCmpXchgInst(CXI);
+ Changed |= lowerAtomicCmpXchgInst(CXI);
else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&Inst))
Changed |= lowerAtomicRMWInst(RMWI);
else if (LoadInst *LI = dyn_cast<LoadInst>(&Inst)) {
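The cmpxchg and RMW lowerings deleted here move to the
llvm/Transforms/Utils/LowerAtomic.h helpers included above; the pass keeps
only the dispatch loop. The effect of the compare-exchange lowering is easy
to model in plain C++ (a sketch, valid only where atomicity is not actually
required):

#include <utility>

// Non-atomic model of the lowered cmpxchg: load, compare, select, store.
template <typename T>
std::pair<T, bool> loweredCmpXchg(T *Ptr, T Cmp, T New) {
  T Orig = *Ptr;              // LoadInst
  bool Equal = (Orig == Cmp); // ICmp eq
  *Ptr = Equal ? New : Orig;  // Select + StoreInst
  return {Orig, Equal};       // {original value, success flag}
}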
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index 186065db327e..47493b54a527 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -26,11 +26,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -96,7 +94,7 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
return HasDeadBlocks;
}
-static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI,
+static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo &TLI,
DominatorTree *DT) {
Optional<DomTreeUpdater> DTU;
if (DT)
@@ -140,21 +138,21 @@ static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI,
IsConstantIntrinsicsHandled++;
break;
case Intrinsic::objectsize:
- NewValue = lowerObjectSizeCall(II, DL, TLI, true);
+ NewValue = lowerObjectSizeCall(II, DL, &TLI, true);
ObjectSizeIntrinsicsHandled++;
break;
}
HasDeadBlocks |= replaceConditionalBranchesOnConstant(
- II, NewValue, DTU.hasValue() ? DTU.getPointer() : nullptr);
+ II, NewValue, DTU ? DTU.getPointer() : nullptr);
}
if (HasDeadBlocks)
- removeUnreachableBlocks(F, DTU.hasValue() ? DTU.getPointer() : nullptr);
+ removeUnreachableBlocks(F, DTU ? DTU.getPointer() : nullptr);
return !Worklist.empty();
}
PreservedAnalyses
LowerConstantIntrinsicsPass::run(Function &F, FunctionAnalysisManager &AM) {
- if (lowerConstantIntrinsics(F, AM.getCachedResult<TargetLibraryAnalysis>(F),
+ if (lowerConstantIntrinsics(F, AM.getResult<TargetLibraryAnalysis>(F),
AM.getCachedResult<DominatorTreeAnalysis>(F))) {
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
@@ -178,8 +176,8 @@ public:
}
bool runOnFunction(Function &F) override {
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
DominatorTree *DT = nullptr;
if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
DT = &DTWP->getDomTree();
@@ -187,6 +185,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
@@ -196,6 +195,7 @@ public:
char LowerConstantIntrinsics::ID = 0;
INITIALIZE_PASS_BEGIN(LowerConstantIntrinsics, "lower-constant-intrinsics",
"Lower constant intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(LowerConstantIntrinsics, "lower-constant-intrinsics",
"Lower constant intrinsics", false, false)
diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index a7eb60b5e032..88fad9896c59 100644
--- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -21,12 +21,11 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
using namespace llvm;
@@ -101,6 +100,8 @@ static bool handleSwitchExpect(SwitchInst &SI) {
uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1;
Weights[Index] = LikelyBranchWeightVal;
+ misexpect::checkExpectAnnotations(SI, Weights, /*IsFrontend=*/true);
+
SI.setCondition(ArgValue);
SI.setMetadata(LLVMContext::MD_prof,
@@ -315,13 +316,16 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
std::tie(LikelyBranchWeightVal, UnlikelyBranchWeightVal) =
getBranchWeight(Fn->getIntrinsicID(), CI, 2);
+ SmallVector<uint32_t, 4> ExpectedWeights;
if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
(Predicate == CmpInst::ICMP_EQ)) {
Node =
MDB.createBranchWeights(LikelyBranchWeightVal, UnlikelyBranchWeightVal);
+ ExpectedWeights = {LikelyBranchWeightVal, UnlikelyBranchWeightVal};
} else {
Node =
MDB.createBranchWeights(UnlikelyBranchWeightVal, LikelyBranchWeightVal);
+ ExpectedWeights = {UnlikelyBranchWeightVal, LikelyBranchWeightVal};
}
if (CmpI)
@@ -329,6 +333,8 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
else
BSI.setCondition(ArgValue);
+ misexpect::checkFrontendInstrumentation(BSI, ExpectedWeights);
+
BSI.setMetadata(LLVMContext::MD_prof, Node);
return true;
@@ -409,7 +415,7 @@ public:
bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
};
-}
+} // namespace
char LowerExpectIntrinsic::ID = 0;
INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
diff --git a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 45f5929e3b90..8dc037b10cc8 100644
--- a/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -49,9 +48,13 @@ static bool lowerGuardIntrinsic(Function &F) {
return false;
SmallVector<CallInst *, 8> ToLower;
- for (auto &I : instructions(F))
- if (isGuard(&I))
- ToLower.push_back(cast<CallInst>(&I));
+ // Traverse the users of GuardDecl.
+ // This is presumably cheaper than traversing all instructions in the
+ // function.
+ for (auto *U : GuardDecl->users())
+ if (auto *CI = dyn_cast<CallInst>(U))
+ if (CI->getFunction() == &F)
+ ToLower.push_back(CI);
if (ToLower.empty())
return false;
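Collecting calls through the declaration's use list avoids an
O(#instructions) scan, but a declaration's users span the whole module, hence
the getFunction() filter. The same pattern, factored into a sketch (assumes
the intrinsic declaration was already looked up):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

// Illustrative only: gather the calls to Decl that live inside F.
static void collectCallsIn(llvm::Function &F, llvm::Function *Decl,
                           llvm::SmallVectorImpl<llvm::CallInst *> &Out) {
  for (llvm::User *U : Decl->users())
    if (auto *CI = llvm::dyn_cast<llvm::CallInst>(U))
      if (CI->getFunction() == &F)
        Out.push_back(CI);
}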
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 296becb31e8f..c05906649f16 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -18,11 +18,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -704,10 +704,10 @@ public:
// We may remove II. By default continue on the next/prev instruction.
++II;
// If we were to erase II, move again.
- auto EraseFromParent = [&II](Value *V) {
+ auto EraseFromParent = [&II, &BB](Value *V) {
auto *Inst = cast<Instruction>(V);
if (Inst->use_empty()) {
- if (Inst == &*II) {
+ if (II != BB.rend() && Inst == &*II) {
++II;
}
Inst->eraseFromParent();
@@ -718,7 +718,7 @@ public:
Instruction *NewInst = nullptr;
IRBuilder<> IB(&I);
- MatrixBuilder<IRBuilder<>> Builder(IB);
+ MatrixBuilder Builder(IB);
Value *TA, *TAMA, *TAMB;
ConstantInt *R, *K, *C;
@@ -766,28 +766,25 @@ public:
// If we have a TT matmul, lift the transpose. We may be able to fold into
// consuming multiply.
for (BasicBlock &BB : Func) {
- for (BasicBlock::iterator II = BB.begin(); II != BB.end();) {
- Instruction *I = &*II;
- // We may remove I.
- ++II;
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
Value *A, *B, *AT, *BT;
ConstantInt *R, *K, *C;
// A^t * B ^t -> (B * A)^t
- if (match(&*I, m_Intrinsic<Intrinsic::matrix_multiply>(
- m_Value(A), m_Value(B), m_ConstantInt(R),
- m_ConstantInt(K), m_ConstantInt(C))) &&
+ if (match(&I, m_Intrinsic<Intrinsic::matrix_multiply>(
+ m_Value(A), m_Value(B), m_ConstantInt(R),
+ m_ConstantInt(K), m_ConstantInt(C))) &&
match(A, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(AT))) &&
match(B, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value((BT))))) {
- IRBuilder<> IB(&*I);
- MatrixBuilder<IRBuilder<>> Builder(IB);
+ IRBuilder<> IB(&I);
+ MatrixBuilder Builder(IB);
Value *M = Builder.CreateMatrixMultiply(
BT, AT, C->getZExtValue(), K->getZExtValue(), R->getZExtValue());
setShapeInfo(M, {C, R});
Instruction *NewInst = Builder.CreateMatrixTranspose(
M, C->getZExtValue(), R->getZExtValue());
- ReplaceAllUsesWith(*I, NewInst);
- if (I->use_empty())
- I->eraseFromParent();
+ ReplaceAllUsesWith(I, NewInst);
+ if (I.use_empty())
+ I.eraseFromParent();
if (A->use_empty())
cast<Instruction>(A)->eraseFromParent();
if (A != B && B->use_empty())
@@ -891,27 +888,27 @@ public:
// having to update as many def-use and use-def chains.
//
// Because we add to ToRemove during fusion we can't guarantee that defs
- // are before uses. Change uses to undef temporarily as these should get
+ // are before uses. Change uses to poison temporarily as these should get
// removed as well.
//
- // For verification, we keep track of where we changed uses to undefs in
- // UndefedInsts and then check that we in fact remove them.
- SmallSet<Instruction *, 16> UndefedInsts;
+ // For verification, we keep track of where we changed uses to poison in
+ // PoisonedInsts and then check that we in fact remove them.
+ SmallSet<Instruction *, 16> PoisonedInsts;
for (auto *Inst : reverse(ToRemove)) {
for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
- if (auto *Undefed = dyn_cast<Instruction>(U.getUser()))
- UndefedInsts.insert(Undefed);
- U.set(UndefValue::get(Inst->getType()));
+ if (auto *Poisoned = dyn_cast<Instruction>(U.getUser()))
+ PoisonedInsts.insert(Poisoned);
+ U.set(PoisonValue::get(Inst->getType()));
}
Inst->eraseFromParent();
- UndefedInsts.erase(Inst);
+ PoisonedInsts.erase(Inst);
}
- if (!UndefedInsts.empty()) {
- // If we didn't remove all undefed instructions, it's a hard error.
- dbgs() << "Undefed but present instructions:\n";
- for (auto *I : UndefedInsts)
+ if (!PoisonedInsts.empty()) {
+ // If we didn't remove all poisoned instructions, it's a hard error.
+ dbgs() << "Poisoned but present instructions:\n";
+ for (auto *I : PoisonedInsts)
dbgs() << *I << "\n";
- llvm_unreachable("Undefed but instruction not removed");
+ llvm_unreachable("Poisoned but instruction not removed");
}
return Changed;
@@ -1670,7 +1667,7 @@ public:
for (unsigned I = 0; I < NewNumVecs; ++I) {
// Build a single result vector. First initialize it.
- Value *ResultVector = UndefValue::get(
+ Value *ResultVector = PoisonValue::get(
FixedVectorType::get(VectorTy->getElementType(), NewNumElts));
// Go through the old elements and insert it into the resulting vector.
for (auto J : enumerate(InputMatrix.vectors())) {
diff --git a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
index 73b2cd06fa23..e2de322933bc 100644
--- a/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -13,8 +13,6 @@
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -24,7 +22,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;
@@ -50,9 +47,13 @@ static bool lowerWidenableCondition(Function &F) {
using namespace llvm::PatternMatch;
SmallVector<CallInst *, 8> ToLower;
- for (auto &I : instructions(F))
- if (match(&I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
- ToLower.push_back(cast<CallInst>(&I));
+ // Traverse the users of WCDecl.
+ // This is presumably cheaper than traversing all instructions in the
+ // function.
+ for (auto *U : WCDecl->users())
+ if (auto *CI = dyn_cast<CallInst>(U))
+ if (CI->getFunction() == &F)
+ ToLower.push_back(CI);
if (ToLower.empty())
return false;
diff --git a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 5ffae128f5f0..a3f09a5a33c3 100644
--- a/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -33,13 +33,11 @@
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6698db26626b..1f5bc69acecd 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -28,14 +28,12 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -45,7 +43,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -61,15 +58,13 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <utility>
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
- "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
- cl::ZeroOrMore,
+ "enable-memcpyopt-without-libcalls", cl::Hidden,
cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
@@ -100,7 +95,7 @@ struct MemsetRange {
Value *StartPtr;
/// Alignment - The known alignment of the first store.
- unsigned Alignment;
+ MaybeAlign Alignment;
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
@@ -182,16 +177,16 @@ public:
TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
- SI->getAlign().value(), SI);
+ SI->getAlign(), SI);
}
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
}
- void addRange(int64_t Start, int64_t Size, Value *Ptr,
- unsigned Alignment, Instruction *Inst);
+ void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
+ Instruction *Inst);
};
} // end anonymous namespace
@@ -200,7 +195,7 @@ public:
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
- unsigned Alignment, Instruction *Inst) {
+ MaybeAlign Alignment, Instruction *Inst) {
int64_t End = Start+Size;
range_iterator I = partition_point(
@@ -352,9 +347,25 @@ static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
-static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
- const MemoryUseOrDef *Start,
+static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA,
+ MemoryLocation Loc, const MemoryUseOrDef *Start,
const MemoryUseOrDef *End) {
+ if (isa<MemoryUse>(End)) {
+ // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
+ // If Start and End are in the same block, manually check the accesses
+ // between them for clobbers; otherwise conservatively assume Loc is clobbered.
+ return Start->getBlock() != End->getBlock() ||
+ any_of(
+ make_range(std::next(Start->getIterator()), End->getIterator()),
+ [&AA, Loc](const MemoryAccess &Acc) {
+ if (isa<MemoryUse>(&Acc))
+ return false;
+ Instruction *AccInst =
+ cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
+ return isModSet(AA.getModRefInfo(AccInst, Loc));
+ });
+ }
+
// TODO: Only walk until we hit Start.
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
End->getDefiningAccess(), Loc);
@@ -492,7 +503,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
StartPtr = Range.StartPtr;
AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
- MaybeAlign(Range.Alignment));
+ Range.Alignment);
LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
: Range.TheStores) dbgs()
<< *SI << '\n';
@@ -749,36 +760,25 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
- CallInst *C = nullptr;
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
- // The load must post-dominate the call. Limit to the same block for now.
- // TODO: Support non-local call-slot optimization?
- if (LoadClobber->getBlock() == SI->getParent())
- C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- }
-
- if (C) {
- // Check that nothing touches the dest of the "copy" between
- // the call and the store.
- MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(SI)))
- C = nullptr;
- }
-
- if (C) {
- bool changed = performCallSlotOptzn(
- LI, SI, SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- DL.getTypeStoreSize(SI->getOperand(0)->getType()),
- commonAlignment(SI->getAlign(), LI->getAlign()), C);
- if (changed) {
- eraseInstruction(SI);
- eraseInstruction(LI);
- ++NumMemCpyInstr;
- return true;
- }
+ auto GetCall = [&]() -> CallInst * {
+ // We defer this expensive clobber walk until the cheap checks
+ // have been done on the source inside performCallSlotOptzn.
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI)))
+ return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+ return nullptr;
+ };
+
+ bool changed = performCallSlotOptzn(
+ LI, SI, SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ DL.getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(SI->getAlign(), LI->getAlign()), GetCall);
+ if (changed) {
+ eraseInstruction(SI);
+ eraseInstruction(LI);
+ ++NumMemCpyInstr;
+ return true;
}
}
}
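Passing GetCall as a std::function makes the clobber walk lazy:
performCallSlotOptzn now runs its cheap structural checks first and only then
asks MemorySSA for the clobbering access. The shape of that pattern, as a
generic sketch (Widget is a hypothetical type):

#include <functional>

struct Widget {};

// Defer the expensive producer until the cheap guards have passed.
bool tryOptimize(bool CheapChecksOk, std::function<Widget *()> GetExpensive) {
  if (!CheapChecksOk)
    return false;             // bail out before paying for the walk
  Widget *W = GetExpensive(); // possibly costly; runs at most once here
  return W != nullptr;
}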
@@ -853,7 +853,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, TypeSize cpySize,
- Align cpyAlign, CallInst *C) {
+ Align cpyAlign,
+ std::function<CallInst *()> GetC) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -872,11 +873,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (cpySize.isScalable())
return false;
- // Lifetime marks shouldn't be operated on.
- if (Function *F = C->getCalledFunction())
- if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
- return false;
-
// Require that src be an alloca. This simplifies the reasoning considerably.
auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
@@ -893,6 +889,33 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (cpySize < srcSize)
return false;
+ CallInst *C = GetC();
+ if (!C)
+ return false;
+
+ // Lifetime marks shouldn't be operated on.
+ if (Function *F = C->getCalledFunction())
+ if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
+ return false;
+
+ if (C->getParent() != cpyStore->getParent()) {
+ LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
+ return false;
+ }
+
+ MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
+ MemoryLocation::get(cpyStore) :
+ MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
+
+ // Check that nothing touches the dest of the copy between
+ // the call and the store/memcpy.
+ if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(cpyStore))) {
+ LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
+ return false;
+ }
+
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
@@ -902,6 +925,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
return false;
}
+
// Make sure that nothing can observe cpyDest being written early. There are
// a number of cases to consider:
// 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
@@ -1118,7 +1142,7 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// then we could still perform the xform by moving M up to the first memcpy.
// TODO: It would be sufficient to check the MDep source up to the memcpy
// size of M, rather than MDep.
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
return false;
@@ -1215,14 +1239,14 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
}
// By default, create an unaligned memset.
- unsigned Align = 1;
+ Align Alignment = Align(1);
// If Dest is aligned, and SrcSize is constant, use the minimum alignment
// of the sum.
- const unsigned DestAlign =
- std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
+ const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
+ MemCpy->getDestAlign().valueOrOne());
if (DestAlign > 1)
if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
- Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
+ Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
IRBuilder<> Builder(MemCpy);
@@ -1241,11 +1265,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
unsigned DestAS = Dest->getType()->getPointerAddressSpace();
Instruction *NewMemSet = Builder.CreateMemSet(
- Builder.CreateGEP(Builder.getInt8Ty(),
- Builder.CreatePointerCast(Dest,
- Builder.getInt8PtrTy(DestAS)),
- SrcSize),
- MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
+ Builder.CreateGEP(
+ Builder.getInt8Ty(),
+ Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
+ SrcSize),
+ MemSet->getOperand(1), MemsetLen, Alignment);
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
"MemCpy must be a MemoryDef");
@@ -1402,7 +1426,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
+ MemoryAccess *AnyClobber = MA->getDefiningAccess();
MemoryLocation DestLoc = MemoryLocation::getForDest(M);
const MemoryAccess *DestClobber =
MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
@@ -1431,28 +1456,20 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for
- // now. Additionally, we need to ensure that there are no accesses
- // to dest between the call and the memcpy. Accesses to src will be
- // checked by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(
- M, M, M->getDest(), M->getSource(),
- TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
- C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(
+ M, M, M->getDest(), M->getSource(),
+ TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
+ [C]() -> CallInst * { return C; })) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
}
}
}
@@ -1557,7 +1574,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
return false;
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index aac0deea5be3..ce01ae5b2692 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -144,31 +144,33 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
return {};
}
- Value *const Addr = LoadI->getOperand(0);
+ Value *Addr = LoadI->getOperand(0);
if (Addr->getType()->getPointerAddressSpace() != 0) {
LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
return {};
}
- auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
- if (!GEP)
- return {};
- LLVM_DEBUG(dbgs() << "GEP\n");
- if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
- LLVM_DEBUG(dbgs() << "used outside of block\n");
- return {};
- }
- const auto &DL = GEP->getModule()->getDataLayout();
- if (!isDereferenceablePointer(GEP, LoadI->getType(), DL)) {
+ const auto &DL = LoadI->getModule()->getDataLayout();
+ if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) {
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
// We need to make sure that we can do comparisons in any order, so we
// require memory to be unconditionally dereferenceable.
return {};
}
- APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
- if (!GEP->accumulateConstantOffset(DL, Offset))
- return {};
- return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
- Offset);
+
+ APInt Offset = APInt(DL.getPointerTypeSizeInBits(Addr->getType()), 0);
+ Value *Base = Addr;
+ auto *GEP = dyn_cast<GetElementPtrInst>(Addr);
+ if (GEP) {
+ LLVM_DEBUG(dbgs() << "GEP\n");
+ if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
+ LLVM_DEBUG(dbgs() << "used outside of block\n");
+ return {};
+ }
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return {};
+ Base = GEP->getPointerOperand();
+ }
+ return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset);
}
// A comparison between two BCE atoms, e.g. `a == o.a` in the example at the
@@ -244,7 +246,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
auto MayClobber = [&](LoadInst *LI) {
// If a potentially clobbering instruction comes before the load,
// we can still safely sink the load.
- return !Inst->comesBefore(LI) &&
+ return (Inst->getParent() != LI->getParent() || !Inst->comesBefore(LI)) &&
isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
};
if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
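The added getParent() check matters because Instruction::comesBefore is only
meaningful for two instructions in the same basic block (it asserts on a
cross-block query). A guarded wrapper makes that contract explicit (sketch;
the helper name is hypothetical):

#include "llvm/IR/Instruction.h"

// True iff A and B share a basic block and A is ordered before B.
static bool comesBeforeInSameBlock(const llvm::Instruction *A,
                                   const llvm::Instruction *B) {
  return A->getParent() == B->getParent() && A->comesBefore(B);
}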
@@ -270,9 +272,8 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
}
// Do the actual splitting.
- for (Instruction *Inst : reverse(OtherInsts)) {
- Inst->moveBefore(&*NewParent->begin());
- }
+ for (Instruction *Inst : reverse(OtherInsts))
+ Inst->moveBefore(*NewParent, NewParent->begin());
}
bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
@@ -368,8 +369,11 @@ Optional<BCECmpBlock> visitCmpBlock(Value *const Val, BasicBlock *const Block,
return None;
BCECmpBlock::InstructionSet BlockInsts(
- {Result->Lhs.GEP, Result->Rhs.GEP, Result->Lhs.LoadI, Result->Rhs.LoadI,
- Result->CmpI, BranchI});
+ {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI});
+ if (Result->Lhs.GEP)
+ BlockInsts.insert(Result->Lhs.GEP);
+ if (Result->Rhs.GEP)
+ BlockInsts.insert(Result->Rhs.GEP);
return BCECmpBlock(std::move(*Result), Block, BlockInsts);
}
@@ -604,8 +608,15 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
NextCmpBlock->getParent(), InsertBefore);
IRBuilder<> Builder(BB);
// Add the GEPs from the first BCECmpBlock.
- Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
- Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
+ Value *Lhs, *Rhs;
+ if (FirstCmp.Lhs().GEP)
+ Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
+ else
+ Lhs = FirstCmp.Lhs().LoadI->getPointerOperand();
+ if (FirstCmp.Rhs().GEP)
+ Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
+ else
+ Rhs = FirstCmp.Rhs().LoadI->getPointerOperand();
Value *IsEqual = nullptr;
LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 734532a6670c..6383d6ea838b 100644
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -76,13 +76,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index f35c9212a6f9..876ef3c427a6 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -88,8 +88,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -1076,6 +1074,9 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
Value *Arg1, Value *Arg2,
Instruction *I) const {
auto *E = new (ExpressionAllocator) BasicExpression(2);
+ // TODO: we need to remove context instruction after Value Tracking
+ // can run without context instruction
+ const SimplifyQuery Q = SQ.getWithInstruction(I);
E->setType(T);
E->setOpcode(Opcode);
@@ -1091,7 +1092,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
E->op_push_back(lookupOperandLeader(Arg1));
E->op_push_back(lookupOperandLeader(Arg2));
- Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ);
+ Value *V = simplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), Q);
if (auto Simplified = checkExprResults(E, I, V)) {
addAdditionalUsers(Simplified, I);
return Simplified.Expr;
@@ -1147,6 +1148,9 @@ NewGVN::ExprResult NewGVN::checkExprResults(Expression *E, Instruction *I,
NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands());
+ // TODO: we need to remove context instruction after Value Tracking
+ // can run without context instruction
+ const SimplifyQuery Q = SQ.getWithInstruction(I);
bool AllConstant = setBasicExpressionInfo(I, E);
@@ -1169,13 +1173,13 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
E->setOpcode((CI->getOpcode() << 8) | Predicate);
- // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands
+ // TODO: 25% of our time is spent in simplifyCmpInst with pointer operands
assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() &&
"Wrong types on cmp instruction");
assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
E->getOperand(1)->getType() == I->getOperand(1)->getType()));
Value *V =
- SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ);
+ simplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (isa<SelectInst>(I)) {
@@ -1183,26 +1187,26 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
E->getOperand(1) == E->getOperand(2)) {
assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
E->getOperand(2)->getType() == I->getOperand(2)->getType());
- Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
- E->getOperand(2), SQ);
+ Value *V = simplifySelectInst(E->getOperand(0), E->getOperand(1),
+ E->getOperand(2), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
}
} else if (I->isBinaryOp()) {
Value *V =
- SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
+ simplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (auto *CI = dyn_cast<CastInst>(I)) {
Value *V =
- SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
+ simplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (auto *GEPI = dyn_cast<GetElementPtrInst>(I)) {
Value *V =
- SimplifyGEPInst(GEPI->getSourceElementType(), *E->op_begin(),
+ simplifyGEPInst(GEPI->getSourceElementType(), *E->op_begin(),
makeArrayRef(std::next(E->op_begin()), E->op_end()),
- GEPI->isInBounds(), SQ);
+ GEPI->isInBounds(), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (AllConstant) {
@@ -1453,10 +1457,12 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
if (Offset >= 0) {
if (auto *C = dyn_cast<Constant>(
lookupOperandLeader(DepSI->getValueOperand()))) {
- LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI
- << " to constant " << *C << "\n");
- return createConstantExpression(
- getConstantStoreValueForLoad(C, Offset, LoadType, DL));
+ if (Constant *Res =
+ getConstantStoreValueForLoad(C, Offset, LoadType, DL)) {
+ LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI
+ << " to constant " << *Res << "\n");
+ return createConstantExpression(Res);
+ }
}
}
} else if (auto *DepLI = dyn_cast<LoadInst>(DepInst)) {
@@ -1503,9 +1509,8 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
else if (auto *II = dyn_cast<IntrinsicInst>(DepInst)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start)
return createConstantExpression(UndefValue::get(LoadType));
- } else if (isAllocationFn(DepInst, TLI))
- if (auto *InitVal = getInitialValueOfAllocation(cast<CallBase>(DepInst),
- TLI, LoadType))
+ } else if (auto *InitVal =
+ getInitialValueOfAllocation(DepInst, TLI, LoadType))
return createConstantExpression(InitVal);
return nullptr;
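
The simplification above relies on getInitialValueOfAllocation() now taking
the instruction directly and returning null for non-allocation calls, so the
separate isAllocationFn() guard became redundant. The underlying fold is the
usual one for zeroing allocators; a C-level illustration using only the
standard library:

    #include <cassert>
    #include <cstdlib>

    int main() {
      // Loads from fresh calloc memory fold to the allocator's known
      // initial value, which for calloc is zero.
      int *P = static_cast<int *>(std::calloc(4, sizeof(int)));
      assert(P && P[2] == 0);
      std::free(P);
    }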
@@ -3142,9 +3147,8 @@ bool NewGVN::singleReachablePHIPath(
// connected component finding in this routine, and it's probably not worth
// the complexity for the time being. So, we just keep a set of visited
// MemoryAccess and return true when we hit a cycle.
- if (Visited.count(First))
+ if (!Visited.insert(First).second)
return true;
- Visited.insert(First);
const auto *EndDef = First;
for (auto *ChainDef : optimized_def_chain(First)) {
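
The replacement above is the standard set-insertion idiom: insert() already
reports whether the element was new, so the separate count() probe costs a
second hash lookup for no benefit. In miniature, with a plain standard
container:

    #include <unordered_set>

    // insert() returns an (iterator, inserted) pair; a false second
    // member means the node was already visited, i.e. we hit a cycle.
    static bool reachedCycle(int Node, std::unordered_set<int> &Visited) {
      return !Visited.insert(Node).second;
    }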
@@ -3353,7 +3357,7 @@ void NewGVN::verifyStoreExpressions() const {
// instruction set, propagating value numbers, marking things touched, etc,
// until the set of touched instructions is completely empty.
void NewGVN::iterateTouchedInstructions() {
- unsigned int Iterations = 0;
+ uint64_t Iterations = 0;
// Figure out where TouchedInstructions starts
int FirstInstr = TouchedInstructions.find_first();
// Nothing set, nothing to iterate, just return.
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index e0d0301c1ef6..689a2a286cb9 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -125,6 +125,9 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
if (Call->isNoBuiltin() || Call->isStrictFP())
continue;
+ if (Call->isMustTailCall())
+ continue;
+
// Skip if function either has local linkage or is not a known library
// function.
LibFunc LF;
@@ -137,7 +140,7 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
case LibFunc_sqrt:
if (TTI->haveFastSqrt(Call->getType()) &&
optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
- DTU.hasValue() ? DTU.getPointer() : nullptr))
+ DTU ? DTU.getPointer() : nullptr))
break;
continue;
default:
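
Two fixes land in this file: musttail calls are now skipped, because partial
inlining rewrites the single libcall into a branch between a fast inline
path and the original call, which a musttail call (required to feed the
return directly) cannot survive; and the Optional<DomTreeUpdater> is tested
via its contextual bool conversion instead of hasValue(). The shape of the
sqrt expansion, as a hedged sketch (hypothetical names, not the pass's
output):

    #include <cmath>

    // Try the cheap hardware sqrt first; fall back to the libm call
    // (which sets errno) only when the result is NaN, i.e. for
    // negative inputs. __builtin_sqrt is a GCC/Clang builtin.
    static double sqrtFastOrLibcall(double X) {
      double R = __builtin_sqrt(X);
      if (R == R) // not NaN: the fast result is valid
        return R;
      return std::sqrt(X); // slow path keeps errno semantics
    }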
diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index a110f7d5c241..e1cc3fc71c3e 100644
--- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -53,9 +53,9 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -65,6 +65,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE "safepoint-placement"
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index c354fa177a60..da1737979305 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -42,7 +41,6 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -54,7 +52,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -183,7 +180,7 @@ void ReassociatePass::BuildRankMap(Function &F,
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
for (Instruction &I : *BB)
- if (mayBeMemoryDependent(I))
+ if (mayHaveNonDefUseDependency(I))
ValueRankMap[&I] = ++BBRank;
}
}
@@ -1076,7 +1073,7 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
BinaryOperator *Mul =
BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
- Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
+ Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
Mul->takeName(Shl);
// Everyone now refers to the mul instruction.
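
The hunk swaps the dead-operand placeholder from undef to poison, part of
the broader undef deprecation: the operand is about to become unused, and a
poison placeholder grants the optimizer strictly more freedom. The algebraic
identity ConvertShiftToMul relies on, checked in plain C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 0x1234, ShAmt = 5;
      // shl is multiplication by a power of two: X << C == X * (1 << C).
      assert((X << ShAmt) == X * (uint64_t(1) << ShAmt));
    }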
diff --git a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index a49b9ad3f62b..9dc64493a9ee 100644
--- a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -24,8 +24,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index b795ad3899bc..51e4a5773f3e 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -258,6 +258,7 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all entries of the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
+using IsKnownBaseMapTy = MapVector<Value *, bool>;
using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
@@ -281,19 +282,29 @@ struct PartiallyConstructedSafepointRecord {
RematerializedValueMapTy RematerializedValues;
};
+struct RematerizlizationCandidateRecord {
+ // Chain from derived pointer to base.
+ SmallVector<Instruction *, 3> ChainToBase;
+ // Original base.
+ Value *RootOfChain;
+ // Cost of chain.
+ InstructionCost Cost;
+};
+using RematCandTy = MapVector<Value *, RematerizlizationCandidateRecord>;
+
} // end anonymous namespace
static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
Optional<OperandBundleUse> DeoptBundle =
Call->getOperandBundle(LLVMContext::OB_deopt);
- if (!DeoptBundle.hasValue()) {
+ if (!DeoptBundle) {
assert(AllowStatepointWithNoDeoptInfo &&
"Found non-leaf call without deopt info!");
return None;
}
- return DeoptBundle.getValue().Inputs;
+ return DeoptBundle->Inputs;
}
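
The cleanup above modernizes llvm::Optional use: the contextual bool
conversion replaces hasValue(), and operator-> replaces getValue(). The same
idiom with std::optional (an analogous API, shown here because it is
self-contained):

    #include <optional>
    #include <string>

    static std::optional<std::string> lookup(bool Found) {
      if (!Found)
        return std::nullopt;
      return "value";
    }

    int main() {
      auto V = lookup(true);
      if (!V)                    // contextual conversion, like !DeoptBundle
        return 1;
      return V->empty() ? 1 : 0; // operator->, like DeoptBundle->Inputs
    }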
/// Compute the live-in set for every basic block in the function
@@ -385,45 +396,16 @@ static void analyzeParsePointLiveness(
Result.LiveSet = LiveSet;
}
-// Returns true is V is a knownBaseResult.
-static bool isKnownBaseResult(Value *V);
-
-// Returns true if V is a BaseResult that already exists in the IR, i.e. it is
-// not created by the findBasePointers algorithm.
-static bool isOriginalBaseResult(Value *V);
-
-namespace {
-
-/// A single base defining value - An immediate base defining value for an
-/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
-/// For instructions which have multiple pointer [vector] inputs or that
-/// transition between vector and scalar types, there is no immediate base
-/// defining value. The 'base defining value' for 'Def' is the transitive
-/// closure of this relation stopping at the first instruction which has no
-/// immediate base defining value. The b.d.v. might itself be a base pointer,
-/// but it can also be an arbitrary derived pointer.
-struct BaseDefiningValueResult {
- /// Contains the value which is the base defining value.
- Value * const BDV;
-
- /// True if the base defining value is also known to be an actual base
- /// pointer.
- const bool IsKnownBase;
-
- BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
- : BDV(BDV), IsKnownBase(IsKnownBase) {
-#ifndef NDEBUG
- // Check consistency between new and old means of checking whether a BDV is
- // a base.
- bool MustBeBase = isKnownBaseResult(BDV);
- assert(!MustBeBase || MustBeBase == IsKnownBase);
-#endif
- }
-};
+/// Returns true if V is a known base.
+static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);
-} // end anonymous namespace
+/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
+/// in the cache before.
+static void setKnownBase(Value *V, bool IsKnownBase,
+ IsKnownBaseMapTy &KnownBases);
-static BaseDefiningValueResult findBaseDefiningValue(Value *I);
+static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases);
/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
@@ -434,76 +416,122 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the later, the return pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
-static BaseDefiningValueResult
-findBaseDefiningValueOfVector(Value *I) {
+static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
// Each case parallels findBaseDefiningValue below, see that code for
// detailed motivation.
- if (isa<Argument>(I))
+ auto Cached = Cache.find(I);
+ if (Cached != Cache.end())
+ return Cached->second;
+
+ if (isa<Argument>(I)) {
// An incoming argument to the function is a base pointer
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (isa<Constant>(I))
+ if (isa<Constant>(I)) {
// Base of constant vector consists only of constant null pointers.
// For reasoning see similar case inside 'findBaseDefiningValue' function.
- return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
- true);
+ auto *CAZ = ConstantAggregateZero::get(I->getType());
+ Cache[I] = CAZ;
+ setKnownBase(CAZ, /* IsKnownBase */true, KnownBases);
+ return CAZ;
+ }
- if (isa<LoadInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<LoadInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (isa<InsertElementInst>(I))
+ if (isa<InsertElementInst>(I)) {
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
+ }
- if (isa<ShuffleVectorInst>(I))
+ if (isa<ShuffleVectorInst>(I)) {
// We don't know whether this vector contains entirely base pointers or
// not. To be conservatively correct, we treat it as a BDV and will
// duplicate code as needed to construct a parallel vector of bases.
// TODO: There are a number of local optimizations which could be applied
// here for particular shufflevector patterns.
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
+ }
// The behavior of getelementptr instructions is the same for vector and
// non-vector data types.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
- return findBaseDefiningValue(GEP->getPointerOperand());
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ auto *BDV =
+ findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
+ Cache[GEP] = BDV;
+ return BDV;
+ }
+
+ // The behavior of freeze instructions is the same for vector and
+ // non-vector data types.
+ if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
+ auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
+ Cache[Freeze] = BDV;
+ return BDV;
+ }
// If the pointer comes through a bitcast of a vector of pointers to
// a vector of another type of pointer, then look through the bitcast
- if (auto *BC = dyn_cast<BitCastInst>(I))
- return findBaseDefiningValue(BC->getOperand(0));
+ if (auto *BC = dyn_cast<BitCastInst>(I)) {
+ auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
+ Cache[BC] = BDV;
+ return BDV;
+ }
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
"unknown vector instruction - no base found for vector element");
- return BaseDefiningValueResult(I, false);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */false, KnownBases);
+ return I;
}
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
-static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
+static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
assert(I->getType()->isPtrOrPtrVectorTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
+ auto Cached = Cache.find(I);
+ if (Cached != Cache.end())
+ return Cached->second;
if (I->getType()->isVectorTy())
- return findBaseDefiningValueOfVector(I);
+ return findBaseDefiningValueOfVector(I, Cache, KnownBases);
- if (isa<Argument>(I))
+ if (isa<Argument>(I)) {
// An incoming argument to the function is a base pointer
// We should have never reached here if this argument isn't a gc value
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
if (isa<Constant>(I)) {
// We assume that objects with a constant base (e.g. a global) can't move
@@ -516,8 +544,10 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// "phi (const1, const2)" or "phi (const, regular gc ptr)".
// See constant.ll file for relevant test cases.
- return BaseDefiningValueResult(
- ConstantPointerNull::get(cast<PointerType>(I->getType())), true);
+ auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
+ Cache[I] = CPN;
+ setKnownBase(CPN, /* IsKnownBase */true, KnownBases);
+ return CPN;
}
// inttoptrs in an integral address space are currently ill-defined. We
@@ -525,8 +555,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// constant rule above and because we don't really have a better semantic
// to give them. Note that the optimizer is always free to insert undefined
// behavior on dynamically dead paths as well.
- if (isa<IntToPtrInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<IntToPtrInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Def = CI->stripPointerCasts();
@@ -539,16 +572,31 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// not simply a pointer cast (i.e. an inttoptr). We don't know how to
// handle int->ptr conversion.
assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
- return findBaseDefiningValue(Def);
+ auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
+ Cache[CI] = BDV;
+ return BDV;
}
- if (isa<LoadInst>(I))
+ if (isa<LoadInst>(I)) {
// The value loaded is a gc base itself
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
// The base of this GEP is the base
- return findBaseDefiningValue(GEP->getPointerOperand());
+ auto *BDV =
+ findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
+ Cache[GEP] = BDV;
+ return BDV;
+ }
+
+ if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
+ auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
+ Cache[Freeze] = BDV;
+ return BDV;
+ }
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
@@ -569,24 +617,32 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
llvm_unreachable(
"interaction with the gcroot mechanism is not supported");
case Intrinsic::experimental_gc_get_pointer_base:
- return findBaseDefiningValue(II->getOperand(0));
+ auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
+ Cache[II] = BDV;
+ return BDV;
}
}
// We assume that functions in the source language only return base
// pointers. This should probably be generalized via attributes to support
// both source language and internal functions.
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// TODO: I have absolutely no idea how to implement this part yet. It's not
// necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
- if (isa<AtomicCmpXchgInst>(I))
+ if (isa<AtomicCmpXchgInst>(I)) {
// A CAS is effectively an atomic store and load combined under a
// predicate. From the perspective of base pointers, we just treat it
// like a load.
- return BaseDefiningValueResult(I, true);
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
"binary ops which don't apply to pointers");
@@ -594,8 +650,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// The aggregate ops. Aggregates can either be in the heap or on the
// stack, but in either case, this is simply a field load. As a result,
// this is a defining definition of the base just like a load is.
- if (isa<ExtractValueInst>(I))
- return BaseDefiningValueResult(I, true);
+ if (isa<ExtractValueInst>(I)) {
+ Cache[I] = I;
+ setKnownBase(I, /* IsKnownBase */true, KnownBases);
+ return I;
+ }
// We should never see an insert vector since that would require we be
// tracing back a struct value not a pointer value.
@@ -606,6 +665,8 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// substituting gc.get.pointer.base() intrinsic.
bool IsKnownBase =
isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
+ setKnownBase(I, /* IsKnownBase */IsKnownBase, KnownBases);
+ Cache[I] = I;
// An extractelement produces a base result exactly when its input does.
// We may need to insert a parallel instruction to extract the appropriate
@@ -615,33 +676,38 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
// Note: There are a lot of obvious peephole cases here. These are deliberately
// handled after the main base pointer inference algorithm to make writing
// test cases to exercise that code easier.
- return BaseDefiningValueResult(I, IsKnownBase);
+ return I;
// The last two cases here don't return a base pointer. Instead, they
// return a value which dynamically selects from among several base
// derived pointers (each with its own base potentially). It's the job of
// the caller to resolve these.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
- "missing instruction case in findBaseDefiningValing");
- return BaseDefiningValueResult(I, IsKnownBase);
+ "missing instruction case in findBaseDefiningValue");
+ return I;
}
/// Returns the base defining value for this value.
-static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
- Value *&Cached = Cache[I];
- if (!Cached) {
- Cached = findBaseDefiningValue(I).BDV;
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ if (Cache.find(I) == Cache.end()) {
+ auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
+ Cache[I] = BDV;
LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
- << Cached->getName() << "\n");
+ << Cache[I]->getName() << ", is known base = "
+ << KnownBases[I] << "\n");
}
assert(Cache[I] != nullptr);
- return Cached;
+ assert(KnownBases.find(Cache[I]) != KnownBases.end() &&
+ "Cached value must be present in known bases map");
+ return Cache[I];
}
/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
-static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
- Value *Def = findBaseDefiningValueCached(I, Cache);
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
auto Found = Cache.find(Def);
if (Found != Cache.end()) {
// Either a base-of relation, or a self reference. Caller must check.
@@ -651,6 +717,7 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
return Def;
}
+#ifndef NDEBUG
/// This value is a base pointer that is not generated by RS4GC, i.e. it already
/// exists in the code.
static bool isOriginalBaseResult(Value *V) {
@@ -659,21 +726,22 @@ static bool isOriginalBaseResult(Value *V) {
!isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
!isa<ShuffleVectorInst>(V);
}
+#endif
-/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
-/// is it known to be a base pointer? Or do we need to continue searching.
-static bool isKnownBaseResult(Value *V) {
- if (isOriginalBaseResult(V))
- return true;
- if (isa<Instruction>(V) &&
- cast<Instruction>(V)->getMetadata("is_base_value")) {
- // This is a previously inserted base phi or select. We know
- // that this is a base value.
- return true;
- }
+static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
+ auto It = KnownBases.find(V);
+ assert(It != KnownBases.end() && "Value not present in the map");
+ return It->second;
+}
- // We need to keep searching
- return false;
+static void setKnownBase(Value *V, bool IsKnownBase,
+ IsKnownBaseMapTy &KnownBases) {
+#ifndef NDEBUG
+ auto It = KnownBases.find(V);
+ if (It != KnownBases.end())
+ assert(It->second == IsKnownBase && "Changing already present value");
+#endif
+ KnownBases[V] = IsKnownBase;
}
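
The rewrite above replaces the old BaseDefiningValueResult pair with two
side tables threaded through the recursion: Cache memoizes the base defining
value per input, and KnownBases records the is-known-base flag, with
setKnownBase() asserting that a cached flag is never silently flipped. The
discipline in miniature (hypothetical integer domain, not the pass's types):

    #include <cassert>
    #include <map>

    static void setFlag(int V, bool Flag, std::map<int, bool> &Flags) {
      auto It = Flags.find(V);
      if (It != Flags.end())
        assert(It->second == Flag && "changing already present value");
      Flags[V] = Flag;
    }

    static int findBase(int V, std::map<int, int> &Cache,
                        std::map<int, bool> &Flags) {
      if (auto It = Cache.find(V); It != Cache.end())
        return It->second; // memoized: no repeated recursion
      // Stand-in for the real case analysis: walk toward a root.
      int Base = (V > 1) ? findBase(V / 2, Cache, Flags) : V;
      Cache[V] = Base;
      setFlag(Base, /*Flag=*/true, Flags);
      return Base;
    }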
// Returns true if First and Second values are both scalar or both vector.
@@ -801,10 +869,11 @@ static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
/// For gc objects, this is simply itself. On success, returns a value which is
/// the base pointer. (This is reliable and can be used for relocation.) On
/// failure, returns nullptr.
-static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
- Value *Def = findBaseOrBDV(I, Cache);
+static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
+ IsKnownBaseMapTy &KnownBases) {
+ Value *Def = findBaseOrBDV(I, Cache, KnownBases);
- if (isKnownBaseResult(Def) && areBothVectorOrScalar(Def, I))
+ if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
return Def;
// Here's the rough algorithm:
@@ -887,8 +956,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
assert(!isOriginalBaseResult(Current) && "why did it get added?");
auto visitIncomingValue = [&](Value *InVal) {
- Value *Base = findBaseOrBDV(InVal, Cache);
- if (isKnownBaseResult(Base) && areBothVectorOrScalar(Base, InVal))
+ Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
+ if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
// Known bases won't need new instructions introduced and can be
// ignored safely. However, this can only be done when InVal and Base
// are both scalar or both vector. Otherwise, we need to find a
@@ -924,12 +993,16 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
for (auto Pair : States) {
Value *BDV = Pair.first;
auto canPruneInput = [&](Value *V) {
- Value *BDV = findBaseOrBDV(V, Cache);
- if (V->stripPointerCasts() != BDV)
+ // If the input of the BDV is the BDV itself we can prune it. This is
+ // only possible if the BDV is a PHI node.
+ if (V->stripPointerCasts() == BDV)
+ return true;
+ Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
+ if (V->stripPointerCasts() != VBDV)
return false;
// The assumption is that anything not in the state list
// propagates a base pointer.
- return States.count(BDV) == 0;
+ return States.count(VBDV) == 0;
};
bool CanPrune = true;
@@ -975,13 +1048,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) ||
+ assert((!isKnownBase(BDV, KnownBases) ||
!areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
"why did it get added?");
BDVState NewState(BDV);
visitBDVOperands(BDV, [&](Value *Op) {
- Value *BDV = findBaseOrBDV(Op, Cache);
+ Value *BDV = findBaseOrBDV(Op, Cache, KnownBases);
auto OpState = GetStateForBDV(BDV, Op);
NewState.meet(OpState);
});
@@ -1014,8 +1087,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, BaseValue)) &&
- "why did it get added?");
+ assert(
+ (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
+ "why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
@@ -1033,6 +1107,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
States[I] = BDVState(I, BDVState::Base, BaseInst);
+ setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
} else if (!isa<VectorType>(I->getType())) {
// We need to handle cases that have a vector base but the instruction is
// a scalar type (these could be phis or selects or any instruction that
@@ -1055,7 +1130,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(I) || !areBothVectorOrScalar(I, State.getBaseValue())) &&
+ assert((!isKnownBase(I, KnownBases) ||
+ !areBothVectorOrScalar(I, State.getBaseValue())) &&
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
@@ -1087,6 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Add metadata marking this as a base value
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
States[I] = BDVState(I, BDVState::Conflict, BaseInst);
+ setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
}
#ifndef NDEBUG
@@ -1102,7 +1179,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// assured to be able to determine an instruction which produces its base
// pointer.
auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
- Value *BDV = findBaseOrBDV(Input, Cache);
+ Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
Value *Base = nullptr;
if (!States.count(BDV)) {
assert(areBothVectorOrScalar(BDV, Input));
@@ -1129,7 +1206,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) ||
+ assert((!isKnownBase(BDV, KnownBases) ||
!areBothVectorOrScalar(BDV, State.getBaseValue())) &&
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
@@ -1154,13 +1231,21 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
Value *OldBase = BlockToValue[InBB];
Value *Base = getBaseForInput(InVal, nullptr);
+
+ // We can't use `stripPointerCasts` instead of this function because
+ // `stripPointerCasts` doesn't handle vectors of pointers.
+ auto StripBitCasts = [](Value *V) -> Value * {
+ while (auto *BC = dyn_cast<BitCastInst>(V))
+ V = BC->getOperand(0);
+ return V;
+ };
// In essence this assert states: the only way two values
// incoming from the same basic block may be different is by
// being different bitcasts of the same value. A cleanup
// that remains TODO is changing findBaseOrBDV to return an
// llvm::Value of the correct type (and still remain pure).
// This will remove the need to add bitcasts.
- assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
+ assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
"findBaseOrBDV should be pure!");
#endif
}
@@ -1223,8 +1308,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Only values that do not have known bases or those that have differing
// type (scalar versus vector) from a possible known base should be in the
// lattice.
- assert((!isKnownBaseResult(BDV) || !areBothVectorOrScalar(BDV, Base)) &&
- "why did it get added?");
+ assert(
+ (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
+ "why did it get added?");
LLVM_DEBUG(
dbgs() << "Updating base value cache"
@@ -1255,9 +1341,10 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// pointer was a base pointer.
static void findBasePointers(const StatepointLiveSetTy &live,
PointerToBaseTy &PointerToBase, DominatorTree *DT,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
for (Value *ptr : live) {
- Value *base = findBasePointer(ptr, DVCache);
+ Value *base = findBasePointer(ptr, DVCache, KnownBases);
assert(base && "failed to find base pointer");
PointerToBase[ptr] = base;
assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
@@ -1272,7 +1359,8 @@ static void findBasePointers(const StatepointLiveSetTy &live,
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
CallBase *Call,
PartiallyConstructedSafepointRecord &result,
- PointerToBaseTy &PointerToBase) {
+ PointerToBaseTy &PointerToBase,
+ IsKnownBaseMapTy &KnownBases) {
StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
// We assume that all pointers passed to deopt are base pointers; as an
// optimization, we can use this to avoid separately materializing the base
@@ -1286,7 +1374,8 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
PotentiallyDerivedPointers.remove(V);
PointerToBase[V] = V;
}
- findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache);
+ findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
+ KnownBases);
}
/// Given an updated version of the dataflow liveness results, update the
@@ -1349,23 +1438,23 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] =
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
- AttributeList AL) {
- if (AL.isEmpty())
- return AL;
+ AttributeList OrigAL,
+ AttributeList StatepointAL) {
+ if (OrigAL.isEmpty())
+ return StatepointAL;
// Remove the readonly, readnone, and statepoint function attributes.
- AttrBuilder FnAttrs(Ctx, AL.getFnAttrs());
+ AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
for (auto Attr : FnAttrsToStrip)
FnAttrs.removeAttribute(Attr);
- for (Attribute A : AL.getFnAttrs()) {
+ for (Attribute A : OrigAL.getFnAttrs()) {
if (isStatepointDirectiveAttr(A))
FnAttrs.removeAttribute(A);
}
// Just skip parameter and return attributes for now
- return AttributeList::get(Ctx, AttributeList::FunctionIndex,
- AttributeSet::get(Ctx, FnAttrs));
+ return StatepointAL.addFnAttributes(Ctx, FnAttrs);
}
/// Helper function to place all gc relocates necessary for the given
@@ -1570,8 +1659,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
assert(DeoptLowering.equals("live-through") && "Unsupported value!");
}
- Value *CallTarget = Call->getCalledOperand();
- if (Function *F = dyn_cast<Function>(CallTarget)) {
+ FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
+ if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
auto IID = F->getIntrinsicID();
if (IID == Intrinsic::experimental_deoptimize) {
// Calls to llvm.experimental.deoptimize are lowered to calls to the
@@ -1589,8 +1678,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// the same module. This is fine -- we assume the frontend knew what it
// was doing when generating this kind of IR.
CallTarget = F->getParent()
- ->getOrInsertFunction("__llvm_deoptimize", FTy)
- .getCallee();
+ ->getOrInsertFunction("__llvm_deoptimize", FTy);
IsDeoptimize = true;
} else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
@@ -1686,8 +1774,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
CallTarget =
F->getParent()
- ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy)
- .getCallee();
+ ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
}
}
@@ -1705,8 +1792,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// function attributes. In case if we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPCall->setAttributes(
- legalizeCallAttributes(CI->getContext(), CI->getAttributes()));
+ SPCall->setAttributes(legalizeCallAttributes(
+ CI->getContext(), CI->getAttributes(), SPCall->getAttributes()));
Token = cast<GCStatepointInst>(SPCall);
@@ -1732,8 +1819,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// function attributes. In case if we can handle this set of attributes -
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- SPInvoke->setAttributes(
- legalizeCallAttributes(II->getContext(), II->getAttributes()));
+ SPInvoke->setAttributes(legalizeCallAttributes(
+ II->getContext(), II->getAttributes(), SPInvoke->getAttributes()));
Token = cast<GCStatepointInst>(SPInvoke);
@@ -2071,6 +2158,7 @@ static void relocationViaAlloca(
assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
"we must have the same allocas with lives");
+ (void) NumRematerializedValues;
if (!PromotableAllocas.empty()) {
// Apply mem2reg to promote alloca to SSA
PromoteMemToReg(PromotableAllocas, DT);
@@ -2221,27 +2309,25 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
return true;
}
-// From the statepoint live set pick values that are cheaper to recompute then
-// to relocate. Remove this values from the live set, rematerialize them after
-// statepoint and record them in "Info" structure. Note that similar to
-// relocated values we don't do any user adjustments here.
-static void rematerializeLiveValues(CallBase *Call,
- PartiallyConstructedSafepointRecord &Info,
- PointerToBaseTy &PointerToBase,
- TargetTransformInfo &TTI) {
+// Find derived pointers that can be recomputed cheaply enough and fill
+// RematerizationCandidates with such candidates.
+static void
+findRematerializationCandidates(PointerToBaseTy PointerToBase,
+ RematCandTy &RematerizationCandidates,
+ TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
- // Record values we are going to delete from this statepoint live set.
- // We can not di this in following loop due to iterator invalidation.
- SmallVector<Value *, 32> LiveValuesToBeDeleted;
+ for (auto P2B : PointerToBase) {
+ auto *Derived = P2B.first;
+ auto *Base = P2B.second;
+ // Consider only derived pointers.
+ if (Derived == Base)
+ continue;
- for (Value *LiveValue: Info.LiveSet) {
- // For each live pointer find its defining chain
+ // For each live pointer find its defining chain.
SmallVector<Instruction *, 3> ChainToBase;
- assert(PointerToBase.count(LiveValue));
Value *RootOfChain =
- findRematerializableChainToBasePointer(ChainToBase,
- LiveValue);
+ findRematerializableChainToBasePointer(ChainToBase, Derived);
// Nothing to do, or chain is too long
if ( ChainToBase.size() == 0 ||
@@ -2250,9 +2336,9 @@ static void rematerializeLiveValues(CallBase *Call,
// Handle the scenario where the RootOfChain is not equal to the
// Base Value, but they are essentially the same phi values.
- if (RootOfChain != PointerToBase[LiveValue]) {
+ if (RootOfChain != PointerToBase[Derived]) {
PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
- PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]);
+ PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
if (!OrigRootPhi || !AlternateRootPhi)
continue;
// PHI nodes that have the same incoming values, and belonging to the same
@@ -2266,33 +2352,61 @@ static void rematerializeLiveValues(CallBase *Call,
// deficiency in the findBasePointer algorithm.
if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
continue;
- // Now that the phi nodes are proved to be the same, assert that
- // findBasePointer's newly generated AlternateRootPhi is present in the
- // liveset of the call.
- assert(Info.LiveSet.count(AlternateRootPhi));
}
- // Compute cost of this chain
+ // Compute cost of this chain.
InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
// TODO: We can also account for cases when we will be able to remove some
// of the rematerialized values by later optimization passes. I.e if
// we rematerialized several intersecting chains. Or if original values
// don't have any uses besides this statepoint.
+ // Ok, there is a candidate.
+ RematerizlizationCandidateRecord Record;
+ Record.ChainToBase = ChainToBase;
+ Record.RootOfChain = RootOfChain;
+ Record.Cost = Cost;
+ RematerizationCandidates.insert({ Derived, Record });
+ }
+}
+
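
Splitting rematerializeLiveValues() this way converts a per-statepoint
recomputation into a two-phase scheme: candidate chains and their costs are
found once per function, and each call site then only filters the records
against the threshold, doubling the cost for invokes since the chain must be
cloned on both the normal and unwind paths. The shape, with stand-in types:

    #include <map>
    #include <vector>

    // The real code keys a record (chain, root, cost) by derived
    // pointer in a MapVector; ints stand in for both here.
    struct Candidate { int Cost = 0; };
    using CandMap = std::map<int, Candidate>;

    // Phase 1: computed once per function.
    static CandMap findCandidates(const std::vector<int> &DerivedIds) {
      CandMap M;
      for (int D : DerivedIds)
        M[D].Cost = D % 7; // stand-in for chainToBasePointerCost()
      return M;
    }

    // Phase 2: run per call site; only the filtering remains.
    static int countRematerializable(const CandMap &M, bool IsInvoke,
                                     int Threshold) {
      int N = 0;
      for (const auto &KV : M) {
        int Cost = KV.second.Cost;
        if (IsInvoke)
          Cost *= 2; // cloned for both normal and unwind blocks
        if (Cost < Threshold)
          ++N;
      }
      return N;
    }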
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
+// the statepoint and record them in the "Info" structure. Note that, similar
+// to relocated values, we don't do any user adjustments here.
+static void rematerializeLiveValues(CallBase *Call,
+ PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase,
+ RematCandTy &RematerizationCandidates,
+ TargetTransformInfo &TTI) {
+ // Record values we are going to delete from this statepoint live set.
+ // We cannot do this in the following loop due to iterator invalidation.
+ SmallVector<Value *, 32> LiveValuesToBeDeleted;
+
+ for (Value *LiveValue : Info.LiveSet) {
+ auto It = RematerizationCandidates.find(LiveValue);
+ if (It == RematerizationCandidates.end())
+ continue;
+
+ RematerizlizationCandidateRecord &Record = It->second;
+
+ InstructionCost Cost = Record.Cost;
// For invokes we need to rematerialize each chain twice - for normal and
// for unwind basic blocks. Model this by multiplying cost by two.
- if (isa<InvokeInst>(Call)) {
+ if (isa<InvokeInst>(Call))
Cost *= 2;
- }
- // If it's too expensive - skip it
+
+ // If it's too expensive - skip it.
if (Cost >= RematerializationThreshold)
continue;
// Remove value from the live set
LiveValuesToBeDeleted.push_back(LiveValue);
- // Clone instructions and record them inside "Info" structure
+ // Clone instructions and record them inside "Info" structure.
- // Walk backwards to visit top-most instructions first
+ // For each live pointer, get its defining chain.
+ SmallVector<Instruction *, 3> ChainToBase = Record.ChainToBase;
+ // Walk backwards to visit top-most instructions first.
std::reverse(ChainToBase.begin(), ChainToBase.end());
// Utility function which clones all instructions from "ChainToBase"
@@ -2352,7 +2466,7 @@ static void rematerializeLiveValues(CallBase *Call,
Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
- InsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ InsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
auto *Invoke = cast<InvokeInst>(Call);
@@ -2363,9 +2477,9 @@ static void rematerializeLiveValues(CallBase *Call,
&*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue = rematerializeChain(
- NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ NormalInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Instruction *UnwindRematerializedValue = rematerializeChain(
- UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]);
+ UnwindInsertBefore, Record.RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2380,7 +2494,8 @@ static void rematerializeLiveValues(CallBase *Call,
static bool inlineGetBaseAndOffset(Function &F,
SmallVectorImpl<CallInst *> &Intrinsics,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
auto &Context = F.getContext();
auto &DL = F.getParent()->getDataLayout();
bool Changed = false;
@@ -2389,7 +2504,8 @@ static bool inlineGetBaseAndOffset(Function &F,
switch (Callsite->getIntrinsicID()) {
case Intrinsic::experimental_gc_get_pointer_base: {
Changed = true;
- Value *Base = findBasePointer(Callsite->getOperand(0), DVCache);
+ Value *Base =
+ findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
assert(!DVCache.count(Callsite));
auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
@@ -2404,7 +2520,7 @@ static bool inlineGetBaseAndOffset(Function &F,
case Intrinsic::experimental_gc_get_pointer_offset: {
Changed = true;
Value *Derived = Callsite->getOperand(0);
- Value *Base = findBasePointer(Derived, DVCache);
+ Value *Base = findBasePointer(Derived, DVCache, KnownBases);
assert(!DVCache.count(Callsite));
unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
@@ -2431,7 +2547,8 @@ static bool inlineGetBaseAndOffset(Function &F,
static bool insertParsePoints(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
SmallVectorImpl<CallBase *> &ToUpdate,
- DefiningValueMapTy &DVCache) {
+ DefiningValueMapTy &DVCache,
+ IsKnownBaseMapTy &KnownBases) {
#ifndef NDEBUG
// Validate the input
std::set<CallBase *> Uniqued;
@@ -2487,7 +2604,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// B) Find the base pointers for each live pointer
for (size_t i = 0; i < Records.size(); i++) {
PartiallyConstructedSafepointRecord &info = Records[i];
- findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase);
+ findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
}
if (PrintBasePointers) {
errs() << "Base Pairs (w/o Relocation):\n";
@@ -2563,11 +2680,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
Holders.clear();
+ // Compute the cost of possible re-materialization of derived pointers.
+ RematCandTy RematerizationCandidates;
+ findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);
+
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
for (size_t i = 0; i < Records.size(); i++)
- rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI);
+ rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
+ RematerizationCandidates, TTI);
// We need this to safely RAUW and delete call or invoke return values that
// may themselves be live over a statepoint. For details, please see usage in
@@ -2930,13 +3052,18 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// inlineGetBaseAndOffset() and insertParsePoints().
DefiningValueMapTy DVCache;
+ // Mapping from a base value to a flag indicating whether it's a known
+ // base or not.
+ IsKnownBaseMapTy KnownBases;
+
if (!Intrinsics.empty())
// Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before finding
// live references.
- MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache);
+ MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);
if (!ParsePointNeeded.empty())
- MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache);
+ MadeChange |=
+ insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);
return MadeChange;
}
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index c34da51e6dc1..2282ef636076 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -17,20 +17,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/SCCP.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
@@ -38,14 +33,13 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
@@ -59,7 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
#include <cassert>
#include <utility>
#include <vector>
@@ -97,6 +91,18 @@ static bool isOverdefined(const ValueLatticeElement &LV) {
return !LV.isUnknownOrUndef() && !isConstant(LV);
}
+static bool canRemoveInstruction(Instruction *I) {
+ if (wouldInstructionBeTriviallyDead(I))
+ return true;
+
+ // Some instructions can be handled but are rejected above. Catch
+ // those cases by falling through to here.
+ // TODO: Mark globals as being constant earlier, so
+ // TODO: wouldInstructionBeTriviallyDead() knows that atomic loads
+ // TODO: are safe to remove.
+ return isa<LoadInst>(I);
+}
+
static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
Constant *Const = nullptr;
if (V->getType()->isStructTy()) {
@@ -127,7 +133,8 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
// Calls with "clang.arc.attachedcall" implicitly use the return value and
// those uses cannot be updated with a constant.
CallBase *CB = dyn_cast<CallBase>(V);
- if (CB && ((CB->isMustTailCall() && !CB->isSafeToRemove()) ||
+ if (CB && ((CB->isMustTailCall() &&
+ !canRemoveInstruction(CB)) ||
CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall))) {
Function *F = CB->getCalledFunction();
@@ -156,7 +163,7 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB,
if (Inst.getType()->isVoidTy())
continue;
if (tryToReplaceWithConstant(Solver, &Inst)) {
- if (Inst.isSafeToRemove())
+ if (canRemoveInstruction(&Inst))
Inst.eraseFromParent();
MadeChanges = true;
@@ -170,6 +177,7 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB,
continue;
if (IV.getConstantRange().isAllNonNegative()) {
auto *ZExt = new ZExtInst(ExtOp, Inst.getType(), "", &Inst);
+ ZExt->takeName(&Inst);
InsertedValues.insert(ZExt);
Inst.replaceAllUsesWith(ZExt);
Solver.removeLatticeValueFor(&Inst);
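
The added takeName() keeps the replacement value's name stable in the IR.
The surrounding fold is the standard observation that once the lattice
proves a value non-negative, sign- and zero-extension agree, so the
easier-to-analyze zext can stand in for the sext. Checked in plain C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t Small = 42; // known non-negative
      int64_t SExt = static_cast<int64_t>(Small);
      int64_t ZExt = static_cast<int64_t>(static_cast<uint8_t>(Small));
      assert(SExt == ZExt); // extensions agree on non-negative values
    }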
@@ -182,10 +190,14 @@ static bool simplifyInstsInBlock(SCCPSolver &Solver, BasicBlock &BB,
return MadeChanges;
}
+static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
+ DomTreeUpdater &DTU,
+ BasicBlock *&NewUnreachableBB);
+
// runSCCP() - Run the Sparse Conditional Constant Propagation algorithm,
// and return true if the function was modified.
static bool runSCCP(Function &F, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI, DomTreeUpdater &DTU) {
LLVM_DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
SCCPSolver Solver(
DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; },
@@ -213,13 +225,12 @@ static bool runSCCP(Function &F, const DataLayout &DL,
// as we cannot modify the CFG of the function.
SmallPtrSet<Value *, 32> InsertedValues;
+ SmallVector<BasicBlock *, 8> BlocksToErase;
for (BasicBlock &BB : F) {
if (!Solver.isBlockExecutable(&BB)) {
LLVM_DEBUG(dbgs() << " BasicBlock Dead:" << BB);
-
++NumDeadBlocks;
- NumInstRemoved += removeAllNonTerminatorAndEHPadInstructions(&BB).first;
-
+ BlocksToErase.push_back(&BB);
MadeChanges = true;
continue;
}
@@ -228,17 +239,32 @@ static bool runSCCP(Function &F, const DataLayout &DL,
NumInstRemoved, NumInstReplaced);
}
+ // Remove unreachable blocks and non-feasible edges.
+ for (BasicBlock *DeadBB : BlocksToErase)
+ NumInstRemoved += changeToUnreachable(DeadBB->getFirstNonPHI(),
+ /*PreserveLCSSA=*/false, &DTU);
+
+ BasicBlock *NewUnreachableBB = nullptr;
+ for (BasicBlock &BB : F)
+ MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB);
+
+ for (BasicBlock *DeadBB : BlocksToErase)
+ if (!DeadBB->hasAddressTaken())
+ DTU.deleteBB(DeadBB);
+
return MadeChanges;
}
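
Deleting dead blocks while keeping the dominator tree usable is why the pass
adopts the Lazy DomTreeUpdater strategy: CFG edits are queued as they happen
and applied in a batch, and blocks whose address is taken are left in place
rather than erased. A toy sketch of the deferred-update idea (a hypothetical
mini-API, not LLVM's):

    #include <utility>
    #include <vector>

    struct LazyUpdater {
      std::vector<std::pair<int, int>> PendingDeletes; // (From, To) ids
      void deleteEdge(int From, int To) {
        PendingDeletes.emplace_back(From, To);
      }
      void flush() {
        // A real updater patches the dominator tree incrementally here;
        // batching avoids a rebuild after every single CFG edit.
        PendingDeletes.clear();
      }
    };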
PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) {
const DataLayout &DL = F.getParent()->getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- if (!runSCCP(F, DL, &TLI))
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ if (!runSCCP(F, DL, &TLI, DTU))
return PreservedAnalyses::all();
auto PA = PreservedAnalyses();
- PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}
@@ -261,7 +287,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
- AU.setPreservesCFG();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
// runOnFunction - Run the Sparse Conditional Constant Propagation
@@ -272,7 +298,10 @@ public:
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- return runSCCP(F, DL, TLI);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr,
+ DomTreeUpdater::UpdateStrategy::Lazy);
+ return runSCCP(F, DL, TLI, DTU);
}
};
@@ -342,7 +371,8 @@ static void findReturnsToZap(Function &F,
}
static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
- DomTreeUpdater &DTU) {
+ DomTreeUpdater &DTU,
+ BasicBlock *&NewUnreachableBB) {
SmallPtrSet<BasicBlock *, 8> FeasibleSuccessors;
bool HasNonFeasibleEdges = false;
for (BasicBlock *Succ : successors(BB)) {
@@ -362,7 +392,19 @@ static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
isa<IndirectBrInst>(TI)) &&
"Terminator must be a br, switch or indirectbr");
- if (FeasibleSuccessors.size() == 1) {
+ if (FeasibleSuccessors.size() == 0) {
+ // Branch on undef/poison, replace with unreachable.
+ SmallPtrSet<BasicBlock *, 8> SeenSuccs;
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB);
+ if (SeenSuccs.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
+ }
+ TI->eraseFromParent();
+ new UnreachableInst(BB->getContext(), BB);
+ DTU.applyUpdatesPermissive(Updates);
+ } else if (FeasibleSuccessors.size() == 1) {
// Replace with an unconditional branch to the only feasible successor.
BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin();
SmallVector<DominatorTree::UpdateType, 8> Updates;
@@ -385,6 +427,23 @@ static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB,
} else if (FeasibleSuccessors.size() > 1) {
SwitchInstProfUpdateWrapper SI(*cast<SwitchInst>(TI));
SmallVector<DominatorTree::UpdateType, 8> Updates;
+
+ // If the default destination is infeasible it will never be taken. Replace
+ // it with a new block with a single Unreachable instruction.
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ if (!FeasibleSuccessors.contains(DefaultDest)) {
+ if (!NewUnreachableBB) {
+ NewUnreachableBB =
+ BasicBlock::Create(DefaultDest->getContext(), "default.unreachable",
+ DefaultDest->getParent(), DefaultDest);
+ new UnreachableInst(DefaultDest->getContext(), NewUnreachableBB);
+ }
+
+ SI->setDefaultDest(NewUnreachableBB);
+ Updates.push_back({DominatorTree::Delete, BB, DefaultDest});
+ Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB});
+ }
+
for (auto CI = SI->case_begin(); CI != SI->case_end();) {
if (FeasibleSuccessors.contains(CI->getCaseSuccessor())) {
++CI;
@@ -532,11 +591,13 @@ bool llvm::runIPSCCP(
NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(),
/*PreserveLCSSA=*/false, &DTU);
+ BasicBlock *NewUnreachableBB = nullptr;
for (BasicBlock &BB : F)
- MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU);
+ MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB);
for (BasicBlock *DeadBB : BlocksToErase)
- DTU.deleteBB(DeadBB);
+ if (!DeadBB->hasAddressTaken())
+ DTU.deleteBB(DeadBB);
for (BasicBlock &BB : F) {
for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 8be8946702be..143a035749c7 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -57,11 +57,9 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -78,14 +76,12 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
-#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
@@ -1016,7 +1012,7 @@ private:
I.getParent()->getFirstInsertionPt() == I.getParent()->end())
return PI.setAborted(&I);
- // TODO: We could use SimplifyInstruction here to fold PHINodes and
+ // TODO: We could use simplifyInstruction here to fold PHINodes and
// SelectInsts. However, doing so requires changing the current
// dead-operand-tracking mechanism. For instance, suppose neither loading
// from %U nor %other traps. Then "load (select undef, %U, %other)" does not
@@ -1987,13 +1983,22 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
+ Use *U = S.getUse();
+
+ // Lifetime intrinsics operate over the whole alloca, whose size is usually
+ // larger than that of the other load/store slices (RelEnd > Size). But
+ // lifetime intrinsics are always promotable and should not block the
+ // promotability of the other slices in the partition.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
+ if (II->isLifetimeStartOrEnd() || II->isDroppable())
+ return true;
+ }
+
// We can't reasonably handle cases where the load or store extends past
// the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
- Use *U = S.getUse();
-
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
@@ -2048,9 +2053,6 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
return false;
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
- return false;
} else {
return false;
}
@@ -2179,10 +2181,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
return V;
}
- SmallVector<int, 8> Mask;
- Mask.reserve(NumElements);
- for (unsigned i = BeginIndex; i != EndIndex; ++i)
- Mask.push_back(i);
+ auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
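
For reference, `llvm::seq` yields a half-open range of consecutive integers and `llvm::to_vector<N>` materializes any range into a SmallVector with inline capacity N, so the one-liner above is equivalent to the removed push_back loop. A small sketch, assuming LLVM's ADT headers:

    #include "llvm/ADT/STLExtras.h" // llvm::to_vector
    #include "llvm/ADT/Sequence.h"  // llvm::seq

    // Builds the shuffle mask {2, 3, 4, 5}, i.e. seq over [2, 6).
    auto Mask = llvm::to_vector<8>(llvm::seq<int>(2, 6));
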
@@ -2734,10 +2733,9 @@ private:
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
V = IRB.CreateMul(
IRB.CreateZExt(V, SplatIntTy, "zext"),
- ConstantExpr::getUDiv(
- Constant::getAllOnesValue(SplatIntTy),
- ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()),
- SplatIntTy)),
+ IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
+ IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
"isplat");
return V;
}
@@ -2887,7 +2885,7 @@ private:
assert((IsDest && II.getRawDest() == OldPtr) ||
(!IsDest && II.getRawSource() == OldPtr));
- MaybeAlign SliceAlign = getSliceAlign();
+ Align SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
@@ -3481,19 +3479,13 @@ private:
Type *Ty = GEPI.getSourceElementType();
Value *True = Sel->getTrueValue();
- Value *NTrue =
- IsInBounds
- ? IRB.CreateInBoundsGEP(Ty, True, Index,
- True->getName() + ".sroa.gep")
- : IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep");
+ Value *NTrue = IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep",
+ IsInBounds);
Value *False = Sel->getFalseValue();
- Value *NFalse =
- IsInBounds
- ? IRB.CreateInBoundsGEP(Ty, False, Index,
- False->getName() + ".sroa.gep")
- : IRB.CreateGEP(Ty, False, Index, False->getName() + ".sroa.gep");
+ Value *NFalse = IRB.CreateGEP(Ty, False, Index,
+ False->getName() + ".sroa.gep", IsInBounds);
Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
Sel->getName() + ".sroa.sel");
@@ -3547,10 +3539,8 @@ private:
IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
Type *Ty = GEPI.getSourceElementType();
- NewVal = IsInBounds ? IRB.CreateInBoundsGEP(Ty, In, Index,
- In->getName() + ".sroa.gep")
- : IRB.CreateGEP(Ty, In, Index,
- In->getName() + ".sroa.gep");
+ NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",
+ IsInBounds);
}
NewPN->addIncoming(NewVal, B);
}
@@ -3972,16 +3962,15 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
for (LoadInst *LI : Loads) {
SplitLoads.clear();
- IntegerType *Ty = cast<IntegerType>(LI->getType());
- assert(Ty->getBitWidth() % 8 == 0);
- uint64_t LoadSize = Ty->getBitWidth() / 8;
- assert(LoadSize > 0 && "Cannot have a zero-sized integer load!");
-
auto &Offsets = SplitOffsetsMap[LI];
- assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
- "Slice size should always match load size exactly!");
+ unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
+ assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
+ "Load must have type size equal to store size");
+ assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
+ "Load must be >= slice size");
+
uint64_t BaseOffset = Offsets.S->beginOffset();
- assert(BaseOffset + LoadSize > BaseOffset &&
+ assert(BaseOffset + SliceSize > BaseOffset &&
"Cannot represent alloca access size using 64-bit integers!");
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
@@ -3992,7 +3981,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
- auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
+ auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
auto AS = LI->getPointerAddressSpace();
auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
@@ -4025,7 +4014,7 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Setup the next partition.
PartOffset = Offsets.Splits[Idx];
++Idx;
- PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset;
+ PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
}
// Now that we have the split loads, do the slow walk over all uses of the
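
The PartOffset/PartSize walk above carves the slice at each interior split offset: each part runs from one split point to the next, and the final part ends at the slice size. A toy, free-standing mirror of the same arithmetic (names invented for illustration):

    #include <cstdint>
    #include <vector>

    // Given sorted interior split offsets within [0, SliceSize), return the
    // byte size of each resulting part.
    std::vector<uint64_t> partSizes(const std::vector<uint64_t> &Splits,
                                    uint64_t SliceSize) {
      std::vector<uint64_t> Sizes;
      uint64_t PartOffset = 0;
      for (std::size_t Idx = 0; Idx <= Splits.size(); ++Idx) {
        uint64_t End = Idx < Splits.size() ? Splits[Idx] : SliceSize;
        Sizes.push_back(End - PartOffset); // one part per gap between splits
        PartOffset = End;
      }
      return Sizes;
    }
    // partSizes({2, 4}, 8) == {2, 2, 4}: three partial loads cover one
    // 8-byte load.
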
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index f9650efc051f..008ddfc72740 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -16,16 +16,13 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Scalar.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
@@ -76,7 +73,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopRerollLegacyPassPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnrollAndJamPass(Registry);
- initializeLoopUnswitchPass(Registry);
initializeWarnMissedTransformationsLegacyPass(Registry);
initializeLoopVersioningLICMLegacyPassPass(Registry);
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
@@ -104,6 +100,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSimpleLoopUnswitchLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
+ initializeTLSVariableHoistLegacyPassPass(Registry);
initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
@@ -214,10 +211,6 @@ void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnrollAndJamPass());
}
-void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopUnswitchPass());
-}
-
void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLowerAtomicPass());
}
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index 29cea42e4a00..e2976ace3a4a 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -1,5 +1,5 @@
//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
-// instrinsics
+// intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -24,11 +24,9 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -36,7 +34,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <algorithm>
#include <cassert>
using namespace llvm;
@@ -876,7 +873,7 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
- DTU.hasValue() ? DTU.getPointer() : nullptr);
+ DTU ? DTU.getPointer() : nullptr);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 3606c8a4b073..08f4b2173da2 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -39,8 +39,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
@@ -52,7 +50,7 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
-static cl::opt<bool> ScalarizeVariableInsertExtract(
+static cl::opt<bool> ClScalarizeVariableInsertExtract(
"scalarize-variable-insert-extract", cl::init(true), cl::Hidden,
cl::desc("Allow the scalarizer pass to scalarize "
"insertelement/extractelement with variable index"));
@@ -60,9 +58,9 @@ static cl::opt<bool> ScalarizeVariableInsertExtract(
// This is disabled by default because having separate loads and stores
// makes it more likely that the -combiner-alias-analysis limits will be
// reached.
-static cl::opt<bool>
- ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden,
- cl::desc("Allow the scalarizer pass to scalarize loads and store"));
+static cl::opt<bool> ClScalarizeLoadStore(
+ "scalarize-load-store", cl::init(false), cl::Hidden,
+ cl::desc("Allow the scalarizer pass to scalarize loads and store"));
namespace {
@@ -96,7 +94,7 @@ public:
// Scatter V into Size components. If new instructions are needed,
// insert them before BBI in BB. If Cache is nonnull, use it to cache
// the results.
- Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Type *PtrElemTy,
ValueVector *cachePtr = nullptr);
// Return component I, creating a new Value for it if necessary.
@@ -109,8 +107,8 @@ private:
BasicBlock *BB;
BasicBlock::iterator BBI;
Value *V;
+ Type *PtrElemTy;
ValueVector *CachePtr;
- PointerType *PtrTy;
ValueVector Tmp;
unsigned Size;
};
@@ -188,10 +186,23 @@ struct VectorLayout {
uint64_t ElemSize = 0;
};
+template <typename T>
+T getWithDefaultOverride(const cl::opt<T> &ClOption,
+ const llvm::Optional<T> &DefaultOverride) {
+ return ClOption.getNumOccurrences() ? ClOption
+ : DefaultOverride.value_or(ClOption);
+}
+
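
The helper above gives an explicit command-line occurrence priority over everything, then the pass-level override, then the cl::opt default. A hedged usage sketch, reusing the helper and headers already present in this file (the flag and function names are invented):

    static cl::opt<bool> ClDemoFlag("demo-flag", cl::init(true), cl::Hidden);

    // Resolution order: -demo-flag on the command line beats the pass-level
    // override, which beats the cl::init default (true here).
    static bool effectiveDemoFlag(const llvm::Optional<bool> &PassOverride) {
      return getWithDefaultOverride(ClDemoFlag, PassOverride);
    }
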
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT)
- : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT) {
+ ScalarizerVisitor(unsigned ParallelLoopAccessMDKind, DominatorTree *DT,
+ ScalarizerPassOptions Options)
+ : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT),
+ ScalarizeVariableInsertExtract(
+ getWithDefaultOverride(ClScalarizeVariableInsertExtract,
+ Options.ScalarizeVariableInsertExtract)),
+ ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
+ Options.ScalarizeLoadStore)) {
}
bool visit(Function &F);
@@ -216,8 +227,9 @@ public:
bool visitCallInst(CallInst &ICI);
private:
- Scatterer scatter(Instruction *Point, Value *V);
+ Scatterer scatter(Instruction *Point, Value *V, Type *PtrElemTy = nullptr);
void gather(Instruction *Op, const ValueVector &CV);
+ void replaceUses(Instruction *Op, Value *CV);
bool canTransferMetadata(unsigned Kind);
void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
Optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
@@ -231,12 +243,16 @@ private:
ScatterMap Scattered;
GatherList Gathered;
+ bool Scalarized;
SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
unsigned ParallelLoopAccessMDKind;
DominatorTree *DT;
+
+ const bool ScalarizeVariableInsertExtract;
+ const bool ScalarizeLoadStore;
};
class ScalarizerLegacyPass : public FunctionPass {
@@ -265,12 +281,14 @@ INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
- ValueVector *cachePtr)
- : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+ Type *PtrElemTy, ValueVector *cachePtr)
+ : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr) {
Type *Ty = V->getType();
- PtrTy = dyn_cast<PointerType>(Ty);
- if (PtrTy)
- Ty = PtrTy->getPointerElementType();
+ if (Ty->isPointerTy()) {
+ assert(cast<PointerType>(Ty)->isOpaqueOrPointeeTypeMatches(PtrElemTy) &&
+ "Pointer element type mismatch");
+ Ty = PtrElemTy;
+ }
Size = cast<FixedVectorType>(Ty)->getNumElements();
if (!CachePtr)
Tmp.resize(Size, nullptr);
@@ -287,15 +305,15 @@ Value *Scatterer::operator[](unsigned I) {
if (CV[I])
return CV[I];
IRBuilder<> Builder(BB, BBI);
- if (PtrTy) {
- Type *ElTy =
- cast<VectorType>(PtrTy->getPointerElementType())->getElementType();
+ if (PtrElemTy) {
+ Type *VectorElemTy = cast<VectorType>(PtrElemTy)->getElementType();
if (!CV[0]) {
- Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
+ Type *NewPtrTy = PointerType::get(
+ VectorElemTy, V->getType()->getPointerAddressSpace());
CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(VectorElemTy, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
@@ -334,7 +352,7 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, ScalarizerPassOptions());
return Impl.visit(F);
}
@@ -345,6 +363,8 @@ FunctionPass *llvm::createScalarizerPass() {
bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());
+ Scalarized = false;
+
// To ensure we replace gathered components correctly we need to do an ordered
// traversal of the basic blocks in the function.
ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
@@ -362,13 +382,14 @@ bool ScalarizerVisitor::visit(Function &F) {
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
-Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
+Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
+ Type *PtrElemTy) {
if (Argument *VArg = dyn_cast<Argument>(V)) {
// Put the scattered form of arguments in the entry block,
// so that it can be used everywhere.
Function *F = VArg->getParent();
BasicBlock *BB = &F->getEntryBlock();
- return Scatterer(BB, BB->begin(), V, &Scattered[V]);
+ return Scatterer(BB, BB->begin(), V, PtrElemTy, &Scattered[V]);
}
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
// When scalarizing PHI nodes we might try to examine/rewrite InsertElement
@@ -379,17 +400,17 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
// need to analyse them further.
if (!DT->isReachableFromEntry(VOp->getParent()))
return Scatterer(Point->getParent(), Point->getIterator(),
- UndefValue::get(V->getType()));
+ PoisonValue::get(V->getType()), PtrElemTy);
// Put the scattered form of an instruction directly after the
// instruction, skipping over PHI nodes and debug intrinsics.
BasicBlock *BB = VOp->getParent();
return Scatterer(
BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
- &Scattered[V]);
+ PtrElemTy, &Scattered[V]);
}
  // In the fallback case, just put the scattered form before Point and
// keep the result local to Point.
- return Scatterer(Point->getParent(), Point->getIterator(), V);
+ return Scatterer(Point->getParent(), Point->getIterator(), V, PtrElemTy);
}
// Replace Op with the gathered form of the components in CV. Defer the
@@ -419,6 +440,15 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
Gathered.push_back(GatherList::value_type(Op, &SV));
}
+// Replace Op with CV, and record Op as a potentially dead instruction.
+void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
+ if (CV != Op) {
+ Op->replaceAllUsesWith(CV);
+ PotentiallyDeadInstrs.emplace_back(Op);
+ Scalarized = true;
+ }
+}
+
// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
@@ -558,9 +588,11 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
if (OpI->getType()->isVectorTy()) {
Scattered[I] = scatter(&CI, OpI);
assert(Scattered[I].size() == NumElems && "mismatched call operands");
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
+ Tys.push_back(OpI->getType()->getScalarType());
} else {
ScalarOperands[I] = OpI;
- if (hasVectorInstrinsicOverloadedScalarOpd(ID, I))
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
Tys.push_back(OpI->getType());
}
}
@@ -576,7 +608,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
ScalarCallOps.clear();
for (unsigned J = 0; J != NumArgs; ++J) {
- if (hasVectorInstrinsicScalarOpd(ID, J))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, J))
ScalarCallOps.push_back(ScalarOperands[J]);
else
ScalarCallOps.push_back(Scattered[J][Elem]);
@@ -809,7 +841,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
Value *Res = Op0[CI->getValue().getZExtValue()];
- gather(&EEI, {Res});
+ replaceUses(&EEI, Res);
return true;
}
@@ -825,7 +857,7 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
EEI.getName() + ".upto" + Twine(I));
}
- gather(&EEI, {Res});
+ replaceUses(&EEI, Res);
return true;
}
@@ -891,7 +923,7 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&LI);
- Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
+ Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType());
ValueVector Res;
Res.resize(NumElems);
@@ -917,7 +949,7 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&SI);
- Scatterer VPtr = scatter(&SI, SI.getPointerOperand());
+ Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), FullValue->getType());
Scatterer VVal = scatter(&SI, FullValue);
ValueVector Stores;
@@ -940,7 +972,7 @@ bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
bool ScalarizerVisitor::finish() {
// The presence of data in Gathered or Scattered indicates changes
// made to the Function.
- if (Gathered.empty() && Scattered.empty())
+ if (Gathered.empty() && Scattered.empty() && !Scalarized)
return false;
for (const auto &GMI : Gathered) {
Instruction *Op = GMI.first;
@@ -971,6 +1003,7 @@ bool ScalarizerVisitor::finish() {
}
Gathered.clear();
Scattered.clear();
+ Scalarized = false;
RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
@@ -982,7 +1015,7 @@ PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM)
unsigned ParallelLoopAccessMDKind =
M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT);
+ ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options);
bool Changed = Impl.visit(F);
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index d23925042b0a..7da5a78772ad 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -189,7 +189,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index a27da047bfd3..0535608244cc 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GuardUtils.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -28,6 +27,7 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -49,7 +49,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -81,7 +83,6 @@ static cl::opt<bool> EnableNonTrivialUnswitch(
static cl::opt<int>
UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
- cl::ZeroOrMore,
cl::desc("The cost threshold for unswitching a loop."));
static cl::opt<bool> EnableUnswitchCostMultiplier(
@@ -110,17 +111,27 @@ static cl::opt<unsigned>
"partial unswitching analysis"),
cl::init(100), cl::Hidden);
static cl::opt<bool> FreezeLoopUnswitchCond(
- "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden,
+ "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
cl::desc("If enabled, the freeze instruction will be added to condition "
"of loop unswitch to prevent miscompilation."));
+// Helper to skip (select x, true, false), which matches both a logical AND and
+// OR and can confuse code that tries to determine if \p Cond is either a
+// logical AND or OR but not both.
+static Value *skipTrivialSelect(Value *Cond) {
+ Value *CondNext;
+ while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
+ Cond = CondNext;
+ return Cond;
+}
+
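
The ambiguity the helper sidesteps, shown with LLVM's pattern matchers (a self-contained sketch assuming the PatternMatch header; the function name is illustrative):

    #include "llvm/IR/PatternMatch.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // select i1 %x, true, false is simultaneously "x && true" and
    // "x || false", so both logical matchers accept it.
    static bool matchesBothForms(Value *Cond) {
      return match(Cond, m_LogicalAnd()) && match(Cond, m_LogicalOr());
    }
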
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
///
/// This essentially walks from a root recursively through loop variant operands
-/// which have the exact same opcode and finds all inputs which are loop
-/// invariant. For some operations these can be re-associated and unswitched out
-/// of the loop entirely.
+/// which perform the same logical operation (AND or OR) and finds all
+/// inputs which are loop invariant. For some operations these can be
+/// re-associated and unswitched out of the loop entirely.
static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
LoopInfo &LI) {
@@ -150,7 +161,7 @@ collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
}
// If not an instruction with the same opcode, nothing we can do.
- Instruction *OpI = dyn_cast<Instruction>(OpV);
+ Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
(IsRootOr && match(OpI, m_LogicalOr())))) {
@@ -202,13 +213,19 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
/// branch on a single value.
static void buildPartialUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
- BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) {
+ BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
+ Instruction *I, AssumptionCache *AC, DominatorTree &DT) {
IRBuilder<> IRB(&BB);
- Value *Cond = Direction ? IRB.CreateOr(Invariants) :
- IRB.CreateAnd(Invariants);
- if (InsertFreeze)
- Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr");
+ SmallVector<Value *> FrozenInvariants;
+ for (Value *Inv : Invariants) {
+ if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
+ Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
+ FrozenInvariants.push_back(Inv);
+ }
+
+ Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
+ : IRB.CreateAnd(FrozenInvariants);
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
}
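
Why each invariant is frozen individually: branching on undef or poison is immediate undefined behavior, and unswitching hoists a condition that previously executed only under the loop's own guards. A sketch of the per-value gating used above, assuming the ValueTracking API (the helper name is invented):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Freeze Inv at the builder's insertion point unless it is provably
    // neither undef nor poison, in which case a freeze would be a no-op.
    static Value *freezeIfNeeded(IRBuilder<> &IRB, Value *Inv,
                                 AssumptionCache *AC, Instruction *CtxI,
                                 DominatorTree &DT) {
      if (isGuaranteedNotToBeUndefOrPoison(Inv, AC, CtxI, &DT))
        return Inv;
      return IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
    }
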
@@ -442,11 +459,12 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// some input conditions to the branch.
bool FullUnswitch = false;
- if (L.isLoopInvariant(BI.getCondition())) {
- Invariants.push_back(BI.getCondition());
+ Value *Cond = skipTrivialSelect(BI.getCondition());
+ if (L.isLoopInvariant(Cond)) {
+ Invariants.push_back(Cond);
FullUnswitch = true;
} else {
- if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
+ if (auto *CondInst = dyn_cast<Instruction>(Cond))
Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
if (Invariants.empty()) {
LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
@@ -480,8 +498,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// is a graph of `or` operations, or the exit block is along the false edge
// and the condition is a graph of `and` operations.
if (!FullUnswitch) {
- if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr())
- : !match(BI.getCondition(), m_LogicalAnd())) {
+ if (ExitDirection ? !match(Cond, m_LogicalOr())
+ : !match(Cond, m_LogicalAnd())) {
LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
"non-full unswitch!\n");
return false;
@@ -546,6 +564,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// its successors.
OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
BI);
+ BI.setCondition(Cond);
if (MSSAU) {
// Temporarily clone the terminator, to make MSSA update cheaper by
// separating "insert edge" updates from "remove edge" ones.
@@ -561,15 +580,16 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// Only unswitching a subset of inputs to the condition, so we will need to
// build a new branch that merges the invariant inputs.
if (ExitDirection)
- assert(match(BI.getCondition(), m_LogicalOr()) &&
+ assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&
"Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
"condition!");
else
- assert(match(BI.getCondition(), m_LogicalAnd()) &&
+ assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&
"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!");
- buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
- *UnswitchedBB, *NewPH, false);
+ buildPartialUnswitchConditionalBranch(
+ *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
+ FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
}
// Update the dominator tree with the added edge.
@@ -1019,7 +1039,8 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
// Don't bother trying to unswitch past an unconditional branch or a branch
// with a constant value. These should be removed by simplifycfg prior to
// running this pass.
- if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ if (!BI->isConditional() ||
+ isa<Constant>(skipTrivialSelect(BI->getCondition())))
return Changed;
// Found a trivial condition candidate: non-foldable conditional branch. If
@@ -1663,7 +1684,7 @@ deleteDeadBlocksFromLoop(Loop &L,
// uses in other blocks.
for (auto &I : *BB)
if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.replaceAllUsesWith(PoisonValue::get(I.getType()));
BB->dropAllReferences();
}
@@ -2042,12 +2063,13 @@ static void unswitchNontrivialInvariants(
"Can only unswitch switches and conditional branch!");
bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
bool FullUnswitch =
- SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
+ SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
+ !PartiallyInvariant);
if (FullUnswitch)
assert(Invariants.size() == 1 &&
"Cannot have other invariants with full unswitching!");
else
- assert(isa<Instruction>(BI->getCondition()) &&
+ assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
"Partial unswitching requires an instruction as the condition!");
if (MSSAU && VerifyMemorySSA)
@@ -2062,14 +2084,14 @@ static void unswitchNontrivialInvariants(
bool Direction = true;
int ClonedSucc = 0;
if (!FullUnswitch) {
- Value *Cond = BI->getCondition();
+ Value *Cond = skipTrivialSelect(BI->getCondition());
(void)Cond;
assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||
PartiallyInvariant) &&
"Only `or`, `and`, an `select`, partially invariant instructions "
"can combine invariants being unswitched.");
- if (!match(BI->getCondition(), m_LogicalOr())) {
- if (match(BI->getCondition(), m_LogicalAnd()) ||
+ if (!match(Cond, m_LogicalOr())) {
+ if (match(Cond, m_LogicalAnd()) ||
(PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
Direction = false;
ClonedSucc = 1;
@@ -2209,11 +2231,12 @@ static void unswitchNontrivialInvariants(
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ Value *Cond = skipTrivialSelect(BI->getCondition());
if (InsertFreeze) {
- auto Cond = BI->getCondition();
if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
- BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI));
+ Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI);
}
+ BI->setCondition(Cond);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2311,9 +2334,11 @@ static void unswitchNontrivialInvariants(
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
- else
- buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
- *ClonedPH, *LoopPH, InsertFreeze);
+ else {
+ buildPartialUnswitchConditionalBranch(
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
+ FreezeLoopUnswitchCond, BI, &AC, DT);
+ }
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
if (MSSAU) {
@@ -2745,22 +2770,16 @@ static bool unswitchBestCondition(
BI->getSuccessor(0) == BI->getSuccessor(1))
continue;
- // If BI's condition is 'select _, true, false', simplify it to confuse
- // matchers
- Value *Cond = BI->getCondition(), *CondNext;
- while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
- Cond = CondNext;
- BI->setCondition(Cond);
-
+ Value *Cond = skipTrivialSelect(BI->getCondition());
if (isa<Constant>(Cond))
continue;
- if (L.isLoopInvariant(BI->getCondition())) {
- UnswitchCandidates.push_back({BI, {BI->getCondition()}});
+ if (L.isLoopInvariant(Cond)) {
+ UnswitchCandidates.push_back({BI, {Cond}});
continue;
}
- Instruction &CondI = *cast<Instruction>(BI->getCondition());
+ Instruction &CondI = *cast<Instruction>(Cond);
if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
TinyPtrVector<Value *> Invariants =
collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
@@ -2785,8 +2804,7 @@ static bool unswitchBestCondition(
PartialIVInfo = *Info;
PartialIVCondBranch = L.getHeader()->getTerminator();
TinyPtrVector<Value *> ValsToDuplicate;
- for (auto *Inst : Info->InstToDuplicate)
- ValsToDuplicate.push_back(Inst);
+ llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
UnswitchCandidates.push_back(
{L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
}
@@ -2902,10 +2920,11 @@ static bool unswitchBestCondition(
// its cost.
if (!FullUnswitch) {
auto &BI = cast<BranchInst>(TI);
- if (match(BI.getCondition(), m_LogicalAnd())) {
+ Value *Cond = skipTrivialSelect(BI.getCondition());
+ if (match(Cond, m_LogicalAnd())) {
if (SuccBB == BI.getSuccessor(1))
continue;
- } else if (match(BI.getCondition(), m_LogicalOr())) {
+ } else if (match(Cond, m_LogicalOr())) {
if (SuccBB == BI.getSuccessor(0))
continue;
} else if ((PartialIVInfo.KnownValue->isOneValue() &&
@@ -2947,8 +2966,9 @@ static bool unswitchBestCondition(
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
InstructionCost CandidateCost = ComputeUnswitchedCost(
- TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
- Invariants[0] == BI->getCondition()));
+ TI, /*FullUnswitch*/ !BI ||
+ (Invariants.size() == 1 &&
+ Invariants[0] == skipTrivialSelect(BI->getCondition())));
// Calculate cost multiplier which is a tool to limit potentially
// exponential behavior of loop-unswitch.
if (EnableUnswitchCostMultiplier) {
@@ -3131,8 +3151,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- UnswitchCB, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
DestroyLoopCB))
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ee17da1875e5..fb2d812a186d 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -31,19 +31,16 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <utility>
@@ -59,6 +56,11 @@ static cl::opt<bool> UserKeepLoops(
"keep-loops", cl::Hidden, cl::init(true),
cl::desc("Preserve canonical loop structure (default = true)"));
+static cl::opt<bool> UserSwitchRangeToICmp(
+ "switch-range-to-icmp", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Convert switches into an integer range comparison (default = false)"));
+
static cl::opt<bool> UserSwitchToLookup(
"switch-to-lookup", cl::Hidden, cl::init(false),
cl::desc("Convert switches to lookup tables (default = false)"));
@@ -311,6 +313,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.BonusInstThreshold = UserBonusInstThreshold;
if (UserForwardSwitchCond.getNumOccurrences())
Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
+ if (UserSwitchRangeToICmp.getNumOccurrences())
+ Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp;
if (UserSwitchToLookup.getNumOccurrences())
Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
if (UserKeepLoops.getNumOccurrences())
@@ -337,6 +341,8 @@ void SimplifyCFGPass::printPipeline(
OS << "<";
OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+ OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-")
+ << "switch-range-to-icmp;";
OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
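
For reference, the new knob can also be set when the pass is constructed programmatically; a hedged sketch, assuming the convertSwitchRangeToICmp setter added to SimplifyCFGOptions alongside this option:

    #include "llvm/Transforms/Scalar/SimplifyCFG.h"
    #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
    using namespace llvm;

    // A SimplifyCFG instance that folds small switches into range checks.
    SimplifyCFGPass makeRangeFoldingSimplifyCFG() {
      return SimplifyCFGPass(
          SimplifyCFGOptions().convertSwitchRangeToICmp(true));
    }

Per the printPipeline output above, the same knob should be reachable from opt as -passes='simplifycfg<switch-range-to-icmp>'.
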
diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp
index 8600aacdb056..e8fde53005f0 100644
--- a/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -15,12 +15,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -48,7 +43,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
}
if (Inst->isTerminator() || isa<PHINode>(Inst) || Inst->isEHPad() ||
- Inst->mayThrow())
+ Inst->mayThrow() || !Inst->willReturn())
return false;
if (auto *Call = dyn_cast<CallBase>(Inst)) {
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 06169a7834f6..9ac4608134c2 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -63,10 +63,10 @@
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -275,7 +275,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
});
}
- // Usially debug label instrinsic corresponds to label in LLVM IR. In these
+  // Usually, a debug label intrinsic corresponds to a label in LLVM IR. In these
// cases we should not move it here.
// TODO: Possible special processing needed to detect it is related to a
// hoisted instruction.
@@ -301,7 +301,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
return false; // too much to hoist
} else {
- // Debug info instrinsics should not be counted for threshold.
+      // Debug info intrinsics should not be counted toward the threshold.
if (!isa<DbgInfoIntrinsic>(I))
NotHoistedInstCount++;
if (NotHoistedInstCount > SpecExecMaxNotHoisted)
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index b47378808216..70df0cec0dca 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -68,7 +68,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -683,24 +682,16 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
- if (InBounds)
- Reduced =
- Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
- else
- Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
+ Reduced =
+ Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
} else {
// C = gep Basis, Bump
// Canonicalize bump to pointer size.
Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
- if (InBounds)
- Reduced = Builder.CreateInBoundsGEP(
- cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
- Basis.Ins, Bump);
- else
- Reduced = Builder.CreateGEP(
- cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
- Basis.Ins, Bump);
+ Reduced = Builder.CreateGEP(
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+ Basis.Ins, Bump, "", InBounds);
}
break;
}
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index b3a445368537..f6525ad7de9b 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -18,10 +18,8 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
-#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -33,7 +31,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
@@ -41,7 +38,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -72,6 +68,11 @@ static cl::opt<bool>
cl::desc("Allow relaxed uniform region checks"),
cl::init(true));
+static cl::opt<unsigned>
+ ReorderNodeSize("structurizecfg-node-reorder-size",
+ cl::desc("Limit region size for reordering nodes"),
+ cl::init(100), cl::Hidden);
+
// Definition of the complex types used in this pass.
using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -266,6 +267,8 @@ class StructurizeCFG {
void orderNodes();
+ void reorderNodes();
+
void analyzeLoops(RegionNode *N);
Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
@@ -424,6 +427,57 @@ void StructurizeCFG::orderNodes() {
}
}
+/// Change the node ordering to decrease the range of live values, especially
+/// the values that capture the control flow path for branches. We do this
+/// by moving blocks with a single predecessor and successor to appear after
+/// their predecessor. The motivation is to move some loop exit blocks into a
+/// loop.
+/// In cases where a loop has a large number of exit blocks, this reduces the
+/// amount of values needed across the loop boundary.
+void StructurizeCFG::reorderNodes() {
+ SmallVector<RegionNode *, 8> NewOrder;
+ DenseMap<BasicBlock *, unsigned> MoveTo;
+ BitVector Moved(Order.size());
+
+  // The benefits of reordering nodes occur only for large regions.
+ if (Order.size() <= ReorderNodeSize)
+ return;
+
+ // The algorithm works with two passes over Order. The first pass identifies
+ // the blocks to move and the position to move them to. The second pass
+ // creates the new order based upon this information. We move blocks with
+ // a single predecessor and successor. If there are multiple candidates then
+ // maintain the original order.
+ BBSet Seen;
+ for (int I = Order.size() - 1; I >= 0; --I) {
+ auto *BB = Order[I]->getEntry();
+ Seen.insert(BB);
+ auto *Pred = BB->getSinglePredecessor();
+ auto *Succ = BB->getSingleSuccessor();
+ // Consider only those basic blocks that have a predecessor in Order and a
+ // successor that exits the region. The region may contain subregions that
+ // have been structurized and are not included in Order.
+ if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() &&
+ !MoveTo.count(Pred)) {
+ MoveTo[Pred] = I;
+ Moved.set(I);
+ }
+ }
+
+ // If no blocks have been moved then the original order is good.
+ if (!Moved.count())
+ return;
+
+ for (size_t I = 0, E = Order.size(); I < E; ++I) {
+ auto *BB = Order[I]->getEntry();
+ if (MoveTo.count(BB))
+ NewOrder.push_back(Order[MoveTo[BB]]);
+ if (!Moved[I])
+ NewOrder.push_back(Order[I]);
+ }
+
+ Order.assign(NewOrder);
+}
+
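
To make the second pass concrete, a toy, free-standing mirror of its mechanics (container types simplified; blocks are represented by plain ints):

    #include <map>
    #include <vector>

    // MoveTo maps a block to the index of the block that should be emitted
    // next to it; Moved marks blocks to skip at their original position.
    std::vector<int> rebuildOrder(const std::vector<int> &Order,
                                  const std::map<int, std::size_t> &MoveTo,
                                  const std::vector<bool> &Moved) {
      std::vector<int> NewOrder;
      for (std::size_t I = 0; I < Order.size(); ++I) {
        auto It = MoveTo.find(Order[I]);
        if (It != MoveTo.end())
          NewOrder.push_back(Order[It->second]); // emit the moved block here
        if (!Moved[I])
          NewOrder.push_back(Order[I]); // unmoved blocks keep their slot
      }
      return NewOrder;
    }

Note that the structurizer consumes Order back to front, which is why the moved block is emitted next to its predecessor in this direction.
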
/// Determine the end of the loops
void StructurizeCFG::analyzeLoops(RegionNode *N) {
if (N->isSubRegion()) {
@@ -685,7 +739,7 @@ void StructurizeCFG::simplifyAffectedPhis() {
Q.DT = DT;
for (WeakVH VH : AffectedPhis) {
if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
- if (auto NewValue = SimplifyInstruction(Phi, Q)) {
+ if (auto NewValue = simplifyInstruction(Phi, Q)) {
Phi->replaceAllUsesWith(NewValue);
Phi->eraseFromParent();
Changed = true;
@@ -1085,12 +1139,13 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
ParentRegion = R;
orderNodes();
+ reorderNodes();
collectInfos();
createFlow();
insertConditions(false);
insertConditions(true);
- simplifyConditions();
setPhiValues();
+ simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
diff --git a/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp b/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
new file mode 100644
index 000000000000..16b3483f9687
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/TLSVariableHoist.cpp
@@ -0,0 +1,306 @@
+//===- TLSVariableHoist.cpp -------- Remove Redundant TLS Loads ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies and eliminates redundant TLS loads if the related
+// option is set. For an example, refer to the comment at the head of
+// TLSVariableHoist.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/TLSVariableHoist.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
+using namespace tlshoist;
+
+#define DEBUG_TYPE "tlshoist"
+
+static cl::opt<bool> TLSLoadHoist(
+ "tls-load-hoist", cl::init(false), cl::Hidden,
+ cl::desc("hoist the TLS loads in PIC model to eliminate redundant "
+ "TLS address calculation."));
+
+namespace {
+
+/// The TLS Variable hoist pass.
+class TLSVariableHoistLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ TLSVariableHoistLegacyPass() : FunctionPass(ID) {
+ initializeTLSVariableHoistLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &Fn) override;
+
+ StringRef getPassName() const override { return "TLS Variable Hoist"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+
+private:
+ TLSVariableHoistPass Impl;
+};
+
+} // end anonymous namespace
+
+char TLSVariableHoistLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(TLSVariableHoistLegacyPass, "tlshoist",
+ "TLS Variable Hoist", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(TLSVariableHoistLegacyPass, "tlshoist",
+ "TLS Variable Hoist", false, false)
+
+FunctionPass *llvm::createTLSVariableHoistPass() {
+ return new TLSVariableHoistLegacyPass();
+}
+
+/// Perform the TLS Variable Hoist optimization for the given function.
+bool TLSVariableHoistLegacyPass::runOnFunction(Function &Fn) {
+ if (skipFunction(Fn))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** Begin TLS Variable Hoist **********\n");
+ LLVM_DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
+
+ bool MadeChange =
+ Impl.runImpl(Fn, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo());
+
+ if (MadeChange) {
+ LLVM_DEBUG(dbgs() << "********** Function after TLS Variable Hoist: "
+ << Fn.getName() << '\n');
+ LLVM_DEBUG(dbgs() << Fn);
+ }
+ LLVM_DEBUG(dbgs() << "********** End TLS Variable Hoist **********\n");
+
+ return MadeChange;
+}
+
+void TLSVariableHoistPass::collectTLSCandidate(Instruction *Inst) {
+ // Skip all cast instructions. They are visited indirectly later on.
+ if (Inst->isCast())
+ return;
+
+ // Scan all operands.
+ for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
+ auto *GV = dyn_cast<GlobalVariable>(Inst->getOperand(Idx));
+ if (!GV || !GV->isThreadLocal())
+ continue;
+
+ // Add Candidate to TLSCandMap (GV --> Candidate).
+ TLSCandMap[GV].addUser(Inst, Idx);
+ }
+}
+
+void TLSVariableHoistPass::collectTLSCandidates(Function &Fn) {
+  // First, quickly check whether the module contains any TLS variables.
+ Module *M = Fn.getParent();
+
+ bool HasTLS = llvm::any_of(
+ M->globals(), [](GlobalVariable &GV) { return GV.isThreadLocal(); });
+
+  // If there are none, return early.
+ if (!HasTLS)
+ return;
+
+ TLSCandMap.clear();
+
+ // Then, collect TLS Variable info.
+ for (BasicBlock &BB : Fn) {
+ // Ignore unreachable basic blocks.
+ if (!DT->isReachableFromEntry(&BB))
+ continue;
+
+ for (Instruction &Inst : BB)
+ collectTLSCandidate(&Inst);
+ }
+}
+
+static bool oneUseOutsideLoop(tlshoist::TLSCandidate &Cand, LoopInfo *LI) {
+ if (Cand.Users.size() != 1)
+ return false;
+
+ BasicBlock *BB = Cand.Users[0].Inst->getParent();
+ if (LI->getLoopFor(BB))
+ return false;
+
+ return true;
+}
+
+Instruction *TLSVariableHoistPass::getNearestLoopDomInst(BasicBlock *BB,
+ Loop *L) {
+ assert(L && "Unexcepted Loop status!");
+
+ // Get the outermost loop.
+ while (Loop *Parent = L->getParentLoop())
+ L = Parent;
+
+ BasicBlock *PreHeader = L->getLoopPreheader();
+
+  // If there is a preheader, it is the unique predecessor outside the loop.
+ if (PreHeader)
+ return PreHeader->getTerminator();
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Dom = Header;
+ for (BasicBlock *PredBB : predecessors(Header))
+ Dom = DT->findNearestCommonDominator(Dom, PredBB);
+
+ assert(Dom && "Not find dominator BB!");
+ Instruction *Term = Dom->getTerminator();
+
+ return Term;
+}
+
+Instruction *TLSVariableHoistPass::getDomInst(Instruction *I1,
+ Instruction *I2) {
+ if (!I1)
+ return I2;
+ if (DT->dominates(I1, I2))
+ return I1;
+ if (DT->dominates(I2, I1))
+ return I2;
+
+ // If there is no dominance relation, use common dominator.
+ BasicBlock *DomBB =
+ DT->findNearestCommonDominator(I1->getParent(), I2->getParent());
+
+ Instruction *Dom = DomBB->getTerminator();
+ assert(Dom && "Common dominator not found!");
+
+ return Dom;
+}
+
+BasicBlock::iterator TLSVariableHoistPass::findInsertPos(Function &Fn,
+ GlobalVariable *GV,
+ BasicBlock *&PosBB) {
+ tlshoist::TLSCandidate &Cand = TLSCandMap[GV];
+
+  // We should hoist the TLS use out of the loop, so choose the nearest
+  // instruction that dominates the loop and any enclosing loops (if they
+  // exist).
+ Instruction *LastPos = nullptr;
+ for (auto &User : Cand.Users) {
+ BasicBlock *BB = User.Inst->getParent();
+ Instruction *Pos = User.Inst;
+ if (Loop *L = LI->getLoopFor(BB)) {
+ Pos = getNearestLoopDomInst(BB, L);
+ assert(Pos && "Not find insert position out of loop!");
+ }
+ Pos = getDomInst(LastPos, Pos);
+ LastPos = Pos;
+ }
+
+ assert(LastPos && "Unexpected insert position!");
+ BasicBlock *Parent = LastPos->getParent();
+ PosBB = Parent;
+ return LastPos->getIterator();
+}
+
+// Generate a bitcast (no type change) to replace the uses of the TLS candidate.
+Instruction *TLSVariableHoistPass::genBitCastInst(Function &Fn,
+ GlobalVariable *GV) {
+ BasicBlock *PosBB = &Fn.getEntryBlock();
+ BasicBlock::iterator Iter = findInsertPos(Fn, GV, PosBB);
+ Type *Ty = GV->getType();
+ auto *CastInst = new BitCastInst(GV, Ty, "tls_bitcast");
+ PosBB->getInstList().insert(Iter, CastInst);
+ return CastInst;
+}
+
+bool TLSVariableHoistPass::tryReplaceTLSCandidate(Function &Fn,
+ GlobalVariable *GV) {
+
+ tlshoist::TLSCandidate &Cand = TLSCandMap[GV];
+
+  // If it is used only once and not inside a loop, there is no need to replace it.
+ if (oneUseOutsideLoop(Cand, LI))
+ return false;
+
+  // Generate a bitcast (no type change) at the chosen insert position.
+ auto *CastInst = genBitCastInst(Fn, GV);
+
+  // Replace all uses of the TLS candidate with the bitcast.
+ for (auto &User : Cand.Users)
+ User.Inst->setOperand(User.OpndIdx, CastInst);
+
+ return true;
+}
+
+bool TLSVariableHoistPass::tryReplaceTLSCandidates(Function &Fn) {
+ if (TLSCandMap.empty())
+ return false;
+
+ bool Replaced = false;
+ for (auto &GV2Cand : TLSCandMap) {
+ GlobalVariable *GV = GV2Cand.first;
+ Replaced |= tryReplaceTLSCandidate(Fn, GV);
+ }
+
+ return Replaced;
+}
+
+/// Optimize expensive TLS variables in the given function.
+bool TLSVariableHoistPass::runImpl(Function &Fn, DominatorTree &DT,
+ LoopInfo &LI) {
+ if (Fn.hasOptNone())
+ return false;
+
+ if (!TLSLoadHoist && !Fn.getAttributes().hasFnAttr("tls-load-hoist"))
+ return false;
+
+ this->LI = &LI;
+ this->DT = &DT;
+  assert(this->LI && this->DT && "Unexpected requirement!");
+
+ // Collect all TLS variable candidates.
+ collectTLSCandidates(Fn);
+
+ bool MadeChange = tryReplaceTLSCandidates(Fn);
+
+ return MadeChange;
+}
+
+PreservedAnalyses TLSVariableHoistPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ if (!runImpl(F, DT, LI))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
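
What the pass is after, sketched at the source level (an illustration, not code from the patch): in the PIC model, every access to a thread_local variable may expand to a call that computes the variable's address for the current thread.

    thread_local int Counter;

    int sumLoop(int N) {
      int S = 0;
      for (int I = 0; I < N; ++I)
        S += Counter; // without hoisting, each access may recompute &Counter
      return S;
    }

With -tls-load-hoist (or the "tls-load-hoist" function attribute), the pass funnels all uses through one bitcast of the global placed at a dominating point, so the TLS address is materialized once instead of per use.
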
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 3bcf92e28a21..27c04177e894 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -53,11 +53,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -76,14 +73,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "tailcallelim"
@@ -248,10 +243,10 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
isa<PseudoProbeInst>(&I))
continue;
- // Special-case operand bundle "clang.arc.attachedcall".
+ // Special-case operand bundles "clang.arc.attachedcall" and "ptrauth".
bool IsNoTail =
CI->isNoTailCall() || CI->hasOperandBundlesOtherThan(
- LLVMContext::OB_clang_arc_attachedcall);
+ {LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_ptrauth});
if (!IsNoTail && CI->doesNotAccessMemory()) {
// A call to a readnone function whose arguments are all things computed
@@ -531,7 +526,7 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
}
  // If the function doesn't return void, create the RetPN and RetKnownPN PHI
- // nodes to track our return value. We initialize RetPN with undef and
+ // nodes to track our return value. We initialize RetPN with poison and
// RetKnownPN with false since we can't know our return value at function
// entry.
Type *RetType = F.getReturnType();
@@ -540,7 +535,7 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
RetPN = PHINode::Create(RetType, 2, "ret.tr", InsertPos);
RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr", InsertPos);
- RetPN->addIncoming(UndefValue::get(RetType), NewEntry);
+ RetPN->addIncoming(PoisonValue::get(RetType), NewEntry);
RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry);
}
@@ -734,7 +729,7 @@ void TailRecursionEliminator::cleanupAndFinalize() {
// call.
for (PHINode *PN : ArgumentPHIs) {
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
+ if (Value *PNV = simplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 80a7d3a43ad6..8367e61c1a47 100644
--- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -61,7 +61,7 @@ static void warnAboutLeftoverTransformations(Loop *L,
<< "loop not vectorized: the optimizer was unable to perform the "
"requested transformation; the transformation might be disabled "
"or specified as part of an unsupported transformation ordering");
- else if (InterleaveCount.getValueOr(0) != 1)
+ else if (InterleaveCount.value_or(0) != 1)
ORE->emit(
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
"FailedRequestedInterleaving",
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index c734611836eb..24972db404be 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -50,9 +50,6 @@ static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
auto Int64Ty = Builder.getInt64Ty();
auto M = Builder.GetInsertBlock()->getModule();
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
- if (!M->getModuleFlag("amdgpu_hostcall")) {
- M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
- }
return Builder.CreateCall(Fn, Version);
}
diff --git a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index cbc508bb863a..0318429a76a7 100644
--- a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index e789194eb3ab..e6372fc5ab86 100644
--- a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -222,7 +222,7 @@ static bool addDiscriminators(Function &F) {
<< DIL->getColumn() << ":" << Discriminator << " "
<< I << "\n");
} else {
- I.setDebugLoc(NewDIL.getValue());
+ I.setDebugLoc(*NewDIL);
LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
<< DIL->getColumn() << ":" << Discriminator << " " << I
<< "\n");
@@ -260,7 +260,7 @@ static bool addDiscriminators(Function &F) {
<< CurrentDIL->getLine() << ":" << CurrentDIL->getColumn()
<< ":" << Discriminator << " " << I << "\n");
} else {
- I.setDebugLoc(NewDIL.getValue());
+ I.setDebugLoc(*NewDIL);
Changed = true;
}
}
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index f910f7c3c31f..02ea17825c2f 100644
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DebugCounter.h"
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 15c4a64eb794..e9983ff82176 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -21,7 +21,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -33,7 +32,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -1164,7 +1162,11 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
if (NewLatch != OldLatch) {
MDNode *MD = OldLatch->getTerminator()->getMetadata("llvm.loop");
NewLatch->getTerminator()->setMetadata("llvm.loop", MD);
- OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
+ // It's still possible that OldLatch is the latch of another inner loop,
+ // in which case we do not remove the metadata.
+ Loop *IL = LI->getLoopFor(OldLatch);
+ if (IL && IL->getLoopLatch() != OldLatch)
+ OldLatch->getTerminator()->setMetadata("llvm.loop", nullptr);
}
}
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 1bb80be8ef99..0b36e8708a03 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -27,9 +27,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -317,18 +315,11 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
// predecessors of BB.
static BasicBlock *
findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
- // If the block doesn't have any PHIs, we don't care about it, since there's
- // no point in splitting it.
- PHINode *PN = dyn_cast<PHINode>(BB->begin());
- if (!PN)
- return nullptr;
-
// Verify we have exactly one IBR predecessor.
// Conservatively bail out if one of the other predecessors is not a "regular"
// terminator (that is, not a switch or a br).
BasicBlock *IBB = nullptr;
- for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
- BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+ for (BasicBlock *PredBB : predecessors(BB)) {
Instruction *PredTerm = PredBB->getTerminator();
switch (PredTerm->getOpcode()) {
case Instruction::IndirectBr:
@@ -349,6 +340,7 @@ findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
}
bool llvm::SplitIndirectBrCriticalEdges(Function &F,
+ bool IgnoreBlocksWithoutPHI,
BranchProbabilityInfo *BPI,
BlockFrequencyInfo *BFI) {
// Check whether the function has any indirectbrs, and collect which blocks
@@ -370,6 +362,9 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
bool ShouldUpdateAnalysis = BPI && BFI;
bool Changed = false;
for (BasicBlock *Target : Targets) {
+ if (IgnoreBlocksWithoutPHI && Target->phis().empty())
+ continue;
+
SmallVector<BasicBlock *, 16> OtherPreds;
BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
  // If we did not find an indirectbr, or the indirectbr is the only
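
With the new flag, callers that split these critical edges only to rewrite PHIs can now skip PHI-free targets up front; a hedged call-site sketch (the analysis pointers may be null):

bool Changed = SplitIndirectBrCriticalEdges(F,
                                            /*IgnoreBlocksWithoutPHI=*/true,
                                            /*BPI=*/nullptr, /*BFI=*/nullptr);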
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 97f11ca71726..c4a58f36c171 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -13,16 +13,17 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Support/TypeSize.h"
using namespace llvm;
@@ -41,7 +42,6 @@ STATISTIC(NumInaccessibleMemOrArgMemOnly,
STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind");
STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture");
STATISTIC(NumWriteOnlyArg, "Number of arguments inferred as writeonly");
-STATISTIC(NumSExtArg, "Number of arguments inferred as signext");
STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly");
STATISTIC(NumNoAlias, "Number of function returns inferred as noalias");
STATISTIC(NumNoUndef, "Number of function returns inferred as noundef returns");
@@ -149,14 +149,6 @@ static bool setOnlyWritesMemory(Function &F, unsigned ArgNo) {
return true;
}
-static bool setSignExtendedArg(Function &F, unsigned ArgNo) {
- if (F.hasParamAttribute(ArgNo, Attribute::SExt))
- return false;
- F.addParamAttr(ArgNo, Attribute::SExt);
- ++NumSExtArg;
- return true;
-}
-
static bool setRetNoUndef(Function &F) {
if (!F.getReturnType()->isVoidTy() &&
!F.hasRetAttribute(Attribute::NoUndef)) {
@@ -224,15 +216,54 @@ static bool setWillReturn(Function &F) {
return true;
}
-bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
- const TargetLibraryInfo &TLI) {
+static bool setAlignedAllocParam(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::AllocAlign))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::AllocAlign);
+ return true;
+}
+
+static bool setAllocatedPointerParam(Function &F, unsigned ArgNo) {
+ if (F.hasParamAttribute(ArgNo, Attribute::AllocatedPointer))
+ return false;
+ F.addParamAttr(ArgNo, Attribute::AllocatedPointer);
+ return true;
+}
+
+static bool setAllocSize(Function &F, unsigned ElemSizeArg,
+ Optional<unsigned> NumElemsArg) {
+ if (F.hasFnAttribute(Attribute::AllocSize))
+ return false;
+ F.addFnAttr(Attribute::getWithAllocSizeArgs(F.getContext(), ElemSizeArg,
+ NumElemsArg));
+ return true;
+}
+
+static bool setAllocFamily(Function &F, StringRef Family) {
+ if (F.hasFnAttribute("alloc-family"))
+ return false;
+ F.addFnAttr("alloc-family", Family);
+ return true;
+}
+
+static bool setAllocKind(Function &F, AllocFnKind K) {
+ if (F.hasFnAttribute(Attribute::AllocKind))
+ return false;
+ F.addFnAttr(
+ Attribute::get(F.getContext(), Attribute::AllocKind, uint64_t(K)));
+ return true;
+}
+
+bool llvm::inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name,
+ const TargetLibraryInfo &TLI) {
Function *F = M->getFunction(Name);
if (!F)
return false;
- return inferLibFuncAttributes(*F, TLI);
+ return inferNonMandatoryLibFuncAttrs(*F, TLI);
}
-bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
+bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
+ const TargetLibraryInfo &TLI) {
LibFunc TheLibFunc;
if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
return false;
@@ -360,6 +391,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setArgNoUndef(F, 1);
LLVM_FALLTHROUGH;
case LibFunc_strdup:
+ Changed |= setAllocFamily(F, "malloc");
Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
@@ -416,9 +448,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 0);
return Changed;
case LibFunc_aligned_alloc:
+ Changed |= setAlignedAllocParam(F, 0);
+ Changed |= setAllocSize(F, 1, None);
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized | AllocFnKind::Aligned);
+ LLVM_FALLTHROUGH;
case LibFunc_valloc:
case LibFunc_malloc:
case LibFunc_vec_malloc:
+ Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_malloc ? "vec_malloc"
+ : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Uninitialized);
+ Changed |= setAllocSize(F, 0, None);
Changed |= setOnlyAccessesInaccessibleMemory(F);
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -481,6 +521,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_memalign:
+ Changed |= setAllocFamily(F, "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned |
+ AllocFnKind::Uninitialized);
+ Changed |= setAllocSize(F, 1, None);
+ Changed |= setAlignedAllocParam(F, 0);
Changed |= setOnlyAccessesInaccessibleMemory(F);
Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -500,8 +545,13 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_realloc:
- case LibFunc_vec_realloc:
case LibFunc_reallocf:
+ case LibFunc_vec_realloc:
+ Changed |= setAllocFamily(
+ F, TheLibFunc == LibFunc_vec_realloc ? "vec_malloc" : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Realloc);
+ Changed |= setAllocatedPointerParam(F, 0);
+ Changed |= setAllocSize(F, 1, None);
Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
Changed |= setRetNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -575,6 +625,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_calloc:
case LibFunc_vec_calloc:
+ Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? "vec_malloc"
+ : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Zeroed);
+ Changed |= setAllocSize(F, 0, 1);
Changed |= setOnlyAccessesInaccessibleMemory(F);
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -633,6 +687,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_free:
case LibFunc_vec_free:
+ Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? "vec_malloc"
+ : "malloc");
+ Changed |= setAllocKind(F, AllocFnKind::Free);
+ Changed |= setAllocatedPointerParam(F, 0);
Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
Changed |= setArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -1041,7 +1099,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_ldexp:
case LibFunc_ldexpf:
case LibFunc_ldexpl:
- Changed |= setSignExtendedArg(F, 1);
Changed |= setWillReturn(F);
return Changed;
case LibFunc_abs:
@@ -1178,34 +1235,179 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
}
}
-bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+static void setArgExtAttr(Function &F, unsigned ArgNo,
+ const TargetLibraryInfo &TLI, bool Signed = true) {
+ Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(Signed);
+ if (ExtAttr != Attribute::None && !F.hasParamAttribute(ArgNo, ExtAttr))
+ F.addParamAttr(ArgNo, ExtAttr);
+}
+
+// Modeled after X86TargetLowering::markLibCallAttributes.
+static void markRegisterParameterAttributes(Function *F) {
+ if (!F->arg_size() || F->isVarArg())
+ return;
+
+ const CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
+ return;
+
+ const Module *M = F->getParent();
+ unsigned N = M->getNumberRegisterParameters();
+ if (!N)
+ return;
+
+ const DataLayout &DL = M->getDataLayout();
+
+ for (Argument &A : F->args()) {
+ Type *T = A.getType();
+ if (!T->isIntOrPtrTy())
+ continue;
+
+ const TypeSize &TS = DL.getTypeAllocSize(T);
+ if (TS > 8)
+ continue;
+
+ assert(TS <= 4 && "Need to account for parameters larger than word size");
+ const unsigned NumRegs = TS > 4 ? 2 : 1;
+ if (N < NumRegs)
+ return;
+
+ N -= NumRegs;
+ F->addParamAttr(A.getArgNo(), Attribute::InReg);
+ }
+}
+
+FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, FunctionType *T,
+ AttributeList AttributeList) {
+ assert(TLI.has(TheLibFunc) &&
+ "Creating call to non-existing library function.");
+ StringRef Name = TLI.getName(TheLibFunc);
+ FunctionCallee C = M->getOrInsertFunction(Name, T, AttributeList);
+
+ // Make sure any mandatory argument attributes are added.
+
+ // Any outgoing i32 argument should be handled with setArgExtAttr() which
+ // will add an extension attribute if the target ABI requires it. Adding
+ // argument extensions is typically done by the front end but when an
+ // optimizer is building a library call on its own it has to take care of
+ // this. Each such generated function must be handled here with sign or
+  // zero extensions as needed. F is retrieved with cast<> because we require
+  // the caller to have called isLibFuncEmittable() first.
+ Function *F = cast<Function>(C.getCallee());
+ assert(F->getFunctionType() == T && "Function type does not match.");
+ switch (TheLibFunc) {
+ case LibFunc_fputc:
+ case LibFunc_putchar:
+ setArgExtAttr(*F, 0, TLI);
+ break;
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
+ case LibFunc_strchr:
+ setArgExtAttr(*F, 1, TLI);
+ break;
+ case LibFunc_memccpy:
+ setArgExtAttr(*F, 2, TLI);
+ break;
+
+ // These are functions that are known to not need any argument extension
+  // on any target: a size_t argument (which may be an i32 on some targets)
+ // should not trigger the assert below.
+ case LibFunc_bcmp:
+ case LibFunc_calloc:
+ case LibFunc_fwrite:
+ case LibFunc_malloc:
+ case LibFunc_memcmp:
+ case LibFunc_memcpy_chk:
+ case LibFunc_mempcpy:
+ case LibFunc_memset_pattern16:
+ case LibFunc_snprintf:
+ case LibFunc_stpncpy:
+ case LibFunc_strlcat:
+ case LibFunc_strlcpy:
+ case LibFunc_strncat:
+ case LibFunc_strncmp:
+ case LibFunc_strncpy:
+ case LibFunc_vsnprintf:
+ break;
+
+ default:
+#ifndef NDEBUG
+ for (unsigned i = 0; i < T->getNumParams(); i++)
+ assert(!isa<IntegerType>(T->getParamType(i)) &&
+ "Unhandled integer argument.");
+#endif
+ break;
+ }
+
+ markRegisterParameterAttributes(F);
+
+ return C;
+}
+
+FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
+ LibFunc TheLibFunc, FunctionType *T) {
+ return getOrInsertLibFunc(M, TLI, TheLibFunc, T, AttributeList());
+}
+
+bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+ LibFunc TheLibFunc) {
+ StringRef FuncName = TLI->getName(TheLibFunc);
+ if (!TLI->has(TheLibFunc))
+ return false;
+
+ // Check if the Module already has a GlobalValue with the same name, in
+ // which case it must be a Function with the expected type.
+ if (GlobalValue *GV = M->getNamedValue(FuncName)) {
+ if (auto *F = dyn_cast<Function>(GV))
+ return TLI->isValidProtoForLibFunc(*F->getFunctionType(), TheLibFunc, *M);
+ return false;
+ }
+
+ return true;
+}
+
+bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
+ StringRef Name) {
+ LibFunc TheLibFunc;
+ return TLI->getLibFunc(Name, TheLibFunc) &&
+ isLibFuncEmittable(M, TLI, TheLibFunc);
+}
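
The emit* helpers below all follow the three-step shape these new entry points enable; a condensed sketch, assuming an IRBuilderBase B positioned in the caller:

Module *M = B.GetInsertBlock()->getModule();
if (!isLibFuncEmittable(M, TLI, LibFunc_puts))
  return nullptr; // unavailable on the target, or a clashing prototype exists
FunctionCallee Puts = getOrInsertLibFunc(M, *TLI, LibFunc_puts,
                                         B.getInt32Ty(), B.getInt8PtrTy());
inferNonMandatoryLibFuncAttrs(M, TLI->getName(LibFunc_puts), *TLI);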
+
+bool llvm::hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty,
LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
case Type::HalfTyID:
return false;
case Type::FloatTyID:
- return TLI->has(FloatFn);
+ return isLibFuncEmittable(M, TLI, FloatFn);
case Type::DoubleTyID:
- return TLI->has(DoubleFn);
+ return isLibFuncEmittable(M, TLI, DoubleFn);
default:
- return TLI->has(LongDoubleFn);
+ return isLibFuncEmittable(M, TLI, LongDoubleFn);
}
}
-StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc DoubleFn, LibFunc FloatFn,
- LibFunc LongDoubleFn) {
- assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
+StringRef llvm::getFloatFn(const Module *M, const TargetLibraryInfo *TLI,
+ Type *Ty, LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn, LibFunc &TheLibFunc) {
+ assert(hasFloatFn(M, TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) &&
"Cannot get name for unavailable function!");
switch (Ty->getTypeID()) {
case Type::HalfTyID:
llvm_unreachable("No name for HalfTy!");
case Type::FloatTyID:
+ TheLibFunc = FloatFn;
return TLI->getName(FloatFn);
case Type::DoubleTyID:
+ TheLibFunc = DoubleFn;
return TLI->getName(DoubleFn);
default:
+ TheLibFunc = LongDoubleFn;
return TLI->getName(LongDoubleFn);
}
}
@@ -1222,14 +1424,14 @@ static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
ArrayRef<Value *> Operands, IRBuilderBase &B,
const TargetLibraryInfo *TLI,
bool IsVaArgs = false) {
- if (!TLI->has(TheLibFunc))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, TheLibFunc))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
StringRef FuncName = TLI->getName(TheLibFunc);
FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
- FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
- inferLibFuncAttributes(M, FuncName, *TLI);
+ FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, FuncType);
+ inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
if (const Function *F =
dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
@@ -1298,16 +1500,16 @@ Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B,
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_memcpy_chk))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_memcpy_chk))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
AttributeList AS;
AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- FunctionCallee MemCpy = M->getOrInsertFunction(
- "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
+ FunctionCallee MemCpy = getOrInsertLibFunc(M, *TLI, LibFunc_memcpy_chk,
+ AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
DL.getIntPtrType(Context));
Dst = castToCStr(Dst, B);
@@ -1337,6 +1539,15 @@ Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
{castToCStr(Ptr, B), Val, Len}, B, TLI);
}
+Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ return emitLibCall(
+ LibFunc_memrchr, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr, B), Val, Len}, B, TLI);
+}
+
Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
@@ -1441,14 +1652,15 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
}
}
-static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
- IRBuilderBase &B,
- const AttributeList &Attrs) {
+static Value *emitUnaryFloatFnCallHelper(Value *Op, LibFunc TheLibFunc,
+ StringRef Name, IRBuilderBase &B,
+ const AttributeList &Attrs,
+ const TargetLibraryInfo *TLI) {
assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
- FunctionCallee Callee =
- M->getOrInsertFunction(Name, Op->getType(), Op->getType());
+ FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op->getType(),
+ Op->getType());
CallInst *CI = B.CreateCall(Callee, Op, Name);
// The incoming attribute set may have come from a speculatable intrinsic, but
@@ -1463,12 +1675,16 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
return CI;
}
-Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilderBase &B,
+Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
+ StringRef Name, IRBuilderBase &B,
const AttributeList &Attrs) {
SmallString<20> NameBuffer;
appendTypeSuffix(Op, Name, NameBuffer);
- return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+ LibFunc TheLibFunc;
+ TLI->getLibFunc(Name, TheLibFunc);
+
+ return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI);
}
Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
@@ -1476,23 +1692,25 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI,
LibFunc LongDoubleFn, IRBuilderBase &B,
const AttributeList &Attrs) {
// Get the name of the function according to TLI.
- StringRef Name = getFloatFnName(TLI, Op->getType(),
- DoubleFn, FloatFn, LongDoubleFn);
+ Module *M = B.GetInsertBlock()->getModule();
+ LibFunc TheLibFunc;
+ StringRef Name = getFloatFn(M, TLI, Op->getType(), DoubleFn, FloatFn,
+ LongDoubleFn, TheLibFunc);
- return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs);
+ return emitUnaryFloatFnCallHelper(Op, TheLibFunc, Name, B, Attrs, TLI);
}
static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
+ LibFunc TheLibFunc,
StringRef Name, IRBuilderBase &B,
const AttributeList &Attrs,
- const TargetLibraryInfo *TLI = nullptr) {
+ const TargetLibraryInfo *TLI) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
- FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(),
- Op1->getType(), Op2->getType());
- if (TLI != nullptr)
- inferLibFuncAttributes(M, Name, *TLI);
+ FunctionCallee Callee = getOrInsertLibFunc(M, *TLI, TheLibFunc, Op1->getType(),
+ Op1->getType(), Op2->getType());
+ inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name);
// The incoming attribute set may have come from a speculatable intrinsic, but
@@ -1507,15 +1725,19 @@ static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
return CI;
}
-Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilderBase &B,
+Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
+ const TargetLibraryInfo *TLI,
+ StringRef Name, IRBuilderBase &B,
const AttributeList &Attrs) {
assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall");
SmallString<20> NameBuffer;
appendTypeSuffix(Op1, Name, NameBuffer);
- return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs);
+ LibFunc TheLibFunc;
+ TLI->getLibFunc(Name, TheLibFunc);
+
+ return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI);
}
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
@@ -1524,22 +1746,24 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2,
LibFunc LongDoubleFn, IRBuilderBase &B,
const AttributeList &Attrs) {
// Get the name of the function according to TLI.
- StringRef Name = getFloatFnName(TLI, Op1->getType(),
- DoubleFn, FloatFn, LongDoubleFn);
+ Module *M = B.GetInsertBlock()->getModule();
+ LibFunc TheLibFunc;
+ StringRef Name = getFloatFn(M, TLI, Op1->getType(), DoubleFn, FloatFn,
+ LongDoubleFn, TheLibFunc);
- return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs, TLI);
+ return emitBinaryFloatFnCallHelper(Op1, Op2, TheLibFunc, Name, B, Attrs, TLI);
}
Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_putchar))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_putchar))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
StringRef PutCharName = TLI->getName(LibFunc_putchar);
- FunctionCallee PutChar =
- M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
- inferLibFuncAttributes(M, PutCharName, *TLI);
+ FunctionCallee PutChar = getOrInsertLibFunc(M, *TLI, LibFunc_putchar,
+ B.getInt32Ty(), B.getInt32Ty());
+ inferNonMandatoryLibFuncAttrs(M, PutCharName, *TLI);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
B.getInt32Ty(),
@@ -1555,14 +1779,14 @@ Value *llvm::emitPutChar(Value *Char, IRBuilderBase &B,
Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_puts))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_puts))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
StringRef PutsName = TLI->getName(LibFunc_puts);
- FunctionCallee PutS =
- M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy());
- inferLibFuncAttributes(M, PutsName, *TLI);
+ FunctionCallee PutS = getOrInsertLibFunc(M, *TLI, LibFunc_puts, B.getInt32Ty(),
+ B.getInt8PtrTy());
+ inferNonMandatoryLibFuncAttrs(M, PutsName, *TLI);
CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
if (const Function *F =
dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
@@ -1572,15 +1796,15 @@ Value *llvm::emitPutS(Value *Str, IRBuilderBase &B,
Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputc))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_fputc))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
StringRef FPutcName = TLI->getName(LibFunc_fputc);
- FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(),
- B.getInt32Ty(), File->getType());
+ FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputc, B.getInt32Ty(),
+ B.getInt32Ty(), File->getType());
if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutcName, *TLI);
+ inferNonMandatoryLibFuncAttrs(M, FPutcName, *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
"chari");
CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
@@ -1593,15 +1817,15 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilderBase &B,
Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fputs))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_fputs))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
StringRef FPutsName = TLI->getName(LibFunc_fputs);
- FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
- B.getInt8PtrTy(), File->getType());
+ FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fputs, B.getInt32Ty(),
+ B.getInt8PtrTy(), File->getType());
if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FPutsName, *TLI);
+ inferNonMandatoryLibFuncAttrs(M, FPutsName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
if (const Function *Fn =
@@ -1612,18 +1836,18 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilderBase &B,
Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_fwrite))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_fwrite))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc_fwrite);
- FunctionCallee F = M->getOrInsertFunction(
- FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
+ FunctionCallee F = getOrInsertLibFunc(M, *TLI, LibFunc_fwrite,
+ DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), File->getType());
if (File->getType()->isPointerTy())
- inferLibFuncAttributes(M, FWriteName, *TLI);
+ inferNonMandatoryLibFuncAttrs(M, FWriteName, *TLI);
CallInst *CI =
B.CreateCall(F, {castToCStr(Ptr, B), Size,
ConstantInt::get(DL.getIntPtrType(Context), 1), File});
@@ -1636,15 +1860,15 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilderBase &B,
Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_malloc))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, LibFunc_malloc))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
StringRef MallocName = TLI->getName(LibFunc_malloc);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, MallocName, *TLI);
+ FunctionCallee Malloc = getOrInsertLibFunc(M, *TLI, LibFunc_malloc,
+ B.getInt8PtrTy(), DL.getIntPtrType(Context));
+ inferNonMandatoryLibFuncAttrs(M, MallocName, *TLI);
CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
if (const Function *F =
@@ -1656,16 +1880,16 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo &TLI) {
- if (!TLI.has(LibFunc_calloc))
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, &TLI, LibFunc_calloc))
return nullptr;
- Module *M = B.GetInsertBlock()->getModule();
StringRef CallocName = TLI.getName(LibFunc_calloc);
const DataLayout &DL = M->getDataLayout();
IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
- FunctionCallee Calloc =
- M->getOrInsertFunction(CallocName, B.getInt8PtrTy(), PtrType, PtrType);
- inferLibFuncAttributes(M, CallocName, TLI);
+ FunctionCallee Calloc = getOrInsertLibFunc(M, TLI, LibFunc_calloc,
+ B.getInt8PtrTy(), PtrType, PtrType);
+ inferNonMandatoryLibFuncAttrs(M, CallocName, TLI);
CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
if (const auto *F =
diff --git a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
index ac3839f2a4ab..1840f26add2d 100644
--- a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
@@ -14,6 +14,9 @@
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 56b6e4bc46a5..e530afc277db 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -279,8 +279,8 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// ; The original call instruction stays in its original block.
/// %t0 = musttail call i32 %ptr()
/// ret %t0
-static CallBase &versionCallSite(CallBase &CB, Value *Callee,
- MDNode *BranchWeights) {
+CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
+ MDNode *BranchWeights) {
IRBuilder<> Builder(&CB);
CallBase *OrigInst = &CB;
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
index 6b01c0c71d00..f229d4bf14e9 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -30,8 +30,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/Constants.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index 049c7d113521..a1ee3df907ec 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -29,7 +29,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IVDescriptors.h"
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 86413df664a0..8f053cd56e0e 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -23,7 +22,6 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -324,6 +322,9 @@ struct PruningFunctionCloner {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
+ bool HostFuncIsStrictFP;
+
+ Instruction *cloneInstruction(BasicBlock::const_iterator II);
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
@@ -331,7 +332,10 @@ public:
const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo) {}
+ CodeInfo(codeInfo) {
+ HostFuncIsStrictFP =
+ newFunc->getAttributes().hasFnAttr(Attribute::StrictFP);
+ }
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
@@ -340,6 +344,89 @@ public:
};
} // namespace
+static bool hasRoundingModeOperand(Intrinsic::ID CIID) {
+ switch (CIID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC: \
+ return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+ default:
+ llvm_unreachable("Unexpected constrained intrinsic id");
+ }
+}
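
hasRoundingModeOperand() works by X-macro expansion of ConstrainedOps.def, which emits one INSTRUCTION/FUNCTION record per constrained intrinsic. A hedged sketch of the same technique, counting the intrinsics that carry a rounding-mode operand:

static unsigned countRoundedIntrinsics() {
  unsigned N = 0;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) N += (ROUND_MODE == 1);
#define FUNCTION INSTRUCTION
#include "llvm/IR/ConstrainedOps.def" // expands to one increment per record
  return N;
}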
+
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+ const Instruction &OldInst = *II;
+ Instruction *NewInst = nullptr;
+ if (HostFuncIsStrictFP) {
+ Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+ if (CIID != Intrinsic::not_intrinsic) {
+      // Instead of cloning the instruction, a call to the corresponding
+      // constrained intrinsic should be created.
+ // Assume the first arguments of constrained intrinsics are the same as
+      // the operands of the original instruction.
+
+ // Determine overloaded types of the intrinsic.
+ SmallVector<Type *, 2> TParams;
+ SmallVector<Intrinsic::IITDescriptor, 8> Descriptor;
+ getIntrinsicInfoTableEntries(CIID, Descriptor);
+ for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) {
+ Intrinsic::IITDescriptor Operand = Descriptor[I];
+ switch (Operand.Kind) {
+ case Intrinsic::IITDescriptor::Argument:
+ if (Operand.getArgumentKind() !=
+ Intrinsic::IITDescriptor::AK_MatchType) {
+ if (I == 0)
+ TParams.push_back(OldInst.getType());
+ else
+ TParams.push_back(OldInst.getOperand(I - 1)->getType());
+ }
+ break;
+ case Intrinsic::IITDescriptor::SameVecWidthArgument:
+ ++I;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Create intrinsic call.
+ LLVMContext &Ctx = NewFunc->getContext();
+ Function *IFn =
+ Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+ SmallVector<Value *, 4> Args;
+ unsigned NumOperands = OldInst.getNumOperands();
+ if (isa<CallInst>(OldInst))
+ --NumOperands;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ Value *Op = OldInst.getOperand(I);
+ Args.push_back(Op);
+ }
+ if (const auto *CmpI = dyn_cast<FCmpInst>(&OldInst)) {
+ FCmpInst::Predicate Pred = CmpI->getPredicate();
+ StringRef PredName = FCmpInst::getPredicateName(Pred);
+ Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName)));
+ }
+
+ // The last arguments of a constrained intrinsic are metadata that
+ // represent rounding mode (absents in some intrinsics) and exception
+      // represent rounding mode (absent from some intrinsics) and exception
+ if (hasRoundingModeOperand(CIID))
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest")));
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore")));
+
+ NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+ }
+ }
+ if (!NewInst)
+ NewInst = II->clone();
+ return NewInst;
+}
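
Concretely, an fadd cloned into a strictfp host becomes a call to llvm.experimental.constrained.fadd carrying the two metadata operands added above. A hedged sketch of the same mapping through IRBuilder's constrained-FP helper (the helper name is illustrative):

static Value *emitStrictFAdd(IRBuilder<> &B, Value *L, Value *R) {
  // Match the cloner's defaults: round.tonearest and fpexcept.ignore.
  return B.CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
                                    L, R, /*FMFSource=*/nullptr, "r.strict",
                                    /*FPMathTag=*/nullptr,
                                    RoundingMode::NearestTiesToEven,
                                    fp::ebIgnore);
}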
+
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(
@@ -379,7 +466,14 @@ void PruningFunctionCloner::CloneBlock(
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
++II) {
- Instruction *NewInst = II->clone();
+ Instruction *NewInst = cloneInstruction(II);
+
+ if (HostFuncIsStrictFP) {
+ // All function calls in the inlined function must get 'strictfp'
+ // attribute to prevent undesirable optimizations.
+ if (auto *Call = dyn_cast<CallInst>(NewInst))
+ Call->addFnAttr(Attribute::StrictFP);
+ }
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.
@@ -391,7 +485,7 @@ void PruningFunctionCloner::CloneBlock(
// a mapping to that value rather than inserting a new instruction into
// the basic block.
if (Value *V =
- SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
+ simplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (NewFunc != OldFunc)
@@ -674,7 +768,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
continue;
// See if this instruction simplifies.
- Value *SimpleV = SimplifyInstruction(I, DL);
+ Value *SimpleV = simplifyInstruction(I, DL);
if (!SimpleV)
continue;
diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp
index 57c273a0e3c5..55cda0f11e47 100644
--- a/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -11,13 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
+namespace llvm {
+class Constant;
+}
+
static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
const Comdat *SC = Src->getComdat();
if (!SC)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index cec159f6a448..f94d854f7ee8 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -53,7 +53,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -62,12 +61,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
-#include <set>
#include <utility>
#include <vector>
@@ -249,9 +246,10 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI, AssumptionCache *AC,
bool AllowVarArgs, bool AllowAlloca,
- std::string Suffix)
+ BasicBlock *AllocationBlock, std::string Suffix)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
+ BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
+ AllowVarArgs(AllowVarArgs),
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
Suffix(Suffix) {}
@@ -260,7 +258,7 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BranchProbabilityInfo *BPI, AssumptionCache *AC,
std::string Suffix)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AC(AC), AllowVarArgs(false),
+ BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false),
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
/* AllowVarArgs */ false,
/* AllowAlloca */ false)),
@@ -922,6 +920,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::StackAlignment:
case Attribute::WillReturn:
case Attribute::WriteOnly:
+ case Attribute::AllocKind:
+ case Attribute::PresplitCoroutine:
continue;
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
@@ -939,6 +939,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NonLazyBind:
case Attribute::NoRedZone:
case Attribute::NoUnwind:
+ case Attribute::NoSanitizeBounds:
case Attribute::NoSanitizeCoverage:
case Attribute::NullPointerIsValid:
case Attribute::OptForFuzzing:
@@ -964,6 +965,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
break;
// These attributes cannot be applied to functions.
case Attribute::Alignment:
+ case Attribute::AllocatedPointer:
+ case Attribute::AllocAlign:
case Attribute::ByVal:
case Attribute::Dereferenceable:
case Attribute::DereferenceableOrNull:
@@ -1190,9 +1193,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
// Allocate a struct at the beginning of this function
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
- Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
- "structArg",
- &codeReplacer->getParent()->front().front());
+ Struct = new AllocaInst(
+ StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg",
+ AllocationBlock ? &*AllocationBlock->getFirstInsertionPt()
+ : &codeReplacer->getParent()->front().front());
params.push_back(Struct);
// Store aggregated inputs in the struct.
@@ -1771,7 +1775,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
// Update the entry count of the function.
if (BFI) {
auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
- if (Count.hasValue())
+ if (Count)
newFunction->setEntryCount(
ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
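
The new AllocationBlock parameter lets a client pin the aggregated-argument alloca to a block of its choosing rather than the host function's front block. A hedged construction sketch (Blocks, DT, and EntryBB are assumed to exist at the call site):

CodeExtractor CE(Blocks, &DT, /*AggregateArgs=*/true, /*BFI=*/nullptr,
                 /*BPI=*/nullptr, /*AC=*/nullptr, /*AllowVarArgs=*/false,
                 /*AllowAlloca=*/false, /*AllocationBlock=*/EntryBB,
                 /*Suffix=*/"outlined");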
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index dfb9f608eab2..1ff0f148b3a9 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -40,11 +40,20 @@
#include "llvm/Transforms/Utils/CodeLayout.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "code-layout"
+cl::opt<bool> EnableExtTspBlockPlacement(
+ "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
+ cl::desc("Enable machine block placement based on the ext-tsp model, "
+ "optimizing I-cache utilization."));
+
+cl::opt<bool> ApplyExtTspWithoutProfile(
+ "ext-tsp-apply-without-profile",
+ cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
+ cl::init(true), cl::Hidden);
+
// Algorithm-specific constants. The values are tuned for the best performance
// of large-scale front-end bound binaries.
static cl::opt<double>
@@ -63,6 +72,12 @@ static cl::opt<unsigned> BackwardDistance(
"ext-tsp-backward-distance", cl::Hidden, cl::init(640),
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
+// The maximum size of a chain created by the algorithm. The size is bounded
+// so that the algorithm can efficiently process extremely large instances.
+static cl::opt<unsigned>
+ MaxChainSize("ext-tsp-max-chain-size", cl::Hidden, cl::init(4096),
+ cl::desc("The maximum size of a chain to create."));
+
// The maximum size of a chain for splitting. Larger values of the threshold
// may yield better quality at the cost of worse run-time.
static cl::opt<unsigned> ChainSplitThreshold(
@@ -115,7 +130,7 @@ enum class MergeTypeTy : int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y };
/// together with the corresponding merge 'type' and 'offset'.
class MergeGainTy {
public:
- explicit MergeGainTy() {}
+ explicit MergeGainTy() = default;
explicit MergeGainTy(double Score, size_t MergeOffset, MergeTypeTy MergeType)
: Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
@@ -142,7 +157,6 @@ private:
MergeTypeTy MergeType{MergeTypeTy::X_Y};
};
-class Block;
class Jump;
class Chain;
class ChainEdge;
@@ -223,6 +237,8 @@ public:
const std::vector<Block *> &blocks() const { return Blocks; }
+ size_t numBlocks() const { return Blocks.size(); }
+
const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const {
return Edges;
}
@@ -499,7 +515,7 @@ private:
AllEdges.reserve(AllJumps.size());
for (auto &Block : AllBlocks) {
for (auto &Jump : Block.OutJumps) {
- const auto SuccBlock = Jump->Target;
+ auto SuccBlock = Jump->Target;
auto CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain);
// this edge is already present in the graph
if (CurEdge != nullptr) {
@@ -589,6 +605,10 @@ private:
if (ChainPred == ChainSucc)
continue;
+ // Stop early if the combined chain violates the maximum allowed size
+ if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
+ continue;
+
// Compute the gain of merging the two chains
auto CurGain = getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
if (CurGain.score() <= EPS)
diff --git a/llvm/lib/Transforms/Utils/CtorUtils.cpp b/llvm/lib/Transforms/Utils/CtorUtils.cpp
index 069a86f6ab33..c997f39508e3 100644
--- a/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <numeric>
#define DEBUG_TYPE "ctor_utils"
@@ -62,21 +63,20 @@ static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemov
/// Given a llvm.global_ctors list that we can understand,
/// return a list of the functions and null terminator as a vector.
-static std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
- if (GV->getInitializer()->isNullValue())
- return std::vector<Function *>();
+static std::vector<std::pair<uint32_t, Function *>>
+parseGlobalCtors(GlobalVariable *GV) {
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
- std::vector<Function *> Result;
+ std::vector<std::pair<uint32_t, Function *>> Result;
Result.reserve(CA->getNumOperands());
for (auto &V : CA->operands()) {
ConstantStruct *CS = cast<ConstantStruct>(V);
- Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ Result.emplace_back(cast<ConstantInt>(CS->getOperand(0))->getZExtValue(),
+ dyn_cast<Function>(CS->getOperand(1)));
}
return Result;
}
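
For reference, each llvm.global_ctors element is a { i32 priority, void ()* ctor, i8* data } struct, which is why operand 0 yields the priority and operand 1 the function. A hedged standalone sketch of the same walk (GV assumed to be the verified llvm.global_ctors variable):

if (auto *CA = dyn_cast<ConstantArray>(GV->getInitializer()))
  for (Value *V : CA->operands()) {
    auto *CS = cast<ConstantStruct>(V);
    uint32_t Prio = cast<ConstantInt>(CS->getOperand(0))->getZExtValue();
    Function *Ctor = dyn_cast<Function>(CS->getOperand(1)); // null if absent
    (void)Prio; (void)Ctor; // placeholder uses
  }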
-/// Find the llvm.global_ctors list, verifying that all initializers have an
-/// init priority of 65535.
+/// Find the llvm.global_ctors list.
static GlobalVariable *findGlobalCtors(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
@@ -87,9 +87,11 @@ static GlobalVariable *findGlobalCtors(Module &M) {
if (!GV->hasUniqueInitializer())
return nullptr;
- if (isa<ConstantAggregateZero>(GV->getInitializer()))
- return GV;
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ // If there are no ctors, then the initializer might be null/undef/poison.
+ // Ignore anything but an array.
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA)
+ return nullptr;
for (auto &V : CA->operands()) {
if (isa<ConstantAggregateZero>(V))
@@ -98,54 +100,47 @@ static GlobalVariable *findGlobalCtors(Module &M) {
if (isa<ConstantPointerNull>(CS->getOperand(1)))
continue;
- // Must have a function or null ptr.
- if (!isa<Function>(CS->getOperand(1)))
- return nullptr;
-
- // Init priority must be standard.
- ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
- if (CI->getZExtValue() != 65535)
+ // Can only handle global constructors with no arguments.
+ Function *F = dyn_cast<Function>(CS->getOperand(1));
+ if (!F || F->arg_size() != 0)
return nullptr;
}
-
return GV;
}
/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
/// entries for which it returns true. Return true if anything changed.
bool llvm::optimizeGlobalCtorsList(
- Module &M, function_ref<bool(Function *)> ShouldRemove) {
+ Module &M, function_ref<bool(uint32_t, Function *)> ShouldRemove) {
GlobalVariable *GlobalCtors = findGlobalCtors(M);
if (!GlobalCtors)
return false;
- std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors);
+ std::vector<std::pair<uint32_t, Function *>> Ctors =
+ parseGlobalCtors(GlobalCtors);
if (Ctors.empty())
return false;
bool MadeChange = false;
-
// Loop over global ctors, optimizing them when we can.
- unsigned NumCtors = Ctors.size();
- BitVector CtorsToRemove(NumCtors);
- for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) {
- Function *F = Ctors[i];
- // Found a null terminator in the middle of the list, prune off the rest of
- // the list.
+ BitVector CtorsToRemove(Ctors.size());
+ std::vector<size_t> CtorsByPriority(Ctors.size());
+ std::iota(CtorsByPriority.begin(), CtorsByPriority.end(), 0);
+ stable_sort(CtorsByPriority, [&](size_t LHS, size_t RHS) {
+ return Ctors[LHS].first < Ctors[RHS].first;
+ });
+ for (unsigned CtorIndex : CtorsByPriority) {
+ const uint32_t Priority = Ctors[CtorIndex].first;
+ Function *F = Ctors[CtorIndex].second;
if (!F)
continue;
LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
- // We cannot simplify external ctor functions.
- if (F->empty())
- continue;
-
// If we can evaluate the ctor at compile time, do.
- if (ShouldRemove(F)) {
- Ctors[i] = nullptr;
- CtorsToRemove.set(i);
- NumCtors--;
+ if (ShouldRemove(Priority, F)) {
+ Ctors[CtorIndex].second = nullptr;
+ CtorsToRemove.set(CtorIndex);
MadeChange = true;
continue;
}
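
The priority handling above uses the index-sorting idiom: sort a vector of indices instead of the records themselves so that the BitVector positions remain valid. A self-contained sketch of the idiom:

#include <algorithm>
#include <numeric>
#include <vector>

std::vector<uint32_t> Prio = {65535, 101, 200};
std::vector<size_t> Order(Prio.size());
std::iota(Order.begin(), Order.end(), 0); // Order = {0, 1, 2}
std::stable_sort(Order.begin(), Order.end(),
                 [&](size_t L, size_t R) { return Prio[L] < Prio[R]; });
// Order is now {1, 2, 0}; equal priorities keep their original order.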
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 589622d69578..205f7a7d9ed2 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -37,12 +37,16 @@ namespace {
cl::opt<bool> Quiet("debugify-quiet",
cl::desc("Suppress verbose debugify output"));
+cl::opt<uint64_t> DebugifyFunctionsLimit(
+ "debugify-func-limit",
+ cl::desc("Set max number of processed functions per pass."),
+ cl::init(UINT_MAX));
+
enum class Level {
Locations,
LocationsAndVariables
};
-// Used for the synthetic mode only.
cl::opt<Level> DebugifyLevel(
"debugify-level", cl::desc("Kind of debug info to add"),
cl::values(clEnumValN(Level::Locations, "locations", "Locations only"),
@@ -210,15 +214,15 @@ bool llvm::applyDebugifyMetadata(
static bool
applyDebugify(Function &F,
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
- DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
StringRef NameOfWrappedPass = "") {
Module &M = *F.getParent();
auto FuncIt = F.getIterator();
if (Mode == DebugifyMode::SyntheticDebugInfo)
return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
"FunctionDebugify: ", /*ApplyToMF*/ nullptr);
- assert(DIPreservationMap);
- return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap,
+ assert(DebugInfoBeforePass);
+ return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
"FunctionDebugify (original debuginfo)",
NameOfWrappedPass);
}
@@ -226,12 +230,12 @@ applyDebugify(Function &F,
static bool
applyDebugify(Module &M,
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
- DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
StringRef NameOfWrappedPass = "") {
if (Mode == DebugifyMode::SyntheticDebugInfo)
return applyDebugifyMetadata(M, M.functions(),
"ModuleDebugify: ", /*ApplyToMF*/ nullptr);
- return collectDebugInfoMetadata(M, M.functions(), *DIPreservationMap,
+ return collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
"ModuleDebugify (original debuginfo)",
NameOfWrappedPass);
}
@@ -267,7 +271,7 @@ bool llvm::stripDebugifyMetadata(Module &M) {
SmallVector<MDNode *, 4> Flags(NMD->operands());
NMD->clearOperands();
for (MDNode *Flag : Flags) {
- MDString *Key = dyn_cast_or_null<MDString>(Flag->getOperand(1));
+ auto *Key = cast<MDString>(Flag->getOperand(1));
if (Key->getString() == "Debug Info Version") {
Changed = true;
continue;
@@ -283,32 +287,37 @@ bool llvm::stripDebugifyMetadata(Module &M) {
bool llvm::collectDebugInfoMetadata(Module &M,
iterator_range<Module::iterator> Functions,
- DebugInfoPerPassMap &DIPreservationMap,
+ DebugInfoPerPass &DebugInfoBeforePass,
StringRef Banner,
StringRef NameOfWrappedPass) {
LLVM_DEBUG(dbgs() << Banner << ": (before) " << NameOfWrappedPass << '\n');
- // Clear the map with the debug info before every single pass.
- DIPreservationMap.clear();
-
if (!M.getNamedMetadata("llvm.dbg.cu")) {
dbg() << Banner << ": Skipping module without debug info\n";
return false;
}
+ uint64_t FunctionsCnt = DebugInfoBeforePass.DIFunctions.size();
// Visit each instruction.
for (Function &F : Functions) {
+    // Use DI collected after the previous pass (when -debugify-each is used).
+ if (DebugInfoBeforePass.DIFunctions.count(&F))
+ continue;
+
if (isFunctionSkipped(F))
continue;
+    // Stop collecting DI once the number of processed functions reaches the limit.
+ if (++FunctionsCnt >= DebugifyFunctionsLimit)
+ break;
// Collect the DISubprogram.
auto *SP = F.getSubprogram();
- DIPreservationMap[NameOfWrappedPass].DIFunctions.insert({F.getName(), SP});
+ DebugInfoBeforePass.DIFunctions.insert({&F, SP});
if (SP) {
LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n');
for (const DINode *DN : SP->getRetainedNodes()) {
if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
- DIPreservationMap[NameOfWrappedPass].DIVariables[DV] = 0;
+ DebugInfoBeforePass.DIVariables[DV] = 0;
}
}
}
@@ -320,20 +329,22 @@ bool llvm::collectDebugInfoMetadata(Module &M,
if (isa<PHINode>(I))
continue;
- // Collect dbg.values and dbg.declares.
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
- if (!SP)
- continue;
- // Skip inlined variables.
- if (I.getDebugLoc().getInlinedAt())
+ // Collect dbg.values and dbg.declares.
+ if (DebugifyLevel > Level::Locations) {
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ if (!SP)
+ continue;
+ // Skip inlined variables.
+ if (I.getDebugLoc().getInlinedAt())
+ continue;
+ // Skip undef values.
+ if (DVI->isUndef())
+ continue;
+
+ auto *Var = DVI->getVariable();
+ DebugInfoBeforePass.DIVariables[Var]++;
continue;
- // Skip undef values.
- if (DVI->isUndef())
- continue;
-
- auto *Var = DVI->getVariable();
- DIPreservationMap[NameOfWrappedPass].DIVariables[Var]++;
- continue;
+ }
}
// Skip debug instructions other than dbg.value and dbg.declare.
@@ -341,11 +352,11 @@ bool llvm::collectDebugInfoMetadata(Module &M,
continue;
LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n');
- DIPreservationMap[NameOfWrappedPass].InstToDelete.insert({&I, &I});
+ DebugInfoBeforePass.InstToDelete.insert({&I, &I});
const DILocation *Loc = I.getDebugLoc().get();
bool HasLoc = Loc != nullptr;
- DIPreservationMap[NameOfWrappedPass].DILocations.insert({&I, HasLoc});
+ DebugInfoBeforePass.DILocations.insert({&I, HasLoc});
}
}
}
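As the insertions above show, the renamed DebugInfoPerPass bundle is now keyed by Function * rather than by function name, which lets the state be reused across passes. Its implied shape, as a sketch (field names taken from the patch, exact container types assumed):

    struct DebugInfoPerPass {
      // Function -> its DISubprogram (may be null) before the pass.
      DenseMap<const Function *, const DISubprogram *> DIFunctions;
      // Instruction -> whether it carried a debug location.
      DenseMap<const Instruction *, bool> DILocations;
      // Debug intrinsics expected to vanish with their instructions.
      DenseMap<const Instruction *, const Instruction *> InstToDelete;
      // Local variable -> number of llvm.dbg.* uses seen.
      DenseMap<const DILocalVariable *, unsigned> DIVariables;
    };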
@@ -367,12 +378,12 @@ static bool checkFunctions(const DebugFnMap &DIFunctionsBefore,
if (SPIt == DIFunctionsBefore.end()) {
if (ShouldWriteIntoJSON)
Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"},
- {"name", F.first},
+ {"name", F.first->getName()},
{"action", "not-generate"}}));
else
dbg() << "ERROR: " << NameOfWrappedPass
- << " did not generate DISubprogram for " << F.first << " from "
- << FileNameFromCU << '\n';
+ << " did not generate DISubprogram for " << F.first->getName()
+ << " from " << FileNameFromCU << '\n';
Preserved = false;
} else {
auto SP = SPIt->second;
@@ -382,11 +393,11 @@ static bool checkFunctions(const DebugFnMap &DIFunctionsBefore,
// a debug info bug.
if (ShouldWriteIntoJSON)
Bugs.push_back(llvm::json::Object({{"metadata", "DISubprogram"},
- {"name", F.first},
+ {"name", F.first->getName()},
{"action", "drop"}}));
else
dbg() << "ERROR: " << NameOfWrappedPass << " dropped DISubprogram of "
- << F.first << " from " << FileNameFromCU << '\n';
+ << F.first->getName() << " from " << FileNameFromCU << '\n';
Preserved = false;
}
}
@@ -515,7 +526,7 @@ static void writeJSON(StringRef OrigDIVerifyBugsReportFilePath,
bool llvm::checkDebugInfoMetadata(Module &M,
iterator_range<Module::iterator> Functions,
- DebugInfoPerPassMap &DIPreservationMap,
+ DebugInfoPerPass &DebugInfoBeforePass,
StringRef Banner, StringRef NameOfWrappedPass,
StringRef OrigDIVerifyBugsReportFilePath) {
LLVM_DEBUG(dbgs() << Banner << ": (after) " << NameOfWrappedPass << '\n');
@@ -526,24 +537,26 @@ bool llvm::checkDebugInfoMetadata(Module &M,
}
// Map the debug info holding DIs after a pass.
- DebugInfoPerPassMap DIPreservationAfter;
+ DebugInfoPerPass DebugInfoAfterPass;
// Visit each instruction.
for (Function &F : Functions) {
if (isFunctionSkipped(F))
continue;
+ // Don't process functions for which no DI was collected before the pass.
+ if (!DebugInfoBeforePass.DIFunctions.count(&F))
+ continue;
// TODO: Collect metadata other than DISubprograms.
// Collect the DISubprogram.
auto *SP = F.getSubprogram();
- DIPreservationAfter[NameOfWrappedPass].DIFunctions.insert(
- {F.getName(), SP});
+ DebugInfoAfterPass.DIFunctions.insert({&F, SP});
if (SP) {
LLVM_DEBUG(dbgs() << " Collecting subprogram: " << *SP << '\n');
for (const DINode *DN : SP->getRetainedNodes()) {
if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
- DIPreservationAfter[NameOfWrappedPass].DIVariables[DV] = 0;
+ DebugInfoAfterPass.DIVariables[DV] = 0;
}
}
}
@@ -556,19 +569,21 @@ bool llvm::checkDebugInfoMetadata(Module &M,
continue;
// Collect dbg.values and dbg.declares.
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
- if (!SP)
- continue;
- // Skip inlined variables.
- if (I.getDebugLoc().getInlinedAt())
- continue;
- // Skip undef values.
- if (DVI->isUndef())
+ if (DebugifyLevel > Level::Locations) {
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
+ if (!SP)
+ continue;
+ // Skip inlined variables.
+ if (I.getDebugLoc().getInlinedAt())
+ continue;
+ // Skip undef values.
+ if (DVI->isUndef())
+ continue;
+
+ auto *Var = DVI->getVariable();
+ DebugInfoAfterPass.DIVariables[Var]++;
continue;
-
- auto *Var = DVI->getVariable();
- DIPreservationAfter[NameOfWrappedPass].DIVariables[Var]++;
- continue;
+ }
}
// Skip debug instructions other than dbg.value and dbg.declare.
@@ -580,7 +595,7 @@ bool llvm::checkDebugInfoMetadata(Module &M,
const DILocation *Loc = I.getDebugLoc().get();
bool HasLoc = Loc != nullptr;
- DIPreservationAfter[NameOfWrappedPass].DILocations.insert({&I, HasLoc});
+ DebugInfoAfterPass.DILocations.insert({&I, HasLoc});
}
}
}
@@ -590,16 +605,16 @@ bool llvm::checkDebugInfoMetadata(Module &M,
(cast<DICompileUnit>(M.getNamedMetadata("llvm.dbg.cu")->getOperand(0)))
->getFilename();
- auto DIFunctionsBefore = DIPreservationMap[NameOfWrappedPass].DIFunctions;
- auto DIFunctionsAfter = DIPreservationAfter[NameOfWrappedPass].DIFunctions;
+ auto DIFunctionsBefore = DebugInfoBeforePass.DIFunctions;
+ auto DIFunctionsAfter = DebugInfoAfterPass.DIFunctions;
- auto DILocsBefore = DIPreservationMap[NameOfWrappedPass].DILocations;
- auto DILocsAfter = DIPreservationAfter[NameOfWrappedPass].DILocations;
+ auto DILocsBefore = DebugInfoBeforePass.DILocations;
+ auto DILocsAfter = DebugInfoAfterPass.DILocations;
- auto InstToDelete = DIPreservationMap[NameOfWrappedPass].InstToDelete;
+ auto InstToDelete = DebugInfoBeforePass.InstToDelete;
- auto DIVarsBefore = DIPreservationMap[NameOfWrappedPass].DIVariables;
- auto DIVarsAfter = DIPreservationAfter[NameOfWrappedPass].DIVariables;
+ auto DIVarsBefore = DebugInfoBeforePass.DIVariables;
+ auto DIVarsAfter = DebugInfoAfterPass.DIVariables;
bool ShouldWriteIntoJSON = !OrigDIVerifyBugsReportFilePath.empty();
llvm::json::Array Bugs;
@@ -626,6 +641,11 @@ bool llvm::checkDebugInfoMetadata(Module &M,
else
dbg() << ResultBanner << ": FAIL\n";
+ // In the case of `debugify-each`, there is no need to walk all the
+ // instructions again in collectDebugInfoMetadata(), since the debug info
+ // collected after the previous pass can be reused as the input.
+ DebugInfoBeforePass = DebugInfoAfterPass;
+
LLVM_DEBUG(dbgs() << "\n\n");
return Result;
}
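The final assignment above makes the check step feed the next collection step, so under -debugify-each each pass only pays for functions it has not seen yet. Schematically (a sketch; the pipeline loop and pass type are illustrative):

    DebugInfoPerPass State; // persists across all wrapped passes
    for (auto &P : Passes) {
      collectDebugInfoMetadata(M, M.functions(), State,
                               "Debugify (original debuginfo)", P.getName());
      P.run(M); // the wrapped transformation
      checkDebugInfoMetadata(M, M.functions(), State,
                             "CheckDebugify (original debuginfo)", P.getName(),
                             /*OrigDIVerifyBugsReportFilePath=*/"");
      // checkDebugInfoMetadata ends by copying its "after" snapshot into State.
    }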
@@ -770,14 +790,14 @@ bool checkDebugifyMetadata(Module &M,
/// legacy module pass manager.
struct DebugifyModulePass : public ModulePass {
bool runOnModule(Module &M) override {
- return applyDebugify(M, Mode, DIPreservationMap, NameOfWrappedPass);
+ return applyDebugify(M, Mode, DebugInfoBeforePass, NameOfWrappedPass);
}
DebugifyModulePass(enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
StringRef NameOfWrappedPass = "",
- DebugInfoPerPassMap *DIPreservationMap = nullptr)
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr)
: ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass),
- DIPreservationMap(DIPreservationMap), Mode(Mode) {}
+ DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -787,7 +807,7 @@ struct DebugifyModulePass : public ModulePass {
private:
StringRef NameOfWrappedPass;
- DebugInfoPerPassMap *DIPreservationMap;
+ DebugInfoPerPass *DebugInfoBeforePass;
enum DebugifyMode Mode;
};
@@ -795,15 +815,15 @@ private:
/// single function, used with the legacy module pass manager.
struct DebugifyFunctionPass : public FunctionPass {
bool runOnFunction(Function &F) override {
- return applyDebugify(F, Mode, DIPreservationMap, NameOfWrappedPass);
+ return applyDebugify(F, Mode, DebugInfoBeforePass, NameOfWrappedPass);
}
DebugifyFunctionPass(
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
StringRef NameOfWrappedPass = "",
- DebugInfoPerPassMap *DIPreservationMap = nullptr)
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr)
: FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass),
- DIPreservationMap(DIPreservationMap), Mode(Mode) {}
+ DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -813,7 +833,7 @@ struct DebugifyFunctionPass : public FunctionPass {
private:
StringRef NameOfWrappedPass;
- DebugInfoPerPassMap *DIPreservationMap;
+ DebugInfoPerPass *DebugInfoBeforePass;
enum DebugifyMode Mode;
};
@@ -825,7 +845,7 @@ struct CheckDebugifyModulePass : public ModulePass {
return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
"CheckModuleDebugify", Strip, StatsMap);
return checkDebugInfoMetadata(
- M, M.functions(), *DIPreservationMap,
+ M, M.functions(), *DebugInfoBeforePass,
"CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
}
@@ -834,11 +854,11 @@ struct CheckDebugifyModulePass : public ModulePass {
bool Strip = false, StringRef NameOfWrappedPass = "",
DebugifyStatsMap *StatsMap = nullptr,
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
- DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
StringRef OrigDIVerifyBugsReportFilePath = "")
: ModulePass(ID), NameOfWrappedPass(NameOfWrappedPass),
OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath),
- StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode),
+ StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode),
Strip(Strip) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -851,7 +871,7 @@ private:
StringRef NameOfWrappedPass;
StringRef OrigDIVerifyBugsReportFilePath;
DebugifyStatsMap *StatsMap;
- DebugInfoPerPassMap *DIPreservationMap;
+ DebugInfoPerPass *DebugInfoBeforePass;
enum DebugifyMode Mode;
bool Strip;
};
@@ -867,7 +887,7 @@ struct CheckDebugifyFunctionPass : public FunctionPass {
NameOfWrappedPass, "CheckFunctionDebugify",
Strip, StatsMap);
return checkDebugInfoMetadata(
- M, make_range(FuncIt, std::next(FuncIt)), *DIPreservationMap,
+ M, make_range(FuncIt, std::next(FuncIt)), *DebugInfoBeforePass,
"CheckFunctionDebugify (original debuginfo)", NameOfWrappedPass,
OrigDIVerifyBugsReportFilePath);
}
@@ -876,11 +896,11 @@ struct CheckDebugifyFunctionPass : public FunctionPass {
bool Strip = false, StringRef NameOfWrappedPass = "",
DebugifyStatsMap *StatsMap = nullptr,
enum DebugifyMode Mode = DebugifyMode::SyntheticDebugInfo,
- DebugInfoPerPassMap *DIPreservationMap = nullptr,
+ DebugInfoPerPass *DebugInfoBeforePass = nullptr,
StringRef OrigDIVerifyBugsReportFilePath = "")
: FunctionPass(ID), NameOfWrappedPass(NameOfWrappedPass),
OrigDIVerifyBugsReportFilePath(OrigDIVerifyBugsReportFilePath),
- StatsMap(StatsMap), DIPreservationMap(DIPreservationMap), Mode(Mode),
+ StatsMap(StatsMap), DebugInfoBeforePass(DebugInfoBeforePass), Mode(Mode),
Strip(Strip) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -893,7 +913,7 @@ private:
StringRef NameOfWrappedPass;
StringRef OrigDIVerifyBugsReportFilePath;
DebugifyStatsMap *StatsMap;
- DebugInfoPerPassMap *DIPreservationMap;
+ DebugInfoPerPass *DebugInfoBeforePass;
enum DebugifyMode Mode;
bool Strip;
};
@@ -923,21 +943,21 @@ void llvm::exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map) {
ModulePass *createDebugifyModulePass(enum DebugifyMode Mode,
llvm::StringRef NameOfWrappedPass,
- DebugInfoPerPassMap *DIPreservationMap) {
+ DebugInfoPerPass *DebugInfoBeforePass) {
if (Mode == DebugifyMode::SyntheticDebugInfo)
return new DebugifyModulePass();
assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
- return new DebugifyModulePass(Mode, NameOfWrappedPass, DIPreservationMap);
+ return new DebugifyModulePass(Mode, NameOfWrappedPass, DebugInfoBeforePass);
}
FunctionPass *
createDebugifyFunctionPass(enum DebugifyMode Mode,
llvm::StringRef NameOfWrappedPass,
- DebugInfoPerPassMap *DIPreservationMap) {
+ DebugInfoPerPass *DebugInfoBeforePass) {
if (Mode == DebugifyMode::SyntheticDebugInfo)
return new DebugifyFunctionPass();
assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
- return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DIPreservationMap);
+ return new DebugifyFunctionPass(Mode, NameOfWrappedPass, DebugInfoBeforePass);
}
PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
@@ -948,25 +968,25 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
ModulePass *createCheckDebugifyModulePass(
bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap,
- enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap,
+ enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass,
StringRef OrigDIVerifyBugsReportFilePath) {
if (Mode == DebugifyMode::SyntheticDebugInfo)
return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
return new CheckDebugifyModulePass(false, NameOfWrappedPass, nullptr, Mode,
- DIPreservationMap,
+ DebugInfoBeforePass,
OrigDIVerifyBugsReportFilePath);
}
FunctionPass *createCheckDebugifyFunctionPass(
bool Strip, StringRef NameOfWrappedPass, DebugifyStatsMap *StatsMap,
- enum DebugifyMode Mode, DebugInfoPerPassMap *DIPreservationMap,
+ enum DebugifyMode Mode, DebugInfoPerPass *DebugInfoBeforePass,
StringRef OrigDIVerifyBugsReportFilePath) {
if (Mode == DebugifyMode::SyntheticDebugInfo)
return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
assert(Mode == DebugifyMode::OriginalDebugInfo && "Must be original mode");
return new CheckDebugifyFunctionPass(false, NameOfWrappedPass, nullptr, Mode,
- DIPreservationMap,
+ DebugInfoBeforePass,
OrigDIVerifyBugsReportFilePath);
}
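Taken together, the factory signatures are unchanged apart from the parameter type, so a driver running the original-debug-info mode with the legacy pass manager would look roughly like this (a sketch; createMyPass stands in for the pass under test):

    DebugInfoPerPass DIBeforePass; // shared by the two wrapper passes
    legacy::PassManager PM;
    PM.add(createDebugifyModulePass(DebugifyMode::OriginalDebugInfo,
                                    "my-pass", &DIBeforePass));
    PM.add(createMyPass()); // hypothetical pass under test
    PM.add(createCheckDebugifyModulePass(
        /*Strip=*/false, "my-pass", /*StatsMap=*/nullptr,
        DebugifyMode::OriginalDebugInfo, &DIBeforePass,
        /*OrigDIVerifyBugsReportFilePath=*/""));
    PM.run(M);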
diff --git a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 5f53d794fe8a..f6f80540ad95 100644
--- a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -8,11 +8,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index e73287c060ae..7b8d8553bac2 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -29,7 +29,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -37,7 +36,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <iterator>
#define DEBUG_TYPE "evaluator"
@@ -219,10 +217,13 @@ Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) {
P = cast<Constant>(P->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true));
Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType()));
- auto *GV = dyn_cast<GlobalVariable>(P);
- if (!GV)
- return nullptr;
+ if (auto *GV = dyn_cast<GlobalVariable>(P))
+ return ComputeLoadResult(GV, Ty, Offset);
+ return nullptr;
+}
+Constant *Evaluator::ComputeLoadResult(GlobalVariable *GV, Type *Ty,
+ const APInt &Offset) {
auto It = MutatedMemory.find(GV);
if (It != MutatedMemory.end())
return It->second.read(Ty, Offset, DL);
@@ -335,50 +336,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer());
if (!Res.first->second.write(Val, Offset, DL))
return false;
- } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
- InstResult = ConstantExpr::get(BO->getOpcode(),
- getVal(BO->getOperand(0)),
- getVal(BO->getOperand(1)));
- LLVM_DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: "
- << *InstResult << "\n");
- } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
- InstResult = ConstantExpr::getCompare(CI->getPredicate(),
- getVal(CI->getOperand(0)),
- getVal(CI->getOperand(1)));
- LLVM_DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
- << "\n");
- } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
- InstResult = ConstantExpr::getCast(CI->getOpcode(),
- getVal(CI->getOperand(0)),
- CI->getType());
- LLVM_DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
- << "\n");
- } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
- getVal(SI->getOperand(1)),
- getVal(SI->getOperand(2)));
- LLVM_DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
- << "\n");
- } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
- InstResult = ConstantExpr::getExtractValue(
- getVal(EVI->getAggregateOperand()), EVI->getIndices());
- LLVM_DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: "
- << *InstResult << "\n");
- } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
- InstResult = ConstantExpr::getInsertValue(
- getVal(IVI->getAggregateOperand()),
- getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
- LLVM_DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: "
- << *InstResult << "\n");
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
- Constant *P = getVal(GEP->getOperand(0));
- SmallVector<Constant*, 8> GEPOps;
- for (Use &Op : llvm::drop_begin(GEP->operands()))
- GEPOps.push_back(getVal(Op));
- InstResult =
- ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,
- cast<GEPOperator>(GEP)->isInBounds());
- LLVM_DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (!LI->isSimple()) {
LLVM_DEBUG(
@@ -438,16 +395,39 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
<< "intrinsic.\n");
return false;
}
+
+ auto *LenC = dyn_cast<ConstantInt>(getVal(MSI->getLength()));
+ if (!LenC) {
+ LLVM_DEBUG(dbgs() << "Memset with unknown length.\n");
+ return false;
+ }
+
Constant *Ptr = getVal(MSI->getDest());
+ APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Ptr = cast<Constant>(Ptr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
+ auto *GV = dyn_cast<GlobalVariable>(Ptr);
+ if (!GV) {
+ LLVM_DEBUG(dbgs() << "Memset with unknown base.\n");
+ return false;
+ }
+
Constant *Val = getVal(MSI->getValue());
- Constant *DestVal =
- ComputeLoadResult(getVal(Ptr), MSI->getValue()->getType());
- if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
- // This memset is a no-op.
- LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
- ++CurInst;
- continue;
+ APInt Len = LenC->getValue();
+ while (Len != 0) {
+ Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset);
+ if (DestVal != Val) {
+ LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset "
+ << Offset << " of " << *GV << ".\n");
+ return false;
+ }
+ ++Offset;
+ --Len;
}
+
+ LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
+ ++CurInst;
+ continue;
}
if (II->isLifetimeStartOrEnd()) {
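The rewritten memset handling above only skips the intrinsic after proving, byte by byte, that the destination already holds the stored pattern. The core of the check, isolated as a sketch:

    // GV/Offset: the stripped base and accumulated offset of the destination.
    APInt Len = LenC->getValue();
    for (; Len != 0; --Len, ++Offset) {
      Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset);
      if (DestVal != Val) // unknown (nullptr) or different byte
        return false;     // keep the memset; evaluation cannot continue
    }

Note that DestVal != Val is a plain pointer comparison, which is sound here because LLVM uniques constants such as the i8 fill value.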
@@ -602,11 +582,16 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n");
return true;
} else {
- // Did not know how to evaluate this!
- LLVM_DEBUG(
- dbgs() << "Failed to evaluate block due to unhandled instruction."
- "\n");
- return false;
+ SmallVector<Constant *> Ops;
+ for (Value *Op : CurInst->operands())
+ Ops.push_back(getVal(Op));
+ InstResult = ConstantFoldInstOperands(&*CurInst, Ops, DL, TLI);
+ if (!InstResult) {
+ LLVM_DEBUG(dbgs() << "Cannot fold instruction: " << *CurInst << "\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Folded instruction " << *CurInst << " to "
+ << *InstResult << "\n");
}
if (!CurInst->use_empty()) {
@@ -631,6 +616,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
/// function.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs) {
+ assert(ActualArgs.size() == F->arg_size() && "wrong number of arguments");
+
// Check to see if this function is already executing (recursion). If so,
// bail out. TODO: we might want to accept limited recursion.
if (is_contained(CallStack, F))
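The long per-opcode chain deleted from EvaluateBlock (binary ops, compares, casts, selects, extract/insertvalue, GEPs) is subsumed by the new generic fallback: evaluate each operand to a constant and hand the whole instruction to the constant folder. Isolated, the pattern is:

    SmallVector<Constant *> Ops;
    for (Value *Op : CurInst->operands())
      Ops.push_back(getVal(Op)); // operand -> previously computed constant
    // Returns nullptr when no fold is known, which aborts the evaluation.
    Constant *InstResult = ConstantFoldInstOperands(&*CurInst, Ops, DL, TLI);

This both removes duplicated logic and picks up folds (e.g. via the target library info) that the hand-written chain never handled.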
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 8de3ce876bab..24539bd231c6 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -68,6 +68,7 @@
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -137,10 +138,18 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
// not be necessary if we can retain such backedges.
if (Headers.count(Child->getHeader())) {
for (auto BB : Child->blocks()) {
+ if (LI.getLoopFor(BB) != Child)
+ continue;
LI.changeLoopFor(BB, NewLoop);
LLVM_DEBUG(dbgs() << "moved block from child: " << BB->getName()
<< "\n");
}
+ std::vector<Loop *> GrandChildLoops;
+ std::swap(GrandChildLoops, Child->getSubLoopsVector());
+ for (auto GrandChildLoop : GrandChildLoops) {
+ GrandChildLoop->setParentLoop(nullptr);
+ NewLoop->addChildLoop(GrandChildLoop);
+ }
LI.destroy(Child);
LLVM_DEBUG(dbgs() << "subsumed child loop (common header)\n");
continue;
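Dissolving a child loop that shares the new loop's header must not orphan the child's own sub-loops: LI.destroy(Child) does not reparent them. The added swap-then-reattach idiom, in isolation:

    std::vector<Loop *> GrandChildLoops;
    std::swap(GrandChildLoops, Child->getSubLoopsVector()); // detach in O(1)
    for (Loop *GC : GrandChildLoops) {
      GC->setParentLoop(nullptr);  // addChildLoop asserts on a stale parent
      NewLoop->addChildLoop(GC);   // reattach under the surviving loop
    }
    LI.destroy(Child);             // safe: Child no longer owns sub-loops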
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 2946c0018c31..193806d9cc87 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -12,8 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/InstIterator.h"
using namespace llvm;
/// Checks if we should import SGV as a definition, otherwise import as a
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index c1c5f5cc879f..c5aded3c45f4 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -38,22 +38,26 @@ static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
}
/// It is safe to destroy a constant iff it is only used by constants itself.
-/// Note that constants cannot be cyclic, so this test is pretty easy to
-/// implement recursively.
-///
+/// Note that while constants cannot be cyclic, their use graphs can be
+/// DAG-like, so we keep a visited set to avoid exponential runtime.
bool llvm::isSafeToDestroyConstant(const Constant *C) {
- if (isa<GlobalValue>(C))
- return false;
-
- if (isa<ConstantData>(C))
- return false;
+ SmallVector<const Constant *, 8> Worklist;
+ SmallPtrSet<const Constant *, 8> Visited;
+ Worklist.push_back(C);
+ while (!Worklist.empty()) {
+ const Constant *C = Worklist.pop_back_val();
+ if (!Visited.insert(C).second)
+ continue;
+ if (isa<GlobalValue>(C) || isa<ConstantData>(C))
+ return false;
- for (const User *U : C->users())
- if (const Constant *CU = dyn_cast<Constant>(U)) {
- if (!isSafeToDestroyConstant(CU))
+ for (const User *U : C->users()) {
+ if (const Constant *CU = dyn_cast<Constant>(U))
+ Worklist.push_back(CU);
+ else
return false;
- } else
- return false;
+ }
+ }
return true;
}
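This replaces the recursive user walk with LLVM's standard worklist-plus-visited idiom: since constant expressions can be shared by many users, the recursion could revisit the same node exponentially often, while the visited set bounds the work to one visit per node. The reusable skeleton, as a sketch (isSafe is a hypothetical per-node predicate):

    static bool allTransitiveUsersSafe(const Constant *Root) {
      SmallVector<const Constant *, 8> Worklist = {Root};
      SmallPtrSet<const Constant *, 8> Visited;
      while (!Worklist.empty()) {
        const Constant *C = Worklist.pop_back_val();
        if (!Visited.insert(C).second)
          continue; // already reached along another use edge
        if (!isSafe(C))
          return false;
        for (const User *U : C->users()) {
          if (const auto *CU = dyn_cast<Constant>(U))
            Worklist.push_back(CU);
          else
            return false; // a non-constant user pins the constant alive
        }
      }
      return true;
    }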
@@ -100,6 +104,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (SI->isVolatile())
return true;
+ ++GS.NumStores;
+
GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
// If this is a direct store to the global (i.e., the global is a scalar
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index 047bf5569ded..55bcb6f3b121 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 923bcc781e47..2fb00f95b749 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -37,7 +37,6 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -85,7 +84,7 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
static cl::opt<bool>
UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
- cl::ZeroOrMore, cl::init(true),
+ cl::init(true),
cl::desc("Use the llvm.experimental.noalias.scope.decl "
"intrinsic during inlining."));
@@ -1044,12 +1043,10 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
}
for (Value *Arg : Call->args()) {
- // We need to check the underlying objects of all arguments, not just
- // the pointer arguments, because we might be passing pointers as
- // integers, etc.
- // However, if we know that the call only accesses pointer arguments,
- // then we only need to check the pointer arguments.
- if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
+ // Only care about pointer arguments. If a noalias argument is
+ // accessed through a non-pointer argument, it must be captured
+ // first (e.g. via ptrtoint), and we protect against captures below.
+ if (!Arg->getType()->isPointerTy())
continue;
PtrArgs.push_back(Arg);
@@ -1080,7 +1077,8 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
// Figure out if we're derived from anything that is not a noalias
// argument.
- bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
+ bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
+ UsesUnknownObject = false;
for (const Value *V : ObjSet) {
// Is this value a constant that cannot be derived from any pointer
// value (we need to exclude constant expressions, for example, that
@@ -1101,19 +1099,28 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
UsesAliasingPtr = true;
}
- // If this is not some identified function-local object (which cannot
- // directly alias a noalias argument), or some other argument (which,
- // by definition, also cannot alias a noalias argument), then we could
- // alias a noalias argument that has been captured).
- if (!isa<Argument>(V) &&
- !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
- CanDeriveViaCapture = true;
+ if (isEscapeSource(V)) {
+ // An escape source can only alias with a noalias argument if it has
+ // been captured beforehand.
+ RequiresNoCaptureBefore = true;
+ } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) {
+ // If this is neither an escape source, nor some identified object
+ // (which cannot directly alias a noalias argument), nor some other
+ // argument (which, by definition, also cannot alias a noalias
+ // argument), conservatively do not make any assumptions.
+ UsesUnknownObject = true;
+ }
}
+ // Nothing we can do if the used underlying object cannot be reliably
+ // determined.
+ if (UsesUnknownObject)
+ continue;
+
// A function call can always get captured noalias pointers (via other
// parameters, globals, etc.).
if (IsFuncCall && !IsArgMemOnlyCall)
- CanDeriveViaCapture = true;
+ RequiresNoCaptureBefore = true;
// First, we want to figure out all of the sets with which we definitely
// don't alias. Iterate over all noalias set, and add those for which:
@@ -1124,16 +1131,16 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
// noalias arguments via other noalias arguments or globals, and so we
// must always check for prior capture.
for (const Argument *A : NoAliasArgs) {
- if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
- // It might be tempting to skip the
- // PointerMayBeCapturedBefore check if
- // A->hasNoCaptureAttr() is true, but this is
- // incorrect because nocapture only guarantees
- // that no copies outlive the function, not
- // that the value cannot be locally captured.
- !PointerMayBeCapturedBefore(A,
- /* ReturnCaptures */ false,
- /* StoreCaptures */ false, I, &DT)))
+ if (ObjSet.contains(A))
+ continue; // May be based on a noalias argument.
+
+ // It might be tempting to skip the PointerMayBeCapturedBefore check if
+ // A->hasNoCaptureAttr() is true, but this is incorrect because
+ // nocapture only guarantees that no copies outlive the function, not
+ // that the value cannot be locally captured.
+ if (!RequiresNoCaptureBefore ||
+ !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false,
+ /* StoreCaptures */ false, I, &DT))
NoAliases.push_back(NewScopes[A]);
}
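The rewritten classification sorts each underlying object into three buckets: escape sources, which can only alias a noalias argument if it was captured earlier; arguments and identified objects, which can never alias one directly; and everything else, for which no sound conclusion exists. As a decision sketch:

    for (const Value *V : ObjSet) {
      if (isEscapeSource(V))
        RequiresNoCaptureBefore = true; // alias only possible after a capture
      else if (!isa<Argument>(V) && !isIdentifiedObject(V))
        UsesUnknownObject = true;       // unknown provenance: assume nothing
    }
    if (UsesUnknownObject)
      continue;                         // emit no scope metadata for this access

Compared to the old CanDeriveViaCapture flag, the explicit third bucket is what makes the conservative bail-out possible.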
@@ -1422,7 +1429,8 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg,
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
// pointer inside the callee).
- Alignment = max(Alignment, MaybeAlign(ByValAlignment));
+ if (ByValAlignment > 0)
+ Alignment = std::max(Alignment, Align(ByValAlignment));
Value *NewAlloca =
new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment,
@@ -1601,7 +1609,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
return;
auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
int64_t CallCount =
- std::min(CallSiteCount.getValueOr(0), CalleeEntryCount.getCount());
+ std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount());
updateProfileCallee(Callee, -CallCount, &VMap);
}
@@ -1609,7 +1617,7 @@ void llvm::updateProfileCallee(
Function *Callee, int64_t EntryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap) {
auto CalleeCount = Callee->getEntryCount();
- if (!CalleeCount.hasValue())
+ if (!CalleeCount)
return;
const uint64_t PriorEntryCount = CalleeCount->getCount();
@@ -1789,6 +1797,13 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
BasicBlock *OrigBB = CB.getParent();
Function *Caller = OrigBB->getParent();
+ // Do not inline a strictfp function into a non-strictfp one: it would require
+ // converting all FP operations in the host function to constrained intrinsics.
+ if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) &&
+ !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) {
+ return InlineResult::failure("incompatible strictfp attributes");
+ }
+
// GC poses two hazards to inlining, which only occur when the callee has GC:
// 1. If the caller has no GC, then the callee's GC must be propagated to the
// caller.
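The strictfp guard runs before any inlining work, so callers simply observe a failed InlineResult. A probing sketch (CB being the call site under consideration):

    InlineFunctionInfo IFI;
    InlineResult IR = InlineFunction(CB, IFI);
    if (!IR.isSuccess())
      // e.g. "incompatible strictfp attributes" when a strictfp callee
      // would land in a non-strictfp caller.
      LLVM_DEBUG(dbgs() << "not inlined: " << IR.getFailureReason() << "\n");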
@@ -2644,7 +2659,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
AssumptionCache *AC =
IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
auto &DL = Caller->getParent()->getDataLayout();
- if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
+ if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 9082049c82da..47ab30f03d14 100644
--- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -18,7 +18,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include <utility>
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 72b864dc3e48..84d377d835f3 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -33,14 +33,13 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 6958a89f5be6..6e87da9fb168 100644
--- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -30,14 +30,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 9a10535c9310..b203259db1c6 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -29,7 +29,6 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -63,9 +62,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -80,7 +77,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
-#include <climits>
#include <cstdint>
#include <iterator>
#include <map>
@@ -489,7 +485,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
Optional<fp::ExceptionBehavior> ExBehavior = FPI->getExceptionBehavior();
- return ExBehavior.getValue() != fp::ebStrict;
+ return *ExBehavior != fp::ebStrict;
}
}
@@ -504,15 +500,12 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (isMathLibCallNoop(Call, TLI))
return true;
- // To express possible interaction with floating point environment constrained
- // intrinsics are described as if they access memory. So they look like having
- // side effect but actually do not have it unless they raise floating point
- // exception. If FP exceptions are ignored, the intrinsic may be deleted.
- if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
- Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
- if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
- return true;
- }
+ // Non-volatile atomic loads from constants can be removed.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (auto *GV = dyn_cast<GlobalVariable>(
+ LI->getPointerOperand()->stripPointerCasts()))
+ if (!LI->isVolatile() && GV->isConstant())
+ return true;
return false;
}
@@ -682,7 +675,7 @@ simplifyAndDCEInstruction(Instruction *I,
return true;
}
- if (Value *SimpleV = SimplifyInstruction(I, DL)) {
+ if (Value *SimpleV = simplifyInstruction(I, DL)) {
// Add the users to the worklist. CAREFUL: an instruction can use itself,
// in the case of a phi node.
for (User *U : I->users()) {
@@ -1133,7 +1126,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
// If there is more than one pred of succ, and there are PHI nodes in
// the successor, then we need to add incoming edges for the PHI nodes
//
- const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB));
+ const PredBlockVector BBPreds(predecessors(BB));
// Loop over all of the PHI nodes in the successor of BB.
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -1393,7 +1386,7 @@ Align llvm::getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign,
static bool PhiHasDebugValue(DILocalVariable *DIVar,
DIExpression *DIExpr,
PHINode *APN) {
- // Since we can't guarantee that the original dbg.declare instrinsic
+ // Since we can't guarantee that the original dbg.declare intrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
SmallVector<DbgValueInst *, 1> DbgValues;
@@ -1472,7 +1465,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
<< *DII << '\n');
// For now, when there is a store to parts of the variable (but we do not
- // know which part) we insert an dbg.value instrinsic to indicate that we
+ // know which part) we insert a dbg.value intrinsic to indicate that we
// know nothing about the variable's content.
DV = UndefValue::get(DV->getType());
Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
@@ -2240,6 +2233,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
+ II->setMetadata(LLVMContext::MD_prof, CI->getMetadata(LLVMContext::MD_prof));
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, BB, UnwindEdge}});
@@ -2349,19 +2343,42 @@ static bool markAliveBlocks(Function &F,
isa<UndefValue>(Callee)) {
changeToUnreachable(II, false, DTU);
Changed = true;
- } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
- if (II->use_empty() && II->onlyReadsMemory()) {
- // jump to the normal destination branch.
- BasicBlock *NormalDestBB = II->getNormalDest();
- BasicBlock *UnwindDestBB = II->getUnwindDest();
- BranchInst::Create(NormalDestBB, II);
- UnwindDestBB->removePredecessor(II->getParent());
- II->eraseFromParent();
+ } else {
+ if (II->doesNotReturn() &&
+ !isa<UnreachableInst>(II->getNormalDest()->front())) {
+ // If we found an invoke of a no-return function,
+ // create a new empty basic block with an `unreachable` terminator,
+ // and set it as the normal destination for the invoke,
+ // unless that is already the case.
+ // Note that the original normal destination could have other uses.
+ BasicBlock *OrigNormalDest = II->getNormalDest();
+ OrigNormalDest->removePredecessor(II->getParent());
+ LLVMContext &Ctx = II->getContext();
+ BasicBlock *UnreachableNormalDest = BasicBlock::Create(
+ Ctx, OrigNormalDest->getName() + ".unreachable",
+ II->getFunction(), OrigNormalDest);
+ new UnreachableInst(Ctx, UnreachableNormalDest);
+ II->setNormalDest(UnreachableNormalDest);
if (DTU)
- DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
- } else
- changeToCall(II, DTU);
- Changed = true;
+ DTU->applyUpdates(
+ {{DominatorTree::Delete, BB, OrigNormalDest},
+ {DominatorTree::Insert, BB, UnreachableNormalDest}});
+ Changed = true;
+ }
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {
+ if (II->use_empty() && !II->mayHaveSideEffects()) {
+ // jump to the normal destination branch.
+ BasicBlock *NormalDestBB = II->getNormalDest();
+ BasicBlock *UnwindDestBB = II->getUnwindDest();
+ BranchInst::Create(NormalDestBB, II);
+ UnwindDestBB->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
+ } else
+ changeToCall(II, DTU);
+ Changed = true;
+ }
}
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
// Remove catchpads which cannot be reached.
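For an invoke of a no-return callee, the new code redirects the normal destination to a fresh block containing only unreachable, rather than leaving the dead original successor wired up (it may have other predecessors). The shape of the rewrite, as a sketch:

    // Before: invoke @noreturn() to label %normal unwind label %lpad
    // After:  the invoke's normal destination is a fresh block whose only
    //         instruction is an `unreachable` terminator.
    BasicBlock *OrigNormalDest = II->getNormalDest();
    OrigNormalDest->removePredecessor(II->getParent());
    BasicBlock *NewDest = BasicBlock::Create(
        Ctx, OrigNormalDest->getName() + ".unreachable",
        II->getFunction(), /*InsertBefore=*/OrigNormalDest);
    new UnreachableInst(Ctx, NewDest); // sole instruction and terminator
    II->setNormalDest(NewDest);

The DomTree updates then just delete the edge to the old destination and insert the edge to the new one.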
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5b66da1e7082..f093fea19c4d 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -28,7 +28,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -38,12 +37,10 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <limits>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -389,6 +386,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!PP.AllowPeeling)
return;
+ // Check that we can peel at least one iteration.
+ if (2 * LoopSize > Threshold)
+ return;
+
unsigned AlreadyPeeled = 0;
if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
AlreadyPeeled = *Peeled;
@@ -401,47 +402,45 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// which every Phi is guaranteed to become an invariant, and try to peel the
// maximum number of iterations among these values, thus turning all those
// Phis into invariants.
- // First, check that we can peel at least one iteration.
- if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
- // Store the pre-calculated values here.
- SmallDenseMap<PHINode *, Optional<unsigned> > IterationsToInvariance;
- // Now go through all Phis to calculate their the number of iterations they
- // need to become invariants.
- // Start the max computation with the PP.PeelCount value set by the target
- // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
- unsigned DesiredPeelCount = TargetPeelCount;
- BasicBlock *BackEdge = L->getLoopLatch();
- assert(BackEdge && "Loop is not in simplified form?");
- for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
- PHINode *Phi = cast<PHINode>(&*BI);
- auto ToInvariance = calculateIterationsToInvariance(
- Phi, L, BackEdge, IterationsToInvariance);
- if (ToInvariance)
- DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance);
- }
- // Pay respect to limitations implied by loop size and the max peel count.
- unsigned MaxPeelCount = UnrollPeelMaxCount;
- MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
-
- DesiredPeelCount = std::max(DesiredPeelCount,
- countToEliminateCompares(*L, MaxPeelCount, SE));
-
- if (DesiredPeelCount == 0)
- DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
-
- if (DesiredPeelCount > 0) {
- DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
- // Consider max peel count limitation.
- assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
- if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
- LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
- << " iteration(s) to turn"
- << " some Phis into invariants.\n");
- PP.PeelCount = DesiredPeelCount;
- PP.PeelProfiledIterations = false;
- return;
- }
+ // Store the pre-calculated values here.
+ SmallDenseMap<PHINode *, Optional<unsigned>> IterationsToInvariance;
+ // Now go through all Phis to calculate the number of iterations they
+ // need to become invariants.
+ // Start the max computation with the PP.PeelCount value set by the target
+ // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
+ unsigned DesiredPeelCount = TargetPeelCount;
+ BasicBlock *BackEdge = L->getLoopLatch();
+ assert(BackEdge && "Loop is not in simplified form?");
+ for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+ PHINode *Phi = cast<PHINode>(&*BI);
+ auto ToInvariance = calculateIterationsToInvariance(Phi, L, BackEdge,
+ IterationsToInvariance);
+ if (ToInvariance)
+ DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance);
+ }
+
+ // Pay respect to limitations implied by loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
+
+ DesiredPeelCount = std::max(DesiredPeelCount,
+ countToEliminateCompares(*L, MaxPeelCount, SE));
+
+ if (DesiredPeelCount == 0)
+ DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
+
+ if (DesiredPeelCount > 0) {
+ DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+ // Consider max peel count limitation.
+ assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+ if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
+ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+ << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ PP.PeelCount = DesiredPeelCount;
+ PP.PeelProfiledIterations = false;
+ return;
}
}
@@ -461,27 +460,26 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (L->getHeader()->getParent()->hasProfileData()) {
if (violatesLegacyMultiExitLoopCheck(L))
return;
- Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
- if (!PeelCount)
+ Optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
+ if (!EstimatedTripCount)
return;
- LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
- << "\n");
+ LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
+ << *EstimatedTripCount << "\n");
- if (*PeelCount) {
- if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
- (LoopSize * (*PeelCount + 1) <= Threshold)) {
- LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
- << " iterations.\n");
- PP.PeelCount = *PeelCount;
+ if (*EstimatedTripCount) {
+ if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
+ unsigned PeelCount = *EstimatedTripCount;
+ LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
+ PP.PeelCount = PeelCount;
return;
}
- LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
- LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
+ LLVM_DEBUG(dbgs() << "Max peel count by cost: "
+ << (Threshold / LoopSize - 1) << "\n");
}
}
}
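The peeling budget is governed by two caps: the flag-driven UnrollPeelMaxCount and the size-derived Threshold / LoopSize - 1 (peeling N iterations leaves N + 1 copies of the body). A worked example with assumed numbers (not from the patch):

    unsigned Threshold = 400, LoopSize = 50, UnrollPeelMaxCount = 7; // assumed
    unsigned MaxPeelCount =
        std::min(UnrollPeelMaxCount, Threshold / LoopSize - 1); // min(7,7) == 7
    // Peeling 7 iterations yields 8 * 50 = 400 instructions, exactly at the
    // threshold; this is also why "2 * LoopSize > Threshold" rejects loops
    // that cannot afford even a single peeled iteration.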
@@ -579,7 +577,8 @@ static void cloneLoopBlocks(
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
- LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
+ LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
+ ScalarEvolution &SE) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *PreHeader = L->getLoopPreheader();
@@ -685,6 +684,7 @@ static void cloneLoopBlocks(
if (LatchInst && L->contains(LatchInst))
LatchVal = VMap[LatchVal];
PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
+ SE.forgetValue(&PHI);
}
// LastValueMap is updated with the values for the current loop
@@ -719,9 +719,9 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
}
// User specifed values provided by argument.
- if (UserAllowPeeling.hasValue())
+ if (UserAllowPeeling)
PP.AllowPeeling = *UserAllowPeeling;
- if (UserAllowProfileBasedPeeling.hasValue())
+ if (UserAllowProfileBasedPeeling)
PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
return PP;
@@ -851,7 +851,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
LoopBlocks, VMap, LVMap, &DT, LI,
- LoopLocalNoAliasDeclScopes);
+ LoopLocalNoAliasDeclScopes, *SE);
// Remap to use values from the current iteration instead of the
// previous one.
@@ -907,8 +907,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// We modified the loop, update SE.
SE->forgetTopmostLoop(L);
+#ifdef EXPENSIVE_CHECKS
// Finally DomtTree must be correct.
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif
// FIXME: Incrementally update loop-simplify
simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index c66fd7bb0588..0f33559c7e70 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -13,31 +13,24 @@
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -317,7 +310,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
L->dump());
return Rotated;
}
- if (Metrics.NumInsts > MaxHeaderSize) {
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
+ " with invalid cost: ";
+ L->dump());
+ return Rotated;
+ }
+ if (*Metrics.NumInsts.getValue() > MaxHeaderSize) {
LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
<< Metrics.NumInsts
<< " instructions, which is more than the threshold ("
@@ -446,7 +445,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- Value *V = SimplifyInstruction(C, SQ);
+ Value *V = simplifyInstruction(C, SQ);
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 67311ab4cd02..55d5c733733b 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -40,8 +40,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -59,14 +57,11 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -181,7 +176,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+ if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
@@ -602,7 +597,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+ if (Value *V = simplifyInstruction(PN, {DL, nullptr, DT, AC})) {
if (SE) SE->forgetValue(PN);
if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
PN->replaceAllUsesWith(V);
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 9ca1f4f44b97..1be1082002fc 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -236,7 +236,7 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : L->getBlocks()) {
for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
- if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
if (LI->replacementPreservesLCSSAForm(&Inst, V))
Inst.replaceAllUsesWith(V);
if (isInstructionTriviallyDead(&Inst))
@@ -513,7 +513,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (const DILocation *DIL = I.getDebugLoc()) {
auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
if (NewDIL)
- I.setDebugLoc(NewDIL.getValue());
+ I.setDebugLoc(*NewDIL);
else
LLVM_DEBUG(dbgs()
<< "Failed to create new discriminator: "
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 6efaa012aeca..96485d15c75b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -39,7 +38,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
@@ -358,7 +356,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
if (const DILocation *DIL = I.getDebugLoc()) {
auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
if (NewDIL)
- I.setDebugLoc(NewDIL.getValue());
+ I.setDebugLoc(*NewDIL);
else
LLVM_DEBUG(dbgs()
<< "Failed to create new discriminator: "
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index bb719a499a4c..cd3b6c1a095a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -20,20 +20,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -74,7 +73,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *OriginalLoopLatchExit,
BasicBlock *PreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ LoopInfo *LI, bool PreserveLCSSA,
+ ScalarEvolution &SE) {
// Loop structure should be the following:
// Preheader
// PrologHeader
@@ -134,6 +134,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
PN.setIncomingValueForBlock(NewPreHeader, NewPN);
else
PN.addIncoming(NewPN, PrologExit);
+ SE.forgetValue(&PN);
}
}
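This `SE.forgetValue(&PN)` call, and the matching one added to ConnectEpilog below, share one rationale: once a PHI gains or changes an incoming value, any SCEV cached for it describes the old node. A minimal sketch of the idiom (the helper name is hypothetical, not part of the patch):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void addIncomingAndInvalidate(PHINode &PN, Value *V, BasicBlock *Pred,
                                     ScalarEvolution &SE) {
  PN.addIncoming(V, Pred); // The PHI now computes a different value...
  SE.forgetValue(&PN);     // ...so drop any SCEV cached for it.
}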
@@ -192,7 +193,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
BasicBlock *Exit, BasicBlock *PreHeader,
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ LoopInfo *LI, bool PreserveLCSSA,
+ ScalarEvolution &SE) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -233,6 +235,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Add incoming PreHeader from branch around the Loop
PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
+ SE.forgetValue(&PN);
Value *V = PN.getIncomingValueForBlock(Latch);
Instruction *I = dyn_cast<Instruction>(V);
@@ -398,7 +401,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
Optional<MDNode *> NewLoopID = makeFollowupLoopID(
LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
- if (NewLoopID.hasValue()) {
+ if (NewLoopID) {
NewLoop->setLoopID(NewLoopID.getValue());
// Do not setLoopAlreadyUnrolled if loop attributes have been defined
@@ -739,11 +742,28 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Compute the number of extra iterations required, which is:
// extra iterations = run-time trip count % loop unroll factor
PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+ IRBuilder<> B(PreHeaderBR);
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
- Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
- PreHeaderBR);
- IRBuilder<> B(PreHeaderBR);
+ Value *BECount;
+ // If there are other exits before the latch, that may cause the latch exit
+ // branch to never be executed, and the latch exit count may be poison.
+ // In this case, freeze the TripCount and base BECount on the frozen
+ // TripCount. We will introduce two branches using these values, and it's
+ important that they see a consistent value (which would not be guaranteed
+ if they were frozen independently).
+ if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
+ !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
+ TripCount = B.CreateFreeze(TripCount);
+ BECount =
+ B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1));
+ } else {
+ // If we don't need to freeze, use SCEVExpander for BECount as well, to
+ // allow slightly better value reuse.
+ BECount =
+ Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
+ }
+
Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
Value *BranchVal =
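To make the freeze logic above concrete, here is the pattern in isolation, as a sketch with hypothetical names: the trip count is frozen once, and the backedge-taken count is derived from the frozen value, so both guards observe the same concrete number even when the original expression was poison.

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include <utility>
using namespace llvm;

static std::pair<Value *, Value *> freezeTripAndBECount(IRBuilder<> &B,
                                                        Value *TripCount) {
  Value *FrozenTC = B.CreateFreeze(TripCount);
  // BECount = FrozenTC - 1, computed from the same frozen value; freezing
  // TripCount and BECount independently could yield inconsistent guards.
  Value *BECount =
      B.CreateAdd(FrozenTC, ConstantInt::get(FrozenTC->getType(), -1));
  return {FrozenTC, BECount};
}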
@@ -884,9 +904,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
if (UseEpilogRemainder) {
// Connect the epilog code to the original loop and update the
// PHI functions.
- ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
- EpilogPreHeader, NewPreHeader, VMap, DT, LI,
- PreserveLCSSA);
+ ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
// Update counter in loop for unrolling.
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
@@ -910,7 +929,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Connect the prolog code to the original loop and update the
// PHI functions.
ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA);
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
}
// If this loop is nested, then the loop unroller changes the code in any of
// its parent loops, so the Scalar Evolution pass needs to be run again.
@@ -941,7 +960,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : RemainderBlocks) {
for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
- if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
if (LI->replacementPreservesLCSSAForm(&Inst, V))
Inst.replaceAllUsesWith(V);
if (isInstructionTriviallyDead(&Inst))
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 95db2fe8d310..ec898c463574 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -23,31 +23,25 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -260,10 +254,10 @@ llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
Optional<int> Width =
getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
- if (Width.hasValue()) {
+ if (Width) {
Optional<int> IsScalable = getOptionalIntLoopAttribute(
TheLoop, "llvm.loop.vectorize.scalable.enable");
- return ElementCount::get(*Width, IsScalable.getValueOr(false));
+ return ElementCount::get(*Width, IsScalable.value_or(false));
}
return None;
@@ -364,7 +358,7 @@ TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
Optional<int> Count =
getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
- if (Count.hasValue())
+ if (Count)
return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
@@ -385,7 +379,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
Optional<int> Count =
getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
- if (Count.hasValue())
+ if (Count)
return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
@@ -497,9 +491,11 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
if (SE)
SE->forgetLoop(L);
- auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
- assert(OldBr && "Preheader must end with a branch");
- assert(OldBr->isUnconditional() && "Preheader must have a single successor");
+ Instruction *OldTerm = Preheader->getTerminator();
+ assert(!OldTerm->mayHaveSideEffects() &&
+ "Preheader must end with a side-effect-free terminator");
+ assert(OldTerm->getNumSuccessors() == 1 &&
+ "Preheader must have a single successor");
// Connect the preheader to the exit block. Keep the old edge to the header
// around to perform the dominator tree update in two separate steps
// -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
@@ -525,7 +521,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// coming to this inner loop, this will break the outer loop structure (by
// deleting the backedge of the outer loop). If the outer loop is indeed a
// non-loop, it will be deleted in a future iteration of loop deletion pass.
- IRBuilder<> Builder(OldBr);
+ IRBuilder<> Builder(OldTerm);
auto *ExitBlock = L->getUniqueExitBlock();
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
@@ -535,7 +531,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
// Remove the old branch. The conditional branch becomes a new terminator.
- OldBr->eraseFromParent();
+ OldTerm->eraseFromParent();
// Rewrite phis in the exit block to get their inputs from the Preheader
// instead of the exiting block.
@@ -579,7 +575,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
assert(L->hasNoExitBlocks() &&
"Loop should have either zero or one exit blocks.");
- Builder.SetInsertPoint(OldBr);
+ Builder.SetInsertPoint(OldTerm);
Builder.CreateUnreachable();
Preheader->getTerminator()->eraseFromParent();
}
@@ -692,18 +688,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
}
}
-static Loop *getOutermostLoop(Loop *L) {
- while (Loop *Parent = L->getParentLoop())
- L = Parent;
- return L;
-}
-
void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
LoopInfo &LI, MemorySSA *MSSA) {
auto *Latch = L->getLoopLatch();
assert(Latch && "multiple latches not yet supported");
auto *Header = L->getHeader();
- Loop *OutermostLoop = getOutermostLoop(L);
+ Loop *OutermostLoop = L->getOutermostLoop();
SE.forgetLoop(L);
@@ -1103,7 +1093,8 @@ Value *llvm::createOrderedReduction(IRBuilderBase &B,
return B.CreateFAddReduce(Start, Src);
}
-void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
+ bool IncludeWrapFlags) {
auto *VecOp = dyn_cast<Instruction>(I);
if (!VecOp)
return;
@@ -1112,7 +1103,7 @@ void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
if (!Intersection)
return;
const unsigned Opcode = Intersection->getOpcode();
- VecOp->copyIRFlags(Intersection);
+ VecOp->copyIRFlags(Intersection, IncludeWrapFlags);
for (auto *V : VL) {
auto *Instr = dyn_cast<Instruction>(V);
if (!Instr)
@@ -1536,6 +1527,11 @@ static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+ if (CG->NeedsFreeze) {
+ IRBuilder<> Builder(Loc);
+ Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");
+ End = Builder.CreateFreeze(End, End->getName() + ".fr");
+ }
LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
return {Start, End};
}
@@ -1614,6 +1610,45 @@ Value *llvm::addRuntimeChecks(
return MemoryRuntimeCheck;
}
+Value *llvm::addDiffRuntimeChecks(
+ Instruction *Loc, Loop *TheLoop, ArrayRef<PointerDiffInfo> Checks,
+ SCEVExpander &Expander,
+ function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
+
+ LLVMContext &Ctx = Loc->getContext();
+ IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,
+ Loc->getModule()->getDataLayout());
+ ChkBuilder.SetInsertPoint(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+
+ for (auto &C : Checks) {
+ Type *Ty = C.SinkStart->getType();
+ // Compute VF * IC * AccessSize.
+ auto *VFTimesUFTimesSize =
+ ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
+ ConstantInt::get(Ty, IC * C.AccessSize));
+ Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
+ Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
+ if (C.NeedsFreeze) {
+ IRBuilder<> Builder(Loc);
+ Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
+ Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
+ }
+ Value *Diff = ChkBuilder.CreateSub(Sink, Src);
+ Value *IsConflict =
+ ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+
+ if (MemoryRuntimeCheck) {
+ IsConflict =
+ ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+
+ return MemoryRuntimeCheck;
+}
+
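A scalar model of the `diff.check` emitted above: for a sink/source pair advancing in lockstep, the accesses can conflict within one vectorized step exactly when the unsigned distance between their start addresses is smaller than the bytes accessed per step (VF * IC * AccessSize). This is an illustrative sketch, not the pass's code:

#include <cstdint>

static bool diffCheckConflicts(uint64_t SinkStart, uint64_t SrcStart,
                               uint64_t VF, uint64_t IC, uint64_t AccessSize) {
  uint64_t Diff = SinkStart - SrcStart; // wraps, like the IR 'sub'
  return Diff < VF * IC * AccessSize;   // mirrors the unsigned 'diff.check'
}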
Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
unsigned MSSAThreshold,
MemorySSA &MSSA,
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index f0bf625fa18e..97f29527bb95 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -41,9 +41,8 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI,
ArrayRef<RuntimePointerCheck> Checks, Loop *L,
LoopInfo *LI, DominatorTree *DT,
ScalarEvolution *SE)
- : VersionedLoop(L), NonVersionedLoop(nullptr),
- AliasChecks(Checks.begin(), Checks.end()),
- Preds(LAI.getPSE().getUnionPredicate()), LAI(LAI), LI(LI), DT(DT),
+ : VersionedLoop(L), AliasChecks(Checks.begin(), Checks.end()),
+ Preds(LAI.getPSE().getPredicate()), LAI(LAI), LI(LI), DT(DT),
SE(SE) {
}
@@ -277,7 +276,7 @@ bool runImpl(LoopInfo *LI, function_ref<const LoopAccessInfo &(Loop &)> GetLAA,
const LoopAccessInfo &LAI = GetLAA(*L);
if (!LAI.hasConvergentOp() &&
(LAI.getNumRuntimePointerChecks() ||
- !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+ !LAI.getPSE().getPredicate().isAlwaysTrue())) {
LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L,
LI, DT, SE);
LVer.versionLoop();
diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
new file mode 100644
index 000000000000..8641581c8039
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -0,0 +1,93 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerAtomic.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loweratomic"
+
+bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
+ IRBuilder<> Builder(CXI);
+ Value *Ptr = CXI->getPointerOperand();
+ Value *Cmp = CXI->getCompareOperand();
+ Value *Val = CXI->getNewValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
+ Res = Builder.CreateInsertValue(Res, Equal, 1);
+
+ CXI->replaceAllUsesWith(Res);
+ CXI->eraseFromParent();
+ return true;
+}
+
+Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
+ IRBuilderBase &Builder, Value *Loaded,
+ Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Inc;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Inc, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Inc, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Inc, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Inc, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::FAdd:
+ return Builder.CreateFAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::FSub:
+ return Builder.CreateFSub(Loaded, Inc, "new");
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+ IRBuilder<> Builder(RMWI);
+ Value *Ptr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
+
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
+ Value *Res = buildAtomicRMWValue(RMWI->getOperation(), Builder, Orig, Val);
+ Builder.CreateStore(Res, Ptr);
+ RMWI->replaceAllUsesWith(Orig);
+ RMWI->eraseFromParent();
+ return true;
+}
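In source-level terms, the lowerings in this new file turn each atomic operation into a plain load/modify/store sequence, which is sound only in the single-threaded, non-preemptible setting the header comment describes. A C++ sketch of the cmpxchg case (assuming away memory ordering and concurrency entirely):

#include <utility>

template <typename T>
static std::pair<T, bool> loweredCmpXchg(T *Ptr, T Cmp, T New) {
  T Orig = *Ptr;             // plain load replaces the atomic read
  bool Equal = (Orig == Cmp);
  *Ptr = Equal ? New : Orig; // plain store replaces the atomic write
  return {Orig, Equal};      // matches cmpxchg's {old value, success} pair
}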
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
index ca6f3f194645..010deb77a883 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/llvm/lib/Transforms/Utils/LowerGlobalDtors.cpp
@@ -1,4 +1,4 @@
-//===-- WebAssemblyLowerGlobalDtors.cpp - Lower @llvm.global_dtors --------===//
+//===-- LowerGlobalDtors.cpp - Lower @llvm.global_dtors -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,33 +9,31 @@
/// \file
/// Lower @llvm.global_dtors.
///
-/// WebAssembly doesn't have a builtin way to invoke static destructors.
/// Implement @llvm.global_dtors by creating wrapper functions that are
/// registered in @llvm.global_ctors and which contain a call to
/// `__cxa_atexit` to register their destructor functions.
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
-#include "llvm/ADT/MapVector.h"
+#include "llvm/Transforms/Utils/LowerGlobalDtors.h"
+
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
using namespace llvm;
-#define DEBUG_TYPE "wasm-lower-global-dtors"
+#define DEBUG_TYPE "lower-global-dtors"
namespace {
-class LowerGlobalDtors final : public ModulePass {
+class LowerGlobalDtorsLegacyPass final : public ModulePass {
StringRef getPassName() const override {
- return "WebAssembly Lower @llvm.global_dtors";
+ return "Lower @llvm.global_dtors via `__cxa_atexit`";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -47,21 +45,35 @@ class LowerGlobalDtors final : public ModulePass {
public:
static char ID;
- LowerGlobalDtors() : ModulePass(ID) {}
+ LowerGlobalDtorsLegacyPass() : ModulePass(ID) {
+ initializeLowerGlobalDtorsLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
};
} // End anonymous namespace
-char LowerGlobalDtors::ID = 0;
-INITIALIZE_PASS(LowerGlobalDtors, DEBUG_TYPE,
- "Lower @llvm.global_dtors for WebAssembly", false, false)
+char LowerGlobalDtorsLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerGlobalDtorsLegacyPass, DEBUG_TYPE,
+ "Lower @llvm.global_dtors via `__cxa_atexit`", false, false)
-ModulePass *llvm::createWebAssemblyLowerGlobalDtors() {
- return new LowerGlobalDtors();
+ModulePass *llvm::createLowerGlobalDtorsLegacyPass() {
+ return new LowerGlobalDtorsLegacyPass();
}
-bool LowerGlobalDtors::runOnModule(Module &M) {
- LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n");
+static bool runImpl(Module &M);
+bool LowerGlobalDtorsLegacyPass::runOnModule(Module &M) { return runImpl(M); }
+
+PreservedAnalyses LowerGlobalDtorsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ bool Changed = runImpl(M);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+static bool runImpl(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
if (!GV || !GV->hasInitializer())
return false;
@@ -129,15 +141,14 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
/*isVarArg=*/false));
// Declare __dso_handle.
- Constant *DsoHandle = M.getNamedValue("__dso_handle");
- if (!DsoHandle) {
- Type *DsoHandleTy = Type::getInt8Ty(C);
- GlobalVariable *Handle = new GlobalVariable(
- M, DsoHandleTy, /*isConstant=*/true,
- GlobalVariable::ExternalWeakLinkage, nullptr, "__dso_handle");
- Handle->setVisibility(GlobalVariable::HiddenVisibility);
- DsoHandle = Handle;
- }
+ Type *DsoHandleTy = Type::getInt8Ty(C);
+ Constant *DsoHandle = M.getOrInsertGlobal("__dso_handle", DsoHandleTy, [&] {
+ auto *GV = new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true,
+ GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__dso_handle");
+ GV->setVisibility(GlobalVariable::HiddenVisibility);
+ return GV;
+ });
// For each unique priority level and associated symbol, generate a function
// to call all the destructors at that level, and a function to register the
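Conceptually, for each destructor priority level the pass synthesizes code along these lines (a sketch with invented names; the real symbols are compiler-generated):

extern "C" int __cxa_atexit(void (*func)(void *), void *arg, void *dso_handle);
extern "C" char __dso_handle;

static void some_dtor() {}                   // stand-in for a registered dtor

static void call_dtors_at_priority(void *) { // runs at program exit
  some_dtor();
}
static void register_dtors() {               // appended to @llvm.global_ctors
  __cxa_atexit(call_dtors_at_priority, nullptr, &__dso_handle);
}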
diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index fe0ff5899d8f..59cfa41fb7fd 100644
--- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -17,8 +17,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 3d75dd57456d..b4acb1b2ae90 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -18,7 +20,9 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, ConstantInt *CopyLen,
Align SrcAlign, Align DstAlign,
bool SrcIsVolatile, bool DstIsVolatile,
- const TargetTransformInfo &TTI) {
+ bool CanOverlap,
+ const TargetTransformInfo &TTI,
+ Optional<uint32_t> AtomicElementSize) {
// No need to expand zero length copies.
if (CopyLen->isZero())
return;
@@ -28,15 +32,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
Function *ParentFunc = PreLoopBB->getParent();
LLVMContext &Ctx = PreLoopBB->getContext();
const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
+ MDBuilder MDB(Ctx);
+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+ StringRef Name = "MemCopyAliasScope";
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
Type *TypeOfCopyLen = CopyLen->getType();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+ AtomicElementSize);
+ assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+ "Atomic memcpy lowering is not supported for vector operand type");
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+ assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
+
uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
if (LoopEndCount != 0) {
@@ -68,12 +82,25 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
// Loop Body
Value *SrcGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
- PartSrcAlign, SrcIsVolatile);
+ LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
Value *DstGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+ StoreInst *Store = LoopBuilder.CreateAlignedStore(
+ Load, DstGEP, PartDstAlign, DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
Value *NewIndex =
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
LoopIndex->addIncoming(NewIndex, LoopBB);
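The metadata added above follows a standard pattern: one anonymous alias domain with a single scope; loads are tagged `!alias.scope` and stores `!noalias` against that same scope, which lets alias analysis assume the stores of the expanded loop never clobber its loads. The same pattern in isolation (the helper name is hypothetical):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static void markNonOverlapping(LoadInst *Load, StoreInst *Store,
                               LLVMContext &Ctx) {
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  MDNode *Scope = MDB.createAnonymousAliasScope(Domain, "MemCopyAliasScope");
  Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, Scope));
  Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, Scope));
}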
@@ -93,7 +120,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
SrcAS, DstAS, SrcAlign.value(),
- DstAlign.value());
+ DstAlign.value(), AtomicElementSize);
for (auto OpTy : RemainingOps) {
Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
@@ -101,6 +128,10 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
// Calculate the new index
unsigned OperandSize = DL.getTypeStoreSize(OpTy);
+ assert(
+ (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
+
uint64_t GepIndex = BytesCopied / OperandSize;
assert(GepIndex * OperandSize == BytesCopied &&
"Division should have no Remainder!");
@@ -111,9 +142,13 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
Value *SrcGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
- Value *Load =
+ LoadInst *Load =
RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
-
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
// Cast destination to operand type and store.
PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
Value *CastedDst = DstAddr->getType() == DstPtrType
@@ -121,8 +156,16 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(DstAddr, DstPtrType);
Value *DstGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
- RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+ StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+ DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
BytesCopied += OperandSize;
}
}
@@ -134,8 +177,9 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Value *SrcAddr, Value *DstAddr,
Value *CopyLen, Align SrcAlign,
Align DstAlign, bool SrcIsVolatile,
- bool DstIsVolatile,
- const TargetTransformInfo &TTI) {
+ bool DstIsVolatile, bool CanOverlap,
+ const TargetTransformInfo &TTI,
+ Optional<uint32_t> AtomicElementSize) {
BasicBlock *PreLoopBB = InsertBefore->getParent();
BasicBlock *PostLoopBB =
PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
@@ -143,12 +187,22 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Function *ParentFunc = PreLoopBB->getParent();
const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
LLVMContext &Ctx = PreLoopBB->getContext();
+ MDBuilder MDB(Ctx);
+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
+ StringRef Name = "MemCopyAliasScope";
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
+ AtomicElementSize);
+ assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
+ "Atomic memcpy lowering is not supported for vector operand type");
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
+ assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
+ "Atomic memcpy lowering is not supported for selected operand size");
IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
@@ -183,19 +237,40 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP, PartSrcAlign,
- SrcIsVolatile);
+ LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
+ }
Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
- LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
+ StoreInst *Store =
+ LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
Value *NewIndex =
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
LoopIndex->addIncoming(NewIndex, LoopBB);
- if (!LoopOpIsInt8) {
- // Add in the
- Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
- Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+ bool requiresResidual =
+ !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
+ if (requiresResidual) {
+ Type *ResLoopOpType = AtomicElementSize
+ ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
+ : Int8Type;
+ unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
+ assert((ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1)) &&
+ "Store size is expected to match type size");
+
+ // Add in the residual copy of the bytes the main loop leaves over.
+ Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+ Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
// Loop body for the residual copy.
BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
@@ -230,21 +305,34 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
ResidualIndex->addIncoming(Zero, ResHeaderBB);
- Value *SrcAsInt8 =
- ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
- Value *DstAsInt8 =
- ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
+ Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
+ SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
+ Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
+ DstAddr, PointerType::get(ResLoopOpType, DstAS));
Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
- Value *SrcGEP =
- ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
- Value *Load = ResBuilder.CreateAlignedLoad(Int8Type, SrcGEP, PartSrcAlign,
- SrcIsVolatile);
- Value *DstGEP =
- ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
- ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
-
- Value *ResNewIndex =
- ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
+ Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
+ ResLoopOpType, SrcAsResLoopOpType, FullOffset);
+ LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
+ PartSrcAlign, SrcIsVolatile);
+ if (!CanOverlap) {
+ // Set alias scope for loads.
+ Load->setMetadata(LLVMContext::MD_alias_scope,
+ MDNode::get(Ctx, NewScope));
+ }
+ Value *DstGEP = ResBuilder.CreateInBoundsGEP(
+ ResLoopOpType, DstAsResLoopOpType, FullOffset);
+ StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
+ DstIsVolatile);
+ if (!CanOverlap) {
+ // Indicate that stores don't overlap loads.
+ Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
+ }
+ if (AtomicElementSize) {
+ Load->setAtomic(AtomicOrdering::Unordered);
+ Store->setAtomic(AtomicOrdering::Unordered);
+ }
+ Value *ResNewIndex = ResBuilder.CreateAdd(
+ ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
// Create the loop branch condition.
@@ -297,7 +385,13 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
Function *F = OrigBB->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
- Type *EltTy = SrcAddr->getType()->getPointerElementType();
+ // TODO: Use different element type if possible?
+ IRBuilder<> CastBuilder(InsertBefore);
+ Type *EltTy = CastBuilder.getInt8Ty();
+ Type *PtrTy =
+ CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
+ SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
+ DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);
// Create a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
@@ -419,8 +513,21 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
NewBB);
}
+template <typename T>
+static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
+ if (SE) {
+ auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
+ auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
+ if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
+ return false;
+ }
+ return true;
+}
+
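The `canOverlap` helper leans on the `llvm.memcpy` contract: source and destination must either be exactly equal or not overlap at all. So proving the two start pointers unequal at the call site (via `isKnownPredicateAt`) rules out overlap entirely. A sketch of the intended use:

// The result gates the CanOverlap flag that expandMemCpyAsLoop threads into
// the loop builders below.
bool CanOverlap = canOverlap(Memcpy, SE);
// CanOverlap == false => the expanded loop's loads and stores may be tagged
// with the alias-scope metadata shown earlier in this file.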
void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ ScalarEvolution *SE) {
+ bool CanOverlap = canOverlap(Memcpy, SE);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
createMemCpyLoopKnownSize(
/* InsertBefore */ Memcpy,
@@ -431,6 +538,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
+ /* CanOverlap */ CanOverlap,
/* TargetTransformInfo */ TTI);
} else {
createMemCpyLoopUnknownSize(
@@ -442,6 +550,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
+ /* CanOverlap */ CanOverlap,
/* TargetTransformInfo */ TTI);
}
}
@@ -465,3 +574,35 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
/* Alignment */ Memset->getDestAlign().valueOrOne(),
Memset->isVolatile());
}
+
+void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
+ const TargetTransformInfo &TTI,
+ ScalarEvolution *SE) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
+ createMemCpyLoopKnownSize(
+ /* InsertBefore */ AtomicMemcpy,
+ /* SrcAddr */ AtomicMemcpy->getRawSource(),
+ /* DstAddr */ AtomicMemcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+ /* TargetTransformInfo */ TTI,
+ /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ } else {
+ createMemCpyLoopUnknownSize(
+ /* InsertBefore */ AtomicMemcpy,
+ /* SrcAddr */ AtomicMemcpy->getRawSource(),
+ /* DstAddr */ AtomicMemcpy->getRawDest(),
+ /* CopyLen */ AtomicMemcpy->getLength(),
+ /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
+ /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
+ /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
+ /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
+ /* TargetTransformInfo */ TTI,
+ /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ }
+}
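As a mental model for the element-atomic expansion: each element is transferred with an individually atomic access, but there is no atomicity across elements. A C++ sketch using GCC/Clang builtins (relaxed is the closest source-level analogue to the IR's `unordered` ordering):

#include <cstddef>
#include <cstdint>

static void elementAtomicCopy(uint32_t *Dst, const uint32_t *Src, size_t N) {
  for (size_t I = 0; I != N; ++I) {
    // Each 4-byte element is loaded and stored atomically on its own.
    uint32_t V = __atomic_load_n(&Src[I], __ATOMIC_RELAXED);
    __atomic_store_n(&Dst[I], V, __ATOMIC_RELAXED);
  }
}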
diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index aff9d1311688..44aeb26fadf9 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -119,25 +119,27 @@ raw_ostream &operator<<(raw_ostream &O, const CaseVector &C) {
void FixPhis(
BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
- for (BasicBlock::iterator I = SuccBB->begin(),
- IE = SuccBB->getFirstNonPHI()->getIterator();
- I != IE; ++I) {
- PHINode *PN = cast<PHINode>(I);
+ for (auto &I : SuccBB->phis()) {
+ PHINode *PN = cast<PHINode>(&I);
- // Only update the first occurrence.
+ // Only update the first occurrence if NewBB exists.
unsigned Idx = 0, E = PN->getNumIncomingValues();
unsigned LocalNumMergedCases = NumMergedCases;
- for (; Idx != E; ++Idx) {
+ for (; Idx != E && NewBB; ++Idx) {
if (PN->getIncomingBlock(Idx) == OrigBB) {
PN->setIncomingBlock(Idx, NewBB);
break;
}
}
+ // Skip the updated incoming block so that it will not be removed.
+ if (NewBB)
+ ++Idx;
+
// Remove additional occurrences coming from condensed cases and keep the
// number of incoming values equal to the number of branches to SuccBB.
SmallVector<unsigned, 8> Indices;
- for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
+ for (; LocalNumMergedCases > 0 && Idx < E; ++Idx)
if (PN->getIncomingBlock(Idx) == OrigBB) {
Indices.push_back(Idx);
LocalNumMergedCases--;
@@ -195,6 +197,13 @@ BasicBlock *NewLeafBlock(CaseRange &Leaf, Value *Val, ConstantInt *LowerBound,
BasicBlock *Succ = Leaf.BB;
BranchInst::Create(Succ, Default, Comp, NewLeaf);
+ // Update the PHI incoming value/block for the default.
+ for (auto &I : Default->phis()) {
+ PHINode *PN = cast<PHINode>(&I);
+ auto *V = PN->getIncomingValueForBlock(OrigBlock);
+ PN->addIncoming(V, NewLeaf);
+ }
+
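The loop added above uses the `phis()` range, which visits exactly the PHI nodes at the head of a block. The underlying idiom as a stand-alone sketch (hypothetical helper name):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void clonePhiEdge(BasicBlock *Succ, BasicBlock *From, BasicBlock *To) {
  for (PHINode &PN : Succ->phis()) {
    // The new predecessor contributes the same value the old one did.
    PN.addIncoming(PN.getIncomingValueForBlock(From), To);
  }
}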
// If there were any PHI nodes in this successor, rewrite one entry
// from OrigBlock to come from NewLeaf.
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -494,19 +503,17 @@ void ProcessSwitchInst(SwitchInst *SI,
Val = SI->getCondition();
}
- // Create a new, empty default block so that the new hierarchy of
- // if-then statements go to this and the PHI nodes are happy.
- BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
- BranchInst::Create(Default, NewDefault);
-
BasicBlock *SwitchBlock =
SwitchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
- OrigBlock, OrigBlock, NewDefault, UnreachableRanges);
-
- // If there are entries in any PHI nodes for the default edge, make sure
- // to update them as well.
- FixPhis(Default, OrigBlock, NewDefault);
+ OrigBlock, OrigBlock, Default, UnreachableRanges);
+
+ // We have added incoming values for newly-created predecessors in
+ // NewLeafBlock(). The only meaningful work we offload to FixPhis() is to
+ // remove the incoming values from OrigBlock. One special case: if
+ // SwitchBlock is the same as Default, the PHIs in Default have already been
+ // fixed inside SwitchConvert().
+ if (SwitchBlock != Default)
+ FixPhis(Default, OrigBlock, nullptr);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
new file mode 100644
index 000000000000..a1029475cf1d
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -0,0 +1,195 @@
+//== MemoryTaggingSupport.cpp - helpers for memory tagging implementations ===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for HWAddressSanitizer and
+// AArch64StackTagging.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
+
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IntrinsicInst.h"
+
+namespace llvm {
+namespace memtag {
+namespace {
+bool maybeReachableFromEachOther(const SmallVectorImpl<IntrinsicInst *> &Insts,
+ const DominatorTree *DT, const LoopInfo *LI,
+ size_t MaxLifetimes) {
+ // If we have too many lifetime ends, give up, as the algorithm below is N^2.
+ if (Insts.size() > MaxLifetimes)
+ return true;
+ for (size_t I = 0; I < Insts.size(); ++I) {
+ for (size_t J = 0; J < Insts.size(); ++J) {
+ if (I == J)
+ continue;
+ if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, DT, LI))
+ return true;
+ }
+ }
+ return false;
+}
+} // namespace
+
+bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
+ const LoopInfo &LI, const Instruction *Start,
+ const SmallVectorImpl<IntrinsicInst *> &Ends,
+ const SmallVectorImpl<Instruction *> &RetVec,
+ llvm::function_ref<void(Instruction *)> Callback) {
+ if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) {
+ Callback(Ends[0]);
+ return true;
+ }
+ SmallPtrSet<BasicBlock *, 2> EndBlocks;
+ for (auto *End : Ends) {
+ EndBlocks.insert(End->getParent());
+ }
+ SmallVector<Instruction *, 8> ReachableRetVec;
+ unsigned NumCoveredExits = 0;
+ for (auto *RI : RetVec) {
+ if (!isPotentiallyReachable(Start, RI, nullptr, &DT, &LI))
+ continue;
+ ReachableRetVec.push_back(RI);
+ // If there is an end in the same basic block as the return, we know for
+ // sure that the return is covered. Otherwise, we can check whether there
+ // is a way to reach the RI from the start of the lifetime without passing
+ // through an end.
+ if (EndBlocks.count(RI->getParent()) > 0 ||
+ !isPotentiallyReachable(Start, RI, &EndBlocks, &DT, &LI)) {
+ ++NumCoveredExits;
+ }
+ }
+ // If every reachable exit is covered by a lifetime end, put the untag on
+ // the ends to avoid untagging twice; otherwise untag at the returns.
+ if (NumCoveredExits == ReachableRetVec.size()) {
+ for (auto *End : Ends)
+ Callback(End);
+ } else {
+ for (auto *RI : ReachableRetVec)
+ Callback(RI);
+ // We may have inserted untag outside of the lifetime interval.
+ // Signal the caller to remove the lifetime end call for this alloca.
+ return false;
+ }
+ return true;
+}
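The covered-exit test in the loop above can be read in isolation: a return is covered when its block already contains a lifetime end, or when excluding all end blocks makes it unreachable from the lifetime start, i.e. every path to it passes through an end. A sketch (hypothetical helper):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static bool isCoveredExit(const Instruction *Start, const Instruction *RI,
                          const SmallPtrSetImpl<BasicBlock *> &EndBlocks,
                          const DominatorTree &DT, const LoopInfo &LI) {
  return EndBlocks.count(RI->getParent()) > 0 ||
         !isPotentiallyReachable(Start, RI, &EndBlocks, &DT, &LI);
}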
+
+bool isStandardLifetime(const SmallVectorImpl<IntrinsicInst *> &LifetimeStart,
+ const SmallVectorImpl<IntrinsicInst *> &LifetimeEnd,
+ const DominatorTree *DT, const LoopInfo *LI,
+ size_t MaxLifetimes) {
+ // An alloca that has exactly one start and end in every possible execution.
+ // If it has multiple ends, they have to be unreachable from each other, so
+ // at most one of them is actually used for each execution of the function.
+ return LifetimeStart.size() == 1 &&
+ (LifetimeEnd.size() == 1 ||
+ (LifetimeEnd.size() > 0 &&
+ !maybeReachableFromEachOther(LifetimeEnd, DT, LI, MaxLifetimes)));
+}
+
+Instruction *getUntagLocationIfFunctionExit(Instruction &Inst) {
+ if (isa<ReturnInst>(Inst)) {
+ if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall())
+ return CI;
+ return &Inst;
+ }
+ if (isa<ResumeInst, CleanupReturnInst>(Inst)) {
+ return &Inst;
+ }
+ return nullptr;
+}
+
+void StackInfoBuilder::visit(Instruction &Inst) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+ if (CI->canReturnTwice()) {
+ Info.CallsReturnTwice = true;
+ }
+ }
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+ if (IsInterestingAlloca(*AI)) {
+ Info.AllocasToInstrument[AI].AI = AI;
+ }
+ return;
+ }
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)) {
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
+ if (!AI) {
+ Info.UnrecognizedLifetimes.push_back(&Inst);
+ return;
+ }
+ if (!IsInterestingAlloca(*AI))
+ return;
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Info.AllocasToInstrument[AI].LifetimeStart.push_back(II);
+ else
+ Info.AllocasToInstrument[AI].LifetimeEnd.push_back(II);
+ return;
+ }
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
+ for (Value *V : DVI->location_ops()) {
+ if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) {
+ if (!IsInterestingAlloca(*AI))
+ continue;
+ AllocaInfo &AInfo = Info.AllocasToInstrument[AI];
+ auto &DVIVec = AInfo.DbgVariableIntrinsics;
+ if (DVIVec.empty() || DVIVec.back() != DVI)
+ DVIVec.push_back(DVI);
+ }
+ }
+ }
+ Instruction *ExitUntag = getUntagLocationIfFunctionExit(Inst);
+ if (ExitUntag)
+ Info.RetVec.push_back(ExitUntag);
+}
+
+uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
+ auto DL = AI.getModule()->getDataLayout();
+ return *AI.getAllocationSizeInBits(DL) / 8;
+}
+
+void alignAndPadAlloca(memtag::AllocaInfo &Info, llvm::Align Alignment) {
+ const Align NewAlignment = std::max(Info.AI->getAlign(), Alignment);
+ Info.AI->setAlignment(NewAlignment);
+ auto &Ctx = Info.AI->getFunction()->getContext();
+
+ uint64_t Size = getAllocaSizeInBytes(*Info.AI);
+ uint64_t AlignedSize = alignTo(Size, Alignment);
+ if (Size == AlignedSize)
+ return;
+
+ // Add padding to the alloca.
+ Type *AllocatedType =
+ Info.AI->isArrayAllocation()
+ ? ArrayType::get(
+ Info.AI->getAllocatedType(),
+ cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
+ : Info.AI->getAllocatedType();
+ Type *PaddingType = ArrayType::get(Type::getInt8Ty(Ctx), AlignedSize - Size);
+ Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
+ auto *NewAI =
+ new AllocaInst(TypeWithPadding, Info.AI->getType()->getAddressSpace(),
+ nullptr, "", Info.AI);
+ NewAI->takeName(Info.AI);
+ NewAI->setAlignment(Info.AI->getAlign());
+ NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(Info.AI->isSwiftError());
+ NewAI->copyMetadata(*Info.AI);
+
+ auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
+ Info.AI->replaceAllUsesWith(NewPtr);
+ Info.AI->eraseFromParent();
+ Info.AI = NewAI;
+}
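A concrete instance of the padding arithmetic above, assuming a 20-byte alloca and a 16-byte tag granule:

#include <cstdint>

static uint64_t paddingExample() {
  uint64_t Size = 20, Alignment = 16;
  uint64_t AlignedSize = (Size + Alignment - 1) / Alignment * Alignment; // 32
  // The replacement alloca becomes { <original type>, [12 x i8] }, and the
  // original uses are rewired through a bitcast to the padded type.
  return AlignedSize - Size; // 12 padding bytes
}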
+
+} // namespace memtag
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp
new file mode 100644
index 000000000000..b73d68ebec7c
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -0,0 +1,249 @@
+//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of the
+// llvm.expect intrinsic. This utility extracts the threshold values from
+// metadata associated with the instrumented Branch or Switch instruction. The
+// threshold values are then used to determine if a warning should be emitted.
+//
+// MisExpect's implementation relies on two assumptions about how branch weights
+// are managed in LLVM.
+//
+// 1) Frontend profiling weights are always in place before llvm.expect is
+// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
+// needs to extract the branch weights and then compare them to the weights
+// being added by the llvm.expect intrinsic lowering.
+//
+// 2) Sampling and IR based profiles will *only* have branch weight metadata
+// before profiling data is consulted if they are from a lowered llvm.expect
+// intrinsic. These profiles thus always extract the expected weights and then
+// compare them to the weights collected during profiling to determine if a
+// diagnostic message is warranted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MisExpect.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <cstdint>
+#include <functional>
+#include <numeric>
+
+#define DEBUG_TYPE "misexpect"
+
+using namespace llvm;
+using namespace misexpect;
+
+namespace llvm {
+
+// Command line option to enable/disable the warning when profile data suggests
+// a mismatch with the use of the llvm.expect intrinsic
+static cl::opt<bool> PGOWarnMisExpect(
+ "pgo-warn-misexpect", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about incorrect usage of llvm.expect intrinsics."));
+
+static cl::opt<unsigned> MisExpectTolerance(
+ "misexpect-tolerance", cl::init(0),
+ cl::desc("Prevents emiting diagnostics when profile counts are "
+ "within N% of the threshold.."));
+
+} // namespace llvm
+
+namespace {
+
+bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
+ return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
+}
+
+uint64_t getMisExpectTolerance(LLVMContext &Ctx) {
+ return std::max(static_cast<uint64_t>(MisExpectTolerance),
+ Ctx.getDiagnosticsMisExpectTolerance());
+}
+
+Instruction *getInstCondition(Instruction *I) {
+ assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
+ Instruction *Ret = nullptr;
+ if (auto *B = dyn_cast<BranchInst>(I)) {
+ Ret = dyn_cast<Instruction>(B->getCondition());
+ }
+ // TODO: Find a way to resolve condition location for switches
+ // Using the condition of the switch seems to often resolve to an earlier
+ // point in the program, i.e. the calculation of the switch condition, rather
+ // than the switch's location in the source code. Thus, we should use the
+ // instruction to get source code locations rather than the condition to
+ // improve diagnostic output, such as the caret. If the same problem exists
+ // for branch instructions, then we should remove this function and directly
+ // use the instruction.
+ //
+ else if (auto *S = dyn_cast<SwitchInst>(I)) {
+ Ret = dyn_cast<Instruction>(S->getCondition());
+ }
+ return Ret ? Ret : I;
+}
+
+void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
+ uint64_t ProfCount, uint64_t TotalCount) {
+ double PercentageCorrect = (double)ProfCount / TotalCount;
+ auto PerString =
+ formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
+ auto RemStr = formatv(
+ "Potential performance regression from use of the llvm.expect intrinsic: "
+ "Annotation was correct on {0} of profiled executions.",
+ PerString);
+ Twine Msg(PerString);
+ Instruction *Cond = getInstCondition(I);
+ if (isMisExpectDiagEnabled(Ctx))
+ Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
+ OptimizationRemarkEmitter ORE(I->getParent()->getParent());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
+}
+
+} // namespace
+
+namespace llvm {
+namespace misexpect {
+
+// Helper function to extract branch weights into a vector
+Optional<SmallVector<uint32_t, 4>> extractWeights(Instruction *I,
+ LLVMContext &Ctx) {
+ assert(I && "MisExpect::extractWeights given invalid pointer");
+
+ auto *ProfileData = I->getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData)
+ return None;
+
+ unsigned NOps = ProfileData->getNumOperands();
+ if (NOps < 3)
+ return None;
+
+ auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+ if (!ProfDataName || !ProfDataName->getString().equals("branch_weights"))
+ return None;
+
+ SmallVector<uint32_t, 4> Weights(NOps - 1);
+ for (unsigned Idx = 1; Idx < NOps; Idx++) {
+ ConstantInt *Value =
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(Idx));
+ uint32_t V = Value->getZExtValue();
+ Weights[Idx - 1] = V;
+ }
+
+ return Weights;
+}
+
+// TODO: when clang allows c++17, use std::clamp instead
+uint32_t clamp(uint64_t value, uint32_t low, uint32_t hi) {
+ if (value > hi)
+ return hi;
+ if (value < low)
+ return low;
+ return value;
+}
+
+void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
+ ArrayRef<uint32_t> ExpectedWeights) {
+ // To determine if we emit a diagnostic, we need to compare the branch weights
+ // from the profile to those added by the llvm.expect intrinsic.
+ // So first, we extract the "likely" and "unlikely" weights from
+ // ExpectedWeights and determine the correct weight in the profile to compare
+ // against.
+ uint64_t LikelyBranchWeight = 0,
+ UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
+ size_t MaxIndex = 0;
+ for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
+ uint32_t V = ExpectedWeights[Idx];
+ if (LikelyBranchWeight < V) {
+ LikelyBranchWeight = V;
+ MaxIndex = Idx;
+ }
+ if (UnlikelyBranchWeight > V) {
+ UnlikelyBranchWeight = V;
+ }
+ }
+
+ const uint64_t ProfiledWeight = RealWeights[MaxIndex];
+ const uint64_t RealWeightsTotal =
+ std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
+ std::plus<uint64_t>());
+ const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;
+
+ uint64_t TotalBranchWeight =
+ LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
+
+ // FIXME: When we've addressed sample profiling, restore the assertion
+ //
+ // We cannot calculate branch probability if either of these invariants isn't
+ // met. However, MisExpect diagnostics should not prevent code from compiling,
+ // so we simply forgo emitting diagnostics here, and return early.
+ if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight))
+ return;
+
+ // To determine our threshold value we need to obtain the branch probability
+ // for the weights added by llvm.expect and use that proportion to calculate
+ // our threshold based on the collected profile data.
+ auto LikelyProbability = BranchProbability::getBranchProbability(
+ LikelyBranchWeight, TotalBranchWeight);
+
+ uint64_t ScaledThreshold = LikelyProbability.scale(RealWeightsTotal);
+
+ // clamp tolerance range to [0, 100)
+ auto Tolerance = getMisExpectTolerance(I.getContext());
+ Tolerance = clamp(Tolerance, 0, 99);
+
+ // Allow users to relax checking by N% i.e., if they use a 5% tolerance,
+ // then we check against 0.95*ScaledThreshold
+ if (Tolerance > 0)
+ ScaledThreshold *= (1.0 - Tolerance / 100.0);
+
+ // When the profile weight is below the threshold, we emit the diagnostic
+ if (ProfiledWeight < ScaledThreshold)
+ emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
+ RealWeightsTotal);
+}
+
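To make the threshold computation concrete, a worked example with invented numbers (the real code scales through `BranchProbability` fixed-point arithmetic; doubles are used here only for readability). Suppose `llvm.expect` lowering produced weights {2000, 1} and the profile recorded {60, 40}:

#include <cstdint>

static bool misexpectExample() {
  uint64_t Likely = 2000, Unlikely = 1, NumUnlikelyTargets = 1;
  uint64_t Total = Likely + Unlikely * NumUnlikelyTargets; // 2001
  uint64_t RealTotal = 60 + 40;                            // 100
  uint64_t ProfiledWeight = 60; // profile count on the "expected" arm
  double ScaledThreshold = (double)Likely / Total * RealTotal; // ~99.95
  double Tolerance = 5.0;                       // user asked for 5% slack
  ScaledThreshold *= (1.0 - Tolerance / 100.0); // ~94.95
  return ProfiledWeight < ScaledThreshold;      // true => diagnostic fires
}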
+void checkBackendInstrumentation(Instruction &I,
+ const ArrayRef<uint32_t> RealWeights) {
+ auto ExpectedWeightsOpt = extractWeights(&I, I.getContext());
+ if (!ExpectedWeightsOpt)
+ return;
+ auto ExpectedWeights = ExpectedWeightsOpt.getValue();
+ verifyMisExpect(I, RealWeights, ExpectedWeights);
+}
+
+void checkFrontendInstrumentation(Instruction &I,
+ const ArrayRef<uint32_t> ExpectedWeights) {
+ auto RealWeightsOpt = extractWeights(&I, I.getContext());
+ if (!RealWeightsOpt)
+ return;
+ auto RealWeights = RealWeightsOpt.getValue();
+ verifyMisExpect(I, RealWeights, ExpectedWeights);
+}
+
+void checkExpectAnnotations(Instruction &I,
+ const ArrayRef<uint32_t> ExistingWeights,
+ bool IsFrontendInstr) {
+ if (IsFrontendInstr) {
+ checkFrontendInstrumentation(I, ExistingWeights);
+ } else {
+ checkBackendInstrumentation(I, ExistingWeights);
+ }
+}
+
+} // namespace misexpect
+} // namespace llvm
+#undef DEBUG_TYPE
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index d6a6be2762c7..5120ade70e16 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -237,8 +236,8 @@ std::string llvm::getUniqueModuleId(Module *M) {
return ("." + Str).str();
}
-void VFABI::setVectorVariantNames(
- CallInst *CI, const SmallVector<std::string, 8> &VariantMappings) {
+void VFABI::setVectorVariantNames(CallInst *CI,
+ ArrayRef<std::string> VariantMappings) {
if (VariantMappings.empty())
return;
@@ -255,7 +254,7 @@ void VFABI::setVectorVariantNames(
for (const std::string &VariantMapping : VariantMappings) {
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
- assert(VI.hasValue() && "Cannot add an invalid VFABI name.");
+ assert(VI && "Cannot add an invalid VFABI name.");
assert(M->getNamedValue(VI.getValue().VectorName) &&
"Cannot add variant to attribute: "
"vector function declaration is missing.");
@@ -266,14 +265,15 @@ void VFABI::setVectorVariantNames(
}
void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
- StringRef SectionName) {
- // Embed the buffer into the module.
+ StringRef SectionName, Align Alignment) {
+ // Embed the memory buffer into the module.
Constant *ModuleConstant = ConstantDataArray::get(
M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
GlobalVariable *GV = new GlobalVariable(
M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
ModuleConstant, "llvm.embedded.object");
GV->setSection(SectionName);
+ GV->setAlignment(Alignment);
appendToCompilerUsed(M, GV);
}
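For reference, a hypothetical caller of the updated embedBufferInModule, showing where the new Align parameter fits; the payload, section name, and helper name below are made up for illustration.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/Alignment.h"
    #include "llvm/Support/MemoryBufferRef.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"

    void embedExample(llvm::Module &M) {
      llvm::StringRef Data = "payload"; // made-up buffer contents
      llvm::MemoryBufferRef Buf(Data, "example-buffer");
      // Request 8-byte alignment for the emitted global so a runtime can
      // read the section with aligned loads.
      llvm::embedBufferInModule(M, Buf, ".llvm.example", llvm::Align(8));
    }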
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index bd2b6fafdf2e..53334bc2a369 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -15,19 +15,12 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
@@ -35,7 +28,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Transforms/Utils.h"
#include <algorithm>
#define DEBUG_TYPE "predicateinfo"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 01b433b4782a..aff692b36288 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -20,7 +20,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -32,7 +31,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -68,7 +66,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Note that atomic loads can be transformed; atomic semantics do
// not have any meaning for a local alloca.
- if (LI->isVolatile())
+ if (LI->isVolatile() || LI->getType() != AI->getAllocatedType())
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getValueOperand() == AI ||
@@ -678,7 +676,7 @@ void PromoteMem2Reg::run() {
A->eraseFromParent();
}
- // Remove alloca's dbg.declare instrinsics from the function.
+ // Remove alloca's dbg.declare intrinsics from the function.
for (auto &DbgUsers : AllocaDbgUsers) {
for (auto *DII : DbgUsers)
if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
@@ -704,7 +702,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, SQ)) {
+ if (Value *V = simplifyInstruction(PN, SQ)) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
NewPhiNodes.erase(I++);
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index 65207056a3f4..926427450682 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -18,9 +18,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -38,11 +35,13 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) {
GetElementPtrInst *GEP =
dyn_cast<GetElementPtrInst>(GV.use_begin()->getUser());
- if (!GEP || !GEP->hasOneUse())
+ if (!GEP || !GEP->hasOneUse() ||
+ GV.getValueType() != GEP->getSourceElementType())
return false;
LoadInst *Load = dyn_cast<LoadInst>(GEP->use_begin()->getUser());
- if (!Load || !Load->hasOneUse())
+ if (!Load || !Load->hasOneUse() ||
+ Load->getType() != GEP->getResultElementType())
return false;
// If the original lookup table does not have local linkage and is
@@ -144,7 +143,7 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
Value *Offset =
Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
- // Insert the call to load.relative instrinsic before LOAD.
+ // Insert the call to load.relative intrinsic before LOAD.
// GEP might not be immediately followed by a LOAD; e.g., it can be hoisted
// outside the loop, or another instruction might be inserted between them.
Builder.SetInsertPoint(Load);
@@ -171,13 +170,17 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
// Convert lookup tables to relative lookup tables in the module.
static bool convertToRelativeLookupTables(
Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
- Module::iterator FI = M.begin();
- if (FI == M.end())
- return false;
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
- // Check if we have a target that supports relative lookup tables.
- if (!GetTTI(*FI).shouldBuildRelLookupTables())
- return false;
+ // Check if we have a target that supports relative lookup tables.
+ if (!GetTTI(F).shouldBuildRelLookupTables())
+ return false;
+
+ // We assume that the result is independent of the checked function.
+ break;
+ }
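A minimal sketch of the probe above in isolation, under the same assumption that shouldBuildRelLookupTables() is uniform across the module; the helper name is made up.

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Module.h"

    static bool targetWantsRelTables(
        llvm::Module &M,
        llvm::function_ref<llvm::TargetTransformInfo &(llvm::Function &)> GetTTI) {
      for (llvm::Function &F : M) {
        if (F.isDeclaration())
          continue;
        // TTI is a per-function analysis, but this answer is assumed to be
        // target-wide, so probing one definition suffices.
        return GetTTI(F).shouldBuildRelLookupTables();
      }
      return false; // only declarations: nothing to convert anyway
    }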
bool Changed = false;
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index d7e8eaf677c6..eee91e70292e 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -15,14 +15,12 @@
#include "llvm/Transforms/Utils/SCCPSolver.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueLattice.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>
#include <vector>
@@ -452,7 +450,8 @@ public:
return TrackingIncomingArguments;
}
- void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C);
+ void markArgInFuncSpecialization(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args);
void markFunctionUnreachable(Function *F) {
for (auto &BB : *F)
@@ -526,29 +525,38 @@ Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const {
return nullptr;
}
-void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, Argument *A,
- Constant *C) {
- assert(F->arg_size() == A->getParent()->arg_size() &&
+void SCCPInstVisitor::markArgInFuncSpecialization(
+ Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+ assert(!Args.empty() && "Specialization without arguments");
+ assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() &&
"Functions should have the same number of arguments");
- // Mark the argument constant in the new function.
- markConstant(A, C);
-
- // For the remaining arguments in the new function, copy the lattice state
- // over from the old function.
- for (auto I = F->arg_begin(), J = A->getParent()->arg_begin(),
- E = F->arg_end();
- I != E; ++I, ++J)
- if (J != A && ValueState.count(I)) {
+ auto Iter = Args.begin();
+ Argument *NewArg = F->arg_begin();
+ Argument *OldArg = Args[0].Formal->getParent()->arg_begin();
+ for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) {
+
+ LLVM_DEBUG(dbgs() << "SCCP: Marking argument "
+ << NewArg->getNameOrAsOperand() << "\n");
+
+ if (Iter != Args.end() && OldArg == Iter->Formal) {
+ // Mark the argument constants in the new function.
+ markConstant(NewArg, Iter->Actual);
+ ++Iter;
+ } else if (ValueState.count(OldArg)) {
+ // For the remaining arguments in the new function, copy the lattice state
+ // over from the old function.
+ //
// Note: This previously looked like this:
- // ValueState[J] = ValueState[I];
+ // ValueState[NewArg] = ValueState[OldArg];
// This is incorrect because the DenseMap class may resize the underlying
- // memory when inserting `J`, which will invalidate the reference to `I`.
- // Instead, we make sure `J` exists, then set it to `I` afterwards.
- auto &NewValue = ValueState[J];
- NewValue = ValueState[I];
- pushToWorkList(NewValue, J);
+ // memory when inserting `NewArg`, which will invalidate the reference to
+ // `OldArg`. Instead, we make sure `NewArg` exists before setting it.
+ auto &NewValue = ValueState[NewArg];
+ NewValue = ValueState[OldArg];
+ pushToWorkList(NewValue, NewArg);
}
+ }
}
void SCCPInstVisitor::visitInstruction(Instruction &I) {
@@ -988,7 +996,7 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
if ((V1State.isConstant() || V2State.isConstant())) {
Value *V1 = isConstant(V1State) ? getConstant(V1State) : I.getOperand(0);
Value *V2 = isConstant(V2State) ? getConstant(V2State) : I.getOperand(1);
- Value *R = SimplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
+ Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
auto *C = dyn_cast_or_null<Constant>(R);
if (C) {
// X op Y -> undef.
@@ -1287,17 +1295,6 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
return;
}
- // TODO: Actually filp MayIncludeUndef for the created range to false,
- // once most places in the optimizer respect the branches on
- // undef/poison are UB rule. The reason why the new range cannot be
- // undef is as follows below:
- // The new range is based on a branch condition. That guarantees that
- // neither of the compare operands can be undef in the branch targets,
- // unless we have conditions that are always true/false (e.g. icmp ule
- // i32, %a, i32_max). For the latter overdefined/empty range will be
- // inferred, but the branch will get folded accordingly anyways.
- bool MayIncludeUndef = !isa<PredicateAssume>(PI);
-
ValueLatticeElement CondVal = getValueState(OtherOp);
ValueLatticeElement &IV = ValueState[&CB];
if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) {
@@ -1322,9 +1319,15 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement())
NewCR = CopyOfCR;
+ // The new range is based on a branch condition. That guarantees that
+ // neither of the compare operands can be undef in the branch targets,
+ // unless we have conditions that are always true/false (e.g. icmp ule
+ // i32, %a, i32_max). For the latter, an overdefined/empty range will be
+ // inferred, but the branch will get folded accordingly anyway.
addAdditionalUser(OtherOp, &CB);
- mergeInValue(IV, &CB,
- ValueLatticeElement::getRange(NewCR, MayIncludeUndef));
+ mergeInValue(
+ IV, &CB,
+ ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef*/ false));
return;
} else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) {
// For non-integer values or integer constant expressions, only
@@ -1332,8 +1335,7 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
addAdditionalUser(OtherOp, &CB);
mergeInValue(IV, &CB, CondVal);
return;
- } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() &&
- !MayIncludeUndef) {
+ } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant()) {
// Propagate inequalities.
addAdditionalUser(OtherOp, &CB);
mergeInValue(IV, &CB,
@@ -1442,22 +1444,19 @@ void SCCPInstVisitor::solve() {
}
}
-/// resolvedUndefsIn - While solving the dataflow for a function, we assume
-/// that branches on undef values cannot reach any of their successors.
-/// However, this is not a safe assumption. After we solve dataflow, this
-/// method should be use to handle this. If this returns true, the solver
-/// should be rerun.
+/// While solving the dataflow for a function, we don't compute a result for
+/// operations with an undef operand, to allow undef to be lowered to a
+/// constant later. For example, constant folding of "zext i8 undef to i16"
+/// would result in "i16 0", and if undef is later lowered to "i8 1", then the
+/// zext result would become "i16 1" and would result in an overdefined
+/// lattice value once merged with the previous result. Not computing the
+/// result of the zext (treating undef the same as unknown) allows us to handle
+/// a later undef->constant lowering more optimally.
///
-/// This method handles this by finding an unresolved branch and marking it one
-/// of the edges from the block as being feasible, even though the condition
-/// doesn't say it would otherwise be. This allows SCCP to find the rest of the
-/// CFG and only slightly pessimizes the analysis results (by marking one,
-/// potentially infeasible, edge feasible). This cannot usefully modify the
-/// constraints on the condition of the branch, as that would impact other users
-/// of the value.
-///
-/// This scan also checks for values that use undefs. It conservatively marks
-/// them as overdefined.
+/// However, if the operand remains undef when the solver returns, we do need
+/// to assign some result to the instruction (otherwise we would treat it as
+/// unreachable). For simplicity, we mark any instructions that are still
+/// unknown as overdefined.
bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
@@ -1486,7 +1485,7 @@ bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
// more precise than this but it isn't worth bothering.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
ValueLatticeElement &LV = getStructValueState(&I, i);
- if (LV.isUnknownOrUndef()) {
+ if (LV.isUnknown()) {
markOverdefined(LV, &I);
MadeChange = true;
}
@@ -1495,7 +1494,7 @@ bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
}
ValueLatticeElement &LV = getValueState(&I);
- if (!LV.isUnknownOrUndef())
+ if (!LV.isUnknown())
continue;
// There are two reasons a call can have an undef result
@@ -1518,91 +1517,6 @@ bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
markOverdefined(&I);
MadeChange = true;
}
-
- // Check to see if we have a branch or switch on an undefined value. If so
- // we force the branch to go one way or the other to make the successor
- // values live. It doesn't really matter which way we force it.
- Instruction *TI = BB.getTerminator();
- if (auto *BI = dyn_cast<BranchInst>(TI)) {
- if (!BI->isConditional())
- continue;
- if (!getValueState(BI->getCondition()).isUnknownOrUndef())
- continue;
-
- // If the input to SCCP is actually branch on undef, fix the undef to
- // false.
- if (isa<UndefValue>(BI->getCondition())) {
- BI->setCondition(ConstantInt::getFalse(BI->getContext()));
- markEdgeExecutable(&BB, TI->getSuccessor(1));
- MadeChange = true;
- continue;
- }
-
- // Otherwise, it is a branch on a symbolic value which is currently
- // considered to be undef. Make sure some edge is executable, so a
- // branch on "undef" always flows somewhere.
- // FIXME: Distinguish between dead code and an LLVM "undef" value.
- BasicBlock *DefaultSuccessor = TI->getSuccessor(1);
- if (markEdgeExecutable(&BB, DefaultSuccessor))
- MadeChange = true;
-
- continue;
- }
-
- if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
- // Indirect branch with no successor ?. Its ok to assume it branches
- // to no target.
- if (IBR->getNumSuccessors() < 1)
- continue;
-
- if (!getValueState(IBR->getAddress()).isUnknownOrUndef())
- continue;
-
- // If the input to SCCP is actually branch on undef, fix the undef to
- // the first successor of the indirect branch.
- if (isa<UndefValue>(IBR->getAddress())) {
- IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
- markEdgeExecutable(&BB, IBR->getSuccessor(0));
- MadeChange = true;
- continue;
- }
-
- // Otherwise, it is a branch on a symbolic value which is currently
- // considered to be undef. Make sure some edge is executable, so a
- // branch on "undef" always flows somewhere.
- // FIXME: IndirectBr on "undef" doesn't actually need to go anywhere:
- // we can assume the branch has undefined behavior instead.
- BasicBlock *DefaultSuccessor = IBR->getSuccessor(0);
- if (markEdgeExecutable(&BB, DefaultSuccessor))
- MadeChange = true;
-
- continue;
- }
-
- if (auto *SI = dyn_cast<SwitchInst>(TI)) {
- if (!SI->getNumCases() ||
- !getValueState(SI->getCondition()).isUnknownOrUndef())
- continue;
-
- // If the input to SCCP is actually switch on undef, fix the undef to
- // the first constant.
- if (isa<UndefValue>(SI->getCondition())) {
- SI->setCondition(SI->case_begin()->getCaseValue());
- markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
- MadeChange = true;
- continue;
- }
-
- // Otherwise, it is a branch on a symbolic value which is currently
- // considered to be undef. Make sure some edge is executable, so a
- // branch on "undef" always flows somewhere.
- // FIXME: Distinguish between dead code and an LLVM "undef" value.
- BasicBlock *DefaultSuccessor = SI->case_begin()->getCaseSuccessor();
- if (markEdgeExecutable(&BB, DefaultSuccessor))
- MadeChange = true;
-
- continue;
- }
}
return MadeChange;
@@ -1618,7 +1532,7 @@ SCCPSolver::SCCPSolver(
LLVMContext &Ctx)
: Visitor(new SCCPInstVisitor(DL, std::move(GetTLI), Ctx)) {}
-SCCPSolver::~SCCPSolver() {}
+SCCPSolver::~SCCPSolver() = default;
void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) {
return Visitor->addAnalysis(F, std::move(A));
@@ -1713,9 +1627,9 @@ SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() {
return Visitor->getArgumentTrackedFunctions();
}
-void SCCPSolver::markArgInFuncSpecialization(Function *F, Argument *A,
- Constant *C) {
- Visitor->markArgInFuncSpecialization(F, A, C);
+void SCCPSolver::markArgInFuncSpecialization(
+ Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+ Visitor->markArgInFuncSpecialization(F, Args);
}
void SCCPSolver::markFunctionUnreachable(Function *F) {
diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 7d9992176658..37019e3bf95b 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -25,7 +25,6 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -166,7 +165,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
if (Value *V =
- SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
+ simplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
InsertedPHI->eraseFromParent();
return V;
}
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 961adf2570a7..5e92b9852a9f 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -15,15 +15,46 @@
#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
+#include <stack>
using namespace llvm;
#define DEBUG_TYPE "sample-profile-inference"
namespace {
+static cl::opt<bool> SampleProfileEvenCountDistribution(
+ "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
+ cl::desc("Try to evenly distribute counts when there are multiple equally "
+ "likely options."));
+
+static cl::opt<unsigned> SampleProfileMaxDfsCalls(
+ "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
+ cl::desc("Maximum number of dfs iterations for even count distribution."));
+
+static cl::opt<unsigned> SampleProfileProfiCostInc(
+ "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
+ cl::desc("A cost of increasing a block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostDec(
+ "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
+ cl::desc("A cost of decreasing a block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostIncZero(
+ "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
+ cl::desc("A cost of increasing a count of zero-weight block by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
+ "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
+ cl::desc("A cost of increasing the entry block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
+ "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
+ cl::desc("A cost of decreasing the entry block's count by one."));
+
/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution.
@@ -52,16 +83,16 @@ public:
Nodes = std::vector<Node>(NodeCount);
Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
+ if (SampleProfileEvenCountDistribution)
+ AugmentingEdges =
+ std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
}
// Run the algorithm.
int64_t run() {
- // Find an augmenting path and update the flow along the path
- size_t AugmentationIters = 0;
- while (findAugmentingPath()) {
- augmentFlowAlongPath();
- AugmentationIters++;
- }
+ // Iteratively find an augmenting path/DAG in the network and send the
+ // flow along its edges.
+ size_t AugmentationIters = applyFlowAugmentation();
// Compute the total flow and its cost
int64_t TotalCost = 0;
@@ -79,6 +110,7 @@ public:
<< " iterations with " << TotalFlow << " total flow"
<< " of " << TotalCost << " cost\n");
(void)TotalFlow;
+ (void)AugmentationIters;
return TotalCost;
}
@@ -134,20 +166,61 @@ public:
return Flow;
}
- /// A cost of increasing a block's count by one.
- static constexpr int64_t AuxCostInc = 10;
- /// A cost of decreasing a block's count by one.
- static constexpr int64_t AuxCostDec = 20;
- /// A cost of increasing a count of zero-weight block by one.
- static constexpr int64_t AuxCostIncZero = 11;
- /// A cost of increasing the entry block's count by one.
- static constexpr int64_t AuxCostIncEntry = 40;
- /// A cost of decreasing the entry block's count by one.
- static constexpr int64_t AuxCostDecEntry = 10;
/// A cost of taking an unlikely jump.
static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
+ /// Minimum BaseDistance for the jump distance values in island joining.
+ static constexpr uint64_t MinBaseDistance = 10000;
private:
+ /// Iteratively find an augmenting path/DAG in the network and send the
+ /// flow along its edges. The method returns the number of applied iterations.
+ size_t applyFlowAugmentation() {
+ size_t AugmentationIters = 0;
+ while (findAugmentingPath()) {
+ uint64_t PathCapacity = computeAugmentingPathCapacity();
+ while (PathCapacity > 0) {
+ bool Progress = false;
+ if (SampleProfileEvenCountDistribution) {
+ // Identify node/edge candidates for augmentation
+ identifyShortestEdges(PathCapacity);
+
+ // Find an augmenting DAG
+ auto AugmentingOrder = findAugmentingDAG();
+
+ // Apply the DAG augmentation
+ Progress = augmentFlowAlongDAG(AugmentingOrder);
+ PathCapacity = computeAugmentingPathCapacity();
+ }
+
+ if (!Progress) {
+ augmentFlowAlongPath(PathCapacity);
+ PathCapacity = 0;
+ }
+
+ AugmentationIters++;
+ }
+ }
+ return AugmentationIters;
+ }
+
+ /// Compute the capacity of the canonical augmenting path. If the path is
+ /// saturated (that is, no flow can be sent along the path), then return 0.
+ uint64_t computeAugmentingPathCapacity() {
+ uint64_t PathCapacity = INF;
+ uint64_t Now = Target;
+ while (Now != Source) {
+ uint64_t Pred = Nodes[Now].ParentNode;
+ auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
+
+ assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
+ uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
+ PathCapacity = std::min(PathCapacity, EdgeCapacity);
+
+ Now = Pred;
+ }
+ return PathCapacity;
+ }
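The walk above is a bottleneck computation over the parent chain: the path capacity is the minimum residual capacity of its edges. A trivial standalone sketch with made-up residuals:

    #include <algorithm>
    #include <cstdint>
    #include <limits>
    #include <vector>

    // Minimum residual capacity along a path, the quantity the parent-chain
    // walk in computeAugmentingPathCapacity produces.
    uint64_t bottleneck(const std::vector<uint64_t> &Residuals) {
      uint64_t Capacity = std::numeric_limits<uint64_t>::max();
      for (uint64_t R : Residuals)
        Capacity = std::min(Capacity, R);
      return Capacity; // e.g. {7, 3, 9} -> 3
    }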
+
/// Check for existence of an augmenting path with a positive capacity.
bool findAugmentingPath() {
// Initialize data structures
@@ -180,7 +253,7 @@ private:
// from Source to Target; it follows from inequalities
// Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
// >= Dist[Source, V]
- if (Nodes[Target].Distance == 0)
+ if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
break;
if (Nodes[Src].Distance > Nodes[Target].Distance)
continue;
@@ -210,21 +283,9 @@ private:
}
/// Update the current flow along the augmenting path.
- void augmentFlowAlongPath() {
- // Find path capacity
- int64_t PathCapacity = INF;
- uint64_t Now = Target;
- while (Now != Source) {
- uint64_t Pred = Nodes[Now].ParentNode;
- auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
- PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
- Now = Pred;
- }
-
+ void augmentFlowAlongPath(uint64_t PathCapacity) {
assert(PathCapacity > 0 && "found an incorrect augmenting path");
-
- // Update the flow along the path
- Now = Target;
+ uint64_t Now = Target;
while (Now != Source) {
uint64_t Pred = Nodes[Now].ParentNode;
auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
@@ -237,6 +298,220 @@ private:
}
}
+ /// Find an Augmenting DAG order using a modified version of DFS in which we
+ /// can visit a node multiple times. In the DFS search, when scanning each
+ /// edge out of a node, continue the search at the Edge.Dst endpoint if it
+ /// has not been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
+ /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
+ /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
+ /// that starts with Source and ends with Target.
+ std::vector<uint64_t> findAugmentingDAG() {
+ // We use a stack-based implementation of DFS to avoid recursion.
+ // Defining DFS data structures:
+ // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
+ // - we are currently visiting Nodes[NodeIdx] and
+ // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
+ typedef std::pair<uint64_t, uint64_t> StackItemType;
+ std::stack<StackItemType> Stack;
+ std::vector<uint64_t> AugmentingOrder;
+
+ // Phase 0: Initialize Node attributes and Time for DFS run
+ for (auto &Node : Nodes) {
+ Node.Discovery = 0;
+ Node.Finish = 0;
+ Node.NumCalls = 0;
+ Node.Taken = false;
+ }
+ uint64_t Time = 0;
+ // Mark Target as Taken
+ // Taken attribute will be propagated backwards from Target towards Source
+ Nodes[Target].Taken = true;
+
+ // Phase 1: Start DFS traversal from Source
+ Stack.emplace(Source, 0);
+ Nodes[Source].Discovery = ++Time;
+ while (!Stack.empty()) {
+ auto NodeIdx = Stack.top().first;
+ auto EdgeIdx = Stack.top().second;
+
+ // If we haven't scanned all edges out of NodeIdx, continue scanning
+ if (EdgeIdx < Edges[NodeIdx].size()) {
+ auto &Edge = Edges[NodeIdx][EdgeIdx];
+ auto &Dst = Nodes[Edge.Dst];
+ Stack.top().second++;
+
+ if (Edge.OnShortestPath) {
+ // If we haven't seen Edge.Dst so far, continue DFS search there
+ if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
+ Dst.Discovery = ++Time;
+ Stack.emplace(Edge.Dst, 0);
+ Dst.NumCalls++;
+ } else if (Dst.Taken && Dst.Finish != 0) {
+ // Otherwise, if Edge.Dst already has a path to Target, then NodeIdx
+ // has one too; mark it as Taken.
+ Nodes[NodeIdx].Taken = true;
+ }
+ }
+ } else {
+ // If we are done scanning all edges out of NodeIdx
+ Stack.pop();
+ // If we haven't found a path from NodeIdx to Target, forget about it
+ if (!Nodes[NodeIdx].Taken) {
+ Nodes[NodeIdx].Discovery = 0;
+ } else {
+ // If we have found a path from NodeIdx to Target, then finish NodeIdx
+ // and propagate Taken flag to DFS parent unless at the Source
+ Nodes[NodeIdx].Finish = ++Time;
+ // NodeIdx == Source if and only if the stack is empty
+ if (NodeIdx != Source) {
+ assert(!Stack.empty() && "empty stack while running dfs");
+ Nodes[Stack.top().first].Taken = true;
+ }
+ AugmentingOrder.push_back(NodeIdx);
+ }
+ }
+ }
+ // Nodes are collected in order of decreasing Finish time, so the order is reversed
+ std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
+
+ // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
+ for (size_t Src : AugmentingOrder) {
+ AugmentingEdges[Src].clear();
+ for (auto &Edge : Edges[Src]) {
+ uint64_t Dst = Edge.Dst;
+ if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
+ Nodes[Dst].Finish < Nodes[Src].Finish) {
+ AugmentingEdges[Src].push_back(&Edge);
+ }
+ }
+ assert((Src == Target || !AugmentingEdges[Src].empty()) &&
+ "incorrectly constructed augmenting edges");
+ }
+
+ return AugmentingOrder;
+ }
+
+ /// Update the current flow along the given (acyclic) subgraph specified by
+ /// the vertex order, AugmentingOrder. The objective is to send as much flow
+ /// as possible while evenly distributing flow among successors of each node.
+ /// After the update at least one edge is saturated.
+ bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
+ // Phase 0: Initialization
+ for (uint64_t Src : AugmentingOrder) {
+ Nodes[Src].FracFlow = 0;
+ Nodes[Src].IntFlow = 0;
+ for (auto &Edge : AugmentingEdges[Src]) {
+ Edge->AugmentedFlow = 0;
+ }
+ }
+
+ // Phase 1: Send a unit of fractional flow along the DAG
+ uint64_t MaxFlowAmount = INF;
+ Nodes[Source].FracFlow = 1.0;
+ for (uint64_t Src : AugmentingOrder) {
+ assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
+ "incorrectly computed fractional flow");
+ // Distribute flow evenly among successors of Src
+ uint64_t Degree = AugmentingEdges[Src].size();
+ for (auto &Edge : AugmentingEdges[Src]) {
+ double EdgeFlow = Nodes[Src].FracFlow / Degree;
+ Nodes[Edge->Dst].FracFlow += EdgeFlow;
+ if (Edge->Capacity == INF)
+ continue;
+ uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
+ MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
+ }
+ }
+ // Stop early if we cannot send any (integral) flow from Source to Target
+ if (MaxFlowAmount == 0)
+ return false;
+
+ // Phase 2: Send an integral flow of MaxFlowAmount
+ Nodes[Source].IntFlow = MaxFlowAmount;
+ for (uint64_t Src : AugmentingOrder) {
+ if (Src == Target)
+ break;
+ // Distribute flow evenly among successors of Src, rounding up to make
+ // sure all flow is sent
+ uint64_t Degree = AugmentingEdges[Src].size();
+ // We are guaranteed that Node[Src].IntFlow <= SuccFlow * Degree
+ uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
+ for (auto &Edge : AugmentingEdges[Src]) {
+ uint64_t Dst = Edge->Dst;
+ uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
+ EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
+ Nodes[Dst].IntFlow += EdgeFlow;
+ Nodes[Src].IntFlow -= EdgeFlow;
+ Edge->AugmentedFlow += EdgeFlow;
+ }
+ }
+ assert(Nodes[Target].IntFlow <= MaxFlowAmount);
+ Nodes[Target].IntFlow = 0;
+
+ // Phase 3: Send excess flow back traversing the nodes backwards.
+ // Because of rounding, not all flow can be sent along the edges of Src.
+ // Hence, send the remaining flow back to maintain flow conservation.
+ for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
+ uint64_t Src = AugmentingOrder[Idx - 1];
+ // Try to send excess flow back along each edge.
+ // Make sure we only send back flow we just augmented (AugmentedFlow).
+ for (auto &Edge : AugmentingEdges[Src]) {
+ uint64_t Dst = Edge->Dst;
+ if (Nodes[Dst].IntFlow == 0)
+ continue;
+ uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
+ Nodes[Dst].IntFlow -= EdgeFlow;
+ Nodes[Src].IntFlow += EdgeFlow;
+ Edge->AugmentedFlow -= EdgeFlow;
+ }
+ }
+
+ // Phase 4: Update flow values along all edges
+ bool HasSaturatedEdges = false;
+ for (uint64_t Src : AugmentingOrder) {
+ // Verify that we have sent all the excess flow from the node
+ assert(Src == Source || Nodes[Src].IntFlow == 0);
+ for (auto &Edge : AugmentingEdges[Src]) {
+ assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
+ // Update flow values along the edge and its reverse copy
+ auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
+ Edge->Flow += Edge->AugmentedFlow;
+ RevEdge.Flow -= Edge->AugmentedFlow;
+ if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
+ HasSaturatedEdges = true;
+ }
+ }
+
+ // The augmentation is successful iff at least one edge becomes saturated
+ return HasSaturatedEdges;
+ }
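The interplay of the ceiling division in Phase 2 and the give-back in Phase 3 is easiest to check on small numbers: with IntFlow = 10 and Degree = 3, SuccFlow = ceil(10/3) = 4, and the three edges receive 4, 4, and 2 (capacity permitting); rounding excess that reaches a node is later returned against AugmentedFlow on the backward pass. A standalone sketch of just the split rule, with made-up inputs:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Split IntFlow as evenly as possible over Degree edges, rounding up per
    // edge and never sending more than remains (ignoring edge capacities).
    std::vector<uint64_t> evenSplit(uint64_t IntFlow, uint64_t Degree) {
      uint64_t SuccFlow = (IntFlow + Degree - 1) / Degree; // ceil division
      std::vector<uint64_t> PerEdge;
      for (uint64_t I = 0; I < Degree; ++I) {
        uint64_t EdgeFlow = std::min(IntFlow, SuccFlow);
        PerEdge.push_back(EdgeFlow); // 10 over 3 edges -> 4, 4, 2
        IntFlow -= EdgeFlow;
      }
      return PerEdge;
    }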
+
+ /// Identify candidate (shortest) edges for augmentation.
+ void identifyShortestEdges(uint64_t PathCapacity) {
+ assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
+ // To make sure the augmentation DAG contains only edges with large residual
+ // capacity, we prune all edges whose capacity is below a fraction of
+ // the capacity of the augmented path.
+ // (All edges of the path itself are always in the DAG)
+ uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
+
+ // Decide which edges are on a shortest path from Source to Target
+ for (size_t Src = 0; Src < Nodes.size(); Src++) {
+ // An edge cannot be augmenting if the endpoint has large distance
+ if (Nodes[Src].Distance > Nodes[Target].Distance)
+ continue;
+
+ for (auto &Edge : Edges[Src]) {
+ uint64_t Dst = Edge.Dst;
+ Edge.OnShortestPath =
+ Src != Target && Dst != Source &&
+ Nodes[Dst].Distance <= Nodes[Target].Distance &&
+ Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
+ Edge.Capacity > Edge.Flow &&
+ uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
+ }
+ }
+ }
+
/// A node in a flow network.
struct Node {
/// The cost of the cheapest path from the source to the current node.
@@ -247,7 +522,20 @@ private:
uint64_t ParentEdgeIndex;
/// An indicator of whether the current node is in a queue.
bool Taken;
+
+ /// Data fields utilized in DAG-augmentation:
+ /// Fractional flow.
+ double FracFlow;
+ /// Integral flow.
+ uint64_t IntFlow;
+ /// Discovery time.
+ uint64_t Discovery;
+ /// Finish time.
+ uint64_t Finish;
+ /// The number of times the node has been discovered during the DFS.
+ uint64_t NumCalls;
};
+
/// An edge in a flow network.
struct Edge {
/// The cost of the edge.
@@ -260,6 +548,12 @@ private:
uint64_t Dst;
/// The index of the reverse edge between Dst and the current node.
uint64_t RevEdgeIndex;
+
+ /// Data fields utilized in DAG-augmentation:
+ /// Whether the edge is currently on a shortest path from Source to Target.
+ bool OnShortestPath;
+ /// Extra flow along the edge.
+ uint64_t AugmentedFlow;
};
/// The set of network nodes.
@@ -270,8 +564,13 @@ private:
uint64_t Source;
/// Target (sink) node of the flow.
uint64_t Target;
+ /// Augmenting edges.
+ std::vector<std::vector<Edge *>> AugmentingEdges;
};
+constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
+constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
+
/// A post-processing adjustment of control flow. It applies two steps by
/// rerouting some flow and making it more realistic:
///
@@ -433,19 +732,22 @@ private:
/// A distance of a path for a given jump.
/// In order to incite the path to use blocks/jumps with large positive flow,
/// and avoid changing branch probability of outgoing edges drastically,
- /// set the distance as follows:
- /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
- /// if Block.Weight > 0, then distance = 1
- /// otherwise distance >> 1
+ /// set the jump distance so as to:
+ /// - minimize the number of unlikely jumps used and, subject to that,
+ /// - minimize the number of Flow == 0 jumps used and, subject to that,
+ /// - minimize the total multiplicative Flow increase for the remaining edges.
+ /// To capture this objective with integer distances, we round off fractional
+ /// parts to a multiple of 1 / BaseDistance.
int64_t jumpDistance(FlowJump *Jump) const {
- int64_t BaseDistance = 100;
+ uint64_t BaseDistance =
+ std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
+ std::min(Func.Blocks[Func.Entry].Flow,
+ MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
if (Jump->IsUnlikely)
return MinCostMaxFlow::AuxCostUnlikely;
if (Jump->Flow > 0)
- return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
- if (Func.Blocks[Jump->Target].Weight > 0)
- return BaseDistance;
- return BaseDistance * (NumBlocks() + 1);
+ return BaseDistance + BaseDistance / Jump->Flow;
+ return BaseDistance * NumBlocks();
};
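With hypothetical numbers: if the entry block's flow is 50000 and AuxCostUnlikely / NumBlocks is larger, BaseDistance = 50000; a jump with Flow = 100 then gets distance 50000 + 500, a zero-flow jump costs 50000 * NumBlocks, and an unlikely jump costs AuxCostUnlikely. A free-standing restatement of the same rule (constants mirror the code above; NumBlocks is assumed non-zero):

    #include <algorithm>
    #include <cstdint>

    int64_t jumpDistanceSketch(bool IsUnlikely, uint64_t Flow,
                               uint64_t EntryFlow, uint64_t NumBlocks) {
      const int64_t AuxCostUnlikely = int64_t(1) << 30;
      const uint64_t MinBaseDistance = 10000;
      uint64_t BaseDistance = std::max<uint64_t>(
          MinBaseDistance,
          std::min<uint64_t>(EntryFlow, AuxCostUnlikely / NumBlocks));
      if (IsUnlikely)
        return AuxCostUnlikely;                    // last resort
      if (Flow > 0)
        return BaseDistance + BaseDistance / Flow; // cheaper when hotter
      return BaseDistance * NumBlocks;             // zero-flow jumps are costly
    }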
uint64_t NumBlocks() const { return Func.Blocks.size(); }
@@ -511,7 +813,7 @@ private:
std::vector<FlowBlock *> &KnownDstBlocks,
std::vector<FlowBlock *> &UnknownBlocks) {
// Run BFS from SrcBlock and make sure all paths are going through unknown
- // blocks and end at a non-unknown DstBlock
+ // blocks and end at a known DstBlock
auto Visited = BitVector(NumBlocks(), false);
std::queue<uint64_t> Queue;
@@ -778,8 +1080,8 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
// We assume that decreasing block counts is more expensive than increasing,
// and thus, setting separate costs here. In the future we may want to tune
// the relative costs so as to maximize the quality of generated profiles.
- int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
- int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
+ int64_t AuxCostInc = SampleProfileProfiCostInc;
+ int64_t AuxCostDec = SampleProfileProfiCostDec;
if (Block.UnknownWeight) {
// Do not penalize changing weights of blocks w/o known profile count
AuxCostInc = 0;
@@ -788,12 +1090,12 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
// Increasing the count for "cold" blocks with zero initial count is more
// expensive than for "hot" ones
if (Block.Weight == 0) {
- AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
+ AuxCostInc = SampleProfileProfiCostIncZero;
}
// Modifying the count of the entry block is expensive
if (Block.isEntry()) {
- AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
- AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
+ AuxCostInc = SampleProfileProfiCostIncEntry;
+ AuxCostDec = SampleProfileProfiCostDecEntry;
}
}
// For blocks with self-edges, do not penalize a reduction of the count,
diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
index ea0e8343eb88..a2588b8cec7d 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -11,6 +11,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
namespace llvm {
@@ -35,9 +39,13 @@ cl::opt<bool> NoWarnSampleUnused(
"samples but without debug information to use those samples. "));
cl::opt<bool> SampleProfileUseProfi(
- "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ "sample-profile-use-profi", cl::Hidden,
cl::desc("Use profi to infer block and edge counts."));
+cl::opt<bool> SampleProfileInferEntryCount(
+ "sample-profile-infer-entry-count", cl::init(true), cl::Hidden,
+ cl::desc("Use profi to infer function entry count."));
+
namespace sampleprofutil {
/// Return true if the given callsite is hot wrt to hot cutoff threshold.
diff --git a/llvm/lib/Transforms/Utils/SanitizerStats.cpp b/llvm/lib/Transforms/Utils/SanitizerStats.cpp
index a1313c77ed77..fd21ee4cc408 100644
--- a/llvm/lib/Transforms/Utils/SanitizerStats.cpp
+++ b/llvm/lib/Transforms/Utils/SanitizerStats.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SanitizerStats.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 5363a851fc27..401f1ee5a55d 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -22,11 +22,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -276,7 +273,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// If we haven't found this binop, insert it.
- Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
+ // TODO: Use the Builder, which will make CreateBinOp below fold with
+ // InstSimplifyFolder.
+ Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
BO->setDebugLoc(Loc);
if (Flags & SCEV::FlagNUW)
BO->setHasNoUnsignedWrap();
@@ -591,7 +590,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
if (isa<DbgInfoIntrinsic>(IP))
ScanLimit++;
if (IP->getOpcode() == Instruction::GetElementPtr &&
- IP->getOperand(0) == V && IP->getOperand(1) == Idx)
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
+ cast<GEPOperator>(&*IP)->getSourceElementType() ==
+ Type::getInt8Ty(Ty->getContext()))
return &*IP;
if (IP == BlockBegin) break;
}
@@ -1633,7 +1634,6 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
NewS = Ext;
const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
- //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
// Truncate the result down to the original type, if needed.
const SCEV *T = SE.getTruncateOrNoop(V, Ty);
@@ -1671,154 +1671,49 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
return Builder.CreateSExt(V, Ty);
}
-Value *SCEVExpander::expandSMaxExpr(const SCEVNAryExpr *S) {
- Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
- Type *Ty = LHS->getType();
- for (int i = S->getNumOperands()-2; i >= 0; --i) {
- // In the case of mixed integer and pointer types, do the
- // rest of the comparisons as integer.
- Type *OpTy = S->getOperand(i)->getType();
- if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
- Ty = SE.getEffectiveSCEVType(Ty);
- LHS = InsertNoopCastOfTo(LHS, Ty);
- }
- Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
- Value *Sel;
- if (Ty->isIntegerTy())
- Sel = Builder.CreateIntrinsic(Intrinsic::smax, {Ty}, {LHS, RHS},
- /*FMFSource=*/nullptr, "smax");
- else {
- Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
- Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
- }
- LHS = Sel;
- }
- // In the case of mixed integer and pointer types, cast the
- // final result back to the pointer type.
- if (LHS->getType() != S->getType())
- LHS = InsertNoopCastOfTo(LHS, S->getType());
- return LHS;
-}
-
-Value *SCEVExpander::expandUMaxExpr(const SCEVNAryExpr *S) {
- Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
- Type *Ty = LHS->getType();
- for (int i = S->getNumOperands()-2; i >= 0; --i) {
- // In the case of mixed integer and pointer types, do the
- // rest of the comparisons as integer.
- Type *OpTy = S->getOperand(i)->getType();
- if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
- Ty = SE.getEffectiveSCEVType(Ty);
- LHS = InsertNoopCastOfTo(LHS, Ty);
- }
- Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
- Value *Sel;
- if (Ty->isIntegerTy())
- Sel = Builder.CreateIntrinsic(Intrinsic::umax, {Ty}, {LHS, RHS},
- /*FMFSource=*/nullptr, "umax");
- else {
- Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
- Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
- }
- LHS = Sel;
- }
- // In the case of mixed integer and pointer types, cast the
- // final result back to the pointer type.
- if (LHS->getType() != S->getType())
- LHS = InsertNoopCastOfTo(LHS, S->getType());
- return LHS;
-}
-
-Value *SCEVExpander::expandSMinExpr(const SCEVNAryExpr *S) {
- Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
- Type *Ty = LHS->getType();
- for (int i = S->getNumOperands() - 2; i >= 0; --i) {
- // In the case of mixed integer and pointer types, do the
- // rest of the comparisons as integer.
- Type *OpTy = S->getOperand(i)->getType();
- if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
- Ty = SE.getEffectiveSCEVType(Ty);
- LHS = InsertNoopCastOfTo(LHS, Ty);
- }
- Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
- Value *Sel;
- if (Ty->isIntegerTy())
- Sel = Builder.CreateIntrinsic(Intrinsic::smin, {Ty}, {LHS, RHS},
- /*FMFSource=*/nullptr, "smin");
- else {
- Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
- Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
- }
- LHS = Sel;
- }
- // In the case of mixed integer and pointer types, cast the
- // final result back to the pointer type.
- if (LHS->getType() != S->getType())
- LHS = InsertNoopCastOfTo(LHS, S->getType());
- return LHS;
-}
-
-Value *SCEVExpander::expandUMinExpr(const SCEVNAryExpr *S) {
+Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
+ Intrinsic::ID IntrinID, Twine Name,
+ bool IsSequential) {
Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
Type *Ty = LHS->getType();
+ if (IsSequential)
+ LHS = Builder.CreateFreeze(LHS);
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
- // In the case of mixed integer and pointer types, do the
- // rest of the comparisons as integer.
- Type *OpTy = S->getOperand(i)->getType();
- if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
- Ty = SE.getEffectiveSCEVType(Ty);
- LHS = InsertNoopCastOfTo(LHS, Ty);
- }
Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
+ if (IsSequential && i != 0)
+ RHS = Builder.CreateFreeze(RHS);
Value *Sel;
if (Ty->isIntegerTy())
- Sel = Builder.CreateIntrinsic(Intrinsic::umin, {Ty}, {LHS, RHS},
- /*FMFSource=*/nullptr, "umin");
+ Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
+ /*FMFSource=*/nullptr, Name);
else {
- Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
- Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
+ Value *ICmp =
+ Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
+ Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
}
LHS = Sel;
}
- // In the case of mixed integer and pointer types, cast the
- // final result back to the pointer type.
- if (LHS->getType() != S->getType())
- LHS = InsertNoopCastOfTo(LHS, S->getType());
return LHS;
}
Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
- return expandSMaxExpr(S);
+ return expandMinMaxExpr(S, Intrinsic::smax, "smax");
}
Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
- return expandUMaxExpr(S);
+ return expandMinMaxExpr(S, Intrinsic::umax, "umax");
}
Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
- return expandSMinExpr(S);
+ return expandMinMaxExpr(S, Intrinsic::smin, "smin");
}
Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
- return expandUMinExpr(S);
+ return expandMinMaxExpr(S, Intrinsic::umin, "umin");
}
Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
- SmallVector<Value *> Ops;
- for (const SCEV *Op : S->operands())
- Ops.emplace_back(expand(Op));
-
- Value *SaturationPoint =
- MinMaxIntrinsic::getSaturationPoint(Intrinsic::umin, S->getType());
-
- SmallVector<Value *> OpIsZero;
- for (Value *Op : ArrayRef<Value *>(Ops).drop_back())
- OpIsZero.emplace_back(Builder.CreateICmpEQ(Op, SaturationPoint));
-
- Value *AnyOpIsZero = Builder.CreateLogicalOr(OpIsZero);
-
- Value *NaiveUMin = expandUMinExpr(S);
- return Builder.CreateSelect(AnyOpIsZero, SaturationPoint, NaiveUMin);
+ return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
}
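The IsSequential path exists because umin_seq, unlike plain umin, must yield the saturation point (zero) when an earlier operand is zero even if a later operand is poison; freezing the later operands makes the plain-umin chain safe. A minimal two-operand sketch of the same construction (the helper name is made up, and this is an illustration rather than the expander itself):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"

    llvm::Value *buildUMinSeq(llvm::IRBuilder<> &Builder, llvm::Value *A,
                              llvm::Value *B) {
      // umin_seq(A, B) must be 0 when A == 0 even if B is poison, so B is
      // frozen before feeding it to a plain umin; poison in A may propagate.
      llvm::Value *FrozenB = Builder.CreateFreeze(B);
      return Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umin, A, FrozenB,
                                           /*FMFSource=*/nullptr, "umin");
    }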
Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
@@ -1868,35 +1763,33 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
return V;
}
-ScalarEvolution::ValueOffsetPair
-SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
- auto *Set = SE.getSCEVValues(S);
+Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
- if (CanonicalMode || !SE.containsAddRecurrence(S)) {
- // If S is scConstant, it may be worse to reuse an existing Value.
- if (S->getSCEVType() != scConstant && Set) {
- // Choose a Value from the set which dominates the InsertPt.
- // InsertPt should be inside the Value's parent loop so as not to break
- // the LCSSA form.
- for (auto const &VOPair : *Set) {
- Value *V = VOPair.first;
- ConstantInt *Offset = VOPair.second;
- Instruction *EntInst = dyn_cast_or_null<Instruction>(V);
- if (!EntInst)
- continue;
+ if (!CanonicalMode && SE.containsAddRecurrence(S))
+ return nullptr;
- assert(EntInst->getFunction() == InsertPt->getFunction());
- if (S->getType() == V->getType() &&
- SE.DT.dominates(EntInst, InsertPt) &&
- (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
- return {V, Offset};
- }
- }
+ // If S is a constant, it may be worse to reuse an existing Value.
+ if (isa<SCEVConstant>(S))
+ return nullptr;
+
+ // Choose a Value from the set which dominates the InsertPt.
+ // InsertPt should be inside the Value's parent loop so as not to break
+ // the LCSSA form.
+ for (Value *V : SE.getSCEVValues(S)) {
+ Instruction *EntInst = dyn_cast<Instruction>(V);
+ if (!EntInst)
+ continue;
+
+ assert(EntInst->getFunction() == InsertPt->getFunction());
+ if (S->getType() == V->getType() &&
+ SE.DT.dominates(EntInst, InsertPt) &&
+ (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ return V;
}
- return {nullptr, nullptr};
+ return nullptr;
}
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
@@ -1965,9 +1858,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
- ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
- Value *V = VO.first;
-
+ Value *V = FindValueInExprValueMap(S, InsertPt);
if (!V)
V = visit(S);
else {
@@ -1978,21 +1869,6 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (auto *I = dyn_cast<Instruction>(V))
if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
I->dropPoisonGeneratingFlags();
-
- if (VO.second) {
- if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
- int64_t Offset = VO.second->getSExtValue();
- ConstantInt *Idx =
- ConstantInt::getSigned(VO.second->getType(), -Offset);
- unsigned AS = Vty->getAddressSpace();
- V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
- V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
- "uglygep");
- V = Builder.CreateBitCast(V, Vty);
- } else {
- V = Builder.CreateSub(V, VO.second);
- }
- }
}
// Remember the expanded value for this SCEV at this location.
//
@@ -2058,7 +1934,7 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// so narrow phis can reuse them.
for (PHINode *Phi : Phis) {
auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
- if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
+ if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
return V;
if (!SE.isSCEVable(PN->getType()))
return nullptr;
@@ -2174,9 +2050,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Optional<ScalarEvolution::ValueOffsetPair>
-SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
- Loop *L) {
+Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S,
+ const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -2193,25 +2069,17 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
+ return LHS;
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
+ return RHS;
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap. Note that we don't currently model the cost of
// needing to drop poison generating flags on the instruction if we
// want to reuse it. We effectively assume that has zero cost.
- ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
- if (VO.first)
- return VO;
-
- // There is potential to make this significantly smarter, but this simple
- // heuristic already gets some interesting cases.
-
- // Can not find suitable value.
- return None;
+ return FindValueInExprValueMap(S, At);
}
template<typename T> static InstructionCost costAndCollectOperands(
@@ -2469,8 +2337,8 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
switch (Pred->getKind()) {
case SCEVPredicate::P_Union:
return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
- case SCEVPredicate::P_Equal:
- return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
+ case SCEVPredicate::P_Compare:
+ return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
case SCEVPredicate::P_Wrap: {
auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
return expandWrapPredicate(AddRecPred, IP);
@@ -2479,15 +2347,16 @@ Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
llvm_unreachable("Unknown SCEV predicate type");
}
-Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
- Instruction *IP) {
+Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
+ Instruction *IP) {
Value *Expr0 =
expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false);
Value *Expr1 =
expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false);
Builder.SetInsertPoint(IP);
- auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
+ auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
+ auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
return I;
}
@@ -2496,7 +2365,8 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
assert(AR->isAffine() && "Cannot generate RT check for "
"non-affine expression");
- SCEVUnionPredicate Pred;
+ // FIXME: It is highly suspicious that we're ignoring the predicates here.
+ SmallVector<const SCEVPredicate *, 4> Pred;
const SCEV *ExitCount =
SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
@@ -2710,10 +2580,10 @@ namespace {
struct SCEVFindUnsafe {
ScalarEvolution &SE;
bool CanonicalMode;
- bool IsUnsafe;
+ bool IsUnsafe = false;
SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
- : SE(SE), CanonicalMode(CanonicalMode), IsUnsafe(false) {}
+ : SE(SE), CanonicalMode(CanonicalMode) {}
bool follow(const SCEV *S) {
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 335ac03ccb52..567b866f7777 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -27,7 +27,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -50,7 +50,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
@@ -58,7 +57,6 @@
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@@ -74,7 +72,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -94,8 +91,8 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
cl::opt<bool> llvm::RequireAndPreserveDomTree(
- "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore,
- cl::init(false),
+ "simplifycfg-require-and-preserve-domtree", cl::Hidden,
+
cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
"into preserving DomTree,"));
@@ -167,6 +164,14 @@ static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
"to fold branch to common destination when vector operations are "
"present"));
+static cl::opt<bool> EnableMergeCompatibleInvokes(
+ "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
+ cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
+
+static cl::opt<unsigned> MaxSwitchCasesPerResult(
+ "max-switch-cases-per-result", cl::Hidden, cl::init(16),
+ cl::desc("Limit cases to analyze when converting a switch to select"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -192,6 +197,8 @@ STATISTIC(NumSinkCommonInstrs,
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumInvokes,
"Number of invokes with empty resume blocks simplified into calls");
+STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
+STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
namespace {
@@ -291,6 +298,34 @@ public:
} // end anonymous namespace
+/// Return true if all the PHI nodes in the basic block \p BB
+/// receive compatible (identical) incoming values when coming from
+/// all of the predecessor blocks that are specified in \p IncomingBlocks.
+///
+/// Note that if the values aren't exactly identical, but \p EquivalenceSet
+/// is provided, and *both* of the values are present in the set,
+/// then they are considered equal.
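+///
+/// For example, with a hypothetical "phi [ %a, %bb0 ], [ %a, %bb1 ],
+/// [ %c, %bb2 ]", the incoming blocks {%bb0, %bb1} are compatible (both
+/// produce %a), while {%bb0, %bb2} are only compatible if %a and %c are
+/// both in \p EquivalenceSet.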
+static bool IncomingValuesAreCompatible(
+ BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
+ SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
+ assert(IncomingBlocks.size() == 2 &&
+ "Only for a pair of incoming blocks at the time!");
+
+ // FIXME: it is okay if one of the incoming values is an `undef` value,
+ // iff the other incoming value is guaranteed to be a non-poison value.
+ // FIXME: it is okay if one of the incoming values is a `poison` value.
+ return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
+ Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
+ Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
+ if (IV0 == IV1)
+ return true;
+ if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
+ EquivalenceSet->contains(IV1))
+ return true;
+ return false;
+ });
+}
+
/// Return true if it is safe to merge these two
/// terminator instructions together.
static bool
@@ -307,17 +342,17 @@ SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
bool Fail = false;
- for (BasicBlock *Succ : successors(SI2BB))
- if (SI1Succs.count(Succ))
- for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- if (PN->getIncomingValueForBlock(SI1BB) !=
- PN->getIncomingValueForBlock(SI2BB)) {
- if (FailBlocks)
- FailBlocks->insert(Succ);
- Fail = true;
- }
- }
+ for (BasicBlock *Succ : successors(SI2BB)) {
+ if (!SI1Succs.count(Succ))
+ continue;
+ if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
+ continue;
+ Fail = true;
+ if (FailBlocks)
+ FailBlocks->insert(Succ);
+ else
+ break;
+ }
return !Fail;
}
@@ -347,6 +382,13 @@ static InstructionCost computeSpeculationCost(const User *I,
return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
}
+/// Check whether this is a potentially trapping constant.
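+/// (For example, a constantexpr integer division whose divisor is not known
+/// to be non-zero may trap at execution time.)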
+static bool canTrap(const Value *V) {
+ if (auto *C = dyn_cast<Constant>(V))
+ return C->canTrap();
+ return false;
+}
+
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -381,10 +423,7 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB,
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
// can be executed unconditionally.
- if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
- if (C->canTrap())
- return false;
- return true;
+ return !canTrap(V);
}
BasicBlock *PBB = I->getParent();
@@ -1459,7 +1498,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
return false;
if (!I1NonDbg->isTerminator())
return false;
- // Now we know that we only need to hoist debug instrinsics and the
+ // Now we know that we only need to hoist debug intrinsics and the
// terminator. Let the loop below handle those 2 cases.
}
@@ -2212,6 +2251,320 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
return Changed;
}
+namespace {
+
+struct CompatibleSets {
+ using SetTy = SmallVector<InvokeInst *, 2>;
+
+ SmallVector<SetTy, 1> Sets;
+
+ static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
+
+ SetTy &getCompatibleSet(InvokeInst *II);
+
+ void insert(InvokeInst *II);
+};
+
+CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
+  // Perform a linear scan over all the existing sets to see if the new `invoke`
+ // is compatible with any particular set. Since we know that all the `invokes`
+ // within a set are compatible, only check the first `invoke` in each set.
+ // WARNING: at worst, this has quadratic complexity.
+ for (CompatibleSets::SetTy &Set : Sets) {
+ if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
+ return Set;
+ }
+
+ // Otherwise, we either had no sets yet, or this invoke forms a new set.
+ return Sets.emplace_back();
+}
+
+void CompatibleSets::insert(InvokeInst *II) {
+ getCompatibleSet(II).emplace_back(II);
+}
+
+bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
+ assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
+
+ // Can we theoretically merge these `invoke`s?
+ auto IsIllegalToMerge = [](InvokeInst *II) {
+ return II->cannotMerge() || II->isInlineAsm();
+ };
+ if (any_of(Invokes, IsIllegalToMerge))
+ return false;
+
+ // Either both `invoke`s must be direct,
+ // or both `invoke`s must be indirect.
+ auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
+ bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
+ bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
+ if (HaveIndirectCalls) {
+ if (!AllCallsAreIndirect)
+ return false;
+ } else {
+ // All callees must be identical.
+ Value *Callee = nullptr;
+ for (InvokeInst *II : Invokes) {
+ Value *CurrCallee = II->getCalledOperand();
+ assert(CurrCallee && "There is always a called operand.");
+ if (!Callee)
+ Callee = CurrCallee;
+ else if (Callee != CurrCallee)
+ return false;
+ }
+ }
+
+ // Either both `invoke`s must not have a normal destination,
+  // or both `invoke`s must have a normal destination.
+ auto HasNormalDest = [](InvokeInst *II) {
+ return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
+ };
+ if (any_of(Invokes, HasNormalDest)) {
+    // Do not merge an `invoke` that does not have a normal destination with one
+ // that does have a normal destination, even though doing so would be legal.
+ if (!all_of(Invokes, HasNormalDest))
+ return false;
+
+ // All normal destinations must be identical.
+ BasicBlock *NormalBB = nullptr;
+ for (InvokeInst *II : Invokes) {
+ BasicBlock *CurrNormalBB = II->getNormalDest();
+ assert(CurrNormalBB && "There is always a 'continue to' basic block.");
+ if (!NormalBB)
+ NormalBB = CurrNormalBB;
+ else if (NormalBB != CurrNormalBB)
+ return false;
+ }
+
+ // In the normal destination, the incoming values for these two `invoke`s
+ // must be compatible.
+ SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
+ if (!IncomingValuesAreCompatible(
+ NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
+ &EquivalenceSet))
+ return false;
+ }
+
+#ifndef NDEBUG
+ // All unwind destinations must be identical.
+ // We know that because we have started from said unwind destination.
+ BasicBlock *UnwindBB = nullptr;
+ for (InvokeInst *II : Invokes) {
+ BasicBlock *CurrUnwindBB = II->getUnwindDest();
+ assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
+ if (!UnwindBB)
+ UnwindBB = CurrUnwindBB;
+ else
+ assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
+ }
+#endif
+
+ // In the unwind destination, the incoming values for these two `invoke`s
+ // must be compatible.
+ if (!IncomingValuesAreCompatible(
+ Invokes.front()->getUnwindDest(),
+ {Invokes[0]->getParent(), Invokes[1]->getParent()}))
+ return false;
+
+ // Ignoring arguments, these `invoke`s must be identical,
+ // including operand bundles.
+ const InvokeInst *II0 = Invokes.front();
+ for (auto *II : Invokes.drop_front())
+ if (!II->isSameOperationAs(II0))
+ return false;
+
+ // Can we theoretically form the data operands for the merged `invoke`?
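+  // Token-typed values cannot be merged through a phi node, so differing
+  // token arguments make a pair of `invoke`s unmergeable.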
+ auto IsIllegalToMergeArguments = [](auto Ops) {
+ Type *Ty = std::get<0>(Ops)->getType();
+ assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?");
+ return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops);
+ };
+ assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
+ if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
+ IsIllegalToMergeArguments))
+ return false;
+
+ return true;
+}
+
+} // namespace
+
+// Merge all invokes in the provided set, all of which are compatible
+// as per the `CompatibleSets::shouldBelongToSameSet()`.
+static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
+ DomTreeUpdater *DTU) {
+ assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
+
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ if (DTU)
+ Updates.reserve(2 + 3 * Invokes.size());
+
+ bool HasNormalDest =
+ !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
+
+ // Clone one of the invokes into a new basic block.
+ // Since they are all compatible, it doesn't matter which invoke is cloned.
+ InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
+ InvokeInst *II0 = Invokes.front();
+ BasicBlock *II0BB = II0->getParent();
+ BasicBlock *InsertBeforeBlock =
+ II0->getParent()->getIterator()->getNextNode();
+ Function *Func = II0BB->getParent();
+ LLVMContext &Ctx = II0->getContext();
+
+ BasicBlock *MergedInvokeBB = BasicBlock::Create(
+ Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
+
+ auto *MergedInvoke = cast<InvokeInst>(II0->clone());
+ // NOTE: all invokes have the same attributes, so no handling needed.
+ MergedInvokeBB->getInstList().push_back(MergedInvoke);
+
+ if (!HasNormalDest) {
+ // This set does not have a normal destination,
+ // so just form a new block with unreachable terminator.
+ BasicBlock *MergedNormalDest = BasicBlock::Create(
+ Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
+ new UnreachableInst(Ctx, MergedNormalDest);
+ MergedInvoke->setNormalDest(MergedNormalDest);
+ }
+
+    // The unwind destination, however, remains identical for all invokes here.
+
+ return MergedInvoke;
+ }();
+
+ if (DTU) {
+ // Predecessor blocks that contained these invokes will now branch to
+ // the new block that contains the merged invoke, ...
+ for (InvokeInst *II : Invokes)
+ Updates.push_back(
+ {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
+
+ // ... which has the new `unreachable` block as normal destination,
+ // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
+ for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
+ Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
+ SuccBBOfMergedInvoke});
+
+ // Since predecessor blocks now unconditionally branch to a new block,
+ // they no longer branch to their original successors.
+ for (InvokeInst *II : Invokes)
+ for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
+ Updates.push_back(
+ {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
+ }
+
+ bool IsIndirectCall = Invokes[0]->isIndirectCall();
+
+ // Form the merged operands for the merged invoke.
+ for (Use &U : MergedInvoke->operands()) {
+ // Only PHI together the indirect callees and data operands.
+ if (MergedInvoke->isCallee(&U)) {
+ if (!IsIndirectCall)
+ continue;
+ } else if (!MergedInvoke->isDataOperand(&U))
+ continue;
+
+ // Don't create trivial PHI's with all-identical incoming values.
+ bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
+ return II->getOperand(U.getOperandNo()) != U.get();
+ });
+ if (!NeedPHI)
+ continue;
+
+ // Form a PHI out of all the data ops under this index.
+ PHINode *PN = PHINode::Create(
+ U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke);
+ for (InvokeInst *II : Invokes)
+ PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
+
+ U.set(PN);
+ }
+
+ // We've ensured that each PHI node has compatible (identical) incoming values
+ // when coming from each of the `invoke`s in the current merge set,
+ // so update the PHI nodes accordingly.
+ for (BasicBlock *Succ : successors(MergedInvoke))
+ AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
+ /*ExistPred=*/Invokes.front()->getParent());
+
+ // And finally, replace the original `invoke`s with an unconditional branch
+ // to the block with the merged `invoke`. Also, give that merged `invoke`
+ // the merged debugloc of all the original `invoke`s.
+ const DILocation *MergedDebugLoc = nullptr;
+ for (InvokeInst *II : Invokes) {
+ // Compute the debug location common to all the original `invoke`s.
+ if (!MergedDebugLoc)
+ MergedDebugLoc = II->getDebugLoc();
+ else
+ MergedDebugLoc =
+ DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
+
+    // And replace the old `invoke` with an unconditional branch
+ // to the block with the merged `invoke`.
+ for (BasicBlock *OrigSuccBB : successors(II->getParent()))
+ OrigSuccBB->removePredecessor(II->getParent());
+ BranchInst::Create(MergedInvoke->getParent(), II->getParent());
+ II->replaceAllUsesWith(MergedInvoke);
+ II->eraseFromParent();
+ ++NumInvokesMerged;
+ }
+ MergedInvoke->setDebugLoc(MergedDebugLoc);
+ ++NumInvokeSetsFormed;
+
+ if (DTU)
+ DTU->applyUpdates(Updates);
+}
+
+/// If this block is a `landingpad` exception handling block, categorize all
+/// the predecessor `invoke`s into sets, with all `invoke`s in each set
+/// being "mergeable" together, and then merge invokes in each set together.
+///
+/// This is a weird mix of hoisting and sinking. Visually, it goes from:
+/// [...] [...]
+/// | |
+/// [invoke0] [invoke1]
+/// / \ / \
+/// [cont0] [landingpad] [cont1]
+/// to:
+/// [...] [...]
+/// \ /
+/// [invoke]
+/// / \
+/// [cont] [landingpad]
+///
+/// But of course we can only do that if the invokes share the `landingpad`,
+/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
+/// and the invoked functions are "compatible".
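+///
+/// For example, two identical "invoke void @f() to label %cont unwind label
+/// %lpad" instructions in separate predecessors of %lpad can be replaced by
+/// a single such invoke in a new block, with phi nodes merging any argument
+/// values that differ between the original call sites.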
+static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
+ if (!EnableMergeCompatibleInvokes)
+ return false;
+
+ bool Changed = false;
+
+ // FIXME: generalize to all exception handling blocks?
+ if (!BB->isLandingPad())
+ return Changed;
+
+ CompatibleSets Grouper;
+
+  // Record all the predecessors of this `landingpad`. As per the verifier,
+ // the only allowed predecessor is the unwind edge of an `invoke`.
+ // We want to group "compatible" `invokes` into the same set to be merged.
+ for (BasicBlock *PredBB : predecessors(BB))
+ Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
+
+  // And now, merge `invoke`s that were grouped together.
+ for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
+ if (Invokes.size() < 2)
+ continue;
+ Changed = true;
+ MergeCompatibleInvokesImpl(Invokes, DTU);
+ }
+
+ return Changed;
+}
+
/// Determine if we can hoist or sink a sole store instruction out of a
/// conditional block.
///
@@ -2326,15 +2679,15 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
passingValueIsAlwaysUndefined(ThenV, &PN))
return false;
+ if (canTrap(OrigV) || canTrap(ThenV))
+ return false;
+
HaveRewritablePHIs = true;
ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
if (!OrigCE && !ThenCE)
- continue; // Known safe and cheap.
+ continue; // Known cheap (FIXME: Maybe not true for aggregates).
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
- return false;
InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
InstructionCost MaxCost =
@@ -2626,40 +2979,85 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
return true;
}
-/// If we have a conditional branch on a PHI node value that is defined in the
-/// same block as the branch and if any PHI entries are constants, thread edges
-/// corresponding to that entry to be branches to their ultimate destination.
-static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
- DomTreeUpdater *DTU,
- const DataLayout &DL,
- AssumptionCache *AC) {
+static ConstantInt *
+getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To,
+ SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>,
+ ConstantInt *> &Visited) {
+  // Don't look past the block defining the value; we might get the value from
+ // a previous loop iteration.
+ auto *I = dyn_cast<Instruction>(V);
+ if (I && I->getParent() == To)
+ return nullptr;
+
+ // We know the value if the From block branches on it.
+ auto *BI = dyn_cast<BranchInst>(From->getTerminator());
+ if (BI && BI->isConditional() && BI->getCondition() == V &&
+ BI->getSuccessor(0) != BI->getSuccessor(1))
+ return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
+ : ConstantInt::getFalse(BI->getContext());
+
+  // Limit the number of blocks we inspect.
+ if (Visited.size() >= 8)
+ return nullptr;
+
+ auto Pair = Visited.try_emplace({From, To}, nullptr);
+ if (!Pair.second)
+ return Pair.first->second;
+
+ // Check whether the known value is the same for all predecessors.
+ ConstantInt *Common = nullptr;
+ for (BasicBlock *Pred : predecessors(From)) {
+ ConstantInt *C = getKnownValueOnEdge(V, Pred, From, Visited);
+ if (!C || (Common && Common != C))
+ return nullptr;
+ Common = C;
+ }
+ return Visited[{From, To}] = Common;
+}
+
+/// If we have a conditional branch on something for which we know the constant
+/// value in predecessors (e.g. a phi node in the current block), thread edges
+/// from the predecessor to their ultimate destination.
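+///
+/// For example, if a predecessor ends in "br i1 %c, label %bb, label %other"
+/// with two distinct successors, %c is known to be true on the edge into
+/// %bb, so a conditional branch on %c inside %bb can be folded along that
+/// edge.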
+static Optional<bool>
+FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
+ SmallMapVector<BasicBlock *, ConstantInt *, 8> KnownValues;
BasicBlock *BB = BI->getParent();
- PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
- // NOTE: we currently cannot transform this case if the PHI node is used
- // outside of the block.
- if (!PN || PN->getParent() != BB || !PN->hasOneUse())
- return false;
+ Value *Cond = BI->getCondition();
+ PHINode *PN = dyn_cast<PHINode>(Cond);
+ if (PN && PN->getParent() == BB) {
+ // Degenerate case of a single entry PHI.
+ if (PN->getNumIncomingValues() == 1) {
+ FoldSingleEntryPHINodes(PN->getParent());
+ return true;
+ }
- // Degenerate case of a single entry PHI.
- if (PN->getNumIncomingValues() == 1) {
- FoldSingleEntryPHINodes(PN->getParent());
- return true;
+ for (Use &U : PN->incoming_values())
+ if (auto *CB = dyn_cast<ConstantInt>(U))
+ KnownValues.insert({PN->getIncomingBlock(U), CB});
+ } else {
+ SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, ConstantInt *> Visited;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB, Visited))
+ KnownValues.insert({Pred, CB});
+ }
}
+ if (KnownValues.empty())
+ return false;
+
// Now we know that this block has multiple preds and two succs.
+ // Check that the block is small enough and values defined in the block are
+ // not used outside of it.
if (!BlockIsSimpleEnoughToThreadThrough(BB))
return false;
- // Okay, this is a simple enough basic block. See if any phi values are
- // constants.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
- if (!CB || !CB->getType()->isIntegerTy(1))
- continue;
-
+ for (const auto &Pair : KnownValues) {
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
- BasicBlock *PredBB = PN->getIncomingBlock(i);
+ ConstantInt *CB = Pair.second;
+ BasicBlock *PredBB = Pair.first;
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
if (RealDest == BB)
@@ -2690,6 +3088,7 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
// cloned instructions outside of EdgeBB.
BasicBlock::iterator InsertPt = EdgeBB->begin();
DenseMap<Value *, Value *> TranslateMap; // Track translated values.
+ TranslateMap[Cond] = Pair.second;
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
@@ -2708,7 +3107,7 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
}
// Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
+ if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
if (!BBI->use_empty())
TranslateMap[&*BBI] = V;
if (!N->mayHaveSideEffects()) {
@@ -2746,6 +3145,12 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
DTU->applyUpdates(Updates);
}
+ // For simplicity, we created a separate basic block for the edge. Merge
+ // it back into the predecessor if possible. This not only avoids
+ // unnecessary SimplifyCFG iterations, but also makes sure that we don't
+ // bypass the check for trivial cycles above.
+ MergeBlockIntoPredecessor(EdgeBB, DTU);
+
// Signal repeat, simplifying any other constants.
return None;
}
@@ -2753,13 +3158,15 @@ static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
return false;
}
-static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
- const DataLayout &DL, AssumptionCache *AC) {
+static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
+ DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
Optional<bool> Result;
bool EverChanged = false;
do {
// Note that None means "we changed things, but recurse further."
- Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC);
+ Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
EverChanged |= Result == None || *Result;
} while (Result == None);
return EverChanged;
@@ -2847,7 +3254,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
- if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
+ if (Value *V = simplifyInstruction(PN, {DL, PN})) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
Changed = true;
@@ -3186,18 +3593,18 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
- if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ if (!Cond ||
+ (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
+ !isa<SelectInst>(Cond)) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
// Cond is known to be a compare or binary operator. Check to make sure that
// neither operand is a potentially-trapping constant expression.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
- if (CE->canTrap())
- return false;
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
- if (CE->canTrap())
- return false;
+ if (canTrap(Cond->getOperand(0)))
+ return false;
+ if (canTrap(Cond->getOperand(1)))
+ return false;
// Finally, don't infinitely unroll conditional loops.
if (is_contained(successors(BB), BB))
@@ -3384,7 +3791,9 @@ static bool mergeConditionalStoreToAddress(
return false;
// Now check the stores are compatible.
- if (!QStore->isUnordered() || !PStore->isUnordered())
+ if (!QStore->isUnordered() || !PStore->isUnordered() ||
+ PStore->getValueOperand()->getType() !=
+ QStore->getValueOperand()->getType())
return false;
// Check that sinking the store won't cause program behavior changes. Sinking
@@ -3687,7 +4096,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
// Okay, the outcome of this conditional branch is statically
- // knowable. If this block had a single pred, handle specially.
+ // knowable. If this block had a single pred, handle specially, otherwise
+ // FoldCondBranchOnValueKnownInPredecessor() will handle it.
if (BB->getSinglePredecessor()) {
// Turn this into a branch on constant.
bool CondIsTrue = PBI->getSuccessor(0) == BB;
@@ -3695,35 +4105,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
return true; // Nuke the branch on constant.
}
-
- // Otherwise, if there are multiple predecessors, insert a PHI that merges
- // in the constant and simplify the block result. Subsequent passes of
- // simplifycfg will thread the block.
- if (BlockIsSimpleEnoughToThreadThrough(BB)) {
- pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
- PHINode *NewPN = PHINode::Create(
- Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
- BI->getCondition()->getName() + ".pr", &BB->front());
- // Okay, we're going to insert the PHI node. Since PBI is not the only
- // predecessor, compute the PHI'd conditional value for all of the preds.
- // Any predecessor where the condition is not computable we keep symbolic.
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
- PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
- bool CondIsTrue = PBI->getSuccessor(0) == BB;
- NewPN->addIncoming(
- ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
- P);
- } else {
- NewPN->addIncoming(BI->getCondition(), P);
- }
- }
-
- BI->setCondition(NewPN);
- return true;
- }
}
// If the previous block ended with a widenable branch, determine if reusing
@@ -3732,9 +4113,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
return true;
- if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
- if (CE->canTrap())
- return false;
+ if (canTrap(BI->getCondition()))
+ return false;
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
@@ -3791,15 +4171,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
PHINode *PN = cast<PHINode>(II);
Value *BIV = PN->getIncomingValueForBlock(BB);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
- if (CE->canTrap())
- return false;
+ if (canTrap(BIV))
+ return false;
unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
Value *PBIV = PN->getIncomingValue(PBBIdx);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
- if (CE->canTrap())
- return false;
+ if (canTrap(PBIV))
+ return false;
}
// Finally, if everything is ok, fold the branches to logical ops.
@@ -4116,7 +4494,7 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
- if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
+ if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
}
@@ -4812,8 +5190,9 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch,
}
}
-/// Turn a switch with two reachable destinations into an integer range
-/// comparison and branch.
+/// Turn a switch into an integer range comparison and branch.
+/// Switches with more than 2 destinations are ignored.
+/// Switches with 1 destination are also ignored.
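+///
+/// For example, a switch on i32 %x whose cases 1, 2 and 3 all branch to
+/// %dest conceptually becomes:
+///   %off = sub i32 %x, 1
+///   %cmp = icmp ult i32 %off, 3
+///   br i1 %cmp, label %dest, label %default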
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
IRBuilder<> &Builder) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
@@ -4845,6 +5224,8 @@ bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
}
return false; // More than two destinations.
}
+ if (!DestB)
+ return false; // All destinations are the same and the default is unreachable
assert(DestA && DestB &&
"Single-destination switch should have been folded.");
@@ -5169,11 +5550,6 @@ ConstantFold(Instruction *I, const DataLayout &DL,
return nullptr;
}
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
- return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
- COps[1], DL);
- }
-
return ConstantFoldInstOperands(I, COps, DL);
}
@@ -5182,7 +5558,7 @@ ConstantFold(Instruction *I, const DataLayout &DL,
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
-GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
+getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
BasicBlock **CommonDest,
SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
const DataLayout &DL, const TargetTransformInfo &TTI) {
@@ -5253,9 +5629,9 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
// Helper function used to add CaseVal to the list of cases that generate
// Result. Returns the updated number of cases that generate this result.
-static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *Result) {
+static size_t mapCaseToResult(ConstantInt *CaseVal,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *Result) {
for (auto &I : UniqueResults) {
if (I.first == Result) {
I.second.push_back(CaseVal);
@@ -5271,18 +5647,19 @@ static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
-static bool
-InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *&DefaultResult, const DataLayout &DL,
- const TargetTransformInfo &TTI,
- uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
+static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI,
+ uintptr_t MaxUniqueResults) {
for (auto &I : SI->cases()) {
ConstantInt *CaseVal = I.getCaseValue();
// Resulting value at phi nodes for this case value.
SwitchCaseResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
+ if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
DL, TTI))
return false;
@@ -5291,11 +5668,11 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
return false;
// Add the case->result mapping to UniqueResults.
- const uintptr_t NumCasesForResult =
- MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
+ const size_t NumCasesForResult =
+ mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
// Early out if there are too many cases for this result.
- if (NumCasesForResult > MaxCasesPerResult)
+ if (NumCasesForResult > MaxSwitchCasesPerResult)
return false;
// Early out if there are too many unique results.
@@ -5311,7 +5688,7 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
// Find the default result value.
SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
BasicBlock *DefaultDest = SI->getDefaultDest();
- GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
+ getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
DL, TTI);
// If the default value is not found abort unless the default destination
// is unreachable.
@@ -5326,48 +5703,76 @@ InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
-// Example:
-// switch (a) {
-// case 10: %0 = icmp eq i32 %a, 10
-// return 10; %1 = select i1 %0, i32 10, i32 4
-// case 20: ----> %2 = icmp eq i32 %a, 20
-// return 2; %3 = select i1 %2, i32 2, i32 %1
-// default:
-// return 4;
-// }
-static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
- Constant *DefaultResult, Value *Condition,
- IRBuilder<> &Builder) {
+// TODO: Handle switches with more than 2 cases that map to the same result.
+static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
+ Constant *DefaultResult, Value *Condition,
+ IRBuilder<> &Builder) {
// If we are selecting between only two cases transform into a simple
// select or a two-way select if default is possible.
+ // Example:
+ // switch (a) { %0 = icmp eq i32 %a, 10
+ // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
+ // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
+ // default: return 4; %3 = select i1 %2, i32 2, i32 %1
+ // }
if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
ResultVector[1].second.size() == 1) {
- ConstantInt *const FirstCase = ResultVector[0].second[0];
- ConstantInt *const SecondCase = ResultVector[1].second[0];
-
- bool DefaultCanTrigger = DefaultResult;
+ ConstantInt *FirstCase = ResultVector[0].second[0];
+ ConstantInt *SecondCase = ResultVector[1].second[0];
Value *SelectValue = ResultVector[1].first;
- if (DefaultCanTrigger) {
- Value *const ValueCompare =
+ if (DefaultResult) {
+ Value *ValueCompare =
Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
DefaultResult, "switch.select");
}
- Value *const ValueCompare =
+ Value *ValueCompare =
Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
SelectValue, "switch.select");
}
- // Handle the degenerate case where two cases have the same value.
- if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 &&
- DefaultResult) {
- Value *Cmp1 = Builder.CreateICmpEQ(
- Condition, ResultVector[0].second[0], "switch.selectcmp.case1");
- Value *Cmp2 = Builder.CreateICmpEQ(
- Condition, ResultVector[0].second[1], "switch.selectcmp.case2");
- Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
- return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ // Handle the degenerate case where two cases have the same result value.
+ if (ResultVector.size() == 1 && DefaultResult) {
+ ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
+ unsigned CaseCount = CaseValues.size();
+    // A group of 2^n cases that differ only in n bit positions maps to the
+    // same result:
+ // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
+ // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
+ // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
+ if (isPowerOf2_32(CaseCount)) {
+ ConstantInt *MinCaseVal = CaseValues[0];
+      // Find the minimal case value.
+ for (auto Case : CaseValues)
+ if (Case->getValue().slt(MinCaseVal->getValue()))
+ MinCaseVal = Case;
+
+      // Mark the bits that the case values touch.
+ APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
+ for (auto Case : CaseValues)
+ BitMask |= (Case->getValue() - MinCaseVal->getValue());
+
+      // Check whether the cases with the same result cover every value of
+      // the touched bits.
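+      // For example, for the hypothetical cases {0, 2, 4, 6}: MinCaseVal is
+      // 0, BitMask becomes 0b110, and countPopulation(0b110) == 2 ==
+      // Log2_32(4), so the fold below emits
+      //   (Cond & ~0b110) == 0 ? result : default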
+ if (BitMask.countPopulation() == Log2_32(CaseCount)) {
+ if (!MinCaseVal->isNullValue())
+ Condition = Builder.CreateSub(Condition, MinCaseVal);
+ Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
+ Value *Cmp = Builder.CreateICmpEQ(
+ And, Constant::getNullValue(And->getType()), "switch.selectcmp");
+ return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ }
+ }
+
+    // Handle the degenerate case where two case values map to the same result.
+ if (CaseValues.size() == 2) {
+ Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
+ "switch.selectcmp.case1");
+ Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
+ "switch.selectcmp.case2");
+ Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
+ return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
+ }
}
return nullptr;
@@ -5375,10 +5780,10 @@ static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
-static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
- Value *SelectValue,
- IRBuilder<> &Builder,
- DomTreeUpdater *DTU) {
+static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
+ Value *SelectValue,
+ IRBuilder<> &Builder,
+ DomTreeUpdater *DTU) {
std::vector<DominatorTree::UpdateType> Updates;
BasicBlock *SelectBB = SI->getParent();
@@ -5409,33 +5814,31 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
DTU->applyUpdates(Updates);
}
-/// If the switch is only used to initialize one or more
-/// phi nodes in a common successor block with only two different
-/// constant values, replace the switch with select.
-static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- DomTreeUpdater *DTU, const DataLayout &DL,
- const TargetTransformInfo &TTI) {
+/// If a switch is only used to initialize one or more phi nodes in a common
+/// successor block with only two different constant values, try to replace the
+/// switch with a select. Returns true if the fold was made.
+static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
BasicBlock *CommonDest = nullptr;
Constant *DefaultResult;
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
- if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
- DL, TTI, /*MaxUniqueResults*/2,
- /*MaxCasesPerResult*/2))
+ if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
+ DL, TTI, /*MaxUniqueResults*/ 2))
return false;
- assert(PHI != nullptr && "PHI for value select not found");
+ assert(PHI != nullptr && "PHI for value select not found");
Builder.SetInsertPoint(SI);
Value *SelectValue =
- ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
- if (SelectValue) {
- RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU);
- return true;
- }
- // The switch couldn't be converted into a select.
- return false;
+ foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
+ if (!SelectValue)
+ return false;
+
+ removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
+ return true;
}
namespace {
@@ -5655,7 +6058,7 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
IntegerType *IT = cast<IntegerType>(Index->getType());
uint64_t TableSize =
Array->getInitializer()->getType()->getArrayNumElements();
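+  // Clamp the shift amount at 63 so that an index type wider than 64 bits
+  // cannot turn the 64-bit shift below into undefined behavior.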
- if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
+ if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
Index = Builder.CreateZExt(
Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
"switch.tableidx.zext");
@@ -5707,6 +6110,27 @@ static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
DL.fitsInLegalInteger(IT->getBitWidth());
}
+static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
+ // 40% is the default density for building a jump table in optsize/minsize
+ // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
+ // function was based on.
+ const uint64_t MinDensity = 40;
+
+ if (CaseRange >= UINT64_MAX / 100)
+ return false; // Avoid multiplication overflows below.
+
+ return NumCases * 100 >= CaseRange * MinDensity;
+}
+
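+// As a quick sanity check of the density formula above: case values
+// {0, 2, 4, 6} span a range of 7, and 4 * 100 >= 7 * 40 holds, so such a
+// switch counts as dense, while values {0, 100} give 2 * 100 < 101 * 40
+// and count as sparse.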
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+ uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+ uint64_t Range = Diff + 1;
+ if (Range < Diff)
+ return false; // Overflow.
+
+ return isSwitchDense(Values.size(), Range);
+}
+
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
@@ -5716,8 +6140,8 @@ static bool
ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
const TargetTransformInfo &TTI, const DataLayout &DL,
const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
- if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
- return false; // TableSize overflowed, or mul below might overflow.
+ if (SI->getNumCases() > TableSize)
+ return false; // TableSize overflowed.
bool AllTablesFitInRegister = true;
bool HasIllegalType = false;
@@ -5747,10 +6171,7 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
if (HasIllegalType)
return false;
- // The table density should be at least 40%. This is the same criterion as for
- // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Find the best cut-off.
- return SI->getNumCases() * 10 >= TableSize * 4;
+ return isSwitchDense(SI->getNumCases(), TableSize);
}
/// Try to reuse the switch table index compare. Following pattern:
@@ -5888,7 +6309,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Resulting value at phi nodes for this case value.
using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
ResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
+ if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
Results, DL, TTI))
return false;
@@ -5916,7 +6337,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
bool HasDefaultResults =
- GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+ getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
DefaultResultsList, DL, TTI);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
@@ -6086,17 +6507,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
-static bool isSwitchDense(ArrayRef<int64_t> Values) {
- // See also SelectionDAGBuilder::isDense(), which this function was based on.
- uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
- uint64_t Range = Diff + 1;
- uint64_t NumCases = Values.size();
- // 40% is the default density for building a jump table in optsize/minsize mode.
- uint64_t MinDensity = 40;
-
- return NumCases * 100 >= Range * MinDensity;
-}
-
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
@@ -6211,14 +6621,16 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
}
// Try to transform the switch into an icmp and a branch.
- if (TurnSwitchRangeIntoICmp(SI, Builder))
+ // The conversion from switch to comparison may lose information on
+ // impossible switch values, so disable it early in the pipeline.
+ if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
return requestResimplify();
// Remove unreachable cases.
if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
return requestResimplify();
- if (switchToSelect(SI, Builder, DTU, DL, TTI))
+ if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
@@ -6521,12 +6933,11 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();
}
- // If this is a branch on a phi node in the current block, thread control
- // through this block if any PHI node entries are constants.
- if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
- if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC))
- return requestResimplify();
+ // If this is a branch on something for which we know the constant value in
+ // predecessors (e.g. a phi node in the current block), thread control
+ // through this block.
+ if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
+ return requestResimplify();
// Scan predecessor blocks for conditional branches.
for (BasicBlock *Pred : predecessors(BB))
@@ -6725,7 +7136,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
return true;
if (SinkCommon && Options.SinkCommonInsts)
- if (SinkCommonCodeFromPredecessors(BB, DTU)) {
+ if (SinkCommonCodeFromPredecessors(BB, DTU) ||
+ MergeCompatibleInvokes(BB, DTU)) {
// SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
// Let's rerun EliminateDuplicatePHINodes() first,
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 5b7fd4349c6c..dbef1ff2e739 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -13,11 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -58,7 +56,7 @@ namespace {
SCEVExpander &Rewriter;
SmallVectorImpl<WeakTrackingVH> &DeadInsts;
- bool Changed;
+ bool Changed = false;
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
@@ -66,7 +64,7 @@ namespace {
SCEVExpander &Rewriter,
SmallVectorImpl<WeakTrackingVH> &Dead)
: L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter),
- DeadInsts(Dead), Changed(false) {
+ DeadInsts(Dead) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -161,11 +159,12 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
D = ConstantInt::get(UseInst->getContext(),
APInt::getOneBitSet(BitWidth, D->getZExtValue()));
}
- FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ const auto *LHS = SE->getSCEV(IVSrc);
+ const auto *RHS = SE->getSCEV(D);
+ FoldedExpr = SE->getUDivExpr(LHS, RHS);
// We might have 'exact' flag set at this point which will no longer be
// correct after we make the replacement.
- if (UseInst->isExact() &&
- SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
+ if (UseInst->isExact() && LHS != SE->getMulExpr(FoldedExpr, RHS))
MustDropExactFlag = true;
}
  // We have something that might fold its operand. Compare SCEVs.
@@ -872,6 +871,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
assert(N <= Simplified.size() && "runaway iteration");
+ (void) N;
Value *NewOper = foldIVUser(UseInst, IVOperand);
if (!NewOper)
@@ -1757,10 +1757,6 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
truncateIVUse(DU, DT, LI);
return nullptr;
}
- // Assume block terminators cannot evaluate to a recurrence. We can't to
- // insert a Trunc after a terminator if there happens to be a critical edge.
- assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
- "SCEV is not expected to evaluate a block terminator");
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index e02d02a05752..f4306bb43dfd 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -14,28 +14,23 @@
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;
@@ -206,6 +201,11 @@ static Value *copyFlags(const CallInst &Old, Value *New) {
return New;
}
+// Helper to avoid truncating the length if size_t is 32-bits.
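+// (StringRef::substr takes a size_t count; on an ILP32 host a 64-bit Len
+// such as 1ULL << 32 would otherwise wrap to 0 and yield the wrong result.)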
+static StringRef substr(StringRef Str, uint64_t Len) {
+ return Len >= Str.size() ? Str : Str.substr(0, Len);
+}
+
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -242,7 +242,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
// the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
+ Value *CpyDst = B.CreateInBoundsGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
@@ -326,7 +326,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
if (!getConstantStringInfo(SrcStr, Str)) {
if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
- return B.CreateGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
+ return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
return nullptr;
}
@@ -339,35 +339,29 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
+ return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
}
Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ Value *CharVal = CI->getArgOperand(1);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
annotateNonNullNoUndefBasedOnAccess(CI, 0);
- // Cannot fold anything if we're not looking for a constant.
- if (!CharC)
- return nullptr;
-
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
- if (CharC->isZero())
+ if (CharC && CharC->isZero())
return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
return nullptr;
}
- // Compute the offset.
- size_t I = (0xFF & CharC->getSExtValue()) == 0
- ? Str.size()
- : Str.rfind(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. Return null.
- return Constant::getNullValue(CI->getType());
-
- // strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
+ // Try to expand strrchr to the memrchr nonstandard extension if it's
+ // available, or simply fail otherwise.
+ uint64_t NBytes = Str.size() + 1; // Include the terminating nul.
+ Type *IntPtrType = DL.getIntPtrType(CI->getContext());
+ Value *Size = ConstantInt::get(IntPtrType, NBytes);
+ return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI));
}
Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
@@ -428,6 +422,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
return nullptr;
}
+// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
+// arrays LHS and RHS and nonconstant Size.
+static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
+ Value *Size, bool StrNCmp,
+ IRBuilderBase &B, const DataLayout &DL);
+
Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
Value *Str1P = CI->getArgOperand(0);
Value *Str2P = CI->getArgOperand(1);
@@ -442,7 +442,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
Length = LengthArg->getZExtValue();
else
- return nullptr;
+ return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL);
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -456,8 +456,9 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
// strncmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2) {
- StringRef SubStr1 = Str1.substr(0, Length);
- StringRef SubStr2 = Str2.substr(0, Length);
+ // Avoid truncating the 64-bit Length to 32 bits in ILP32.
+ StringRef SubStr1 = substr(Str1, Length);
+ StringRef SubStr2 = substr(Str2, Length);
return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
}
@@ -557,8 +558,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
Type *PT = Callee->getFunctionType()->getParamType(0);
Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
- Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst,
- ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+ Value *DstEnd = B.CreateInBoundsGEP(
+ B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -634,12 +635,51 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
}
Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
- unsigned CharSize) {
+ unsigned CharSize,
+ Value *Bound) {
Value *Src = CI->getArgOperand(0);
+ Type *CharTy = B.getIntNTy(CharSize);
+
+ if (isOnlyUsedInZeroEqualityComparison(CI) &&
+ (!Bound || isKnownNonZero(Bound, DL))) {
+ // Fold strlen:
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ // and likewise strnlen with constant N > 0:
+ // strnlen(x, N) != 0 --> *x != 0
+ // strnlen(x, N) == 0 --> *x == 0
+ return B.CreateZExt(B.CreateLoad(CharTy, Src, "char0"),
+ CI->getType());
+ }
+
+ if (Bound) {
+ if (ConstantInt *BoundCst = dyn_cast<ConstantInt>(Bound)) {
+ if (BoundCst->isZero())
+ // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise.
+ return ConstantInt::get(CI->getType(), 0);
+
+ if (BoundCst->isOne()) {
+ // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s.
+ Value *CharVal = B.CreateLoad(CharTy, Src, "strnlen.char0");
+ Value *ZeroChar = ConstantInt::get(CharTy, 0);
+ Value *Cmp = B.CreateICmpNE(CharVal, ZeroChar, "strnlen.char0cmp");
+ return B.CreateZExt(Cmp, CI->getType());
+ }
+ }
+ }
+
+ if (uint64_t Len = GetStringLength(Src, CharSize)) {
+ Value *LenC = ConstantInt::get(CI->getType(), Len - 1);
+ // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2
+ // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound.
+ if (Bound)
+ return B.CreateBinaryIntrinsic(Intrinsic::umin, LenC, Bound);
+ return LenC;
+ }
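
Taken together, the bound handling above implements the usual strnlen identities; a standalone model, with strnlen_ref as a hypothetical stand-in since strnlen is POSIX rather than ISO C:

#include <algorithm>
#include <cassert>
#include <cstring>

// Reference strnlen: the length of S, but never more than N.
static size_t strnlen_ref(const char *S, size_t N) {
  const void *P = memchr(S, '\0', N);
  return P ? static_cast<size_t>(static_cast<const char *>(P) - S) : N;
}

int main() {
  const char *S = "xyz";
  assert(strnlen_ref(S, 0) == 0);            // strnlen(s, 0) -> 0
  assert(strnlen_ref(S, 1) == (*S ? 1 : 0)); // strnlen(s, 1) -> *s ? 1 : 0
  for (size_t N = 0; N < 6; ++N)             // strnlen("xyz", N) -> umin(3, N)
    assert(strnlen_ref(S, N) == std::min<size_t>(3, N));
}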
- // Constant folding: strlen("xyz") -> 3
- if (uint64_t Len = GetStringLength(Src, CharSize))
- return ConstantInt::get(CI->getType(), Len - 1);
+ if (Bound)
+ // Punt for strnlen for now.
+ return nullptr;
// If s is a constant pointer pointing to a string literal, we can fold
// strlen(s + x) to strlen(s) - x, when x is known to be in the range
@@ -650,6 +690,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
// very useful because calling strlen for a pointer of other types is
// very uncommon.
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
+ // TODO: Handle subobjects.
if (!isGEPBasedOnPointerToString(GEP, CharSize))
return nullptr;
@@ -674,22 +715,15 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
Value *Offset = GEP->getOperand(2);
KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr);
- Known.Zero.flipAllBits();
uint64_t ArrSize =
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
- // KnownZero's bits are flipped, so zeros in KnownZero now represent
- // bits known to be zeros in Offset, and ones in KnowZero represent
- // bits unknown in Offset. Therefore, Offset is known to be in range
- // [0, NullTermIdx] when the flipped KnownZero is non-negative and
- // unsigned-less-than NullTermIdx.
- //
// If Offset is not provably in the range [0, NullTermIdx], we can still
// optimize if we can prove that the program has undefined behavior when
// Offset is outside that range. That is the case when GEP->getOperand(0)
// is a pointer to an object whose memory extent is NullTermIdx+1.
- if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
- (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
+ if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) ||
+ (isa<GlobalVariable>(GEP->getOperand(0)) &&
NullTermIdx == ArrSize - 1)) {
Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
@@ -713,12 +747,6 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
}
}
- // strlen(x) != 0 --> *x != 0
- // strlen(x) == 0 --> *x == 0
- if (isOnlyUsedInZeroEqualityComparison(CI))
- return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"),
- CI->getType());
-
return nullptr;
}
@@ -729,6 +757,16 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
return nullptr;
}
+Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) {
+ Value *Bound = CI->getArgOperand(1);
+ if (Value *V = optimizeStringLength(CI, B, 8, Bound))
+ return V;
+
+ if (isKnownNonZero(Bound, DL))
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ return nullptr;
+}
+
Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) {
Module &M = *CI->getModule();
unsigned WCharSize = TLI->getWCharSize(M) * 8;
@@ -755,8 +793,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
- return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I),
- "strpbrk");
+ return B.CreateInBoundsGEP(B.getInt8Ty(), CI->getArgOperand(0),
+ B.getInt64(I), "strpbrk");
}
// strpbrk(s, "a") -> strchr(s, 'a')
@@ -880,35 +918,190 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
}
Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
- if (isKnownNonZero(CI->getOperand(2), DL))
- annotateNonNullNoUndefBasedOnAccess(CI, 0);
- return nullptr;
+ Value *SrcStr = CI->getArgOperand(0);
+ Value *Size = CI->getArgOperand(2);
+ annotateNonNullAndDereferenceable(CI, 0, Size, DL);
+ Value *CharVal = CI->getArgOperand(1);
+ ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+
+ if (LenC) {
+ if (LenC->isZero())
+ // Fold memrchr(x, y, 0) --> null.
+ return NullPtr;
+
+ if (LenC->isOne()) {
+ // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y,
+ // constant or otherwise.
+ Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memrchr.char0");
+ // Slice off the character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
+ Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memrchr.char0cmp");
+ return B.CreateSelect(Cmp, SrcStr, NullPtr, "memrchr.sel");
+ }
+ }
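
The N == 1 case is a single byte compare after truncating the character argument. A standalone check: memrchr and memchr agree when N == 1, so memchr serves as the oracle here, and the over-wide character exercises the truncation:

#include <cassert>
#include <cstring>

int main() {
  const char S[] = "q";
  for (int C : {'q', 'r', 'q' + 256}) {
    bool Eq = static_cast<unsigned char>(S[0]) ==
              static_cast<unsigned char>(C);
    assert(memchr(S, C, 1) == (Eq ? S : nullptr));
  }
}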
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+ return nullptr;
+
+ if (Str.size() == 0)
+ // If the array is empty, fold memrchr(A, C, N) to null for any value
+ // of C and N on the basis that the only valid value of N is zero
+ // (otherwise the call is undefined).
+ return NullPtr;
+
+ uint64_t EndOff = UINT64_MAX;
+ if (LenC) {
+ EndOff = LenC->getZExtValue();
+ if (Str.size() < EndOff)
+ // Punt out-of-bounds accesses to sanitizers and/or libc.
+ return nullptr;
+ }
+
+ if (ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal)) {
+ // Fold memrchr(S, C, N) for a constant C.
+ size_t Pos = Str.rfind(CharC->getZExtValue(), EndOff);
+ if (Pos == StringRef::npos)
+ // When the character is not in the source array fold the result
+ // to null regardless of Size.
+ return NullPtr;
+
+ if (LenC)
+ // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos.
+ return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos));
+
+ if (Str.find(Str[Pos]) == Pos) {
+ // When there is just a single occurrence of C in S, i.e., the one
+ // in Str[Pos], fold
+ // memrchr(s, c, N) --> N <= Pos ? null : s + Pos
+ // for nonconstant N.
+ Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
+ "memrchr.cmp");
+ Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr,
+ B.getInt64(Pos), "memrchr.ptr_plus");
+ return B.CreateSelect(Cmp, NullPtr, SrcPlus, "memrchr.sel");
+ }
+ }
+
+ // Truncate the string to search at most EndOff characters.
+ Str = Str.substr(0, EndOff);
+ if (Str.find_first_not_of(Str[0]) != StringRef::npos)
+ return nullptr;
+
+ // If the source array consists of all equal characters, then for any
+ // C and N (whether in bounds or not), fold memrchr(S, C, N) to
+ // N != 0 && *S == C ? S + N - 1 : null
+ Type *SizeTy = Size->getType();
+ Type *Int8Ty = B.getInt8Ty();
+ Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
+ // Slice off the sought character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, Int8Ty);
+ Value *CEqS0 = B.CreateICmpEQ(ConstantInt::get(Int8Ty, Str[0]), CharVal);
+ Value *And = B.CreateLogicalAnd(NNeZ, CEqS0);
+ Value *SizeM1 = B.CreateSub(Size, ConstantInt::get(SizeTy, 1));
+ Value *SrcPlus =
+ B.CreateInBoundsGEP(Int8Ty, SrcStr, SizeM1, "memrchr.ptr_plus");
+ return B.CreateSelect(And, SrcPlus, NullPtr, "memrchr.sel");
}
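
Standalone checks of the two tail folds above, the single-occurrence select and the all-equal-array select, again with memrchr_ref as a hypothetical stand-in for the nonstandard memrchr:

#include <cassert>
#include <cstring>

// Hypothetical portable stand-in for the nonstandard memrchr.
static const void *memrchr_ref(const void *S, int C, size_t N) {
  const unsigned char *P = static_cast<const unsigned char *>(S);
  while (N--)
    if (P[N] == static_cast<unsigned char>(C))
      return P + N;
  return nullptr;
}

int main() {
  // Single occurrence: 'b' appears once in "abc", at Pos = 1, so
  //   memrchr(S, 'b', N) == (N <= Pos ? null : S + Pos).
  const char S1[] = "abc";
  const size_t Pos = 1;
  for (size_t N = 0; N <= 3; ++N)
    assert(memrchr_ref(S1, 'b', N) == (N <= Pos ? nullptr : S1 + Pos));

  // All-equal array: the last byte searched is also the last match, so
  //   memrchr(S, C, N) == (N != 0 && *S == C ? S + N - 1 : null).
  const char S2[] = {'a', 'a', 'a', 'a'};
  for (size_t N = 0; N <= sizeof S2; ++N)
    for (int C : {'a', 'b'})
      assert(memrchr_ref(S2, C, N) ==
             ((N != 0 && S2[0] == C) ? S2 + N - 1 : nullptr));
}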
Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
Value *Size = CI->getArgOperand(2);
- annotateNonNullAndDereferenceable(CI, 0, Size, DL);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (isKnownNonZero(Size, DL))
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
+
+ Value *CharVal = CI->getArgOperand(1);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
+ Value *NullPtr = Constant::getNullValue(CI->getType());
// memchr(x, y, 0) -> null
if (LenC) {
if (LenC->isZero())
- return Constant::getNullValue(CI->getType());
- } else {
- // From now on we need at least constant length and string.
- return nullptr;
+ return NullPtr;
+
+ if (LenC->isOne()) {
+ // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y,
+ // constant or otherwise.
+ Value *Val = B.CreateLoad(B.getInt8Ty(), SrcStr, "memchr.char0");
+ // Slice off the character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, B.getInt8Ty());
+ Value *Cmp = B.CreateICmpEQ(Val, CharVal, "memchr.char0cmp");
+ return B.CreateSelect(Cmp, SrcStr, NullPtr, "memchr.sel");
+ }
}
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
return nullptr;
- // Truncate the string to LenC. If Str is smaller than LenC we will still only
- // scan the string, as reading past the end of it is undefined and we can just
- // return null if we don't find the char.
- Str = Str.substr(0, LenC->getZExtValue());
+ if (CharC) {
+ size_t Pos = Str.find(CharC->getZExtValue());
+ if (Pos == StringRef::npos)
+ // When the character is not in the source array fold the result
+ // to null regardless of Size.
+ return NullPtr;
+
+ // Fold memchr(s, c, n) -> n <= Pos ? null : s + Pos
+ // When the constant Size is less than or equal to the character
+ // position also fold the result to null.
+ Value *Cmp = B.CreateICmpULE(Size, ConstantInt::get(Size->getType(), Pos),
+ "memchr.cmp");
+ Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(Pos),
+ "memchr.ptr");
+ return B.CreateSelect(Cmp, NullPtr, SrcPlus);
+ }
+
+ if (Str.size() == 0)
+ // If the array is empty, fold memchr(A, C, N) to null for any value
+ // of C and N on the basis that the only valid value of N is zero
+ // (otherwise the call is undefined).
+ return NullPtr;
+
+ if (LenC)
+ Str = substr(Str, LenC->getZExtValue());
+
+ size_t Pos = Str.find_first_not_of(Str[0]);
+ if (Pos == StringRef::npos
+ || Str.find_first_not_of(Str[Pos], Pos) == StringRef::npos) {
+ // If the source array consists of at most two consecutive sequences
+ // of the same characters, then for any C and N (whether in bounds or
+ // not), fold memchr(S, C, N) to
+ // N != 0 && *S == C ? S : null
+ // or for the two sequences to:
+ // N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null)
+ // where Sel2 is the outer select above and Sel1 the inner one.
+ // The latter also makes it possible to fold strchr() calls on strings
+ // consisting of the same character.
+ Type *SizeTy = Size->getType();
+ Type *Int8Ty = B.getInt8Ty();
+
+ // Slice off the sought character's high end bits.
+ CharVal = B.CreateTrunc(CharVal, Int8Ty);
+
+ Value *Sel1 = NullPtr;
+ if (Pos != StringRef::npos) {
+ // Handle two consecutive sequences of the same characters.
+ Value *PosVal = ConstantInt::get(SizeTy, Pos);
+ Value *StrPos = ConstantInt::get(Int8Ty, Str[Pos]);
+ Value *CEqSPos = B.CreateICmpEQ(CharVal, StrPos);
+ Value *NGtPos = B.CreateICmp(ICmpInst::ICMP_UGT, Size, PosVal);
+ Value *And = B.CreateAnd(CEqSPos, NGtPos);
+ Value *SrcPlus = B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, PosVal);
+ Sel1 = B.CreateSelect(And, SrcPlus, NullPtr, "memchr.sel1");
+ }
+
+ Value *Str0 = ConstantInt::get(Int8Ty, Str[0]);
+ Value *CEqS0 = B.CreateICmpEQ(Str0, CharVal);
+ Value *NNeZ = B.CreateICmpNE(Size, ConstantInt::get(SizeTy, 0));
+ Value *And = B.CreateAnd(NNeZ, CEqS0);
+ return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
+ }
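
A standalone check of the nested-select fold with a hypothetical two-run array:

#include <cassert>
#include <cstring>

int main() {
  // "aabb" is two runs of the same character; Pos = 2 is where the second
  // run starts. The nested selects then reproduce memchr exactly:
  //   Sel1 = N > Pos && S[Pos] == C ? S + Pos : null
  //   Sel2 = N != 0 && S[0] == C ? S : Sel1
  const char S[] = {'a', 'a', 'b', 'b'};
  const size_t Pos = 2;
  for (size_t N = 0; N <= sizeof S; ++N)
    for (int C : {'a', 'b', 'c'}) {
      const char *Sel1 = (N > Pos && S[Pos] == C) ? S + Pos : nullptr;
      const char *Sel2 = (N != 0 && S[0] == C) ? S : Sel1;
      assert(memchr(S, C, N) == Sel2);
    }
}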
+
+ if (!LenC)
+ // From now on we need a constant length and constant array.
+ return nullptr;
// If the char is variable but the input str and length are not we can turn
// this memchr call into a simple bit field test. Of course this only works
@@ -920,60 +1113,93 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
// memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
// != 0
// after bounds check.
- if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
- unsigned char Max =
- *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
- reinterpret_cast<const unsigned char *>(Str.end()));
-
- // Make sure the bit field we're about to create fits in a register on the
- // target.
- // FIXME: On a 64 bit architecture this prevents us from using the
- // interesting range of alpha ascii chars. We could do better by emitting
- // two bitfields or shifting the range by 64 if no lower chars are used.
- if (!DL.fitsInLegalInteger(Max + 1))
- return nullptr;
+ if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
+ return nullptr;
+
+ unsigned char Max =
+ *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
+ reinterpret_cast<const unsigned char *>(Str.end()));
- // For the bit field use a power-of-2 type with at least 8 bits to avoid
- // creating unnecessary illegal types.
- unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
+ // Make sure the bit field we're about to create fits in a register on the
+ // target.
+ // FIXME: On a 64 bit architecture this prevents us from using the
+ // interesting range of alpha ascii chars. We could do better by emitting
+ // two bitfields or shifting the range by 64 if no lower chars are used.
+ if (!DL.fitsInLegalInteger(Max + 1))
+ return nullptr;
- // Now build the bit field.
- APInt Bitfield(Width, 0);
- for (char C : Str)
- Bitfield.setBit((unsigned char)C);
- Value *BitfieldC = B.getInt(Bitfield);
+ // For the bit field use a power-of-2 type with at least 8 bits to avoid
+ // creating unnecessary illegal types.
+ unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
- // Adjust width of "C" to the bitfield width, then mask off the high bits.
- Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
- C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
+ // Now build the bit field.
+ APInt Bitfield(Width, 0);
+ for (char C : Str)
+ Bitfield.setBit((unsigned char)C);
+ Value *BitfieldC = B.getInt(Bitfield);
- // First check that the bit field access is within bounds.
- Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
- "memchr.bounds");
+ // Adjust width of "C" to the bitfield width, then mask off the high bits.
+ Value *C = B.CreateZExtOrTrunc(CharVal, BitfieldC->getType());
+ C = B.CreateAnd(C, B.getIntN(Width, 0xFF));
- // Create code that checks if the given bit is set in the field.
- Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
- Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
+ // First check that the bit field access is within bounds.
+ Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
+ "memchr.bounds");
- // Finally merge both checks and cast to pointer type. The inttoptr
- // implicitly zexts the i1 to intptr type.
- return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"),
- CI->getType());
- }
+ // Create code that checks if the given bit is set in the field.
+ Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
+ Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
- // Check if all arguments are constants. If so, we can constant fold.
- if (!CharC)
- return nullptr;
+ // Finally merge both checks and cast to pointer type. The inttoptr
+ // implicitly zexts the i1 to intptr type.
+ return B.CreateIntToPtr(B.CreateLogicalAnd(Bounds, Bits, "memchr"),
+ CI->getType());
+}
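
A scalar model of the bit-field rewrite, separate from the patch and assuming a 64-bit legal integer for simplicity (the real code sizes the field to the largest character present):

#include <cassert>
#include <cstdint>
#include <cstring>

// Hypothetical scalar model of the emitted code for
// memchr("\r\n", C, 2) != nullptr, using a 64-bit bit field.
static bool memchrBitTest(unsigned C) {
  const uint64_t Bitfield = (1ULL << '\r') | (1ULL << '\n');
  C &= 0xFF;   // mask the character to a byte, as the transform does
  if (C >= 64) // bounds check on the bit-field access
    return false;
  return ((1ULL << C) & Bitfield) != 0;
}

int main() {
  const char S[] = "\r\n";
  for (unsigned C = 0; C < 256; ++C)
    assert(memchrBitTest(C) == (memchr(S, static_cast<int>(C), 2) != nullptr));
}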
- // Compute the offset.
- size_t I = Str.find(CharC->getSExtValue() & 0xFF);
- if (I == StringRef::npos) // Didn't find the char. memchr returns null.
+// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
+// arrays LHS and RHS and nonconstant Size.
+static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
+ Value *Size, bool StrNCmp,
+ IRBuilderBase &B, const DataLayout &DL) {
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
return Constant::getNullValue(CI->getType());
- // memchr(s+n,c,l) -> gep(s+n+i,c)
- return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
+ StringRef LStr, RStr;
+ if (!getConstantStringInfo(LHS, LStr, 0, /*TrimAtNul=*/false) ||
+ !getConstantStringInfo(RHS, RStr, 0, /*TrimAtNul=*/false))
+ return nullptr;
+
+ // If the contents of both constant arrays are known, fold a call to
+ // memcmp(A, B, N) to
+ // N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)
+ // where Pos is the first mismatch between A and B, determined below.
+
+ uint64_t Pos = 0;
+ Value *Zero = ConstantInt::get(CI->getType(), 0);
+ for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) {
+ if (Pos == MinSize ||
+ (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) {
+ // One array is a leading part of the other of equal or greater
+ // size, or for strncmp, the arrays are equal strings.
+ // Fold the result to zero. Size is assumed to be in bounds, since
+ // otherwise the call would be undefined.
+ return Zero;
+ }
+
+ if (LStr[Pos] != RStr[Pos])
+ break;
+ }
+
+ // Normalize the result.
+ typedef unsigned char UChar;
+ int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1;
+ Value *MaxSize = ConstantInt::get(Size->getType(), Pos);
+ Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize);
+ Value *Res = ConstantInt::get(CI->getType(), IRes);
+ return B.CreateSelect(Cmp, Zero, Res);
}
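
A standalone check of the fold's shape with a hypothetical pair of arrays whose first mismatch is at Pos = 3; memcmp's result is normalized to a sign before comparing, since the standard only specifies the sign:

#include <cassert>
#include <cstring>

int main() {
  const char A[] = {'a', 'b', 'c', 'x'};
  const char B[] = {'a', 'b', 'c', 'y'};
  const size_t Pos = 3; // first mismatch: 'x' vs 'y'
  for (size_t N = 0; N <= sizeof A; ++N) {
    int Got = memcmp(A, B, N);
    int Norm = Got < 0 ? -1 : Got > 0 ? 1 : 0;
    // memcmp(A, B, N) -> N <= Pos ? 0 : -1, since 'x' < 'y'.
    assert(Norm == (N <= Pos ? 0 : -1));
  }
}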
+// Optimize a memcmp call CI with constant size Len.
static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
uint64_t Len, IRBuilderBase &B,
const DataLayout &DL) {
@@ -1028,25 +1254,6 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
}
}
- // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
- // TODO: This is limited to i8 arrays.
- StringRef LHSStr, RHSStr;
- if (getConstantStringInfo(LHS, LHSStr) &&
- getConstantStringInfo(RHS, RHSStr)) {
- // Make sure we're not reading out-of-bounds memory.
- if (Len > LHSStr.size() || Len > RHSStr.size())
- return nullptr;
- // Fold the memcmp and normalize the result. This way we get consistent
- // results across multiple platforms.
- uint64_t Ret = 0;
- int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
- if (Cmp < 0)
- Ret = -1;
- else if (Cmp > 0)
- Ret = 1;
- return ConstantInt::get(CI->getType(), Ret);
- }
-
return nullptr;
}
@@ -1056,33 +1263,29 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
- if (LHS == RHS) // memcmp(s,s,x) -> 0
- return Constant::getNullValue(CI->getType());
-
annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL);
- // Handle constant lengths.
+
+ if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL))
+ return Res;
+
+ // Handle constant Size.
ConstantInt *LenC = dyn_cast<ConstantInt>(Size);
if (!LenC)
return nullptr;
- // memcmp(d,s,0) -> 0
- if (LenC->getZExtValue() == 0)
- return Constant::getNullValue(CI->getType());
-
- if (Value *Res =
- optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL))
- return Res;
- return nullptr;
+ return optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL);
}
Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
return V;
// memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
// bcmp can be more efficient than memcmp because it only has to know that
// there is a difference, not how different one is to the other.
- if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) {
+ if (isLibFuncEmittable(M, TLI, LibFunc_bcmp) &&
+ isOnlyUsedInZeroEqualityComparison(CI)) {
Value *LHS = CI->getArgOperand(0);
Value *RHS = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
@@ -1125,6 +1328,7 @@ Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
return Constant::getNullValue(CI->getType());
if (!getConstantStringInfo(Src, SrcStr, /*Offset=*/0,
/*TrimAtNul=*/false) ||
+ // TODO: Handle zeroinitializer.
!StopChar)
return nullptr;
} else {
@@ -1246,7 +1450,8 @@ static Value *valueHasFloatPrecision(Value *Val) {
/// Shrink double -> float functions.
static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
- bool isBinary, bool isPrecise = false) {
+ bool isBinary, const TargetLibraryInfo *TLI,
+ bool isPrecise = false) {
Function *CalleeFn = CI->getCalledFunction();
if (!CI->getType()->isDoubleTy() || !CalleeFn)
return nullptr;
@@ -1296,22 +1501,25 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]);
} else {
AttributeList CalleeAttrs = CalleeFn->getAttributes();
- R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs)
- : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs);
+ R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], TLI, CalleeName, B,
+ CalleeAttrs)
+ : emitUnaryFloatFnCall(V[0], TLI, CalleeName, B, CalleeAttrs);
}
return B.CreateFPExt(R, B.getDoubleTy());
}
/// Shrink double -> float for unary functions.
static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
bool isPrecise = false) {
- return optimizeDoubleFP(CI, B, false, isPrecise);
+ return optimizeDoubleFP(CI, B, false, TLI, isPrecise);
}
/// Shrink double -> float for binary functions.
static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
bool isPrecise = false) {
- return optimizeDoubleFP(CI, B, true, isPrecise);
+ return optimizeDoubleFP(CI, B, true, TLI, isPrecise);
}
// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
@@ -1427,6 +1635,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
+ Module *M = Pow->getModule();
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
AttributeList Attrs; // Attributes are only meaningful on the original call
Module *Mod = Pow->getModule();
@@ -1454,7 +1663,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
Function *CalleeFn = BaseFn->getCalledFunction();
if (CalleeFn &&
- TLI->getLibFunc(CalleeFn->getName(), LibFn) && TLI->has(LibFn)) {
+ TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+ isLibFuncEmittable(M, TLI, LibFn)) {
StringRef ExpName;
Intrinsic::ID ID;
Value *ExpFn;
@@ -1506,7 +1716,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
// pow(2.0, itofp(x)) -> ldexp(1.0, x)
if (match(Base, m_SpecificFP(2.0)) &&
(isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
- hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
return copyFlags(*Pow,
emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI,
@@ -1515,7 +1725,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
}
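
The rewrite is exact because for integral x both sides scale 1.0 by 2^x. A standalone numeric spot-check; exactness of pow for powers of two holds on common libms, though the C standard does not strictly promise it:

#include <cassert>
#include <cmath>

int main() {
  // pow(2.0, (double)X) == ldexp(1.0, X) within the double exponent range,
  // which is what licenses rewriting pow(2.0, itofp(x)) into ldexp.
  for (int X : {-3, -1, 0, 1, 10, 100})
    assert(std::pow(2.0, static_cast<double>(X)) == std::ldexp(1.0, X));
}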
// pow(2.0 ** n, x) -> exp2(n * x)
- if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
+ if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) {
APFloat BaseR = APFloat(1.0);
BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
BaseR = BaseR / *BaseF;
@@ -1542,7 +1752,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
// pow(10.0, x) -> exp10(x)
// TODO: There is no exp10() intrinsic yet, but some day there shall be one.
if (match(Base, m_SpecificFP(10.0)) &&
- hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
+ hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l))
return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10,
LibFunc_exp10f, LibFunc_exp10l,
B, Attrs));
@@ -1567,7 +1777,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
return copyFlags(*Pow, B.CreateCall(Intrinsic::getDeclaration(
Mod, Intrinsic::exp2, Ty),
FMul, "exp2"));
- else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l))
+ else if (hasFloatFn(M, TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l))
return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2,
LibFunc_exp2f,
LibFunc_exp2l, B, Attrs));
@@ -1588,7 +1799,8 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
}
// Otherwise, use the libcall for sqrt().
- if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl))
+ if (hasFloatFn(M, TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl))
// TODO: We also should check that the target can in fact lower the sqrt()
// libcall. We currently have no way to ask this question, so we ask if
// the target has a sqrt() libcall, which is not exactly the same.
@@ -1778,8 +1990,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
// Shrink pow() to powf() if the arguments are single precision,
// unless the result is expected to be double precision.
if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
- hasFloatVersion(Name)) {
- if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, true))
+ hasFloatVersion(M, Name)) {
+ if (Value *Shrunk = optimizeBinaryDoubleFP(Pow, B, TLI, true))
return Shrunk;
}
@@ -1787,13 +1999,14 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
}
Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
Function *Callee = CI->getCalledFunction();
AttributeList Attrs; // Attributes are only meaningful on the original call
StringRef Name = Callee->getName();
Value *Ret = nullptr;
if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) &&
- hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
+ hasFloatVersion(M, Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
Type *Ty = CI->getType();
Value *Op = CI->getArgOperand(0);
@@ -1801,7 +2014,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize
if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
- hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
+ hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize()))
return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl,
@@ -1812,12 +2025,14 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
}
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
+
// If we can shrink the call to a float function rather than a double
// function, do that first.
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
- if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
+ if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(M, Name))
+ if (Value *Ret = optimizeBinaryDoubleFP(CI, B, TLI))
return Ret;
// The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
@@ -1848,8 +2063,8 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
Type *Ty = Log->getType();
Value *Ret = nullptr;
- if (UnsafeFPShrink && hasFloatVersion(LogNm))
- Ret = optimizeUnaryDoubleFP(Log, B, true);
+ if (UnsafeFPShrink && hasFloatVersion(Mod, LogNm))
+ Ret = optimizeUnaryDoubleFP(Log, B, TLI, true);
// The earlier call must also be 'fast' in order to do these transforms.
CallInst *Arg = dyn_cast<CallInst>(Log->getArgOperand(0));
@@ -1957,7 +2172,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
Log->doesNotAccessMemory()
? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
Arg->getOperand(0), "log")
- : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs);
+ : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, Attrs);
Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul");
// Since pow() may have side effects, e.g. errno,
// dead code elimination may not be trusted to remove it.
@@ -1980,7 +2195,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
Value *LogE = Log->doesNotAccessMemory()
? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty),
Eul, "log")
- : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs);
+ : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, Attrs);
Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul");
// Since exp() may have side effects, e.g. errno,
// dead code elimination may not be trusted to remove it.
@@ -1992,14 +2207,16 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
}
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
// TODO: Once we have a way (other than checking for the existence of the
// libcall) to tell whether our target can lower @llvm.sqrt, relax the
// condition below.
- if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
- Callee->getIntrinsicID() == Intrinsic::sqrt))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
+ if (isLibFuncEmittable(M, TLI, LibFunc_sqrtf) &&
+ (Callee->getName() == "sqrt" ||
+ Callee->getIntrinsicID() == Intrinsic::sqrt))
+ Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
if (!CI->isFast())
return Ret;
@@ -2044,7 +2261,6 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
// If we found a repeated factor, hoist it out of the square root and
// replace it with the fabs of that factor.
- Module *M = Callee->getParent();
Type *ArgType = I->getType();
Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
@@ -2061,11 +2277,12 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
// TODO: Generalize to handle any trig function and its inverse.
Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
StringRef Name = Callee->getName();
- if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name))
- Ret = optimizeUnaryDoubleFP(CI, B, true);
+ if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(M, Name))
+ Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
@@ -2081,7 +2298,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilderBase &B) {
// tanl(atanl(x)) -> x
LibFunc Func;
Function *F = OpC->getCalledFunction();
- if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
+ if (F && TLI->getLibFunc(F->getName(), Func) &&
+ isLibFuncEmittable(M, TLI, Func) &&
((Func == LibFunc_atan && Callee->getName() == "tan") ||
(Func == LibFunc_atanf && Callee->getName() == "tanf") ||
(Func == LibFunc_atanl && Callee->getName() == "tanl")))
@@ -2097,9 +2315,10 @@ static bool isTrigLibCall(CallInst *CI) {
CI->hasFnAttr(Attribute::ReadNone);
}
-static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
+static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
bool UseFloat, Value *&Sin, Value *&Cos,
- Value *&SinCos) {
+ Value *&SinCos, const TargetLibraryInfo *TLI) {
+ Module *M = OrigCallee->getParent();
Type *ArgTy = Arg->getType();
Type *ResTy;
StringRef Name;
@@ -2119,9 +2338,12 @@ static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
ResTy = StructType::get(ArgTy, ArgTy);
}
- Module *M = OrigCallee->getParent();
- FunctionCallee Callee =
- M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
+ if (!isLibFuncEmittable(M, TLI, Name))
+ return false;
+ LibFunc TheLibFunc;
+ TLI->getLibFunc(Name, TheLibFunc);
+ FunctionCallee Callee = getOrInsertLibFunc(
+ M, *TLI, TheLibFunc, OrigCallee->getAttributes(), ResTy, ArgTy);
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
@@ -2145,6 +2367,8 @@ static void insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
"cospi");
}
+
+ return true;
}
Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
@@ -2172,7 +2396,9 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
return nullptr;
Value *Sin, *Cos, *SinCos;
- insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos);
+ if (!insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
+ SinCos, TLI))
+ return nullptr;
auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
Value *Res) {
@@ -2193,6 +2419,7 @@ void LibCallSimplifier::classifyArgUse(
SmallVectorImpl<CallInst *> &CosCalls,
SmallVectorImpl<CallInst *> &SinCosCalls) {
CallInst *CI = dyn_cast<CallInst>(Val);
if (!CI || CI->use_empty())
return;
+ Module *M = CI->getModule();
@@ -2203,7 +2430,8 @@ void LibCallSimplifier::classifyArgUse(
Function *Callee = CI->getCalledFunction();
LibFunc Func;
- if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
+ if (!Callee || !TLI->getLibFunc(*Callee, Func) ||
+ !isLibFuncEmittable(M, TLI, Func) ||
!isTrigLibCall(CI))
return;
@@ -2258,7 +2486,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) {
// abs(x) -> x <s 0 ? -x : x
// The negation has 'nsw' because abs of INT_MIN is undefined.
Value *X = CI->getArgOperand(0);
- Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
+ Value *IsNeg = B.CreateIsNeg(X);
Value *NegX = B.CreateNSWNeg(X, "neg");
return B.CreateSelect(IsNeg, NegX, X);
}
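
The same select expansion in scalar form, checked against the library abs in a standalone sketch:

#include <cassert>
#include <climits>
#include <cstdlib>

int main() {
  // abs(x) -> x < 0 ? -x : x; INT_MIN is excluded because negating it
  // overflows, matching the nsw on the emitted negation.
  for (int X : {-5, -1, 0, 1, 7, INT_MAX, INT_MIN + 1}) {
    int Select = X < 0 ? -X : X;
    assert(Select == std::abs(X));
  }
}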
@@ -2418,6 +2646,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
if (Value *V = optimizePrintFString(CI, B)) {
@@ -2426,10 +2655,10 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
// printf(format, ...) -> iprintf(format, ...) if no floating point
// arguments.
- if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- FunctionCallee IPrintFFn =
- M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ if (isLibFuncEmittable(M, TLI, LibFunc_iprintf) &&
+ !callHasFloatingPointArgument(CI)) {
+ FunctionCallee IPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_iprintf, FT,
+ Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(IPrintFFn);
B.Insert(New);
@@ -2438,11 +2667,10 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
// printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
// arguments.
- if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- auto SmallPrintFFn =
- M->getOrInsertFunction(TLI->getName(LibFunc_small_printf),
- FT, Callee->getAttributes());
+ if (isLibFuncEmittable(M, TLI, LibFunc_small_printf) &&
+ !callHasFP128Argument(CI)) {
+ auto SmallPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_printf, FT,
+ Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(SmallPrintFFn);
B.Insert(New);
@@ -2489,7 +2717,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = castToCStr(Dest, B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
+ Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
@@ -2541,6 +2769,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
}
Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
if (Value *V = optimizeSPrintFString(CI, B)) {
@@ -2549,10 +2778,10 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
// sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
// point arguments.
- if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- FunctionCallee SIPrintFFn =
- M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ if (isLibFuncEmittable(M, TLI, LibFunc_siprintf) &&
+ !callHasFloatingPointArgument(CI)) {
+ FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_siprintf,
+ FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(SIPrintFFn);
B.Insert(New);
@@ -2561,11 +2790,10 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
// sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
// floating point arguments.
- if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- auto SmallSPrintFFn =
- M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf),
- FT, Callee->getAttributes());
+ if (isLibFuncEmittable(M, TLI, LibFunc_small_sprintf) &&
+ !callHasFP128Argument(CI)) {
+ auto SmallSPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_small_sprintf, FT,
+ Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(SmallSPrintFFn);
B.Insert(New);
@@ -2629,7 +2857,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char");
Value *Ptr = castToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
+ Ptr = B.CreateInBoundsGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
@@ -2721,6 +2949,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
}
Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
+ Module *M = CI->getModule();
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
if (Value *V = optimizeFPrintFString(CI, B)) {
@@ -2729,10 +2958,10 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
// fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
// floating point arguments.
- if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- FunctionCallee FIPrintFFn =
- M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ if (isLibFuncEmittable(M, TLI, LibFunc_fiprintf) &&
+ !callHasFloatingPointArgument(CI)) {
+ FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, *TLI, LibFunc_fiprintf,
+ FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(FIPrintFFn);
B.Insert(New);
@@ -2741,11 +2970,11 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
// fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
// 128-bit floating point arguments.
- if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
+ if (isLibFuncEmittable(M, TLI, LibFunc_small_fprintf) &&
+ !callHasFP128Argument(CI)) {
auto SmallFPrintFFn =
- M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf),
- FT, Callee->getAttributes());
+ getOrInsertLibFunc(M, *TLI, LibFunc_small_fprintf, FT,
+ Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(SmallFPrintFFn);
B.Insert(New);
@@ -2830,21 +3059,19 @@ Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
CI->getArgOperand(2)));
}
-bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
- LibFunc Func;
+bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) {
SmallString<20> FloatFuncName = FuncName;
FloatFuncName += 'f';
- if (TLI->getLibFunc(FloatFuncName, Func))
- return TLI->has(Func);
- return false;
+ return isLibFuncEmittable(M, TLI, FloatFuncName);
}
Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
IRBuilderBase &Builder) {
+ Module *M = CI->getModule();
LibFunc Func;
Function *Callee = CI->getCalledFunction();
// Check for string/memory library functions.
- if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
+ if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
// Make sure we never change the calling convention.
assert(
(ignoreCallingConv(Func) ||
@@ -2871,6 +3098,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrNCpy(CI, Builder);
case LibFunc_strlen:
return optimizeStrLen(CI, Builder);
+ case LibFunc_strnlen:
+ return optimizeStrNLen(CI, Builder);
case LibFunc_strpbrk:
return optimizeStrPBrk(CI, Builder);
case LibFunc_strndup:
@@ -2923,6 +3152,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
LibFunc Func,
IRBuilderBase &Builder) {
+ const Module *M = CI->getModule();
+
// Don't optimize calls that require strict floating point semantics.
if (CI->isStrictFP())
return nullptr;
@@ -3001,12 +3232,12 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
case LibFunc_sin:
case LibFunc_sinh:
case LibFunc_tanh:
- if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName()))
- return optimizeUnaryDoubleFP(CI, Builder, true);
+ if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName()))
+ return optimizeUnaryDoubleFP(CI, Builder, TLI, true);
return nullptr;
case LibFunc_copysign:
- if (hasFloatVersion(CI->getCalledFunction()->getName()))
- return optimizeBinaryDoubleFP(CI, Builder);
+ if (hasFloatVersion(M, CI->getCalledFunction()->getName()))
+ return optimizeBinaryDoubleFP(CI, Builder, TLI);
return nullptr;
case LibFunc_fminf:
case LibFunc_fmin:
@@ -3025,6 +3256,7 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
+ Module *M = CI->getModule();
assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
// TODO: Split out the code below that operates on FP calls so that
@@ -3103,7 +3335,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
}
// Then check for known library functions.
- if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
+ if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) {
// We never change the calling convention.
if (!ignoreCallingConv(Func) && !IsCallingConvC)
return nullptr;
@@ -3170,7 +3402,7 @@ LibCallSimplifier::LibCallSimplifier(
function_ref<void(Instruction *, Value *)> Replacer,
function_ref<void(Instruction *)> Eraser)
: FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
- UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
+ Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// Indirect through the replacer used in this instance.
@@ -3361,7 +3593,8 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// If the function was an __stpcpy_chk, and we were able to fold it into
// a __memcpy_chk, we still need to return the correct end pointer.
if (Ret && Func == LibFunc_stpcpy_chk)
- return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+ return B.CreateInBoundsGEP(B.getInt8Ty(), Dst,
+ ConstantInt::get(SizeTTy, Len - 1));
return copyFlags(*CI, cast<CallInst>(Ret));
}
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp
index 08a29ea16ba1..1242380f73c1 100644
--- a/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -48,12 +48,12 @@ cl::opt<bool> llvm::ForcePGSO(
cl::desc("Force the (profiled-guided) size optimizations. "));
cl::opt<int> llvm::PgsoCutoffInstrProf(
- "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000), cl::ZeroOrMore,
+ "pgso-cutoff-instr-prof", cl::Hidden, cl::init(950000),
cl::desc("The profile guided size optimization profile summary cutoff "
"for instrumentation profile."));
cl::opt<int> llvm::PgsoCutoffSampleProf(
- "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
+ "pgso-cutoff-sample-prof", cl::Hidden, cl::init(990000),
cl::desc("The profile guided size optimization profile summary cutoff "
"for sample profile."));
diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index 1fa574f04c37..0ff88e8b4612 100644
--- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -9,7 +9,7 @@
// This is a little utility pass that removes the gc.relocates inserted by
// RewriteStatepointsForGC. Note that the generated IR is incorrect,
// but this is useful as a single pass in itself, for analysis of IR, without
-// the GC.relocates. The statepoint and gc.result instrinsics would still be
+// the GC.relocates. The statepoint and gc.result intrinsics would still be
// present.
//===----------------------------------------------------------------------===//
@@ -18,10 +18,8 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index 6a0eb34a7999..4ad16d622e8d 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -57,7 +57,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SymbolRewriter.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist.h"
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 0b718ed6136e..832353741500 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -18,7 +18,9 @@
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils.h"
@@ -143,6 +145,8 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
// locate the exit blocks.
SetVector<BasicBlock *> ExitingBlocks;
SetVector<BasicBlock *> Exits;
+ // Record the exit blocks that branch to the same block.
+ MapVector<BasicBlock *, SetVector<BasicBlock *> > CommonSuccs;
// We need SetVectors, but the Loop API takes a vector, so we use a temporary.
SmallVector<BasicBlock *, 8> Temp;
@@ -156,6 +160,11 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
if (SL == L || L->contains(SL))
continue;
Exits.insert(S);
+ // The typical case for reducing the number of guard blocks occurs when
+ // the exit block has a single predecessor and successor.
+ if (S->getSinglePredecessor())
+ if (auto *Succ = S->getSingleSuccessor())
+ CommonSuccs[Succ].insert(S);
}
}
@@ -170,13 +179,39 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
for (auto EB : ExitingBlocks) {
dbgs() << " " << EB->getName();
}
- dbgs() << "\n";);
+ dbgs() << "\n";
+
+ dbgs() << "Exit blocks with a common successor:\n";
+ for (auto CS : CommonSuccs) {
+ dbgs() << " Succ " << CS.first->getName() << ", exits:";
+ for (auto Exit : CS.second)
+ dbgs() << " " << Exit->getName();
+ dbgs() << "\n";
+ });
if (Exits.size() <= 1) {
LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n");
return false;
}
+ // When multiple exit blocks branch to the same block, change the control
+ // flow hub to after the exit blocks rather than before. This reduces the
+ // number of guard blocks needed after the loop.
+ for (auto CS : CommonSuccs) {
+ auto CB = CS.first;
+ auto Preds = CS.second;
+ if (Exits.contains(CB))
+ continue;
+ if (Preds.size() < 2 || Preds.size() == Exits.size())
+ continue;
+ for (auto Exit : Preds) {
+ Exits.remove(Exit);
+ ExitingBlocks.remove(Exit->getSinglePredecessor());
+ ExitingBlocks.insert(Exit);
+ }
+ Exits.insert(CB);
+ }
+
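
A container-level model of this rerouting, separate from the patch, with hypothetical block names standing in for BasicBlocks: exits E1 and E2 share the successor CB, so they are replaced in Exits by CB, while E3 keeps its own path:

#include <cassert>
#include <map>
#include <set>
#include <string>

int main() {
  std::set<std::string> Exits = {"E1", "E2", "E3"};
  std::map<std::string, std::set<std::string>> CommonSuccs = {
      {"CB", {"E1", "E2"}}};
  for (const auto &CS : CommonSuccs) {
    // Mirror the guards in the pass: skip successors that are already
    // exits, trivial groups, and the case where every exit is rerouted.
    if (Exits.count(CS.first) || CS.second.size() < 2 ||
        CS.second.size() == Exits.size())
      continue;
    for (const auto &Exit : CS.second)
      Exits.erase(Exit);
    Exits.insert(CS.first);
  }
  assert((Exits == std::set<std::string>{"CB", "E3"}));
}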
SmallVector<BasicBlock *, 8> GuardBlocks;
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks,
@@ -196,6 +231,17 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
if (auto ParentLoop = L->getParentLoop()) {
for (auto G : GuardBlocks) {
ParentLoop->addBasicBlockToLoop(G, LI);
+ // Ensure the guard block predecessors are in a valid loop. After the
+ // change to the control flow hub for common successors, a guard block
+ // predecessor may not be in a loop or may be in an outer loop.
+ for (auto Pred : predecessors(G)) {
+ auto PredLoop = LI.getLoopFor(Pred);
+ if (!ParentLoop->contains(PredLoop)) {
+ if (PredLoop)
+ LI.removeBlock(Pred);
+ ParentLoop->addBasicBlockToLoop(Pred, LI);
+ }
+ }
}
ParentLoop->verifyLoop();
}
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index 43eb5c87acee..f34f2df971b1 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -34,6 +34,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeLCSSAWrapperPassPass(Registry);
initializeLibCallsShrinkWrapLegacyPassPass(Registry);
initializeLoopSimplifyPass(Registry);
+ initializeLowerGlobalDtorsLegacyPassPass(Registry);
initializeLowerInvokeLegacyPassPass(Registry);
initializeLowerSwitchLegacyPassPass(Registry);
initializeNameAnonGlobalLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 637181722f63..42be67f3cfc0 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -64,10 +64,15 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
return true;
}
-template <class T, class HelperClass>
-static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
- HelperClass &Helper,
- const DataLayout &DL) {
+/// If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace.
+/// Helper is the IRBuilder used to insert new instructions.
+///
+/// If we can't do it, return null.
+Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+ IRBuilderBase &Helper,
+ const DataLayout &DL) {
assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
"precondition violation - materialization can't fail");
if (auto *C = dyn_cast<Constant>(StoredVal))
@@ -154,18 +159,6 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
return StoredVal;
}
-/// If we saw a store of a value to memory, and
-/// then a load from a must-aliased pointer of a different type, try to coerce
-/// the stored value. LoadedTy is the type of the load we want to replace.
-/// IRB is IRBuilder used to insert new instructions.
-///
-/// If we can't do it, return null.
-Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
- IRBuilderBase &IRB,
- const DataLayout &DL) {
- return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL);
-}
-
/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove). This
/// means that the write *may* provide bits used by the load but we can't be
@@ -277,7 +270,7 @@ static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
// looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
// widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
// to i16.
- unsigned LoadAlign = LI->getAlignment();
+ unsigned LoadAlign = LI->getAlign().value();
int64_t MemLocEnd = MemLocOffs + MemLocSize;
@@ -400,10 +393,9 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
return -1;
}
-template <class T, class HelperClass>
-static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
- HelperClass &Helper,
- const DataLayout &DL) {
+static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
+ Type *LoadTy, IRBuilderBase &Builder,
+ const DataLayout &DL) {
LLVMContext &Ctx = SrcVal->getType()->getContext();
// If two pointers are in the same address space, they have the same size,
@@ -421,9 +413,11 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPtrOrPtrVectorTy())
- SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
+ SrcVal =
+ Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
if (!SrcVal->getType()->isIntegerTy())
- SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+ SrcVal =
+ Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
@@ -432,12 +426,12 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
else
ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
if (ShiftAmt)
- SrcVal = Helper.CreateLShr(SrcVal,
- ConstantInt::get(SrcVal->getType(), ShiftAmt));
+ SrcVal = Builder.CreateLShr(SrcVal,
+ ConstantInt::get(SrcVal->getType(), ShiftAmt));
if (LoadSize != StoreSize)
- SrcVal = Helper.CreateTruncOrBitCast(SrcVal,
- IntegerType::get(Ctx, LoadSize * 8));
+ SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
+ IntegerType::get(Ctx, LoadSize * 8));
return SrcVal;
}
@@ -450,14 +444,12 @@ Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
IRBuilder<> Builder(InsertPt);
SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
- return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL);
+ return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}
Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
- ConstantFolder F;
- SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL);
- return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL);
+ return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}
/// This function is called when we have a memdep query of a load that ends up
@@ -522,75 +514,77 @@ Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
}
-template <class T, class HelperClass>
-T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
- Type *LoadTy, HelperClass &Helper,
- const DataLayout &DL) {
+/// This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, Instruction *InsertPt,
+ const DataLayout &DL) {
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
+ IRBuilder<> Builder(InsertPt);
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
// memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
// independently of what the offset is.
- T *Val = cast<T>(MSI->getValue());
+ Value *Val = MSI->getValue();
if (LoadSize != 1)
Val =
- Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
- T *OneElt = Val;
+ Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
+ Value *OneElt = Val;
// Splat the value out to the right number of bits.
for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
// If we can double the number of bytes set, do it.
if (NumBytesSet * 2 <= LoadSize) {
- T *ShVal = Helper.CreateShl(
+ Value *ShVal = Builder.CreateShl(
Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
- Val = Helper.CreateOr(Val, ShVal);
+ Val = Builder.CreateOr(Val, ShVal);
NumBytesSet <<= 1;
continue;
}
// Otherwise insert one byte at a time.
- T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
- Val = Helper.CreateOr(OneElt, ShVal);
+ Value *ShVal =
+ Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
+ Val = Builder.CreateOr(OneElt, ShVal);
++NumBytesSet;
}
- return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL);
+ return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
-
- // Otherwise, see if we can constant fold a load from the constant with the
- // offset applied as appropriate.
unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
- return ConstantFoldLoadFromConstPtr(
- Src, LoadTy, APInt(IndexSize, Offset), DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering mem intrinsic.
-Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
- Type *LoadTy, Instruction *InsertPt,
- const DataLayout &DL) {
- IRBuilder<> Builder(InsertPt);
- return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset,
- LoadTy, Builder, DL);
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
+ DL);
}
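
A hypothetical scalar model of the memset splat loop above; splatByte is an illustrative helper, not part of VNCoercion:

#include <cstdint>

// Replicate one byte across LoadSize bytes: double the set prefix while
// possible, otherwise append a single byte, exactly as in the loop above.
static uint64_t splatByte(uint8_t Byte, unsigned LoadSize) {
  uint64_t Val = Byte;
  const uint64_t OneElt = Byte;
  for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
    if (NumBytesSet * 2 <= LoadSize) {          // double the bytes set
      Val |= Val << (NumBytesSet * 8);
      NumBytesSet <<= 1;
      continue;
    }
    Val = OneElt | (Val << 8);                  // otherwise add one byte
    ++NumBytesSet;
  }
  return Val;                                   // e.g. splatByte(0xAB, 3) == 0xABABAB
}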
Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
- // The only case analyzeLoadFromClobberingMemInst cannot be converted to a
- // constant is when it's a memset of a non-constant.
- if (auto *MSI = dyn_cast<MemSetInst>(SrcInst))
- if (!isa<Constant>(MSI->getValue()))
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
+ if (!Val)
return nullptr;
- ConstantFolder F;
- return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset,
- LoadTy, F, DL);
+
+ Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
+ return ConstantFoldLoadFromConst(Val, LoadTy, DL);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
+ DL);
}
} // namespace VNCoercion
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 97c2acb7d4c7..f59fc3a6dd60 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -62,14 +62,13 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -497,7 +496,7 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
if (PtrDelta.urem(Stride) != 0)
return false;
unsigned IdxBitWidth = OpA->getType()->getScalarSizeInBits();
- APInt IdxDiff = PtrDelta.udiv(Stride).zextOrSelf(IdxBitWidth);
+ APInt IdxDiff = PtrDelta.udiv(Stride).zext(IdxBitWidth);
// Only look through a ZExt/SExt.
if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
@@ -1298,10 +1297,16 @@ bool Vectorizer::vectorizeLoadChain(
CV->replaceAllUsesWith(V);
}
- // Bitcast might not be an Instruction, if the value being loaded is a
- // constant. In that case, no need to reorder anything.
- if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
- reorder(BitcastInst);
+ // Since we might have opaque pointers, we might end up using the pointer
+ // operand of the first load (with respect to the memory loaded) for the
+ // vector load. Since this first load might not be the first in the block,
+ // we potentially need to reorder the pointer operand (and its operands). If
+ // we have a bitcast, though, it might be before the load and should then be
+ // the instruction at which reordering starts. "Might" because for opaque
+ // pointers the "bitcast" is just the first load's pointer operand, as
+ // opposed to something we inserted at the right position ourselves.
+ Instruction *BCInst = dyn_cast<Instruction>(Bitcast);
+ reorder((BCInst && BCInst != L0->getPointerOperand()) ? BCInst : LI);
eraseInstructions(Chain);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 81e5aa223c07..6242d9a93fc1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -17,7 +17,9 @@
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -31,8 +33,6 @@ using namespace PatternMatch;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
-extern cl::opt<bool> EnableVPlanPredication;
-
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
@@ -439,6 +439,26 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
return false;
}
+/// Returns true if A and B have the same pointer operands or the same SCEV addresses.
+static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
+ StoreInst *B) {
+ // Compare the store instructions themselves.
+ if (A == B)
+ return true;
+
+ // Otherwise compare the pointer operands.
+ Value *APtr = A->getPointerOperand();
+ Value *BPtr = B->getPointerOperand();
+ if (APtr == BPtr)
+ return true;
+
+ // Otherwise compare address SCEVs
+ if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
+ return true;
+
+ return false;
+}
+
int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
Value *Ptr) const {
const ValueToValueMap &Strides =
@@ -487,7 +507,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// FIXME: We skip these checks when VPlan predication is enabled as we
// want to allow divergent branches. This whole check will be removed
// once VPlan predication is on by default.
- if (!EnableVPlanPredication && Br && Br->isConditional() &&
+ if (Br && Br->isConditional() &&
!TheLoop->isLoopInvariant(Br->getCondition()) &&
!LI->isLoopHeader(Br->getSuccessor(0)) &&
!LI->isLoopHeader(Br->getSuccessor(1))) {
@@ -572,7 +592,7 @@ void LoopVectorizationLegality::addInductionPhi(
// on predicates that only hold within the loop, since allowing the exit
// currently means re-using this SCEV outside the loop (see PR33706 for more
// details).
- if (PSE.getUnionPredicate().isAlwaysTrue()) {
+ if (PSE.getPredicate().isAlwaysTrue()) {
AllowedExit.insert(Phi);
AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
}
@@ -676,7 +696,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
RecurrenceDescriptor RedDes;
if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
- DT)) {
+ DT, PSE.getSE())) {
Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
AllowedExit.insert(RedDes.getLoopExitInstr());
Reductions[Phi] = RedDes;
@@ -770,7 +790,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
auto *SE = PSE.getSE();
Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
- if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
+ if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
reportVectorizationFailure("Found unvectorizable intrinsic",
"intrinsic instruction cannot be vectorized",
@@ -849,7 +869,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// used outside the loop only if the SCEV predicates within the loop is
// same as outside the loop. Allowing the exit means reusing the SCEV
// outside the loop.
- if (PSE.getUnionPredicate().isAlwaysTrue()) {
+ if (PSE.getPredicate().isAlwaysTrue()) {
AllowedExit.insert(&I);
continue;
}
@@ -911,15 +931,70 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (!LAI->canVectorizeMemory())
return false;
- if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
- reportVectorizationFailure("Stores to a uniform address",
- "write to a loop invariant address could not be vectorized",
- "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
- return false;
+ // We can vectorize stores to an invariant address when the final reduction
+ // value is guaranteed to be stored at the end of the loop. Also, once the
+ // decision to vectorize the loop is made, runtime checks are added to make
+ // sure that the invariant address won't alias with any other objects.
+ if (!LAI->getStoresToInvariantAddresses().empty()) {
+ // For each invariant address, check that its last store is unconditional.
+ for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
+ if (isInvariantStoreOfReduction(SI) &&
+ blockNeedsPredication(SI->getParent())) {
+ reportVectorizationFailure(
+ "We don't allow storing to uniform addresses",
+ "write of conditional recurring variant value to a loop "
+ "invariant address could not be vectorized",
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+ return false;
+ }
+ }
+
+ if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
+ // For each invariant address, check that its last stored value is the
+ // result of one of our reductions.
+ //
+ // We do not check whether dependences with loads exist, because such
+ // dependences are currently rejected earlier in LoopAccessInfo::analyzeLoop.
+ // If this behaviour changes, we have to modify this code.
+ ScalarEvolution *SE = PSE.getSE();
+ SmallVector<StoreInst *, 4> UnhandledStores;
+ for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
+ if (isInvariantStoreOfReduction(SI)) {
+ // Earlier stores to this address are effectively dead code.
+ // With opaque pointers it is possible for one pointer to be used with
+ // different sizes of stored values:
+ // store i32 0, ptr %x
+ // store i8 0, ptr %x
+ // In this example the later store does not completely overwrite the
+ // first one. That is why we have to make sure that the types of the
+ // stored values are the same.
+ // TODO: Check that the bitwidth of the unhandled store is smaller than
+ // the one that overwrites it, and add a test.
+ erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
+ return storeToSameAddress(SE, SI, I) &&
+ I->getValueOperand()->getType() ==
+ SI->getValueOperand()->getType();
+ });
+ continue;
+ }
+ UnhandledStores.push_back(SI);
+ }
+
+ bool IsOK = UnhandledStores.empty();
+ // TODO: we should also validate against InvariantMemSets.
+ if (!IsOK) {
+ reportVectorizationFailure(
+ "We don't allow storing to uniform addresses",
+ "write to a loop invariant address could not "
+ "be vectorized",
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+ return false;
+ }
+ }
}
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
- PSE.addPredicate(LAI->getPSE().getUnionPredicate());
+ PSE.addPredicate(LAI->getPSE().getPredicate());
return true;
}
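
A reduced, hypothetical illustration of the filtering above: the latest invariant store retires earlier unhandled stores only when both the address and the stored value type match. StoreRec and dropOverwritten are illustrative, not LLVM types:

#include <algorithm>
#include <vector>

struct StoreRec { const void *Addr; int TypeId; };

static void dropOverwritten(std::vector<StoreRec> &Unhandled,
                            const StoreRec &Latest) {
  // Earlier same-address, same-type stores are fully overwritten by Latest
  // and therefore effectively dead.
  Unhandled.erase(std::remove_if(Unhandled.begin(), Unhandled.end(),
                                 [&](const StoreRec &R) {
                                   return R.Addr == Latest.Addr &&
                                          R.TypeId == Latest.TypeId;
                                 }),
                  Unhandled.end());
}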
@@ -949,6 +1024,26 @@ bool LoopVectorizationLegality::canVectorizeFPMath(
}));
}
+bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
+ return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
+ return RdxDesc.IntermediateStore == SI;
+ });
+}
+
+bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
+ return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
+ if (!RdxDesc.IntermediateStore)
+ return false;
+
+ ScalarEvolution *SE = PSE.getSE();
+ Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
+ return V == InvariantAddress ||
+ SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
+ });
+}
+
bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
Value *In0 = const_cast<Value *>(V);
PHINode *PN = dyn_cast_or_null<PHINode>(In0);
@@ -969,6 +1064,16 @@ LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
return nullptr;
}
+const InductionDescriptor *
+LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
+ if (!isInductionPhi(Phi))
+ return nullptr;
+ auto &ID = getInductionVars().find(Phi)->second;
+ if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
+ return &ID;
+ return nullptr;
+}
+
bool LoopVectorizationLegality::isCastedInductionVariable(
const Value *V) const {
auto *Inst = dyn_cast<Instruction>(V);
@@ -1266,7 +1371,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
- if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
+ if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
reportVectorizationFailure("Too many SCEV checks needed",
"Too many SCEV assumptions need to be made and checked at runtime",
"TooManySCEVRunTimeChecks", ORE, TheLoop);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 71eb39a18d2f..0cb2032fa45a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,6 +25,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
#include "VPlan.h"
+#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -59,7 +60,7 @@ class VPBuilder {
}
public:
- VPBuilder() {}
+ VPBuilder() = default;
/// Clear the insertion point: created instructions will not be inserted into
/// a block.
@@ -187,12 +188,16 @@ struct VectorizationFactor {
/// Cost of the loop with that width.
InstructionCost Cost;
- VectorizationFactor(ElementCount Width, InstructionCost Cost)
- : Width(Width), Cost(Cost) {}
+ /// Cost of the scalar loop.
+ InstructionCost ScalarCost;
+
+ VectorizationFactor(ElementCount Width, InstructionCost Cost,
+ InstructionCost ScalarCost)
+ : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
/// Width 1 means no vectorization, cost 0 means uncomputed cost.
static VectorizationFactor Disabled() {
- return {ElementCount::getFixed(1), 0};
+ return {ElementCount::getFixed(1), 0, 0};
}
bool operator==(const VectorizationFactor &rhs) const {
@@ -298,8 +303,12 @@ public:
/// Generate the IR code for the body of the vectorized loop according to the
/// best selected \p VF, \p UF and VPlan \p BestPlan.
+ /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
+ /// vectorization re-using plans for both the main and epilogue vector loops.
+ /// It should be removed once the re-use issue has been fixed.
void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
- InnerLoopVectorizer &LB, DominatorTree *DT);
+ InnerLoopVectorizer &LB, DominatorTree *DT,
+ bool IsEpilogueVectorization);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printPlans(raw_ostream &O);
@@ -319,6 +328,9 @@ public:
getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
VFRange &Range);
+ /// Check if the number of runtime checks exceeds the threshold.
+ bool requiresTooManyRuntimeChecks() const;
+
protected:
/// Collect the instructions from the original loop that would be trivially
/// dead in the vectorized loop if generated.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3290439ecd07..b637b2d5ddae 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -58,7 +58,6 @@
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanHCFGBuilder.h"
-#include "VPlanPredicator.h"
#include "VPlanTransforms.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -112,7 +111,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -144,10 +142,10 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <cstdlib>
#include <functional>
#include <iterator>
#include <limits>
+#include <map>
#include <memory>
#include <string>
#include <tuple>
@@ -346,13 +344,6 @@ cl::opt<bool> EnableVPlanNativePath(
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
-// FIXME: Remove this switch once we have divergence analysis. Currently we
-// assume divergent non-backedge branches when this switch is true.
-cl::opt<bool> EnableVPlanPredication(
- "enable-vplan-predication", cl::init(false), cl::Hidden,
- cl::desc("Enable VPlan-native vectorization path predicator with "
- "support for outer loop vectorization."));
-
// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjunction with
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -481,7 +472,7 @@ public:
VPTransformState &State);
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
- void fixVectorizedLoop(VPTransformState &State);
+ void fixVectorizedLoop(VPTransformState &State, VPlan &Plan);
// Return true if any runtime check is added.
bool areSafetyChecksAdded() { return AddedSafetyChecks; }
@@ -491,12 +482,6 @@ public:
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;
- /// Vectorize a single first-order recurrence or pointer induction PHINode in
- /// a block. This method handles the induction variable canonicalization. It
- /// supports both VF = 1 for unrolled loops and arbitrary length vectors.
- void widenPHIInstruction(Instruction *PN, VPWidenPHIRecipe *PhiR,
- VPTransformState &State);
-
/// A helper function to scalarize a single Instruction in the innermost loop.
/// Generates a sequence of scalar instances for each lane between \p MinLane
/// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
@@ -506,13 +491,6 @@ public:
const VPIteration &Instance, bool IfPredicateInstr,
VPTransformState &State);
- /// Widen an integer or floating-point induction variable \p IV. If \p Trunc
- /// is provided, the integer induction variable will first be truncated to
- /// the corresponding type. \p CanonicalIV is the scalar value generated for
- /// the canonical induction variable.
- void widenIntOrFpInduction(PHINode *IV, VPWidenIntOrFpInductionRecipe *Def,
- VPTransformState &State, Value *CanonicalIV);
-
/// Construct the vector value of a scalarized value \p V one lane at a time.
void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance,
VPTransformState &State);
@@ -527,13 +505,8 @@ public:
ArrayRef<VPValue *> StoredValues,
VPValue *BlockInMask = nullptr);
- /// Set the debug location in the builder \p Ptr using the debug location in
- /// \p V. If \p Ptr is None then it uses the class member's Builder.
- void setDebugLocFromInst(const Value *V,
- Optional<IRBuilder<> *> CustomBuilder = None);
-
- /// Fix the non-induction PHIs in the OrigPHIsToFix vector.
- void fixNonInductionPHIs(VPTransformState &State);
+ /// Fix the non-induction PHIs in \p Plan.
+ void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State);
/// Returns true if the reordering of FP operations is not allowed, but we are
/// able to vectorize with strict in-order reductions for the given RdxDesc.
@@ -546,17 +519,6 @@ public:
/// element.
virtual Value *getBroadcastInstrs(Value *V);
- /// Add metadata from one instruction to another.
- ///
- /// This includes both the original MDs from \p From and additional ones (\see
- /// addNewMetadata). Use this for *newly created* instructions in the vector
- /// loop.
- void addMetadata(Instruction *To, Instruction *From);
-
- /// Similar to the previous function but it adds the metadata to a
- /// vector of instructions.
- void addMetadata(ArrayRef<Value *> To, Instruction *From);
-
// Returns the resume value (bc.merge.rdx) for a reduction as
// generated by fixReduction.
PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc);
@@ -575,13 +537,9 @@ protected:
/// Set up the values of the IVs correctly when exiting the vector loop.
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
- Value *CountRoundDown, Value *EndValue,
- BasicBlock *MiddleBlock);
-
- /// Introduce a conditional branch (on true, condition to be set later) at the
- /// end of the header=latch connecting it to itself (across the backedge) and
- /// to the exit block of \p L.
- void createHeaderBranch(Loop *L);
+ Value *VectorTripCount, Value *EndValue,
+ BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
+ VPlan &Plan);
/// Handle all cross-iteration phis in the header.
void fixCrossIterationPHIs(VPTransformState &State);
@@ -595,16 +553,9 @@ protected:
void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
/// Clear NSW/NUW flags from reduction instructions if necessary.
- void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
+ void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
VPTransformState &State);
- /// Fixup the LCSSA phi nodes in the unique exit block. This simply
- /// means we need to add the appropriate incoming value from the middle
- /// block as exiting edges from the scalar epilogue loop (if present) are
- /// already in place, and we exit the vector loop exclusively to the middle
- /// block.
- void fixLCSSAPHIs(VPTransformState &State);
-
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
@@ -613,30 +564,11 @@ protected:
/// represented as.
void truncateToMinimalBitwidths(VPTransformState &State);
- /// Compute scalar induction steps. \p ScalarIV is the scalar induction
- /// variable on which to base the steps, \p Step is the size of the step, and
- /// \p EntryVal is the value from the original loop that maps to the steps.
- /// Note that \p EntryVal doesn't have to be an induction variable - it
- /// can also be a truncate instruction.
- void buildScalarSteps(Value *ScalarIV, Value *Step, Instruction *EntryVal,
- const InductionDescriptor &ID, VPValue *Def,
- VPTransformState &State);
-
- /// Create a vector induction phi node based on an existing scalar one. \p
- /// EntryVal is the value from the original loop that maps to the vector phi
- /// node, and \p Step is the loop-invariant step. If \p EntryVal is a
- /// truncate instruction, instead of widening the original IV, we widen a
- /// version of the IV truncated to \p EntryVal's type.
- void createVectorIntOrFpInductionPHI(const InductionDescriptor &II,
- Value *Step, Value *Start,
- Instruction *EntryVal, VPValue *Def,
- VPTransformState &State);
-
/// Returns (and creates if needed) the original loop trip count.
- Value *getOrCreateTripCount(Loop *NewLoop);
+ Value *getOrCreateTripCount(BasicBlock *InsertBlock);
/// Returns (and creates if needed) the trip count of the widened loop.
- Value *getOrCreateVectorTripCount(Loop *NewLoop);
+ Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
/// Returns a bitcasted value to the requested vector type.
/// Also handles bitcasts of vector<float> <-> vector<pointer> types.
@@ -645,33 +577,21 @@ protected:
/// Emit a bypass check to see if the vector trip count is zero, including if
/// it overflows.
- void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
+ void emitIterationCountCheck(BasicBlock *Bypass);
/// Emit a bypass check to see if all of the SCEV assumptions we've
/// had to make are correct. Returns the block containing the checks or
/// nullptr if no checks have been added.
- BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass);
+ BasicBlock *emitSCEVChecks(BasicBlock *Bypass);
/// Emit bypass checks to check any memory assumptions we may have made.
/// Returns the block containing the checks or nullptr if no checks have been
/// added.
- BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
-
- /// Compute the transformed value of Index at offset StartValue using step
- /// StepValue.
- /// For integer induction, returns StartValue + Index * StepValue.
- /// For pointer induction, returns StartValue[Index * StepValue].
- /// FIXME: The newly created binary instructions should contain nsw/nuw
- /// flags, which can be found from the original scalar operations.
- Value *emitTransformedIndex(IRBuilder<> &B, Value *Index, ScalarEvolution *SE,
- const DataLayout &DL,
- const InductionDescriptor &ID,
- BasicBlock *VectorHeader) const;
+ BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass);
/// Emit basic blocks (prefixed with \p Prefix) for the iteration check,
- /// vector loop preheader, middle block and scalar preheader. Also
- /// allocate a loop object for the new vector loop and return it.
- Loop *createVectorLoopSkeleton(StringRef Prefix);
+ /// vector loop preheader, middle block and scalar preheader.
+ void createVectorLoopSkeleton(StringRef Prefix);
/// Create new phi nodes for the induction variables to resume iteration count
/// in the scalar epilogue, from where the vectorized loop left off.
@@ -680,21 +600,12 @@ protected:
/// block, the \p AdditionalBypass pair provides information about the bypass
/// block and the end value on the edge from bypass to this loop.
void createInductionResumeValues(
- Loop *L,
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
/// Complete the loop skeleton by adding debug MDs, creating appropriate
/// conditional branches in the middle block, preparing the builder and
- /// running the verifier. Take in the vector loop \p L as argument, and return
- /// the preheader of the completed vector loop.
- BasicBlock *completeLoopSkeleton(Loop *L, MDNode *OrigLoopID);
-
- /// Add additional metadata to \p To that was not present on \p Orig.
- ///
- /// Currently this is used to add the noalias annotations based on the
- /// inserted memchecks. Use this for instructions that are *cloned* into the
- /// vector loop.
- void addNewMetadata(Instruction *To, const Instruction *Orig);
+ /// running the verifier. Return the preheader of the completed vector loop.
+ BasicBlock *completeLoopSkeleton(MDNode *OrigLoopID);
/// Collect poison-generating recipes that may generate a poison value that is
/// used after vectorization, even when their operands are not poison. Those
@@ -741,13 +652,6 @@ protected:
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
- /// LoopVersioning. It's only set up (non-null) if memchecks were
- /// used.
- ///
- /// This is currently only used to add no-alias metadata based on the
- /// memchecks. The actually versioning is performed manually.
- std::unique_ptr<LoopVersioning> LVer;
-
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
ElementCount VF;
@@ -774,9 +678,6 @@ protected:
/// there can be multiple exiting edges reaching this block.
BasicBlock *LoopExitBlock;
- /// The vector loop body.
- BasicBlock *LoopVectorBody;
-
/// The scalar loop body.
BasicBlock *LoopScalarBody;
@@ -805,10 +706,6 @@ protected:
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
- // Vector of original scalar PHIs whose corresponding widened PHIs need to be
- // fixed up at the end of vector code generation.
- SmallVector<PHINode *, 8> OrigPHIsToFix;
-
/// BFI and PSI are used to check for profile guided size optimizations.
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
@@ -936,8 +833,7 @@ protected:
/// Emits an iteration count bypass check once for the main loop (when \p
/// ForEpilogue is false) and once for the epilogue loop (when \p
/// ForEpilogue is true).
- BasicBlock *emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass,
- bool ForEpilogue);
+ BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
void printDebugTracesAtStart() override;
void printDebugTracesAtEnd() override;
};
@@ -956,7 +852,9 @@ public:
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, LVL, CM, BFI, PSI, Checks) {}
+ EPI, LVL, CM, BFI, PSI, Checks) {
+ TripCount = EPI.TripCount;
+ }
/// Implements the interface for creating a vectorized skeleton using the
/// *epilogue loop* strategy (ie the second pass of vplan execution).
std::pair<BasicBlock *, Value *>
@@ -966,7 +864,7 @@ protected:
/// Emits an iteration count bypass check after the main vector loop has
/// finished to see if there are any iterations left to execute by either
/// the vector epilogue or the scalar epilogue.
- BasicBlock *emitMinimumVectorEpilogueIterCountCheck(Loop *L,
+ BasicBlock *emitMinimumVectorEpilogueIterCountCheck(
BasicBlock *Bypass,
BasicBlock *Insert);
void printDebugTracesAtStart() override;
@@ -993,31 +891,6 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
return I;
}
-void InnerLoopVectorizer::setDebugLocFromInst(
- const Value *V, Optional<IRBuilder<> *> CustomBuilder) {
- IRBuilder<> *B = (CustomBuilder == None) ? &Builder : *CustomBuilder;
- if (const Instruction *Inst = dyn_cast_or_null<Instruction>(V)) {
- const DILocation *DIL = Inst->getDebugLoc();
-
- // When a FSDiscriminator is enabled, we don't need to add the multiply
- // factors to the discriminators.
- if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
- !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
- // FIXME: For scalable vectors, assume vscale=1.
- auto NewDIL =
- DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
- if (NewDIL)
- B->SetCurrentDebugLocation(NewDIL.getValue());
- else
- LLVM_DEBUG(dbgs()
- << "Failed to create new discriminator: "
- << DIL->getFilename() << " Line: " << DIL->getLine());
- } else
- B->SetCurrentDebugLocation(DIL);
- } else
- B->SetCurrentDebugLocation(DebugLoc());
-}
-
/// Write a \p DebugMsg about vectorization to the debug output stream. If \p I
/// is passed, the message relates to that particular instruction.
#ifndef NDEBUG
@@ -1059,7 +932,7 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
namespace llvm {
/// Return a value for Step multiplied by VF.
-Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF,
+Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step) {
assert(Ty->isIntegerTy() && "Expected an integer step");
Constant *StepVal = ConstantInt::get(Ty, Step * VF.getKnownMinValue());
@@ -1067,12 +940,13 @@ Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF,
}
/// Return the runtime value for VF.
-Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF) {
+Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) {
Constant *EC = ConstantInt::get(Ty, VF.getKnownMinValue());
return VF.isScalable() ? B.CreateVScale(EC) : EC;
}
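
A hypothetical constant-folded model of the two helpers above, with VScale standing in for the runtime result of the llvm.vscale intrinsic; runtimeVF and stepForVF are illustrative names, and the scaling of the step by vscale is an assumption about code not shown in this hunk:

#include <cstdint>

static uint64_t runtimeVF(uint64_t KnownMinVF, bool Scalable,
                          uint64_t VScale) {
  return Scalable ? KnownMinVF * VScale : KnownMinVF;    // getRuntimeVF
}

static uint64_t stepForVF(uint64_t Step, uint64_t KnownMinVF, bool Scalable,
                          uint64_t VScale) {
  return Step * runtimeVF(KnownMinVF, Scalable, VScale); // createStepForVF
}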
-static Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) {
+static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
+ ElementCount VF) {
assert(FTy->isFloatingPointTy() && "Expected floating point type!");
Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
@@ -1119,14 +993,6 @@ static std::string getDebugLocString(const Loop *L) {
}
#endif
-void InnerLoopVectorizer::addNewMetadata(Instruction *To,
- const Instruction *Orig) {
- // If the loop was versioned with memchecks, add the corresponding no-alias
- // metadata.
- if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
- LVer->annotateInstWithNoAlias(To, Orig);
-}
-
void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
VPTransformState &State) {
@@ -1151,6 +1017,7 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
// handled.
if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
isa<VPInterleaveRecipe>(CurRec) ||
+ isa<VPScalarIVStepsRecipe>(CurRec) ||
isa<VPCanonicalIVPHIRecipe>(CurRec))
continue;
@@ -1176,10 +1043,10 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &Recipe : *VPBB) {
if (auto *WidenRec = dyn_cast<VPWidenMemoryInstructionRecipe>(&Recipe)) {
- Instruction *UnderlyingInstr = WidenRec->getUnderlyingInstr();
+ Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPDef *AddrDef = WidenRec->getAddr()->getDef();
- if (AddrDef && WidenRec->isConsecutive() && UnderlyingInstr &&
- Legal->blockNeedsPredication(UnderlyingInstr->getParent()))
+ if (AddrDef && WidenRec->isConsecutive() &&
+ Legal->blockNeedsPredication(UnderlyingInstr.getParent()))
collectPoisonGeneratingInstrsInBackwardSlice(
cast<VPRecipeBase>(AddrDef));
} else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
@@ -1206,20 +1073,6 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
}
}
-void InnerLoopVectorizer::addMetadata(Instruction *To,
- Instruction *From) {
- propagateMetadata(To, From);
- addNewMetadata(To, From);
-}
-
-void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To,
- Instruction *From) {
- for (Value *V : To) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- addMetadata(I, From);
- }
-}
-
PHINode *InnerLoopVectorizer::getReductionResumeValue(
const RecurrenceDescriptor &RdxDesc) {
auto It = ReductionResumeValues.find(&RdxDesc);
@@ -1363,7 +1216,7 @@ public:
/// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
/// the IsOrdered flag of RdxDesc is set and we do not allow reordering
/// of FP operations.
- bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
+ bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const {
return !Hints->allowReordering() && RdxDesc.isOrdered();
}
@@ -1701,6 +1554,11 @@ public:
private:
unsigned NumPredStores = 0;
+ /// Convenience function that returns the value of vscale_range if
+ /// vscale_range.min == vscale_range.max, and otherwise returns the value
+ /// returned by the corresponding TLI method.
+ Optional<unsigned> getVScaleForTuning() const;
+
/// \return An upper bound for the vectorization factors for both
/// fixed and scalable vectorization, where the minimum-known number of
/// elements is a power-of-2 larger than zero. If scalable vectorization is
@@ -1713,15 +1571,10 @@ private:
/// \return the maximized element count based on the targets vector
/// registers and the loop trip-count, but limited to a maximum safe VF.
/// This is a helper function of computeFeasibleMaxVF.
- /// FIXME: MaxSafeVF is currently passed by reference to avoid some obscure
- /// issue that occurred on one of the buildbots which cannot be reproduced
- /// without having access to the properietary compiler (see comments on
- /// D98509). The issue is currently under investigation and this workaround
- /// will be removed as soon as possible.
ElementCount getMaximizedVFForTarget(unsigned ConstTripCount,
unsigned SmallestType,
unsigned WidestType,
- const ElementCount &MaxSafeVF,
+ ElementCount MaxSafeVF,
bool FoldTailByMasking);
/// \return the maximum legal scalable VF, based on the safe max number
@@ -2012,7 +1865,7 @@ public:
/// there is no vector code generation, the check blocks are removed
/// completely.
void Create(Loop *L, const LoopAccessInfo &LAI,
- const SCEVUnionPredicate &UnionPred) {
+ const SCEVPredicate &UnionPred, ElementCount VF, unsigned IC) {
BasicBlock *LoopHeader = L->getHeader();
BasicBlock *Preheader = L->getLoopPreheader();
@@ -2035,9 +1888,19 @@ public:
MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr,
"vector.memcheck");
- MemRuntimeCheckCond =
- addRuntimeChecks(MemCheckBlock->getTerminator(), L,
- RtPtrChecking.getChecks(), MemCheckExp);
+ auto DiffChecks = RtPtrChecking.getDiffChecks();
+ if (DiffChecks) {
+ MemRuntimeCheckCond = addDiffRuntimeChecks(
+ MemCheckBlock->getTerminator(), L, *DiffChecks, MemCheckExp,
+ [VF](IRBuilderBase &B, unsigned Bits) {
+ return getRuntimeVF(B, B.getIntNTy(Bits), VF);
+ },
+ IC);
+ } else {
+ MemRuntimeCheckCond =
+ addRuntimeChecks(MemCheckBlock->getTerminator(), L,
+ RtPtrChecking.getChecks(), MemCheckExp);
+ }
assert(MemRuntimeCheckCond &&
"no RT checks generated although RtPtrChecking "
"claimed checks are required");
@@ -2109,12 +1972,16 @@ public:
/// Adds the generated SCEVCheckBlock before \p LoopVectorPreHeader and
/// adjusts the branches to branch to the vector preheader or \p Bypass,
/// depending on the generated condition.
- BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass,
+ BasicBlock *emitSCEVChecks(BasicBlock *Bypass,
BasicBlock *LoopVectorPreHeader,
BasicBlock *LoopExitBlock) {
if (!SCEVCheckCond)
return nullptr;
- if (auto *C = dyn_cast<ConstantInt>(SCEVCheckCond))
+
+ Value *Cond = SCEVCheckCond;
+ // Mark the check as used, to prevent it from being removed during cleanup.
+ SCEVCheckCond = nullptr;
+ if (auto *C = dyn_cast<ConstantInt>(Cond))
if (C->isZero())
return nullptr;
@@ -2133,18 +2000,15 @@ public:
DT->addNewBlock(SCEVCheckBlock, Pred);
DT->changeImmediateDominator(LoopVectorPreHeader, SCEVCheckBlock);
- ReplaceInstWithInst(
- SCEVCheckBlock->getTerminator(),
- BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheckCond));
- // Mark the check as used, to prevent it from being removed during cleanup.
- SCEVCheckCond = nullptr;
+ ReplaceInstWithInst(SCEVCheckBlock->getTerminator(),
+ BranchInst::Create(Bypass, LoopVectorPreHeader, Cond));
return SCEVCheckBlock;
}
/// Adds the generated MemCheckBlock before \p LoopVectorPreHeader and adjusts
/// the branches to branch to the vector preheader or \p Bypass, depending on
/// the generated condition.
- BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass,
+ BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass,
BasicBlock *LoopVectorPreHeader) {
// Check if we generated code that checks in runtime if arrays overlap.
if (!MemRuntimeCheckCond)
@@ -2341,7 +2205,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
Instruction::BinaryOps BinOp, ElementCount VF,
- IRBuilder<> &Builder) {
+ IRBuilderBase &Builder) {
assert(VF.isVector() && "only vector VFs are supported");
// Create and check the types.
@@ -2357,9 +2221,8 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
// Create a vector of consecutive numbers from zero to VF.
VectorType *InitVecValVTy = ValVTy;
- Type *InitVecValSTy = STy;
if (STy->isFloatingPointTy()) {
- InitVecValSTy =
+ Type *InitVecValSTy =
IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
}
@@ -2389,199 +2252,12 @@ static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}
-void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
- const InductionDescriptor &II, Value *Step, Value *Start,
- Instruction *EntryVal, VPValue *Def, VPTransformState &State) {
- IRBuilder<> &Builder = State.Builder;
- assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
- "Expected either an induction phi-node or a truncate of it!");
-
- // Construct the initial value of the vector IV in the vector loop preheader
- auto CurrIP = Builder.saveIP();
- Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
- if (isa<TruncInst>(EntryVal)) {
- assert(Start->getType()->isIntegerTy() &&
- "Truncation requires an integer type");
- auto *TruncType = cast<IntegerType>(EntryVal->getType());
- Step = Builder.CreateTrunc(Step, TruncType);
- Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
- }
-
- Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
- Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
- Value *SteppedStart = getStepVector(
- SplatStart, Zero, Step, II.getInductionOpcode(), State.VF, State.Builder);
-
- // We create vector phi nodes for both integer and floating-point induction
- // variables. Here, we determine the kind of arithmetic we will perform.
- Instruction::BinaryOps AddOp;
- Instruction::BinaryOps MulOp;
- if (Step->getType()->isIntegerTy()) {
- AddOp = Instruction::Add;
- MulOp = Instruction::Mul;
- } else {
- AddOp = II.getInductionOpcode();
- MulOp = Instruction::FMul;
- }
-
- // Multiply the vectorization factor by the step using integer or
- // floating-point arithmetic as appropriate.
- Type *StepType = Step->getType();
- Value *RuntimeVF;
- if (Step->getType()->isFloatingPointTy())
- RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
- else
- RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
- Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
-
- // Create a vector splat to use in the induction update.
- //
- // FIXME: If the step is non-constant, we create the vector splat with
- // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
- // handle a constant vector splat.
- Value *SplatVF = isa<Constant>(Mul)
- ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
- : Builder.CreateVectorSplat(State.VF, Mul);
- Builder.restoreIP(CurrIP);
-
- // We may need to add the step a number of times, depending on the unroll
- // factor. The last of those goes into the PHI.
- PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
- &*LoopVectorBody->getFirstInsertionPt());
- VecInd->setDebugLoc(EntryVal->getDebugLoc());
- Instruction *LastInduction = VecInd;
- for (unsigned Part = 0; Part < UF; ++Part) {
- State.set(Def, LastInduction, Part);
-
- if (isa<TruncInst>(EntryVal))
- addMetadata(LastInduction, EntryVal);
-
- LastInduction = cast<Instruction>(
- Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
- LastInduction->setDebugLoc(EntryVal->getDebugLoc());
- }
-
- // Move the last step to the end of the latch block. This ensures consistent
- // placement of all induction updates.
- auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
- auto *Br = cast<BranchInst>(LoopVectorLatch->getTerminator());
- LastInduction->moveBefore(Br);
- LastInduction->setName("vec.ind.next");
-
- VecInd->addIncoming(SteppedStart, LoopVectorPreHeader);
- VecInd->addIncoming(LastInduction, LoopVectorLatch);
-}
-
-void InnerLoopVectorizer::widenIntOrFpInduction(
- PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State,
- Value *CanonicalIV) {
- Value *Start = Def->getStartValue()->getLiveInIRValue();
- const InductionDescriptor &ID = Def->getInductionDescriptor();
- TruncInst *Trunc = Def->getTruncInst();
- IRBuilder<> &Builder = State.Builder;
- assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
- assert(!State.VF.isZero() && "VF must be non-zero");
-
- // The value from the original loop to which we are mapping the new induction
- // variable.
- Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
-
- auto &DL = EntryVal->getModule()->getDataLayout();
-
- // Generate code for the induction step. Note that induction steps are
- // required to be loop-invariant
- auto CreateStepValue = [&](const SCEV *Step) -> Value * {
- assert(PSE.getSE()->isLoopInvariant(Step, OrigLoop) &&
- "Induction step should be loop invariant");
- if (PSE.getSE()->isSCEVable(IV->getType())) {
- SCEVExpander Exp(*PSE.getSE(), DL, "induction");
- return Exp.expandCodeFor(Step, Step->getType(),
- State.CFG.VectorPreHeader->getTerminator());
- }
- return cast<SCEVUnknown>(Step)->getValue();
- };
-
- // The scalar value to broadcast. This is derived from the canonical
- // induction variable. If a truncation type is given, truncate the canonical
- // induction variable and step. Otherwise, derive these values from the
- // induction descriptor.
- auto CreateScalarIV = [&](Value *&Step) -> Value * {
- Value *ScalarIV = CanonicalIV;
- Type *NeededType = IV->getType();
- if (!Def->isCanonical() || ScalarIV->getType() != NeededType) {
- ScalarIV =
- NeededType->isIntegerTy()
- ? Builder.CreateSExtOrTrunc(ScalarIV, NeededType)
- : Builder.CreateCast(Instruction::SIToFP, ScalarIV, NeededType);
- ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID,
- State.CFG.PrevBB);
- ScalarIV->setName("offset.idx");
- }
- if (Trunc) {
- auto *TruncType = cast<IntegerType>(Trunc->getType());
- assert(Step->getType()->isIntegerTy() &&
- "Truncation requires an integer step");
- ScalarIV = Builder.CreateTrunc(ScalarIV, TruncType);
- Step = Builder.CreateTrunc(Step, TruncType);
- }
- return ScalarIV;
- };
-
- // Fast-math-flags propagate from the original induction instruction.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
- Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
-
- // Now do the actual transformations, and start with creating the step value.
- Value *Step = CreateStepValue(ID.getStep());
- if (State.VF.isScalar()) {
- Value *ScalarIV = CreateScalarIV(Step);
- Type *ScalarTy = IntegerType::get(ScalarIV->getContext(),
- Step->getType()->getScalarSizeInBits());
-
- Instruction::BinaryOps IncOp = ID.getInductionOpcode();
- if (IncOp == Instruction::BinaryOpsEnd)
- IncOp = Instruction::Add;
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx = ConstantInt::get(ScalarTy, Part);
- Instruction::BinaryOps MulOp = Instruction::Mul;
- if (Step->getType()->isFloatingPointTy()) {
- StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType());
- MulOp = Instruction::FMul;
- }
-
- Value *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
- Value *EntryPart = Builder.CreateBinOp(IncOp, ScalarIV, Mul, "induction");
- State.set(Def, EntryPart, Part);
- if (Trunc) {
- assert(!Step->getType()->isFloatingPointTy() &&
- "fp inductions shouldn't be truncated");
- addMetadata(EntryPart, Trunc);
- }
- }
- return;
- }
-
- // Create a new independent vector induction variable, if one is needed.
- if (Def->needsVectorIV())
- createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
-
- if (Def->needsScalarIV()) {
- // Create scalar steps that can be used by instructions we will later
- // scalarize. Note that the addition of the scalar steps will not increase
- // the number of instructions in the loop in the common case prior to
- // InstCombine. We will be trading one vector extract for each scalar step.
- Value *ScalarIV = CreateScalarIV(Step);
- buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
- }
-}
-
-void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
- Instruction *EntryVal,
- const InductionDescriptor &ID,
- VPValue *Def,
- VPTransformState &State) {
- IRBuilder<> &Builder = State.Builder;
+/// Compute scalar induction steps. \p ScalarIV is the scalar induction
+/// variable on which to base the steps, and \p Step is the size of the step.
+static void buildScalarSteps(Value *ScalarIV, Value *Step,
+ const InductionDescriptor &ID, VPValue *Def,
+ VPTransformState &State) {
+ IRBuilderBase &Builder = State.Builder;
// We shouldn't have to build scalar steps if we aren't vectorizing.
assert(State.VF.isVector() && "VF should be greater than one");
// Get the value type and ensure it and the step have the same integer type.
@@ -2652,6 +2328,103 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
}
+// Generate code for the induction step. Note that induction steps are
+// required to be loop-invariant.
+static Value *CreateStepValue(const SCEV *Step, ScalarEvolution &SE,
+ Instruction *InsertBefore,
+ Loop *OrigLoop = nullptr) {
+ const DataLayout &DL = SE.getDataLayout();
+ assert((!OrigLoop || SE.isLoopInvariant(Step, OrigLoop)) &&
+ "Induction step should be loop invariant");
+ if (auto *E = dyn_cast<SCEVUnknown>(Step))
+ return E->getValue();
+
+ SCEVExpander Exp(SE, DL, "induction");
+ return Exp.expandCodeFor(Step, Step->getType(), InsertBefore);
+}
+
+/// Compute the transformed value of Index at offset StartValue using step
+/// StepValue.
+/// For integer induction, returns StartValue + Index * StepValue.
+/// For pointer induction, returns StartValue[Index * StepValue].
+/// FIXME: The newly created binary instructions should contain nsw/nuw
+/// flags, which can be found from the original scalar operations.
+static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
+ Value *StartValue, Value *Step,
+ const InductionDescriptor &ID) {
+ assert(Index->getType()->getScalarType() == Step->getType() &&
+ "Index scalar type does not match StepValue type");
+
+ // Note: the IR at this point is broken. We cannot use SE to create any new
+ // SCEV and then expand it, hoping that SCEV's simplification will give us
+ // more optimal code. Unfortunately, attempting to do so on invalid IR may
+ // lead to various SCEV crashes. So all we can do is use the builder and
+ // rely on InstCombine for future simplifications. Here we handle some
+ // trivial cases only.
+ auto CreateAdd = [&B](Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Types don't match!");
+ if (auto *CX = dyn_cast<ConstantInt>(X))
+ if (CX->isZero())
+ return Y;
+ if (auto *CY = dyn_cast<ConstantInt>(Y))
+ if (CY->isZero())
+ return X;
+ return B.CreateAdd(X, Y);
+ };
+
+ // We allow X to be a vector type, in which case Y will potentially be
+ // splatted into a vector with the same element count.
+ auto CreateMul = [&B](Value *X, Value *Y) {
+ assert(X->getType()->getScalarType() == Y->getType() &&
+ "Types don't match!");
+ if (auto *CX = dyn_cast<ConstantInt>(X))
+ if (CX->isOne())
+ return Y;
+ if (auto *CY = dyn_cast<ConstantInt>(Y))
+ if (CY->isOne())
+ return X;
+ VectorType *XVTy = dyn_cast<VectorType>(X->getType());
+ if (XVTy && !isa<VectorType>(Y->getType()))
+ Y = B.CreateVectorSplat(XVTy->getElementCount(), Y);
+ return B.CreateMul(X, Y);
+ };
+
+ switch (ID.getKind()) {
+ case InductionDescriptor::IK_IntInduction: {
+ assert(!isa<VectorType>(Index->getType()) &&
+ "Vector indices not supported for integer inductions yet");
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (isa<ConstantInt>(Step) && cast<ConstantInt>(Step)->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ auto *Offset = CreateMul(Index, Step);
+ return CreateAdd(StartValue, Offset);
+ }
+ case InductionDescriptor::IK_PtrInduction: {
+ assert(isa<Constant>(Step) &&
+ "Expected constant step for pointer induction");
+ return B.CreateGEP(ID.getElementType(), StartValue, CreateMul(Index, Step));
+ }
+ case InductionDescriptor::IK_FpInduction: {
+ assert(!isa<VectorType>(Index->getType()) &&
+ "Vector indices not supported for FP inductions yet");
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ auto InductionBinOp = ID.getInductionBinOp();
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *MulExp = B.CreateFMul(Step, Index);
+ return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
+ "induction");
+ }
+ case InductionDescriptor::IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
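
A hypothetical scalar analogue of the integer and pointer cases above; the FP case follows the same shape with fadd/fsub. transformedIntIndex and transformedPtrIndex are illustrative names:

#include <cstdint>

static int64_t transformedIntIndex(int64_t Start, int64_t Index,
                                   int64_t Step) {
  return Start + Index * Step;        // IK_IntInduction
}

template <typename T>
static T *transformedPtrIndex(T *Start, int64_t Index, int64_t Step) {
  return Start + Index * Step;        // IK_PtrInduction: &Start[Index * Step]
}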
void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def,
const VPIteration &Instance,
VPTransformState &State) {
@@ -2734,7 +2507,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
for (unsigned Part = 0; Part < UF; Part++) {
Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
- setDebugLocFromInst(AddrPart);
+ State.setDebugLocFromInst(AddrPart);
// Notice current instruction could be any index. Need to adjust the address
// to the member of index 0.
@@ -2760,7 +2533,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy));
}
- setDebugLocFromInst(Instr);
+ State.setDebugLocFromInst(Instr);
Value *PoisonVec = PoisonValue::get(VecTy);
Value *MaskForGaps = nullptr;
@@ -2915,8 +2688,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
if (!Instance.isFirstIteration())
return;
- setDebugLocFromInst(Instr);
-
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
@@ -2933,21 +2704,23 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
if (State.MayGeneratePoisonRecipes.contains(RepRecipe))
Cloned->dropPoisonGeneratingFlags();
- State.Builder.SetInsertPoint(Builder.GetInsertBlock(),
- Builder.GetInsertPoint());
+ if (Instr->getDebugLoc())
+ State.setDebugLocFromInst(Instr);
+
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
for (auto &I : enumerate(RepRecipe->operands())) {
auto InputInstance = Instance;
VPValue *Operand = I.value();
- if (State.Plan->isUniformAfterVectorization(Operand))
+ VPReplicateRecipe *OperandR = dyn_cast<VPReplicateRecipe>(Operand);
+ if (OperandR && OperandR->isUniform())
InputInstance.Lane = VPLane::getFirstLane();
Cloned->setOperand(I.index(), State.get(Operand, InputInstance));
}
- addNewMetadata(Cloned, Instr);
+ State.addNewMetadata(Cloned, Instr);
// Place the cloned scalar in the new loop.
- Builder.Insert(Cloned);
+ State.Builder.Insert(Cloned);
State.set(RepRecipe, Cloned, Instance);
@@ -2960,29 +2733,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
PredicatedInstructions.push_back(Cloned);
}
-void InnerLoopVectorizer::createHeaderBranch(Loop *L) {
- BasicBlock *Header = L->getHeader();
- assert(!L->getLoopLatch() && "loop should not have a latch at this point");
-
- IRBuilder<> B(Header->getTerminator());
- Instruction *OldInst =
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
- setDebugLocFromInst(OldInst, &B);
-
- // Connect the header to the exit and header blocks and replace the old
- // terminator.
- B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header);
-
- // Now we have two terminators. Remove the old one from the block.
- Header->getTerminator()->eraseFromParent();
-}
-
-Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
+Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) {
if (TripCount)
return TripCount;
- assert(L && "Create Trip Count for null loop.");
- IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ assert(InsertBlock);
+ IRBuilder<> Builder(InsertBlock->getTerminator());
// Find the loop boundaries.
ScalarEvolution *SE = PSE.getSE();
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
@@ -3006,7 +2762,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
const SCEV *ExitCount = SE->getAddExpr(
BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = InsertBlock->getModule()->getDataLayout();
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
@@ -3014,22 +2770,23 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
// Count holds the overall loop count (N).
TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- L->getLoopPreheader()->getTerminator());
+ InsertBlock->getTerminator());
if (TripCount->getType()->isPointerTy())
TripCount =
CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int",
- L->getLoopPreheader()->getTerminator());
+ InsertBlock->getTerminator());
return TripCount;
}
-Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
+Value *
+InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
if (VectorTripCount)
return VectorTripCount;
- Value *TC = getOrCreateTripCount(L);
- IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ Value *TC = getOrCreateTripCount(InsertBlock);
+ IRBuilder<> Builder(InsertBlock->getTerminator());
Type *Ty = TC->getType();
// This is where we can make the step a runtime constant.
@@ -3041,6 +2798,8 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
// overflows: the vector induction variable will eventually wrap to zero given
// that it starts at zero and its Step is a power of two; the loop will then
// exit, with the last early-exit vector comparison also producing all-true.
+ // For scalable vectors the VF is not guaranteed to be a power of 2, but this
+ // is accounted for in emitIterationCountCheck, which adds an overflow check.
if (Cost->foldTailByMasking()) {
assert(isPowerOf2_32(VF.getKnownMinValue() * UF) &&
"VF*UF must be a power of 2 when folding tail by masking");
@@ -3103,9 +2862,8 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
return Builder.CreateBitOrPointerCast(CastVal, DstFVTy);
}
-void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
- BasicBlock *Bypass) {
- Value *Count = getOrCreateTripCount(L);
+void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
+ Value *Count = getOrCreateTripCount(LoopVectorPreHeader);
// Reuse existing vector loop preheader for TC checks.
// Note that new preheader block is generated for vector loop.
BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
@@ -3120,10 +2878,23 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
: ICmpInst::ICMP_ULT;
// If tail is to be folded, vector loop takes care of all iterations.
+ Type *CountTy = Count->getType();
Value *CheckMinIters = Builder.getFalse();
- if (!Cost->foldTailByMasking()) {
- Value *Step = createStepForVF(Builder, Count->getType(), VF, UF);
+ Value *Step = createStepForVF(Builder, CountTy, VF, UF);
+ if (!Cost->foldTailByMasking())
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
+ else if (VF.isScalable()) {
+ // vscale is not necessarily a power-of-2, so we cannot guarantee that
+ // updating the induction variables will overflow to exactly zero; an
+ // additional overflow check is therefore required before entering the
+ // vector loop.
+
+ // Get the maximum unsigned value for the type.
+ Value *MaxUIntTripCount =
+ ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
+ Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
+
+ // Don't execute the vector loop if (UMax - n) < (VF * UF).
+ CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
}
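The scalable-VF guard emitted above can be restated as a tiny standalone predicate (hypothetical helper, for illustration only):

#include <cstdint>

// Bypass the vector loop when the trip count is within one vector step of
// the type's maximum, i.e. (UMax - Count) < (VF * UF), since the induction
// update could then overflow mid-loop without wrapping to exactly zero.
bool mustBypassVectorLoop(uint64_t Count, uint64_t Step) {
  return (UINT64_MAX - Count) < Step;
}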
// Create new preheader for vector loop.
LoopVectorPreHeader =
@@ -3148,10 +2919,10 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
LoopBypassBlocks.push_back(TCCheckBlock);
}
-BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
+BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
BasicBlock *const SCEVCheckBlock =
- RTChecks.emitSCEVChecks(L, Bypass, LoopVectorPreHeader, LoopExitBlock);
+ RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock);
if (!SCEVCheckBlock)
return nullptr;
@@ -3176,14 +2947,13 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
return SCEVCheckBlock;
}
-BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
- BasicBlock *Bypass) {
+BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
// VPlan-native path does not do any analysis for runtime checks currently.
if (EnableVPlanNativePath)
return nullptr;
BasicBlock *const MemCheckBlock =
- RTChecks.emitMemRuntimeChecks(L, Bypass, LoopVectorPreHeader);
+ RTChecks.emitMemRuntimeChecks(Bypass, LoopVectorPreHeader);
// Check if we generated code that checks in runtime if arrays overlap. We put
// the checks into a separate block to make the more common case of few
@@ -3197,7 +2967,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
"to vectorize.");
ORE->emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize",
- L->getStartLoc(), L->getHeader())
+ OrigLoop->getStartLoc(),
+ OrigLoop->getHeader())
<< "Code-size may be reduced by not forcing "
"vectorization, or by source-code modifications "
"eliminating the need for runtime checks "
@@ -3209,116 +2980,10 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
AddedSafetyChecks = true;
- // We currently don't use LoopVersioning for the actual loop cloning but we
- // still use it to add the noalias metadata.
- LVer = std::make_unique<LoopVersioning>(
- *Legal->getLAI(),
- Legal->getLAI()->getRuntimePointerChecking()->getChecks(), OrigLoop, LI,
- DT, PSE.getSE());
- LVer->prepareNoAliasMetadata();
return MemCheckBlock;
}
-Value *InnerLoopVectorizer::emitTransformedIndex(
- IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL,
- const InductionDescriptor &ID, BasicBlock *VectorHeader) const {
-
- SCEVExpander Exp(*SE, DL, "induction");
- auto Step = ID.getStep();
- auto StartValue = ID.getStartValue();
- assert(Index->getType()->getScalarType() == Step->getType() &&
- "Index scalar type does not match StepValue type");
-
- // Note: the IR at this point is broken. We cannot use SE to create any new
- // SCEV and then expand it, hoping that SCEV's simplification will give us
- // a more optimal code. Unfortunately, attempt of doing so on invalid IR may
- // lead to various SCEV crashes. So all we can do is to use builder and rely
- // on InstCombine for future simplifications. Here we handle some trivial
- // cases only.
- auto CreateAdd = [&B](Value *X, Value *Y) {
- assert(X->getType() == Y->getType() && "Types don't match!");
- if (auto *CX = dyn_cast<ConstantInt>(X))
- if (CX->isZero())
- return Y;
- if (auto *CY = dyn_cast<ConstantInt>(Y))
- if (CY->isZero())
- return X;
- return B.CreateAdd(X, Y);
- };
-
- // We allow X to be a vector type, in which case Y will potentially be
- // splatted into a vector with the same element count.
- auto CreateMul = [&B](Value *X, Value *Y) {
- assert(X->getType()->getScalarType() == Y->getType() &&
- "Types don't match!");
- if (auto *CX = dyn_cast<ConstantInt>(X))
- if (CX->isOne())
- return Y;
- if (auto *CY = dyn_cast<ConstantInt>(Y))
- if (CY->isOne())
- return X;
- VectorType *XVTy = dyn_cast<VectorType>(X->getType());
- if (XVTy && !isa<VectorType>(Y->getType()))
- Y = B.CreateVectorSplat(XVTy->getElementCount(), Y);
- return B.CreateMul(X, Y);
- };
-
- // Get a suitable insert point for SCEV expansion. For blocks in the vector
- // loop, choose the end of the vector loop header (=VectorHeader), because
- // the DomTree is not kept up-to-date for additional blocks generated in the
- // vector loop. By using the header as insertion point, we guarantee that the
- // expanded instructions dominate all their uses.
- auto GetInsertPoint = [this, &B, VectorHeader]() {
- BasicBlock *InsertBB = B.GetInsertPoint()->getParent();
- if (InsertBB != LoopVectorBody &&
- LI->getLoopFor(VectorHeader) == LI->getLoopFor(InsertBB))
- return VectorHeader->getTerminator();
- return &*B.GetInsertPoint();
- };
-
- switch (ID.getKind()) {
- case InductionDescriptor::IK_IntInduction: {
- assert(!isa<VectorType>(Index->getType()) &&
- "Vector indices not supported for integer inductions yet");
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
- if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne())
- return B.CreateSub(StartValue, Index);
- auto *Offset = CreateMul(
- Index, Exp.expandCodeFor(Step, Index->getType(), GetInsertPoint()));
- return CreateAdd(StartValue, Offset);
- }
- case InductionDescriptor::IK_PtrInduction: {
- assert(isa<SCEVConstant>(Step) &&
- "Expected constant step for pointer induction");
- return B.CreateGEP(
- ID.getElementType(), StartValue,
- CreateMul(Index,
- Exp.expandCodeFor(Step, Index->getType()->getScalarType(),
- GetInsertPoint())));
- }
- case InductionDescriptor::IK_FpInduction: {
- assert(!isa<VectorType>(Index->getType()) &&
- "Vector indices not supported for FP inductions yet");
- assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
- auto InductionBinOp = ID.getInductionBinOp();
- assert(InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub) &&
- "Original bin op should be defined for FP induction");
-
- Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
- Value *MulExp = B.CreateFMul(StepValue, Index);
- return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
- "induction");
- }
- case InductionDescriptor::IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
-}
-
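The removed lambdas fold the trivial identities by hand because SCEV cannot safely be invoked on the temporarily broken IR; the rule itself is simple, as this plain-integer sketch shows (illustrative only — the real lambdas operate on llvm::Value, where returning the operand avoids emitting a redundant instruction):

long createAdd(long X, long Y) {
  if (X == 0) return Y; // X + 0 and 0 + Y fold to the other operand
  if (Y == 0) return X;
  return X + Y;         // otherwise emit (here: compute) the add
}

long createMul(long X, long Y) {
  if (X == 1) return Y; // X * 1 and 1 * Y fold to the other operand
  if (Y == 1) return X;
  return X * Y;
}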
-Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
+void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LoopScalarBody = OrigLoop->getHeader();
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
assert(LoopVectorPreHeader && "Invalid loop structure");
@@ -3350,43 +3015,24 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
- // We intentionally don't let SplitBlock to update LoopInfo since
- // LoopVectorBody should belong to another loop than LoopVectorPreHeader.
- // LoopVectorBody is explicitly added to the correct place few lines later.
- LoopVectorBody =
- SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
- nullptr, nullptr, Twine(Prefix) + "vector.body");
-
- // Update dominator for loop exit.
+ // Update dominator for loop exit. During skeleton creation, only the vector
+ // pre-header and the middle block are created. The vector loop is entirely
+ // created during VPlan execution.
if (!Cost->requiresScalarEpilogue(VF))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
-
- // Create and register the new vector loop.
- Loop *Lp = LI->AllocateLoop();
- Loop *ParentLoop = OrigLoop->getParentLoop();
-
- // Insert the new loop into the loop nest and register the new basic blocks
- // before calling any utilities such as SCEV that require valid LoopInfo.
- if (ParentLoop) {
- ParentLoop->addChildLoop(Lp);
- } else {
- LI->addTopLevelLoop(Lp);
- }
- Lp->addBasicBlockToLoop(LoopVectorBody, *LI);
- return Lp;
}
void InnerLoopVectorizer::createInductionResumeValues(
- Loop *L, std::pair<BasicBlock *, Value *> AdditionalBypass) {
+ std::pair<BasicBlock *, Value *> AdditionalBypass) {
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
"Inconsistent information about additional bypass.");
- Value *VectorTripCount = getOrCreateVectorTripCount(L);
- assert(VectorTripCount && L && "Expected valid arguments");
+ Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
+ assert(VectorTripCount && "Expected valid arguments");
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
// PHIs that are left in the scalar version of the loop.
@@ -3399,19 +3045,13 @@ void InnerLoopVectorizer::createInductionResumeValues(
PHINode *OrigPhi = InductionEntry.first;
InductionDescriptor II = InductionEntry.second;
- // Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal =
- PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
- LoopScalarPreHeader->getTerminator());
- // Copy original phi DL over to the new one.
- BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
Value *&EndValue = IVEndValues[OrigPhi];
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = VectorTripCount;
} else {
- IRBuilder<> B(L->getLoopPreheader()->getTerminator());
+ IRBuilder<> B(LoopVectorPreHeader->getTerminator());
// Fast-math-flags propagate from the original induction instruction.
if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
@@ -3420,10 +3060,10 @@ void InnerLoopVectorizer::createInductionResumeValues(
Type *StepType = II.getStep()->getType();
Instruction::CastOps CastOp =
CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
- Value *CRD = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.crd");
- const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout();
- EndValue =
- emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody);
+ Value *VTC = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.vtc");
+ Value *Step =
+ CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
+ EndValue = emitTransformedIndex(B, VTC, II.getStartValue(), Step, II);
EndValue->setName("ind.end");
// Compute the end value for the additional bypass (if applicable).
@@ -3431,13 +3071,23 @@ void InnerLoopVectorizer::createInductionResumeValues(
B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
CastOp = CastInst::getCastOpcode(AdditionalBypass.second, true,
StepType, true);
- CRD =
- B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.crd");
+ Value *Step =
+ CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
+ VTC =
+ B.CreateCast(CastOp, AdditionalBypass.second, StepType, "cast.vtc");
EndValueFromAdditionalBypass =
- emitTransformedIndex(B, CRD, PSE.getSE(), DL, II, LoopVectorBody);
+ emitTransformedIndex(B, VTC, II.getStartValue(), Step, II);
EndValueFromAdditionalBypass->setName("ind.end");
}
}
+
+ // Create phi nodes to merge from the backedge-taken check block.
+ PHINode *BCResumeVal =
+ PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
+ LoopScalarPreHeader->getTerminator());
+ // Copy original phi DL over to the new one.
+ BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
+
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
@@ -3456,13 +3106,10 @@ void InnerLoopVectorizer::createInductionResumeValues(
}
}
-BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
- MDNode *OrigLoopID) {
- assert(L && "Expected valid loop.");
-
+BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(MDNode *OrigLoopID) {
// The trip counts should be cached by now.
- Value *Count = getOrCreateTripCount(L);
- Value *VectorTripCount = getOrCreateVectorTripCount(L);
+ Value *Count = getOrCreateTripCount(LoopVectorPreHeader);
+ Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
@@ -3487,14 +3134,8 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L,
cast<BranchInst>(LoopMiddleBlock->getTerminator())->setCondition(CmpN);
}
- // Get ready to start creating new instructions into the vectorized body.
- assert(LoopVectorPreHeader == L->getLoopPreheader() &&
- "Inconsistent vector loop preheader");
- Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
-
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
- LI->verify(*DT);
#endif
return LoopVectorPreHeader;
@@ -3517,7 +3158,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
|/ |
| v
| [ ] \
- | [ ]_| <-- vector loop.
+ | [ ]_| <-- vector loop (created during VPlan execution).
| |
| v
\ -[ ] <--- middle-block.
@@ -3544,34 +3185,32 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// simply happens to be prone to hitting this in practice. In theory, we
// can hit the same issue for any SCEV, or ValueTracking query done during
// mutation. See PR49900.
- getOrCreateTripCount(OrigLoop);
+ getOrCreateTripCount(OrigLoop->getLoopPreheader());
// Create an empty vector loop, and prepare basic blocks for the runtime
// checks.
- Loop *Lp = createVectorLoopSkeleton("");
+ createVectorLoopSkeleton("");
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop. This check also covers the case where the
// backedge-taken count is uint##_max: adding one to it will overflow leading
// to an incorrect trip count of zero. In this (rare) case we will also jump
// to the scalar loop.
- emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader);
+ emitIterationCountCheck(LoopScalarPreHeader);
// Generate the code to check any assumptions that we've made for SCEV
// expressions.
- emitSCEVChecks(Lp, LoopScalarPreHeader);
+ emitSCEVChecks(LoopScalarPreHeader);
// Generate the code that checks in runtime if arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
- emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
-
- createHeaderBranch(Lp);
+ emitMemRuntimeChecks(LoopScalarPreHeader);
// Emit phis for the new starting index of the scalar loop.
- createInductionResumeValues(Lp);
+ createInductionResumeValues();
- return {completeLoopSkeleton(Lp, OrigLoopID), nullptr};
+ return {completeLoopSkeleton(OrigLoopID), nullptr};
}
// Fix up external users of the induction variable. At this point, we are
@@ -3580,8 +3219,9 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// value for the IV when arriving directly from the middle block.
void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
const InductionDescriptor &II,
- Value *CountRoundDown, Value *EndValue,
- BasicBlock *MiddleBlock) {
+ Value *VectorTripCount, Value *EndValue,
+ BasicBlock *MiddleBlock,
+ BasicBlock *VectorHeader, VPlan &Plan) {
// There are two kinds of external IV usages - those that use the value
// computed in the last iteration (the PHI) and those that use the penultimate
// value (the value that feeds into the phi from the loop latch).
@@ -3608,8 +3248,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
for (User *U : OrigPhi->users()) {
auto *UI = cast<Instruction>(U);
if (!OrigLoop->contains(UI)) {
- const DataLayout &DL =
- OrigLoop->getHeader()->getModule()->getDataLayout();
assert(isa<PHINode>(UI) && "Expected LCSSA form");
IRBuilder<> B(MiddleBlock->getTerminator());
@@ -3619,15 +3257,18 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
Value *CountMinusOne = B.CreateSub(
- CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
+ VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1));
Value *CMO =
!II.getStep()->getType()->isIntegerTy()
? B.CreateCast(Instruction::SIToFP, CountMinusOne,
II.getStep()->getType())
: B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
CMO->setName("cast.cmo");
+
+ Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(),
+ VectorHeader->getTerminator());
Value *Escape =
- emitTransformedIndex(B, CMO, PSE.getSE(), DL, II, LoopVectorBody);
+ emitTransformedIndex(B, CMO, II.getStartValue(), Step, II);
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
@@ -3640,8 +3281,10 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
// In this case, if IV1 has an external use, we need to avoid adding both
// "last value of IV1" and "penultimate value of IV2". So, verify that we
// don't already have an incoming value for the middle block.
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
+ if (PHI->getBasicBlockIndex(MiddleBlock) == -1) {
PHI->addIncoming(I.second, MiddleBlock);
+ Plan.removeLiveOut(PHI);
+ }
}
}
@@ -3920,18 +3563,16 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
}
}
-void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
+void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
+ VPlan &Plan) {
// Insert truncates and extends for any truncated instructions as hints to
// InstCombine.
if (VF.isVector())
truncateToMinimalBitwidths(State);
// Fix widened non-induction PHIs by setting up the PHI operands.
- if (OrigPHIsToFix.size()) {
- assert(EnableVPlanNativePath &&
- "Unexpected non-induction PHIs for fixup in non VPlan-native path");
- fixNonInductionPHIs(State);
- }
+ if (EnableVPlanNativePath)
+ fixNonInductionPHIs(Plan, State);
// At this point every instruction in the original loop is widened to a
// vector form. Now we need to fix the recurrences in the loop. These PHI
@@ -3942,24 +3583,37 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
- // If we inserted an edge from the middle block to the unique exit block,
- // update uses outside the loop (phis) to account for the newly inserted
- // edge.
- if (!Cost->requiresScalarEpilogue(VF)) {
+ VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
+ if (Cost->requiresScalarEpilogue(VF)) {
+ // No edge from the middle block to the unique exit block has been inserted
+ // and there is nothing to fix from the vector loop; exit phis should have
+ // incoming values from the scalar loop only.
+ Plan.clearLiveOuts();
+ } else {
+ // If we inserted an edge from the middle block to the unique exit block,
+ // update uses outside the loop (phis) to account for the newly inserted
+ // edge.
+
// Fix-up external users of the induction variables.
for (auto &Entry : Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
- getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
- IVEndValues[Entry.first], LoopMiddleBlock);
-
- fixLCSSAPHIs(State);
+ getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()),
+ IVEndValues[Entry.first], LoopMiddleBlock,
+ VectorLoop->getHeader(), Plan);
}
+ // Fix LCSSA phis not already fixed earlier. Extracts may need to be generated
+ // in the exit block, so update the builder.
+ State.Builder.SetInsertPoint(State.CFG.ExitBB->getFirstNonPHI());
+ for (auto &KV : Plan.getLiveOuts())
+ KV.second->fixPhi(Plan, State);
+
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
// Remove redundant induction instructions.
- cse(LoopVectorBody);
+ cse(VectorLoop->getHeader());
// Set/update profile weights for the vector and remainder loops as original
// loop iterations are now distributed among them. Note that original loop
@@ -3974,9 +3628,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// For scalable vectorization we can't know at compile time how many iterations
// of the loop are handled in one vector iteration, so instead assume a pessimistic
// vscale of '1'.
- setProfileInfoAfterUnrolling(
- LI->getLoopFor(LoopScalarBody), LI->getLoopFor(LoopVectorBody),
- LI->getLoopFor(LoopScalarBody), VF.getKnownMinValue() * UF);
+ setProfileInfoAfterUnrolling(LI->getLoopFor(LoopScalarBody), VectorLoop,
+ LI->getLoopFor(LoopScalarBody),
+ VF.getKnownMinValue() * UF);
}
void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
@@ -3986,7 +3640,8 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
// the currently empty PHI nodes. At this point every instruction in the
// original loop is widened to a vector form so we can use them to construct
// the incoming edges.
- VPBasicBlock *Header = State.Plan->getEntry()->getEntryBasicBlock();
+ VPBasicBlock *Header =
+ State.Plan->getVectorLoopRegion()->getEntryBasicBlock();
for (VPRecipeBase &R : Header->phis()) {
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
fixReduction(ReductionPhi, State);
@@ -4102,8 +3757,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(
// and thus no phis which needed updated.
if (!Cost->requiresScalarEpilogue(VF))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi))
+ if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi)) {
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
+ State.Plan->removeLiveOut(&LCSSAPhi);
+ }
}
void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
@@ -4117,14 +3774,14 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
RecurKind RK = RdxDesc.getRecurrenceKind();
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
- setDebugLocFromInst(ReductionStartValue);
+ State.setDebugLocFromInst(ReductionStartValue);
VPValue *LoopExitInstDef = PhiR->getBackedgeValue();
// This is the vector-clone of the value that leaves the loop.
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
// Wrap flags are in general invalid after vectorization, clear them.
- clearReductionWrapFlags(RdxDesc, State);
+ clearReductionWrapFlags(PhiR, State);
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
@@ -4132,9 +3789,13 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// instructions.
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- setDebugLocFromInst(LoopExitInst);
+ State.setDebugLocFromInst(LoopExitInst);
Type *PhiTy = OrigPhi->getType();
+
+ VPBasicBlock *LatchVPBB =
+ PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock();
+ BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB];
// If tail is folded by masking, the vector value to leave the loop should be
// a Select choosing between the vectorized LoopExitInst and vectorized Phi,
// instead of the former. For an inloop reduction the reduction will already
@@ -4142,17 +3803,20 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
if (Cost->foldTailByMasking() && !PhiR->isInLoop()) {
for (unsigned Part = 0; Part < UF; ++Part) {
Value *VecLoopExitInst = State.get(LoopExitInstDef, Part);
- Value *Sel = nullptr;
+ SelectInst *Sel = nullptr;
for (User *U : VecLoopExitInst->users()) {
if (isa<SelectInst>(U)) {
assert(!Sel && "Reduction exit feeding two selects");
- Sel = U;
+ Sel = cast<SelectInst>(U);
} else
assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select");
}
assert(Sel && "Reduction exit feeds no select");
State.reset(LoopExitInstDef, Sel, Part);
+ if (isa<FPMathOperator>(Sel))
+ Sel->setFastMathFlags(RdxDesc.getFastMathFlags());
+
// If the target can create a predicated operator for the reduction at no
// extra cost in the loop (for example a predicated vadd), it can be
// cheaper for the select to remain in the loop than be sunk out of it,
@@ -4164,8 +3828,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
TargetTransformInfo::ReductionFlags())) {
auto *VecRdxPhi =
cast<PHINode>(State.get(PhiR, Part));
- VecRdxPhi->setIncomingValueForBlock(
- LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel);
+ VecRdxPhi->setIncomingValueForBlock(VectorLoopLatch, Sel);
}
}
}
@@ -4176,8 +3839,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
- Builder.SetInsertPoint(
- LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
+ Builder.SetInsertPoint(VectorLoopLatch->getTerminator());
VectorParts RdxParts(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
RdxParts[Part] = State.get(LoopExitInstDef, Part);
@@ -4208,7 +3870,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// conditional branch, and (c) other passes may add new predecessors which
// terminate on this line. This is the easiest way to ensure we don't
// accidentally cause an extra step back into the loop while debugging.
- setDebugLocFromInst(LoopMiddleBlock->getTerminator());
+ State.setDebugLocFromInst(LoopMiddleBlock->getTerminator());
if (PhiR->isOrdered())
ReducedPartRdx = State.get(LoopExitInstDef, UF - 1);
else {
@@ -4265,6 +3927,17 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// Set the resume value for this reduction
ReductionResumeValues.insert({&RdxDesc, BCBlockPhi});
+ // If there were stores of the reduction value to a uniform memory address
+ // inside the loop, create the final store here.
+ if (StoreInst *SI = RdxDesc.IntermediateStore) {
+ StoreInst *NewSI =
+ Builder.CreateStore(ReducedPartRdx, SI->getPointerOperand());
+ propagateMetadata(NewSI, SI);
+
+ // If the reduction value is used in other places,
+ // then let the code below create PHIs for that.
+ }
+
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
@@ -4273,8 +3946,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
  // fixFirstOrderRecurrence for a more complete explanation of the logic.
if (!Cost->requiresScalarEpilogue(VF))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst))
+ if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) {
LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
+ State.Plan->removeLiveOut(&LCSSAPhi);
+ }
// Fix the scalar loop reduction variable with the incoming reduction sum
// from the vector body and from the backedge value.
@@ -4287,63 +3962,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}
-void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
+void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
VPTransformState &State) {
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RK != RecurKind::Add && RK != RecurKind::Mul)
return;
- Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr();
- assert(LoopExitInstr && "null loop exit instruction");
- SmallVector<Instruction *, 8> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- Worklist.push_back(LoopExitInstr);
- Visited.insert(LoopExitInstr);
+ SmallVector<VPValue *, 8> Worklist;
+ SmallPtrSet<VPValue *, 8> Visited;
+ Worklist.push_back(PhiR);
+ Visited.insert(PhiR);
while (!Worklist.empty()) {
- Instruction *Cur = Worklist.pop_back_val();
- if (isa<OverflowingBinaryOperator>(Cur))
- for (unsigned Part = 0; Part < UF; ++Part) {
- // FIXME: Should not rely on getVPValue at this point.
- Value *V = State.get(State.Plan->getVPValue(Cur, true), Part);
- cast<Instruction>(V)->dropPoisonGeneratingFlags();
+ VPValue *Cur = Worklist.pop_back_val();
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *V = State.get(Cur, Part);
+ if (!isa<OverflowingBinaryOperator>(V))
+ break;
+ cast<Instruction>(V)->dropPoisonGeneratingFlags();
}
- for (User *U : Cur->users()) {
- Instruction *UI = cast<Instruction>(U);
- if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) &&
- Visited.insert(UI).second)
- Worklist.push_back(UI);
- }
- }
-}
-
-void InnerLoopVectorizer::fixLCSSAPHIs(VPTransformState &State) {
- for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
- if (LCSSAPhi.getBasicBlockIndex(LoopMiddleBlock) != -1)
- // Some phis were already hand updated by the reduction and recurrence
- // code above, leave them alone.
- continue;
-
- auto *IncomingValue = LCSSAPhi.getIncomingValue(0);
- // Non-instruction incoming values will have only one value.
-
- VPLane Lane = VPLane::getFirstLane();
- if (isa<Instruction>(IncomingValue) &&
- !Cost->isUniformAfterVectorization(cast<Instruction>(IncomingValue),
- VF))
- Lane = VPLane::getLastLaneForVF(VF);
-
- // Can be a loop invariant incoming value or the last scalar value to be
- // extracted from the vectorized loop.
- // FIXME: Should not rely on getVPValue at this point.
- Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
- Value *lastIncomingValue =
- OrigLoop->isLoopInvariant(IncomingValue)
- ? IncomingValue
- : State.get(State.Plan->getVPValue(IncomingValue, true),
- VPIteration(UF - 1, Lane));
- LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
+ for (VPUser *U : Cur->users()) {
+ auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
+ if (!UserRecipe)
+ continue;
+ for (VPValue *V : UserRecipe->definedValues())
+ if (Visited.insert(V).second)
+ Worklist.push_back(V);
+ }
}
}
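The rewritten traversal is a standard worklist walk over the VPlan def-use graph; a generic C++ sketch of the pattern (Node and Users are hypothetical stand-ins for VPValue and its users):

#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Users;
};

template <typename Fn> void walkTransitiveUsers(Node *Root, Fn Action) {
  std::vector<Node *> Worklist{Root};
  std::unordered_set<Node *> Visited{Root};
  while (!Worklist.empty()) {
    Node *Cur = Worklist.back();
    Worklist.pop_back();
    Action(Cur); // e.g. drop nuw/nsw flags on each generated part
    for (Node *U : Cur->Users)
      if (Visited.insert(U).second) // enqueue each node only once
        Worklist.push_back(U);
  }
}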
@@ -4421,17 +4068,23 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}
-void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
- for (PHINode *OrigPhi : OrigPHIsToFix) {
- VPWidenPHIRecipe *VPPhi =
- cast<VPWidenPHIRecipe>(State.Plan->getVPValue(OrigPhi));
- PHINode *NewPhi = cast<PHINode>(State.get(VPPhi, 0));
- // Make sure the builder has a valid insert point.
- Builder.SetInsertPoint(NewPhi);
- for (unsigned i = 0; i < VPPhi->getNumOperands(); ++i) {
- VPValue *Inc = VPPhi->getIncomingValue(i);
- VPBasicBlock *VPBB = VPPhi->getIncomingBlock(i);
- NewPhi->addIncoming(State.get(Inc, 0), State.CFG.VPBB2IRBB[VPBB]);
+void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan,
+ VPTransformState &State) {
+ auto Iter = depth_first(
+ VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry()));
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
+ for (VPRecipeBase &P : VPBB->phis()) {
+ VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
+ if (!VPPhi)
+ continue;
+ PHINode *NewPhi = cast<PHINode>(State.get(VPPhi, 0));
+ // Make sure the builder has a valid insert point.
+ Builder.SetInsertPoint(NewPhi);
+ for (unsigned i = 0; i < VPPhi->getNumOperands(); ++i) {
+ VPValue *Inc = VPPhi->getIncomingValue(i);
+ VPBasicBlock *VPBB = VPPhi->getIncomingBlock(i);
+ NewPhi->addIncoming(State.get(Inc, 0), State.CFG.VPBB2IRBB[VPBB]);
+ }
}
}
}
@@ -4441,139 +4094,6 @@ bool InnerLoopVectorizer::useOrderedReductions(
return Cost->useOrderedReductions(RdxDesc);
}
-void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
- VPWidenPHIRecipe *PhiR,
- VPTransformState &State) {
- PHINode *P = cast<PHINode>(PN);
- if (EnableVPlanNativePath) {
- // Currently we enter here in the VPlan-native path for non-induction
- // PHIs where all control flow is uniform. We simply widen these PHIs.
- // Create a vector phi with no operands - the vector phi operands will be
- // set at the end of vector code generation.
- Type *VecTy = (State.VF.isScalar())
- ? PN->getType()
- : VectorType::get(PN->getType(), State.VF);
- Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
- State.set(PhiR, VecPhi, 0);
- OrigPHIsToFix.push_back(P);
-
- return;
- }
-
- assert(PN->getParent() == OrigLoop->getHeader() &&
- "Non-header phis should have been handled elsewhere");
-
- // In order to support recurrences we need to be able to vectorize Phi nodes.
- // Phi nodes have cycles, so we need to vectorize them in two stages. This is
- // stage #1: We create a new vector PHI node with no incoming edges. We'll use
- // this value when we vectorize all of the instructions that use the PHI.
-
- assert(!Legal->isReductionVariable(P) &&
- "reductions should be handled elsewhere");
-
- setDebugLocFromInst(P);
-
- // This PHINode must be an induction variable.
- // Make sure that we know about it.
- assert(Legal->getInductionVars().count(P) && "Not an induction variable");
-
- InductionDescriptor II = Legal->getInductionVars().lookup(P);
- const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
-
- auto *IVR = PhiR->getParent()->getPlan()->getCanonicalIV();
- PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
-
- // FIXME: The newly created binary instructions should contain nsw/nuw flags,
- // which can be found from the original scalar operations.
- switch (II.getKind()) {
- case InductionDescriptor::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case InductionDescriptor::IK_IntInduction:
- case InductionDescriptor::IK_FpInduction:
- llvm_unreachable("Integer/fp induction is handled elsewhere.");
- case InductionDescriptor::IK_PtrInduction: {
- // Handle the pointer induction variable case.
- assert(P->getType()->isPointerTy() && "Unexpected type.");
-
- if (Cost->isScalarAfterVectorization(P, State.VF)) {
- // This is the normalized GEP that starts counting at zero.
- Value *PtrInd =
- Builder.CreateSExtOrTrunc(CanonicalIV, II.getStep()->getType());
- // Determine the number of scalars we need to generate for each unroll
- // iteration. If the instruction is uniform, we only need to generate the
- // first lane. Otherwise, we generate all VF values.
- bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
- assert((IsUniform || !State.VF.isScalable()) &&
- "Cannot scalarize a scalable VF");
- unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *PartStart =
- createStepForVF(Builder, PtrInd->getType(), VF, Part);
-
- for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
- Value *Idx = Builder.CreateAdd(
- PartStart, ConstantInt::get(PtrInd->getType(), Lane));
- Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(),
- DL, II, State.CFG.PrevBB);
- SclrGep->setName("next.gep");
- State.set(PhiR, SclrGep, VPIteration(Part, Lane));
- }
- }
- return;
- }
- assert(isa<SCEVConstant>(II.getStep()) &&
- "Induction step not a SCEV constant!");
- Type *PhiType = II.getStep()->getType();
-
- // Build a pointer phi
- Value *ScalarStartValue = PhiR->getStartValue()->getLiveInIRValue();
- Type *ScStValueType = ScalarStartValue->getType();
- PHINode *NewPointerPhi =
- PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
- NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader);
-
- // A pointer induction, performed by using a gep
- BasicBlock *LoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
- Instruction *InductionLoc = LoopLatch->getTerminator();
- const SCEV *ScalarStep = II.getStep();
- SCEVExpander Exp(*PSE.getSE(), DL, "induction");
- Value *ScalarStepValue =
- Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
- Value *RuntimeVF = getRuntimeVF(Builder, PhiType, VF);
- Value *NumUnrolledElems =
- Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
- Value *InductionGEP = GetElementPtrInst::Create(
- II.getElementType(), NewPointerPhi,
- Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
- InductionLoc);
- NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
-
- // Create UF many actual address geps that use the pointer
- // phi as base and a vectorized version of the step value
- // (<step*0, ..., step*N>) as offset.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Type *VecPhiType = VectorType::get(PhiType, State.VF);
- Value *StartOffsetScalar =
- Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part));
- Value *StartOffset =
- Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
- // Create a vector of consecutive numbers from zero to VF.
- StartOffset =
- Builder.CreateAdd(StartOffset, Builder.CreateStepVector(VecPhiType));
-
- Value *GEP = Builder.CreateGEP(
- II.getElementType(), NewPointerPhi,
- Builder.CreateMul(
- StartOffset, Builder.CreateVectorSplat(State.VF, ScalarStepValue),
- "vector.gep"));
- State.set(PhiR, GEP, Part);
- }
- }
- }
-}
-
/// A helper function for checking whether an integer division-related
/// instruction may divide by zero (in which case it must be predicated if
/// executed conditionally in the scalar code).
@@ -4597,7 +4117,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
VPTransformState &State) {
assert(!isa<DbgInfoIntrinsic>(I) &&
"DbgInfoIntrinsic should have been dropped during VPlan construction");
- setDebugLocFromInst(&I);
+ State.setDebugLocFromInst(&I);
Module *M = I.getParent()->getParent()->getParent();
auto *CI = cast<CallInst>(&I);
@@ -4627,13 +4147,13 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
Value *Arg;
- if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, I.index()))
+ if (!UseVectorIntrinsic ||
+ !isVectorIntrinsicWithScalarOpAtArg(ID, I.index()))
Arg = State.get(I.value(), Part);
- else {
+ else
Arg = State.get(I.value(), VPIteration(0, 0));
- if (hasVectorInstrinsicOverloadedScalarOpd(ID, I.index()))
- TysForDecl.push_back(Arg->getType());
- }
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I.index()))
+ TysForDecl.push_back(Arg->getType());
Args.push_back(Arg);
}
@@ -4661,7 +4181,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
V->copyFastMathFlags(CI);
State.set(Def, V, Part);
- addMetadata(V, &I);
+ State.addMetadata(V, &I);
}
}
@@ -4672,6 +4192,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
assert(VF.isVector() && Scalars.find(VF) == Scalars.end() &&
"This function should not be visited twice for the same VF");
+ // This avoids any chance of creating a REPLICATE recipe during planning
+ // since that would result in generation of scalarized code during execution,
+ // which is not supported for scalable vectors.
+ if (VF.isScalable()) {
+ Scalars[VF].insert(Uniforms[VF].begin(), Uniforms[VF].end());
+ return;
+ }
+
SmallSetVector<Instruction *, 8> Worklist;
// These sets are used to seed the analysis with pointers used by memory
@@ -4761,7 +4289,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
}
// Insert the forced scalars.
- // FIXME: Currently widenPHIInstruction() often creates a dead vector
+ // FIXME: Currently VPWidenPHIRecipe() often creates a dead vector
// induction variable when the PHI user is scalarized.
auto ForcedScalar = ForcedScalars.find(VF);
if (ForcedScalar != ForcedScalars.end())
@@ -4888,6 +4416,27 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
if (hasIrregularType(ScalarTy, DL))
return false;
+ // If the group involves a non-integral pointer, we may not be able to
+ // losslessly cast all values to a common type.
+ unsigned InterleaveFactor = Group->getFactor();
+ bool ScalarNI = DL.isNonIntegralPointerType(ScalarTy);
+ for (unsigned i = 0; i < InterleaveFactor; i++) {
+ Instruction *Member = Group->getMember(i);
+ if (!Member)
+ continue;
+ auto *MemberTy = getLoadStoreType(Member);
+ bool MemberNI = DL.isNonIntegralPointerType(MemberTy);
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (MemberNI != ScalarNI) {
+ // TODO: Consider adding special nullptr value case here
+ return false;
+ } else if (MemberNI && ScalarNI &&
+ ScalarTy->getPointerAddressSpace() !=
+ MemberTy->getPointerAddressSpace()) {
+ return false;
+ }
+ }
+
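The member-compatibility rule added above, restated as a standalone predicate (hypothetical helper, illustration only):

// An interleave group member is compatible with the group's scalar type
// unless it mixes non-integral and integral pointers, or pairs two
// non-integral pointers from different address spaces.
bool interleaveMembersCompatible(bool ScalarNI, bool MemberNI,
                                 unsigned ScalarAS, unsigned MemberAS) {
  if (MemberNI != ScalarNI)
    return false; // never coerce non-integral pointers to integers
  if (MemberNI && ScalarNI && ScalarAS != MemberAS)
    return false; // distinct address spaces cannot be cast losslessly
  return true;
}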
// Check if masking is required.
// A Group may need masking for one of two reasons: it resides in a block that
// needs predication, or it was decided to use masking to deal with gaps
@@ -5170,7 +4719,7 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
return true;
}
- if (!PSE.getUnionPredicate().getPredicates().empty()) {
+ if (!PSE.getPredicate().isAlwaysTrue()) {
reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz",
"runtime SCEV checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
@@ -5461,14 +5010,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
}
}
- // For scalable vectors don't use tail folding for low trip counts or
- // optimizing for code size. We only permit this if the user has explicitly
- // requested it.
- if (ScalarEpilogueStatus != CM_ScalarEpilogueNotNeededUsePredicate &&
- ScalarEpilogueStatus != CM_ScalarEpilogueNotAllowedUsePredicate &&
- MaxFactors.ScalableVF.isVector())
- MaxFactors.ScalableVF = ElementCount::getScalable(0);
-
// If we don't know the precise trip count, or if the trip count that we
// found modulo the vectorization factor is not zero, try to fold the tail
// by masking.
@@ -5511,7 +5052,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType,
- const ElementCount &MaxSafeVF, bool FoldTailByMasking) {
+ ElementCount MaxSafeVF, bool FoldTailByMasking) {
bool ComputeScalableMaxVF = MaxSafeVF.isScalable();
TypeSize WidestRegister = TTI.getRegisterBitWidth(
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
@@ -5556,9 +5097,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
return ElementCount::getFixed(ClampedConstTripCount);
}
+ TargetTransformInfo::RegisterKind RegKind =
+ ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
+ : TargetTransformInfo::RGK_FixedWidthVector;
ElementCount MaxVF = MaxVectorElementCount;
- if (TTI.shouldMaximizeVectorBandwidth() ||
- (MaximizeBandwidth && isScalarEpilogueAllowed())) {
+ if (MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
+ TTI.shouldMaximizeVectorBandwidth(RegKind))) {
auto MaxVectorElementCountMaxBW = ElementCount::get(
PowerOf2Floor(WidestRegister.getKnownMinSize() / SmallestType),
ComputeScalableMaxVF);
@@ -5596,10 +5140,27 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
MaxVF = MinVF;
}
}
+
+ // Invalidate any widening decisions we might have made, in case the loop
+ // requires predication (decided later), but we have already made some
+ // load/store widening decisions.
+ invalidateCostModelingDecisions();
}
return MaxVF;
}
+Optional<unsigned> LoopVectorizationCostModel::getVScaleForTuning() const {
+ if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+ auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange);
+ auto Min = Attr.getVScaleRangeMin();
+ auto Max = Attr.getVScaleRangeMax();
+ if (Max && Min == Max)
+ return Max;
+ }
+
+ return TTI.getVScaleForTuning();
+}
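The selection logic of getVScaleForTuning reduces to a small decision, sketched here with plain values (hypothetical standalone form; in the real code Min/Max come from the function's vscale_range attribute):

#include <optional>

std::optional<unsigned> vscaleForTuning(unsigned AttrMin,
                                        std::optional<unsigned> AttrMax,
                                        std::optional<unsigned> TargetHint) {
  if (AttrMax && AttrMin == *AttrMax)
    return *AttrMax; // vscale pinned exactly by vscale_range(min, max)
  return TargetHint; // otherwise defer to the target's tuning value
}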
+
bool LoopVectorizationCostModel::isMoreProfitable(
const VectorizationFactor &A, const VectorizationFactor &B) const {
InstructionCost CostA = A.Cost;
@@ -5624,7 +5185,7 @@ bool LoopVectorizationCostModel::isMoreProfitable(
// Improve estimate for the vector width if it is scalable.
unsigned EstimatedWidthA = A.Width.getKnownMinValue();
unsigned EstimatedWidthB = B.Width.getKnownMinValue();
- if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) {
+ if (Optional<unsigned> VScale = getVScaleForTuning()) {
if (A.Width.isScalable())
EstimatedWidthA *= VScale.getValue();
if (B.Width.isScalable())
@@ -5651,7 +5212,8 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
assert(VFCandidates.count(ElementCount::getFixed(1)) &&
"Expected Scalar VF to be a candidate");
- const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost);
+ const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost,
+ ExpectedCost);
VectorizationFactor ChosenFactor = ScalarCost;
bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
@@ -5669,12 +5231,12 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
continue;
VectorizationCostTy C = expectedCost(i, &InvalidCosts);
- VectorizationFactor Candidate(i, C.first);
+ VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
#ifndef NDEBUG
unsigned AssumedMinimumVscale = 1;
- if (Optional<unsigned> VScale = TTI.getVScaleForTuning())
- AssumedMinimumVscale = VScale.getValue();
+ if (Optional<unsigned> VScale = getVScaleForTuning())
+ AssumedMinimumVscale = *VScale;
unsigned Width =
Candidate.Width.isScalable()
? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale
@@ -5862,7 +5424,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
if (LVP.hasPlanWithVF(ForcedEC))
- return {ForcedEC, 0};
+ return {ForcedEC, 0, 0};
else {
LLVM_DEBUG(
dbgs()
@@ -5885,8 +5447,20 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
+ // If MainLoopVF = vscale x 2, and vscale is expected to be 4, then we know
+ // the main loop handles 8 lanes per iteration. We could still benefit from
+ // vectorizing the epilogue loop with VF=4.
+ ElementCount EstimatedRuntimeVF = MainLoopVF;
+ if (MainLoopVF.isScalable()) {
+ EstimatedRuntimeVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
+ if (Optional<unsigned> VScale = getVScaleForTuning())
+ EstimatedRuntimeVF *= *VScale;
+ }
+
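Worked arithmetic for the example in the comment above (values assumed for illustration):

unsigned estimatedRuntimeVF(unsigned KnownMinLanes, unsigned VScale) {
  return KnownMinLanes * VScale; // <vscale x 2> with vscale == 4 -> 8
}
// estimatedRuntimeVF(2, 4) == 8, so a fixed epilogue VF of 4 remains a
// profitable candidate even though it is not less than the main loop's
// known minimum of 2.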
for (auto &NextVF : ProfitableVFs)
- if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() &&
+ ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) ||
+ ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) &&
(Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
LVP.hasPlanWithVF(NextVF.Width))
Result = NextVF;
@@ -6006,6 +5580,18 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
!(InterleaveSmallLoopScalarReduction && HasReductions && VF.isScalar()))
return 1;
+ // If we did not calculate the cost for VF (because the user selected the VF)
+ // then we calculate the cost of VF here.
+ if (LoopCost == 0) {
+ InstructionCost C = expectedCost(VF).first;
+ assert(C.isValid() && "Expected to have chosen a VF with valid cost");
+ LoopCost = *C.getValue();
+
+ // The loop body is free; there is no need for interleaving.
+ if (LoopCost == 0)
+ return 1;
+ }
+
RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
@@ -6097,16 +5683,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
assert(IC > 0 && "Interleave count must be greater than 0.");
- // If we did not calculate the cost for VF (because the user selected the VF)
- // then we calculate the cost of VF here.
- if (LoopCost == 0) {
- InstructionCost C = expectedCost(VF).first;
- assert(C.isValid() && "Expected to have chosen a VF with valid cost");
- LoopCost = *C.getValue();
- }
-
- assert(LoopCost && "Non-zero loop cost expected");
-
// Interleave if we vectorized this loop and there is a reduction that could
// benefit from interleaving.
if (VF.isVector() && HasReductions) {
@@ -6114,9 +5690,15 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
return IC;
}
- // Note that if we've already vectorized the loop we will have done the
- // runtime check and so interleaving won't require further checks.
- bool InterleavingRequiresRuntimePointerCheck =
+ // For any scalar loop that either requires runtime checks or predication we
+ // are better off leaving this to the unroller. Note that if we've already
+ // vectorized the loop we will have done the runtime check and so interleaving
+ // won't require further checks.
+ bool ScalarInterleavingRequiresPredication =
+ (VF.isScalar() && any_of(TheLoop->blocks(), [this](BasicBlock *BB) {
+ return Legal->blockNeedsPredication(BB);
+ }));
+ bool ScalarInterleavingRequiresRuntimePointerCheck =
(VF.isScalar() && Legal->getRuntimePointerChecking()->Need);
// We want to interleave small loops in order to reduce the loop overhead and
@@ -6126,7 +5708,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
<< "LV: VF is " << VF << '\n');
const bool AggressivelyInterleaveReductions =
TTI.enableAggressiveInterleaving(HasReductions);
- if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) {
+ if (!ScalarInterleavingRequiresRuntimePointerCheck &&
+ !ScalarInterleavingRequiresPredication && LoopCost < SmallLoopCost) {
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and interleave until the cost of the
// loop overhead is about 5% of the cost of the loop.
@@ -6289,16 +5872,10 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
- // A lambda that gets the register usage for the given type and VF.
- const auto &TTICapture = TTI;
- auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
+ auto GetRegUsage = [&TTI = TTI](Type *Ty, ElementCount VF) -> unsigned {
if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty))
return 0;
- InstructionCost::CostType RegUsage =
- *TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue();
- assert(RegUsage >= 0 && RegUsage <= std::numeric_limits<unsigned>::max() &&
- "Nonsensical values for register usage.");
- return RegUsage;
+ return TTI.getRegUsageForType(VectorType::get(Ty, VF));
};
for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
@@ -7049,10 +6626,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
bool TypeNotScalarized = false;
if (VF.isVector() && VectorTy->isVectorTy()) {
- unsigned NumParts = TTI.getNumberOfParts(VectorTy);
- if (NumParts)
- TypeNotScalarized = NumParts < VF.getKnownMinValue();
- else
+ if (unsigned NumParts = TTI.getNumberOfParts(VectorTy)) {
+ if (VF.isScalable())
+ // <vscale x 1 x iN> is assumed to be profitable over iN because
+ // scalable registers are a distinct register class from scalar ones.
+ // If we ever find a target which wants to lower scalable vectors
+ // back to scalars, we'll need to update this code to explicitly
+ // ask TTI about the register class uses for each part.
+ TypeNotScalarized = NumParts <= VF.getKnownMinValue();
+ else
+ TypeNotScalarized = NumParts < VF.getKnownMinValue();
+ } else
C = InstructionCost::getInvalid();
}
return VectorizationCostTy(C, TypeNotScalarized);
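The branch added above reduces to a one-line rule, sketched as a standalone helper (hypothetical, for illustration):

bool typeNotScalarized(unsigned NumParts, unsigned KnownMinVF,
                       bool Scalable) {
  // Scalable parts stay in vector registers, hence <= rather than <.
  return Scalable ? NumParts <= KnownMinVF : NumParts < KnownMinVF;
}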
@@ -7128,8 +6712,6 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
Cost = getGatherScatterCost(&I, VF);
setWideningDecision(&I, VF, CM_GatherScatter, Cost);
} else {
- assert((isa<LoadInst>(&I) || !VF.isScalable()) &&
- "Cannot yet scalarize uniform stores");
Cost = getUniformMemOpCost(&I, VF);
setWideningDecision(&I, VF, CM_Scalarize, Cost);
}
@@ -7487,8 +7069,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
InstWidening Decision = getWideningDecision(I, Width);
assert(Decision != CM_Unknown &&
"CM decision should be taken at this point");
- if (Decision == CM_Scalarize)
+ if (Decision == CM_Scalarize) {
+ if (VF.isScalable() && isa<StoreInst>(I))
+ // We can't currently scalarize a scalable vector store (even a
+ // uniform one); return an invalid cost to prevent vectorization.
+ return InstructionCost::getInvalid();
Width = ElementCount::getFixed(1);
+ }
}
VectorTy = ToVectorTy(getLoadStoreType(I), Width);
return getMemoryInstructionCost(I, VF);
@@ -7656,6 +7243,16 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
// Ignore ephemeral values.
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
+ // Find all stores to invariant variables. Since they are going to be
+ // sunk outside the loop, we do not need to calculate their cost.
+ for (BasicBlock *BB : TheLoop->blocks())
+ for (Instruction &I : *BB) {
+ StoreInst *SI;
+ if ((SI = dyn_cast<StoreInst>(&I)) &&
+ Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
+ ValuesToIgnore.insert(&I);
+ }
+
// Ignore type-promoting instructions we identified during reduction
// detection.
for (auto &Reduction : Legal->getReductionVars()) {
@@ -7757,7 +7354,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
if (VPlanBuildStressTest)
return VectorizationFactor::Disabled();
- return {VF, 0 /*Cost*/};
+ return {VF, 0 /*Cost*/, 0 /* ScalarCost */};
}
LLVM_DEBUG(
@@ -7766,6 +7363,14 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
return VectorizationFactor::Disabled();
}
+bool LoopVectorizationPlanner::requiresTooManyRuntimeChecks() const {
+ unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
+ return (NumRuntimePointerChecks >
+ VectorizerParams::RuntimeMemoryCheckThreshold &&
+ !Hints.allowReordering()) ||
+ NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
+}
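The new predicate, restated standalone (hypothetical helper for illustration; the thresholds correspond to VectorizerParams::RuntimeMemoryCheckThreshold and PragmaVectorizeMemoryCheckThreshold):

bool tooManyRuntimeChecks(unsigned NumChecks, unsigned DefaultThreshold,
                          unsigned PragmaThreshold, bool AllowReordering) {
  return (NumChecks > DefaultThreshold && !AllowReordering) ||
         NumChecks > PragmaThreshold;
}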
+
Optional<VectorizationFactor>
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -7800,7 +7405,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
CM.collectInLoopReductions();
buildVPlansWithVPRecipes(UserVF, UserVF);
LLVM_DEBUG(printPlans(dbgs()));
- return {{UserVF, 0}};
+ return {{UserVF, 0, 0}};
} else
reportVectorizationInfo("UserVF ignored because of invalid costs.",
"InvalidCost", ORE, OrigLoop);
@@ -7834,30 +7439,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
- auto SelectedVF = CM.selectVectorizationFactor(VFCandidates);
-
- // Check if it is profitable to vectorize with runtime checks.
- unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
- if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) {
- bool PragmaThresholdReached =
- NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
- bool ThresholdReached =
- NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
- if ((ThresholdReached && !Hints.allowReordering()) ||
- PragmaThresholdReached) {
- ORE->emit([&]() {
- return OptimizationRemarkAnalysisAliasing(
- DEBUG_TYPE, "CantReorderMemOps", OrigLoop->getStartLoc(),
- OrigLoop->getHeader())
- << "loop not vectorized: cannot prove it is safe to reorder "
- "memory operations";
- });
- LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
- Hints.emitRemarkWithHints();
- return VectorizationFactor::Disabled();
- }
- }
- return SelectedVF;
+ return CM.selectVectorizationFactor(VFCandidates);
}
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -7910,17 +7492,36 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
VPlan &BestVPlan,
InnerLoopVectorizer &ILV,
- DominatorTree *DT) {
+ DominatorTree *DT,
+ bool IsEpilogueVectorization) {
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF
<< '\n');
// Perform the actual loop transformation.
- // 1. Create a new empty loop. Unlink the old loop and connect the new one.
+ // 1. Set up the skeleton for vectorization, including vector pre-header and
+ // middle block. The vector loop is created during VPlan execution.
VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
Value *CanonicalIVStartValue;
std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
ILV.createVectorizedLoopSkeleton();
+
+ // Only use noalias metadata when using memory checks guaranteeing no overlap
+ // across all iterations.
+ const LoopAccessInfo *LAI = ILV.Legal->getLAI();
+ if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
+ !LAI->getRuntimePointerChecking()->getDiffChecks()) {
+
+ // We currently don't use LoopVersioning for the actual loop cloning but we
+ // still use it to add the noalias metadata.
+ // TODO: Find a better way to re-use LoopVersioning functionality to add
+ // metadata.
+ State.LVer = std::make_unique<LoopVersioning>(
+ *LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
+ PSE.getSE());
+ State.LVer->prepareNoAliasMetadata();
+ }
+
ILV.collectPoisonGeneratingRecipes(State);
ILV.printDebugTracesAtStart();
@@ -7936,7 +7537,9 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// 2. Copy and widen instructions from the old loop into the new loop.
BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr),
ILV.getOrCreateVectorTripCount(nullptr),
- CanonicalIVStartValue, State);
+ CanonicalIVStartValue, State,
+ IsEpilogueVectorization);
+
BestVPlan.execute(&State);
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -7947,8 +7550,10 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupVectorized});
- Loop *L = LI->getLoopFor(State.CFG.PrevBB);
- if (VectorizedLoopID.hasValue())
+ VPBasicBlock *HeaderVPBB =
+ BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
+ Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
+ if (VectorizedLoopID)
L->setLoopID(VectorizedLoopID.getValue());
else {
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -7965,7 +7570,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
- ILV.fixVectorizedLoop(State);
+ ILV.fixVectorizedLoop(State, BestVPlan);
ILV.printDebugTracesAtEnd();
}
@@ -8036,22 +7641,31 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
std::pair<BasicBlock *, Value *>
EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
MDNode *OrigLoopID = OrigLoop->getLoopID();
- Loop *Lp = createVectorLoopSkeleton("");
+
+ // Workaround! Compute the trip count of the original loop and cache it
+ // before we start modifying the CFG. This code has a systemic problem
+ // wherein it tries to run analysis over partially constructed IR; this is
+ // wrong, and not simply for SCEV. The trip count of the original loop
+ // simply happens to be prone to hitting this in practice. In theory, we
+ // can hit the same issue for any SCEV, or ValueTracking query done during
+ // mutation. See PR49900.
+ getOrCreateTripCount(OrigLoop->getLoopPreheader());
+ createVectorLoopSkeleton("");
// Generate the code to check the minimum iteration count of the vector
// epilogue (see below).
EPI.EpilogueIterationCountCheck =
- emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader, true);
+ emitIterationCountCheck(LoopScalarPreHeader, true);
EPI.EpilogueIterationCountCheck->setName("iter.check");
// Generate the code to check any assumptions that we've made for SCEV
// expressions.
- EPI.SCEVSafetyCheck = emitSCEVChecks(Lp, LoopScalarPreHeader);
+ EPI.SCEVSafetyCheck = emitSCEVChecks(LoopScalarPreHeader);
// Generate the code that checks at runtime if arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
- EPI.MemSafetyCheck = emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
+ EPI.MemSafetyCheck = emitMemRuntimeChecks(LoopScalarPreHeader);
// Generate the iteration count check for the main loop, *after* the check
// for the epilogue loop, so that the path-length is shorter for the case
@@ -8060,19 +7674,17 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
// trip count. Note: the branch will get updated later on when we vectorize
// the epilogue.
EPI.MainLoopIterationCountCheck =
- emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader, false);
+ emitIterationCountCheck(LoopScalarPreHeader, false);
// Generate the induction variable.
- Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
- EPI.VectorTripCount = CountRoundDown;
- createHeaderBranch(Lp);
+ EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
// Skip induction resume value creation here because they will be created in
// the second pass. If we created them here, they wouldn't be used anyway,
// because the vplan in the second pass still contains the inductions from the
// original loop.
- return {completeLoopSkeleton(Lp, OrigLoopID), nullptr};
+ return {completeLoopSkeleton(OrigLoopID), nullptr};
}
void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
@@ -8092,13 +7704,13 @@ void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() {
});
}
-BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
- Loop *L, BasicBlock *Bypass, bool ForEpilogue) {
- assert(L && "Expected valid Loop.");
+BasicBlock *
+EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
+ bool ForEpilogue) {
assert(Bypass && "Expected valid bypass basic block.");
ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF;
unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF;
- Value *Count = getOrCreateTripCount(L);
+ Value *Count = getOrCreateTripCount(LoopVectorPreHeader);
// Reuse existing vector loop preheader for TC checks.
// Note that new preheader block is generated for vector loop.
BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
@@ -8157,7 +7769,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
std::pair<BasicBlock *, Value *>
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
MDNode *OrigLoopID = OrigLoop->getLoopID();
- Loop *Lp = createVectorLoopSkeleton("vec.epilog.");
+ createVectorLoopSkeleton("vec.epilog.");
// Now, compare the remaining count and if there aren't enough iterations to
// execute the vectorized epilogue skip to the scalar part.
@@ -8166,7 +7778,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
LoopVectorPreHeader =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
LI, nullptr, "vec.epilog.ph");
- emitMinimumVectorEpilogueIterCountCheck(Lp, LoopScalarPreHeader,
+ emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
VecEpilogueIterationCountCheck);
// Adjust the control flow taking the state info from the main loop
@@ -8238,9 +7850,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
EPI.MainLoopIterationCountCheck);
- // Generate the induction variable.
- createHeaderBranch(Lp);
-
// Generate induction resume values. These variables save the new starting
// indexes for the scalar loop. They are used to test if there are any tail
// iterations left once the vector loop has completed.
@@ -8248,15 +7857,15 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
// check, then the resume value for the induction variable comes from
// the trip count of the main vector loop, hence passing the AdditionalBypass
// argument.
- createInductionResumeValues(Lp, {VecEpilogueIterationCountCheck,
- EPI.VectorTripCount} /* AdditionalBypass */);
+ createInductionResumeValues({VecEpilogueIterationCountCheck,
+ EPI.VectorTripCount} /* AdditionalBypass */);
- return {completeLoopSkeleton(Lp, OrigLoopID), EPResumeVal};
+ return {completeLoopSkeleton(OrigLoopID), EPResumeVal};
}
BasicBlock *
EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
- Loop *L, BasicBlock *Bypass, BasicBlock *Insert) {
+ BasicBlock *Bypass, BasicBlock *Insert) {
assert(EPI.TripCount &&
"Expected trip count to have been safed in the first pass.");
@@ -8397,7 +8006,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
// constructing the desired canonical IV in the header block as its first
// non-phi instructions.
assert(CM.foldTailByMasking() && "must fold the tail");
- VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB =
+ Plan->getVectorLoopRegion()->getEntryBasicBlock();
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
auto *IV = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV());
HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi());
@@ -8439,8 +8049,6 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
"Must be called with either a load or store");
auto willWiden = [&](ElementCount VF) -> bool {
- if (VF.isScalar())
- return false;
LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, VF);
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
@@ -8477,11 +8085,12 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
Mask, Consecutive, Reverse);
}
-static VPWidenIntOrFpInductionRecipe *
-createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
- VPValue *Start, const InductionDescriptor &IndDesc,
- LoopVectorizationCostModel &CM, Loop &OrigLoop,
- VFRange &Range) {
+/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
+/// insert a recipe to expand the step for the induction recipe.
+static VPWidenIntOrFpInductionRecipe *createWidenInductionRecipes(
+ PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start,
+ const InductionDescriptor &IndDesc, LoopVectorizationCostModel &CM,
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop, VFRange &Range) {
// Returns true if an instruction \p I should be scalarized instead of
// vectorized for the chosen vectorization factor.
auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
@@ -8489,18 +8098,6 @@ createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
CM.isProfitableToScalarize(I, VF);
};
- bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) {
- // Returns true if we should generate a scalar version of \p IV.
- if (ShouldScalarizeInstruction(PhiOrTrunc, VF))
- return true;
- auto isScalarInst = [&](User *U) -> bool {
- auto *I = cast<Instruction>(U);
- return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF);
- };
- return any_of(PhiOrTrunc->users(), isScalarInst);
- },
- Range);
bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) {
return ShouldScalarizeInstruction(PhiOrTrunc, VF);
@@ -8508,30 +8105,38 @@ createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
Range);
assert(IndDesc.getStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+ assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
+ "step must be loop invariant");
+
+ VPValue *Step =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI,
- NeedsScalarIV, !NeedsScalarIVOnly);
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI,
+ !NeedsScalarIVOnly);
}
assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV,
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc,
!NeedsScalarIVOnly);
}
-VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
- PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
+VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
- return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop,
- Range);
+ return createWidenInductionRecipes(Phi, Phi, Operands[0], *II, CM, Plan,
+ *PSE.getSE(), *OrigLoop, Range);
+ // Check if this is pointer induction. If so, build the recipe for it.
+ if (auto *II = Legal->getPointerInductionDescriptor(Phi))
+ return new VPWidenPointerInductionRecipe(Phi, Operands[0], *II,
+ *PSE.getSE());
return nullptr;
}
VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
- TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range,
- VPlan &Plan) const {
+ TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) {
// Optimize the special case where the source is a constant integer
// induction variable. Notice that we can only optimize the 'trunc' case
// because (a) FP conversions lose precision, (b) sext/zext may wrap, and
@@ -8552,7 +8157,8 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range);
+ return createWidenInductionRecipes(Phi, I, Start, II, CM, Plan,
+ *PSE.getSE(), *OrigLoop, Range);
}
return nullptr;
}
@@ -8569,13 +8175,30 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
return Operands[0];
}
+ unsigned NumIncoming = Phi->getNumIncomingValues();
+ // For in-loop reductions, we do not need to create an additional select.
+ VPValue *InLoopVal = nullptr;
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ PHINode *PhiOp =
+ dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue());
+ if (PhiOp && CM.isInLoopReduction(PhiOp)) {
+ assert(!InLoopVal && "Found more than one in-loop reduction!");
+ InLoopVal = Operands[In];
+ }
+ }
+
+ assert((!InLoopVal || NumIncoming == 2) &&
+ "Found an in-loop reduction for PHI with unexpected number of "
+ "incoming values");
+ if (InLoopVal)
+ return Operands[Operands[0] == InLoopVal ? 1 : 0];
+
// We know that all PHIs in non-header blocks are converted into selects, so
// we don't have to worry about the insertion order and we can just use the
// builder. At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
SmallVector<VPValue *, 2> OperandsWithMask;
- unsigned NumIncoming = Phi->getNumIncomingValues();
for (unsigned In = 0; In < NumIncoming; In++) {
VPValue *EdgeMask =
@@ -8681,6 +8304,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
case Instruction::URem:
case Instruction::Xor:
case Instruction::ZExt:
+ case Instruction::Freeze:
return true;
}
return false;
@@ -8806,14 +8430,14 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
Plan->removeVPValueFor(Instr);
Plan->addVPValue(Instr, PHIRecipe);
}
- auto *Exit = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
+ auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe);
- VPRegionBlock *Region = new VPRegionBlock(Entry, Exit, RegionName, true);
+ VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
// Note: first set Entry as region entry and then connect successors starting
// from it in order, to propagate the "parent" of each VPBasicBlock.
- VPBlockUtils::insertTwoBlocksAfter(Pred, Exit, BlockInMask, Entry);
- VPBlockUtils::connectBlocks(Pred, Exit);
+ VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
+ VPBlockUtils::connectBlocks(Pred, Exiting);
return Region;
}
@@ -8822,52 +8446,37 @@ VPRecipeOrVPValueTy
VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
ArrayRef<VPValue *> Operands,
VFRange &Range, VPlanPtr &Plan) {
- // First, check for specific widening recipes that deal with calls, memory
- // operations, inductions and Phi nodes.
- if (auto *CI = dyn_cast<CallInst>(Instr))
- return toVPRecipeResult(tryToWidenCall(CI, Operands, Range));
-
- if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
- return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
-
+ // First, check for specific widening recipes that deal with inductions, Phi
+ // nodes, calls and memory operations.
VPRecipeBase *Recipe;
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands, Plan);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
return toVPRecipeResult(Recipe);
VPHeaderPHIRecipe *PhiRecipe = nullptr;
- if (Legal->isReductionVariable(Phi) || Legal->isFirstOrderRecurrence(Phi)) {
- VPValue *StartV = Operands[0];
- if (Legal->isReductionVariable(Phi)) {
- const RecurrenceDescriptor &RdxDesc =
- Legal->getReductionVars().find(Phi)->second;
- assert(RdxDesc.getRecurrenceStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
- PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV,
- CM.isInLoopReduction(Phi),
- CM.useOrderedReductions(RdxDesc));
- } else {
- PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
- }
-
- // Record the incoming value from the backedge, so we can add the incoming
- // value from the backedge after all recipes have been created.
- recordRecipeOf(cast<Instruction>(
- Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
- PhisToFix.push_back(PhiRecipe);
+ assert((Legal->isReductionVariable(Phi) ||
+ Legal->isFirstOrderRecurrence(Phi)) &&
+ "can only widen reductions and first-order recurrences here");
+ VPValue *StartV = Operands[0];
+ if (Legal->isReductionVariable(Phi)) {
+ const RecurrenceDescriptor &RdxDesc =
+ Legal->getReductionVars().find(Phi)->second;
+ assert(RdxDesc.getRecurrenceStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
+ PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc, *StartV,
+ CM.isInLoopReduction(Phi),
+ CM.useOrderedReductions(RdxDesc));
} else {
- // TODO: record backedge value for remaining pointer induction phis.
- assert(Phi->getType()->isPointerTy() &&
- "only pointer phis should be handled here");
- assert(Legal->getInductionVars().count(Phi) &&
- "Not an induction variable");
- InductionDescriptor II = Legal->getInductionVars().lookup(Phi);
- VPValue *Start = Plan->getOrAddVPValue(II.getStartValue());
- PhiRecipe = new VPWidenPHIRecipe(Phi, Start);
+ PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
}
+ // Record the incoming value from the backedge, so we can add the incoming
+ // value from the backedge after all recipes have been created.
+ recordRecipeOf(cast<Instruction>(
+ Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
+ PhisToFix.push_back(PhiRecipe);
return toVPRecipeResult(PhiRecipe);
}
@@ -8876,6 +8485,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
Range, *Plan)))
return toVPRecipeResult(Recipe);
+ // All widen recipes below deal only with VF > 1.
+ if (LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) { return VF.isScalar(); }, Range))
+ return nullptr;
+
+ if (auto *CI = dyn_cast<CallInst>(Instr))
+ return toVPRecipeResult(tryToWidenCall(CI, Operands, Range));
+
+ if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
+ return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
+
if (!shouldWiden(Instr, Range))
return nullptr;
@@ -8949,15 +8569,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
// CanonicalIVIncrement{NUW} VPInstruction to increment it by VF * UF and a
// BranchOnCount VPInstruction to the latch.
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
- bool HasNUW, bool IsVPlanNative) {
+ bool HasNUW) {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
auto *StartV = Plan.getOrAddVPValue(StartIdx);
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
- if (IsVPlanNative)
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
Header->insert(CanonicalIVPHI, Header->begin());
auto *CanonicalIVIncrement =
@@ -8966,11 +8584,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
{CanonicalIVPHI}, DL);
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
- VPBasicBlock *EB = TopRegion->getExitBasicBlock();
- if (IsVPlanNative) {
- EB = cast<VPBasicBlock>(EB->getSinglePredecessor());
- EB->setCondBit(nullptr);
- }
+ VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
EB->appendRecipe(CanonicalIVIncrement);
auto *BranchOnCount =
@@ -8979,6 +8593,26 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
EB->appendRecipe(BranchOnCount);
}
+// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the
+// original exit block.
+static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *MiddleVPBB, Loop *OrigLoop,
+ VPlan &Plan) {
+ BasicBlock *ExitBB = OrigLoop->getUniqueExitBlock();
+ BasicBlock *ExitingBB = OrigLoop->getExitingBlock();
+ // Only handle single-exit loops with unique exit blocks for now.
+ if (!ExitBB || !ExitBB->getSinglePredecessor() || !ExitingBB)
+ return;
+
+ // Introduce VPUsers modeling the exit values.
+ for (PHINode &ExitPhi : ExitBB->phis()) {
+ Value *IncomingValue =
+ ExitPhi.getIncomingValueForBlock(ExitingBB);
+ VPValue *V = Plan.getOrAddVPValue(IncomingValue, true);
+ Plan.addLiveOut(&ExitPhi, V);
+ }
+}
+
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
const MapVector<Instruction *, Instruction *> &SinkAfter) {
@@ -9007,7 +8641,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RecipeBuilder.recordRecipeOf(Phi);
for (auto &R : ReductionOperations) {
RecipeBuilder.recordRecipeOf(R);
- // For min/max reducitons, where we have a pair of icmp/select, we also
+ // For min/max reductions, where we have a pair of icmp/select, we also
// need to record the ICmp recipe, so it can be removed later.
assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
"Only min/max recurrences allowed for inloop reductions");
@@ -9039,18 +8673,25 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- // Create initial VPlan skeleton, with separate header and latch blocks.
- VPBasicBlock *HeaderVPBB = new VPBasicBlock();
+ // Create initial VPlan skeleton, starting with a block for the pre-header,
+ // followed by a region for the vector loop, followed by the middle block. The
+ // skeleton vector loop region contains a header and latch block.
+ VPBasicBlock *Preheader = new VPBasicBlock("vector.ph");
+ auto Plan = std::make_unique<VPlan>(Preheader);
+
+ VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
- auto Plan = std::make_unique<VPlan>(TopRegion);
+ VPBlockUtils::insertBlockAfter(TopRegion, Preheader);
+ VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
+ VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
Instruction *DLInst =
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(),
DLInst ? DLInst->getDebugLoc() : DebugLoc(),
- !CM.foldTailByMasking(), false);
+ !CM.foldTailByMasking());
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
@@ -9063,11 +8704,12 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
- VPBB->setName(BB->getName());
+ if (VPBB != HeaderVPBB)
+ VPBB->setName(BB->getName());
Builder.setInsertPoint(VPBB);
// Introduce each ingredient into VPlan.
- // TODO: Model and preserve debug instrinsics in VPlan.
+ // TODO: Model and preserve debug intrinsics in VPlan.
for (Instruction &I : BB->instructionsWithoutDebug()) {
Instruction *Instr = &I;
@@ -9085,6 +8727,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
auto OpRange = Plan->mapToVPValues(Instr->operands());
Operands = {OpRange.begin(), OpRange.end()};
}
+
+      // Invariant stores inside the loop will be deleted, and a single store
+      // with the final reduction value will be added to the exit block.
+ StoreInst *SI;
+ if ((SI = dyn_cast<StoreInst>(&I)) &&
+ Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
+ continue;
+
if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
Instr, Operands, Range, Plan)) {
// If Instr can be simplified to an existing VPValue, use it.
@@ -9135,14 +8785,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBB = cast<VPBasicBlock>(VPBB->getSingleSuccessor());
}
+ HeaderVPBB->setName("vector.body");
+
// Fold the last, empty block into its predecessor.
VPBB = VPBlockUtils::tryToMergeBlockIntoPredecessor(VPBB);
assert(VPBB && "expected to fold last (empty) block");
// After here, VPBB should not be used.
VPBB = nullptr;
- assert(isa<VPRegionBlock>(Plan->getEntry()) &&
- !Plan->getEntry()->getEntryBasicBlock()->empty() &&
+ addUsersInExitBlock(HeaderVPBB, MiddleVPBB, OrigLoop, *Plan);
+
+ assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
+ !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
@@ -9222,12 +8876,13 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
// Adjust the recipes for any inloop reductions.
- adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExit()), Plan,
+ adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExiting()), Plan,
RecipeBuilder, Range.Start);
// Introduce a recipe to combine the incoming and previous values of a
// first-order recurrence.
- for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
+ for (VPRecipeBase &R :
+ Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
auto *RecurPhi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R);
if (!RecurPhi)
continue;
@@ -9236,7 +8891,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBasicBlock *InsertBlock = PrevRecipe->getParent();
auto *Region = GetReplicateRegion(PrevRecipe);
if (Region)
- InsertBlock = cast<VPBasicBlock>(Region->getSingleSuccessor());
+ InsertBlock = dyn_cast<VPBasicBlock>(Region->getSingleSuccessor());
+ if (!InsertBlock) {
+ InsertBlock = new VPBasicBlock(Region->getName() + ".succ");
+ VPBlockUtils::insertBlockAfter(InsertBlock, Region);
+ }
if (Region || PrevRecipe->isPhi())
Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
else
@@ -9283,13 +8942,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
- // From this point onwards, VPlan-to-VPlan transformations may change the plan
- // in ways that accessing values using original IR values is incorrect.
- Plan->disableValue2VPValue();
-
- VPlanTransforms::sinkScalarOperands(*Plan);
- VPlanTransforms::mergeReplicateRegions(*Plan);
-
std::string PlanName;
raw_string_ostream RSO(PlanName);
ElementCount VF = Range.Start;
@@ -9303,10 +8955,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RSO.flush();
Plan->setName(PlanName);
+ // From this point onwards, VPlan-to-VPlan transformations may change the plan
+ // in ways that accessing values using original IR values is incorrect.
+ Plan->disableValue2VPValue();
+
+ VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
+ VPlanTransforms::sinkScalarOperands(*Plan);
+ VPlanTransforms::mergeReplicateRegions(*Plan);
+ VPlanTransforms::removeDeadRecipes(*Plan);
+ VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan);
+
// Fold Exit block into its predecessor if possible.
// TODO: Fold block earlier once all VPlan transforms properly maintain a
// VPBasicBlock as exit.
- VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExit());
+ VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExiting());
assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
@@ -9331,23 +8993,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VF *= 2)
Plan->addVF(VF);
- if (EnableVPlanPredication) {
- VPlanPredicator VPP(*Plan);
- VPP.predicate();
-
- // Avoid running transformation to recipes until masked code generation in
- // VPlan-native path is in place.
- return Plan;
- }
-
SmallPtrSet<Instruction *, 1> DeadInstructions;
VPlanTransforms::VPInstructionsToVPRecipes(
OrigLoop, Plan,
[this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); },
DeadInstructions, *PSE.getSE());
+ // Remove the existing terminator of the exiting block of the top-most region.
+ // A BranchOnCount will be added instead when adding the canonical IV recipes.
+ auto *Term =
+ Plan->getVectorLoopRegion()->getExitingBasicBlock()->getTerminator();
+ Term->eraseFromParent();
+
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
- true, true);
+ true);
return Plan;
}
@@ -9399,7 +9058,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId;
VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId));
- auto *CondOp = CM.foldTailByMasking()
+ auto *CondOp = CM.blockNeedsPredicationForAnyReason(R->getParent())
? RecipeBuilder.createBlockInMask(R->getParent(), Plan)
: nullptr;
@@ -9441,7 +9100,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// dedicated latch block.
if (CM.foldTailByMasking()) {
Builder.setInsertPoint(LatchVPBB, LatchVPBB->begin());
- for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
+ for (VPRecipeBase &R :
+ Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR || PhiR->isInLoop())
continue;
@@ -9493,7 +9153,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
void VPWidenSelectRecipe::execute(VPTransformState &State) {
auto &I = *cast<SelectInst>(getUnderlyingInstr());
- State.ILV->setDebugLocFromInst(&I);
+ State.setDebugLocFromInst(&I);
// The condition can be loop invariant but still defined inside the
// loop. This means that we can't just use the original 'cond' value.
@@ -9508,7 +9168,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
Value *Op1 = State.get(getOperand(2), Part);
Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
State.set(this, Sel, Part);
- State.ILV->addMetadata(Sel, &I);
+ State.addMetadata(Sel, &I);
}
}
@@ -9542,7 +9202,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
case Instruction::Or:
case Instruction::Xor: {
// Just widen unops and binops.
- State.ILV->setDebugLocFromInst(&I);
+ State.setDebugLocFromInst(&I);
for (unsigned Part = 0; Part < State.UF; ++Part) {
SmallVector<Value *, 2> Ops;
@@ -9565,17 +9225,28 @@ void VPWidenRecipe::execute(VPTransformState &State) {
// Use this vector value for all users of the original instruction.
State.set(this, V, Part);
- State.ILV->addMetadata(V, &I);
+ State.addMetadata(V, &I);
}
break;
}
+ case Instruction::Freeze: {
+ State.setDebugLocFromInst(&I);
+
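+    // Freeze is lane-wise, so widening it amounts to freezing each unrolled
+    // vector part.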
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Op = State.get(getOperand(0), Part);
+
+ Value *Freeze = Builder.CreateFreeze(Op);
+ State.set(this, Freeze, Part);
+ }
+ break;
+ }
case Instruction::ICmp:
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
bool FCmp = (I.getOpcode() == Instruction::FCmp);
auto *Cmp = cast<CmpInst>(&I);
- State.ILV->setDebugLocFromInst(Cmp);
+ State.setDebugLocFromInst(Cmp);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
@@ -9589,7 +9260,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
}
State.set(this, C, Part);
- State.ILV->addMetadata(C, &I);
+ State.addMetadata(C, &I);
}
break;
@@ -9608,7 +9279,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
auto *CI = cast<CastInst>(&I);
- State.ILV->setDebugLocFromInst(CI);
+ State.setDebugLocFromInst(CI);
/// Vectorize casts.
Type *DestTy = (State.VF.isScalar())
@@ -9619,7 +9290,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
Value *A = State.get(getOperand(0), Part);
Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
State.set(this, Cast, Part);
- State.ILV->addMetadata(Cast, &I);
+ State.addMetadata(Cast, &I);
}
break;
}
@@ -9655,7 +9326,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
State.set(this, EntryPart, Part);
- State.ILV->addMetadata(EntryPart, GEP);
+ State.addMetadata(EntryPart, GEP);
}
} else {
// If the GEP has at least one loop-varying operand, we are sure to
@@ -9693,32 +9364,276 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
- auto *NewGEP = IsInBounds
- ? State.Builder.CreateInBoundsGEP(
- GEP->getSourceElementType(), Ptr, Indices)
- : State.Builder.CreateGEP(GEP->getSourceElementType(),
- Ptr, Indices);
+ auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
+ Indices, "", IsInBounds);
assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
State.set(this, NewGEP, Part);
- State.ILV->addMetadata(NewGEP, GEP);
+ State.addMetadata(NewGEP, GEP);
}
}
}
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Int or FP induction being replicated.");
- auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0);
- State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV);
+
+ Value *Start = getStartValue()->getLiveInIRValue();
+ const InductionDescriptor &ID = getInductionDescriptor();
+ TruncInst *Trunc = getTruncInst();
+ IRBuilderBase &Builder = State.Builder;
+ assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
+ assert(State.VF.isVector() && "must have vector VF");
+
+ // The value from the original loop to which we are mapping the new induction
+ // variable.
+ Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
+
+ // Fast-math-flags propagate from the original induction instruction.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
+ Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
+
+ // Now do the actual transformations, and start with fetching the step value.
+ Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+
+ assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
+ "Expected either an induction phi-node or a truncate of it!");
+
+ // Construct the initial value of the vector IV in the vector loop preheader
+ auto CurrIP = Builder.saveIP();
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ if (isa<TruncInst>(EntryVal)) {
+ assert(Start->getType()->isIntegerTy() &&
+ "Truncation requires an integer type");
+ auto *TruncType = cast<IntegerType>(EntryVal->getType());
+ Step = Builder.CreateTrunc(Step, TruncType);
+ Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
+ }
+
+ Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
+ Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
+ Value *SteppedStart = getStepVector(
+ SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
+
+ // We create vector phi nodes for both integer and floating-point induction
+ // variables. Here, we determine the kind of arithmetic we will perform.
+ Instruction::BinaryOps AddOp;
+ Instruction::BinaryOps MulOp;
+ if (Step->getType()->isIntegerTy()) {
+ AddOp = Instruction::Add;
+ MulOp = Instruction::Mul;
+ } else {
+ AddOp = ID.getInductionOpcode();
+ MulOp = Instruction::FMul;
+ }
+
+ // Multiply the vectorization factor by the step using integer or
+ // floating-point arithmetic as appropriate.
+ Type *StepType = Step->getType();
+ Value *RuntimeVF;
+ if (Step->getType()->isFloatingPointTy())
+ RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
+ else
+ RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
+ Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
+
+ // Create a vector splat to use in the induction update.
+ //
+ // FIXME: If the step is non-constant, we create the vector splat with
+ // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
+ // handle a constant vector splat.
+ Value *SplatVF = isa<Constant>(Mul)
+ ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
+ : Builder.CreateVectorSplat(State.VF, Mul);
+ Builder.restoreIP(CurrIP);
+
+ // We may need to add the step a number of times, depending on the unroll
+ // factor. The last of those goes into the PHI.
+ PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
+ &*State.CFG.PrevBB->getFirstInsertionPt());
+ VecInd->setDebugLoc(EntryVal->getDebugLoc());
+ Instruction *LastInduction = VecInd;
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ State.set(this, LastInduction, Part);
+
+ if (isa<TruncInst>(EntryVal))
+ State.addMetadata(LastInduction, EntryVal);
+
+ LastInduction = cast<Instruction>(
+ Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
+ LastInduction->setDebugLoc(EntryVal->getDebugLoc());
+ }
+
+ LastInduction->setName("vec.ind.next");
+ VecInd->addIncoming(SteppedStart, VectorPH);
+ // Add induction update using an incorrect block temporarily. The phi node
+ // will be fixed after VPlan execution. Note that at this point the latch
+ // block cannot be used, as it does not exist yet.
+ // TODO: Model increment value in VPlan, by turning the recipe into a
+ // multi-def and a subclass of VPHeaderPHIRecipe.
+ VecInd->addIncoming(LastInduction, VectorPH);
+}
+
+void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
+ assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction &&
+ "Not a pointer induction according to InductionDescriptor!");
+ assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
+ "Unexpected type.");
+
+ auto *IVR = getParent()->getPlan()->getCanonicalIV();
+ PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
+
+ if (onlyScalarsGenerated(State.VF)) {
+ // This is the normalized GEP that starts counting at zero.
+ Value *PtrInd = State.Builder.CreateSExtOrTrunc(
+ CanonicalIV, IndDesc.getStep()->getType());
+ // Determine the number of scalars we need to generate for each unroll
+ // iteration. If the instruction is uniform, we only need to generate the
+ // first lane. Otherwise, we generate all VF values.
+ bool IsUniform = vputils::onlyFirstLaneUsed(this);
+ assert((IsUniform || !State.VF.isScalable()) &&
+ "Cannot scalarize a scalable VF");
+ unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *PartStart =
+ createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);
+
+ for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+ Value *Idx = State.Builder.CreateAdd(
+ PartStart, ConstantInt::get(PtrInd->getType(), Lane));
+ Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
+
+ Value *Step = CreateStepValue(IndDesc.getStep(), SE,
+ State.CFG.PrevBB->getTerminator());
+ Value *SclrGep = emitTransformedIndex(
+ State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc);
+ SclrGep->setName("next.gep");
+ State.set(this, SclrGep, VPIteration(Part, Lane));
+ }
+ }
+ return;
+ }
+
+ assert(isa<SCEVConstant>(IndDesc.getStep()) &&
+ "Induction step not a SCEV constant!");
+ Type *PhiType = IndDesc.getStep()->getType();
+
+ // Build a pointer phi
+ Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
+ Type *ScStValueType = ScalarStartValue->getType();
+ PHINode *NewPointerPhi =
+ PHINode::Create(ScStValueType, 2, "pointer.phi", CanonicalIV);
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
+
+ // A pointer induction, performed by using a gep
+ const DataLayout &DL = NewPointerPhi->getModule()->getDataLayout();
+ Instruction *InductionLoc = &*State.Builder.GetInsertPoint();
+
+ const SCEV *ScalarStep = IndDesc.getStep();
+ SCEVExpander Exp(SE, DL, "induction");
+ Value *ScalarStepValue = Exp.expandCodeFor(ScalarStep, PhiType, InductionLoc);
+ Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
+ Value *NumUnrolledElems =
+ State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
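+  // The pointer phi advances by Step * VF * UF per vector iteration, i.e. by
+  // the number of elements processed across all unrolled parts.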
+ Value *InductionGEP = GetElementPtrInst::Create(
+ IndDesc.getElementType(), NewPointerPhi,
+ State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
+ InductionLoc);
+ // Add induction update using an incorrect block temporarily. The phi node
+ // will be fixed after VPlan execution. Note that at this point the latch
+ // block cannot be used, as it does not exist yet.
+ // TODO: Model increment value in VPlan, by turning the recipe into a
+ // multi-def and a subclass of VPHeaderPHIRecipe.
+ NewPointerPhi->addIncoming(InductionGEP, VectorPH);
+
+ // Create UF many actual address geps that use the pointer
+ // phi as base and a vectorized version of the step value
+ // (<step*0, ..., step*N>) as offset.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Type *VecPhiType = VectorType::get(PhiType, State.VF);
+ Value *StartOffsetScalar =
+ State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, Part));
+ Value *StartOffset =
+ State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
+ // Create a vector of consecutive numbers from zero to VF.
+ StartOffset = State.Builder.CreateAdd(
+ StartOffset, State.Builder.CreateStepVector(VecPhiType));
+
+ Value *GEP = State.Builder.CreateGEP(
+ IndDesc.getElementType(), NewPointerPhi,
+ State.Builder.CreateMul(
+ StartOffset,
+ State.Builder.CreateVectorSplat(State.VF, ScalarStepValue),
+ "vector.gep"));
+ State.set(this, GEP, Part);
+ }
}
-void VPWidenPHIRecipe::execute(VPTransformState &State) {
- State.ILV->widenPHIInstruction(cast<PHINode>(getUnderlyingValue()), this,
- State);
+void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
+ assert(!State.Instance && "VPScalarIVStepsRecipe being replicated.");
+
+ // Fast-math-flags propagate from the original induction instruction.
+ IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
+ if (IndDesc.getInductionBinOp() &&
+ isa<FPMathOperator>(IndDesc.getInductionBinOp()))
+ State.Builder.setFastMathFlags(
+ IndDesc.getInductionBinOp()->getFastMathFlags());
+
+ Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+ auto CreateScalarIV = [&](Value *&Step) -> Value * {
+ Value *ScalarIV = State.get(getCanonicalIV(), VPIteration(0, 0));
+ auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0);
+ if (!isCanonical() || CanonicalIV->getType() != Ty) {
+ ScalarIV =
+ Ty->isIntegerTy()
+ ? State.Builder.CreateSExtOrTrunc(ScalarIV, Ty)
+ : State.Builder.CreateCast(Instruction::SIToFP, ScalarIV, Ty);
+ ScalarIV = emitTransformedIndex(State.Builder, ScalarIV,
+ getStartValue()->getLiveInIRValue(), Step,
+ IndDesc);
+ ScalarIV->setName("offset.idx");
+ }
+ if (TruncToTy) {
+ assert(Step->getType()->isIntegerTy() &&
+ "Truncation requires an integer step");
+ ScalarIV = State.Builder.CreateTrunc(ScalarIV, TruncToTy);
+ Step = State.Builder.CreateTrunc(Step, TruncToTy);
+ }
+ return ScalarIV;
+ };
+
+ Value *ScalarIV = CreateScalarIV(Step);
+ if (State.VF.isVector()) {
+ buildScalarSteps(ScalarIV, Step, IndDesc, this, State);
+ return;
+ }
+
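+  // For scalar VFs, emit one value per unroll part:
+  // ScalarIV + (VF * Part) * Step.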
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ assert(!State.VF.isScalable() && "scalable vectors not yet supported.");
+ Value *EntryPart;
+ if (Step->getType()->isFloatingPointTy()) {
+ Value *StartIdx =
+ getRuntimeVFAsFloat(State.Builder, Step->getType(), State.VF * Part);
+ // Floating-point operations inherit FMF via the builder's flags.
+ Value *MulOp = State.Builder.CreateFMul(StartIdx, Step);
+ EntryPart = State.Builder.CreateBinOp(IndDesc.getInductionOpcode(),
+ ScalarIV, MulOp);
+ } else {
+ Value *StartIdx =
+ getRuntimeVF(State.Builder, Step->getType(), State.VF * Part);
+ EntryPart = State.Builder.CreateAdd(
+ ScalarIV, State.Builder.CreateMul(StartIdx, Step), "induction");
+ }
+ State.set(this, EntryPart, Part);
+ }
}
void VPBlendRecipe::execute(VPTransformState &State) {
- State.ILV->setDebugLocFromInst(Phi, &State.Builder);
+ State.setDebugLocFromInst(Phi);
// We know that all PHIs in non-header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
@@ -9979,7 +9894,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// Handle Stores:
if (SI) {
- State.ILV->setDebugLocFromInst(SI);
+ State.setDebugLocFromInst(SI);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
@@ -10005,14 +9920,14 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
- State.ILV->addMetadata(NewSI, SI);
+ State.addMetadata(NewSI, SI);
}
return;
}
// Handle loads.
assert(LI && "Must have a load instruction");
- State.ILV->setDebugLocFromInst(LI);
+ State.setDebugLocFromInst(LI);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;
if (CreateGatherScatter) {
@@ -10020,7 +9935,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
Value *VectorGep = State.get(getAddr(), Part);
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
nullptr, "wide.masked.gather");
- State.ILV->addMetadata(NewLI, LI);
+ State.addMetadata(NewLI, LI);
} else {
auto *VecPtr =
CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
@@ -10033,12 +9948,12 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
// Add metadata to the load, but setVectorValue to the reverse shuffle.
- State.ILV->addMetadata(NewLI, LI);
+ State.addMetadata(NewLI, LI);
if (Reverse)
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
}
- State.set(this, NewLI, Part);
+ State.set(getVPSingleValue(), NewLI, Part);
}
}
@@ -10119,7 +10034,8 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) {
// Check if there is a scalar value for the selected lane.
if (!hasScalarValue(Def, {Part, LastLane})) {
// At the moment, VPWidenIntOrFpInductionRecipes can also be uniform.
- assert(isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) &&
+ assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDef()) ||
+ isa<VPScalarIVStepsRecipe>(Def->getDef())) &&
"unexpected recipe found to be invariant");
IsUniform = true;
LastLane = 0;
@@ -10201,8 +10117,7 @@ static bool processLoopInVPlanNativePath(
// If we are stress testing VPlan builds, do not attempt to generate vector
// code. Masked vector code generation support will follow soon.
// Also, do not attempt to vectorize if no vector code will be produced.
- if (VPlanBuildStressTest || EnableVPlanPredication ||
- VectorizationFactor::Disabled() == VF)
+ if (VPlanBuildStressTest || VectorizationFactor::Disabled() == VF)
return false;
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
@@ -10214,7 +10129,7 @@ static bool processLoopInVPlanNativePath(
&CM, BFI, PSI, Checks);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
- LVP.executePlan(VF.Width, 1, BestPlan, LB, DT);
+ LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
}
// Mark the loop as already vectorized to avoid vectorizing again.
@@ -10282,8 +10197,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
const std::string DebugLocStr = getDebugLocString(L);
#endif /* NDEBUG */
- LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in \""
- << L->getHeader()->getParent()->getName() << "\" from "
+ LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '"
+ << L->getHeader()->getParent()->getName() << "' from "
<< DebugLocStr << "\n");
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
@@ -10438,10 +10353,30 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ F->getParent()->getDataLayout());
if (MaybeVF) {
+ if (LVP.requiresTooManyRuntimeChecks()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysisAliasing(
+ DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),
+ L->getHeader())
+ << "loop not vectorized: cannot prove it is safe to reorder "
+ "memory operations";
+ });
+ LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
+ Hints.emitRemarkWithHints();
+ return false;
+ }
VF = *MaybeVF;
// Select the interleave count.
IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+
+ unsigned SelectedIC = std::max(IC, UserIC);
+ // Optimistically generate runtime checks if they are needed. Drop them if
+ // they turn out to not be profitable.
+ if (VF.Width.isVector() || SelectedIC > 1)
+ Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
}
// Identify the diagnostic messages that should be produced.
@@ -10529,14 +10464,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool DisableRuntimeUnroll = false;
MDNode *OrigLoopID = L->getLoopID();
{
- // Optimistically generate runtime checks. Drop them if they turn out to not
- // be profitable. Limit the scope of Checks, so the cleanup happens
- // immediately after vector codegeneration is done.
- GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
- F->getParent()->getDataLayout());
- if (!VF.Width.isScalar() || IC > 1)
- Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
-
using namespace ore;
if (!VectorizeLoop) {
assert(IC > 1 && "interleave count should not be 1 or 0");
@@ -10546,7 +10473,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
&CM, BFI, PSI, Checks);
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
- LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT);
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10571,12 +10498,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV,
- DT);
+ DT, true);
++LoopsVectorized;
- simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
- formLCSSARecursively(*L, *DT, LI, SE);
-
// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
EPI.MainLoopVF = EPI.EpilogueVF;
@@ -10586,23 +10510,24 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Checks);
VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF);
+ VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion();
+ VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
+ Header->setName("vec.epilog.vector.body");
// Ensure that the start values for any VPReductionPHIRecipes are
// updated before vectorising the epilogue loop.
- VPBasicBlock *Header = BestEpiPlan.getEntry()->getEntryBasicBlock();
for (VPRecipeBase &R : Header->phis()) {
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
if (auto *Resume = MainILV.getReductionResumeValue(
ReductionPhi->getRecurrenceDescriptor())) {
- VPValue *StartVal = new VPValue(Resume);
- BestEpiPlan.addExternalDef(StartVal);
+ VPValue *StartVal = BestEpiPlan.getOrAddExternalDef(Resume);
ReductionPhi->setOperand(0, StartVal);
}
}
}
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
- DT);
+ DT, true);
++LoopsEpilogueVectorized;
if (!MainILV.areSafetyChecksAdded())
@@ -10612,7 +10537,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
&LVL, &CM, BFI, PSI, Checks);
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
- LVP.executePlan(VF.Width, IC, BestPlan, LB, DT);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
@@ -10638,7 +10563,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
- if (RemainderLoopID.hasValue()) {
+ if (RemainderLoopID) {
L->setLoopID(RemainderLoopID.getValue());
} else {
if (DisableRuntimeUnroll)
@@ -10720,8 +10645,12 @@ LoopVectorizeResult LoopVectorizePass::runImpl(
PreservedAnalyses LoopVectorizePass::run(Function &F,
FunctionAnalysisManager &AM) {
- auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
+ // There are no loops in the function. Return before computing other expensive
+ // analyses.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 15b349f53fd9..019a09665a67 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -53,7 +53,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -64,7 +63,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -72,8 +70,9 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
+#endif
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -87,6 +86,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
@@ -164,13 +164,14 @@ static cl::opt<int> LookAheadMaxDepth(
"slp-max-look-ahead-depth", cl::init(2), cl::Hidden,
cl::desc("The maximum look-ahead depth for operand reordering scores"));
-// The Look-ahead heuristic goes through the users of the bundle to calculate
-// the users cost in getExternalUsesCost(). To avoid compilation time increase
-// we limit the number of users visited to this value.
-static cl::opt<unsigned> LookAheadUsersBudget(
- "slp-look-ahead-users-budget", cl::init(2), cl::Hidden,
- cl::desc("The maximum number of users to visit while visiting the "
- "predecessors. This prevents compilation time increase."));
+// The maximum depth that the look-ahead score heuristic will explore
+// when probing among candidates for vectorization tree roots.
+// The higher this value, the higher the compilation time overhead, but unlike
+// the similar limit for operand reordering this one is used less frequently,
+// so the impact of a higher value is less noticeable.
+static cl::opt<int> RootLookAheadMaxDepth(
+ "slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden,
+ cl::desc("The maximum look-ahead depth for searching best rooting option"));
static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
@@ -571,7 +572,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
continue;
}
- if (BaseIndex == AltIndex) {
+ if (BaseIndex == AltIndex && BasePred != CurrentPred) {
assert(isValidForAlternation(Opcode) &&
isValidForAlternation(InstOpcode) &&
"Cast isn't safe for alternation, logic needs to be updated!");
@@ -640,7 +641,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
- if (hasVectorInstrinsicScalarOpd(ID, i))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, i))
return (CI->getArgOperand(i) == Scalar);
}
LLVM_FALLTHROUGH;
@@ -736,29 +737,28 @@ static void inversePermutation(ArrayRef<unsigned> Indices,
/// \returns the insertion index of the InsertElement or InsertValue
/// instruction, using Offset as the base offset for the index.
-static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
+static Optional<unsigned> getInsertIndex(const Value *InsertInst,
+ unsigned Offset = 0) {
int Index = Offset;
- if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
- if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
+ if (const auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
+ if (const auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
auto *VT = cast<FixedVectorType>(IE->getType());
if (CI->getValue().uge(VT->getNumElements()))
- return UndefMaskElem;
+ return None;
Index *= VT->getNumElements();
Index += CI->getZExtValue();
return Index;
}
- if (isa<UndefValue>(IE->getOperand(2)))
- return UndefMaskElem;
return None;
}
- auto *IV = cast<InsertValueInst>(InsertInst);
+ const auto *IV = cast<InsertValueInst>(InsertInst);
Type *CurrentType = IV->getType();
for (unsigned I : IV->indices()) {
- if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+ if (const auto *ST = dyn_cast<StructType>(CurrentType)) {
Index *= ST->getNumElements();
CurrentType = ST->getElementType(I);
- } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+ } else if (const auto *AT = dyn_cast<ArrayType>(CurrentType)) {
Index *= AT->getNumElements();
CurrentType = AT->getElementType();
} else {
@@ -769,11 +769,7 @@ static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
return Index;
}
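(The flattening above is plain row-major arithmetic: at each aggregate level the running index is multiplied by that level's element count and, as in the full function, the level's own index is then added. A self-contained check of that arithmetic with made-up shapes; flattenIndex is illustrative only.)

// For a [3 x [2 x i32]] aggregate and indices {2, 1}:
// (0 * 3 + 2) * 2 + 1 = 5, the linearized element position.
#include <cassert>
#include <cstddef>
#include <vector>

static unsigned flattenIndex(const std::vector<unsigned> &NumElems,
                             const std::vector<unsigned> &Indices) {
  unsigned Index = 0;
  for (std::size_t L = 0; L < Indices.size(); ++L)
    Index = Index * NumElems[L] + Indices[L]; // row-major step per level
  return Index;
}

int main() {
  assert(flattenIndex({3, 2}, {2, 1}) == 5);
  return 0;
}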
-/// Reorders the list of scalars in accordance with the given \p Order and then
-/// the \p Mask. \p Order - is the original order of the scalars, need to
-/// reorder scalars into an unordered state at first according to the given
-/// order. Then the ordered scalars are shuffled once again in accordance with
-/// the provided mask.
+/// Reorders the list of scalars in accordance with the given \p Mask.
static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
ArrayRef<int> Mask) {
assert(!Mask.empty() && "Expected non-empty mask.");
@@ -785,6 +781,58 @@ static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
Scalars[Mask[I]] = Prev[I];
}
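(Note the scatter semantics of the loop above: the mask says where each element goes, not where it comes from, so Prev[I] lands in slot Mask[I]. A tiny self-contained check of that behavior.)

#include <cassert>
#include <cstddef>
#include <vector>

static void reorderByMask(std::vector<char> &Scalars,
                          const std::vector<int> &Mask) {
  std::vector<char> Prev(Scalars); // snapshot, as the pass does
  for (std::size_t I = 0, E = Prev.size(); I < E; ++I)
    Scalars[Mask[I]] = Prev[I];    // scatter: source I goes to slot Mask[I]
}

int main() {
  std::vector<char> S = {'a', 'b', 'c'};
  reorderByMask(S, {2, 0, 1});     // 'a'->slot 2, 'b'->slot 0, 'c'->slot 1
  assert(S[0] == 'b' && S[1] == 'c' && S[2] == 'a');
  return 0;
}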
+/// Checks if the provided value does not require scheduling. It does not
+/// require scheduling if this is not an instruction, or if it is an
+/// instruction that does not read/write memory and all of its operands are
+/// either non-instructions, phi nodes, or instructions from other blocks.
+static bool areAllOperandsNonInsts(Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+ return !mayHaveNonDefUseDependency(*I) &&
+ all_of(I->operands(), [I](Value *V) {
+ auto *IO = dyn_cast<Instruction>(V);
+ if (!IO)
+ return true;
+ return isa<PHINode>(IO) || IO->getParent() != I->getParent();
+ });
+}
+
+/// Checks if the provided value does not require scheduling. It does not
+/// require scheduling if this is not an instruction, or if it is an
+/// instruction that does not read/write memory and all of its users are
+/// phi nodes or instructions from other blocks.
+static bool isUsedOutsideBlock(Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+ // Limits the number of uses to save compile time.
+ constexpr int UsesLimit = 8;
+ return !I->mayReadOrWriteMemory() && !I->hasNUsesOrMore(UsesLimit) &&
+ all_of(I->users(), [I](User *U) {
+ auto *IU = dyn_cast<Instruction>(U);
+ if (!IU)
+ return true;
+ return IU->getParent() != I->getParent() || isa<PHINode>(IU);
+ });
+}
+
+/// Checks if the specified value does not require scheduling. It does not
+/// require scheduling if all operands and all users do not need to be scheduled
+/// in the current basic block.
+static bool doesNotNeedToBeScheduled(Value *V) {
+ return areAllOperandsNonInsts(V) && isUsedOutsideBlock(V);
+}
+
+/// Checks if the specified array of instructions does not require scheduling.
+/// This is the case if either all of the instructions have operands that do
+/// not require scheduling, or all of their users are phis or live in other
+/// basic blocks and therefore do not require scheduling.
+static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
+ return !VL.empty() &&
+ (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
+}
+
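(Taken together, the three predicates above let the scheduler skip values whose placement in the block is forced anyway: if neither operands nor users pin an instruction to its position, no ScheduleData is needed for it. A toy standalone model of the combined check, with a plain struct standing in for llvm::Value; everything here is illustrative.)

#include <algorithm>
#include <vector>

struct Node {
  bool IsInstruction = true;
  bool MayTouchMemory = false; // models the memory-dependency check
  bool IsPhi = false;
  int Block = 0;
  std::vector<const Node *> Operands, Users;
};

// Operands don't pin N: each is a non-instruction, a phi, or lives elsewhere.
static bool operandsDontPin(const Node &N) {
  return !N.MayTouchMemory &&
         std::all_of(N.Operands.begin(), N.Operands.end(),
                     [&N](const Node *Op) {
                       return !Op->IsInstruction || Op->IsPhi ||
                              Op->Block != N.Block;
                     });
}

// Users don't pin N: each is a non-instruction, a phi, or lives elsewhere.
static bool usersDontPin(const Node &N) {
  return !N.MayTouchMemory &&
         std::all_of(N.Users.begin(), N.Users.end(), [&N](const Node *U) {
           return !U->IsInstruction || U->IsPhi || U->Block != N.Block;
         });
}

static bool needsNoScheduling(const Node &N) {
  return !N.IsInstruction || (operandsDontPin(N) && usersDontPin(N));
}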
namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
@@ -805,8 +853,8 @@ public:
TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
const DataLayout *DL, OptimizationRemarkEmitter *ORE)
- : F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
- DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
+ : BatchAA(*Aa), F(Func), SE(Se), TTI(Tti), TLI(TLi), LI(Li),
+ DT(Dt), AC(AC), DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
// Use the vector register size specified by the target unless overridden
// by a command-line option.
@@ -847,7 +895,10 @@ public:
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
void buildTree(ArrayRef<Value *> Roots,
- ArrayRef<Value *> UserIgnoreLst = None);
+ const SmallDenseSet<Value *> &UserIgnoreLst);
+
+ /// Construct a vectorizable tree that starts at \p Roots.
+ void buildTree(ArrayRef<Value *> Roots);
/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
@@ -868,6 +919,7 @@ public:
}
MinBWs.clear();
InstrElementSize.clear();
+ UserIgnoreList = nullptr;
}
unsigned getTreeSize() const { return VectorizableTree.size(); }
@@ -881,6 +933,9 @@ public:
/// ExtractElement, ExtractValue), which can be part of the graph.
Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
+  /// Sort loads into increasing pointer offsets to allow greater clustering.
+ Optional<OrdersType> findPartiallyOrderedLoads(const TreeEntry &TE);
+
  /// Gets reordering data for the given tree entry. If the entry is
  /// vectorized, just return ReorderIndices; otherwise check if the scalars
  /// can be reordered and return the optimal order.
@@ -995,96 +1050,18 @@ public:
#endif
};
- /// A helper data structure to hold the operands of a vector of instructions.
- /// This supports a fixed vector length for all operand vectors.
- class VLOperands {
- /// For each operand we need (i) the value, and (ii) the opcode that it
- /// would be attached to if the expression was in a left-linearized form.
- /// This is required to avoid illegal operand reordering.
- /// For example:
- /// \verbatim
- /// 0 Op1
- /// |/
- /// Op1 Op2 Linearized + Op2
- /// \ / ----------> |/
- /// - -
- ///
- /// Op1 - Op2 (0 + Op1) - Op2
- /// \endverbatim
- ///
- /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
- ///
- /// Another way to think of this is to track all the operations across the
- /// path from the operand all the way to the root of the tree and to
- /// calculate the operation that corresponds to this path. For example, the
- /// path from Op2 to the root crosses the RHS of the '-', therefore the
- /// corresponding operation is a '-' (which matches the one in the
- /// linearized tree, as shown above).
- ///
- /// For lack of a better term, we refer to this operation as Accumulated
- /// Path Operation (APO).
- struct OperandData {
- OperandData() = default;
- OperandData(Value *V, bool APO, bool IsUsed)
- : V(V), APO(APO), IsUsed(IsUsed) {}
- /// The operand value.
- Value *V = nullptr;
- /// TreeEntries only allow a single opcode, or an alternate sequence of
- /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
- /// APO. It is set to 'true' if 'V' is attached to an inverse operation
- /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
- /// (e.g., Add/Mul)
- bool APO = false;
- /// Helper data for the reordering function.
- bool IsUsed = false;
- };
-
- /// During operand reordering, we are trying to select the operand at lane
- /// that matches best with the operand at the neighboring lane. Our
- /// selection is based on the type of value we are looking for. For example,
- /// if the neighboring lane has a load, we need to look for a load that is
- /// accessing a consecutive address. These strategies are summarized in the
- /// 'ReorderingMode' enumerator.
- enum class ReorderingMode {
- Load, ///< Matching loads to consecutive memory addresses
- Opcode, ///< Matching instructions based on opcode (same or alternate)
- Constant, ///< Matching constants
- Splat, ///< Matching the same instruction multiple times (broadcast)
- Failed, ///< We failed to create a vectorizable group
- };
-
- using OperandDataVec = SmallVector<OperandData, 2>;
-
- /// A vector of operand vectors.
- SmallVector<OperandDataVec, 4> OpsVec;
-
+ /// A helper class used for scoring candidates for two consecutive lanes.
+ class LookAheadHeuristics {
const DataLayout &DL;
ScalarEvolution &SE;
const BoUpSLP &R;
+ int NumLanes; // Total number of lanes (aka vectorization factor).
+ int MaxLevel; // The maximum recursion depth for accumulating score.
- /// \returns the operand data at \p OpIdx and \p Lane.
- OperandData &getData(unsigned OpIdx, unsigned Lane) {
- return OpsVec[OpIdx][Lane];
- }
-
- /// \returns the operand data at \p OpIdx and \p Lane. Const version.
- const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
- return OpsVec[OpIdx][Lane];
- }
-
- /// Clears the used flag for all entries.
- void clearUsed() {
- for (unsigned OpIdx = 0, NumOperands = getNumOperands();
- OpIdx != NumOperands; ++OpIdx)
- for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
- ++Lane)
- OpsVec[OpIdx][Lane].IsUsed = false;
- }
-
- /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
- void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
- std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
- }
+ public:
+ LookAheadHeuristics(const DataLayout &DL, ScalarEvolution &SE,
+ const BoUpSLP &R, int NumLanes, int MaxLevel)
+ : DL(DL), SE(SE), R(R), NumLanes(NumLanes), MaxLevel(MaxLevel) {}
  // The hard-coded scores listed here are not very important, though they should
// be higher for better matches to improve the resulting cost. When
@@ -1099,6 +1076,11 @@ public:
/// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
static const int ScoreConsecutiveLoads = 4;
+ /// The same load multiple times. This should have a better score than
+  /// `ScoreSplat` because, on x86, for a 2-lane vector we can represent it
+ /// with `movddup (%reg), xmm0` which has a throughput of 0.5 versus 0.5 for
+ /// a vector load and 1.0 for a broadcast.
+ static const int ScoreSplatLoads = 3;
/// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]).
static const int ScoreReversedLoads = 3;
/// ExtractElementInst from same vector and consecutive indexes.
@@ -1117,43 +1099,67 @@ public:
static const int ScoreUndef = 1;
/// Score for failing to find a decent match.
static const int ScoreFail = 0;
- /// User exteranl to the vectorized code.
- static const int ExternalUseCost = 1;
- /// The user is internal but in a different lane.
- static const int UserInDiffLaneCost = ExternalUseCost;
+ /// Score if all users are vectorized.
+ static const int ScoreAllUserVectorized = 1;
/// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
- static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
- ScalarEvolution &SE, int NumLanes) {
- if (V1 == V2)
- return VLOperands::ScoreSplat;
+ /// \p U1 and \p U2 are the users of \p V1 and \p V2.
+ /// Also, checks if \p V1 and \p V2 are compatible with instructions in \p
+ /// MainAltOps.
+ int getShallowScore(Value *V1, Value *V2, Instruction *U1, Instruction *U2,
+ ArrayRef<Value *> MainAltOps) const {
+ if (V1 == V2) {
+ if (isa<LoadInst>(V1)) {
+          // Returns true if the users of V1 and V2 won't need to be extracted.
+ auto AllUsersAreInternal = [U1, U2, this](Value *V1, Value *V2) {
+ // Bail out if we have too many uses to save compilation time.
+ static constexpr unsigned Limit = 8;
+ if (V1->hasNUsesOrMore(Limit) || V2->hasNUsesOrMore(Limit))
+ return false;
+
+ auto AllUsersVectorized = [U1, U2, this](Value *V) {
+ return llvm::all_of(V->users(), [U1, U2, this](Value *U) {
+ return U == U1 || U == U2 || R.getTreeEntry(U) != nullptr;
+ });
+ };
+ return AllUsersVectorized(V1) && AllUsersVectorized(V2);
+ };
+ // A broadcast of a load can be cheaper on some targets.
+ if (R.TTI->isLegalBroadcastLoad(V1->getType(),
+ ElementCount::getFixed(NumLanes)) &&
+ ((int)V1->getNumUses() == NumLanes ||
+ AllUsersAreInternal(V1, V2)))
+ return LookAheadHeuristics::ScoreSplatLoads;
+ }
+ return LookAheadHeuristics::ScoreSplat;
+ }
auto *LI1 = dyn_cast<LoadInst>(V1);
auto *LI2 = dyn_cast<LoadInst>(V2);
if (LI1 && LI2) {
if (LI1->getParent() != LI2->getParent())
- return VLOperands::ScoreFail;
+ return LookAheadHeuristics::ScoreFail;
Optional<int> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
- if (!Dist)
- return VLOperands::ScoreFail;
+ if (!Dist || *Dist == 0)
+ return LookAheadHeuristics::ScoreFail;
// The distance is too large - still may be profitable to use masked
// loads/gathers.
if (std::abs(*Dist) > NumLanes / 2)
- return VLOperands::ScoreAltOpcodes;
+ return LookAheadHeuristics::ScoreAltOpcodes;
      // This will still detect consecutive loads, but we might have "holes"
// in some cases. It is ok for non-power-2 vectorization and may produce
// better results. It should not affect current vectorization.
- return (*Dist > 0) ? VLOperands::ScoreConsecutiveLoads
- : VLOperands::ScoreReversedLoads;
+ return (*Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveLoads
+ : LookAheadHeuristics::ScoreReversedLoads;
}
auto *C1 = dyn_cast<Constant>(V1);
auto *C2 = dyn_cast<Constant>(V2);
if (C1 && C2)
- return VLOperands::ScoreConstants;
+ return LookAheadHeuristics::ScoreConstants;
    // Extracts from consecutive indexes of the same vector score better, as
// the extracts could be optimized away.
@@ -1162,7 +1168,7 @@ public:
if (match(V1, m_ExtractElt(m_Value(EV1), m_ConstantInt(Ex1Idx)))) {
// Undefs are always profitable for extractelements.
if (isa<UndefValue>(V2))
- return VLOperands::ScoreConsecutiveExtracts;
+ return LookAheadHeuristics::ScoreConsecutiveExtracts;
Value *EV2 = nullptr;
ConstantInt *Ex2Idx = nullptr;
if (match(V2,
@@ -1170,108 +1176,62 @@ public:
m_Undef())))) {
// Undefs are always profitable for extractelements.
if (!Ex2Idx)
- return VLOperands::ScoreConsecutiveExtracts;
+ return LookAheadHeuristics::ScoreConsecutiveExtracts;
if (isUndefVector(EV2) && EV2->getType() == EV1->getType())
- return VLOperands::ScoreConsecutiveExtracts;
+ return LookAheadHeuristics::ScoreConsecutiveExtracts;
if (EV2 == EV1) {
int Idx1 = Ex1Idx->getZExtValue();
int Idx2 = Ex2Idx->getZExtValue();
int Dist = Idx2 - Idx1;
// The distance is too large - still may be profitable to use
// shuffles.
+ if (std::abs(Dist) == 0)
+ return LookAheadHeuristics::ScoreSplat;
if (std::abs(Dist) > NumLanes / 2)
- return VLOperands::ScoreAltOpcodes;
- return (Dist > 0) ? VLOperands::ScoreConsecutiveExtracts
- : VLOperands::ScoreReversedExtracts;
+ return LookAheadHeuristics::ScoreSameOpcode;
+ return (Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveExtracts
+ : LookAheadHeuristics::ScoreReversedExtracts;
}
+ return LookAheadHeuristics::ScoreAltOpcodes;
}
+ return LookAheadHeuristics::ScoreFail;
}
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (I1 && I2) {
if (I1->getParent() != I2->getParent())
- return VLOperands::ScoreFail;
- InstructionsState S = getSameOpcode({I1, I2});
+ return LookAheadHeuristics::ScoreFail;
+ SmallVector<Value *, 4> Ops(MainAltOps.begin(), MainAltOps.end());
+ Ops.push_back(I1);
+ Ops.push_back(I2);
+ InstructionsState S = getSameOpcode(Ops);
// Note: Only consider instructions with <= 2 operands to avoid
// complexity explosion.
- if (S.getOpcode() && S.MainOp->getNumOperands() <= 2)
- return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes
- : VLOperands::ScoreSameOpcode;
+ if (S.getOpcode() &&
+ (S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() ||
+ !S.isAltShuffle()) &&
+ all_of(Ops, [&S](Value *V) {
+ return cast<Instruction>(V)->getNumOperands() ==
+ S.MainOp->getNumOperands();
+ }))
+ return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes
+ : LookAheadHeuristics::ScoreSameOpcode;
}
if (isa<UndefValue>(V2))
- return VLOperands::ScoreUndef;
-
- return VLOperands::ScoreFail;
- }
-
- /// Holds the values and their lanes that are taking part in the look-ahead
- /// score calculation. This is used in the external uses cost calculation.
- /// Need to hold all the lanes in case of splat/broadcast at least to
- /// correctly check for the use in the different lane.
- SmallDenseMap<Value *, SmallSet<int, 4>> InLookAheadValues;
-
- /// \returns the additional cost due to uses of \p LHS and \p RHS that are
- /// either external to the vectorized code, or require shuffling.
- int getExternalUsesCost(const std::pair<Value *, int> &LHS,
- const std::pair<Value *, int> &RHS) {
- int Cost = 0;
- std::array<std::pair<Value *, int>, 2> Values = {{LHS, RHS}};
- for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
- Value *V = Values[Idx].first;
- if (isa<Constant>(V)) {
- // Since this is a function pass, it doesn't make semantic sense to
- // walk the users of a subclass of Constant. The users could be in
- // another function, or even another module that happens to be in
- // the same LLVMContext.
- continue;
- }
+ return LookAheadHeuristics::ScoreUndef;
- // Calculate the absolute lane, using the minimum relative lane of LHS
- // and RHS as base and Idx as the offset.
- int Ln = std::min(LHS.second, RHS.second) + Idx;
- assert(Ln >= 0 && "Bad lane calculation");
- unsigned UsersBudget = LookAheadUsersBudget;
- for (User *U : V->users()) {
- if (const TreeEntry *UserTE = R.getTreeEntry(U)) {
- // The user is in the VectorizableTree. Check if we need to insert.
- int UserLn = UserTE->findLaneForValue(U);
- assert(UserLn >= 0 && "Bad lane");
- // If the values are different, check just the line of the current
- // value. If the values are the same, need to add UserInDiffLaneCost
- // only if UserLn does not match both line numbers.
- if ((LHS.first != RHS.first && UserLn != Ln) ||
- (LHS.first == RHS.first && UserLn != LHS.second &&
- UserLn != RHS.second)) {
- Cost += UserInDiffLaneCost;
- break;
- }
- } else {
- // Check if the user is in the look-ahead code.
- auto It2 = InLookAheadValues.find(U);
- if (It2 != InLookAheadValues.end()) {
- // The user is in the look-ahead code. Check the lane.
- if (!It2->getSecond().contains(Ln)) {
- Cost += UserInDiffLaneCost;
- break;
- }
- } else {
- // The user is neither in SLP tree nor in the look-ahead code.
- Cost += ExternalUseCost;
- break;
- }
- }
- // Limit the number of visited uses to cap compilation time.
- if (--UsersBudget == 0)
- break;
- }
- }
- return Cost;
+ return LookAheadHeuristics::ScoreFail;
}
- /// Go through the operands of \p LHS and \p RHS recursively until \p
- /// MaxLevel, and return the cummulative score. For example:
+ /// Go through the operands of \p LHS and \p RHS recursively until
+  /// MaxLevel, and return the cumulative score. \p U1 and \p U2 are
+ /// the users of \p LHS and \p RHS (that is \p LHS and \p RHS are operands
+ /// of \p U1 and \p U2), except at the beginning of the recursion where
+ /// these are set to nullptr.
+ ///
+ /// For example:
/// \verbatim
/// A[0] B[0] A[1] B[1] C[0] D[0] B[1] A[1]
/// \ / \ / \ / \ /
@@ -1282,8 +1242,8 @@ public:
/// each level recursively, accumulating the score. It starts from matching
/// the additions at level 0, then moves on to the loads (level 1). The
/// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
- /// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while
- /// {A[0],C[0]} has a score of VLOperands::ScoreFail.
+ /// {B[0],B[1]} match with LookAheadHeuristics::ScoreConsecutiveLoads, while
+ /// {A[0],C[0]} has a score of LookAheadHeuristics::ScoreFail.
/// Please note that the order of the operands does not matter, as we
/// evaluate the score of all profitable combinations of operands. In
/// other words the score of G1 and G4 is the same as G1 and G2. This
@@ -1291,18 +1251,13 @@ public:
/// Look-ahead SLP: Auto-vectorization in the presence of commutative
/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
/// Luís F. W. Góes
- int getScoreAtLevelRec(const std::pair<Value *, int> &LHS,
- const std::pair<Value *, int> &RHS, int CurrLevel,
- int MaxLevel) {
+ int getScoreAtLevelRec(Value *LHS, Value *RHS, Instruction *U1,
+ Instruction *U2, int CurrLevel,
+ ArrayRef<Value *> MainAltOps) const {
- Value *V1 = LHS.first;
- Value *V2 = RHS.first;
// Get the shallow score of V1 and V2.
- int ShallowScoreAtThisLevel = std::max(
- (int)ScoreFail, getShallowScore(V1, V2, DL, SE, getNumLanes()) -
- getExternalUsesCost(LHS, RHS));
- int Lane1 = LHS.second;
- int Lane2 = RHS.second;
+ int ShallowScoreAtThisLevel =
+ getShallowScore(LHS, RHS, U1, U2, MainAltOps);
// If reached MaxLevel,
// or if V1 and V2 are not instructions,
@@ -1310,20 +1265,17 @@ public:
// or if they are not consecutive,
// or if profitable to vectorize loads or extractelements, early return
// the current cost.
- auto *I1 = dyn_cast<Instruction>(V1);
- auto *I2 = dyn_cast<Instruction>(V2);
+ auto *I1 = dyn_cast<Instruction>(LHS);
+ auto *I2 = dyn_cast<Instruction>(RHS);
if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
- ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
+ ShallowScoreAtThisLevel == LookAheadHeuristics::ScoreFail ||
(((isa<LoadInst>(I1) && isa<LoadInst>(I2)) ||
+ (I1->getNumOperands() > 2 && I2->getNumOperands() > 2) ||
(isa<ExtractElementInst>(I1) && isa<ExtractElementInst>(I2))) &&
ShallowScoreAtThisLevel))
return ShallowScoreAtThisLevel;
assert(I1 && I2 && "Should have early exited.");
- // Keep track of in-tree values for determining the external-use cost.
- InLookAheadValues[V1].insert(Lane1);
- InLookAheadValues[V2].insert(Lane2);
-
// Contains the I2 operand indexes that got matched with I1 operands.
SmallSet<unsigned, 4> Op2Used;
@@ -1346,11 +1298,12 @@ public:
if (Op2Used.count(OpIdx2))
continue;
// Recursively calculate the cost at each level
- int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1},
- {I2->getOperand(OpIdx2), Lane2},
- CurrLevel + 1, MaxLevel);
+ int TmpScore =
+ getScoreAtLevelRec(I1->getOperand(OpIdx1), I2->getOperand(OpIdx2),
+ I1, I2, CurrLevel + 1, None);
// Look for the best score.
- if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) {
+ if (TmpScore > LookAheadHeuristics::ScoreFail &&
+ TmpScore > MaxTmpScore) {
MaxTmpScore = TmpScore;
MaxOpIdx2 = OpIdx2;
FoundBest = true;
@@ -1364,24 +1317,213 @@ public:
}
return ShallowScoreAtThisLevel;
}
+ };
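(In outline, getScoreAtLevelRec() above computes a shallow score for the pair, then greedily matches the two instructions' operands pairwise, adding the best child score for each match, down to MaxLevel. A simplified standalone model of that recursion over a toy expression tree; the node type and scores are made up.)

#include <cstddef>
#include <vector>

struct Expr {
  int Opcode = 0;
  std::vector<const Expr *> Ops;
};

static int shallowScore(const Expr &A, const Expr &B) {
  return A.Opcode == B.Opcode ? 2 : 0; // stand-in for getShallowScore()
}

static int scoreAtLevel(const Expr &A, const Expr &B, int Level,
                        int MaxLevel) {
  int Score = shallowScore(A, B);
  if (Level == MaxLevel || Score == 0)
    return Score;
  std::vector<bool> Used(B.Ops.size(), false);
  for (const Expr *OpA : A.Ops) {
    int Best = 0;
    std::size_t BestIdx = B.Ops.size();
    for (std::size_t J = 0; J < B.Ops.size(); ++J) {
      if (Used[J])
        continue; // each operand of B is matched at most once
      int S = scoreAtLevel(*OpA, *B.Ops[J], Level + 1, MaxLevel);
      if (S > Best) {
        Best = S;
        BestIdx = J;
      }
    }
    if (BestIdx != B.Ops.size()) {
      Used[BestIdx] = true;
      Score += Best; // accumulate the best pairing found for OpA
    }
  }
  return Score;
}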
+ /// A helper data structure to hold the operands of a vector of instructions.
+ /// This supports a fixed vector length for all operand vectors.
+ class VLOperands {
+ /// For each operand we need (i) the value, and (ii) the opcode that it
+ /// would be attached to if the expression was in a left-linearized form.
+ /// This is required to avoid illegal operand reordering.
+ /// For example:
+ /// \verbatim
+ /// 0 Op1
+ /// |/
+ /// Op1 Op2 Linearized + Op2
+ /// \ / ----------> |/
+ /// - -
+ ///
+ /// Op1 - Op2 (0 + Op1) - Op2
+ /// \endverbatim
+ ///
+ /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
+ ///
+ /// Another way to think of this is to track all the operations across the
+ /// path from the operand all the way to the root of the tree and to
+ /// calculate the operation that corresponds to this path. For example, the
+ /// path from Op2 to the root crosses the RHS of the '-', therefore the
+ /// corresponding operation is a '-' (which matches the one in the
+ /// linearized tree, as shown above).
+ ///
+ /// For lack of a better term, we refer to this operation as Accumulated
+ /// Path Operation (APO).
+ struct OperandData {
+ OperandData() = default;
+ OperandData(Value *V, bool APO, bool IsUsed)
+ : V(V), APO(APO), IsUsed(IsUsed) {}
+ /// The operand value.
+ Value *V = nullptr;
+ /// TreeEntries only allow a single opcode, or an alternate sequence of
+ /// them (e.g, +, -). Therefore, we can safely use a boolean value for the
+ /// APO. It is set to 'true' if 'V' is attached to an inverse operation
+ /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
+ /// (e.g., Add/Mul)
+ bool APO = false;
+ /// Helper data for the reordering function.
+ bool IsUsed = false;
+ };
+
+ /// During operand reordering, we are trying to select the operand at lane
+ /// that matches best with the operand at the neighboring lane. Our
+ /// selection is based on the type of value we are looking for. For example,
+ /// if the neighboring lane has a load, we need to look for a load that is
+ /// accessing a consecutive address. These strategies are summarized in the
+ /// 'ReorderingMode' enumerator.
+ enum class ReorderingMode {
+ Load, ///< Matching loads to consecutive memory addresses
+ Opcode, ///< Matching instructions based on opcode (same or alternate)
+ Constant, ///< Matching constants
+ Splat, ///< Matching the same instruction multiple times (broadcast)
+ Failed, ///< We failed to create a vectorizable group
+ };
+
+ using OperandDataVec = SmallVector<OperandData, 2>;
+
+ /// A vector of operand vectors.
+ SmallVector<OperandDataVec, 4> OpsVec;
+
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+ const BoUpSLP &R;
+
+ /// \returns the operand data at \p OpIdx and \p Lane.
+ OperandData &getData(unsigned OpIdx, unsigned Lane) {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// \returns the operand data at \p OpIdx and \p Lane. Const version.
+ const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// Clears the used flag for all entries.
+ void clearUsed() {
+ for (unsigned OpIdx = 0, NumOperands = getNumOperands();
+ OpIdx != NumOperands; ++OpIdx)
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane)
+ OpsVec[OpIdx][Lane].IsUsed = false;
+ }
+
+ /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
+ void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
+ std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
+ }
+
+ /// \param Lane lane of the operands under analysis.
+    /// \param OpIdx operand index in lane \p Lane for which we're looking
+    /// for the best candidate.
+    /// \param Idx operand index of the current candidate value.
+    /// \returns The additional score due to possible broadcasting of the
+    /// elements in the lane. It is more profitable to have a power-of-2
+    /// number of unique elements in the lane, as they will be vectorized with
+    /// higher probability after removing duplicates. Currently the SLP
+    /// vectorizer supports only vectorization of a power-of-2 number of
+    /// unique scalars.
+ int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
+ Value *IdxLaneV = getData(Idx, Lane).V;
+ if (!isa<Instruction>(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V)
+ return 0;
+ SmallPtrSet<Value *, 4> Uniques;
+ for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) {
+ if (Ln == Lane)
+ continue;
+ Value *OpIdxLnV = getData(OpIdx, Ln).V;
+ if (!isa<Instruction>(OpIdxLnV))
+ return 0;
+ Uniques.insert(OpIdxLnV);
+ }
+ int UniquesCount = Uniques.size();
+ int UniquesCntWithIdxLaneV =
+ Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1;
+ Value *OpIdxLaneV = getData(OpIdx, Lane).V;
+ int UniquesCntWithOpIdxLaneV =
+ Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1;
+ if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV)
+ return 0;
+ return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) -
+ UniquesCntWithOpIdxLaneV) -
+ (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV);
+ }
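(The value returned above is the difference in power-of-2 padding between the two choices: a positive score means swapping in the candidate leaves the lane's unique-scalar count closer to a power of two, which matters because only power-of-2 numbers of unique scalars get vectorized. A worked check of the arithmetic; powerOf2Ceil stands in for llvm::PowerOf2Ceil.)

#include <cassert>

static unsigned powerOf2Ceil(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P *= 2;
  return P;
}

// padding(N): filler elements needed to round N uniques up to a power of 2.
static int padding(unsigned Uniques) {
  return static_cast<int>(powerOf2Ceil(Uniques) - Uniques);
}

int main() {
  // Keeping the current operand leaves 3 uniques (padding 1); taking the
  // candidate yields 4 uniques (padding 0), so the candidate scores +1.
  assert(padding(3) - padding(4) == 1);
  return 0;
}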
+
+ /// \param Lane lane of the operands under analysis.
+    /// \param OpIdx operand index in lane \p Lane for which we're looking
+    /// for the best candidate.
+    /// \param Idx operand index of the current candidate value.
+    /// \returns The additional score for the scalar whose users are all
+    /// vectorized.
+ int getExternalUseScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
+ Value *IdxLaneV = getData(Idx, Lane).V;
+ Value *OpIdxLaneV = getData(OpIdx, Lane).V;
+      // Do not care about the number of uses for vector-like instructions
+      // (extractelement/extractvalue with constant indices): they are
+      // extracts themselves and already externally used. Vectorizing such
+      // instructions does not add an extra extractelement instruction; it
+      // may just remove one.
+ if (isVectorLikeInstWithConstOps(IdxLaneV) &&
+ isVectorLikeInstWithConstOps(OpIdxLaneV))
+ return LookAheadHeuristics::ScoreAllUserVectorized;
+ auto *IdxLaneI = dyn_cast<Instruction>(IdxLaneV);
+ if (!IdxLaneI || !isa<Instruction>(OpIdxLaneV))
+ return 0;
+ return R.areAllUsersVectorized(IdxLaneI, None)
+ ? LookAheadHeuristics::ScoreAllUserVectorized
+ : 0;
+ }
+
+ /// Score scaling factor for fully compatible instructions but with
+ /// different number of external uses. Allows better selection of the
+ /// instructions with less external uses.
+ static const int ScoreScaleFactor = 10;
/// \Returns the look-ahead score, which tells us how much the sub-trees
/// rooted at \p LHS and \p RHS match, the more they match the higher the
/// score. This helps break ties in an informed way when we cannot decide on
/// the order of the operands by just considering the immediate
/// predecessors.
- int getLookAheadScore(const std::pair<Value *, int> &LHS,
- const std::pair<Value *, int> &RHS) {
- InLookAheadValues.clear();
- return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth);
+ int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef<Value *> MainAltOps,
+ int Lane, unsigned OpIdx, unsigned Idx,
+ bool &IsUsed) {
+ LookAheadHeuristics LookAhead(DL, SE, R, getNumLanes(),
+ LookAheadMaxDepth);
+ // Keep track of the instruction stack as we recurse into the operands
+ // during the look-ahead score exploration.
+ int Score =
+ LookAhead.getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr,
+ /*CurrLevel=*/1, MainAltOps);
+ if (Score) {
+ int SplatScore = getSplatScore(Lane, OpIdx, Idx);
+ if (Score <= -SplatScore) {
+ // Set the minimum score for splat-like sequence to avoid setting
+ // failed state.
+ Score = 1;
+ } else {
+ Score += SplatScore;
+            // Scale the score to see the difference between different
+            // operands, and between similar operands whose uses are all
+            // vectorized versus not all vectorized. This does not affect the
+            // actual selection of the best compatible operand in general, it
+            // just allows selecting the operand with all vectorized uses.
+ Score *= ScoreScaleFactor;
+ Score += getExternalUseScore(Lane, OpIdx, Idx);
+ IsUsed = true;
+ }
+ }
+ return Score;
}
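(Since the bonus added by getExternalUseScore() is at most ScoreAllUserVectorized, i.e. 1, while the raw look-ahead score is first multiplied by ScoreScaleFactor, i.e. 10, the bonus can only break ties between equal raw scores; it can never outweigh a genuinely better match. Numerically:)

#include <cassert>

int main() {
  const int ScoreScaleFactor = 10;
  int RawTwoWithBonus = 2 * ScoreScaleFactor + 1; // raw 2, all users vectorized
  int RawThreeNoBonus = 3 * ScoreScaleFactor;     // raw 3, some external users
  assert(RawTwoWithBonus < RawThreeNoBonus);      // higher raw score still wins
  return 0;
}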
+    /// Best defined scores per lane between the passes. Used to choose the
+    /// best operand (with the highest score) between the passes.
+    /// The key is {Operand Index, Lane}.
+    /// The value is the best score between the passes for the lane and the
+    /// operand.
+ SmallDenseMap<std::pair<unsigned, unsigned>, unsigned, 8>
+ BestScoresPerLanes;
+
// Search all operands in Ops[*][Lane] for the one that matches best
    // Ops[OpIdx][LastLane] and return its operand index.
// If no good match can be found, return None.
- Optional<unsigned>
- getBestOperand(unsigned OpIdx, int Lane, int LastLane,
- ArrayRef<ReorderingMode> ReorderingModes) {
+ Optional<unsigned> getBestOperand(unsigned OpIdx, int Lane, int LastLane,
+ ArrayRef<ReorderingMode> ReorderingModes,
+ ArrayRef<Value *> MainAltOps) {
unsigned NumOperands = getNumOperands();
// The operand of the previous lane at OpIdx.
@@ -1389,6 +1531,8 @@ public:
// Our strategy mode for OpIdx.
ReorderingMode RMode = ReorderingModes[OpIdx];
+ if (RMode == ReorderingMode::Failed)
+ return None;
// The linearized opcode of the operand at OpIdx, Lane.
bool OpIdxAPO = getData(OpIdx, Lane).APO;
@@ -1400,7 +1544,15 @@ public:
Optional<unsigned> Idx = None;
unsigned Score = 0;
} BestOp;
-
+ BestOp.Score =
+ BestScoresPerLanes.try_emplace(std::make_pair(OpIdx, Lane), 0)
+ .first->second;
+
+      // Track if the operand must be marked as used. If the operand is set to
+      // Score 1 explicitly (because of a non-power-of-2 number of unique
+      // scalars), we may want to reestimate the operands again on the
+      // following iterations.
+ bool IsUsed =
+ RMode == ReorderingMode::Splat || RMode == ReorderingMode::Constant;
// Iterate through all unused operands and look for the best.
for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
// Get the operand at Idx and Lane.
@@ -1426,11 +1578,12 @@ public:
bool LeftToRight = Lane > LastLane;
Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
Value *OpRight = (LeftToRight) ? Op : OpLastLane;
- unsigned Score =
- getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane});
- if (Score > BestOp.Score) {
+ int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane,
+ OpIdx, Idx, IsUsed);
+ if (Score > static_cast<int>(BestOp.Score)) {
BestOp.Idx = Idx;
BestOp.Score = Score;
+ BestScoresPerLanes[std::make_pair(OpIdx, Lane)] = Score;
}
break;
}
@@ -1439,12 +1592,12 @@ public:
BestOp.Idx = Idx;
break;
case ReorderingMode::Failed:
- return None;
+ llvm_unreachable("Not expected Failed reordering mode.");
}
}
if (BestOp.Idx) {
- getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
+ getData(*BestOp.Idx, Lane).IsUsed = IsUsed;
return BestOp.Idx;
}
// If we could not find a good match return None.
@@ -1761,6 +1914,10 @@ public:
// rest of the lanes. We are visiting the nodes in a circular fashion,
// using FirstLane as the center point and increasing the radius
// distance.
+ SmallVector<SmallVector<Value *, 2>> MainAltOps(NumOperands);
+ for (unsigned I = 0; I < NumOperands; ++I)
+ MainAltOps[I].push_back(getData(I, FirstLane).V);
+
for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
// Visit the lane on the right and then the lane on the left.
for (int Direction : {+1, -1}) {
@@ -1773,21 +1930,29 @@ public:
// Look for a good match for each operand.
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
// Search for the operand that matches SortedOps[OpIdx][Lane-1].
- Optional<unsigned> BestIdx =
- getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
+ Optional<unsigned> BestIdx = getBestOperand(
+ OpIdx, Lane, LastLane, ReorderingModes, MainAltOps[OpIdx]);
// By not selecting a value, we allow the operands that follow to
// select a better matching value. We will get a non-null value in
// the next run of getBestOperand().
if (BestIdx) {
// Swap the current operand with the one returned by
// getBestOperand().
- swap(OpIdx, BestIdx.getValue(), Lane);
+ swap(OpIdx, *BestIdx, Lane);
} else {
// We failed to find a best operand, set mode to 'Failed'.
ReorderingModes[OpIdx] = ReorderingMode::Failed;
// Enable the second pass.
StrategyFailed = true;
}
+ // Try to get the alternate opcode and follow it during analysis.
+ if (MainAltOps[OpIdx].size() != 2) {
+ OperandData &AltOp = getData(OpIdx, Lane);
+ InstructionsState OpS =
+ getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V});
+ if (OpS.getOpcode() && OpS.isAltShuffle())
+ MainAltOps[OpIdx].push_back(AltOp.V);
+ }
}
}
}
@@ -1851,15 +2016,109 @@ public:
#endif
};
+  /// Evaluate each pair in \p Candidates and return the index into
+  /// \p Candidates of the pair with the highest score, deemed to have the
+  /// best chance of forming the root of a profitable tree to vectorize.
+  /// Return None if no candidate scored above
+  /// LookAheadHeuristics::ScoreFail.
+  /// \param Limit Lower limit of the score considered to be good enough.
+ Optional<int>
+ findBestRootPair(ArrayRef<std::pair<Value *, Value *>> Candidates,
+ int Limit = LookAheadHeuristics::ScoreFail) {
+ LookAheadHeuristics LookAhead(*DL, *SE, *this, /*NumLanes=*/2,
+ RootLookAheadMaxDepth);
+ int BestScore = Limit;
+ Optional<int> Index = None;
+ for (int I : seq<int>(0, Candidates.size())) {
+ int Score = LookAhead.getScoreAtLevelRec(Candidates[I].first,
+ Candidates[I].second,
+ /*U1=*/nullptr, /*U2=*/nullptr,
+ /*Level=*/1, None);
+ if (Score > BestScore) {
+ BestScore = Score;
+ Index = I;
+ }
+ }
+ return Index;
+ }
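(Structurally, findBestRootPair() is an argmax guarded by a minimum score: no index comes back unless some pair beats Limit. The same shape as a generic sketch:)

#include <cstddef>
#include <optional>
#include <vector>

// Return the index of the best-scoring candidate only if it beats Limit.
template <typename T, typename ScoreFn>
std::optional<std::size_t> bestAboveLimit(const std::vector<T> &Candidates,
                                          ScoreFn Score, int Limit) {
  std::optional<std::size_t> BestIdx;
  int BestScore = Limit;
  for (std::size_t I = 0; I < Candidates.size(); ++I) {
    int S = Score(Candidates[I]);
    if (S > BestScore) {
      BestScore = S;
      BestIdx = I;
    }
  }
  return BestIdx; // empty if nothing scored above Limit
}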
+
/// Checks if the instruction is marked for deletion.
bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
- /// Marks values operands for later deletion by replacing them with Undefs.
- void eraseInstructions(ArrayRef<Value *> AV);
+ /// Removes an instruction from its block and eventually deletes it.
+ /// It's like Instruction::eraseFromParent() except that the actual deletion
+ /// is delayed until BoUpSLP is destructed.
+ void eraseInstruction(Instruction *I) {
+ DeletedInstructions.insert(I);
+ }
+
+ /// Checks if the instruction was already analyzed for being possible
+ /// reduction root.
+ bool isAnalyzedReductionRoot(Instruction *I) const {
+ return AnalyzedReductionsRoots.count(I);
+ }
+ /// Register given instruction as already analyzed for being possible
+ /// reduction root.
+ void analyzedReductionRoot(Instruction *I) {
+ AnalyzedReductionsRoots.insert(I);
+ }
+ /// Checks if the provided list of reduced values was checked already for
+ /// vectorization.
+ bool areAnalyzedReductionVals(ArrayRef<Value *> VL) {
+ return AnalyzedReductionVals.contains(hash_value(VL));
+ }
+ /// Adds the list of reduced values to list of already checked values for the
+ /// vectorization.
+ void analyzedReductionVals(ArrayRef<Value *> VL) {
+ AnalyzedReductionVals.insert(hash_value(VL));
+ }
+ /// Clear the list of the analyzed reduction root instructions.
+ void clearReductionData() {
+ AnalyzedReductionsRoots.clear();
+ AnalyzedReductionVals.clear();
+ }
+ /// Checks if the given value is gathered in one of the nodes.
+ bool isAnyGathered(const SmallDenseSet<Value *> &Vals) const {
+ return any_of(MustGather, [&](Value *V) { return Vals.contains(V); });
+ }
~BoUpSLP();
private:
+  /// Check if the operands on the edges \p Edges of the \p UserTE allow
+  /// reordering (i.e. the operands can be reordered because they have only one
+  /// user and are reorderable).
+  /// \param ReorderableGathers List of all gather nodes that require reordering
+  /// (e.g., gathers of extractelements or partially vectorizable loads).
+  /// \param GatherOps List of gather operand nodes for \p UserTE that require
+  /// reordering, a subset of \p NonVectorized.
+ bool
+ canReorderOperands(TreeEntry *UserTE,
+ SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
+ ArrayRef<TreeEntry *> ReorderableGathers,
+ SmallVectorImpl<TreeEntry *> &GatherOps);
+
+ /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
+ /// if any. If it is not vectorized (gather node), returns nullptr.
+ TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
+ ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
+ TreeEntry *TE = nullptr;
+ const auto *It = find_if(VL, [this, &TE](Value *V) {
+ TE = getTreeEntry(V);
+ return TE;
+ });
+ if (It != VL.end() && TE->isSame(VL))
+ return TE;
+ return nullptr;
+ }
+
+ /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
+ /// if any. If it is not vectorized (gather node), returns nullptr.
+ const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
+ unsigned OpIdx) const {
+ return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
+ const_cast<TreeEntry *>(UserTE), OpIdx);
+ }
+
/// Checks if all users of \p I are the part of the vectorization tree.
bool areAllUsersVectorized(Instruction *I,
ArrayRef<Value *> VectorizedVals) const;
@@ -1886,12 +2145,17 @@ private:
/// Vectorize a single entry in the tree, starting in \p VL.
Value *vectorizeTree(ArrayRef<Value *> VL);
+ /// Create a new vector from a list of scalar values. Produces a sequence
+ /// which exploits values reused across lanes, and arranges the inserts
+ /// for ease of later optimization.
+ Value *createBuildVector(ArrayRef<Value *> VL);
+
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars. If \p
/// NeedToShuffle is true, need to add a cost of reshuffling some of the
/// vector elements.
InstructionCost getGatherCost(FixedVectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices,
+ const APInt &ShuffledIndices,
bool NeedToShuffle) const;
/// Checks if the gathered \p VL can be represented as shuffle(s) of previous
@@ -1926,6 +2190,29 @@ private:
const DataLayout &DL,
ScalarEvolution &SE,
const BoUpSLP &R);
+
+ /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
+ /// users of \p TE and collects the stores. It returns the map from the store
+ /// pointers to the collected stores.
+ DenseMap<Value *, SmallVector<StoreInst *, 4>>
+ collectUserStores(const BoUpSLP::TreeEntry *TE) const;
+
+  /// Helper for `findExternalStoreUsersReorderIndices()`. It checks if the
+  /// stores in \p StoresVec can form a vector instruction. If so it returns
+  /// true and populates \p ReorderIndices with the shuffle indices of the
+  /// stores when compared to the sorted vector.
+ bool CanFormVector(const SmallVector<StoreInst *, 4> &StoresVec,
+ OrdersType &ReorderIndices) const;
+
+ /// Iterates through the users of \p TE, looking for scalar stores that can be
+ /// potentially vectorized in a future SLP-tree. If found, it keeps track of
+ /// their order and builds an order index vector for each store bundle. It
+  /// returns all of the order vectors found.
+ /// We run this after the tree has formed, otherwise we may come across user
+ /// instructions that are not yet in the tree.
+ SmallVector<OrdersType, 1>
+ findExternalStoreUsersReorderIndices(TreeEntry *TE) const;
+
struct TreeEntry {
using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
TreeEntry(VecTreeTy &Container) : Container(Container) {}
@@ -2270,15 +2557,21 @@ private:
ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
- unsigned Lane = 0;
- for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
- BundleMember = BundleMember->NextInBundle) {
- BundleMember->TE = Last;
- BundleMember->Lane = Lane;
- ++Lane;
- }
- assert((!Bundle.getValue() || Lane == VL.size()) &&
+ ScheduleData *BundleMember = *Bundle;
+ assert((BundleMember || isa<PHINode>(S.MainOp) ||
+ isVectorLikeInstWithConstOps(S.MainOp) ||
+ doesNotNeedToSchedule(VL)) &&
"Bundle and VL out of sync");
+ if (BundleMember) {
+ for (Value *V : VL) {
+ if (doesNotNeedToBeScheduled(V))
+ continue;
+ assert(BundleMember && "Unexpected end of bundle.");
+ BundleMember->TE = Last;
+ BundleMember = BundleMember->NextInBundle;
+ }
+ }
+ assert(!BundleMember && "Bundle and VL out of sync");
} else {
MustGather.insert(VL.begin(), VL.end());
}
@@ -2312,7 +2605,7 @@ private:
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
- /// Maps a value to the proposed vectorizable size.
+ /// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
/// A list of scalars that we found that we need to keep as scalars.
@@ -2343,12 +2636,12 @@ private:
// First check if the result is already in the cache.
AliasCacheKey key = std::make_pair(Inst1, Inst2);
Optional<bool> &result = AliasCache[key];
- if (result.hasValue()) {
+ if (result) {
return result.getValue();
}
bool aliased = true;
if (Loc1.Ptr && isSimple(Inst1))
- aliased = isModOrRefSet(AA->getModRefInfo(Inst2, Loc1));
+ aliased = isModOrRefSet(BatchAA.getModRefInfo(Inst2, Loc1));
// Store the result in the cache.
result = aliased;
return aliased;
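(The caching shape here is worth noting: the map stores Optional<bool>, so a single operator[] lookup both finds an existing answer and reserves the slot on a miss. A generic sketch of the same memoization pattern, with a user-supplied oracle standing in for the BatchAA query:)

#include <functional>
#include <map>
#include <optional>
#include <utility>

template <typename Key> class MemoizedPredicate {
  std::map<std::pair<Key, Key>, std::optional<bool>> Cache;
  std::function<bool(Key, Key)> Oracle; // the expensive query being cached

public:
  explicit MemoizedPredicate(std::function<bool(Key, Key)> O)
      : Oracle(std::move(O)) {}

  bool query(Key A, Key B) {
    std::optional<bool> &Slot = Cache[{A, B}]; // default-inserts an empty slot
    if (Slot)
      return *Slot;  // cache hit: the oracle is never asked twice
    bool Result = Oracle(A, B);
    Slot = Result;   // fill the reserved slot for next time
    return Result;
  }
};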
@@ -2360,20 +2653,23 @@ private:
/// TODO: consider moving this to the AliasAnalysis itself.
DenseMap<AliasCacheKey, Optional<bool>> AliasCache;
- /// Removes an instruction from its block and eventually deletes it.
- /// It's like Instruction::eraseFromParent() except that the actual deletion
- /// is delayed until BoUpSLP is destructed.
- /// This is required to ensure that there are no incorrect collisions in the
- /// AliasCache, which can happen if a new instruction is allocated at the
- /// same address as a previously deleted instruction.
- void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) {
- auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first;
- It->getSecond() = It->getSecond() && ReplaceOpsWithUndef;
- }
+ // Cache for pointerMayBeCaptured calls inside AA. This is preserved
+ // globally through SLP because we don't perform any action which
+ // invalidates capture results.
+ BatchAAResults BatchAA;
/// Temporary store for deleted instructions. Instructions will be deleted
- /// eventually when the BoUpSLP is destructed.
- DenseMap<Instruction *, bool> DeletedInstructions;
+ /// eventually when the BoUpSLP is destructed. The deferral is required to
+ /// ensure that there are no incorrect collisions in the AliasCache, which
+ /// can happen if a new instruction is allocated at the same address as a
+ /// previously deleted instruction.
+ DenseSet<Instruction *> DeletedInstructions;
+
+  /// Set of instructions already analyzed as possible reduction roots.
+ SmallPtrSet<Instruction *, 16> AnalyzedReductionsRoots;
+
+ /// Set of hashes for the list of reduction values already being analyzed.
+ DenseSet<size_t> AnalyzedReductionVals;
/// A list of values that need to extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External User
@@ -2407,14 +2703,39 @@ private:
NextLoadStore = nullptr;
IsScheduled = false;
SchedulingRegionID = BlockSchedulingRegionID;
- UnscheduledDepsInBundle = UnscheduledDeps;
clearDependencies();
OpValue = OpVal;
TE = nullptr;
- Lane = -1;
+ }
+
+    /// Verify basic self-consistency properties.
+ void verify() {
+ if (hasValidDependencies()) {
+ assert(UnscheduledDeps <= Dependencies && "invariant");
+ } else {
+ assert(UnscheduledDeps == Dependencies && "invariant");
+ }
+
+ if (IsScheduled) {
+ assert(isSchedulingEntity() &&
+ "unexpected scheduled state");
+ for (const ScheduleData *BundleMember = this; BundleMember;
+ BundleMember = BundleMember->NextInBundle) {
+ assert(BundleMember->hasValidDependencies() &&
+ BundleMember->UnscheduledDeps == 0 &&
+ "unexpected scheduled state");
+ assert((BundleMember == this || !BundleMember->IsScheduled) &&
+ "only bundle is marked scheduled");
+ }
+ }
+
+ assert(Inst->getParent() == FirstInBundle->Inst->getParent() &&
+ "all bundle members must be in same basic block");
}
/// Returns true if the dependency information has been calculated.
+    /// Note that dependency validity can vary between instructions within
+ /// a single bundle.
bool hasValidDependencies() const { return Dependencies != InvalidDeps; }
/// Returns true for single instructions and for bundle representatives
@@ -2424,7 +2745,7 @@ private:
/// Returns true if it represents an instruction bundle and not only a
/// single instruction.
bool isPartOfBundle() const {
- return NextInBundle != nullptr || FirstInBundle != this;
+ return NextInBundle != nullptr || FirstInBundle != this || TE;
}
/// Returns true if it is ready for scheduling, i.e. it has no more
@@ -2432,20 +2753,23 @@ private:
bool isReady() const {
assert(isSchedulingEntity() &&
"can't consider non-scheduling entity for ready list");
- return UnscheduledDepsInBundle == 0 && !IsScheduled;
+ return unscheduledDepsInBundle() == 0 && !IsScheduled;
}
- /// Modifies the number of unscheduled dependencies, also updating it for
- /// the whole bundle.
+ /// Modifies the number of unscheduled dependencies for this instruction,
+ /// and returns the number of remaining dependencies for the containing
+ /// bundle.
int incrementUnscheduledDeps(int Incr) {
+ assert(hasValidDependencies() &&
+ "increment of unscheduled deps would be meaningless");
UnscheduledDeps += Incr;
- return FirstInBundle->UnscheduledDepsInBundle += Incr;
+ return FirstInBundle->unscheduledDepsInBundle();
}
/// Sets the number of unscheduled dependencies to the number of
/// dependencies.
void resetUnscheduledDeps() {
- incrementUnscheduledDeps(Dependencies - UnscheduledDeps);
+ UnscheduledDeps = Dependencies;
}
/// Clears all dependency information.
@@ -2453,6 +2777,19 @@ private:
Dependencies = InvalidDeps;
resetUnscheduledDeps();
MemoryDependencies.clear();
+ ControlDependencies.clear();
+ }
+
+ int unscheduledDepsInBundle() const {
+ assert(isSchedulingEntity() && "only meaningful on the bundle");
+ int Sum = 0;
+ for (const ScheduleData *BundleMember = this; BundleMember;
+ BundleMember = BundleMember->NextInBundle) {
+ if (BundleMember->UnscheduledDeps == InvalidDeps)
+ return InvalidDeps;
+ Sum += BundleMember->UnscheduledDeps;
+ }
+ return Sum;
}
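(This replaces the old running UnscheduledDepsInBundle counter with an on-demand fold over the bundle, in which one member with uncalculated dependencies poisons the whole result. The same fold in miniature, using a negative sentinel as the pass does:)

#include <cassert>
#include <vector>

static const int InvalidDeps = -1; // sentinel for "not yet calculated"

static int sumOrInvalid(const std::vector<int> &Deps) {
  int Sum = 0;
  for (int D : Deps) {
    if (D == InvalidDeps)
      return InvalidDeps; // one invalid member invalidates the bundle
    Sum += D;
  }
  return Sum;
}

int main() {
  assert(sumOrInvalid({1, 2, 0}) == 3);
  assert(sumOrInvalid({1, InvalidDeps}) == InvalidDeps);
  return 0;
}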
void dump(raw_ostream &os) const {
@@ -2473,6 +2810,12 @@ private:
Instruction *Inst = nullptr;
+ /// Opcode of the current instruction in the schedule data.
+ Value *OpValue = nullptr;
+
+ /// The TreeEntry that this instruction corresponds to.
+ TreeEntry *TE = nullptr;
+
/// Points to the head in an instruction bundle (and always to this for
/// single instructions).
ScheduleData *FirstInBundle = nullptr;
@@ -2489,6 +2832,12 @@ private:
/// This list is derived on demand in calculateDependencies().
SmallVector<ScheduleData *, 4> MemoryDependencies;
+ /// List of instructions which this instruction could be control dependent
+ /// on. Allowing such nodes to be scheduled below this one could introduce
+ /// a runtime fault which didn't exist in the original program.
+    /// E.g., a load or udiv following a readonly call which infinitely loops.
+ SmallVector<ScheduleData *, 4> ControlDependencies;
+
/// This ScheduleData is in the current scheduling region if this matches
/// the current SchedulingRegionID of BlockScheduling.
int SchedulingRegionID = 0;
@@ -2508,22 +2857,9 @@ private:
/// Note that this is negative as long as Dependencies is not calculated.
int UnscheduledDeps = InvalidDeps;
- /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
- /// single instructions.
- int UnscheduledDepsInBundle = InvalidDeps;
-
/// True if this instruction is scheduled (or considered as scheduled in the
/// dry-run).
bool IsScheduled = false;
-
- /// Opcode of the current instruction in the schedule data.
- Value *OpValue = nullptr;
-
- /// The TreeEntry that this instruction corresponds to.
- TreeEntry *TE = nullptr;
-
- /// The lane of this node in the TreeEntry.
- int Lane = -1;
};
#ifndef NDEBUG
@@ -2538,6 +2874,21 @@ private:
friend struct DOTGraphTraits<BoUpSLP *>;
/// Contains all scheduling data for a basic block.
+  /// It does not schedule instructions that are not memory read/write
+  /// instructions and whose operands are either constants, arguments, phis,
+  /// or instructions from other blocks, or whose users are phis or belong to
+  /// other blocks. The resulting vector instructions can be placed at the
+  /// beginning of the basic block without scheduling (if the operands do not
+  /// need to be scheduled) or at the end of the block (if the users are
+  /// outside of the block). This saves some compile time and memory used by
+  /// the compiler.
+  /// ScheduleData is assigned to each instruction between the boundaries of
+  /// the tree entry, even to those that are not part of the graph. This is
+  /// required to correctly follow the dependencies between the instructions
+  /// and to schedule them correctly. ScheduleData is not allocated for
+  /// instructions that do not require scheduling, like phis, nodes consisting
+  /// only of extractelements/insertelements, or nodes whose instructions have
+  /// uses/operands outside of the block.
struct BlockScheduling {
BlockScheduling(BasicBlock *BB)
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
@@ -2548,6 +2899,7 @@ private:
ScheduleEnd = nullptr;
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
+ RegionHasStackSave = false;
// Reduce the maximum schedule region size by the size of the
// previous scheduling run.
@@ -2561,20 +2913,29 @@ private:
++SchedulingRegionID;
}
- ScheduleData *getScheduleData(Value *V) {
- ScheduleData *SD = ScheduleDataMap[V];
- if (SD && SD->SchedulingRegionID == SchedulingRegionID)
+ ScheduleData *getScheduleData(Instruction *I) {
+ if (BB != I->getParent())
+        // Avoid the lookup if it can't possibly be in the map.
+ return nullptr;
+ ScheduleData *SD = ScheduleDataMap.lookup(I);
+ if (SD && isInSchedulingRegion(SD))
return SD;
return nullptr;
}
+ ScheduleData *getScheduleData(Value *V) {
+ if (auto *I = dyn_cast<Instruction>(V))
+ return getScheduleData(I);
+ return nullptr;
+ }
+
ScheduleData *getScheduleData(Value *V, Value *Key) {
if (V == Key)
return getScheduleData(V);
auto I = ExtraScheduleDataMap.find(V);
if (I != ExtraScheduleDataMap.end()) {
- ScheduleData *SD = I->second[Key];
- if (SD && SD->SchedulingRegionID == SchedulingRegionID)
+ ScheduleData *SD = I->second.lookup(Key);
+ if (SD && isInSchedulingRegion(SD))
return SD;
}
return nullptr;
@@ -2595,7 +2956,7 @@ private:
BundleMember = BundleMember->NextInBundle) {
if (BundleMember->Inst != BundleMember->OpValue)
continue;
-
+
// Handle the def-use chain dependencies.
// Decrement the unscheduled counter and insert to ready list if ready.
@@ -2617,10 +2978,12 @@ private:
};
// If BundleMember is a vector bundle, its operands may have been
- // reordered duiring buildTree(). We therefore need to get its operands
+ // reordered during buildTree(). We therefore need to get its operands
// through the TreeEntry.
if (TreeEntry *TE = BundleMember->TE) {
- int Lane = BundleMember->Lane;
+ // Need to search for the lane since the tree entry can be reordered.
+ int Lane = std::distance(TE->Scalars.begin(),
+ find(TE->Scalars, BundleMember->Inst));
assert(Lane >= 0 && "Lane not set");
// Since vectorization tree is being built recursively this assertion
@@ -2629,7 +2992,7 @@ private:
// where their second (immediate) operand is not added. Since
// immediates do not affect scheduler behavior this is considered
// okay.
- auto *In = TE->getMainOp();
+ auto *In = BundleMember->Inst;
assert(In &&
(isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
In->getNumOperands() == TE->getNumOperands()) &&
@@ -2649,7 +3012,8 @@ private:
}
// Handle the memory dependencies.
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
- if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
+ if (MemoryDepSD->hasValidDependencies() &&
+ MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
// There are no more unscheduled dependencies after decrementing,
// so we can put the dependent instruction into the ready list.
ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
@@ -2660,6 +3024,48 @@ private:
<< "SLP: gets ready (mem): " << *DepBundle << "\n");
}
}
+ // Handle the control dependencies.
+ for (ScheduleData *DepSD : BundleMember->ControlDependencies) {
+ if (DepSD->incrementUnscheduledDeps(-1) == 0) {
+ // There are no more unscheduled dependencies after decrementing,
+ // so we can put the dependent instruction into the ready list.
+ ScheduleData *DepBundle = DepSD->FirstInBundle;
+ assert(!DepBundle->IsScheduled &&
+ "already scheduled bundle gets ready");
+ ReadyList.insert(DepBundle);
+ LLVM_DEBUG(dbgs()
+ << "SLP: gets ready (ctl): " << *DepBundle << "\n");
+ }
+ }
+
+ }
+ }
+
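
The decrement-to-ready pattern above is the core of the scheduler: every bundle carries a counter of unscheduled dependencies (def-use, memory, and now control), and a dependent becomes ready exactly when its counter reaches zero. A minimal self-contained sketch of the idea, with toy integer nodes standing in for ScheduleData bundles:

#include <iostream>
#include <queue>
#include <vector>

int main() {
  // Toy dependence graph: node 1 depends on 0; node 2 depends on 0 and 1.
  std::vector<std::vector<int>> Dependents = {{1, 2}, {2}, {}};
  std::vector<int> UnscheduledDeps = {0, 1, 2};
  std::queue<int> Ready;
  Ready.push(0); // no unscheduled dependencies, so initially ready
  while (!Ready.empty()) {
    int N = Ready.front();
    Ready.pop();
    std::cout << "schedule " << N << "\n";
    // Scheduling N satisfies one dependency of each dependent; a dependent
    // whose counter hits zero moves to the ready list, as in the code above.
    for (int D : Dependents[N])
      if (--UnscheduledDeps[D] == 0)
        Ready.push(D);
  }
}
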
+ /// Verify basic self consistency properties of the data structure.
+ void verify() {
+ if (!ScheduleStart)
+ return;
+
+ assert(ScheduleStart->getParent() == ScheduleEnd->getParent() &&
+ ScheduleStart->comesBefore(ScheduleEnd) &&
+ "Not a valid scheduling region?");
+
+ for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
+ auto *SD = getScheduleData(I);
+ if (!SD)
+ continue;
+ assert(isInSchedulingRegion(SD) &&
+ "primary schedule data not in window?");
+ assert(isInSchedulingRegion(SD->FirstInBundle) &&
+ "entire bundle in window!");
+ (void)SD;
+ doForAllOpcodes(I, [](ScheduleData *SD) { SD->verify(); });
+ }
+
+ for (auto *SD : ReadyInsts) {
+ assert(SD->isSchedulingEntity() && SD->isReady() &&
+ "item in ready list not ready?");
+ (void)SD;
}
}
@@ -2670,7 +3076,7 @@ private:
auto I = ExtraScheduleDataMap.find(V);
if (I != ExtraScheduleDataMap.end())
for (auto &P : I->second)
- if (P.second->SchedulingRegionID == SchedulingRegionID)
+ if (isInSchedulingRegion(P.second))
Action(P.second);
}
@@ -2679,10 +3085,11 @@ private:
void initialFillReadyList(ReadyListType &ReadyList) {
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
doForAllOpcodes(I, [&](ScheduleData *SD) {
- if (SD->isSchedulingEntity() && SD->isReady()) {
+ if (SD->isSchedulingEntity() && SD->hasValidDependencies() &&
+ SD->isReady()) {
ReadyList.insert(SD);
LLVM_DEBUG(dbgs()
- << "SLP: initially in ready list: " << *I << "\n");
+ << "SLP: initially in ready list: " << *SD << "\n");
}
});
}
@@ -2740,18 +3147,14 @@ private:
/// Attaches ScheduleData to Instruction.
/// Note that the mapping survives during all vectorization iterations, i.e.
/// ScheduleData structures are recycled.
- DenseMap<Value *, ScheduleData *> ScheduleDataMap;
+ DenseMap<Instruction *, ScheduleData *> ScheduleDataMap;
/// Attaches ScheduleData to Instruction with the leading key.
DenseMap<Value *, SmallDenseMap<Value *, ScheduleData *>>
ExtraScheduleDataMap;
- struct ReadyList : SmallVector<ScheduleData *, 8> {
- void insert(ScheduleData *SD) { push_back(SD); }
- };
-
/// The ready-list for scheduling (only used for the dry-run).
- ReadyList ReadyInsts;
+ SetVector<ScheduleData *> ReadyInsts;
/// The first instruction of the scheduling region.
Instruction *ScheduleStart = nullptr;
@@ -2767,6 +3170,11 @@ private:
/// (can be null).
ScheduleData *LastLoadStoreInRegion = nullptr;
+ /// Is there an llvm.stacksave or llvm.stackrestore in the scheduling
+ /// region? Used to optimize the dependence calculation for the
+ /// common case where there isn't.
+ bool RegionHasStackSave = false;
+
/// The current size of the scheduling region.
int ScheduleRegionSize = 0;
@@ -2775,8 +3183,8 @@ private:
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
- // Make sure that the initial SchedulingRegionID is greater than the
- // initial SchedulingRegionID in ScheduleData (which is 0).
+ /// Make sure that the initial SchedulingRegionID is greater than the
+ /// initial SchedulingRegionID in ScheduleData (which is 0).
int SchedulingRegionID = 1;
};
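
The SchedulingRegionID scheme is a classic generation (epoch) counter: instead of walking the maps to erase stale ScheduleData, the lookup treats any entry whose recorded region ID differs from the current one as absent, so starting a new region is O(1). A reduced sketch of the pattern, using a plain map keyed by a toy instruction id:

#include <cassert>
#include <unordered_map>

struct ScheduleData {
  int SchedulingRegionID = 0; // 0 == never part of any region
  int UnscheduledDeps = 0;
};

struct Scheduler {
  std::unordered_map<int, ScheduleData> Map; // keyed by a toy instruction id
  int SchedulingRegionID = 1;                // strictly greater than 0

  ScheduleData *lookup(int Inst) {
    auto It = Map.find(Inst);
    if (It == Map.end() || It->second.SchedulingRegionID != SchedulingRegionID)
      return nullptr; // stale entry from a previous region: treated as absent
    return &It->second;
  }

  void startNewRegion() { ++SchedulingRegionID; } // O(1) "clear"
};

int main() {
  Scheduler S;
  S.Map[42] = {S.SchedulingRegionID, 0};
  assert(S.lookup(42) != nullptr);
  S.startNewRegion(); // all entries become invisible at once
  assert(S.lookup(42) == nullptr);
}
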
@@ -2788,7 +3196,7 @@ private:
void scheduleBlock(BlockScheduling *BS);
/// List of users to ignore during scheduling and that don't need extracting.
- ArrayRef<Value *> UserIgnoreList;
+ const SmallDenseSet<Value *> *UserIgnoreList = nullptr;
/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
/// sorted SmallVectors of unsigned.
@@ -2819,7 +3227,6 @@ private:
ScalarEvolution *SE;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
- AAResults *AA;
LoopInfo *LI;
DominatorTree *DT;
AssumptionCache *AC;
@@ -2936,20 +3343,25 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
} // end namespace llvm
BoUpSLP::~BoUpSLP() {
- for (const auto &Pair : DeletedInstructions) {
- // Replace operands of ignored instructions with Undefs in case if they were
- // marked for deletion.
- if (Pair.getSecond()) {
- Value *Undef = UndefValue::get(Pair.getFirst()->getType());
- Pair.getFirst()->replaceAllUsesWith(Undef);
- }
- Pair.getFirst()->dropAllReferences();
- }
- for (const auto &Pair : DeletedInstructions) {
- assert(Pair.getFirst()->use_empty() &&
+ SmallVector<WeakTrackingVH> DeadInsts;
+ for (auto *I : DeletedInstructions) {
+ for (Use &U : I->operands()) {
+ auto *Op = dyn_cast<Instruction>(U.get());
+ if (Op && !DeletedInstructions.count(Op) && Op->hasOneUser() &&
+ wouldInstructionBeTriviallyDead(Op, TLI))
+ DeadInsts.emplace_back(Op);
+ }
+ I->dropAllReferences();
+ }
+ for (auto *I : DeletedInstructions) {
+ assert(I->use_empty() &&
"trying to erase instruction with users.");
- Pair.getFirst()->eraseFromParent();
+ I->eraseFromParent();
}
+
+ // Cleanup any dead scalar code feeding the vectorized instructions
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
+
#ifdef EXPENSIVE_CHECKS
// If we could guarantee that this call is not extremely slow, we could
// remove the ifdef limitation (see PR47712).
@@ -2957,13 +3369,6 @@ BoUpSLP::~BoUpSLP() {
#endif
}
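
The rewritten destructor collects single-use operands that become trivially dead once their user is erased and hands them to RecursivelyDeleteTriviallyDeadInstructions for a worklist sweep. A toy model of that cascade over a use-counted graph (hypothetical node ids, not LLVM IR):

#include <iostream>
#include <unordered_map>
#include <vector>

struct Node {
  std::vector<int> Ops; // operands of this node
  int Uses = 0;         // how many other nodes still use it
};

int main() {
  // 0 feeds 1, 1 feeds 2; deleting 2 should cascade to 1 and then 0.
  std::unordered_map<int, Node> G;
  G[0] = {{}, 1};
  G[1] = {{0}, 1};
  G[2] = {{1}, 0};
  std::vector<int> Worklist{2};
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    if (G[N].Uses != 0)
      continue; // still used: not trivially dead
    for (int Op : G[N].Ops)
      if (--G[Op].Uses == 0)
        Worklist.push_back(Op); // operand just became dead too
    std::cout << "erase " << N << "\n";
    G.erase(N);
  }
}
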
-void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
- for (auto *V : AV) {
- if (auto *I = dyn_cast<Instruction>(V))
- eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);
- };
-}
-
/// Reorders the given \p Reuses mask according to the given \p Mask. \p Reuses
/// contains the original mask for the scalars reused in the node. The procedure
/// transforms this mask in accordance with the given \p Mask.
@@ -3068,6 +3473,189 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
return None;
}
+namespace {
+/// Tracks the way in which we can represent the loads in the given sequence.
+enum class LoadsState { Gather, Vectorize, ScatterVectorize };
+} // anonymous namespace
+
+/// Checks if the given array of loads can be represented as a vectorized load,
+/// a scattered (masked gather) load, or just a simple gather.
+static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL, ScalarEvolution &SE,
+ LoopInfo &LI,
+ SmallVectorImpl<unsigned> &Order,
+ SmallVectorImpl<Value *> &PointerOps) {
+ // Check that a vectorized load would load the same memory as a scalar
+ // load. For example, we don't want to vectorize loads that are smaller
+ // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
+ // treats loading/storing it as an i8 struct. If we vectorize loads/stores
+ // from such a struct, we read/write packed bits disagreeing with the
+ // unvectorized version.
+ Type *ScalarTy = VL0->getType();
+
+ if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
+ return LoadsState::Gather;
+
+ // Make sure all loads in the bundle are simple - we can't vectorize
+ // atomic or volatile loads.
+ PointerOps.clear();
+ PointerOps.resize(VL.size());
+ auto *POIter = PointerOps.begin();
+ for (Value *V : VL) {
+ auto *L = cast<LoadInst>(V);
+ if (!L->isSimple())
+ return LoadsState::Gather;
+ *POIter = L->getPointerOperand();
+ ++POIter;
+ }
+
+ Order.clear();
+ // Check the order of pointer operands or that all pointers are the same.
+ bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
+ if (IsSorted || all_of(PointerOps, [&PointerOps](Value *P) {
+ if (getUnderlyingObject(P) != getUnderlyingObject(PointerOps.front()))
+ return false;
+ auto *GEP = dyn_cast<GetElementPtrInst>(P);
+ if (!GEP)
+ return false;
+ auto *GEP0 = cast<GetElementPtrInst>(PointerOps.front());
+ return GEP->getNumOperands() == 2 &&
+ ((isConstant(GEP->getOperand(1)) &&
+ isConstant(GEP0->getOperand(1))) ||
+ getSameOpcode({GEP->getOperand(1), GEP0->getOperand(1)})
+ .getOpcode());
+ })) {
+ if (IsSorted) {
+ Value *Ptr0;
+ Value *PtrN;
+ if (Order.empty()) {
+ Ptr0 = PointerOps.front();
+ PtrN = PointerOps.back();
+ } else {
+ Ptr0 = PointerOps[Order.front()];
+ PtrN = PointerOps[Order.back()];
+ }
+ Optional<int> Diff =
+ getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
+ // Check that the sorted loads are consecutive.
+ if (static_cast<unsigned>(*Diff) == VL.size() - 1)
+ return LoadsState::Vectorize;
+ }
+ // TODO: need to improve analysis of the pointers; if not all of them are
+ // GEPs or have > 2 operands, we end up with a gather node, which just
+ // increases the cost.
+ Loop *L = LI.getLoopFor(cast<LoadInst>(VL0)->getParent());
+ bool ProfitableGatherPointers =
+ static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
+ return L && L->isLoopInvariant(V);
+ })) <= VL.size() / 2 && VL.size() > 2;
+ if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(P);
+ return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) ||
+ (GEP && GEP->getNumOperands() == 2);
+ })) {
+ Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
+ for (Value *V : VL)
+ CommonAlignment =
+ std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) &&
+ !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment))
+ return LoadsState::ScatterVectorize;
+ }
+ }
+
+ return LoadsState::Gather;
+}
+
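
canVectorizeLoads boils down to: compute each pointer's constant distance from a common base, and if the sorted distances are consecutive the bundle can become one wide load (LoadsState::Vectorize, plus a shuffle when out of order); otherwise, if a masked gather is legal, fall back to ScatterVectorize. A simplified sketch over raw byte offsets (the real code derives the distances with getPointersDiff/SCEV and applies the profitability checks above):

#include <algorithm>
#include <iostream>
#include <vector>

enum class LoadsState { Gather, Vectorize, ScatterVectorize };

// Toy classifier: Offsets are byte distances from a common base pointer.
LoadsState classify(std::vector<int> Offsets, int ElemSize, bool GatherLegal) {
  std::sort(Offsets.begin(), Offsets.end());
  bool Consecutive = true;
  for (size_t I = 1; I < Offsets.size(); ++I)
    Consecutive &= (Offsets[I] - Offsets[I - 1] == ElemSize);
  if (Consecutive)
    return LoadsState::Vectorize; // one wide load (+ reorder shuffle if needed)
  return GatherLegal ? LoadsState::ScatterVectorize : LoadsState::Gather;
}

int main() {
  // {12, 0, 4, 8} is consecutive once sorted: vectorizable with a reorder.
  std::cout << int(classify({12, 0, 4, 8}, 4, true)) << "\n"; // 1 (Vectorize)
  // {0, 8, 24, 4} has a gap: masked gather if legal, else plain gather.
  std::cout << int(classify({0, 8, 24, 4}, 4, true)) << "\n"; // 2 (Scatter)
}
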
+bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
+ const DataLayout &DL, ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &SortedIndices) {
+ assert(llvm::all_of(
+ VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
+ "Expected list of pointer operands.");
+ // Map from bases to a vector of (Ptr, Offset, OrigIdx). We insert each Ptr
+ // into the vector of its base, sort by Offset, and return the sorted
+ // indices so that related values sit next to one another.
+ MapVector<Value *, SmallVector<std::tuple<Value *, int, unsigned>>> Bases;
+ Bases[VL[0]].push_back(std::make_tuple(VL[0], 0U, 0U));
+
+ unsigned Cnt = 1;
+ for (Value *Ptr : VL.drop_front()) {
+ bool Found = any_of(Bases, [&](auto &Base) {
+ Optional<int> Diff =
+ getPointersDiff(ElemTy, Base.first, ElemTy, Ptr, DL, SE,
+ /*StrictCheck=*/true);
+ if (!Diff)
+ return false;
+
+ Base.second.emplace_back(Ptr, *Diff, Cnt++);
+ return true;
+ });
+
+ if (!Found) {
+ // If we haven't found enough to usefully cluster, return early.
+ if (Bases.size() > VL.size() / 2 - 1)
+ return false;
+
+ // Not found already - add a new Base.
+ Bases[Ptr].emplace_back(Ptr, 0, Cnt++);
+ }
+ }
+
+ // For each of the bases, sort the pointers by Offset and check whether any
+ // base's pointers become consecutive.
+ bool AnyConsecutive = false;
+ for (auto &Base : Bases) {
+ auto &Vec = Base.second;
+ if (Vec.size() > 1) {
+ llvm::stable_sort(Vec, [](const std::tuple<Value *, int, unsigned> &X,
+ const std::tuple<Value *, int, unsigned> &Y) {
+ return std::get<1>(X) < std::get<1>(Y);
+ });
+ int InitialOffset = std::get<1>(Vec[0]);
+ AnyConsecutive |= all_of(enumerate(Vec), [InitialOffset](auto &P) {
+ return std::get<1>(P.value()) == int(P.index()) + InitialOffset;
+ });
+ }
+ }
+
+ // Fill the SortedIndices array only if it looks worthwhile to sort the ptrs.
+ SortedIndices.clear();
+ if (!AnyConsecutive)
+ return false;
+
+ for (auto &Base : Bases) {
+ for (auto &T : Base.second)
+ SortedIndices.push_back(std::get<2>(T));
+ }
+
+ assert(SortedIndices.size() == VL.size() &&
+ "Expected SortedIndices to be the size of VL");
+ return true;
+}
+
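
The clustering above can be pictured with two interleaved base pointers: every access joins the vector of the first base it has a computable constant distance to, each vector is then offset-sorted, and the original indices are emitted base by base. A self-contained sketch with (BaseId, Offset) pairs standing in for pointers (the real code additionally bails out unless some base's offsets become consecutive):

#include <algorithm>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

// Toy version: emit indices so that accesses sharing a base come out
// adjacent and offset-sorted.
std::vector<unsigned>
clusterSort(const std::vector<std::pair<int, int>> &Accs) {
  std::map<int, std::vector<std::pair<int, unsigned>>> Bases; // Base->(Off,Idx)
  for (unsigned I = 0; I < Accs.size(); ++I)
    Bases[Accs[I].first].push_back({Accs[I].second, I});
  std::vector<unsigned> Order;
  for (auto &B : Bases) {
    std::stable_sort(B.second.begin(), B.second.end());
    for (auto &P : B.second)
      Order.push_back(P.second);
  }
  return Order;
}

int main() {
  // Two bases interleaved: A+4, B+0, A+0, B+4 -> order {2, 0, 1, 3}.
  for (unsigned I : clusterSort({{0, 4}, {1, 0}, {0, 0}, {1, 4}}))
    std::cout << I << " ";
  std::cout << "\n";
}
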
+Optional<BoUpSLP::OrdersType>
+BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
+ assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
+ Type *ScalarTy = TE.Scalars[0]->getType();
+
+ SmallVector<Value *> Ptrs;
+ Ptrs.reserve(TE.Scalars.size());
+ for (Value *V : TE.Scalars) {
+ auto *L = dyn_cast<LoadInst>(V);
+ if (!L || !L->isSimple())
+ return None;
+ Ptrs.push_back(L->getPointerOperand());
+ }
+
+ BoUpSLP::OrdersType Order;
+ if (clusterSortPtrAccesses(Ptrs, ScalarTy, *DL, *SE, Order))
+ return Order;
+ return None;
+}
+
Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
bool TopToBottom) {
// No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -3108,6 +3696,9 @@ Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
}
if (Optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
return CurrentOrder;
+ if (TE.Scalars.size() >= 4)
+ if (Optional<OrdersType> Order = findPartiallyOrderedLoads(TE))
+ return Order;
}
return None;
}
@@ -3118,13 +3709,55 @@ void BoUpSLP::reorderTopToBottom() {
// ExtractElement gather nodes which can be vectorized and need to handle
// their ordering.
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
+
+ // AltShuffles can also have a preferred ordering that leads to fewer
+ // instructions, e.g., the addsub instruction in x86.
+ DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
+
+ // Maps a TreeEntry to the reorder indices of external users.
+ DenseMap<const TreeEntry *, SmallVector<OrdersType, 1>>
+ ExternalUserReorderMap;
+ // FIXME: Workaround for syntax error reported by MSVC buildbots.
+ TargetTransformInfo &TTIRef = *TTI;
// Find all reorderable nodes with the given VF.
// Currently these are vectorized stores, loads, extracts + some gathering of
// extracts.
- for_each(VectorizableTree, [this, &VFToOrderedEntries, &GathersToOrders](
+ for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries,
+ &GathersToOrders, &ExternalUserReorderMap,
+ &AltShufflesToOrders](
const std::unique_ptr<TreeEntry> &TE) {
+ // Look for external users that will probably be vectorized.
+ SmallVector<OrdersType, 1> ExternalUserReorderIndices =
+ findExternalStoreUsersReorderIndices(TE.get());
+ if (!ExternalUserReorderIndices.empty()) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ ExternalUserReorderMap.try_emplace(TE.get(),
+ std::move(ExternalUserReorderIndices));
+ }
+
+ // Patterns like [fadd,fsub] can be combined into a single instruction in
+ // x86. Reordering them into [fsub,fadd] blocks this pattern. So we need
+ // to take into account their order when looking for the most used order.
+ if (TE->isAltShuffle()) {
+ VectorType *VecTy =
+ FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size());
+ unsigned Opcode0 = TE->getOpcode();
+ unsigned Opcode1 = TE->getAltOpcode();
+ // The opcode mask selects between the two opcodes.
+ SmallBitVector OpcodeMask(TE->Scalars.size(), 0);
+ for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size()))
+ if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1)
+ OpcodeMask.set(Lane);
+ // If this pattern is supported by the target then we consider the order.
+ if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ AltShufflesToOrders.try_emplace(TE.get(), OrdersType());
+ }
+ // TODO: Check the reverse order too.
+ }
+
if (Optional<OrdersType> CurrentOrder =
- getReorderingData(*TE.get(), /*TopToBottom=*/true)) {
+ getReorderingData(*TE, /*TopToBottom=*/true)) {
// Do not include ordering for nodes used in the alt opcode vectorization,
// better to reorder them during bottom-to-top stage. If we follow the order
// here, it causes reordering of the whole graph though actually it is
@@ -3142,10 +3775,7 @@ void BoUpSLP::reorderTopToBottom() {
EI.UserTE->isAltShuffle() && EI.UserTE->Idx != 0;
}))
return;
- if (UserTE->UserTreeIndices.empty())
- UserTE = nullptr;
- else
- UserTE = UserTE->UserTreeIndices.back().UserTE;
+ UserTE = UserTE->UserTreeIndices.back().UserTE;
++Cnt;
}
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
@@ -3176,11 +3806,30 @@ void BoUpSLP::reorderTopToBottom() {
if (!OpTE->ReuseShuffleIndices.empty())
continue;
// Count number of orders uses.
- const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
- if (OpTE->State == TreeEntry::NeedToGather)
- return GathersToOrders.find(OpTE)->second;
+ const auto &Order = [OpTE, &GathersToOrders,
+ &AltShufflesToOrders]() -> const OrdersType & {
+ if (OpTE->State == TreeEntry::NeedToGather) {
+ auto It = GathersToOrders.find(OpTE);
+ if (It != GathersToOrders.end())
+ return It->second;
+ }
+ if (OpTE->isAltShuffle()) {
+ auto It = AltShufflesToOrders.find(OpTE);
+ if (It != AltShufflesToOrders.end())
+ return It->second;
+ }
return OpTE->ReorderIndices;
}();
+ // First consider the order of the external scalar users.
+ auto It = ExternalUserReorderMap.find(OpTE);
+ if (It != ExternalUserReorderMap.end()) {
+ const auto &ExternalUserReorderIndices = It->second;
+ for (const OrdersType &ExtOrder : ExternalUserReorderIndices)
+ ++OrdersUses.insert(std::make_pair(ExtOrder, 0)).first->second;
+ // No other useful reorder data in this entry.
+ if (Order.empty())
+ continue;
+ }
// Stores actually store the mask, not the order, need to invert.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
@@ -3270,6 +3919,57 @@ void BoUpSLP::reorderTopToBottom() {
}
}
+bool BoUpSLP::canReorderOperands(
+ TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
+ ArrayRef<TreeEntry *> ReorderableGathers,
+ SmallVectorImpl<TreeEntry *> &GatherOps) {
+ for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
+ if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
+ return OpData.first == I &&
+ OpData.second->State == TreeEntry::Vectorize;
+ }))
+ continue;
+ if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
+ // Do not reorder if operand node is used by many user nodes.
+ if (any_of(TE->UserTreeIndices,
+ [UserTE](const EdgeInfo &EI) { return EI.UserTE != UserTE; }))
+ return false;
+ // Add the node to the list of the ordered nodes with the identity
+ // order.
+ Edges.emplace_back(I, TE);
+ // Add ScatterVectorize nodes to the list of operands, where just
+ // reordering of the scalars is required. Similar to the gathers, so
+ // simply add to the list of gathered ops.
+ // If there are reused scalars, process this node as a regular vectorize
+ // node, just reorder reuses mask.
+ if (TE->State != TreeEntry::Vectorize && TE->ReuseShuffleIndices.empty())
+ GatherOps.push_back(TE);
+ continue;
+ }
+ TreeEntry *Gather = nullptr;
+ if (count_if(ReorderableGathers,
+ [&Gather, UserTE, I](TreeEntry *TE) {
+ assert(TE->State != TreeEntry::Vectorize &&
+ "Only non-vectorized nodes are expected.");
+ if (any_of(TE->UserTreeIndices,
+ [UserTE, I](const EdgeInfo &EI) {
+ return EI.UserTE == UserTE && EI.EdgeIdx == I;
+ })) {
+ assert(TE->isSame(UserTE->getOperand(I)) &&
+ "Operand entry does not match operands.");
+ Gather = TE;
+ return true;
+ }
+ return false;
+ }) > 1 &&
+ !all_of(UserTE->getOperand(I), isConstant))
+ return false;
+ if (Gather)
+ GatherOps.push_back(Gather);
+ }
+ return true;
+}
+
void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
SetVector<TreeEntry *> OrderedEntries;
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
@@ -3283,49 +3983,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
if (TE->State != TreeEntry::Vectorize)
NonVectorized.push_back(TE.get());
if (Optional<OrdersType> CurrentOrder =
- getReorderingData(*TE.get(), /*TopToBottom=*/false)) {
+ getReorderingData(*TE, /*TopToBottom=*/false)) {
OrderedEntries.insert(TE.get());
if (TE->State != TreeEntry::Vectorize)
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
}
});
- // Checks if the operands of the users are reordarable and have only single
- // use.
- auto &&CheckOperands =
- [this, &NonVectorized](const auto &Data,
- SmallVectorImpl<TreeEntry *> &GatherOps) {
- for (unsigned I = 0, E = Data.first->getNumOperands(); I < E; ++I) {
- if (any_of(Data.second,
- [I](const std::pair<unsigned, TreeEntry *> &OpData) {
- return OpData.first == I &&
- OpData.second->State == TreeEntry::Vectorize;
- }))
- continue;
- ArrayRef<Value *> VL = Data.first->getOperand(I);
- const TreeEntry *TE = nullptr;
- const auto *It = find_if(VL, [this, &TE](Value *V) {
- TE = getTreeEntry(V);
- return TE;
- });
- if (It != VL.end() && TE->isSame(VL))
- return false;
- TreeEntry *Gather = nullptr;
- if (count_if(NonVectorized, [VL, &Gather](TreeEntry *TE) {
- assert(TE->State != TreeEntry::Vectorize &&
- "Only non-vectorized nodes are expected.");
- if (TE->isSame(VL)) {
- Gather = TE;
- return true;
- }
- return false;
- }) > 1)
- return false;
- if (Gather)
- GatherOps.push_back(Gather);
- }
- return true;
- };
// 1. Propagate order to the graph nodes, which use only reordered nodes.
// I.e., if the node has operands, that are reordered, try to make at least
// one operand order in the natural order and reorder others + reorder the
@@ -3334,7 +3998,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
while (!OrderedEntries.empty()) {
// 1. Filter out only reordered nodes.
// 2. If the entry has multiple uses - skip it and jump to the next node.
- MapVector<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
+ DenseMap<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
SmallVector<TreeEntry *> Filtered;
for (TreeEntry *TE : OrderedEntries) {
if (!(TE->State == TreeEntry::Vectorize ||
@@ -3362,10 +4026,17 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
// Erase filtered entries.
for_each(Filtered,
[&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); });
- for (const auto &Data : Users) {
+ SmallVector<
+ std::pair<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>>>
+ UsersVec(Users.begin(), Users.end());
+ sort(UsersVec, [](const auto &Data1, const auto &Data2) {
+ return Data1.first->Idx > Data2.first->Idx;
+ });
+ for (auto &Data : UsersVec) {
// Check that operands are used only in the User node.
SmallVector<TreeEntry *> GatherOps;
- if (!CheckOperands(Data, GatherOps)) {
+ if (!canReorderOperands(Data.first, Data.second, NonVectorized,
+ GatherOps)) {
for_each(Data.second,
[&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
OrderedEntries.remove(Op.second);
@@ -3381,18 +4052,22 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
// the same node may be considered several times, though it might not be
// profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
+ SmallPtrSet<const TreeEntry *, 4> VisitedUsers;
for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;
if (!VisitedOps.insert(OpTE).second)
continue;
- if (!OpTE->ReuseShuffleIndices.empty() ||
- (IgnoreReorder && OpTE == VectorizableTree.front().get()))
+ if (!OpTE->ReuseShuffleIndices.empty())
continue;
const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
if (OpTE->State == TreeEntry::NeedToGather)
return GathersToOrders.find(OpTE)->second;
return OpTE->ReorderIndices;
}();
+ unsigned NumOps = count_if(
+ Data.second, [OpTE](const std::pair<unsigned, TreeEntry *> &P) {
+ return P.second == OpTE;
+ });
// Stores actually store the mask, not the order, need to invert.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
@@ -3404,14 +4079,52 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
});
fixupOrderingIndices(CurrentOrder);
- ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
+ OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second +=
+ NumOps;
} else {
- ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
+ OrdersUses.insert(std::make_pair(Order, 0)).first->second += NumOps;
+ }
+ auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0));
+ const auto &&AllowsReordering = [IgnoreReorder, &GathersToOrders](
+ const TreeEntry *TE) {
+ if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
+ (TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) ||
+ (IgnoreReorder && TE->Idx == 0))
+ return true;
+ if (TE->State == TreeEntry::NeedToGather) {
+ auto It = GathersToOrders.find(TE);
+ if (It != GathersToOrders.end())
+ return !It->second.empty();
+ return true;
+ }
+ return false;
+ };
+ for (const EdgeInfo &EI : OpTE->UserTreeIndices) {
+ TreeEntry *UserTE = EI.UserTE;
+ if (!VisitedUsers.insert(UserTE).second)
+ continue;
+ // May reorder user node if it requires reordering, has reused
+ // scalars, is an alternate op vectorize node or its op nodes require
+ // reordering.
+ if (AllowsReordering(UserTE))
+ continue;
+ // Check if users allow reordering.
+ // Currently look up just 1 level of operands to avoid increasing
+ // compile time.
+ // Reordering is profitable only if definitely more operands allow
+ // reordering than operands that keep the natural order.
+ ArrayRef<std::pair<unsigned, TreeEntry *>> Ops = Users[UserTE];
+ if (static_cast<unsigned>(count_if(
+ Ops, [UserTE, &AllowsReordering](
+ const std::pair<unsigned, TreeEntry *> &Op) {
+ return AllowsReordering(Op.second) &&
+ all_of(Op.second->UserTreeIndices,
+ [UserTE](const EdgeInfo &EI) {
+ return EI.UserTE == UserTE;
+ });
+ })) <= Ops.size() / 2)
+ ++Res.first->second;
}
- OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
- OpTE->UserTreeIndices.size();
- assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
- --OrdersUses[{}];
}
// If no orders - skip current nodes and jump to the next one, if any.
if (OrdersUses.empty()) {
@@ -3452,7 +4165,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
OrderedEntries.remove(TE);
if (!VisitedOps.insert(TE).second)
continue;
- if (!TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty()) {
+ if (TE->ReuseShuffleIndices.size() == BestOrder.size()) {
// Just reorder reuses indices.
reorderReuses(TE->ReuseShuffleIndices, Mask);
continue;
@@ -3464,6 +4177,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
TE->ReorderIndices.empty()) &&
"Non-matching sizes of user/operand entries.");
reorderOrder(TE->ReorderIndices, Mask);
+ if (IgnoreReorder && TE == VectorizableTree.front().get())
+ IgnoreReorder = false;
}
// For gathers just need to reorder its scalars.
for (TreeEntry *Gather : GatherOps) {
@@ -3555,7 +4270,7 @@ void BoUpSLP::buildExternalUses(
}
// Ignore users in the user ignore list.
- if (is_contained(UserIgnoreList, UserInst))
+ if (UserIgnoreList && UserIgnoreList->contains(UserInst))
continue;
LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
@@ -3566,78 +4281,270 @@ void BoUpSLP::buildExternalUses(
}
}
+DenseMap<Value *, SmallVector<StoreInst *, 4>>
+BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
+ DenseMap<Value *, SmallVector<StoreInst *, 4>> PtrToStoresMap;
+ for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) {
+ Value *V = TE->Scalars[Lane];
+ // To save compilation time, we don't visit values that have too many users.
+ static constexpr unsigned UsersLimit = 4;
+ if (V->hasNUsesOrMore(UsersLimit))
+ break;
+
+ // Collect stores per pointer object.
+ for (User *U : V->users()) {
+ auto *SI = dyn_cast<StoreInst>(U);
+ if (SI == nullptr || !SI->isSimple() ||
+ !isValidElementType(SI->getValueOperand()->getType()))
+ continue;
+ // Skip the user if it already has a tree entry.
+ if (getTreeEntry(U))
+ continue;
+
+ Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
+ auto &StoresVec = PtrToStoresMap[Ptr];
+ // For now just keep one store per pointer object per lane.
+ // TODO: Extend this to support multiple stores per pointer per lane
+ if (StoresVec.size() > Lane)
+ continue;
+ // Skip if in different BBs.
+ if (!StoresVec.empty() &&
+ SI->getParent() != StoresVec.back()->getParent())
+ continue;
+ // Make sure that the stores are of the same type.
+ if (!StoresVec.empty() &&
+ SI->getValueOperand()->getType() !=
+ StoresVec.back()->getValueOperand()->getType())
+ continue;
+ StoresVec.push_back(SI);
+ }
+ }
+ return PtrToStoresMap;
+}
+
+bool BoUpSLP::CanFormVector(const SmallVector<StoreInst *, 4> &StoresVec,
+ OrdersType &ReorderIndices) const {
+ // We check whether the stores in StoresVec can form a vector by sorting them
+ // and checking whether they are consecutive.
+
+ // To avoid calling getPointersDiff() while sorting we create a vector of
+ // pairs {store, offset from first} and sort this instead.
+ SmallVector<std::pair<StoreInst *, int>, 4> StoreOffsetVec(StoresVec.size());
+ StoreInst *S0 = StoresVec[0];
+ StoreOffsetVec[0] = {S0, 0};
+ Type *S0Ty = S0->getValueOperand()->getType();
+ Value *S0Ptr = S0->getPointerOperand();
+ for (unsigned Idx : seq<unsigned>(1, StoresVec.size())) {
+ StoreInst *SI = StoresVec[Idx];
+ Optional<int> Diff =
+ getPointersDiff(S0Ty, S0Ptr, SI->getValueOperand()->getType(),
+ SI->getPointerOperand(), *DL, *SE,
+ /*StrictCheck=*/true);
+ // We failed to compare the pointers so just abandon this StoresVec.
+ if (!Diff)
+ return false;
+ StoreOffsetVec[Idx] = {StoresVec[Idx], *Diff};
+ }
+
+ // Sort the vector based on the pointers. We create a copy because we may
+ // need the original later for calculating the reorder (shuffle) indices.
+ stable_sort(StoreOffsetVec, [](const std::pair<StoreInst *, int> &Pair1,
+ const std::pair<StoreInst *, int> &Pair2) {
+ int Offset1 = Pair1.second;
+ int Offset2 = Pair2.second;
+ return Offset1 < Offset2;
+ });
+
+ // Check if the stores are consecutive by checking if their difference is 1.
+ for (unsigned Idx : seq<unsigned>(1, StoreOffsetVec.size()))
+ if (StoreOffsetVec[Idx].second != StoreOffsetVec[Idx-1].second + 1)
+ return false;
+
+ // Calculate the shuffle indices according to their offset against the sorted
+ // StoreOffsetVec.
+ ReorderIndices.reserve(StoresVec.size());
+ for (StoreInst *SI : StoresVec) {
+ unsigned Idx = find_if(StoreOffsetVec,
+ [SI](const std::pair<StoreInst *, int> &Pair) {
+ return Pair.first == SI;
+ }) -
+ StoreOffsetVec.begin();
+ ReorderIndices.push_back(Idx);
+ }
+ // Identity order (e.g., {0,1,2,3}) is modeled as an empty OrdersType in
+ // reorderTopToBottom() and reorderBottomToTop(), so we are following the
+ // same convention here.
+ auto IsIdentityOrder = [](const OrdersType &Order) {
+ for (unsigned Idx : seq<unsigned>(0, Order.size()))
+ if (Idx != Order[Idx])
+ return false;
+ return true;
+ };
+ if (IsIdentityOrder(ReorderIndices))
+ ReorderIndices.clear();
+
+ return true;
+}
+
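
CanFormVector's index computation can be replayed in isolation: sort the per-store offsets, reject any gap, then map each store (in program order) to the position of its offset in the sorted sequence. A sketch with the offsets already computed (the real code also collapses an identity order into an empty OrdersType):

#include <algorithm>
#include <iostream>
#include <optional>
#include <vector>

// Toy: Offsets[i] is the element distance of store i from the first store,
// in program order. Returns the lane of each store, or nullopt on a gap.
std::optional<std::vector<unsigned>>
reorderIndices(const std::vector<int> &Offsets) {
  std::vector<int> Sorted(Offsets);
  std::sort(Sorted.begin(), Sorted.end());
  for (size_t I = 1; I < Sorted.size(); ++I)
    if (Sorted[I] != Sorted[I - 1] + 1)
      return std::nullopt; // not consecutive: no single vector store
  std::vector<unsigned> Order;
  for (int Off : Offsets)
    Order.push_back(std::lower_bound(Sorted.begin(), Sorted.end(), Off) -
                    Sorted.begin());
  return Order;
}

int main() {
  // Stores hit offsets 2,0,3,1 in program order -> lanes {2, 0, 3, 1}.
  for (unsigned L : *reorderIndices({2, 0, 3, 1}))
    std::cout << L << " ";
  std::cout << "\n";
}
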
+#ifndef NDEBUG
+LLVM_DUMP_METHOD static void dumpOrder(const BoUpSLP::OrdersType &Order) {
+ for (unsigned Idx : Order)
+ dbgs() << Idx << ", ";
+ dbgs() << "\n";
+}
+#endif
+
+SmallVector<BoUpSLP::OrdersType, 1>
+BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const {
+ unsigned NumLanes = TE->Scalars.size();
+
+ DenseMap<Value *, SmallVector<StoreInst *, 4>> PtrToStoresMap =
+ collectUserStores(TE);
+
+ // Holds the reorder indices for each candidate store vector that is a user of
+ // the current TreeEntry.
+ SmallVector<OrdersType, 1> ExternalReorderIndices;
+
+ // Now inspect the stores collected per pointer and look for vectorization
+ // candidates. For each candidate calculate the reorder index vector and push
+ // it into `ExternalReorderIndices`.
+ for (const auto &Pair : PtrToStoresMap) {
+ auto &StoresVec = Pair.second;
+ // If we don't have exactly NumLanes stores, then we can't form a vector.
+ if (StoresVec.size() != NumLanes)
+ continue;
+
+ // If the stores are not consecutive then abandon this StoresVec.
+ OrdersType ReorderIndices;
+ if (!CanFormVector(StoresVec, ReorderIndices))
+ continue;
+
+ // We now know that the scalars in StoresVec can form a vector instruction,
+ // so set the reorder indices.
+ ExternalReorderIndices.push_back(ReorderIndices);
+ }
+ return ExternalReorderIndices;
+}
+
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
- ArrayRef<Value *> UserIgnoreLst) {
+ const SmallDenseSet<Value *> &UserIgnoreLst) {
deleteTree();
- UserIgnoreList = UserIgnoreLst;
+ UserIgnoreList = &UserIgnoreLst;
if (!allSameType(Roots))
return;
buildTree_rec(Roots, 0, EdgeInfo());
}
-namespace {
-/// Tracks the state we can represent the loads in the given sequence.
-enum class LoadsState { Gather, Vectorize, ScatterVectorize };
-} // anonymous namespace
-
-/// Checks if the given array of loads can be represented as a vectorized,
-/// scatter or just simple gather.
-static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
- const TargetTransformInfo &TTI,
- const DataLayout &DL, ScalarEvolution &SE,
- SmallVectorImpl<unsigned> &Order,
- SmallVectorImpl<Value *> &PointerOps) {
- // Check that a vectorized load would load the same memory as a scalar
- // load. For example, we don't want to vectorize loads that are smaller
- // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
- // treats loading/storing it as an i8 struct. If we vectorize loads/stores
- // from such a struct, we read/write packed bits disagreeing with the
- // unvectorized version.
- Type *ScalarTy = VL0->getType();
-
- if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
- return LoadsState::Gather;
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
+ deleteTree();
+ if (!allSameType(Roots))
+ return;
+ buildTree_rec(Roots, 0, EdgeInfo());
+}
- // Make sure all loads in the bundle are simple - we can't vectorize
- // atomic or volatile loads.
- PointerOps.clear();
- PointerOps.resize(VL.size());
- auto *POIter = PointerOps.begin();
+/// \return true if the specified list of values has only one instruction that
+/// requires scheduling, false otherwise.
+#ifndef NDEBUG
+static bool needToScheduleSingleInstruction(ArrayRef<Value *> VL) {
+ Value *NeedsScheduling = nullptr;
for (Value *V : VL) {
- auto *L = cast<LoadInst>(V);
- if (!L->isSimple())
- return LoadsState::Gather;
- *POIter = L->getPointerOperand();
- ++POIter;
+ if (doesNotNeedToBeScheduled(V))
+ continue;
+ if (!NeedsScheduling) {
+ NeedsScheduling = V;
+ continue;
+ }
+ return false;
}
+ return NeedsScheduling;
+}
+#endif
- Order.clear();
- // Check the order of pointer operands.
- if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) {
- Value *Ptr0;
- Value *PtrN;
- if (Order.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
+/// Generates a key/subkey pair for the given value to provide effective
+/// sorting of the values and better detection of vectorizable value
+/// sequences. The keys/subkeys can be used to sort the values themselves
+/// (keys) and within value subgroups (subkeys).
+static std::pair<size_t, size_t> generateKeySubkey(
+ Value *V, const TargetLibraryInfo *TLI,
+ function_ref<hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator,
+ bool AllowAlternate) {
+ hash_code Key = hash_value(V->getValueID() + 2);
+ hash_code SubKey = hash_value(0);
+ // Sort the loads by the distance between the pointers.
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ Key = hash_combine(hash_value(Instruction::Load), Key);
+ if (LI->isSimple())
+ SubKey = hash_value(LoadsSubkeyGenerator(Key, LI));
+ else
+ SubKey = hash_value(LI);
+ } else if (isVectorLikeInstWithConstOps(V)) {
+ // Sort extracts by the vector operands.
+ if (isa<ExtractElementInst, UndefValue>(V))
+ Key = hash_value(Value::UndefValueVal + 1);
+ if (auto *EI = dyn_cast<ExtractElementInst>(V)) {
+ if (!isUndefVector(EI->getVectorOperand()) &&
+ !isa<UndefValue>(EI->getIndexOperand()))
+ SubKey = hash_value(EI->getVectorOperand());
+ }
+ } else if (auto *I = dyn_cast<Instruction>(V)) {
+ // Sort other instructions just by the opcodes except for CMPInst.
+ // For CMP also sort by the predicate kind.
+ if ((isa<BinaryOperator>(I) || isa<CastInst>(I)) &&
+ isValidForAlternation(I->getOpcode())) {
+ if (AllowAlternate)
+ Key = hash_value(isa<BinaryOperator>(I) ? 1 : 0);
+ else
+ Key = hash_combine(hash_value(I->getOpcode()), Key);
+ SubKey = hash_combine(
+ hash_value(I->getOpcode()), hash_value(I->getType()),
+ hash_value(isa<BinaryOperator>(I)
+ ? I->getType()
+ : cast<CastInst>(I)->getOperand(0)->getType()));
+ // For casts, look through the only operand to improve compile time.
+ if (isa<CastInst>(I)) {
+ std::pair<size_t, size_t> OpVals =
+ generateKeySubkey(I->getOperand(0), TLI, LoadsSubkeyGenerator,
+ /*AllowAlternate=*/true);
+ Key = hash_combine(OpVals.first, Key);
+ SubKey = hash_combine(OpVals.first, SubKey);
+ }
+ } else if (auto *CI = dyn_cast<CmpInst>(I)) {
+ CmpInst::Predicate Pred = CI->getPredicate();
+ if (CI->isCommutative())
+ Pred = std::min(Pred, CmpInst::getInversePredicate(Pred));
+ CmpInst::Predicate SwapPred = CmpInst::getSwappedPredicate(Pred);
+ SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Pred),
+ hash_value(SwapPred),
+ hash_value(CI->getOperand(0)->getType()));
+ } else if (auto *Call = dyn_cast<CallInst>(I)) {
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, TLI);
+ if (isTriviallyVectorizable(ID)) {
+ SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(ID));
+ } else if (!VFDatabase(*Call).getMappings(*Call).empty()) {
+ SubKey = hash_combine(hash_value(I->getOpcode()),
+ hash_value(Call->getCalledFunction()));
+ } else {
+ Key = hash_combine(hash_value(Call), Key);
+ SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Call));
+ }
+ for (const CallBase::BundleOpInfo &Op : Call->bundle_op_infos())
+ SubKey = hash_combine(hash_value(Op.Begin), hash_value(Op.End),
+ hash_value(Op.Tag), SubKey);
+ } else if (auto *Gep = dyn_cast<GetElementPtrInst>(I)) {
+ if (Gep->getNumOperands() == 2 && isa<ConstantInt>(Gep->getOperand(1)))
+ SubKey = hash_value(Gep->getPointerOperand());
+ else
+ SubKey = hash_value(Gep);
+ } else if (BinaryOperator::isIntDivRem(I->getOpcode()) &&
+ !isa<ConstantInt>(I->getOperand(1))) {
+ // Do not try to vectorize instructions with potentially high cost.
+ SubKey = hash_value(I);
} else {
- Ptr0 = PointerOps[Order.front()];
- PtrN = PointerOps[Order.back()];
+ SubKey = hash_value(I->getOpcode());
}
- Optional<int> Diff =
- getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
- // Check that the sorted loads are consecutive.
- if (static_cast<unsigned>(*Diff) == VL.size() - 1)
- return LoadsState::Vectorize;
- Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
- for (Value *V : VL)
- CommonAlignment =
- commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
- if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
- CommonAlignment))
- return LoadsState::ScatterVectorize;
+ Key = hash_combine(hash_value(I->getParent()), Key);
}
-
- return LoadsState::Gather;
+ return std::make_pair(Key, SubKey);
}
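
The key/subkey scheme is two-level bucketing: the key groups values that could plausibly be vectorized together (opcode class, parent block), while the subkey refines the ordering inside a bucket (operand types, predicates, pointer bases). A toy illustration with strings standing in for IR properties; the hash-combine helper here is an assumption for the sketch, not LLVM's hash_combine:

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

// Simple hash mixer for the sketch (assumed, not LLVM's hash_combine).
static size_t combine(size_t Seed, size_t V) {
  return Seed ^ (V + 0x9e3779b97f4a7c15ULL + (Seed << 6) + (Seed >> 2));
}

struct KeySubkey {
  size_t Key, SubKey;
};

// Key buckets coarsely by opcode; SubKey refines by operand type.
KeySubkey keyFor(const std::string &Opcode, const std::string &Type) {
  size_t Key = std::hash<std::string>{}(Opcode);
  size_t SubKey = combine(Key, std::hash<std::string>{}(Type));
  return {Key, SubKey};
}

int main() {
  auto A = keyFor("add", "i32"), B = keyFor("add", "i64");
  std::cout << (A.Key == B.Key) << " "        // same coarse bucket: 1
            << (A.SubKey == B.SubKey) << "\n"; // different refinement: 0
}
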
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
@@ -3722,10 +4629,84 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// If all of the operands are identical or constant we have a simple solution.
// If we deal with insert/extract instructions, they all must have constant
// indices, otherwise we should gather them, not try to vectorize.
- if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode() ||
- (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(S.MainOp) &&
- !all_of(VL, isVectorLikeInstWithConstOps))) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
+ // If alternate op node with 2 elements with gathered operands - do not
+ // vectorize.
+ auto &&NotProfitableForVectorization = [&S, this,
+ Depth](ArrayRef<Value *> VL) {
+ if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2)
+ return false;
+ if (VectorizableTree.size() < MinTreeSize)
+ return false;
+ if (Depth >= RecursionMaxDepth - 1)
+ return true;
+ // Check if all operands are extracts, part of vector node or can build a
+ // regular vectorize node.
+ SmallVector<unsigned, 2> InstsCount(VL.size(), 0);
+ for (Value *V : VL) {
+ auto *I = cast<Instruction>(V);
+ InstsCount.push_back(count_if(I->operand_values(), [](Value *Op) {
+ return isa<Instruction>(Op) || isVectorLikeInstWithConstOps(Op);
+ }));
+ }
+ bool IsCommutative = isCommutative(S.MainOp) || isCommutative(S.AltOp);
+ if ((IsCommutative &&
+ std::accumulate(InstsCount.begin(), InstsCount.end(), 0) < 2) ||
+ (!IsCommutative &&
+ all_of(InstsCount, [](unsigned ICnt) { return ICnt < 2; })))
+ return true;
+ assert(VL.size() == 2 && "Expected only 2 alternate op instructions.");
+ SmallVector<SmallVector<std::pair<Value *, Value *>>> Candidates;
+ auto *I1 = cast<Instruction>(VL.front());
+ auto *I2 = cast<Instruction>(VL.back());
+ for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op)
+ Candidates.emplace_back().emplace_back(I1->getOperand(Op),
+ I2->getOperand(Op));
+ if (static_cast<unsigned>(count_if(
+ Candidates, [this](ArrayRef<std::pair<Value *, Value *>> Cand) {
+ return findBestRootPair(Cand, LookAheadHeuristics::ScoreSplat);
+ })) >= S.MainOp->getNumOperands() / 2)
+ return false;
+ if (S.MainOp->getNumOperands() > 2)
+ return true;
+ if (IsCommutative) {
+ // Check permuted operands.
+ Candidates.clear();
+ for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op)
+ Candidates.emplace_back().emplace_back(I1->getOperand(Op),
+ I2->getOperand((Op + 1) % E));
+ if (any_of(
+ Candidates, [this](ArrayRef<std::pair<Value *, Value *>> Cand) {
+ return findBestRootPair(Cand, LookAheadHeuristics::ScoreSplat);
+ }))
+ return false;
+ }
+ return true;
+ };
+ SmallVector<unsigned> SortedIndices;
+ BasicBlock *BB = nullptr;
+ bool AreAllSameInsts =
+ (S.getOpcode() && allSameBlock(VL)) ||
+ (S.OpValue->getType()->isPointerTy() && UserTreeIdx.UserTE &&
+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize &&
+ VL.size() > 2 &&
+ all_of(VL,
+ [&BB](Value *V) {
+ auto *I = dyn_cast<GetElementPtrInst>(V);
+ if (!I)
+ return doesNotNeedToBeScheduled(V);
+ if (!BB)
+ BB = I->getParent();
+ return BB == I->getParent() && I->getNumOperands() == 2;
+ }) &&
+ BB &&
+ sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
+ SortedIndices));
+ if (allConstant(VL) || isSplat(VL) || !AreAllSameInsts ||
+ (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
+ S.OpValue) &&
+ !all_of(VL, isVectorLikeInstWithConstOps)) ||
+ NotProfitableForVectorization(VL)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
@@ -3736,12 +4717,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// the same block.
// Don't vectorize ephemeral values.
- for (Value *V : VL) {
- if (EphValues.count(V)) {
- LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
- << ") is ephemeral.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
- return;
+ if (!EphValues.empty()) {
+ for (Value *V : VL) {
+ if (EphValues.count(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
+ << ") is ephemeral.\n");
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ return;
+ }
}
}
@@ -3779,20 +4762,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// The reduction nodes (stored in UserIgnoreList) also should stay scalar.
- for (Value *V : VL) {
- if (is_contained(UserIgnoreList, V)) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- return;
+ if (UserIgnoreList && !UserIgnoreList->empty()) {
+ for (Value *V : VL) {
+ if (UserIgnoreList && UserIgnoreList->contains(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ return;
+ }
}
}
+ // Special processing for sorted pointers for ScatterVectorize node with
+ // constant indices only.
+ if (AreAllSameInsts && !(S.getOpcode() && allSameBlock(VL)) &&
+ UserTreeIdx.UserTE &&
+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize) {
+ assert(S.OpValue->getType()->isPointerTy() &&
+ count_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); }) >=
+ 2 &&
+ "Expected pointers only.");
+ // Reset S to make it GetElementPtr kind of node.
+ const auto *It = find_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); });
+ assert(It != VL.end() && "Expected at least one GEP.");
+ S = getSameOpcode(*It);
+ }
+
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
auto *VL0 = cast<Instruction>(S.OpValue);
- BasicBlock *BB = VL0->getParent();
+ BB = VL0->getParent();
if (!DT->isReachableFromEntry(BB)) {
// Don't go into unreachable blocks. They may contain instructions with
@@ -3810,9 +4810,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!BSRef)
BSRef = std::make_unique<BlockScheduling>(BB);
- BlockScheduling &BS = *BSRef.get();
+ BlockScheduling &BS = *BSRef;
Optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
+#ifdef EXPENSIVE_CHECKS
+ // Make sure we didn't break any internal invariants
+ BS.verify();
+#endif
if (!Bundle) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL0) ||
@@ -3832,10 +4836,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check for terminator values (e.g. invoke).
for (Value *V : VL)
- for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
- Instruction *Term = dyn_cast<Instruction>(
- cast<PHINode>(V)->getIncomingValueForBlock(
- PH->getIncomingBlock(I)));
+ for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
+ Instruction *Term = dyn_cast<Instruction>(Incoming);
if (Term && Term->isTerminator()) {
LLVM_DEBUG(dbgs()
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
@@ -3918,13 +4920,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that we have a buildvector and not a shuffle of 2 or more
// different vectors.
ValueSet SourceVectors;
- int MinIdx = std::numeric_limits<int>::max();
for (Value *V : VL) {
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
- Optional<int> Idx = *getInsertIndex(V, 0);
- if (!Idx || *Idx == UndefMaskElem)
- continue;
- MinIdx = std::min(MinIdx, *Idx);
+ assert(getInsertIndex(V) != None && "Non-constant or undef index?");
}
if (count_if(VL, [&SourceVectors](Value *V) {
@@ -3946,10 +4944,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
decltype(OrdCompare)>
Indices(OrdCompare);
for (int I = 0, E = VL.size(); I < E; ++I) {
- Optional<int> Idx = *getInsertIndex(VL[I], 0);
- if (!Idx || *Idx == UndefMaskElem)
- continue;
- Indices.emplace(*Idx, I);
+ unsigned Idx = *getInsertIndex(VL[I]);
+ Indices.emplace(Idx, I);
}
OrdersType CurrentOrder(VL.size(), VL.size());
bool IsIdentity = true;
@@ -3985,7 +4981,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
TreeEntry *TE = nullptr;
- switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, CurrentOrder,
+ switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, *LI, CurrentOrder,
PointerOps)) {
case LoadsState::Vectorize:
if (CurrentOrder.empty()) {
@@ -4166,7 +5162,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (Value *V : VL) {
- if (cast<Instruction>(V)->getNumOperands() != 2) {
+ auto *I = dyn_cast<GetElementPtrInst>(V);
+ if (!I)
+ continue;
+ if (I->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
@@ -4177,9 +5176,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// We can't combine several GEPs into one vector if they operate on
// different types.
- Type *Ty0 = VL0->getOperand(0)->getType();
+ Type *Ty0 = cast<GEPOperator>(VL0)->getSourceElementType();
for (Value *V : VL) {
- Type *CurTy = cast<Instruction>(V)->getOperand(0)->getType();
+ auto *GEP = dyn_cast<GEPOperator>(V);
+ if (!GEP)
+ continue;
+ Type *CurTy = GEP->getSourceElementType();
if (Ty0 != CurTy) {
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (different types).\n");
@@ -4190,15 +5192,22 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
+ bool IsScatterUser =
+ UserTreeIdx.UserTE &&
+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
// We don't combine GEPs with non-constant indexes.
Type *Ty1 = VL0->getOperand(1)->getType();
for (Value *V : VL) {
- auto Op = cast<Instruction>(V)->getOperand(1);
- if (!isa<ConstantInt>(Op) ||
+ auto *I = dyn_cast<GetElementPtrInst>(V);
+ if (!I)
+ continue;
+ auto *Op = I->getOperand(1);
+ if ((!IsScatterUser && !isa<ConstantInt>(Op)) ||
(Op->getType() != Ty1 &&
- Op->getType()->getScalarSizeInBits() >
- DL->getIndexSizeInBits(
- V->getType()->getPointerAddressSpace()))) {
+ ((IsScatterUser && !isa<ConstantInt>(Op)) ||
+ Op->getType()->getScalarSizeInBits() >
+ DL->getIndexSizeInBits(
+ V->getType()->getPointerAddressSpace())))) {
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL, VL0);
@@ -4213,9 +5222,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
SmallVector<ValueList, 2> Operands(2);
// Prepare the operand vector for pointer operands.
- for (Value *V : VL)
- Operands.front().push_back(
- cast<GetElementPtrInst>(V)->getPointerOperand());
+ for (Value *V : VL) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP) {
+ Operands.front().push_back(V);
+ continue;
+ }
+ Operands.front().push_back(GEP->getPointerOperand());
+ }
TE->setOperand(0, Operands.front());
// Need to cast all indices to the same type before vectorization to
// avoid crash.
@@ -4226,9 +5240,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
Type *Ty = all_of(VL,
[VL0Ty, IndexIdx](Value *V) {
- return VL0Ty == cast<GetElementPtrInst>(V)
- ->getOperand(IndexIdx)
- ->getType();
+ auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ return true;
+ return VL0Ty == GEP->getOperand(IndexIdx)->getType();
})
? VL0Ty
: DL->getIndexType(cast<GetElementPtrInst>(VL0)
@@ -4236,10 +5251,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
->getScalarType());
// Prepare the operand vector.
for (Value *V : VL) {
- auto *Op = cast<Instruction>(V)->getOperand(IndexIdx);
- auto *CI = cast<ConstantInt>(Op);
- Operands.back().push_back(ConstantExpr::getIntegerCast(
- CI, Ty, CI->getValue().isSignBitSet()));
+ auto *I = dyn_cast<GetElementPtrInst>(V);
+ if (!I) {
+ Operands.back().push_back(
+ ConstantInt::get(Ty, 0, /*isSigned=*/false));
+ continue;
+ }
+ auto *Op = I->getOperand(IndexIdx);
+ auto *CI = dyn_cast<ConstantInt>(Op);
+ if (!CI)
+ Operands.back().push_back(Op);
+ else
+ Operands.back().push_back(ConstantExpr::getIntegerCast(
+ CI, Ty, CI->getValue().isSignBitSet()));
}
TE->setOperand(IndexIdx, Operands.back());
@@ -4345,7 +5369,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
unsigned NumArgs = CI->arg_size();
SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
for (unsigned j = 0; j != NumArgs; ++j)
- if (hasVectorInstrinsicScalarOpd(ID, j))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, j))
ScalarArgs[j] = CI->getArgOperand(j);
for (Value *V : VL) {
CallInst *CI2 = dyn_cast<CallInst>(V);
@@ -4364,7 +5388,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Some intrinsics have scalar arguments and should be same in order for
// them to be vectorized.
for (unsigned j = 0; j != NumArgs; ++j) {
- if (hasVectorInstrinsicScalarOpd(ID, j)) {
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, j)) {
Value *A1J = CI2->getArgOperand(j);
if (ScalarArgs[j] != A1J) {
BS.cancelScheduling(VL, VL0);
@@ -4397,7 +5421,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
- if (hasVectorInstrinsicScalarOpd(ID, i))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, i))
continue;
ValueList Operands;
// Prepare the operand vector.
@@ -4434,6 +5458,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
} else {
CmpInst::Predicate P0 = CI->getPredicate();
CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
+ assert(P0 != AltP0 &&
+ "Expected different main/alternate predicates.");
CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
Value *BaseOp0 = VL0->getOperand(0);
Value *BaseOp1 = VL0->getOperand(1);
@@ -4443,16 +5469,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
- CmpInst::Predicate CurrentPred = CI->getPredicate();
- CmpInst::Predicate CurrentPredSwapped =
- CmpInst::getSwappedPredicate(CurrentPred);
- if (P0 == AltP0 || P0 == AltP0Swapped) {
- if ((P0 == CurrentPred &&
- !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
- (P0 == CurrentPredSwapped &&
- !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
+ CmpInst::Predicate CurrentPred = Cmp->getPredicate();
+ if (P0 == AltP0Swapped) {
+ if (CI != Cmp && S.AltOp != Cmp &&
+ ((P0 == CurrentPred &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
+ (AltP0 == CurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS))))
std::swap(LHS, RHS);
- } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
+ } else if (P0 != CurrentPred && AltP0 != CurrentPred) {
std::swap(LHS, RHS);
}
Left.push_back(LHS);
@@ -4602,7 +5627,9 @@ bool BoUpSLP::areAllUsersVectorized(Instruction *I,
ArrayRef<Value *> VectorizedVals) const {
return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
all_of(I->users(), [this](User *U) {
- return ScalarToTreeEntry.count(U) > 0 || MustGather.contains(U);
+ return ScalarToTreeEntry.count(U) > 0 ||
+ isVectorLikeInstWithConstOps(U) ||
+ (isa<ExtractElementInst>(U) && MustGather.contains(U));
});
}
@@ -4659,19 +5686,21 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
// Process extracts in blocks of EltsPerVector to check if the source vector
// operand can be re-used directly. If not, add the cost of creating a shuffle
// to extract the values into a vector register.
+ SmallVector<int> RegMask(EltsPerVector, UndefMaskElem);
for (auto *V : VL) {
++Idx;
- // Need to exclude undefs from analysis.
- if (isa<UndefValue>(V) || Mask[Idx] == UndefMaskElem)
- continue;
-
// Reached the start of a new vector registers.
if (Idx % EltsPerVector == 0) {
+ RegMask.assign(EltsPerVector, UndefMaskElem);
AllConsecutive = true;
continue;
}
+ // Need to exclude undefs from analysis.
+ if (isa<UndefValue>(V) || Mask[Idx] == UndefMaskElem)
+ continue;
+
// Check all extracts for a vector register on the target directly
// extract values in order.
unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
@@ -4679,6 +5708,7 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
CurrentIdx % EltsPerVector == Idx % EltsPerVector;
+ RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector;
}
if (AllConsecutive)
@@ -4690,10 +5720,10 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
// If we have a series of extracts which are not consecutive and hence
// cannot re-use the source vector register directly, compute the shuffle
- // cost to extract the a vector with EltsPerVector elements.
+ // cost to extract the vector with EltsPerVector elements.
Cost += TTI.getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc,
- FixedVectorType::get(VecTy->getElementType(), EltsPerVector));
+ FixedVectorType::get(VecTy->getElementType(), EltsPerVector), RegMask);
}
return Cost;
}
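
The RegMask bookkeeping above can be seen on a small example: extract indices are walked in chunks of EltsPerVector; a chunk whose source indices are consecutive and chunk-aligned reuses the source register for free, and any other chunk pays for one single-source permute described by the per-register mask. A reduced sketch:

#include <iostream>
#include <vector>

int main() {
  const unsigned EltsPerVector = 4;
  std::vector<int> Indices = {0, 1, 2, 3, 6, 5, 4, 7}; // two chunks of four
  for (unsigned Chunk = 0; Chunk + EltsPerVector <= Indices.size();
       Chunk += EltsPerVector) {
    bool AllConsecutive = true;
    std::vector<int> RegMask(EltsPerVector, -1); // -1 ~ UndefMaskElem
    for (unsigned I = 0; I < EltsPerVector; ++I) {
      int Cur = Indices[Chunk + I];
      RegMask[I] = Cur % EltsPerVector;
      if (I > 0)
        AllConsecutive &= Cur == Indices[Chunk + I - 1] + 1 &&
                          Cur % EltsPerVector == I;
    }
    if (AllConsecutive) {
      std::cout << "chunk " << Chunk / EltsPerVector << ": reuse register\n";
    } else {
      std::cout << "chunk " << Chunk / EltsPerVector << ": permute mask";
      for (int M : RegMask)
        std::cout << " " << M;
      std::cout << "\n";
    }
  }
}
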
@@ -4701,12 +5731,12 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
/// Build shuffle mask for shuffle graph entries and lists of main and alternate
/// operations' operands.
static void
-buildSuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
- ArrayRef<int> ReusesIndices,
- const function_ref<bool(Instruction *)> IsAltOp,
- SmallVectorImpl<int> &Mask,
- SmallVectorImpl<Value *> *OpScalars = nullptr,
- SmallVectorImpl<Value *> *AltScalars = nullptr) {
+buildShuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
+ ArrayRef<int> ReusesIndices,
+ const function_ref<bool(Instruction *)> IsAltOp,
+ SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<Value *> *OpScalars = nullptr,
+ SmallVectorImpl<Value *> *AltScalars = nullptr) {
unsigned Sz = VL.size();
Mask.assign(Sz, UndefMaskElem);
SmallVector<int> OrderMask;
@@ -4736,6 +5766,29 @@ buildSuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
}
}
+/// Checks if the specified instruction \p I is an alternate operation for the
+/// given \p MainOp and \p AltOp instructions.
+static bool isAlternateInstruction(const Instruction *I,
+ const Instruction *MainOp,
+ const Instruction *AltOp) {
+ if (auto *CI0 = dyn_cast<CmpInst>(MainOp)) {
+ auto *AltCI0 = cast<CmpInst>(AltOp);
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ assert(P0 != AltP0 && "Expected different main/alternate predicates.");
+ CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ if (P0 == AltP0Swapped)
+ return I == AltCI0 ||
+ (I != MainOp &&
+ !areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ CI->getOperand(0), CI->getOperand(1)));
+ return AltP0 == CurrentPred || AltP0Swapped == CurrentPred;
+ }
+ return I->getOpcode() == AltOp->getOpcode();
+}
+
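// A minimal standalone sketch of the predicate rule used above, with a toy
// enum standing in for CmpInst::Predicate and a boolean standing in for the
// areCompatibleCmpOps operand check; isAlternatePred is an illustrative name.
#include <cassert>

enum Pred { LT, GT, LE, GE };

Pred swapped(Pred P) {
  switch (P) {
  case LT: return GT;
  case GT: return LT;
  case LE: return GE;
  case GE: return LE;
  }
  return P;
}

// When the main predicate equals the swapped alternate predicate, operand
// order decides which group a compare belongs to; otherwise the predicates
// alone decide.
bool isAlternatePred(Pred P0, Pred AltP0, Pred Cur, bool SameOperandOrder) {
  if (P0 == swapped(AltP0))
    return !SameOperandOrder; // swapped operands -> alternate group
  return AltP0 == Cur || swapped(AltP0) == Cur;
}

int main() {
  // 'a < b' as main, 'a > b' as alternate: 'b < a' lands in the alternate
  // group because its operands are swapped relative to the main compare.
  assert(isAlternatePred(LT, GT, LT, /*SameOperandOrder=*/false));
  assert(!isAlternatePred(LT, GT, LT, /*SameOperandOrder=*/true));
}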
InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
ArrayRef<Value *> VectorizedVals) {
ArrayRef<Value*> VL = E->Scalars;
@@ -4849,7 +5902,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
SmallVector<const TreeEntry *> Entries;
Optional<TargetTransformInfo::ShuffleKind> Shuffle =
isGatherShuffledEntry(E, Mask, Entries);
- if (Shuffle.hasValue()) {
+ if (Shuffle) {
InstructionCost GatherCost = 0;
if (ShuffleVectorInst::isIdentityMask(Mask)) {
// Perfect match in the graph, will reuse the previously vectorized
@@ -4885,7 +5938,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
SmallVector<int> Mask;
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
isFixedVectorShuffle(VL, Mask);
- if (ShuffleKind.hasValue()) {
+ if (ShuffleKind) {
      // Found the bunch of extractelement instructions that must be gathered
      // into a vector and can be represented as a permutation of elements from
      // a single input vector or from 2 input vectors.
@@ -4903,7 +5956,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// broadcast.
assert(VecTy == FinalVecTy &&
"No reused scalars expected for broadcast.");
- return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
+ /*Mask=*/None, /*Index=*/0,
+ /*SubTp=*/nullptr, /*Args=*/VL[0]);
}
InstructionCost ReuseShuffleCost = 0;
if (NeedToShuffleReuses)
@@ -4927,8 +5982,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
!VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
- LoadsState LS = canVectorizeLoads(Slice, Slice.front(), *TTI, *DL,
- *SE, CurrentOrder, PointerOps);
+ LoadsState LS =
+ canVectorizeLoads(Slice, Slice.front(), *TTI, *DL, *SE, *LI,
+ CurrentOrder, PointerOps);
switch (LS) {
case LoadsState::Vectorize:
case LoadsState::ScatterVectorize:
@@ -5018,7 +6074,11 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::ScatterVectorize) &&
"Unhandled state");
- assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
+ assert(E->getOpcode() &&
+ ((allSameType(VL) && allSameBlock(VL)) ||
+ (E->getOpcode() == Instruction::GetElementPtr &&
+ E->getMainOp()->getType()->isPointerTy())) &&
+ "Invalid VL");
Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
@@ -5090,30 +6150,60 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
assert(E->ReuseShuffleIndices.empty() &&
"Unique insertelements only are expected.");
auto *SrcVecTy = cast<FixedVectorType>(VL0->getType());
-
unsigned const NumElts = SrcVecTy->getNumElements();
unsigned const NumScalars = VL.size();
+
+ unsigned NumOfParts = TTI->getNumberOfParts(SrcVecTy);
+
+ unsigned OffsetBeg = *getInsertIndex(VL.front());
+ unsigned OffsetEnd = OffsetBeg;
+ for (Value *V : VL.drop_front()) {
+ unsigned Idx = *getInsertIndex(V);
+ if (OffsetBeg > Idx)
+ OffsetBeg = Idx;
+ else if (OffsetEnd < Idx)
+ OffsetEnd = Idx;
+ }
+ unsigned VecScalarsSz = PowerOf2Ceil(NumElts);
+ if (NumOfParts > 0)
+ VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
+ unsigned VecSz =
+ (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) *
+ VecScalarsSz;
+ unsigned Offset = VecScalarsSz * (OffsetBeg / VecScalarsSz);
+ unsigned InsertVecSz = std::min<unsigned>(
+ PowerOf2Ceil(OffsetEnd - OffsetBeg + 1),
+ ((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) *
+ VecScalarsSz);
+ bool IsWholeSubvector =
+ OffsetBeg == Offset && ((OffsetEnd + 1) % VecScalarsSz == 0);
+ // Check if we can safely insert a subvector. If it is not possible, just
+ // generate a whole-sized vector and shuffle the source vector and the new
+ // subvector.
+ if (OffsetBeg + InsertVecSz > VecSz) {
+ // Align OffsetBeg to generate correct mask.
+ OffsetBeg = alignDown(OffsetBeg, VecSz, Offset);
+ InsertVecSz = VecSz;
+ }
+
APInt DemandedElts = APInt::getZero(NumElts);
// TODO: Add support for Instruction::InsertValue.
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
inversePermutation(E->ReorderIndices, Mask);
- Mask.append(NumElts - NumScalars, UndefMaskElem);
+ Mask.append(InsertVecSz - Mask.size(), UndefMaskElem);
} else {
- Mask.assign(NumElts, UndefMaskElem);
- std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
+ Mask.assign(VecSz, UndefMaskElem);
+ std::iota(Mask.begin(), std::next(Mask.begin(), InsertVecSz), 0);
}
- unsigned Offset = *getInsertIndex(VL0, 0);
bool IsIdentity = true;
- SmallVector<int> PrevMask(NumElts, UndefMaskElem);
+ SmallVector<int> PrevMask(InsertVecSz, UndefMaskElem);
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
- Optional<int> InsertIdx = getInsertIndex(VL[PrevMask[I]], 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- DemandedElts.setBit(*InsertIdx);
- IsIdentity &= *InsertIdx - Offset == I;
- Mask[*InsertIdx - Offset] = I;
+ unsigned InsertIdx = *getInsertIndex(VL[PrevMask[I]]);
+ DemandedElts.setBit(InsertIdx);
+ IsIdentity &= InsertIdx - OffsetBeg == I;
+ Mask[InsertIdx - OffsetBeg] = I;
}
assert(Offset < NumElts && "Failed to find vector index offset");
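// A minimal standalone sketch of the sub-register sizing math above; the toy
// powerOf2Ceil mimics PowerOf2Ceil from llvm/Support/MathExtras.h, and
// InsertShape/computeInsertShape are illustrative names only.
#include <algorithm>

unsigned powerOf2Ceil(unsigned X) {
  unsigned P = 1;
  while (P < X)
    P <<= 1;
  return P;
}

struct InsertShape {
  unsigned Offset;      // start of the first touched register-sized chunk
  unsigned VecSz;       // size of the whole region to shuffle, in elements
  unsigned InsertVecSz; // size of the subvector actually built
  bool IsWholeSubvector;
};

// Given the lowest/highest insert positions and the register width in
// scalars, derive the region to build and the subvector to insert.
InsertShape computeInsertShape(unsigned OffsetBeg, unsigned OffsetEnd,
                               unsigned VecScalarsSz) {
  unsigned VecSz =
      (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * VecScalarsSz;
  unsigned Offset = VecScalarsSz * (OffsetBeg / VecScalarsSz);
  unsigned InsertVecSz = std::min(
      powerOf2Ceil(OffsetEnd - OffsetBeg + 1),
      ((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) * VecScalarsSz);
  bool Whole = OffsetBeg == Offset && ((OffsetEnd + 1) % VecScalarsSz == 0);
  return {Offset, VecSz, InsertVecSz, Whole};
}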
@@ -5121,32 +6211,41 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts,
/*Insert*/ true, /*Extract*/ false);
- if (IsIdentity && NumElts != NumScalars && Offset % NumScalars != 0) {
- // FIXME: Replace with SK_InsertSubvector once it is properly supported.
- unsigned Sz = PowerOf2Ceil(Offset + NumScalars);
- Cost += TTI->getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc,
- FixedVectorType::get(SrcVecTy->getElementType(), Sz));
- } else if (!IsIdentity) {
- auto *FirstInsert =
- cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
- return !is_contained(E->Scalars,
- cast<Instruction>(V)->getOperand(0));
- }));
- if (isUndefVector(FirstInsert->getOperand(0))) {
- Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask);
+      // First cost - resize to the actual vector size if it is not an
+      // identity shuffle or we need to shift the vector.
+ // Do not calculate the cost if the actual size is the register size and
+ // we can merge this shuffle with the following SK_Select.
+ auto *InsertVecTy =
+ FixedVectorType::get(SrcVecTy->getElementType(), InsertVecSz);
+ if (!IsIdentity)
+ Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ InsertVecTy, Mask);
+ auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
+ return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
+ }));
+      // Second cost - permutation with a subvector, if some elements come
+      // from the initial vector, or insertion of a subvector.
+ // TODO: Implement the analysis of the FirstInsert->getOperand(0)
+ // subvector of ActualVecTy.
+ if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
+ !IsWholeSubvector) {
+ if (InsertVecSz != VecSz) {
+ auto *ActualVecTy =
+ FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
+ Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
+ None, OffsetBeg - Offset, InsertVecTy);
} else {
- SmallVector<int> InsertMask(NumElts);
- std::iota(InsertMask.begin(), InsertMask.end(), 0);
- for (unsigned I = 0; I < NumElts; I++) {
+ for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I)
+ Mask[I] = I;
+ for (unsigned I = OffsetBeg - Offset, End = OffsetEnd - Offset;
+ I <= End; ++I)
if (Mask[I] != UndefMaskElem)
- InsertMask[Offset + I] = NumElts + I;
- }
- Cost +=
- TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVecTy, InsertMask);
+ Mask[I] = I + VecSz;
+ for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
+ Mask[I] = I;
+ Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
}
}
-
return Cost;
}
case Instruction::ZExt:
@@ -5227,9 +6326,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// If the selects are the only uses of the compares, they will be dead
// and we can adjust the cost by removing their cost.
if (IntrinsicAndUse.second)
- IntrinsicCost -=
- TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy,
+ MaskTy, VecPred, CostKind);
VecCost = std::min(VecCost, IntrinsicCost);
}
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
@@ -5309,7 +6407,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_UniformConstantValue;
+ any_of(VL,
+ [](Value *V) {
+ return isa<GetElementPtrInst>(V) &&
+ !isConstant(
+ cast<GetElementPtrInst>(V)->getOperand(1));
+ })
+ ? TargetTransformInfo::OK_AnyValue
+ : TargetTransformInfo::OK_UniformConstantValue;
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
@@ -5340,7 +6445,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
Align CommonAlignment = Alignment;
for (Value *V : VL)
CommonAlignment =
- commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
+ std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());
VecLdCost = TTI->getGatherScatterOpCost(
Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, VL0);
@@ -5458,39 +6563,21 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI::CastContextHint::None, CostKind);
}
- SmallVector<int> Mask;
- buildSuffleEntryMask(
- E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
- [E](Instruction *I) {
- assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
- if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
- auto *AltCI0 = cast<CmpInst>(E->getAltOp());
- auto *CI = cast<CmpInst>(I);
- CmpInst::Predicate P0 = CI0->getPredicate();
- CmpInst::Predicate AltP0 = AltCI0->getPredicate();
- CmpInst::Predicate AltP0Swapped =
- CmpInst::getSwappedPredicate(AltP0);
- CmpInst::Predicate CurrentPred = CI->getPredicate();
- CmpInst::Predicate CurrentPredSwapped =
- CmpInst::getSwappedPredicate(CurrentPred);
- if (P0 == AltP0 || P0 == AltP0Swapped) {
- // Alternate cmps have same/swapped predicate as main cmps but
- // different order of compatible operands.
- return !(
- (P0 == CurrentPred &&
- areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
- I->getOperand(0), I->getOperand(1))) ||
- (P0 == CurrentPredSwapped &&
- areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
- I->getOperand(1), I->getOperand(0))));
- }
- return CurrentPred != P0 && CurrentPredSwapped != P0;
- }
- return I->getOpcode() == E->getAltOpcode();
- },
- Mask);
- CommonCost =
- TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy, Mask);
+ if (E->ReuseShuffleIndices.empty()) {
+ CommonCost =
+ TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy);
+ } else {
+ SmallVector<int> Mask;
+ buildShuffleEntryMask(
+ E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+ [E](Instruction *I) {
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ return I->getOpcode() == E->getAltOpcode();
+ },
+ Mask);
+ CommonCost = TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
+ }
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return CommonCost + VecCost - ScalarCost;
}
@@ -5618,7 +6705,10 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// No need to vectorize inserts of gathered values.
if (VectorizableTree.size() == 2 &&
isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
- VectorizableTree[1]->State == TreeEntry::NeedToGather)
+ VectorizableTree[1]->State == TreeEntry::NeedToGather &&
+ (VectorizableTree[1]->getVectorFactor() <= 2 ||
+ !(isSplat(VectorizableTree[1]->Scalars) ||
+ allConstant(VectorizableTree[1]->Scalars))))
return true;
// We can vectorize the tree if its size is greater than or equal to the
@@ -5748,20 +6838,26 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
return false;
auto *IE1 = VU;
auto *IE2 = V;
+ unsigned Idx1 = *getInsertIndex(IE1);
+ unsigned Idx2 = *getInsertIndex(IE2);
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
do {
- if (IE2 == VU || IE1 == V)
- return true;
+ if (IE2 == VU)
+ return VU->hasOneUse();
+ if (IE1 == V)
+ return V->hasOneUse();
if (IE1) {
- if (IE1 != VU && !IE1->hasOneUse())
+ if ((IE1 != VU && !IE1->hasOneUse()) ||
+ getInsertIndex(IE1).value_or(Idx2) == Idx2)
IE1 = nullptr;
else
IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
}
if (IE2) {
- if (IE2 != V && !IE2->hasOneUse())
+ if ((IE2 != V && !IE2->hasOneUse()) ||
+ getInsertIndex(IE2).value_or(Idx1) == Idx1)
IE2 = nullptr;
else
IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
@@ -5770,6 +6866,153 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
return false;
}
+/// Checks if the \p IE1 instruction is followed by the \p IE2 instruction in
+/// the buildvector sequence.
+static bool isFirstInsertElement(const InsertElementInst *IE1,
+ const InsertElementInst *IE2) {
+ if (IE1 == IE2)
+ return false;
+ const auto *I1 = IE1;
+ const auto *I2 = IE2;
+ const InsertElementInst *PrevI1;
+ const InsertElementInst *PrevI2;
+ unsigned Idx1 = *getInsertIndex(IE1);
+ unsigned Idx2 = *getInsertIndex(IE2);
+ do {
+ if (I2 == IE1)
+ return true;
+ if (I1 == IE2)
+ return false;
+ PrevI1 = I1;
+ PrevI2 = I2;
+ if (I1 && (I1 == IE1 || I1->hasOneUse()) &&
+ getInsertIndex(I1).value_or(Idx2) != Idx2)
+ I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
+ if (I2 && ((I2 == IE2 || I2->hasOneUse())) &&
+ getInsertIndex(I2).value_or(Idx1) != Idx1)
+ I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
+ } while ((I1 && PrevI1 != I1) || (I2 && PrevI2 != I2));
+ llvm_unreachable("Two different buildvectors not expected.");
+}
+
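// A minimal standalone sketch of the chain walk above, with a toy node in
// place of InsertElementInst; the real code additionally checks hasOneUse()
// and insert-index collisions before stepping to the base operand.
#include <cassert>

struct InsertNode {
  InsertNode *Base = nullptr; // operand 0 of the insertelement
  unsigned Idx = 0;           // insert position
};

// Walk both chains toward the shared base; the chain that reaches the other
// instruction was built on top of it, so that instruction comes first.
bool isFirstInsert(InsertNode *IE1, InsertNode *IE2) {
  const InsertNode *I1 = IE1, *I2 = IE2;
  while (I1 || I2) {
    if (I2 == IE1)
      return true; // IE2's chain passes through IE1 -> IE1 is first
    if (I1 == IE2)
      return false; // IE1's chain passes through IE2 -> IE2 is first
    if (I1)
      I1 = I1->Base;
    if (I2)
      I2 = I2->Base;
  }
  return false; // different buildvectors; the function above asserts instead
}

int main() {
  InsertNode A;
  InsertNode B{&A, 1};
  InsertNode C{&B, 2}; // buildvector chain A -> B -> C
  assert(isFirstInsert(&B, &C) && !isFirstInsert(&C, &B));
}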
+namespace {
+/// Returns the incoming Value * if the requested type is Value * too, or a
+/// default value otherwise.
+struct ValueSelect {
+ template <typename U>
+ static typename std::enable_if<std::is_same<Value *, U>::value, Value *>::type
+ get(Value *V) {
+ return V;
+ }
+ template <typename U>
+ static typename std::enable_if<!std::is_same<Value *, U>::value, U>::type
+ get(Value *) {
+ return U();
+ }
+};
+} // namespace
+
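// A minimal standalone illustration of the enable_if dispatch in ValueSelect,
// using int * in place of Value *; Select is an illustrative name only.
#include <cassert>
#include <type_traits>

struct Select {
  // Chosen when the requested type is the pointer type itself.
  template <typename U>
  static typename std::enable_if<std::is_same<int *, U>::value, int *>::type
  get(int *V) {
    return V;
  }
  // Chosen for every other type; yields a default-constructed U.
  template <typename U>
  static typename std::enable_if<!std::is_same<int *, U>::value, U>::type
  get(int *) {
    return U();
  }
};

int main() {
  int X = 0;
  assert(Select::get<int *>(&X) == &X); // pointer flavor returns the input
  assert(Select::get<long>(&X) == 0);   // non-pointer flavor returns U()
}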
+/// Does the analysis of the provided shuffle masks and performs the requested
+/// actions on the vectors with the given shuffle masks. It tries to do it in
+/// several steps:
+/// 1. If the Base vector is not an undef vector, resize the very first mask to
+/// have a common VF and perform the action for 2 input vectors (including the
+/// non-undef Base). Other shuffle masks are combined with the result of the
+/// first stage and processed as a shuffle of 2 vectors.
+/// 2. If the Base is an undef vector and there is only 1 shuffle mask, perform
+/// the action only for that single vector with the given mask, if it is not
+/// the identity mask.
+/// 3. If > 2 masks are used, perform the remaining shuffle actions for 2
+/// vectors, combining the masks properly between the steps.
+template <typename T>
+static T *performExtractsShuffleAction(
+ MutableArrayRef<std::pair<T *, SmallVector<int>>> ShuffleMask, Value *Base,
+ function_ref<unsigned(T *)> GetVF,
+ function_ref<std::pair<T *, bool>(T *, ArrayRef<int>)> ResizeAction,
+ function_ref<T *(ArrayRef<int>, ArrayRef<T *>)> Action) {
+ assert(!ShuffleMask.empty() && "Empty list of shuffles for inserts.");
+ SmallVector<int> Mask(ShuffleMask.begin()->second);
+ auto VMIt = std::next(ShuffleMask.begin());
+ T *Prev = nullptr;
+ bool IsBaseNotUndef = !isUndefVector(Base);
+ if (IsBaseNotUndef) {
+ // Base is not undef, need to combine it with the next subvectors.
+ std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
+ for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
+ if (Mask[Idx] == UndefMaskElem)
+ Mask[Idx] = Idx;
+ else
+ Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
+ }
+ auto *V = ValueSelect::get<T *>(Base);
+ (void)V;
+ assert((!V || GetVF(V) == Mask.size()) &&
+ "Expected base vector of VF number of elements.");
+ Prev = Action(Mask, {nullptr, Res.first});
+ } else if (ShuffleMask.size() == 1) {
+    // Base is undef and only 1 vector is shuffled - perform the action only
+    // for a single vector, if the mask is not the identity mask.
+ std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
+ if (Res.second)
+ // Identity mask is found.
+ Prev = Res.first;
+ else
+ Prev = Action(Mask, {ShuffleMask.begin()->first});
+ } else {
+    // Base is undef and at least 2 input vectors are shuffled - perform
+    // 2-vector shuffles step by step, combining the masks between the steps.
+ unsigned Vec1VF = GetVF(ShuffleMask.begin()->first);
+ unsigned Vec2VF = GetVF(VMIt->first);
+ if (Vec1VF == Vec2VF) {
+      // No need to resize the input vectors since they are of the same size;
+      // we can shuffle them directly.
+ ArrayRef<int> SecMask = VMIt->second;
+ for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
+ if (SecMask[I] != UndefMaskElem) {
+ assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ Mask[I] = SecMask[I] + Vec1VF;
+ }
+ }
+ Prev = Action(Mask, {ShuffleMask.begin()->first, VMIt->first});
+ } else {
+ // Vectors of different sizes - resize and reshuffle.
+ std::pair<T *, bool> Res1 =
+ ResizeAction(ShuffleMask.begin()->first, Mask);
+ std::pair<T *, bool> Res2 = ResizeAction(VMIt->first, VMIt->second);
+ ArrayRef<int> SecMask = VMIt->second;
+ for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
+ if (Mask[I] != UndefMaskElem) {
+ assert(SecMask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ if (Res1.second)
+ Mask[I] = I;
+ } else if (SecMask[I] != UndefMaskElem) {
+ assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ Mask[I] = (Res2.second ? I : SecMask[I]) + VF;
+ }
+ }
+ Prev = Action(Mask, {Res1.first, Res2.first});
+ }
+ VMIt = std::next(VMIt);
+ }
+ // Perform requested actions for the remaining masks/vectors.
+ for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) {
+ // Shuffle other input vectors, if any.
+ std::pair<T *, bool> Res = ResizeAction(VMIt->first, VMIt->second);
+ ArrayRef<int> SecMask = VMIt->second;
+ for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
+ if (SecMask[I] != UndefMaskElem) {
+ assert((Mask[I] == UndefMaskElem || IsBaseNotUndef) &&
+ "Multiple uses of scalars.");
+ Mask[I] = (Res.second ? I : SecMask[I]) + VF;
+ } else if (Mask[I] != UndefMaskElem) {
+ Mask[I] = I;
+ }
+ }
+ Prev = Action(Mask, {Prev, Res.first});
+ }
+ return Prev;
+}
+
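// A minimal standalone sketch of the equal-VF branch above (step 3 for two
// vectors of the same width), assuming std::vector for masks and -1 for
// UndefMaskElem; combineEqualVF is an illustrative name only.
#include <cassert>
#include <vector>

static constexpr int kUndef = -1;

// Fold the second mask into the first, offsetting its elements by the first
// vector's VF so the result selects from the concatenation of both inputs.
std::vector<int> combineEqualVF(std::vector<int> Mask,
                                const std::vector<int> &SecMask,
                                unsigned Vec1VF) {
  for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
    if (SecMask[I] != kUndef) {
      assert(Mask[I] == kUndef && "Multiple uses of scalars.");
      Mask[I] = SecMask[I] + Vec1VF;
    }
  }
  return Mask;
}

int main() {
  // First shuffle defines lanes 0-1, second defines lanes 2-3; the combined
  // mask reads lanes 4-5 of the concatenated pair for the second half.
  auto M = combineEqualVF({0, 1, kUndef, kUndef}, {kUndef, kUndef, 0, 1}, 4);
  assert(M == (std::vector<int>{0, 1, 4, 5}));
}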
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -5778,7 +7021,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
- TreeEntry &TE = *VectorizableTree[I].get();
+ TreeEntry &TE = *VectorizableTree[I];
InstructionCost C = getEntryCost(&TE, VectorizedVals);
Cost += C;
@@ -5790,9 +7033,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallPtrSet<Value *, 16> ExtractCostCalculated;
InstructionCost ExtractCost = 0;
- SmallVector<unsigned> VF;
- SmallVector<SmallVector<int>> ShuffleMask;
- SmallVector<Value *> FirstUsers;
+ SmallVector<MapVector<const TreeEntry *, SmallVector<int>>> ShuffleMasks;
+ SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
SmallVector<APInt> DemandedElts;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
@@ -5819,42 +7061,59 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// to detect it as a final shuffled/identity match.
if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User)) {
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
- Optional<int> InsertIdx = getInsertIndex(VU, 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- auto *It = find_if(FirstUsers, [VU](Value *V) {
- return areTwoInsertFromSameBuildVector(VU,
- cast<InsertElementInst>(V));
- });
- int VecId = -1;
- if (It == FirstUsers.end()) {
- VF.push_back(FTy->getNumElements());
- ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
- // Find the insertvector, vectorized in tree, if any.
- Value *Base = VU;
- while (isa<InsertElementInst>(Base)) {
- // Build the mask for the vectorized insertelement instructions.
- if (const TreeEntry *E = getTreeEntry(Base)) {
- VU = cast<InsertElementInst>(Base);
- do {
- int Idx = E->findLaneForValue(Base);
- ShuffleMask.back()[Idx] = Idx;
- Base = cast<InsertElementInst>(Base)->getOperand(0);
- } while (E == getTreeEntry(Base));
- break;
+ Optional<unsigned> InsertIdx = getInsertIndex(VU);
+ if (InsertIdx) {
+ const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
+ auto *It =
+ find_if(FirstUsers,
+ [VU](const std::pair<Value *, const TreeEntry *> &Pair) {
+ return areTwoInsertFromSameBuildVector(
+ VU, cast<InsertElementInst>(Pair.first));
+ });
+ int VecId = -1;
+ if (It == FirstUsers.end()) {
+ (void)ShuffleMasks.emplace_back();
+ SmallVectorImpl<int> &Mask = ShuffleMasks.back()[ScalarTE];
+ if (Mask.empty())
+ Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ // Find the insertvector, vectorized in tree, if any.
+ Value *Base = VU;
+ while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
+ if (IEBase != EU.User &&
+ (!IEBase->hasOneUse() ||
+ getInsertIndex(IEBase).value_or(*InsertIdx) == *InsertIdx))
+ break;
+ // Build the mask for the vectorized insertelement instructions.
+ if (const TreeEntry *E = getTreeEntry(IEBase)) {
+ VU = IEBase;
+ do {
+ IEBase = cast<InsertElementInst>(Base);
+ int Idx = *getInsertIndex(IEBase);
+ assert(Mask[Idx] == UndefMaskElem &&
+ "InsertElementInstruction used already.");
+ Mask[Idx] = Idx;
+ Base = IEBase->getOperand(0);
+ } while (E == getTreeEntry(Base));
+ break;
+ }
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
}
- Base = cast<InsertElementInst>(Base)->getOperand(0);
+ FirstUsers.emplace_back(VU, ScalarTE);
+ DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
+ VecId = FirstUsers.size() - 1;
+ } else {
+ if (isFirstInsertElement(VU, cast<InsertElementInst>(It->first)))
+ It->first = VU;
+ VecId = std::distance(FirstUsers.begin(), It);
}
- FirstUsers.push_back(VU);
- DemandedElts.push_back(APInt::getZero(VF.back()));
- VecId = FirstUsers.size() - 1;
- } else {
- VecId = std::distance(FirstUsers.begin(), It);
+ int InIdx = *InsertIdx;
+ SmallVectorImpl<int> &Mask = ShuffleMasks[VecId][ScalarTE];
+ if (Mask.empty())
+ Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ Mask[InIdx] = EU.Lane;
+ DemandedElts[VecId].setBit(InIdx);
+ continue;
}
- int Idx = *InsertIdx;
- ShuffleMask[VecId][Idx] = EU.Lane;
- DemandedElts[VecId].setBit(Idx);
- continue;
}
}
@@ -5878,86 +7137,75 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
- if (FirstUsers.size() == 1) {
- int Limit = ShuffleMask.front().size() * 2;
- if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) &&
- !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
- InstructionCost C = TTI->getShuffleCost(
+ auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef<int> Mask) {
+ InstructionCost C = 0;
+ unsigned VF = Mask.size();
+ unsigned VecVF = TE->getVectorFactor();
+ if (VF != VecVF &&
+ (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }) ||
+ (all_of(Mask,
+ [VF](int Idx) { return Idx < 2 * static_cast<int>(VF); }) &&
+ !ShuffleVectorInst::isIdentityMask(Mask)))) {
+ SmallVector<int> OrigMask(VecVF, UndefMaskElem);
+ std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
+ OrigMask.begin());
+ C = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc,
- cast<FixedVectorType>(FirstUsers.front()->getType()),
- ShuffleMask.front());
- LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
- << " for final shuffle of insertelement external users "
- << *VectorizableTree.front()->Scalars.front() << ".\n"
- << "SLP: Current total cost = " << Cost << "\n");
+ FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask);
+ LLVM_DEBUG(
+ dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of insertelement external users.\n";
+ TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
Cost += C;
+ return std::make_pair(TE, true);
}
+ return std::make_pair(TE, false);
+ };
+ // Calculate the cost of the reshuffled vectors, if any.
+ for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
+ Value *Base = cast<Instruction>(FirstUsers[I].first)->getOperand(0);
+ unsigned VF = ShuffleMasks[I].begin()->second.size();
+ auto *FTy = FixedVectorType::get(
+ cast<VectorType>(FirstUsers[I].first->getType())->getElementType(), VF);
+ auto Vector = ShuffleMasks[I].takeVector();
+ auto &&EstimateShufflesCost = [this, FTy,
+ &Cost](ArrayRef<int> Mask,
+ ArrayRef<const TreeEntry *> TEs) {
+ assert((TEs.size() == 1 || TEs.size() == 2) &&
+ "Expected exactly 1 or 2 tree entries.");
+ if (TEs.size() == 1) {
+ int Limit = 2 * Mask.size();
+ if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) ||
+ !ShuffleVectorInst::isIdentityMask(Mask)) {
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of insertelement "
+ "external users.\n";
+ TEs.front()->dump();
+ dbgs() << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ }
+ } else {
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of vector node and external "
+ "insertelement users.\n";
+ if (TEs.front()) { TEs.front()->dump(); } TEs.back()->dump();
+ dbgs() << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ }
+ return TEs.back();
+ };
+ (void)performExtractsShuffleAction<const TreeEntry>(
+ makeMutableArrayRef(Vector.data(), Vector.size()), Base,
+ [](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF,
+ EstimateShufflesCost);
InstructionCost InsertCost = TTI->getScalarizationOverhead(
- cast<FixedVectorType>(FirstUsers.front()->getType()),
- DemandedElts.front(), /*Insert*/ true, /*Extract*/ false);
- LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
- << " for insertelements gather.\n"
- << "SLP: Current total cost = " << Cost << "\n");
- Cost -= InsertCost;
- } else if (FirstUsers.size() >= 2) {
- unsigned MaxVF = *std::max_element(VF.begin(), VF.end());
- // Combined masks of the first 2 vectors.
- SmallVector<int> CombinedMask(MaxVF, UndefMaskElem);
- copy(ShuffleMask.front(), CombinedMask.begin());
- APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF);
- auto *VecTy = FixedVectorType::get(
- cast<VectorType>(FirstUsers.front()->getType())->getElementType(),
- MaxVF);
- for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) {
- if (ShuffleMask[1][I] != UndefMaskElem) {
- CombinedMask[I] = ShuffleMask[1][I] + MaxVF;
- CombinedDemandedElts.setBit(I);
- }
- }
- InstructionCost C =
- TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
- LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
- << " for final shuffle of vector node and external "
- "insertelement users "
- << *VectorizableTree.front()->Scalars.front() << ".\n"
- << "SLP: Current total cost = " << Cost << "\n");
- Cost += C;
- InstructionCost InsertCost = TTI->getScalarizationOverhead(
- VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false);
- LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
- << " for insertelements gather.\n"
- << "SLP: Current total cost = " << Cost << "\n");
+ cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
+ /*Insert*/ true, /*Extract*/ false);
Cost -= InsertCost;
- for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
- // Other elements - permutation of 2 vectors (the initial one and the
- // next Ith incoming vector).
- unsigned VF = ShuffleMask[I].size();
- for (unsigned Idx = 0; Idx < VF; ++Idx) {
- int Mask = ShuffleMask[I][Idx];
- if (Mask != UndefMaskElem)
- CombinedMask[Idx] = MaxVF + Mask;
- else if (CombinedMask[Idx] != UndefMaskElem)
- CombinedMask[Idx] = Idx;
- }
- for (unsigned Idx = VF; Idx < MaxVF; ++Idx)
- if (CombinedMask[Idx] != UndefMaskElem)
- CombinedMask[Idx] = Idx;
- InstructionCost C =
- TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
- LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
- << " for final shuffle of vector node and external "
- "insertelement users "
- << *VectorizableTree.front()->Scalars.front() << ".\n"
- << "SLP: Current total cost = " << Cost << "\n");
- Cost += C;
- InstructionCost InsertCost = TTI->getScalarizationOverhead(
- cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
- /*Insert*/ true, /*Extract*/ false);
- LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
- << " for insertelements gather.\n"
- << "SLP: Current total cost = " << Cost << "\n");
- Cost -= InsertCost;
- }
}
#ifndef NDEBUG
@@ -6050,6 +7298,12 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
}
}
+ if (UsedTEs.empty()) {
+ assert(all_of(TE->Scalars, UndefValue::classof) &&
+ "Expected vector of undefs only.");
+ return None;
+ }
+
unsigned VF = 0;
if (UsedTEs.size() == 1) {
// Try to find the perfect match in another gather node at first.
@@ -6109,17 +7363,11 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
return None;
}
-InstructionCost
-BoUpSLP::getGatherCost(FixedVectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices,
- bool NeedToShuffle) const {
- unsigned NumElts = Ty->getNumElements();
- APInt DemandedElts = APInt::getZero(NumElts);
- for (unsigned I = 0; I < NumElts; ++I)
- if (!ShuffledIndices.count(I))
- DemandedElts.setBit(I);
+InstructionCost BoUpSLP::getGatherCost(FixedVectorType *Ty,
+ const APInt &ShuffledIndices,
+ bool NeedToShuffle) const {
InstructionCost Cost =
- TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
+ TTI->getScalarizationOverhead(Ty, ~ShuffledIndices, /*Insert*/ true,
/*Extract*/ false);
if (NeedToShuffle)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
@@ -6136,19 +7384,19 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Find the cost of inserting/extracting values from the vector.
// Check if the same elements are inserted several times and count them as
// shuffle candidates.
- DenseSet<unsigned> ShuffledElements;
+ APInt ShuffledElements = APInt::getZero(VL.size());
DenseSet<Value *> UniqueElements;
  // Iterate in reverse order to consider insert elements with the highest cost.
for (unsigned I = VL.size(); I > 0; --I) {
unsigned Idx = I - 1;
// No need to shuffle duplicates for constants.
if (isConstant(VL[Idx])) {
- ShuffledElements.insert(Idx);
+ ShuffledElements.setBit(Idx);
continue;
}
if (!UniqueElements.insert(VL[Idx]).second) {
DuplicateNonConst = true;
- ShuffledElements.insert(Idx);
+ ShuffledElements.setBit(Idx);
}
}
return getGatherCost(VecTy, ShuffledElements, DuplicateNonConst);
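// A minimal standalone sketch of the marking loop above, assuming std::bitset
// in place of APInt and ints standing in for scalar values; the complement of
// the result is what getGatherCost hands to getScalarizationOverhead as the
// demanded-elements mask (the ~ShuffledIndices above).
#include <bitset>
#include <cstddef>
#include <unordered_set>
#include <vector>

template <std::size_t N>
std::bitset<N> shuffledElements(const std::vector<int> &VL,
                                const std::vector<bool> &IsConst) {
  std::bitset<N> Shuffled;
  std::unordered_set<int> Unique;
  // Scan in reverse so the highest-lane occurrence of a duplicate stays
  // demanded; constants and repeats are served by a shuffle instead.
  for (std::size_t I = VL.size(); I > 0; --I) {
    std::size_t Idx = I - 1;
    if (IsConst[Idx] || !Unique.insert(VL[Idx]).second)
      Shuffled.set(Idx);
  }
  return Shuffled;
}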
@@ -6173,14 +7421,83 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
// Get the basic block this bundle is in. All instructions in the bundle
- // should be in this block.
+ // should be in this block (except for extractelement-like instructions with
+  // constant indices).
auto *Front = E->getMainOp();
auto *BB = Front->getParent();
assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
+ if (E->getOpcode() == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(V))
+ return true;
auto *I = cast<Instruction>(V);
- return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
+ return !E->isOpcodeOrAlt(I) || I->getParent() == BB ||
+ isVectorLikeInstWithConstOps(I);
}));
+ auto &&FindLastInst = [E, Front, this, &BB]() {
+ Instruction *LastInst = Front;
+ for (Value *V : E->Scalars) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ continue;
+ if (LastInst->getParent() == I->getParent()) {
+ if (LastInst->comesBefore(I))
+ LastInst = I;
+ continue;
+ }
+ assert(isVectorLikeInstWithConstOps(LastInst) &&
+ isVectorLikeInstWithConstOps(I) &&
+ "Expected vector-like insts only.");
+ if (!DT->isReachableFromEntry(LastInst->getParent())) {
+ LastInst = I;
+ continue;
+ }
+ if (!DT->isReachableFromEntry(I->getParent()))
+ continue;
+ auto *NodeA = DT->getNode(LastInst->getParent());
+ auto *NodeB = DT->getNode(I->getParent());
+ assert(NodeA && "Should only process reachable instructions");
+ assert(NodeB && "Should only process reachable instructions");
+ assert((NodeA == NodeB) ==
+ (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn())
+ LastInst = I;
+ }
+ BB = LastInst->getParent();
+ return LastInst;
+ };
+
+ auto &&FindFirstInst = [E, Front]() {
+ Instruction *FirstInst = Front;
+ for (Value *V : E->Scalars) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ continue;
+ if (I->comesBefore(FirstInst))
+ FirstInst = I;
+ }
+ return FirstInst;
+ };
+
+ // Set the insert point to the beginning of the basic block if the entry
+ // should not be scheduled.
+ if (E->State != TreeEntry::NeedToGather &&
+ doesNotNeedToSchedule(E->Scalars)) {
+ Instruction *InsertInst;
+ if (all_of(E->Scalars, isUsedOutsideBlock))
+ InsertInst = FindLastInst();
+ else
+ InsertInst = FindFirstInst();
+ // If the instruction is PHI, set the insert point after all the PHIs.
+ if (isa<PHINode>(InsertInst))
+ InsertInst = BB->getFirstNonPHI();
+ BasicBlock::iterator InsertPt = InsertInst->getIterator();
+ Builder.SetInsertPoint(BB, InsertPt);
+ Builder.SetCurrentDebugLocation(Front->getDebugLoc());
+ return;
+ }
+
// The last instruction in the bundle in program order.
Instruction *LastInst = nullptr;
@@ -6189,8 +7506,10 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
// VL.back() and iterate over schedule data until we reach the end of the
// bundle. The end of the bundle is marked by null ScheduleData.
if (BlocksSchedules.count(BB)) {
- auto *Bundle =
- BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
+ Value *V = E->isOneOf(E->Scalars.back());
+ if (doesNotNeedToBeScheduled(V))
+ V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled);
+ auto *Bundle = BlocksSchedules[BB]->getScheduleData(V);
if (Bundle && Bundle->isPartOfBundle())
for (; Bundle; Bundle = Bundle->NextInBundle)
if (Bundle->OpValue == Bundle->Inst)
@@ -6216,19 +7535,16 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
// we both exit early from buildTree_rec and that the bundle be out-of-order
// (causing us to iterate all the way to the end of the block).
if (!LastInst) {
- SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end());
- for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
- if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I))
- LastInst = &I;
- if (Bundle.empty())
- break;
- }
+ LastInst = FindLastInst();
+ // If the instruction is PHI, set the insert point after all the PHIs.
+ if (isa<PHINode>(LastInst))
+ LastInst = BB->getFirstNonPHI()->getPrevNode();
}
assert(LastInst && "Failed to find last instruction in bundle");
// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.
- Builder.SetInsertPoint(BB, ++LastInst->getIterator());
+ Builder.SetInsertPoint(BB, std::next(LastInst->getIterator()));
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
@@ -6358,8 +7674,15 @@ public:
} // namespace
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
- unsigned VF = VL.size();
+ const unsigned VF = VL.size();
InstructionsState S = getSameOpcode(VL);
+ // Special processing for GEPs bundle, which may include non-gep values.
+ if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
+ const auto *It =
+ find_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); });
+ if (It != VL.end())
+ S = getSameOpcode(*It);
+ }
if (S.getOpcode()) {
if (TreeEntry *E = getTreeEntry(S.OpValue))
if (E->isSame(VL)) {
@@ -6414,7 +7737,18 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
}
}
- // Check that every instruction appears once in this bundle.
+ // Can't vectorize this, so simply build a new vector with each lane
+ // corresponding to the requested value.
+ return createBuildVector(VL);
+}
+Value *BoUpSLP::createBuildVector(ArrayRef<Value *> VL) {
+ assert(any_of(VectorizableTree,
+ [VL](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::NeedToGather && TE->isSame(VL);
+ }) &&
+ "Non-matching gather node.");
+ unsigned VF = VL.size();
+ // Exploit possible reuse of values across lanes.
SmallVector<int> ReuseShuffleIndicies;
SmallVector<Value *> UniqueValues;
if (VL.size() > 2) {
@@ -6447,6 +7781,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
UndefMaskElem);
} else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
+ if (UniqueValues.empty()) {
+ assert(all_of(VL, UndefValue::classof) && "Expected list of undefs.");
+ NumValues = VF;
+ }
ReuseShuffleIndicies.clear();
UniqueValues.clear();
UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues));
@@ -6486,7 +7824,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallVector<const TreeEntry *> Entries;
Optional<TargetTransformInfo::ShuffleKind> Shuffle =
isGatherShuffledEntry(E, Mask, Entries);
- if (Shuffle.hasValue()) {
+ if (Shuffle) {
assert((Entries.size() == 1 || Entries.size() == 2) &&
"Expected shuffle of 1 or 2 entries.");
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
@@ -6520,14 +7858,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
- assert(
- (E->ReorderIndices.empty() || E != VectorizableTree.front().get()) &&
- "PHI reordering is free.");
+ assert((E->ReorderIndices.empty() ||
+ E != VectorizableTree.front().get() ||
+ !E->UserTreeIndices.empty()) &&
+ "PHI reordering is free.");
auto *PH = cast<PHINode>(VL0);
Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
Value *V = NewPhi;
+
+ // Adjust insertion point once all PHI's have been generated.
+ Builder.SetInsertPoint(&*PH->getParent()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(PH->getDebugLoc());
+
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -6593,7 +7937,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
cast<FixedVectorType>(FirstInsert->getType())->getNumElements();
const unsigned NumScalars = E->Scalars.size();
- unsigned Offset = *getInsertIndex(VL0, 0);
+ unsigned Offset = *getInsertIndex(VL0);
assert(Offset < NumElts && "Failed to find vector index offset");
// Create shuffle to resize vector
@@ -6611,11 +7955,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
Value *Scalar = E->Scalars[PrevMask[I]];
- Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- IsIdentity &= *InsertIdx - Offset == I;
- Mask[*InsertIdx - Offset] = I;
+ unsigned InsertIdx = *getInsertIndex(Scalar);
+ IsIdentity &= InsertIdx - Offset == I;
+ Mask[InsertIdx - Offset] = I;
}
if (!IsIdentity || NumElts != NumScalars) {
V = Builder.CreateShuffleVector(V, Mask);
@@ -6802,19 +8144,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned AS = LI->getPointerAddressSpace();
Value *PO = LI->getPointerOperand();
if (E->State == TreeEntry::Vectorize) {
-
Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
+ NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
// The pointer operand uses an in-tree scalar so we add the new BitCast
- // to ExternalUses list to make sure that an extract will be generated
- // in the future.
+        // or LoadInst to the ExternalUses list to make sure that an extract
+        // will be generated in the future.
if (TreeEntry *Entry = getTreeEntry(PO)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(PO);
- ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
+ ExternalUses.emplace_back(
+ PO, PO != VecPtr ? cast<User>(VecPtr) : NewLI, FoundLane);
}
-
- NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
Value *VecPtr = vectorizeTree(E->getOperand(0));
@@ -6822,7 +8163,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Align CommonAlignment = LI->getAlign();
for (Value *V : E->Scalars)
CommonAlignment =
- commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
+ std::min(CommonAlignment, cast<LoadInst>(V)->getAlign());
NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
}
Value *V = propagateMetadata(NewLI, E->Scalars);
@@ -6847,17 +8188,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(
ScalarPtr, VecValue->getType()->getPointerTo(AS));
- StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
- SI->getAlign());
+ StoreInst *ST =
+ Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign());
- // The pointer operand uses an in-tree scalar, so add the new BitCast to
- // ExternalUses to make sure that an extract will be generated in the
- // future.
+ // The pointer operand uses an in-tree scalar, so add the new BitCast or
+ // StoreInst to ExternalUses to make sure that an extract will be
+ // generated in the future.
if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
- ExternalUses.push_back(
- ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
+ ExternalUses.push_back(ExternalUser(
+ ScalarPtr, ScalarPtr != VecPtr ? cast<User>(VecPtr) : ST,
+ FoundLane));
}
Value *V = propagateMetadata(ST, E->Scalars);
@@ -6879,8 +8221,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateGEP(GEP0->getSourceElementType(), Op0, OpVecs);
- if (Instruction *I = dyn_cast<Instruction>(V))
- V = propagateMetadata(I, E->Scalars);
+ if (Instruction *I = dyn_cast<GetElementPtrInst>(V)) {
+ SmallVector<Value *> GEPs;
+ for (Value *V : E->Scalars) {
+ if (isa<GetElementPtrInst>(V))
+ GEPs.push_back(V);
+ }
+ V = propagateMetadata(I, GEPs);
+ }
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
@@ -6913,11 +8261,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
- if (UseIntrinsic && hasVectorInstrinsicScalarOpd(IID, j)) {
+ if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, j)) {
CallInst *CEI = cast<CallInst>(VL0);
ScalarArg = CEI->getArgOperand(j);
OpVecs.push_back(CEI->getArgOperand(j));
- if (hasVectorInstrinsicOverloadedScalarOpd(IID, j))
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j))
TysForDecl.push_back(ScalarArg->getType());
continue;
}
@@ -6925,6 +8273,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *OpVec = vectorizeTree(E->getOperand(j));
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j))
+ TysForDecl.push_back(OpVec->getType());
}
Function *CF;
@@ -6997,10 +8347,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
auto *AltCI = cast<CmpInst>(E->getAltOp());
CmpInst::Predicate AltPred = AltCI->getPredicate();
- unsigned AltIdx =
- std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
- if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
- AltPred = CmpInst::getSwappedPredicate(AltPred);
V1 = Builder.CreateCmp(AltPred, LHS, RHS);
} else {
V0 = Builder.CreateCast(
@@ -7022,34 +8368,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// each vector operation.
ValueList OpScalars, AltScalars;
SmallVector<int> Mask;
- buildSuffleEntryMask(
+ buildShuffleEntryMask(
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
- if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
- auto *AltCI0 = cast<CmpInst>(E->getAltOp());
- auto *CI = cast<CmpInst>(I);
- CmpInst::Predicate P0 = CI0->getPredicate();
- CmpInst::Predicate AltP0 = AltCI0->getPredicate();
- CmpInst::Predicate AltP0Swapped =
- CmpInst::getSwappedPredicate(AltP0);
- CmpInst::Predicate CurrentPred = CI->getPredicate();
- CmpInst::Predicate CurrentPredSwapped =
- CmpInst::getSwappedPredicate(CurrentPred);
- if (P0 == AltP0 || P0 == AltP0Swapped) {
- // Alternate cmps have same/swapped predicate as main cmps but
- // different order of compatible operands.
- return !(
- (P0 == CurrentPred &&
- areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
- I->getOperand(0), I->getOperand(1))) ||
- (P0 == CurrentPredSwapped &&
- areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
- I->getOperand(1), I->getOperand(0))));
- }
- return CurrentPred != P0 && CurrentPredSwapped != P0;
- }
- return I->getOpcode() == E->getAltOpcode();
+ return isAlternateInstruction(I, E->getMainOp(), E->getAltOp());
},
Mask, &OpScalars, &AltScalars);
@@ -7080,6 +8403,17 @@ Value *BoUpSLP::vectorizeTree() {
return vectorizeTree(ExternallyUsedValues);
}
+namespace {
+/// Data type for handling buildvector sequences with the reused scalars from
+/// other tree entries.
+struct ShuffledInsertData {
+ /// List of insertelements to be replaced by shuffles.
+ SmallVector<InsertElementInst *> InsertElements;
+ /// The parent vectors and shuffle mask for the given list of inserts.
+ MapVector<Value *, SmallVector<int>> ValueMasks;
+};
+} // namespace
+
Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// All blocks must be scheduled before any instructions are inserted.
@@ -7113,6 +8447,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
<< " values .\n");
+ SmallVector<ShuffledInsertData> ShuffledInserts;
+  // Maps a vector instruction to the original insertelement instruction.
+ DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -7126,6 +8463,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
assert(E && "Invalid scalar");
assert(E->State != TreeEntry::NeedToGather &&
"Extracting from a gather list");
+ // Non-instruction pointers are not deleted, just skip them.
+ if (E->getOpcode() == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(Scalar))
+ continue;
Value *Vec = E->VectorizedValue;
assert(Vec && "Can't find vectorizable value");
@@ -7152,6 +8493,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
"In-tree scalar of vector type is not insertelement?");
+ auto *IE = cast<InsertElementInst>(Scalar);
+ VectorToInsertElement.try_emplace(Vec, IE);
return Vec;
};
// If User == nullptr, the Scalar is used as extra arg. Generate
@@ -7180,6 +8523,69 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
continue;
}
+ if (auto *VU = dyn_cast<InsertElementInst>(User)) {
+ // Skip if the scalar is another vector op or Vec is not an instruction.
+ if (!Scalar->getType()->isVectorTy() && isa<Instruction>(Vec)) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
+ Optional<unsigned> InsertIdx = getInsertIndex(VU);
+ if (InsertIdx) {
+ // Need to use original vector, if the root is truncated.
+ if (MinBWs.count(Scalar) &&
+ VectorizableTree[0]->VectorizedValue == Vec)
+ Vec = VectorRoot;
+ auto *It =
+ find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
+ // Checks if 2 insertelements are from the same buildvector.
+ InsertElementInst *VecInsert = Data.InsertElements.front();
+ return areTwoInsertFromSameBuildVector(VU, VecInsert);
+ });
+ unsigned Idx = *InsertIdx;
+ if (It == ShuffledInserts.end()) {
+ (void)ShuffledInserts.emplace_back();
+ It = std::next(ShuffledInserts.begin(),
+ ShuffledInserts.size() - 1);
+ SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
+ if (Mask.empty())
+ Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ // Find the insertvector, vectorized in tree, if any.
+ Value *Base = VU;
+ while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
+ if (IEBase != User &&
+ (!IEBase->hasOneUse() ||
+ getInsertIndex(IEBase).value_or(Idx) == Idx))
+ break;
+ // Build the mask for the vectorized insertelement instructions.
+ if (const TreeEntry *E = getTreeEntry(IEBase)) {
+ do {
+ IEBase = cast<InsertElementInst>(Base);
+ int IEIdx = *getInsertIndex(IEBase);
+ assert(Mask[Idx] == UndefMaskElem &&
+ "InsertElementInstruction used already.");
+ Mask[IEIdx] = IEIdx;
+ Base = IEBase->getOperand(0);
+ } while (E == getTreeEntry(Base));
+ break;
+ }
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
+              // After vectorization the def-use chain has changed, so we
+              // need to look through the original insertelement instructions
+              // if they got replaced by vector instructions.
+ auto It = VectorToInsertElement.find(Base);
+ if (It != VectorToInsertElement.end())
+ Base = It->second;
+ }
+ }
+ SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
+ if (Mask.empty())
+ Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ Mask[Idx] = ExternalUse.Lane;
+ It->InsertElements.push_back(cast<InsertElementInst>(User));
+ continue;
+ }
+ }
+ }
+ }
+
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
@@ -7215,6 +8621,221 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
}
+ // Checks if the mask is an identity mask.
+ auto &&IsIdentityMask = [](ArrayRef<int> Mask, FixedVectorType *VecTy) {
+ int Limit = Mask.size();
+ return VecTy->getNumElements() == Mask.size() &&
+ all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) &&
+ ShuffleVectorInst::isIdentityMask(Mask);
+ };
+  // Tries to combine 2 different masks into a single one.
+ auto &&CombineMasks = [](SmallVectorImpl<int> &Mask, ArrayRef<int> ExtMask) {
+ SmallVector<int> NewMask(ExtMask.size(), UndefMaskElem);
+ for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
+ if (ExtMask[I] == UndefMaskElem)
+ continue;
+ NewMask[I] = Mask[ExtMask[I]];
+ }
+ Mask.swap(NewMask);
+ };
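// A minimal standalone version of CombineMasks above: indexing the inner mask
// by the outer one composes the two shuffles, i.e.
// shuffle(shuffle(V, Mask), ExtMask) == shuffle(V, combineMasks(Mask, ExtMask)).
#include <cstddef>
#include <vector>

static constexpr int kUndefElem = -1;

std::vector<int> combineMasks(const std::vector<int> &Mask,
                              const std::vector<int> &ExtMask) {
  std::vector<int> NewMask(ExtMask.size(), kUndefElem);
  for (std::size_t I = 0; I < ExtMask.size(); ++I)
    if (ExtMask[I] != kUndefElem)
      NewMask[I] = Mask[ExtMask[I]];
  return NewMask;
}
// Example: Mask = {2,3,0,1}, ExtMask = {1,0} -> {3,2}.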
+ // Peek through shuffles, trying to simplify the final shuffle code.
+ auto &&PeekThroughShuffles =
+ [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl<int> &Mask,
+ bool CheckForLengthChange = false) {
+ while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
+ // Exit if not a fixed vector type or changing size shuffle.
+ if (!isa<FixedVectorType>(SV->getType()) ||
+ (CheckForLengthChange && SV->changesLength()))
+ break;
+ // Exit if the identity or broadcast mask is found.
+ if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
+ SV->isZeroEltSplat())
+ break;
+ bool IsOp1Undef = isUndefVector(SV->getOperand(0));
+ bool IsOp2Undef = isUndefVector(SV->getOperand(1));
+ if (!IsOp1Undef && !IsOp2Undef)
+ break;
+ SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
+ SV->getShuffleMask().end());
+ CombineMasks(ShuffleMask, Mask);
+ Mask.swap(ShuffleMask);
+ if (IsOp2Undef)
+ V = SV->getOperand(0);
+ else
+ V = SV->getOperand(1);
+ }
+ };
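// A minimal standalone sketch of the peek-through loop above, with a toy node
// in place of ShuffleVectorInst; the real code also stops on identity or
// splat masks, length-changing shuffles, and two-defined-operand shuffles.
#include <cstddef>
#include <vector>

static constexpr int kUndefLane = -1;

struct ShuffleNode {
  ShuffleNode *Src = nullptr; // single defined operand; nullptr -> plain vector
  std::vector<int> Mask;      // this node's shuffle mask
};

// While the current value is a single-source shuffle, fold its mask into the
// accumulated one and step to its operand, shortening the final shuffle chain.
ShuffleNode *peekThrough(ShuffleNode *V, std::vector<int> &Mask) {
  while (V->Src) {
    std::vector<int> Folded(Mask.size(), kUndefLane);
    for (std::size_t I = 0; I < Mask.size(); ++I)
      if (Mask[I] != kUndefLane)
        Folded[I] = V->Mask[Mask[I]];
    Mask.swap(Folded);
    V = V->Src;
  }
  return V;
}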
+  // Smart shuffle instruction emission: walks through shuffle trees and
+ // tries to find the best matching vector for the actual shuffle
+ // instruction.
+ auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles,
+ &CombineMasks](Value *V1, Value *V2,
+ ArrayRef<int> Mask) -> Value * {
+ assert(V1 && "Expected at least one vector value.");
+ if (V2 && !isUndefVector(V2)) {
+ // Peek through shuffles.
+ Value *Op1 = V1;
+ Value *Op2 = V2;
+ int VF =
+ cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
+ SmallVector<int> CombinedMask1(Mask.size(), UndefMaskElem);
+ SmallVector<int> CombinedMask2(Mask.size(), UndefMaskElem);
+ for (int I = 0, E = Mask.size(); I < E; ++I) {
+ if (Mask[I] < VF)
+ CombinedMask1[I] = Mask[I];
+ else
+ CombinedMask2[I] = Mask[I] - VF;
+ }
+ Value *PrevOp1;
+ Value *PrevOp2;
+ do {
+ PrevOp1 = Op1;
+ PrevOp2 = Op2;
+ PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true);
+ PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true);
+ // Check if we have 2 resizing shuffles - need to peek through operands
+ // again.
+ if (auto *SV1 = dyn_cast<ShuffleVectorInst>(Op1))
+ if (auto *SV2 = dyn_cast<ShuffleVectorInst>(Op2))
+ if (SV1->getOperand(0)->getType() ==
+ SV2->getOperand(0)->getType() &&
+ SV1->getOperand(0)->getType() != SV1->getType() &&
+ isUndefVector(SV1->getOperand(1)) &&
+ isUndefVector(SV2->getOperand(1))) {
+ Op1 = SV1->getOperand(0);
+ Op2 = SV2->getOperand(0);
+ SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
+ SV1->getShuffleMask().end());
+ CombineMasks(ShuffleMask1, CombinedMask1);
+ CombinedMask1.swap(ShuffleMask1);
+ SmallVector<int> ShuffleMask2(SV2->getShuffleMask().begin(),
+ SV2->getShuffleMask().end());
+ CombineMasks(ShuffleMask2, CombinedMask2);
+ CombinedMask2.swap(ShuffleMask2);
+ }
+ } while (PrevOp1 != Op1 || PrevOp2 != Op2);
+ VF = cast<VectorType>(Op1->getType())
+ ->getElementCount()
+ .getKnownMinValue();
+ for (int I = 0, E = Mask.size(); I < E; ++I) {
+ if (CombinedMask2[I] != UndefMaskElem) {
+ assert(CombinedMask1[I] == UndefMaskElem &&
+ "Expected undefined mask element");
+ CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
+ }
+ }
+ Value *Vec = Builder.CreateShuffleVector(
+ Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
+ CombinedMask1);
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ return Vec;
+ }
+ if (isa<PoisonValue>(V1))
+ return PoisonValue::get(FixedVectorType::get(
+ cast<VectorType>(V1->getType())->getElementType(), Mask.size()));
+ Value *Op = V1;
+ SmallVector<int> CombinedMask(Mask.begin(), Mask.end());
+ PeekThroughShuffles(Op, CombinedMask);
+ if (!isa<FixedVectorType>(Op->getType()) ||
+ !IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) {
+ Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask);
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ return Vec;
+ }
+ return Op;
+ };
+
+ auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef<int> Mask) {
+ unsigned VF = Mask.size();
+ unsigned VecVF = cast<FixedVectorType>(Vec->getType())->getNumElements();
+ if (VF != VecVF) {
+ if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); })) {
+ Vec = CreateShuffle(Vec, nullptr, Mask);
+ return std::make_pair(Vec, true);
+ }
+ SmallVector<int> ResizeMask(VF, UndefMaskElem);
+ for (unsigned I = 0; I < VF; ++I) {
+ if (Mask[I] != UndefMaskElem)
+ ResizeMask[Mask[I]] = Mask[I];
+ }
+ Vec = CreateShuffle(Vec, nullptr, ResizeMask);
+ }
+
+ return std::make_pair(Vec, false);
+ };
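+ // Illustrative sketch (editorial note): for a 2-element Vec and a
+ // 4-element mask <1, u, 0, u>, no index reaches past the mask size, so
+ // ResizeMask becomes <0, 1, u, u> and Vec is widened in place to 4
+ // elements before the outer shuffle applies.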
+ // Perform shuffling of the vectorized tree entries for better handling of
+ // external extracts.
+ for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) {
+ // Find the first and the last instruction in the list of insertelements.
+ sort(ShuffledInserts[I].InsertElements, isFirstInsertElement);
+ InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front();
+ InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back();
+ Builder.SetInsertPoint(LastInsert);
+ auto Vector = ShuffledInserts[I].ValueMasks.takeVector();
+ Value *NewInst = performExtractsShuffleAction<Value>(
+ makeMutableArrayRef(Vector.data(), Vector.size()),
+ FirstInsert->getOperand(0),
+ [](Value *Vec) {
+ return cast<VectorType>(Vec->getType())
+ ->getElementCount()
+ .getKnownMinValue();
+ },
+ ResizeToVF,
+ [FirstInsert, &CreateShuffle](ArrayRef<int> Mask,
+ ArrayRef<Value *> Vals) {
+ assert((Vals.size() == 1 || Vals.size() == 2) &&
+ "Expected exactly 1 or 2 input values.");
+ if (Vals.size() == 1) {
+ // Do not create a shuffle if the mask is a simple identity,
+ // non-resizing mask.
+ if (Mask.size() != cast<FixedVectorType>(Vals.front()->getType())
+ ->getNumElements() ||
+ !ShuffleVectorInst::isIdentityMask(Mask))
+ return CreateShuffle(Vals.front(), nullptr, Mask);
+ return Vals.front();
+ }
+ return CreateShuffle(Vals.front() ? Vals.front()
+ : FirstInsert->getOperand(0),
+ Vals.back(), Mask);
+ });
+ auto It = ShuffledInserts[I].InsertElements.rbegin();
+ // Rebuild buildvector chain.
+ InsertElementInst *II = nullptr;
+ if (It != ShuffledInserts[I].InsertElements.rend())
+ II = *It;
+ SmallVector<Instruction *> Inserts;
+ while (It != ShuffledInserts[I].InsertElements.rend()) {
+ assert(II && "Must be an insertelement instruction.");
+ if (*It == II)
+ ++It;
+ else
+ Inserts.push_back(cast<Instruction>(II));
+ II = dyn_cast<InsertElementInst>(II->getOperand(0));
+ }
+ for (Instruction *II : reverse(Inserts)) {
+ II->replaceUsesOfWith(II->getOperand(0), NewInst);
+ if (auto *NewI = dyn_cast<Instruction>(NewInst))
+ if (II->getParent() == NewI->getParent() && II->comesBefore(NewI))
+ II->moveAfter(NewI);
+ NewInst = II;
+ }
+ LastInsert->replaceAllUsesWith(NewInst);
+ for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) {
+ IE->replaceUsesOfWith(IE->getOperand(0),
+ PoisonValue::get(IE->getOperand(0)->getType()));
+ IE->replaceUsesOfWith(IE->getOperand(1),
+ PoisonValue::get(IE->getOperand(1)->getType()));
+ eraseInstruction(IE);
+ }
+ CSEBlocks.insert(LastInsert->getParent());
+ }
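+ // Illustrative sketch (editorial note, hypothetical IR): for a chain
+ //   %v0 = insertelement <2 x i32> poison, i32 %a, i32 0
+ //   %v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
+ // the loop above replaces uses of %v1 with the shuffled vector NewInst,
+ // re-roots any out-of-tree inserts onto it, and erases the now-dead
+ // insertelement chain.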
+
// For each vectorized value:
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
@@ -7229,6 +8850,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
+ if (Entry->getOpcode() == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(Scalar))
+ continue;
#ifndef NDEBUG
Type *Ty = Scalar->getType();
if (!Ty->isVoidTy()) {
@@ -7236,7 +8860,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to delete users in the ignorelist.
- assert((getTreeEntry(U) || is_contained(UserIgnoreList, U) ||
+ assert((getTreeEntry(U) ||
+ (UserIgnoreList && UserIgnoreList->contains(U)) ||
(isa_and_nonnull<Instruction>(U) &&
isDeleted(cast<Instruction>(U)))) &&
"Deleting out-of-tree value");
@@ -7404,9 +9029,11 @@ void BoUpSLP::optimizeGatherSequence() {
BoUpSLP::ScheduleData *
BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
- ScheduleData *Bundle = nullptr;
+ ScheduleData *Bundle = nullptr;
ScheduleData *PrevInBundle = nullptr;
for (Value *V : VL) {
+ if (doesNotNeedToBeScheduled(V))
+ continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member "
@@ -7418,8 +9045,6 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
} else {
Bundle = BundleMember;
}
- BundleMember->UnscheduledDepsInBundle = 0;
- Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
// Group the instructions to a bundle.
BundleMember->FirstInBundle = Bundle;
@@ -7436,7 +9061,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
- if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue))
+ if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue) ||
+ doesNotNeedToSchedule(VL))
return nullptr;
// Initialize the instruction bundle.
@@ -7455,16 +9081,17 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
ReSchedule = true;
}
- if (ReSchedule) {
- resetSchedule();
- initialFillReadyList(ReadyInsts);
- }
if (Bundle) {
LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
<< " in block " << BB->getName() << "\n");
calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
}
+ if (ReSchedule) {
+ resetSchedule();
+ initialFillReadyList(ReadyInsts);
+ }
+
// Now try to schedule the new bundle or (if no bundle) just calculate
// dependencies. As soon as the bundle is "ready" it means that there are no
// cyclic dependencies and we can schedule it. Note it's important that we
@@ -7472,14 +9099,17 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
!ReadyInsts.empty()) {
ScheduleData *Picked = ReadyInsts.pop_back_val();
- if (Picked->isSchedulingEntity() && Picked->isReady())
- schedule(Picked, ReadyInsts);
+ assert(Picked->isSchedulingEntity() && Picked->isReady() &&
+ "must be ready to schedule");
+ schedule(Picked, ReadyInsts);
}
};
// Make sure that the scheduling region contains all
// instructions of the bundle.
for (Value *V : VL) {
+ if (doesNotNeedToBeScheduled(V))
+ continue;
if (!extendSchedulingRegion(V, S)) {
// If the scheduling region got new instructions at the lower end (or it
// is a new region for the first bundle). This makes it necessary to
@@ -7494,9 +9124,16 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
bool ReSchedule = false;
for (Value *V : VL) {
+ if (doesNotNeedToBeScheduled(V))
+ continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member (maybe not in same basic block)");
+
+ // Make sure we don't leave the pieces of the bundle in the ready list when
+ // the whole bundle might not be ready.
+ ReadyInsts.remove(BundleMember);
+
if (!BundleMember->IsScheduled)
continue;
// A bundle member was scheduled as single instruction before and now
@@ -7518,16 +9155,24 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
Value *OpValue) {
- if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue))
+ if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue) ||
+ doesNotNeedToSchedule(VL))
return;
+ if (doesNotNeedToBeScheduled(OpValue))
+ OpValue = *find_if_not(VL, doesNotNeedToBeScheduled);
ScheduleData *Bundle = getScheduleData(OpValue);
LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
assert(!Bundle->IsScheduled &&
"Can't cancel bundle which is already scheduled");
- assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
+ assert(Bundle->isSchedulingEntity() &&
+ (Bundle->isPartOfBundle() || needToScheduleSingleInstruction(VL)) &&
"tried to unbundle something which is not a bundle");
+ // Remove the bundle from the ready list.
+ if (Bundle->isReady())
+ ReadyInsts.remove(Bundle);
+
// Un-bundle: make single instructions out of the bundle.
ScheduleData *BundleMember = Bundle;
while (BundleMember) {
@@ -7535,8 +9180,8 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
BundleMember->FirstInBundle = BundleMember;
ScheduleData *Next = BundleMember->NextInBundle;
BundleMember->NextInBundle = nullptr;
- BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps;
- if (BundleMember->UnscheduledDepsInBundle == 0) {
+ BundleMember->TE = nullptr;
+ if (BundleMember->unscheduledDepsInBundle() == 0) {
ReadyInsts.insert(BundleMember);
}
BundleMember = Next;
@@ -7559,9 +9204,10 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
+ !doesNotNeedToBeScheduled(I) &&
"phi nodes/insertelements/extractelements/extractvalues don't need to "
"be scheduled");
- auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
+ auto &&CheckScheduleForI = [this, &S](Instruction *I) -> bool {
ScheduleData *ISD = getScheduleData(I);
if (!ISD)
return false;
@@ -7573,7 +9219,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ExtraScheduleDataMap[I][S.OpValue] = SD;
return true;
};
- if (CheckSheduleForI(I))
+ if (CheckScheduleForI(I))
return true;
if (!ScheduleStart) {
// It's the first instruction in the new region.
@@ -7581,7 +9227,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ScheduleStart = I;
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
- CheckSheduleForI(I);
+ CheckScheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
return true;
@@ -7609,7 +9255,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
ScheduleStart = I;
if (isOneOf(S, I) != I)
- CheckSheduleForI(I);
+ CheckScheduleForI(I);
LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I
<< "\n");
return true;
@@ -7623,7 +9269,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
nullptr);
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
- CheckSheduleForI(I);
+ CheckScheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
return true;
@@ -7635,7 +9281,10 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
ScheduleData *NextLoadStore) {
ScheduleData *CurrentLoadStore = PrevLoadStore;
for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
- ScheduleData *SD = ScheduleDataMap[I];
+ // No need to allocate data for non-schedulable instructions.
+ if (doesNotNeedToBeScheduled(I))
+ continue;
+ ScheduleData *SD = ScheduleDataMap.lookup(I);
if (!SD) {
SD = allocateScheduleDataChunks();
ScheduleDataMap[I] = SD;
@@ -7658,6 +9307,10 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
}
CurrentLoadStore = SD;
}
+
+ if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
+ match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+ RegionHasStackSave = true;
}
if (NextLoadStore) {
if (CurrentLoadStore)
@@ -7690,8 +9343,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
// Handle def-use chain dependencies.
if (BundleMember->OpValue != BundleMember->Inst) {
- ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
- if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+ if (ScheduleData *UseSD = getScheduleData(BundleMember->Inst)) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
@@ -7701,10 +9353,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
}
} else {
for (User *U : BundleMember->Inst->users()) {
- assert(isa<Instruction>(U) &&
- "user of instruction must be instruction");
- ScheduleData *UseSD = getScheduleData(U);
- if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+ if (ScheduleData *UseSD = getScheduleData(cast<Instruction>(U))) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
@@ -7715,6 +9364,75 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
}
}
+ auto makeControlDependent = [&](Instruction *I) {
+ auto *DepDest = getScheduleData(I);
+ assert(DepDest && "must be in schedule window");
+ DepDest->ControlDependencies.push_back(BundleMember);
+ BundleMember->Dependencies++;
+ ScheduleData *DestBundle = DepDest->FirstInBundle;
+ if (!DestBundle->IsScheduled)
+ BundleMember->incrementUnscheduledDeps(1);
+ if (!DestBundle->hasValidDependencies())
+ WorkList.push_back(DestBundle);
+ };
+
+ // Any instruction which isn't safe to speculate at the beginning of the
+ // block is control dependent on any early exit or non-willreturn call
+ // which precedes it.
+ if (!isGuaranteedToTransferExecutionToSuccessor(BundleMember->Inst)) {
+ for (Instruction *I = BundleMember->Inst->getNextNode();
+ I != ScheduleEnd; I = I->getNextNode()) {
+ if (isSafeToSpeculativelyExecute(I, &*BB->begin()))
+ continue;
+
+ // Add the dependency
+ makeControlDependent(I);
+
+ if (!isGuaranteedToTransferExecutionToSuccessor(I))
+ // Everything past here must be control dependent on I.
+ break;
+ }
+ }
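+ // Illustrative sketch (editorial note, hypothetical IR): in
+ //   call void @foo()        ; may not return
+ //   %v = load i32, i32* %p  ; not safe to speculate
+ // the scan above makes the load control dependent on the call, so
+ // rescheduling cannot hoist it across the call.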
+
+ if (RegionHasStackSave) {
+ // If we have an inalloca alloca instruction, it needs to be scheduled
+ // after any preceding stacksave. We also need to prevent any alloca
+ // from reordering above a preceding stackrestore.
+ if (match(BundleMember->Inst, m_Intrinsic<Intrinsic::stacksave>()) ||
+ match(BundleMember->Inst, m_Intrinsic<Intrinsic::stackrestore>())) {
+ for (Instruction *I = BundleMember->Inst->getNextNode();
+ I != ScheduleEnd; I = I->getNextNode()) {
+ if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
+ match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+ // Any allocas past here must be control dependent on I, and I
+ // must be memory dependent on BundleMember->Inst.
+ break;
+
+ if (!isa<AllocaInst>(I))
+ continue;
+
+ // Add the dependency
+ makeControlDependent(I);
+ }
+ }
+
+ // In addition to the cases handled just above, we need to prevent
+ // allocas from moving below a stacksave. Treating the stackrestore
+ // case this way is currently believed to be conservative.
+ if (isa<AllocaInst>(BundleMember->Inst)) {
+ for (Instruction *I = BundleMember->Inst->getNextNode();
+ I != ScheduleEnd; I = I->getNextNode()) {
+ if (!match(I, m_Intrinsic<Intrinsic::stacksave>()) &&
+ !match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+ continue;
+
+ // Add the dependency
+ makeControlDependent(I);
+ break;
+ }
+ }
+ }
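+ // Illustrative sketch (editorial note, hypothetical IR): given
+ //   %ss = call i8* @llvm.stacksave()
+ //   %a = alloca i32
+ //   call void @llvm.stackrestore(i8* %ss)
+ // the edges added above pin the alloca between the stacksave and the
+ // stackrestore, so rescheduling cannot move it out of that window.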
+
// Handle the memory dependencies (if any).
ScheduleData *DepDest = BundleMember->NextLoadStore;
if (!DepDest)
@@ -7777,7 +9495,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
}
}
if (InsertInReadyList && SD->isReady()) {
- ReadyInsts.push_back(SD);
+ ReadyInsts.insert(SD);
LLVM_DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst
<< "\n");
}
@@ -7804,11 +9522,18 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
LLVM_DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
+ // A key point - if we got here, pre-scheduling was able to find a valid
+ // scheduling of the sub-graph of the scheduling window which consists
+ // of all vector bundles and their transitive users. As such, we do not
+ // need to reschedule anything *outside of* that subgraph.
+
BS->resetSchedule();
// For the real scheduling we use a more sophisticated ready-list: it is
// sorted by the original instruction location. This lets the final schedule
// be as close as possible to the original instruction order.
+ // WARNING: If changing this order causes a correctness issue, that means
+ // there is some missing dependence edge in the schedule data graph.
struct ScheduleDataCompare {
bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
return SD2->SchedulingPriority < SD1->SchedulingPriority;
@@ -7816,21 +9541,22 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
};
std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
- // Ensure that all dependency data is updated and fill the ready-list with
- // initial instructions.
+ // Ensure that all dependency data is updated (for nodes in the sub-graph)
+ // and fill the ready-list with initial instructions.
int Idx = 0;
- int NumToSchedule = 0;
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
I = I->getNextNode()) {
- BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
+ BS->doForAllOpcodes(I, [this, &Idx, BS](ScheduleData *SD) {
+ TreeEntry *SDTE = getTreeEntry(SD->Inst);
+ (void)SDTE;
assert((isVectorLikeInstWithConstOps(SD->Inst) ||
- SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
+ SD->isPartOfBundle() ==
+ (SDTE && !doesNotNeedToSchedule(SDTE->Scalars))) &&
"scheduler and vectorizer bundle mismatch");
SD->FirstInBundle->SchedulingPriority = Idx++;
- if (SD->isSchedulingEntity()) {
+
+ if (SD->isSchedulingEntity() && SD->isPartOfBundle())
BS->calculateDependencies(SD, false, this);
- NumToSchedule++;
- }
});
}
BS->initialFillReadyList(ReadyInsts);
@@ -7853,9 +9579,23 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
}
BS->schedule(picked, ReadyInsts);
- NumToSchedule--;
}
- assert(NumToSchedule == 0 && "could not schedule all instructions");
+
+ // Check that we didn't break any of our invariants.
+#ifdef EXPENSIVE_CHECKS
+ BS->verify();
+#endif
+
+#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
+ // Check that all schedulable entities got scheduled
+ for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) {
+ BS->doForAllOpcodes(I, [&](ScheduleData *SD) {
+ if (SD->isSchedulingEntity() && SD->hasValidDependencies()) {
+ assert(SD->IsScheduled && "must be scheduled at this point");
+ }
+ });
+ }
+#endif
// Avoid duplicate scheduling of the block.
BS->ScheduleStart = nullptr;
@@ -7865,11 +9605,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// If V is a store, just return the width of the stored value (or value
// truncated just before storing) without traversing the expression tree.
// This is the common case.
- if (auto *Store = dyn_cast<StoreInst>(V)) {
- if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
- return DL->getTypeSizeInBits(Trunc->getSrcTy());
+ if (auto *Store = dyn_cast<StoreInst>(V))
return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
- }
if (auto *IEI = dyn_cast<InsertElementInst>(V))
return getVectorElementSize(IEI->getOperand(1));
@@ -8271,6 +10008,8 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
// Scan the blocks in the function in post order.
for (auto BB : post_order(&F.getEntryBlock())) {
+ // Start new block - clear the list of reduction roots.
+ R.clearReductionData();
collectSeedInstructions(BB);
// Vectorize trees that end at stores.
@@ -8301,11 +10040,10 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
}
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
- unsigned Idx) {
+ unsigned Idx, unsigned MinVF) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
<< "\n");
const unsigned Sz = R.getVectorElementSize(Chain[0]);
- const unsigned MinVF = R.getMinVecRegSize() / Sz;
unsigned VF = Chain.size();
if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
@@ -8444,9 +10182,15 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned EltSize = R.getVectorElementSize(Operands[0]);
unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
- unsigned MinVF = R.getMinVF(EltSize);
unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
MaxElts);
+ auto *Store = cast<StoreInst>(Operands[0]);
+ Type *StoreTy = Store->getValueOperand()->getType();
+ Type *ValueTy = StoreTy;
+ if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
+ ValueTy = Trunc->getSrcTy();
+ unsigned MinVF = TTI->getStoreMinimumVF(
+ R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
// FIXME: Is division-by-2 the correct step? Should we assert that the
// register size is a power-of-2?
@@ -8456,7 +10200,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
ArrayRef<Value *> Slice = makeArrayRef(Operands).slice(Cnt, Size);
if (!VectorizedStores.count(Slice.front()) &&
!VectorizedStores.count(Slice.back()) &&
- vectorizeStoreChain(Slice, R, Cnt)) {
+ vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
// Mark the vectorized stores so that we don't vectorize them again.
VectorizedStores.insert(Slice.begin(), Slice.end());
Changed = true;
@@ -8516,6 +10260,8 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
+ if (isa<InsertElementInst>(A) || isa<InsertElementInst>(B))
+ return false;
Value *VL[] = {A, B};
return tryToVectorizeList(VL, R);
}
@@ -8658,7 +10404,8 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
if (!I)
return false;
- if (!isa<BinaryOperator>(I) && !isa<CmpInst>(I))
+ if ((!isa<BinaryOperator>(I) && !isa<CmpInst>(I)) ||
+ isa<VectorType>(I->getType()))
return false;
Value *P = I->getParent();
@@ -8669,32 +10416,40 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
return false;
- // Try to vectorize V.
- if (tryToVectorizePair(Op0, Op1, R))
- return true;
+ // First collect all possible candidates
+ SmallVector<std::pair<Value *, Value *>, 4> Candidates;
+ Candidates.emplace_back(Op0, Op1);
auto *A = dyn_cast<BinaryOperator>(Op0);
auto *B = dyn_cast<BinaryOperator>(Op1);
// Try to skip B.
- if (B && B->hasOneUse()) {
+ if (A && B && B->hasOneUse()) {
auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
- if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
- return true;
- if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
- return true;
+ if (B0 && B0->getParent() == P)
+ Candidates.emplace_back(A, B0);
+ if (B1 && B1->getParent() == P)
+ Candidates.emplace_back(A, B1);
}
-
// Try to skip A.
- if (A && A->hasOneUse()) {
+ if (B && A && A->hasOneUse()) {
auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
- if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
- return true;
- if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
- return true;
+ if (A0 && A0->getParent() == P)
+ Candidates.emplace_back(A0, B);
+ if (A1 && A1->getParent() == P)
+ Candidates.emplace_back(A1, B);
}
- return false;
+
+ if (Candidates.size() == 1)
+ return tryToVectorizePair(Op0, Op1, R);
+
+ // We have multiple options. Try to pick the single best.
+ Optional<int> BestCandidate = R.findBestRootPair(Candidates);
+ if (!BestCandidate)
+ return false;
+ return tryToVectorizePair(Candidates[*BestCandidate].first,
+ Candidates[*BestCandidate].second, R);
}
namespace {
@@ -8729,15 +10484,16 @@ class HorizontalReduction {
using ReductionOpsType = SmallVector<Value *, 16>;
using ReductionOpsListType = SmallVector<ReductionOpsType, 2>;
ReductionOpsListType ReductionOps;
- SmallVector<Value *, 32> ReducedVals;
+ /// List of possibly reduced values.
+ SmallVector<SmallVector<Value *>> ReducedVals;
+ /// Maps a reduced value to its corresponding reduction operations.
+ DenseMap<Value *, SmallVector<Instruction *>> ReducedValsToOps;
// Use map vector to make stable output.
MapVector<Instruction *, Value *> ExtraArgs;
WeakTrackingVH ReductionRoot;
/// The type of reduction operation.
RecurKind RdxKind;
- const unsigned INVALID_OPERAND_INDEX = std::numeric_limits<unsigned>::max();
-
static bool isCmpSelMinMax(Instruction *I) {
return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) &&
RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I));
@@ -8781,26 +10537,6 @@ class HorizontalReduction {
return I->getOperand(Index);
}
- /// Checks if the ParentStackElem.first should be marked as a reduction
- /// operation with an extra argument or as extra argument itself.
- void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
- Value *ExtraArg) {
- if (ExtraArgs.count(ParentStackElem.first)) {
- ExtraArgs[ParentStackElem.first] = nullptr;
- // We ran into something like:
- // ParentStackElem.first = ExtraArgs[ParentStackElem.first] + ExtraArg.
- // The whole ParentStackElem.first should be considered as an extra value
- // in this case.
- // Do not perform analysis of remaining operands of ParentStackElem.first
- // instruction, this whole instruction is an extra argument.
- ParentStackElem.second = INVALID_OPERAND_INDEX;
- } else {
- // We ran into something like:
- // ParentStackElem.first += ... + ExtraArg + ...
- ExtraArgs[ParentStackElem.first] = ExtraArg;
- }
- }
-
/// Creates reduction operation with the current opcode.
static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
Value *RHS, const Twine &Name, bool UseSelect) {
@@ -8859,7 +10595,7 @@ class HorizontalReduction {
}
/// Creates reduction operation with the current opcode with the IR flags
- /// from \p ReductionOps.
+ /// from \p ReductionOps, dropping nuw/nsw flags.
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name,
const ReductionOpsListType &ReductionOps) {
@@ -8873,31 +10609,21 @@ class HorizontalReduction {
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
if (auto *Sel = dyn_cast<SelectInst>(Op)) {
- propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
- propagateIRFlags(Op, ReductionOps[1]);
+ propagateIRFlags(Sel->getCondition(), ReductionOps[0], nullptr,
+ /*IncludeWrapFlags=*/false);
+ propagateIRFlags(Op, ReductionOps[1], nullptr,
+ /*IncludeWrapFlags=*/false);
return Op;
}
}
- propagateIRFlags(Op, ReductionOps[0]);
+ propagateIRFlags(Op, ReductionOps[0], nullptr, /*IncludeWrapFlags=*/false);
return Op;
}
- /// Creates reduction operation with the current opcode with the IR flags
- /// from \p I.
- static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
- Value *RHS, const Twine &Name, Instruction *I) {
- auto *SelI = dyn_cast<SelectInst>(I);
- Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr);
- if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
- if (auto *Sel = dyn_cast<SelectInst>(Op))
- propagateIRFlags(Sel->getCondition(), SelI->getCondition());
- }
- propagateIRFlags(Op, I);
- return Op;
- }
-
- static RecurKind getRdxKind(Instruction *I) {
- assert(I && "Expected instruction for reduction matching");
+ static RecurKind getRdxKind(Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return RecurKind::None;
if (match(I, m_Add(m_Value(), m_Value())))
return RecurKind::Add;
if (match(I, m_Mul(m_Value(), m_Value())))
@@ -9059,7 +10785,9 @@ public:
HorizontalReduction() = default;
/// Try to find a reduction tree.
- bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst) {
+ bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst,
+ ScalarEvolution &SE, const DataLayout &DL,
+ const TargetLibraryInfo &TLI) {
assert((!Phi || is_contained(Phi->operands(), Inst)) &&
"Phi needs to use the binary operator");
assert((isa<BinaryOperator>(Inst) || isa<SelectInst>(Inst) ||
@@ -9103,124 +10831,178 @@ public:
ReductionRoot = Inst;
- // The opcode for leaf values that we perform a reduction on.
- // For example: load(x) + load(y) + load(z) + fptoui(w)
- // The leaf opcode for 'w' does not match, so we don't include it as a
- // potential candidate for the reduction.
- unsigned LeafOpcode = 0;
-
- // Post-order traverse the reduction tree starting at Inst. We only handle
- // true trees containing binary operators or selects.
- SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
- Stack.push_back(std::make_pair(Inst, getFirstOperandIndex(Inst)));
- initReductionOps(Inst);
- while (!Stack.empty()) {
- Instruction *TreeN = Stack.back().first;
- unsigned EdgeToVisit = Stack.back().second++;
- const RecurKind TreeRdxKind = getRdxKind(TreeN);
- bool IsReducedValue = TreeRdxKind != RdxKind;
-
- // Postorder visit.
- if (IsReducedValue || EdgeToVisit >= getNumberOfOperands(TreeN)) {
- if (IsReducedValue)
- ReducedVals.push_back(TreeN);
- else {
- auto ExtraArgsIter = ExtraArgs.find(TreeN);
- if (ExtraArgsIter != ExtraArgs.end() && !ExtraArgsIter->second) {
- // Check if TreeN is an extra argument of its parent operation.
- if (Stack.size() <= 1) {
- // TreeN can't be an extra argument as it is a root reduction
- // operation.
- return false;
- }
- // Yes, TreeN is an extra argument, do not add it to a list of
- // reduction operations.
- // Stack[Stack.size() - 2] always points to the parent operation.
- markExtraArg(Stack[Stack.size() - 2], TreeN);
- ExtraArgs.erase(TreeN);
- } else
- addReductionOps(TreeN);
- }
- // Retract.
- Stack.pop_back();
- continue;
- }
-
- // Visit operands.
- Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit);
- auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
- if (!EdgeInst) {
- // Edge value is not a reduction instruction or a leaf instruction.
- // (It may be a constant, function argument, or something else.)
- markExtraArg(Stack.back(), EdgeVal);
- continue;
+ // Iterate through all the operands of the possible reduction tree and
+ // gather all the reduced values, sorting them by their value id.
+ BasicBlock *BB = Inst->getParent();
+ bool IsCmpSelMinMax = isCmpSelMinMax(Inst);
+ SmallVector<Instruction *> Worklist(1, Inst);
+ // Checks if the operands of the \p TreeN instruction are also reduction
+ // operations or should be treated as reduced values or an extra argument,
+ // which is not part of the reduction.
+ auto &&CheckOperands = [this, IsCmpSelMinMax,
+ BB](Instruction *TreeN,
+ SmallVectorImpl<Value *> &ExtraArgs,
+ SmallVectorImpl<Value *> &PossibleReducedVals,
+ SmallVectorImpl<Instruction *> &ReductionOps) {
+ for (int I = getFirstOperandIndex(TreeN),
+ End = getNumberOfOperands(TreeN);
+ I < End; ++I) {
+ Value *EdgeVal = getRdxOperand(TreeN, I);
+ ReducedValsToOps[EdgeVal].push_back(TreeN);
+ auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
+ // Edge has wrong parent - mark as an extra argument.
+ if (EdgeInst && !isVectorLikeInstWithConstOps(EdgeInst) &&
+ !hasSameParent(EdgeInst, BB)) {
+ ExtraArgs.push_back(EdgeVal);
+ continue;
+ }
+ // If the edge is not an instruction, differs from the main reduction
+ // opcode, or has too many uses, treat it as a possible reduced value.
+ if (!EdgeInst || getRdxKind(EdgeInst) != RdxKind ||
+ IsCmpSelMinMax != isCmpSelMinMax(EdgeInst) ||
+ !hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
+ !isVectorizable(getRdxKind(EdgeInst), EdgeInst)) {
+ PossibleReducedVals.push_back(EdgeVal);
+ continue;
+ }
+ ReductionOps.push_back(EdgeInst);
}
- RecurKind EdgeRdxKind = getRdxKind(EdgeInst);
- // Continue analysis if the next operand is a reduction operation or
- // (possibly) a leaf value. If the leaf value opcode is not set,
- // the first met operation != reduction operation is considered as the
- // leaf opcode.
- // Only handle trees in the current basic block.
- // Each tree node needs to have minimal number of users except for the
- // ultimate reduction.
- const bool IsRdxInst = EdgeRdxKind == RdxKind;
- if (EdgeInst != Phi && EdgeInst != Inst &&
- hasSameParent(EdgeInst, Inst->getParent()) &&
- hasRequiredNumberOfUses(isCmpSelMinMax(Inst), EdgeInst) &&
- (!LeafOpcode || LeafOpcode == EdgeInst->getOpcode() || IsRdxInst)) {
- if (IsRdxInst) {
- // We need to be able to reassociate the reduction operations.
- if (!isVectorizable(EdgeRdxKind, EdgeInst)) {
- // I is an extra argument for TreeN (its parent operation).
- markExtraArg(Stack.back(), EdgeInst);
- continue;
- }
- } else if (!LeafOpcode) {
- LeafOpcode = EdgeInst->getOpcode();
+ };
+ // Try to regroup the reduced values so that the reduction becomes more
+ // profitable. Values are grouped by their value ids, instructions by
+ // their opcode and/or alternate opcode, with extra analysis for loads
+ // (grouping them by the distance between pointers) and cmp
+ // instructions (grouping them by the predicate).
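+ // Illustrative sketch (editorial note): for a chain such as
+ //   ((a[0] + x) + a[1]) + a[2]
+ // the three loads from a[] hash into a single group (consecutive
+ // pointers), while x lands in its own group, so the loads can be
+ // vectorized together.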
+ MapVector<size_t, MapVector<size_t, MapVector<Value *, unsigned>>>
+ PossibleReducedVals;
+ initReductionOps(Inst);
+ while (!Worklist.empty()) {
+ Instruction *TreeN = Worklist.pop_back_val();
+ SmallVector<Value *> Args;
+ SmallVector<Value *> PossibleRedVals;
+ SmallVector<Instruction *> PossibleReductionOps;
+ CheckOperands(TreeN, Args, PossibleRedVals, PossibleReductionOps);
+ // If there are too many extra args, mark the instruction itself as a
+ // reduction value rather than as a reduction operation.
+ if (Args.size() < 2) {
+ addReductionOps(TreeN);
+ // Add extra args.
+ if (!Args.empty()) {
+ assert(Args.size() == 1 && "Expected only single argument.");
+ ExtraArgs[TreeN] = Args.front();
}
- Stack.push_back(
- std::make_pair(EdgeInst, getFirstOperandIndex(EdgeInst)));
- continue;
+ // Add reduction values. The values are sorted for better vectorization
+ // results.
+ for (Value *V : PossibleRedVals) {
+ size_t Key, Idx;
+ std::tie(Key, Idx) = generateKeySubkey(
+ V, &TLI,
+ [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) {
+ auto It = PossibleReducedVals.find(Key);
+ if (It != PossibleReducedVals.end()) {
+ for (const auto &LoadData : It->second) {
+ auto *RLI = cast<LoadInst>(LoadData.second.front().first);
+ if (getPointersDiff(RLI->getType(),
+ RLI->getPointerOperand(), LI->getType(),
+ LI->getPointerOperand(), DL, SE,
+ /*StrictCheck=*/true))
+ return hash_value(RLI->getPointerOperand());
+ }
+ }
+ return hash_value(LI->getPointerOperand());
+ },
+ /*AllowAlternate=*/false);
+ ++PossibleReducedVals[Key][Idx]
+ .insert(std::make_pair(V, 0))
+ .first->second;
+ }
+ Worklist.append(PossibleReductionOps.rbegin(),
+ PossibleReductionOps.rend());
+ } else {
+ size_t Key, Idx;
+ std::tie(Key, Idx) = generateKeySubkey(
+ TreeN, &TLI,
+ [&PossibleReducedVals, &DL, &SE](size_t Key, LoadInst *LI) {
+ auto It = PossibleReducedVals.find(Key);
+ if (It != PossibleReducedVals.end()) {
+ for (const auto &LoadData : It->second) {
+ auto *RLI = cast<LoadInst>(LoadData.second.front().first);
+ if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(),
+ LI->getType(), LI->getPointerOperand(),
+ DL, SE, /*StrictCheck=*/true))
+ return hash_value(RLI->getPointerOperand());
+ }
+ }
+ return hash_value(LI->getPointerOperand());
+ },
+ /*AllowAlternate=*/false);
+ ++PossibleReducedVals[Key][Idx]
+ .insert(std::make_pair(TreeN, 0))
+ .first->second;
+ }
+ }
+ auto PossibleReducedValsVect = PossibleReducedVals.takeVector();
+ // Sort the value groups by their total number of elements so that the
+ // reduction starts from the longest possible sequences of reduced values.
+ for (auto &PossibleReducedVals : PossibleReducedValsVect) {
+ auto PossibleRedVals = PossibleReducedVals.second.takeVector();
+ SmallVector<SmallVector<Value *>> PossibleRedValsVect;
+ for (auto It = PossibleRedVals.begin(), E = PossibleRedVals.end();
+ It != E; ++It) {
+ PossibleRedValsVect.emplace_back();
+ auto RedValsVect = It->second.takeVector();
+ stable_sort(RedValsVect, [](const auto &P1, const auto &P2) {
+ return P1.second < P2.second;
+ });
+ for (const std::pair<Value *, unsigned> &Data : RedValsVect)
+ PossibleRedValsVect.back().append(Data.second, Data.first);
}
- // I is an extra argument for TreeN (its parent operation).
- markExtraArg(Stack.back(), EdgeInst);
- }
+ stable_sort(PossibleRedValsVect, [](const auto &P1, const auto &P2) {
+ return P1.size() > P2.size();
+ });
+ ReducedVals.emplace_back();
+ for (ArrayRef<Value *> Data : PossibleRedValsVect)
+ ReducedVals.back().append(Data.rbegin(), Data.rend());
+ }
+ // Sort the groups of reduced values by the number of values with the
+ // same/alternate opcode and/or pointer operand.
+ stable_sort(ReducedVals, [](ArrayRef<Value *> P1, ArrayRef<Value *> P2) {
+ return P1.size() > P2.size();
+ });
return true;
}
/// Attempt to vectorize the tree found by matchAssociativeReduction.
Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+ constexpr int ReductionLimit = 4;
+ constexpr unsigned RegMaxNumber = 4;
+ constexpr unsigned RedValsMaxNumber = 128;
// If there are a sufficient number of reduction values, reduce
// to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
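    // Illustrative sketch (editorial note): 24 reduced values would first
    // be attempted at width 16 (PowerOf2Floor(24)); the leftovers are then
    // retried at width 8, and anything still scalar is folded into the
    // final reduction afterwards.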
- unsigned NumReducedVals = ReducedVals.size();
- if (NumReducedVals < 4)
+ unsigned NumReducedVals = std::accumulate(
+ ReducedVals.begin(), ReducedVals.end(), 0,
+ [](int Num, ArrayRef<Value *> Vals) { return Num + Vals.size(); });
+ if (NumReducedVals < ReductionLimit)
return nullptr;
- // Intersect the fast-math-flags from all reduction operations.
- FastMathFlags RdxFMF;
- RdxFMF.set();
- for (ReductionOpsType &RdxOp : ReductionOps) {
- for (Value *RdxVal : RdxOp) {
- if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
- RdxFMF &= FPMO->getFastMathFlags();
- }
- }
-
IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
- Builder.setFastMathFlags(RdxFMF);
+ // Track the reduced values in case they are replaced by extractelement
+ // instructions because of the vectorization.
+ DenseMap<Value *, WeakTrackingVH> TrackedVals;
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
// The same extra argument may be used several times, so log each attempt
// to use it.
for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
+ TrackedVals.try_emplace(Pair.second, Pair.second);
}
// The compare instruction of a min/max is the insertion point for new
// instructions and may be replaced with a new compare instruction.
- auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
+ auto &&GetCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
assert(isa<SelectInst>(RdxRootInst) &&
"Expected min/max reduction to have select root instruction");
Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
@@ -9232,164 +11014,390 @@ public:
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
- SmallVector<Value *, 16> IgnoreList;
- for (ReductionOpsType &RdxOp : ReductionOps)
- IgnoreList.append(RdxOp.begin(), RdxOp.end());
-
- unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
- if (NumReducedVals > ReduxWidth) {
- // In the loop below, we are building a tree based on a window of
- // 'ReduxWidth' values.
- // If the operands of those values have common traits (compare predicate,
- // constant operand, etc), then we want to group those together to
- // minimize the cost of the reduction.
-
- // TODO: This should be extended to count common operands for
- // compares and binops.
-
- // Step 1: Count the number of times each compare predicate occurs.
- SmallDenseMap<unsigned, unsigned> PredCountMap;
- for (Value *RdxVal : ReducedVals) {
- CmpInst::Predicate Pred;
- if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value())))
- ++PredCountMap[Pred];
- }
- // Step 2: Sort the values so the most common predicates come first.
- stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) {
- CmpInst::Predicate PredA, PredB;
- if (match(A, m_Cmp(PredA, m_Value(), m_Value())) &&
- match(B, m_Cmp(PredB, m_Value(), m_Value()))) {
- return PredCountMap[PredA] > PredCountMap[PredB];
- }
- return false;
- });
- }
+ SmallDenseSet<Value *> IgnoreList;
+ for (ReductionOpsType &RdxOps : ReductionOps)
+ for (Value *RdxOp : RdxOps) {
+ if (!RdxOp)
+ continue;
+ IgnoreList.insert(RdxOp);
+ }
+ bool IsCmpSelMinMax = isCmpSelMinMax(cast<Instruction>(ReductionRoot));
+
+ // Need to track the reduced values; they may be changed during
+ // vectorization of subvectors.
+ for (ArrayRef<Value *> Candidates : ReducedVals)
+ for (Value *V : Candidates)
+ TrackedVals.try_emplace(V, V);
+ DenseMap<Value *, unsigned> VectorizedVals;
Value *VectorizedTree = nullptr;
- unsigned i = 0;
- while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
- ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
- V.buildTree(VL, IgnoreList);
- if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true))
- break;
- if (V.isLoadCombineReductionCandidate(RdxKind))
- break;
- V.reorderTopToBottom();
- V.reorderBottomToTop(/*IgnoreReorder=*/true);
- V.buildExternalUses(ExternallyUsedValues);
-
- // For a poison-safe boolean logic reduction, do not replace select
- // instructions with logic ops. All reduced values will be frozen (see
- // below) to prevent leaking poison.
- if (isa<SelectInst>(ReductionRoot) &&
- isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
- NumReducedVals != ReduxWidth)
- break;
+ bool CheckForReusedReductionOps = false;
+ // Try to vectorize elements based on their type.
+ for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
+ ArrayRef<Value *> OrigReducedVals = ReducedVals[I];
+ InstructionsState S = getSameOpcode(OrigReducedVals);
+ SmallVector<Value *> Candidates;
+ DenseMap<Value *, Value *> TrackedToOrig;
+ for (unsigned Cnt = 0, Sz = OrigReducedVals.size(); Cnt < Sz; ++Cnt) {
+ Value *RdxVal = TrackedVals.find(OrigReducedVals[Cnt])->second;
+ // Check whether the reduction value was overridden by an extractelement
+ // instruction because of the vectorization, and exclude it if it is not
+ // compatible with the other values.
+ if (auto *Inst = dyn_cast<Instruction>(RdxVal))
+ if (isVectorLikeInstWithConstOps(Inst) &&
+ (!S.getOpcode() || !S.isOpcodeOrAlt(Inst)))
+ continue;
+ Candidates.push_back(RdxVal);
+ TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]);
+ }
+ bool ShuffledExtracts = false;
+ // Try to handle shuffled extractelements.
+ if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() &&
+ I + 1 < E) {
+ InstructionsState NextS = getSameOpcode(ReducedVals[I + 1]);
+ if (NextS.getOpcode() == Instruction::ExtractElement &&
+ !NextS.isAltShuffle()) {
+ SmallVector<Value *> CommonCandidates(Candidates);
+ for (Value *RV : ReducedVals[I + 1]) {
+ Value *RdxVal = TrackedVals.find(RV)->second;
+ // Check whether the reduction value was overridden by an
+ // extractelement instruction because of the vectorization, and
+ // exclude it if it is not compatible with the other values.
+ if (auto *Inst = dyn_cast<Instruction>(RdxVal))
+ if (!NextS.getOpcode() || !NextS.isOpcodeOrAlt(Inst))
+ continue;
+ CommonCandidates.push_back(RdxVal);
+ TrackedToOrig.try_emplace(RdxVal, RV);
+ }
+ SmallVector<int> Mask;
+ if (isFixedVectorShuffle(CommonCandidates, Mask)) {
+ ++I;
+ Candidates.swap(CommonCandidates);
+ ShuffledExtracts = true;
+ }
+ }
+ }
+ unsigned NumReducedVals = Candidates.size();
+ if (NumReducedVals < ReductionLimit)
+ continue;
- V.computeMinimumValueSizes();
+ unsigned MaxVecRegSize = V.getMaxVecRegSize();
+ unsigned EltSize = V.getVectorElementSize(Candidates[0]);
+ unsigned MaxElts = RegMaxNumber * PowerOf2Floor(MaxVecRegSize / EltSize);
+
+ unsigned ReduxWidth = std::min<unsigned>(
+ PowerOf2Floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
+ unsigned Start = 0;
+ unsigned Pos = Start;
+ // Restarts vectorization attempt with lower vector factor.
+ unsigned PrevReduxWidth = ReduxWidth;
+ bool CheckForReusedReductionOpsLocal = false;
+ auto &&AdjustReducedVals = [&Pos, &Start, &ReduxWidth, NumReducedVals,
+ &CheckForReusedReductionOpsLocal,
+ &PrevReduxWidth, &V,
+ &IgnoreList](bool IgnoreVL = false) {
+ bool IsAnyRedOpGathered = !IgnoreVL && V.isAnyGathered(IgnoreList);
+ if (!CheckForReusedReductionOpsLocal && PrevReduxWidth == ReduxWidth) {
+ // Check if any of the reduction ops are gathered. If so, it is worth
+ // trying again with a smaller number of reduction ops.
+ CheckForReusedReductionOpsLocal |= IsAnyRedOpGathered;
+ }
+ ++Pos;
+ if (Pos < NumReducedVals - ReduxWidth + 1)
+ return IsAnyRedOpGathered;
+ Pos = Start;
+ ReduxWidth /= 2;
+ return IsAnyRedOpGathered;
+ };
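+ // Illustrative sketch (editorial note): with 8 candidates and an
+ // initial ReduxWidth of 8, a failed attempt halves the width to 4
+ // and restarts at position 0; at width 4 the window then slides over
+ // positions 0..4 before halving again.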
+ while (Pos < NumReducedVals - ReduxWidth + 1 &&
+ ReduxWidth >= ReductionLimit) {
+ // There is a dependency in the tree of the reduction ops - drop this
+ // attempt and try again later.
+ if (CheckForReusedReductionOpsLocal && PrevReduxWidth != ReduxWidth &&
+ Start == 0) {
+ CheckForReusedReductionOps = true;
+ break;
+ }
+ PrevReduxWidth = ReduxWidth;
+ ArrayRef<Value *> VL(std::next(Candidates.begin(), Pos), ReduxWidth);
+ // Already being analyzed - skip.
+ if (V.areAnalyzedReductionVals(VL)) {
+ (void)AdjustReducedVals(/*IgnoreVL=*/true);
+ continue;
+ }
+ // Early exit if any of the reduction values were deleted during
+ // previous vectorization attempts.
+ if (any_of(VL, [&V](Value *RedVal) {
+ auto *RedValI = dyn_cast<Instruction>(RedVal);
+ if (!RedValI)
+ return false;
+ return V.isDeleted(RedValI);
+ }))
+ break;
+ V.buildTree(VL, IgnoreList);
+ if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true)) {
+ if (!AdjustReducedVals())
+ V.analyzedReductionVals(VL);
+ continue;
+ }
+ if (V.isLoadCombineReductionCandidate(RdxKind)) {
+ if (!AdjustReducedVals())
+ V.analyzedReductionVals(VL);
+ continue;
+ }
+ V.reorderTopToBottom();
+ // No need to reorder the root node at all.
+ V.reorderBottomToTop(/*IgnoreReorder=*/true);
+ // Keep the other extracted reduction values if they are used in the
+ // vectorization trees.
+ BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues(
+ ExternallyUsedValues);
+ for (unsigned Cnt = 0, Sz = ReducedVals.size(); Cnt < Sz; ++Cnt) {
+ if (Cnt == I || (ShuffledExtracts && Cnt == I - 1))
+ continue;
+ for_each(ReducedVals[Cnt],
+ [&LocalExternallyUsedValues, &TrackedVals](Value *V) {
+ if (isa<Instruction>(V))
+ LocalExternallyUsedValues[TrackedVals[V]];
+ });
+ }
+ // Number of uses of the candidates in the vector of values.
+ SmallDenseMap<Value *, unsigned> NumUses;
+ for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (NumUses.count(V) > 0)
+ continue;
+ NumUses[V] = std::count(VL.begin(), VL.end(), V);
+ }
+ for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (NumUses.count(V) > 0)
+ continue;
+ NumUses[V] = std::count(VL.begin(), VL.end(), V);
+ }
+ // Gather externally used values.
+ SmallPtrSet<Value *, 4> Visited;
+ for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (!Visited.insert(V).second)
+ continue;
+ unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V];
+ if (NumOps != ReducedValsToOps.find(V)->second.size())
+ LocalExternallyUsedValues[V];
+ }
+ for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (!Visited.insert(V).second)
+ continue;
+ unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V];
+ if (NumOps != ReducedValsToOps.find(V)->second.size())
+ LocalExternallyUsedValues[V];
+ }
+ V.buildExternalUses(LocalExternallyUsedValues);
+
+ V.computeMinimumValueSizes();
+
+ // Intersect the fast-math-flags from all reduction operations.
+ FastMathFlags RdxFMF;
+ RdxFMF.set();
+ for (Value *U : IgnoreList)
+ if (auto *FPMO = dyn_cast<FPMathOperator>(U))
+ RdxFMF &= FPMO->getFastMathFlags();
+ // Estimate cost.
+ InstructionCost TreeCost = V.getTreeCost(VL);
+ InstructionCost ReductionCost =
+ getReductionCost(TTI, VL, ReduxWidth, RdxFMF);
+ InstructionCost Cost = TreeCost + ReductionCost;
+ if (!Cost.isValid()) {
+ LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
+ return nullptr;
+ }
+ if (Cost >= -SLPCostThreshold) {
+ V.getORE()->emit([&]() {
+ return OptimizationRemarkMissed(
+ SV_NAME, "HorSLPNotBeneficial",
+ ReducedValsToOps.find(VL[0])->second.front())
+ << "Vectorizing horizontal reduction is possible"
+ << "but not beneficial with cost " << ore::NV("Cost", Cost)
+ << " and threshold "
+ << ore::NV("Threshold", -SLPCostThreshold);
+ });
+ if (!AdjustReducedVals())
+ V.analyzedReductionVals(VL);
+ continue;
+ }
- // Estimate cost.
- InstructionCost TreeCost =
- V.getTreeCost(makeArrayRef(&ReducedVals[i], ReduxWidth));
- InstructionCost ReductionCost =
- getReductionCost(TTI, ReducedVals[i], ReduxWidth, RdxFMF);
- InstructionCost Cost = TreeCost + ReductionCost;
- if (!Cost.isValid()) {
- LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
- return nullptr;
- }
- if (Cost >= -SLPCostThreshold) {
+ LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
+ << Cost << ". (HorRdx)\n");
V.getORE()->emit([&]() {
- return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
- cast<Instruction>(VL[0]))
- << "Vectorizing horizontal reduction is possible"
- << "but not beneficial with cost " << ore::NV("Cost", Cost)
- << " and threshold "
- << ore::NV("Threshold", -SLPCostThreshold);
+ return OptimizationRemark(
+ SV_NAME, "VectorizedHorizontalReduction",
+ ReducedValsToOps.find(VL[0])->second.front())
+ << "Vectorized horizontal reduction with cost "
+ << ore::NV("Cost", Cost) << " and with tree size "
+ << ore::NV("TreeSize", V.getTreeSize());
});
- break;
- }
- LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
- << Cost << ". (HorRdx)\n");
- V.getORE()->emit([&]() {
- return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
- cast<Instruction>(VL[0]))
- << "Vectorized horizontal reduction with cost "
- << ore::NV("Cost", Cost) << " and with tree size "
- << ore::NV("TreeSize", V.getTreeSize());
- });
+ Builder.setFastMathFlags(RdxFMF);
- // Vectorize a tree.
- DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
- Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
+ // Vectorize a tree.
+ Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues);
- // Emit a reduction. If the root is a select (min/max idiom), the insert
- // point is the compare condition of that select.
- Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
- if (isCmpSelMinMax(RdxRootInst))
- Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
- else
- Builder.SetInsertPoint(RdxRootInst);
+ // Emit a reduction. If the root is a select (min/max idiom), the insert
+ // point is the compare condition of that select.
+ Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
+ if (IsCmpSelMinMax)
+ Builder.SetInsertPoint(GetCmpForMinMaxReduction(RdxRootInst));
+ else
+ Builder.SetInsertPoint(RdxRootInst);
- // To prevent poison from leaking across what used to be sequential, safe,
- // scalar boolean logic operations, the reduction operand must be frozen.
- if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
- VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
+ // To prevent poison from leaking across what used to be sequential,
+ // safe, scalar boolean logic operations, the reduction operand must be
+ // frozen.
+ if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
+ VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
- Value *ReducedSubTree =
- emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
+ Value *ReducedSubTree =
+ emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
- if (!VectorizedTree) {
- // Initialize the final value in the reduction.
- VectorizedTree = ReducedSubTree;
- } else {
- // Update the final value in the reduction.
- Builder.SetCurrentDebugLocation(Loc);
- VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
- ReducedSubTree, "op.rdx", ReductionOps);
+ if (!VectorizedTree) {
+ // Initialize the final value in the reduction.
+ VectorizedTree = ReducedSubTree;
+ } else {
+ // Update the final value in the reduction.
+ Builder.SetCurrentDebugLocation(
+ cast<Instruction>(ReductionOps.front().front())->getDebugLoc());
+ VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
+ ReducedSubTree, "op.rdx", ReductionOps);
+ }
+ // Count vectorized reduced values to exclude them from final reduction.
+ for (Value *V : VL)
+ ++VectorizedVals.try_emplace(TrackedToOrig.find(V)->second, 0)
+ .first->getSecond();
+ Pos += ReduxWidth;
+ Start = Pos;
+ ReduxWidth = PowerOf2Floor(NumReducedVals - Pos);
}
- i += ReduxWidth;
- ReduxWidth = PowerOf2Floor(NumReducedVals - i);
}
-
if (VectorizedTree) {
// Finish the reduction.
- for (; i < NumReducedVals; ++i) {
- auto *I = cast<Instruction>(ReducedVals[i]);
- Builder.SetCurrentDebugLocation(I->getDebugLoc());
- VectorizedTree =
- createOp(Builder, RdxKind, VectorizedTree, I, "", ReductionOps);
+ // Need to add the extra arguments and the possible reduction values
+ // that were not vectorized.
+ // Try to avoid dependencies between the scalar remainders after the
+ // reductions.
+ auto &&FinalGen =
+ [this, &Builder,
+ &TrackedVals](ArrayRef<std::pair<Instruction *, Value *>> InstVals) {
+ unsigned Sz = InstVals.size();
+ SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
+ Sz % 2);
+ for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
+ Instruction *RedOp = InstVals[I + 1].first;
+ Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
+ Value *RdxVal1 = InstVals[I].second;
+ Value *StableRdxVal1 = RdxVal1;
+ auto It1 = TrackedVals.find(RdxVal1);
+ if (It1 != TrackedVals.end())
+ StableRdxVal1 = It1->second;
+ Value *RdxVal2 = InstVals[I + 1].second;
+ Value *StableRdxVal2 = RdxVal2;
+ auto It2 = TrackedVals.find(RdxVal2);
+ if (It2 != TrackedVals.end())
+ StableRdxVal2 = It2->second;
+ Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
+ StableRdxVal2, "op.rdx", ReductionOps);
+ ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
+ }
+ if (Sz % 2 == 1)
+ ExtraReds[Sz / 2] = InstVals.back();
+ return ExtraReds;
+ };
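+ // Illustrative sketch (editorial note): one FinalGen pass over the
+ // leftovers [r0, r1, r2, r3, r4] produces [r0+r1, r2+r3, r4];
+ // repeated passes pair these down to a single value, keeping the
+ // scalar remainder chain roughly logarithmic in depth.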
+ SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
+ SmallPtrSet<Value *, 8> Visited;
+ for (ArrayRef<Value *> Candidates : ReducedVals) {
+ for (Value *RdxVal : Candidates) {
+ if (!Visited.insert(RdxVal).second)
+ continue;
+ unsigned NumOps = VectorizedVals.lookup(RdxVal);
+ for (Instruction *RedOp :
+ makeArrayRef(ReducedValsToOps.find(RdxVal)->second)
+ .drop_back(NumOps))
+ ExtraReductions.emplace_back(RedOp, RdxVal);
+ }
}
for (auto &Pair : ExternallyUsedValues) {
// Add each externally used value to the final reduction.
- for (auto *I : Pair.second) {
- Builder.SetCurrentDebugLocation(I->getDebugLoc());
- VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
- Pair.first, "op.extra", I);
- }
+ for (auto *I : Pair.second)
+ ExtraReductions.emplace_back(I, Pair.first);
+ }
+ // Iterate through all non-vectorized reduction values/extra arguments.
+ while (ExtraReductions.size() > 1) {
+ SmallVector<std::pair<Instruction *, Value *>> NewReds =
+ FinalGen(ExtraReductions);
+ ExtraReductions.swap(NewReds);
+ }
+ // Final reduction.
+ if (ExtraReductions.size() == 1) {
+ Instruction *RedOp = ExtraReductions.back().first;
+ Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
+ Value *RdxVal = ExtraReductions.back().second;
+ Value *StableRdxVal = RdxVal;
+ auto It = TrackedVals.find(RdxVal);
+ if (It != TrackedVals.end())
+ StableRdxVal = It->second;
+ VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
+ StableRdxVal, "op.rdx", ReductionOps);
}
ReductionRoot->replaceAllUsesWith(VectorizedTree);
- // Mark all scalar reduction ops for deletion, they are replaced by the
- // vector reductions.
- V.eraseInstructions(IgnoreList);
+ // The original scalar reduction is expected to have no remaining
+ // uses outside the reduction tree itself. Assert that we got this
+ // correct, replace internal uses with undef, and mark for eventual
+ // deletion.
+#ifndef NDEBUG
+ SmallSet<Value *, 4> IgnoreSet;
+ for (ArrayRef<Value *> RdxOps : ReductionOps)
+ IgnoreSet.insert(RdxOps.begin(), RdxOps.end());
+#endif
+ for (ArrayRef<Value *> RdxOps : ReductionOps) {
+ for (Value *Ignore : RdxOps) {
+ if (!Ignore)
+ continue;
+#ifndef NDEBUG
+ for (auto *U : Ignore->users()) {
+ assert(IgnoreSet.count(U) &&
+ "All users must be either in the reduction ops list.");
+ }
+#endif
+ if (!Ignore->use_empty()) {
+ Value *Undef = UndefValue::get(Ignore->getType());
+ Ignore->replaceAllUsesWith(Undef);
+ }
+ V.eraseInstruction(cast<Instruction>(Ignore));
+ }
+ }
+ } else if (!CheckForReusedReductionOps) {
+ for (ReductionOpsType &RdxOps : ReductionOps)
+ for (Value *RdxOp : RdxOps)
+ V.analyzedReductionRoot(cast<Instruction>(RdxOp));
}
return VectorizedTree;
}
- unsigned numReductionValues() const { return ReducedVals.size(); }
-
private:
/// Calculate the cost of a reduction.
InstructionCost getReductionCost(TargetTransformInfo *TTI,
- Value *FirstReducedVal, unsigned ReduxWidth,
- FastMathFlags FMF) {
+ ArrayRef<Value *> ReducedVals,
+ unsigned ReduxWidth, FastMathFlags FMF) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ Value *FirstReducedVal = ReducedVals.front();
Type *ScalarTy = FirstReducedVal->getType();
FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
- InstructionCost VectorCost, ScalarCost;
+ InstructionCost VectorCost = 0, ScalarCost;
+ // If all of the reduced values are constant, the vector cost is 0, since
+ // the reduction value can be calculated at compile time.
+ bool AllConsts = all_of(ReducedVals, isConstant);
switch (RdxKind) {
case RecurKind::Add:
case RecurKind::Mul:
@@ -9399,17 +11407,22 @@ private:
case RecurKind::FAdd:
case RecurKind::FMul: {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
- VectorCost =
- TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind);
+ if (!AllConsts)
+ VectorCost =
+ TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind);
ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind);
break;
}
case RecurKind::FMax:
case RecurKind::FMin: {
auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
- auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
- VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
- /*IsUnsigned=*/false, CostKind);
+ if (!AllConsts) {
+ auto *VecCondTy =
+ cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
+ VectorCost =
+ TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
+ /*IsUnsigned=*/false, CostKind);
+ }
CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
ScalarCost = TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy,
SclCondTy, RdxPred, CostKind) +
@@ -9422,11 +11435,14 @@ private:
case RecurKind::UMax:
case RecurKind::UMin: {
auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
- auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
- bool IsUnsigned =
- RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
- VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned,
- CostKind);
+ if (!AllConsts) {
+ auto *VecCondTy =
+ cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
+ bool IsUnsigned =
+ RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
+ VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
+ IsUnsigned, CostKind);
+ }
CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
ScalarCost = TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
SclCondTy, RdxPred, CostKind) +
@@ -9490,21 +11506,22 @@ static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
} while (true);
}
-static bool findBuildAggregate_rec(Instruction *LastInsertInst,
+static void findBuildAggregate_rec(Instruction *LastInsertInst,
TargetTransformInfo *TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,
SmallVectorImpl<Value *> &InsertElts,
unsigned OperandOffset) {
do {
Value *InsertedOperand = LastInsertInst->getOperand(1);
- Optional<int> OperandIndex = getInsertIndex(LastInsertInst, OperandOffset);
+ Optional<unsigned> OperandIndex =
+ getInsertIndex(LastInsertInst, OperandOffset);
if (!OperandIndex)
- return false;
+ return;
if (isa<InsertElementInst>(InsertedOperand) ||
isa<InsertValueInst>(InsertedOperand)) {
- if (!findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
- BuildVectorOpds, InsertElts, *OperandIndex))
- return false;
+ findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
+ BuildVectorOpds, InsertElts, *OperandIndex);
+
} else {
BuildVectorOpds[*OperandIndex] = InsertedOperand;
InsertElts[*OperandIndex] = LastInsertInst;
@@ -9514,7 +11531,6 @@ static bool findBuildAggregate_rec(Instruction *LastInsertInst,
(isa<InsertValueInst>(LastInsertInst) ||
isa<InsertElementInst>(LastInsertInst)) &&
LastInsertInst->hasOneUse());
- return true;
}
/// Recognize construction of vectors like
@@ -9549,13 +11565,11 @@ static bool findBuildAggregate(Instruction *LastInsertInst,
BuildVectorOpds.resize(*AggregateSize);
InsertElts.resize(*AggregateSize);
- if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
- 0)) {
- llvm::erase_value(BuildVectorOpds, nullptr);
- llvm::erase_value(InsertElts, nullptr);
- if (BuildVectorOpds.size() >= 2)
- return true;
- }
+ findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts, 0);
+ llvm::erase_value(BuildVectorOpds, nullptr);
+ llvm::erase_value(InsertElts, nullptr);
+ if (BuildVectorOpds.size() >= 2)
+ return true;
return false;
}
@@ -9642,7 +11656,8 @@ static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
/// performed.
static bool tryToVectorizeHorReductionOrInstOperands(
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
- TargetTransformInfo *TTI,
+ TargetTransformInfo *TTI, ScalarEvolution &SE, const DataLayout &DL,
+ const TargetLibraryInfo &TLI,
const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) {
if (!ShouldVectorizeHor)
return false;
@@ -9661,7 +11676,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
// horizontal reduction.
// Interrupt the process if the Root instruction itself was vectorized or all
// sub-trees not higher than RecursionMaxDepth were analyzed/vectorized.
- // Skip the analysis of CmpInsts.Compiler implements postanalysis of the
+ // Skip the analysis of CmpInsts. Compiler implements postanalysis of the
// CmpInsts so we can skip extra attempts in
// tryToVectorizeHorReductionOrInstOperands and save compile time.
std::queue<std::pair<Instruction *, unsigned>> Stack;
@@ -9669,13 +11684,16 @@ static bool tryToVectorizeHorReductionOrInstOperands(
SmallPtrSet<Value *, 8> VisitedInstrs;
SmallVector<WeakTrackingVH> PostponedInsts;
bool Res = false;
- auto &&TryToReduce = [TTI, &P, &R](Instruction *Inst, Value *&B0,
- Value *&B1) -> Value * {
+ auto &&TryToReduce = [TTI, &SE, &DL, &P, &R, &TLI](Instruction *Inst,
+ Value *&B0,
+ Value *&B1) -> Value * {
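+ // Roots already analyzed as reductions are skipped, so the matching work
+ // is not repeated on the same instruction.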
+ if (R.isAnalyzedReductionRoot(Inst))
+ return nullptr;
bool IsBinop = matchRdxBop(Inst, B0, B1);
bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
if (IsBinop || IsSelect) {
HorizontalReduction HorRdx;
- if (HorRdx.matchAssociativeReduction(P, Inst))
+ if (HorRdx.matchAssociativeReduction(P, Inst, SE, DL, TLI))
return HorRdx.tryToReduce(R, TTI);
}
return nullptr;
@@ -9720,7 +11738,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
// Do not try to vectorize CmpInst operands, this is done separately.
// Final attempt for binop args vectorization should happen after the loop
// to try to find reductions.
- if (!isa<CmpInst>(Inst))
+ if (!isa<CmpInst, InsertElementInst, InsertValueInst>(Inst))
PostponedInsts.push_back(Inst);
}
@@ -9733,8 +11751,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
if (auto *I = dyn_cast<Instruction>(Op))
// Do not try to vectorize CmpInst operands, this is done
// separately.
- if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
- I->getParent() == BB)
+ if (!isa<PHINode, CmpInst, InsertElementInst, InsertValueInst>(I) &&
+ !R.isDeleted(I) && I->getParent() == BB)
Stack.emplace(I, Level);
}
// Try to vectorized binops where reductions were not found.
@@ -9758,8 +11776,8 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
return tryToVectorize(I, R);
};
- return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
- ExtraVectorization);
+ return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI, *SE, *DL,
+ *TLI, ExtraVectorization);
}
bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
@@ -9927,12 +11945,16 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
for (auto *I : reverse(Instructions)) {
if (R.isDeleted(I))
continue;
- if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
+ if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) {
OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
- else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
+ } else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I)) {
OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
- else if (isa<CmpInst>(I))
+ } else if (isa<CmpInst>(I)) {
PostponedCmps.push_back(I);
+ continue;
+ }
+ // Try to find reductions in buildvector sequences.
+ OpsChanged |= vectorizeRootInstruction(nullptr, I, BB, R, TTI);
}
if (AtTerminator) {
// Try to find reductions first.
@@ -10350,7 +12372,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
DomTreeNodeBase<llvm::BasicBlock> *NodeI2 =
DT->getNode(I2->getParent());
assert(NodeI1 && "Should only process reachable instructions");
- assert(NodeI1 && "Should only process reachable instructions");
+ assert(NodeI2 && "Should only process reachable instructions");
assert((NodeI1 == NodeI2) ==
(NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 8822c0004eb2..97f2b1a93815 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -72,17 +72,17 @@ class VPRecipeBuilder {
VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range, VPlanPtr &Plan);
- /// Check if an induction recipe should be constructed for \I. If so build and
- /// return it. If not, return null.
- VPWidenIntOrFpInductionRecipe *
- tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands,
- VFRange &Range) const;
+ /// Check if an induction recipe should be constructed for \p Phi. If so build
+ /// and return it. If not, return null.
+ VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi,
+ ArrayRef<VPValue *> Operands,
+ VPlan &Plan, VFRange &Range);
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
VPWidenIntOrFpInductionRecipe *
tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
- VFRange &Range, VPlan &Plan) const;
+ VFRange &Range, VPlan &Plan);
/// Handle non-loop phi nodes. Return a VPValue, if all incoming values match
/// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 342d4a074e10..4d709097c306 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -23,11 +23,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
@@ -35,13 +34,13 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
-#include <iterator>
#include <string>
#include <vector>
@@ -60,7 +59,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
}
#endif
-Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder,
+Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
const ElementCount &VF) const {
switch (LaneKind) {
case VPLane::Kind::ScalableLast:
@@ -158,25 +157,25 @@ void VPBlockBase::setPlan(VPlan *ParentPlan) {
}
/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
-const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
+const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
const VPBlockBase *Block = this;
while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
- Block = Region->getExit();
+ Block = Region->getExiting();
return cast<VPBasicBlock>(Block);
}
-VPBasicBlock *VPBlockBase::getExitBasicBlock() {
+VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
VPBlockBase *Block = this;
while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
- Block = Region->getExit();
+ Block = Region->getExiting();
return cast<VPBasicBlock>(Block);
}
VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
if (!Successors.empty() || !Parent)
return this;
- assert(Parent->getExit() == this &&
- "Block w/o successors not the exit of its parent.");
+ assert(Parent->getExiting() == this &&
+ "Block w/o successors not the exiting block of its parent.");
return Parent->getEnclosingBlockWithSuccessors();
}
@@ -188,28 +187,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
-VPValue *VPBlockBase::getCondBit() {
- return CondBitUser.getSingleOperandOrNull();
-}
-
-const VPValue *VPBlockBase::getCondBit() const {
- return CondBitUser.getSingleOperandOrNull();
-}
-
-void VPBlockBase::setCondBit(VPValue *CV) { CondBitUser.resetSingleOpUser(CV); }
-
-VPValue *VPBlockBase::getPredicate() {
- return PredicateUser.getSingleOperandOrNull();
-}
-
-const VPValue *VPBlockBase::getPredicate() const {
- return PredicateUser.getSingleOperandOrNull();
-}
-
-void VPBlockBase::setPredicate(VPValue *CV) {
- PredicateUser.resetSingleOpUser(CV);
-}
-
void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
@@ -245,6 +222,52 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
// set(Def, Extract, Instance);
return Extract;
}
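+// Map a recipe to the IR basic block serving as the preheader of its
+// enclosing loop region.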
+BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
+ VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
+ return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+}
+
+void VPTransformState::addNewMetadata(Instruction *To,
+ const Instruction *Orig) {
+ // If the loop was versioned with memchecks, add the corresponding no-alias
+ // metadata.
+ if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
+ LVer->annotateInstWithNoAlias(To, Orig);
+}
+
+void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
+ propagateMetadata(To, From);
+ addNewMetadata(To, From);
+}
+
+void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
+ for (Value *V : To) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ addMetadata(I, From);
+ }
+}
+
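+// Set the builder's debug location from \p V when it is an instruction,
+// scaling the discriminator's duplication factor by UF * VF when profiling
+// debug info is requested; otherwise clear the current location.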
+void VPTransformState::setDebugLocFromInst(const Value *V) {
+ if (const Instruction *Inst = dyn_cast_or_null<Instruction>(V)) {
+ const DILocation *DIL = Inst->getDebugLoc();
+
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
+ !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
+ // FIXME: For scalable vectors, assume vscale=1.
+ auto NewDIL =
+ DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
+ if (NewDIL)
+ Builder.SetCurrentDebugLocation(*NewDIL);
+ else
+ LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ } else
+ Builder.SetCurrentDebugLocation(DIL);
+ } else
+ Builder.SetCurrentDebugLocation(DebugLoc());
+}
BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
@@ -252,43 +275,36 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
// Pred stands for Predecessor. Prev stands for Previous - last visited/created.
BasicBlock *PrevBB = CFG.PrevBB;
BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
- PrevBB->getParent(), CFG.LastBB);
+ PrevBB->getParent(), CFG.ExitBB);
LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
// Hook up the new basic block to its predecessors.
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
- VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
- auto &PredVPSuccessors = PredVPBB->getSuccessors();
+ VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
+ auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
- // In outer loop vectorization scenario, the predecessor BBlock may not yet
- // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
- // vectorization. We do not encounter this case in inner loop vectorization
- // as we start out by building a loop skeleton with the vector loop header
- // and latch blocks. As a result, we never enter this function for the
- // header block in the non VPlan-native path.
- if (!PredBB) {
- assert(EnableVPlanNativePath &&
- "Unexpected null predecessor in non VPlan-native path");
- CFG.VPBBsToFix.push_back(PredVPBB);
- continue;
- }
-
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
+
+ auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
if (isa<UnreachableInst>(PredBBTerminator)) {
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
+ DebugLoc DL = PredBBTerminator->getDebugLoc();
PredBBTerminator->eraseFromParent();
- BranchInst::Create(NewBB, PredBB);
+ auto *Br = BranchInst::Create(NewBB, PredBB);
+ Br->setDebugLoc(DL);
+ } else if (TermBr && !TermBr->isConditional()) {
+ TermBr->setSuccessor(0, NewBB);
} else {
- assert(PredVPSuccessors.size() == 2 &&
- "Predecessor ending with branch must have two successors.");
+ // Set each forward successor here when it is created, excluding
+ // backedges. A backward successor is set when the branch is created.
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
- assert(!PredBBTerminator->getSuccessor(idx) &&
+ assert(!TermBr->getSuccessor(idx) &&
"Trying to reset an existing successor block.");
- PredBBTerminator->setSuccessor(idx, NewBB);
+ TermBr->setSuccessor(idx, NewBB);
}
}
return NewBB;
@@ -300,27 +316,51 @@ void VPBasicBlock::execute(VPTransformState *State) {
VPBlockBase *SingleHPred = nullptr;
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
- // 1. Create an IR basic block, or reuse the last one if possible.
- // The last IR basic block is reused, as an optimization, in three cases:
- // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
- // B. when the current VPBB has a single (hierarchical) predecessor which
- // is PrevVPBB and the latter has a single (hierarchical) successor; and
- // C. when the current VPBB is an entry of a region replica - where PrevVPBB
- // is the exit of this region from a previous instance, or the predecessor
- // of this region.
- if (PrevVPBB && /* A */
- !((SingleHPred = getSingleHierarchicalPredecessor()) &&
- SingleHPred->getExitBasicBlock() == PrevVPBB &&
- PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
- !(Replica && getPredecessors().empty())) { /* C */
+ auto IsLoopRegion = [](VPBlockBase *BB) {
+ auto *R = dyn_cast<VPRegionBlock>(BB);
+ return R && !R->isReplicator();
+ };
+
+ // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
+ if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
+ // ExitBB can be re-used for the exit block of the Plan.
+ NewBB = State->CFG.ExitBB;
+ State->CFG.PrevBB = NewBB;
+
+ // Update the branch instruction in the predecessor to branch to ExitBB.
+ VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
+ VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
+ assert(PredVPB->getSingleSuccessor() == this &&
+ "predecessor must have the current block as only successor");
+ BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
+ // The Exit block of a loop is always set to be successor 0 of the Exiting
+ // block.
+ cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
+ } else if (PrevVPBB && /* A */
+ !((SingleHPred = getSingleHierarchicalPredecessor()) &&
+ SingleHPred->getExitingBasicBlock() == PrevVPBB &&
+ PrevVPBB->getSingleHierarchicalSuccessor() &&
+ (SingleHPred->getParent() == getEnclosingLoopRegion() &&
+ !IsLoopRegion(SingleHPred))) && /* B */
+ !(Replica && getPredecessors().empty())) { /* C */
+ // The last IR basic block is reused, as an optimization, in three cases:
+ // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
+ // B. when the current VPBB has a single (hierarchical) predecessor which
+ // is PrevVPBB and the latter has a single (hierarchical) successor which
+ // both are in the same non-replicator region; and
+ // C. when the current VPBB is an entry of a region replica - where PrevVPBB
+ // is the exiting VPBB of this region from a previous instance, or the
+ // predecessor of this region.
+
NewBB = createEmptyBasicBlock(State->CFG);
State->Builder.SetInsertPoint(NewBB);
// Temporarily terminate with unreachable until CFG is rewired.
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
+ // Register NewBB in its loop. In innermost loops it is the same for all
+ // BBs.
+ if (State->CurrentVectorLoop)
+ State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
State->Builder.SetInsertPoint(Terminator);
- // Register NewBB in its loop. In innermost loops its the same for all BB's.
- Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
- L->addBasicBlockToLoop(NewBB, *State->LI);
State->CFG.PrevBB = NewBB;
}
@@ -334,29 +374,6 @@ void VPBasicBlock::execute(VPTransformState *State) {
for (VPRecipeBase &Recipe : Recipes)
Recipe.execute(*State);
- VPValue *CBV;
- if (EnableVPlanNativePath && (CBV = getCondBit())) {
- assert(CBV->getUnderlyingValue() &&
- "Unexpected null underlying value for condition bit");
-
- // Condition bit value in a VPBasicBlock is used as the branch selector. In
- // the VPlan-native path case, since all branches are uniform we generate a
- // branch instruction using the condition value from vector lane 0 and dummy
- // successors. The successors are fixed later when the successor blocks are
- // visited.
- Value *NewCond = State->get(CBV, {0, 0});
-
- // Replace the temporary unreachable terminator with the new conditional
- // branch.
- auto *CurrentTerminator = NewBB->getTerminator();
- assert(isa<UnreachableInst>(CurrentTerminator) &&
- "Expected to replace unreachable terminator with conditional "
- "branch.");
- auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
- CondBr->setSuccessor(0, nullptr);
- ReplaceInstWithInst(CurrentTerminator, CondBr);
- }
-
LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
@@ -395,6 +412,61 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
return SplitBlock;
}
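+// Return the closest enclosing loop region, stepping over a surrounding
+// replicate region if present; may be null for blocks without a parent.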
+VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
+ VPRegionBlock *P = getParent();
+ if (P && P->isReplicator()) {
+ P = P->getParent();
+ assert(!cast<VPRegionBlock>(P)->isReplicator() &&
+ "unexpected nested replicate regions");
+ }
+ return P;
+}
+
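+// Return true if \p VPBB is terminated by a conditional-branch recipe
+// (BranchOnMask, BranchOnCond or BranchOnCount), asserting that this is
+// consistent with the block's number of successors.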
+static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
+ if (VPBB->empty()) {
+ assert(
+ VPBB->getNumSuccessors() < 2 &&
+ "block with multiple successors doesn't have a recipe as terminator");
+ return false;
+ }
+
+ const VPRecipeBase *R = &VPBB->back();
+ auto *VPI = dyn_cast<VPInstruction>(R);
+ bool IsCondBranch =
+ isa<VPBranchOnMaskRecipe>(R) ||
+ (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
+ VPI->getOpcode() == VPInstruction::BranchOnCount));
+ (void)IsCondBranch;
+
+ if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
+ assert(IsCondBranch && "block with multiple successors not terminated by "
+ "conditional branch recipe");
+
+ return true;
+ }
+
+ assert(
+ !IsCondBranch &&
+ "block with 0 or 1 successors terminated by conditional branch recipe");
+ return false;
+}
+
+VPRecipeBase *VPBasicBlock::getTerminator() {
+ if (hasConditionalTerminator(this))
+ return &back();
+ return nullptr;
+}
+
+const VPRecipeBase *VPBasicBlock::getTerminator() const {
+ if (hasConditionalTerminator(this))
+ return &back();
+ return nullptr;
+}
+
+bool VPBasicBlock::isExiting() const {
+ return getParent()->getExitingBasicBlock() == this;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
if (getSuccessors().empty()) {
@@ -411,13 +483,6 @@ void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << getName() << ":\n";
- if (const VPValue *Pred = getPredicate()) {
- O << Indent << "BlockPredicate:";
- Pred->printAsOperand(O, SlotTracker);
- if (const auto *PredInst = dyn_cast<VPInstruction>(Pred))
- O << " (" << PredInst->getParent()->getName() << ")";
- O << '\n';
- }
auto RecipeIndent = Indent + " ";
for (const VPRecipeBase &Recipe : *this) {
@@ -426,14 +491,6 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
}
printSuccessors(O, Indent);
-
- if (const VPValue *CBV = getCondBit()) {
- O << Indent << "CondBit: ";
- CBV->printAsOperand(O, SlotTracker);
- if (const auto *CBI = dyn_cast<VPInstruction>(CBV))
- O << " (" << CBI->getParent()->getName() << ")";
- O << '\n';
- }
}
#endif
@@ -448,25 +505,26 @@ void VPRegionBlock::execute(VPTransformState *State) {
ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
if (!isReplicator()) {
+ // Create and register the new vector loop.
+ Loop *PrevLoop = State->CurrentVectorLoop;
+ State->CurrentVectorLoop = State->LI->AllocateLoop();
+ BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
+ Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addChildLoop(State->CurrentVectorLoop);
+ else
+ State->LI->addTopLevelLoop(State->CurrentVectorLoop);
+
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
- if (EnableVPlanNativePath) {
- // The inner loop vectorization path does not represent loop preheader
- // and exit blocks as part of the VPlan. In the VPlan-native path, skip
- // vectorizing loop preheader block. In future, we may replace this
- // check with the check for loop preheader.
- if (Block->getNumPredecessors() == 0)
- continue;
-
- // Skip vectorizing loop exit block. In future, we may replace this
- // check with the check for loop exit.
- if (Block->getNumSuccessors() == 0)
- continue;
- }
-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
+
+ State->CurrentVectorLoop = PrevLoop;
return;
}
@@ -508,341 +566,32 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
-bool VPRecipeBase::mayWriteToMemory() const {
- switch (getVPDefID()) {
- case VPWidenMemoryInstructionSC: {
- return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
- }
- case VPReplicateSC:
- case VPWidenCallSC:
- return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
- ->mayWriteToMemory();
- case VPBranchOnMaskSC:
- return false;
- case VPWidenIntOrFpInductionSC:
- case VPWidenCanonicalIVSC:
- case VPWidenPHISC:
- case VPBlendSC:
- case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
- case VPWidenSelectSC: {
- const Instruction *I =
- dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
- (void)I;
- assert((!I || !I->mayWriteToMemory()) &&
- "underlying instruction may write to memory");
- return false;
- }
- default:
- return true;
- }
-}
-
-bool VPRecipeBase::mayReadFromMemory() const {
- switch (getVPDefID()) {
- case VPWidenMemoryInstructionSC: {
- return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
- }
- case VPReplicateSC:
- case VPWidenCallSC:
- return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
- ->mayReadFromMemory();
- case VPBranchOnMaskSC:
- return false;
- case VPWidenIntOrFpInductionSC:
- case VPWidenCanonicalIVSC:
- case VPWidenPHISC:
- case VPBlendSC:
- case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
- case VPWidenSelectSC: {
- const Instruction *I =
- dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
- (void)I;
- assert((!I || !I->mayReadFromMemory()) &&
- "underlying instruction may read from memory");
- return false;
- }
- default:
- return true;
- }
-}
-
-bool VPRecipeBase::mayHaveSideEffects() const {
- switch (getVPDefID()) {
- case VPBranchOnMaskSC:
- return false;
- case VPWidenIntOrFpInductionSC:
- case VPWidenCanonicalIVSC:
- case VPWidenPHISC:
- case VPBlendSC:
- case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
- case VPWidenSelectSC: {
- const Instruction *I =
- dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
- (void)I;
- assert((!I || !I->mayHaveSideEffects()) &&
- "underlying instruction has side-effects");
- return false;
- }
- case VPReplicateSC: {
- auto *R = cast<VPReplicateRecipe>(this);
- return R->getUnderlyingInstr()->mayHaveSideEffects();
- }
- default:
- return true;
- }
-}
-
-void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
- assert(!Parent && "Recipe already in some VPBasicBlock");
- assert(InsertPos->getParent() &&
- "Insertion position not in any VPBasicBlock");
- Parent = InsertPos->getParent();
- Parent->getRecipeList().insert(InsertPos->getIterator(), this);
-}
-
-void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
- assert(!Parent && "Recipe already in some VPBasicBlock");
- assert(InsertPos->getParent() &&
- "Insertion position not in any VPBasicBlock");
- Parent = InsertPos->getParent();
- Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
-}
-
-void VPRecipeBase::removeFromParent() {
- assert(getParent() && "Recipe not in any VPBasicBlock");
- getParent()->getRecipeList().remove(getIterator());
- Parent = nullptr;
-}
-
-iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
- assert(getParent() && "Recipe not in any VPBasicBlock");
- return getParent()->getRecipeList().erase(getIterator());
-}
-
-void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
- removeFromParent();
- insertAfter(InsertPos);
-}
-
-void VPRecipeBase::moveBefore(VPBasicBlock &BB,
- iplist<VPRecipeBase>::iterator I) {
- assert(I == BB.end() || I->getParent() == &BB);
- removeFromParent();
- Parent = &BB;
- BB.getRecipeList().insert(I, this);
-}
-
-void VPInstruction::generateInstruction(VPTransformState &State,
- unsigned Part) {
- IRBuilder<> &Builder = State.Builder;
- Builder.SetCurrentDebugLocation(DL);
-
- if (Instruction::isBinaryOp(getOpcode())) {
- Value *A = State.get(getOperand(0), Part);
- Value *B = State.get(getOperand(1), Part);
- Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
- State.set(this, V, Part);
- return;
- }
-
- switch (getOpcode()) {
- case VPInstruction::Not: {
- Value *A = State.get(getOperand(0), Part);
- Value *V = Builder.CreateNot(A);
- State.set(this, V, Part);
- break;
- }
- case VPInstruction::ICmpULE: {
- Value *IV = State.get(getOperand(0), Part);
- Value *TC = State.get(getOperand(1), Part);
- Value *V = Builder.CreateICmpULE(IV, TC);
- State.set(this, V, Part);
- break;
- }
- case Instruction::Select: {
- Value *Cond = State.get(getOperand(0), Part);
- Value *Op1 = State.get(getOperand(1), Part);
- Value *Op2 = State.get(getOperand(2), Part);
- Value *V = Builder.CreateSelect(Cond, Op1, Op2);
- State.set(this, V, Part);
- break;
- }
- case VPInstruction::ActiveLaneMask: {
- // Get first lane of vector induction variable.
- Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
- // Get the original loop tripcount.
- Value *ScalarTC = State.get(getOperand(1), Part);
-
- auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
- auto *PredTy = VectorType::get(Int1Ty, State.VF);
- Instruction *Call = Builder.CreateIntrinsic(
- Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
- {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
- State.set(this, Call, Part);
- break;
- }
- case VPInstruction::FirstOrderRecurrenceSplice: {
- // Generate code to combine the previous and current values in vector v3.
- //
- // vector.ph:
- // v_init = vector(..., ..., ..., a[-1])
- // br vector.body
- //
- // vector.body
- // i = phi [0, vector.ph], [i+4, vector.body]
- // v1 = phi [v_init, vector.ph], [v2, vector.body]
- // v2 = a[i, i+1, i+2, i+3];
- // v3 = vector(v1(3), v2(0, 1, 2))
-
- // For the first part, use the recurrence phi (v1), otherwise v2.
- auto *V1 = State.get(getOperand(0), 0);
- Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
- if (!PartMinus1->getType()->isVectorTy()) {
- State.set(this, PartMinus1, Part);
- } else {
- Value *V2 = State.get(getOperand(1), Part);
- State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
- }
- break;
- }
-
- case VPInstruction::CanonicalIVIncrement:
- case VPInstruction::CanonicalIVIncrementNUW: {
- Value *Next = nullptr;
- if (Part == 0) {
- bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
- auto *Phi = State.get(getOperand(0), 0);
- // The loop step is equal to the vectorization factor (num of SIMD
- // elements) times the unroll factor (num of SIMD instructions).
- Value *Step =
- createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
- Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
- } else {
- Next = State.get(this, 0);
- }
-
- State.set(this, Next, Part);
- break;
- }
- case VPInstruction::BranchOnCount: {
- if (Part != 0)
- break;
- // First create the compare.
- Value *IV = State.get(getOperand(0), Part);
- Value *TC = State.get(getOperand(1), Part);
- Value *Cond = Builder.CreateICmpEQ(IV, TC);
-
- // Now create the branch.
- auto *Plan = getParent()->getPlan();
- VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
- VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
- if (Header->empty()) {
- assert(EnableVPlanNativePath &&
- "empty entry block only expected in VPlanNativePath");
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
+void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
+ Value *CanonicalIVStartValue,
+ VPTransformState &State,
+ bool IsEpilogueVectorization) {
+
+ VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
+ auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
+ // Try to simplify BranchOnCount to 'BranchOnCond true' if TC <= VF * UF when
+ // preparing to execute the plan for the main vector loop.
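+ // E.g. with TripCount == 8, VF == 8 and UF == 1 the vector body executes
+ // exactly once, so the backedge is never taken and the latch branch can
+ // exit unconditionally.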
+ if (!IsEpilogueVectorization && Term &&
+ Term->getOpcode() == VPInstruction::BranchOnCount &&
+ isa<ConstantInt>(TripCountV)) {
+ ConstantInt *C = cast<ConstantInt>(TripCountV);
+ uint64_t TCVal = C->getZExtValue();
+ if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
+ auto *BOC =
+ new VPInstruction(VPInstruction::BranchOnCond,
+ {getOrAddExternalDef(State.Builder.getTrue())});
+ Term->eraseFromParent();
+ ExitingVPBB->appendRecipe(BOC);
+ // TODO: Further simplifications are possible
+ // 1. Replace inductions with constants.
+ // 2. Replace vector loop region with VPBasicBlock.
}
- // TODO: Once the exit block is modeled in VPlan, use it instead of going
- // through State.CFG.LastBB.
- BasicBlock *Exit =
- cast<BranchInst>(State.CFG.LastBB->getTerminator())->getSuccessor(0);
-
- Builder.CreateCondBr(Cond, Exit, State.CFG.VPBB2IRBB[Header]);
- Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- break;
- }
- default:
- llvm_unreachable("Unsupported opcode for instruction");
- }
-}
-
-void VPInstruction::execute(VPTransformState &State) {
- assert(!State.Instance && "VPInstruction executing an Instance");
- IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
- State.Builder.setFastMathFlags(FMF);
- for (unsigned Part = 0; Part < State.UF; ++Part)
- generateInstruction(State, Part);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPInstruction::dump() const {
- VPSlotTracker SlotTracker(getParent()->getPlan());
- print(dbgs(), "", SlotTracker);
-}
-
-void VPInstruction::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "EMIT ";
-
- if (hasResult()) {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
-
- switch (getOpcode()) {
- case VPInstruction::Not:
- O << "not";
- break;
- case VPInstruction::ICmpULE:
- O << "icmp ule";
- break;
- case VPInstruction::SLPLoad:
- O << "combined load";
- break;
- case VPInstruction::SLPStore:
- O << "combined store";
- break;
- case VPInstruction::ActiveLaneMask:
- O << "active lane mask";
- break;
- case VPInstruction::FirstOrderRecurrenceSplice:
- O << "first-order splice";
- break;
- case VPInstruction::CanonicalIVIncrement:
- O << "VF * UF + ";
- break;
- case VPInstruction::CanonicalIVIncrementNUW:
- O << "VF * UF +(nuw) ";
- break;
- case VPInstruction::BranchOnCount:
- O << "branch-on-count ";
- break;
- default:
- O << Instruction::getOpcodeName(getOpcode());
- }
-
- O << FMF;
-
- for (const VPValue *Operand : operands()) {
- O << " ";
- Operand->printAsOperand(O, SlotTracker);
}
- if (DL) {
- O << ", !dbg ";
- DL.print(O);
- }
-}
-#endif
-
-void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
- // Make sure the VPInstruction is a floating-point operation.
- assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
- Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
- Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
- Opcode == Instruction::FCmp) &&
- "this op can't take fast-math flags");
- FMF = FMFNew;
-}
-
-void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
- Value *CanonicalIVStartValue,
- VPTransformState &State) {
// Check if the trip count is needed, and if so build it.
if (TripCount && TripCount->getNumUsers()) {
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
@@ -868,111 +617,78 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
if (CanonicalIVStartValue) {
- VPValue *VPV = new VPValue(CanonicalIVStartValue);
- addExternalDef(VPV);
+ VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
auto *IV = getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
+ if (isa<VPScalarIVStepsRecipe>(U))
+ return true;
auto *VPI = cast<VPInstruction>(U);
return VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrement ||
VPI->getOpcode() ==
VPInstruction::CanonicalIVIncrementNUW;
}) &&
- "the canonical IV should only be used by its increments when "
+ "the canonical IV should only be used by its increments or "
+ "ScalarIVSteps when "
"resetting the start value");
IV->setOperand(0, VPV);
}
}
-/// Generate the code inside the body of the vectorized loop. Assumes a single
-/// LoopVectorBody basic-block was created for this. Introduce additional
-/// basic-blocks as needed, and fill them all.
+/// Generate the code inside the preheader and body of the vectorized loop.
+/// Assumes a single pre-header basic-block was created for this. Introduce
+/// additional basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
- // 0. Set the reverse mapping from VPValues to Values for code generation.
+ // Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
- BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
- State->CFG.VectorPreHeader = VectorPreHeaderBB;
- BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
- assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
-
- // 1. Make room to generate basic-blocks inside loop body if needed.
- BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
- VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
- Loop *L = State->LI->getLoopFor(VectorHeaderBB);
- L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
- // Remove the edge between Header and Latch to allow other connections.
- // Temporarily terminate with unreachable until CFG is rewired.
- // Note: this asserts the generated code's assumption that
- // getFirstInsertionPt() can be dereferenced into an Instruction.
- VectorHeaderBB->getTerminator()->eraseFromParent();
- State->Builder.SetInsertPoint(VectorHeaderBB);
- UnreachableInst *Terminator = State->Builder.CreateUnreachable();
- State->Builder.SetInsertPoint(Terminator);
-
- // 2. Generate code in loop body.
+ // Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
- State->CFG.PrevBB = VectorHeaderBB;
- State->CFG.LastBB = VectorLatchBB;
+ State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
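+ // PrevBB is the vector pre-header at this point; its single successor is
+ // the block the vectorized loop eventually exits into.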
+ BasicBlock *VectorPreHeader = State->CFG.PrevBB;
+ State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
+ // Generate code in the loop pre-header and body.
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
- // Setup branch terminator successors for VPBBs in VPBBsToFix based on
- // VPBB's successors.
- for (auto VPBB : State->CFG.VPBBsToFix) {
- assert(EnableVPlanNativePath &&
- "Unexpected VPBBsToFix in non VPlan-native path");
- BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
- assert(BB && "Unexpected null basic block for VPBB");
-
- unsigned Idx = 0;
- auto *BBTerminator = BB->getTerminator();
-
- for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
- VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
- BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
- ++Idx;
- }
- }
-
- // 3. Merge the temporary latch created with the last basic-block filled.
- BasicBlock *LastBB = State->CFG.PrevBB;
- assert(isa<BranchInst>(LastBB->getTerminator()) &&
- "Expected VPlan CFG to terminate with branch");
-
- // Move both the branch and check from LastBB to VectorLatchBB.
- auto *LastBranch = cast<BranchInst>(LastBB->getTerminator());
- LastBranch->moveBefore(VectorLatchBB->getTerminator());
- VectorLatchBB->getTerminator()->eraseFromParent();
- // Move condition so it is guaranteed to be next to branch. This is only done
- // to avoid excessive test updates.
- // TODO: Remove special handling once the increments for all inductions are
- // modeled explicitly in VPlan.
- cast<Instruction>(LastBranch->getCondition())->moveBefore(LastBranch);
- // Connect LastBB to VectorLatchBB to facilitate their merge.
- BranchInst::Create(VectorLatchBB, LastBB);
-
- // Merge LastBB with Latch.
- bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
- (void)Merged;
- assert(Merged && "Could not merge last basic block with latch.");
- VectorLatchBB = LastBB;
+ VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
+ BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
- VPBasicBlock *Header = Entry->getEntryBasicBlock();
- if (Header->empty()) {
- assert(EnableVPlanNativePath);
- Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
- }
+ VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
for (VPRecipeBase &R : Header->phis()) {
// Skip phi-like recipes that generate their backedge values themselves.
- // TODO: Model their backedge values explicitly.
- if (isa<VPWidenIntOrFpInductionRecipe>(&R) || isa<VPWidenPHIRecipe>(&R))
+ if (isa<VPWidenPHIRecipe>(&R))
+ continue;
+
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ PHINode *Phi = nullptr;
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
+ } else {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ // TODO: Split off the case that all users of a pointer phi are scalar
+ // from the VPWidenPointerInductionRecipe.
+ if (WidenPhi->onlyScalarsGenerated(State->VF))
+ continue;
+
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
+ }
+
+ Phi->setIncomingBlock(1, VectorLatchBB);
+
+ // Move the last step to the end of the latch block. This ensures
+ // consistent placement of all induction updates.
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+ Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
continue;
+ }
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
// For canonical IV, first-order recurrences and in-order reduction phis,
@@ -993,9 +709,12 @@ void VPlan::execute(VPTransformState *State) {
}
// We do not attempt to preserve DT for outer loop vectorization currently.
- if (!EnableVPlanNativePath)
- updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
- L->getExitBlock());
+ if (!EnableVPlanNativePath) {
+ BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
+ State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
+ updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
+ State->CFG.ExitBB);
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1021,6 +740,17 @@ void VPlan::print(raw_ostream &O) const {
O << '\n';
Block->print(O, "", SlotTracker);
}
+
+ if (!LiveOuts.empty())
+ O << "\n";
+ for (auto &KV : LiveOuts) {
+ O << "Live-out ";
+ KV.second->getPhi()->printAsOperand(O);
+ O << " = ";
+ KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
+ O << "\n";
+ }
+
O << "}\n";
}
@@ -1034,11 +764,14 @@ LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif
-void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
+void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
+ assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
+ LiveOuts.insert({PN, new VPLiveOut(PN, V)});
+}
+
+void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
BasicBlock *LoopLatchBB,
BasicBlock *LoopExitBB) {
- BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
- assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
// The vector body may be more than a single basic-block by this point.
// Update the dominator tree information inside the vector body by propagating
// it from header to latch, expecting only triangular control-flow, if any.
@@ -1075,6 +808,7 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
Twine(getOrCreateBID(Block));
@@ -1122,8 +856,8 @@ void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
bool Hidden, const Twine &Label) {
// Due to "dot" we print an edge between two regions as an edge between the
- // exit basic block and the entry basic of the respective regions.
- const VPBlockBase *Tail = From->getExitBasicBlock();
+ // exiting basic block and the entry basic block of the respective regions.
+ const VPBlockBase *Tail = From->getExitingBasicBlock();
const VPBlockBase *Head = To->getEntryBasicBlock();
OS << Indent << getUID(Tail) << " -> " << getUID(Head);
OS << " [ label=\"" << Label << '\"';
@@ -1213,328 +947,6 @@ void VPlanIngredient::print(raw_ostream &O) const {
V->printAsOperand(O, false);
}
-void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-CALL ";
-
- auto *CI = cast<CallInst>(getUnderlyingInstr());
- if (CI->getType()->isVoidTy())
- O << "void ";
- else {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
-
- O << "call @" << CI->getCalledFunction()->getName() << "(";
- printOperands(O, SlotTracker);
- O << ")";
-}
-
-void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-SELECT ";
- printAsOperand(O, SlotTracker);
- O << " = select ";
- getOperand(0)->printAsOperand(O, SlotTracker);
- O << ", ";
- getOperand(1)->printAsOperand(O, SlotTracker);
- O << ", ";
- getOperand(2)->printAsOperand(O, SlotTracker);
- O << (InvariantCond ? " (condition is loop invariant)" : "");
-}
-
-void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN ";
- printAsOperand(O, SlotTracker);
- O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
- printOperands(O, SlotTracker);
-}
-
-void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-INDUCTION";
- if (getTruncInst()) {
- O << "\\l\"";
- O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
- O << " +\n" << Indent << "\" ";
- getVPValue(0)->printAsOperand(O, SlotTracker);
- } else
- O << " " << VPlanIngredient(IV);
-}
-#endif
-
-bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
- auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
- return StartC && StartC->isZero() && StepC && StepC->isOne();
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-GEP ";
- O << (IsPtrLoopInvariant ? "Inv" : "Var");
- size_t IndicesNumber = IsIndexLoopInvariant.size();
- for (size_t I = 0; I < IndicesNumber; ++I)
- O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
-
- O << " ";
- printAsOperand(O, SlotTracker);
- O << " = getelementptr ";
- printOperands(O, SlotTracker);
-}
-
-void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-PHI ";
-
- auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
- // Unless all incoming values are modeled in VPlan print the original PHI
- // directly.
- // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
- // values as VPValues.
- if (getNumOperands() != OriginalPhi->getNumOperands()) {
- O << VPlanIngredient(OriginalPhi);
- return;
- }
-
- printAsOperand(O, SlotTracker);
- O << " = phi ";
- printOperands(O, SlotTracker);
-}
-
-void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "BLEND ";
- Phi->printAsOperand(O, false);
- O << " =";
- if (getNumIncomingValues() == 1) {
- // Not a User of any mask: not really blending, this is a
- // single-predecessor phi.
- O << " ";
- getIncomingValue(0)->printAsOperand(O, SlotTracker);
- } else {
- for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
- O << " ";
- getIncomingValue(I)->printAsOperand(O, SlotTracker);
- O << "/";
- getMask(I)->printAsOperand(O, SlotTracker);
- }
- }
-}
-
-void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "REDUCE ";
- printAsOperand(O, SlotTracker);
- O << " = ";
- getChainOp()->printAsOperand(O, SlotTracker);
- O << " +";
- if (isa<FPMathOperator>(getUnderlyingInstr()))
- O << getUnderlyingInstr()->getFastMathFlags();
- O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
- getVecOp()->printAsOperand(O, SlotTracker);
- if (getCondOp()) {
- O << ", ";
- getCondOp()->printAsOperand(O, SlotTracker);
- }
- O << ")";
-}
-
-void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
-
- if (!getUnderlyingInstr()->getType()->isVoidTy()) {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
- O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
- printOperands(O, SlotTracker);
-
- if (AlsoPack)
- O << " (S->V)";
-}
-
-void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "PHI-PREDICATED-INSTRUCTION ";
- printAsOperand(O, SlotTracker);
- O << " = ";
- printOperands(O, SlotTracker);
-}
-
-void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN ";
-
- if (!isStore()) {
- printAsOperand(O, SlotTracker);
- O << " = ";
- }
- O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
-
- printOperands(O, SlotTracker);
-}
-#endif
-
-void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
- Value *Start = getStartValue()->getLiveInIRValue();
- PHINode *EntryPart = PHINode::Create(
- Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
- EntryPart->setDebugLoc(DL);
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(this, EntryPart, Part);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "EMIT ";
- printAsOperand(O, SlotTracker);
- O << " = CANONICAL-INDUCTION";
-}
-#endif
-
-void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
- Value *CanonicalIV = State.get(getOperand(0), 0);
- Type *STy = CanonicalIV->getType();
- IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
- ElementCount VF = State.VF;
- Value *VStart = VF.isScalar()
- ? CanonicalIV
- : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
- Value *VStep = createStepForVF(Builder, STy, VF, Part);
- if (VF.isVector()) {
- VStep = Builder.CreateVectorSplat(VF, VStep);
- VStep = Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
- }
- Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
- State.set(this, CanonicalVectorIV, Part);
- }
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "EMIT ";
- printAsOperand(O, SlotTracker);
- O << " = WIDEN-CANONICAL-INDUCTION ";
- printOperands(O, SlotTracker);
-}
-#endif
-
-void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
- // Create a vector from the initial value.
- auto *VectorInit = getStartValue()->getLiveInIRValue();
-
- Type *VecTy = State.VF.isScalar()
- ? VectorInit->getType()
- : VectorType::get(VectorInit->getType(), State.VF);
-
- if (State.VF.isVector()) {
- auto *IdxTy = Builder.getInt32Ty();
- auto *One = ConstantInt::get(IdxTy, 1);
- IRBuilder<>::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
- auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
- VectorInit = Builder.CreateInsertElement(
- PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
- }
-
- // Create a phi node for the new recurrence.
- PHINode *EntryPart = PHINode::Create(
- VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
- State.set(this, EntryPart, 0);
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
- printAsOperand(O, SlotTracker);
- O << " = phi ";
- printOperands(O, SlotTracker);
-}
-#endif
-
-void VPReductionPHIRecipe::execute(VPTransformState &State) {
- PHINode *PN = cast<PHINode>(getUnderlyingValue());
- auto &Builder = State.Builder;
-
- // In order to support recurrences we need to be able to vectorize Phi nodes.
- // Phi nodes have cycles, so we need to vectorize them in two stages. This is
- // stage #1: We create a new vector PHI node with no incoming edges. We'll use
- // this value when we vectorize all of the instructions that use the PHI.
- bool ScalarPHI = State.VF.isScalar() || IsInLoop;
- Type *VecTy =
- ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
-
- BasicBlock *HeaderBB = State.CFG.PrevBB;
- assert(State.LI->getLoopFor(HeaderBB)->getHeader() == HeaderBB &&
- "recipe must be in the vector loop header");
- unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *EntryPart =
- PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
- State.set(this, EntryPart, Part);
- }
-
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- VPValue *StartVPV = getStartValue();
- Value *StartV = StartVPV->getLiveInIRValue();
-
- Value *Iden = nullptr;
- RecurKind RK = RdxDesc.getRecurrenceKind();
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
- RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
- // MinMax reductions have the start value as their identity.
- if (ScalarPHI) {
- Iden = StartV;
- } else {
- IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
- StartV = Iden =
- Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
- }
- } else {
- Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
- RdxDesc.getFastMathFlags());
-
- if (!ScalarPHI) {
- Iden = Builder.CreateVectorSplat(State.VF, Iden);
- IRBuilderBase::InsertPointGuard IPBuilder(Builder);
- Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
- Constant *Zero = Builder.getInt32(0);
- StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
- }
- }
-
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *EntryPart = State.get(this, Part);
- // Make sure to add the reduction start value only to the
- // first unroll part.
- Value *StartVal = (Part == 0) ? StartV : Iden;
- cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
- }
-}
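// A standalone sketch (hypothetical enum, not the LLVM API) of how the
// incoming value from the preheader is chosen per unroll part above: only
// part 0 carries the user's start value, while the other parts start at the
// recurrence identity, so combining the parts later leaves them neutral.
#include <cstdio>

enum class RecurKind { Add, Mul, SMin };

static long identityFor(RecurKind RK, long StartV) {
  switch (RK) {
  case RecurKind::Add:
    return 0; // x + 0 == x
  case RecurKind::Mul:
    return 1; // x * 1 == x
  case RecurKind::SMin:
    return StartV; // min/max reductions use the start value as identity
  }
  return 0;
}

int main() {
  const unsigned UF = 4;
  const long StartV = 10;
  for (unsigned Part = 0; Part < UF; ++Part) {
    long Incoming = Part == 0 ? StartV : identityFor(RecurKind::Add, StartV);
    std::printf("part %u starts at %ld\n", Part, Incoming);
  }
  return 0;
}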
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-REDUCTION-PHI ";
-
- printAsOperand(O, SlotTracker);
- O << " = phi ";
- printOperands(O, SlotTracker);
-}
#endif
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
@@ -1594,7 +1006,10 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
continue;
assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
auto *VPInst = cast<VPInstruction>(&VPI);
- auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
+
+ auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
+ if (!Inst)
+ continue;
auto *IG = IAI.getInterleaveGroup(Inst);
if (!IG)
continue;
@@ -1622,7 +1037,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
InterleavedAccessInfo &IAI) {
Old2NewTy Old2New;
- visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
+ visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
}
void VPSlotTracker::assignSlot(const VPValue *V) {
@@ -1632,8 +1047,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
void VPSlotTracker::assignSlots(const VPlan &Plan) {
- for (const VPValue *V : Plan.VPExternalDefs)
- assignSlot(V);
+ for (const auto &P : Plan.VPExternalDefs)
+ assignSlot(P.second);
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
@@ -1651,7 +1066,19 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
}
bool vputils::onlyFirstLaneUsed(VPValue *Def) {
- return all_of(Def->users(), [Def](VPUser *U) {
- return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
- });
+ return all_of(Def->users(),
+ [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
+}
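// A standalone sketch (not the LLVM API): the query above simply asks
// "does every user of this value demand only lane 0?", expressed as all_of
// over the use list.
#include <algorithm>
#include <vector>

struct UserModel {
  bool FirstLaneOnly;
};

static bool onlyFirstLaneUsed(const std::vector<UserModel> &Users) {
  return std::all_of(Users.begin(), Users.end(),
                     [](const UserModel &U) { return U.FirstLaneOnly; });
}

int main() {
  std::vector<UserModel> Users{{true}, {true}};
  return onlyFirstLaneUsed(Users) ? 0 : 1;
}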
+
+VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
+ ScalarEvolution &SE) {
+ if (auto *E = dyn_cast<SCEVConstant>(Expr))
+ return Plan.getOrAddExternalDef(E->getValue());
+ if (auto *E = dyn_cast<SCEVUnknown>(Expr))
+ return Plan.getOrAddExternalDef(E->getValue());
+
+ VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
+ VPValue *Step = new VPExpandSCEVRecipe(Expr, SE);
+ Preheader->appendRecipe(cast<VPRecipeBase>(Step->getDef()));
+ return Step;
}
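// A standalone sketch (hypothetical types, not the LLVM API) of the dispatch
// above: SCEV expressions that already have a concrete IR value (constants
// and unknowns) are wrapped as cached external definitions, while anything
// else is modeled as an expansion recipe appended to the preheader.
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

struct ExprModel {
  bool HasLiveInValue; // true for the SCEVConstant/SCEVUnknown case
  std::string Name;
};
struct ValModel {
  std::string Origin;
};

struct PlanModel {
  std::unordered_map<std::string, std::unique_ptr<ValModel>> ExternalDefs;
  std::vector<std::unique_ptr<ValModel>> PreheaderRecipes;

  ValModel *getOrAddExternalDef(const std::string &N) {
    auto &Slot = ExternalDefs[N];
    if (!Slot)
      Slot = std::make_unique<ValModel>(ValModel{"external:" + N});
    return Slot.get();
  }
  ValModel *getOrCreateForExpr(const ExprModel &E) {
    if (E.HasLiveInValue)
      return getOrAddExternalDef(E.Name);
    PreheaderRecipes.push_back(
        std::make_unique<ValModel>(ValModel{"expand:" + E.Name}));
    return PreheaderRecipes.back().get();
  }
};

int main() {
  PlanModel Plan;
  ExprModel TripCount{true, "%n"};    // live-in -> external def
  ExprModel Stride{false, "{0,+,4}"}; // needs expansion in the preheader
  ValModel *A = Plan.getOrCreateForExpr(TripCount);
  ValModel *B = Plan.getOrCreateForExpr(TripCount); // cached: A == B
  Plan.getOrCreateForExpr(Stride);
  return A == B ? 0 : 1;
}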
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bcaabca692cc..09da4a545d0d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -25,27 +25,26 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
-#include "VPlanLoopInfo.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/Support/InstructionCost.h"
+#include "llvm/IR/FMF.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
-#include <map>
#include <string>
namespace llvm {
@@ -54,6 +53,7 @@ class BasicBlock;
class DominatorTree;
class InductionDescriptor;
class InnerLoopVectorizer;
+class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
@@ -67,10 +67,11 @@ class VPlanSlp;
/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
-Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF);
+Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
/// Return a value for Step multiplied by VF.
-Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, int64_t Step);
+Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
+ int64_t Step);
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
@@ -151,7 +152,7 @@ public:
/// Returns an expression describing the lane index that can be used at
/// runtime.
- Value *getAsRuntimeExpr(IRBuilder<> &Builder, const ElementCount &VF) const;
+ Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
/// Returns the Kind of lane offset.
Kind getKind() const { return LaneKind; }
@@ -199,10 +200,10 @@ struct VPIteration {
/// needed for generating the output IR.
struct VPTransformState {
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
- DominatorTree *DT, IRBuilder<> &Builder,
+ DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan)
- : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan) {
- }
+ : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
+ LVer(nullptr) {}
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
ElementCount VF;
@@ -298,6 +299,27 @@ struct VPTransformState {
Iter->second[Instance.Part][CacheIdx] = V;
}
+ /// Add additional metadata to \p To that was not present on \p Orig.
+ ///
+ /// Currently this is used to add the noalias annotations based on the
+ /// inserted memchecks. Use this for instructions that are *cloned* into the
+ /// vector loop.
+ void addNewMetadata(Instruction *To, const Instruction *Orig);
+
+ /// Add metadata from one instruction to another.
+ ///
+ /// This includes both the original MDs from \p From and additional ones (\see
+ /// addNewMetadata). Use this for *newly created* instructions in the vector
+ /// loop.
+ void addMetadata(Instruction *To, Instruction *From);
+
+ /// Similar to the previous function but it adds the metadata to a
+ /// vector of instructions.
+ void addMetadata(ArrayRef<Value *> To, Instruction *From);
+
+ /// Set the debug location in the builder using the debug location in \p V.
+ void setDebugLocFromInst(const Value *V);
+
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
struct CFGState {
@@ -308,26 +330,19 @@ struct VPTransformState {
/// header BasicBlock.
BasicBlock *PrevBB = nullptr;
- /// The last IR BasicBlock in the output IR. Set to the new latch
- /// BasicBlock, used for placing the newly created BasicBlocks.
- BasicBlock *LastBB = nullptr;
-
- /// The IR BasicBlock that is the preheader of the vector loop in the output
- /// IR.
- /// FIXME: The vector preheader should also be modeled in VPlan, so any code
- /// that needs to be added to the preheader gets directly generated by
- /// VPlan. There should be no need to manage a pointer to the IR BasicBlock.
- BasicBlock *VectorPreHeader = nullptr;
+ /// The last IR BasicBlock in the output IR. Set to the exit block of the
+ /// vector loop.
+ BasicBlock *ExitBB = nullptr;
/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
/// of replication, maps the BasicBlock of the last replica created.
SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
- /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed
- /// up at the end of vector code generation.
- SmallVector<VPBasicBlock *, 8> VPBBsToFix;
-
CFGState() = default;
+
+ /// Returns the BasicBlock* mapped to the pre-header of the loop region
+ /// containing \p R.
+ BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
} CFG;
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
@@ -337,7 +352,7 @@ struct VPTransformState {
DominatorTree *DT;
/// Hold a reference to the IRBuilder used to generate output IR code.
- IRBuilder<> &Builder;
+ IRBuilderBase &Builder;
VPValue2ValueTy VPValue2Value;
@@ -353,41 +368,16 @@ struct VPTransformState {
/// Holds recipes that may generate a poison value that is used after
/// vectorization, even when their operands are not poison.
SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
-};
-
-/// VPUsers instance used by VPBlockBase to manage CondBit and the block
-/// predicate. Currently VPBlockUsers are used in VPBlockBase for historical
-/// reasons, but in the future the only VPUsers should either be recipes or
-/// live-outs.
-struct VPBlockUser : public VPUser {
- VPBlockUser() : VPUser({}, VPUserID::Block) {}
- VPValue *getSingleOperandOrNull() {
- if (getNumOperands() == 1)
- return getOperand(0);
+ /// The loop object for the current parent region, or nullptr.
+ Loop *CurrentVectorLoop = nullptr;
- return nullptr;
- }
- const VPValue *getSingleOperandOrNull() const {
- if (getNumOperands() == 1)
- return getOperand(0);
-
- return nullptr;
- }
-
- void resetSingleOpUser(VPValue *NewVal) {
- assert(getNumOperands() <= 1 && "Didn't expect more than one operand!");
- if (!NewVal) {
- if (getNumOperands() == 1)
- removeLastOperand();
- return;
- }
-
- if (getNumOperands() == 1)
- setOperand(0, NewVal);
- else
- addOperand(NewVal);
- }
+ /// LoopVersioning. It's only set up (non-null) if memchecks were
+ /// used.
+ ///
+ /// This is currently only used to add no-alias metadata based on the
+ /// memchecks. The actual versioning is performed manually.
+ std::unique_ptr<LoopVersioning> LVer;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
@@ -410,16 +400,6 @@ class VPBlockBase {
/// List of successor blocks.
SmallVector<VPBlockBase *, 1> Successors;
- /// Successor selector managed by a VPUser. For blocks with zero or one
- /// successors, there is no operand. Otherwise there is exactly one operand
- /// which is the branch condition.
- VPBlockUser CondBitUser;
-
- /// If the block is predicated, its predicate is stored as an operand of this
- /// VPUser to maintain the def-use relations. Otherwise there is no operand
- /// here.
- VPBlockUser PredicateUser;
-
/// VPlan containing the block. Can only be set on the entry block of the
/// plan.
VPlan *Plan = nullptr;
@@ -493,11 +473,11 @@ public:
const VPBasicBlock *getEntryBasicBlock() const;
VPBasicBlock *getEntryBasicBlock();
- /// \return the VPBasicBlock that is the exit of this VPBlockBase,
+ /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
/// recursively, if the latter is a VPRegionBlock. Otherwise, if this
/// VPBlockBase is a VPBasicBlock, it is returned.
- const VPBasicBlock *getExitBasicBlock() const;
- VPBasicBlock *getExitBasicBlock();
+ const VPBasicBlock *getExitingBasicBlock() const;
+ VPBasicBlock *getExitingBasicBlock();
const VPBlocksTy &getSuccessors() const { return Successors; }
VPBlocksTy &getSuccessors() { return Successors; }
@@ -565,20 +545,6 @@ public:
return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
}
- /// \return the condition bit selecting the successor.
- VPValue *getCondBit();
- /// \return the condition bit selecting the successor.
- const VPValue *getCondBit() const;
- /// Set the condition bit selecting the successor.
- void setCondBit(VPValue *CV);
-
- /// \return the block's predicate.
- VPValue *getPredicate();
- /// \return the block's predicate.
- const VPValue *getPredicate() const;
- /// Set the block's predicate.
- void setPredicate(VPValue *Pred);
-
/// Set a given VPBlockBase \p Successor as the single successor of this
/// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
/// This VPBlockBase must have no successors.
@@ -588,14 +554,11 @@ public:
}
/// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
- /// successors of this VPBlockBase. \p Condition is set as the successor
- /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p
- /// IfFalse. This VPBlockBase must have no successors.
- void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
- VPValue *Condition) {
+ /// successors of this VPBlockBase. This VPBlockBase is not added as
+ /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
+ /// successors.
+ void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
assert(Successors.empty() && "Setting two successors when others exist.");
- assert(Condition && "Setting two successors without condition!");
- setCondBit(Condition);
appendSuccessor(IfTrue);
appendSuccessor(IfFalse);
}
@@ -612,11 +575,8 @@ public:
/// Remove all the predecessor of this block.
void clearPredecessors() { Predecessors.clear(); }
- /// Remove all the successors of this block and set to null its condition bit
- void clearSuccessors() {
- Successors.clear();
- setCondBit(nullptr);
- }
+ /// Remove all the successors of this block.
+ void clearSuccessors() { Successors.clear(); }
/// The method which generates the output IR that correspond to this
/// VPBlockBase, thereby "executing" the VPlan.
@@ -665,6 +625,32 @@ public:
#endif
};
+/// A value that is used outside the VPlan. The operand of the user needs to be
+/// added to the associated LCSSA phi node.
+class VPLiveOut : public VPUser {
+ PHINode *Phi;
+
+public:
+ VPLiveOut(PHINode *Phi, VPValue *Op)
+ : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
+
+ /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
+ /// means we need to add the appropriate incoming value from the middle
+ /// block as exiting edges from the scalar epilogue loop (if present) are
+ /// already in place, and we exit the vector loop exclusively to the middle
+ /// block.
+ void fixPhi(VPlan &Plan, VPTransformState &State);
+
+ /// Returns true if the VPLiveOut uses scalars of operand \p Op.
+ bool usesScalars(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ PHINode *getPhi() const { return Phi; }
+};
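// A standalone sketch (hypothetical types, not the LLVM API) of fixPhi: the
// LCSSA phi in the exit block already has its incoming values from the scalar
// loop, so fixing up a live-out only appends one more (value, predecessor)
// pair for the middle block that the vector loop exits to.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct PhiModel {
  std::vector<std::pair<std::string, long>> Incoming; // (pred block, value)
  void addIncoming(std::string Pred, long V) {
    Incoming.emplace_back(std::move(Pred), V);
  }
};

int main() {
  PhiModel LCSSAPhi;
  LCSSAPhi.addIncoming("scalar.loop.exiting", 7); // already in place
  LCSSAPhi.addIncoming("middle.block", 7);        // what fixPhi adds
  for (const auto &In : LCSSAPhi.Incoming)
    std::printf("[ %ld, %%%s ]\n", In.second, In.first.c_str());
  return 0;
}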
+
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
@@ -699,6 +685,9 @@ public:
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
+ /// Insert an unlinked recipe into \p BB immediately before the insertion
+ /// point \p IP.
+ void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
/// Insert an unlinked Recipe into a basic block immediately after
/// the specified Recipe.
@@ -759,14 +748,6 @@ public:
bool mayReadOrWriteMemory() const {
return mayReadFromMemory() || mayWriteToMemory();
}
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- /// Conservatively returns false.
- virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return false;
- }
};
inline bool VPUser::classof(const VPDef *Def) {
@@ -804,6 +785,7 @@ public:
CanonicalIVIncrement,
CanonicalIVIncrementNUW,
BranchOnCount,
+ BranchOnCond
};
private:
@@ -892,6 +874,7 @@ public:
case Instruction::Unreachable:
case Instruction::Fence:
case Instruction::AtomicRMW:
+ case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnCount:
return false;
default:
@@ -1049,27 +1032,25 @@ public:
};
/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their vector and scalar values.
+/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
PHINode *IV;
const InductionDescriptor &IndDesc;
- bool NeedsScalarIV;
bool NeedsVectorIV;
public:
- VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
const InductionDescriptor &IndDesc,
- bool NeedsScalarIV, bool NeedsVectorIV)
- : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this),
- IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ bool NeedsVectorIV)
+ : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}),
+ VPValue(IV, this), IV(IV), IndDesc(IndDesc),
NeedsVectorIV(NeedsVectorIV) {}
- VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
const InductionDescriptor &IndDesc,
- TruncInst *Trunc, bool NeedsScalarIV,
- bool NeedsVectorIV)
- : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this),
- IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ TruncInst *Trunc, bool NeedsVectorIV)
+ : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start, Step}),
+ VPValue(Trunc, this), IV(IV), IndDesc(IndDesc),
NeedsVectorIV(NeedsVectorIV) {}
~VPWidenIntOrFpInductionRecipe() override = default;
@@ -1093,6 +1074,10 @@ public:
VPValue *getStartValue() { return getOperand(0); }
const VPValue *getStartValue() const { return getOperand(0); }
+ /// Returns the step value of the induction.
+ VPValue *getStepValue() { return getOperand(1); }
+ const VPValue *getStepValue() const { return getOperand(1); }
+
/// Returns the first defined value as TruncInst, if it is one or nullptr
/// otherwise.
TruncInst *getTruncInst() {
@@ -1102,6 +1087,8 @@ public:
return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
}
+ PHINode *getPHINode() { return IV; }
+
/// Returns the induction descriptor for the recipe.
const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
@@ -1115,9 +1102,6 @@ public:
return TruncI ? TruncI->getType() : IV->getType();
}
- /// Returns true if a scalar phi needs to be created for the induction.
- bool needsScalarIV() const { return NeedsScalarIV; }
-
/// Returns true if a vector phi needs to be created for the induction.
bool needsVectorIV() const { return NeedsVectorIV; }
};
@@ -1167,6 +1151,9 @@ public:
VPValue *getStartValue() {
return getNumOperands() == 0 ? nullptr : getOperand(0);
}
+ VPValue *getStartValue() const {
+ return getNumOperands() == 0 ? nullptr : getOperand(0);
+ }
/// Returns the incoming value from the loop backedge.
VPValue *getBackedgeValue() {
@@ -1180,6 +1167,52 @@ public:
}
};
+class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
+ const InductionDescriptor &IndDesc;
+
+ /// SCEV used to expand step.
+ /// FIXME: move expansion of step to the pre-header, once it is modeled
+ /// explicitly.
+ ScalarEvolution &SE;
+
+public:
+ /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
+ /// Start.
+ VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start,
+ const InductionDescriptor &IndDesc,
+ ScalarEvolution &SE)
+ : VPHeaderPHIRecipe(VPVWidenPointerInductionSC, VPWidenPointerInductionSC,
+ Phi),
+ IndDesc(IndDesc), SE(SE) {
+ addOperand(Start);
+ }
+
+ ~VPWidenPointerInductionRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *B) {
+ return B->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
+ }
+ static inline bool classof(const VPHeaderPHIRecipe *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenPointerInductionSC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVWidenPointerInductionSC;
+ }
+
+ /// Generate vector values for the pointer induction.
+ void execute(VPTransformState &State) override;
+
+ /// Returns true if only scalar values will be generated.
+ bool onlyScalarsGenerated(ElementCount VF);
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe for handling header phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
@@ -1363,9 +1396,8 @@ public:
"Op must be an operand of the recipe");
// Recursing through Blend recipes only, must terminate at header phis at
// the latest.
- return all_of(users(), [this](VPUser *U) {
- return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(this);
- });
+ return all_of(users(),
+ [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
}
};
@@ -1440,6 +1472,15 @@ public:
unsigned getNumStoreOperands() const {
return getNumOperands() - (HasMask ? 2 : 1);
}
+
+ /// The recipe only uses the first lane of the address.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getAddr() && all_of(getStoredValues(), [Op](VPValue *StoredV) {
+ return Op != StoredV;
+ });
+ }
};
/// A recipe to represent inloop reduction operations, performing a reduction on
@@ -1551,6 +1592,13 @@ public:
"Op must be an operand of the recipe");
return isUniform();
}
+
+ /// Returns true if the recipe uses scalars of operand \p Op.
+ bool usesScalars(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// A recipe for generating conditional branches on the bits of a mask.
@@ -1590,6 +1638,13 @@ public:
// Mask is optional.
return getNumOperands() == 1 ? getOperand(0) : nullptr;
}
+
+ /// Returns true if the recipe uses scalars of operand \p Op.
+ bool usesScalars(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
@@ -1619,6 +1674,13 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe uses scalars of operand \p Op.
+ bool usesScalars(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// A Recipe for widening load/store operations.
@@ -1627,7 +1689,7 @@ public:
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue {
+class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
Instruction &Ingredient;
// Whether the loaded-from / stored-to addresses are consecutive.
@@ -1649,10 +1711,10 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue {
public:
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
bool Consecutive, bool Reverse)
- : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}),
- VPValue(VPValue::VPVMemoryInstructionSC, &Load, this), Ingredient(Load),
+ : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load),
Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
setMask(Mask);
}
@@ -1660,7 +1722,6 @@ public:
VPValue *StoredValue, VPValue *Mask,
bool Consecutive, bool Reverse)
: VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
- VPValue(VPValue::VPVMemoryInstructionSC, &Store, this),
Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
setMask(Mask);
@@ -1714,9 +1775,42 @@ public:
"Op must be an operand of the recipe");
// Widened, consecutive memory operations only demand the first lane of
- // their address.
- return Op == getAddr() && isConsecutive();
+ // their address, unless the same operand is also stored. The latter can
+ // happen with opaque pointers.
+ return Op == getAddr() && isConsecutive() &&
+ (!isStore() || Op != getStoredValue());
+ }
+
+ Instruction &getIngredient() const { return Ingredient; }
+};
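// A standalone sketch (hypothetical types, not the LLVM API) of the
// first-lane predicate above: a consecutive wide memory operation demands
// only lane 0 of its address, except when the very same value is also the
// stored operand (possible with opaque pointers), where all lanes matter.
#include <cstdio>

struct MemRecipeModel {
  const void *Addr;
  const void *StoredValue; // nullptr for loads
  bool Consecutive;

  bool onlyFirstLaneUsed(const void *Op) const {
    return Op == Addr && Consecutive && (!StoredValue || Op != StoredValue);
  }
};

int main() {
  int P; // stands in for a pointer-typed value
  MemRecipeModel StoreOfOwnAddr{&P, &P, /*Consecutive=*/true};
  MemRecipeModel Load{&P, nullptr, /*Consecutive=*/true};
  std::printf("%d %d\n", (int)StoreOfOwnAddr.onlyFirstLaneUsed(&P),
              (int)Load.onlyFirstLaneUsed(&P)); // prints "0 1"
  return 0;
}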
+
+/// Recipe to expand a SCEV expression.
+class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
+ const SCEV *Expr;
+ ScalarEvolution &SE;
+
+public:
+ VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
+ : VPRecipeBase(VPExpandSCEVSC, {}), VPValue(nullptr, this), Expr(Expr),
+ SE(SE) {}
+
+ ~VPExpandSCEVRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPExpandSCEVSC;
}
+
+ /// Generate the expansion of the SCEV expression.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ const SCEV *getSCEV() const { return Expr; }
};
/// Canonical scalar induction phi of the vector loop. Starting at the specified
@@ -1738,6 +1832,12 @@ public:
static inline bool classof(const VPDef *D) {
return D->getVPDefID() == VPCanonicalIVPHISC;
}
+ static inline bool classof(const VPHeaderPHIRecipe *D) {
+ return D->getVPDefID() == VPCanonicalIVPHISC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC;
+ }
/// Generate the canonical scalar induction phi of the vector loop.
void execute(VPTransformState &State) override;
@@ -1803,6 +1903,64 @@ public:
}
};
+/// A recipe for handling phi nodes of integer and floating-point inductions,
+/// producing their scalar values.
+class VPScalarIVStepsRecipe : public VPRecipeBase, public VPValue {
+ /// Scalar type to use for the generated values.
+ Type *Ty;
+ /// If not nullptr, truncate the generated values to TruncToTy.
+ Type *TruncToTy;
+ const InductionDescriptor &IndDesc;
+
+public:
+ VPScalarIVStepsRecipe(Type *Ty, const InductionDescriptor &IndDesc,
+ VPValue *CanonicalIV, VPValue *Start, VPValue *Step,
+ Type *TruncToTy)
+ : VPRecipeBase(VPScalarIVStepsSC, {CanonicalIV, Start, Step}),
+ VPValue(nullptr, this), Ty(Ty), TruncToTy(TruncToTy), IndDesc(IndDesc) {
+ }
+
+ ~VPScalarIVStepsRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+ }
+ /// Extra classof implementations to allow directly casting from VPUser ->
+ /// VPScalarIVStepsRecipe.
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+ }
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPScalarIVStepsSC;
+ }
+
+ /// Generate the scalarized versions of the phi node as needed by its users.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the induction is canonical, i.e. starting at 0 and
+ /// incremented by UF * VF (= the original IV is incremented by 1).
+ bool isCanonical() const;
+
+ VPCanonicalIVPHIRecipe *getCanonicalIV() const;
+ VPValue *getStartValue() const { return getOperand(1); }
+ VPValue *getStepValue() const { return getOperand(2); }
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+};
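// A standalone sketch (hypothetical types, not the LLVM API) of the
// canonicity check above: the induction is canonical when it starts at 0 and
// each original-loop iteration advances it by exactly 1, so the scalar step
// for (part P, lane L) is Start + (CanonicalIV + P * VF + L) * Step.
#include <cassert>

struct ScalarIVModel {
  long Start;
  long Step; // increment per original-loop iteration
  bool isCanonical() const { return Start == 0 && Step == 1; }
  long at(long CanonicalIV, unsigned VF, unsigned Part, unsigned Lane) const {
    return Start + (CanonicalIV + static_cast<long>(Part) * VF + Lane) * Step;
  }
};

int main() {
  ScalarIVModel IV{0, 1};
  assert(IV.isCanonical());
  assert(IV.at(/*CanonicalIV=*/8, /*VF=*/4, /*Part=*/1, /*Lane=*/2) == 14);
  return 0;
}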
+
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipes, each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
@@ -1895,6 +2053,8 @@ public:
/// SplitAt to the new block. Returns the new block.
VPBasicBlock *splitAt(iterator SplitAt);
+ VPRegionBlock *getEnclosingLoopRegion();
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
/// SlotTracker is used to print unnamed VPValues using consecutive numbers.
@@ -1906,6 +2066,14 @@ public:
using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif
+ /// If the block has multiple successors, return the branch recipe terminating
+ /// the block. If there is no successor or only a single one, return nullptr.
+ VPRecipeBase *getTerminator();
+ const VPRecipeBase *getTerminator() const;
+
+ /// Returns true if the block is exiting its parent region.
+ bool isExiting() const;
+
private:
/// Create an IR BasicBlock to hold the output instructions generated by this
/// VPBasicBlock, and return it. Update the CFGState accordingly.
@@ -1913,7 +2081,7 @@ private:
};
/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
-/// which form a Single-Entry-Single-Exit subgraph of the output IR CFG.
+/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
@@ -1924,25 +2092,26 @@ class VPRegionBlock : public VPBlockBase {
/// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
VPBlockBase *Entry;
- /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock.
- VPBlockBase *Exit;
+ /// Hold the Single Exiting block of the SESE region modelled by the
+ /// VPRegionBlock.
+ VPBlockBase *Exiting;
/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
bool IsReplicator;
public:
- VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit,
+ VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
const std::string &Name = "", bool IsReplicator = false)
- : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit),
+ : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
IsReplicator(IsReplicator) {
assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
- assert(Exit->getSuccessors().empty() && "Exit block has successors.");
+ assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
Entry->setParent(this);
- Exit->setParent(this);
+ Exiting->setParent(this);
}
VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
- : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr),
+ : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
IsReplicator(IsReplicator) {}
~VPRegionBlock() override {
@@ -1976,16 +2145,22 @@ public:
// DominatorTreeBase representing the Graph type.
VPBlockBase &front() const { return *Entry; }
- const VPBlockBase *getExit() const { return Exit; }
- VPBlockBase *getExit() { return Exit; }
+ const VPBlockBase *getExiting() const { return Exiting; }
+ VPBlockBase *getExiting() { return Exiting; }
- /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p
- /// ExitBlock must have no successors.
- void setExit(VPBlockBase *ExitBlock) {
- assert(ExitBlock->getSuccessors().empty() &&
+ /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
+ /// ExitingBlock must have no successors.
+ void setExiting(VPBlockBase *ExitingBlock) {
+ assert(ExitingBlock->getSuccessors().empty() &&
"Exit block cannot have successors.");
- Exit = ExitBlock;
- ExitBlock->setParent(this);
+ Exiting = ExitingBlock;
+ ExitingBlock->setParent(this);
+ }
+
+ /// Returns the pre-header VPBasicBlock of the loop region.
+ VPBasicBlock *getPreheaderVPBB() {
+ assert(!isReplicator() && "should only get pre-header of loop regions");
+ return getSinglePredecessor()->getExitingBasicBlock();
}
/// An indicator whether this region is to generate multiple replicated
@@ -2119,11 +2294,11 @@ struct GraphTraits<Inverse<VPRegionBlock *>>
using nodes_iterator = df_iterator<NodeRef>;
static NodeRef getEntryNode(Inverse<GraphRef> N) {
- return N.Graph->getExit();
+ return N.Graph->getExiting();
}
static nodes_iterator nodes_begin(GraphRef N) {
- return nodes_iterator::begin(N->getExit());
+ return nodes_iterator::begin(N->getExiting());
}
static nodes_iterator nodes_end(GraphRef N) {
@@ -2281,12 +2456,9 @@ class VPlan {
/// Holds the name of the VPlan, for printing.
std::string Name;
- /// Holds all the external definitions created for this VPlan.
- // TODO: Introduce a specific representation for external definitions in
- // VPlan. External definitions must be immutable and hold a pointer to its
- // underlying IR that will be used to implement its structural comparison
- // (operators '==' and '<').
- SetVector<VPValue *> VPExternalDefs;
+ /// Holds all the external definitions created for this VPlan. External
+ /// definitions must be immutable and hold a pointer to their underlying IR.
+ DenseMap<Value *, VPValue *> VPExternalDefs;
/// Represents the trip count of the original loop, for folding
/// the tail.
@@ -2307,13 +2479,13 @@ class VPlan {
/// to be free when the plan's destructor is called.
SmallVector<VPValue *, 16> VPValuesToFree;
- /// Holds the VPLoopInfo analysis for this VPlan.
- VPLoopInfo VPLInfo;
-
/// Indicates whether it is safe use the Value2VPValue mapping or if the
/// mapping cannot be used any longer, because it is stale.
bool Value2VPValueEnabled = true;
+ /// Values used outside the plan.
+ MapVector<PHINode *, VPLiveOut *> LiveOuts;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
if (Entry)
@@ -2321,6 +2493,8 @@ public:
}
~VPlan() {
+ clearLiveOuts();
+
if (Entry) {
VPValue DummyValue;
for (VPBlockBase *Block : depth_first(Entry))
@@ -2334,13 +2508,14 @@ public:
delete TripCount;
if (BackedgeTakenCount)
delete BackedgeTakenCount;
- for (VPValue *Def : VPExternalDefs)
- delete Def;
+ for (auto &P : VPExternalDefs)
+ delete P.second;
}
/// Prepare the plan for execution, setting up the required live-in values.
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
- Value *CanonicalIVStartValue, VPTransformState &State);
+ Value *CanonicalIVStartValue, VPTransformState &State,
+ bool IsEpilogueVectorization);
/// Generate the IR code for this VPlan.
void execute(struct VPTransformState *State);
@@ -2383,9 +2558,13 @@ public:
void setName(const Twine &newName) { Name = newName.str(); }
- /// Add \p VPVal to the pool of external definitions if it's not already
- /// in the pool.
- void addExternalDef(VPValue *VPVal) { VPExternalDefs.insert(VPVal); }
+ /// Get the existing or add a new external definition for \p V.
+ VPValue *getOrAddExternalDef(Value *V) {
+ auto I = VPExternalDefs.insert({V, nullptr});
+ if (I.second)
+ I.first->second = new VPValue(V);
+ return I.first->second;
+ }
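// A standalone sketch (std::map instead of LLVM's DenseMap) of the
// insert-then-fill idiom above: one lookup both finds an existing entry and
// reserves the slot, and the bool in the returned pair says whether the
// value still has to be created.
#include <map>
#include <memory>

struct ValModel {
  int Id;
};

static ValModel *getOrAdd(std::map<const void *, std::unique_ptr<ValModel>> &Defs,
                          const void *IR, int NextId) {
  auto I = Defs.insert({IR, nullptr});
  if (I.second) // newly inserted: fill the reserved slot
    I.first->second = std::make_unique<ValModel>(ValModel{NextId});
  return I.first->second.get();
}

int main() {
  std::map<const void *, std::unique_ptr<ValModel>> Defs;
  int X;
  ValModel *A = getOrAdd(Defs, &X, 0);
  ValModel *B = getOrAdd(Defs, &X, 1); // cached: Id stays 0
  return (A == B && A->Id == 0) ? 0 : 1;
}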
void addVPValue(Value *V) {
assert(Value2VPValueEnabled &&
@@ -2432,10 +2611,6 @@ public:
Value2VPValue.erase(V);
}
- /// Return the VPLoopInfo analysis for this VPlan.
- VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
- const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPlan to \p O.
void print(raw_ostream &O) const;
@@ -2465,7 +2640,10 @@ public:
/// Returns the VPRegionBlock of the vector loop.
VPRegionBlock *getVectorLoopRegion() {
- return cast<VPRegionBlock>(getEntry());
+ return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
+ }
+ const VPRegionBlock *getVectorLoopRegion() const {
+ return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
}
/// Returns the canonical induction recipe of the vector loop.
@@ -2478,6 +2656,23 @@ public:
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
+ void addLiveOut(PHINode *PN, VPValue *V);
+
+ void clearLiveOuts() {
+ for (auto &KV : LiveOuts)
+ delete KV.second;
+ LiveOuts.clear();
+ }
+
+ void removeLiveOut(PHINode *PN) {
+ delete LiveOuts[PN];
+ LiveOuts.erase(PN);
+ }
+
+ const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
+ return LiveOuts;
+ }
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
@@ -2567,9 +2762,8 @@ public:
/// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
/// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
/// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
- /// successors are moved from \p BlockPtr to \p NewBlock and \p BlockPtr's
- /// conditional bit is propagated to \p NewBlock. \p NewBlock must have
- /// neither successors nor predecessors.
+ /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
+ /// have neither successors nor predecessors.
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
assert(NewBlock->getSuccessors().empty() &&
NewBlock->getPredecessors().empty() &&
@@ -2580,24 +2774,22 @@ public:
disconnectBlocks(BlockPtr, Succ);
connectBlocks(NewBlock, Succ);
}
- NewBlock->setCondBit(BlockPtr->getCondBit());
- BlockPtr->setCondBit(nullptr);
connectBlocks(BlockPtr, NewBlock);
}
/// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
/// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
/// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
- /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor
- /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse
- /// must have neither successors nor predecessors.
+ /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
+ /// and \p IfTrue and \p IfFalse must have neither successors nor
+ /// predecessors.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
- VPValue *Condition, VPBlockBase *BlockPtr) {
+ VPBlockBase *BlockPtr) {
assert(IfTrue->getSuccessors().empty() &&
"Can't insert IfTrue with successors.");
assert(IfFalse->getSuccessors().empty() &&
"Can't insert IfFalse with successors.");
- BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition);
+ BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
IfTrue->setPredecessors({BlockPtr});
IfFalse->setPredecessors({BlockPtr});
IfTrue->setParent(BlockPtr->getParent());
@@ -2639,8 +2831,8 @@ public:
R.moveBefore(*PredVPBB, PredVPBB->end());
VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
auto *ParentRegion = cast<VPRegionBlock>(Block->getParent());
- if (ParentRegion->getExit() == Block)
- ParentRegion->setExit(PredVPBB);
+ if (ParentRegion->getExiting() == Block)
+ ParentRegion->setExiting(PredVPBB);
SmallVector<VPBlockBase *> Successors(Block->successors());
for (auto *Succ : Successors) {
VPBlockUtils::disconnectBlocks(Block, Succ);
@@ -2650,41 +2842,6 @@ public:
return PredVPBB;
}
- /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
- static bool isBackEdge(const VPBlockBase *FromBlock,
- const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
- assert(FromBlock->getParent() == ToBlock->getParent() &&
- FromBlock->getParent() && "Must be in same region");
- const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
- const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
- if (!FromLoop || !ToLoop || FromLoop != ToLoop)
- return false;
-
- // A back-edge is a branch from the loop latch to its header.
- return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
- }
-
- /// Returns true if \p Block is a loop latch
- static bool blockIsLoopLatch(const VPBlockBase *Block,
- const VPLoopInfo *VPLInfo) {
- if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
- return ParentVPL->isLoopLatch(Block);
-
- return false;
- }
-
- /// Count and return the number of successors of \p PredBlock excluding any
- /// backedges.
- static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
- VPLoopInfo *VPLI) {
- unsigned Count = 0;
- for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
- if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
- Count++;
- }
- return Count;
- }
-
/// Return an iterator range over \p Range which only includes \p BlockTy
/// blocks. The accesses are casted to \p BlockTy.
template <typename BlockTy, typename T>
@@ -2845,6 +3002,13 @@ namespace vputils {
/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(VPValue *Def);
+/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
+/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
+/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
+/// pre-header already contains a recipe expanding \p Expr, return it. If not,
+/// create a new one.
+VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
+ ScalarEvolution &SE);
} // end namespace vputils
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 379988733312..84b0dac862b6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -42,9 +42,6 @@ private:
// Vectorization plan that we are working on.
VPlan &Plan;
- // Output Top Region.
- VPRegionBlock *TopRegion = nullptr;
-
// Builder of the VPlan instruction-level representation.
VPBuilder VPIRBuilder;
@@ -59,6 +56,9 @@ private:
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;
+ /// Maps loops in the original IR to their corresponding region.
+ DenseMap<Loop *, VPRegionBlock *> Loop2Region;
+
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
void fixPhiNodes();
@@ -73,8 +73,9 @@ public:
PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- // Build the plain CFG and return its Top Region.
- VPRegionBlock *buildPlainCFG();
+ /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected
+ /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG.
+ VPBasicBlock *buildPlainCFG();
};
} // anonymous namespace
@@ -106,19 +107,32 @@ void PlainCFGBuilder::fixPhiNodes() {
}
}
-// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
-// existing one if it was already created.
+// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
+// corresponding to the containing loop or retrieve an existing one if it was
+// already created. If no region exists yet for the loop containing \p BB, a new
+// one is created.
VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
auto BlockIt = BB2VPBB.find(BB);
if (BlockIt != BB2VPBB.end())
// Retrieve existing VPBB.
return BlockIt->second;
+ // Get or create a region for the loop containing BB.
+ Loop *CurrentLoop = LI->getLoopFor(BB);
+ VPRegionBlock *ParentR = nullptr;
+ if (CurrentLoop) {
+ auto Iter = Loop2Region.insert({CurrentLoop, nullptr});
+ if (Iter.second)
+ Iter.first->second = new VPRegionBlock(
+ CurrentLoop->getHeader()->getName().str(), false /*isReplicator*/);
+ ParentR = Iter.first->second;
+ }
+
// Create new VPBB.
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n");
VPBasicBlock *VPBB = new VPBasicBlock(BB->getName());
BB2VPBB[BB] = VPBB;
- VPBB->setParent(TopRegion);
+ VPBB->setParent(ParentR);
return VPBB;
}
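// A standalone sketch (hypothetical types, not the LLVM API) of the
// two-level get-or-create above: one cache maps IR blocks to their VPBBs and
// a second lazily creates exactly one region per loop, so every block ends
// up parented to the region of its containing loop (or to none).
#include <memory>
#include <string>
#include <unordered_map>

struct RegionModel {
  std::string Name;
};
struct BlockModel {
  std::string Name;
  RegionModel *Parent = nullptr;
};

struct BuilderModel {
  std::unordered_map<std::string, std::unique_ptr<BlockModel>> BB2VPBB;
  std::unordered_map<int, std::unique_ptr<RegionModel>> Loop2Region;

  BlockModel *getOrCreate(const std::string &BB, int LoopId /* -1: no loop */) {
    auto &Blk = BB2VPBB[BB];
    if (Blk)
      return Blk.get(); // retrieve the existing VPBB
    RegionModel *ParentR = nullptr;
    if (LoopId >= 0) {
      auto &Reg = Loop2Region[LoopId];
      if (!Reg)
        Reg = std::make_unique<RegionModel>(RegionModel{BB});
      ParentR = Reg.get();
    }
    Blk = std::make_unique<BlockModel>(BlockModel{BB, ParentR});
    return Blk.get();
  }
};

int main() {
  BuilderModel B;
  BlockModel *H = B.getOrCreate("loop.header", /*LoopId=*/0);
  BlockModel *L = B.getOrCreate("loop.latch", /*LoopId=*/0);
  return H->Parent == L->Parent ? 0 : 1; // same loop -> same region
}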
@@ -182,8 +196,7 @@ VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
// A and B: Create VPValue and add it to the pool of external definitions and
// to the Value->VPValue map.
- VPValue *NewVPVal = new VPValue(IRVal);
- Plan.addExternalDef(NewVPVal);
+ VPValue *NewVPVal = Plan.getOrAddExternalDef(IRVal);
IRDef2VPValue[IRVal] = NewVPVal;
return NewVPVal;
}
@@ -203,10 +216,13 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
"Instruction shouldn't have been visited.");
if (auto *Br = dyn_cast<BranchInst>(Inst)) {
- // Branch instruction is not explicitly represented in VPlan but we need
- // to represent its condition bit when it's conditional.
- if (Br->isConditional())
- getOrCreateVPOperand(Br->getCondition());
+ // Conditional branch instruction are represented using BranchOnCond
+ // recipes.
+ if (Br->isConditional()) {
+ VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
+ VPBB->appendRecipe(
+ new VPInstruction(VPInstruction::BranchOnCond, {Cond}));
+ }
// Skip the rest of the Instruction processing for Branch instructions.
continue;
@@ -238,11 +254,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
}
// Main interface to build the plain CFG.
-VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
- // 1. Create the Top Region. It will be the parent of all VPBBs.
- TopRegion = new VPRegionBlock("TopRegion", false /*isReplicator*/);
-
- // 2. Scan the body of the loop in a topological order to visit each basic
+VPBasicBlock *PlainCFGBuilder::buildPlainCFG() {
+ // 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
// each BB and link it to its successor and predecessor VPBBs. Note that
// predecessors must be set in the same order as they are in the incoming IR.
@@ -251,21 +264,20 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
// Loop PH needs to be explicitly visited since it's not taken into account by
// LoopBlocksDFS.
- BasicBlock *PreheaderBB = TheLoop->getLoopPreheader();
- assert((PreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
+ BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
+ assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
"Unexpected loop preheader");
- VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(PreheaderBB);
- for (auto &I : *PreheaderBB) {
+ VPBasicBlock *ThePreheaderVPBB = getOrCreateVPBB(ThePreheaderBB);
+ ThePreheaderVPBB->setName("vector.ph");
+ for (auto &I : *ThePreheaderBB) {
if (I.getType()->isVoidTy())
continue;
- VPValue *VPV = new VPValue(&I);
- Plan.addExternalDef(VPV);
- IRDef2VPValue[&I] = VPV;
+ IRDef2VPValue[&I] = Plan.getOrAddExternalDef(&I);
}
// Create empty VPBB for Loop H so that we can link PH->H.
VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader());
- // Preheader's predecessors will be set during the loop RPO traversal below.
- PreheaderVPBB->setOneSuccessor(HeaderVPBB);
+ HeaderVPBB->setName("vector.body");
+ ThePreheaderVPBB->setOneSuccessor(HeaderVPBB);
LoopBlocksRPO RPO(TheLoop);
RPO.perform(LI);
@@ -295,16 +307,13 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
// Get VPBB's condition bit.
assert(isa<BranchInst>(TI) && "Unsupported terminator!");
- auto *Br = cast<BranchInst>(TI);
- Value *BrCond = Br->getCondition();
// Look up the branch condition to get the corresponding VPValue
// representing the condition bit in VPlan (which may be in another VPBB).
- assert(IRDef2VPValue.count(BrCond) &&
+ assert(IRDef2VPValue.count(cast<BranchInst>(TI)->getCondition()) &&
"Missing condition bit in IRDef2VPValue!");
- VPValue *VPCondBit = IRDef2VPValue[BrCond];
- // Link successors using condition bit.
- VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1, VPCondBit);
+ // Link successors.
+ VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1);
} else
llvm_unreachable("Number of successors not supported.");
@@ -312,30 +321,61 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
setVPBBPredsFromBB(VPBB, BB);
}
- // 3. Process outermost loop exit. We created an empty VPBB for the loop
+ // 2. Process outermost loop exit. We created an empty VPBB for the loop
// single exit BB during the RPO traversal of the loop body but Instructions
// weren't visited because it's not part of the loop.
BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock();
assert(LoopExitBB && "Loops with multiple exits are not supported.");
VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB];
- createVPInstructionsForVPBB(LoopExitVPBB, LoopExitBB);
// Loop exit was already set as successor of the loop exiting BB.
// We only set its predecessor VPBB now.
setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB);
+ // 3. Fix up region blocks for loops. For each loop,
+ // * use the header block as entry to the corresponding region,
+ // * use the latch block as exit of the corresponding region,
+ // * set the region as successor of the loop pre-header, and
+ // * set the exit block as successor to the region.
+ SmallVector<Loop *> LoopWorkList;
+ LoopWorkList.push_back(TheLoop);
+ while (!LoopWorkList.empty()) {
+ Loop *L = LoopWorkList.pop_back_val();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Exiting = L->getLoopLatch();
+ assert(Exiting == L->getExitingBlock() &&
+ "Latch must be the only exiting block");
+ VPRegionBlock *Region = Loop2Region[L];
+ VPBasicBlock *HeaderVPBB = getOrCreateVPBB(Header);
+ VPBasicBlock *ExitingVPBB = getOrCreateVPBB(Exiting);
+
+ // Disconnect backedge and pre-header from header.
+ VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(L->getLoopPreheader());
+ VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
+ VPBlockUtils::disconnectBlocks(ExitingVPBB, HeaderVPBB);
+
+ Region->setParent(PreheaderVPBB->getParent());
+ Region->setEntry(HeaderVPBB);
+ VPBlockUtils::connectBlocks(PreheaderVPBB, Region);
+
+ // Disconnect exit block from exiting (=latch) block, set exiting block and
+ // connect region to exit block.
+ VPBasicBlock *ExitVPBB = getOrCreateVPBB(L->getExitBlock());
+ VPBlockUtils::disconnectBlocks(ExitingVPBB, ExitVPBB);
+ Region->setExiting(ExitingVPBB);
+ VPBlockUtils::connectBlocks(Region, ExitVPBB);
+
+ // Queue sub-loops for processing.
+ LoopWorkList.append(L->begin(), L->end());
+ }
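// A standalone sketch (hypothetical types, not the LLVM API) of step 3
// above: a worklist visits the loop nest outside-in and, per loop, makes the
// header the region entry, the latch the region's exiting block, and rewires
// preheader -> region -> exit before queuing the sub-loops.
#include <cstdio>
#include <vector>

struct LoopModel {
  const char *Header;
  const char *Latch;
  const char *Preheader;
  const char *Exit;
  std::vector<LoopModel *> SubLoops;
};

int main() {
  LoopModel Inner{"inner.header", "inner.latch", "outer.body", "outer.latch", {}};
  LoopModel Outer{"outer.header", "outer.latch", "ph", "exit", {&Inner}};

  std::vector<LoopModel *> Worklist{&Outer};
  while (!Worklist.empty()) {
    LoopModel *L = Worklist.back();
    Worklist.pop_back();
    std::printf("region(%s): entry=%s exiting=%s, %s -> region -> %s\n",
                L->Header, L->Header, L->Latch, L->Preheader, L->Exit);
    Worklist.insert(Worklist.end(), L->SubLoops.begin(), L->SubLoops.end());
  }
  return 0;
}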
// 4. The whole CFG has been built at this point so all the input Values must
// have a VPlan counterpart. Fix VPlan phi nodes by adding their corresponding
// VPlan operands.
fixPhiNodes();
- // 5. Final Top Region setup. Set outermost loop pre-header and single exit as
- // Top Region entry and exit.
- TopRegion->setEntry(PreheaderVPBB);
- TopRegion->setExit(LoopExitVPBB);
- return TopRegion;
+ return ThePreheaderVPBB;
}
-VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() {
+VPBasicBlock *VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
return PCFGBuilder.buildPlainCFG();
}
@@ -343,20 +383,15 @@ VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() {
// Public interface to build a H-CFG.
void VPlanHCFGBuilder::buildHierarchicalCFG() {
// Build the plain CFG with a top region enclosing the loop nest and set the
// pre-header VPBasicBlock as VPlan entry.
- VPRegionBlock *TopRegion = buildPlainCFG();
- Plan.setEntry(TopRegion);
+ VPBasicBlock *EntryVPBB = buildPlainCFG();
+ Plan.setEntry(EntryVPBB);
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
Verifier.verifyHierarchicalCFG(TopRegion);
// Compute the dominator tree of the plain CFG's top region.
VPDomTree.recalculate(*TopRegion);
LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n";
VPDomTree.print(dbgs()));
-
- // Compute VPLInfo and keep it in Plan.
- VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
- VPLInfo.analyze(VPDomTree);
- LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n";
- VPLInfo.print(dbgs()));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index 238ee7e6347c..2d52990af268 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -24,13 +24,15 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H
-#include "VPlan.h"
#include "VPlanDominatorTree.h"
#include "VPlanVerifier.h"
namespace llvm {
class Loop;
+class LoopInfo;
+class VPRegionBlock;
+class VPlan;
class VPlanTestBase;
/// Main class to build the VPlan H-CFG for an incoming IR.
@@ -55,9 +57,9 @@ private:
// are introduced.
VPDominatorTree VPDomTree;
- /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion)
- /// enclosing the plain CFG.
- VPRegionBlock *buildPlainCFG();
+ /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected
+ /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG.
+ VPBasicBlock *buildPlainCFG();
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h b/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h
deleted file mode 100644
index 5208f2d58e2b..000000000000
--- a/llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h
+++ /dev/null
@@ -1,44 +0,0 @@
-//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a
-/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization
-/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further
-/// information can be found in VectorizationPlanner.rst.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
-#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
-
-#include "llvm/Analysis/LoopInfoImpl.h"
-
-namespace llvm {
-class VPBlockBase;
-
-/// Hold analysis information for every loop detected by VPLoopInfo. It is an
-/// instantiation of LoopBase.
-class VPLoop : public LoopBase<VPBlockBase, VPLoop> {
-private:
- friend class LoopInfoBase<VPBlockBase, VPLoop>;
- explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {}
-};
-
-/// VPLoopInfo provides analysis of natural loops for VPBlockBase-based
-/// Hierarchical CFG. It is a specialization of LoopInfoBase class.
-// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which
-// is the same as the incoming IR CFG. If it's more efficient than running the
-// whole loop detection algorithm, we may want to create a mechanism to
-// translate LoopInfo into VPLoopInfo. However, that would require significant
-// changes in LoopInfoBase class.
-typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo;
-
-} // namespace llvm
-
-#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
deleted file mode 100644
index e879a33db6ee..000000000000
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-//===-- VPlanPredicator.cpp -------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file implements the VPlanPredicator class which contains the public
-/// interfaces to predicate and linearize the VPlan region.
-///
-//===----------------------------------------------------------------------===//
-
-#include "VPlanPredicator.h"
-#include "VPlan.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "VPlanPredicator"
-
-using namespace llvm;
-
-// Generate VPInstructions at the beginning of CurrBB that calculate the
-// predicate being propagated from PredBB to CurrBB depending on the edge type
-// between them. For example if:
-// i. PredBB is controlled by predicate %BP, and
-// ii. The edge PredBB->CurrBB is the false edge, controlled by the condition
-// bit value %CBV then this function will generate the following two
-// VPInstructions at the start of CurrBB:
-// %IntermediateVal = not %CBV
-// %FinalVal = and %BP %IntermediateVal
-// It returns %FinalVal.
-VPValue *VPlanPredicator::getOrCreateNotPredicate(VPBasicBlock *PredBB,
- VPBasicBlock *CurrBB) {
- VPValue *CBV = PredBB->getCondBit();
-
- // Set the intermediate value - this is either 'CBV', or 'not CBV'
- // depending on the edge type.
- EdgeType ET = getEdgeTypeBetween(PredBB, CurrBB);
- VPValue *IntermediateVal = nullptr;
- switch (ET) {
- case EdgeType::TRUE_EDGE:
- // CurrBB is the true successor of PredBB - nothing to do here.
- IntermediateVal = CBV;
- break;
-
- case EdgeType::FALSE_EDGE:
- // CurrBB is the False successor of PredBB - compute not of CBV.
- IntermediateVal = Builder.createNot(CBV, {});
- break;
- }
-
- // Now AND intermediate value with PredBB's block predicate if it has one.
- VPValue *BP = PredBB->getPredicate();
- if (BP)
- return Builder.createAnd(BP, IntermediateVal, {});
- else
- return IntermediateVal;
-}
-
-// Generate a tree of ORs for all IncomingPredicates in WorkList.
-// Note: This function destroys the original Worklist.
-//
-// P1 P2 P3 P4 P5
-// \ / \ / /
-// OR1 OR2 /
-// \ | /
-// \ +/-+
-// \ / |
-// OR3 |
-// \ |
-// OR4 <- Returns this
-// |
-//
-// The algorithm uses a worklist of predicates as its main data structure.
-// We pop a pair of values from the front (e.g. P1 and P2), generate an OR
-// (in this example OR1), and push it back. In this example the worklist
-// contains {P3, P4, P5, OR1}.
-// The process iterates until we have only one element in the Worklist (OR4).
-// The last element is the root predicate which is returned.
-VPValue *VPlanPredicator::genPredicateTree(std::list<VPValue *> &Worklist) {
- if (Worklist.empty())
- return nullptr;
-
- // The worklist initially contains all the leaf nodes. Initialize the tree
- // using them.
- while (Worklist.size() >= 2) {
- // Pop a pair of values from the front.
- VPValue *LHS = Worklist.front();
- Worklist.pop_front();
- VPValue *RHS = Worklist.front();
- Worklist.pop_front();
-
- // Create an OR of these values.
- VPValue *Or = Builder.createOr(LHS, RHS, {});
-
- // Push OR to the back of the worklist.
- Worklist.push_back(Or);
- }
-
- assert(Worklist.size() == 1 && "Expected 1 item in worklist");
-
- // The root is the last node in the worklist.
- VPValue *Root = Worklist.front();
-
- // This root needs to replace the existing block predicate. This is done in
- // the caller function.
- return Root;
-}
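The worklist reduction above is independent of VPlan; a standalone sketch of the same shape, using integers and bitwise OR so it can be compiled and tested in isolation:

    #include <cassert>
    #include <list>

    // Pop two values from the front, combine them, and push the result to
    // the back; repeating this builds a roughly balanced tree of ORs,
    // matching the OR1..OR4 diagram above.
    int genOrTree(std::list<int> Worklist) {
      if (Worklist.empty())
        return 0;
      while (Worklist.size() >= 2) {
        int LHS = Worklist.front();
        Worklist.pop_front();
        int RHS = Worklist.front();
        Worklist.pop_front();
        Worklist.push_back(LHS | RHS);
      }
      assert(Worklist.size() == 1 && "Expected a single root");
      return Worklist.front();
    }

Taking operands from the front while pushing results to the back is what keeps the tree balanced, so its depth stays logarithmic in the number of incoming predicates.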
-
-// Return whether the edge FromBlock -> ToBlock is a TRUE_EDGE or FALSE_EDGE
-VPlanPredicator::EdgeType
-VPlanPredicator::getEdgeTypeBetween(VPBlockBase *FromBlock,
- VPBlockBase *ToBlock) {
- unsigned Count = 0;
- for (VPBlockBase *SuccBlock : FromBlock->getSuccessors()) {
- if (SuccBlock == ToBlock) {
- assert(Count < 2 && "Switch not supported currently");
- return (Count == 0) ? EdgeType::TRUE_EDGE : EdgeType::FALSE_EDGE;
- }
- Count++;
- }
-
- llvm_unreachable("Broken getEdgeTypeBetween");
-}
-
-// Generate all predicates needed for CurrBlock by going through its immediate
-// predecessor blocks.
-void VPlanPredicator::createOrPropagatePredicates(VPBlockBase *CurrBlock,
- VPRegionBlock *Region) {
- // Blocks that dominate region exit inherit the predicate from the region.
- // Return after setting the predicate.
- if (VPDomTree.dominates(CurrBlock, Region->getExit())) {
- VPValue *RegionBP = Region->getPredicate();
- CurrBlock->setPredicate(RegionBP);
- return;
- }
-
- // Collect all incoming predicates in a worklist.
- std::list<VPValue *> IncomingPredicates;
-
- // Set the builder's insertion point to the top of the current BB
- VPBasicBlock *CurrBB = cast<VPBasicBlock>(CurrBlock->getEntryBasicBlock());
- Builder.setInsertPoint(CurrBB, CurrBB->begin());
-
- // For each predecessor, generate the VPInstructions required for
-  // computing 'BP AND (not) CBV' at the top of CurrBB.
- // Collect the outcome of this calculation for all predecessors
- // into IncomingPredicates.
- for (VPBlockBase *PredBlock : CurrBlock->getPredecessors()) {
- // Skip back-edges
- if (VPBlockUtils::isBackEdge(PredBlock, CurrBlock, VPLI))
- continue;
-
- VPValue *IncomingPredicate = nullptr;
- unsigned NumPredSuccsNoBE =
- VPBlockUtils::countSuccessorsNoBE(PredBlock, VPLI);
-
-    // If there is an unconditional branch to CurrBB, then we don't create
- // edge predicates. We use the predecessor's block predicate instead.
- if (NumPredSuccsNoBE == 1)
- IncomingPredicate = PredBlock->getPredicate();
- else if (NumPredSuccsNoBE == 2) {
- // Emit recipes into CurrBlock if required
- assert(isa<VPBasicBlock>(PredBlock) && "Only BBs have multiple exits");
- IncomingPredicate =
- getOrCreateNotPredicate(cast<VPBasicBlock>(PredBlock), CurrBB);
- } else
- llvm_unreachable("FIXME: switch statement ?");
-
- if (IncomingPredicate)
- IncomingPredicates.push_back(IncomingPredicate);
- }
-
- // Logically OR all incoming predicates by building the Predicate Tree.
- VPValue *Predicate = genPredicateTree(IncomingPredicates);
-
- // Now update the block's predicate with the new one.
- CurrBlock->setPredicate(Predicate);
-}
-
-// Generate all predicates needed for Region.
-void VPlanPredicator::predicateRegionRec(VPRegionBlock *Region) {
- VPBasicBlock *EntryBlock = cast<VPBasicBlock>(Region->getEntry());
- ReversePostOrderTraversal<VPBlockBase *> RPOT(EntryBlock);
-
- // Generate edge predicates and append them to the block predicate. RPO is
- // necessary since the predecessor blocks' block predicate needs to be set
- // before the current block's block predicate can be computed.
- for (VPBlockBase *Block : RPOT) {
- // TODO: Handle nested regions once we start generating the same.
- assert(!isa<VPRegionBlock>(Block) && "Nested region not expected");
- createOrPropagatePredicates(Block, Region);
- }
-}
-
-// Linearize the CFG within Region.
-// TODO: Predication and linearization need RPOT for every region.
-// This traversal is expensive. Since predication is not adding new
-// blocks, we should be able to compute RPOT once in predication and
-// reuse it here. This becomes even more important once we have nested
-// regions.
-void VPlanPredicator::linearizeRegionRec(VPRegionBlock *Region) {
- ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
- VPBlockBase *PrevBlock = nullptr;
-
- for (VPBlockBase *CurrBlock : RPOT) {
- // TODO: Handle nested regions once we start generating the same.
- assert(!isa<VPRegionBlock>(CurrBlock) && "Nested region not expected");
-
- // Linearize control flow by adding an unconditional edge between PrevBlock
- // and CurrBlock skipping loop headers and latches to keep intact loop
- // header predecessors and loop latch successors.
- if (PrevBlock && !VPLI->isLoopHeader(CurrBlock) &&
- !VPBlockUtils::blockIsLoopLatch(PrevBlock, VPLI)) {
-
- LLVM_DEBUG(dbgs() << "Linearizing: " << PrevBlock->getName() << "->"
- << CurrBlock->getName() << "\n");
-
- PrevBlock->clearSuccessors();
- CurrBlock->clearPredecessors();
- VPBlockUtils::connectBlocks(PrevBlock, CurrBlock);
- }
-
- PrevBlock = CurrBlock;
- }
-}
-
-// Entry point. The driver function for the predicator.
-void VPlanPredicator::predicate() {
- // Predicate the blocks within Region.
- predicateRegionRec(cast<VPRegionBlock>(Plan.getEntry()));
-
-  // Linearize the blocks within Region.
- linearizeRegionRec(cast<VPRegionBlock>(Plan.getEntry()));
-}
-
-VPlanPredicator::VPlanPredicator(VPlan &Plan)
- : Plan(Plan), VPLI(&(Plan.getVPLoopInfo())) {
- // FIXME: Predicator is currently computing the dominator information for the
- // top region. Once we start storing dominator information in a VPRegionBlock,
- // we can avoid this recalculation.
- VPDomTree.recalculate(*(cast<VPRegionBlock>(Plan.getEntry())));
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.h b/llvm/lib/Transforms/Vectorize/VPlanPredicator.h
deleted file mode 100644
index a5db9a54da3c..000000000000
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===-- VPlanPredicator.h ---------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file defines the VPlanPredicator class which contains the public
-/// interfaces to predicate and linearize the VPlan region.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
-#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
-
-#include "LoopVectorizationPlanner.h"
-#include "VPlan.h"
-#include "VPlanDominatorTree.h"
-
-namespace llvm {
-
-class VPlanPredicator {
-private:
- enum class EdgeType {
- TRUE_EDGE,
- FALSE_EDGE,
- };
-
- // VPlan being predicated.
- VPlan &Plan;
-
- // VPLoopInfo for Plan's HCFG.
- VPLoopInfo *VPLI;
-
- // Dominator tree for Plan's HCFG.
- VPDominatorTree VPDomTree;
-
- // VPlan builder used to generate VPInstructions for block predicates.
- VPBuilder Builder;
-
- /// Get the type of edge from \p FromBlock to \p ToBlock. Returns TRUE_EDGE if
- /// \p ToBlock is either the unconditional successor or the conditional true
- /// successor of \p FromBlock and FALSE_EDGE otherwise.
- EdgeType getEdgeTypeBetween(VPBlockBase *FromBlock, VPBlockBase *ToBlock);
-
- /// Create and return VPValue corresponding to the predicate for the edge from
- /// \p PredBB to \p CurrentBlock.
- VPValue *getOrCreateNotPredicate(VPBasicBlock *PredBB, VPBasicBlock *CurrBB);
-
- /// Generate and return the result of ORing all the predicate VPValues in \p
- /// Worklist.
- VPValue *genPredicateTree(std::list<VPValue *> &Worklist);
-
- /// Create or propagate predicate for \p CurrBlock in region \p Region using
- /// predicate(s) of its predecessor(s)
- void createOrPropagatePredicates(VPBlockBase *CurrBlock,
- VPRegionBlock *Region);
-
- /// Predicate the CFG within \p Region.
- void predicateRegionRec(VPRegionBlock *Region);
-
- /// Linearize the CFG within \p Region.
- void linearizeRegionRec(VPRegionBlock *Region);
-
-public:
- VPlanPredicator(VPlan &Plan);
-
- /// Predicate Plan's HCFG.
- void predicate();
-};
-} // end namespace llvm
-#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
new file mode 100644
index 000000000000..92422b17457c
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -0,0 +1,840 @@
+//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains implementations for different VPlan recipes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPlan.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include <cassert>
+
+using namespace llvm;
+
+extern cl::opt<bool> EnableVPlanNativePath;
+
+bool VPRecipeBase::mayWriteToMemory() const {
+ switch (getVPDefID()) {
+ case VPWidenMemoryInstructionSC: {
+ return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
+ }
+ case VPReplicateSC:
+ case VPWidenCallSC:
+ return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
+ ->mayWriteToMemory();
+ case VPBranchOnMaskSC:
+ return false;
+ case VPWidenIntOrFpInductionSC:
+ case VPWidenCanonicalIVSC:
+ case VPWidenPHISC:
+ case VPBlendSC:
+ case VPWidenSC:
+ case VPWidenGEPSC:
+ case VPReductionSC:
+ case VPWidenSelectSC: {
+ const Instruction *I =
+ dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
+ (void)I;
+ assert((!I || !I->mayWriteToMemory()) &&
+ "underlying instruction may write to memory");
+ return false;
+ }
+ default:
+ return true;
+ }
+}
+
+bool VPRecipeBase::mayReadFromMemory() const {
+ switch (getVPDefID()) {
+ case VPWidenMemoryInstructionSC: {
+ return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
+ }
+ case VPReplicateSC:
+ case VPWidenCallSC:
+ return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
+ ->mayReadFromMemory();
+ case VPBranchOnMaskSC:
+ return false;
+ case VPWidenIntOrFpInductionSC:
+ case VPWidenCanonicalIVSC:
+ case VPWidenPHISC:
+ case VPBlendSC:
+ case VPWidenSC:
+ case VPWidenGEPSC:
+ case VPReductionSC:
+ case VPWidenSelectSC: {
+ const Instruction *I =
+ dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
+ (void)I;
+ assert((!I || !I->mayReadFromMemory()) &&
+ "underlying instruction may read from memory");
+ return false;
+ }
+ default:
+ return true;
+ }
+}
+
+bool VPRecipeBase::mayHaveSideEffects() const {
+ switch (getVPDefID()) {
+ case VPWidenIntOrFpInductionSC:
+ case VPWidenPointerInductionSC:
+ case VPWidenCanonicalIVSC:
+ case VPWidenPHISC:
+ case VPBlendSC:
+ case VPWidenSC:
+ case VPWidenGEPSC:
+ case VPReductionSC:
+ case VPWidenSelectSC:
+ case VPScalarIVStepsSC: {
+ const Instruction *I =
+ dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
+ (void)I;
+ assert((!I || !I->mayHaveSideEffects()) &&
+ "underlying instruction has side-effects");
+ return false;
+ }
+ case VPReplicateSC: {
+ auto *R = cast<VPReplicateRecipe>(this);
+ return R->getUnderlyingInstr()->mayHaveSideEffects();
+ }
+ default:
+ return true;
+ }
+}
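These three queries let transforms reason about legality without inspecting the underlying IR directly. A hedged sketch of the kind of check a caller could build on them; the helper name is invented for illustration:

    // Illustrative helper, not part of this patch: two recipes can be
    // reordered only if neither has side effects and their memory
    // accesses cannot conflict.
    static bool canReorder(const VPRecipeBase &A, const VPRecipeBase &B) {
      if (A.mayHaveSideEffects() || B.mayHaveSideEffects())
        return false;
      bool MemConflict =
          (A.mayWriteToMemory() &&
           (B.mayReadFromMemory() || B.mayWriteToMemory())) ||
          (B.mayWriteToMemory() && A.mayReadFromMemory());
      return !MemConflict;
    }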
+
+void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
+ auto Lane = VPLane::getLastLaneForVF(State.VF);
+ VPValue *ExitValue = getOperand(0);
+ if (Plan.isUniformAfterVectorization(ExitValue))
+ Lane = VPLane::getFirstLane();
+ Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
+ State.Builder.GetInsertBlock());
+}
+
+void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
+ assert(!Parent && "Recipe already in some VPBasicBlock");
+ assert(InsertPos->getParent() &&
+ "Insertion position not in any VPBasicBlock");
+ Parent = InsertPos->getParent();
+ Parent->getRecipeList().insert(InsertPos->getIterator(), this);
+}
+
+void VPRecipeBase::insertBefore(VPBasicBlock &BB,
+ iplist<VPRecipeBase>::iterator I) {
+ assert(!Parent && "Recipe already in some VPBasicBlock");
+ assert(I == BB.end() || I->getParent() == &BB);
+ Parent = &BB;
+ BB.getRecipeList().insert(I, this);
+}
+
+void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
+ assert(!Parent && "Recipe already in some VPBasicBlock");
+ assert(InsertPos->getParent() &&
+ "Insertion position not in any VPBasicBlock");
+ Parent = InsertPos->getParent();
+ Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
+}
+
+void VPRecipeBase::removeFromParent() {
+ assert(getParent() && "Recipe not in any VPBasicBlock");
+ getParent()->getRecipeList().remove(getIterator());
+ Parent = nullptr;
+}
+
+iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
+ assert(getParent() && "Recipe not in any VPBasicBlock");
+ return getParent()->getRecipeList().erase(getIterator());
+}
+
+void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
+ removeFromParent();
+ insertAfter(InsertPos);
+}
+
+void VPRecipeBase::moveBefore(VPBasicBlock &BB,
+ iplist<VPRecipeBase>::iterator I) {
+ removeFromParent();
+ insertBefore(BB, I);
+}
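Together these form the usual intrusive-list editing API for recipes. A short sketch, assuming VPlan.h is available, of sinking a recipe to the start of another block; moveBefore performs the remove-from-old-parent step internally:

    // Illustrative only: sink recipe R to the beginning of VPBB.
    void sinkToBlockStart(VPRecipeBase &R, VPBasicBlock &VPBB) {
      R.moveBefore(VPBB, VPBB.begin());
    }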
+
+void VPInstruction::generateInstruction(VPTransformState &State,
+ unsigned Part) {
+ IRBuilderBase &Builder = State.Builder;
+ Builder.SetCurrentDebugLocation(DL);
+
+ if (Instruction::isBinaryOp(getOpcode())) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
+ State.set(this, V, Part);
+ return;
+ }
+
+ switch (getOpcode()) {
+ case VPInstruction::Not: {
+ Value *A = State.get(getOperand(0), Part);
+ Value *V = Builder.CreateNot(A);
+ State.set(this, V, Part);
+ break;
+ }
+ case VPInstruction::ICmpULE: {
+ Value *IV = State.get(getOperand(0), Part);
+ Value *TC = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateICmpULE(IV, TC);
+ State.set(this, V, Part);
+ break;
+ }
+ case Instruction::Select: {
+ Value *Cond = State.get(getOperand(0), Part);
+ Value *Op1 = State.get(getOperand(1), Part);
+ Value *Op2 = State.get(getOperand(2), Part);
+ Value *V = Builder.CreateSelect(Cond, Op1, Op2);
+ State.set(this, V, Part);
+ break;
+ }
+ case VPInstruction::ActiveLaneMask: {
+ // Get first lane of vector induction variable.
+ Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
+ // Get the original loop tripcount.
+ Value *ScalarTC = State.get(getOperand(1), Part);
+
+ auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
+ auto *PredTy = VectorType::get(Int1Ty, State.VF);
+ Instruction *Call = Builder.CreateIntrinsic(
+ Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
+ {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
+ State.set(this, Call, Part);
+ break;
+ }
+ case VPInstruction::FirstOrderRecurrenceSplice: {
+ // Generate code to combine the previous and current values in vector v3.
+ //
+ // vector.ph:
+ // v_init = vector(..., ..., ..., a[-1])
+ // br vector.body
+ //
+ // vector.body
+ // i = phi [0, vector.ph], [i+4, vector.body]
+ // v1 = phi [v_init, vector.ph], [v2, vector.body]
+ // v2 = a[i, i+1, i+2, i+3];
+ // v3 = vector(v1(3), v2(0, 1, 2))
+
+ // For the first part, use the recurrence phi (v1), otherwise v2.
+ auto *V1 = State.get(getOperand(0), 0);
+ Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
+ if (!PartMinus1->getType()->isVectorTy()) {
+ State.set(this, PartMinus1, Part);
+ } else {
+ Value *V2 = State.get(getOperand(1), Part);
+ State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
+ }
+ break;
+ }
+ case VPInstruction::CanonicalIVIncrement:
+ case VPInstruction::CanonicalIVIncrementNUW: {
+ Value *Next = nullptr;
+ if (Part == 0) {
+ bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
+ auto *Phi = State.get(getOperand(0), 0);
+ // The loop step is equal to the vectorization factor (num of SIMD
+ // elements) times the unroll factor (num of SIMD instructions).
+ Value *Step =
+ createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
+ Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
+ } else {
+ Next = State.get(this, 0);
+ }
+
+ State.set(this, Next, Part);
+ break;
+ }
+ case VPInstruction::BranchOnCond: {
+ if (Part != 0)
+ break;
+
+ Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
+ VPRegionBlock *ParentRegion = getParent()->getParent();
+ VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
+
+ // Replace the temporary unreachable terminator with a new conditional
+    // branch, hooking it up to the backward destination for exiting blocks now
+    // and to the forward destination(s) later when they are created.
+ BranchInst *CondBr =
+ Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
+
+ if (getParent()->isExiting())
+ CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
+
+ CondBr->setSuccessor(0, nullptr);
+ Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
+ break;
+ }
+ case VPInstruction::BranchOnCount: {
+ if (Part != 0)
+ break;
+ // First create the compare.
+ Value *IV = State.get(getOperand(0), Part);
+ Value *TC = State.get(getOperand(1), Part);
+ Value *Cond = Builder.CreateICmpEQ(IV, TC);
+
+ // Now create the branch.
+ auto *Plan = getParent()->getPlan();
+ VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
+ VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
+
+ // Replace the temporary unreachable terminator with a new conditional
+    // branch, hooking it up to the backward destination (the header) now and
+    // to the forward destination (the exit/middle block) later when it is
+    // created.
+ // Note that CreateCondBr expects a valid BB as first argument, so we need
+ // to set it to nullptr later.
+ BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
+ State.CFG.VPBB2IRBB[Header]);
+ CondBr->setSuccessor(0, nullptr);
+ Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported opcode for instruction");
+ }
+}
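For the canonical IV increment, the step is VF * UF for non-scalable vectors; with VF = 4 and UF = 2 the counter advances by 8 each vector iteration. A standalone model of that arithmetic:

    #include <cstdint>

    // Fixed-VF model of the step fed to CanonicalIVIncrement above: the
    // counter advances by the number of elements handled per iteration.
    uint64_t canonicalIVStep(uint64_t VF, uint64_t UF) { return VF * UF; }
    // canonicalIVStep(4, 2) == 8, i.e. "index.next = index + 8".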
+
+void VPInstruction::execute(VPTransformState &State) {
+ assert(!State.Instance && "VPInstruction executing an Instance");
+ IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
+ State.Builder.setFastMathFlags(FMF);
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ generateInstruction(State, Part);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPInstruction::dump() const {
+ VPSlotTracker SlotTracker(getParent()->getPlan());
+ print(dbgs(), "", SlotTracker);
+}
+
+void VPInstruction::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+
+ if (hasResult()) {
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ }
+
+ switch (getOpcode()) {
+ case VPInstruction::Not:
+ O << "not";
+ break;
+ case VPInstruction::ICmpULE:
+ O << "icmp ule";
+ break;
+ case VPInstruction::SLPLoad:
+ O << "combined load";
+ break;
+ case VPInstruction::SLPStore:
+ O << "combined store";
+ break;
+ case VPInstruction::ActiveLaneMask:
+ O << "active lane mask";
+ break;
+ case VPInstruction::FirstOrderRecurrenceSplice:
+ O << "first-order splice";
+ break;
+ case VPInstruction::CanonicalIVIncrement:
+ O << "VF * UF + ";
+ break;
+ case VPInstruction::CanonicalIVIncrementNUW:
+ O << "VF * UF +(nuw) ";
+ break;
+ case VPInstruction::BranchOnCond:
+ O << "branch-on-cond";
+ break;
+ case VPInstruction::BranchOnCount:
+ O << "branch-on-count ";
+ break;
+ default:
+ O << Instruction::getOpcodeName(getOpcode());
+ }
+
+ O << FMF;
+
+ for (const VPValue *Operand : operands()) {
+ O << " ";
+ Operand->printAsOperand(O, SlotTracker);
+ }
+
+ if (DL) {
+ O << ", !dbg ";
+ DL.print(O);
+ }
+}
+#endif
+
+void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
+ // Make sure the VPInstruction is a floating-point operation.
+ assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
+ Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
+ Opcode == Instruction::FCmp) &&
+ "this op can't take fast-math flags");
+ FMF = FMFNew;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-CALL ";
+
+ auto *CI = cast<CallInst>(getUnderlyingInstr());
+ if (CI->getType()->isVoidTy())
+ O << "void ";
+ else {
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ }
+
+ O << "call @" << CI->getCalledFunction()->getName() << "(";
+ printOperands(O, SlotTracker);
+ O << ")";
+}
+
+void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-SELECT ";
+ printAsOperand(O, SlotTracker);
+ O << " = select ";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(1)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(2)->printAsOperand(O, SlotTracker);
+ O << (InvariantCond ? " (condition is loop invariant)" : "");
+}
+
+void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
+ printOperands(O, SlotTracker);
+}
+
+void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-INDUCTION";
+ if (getTruncInst()) {
+ O << "\\l\"";
+ O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
+ O << " +\n" << Indent << "\" ";
+ getVPValue(0)->printAsOperand(O, SlotTracker);
+ } else
+ O << " " << VPlanIngredient(IV);
+
+ O << ", ";
+ getStepValue()->printAsOperand(O, SlotTracker);
+}
+#endif
+
+bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
+ auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
+ auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
+ return StartC && StartC->isZero() && StepC && StepC->isOne();
+}
+
+VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
+ return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
+}
+
+bool VPScalarIVStepsRecipe::isCanonical() const {
+ auto *CanIV = getCanonicalIV();
+ // The start value of the steps-recipe must match the start value of the
+ // canonical induction and it must step by 1.
+ if (CanIV->getStartValue() != getStartValue())
+ return false;
+ auto *StepVPV = getStepValue();
+ if (StepVPV->getDef())
+ return false;
+ auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
+ return StepC && StepC->isOne();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+  O << " = SCALAR-STEPS ";
+ printOperands(O, SlotTracker);
+}
+
+void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-GEP ";
+ O << (IsPtrLoopInvariant ? "Inv" : "Var");
+ size_t IndicesNumber = IsIndexLoopInvariant.size();
+ for (size_t I = 0; I < IndicesNumber; ++I)
+ O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
+
+ O << " ";
+ printAsOperand(O, SlotTracker);
+ O << " = getelementptr ";
+ printOperands(O, SlotTracker);
+}
+
+void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "BLEND ";
+ Phi->printAsOperand(O, false);
+ O << " =";
+ if (getNumIncomingValues() == 1) {
+ // Not a User of any mask: not really blending, this is a
+ // single-predecessor phi.
+ O << " ";
+ getIncomingValue(0)->printAsOperand(O, SlotTracker);
+ } else {
+ for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
+ O << " ";
+ getIncomingValue(I)->printAsOperand(O, SlotTracker);
+ O << "/";
+ getMask(I)->printAsOperand(O, SlotTracker);
+ }
+ }
+}
+
+void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "REDUCE ";
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ getChainOp()->printAsOperand(O, SlotTracker);
+ O << " +";
+ if (isa<FPMathOperator>(getUnderlyingInstr()))
+ O << getUnderlyingInstr()->getFastMathFlags();
+ O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
+ getVecOp()->printAsOperand(O, SlotTracker);
+ if (getCondOp()) {
+ O << ", ";
+ getCondOp()->printAsOperand(O, SlotTracker);
+ }
+ O << ")";
+ if (RdxDesc->IntermediateStore)
+ O << " (with final reduction value stored in invariant address sank "
+ "outside of loop)";
+}
+
+void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
+
+ if (!getUnderlyingInstr()->getType()->isVoidTy()) {
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ }
+ if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
+ O << "call @" << CB->getCalledFunction()->getName() << "(";
+ interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
+ O, [&O, &SlotTracker](VPValue *Op) {
+ Op->printAsOperand(O, SlotTracker);
+ });
+ O << ")";
+ } else {
+ O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
+ printOperands(O, SlotTracker);
+ }
+
+ if (AlsoPack)
+ O << " (S->V)";
+}
+
+void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "PHI-PREDICATED-INSTRUCTION ";
+ printAsOperand(O, SlotTracker);
+ O << " = ";
+ printOperands(O, SlotTracker);
+}
+
+void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN ";
+
+ if (!isStore()) {
+ getVPSingleValue()->printAsOperand(O, SlotTracker);
+ O << " = ";
+ }
+ O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
+
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
+ Value *Start = getStartValue()->getLiveInIRValue();
+ PHINode *EntryPart = PHINode::Create(
+ Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ EntryPart->addIncoming(Start, VectorPH);
+ EntryPart->setDebugLoc(DL);
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
+ State.set(this, EntryPart, Part);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+ printAsOperand(O, SlotTracker);
+ O << " = CANONICAL-INDUCTION";
+}
+#endif
+
+bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
+ bool IsUniform = vputils::onlyFirstLaneUsed(this);
+ return all_of(users(),
+ [&](const VPUser *U) { return U->usesScalars(this); }) &&
+ (IsUniform || !VF.isScalable());
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+ printAsOperand(O, SlotTracker);
+ O << " = WIDEN-POINTER-INDUCTION ";
+ getStartValue()->printAsOperand(O, SlotTracker);
+ O << ", " << *IndDesc.getStep();
+}
+#endif
+
+void VPExpandSCEVRecipe::execute(VPTransformState &State) {
+ assert(!State.Instance && "cannot be used in per-lane");
+ const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
+ SCEVExpander Exp(SE, DL, "induction");
+
+ Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
+ &*State.Builder.GetInsertPoint());
+
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
+ State.set(this, Res, Part);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+ getVPSingleValue()->printAsOperand(O, SlotTracker);
+ O << " = EXPAND SCEV " << *Expr;
+}
+#endif
+
+void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
+ Value *CanonicalIV = State.get(getOperand(0), 0);
+ Type *STy = CanonicalIV->getType();
+ IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
+ ElementCount VF = State.VF;
+ Value *VStart = VF.isScalar()
+ ? CanonicalIV
+ : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
+ Value *VStep = createStepForVF(Builder, STy, VF, Part);
+ if (VF.isVector()) {
+ VStep = Builder.CreateVectorSplat(VF, VStep);
+ VStep =
+ Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
+ }
+ Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
+ State.set(this, CanonicalVectorIV, Part);
+ }
+}
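Concretely, for a fixed VF the widened IV of unroll part P is splat(iv) + (P * VF + <0, 1, ..., VF-1>). A standalone model with plain arrays, assuming non-scalable vectors:

    #include <cstdint>
    #include <vector>

    // Model of VPWidenCanonicalIVRecipe::execute for fixed VF: lane L of
    // unroll part P holds IV + P * VF + L.
    std::vector<uint64_t> widenCanonicalIV(uint64_t IV, unsigned VF,
                                           unsigned Part) {
      std::vector<uint64_t> Lanes(VF);
      for (unsigned L = 0; L < VF; ++L)
        Lanes[L] = IV + Part * VF + L;
      return Lanes;
    }
    // IV = 0, VF = 4: Part 0 -> {0,1,2,3}, Part 1 -> {4,5,6,7}.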
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+ printAsOperand(O, SlotTracker);
+ O << " = WIDEN-CANONICAL-INDUCTION ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ // Create a vector from the initial value.
+ auto *VectorInit = getStartValue()->getLiveInIRValue();
+
+ Type *VecTy = State.VF.isScalar()
+ ? VectorInit->getType()
+ : VectorType::get(VectorInit->getType(), State.VF);
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ if (State.VF.isVector()) {
+ auto *IdxTy = Builder.getInt32Ty();
+ auto *One = ConstantInt::get(IdxTy, 1);
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
+ auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
+ VectorInit = Builder.CreateInsertElement(
+ PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
+ }
+
+ // Create a phi node for the new recurrence.
+ PHINode *EntryPart = PHINode::Create(
+ VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
+ EntryPart->addIncoming(VectorInit, VectorPH);
+ State.set(this, EntryPart, 0);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
+ printAsOperand(O, SlotTracker);
+ O << " = phi ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPReductionPHIRecipe::execute(VPTransformState &State) {
+ PHINode *PN = cast<PHINode>(getUnderlyingValue());
+ auto &Builder = State.Builder;
+
+ // In order to support recurrences we need to be able to vectorize Phi nodes.
+ // Phi nodes have cycles, so we need to vectorize them in two stages. This is
+ // stage #1: We create a new vector PHI node with no incoming edges. We'll use
+ // this value when we vectorize all of the instructions that use the PHI.
+ bool ScalarPHI = State.VF.isScalar() || IsInLoop;
+ Type *VecTy =
+ ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
+
+ BasicBlock *HeaderBB = State.CFG.PrevBB;
+ assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
+ "recipe must be in the vector loop header");
+ unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
+ for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
+ Value *EntryPart =
+ PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
+ State.set(this, EntryPart, Part);
+ }
+
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ VPValue *StartVPV = getStartValue();
+ Value *StartV = StartVPV->getLiveInIRValue();
+
+ Value *Iden = nullptr;
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
+ RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
+    // MinMax reductions have the start value as their identity.
+ if (ScalarPHI) {
+ Iden = StartV;
+ } else {
+ IRBuilderBase::InsertPointGuard IPBuilder(Builder);
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ StartV = Iden =
+ Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
+ }
+ } else {
+ Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
+ RdxDesc.getFastMathFlags());
+
+ if (!ScalarPHI) {
+ Iden = Builder.CreateVectorSplat(State.VF, Iden);
+ IRBuilderBase::InsertPointGuard IPBuilder(Builder);
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ Constant *Zero = Builder.getInt32(0);
+ StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
+ }
+ }
+
+ for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
+ Value *EntryPart = State.get(this, Part);
+ // Make sure to add the reduction start value only to the
+ // first unroll part.
+ Value *StartVal = (Part == 0) ? StartV : Iden;
+ cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
+ }
+}
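The net effect is that only unroll part 0 carries the real start value while the other parts start at the identity, so combining all parts at the end folds the start value in exactly once. A scalar model, assuming an integer add reduction (identity 0) and UF >= 1:

    #include <cstdint>
    #include <vector>

    // Initial value of each part's reduction phi for an add reduction:
    // part 0 gets StartV, the rest get the identity, so the final sum of
    // all parts counts StartV exactly once.
    std::vector<int64_t> reductionPhiInits(int64_t StartV, unsigned UF) {
      std::vector<int64_t> Inits(UF, /*Iden=*/0);
      Inits[0] = StartV;
      return Inits;
    }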
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-REDUCTION-PHI ";
+
+ printAsOperand(O, SlotTracker);
+ O << " = phi ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPWidenPHIRecipe::execute(VPTransformState &State) {
+ assert(EnableVPlanNativePath &&
+ "Non-native vplans are not expected to have VPWidenPHIRecipes.");
+
+ // Currently we enter here in the VPlan-native path for non-induction
+ // PHIs where all control flow is uniform. We simply widen these PHIs.
+ // Create a vector phi with no operands - the vector phi operands will be
+ // set at the end of vector code generation.
+ VPBasicBlock *Parent = getParent();
+ VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
+ unsigned StartIdx = 0;
+ // For phis in header blocks of loop regions, use the index of the value
+ // coming from the preheader.
+ if (LoopRegion->getEntryBasicBlock() == Parent) {
+ for (unsigned I = 0; I < getNumOperands(); ++I) {
+ if (getIncomingBlock(I) ==
+ LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
+ StartIdx = I;
+ }
+ }
+ Value *Op0 = State.get(getOperand(StartIdx), 0);
+ Type *VecTy = Op0->getType();
+ Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
+ State.set(this, VecPhi, 0);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-PHI ";
+
+ auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
+ // Unless all incoming values are modeled in VPlan print the original PHI
+ // directly.
+ // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
+ // values as VPValues.
+ if (getNumOperands() != OriginalPhi->getNumOperands()) {
+ O << VPlanIngredient(OriginalPhi);
+ return;
+ }
+
+ printAsOperand(O, SlotTracker);
+ O << " = phi ";
+ printOperands(O, SlotTracker);
+}
+#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
index 9e19e172dea5..3a7e77fd9efd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -15,16 +15,10 @@
//===----------------------------------------------------------------------===//
#include "VPlan.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
+#include "VPlanValue.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
@@ -32,12 +26,9 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
-#include <iterator>
#include <utility>
using namespace llvm;
@@ -396,7 +387,7 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
return markFailed();
assert(getOpcode(Values) && "Opcodes for all values must match");
- unsigned ValuesOpcode = getOpcode(Values).getValue();
+ unsigned ValuesOpcode = *getOpcode(Values);
SmallVector<VPValue *, 4> CombinedOperands;
if (areCommutative(Values)) {
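The getValue() to operator* change above is part of the wider llvm::Optional cleanup; both spellings assert on an empty Optional in debug builds. A minimal illustration:

    #include "llvm/ADT/Optional.h"

    unsigned deref(llvm::Optional<unsigned> O) {
      // return O.getValue();  // older spelling
      return *O;               // terser spelling used by this patch
    }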
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 70ce773a8a85..cca484e13bf1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -13,6 +13,8 @@
#include "VPlanTransforms.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/IVDescriptors.h"
using namespace llvm;
@@ -22,17 +24,15 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
GetIntOrFpInductionDescriptor,
SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE) {
- auto *TopRegion = cast<VPRegionBlock>(Plan->getEntry());
- ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry());
-
- for (VPBlockBase *Base : RPOT) {
- // Do not widen instructions in pre-header and exit blocks.
- if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0)
- continue;
-
- VPBasicBlock *VPBB = Base->getEntryBasicBlock();
+ ReversePostOrderTraversal<VPBlockRecursiveTraversalWrapper<VPBlockBase *>>
+ RPOT(Plan->getEntry());
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ VPRecipeBase *Term = VPBB->getTerminator();
+ auto EndIter = Term ? Term->getIterator() : VPBB->end();
// Introduce each ingredient into VPlan.
- for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) {
+ for (VPRecipeBase &Ingredient :
+ make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
+
VPValue *VPV = Ingredient.getVPSingleValue();
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
if (DeadInstructions.count(Inst)) {
@@ -47,8 +47,10 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
+ VPValue *Step =
+ vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
NewRecipe =
- new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true);
+ new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
@@ -295,14 +297,19 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
}
void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
- SmallVector<std::pair<VPRecipeBase *, VPValue *>> CastsToRemove;
- for (auto &Phi : Plan.getEntry()->getEntryBasicBlock()->phis()) {
+ for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (!IV || IV->getTruncInst())
continue;
- // Visit all casts connected to IV and in Casts. Collect them.
- // remember them for removal.
+ // A sequence of IR Casts has potentially been recorded for IV, which
+ // *must be bypassed* when the IV is vectorized, because the vectorized IV
+ // will produce the desired casted value. This sequence forms a def-use
+ // chain and is provided in reverse order, ending with the cast that uses
+ // the IV phi. Search for the recipe of the last cast in the chain and
+ // replace it with the original IV. Note that only the final cast is
+ // expected to have users outside the cast-chain and the dead casts left
+ // over will be cleaned up later.
auto &Casts = IV->getInductionDescriptor().getCastInsts();
VPValue *FindMyCast = IV;
for (Instruction *IRCast : reverse(Casts)) {
@@ -315,14 +322,9 @@ void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
break;
}
}
- assert(FoundUserCast && "Missing a cast to remove");
- CastsToRemove.emplace_back(FoundUserCast, IV);
FindMyCast = FoundUserCast->getVPSingleValue();
}
- }
- for (auto &E : CastsToRemove) {
- E.first->getVPSingleValue()->replaceAllUsesWith(E.second);
- E.first->eraseFromParent();
+ FindMyCast->replaceAllUsesWith(IV);
}
}
@@ -358,3 +360,73 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
}
}
}
+
+void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
+ ReversePostOrderTraversal<VPBlockRecursiveTraversalWrapper<VPBlockBase *>>
+ RPOT(Plan.getEntry());
+
+  for (VPBasicBlock *VPBB :
+       reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
+ // The recipes in the block are processed in reverse order, to catch chains
+ // of dead recipes.
+ for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
+ if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) {
+ return V->getNumUsers() > 0;
+ }))
+ continue;
+ R.eraseFromParent();
+ }
+ }
+}
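Visiting recipes backwards lets a single pass remove whole chains: erasing a dead user first drops the use count of its operands before they are themselves visited. A standalone model over a toy instruction list, assuming each operand is defined at a lower index than its user:

    #include <vector>

    struct ToyInst {
      bool HasSideEffects = false;
      int Operand = -1;       // index of the single operand, or -1
      unsigned NumUsers = 0;
      bool Dead = false;
    };

    // One backward sweep marks chains of dead instructions, mirroring
    // the reverse traversal in removeDeadRecipes above.
    void removeDead(std::vector<ToyInst> &Insts) {
      for (int I = (int)Insts.size() - 1; I >= 0; --I) {
        ToyInst &R = Insts[I];
        if (R.HasSideEffects || R.NumUsers > 0)
          continue;
        R.Dead = true;
        if (R.Operand >= 0)
          --Insts[R.Operand].NumUsers; // may expose the operand as dead
      }
    }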
+
+void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
+ SmallVector<VPRecipeBase *> ToRemove;
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
+ for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+ auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+ if (!IV)
+ continue;
+ if (HasOnlyVectorVFs &&
+ none_of(IV->users(), [IV](VPUser *U) { return U->usesScalars(IV); }))
+ continue;
+
+ const InductionDescriptor &ID = IV->getInductionDescriptor();
+ VPValue *Step =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE);
+ Instruction *TruncI = IV->getTruncInst();
+ VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
+ IV->getPHINode()->getType(), ID, Plan.getCanonicalIV(),
+ IV->getStartValue(), Step, TruncI ? TruncI->getType() : nullptr);
+ HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi());
+
+ // Update scalar users of IV to use Step instead. Use SetVector to ensure
+ // the list of users doesn't contain duplicates.
+ SetVector<VPUser *> Users(IV->user_begin(), IV->user_end());
+ for (VPUser *U : Users) {
+ if (HasOnlyVectorVFs && !U->usesScalars(IV))
+ continue;
+ for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) {
+ if (U->getOperand(I) != IV)
+ continue;
+ U->setOperand(I, Steps);
+ }
+ }
+ }
+}
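SetVector is used because the users() range yields a user once per use, so a VPUser reading IV through several operands would otherwise be visited repeatedly. A standalone illustration of the dedup-while-preserving-order behavior:

    #include "llvm/ADT/SetVector.h"
    #include <cassert>
    #include <iterator>

    void dedupExample() {
      int A = 0, B = 0;
      int *Uses[] = {&A, &B, &A}; // &A listed twice, like a two-use user
      llvm::SetVector<int *> Users(std::begin(Uses), std::end(Uses));
      // Duplicates dropped, first-seen order kept.
      assert(Users.size() == 2 && Users[0] == &A && Users[1] == &B);
    }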
+
+void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) {
+ DenseMap<const SCEV *, VPValue *> SCEV2VPV;
+
+ for (VPRecipeBase &R :
+ make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
+ auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
+ if (!ExpR)
+ continue;
+
+ auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
+ if (I.second)
+ continue;
+ ExpR->replaceAllUsesWith(I.first->second);
+ ExpR->eraseFromParent();
+ }
+}
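The dedup hinges on insert returning an (iterator, inserted) pair: the first recipe seen for a SCEV wins, and later duplicates are redirected to it. The same idiom in standalone form, with strings standing in for SCEV expressions:

    #include <map>
    #include <string>

    // First Id registered for Expr wins; later duplicates are redirected
    // to it, mirroring removeRedundantExpandSCEVRecipes above.
    int canonicalId(std::map<std::string, int> &Seen,
                    const std::string &Expr, int Id) {
      auto I = Seen.insert({Expr, Id});
      return I.first->second; // existing Id when I.second is false
    }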
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index e74409a86466..3372e255dff7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -14,8 +14,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
#include "VPlan.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
namespace llvm {
@@ -23,6 +22,7 @@ class InductionDescriptor;
class Instruction;
class PHINode;
class ScalarEvolution;
+class Loop;
struct VPlanTransforms {
/// Replaces the VPInstructions in \p Plan with corresponding
@@ -49,6 +49,18 @@ struct VPlanTransforms {
/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan);
+
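+  /// Remove recipes from \p Plan whose values are unused and which have no
+  /// side effects.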
+ static void removeDeadRecipes(VPlan &Plan);
+
+ /// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values,
+ /// provide them by building scalar steps off of the canonical scalar IV and
+ /// update the original IV's users. This is an optional optimization to reduce
+  /// the need for vector extracts.
+ static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE);
+
+  /// Remove redundant ExpandSCEVRecipes in \p Plan's entry block by replacing
+ /// them with already existing recipes expanding the same SCEV expression.
+ static void removeRedundantExpandSCEVRecipes(VPlan &Plan);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 5296d2b9485c..5fc676834331 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -106,6 +106,7 @@ public:
VPVFirstOrderRecurrencePHISC,
VPVWidenPHISC,
VPVWidenIntOrFpInductionSC,
+ VPVWidenPointerInductionSC,
VPVPredInstPHI,
VPVReductionPHISC,
};
@@ -207,9 +208,7 @@ public:
/// Subclass identifier (for isa/dyn_cast).
enum class VPUserID {
Recipe,
- // TODO: Currently VPUsers are used in VPBlockBase, but in the future the
- // only VPUsers should either be recipes or live-outs.
- Block
+ LiveOut,
};
private:
@@ -286,6 +285,22 @@ public:
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPDef *Recipe);
+
+  /// Returns true if the VPUser uses scalars of operand \p Op. The default
+  /// implementation conservatively falls back to onlyFirstLaneUsed(Op).
+ virtual bool usesScalars(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return onlyFirstLaneUsed(Op);
+ }
+
+ /// Returns true if the VPUser only uses the first lane of operand \p Op.
+ /// Conservatively returns false.
+ virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return false;
+ }
};
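A recipe that reads an operand only as a uniform scalar could override the new hook. A hedged sketch: the type name and constructor are invented, and a real recipe would derive through VPRecipeBase rather than VPUser directly:

    // Hypothetical user that treats operand 0 as a uniform scalar, so
    // only the first lane of that operand is ever read.
    struct UniformUseSketch : public VPUser {
      UniformUseSketch(VPValue *A, VPValue *B)
          : VPUser({A, B}, VPUser::VPUserID::Recipe) {}
      bool onlyFirstLaneUsed(const VPValue *Op) const override {
        assert(is_contained(operands(), Op) && "Op must be an operand");
        return Op == getOperand(0);
      }
    };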
/// This class augments a recipe with a set of VPValues defined by the recipe.
@@ -327,10 +342,12 @@ public:
/// type identification.
using VPRecipeTy = enum {
VPBranchOnMaskSC,
+ VPExpandSCEVSC,
VPInstructionSC,
VPInterleaveSC,
VPReductionSC,
VPReplicateSC,
+ VPScalarIVStepsSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenGEPSC,
@@ -344,6 +361,7 @@ public:
VPFirstOrderRecurrencePHISC,
VPWidenPHISC,
VPWidenIntOrFpInductionSC,
+ VPWidenPointerInductionSC,
VPPredInstPHISC,
VPReductionPHISC,
VPFirstPHISC = VPBlendSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index d36f250995e1..f917883145c0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -43,17 +43,20 @@ static bool hasDuplicates(const SmallVectorImpl<VPBlockBase *> &VPBlockVec) {
/// \p Region. Checks in this function are generic for VPBlockBases. They are
/// not specific for VPBasicBlocks or VPRegionBlocks.
static void verifyBlocksInRegion(const VPRegionBlock *Region) {
- for (const VPBlockBase *VPB :
- make_range(df_iterator<const VPBlockBase *>::begin(Region->getEntry()),
- df_iterator<const VPBlockBase *>::end(Region->getExit()))) {
+ for (const VPBlockBase *VPB : make_range(
+ df_iterator<const VPBlockBase *>::begin(Region->getEntry()),
+ df_iterator<const VPBlockBase *>::end(Region->getExiting()))) {
// Check block's parent.
assert(VPB->getParent() == Region && "VPBlockBase has wrong parent");
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
// Check block's condition bit.
- if (VPB->getNumSuccessors() > 1)
- assert(VPB->getCondBit() && "Missing condition bit!");
+ if (VPB->getNumSuccessors() > 1 || (VPBB && VPBB->isExiting()))
+ assert(VPBB && VPBB->getTerminator() &&
+ "Block has multiple successors but doesn't "
+ "have a proper branch recipe!");
else
- assert(!VPB->getCondBit() && "Unexpected condition bit!");
+ assert((!VPBB || !VPBB->getTerminator()) && "Unexpected branch recipe!");
// Check block's successors.
const auto &Successors = VPB->getSuccessors();
@@ -94,13 +97,14 @@ static void verifyBlocksInRegion(const VPRegionBlock *Region) {
/// VPBlockBases. Do not recurse inside nested VPRegionBlocks.
static void verifyRegion(const VPRegionBlock *Region) {
const VPBlockBase *Entry = Region->getEntry();
- const VPBlockBase *Exit = Region->getExit();
+ const VPBlockBase *Exiting = Region->getExiting();
- // Entry and Exit shouldn't have any predecessor/successor, respectively.
+ // Entry and Exiting shouldn't have any predecessor/successor, respectively.
assert(!Entry->getNumPredecessors() && "Region entry has predecessors.");
- assert(!Exit->getNumSuccessors() && "Region exit has successors.");
+ assert(!Exiting->getNumSuccessors() &&
+ "Region exiting block has successors.");
(void)Entry;
- (void)Exit;
+ (void)Exiting;
verifyBlocksInRegion(Region);
}
@@ -111,9 +115,9 @@ static void verifyRegionRec(const VPRegionBlock *Region) {
verifyRegion(Region);
// Recurse inside nested regions.
- for (const VPBlockBase *VPB :
- make_range(df_iterator<const VPBlockBase *>::begin(Region->getEntry()),
- df_iterator<const VPBlockBase *>::end(Region->getExit()))) {
+ for (const VPBlockBase *VPB : make_range(
+ df_iterator<const VPBlockBase *>::begin(Region->getEntry()),
+ df_iterator<const VPBlockBase *>::end(Region->getExiting()))) {
if (const auto *SubRegion = dyn_cast<VPRegionBlock>(VPB))
verifyRegionRec(SubRegion);
}
@@ -157,7 +161,7 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
}
}
- const VPRegionBlock *TopRegion = cast<VPRegionBlock>(Plan.getEntry());
+ const VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
const VPBasicBlock *Entry = dyn_cast<VPBasicBlock>(TopRegion->getEntry());
if (!Entry) {
errs() << "VPlan entry block is not a VPBasicBlock\n";
@@ -170,19 +174,19 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
return false;
}
- const VPBasicBlock *Exit = dyn_cast<VPBasicBlock>(TopRegion->getExit());
- if (!Exit) {
- errs() << "VPlan exit block is not a VPBasicBlock\n";
+ const VPBasicBlock *Exiting = dyn_cast<VPBasicBlock>(TopRegion->getExiting());
+ if (!Exiting) {
+ errs() << "VPlan exiting block is not a VPBasicBlock\n";
return false;
}
- if (Exit->empty()) {
- errs() << "VPlan vector loop exit must end with BranchOnCount "
+ if (Exiting->empty()) {
+ errs() << "VPlan vector loop exiting block must end with BranchOnCount "
"VPInstruction but is empty\n";
return false;
}
- auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exit->end()));
+ auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end()));
if (!LastInst || LastInst->getOpcode() != VPInstruction::BranchOnCount) {
errs() << "VPlan vector loop exit must end with BranchOnCount "
"VPInstruction\n";
@@ -197,10 +201,17 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
errs() << "region entry block has predecessors\n";
return false;
}
- if (Region->getExit()->getNumSuccessors() != 0) {
- errs() << "region exit block has successors\n";
+ if (Region->getExiting()->getNumSuccessors() != 0) {
+ errs() << "region exiting block has successors\n";
return false;
}
}
+
+ for (auto &KV : Plan.getLiveOuts())
+ if (KV.second->getNumOperands() != 1) {
+ errs() << "live outs must have a single operand\n";
+ return false;
+ }
+
return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 620d388199e0..90598937affc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -103,11 +103,13 @@ private:
bool foldSingleElementStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
+ bool foldShuffleFromReductions(Instruction &I);
+ bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
void replaceValue(Value &Old, Value &New) {
Old.replaceAllUsesWith(&New);
- New.takeName(&Old);
if (auto *NewI = dyn_cast<Instruction>(&New)) {
+ New.takeName(&Old);
Worklist.pushUsersToWorkList(*NewI);
Worklist.pushValue(NewI);
}
@@ -152,12 +154,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
- // If original AS != Load's AS, we can't bitcast the original pointer and have
- // to use Load's operand instead. Ideally we would want to strip pointer casts
- // without changing AS, but there's no API to do that ATM.
unsigned AS = Load->getPointerAddressSpace();
- if (AS != SrcPtr->getType()->getPointerAddressSpace())
- SrcPtr = Load->getPointerOperand();
// We are potentially transforming byte-sized (8-bit) memory accesses, so make
// sure we have all of our type-based constraints in place for this target.
@@ -245,7 +242,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// It is safe and potentially profitable to load a vector directly:
// inselt undef, load Scalar, 0 --> load VecPtr
IRBuilder<> Builder(Load);
- Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
+ Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ SrcPtr, MinVecTy->getPointerTo(AS));
Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
VecLd = Builder.CreateShuffleVector(VecLd, Mask);
@@ -259,12 +257,12 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
ExtractElementInst *VectorCombine::getShuffleExtract(
ExtractElementInst *Ext0, ExtractElementInst *Ext1,
unsigned PreferredExtractIndex = InvalidIndex) const {
- assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
- isa<ConstantInt>(Ext1->getIndexOperand()) &&
- "Expected constant extract indexes");
+ auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
+ auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
+ assert(Index0C && Index1C && "Expected constant extract indexes");
- unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
- unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();
+ unsigned Index0 = Index0C->getZExtValue();
+ unsigned Index1 = Index1C->getZExtValue();
// If the extract indexes are identical, no shuffle is needed.
if (Index0 == Index1)
@@ -310,9 +308,10 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
unsigned PreferredExtractIndex) {
- assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
- isa<ConstantInt>(Ext1->getOperand(1)) &&
- "Expected constant extract indexes");
+ auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
+ auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
+ assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
+
unsigned Opcode = I.getOpcode();
Type *ScalarTy = Ext0->getType();
auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
@@ -335,8 +334,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// Get cost estimates for the extract elements. These costs will factor into
// both sequences.
- unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
- unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
+ unsigned Ext0Index = Ext0IndexC->getZExtValue();
+ unsigned Ext1Index = Ext1IndexC->getZExtValue();
InstructionCost Extract0Cost =
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
@@ -698,8 +697,9 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
ScalarInst->copyIRFlags(&I);
// Fold the vector constants in the original vectors into a new base vector.
- Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
- : ConstantExpr::get(Opcode, VecC0, VecC1);
+ Value *NewVecC =
+ IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
+ : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
replaceValue(I, *Insert);
return true;
@@ -1019,12 +1019,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
return false;
NumInstChecked++;
}
- }
-
- if (!LastCheckedInst)
- LastCheckedInst = UI;
- else if (LastCheckedInst->comesBefore(UI))
LastCheckedInst = UI;
+ }
auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
if (!ScalarIdx.isSafe()) {
@@ -1121,6 +1117,339 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
return true;
}
+/// Given a commutative reduction, the order of the input lanes does not alter
+/// the results. We can use this to remove certain shuffles feeding the
+/// reduction, avoiding the need to shuffle at all.
+bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ if (!II)
+ return false;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_umax:
+ break;
+ default:
+ return false;
+ }
+
+ // Find all the inputs when looking through operations that do not alter the
+ // lane order (binops, for example). Currently we look for a single shuffle,
+ // and can ignore splat values.
+ std::queue<Value *> Worklist;
+ SmallPtrSet<Value *, 4> Visited;
+ ShuffleVectorInst *Shuffle = nullptr;
+ if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
+ Worklist.push(Op);
+
+ while (!Worklist.empty()) {
+ Value *CV = Worklist.front();
+ Worklist.pop();
+ if (Visited.contains(CV))
+ continue;
+
+ // Splats don't change the order, so can be safely ignored.
+ if (isSplatValue(CV))
+ continue;
+
+ Visited.insert(CV);
+
+ if (auto *CI = dyn_cast<Instruction>(CV)) {
+ if (CI->isBinaryOp()) {
+ for (auto *Op : CI->operand_values())
+ Worklist.push(Op);
+ continue;
+ } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
+ if (Shuffle && Shuffle != SV)
+ return false;
+ Shuffle = SV;
+ continue;
+ }
+ }
+
+ // Anything else is currently an unknown node.
+ return false;
+ }
+
+ if (!Shuffle)
+ return false;
+
+ // Check that all uses of the binary ops and shuffles are also included in
+ // the lane-invariant operations (Visited should be the list of lanewise
+ // instructions, including the shuffle that we found).
+ for (auto *V : Visited)
+ for (auto *U : V->users())
+ if (!Visited.contains(U) && U != &I)
+ return false;
+
+ FixedVectorType *VecType =
+ dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
+ if (!VecType)
+ return false;
+ FixedVectorType *ShuffleInputType =
+ dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
+ if (!ShuffleInputType)
+ return false;
+ int NumInputElts = ShuffleInputType->getNumElements();
+
+ // Find the mask from sorting the lanes into order. This is most likely to
+ // become an identity or concat mask. Undef elements are pushed to the end.
+ SmallVector<int> ConcatMask;
+ Shuffle->getShuffleMask(ConcatMask);
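+ // Compare as unsigned so that the -1 (undef) sentinel elements sort last.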
+ sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
+ bool UsesSecondVec =
+ any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
+ InstructionCost OldCost = TTI.getShuffleCost(
+ UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
+ Shuffle->getShuffleMask());
+ InstructionCost NewCost = TTI.getShuffleCost(
+ UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
+ ConcatMask);
+
+ LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
+ << "\n");
+ LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
+ << "\n");
+ if (NewCost < OldCost) {
+ Builder.SetInsertPoint(Shuffle);
+ Value *NewShuffle = Builder.CreateShuffleVector(
+ Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
+ LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
+ replaceValue(*Shuffle, *NewShuffle);
+ }
+
+ // See if we can re-use foldSelectShuffle, getting it to reduce the size of
+ // the shuffle and put it into a nicer order, as it can ignore the order of
+ // the lanes feeding the reduction.
+ return foldSelectShuffle(*Shuffle, true);
+}
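
As an aside, the lane-invariance this fold relies on is easy to demonstrate in scalar code. A minimal standalone sketch (illustrative only, not part of the patch):

#include <array>
#include <cstdio>
#include <numeric>

int main() {
  // A commutative, associative reduction is unaffected by a permutation of
  // its input lanes, which is why the shuffle feeding the reduction can be
  // re-ordered or removed.
  std::array<int, 4> Lanes = {1, 2, 3, 4};
  std::array<int, 4> Permuted = {3, 1, 4, 2}; // same lanes, shuffled
  int A = std::accumulate(Lanes.begin(), Lanes.end(), 0);
  int B = std::accumulate(Permuted.begin(), Permuted.end(), 0);
  std::printf("%d == %d\n", A, B); // both sums are 10
  return 0;
}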
+
+/// This method looks for groups of shuffles acting on binops, of the form:
+/// %x = shuffle ...
+/// %y = shuffle ...
+/// %a = binop %x, %y
+/// %b = binop %x, %y
+/// shuffle %a, %b, selectmask
+/// We may, especially if the shuffle is wider than legal, be able to convert
+/// the shuffle to a form where only parts of a and b need to be computed. On
+/// architectures with no obvious "select" shuffle, this can reduce the total
+/// number of operations if the target reports them as cheaper.
+bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
+ auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
+ auto *VT = dyn_cast<FixedVectorType>(I.getType());
+ if (!SVI || !VT)
+ return false;
+ auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
+ auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
+ if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
+ VT != Op0->getType())
+ return false;
+ auto *SVI0A = dyn_cast<ShuffleVectorInst>(Op0->getOperand(0));
+ auto *SVI0B = dyn_cast<ShuffleVectorInst>(Op0->getOperand(1));
+ auto *SVI1A = dyn_cast<ShuffleVectorInst>(Op1->getOperand(0));
+ auto *SVI1B = dyn_cast<ShuffleVectorInst>(Op1->getOperand(1));
+ auto checkSVNonOpUses = [&](Instruction *I) {
+ if (!I || I->getOperand(0)->getType() != VT)
+ return true;
+ return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; });
+ };
+ if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
+ checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
+ return false;
+
+ // Collect all the uses that are shuffles that we can transform together. We
+ // may not have a single shuffle, but a group that can all be transformed
+ // together profitably.
+ SmallVector<ShuffleVectorInst *> Shuffles;
+ auto collectShuffles = [&](Instruction *I) {
+ for (auto *U : I->users()) {
+ auto *SV = dyn_cast<ShuffleVectorInst>(U);
+ if (!SV || SV->getType() != VT)
+ return false;
+ if (!llvm::is_contained(Shuffles, SV))
+ Shuffles.push_back(SV);
+ }
+ return true;
+ };
+ if (!collectShuffles(Op0) || !collectShuffles(Op1))
+ return false;
+ // From a reduction, we need to be processing a single shuffle, otherwise the
+ // other uses will not be lane-invariant.
+ if (FromReduction && Shuffles.size() > 1)
+ return false;
+
+ // For each of the output shuffles, we try to sort all the first vector
+ // elements to the beginning, followed by the second vector's elements at
+ // the end. If the binops are legalized to smaller vectors, this may reduce
+ // the total number of binops. We compute the ReconstructMask needed to convert
+ // back to the original lane order.
+ SmallVector<int> V1, V2;
+ SmallVector<SmallVector<int>> ReconstructMasks;
+ int MaxV1Elt = 0, MaxV2Elt = 0;
+ unsigned NumElts = VT->getNumElements();
+ for (ShuffleVectorInst *SVN : Shuffles) {
+ SmallVector<int> Mask;
+ SVN->getShuffleMask(Mask);
+
+ // Check the operands are the same as the original, or reversed (in which
+ // case we need to commute the mask).
+ Value *SVOp0 = SVN->getOperand(0);
+ Value *SVOp1 = SVN->getOperand(1);
+ if (SVOp0 == Op1 && SVOp1 == Op0) {
+ std::swap(SVOp0, SVOp1);
+ ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
+ }
+ if (SVOp0 != Op0 || SVOp1 != Op1)
+ return false;
+
+ // Calculate the reconstruction mask for this shuffle, i.e. the mask needed
+ // to take the packed values from Op0/Op1 and reconstruct the original lane
+ // order.
+ SmallVector<int> ReconstructMask;
+ for (unsigned I = 0; I < Mask.size(); I++) {
+ if (Mask[I] < 0) {
+ ReconstructMask.push_back(-1);
+ } else if (Mask[I] < static_cast<int>(NumElts)) {
+ MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
+ auto It = find(V1, Mask[I]);
+ if (It != V1.end())
+ ReconstructMask.push_back(It - V1.begin());
+ else {
+ ReconstructMask.push_back(V1.size());
+ V1.push_back(Mask[I]);
+ }
+ } else {
+ MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
+ auto It = find(V2, Mask[I] - NumElts);
+ if (It != V2.end())
+ ReconstructMask.push_back(NumElts + It - V2.begin());
+ else {
+ ReconstructMask.push_back(NumElts + V2.size());
+ V2.push_back(Mask[I] - NumElts);
+ }
+ }
+ }
+
+ // For reductions, we know that the output lane ordering doesn't alter the
+ // result. Sorting the mask in-order can help simplify the shuffle away.
+ if (FromReduction)
+ sort(ReconstructMask);
+ ReconstructMasks.push_back(ReconstructMask);
+ }
+
+ // If the maximum elements used from V1 and V2 are not larger than the new
+ // vectors, the vectors are already packed and performing the optimization
+ // again will likely not help any further. This also prevents us from getting
+ // stuck in a cycle in case the costs do not also rule it out.
+ if (V1.empty() || V2.empty() ||
+ (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
+ MaxV2Elt == static_cast<int>(V2.size()) - 1))
+ return false;
+
+ // Calculate the masks needed for the new input shuffles, which get padded
+ // with undef.
+ SmallVector<int> V1A, V1B, V2A, V2B;
+ for (unsigned I = 0; I < V1.size(); I++) {
+ V1A.push_back(SVI0A->getMaskValue(V1[I]));
+ V1B.push_back(SVI0B->getMaskValue(V1[I]));
+ }
+ for (unsigned I = 0; I < V2.size(); I++) {
+ V2A.push_back(SVI1A->getMaskValue(V2[I]));
+ V2B.push_back(SVI1B->getMaskValue(V2[I]));
+ }
+ while (V1A.size() < NumElts) {
+ V1A.push_back(UndefMaskElem);
+ V1B.push_back(UndefMaskElem);
+ }
+ while (V2A.size() < NumElts) {
+ V2A.push_back(UndefMaskElem);
+ V2B.push_back(UndefMaskElem);
+ }
+
+ auto AddShuffleCost = [&](InstructionCost C, ShuffleVectorInst *SV) {
+ return C +
+ TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, SV->getShuffleMask());
+ };
+ auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
+ return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask);
+ };
+
+ // Get the costs of the shuffles + binops before and after with the new
+ // shuffle masks.
+ InstructionCost CostBefore =
+ TTI.getArithmeticInstrCost(Op0->getOpcode(), VT) +
+ TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
+ CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
+ InstructionCost(0), AddShuffleCost);
+ // This set helps us only cost each unique shuffle once.
+ SmallPtrSet<ShuffleVectorInst *, 4> InputShuffles(
+ {SVI0A, SVI0B, SVI1A, SVI1B});
+ CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
+ InstructionCost(0), AddShuffleCost);
+
+ // The new binops will be unused for lanes past the used shuffle lengths.
+ // These types attempt to get the correct cost for that from the target.
+ FixedVectorType *Op0SmallVT =
+ FixedVectorType::get(VT->getScalarType(), V1.size());
+ FixedVectorType *Op1SmallVT =
+ FixedVectorType::get(VT->getScalarType(), V2.size());
+ InstructionCost CostAfter =
+ TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT) +
+ TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT);
+ CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
+ InstructionCost(0), AddShuffleMaskCost);
+ std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
+ CostAfter +=
+ std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
+ InstructionCost(0), AddShuffleMaskCost);
+
+ if (CostBefore <= CostAfter)
+ return false;
+
+ // The cost model has passed, create the new instructions.
+ Builder.SetInsertPoint(SVI0A);
+ Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0),
+ SVI0A->getOperand(1), V1A);
+ Builder.SetInsertPoint(SVI0B);
+ Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0),
+ SVI0B->getOperand(1), V1B);
+ Builder.SetInsertPoint(SVI1A);
+ Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0),
+ SVI1A->getOperand(1), V2A);
+ Builder.SetInsertPoint(SVI1B);
+ Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0),
+ SVI1B->getOperand(1), V2B);
+ Builder.SetInsertPoint(Op0);
+ Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
+ NSV0A, NSV0B);
+ if (auto *I = dyn_cast<Instruction>(NOp0))
+ I->copyIRFlags(Op0, true);
+ Builder.SetInsertPoint(Op1);
+ Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
+ NSV1A, NSV1B);
+ if (auto *I = dyn_cast<Instruction>(NOp1))
+ I->copyIRFlags(Op1, true);
+
+ for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
+ Builder.SetInsertPoint(Shuffles[S]);
+ Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
+ replaceValue(*Shuffles[S], *NSV);
+ }
+
+ Worklist.pushValue(NSV0A);
+ Worklist.pushValue(NSV0B);
+ Worklist.pushValue(NSV1A);
+ Worklist.pushValue(NSV1B);
+ for (auto *S : Shuffles)
+ Worklist.add(S);
+ return true;
+}
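
The mask-packing step above is self-contained enough to demonstrate outside LLVM. A standalone sketch (illustrative only; the mask values are invented):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const int NumElts = 4;
  std::vector<int> Mask = {5, 0, 7, 2}; // lanes drawn from both sources
  std::vector<int> V1, V2, Reconstruct;
  for (int M : Mask) {
    if (M < 0) { // undef lane
      Reconstruct.push_back(-1);
    } else if (M < NumElts) { // lane from the first source vector
      auto It = std::find(V1.begin(), V1.end(), M);
      if (It != V1.end())
        Reconstruct.push_back(static_cast<int>(It - V1.begin()));
      else {
        Reconstruct.push_back(static_cast<int>(V1.size()));
        V1.push_back(M);
      }
    } else { // lane from the second source vector
      auto It = std::find(V2.begin(), V2.end(), M - NumElts);
      if (It != V2.end())
        Reconstruct.push_back(NumElts + static_cast<int>(It - V2.begin()));
      else {
        Reconstruct.push_back(NumElts + static_cast<int>(V2.size()));
        V2.push_back(M - NumElts);
      }
    }
  }
  // Prints "4 0 5 1": V1 = {0,2} and V2 = {1,3} are the packed lane lists.
  for (int R : Reconstruct)
    std::printf("%d ", R);
  std::printf("\n");
  return 0;
}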
+
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
@@ -1140,6 +1469,8 @@ bool VectorCombine::run() {
MadeChange |= foldBitcastShuf(I);
MadeChange |= foldExtractedCmps(I);
MadeChange |= foldShuffleOfBinops(I);
+ MadeChange |= foldShuffleFromReductions(I);
+ MadeChange |= foldSelectShuffle(I);
}
MadeChange |= scalarizeBinopOrCmp(I);
MadeChange |= scalarizeLoadExtract(I);
diff --git a/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 010ca28fc237..208e5eeea864 100644
--- a/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Vectorize.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Vectorize.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp
new file mode 100644
index 000000000000..0661ed7c6ae1
--- /dev/null
+++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp
@@ -0,0 +1,719 @@
+//===-- MSVCPaths.cpp - MSVC path-parsing helpers -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/WindowsDriver/MSVCPaths.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <string>
+
+#ifdef _WIN32
+#include "llvm/Support/ConvertUTF.h"
+#endif
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#define NOGDI
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
+#ifdef _MSC_VER
+// Don't support SetupApi on MinGW.
+#define USE_MSVC_SETUP_API
+
+// Make sure this comes before MSVCSetupApi.h
+#include <comdef.h>
+
+#include "llvm/Support/COM.h"
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#endif
+#include "llvm/WindowsDriver/MSVCSetupApi.h"
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+_COM_SMARTPTR_TYPEDEF(ISetupConfiguration, __uuidof(ISetupConfiguration));
+_COM_SMARTPTR_TYPEDEF(ISetupConfiguration2, __uuidof(ISetupConfiguration2));
+_COM_SMARTPTR_TYPEDEF(ISetupHelper, __uuidof(ISetupHelper));
+_COM_SMARTPTR_TYPEDEF(IEnumSetupInstances, __uuidof(IEnumSetupInstances));
+_COM_SMARTPTR_TYPEDEF(ISetupInstance, __uuidof(ISetupInstance));
+_COM_SMARTPTR_TYPEDEF(ISetupInstance2, __uuidof(ISetupInstance2));
+#endif
+
+static std::string
+getHighestNumericTupleInDirectory(llvm::vfs::FileSystem &VFS,
+ llvm::StringRef Directory) {
+ std::string Highest;
+ llvm::VersionTuple HighestTuple;
+
+ std::error_code EC;
+ for (llvm::vfs::directory_iterator DirIt = VFS.dir_begin(Directory, EC),
+ DirEnd;
+ !EC && DirIt != DirEnd; DirIt.increment(EC)) {
+ auto Status = VFS.status(DirIt->path());
+ if (!Status || !Status->isDirectory())
+ continue;
+ llvm::StringRef CandidateName = llvm::sys::path::filename(DirIt->path());
+ llvm::VersionTuple Tuple;
+ if (Tuple.tryParse(CandidateName)) // tryParse() returns true on error.
+ continue;
+ if (Tuple > HighestTuple) {
+ HighestTuple = Tuple;
+ Highest = CandidateName.str();
+ }
+ }
+
+ return Highest;
+}
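
Note the inverted convention flagged in the comment above: VersionTuple::tryParse() returns true on error. A small sketch of the behaviour the loop depends on:

#include "llvm/Support/VersionTuple.h"
#include <cassert>

void versionTupleSketch() {
  llvm::VersionTuple A, B, Bad;
  assert(!A.tryParse("10.0.19041"));     // success: returns false
  assert(!B.tryParse("10.0.22621.0"));   // success: returns false
  assert(Bad.tryParse("not-a-version")); // error: returns true
  assert(B > A); // tuples compare componentwise, so B is the "highest"
}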
+
+static bool getWindows10SDKVersionFromPath(llvm::vfs::FileSystem &VFS,
+ const std::string &SDKPath,
+ std::string &SDKVersion) {
+ llvm::SmallString<128> IncludePath(SDKPath);
+ llvm::sys::path::append(IncludePath, "Include");
+ SDKVersion = getHighestNumericTupleInDirectory(VFS, IncludePath);
+ return !SDKVersion.empty();
+}
+
+static bool getWindowsSDKDirViaCommandLine(
+ llvm::vfs::FileSystem &VFS, llvm::Optional<llvm::StringRef> WinSdkDir,
+ llvm::Optional<llvm::StringRef> WinSdkVersion,
+ llvm::Optional<llvm::StringRef> WinSysRoot, std::string &Path, int &Major,
+ std::string &Version) {
+ if (WinSdkDir || WinSysRoot) {
+ // Don't validate the input; trust the value supplied by the user.
+ // The motivation is to prevent unnecessary file and registry access.
+ llvm::VersionTuple SDKVersion;
+ if (WinSdkVersion)
+ SDKVersion.tryParse(*WinSdkVersion);
+
+ if (WinSysRoot) {
+ llvm::SmallString<128> SDKPath(*WinSysRoot);
+ llvm::sys::path::append(SDKPath, "Windows Kits");
+ if (!SDKVersion.empty())
+ llvm::sys::path::append(SDKPath, llvm::Twine(SDKVersion.getMajor()));
+ else
+ llvm::sys::path::append(
+ SDKPath, getHighestNumericTupleInDirectory(VFS, SDKPath));
+ Path = std::string(SDKPath.str());
+ } else {
+ Path = WinSdkDir->str();
+ }
+
+ if (!SDKVersion.empty()) {
+ Major = SDKVersion.getMajor();
+ Version = SDKVersion.getAsString();
+ } else if (getWindows10SDKVersionFromPath(VFS, Path, Version)) {
+ Major = 10;
+ }
+ return true;
+ }
+ return false;
+}
+
+#ifdef _WIN32
+static bool readFullStringValue(HKEY hkey, const char *valueName,
+ std::string &value) {
+ std::wstring WideValueName;
+ if (!llvm::ConvertUTF8toWide(valueName, WideValueName))
+ return false;
+
+ DWORD result = 0;
+ DWORD valueSize = 0;
+ DWORD type = 0;
+ // First just query for the required size.
+ result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, &type, NULL,
+ &valueSize);
+ if (result != ERROR_SUCCESS || type != REG_SZ || !valueSize)
+ return false;
+ std::vector<BYTE> buffer(valueSize);
+ result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, NULL, &buffer[0],
+ &valueSize);
+ if (result == ERROR_SUCCESS) {
+ std::wstring WideValue(reinterpret_cast<const wchar_t *>(buffer.data()),
+ valueSize / sizeof(wchar_t));
+ if (valueSize && WideValue.back() == L'\0') {
+ WideValue.pop_back();
+ }
+ // The destination buffer must be empty as an invariant of the conversion
+ // function; however, this function is sometimes called in a loop that
+ // passes in the same buffer. Simply clear it out so we can overwrite it.
+ value.clear();
+ return llvm::convertWideToUTF8(WideValue, value);
+ }
+ return false;
+}
+#endif
+
+/// Read a string value from the registry.
+/// This also supports looking for high-versioned keys by use of a $VERSION
+/// placeholder in the key path, e.g.
+/// "SOFTWARE\\Microsoft\\VisualStudio\\$VERSION": the placeholder stands in
+/// for the version number, causing the highest-versioned matching path to be
+/// searched for and used.
+/// There can be additional characters in the component; only the numeric
+/// characters are compared. This function only searches HKLM.
+static bool getSystemRegistryString(const char *keyPath, const char *valueName,
+ std::string &value, std::string *phValue) {
+#ifndef _WIN32
+ return false;
+#else
+ HKEY hRootKey = HKEY_LOCAL_MACHINE;
+ HKEY hKey = NULL;
+ long lResult;
+ bool returnValue = false;
+
+ const char *placeHolder = strstr(keyPath, "$VERSION");
+ std::string bestName;
+ // If we have a $VERSION placeholder, do the highest-version search.
+ if (placeHolder) {
+ const char *keyEnd = placeHolder - 1;
+ const char *nextKey = placeHolder;
+ // Find end of previous key.
+ while ((keyEnd > keyPath) && (*keyEnd != '\\'))
+ keyEnd--;
+ // Find end of key containing $VERSION.
+ while (*nextKey && (*nextKey != '\\'))
+ nextKey++;
+ size_t partialKeyLength = keyEnd - keyPath;
+ char partialKey[256];
+ if (partialKeyLength >= sizeof(partialKey))
+ partialKeyLength = sizeof(partialKey) - 1;
+ strncpy(partialKey, keyPath, partialKeyLength);
+ partialKey[partialKeyLength] = '\0';
+ HKEY hTopKey = NULL;
+ lResult = RegOpenKeyExA(hRootKey, partialKey, 0, KEY_READ | KEY_WOW64_32KEY,
+ &hTopKey);
+ if (lResult == ERROR_SUCCESS) {
+ char keyName[256];
+ double bestValue = 0.0;
+ DWORD index, size = sizeof(keyName) - 1;
+ for (index = 0; RegEnumKeyExA(hTopKey, index, keyName, &size, NULL, NULL,
+ NULL, NULL) == ERROR_SUCCESS;
+ index++) {
+ const char *sp = keyName;
+ while (*sp && !llvm::isDigit(*sp))
+ sp++;
+ if (!*sp)
+ continue;
+ const char *ep = sp + 1;
+ while (*ep && (llvm::isDigit(*ep) || (*ep == '.')))
+ ep++;
+ char numBuf[32];
+ strncpy(numBuf, sp, sizeof(numBuf) - 1);
+ numBuf[sizeof(numBuf) - 1] = '\0';
+ double dvalue = strtod(numBuf, NULL);
+ if (dvalue > bestValue) {
+ // Test that InstallDir is indeed there before keeping this index.
+ // Open the chosen key path remainder.
+ bestName = keyName;
+ // Append rest of key.
+ bestName.append(nextKey);
+ lResult = RegOpenKeyExA(hTopKey, bestName.c_str(), 0,
+ KEY_READ | KEY_WOW64_32KEY, &hKey);
+ if (lResult == ERROR_SUCCESS) {
+ if (readFullStringValue(hKey, valueName, value)) {
+ bestValue = dvalue;
+ if (phValue)
+ *phValue = bestName;
+ returnValue = true;
+ }
+ RegCloseKey(hKey);
+ }
+ }
+ size = sizeof(keyName) - 1;
+ }
+ RegCloseKey(hTopKey);
+ }
+ } else {
+ lResult =
+ RegOpenKeyExA(hRootKey, keyPath, 0, KEY_READ | KEY_WOW64_32KEY, &hKey);
+ if (lResult == ERROR_SUCCESS) {
+ if (readFullStringValue(hKey, valueName, value))
+ returnValue = true;
+ if (phValue)
+ phValue->clear();
+ RegCloseKey(hKey);
+ }
+ }
+ return returnValue;
+#endif // _WIN32
+}
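
A standalone sketch of how the $VERSION placeholder splits the key path into the parent key to enumerate and the remainder to re-append (hypothetical key; assumes the placeholder is not at the start of the path):

#include <cstdio>
#include <string>

int main() {
  std::string KeyPath = "SOFTWARE\\Microsoft\\VisualStudio\\$VERSION\\Setup";
  size_t Pos = KeyPath.find("$VERSION");
  // Parent key, enumerated with RegEnumKeyExA in the real code:
  std::string Partial = KeyPath.substr(0, Pos - 1); // drop trailing '\'
  // Remainder, appended to the best-versioned subkey name:
  std::string Rest = KeyPath.substr(Pos + 8); // starts at "\Setup"
  std::printf("%s | %s\n", Partial.c_str(), Rest.c_str());
  return 0;
}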
+
+namespace llvm {
+
+const char *archToWindowsSDKArch(Triple::ArchType Arch) {
+ switch (Arch) {
+ case Triple::ArchType::x86:
+ return "x86";
+ case Triple::ArchType::x86_64:
+ return "x64";
+ case Triple::ArchType::arm:
+ return "arm";
+ case Triple::ArchType::aarch64:
+ return "arm64";
+ default:
+ return "";
+ }
+}
+
+const char *archToLegacyVCArch(Triple::ArchType Arch) {
+ switch (Arch) {
+ case Triple::ArchType::x86:
+ // x86 is default in legacy VC toolchains.
+ // e.g. x86 libs are directly in /lib as opposed to /lib/x86.
+ return "";
+ case Triple::ArchType::x86_64:
+ return "amd64";
+ case Triple::ArchType::arm:
+ return "arm";
+ case Triple::ArchType::aarch64:
+ return "arm64";
+ default:
+ return "";
+ }
+}
+
+const char *archToDevDivInternalArch(Triple::ArchType Arch) {
+ switch (Arch) {
+ case Triple::ArchType::x86:
+ return "i386";
+ case Triple::ArchType::x86_64:
+ return "amd64";
+ case Triple::ArchType::arm:
+ return "arm";
+ case Triple::ArchType::aarch64:
+ return "arm64";
+ default:
+ return "";
+ }
+}
+
+bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
+ Triple::ArchType Arch, std::string &path) {
+ if (SDKMajor >= 8) {
+ sys::path::append(LibPath, archToWindowsSDKArch(Arch));
+ } else {
+ switch (Arch) {
+ // In Windows SDK 7.x, x86 libraries are directly in the Lib folder.
+ case Triple::x86:
+ break;
+ case Triple::x86_64:
+ sys::path::append(LibPath, "x64");
+ break;
+ case Triple::arm:
+ // It is not necessary to link against Windows SDK 7.x when targeting ARM.
+ return false;
+ default:
+ return false;
+ }
+ }
+
+ path = std::string(LibPath.str());
+ return true;
+}
+
+std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout,
+ const std::string &VCToolChainPath,
+ Triple::ArchType TargetArch,
+ StringRef SubdirParent) {
+ const char *SubdirName;
+ const char *IncludeName;
+ switch (VSLayout) {
+ case ToolsetLayout::OlderVS:
+ SubdirName = archToLegacyVCArch(TargetArch);
+ IncludeName = "include";
+ break;
+ case ToolsetLayout::VS2017OrNewer:
+ SubdirName = archToWindowsSDKArch(TargetArch);
+ IncludeName = "include";
+ break;
+ case ToolsetLayout::DevDivInternal:
+ SubdirName = archToDevDivInternalArch(TargetArch);
+ IncludeName = "inc";
+ break;
+ }
+
+ SmallString<256> Path(VCToolChainPath);
+ if (!SubdirParent.empty())
+ sys::path::append(Path, SubdirParent);
+
+ switch (Type) {
+ case SubDirectoryType::Bin:
+ if (VSLayout == ToolsetLayout::VS2017OrNewer) {
+ // MSVC ships with two linkers: a 32-bit x86 linker and a 64-bit x86 linker.
+ // On x86, pick the linker that corresponds to the current process.
+ // On ARM64, pick the 32-bit x86 linker; the 64-bit one doesn't run
+ // on Windows 10.
+ //
+ // FIXME: Consider using IsWow64GuestMachineSupported to figure out
+ // if we can invoke the 64-bit linker. It's generally preferable
+ // because it won't run out of address-space.
+ const bool HostIsX64 =
+ Triple(sys::getProcessTriple()).getArch() == Triple::x86_64;
+ const char *const HostName = HostIsX64 ? "Hostx64" : "Hostx86";
+ sys::path::append(Path, "bin", HostName, SubdirName);
+ } else { // OlderVS or DevDivInternal
+ sys::path::append(Path, "bin", SubdirName);
+ }
+ break;
+ case SubDirectoryType::Include:
+ sys::path::append(Path, IncludeName);
+ break;
+ case SubDirectoryType::Lib:
+ sys::path::append(Path, "lib", SubdirName);
+ break;
+ }
+ return std::string(Path.str());
+}
+
+bool useUniversalCRT(ToolsetLayout VSLayout, const std::string &VCToolChainPath,
+ Triple::ArchType TargetArch, vfs::FileSystem &VFS) {
+ SmallString<128> TestPath(getSubDirectoryPath(
+ SubDirectoryType::Include, VSLayout, VCToolChainPath, TargetArch));
+ sys::path::append(TestPath, "stdlib.h");
+ return !VFS.exists(TestPath);
+}
+
+bool getWindowsSDKDir(vfs::FileSystem &VFS, Optional<StringRef> WinSdkDir,
+ Optional<StringRef> WinSdkVersion,
+ Optional<StringRef> WinSysRoot, std::string &Path,
+ int &Major, std::string &WindowsSDKIncludeVersion,
+ std::string &WindowsSDKLibVersion) {
+ // Trust /winsdkdir and /winsdkversion if present.
+ if (getWindowsSDKDirViaCommandLine(VFS, WinSdkDir, WinSdkVersion, WinSysRoot,
+ Path, Major, WindowsSDKIncludeVersion)) {
+ WindowsSDKLibVersion = WindowsSDKIncludeVersion;
+ return true;
+ }
+
+ // FIXME: Try env vars (%WindowsSdkDir%, %UCRTVersion%) before going to
+ // registry.
+
+ // Try the Windows registry.
+ std::string RegistrySDKVersion;
+ if (!getSystemRegistryString(
+ "SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\$VERSION",
+ "InstallationFolder", Path, &RegistrySDKVersion))
+ return false;
+ if (Path.empty() || RegistrySDKVersion.empty())
+ return false;
+
+ WindowsSDKIncludeVersion.clear();
+ WindowsSDKLibVersion.clear();
+ Major = 0;
+ std::sscanf(RegistrySDKVersion.c_str(), "v%d.", &Major);
+ if (Major <= 7)
+ return true;
+ if (Major == 8) {
+ // Windows SDK 8.x installs libraries in folders whose names depend on the
+ // version of the OS you're targeting. By default choose the newest, which
+ // usually corresponds to the version of the OS you've installed the SDK on.
+ const char *Tests[] = {"winv6.3", "win8", "win7"};
+ for (const char *Test : Tests) {
+ SmallString<128> TestPath(Path);
+ sys::path::append(TestPath, "Lib", Test);
+ if (VFS.exists(TestPath)) {
+ WindowsSDKLibVersion = Test;
+ break;
+ }
+ }
+ return !WindowsSDKLibVersion.empty();
+ }
+ if (Major == 10) {
+ if (!getWindows10SDKVersionFromPath(VFS, Path, WindowsSDKIncludeVersion))
+ return false;
+ WindowsSDKLibVersion = WindowsSDKIncludeVersion;
+ return true;
+ }
+ // Unsupported SDK version
+ return false;
+}
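
For reference, the registry reports SDK versions as strings such as "v8.1" or "v10.0"; the sscanf above extracts only the major number. A minimal sketch:

#include <cstdio>

int main() {
  int Major = 0;
  std::sscanf("v8.1", "v%d.", &Major);
  std::printf("Major = %d\n", Major); // Major = 8
  return 0;
}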
+
+bool getUniversalCRTSdkDir(vfs::FileSystem &VFS, Optional<StringRef> WinSdkDir,
+ Optional<StringRef> WinSdkVersion,
+ Optional<StringRef> WinSysRoot, std::string &Path,
+ std::string &UCRTVersion) {
+ // If /winsdkdir is passed, use it as location for the UCRT too.
+ // FIXME: Should there be a dedicated /ucrtdir to override /winsdkdir?
+ int Major;
+ if (getWindowsSDKDirViaCommandLine(VFS, WinSdkDir, WinSdkVersion, WinSysRoot,
+ Path, Major, UCRTVersion))
+ return true;
+
+ // FIXME: Try env vars (%UniversalCRTSdkDir%, %UCRTVersion%) before going to
+ // registry.
+
+ // vcvarsqueryregistry.bat for Visual Studio 2015 queries the registry
+ // for the specific key "KitsRoot10". So do we.
+ if (!getSystemRegistryString(
+ "SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots", "KitsRoot10",
+ Path, nullptr))
+ return false;
+
+ return getWindows10SDKVersionFromPath(VFS, Path, UCRTVersion);
+}
+
+bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS,
+ Optional<StringRef> VCToolsDir,
+ Optional<StringRef> VCToolsVersion,
+ Optional<StringRef> WinSysRoot,
+ std::string &Path, ToolsetLayout &VSLayout) {
+ // Don't validate the input; trust the value supplied by the user.
+ // The primary motivation is to prevent unnecessary file and registry access.
+ if (VCToolsDir || WinSysRoot) {
+ if (WinSysRoot) {
+ SmallString<128> ToolsPath(*WinSysRoot);
+ sys::path::append(ToolsPath, "VC", "Tools", "MSVC");
+ std::string ToolsVersion;
+ if (VCToolsVersion)
+ ToolsVersion = VCToolsVersion->str();
+ else
+ ToolsVersion = getHighestNumericTupleInDirectory(VFS, ToolsPath);
+ sys::path::append(ToolsPath, ToolsVersion);
+ Path = std::string(ToolsPath.str());
+ } else {
+ Path = VCToolsDir->str();
+ }
+ VSLayout = ToolsetLayout::VS2017OrNewer;
+ return true;
+ }
+ return false;
+}
+
+bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
+ ToolsetLayout &VSLayout) {
+ // These variables are typically set by vcvarsall.bat
+ // when launching a developer command prompt.
+ if (Optional<std::string> VCToolsInstallDir =
+ sys::Process::GetEnv("VCToolsInstallDir")) {
+ // This is only set by newer Visual Studios, and it leads straight to
+ // the toolchain directory.
+ Path = std::move(*VCToolsInstallDir);
+ VSLayout = ToolsetLayout::VS2017OrNewer;
+ return true;
+ }
+ if (Optional<std::string> VCInstallDir =
+ sys::Process::GetEnv("VCINSTALLDIR")) {
+ // If the previous variable isn't set but this one is, then we've found
+ // an older Visual Studio. This variable is set by newer Visual Studios too,
+ // so this check has to appear second.
+ // In older Visual Studios, the VC directory is the toolchain.
+ Path = std::move(*VCInstallDir);
+ VSLayout = ToolsetLayout::OlderVS;
+ return true;
+ }
+
+ // We couldn't find any VC environment variables. Let's walk through PATH and
+ // see if it leads us to a VC toolchain bin directory. If it does, pick the
+ // first one that we find.
+ if (Optional<std::string> PathEnv = sys::Process::GetEnv("PATH")) {
+ SmallVector<StringRef, 8> PathEntries;
+ StringRef(*PathEnv).split(PathEntries, sys::EnvPathSeparator);
+ for (StringRef PathEntry : PathEntries) {
+ if (PathEntry.empty())
+ continue;
+
+ SmallString<256> ExeTestPath;
+
+ // If cl.exe doesn't exist, then this definitely isn't a VC toolchain.
+ ExeTestPath = PathEntry;
+ sys::path::append(ExeTestPath, "cl.exe");
+ if (!VFS.exists(ExeTestPath))
+ continue;
+
+ // cl.exe existing isn't a conclusive test for a VC toolchain; clang also
+ // has a cl.exe. So let's check for link.exe too.
+ ExeTestPath = PathEntry;
+ sys::path::append(ExeTestPath, "link.exe");
+ if (!VFS.exists(ExeTestPath))
+ continue;
+
+ // whatever/VC/bin --> old toolchain, VC dir is toolchain dir.
+ StringRef TestPath = PathEntry;
+ bool IsBin = sys::path::filename(TestPath).equals_insensitive("bin");
+ if (!IsBin) {
+ // Strip any architecture subdir like "amd64".
+ TestPath = sys::path::parent_path(TestPath);
+ IsBin = sys::path::filename(TestPath).equals_insensitive("bin");
+ }
+ if (IsBin) {
+ StringRef ParentPath = sys::path::parent_path(TestPath);
+ StringRef ParentFilename = sys::path::filename(ParentPath);
+ if (ParentFilename.equals_insensitive("VC")) {
+ Path = std::string(ParentPath);
+ VSLayout = ToolsetLayout::OlderVS;
+ return true;
+ }
+ if (ParentFilename.equals_insensitive("x86ret") ||
+ ParentFilename.equals_insensitive("x86chk") ||
+ ParentFilename.equals_insensitive("amd64ret") ||
+ ParentFilename.equals_insensitive("amd64chk")) {
+ Path = std::string(ParentPath);
+ VSLayout = ToolsetLayout::DevDivInternal;
+ return true;
+ }
+
+ } else {
+ // This could be a new (>=VS2017) toolchain. If it is, we should find
+ // path components with these prefixes when walking backwards through
+ // the path.
+ // Note: empty strings match anything.
+ StringRef ExpectedPrefixes[] = {"", "Host", "bin", "",
+ "MSVC", "Tools", "VC"};
+
+ auto It = sys::path::rbegin(PathEntry);
+ auto End = sys::path::rend(PathEntry);
+ for (StringRef Prefix : ExpectedPrefixes) {
+ if (It == End)
+ goto NotAToolChain;
+ if (!It->startswith_insensitive(Prefix))
+ goto NotAToolChain;
+ ++It;
+ }
+
+ // We've found a new toolchain!
+ // Back up 3 times (/bin/Host/arch) to get the root path.
+ StringRef ToolChainPath(PathEntry);
+ for (int i = 0; i < 3; ++i)
+ ToolChainPath = sys::path::parent_path(ToolChainPath);
+
+ Path = std::string(ToolChainPath);
+ VSLayout = ToolsetLayout::VS2017OrNewer;
+ return true;
+ }
+
+ NotAToolChain:
+ continue;
+ }
+ }
+ return false;
+}
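
The backwards walk over the PATH entry's components is the subtle part of this detection. A standalone sketch with plain STL strings (case-sensitive, unlike the startswith_insensitive calls above; the sample path is hypothetical):

#include <cstdio>
#include <string>
#include <vector>

// True if Components, read back-to-front, start with Prefixes ("" matches
// anything). The real code also compares case-insensitively.
static bool matchesReversed(const std::vector<std::string> &Components,
                            const std::vector<std::string> &Prefixes) {
  auto It = Components.rbegin();
  for (const std::string &Prefix : Prefixes) {
    if (It == Components.rend() ||
        It->compare(0, Prefix.size(), Prefix) != 0)
      return false;
    ++It;
  }
  return true;
}

int main() {
  std::vector<std::string> Components = {"C:", "VS", "VC", "Tools", "MSVC",
                                         "14.30", "bin", "Hostx64", "x64"};
  std::vector<std::string> Prefixes = {"", "Host", "bin", "",
                                       "MSVC", "Tools", "VC"};
  std::printf("%d\n", (int)matchesReversed(Components, Prefixes)); // 1
  return 0;
}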
+
+bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, std::string &Path,
+ ToolsetLayout &VSLayout) {
+#if !defined(USE_MSVC_SETUP_API)
+ return false;
+#else
+ // FIXME: This really should be done once in the top-level program's main
+ // function, as it may have already been initialized with a different
+ // threading model otherwise.
+ sys::InitializeCOMRAII COM(sys::COMThreadingMode::SingleThreaded);
+ HRESULT HR;
+
+ // _com_ptr_t will throw a _com_error if a COM call fails.
+ // The LLVM coding standards forbid exception handling, so we'll have to
+ // stop them from being thrown in the first place.
+ // The destructor will put the regular error handler back when we leave
+ // this scope.
+ struct SuppressCOMErrorsRAII {
+ static void __stdcall handler(HRESULT hr, IErrorInfo *perrinfo) {}
+
+ SuppressCOMErrorsRAII() { _set_com_error_handler(handler); }
+
+ ~SuppressCOMErrorsRAII() { _set_com_error_handler(_com_raise_error); }
+
+ } COMErrorSuppressor;
+
+ ISetupConfigurationPtr Query;
+ HR = Query.CreateInstance(__uuidof(SetupConfiguration));
+ if (FAILED(HR))
+ return false;
+
+ IEnumSetupInstancesPtr EnumInstances;
+ HR = ISetupConfiguration2Ptr(Query)->EnumAllInstances(&EnumInstances);
+ if (FAILED(HR))
+ return false;
+
+ ISetupInstancePtr Instance;
+ HR = EnumInstances->Next(1, &Instance, nullptr);
+ if (HR != S_OK)
+ return false;
+
+ ISetupInstancePtr NewestInstance;
+ Optional<uint64_t> NewestVersionNum;
+ do {
+ bstr_t VersionString;
+ uint64_t VersionNum;
+ HR = Instance->GetInstallationVersion(VersionString.GetAddress());
+ if (FAILED(HR))
+ continue;
+ HR = ISetupHelperPtr(Query)->ParseVersion(VersionString, &VersionNum);
+ if (FAILED(HR))
+ continue;
+ if (!NewestVersionNum || (VersionNum > NewestVersionNum)) {
+ NewestInstance = Instance;
+ NewestVersionNum = VersionNum;
+ }
+ } while ((HR = EnumInstances->Next(1, &Instance, nullptr)) == S_OK);
+
+ if (!NewestInstance)
+ return false;
+
+ bstr_t VCPathWide;
+ HR = NewestInstance->ResolvePath(L"VC", VCPathWide.GetAddress());
+ if (FAILED(HR))
+ return false;
+
+ std::string VCRootPath;
+ convertWideToUTF8(std::wstring(VCPathWide), VCRootPath);
+
+ SmallString<256> ToolsVersionFilePath(VCRootPath);
+ sys::path::append(ToolsVersionFilePath, "Auxiliary", "Build",
+ "Microsoft.VCToolsVersion.default.txt");
+
+ auto ToolsVersionFile = MemoryBuffer::getFile(ToolsVersionFilePath);
+ if (!ToolsVersionFile)
+ return false;
+
+ SmallString<256> ToolchainPath(VCRootPath);
+ sys::path::append(ToolchainPath, "Tools", "MSVC",
+ ToolsVersionFile->get()->getBuffer().rtrim());
+ auto Status = VFS.status(ToolchainPath);
+ if (!Status || !Status->isDirectory())
+ return false;
+
+ Path = std::string(ToolchainPath.str());
+ VSLayout = ToolsetLayout::VS2017OrNewer;
+ return true;
+#endif
+}
+
+bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) {
+ std::string VSInstallPath;
+ if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)",
+ "InstallDir", VSInstallPath, nullptr) ||
+ getSystemRegistryString(R"(SOFTWARE\Microsoft\VCExpress\$VERSION)",
+ "InstallDir", VSInstallPath, nullptr)) {
+ if (!VSInstallPath.empty()) {
+ SmallString<256> VCPath(StringRef(VSInstallPath.c_str(),
+ VSInstallPath.find(R"(\Common7\IDE)")));
+ sys::path::append(VCPath, "VC");
+
+ Path = std::string(VCPath.str());
+ VSLayout = ToolsetLayout::OlderVS;
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace llvm
diff --git a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
index 40c03f7b0de7..8f5c53faf91e 100644
--- a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -14,8 +14,6 @@
#include "llvm/Config/config.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <map>
-
#if LLVM_ENABLE_LIBXML2
#include <libxml/xmlreader.h>
#endif
@@ -706,7 +704,7 @@ bool windows_manifest::isAvailable() { return false; }
WindowsManifestMerger::WindowsManifestMerger()
: Impl(std::make_unique<WindowsManifestMergerImpl>()) {}
-WindowsManifestMerger::~WindowsManifestMerger() {}
+WindowsManifestMerger::~WindowsManifestMerger() = default;
Error WindowsManifestMerger::merge(MemoryBufferRef Manifest) {
return Impl->merge(Manifest);
diff --git a/llvm/lib/XRay/FDRTraceWriter.cpp b/llvm/lib/XRay/FDRTraceWriter.cpp
index 71c09bd4fce4..2b80740ed436 100644
--- a/llvm/lib/XRay/FDRTraceWriter.cpp
+++ b/llvm/lib/XRay/FDRTraceWriter.cpp
@@ -74,7 +74,7 @@ FDRTraceWriter::FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H)
OS.write(FreeFormBytes);
}
-FDRTraceWriter::~FDRTraceWriter() {}
+FDRTraceWriter::~FDRTraceWriter() = default;
Error FDRTraceWriter::visit(BufferExtents &R) {
return writeMetadata<7u>(OS, R.size());
diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp
index d127ea0945f2..9912f59f0ba6 100644
--- a/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -270,7 +270,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
// First, remove aliases to functions we're about to purge.
for (GlobalAlias &Alias : M->aliases()) {
GlobalObject *Root = Alias.getAliaseeObject();
- Function *F = dyn_cast_or_null<Function>(Root);
+ auto *F = dyn_cast<Function>(Root);
if (F) {
if (Functions.count(F))
// We're keeping this function.
@@ -278,7 +278,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
} else if (Root->isNullValue()) {
// This referenced a globalalias that we've already replaced,
// so we still need to replace this alias.
- } else if (!F) {
+ } else {
// Not a function, therefore not something we mess with.
continue;
}
diff --git a/llvm/tools/bugpoint/ExecutionDriver.cpp b/llvm/tools/bugpoint/ExecutionDriver.cpp
index f06f378962d9..2b06e8f3b365 100644
--- a/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -105,7 +105,7 @@ namespace llvm {
// program being debugged.
cl::list<std::string> InputArgv("args", cl::Positional,
cl::desc("<program arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
+ cl::PositionalEatsArgs);
cl::opt<std::string>
OutputPrefix("output-prefix", cl::init("bugpoint"),
@@ -114,19 +114,19 @@ cl::opt<std::string>
namespace {
cl::list<std::string> ToolArgv("tool-args", cl::Positional,
- cl::desc("<tool arguments>..."), cl::ZeroOrMore,
+ cl::desc("<tool arguments>..."),
cl::PositionalEatsArgs);
cl::list<std::string> SafeToolArgv("safe-tool-args", cl::Positional,
cl::desc("<safe-tool arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
+ cl::PositionalEatsArgs);
cl::opt<std::string> CCBinary("gcc", cl::init(""),
cl::desc("The gcc binary to use."));
cl::list<std::string> CCToolArgv("gcc-tool-args", cl::Positional,
cl::desc("<gcc-tool arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
+ cl::PositionalEatsArgs);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp
index e67e877c13af..d425a8c5b49a 100644
--- a/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -117,7 +117,7 @@ cl::opt<bool> SilencePasses(
static cl::list<std::string> OptArgs("opt-args", cl::Positional,
cl::desc("<opt arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
+ cl::PositionalEatsArgs);
/// runPasses - Run the specified passes on Program, outputting a bitcode file
/// and writing the filename into OutputFile if successful. If the
diff --git a/llvm/tools/bugpoint/bugpoint.cpp b/llvm/tools/bugpoint/bugpoint.cpp
index 937ec23231b0..6e3f237d0a39 100644
--- a/llvm/tools/bugpoint/bugpoint.cpp
+++ b/llvm/tools/bugpoint/bugpoint.cpp
@@ -65,11 +65,7 @@ static cl::opt<bool>
// PassNameParser.
//
static cl::list<const PassInfo *, bool, PassNameParser>
- PassList(cl::desc("Passes available:"), cl::ZeroOrMore);
-
-static cl::opt<bool>
- StandardLinkOpts("std-link-opts",
- cl::desc("Include the standard link time optimizations"));
+ PassList(cl::desc("Passes available:"));
static cl::opt<bool>
OptLevelO1("O1", cl::desc("Optimization level 1. Identical to 'opt -O1'"));
@@ -203,12 +199,6 @@ int main(int argc, char **argv) {
AddToDriver PM(D);
- if (StandardLinkOpts) {
- PassManagerBuilder Builder;
- Builder.Inliner = createFunctionInliningPass();
- Builder.populateLTOPassManager(PM);
- }
-
if (OptLevelO1)
AddOptimizationPasses(PM, 1, 0);
else if (OptLevelO2)
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index c07f4e66486c..8d82d78b15b5 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
@@ -117,12 +118,10 @@ static cl::opt<bool>
// Determine optimization level.
static cl::opt<char>
-OptLevel("O",
- cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
- "(default = '-O2')"),
- cl::Prefix,
- cl::ZeroOrMore,
- cl::init(' '));
+ OptLevel("O",
+ cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+ "(default = '-O2')"),
+ cl::Prefix, cl::init(' '));
static cl::opt<std::string>
TargetTriple("mtriple", cl::desc("Override target triple for module"));
@@ -212,7 +211,7 @@ static RunPassOption RunPassOpt;
static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass(
"run-pass",
cl::desc("Run compiler only for specified passes (comma separated list)"),
- cl::value_desc("pass-name"), cl::ZeroOrMore, cl::location(RunPassOpt));
+ cl::value_desc("pass-name"), cl::location(RunPassOpt));
static int compileModule(char **, LLVMContext &);
@@ -369,6 +368,7 @@ int main(int argc, char **argv) {
initializeHardwareLoopsPass(*Registry);
initializeTransformUtils(*Registry);
initializeReplaceWithVeclibLegacyPass(*Registry);
+ initializeTLSVariableHoistLegacyPassPass(*Registry);
// Initialize debugging passes.
initializeScavengerTestPass(*Registry);
@@ -501,14 +501,26 @@ static int compileModule(char **argv, LLVMContext &Context) {
TargetMachine::parseBinutilsVersion(BinutilsVersion);
Options.DisableIntegratedAS = NoIntegratedAssembler;
Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
- Options.MCOptions.MCUseDwarfDirectory = DwarfDirectory;
Options.MCOptions.AsmVerbose = AsmVerbose;
Options.MCOptions.PreserveAsmComments = PreserveComments;
Options.MCOptions.IASSearchPaths = IncludeDirs;
Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
+ if (DwarfDirectory.getPosition()) {
+ Options.MCOptions.MCUseDwarfDirectory =
+ DwarfDirectory ? MCTargetOptions::EnableDwarfDirectory
+ : MCTargetOptions::DisableDwarfDirectory;
+ } else {
+ // -dwarf-directory is not set explicitly. Some assemblers
+ // (e.g. GNU as or ptxas) do not support `.file directory'
+ // syntax prior to DWARFv5. Let the target decide the default
+ // value.
+ Options.MCOptions.MCUseDwarfDirectory =
+ MCTargetOptions::DefaultDwarfDirectory;
+ }
};
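
The getPosition() call is what distinguishes "option never given" from "option explicitly set to false". A sketch of the same tri-state pattern with a hypothetical flag (assumes cl::ParseCommandLineOptions has already run):

#include "llvm/Support/CommandLine.h"

// Hypothetical option, for illustration only.
static llvm::cl::opt<bool> UseFeature("use-feature",
                                      llvm::cl::desc("Enable the feature"));

int classify() {
  if (!UseFeature.getPosition())
    return 0;                // never passed: defer to a target default
  return UseFeature ? 1 : 2; // -use-feature / -use-feature=false
}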
Optional<Reloc::Model> RM = codegen::getExplicitRelocModel();
+ Optional<CodeModel::Model> CM = codegen::getExplicitCodeModel();
const Target *TheTarget = nullptr;
std::unique_ptr<TargetMachine> Target;
@@ -535,14 +547,13 @@ static int compileModule(char **argv, LLVMContext &Context) {
// On AIX, setting the relocation model to anything other than PIC is
// considered a user error.
- if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_)
+ if (TheTriple.isOSAIX() && RM && *RM != Reloc::PIC_)
reportError("invalid relocation model, AIX only supports PIC",
InputFilename);
InitializeOptions(TheTriple);
Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
- TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
- codegen::getExplicitCodeModel(), OLvl));
+ TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, CM, OLvl));
assert(Target && "Could not allocate target machine!");
return Target->createDataLayout().getStringRepresentation();
@@ -562,6 +573,10 @@ static int compileModule(char **argv, LLVMContext &Context) {
}
if (!TargetTriple.empty())
M->setTargetTriple(Triple::normalize(TargetTriple));
+
+ Optional<CodeModel::Model> CM_IR = M->getCodeModel();
+ if (!CM && CM_IR)
+ Target->setCodeModel(CM_IR.getValue());
} else {
TheTriple = Triple(Triple::normalize(TargetTriple));
if (TheTriple.getTriple().empty())
@@ -578,7 +593,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
// On AIX, setting the relocation model to anything other than PIC is
// considered a user error.
- if (TheTriple.isOSAIX() && RM.hasValue() && *RM != Reloc::PIC_) {
+ if (TheTriple.isOSAIX() && RM && *RM != Reloc::PIC_) {
WithColor::error(errs(), argv[0])
<< "invalid relocation model, AIX only supports PIC.\n";
return 1;
@@ -586,8 +601,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
InitializeOptions(TheTriple);
Target = std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
- TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM,
- codegen::getExplicitCodeModel(), OLvl));
+ TheTriple.getTriple(), CPUStr, FeaturesStr, Options, RM, CM, OLvl));
assert(Target && "Could not allocate target machine!");
// If we don't have a module then just exit now. We do this down
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index d20daa07196b..f2e3886bdf07 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -28,12 +28,15 @@
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
+#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h"
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
#include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
+#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
@@ -120,6 +123,9 @@ namespace {
"RuntimeDyld"),
clEnumValN(JITLinkerKind::JITLink, "jitlink",
"Orc-specific linker")));
+ cl::opt<std::string> OrcRuntime("orc-runtime",
+ cl::desc("Use ORC runtime from given path"),
+ cl::init(""));
cl::opt<unsigned>
LazyJITCompileThreads("compile-threads",
@@ -144,8 +150,7 @@ namespace {
"-extra-module arguments."));
cl::list<std::string>
- Dylibs("dlopen", cl::desc("Dynamic libraries to load before linking"),
- cl::ZeroOrMore);
+ Dylibs("dlopen", cl::desc("Dynamic libraries to load before linking"));
// The MCJIT supports building for a target address space separate from
// the JIT compilation process. Use a forked process and a copying
@@ -166,13 +171,10 @@ namespace {
cl::value_desc("filename"), cl::init(""));
// Determine optimization level.
- cl::opt<char>
- OptLevel("O",
- cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
- "(default = '-O2')"),
- cl::Prefix,
- cl::ZeroOrMore,
- cl::init(' '));
+ cl::opt<char> OptLevel("O",
+ cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+ "(default = '-O2')"),
+ cl::Prefix, cl::init(' '));
cl::opt<std::string>
TargetTriple("mtriple", cl::desc("Override target triple for module"));
@@ -234,13 +236,15 @@ namespace {
cl::desc("Do not resolve lli process symbols in JIT'd code"),
cl::init(false));
- enum class LLJITPlatform { Inactive, DetectHost, GenericIR };
+ enum class LLJITPlatform { Inactive, DetectHost, ORC, GenericIR };
cl::opt<LLJITPlatform>
Platform("lljit-platform", cl::desc("Platform to use with LLJIT"),
cl::init(LLJITPlatform::DetectHost),
cl::values(clEnumValN(LLJITPlatform::DetectHost, "DetectHost",
"Select based on JIT target triple"),
+ clEnumValN(LLJITPlatform::ORC, "ORC",
+ "Use ORCPlatform with the ORC runtime"),
clEnumValN(LLJITPlatform::GenericIR, "GenericIR",
"Use LLJITGenericIRPlatform"),
clEnumValN(LLJITPlatform::Inactive, "Inactive",
@@ -369,6 +373,53 @@ private:
}
};
+class ORCPlatformSupport : public orc::LLJIT::PlatformSupport {
+public:
+ ORCPlatformSupport(orc::LLJIT &J) : J(J) {}
+
+ Error initialize(orc::JITDylib &JD) override {
+ using llvm::orc::shared::SPSExecutorAddr;
+ using llvm::orc::shared::SPSString;
+ using SPSDLOpenSig = SPSExecutorAddr(SPSString, int32_t);
+ enum dlopen_mode : int32_t {
+ ORC_RT_RTLD_LAZY = 0x1,
+ ORC_RT_RTLD_NOW = 0x2,
+ ORC_RT_RTLD_LOCAL = 0x4,
+ ORC_RT_RTLD_GLOBAL = 0x8
+ };
+
+ if (auto WrapperAddr = J.lookup("__orc_rt_jit_dlopen_wrapper")) {
+ return J.getExecutionSession().callSPSWrapper<SPSDLOpenSig>(
+ *WrapperAddr, DSOHandles[&JD], JD.getName(),
+ int32_t(ORC_RT_RTLD_LAZY));
+ } else
+ return WrapperAddr.takeError();
+ }
+
+ Error deinitialize(orc::JITDylib &JD) override {
+ using llvm::orc::shared::SPSExecutorAddr;
+ using SPSDLCloseSig = int32_t(SPSExecutorAddr);
+
+ if (auto WrapperAddr = J.lookup("__orc_rt_jit_dlclose_wrapper")) {
+ int32_t result;
+ auto E = J.getExecutionSession().callSPSWrapper<SPSDLCloseSig>(
+ *WrapperAddr, result, DSOHandles[&JD]);
+ if (E)
+ return E;
+ else if (result)
+ return make_error<StringError>("dlclose failed",
+ inconvertibleErrorCode());
+ DSOHandles.erase(&JD);
+ } else
+ return WrapperAddr.takeError();
+ return Error::success();
+ }
+
+private:
+ orc::LLJIT &J;
+ DenseMap<orc::JITDylib *, orc::ExecutorAddr> DSOHandles;
+};
+
// On Mingw and Cygwin, an external symbol named '__main' is called from the
// generated 'main' function to allow static initialization. To avoid linking
// problems with remote targets (because lli's remote target support does not
@@ -881,7 +932,7 @@ int runOrcJIT(const char *ProgName) {
}
Builder.setLazyCompileFailureAddr(
- pointerToJITTargetAddress(exitOnLazyCallThroughFailure));
+ orc::ExecutorAddr::fromPtr(exitOnLazyCallThroughFailure));
Builder.setNumCompileThreads(LazyJITCompileThreads);
// If the object cache is enabled then set a custom compile function
@@ -908,21 +959,29 @@ int runOrcJIT(const char *ProgName) {
}
// Set up LLJIT platform.
- {
- LLJITPlatform P = Platform;
- if (P == LLJITPlatform::DetectHost)
+ LLJITPlatform P = Platform;
+ if (P == LLJITPlatform::DetectHost) {
+ if (JITLinker == JITLinkerKind::JITLink && !OrcRuntime.empty() &&
+ (TT->isOSBinFormatMachO() || TT->isOSBinFormatELF()))
+ P = LLJITPlatform::ORC;
+ else
P = LLJITPlatform::GenericIR;
-
- switch (P) {
- case LLJITPlatform::GenericIR:
- // Nothing to do: LLJITBuilder will use this by default.
- break;
- case LLJITPlatform::Inactive:
- Builder.setPlatformSetUp(orc::setUpInactivePlatform);
- break;
- default:
- llvm_unreachable("Unrecognized platform value");
- }
+ }
+ switch (P) {
+ case LLJITPlatform::ORC:
+ Builder.setPlatformSetUp([](llvm::orc::LLJIT &J) -> llvm::Error {
+ J.setPlatformSupport(std::make_unique<ORCPlatformSupport>(J));
+ return Error::success();
+ });
+ break;
+ case LLJITPlatform::GenericIR:
+ // Nothing to do: LLJITBuilder will use this by default.
+ break;
+ case LLJITPlatform::Inactive:
+ Builder.setPlatformSetUp(orc::setUpInactivePlatform);
+ break;
+ default:
+ llvm_unreachable("Unrecognized platform value");
}
std::unique_ptr<orc::ExecutorProcessControl> EPC = nullptr;
@@ -930,13 +989,15 @@ int runOrcJIT(const char *ProgName) {
EPC = ExitOnErr(orc::SelfExecutorProcessControl::Create(
std::make_shared<orc::SymbolStringPool>()));
- Builder.setObjectLinkingLayerCreator([&EPC](orc::ExecutionSession &ES,
- const Triple &) {
+ Builder.setObjectLinkingLayerCreator([&EPC, &P](orc::ExecutionSession &ES,
+ const Triple &TT) {
auto L = std::make_unique<orc::ObjectLinkingLayer>(ES, EPC->getMemMgr());
- L->addPlugin(std::make_unique<orc::EHFrameRegistrationPlugin>(
- ES, ExitOnErr(orc::EPCEHFrameRegistrar::Create(ES))));
- L->addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(
- ES, ExitOnErr(orc::createJITLoaderGDBRegistrar(ES))));
+ if (P != LLJITPlatform::ORC) {
+ L->addPlugin(std::make_unique<orc::EHFrameRegistrationPlugin>(
+ ES, ExitOnErr(orc::EPCEHFrameRegistrar::Create(ES))));
+ L->addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(
+ ES, ExitOnErr(orc::createJITLoaderGDBRegistrar(ES))));
+ }
return L;
});
}
@@ -983,6 +1044,31 @@ int runOrcJIT(const char *ProgName) {
std::make_unique<LLIBuiltinFunctionGenerator>(GenerateBuiltinFunctions,
Mangle));
+ if (P == LLJITPlatform::ORC) {
+ if (auto *OLL = llvm::dyn_cast<llvm::orc::ObjectLinkingLayer>(ObjLayer)) {
+ auto &ES = J->getExecutionSession();
+ if (TT->isOSBinFormatMachO()) {
+ if (auto P = llvm::orc::MachOPlatform::Create(
+ ES, *OLL, J->getMainJITDylib(), OrcRuntime.c_str()))
+ ES.setPlatform(std::move(*P));
+ else
+ ExitOnErr(P.takeError());
+ } else if (TT->isOSBinFormatELF()) {
+ if (auto P = llvm::orc::ELFNixPlatform::Create(
+ ES, *OLL, J->getMainJITDylib(), OrcRuntime.c_str()))
+ ES.setPlatform(std::move(*P));
+ else
+ ExitOnErr(P.takeError());
+ } else {
+ errs() << "No ORC platform support\n";
+ exit(1);
+ }
+ } else {
+ errs() << "ORC platform requires JITLink\n";
+ exit(1);
+ }
+ }
+
// Regular modules are greedy: They materialize as a whole and trigger
// materialization for all required symbols recursively. Lazy modules go
// through partitioning and they replace outgoing calls with reexport stubs
@@ -1049,23 +1135,21 @@ int runOrcJIT(const char *ProgName) {
for (auto &ThreadEntryPoint : ThreadEntryPoints) {
auto EntryPointSym = ExitOnErr(J->lookup(ThreadEntryPoint));
typedef void (*EntryPointPtr)();
- auto EntryPoint =
- reinterpret_cast<EntryPointPtr>(static_cast<uintptr_t>(EntryPointSym.getAddress()));
+ auto EntryPoint = EntryPointSym.toPtr<EntryPointPtr>();
AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); }));
}
// Resolve and run the main function.
- JITEvaluatedSymbol MainSym = ExitOnErr(J->lookup(EntryFunc));
+ auto MainAddr = ExitOnErr(J->lookup(EntryFunc));
int Result;
if (EPC) {
// ExecutorProcessControl-based execution with JITLink.
- Result = ExitOnErr(
- EPC->runAsMain(orc::ExecutorAddr(MainSym.getAddress()), InputArgv));
+ Result = ExitOnErr(EPC->runAsMain(MainAddr, InputArgv));
} else {
// Manual in-process execution with RuntimeDyld.
using MainFnTy = int(int, char *[]);
- auto MainFn = jitTargetAddressToFunction<MainFnTy *>(MainSym.getAddress());
+ auto MainFn = MainAddr.toPtr<MainFnTy *>();
Result = orc::runAsMain(MainFn, InputArgv, StringRef(InputFile));
}
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 8842162f5216..e964dc8256a5 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -22,6 +22,7 @@
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
@@ -61,32 +62,30 @@ static StringRef ToolName;
// The basename of this program.
static StringRef Stem;
-const char RanlibHelp[] = R"(OVERVIEW: LLVM Ranlib (llvm-ranlib)
-
- This program generates an index to speed access to archives
-
-USAGE: llvm-ranlib <archive-file>
-
-OPTIONS:
- -h --help - Display available options
- -v --version - Display the version of this program
- -D - Use zero for timestamps and uids/gids (default)
- -U - Use actual timestamps and uids/gids
-)";
-
-const char ArHelp[] = R"(OVERVIEW: LLVM Archiver
-
-USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] [count] <archive> [files]
- llvm-ar -M [<mri-script]
+static void printRanLibHelp(StringRef ToolName) {
+ outs() << "OVERVIEW: LLVM Ranlib\n\n"
+ << "This program generates an index to speed access to archives\n\n"
+ << "USAGE: " + ToolName + " <archive-file>\n\n"
+ << "OPTIONS:\n"
+ << " -h --help - Display available options\n"
+ << " -v --version - Display the version of this program\n"
+ << " -D - Use zero for timestamps and uids/gids "
+ "(default)\n"
+ << " -U - Use actual timestamps and uids/gids\n";
+}
-OPTIONS:
+static void printArHelp(StringRef ToolName) {
+ const char ArOptions[] =
+ R"(OPTIONS:
--format - archive format to create
=default - default
=gnu - gnu
=darwin - darwin
=bsd - bsd
+ =aix - aix (big archive)
--plugin=<string> - ignored for compatibility
-h --help - display this help and exit
+ --output - the directory to extract archive members to
--rsp-quoting - quoting style for response files
=posix - posix
=windows - windows
@@ -126,11 +125,20 @@ MODIFIERS:
[V] - display the version and exit
)";
+ outs() << "OVERVIEW: LLVM Archiver\n\n"
+ << "USAGE: " + ToolName +
+ " [options] [-]<operation>[modifiers] [relpos] "
+ "[count] <archive> [files]\n"
+ << " " + ToolName + " -M [<mri-script]\n\n";
+
+ outs() << ArOptions;
+}
+
static void printHelpMessage() {
if (Stem.contains_insensitive("ranlib"))
- outs() << RanlibHelp;
+ printRanLibHelp(Stem);
else if (Stem.contains_insensitive("ar"))
- outs() << ArHelp;
+ printArHelp(Stem);
}
static unsigned MRILineNumber;
@@ -181,7 +189,7 @@ static SmallVector<const char *, 256> PositionalArgs;
static bool MRI;
namespace {
-enum Format { Default, GNU, BSD, DARWIN, Unknown };
+enum Format { Default, GNU, BSD, DARWIN, BIGARCHIVE, Unknown };
}
static Format FormatType = Default;
@@ -230,6 +238,9 @@ static int CountParam = 0;
// command line.
static std::string ArchiveName;
+// Output directory specified by --output.
+static std::string OutputDir;
+
static std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
static std::vector<std::unique_ptr<object::Archive>> Archives;
@@ -447,6 +458,19 @@ static ArchiveOperation parseCommandLine() {
if (AddLibrary && Operation != QuickAppend)
badUsage("the 'L' modifier is only applicable to the 'q' operation");
+ if (!OutputDir.empty()) {
+ if (Operation != Extract)
+ badUsage("--output is only applicable to the 'x' operation");
+ bool IsDir = false;
+ // create_directories succeeds even if OutputDir names an existing
+ // non-directory, provided all parent components are directories, so test
+ // is_directory as well.
+ if (!sys::fs::create_directories(OutputDir))
+ sys::fs::is_directory(OutputDir, IsDir);
+ if (!IsDir)
+ fail("'" + OutputDir + "' is not a directory");
+ }
+
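
The two-step check above reflects the create_directories contract: with its default ignore-existing behavior it reports success even when the final path component names an existing non-directory, so only is_directory separates the two cases. A minimal sketch of that contract (helper name is illustrative):

#include "llvm/Support/FileSystem.h"

// Returns true only when Dir exists (or was created) and is a directory.
static bool ensureDirectory(const llvm::Twine &Dir) {
  if (llvm::sys::fs::create_directories(Dir))
    return false; // hard failure, e.g. a parent component is a file
  bool IsDir = false;
  llvm::sys::fs::is_directory(Dir, IsDir);
  return IsDir; // false when Dir itself names an existing regular file
}
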
// Return the parsed operation to the caller
return Operation;
}
@@ -547,7 +571,15 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
failIfError(ModeOrErr.takeError());
sys::fs::perms Mode = ModeOrErr.get();
- llvm::StringRef outputFilePath = sys::path::filename(Name);
+ StringRef outputFilePath;
+ SmallString<128> path;
+ if (OutputDir.empty()) {
+ outputFilePath = sys::path::filename(Name);
+ } else {
+ sys::path::append(path, OutputDir, sys::path::filename(Name));
+ outputFilePath = path.str();
+ }
+
if (Verbose)
outs() << "x - " << outputFilePath << '\n';
@@ -652,8 +684,6 @@ static void performReadOperation(ArchiveOperation Operation,
static void addChildMember(std::vector<NewArchiveMember> &Members,
const object::Archive::Child &M,
bool FlattenArchive = false) {
- if (Thin && !M.getParent()->isThin())
- fail("cannot convert a regular archive to a thin one");
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getOldMember(M, Deterministic);
failIfError(NMOrErr.takeError());
@@ -875,48 +905,18 @@ computeNewArchiveMembers(ArchiveOperation Operation,
return Ret;
}
-static object::Archive::Kind getDefaultForHost() {
- return Triple(sys::getProcessTriple()).isOSDarwin()
- ? object::Archive::K_DARWIN
- : object::Archive::K_GNU;
-}
-
-static object::Archive::Kind getKindFromMember(const NewArchiveMember &Member) {
- auto MemBufferRef = Member.Buf->getMemBufferRef();
- Expected<std::unique_ptr<object::ObjectFile>> OptionalObject =
- object::ObjectFile::createObjectFile(MemBufferRef);
-
- if (OptionalObject)
- return isa<object::MachOObjectFile>(**OptionalObject)
- ? object::Archive::K_DARWIN
- : object::Archive::K_GNU;
-
- // squelch the error in case we had a non-object file
- consumeError(OptionalObject.takeError());
-
- // If we're adding a bitcode file to the archive, detect the Archive kind
- // based on the target triple.
- LLVMContext Context;
- if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) {
- if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
- MemBufferRef, file_magic::bitcode, &Context)) {
- auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr);
- return Triple(IRObject.getTargetTriple()).isOSDarwin()
- ? object::Archive::K_DARWIN
- : object::Archive::K_GNU;
- } else {
- // Squelch the error in case this was not a SymbolicFile.
- consumeError(ObjOrErr.takeError());
- }
- }
-
- return getDefaultForHost();
-}
-
static void performWriteOperation(ArchiveOperation Operation,
object::Archive *OldArchive,
std::unique_ptr<MemoryBuffer> OldArchiveBuf,
std::vector<NewArchiveMember> *NewMembersP) {
+ if (OldArchive) {
+ if (Thin && !OldArchive->isThin())
+ fail("cannot convert a regular archive to a thin one");
+
+ if (OldArchive->isThin())
+ Thin = true;
+ }
+
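
In llvm-ar terms, the two checks above mean thinness follows the existing archive: modifying a thin archive keeps it thin even without the 'T' modifier, while 'T' on a regular archive is rejected. For illustration (object file names hypothetical):

$ llvm-ar rcT thin.a a.o b.o   # create a thin archive
$ llvm-ar r thin.a c.o         # Thin is inferred; thin.a stays thin
$ llvm-ar rc regular.a a.o
$ llvm-ar rT regular.a c.o     # error: cannot convert a regular archive to a thin one
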
std::vector<NewArchiveMember> NewMembers;
if (!NewMembersP)
NewMembers = computeNewArchiveMembers(Operation, OldArchive);
@@ -926,14 +926,23 @@ static void performWriteOperation(ArchiveOperation Operation,
case Default:
if (Thin)
Kind = object::Archive::K_GNU;
- else if (OldArchive)
+ else if (OldArchive) {
Kind = OldArchive->kind();
- else if (NewMembersP)
- Kind = !NewMembersP->empty() ? getKindFromMember(NewMembersP->front())
- : getDefaultForHost();
+ if (Kind == object::Archive::K_BSD) {
+ auto InferredKind = object::Archive::K_BSD;
+ if (NewMembersP && !NewMembersP->empty())
+ InferredKind = NewMembersP->front().detectKindFromObject();
+ else if (!NewMembers.empty())
+ InferredKind = NewMembers.front().detectKindFromObject();
+ if (InferredKind == object::Archive::K_DARWIN)
+ Kind = object::Archive::K_DARWIN;
+ }
+ } else if (NewMembersP)
+ Kind = !NewMembersP->empty() ? NewMembersP->front().detectKindFromObject()
+ : object::Archive::getDefaultKindForHost();
else
- Kind = !NewMembers.empty() ? getKindFromMember(NewMembers.front())
- : getDefaultForHost();
+ Kind = !NewMembers.empty() ? NewMembers.front().detectKindFromObject()
+ : object::Archive::getDefaultKindForHost();
break;
case GNU:
Kind = object::Archive::K_GNU;
@@ -948,6 +957,11 @@ static void performWriteOperation(ArchiveOperation Operation,
fail("only the gnu format has a thin mode");
Kind = object::Archive::K_DARWIN;
break;
+ case BIGARCHIVE:
+ if (Thin)
+ fail("only the gnu format has a thin mode");
+ Kind = object::Archive::K_AIXBIG;
+ break;
case Unknown:
llvm_unreachable("");
}
@@ -1073,8 +1087,12 @@ static void runMRIScript() {
switch (Command) {
case MRICommand::AddLib: {
+ if (!Create)
+ fail("no output archive has been opened");
object::Archive &Lib = readLibrary(Rest);
{
+ if (Thin && !Lib.isThin())
+ fail("cannot add a regular archive's contents to a thin archive");
Error Err = Error::success();
for (auto &Member : Lib.children(Err))
addChildMember(NewMembers, Member, /*FlattenArchive=*/Thin);
@@ -1083,6 +1101,8 @@ static void runMRIScript() {
break;
}
case MRICommand::AddMod:
+ if (!Create)
+ fail("no output archive has been opened");
addMember(NewMembers, Rest);
break;
case MRICommand::CreateThin:
@@ -1095,6 +1115,8 @@ static void runMRIScript() {
if (Saved)
fail("file already saved");
ArchiveName = std::string(Rest);
+ if (ArchiveName.empty())
+ fail("missing archive name");
break;
case MRICommand::Delete: {
llvm::erase_if(NewMembers, [=](NewArchiveMember &M) {
@@ -1116,7 +1138,8 @@ static void runMRIScript() {
// Nothing to do if not saved.
if (Saved)
- performOperation(ReplaceOrInsert, &NewMembers);
+ performOperation(ReplaceOrInsert, /*OldArchive=*/nullptr,
+ /*OldArchiveBuf=*/nullptr, &NewMembers);
exit(0);
}
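
With the new guards, an MRI script must open an output archive (CREATE or CREATETHIN, with an archive name) before any ADDLIB or ADDMOD, and a thin output rejects the contents of regular archives. A well-formed script for llvm-ar -M, for illustration (file names hypothetical):

CREATE out.a
ADDLIB lib.a
ADDMOD extra.o
SAVE
END
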
@@ -1219,12 +1242,18 @@ static int ar_main(int argc, char **argv) {
.Case("gnu", GNU)
.Case("darwin", DARWIN)
.Case("bsd", BSD)
+ .Case("bigarchive", BIGARCHIVE)
.Default(Unknown);
if (FormatType == Unknown)
fail(std::string("Invalid format ") + Match);
continue;
}
+ if ((Match = matchFlagWithArg("output", ArgIt, Argv))) {
+ OutputDir = Match;
+ continue;
+ }
+
if (matchFlagWithArg("plugin", ArgIt, Argv) ||
matchFlagWithArg("rsp-quoting", ArgIt, Argv))
continue;
@@ -1274,7 +1303,7 @@ static int ranlib_main(int argc, char **argv) {
return performOperation(CreateSymTab, nullptr);
}
-int main(int argc, char **argv) {
+int llvm_ar_main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index ef801287c1be..6932e9b5bd31 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -265,8 +265,7 @@ bool CodeCoverageTool::isEquivalentFile(StringRef FilePath1,
StringRef FilePath2) {
auto Status1 = getFileStatus(FilePath1);
auto Status2 = getFileStatus(FilePath2);
- return Status1.hasValue() && Status2.hasValue() &&
- sys::fs::equivalent(Status1.getValue(), Status2.getValue());
+ return Status1 && Status2 && sys::fs::equivalent(*Status1, *Status2);
}
ErrorOr<const MemoryBuffer &>
@@ -621,14 +620,14 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::Positional, cl::desc("Covered executable or object file."));
cl::list<std::string> CovFilenames(
- "object", cl::desc("Coverage executable or object file"), cl::ZeroOrMore);
+ "object", cl::desc("Coverage executable or object file"));
cl::opt<bool> DebugDumpCollectedObjects(
"dump-collected-objects", cl::Optional, cl::Hidden,
cl::desc("Show the collected coverage object files"));
- cl::list<std::string> InputSourceFiles(
- cl::Positional, cl::desc("<Source files>"), cl::ZeroOrMore);
+ cl::list<std::string> InputSourceFiles(cl::Positional,
+ cl::desc("<Source files>"));
cl::opt<bool> DebugDumpCollectedPaths(
"dump-collected-paths", cl::Optional, cl::Hidden,
@@ -665,32 +664,32 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::list<std::string> NameFilters(
"name", cl::Optional,
cl::desc("Show code coverage only for functions with the given name"),
- cl::ZeroOrMore, cl::cat(FilteringCategory));
+ cl::cat(FilteringCategory));
cl::list<std::string> NameFilterFiles(
"name-allowlist", cl::Optional,
cl::desc("Show code coverage only for functions listed in the given "
"file"),
- cl::ZeroOrMore, cl::cat(FilteringCategory));
+ cl::cat(FilteringCategory));
// Allow for accepting previous option name.
cl::list<std::string> NameFilterFilesDeprecated(
"name-whitelist", cl::Optional, cl::Hidden,
cl::desc("Show code coverage only for functions listed in the given "
"file. Deprecated, use -name-allowlist instead"),
- cl::ZeroOrMore, cl::cat(FilteringCategory));
+ cl::cat(FilteringCategory));
cl::list<std::string> NameRegexFilters(
"name-regex", cl::Optional,
cl::desc("Show code coverage only for functions that match the given "
"regular expression"),
- cl::ZeroOrMore, cl::cat(FilteringCategory));
+ cl::cat(FilteringCategory));
cl::list<std::string> IgnoreFilenameRegexFilters(
"ignore-filename-regex", cl::Optional,
cl::desc("Skip source code files with file paths that match the given "
"regular expression"),
- cl::ZeroOrMore, cl::cat(FilteringCategory));
+ cl::cat(FilteringCategory));
cl::opt<double> RegionCoverageLtFilter(
"region-coverage-lt", cl::Optional,
@@ -883,6 +882,9 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
}
CoverageArches.emplace_back(Arch);
}
+ if (CoverageArches.size() == 1)
+ CoverageArches.insert(CoverageArches.end(), ObjectFilenames.size() - 1,
+ CoverageArches[0]);
if (CoverageArches.size() != ObjectFilenames.size()) {
error("Number of architectures doesn't match the number of objects");
return 1;
@@ -973,6 +975,11 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
"project-title", cl::Optional,
cl::desc("Set project title for the coverage report"));
+ cl::opt<std::string> CovWatermark(
+ "coverage-watermark", cl::Optional,
+ cl::desc("<high>,<low> values indicating the thresholds for the high "
+ "and low coverage watermarks"));
+
auto Err = commandLineParser(argc, argv);
if (Err)
return Err;
@@ -982,6 +989,47 @@ int CodeCoverageTool::doShow(int argc, const char **argv,
return 1;
}
+ ViewOpts.HighCovWatermark = 100.0;
+ ViewOpts.LowCovWatermark = 80.0;
+ if (!CovWatermark.empty()) {
+ auto WaterMarkPair = StringRef(CovWatermark).split(',');
+ if (WaterMarkPair.first.empty() || WaterMarkPair.second.empty()) {
+ error("invalid argument '" + CovWatermark +
+ "', must be in format 'high,low'",
+ "-coverage-watermark");
+ return 1;
+ }
+
+ char *EndPointer = nullptr;
+ ViewOpts.HighCovWatermark =
+ strtod(WaterMarkPair.first.begin(), &EndPointer);
+ if (EndPointer != WaterMarkPair.first.end()) {
+ error("invalid number '" + WaterMarkPair.first +
+ "', invalid value for 'high'",
+ "-coverage-watermark");
+ return 1;
+ }
+
+ ViewOpts.LowCovWatermark =
+ strtod(WaterMarkPair.second.begin(), &EndPointer);
+ if (EndPointer != WaterMarkPair.second.end()) {
+ error("invalid number '" + WaterMarkPair.second +
+ "', invalid value for 'low'",
+ "-coverage-watermark");
+ return 1;
+ }
+
+ if (ViewOpts.HighCovWatermark > 100 || ViewOpts.LowCovWatermark < 0 ||
+ ViewOpts.HighCovWatermark <= ViewOpts.LowCovWatermark) {
+ error(
+ "invalid number range '" + CovWatermark +
+ "', both high and low must be between 0 and 100, and high "
+ "must be greater than low",
+ "-coverage-watermark");
+ return 1;
+ }
+ }
+
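
Summarizing the validation above: the argument is two comma-separated percentages, high first, with 0 <= low < high <= 100; anything else is rejected with the errors shown. For illustration (binary and profile names hypothetical):

$ llvm-cov show ./a.out -instr-profile=default.profdata \
    --coverage-watermark=90,60    # green at >= 90% coverage, red below 60%
$ llvm-cov show ./a.out -instr-profile=default.profdata \
    --coverage-watermark=60,90    # error: high must be greater than low
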
ViewOpts.ShowLineNumbers = true;
ViewOpts.ShowLineStats = ShowLineExecutionCounts.getNumOccurrences() != 0 ||
!ShowRegions || ShowBestLineRegionsCounts;
diff --git a/llvm/tools/llvm-cov/CoverageViewOptions.h b/llvm/tools/llvm-cov/CoverageViewOptions.h
index 045fb1787bce..c6e99819f319 100644
--- a/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -50,6 +50,8 @@ struct CoverageViewOptions {
std::string CreatedTimeStr;
unsigned NumThreads;
std::string CompilationDirectory;
+ float HighCovWatermark;
+ float LowCovWatermark;
/// Change the output's stream color if the colors are enabled.
ColoredRawOstream colored_ostream(raw_ostream &OS,
diff --git a/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp b/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
index 56efc40b9349..46782c9b3c9a 100644
--- a/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -338,24 +338,24 @@ void CoveragePrinterHTML::emitFileSummary(raw_ostream &OS, StringRef SF,
SmallVector<std::string, 8> Columns;
// Format a coverage triple and add the result to the list of columns.
- auto AddCoverageTripleToColumn = [&Columns](unsigned Hit, unsigned Total,
- float Pctg) {
- std::string S;
- {
- raw_string_ostream RSO{S};
- if (Total)
- RSO << format("%*.2f", 7, Pctg) << "% ";
- else
- RSO << "- ";
- RSO << '(' << Hit << '/' << Total << ')';
- }
- const char *CellClass = "column-entry-yellow";
- if (Hit == Total)
- CellClass = "column-entry-green";
- else if (Pctg < 80.0)
- CellClass = "column-entry-red";
- Columns.emplace_back(tag("td", tag("pre", S), CellClass));
- };
+ auto AddCoverageTripleToColumn =
+ [&Columns, this](unsigned Hit, unsigned Total, float Pctg) {
+ std::string S;
+ {
+ raw_string_ostream RSO{S};
+ if (Total)
+ RSO << format("%*.2f", 7, Pctg) << "% ";
+ else
+ RSO << "- ";
+ RSO << '(' << Hit << '/' << Total << ')';
+ }
+ const char *CellClass = "column-entry-yellow";
+ if (Pctg >= Opts.HighCovWatermark)
+ CellClass = "column-entry-green";
+ else if (Pctg < Opts.LowCovWatermark)
+ CellClass = "column-entry-red";
+ Columns.emplace_back(tag("td", tag("pre", S), CellClass));
+ };
// Simplify the display file path, and wrap it in a link if requested.
std::string Filename;
@@ -538,7 +538,7 @@ void SourceCoverageViewHTML::renderLine(raw_ostream &OS, LineRef L,
auto Highlight = [&](const std::string &Snippet, unsigned LC, unsigned RC) {
if (getOptions().Debug)
HighlightedRanges.emplace_back(LC, RC);
- return tag("span", Snippet, std::string(Color.getValue()));
+ return tag("span", Snippet, std::string(*Color));
};
auto CheckIfUncovered = [&](const CoverageSegment *S) {
@@ -561,12 +561,12 @@ void SourceCoverageViewHTML::renderLine(raw_ostream &OS, LineRef L,
else
Color = None;
- if (Color.hasValue())
+ if (Color)
Snippets[I + 1] = Highlight(Snippets[I + 1], CurSeg->Col,
CurSeg->Col + Snippets[I + 1].size());
}
- if (Color.hasValue() && Segments.empty())
+ if (Color && Segments.empty())
Snippets.back() = Highlight(Snippets.back(), 1, 1 + Snippets.back().size());
if (getOptions().Debug) {
diff --git a/llvm/tools/llvm-cov/TestingSupport.cpp b/llvm/tools/llvm-cov/TestingSupport.cpp
index 9c6b25f2f585..289a1621660b 100644
--- a/llvm/tools/llvm-cov/TestingSupport.cpp
+++ b/llvm/tools/llvm-cov/TestingSupport.cpp
@@ -12,6 +12,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
#include <system_error>
diff --git a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 1430674dbadc..02f4c8493903 100644
--- a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -36,7 +36,7 @@ namespace opts {
cl::OptionCategory CXXDumpCategory("CXX Dump Options");
cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<input object files>"),
- cl::ZeroOrMore, cl::cat(CXXDumpCategory));
+ cl::cat(CXXDumpCategory));
} // namespace opts
namespace llvm {
diff --git a/llvm/tools/llvm-cxxfilt/Opts.td b/llvm/tools/llvm-cxxfilt/Opts.td
index 93f865245fe6..f652a1a7f88b 100644
--- a/llvm/tools/llvm-cxxfilt/Opts.td
+++ b/llvm/tools/llvm-cxxfilt/Opts.td
@@ -16,7 +16,7 @@ multiclass Eq<string name, string help> {
def help : FF<"help", "Display this help">;
defm strip_underscore : BB<"strip-underscore", "Strip the leading underscore", "Don't strip the leading underscore">;
-def types : FF<"types", "">;
+def types : FF<"types", "Attempt to demangle types as well as function names">;
def version : FF<"version", "Display the version">;
defm : Eq<"format", "Specify mangling format. Currently ignored because only 'gnu' is supported">;
diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index ccfaaa96deb2..1cea9e29faa4 100644
--- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -140,7 +140,7 @@ static void demangleLine(llvm::raw_ostream &OS, StringRef Mangled, bool Split) {
OS.flush();
}
-int main(int argc, char **argv) {
+int llvm_cxxfilt_main(int argc, char **argv) {
InitLLVM X(argc, argv);
BumpPtrAllocator A;
StringSaver Saver(A);
diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp
index 7b3c3e7706a6..4996fc12ae32 100644
--- a/llvm/tools/llvm-dis/llvm-dis.cpp
+++ b/llvm/tools/llvm-dis/llvm-dis.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
@@ -37,7 +38,7 @@ using namespace llvm;
static cl::OptionCategory DisCategory("Disassembler Options");
-static cl::list<std::string> InputFilenames(cl::Positional, cl::ZeroOrMore,
+static cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("[input bitcode]..."),
cl::cat(DisCategory));
@@ -179,8 +180,13 @@ int main(int argc, char **argv) {
}
for (std::string InputFilename : InputFilenames) {
- std::unique_ptr<MemoryBuffer> MB = ExitOnErr(
- errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename)));
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFileOrSTDIN(InputFilename);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ WithColor::error() << InputFilename << ": " << EC.message() << '\n';
+ return 1;
+ }
+ std::unique_ptr<MemoryBuffer> MB = std::move(BufferOrErr.get());
BitcodeFileContents IF = ExitOnErr(llvm::getBitcodeFileContents(*MB));
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 5c08e43b4b09..ed92665e0483 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/JSON.h"
@@ -1043,14 +1044,19 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
LocStats.LocalVarNonEntryValLocStats);
J.objectEnd();
OS << '\n';
- LLVM_DEBUG(llvm::dbgs() << "Total Availability: "
- << (int)std::round((VarParamWithLoc.Value * 100.0) /
+ LLVM_DEBUG(
+ llvm::dbgs() << "Total Availability: "
+ << (VarParamTotal.Value
+ ? (int)std::round((VarParamWithLoc.Value * 100.0) /
VarParamTotal.Value)
- << "%\n";
- llvm::dbgs() << "PC Ranges covered: "
- << (int)std::round(
+ : 0)
+ << "%\n";
+ llvm::dbgs() << "PC Ranges covered: "
+ << (GlobalStats.ScopeBytes.Value
+ ? (int)std::round(
(GlobalStats.ScopeBytesCovered.Value * 100.0) /
GlobalStats.ScopeBytes.Value)
- << "%\n");
+ : 0)
+ << "%\n");
return true;
}
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 9c2ddc3867a5..f7d3052c8c4d 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -15,6 +15,8 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/MachOUniversal.h"
@@ -24,6 +26,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -119,7 +122,7 @@ using namespace cl;
OptionCategory DwarfDumpCategory("Specific Options");
static list<std::string>
InputFilenames(Positional, desc("<input object files or .dSYM bundles>"),
- ZeroOrMore, cat(DwarfDumpCategory));
+ cat(DwarfDumpCategory));
cl::OptionCategory SectionCategory("Section-specific Dump Options",
"These control which sections are dumped. "
@@ -245,6 +248,10 @@ static cl::opt<bool>
cl::desc("Show the sizes of all debug sections, "
"expressed in bytes."),
cat(DwarfDumpCategory));
+static cl::opt<bool>
+ ShowSources("show-sources",
+ cl::desc("Show the sources across all compilation units."),
+ cat(DwarfDumpCategory));
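
Given the collection logic added below, --show-sources prints one source path per line, deduplicated and sorted, made absolute against each CU's compilation directory where possible. For illustration (paths hypothetical):

$ llvm-dwarfdump --show-sources main.o
/src/include/util.h
/src/main.cpp
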
static opt<bool> Verify("verify", desc("Verify the DWARF debug info."),
cat(DwarfDumpCategory));
static opt<bool> Quiet("quiet", desc("Use with -verify to not emit to STDOUT."),
@@ -464,6 +471,87 @@ static bool lookup(ObjectFile &Obj, DWARFContext &DICtx, uint64_t Address,
return true;
}
+// Collect all sources referenced from the given line table, scoped to the given
+// CU compilation directory.
+static bool collectLineTableSources(const DWARFDebugLine::LineTable &LT,
+ StringRef CompDir,
+ std::vector<std::string> &Sources) {
+ bool Result = true;
+ llvm::Optional<uint64_t> LastIndex = LT.getLastValidFileIndex();
+ for (uint64_t I = LT.hasFileAtIndex(0) ? 0 : 1,
+ E = LastIndex ? *LastIndex + 1 : 0;
+ I < E; ++I) {
+ std::string Path;
+ Result &= LT.getFileNameByIndex(
+ I, CompDir, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+ Path);
+ Sources.push_back(std::move(Path));
+ }
+ return Result;
+}
+
+static bool collectObjectSources(ObjectFile &Obj, DWARFContext &DICtx,
+ const Twine &Filename, raw_ostream &OS) {
+ bool Result = true;
+ std::vector<std::string> Sources;
+
+ bool HasCompileUnits = false;
+ for (const auto &CU : DICtx.compile_units()) {
+ HasCompileUnits = true;
+ // Extract paths from the line table for this CU. This allows combining the
+ // compilation directory with the line information, in case both the include
+ // directory and file names in the line table are relative.
+ const DWARFDebugLine::LineTable *LT = DICtx.getLineTableForUnit(CU.get());
+ StringRef CompDir = CU->getCompilationDir();
+ if (LT) {
+ Result &= collectLineTableSources(*LT, CompDir, Sources);
+ } else {
+ // Since there's no line table for this CU, collect the name from the CU
+ // itself.
+ const char *Name = CU->getUnitDIE().getShortName();
+ if (!Name) {
+ WithColor::warning()
+ << Filename << ": missing name for compilation unit\n";
+ continue;
+ }
+ SmallString<64> AbsName;
+ if (sys::path::is_relative(Name, sys::path::Style::posix) &&
+ sys::path::is_relative(Name, sys::path::Style::windows))
+ AbsName = CompDir;
+ sys::path::append(AbsName, Name);
+ Sources.push_back(std::string(AbsName));
+ }
+ }
+
+ if (!HasCompileUnits) {
+ // Since there's no compile units available, walk the line tables and
+ // extract out any referenced paths.
+ DWARFDataExtractor LineData(DICtx.getDWARFObj(),
+ DICtx.getDWARFObj().getLineSection(),
+ DICtx.isLittleEndian(), 0);
+ DWARFDebugLine::SectionParser Parser(LineData, DICtx, DICtx.normal_units());
+ while (!Parser.done()) {
+ const auto RecoverableErrorHandler = [&](Error Err) {
+ Result = false;
+ WithColor::defaultErrorHandler(std::move(Err));
+ };
+ void (*UnrecoverableErrorHandler)(Error Err) = error;
+
+ DWARFDebugLine::LineTable LT =
+ Parser.parseNext(RecoverableErrorHandler, UnrecoverableErrorHandler);
+ Result &= collectLineTableSources(LT, /*CompDir=*/"", Sources);
+ }
+ }
+
+ // Dedup and order the sources.
+ llvm::sort(Sources.begin(), Sources.end());
+ Sources.erase(std::unique(Sources.begin(), Sources.end()), Sources.end());
+
+ for (StringRef Name : Sources)
+ OS << Name << "\n";
+ return Result;
+}
+
static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
const Twine &Filename, raw_ostream &OS) {
logAllUnhandledErrors(DICtx.loadRegisterInfo(Obj), errs(),
@@ -677,6 +765,9 @@ int main(int argc, char **argv) {
} else if (ShowSectionSizes) {
for (auto Object : Objects)
Success &= handleFile(Object, collectObjectSectionSizes, OutputFile.os());
+ } else if (ShowSources) {
+ for (auto Object : Objects)
+ Success &= handleFile(Object, collectObjectSources, OutputFile.os());
} else {
for (auto Object : Objects)
Success &= handleFile(Object, dumpObjectFile, OutputFile.os());
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 4b6f7bc8dd34..d2d162d648c0 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -19,11 +19,14 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -33,13 +36,13 @@ using namespace llvm::object;
static mc::RegisterMCTargetOptionsFlags MCTargetOptionsFlags;
cl::OptionCategory DwpCategory("Specific Options");
-static cl::list<std::string> InputFiles(cl::Positional, cl::ZeroOrMore,
- cl::desc("<input files>"),
- cl::cat(DwpCategory));
+static cl::list<std::string>
+ InputFiles(cl::Positional, cl::desc("<input files>"), cl::cat(DwpCategory));
static cl::list<std::string> ExecFilenames(
- "e", cl::ZeroOrMore,
- cl::desc("Specify the executable/library files to get the list of *.dwo from"),
+ "e",
+ cl::desc(
+ "Specify the executable/library files to get the list of *.dwo from"),
cl::value_desc("filename"), cl::cat(DwpCategory));
static cl::opt<std::string> OutputFilename(cl::Required, "o",
@@ -162,7 +165,7 @@ int main(int argc, char **argv) {
if (!MII)
return error("no instr info for target " + TripleName, Context);
- MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, *MRI, MC);
+ MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, MC);
if (!MCE)
return error("no code emitter for target " + TripleName, Context);
@@ -193,7 +196,7 @@ int main(int argc, char **argv) {
return 1;
}
- MS->Finish();
+ MS->finish();
OutFile.keep();
return 0;
}
diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp
index 3cdef529504e..ffd2a390d9c3 100644
--- a/llvm/tools/llvm-extract/llvm-extract.cpp
+++ b/llvm/tools/llvm-extract/llvm-extract.cpp
@@ -66,8 +66,7 @@ static cl::opt<bool>
// ExtractFuncs - The functions to extract from the module.
static cl::list<std::string>
ExtractFuncs("func", cl::desc("Specify function to extract"),
- cl::ZeroOrMore, cl::value_desc("function"),
- cl::cat(ExtractCat));
+ cl::value_desc("function"), cl::cat(ExtractCat));
// ExtractRegExpFuncs - The functions, matched via regular expression, to
// extract from the module.
@@ -75,8 +74,7 @@ static cl::list<std::string>
ExtractRegExpFuncs("rfunc",
cl::desc("Specify function(s) to extract using a "
"regular expression"),
- cl::ZeroOrMore, cl::value_desc("rfunction"),
- cl::cat(ExtractCat));
+ cl::value_desc("rfunction"), cl::cat(ExtractCat));
// ExtractBlocks - The blocks to extract from the module.
static cl::list<std::string> ExtractBlocks(
@@ -90,14 +88,12 @@ static cl::list<std::string> ExtractBlocks(
" --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n"
" --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one "
"with bb2."),
- cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"),
- cl::cat(ExtractCat));
+ cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat));
// ExtractAlias - The alias to extract from the module.
static cl::list<std::string>
ExtractAliases("alias", cl::desc("Specify alias to extract"),
- cl::ZeroOrMore, cl::value_desc("alias"),
- cl::cat(ExtractCat));
+ cl::value_desc("alias"), cl::cat(ExtractCat));
// ExtractRegExpAliases - The aliases, matched via regular expression, to
// extract from the module.
@@ -105,14 +101,12 @@ static cl::list<std::string>
ExtractRegExpAliases("ralias",
cl::desc("Specify alias(es) to extract using a "
"regular expression"),
- cl::ZeroOrMore, cl::value_desc("ralias"),
- cl::cat(ExtractCat));
+ cl::value_desc("ralias"), cl::cat(ExtractCat));
// ExtractGlobals - The globals to extract from the module.
static cl::list<std::string>
ExtractGlobals("glob", cl::desc("Specify global to extract"),
- cl::ZeroOrMore, cl::value_desc("global"),
- cl::cat(ExtractCat));
+ cl::value_desc("global"), cl::cat(ExtractCat));
// ExtractRegExpGlobals - The globals, matched via regular expression, to
// extract from the module...
@@ -120,8 +114,7 @@ static cl::list<std::string>
ExtractRegExpGlobals("rglob",
cl::desc("Specify global(s) to extract using a "
"regular expression"),
- cl::ZeroOrMore, cl::value_desc("rglobal"),
- cl::cat(ExtractCat));
+ cl::value_desc("rglobal"), cl::cat(ExtractCat));
static cl::opt<bool> OutputAssembly("S",
cl::desc("Write output as LLVM assembly"),
diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp
index 9abe8efaa4e8..6585b193b2cb 100644
--- a/llvm/tools/llvm-link/llvm-link.cpp
+++ b/llvm/tools/llvm-link/llvm-link.cpp
@@ -48,7 +48,7 @@ static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
cl::cat(LinkCategory));
static cl::list<std::string> OverridingInputs(
- "override", cl::ZeroOrMore, cl::value_desc("filename"),
+ "override", cl::value_desc("filename"),
cl::desc(
"input bitcode file which can override previously defined symbol(s)"),
cl::cat(LinkCategory));
@@ -56,7 +56,7 @@ static cl::list<std::string> OverridingInputs(
// Option to simulate function importing for testing. This enables using
// llvm-link to simulate ThinLTO backend processes.
static cl::list<std::string> Imports(
- "import", cl::ZeroOrMore, cl::value_desc("function:filename"),
+ "import", cl::value_desc("function:filename"),
cl::desc("Pair of function name and filename, where function should be "
"imported from bitcode in filename"),
cl::cat(LinkCategory));
@@ -124,6 +124,11 @@ static cl::opt<bool> NoVerify("disable-verify",
cl::desc("Do not run the verifier"), cl::Hidden,
cl::cat(LinkCategory));
+static cl::opt<bool> IgnoreNonBitcode(
+ "ignore-non-bitcode",
+ cl::desc("Do not report an error for non-bitcode files in archives"),
+ cl::Hidden);
+
static ExitOnError ExitOnErr;
// Read the specified bitcode file in and return it. This routine searches the
@@ -164,11 +169,16 @@ static std::unique_ptr<Module> loadArFile(const char *Argv0,
if (Verbose)
errs() << "Reading library archive file '" << ArchiveName
<< "' to memory\n";
- Error Err = Error::success();
- object::Archive Archive(*Buffer, Err);
- ExitOnErr(std::move(Err));
+ Expected<std::unique_ptr<object::Archive>> ArchiveOrError =
+ object::Archive::create(Buffer->getMemBufferRef());
+ if (!ArchiveOrError)
+ ExitOnErr(ArchiveOrError.takeError());
+
+ std::unique_ptr<object::Archive> Archive = std::move(ArchiveOrError.get());
+
Linker L(*Result);
- for (const object::Archive::Child &C : Archive.children(Err)) {
+ Error Err = Error::success();
+ for (const object::Archive::Child &C : Archive->children(Err)) {
Expected<StringRef> Ename = C.getName();
if (Error E = Ename.takeError()) {
errs() << Argv0 << ": ";
@@ -194,6 +204,8 @@ static std::unique_ptr<Module> loadArFile(const char *Argv0,
MemBuf.get().getBufferStart()),
reinterpret_cast<const unsigned char *>(
MemBuf.get().getBufferEnd()))) {
+ if (IgnoreNonBitcode)
+ continue;
errs() << Argv0 << ": ";
WithColor::error() << " member of archive is not a bitcode file: '"
<< ChildName << "'\n";
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index 8fc3a5d68500..c8266616b73d 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -71,7 +71,7 @@ static cl::opt<char>
OptLevel("O",
cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
"(default = '-O2')"),
- cl::Prefix, cl::ZeroOrMore, cl::init('2'), cl::cat(LTOCategory));
+ cl::Prefix, cl::init('2'), cl::cat(LTOCategory));
static cl::opt<bool>
IndexStats("thinlto-index-stats",
@@ -210,12 +210,12 @@ static cl::opt<std::string> OutputFilename("o", cl::init(""),
static cl::list<std::string> ExportedSymbols(
"exported-symbol",
cl::desc("List of symbols to export from the resulting object file"),
- cl::ZeroOrMore, cl::cat(LTOCategory));
+ cl::cat(LTOCategory));
static cl::list<std::string>
DSOSymbols("dso-symbol",
cl::desc("Symbol to put in the symtab in the resulting dso"),
- cl::ZeroOrMore, cl::cat(LTOCategory));
+ cl::cat(LTOCategory));
static cl::opt<bool> ListSymbolsOnly(
"list-symbols-only", cl::init(false),
@@ -256,10 +256,6 @@ static cl::opt<bool> PrintMachOCPUOnly(
cl::desc("Instead of running LTO, print the mach-o cpu in each IR file"),
cl::cat(LTOCategory));
-static cl::opt<bool> UseNewPM(
- "use-new-pm", cl::desc("Run LTO passes using the new pass manager"),
- cl::init(LLVM_ENABLE_NEW_PASS_MANAGER), cl::Hidden, cl::cat(LTOCategory));
-
static cl::opt<bool>
DebugPassManager("debug-pass-manager", cl::init(false), cl::Hidden,
cl::desc("Print pass management debugging information"),
@@ -604,7 +600,6 @@ public:
ThinGenerator.setCacheMaxSizeFiles(ThinLTOCacheMaxSizeFiles);
ThinGenerator.setCacheMaxSizeBytes(ThinLTOCacheMaxSizeBytes);
ThinGenerator.setFreestanding(EnableFreestanding);
- ThinGenerator.setUseNewPM(UseNewPM);
ThinGenerator.setDebugPassManager(DebugPassManager);
// Add all the exported symbols to the table of symbols to preserve.
@@ -1015,6 +1010,7 @@ int main(int argc, char **argv) {
CodeGen.setCodePICModel(codegen::getExplicitRelocModel());
CodeGen.setFreestanding(EnableFreestanding);
+ CodeGen.setDebugPassManager(DebugPassManager);
CodeGen.setDebugInfo(LTO_DEBUG_MODEL_DWARF);
CodeGen.setTargetOptions(Options);
@@ -1069,10 +1065,8 @@ int main(int argc, char **argv) {
CodeGen.setOptLevel(OptLevel - '0');
CodeGen.setAttrs(codegen::getMAttrs());
- CodeGen.setUseNewPM(UseNewPM);
-
if (auto FT = codegen::getExplicitFileType())
- CodeGen.setFileType(FT.getValue());
+ CodeGen.setFileType(*FT);
if (!OutputFilename.empty()) {
if (SaveLinkedModuleFile) {
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 7416e5850944..f79db36d2d2d 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -37,9 +37,10 @@ using namespace lto;
static codegen::RegisterCodeGenFlags CGF;
static cl::opt<char>
- OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
- "(default = '-O2')"),
- cl::Prefix, cl::ZeroOrMore, cl::init('2'));
+ OptLevel("O",
+ cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+ "(default = '-O2')"),
+ cl::Prefix, cl::init('2'));
static cl::opt<char> CGOptLevel(
"cg-opt-level",
@@ -67,11 +68,23 @@ static cl::opt<std::string> AAPipeline("aa-pipeline",
static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temporary files"));
static cl::opt<bool>
- ThinLTODistributedIndexes("thinlto-distributed-indexes", cl::init(false),
+ ThinLTODistributedIndexes("thinlto-distributed-indexes",
cl::desc("Write out individual index and "
"import files for the "
"distributed backend case"));
+static cl::opt<bool>
+ ThinLTOEmitIndexes("thinlto-emit-indexes",
+ cl::desc("Write out individual index files via "
+ "InProcessThinLTO"));
+
+static cl::opt<bool>
+ ThinLTOEmitImports("thinlto-emit-imports",
+ cl::desc("Write out individual imports files via "
+ "InProcessThinLTO. Has no effect unless "
+ "specified with -thinlto-emit-indexes or "
+ "-thinlto-distributed-indexes"));
+
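
The two new flags make the in-process ThinLTO backend emit the per-module index (and, optionally, imports) files that were previously available only from the distributed-indexes mode. For illustration (inputs and symbol resolutions hypothetical):

$ llvm-lto2 run -o out -thinlto-emit-indexes -thinlto-emit-imports \
    main.bc -r=main.bc,main,plx
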
// Default to using all available threads in the system, but using only one
// thread per core (no SMT).
// Use -thinlto-threads=all to use hardware_concurrency() instead, which means
@@ -89,8 +102,7 @@ static cl::list<std::string> SymbolResolutions(
" runtime and is known to be in this linkage unit\n"
" x - externally visible: the definition of this symbol is\n"
" visible outside of the LTO unit\n"
- "A resolution for each symbol must be specified."),
- cl::ZeroOrMore);
+ "A resolution for each symbol must be specified"));
static cl::opt<std::string> OverrideTriple(
"override-triple",
@@ -141,15 +153,14 @@ static cl::opt<std::string>
static cl::opt<bool>
RunCSIRInstr("lto-cspgo-gen",
cl::desc("Run PGO context sensitive IR instrumentation"),
- cl::init(false), cl::Hidden);
+ cl::Hidden);
-static cl::opt<bool>
- UseNewPM("use-new-pm",
- cl::desc("Run LTO passes using the new pass manager"),
- cl::init(LLVM_ENABLE_NEW_PASS_MANAGER), cl::Hidden);
+static cl::opt<bool> LtoOpaquePointers("lto-opaque-pointers",
+ cl::desc("Enable opaque pointer types"),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
- DebugPassManager("debug-pass-manager", cl::init(false), cl::Hidden,
+ DebugPassManager("debug-pass-manager", cl::Hidden,
cl::desc("Print pass management debugging information"));
static cl::opt<std::string>
@@ -162,7 +173,7 @@ static cl::list<std::string>
static cl::opt<bool> EnableFreestanding(
"lto-freestanding",
cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"),
- cl::init(false), cl::Hidden);
+ cl::Hidden);
static void check(Error E, std::string Msg) {
if (!E)
@@ -242,7 +253,7 @@ static int run(int argc, char **argv) {
Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple());
Conf.MAttrs = codegen::getMAttrs();
if (auto RM = codegen::getExplicitRelocModel())
- Conf.RelocModel = RM.getValue();
+ Conf.RelocModel = *RM;
Conf.CodeModel = codegen::getExplicitCodeModel();
Conf.DebugPassManager = DebugPassManager;
@@ -267,7 +278,6 @@ static int run(int argc, char **argv) {
Conf.AAPipeline = AAPipeline;
Conf.OptLevel = OptLevel - '0';
- Conf.UseNewPM = UseNewPM;
Conf.Freestanding = EnableFreestanding;
for (auto &PluginFN : PassPlugins)
Conf.PassPlugins.push_back(PluginFN);
@@ -290,24 +300,27 @@ static int run(int argc, char **argv) {
}
if (auto FT = codegen::getExplicitFileType())
- Conf.CGFileType = FT.getValue();
+ Conf.CGFileType = *FT;
Conf.OverrideTriple = OverrideTriple;
Conf.DefaultTriple = DefaultTriple;
Conf.StatsFile = StatsFile;
Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
+ Conf.OpaquePointers = LtoOpaquePointers;
ThinBackend Backend;
if (ThinLTODistributedIndexes)
- Backend = createWriteIndexesThinBackend(/* OldPrefix */ "",
- /* NewPrefix */ "",
- /* ShouldEmitImportsFiles */ true,
- /* LinkedObjectsFile */ nullptr,
- /* OnWrite */ {});
+ Backend =
+ createWriteIndexesThinBackend(/* OldPrefix */ "",
+ /* NewPrefix */ "", ThinLTOEmitImports,
+ /* LinkedObjectsFile */ nullptr,
+ /* OnWrite */ {});
else
Backend = createInProcessThinBackend(
- llvm::heavyweight_hardware_concurrency(Threads));
+ llvm::heavyweight_hardware_concurrency(Threads),
+ /* OnWrite */ {}, ThinLTOEmitIndexes, ThinLTOEmitImports);
+
// Track whether we hit an error; in particular, in the multi-threaded case,
// we can't exit() early because the rest of the threads wouldn't have had a
// change to be join-ed, and that would result in a "terminate called without
diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
index 4e5a12e53a6b..2a525f53ec29 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -541,7 +541,7 @@ int main(int argc, char **argv) {
// Set up the AsmStreamer.
std::unique_ptr<MCCodeEmitter> CE;
if (ShowEncoding)
- CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+ CE.reset(TheTarget->createMCCodeEmitter(*MCII, Ctx));
std::unique_ptr<MCAsmBackend> MAB(
TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
@@ -561,7 +561,7 @@ int main(int argc, char **argv) {
OS = BOS.get();
}
- MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
+ MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
Str.reset(TheTarget->createMCObjectStreamer(
TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
index 6cdd0ba797aa..cb8e1822ee30 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -16,6 +16,7 @@
#include "CodeRegionGenerator.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCTargetOptions.h"
@@ -62,10 +63,10 @@ public:
uint64_t Size = 0, unsigned ByteAlignment = 0,
SMLoc Loc = SMLoc()) override {}
void emitGPRel32Value(const MCExpr *Value) override {}
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void EmitCOFFSymbolStorageClass(int StorageClass) override {}
- void EmitCOFFSymbolType(int Type) override {}
- void EndCOFFSymbolDef() override {}
+ void beginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void emitCOFFSymbolStorageClass(int StorageClass) override {}
+ void emitCOFFSymbolType(int Type) override {}
+ void endCOFFSymbolDef() override {}
ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const {
return Regions.getInstructionSequence(Index);
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index caa8554a416a..67b636737b97 100644
--- a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -70,7 +70,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
else if (IIVDEntry.Latency < 100)
TempStream << ' ';
- if (IIVDEntry.RThroughput.hasValue()) {
+ if (IIVDEntry.RThroughput) {
double RT = IIVDEntry.RThroughput.getValue();
TempStream << format("%.2f", RT) << ' ';
if (RT < 10.0)
@@ -152,7 +152,7 @@ InstructionInfoView::toJSON(const InstructionInfoViewData &IIVD) const {
{"mayLoad", IIVD.mayLoad},
{"mayStore", IIVD.mayStore},
{"hasUnmodeledSideEffects", IIVD.hasUnmodeledSideEffects}});
- JO.try_emplace("RThroughput", IIVD.RThroughput.getValueOr(0.0));
+ JO.try_emplace("RThroughput", IIVD.RThroughput.value_or(0.0));
return JO;
}
diff --git a/llvm/tools/llvm-mca/Views/InstructionView.h b/llvm/tools/llvm-mca/Views/InstructionView.h
index cec07eef6a80..ae57246fc35f 100644
--- a/llvm/tools/llvm-mca/Views/InstructionView.h
+++ b/llvm/tools/llvm-mca/Views/InstructionView.h
@@ -17,9 +17,10 @@
#include "llvm/MCA/View.h"
#include "llvm/Support/JSON.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class MCInstPrinter;
+
namespace mca {
// The base class for views that deal with individual machine instructions.
diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
index 7a341d4c2079..06caeda344c8 100644
--- a/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
+++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -48,23 +48,23 @@ void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
} else if (Event.Type == HWInstructionEvent::Dispatched) {
const Instruction &Inst = *Event.IR.getInstruction();
const unsigned Index = Event.IR.getSourceIndex();
- if (LQResourceID && Inst.getDesc().MayLoad &&
+ if (LQResourceID && Inst.getMayLoad() &&
MostRecentLoadDispatched != Index) {
Usage[LQResourceID].SlotsInUse++;
MostRecentLoadDispatched = Index;
}
- if (SQResourceID && Inst.getDesc().MayStore &&
+ if (SQResourceID && Inst.getMayStore() &&
MostRecentStoreDispatched != Index) {
Usage[SQResourceID].SlotsInUse++;
MostRecentStoreDispatched = Index;
}
} else if (Event.Type == HWInstructionEvent::Executed) {
const Instruction &Inst = *Event.IR.getInstruction();
- if (LQResourceID && Inst.getDesc().MayLoad) {
+ if (LQResourceID && Inst.getMayLoad()) {
assert(Usage[LQResourceID].SlotsInUse);
Usage[LQResourceID].SlotsInUse--;
}
- if (SQResourceID && Inst.getDesc().MayStore) {
+ if (SQResourceID && Inst.getMayStore()) {
assert(Usage[SQResourceID].SlotsInUse);
Usage[SQResourceID].SlotsInUse--;
}
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 1826491f3f30..409de283e5a1 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -465,6 +465,21 @@ int main(int argc, char **argv) {
const MCSchedModel &SM = STI->getSchedModel();
+ std::unique_ptr<mca::InstrPostProcess> IPP;
+ if (!DisableCustomBehaviour) {
+ // TODO: It may be a good idea to separate CB and IPP so that they can
+ // be used independently of each other, e.g. by adding an extra
+ // command-line arg --disable-ipp so that CB and IPP can be toggled
+ // without needing to toggle both of them together.
+ IPP = std::unique_ptr<mca::InstrPostProcess>(
+ TheTarget->createInstrPostProcess(*STI, *MCII));
+ }
+ if (!IPP) {
+ // If the target doesn't have its own IPP implemented (or the -disable-cb
+ // flag is set) then we use the base class (which does nothing).
+ IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
+ }
+
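
Hoisting IPP out of the per-region loop means one post-processor instance now persists across all code regions; the resetState() call added further down runs once per region to clear accumulated state. A hypothetical target-side implementation, assuming the mca::InstrPostProcess interface from llvm/MCA/CustomBehaviour.h:

#include "llvm/MC/MCInst.h"
#include "llvm/MCA/CustomBehaviour.h"

using namespace llvm;

// Sketch only: a target post-processor with per-region state.
class MyTargetInstrPostProcess : public mca::InstrPostProcess {
  unsigned SeenInRegion = 0; // hypothetical per-region counter

public:
  using InstrPostProcess::InstrPostProcess;

  void postProcessInstruction(std::unique_ptr<mca::Instruction> &Inst,
                              const MCInst &MCI) override {
    ++SeenInRegion;
    // ... adjust Inst based on MCI and the state gathered so far ...
  }

  // Called once per code region now that the instance is shared.
  void resetState() override { SeenInRegion = 0; }
};
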
// Create an instruction builder.
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get());
@@ -479,7 +494,7 @@ int main(int argc, char **argv) {
unsigned RegionIdx = 0;
std::unique_ptr<MCCodeEmitter> MCE(
- TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+ TheTarget->createMCCodeEmitter(*MCII, Ctx));
assert(MCE && "Unable to create code emitter!");
std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(
@@ -498,16 +513,7 @@ int main(int argc, char **argv) {
ArrayRef<MCInst> Insts = Region->getInstructions();
mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts);
- std::unique_ptr<mca::InstrPostProcess> IPP;
- if (!DisableCustomBehaviour) {
- IPP = std::unique_ptr<mca::InstrPostProcess>(
- TheTarget->createInstrPostProcess(*STI, *MCII));
- }
- if (!IPP)
- // If the target doesn't have its own IPP implemented (or the
- // -disable-cb flag is set) then we use the base class
- // (which does nothing).
- IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
+ IPP->resetState();
SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence;
for (const MCInst &MCI : Insts) {
@@ -536,7 +542,8 @@ int main(int argc, char **argv) {
LoweredSequence.emplace_back(std::move(Inst.get()));
}
- mca::SourceMgr S(LoweredSequence, PrintInstructionTables ? 1 : Iterations);
+ mca::CircularSourceMgr S(LoweredSequence,
+ PrintInstructionTables ? 1 : Iterations);
if (PrintInstructionTables) {
// Create a pipeline, stages, and a printer.
diff --git a/llvm/tools/llvm-modextract/llvm-modextract.cpp b/llvm/tools/llvm-modextract/llvm-modextract.cpp
index b1d6bfb790ec..50f503ae0ac4 100644
--- a/llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ b/llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -17,6 +17,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
diff --git a/llvm/tools/llvm-nm/Opts.td b/llvm/tools/llvm-nm/Opts.td
index 3a790890909a..60ac134269b3 100644
--- a/llvm/tools/llvm-nm/Opts.td
+++ b/llvm/tools/llvm-nm/Opts.td
@@ -13,10 +13,12 @@ multiclass Eq<string name, string help> {
def : Separate<["--"], name>, Alias<!cast<Joined>(NAME #_EQ)>;
}
+def X : JoinedOrSeparate<["-"], "X">, HelpText<"Specifies the type of ELF, XCOFF, or IR object file to examine. The value must be one of: 32, 64, 32_64, any (default)">;
def debug_syms : FF<"debug-syms", "Show all symbols, even debugger only">;
def defined_only : FF<"defined-only", "Show only defined symbols">;
defm demangle : BB<"demangle", "Demangle C++ symbol names", "Don't demangle symbol names">;
def dynamic : FF<"dynamic", "Display dynamic symbols instead of normal symbols">;
+def export_symbols : FF<"export-symbols", "Export symbol list for all inputs">;
def extern_only : FF<"extern-only", "Show only external symbols">;
defm format : Eq<"format", "Specify output format: bsd (default), posix, sysv, darwin, just-symbols">, MetaVarName<"<format>">;
def help : FF<"help", "Display this help">;
@@ -48,6 +50,11 @@ def no_dyldinfo : FF<"no-dyldinfo", "Don't add any symbols from the dyldinfo">,
def s : F<"s", "Dump only symbols from this segment and section name">, Group<grp_mach_o>;
def x : F<"x", "Print symbol entry in hex">, Group<grp_mach_o>;
+// XCOFF specific options.
+def grp_xcoff_o : OptionGroup<"kind">, HelpText<"llvm-nm XCOFF Specific Options">;
+
+def no_rsrc : FF<"no-rsrc", "Exclude resource file symbols (__rsrc) from the export symbol list.">, Group<grp_xcoff_o>;
+
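
Together with -X and --export-symbols above, typical XCOFF invocations look like this (file names hypothetical):

$ llvm-nm -X32_64 libboth.a                  # consider 32- and 64-bit members
$ llvm-nm --export-symbols --no-rsrc shr.o   # export list without __rsrc symbols
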
def : FF<"just-symbol-name", "Alias for --format=just-symbols">, Alias<format_EQ>, AliasArgs<["just-symbols"]>, Flags<[HelpHidden]>;
def : FF<"portability", "Alias for --format=posix">, Alias<format_EQ>, AliasArgs<["posix"]>;
@@ -70,7 +77,7 @@ def : F<"r", "Alias for --reverse-sort">, Alias<reverse_sort>;
def : F<"S", "Alias for --print-size">, Alias<print_size>;
def : JoinedOrSeparate<["-"], "t">, HelpText<"Alias for --radix">, Alias<radix_EQ>, MetaVarName<"<radix>">;
def : F<"u", "Alias for --undefined-only">, Alias<undefined_only>;
-def : F<"U", "Deprecated alias for --defined-only">, Alias<defined_only>, Flags<[HelpHidden]>;
+def : F<"U", "Alias for --defined-only">, Alias<defined_only>;
def : F<"v", "Alias for --numeric-sort">, Alias<numeric_sort>;
def : F<"V", "Alias for --version">, Alias<version>;
-def : F<"W", "Deprecated alias for --no-weak">, Alias<no_weak>, Flags<[HelpHidden]>;
+def : F<"W", "Alias for --no-weak">, Alias<no_weak>;
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index f1d8b0026429..f0def8b74e60 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -83,13 +84,16 @@ public:
};
enum OutputFormatTy { bsd, sysv, posix, darwin, just_symbols };
+enum class BitModeTy { Bit32, Bit64, Bit32_64, Any };
} // namespace
static bool ArchiveMap;
+static BitModeTy BitMode;
static bool DebugSyms;
static bool DefinedOnly;
static bool Demangle;
static bool DynamicSyms;
+static bool ExportSymbols;
static bool ExternalOnly;
static OutputFormatTy OutputFormat;
static bool NoLLVMBitcode;
@@ -105,6 +109,9 @@ static bool SizeSort;
static bool UndefinedOnly;
static bool WithoutAliases;
+// XCOFF-specific options.
+static bool NoRsrc;
+
namespace {
enum Radix { d, o, x };
} // namespace
@@ -128,7 +135,8 @@ static bool HadError = false;
static StringRef ToolName;
-static void warn(Error Err, Twine FileName, Twine Context = Twine()) {
+static void warn(Error Err, Twine FileName, Twine Context = Twine(),
+ Twine Archive = Twine()) {
assert(Err);
// Flush the standard output so that the warning isn't interleaved with other
@@ -137,8 +145,9 @@ static void warn(Error Err, Twine FileName, Twine Context = Twine()) {
handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
WithColor::warning(errs(), ToolName)
- << FileName << ": " << (Context.str().empty() ? "" : Context + ": ")
- << EI.message() << "\n";
+ << (Archive.str().empty() ? FileName : Archive + "(" + FileName + ")")
+ << ": " << (Context.str().empty() ? "" : Context + ": ") << EI.message()
+ << "\n";
});
}
@@ -211,6 +220,8 @@ struct NMSymbol {
StringRef SectionName;
StringRef TypeName;
BasicSymbolRef Sym;
+ StringRef Visibility;
+
// The Sym field above points to the native symbol in the object file; for
// Mach-O, when we are creating symbols from the dyld info, the above
// pointer is null as there is no native symbol. In these cases the fields
@@ -222,40 +233,59 @@ struct NMSymbol {
uint8_t NSect;
uint16_t NDesc;
std::string IndirectName;
-};
-} // anonymous namespace
-static bool compareSymbolAddress(const NMSymbol &A, const NMSymbol &B) {
- bool ADefined;
- // Symbol flags have been checked in the caller.
- if (A.Sym.getRawDataRefImpl().p) {
- uint32_t AFlags = cantFail(A.Sym.getFlags());
- ADefined = !(AFlags & SymbolRef::SF_Undefined);
- } else {
- ADefined = A.TypeChar != 'U';
+ bool isDefined() const {
+ if (Sym.getRawDataRefImpl().p) {
+ uint32_t Flags = cantFail(Sym.getFlags());
+ return !(Flags & SymbolRef::SF_Undefined);
+ }
+ return TypeChar != 'U';
}
- bool BDefined;
- // Symbol flags have been checked in the caller.
- if (B.Sym.getRawDataRefImpl().p) {
- uint32_t BFlags = cantFail(B.Sym.getFlags());
- BDefined = !(BFlags & SymbolRef::SF_Undefined);
- } else {
- BDefined = B.TypeChar != 'U';
+
+ bool initializeFlags(const SymbolicFile &Obj) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr) {
+ // TODO: Test this error.
+ error(SymFlagsOrErr.takeError(), Obj.getFileName());
+ return false;
+ }
+ SymFlags = *SymFlagsOrErr;
+ return true;
}
- return std::make_tuple(ADefined, A.Address, A.Name, A.Size) <
- std::make_tuple(BDefined, B.Address, B.Name, B.Size);
-}
-static bool compareSymbolSize(const NMSymbol &A, const NMSymbol &B) {
- return std::make_tuple(A.Size, A.Name, A.Address) <
- std::make_tuple(B.Size, B.Name, B.Address);
-}
+ bool shouldPrint() const {
+ bool Undefined = SymFlags & SymbolRef::SF_Undefined;
+ bool Global = SymFlags & SymbolRef::SF_Global;
+ bool Weak = SymFlags & SymbolRef::SF_Weak;
+ bool FormatSpecific = SymFlags & SymbolRef::SF_FormatSpecific;
+ if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) ||
+ (!Global && ExternalOnly) || (Weak && NoWeakSymbols) ||
+ (FormatSpecific && !(SpecialSyms || DebugSyms)))
+ return false;
+ return true;
+ }
+};
-static bool compareSymbolName(const NMSymbol &A, const NMSymbol &B) {
+bool operator<(const NMSymbol &A, const NMSymbol &B) {
+ if (NumericSort)
+ return std::make_tuple(A.isDefined(), A.Address, A.Name, A.Size) <
+ std::make_tuple(B.isDefined(), B.Address, B.Name, B.Size);
+ if (SizeSort)
+ return std::make_tuple(A.Size, A.Name, A.Address) <
+ std::make_tuple(B.Size, B.Name, B.Address);
+ if (ExportSymbols)
+ return std::make_tuple(A.Name, A.Visibility) <
+ std::make_tuple(B.Name, B.Visibility);
return std::make_tuple(A.Name, A.Size, A.Address) <
std::make_tuple(B.Name, B.Size, B.Address);
}
+bool operator>(const NMSymbol &A, const NMSymbol &B) { return B < A; }
+bool operator==(const NMSymbol &A, const NMSymbol &B) {
+ return !(A < B) && !(B < A);
+}
+} // anonymous namespace
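// Sketch (standalone, field names invented) of the comparison scheme above:
// std::make_tuple gives lexicographic ordering over the chosen sort keys, and
// a single operator< is enough to derive both > and == exactly as the patch
// does; std::tie would avoid the string copies make_tuple performs.
#include <cstdint>
#include <string>
#include <tuple>
struct Sym {
  std::string Name;
  uint64_t Address = 0;
  uint64_t Size = 0;
};
static bool lessByName(const Sym &A, const Sym &B) {
  return std::make_tuple(A.Name, A.Size, A.Address) <
         std::make_tuple(B.Name, B.Size, B.Address);
}
static bool greaterByName(const Sym &A, const Sym &B) {
  return lessByName(B, A); // flip the arguments
}
static bool equalByName(const Sym &A, const Sym &B) {
  return !lessByName(A, B) && !lessByName(B, A); // equivalence under <
}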
+
static char isSymbolList64Bit(SymbolicFile &Obj) {
if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj))
return Triple(IRObj->getTargetTriple()).isArch64Bit();
@@ -263,7 +293,6 @@ static char isSymbolList64Bit(SymbolicFile &Obj) {
return false;
if (XCOFFObjectFile *XCOFFObj = dyn_cast<XCOFFObjectFile>(&Obj))
return XCOFFObj->is64Bit();
-
if (isa<WasmObjectFile>(Obj))
return false;
if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
@@ -274,7 +303,6 @@ static char isSymbolList64Bit(SymbolicFile &Obj) {
}
static StringRef CurrentFilename;
-static std::vector<NMSymbol> SymbolList;
static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
@@ -658,27 +686,28 @@ static void writeFileName(raw_ostream &S, StringRef ArchiveName,
}
}
-static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
- StringRef ArchiveName,
- StringRef ArchitectureName) {
- if (!NoSort) {
- using Comparator = bool (*)(const NMSymbol &, const NMSymbol &);
- Comparator Cmp;
- if (NumericSort)
- Cmp = &compareSymbolAddress;
- else if (SizeSort)
- Cmp = &compareSymbolSize;
- else
- Cmp = &compareSymbolName;
+static void sortSymbolList(std::vector<NMSymbol> &SymbolList) {
+ if (NoSort)
+ return;
- if (ReverseSort)
- llvm::sort(SymbolList, [=](const NMSymbol &A, const NMSymbol &B) -> bool {
- return Cmp(B, A);
- });
- else
- llvm::sort(SymbolList, Cmp);
+ if (ReverseSort)
+ llvm::sort(SymbolList, std::greater<>());
+ else
+ llvm::sort(SymbolList);
+}
+
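// Minimal sketch of the dispatch in sortSymbolList() above, with int standing
// in for NMSymbol: once < and > exist, reverse order is just the transparent
// std::greater<> comparator, so no hand-flipped lambda is needed.
#include <algorithm>
#include <functional>
#include <vector>
static void sortMaybeReversed(std::vector<int> &V, bool Reverse) {
  if (Reverse)
    std::sort(V.begin(), V.end(), std::greater<>()); // calls operator>
  else
    std::sort(V.begin(), V.end()); // calls operator<
}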
+static void printExportSymbolList(const std::vector<NMSymbol> &SymbolList) {
+ for (const NMSymbol &Sym : SymbolList) {
+ outs() << Sym.Name;
+ if (!Sym.Visibility.empty())
+ outs() << ' ' << Sym.Visibility;
+ outs() << '\n';
}
+}
+static void printSymbolList(SymbolicFile &Obj,
+ std::vector<NMSymbol> &SymbolList, bool printName,
+ StringRef ArchiveName, StringRef ArchitectureName) {
if (!PrintFileName) {
if ((OutputFormat == bsd || OutputFormat == posix ||
OutputFormat == just_symbols) &&
@@ -725,7 +754,9 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
}
for (const NMSymbol &S : SymbolList) {
- uint32_t SymFlags;
+ if (!S.shouldPrint())
+ continue;
+
std::string Name = S.Name;
MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
if (Demangle) {
@@ -737,25 +768,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
if (Optional<std::string> Opt = Fn(S.Name))
Name = *Opt;
}
- if (S.Sym.getRawDataRefImpl().p) {
- Expected<uint32_t> SymFlagsOrErr = S.Sym.getFlags();
- if (!SymFlagsOrErr) {
- // TODO: Test this error.
- error(SymFlagsOrErr.takeError(), Obj.getFileName());
- return;
- }
- SymFlags = *SymFlagsOrErr;
- } else
- SymFlags = S.SymFlags;
- bool Undefined = SymFlags & SymbolRef::SF_Undefined;
- bool Global = SymFlags & SymbolRef::SF_Global;
- bool Weak = SymFlags & SymbolRef::SF_Weak;
- bool FormatSpecific = SymFlags & SymbolRef::SF_FormatSpecific;
- if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) ||
- (!Global && ExternalOnly) || (Weak && NoWeakSymbols) ||
- (FormatSpecific && !(SpecialSyms || DebugSyms)))
- continue;
if (PrintFileName)
writeFileName(outs(), ArchiveName, ArchitectureName);
if ((OutputFormat == just_symbols ||
@@ -1141,7 +1154,7 @@ static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
// getNsectForSegSect() is used to implement the Mach-O "-s segname sectname"
// option to dump only those symbols from that section in a Mach-O file.
-// It is called once for each Mach-O file from dumpSymbolNamesFromObject()
+// It is called once for each Mach-O file from getSymbolNamesFromObject()
// to get the section number for that named section from the command line
// arguments. It returns the section number for that section in the Mach-O
// file or zero if it is not present.
@@ -1163,7 +1176,7 @@ static unsigned getNsectForSegSect(MachOObjectFile *Obj) {
// getNsectInMachO() is used to implement the Mach-O "-s segname sectname"
// option to dump only those symbols from that section in a Mach-O file.
// It is called once for each symbol in a Mach-O file from
-// dumpSymbolNamesFromObject() and returns the section number for that symbol
+// getSymbolNamesFromObject() and returns the section number for that symbol
// if it is in a section, else it returns 0.
static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) {
DataRefImpl Symb = Sym.getRawDataRefImpl();
@@ -1175,7 +1188,8 @@ static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) {
return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
}
-static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO) {
+static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO,
+ std::vector<NMSymbol> &SymbolList) {
size_t I = SymbolList.size();
std::string ExportsNameBuffer;
raw_string_ostream EOS(ExportsNameBuffer);
@@ -1642,28 +1656,127 @@ static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO) {
}
}
-static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
- StringRef ArchiveName = {},
- StringRef ArchitectureName = {}) {
+static bool shouldDump(SymbolicFile &Obj) {
+  // The -X option is currently only implemented for XCOFF, ELF, and IR object
+  // files. The option is not fundamentally incompatible with other formats;
+  // it just has not been implemented for them yet.
+ if (!isa<XCOFFObjectFile>(Obj) && !isa<ELFObjectFileBase>(Obj) &&
+ !isa<IRObjectFile>(Obj))
+ return true;
+
+ return isSymbolList64Bit(Obj) ? BitMode != BitModeTy::Bit32
+ : BitMode != BitModeTy::Bit64;
+}
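// The -X filter above, restated as a standalone truth table (Mode mirrors
// BitModeTy): a 64-bit object is skipped only under -X32, a 32-bit object
// only under -X64; Bit32_64 and Any accept both.
enum class Mode { Bit32, Bit64, Bit32_64, Any };
static bool shouldDumpForMode(bool Is64Bit, Mode M) {
  return Is64Bit ? M != Mode::Bit32 : M != Mode::Bit64;
}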
+
+static void getXCOFFExports(XCOFFObjectFile *XCOFFObj,
+ std::vector<NMSymbol> &SymbolList,
+ StringRef ArchiveName) {
+  // Skip shared object files.
+ if (XCOFFObj->getFlags() & XCOFF::F_SHROBJ)
+ return;
+
+ for (SymbolRef Sym : XCOFFObj->symbols()) {
+    // There is no visibility attribute in the old 32-bit XCOFF interpretation.
+ bool HasVisibilityAttr =
+ XCOFFObj->is64Bit() || (XCOFFObj->auxiliaryHeader32() &&
+ (XCOFFObj->auxiliaryHeader32()->getVersion() ==
+ XCOFF::NEW_XCOFF_INTERPRET));
+
+ if (HasVisibilityAttr) {
+ XCOFFSymbolRef XCOFFSym = XCOFFObj->toSymbolRef(Sym.getRawDataRefImpl());
+ uint16_t SymType = XCOFFSym.getSymbolType();
+ if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_INTERNAL)
+ continue;
+ if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_HIDDEN)
+ continue;
+ }
+
+ Expected<section_iterator> SymSecOrErr = Sym.getSection();
+ if (!SymSecOrErr) {
+ warn(SymSecOrErr.takeError(), XCOFFObj->getFileName(),
+ "for symbol with index " +
+ Twine(XCOFFObj->getSymbolIndex(Sym.getRawDataRefImpl().p)),
+ ArchiveName);
+ continue;
+ }
+ section_iterator SecIter = *SymSecOrErr;
+    // If the symbol is not in a text, data, or BSS section, it is not
+    // exported.
+ if (SecIter == XCOFFObj->section_end())
+ continue;
+ if (!(SecIter->isText() || SecIter->isData() || SecIter->isBSS()))
+ continue;
+
+ StringRef SymName = cantFail(Sym.getName());
+ if (SymName.empty())
+ continue;
+ if (SymName.startswith("__sinit") || SymName.startswith("__sterm") ||
+ SymName.front() == '.' || SymName.front() == '(')
+ continue;
+
+    // Skip the symbol if its name matches the pattern "^__[0-9]+__$".
+ if (SymName.size() > 4 && SymName.startswith("__") &&
+ SymName.endswith("__")) {
+ if (std::all_of(SymName.begin() + 2, SymName.end() - 2, isDigit))
+ continue;
+ }
+
+ if (SymName == "__rsrc" && NoRsrc)
+ continue;
+
+ if (SymName.startswith("__tf1"))
+ SymName = SymName.substr(6);
+ else if (SymName.startswith("__tf9"))
+ SymName = SymName.substr(14);
+
+ NMSymbol S = {};
+ S.Name = SymName.str();
+ S.Sym = Sym;
+
+ if (HasVisibilityAttr) {
+ XCOFFSymbolRef XCOFFSym = XCOFFObj->toSymbolRef(Sym.getRawDataRefImpl());
+ uint16_t SymType = XCOFFSym.getSymbolType();
+ if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_PROTECTED)
+ S.Visibility = "protected";
+ else if ((SymType & XCOFF::VISIBILITY_MASK) == XCOFF::SYM_V_EXPORTED)
+ S.Visibility = "export";
+ }
+ if (S.initializeFlags(*XCOFFObj))
+ SymbolList.push_back(S);
+ }
+}
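// Sketch of the visibility decoding used twice in getXCOFFExports() above.
// The numeric values here are stand-ins for the llvm::XCOFF constants; the
// point is the pattern: visibility occupies a masked bit-field of the symbol
// type, so one AND plus equality tests classifies each symbol.
#include <cstdint>
namespace sketch {
constexpr uint16_t VisMask = 0x7000;      // stand-in for VISIBILITY_MASK
constexpr uint16_t VisInternal = 0x1000;  // stand-in for SYM_V_INTERNAL
constexpr uint16_t VisHidden = 0x2000;    // stand-in for SYM_V_HIDDEN
constexpr uint16_t VisProtected = 0x3000; // stand-in for SYM_V_PROTECTED
constexpr uint16_t VisExported = 0x4000;  // stand-in for SYM_V_EXPORTED
// Returns nullptr for symbols the export list must drop, "" for exported
// symbols with no visibility string, or the string to print.
inline const char *classifyVisibility(uint16_t SymType) {
  switch (SymType & VisMask) {
  case VisInternal:
  case VisHidden:
    return nullptr;
  case VisProtected:
    return "protected";
  case VisExported:
    return "export";
  default:
    return "";
  }
}
} // namespace sketch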
+
+static Expected<SymbolicFile::basic_symbol_iterator_range>
+getDynamicSyms(SymbolicFile &Obj) {
+ const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
+ if (!E)
+ return createError("File format has no dynamic symbol table");
+ return E->getDynamicSymbolIterators();
+}
+
+// Returns false if an error is found, true otherwise.
+static bool getSymbolNamesFromObject(SymbolicFile &Obj,
+ std::vector<NMSymbol> &SymbolList) {
auto Symbols = Obj.symbols();
std::vector<VersionEntry> SymbolVersions;
+
if (DynamicSyms) {
- const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
- if (!E) {
- error("File format has no dynamic symbol table", Obj.getFileName());
- return;
+ Expected<SymbolicFile::basic_symbol_iterator_range> SymbolsOrErr =
+ getDynamicSyms(Obj);
+ if (!SymbolsOrErr) {
+ error(SymbolsOrErr.takeError(), Obj.getFileName());
+ return false;
+ }
+ Symbols = *SymbolsOrErr;
+ if (const auto *E = dyn_cast<ELFObjectFileBase>(&Obj)) {
+ if (Expected<std::vector<VersionEntry>> VersionsOrErr =
+ E->readDynsymVersions())
+ SymbolVersions = std::move(*VersionsOrErr);
+ else
+ WithColor::warning(errs(), ToolName)
+ << "unable to read symbol versions: "
+ << toString(VersionsOrErr.takeError()) << "\n";
}
- Symbols = E->getDynamicSymbolIterators();
-
- if (Expected<std::vector<VersionEntry>> VersionsOrErr =
- E->readDynsymVersions())
- SymbolVersions = std::move(*VersionsOrErr);
- else
- WithColor::warning(errs(), ToolName)
- << "unable to read symbol versions: "
- << toString(VersionsOrErr.takeError()) << "\n";
}
-
// If a "-s segname sectname" option was specified and this is a Mach-O
// file, get the section number for that section in this object file.
unsigned int Nsect = 0;
@@ -1672,8 +1785,9 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
Nsect = getNsectForSegSect(MachO);
// If this section is not in the object file no symbols are printed.
if (Nsect == 0)
- return;
+ return false;
}
+
if (!(MachO && DyldInfoOnly)) {
size_t I = -1;
for (BasicSymbolRef Sym : Symbols) {
@@ -1681,7 +1795,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
if (!SymFlagsOrErr) {
error(SymFlagsOrErr.takeError(), Obj.getFileName());
- return;
+ return false;
}
    // Don't drop format specific symbols for ARM and AArch64 ELF targets, they
@@ -1734,7 +1848,8 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
(SymbolVersions[I].IsVerDef ? "@@" : "@") + SymbolVersions[I].Name;
S.Sym = Sym;
- SymbolList.push_back(S);
+ if (S.initializeFlags(Obj))
+ SymbolList.push_back(S);
}
}
@@ -1745,16 +1860,66 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
// language symbols for example. The option -only-dyldinfo will fake up
// all symbols from the dyld export trie as well as the bind info.
if (MachO && !NoDyldInfo)
- dumpSymbolsFromDLInfoMachO(*MachO);
+ dumpSymbolsFromDLInfoMachO(*MachO, SymbolList);
+ return true;
+}
+
+static void printObjectLabel(bool PrintArchiveName, StringRef ArchiveName,
+ StringRef ArchitectureName,
+ StringRef ObjectFileName) {
+ outs() << "\n";
+ if (ArchiveName.empty() || !PrintArchiveName)
+ outs() << ObjectFileName;
+ else
+ outs() << ArchiveName << "(" << ObjectFileName << ")";
+ if (!ArchitectureName.empty())
+ outs() << " (for architecture " << ArchitectureName << ")";
+ outs() << ":\n";
+}
+
+static Expected<bool> hasSymbols(SymbolicFile &Obj) {
+ if (DynamicSyms) {
+ Expected<SymbolicFile::basic_symbol_iterator_range> DynamicSymsOrErr =
+ getDynamicSyms(Obj);
+ if (!DynamicSymsOrErr)
+ return DynamicSymsOrErr.takeError();
+ return !DynamicSymsOrErr->empty();
+ }
+ return !Obj.symbols().empty();
+}
+
+static void dumpSymbolNamesFromObject(
+ SymbolicFile &Obj, std::vector<NMSymbol> &SymbolList,
+ bool PrintSymbolObject, bool PrintObjectLabel, StringRef ArchiveName = {},
+ StringRef ArchitectureName = {}, StringRef ObjectName = {},
+ bool PrintArchiveName = true) {
+ if (!shouldDump(Obj))
+ return;
+
+ if (ExportSymbols && Obj.isXCOFF()) {
+ XCOFFObjectFile *XCOFFObj = cast<XCOFFObjectFile>(&Obj);
+ getXCOFFExports(XCOFFObj, SymbolList, ArchiveName);
+ return;
+ }
+
+ if (PrintObjectLabel && !ExportSymbols)
+ printObjectLabel(PrintArchiveName, ArchiveName, ArchitectureName,
+ ObjectName.empty() ? Obj.getFileName() : ObjectName);
+ if (!getSymbolNamesFromObject(Obj, SymbolList) || ExportSymbols)
+ return;
CurrentFilename = Obj.getFileName();
- if (Symbols.empty() && SymbolList.empty() && !Quiet) {
+  // Any error hasSymbols() could produce would already have been hit in
+  // getSymbolNamesFromObject() above, so cantFail() is safe here.
+ if (!cantFail(hasSymbols(Obj)) && SymbolList.empty() && !Quiet) {
writeFileName(errs(), ArchiveName, ArchitectureName);
errs() << "no symbols\n";
}
- sortAndPrintSymbolList(Obj, printName, ArchiveName, ArchitectureName);
+ sortSymbolList(SymbolList);
+ printSymbolList(Obj, SymbolList, PrintSymbolObject, ArchiveName,
+ ArchitectureName);
}
// checkMachOAndArchFlags() checks to see if the SymbolicFile is a Mach-O file
@@ -1762,7 +1927,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
// check to make sure this Mach-O file is one of those architectures or all
// architectures were specified. If not, an error is generated and this
// routine returns false. Else it returns true.
-static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
+static bool checkMachOAndArchFlags(SymbolicFile *O, StringRef Filename) {
auto *MachO = dyn_cast<MachOObjectFile>(O);
if (!MachO || ArchAll || ArchFlags.empty())
@@ -1789,282 +1954,172 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
return true;
}
-static void dumpSymbolNamesFromFile(std::string &Filename) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
- MemoryBuffer::getFileOrSTDIN(Filename);
- if (error(BufferOrErr.getError(), Filename))
- return;
-
- LLVMContext Context;
- LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
- Expected<std::unique_ptr<Binary>> BinaryOrErr =
- createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
- if (!BinaryOrErr) {
- error(BinaryOrErr.takeError(), Filename);
- return;
- }
- Binary &Bin = *BinaryOrErr.get();
-
- if (Archive *A = dyn_cast<Archive>(&Bin)) {
- if (ArchiveMap) {
- Archive::symbol_iterator I = A->symbol_begin();
- Archive::symbol_iterator E = A->symbol_end();
- if (I != E) {
- outs() << "Archive map\n";
- for (; I != E; ++I) {
- Expected<Archive::Child> C = I->getMember();
- if (!C) {
- error(C.takeError(), Filename);
- break;
- }
- Expected<StringRef> FileNameOrErr = C->getName();
- if (!FileNameOrErr) {
- error(FileNameOrErr.takeError(), Filename);
- break;
- }
- StringRef SymName = I->getName();
- outs() << SymName << " in " << FileNameOrErr.get() << "\n";
- }
- outs() << "\n";
+static void dumpArchiveMap(Archive *A, StringRef Filename) {
+ Archive::symbol_iterator I = A->symbol_begin();
+ Archive::symbol_iterator E = A->symbol_end();
+ if (I != E) {
+ outs() << "Archive map\n";
+ for (; I != E; ++I) {
+ Expected<Archive::Child> C = I->getMember();
+ if (!C) {
+ error(C.takeError(), Filename);
+ break;
}
+ Expected<StringRef> FileNameOrErr = C->getName();
+ if (!FileNameOrErr) {
+ error(FileNameOrErr.takeError(), Filename);
+ break;
+ }
+ StringRef SymName = I->getName();
+ outs() << SymName << " in " << FileNameOrErr.get() << "\n";
}
+ outs() << "\n";
+ }
+}
- {
- Error Err = Error::success();
- for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr =
- C.getAsBinary(ContextPtr);
- if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- error(std::move(E), Filename, C);
- continue;
- }
- if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
- if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
- WithColor::warning(errs(), ToolName)
- << "sizes with -print-size for Mach-O files are always zero.\n";
- MachOPrintSizeWarning = true;
- }
- if (!checkMachOAndArchFlags(O, Filename))
- return;
- if (!PrintFileName) {
- outs() << "\n";
- if (isa<MachOObjectFile>(O)) {
- outs() << Filename << "(" << O->getFileName() << ")";
- } else
- outs() << O->getFileName();
- outs() << ":\n";
- }
- dumpSymbolNamesFromObject(*O, false, Filename);
- }
+static void dumpArchive(Archive *A, std::vector<NMSymbol> &SymbolList,
+ StringRef Filename, LLVMContext *ContextPtr) {
+ if (ArchiveMap)
+ dumpArchiveMap(A, Filename);
+
+ Error Err = Error::success();
+ for (auto &C : A->children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(ContextPtr);
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ error(std::move(E), Filename, C);
+ continue;
+ }
+ if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
+ if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
+ WithColor::warning(errs(), ToolName)
+ << "sizes with -print-size for Mach-O files are always zero.\n";
+ MachOPrintSizeWarning = true;
}
- if (Err)
- error(std::move(Err), A->getFileName());
+ if (!checkMachOAndArchFlags(O, Filename))
+ return;
+ dumpSymbolNamesFromObject(*O, SymbolList, /*PrintSymbolObject=*/false,
+ !PrintFileName, Filename,
+ /*ArchitectureName=*/{}, O->getFileName(),
+ /*PrintArchiveName=*/false);
}
- return;
}
- if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
- // If we have a list of architecture flags specified dump only those.
- if (!ArchAll && !ArchFlags.empty()) {
- // Look for a slice in the universal binary that matches each ArchFlag.
- bool ArchFound;
- for (unsigned i = 0; i < ArchFlags.size(); ++i) {
- ArchFound = false;
- for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- if (ArchFlags[i] == I->getArchFlagName()) {
- ArchFound = true;
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
- I->getAsObjectFile();
- std::string ArchiveName;
- std::string ArchitectureName;
- ArchiveName.clear();
- ArchitectureName.clear();
- if (ObjOrErr) {
- ObjectFile &Obj = *ObjOrErr.get();
- if (ArchFlags.size() > 1) {
- if (PrintFileName)
- ArchitectureName = I->getArchFlagName();
- else
- outs() << "\n" << Obj.getFileName() << " (for architecture "
- << I->getArchFlagName() << ")"
- << ":\n";
+ if (Err)
+ error(std::move(Err), A->getFileName());
+}
+
+static void dumpMachOUniversalBinaryMatchArchFlags(
+ MachOUniversalBinary *UB, std::vector<NMSymbol> &SymbolList,
+ StringRef Filename, LLVMContext *ContextPtr) {
+ // Look for a slice in the universal binary that matches each ArchFlag.
+ bool ArchFound;
+ for (unsigned i = 0; i < ArchFlags.size(); ++i) {
+ ArchFound = false;
+ for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
+ E = UB->end_objects();
+ I != E; ++I) {
+ if (ArchFlags[i] == I->getArchFlagName()) {
+ ArchFound = true;
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
+ std::string ArchiveName;
+ std::string ArchitectureName;
+ ArchiveName.clear();
+ ArchitectureName.clear();
+ if (ObjOrErr) {
+ ObjectFile &Obj = *ObjOrErr.get();
+ if (ArchFlags.size() > 1)
+ ArchitectureName = I->getArchFlagName();
+ dumpSymbolNamesFromObject(Obj, SymbolList,
+ /*PrintSymbolObject=*/false,
+ (ArchFlags.size() > 1) && !PrintFileName,
+ ArchiveName, ArchitectureName);
+ } else if (auto E =
+ isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
+ error(std::move(E), Filename,
+ ArchFlags.size() > 1 ? StringRef(I->getArchFlagName())
+ : StringRef());
+ continue;
+ } else if (Expected<std::unique_ptr<Archive>> AOrErr =
+ I->getAsArchive()) {
+ std::unique_ptr<Archive> &A = *AOrErr;
+ Error Err = Error::success();
+ for (auto &C : A->children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr =
+ C.getAsBinary(ContextPtr);
+ if (!ChildOrErr) {
+ if (auto E =
+ isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
+ error(std::move(E), Filename, C,
+ ArchFlags.size() > 1 ? StringRef(I->getArchFlagName())
+ : StringRef());
}
- dumpSymbolNamesFromObject(Obj, false, ArchiveName,
- ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- error(std::move(E), Filename, ArchFlags.size() > 1 ?
- StringRef(I->getArchFlagName()) : StringRef());
continue;
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
- std::unique_ptr<Archive> &A = *AOrErr;
- Error Err = Error::success();
- for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr =
- C.getAsBinary(ContextPtr);
- if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(
- ChildOrErr.takeError())) {
- error(std::move(E), Filename, C, ArchFlags.size() > 1 ?
- StringRef(I->getArchFlagName()) : StringRef());
- }
- continue;
- }
- if (SymbolicFile *O =
- dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
- if (PrintFileName) {
- ArchiveName = std::string(A->getFileName());
- if (ArchFlags.size() > 1)
- ArchitectureName = I->getArchFlagName();
- } else {
- outs() << "\n" << A->getFileName();
- outs() << "(" << O->getFileName() << ")";
- if (ArchFlags.size() > 1) {
- outs() << " (for architecture " << I->getArchFlagName()
- << ")";
- }
- outs() << ":\n";
- }
- dumpSymbolNamesFromObject(*O, false, ArchiveName,
- ArchitectureName);
- }
- }
- if (Err)
- error(std::move(Err), A->getFileName());
- } else {
- consumeError(AOrErr.takeError());
- error(Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file",
- "Mach-O universal file");
}
- }
- }
- if (!ArchFound) {
- error(ArchFlags[i],
- "file: " + Filename + " does not contain architecture");
- return;
- }
- }
- return;
- }
- // No architecture flags were specified so if this contains a slice that
- // matches the host architecture dump only that.
- if (!ArchAll) {
- Triple HostTriple = MachOObjectFile::getHostArch();
- StringRef HostArchName = HostTriple.getArchName();
- for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- if (HostArchName == I->getArchFlagName()) {
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
- std::string ArchiveName;
- if (ObjOrErr) {
- ObjectFile &Obj = *ObjOrErr.get();
- dumpSymbolNamesFromObject(Obj, false);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- error(std::move(E), Filename);
- return;
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
- std::unique_ptr<Archive> &A = *AOrErr;
- Error Err = Error::success();
- for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr =
- C.getAsBinary(ContextPtr);
- if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(
- ChildOrErr.takeError()))
- error(std::move(E), Filename, C);
- continue;
- }
- if (SymbolicFile *O =
- dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
- if (PrintFileName)
- ArchiveName = std::string(A->getFileName());
- else
- outs() << "\n" << A->getFileName() << "(" << O->getFileName()
- << ")"
- << ":\n";
- dumpSymbolNamesFromObject(*O, false, ArchiveName);
- }
+ if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
+ ArchiveName = std::string(A->getFileName());
+ if (ArchFlags.size() > 1)
+ ArchitectureName = I->getArchFlagName();
+ dumpSymbolNamesFromObject(
+ *O, SymbolList, /*PrintSymbolObject=*/false, !PrintFileName,
+ ArchiveName, ArchitectureName);
}
- if (Err)
- error(std::move(Err), A->getFileName());
- } else {
- consumeError(AOrErr.takeError());
- error(Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
- " is not a Mach-O file or an archive file",
- "Mach-O universal file");
}
- return;
+ if (Err)
+ error(std::move(Err), A->getFileName());
+ } else {
+ consumeError(AOrErr.takeError());
+ error(Filename + " for architecture " +
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file",
+ "Mach-O universal file");
}
}
}
- // Either all architectures have been specified or none have been specified
- // and this does not contain the host architecture so dump all the slices.
- bool moreThanOneArch = UB->getNumberOfObjects() > 1;
- for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) {
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr = O.getAsObjectFile();
+ if (!ArchFound) {
+ error(ArchFlags[i],
+ "file: " + Filename + " does not contain architecture");
+ return;
+ }
+ }
+}
+
+// Returns true if the binary contains a slice that matches the host
+// architecture, or false otherwise.
+static bool dumpMachOUniversalBinaryMatchHost(MachOUniversalBinary *UB,
+ std::vector<NMSymbol> &SymbolList,
+ StringRef Filename,
+ LLVMContext *ContextPtr) {
+ Triple HostTriple = MachOObjectFile::getHostArch();
+ StringRef HostArchName = HostTriple.getArchName();
+ for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
+ E = UB->end_objects();
+ I != E; ++I) {
+ if (HostArchName == I->getArchFlagName()) {
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchiveName;
- std::string ArchitectureName;
- ArchiveName.clear();
- ArchitectureName.clear();
if (ObjOrErr) {
ObjectFile &Obj = *ObjOrErr.get();
- if (PrintFileName) {
- if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
- ArchitectureName = O.getArchFlagName();
- } else {
- if (moreThanOneArch)
- outs() << "\n";
- outs() << Obj.getFileName();
- if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
- outs() << " (for architecture " << O.getArchFlagName() << ")";
- outs() << ":\n";
- }
- dumpSymbolNamesFromObject(Obj, false, ArchiveName, ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- error(std::move(E), Filename, moreThanOneArch ?
- StringRef(O.getArchFlagName()) : StringRef());
- continue;
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- O.getAsArchive()) {
+ dumpSymbolNamesFromObject(Obj, SymbolList, /*PrintSymbolObject=*/false,
+ /*PrintObjectLabel=*/false);
+ } else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError()))
+ error(std::move(E), Filename);
+ else if (Expected<std::unique_ptr<Archive>> AOrErr = I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr =
- C.getAsBinary(ContextPtr);
+ C.getAsBinary(ContextPtr);
if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(
- ChildOrErr.takeError()))
- error(std::move(E), Filename, C, moreThanOneArch ?
- StringRef(ArchitectureName) : StringRef());
+ if (auto E =
+ isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ error(std::move(E), Filename, C);
continue;
}
- if (SymbolicFile *F = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
- if (PrintFileName) {
- ArchiveName = std::string(A->getFileName());
- if (isa<MachOObjectFile>(F) && moreThanOneArch)
- ArchitectureName = O.getArchFlagName();
- } else {
- outs() << "\n" << A->getFileName();
- if (isa<MachOObjectFile>(F)) {
- outs() << "(" << F->getFileName() << ")";
- if (moreThanOneArch)
- outs() << " (for architecture " << O.getArchFlagName()
- << ")";
- } else
- outs() << ":" << F->getFileName();
- outs() << ":\n";
- }
- dumpSymbolNamesFromObject(*F, false, ArchiveName, ArchitectureName);
+ if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
+ ArchiveName = std::string(A->getFileName());
+ dumpSymbolNamesFromObject(*O, SymbolList,
+ /*PrintSymbolObject=*/false,
+ !PrintFileName, ArchiveName);
}
}
if (Err)
@@ -2072,49 +2127,176 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
} else {
consumeError(AOrErr.takeError());
error(Filename + " for architecture " +
- StringRef(O.getArchFlagName()) +
- " is not a Mach-O file or an archive file",
+ StringRef(I->getArchFlagName()) +
+ " is not a Mach-O file or an archive file",
"Mach-O universal file");
}
+ return true;
}
- return;
}
+ return false;
+}
- if (TapiUniversal *TU = dyn_cast<TapiUniversal>(&Bin)) {
- for (const TapiUniversal::ObjectForArch &I : TU->objects()) {
- StringRef ArchName = I.getArchFlagName();
- const bool ShowArch =
- ArchFlags.empty() || llvm::is_contained(ArchFlags, ArchName);
- if (!ShowArch)
- continue;
- if (!AddInlinedInfo && !I.isTopLevelLib())
- continue;
- if (auto ObjOrErr = I.getAsObjectFile()) {
- outs() << "\n"
- << I.getInstallName() << " (for architecture " << ArchName << ")"
- << ":\n";
- dumpSymbolNamesFromObject(*ObjOrErr.get(), false, {}, ArchName);
- } else if (Error E =
- isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
- error(std::move(E), Filename, ArchName);
+static void dumpMachOUniversalBinaryArchAll(MachOUniversalBinary *UB,
+ std::vector<NMSymbol> &SymbolList,
+ StringRef Filename,
+ LLVMContext *ContextPtr) {
+ bool moreThanOneArch = UB->getNumberOfObjects() > 1;
+ for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) {
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = O.getAsObjectFile();
+ std::string ArchiveName;
+ std::string ArchitectureName;
+ ArchiveName.clear();
+ ArchitectureName.clear();
+ if (ObjOrErr) {
+ ObjectFile &Obj = *ObjOrErr.get();
+ if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
+ ArchitectureName = O.getArchFlagName();
+ dumpSymbolNamesFromObject(Obj, SymbolList, /*PrintSymbolObject=*/false,
+ !PrintFileName, ArchiveName, ArchitectureName);
+ } else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
+ error(std::move(E), Filename,
+ moreThanOneArch ? StringRef(O.getArchFlagName()) : StringRef());
+ continue;
+ } else if (Expected<std::unique_ptr<Archive>> AOrErr = O.getAsArchive()) {
+ std::unique_ptr<Archive> &A = *AOrErr;
+ Error Err = Error::success();
+ for (auto &C : A->children(Err)) {
+ Expected<std::unique_ptr<Binary>> ChildOrErr =
+ C.getAsBinary(ContextPtr);
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ error(std::move(E), Filename, C,
+ moreThanOneArch ? StringRef(ArchitectureName) : StringRef());
+ continue;
+ }
+ if (SymbolicFile *F = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
+ ArchiveName = std::string(A->getFileName());
+ if (isa<MachOObjectFile>(F) && moreThanOneArch)
+ ArchitectureName = O.getArchFlagName();
+ dumpSymbolNamesFromObject(*F, SymbolList, /*PrintSymbolObject=*/false,
+ !PrintFileName, ArchiveName,
+ ArchitectureName);
+ }
}
+ if (Err)
+ error(std::move(Err), A->getFileName());
+ } else {
+ consumeError(AOrErr.takeError());
+ error(Filename + " for architecture " + StringRef(O.getArchFlagName()) +
+ " is not a Mach-O file or an archive file",
+ "Mach-O universal file");
}
+ }
+}
+static void dumpMachOUniversalBinary(MachOUniversalBinary *UB,
+ std::vector<NMSymbol> &SymbolList,
+ StringRef Filename,
+ LLVMContext *ContextPtr) {
+ // If we have a list of architecture flags specified dump only those.
+ if (!ArchAll && !ArchFlags.empty()) {
+ dumpMachOUniversalBinaryMatchArchFlags(UB, SymbolList, Filename,
+ ContextPtr);
return;
}
- if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin)) {
- if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
- WithColor::warning(errs(), ToolName)
- << "sizes with --print-size for Mach-O files are always zero.\n";
- MachOPrintSizeWarning = true;
+ // No architecture flags were specified so if this contains a slice that
+ // matches the host architecture dump only that.
+ if (!ArchAll &&
+ dumpMachOUniversalBinaryMatchHost(UB, SymbolList, Filename, ContextPtr))
+ return;
+
+ // Either all architectures have been specified or none have been specified
+  // and this does not contain the host architecture, so dump all the slices.
+ dumpMachOUniversalBinaryArchAll(UB, SymbolList, Filename, ContextPtr);
+}
+
+static void dumpTapiUniversal(TapiUniversal *TU,
+ std::vector<NMSymbol> &SymbolList,
+ StringRef Filename) {
+ for (const TapiUniversal::ObjectForArch &I : TU->objects()) {
+ StringRef ArchName = I.getArchFlagName();
+ const bool ShowArch =
+ ArchFlags.empty() || llvm::is_contained(ArchFlags, ArchName);
+ if (!ShowArch)
+ continue;
+ if (!AddInlinedInfo && !I.isTopLevelLib())
+ continue;
+ if (auto ObjOrErr = I.getAsObjectFile())
+ dumpSymbolNamesFromObject(
+ *ObjOrErr.get(), SymbolList, /*PrintSymbolObject=*/false,
+ /*PrintObjectLabel=*/true,
+ /*ArchiveName=*/{}, ArchName, I.getInstallName());
+ else if (Error E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
+ error(std::move(E), Filename, ArchName);
}
- if (!checkMachOAndArchFlags(O, Filename))
- return;
- dumpSymbolNamesFromObject(*O, true);
}
}
+static void dumpSymbolicFile(SymbolicFile *O, std::vector<NMSymbol> &SymbolList,
+ StringRef Filename) {
+ if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
+ WithColor::warning(errs(), ToolName)
+ << "sizes with --print-size for Mach-O files are always zero.\n";
+ MachOPrintSizeWarning = true;
+ }
+ if (!checkMachOAndArchFlags(O, Filename))
+ return;
+ dumpSymbolNamesFromObject(*O, SymbolList, /*PrintSymbolObject=*/true,
+ /*PrintObjectLabel=*/false);
+}
+
+static std::vector<NMSymbol> dumpSymbolNamesFromFile(StringRef Filename) {
+ std::vector<NMSymbol> SymbolList;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFileOrSTDIN(Filename);
+ if (error(BufferOrErr.getError(), Filename))
+ return SymbolList;
+
+ // Always enable opaque pointers, to handle archives with mixed typed and
+ // opaque pointer bitcode files gracefully. As we're only reading symbols,
+ // the used pointer types don't matter.
+ LLVMContext Context;
+ Context.setOpaquePointers(true);
+ LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
+ Expected<std::unique_ptr<Binary>> BinaryOrErr =
+ createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
+ if (!BinaryOrErr) {
+ error(BinaryOrErr.takeError(), Filename);
+ return SymbolList;
+ }
+ Binary &Bin = *BinaryOrErr.get();
+ if (Archive *A = dyn_cast<Archive>(&Bin))
+ dumpArchive(A, SymbolList, Filename, ContextPtr);
+ else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin))
+ dumpMachOUniversalBinary(UB, SymbolList, Filename, ContextPtr);
+ else if (TapiUniversal *TU = dyn_cast<TapiUniversal>(&Bin))
+ dumpTapiUniversal(TU, SymbolList, Filename);
+ else if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin))
+ dumpSymbolicFile(O, SymbolList, Filename);
+ return SymbolList;
+}
+
+static void
+exportSymbolNamesFromFiles(const std::vector<std::string> &InputFilenames) {
+ std::vector<NMSymbol> SymbolList;
+ for (const auto &FileName : InputFilenames) {
+ std::vector<NMSymbol> FileSymList = dumpSymbolNamesFromFile(FileName);
+ SymbolList.insert(SymbolList.end(), FileSymList.begin(), FileSymList.end());
+ }
+
+  // Delete symbols that should not be printed from SymbolList.
+  SymbolList.erase(
+      llvm::remove_if(SymbolList,
+                     [](const NMSymbol &S) { return !S.shouldPrint(); }),
+      SymbolList.end());
+ sortSymbolList(SymbolList);
+ SymbolList.erase(std::unique(SymbolList.begin(), SymbolList.end()),
+ SymbolList.end());
+ printExportSymbolList(SymbolList);
+}
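// The filter/sort/unique pipeline of exportSymbolNamesFromFiles() above in
// standalone form (std::string stands in for NMSymbol, an empty name for
// "should not print"): erase(remove_if) drops unwanted entries, sorting
// groups duplicates, and std::unique, which only collapses adjacent
// duplicates, then leaves exactly one of each.
#include <algorithm>
#include <string>
#include <vector>
static void dedupForExport(std::vector<std::string> &Syms) {
  Syms.erase(std::remove_if(Syms.begin(), Syms.end(),
                            [](const std::string &S) { return S.empty(); }),
             Syms.end());
  std::sort(Syms.begin(), Syms.end());
  Syms.erase(std::unique(Syms.begin(), Syms.end()), Syms.end());
}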
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
BumpPtrAllocator A;
@@ -2169,6 +2351,12 @@ int main(int argc, char **argv) {
PrintFileName = Args.hasArg(OPT_print_file_name);
PrintSize = Args.hasArg(OPT_print_size);
ReverseSort = Args.hasArg(OPT_reverse_sort);
+ ExportSymbols = Args.hasArg(OPT_export_symbols);
+ if (ExportSymbols) {
+ ExternalOnly = true;
+ DefinedOnly = true;
+ }
+
Quiet = Args.hasArg(OPT_quiet);
V = Args.getLastArgValue(OPT_radix_EQ, "x");
if (V == "o")
@@ -2185,6 +2373,18 @@ int main(int argc, char **argv) {
UndefinedOnly = Args.hasArg(OPT_undefined_only);
WithoutAliases = Args.hasArg(OPT_without_aliases);
+ StringRef Mode = Args.getLastArgValue(OPT_X, "any");
+ if (Mode == "32")
+ BitMode = BitModeTy::Bit32;
+ else if (Mode == "64")
+ BitMode = BitModeTy::Bit64;
+ else if (Mode == "32_64")
+ BitMode = BitModeTy::Bit32_64;
+ else if (Mode == "any")
+ BitMode = BitModeTy::Any;
+ else
+ error("-X value should be one of: 32, 64, 32_64, (default) any");
+
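// An equivalent formulation of the -X parsing above, sketched with
// llvm::StringSwitch (already included by this file); the patch itself uses
// the if/else chain, this is only an illustration of the same mapping.
#include "llvm/ADT/StringSwitch.h"
#include <optional>
static std::optional<BitModeTy> parseBitMode(llvm::StringRef V) {
  return llvm::StringSwitch<std::optional<BitModeTy>>(V)
      .Case("32", BitModeTy::Bit32)
      .Case("64", BitModeTy::Bit64)
      .Case("32_64", BitModeTy::Bit32_64)
      .Case("any", BitModeTy::Any) // the default value of -X
      .Default(std::nullopt);      // caller reports the error
}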
// Mach-O specific options.
FormatMachOasHex = Args.hasArg(OPT_x);
AddDyldInfo = Args.hasArg(OPT_add_dyldinfo);
@@ -2192,6 +2392,9 @@ int main(int argc, char **argv) {
DyldInfoOnly = Args.hasArg(OPT_dyldinfo_only);
NoDyldInfo = Args.hasArg(OPT_no_dyldinfo);
+ // XCOFF specific options.
+ NoRsrc = Args.hasArg(OPT_no_rsrc);
+
// llvm-nm only reads binary files.
if (error(sys::ChangeStdinToBinary()))
return 1;
@@ -2249,7 +2452,10 @@ int main(int argc, char **argv) {
if (NoDyldInfo && (AddDyldInfo || DyldInfoOnly))
error("--no-dyldinfo can't be used with --add-dyldinfo or --dyldinfo-only");
- llvm::for_each(InputFilenames, dumpSymbolNamesFromFile);
+ if (ExportSymbols)
+ exportSymbolNamesFromFiles(InputFilenames);
+ else
+ llvm::for_each(InputFilenames, dumpSymbolNamesFromFile);
if (HadError)
return 1;
diff --git a/llvm/tools/llvm-objcopy/BitcodeStripOpts.td b/llvm/tools/llvm-objcopy/BitcodeStripOpts.td
index cc178164b03c..21db854b1e6f 100644
--- a/llvm/tools/llvm-objcopy/BitcodeStripOpts.td
+++ b/llvm/tools/llvm-objcopy/BitcodeStripOpts.td
@@ -17,8 +17,14 @@ def help : Flag<["--"], "help">;
def h : Flag<["-"], "h">, Alias<help>;
def version : Flag<["--"], "version">,
- HelpText<"Print the version and exit.">;
+ HelpText<"Print the version and exit">;
def V : Flag<["-"], "V">,
Alias<version>,
HelpText<"Alias for --version">;
+
+def remove : Flag<["-"], "r">,
+ HelpText<"Remove the __LLVM bitcode segment entirely">;
+
+def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to <file>">,
+ MetaVarName<"<file>">;
diff --git a/llvm/tools/llvm-objcopy/ConfigManager.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index 90730c421a46..5b2b4b5704d8 100644
--- a/llvm/tools/llvm-objcopy/ConfigManager.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -1,4 +1,4 @@
-//===- ConfigManager.cpp --------------------------------------------------===//
+//===- ObjcopyOptions.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,12 +6,15 @@
//
//===----------------------------------------------------------------------===//
-#include "ConfigManager.h"
+#include "ObjcopyOptions.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ConfigManager.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CRC.h"
@@ -20,8 +23,6 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/StringSaver.h"
-#include <memory>
using namespace llvm;
using namespace llvm::objcopy;
@@ -366,41 +367,6 @@ static Error addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc,
return Error::success();
}
-Expected<NameOrPattern>
-NameOrPattern::create(StringRef Pattern, MatchStyle MS,
- function_ref<Error(Error)> ErrorCallback) {
- switch (MS) {
- case MatchStyle::Literal:
- return NameOrPattern(Pattern);
- case MatchStyle::Wildcard: {
- SmallVector<char, 32> Data;
- bool IsPositiveMatch = true;
- if (Pattern[0] == '!') {
- IsPositiveMatch = false;
- Pattern = Pattern.drop_front();
- }
- Expected<GlobPattern> GlobOrErr = GlobPattern::create(Pattern);
-
- // If we couldn't create it as a glob, report the error, but try again with
- // a literal if the error reporting is non-fatal.
- if (!GlobOrErr) {
- if (Error E = ErrorCallback(GlobOrErr.takeError()))
- return std::move(E);
- return create(Pattern, MatchStyle::Literal, ErrorCallback);
- }
-
- return NameOrPattern(std::make_shared<GlobPattern>(*GlobOrErr),
- IsPositiveMatch);
- }
- case MatchStyle::Regex: {
- SmallVector<char, 32> Data;
- return NameOrPattern(std::make_shared<Regex>(
- ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)));
- }
- }
- llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum");
-}
-
static Error addSymbolsToRenameFromFile(StringMap<StringRef> &SymbolsToRename,
BumpPtrAllocator &Alloc,
StringRef Filename) {
@@ -559,70 +525,34 @@ static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
return SI;
}
-Expected<const ELFConfig &> ConfigManager::getELFConfig() const {
- return ELF;
-}
-
-Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
- if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
- !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() ||
- !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() ||
- !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
- !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
- !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
- Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
- Common.DecompressDebugSections ||
- Common.DiscardMode == DiscardType::Locals ||
- !Common.SymbolsToAdd.empty()) {
- return createStringError(llvm::errc::invalid_argument,
- "option not supported by llvm-objcopy for COFF");
- }
-
- return COFF;
-}
-
-Expected<const MachOConfig &> ConfigManager::getMachOConfig() const {
- if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
- !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() ||
- !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() ||
- !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() ||
- !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
- !Common.UnneededSymbolsToRemove.empty() ||
- !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
- Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU ||
- Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
- Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded ||
- Common.DiscardMode == DiscardType::Locals ||
- !Common.SymbolsToAdd.empty()) {
- return createStringError(llvm::errc::invalid_argument,
- "option not supported by llvm-objcopy for MachO");
- }
+// Parse the input option \p ArgValue and load the section data. This function
+// extracts the section name and the name of the file containing the section
+// data from \p ArgValue, loads the data from that file, and appends the
+// section name and data to the vector of new sections \p NewSections.
+static Error loadNewSectionData(StringRef ArgValue, StringRef OptionName,
+ std::vector<NewSectionInfo> &NewSections) {
+ if (!ArgValue.contains('='))
+ return createStringError(errc::invalid_argument,
+ "bad format for " + OptionName + ": missing '='");
- return MachO;
-}
+ std::pair<StringRef, StringRef> SecPair = ArgValue.split("=");
+ if (SecPair.second.empty())
+ return createStringError(errc::invalid_argument, "bad format for " +
+ OptionName +
+ ": missing file name");
-Expected<const WasmConfig &> ConfigManager::getWasmConfig() const {
- if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition ||
- !Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
- !Common.AllocSectionsPrefix.empty() ||
- Common.DiscardMode != DiscardType::None || !Common.SymbolsToAdd.empty() ||
- !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToLocalize.empty() ||
- !Common.SymbolsToKeep.empty() || !Common.SymbolsToRemove.empty() ||
- !Common.UnneededSymbolsToRemove.empty() ||
- !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
- !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- !Common.SetSectionFlags.empty() || !Common.SymbolsToRename.empty()) {
- return createStringError(
- llvm::errc::invalid_argument,
- "only flags for section dumping, removal, and addition are supported");
- }
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(SecPair.second);
+ if (!BufOrErr)
+ return createFileError(SecPair.second,
+ errorCodeToError(BufOrErr.getError()));
- return Wasm;
+ NewSections.push_back({SecPair.first, std::move(*BufOrErr)});
+ return Error::success();
}
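// Sketch of the "name=file" validation in loadNewSectionData() above, using
// std::string instead of StringRef and MemoryBuffer; the two failure modes
// match the two createStringError cases: no '=', or an empty file name.
#include <string>
#include <utility>
static bool splitSectionArg(const std::string &Arg,
                            std::pair<std::string, std::string> &Out,
                            std::string &Err) {
  std::size_t Eq = Arg.find('=');
  if (Eq == std::string::npos) {
    Err = "missing '='";
    return false;
  }
  std::string File = Arg.substr(Eq + 1);
  if (File.empty()) {
    Err = "missing file name";
    return false;
  }
  Out = {Arg.substr(0, Eq), std::move(File)};
  return true;
}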
-// ParseObjcopyOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseObjcopyOptions will print the help messege and
+// parseObjcopyOptions returns the config and sets the input arguments. If a
+// help flag is set then parseObjcopyOptions will print the help message and
// exit.
Expected<DriverConfig>
objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
@@ -695,11 +625,10 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex)
? MatchStyle::Regex
: MatchStyle::Wildcard;
- MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex)
- ? MatchStyle::Regex
- : InputArgs.hasArg(OBJCOPY_wildcard)
- ? MatchStyle::Wildcard
- : MatchStyle::Literal;
+ MatchStyle SymbolMatchStyle
+ = InputArgs.hasArg(OBJCOPY_regex) ? MatchStyle::Regex
+ : InputArgs.hasArg(OBJCOPY_wildcard) ? MatchStyle::Wildcard
+ : MatchStyle::Literal;
StringRef InputFormat, OutputFormat;
if (InputArgs.hasArg(OBJCOPY_target)) {
InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
@@ -800,7 +729,6 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
Config.CompressionType =
StringSwitch<DebugCompressionType>(
InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq))
- .Case("zlib-gnu", DebugCompressionType::GNU)
.Case("zlib", DebugCompressionType::Z)
.Default(DebugCompressionType::None);
if (Config.CompressionType == DebugCompressionType::None)
@@ -912,26 +840,14 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
Arg->getValue(), SectionMatchStyle, ErrorCallback)))
return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) {
- StringRef ArgValue(Arg->getValue());
- if (!ArgValue.contains('='))
- return createStringError(errc::invalid_argument,
- "bad format for --add-section: missing '='");
- if (ArgValue.split("=").second.empty())
- return createStringError(
- errc::invalid_argument,
- "bad format for --add-section: missing file name");
- Config.AddSection.push_back(ArgValue);
+ if (Error Err = loadNewSectionData(Arg->getValue(), "--add-section",
+ Config.AddSection))
+ return std::move(Err);
}
for (auto Arg : InputArgs.filtered(OBJCOPY_update_section)) {
- StringRef ArgValue(Arg->getValue());
- if (!ArgValue.contains('='))
- return createStringError(errc::invalid_argument,
- "bad format for --update-section: missing '='");
- if (ArgValue.split("=").second.empty())
- return createStringError(
- errc::invalid_argument,
- "bad format for --update-section: missing file name");
- Config.UpdateSection.push_back(ArgValue);
+ if (Error Err = loadNewSectionData(Arg->getValue(), "--update-section",
+ Config.UpdateSection))
+ return std::move(Err);
}
for (auto *Arg : InputArgs.filtered(OBJCOPY_dump_section)) {
StringRef Value(Arg->getValue());
@@ -953,11 +869,12 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
InputArgs.hasArg(OBJCOPY_extract_main_partition);
ELFConfig.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
- if (InputArgs.hasArg(OBJCOPY_discard_all, OBJCOPY_discard_locals))
- Config.DiscardMode =
- InputArgs.hasFlag(OBJCOPY_discard_all, OBJCOPY_discard_locals)
- ? DiscardType::All
- : DiscardType::Locals;
+ if (auto *Arg =
+ InputArgs.getLastArg(OBJCOPY_discard_all, OBJCOPY_discard_locals)) {
+ Config.DiscardMode = Arg->getOption().matches(OBJCOPY_discard_all)
+ ? DiscardType::All
+ : DiscardType::Locals;
+ }
Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
ELFConfig.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
MachOConfig.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined);
@@ -1095,8 +1012,8 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
return std::move(DC);
}
-// ParseInstallNameToolOptions returns the config and sets the input arguments.
-// If a help flag is set then ParseInstallNameToolOptions will print the help
+// parseInstallNameToolOptions returns the config and sets the input arguments.
+// If a help flag is set then parseInstallNameToolOptions will print the help
// message and exit.
Expected<DriverConfig>
objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
@@ -1233,10 +1150,12 @@ objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
}
Expected<DriverConfig>
-objcopy::parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr) {
+objcopy::parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr,
+ function_ref<Error(Error)> ErrorCallback) {
DriverConfig DC;
ConfigManager ConfigMgr;
CommonConfig &Config = ConfigMgr.Common;
+ MachOConfig &MachOConfig = ConfigMgr.MachO;
BitcodeStripOptTable T;
unsigned MissingArgumentIndex, MissingArgumentCount;
opt::InputArgList InputArgs =
@@ -1271,14 +1190,28 @@ objcopy::parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr) {
"llvm-bitcode-strip expects a single input file");
assert(!Positional.empty());
Config.InputFilename = Positional[0];
- Config.OutputFilename = Positional[0];
+
+ if (!InputArgs.hasArg(BITCODE_STRIP_output)) {
+ return createStringError(errc::invalid_argument,
+ "-o is a required argument");
+ }
+ Config.OutputFilename = InputArgs.getLastArgValue(BITCODE_STRIP_output);
+
+ if (!InputArgs.hasArg(BITCODE_STRIP_remove))
+ return createStringError(errc::invalid_argument, "no action specified");
+
+ // We only support -r for now, which removes all bitcode sections and
+ // the __LLVM segment if it's now empty.
+ cantFail(Config.ToRemove.addMatcher(NameOrPattern::create(
+ "__LLVM,__bundle", MatchStyle::Literal, ErrorCallback)));
+ MachOConfig.EmptySegmentsToRemove.insert("__LLVM");
DC.CopyConfigs.push_back(std::move(ConfigMgr));
return std::move(DC);
}
-// ParseStripOptions returns the config and sets the input arguments. If a
-// help flag is set then ParseStripOptions will print the help messege and
+// parseStripOptions returns the config and sets the input arguments. If a
+// help flag is set then parseStripOptions will print the help message and
// exit.
Expected<DriverConfig>
objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
@@ -1337,19 +1270,17 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
"--regex and --wildcard are incompatible");
MatchStyle SectionMatchStyle =
InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard;
- MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex)
- ? MatchStyle::Regex
- : InputArgs.hasArg(STRIP_wildcard)
- ? MatchStyle::Wildcard
- : MatchStyle::Literal;
+ MatchStyle SymbolMatchStyle
+ = InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex
+ : InputArgs.hasArg(STRIP_wildcard) ? MatchStyle::Wildcard
+ : MatchStyle::Literal;
ELFConfig.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
- if (InputArgs.hasArg(STRIP_discard_all, STRIP_discard_locals))
- Config.DiscardMode =
- InputArgs.hasFlag(STRIP_discard_all, STRIP_discard_locals)
- ? DiscardType::All
- : DiscardType::Locals;
+ if (auto *Arg = InputArgs.getLastArg(STRIP_discard_all, STRIP_discard_locals))
+ Config.DiscardMode = Arg->getOption().matches(STRIP_discard_all)
+ ? DiscardType::All
+ : DiscardType::Locals;
Config.StripSections = InputArgs.hasArg(STRIP_strip_sections);
Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all))
@@ -1381,8 +1312,9 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
return std::move(E);
if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug &&
- !Config.StripUnneeded && Config.DiscardMode == DiscardType::None &&
- !Config.StripAllGNU && Config.SymbolsToRemove.empty())
+ !Config.OnlyKeepDebug && !Config.StripUnneeded &&
+ Config.DiscardMode == DiscardType::None && !Config.StripAllGNU &&
+ Config.SymbolsToRemove.empty())
Config.StripAll = true;
if (Config.DiscardMode == DiscardType::All) {
diff --git a/llvm/tools/llvm-objcopy/ConfigManager.h b/llvm/tools/llvm-objcopy/ObjcopyOptions.h
index c0d0e8bbc721..f7fa2af304d7 100644
--- a/llvm/tools/llvm-objcopy/ConfigManager.h
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.h
@@ -1,4 +1,4 @@
-//===- ConfigManager.h ----------------------------------------------------===//
+//===- ObjcopyOptions.h ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,40 +6,16 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_OBJCOPY_CONFIGMANAGER_H
-#define LLVM_TOOLS_LLVM_OBJCOPY_CONFIGMANAGER_H
+#ifndef LLVM_TOOLS_LLVM_OBJCOPY_OBJCOPYOPTIONS_H
+#define LLVM_TOOLS_LLVM_OBJCOPY_OBJCOPYOPTIONS_H
-#include "COFF/COFFConfig.h"
-#include "CommonConfig.h"
-#include "ELF/ELFConfig.h"
-#include "MachO/MachOConfig.h"
-#include "MultiFormatConfig.h"
-#include "wasm/WasmConfig.h"
+#include "llvm/ObjCopy/ConfigManager.h"
#include "llvm/Support/Allocator.h"
#include <vector>
namespace llvm {
namespace objcopy {
-// ConfigManager keeps all configurations and prepare
-// format-specific options.
-struct ConfigManager : public MultiFormatConfig {
- virtual ~ConfigManager() {}
-
- const CommonConfig &getCommonConfig() const override { return Common; }
- Expected<const ELFConfig &> getELFConfig() const override;
- Expected<const COFFConfig &> getCOFFConfig() const override;
- Expected<const MachOConfig &> getMachOConfig() const override;
- Expected<const WasmConfig &> getWasmConfig() const override;
-
- // All configs.
- CommonConfig Common;
- ELFConfig ELF;
- COFFConfig COFF;
- MachOConfig MachO;
- WasmConfig Wasm;
-};
-
// Configuration for the overall invocation of this tool. When invoked as
// objcopy, will always contain exactly one CopyConfig. When invoked as strip,
// will contain one or more CopyConfigs.
@@ -65,7 +41,9 @@ parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr);
// ParseBitcodeStripOptions returns the config and sets the input arguments.
// If a help flag is set then ParseBitcodeStripOptions will print the help
// message and exit.
-Expected<DriverConfig> parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr);
+Expected<DriverConfig>
+parseBitcodeStripOptions(ArrayRef<const char *> ArgsArr,
+ llvm::function_ref<Error(Error)> ErrorCallback);
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
@@ -77,4 +55,4 @@ parseStripOptions(ArrayRef<const char *> ArgsArr,
} // namespace objcopy
} // namespace llvm
-#endif // LLVM_TOOLS_LLVM_OBJCOPY_CONFIGMANAGER_H
+#endif // LLVM_TOOLS_LLVM_OBJCOPY_OBJCOPYOPTIONS_H
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index bfd66caf41ed..ff73265989f3 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -32,9 +32,9 @@ defm new_symbol_visibility : Eq<"new-symbol-visibility", "Visibility of "
def compress_debug_sections : Flag<["--"], "compress-debug-sections">;
def compress_debug_sections_eq
: Joined<["--"], "compress-debug-sections=">,
- MetaVarName<"[ zlib | zlib-gnu ]">,
+ MetaVarName<"[ zlib ]">,
HelpText<"Compress DWARF debug sections using specified style. Supported "
- "styles: 'zlib-gnu' and 'zlib'">;
+ "formats: 'zlib'">;
def decompress_debug_sections : Flag<["--"], "decompress-debug-sections">,
HelpText<"Decompress DWARF debug sections.">;
defm split_dwo
@@ -222,5 +222,5 @@ defm add_symbol
MetaVarName<"name=[section:]value[,flags]">;
defm update_section
- : Eq<"update-section", "Add section <name> with contents from a file <file>.">,
+ : Eq<"update-section", "Replace the contents of section <name> with contents from a file <file>.">,
MetaVarName<"name=file">;
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index a5963985f78a..aa262152ed64 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -6,23 +6,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-objcopy.h"
-#include "COFF/COFFConfig.h"
-#include "COFF/COFFObjcopy.h"
-#include "CommonConfig.h"
-#include "ConfigManager.h"
-#include "ELF/ELFConfig.h"
-#include "ELF/ELFObjcopy.h"
-#include "MachO/MachOConfig.h"
-#include "MachO/MachOObjcopy.h"
-#include "wasm/WasmConfig.h"
-#include "wasm/WasmObjcopy.h"
-
+#include "ObjcopyOptions.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ObjCopy/COFF/COFFConfig.h"
+#include "llvm/ObjCopy/COFF/COFFObjcopy.h"
+#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ObjCopy/ELF/ELFConfig.h"
+#include "llvm/ObjCopy/ELF/ELFObjcopy.h"
+#include "llvm/ObjCopy/MachO/MachOConfig.h"
+#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
+#include "llvm/ObjCopy/ObjCopy.h"
+#include "llvm/ObjCopy/wasm/WasmConfig.h"
+#include "llvm/ObjCopy/wasm/WasmObjcopy.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/Binary.h"
@@ -42,6 +41,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Memory.h"
@@ -87,7 +87,7 @@ static Expected<DriverConfig> getDriverConfig(ArrayRef<const char *> Args) {
};
if (Is("bitcode-strip") || Is("bitcode_strip"))
- return parseBitcodeStripOptions(Args);
+ return parseBitcodeStripOptions(Args, reportWarning);
else if (Is("strip"))
return parseStripOptions(Args, reportWarning);
else if (Is("install-name-tool") || Is("install_name_tool"))
@@ -96,40 +96,6 @@ static Expected<DriverConfig> getDriverConfig(ArrayRef<const char *> Args) {
return parseObjcopyOptions(Args, reportWarning);
}
-// For regular archives this function simply calls llvm::writeArchive,
-// For thin archives it writes the archive file itself as well as its members.
-static Error deepWriteArchive(StringRef ArcName,
- ArrayRef<NewArchiveMember> NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin) {
- if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind,
- Deterministic, Thin))
- return createFileError(ArcName, std::move(E));
-
- if (!Thin)
- return Error::success();
-
- for (const NewArchiveMember &Member : NewMembers) {
- // For regular files (as is the case for deepWriteArchive),
- // FileOutputBuffer::create will return OnDiskBuffer.
- // OnDiskBuffer uses a temporary file and then renames it. So in reality
- // there is no inefficiency / duplicated in-memory buffers in this case. For
- // now in-memory buffers can not be completely avoided since
- // NewArchiveMember still requires them even though writeArchive does not
- // write them on disk.
- Expected<std::unique_ptr<FileOutputBuffer>> FB =
- FileOutputBuffer::create(Member.MemberName, Member.Buf->getBufferSize(),
- FileOutputBuffer::F_executable);
- if (!FB)
- return FB.takeError();
- std::copy(Member.Buf->getBufferStart(), Member.Buf->getBufferEnd(),
- (*FB)->getBufferStart());
- if (Error E = (*FB)->commit())
- return E;
- }
- return Error::success();
-}
-
/// The function executeObjcopyOnIHex does the dispatch based on the format
/// of the output specified by the command line options.
static Error executeObjcopyOnIHex(ConfigManager &ConfigMgr, MemoryBuffer &In,
@@ -166,162 +132,16 @@ static Error executeObjcopyOnRawBinary(ConfigManager &ConfigMgr,
llvm_unreachable("unsupported output format");
}
-/// The function executeObjcopyOnBinary does the dispatch based on the format
-/// of the input binary (ELF, MachO or COFF).
-static Error executeObjcopyOnBinary(const MultiFormatConfig &Config,
- object::Binary &In, raw_ostream &Out) {
- if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In)) {
- Expected<const ELFConfig &> ELFConfig = Config.getELFConfig();
- if (!ELFConfig)
- return ELFConfig.takeError();
-
- return elf::executeObjcopyOnBinary(Config.getCommonConfig(), *ELFConfig,
- *ELFBinary, Out);
- } else if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In)) {
- Expected<const COFFConfig &> COFFConfig = Config.getCOFFConfig();
- if (!COFFConfig)
- return COFFConfig.takeError();
-
- return coff::executeObjcopyOnBinary(Config.getCommonConfig(), *COFFConfig,
- *COFFBinary, Out);
- } else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In)) {
- Expected<const MachOConfig &> MachOConfig = Config.getMachOConfig();
- if (!MachOConfig)
- return MachOConfig.takeError();
-
- return macho::executeObjcopyOnBinary(Config.getCommonConfig(), *MachOConfig,
- *MachOBinary, Out);
- } else if (auto *MachOUniversalBinary =
- dyn_cast<object::MachOUniversalBinary>(&In)) {
- return macho::executeObjcopyOnMachOUniversalBinary(
- Config, *MachOUniversalBinary, Out);
- } else if (auto *WasmBinary = dyn_cast<object::WasmObjectFile>(&In)) {
- Expected<const WasmConfig &> WasmConfig = Config.getWasmConfig();
- if (!WasmConfig)
- return WasmConfig.takeError();
-
- return objcopy::wasm::executeObjcopyOnBinary(Config.getCommonConfig(),
- *WasmConfig, *WasmBinary, Out);
- } else
- return createStringError(object_error::invalid_file_type,
- "unsupported object file format");
-}
-
-namespace llvm {
-namespace objcopy {
-
-Expected<std::vector<NewArchiveMember>>
-createNewArchiveMembers(const MultiFormatConfig &Config, const Archive &Ar) {
- std::vector<NewArchiveMember> NewArchiveMembers;
- Error Err = Error::success();
- for (const Archive::Child &Child : Ar.children(Err)) {
- Expected<StringRef> ChildNameOrErr = Child.getName();
- if (!ChildNameOrErr)
- return createFileError(Ar.getFileName(), ChildNameOrErr.takeError());
-
- Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
- if (!ChildOrErr)
- return createFileError(Ar.getFileName() + "(" + *ChildNameOrErr + ")",
- ChildOrErr.takeError());
-
- SmallVector<char, 0> Buffer;
- raw_svector_ostream MemStream(Buffer);
-
- if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MemStream))
- return std::move(E);
-
- Expected<NewArchiveMember> Member = NewArchiveMember::getOldMember(
- Child, Config.getCommonConfig().DeterministicArchives);
- if (!Member)
- return createFileError(Ar.getFileName(), Member.takeError());
-
- Member->Buf = std::make_unique<SmallVectorMemoryBuffer>(
- std::move(Buffer), ChildNameOrErr.get(),
- /*RequiresNullTerminator=*/false);
- Member->MemberName = Member->Buf->getBufferIdentifier();
- NewArchiveMembers.push_back(std::move(*Member));
- }
- if (Err)
- return createFileError(Config.getCommonConfig().InputFilename,
- std::move(Err));
- return std::move(NewArchiveMembers);
-}
-
-} // end namespace objcopy
-} // end namespace llvm
-
-static Error executeObjcopyOnArchive(const ConfigManager &ConfigMgr,
- const object::Archive &Ar) {
- Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
- createNewArchiveMembers(ConfigMgr, Ar);
- if (!NewArchiveMembersOrErr)
- return NewArchiveMembersOrErr.takeError();
- const CommonConfig &Config = ConfigMgr.getCommonConfig();
- return deepWriteArchive(Config.OutputFilename, *NewArchiveMembersOrErr,
- Ar.hasSymbolTable(), Ar.kind(),
- Config.DeterministicArchives, Ar.isThin());
-}
-
-static Error restoreStatOnFile(StringRef Filename,
- const sys::fs::file_status &Stat,
- const ConfigManager &ConfigMgr) {
- int FD;
- const CommonConfig &Config = ConfigMgr.getCommonConfig();
-
- // Writing to stdout should not be treated as an error here, just
- // do not set access/modification times or permissions.
- if (Filename == "-")
- return Error::success();
-
- if (auto EC =
- sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
- return createFileError(Filename, EC);
-
- if (Config.PreserveDates)
- if (auto EC = sys::fs::setLastAccessAndModificationTime(
- FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
- return createFileError(Filename, EC);
-
- sys::fs::file_status OStat;
- if (std::error_code EC = sys::fs::status(FD, OStat))
- return createFileError(Filename, EC);
- if (OStat.type() == sys::fs::file_type::regular_file) {
-#ifndef _WIN32
- // Keep ownership if llvm-objcopy is called under root.
- if (Config.InputFilename == Config.OutputFilename && OStat.getUser() == 0)
- sys::fs::changeFileOwnership(FD, Stat.getUser(), Stat.getGroup());
-#endif
-
- sys::fs::perms Perm = Stat.permissions();
- if (Config.InputFilename != Config.OutputFilename)
- Perm = static_cast<sys::fs::perms>(Perm & ~sys::fs::getUmask() & ~06000);
-#ifdef _WIN32
- if (auto EC = sys::fs::setPermissions(Filename, Perm))
-#else
- if (auto EC = sys::fs::setPermissions(FD, Perm))
-#endif
- return createFileError(Filename, EC);
- }
-
- if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
- return createFileError(Filename, EC);
-
- return Error::success();
-}
-
/// The function executeObjcopy does the higher level dispatch based on the type
/// of input (raw binary, archive or single object file) and takes care of the
/// format-agnostic modifications, i.e. preserving dates.
static Error executeObjcopy(ConfigManager &ConfigMgr) {
CommonConfig &Config = ConfigMgr.Common;
- sys::fs::file_status Stat;
- if (Config.InputFilename != "-") {
- if (auto EC = sys::fs::status(Config.InputFilename, Stat))
- return createFileError(Config.InputFilename, EC);
- } else {
- Stat.permissions(static_cast<sys::fs::perms>(0777));
- }
+ Expected<FilePermissionsApplier> PermsApplierOrErr =
+ FilePermissionsApplier::create(Config.InputFilename);
+ if (!PermsApplierOrErr)
+ return PermsApplierOrErr.takeError();
std::function<Error(raw_ostream & OutFile)> ObjcopyFunc;
@@ -390,19 +210,20 @@ static Error executeObjcopy(ConfigManager &ConfigMgr) {
}
}
- if (Error E = restoreStatOnFile(Config.OutputFilename, Stat, ConfigMgr))
+ if (Error E =
+ PermsApplierOrErr->apply(Config.OutputFilename, Config.PreserveDates))
return E;
- if (!Config.SplitDWO.empty()) {
- Stat.permissions(static_cast<sys::fs::perms>(0666));
- if (Error E = restoreStatOnFile(Config.SplitDWO, Stat, ConfigMgr))
+ if (!Config.SplitDWO.empty())
+ if (Error E =
+ PermsApplierOrErr->apply(Config.SplitDWO, Config.PreserveDates,
+ static_cast<sys::fs::perms>(0666)))
return E;
- }
return Error::success();
}
-int main(int argc, char **argv) {
+int llvm_objcopy_main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
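// [Editorial sketch.] FilePermissionsApplier (from llvm/Support/FileUtilities.h,
// included above) snapshots the input file's status once and can then stamp
// permissions and timestamps onto any number of outputs, replacing the
// hand-rolled restoreStatOnFile() deleted in this patch. Usage shape as seen
// at the call sites above:
static Error stampOutputs(StringRef Input, StringRef Output, StringRef DWO,
                          bool PreserveDates) {
  Expected<FilePermissionsApplier> Applier =
      FilePermissionsApplier::create(Input); // snapshot the input's stat
  if (!Applier)
    return Applier.takeError();
  if (Error E = Applier->apply(Output, PreserveDates))
    return E;
  if (!DWO.empty()) // split-DWO output gets explicit 0666 base permissions
    return Applier->apply(DWO, PreserveDates,
                          static_cast<sys::fs::perms>(0666));
  return Error::success();
}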
diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp
index 32fdd1a4d5c3..e085e26c3cd0 100644
--- a/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -430,21 +430,12 @@ static void printTLSDirectory(const COFFObjectFile *Obj) {
if (!PE32Header && !PE32PlusHeader)
return;
- const data_directory *DataDir = Obj->getDataDirectory(COFF::TLS_TABLE);
- if (!DataDir || DataDir->RelativeVirtualAddress == 0)
- return;
-
- uintptr_t IntPtr = 0;
- if (Error E =
- Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr))
- reportError(std::move(E), Obj->getFileName());
-
if (PE32Header) {
- auto *TLSDir = reinterpret_cast<const coff_tls_directory32 *>(IntPtr);
- printTLSDirectoryT(TLSDir);
+ if (auto *TLSDir = Obj->getTLSDirectory32())
+ printTLSDirectoryT(TLSDir);
} else {
- auto *TLSDir = reinterpret_cast<const coff_tls_directory64 *>(IntPtr);
- printTLSDirectoryT(TLSDir);
+ if (auto *TLSDir = Obj->getTLSDirectory64())
+ printTLSDirectoryT(TLSDir);
}
outs() << "\n";
@@ -459,19 +450,10 @@ static void printLoadConfiguration(const COFFObjectFile *Obj) {
if (Obj->getMachine() != COFF::IMAGE_FILE_MACHINE_I386)
return;
- const data_directory *DataDir = Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE);
- if (!DataDir)
- reportError("no load config data dir", Obj->getFileName());
-
- uintptr_t IntPtr = 0;
- if (DataDir->RelativeVirtualAddress == 0)
+ auto *LoadConf = Obj->getLoadConfig32();
+ if (!LoadConf)
return;
- if (Error E =
- Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr))
- reportError(std::move(E), Obj->getFileName());
-
- auto *LoadConf = reinterpret_cast<const coff_load_configuration32 *>(IntPtr);
outs() << "Load configuration:"
<< "\n Timestamp: " << LoadConf->TimeDateStamp
<< "\n Major Version: " << LoadConf->MajorVersion
@@ -544,11 +526,11 @@ static void printImportTables(const COFFObjectFile *Obj) {
// Prints export tables. The export table is a table containing the list of
// exported symbols from the DLL.
static void printExportTable(const COFFObjectFile *Obj) {
- outs() << "Export Table:\n";
export_directory_iterator I = Obj->export_directory_begin();
export_directory_iterator E = Obj->export_directory_end();
if (I == E)
return;
+ outs() << "Export Table:\n";
StringRef DllName;
uint32_t OrdinalBase;
if (I->getDllName(DllName))
diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp
index 98e71497d022..ca73dafe2b8e 100644
--- a/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -171,8 +171,12 @@ uint64_t objdump::getELFSectionLMA(const object::ELFSectionRef &Sec) {
template <class ELFT>
static void printDynamicSection(const ELFFile<ELFT> &Elf, StringRef Filename) {
- ArrayRef<typename ELFT::Dyn> DynamicEntries =
- unwrapOrError(Elf.dynamicEntries(), Filename);
+ auto DynamicEntriesOrErr = Elf.dynamicEntries();
+ if (!DynamicEntriesOrErr) {
+ reportWarning(toString(DynamicEntriesOrErr.takeError()), Filename);
+ return;
+ }
+ ArrayRef<typename ELFT::Dyn> DynamicEntries = *DynamicEntriesOrErr;
// Find the maximum tag name length to format the value column properly.
size_t MaxLen = 0;
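// [Editorial sketch.] The hunk above demotes a fatal unwrapOrError() to a
// warning. Note that the Error inside the failed Expected must still be
// consumed (via takeError()) before returning, or its destructor asserts.
// The general shape of the soft-failure pattern:
template <typename T>
static bool reportIfError(llvm::Expected<T> &ValOrErr, llvm::StringRef File) {
  if (ValOrErr)
    return false;                                      // value present
  reportWarning(toString(ValOrErr.takeError()), File); // consume + diagnose
  return true;                                         // caller bails out
}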
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 31867625f0e5..60c34158941b 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -81,6 +81,7 @@ bool objdump::DataInCode;
bool objdump::FunctionStarts;
bool objdump::LinkOptHints;
bool objdump::InfoPlist;
+bool objdump::DyldInfo;
bool objdump::DylibsUsed;
bool objdump::DylibId;
bool objdump::Verbose;
@@ -111,6 +112,7 @@ void objdump::parseMachOOptions(const llvm::opt::InputArgList &InputArgs) {
FunctionStarts = InputArgs.hasArg(OBJDUMP_function_starts);
LinkOptHints = InputArgs.hasArg(OBJDUMP_link_opt_hints);
InfoPlist = InputArgs.hasArg(OBJDUMP_info_plist);
+ DyldInfo = InputArgs.hasArg(OBJDUMP_dyld_info);
DylibsUsed = InputArgs.hasArg(OBJDUMP_dylibs_used);
DylibId = InputArgs.hasArg(OBJDUMP_dylib_id);
Verbose = !InputArgs.hasArg(OBJDUMP_non_verbose);
@@ -188,8 +190,12 @@ typedef DiceTable::iterator dice_table_iterator;
namespace {
struct ScopedXarFile {
xar_t xar;
- ScopedXarFile(const char *filename, int32_t flags)
- : xar(xar_open(filename, flags)) {}
+ ScopedXarFile(const char *filename, int32_t flags) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ xar = xar_open(filename, flags);
+#pragma clang diagnostic pop
+ }
~ScopedXarFile() {
if (xar)
xar_close(xar);
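// [Editorial sketch.] The same suppression idiom in isolation: scope
// -Wdeprecated-declarations off around the single deprecated call instead of
// disabling the warning file-wide (xar_open() is deprecated on recent macOS
// SDKs, which is what motivates the wrapper above):
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
  xar_t x = xar_open("archive.xar", READ); // deliberate use of deprecated API
#pragma clang diagnostic pop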
@@ -1178,6 +1184,20 @@ static void PrintLinkOptHints(MachOObjectFile *O) {
}
}
+static void printMachOChainedFixups(object::MachOObjectFile *Obj) {
+ Error Err = Error::success();
+ for (const object::MachOChainedFixupEntry &Entry : Obj->fixupTable(Err)) {
+ (void)Entry;
+ }
+ if (Err)
+ reportError(std::move(Err), Obj->getFileName());
+}
+
+static void PrintDyldInfo(MachOObjectFile *O) {
+ outs() << "dyld information:" << '\n';
+ printMachOChainedFixups(O);
+}
+
static void PrintDylibs(MachOObjectFile *O, bool JustId) {
unsigned Index = 0;
for (const auto &Load : O->load_commands()) {
@@ -1896,8 +1916,8 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
// UniversalHeaders or ArchiveHeaders.
if (Disassemble || Relocations || PrivateHeaders || ExportsTrie || Rebase ||
Bind || SymbolTable || LazyBind || WeakBind || IndirectSymbols ||
- DataInCode || FunctionStarts || LinkOptHints || DylibsUsed || DylibId ||
- Rpaths || ObjcMetaData || (!FilterSections.empty())) {
+ DataInCode || FunctionStarts || LinkOptHints || DyldInfo || DylibsUsed ||
+ DylibId || Rpaths || ObjcMetaData || (!FilterSections.empty())) {
if (LeadingHeaders) {
outs() << Name;
if (!ArchiveMemberName.empty())
@@ -1966,6 +1986,8 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
DumpSectionContents(FileName, MachOOF, Verbose);
if (InfoPlist)
DumpInfoPlistSectionContents(FileName, MachOOF);
+ if (DyldInfo)
+ PrintDyldInfo(MachOOF);
if (DylibsUsed)
PrintDylibs(MachOOF, false);
if (DylibId)
@@ -2586,7 +2608,8 @@ struct DisassembleInfo {
// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
// information is returned then this function returns 1 else it returns 0.
static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
- uint64_t Size, int TagType, void *TagBuf) {
+ uint64_t OpSize, uint64_t InstSize, int TagType,
+ void *TagBuf) {
struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
uint64_t value = op_info->Value;
@@ -2603,7 +2626,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
unsigned int Arch = info->O->getArch();
if (Arch == Triple::x86) {
- if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+ if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
@@ -2683,7 +2706,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
return 0;
}
if (Arch == Triple::x86_64) {
- if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+ if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
return 0;
// For non-MH_OBJECT types, like MH_KEXT_BUNDLE, search the external
// relocation entries of a linked image (if any) for an entry that matches
@@ -2715,7 +2738,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// adds the Pc. But for x86_64 external relocation entries the Value
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
- op_info->Value -= Pc + Offset + Size;
+ op_info->Value -= Pc + InstSize;
const char *name =
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
op_info->AddSymbol.Present = 1;
@@ -2753,7 +2776,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// adds the Pc. But for x86_64 external relocation entries the Value
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
- op_info->Value -= Pc + Offset + Size;
+ op_info->Value -= Pc + InstSize;
const char *name =
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
unsigned Type = info->O->getAnyRelocationType(RE);
@@ -2781,7 +2804,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
return 0;
}
if (Arch == Triple::arm) {
- if (Offset != 0 || (Size != 4 && Size != 2))
+ if (Offset != 0 || (InstSize != 4 && InstSize != 2))
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
@@ -2918,7 +2941,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
return 1;
}
if (Arch == Triple::aarch64) {
- if (Offset != 0 || Size != 4)
+ if (Offset != 0 || InstSize != 4)
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
@@ -9141,14 +9164,20 @@ static void PrintNoteLoadCommand(MachO::note_command Nt) {
outs() << " size " << Nt.size << "\n";
}
-static void PrintBuildToolVersion(MachO::build_tool_version bv) {
- outs() << " tool " << MachOObjectFile::getBuildTool(bv.tool) << "\n";
+static void PrintBuildToolVersion(MachO::build_tool_version bv, bool verbose) {
+ outs() << " tool ";
+ if (verbose)
+ outs() << MachOObjectFile::getBuildTool(bv.tool);
+ else
+ outs() << bv.tool;
+ outs() << "\n";
outs() << " version " << MachOObjectFile::getVersionString(bv.version)
<< "\n";
}
static void PrintBuildVersionLoadCommand(const MachOObjectFile *obj,
- MachO::build_version_command bd) {
+ MachO::build_version_command bd,
+ bool verbose) {
outs() << " cmd LC_BUILD_VERSION\n";
outs() << " cmdsize " << bd.cmdsize;
if (bd.cmdsize !=
@@ -9157,8 +9186,12 @@ static void PrintBuildVersionLoadCommand(const MachOObjectFile *obj,
outs() << " Incorrect size\n";
else
outs() << "\n";
- outs() << " platform " << MachOObjectFile::getBuildPlatform(bd.platform)
- << "\n";
+ outs() << " platform ";
+ if (verbose)
+ outs() << MachOObjectFile::getBuildPlatform(bd.platform);
+ else
+ outs() << bd.platform;
+ outs() << "\n";
if (bd.sdk)
outs() << " sdk " << MachOObjectFile::getVersionString(bd.sdk)
<< "\n";
@@ -9169,7 +9202,7 @@ static void PrintBuildVersionLoadCommand(const MachOObjectFile *obj,
outs() << " ntools " << bd.ntools << "\n";
for (unsigned i = 0; i < bd.ntools; ++i) {
MachO::build_tool_version bv = obj->getBuildToolVersion(i);
- PrintBuildToolVersion(bv);
+ PrintBuildToolVersion(bv, verbose);
}
}
@@ -10146,7 +10179,7 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype,
} else if (Command.C.cmd == MachO::LC_BUILD_VERSION) {
MachO::build_version_command Bv =
Obj->getBuildVersionLoadCommand(Command);
- PrintBuildVersionLoadCommand(Obj, Bv);
+ PrintBuildVersionLoadCommand(Obj, Bv, verbose);
} else if (Command.C.cmd == MachO::LC_SOURCE_VERSION) {
MachO::source_version_command Sd = Obj->getSourceVersionCommand(Command);
PrintSourceVersionCommand(Sd);
diff --git a/llvm/tools/llvm-objdump/MachODump.h b/llvm/tools/llvm-objdump/MachODump.h
index 7568062bd6b0..12783e15b425 100644
--- a/llvm/tools/llvm-objdump/MachODump.h
+++ b/llvm/tools/llvm-objdump/MachODump.h
@@ -36,6 +36,7 @@ void parseMachOOptions(const llvm::opt::InputArgList &InputArgs);
extern bool Bind;
extern bool DataInCode;
extern std::string DisSymName;
+extern bool DyldInfo;
extern bool DylibId;
extern bool DylibsUsed;
extern bool ExportsTrie;
diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td
index 9f27a6cdf163..00d7d8ccff17 100644
--- a/llvm/tools/llvm-objdump/ObjdumpOpts.td
+++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td
@@ -81,6 +81,9 @@ def dwarf_EQ : Joined<["--"], "dwarf=">,
def fault_map_section : Flag<["--"], "fault-map-section">,
HelpText<"Display the content of the fault map section">;
+def offloading : Flag<["--"], "offloading">,
+ HelpText<"Display the content of the offloading section">;
+
def file_headers : Flag<["--"], "file-headers">,
HelpText<"Display the contents of the overall file header">;
def : Flag<["-"], "f">, Alias<file_headers>,
@@ -296,6 +299,12 @@ def info_plist : Flag<["--"], "info-plist">,
"Mach-O objects (requires --macho)">,
Group<grp_mach_o>;
+def dyld_info : Flag<["--"], "dyld_info">,
+ HelpText<"Print bind and rebase information used by dyld to resolve "
+ "external references in a final linked binary "
+ "(requires --macho)">,
+ Group<grp_mach_o>;
+
def dylibs_used : Flag<["--"], "dylibs-used">,
HelpText<"Print the shared libraries used for linked "
"Mach-O files (requires --macho)">,
diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp
new file mode 100644
index 000000000000..7d4461f0a70e
--- /dev/null
+++ b/llvm/tools/llvm-objdump/OffloadDump.cpp
@@ -0,0 +1,102 @@
+//===-- OffloadDump.cpp - Offloading dumper ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the offloading-specific dumper for llvm-objdump.
+///
+//===----------------------------------------------------------------------===//
+#include "OffloadDump.h"
+#include "llvm-objdump.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::objdump;
+
+constexpr const char OffloadSectionString[] = ".llvm.offloading";
+
+/// Get the printable name of the image kind.
+static StringRef getImageName(const OffloadBinary &OB) {
+ switch (OB.getImageKind()) {
+ case IMG_Object:
+ return "elf";
+ case IMG_Bitcode:
+ return "llvm ir";
+ case IMG_Cubin:
+ return "cubin";
+ case IMG_Fatbinary:
+ return "fatbinary";
+ case IMG_PTX:
+ return "ptx";
+ default:
+ return "<none>";
+ }
+}
+
+static void printBinary(const OffloadBinary &OB, uint64_t Index) {
+ outs() << "\nOFFLOADING IMAGE [" << Index << "]:\n";
+ outs() << left_justify("kind", 16) << getImageName(OB) << "\n";
+ outs() << left_justify("arch", 16) << OB.getArch() << "\n";
+ outs() << left_justify("triple", 16) << OB.getTriple() << "\n";
+ outs() << left_justify("producer", 16)
+ << getOffloadKindName(OB.getOffloadKind()) << "\n";
+}
+
+static Error visitAllBinaries(const OffloadBinary &OB) {
+ uint64_t Offset = 0;
+ uint64_t Index = 0;
+ while (Offset < OB.getMemoryBufferRef().getBufferSize()) {
+ MemoryBufferRef Buffer =
+ MemoryBufferRef(OB.getData().drop_front(Offset), OB.getFileName());
+ auto BinaryOrErr = OffloadBinary::create(Buffer);
+ if (!BinaryOrErr)
+ return BinaryOrErr.takeError();
+
+ OffloadBinary &Binary = **BinaryOrErr;
+ printBinary(Binary, Index++);
+
+ Offset += Binary.getSize();
+ }
+ return Error::success();
+}
+
+/// Print the embedded offloading contents of an ObjectFile \p O.
+void llvm::dumpOffloadBinary(const ObjectFile &O) {
+ for (SectionRef Sec : O.sections()) {
+ Expected<StringRef> Name = Sec.getName();
+ if (!Name || !Name->startswith(OffloadSectionString))
+ continue;
+
+ Expected<StringRef> Contents = Sec.getContents();
+ if (!Contents)
+ reportError(Contents.takeError(), O.getFileName());
+
+ MemoryBufferRef Buffer = MemoryBufferRef(*Contents, O.getFileName());
+ auto BinaryOrErr = OffloadBinary::create(Buffer);
+ if (!BinaryOrErr)
+ reportError(O.getFileName(), "while extracting offloading files: " +
+ toString(BinaryOrErr.takeError()));
+ OffloadBinary &Binary = **BinaryOrErr;
+
+ // Print out all the binaries that are contained in this buffer. If we fail
+ // to parse a binary before reaching the end of the buffer, emit a warning.
+ if (Error Err = visitAllBinaries(Binary))
+ reportWarning("while parsing offloading files: " +
+ toString(std::move(Err)),
+ O.getFileName());
+ }
+}
+
+/// Print the contents of an offload binary file \p OB. This may contain
+/// multiple binaries stored in the same buffer.
+void llvm::dumpOffloadSections(const OffloadBinary &OB) {
+ // Print out all the binaries that are contained in this buffer. If we fail to
+ // parse a binary before reaching the end of the buffer, emit a warning.
+ if (Error Err = visitAllBinaries(OB))
+ reportWarning("while parsing offloading files: " + toString(std::move(Err)),
+ OB.getFileName());
+}
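// [Editorial sketch.] Example of what the new dumper prints, using the
// format strings from printBinary() above (the file name and field values
// here are hypothetical):
//
//   $ llvm-objdump --offloading app-openmp.o
//
//   OFFLOADING IMAGE [0]:
//   kind            elf
//   arch            gfx90a
//   triple          amdgcn-amd-amdhsa
//   producer        openmp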
diff --git a/llvm/tools/llvm-objdump/OffloadDump.h b/llvm/tools/llvm-objdump/OffloadDump.h
new file mode 100644
index 000000000000..75f188e9d506
--- /dev/null
+++ b/llvm/tools/llvm-objdump/OffloadDump.h
@@ -0,0 +1,22 @@
+//===-- OffloadDump.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_OBJDUMP_OFFLOADDUMP_H
+#define LLVM_TOOLS_LLVM_OBJDUMP_OFFLOADDUMP_H
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/OffloadBinary.h"
+
+namespace llvm {
+
+void dumpOffloadSections(const object::OffloadBinary &OB);
+void dumpOffloadBinary(const object::ObjectFile &O);
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-objdump/OtoolOpts.td b/llvm/tools/llvm-objdump/OtoolOpts.td
index 61ea701ed75d..e8bef284c0e9 100644
--- a/llvm/tools/llvm-objdump/OtoolOpts.td
+++ b/llvm/tools/llvm-objdump/OtoolOpts.td
@@ -47,7 +47,6 @@ def X : Flag<["-"], "X">, HelpText<"omit leading addresses or headers">;
// -addr_slide=arg
// -function_offsets
-
// Obsolete and unsupported:
def grp_obsolete : OptionGroup<"kind">,
HelpText<"Obsolete and unsupported flags">;
diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp
index 8befac546204..c8ea6b543245 100644
--- a/llvm/tools/llvm-objdump/SourcePrinter.cpp
+++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp
@@ -16,6 +16,8 @@
#include "llvm-objdump.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/FormatVariadic.h"
diff --git a/llvm/tools/llvm-objdump/SourcePrinter.h b/llvm/tools/llvm-objdump/SourcePrinter.h
index 31d46e3108f6..29ef19c98c80 100644
--- a/llvm/tools/llvm-objdump/SourcePrinter.h
+++ b/llvm/tools/llvm-objdump/SourcePrinter.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/FormattedStream.h"
#include <unordered_map>
#include <vector>
diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp
index b8fb2ed3d063..159741bebb67 100644
--- a/llvm/tools/llvm-objdump/XCOFFDump.cpp
+++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp
@@ -106,7 +106,7 @@ std::string objdump::getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo,
if (SymbolInfo.XCOFFSymInfo.StorageMappingClass &&
!SymbolInfo.XCOFFSymInfo.IsLabel) {
const XCOFF::StorageMappingClass Smc =
- SymbolInfo.XCOFFSymInfo.StorageMappingClass.getValue();
+ *SymbolInfo.XCOFFSymInfo.StorageMappingClass;
Result.append(("[" + XCOFF::getMappingClassString(Smc) + "]").str());
}
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 6b238fa01d25..7cd47da9efd9 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -20,6 +20,7 @@
#include "ELFDump.h"
#include "MachODump.h"
#include "ObjdumpOptID.h"
+#include "OffloadDump.h"
#include "SourcePrinter.h"
#include "WasmDump.h"
#include "XCOFFDump.h"
@@ -33,6 +34,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -52,10 +54,12 @@
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/FaultMapParser.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/OffloadBinary.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@@ -196,6 +200,7 @@ std::string objdump::MCPU;
std::vector<std::string> objdump::MAttrs;
bool objdump::ShowRawInsn;
bool objdump::LeadingAddr;
+static bool Offloading;
static bool RawClangAST;
bool objdump::Relocations;
bool objdump::PrintImmHex;
@@ -440,8 +445,13 @@ static bool isArmElf(const ObjectFile *Obj) {
return Elf && Elf->getEMachine() == ELF::EM_ARM;
}
+static bool isCSKYElf(const ObjectFile *Obj) {
+ const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
+ return Elf && Elf->getEMachine() == ELF::EM_CSKY;
+}
+
static bool hasMappingSymbols(const ObjectFile *Obj) {
- return isArmElf(Obj) || isAArch64Elf(Obj);
+ return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj);
}
static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
@@ -957,6 +967,9 @@ SymbolInfoTy objdump::createSymbolInfo(const ObjectFile *Obj,
getXCOFFSymbolCsectSMC(XCOFFObj, Symbol);
return SymbolInfoTy(Addr, Name, Smc, SymbolIndex,
isLabel(XCOFFObj, Symbol));
+ } else if (Obj->isXCOFF()) {
+ const SymbolRef::Type SymType = unwrapOrError(Symbol.getType(), FileName);
+ return SymbolInfoTy(Addr, Name, SymType, true);
} else
return SymbolInfoTy(Addr, Name,
Obj->isELF() ? getElfSymbolType(Obj, Symbol)
@@ -973,11 +986,29 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj,
}
static void
-collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
- MCDisassembler *DisAsm, MCInstPrinter *IP,
- const MCSubtargetInfo *STI, uint64_t SectionAddr,
- uint64_t Start, uint64_t End,
- std::unordered_map<uint64_t, std::string> &Labels) {
+collectBBAddrMapLabels(const std::unordered_map<uint64_t, BBAddrMap> &AddrToBBAddrMap,
+ uint64_t SectionAddr, uint64_t Start, uint64_t End,
+ std::unordered_map<uint64_t, std::vector<std::string>> &Labels) {
+ if (AddrToBBAddrMap.empty())
+ return;
+ Labels.clear();
+ uint64_t StartAddress = SectionAddr + Start;
+ uint64_t EndAddress = SectionAddr + End;
+ auto Iter = AddrToBBAddrMap.find(StartAddress);
+ if (Iter == AddrToBBAddrMap.end())
+ return;
+ for (unsigned I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) {
+ uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr;
+ if (BBAddress >= EndAddress)
+ continue;
+ Labels[BBAddress].push_back(("BB" + Twine(I)).str());
+ }
+}
+
+static void collectLocalBranchTargets(
+ ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm,
+ MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr,
+ uint64_t Start, uint64_t End, std::unordered_map<uint64_t, std::string> &Labels) {
// So far only supports PowerPC and X86.
if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86())
return;
@@ -1006,7 +1037,6 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
!(STI->getTargetTriple().isPPC() && Target == Index))
Labels[Target] = ("L" + Twine(LabelCount++)).str();
}
-
Index += Size;
}
}
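// [Editorial sketch.] Rough effect of collectBBAddrMapLabels() above when an
// ELF input carries SHT_LLVM_BB_ADDR_MAP sections (emitted by
// -fbasic-block-sections=labels); names and layout below are hypothetical:
//
//   $ llvm-objdump -d --symbolize-operands labeled.o
//   <foo>:
//     ...
//   <BB1>:
//     ...
//     jmp <BB1>        # previously: a generic <L0> label or foo+0x<off>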
@@ -1241,6 +1271,20 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (!SectSize)
continue;
+ std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap;
+ if (SymbolizeOperands) {
+ if (auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) {
+ // Read the BB-address-map corresponding to this section, if present.
+ auto SectionBBAddrMapsOrErr = Elf->readBBAddrMap(Section.getIndex());
+ if (!SectionBBAddrMapsOrErr)
+ reportWarning(toString(SectionBBAddrMapsOrErr.takeError()),
+ Obj->getFileName());
+ for (auto &FunctionBBAddrMap : *SectionBBAddrMapsOrErr)
+ AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr,
+ std::move(FunctionBBAddrMap));
+ }
+ }
+
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<MappingSymbolPair> MappingSymbols;
@@ -1367,7 +1411,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// Right now, most targets return None, i.e., they do not treat a symbol
// separately. But WebAssembly decodes preludes for some symbols.
//
- if (Status.hasValue()) {
+ if (Status) {
if (Status.getValue() == MCDisassembler::Fail) {
outs() << "// Error in decoding " << SymbolName
<< " : Decoding failed region as bytes.\n";
@@ -1404,9 +1448,13 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
formatted_raw_ostream FOS(outs());
std::unordered_map<uint64_t, std::string> AllLabels;
- if (SymbolizeOperands)
+ std::unordered_map<uint64_t, std::vector<std::string>> BBAddrMapLabels;
+ if (SymbolizeOperands) {
collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI,
SectionAddr, Index, End, AllLabels);
+ collectBBAddrMapLabels(AddrToBBAddrMap, SectionAddr, Index, End,
+ BBAddrMapLabels);
+ }
while (Index < End) {
// ARM and AArch64 ELF binaries can interleave data and text in the
@@ -1450,9 +1498,15 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
// Print local label if there's any.
- auto Iter = AllLabels.find(SectionAddr + Index);
- if (Iter != AllLabels.end())
- FOS << "<" << Iter->second << ">:\n";
+ auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index);
+ if (Iter1 != BBAddrMapLabels.end()) {
+ for (StringRef Label : Iter1->second)
+ FOS << "<" << Label << ">:\n";
+ } else {
+ auto Iter2 = AllLabels.find(SectionAddr + Index);
+ if (Iter2 != AllLabels.end())
+ FOS << "<" << Iter2->second << ">:\n";
+ }
// Disassemble a real instruction or data when --disassemble-all is
// provided.
@@ -1547,6 +1601,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
// Print the labels corresponding to the target if there's any.
+ bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
bool LabelAvailable = AllLabels.count(Target);
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
@@ -1560,14 +1615,18 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// Always print the binary symbol precisely corresponding to
// the target address.
*TargetOS << TargetName;
- } else if (!LabelAvailable) {
+ } else if (BBAddrMapLabelAvailable) {
+ *TargetOS << BBAddrMapLabels[Target].front();
+ } else if (LabelAvailable) {
+ *TargetOS << AllLabels[Target];
+ } else {
// Always print the binary symbol plus an offset if there's no
// local label corresponding to the target address.
*TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp);
- } else {
- *TargetOS << AllLabels[Target];
}
*TargetOS << ">";
+ } else if (BBAddrMapLabelAvailable) {
+ *TargetOS << " <" << BBAddrMapLabels[Target].front() << ">";
} else if (LabelAvailable) {
*TargetOS << " <" << AllLabels[Target] << ">";
}
@@ -1634,9 +1693,12 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Package up features to be passed to target/subtarget
SubtargetFeatures Features = Obj->getFeatures();
- if (!MAttrs.empty())
+ if (!MAttrs.empty()) {
for (unsigned I = 0; I != MAttrs.size(); ++I)
Features.AddFeature(MAttrs[I]);
+ } else if (MCPU.empty() && Obj->getArch() == llvm::Triple::aarch64) {
+ Features.AddFeature("+all");
+ }
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
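// [Editorial sketch.] The "+all" default above means a bare AArch64
// disassembly no longer rejects extension instructions when the object
// carries no CPU hint and no --mcpu/--mattr was given (hypothetical input):
//
//   $ llvm-objdump -d uses-mte.o          # now decodes MTE instructions
//   $ llvm-objdump -d --mattr=+mte ...    # pre-patch workaround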
@@ -1653,7 +1715,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
"no assembly info for target " + TripleName);
if (MCPU.empty())
- MCPU = Obj->tryGetCPUName().getValueOr("").str();
+ MCPU = Obj->tryGetCPUName().value_or("").str();
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
@@ -1721,10 +1783,6 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
void objdump::printRelocations(const ObjectFile *Obj) {
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
- // Regular objdump doesn't print relocations in non-relocatable object
- // files.
- if (!Obj->isRelocatableObject())
- return;
// Build a mapping from relocation target to a vector of relocation
// sections. Usually, there is only one relocation section for
@@ -1732,6 +1790,8 @@ void objdump::printRelocations(const ObjectFile *Obj) {
MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
uint64_t Ndx;
for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) {
+ if (Obj->isELF() && (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC))
+ continue;
if (Section.relocation_begin() == Section.relocation_end())
continue;
Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
@@ -2073,7 +2133,7 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
dyn_cast<const XCOFFObjectFile>(O), Symbol);
if (SymRef) {
- Expected<StringRef> NameOrErr = SymRef.getValue().getName();
+ Expected<StringRef> NameOrErr = SymRef->getName();
if (NameOrErr) {
outs() << " (csect:";
@@ -2227,13 +2287,13 @@ static void printFaultMaps(const ObjectFile *Obj) {
outs() << "FaultMap table:\n";
- if (!FaultMapSection.hasValue()) {
+ if (!FaultMapSection) {
outs() << "<not found>\n";
return;
}
StringRef FaultMapContents =
- unwrapOrError(FaultMapSection.getValue().getContents(), Obj->getFileName());
+ unwrapOrError(FaultMapSection->getContents(), Obj->getFileName());
FaultMapParser FMP(FaultMapContents.bytes_begin(),
FaultMapContents.bytes_end());
@@ -2423,6 +2483,8 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
printRawClangAST(O);
if (FaultMapSection)
printFaultMaps(O);
+ if (Offloading)
+ dumpOffloadBinary(*O);
}
static void dumpObject(const COFFImportFile *I, const Archive *A,
@@ -2486,6 +2548,8 @@ static void dumpInput(StringRef file) {
dumpObject(O);
else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
parseInputMachO(UB);
+ else if (OffloadBinary *OB = dyn_cast<OffloadBinary>(&Binary))
+ dumpOffloadSections(*OB);
else
reportError(errorCodeToError(object_error::invalid_file_type), file);
}
@@ -2589,6 +2653,7 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
}
DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc);
FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section);
+ Offloading = InputArgs.hasArg(OBJDUMP_offloading);
FileHeaders = InputArgs.hasArg(OBJDUMP_file_headers);
SectionContents = InputArgs.hasArg(OBJDUMP_full_contents);
PrintLines = InputArgs.hasArg(OBJDUMP_line_numbers);
@@ -2756,12 +2821,12 @@ int main(int argc, char **argv) {
if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null &&
!DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST &&
!Relocations && !SectionHeaders && !SectionContents && !SymbolTable &&
- !DynamicSymbolTable && !UnwindInfo && !FaultMapSection &&
- !(MachOOpt &&
- (Bind || DataInCode || DylibId || DylibsUsed || ExportsTrie ||
- FirstPrivateHeader || FunctionStarts || IndirectSymbols || InfoPlist ||
- LazyBind || LinkOptHints || ObjcMetaData || Rebase || Rpaths ||
- UniversalHeaders || WeakBind || !FilterSections.empty()))) {
+ !DynamicSymbolTable && !UnwindInfo && !FaultMapSection && !Offloading &&
+ !(MachOOpt && (Bind || DataInCode || DyldInfo || DylibId || DylibsUsed ||
+ ExportsTrie || FirstPrivateHeader || FunctionStarts ||
+ IndirectSymbols || InfoPlist || LazyBind || LinkOptHints ||
+ ObjcMetaData || Rebase || Rpaths || UniversalHeaders ||
+ WeakBind || !FilterSections.empty()))) {
T->printHelp(ToolName);
return 2;
}
diff --git a/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp b/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp
index ffc907e09f11..4c851e14a12d 100644
--- a/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp
@@ -8,7 +8,6 @@
#include "BytesOutputStyle.h"
-#include "FormatUtil.h"
#include "StreamUtil.h"
#include "llvm-pdbutil.h"
@@ -17,6 +16,7 @@
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
@@ -83,13 +83,13 @@ static void printHeader(LinePrinter &P, const Twine &S) {
}
BytesOutputStyle::BytesOutputStyle(PDBFile &File)
- : File(File), P(2, false, outs()) {}
+ : File(File), P(2, false, outs(), opts::Filters) {}
Error BytesOutputStyle::dump() {
- if (opts::bytes::DumpBlockRange.hasValue()) {
+ if (opts::bytes::DumpBlockRange) {
auto &R = *opts::bytes::DumpBlockRange;
- uint32_t Max = R.Max.getValueOr(R.Min);
+ uint32_t Max = R.Max.value_or(R.Min);
if (Max < R.Min)
return make_error<StringError>(
@@ -104,9 +104,9 @@ Error BytesOutputStyle::dump() {
P.NewLine();
}
- if (opts::bytes::DumpByteRange.hasValue()) {
+ if (opts::bytes::DumpByteRange) {
auto &R = *opts::bytes::DumpByteRange;
- uint32_t Max = R.Max.getValueOr(File.getFileSize());
+ uint32_t Max = R.Max.value_or(File.getFileSize());
if (Max < R.Min)
return make_error<StringError>("Invalid byte range specified. Max < Min",
diff --git a/llvm/tools/llvm-pdbutil/BytesOutputStyle.h b/llvm/tools/llvm-pdbutil/BytesOutputStyle.h
index d3aceb47679e..cd28032fe7cd 100644
--- a/llvm/tools/llvm-pdbutil/BytesOutputStyle.h
+++ b/llvm/tools/llvm-pdbutil/BytesOutputStyle.h
@@ -9,10 +9,10 @@
#ifndef LLVM_TOOLS_LLVMPDBDUMP_BYTESOUTPUTSTYLE_H
#define LLVM_TOOLS_LLVMPDBDUMP_BYTESOUTPUTSTYLE_H
-#include "LinePrinter.h"
#include "OutputStyle.h"
#include "StreamUtil.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/Support/Error.h"
namespace llvm {
diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index ef299ea9d482..a173eb1faa62 100644
--- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -8,8 +8,6 @@
#include "DumpOutputStyle.h"
-#include "FormatUtil.h"
-#include "InputFile.h"
#include "MinimalSymbolDumper.h"
#include "MinimalTypeDumper.h"
#include "StreamUtil.h"
@@ -38,10 +36,13 @@
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -61,7 +62,7 @@ using namespace llvm::msf;
using namespace llvm::pdb;
DumpOutputStyle::DumpOutputStyle(InputFile &File)
- : File(File), P(2, false, outs()) {
+ : File(File), P(2, false, outs(), opts::Filters) {
if (opts::dump::DumpTypeRefStats)
RefTracker.reset(new TypeReferenceTracker(File));
}
@@ -99,8 +100,8 @@ Error DumpOutputStyle::dump() {
}
if (opts::dump::DumpSymbolStats) {
- if (auto EC = dumpSymbolStats())
- return EC;
+ ExitOnError Err("Unexpected error processing module stats: ");
+ Err(dumpSymbolStats());
P.NewLine();
}
@@ -129,33 +130,33 @@ Error DumpOutputStyle::dump() {
}
if (opts::dump::DumpModules) {
- if (auto EC = dumpModules())
- return EC;
+ ExitOnError Err("Unexpected error processing modules: ");
+ Err(dumpModules());
}
if (opts::dump::DumpModuleFiles) {
- if (auto EC = dumpModuleFiles())
- return EC;
+ ExitOnError Err("Unexpected error processing files: ");
+ Err(dumpModuleFiles());
}
if (opts::dump::DumpLines) {
- if (auto EC = dumpLines())
- return EC;
+ ExitOnError Err("Unexpected error processing lines: ");
+ Err(dumpLines());
}
if (opts::dump::DumpInlineeLines) {
- if (auto EC = dumpInlineeLines())
- return EC;
+ ExitOnError Err("Unexpected error processing inlinee lines: ");
+ Err(dumpInlineeLines());
}
if (opts::dump::DumpXmi) {
- if (auto EC = dumpXmi())
- return EC;
+ ExitOnError Err("Unexpected error processing cross module imports: ");
+ Err(dumpXmi());
}
if (opts::dump::DumpXme) {
- if (auto EC = dumpXme())
- return EC;
+ ExitOnError Err("Unexpected error processing cross module exports: ");
+ Err(dumpXme());
}
if (opts::dump::DumpFpo) {
@@ -198,9 +199,8 @@ Error DumpOutputStyle::dump() {
}
if (opts::dump::DumpSymbols) {
- auto EC = File.isPdb() ? dumpModuleSymsForPdb() : dumpModuleSymsForObj();
- if (EC)
- return EC;
+ ExitOnError Err("Unexpected error processing symbols: ");
+ Err(File.isPdb() ? dumpModuleSymsForPdb() : dumpModuleSymsForObj());
}
if (opts::dump::DumpTypeRefStats) {
@@ -260,7 +260,7 @@ Error DumpOutputStyle::dumpFileSummary() {
P.formatLine("Has Globals: {0}", getPdb().hasPDBGlobalsStream());
P.formatLine("Has Publics: {0}", getPdb().hasPDBPublicsStream());
if (getPdb().hasPDBDbiStream()) {
- auto &DBI = Err(getPdb().getPDBDbiStream());
+ DbiStream &DBI = Err(getPdb().getPDBDbiStream());
P.formatLine("Is incrementally linked: {0}", DBI.isIncrementallyLinked());
P.formatLine("Has conflicting types: {0}", DBI.hasCTypes());
P.formatLine("Is stripped: {0}", DBI.isStripped());
@@ -343,36 +343,6 @@ static void printModuleDetailStats(LinePrinter &P, StringRef Label,
}
}
-static bool isMyCode(const SymbolGroup &Group) {
- if (Group.getFile().isObj())
- return true;
-
- StringRef Name = Group.name();
- if (Name.startswith("Import:"))
- return false;
- if (Name.endswith_insensitive(".dll"))
- return false;
- if (Name.equals_insensitive("* linker *"))
- return false;
- if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools"))
- return false;
- if (Name.startswith_insensitive("f:\\dd\\vctools\\crt"))
- return false;
- return true;
-}
-
-static bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group) {
- if (opts::dump::JustMyCode && !isMyCode(Group))
- return false;
-
- // If the arg was not specified on the command line, always dump all modules.
- if (opts::dump::DumpModi.getNumOccurrences() == 0)
- return true;
-
- // Otherwise, only dump if this is the same module specified.
- return (opts::dump::DumpModi == Idx);
-}
-
Error DumpOutputStyle::dumpStreamSummary() {
printHeader(P, "Streams");
@@ -389,7 +359,7 @@ Error DumpOutputStyle::dumpStreamSummary() {
uint32_t StreamCount = getPdb().getNumStreams();
uint32_t MaxStreamSize = getPdb().getMaxStreamSize();
- for (uint16_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
+ for (uint32_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
P.formatLine(
"Stream {0} ({1} bytes): [{2}]",
fmt_align(StreamIdx, AlignStyle::Right, NumDigits(StreamCount)),
@@ -409,93 +379,6 @@ Error DumpOutputStyle::dumpStreamSummary() {
return Error::success();
}
-static Expected<ModuleDebugStreamRef> getModuleDebugStream(PDBFile &File,
- uint32_t Index) {
- ExitOnError Err("Unexpected error: ");
-
- auto &Dbi = Err(File.getPDBDbiStream());
- const auto &Modules = Dbi.modules();
- auto Modi = Modules.getModuleDescriptor(Index);
-
- uint16_t ModiStream = Modi.getModuleStreamIndex();
- if (ModiStream == kInvalidStreamIndex)
- return make_error<RawError>(raw_error_code::no_stream,
- "Module stream not present");
-
- auto ModStreamData = File.createIndexedStream(ModiStream);
-
- ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
- if (auto EC = ModS.reload())
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Invalid module stream");
-
- return std::move(ModS);
-}
-
-template <typename CallbackT>
-static void
-iterateOneModule(InputFile &File, const Optional<PrintScope> &HeaderScope,
- const SymbolGroup &SG, uint32_t Modi, CallbackT Callback) {
- if (HeaderScope) {
- HeaderScope->P.formatLine(
- "Mod {0:4} | `{1}`: ",
- fmt_align(Modi, AlignStyle::Right, HeaderScope->LabelWidth), SG.name());
- }
-
- AutoIndent Indent(HeaderScope);
- Callback(Modi, SG);
-}
-
-template <typename CallbackT>
-static void iterateSymbolGroups(InputFile &Input,
- const Optional<PrintScope> &HeaderScope,
- CallbackT Callback) {
- AutoIndent Indent(HeaderScope);
-
- ExitOnError Err("Unexpected error processing modules: ");
-
- if (opts::dump::DumpModi.getNumOccurrences() > 0) {
- assert(opts::dump::DumpModi.getNumOccurrences() == 1);
- uint32_t Modi = opts::dump::DumpModi;
- SymbolGroup SG(&Input, Modi);
- iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(Modi)), SG,
- Modi, Callback);
- return;
- }
-
- uint32_t I = 0;
-
- for (const auto &SG : Input.symbol_groups()) {
- if (shouldDumpSymbolGroup(I, SG))
- iterateOneModule(Input, withLabelWidth(HeaderScope, NumDigits(I)), SG, I,
- Callback);
-
- ++I;
- }
-}
-
-template <typename SubsectionT>
-static void iterateModuleSubsections(
- InputFile &File, const Optional<PrintScope> &HeaderScope,
- llvm::function_ref<void(uint32_t, const SymbolGroup &, SubsectionT &)>
- Callback) {
-
- iterateSymbolGroups(File, HeaderScope,
- [&](uint32_t Modi, const SymbolGroup &SG) {
- for (const auto &SS : SG.getDebugSubsections()) {
- SubsectionT Subsection;
-
- if (SS.kind() != Subsection.kind())
- continue;
-
- BinaryStreamReader Reader(SS.getRecordData());
- if (auto EC = Subsection.initialize(Reader))
- continue;
- Callback(Modi, SG, Subsection);
- }
- });
-}
-
static Expected<std::pair<std::unique_ptr<MappedBlockStream>,
ArrayRef<llvm::object::coff_section>>>
loadSectionHeaders(PDBFile &File, DbgHeaderType Type) {
@@ -504,7 +387,7 @@ loadSectionHeaders(PDBFile &File, DbgHeaderType Type) {
"Section headers require a DBI Stream, which could not be loaded",
inconvertibleErrorCode());
- auto &Dbi = cantFail(File.getPDBDbiStream());
+ DbiStream &Dbi = cantFail(File.getPDBDbiStream());
uint32_t SI = Dbi.getDebugStreamIndex(Type);
if (SI == kInvalidStreamIndex)
@@ -529,10 +412,10 @@ loadSectionHeaders(PDBFile &File, DbgHeaderType Type) {
return std::make_pair(std::move(Stream), Headers);
}
-static std::vector<std::string> getSectionNames(PDBFile &File) {
+static Expected<std::vector<std::string>> getSectionNames(PDBFile &File) {
auto ExpectedHeaders = loadSectionHeaders(File, DbgHeaderType::SectionHdr);
if (!ExpectedHeaders)
- return {};
+ return ExpectedHeaders.takeError();
std::unique_ptr<MappedBlockStream> Stream;
ArrayRef<object::coff_section> Headers;
@@ -590,31 +473,44 @@ Error DumpOutputStyle::dumpModules() {
}
AutoIndent Indent(P);
- ExitOnError Err("Unexpected error processing modules: ");
- auto &Stream = Err(getPdb().getPDBDbiStream());
+ Expected<DbiStream &> StreamOrErr = getPdb().getPDBDbiStream();
+ if (!StreamOrErr)
+ return StreamOrErr.takeError();
+ DbiStream &Stream = *StreamOrErr;
const DbiModuleList &Modules = Stream.modules();
- iterateSymbolGroups(
- File, PrintScope{P, 11}, [&](uint32_t Modi, const SymbolGroup &Strings) {
+ return iterateSymbolGroups(
+ File, PrintScope{P, 11},
+ [&](uint32_t Modi, const SymbolGroup &Strings) -> Error {
auto Desc = Modules.getModuleDescriptor(Modi);
if (opts::dump::DumpSectionContribs) {
- std::vector<std::string> Sections = getSectionNames(getPdb());
+ auto SectionsOrErr = getSectionNames(getPdb());
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+ ArrayRef<std::string> Sections = *SectionsOrErr;
dumpSectionContrib(P, Desc.getSectionContrib(), Sections, 0);
}
P.formatLine("Obj: `{0}`: ", Desc.getObjFileName());
P.formatLine("debug stream: {0}, # files: {1}, has ec info: {2}",
Desc.getModuleStreamIndex(), Desc.getNumberOfFiles(),
Desc.hasECInfo());
- StringRef PdbFilePath =
- Err(Stream.getECName(Desc.getPdbFilePathNameIndex()));
- StringRef SrcFilePath =
- Err(Stream.getECName(Desc.getSourceFileNameIndex()));
+
+ auto PdbPathOrErr = Stream.getECName(Desc.getPdbFilePathNameIndex());
+ if (!PdbPathOrErr)
+ return PdbPathOrErr.takeError();
+ StringRef PdbFilePath = *PdbPathOrErr;
+
+ auto SrcPathOrErr = Stream.getECName(Desc.getSourceFileNameIndex());
+ if (!SrcPathOrErr)
+ return SrcPathOrErr.takeError();
+ StringRef SrcFilePath = *SrcPathOrErr;
+
P.formatLine("pdb file ni: {0} `{1}`, src file ni: {2} `{3}`",
Desc.getPdbFilePathNameIndex(), PdbFilePath,
Desc.getSourceFileNameIndex(), SrcFilePath);
+ return Error::success();
});
- return Error::success();
}
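The rewrite above shows the pattern applied to every dumper in this patch:
ExitOnError, which terminates the process on failure, is replaced by
unwrapping llvm::Expected<T> and forwarding the error to the caller. A
minimal sketch, with a hypothetical function name:

  Error dumpSomething(PDBFile &File) {
    Expected<DbiStream &> StreamOrErr = File.getPDBDbiStream();
    if (!StreamOrErr)
      return StreamOrErr.takeError(); // propagate instead of exiting
    DbiStream &Dbi = *StreamOrErr;
    // ... use Dbi ...
    return Error::success();
  }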
Error DumpOutputStyle::dumpModuleFiles() {
@@ -630,18 +526,20 @@ Error DumpOutputStyle::dumpModuleFiles() {
return Error::success();
}
- ExitOnError Err("Unexpected error processing modules: ");
-
- iterateSymbolGroups(File, PrintScope{P, 11},
- [this, &Err](uint32_t Modi, const SymbolGroup &Strings) {
- auto &Stream = Err(getPdb().getPDBDbiStream());
+ return iterateSymbolGroups(
+ File, PrintScope{P, 11},
+ [this](uint32_t Modi, const SymbolGroup &Strings) -> Error {
+ Expected<DbiStream &> StreamOrErr = getPdb().getPDBDbiStream();
+ if (!StreamOrErr)
+ return StreamOrErr.takeError();
+ DbiStream &Stream = *StreamOrErr;
- const DbiModuleList &Modules = Stream.modules();
- for (const auto &F : Modules.source_files(Modi)) {
- Strings.formatFromFileName(P, F);
- }
- });
- return Error::success();
+ const DbiModuleList &Modules = Stream.modules();
+ for (const auto &F : Modules.source_files(Modi)) {
+ Strings.formatFromFileName(P, F);
+ }
+ return Error::success();
+ });
}
Error DumpOutputStyle::dumpSymbolStats() {
@@ -652,39 +550,40 @@ Error DumpOutputStyle::dumpSymbolStats() {
return Error::success();
}
- ExitOnError Err("Unexpected error processing modules: ");
-
StatCollection SymStats;
StatCollection ChunkStats;
-
- Optional<PrintScope> Scope;
- if (File.isPdb())
- Scope.emplace(P, 2);
-
- iterateSymbolGroups(File, Scope, [&](uint32_t Modi, const SymbolGroup &SG) {
- StatCollection SS = getSymbolStats(SG, SymStats);
- StatCollection CS = getChunkStats(SG, ChunkStats);
-
- if (SG.getFile().isPdb()) {
- AutoIndent Indent(P);
- auto Modules = cantFail(File.pdb().getPDBDbiStream()).modules();
- uint32_t ModCount = Modules.getModuleCount();
- DbiModuleDescriptor Desc = Modules.getModuleDescriptor(Modi);
- uint32_t StreamIdx = Desc.getModuleStreamIndex();
-
- if (StreamIdx == kInvalidStreamIndex) {
- P.formatLine("Mod {0} (debug info not present): [{1}]",
- fmt_align(Modi, AlignStyle::Right, NumDigits(ModCount)),
- Desc.getModuleName());
- return;
- }
- P.formatLine("Stream {0}, {1} bytes", StreamIdx,
- getPdb().getStreamByteSize(StreamIdx));
-
- printModuleDetailStats<SymbolKind>(P, "Symbols", SS);
- printModuleDetailStats<DebugSubsectionKind>(P, "Chunks", CS);
- }
- });
+ PrintScope Scope(P, 2);
+
+ if (Error Err = iterateSymbolGroups(
+ File, Scope, [&](uint32_t Modi, const SymbolGroup &SG) -> Error {
+ StatCollection SS = getSymbolStats(SG, SymStats);
+ StatCollection CS = getChunkStats(SG, ChunkStats);
+
+ if (!SG.getFile().isPdb())
+ return Error::success();
+
+ AutoIndent Indent(P);
+ auto Modules = cantFail(File.pdb().getPDBDbiStream()).modules();
+ uint32_t ModCount = Modules.getModuleCount();
+ DbiModuleDescriptor Desc = Modules.getModuleDescriptor(Modi);
+ uint32_t StreamIdx = Desc.getModuleStreamIndex();
+
+ if (StreamIdx == kInvalidStreamIndex) {
+ P.formatLine(
+ "Mod {0} (debug info not present): [{1}]",
+ fmt_align(Modi, AlignStyle::Right, NumDigits(ModCount)),
+ Desc.getModuleName());
+ return Error::success();
+ }
+ P.formatLine("Stream {0}, {1} bytes", StreamIdx,
+ getPdb().getStreamByteSize(StreamIdx));
+
+ printModuleDetailStats<SymbolKind>(P, "Symbols", SS);
+ printModuleDetailStats<DebugSubsectionKind>(P, "Chunks", CS);
+
+ return Error::success();
+ }))
+ return Err;
if (SymStats.Totals.Count > 0) {
P.printLine(" Summary |");
@@ -944,11 +843,11 @@ Error DumpOutputStyle::dumpLines() {
uint32_t LastModi = UINT32_MAX;
uint32_t LastNameIndex = UINT32_MAX;
- iterateModuleSubsections<DebugLinesSubsectionRef>(
+ return iterateModuleSubsections<DebugLinesSubsectionRef>(
File, PrintScope{P, 4},
- [this, &LastModi, &LastNameIndex](uint32_t Modi,
- const SymbolGroup &Strings,
- DebugLinesSubsectionRef &Lines) {
+ [this, &LastModi,
+ &LastNameIndex](uint32_t Modi, const SymbolGroup &Strings,
+ DebugLinesSubsectionRef &Lines) -> Error {
uint16_t Segment = Lines.header()->RelocSegment;
uint32_t Begin = Lines.header()->RelocOffset;
uint32_t End = Begin + Lines.header()->CodeSize;
@@ -970,9 +869,8 @@ Error DumpOutputStyle::dumpLines() {
P.NewLine();
typesetLinesAndColumns(P, Begin, Block);
}
+ return Error::success();
});
-
- return Error::success();
}
Error DumpOutputStyle::dumpInlineeLines() {
@@ -983,10 +881,10 @@ Error DumpOutputStyle::dumpInlineeLines() {
return Error::success();
}
- iterateModuleSubsections<DebugInlineeLinesSubsectionRef>(
+ return iterateModuleSubsections<DebugInlineeLinesSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
- DebugInlineeLinesSubsectionRef &Lines) {
+ DebugInlineeLinesSubsectionRef &Lines) -> Error {
P.formatLine("{0,+8} | {1,+5} | {2}", "Inlinee", "Line", "Source File");
for (const auto &Entry : Lines) {
P.formatLine("{0,+8} | {1,+5} | ", Entry.Header->Inlinee,
@@ -998,9 +896,8 @@ Error DumpOutputStyle::dumpInlineeLines() {
}
}
P.NewLine();
+ return Error::success();
});
-
- return Error::success();
}
Error DumpOutputStyle::dumpXmi() {
@@ -1011,10 +908,10 @@ Error DumpOutputStyle::dumpXmi() {
return Error::success();
}
- iterateModuleSubsections<DebugCrossModuleImportsSubsectionRef>(
+ return iterateModuleSubsections<DebugCrossModuleImportsSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
- DebugCrossModuleImportsSubsectionRef &Imports) {
+ DebugCrossModuleImportsSubsectionRef &Imports) -> Error {
P.formatLine("{0,=32} | {1}", "Imported Module", "Type IDs");
for (const auto &Xmi : Imports) {
@@ -1039,9 +936,8 @@ Error DumpOutputStyle::dumpXmi() {
typesetItemList(TIs, P.getIndentLevel() + 35, 12, " ");
P.formatLine("{0,+32} | {1}", Module, Result);
}
+ return Error::success();
});
-
- return Error::success();
}
Error DumpOutputStyle::dumpXme() {
@@ -1052,18 +948,17 @@ Error DumpOutputStyle::dumpXme() {
return Error::success();
}
- iterateModuleSubsections<DebugCrossModuleExportsSubsectionRef>(
+ return iterateModuleSubsections<DebugCrossModuleExportsSubsectionRef>(
File, PrintScope{P, 2},
[this](uint32_t Modi, const SymbolGroup &Strings,
- DebugCrossModuleExportsSubsectionRef &Exports) {
+ DebugCrossModuleExportsSubsectionRef &Exports) -> Error {
P.formatLine("{0,-10} | {1}", "Local ID", "Global ID");
for (const auto &Export : Exports) {
P.formatLine("{0,+10:X+} | {1}", TypeIndex(Export.Local),
TypeIndex(Export.Global));
}
+ return Error::success();
});
-
- return Error::success();
}
std::string formatFrameType(object::frame_type FT) {
@@ -1084,7 +979,7 @@ Error DumpOutputStyle::dumpOldFpo(PDBFile &File) {
printHeader(P, "Old FPO Data");
ExitOnError Err("Error dumping old fpo data:");
- auto &Dbi = Err(File.getPDBDbiStream());
+ DbiStream &Dbi = Err(File.getPDBDbiStream());
if (!Dbi.hasOldFpoRecords()) {
printStreamNotPresent("FPO");
@@ -1111,7 +1006,7 @@ Error DumpOutputStyle::dumpNewFpo(PDBFile &File) {
printHeader(P, "New FPO Data");
ExitOnError Err("Error dumping new fpo data:");
- auto &Dbi = Err(File.getPDBDbiStream());
+ DbiStream &Dbi = Err(File.getPDBDbiStream());
if (!Dbi.hasNewFpoRecords()) {
printStreamNotPresent("New FPO");
@@ -1232,10 +1127,10 @@ Error DumpOutputStyle::dumpStringTableFromPdb() {
}
Error DumpOutputStyle::dumpStringTableFromObj() {
- iterateModuleSubsections<DebugStringTableSubsectionRef>(
+ return iterateModuleSubsections<DebugStringTableSubsectionRef>(
File, PrintScope{P, 4},
[&](uint32_t Modi, const SymbolGroup &Strings,
- DebugStringTableSubsectionRef &Strings2) {
+ DebugStringTableSubsectionRef &Strings2) -> Error {
BinaryStreamRef StringTableBuffer = Strings2.getBuffer();
BinaryStreamReader Reader(StringTableBuffer);
while (Reader.bytesRemaining() > 0) {
@@ -1248,8 +1143,8 @@ Error DumpOutputStyle::dumpStringTableFromObj() {
P.formatLine("{0} | {1}", fmt_align(Offset, AlignStyle::Right, 4),
Str);
}
+ return Error::success();
});
- return Error::success();
}
Error DumpOutputStyle::dumpNamedStreams() {
@@ -1352,10 +1247,16 @@ static void dumpPartialTypeStream(LinePrinter &Printer,
for (const auto &I : TiList) {
TypeIndex TI(I);
- CVType Type = Types.getType(TI);
- if (auto EC = codeview::visitTypeRecord(Type, TI, V))
- Printer.formatLine("An error occurred dumping type record {0}: {1}", TI,
- toString(std::move(EC)));
+ if (TI.isSimple()) {
+ Printer.formatLine("{0} | {1}", fmt_align(I, AlignStyle::Right, Width),
+ Types.getTypeName(TI));
+ } else if (Optional<CVType> Type = Types.tryGetType(TI)) {
+ if (auto EC = codeview::visitTypeRecord(*Type, TI, V))
+ Printer.formatLine("An error occurred dumping type record {0}: {1}",
+ TI, toString(std::move(EC)));
+ } else {
+ Printer.formatLine("Type {0} doesn't exist in TPI stream", TI);
+ }
}
}
}
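The new lookup order above distinguishes three cases. A condensed sketch,
assuming a LazyRandomTypeCollection named Types and an illustrative index:

  TypeIndex TI(0x1004); // illustrative value
  if (TI.isSimple()) {
    // Builtin indices never appear in the TPI stream; format by name.
    StringRef Name = Types.getTypeName(TI);
  } else if (Optional<CVType> Type = Types.tryGetType(TI)) {
    // Present in the stream; safe to visit the record.
  } else {
    // Absent: report it instead of failing on the lookup.
  }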
@@ -1526,8 +1427,6 @@ Error DumpOutputStyle::dumpModuleSymsForObj() {
AutoIndent Indent(P);
- ExitOnError Err("Unexpected error processing symbols: ");
-
auto &Types = File.types();
SymbolVisitorCallbackPipeline Pipeline;
@@ -1538,25 +1437,18 @@ Error DumpOutputStyle::dumpModuleSymsForObj() {
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
- std::unique_ptr<llvm::Error> SymbolError;
-
- iterateModuleSubsections<DebugSymbolsSubsectionRef>(
+ return iterateModuleSubsections<DebugSymbolsSubsectionRef>(
File, PrintScope{P, 2},
[&](uint32_t Modi, const SymbolGroup &Strings,
- DebugSymbolsSubsectionRef &Symbols) {
+ DebugSymbolsSubsectionRef &Symbols) -> Error {
Dumper.setSymbolGroup(&Strings);
for (auto Symbol : Symbols) {
if (auto EC = Visitor.visitSymbolRecord(Symbol)) {
- SymbolError = std::make_unique<Error>(std::move(EC));
- return;
+ return EC;
}
}
+ return Error::success();
});
-
- if (SymbolError)
- return std::move(*SymbolError);
-
- return Error::success();
}
Error DumpOutputStyle::dumpModuleSymsForPdb() {
@@ -1568,18 +1460,18 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() {
}
AutoIndent Indent(P);
- ExitOnError Err("Unexpected error processing symbols: ");
auto &Ids = File.ids();
auto &Types = File.types();
- iterateSymbolGroups(
- File, PrintScope{P, 2}, [&](uint32_t I, const SymbolGroup &Strings) {
+ return iterateSymbolGroups(
+ File, PrintScope{P, 2},
+ [&](uint32_t I, const SymbolGroup &Strings) -> Error {
auto ExpectedModS = getModuleDebugStream(File.pdb(), I);
if (!ExpectedModS) {
P.formatLine("Error loading module stream {0}. {1}", I,
toString(ExpectedModS.takeError()));
- return;
+ return Error::success();
}
ModuleDebugStreamRef &ModS = *ExpectedModS;
@@ -1593,14 +1485,25 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() {
Pipeline.addCallbackToPipeline(Dumper);
CVSymbolVisitor Visitor(Pipeline);
auto SS = ModS.getSymbolsSubstream();
- if (auto EC =
- Visitor.visitSymbolStream(ModS.getSymbolArray(), SS.Offset)) {
+ if (opts::Filters.SymbolOffset) {
+ CVSymbolVisitor::FilterOptions Filter;
+ Filter.SymbolOffset = opts::Filters.SymbolOffset;
+ Filter.ParentRecursiveDepth = opts::Filters.ParentRecurseDepth;
+ Filter.ChildRecursiveDepth = opts::Filters.ChildrenRecurseDepth;
+ if (auto EC = Visitor.visitSymbolStreamFiltered(ModS.getSymbolArray(),
+ Filter)) {
+ P.formatLine("Error while processing symbol records. {0}",
+ toString(std::move(EC)));
+ return EC;
+ }
+ } else if (auto EC = Visitor.visitSymbolStream(ModS.getSymbolArray(),
+ SS.Offset)) {
P.formatLine("Error while processing symbol records. {0}",
toString(std::move(EC)));
- return;
+ return EC;
}
+ return Error::success();
});
- return Error::success();
}
Error DumpOutputStyle::dumpTypeRefStats() {
@@ -1925,7 +1828,7 @@ Error DumpOutputStyle::dumpSectionContribs() {
AutoIndent Indent(P);
ExitOnError Err("Error dumping section contributions: ");
- auto &Dbi = Err(getPdb().getPDBDbiStream());
+ DbiStream &Dbi = Err(getPdb().getPDBDbiStream());
class Visitor : public ISectionContribVisitor {
public:
@@ -1948,8 +1851,11 @@ Error DumpOutputStyle::dumpSectionContribs() {
ArrayRef<std::string> Names;
};
- std::vector<std::string> Names = getSectionNames(getPdb());
- Visitor V(P, makeArrayRef(Names));
+ auto NamesOrErr = getSectionNames(getPdb());
+ if (!NamesOrErr)
+ return NamesOrErr.takeError();
+ ArrayRef<std::string> Names = *NamesOrErr;
+ Visitor V(P, Names);
Dbi.visitSectionContributions(V);
return Error::success();
}
@@ -1970,7 +1876,7 @@ Error DumpOutputStyle::dumpSectionMap() {
AutoIndent Indent(P);
ExitOnError Err("Error dumping section map: ");
- auto &Dbi = Err(getPdb().getPDBDbiStream());
+ DbiStream &Dbi = Err(getPdb().getPDBDbiStream());
uint32_t I = 0;
for (auto &M : Dbi.getSectionMap()) {
diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.h b/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
index 041fb93a18a5..217d25d66d8b 100644
--- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
+++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
@@ -9,13 +9,13 @@
#ifndef LLVM_TOOLS_LLVMPDBDUMP_DUMPOUTPUTSTYLE_H
#define LLVM_TOOLS_LLVMPDBDUMP_DUMPOUTPUTSTYLE_H
-#include "LinePrinter.h"
#include "OutputStyle.h"
#include "StreamUtil.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include <string>
diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
index b631bdf8f2b1..13a5f6ea6fe7 100644
--- a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
@@ -8,17 +8,20 @@
#include "ExplainOutputStyle.h"
-#include "FormatUtil.h"
-#include "InputFile.h"
#include "StreamUtil.h"
#include "llvm-pdbutil.h"
#include "llvm/DebugInfo/CodeView/Formatters.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Error.h"
@@ -29,7 +32,7 @@ using namespace llvm::msf;
using namespace llvm::pdb;
ExplainOutputStyle::ExplainOutputStyle(InputFile &File, uint64_t FileOffset)
- : File(File), FileOffset(FileOffset), P(2, false, outs()) {}
+ : File(File), FileOffset(FileOffset), P(2, false, outs(), opts::Filters) {}
Error ExplainOutputStyle::dump() {
P.formatLine("Explaining file offset {0} of file '{1}'.", FileOffset,
diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h
index f405cf615e92..e3d19f25a9ea 100644
--- a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h
+++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h
@@ -9,9 +9,10 @@
#ifndef LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H
#define LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H
-#include "LinePrinter.h"
#include "OutputStyle.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
+
#include <string>
namespace llvm {
diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index e6b5d21f36e5..8e17284871a9 100644
--- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -8,17 +8,19 @@
#include "MinimalSymbolDumper.h"
-#include "FormatUtil.h"
-#include "InputFile.h"
-#include "LinePrinter.h"
-
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/Formatters.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Support/FormatVariadic.h"
using namespace llvm;
diff --git a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index 08006e9c62d4..be7e487673fb 100644
--- a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -8,8 +8,6 @@
#include "MinimalTypeDumper.h"
-#include "FormatUtil.h"
-#include "LinePrinter.h"
#include "TypeReferenceTracker.h"
#include "llvm-pdbutil.h"
@@ -19,8 +17,13 @@
#include "llvm/DebugInfo/CodeView/Formatters.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
diff --git a/llvm/tools/llvm-pdbutil/OutputStyle.h b/llvm/tools/llvm-pdbutil/OutputStyle.h
index da93c32053f3..8cc9016d79a2 100644
--- a/llvm/tools/llvm-pdbutil/OutputStyle.h
+++ b/llvm/tools/llvm-pdbutil/OutputStyle.h
@@ -9,9 +9,10 @@
#ifndef LLVM_TOOLS_LLVMPDBDUMP_OUTPUTSTYLE_H
#define LLVM_TOOLS_LLVMPDBDUMP_OUTPUTSTYLE_H
-#include "llvm/Support/Error.h"
-
namespace llvm {
+
+class Error;
+
namespace pdb {
class OutputStyle {
diff --git a/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
index cd01a4004819..895066146a9d 100644
--- a/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "PrettyBuiltinDumper.h"
-#include "LinePrinter.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
using namespace llvm;
@@ -90,6 +90,8 @@ StringRef BuiltinDumper::getTypeName(const PDBSymbolTypeBuiltin &Symbol) {
return "char16_t";
case PDB_BuiltinType::Char32:
return "char32_t";
+ case PDB_BuiltinType::Char8:
+ return "char8_t";
case PDB_BuiltinType::None:
return "...";
}
diff --git a/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
index b7eccac5988c..2285ed16d2a5 100644
--- a/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
@@ -8,13 +8,14 @@
#include "PrettyClassDefinitionDumper.h"
-#include "LinePrinter.h"
#include "PrettyClassLayoutGraphicalDumper.h"
#include "llvm-pdbutil.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/UDTLayout.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
index a522935e34f1..1ade7f397030 100644
--- a/llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
@@ -8,7 +8,6 @@
#include "PrettyClassLayoutGraphicalDumper.h"
-#include "LinePrinter.h"
#include "PrettyClassDefinitionDumper.h"
#include "PrettyEnumDumper.h"
#include "PrettyFunctionDumper.h"
@@ -17,8 +16,10 @@
#include "PrettyVariableDumper.h"
#include "llvm-pdbutil.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/UDTLayout.h"
#include "llvm/Support/Format.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index cf769ff66472..591bd4f93702 100644
--- a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -8,7 +8,6 @@
#include "PrettyCompilandDumper.h"
-#include "LinePrinter.h"
#include "PrettyFunctionDumper.h"
#include "llvm-pdbutil.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
index 9ed5893f252e..64557ff09c72 100644
--- a/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp
@@ -8,10 +8,11 @@
#include "PrettyEnumDumper.h"
-#include "LinePrinter.h"
#include "PrettyBuiltinDumper.h"
#include "llvm-pdbutil.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
index fede031ec0c0..34436c572c8a 100644
--- a/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
@@ -7,8 +7,9 @@
//===----------------------------------------------------------------------===//
#include "PrettyExternalSymbolDumper.h"
-#include "LinePrinter.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
#include "llvm/Support/Format.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index b820ca333965..83cf4d918322 100644
--- a/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -7,16 +7,19 @@
//===----------------------------------------------------------------------===//
#include "PrettyFunctionDumper.h"
-#include "LinePrinter.h"
#include "PrettyBuiltinDumper.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
index 2f7a39803ca5..9547d4e4ed35 100644
--- a/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp
@@ -8,7 +8,6 @@
#include "PrettyTypeDumper.h"
-#include "LinePrinter.h"
#include "PrettyBuiltinDumper.h"
#include "PrettyClassDefinitionDumper.h"
#include "PrettyEnumDumper.h"
@@ -16,6 +15,8 @@
#include "PrettyTypedefDumper.h"
#include "llvm-pdbutil.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
@@ -25,6 +26,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
#include "llvm/DebugInfo/PDB/UDTLayout.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormatVariadic.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
index ef73a8cdf9c4..197aa07299d1 100644
--- a/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
@@ -8,13 +8,15 @@
#include "PrettyTypedefDumper.h"
-#include "LinePrinter.h"
#include "PrettyBuiltinDumper.h"
#include "PrettyFunctionDumper.h"
#include "PrettyTypeDumper.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp
index 6dd7cc384cc9..e9ac6984356c 100644
--- a/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyVariableDumper.cpp
@@ -8,21 +8,23 @@
#include "PrettyVariableDumper.h"
-#include "LinePrinter.h"
#include "PrettyBuiltinDumper.h"
#include "PrettyFunctionDumper.h"
#include "llvm-pdbutil.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/Format.h"
diff --git a/llvm/tools/llvm-pdbutil/StreamUtil.cpp b/llvm/tools/llvm-pdbutil/StreamUtil.cpp
index d0d0a9fbe927..878fb77353fa 100644
--- a/llvm/tools/llvm-pdbutil/StreamUtil.cpp
+++ b/llvm/tools/llvm-pdbutil/StreamUtil.cpp
@@ -7,13 +7,13 @@
//===----------------------------------------------------------------------===//
#include "StreamUtil.h"
-#include "FormatUtil.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
@@ -95,7 +95,7 @@ void llvm::pdb::discoverStreamPurposes(PDBFile &File,
}
Streams.resize(StreamCount);
- for (uint16_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
+ for (uint32_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
if (StreamIdx == OldMSFDirectory)
Streams[StreamIdx] =
stream(StreamPurpose::Other, "Old MSF Directory", StreamIdx);
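Widening the induction variable fixes a potential infinite loop: a uint16_t
index wraps from 65535 back to 0, so for any StreamCount above 65535 the
comparison StreamIdx < StreamCount could never become false.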
diff --git a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp
index f184f02e01ee..d813bc22a93c 100644
--- a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp
+++ b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.cpp
@@ -9,10 +9,12 @@
#include "TypeReferenceTracker.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/Object/COFF.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h
index 8861731ab6ee..c586f6523c57 100644
--- a/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h
+++ b/llvm/tools/llvm-pdbutil/TypeReferenceTracker.h
@@ -9,14 +9,13 @@
#ifndef LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
#define LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
-#include "InputFile.h"
-
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
#include "llvm/Support/Error.h"
namespace llvm {
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index b152ebd6dccb..3b922a7bea21 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -15,8 +15,6 @@
#include "BytesOutputStyle.h"
#include "DumpOutputStyle.h"
#include "ExplainOutputStyle.h"
-#include "InputFile.h"
-#include "LinePrinter.h"
#include "OutputStyle.h"
#include "PrettyClassDefinitionDumper.h"
#include "PrettyCompilandDumper.h"
@@ -44,14 +42,18 @@
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
@@ -67,6 +69,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
@@ -195,6 +198,8 @@ static cl::opt<bool> Typedefs("typedefs", cl::desc("Dump typedefs"),
cl::sub(DiaDumpSubcommand));
} // namespace diadump
+FilterOptions Filters;
+
namespace pretty {
cl::list<std::string> InputFilenames(cl::Positional,
cl::desc("<input PDB files>"),
@@ -211,7 +216,7 @@ cl::opt<bool> ShowInjectedSourceContent(
cl::list<std::string> WithName(
"with-name",
cl::desc("Display any symbol or type with the specified exact name"),
- cl::cat(TypeCategory), cl::ZeroOrMore, cl::sub(PrettySubcommand));
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand));
cl::opt<bool> Compilands("compilands", cl::desc("Display compilands"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
@@ -224,7 +229,7 @@ cl::opt<bool> Externals("externals", cl::desc("Dump external symbols"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
cl::list<SymLevel> SymTypes(
"sym-types", cl::desc("Type of symbols to dump (default all)"),
- cl::cat(TypeCategory), cl::sub(PrettySubcommand), cl::ZeroOrMore,
+ cl::cat(TypeCategory), cl::sub(PrettySubcommand),
cl::values(
clEnumValN(SymLevel::Thunks, "thunks", "Display thunk symbols"),
clEnumValN(SymLevel::Data, "data", "Display data symbols"),
@@ -310,28 +315,31 @@ cl::opt<cl::boolOrDefault>
ColorOutput("color-output",
cl::desc("Override use of color (default = isatty)"),
cl::cat(OtherOptions), cl::sub(PrettySubcommand));
-cl::list<std::string> ExcludeTypes(
- "exclude-types", cl::desc("Exclude types by regular expression"),
- cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand));
-cl::list<std::string> ExcludeSymbols(
- "exclude-symbols", cl::desc("Exclude symbols by regular expression"),
- cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand));
-cl::list<std::string> ExcludeCompilands(
- "exclude-compilands", cl::desc("Exclude compilands by regular expression"),
- cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand));
+cl::list<std::string>
+ ExcludeTypes("exclude-types",
+ cl::desc("Exclude types by regular expression"),
+ cl::cat(FilterCategory), cl::sub(PrettySubcommand));
+cl::list<std::string>
+ ExcludeSymbols("exclude-symbols",
+ cl::desc("Exclude symbols by regular expression"),
+ cl::cat(FilterCategory), cl::sub(PrettySubcommand));
+cl::list<std::string>
+ ExcludeCompilands("exclude-compilands",
+ cl::desc("Exclude compilands by regular expression"),
+ cl::cat(FilterCategory), cl::sub(PrettySubcommand));
cl::list<std::string> IncludeTypes(
"include-types",
cl::desc("Include only types which match a regular expression"),
- cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand));
+ cl::cat(FilterCategory), cl::sub(PrettySubcommand));
cl::list<std::string> IncludeSymbols(
"include-symbols",
cl::desc("Include only symbols which match a regular expression"),
- cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand));
+ cl::cat(FilterCategory), cl::sub(PrettySubcommand));
cl::list<std::string> IncludeCompilands(
"include-compilands",
cl::desc("Include only compilands those which match a regular expression"),
- cl::ZeroOrMore, cl::cat(FilterCategory), cl::sub(PrettySubcommand));
+ cl::cat(FilterCategory), cl::sub(PrettySubcommand));
cl::opt<uint32_t> SizeThreshold(
"min-type-size", cl::desc("Displays only those types which are greater "
"than or equal to the specified size."),
@@ -384,7 +392,7 @@ cl::opt<std::string>
cl::sub(BytesSubcommand), cl::cat(MsfBytes));
cl::list<std::string>
- DumpStreamData("stream-data", cl::CommaSeparated, cl::ZeroOrMore,
+ DumpStreamData("stream-data", cl::CommaSeparated,
cl::desc("Dump binary data from specified streams. Format "
"is SN[:Start][@Size]"),
cl::sub(BytesSubcommand), cl::cat(MsfBytes));
@@ -407,14 +415,12 @@ cl::opt<bool> TypeServerMap("type-server", cl::desc("Dump type server map"),
cl::opt<bool> ECData("ec", cl::desc("Dump edit and continue map"),
cl::sub(BytesSubcommand), cl::cat(DbiBytes));
-cl::list<uint32_t>
- TypeIndex("type",
- cl::desc("Dump the type record with the given type index"),
- cl::ZeroOrMore, cl::CommaSeparated, cl::sub(BytesSubcommand),
- cl::cat(TypeCategory));
+cl::list<uint32_t> TypeIndex(
+ "type", cl::desc("Dump the type record with the given type index"),
+ cl::CommaSeparated, cl::sub(BytesSubcommand), cl::cat(TypeCategory));
cl::list<uint32_t>
IdIndex("id", cl::desc("Dump the id record with the given type index"),
- cl::ZeroOrMore, cl::CommaSeparated, cl::sub(BytesSubcommand),
+ cl::CommaSeparated, cl::sub(BytesSubcommand),
cl::cat(TypeCategory));
cl::opt<uint32_t> ModuleIndex(
@@ -500,7 +506,7 @@ cl::opt<bool> DontResolveForwardRefs(
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
cl::list<uint32_t> DumpTypeIndex(
- "type-index", cl::ZeroOrMore, cl::CommaSeparated,
+ "type-index", cl::CommaSeparated,
cl::desc("only dump types with the specified hexadecimal type index"),
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
@@ -516,7 +522,7 @@ cl::opt<bool> DumpIdExtras("id-extras",
cl::desc("dump id hashes and index offsets"),
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
cl::list<uint32_t> DumpIdIndex(
- "id-index", cl::ZeroOrMore, cl::CommaSeparated,
+ "id-index", cl::CommaSeparated,
cl::desc("only dump ids with the specified hexadecimal type index"),
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
@@ -536,7 +542,7 @@ cl::list<std::string> DumpGlobalNames(
"global-name",
cl::desc(
"With -globals, only dump globals whose name matches the given value"),
- cl::cat(SymbolOptions), cl::sub(DumpSubcommand), cl::ZeroOrMore);
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpPublics("publics", cl::desc("dump Publics stream data"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpPublicExtras("public-extras",
@@ -557,6 +563,27 @@ cl::opt<bool>
cl::opt<bool> DumpFpo("fpo", cl::desc("dump FPO records"),
cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::opt<uint32_t> DumpSymbolOffset(
+ "symbol-offset", cl::Optional,
+ cl::desc("only dump symbol record with the specified symbol offset"),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DumpParents("show-parents",
+                          cl::desc("dump all parents of the symbol record."),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::opt<uint32_t>
+ DumpParentDepth("parent-recurse-depth", cl::Optional, cl::init(-1U),
+ cl::desc("only recurse to a depth of N when displaying "
+ "parents of a symbol record."),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::opt<bool> DumpChildren("show-children",
+                           cl::desc("dump all children of the symbol record."),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+cl::opt<uint32_t>
+ DumpChildrenDepth("children-recurse-depth", cl::Optional, cl::init(-1U),
+ cl::desc("only recurse to a depth of N when displaying "
+ "children of a symbol record."),
+ cl::cat(SymbolOptions), cl::sub(DumpSubcommand));
+
// MODULE & FILE OPTIONS
cl::opt<bool> DumpModules("modules", cl::desc("dump compiland information"),
cl::cat(FileOptions), cl::sub(DumpSubcommand));
@@ -680,7 +707,7 @@ cl::opt<bool> DumpModuleFiles("module-files", cl::desc("dump file information"),
cl::cat(FileOptions),
cl::sub(PdbToYamlSubcommand));
cl::list<ModuleSubsection> DumpModuleSubsections(
- "subsections", cl::ZeroOrMore, cl::CommaSeparated,
+ "subsections", cl::CommaSeparated,
cl::desc("dump subsections from each module's debug stream"), ChunkValues,
cl::cat(FileOptions), cl::sub(PdbToYamlSubcommand));
cl::opt<bool> DumpModuleSyms("module-syms", cl::desc("dump module symbols"),
@@ -764,7 +791,7 @@ static void yamlToPdb(StringRef Path) {
PDBFileBuilder Builder(Allocator);
uint32_t BlockSize = 4096;
- if (YamlObj.Headers.hasValue())
+ if (YamlObj.Headers)
BlockSize = YamlObj.Headers->SuperBlock.BlockSize;
ExitOnErr(Builder.initialize(BlockSize));
// Add each of the reserved streams. We ignore stream metadata in the
@@ -779,7 +806,7 @@ static void yamlToPdb(StringRef Path) {
StringsAndChecksums Strings;
Strings.setStrings(std::make_shared<DebugStringTableSubsection>());
- if (YamlObj.StringTable.hasValue()) {
+ if (YamlObj.StringTable) {
for (auto S : *YamlObj.StringTable)
Strings.strings()->insert(S);
}
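These hunks apply the llvm::Optional API migration mechanically. A minimal
sketch of the mapping, with illustrative names:

  Optional<uint32_t> MaybeSize;
  if (MaybeSize)                             // was: MaybeSize.hasValue()
    consume(*MaybeSize);                     // hypothetical consumer
  uint32_t Size = MaybeSize.value_or(4096);  // was: getValueOr(4096)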
@@ -789,7 +816,7 @@ static void yamlToPdb(StringRef Path) {
pdb::yaml::PdbTpiStream DefaultTpiStream;
pdb::yaml::PdbTpiStream DefaultIpiStream;
- const auto &Info = YamlObj.PdbStream.getValueOr(DefaultInfoStream);
+ const auto &Info = YamlObj.PdbStream.value_or(DefaultInfoStream);
auto &InfoBuilder = Builder.getInfoBuilder();
InfoBuilder.setAge(Info.Age);
@@ -799,7 +826,7 @@ static void yamlToPdb(StringRef Path) {
for (auto F : Info.Features)
InfoBuilder.addFeature(F);
- const auto &Dbi = YamlObj.DbiStream.getValueOr(DefaultDbiStream);
+ const auto &Dbi = YamlObj.DbiStream.value_or(DefaultDbiStream);
auto &DbiBuilder = Builder.getDbiBuilder();
DbiBuilder.setAge(Dbi.Age);
DbiBuilder.setBuildNumber(Dbi.BuildNumber);
@@ -814,7 +841,7 @@ static void yamlToPdb(StringRef Path) {
for (auto S : MI.SourceFiles)
ExitOnErr(DbiBuilder.addModuleSourceFile(ModiBuilder, S));
- if (MI.Modi.hasValue()) {
+ if (MI.Modi) {
const auto &ModiStream = *MI.Modi;
for (auto Symbol : ModiStream.Symbols) {
ModiBuilder.addSymbol(
@@ -834,7 +861,7 @@ static void yamlToPdb(StringRef Path) {
}
auto &TpiBuilder = Builder.getTpiBuilder();
- const auto &Tpi = YamlObj.TpiStream.getValueOr(DefaultTpiStream);
+ const auto &Tpi = YamlObj.TpiStream.value_or(DefaultTpiStream);
TpiBuilder.setVersionHeader(Tpi.Version);
AppendingTypeTableBuilder TS(Allocator);
for (const auto &R : Tpi.Records) {
@@ -842,7 +869,7 @@ static void yamlToPdb(StringRef Path) {
TpiBuilder.addTypeRecord(Type.RecordData, None);
}
- const auto &Ipi = YamlObj.IpiStream.getValueOr(DefaultIpiStream);
+ const auto &Ipi = YamlObj.IpiStream.value_or(DefaultIpiStream);
auto &IpiBuilder = Builder.getIpiBuilder();
IpiBuilder.setVersionHeader(Ipi.Version);
for (const auto &R : Ipi.Records) {
@@ -1068,7 +1095,7 @@ static void dumpPretty(StringRef Path) {
const bool UseColor = opts::pretty::ColorOutput == cl::BOU_UNSET
? Stream.has_colors()
: opts::pretty::ColorOutput == cl::BOU_TRUE;
- LinePrinter Printer(2, UseColor, Stream);
+ LinePrinter Printer(2, UseColor, Stream, opts::Filters);
auto GlobalScope(Session->getGlobalScope());
if (!GlobalScope)
@@ -1506,6 +1533,44 @@ int main(int Argc, const char **Argv) {
llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
+ // Initialize the filters for LinePrinter.
+ auto propagate = [&](auto &Target, auto &Reference) {
+ for (std::string &Option : Reference)
+ Target.push_back(Option);
+ };
+
+  propagate(opts::Filters.ExcludeTypes, opts::pretty::ExcludeTypes);
+ propagate(opts::Filters.ExcludeSymbols, opts::pretty::ExcludeSymbols);
+ propagate(opts::Filters.ExcludeCompilands, opts::pretty::ExcludeCompilands);
+ propagate(opts::Filters.IncludeTypes, opts::pretty::IncludeTypes);
+ propagate(opts::Filters.IncludeSymbols, opts::pretty::IncludeSymbols);
+ propagate(opts::Filters.IncludeCompilands, opts::pretty::IncludeCompilands);
+ opts::Filters.PaddingThreshold = opts::pretty::PaddingThreshold;
+ opts::Filters.SizeThreshold = opts::pretty::SizeThreshold;
+ opts::Filters.JustMyCode = opts::dump::JustMyCode;
+ if (opts::dump::DumpModi.getNumOccurrences() > 0) {
+ if (opts::dump::DumpModi.getNumOccurrences() != 1) {
+ errs() << "argument '-modi' specified more than once.\n";
+ errs().flush();
+ exit(1);
+ }
+ opts::Filters.DumpModi = opts::dump::DumpModi;
+ }
+ if (opts::dump::DumpSymbolOffset) {
+ if (opts::dump::DumpModi.getNumOccurrences() != 1) {
+ errs()
+ << "need to specify argument '-modi' when using '-symbol-offset'.\n";
+ errs().flush();
+ exit(1);
+ }
+ opts::Filters.SymbolOffset = opts::dump::DumpSymbolOffset;
+ if (opts::dump::DumpParents)
+ opts::Filters.ParentRecurseDepth = opts::dump::DumpParentDepth;
+ if (opts::dump::DumpChildren)
+ opts::Filters.ChildrenRecurseDepth = opts::dump::DumpChildrenDepth;
+ }
+
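Taken together, the checks above require '-modi' exactly once whenever
'-symbol-offset' is given. A hypothetical invocation that satisfies them
(the --symbols flag and file name are assumed here; the offset and depths
are illustrative):

  llvm-pdbutil dump --symbols --modi=1 --symbol-offset=296 \
      --show-parents --parent-recurse-depth=2 input.pdb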
if (opts::PdbToYamlSubcommand) {
pdb2Yaml(opts::pdb2yaml::InputFilename.front());
} else if (opts::YamlToPdbSubcommand) {
@@ -1544,14 +1609,14 @@ int main(int Argc, const char **Argv) {
// it needs to be escaped again in the C++. So matching a single \ in the
// input requires 4 \es in the C++.
if (opts::pretty::ExcludeCompilerGenerated) {
- opts::pretty::ExcludeTypes.push_back("__vc_attributes");
- opts::pretty::ExcludeCompilands.push_back("\\* Linker \\*");
+ opts::Filters.ExcludeTypes.push_back("__vc_attributes");
+ opts::Filters.ExcludeCompilands.push_back("\\* Linker \\*");
}
if (opts::pretty::ExcludeSystemLibraries) {
- opts::pretty::ExcludeCompilands.push_back(
+ opts::Filters.ExcludeCompilands.push_back(
"f:\\\\binaries\\\\Intermediate\\\\vctools\\\\crt_bld");
- opts::pretty::ExcludeCompilands.push_back("f:\\\\dd\\\\vctools\\\\crt");
- opts::pretty::ExcludeCompilands.push_back(
+ opts::Filters.ExcludeCompilands.push_back("f:\\\\dd\\\\vctools\\\\crt");
+ opts::Filters.ExcludeCompilands.push_back(
"d:\\\\th.obj.x86fre\\\\minkernel");
}
llvm::for_each(opts::pretty::InputFilenames, dumpPretty);
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
index 9fe92c2c9d75..455fe5f28191 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,6 +51,8 @@ enum class ModuleSubsection {
All
};
+extern FilterOptions Filters;
+
namespace pretty {
enum class ClassDefinitionFormat { None, Layout, All };
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 6000460d3c23..9c6586483ef0 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -19,6 +19,7 @@
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/ProfileData/SampleProfReader.h"
@@ -37,6 +38,7 @@
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <queue>
using namespace llvm;
@@ -89,6 +91,7 @@ static void exitWithError(Error E, StringRef Whence = "") {
}
exitWithError(IPE.message(), std::string(Whence), std::string(Hint));
});
+ return;
}
exitWithError(toString(std::move(E)), std::string(Whence));
@@ -237,7 +240,7 @@ static void overlapInput(const std::string &BaseFilename,
/// Load an input into a writer context.
static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
const InstrProfCorrelator *Correlator,
- WriterContext *WC) {
+ const StringRef ProfiledBinary, WriterContext *WC) {
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
// Copy the filename, because llvm::ThreadPool copied the input "const
@@ -245,6 +248,48 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
// invalid outside of this packaged task.
std::string Filename = Input.Filename;
+ using ::llvm::memprof::RawMemProfReader;
+ if (RawMemProfReader::hasFormat(Input.Filename)) {
+ auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
+ if (!ReaderOrErr) {
+ exitWithError(ReaderOrErr.takeError(), Input.Filename);
+ }
+ std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
+ // Check if the profile types can be merged, e.g. clang frontend profiles
+ // should not be merged with memprof profiles.
+ if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
+ consumeError(std::move(E));
+ WC->Errors.emplace_back(
+ make_error<StringError>(
+ "Cannot merge MemProf profile with Clang generated profile.",
+ std::error_code()),
+ Filename);
+ return;
+ }
+
+ auto MemProfError = [&](Error E) {
+ instrprof_error IPE = InstrProfError::take(std::move(E));
+ WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
+ };
+
+ // Add the frame mappings into the writer context.
+ const auto &IdToFrame = Reader->getFrameMapping();
+ for (const auto &I : IdToFrame) {
+ bool Succeeded = WC->Writer.addMemProfFrame(
+ /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
+ // If we weren't able to add the frame mappings then it doesn't make sense
+ // to try to add the records from this profile.
+ if (!Succeeded)
+ return;
+ }
+ const auto &FunctionProfileData = Reader->getProfileData();
+ // Add the memprof records into the writer context.
+ for (const auto &I : FunctionProfileData) {
+ WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second);
+ }
+ return;
+ }
+
auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator);
if (Error E = ReaderOrErr.takeError()) {
    // Skip empty profiles by returning silently.
@@ -330,7 +375,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
SymbolRemapper *Remapper,
StringRef OutputFilename,
ProfileFormat OutputFormat, bool OutputSparse,
- unsigned NumThreads, FailureMode FailMode) {
+ unsigned NumThreads, FailureMode FailMode,
+ const StringRef ProfiledBinary) {
if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary &&
OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text)
exitWithError("unknown format is specified");
@@ -363,14 +409,15 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
if (NumThreads == 1) {
for (const auto &Input : Inputs)
- loadInput(Input, Remapper, Correlator.get(), Contexts[0].get());
+ loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
+ Contexts[0].get());
} else {
ThreadPool Pool(hardware_concurrency(NumThreads));
// Load the inputs in parallel (N/NumThreads serial steps).
unsigned Ctx = 0;
for (const auto &Input : Inputs) {
- Pool.async(loadInput, Input, Remapper, Correlator.get(),
+ Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
Contexts[Ctx].get());
Ctx = (Ctx + 1) % NumThreads;
}
@@ -587,7 +634,7 @@ static void supplementInstrProfile(
SmallSet<instrprof_error, 4> WriterErrorCodes;
auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
WriterErrorCodes);
- loadInput(Inputs[0], nullptr, nullptr, WC.get());
+ loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
if (WC->Errors.size() > 0)
exitWithError(std::move(WC->Errors[0].first), InstrFilename);
@@ -708,7 +755,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
LLVMContext Context;
sampleprof::ProfileSymbolList WriterList;
Optional<bool> ProfileIsProbeBased;
- Optional<bool> ProfileIsCSFlat;
+ Optional<bool> ProfileIsCS;
for (const auto &Input : Inputs) {
auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context,
FSDiscriminatorPassOption);
@@ -730,15 +777,14 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
}
SampleProfileMap &Profiles = Reader->getProfiles();
- if (ProfileIsProbeBased.hasValue() &&
+ if (ProfileIsProbeBased &&
ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
exitWithError(
"cannot merge probe-based profile with non-probe-based profile");
ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
- if (ProfileIsCSFlat.hasValue() &&
- ProfileIsCSFlat != FunctionSamples::ProfileIsCSFlat)
+ if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
exitWithError("cannot merge CS profile with non-CS profile");
- ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat;
+ ProfileIsCS = FunctionSamples::ProfileIsCS;
for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
I != E; ++I) {
sampleprof_error Result = sampleprof_error::success;
@@ -761,7 +807,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
WriterList.merge(*ReaderList);
}
- if (ProfileIsCSFlat && (SampleMergeColdContext || SampleTrimColdContext)) {
+ if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
// Use threshold calculated from profile summary unless specified.
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
@@ -776,10 +822,10 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
SampleMergeColdContext, SampleColdContextFrameDepth, false);
}
- if (ProfileIsCSFlat && GenCSNestedProfile) {
+ if (ProfileIsCS && GenCSNestedProfile) {
CSProfileConverter CSConverter(ProfileMap);
CSConverter.convertProfiles();
- ProfileIsCSFlat = FunctionSamples::ProfileIsCSFlat = false;
+ ProfileIsCS = FunctionSamples::ProfileIsCS = false;
}
auto WriterOrErr =
@@ -933,7 +979,7 @@ static int merge_main(int argc, const char *argv[]) {
cl::desc(
"Trim context sample profiles whose count is below cold threshold"));
cl::opt<uint32_t> SampleColdContextFrameDepth(
- "sample-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
+ "sample-frame-depth-for-cold-context", cl::init(1),
cl::desc("Keep the last K frames while merging cold profile. 1 means the "
"context-less base profile"));
cl::opt<bool> GenPartialProfile(
@@ -949,7 +995,7 @@ static int merge_main(int argc, const char *argv[]) {
"zero-counter-threshold", cl::init(0.7), cl::Hidden,
cl::desc("For the function which is cold in instr profile but hot in "
"sample profile, if the ratio of the number of zero counters "
- "divided by the the total number of counters is above the "
+ "divided by the total number of counters is above the "
"threshold, the profile of the function will be regarded as "
"being harmful for performance and will be dropped."));
cl::opt<unsigned> SupplMinSizeThreshold(
@@ -967,6 +1013,9 @@ static int merge_main(int argc, const char *argv[]) {
cl::opt<std::string> DebugInfoFilename(
"debug-info", cl::init(""),
cl::desc("Use the provided debug info to correlate the raw profile."));
+ cl::opt<std::string> ProfiledBinary(
+ "profiled-binary", cl::init(""),
+ cl::desc("Path to binary from which the profile was collected."));
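With this option, a raw memory profile can be symbolized against the binary
it was collected from; merging it with a Clang instrumentation profile is
rejected by the mergeProfileKind check in loadInput above. A hypothetical
invocation (file names are illustrative):

  llvm-profdata merge --profiled-binary=./a.out foo.memprofraw -o foo.profdata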
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
@@ -1009,7 +1058,7 @@ static int merge_main(int argc, const char *argv[]) {
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(),
OutputFilename, OutputFormat, OutputSparse, NumThreads,
- FailureMode);
+ FailureMode, ProfiledBinary);
else
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, ProfileSymbolListFile, CompressAllSections,
@@ -1040,7 +1089,7 @@ static void overlapInstrProfile(const std::string &BaseFilename,
OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
exit(0);
}
- loadInput(WeightedInput, nullptr, nullptr, &Context);
+ loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
IsCS);
Overlap.dump(OS);
@@ -1936,7 +1985,7 @@ std::error_code SampleOverlapAggregator::loadProfiles() {
if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
exitWithError(
"cannot compare probe-based profile with non-probe-based profile");
- if (BaseReader->profileIsCSFlat() != TestReader->profileIsCSFlat())
+ if (BaseReader->profileIsCS() != TestReader->profileIsCS())
exitWithError("cannot compare CS profile with non-CS profile");
// Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
@@ -2097,7 +2146,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
auto ReaderOrErr = InstrProfReader::create(Filename);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
- Cutoffs = {800000, 900000, 950000, 990000, 999000, 999900, 999990};
+ Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
}
InstrProfSummaryBuilder Builder(std::move(Cutoffs));
if (Error E = ReaderOrErr.takeError())
@@ -2480,14 +2529,21 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
return 0;
}
-static int showMemProfProfile(const std::string &Filename, raw_fd_ostream &OS) {
- auto ReaderOr = llvm::memprof::RawMemProfReader::create(Filename);
+static int showMemProfProfile(const std::string &Filename,
+ const std::string &ProfiledBinary,
+ raw_fd_ostream &OS) {
+ auto ReaderOr = llvm::memprof::RawMemProfReader::create(
+ Filename, ProfiledBinary, /*KeepNames=*/true);
if (Error E = ReaderOr.takeError())
- exitWithError(std::move(E), Filename);
+    // Since the error can be related to the profile or the binary, we do not
+    // pass whence. Instead, additional context is provided where necessary in
+    // the error message.
+ exitWithError(std::move(E), /*Whence*/ "");
std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
ReaderOr.get().release());
- Reader->printSummaries(OS);
+
+ Reader->printYAML(OS);
return 0;
}
@@ -2587,6 +2643,9 @@ static int show_main(int argc, const char *argv[]) {
cl::opt<bool> ShowCovered(
"covered", cl::init(false),
cl::desc("Show only the functions that have been executed."));
+ cl::opt<std::string> ProfiledBinary(
+ "profiled-binary", cl::init(""),
+ cl::desc("Path to binary from which the profile was collected."));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
@@ -2624,7 +2683,7 @@ static int show_main(int argc, const char *argv[]) {
ShowAllFunctions, ShowDetailedSummary,
ShowFunction, ShowProfileSymbolList,
ShowSectionInfoOnly, ShowHotFuncList, OS);
- return showMemProfProfile(Filename, OS);
+ return showMemProfProfile(Filename, ProfiledBinary, OS);
}
int main(int argc, const char *argv[]) {
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 78be632f2153..b7cbf353c43f 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -78,10 +78,10 @@ raw_ostream &operator<<(raw_ostream &OS, const ARM::WinEH::ReturnType &RT) {
OS << "pop {pc}";
break;
case ARM::WinEH::ReturnType::RT_B:
- OS << "b target";
+ OS << "bx <reg>";
break;
case ARM::WinEH::ReturnType::RT_BW:
- OS << "b.w target";
+ OS << "b.w <target>";
break;
case ARM::WinEH::ReturnType::RT_NoEpilogue:
OS << "(no epilogue)";
@@ -174,26 +174,47 @@ const Decoder::RingEntry Decoder::Ring64[] = {
{ 0xff, 0xec, 1, &Decoder::opcode_clear_unwound_to_call },
};
-void Decoder::printRegisters(const std::pair<uint16_t, uint32_t> &RegisterMask) {
- static const char * const GPRRegisterNames[16] = {
- "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
- "r11", "ip", "sp", "lr", "pc",
- };
+static void printRange(raw_ostream &OS, ListSeparator &LS, unsigned First,
+ unsigned Last, char Letter) {
+ if (First == Last)
+ OS << LS << Letter << First;
+ else
+ OS << LS << Letter << First << "-" << Letter << Last;
+}
- const uint16_t GPRMask = std::get<0>(RegisterMask);
- const uint16_t VFPMask = std::get<1>(RegisterMask);
+static void printRange(raw_ostream &OS, uint32_t Mask, ListSeparator &LS,
+ unsigned Start, unsigned End, char Letter) {
+ int First = -1;
+ for (unsigned RI = Start; RI <= End; ++RI) {
+ if (Mask & (1 << RI)) {
+ if (First < 0)
+ First = RI;
+ } else {
+ if (First >= 0) {
+ printRange(OS, LS, First, RI - 1, Letter);
+ First = -1;
+ }
+ }
+ }
+ if (First >= 0)
+ printRange(OS, LS, First, End, Letter);
+}
+
+void Decoder::printGPRMask(uint16_t GPRMask) {
+ OS << '{';
+ ListSeparator LS;
+ printRange(OS, GPRMask, LS, 0, 12, 'r');
+ if (GPRMask & (1 << 14))
+ OS << LS << "lr";
+ if (GPRMask & (1 << 15))
+ OS << LS << "pc";
+ OS << '}';
+}
+void Decoder::printVFPMask(uint32_t VFPMask) {
OS << '{';
ListSeparator LS;
- for (unsigned RI = 0, RE = 11; RI < RE; ++RI)
- if (GPRMask & (1 << RI))
- OS << LS << GPRRegisterNames[RI];
- for (unsigned RI = 0, RE = 32; RI < RE; ++RI)
- if (VFPMask & (1 << RI))
- OS << LS << "d" << unsigned(RI);
- for (unsigned RI = 11, RE = 16; RI < RE; ++RI)
- if (GPRMask & (1 << RI))
- OS << LS << GPRRegisterNames[RI];
+ printRange(OS, VFPMask, LS, 0, 31, 'd');
OS << '}';
}
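The replacement helpers above coalesce consecutive set bits into inclusive ranges, so a mask covering r0, r1, r2, r3 and r6 prints as {r0-r3, r6} rather than five separate names. A minimal standalone sketch of the same scan, with hypothetical names and printf output for brevity:

#include <cstdint>
#include <cstdio>

static void printMaskRanges(uint32_t Mask, unsigned Start, unsigned End,
                            char Letter) {
  int First = -1;         // start of the current run, or -1 if none is open
  const char *Sep = "";
  for (unsigned RI = Start; RI <= End + 1; ++RI) {
    // One extra iteration past End flushes a run that reaches the last bit.
    bool Set = RI <= End && (Mask & (1u << RI));
    if (Set && First < 0) {
      First = RI;                         // open a new run
    } else if (!Set && First >= 0) {
      if (First == (int)(RI - 1))
        std::printf("%s%c%d", Sep, Letter, First);
      else
        std::printf("%s%c%d-%c%u", Sep, Letter, First, Letter, RI - 1);
      Sep = ", ";
      First = -1;                         // close the run
    }
  }
}

// printMaskRanges(0x4F, 0, 12, 'r') prints "r0-r3, r6".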
@@ -325,7 +346,7 @@ bool Decoder::opcode_10Lxxxxx(const uint8_t *OC, unsigned &Offset,
SW.startLine() << format("0x%02x 0x%02x ; %s.w ",
OC[Offset + 0], OC[Offset + 1],
Prologue ? "push" : "pop");
- printRegisters(std::make_pair(RegisterMask, 0));
+ printGPRMask(RegisterMask);
OS << '\n';
Offset += 2;
@@ -346,7 +367,7 @@ bool Decoder::opcode_1100xxxx(const uint8_t *OC, unsigned &Offset,
bool Decoder::opcode_11010Lxx(const uint8_t *OC, unsigned &Offset,
unsigned Length, bool Prologue) {
- unsigned Link = (OC[Offset] & 0x4) >> 3;
+ unsigned Link = (OC[Offset] & 0x4) >> 2;
unsigned Count = (OC[Offset] & 0x3);
uint16_t GPRMask = (Link << (Prologue ? 14 : 15))
@@ -354,7 +375,7 @@ bool Decoder::opcode_11010Lxx(const uint8_t *OC, unsigned &Offset,
SW.startLine() << format("0x%02x ; %s ", OC[Offset],
Prologue ? "push" : "pop");
- printRegisters(std::make_pair(GPRMask, 0));
+ printGPRMask(GPRMask);
OS << '\n';
++Offset;
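This hunk fixes a bit-extraction bug: in the 11010Lxx encoding the L bit is bit 2 (mask 0x4), so it must be shifted right by 2; the old shift by 3 always yielded zero, so the link register was never added to the mask. A worked check on a sample opcode byte:

// 0xD6 is 1101'0110: an 11010Lxx opcode with L = 1.
static_assert((0xD6 & 0x4) >> 2 == 1, "L bit extracted correctly");
static_assert((0xD6 & 0x4) >> 3 == 0, "the old shift always dropped L");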
@@ -371,7 +392,7 @@ bool Decoder::opcode_11011Lxx(const uint8_t *OC, unsigned &Offset,
SW.startLine() << format("0x%02x ; %s.w ", OC[Offset],
Prologue ? "push" : "pop");
- printRegisters(std::make_pair(GPRMask, 0));
+ printGPRMask(GPRMask);
OS << '\n';
++Offset;
@@ -385,7 +406,7 @@ bool Decoder::opcode_11100xxx(const uint8_t *OC, unsigned &Offset,
SW.startLine() << format("0x%02x ; %s ", OC[Offset],
Prologue ? "vpush" : "vpop");
- printRegisters(std::make_pair(0, VFPMask));
+ printVFPMask(VFPMask);
OS << '\n';
++Offset;
@@ -407,12 +428,12 @@ bool Decoder::opcode_111010xx(const uint8_t *OC, unsigned &Offset,
bool Decoder::opcode_1110110L(const uint8_t *OC, unsigned &Offset,
unsigned Length, bool Prologue) {
- uint8_t GPRMask = ((OC[Offset + 0] & 0x01) << (Prologue ? 14 : 15))
- | ((OC[Offset + 1] & 0xff) << 0);
+ uint16_t GPRMask = ((OC[Offset + 0] & 0x01) << (Prologue ? 14 : 15))
+ | ((OC[Offset + 1] & 0xff) << 0);
SW.startLine() << format("0x%02x 0x%02x ; %s ", OC[Offset + 0],
OC[Offset + 1], Prologue ? "push" : "pop");
- printRegisters(std::make_pair(GPRMask, 0));
+ printGPRMask(GPRMask);
OS << '\n';
Offset += 2;
@@ -437,11 +458,13 @@ bool Decoder::opcode_11101110(const uint8_t *OC, unsigned &Offset,
bool Decoder::opcode_11101111(const uint8_t *OC, unsigned &Offset,
unsigned Length, bool Prologue) {
- assert(!Prologue && "may not be used in prologue");
-
if (OC[Offset + 1] & 0xf0)
SW.startLine() << format("0x%02x 0x%02x ; reserved\n",
OC[Offset + 0], OC[Offset + 1]);
+ else if (Prologue)
+ SW.startLine()
+ << format("0x%02x 0x%02x ; str.w lr, [sp, #-%u]!\n",
+ OC[Offset + 0], OC[Offset + 1], OC[Offset + 1] << 2);
else
SW.startLine()
<< format("0x%02x 0x%02x ; ldr.w lr, [sp], #%u\n",
@@ -455,11 +478,11 @@ bool Decoder::opcode_11110101(const uint8_t *OC, unsigned &Offset,
unsigned Length, bool Prologue) {
unsigned Start = (OC[Offset + 1] & 0xf0) >> 4;
unsigned End = (OC[Offset + 1] & 0x0f) >> 0;
- uint32_t VFPMask = ((1 << (End - Start)) - 1) << Start;
+ uint32_t VFPMask = ((1 << (End + 1 - Start)) - 1) << Start;
SW.startLine() << format("0x%02x 0x%02x ; %s ", OC[Offset + 0],
OC[Offset + 1], Prologue ? "vpush" : "vpop");
- printRegisters(std::make_pair(0, VFPMask));
+ printVFPMask(VFPMask);
OS << '\n';
Offset += 2;
@@ -470,11 +493,11 @@ bool Decoder::opcode_11110110(const uint8_t *OC, unsigned &Offset,
unsigned Length, bool Prologue) {
unsigned Start = (OC[Offset + 1] & 0xf0) >> 4;
unsigned End = (OC[Offset + 1] & 0x0f) >> 0;
- uint32_t VFPMask = ((1 << (End - Start)) - 1) << 16;
+ uint32_t VFPMask = ((1 << (End + 1 - Start)) - 1) << (16 + Start);
SW.startLine() << format("0x%02x 0x%02x ; %s ", OC[Offset + 0],
OC[Offset + 1], Prologue ? "vpush" : "vpop");
- printRegisters(std::make_pair(0, VFPMask));
+ printVFPMask(VFPMask);
OS << '\n';
Offset += 2;
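Both of these hunks fix off-by-one range masks: the encoded Start and End fields are inclusive, so the run must be End + 1 - Start bits wide, and the second form additionally needs the Start shift on top of the d16 base. Worked numbers for d8-d11 (Start = 8, End = 11):

static_assert(((1 << (11 + 1 - 8)) - 1) << 8 == 0xF00, // d8-d11: correct
              "inclusive range covers all four registers");
static_assert(((1 << (11 - 8)) - 1) << 8 == 0x700,     // d8-d10: one short
              "the old mask dropped the last register");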
@@ -553,14 +576,14 @@ bool Decoder::opcode_11111100(const uint8_t *OC, unsigned &Offset,
bool Decoder::opcode_11111101(const uint8_t *OC, unsigned &Offset,
unsigned Length, bool Prologue) {
- SW.startLine() << format("0x%02x ; b\n", OC[Offset]);
+ SW.startLine() << format("0x%02x ; bx <reg>\n", OC[Offset]);
++Offset;
return true;
}
bool Decoder::opcode_11111110(const uint8_t *OC, unsigned &Offset,
unsigned Length, bool Prologue) {
- SW.startLine() << format("0x%02x ; b.w\n", OC[Offset]);
+ SW.startLine() << format("0x%02x ; b.w <target>\n", OC[Offset]);
++Offset;
return true;
}
@@ -948,7 +971,7 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
if (XData.E()) {
ArrayRef<uint8_t> UC = XData.UnwindByteCode();
- if (isAArch64 || !XData.F()) {
+ {
ListScope PS(SW, "Prologue");
decodeOpcodes(UC, 0, /*Prologue=*/true);
}
@@ -971,8 +994,9 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
SW.printNumber("EpilogueStartIndex",
isAArch64 ? ES.EpilogueStartIndexAArch64()
: ES.EpilogueStartIndexARM());
- if (ES.ES & ~0xffc3ffff)
- SW.printNumber("ReservedBits", (ES.ES >> 18) & 0xF);
+ unsigned ReservedMask = isAArch64 ? 0xF : 0x3;
+ if ((ES.ES >> 18) & ReservedMask)
+ SW.printNumber("ReservedBits", (ES.ES >> 18) & ReservedMask);
ListScope Opcodes(SW, "Opcodes");
decodeOpcodes(XData.UnwindByteCode(),
@@ -1110,17 +1134,75 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
SW.printString("Function",
formatSymbol(FunctionName, FunctionAddress, FunctionOffset));
- if (!isAArch64)
- SW.printBoolean("Fragment",
- RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment);
+ SW.printBoolean("Fragment",
+ RF.Flag() == RuntimeFunctionFlag::RFF_PackedFragment);
SW.printNumber("FunctionLength", RF.FunctionLength());
SW.startLine() << "ReturnType: " << RF.Ret() << '\n';
SW.printBoolean("HomedParameters", RF.H());
- SW.startLine() << "SavedRegisters: ";
- printRegisters(SavedRegisterMask(RF));
- OS << '\n';
+ SW.printNumber("Reg", RF.Reg());
+ SW.printNumber("R", RF.R());
+ SW.printBoolean("LinkRegister", RF.L());
+ SW.printBoolean("Chaining", RF.C());
SW.printNumber("StackAdjustment", StackAdjustment(RF) << 2);
+ {
+ ListScope PS(SW, "Prologue");
+
+ uint16_t GPRMask, VFPMask;
+ std::tie(GPRMask, VFPMask) = SavedRegisterMask(RF, /*Prologue=*/true);
+
+ if (StackAdjustment(RF) && !PrologueFolding(RF))
+ SW.startLine() << "sub sp, sp, #" << StackAdjustment(RF) * 4 << "\n";
+ if (VFPMask) {
+ SW.startLine() << "vpush ";
+ printVFPMask(VFPMask);
+ OS << "\n";
+ }
+ if (RF.C()) {
+ // Count the number of registers pushed below R11
+ int FpOffset = 4 * countPopulation(GPRMask & ((1U << 11) - 1));
+ if (FpOffset)
+ SW.startLine() << "add.w r11, sp, #" << FpOffset << "\n";
+ else
+ SW.startLine() << "mov r11, sp\n";
+ }
+ if (GPRMask) {
+ SW.startLine() << "push ";
+ printGPRMask(GPRMask);
+ OS << "\n";
+ }
+ if (RF.H())
+ SW.startLine() << "push {r0-r3}\n";
+ }
+
+ if (RF.Ret() != ReturnType::RT_NoEpilogue) {
+ ListScope PS(SW, "Epilogue");
+
+ uint16_t GPRMask, VFPMask;
+ std::tie(GPRMask, VFPMask) = SavedRegisterMask(RF, /*Prologue=*/false);
+
+ if (StackAdjustment(RF) && !EpilogueFolding(RF))
+ SW.startLine() << "add sp, sp, #" << StackAdjustment(RF) * 4 << "\n";
+ if (VFPMask) {
+ SW.startLine() << "vpop ";
+ printVFPMask(VFPMask);
+ OS << "\n";
+ }
+ if (GPRMask) {
+ SW.startLine() << "pop ";
+ printGPRMask(GPRMask);
+ OS << "\n";
+ }
+ if (RF.H()) {
+ if (RF.L() == 0 || RF.Ret() != ReturnType::RT_POP)
+ SW.startLine() << "add sp, sp, #16\n";
+ else
+ SW.startLine() << "ldr pc, [sp], #20\n";
+ }
+ if (RF.Ret() != ReturnType::RT_POP)
+ SW.startLine() << RF.Ret() << '\n';
+ }
+
return true;
}
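The new block reconstructs the implied prologue and epilogue from the packed unwind fields instead of printing a flat register mask. One step worth spelling out is the chained-frame case: r11 must point at the saved frame-pointer slot, which sits above every GPR pushed below r11, four bytes each. A small sketch of that arithmetic (hypothetical mask; countPopulation is from llvm/Support/MathExtras.h):

#include "llvm/Support/MathExtras.h" // llvm::countPopulation
#include <cstdint>

// Suppose the packed prologue pushes {r4-r7, r11, lr}:
uint16_t GPRMask = (1u << 4) | (1u << 5) | (1u << 6) | (1u << 7) |
                   (1u << 11) | (1u << 14);
// Four registers (r4-r7) sit below r11, so the dumper would print
// "add.w r11, sp, #16".
int FpOffset = 4 * llvm::countPopulation(GPRMask & ((1u << 11) - 1)); // 16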
@@ -1189,11 +1271,11 @@ bool Decoder::dumpPackedARM64Entry(const object::COFFObjectFile &COFF,
SW.startLine() << format("sub sp, sp, #%d\n", LocSZ);
}
if (RF.H()) {
- SW.startLine() << format("stp x6, x7, [sp, #%d]\n", IntSZ + FpSZ + 48);
- SW.startLine() << format("stp x4, x5, [sp, #%d]\n", IntSZ + FpSZ + 32);
- SW.startLine() << format("stp x2, x3, [sp, #%d]\n", IntSZ + FpSZ + 16);
+ SW.startLine() << format("stp x6, x7, [sp, #%d]\n", SavSZ - 16);
+ SW.startLine() << format("stp x4, x5, [sp, #%d]\n", SavSZ - 32);
+ SW.startLine() << format("stp x2, x3, [sp, #%d]\n", SavSZ - 48);
if (RF.RegI() > 0 || RF.RegF() > 0 || RF.CR() == 1) {
- SW.startLine() << format("stp x0, x1, [sp, #%d]\n", IntSZ + FpSZ);
+ SW.startLine() << format("stp x0, x1, [sp, #%d]\n", SavSZ - 64);
} else {
// This case isn't documented; if neither RegI nor RegF nor CR=1
// have decremented the stack pointer by SavSZ, we need to do it here
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
index 920d4e5f7332..ceaa866ff215 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -133,7 +133,8 @@ class Decoder {
void decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
bool Prologue);
- void printRegisters(const std::pair<uint16_t, uint32_t> &RegisterMask);
+ void printGPRMask(uint16_t Mask);
+ void printVFPMask(uint32_t Mask);
ErrorOr<object::SectionRef>
getSectionContaining(const object::COFFObjectFile &COFF, uint64_t Address);
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 04a67225401f..ae2dec5d15fb 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1204,6 +1204,7 @@ const EnumEntry<unsigned> ElfMachineType[] = {
ENUM_ENT(EM_LANAI, "EM_LANAI"),
ENUM_ENT(EM_BPF, "EM_BPF"),
ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"),
+ ENUM_ENT(EM_LOONGARCH, "LoongArch"),
};
const EnumEntry<unsigned> ElfSymbolBindings[] = {
@@ -1241,10 +1242,17 @@ const EnumEntry<unsigned> ElfSectionFlags[] = {
ENUM_ENT(SHF_GROUP, "G"),
ENUM_ENT(SHF_TLS, "T"),
ENUM_ENT(SHF_COMPRESSED, "C"),
- ENUM_ENT(SHF_GNU_RETAIN, "R"),
ENUM_ENT(SHF_EXCLUDE, "E"),
};
+const EnumEntry<unsigned> ElfGNUSectionFlags[] = {
+ ENUM_ENT(SHF_GNU_RETAIN, "R")
+};
+
+const EnumEntry<unsigned> ElfSolarisSectionFlags[] = {
+ ENUM_ENT(SHF_SUNW_NODISCARD, "R")
+};
+
const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
ENUM_ENT(XCORE_SHF_CP_SECTION, ""),
ENUM_ENT(XCORE_SHF_DP_SECTION, "")
@@ -1274,9 +1282,19 @@ const EnumEntry<unsigned> ElfX86_64SectionFlags[] = {
};
static std::vector<EnumEntry<unsigned>>
-getSectionFlagsForTarget(unsigned EMachine) {
+getSectionFlagsForTarget(unsigned EOSAbi, unsigned EMachine) {
std::vector<EnumEntry<unsigned>> Ret(std::begin(ElfSectionFlags),
std::end(ElfSectionFlags));
+ switch (EOSAbi) {
+ case ELFOSABI_SOLARIS:
+ Ret.insert(Ret.end(), std::begin(ElfSolarisSectionFlags),
+ std::end(ElfSolarisSectionFlags));
+ break;
+ default:
+ Ret.insert(Ret.end(), std::begin(ElfGNUSectionFlags),
+ std::end(ElfGNUSectionFlags));
+ break;
+ }
switch (EMachine) {
case EM_ARM:
Ret.insert(Ret.end(), std::begin(ElfARMSectionFlags),
@@ -1304,7 +1322,8 @@ getSectionFlagsForTarget(unsigned EMachine) {
return Ret;
}
-static std::string getGNUFlags(unsigned EMachine, uint64_t Flags) {
+static std::string getGNUFlags(unsigned EOSAbi, unsigned EMachine,
+ uint64_t Flags) {
// Here we are trying to build the flags string in the same way as GNU does.
// It is not that straightforward. Imagine we have sh_flags == 0x90000000.
// SHF_EXCLUDE ("E") has a value of 0x80000000 and SHF_MASKPROC is 0xf0000000.
@@ -1315,7 +1334,7 @@ static std::string getGNUFlags(unsigned EMachine, uint64_t Flags) {
bool HasOSFlag = false;
bool HasProcFlag = false;
std::vector<EnumEntry<unsigned>> FlagsList =
- getSectionFlagsForTarget(EMachine);
+ getSectionFlagsForTarget(EOSAbi, EMachine);
while (Flags) {
// Take the least significant bit as a flag.
uint64_t Flag = Flags & -Flags;
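The loop peels off one flag per iteration using the classic lowest-set-bit identity: Flags & -Flags isolates the least significant set bit, so flags are emitted in ascending bit order. For the sh_flags == 0x90000000 example from the comment above (SHF_EXCLUDE plus a processor-specific bit inside SHF_MASKPROC):

static_assert((0x90000000u & (~0x90000000u + 1)) == 0x10000000u,
              "Flags & -Flags peels off the processor-specific bit first");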
@@ -1371,6 +1390,8 @@ static StringRef segmentTypeToString(unsigned Arch, unsigned Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
}
break;
+ case ELF::EM_RISCV:
+ switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_RISCV_ATTRIBUTES); }
}
switch (Type) {
@@ -1404,12 +1425,16 @@ static std::string getGNUPtType(unsigned Arch, unsigned Type) {
return std::string("<unknown>: ") + to_string(format_hex(Type, 1));
// E.g. "PT_ARM_EXIDX" -> "EXIDX".
- if (Seg.startswith("PT_ARM_"))
- return Seg.drop_front(7).str();
+ if (Seg.consume_front("PT_ARM_"))
+ return Seg.str();
// E.g. "PT_MIPS_REGINFO" -> "REGINFO".
- if (Seg.startswith("PT_MIPS_"))
- return Seg.drop_front(8).str();
+ if (Seg.consume_front("PT_MIPS_"))
+ return Seg.str();
+
+ // E.g. "PT_RISCV_ATTRIBUTES"
+ if (Seg.consume_front("PT_RISCV_"))
+ return Seg.str();
// E.g. "PT_LOAD" -> "LOAD".
assert(Seg.startswith("PT_"));
@@ -1508,6 +1533,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
@@ -1518,6 +1544,11 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
};
@@ -1562,6 +1593,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
@@ -1572,6 +1604,11 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1036),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1100),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
@@ -2265,6 +2302,7 @@ std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
case DT_MIPS_PLTGOT:
case DT_MIPS_RWPLT:
case DT_MIPS_RLD_MAP_REL:
+ case DT_MIPS_XHASH:
return FormatHexValue(Value);
case DT_MIPS_FLAGS:
return FormatFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags));
@@ -3277,7 +3315,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
OS.PadToColumn(2u);
OS << "Version:";
OS.PadToColumn(37u);
- OS << to_hexString(e.e_ident[ELF::EI_VERSION]);
+ OS << utohexstr(e.e_ident[ELF::EI_VERSION]);
if (e.e_version == ELF::EV_CURRENT)
OS << " (current)";
OS << "\n";
@@ -3290,19 +3328,19 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
Str = E->AltName.str();
} else {
if (e.e_type >= ET_LOPROC)
- Str = "Processor Specific: (" + to_hexString(e.e_type, false) + ")";
+ Str = "Processor Specific: (" + utohexstr(e.e_type, /*LowerCase=*/true) + ")";
else if (e.e_type >= ET_LOOS)
- Str = "OS Specific: (" + to_hexString(e.e_type, false) + ")";
+ Str = "OS Specific: (" + utohexstr(e.e_type, /*LowerCase=*/true) + ")";
else
- Str = "<unknown>: " + to_hexString(e.e_type, false);
+ Str = "<unknown>: " + utohexstr(e.e_type, /*LowerCase=*/true);
}
printFields(OS, "Type:", Str);
Str = enumToString(e.e_machine, makeArrayRef(ElfMachineType));
printFields(OS, "Machine:", Str);
- Str = "0x" + to_hexString(e.e_version);
+ Str = "0x" + utohexstr(e.e_version);
printFields(OS, "Version:", Str);
- Str = "0x" + to_hexString(e.e_entry);
+ Str = "0x" + utohexstr(e.e_entry);
printFields(OS, "Entry point address:", Str);
Str = to_string(e.e_phoff) + " (bytes into file)";
printFields(OS, "Start of program headers:", Str);
@@ -3319,7 +3357,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
else if (e.e_machine == EM_AVR)
ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags),
unsigned(ELF::EF_AVR_ARCH_MASK));
- Str = "0x" + to_hexString(e.e_flags);
+ Str = "0x" + utohexstr(e.e_flags);
if (!ElfFlags.empty())
Str = Str + ", " + ElfFlags;
printFields(OS, "Flags:", Str);
@@ -3497,7 +3535,7 @@ void GNUELFDumper<ELFT>::printRelRelaReloc(const Relocation<ELFT> &R,
Addend = " + ";
}
}
- Addend += to_hexString(RelAddend, false);
+ Addend += utohexstr(RelAddend, /*LowerCase=*/true);
}
OS << Addend << "\n";
}
@@ -3529,7 +3567,7 @@ void GNUELFDumper<ELFT>::printDynamicRelocHeader(unsigned Type, StringRef Name,
const DynRegionInfo &Reg) {
uint64_t Offset = Reg.Addr - this->Obj.base();
OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x"
- << to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n";
+ << utohexstr(Offset, /*LowerCase=*/true) << " contains " << Reg.Size << " bytes:\n";
printRelocHeaderFields<ELFT>(OS, Type);
}
@@ -3582,7 +3620,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printRelocations() {
uintX_t Offset = Sec.sh_offset;
StringRef Name = this->getPrintableSectionName(Sec);
OS << "\nRelocation section '" << Name << "' at offset 0x"
- << to_hexString(Offset, false) << " contains " << EntriesNum
+ << utohexstr(Offset, /*LowerCase=*/true) << " contains " << EntriesNum
<< " entries:\n";
printRelocHeaderFields<ELFT>(OS, Sec.sh_type);
this->printRelocationsHelper(Sec);
@@ -3597,30 +3635,30 @@ template <class ELFT> void GNUELFDumper<ELFT>::printRelocations() {
// returned as '<unknown>' followed by the type value.
static std::string getSectionTypeOffsetString(unsigned Type) {
if (Type >= SHT_LOOS && Type <= SHT_HIOS)
- return "LOOS+0x" + to_hexString(Type - SHT_LOOS);
+ return "LOOS+0x" + utohexstr(Type - SHT_LOOS);
else if (Type >= SHT_LOPROC && Type <= SHT_HIPROC)
- return "LOPROC+0x" + to_hexString(Type - SHT_LOPROC);
+ return "LOPROC+0x" + utohexstr(Type - SHT_LOPROC);
else if (Type >= SHT_LOUSER && Type <= SHT_HIUSER)
- return "LOUSER+0x" + to_hexString(Type - SHT_LOUSER);
- return "0x" + to_hexString(Type) + ": <unknown>";
+ return "LOUSER+0x" + utohexstr(Type - SHT_LOUSER);
+ return "0x" + utohexstr(Type) + ": <unknown>";
}
static std::string getSectionTypeString(unsigned Machine, unsigned Type) {
StringRef Name = getELFSectionTypeName(Machine, Type);
// Handle SHT_GNU_* type names.
- if (Name.startswith("SHT_GNU_")) {
- if (Name == "SHT_GNU_HASH")
+ if (Name.consume_front("SHT_GNU_")) {
+ if (Name == "HASH")
return "GNU_HASH";
// E.g. SHT_GNU_verneed -> VERNEED.
- return Name.drop_front(8).upper();
+ return Name.upper();
}
if (Name == "SHT_SYMTAB_SHNDX")
return "SYMTAB SECTION INDICES";
- if (Name.startswith("SHT_"))
- return Name.drop_front(4).str();
+ if (Name.consume_front("SHT_"))
+ return Name.str();
return getSectionTypeOffsetString(Type);
}
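These hunks switch from the startswith()/drop_front() pair to StringRef::consume_front(), which tests for the prefix and strips it in one call. A small sketch of the behavior (illustrative value):

#include "llvm/ADT/StringRef.h"
#include <cassert>

void consumeFrontDemo() {
  llvm::StringRef Name = "SHT_GNU_verneed";
  // Returns true and drops the prefix in place on a match.
  bool HadPrefix = Name.consume_front("SHT_GNU_");
  assert(HadPrefix && Name == "verneed");
  assert(Name.upper() == "VERNEED"); // matches the mapping above
  (void)HadPrefix;
}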
@@ -3647,7 +3685,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printSectionHeaders() {
ArrayRef<Elf_Shdr> Sections = cantFail(this->Obj.sections());
OS << "There are " << to_string(Sections.size())
<< " section headers, starting at offset "
- << "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n";
+ << "0x" << utohexstr(this->Obj.getHeader().e_shoff, /*LowerCase=*/true) << ":\n\n";
OS << "Section Headers:\n";
Field Fields[11] = {
{"[Nr]", 2}, {"Name", 7}, {"Type", 25},
@@ -3680,7 +3718,8 @@ template <class ELFT> void GNUELFDumper<ELFT>::printSectionHeaders() {
Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6));
Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
- Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_machine, Sec.sh_flags);
+ Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_ident[ELF::EI_OSABI],
+ this->Obj.getHeader().e_machine, Sec.sh_flags);
Fields[8].Str = to_string(Sec.sh_link);
Fields[9].Str = to_string(Sec.sh_info);
Fields[10].Str = to_string(Sec.sh_addralign);
@@ -3804,7 +3843,7 @@ void GNUELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
Other &= ~STO_AARCH64_VARIANT_PCS;
Fields[5].Str += " [VARIANT_PCS";
if (Other != 0)
- Fields[5].Str.append(" | " + to_hexString(Other, false));
+ Fields[5].Str.append(" | " + utohexstr(Other, /*LowerCase=*/true));
Fields[5].Str.append("]");
}
} else if (this->Obj.getHeader().e_machine == ELF::EM_RISCV) {
@@ -3813,7 +3852,7 @@ void GNUELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
Other &= ~STO_RISCV_VARIANT_CC;
Fields[5].Str += " [VARIANT_CC";
if (Other != 0)
- Fields[5].Str.append(" | " + to_hexString(Other, false));
+ Fields[5].Str.append(" | " + utohexstr(Other, /*LowerCase=*/true));
Fields[5].Str.append("]");
}
} else {
@@ -4025,7 +4064,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printSectionDetails() {
ArrayRef<Elf_Shdr> Sections = cantFail(this->Obj.sections());
OS << "There are " << to_string(Sections.size())
<< " section headers, starting at offset "
- << "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n";
+ << "0x" << utohexstr(this->Obj.getHeader().e_shoff, /*LowerCase=*/true) << ":\n\n";
OS << "Section Headers:\n";
@@ -5041,6 +5080,57 @@ static bool printGNUNote(raw_ostream &OS, uint32_t NoteType,
return true;
}
+using AndroidNoteProperties = std::vector<std::pair<StringRef, std::string>>;
+static AndroidNoteProperties getAndroidNoteProperties(uint32_t NoteType,
+ ArrayRef<uint8_t> Desc) {
+ AndroidNoteProperties Props;
+ switch (NoteType) {
+ case ELF::NT_ANDROID_TYPE_MEMTAG:
+ if (Desc.empty()) {
+ Props.emplace_back("Invalid .note.android.memtag", "");
+ return Props;
+ }
+
+ switch (Desc[0] & NT_MEMTAG_LEVEL_MASK) {
+ case NT_MEMTAG_LEVEL_NONE:
+ Props.emplace_back("Tagging Mode", "NONE");
+ break;
+ case NT_MEMTAG_LEVEL_ASYNC:
+ Props.emplace_back("Tagging Mode", "ASYNC");
+ break;
+ case NT_MEMTAG_LEVEL_SYNC:
+ Props.emplace_back("Tagging Mode", "SYNC");
+ break;
+ default:
+ Props.emplace_back(
+ "Tagging Mode",
+ ("Unknown (" + Twine::utohexstr(Desc[0] & NT_MEMTAG_LEVEL_MASK) + ")")
+ .str());
+ break;
+ }
+ Props.emplace_back("Heap",
+ (Desc[0] & NT_MEMTAG_HEAP) ? "Enabled" : "Disabled");
+ Props.emplace_back("Stack",
+ (Desc[0] & NT_MEMTAG_STACK) ? "Enabled" : "Disabled");
+ break;
+ default:
+ return Props;
+ }
+ return Props;
+}
+
+static bool printAndroidNote(raw_ostream &OS, uint32_t NoteType,
+ ArrayRef<uint8_t> Desc) {
+ // Return true if we were able to pretty-print the note, false otherwise.
+ AndroidNoteProperties Props = getAndroidNoteProperties(NoteType, Desc);
+ if (Props.empty())
+ return false;
+ for (const auto &KV : Props)
+ OS << " " << KV.first << ": " << KV.second << '\n';
+ OS << '\n';
+ return true;
+}
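The first descriptor byte packs the tagging level into the bits covered by NT_MEMTAG_LEVEL_MASK, with heap and stack enablement as independent bits outside that mask, as the checks above imply. A hypothetical descriptor and the output it would produce:

#include "llvm/BinaryFormat/ELF.h" // NT_MEMTAG_* constants
#include <cstdint>

// SYNC tagging with heap tagging enabled and stack tagging disabled:
uint8_t Desc0 = llvm::ELF::NT_MEMTAG_LEVEL_SYNC | llvm::ELF::NT_MEMTAG_HEAP;
// printAndroidNote() would then emit:
//   Tagging Mode: SYNC
//   Heap: Enabled
//   Stack: Disabled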
+
template <typename ELFT>
static bool printLLVMOMPOFFLOADNote(raw_ostream &OS, uint32_t NoteType,
ArrayRef<uint8_t> Desc) {
@@ -5400,6 +5490,13 @@ const NoteType LLVMOMPOFFLOADNoteTypes[] = {
"NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION (producing toolchain version)"},
};
+const NoteType AndroidNoteTypes[] = {
+ {ELF::NT_ANDROID_TYPE_IDENT, "NT_ANDROID_TYPE_IDENT"},
+ {ELF::NT_ANDROID_TYPE_KUSER, "NT_ANDROID_TYPE_KUSER"},
+ {ELF::NT_ANDROID_TYPE_MEMTAG,
+ "NT_ANDROID_TYPE_MEMTAG (Android memory tagging information)"},
+};
+
const NoteType CoreNoteTypes[] = {
{ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
{ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
@@ -5508,6 +5605,8 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) {
return FindNote(AMDGPUNoteTypes);
if (Name == "LLVMOMPOFFLOAD")
return FindNote(LLVMOMPOFFLOADNoteTypes);
+ if (Name == "Android")
+ return FindNote(AndroidNoteTypes);
if (ELFType == ELF::ET_CORE)
return FindNote(CoreNoteTypes);
@@ -5658,6 +5757,9 @@ template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
return NoteOrErr.takeError();
}
}
+ } else if (Name == "Android") {
+ if (printAndroidNote(OS, Type, Descriptor))
+ return Error::success();
}
if (!Descriptor.empty()) {
OS << " description data:";
@@ -5838,7 +5940,7 @@ template <class ELFT>
SmallVector<uint32_t> ELFDumper<ELFT>::getSymbolIndexesForFunctionAddress(
uint64_t SymValue, Optional<const Elf_Shdr *> FunctionSec) {
SmallVector<uint32_t> SymbolIndexes;
- if (!this->AddressToIndexMap.hasValue()) {
+ if (!this->AddressToIndexMap) {
// Populate the address to index map upon the first invocation of this
// function.
this->AddressToIndexMap.emplace();
@@ -5991,9 +6093,8 @@ void ELFDumper<ELFT>::printStackSize(const Relocation<ELFT> &R,
return;
}
- uint64_t SymValue =
- Resolver(R.Type, Offset, RelocSymValue, Data.getAddress(&Offset),
- R.Addend.getValueOr(0));
+ uint64_t SymValue = Resolver(R.Type, Offset, RelocSymValue,
+ Data.getAddress(&Offset), R.Addend.value_or(0));
this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data,
&Offset);
}
@@ -6368,7 +6469,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
else
TypeStr = "Unknown";
}
- W.printString("Type", TypeStr + " (0x" + to_hexString(E.e_type) + ")");
+ W.printString("Type", TypeStr + " (0x" + utohexstr(E.e_type) + ")");
W.printEnum("Machine", E.e_machine, makeArrayRef(ElfMachineType));
W.printNumber("Version", E.e_version);
@@ -6501,7 +6602,8 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printSectionHeaders() {
int SectionIndex = -1;
std::vector<EnumEntry<unsigned>> FlagsList =
- getSectionFlagsForTarget(this->Obj.getHeader().e_machine);
+ getSectionFlagsForTarget(this->Obj.getHeader().e_ident[ELF::EI_OSABI],
+ this->Obj.getHeader().e_machine);
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
DictScope SectionD(W, "Section");
W.printNumber("Index", ++SectionIndex);
@@ -6932,8 +7034,10 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printCGProfile() {
template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL;
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
- if (Sec.sh_type != SHT_LLVM_BB_ADDR_MAP)
+ if (Sec.sh_type != SHT_LLVM_BB_ADDR_MAP &&
+ Sec.sh_type != SHT_LLVM_BB_ADDR_MAP_V0) {
continue;
+ }
Optional<const Elf_Shdr *> FunctionSec = None;
if (IsRelocatable)
FunctionSec =
@@ -7024,6 +7128,17 @@ static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
return true;
}
+static bool printAndroidNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
+ ScopedPrinter &W) {
+ // Return true if we were able to pretty-print the note, false otherwise.
+ AndroidNoteProperties Props = getAndroidNoteProperties(NoteType, Desc);
+ if (Props.empty())
+ return false;
+ for (const auto &KV : Props)
+ W.printString(KV.first, KV.second);
+ return true;
+}
+
template <typename ELFT>
static bool printLLVMOMPOFFLOADNoteLLVMStyle(uint32_t NoteType,
ArrayRef<uint8_t> Desc,
@@ -7126,6 +7241,9 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
return N.takeError();
}
}
+ } else if (Name == "Android") {
+ if (printAndroidNoteLLVMStyle(Type, Descriptor, W))
+ return Error::success();
}
if (!Descriptor.empty()) {
W.printBinaryBlock("Description data", Descriptor);
diff --git a/llvm/tools/llvm-readobj/MachODumper.cpp b/llvm/tools/llvm-readobj/MachODumper.cpp
index 599b0355917e..4931ab575bb2 100644
--- a/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -13,6 +13,7 @@
#include "ObjDumper.h"
#include "StackMapPrinter.h"
#include "llvm-readobj.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Object/MachO.h"
@@ -39,6 +40,11 @@ public:
void printNeededLibraries() override;
+ bool canCompareSymbols() const override { return true; }
+ bool compareSymbolsByName(object::SymbolRef LHS,
+ object::SymbolRef RHS) const override;
+ bool compareSymbolsByType(object::SymbolRef LHS,
+ object::SymbolRef RHS) const override;
// MachO-specific.
void printMachODataInCode() override;
void printMachOVersionMin() override;
@@ -51,10 +57,14 @@ private:
template<class MachHeader>
void printFileHeaders(const MachHeader &Header);
- StringRef getSymbolName(const SymbolRef &Symbol);
+ StringRef getSymbolName(const SymbolRef &Symbol) const;
+ uint8_t getSymbolType(const SymbolRef &Symbol) const;
void printSymbols() override;
+ void printSymbols(Optional<SymbolComparator> SymComp) override;
void printDynamicSymbols() override;
+ void printDynamicSymbols(Optional<SymbolComparator> SymComp) override;
+ void printSymbol(const SymbolRef &Symbol, ScopedPrinter &W);
void printSymbol(const SymbolRef &Symbol);
void printRelocation(const RelocationRef &Reloc);
@@ -602,7 +612,7 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj,
}
}
-StringRef MachODumper::getSymbolName(const SymbolRef &Symbol) {
+StringRef MachODumper::getSymbolName(const SymbolRef &Symbol) const {
Expected<StringRef> SymbolNameOrErr = Symbol.getName();
if (!SymbolNameOrErr) {
reportError(SymbolNameOrErr.takeError(), Obj->getFileName());
@@ -610,19 +620,50 @@ StringRef MachODumper::getSymbolName(const SymbolRef &Symbol) {
return *SymbolNameOrErr;
}
-void MachODumper::printSymbols() {
- ListScope Group(W, "Symbols");
+uint8_t MachODumper::getSymbolType(const SymbolRef &Symbol) const {
+ return Obj->is64Bit()
+ ? Obj->getSymbol64TableEntry(Symbol.getRawDataRefImpl()).n_type
+ : Obj->getSymbolTableEntry(Symbol.getRawDataRefImpl()).n_type;
+}
+
+bool MachODumper::compareSymbolsByName(SymbolRef LHS, SymbolRef RHS) const {
+ return getSymbolName(LHS).str().compare(getSymbolName(RHS).str()) < 0;
+}
+
+bool MachODumper::compareSymbolsByType(SymbolRef LHS, SymbolRef RHS) const {
+ return getSymbolType(LHS) < getSymbolType(RHS);
+}
+
+void MachODumper::printSymbols() { printSymbols(None); }
- for (const SymbolRef &Symbol : Obj->symbols()) {
- printSymbol(Symbol);
+void MachODumper::printSymbols(Optional<SymbolComparator> SymComp) {
+ ListScope Group(W, "Symbols");
+ if (SymComp) {
+ auto SymbolRange = Obj->symbols();
+ std::vector<SymbolRef> SortedSymbols(SymbolRange.begin(),
+ SymbolRange.end());
+ llvm::stable_sort(SortedSymbols, *SymComp);
+ for (SymbolRef Symbol : SortedSymbols)
+ printSymbol(Symbol);
+ } else {
+ for (const SymbolRef &Symbol : Obj->symbols()) {
+ printSymbol(Symbol);
+ }
}
}
void MachODumper::printDynamicSymbols() {
ListScope Group(W, "DynamicSymbols");
}
+void MachODumper::printDynamicSymbols(Optional<SymbolComparator> SymComp) {
+ ListScope Group(W, "DynamicSymbols");
+}
void MachODumper::printSymbol(const SymbolRef &Symbol) {
+ printSymbol(Symbol, W);
+}
+
+void MachODumper::printSymbol(const SymbolRef &Symbol, ScopedPrinter &W) {
StringRef SymbolName = getSymbolName(Symbol);
MachOSymbol MOSymbol;
diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h
index a09a243d381e..292efd2ae350 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/llvm/tools/llvm-readobj/ObjDumper.h
@@ -9,9 +9,14 @@
#ifndef LLVM_TOOLS_LLVM_READOBJ_OBJDUMPER_H
#define LLVM_TOOLS_LLVM_READOBJ_OBJDUMPER_H
+#include <functional>
#include <memory>
#include <system_error>
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
@@ -25,7 +30,7 @@ class COFFImportFile;
class ObjectFile;
class XCOFFObjectFile;
class ELFObjectFileBase;
-}
+} // namespace object
namespace codeview {
class GlobalTypeTableBuilder;
class MergingTypeTableBuilder;
@@ -33,6 +38,33 @@ class MergingTypeTableBuilder;
class ScopedPrinter;
+// Comparator used to order symbols.
+// Usage: the caller registers predicates (i.e., how to compare two symbols)
+// by calling addPredicate(). The order in which predicates are registered
+// determines their priority.
+class SymbolComparator {
+public:
+ using CompPredicate =
+ std::function<bool(object::SymbolRef, object::SymbolRef)>;
+
+ // Each object format retrieves a symbol's info slightly differently, so we
+ // defer each predicate's implementation to the format-specific dumper.
+ void addPredicate(CompPredicate Pred) { Predicates.push_back(Pred); }
+
+ bool operator()(object::SymbolRef LHS, object::SymbolRef RHS) {
+ for (CompPredicate Pred : Predicates) {
+ if (Pred(LHS, RHS))
+ return true;
+ if (Pred(RHS, LHS))
+ return false;
+ }
+ return false;
+ }
+
+private:
+ SmallVector<CompPredicate, 2> Predicates;
+};
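operator() folds the registered predicates into a lexicographic order: the first predicate that distinguishes the two symbols decides, and a full tie compares equal. The same fold shown on plain ints (a hypothetical analogue, not part of the tool):

#include <functional>
#include <vector>

// Earlier predicates act as primary keys; later ones only break ties.
bool lexLess(int L, int R,
             const std::vector<std::function<bool(int, int)>> &Preds) {
  for (const auto &P : Preds) {
    if (P(L, R))
      return true;  // decided by this key
    if (P(R, L))
      return false; // decided the other way
    // Equal under this key; fall through to the next predicate.
  }
  return false; // equal under every key
}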
+
class ObjDumper {
public:
ObjDumper(ScopedPrinter &Writer, StringRef ObjName);
@@ -52,6 +84,17 @@ public:
if (PrintDynamicSymbols)
printDynamicSymbols();
}
+ virtual void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols,
+ llvm::Optional<SymbolComparator> SymComp) {
+ if (SymComp) {
+ if (PrintSymbols)
+ printSymbols(SymComp);
+ if (PrintDynamicSymbols)
+ printDynamicSymbols(SymComp);
+ } else {
+ printSymbols(PrintSymbols, PrintDynamicSymbols);
+ }
+ }
virtual void printProgramHeaders(bool PrintProgramHeaders,
cl::boolOrDefault PrintSectionMapping) {
if (PrintProgramHeaders)
@@ -62,6 +105,17 @@ public:
virtual void printUnwindInfo() = 0;
+ // Symbol comparison functions.
+ virtual bool canCompareSymbols() const { return false; }
+ virtual bool compareSymbolsByName(object::SymbolRef LHS,
+ object::SymbolRef RHS) const {
+ return true;
+ }
+ virtual bool compareSymbolsByType(object::SymbolRef LHS,
+ object::SymbolRef RHS) const {
+ return true;
+ }
+
// Only implemented for ELF at this time.
virtual void printDependentLibs() {}
virtual void printDynamicRelocations() { }
@@ -133,7 +187,9 @@ protected:
private:
virtual void printSymbols() {}
+ virtual void printSymbols(llvm::Optional<SymbolComparator> Comp) {}
virtual void printDynamicSymbols() {}
+ virtual void printDynamicSymbols(llvm::Optional<SymbolComparator> Comp) {}
virtual void printProgramHeaders() {}
virtual void printSectionMapping() {}
diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td
index d0f273fa60c7..4687fc71245f 100644
--- a/llvm/tools/llvm-readobj/Opts.td
+++ b/llvm/tools/llvm-readobj/Opts.td
@@ -37,6 +37,7 @@ def section_mapping : FF<"section-mapping", "Display the section to segment mapp
def section_mapping_EQ_false : FF<"section-mapping=false", "Don't display the section to segment mapping">, Flags<[HelpHidden]>;
def section_relocations : FF<"section-relocations", "Display relocations for each section shown. This option has no effect for GNU style output">;
def section_symbols : FF<"section-symbols", "Display symbols for each section shown. This option has no effect for GNU style output">;
+defm sort_symbols : Eq<"sort-symbols", "Specify the keys to sort symbols by before displaying the symbol table">;
def stack_sizes : FF<"stack-sizes", "Display contents of all stack sizes sections. This option has no effect for GNU style output">;
def stackmap : FF<"stackmap", "Display contents of stackmap section">;
defm string_dump : Eq<"string-dump", "Display the specified section(s) as a list of strings">, MetaVarName<"<name or index>">;
@@ -86,7 +87,7 @@ def coff_tls_directory : FF<"coff-tls-directory", "Display TLS directory">, Grou
// XCOFF specific options.
def grp_xcoff : OptionGroup<"kind">, HelpText<"OPTIONS (XCOFF specific)">;
-def auxiliary_header : FF<"auxiliary-header" , "display the auxiliary header">, Group<grp_xcoff>;
+def auxiliary_header : FF<"auxiliary-header" , "Display the auxiliary header">, Group<grp_xcoff>;
def help : FF<"help", "Display this help">;
def version : FF<"version", "Display the version">;
diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp
index b4d726016437..cf80a2d13d2d 100644
--- a/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -179,13 +179,15 @@ void WasmDumper::printSectionHeaders() {
if (!Seg.Name.empty())
W.printString("Name", Seg.Name);
W.printNumber("Size", static_cast<uint64_t>(Seg.Content.size()));
- if (Seg.Offset.Opcode == wasm::WASM_OPCODE_I32_CONST)
- W.printNumber("Offset", Seg.Offset.Value.Int32);
- else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST)
- W.printNumber("Offset", Seg.Offset.Value.Int64);
- else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) {
+ if (Seg.Offset.Extended)
+ llvm_unreachable("extended const exprs not supported");
+ else if (Seg.Offset.Inst.Opcode == wasm::WASM_OPCODE_I32_CONST)
+ W.printNumber("Offset", Seg.Offset.Inst.Value.Int32);
+ else if (Seg.Offset.Inst.Opcode == wasm::WASM_OPCODE_I64_CONST)
+ W.printNumber("Offset", Seg.Offset.Inst.Value.Int64);
+ else if (Seg.Offset.Inst.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) {
ListScope Group(W, "Offset");
- W.printNumber("Global", Seg.Offset.Value.Global);
+ W.printNumber("Global", Seg.Offset.Inst.Value.Global);
} else
llvm_unreachable("unknown init expr opcode");
}
diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index 6e778d558d4f..ccae66f20127 100644
--- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -17,7 +17,6 @@
#include "llvm/Support/ScopedPrinter.h"
#include <ctime>
-#include <stddef.h>
using namespace llvm;
using namespace object;
@@ -41,6 +40,8 @@ public:
void printNeededLibraries() override;
void printStringTable() override;
+ ScopedPrinter &getScopedPrinter() const { return W; }
+
private:
template <typename T> void printSectionHeaders(ArrayRef<T> Sections);
template <typename T> void printGenericSectionHeader(T &Sec) const;
@@ -113,6 +114,8 @@ void XCOFFDumper::printFileHeaders() {
}
void XCOFFDumper::printAuxiliaryHeader() {
+ DictScope DS(W, "AuxiliaryHeader");
+
if (Obj.is64Bit())
printAuxiliaryHeader(Obj.auxiliaryHeader64());
else
@@ -736,6 +739,46 @@ void XCOFFDumper::printGenericSectionHeader(T &Sec) const {
W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers);
}
+enum PrintStyle { Hex, Number };
+template <typename T, typename V>
+static void printAuxMemberHelper(PrintStyle Style, const char *MemberName,
+ const T &Member, const V *AuxHeader,
+ uint16_t AuxSize, uint16_t &PartialFieldOffset,
+ const char *&PartialFieldName,
+ ScopedPrinter &W) {
+ ptrdiff_t Offset = reinterpret_cast<const char *>(&Member) -
+ reinterpret_cast<const char *>(AuxHeader);
+ if (Offset + sizeof(Member) <= AuxSize)
+ Style == Hex ? W.printHex(MemberName, Member)
+ : W.printNumber(MemberName, Member);
+ else if (Offset < AuxSize) {
+ PartialFieldOffset = Offset;
+ PartialFieldName = MemberName;
+ }
+}
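The helper derives each member's offset with pointer arithmetic rather than offsetof, so one template serves both the 32- and 64-bit auxiliary headers. A member is printed only when it lies entirely within AuxSize; one that starts inside but runs past the end is recorded as a partial field for the raw dump handled below. A worked bounds check with hypothetical sizes:

constexpr unsigned AuxSize = 8;
constexpr unsigned Offset = 6;     // the member starts inside the header...
constexpr unsigned MemberSize = 4; // ...but extends past AuxSize
static_assert(!(Offset + MemberSize <= AuxSize), "not fully contained");
static_assert(Offset < AuxSize, "so it is reported as a partial field");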
+
+template <class T>
+void checkAndPrintAuxHeaderParseError(const char *PartialFieldName,
+ uint16_t PartialFieldOffset,
+ uint16_t AuxSize, T &AuxHeader,
+ XCOFFDumper *Dumper) {
+ if (PartialFieldOffset < AuxSize) {
+ Dumper->reportUniqueWarning(Twine("only partial field for ") +
+ PartialFieldName + " at offset (" +
+ Twine(PartialFieldOffset) + ")");
+ Dumper->getScopedPrinter().printBinary(
+ "Raw data", "",
+ ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&AuxHeader) +
+ PartialFieldOffset,
+ AuxSize - PartialFieldOffset));
+ } else if (sizeof(AuxHeader) < AuxSize)
+ Dumper->getScopedPrinter().printBinary(
+ "Extra raw data", "",
+ ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&AuxHeader) +
+ sizeof(AuxHeader),
+ AuxSize - sizeof(AuxHeader)));
+}
+
void XCOFFDumper::printAuxiliaryHeader(
const XCOFFAuxiliaryHeader32 *AuxHeader) {
if (AuxHeader == nullptr)
@@ -744,44 +787,40 @@ void XCOFFDumper::printAuxiliaryHeader(
uint16_t PartialFieldOffset = AuxSize;
const char *PartialFieldName = nullptr;
- DictScope DS(W, "AuxiliaryHeader");
-
-#define PrintAuxMember32(H, S, T) \
- if (offsetof(XCOFFAuxiliaryHeader32, T) + \
- sizeof(XCOFFAuxiliaryHeader32::T) <= \
- AuxSize) \
- W.print##H(S, AuxHeader->T); \
- else if (offsetof(XCOFFAuxiliaryHeader32, T) < AuxSize) { \
- PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader32, T); \
- PartialFieldName = S; \
- }
+ auto PrintAuxMember = [&](PrintStyle Style, const char *MemberName,
+ auto &Member) {
+ printAuxMemberHelper(Style, MemberName, Member, AuxHeader, AuxSize,
+ PartialFieldOffset, PartialFieldName, W);
+ };
- PrintAuxMember32(Hex, "Magic", AuxMagic);
- PrintAuxMember32(Hex, "Version", Version);
- PrintAuxMember32(Hex, "Size of .text section", TextSize);
- PrintAuxMember32(Hex, "Size of .data section", InitDataSize);
- PrintAuxMember32(Hex, "Size of .bss section", BssDataSize);
- PrintAuxMember32(Hex, "Entry point address", EntryPointAddr);
- PrintAuxMember32(Hex, ".text section start address", TextStartAddr);
- PrintAuxMember32(Hex, ".data section start address", DataStartAddr);
- PrintAuxMember32(Hex, "TOC anchor address", TOCAnchorAddr);
- PrintAuxMember32(Number, "Section number of entryPoint", SecNumOfEntryPoint);
- PrintAuxMember32(Number, "Section number of .text", SecNumOfText);
- PrintAuxMember32(Number, "Section number of .data", SecNumOfData);
- PrintAuxMember32(Number, "Section number of TOC", SecNumOfTOC);
- PrintAuxMember32(Number, "Section number of loader data", SecNumOfLoader);
- PrintAuxMember32(Number, "Section number of .bss", SecNumOfBSS);
- PrintAuxMember32(Hex, "Maxium alignment of .text", MaxAlignOfText);
- PrintAuxMember32(Hex, "Maxium alignment of .data", MaxAlignOfData);
- PrintAuxMember32(Hex, "Module type", ModuleType);
- PrintAuxMember32(Hex, "CPU type of objects", CpuFlag);
- PrintAuxMember32(Hex, "(Reserved)", CpuType);
- PrintAuxMember32(Hex, "Maximum stack size", MaxStackSize);
- PrintAuxMember32(Hex, "Maximum data size", MaxDataSize);
- PrintAuxMember32(Hex, "Reserved for debugger", ReservedForDebugger);
- PrintAuxMember32(Hex, "Text page size", TextPageSize);
- PrintAuxMember32(Hex, "Data page size", DataPageSize);
- PrintAuxMember32(Hex, "Stack page size", StackPageSize);
+ PrintAuxMember(Hex, "Magic", AuxHeader->AuxMagic);
+ PrintAuxMember(Hex, "Version", AuxHeader->Version);
+ PrintAuxMember(Hex, "Size of .text section", AuxHeader->TextSize);
+ PrintAuxMember(Hex, "Size of .data section", AuxHeader->InitDataSize);
+ PrintAuxMember(Hex, "Size of .bss section", AuxHeader->BssDataSize);
+ PrintAuxMember(Hex, "Entry point address", AuxHeader->EntryPointAddr);
+ PrintAuxMember(Hex, ".text section start address", AuxHeader->TextStartAddr);
+ PrintAuxMember(Hex, ".data section start address", AuxHeader->DataStartAddr);
+ PrintAuxMember(Hex, "TOC anchor address", AuxHeader->TOCAnchorAddr);
+ PrintAuxMember(Number, "Section number of entryPoint",
+ AuxHeader->SecNumOfEntryPoint);
+ PrintAuxMember(Number, "Section number of .text", AuxHeader->SecNumOfText);
+ PrintAuxMember(Number, "Section number of .data", AuxHeader->SecNumOfData);
+ PrintAuxMember(Number, "Section number of TOC", AuxHeader->SecNumOfTOC);
+ PrintAuxMember(Number, "Section number of loader data",
+ AuxHeader->SecNumOfLoader);
+ PrintAuxMember(Number, "Section number of .bss", AuxHeader->SecNumOfBSS);
+ PrintAuxMember(Hex, "Maxium alignment of .text", AuxHeader->MaxAlignOfText);
+ PrintAuxMember(Hex, "Maxium alignment of .data", AuxHeader->MaxAlignOfData);
+ PrintAuxMember(Hex, "Module type", AuxHeader->ModuleType);
+ PrintAuxMember(Hex, "CPU type of objects", AuxHeader->CpuFlag);
+ PrintAuxMember(Hex, "(Reserved)", AuxHeader->CpuType);
+ PrintAuxMember(Hex, "Maximum stack size", AuxHeader->MaxStackSize);
+ PrintAuxMember(Hex, "Maximum data size", AuxHeader->MaxDataSize);
+ PrintAuxMember(Hex, "Reserved for debugger", AuxHeader->ReservedForDebugger);
+ PrintAuxMember(Hex, "Text page size", AuxHeader->TextPageSize);
+ PrintAuxMember(Hex, "Data page size", AuxHeader->DataPageSize);
+ PrintAuxMember(Hex, "Stack page size", AuxHeader->StackPageSize);
if (offsetof(XCOFFAuxiliaryHeader32, FlagAndTDataAlignment) +
sizeof(XCOFFAuxiliaryHeader32::FlagAndTDataAlignment) <=
AuxSize) {
@@ -790,35 +829,11 @@ void XCOFFDumper::printAuxiliaryHeader(
AuxHeader->getTDataAlignment());
}
- PrintAuxMember32(Number, "Section number for .tdata", SecNumOfTData);
- PrintAuxMember32(Number, "Section number for .tbss", SecNumOfTBSS);
+ PrintAuxMember(Number, "Section number for .tdata", AuxHeader->SecNumOfTData);
+ PrintAuxMember(Number, "Section number for .tbss", AuxHeader->SecNumOfTBSS);
- // Deal with error.
- if (PartialFieldOffset < AuxSize) {
- std::string ErrInfo;
- llvm::raw_string_ostream StringOS(ErrInfo);
- StringOS << "Only partial field for " << PartialFieldName << " at offset ("
- << PartialFieldOffset << ").";
- StringOS.flush();
- reportWarning(
- make_error<GenericBinaryError>(ErrInfo, object_error::parse_failed),
- "-");
- W.printBinary(
- "Raw data", "",
- ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) + PartialFieldOffset,
- AuxSize - PartialFieldOffset));
- } else if (sizeof(XCOFFAuxiliaryHeader32) < AuxSize) {
- reportWarning(make_error<GenericBinaryError>(
- "There are extra data beyond auxiliary header",
- object_error::parse_failed),
- "-");
- W.printBinary("Extra raw data", "",
- ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) +
- sizeof(XCOFFAuxiliaryHeader32),
- AuxSize - sizeof(XCOFFAuxiliaryHeader32)));
- }
-
-#undef PrintAuxMember32
+ checkAndPrintAuxHeaderParseError(PartialFieldName, PartialFieldOffset,
+ AuxSize, *AuxHeader, this);
}
void XCOFFDumper::printAuxiliaryHeader(
@@ -829,38 +844,34 @@ void XCOFFDumper::printAuxiliaryHeader(
uint16_t PartialFieldOffset = AuxSize;
const char *PartialFieldName = nullptr;
- DictScope DS(W, "AuxiliaryHeader");
-
-#define PrintAuxMember64(H, S, T) \
- if (offsetof(XCOFFAuxiliaryHeader64, T) + \
- sizeof(XCOFFAuxiliaryHeader64::T) <= \
- AuxSize) \
- W.print##H(S, AuxHeader->T); \
- else if (offsetof(XCOFFAuxiliaryHeader64, T) < AuxSize) { \
- PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader64, T); \
- PartialFieldName = S; \
- }
+ auto PrintAuxMember = [&](PrintStyle Style, const char *MemberName,
+ auto &Member) {
+ printAuxMemberHelper(Style, MemberName, Member, AuxHeader, AuxSize,
+ PartialFieldOffset, PartialFieldName, W);
+ };
- PrintAuxMember64(Hex, "Magic", AuxMagic);
- PrintAuxMember64(Hex, "Version", Version);
- PrintAuxMember64(Hex, "Reserved for debugger", ReservedForDebugger);
- PrintAuxMember64(Hex, ".text section start address", TextStartAddr);
- PrintAuxMember64(Hex, ".data section start address", DataStartAddr);
- PrintAuxMember64(Hex, "TOC anchor address", TOCAnchorAddr);
- PrintAuxMember64(Number, "Section number of entryPoint", SecNumOfEntryPoint);
- PrintAuxMember64(Number, "Section number of .text", SecNumOfText);
- PrintAuxMember64(Number, "Section number of .data", SecNumOfData);
- PrintAuxMember64(Number, "Section number of TOC", SecNumOfTOC);
- PrintAuxMember64(Number, "Section number of loader data", SecNumOfLoader);
- PrintAuxMember64(Number, "Section number of .bss", SecNumOfBSS);
- PrintAuxMember64(Hex, "Maxium alignment of .text", MaxAlignOfText);
- PrintAuxMember64(Hex, "Maxium alignment of .data", MaxAlignOfData);
- PrintAuxMember64(Hex, "Module type", ModuleType);
- PrintAuxMember64(Hex, "CPU type of objects", CpuFlag);
- PrintAuxMember64(Hex, "(Reserved)", CpuType);
- PrintAuxMember64(Hex, "Text page size", TextPageSize);
- PrintAuxMember64(Hex, "Data page size", DataPageSize);
- PrintAuxMember64(Hex, "Stack page size", StackPageSize);
+ PrintAuxMember(Hex, "Magic", AuxHeader->AuxMagic);
+ PrintAuxMember(Hex, "Version", AuxHeader->Version);
+ PrintAuxMember(Hex, "Reserved for debugger", AuxHeader->ReservedForDebugger);
+ PrintAuxMember(Hex, ".text section start address", AuxHeader->TextStartAddr);
+ PrintAuxMember(Hex, ".data section start address", AuxHeader->DataStartAddr);
+ PrintAuxMember(Hex, "TOC anchor address", AuxHeader->TOCAnchorAddr);
+ PrintAuxMember(Number, "Section number of entryPoint",
+ AuxHeader->SecNumOfEntryPoint);
+ PrintAuxMember(Number, "Section number of .text", AuxHeader->SecNumOfText);
+ PrintAuxMember(Number, "Section number of .data", AuxHeader->SecNumOfData);
+ PrintAuxMember(Number, "Section number of TOC", AuxHeader->SecNumOfTOC);
+ PrintAuxMember(Number, "Section number of loader data",
+ AuxHeader->SecNumOfLoader);
+ PrintAuxMember(Number, "Section number of .bss", AuxHeader->SecNumOfBSS);
+ PrintAuxMember(Hex, "Maxium alignment of .text", AuxHeader->MaxAlignOfText);
+ PrintAuxMember(Hex, "Maxium alignment of .data", AuxHeader->MaxAlignOfData);
+ PrintAuxMember(Hex, "Module type", AuxHeader->ModuleType);
+ PrintAuxMember(Hex, "CPU type of objects", AuxHeader->CpuFlag);
+ PrintAuxMember(Hex, "(Reserved)", AuxHeader->CpuType);
+ PrintAuxMember(Hex, "Text page size", AuxHeader->TextPageSize);
+ PrintAuxMember(Hex, "Data page size", AuxHeader->DataPageSize);
+ PrintAuxMember(Hex, "Stack page size", AuxHeader->StackPageSize);
if (offsetof(XCOFFAuxiliaryHeader64, FlagAndTDataAlignment) +
sizeof(XCOFFAuxiliaryHeader64::FlagAndTDataAlignment) <=
AuxSize) {
@@ -868,42 +879,18 @@ void XCOFFDumper::printAuxiliaryHeader(
W.printHex("Alignment of thread-local storage",
AuxHeader->getTDataAlignment());
}
- PrintAuxMember64(Hex, "Size of .text section", TextSize);
- PrintAuxMember64(Hex, "Size of .data section", InitDataSize);
- PrintAuxMember64(Hex, "Size of .bss section", BssDataSize);
- PrintAuxMember64(Hex, "Entry point address", EntryPointAddr);
- PrintAuxMember64(Hex, "Maximum stack size", MaxStackSize);
- PrintAuxMember64(Hex, "Maximum data size", MaxDataSize);
- PrintAuxMember64(Number, "Section number for .tdata", SecNumOfTData);
- PrintAuxMember64(Number, "Section number for .tbss", SecNumOfTBSS);
- PrintAuxMember64(Hex, "Additional flags 64-bit XCOFF", XCOFF64Flag);
-
- if (PartialFieldOffset < AuxSize) {
- std::string ErrInfo;
- llvm::raw_string_ostream StringOS(ErrInfo);
- StringOS << "Only partial field for " << PartialFieldName << " at offset ("
- << PartialFieldOffset << ").";
- StringOS.flush();
- reportWarning(
- make_error<GenericBinaryError>(ErrInfo, object_error::parse_failed),
- "-");
- ;
- W.printBinary(
- "Raw data", "",
- ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) + PartialFieldOffset,
- AuxSize - PartialFieldOffset));
- } else if (sizeof(XCOFFAuxiliaryHeader64) < AuxSize) {
- reportWarning(make_error<GenericBinaryError>(
- "There are extra data beyond auxiliary header",
- object_error::parse_failed),
- "-");
- W.printBinary("Extra raw data", "",
- ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) +
- sizeof(XCOFFAuxiliaryHeader64),
- AuxSize - sizeof(XCOFFAuxiliaryHeader64)));
- }
-
-#undef PrintAuxMember64
+ PrintAuxMember(Hex, "Size of .text section", AuxHeader->TextSize);
+ PrintAuxMember(Hex, "Size of .data section", AuxHeader->InitDataSize);
+ PrintAuxMember(Hex, "Size of .bss section", AuxHeader->BssDataSize);
+ PrintAuxMember(Hex, "Entry point address", AuxHeader->EntryPointAddr);
+ PrintAuxMember(Hex, "Maximum stack size", AuxHeader->MaxStackSize);
+ PrintAuxMember(Hex, "Maximum data size", AuxHeader->MaxDataSize);
+ PrintAuxMember(Number, "Section number for .tdata", AuxHeader->SecNumOfTData);
+ PrintAuxMember(Number, "Section number for .tbss", AuxHeader->SecNumOfTBSS);
+ PrintAuxMember(Hex, "Additional flags 64-bit XCOFF", AuxHeader->XCOFF64Flag);
+
+ checkAndPrintAuxHeaderParseError(PartialFieldName, PartialFieldOffset,
+ AuxSize, *AuxHeader, this);
}
template <typename T>
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index 543b0de82cdf..e1ebbeb41f28 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -21,6 +21,7 @@
#include "llvm-readobj.h"
#include "ObjDumper.h"
#include "WindowsResourceDumper.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/MC/TargetRegistry.h"
@@ -83,6 +84,14 @@ public:
};
enum OutputFormatTy { bsd, sysv, posix, darwin, just_symbols };
+
+enum SortSymbolKeyTy {
+ NAME = 0,
+ TYPE = 1,
+ UNKNOWN = 100,
+ // TODO: add ADDRESS, SIZE as needed.
+};
+
} // namespace
namespace opts {
@@ -113,6 +122,7 @@ static bool StringTable;
static bool Symbols;
static bool UnwindInfo;
static cl::boolOrDefault SectionMapping;
+static SmallVector<SortSymbolKeyTy> SortKeys;
// ELF specific options.
static bool DynamicTable;
@@ -253,6 +263,19 @@ static void parseOptions(const opt::InputArgList &Args) {
opts::ProgramHeaders = Args.hasArg(OPT_program_headers);
opts::RawRelr = Args.hasArg(OPT_raw_relr);
opts::SectionGroups = Args.hasArg(OPT_section_groups);
+ if (Arg *A = Args.getLastArg(OPT_sort_symbols_EQ)) {
+ for (StringRef KeyStr : llvm::split(A->getValue(), ",")) {
+ SortSymbolKeyTy KeyType = StringSwitch<SortSymbolKeyTy>(KeyStr)
+ .Case("name", SortSymbolKeyTy::NAME)
+ .Case("type", SortSymbolKeyTy::TYPE)
+ .Default(SortSymbolKeyTy::UNKNOWN);
+ if (KeyType == SortSymbolKeyTy::UNKNOWN)
+ error("--sort-symbols value should be 'name' or 'type', but was '" +
+ Twine(KeyStr) + "'");
+ opts::SortKeys.push_back(KeyType);
+ }
+ }
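Together with the Opts.td change above, this accepts a comma-separated key list, so an invocation would look like (sketched from the options added in this patch):

  llvm-readobj --symbols --sort-symbols=type,name /path/to/object.o

which sorts the symbol table by type first and breaks ties by name; dumpers that cannot compare symbols fall back to unsorted output with the warning emitted in dumpObject below.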
opts::VersionInfo = Args.hasArg(OPT_version_info);
// Mach-O specific options.
@@ -334,11 +357,39 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
toString(std::move(ContentErr));
ObjDumper *Dumper;
+ Optional<SymbolComparator> SymComp;
Expected<std::unique_ptr<ObjDumper>> DumperOrErr = createDumper(Obj, Writer);
if (!DumperOrErr)
reportError(DumperOrErr.takeError(), FileStr);
Dumper = (*DumperOrErr).get();
+ if (!opts::SortKeys.empty()) {
+ if (Dumper->canCompareSymbols()) {
+ SymComp = SymbolComparator();
+ for (SortSymbolKeyTy Key : opts::SortKeys) {
+ switch (Key) {
+ case NAME:
+ SymComp->addPredicate([Dumper](SymbolRef LHS, SymbolRef RHS) {
+ return Dumper->compareSymbolsByName(LHS, RHS);
+ });
+ break;
+ case TYPE:
+ SymComp->addPredicate([Dumper](SymbolRef LHS, SymbolRef RHS) {
+ return Dumper->compareSymbolsByType(LHS, RHS);
+ });
+ break;
+ case UNKNOWN:
+ llvm_unreachable("Unsupported sort key");
+ }
+ }
+
+ } else {
+ reportWarning(createStringError(
+ errc::invalid_argument,
+ "--sort-symbols is not supported yet for this format"),
+ FileStr);
+ }
+ }
Dumper->printFileSummary(FileStr, Obj, opts::InputFilenames, A);
if (opts::FileHeaders)
@@ -374,7 +425,7 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
if (opts::UnwindInfo)
Dumper->printUnwindInfo();
if (opts::Symbols || opts::DynamicSymbols)
- Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols);
+ Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols, SymComp);
if (!opts::StringDump.empty())
Dumper->printSectionsAsString(Obj, opts::StringDump);
if (!opts::HexDump.empty())
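
The SymbolComparator built above chains one predicate per --sort-symbols key; earlier keys dominate and later ones only break ties. A minimal standalone model of that chaining (the real class lives in ObjDumper.h and operates on object::SymbolRef; the simplified types here are assumptions for illustration):

    #include <functional>
    #include <vector>

    // Earlier predicates take priority; later ones only break ties.
    struct ChainedCompare {
      using Pred = std::function<bool(int, int)>;
      std::vector<Pred> Preds;
      void addPredicate(Pred P) { Preds.push_back(std::move(P)); }
      bool operator()(int L, int R) const {
        for (const Pred &P : Preds) {
          if (P(L, R))
            return true;  // L sorts strictly before R on this key.
          if (P(R, L))
            return false; // R sorts strictly before L on this key.
          // Tie on this key: consult the next predicate.
        }
        return false; // Equivalent under every key.
      }
    };

Sorting with such a functor yields the usual lexicographic order over the requested keys: --sort-symbols=name,type sorts by name and falls back to type only when names compare equal.
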
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.h b/llvm/tools/llvm-readobj/llvm-readobj.h
index 0ea695d1673d..989cd0aba6c0 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -9,10 +9,13 @@
#ifndef LLVM_TOOLS_LLVM_READOBJ_LLVM_READOBJ_H
#define LLVM_TOOLS_LLVM_READOBJ_LLVM_READOBJ_H
+#include "ObjDumper.h"
+
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
#include <string>
namespace llvm {
diff --git a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 21339a3f8f3d..df82fb04e8e6 100644
--- a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -46,7 +46,7 @@ using namespace llvm::object;
static cl::OptionCategory RTDyldCategory("RTDyld Options");
-static cl::list<std::string> InputFileList(cl::Positional, cl::ZeroOrMore,
+static cl::list<std::string> InputFileList(cl::Positional,
cl::desc("<input files>"),
cl::cat(RTDyldCategory));
@@ -79,11 +79,11 @@ static cl::opt<std::string>
cl::init("_main"), cl::cat(RTDyldCategory));
static cl::list<std::string> Dylibs("dylib", cl::desc("Add library."),
- cl::ZeroOrMore, cl::cat(RTDyldCategory));
+ cl::cat(RTDyldCategory));
static cl::list<std::string> InputArgv("args", cl::Positional,
cl::desc("<program arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs,
+ cl::PositionalEatsArgs,
cl::cat(RTDyldCategory));
static cl::opt<std::string>
@@ -98,7 +98,7 @@ static cl::opt<std::string>
static cl::list<std::string>
CheckFiles("check",
cl::desc("File containing RuntimeDyld verifier checks."),
- cl::ZeroOrMore, cl::cat(RTDyldCategory));
+ cl::cat(RTDyldCategory));
static cl::opt<uint64_t>
PreallocMemory("preallocate",
@@ -127,14 +127,13 @@ static cl::list<std::string>
SpecificSectionMappings("map-section",
cl::desc("For -verify only: Map a section to a "
"specific address."),
- cl::ZeroOrMore, cl::Hidden,
- cl::cat(RTDyldCategory));
+ cl::Hidden, cl::cat(RTDyldCategory));
static cl::list<std::string> DummySymbolMappings(
"dummy-extern",
cl::desc("For -verify only: Inject a symbol into the extern "
"symbol table."),
- cl::ZeroOrMore, cl::Hidden, cl::cat(RTDyldCategory));
+ cl::Hidden, cl::cat(RTDyldCategory));
static cl::opt<bool> PrintAllocationRequests(
"print-alloc-requests",
@@ -286,7 +285,7 @@ private:
uintptr_t SlabSize = 0;
uintptr_t CurrentSlabOffset = 0;
SectionIDMap *SecIDMap = nullptr;
-#if defined(__x86_64__) && defined(__ELF__)
+#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__)
unsigned UsedTLSStorage = 0;
#endif
};
@@ -350,7 +349,7 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
// In case the execution needs TLS storage, we define a very small TLS memory
// area here that will be used in allocateTLSSection().
-#if defined(__x86_64__) && defined(__ELF__)
+#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__)
extern "C" {
alignas(16) __attribute__((visibility("hidden"), tls_model("initial-exec"),
used)) thread_local char LLVMRTDyldTLSSpace[16];
@@ -361,7 +360,7 @@ TrivialMemoryManager::TLSSection
TrivialMemoryManager::allocateTLSSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
StringRef SectionName) {
-#if defined(__x86_64__) && defined(__ELF__)
+#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__)
if (Size + UsedTLSStorage > sizeof(LLVMRTDyldTLSSpace)) {
return {};
}
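
The hunks above narrow the small-TLS-slab path from any x86-64 ELF target to Linux specifically, presumably because the fixed initial-exec buffer trick only matches the TLS layout there. The allocation itself is a plain bump allocator over a thread_local array; a simplified sketch of that shape (sizes, alignment handling, and the per-manager bookkeeping are elided):

    #include <cstdint>

    alignas(16) thread_local char TLSSpace[16]; // the whole per-thread slab
    static unsigned UsedTLSStorage = 0;

    char *allocateTLS(uintptr_t Size) {
      if (Size + UsedTLSStorage > sizeof(TLSSpace))
        return nullptr; // slab exhausted; the real code returns an empty TLSSection
      char *P = &TLSSpace[UsedTLSStorage]; // hand out the next free chunk
      UsedTLSStorage += Size;
      return P;
    }
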
diff --git a/llvm/tools/llvm-sim/llvm-sim.cpp b/llvm/tools/llvm-sim/llvm-sim.cpp
index 26e370ff30f1..6879d73c4434 100644
--- a/llvm/tools/llvm-sim/llvm-sim.cpp
+++ b/llvm/tools/llvm-sim/llvm-sim.cpp
@@ -85,10 +85,9 @@ exportToFile(const StringRef FilePath,
Optional<unsigned> End =
getPositionInModule((*C.back()).Inst, LLVMInstNum);
- assert(Start.hasValue() &&
+ assert(Start &&
"Could not find instruction number for first instruction");
- assert(End.hasValue() &&
- "Could not find instruction number for last instruction");
+ assert(End && "Could not find instruction number for last instruction");
J.object([&] {
J.attribute("start", Start.getValue());
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index 9135d60fdf92..e15d1d6048c7 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -69,41 +69,10 @@ static cl::opt<std::string> OutputFilename("o",
cl::value_desc("filename"),
cl::cat(StressCategory));
-static LLVMContext Context;
-
-namespace cl {
-
-template <> class parser<Type*> final : public basic_parser<Type*> {
-public:
- parser(Option &O) : basic_parser(O) {}
-
- // Parse options as IR types. Return true on error.
- bool parse(Option &O, StringRef, StringRef Arg, Type *&Value) {
- if (Arg == "half") Value = Type::getHalfTy(Context);
- else if (Arg == "fp128") Value = Type::getFP128Ty(Context);
- else if (Arg == "x86_fp80") Value = Type::getX86_FP80Ty(Context);
- else if (Arg == "ppc_fp128") Value = Type::getPPC_FP128Ty(Context);
- else if (Arg == "x86_mmx") Value = Type::getX86_MMXTy(Context);
- else if (Arg.startswith("i")) {
- unsigned N = 0;
- Arg.drop_front().getAsInteger(10, N);
- if (N > 0)
- Value = Type::getIntNTy(Context, N);
- }
-
- if (!Value)
- return O.error("Invalid IR scalar type: '" + Arg + "'!");
- return false;
- }
-
- StringRef getValueName() const override { return "IR scalar type"; }
-};
-
-} // end namespace cl
-
-static cl::list<Type*> AdditionalScalarTypes("types", cl::CommaSeparated,
- cl::desc("Additional IR scalar types "
- "(always includes i1, i8, i16, i32, i64, float and double)"));
+static cl::list<StringRef> AdditionalScalarTypes(
+ "types", cl::CommaSeparated,
+ cl::desc("Additional IR scalar types "
+ "(always includes i1, i8, i16, i32, i64, float and double)"));
namespace {
@@ -185,7 +154,38 @@ struct Modifier {
public:
/// C'tor
Modifier(BasicBlock *Block, PieceTable *PT, Random *R)
- : BB(Block), PT(PT), Ran(R), Context(BB->getContext()) {}
+ : BB(Block), PT(PT), Ran(R), Context(BB->getContext()) {
+ ScalarTypes.assign({Type::getInt1Ty(Context), Type::getInt8Ty(Context),
+ Type::getInt16Ty(Context), Type::getInt32Ty(Context),
+ Type::getInt64Ty(Context), Type::getFloatTy(Context),
+ Type::getDoubleTy(Context)});
+
+ for (auto &Arg : AdditionalScalarTypes) {
+ Type *Ty = nullptr;
+ if (Arg == "half")
+ Ty = Type::getHalfTy(Context);
+ else if (Arg == "fp128")
+ Ty = Type::getFP128Ty(Context);
+ else if (Arg == "x86_fp80")
+ Ty = Type::getX86_FP80Ty(Context);
+ else if (Arg == "ppc_fp128")
+ Ty = Type::getPPC_FP128Ty(Context);
+ else if (Arg == "x86_mmx")
+ Ty = Type::getX86_MMXTy(Context);
+ else if (Arg.startswith("i")) {
+ unsigned N = 0;
+ Arg.drop_front().getAsInteger(10, N);
+ if (N > 0)
+ Ty = Type::getIntNTy(Context, N);
+ }
+ if (!Ty) {
+ errs() << "Invalid IR scalar type: '" << Arg << "'!\n";
+ exit(1);
+ }
+
+ ScalarTypes.push_back(Ty);
+ }
+ }
/// virtual D'tor to silence warnings.
virtual ~Modifier() = default;
@@ -310,20 +310,6 @@ protected:
/// Pick a random scalar type.
Type *pickScalarType() {
- static std::vector<Type*> ScalarTypes;
- if (ScalarTypes.empty()) {
- ScalarTypes.assign({
- Type::getInt1Ty(Context),
- Type::getInt8Ty(Context),
- Type::getInt16Ty(Context),
- Type::getInt32Ty(Context),
- Type::getInt64Ty(Context),
- Type::getFloatTy(Context),
- Type::getDoubleTy(Context)
- });
- llvm::append_range(ScalarTypes, AdditionalScalarTypes);
- }
-
return ScalarTypes[getRandom() % ScalarTypes.size()];
}
@@ -338,6 +324,8 @@ protected:
/// Context
LLVMContext &Context;
+
+ std::vector<Type *> ScalarTypes;
};
struct LoadModifier: public Modifier {
@@ -347,8 +335,10 @@ struct LoadModifier: public Modifier {
void Act() override {
// Try to use predefined pointers. If none exist, use an undef pointer value.
Value *Ptr = getRandomPointerValue();
- Value *V = new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L",
- BB->getTerminator());
+ Type *Ty = Ptr->getType()->isOpaquePointerTy()
+ ? pickType()
+ : Ptr->getType()->getNonOpaquePointerElementType();
+ Value *V = new LoadInst(Ty, Ptr, "L", BB->getTerminator());
PT->push_back(V);
}
};
@@ -360,14 +350,16 @@ struct StoreModifier: public Modifier {
void Act() override {
// Try to use predefined pointers. If none exist, use an undef pointer value.
Value *Ptr = getRandomPointerValue();
- Value *Val = getRandomValue(Ptr->getType()->getPointerElementType());
- Type *ValTy = Val->getType();
+ Type *ValTy = Ptr->getType()->isOpaquePointerTy()
+ ? pickType()
+ : Ptr->getType()->getNonOpaquePointerElementType();
// Do not store vectors of i1s because they are unsupported
// by the codegen.
if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() == 1)
return;
+ Value *Val = getRandomValue(ValTy);
new StoreInst(Val, Ptr, BB->getTerminator());
}
};
@@ -745,6 +737,7 @@ int main(int argc, char **argv) {
cl::HideUnrelatedOptions({&StressCategory, &getColorCategory()});
cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n");
+ LLVMContext Context;
auto M = std::make_unique<Module>("/tmp/autogen.bc", Context);
Function *F = GenEmptyFunction(M.get());
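
Both memory-access modifiers above now branch on pointer opaqueness: with a typed pointer the accessed type is still recovered from the pointee, while an opaque pointer carries no element type, so a random type is chosen instead. The decision in isolation (a sketch; pickType() in the tool supplies the random fallback):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    // Choose the type a load/store through Ptr should access.
    Type *pickAccessType(Value *Ptr, Type *RandomFallback) {
      Type *PtrTy = Ptr->getType();
      return PtrTy->isOpaquePointerTy()
                 ? RandomFallback // no pointee type to consult
                 : PtrTy->getNonOpaquePointerElementType();
    }
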
diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp
index 438eed33d283..71d1321ee0ba 100644
--- a/llvm/tools/llvm-strings/llvm-strings.cpp
+++ b/llvm/tools/llvm-strings/llvm-strings.cpp
@@ -64,8 +64,7 @@ public:
static StringRef ToolName;
static cl::list<std::string> InputFileNames(cl::Positional,
- cl::desc("<input object files>"),
- cl::ZeroOrMore);
+ cl::desc("<input object files>"));
static int MinLength = 4;
static bool PrintFileName;
diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td
index 6026e24d6ffa..6742e086d6ff 100644
--- a/llvm/tools/llvm-symbolizer/Opts.td
+++ b/llvm/tools/llvm-symbolizer/Opts.td
@@ -21,11 +21,17 @@ defm adjust_vma
: Eq<"adjust-vma", "Add specified offset to object file addresses">,
MetaVarName<"<offset>">;
def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">;
+defm build_id : Eq<"build-id", "Build ID used to look up the object file">;
+defm cache_size : Eq<"cache-size", "Max size in bytes of the in-memory binary cache.">;
+def color : F<"color", "Use color when symbolizing log markup.">;
+def color_EQ : Joined<["--"], "color=">, HelpText<"Whether to use color when symbolizing log markup: always, auto, never">, Values<"always,auto,never">;
defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"<dir>">;
+defm debuginfod : B<"debuginfod", "Use debuginfod to find debug binaries", "Don't use debuginfod to find debug binaries">;
defm default_arch
: Eq<"default-arch", "Default architecture (for multi-arch objects)">,
Group<grp_mach_o>;
defm demangle : B<"demangle", "Demangle function names", "Don't demangle function names">;
+def filter_markup : Flag<["--"], "filter-markup">, HelpText<"Filter symbolizer markup from stdin.">;
def functions : F<"functions", "Print function name for a given address">;
def functions_EQ : Joined<["--"], "functions=">, HelpText<"Print function name for a given address">, Values<"none,short,linkage">;
def help : F<"help", "Display this help">;
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 66a2e703129b..b782c7a1720a 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -15,10 +15,16 @@
//===----------------------------------------------------------------------===//
#include "Opts.inc"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+#include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Debuginfod/DIFetcher.h"
+#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Debuginfod/HTTPClient.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@@ -102,9 +108,31 @@ enum class Command {
Frame,
};
+static void enableDebuginfod(LLVMSymbolizer &Symbolizer) {
+ static bool IsEnabled = false;
+ if (IsEnabled)
+ return;
+ IsEnabled = true;
+ // Look up symbols using the debuginfod client.
+ Symbolizer.addDIFetcher(std::make_unique<DebuginfodDIFetcher>());
+ // The HTTPClient must be initialized for use by the debuginfod client.
+ HTTPClient::initialize();
+}
+
+static SmallVector<uint8_t> parseBuildID(StringRef Str) {
+ std::string Bytes;
+ if (!tryGetFromHex(Str, Bytes))
+ return {};
+ ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
+ Bytes.size());
+ return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+}
+
static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
StringRef InputString, Command &Cmd,
- std::string &ModuleName, uint64_t &ModuleOffset) {
+ std::string &ModuleName,
+ SmallVectorImpl<uint8_t> &BuildID,
+ uint64_t &ModuleOffset) {
const char kDelimiters[] = " \n\r";
ModuleName = "";
if (InputString.consume_front("CODE ")) {
@@ -117,9 +145,31 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
// If no cmd, assume it's CODE.
Cmd = Command::Code;
}
- const char *Pos = InputString.data();
+
+ const char *Pos;
// Skip delimiters and parse input filename (if needed).
- if (BinaryName.empty()) {
+ if (BinaryName.empty() && BuildID.empty()) {
+ bool HasFilePrefix = false;
+ bool HasBuildIDPrefix = false;
+ while (true) {
+ if (InputString.consume_front("FILE:")) {
+ if (HasFilePrefix)
+ return false;
+ HasFilePrefix = true;
+ continue;
+ }
+ if (InputString.consume_front("BUILDID:")) {
+ if (HasBuildIDPrefix)
+ return false;
+ HasBuildIDPrefix = true;
+ continue;
+ }
+ break;
+ }
+ if (HasFilePrefix && HasBuildIDPrefix)
+ return false;
+
+ Pos = InputString.data();
Pos += strspn(Pos, kDelimiters);
if (*Pos == '"' || *Pos == '\'') {
char Quote = *Pos;
@@ -134,7 +184,14 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
ModuleName = std::string(Pos, NameLength);
Pos += NameLength;
}
+ if (HasBuildIDPrefix) {
+ BuildID = parseBuildID(ModuleName);
+ if (BuildID.empty())
+ return false;
+ ModuleName.clear();
+ }
} else {
+ Pos = InputString.data();
ModuleName = BinaryName.str();
}
// Skip delimiters and parse module offset.
@@ -148,31 +205,24 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
}
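
parseBuildID above and the toHex call later in this file form a round trip: tryGetFromHex (from llvm/ADT/StringExtras.h) decodes the hex string a BUILDID: line carries into raw bytes, and toHex prints them back for display. A self-contained sketch of that round trip:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringExtras.h"
    using namespace llvm;

    SmallVector<uint8_t> decodeBuildID(StringRef Hex) {
      std::string Bytes;
      if (!tryGetFromHex(Hex, Bytes))
        return {}; // not valid hex: the empty vector signals failure
      return SmallVector<uint8_t>(Bytes.begin(), Bytes.end());
    }

    // toHex(decodeBuildID("deadbeef")) yields "DEADBEEF" (toHex is uppercase
    // by default), which is how the symbolizer labels build-ID modules.
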
-static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
- bool IsAddr2Line, OutputStyle Style,
- StringRef InputString, LLVMSymbolizer &Symbolizer,
- DIPrinter &Printer) {
- Command Cmd;
- std::string ModuleName;
- uint64_t Offset = 0;
- if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
- StringRef(InputString), Cmd, ModuleName, Offset)) {
- Printer.printInvalidCommand({ModuleName, None}, InputString);
- return;
- }
-
+template <typename T>
+void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
+ uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
+ OutputStyle Style, LLVMSymbolizer &Symbolizer,
+ DIPrinter &Printer) {
uint64_t AdjustedOffset = Offset - AdjustVMA;
+ object::SectionedAddress Address = {AdjustedOffset,
+ object::SectionedAddress::UndefSection};
if (Cmd == Command::Data) {
- Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(
- ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
+ Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
} else if (Cmd == Command::Frame) {
- Expected<std::vector<DILocal>> ResOrErr = Symbolizer.symbolizeFrame(
- ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
+ Expected<std::vector<DILocal>> ResOrErr =
+ Symbolizer.symbolizeFrame(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
- } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
- Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
- ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
+ } else if (ShouldInline) {
+ Expected<DIInliningInfo> ResOrErr =
+ Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
} else if (Style == OutputStyle::GNU) {
// With PrintFunctions == FunctionNameKind::LinkageName (default)
@@ -181,8 +231,8 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
// caller function in the inlining chain. This contradicts the existing
// behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
// the topmost function, which suits our needs better.
- Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
- ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
+ Expected<DIInliningInfo> ResOrErr =
+ Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
Expected<DILineInfo> Res0OrErr =
!ResOrErr
? Expected<DILineInfo>(ResOrErr.takeError())
@@ -190,10 +240,39 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
: ResOrErr->getFrame(0));
print({ModuleName, Offset}, Res0OrErr, Printer);
} else {
- Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
- ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
+ Expected<DILineInfo> ResOrErr =
+ Symbolizer.symbolizeCode(ModuleSpec, Address);
print({ModuleName, Offset}, ResOrErr, Printer);
}
+ Symbolizer.pruneCache();
+}
+
+static void symbolizeInput(const opt::InputArgList &Args,
+ ArrayRef<uint8_t> IncomingBuildID,
+ uint64_t AdjustVMA, bool IsAddr2Line,
+ OutputStyle Style, StringRef InputString,
+ LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
+ Command Cmd;
+ std::string ModuleName;
+ SmallVector<uint8_t> BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
+ uint64_t Offset = 0;
+ if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
+ StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) {
+ Printer.printInvalidCommand({ModuleName, None}, InputString);
+ return;
+ }
+ bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
+ if (!BuildID.empty()) {
+ assert(ModuleName.empty());
+ if (!Args.hasArg(OPT_no_debuginfod))
+ enableDebuginfod(Symbolizer);
+ std::string BuildIDStr = toHex(BuildID);
+ executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
+ Style, Symbolizer, Printer);
+ } else {
+ executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
+ Style, Symbolizer, Printer);
+ }
}
static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
@@ -260,10 +339,52 @@ static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
}
+static Optional<bool> parseColorArg(const opt::InputArgList &Args) {
+ if (Args.hasArg(OPT_color))
+ return true;
+ if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))
+ return StringSwitch<Optional<bool>>(A->getValue())
+ .Case("always", true)
+ .Case("never", false)
+ .Case("auto", None);
+ return None;
+}
+
+static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args,
+ int ID) {
+ const opt::Arg *A = Args.getLastArg(ID);
+ if (!A)
+ return {};
+
+ StringRef V(A->getValue());
+ SmallVector<uint8_t> BuildID = parseBuildID(V);
+ if (BuildID.empty()) {
+ errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
+ exit(1);
+ }
+ return BuildID;
+}
+
+// Symbolize the markup from stdin and write the result to stdout.
+static void filterMarkup(const opt::InputArgList &Args) {
+ MarkupParser Parser;
+ MarkupFilter Filter(outs(), parseColorArg(Args));
+ for (std::string InputString; std::getline(std::cin, InputString);) {
+ InputString += '\n';
+ Parser.parseLine(InputString);
+ Filter.beginLine(InputString);
+ while (Optional<MarkupNode> Element = Parser.nextNode())
+ Filter.filter(*Element);
+ }
+ Parser.flush();
+ while (Optional<MarkupNode> Element = Parser.nextNode())
+ Filter.filter(*Element);
+}
+
+ExitOnError ExitOnErr;
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
- // The HTTPClient must be initialized for use by the debuginfod client.
- HTTPClient::initialize();
sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
@@ -304,6 +425,8 @@ int main(int argc, char **argv) {
}
#endif
Opts.UseSymbolTable = true;
+ if (Args.hasArg(OPT_cache_size_EQ))
+ parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
Config.PrintAddress = Args.hasArg(OPT_addresses);
Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
Config.Pretty = Args.hasArg(OPT_pretty_print);
@@ -319,6 +442,11 @@ int main(int argc, char **argv) {
}
}
+ if (Args.hasArg(OPT_filter_markup)) {
+ filterMarkup(Args);
+ return 0;
+ }
+
auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
if (strcmp(A->getValue(), "GNU") == 0)
@@ -329,7 +457,23 @@ int main(int argc, char **argv) {
Style = OutputStyle::LLVM;
}
+ if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
+ errs() << "error: cannot specify both --build-id and --obj\n";
+ return EXIT_FAILURE;
+ }
+ SmallVector<uint8_t> BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
+
LLVMSymbolizer Symbolizer(Opts);
+
+ // A debuginfod lookup could succeed if an HTTP client is available and at
+ // least one backing URL is configured.
+ bool ShouldUseDebuginfodByDefault =
+ HTTPClient::isAvailable() &&
+ !ExitOnErr(getDefaultDebuginfodUrls()).empty();
+ if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod,
+ ShouldUseDebuginfodByDefault))
+ enableDebuginfod(Symbolizer);
+
std::unique_ptr<DIPrinter> Printer;
if (Style == OutputStyle::GNU)
Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
@@ -348,15 +492,15 @@ int main(int argc, char **argv) {
std::string StrippedInputString(InputString);
llvm::erase_if(StrippedInputString,
[](char c) { return c == '\r' || c == '\n'; });
- symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString,
- Symbolizer, *Printer);
+ symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
+ StrippedInputString, Symbolizer, *Printer);
outs().flush();
}
} else {
Printer->listBegin();
for (StringRef Address : InputAddresses)
- symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer,
- *Printer);
+ symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
+ Symbolizer, *Printer);
Printer->listEnd();
}
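
filterMarkup above follows a standard incremental-parse shape: feed one line, drain every node that became complete, and after EOF flush the parser and drain once more so a trailing unterminated element is still emitted. The same shape with a toy parser (the toy merely echoes lines; it stands in for MarkupParser/MarkupFilter, which do the real work):

    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    struct ToyParser {
      std::vector<std::string> Ready;
      void parseLine(const std::string &L) { Ready.push_back(L); }
      std::optional<std::string> nextNode() {
        if (Ready.empty())
          return std::nullopt;
        std::string N = Ready.front();
        Ready.erase(Ready.begin());
        return N;
      }
      void flush() {} // a real parser emits buffered partial input here
    };

    int main() {
      ToyParser Parser;
      for (std::string Line; std::getline(std::cin, Line);) {
        Parser.parseLine(Line + "\n"); // getline strips '\n'; restore it
        while (auto N = Parser.nextNode())
          std::cout << *N;
      }
      Parser.flush();
      while (auto N = Parser.nextNode()) // drain anything flush released
        std::cout << *N;
    }
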
diff --git a/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp b/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp
index 772f124c5a59..09dd6f76bf6e 100644
--- a/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp
+++ b/llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
index 4a69f96a597a..7deeaef40caf 100644
--- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
+++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
@@ -338,6 +338,7 @@ int main(int argc, char *argv[]) {
assert(TLIandSDKboth + TLIandSDKneither + TLIdoesSDKdoesnt +
TLIdoesntSDKdoes ==
LibFunc::NumLibFuncs);
+ (void) TLIandSDKneither;
outs() << "<< Total TLI yes SDK no: " << TLIdoesSDKdoesnt
<< "\n>> Total TLI no SDK yes: " << TLIdoesntSDKdoes
<< "\n== Total TLI yes SDK yes: " << TLIandSDKboth;
diff --git a/llvm/tools/llvm-xray/func-id-helper.cpp b/llvm/tools/llvm-xray/func-id-helper.cpp
index afc912a6398e..ce4eafd071ec 100644
--- a/llvm/tools/llvm-xray/func-id-helper.cpp
+++ b/llvm/tools/llvm-xray/func-id-helper.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "func-id-helper.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <sstream>
diff --git a/llvm/tools/llvm-xray/func-id-helper.h b/llvm/tools/llvm-xray/func-id-helper.h
index c6ce198170d5..d99fb7c1cfb0 100644
--- a/llvm/tools/llvm-xray/func-id-helper.h
+++ b/llvm/tools/llvm-xray/func-id-helper.h
@@ -13,6 +13,7 @@
#define LLVM_TOOLS_LLVM_XRAY_FUNC_ID_HELPER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include <unordered_map>
diff --git a/llvm/tools/llvm-xray/xray-graph-diff.cpp b/llvm/tools/llvm-xray/xray-graph-diff.cpp
index f22ea06e0537..bcadade86bb5 100644
--- a/llvm/tools/llvm-xray/xray-graph-diff.cpp
+++ b/llvm/tools/llvm-xray/xray-graph-diff.cpp
@@ -22,6 +22,7 @@
#include "xray-color-helper.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/XRay/Trace.h"
using namespace llvm;
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index af3308939442..17c5da408560 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "NewPMDriver.h"
-#include "PassPrinters.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -66,10 +65,6 @@ static cl::opt<DebugLogging> DebugPM(
DebugLogging::Verbose, "verbose",
"Print extra information about adaptors and pass managers")));
-static cl::list<std::string>
- PassPlugins("load-pass-plugin",
- cl::desc("Load passes from plugin library"));
-
// This flag specifies a textual description of the alias analysis pipeline to
// use when querying for aliasing information. It only works in concert with
// the "passes" flag above.
@@ -122,11 +117,28 @@ static cl::opt<std::string> PipelineEarlySimplificationEPPipeline(
cl::desc("A textual description of the module pass pipeline inserted at "
"the EarlySimplification extension point into default pipelines"),
cl::Hidden);
+static cl::opt<std::string> OptimizerEarlyEPPipeline(
+ "passes-ep-optimizer-early",
+ cl::desc("A textual description of the module pass pipeline inserted at "
+ "the OptimizerEarly extension point into default pipelines"),
+ cl::Hidden);
static cl::opt<std::string> OptimizerLastEPPipeline(
"passes-ep-optimizer-last",
cl::desc("A textual description of the module pass pipeline inserted at "
"the OptimizerLast extension point into default pipelines"),
cl::Hidden);
+static cl::opt<std::string> FullLinkTimeOptimizationEarlyEPPipeline(
+ "passes-ep-full-link-time-optimization-early",
+ cl::desc("A textual description of the module pass pipeline inserted at "
+ "the FullLinkTimeOptimizationEarly extension point into default "
+ "pipelines"),
+ cl::Hidden);
+static cl::opt<std::string> FullLinkTimeOptimizationLastEPPipeline(
+ "passes-ep-full-link-time-optimization-last",
+ cl::desc("A textual description of the module pass pipeline inserted at "
+ "the FullLinkTimeOptimizationLast extension point into default "
+ "pipelines"),
+ cl::Hidden);
// Individual pipeline tuning options.
extern cl::opt<bool> DisableLoopUnrolling;
@@ -223,12 +235,35 @@ static void registerEPCallbacks(PassBuilder &PB) {
ExitOnError Err("Unable to parse EarlySimplification pipeline: ");
Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline));
});
- if (tryParsePipelineText<FunctionPassManager>(PB, OptimizerLastEPPipeline))
+ if (tryParsePipelineText<ModulePassManager>(PB, OptimizerEarlyEPPipeline))
+ PB.registerOptimizerEarlyEPCallback(
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
+ ExitOnError Err("Unable to parse OptimizerEarlyEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, OptimizerEarlyEPPipeline));
+ });
+ if (tryParsePipelineText<ModulePassManager>(PB, OptimizerLastEPPipeline))
PB.registerOptimizerLastEPCallback(
[&PB](ModulePassManager &PM, OptimizationLevel) {
ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline));
});
+ if (tryParsePipelineText<ModulePassManager>(
+ PB, FullLinkTimeOptimizationEarlyEPPipeline))
+ PB.registerFullLinkTimeOptimizationEarlyEPCallback(
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
+ ExitOnError Err(
+ "Unable to parse FullLinkTimeOptimizationEarlyEP pipeline: ");
+ Err(PB.parsePassPipeline(PM,
+ FullLinkTimeOptimizationEarlyEPPipeline));
+ });
+ if (tryParsePipelineText<ModulePassManager>(
+ PB, FullLinkTimeOptimizationLastEPPipeline))
+ PB.registerFullLinkTimeOptimizationLastEPCallback(
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
+ ExitOnError Err(
+ "Unable to parse FullLinkTimeOptimizationLastEP pipeline: ");
+ Err(PB.parsePassPipeline(PM, FullLinkTimeOptimizationLastEPPipeline));
+ });
}
#define HANDLE_EXTENSION(Ext) \
@@ -240,6 +275,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
ToolOutputFile *ThinLTOLinkOut,
ToolOutputFile *OptRemarkFile,
StringRef PassPipeline, ArrayRef<StringRef> Passes,
+ ArrayRef<PassPlugin> PassPlugins,
OutputKind OK, VerifierKind VK,
bool ShouldPreserveAssemblyUseListOrder,
bool ShouldPreserveBitcodeUseListOrder,
@@ -312,33 +348,17 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
PassBuilder PB(TM, PTO, P, &PIC);
registerEPCallbacks(PB);
- // Load requested pass plugins and let them register pass builder callbacks
- for (auto &PluginFN : PassPlugins) {
- auto PassPlugin = PassPlugin::Load(PluginFN);
- if (!PassPlugin) {
- errs() << "Failed to load passes from '" << PluginFN
- << "'. Request ignored.\n";
- continue;
- }
-
- PassPlugin->registerPassBuilderCallbacks(PB);
- }
+ // For any loaded plugins, let them register pass builder callbacks.
+ for (auto &PassPlugin : PassPlugins)
+ PassPlugin.registerPassBuilderCallbacks(PB);
PB.registerPipelineParsingCallback(
[](StringRef Name, ModulePassManager &MPM,
ArrayRef<PassBuilder::PipelineElement>) {
AddressSanitizerOptions Opts;
if (Name == "asan-pipeline") {
- MPM.addPass(
- RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
MPM.addPass(ModuleAddressSanitizerPass(Opts));
return true;
- } else if (Name == "asan-function-pipeline") {
- MPM.addPass(
- RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
- MPM.addPass(
- createModuleToFunctionPassAdaptor(AddressSanitizerPass(Opts)));
- return true;
}
return false;
});
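
Each of the new -passes-ep-* options above follows the same pattern: if the string parses as a module pipeline, register a callback at the matching extension point that parses the text into the ModulePassManager handed in. The pattern reduced to one function (a sketch; the real code captures the cl::opt globals directly):

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Support/Error.h"
    using namespace llvm;

    void registerOptimizerLastText(PassBuilder &PB, std::string PipelineText) {
      if (PipelineText.empty())
        return;
      // The callback runs when a default pipeline reaches the OptimizerLast
      // extension point; PipelineText is captured by value so it outlives
      // this function.
      PB.registerOptimizerLastEPCallback(
          [&PB, PipelineText](ModulePassManager &MPM, OptimizationLevel) {
            ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
            Err(PB.parsePassPipeline(MPM, PipelineText));
          });
    }
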
diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h
index 056f7d6a9b80..16bb205afdca 100644
--- a/llvm/tools/opt/NewPMDriver.h
+++ b/llvm/tools/opt/NewPMDriver.h
@@ -20,12 +20,12 @@
#ifndef LLVM_TOOLS_OPT_NEWPMDRIVER_H
#define LLVM_TOOLS_OPT_NEWPMDRIVER_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/CommandLine.h"
namespace llvm {
class StringRef;
class Module;
+class PassPlugin;
class TargetMachine;
class ToolOutputFile;
class TargetLibraryInfoImpl;
@@ -69,7 +69,8 @@ bool runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
TargetLibraryInfoImpl *TLII, ToolOutputFile *Out,
ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile,
StringRef PassPipeline, ArrayRef<StringRef> PassInfos,
- opt_tool::OutputKind OK, opt_tool::VerifierKind VK,
+ ArrayRef<PassPlugin> PassPlugins, opt_tool::OutputKind OK,
+ opt_tool::VerifierKind VK,
bool ShouldPreserveAssemblyUseListOrder,
bool ShouldPreserveBitcodeUseListOrder,
bool EmitSummaryIndex, bool EmitModuleHash,
diff --git a/llvm/tools/opt/PassPrinters.cpp b/llvm/tools/opt/PassPrinters.cpp
deleted file mode 100644
index 4e81b5d29c4d..000000000000
--- a/llvm/tools/opt/PassPrinters.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-//===- PassPrinters.cpp - Utilities to print analysis info for passes -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Utilities to print analysis info for various kinds of passes.
-///
-//===----------------------------------------------------------------------===//
-
-#include "PassPrinters.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionPass.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
-
-using namespace llvm;
-
-namespace {
-
-struct FunctionPassPrinter : public FunctionPass {
- const PassInfo *PassToPrint;
- raw_ostream &Out;
- static char ID;
- std::string PassName;
-
- FunctionPassPrinter(const PassInfo *PI, raw_ostream &out)
- : FunctionPass(ID), PassToPrint(PI), Out(out) {
- std::string PassToPrintName = std::string(PassToPrint->getPassName());
- PassName = "FunctionPass Printer: " + PassToPrintName;
- }
-
- bool runOnFunction(Function &F) override {
- Out << "Printing analysis '" << PassToPrint->getPassName()
- << "' for function '" << F.getName() << "':\n";
-
- // Get and print pass...
- getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, F.getParent());
- return false;
- }
-
- StringRef getPassName() const override { return PassName; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(PassToPrint->getTypeInfo());
- AU.setPreservesAll();
- }
-};
-
-char FunctionPassPrinter::ID = 0;
-
-struct CallGraphSCCPassPrinter : public CallGraphSCCPass {
- static char ID;
- const PassInfo *PassToPrint;
- raw_ostream &Out;
- std::string PassName;
-
- CallGraphSCCPassPrinter(const PassInfo *PI, raw_ostream &out)
- : CallGraphSCCPass(ID), PassToPrint(PI), Out(out) {
- std::string PassToPrintName = std::string(PassToPrint->getPassName());
- PassName = "CallGraphSCCPass Printer: " + PassToPrintName;
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override {
- Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
-
- // Get and print pass...
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- Function *F = (*I)->getFunction();
- if (F)
- getAnalysisID<Pass>(PassToPrint->getTypeInfo())
- .print(Out, F->getParent());
- }
- return false;
- }
-
- StringRef getPassName() const override { return PassName; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(PassToPrint->getTypeInfo());
- AU.setPreservesAll();
- }
-};
-
-char CallGraphSCCPassPrinter::ID = 0;
-
-struct ModulePassPrinter : public ModulePass {
- static char ID;
- const PassInfo *PassToPrint;
- raw_ostream &Out;
- std::string PassName;
-
- ModulePassPrinter(const PassInfo *PI, raw_ostream &out)
- : ModulePass(ID), PassToPrint(PI), Out(out) {
- std::string PassToPrintName = std::string(PassToPrint->getPassName());
- PassName = "ModulePass Printer: " + PassToPrintName;
- }
-
- bool runOnModule(Module &M) override {
- Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
-
- // Get and print pass...
- getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, &M);
- return false;
- }
-
- StringRef getPassName() const override { return PassName; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(PassToPrint->getTypeInfo());
- AU.setPreservesAll();
- }
-};
-
-char ModulePassPrinter::ID = 0;
-
-struct LoopPassPrinter : public LoopPass {
- static char ID;
- const PassInfo *PassToPrint;
- raw_ostream &Out;
- std::string PassName;
-
- LoopPassPrinter(const PassInfo *PI, raw_ostream &out)
- : LoopPass(ID), PassToPrint(PI), Out(out) {
- std::string PassToPrintName = std::string(PassToPrint->getPassName());
- PassName = "LoopPass Printer: " + PassToPrintName;
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
-
- // Get and print pass...
- getAnalysisID<Pass>(PassToPrint->getTypeInfo())
- .print(Out, L->getHeader()->getParent()->getParent());
- return false;
- }
-
- StringRef getPassName() const override { return PassName; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(PassToPrint->getTypeInfo());
- AU.setPreservesAll();
- }
-};
-
-char LoopPassPrinter::ID = 0;
-
-struct RegionPassPrinter : public RegionPass {
- static char ID;
- const PassInfo *PassToPrint;
- raw_ostream &Out;
- std::string PassName;
-
- RegionPassPrinter(const PassInfo *PI, raw_ostream &out)
- : RegionPass(ID), PassToPrint(PI), Out(out) {
- std::string PassToPrintName = std::string(PassToPrint->getPassName());
- PassName = "RegionPass Printer: " + PassToPrintName;
- }
-
- bool runOnRegion(Region *R, RGPassManager &RGM) override {
- Out << "Printing analysis '" << PassToPrint->getPassName() << "' for "
- << "region: '" << R->getNameStr() << "' in function '"
- << R->getEntry()->getParent()->getName() << "':\n";
- // Get and print pass...
- getAnalysisID<Pass>(PassToPrint->getTypeInfo())
- .print(Out, R->getEntry()->getParent()->getParent());
- return false;
- }
-
- StringRef getPassName() const override { return PassName; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(PassToPrint->getTypeInfo());
- AU.setPreservesAll();
- }
-};
-
-char RegionPassPrinter::ID = 0;
-
-} // end anonymous namespace
-
-FunctionPass *llvm::createFunctionPassPrinter(const PassInfo *PI,
- raw_ostream &OS) {
- return new FunctionPassPrinter(PI, OS);
-}
-
-CallGraphSCCPass *llvm::createCallGraphPassPrinter(const PassInfo *PI,
- raw_ostream &OS) {
- return new CallGraphSCCPassPrinter(PI, OS);
-}
-
-ModulePass *llvm::createModulePassPrinter(const PassInfo *PI, raw_ostream &OS) {
- return new ModulePassPrinter(PI, OS);
-}
-
-LoopPass *llvm::createLoopPassPrinter(const PassInfo *PI, raw_ostream &OS) {
- return new LoopPassPrinter(PI, OS);
-}
-
-RegionPass *llvm::createRegionPassPrinter(const PassInfo *PI, raw_ostream &OS) {
- return new RegionPassPrinter(PI, OS);
-}
diff --git a/llvm/tools/opt/PassPrinters.h b/llvm/tools/opt/PassPrinters.h
deleted file mode 100644
index a4e1921399fc..000000000000
--- a/llvm/tools/opt/PassPrinters.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//=- PassPrinters.h - Utilities to print analysis info for passes -*- C++ -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Utilities to print analysis info for various kinds of passes.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_OPT_PASSPRINTERS_H
-#define LLVM_TOOLS_OPT_PASSPRINTERS_H
-
-namespace llvm {
-
-class CallGraphSCCPass;
-class FunctionPass;
-class ModulePass;
-class LoopPass;
-class PassInfo;
-class raw_ostream;
-class RegionPass;
-
-FunctionPass *createFunctionPassPrinter(const PassInfo *PI, raw_ostream &out);
-
-CallGraphSCCPass *createCallGraphPassPrinter(const PassInfo *PI,
- raw_ostream &out);
-
-ModulePass *createModulePassPrinter(const PassInfo *PI, raw_ostream &out);
-
-LoopPass *createLoopPassPrinter(const PassInfo *PI, raw_ostream &out);
-
-RegionPass *createRegionPassPrinter(const PassInfo *PI, raw_ostream &out);
-
-} // end namespace llvm
-
-#endif // LLVM_TOOLS_OPT_PASSPRINTERS_H
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index 7793a5471793..0e013ef3b9fd 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -13,7 +13,6 @@
#include "BreakpointPrinter.h"
#include "NewPMDriver.h"
-#include "PassPrinters.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
@@ -32,6 +31,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
@@ -39,6 +39,7 @@
#include "llvm/LinkAllPasses.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Passes/PassPlugin.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
@@ -51,7 +52,6 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -74,7 +74,7 @@ static cl::opt<bool> EnableNewPassManager(
cl::desc("Enable the new pass manager, translating "
"'opt -foo' to 'opt -passes=foo'. This is strictly for the new PM "
"migration, use '-passes=' when possible."),
- cl::init(LLVM_ENABLE_NEW_PASS_MANAGER));
+ cl::init(true));
// This flag specifies a textual description of the optimization pass pipeline
// to run over the module. This flag switches opt to use the new pass manager
@@ -192,14 +192,9 @@ static cl::opt<bool>
DisableSimplifyLibCalls("disable-simplify-libcalls",
cl::desc("Disable simplify-libcalls"));
-static cl::list<std::string>
-DisableBuiltins("disable-builtin",
- cl::desc("Disable specific target library builtin function"),
- cl::ZeroOrMore);
-
-static cl::opt<bool>
- AnalyzeOnly("analyze", cl::desc("Only perform analysis, no optimization. "
- "Legacy pass manager only."));
+static cl::list<std::string> DisableBuiltins(
+ "disable-builtin",
+ cl::desc("Disable specific target library builtin function"));
static cl::opt<bool> EnableDebugify(
"enable-debugify",
@@ -252,11 +247,6 @@ static cl::opt<bool> DiscardValueNames(
cl::desc("Discard names from Value (other than GlobalValue)."),
cl::init(false), cl::Hidden);
-static cl::opt<bool> Coroutines(
- "enable-coroutines",
- cl::desc("Enable coroutine passes."),
- cl::init(false), cl::Hidden);
-
static cl::opt<bool> TimeTrace(
"time-trace",
cl::desc("Record time trace"));
@@ -300,6 +290,10 @@ static cl::opt<std::string> RemarksFormat(
cl::desc("The format used for serializing remarks (default: YAML)"),
cl::value_desc("format"), cl::init("yaml"));
+static cl::list<std::string>
+ PassPlugins("load-pass-plugin",
+ cl::desc("Load passes from plugin library"));
+
namespace llvm {
cl::opt<PGOKind>
PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden,
@@ -370,9 +364,6 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
if (TM)
TM->adjustPassManager(Builder);
- if (Coroutines)
- addCoroutinePassesToExtensionPoints(Builder);
-
switch (PGOKindFlag) {
case InstrGen:
Builder.EnablePGOInstrGen = true;
@@ -484,7 +475,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"x86-", "xcore-", "wasm-", "systemz-", "ppc-", "nvvm-",
"nvptx-", "mips-", "lanai-", "hexagon-", "bpf-", "avr-",
"thumb2-", "arm-", "si-", "gcn-", "amdgpu-", "aarch64-",
- "amdgcn-", "polly-", "riscv-"};
+ "amdgcn-", "polly-", "riscv-", "dxil-"};
std::vector<StringRef> PassNameContain = {"ehprepare"};
std::vector<StringRef> PassNameExact = {
"safe-stack", "cost-model",
@@ -498,7 +489,11 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"generic-to-nvvm", "expandmemcmp",
"loop-reduce", "lower-amx-type",
"pre-amx-config", "lower-amx-intrinsics",
- "polyhedral-info", "replace-with-veclib"};
+ "polyhedral-info", "print-polyhedral-info",
+ "replace-with-veclib", "jmc-instrument",
+ "dot-regions", "dot-regions-only",
+ "view-regions", "view-regions-only",
+ "select-optimize"};
for (const auto &P : PassNamePrefix)
if (Pass.startswith(P))
return true;
@@ -535,7 +530,6 @@ int main(int argc, char **argv) {
// Initialize passes
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeCore(Registry);
- initializeCoroutines(Registry);
initializeScalarOpts(Registry);
initializeObjCARCOpts(Registry);
initializeVectorization(Registry);
@@ -550,6 +544,7 @@ int main(int argc, char **argv) {
// supported.
initializeExpandMemCmpPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
+ initializeSelectOptimizePass(Registry);
initializeCodeGenPreparePass(Registry);
initializeAtomicExpandPass(Registry);
initializeRewriteSymbolsLegacyPassPass(Registry);
@@ -572,18 +567,38 @@ int main(int argc, char **argv) {
initializeHardwareLoopsPass(Registry);
initializeTypePromotionPass(Registry);
initializeReplaceWithVeclibLegacyPass(Registry);
+ initializeJMCInstrumenterPass(Registry);
#ifdef BUILD_EXAMPLES
initializeExampleIRTransforms(Registry);
#endif
+ SmallVector<PassPlugin, 1> PluginList;
+ PassPlugins.setCallback([&](const std::string &PluginPath) {
+ auto Plugin = PassPlugin::Load(PluginPath);
+ if (!Plugin) {
+ errs() << "Failed to load passes from '" << PluginPath
+ << "'. Request ignored.\n";
+ return;
+ }
+ PluginList.emplace_back(Plugin.get());
+ });
+
cl::ParseCommandLineOptions(argc, argv,
"llvm .bc -> .bc modular optimizer and analysis printer\n");
LLVMContext Context;
- if (AnalyzeOnly && NoOutput) {
- errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n";
+ // If `-passes=` is specified, use NPM.
+ // If `-enable-new-pm` is specified and there are no codegen passes, use NPM.
+ // e.g. `-enable-new-pm -sroa` will use NPM,
+ // but `-enable-new-pm -codegenprepare` will still revert to legacy PM.
+ const bool UseNPM = (EnableNewPassManager && !shouldForceLegacyPM()) ||
+ PassPipeline.getNumOccurrences() > 0;
+
+ if (!UseNPM && PluginList.size()) {
+ errs() << argv[0] << ": " << PassPlugins.ArgStr
+ << " specified with legacy PM.\n";
return 1;
}
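
The plugin handling above relies on a cl::list callback: setCallback registers a lambda that ParseCommandLineOptions invokes once per -load-pass-plugin occurrence, in command-line order, so PluginList is fully populated before the UseNPM check runs. A minimal sketch of the mechanism:

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static cl::list<std::string> Plugins("load-pass-plugin",
                                         cl::desc("Load passes from plugin"));

    int main(int argc, char **argv) {
      Plugins.setCallback([](const std::string &Path) {
        // Fires during parsing, once per occurrence of the flag.
        errs() << "would load: " << Path << "\n";
      });
      cl::ParseCommandLineOptions(argc, argv);
      return 0;
    }
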
@@ -722,7 +737,7 @@ int main(int argc, char **argv) {
// If the output is set to be emitted to standard out, and standard out is a
// console, print out a warning message and refuse to do it. We don't
// impress anyone by spewing tons of binary goo to a terminal.
- if (!Force && !NoOutput && !AnalyzeOnly && !OutputAssembly)
+ if (!Force && !NoOutput && !OutputAssembly)
if (CheckBitcodeOutputToConsole(Out->os()))
NoOutput = true;
@@ -748,19 +763,7 @@ int main(int argc, char **argv) {
}
}
- // If `-passes=` is specified, use NPM.
- // If `-enable-new-pm` is specified and there are no codegen passes, use NPM.
- // e.g. `-enable-new-pm -sroa` will use NPM.
- // but `-enable-new-pm -codegenprepare` will still revert to legacy PM.
- if ((EnableNewPassManager && !shouldForceLegacyPM()) ||
- PassPipeline.getNumOccurrences() > 0) {
- if (AnalyzeOnly) {
- errs() << "Cannot specify -analyze under new pass manager, either "
- "specify '-enable-new-pm=0', or use the corresponding new pass "
- "manager pass, e.g. '-passes=print<scalar-evolution>'. For a "
- "full list of passes, see the '--print-passes' flag.\n";
- return 1;
- }
+ if (UseNPM) {
if (legacy::debugPassSpecified()) {
errs()
<< "-debug-pass does not work with the new PM, either use "
@@ -778,8 +781,9 @@ int main(int argc, char **argv) {
errs() << "Cannot specify multiple -O#\n";
return 1;
}
- if (NumOLevel > 0 && PassPipeline.getNumOccurrences() > 0) {
- errs() << "Cannot specify -O# and --passes=, use "
+ if (NumOLevel > 0 &&
+ (PassPipeline.getNumOccurrences() > 0 || PassList.size() > 0)) {
+ errs() << "Cannot specify -O# and --passes=/--foo-pass, use "
"-passes='default<O#>,other-pass'\n";
return 1;
}
@@ -817,7 +821,7 @@ int main(int argc, char **argv) {
// layer.
return runPassPipeline(argv[0], *M, TM.get(), &TLII, Out.get(),
ThinLinkOut.get(), RemarksFile.get(), Pipeline,
- Passes, OK, VK, PreserveAssemblyUseListOrder,
+ Passes, PluginList, OK, VK, PreserveAssemblyUseListOrder,
PreserveBitcodeUseListOrder, EmitSummaryIndex,
EmitModuleHash, EnableDebugify)
? 0
@@ -829,13 +833,13 @@ int main(int argc, char **argv) {
// the (-check)-debugify passes.
DebugifyCustomPassManager Passes;
DebugifyStatsMap DIStatsMap;
- DebugInfoPerPassMap DIPreservationMap;
+ DebugInfoPerPass DebugInfoBeforePass;
if (DebugifyEach) {
Passes.setDebugifyMode(DebugifyMode::SyntheticDebugInfo);
Passes.setDIStatsMap(DIStatsMap);
} else if (VerifyEachDebugInfoPreserve) {
Passes.setDebugifyMode(DebugifyMode::OriginalDebugInfo);
- Passes.setDIPreservationMap(DIPreservationMap);
+ Passes.setDebugInfoBeforePass(DebugInfoBeforePass);
if (!VerifyDIPreserveExport.empty())
Passes.setOrigDIVerifyBugsReportFilePath(VerifyDIPreserveExport);
}
@@ -855,10 +859,10 @@ int main(int argc, char **argv) {
Passes.setDIStatsMap(DIStatsMap);
Passes.add(createDebugifyModulePass());
} else if (VerifyDebugInfoPreserve) {
- Passes.setDIPreservationMap(DIPreservationMap);
+ Passes.setDebugInfoBeforePass(DebugInfoBeforePass);
Passes.add(createDebugifyModulePass(
DebugifyMode::OriginalDebugInfo, "",
- &(Passes.getDebugInfoPerPassMap())));
+ &(Passes.getDebugInfoPerPass())));
}
}
@@ -934,30 +938,8 @@ int main(int argc, char **argv) {
else
errs() << argv[0] << ": cannot create pass: "
<< PassInf->getPassName() << "\n";
- if (P) {
- PassKind Kind = P->getPassKind();
+ if (P)
addPass(Passes, P);
-
- if (AnalyzeOnly) {
- switch (Kind) {
- case PT_Region:
- Passes.add(createRegionPassPrinter(PassInf, Out->os()));
- break;
- case PT_Loop:
- Passes.add(createLoopPassPrinter(PassInf, Out->os()));
- break;
- case PT_Function:
- Passes.add(createFunctionPassPrinter(PassInf, Out->os()));
- break;
- case PT_CallGraphSCC:
- Passes.add(createCallGraphPassPrinter(PassInf, Out->os()));
- break;
- default:
- Passes.add(createModulePassPrinter(PassInf, Out->os()));
- break;
- }
- }
- }
}
if (OptLevelO0)
@@ -997,7 +979,7 @@ int main(int argc, char **argv) {
Passes.setOrigDIVerifyBugsReportFilePath(VerifyDIPreserveExport);
Passes.add(createCheckDebugifyModulePass(
false, "", nullptr, DebugifyMode::OriginalDebugInfo,
- &(Passes.getDebugInfoPerPassMap()), VerifyDIPreserveExport));
+ &(Passes.getDebugInfoPerPass()), VerifyDIPreserveExport));
}
}
@@ -1010,7 +992,7 @@ int main(int argc, char **argv) {
std::unique_ptr<raw_svector_ostream> BOS;
raw_ostream *OS = nullptr;
- const bool ShouldEmitOutput = !NoOutput && !AnalyzeOnly;
+ const bool ShouldEmitOutput = !NoOutput;
// Write bitcode or assembly to the output as the last step...
if (ShouldEmitOutput || RunTwice) {
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index be17d5c718c2..1acc2a86d176 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -95,6 +95,7 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "SubtargetFeatureInfo.h"
#include "Types.h"
@@ -3394,7 +3395,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
StringTable.GetOrAddStringOffset(LenMnemonic, false));
}
- OS << "static const char *const MnemonicTable =\n";
+ OS << "static const char MnemonicTable[] =\n";
StringTable.EmitString(OS);
OS << ";\n\n";
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 9283ceeb31e0..1d738274c75a 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -19,15 +19,14 @@
#include "Types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
@@ -868,8 +867,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
IAPrinter IAP(CGA.Result->getAsString(), FlatAliasAsmString, NumMIOps);
- bool CantHandle = false;
-
unsigned MIOpNum = 0;
for (unsigned i = 0, e = LastOpNo; i != e; ++i) {
// Skip over tied operands as they're not part of an alias declaration.
@@ -969,10 +966,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
break;
}
case CodeGenInstAlias::ResultOperand::K_Reg:
- // If this is zero_reg, something's playing tricks we're not
- // equipped to handle.
if (!CGA.ResultOperands[i].getRegister()) {
- CantHandle = true;
+ IAP.addCond(std::string(formatv(
+ "AliasPatternCond::K_Reg, {0}::NoRegister", Namespace)));
break;
}
@@ -985,8 +981,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
MIOpNum += RO.getMINumOperands();
}
- if (CantHandle) continue;
-
std::vector<Record *> ReqFeatures;
if (PassSubtarget) {
// We only consider ReqFeatures predicates if PassSubtarget
@@ -1005,6 +999,17 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (D->getNumArgs() == 0)
PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!");
bool IsOr = CombineType == "any_of";
+ // Change (any_of FeatureAll, (any_of ...)) to (any_of FeatureAll, ...).
+ if (IsOr && D->getNumArgs() == 2 && isa<DagInit>(D->getArg(1))) {
+ DagInit *RHS = dyn_cast<DagInit>(D->getArg(1));
+ SmallVector<Init *> Args{D->getArg(0)};
+ SmallVector<StringInit *> ArgNames{D->getArgName(0)};
+ for (unsigned i = 0, e = RHS->getNumArgs(); i != e; ++i) {
+ Args.push_back(RHS->getArg(i));
+ ArgNames.push_back(RHS->getArgName(i));
+ }
+ D = DagInit::get(D->getOperator(), nullptr, Args, ArgNames);
+ }
for (auto *Arg : D->getArgs()) {
bool IsNeg = false;
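
The new block above splices a nested any_of into its parent so the per-argument loop sees each feature leaf once: (any_of A, (any_of B, C)) becomes (any_of A, B, C). The same one-level flatten on a toy expression tree (TableGen's DagInit is immutable, hence the rebuild via DagInit::get in the real code; the mutable Node here is an assumption for brevity):

    #include <memory>
    #include <string>
    #include <vector>

    struct Node {
      std::string Op; // "any_of", "feature", ...
      std::vector<std::shared_ptr<Node>> Args;
    };

    // Splice a trailing nested any_of into its parent, one level deep.
    void flattenOnce(Node &N) {
      if (N.Op != "any_of" || N.Args.size() != 2 || N.Args[1]->Op != "any_of")
        return;
      std::shared_ptr<Node> Nested = N.Args.back();
      N.Args.pop_back();
      N.Args.insert(N.Args.end(), Nested->Args.begin(), Nested->Args.end());
    }
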
diff --git a/llvm/utils/TableGen/AsmWriterInst.cpp b/llvm/utils/TableGen/AsmWriterInst.cpp
index 887abbac9d3b..4a78108d6f4a 100644
--- a/llvm/utils/TableGen/AsmWriterInst.cpp
+++ b/llvm/utils/TableGen/AsmWriterInst.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AsmWriterInst.h"
+#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Error.h"
diff --git a/llvm/utils/TableGen/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp
index 5deac4b34bf2..1f975f52d6e7 100644
--- a/llvm/utils/TableGen/Attributes.cpp
+++ b/llvm/utils/TableGen/Attributes.cpp
@@ -6,10 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TableGen/Record.h"
-#include <algorithm>
-#include <string>
#include <vector>
using namespace llvm;
diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp
index 127ae6247bd9..8f080cd250ab 100644
--- a/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -15,12 +15,19 @@
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <cassert>
using namespace llvm;
namespace {
class CallingConvEmitter {
RecordKeeper &Records;
+ unsigned Counter;
+ std::string CurrentAction;
+ bool SwiftAction;
+
+ std::map<std::string, std::set<std::string>> AssignedRegsMap;
+ std::map<std::string, std::set<std::string>> AssignedSwiftRegsMap;
+ std::map<std::string, std::set<std::string>> DelegateToMap;
+
public:
explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {}
@@ -29,7 +36,7 @@ public:
private:
void EmitCallingConv(Record *CC, raw_ostream &O);
void EmitAction(Record *Action, unsigned Indent, raw_ostream &O);
- unsigned Counter;
+ void EmitArgRegisterLists(raw_ostream &O);
};
} // End anonymous namespace
@@ -39,6 +46,7 @@ void CallingConvEmitter::run(raw_ostream &O) {
// Emit prototypes for all of the non-custom CC's so that they can forward ref
// each other.
Records.startTimer("Emit prototypes");
+ O << "#ifndef GET_CC_REGISTER_LISTS\n\n";
for (Record *CC : CCs) {
if (!CC->getValueAsBit("Custom")) {
unsigned Pad = CC->getName().size();
@@ -59,18 +67,28 @@ void CallingConvEmitter::run(raw_ostream &O) {
// Emit each non-custom calling convention description in full.
Records.startTimer("Emit full descriptions");
for (Record *CC : CCs) {
- if (!CC->getValueAsBit("Custom"))
+ if (!CC->getValueAsBit("Custom")) {
EmitCallingConv(CC, O);
+ }
}
-}
+ EmitArgRegisterLists(O);
+
+ O << "\n#endif // CC_REGISTER_LIST\n";
+}
void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
ListInit *CCActions = CC->getValueAsListInit("Actions");
Counter = 0;
+ CurrentAction = CC->getName().str();
+ // Call upon the creation of a map entry from the void!
+ // We want an entry in AssignedRegsMap for every action, even if that
+ // entry is empty.
+ AssignedRegsMap[CurrentAction] = {};
+
O << "\n\n";
- unsigned Pad = CC->getName().size();
+ unsigned Pad = CurrentAction.size();
if (CC->getValueAsBit("Entry")) {
O << "bool llvm::";
Pad += 12;
@@ -78,13 +96,21 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
O << "static bool ";
Pad += 13;
}
- O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n"
+ O << CurrentAction << "(unsigned ValNo, MVT ValVT,\n"
<< std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
<< std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
// Emit all of the actions, in order.
for (unsigned i = 0, e = CCActions->size(); i != e; ++i) {
+ Record *Action = CCActions->getElementAsRecord(i);
+ SwiftAction = llvm::any_of(Action->getSuperClasses(),
+ [](const std::pair<Record *, SMRange> &Class) {
+ std::string Name =
+ Class.first->getNameInitAsString();
+ return StringRef(Name).startswith("CCIfSwift");
+ });
+
O << "\n";
- EmitAction(CCActions->getElementAsRecord(i), 2, O);
+ EmitAction(Action, 2, O);
}
O << "\n return true; // CC didn't match.\n";
@@ -94,7 +120,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
void CallingConvEmitter::EmitAction(Record *Action,
unsigned Indent, raw_ostream &O) {
std::string IndentStr = std::string(Indent, ' ');
-
+
if (Action->isSubClassOf("CCPredicateAction")) {
O << IndentStr << "if (";
@@ -122,18 +148,30 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr << "if (!" << CC->getName()
<< "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n"
<< IndentStr << " return false;\n";
+ DelegateToMap[CurrentAction].insert(CC->getName().str());
} else if (Action->isSubClassOf("CCAssignToReg")) {
ListInit *RegList = Action->getValueAsListInit("RegList");
if (RegList->size() == 1) {
- O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
- O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n";
+ std::string Name = getQualifiedName(RegList->getElementAsRecord(0));
+ O << IndentStr << "if (unsigned Reg = State.AllocateReg(" << Name
+ << ")) {\n";
+ if (SwiftAction)
+ AssignedSwiftRegsMap[CurrentAction].insert(Name);
+ else
+ AssignedRegsMap[CurrentAction].insert(Name);
} else {
O << IndentStr << "static const MCPhysReg RegList" << ++Counter
<< "[] = {\n";
O << IndentStr << " ";
ListSeparator LS;
- for (unsigned i = 0, e = RegList->size(); i != e; ++i)
- O << LS << getQualifiedName(RegList->getElementAsRecord(i));
+ for (unsigned i = 0, e = RegList->size(); i != e; ++i) {
+ std::string Name = getQualifiedName(RegList->getElementAsRecord(i));
+ if (SwiftAction)
+ AssignedSwiftRegsMap[CurrentAction].insert(Name);
+ else
+ AssignedRegsMap[CurrentAction].insert(Name);
+ O << LS << Name;
+ }
O << "\n" << IndentStr << "};\n";
O << IndentStr << "if (unsigned Reg = State.AllocateReg(RegList"
<< Counter << ")) {\n";
@@ -288,6 +326,83 @@ void CallingConvEmitter::EmitAction(Record *Action,
}
}
+void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) {
+ // Transitively merge all delegated CCs into AssignedRegsMap.
+ using EntryTy = std::pair<std::string, std::set<std::string>>;
+ bool Redo;
+ do {
+ Redo = false;
+ std::deque<EntryTy> Worklist(DelegateToMap.begin(), DelegateToMap.end());
+
+ while (!Worklist.empty()) {
+ EntryTy Entry = Worklist.front();
+ Worklist.pop_front();
+
+ const std::string &CCName = Entry.first;
+ std::set<std::string> &Registers = Entry.second;
+ if (!Registers.empty())
+ continue;
+
+ for (auto &InnerEntry : Worklist) {
+ const std::string &InnerCCName = InnerEntry.first;
+ std::set<std::string> &InnerRegisters = InnerEntry.second;
+
+ if (InnerRegisters.find(CCName) != InnerRegisters.end()) {
+ AssignedRegsMap[InnerCCName].insert(
+ AssignedRegsMap[CCName].begin(),
+ AssignedRegsMap[CCName].end());
+ InnerRegisters.erase(CCName);
+ }
+ }
+
+ DelegateToMap.erase(CCName);
+ Redo = true;
+ }
+ } while (Redo);
+
+ if (AssignedRegsMap.empty())
+ return;
+
+ O << "\n#else\n\n";
+
+ for (auto &Entry : AssignedRegsMap) {
+ const std::string &RegName = Entry.first;
+ std::set<std::string> &Registers = Entry.second;
+
+ if (RegName.empty())
+ continue;
+
+ O << "const MCRegister " << Entry.first << "_ArgRegs[] = { ";
+
+ if (Registers.empty()) {
+ O << "0";
+ } else {
+ ListSeparator LS;
+ for (const std::string &Reg : Registers)
+ O << LS << Reg;
+ }
+
+ O << " };\n";
+ }
+
+ if (AssignedSwiftRegsMap.empty())
+ return;
+
+ O << "\n// Registers used by Swift.\n";
+ for (auto &Entry : AssignedSwiftRegsMap) {
+ const std::string &RegName = Entry.first;
+ std::set<std::string> &Registers = Entry.second;
+
+ O << "const MCRegister " << RegName << "_Swift_ArgRegs[] = { ";
+
+ ListSeparator LS;
+ for (const std::string &Reg : Registers)
+ O << LS << Reg;
+
+ O << " };\n";
+ }
+}
+
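Note on the merge loop above: it is a fixed-point computation, so that a convention which delegates to another also reports the registers its delegate can assign. A minimal standalone sketch of the same idea (illustrative only; it does not mirror the emitter's exact worklist bookkeeping):

    #include <map>
    #include <set>
    #include <string>

    // Propagate register sets along "delegates-to" edges until stable.
    // Regs[CC]      = registers CC assigns directly.
    // Delegates[CC] = conventions CC forwards unmatched arguments to.
    static void mergeDelegatedRegs(
        std::map<std::string, std::set<std::string>> &Regs,
        const std::map<std::string, std::set<std::string>> &Delegates) {
      bool Changed = true;
      while (Changed) { // terminates: sets only grow in a finite universe
        Changed = false;
        for (const auto &Entry : Delegates) {
          std::set<std::string> &Dst = Regs[Entry.first];
          for (const std::string &Callee : Entry.second) {
            auto It = Regs.find(Callee);
            if (It == Regs.end())
              continue;
            for (const std::string &R : It->second)
              Changed |= Dst.insert(R).second; // .second: newly inserted
          }
        }
      }
    }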
namespace llvm {
void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) {
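With this change the generated calling-convention file has two regions: the usual CC functions when GET_CC_REGISTER_LISTS is undefined, and per-convention MCRegister arrays when it is defined. A hypothetical consumer (the .inc name and convention name below are placeholders, not taken from this patch):

    #define GET_CC_REGISTER_LISTS
    #include "FooGenCallingConv.inc" // hypothetical generated file
    #undef GET_CC_REGISTER_LISTS

    // The emitter prints arrays of the form
    //   const MCRegister CC_Foo_ArgRegs[] = { Foo::R0, Foo::R1 };
    // with a single 0 entry for conventions that assign no registers.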
diff --git a/llvm/utils/TableGen/CodeBeadsGen.cpp b/llvm/utils/TableGen/CodeBeadsGen.cpp
deleted file mode 100644
index 18a6d6d19eb2..000000000000
--- a/llvm/utils/TableGen/CodeBeadsGen.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-//===---------- CodeBeadsGen.cpp - Code Beads Generator -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// CodeBeads are data fields carrying auxiliary information for instructions.
-//
-// Under the hood it's simply implemented by a `bits` field (with arbitrary
-// length) in each TG instruction description, where this TG backend will
-// generate a helper function to access it.
-//
-// This is especially useful for expressing variable length encoding
-// instructions and complex addressing modes. Since in those cases each
-// instruction is usually associated with large amount of information like
-// addressing mode details used on a specific operand. Instead of retreating to
-// ad-hoc methods to figure out these information when encoding an instruction,
-// CodeBeads provide a clean table for the instruction encoder to lookup.
-//===----------------------------------------------------------------------===//
-
-#include "CodeGenTarget.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
-#include <map>
-#include <string>
-#include <vector>
-using namespace llvm;
-
-namespace {
-
-class CodeBeadsGen {
- RecordKeeper &Records;
-
-public:
- CodeBeadsGen(RecordKeeper &R) : Records(R) {}
- void run(raw_ostream &OS);
-};
-
-void CodeBeadsGen::run(raw_ostream &OS) {
- CodeGenTarget Target(Records);
- std::vector<Record *> Insts = Records.getAllDerivedDefinitions("Instruction");
-
- // For little-endian instruction bit encodings, reverse the bit order
- Target.reverseBitsForLittleEndianEncoding();
-
- ArrayRef<const CodeGenInstruction *> NumberedInstructions =
- Target.getInstructionsByEnumValue();
-
- // Emit function declaration
- OS << "const uint8_t *llvm::" << Target.getInstNamespace();
- OS << "::getMCInstrBeads(unsigned Opcode) {\n";
-
- // First, get the maximum bit length among all beads. And do some
- // simple validation
- unsigned MaxBitLength = 0;
-
- for (const CodeGenInstruction *CGI : NumberedInstructions) {
- Record *R = CGI->TheDef;
- if (!R->getValue("Beads"))
- continue;
-
- BitsInit *BI = R->getValueAsBitsInit("Beads");
- if (!BI->isComplete()) {
- PrintFatalError(R->getLoc(), "Record `" + R->getName() +
- "', bit field 'Beads' is not complete");
- }
-
- MaxBitLength = std::max(MaxBitLength, BI->getNumBits());
- }
-
- // Number of bytes
- unsigned Parts = MaxBitLength / 8;
-
- // Emit instruction base values
- OS << " static const uint8_t InstBits[][" << Parts << "] = {\n";
- for (const CodeGenInstruction *CGI : NumberedInstructions) {
- Record *R = CGI->TheDef;
-
- if (R->getValueAsString("Namespace") == "TargetOpcode" ||
- !R->getValue("Beads")) {
- OS << "\t{ 0x0 },\t// ";
- if (R->getValueAsBit("isPseudo"))
- OS << "(Pseudo) ";
- OS << R->getName() << "\n";
- continue;
- }
-
- BitsInit *BI = R->getValueAsBitsInit("Beads");
-
- // Convert to byte array:
- // [dcba] -> [a][b][c][d]
- OS << "\t{";
- for (unsigned p = 0; p < Parts; ++p) {
- unsigned Right = 8 * p;
- unsigned Left = Right + 8;
-
- uint8_t Value = 0;
- for (unsigned i = Right; i != Left; ++i) {
- unsigned Shift = i % 8;
- if (auto *B = dyn_cast<BitInit>(BI->getBit(i))) {
- Value |= (static_cast<uint8_t>(B->getValue()) << Shift);
- } else {
- PrintFatalError(R->getLoc(), "Record `" + R->getName() +
- "', bit 'Beads[" + Twine(i) +
- "]' is not defined");
- }
- }
-
- if (p)
- OS << ',';
- OS << " 0x";
- OS.write_hex(Value);
- OS << "";
- }
- OS << " }," << '\t' << "// " << R->getName() << "\n";
- }
- OS << "\t{ 0x0 }\n };\n";
-
- // Emit initial function code
- OS << " return InstBits[Opcode];\n"
- << "}\n\n";
-}
-
-} // End anonymous namespace
-
-namespace llvm {
-
-void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS) {
- emitSourceFileHeader("Machine Code Beads", OS);
- CodeBeadsGen(RK).run(OS);
-}
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index fbac0d969917..2b9931b23c11 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -16,11 +16,13 @@
#include "CodeGenTarget.h"
#include "SubtargetFeatureInfo.h"
#include "Types.h"
+#include "VarLenCodeEmitterGen.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
@@ -117,16 +119,16 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
(!NamedOpIndices.empty() && NamedOpIndices.count(
CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
++NumberedOp;
+ }
- if (NumberedOp >= CGI.Operands.back().MIOperandNo +
- CGI.Operands.back().MINumOperands) {
- errs() << "Too few operands in record " << R->getName() <<
- " (no match for variable " << VarName << "):\n";
- errs() << *R;
- errs() << '\n';
-
- return;
- }
+ if (NumberedOp >=
+ CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
+ std::string E;
+ raw_string_ostream S(E);
+ S << "Too few operands in record " << R->getName()
+ << " (no match for variable " << VarName << "):\n";
+ S << *R;
+ PrintFatalError(R, E);
}
OpIdx = NumberedOp++;
@@ -396,132 +398,138 @@ void CodeEmitterGen::run(raw_ostream &o) {
ArrayRef<const CodeGenInstruction*> NumberedInstructions =
Target.getInstructionsByEnumValue();
- const CodeGenHwModes &HWM = Target.getHwModes();
- // The set of HwModes used by instruction encodings.
- std::set<unsigned> HwModes;
- BitWidth = 0;
- for (const CodeGenInstruction *CGI : NumberedInstructions) {
- Record *R = CGI->TheDef;
- if (R->getValueAsString("Namespace") == "TargetOpcode" ||
- R->getValueAsBit("isPseudo"))
- continue;
+ if (any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
+ Record *R = CGI->TheDef;
+ return R->getValue("Inst") && isa<DagInit>(R->getValueInit("Inst"));
+ })) {
+ emitVarLenCodeEmitter(Records, o);
+ } else {
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ // The set of HwModes used by instruction encodings.
+ std::set<unsigned> HwModes;
+ BitWidth = 0;
+ for (const CodeGenInstruction *CGI : NumberedInstructions) {
+ Record *R = CGI->TheDef;
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo"))
+ continue;
- if (const RecordVal *RV = R->getValue("EncodingInfos")) {
- if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
- EncodingInfoByHwMode EBM(DI->getDef(), HWM);
- for (auto &KV : EBM) {
- BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
- BitWidth = std::max(BitWidth, BI->getNumBits());
- HwModes.insert(KV.first);
+ if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+ if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ for (auto &KV : EBM) {
+ BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
+ BitWidth = std::max(BitWidth, BI->getNumBits());
+ HwModes.insert(KV.first);
+ }
+ continue;
}
- continue;
}
+ BitsInit *BI = R->getValueAsBitsInit("Inst");
+ BitWidth = std::max(BitWidth, BI->getNumBits());
+ }
+ UseAPInt = BitWidth > 64;
+
+ // Emit function declaration
+ if (UseAPInt) {
+ o << "void " << Target.getName()
+ << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+ << " SmallVectorImpl<MCFixup> &Fixups,\n"
+ << " APInt &Inst,\n"
+ << " APInt &Scratch,\n"
+ << " const MCSubtargetInfo &STI) const {\n";
+ } else {
+ o << "uint64_t " << Target.getName();
+ o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+ << " SmallVectorImpl<MCFixup> &Fixups,\n"
+ << " const MCSubtargetInfo &STI) const {\n";
}
- BitsInit *BI = R->getValueAsBitsInit("Inst");
- BitWidth = std::max(BitWidth, BI->getNumBits());
- }
- UseAPInt = BitWidth > 64;
-
- // Emit function declaration
- if (UseAPInt) {
- o << "void " << Target.getName()
- << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
- << " SmallVectorImpl<MCFixup> &Fixups,\n"
- << " APInt &Inst,\n"
- << " APInt &Scratch,\n"
- << " const MCSubtargetInfo &STI) const {\n";
- } else {
- o << "uint64_t " << Target.getName();
- o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
- << " SmallVectorImpl<MCFixup> &Fixups,\n"
- << " const MCSubtargetInfo &STI) const {\n";
- }
-
- // Emit instruction base values
- if (HwModes.empty()) {
- emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
- } else {
- for (unsigned HwMode : HwModes)
- emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
- }
- if (!HwModes.empty()) {
- o << " const uint64_t *InstBits;\n";
- o << " unsigned HwMode = STI.getHwMode();\n";
- o << " switch (HwMode) {\n";
- o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
- for (unsigned I : HwModes) {
- o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
- << "; break;\n";
+ // Emit instruction base values
+ if (HwModes.empty()) {
+ emitInstructionBaseValues(o, NumberedInstructions, Target, -1);
+ } else {
+ for (unsigned HwMode : HwModes)
+ emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode);
}
- o << " };\n";
- }
- // Map to accumulate all the cases.
- std::map<std::string, std::vector<std::string>> CaseMap;
+ if (!HwModes.empty()) {
+ o << " const uint64_t *InstBits;\n";
+ o << " unsigned HwMode = STI.getHwMode();\n";
+ o << " switch (HwMode) {\n";
+ o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
+ for (unsigned I : HwModes) {
+ o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
+ << "; break;\n";
+ }
+ o << " };\n";
+ }
- // Construct all cases statement for each opcode
- for (Record *R : Insts) {
- if (R->getValueAsString("Namespace") == "TargetOpcode" ||
- R->getValueAsBit("isPseudo"))
- continue;
- std::string InstName =
- (R->getValueAsString("Namespace") + "::" + R->getName()).str();
- std::string Case = getInstructionCase(R, Target);
+ // Map to accumulate all the cases.
+ std::map<std::string, std::vector<std::string>> CaseMap;
- CaseMap[Case].push_back(std::move(InstName));
- }
+ // Construct all cases statement for each opcode
+ for (Record *R : Insts) {
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo"))
+ continue;
+ std::string InstName =
+ (R->getValueAsString("Namespace") + "::" + R->getName()).str();
+ std::string Case = getInstructionCase(R, Target);
- // Emit initial function code
- if (UseAPInt) {
- int NumWords = APInt::getNumWords(BitWidth);
- int NumBytes = (BitWidth + 7) / 8;
- o << " const unsigned opcode = MI.getOpcode();\n"
- << " if (Inst.getBitWidth() != " << BitWidth << ")\n"
- << " Inst = Inst.zext(" << BitWidth << ");\n"
- << " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
- << " Scratch = Scratch.zext(" << BitWidth << ");\n"
- << " LoadIntFromMemory(Inst, (const uint8_t *)&InstBits[opcode * "
- << NumWords << "], " << NumBytes << ");\n"
- << " APInt &Value = Inst;\n"
- << " APInt &op = Scratch;\n"
- << " switch (opcode) {\n";
- } else {
- o << " const unsigned opcode = MI.getOpcode();\n"
- << " uint64_t Value = InstBits[opcode];\n"
- << " uint64_t op = 0;\n"
- << " (void)op; // suppress warning\n"
- << " switch (opcode) {\n";
- }
+ CaseMap[Case].push_back(std::move(InstName));
+ }
+
+ // Emit initial function code
+ if (UseAPInt) {
+ int NumWords = APInt::getNumWords(BitWidth);
+ o << " const unsigned opcode = MI.getOpcode();\n"
+ << " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
+ << " Scratch = Scratch.zext(" << BitWidth << ");\n"
+ << " Inst = APInt(" << BitWidth
+ << ", makeArrayRef(InstBits + opcode * " << NumWords << ", " << NumWords
+ << "));\n"
+ << " APInt &Value = Inst;\n"
+ << " APInt &op = Scratch;\n"
+ << " switch (opcode) {\n";
+ } else {
+ o << " const unsigned opcode = MI.getOpcode();\n"
+ << " uint64_t Value = InstBits[opcode];\n"
+ << " uint64_t op = 0;\n"
+ << " (void)op; // suppress warning\n"
+ << " switch (opcode) {\n";
+ }
- // Emit each case statement
- std::map<std::string, std::vector<std::string>>::iterator IE, EE;
- for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
- const std::string &Case = IE->first;
- std::vector<std::string> &InstList = IE->second;
+ // Emit each case statement
+ std::map<std::string, std::vector<std::string>>::iterator IE, EE;
+ for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
+ const std::string &Case = IE->first;
+ std::vector<std::string> &InstList = IE->second;
- for (int i = 0, N = InstList.size(); i < N; i++) {
- if (i) o << "\n";
- o << " case " << InstList[i] << ":";
+ for (int i = 0, N = InstList.size(); i < N; i++) {
+ if (i)
+ o << "\n";
+ o << " case " << InstList[i] << ":";
+ }
+ o << " {\n";
+ o << Case;
+ o << " break;\n"
+ << " }\n";
}
- o << " {\n";
- o << Case;
- o << " break;\n"
- << " }\n";
- }
- // Default case: unhandled opcode
- o << " default:\n"
- << " std::string msg;\n"
- << " raw_string_ostream Msg(msg);\n"
- << " Msg << \"Not supported instr: \" << MI;\n"
- << " report_fatal_error(msg.c_str());\n"
- << " }\n";
- if (UseAPInt)
- o << " Inst = Value;\n";
- else
- o << " return Value;\n";
- o << "}\n\n";
+ // Default case: unhandled opcode
+ o << " default:\n"
+ << " std::string msg;\n"
+ << " raw_string_ostream Msg(msg);\n"
+ << " Msg << \"Not supported instr: \" << MI;\n"
+ << " report_fatal_error(Msg.str().c_str());\n"
+ << " }\n";
+ if (UseAPInt)
+ o << " Inst = Value;\n";
+ else
+ o << " return Value;\n";
+ o << "}\n\n";
+ }
const auto &All = SubtargetFeatureInfo::getAll(Records);
std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures;
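The restructuring above leaves fixed-width targets on the previous interface, while any target whose maximum encoding exceeds 64 bits gets the APInt-based getBinaryCodeForInstr. A sketch of how a target emitter might call the wide entry point (target name, initial widths, and the write-out are illustrative; assumes llvm/Support/EndianStream.h):

    void FooMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                             SmallVectorImpl<MCFixup> &Fixups,
                                             const MCSubtargetInfo &STI) const {
      // The generated code widens these to the instruction bit width.
      APInt Inst(64, 0), Scratch(64, 0);
      getBinaryCodeForInstr(MI, Fixups, Inst, Scratch, STI);
      // Sketch: emit the raw 64-bit words little-endian; a real emitter
      // writes exactly the encoded byte count for MI.
      for (unsigned W = 0; W != Inst.getNumWords(); ++W)
        support::endian::write<uint64_t>(OS, Inst.getRawData()[W],
                                         support::little);
    }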
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index a1f8f4809d5f..9d6adb6d2c37 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -2815,6 +2816,7 @@ void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
StringRef OpName) {
+ RecordKeeper &RK = TheInit->getRecordKeeper();
if (DefInit *DI = dyn_cast<DefInit>(TheInit)) {
Record *R = DI->getDef();
@@ -2853,13 +2855,13 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
if (!OpName.empty())
error("Constant int or bit argument should not have a name!");
if (isa<BitInit>(TheInit))
- TheInit = TheInit->convertInitializerTo(IntRecTy::get());
+ TheInit = TheInit->convertInitializerTo(IntRecTy::get(RK));
return std::make_shared<TreePatternNode>(TheInit, 1);
}
if (BitsInit *BI = dyn_cast<BitsInit>(TheInit)) {
// Turn this into an IntInit.
- Init *II = BI->convertInitializerTo(IntRecTy::get());
+ Init *II = BI->convertInitializerTo(IntRecTy::get(RK));
if (!II || !isa<IntInit>(II))
error("Bits value must be constants!");
return ParseTreePattern(II, OpName);
@@ -2958,8 +2960,8 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
else // Otherwise, no chain.
Operator = getDAGPatterns().get_intrinsic_wo_chain_sdnode();
- Children.insert(Children.begin(),
- std::make_shared<TreePatternNode>(IntInit::get(IID), 1));
+ Children.insert(Children.begin(), std::make_shared<TreePatternNode>(
+ IntInit::get(RK, IID), 1));
}
if (Operator->isSubClassOf("ComplexPattern")) {
@@ -4366,7 +4368,7 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
PatternsToMatch.emplace_back(P.getSrcRecord(), P.getPredicates(),
std::move(NewSrc), std::move(NewDst),
P.getDstRegs(), P.getAddedComplexity(),
- Record::getNewUID(), Mode, Check);
+ Record::getNewUID(Records), Mode, Check);
};
for (PatternToMatch &P : Copy) {
@@ -4742,7 +4744,7 @@ void CodeGenDAGPatterns::GenerateVariants() {
PatternsToMatch[i].getSrcRecord(), PatternsToMatch[i].getPredicates(),
Variant, PatternsToMatch[i].getDstPatternShared(),
PatternsToMatch[i].getDstRegs(),
- PatternsToMatch[i].getAddedComplexity(), Record::getNewUID(),
+ PatternsToMatch[i].getAddedComplexity(), Record::getNewUID(Records),
PatternsToMatch[i].getForceMode(),
PatternsToMatch[i].getHwModeFeatures());
}
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h
index 39d81230a4f2..94694a96eb90 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -28,7 +28,6 @@
#include <functional>
#include <map>
#include <numeric>
-#include <set>
#include <vector>
namespace llvm {
diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp
index 78b698c31b2b..ba12633ace8c 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -12,7 +12,6 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/TableGen/Error.h"
@@ -416,6 +415,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq");
isCodeGenOnly = R->getValueAsBit("isCodeGenOnly");
isPseudo = R->getValueAsBit("isPseudo");
+ isMeta = R->getValueAsBit("isMeta");
ImplicitDefs = R->getValueAsListOfDefs("Defs");
ImplicitUses = R->getValueAsListOfDefs("Uses");
@@ -632,8 +632,8 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
if (!BI->isComplete())
return false;
// Convert the bits init to an integer and use that for the result.
- IntInit *II =
- dyn_cast_or_null<IntInit>(BI->convertInitializerTo(IntRecTy::get()));
+ IntInit *II = dyn_cast_or_null<IntInit>(
+ BI->convertInitializerTo(IntRecTy::get(BI->getRecordKeeper())));
if (!II)
return false;
ResOp = ResultOperand(II->getValue());
diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h
index e0ce5d433602..d3de6d95780c 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/llvm/utils/TableGen/CodeGenInstruction.h
@@ -16,13 +16,13 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <string>
#include <utility>
#include <vector>
namespace llvm {
+class SMLoc;
template <typename T> class ArrayRef;
class Record;
class DagInit;
@@ -271,6 +271,7 @@ template <typename T> class ArrayRef;
bool hasExtraDefRegAllocReq : 1;
bool isCodeGenOnly : 1;
bool isPseudo : 1;
+ bool isMeta : 1;
bool isRegSequence : 1;
bool isExtractSubreg : 1;
bool isInsertSubreg : 1;
diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h
index b005a5866f80..599795e3c065 100644
--- a/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -26,7 +26,7 @@ struct CodeGenIntrinsic {
Record *TheDef; // The actual record defining this intrinsic.
std::string Name; // The name of the LLVM function "llvm.bswap.i32"
std::string EnumName; // The name of the enum "bswap_i32"
- std::string GCCBuiltinName; // Name of the corresponding GCC builtin, or "".
+ std::string ClangBuiltinName; // Name of the corresponding Clang builtin, or "".
std::string MSBuiltinName; // Name of the corresponding MS builtin, or "".
std::string TargetPrefix; // Target prefix, e.g. "ppc" for t-s intrinsics.
@@ -125,6 +125,9 @@ struct CodeGenIntrinsic {
/// True if the intrinsic is no-return.
bool isNoReturn;
+ /// True if the intrinsic is no-callback.
+ bool isNoCallback;
+
/// True if the intrinsic is no-sync.
bool isNoSync;
diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp
index 38871eb8cf3c..02695942f5c1 100644
--- a/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -75,8 +75,8 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
-#include "llvm/Support/Format.h"
#include "llvm/TableGen/Error.h"
using namespace llvm;
typedef std::map<std::string, std::vector<Record*> > InstrRelMapTy;
diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp
index afaeb73ffab1..2c61be713afc 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -12,21 +12,18 @@
//===----------------------------------------------------------------------===//
#include "CodeGenRegisters.h"
-#include "CodeGenTarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -204,12 +201,16 @@ namespace {
class RegUnitIterator {
CodeGenRegister::Vec::const_iterator RegI, RegE;
CodeGenRegister::RegUnitList::iterator UnitI, UnitE;
+ static CodeGenRegister::RegUnitList Sentinel;
public:
RegUnitIterator(const CodeGenRegister::Vec &Regs):
RegI(Regs.begin()), RegE(Regs.end()) {
- if (RegI != RegE) {
+ if (RegI == RegE) {
+ UnitI = Sentinel.end();
+ UnitE = Sentinel.end();
+ } else {
UnitI = (*RegI)->getRegUnits().begin();
UnitE = (*RegI)->getRegUnits().end();
advance();
@@ -240,6 +241,8 @@ protected:
}
};
+CodeGenRegister::RegUnitList RegUnitIterator::Sentinel;
+
} // end anonymous namespace
// Return true if this unit appears in RegUnits.
@@ -635,6 +638,7 @@ struct TupleExpander : SetTheory::Expander {
Def->getValueAsListOfStrings("RegAsmNames");
// Zip them up.
+ RecordKeeper &RK = Def->getRecords();
for (unsigned n = 0; n != Length; ++n) {
std::string Name;
Record *Proto = Lists[0][n];
@@ -651,13 +655,13 @@ struct TupleExpander : SetTheory::Expander {
SmallVector<Init *, 2> CostPerUse;
CostPerUse.insert(CostPerUse.end(), CostList->begin(), CostList->end());
- StringInit *AsmName = StringInit::get("");
+ StringInit *AsmName = StringInit::get(RK, "");
if (!RegNames.empty()) {
if (RegNames.size() <= n)
PrintFatalError(Def->getLoc(),
"Register tuple definition missing name for '" +
Name + "'.");
- AsmName = StringInit::get(RegNames[n]);
+ AsmName = StringInit::get(RK, RegNames[n]);
}
// Create a new Record representing the synthesized register. This record
@@ -696,7 +700,7 @@ struct TupleExpander : SetTheory::Expander {
// Composite registers are always covered by sub-registers.
if (Field == "CoveredBySubRegs")
- RV.setValue(BitInit::get(true));
+ RV.setValue(BitInit::get(RK, true));
// Copy fields from the RegisterTuples def.
if (Field == "SubRegIndices" ||
@@ -1106,6 +1110,17 @@ void CodeGenRegisterClass::buildRegUnitSet(const CodeGenRegBank &RegBank,
}
//===----------------------------------------------------------------------===//
+// CodeGenRegisterCategory
+//===----------------------------------------------------------------------===//
+
+CodeGenRegisterCategory::CodeGenRegisterCategory(CodeGenRegBank &RegBank,
+ Record *R)
+ : TheDef(R), Name(std::string(R->getName())) {
+ for (Record *RegClass : R->getValueAsListOfDefs("Classes"))
+ Classes.push_back(RegBank.getRegClass(RegClass));
+}
+
+//===----------------------------------------------------------------------===//
// CodeGenRegBank
//===----------------------------------------------------------------------===//
@@ -1222,6 +1237,12 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
for (auto &RC : RegClasses)
RC.EnumValue = i++;
CodeGenRegisterClass::computeSubClasses(*this);
+
+ // Read in the register category definitions.
+ std::vector<Record *> RCats =
+ Records.getAllDerivedDefinitions("RegisterCategory");
+ for (auto *R : RCats)
+ RegCategories.emplace_back(*this, R);
}
// Create a synthetic CodeGenSubRegIndex without a corresponding Record.
@@ -1794,6 +1815,7 @@ void CodeGenRegBank::computeRegUnitWeights() {
unsigned NumIters = 0;
for (bool Changed = true; Changed; ++NumIters) {
assert(NumIters <= NumNativeRegUnits && "Runaway register unit weights");
+ (void) NumIters;
Changed = false;
for (auto &Reg : Registers) {
CodeGenRegister::RegUnitList NormalUnits;
diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h
index c9fcf83b0a8a..0fc8b3ef80dd 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/llvm/utils/TableGen/CodeGenRegisters.h
@@ -27,7 +27,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include <cassert>
@@ -476,6 +475,26 @@ namespace llvm {
static void computeSubClasses(CodeGenRegBank&);
};
+ // Register categories are used when we need to determine the category a
+ // register falls into (GPR, vector, fixed, etc.) without having to know
+ // specific information about the target architecture.
+ class CodeGenRegisterCategory {
+ Record *TheDef;
+ std::string Name;
+ std::list<CodeGenRegisterClass *> Classes;
+
+ public:
+ CodeGenRegisterCategory(CodeGenRegBank &, Record *R);
+ CodeGenRegisterCategory(CodeGenRegisterCategory &) = delete;
+
+ // Return the Record that defined this category, or NULL if the category
+ // was created by TableGen.
+ Record *getDef() const { return TheDef; }
+
+ std::string getName() const { return Name; }
+ std::list<CodeGenRegisterClass *> getClasses() const { return Classes; }
+ };
+
// Register units are used to model interference and register pressure.
// Every register is assigned one or more register units such that two
// registers overlap if and only if they have a register unit in common.
@@ -559,6 +578,13 @@ namespace llvm {
typedef std::map<CodeGenRegisterClass::Key, CodeGenRegisterClass*> RCKeyMap;
RCKeyMap Key2RC;
+ // Register categories.
+ std::list<CodeGenRegisterCategory> RegCategories;
+ DenseMap<Record *, CodeGenRegisterCategory *> Def2RCat;
+ using RCatKeyMap =
+ std::map<CodeGenRegisterClass::Key, CodeGenRegisterCategory *>;
+ RCatKeyMap Key2RCat;
+
// Remember each unique set of register units. Initially, this contains a
// unique set for each register class. Similar sets are coalesced with
// pruneUnitSets and new supersets are inferred during computeRegUnitSets.
@@ -719,6 +745,14 @@ namespace llvm {
return RegClasses;
}
+ std::list<CodeGenRegisterCategory> &getRegCategories() {
+ return RegCategories;
+ }
+
+ const std::list<CodeGenRegisterCategory> &getRegCategories() const {
+ return RegCategories;
+ }
+
// Find a register class from its def.
CodeGenRegisterClass *getRegClass(const Record *) const;
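The new RegisterCategory plumbing is not consumed anywhere in this patch; a later backend could query it along these lines (a sketch; the lookup routine is hypothetical):

    // Find the category, if any, that a register class was placed in.
    static const CodeGenRegisterCategory *
    findCategory(CodeGenRegBank &Bank, const CodeGenRegisterClass *RC) {
      for (const CodeGenRegisterCategory &Cat : Bank.getRegCategories())
        for (const CodeGenRegisterClass *C : Cat.getClasses())
          if (C == RC)
            return &Cat;
      return nullptr; // RC was not listed in any RegisterCategory
    }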
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index e47bda725a17..4933bfc476f4 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h
index a331a30b51a8..f7e35b0c808f 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/llvm/utils/TableGen/CodeGenSchedule.h
@@ -17,11 +17,8 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
-#include <map>
namespace llvm {
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 2c1583f7979d..af2e8576af2e 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -14,16 +14,13 @@
//===----------------------------------------------------------------------===//
#include "CodeGenTarget.h"
-#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
#include "CodeGenIntrinsics.h"
#include "CodeGenSchedule.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Timer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
using namespace llvm;
@@ -56,9 +53,12 @@ StringRef llvm::getName(MVT::SimpleValueType T) {
}
StringRef llvm::getEnumName(MVT::SimpleValueType T) {
+ // clang-format off
switch (T) {
case MVT::Other: return "MVT::Other";
case MVT::i1: return "MVT::i1";
+ case MVT::i2: return "MVT::i2";
+ case MVT::i4: return "MVT::i4";
case MVT::i8: return "MVT::i8";
case MVT::i16: return "MVT::i16";
case MVT::i32: return "MVT::i32";
@@ -91,6 +91,8 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v256i1: return "MVT::v256i1";
case MVT::v512i1: return "MVT::v512i1";
case MVT::v1024i1: return "MVT::v1024i1";
+ case MVT::v128i2: return "MVT::v128i2";
+ case MVT::v64i4: return "MVT::v64i4";
case MVT::v1i8: return "MVT::v1i8";
case MVT::v2i8: return "MVT::v2i8";
case MVT::v4i8: return "MVT::v4i8";
@@ -227,6 +229,8 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::nxv2bf16: return "MVT::nxv2bf16";
case MVT::nxv4bf16: return "MVT::nxv4bf16";
case MVT::nxv8bf16: return "MVT::nxv8bf16";
+ case MVT::nxv16bf16: return "MVT::nxv16bf16";
+ case MVT::nxv32bf16: return "MVT::nxv32bf16";
case MVT::nxv1f32: return "MVT::nxv1f32";
case MVT::nxv2f32: return "MVT::nxv2f32";
case MVT::nxv4f32: return "MVT::nxv4f32";
@@ -245,6 +249,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::externref: return "MVT::externref";
default: llvm_unreachable("ILLEGAL VALUE TYPE!");
}
+ // clang-format on
}
/// getQualifiedName - Return the name of the specified record, with a
@@ -471,7 +476,7 @@ GetInstByName(const char *Name,
return I->second.get();
}
-static const char *const FixedInstrs[] = {
+static const char *FixedInstrs[] = {
#define HANDLE_TARGET_OPCODE(OPC) #OPC,
#include "llvm/Support/TargetOpcodes.def"
nullptr};
@@ -555,7 +560,7 @@ void CodeGenTarget::reverseBitsForLittleEndianEncoding() {
NewBits[middle] = BI->getBit(middle);
}
- BitsInit *NewBI = BitsInit::get(NewBits);
+ BitsInit *NewBI = BitsInit::get(Records, NewBits);
// Update the bits in reversed order so that emitInstrOpBits will get the
// correct endianness.
@@ -666,6 +671,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
isCommutative = false;
canThrow = false;
isNoReturn = false;
+ isNoCallback = false;
isNoSync = false;
isNoFree = false;
isWillReturn = false;
@@ -682,8 +688,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
EnumName = DefName.substr(4);
- if (R->getValue("GCCBuiltinName")) // Ignore a missing GCCBuiltinName field.
- GCCBuiltinName = std::string(R->getValueAsString("GCCBuiltinName"));
+ if (R->getValue("ClangBuiltinName")) // Ignore a missing ClangBuiltinName field.
+ ClangBuiltinName = std::string(R->getValueAsString("ClangBuiltinName"));
if (R->getValue("MSBuiltinName")) // Ignore a missing MSBuiltinName field.
MSBuiltinName = std::string(R->getValueAsString("MSBuiltinName"));
@@ -864,6 +870,8 @@ void CodeGenIntrinsic::setProperty(Record *R) {
isConvergent = true;
else if (R->getName() == "IntrNoReturn")
isNoReturn = true;
+ else if (R->getName() == "IntrNoCallback")
+ isNoCallback = true;
else if (R->getName() == "IntrNoSync")
isNoSync = true;
else if (R->getName() == "IntrNoFree")
diff --git a/llvm/utils/TableGen/CodeGenTarget.h b/llvm/utils/TableGen/CodeGenTarget.h
index 5bd84c873f2f..f14828f2c347 100644
--- a/llvm/utils/TableGen/CodeGenTarget.h
+++ b/llvm/utils/TableGen/CodeGenTarget.h
@@ -17,16 +17,15 @@
#define LLVM_UTILS_TABLEGEN_CODEGENTARGET_H
#include "CodeGenHwModes.h"
-#include "CodeGenInstruction.h"
#include "CodeGenRegisters.h"
#include "InfoByHwMode.h"
#include "SDNodeProperties.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/Record.h"
-#include <algorithm>
namespace llvm {
+class RecordKeeper;
+class Record;
+class CodeGenInstruction;
struct CodeGenRegister;
class CodeGenSchedModels;
class CodeGenTarget;
diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp
index 2f211e2958fa..d012a0172a8f 100644
--- a/llvm/utils/TableGen/DAGISelEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelEmitter.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
#include "DAGISelMatcher.h"
#include "llvm/Support/Debug.h"
#include "llvm/TableGen/Record.h"
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 5b0d16a8f3c8..777e75dcd929 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -13,9 +13,7 @@
#include "CodeGenDAGPatterns.h"
#include "DAGISelMatcher.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index 2361ed8a7a95..44bff4c67ab3 100644
--- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -6,9 +6,10 @@
//
//===----------------------------------------------------------------------===//
-#include "DAGISelMatcher.h"
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
#include "CodeGenRegisters.h"
+#include "DAGISelMatcher.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/TableGen/Error.h"
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index 27161d261e85..f2d9165c5c8c 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -21,7 +21,6 @@
//===----------------------------------------------------------------------===//
#include "DFAEmitter.h"
-#include "CodeGenTarget.h"
#include "SequenceToOffsetTable.h"
#include "TableGenBackends.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,9 +29,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cstdint>
+#include <deque>
#include <map>
#include <set>
#include <string>
@@ -306,6 +305,7 @@ void Automaton::emit(raw_ostream &OS) {
}
LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size()
<< " states with " << NumTransitions << " transitions.\n");
+ (void) NumTransitions;
const auto &ActionTypes = Transitions.back().getTypes();
OS << "// The type of an action in the " << Name << " automaton.\n";
diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index 9cbdbc19c206..6704d747f715 100644
--- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -17,9 +17,7 @@
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
#include "DFAEmitter.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
new file mode 100644
index 000000000000..fd58e798b445
--- /dev/null
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -0,0 +1,374 @@
+//===- DXILEmitter.cpp - DXIL operation Emitter ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// DXILEmitter uses the descriptions of DXIL operations to construct enums and
+// helper functions for DXIL operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SequenceToOffsetTable.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+using namespace llvm;
+
+namespace {
+
+struct DXILShaderModel {
+ int Major;
+ int Minor;
+};
+struct DXILParam {
+ int Pos; // position in parameter list
+ StringRef Type; // llvm type name, $o for overload, $r for resource
+ // type, $cb for legacy cbuffer, $u4 for u4 struct
+ StringRef Name; // short, unique name
+ StringRef Doc; // the documentation description of this parameter
+ bool IsConst; // whether this argument requires a constant value in the IR
+ StringRef EnumName; // the name of the enum type if applicable
+ int MaxValue; // the maximum value for this parameter if applicable
+ DXILParam(const Record *R) {
+ Name = R->getValueAsString("name");
+ Pos = R->getValueAsInt("pos");
+ Type = R->getValueAsString("llvm_type");
+ if (R->getValue("doc"))
+ Doc = R->getValueAsString("doc");
+ IsConst = R->getValueAsBit("is_const");
+ EnumName = R->getValueAsString("enum_name");
+ MaxValue = R->getValueAsInt("max_value");
+ }
+};
+
+struct DXILOperationData {
+ StringRef Name; // short, unique name
+
+ StringRef DXILOp; // name of DXIL operation
+ int DXILOpID; // ID of DXIL operation
+ StringRef DXILClass; // name of the opcode class
+ StringRef Category; // classification for this instruction
+ StringRef Doc; // the documentation description of this instruction
+
+ SmallVector<DXILParam> Params; // the operands that this instruction takes
+ StringRef OverloadTypes; // overload types if applicable
+ StringRef FnAttr; // attribute shorthands: rn=does not access
+ // memory, ro=only reads from memory
+ StringRef Intrinsic; // The llvm intrinsic map to DXILOp. Default is "" which
+ // means no map exist
+ bool IsDeriv; // whether this is some kind of derivative
+ bool IsGradient; // whether this requires a gradient calculation
+ bool IsFeedback; // whether this is a sampler feedback op
+ bool IsWave; // whether this requires in-wave, cross-lane functionality
+ bool RequiresUniformInputs; // whether this operation requires that all
+ // of its inputs are uniform across the wave
+ SmallVector<StringRef, 4>
+ ShaderStages; // shader stages to which this applies, empty for all.
+ DXILShaderModel ShaderModel; // minimum shader model required
+ DXILShaderModel ShaderModelTranslated; // minimum shader model required with
+ // translation by linker
+ SmallVector<StringRef, 4> counters; // counters for this inst.
+ DXILOperationData(const Record *R) {
+ Name = R->getValueAsString("name");
+ DXILOp = R->getValueAsString("dxil_op");
+ DXILOpID = R->getValueAsInt("dxil_opid");
+ DXILClass = R->getValueAsDef("op_class")->getValueAsString("name");
+ Category = R->getValueAsDef("category")->getValueAsString("name");
+
+ if (R->getValue("llvm_intrinsic")) {
+ auto *IntrinsicDef = R->getValueAsDef("llvm_intrinsic");
+ auto DefName = IntrinsicDef->getName();
+ assert(DefName.startswith("int_") && "invalid intrinsic name");
+ // Remove the int_ from intrinsic name.
+ Intrinsic = DefName.substr(4);
+ }
+
+ Doc = R->getValueAsString("doc");
+
+ ListInit *ParamList = R->getValueAsListInit("ops");
+ for (unsigned i = 0; i < ParamList->size(); ++i) {
+ Record *Param = ParamList->getElementAsRecord(i);
+ Params.emplace_back(DXILParam(Param));
+ }
+ OverloadTypes = R->getValueAsString("oload_types");
+ FnAttr = R->getValueAsString("fn_attr");
+ }
+};
+} // end anonymous namespace
+
+static void emitDXILOpEnum(DXILOperationData &DXILOp, raw_ostream &OS) {
+ // Name = ID, // Doc
+ OS << DXILOp.Name << " = " << DXILOp.DXILOpID << ", // " << DXILOp.Doc
+ << "\n";
+}
+
+static std::string buildCategoryStr(StringSet<> &Categories) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ for (auto &It : Categories) {
+ OS << " " << It.getKey();
+ }
+ return OS.str();
+}
+
+// Emit enum declaration for DXIL.
+static void emitDXILEnums(std::vector<DXILOperationData> &DXILOps,
+ raw_ostream &OS) {
+ // Sort by Category + OpName.
+ std::sort(DXILOps.begin(), DXILOps.end(),
+ [](DXILOperationData &A, DXILOperationData &B) {
+ // Group by Category first.
+ if (A.Category == B.Category)
+ // Inside same Category, order by OpName.
+ return A.DXILOp < B.DXILOp;
+ else
+ return A.Category < B.Category;
+ });
+
+ OS << "// Enumeration for operations specified by DXIL\n";
+ OS << "enum class OpCode : unsigned {\n";
+
+ StringMap<StringSet<>> ClassMap;
+ StringRef PrevCategory = "";
+ for (auto &DXILOp : DXILOps) {
+ StringRef Category = DXILOp.Category;
+ if (Category != PrevCategory) {
+ OS << "\n// " << Category << "\n";
+ PrevCategory = Category;
+ }
+ emitDXILOpEnum(DXILOp, OS);
+ auto It = ClassMap.find(DXILOp.DXILClass);
+ if (It != ClassMap.end()) {
+ It->second.insert(DXILOp.Category);
+ } else {
+ ClassMap[DXILOp.DXILClass].insert(DXILOp.Category);
+ }
+ }
+
+ OS << "\n};\n\n";
+
+ std::vector<std::pair<std::string, std::string>> ClassVec;
+ for (auto &It : ClassMap) {
+ ClassVec.emplace_back(
+ std::make_pair(It.getKey().str(), buildCategoryStr(It.second)));
+ }
+ // Sort by Category + ClassName.
+ std::sort(ClassVec.begin(), ClassVec.end(),
+ [](std::pair<std::string, std::string> &A,
+ std::pair<std::string, std::string> &B) {
+ StringRef ClassA = A.first;
+ StringRef CategoryA = A.second;
+ StringRef ClassB = B.first;
+ StringRef CategoryB = B.second;
+ // Group by Category first.
+ if (CategoryA == CategoryB)
+ // Inside same Category, order by ClassName.
+ return ClassA < ClassB;
+ else
+ return CategoryA < CategoryB;
+ });
+
+ OS << "// Groups for DXIL operations with equivalent function templates\n";
+ OS << "enum class OpCodeClass : unsigned {\n";
+ PrevCategory = "";
+ for (auto &It : ClassVec) {
+
+ StringRef Category = It.second;
+ if (Category != PrevCategory) {
+ OS << "\n// " << Category << "\n";
+ PrevCategory = Category;
+ }
+ StringRef Name = It.first;
+ OS << Name << ",\n";
+ }
+ OS << "\n};\n\n";
+}
+
+// Emit map from llvm intrinsic to DXIL operation.
+static void emitDXILIntrinsicMap(std::vector<DXILOperationData> &DXILOps,
+ raw_ostream &OS) {
+ OS << "\n";
+ // FIXME: use array instead of SmallDenseMap.
+ OS << "static const SmallDenseMap<Intrinsic::ID, DXIL::OpCode> LowerMap = "
+ "{\n";
+ for (auto &DXILOp : DXILOps) {
+ if (DXILOp.Intrinsic.empty())
+ continue;
+ // {Intrinsic::sin, DXIL::OpCode::Sin},
+ OS << " { Intrinsic::" << DXILOp.Intrinsic
+ << ", DXIL::OpCode::" << DXILOp.DXILOp << "},\n";
+ }
+ OS << "};\n";
+ OS << "\n";
+}
+
+static std::string emitDXILOperationFnAttr(StringRef FnAttr) {
+ return StringSwitch<std::string>(FnAttr)
+ .Case("rn", "Attribute::ReadNone")
+ .Case("ro", "Attribute::ReadOnly")
+ .Default("Attribute::None");
+}
+
+static std::string getOverloadKind(StringRef Overload) {
+ return StringSwitch<std::string>(Overload)
+ .Case("half", "OverloadKind::HALF")
+ .Case("float", "OverloadKind::FLOAT")
+ .Case("double", "OverloadKind::DOUBLE")
+ .Case("i1", "OverloadKind::I1")
+ .Case("i16", "OverloadKind::I16")
+ .Case("i32", "OverloadKind::I32")
+ .Case("i64", "OverloadKind::I64")
+ .Case("udt", "OverloadKind::UserDefineType")
+ .Case("obj", "OverloadKind::ObjectType")
+ .Default("OverloadKind::VOID");
+}
+
+static std::string getDXILOperationOverload(StringRef Overloads) {
+ SmallVector<StringRef> OverloadStrs;
+ Overloads.split(OverloadStrs, ';', /*MaxSplit*/ -1, /*KeepEmpty*/ false);
+ // Format is: OverloadKind::FLOAT | OverloadKind::HALF
+ assert(!OverloadStrs.empty() && "Invalid overloads");
+ auto It = OverloadStrs.begin();
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << getOverloadKind(*It);
+ for (++It; It != OverloadStrs.end(); ++It) {
+ OS << " | " << getOverloadKind(*It);
+ }
+ return OS.str();
+}
+
+static std::string lowerFirstLetter(StringRef Name) {
+ if (Name.empty())
+ return "";
+
+ std::string LowerName = Name.str();
+ LowerName[0] = llvm::toLower(Name[0]);
+ return LowerName;
+}
+
+static std::string getDXILOpClassName(StringRef DXILOpClass) {
+ // Lower the first letter, except for special cases.
+ return StringSwitch<std::string>(DXILOpClass)
+ .Case("CBufferLoad", "cbufferLoad")
+ .Case("CBufferLoadLegacy", "cbufferLoadLegacy")
+ .Case("GSInstanceID", "gsInstanceID")
+ .Default(lowerFirstLetter(DXILOpClass));
+}
+
+static void emitDXILOperationTable(std::vector<DXILOperationData> &DXILOps,
+ raw_ostream &OS) {
+ // Sort by DXILOpID.
+ std::sort(DXILOps.begin(), DXILOps.end(),
+ [](DXILOperationData &A, DXILOperationData &B) {
+ return A.DXILOpID < B.DXILOpID;
+ });
+
+ // Collect Names.
+ SequenceToOffsetTable<std::string> OpClassStrings;
+ SequenceToOffsetTable<std::string> OpStrings;
+
+ StringSet<> ClassSet;
+ for (auto &DXILOp : DXILOps) {
+ OpStrings.add(DXILOp.DXILOp.str());
+
+ if (ClassSet.find(DXILOp.DXILClass) != ClassSet.end())
+ continue;
+ ClassSet.insert(DXILOp.DXILClass);
+ OpClassStrings.add(getDXILOpClassName(DXILOp.DXILClass));
+ }
+
+ // Layout names.
+ OpStrings.layout();
+ OpClassStrings.layout();
+
+ // Emit the DXIL operation table.
+ //{DXIL::OpCode::Sin, OpCodeNameIndex, OpCodeClass::Unary,
+ // OpCodeClassNameIndex,
+ // OverloadKind::FLOAT | OverloadKind::HALF, Attribute::AttrKind::ReadNone},
+ OS << "static const OpCodeProperty *getOpCodeProperty(DXIL::OpCode DXILOp) "
+ "{\n";
+
+ OS << " static const OpCodeProperty OpCodeProps[] = {\n";
+ for (auto &DXILOp : DXILOps) {
+ OS << " { DXIL::OpCode::" << DXILOp.DXILOp << ", "
+ << OpStrings.get(DXILOp.DXILOp.str())
+ << ", OpCodeClass::" << DXILOp.DXILClass << ", "
+ << OpClassStrings.get(getDXILOpClassName(DXILOp.DXILClass)) << ", "
+ << getDXILOperationOverload(DXILOp.OverloadTypes) << ", "
+ << emitDXILOperationFnAttr(DXILOp.FnAttr) << " },\n";
+ }
+ OS << " };\n";
+
+ OS << " // FIXME: change search to indexing with\n";
+ OS << " // DXILOp once all DXIL op is added.\n";
+ OS << " OpCodeProperty TmpProp;\n";
+ OS << " TmpProp.OpCode = DXILOp;\n";
+ OS << " const OpCodeProperty *Prop =\n";
+ OS << " llvm::lower_bound(OpCodeProps, TmpProp,\n";
+ OS << " [](const OpCodeProperty &A, const "
+ "OpCodeProperty &B) {\n";
+ OS << " return A.OpCode < B.OpCode;\n";
+ OS << " });\n";
+ OS << " assert(Prop && \"fail to find OpCodeProperty\");\n";
+ OS << " return Prop;\n";
+ OS << "}\n\n";
+
+ // Emit the string tables.
+ OS << "static const char *getOpCodeName(DXIL::OpCode DXILOp) {\n\n";
+
+ OpStrings.emitStringLiteralDef(OS,
+ " static const char DXILOpCodeNameTable[]");
+
+ OS << " auto *Prop = getOpCodeProperty(DXILOp);\n";
+ OS << " unsigned Index = Prop->OpCodeNameOffset;\n";
+ OS << " return DXILOpCodeNameTable + Index;\n";
+ OS << "}\n\n";
+
+ OS << "static const char *getOpCodeClassName(const OpCodeProperty &Prop) "
+ "{\n\n";
+
+ OpClassStrings.emitStringLiteralDef(
+ OS, " static const char DXILOpCodeClassNameTable[]");
+
+ OS << " unsigned Index = Prop.OpCodeClassNameOffset;\n";
+ OS << " return DXILOpCodeClassNameTable + Index;\n";
+ OS << "}\n ";
+}
+
+namespace llvm {
+
+void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) {
+ std::vector<Record *> Ops = Records.getAllDerivedDefinitions("dxil_op");
+ OS << "// Generated code, do not edit.\n";
+ OS << "\n";
+
+ std::vector<DXILOperationData> DXILOps;
+ DXILOps.reserve(Ops.size());
+ for (auto *Record : Ops) {
+ DXILOps.emplace_back(DXILOperationData(Record));
+ }
+
+ OS << "#ifdef DXIL_OP_ENUM\n";
+ emitDXILEnums(DXILOps, OS);
+ OS << "#endif\n\n";
+
+ OS << "#ifdef DXIL_OP_INTRINSIC_MAP\n";
+ emitDXILIntrinsicMap(DXILOps, OS);
+ OS << "#endif\n\n";
+
+ OS << "#ifdef DXIL_OP_OPERATION_TABLE\n";
+ emitDXILOperationTable(DXILOps, OS);
+ OS << "#endif\n\n";
+
+ OS << "\n";
+}
+
+} // namespace llvm
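The new backend emits three independently guarded regions, so a consumer picks one by defining the matching macro before including the generated file (a sketch; the .inc file name is an assumption, not part of this patch):

    namespace llvm {
    namespace DXIL {
    #define DXIL_OP_ENUM
    #include "DXILOperation.inc" // hypothetical name; yields OpCode/OpCodeClass
    #undef DXIL_OP_ENUM
    } // namespace DXIL
    } // namespace llvm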
diff --git a/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index c5dd1e626696..8477e0639f90 100644
--- a/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -1,4 +1,4 @@
-//===------------ FixedLenDecoderEmitter.cpp - Decoder Generator ----------===//
+//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,13 +7,14 @@
//===----------------------------------------------------------------------===//
//
// It contains the tablegen backend that emits the decoder functions for
-// targets with fixed length instruction set.
+// targets with fixed/variable length instruction set.
//
//===----------------------------------------------------------------------===//
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "InfoByHwMode.h"
+#include "VarLenCodeEmitterGen.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/CachedHashString.h"
@@ -23,7 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCDecoderOps.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -121,19 +122,18 @@ raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
return OS;
}
-class FixedLenDecoderEmitter {
+class DecoderEmitter {
RecordKeeper &RK;
std::vector<EncodingAndInst> NumberedEncodings;
public:
// Defaults preserved here for documentation, even though they aren't
// strictly necessary given the way that this is currently being called.
- FixedLenDecoderEmitter(RecordKeeper &R, std::string PredicateNamespace,
- std::string GPrefix = "if (",
- std::string GPostfix = " == MCDisassembler::Fail)",
- std::string ROK = "MCDisassembler::Success",
- std::string RFail = "MCDisassembler::Fail",
- std::string L = "")
+ DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace,
+ std::string GPrefix = "if (",
+ std::string GPostfix = " == MCDisassembler::Fail)",
+ std::string ROK = "MCDisassembler::Success",
+ std::string RFail = "MCDisassembler::Fail", std::string L = "")
: RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)),
GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)),
ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)),
@@ -143,6 +143,8 @@ public:
void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
unsigned Indentation, unsigned BitWidth,
StringRef Namespace) const;
+ void emitInstrLenTable(formatted_raw_ostream &OS,
+ std::vector<unsigned> &InstrLen) const;
void emitPredicateFunction(formatted_raw_ostream &OS,
PredicateSet &Predicates,
unsigned Indentation) const;
@@ -217,8 +219,28 @@ static void dumpBits(raw_ostream &o, const BitsInit &bits) {
}
static BitsInit &getBitsField(const Record &def, StringRef str) {
- BitsInit *bits = def.getValueAsBitsInit(str);
- return *bits;
+ const RecordVal *RV = def.getValue(str);
+ if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
+ return *Bits;
+
+ // Not a BitsInit, so this is a variable-length instruction encoded as a dag.
+ VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV);
+ SmallVector<Init *, 16> Bits;
+
+ for (auto &SI : VLI) {
+ if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) {
+ for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) {
+ Bits.push_back(BI->getBit(Idx));
+ }
+ } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) {
+ Bits.push_back(const_cast<BitInit *>(BI));
+ } else {
+ for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx)
+ Bits.push_back(UnsetInit::get(def.getRecords()));
+ }
+ }
+
+ return *BitsInit::get(def.getRecords(), Bits);
}
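The new variable-length branch of getBitsField splices each encoding segment into one flat BitsInit. A self-contained model of that flattening, using plain std types instead of the Init class hierarchy (-1 stands in for an unset bit):

    #include <vector>

    // Simplified model of the splice above: concrete segment bits are
    // copied through; segments without fixed bits contribute Width
    // unset placeholders, like UnsetInit does in the real code.
    struct Segment {
      unsigned Width;
      std::vector<int> KnownBits; // empty if the segment has no fixed bits
    };

    std::vector<int> flattenSegments(const std::vector<Segment> &Segments) {
      std::vector<int> Bits;
      for (const Segment &S : Segments) {
        if (!S.KnownBits.empty())
          Bits.insert(Bits.end(), S.KnownBits.begin(), S.KnownBits.end());
        else
          Bits.insert(Bits.end(), S.Width, -1);
      }
      return Bits;
    }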
// Representation of the instruction to work on.
@@ -388,13 +410,13 @@ protected:
unsigned BitWidth;
// Parent emitter
- const FixedLenDecoderEmitter *Emitter;
+ const DecoderEmitter *Emitter;
public:
FilterChooser(ArrayRef<EncodingAndInst> Insts,
const std::vector<EncodingIDAndOpcode> &IDs,
const std::map<unsigned, std::vector<OperandInfo>> &Ops,
- unsigned BW, const FixedLenDecoderEmitter *E)
+ unsigned BW, const DecoderEmitter *E)
: AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
BitWidth(BW), Emitter(E) {
@@ -421,20 +443,21 @@ protected:
// Populates the insn given the uid.
void insnWithID(insn_t &Insn, unsigned Opcode) const {
BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst");
-
+ Insn.resize(BitWidth > Bits.getNumBits() ? BitWidth : Bits.getNumBits(),
+ BIT_UNSET);
// We may have a SoftFail bitmask, which specifies a mask where an encoding
// may differ from the value in "Inst" and yet still be valid, but the
// disassembler should return SoftFail instead of Success.
//
// This is used for marking UNPREDICTABLE instructions in the ARM world.
- BitsInit *SFBits =
- AllInstructions[Opcode].EncodingDef->getValueAsBitsInit("SoftFail");
-
- for (unsigned i = 0; i < BitWidth; ++i) {
+ const RecordVal *RV =
+ AllInstructions[Opcode].EncodingDef->getValue("SoftFail");
+ const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
+ for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
- Insn.push_back(BIT_UNSET);
+ Insn[i] = BIT_UNSET;
else
- Insn.push_back(bitFromBits(Bits, i));
+ Insn[i] = bitFromBits(Bits, i);
}
}
@@ -486,6 +509,8 @@ protected:
// Returns true if predicate matches were emitted, false otherwise.
bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
unsigned Opc) const;
+ bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
+ raw_ostream &OS) const;
bool doesOpcodeNeedPredicate(unsigned Opc) const;
unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
@@ -747,11 +772,9 @@ unsigned Filter::usefulness() const {
//////////////////////////////////
// Emit the decoder state machine table.
-void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
- DecoderTable &Table,
- unsigned Indentation,
- unsigned BitWidth,
- StringRef Namespace) const {
+void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
+ unsigned Indentation, unsigned BitWidth,
+ StringRef Namespace) const {
OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace
<< BitWidth << "[] = {\n";
@@ -936,9 +959,18 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
OS.indent(Indentation) << "};\n\n";
}
-void FixedLenDecoderEmitter::
-emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates,
- unsigned Indentation) const {
+void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
+ std::vector<unsigned> &InstrLen) const {
+ OS << "static const uint8_t InstrLenTable[] = {\n";
+ for (unsigned &Len : InstrLen) {
+ OS << Len << ",\n";
+ }
+ OS << "};\n\n";
+}
+
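For reference, emitInstrLenTable produces a flat array indexed by opcode. An illustrative shape of its output, with made-up lengths:

    // Illustrative generated output; the lengths (in bits) are hypothetical.
    static const uint8_t InstrLenTable[] = {
      16,
      32,
      48,
    };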
+void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
+ PredicateSet &Predicates,
+ unsigned Indentation) const {
// The predicate function is just a big switch statement based on the
// input predicate index.
OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, "
@@ -961,16 +993,17 @@ emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates,
OS.indent(Indentation) << "}\n\n";
}
-void FixedLenDecoderEmitter::
-emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
- unsigned Indentation) const {
+void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
+ DecoderSet &Decoders,
+ unsigned Indentation) const {
// The decoder function is just a big switch statement based on the
// input decoder index.
OS.indent(Indentation) << "template <typename InsnType>\n";
OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
<< " unsigned Idx, InsnType insn, MCInst &MI,\n";
- OS.indent(Indentation) << " uint64_t "
- << "Address, const void *Decoder, bool &DecodeComplete) {\n";
+ OS.indent(Indentation)
+ << " uint64_t "
+ << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
Indentation += 2;
OS.indent(Indentation) << "DecodeComplete = true;\n";
// TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
@@ -1195,6 +1228,40 @@ unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
return (unsigned)(P - Decoders.begin());
}
+// If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
+bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
+ raw_ostream &OS) const {
+ if (auto *D = dyn_cast<DefInit>(&Val)) {
+ if (!D->getDef()->isSubClassOf("SubtargetFeature"))
+ return true;
+ OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
+ << "]";
+ return false;
+ }
+ if (auto *D = dyn_cast<DagInit>(&Val)) {
+ std::string Op = D->getOperator()->getAsString();
+ if (Op == "not" && D->getNumArgs() == 1) {
+ OS << '!';
+ return emitPredicateMatchAux(*D->getArg(0), true, OS);
+ }
+ if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
+ bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
+ if (Paren)
+ OS << '(';
+ ListSeparator LS(Op == "any_of" ? " || " : " && ");
+ for (auto *Arg : D->getArgs()) {
+ OS << LS;
+ if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
+ return true;
+ }
+ if (Paren)
+ OS << ')';
+ return false;
+ }
+ }
+ return true;
+}
+
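emitPredicateMatchAux recursively lowers an AssemblerCondDag into a C++ feature-bit expression. A self-contained sketch of the expression shape it produces for (all_of FeatureA, (not FeatureB)), with illustrative feature names and std::bitset standing in for the target's FeatureBitset:

    #include <bitset>

    // Shape of the generated predicate for (all_of FeatureA, (not FeatureB));
    // the real emitter indexes Bits[<PredicateNamespace>::<Feature>].
    enum TargetFeature { FeatureA, FeatureB, NumTargetFeatures };

    bool predicateMatches(const std::bitset<NumTargetFeatures> &Bits) {
      return Bits[FeatureA] && !Bits[FeatureB];
    }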
bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
unsigned Opc) const {
ListInit *Predicates =
@@ -1208,40 +1275,11 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
continue;
- const DagInit *D = Pred->getValueAsDag("AssemblerCondDag");
- std::string CombineType = D->getOperator()->getAsString();
- if (CombineType != "any_of" && CombineType != "all_of")
- PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
- if (D->getNumArgs() == 0)
- PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
- bool IsOr = CombineType == "any_of";
-
if (!IsFirstEmission)
o << " && ";
-
- if (IsOr)
- o << "(";
-
- ListSeparator LS(IsOr ? " || " : " && ");
- for (auto *Arg : D->getArgs()) {
- o << LS;
- if (auto *NotArg = dyn_cast<DagInit>(Arg)) {
- if (NotArg->getOperator()->getAsString() != "not" ||
- NotArg->getNumArgs() != 1)
- PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
- Arg = NotArg->getArg(0);
- o << "!";
- }
- if (!isa<DefInit>(Arg) ||
- !cast<DefInit>(Arg)->getDef()->isSubClassOf("SubtargetFeature"))
- PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
- o << "Bits[" << Emitter->PredicateNamespace << "::" << Arg->getAsString()
- << "]";
- }
-
- if (IsOr)
- o << ")";
-
+ if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"),
+ Predicates->size() > 1, o))
+ PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
IsFirstEmission = false;
}
return !Predicates->empty();
@@ -1309,8 +1347,9 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
unsigned Opc) const {
- BitsInit *SFBits =
- AllInstructions[Opc].EncodingDef->getValueAsBitsInit("SoftFail");
+ const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail");
+ BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
+
if (!SFBits) return;
BitsInit *InstBits =
AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst");
@@ -1785,11 +1824,9 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
}
}
-static std::string findOperandDecoderMethod(TypedInit *TI) {
+static std::string findOperandDecoderMethod(Record *Record) {
std::string Decoder;
- Record *Record = cast<DefInit>(TI)->getDef();
-
RecordVal *DecoderString = Record->getValue("DecoderMethod");
StringInit *String = DecoderString ?
dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
@@ -1812,17 +1849,88 @@ static std::string findOperandDecoderMethod(TypedInit *TI) {
return Decoder;
}
-static bool
+OperandInfo getOpInfo(Record *TypeRecord) {
+ std::string Decoder = findOperandDecoderMethod(TypeRecord);
+
+ RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder");
+ BitInit *HasCompleteDecoderBit =
+ HasCompleteDecoderVal
+ ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
+ : nullptr;
+ bool HasCompleteDecoder =
+ HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
+
+ return OperandInfo(Decoder, HasCompleteDecoder);
+}
+
+void parseVarLenInstOperand(const Record &Def,
+ std::vector<OperandInfo> &Operands,
+ const CodeGenInstruction &CGI) {
+
+ const RecordVal *RV = Def.getValue("Inst");
+ VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
+ SmallVector<int> TiedTo;
+
+ for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) {
+ auto &Op = CGI.Operands[Idx];
+ if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
+ for (auto *Arg : Op.MIOperandInfo->getArgs())
+ Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
+ else
+ Operands.push_back(getOpInfo(Op.Rec));
+
+ int TiedReg = Op.getTiedRegister();
+ TiedTo.push_back(-1);
+ if (TiedReg != -1) {
+ TiedTo[Idx] = TiedReg;
+ TiedTo[TiedReg] = Idx;
+ }
+ }
+
+ unsigned CurrBitPos = 0;
+ for (auto &EncodingSegment : VLI) {
+ unsigned Offset = 0;
+ StringRef OpName;
+
+ if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
+ OpName = SI->getValue();
+ } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
+ OpName = cast<StringInit>(DI->getArg(0))->getValue();
+ Offset = cast<IntInit>(DI->getArg(2))->getValue();
+ }
+
+ if (!OpName.empty()) {
+ auto OpSubOpPair =
+ const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
+ OpName);
+ unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
+ Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+
+ int TiedReg = TiedTo[OpSubOpPair.first];
+ if (TiedReg != -1) {
+ unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
+ std::make_pair(TiedReg, OpSubOpPair.second));
+ Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
+ }
+ }
+
+ CurrBitPos += EncodingSegment.BitWidth;
+ }
+}
+
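parseVarLenInstOperand records, for every named encoding segment, where the operand's bits live in the instruction. A minimal model of that bookkeeping, assuming the same (Base, Width, Offset) triple the real OperandInfo::addField takes:

    #include <vector>

    // Model of the per-operand field list built above: one entry per
    // contiguous chunk of the operand in the instruction encoding.
    struct Field {
      unsigned Base;   // starting bit position in the instruction
      unsigned Width;  // number of contiguous bits
      unsigned Offset; // destination bit position within the operand
    };

    struct OperandFields {
      std::vector<Field> Fields;
      void addField(unsigned Base, unsigned Width, unsigned Offset) {
        Fields.push_back({Base, Width, Offset});
      }
    };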
+static unsigned
populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
const CodeGenInstruction &CGI, unsigned Opc,
- std::map<unsigned, std::vector<OperandInfo>> &Operands) {
+ std::map<unsigned, std::vector<OperandInfo>> &Operands,
+ bool IsVarLenInst) {
const Record &Def = *CGI.TheDef;
 // If all the bit positions are not specified, do not decode this instruction.
// We are bound to fail! For proper disassembly, the well-known encoding bits
// of the instruction must be fully specified.
BitsInit &Bits = getBitsField(EncodingDef, "Inst");
- if (Bits.allInComplete()) return false;
+ if (Bits.allInComplete())
+ return 0;
std::vector<OperandInfo> InsnOperands;
@@ -1834,7 +1942,7 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
InsnOperands.push_back(
OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder));
Operands[Opc] = InsnOperands;
- return true;
+ return Bits.getNumBits();
}
// Generate a description of the operand of the instruction that we know
@@ -1848,11 +1956,11 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
DagInit *Out = Def.getValueAsDag("OutOperandList");
DagInit *In = Def.getValueAsDag("InOperandList");
for (unsigned i = 0; i < Out->getNumArgs(); ++i)
- InOutOperands.push_back(std::make_pair(Out->getArg(i),
- Out->getArgNameStr(i)));
+ InOutOperands.push_back(
+ std::make_pair(Out->getArg(i), Out->getArgNameStr(i)));
for (unsigned i = 0; i < In->getNumArgs(); ++i)
- InOutOperands.push_back(std::make_pair(In->getArg(i),
- In->getArgNameStr(i)));
+ InOutOperands.push_back(
+ std::make_pair(In->getArg(i), In->getArgNameStr(i)));
// Search for tied operands, so that we can correctly instantiate
// operands that are not explicitly represented in the encoding.
@@ -1869,257 +1977,254 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
}
}
- std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands;
- std::set<std::string> NumberedInsnOperandsNoTie;
- if (Target.getInstructionSet()->
- getValueAsBit("decodePositionallyEncodedOperands")) {
- const std::vector<RecordVal> &Vals = Def.getValues();
- unsigned NumberedOp = 0;
-
- std::set<unsigned> NamedOpIndices;
- if (Target.getInstructionSet()->
- getValueAsBit("noNamedPositionallyEncodedOperands"))
- // Collect the set of operand indices that might correspond to named
- // operand, and skip these when assigning operands based on position.
+ if (IsVarLenInst) {
+ parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
+ } else {
+ std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands;
+ std::set<std::string> NumberedInsnOperandsNoTie;
+ if (Target.getInstructionSet()->getValueAsBit(
+ "decodePositionallyEncodedOperands")) {
+ const std::vector<RecordVal> &Vals = Def.getValues();
+ unsigned NumberedOp = 0;
+
+ std::set<unsigned> NamedOpIndices;
+ if (Target.getInstructionSet()->getValueAsBit(
+ "noNamedPositionallyEncodedOperands"))
+ // Collect the set of operand indices that might correspond to named
+ // operand, and skip these when assigning operands based on position.
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ unsigned OpIdx;
+ if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+ continue;
+
+ NamedOpIndices.insert(OpIdx);
+ }
+
for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
- unsigned OpIdx;
- if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+ // Ignore fixed fields in the record; we're looking for values like:
+ // bits<5> RST = { ?, ?, ?, ?, ? };
+ if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete())
continue;
- NamedOpIndices.insert(OpIdx);
- }
+ // Determine if Vals[i] actually contributes to the Inst encoding.
+ unsigned bi = 0;
+ for (; bi < Bits.getNumBits(); ++bi) {
+ VarInit *Var = nullptr;
+ VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+ if (BI)
+ Var = dyn_cast<VarInit>(BI->getBitVar());
+ else
+ Var = dyn_cast<VarInit>(Bits.getBit(bi));
+
+ if (Var && Var->getName() == Vals[i].getName())
+ break;
+ }
- for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
- // Ignore fixed fields in the record, we're looking for values like:
- // bits<5> RST = { ?, ?, ?, ?, ? };
- if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete())
- continue;
+ if (bi == Bits.getNumBits())
+ continue;
- // Determine if Vals[i] actually contributes to the Inst encoding.
- unsigned bi = 0;
- for (; bi < Bits.getNumBits(); ++bi) {
- VarInit *Var = nullptr;
- VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
- if (BI)
- Var = dyn_cast<VarInit>(BI->getBitVar());
- else
- Var = dyn_cast<VarInit>(Bits.getBit(bi));
+ // Skip variables that correspond to explicitly-named operands.
+ unsigned OpIdx;
+ if (CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
+ continue;
- if (Var && Var->getName() == Vals[i].getName())
- break;
- }
+ // Get the bit range for this operand:
+ unsigned bitStart = bi++, bitWidth = 1;
+ for (; bi < Bits.getNumBits(); ++bi) {
+ VarInit *Var = nullptr;
+ VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+ if (BI)
+ Var = dyn_cast<VarInit>(BI->getBitVar());
+ else
+ Var = dyn_cast<VarInit>(Bits.getBit(bi));
- if (bi == Bits.getNumBits())
- continue;
+ if (!Var)
+ break;
- // Skip variables that correspond to explicitly-named operands.
- unsigned OpIdx;
- if (CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
- continue;
+ if (Var->getName() != Vals[i].getName())
+ break;
- // Get the bit range for this operand:
- unsigned bitStart = bi++, bitWidth = 1;
- for (; bi < Bits.getNumBits(); ++bi) {
- VarInit *Var = nullptr;
- VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
- if (BI)
- Var = dyn_cast<VarInit>(BI->getBitVar());
- else
- Var = dyn_cast<VarInit>(Bits.getBit(bi));
+ ++bitWidth;
+ }
- if (!Var)
- break;
+ unsigned NumberOps = CGI.Operands.size();
+ while (NumberedOp < NumberOps &&
+ (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
+ (!NamedOpIndices.empty() &&
+ NamedOpIndices.count(
+ CGI.Operands.getSubOperandNumber(NumberedOp).first))))
+ ++NumberedOp;
+
+ OpIdx = NumberedOp++;
+
+ // OpIdx now holds the ordered operand number of Vals[i].
+ std::pair<unsigned, unsigned> SO =
+ CGI.Operands.getSubOperandNumber(OpIdx);
+ const std::string &Name = CGI.Operands[SO.first].Name;
+
+ LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName()
+ << ": " << Name << "(" << SO.first << ", "
+ << SO.second << ") => " << Vals[i].getName() << "\n");
+
+ std::string Decoder;
+ Record *TypeRecord = CGI.Operands[SO.first].Rec;
+
+ RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
+ StringInit *String =
+ DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
+ : nullptr;
+ if (String && String->getValue() != "")
+ Decoder = std::string(String->getValue());
+
+ if (Decoder == "" && CGI.Operands[SO.first].MIOperandInfo &&
+ CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) {
+ Init *Arg = CGI.Operands[SO.first].MIOperandInfo->getArg(SO.second);
+ if (DefInit *DI = cast<DefInit>(Arg))
+ TypeRecord = DI->getDef();
+ }
- if (Var->getName() != Vals[i].getName())
- break;
+ bool isReg = false;
+ if (TypeRecord->isSubClassOf("RegisterOperand"))
+ TypeRecord = TypeRecord->getValueAsDef("RegClass");
+ if (TypeRecord->isSubClassOf("RegisterClass")) {
+ Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
+ isReg = true;
+ } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
+ Decoder = "DecodePointerLikeRegClass" +
+ utostr(TypeRecord->getValueAsInt("RegClassKind"));
+ isReg = true;
+ }
- ++bitWidth;
+ DecoderString = TypeRecord->getValue("DecoderMethod");
+ String = DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
+ : nullptr;
+ if (!isReg && String && String->getValue() != "")
+ Decoder = std::string(String->getValue());
+
+ RecordVal *HasCompleteDecoderVal =
+ TypeRecord->getValue("hasCompleteDecoder");
+ BitInit *HasCompleteDecoderBit =
+ HasCompleteDecoderVal
+ ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
+ : nullptr;
+ bool HasCompleteDecoder =
+ HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
+
+ OperandInfo OpInfo(Decoder, HasCompleteDecoder);
+ OpInfo.addField(bitStart, bitWidth, 0);
+
+ NumberedInsnOperands[Name].push_back(OpInfo);
+
+ // FIXME: For complex operands with custom decoders we can't handle tied
+ // sub-operands automatically. Skip those here and assume that this is
+ // fixed up elsewhere.
+ if (CGI.Operands[SO.first].MIOperandInfo &&
+ CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 && String &&
+ String->getValue() != "")
+ NumberedInsnOperandsNoTie.insert(Name);
}
+ }
- unsigned NumberOps = CGI.Operands.size();
- while (NumberedOp < NumberOps &&
- (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
- (!NamedOpIndices.empty() && NamedOpIndices.count(
- CGI.Operands.getSubOperandNumber(NumberedOp).first))))
- ++NumberedOp;
-
- OpIdx = NumberedOp++;
-
- // OpIdx now holds the ordered operand number of Vals[i].
- std::pair<unsigned, unsigned> SO =
- CGI.Operands.getSubOperandNumber(OpIdx);
- const std::string &Name = CGI.Operands[SO.first].Name;
-
- LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName()
- << ": " << Name << "(" << SO.first << ", " << SO.second
- << ") => " << Vals[i].getName() << "\n");
-
- std::string Decoder;
- Record *TypeRecord = CGI.Operands[SO.first].Rec;
-
- RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
- StringInit *String = DecoderString ?
- dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
- if (String && String->getValue() != "")
- Decoder = std::string(String->getValue());
-
- if (Decoder == "" &&
- CGI.Operands[SO.first].MIOperandInfo &&
- CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) {
- Init *Arg = CGI.Operands[SO.first].MIOperandInfo->
- getArg(SO.second);
- if (DefInit *DI = cast<DefInit>(Arg))
- TypeRecord = DI->getDef();
+ // For each operand, see if we can figure out where it is encoded.
+ for (const auto &Op : InOutOperands) {
+ if (!NumberedInsnOperands[std::string(Op.second)].empty()) {
+ llvm::append_range(InsnOperands,
+ NumberedInsnOperands[std::string(Op.second)]);
+ continue;
}
-
- bool isReg = false;
- if (TypeRecord->isSubClassOf("RegisterOperand"))
- TypeRecord = TypeRecord->getValueAsDef("RegClass");
- if (TypeRecord->isSubClassOf("RegisterClass")) {
- Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
- isReg = true;
- } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
- Decoder = "DecodePointerLikeRegClass" +
- utostr(TypeRecord->getValueAsInt("RegClassKind"));
- isReg = true;
+ if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) {
+ if (!NumberedInsnOperandsNoTie.count(
+ TiedNames[std::string(Op.second)])) {
+ // Figure out to which (sub)operand we're tied.
+ unsigned i =
+ CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]);
+ int tiedTo = CGI.Operands[i].getTiedRegister();
+ if (tiedTo == -1) {
+ i = CGI.Operands.getOperandNamed(Op.second);
+ tiedTo = CGI.Operands[i].getTiedRegister();
+ }
+
+ if (tiedTo != -1) {
+ std::pair<unsigned, unsigned> SO =
+ CGI.Operands.getSubOperandNumber(tiedTo);
+
+ InsnOperands.push_back(
+ NumberedInsnOperands[TiedNames[std::string(Op.second)]]
+ [SO.second]);
+ }
+ }
+ continue;
}
- DecoderString = TypeRecord->getValue("DecoderMethod");
- String = DecoderString ?
- dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
- if (!isReg && String && String->getValue() != "")
- Decoder = std::string(String->getValue());
-
- RecordVal *HasCompleteDecoderVal =
- TypeRecord->getValue("hasCompleteDecoder");
- BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ?
- dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) : nullptr;
- bool HasCompleteDecoder = HasCompleteDecoderBit ?
- HasCompleteDecoderBit->getValue() : true;
-
- OperandInfo OpInfo(Decoder, HasCompleteDecoder);
- OpInfo.addField(bitStart, bitWidth, 0);
-
- NumberedInsnOperands[Name].push_back(OpInfo);
-
- // FIXME: For complex operands with custom decoders we can't handle tied
- // sub-operands automatically. Skip those here and assume that this is
- // fixed up elsewhere.
- if (CGI.Operands[SO.first].MIOperandInfo &&
- CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 &&
- String && String->getValue() != "")
- NumberedInsnOperandsNoTie.insert(Name);
- }
- }
+ // At this point, we can locate the decoder field, but we need to know how
+ // to interpret it. As a first step, require the target to provide
+ // callbacks for decoding register classes.
- // For each operand, see if we can figure out where it is encoded.
- for (const auto &Op : InOutOperands) {
- if (!NumberedInsnOperands[std::string(Op.second)].empty()) {
- llvm::append_range(InsnOperands,
- NumberedInsnOperands[std::string(Op.second)]);
- continue;
- }
- if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) {
- if (!NumberedInsnOperandsNoTie.count(TiedNames[std::string(Op.second)])) {
- // Figure out to which (sub)operand we're tied.
- unsigned i =
- CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]);
- int tiedTo = CGI.Operands[i].getTiedRegister();
- if (tiedTo == -1) {
- i = CGI.Operands.getOperandNamed(Op.second);
- tiedTo = CGI.Operands[i].getTiedRegister();
- }
+ OperandInfo OpInfo = getOpInfo(cast<DefInit>(Op.first)->getDef());
- if (tiedTo != -1) {
- std::pair<unsigned, unsigned> SO =
- CGI.Operands.getSubOperandNumber(tiedTo);
+ // Some bits of the operand may be required to be 1 depending on the
+ // instruction's encoding. Collect those bits.
+ if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second))
+ if (const BitsInit *OpBits =
+ dyn_cast<BitsInit>(EncodedValue->getValue()))
+ for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
+ if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
+ if (OpBit->getValue())
+ OpInfo.InitValue |= 1ULL << I;
- InsnOperands.push_back(
- NumberedInsnOperands[TiedNames[std::string(Op.second)]]
- [SO.second]);
- }
- }
- continue;
- }
+ unsigned Base = ~0U;
+ unsigned Width = 0;
+ unsigned Offset = 0;
- TypedInit *TI = cast<TypedInit>(Op.first);
-
- // At this point, we can locate the decoder field, but we need to know how
- // to interpret it. As a first step, require the target to provide
- // callbacks for decoding register classes.
- std::string Decoder = findOperandDecoderMethod(TI);
- Record *TypeRecord = cast<DefInit>(TI)->getDef();
-
- RecordVal *HasCompleteDecoderVal =
- TypeRecord->getValue("hasCompleteDecoder");
- BitInit *HasCompleteDecoderBit = HasCompleteDecoderVal ?
- dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) : nullptr;
- bool HasCompleteDecoder = HasCompleteDecoderBit ?
- HasCompleteDecoderBit->getValue() : true;
-
- OperandInfo OpInfo(Decoder, HasCompleteDecoder);
-
- // Some bits of the operand may be required to be 1 depending on the
- // instruction's encoding. Collect those bits.
- if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second))
- if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
- for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
- if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
- if (OpBit->getValue())
- OpInfo.InitValue |= 1ULL << I;
-
- unsigned Base = ~0U;
- unsigned Width = 0;
- unsigned Offset = 0;
+ for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) {
+ VarInit *Var = nullptr;
+ VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
+ if (BI)
+ Var = dyn_cast<VarInit>(BI->getBitVar());
+ else
+ Var = dyn_cast<VarInit>(Bits.getBit(bi));
- for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) {
- VarInit *Var = nullptr;
- VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
- if (BI)
- Var = dyn_cast<VarInit>(BI->getBitVar());
- else
- Var = dyn_cast<VarInit>(Bits.getBit(bi));
+ if (!Var) {
+ if (Base != ~0U) {
+ OpInfo.addField(Base, Width, Offset);
+ Base = ~0U;
+ Width = 0;
+ Offset = 0;
+ }
+ continue;
+ }
- if (!Var) {
- if (Base != ~0U) {
- OpInfo.addField(Base, Width, Offset);
- Base = ~0U;
- Width = 0;
- Offset = 0;
+ if ((Var->getName() != Op.second &&
+ Var->getName() != TiedNames[std::string(Op.second)])) {
+ if (Base != ~0U) {
+ OpInfo.addField(Base, Width, Offset);
+ Base = ~0U;
+ Width = 0;
+ Offset = 0;
+ }
+ continue;
}
- continue;
- }
- if (Var->getName() != Op.second &&
- Var->getName() != TiedNames[std::string(Op.second)]) {
- if (Base != ~0U) {
+ if (Base == ~0U) {
+ Base = bi;
+ Width = 1;
+ Offset = BI ? BI->getBitNum() : 0;
+ } else if (BI && BI->getBitNum() != Offset + Width) {
OpInfo.addField(Base, Width, Offset);
- Base = ~0U;
- Width = 0;
- Offset = 0;
+ Base = bi;
+ Width = 1;
+ Offset = BI->getBitNum();
+ } else {
+ ++Width;
}
- continue;
}
- if (Base == ~0U) {
- Base = bi;
- Width = 1;
- Offset = BI ? BI->getBitNum() : 0;
- } else if (BI && BI->getBitNum() != Offset + Width) {
+ if (Base != ~0U)
OpInfo.addField(Base, Width, Offset);
- Base = bi;
- Width = 1;
- Offset = BI->getBitNum();
- } else {
- ++Width;
- }
- }
- if (Base != ~0U)
- OpInfo.addField(Base, Width, Offset);
-
- if (OpInfo.numFields() > 0)
- InsnOperands.push_back(OpInfo);
+ if (OpInfo.numFields() > 0)
+ InsnOperands.push_back(OpInfo);
+ }
}
Operands[Opc] = InsnOperands;
@@ -2142,7 +2247,7 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
});
#endif
- return true;
+ return Bits.getNumBits();
}
// emitFieldFromInstruction - Emit the templated helper function
@@ -2155,13 +2260,12 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
<< "// InsnType must either be integral or an APInt-like object that "
"must:\n"
<< "// * be default-constructible and copy-constructible\n"
- << "// * be constructible from a uint64_t\n"
<< "// * be constructible from an APInt (this can be private)\n"
<< "// * Support insertBits(bits, startBit, numBits)\n"
<< "// * Support extractBitsAsZExtValue(numBits, startBit)\n"
- << "// * be convertible to bool\n"
<< "// * Support the ~, &, ==, and != operators with other objects of "
"the same type\n"
+ << "// * Support the != and bitwise & with uint64_t\n"
<< "// * Support put (<<) to raw_ostream&\n"
<< "template <typename InsnType>\n"
<< "#if defined(_MSC_VER) && !defined(__clang__)\n"
@@ -2214,18 +2318,26 @@ static void emitInsertBits(formatted_raw_ostream &OS) {
// emitDecodeInstruction - Emit the templated helper function
// decodeInstruction().
-static void emitDecodeInstruction(formatted_raw_ostream &OS) {
+static void emitDecodeInstruction(formatted_raw_ostream &OS,
+ bool IsVarLenInst) {
OS << "template <typename InsnType>\n"
<< "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
"MCInst &MI,\n"
<< " InsnType insn, uint64_t "
"Address,\n"
- << " const void *DisAsm,\n"
- << " const MCSubtargetInfo &STI) {\n"
+ << " const MCDisassembler *DisAsm,\n"
+ << " const MCSubtargetInfo &STI";
+ if (IsVarLenInst) {
+ OS << ",\n"
+ << " llvm::function_ref<void(APInt "
+ "&,"
+ << " uint64_t)> makeUp";
+ }
+ OS << ") {\n"
<< " const FeatureBitset &Bits = STI.getFeatureBits();\n"
<< "\n"
<< " const uint8_t *Ptr = DecodeTable;\n"
- << " InsnType CurFieldValue = 0;\n"
+ << " uint64_t CurFieldValue = 0;\n"
<< " DecodeStatus S = MCDisassembler::Success;\n"
<< " while (true) {\n"
<< " ptrdiff_t Loc = Ptr - DecodeTable;\n"
@@ -2236,8 +2348,10 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " case MCD::OPC_ExtractField: {\n"
<< " unsigned Start = *++Ptr;\n"
<< " unsigned Len = *++Ptr;\n"
- << " ++Ptr;\n"
- << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
+ << " ++Ptr;\n";
+ if (IsVarLenInst)
+ OS << " makeUp(insn, Start + Len);\n";
+ OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
<< " LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << "
"\", \"\n"
<< " << Len << \"): \" << CurFieldValue << \"\\n\");\n"
@@ -2246,7 +2360,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " case MCD::OPC_FilterValue: {\n"
<< " // Decode the field value.\n"
<< " unsigned Len;\n"
- << " InsnType Val = decodeULEB128(++Ptr, &Len);\n"
+ << " uint64_t Val = decodeULEB128(++Ptr, &Len);\n"
<< " Ptr += Len;\n"
<< " // NumToSkip is a plain 24-bit integer.\n"
<< " unsigned NumToSkip = *Ptr++;\n"
@@ -2267,11 +2381,14 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " }\n"
<< " case MCD::OPC_CheckField: {\n"
<< " unsigned Start = *++Ptr;\n"
- << " unsigned Len = *++Ptr;\n"
- << " InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
+ << " unsigned Len = *++Ptr;\n";
+ if (IsVarLenInst)
+ OS << " makeUp(insn, Start + Len);\n";
+ OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"
<< " // Decode the field value.\n"
- << " InsnType ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
- << " Ptr += Len;\n"
+ << " unsigned PtrLen = 0;\n"
+ << " uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n"
+ << " Ptr += PtrLen;\n"
<< " // NumToSkip is a plain 24-bit integer.\n"
<< " unsigned NumToSkip = *Ptr++;\n"
<< " NumToSkip |= (*Ptr++) << 8;\n"
@@ -2321,8 +2438,12 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< "\n"
<< " MI.clear();\n"
<< " MI.setOpcode(Opc);\n"
- << " bool DecodeComplete;\n"
- << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
+ << " bool DecodeComplete;\n";
+ if (IsVarLenInst) {
+ OS << " Len = InstrLenTable[Opc];\n"
+ << " makeUp(insn, Len);\n";
+ }
+ OS << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
"DecodeComplete);\n"
<< " assert(DecodeComplete);\n"
<< "\n"
@@ -2376,11 +2497,12 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " case MCD::OPC_SoftFail: {\n"
<< " // Decode the mask values.\n"
<< " unsigned Len;\n"
- << " InsnType PositiveMask = decodeULEB128(++Ptr, &Len);\n"
+ << " uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n"
<< " Ptr += Len;\n"
- << " InsnType NegativeMask = decodeULEB128(Ptr, &Len);\n"
+ << " uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n"
<< " Ptr += Len;\n"
- << " bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n"
+ << " bool Fail = (insn & PositiveMask) != 0 || (~insn & "
+ "NegativeMask) != 0;\n"
<< " if (Fail)\n"
<< " S = MCDisassembler::SoftFail;\n"
<< " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
@@ -2399,9 +2521,11 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
}
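The OPC_SoftFail hunk above moves the masks to uint64_t; the check itself is unchanged in spirit. A self-contained restatement:

    #include <cstdint>

    // An encoding soft-fails when a bit required to be 0 is set
    // (PositiveMask) or a bit required to be 1 is clear (NegativeMask).
    static bool isSoftFail(uint64_t Insn, uint64_t PositiveMask,
                           uint64_t NegativeMask) {
      return (Insn & PositiveMask) != 0 || (~Insn & NegativeMask) != 0;
    }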
// Emits disassembler code for instruction decoding.
-void FixedLenDecoderEmitter::run(raw_ostream &o) {
+void DecoderEmitter::run(raw_ostream &o) {
formatted_raw_ostream OS(o);
OS << "#include \"llvm/MC/MCInst.h\"\n";
+ OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n";
+ OS << "#include \"llvm/MC/SubtargetFeature.h\"\n";
OS << "#include \"llvm/Support/DataTypes.h\"\n";
OS << "#include \"llvm/Support/Debug.h\"\n";
OS << "#include \"llvm/Support/LEB128.h\"\n";
@@ -2469,6 +2593,14 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
OpcMap;
std::map<unsigned, std::vector<OperandInfo>> Operands;
+ std::vector<unsigned> InstrLen;
+
+ bool IsVarLenInst =
+ any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) {
+ RecordVal *RV = CGI->TheDef->getValue("Inst");
+ return RV && isa<DagInit>(RV->getValue());
+ });
+ unsigned MaxInstLen = 0;
for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
@@ -2487,10 +2619,18 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
NumInstructions++;
NumEncodings++;
- if (!Size)
+ if (!Size && !IsVarLenInst)
continue;
- if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
+ if (IsVarLenInst)
+ InstrLen.resize(NumberedInstructions.size(), 0);
+
+ if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i,
+ Operands, IsVarLenInst)) {
+ if (IsVarLenInst) {
+ MaxInstLen = std::max(MaxInstLen, Len);
+ InstrLen[i] = Len;
+ }
std::string DecoderNamespace =
std::string(EncodingDef->getValueAsString("DecoderNamespace"));
if (!NumberedEncodings[i].HwModeName.empty())
@@ -2509,7 +2649,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
ArrayRef<EncodingAndInst> NumberedEncodingsRef(
NumberedEncodings.data(), NumberedEncodings.size());
FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
- 8 * Opc.first.second, this);
+ IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this);
// The decode table is cleared for each top level decoder function. The
// predicates and decoders themselves, however, are shared across all
@@ -2534,6 +2674,11 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
OS.flush();
}
+ // For variable-length instructions, emit an instruction length table
+ // so the decoder knows how long each instruction is. See the M68k
+ // disassembler for example usage.
+ if (IsVarLenInst)
+ emitInstrLenTable(OS, InstrLen);
// Emit the predicate function.
emitPredicateFunction(OS, TableInfo.Predicates, 0);
@@ -2541,20 +2686,20 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
emitDecoderFunction(OS, TableInfo.Decoders, 0);
// Emit the main entry point for the decoder, decodeInstruction().
- emitDecodeInstruction(OS);
+ emitDecodeInstruction(OS, IsVarLenInst);
OS << "\n} // end namespace llvm\n";
}
namespace llvm {
-void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS,
- const std::string &PredicateNamespace,
- const std::string &GPrefix,
- const std::string &GPostfix, const std::string &ROK,
- const std::string &RFail, const std::string &L) {
- FixedLenDecoderEmitter(RK, PredicateNamespace, GPrefix, GPostfix,
- ROK, RFail, L).run(OS);
+void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
+ const std::string &PredicateNamespace,
+ const std::string &GPrefix, const std::string &GPostfix,
+ const std::string &ROK, const std::string &RFail,
+ const std::string &L) {
+ DecoderEmitter(RK, PredicateNamespace, GPrefix, GPostfix, ROK, RFail, L)
+ .run(OS);
}
} // end namespace llvm
diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp
index b21bf369d18e..f3751591f3d9 100644
--- a/llvm/utils/TableGen/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/DirectiveEmitter.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
@@ -368,8 +367,7 @@ void GenerateCaseForVersionedClauses(const std::vector<Record *> &Clauses,
const auto ClauseFormattedName = VerClause.getClause().getFormattedName();
- if (Cases.find(ClauseFormattedName) == Cases.end()) {
- Cases.insert(ClauseFormattedName);
+ if (Cases.insert(ClauseFormattedName).second) {
OS << " case " << DirLang.getClausePrefix() << ClauseFormattedName
<< ":\n";
OS << " return " << VerClause.getMinVersion()
diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp
index 7c3f53b31bf4..297d12c5d0e9 100644
--- a/llvm/utils/TableGen/DisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp
@@ -95,12 +95,11 @@ using namespace llvm::X86Disassembler;
namespace llvm {
-extern void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS,
- const std::string &PredicateNamespace,
- const std::string &GPrefix,
- const std::string &GPostfix,
- const std::string &ROK,
- const std::string &RFail, const std::string &L);
+extern void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
+ const std::string &PredicateNamespace,
+ const std::string &GPrefix, const std::string &GPostfix,
+ const std::string &ROK, const std::string &RFail,
+ const std::string &L);
void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
CodeGenTarget Target(Records);
@@ -140,17 +139,16 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
if (PredicateNamespace == "Thumb")
PredicateNamespace = "ARM";
- EmitFixedLenDecoder(Records, OS, PredicateNamespace,
- "if (!Check(S, ", "))",
- "S", "MCDisassembler::Fail",
- " MCDisassembler::DecodeStatus S = "
- "MCDisassembler::Success;\n(void)S;");
+ EmitDecoder(Records, OS, PredicateNamespace, "if (!Check(S, ", "))", "S",
+ "MCDisassembler::Fail",
+ " MCDisassembler::DecodeStatus S = "
+ "MCDisassembler::Success;\n(void)S;");
return;
}
- EmitFixedLenDecoder(Records, OS, std::string(Target.getName()), "if (",
- " == MCDisassembler::Fail)", "MCDisassembler::Success",
- "MCDisassembler::Fail", "");
+ EmitDecoder(Records, OS, std::string(Target.getName()), "if (",
+ " == MCDisassembler::Fail)", "MCDisassembler::Success",
+ "MCDisassembler::Fail", "");
}
} // end namespace llvm
diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp
index 77654cbc92fd..bc8ccdac557b 100644
--- a/llvm/utils/TableGen/ExegesisEmitter.cpp
+++ b/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -13,15 +13,11 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <algorithm>
#include <cassert>
-#include <cstdint>
#include <map>
#include <string>
#include <vector>
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index ac9fe6db4328..49c2ead468e3 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -17,8 +17,8 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
diff --git a/llvm/utils/TableGen/GICombinerEmitter.cpp b/llvm/utils/TableGen/GICombinerEmitter.cpp
index 0dea1ef00e4b..77e05aebf53a 100644
--- a/llvm/utils/TableGen/GICombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GICombinerEmitter.cpp
@@ -933,28 +933,27 @@ void GICombinerEmitter::run(raw_ostream &OS) {
"getRuleIdxForIdentifier(RangePair.first);\n"
<< " const auto Last = "
"getRuleIdxForIdentifier(RangePair.second);\n"
- << " if (!First.hasValue() || !Last.hasValue())\n"
+ << " if (!First || !Last)\n"
<< " return None;\n"
<< " if (First >= Last)\n"
<< " report_fatal_error(\"Beginning of range should be before "
"end of range\");\n"
<< " return {{*First, *Last + 1}};\n"
- << " } else if (RangePair.first == \"*\") {\n"
+ << " }\n"
+ << " if (RangePair.first == \"*\") {\n"
<< " return {{0, " << Rules.size() << "}};\n"
- << " } else {\n"
- << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n"
- << " if (!I.hasValue())\n"
- << " return None;\n"
- << " return {{*I, *I + 1}};\n"
<< " }\n"
- << " return None;\n"
+ << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n"
+ << " if (!I)\n"
+ << " return None;\n"
+ << " return {{*I, *I + 1}};\n"
<< "}\n\n";
for (bool Enabled : {true, false}) {
OS << "bool " << getClassName() << "RuleConfig::setRule"
<< (Enabled ? "Enabled" : "Disabled") << "(StringRef RuleIdentifier) {\n"
<< " auto MaybeRange = getRuleRangeForIdentifier(RuleIdentifier);\n"
- << " if (!MaybeRange.hasValue())\n"
+ << " if (!MaybeRange)\n"
<< " return false;\n"
<< " for (auto I = MaybeRange->first; I < MaybeRange->second; ++I)\n"
<< " DisabledRules." << (Enabled ? "reset" : "set") << "(I);\n"
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
index 7e037dd03b60..8be32d2effa6 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp
@@ -48,7 +48,7 @@ void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const {
<< Assignment.first << ")";
Separator = ", ";
}
- OS << format("|%p|", &N);
+ OS << llvm::format("|%p|", &N);
writePorts("d", N->getOperandInfo());
OS << "}\"";
if (N->isMatchRoot())
@@ -82,7 +82,7 @@ void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const {
writePorts("s", N->getOperandInfo());
OS << "|" << N->getName() << "|";
N->printDescription(OS);
- OS << format("|%p|", &N);
+ OS << llvm::format("|%p|", &N);
writePorts("d", N->getOperandInfo());
OS << "}\",style=dotted]\n";
}
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.h b/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
index 56df37731c09..55a86259661d 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
@@ -32,11 +32,11 @@ public:
Optional<unsigned> OpIdx = None)
: Name(Name), InstrID(InstrID), OpIdx(OpIdx) {}
- bool isInstr() const { return !OpIdx.hasValue(); }
+ bool isInstr() const { return !OpIdx; }
StringRef getName() const { return Name; }
unsigned getInstrID() const { return InstrID; }
unsigned getOpIdx() const {
- assert(OpIdx.hasValue() && "Is not an operand binding");
+ assert(OpIdx && "Is not an operand binding");
return *OpIdx;
}
};
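These hunks are part of a wider migration away from explicit hasValue()/getValueOr() toward contextual bool conversion and value_or(). The same pattern, with std::optional as a stand-in for llvm::Optional:

    #include <optional>

    static unsigned opIdxOr(std::optional<unsigned> OpIdx, unsigned Default) {
      return OpIdx.value_or(Default); // was: OpIdx.getValueOr(Default)
    }

    static bool hasOpIdx(std::optional<unsigned> OpIdx) {
      return static_cast<bool>(OpIdx); // was: OpIdx.hasValue()
    }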
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 018aa7ee2f71..c8eac56d03e6 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -30,6 +30,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
#include "SubtargetFeatureInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
@@ -465,9 +466,9 @@ public:
MatchTableRecord(Optional<unsigned> LabelID_, StringRef EmitStr,
unsigned NumElements, unsigned Flags,
int64_t RawValue = std::numeric_limits<int64_t>::min())
- : LabelID(LabelID_.getValueOr(~0u)), EmitStr(EmitStr),
+ : LabelID(LabelID_.value_or(~0u)), EmitStr(EmitStr),
NumElements(NumElements), Flags(Flags), RawValue(RawValue) {
- assert((!LabelID_.hasValue() || LabelID != ~0u) &&
+ assert((!LabelID_ || LabelID != ~0u) &&
"This value is reserved for non-labels");
}
MatchTableRecord(const MatchTableRecord &Other) = default;
@@ -2935,12 +2936,12 @@ public:
}
void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode(SubOperand.hasValue() ? "GIR_ComplexSubOperandRenderer"
- : "GIR_ComplexRenderer")
+ Table << MatchTable::Opcode(SubOperand ? "GIR_ComplexSubOperandRenderer"
+ : "GIR_ComplexRenderer")
<< MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
<< MatchTable::Comment("RendererID")
<< MatchTable::IntValue(RendererID);
- if (SubOperand.hasValue())
+ if (SubOperand)
Table << MatchTable::Comment("SubOperand")
<< MatchTable::IntValue(SubOperand.getValue());
Table << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
@@ -3815,12 +3816,15 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
if (!ParsedAddrSpaces.empty()) {
InsnMatcher.addPredicate<MemoryAddressSpacePredicateMatcher>(
0, ParsedAddrSpaces);
+ return InsnMatcher;
}
}
int64_t MinAlign = Predicate.getMinAlignment();
- if (MinAlign > 0)
+ if (MinAlign > 0) {
InsnMatcher.addPredicate<MemoryAlignmentPredicateMatcher>(0, MinAlign);
+ return InsnMatcher;
+ }
}
// G_LOAD is used for both non-extending and any-extending loads.
@@ -4269,7 +4273,7 @@ Error GlobalISelEmitter::importChildMatcher(
auto MaybeInsnOperand = OM.addPredicate<InstructionOperandMatcher>(
InsnMatcher.getRuleMatcher(), SrcChild->getName());
- if (!MaybeInsnOperand.hasValue()) {
+ if (!MaybeInsnOperand) {
// This isn't strictly true. If the user were to provide exactly the same
// matchers as the original operand then we could allow it. However, it's
// simpler to not permit the redundant specification.
@@ -4400,7 +4404,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
TreePatternNode *DstChild) {
const auto &SubOperand = Rule.getComplexSubOperand(DstChild->getName());
- if (SubOperand.hasValue()) {
+ if (SubOperand) {
DstMIBuilder.addRenderer<RenderComplexPatternOperand>(
*std::get<0>(*SubOperand), DstChild->getName(),
std::get<1>(*SubOperand), std::get<2>(*SubOperand));
@@ -4802,7 +4806,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
const auto SrcRCDstRCPair =
RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx);
- if (SrcRCDstRCPair.hasValue()) {
+ if (SrcRCDstRCPair) {
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
if (SrcRCDstRCPair->first != RC)
return failedImport("EXTRACT_SUBREG requires an additional COPY");
@@ -5533,6 +5537,7 @@ std::vector<Matcher *> GlobalISelEmitter::optimizeRules(
ProcessCurrentGroup();
LLVM_DEBUG(dbgs() << "NumGroups: " << NumGroups << "\n");
+ (void) NumGroups;
assert(CurrentGroup->empty() && "The last group wasn't properly processed");
return OptRules;
}
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 3c92aa0cc27a..a7a4f4f5f1a7 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -36,6 +36,12 @@
using namespace llvm;
+cl::OptionCategory InstrInfoEmitterCat("Options for -gen-instr-info");
+static cl::opt<bool> ExpandMIOperandInfo(
+ "instr-info-expand-mi-operand-info",
+ cl::desc("Expand operand's MIOperandInfo DAG into suboperands"),
+ cl::cat(InstrInfoEmitterCat), cl::init(true));
+
namespace {
class InstrInfoEmitter {
@@ -379,6 +385,9 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << "namespace " << Namespace << " {\n";
OS << "LLVM_READONLY\n";
OS << "static int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
+ auto getInstrName = [&](int I) -> StringRef {
+ return NumberedInstructions[I]->TheDef->getName();
+ };
// TODO: Factor out duplicate operand lists to compress the tables.
if (!NumberedInstructions.empty()) {
std::vector<int> OperandOffsets;
@@ -388,7 +397,7 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OperandOffsets.push_back(CurrentOffset);
for (const auto &Op : Inst->Operands) {
const DagInit *MIOI = Op.MIOperandInfo;
- if (!MIOI || MIOI->getNumArgs() == 0) {
+ if (!ExpandMIOperandInfo || !MIOI || MIOI->getNumArgs() == 0) {
// Single, anonymous, operand.
OperandRecords.push_back(Op.Rec);
++CurrentOffset;
@@ -408,8 +417,10 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << ((OperandRecords.size() <= UINT16_MAX) ? " const uint16_t"
: " const uint32_t");
OS << " Offsets[] = {\n";
- for (int I = 0, E = OperandOffsets.size(); I != E; ++I)
+ for (int I = 0, E = OperandOffsets.size(); I != E; ++I) {
+ OS << " /* " << getInstrName(I) << " */\n";
OS << " " << OperandOffsets[I] << ",\n";
+ }
OS << " };\n";
// Add an entry for the end so that we don't need to special case it below.
@@ -419,22 +430,22 @@ void InstrInfoEmitter::emitOperandTypeMappings(
// Size the signed integer operand type to save space.
assert(EnumVal <= INT16_MAX &&
"Too many operand types for operand types table");
+ OS << "\n using namespace OpTypes;\n";
OS << ((EnumVal <= INT8_MAX) ? " const int8_t" : " const int16_t");
OS << " OpcodeOperandTypes[] = {\n ";
- for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) {
+ for (int I = 0, E = OperandRecords.size(), CurOffset = 0; I != E; ++I) {
// We print each Opcode's operands in its own row.
if (I == OperandOffsets[CurOffset]) {
- OS << "\n ";
- // If there are empty rows, mark them with an empty comment.
+ OS << "\n /* " << getInstrName(CurOffset) << " */\n ";
while (OperandOffsets[++CurOffset] == I)
- OS << "/**/\n ";
+ OS << "/* " << getInstrName(CurOffset) << " */\n ";
}
Record *OpR = OperandRecords[I];
if ((OpR->isSubClassOf("Operand") ||
OpR->isSubClassOf("RegisterOperand") ||
OpR->isSubClassOf("RegisterClass")) &&
!OpR->isAnonymous())
- OS << "OpTypes::" << OpR->getName();
+ OS << OpR->getName();
else
OS << -1;
OS << ", ";
@@ -449,6 +460,31 @@ void InstrInfoEmitter::emitOperandTypeMappings(
OS << "} // end namespace " << Namespace << "\n";
OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_OPERAND_TYPE\n\n";
+
+ OS << "#ifdef GET_INSTRINFO_MEM_OPERAND_SIZE\n";
+ OS << "#undef GET_INSTRINFO_MEM_OPERAND_SIZE\n";
+ OS << "namespace llvm {\n";
+ OS << "namespace " << Namespace << " {\n";
+ OS << "LLVM_READONLY\n";
+ OS << "static int getMemOperandSize(int OpType) {\n";
+ OS << " switch (OpType) {\n";
+ std::map<int, std::vector<StringRef>> SizeToOperandName;
+ for (const Record *Op : Operands) {
+ if (!Op->isSubClassOf("X86MemOperand"))
+ continue;
+ if (int Size = Op->getValueAsInt("Size"))
+ SizeToOperandName[Size].push_back(Op->getName());
+ }
+ OS << " default: return 0;\n";
+ for (auto KV : SizeToOperandName) {
+ for (const StringRef &OperandName : KV.second)
+ OS << " case OpTypes::" << OperandName << ":\n";
+ OS << " return " << KV.first << ";\n\n";
+ }
+ OS << " }\n}\n";
+ OS << "} // end namespace " << Namespace << "\n";
+ OS << "} // end namespace llvm\n";
+ OS << "#endif // GET_INSTRINFO_MEM_OPERAND_SIZE\n\n";
}
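The loop above emits one case per X86MemOperand with a nonzero Size. An illustrative shape of the generated function; the operand-type names and sizes below are hypothetical stand-ins:

    // Illustrative generated output; i8mem/i32mem are stand-in names.
    namespace OpTypes { enum OperandType { i8mem, i32mem }; }

    static int getMemOperandSize(int OpType) {
      switch (OpType) {
      default: return 0;
      case OpTypes::i8mem:  return 8;
      case OpTypes::i32mem: return 32;
      }
    }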
void InstrInfoEmitter::emitLogicalOperandSizeMappings(
@@ -943,6 +979,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
// Emit all of the target independent flags...
if (Inst.isPreISelOpcode) OS << "|(1ULL<<MCID::PreISelOpcode)";
if (Inst.isPseudo) OS << "|(1ULL<<MCID::Pseudo)";
+ if (Inst.isMeta) OS << "|(1ULL<<MCID::Meta)";
if (Inst.isReturn) OS << "|(1ULL<<MCID::Return)";
if (Inst.isEHScopeReturn) OS << "|(1ULL<<MCID::EHScopeReturn)";
if (Inst.isBranch) OS << "|(1ULL<<MCID::Branch)";
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index a5aa4069e60f..fca2bc34e09a 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -46,7 +46,7 @@ public:
raw_ostream &OS);
void EmitGenerator(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitAttributes(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
- void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints, bool IsGCC,
+ void EmitIntrinsicToBuiltinMap(const CodeGenIntrinsicTable &Ints, bool IsClang,
raw_ostream &OS);
};
} // End anonymous namespace
@@ -196,25 +196,25 @@ void IntrinsicEmitter::EmitIntrinsicToOverloadTable(
enum IIT_Info {
// Common values should be encoded with 0-15.
IIT_Done = 0,
- IIT_I1 = 1,
- IIT_I8 = 2,
- IIT_I16 = 3,
- IIT_I32 = 4,
- IIT_I64 = 5,
- IIT_F16 = 6,
- IIT_F32 = 7,
- IIT_F64 = 8,
- IIT_V2 = 9,
- IIT_V4 = 10,
- IIT_V8 = 11,
- IIT_V16 = 12,
- IIT_V32 = 13,
- IIT_PTR = 14,
- IIT_ARG = 15,
+ IIT_I1 = 1,
+ IIT_I8 = 2,
+ IIT_I16 = 3,
+ IIT_I32 = 4,
+ IIT_I64 = 5,
+ IIT_F16 = 6,
+ IIT_F32 = 7,
+ IIT_F64 = 8,
+ IIT_V2 = 9,
+ IIT_V4 = 10,
+ IIT_V8 = 11,
+ IIT_V16 = 12,
+ IIT_V32 = 13,
+ IIT_PTR = 14,
+ IIT_ARG = 15,
// Values from 16+ are only encodable with the inefficient encoding.
- IIT_V64 = 16,
- IIT_MMX = 17,
+ IIT_V64 = 16,
+ IIT_MMX = 17,
IIT_TOKEN = 18,
IIT_METADATA = 19,
IIT_EMPTYSTRUCT = 20,
@@ -225,7 +225,7 @@ enum IIT_Info {
IIT_EXTEND_ARG = 25,
IIT_TRUNC_ARG = 26,
IIT_ANYPTR = 27,
- IIT_V1 = 28,
+ IIT_V1 = 28,
IIT_VARARG = 29,
IIT_HALF_VEC_ARG = 30,
IIT_SAME_VEC_WIDTH_ARG = 31,
@@ -248,20 +248,26 @@ enum IIT_Info {
IIT_BF16 = 48,
IIT_STRUCT9 = 49,
IIT_V256 = 50,
- IIT_AMX = 51,
+ IIT_AMX = 51,
IIT_PPCF128 = 52,
IIT_V3 = 53,
IIT_EXTERNREF = 54,
- IIT_FUNCREF = 55
+ IIT_FUNCREF = 55,
+ IIT_ANYPTR_TO_ELT = 56,
+ IIT_I2 = 57,
+ IIT_I4 = 58,
};
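As a worked example of the fixed encoding using the enum above: an intrinsic whose signature is an i32 return type with one i16 parameter contributes the code IIT_I32 followed by IIT_I16. This sketches the code stream only, not the packed table layout:

    // Code stream for a hypothetical (i32 ret, i16 arg) signature; the
    // return type is encoded first, then each parameter in order.
    static const unsigned char ExampleFixedSig[] = {
        /*IIT_I32=*/4,
        /*IIT_I16=*/3,
    };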
static void EncodeFixedValueType(MVT::SimpleValueType VT,
std::vector<unsigned char> &Sig) {
+ // clang-format off
if (MVT(VT).isInteger()) {
unsigned BitWidth = MVT(VT).getFixedSizeInBits();
switch (BitWidth) {
default: PrintFatalError("unhandled integer type width in intrinsic!");
case 1: return Sig.push_back(IIT_I1);
+ case 2: return Sig.push_back(IIT_I2);
+ case 4: return Sig.push_back(IIT_I4);
case 8: return Sig.push_back(IIT_I8);
case 16: return Sig.push_back(IIT_I16);
case 32: return Sig.push_back(IIT_I32);
@@ -291,6 +297,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
case MVT::funcref:
return Sig.push_back(IIT_FUNCREF);
}
+ // clang-format on
}
#if defined(_MSC_VER) && !defined(__clang__)
@@ -327,6 +334,13 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
// Encode LLVMMatchType<Number> ArgNo
Sig.push_back(Number);
return;
+ } else if (R->isSubClassOf("LLVMAnyPointerToElt")) {
+ Sig.push_back(IIT_ANYPTR_TO_ELT);
+ // Encode overloaded ArgNo
+ Sig.push_back(NextArgCode++);
+ // Encode LLVMMatchType<Number> ArgNo
+ Sig.push_back(Number);
+ return;
} else if (R->isSubClassOf("LLVMPointerToElt"))
Sig.push_back(IIT_PTR_TO_ELT);
else if (R->isSubClassOf("LLVMVectorElementType"))
@@ -415,6 +429,9 @@ static void UpdateArgCodes(Record *R, std::vector<unsigned char> &ArgCodes,
if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
ArgCodes.push_back(3 /*vAny*/);
++NumInserted;
+ } else if (R->isSubClassOf("LLVMAnyPointerToElt")) {
+ ArgCodes.push_back(4 /*iPTRAny*/);
+ ++NumInserted;
}
return;
}
@@ -599,6 +616,9 @@ struct AttributeComparator {
if (L->isNoReturn != R->isNoReturn)
return R->isNoReturn;
+ if (L->isNoCallback != R->isNoCallback)
+ return R->isNoCallback;
+
if (L->isNoSync != R->isNoSync)
return R->isNoSync;
@@ -748,16 +768,18 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
if (!Intrinsic.canThrow ||
(Intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem &&
!Intrinsic.hasSideEffects) ||
- Intrinsic.isNoReturn || Intrinsic.isNoSync || Intrinsic.isNoFree ||
- Intrinsic.isWillReturn || Intrinsic.isCold || Intrinsic.isNoDuplicate ||
- Intrinsic.isNoMerge || Intrinsic.isConvergent ||
- Intrinsic.isSpeculatable) {
+ Intrinsic.isNoReturn || Intrinsic.isNoCallback || Intrinsic.isNoSync ||
+ Intrinsic.isNoFree || Intrinsic.isWillReturn || Intrinsic.isCold ||
+ Intrinsic.isNoDuplicate || Intrinsic.isNoMerge ||
+ Intrinsic.isConvergent || Intrinsic.isSpeculatable) {
OS << " const Attribute::AttrKind Atts[] = {";
ListSeparator LS(",");
if (!Intrinsic.canThrow)
OS << LS << "Attribute::NoUnwind";
if (Intrinsic.isNoReturn)
OS << LS << "Attribute::NoReturn";
+ if (Intrinsic.isNoCallback)
+ OS << LS << "Attribute::NoCallback";
if (Intrinsic.isNoSync)
OS << LS << "Attribute::NoSync";
if (Intrinsic.isNoFree)
@@ -858,14 +880,15 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
}
void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
- const CodeGenIntrinsicTable &Ints, bool IsGCC, raw_ostream &OS) {
- StringRef CompilerName = (IsGCC ? "GCC" : "MS");
+ const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) {
+ StringRef CompilerName = (IsClang ? "Clang" : "MS");
+ StringRef UpperCompilerName = (IsClang ? "CLANG" : "MS");
typedef std::map<std::string, std::map<std::string, std::string>> BIMTy;
BIMTy BuiltinMap;
StringToOffsetTable Table;
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
const std::string &BuiltinName =
- IsGCC ? Ints[i].GCCBuiltinName : Ints[i].MSBuiltinName;
+ IsClang ? Ints[i].ClangBuiltinName : Ints[i].MSBuiltinName;
if (!BuiltinName.empty()) {
// Get the map for this target prefix.
std::map<std::string, std::string> &BIM =
@@ -883,7 +906,7 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
OS << "// This is used by the C front-end. The builtin name is passed\n";
OS << "// in as BuiltinName, and a target prefix (e.g. 'ppc') is passed\n";
OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n";
- OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << CompilerName << "_BUILTIN\n";
+ OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << UpperCompilerName << "_BUILTIN\n";
OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
<< "Builtin(const char "
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index d54132f3190b..182cd0076090 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -172,7 +172,7 @@ static MarshallingInfo createMarshallingInfo(const Record &R) {
Ret.NormalizedValuesScope = R.getValueAsString("NormalizedValuesScope");
Ret.ImpliedCheck = R.getValueAsString("ImpliedCheck");
Ret.ImpliedValue =
- R.getValueAsOptionalString("ImpliedValue").getValueOr(Ret.DefaultValue);
+ R.getValueAsOptionalString("ImpliedValue").value_or(Ret.DefaultValue);
Ret.ShouldParse = R.getValueAsString("ShouldParse");
Ret.Normalizer = R.getValueAsString("Normalizer");
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 11d896229f5b..03c7326e817a 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -60,18 +60,43 @@ void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) {
// Print the option name.
OS << R->getValueAsString("Name");
+ StringRef MetaVarName;
// Print the meta-variable.
if (!isa<UnsetInit>(R->getValueInit("MetaVarName"))) {
+ MetaVarName = R->getValueAsString("MetaVarName");
+ } else if (!isa<UnsetInit>(R->getValueInit("Values")))
+ MetaVarName = "<value>";
+
+ if (!MetaVarName.empty()) {
OS << '=';
- OS.write_escaped(R->getValueAsString("MetaVarName"));
+ OS.write_escaped(MetaVarName);
}
OS << "\n\n";
+ std::string HelpText;
// The option help text.
if (!isa<UnsetInit>(R->getValueInit("HelpText"))) {
+ HelpText = R->getValueAsString("HelpText").trim().str();
+ if (!HelpText.empty() && HelpText.back() != '.')
+ HelpText.push_back('.');
+ }
+
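+ // If the option has a list of permitted values, append them to the help
+ // text, e.g. " <value> must be 'a', 'b' or 'c'."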
+ if (!isa<UnsetInit>(R->getValueInit("Values"))) {
+ SmallVector<StringRef> Values;
+ SplitString(R->getValueAsString("Values"), Values, ",");
+ HelpText += (" " + MetaVarName + " must be '").str();
+
+ if (Values.size() > 1) {
+ HelpText += join(Values.begin(), Values.end() - 1, "', '");
+ HelpText += "' or '";
+ }
+ HelpText += (Values.back() + "'.").str();
+ }
+
+ if (!HelpText.empty()) {
OS << ' ';
- OS.write_escaped(R->getValueAsString("HelpText"));
+ OS.write_escaped(HelpText);
OS << "\n\n";
}
}
diff --git a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
index 6acb630299c1..dc04174217fb 100644
--- a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -109,7 +109,8 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn,
OperandMap[BaseIdx + i].Data.Imm = II->getValue();
++OpsAdded;
} else if (auto *BI = dyn_cast<BitsInit>(Dag->getArg(i))) {
- auto *II = cast<IntInit>(BI->convertInitializerTo(IntRecTy::get()));
+ auto *II =
+ cast<IntInit>(BI->convertInitializerTo(IntRecTy::get(Records)));
OperandMap[BaseIdx + i].Kind = OpData::Imm;
OperandMap[BaseIdx + i].Data.Imm = II->getValue();
++OpsAdded;
diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp
index d97d7acb87a7..e6689b211a7d 100644
--- a/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -172,9 +172,8 @@ static void visitRegisterBankClasses(
SmallPtrSetImpl<const CodeGenRegisterClass *> &VisitedRCs) {
// Make sure we only visit each class once to avoid infinite loops.
- if (VisitedRCs.count(RC))
+ if (!VisitedRCs.insert(RC).second)
return;
- VisitedRCs.insert(RC);
// Visit each explicitly named class.
VisitFn(RC, Kind.str());
@@ -266,9 +265,8 @@ void RegisterBankEmitter::emitBaseClassImplementation(
<< "::NumRegisterBanks) {\n"
<< " // Assert that RegBank indices match their ID's\n"
<< "#ifndef NDEBUG\n"
- << " unsigned Index = 0;\n"
- << " for (const auto &RB : RegBanks)\n"
- << " assert(Index++ == RB->getID() && \"Index != ID\");\n"
+ << " for (auto RB : enumerate(RegBanks))\n"
+ << " assert(RB.index() == RB.value()->getID() && \"Index != ID\");\n"
<< "#endif // NDEBUG\n"
<< "}\n"
<< "} // end namespace llvm\n";
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 1ed7bc103f9c..3a0fa564074e 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -268,7 +268,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
OS << "// Get the name of this register unit pressure set.\n"
<< "const char *" << ClassName << "::\n"
<< "getRegPressureSetName(unsigned Idx) const {\n"
- << " static const char *const PressureNameTable[] = {\n";
+ << " static const char *PressureNameTable[] = {\n";
unsigned MaxRegUnitWeight = 0;
for (unsigned i = 0; i < NumSets; ++i ) {
const RegUnitSet &RegUnits = RegBank.getRegSetAt(i);
@@ -753,7 +753,7 @@ RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
}
OS << " };\n\n";
- OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << ");\n"
+ OS << " --IdxA; assert(IdxA < " << SubRegIndicesSize << "); (void) IdxA;\n"
<< " --IdxB; assert(IdxB < " << SubRegIndicesSize << ");\n";
if (Rows.size() > 1)
OS << " return Rows[RowMap[IdxA]][IdxB];\n";
@@ -814,12 +814,14 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
OS << " // Sequence " << Idx << "\n";
Idx += Sequence.size() + 1;
}
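+ // Store indices into LaneMaskComposeSequences instead of pointers, using
+ // the smallest integer type that fits the largest sequence index.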
+ auto *IntType = getMinimalTypeForRange(*std::max_element(
+ SubReg2SequenceIndexMap.begin(), SubReg2SequenceIndexMap.end()));
OS << " };\n"
- " static const MaskRolOp *const CompositeSequences[] = {\n";
+ " static const "
+ << IntType << " CompositeSequences[] = {\n";
for (size_t i = 0, e = SubRegIndices.size(); i != e; ++i) {
OS << " ";
- unsigned Idx = SubReg2SequenceIndexMap[i];
- OS << format("&LaneMaskComposeSequences[%u]", Idx);
+ OS << SubReg2SequenceIndexMap[i];
if (i+1 != e)
OS << ",";
OS << " // to " << SubRegIndices[i].getName() << "\n";
@@ -832,7 +834,9 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
" --IdxA; assert(IdxA < " << SubRegIndices.size()
<< " && \"Subregister index out of bounds\");\n"
" LaneBitmask Result;\n"
- " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n"
+ " for (const MaskRolOp *Ops =\n"
+ " &LaneMaskComposeSequences[CompositeSequences[IdxA]];\n"
+ " Ops->Mask.any(); ++Ops) {\n"
" LaneBitmask::Type M = LaneMask.getAsInteger() & Ops->Mask.getAsInteger();\n"
" if (unsigned S = Ops->RotateLeft)\n"
" Result |= LaneBitmask((M << S) | (M >> (LaneBitmask::BitWidth - S)));\n"
@@ -849,7 +853,9 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
" --IdxA; assert(IdxA < " << SubRegIndices.size()
<< " && \"Subregister index out of bounds\");\n"
" LaneBitmask Result;\n"
- " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n"
+ " for (const MaskRolOp *Ops =\n"
+ " &LaneMaskComposeSequences[CompositeSequences[IdxA]];\n"
+ " Ops->Mask.any(); ++Ops) {\n"
" LaneBitmask::Type M = LaneMask.getAsInteger();\n"
" if (unsigned S = Ops->RotateLeft)\n"
" Result |= LaneBitmask((M >> S) | (M << (LaneBitmask::BitWidth - S)));\n"
@@ -1046,25 +1052,24 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
RegClassStrings.add(Name);
- // Emit the register list now.
- OS << " // " << Name << " Register Class...\n"
- << " const MCPhysReg " << Name
- << "[] = {\n ";
- for (Record *Reg : Order) {
- OS << getQualifiedName(Reg) << ", ";
- }
- OS << "\n };\n\n";
-
- OS << " // " << Name << " Bit set.\n"
- << " const uint8_t " << Name
- << "Bits[] = {\n ";
- BitVectorEmitter BVE;
- for (Record *Reg : Order) {
- BVE.add(Target.getRegBank().getReg(Reg)->EnumValue);
- }
- BVE.print(OS);
- OS << "\n };\n\n";
+ // Emit the register list now (unless it would be a zero-length array).
+ if (!Order.empty()) {
+ OS << " // " << Name << " Register Class...\n"
+ << " const MCPhysReg " << Name << "[] = {\n ";
+ for (Record *Reg : Order) {
+ OS << getQualifiedName(Reg) << ", ";
+ }
+ OS << "\n };\n\n";
+ OS << " // " << Name << " Bit set.\n"
+ << " const uint8_t " << Name << "Bits[] = {\n ";
+ BitVectorEmitter BVE;
+ for (Record *Reg : Order) {
+ BVE.add(Target.getRegBank().getReg(Reg)->EnumValue);
+ }
+ BVE.print(OS);
+ OS << "\n };\n\n";
+ }
}
OS << "} // end anonymous namespace\n\n";
@@ -1076,14 +1081,17 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
<< "MCRegisterClasses[] = {\n";
for (const auto &RC : RegisterClasses) {
+ ArrayRef<Record *> Order = RC.getOrder();
+ std::string RCName = Order.empty() ? "nullptr" : RC.getName();
+ std::string RCBitsName = Order.empty() ? "nullptr" : RC.getName() + "Bits";
+ std::string RCBitsSize = Order.empty() ? "0" : "sizeof(" + RCBitsName + ")";
assert(isInt<8>(RC.CopyCost) && "Copy cost too large.");
uint32_t RegSize = 0;
if (RC.RSI.isSimple())
RegSize = RC.RSI.getSimple().RegSize;
- OS << " { " << RC.getName() << ", " << RC.getName() << "Bits, "
+ OS << " { " << RCName << ", " << RCBitsName << ", "
<< RegClassStrings.get(RC.getName()) << ", " << RC.getOrder().size()
- << ", sizeof(" << RC.getName() << "Bits), "
- << RC.getQualifiedName() + "RegClassID"
+ << ", " << RCBitsSize << ", " << RC.getQualifiedName() + "RegClassID"
<< ", " << RegSize << ", " << RC.CopyCost << ", "
<< (RC.Allocatable ? "true" : "false") << " },\n";
}
@@ -1176,6 +1184,12 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
<< "unsigned RegUnit) const override;\n"
<< " ArrayRef<const char *> getRegMaskNames() const override;\n"
<< " ArrayRef<const uint32_t *> getRegMasks() const override;\n"
+ << " bool isGeneralPurposeRegister(const MachineFunction &, "
+ << "MCRegister) const override;\n"
+ << " bool isFixedRegister(const MachineFunction &, "
+ << "MCRegister) const override;\n"
+ << " bool isArgumentRegister(const MachineFunction &, "
+ << "MCRegister) const override;\n"
<< " /// Devirtualized TargetFrameLowering.\n"
<< " static const " << TargetName << "FrameLowering *getFrameLowering(\n"
<< " const MachineFunction &MF);\n"
@@ -1250,7 +1264,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "};\n";
// Emit SubRegIndex names, skipping 0.
- OS << "\nstatic const char *const SubRegIndexNameTable[] = { \"";
+ OS << "\nstatic const char *SubRegIndexNameTable[] = { \"";
for (const auto &Idx : SubRegIndices) {
OS << Idx.getName();
@@ -1620,10 +1634,54 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << "}\n\n";
+ const std::list<CodeGenRegisterCategory> &RegCategories =
+ RegBank.getRegCategories();
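+ // For each register category, emit a predicate returning whether PhysReg is
+ // contained in any register class of that category, i.e. a chain of
+ // "<RC>RegClass.contains(PhysReg) ||" terminated by "false".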
+ OS << "bool " << ClassName << "::\n"
+ << "isGeneralPurposeRegister(const MachineFunction &MF, "
+ << "MCRegister PhysReg) const {\n"
+ << " return\n";
+ for (const CodeGenRegisterCategory &Category : RegCategories)
+ if (Category.getName() == "GeneralPurposeRegisters") {
+ for (const CodeGenRegisterClass *RC : Category.getClasses())
+ OS << " " << RC->getQualifiedName()
+ << "RegClass.contains(PhysReg) ||\n";
+ break;
+ }
+ OS << " false;\n";
+ OS << "}\n\n";
+
+ OS << "bool " << ClassName << "::\n"
+ << "isFixedRegister(const MachineFunction &MF, "
+ << "MCRegister PhysReg) const {\n"
+ << " return\n";
+ for (const CodeGenRegisterCategory &Category : RegCategories)
+ if (Category.getName() == "FixedRegisters") {
+ for (const CodeGenRegisterClass *RC : Category.getClasses())
+ OS << " " << RC->getQualifiedName()
+ << "RegClass.contains(PhysReg) ||\n";
+ break;
+ }
+ OS << " false;\n";
+ OS << "}\n\n";
+
+ OS << "bool " << ClassName << "::\n"
+ << "isArgumentRegister(const MachineFunction &MF, "
+ << "MCRegister PhysReg) const {\n"
+ << " return\n";
+ for (const CodeGenRegisterCategory &Category : RegCategories)
+ if (Category.getName() == "ArgumentRegisters") {
+ for (const CodeGenRegisterClass *RC : Category.getClasses())
+ OS << " " << RC->getQualifiedName()
+ << "RegClass.contains(PhysReg) ||\n";
+ break;
+ }
+ OS << " false;\n";
+ OS << "}\n\n";
+
OS << "ArrayRef<const char *> " << ClassName
<< "::getRegMaskNames() const {\n";
if (!CSRSets.empty()) {
- OS << " static const char *const Names[] = {\n";
+ OS << " static const char *Names[] = {\n";
for (Record *CSRSet : CSRSets)
OS << " " << '"' << CSRSet->getName() << '"' << ",\n";
OS << " };\n";
@@ -1683,6 +1741,8 @@ void RegisterInfoEmitter::debugDump(raw_ostream &OS) {
OS << "\tLaneMask: " << PrintLaneMask(RC.LaneMask) << '\n';
OS << "\tHasDisjunctSubRegs: " << RC.HasDisjunctSubRegs << '\n';
OS << "\tCoveredBySubRegs: " << RC.CoveredBySubRegs << '\n';
+ OS << "\tAllocatable: " << RC.Allocatable << '\n';
+ OS << "\tAllocationPriority: " << unsigned(RC.AllocationPriority) << '\n';
OS << "\tRegs:";
for (const CodeGenRegister *R : RC.getMembers()) {
OS << " " << R->getName();
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index dc5c96c662be..ea849807de03 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -30,7 +30,9 @@ using namespace llvm;
namespace {
int getAsInt(Init *B) {
- return cast<IntInit>(B->convertInitializerTo(IntRecTy::get()))->getValue();
+ return cast<IntInit>(
+ B->convertInitializerTo(IntRecTy::get(B->getRecordKeeper())))
+ ->getValue();
}
int getInt(Record *R, StringRef Field) {
return getAsInt(R->getValueInit(Field));
diff --git a/llvm/utils/TableGen/SequenceToOffsetTable.h b/llvm/utils/TableGen/SequenceToOffsetTable.h
index 41cdefdb1949..1b3451c24cb0 100644
--- a/llvm/utils/TableGen/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/SequenceToOffsetTable.h
@@ -170,18 +170,18 @@ public:
/// `EmitLongStrLiterals` is false
void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl) const {
assert(Entries && "Call layout() before emitStringLiteralDef()");
- if (EmitLongStrLiterals) {
- OS << "\n#ifdef __GNUC__\n"
- << "#pragma GCC diagnostic push\n"
- << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
- << "#endif\n"
- << Decl << " = {\n";
- } else {
+ if (!EmitLongStrLiterals) {
OS << Decl << " = {\n";
emit(OS, printChar, "0");
- OS << "\n};\n\n";
+ OS << " 0\n};\n\n";
return;
}
+
+ OS << "\n#ifdef __GNUC__\n"
+ << "#pragma GCC diagnostic push\n"
+ << "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"
+ << "#endif\n"
+ << Decl << " = {\n";
for (auto I : Seqs) {
OS << " /* " << I.second << " */ \"";
for (auto C : I.first) {
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 78bbb3196e5c..88827607b517 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -74,6 +74,7 @@ class SubtargetEmitter {
std::string Target;
void Enumeration(raw_ostream &OS, DenseMap<Record *, unsigned> &FeatureMap);
+ void EmitSubtargetInfoMacroCalls(raw_ostream &OS);
unsigned FeatureKeyValues(raw_ostream &OS,
const DenseMap<Record *, unsigned> &FeatureMap);
unsigned CPUKeyValues(raw_ostream &OS,
@@ -122,8 +123,7 @@ class SubtargetEmitter {
void EmitSchedModel(raw_ostream &OS);
void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
- void ParseFeaturesFunction(raw_ostream &OS, unsigned NumFeatures,
- unsigned NumProcs);
+ void ParseFeaturesFunction(raw_ostream &OS);
public:
SubtargetEmitter(RecordKeeper &R, CodeGenTarget &TGT)
@@ -193,6 +193,42 @@ static void printFeatureMask(raw_ostream &OS, RecVec &FeatureList,
OS << "} } }";
}
+/// Emit some information about each SubtargetFeature as calls to a macro so
+/// that they can be used from C++.
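+///
+/// For example (an illustrative sketch, not tied to any particular target), a
+/// subtarget header can expand the emitted calls into a member plus a getter:
+/// \code
+///   #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)             \
+///     bool ATTRIBUTE = DEFAULT; /* hypothetical expansion */                \
+///     bool GETTER() const { return ATTRIBUTE; }
+///   #include "FooGenSubtargetInfo.inc"
+/// \endcode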
+void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) {
+ OS << "\n#ifdef GET_SUBTARGETINFO_MACRO\n";
+
+ std::vector<Record *> FeatureList =
+ Records.getAllDerivedDefinitions("SubtargetFeature");
+ llvm::sort(FeatureList, LessRecordFieldName());
+
+ for (const Record *Feature : FeatureList) {
+ const StringRef Attribute = Feature->getValueAsString("Attribute");
+ const StringRef Value = Feature->getValueAsString("Value");
+
+ // Only handle boolean features for now, excluding BitVectors and enums.
+ const bool IsBool = (Value == "false" || Value == "true") &&
+ !StringRef(Attribute).contains('[');
+ if (!IsBool)
+ continue;
+
+ // Some features default to true, with values set to false if enabled.
+ const char *Default = Value == "false" ? "true" : "false";
+
+ // Define the getter with lowercased first char: xxxYyy() { return XxxYyy; }
+ const std::string Getter =
+ Attribute.substr(0, 1).lower() + Attribute.substr(1).str();
+
+ OS << "GET_SUBTARGETINFO_MACRO(" << Attribute << ", " << Default << ", "
+ << Getter << ")\n";
+ }
+ OS << "#undef GET_SUBTARGETINFO_MACRO\n";
+ OS << "#endif // GET_SUBTARGETINFO_MACRO\n\n";
+
+ OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n";
+ OS << "#undef GET_SUBTARGETINFO_MC_DESC\n\n";
+}
+
//
// FeatureKeyValues - Emit data of all the subtarget features. Used by the
// command line.
@@ -1681,13 +1717,9 @@ void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
OS << " return 0;\n}\n";
}
-//
-// ParseFeaturesFunction - Produces a subtarget specific function for parsing
+// Produces a subtarget specific function for parsing
// the subtarget features string.
-//
-void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
- unsigned NumFeatures,
- unsigned NumProcs) {
+void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
std::vector<Record*> Features =
Records.getAllDerivedDefinitions("SubtargetFeature");
llvm::sort(Features, LessRecord());
@@ -1803,8 +1835,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "} // end namespace llvm\n\n";
OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n";
- OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n";
- OS << "#undef GET_SUBTARGETINFO_MC_DESC\n\n";
+ EmitSubtargetInfoMacroCalls(OS);
OS << "namespace llvm {\n";
#if 0
@@ -1858,7 +1889,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "#include \"llvm/Support/Debug.h\"\n";
OS << "#include \"llvm/Support/raw_ostream.h\"\n\n";
- ParseFeaturesFunction(OS, NumFeatures, NumProcs);
+ ParseFeaturesFunction(OS);
OS << "#endif // GET_SUBTARGETINFO_TARGET_DESC\n\n";
diff --git a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
index 33a22776f2df..f4f360fb5be2 100644
--- a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -108,6 +108,39 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures(
OS << "}\n\n";
}
+// If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
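+// Returns true on error (an unrecognized Init), false on success. For
+// example, an AssemblerCondDag of (all_of FeatureA, (not FeatureB)) is
+// emitted at the top level as:
+//   FB[Target::FeatureA] && !FB[Target::FeatureB]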
+static bool emitFeaturesAux(StringRef TargetName, const Init &Val,
+ bool ParenIfBinOp, raw_ostream &OS) {
+ if (auto *D = dyn_cast<DefInit>(&Val)) {
+ if (!D->getDef()->isSubClassOf("SubtargetFeature"))
+ return true;
+ OS << "FB[" << TargetName << "::" << D->getAsString() << "]";
+ return false;
+ }
+ if (auto *D = dyn_cast<DagInit>(&Val)) {
+ std::string Op = D->getOperator()->getAsString();
+ if (Op == "not" && D->getNumArgs() == 1) {
+ OS << '!';
+ return emitFeaturesAux(TargetName, *D->getArg(0), true, OS);
+ }
+ if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
+ bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
+ if (Paren)
+ OS << '(';
+ ListSeparator LS(Op == "any_of" ? " || " : " && ");
+ for (auto *Arg : D->getArgs()) {
+ OS << LS;
+ if (emitFeaturesAux(TargetName, *Arg, ParenIfBinOp, OS))
+ return true;
+ }
+ if (Paren)
+ OS << ')';
+ return false;
+ }
+ }
+ return true;
+}
+
void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
StringRef TargetName, StringRef ClassName, StringRef FuncName,
SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
@@ -118,37 +151,8 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
const SubtargetFeatureInfo &SFI = SF.second;
OS << " if (";
-
- const DagInit *D = SFI.TheDef->getValueAsDag("AssemblerCondDag");
- std::string CombineType = D->getOperator()->getAsString();
- if (CombineType != "any_of" && CombineType != "all_of")
- PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!");
- if (D->getNumArgs() == 0)
- PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!");
- bool IsOr = CombineType == "any_of";
-
- if (IsOr)
- OS << "(";
-
- ListSeparator LS(IsOr ? " || " : " && ");
- for (auto *Arg : D->getArgs()) {
- OS << LS;
- if (auto *NotArg = dyn_cast<DagInit>(Arg)) {
- if (NotArg->getOperator()->getAsString() != "not" ||
- NotArg->getNumArgs() != 1)
- PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!");
- Arg = NotArg->getArg(0);
- OS << "!";
- }
- if (!isa<DefInit>(Arg) ||
- !cast<DefInit>(Arg)->getDef()->isSubClassOf("SubtargetFeature"))
- PrintFatalError(SFI.TheDef->getLoc(), "Invalid AssemblerCondDag!");
- OS << "FB[" << TargetName << "::" << Arg->getAsString() << "]";
- }
-
- if (IsOr)
- OS << ")";
-
+ emitFeaturesAux(TargetName, *SFI.TheDef->getValueAsDag("AssemblerCondDag"),
+ /*ParenIfBinOp=*/false, OS);
OS << ")\n";
OS << " Features.set(" << SFI.getEnumBitName() << ");\n";
}
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index 2d4a45f889be..efd641887232 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -25,7 +25,6 @@ enum ActionType {
NullBackend,
DumpJSON,
GenEmitter,
- GenCodeBeads,
GenRegisterInfo,
GenInstrInfo,
GenInstrDocs,
@@ -52,11 +51,13 @@ enum ActionType {
GenGICombiner,
GenX86EVEX2VEXTables,
GenX86FoldTables,
+ GenX86MnemonicTables,
GenRegisterBank,
GenExegesis,
GenAutomata,
GenDirectivesEnumDecl,
GenDirectivesEnumImpl,
+ GenDXILOperation,
};
namespace llvm {
@@ -81,8 +82,6 @@ cl::opt<ActionType> Action(
clEnumValN(DumpJSON, "dump-json",
"Dump all records as machine-readable JSON"),
clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"),
- clEnumValN(GenCodeBeads, "gen-code-beads",
- "Generate machine code beads"),
clEnumValN(GenRegisterInfo, "gen-register-info",
"Generate registers and register classes info"),
clEnumValN(GenInstrInfo, "gen-instr-info",
@@ -130,6 +129,8 @@ cl::opt<ActionType> Action(
"Generate X86 EVEX to VEX compress tables"),
clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
"Generate X86 fold tables"),
+ clEnumValN(GenX86MnemonicTables, "gen-x86-mnemonic-tables",
+ "Generate X86 mnemonic tables"),
clEnumValN(GenRegisterBank, "gen-register-bank",
"Generate registers bank descriptions"),
clEnumValN(GenExegesis, "gen-exegesis",
@@ -138,7 +139,9 @@ cl::opt<ActionType> Action(
clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl",
"Generate directive related declaration code (header file)"),
clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl",
- "Generate directive related implementation code")));
+ "Generate directive related implementation code"),
+ clEnumValN(GenDXILOperation, "gen-dxil-operation",
+ "Generate DXIL operation information")));
cl::OptionCategory PrintEnumsCat("Options for -print-enums");
cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
@@ -161,9 +164,6 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenEmitter:
EmitCodeEmitter(Records, OS);
break;
- case GenCodeBeads:
- EmitCodeBeads(Records, OS);
- break;
case GenRegisterInfo:
EmitRegisterInfo(Records, OS);
break;
@@ -257,6 +257,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenX86EVEX2VEXTables:
EmitX86EVEX2VEXTables(Records, OS);
break;
+ case GenX86MnemonicTables:
+ EmitX86MnemonicTables(Records, OS);
+ break;
case GenX86FoldTables:
EmitX86FoldTables(Records, OS);
break;
@@ -272,6 +275,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenDirectivesEnumImpl:
EmitDirectivesImpl(Records, OS);
break;
+ case GenDXILOperation:
+ EmitDXILOperation(Records, OS);
+ break;
}
return false;
diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h
index 71db8dc77b05..4dff13095696 100644
--- a/llvm/utils/TableGen/TableGenBackends.h
+++ b/llvm/utils/TableGen/TableGenBackends.h
@@ -67,7 +67,6 @@ void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS);
void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS);
void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS);
void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS);
-void EmitCodeBeads(RecordKeeper &RK, raw_ostream &OS);
void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS);
void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS);
void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS);
@@ -88,11 +87,13 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS);
void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
+void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS);
void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);
void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS);
void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS);
+void EmitDXILOperation(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
new file mode 100644
index 000000000000..a6bbe2f7ff37
--- /dev/null
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
@@ -0,0 +1,487 @@
+//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The CodeEmitterGen component for variable-length instructions.
+//
+// The basic CodeEmitterGen is almost exclusively designed for fixed-
+// length instructions. A good analogy for its encoding scheme is how printf
+// works: The (immutable) formatting string represents the fixed values in the
+// encoded instruction. Placeholders (i.e. %something), on the other hand,
+// represent the encodings of instruction operands.
+// ```
+// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
+// <encoded value for operand `dst`>);
+// ```
+// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
+// that works more like a C++ stream operator:
+// ```
+// OS << 0b1101;
+// if (Cond)
+// OS << OperandEncoding0;
+// OS << 0b1001 << OperandEncoding1;
+// ```
+// You are free to concatenate arbitrary types (and sizes) of encoding
+// fragments at any bit position, giving more flexibility in defining the
+// encoding of variable-length instructions.
+//
+// In a more specific way, instruction encoding is represented by a DAG type
+// `Inst` field. Here is an example:
+// ```
+// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
+// (operand "$dst", 4));
+// ```
+// It represents the following instruction encoding:
+// ```
+// MSB LSB
+// 1101<encoding for operand src>1001<encoding for operand dst>
+// ```
+// For more details about DAG operators in the above snippet, please
+// refer to \file include/llvm/Target/Target.td.
+//
+// VarLenCodeEmitter will convert the above DAG into the same helper function
+// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
+// for a few details).
+//
+//===----------------------------------------------------------------------===//
+
+#include "VarLenCodeEmitterGen.h"
+#include "CodeGenHwModes.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+
+using namespace llvm;
+
+namespace {
+
+class VarLenCodeEmitterGen {
+ RecordKeeper &Records;
+
+ DenseMap<Record *, VarLenInst> VarLenInsts;
+
+ // Emit base values (i.e. fixed bits in the encoded instructions)
+ void emitInstructionBaseValues(
+ raw_ostream &OS,
+ ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+ CodeGenTarget &Target, int HwMode = -1);
+
+ std::string getInstructionCase(Record *R, CodeGenTarget &Target);
+ std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
+ CodeGenTarget &Target);
+
+public:
+ explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
+
+ void run(raw_ostream &OS);
+};
+
+} // end anonymous namespace
+
+VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
+ : TheDef(TheDef), NumBits(0U) {
+ buildRec(DI);
+ for (const auto &S : Segments)
+ NumBits += S.BitWidth;
+}
+
+void VarLenInst::buildRec(const DagInit *DI) {
+ assert(TheDef && "The def record is nullptr ?");
+
+ std::string Op = DI->getOperator()->getAsString();
+
+ if (Op == "ascend" || Op == "descend") {
+ bool Reverse = Op == "descend";
+ int i = Reverse ? DI->getNumArgs() - 1 : 0;
+ int e = Reverse ? -1 : DI->getNumArgs();
+ int s = Reverse ? -1 : 1;
+ for (; i != e; i += s) {
+ const Init *Arg = DI->getArg(i);
+ if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
+ if (!BI->isComplete())
+ PrintFatalError(TheDef->getLoc(),
+ "Expecting complete bits init in `" + Op + "`");
+ Segments.push_back({BI->getNumBits(), BI});
+ } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
+ if (!BI->isConcrete())
+ PrintFatalError(TheDef->getLoc(),
+ "Expecting concrete bit init in `" + Op + "`");
+ Segments.push_back({1, BI});
+ } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
+ buildRec(SubDI);
+ } else {
+ PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
+ Op + "`: " + Arg->getAsString());
+ }
+ }
+ } else if (Op == "operand") {
+ // (operand <operand name>, <# of bits>, [(encoder <custom encoder>)])
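+ // e.g. (operand "$src", 4) for a 4-bit encoding of operand `$src`.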
+ if (DI->getNumArgs() < 2)
+ PrintFatalError(TheDef->getLoc(),
+ "Expecting at least 2 arguments for `operand`");
+ HasDynamicSegment = true;
+ const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
+ if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
+ PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");
+
+ auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
+ if (NumBitsVal <= 0)
+ PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");
+
+ StringRef CustomEncoder;
+ if (DI->getNumArgs() >= 3)
+ CustomEncoder = getCustomEncoderName(DI->getArg(2));
+ Segments.push_back(
+ {static_cast<unsigned>(NumBitsVal), OperandName, CustomEncoder});
+ } else if (Op == "slice") {
+ // (slice <operand name>, <high / low bit>, <low / high bit>,
+ // [(encoder <custom encoder>)])
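+ // e.g. (slice "$src", 7, 4) selects bits [7:4] of operand `$src`. The two
+ // bit bounds may be given in either order; they are normalized below so
+ // that the high bit is always the second argument.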
+ if (DI->getNumArgs() < 3)
+ PrintFatalError(TheDef->getLoc(),
+ "Expecting at least 3 arguments for `slice`");
+ HasDynamicSegment = true;
+ Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
+ *LoBit = DI->getArg(2);
+ if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
+ !isa<IntInit>(LoBit))
+ PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");
+
+ auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
+ LoBitVal = cast<IntInit>(LoBit)->getValue();
+ if (HiBitVal < 0 || LoBitVal < 0)
+ PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
+ bool NeedSwap = false;
+ unsigned NumBits = 0U;
+ if (HiBitVal < LoBitVal) {
+ NeedSwap = true;
+ NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
+ } else {
+ NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
+ }
+
+ StringRef CustomEncoder;
+ if (DI->getNumArgs() >= 4)
+ CustomEncoder = getCustomEncoderName(DI->getArg(3));
+
+ if (NeedSwap) {
+ // Normalization: Hi bit should always be the second argument.
+ Init *const NewArgs[] = {OperandName, LoBit, HiBit};
+ Segments.push_back({NumBits,
+ DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
+ CustomEncoder});
+ } else {
+ Segments.push_back({NumBits, DI, CustomEncoder});
+ }
+ }
+}
+
+void VarLenCodeEmitterGen::run(raw_ostream &OS) {
+ CodeGenTarget Target(Records);
+ auto Insts = Records.getAllDerivedDefinitions("Instruction");
+
+ auto NumberedInstructions = Target.getInstructionsByEnumValue();
+ const CodeGenHwModes &HWM = Target.getHwModes();
+
+ // The set of HwModes used by instruction encodings.
+ std::set<unsigned> HwModes;
+ for (const CodeGenInstruction *CGI : NumberedInstructions) {
+ Record *R = CGI->TheDef;
+
+ // Create the corresponding VarLenInst instance.
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo"))
+ continue;
+
+ if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+ if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ for (auto &KV : EBM) {
+ HwModes.insert(KV.first);
+ Record *EncodingDef = KV.second;
+ RecordVal *RV = EncodingDef->getValue("Inst");
+ DagInit *DI = cast<DagInit>(RV->getValue());
+ VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)});
+ }
+ continue;
+ }
+ }
+ RecordVal *RV = R->getValue("Inst");
+ DagInit *DI = cast<DagInit>(RV->getValue());
+ VarLenInsts.insert({R, VarLenInst(DI, RV)});
+ }
+
+ // Emit function declaration
+ OS << "void " << Target.getName()
+ << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+ << " SmallVectorImpl<MCFixup> &Fixups,\n"
+ << " APInt &Inst,\n"
+ << " APInt &Scratch,\n"
+ << " const MCSubtargetInfo &STI) const {\n";
+
+ // Emit instruction base values
+ if (HwModes.empty()) {
+ emitInstructionBaseValues(OS, NumberedInstructions, Target);
+ } else {
+ for (unsigned HwMode : HwModes)
+ emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
+ }
+
+ if (!HwModes.empty()) {
+ OS << " const unsigned **Index;\n";
+ OS << " const uint64_t *InstBits;\n";
+ OS << " unsigned HwMode = STI.getHwMode();\n";
+ OS << " switch (HwMode) {\n";
+ OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
+ for (unsigned I : HwModes) {
+ OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
+ << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
+ }
+ OS << " };\n";
+ }
+
+ // Emit helper function to retrieve base values.
+ OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
+ << " unsigned NumBits = Index[Opcode][0];\n"
+ << " if (!NumBits)\n"
+ << " return APInt::getZeroWidth();\n"
+ << " unsigned Idx = Index[Opcode][1];\n"
+ << " ArrayRef<uint64_t> Data(&InstBits[Idx], "
+ << "APInt::getNumWords(NumBits));\n"
+ << " return APInt(NumBits, Data);\n"
+ << " };\n";
+
+ // Map to accumulate all the cases.
+ std::map<std::string, std::vector<std::string>> CaseMap;
+
+ // Construct the case statement for each opcode
+ for (Record *R : Insts) {
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo"))
+ continue;
+ std::string InstName =
+ (R->getValueAsString("Namespace") + "::" + R->getName()).str();
+ std::string Case = getInstructionCase(R, Target);
+
+ CaseMap[Case].push_back(std::move(InstName));
+ }
+
+ // Emit initial function code
+ OS << " const unsigned opcode = MI.getOpcode();\n"
+ << " switch (opcode) {\n";
+
+ // Emit each case statement
+ for (const auto &C : CaseMap) {
+ const std::string &Case = C.first;
+ const auto &InstList = C.second;
+
+ ListSeparator LS("\n");
+ for (const auto &InstName : InstList)
+ OS << LS << " case " << InstName << ":";
+
+ OS << " {\n";
+ OS << Case;
+ OS << " break;\n"
+ << " }\n";
+ }
+ // Default case: unhandled opcode
+ OS << " default:\n"
+ << " std::string msg;\n"
+ << " raw_string_ostream Msg(msg);\n"
+ << " Msg << \"Not supported instr: \" << MI;\n"
+ << " report_fatal_error(Msg.str().c_str());\n"
+ << " }\n";
+ OS << "}\n\n";
+}
+
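+// Emit one {NumBits, Index} entry into the index table (IS) and, word by
+// word, the fixed bits of `Bits` into the storage table (SS).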
+static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
+ unsigned &Index) {
+ if (!Bits.getNumWords()) {
+ IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
+ return;
+ }
+
+ IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
+ << "/*Index*/" << Index << "},";
+
+ SS.indent(4);
+ for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
+ SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
+}
+
+void VarLenCodeEmitterGen::emitInstructionBaseValues(
+ raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+ CodeGenTarget &Target, int HwMode) {
+ std::string IndexArray, StorageArray;
+ raw_string_ostream IS(IndexArray), SS(StorageArray);
+
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ if (HwMode == -1) {
+ IS << " static const unsigned Index[][2] = {\n";
+ SS << " static const uint64_t InstBits[] = {\n";
+ } else {
+ StringRef Name = HWM.getMode(HwMode).Name;
+ IS << " static const unsigned Index_" << Name << "[][2] = {\n";
+ SS << " static const uint64_t InstBits_" << Name << "[] = {\n";
+ }
+
+ unsigned NumFixedValueWords = 0U;
+ for (const CodeGenInstruction *CGI : NumberedInstructions) {
+ Record *R = CGI->TheDef;
+
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo")) {
+ IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
+ continue;
+ }
+
+ Record *EncodingDef = R;
+ if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+ if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ if (EBM.hasMode(HwMode))
+ EncodingDef = EBM.get(HwMode);
+ }
+ }
+
+ auto It = VarLenInsts.find(EncodingDef);
+ if (It == VarLenInsts.end())
+ PrintFatalError(EncodingDef, "VarLenInst not found for this record");
+ const VarLenInst &VLI = It->second;
+
+ unsigned i = 0U, BitWidth = VLI.size();
+
+ // Start by filling in fixed values.
+ APInt Value(BitWidth, 0);
+ auto SI = VLI.begin(), SE = VLI.end();
+ // Scan through all the segments that have fixed-bits values.
+ while (i < BitWidth && SI != SE) {
+ unsigned SegmentNumBits = SI->BitWidth;
+ if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
+ for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
+ auto *B = cast<BitInit>(BI->getBit(Idx));
+ Value.setBitVal(i + Idx, B->getValue());
+ }
+ }
+ if (const auto *BI = dyn_cast<BitInit>(SI->Value))
+ Value.setBitVal(i, BI->getValue());
+
+ i += SegmentNumBits;
+ ++SI;
+ }
+
+ emitInstBits(IS, SS, Value, NumFixedValueWords);
+ IS << '\t' << "// " << R->getName() << "\n";
+ if (Value.getNumWords())
+ SS << '\t' << "// " << R->getName() << "\n";
+ }
+ IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n";
+ SS.indent(4) << "UINT64_C(0)\n };\n";
+
+ OS << IS.str() << SS.str();
+}
+
+std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
+ CodeGenTarget &Target) {
+ std::string Case;
+ if (const RecordVal *RV = R->getValue("EncodingInfos")) {
+ if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ const CodeGenHwModes &HWM = Target.getHwModes();
+ EncodingInfoByHwMode EBM(DI->getDef(), HWM);
+ Case += " switch (HwMode) {\n";
+ Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
+ for (auto &KV : EBM) {
+ Case += " case " + itostr(KV.first) + ": {\n";
+ Case += getInstructionCaseForEncoding(R, KV.second, Target);
+ Case += " break;\n";
+ Case += " }\n";
+ }
+ Case += " }\n";
+ return Case;
+ }
+ }
+ return getInstructionCaseForEncoding(R, R, Target);
+}
+
+std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
+ Record *R, Record *EncodingDef, CodeGenTarget &Target) {
+ auto It = VarLenInsts.find(EncodingDef);
+ if (It == VarLenInsts.end())
+ PrintFatalError(EncodingDef, "Parsed encoding record not found");
+ const VarLenInst &VLI = It->second;
+ size_t BitWidth = VLI.size();
+
+ CodeGenInstruction &CGI = Target.getInstruction(R);
+
+ std::string Case;
+ raw_string_ostream SS(Case);
+ // Resize the scratch buffer.
+ if (BitWidth && !VLI.isFixedValueOnly())
+ SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
+ // Populate the base value.
+ SS.indent(6) << "Inst = getInstBits(opcode);\n";
+
+ // Process each segment in VLI.
+ size_t Offset = 0U;
+ for (const auto &ES : VLI) {
+ unsigned NumBits = ES.BitWidth;
+ const Init *Val = ES.Value;
+ // If it's a StringInit or DagInit, it's a reference to an operand
+ // or part of an operand.
+ if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
+ StringRef OperandName;
+ unsigned LoBit = 0U;
+ if (const auto *SV = dyn_cast<StringInit>(Val)) {
+ OperandName = SV->getValue();
+ } else {
+ // Normalized: (slice <operand name>, <high bit>, <low bit>)
+ const auto *DV = cast<DagInit>(Val);
+ OperandName = cast<StringInit>(DV->getArg(0))->getValue();
+ LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
+ }
+
+ auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
+ unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
+ StringRef CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName;
+ if (ES.CustomEncoder.size())
+ CustomEncoder = ES.CustomEncoder;
+
+ SS.indent(6) << "Scratch.clearAllBits();\n";
+ SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
+ if (CustomEncoder.empty())
+ SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
+ << utostr(FlatOpIdx) << ")";
+ else
+ SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
+
+ SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";
+
+ SS.indent(6) << "Inst.insertBits("
+ << "Scratch.extractBits(" << utostr(NumBits) << ", "
+ << utostr(LoBit) << ")"
+ << ", " << Offset << ");\n";
+ }
+ Offset += NumBits;
+ }
+
+ StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
+ if (!PostEmitter.empty())
+ SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
+
+ return Case;
+}
+
+namespace llvm {
+
+void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
+ VarLenCodeEmitterGen(R).run(OS);
+}
+
+} // end namespace llvm
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.h b/llvm/utils/TableGen/VarLenCodeEmitterGen.h
new file mode 100644
index 000000000000..5bdedee1dd51
--- /dev/null
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.h
@@ -0,0 +1,66 @@
+//===- VarLenCodeEmitterGen.h - CEG for variable-length insts ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CodeEmitterGen component for variable-length
+// instructions. See the .cpp file for more details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_VARLENCODEEMITTERGEN_H
+#define LLVM_UTILS_TABLEGEN_VARLENCODEEMITTERGEN_H
+
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+
+struct EncodingSegment {
+ unsigned BitWidth;
+ const Init *Value;
+ StringRef CustomEncoder = "";
+};
+
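+/// Represents the parsed `Inst` DAG of a variable-length instruction (or of
+/// one of its per-HwMode encoding records) as an ordered list of fixed-bits
+/// and operand segments.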
+class VarLenInst {
+ const RecordVal *TheDef;
+ size_t NumBits;
+
+ // Set if any of the segments is not a fixed value.
+ bool HasDynamicSegment;
+
+ SmallVector<EncodingSegment, 4> Segments;
+
+ void buildRec(const DagInit *DI);
+
+ StringRef getCustomEncoderName(const Init *EI) const {
+ if (const auto *DI = dyn_cast<DagInit>(EI)) {
+ if (DI->getNumArgs() && isa<StringInit>(DI->getArg(0)))
+ return cast<StringInit>(DI->getArg(0))->getValue();
+ }
+ return "";
+ }
+
+public:
+ VarLenInst() : TheDef(nullptr), NumBits(0U), HasDynamicSegment(false) {}
+
+ explicit VarLenInst(const DagInit *DI, const RecordVal *TheDef);
+
+ /// Number of bits
+ size_t size() const { return NumBits; }
+
+ using const_iterator = decltype(Segments)::const_iterator;
+
+ const_iterator begin() const { return Segments.begin(); }
+ const_iterator end() const { return Segments.end(); }
+ size_t getNumSegments() const { return Segments.size(); }
+
+ bool isFixedValueOnly() const { return !HasDynamicSegment; }
+};
+
+void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS);
+
+} // end namespace llvm
+#endif
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 74969053f095..dc037e4409ab 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -37,8 +37,9 @@ void emitWebAssemblyDisassemblerTables(
if (!Def.getValue("Inst"))
continue;
auto &Inst = *Def.getValueAsBitsInit("Inst");
- auto Opc = static_cast<unsigned>(
- reinterpret_cast<IntInit *>(Inst.convertInitializerTo(IntRecTy::get()))
+ RecordKeeper &RK = Inst.getRecordKeeper();
+ unsigned Opc = static_cast<unsigned>(
+ cast<IntInit>(Inst.convertInitializerTo(IntRecTy::get(RK)))
->getValue());
if (Opc == 0xFFFFFFFF)
continue; // No opcode defined.
@@ -54,11 +55,7 @@ void emitWebAssemblyDisassemblerTables(
auto &CGIP = OpcodeTable[Prefix][Opc];
// All wasm instructions have a StackBased field; we only
// want the instructions for which this is true.
- auto StackString =
- Def.getValue("StackBased")->getValue()->getCastTo(StringRecTy::get());
- auto IsStackBased =
- StackString &&
- reinterpret_cast<const StringInit *>(StackString)->getValue() == "true";
+ bool IsStackBased = Def.getValueAsBit("StackBased");
if (!IsStackBased)
continue;
if (CGIP.second) {
@@ -66,14 +63,11 @@ void emitWebAssemblyDisassemblerTables(
// should be the canonical one. This determines which variant gets
// printed in a disassembly. We want e.g. "call" not "i32.call", and
// "end" when we don't know if its "end_loop" or "end_block" etc.
- auto IsCanonicalExisting = CGIP.second->TheDef->getValue("IsCanonical")
- ->getValue()
- ->getAsString() == "1";
+ bool IsCanonicalExisting = CGIP.second->TheDef->getValueAsBit("IsCanonical");
// We already have one marked explicitly as canonical, so keep it.
if (IsCanonicalExisting)
continue;
- auto IsCanonicalNew =
- Def.getValue("IsCanonical")->getValue()->getAsString() == "1";
+ bool IsCanonicalNew = Def.getValueAsBit("IsCanonical");
// If the new one is explicitly marked as canonical, take it.
if (!IsCanonicalNew) {
// Neither the existing or new instruction is canonical.
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 81ddea99740d..2fa8fce81422 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -105,8 +105,7 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_64BIT_ADSIZE:
return (noPrefix && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE, noPrefix));
case IC_64BIT_OPSIZE_ADSIZE:
- return (noPrefix &&
- inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE, noPrefix));
+ return false;
case IC_XD:
return inheritsFrom(child, IC_64BIT_XD);
case IC_XS:
@@ -127,11 +126,10 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_64BIT_OPSIZE:
return inheritsFrom(child, IC_64BIT_REXW_OPSIZE) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE)) ||
- (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE)) ||
- (!AdSize64 && inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE));
+ (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE));
case IC_64BIT_XD:
- return (inheritsFrom(child, IC_64BIT_REXW_XD) ||
- (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
+ return(inheritsFrom(child, IC_64BIT_REXW_XD) ||
+ (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
case IC_64BIT_XS:
return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_XS_ADSIZE)));
@@ -161,12 +159,7 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_VEX_OPSIZE:
return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) ||
(VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) ||
- (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)) ||
- inheritsFrom(child, IC_64BIT_VEX_OPSIZE);
- case IC_64BIT_VEX_OPSIZE:
- return inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE);
- case IC_64BIT_VEX_OPSIZE_ADSIZE:
- return false;
+ (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE));
case IC_VEX_W:
return VEX_LIG && inheritsFrom(child, IC_VEX_L_W);
case IC_VEX_W_XS:
@@ -673,7 +666,6 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
unsigned &i1, unsigned &i2,
unsigned &ModRMTableNum,
ModRMDecision &decision) const {
- static uint32_t sTableNumber = 0;
static uint32_t sEntryNumber = 1;
ModRMDecisionType dt = getDecisionType(decision);
@@ -753,8 +745,6 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
assert(sEntryNumber < 65536U &&
"Index into ModRMDecision is too large for uint16_t!");
(void)sEntryNumber;
-
- ++sTableNumber;
}
void DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2,
@@ -891,9 +881,6 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) {
if (index & ATTR_EVEX)
o << "IC_EVEX";
- else if ((index & (ATTR_64BIT | ATTR_VEXL | ATTR_REXW | ATTR_OPSIZE)) ==
- (ATTR_64BIT | ATTR_OPSIZE))
- o << "IC_64BIT_VEX";
else
o << "IC_VEX";
@@ -905,13 +892,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if (index & ATTR_REXW)
o << "_W";
- if (index & ATTR_OPSIZE) {
+ if (index & ATTR_OPSIZE)
o << "_OPSIZE";
- if ((index & (ATTR_64BIT | ATTR_EVEX | ATTR_VEX | ATTR_VEXL |
- ATTR_REXW | ATTR_ADSIZE)) ==
- (ATTR_64BIT | ATTR_VEX | ATTR_ADSIZE))
- o << "_ADSIZE";
- } else if (index & ATTR_XD)
+ else if (index & ATTR_XD)
o << "_XD";
else if (index & ATTR_XS)
o << "_XS";
@@ -925,7 +908,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if (index & ATTR_EVEXB)
o << "_B";
}
- } else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
+ }
+ else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
o << "IC_64BIT_REXW_XS";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
o << "IC_64BIT_REXW_XD";
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 36c71843d70e..1384330ee8a1 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -11,11 +11,14 @@
///
//===----------------------------------------------------------------------===//
+#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
+#include "X86RecognizableInstr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
+using namespace X86Disassembler;
namespace {
@@ -108,28 +111,25 @@ public:
IsMatch(const CodeGenInstruction *EVEXInst) : EVEXInst(EVEXInst) {}
bool operator()(const CodeGenInstruction *VEXInst) {
- Record *RecE = EVEXInst->TheDef;
- Record *RecV = VEXInst->TheDef;
- bool EVEX_W = RecE->getValueAsBit("HasVEX_W");
- bool VEX_W = RecV->getValueAsBit("HasVEX_W");
- bool VEX_WIG = RecV->getValueAsBit("IgnoresVEX_W");
- bool EVEX_WIG = RecE->getValueAsBit("IgnoresVEX_W");
- bool EVEX_W1_VEX_W0 = RecE->getValueAsBit("EVEX_W1_VEX_W0");
-
- if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" ||
- RecV->getValueAsBit("isCodeGenOnly") != RecE->getValueAsBit("isCodeGenOnly") ||
+ RecognizableInstrBase VEXRI(*VEXInst);
+ RecognizableInstrBase EVEXRI(*EVEXInst);
+ bool VEX_W = VEXRI.HasVEX_W;
+ bool EVEX_W = EVEXRI.HasVEX_W;
+ bool VEX_WIG = VEXRI.IgnoresVEX_W;
+ bool EVEX_WIG = EVEXRI.IgnoresVEX_W;
+ bool EVEX_W1_VEX_W0 = EVEXInst->TheDef->getValueAsBit("EVEX_W1_VEX_W0");
+
+ if (VEXRI.IsCodeGenOnly != EVEXRI.IsCodeGenOnly ||
// VEX/EVEX fields
- RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") ||
- RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") ||
- RecV->getValueAsBit("hasVEX_4V") != RecE->getValueAsBit("hasVEX_4V") ||
- RecV->getValueAsBit("hasEVEX_L2") != RecE->getValueAsBit("hasEVEX_L2") ||
- RecV->getValueAsBit("hasVEX_L") != RecE->getValueAsBit("hasVEX_L") ||
+ VEXRI.OpPrefix != EVEXRI.OpPrefix || VEXRI.OpMap != EVEXRI.OpMap ||
+ VEXRI.HasVEX_4V != EVEXRI.HasVEX_4V ||
+ VEXRI.HasVEX_L != EVEXRI.HasVEX_L ||
// Match is allowed if either is VEX_WIG, or they match, or EVEX
// is VEX_W1X and VEX is VEX_W0.
(!(VEX_WIG || (!EVEX_WIG && EVEX_W == VEX_W) ||
(EVEX_W1_VEX_W0 && EVEX_W && !VEX_W))) ||
// Instruction's format
- RecV->getValueAsDef("Form") != RecE->getValueAsDef("Form"))
+ VEXRI.Form != EVEXRI.Form)
return false;
// This is needed for instructions with intrinsic version (_Int).
@@ -160,31 +160,6 @@ public:
return true;
}
-
-private:
- static inline bool isRegisterOperand(const Record *Rec) {
- return Rec->isSubClassOf("RegisterClass") ||
- Rec->isSubClassOf("RegisterOperand");
- }
-
- static inline bool isMemoryOperand(const Record *Rec) {
- return Rec->isSubClassOf("Operand") &&
- Rec->getValueAsString("OperandType") == "OPERAND_MEMORY";
- }
-
- static inline bool isImmediateOperand(const Record *Rec) {
- return Rec->isSubClassOf("Operand") &&
- Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE";
- }
-
- static inline unsigned int getRegOperandSize(const Record *RegRec) {
- if (RegRec->isSubClassOf("RegisterClass"))
- return RegRec->getValueAsInt("Alignment");
- if (RegRec->isSubClassOf("RegisterOperand"))
- return RegRec->getValueAsDef("RegClass")->getValueAsInt("Alignment");
-
- llvm_unreachable("Register operand's size not known!");
- }
};
void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
@@ -206,23 +181,19 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
Target.getInstructionsByEnumValue();
for (const CodeGenInstruction *Inst : NumberedInstructions) {
+ const Record *Def = Inst->TheDef;
// Filter non-X86 instructions.
- if (!Inst->TheDef->isSubClassOf("X86Inst"))
+ if (!Def->isSubClassOf("X86Inst"))
continue;
+ RecognizableInstrBase RI(*Inst);
// Add VEX encoded instructions to one of the VEXInsts vectors according to
// its opcode.
- if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncVEX") {
- uint64_t Opcode = getValueFromBitsInit(Inst->TheDef->
- getValueAsBitsInit("Opcode"));
- VEXInsts[Opcode].push_back(Inst);
- }
+ if (RI.Encoding == X86Local::VEX)
+ VEXInsts[RI.Opcode].push_back(Inst);
// Add relevant EVEX encoded instructions to EVEXInsts
- else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncEVEX" &&
- !Inst->TheDef->getValueAsBit("hasEVEX_K") &&
- !Inst->TheDef->getValueAsBit("hasEVEX_B") &&
- !Inst->TheDef->getValueAsBit("hasEVEX_L2") &&
- !Inst->TheDef->getValueAsBit("notEVEX2VEXConvertible"))
+ else if (RI.Encoding == X86Local::EVEX && !RI.HasEVEX_K && !RI.HasEVEX_B &&
+ !RI.HasEVEX_L2 && !Def->getValueAsBit("notEVEX2VEXConvertible"))
EVEXInsts.push_back(Inst);
}
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 2a29331eb7e8..5b3f11848de6 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -18,6 +18,7 @@
#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
+using namespace X86Disassembler;
namespace {
@@ -51,27 +52,32 @@ const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
// For manually mapping instructions that do not match by their encoding.
const ManualMapEntry ManualMapSet[] = {
- { "ADD16ri_DB", "ADD16mi", NO_UNFOLD },
- { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD },
- { "ADD16rr_DB", "ADD16mr", NO_UNFOLD },
- { "ADD32ri_DB", "ADD32mi", NO_UNFOLD },
- { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD },
- { "ADD32rr_DB", "ADD32mr", NO_UNFOLD },
- { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD },
- { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD },
- { "ADD64rr_DB", "ADD64mr", NO_UNFOLD },
- { "ADD8ri_DB", "ADD8mi", NO_UNFOLD },
- { "ADD8rr_DB", "ADD8mr", NO_UNFOLD },
- { "ADD16rr_DB", "ADD16rm", NO_UNFOLD },
- { "ADD32rr_DB", "ADD32rm", NO_UNFOLD },
- { "ADD64rr_DB", "ADD64rm", NO_UNFOLD },
- { "ADD8rr_DB", "ADD8rm", NO_UNFOLD },
- { "PUSH16r", "PUSH16rmm", UNFOLD },
- { "PUSH32r", "PUSH32rmm", UNFOLD },
- { "PUSH64r", "PUSH64rmm", UNFOLD },
- { "TAILJMPr", "TAILJMPm", UNFOLD },
- { "TAILJMPr64", "TAILJMPm64", UNFOLD },
- { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD },
+ { "ADD16ri_DB", "ADD16mi", NO_UNFOLD },
+ { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD },
+ { "ADD16rr_DB", "ADD16mr", NO_UNFOLD },
+ { "ADD32ri_DB", "ADD32mi", NO_UNFOLD },
+ { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD },
+ { "ADD32rr_DB", "ADD32mr", NO_UNFOLD },
+ { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD },
+ { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD },
+ { "ADD64rr_DB", "ADD64mr", NO_UNFOLD },
+ { "ADD8ri_DB", "ADD8mi", NO_UNFOLD },
+ { "ADD8rr_DB", "ADD8mr", NO_UNFOLD },
+ { "ADD16rr_DB", "ADD16rm", NO_UNFOLD },
+ { "ADD32rr_DB", "ADD32rm", NO_UNFOLD },
+ { "ADD64rr_DB", "ADD64rm", NO_UNFOLD },
+ { "ADD8rr_DB", "ADD8rm", NO_UNFOLD },
+ { "MMX_MOVD64from64rr", "MMX_MOVQ64mr", UNFOLD },
+ { "MMX_MOVD64grr", "MMX_MOVD64mr", UNFOLD },
+ { "MOVLHPSrr", "MOVHPSrm", NO_UNFOLD },
+ { "PUSH16r", "PUSH16rmm", UNFOLD },
+ { "PUSH32r", "PUSH32rmm", UNFOLD },
+ { "PUSH64r", "PUSH64rmm", UNFOLD },
+ { "TAILJMPr", "TAILJMPm", UNFOLD },
+ { "TAILJMPr64", "TAILJMPm64", UNFOLD },
+ { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD },
+ { "VMOVLHPSZrr", "VMOVHPSZ128rm", NO_UNFOLD },
+ { "VMOVLHPSrr", "VMOVHPSrm", NO_UNFOLD },
};
@@ -114,16 +120,21 @@ class X86FoldTablesEmitter {
OS << "X86::" << MemInst->TheDef->getName() << ",";
OS.PadToColumn(75);
+ std::string Attrs;
if (IsLoad)
- OS << "TB_FOLDED_LOAD | ";
+ Attrs += "TB_FOLDED_LOAD | ";
if (IsStore)
- OS << "TB_FOLDED_STORE | ";
+ Attrs += "TB_FOLDED_STORE | ";
if (CannotUnfold)
- OS << "TB_NO_REVERSE | ";
+ Attrs += "TB_NO_REVERSE | ";
if (IsAligned)
- OS << "TB_ALIGN_" << Alignment << " | ";
+ Attrs += "TB_ALIGN_" + std::to_string(Alignment) + " | ";
- OS << "0 },\n";
+ StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("| ");
+ if (SimplifiedAttrs.empty())
+ SimplifiedAttrs = "0";
+
+ OS << SimplifiedAttrs << " },\n";
}
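
Buffering the flags in a string lets the emitter trim the trailing separator and collapse an empty flag set to a literal 0, so generated rows no longer end in "| 0". A minimal standalone sketch of that step (the helper name is hypothetical):

    #include "llvm/ADT/StringRef.h"
    #include <string>

    static std::string simplifyAttrs(const std::string &Attrs) {
      // Drop any trailing '|' and ' ' characters left by the appends above.
      llvm::StringRef S = llvm::StringRef(Attrs).rtrim("| ");
      return S.empty() ? "0" : S.str();
    }
    // simplifyAttrs("TB_FOLDED_LOAD | ") -> "TB_FOLDED_LOAD"
    // simplifyAttrs("")                  -> "0"
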
bool operator<(const X86FoldTableEntry &RHS) const {
@@ -207,56 +218,6 @@ static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
return Value;
}
-// Returns true if the two given BitsInits represent the same integer value
-static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) {
- if (B1->getNumBits() != B2->getNumBits())
- PrintFatalError("Comparing two BitsInits with different sizes!");
-
- for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) {
- BitInit *Bit1 = cast<BitInit>(B1->getBit(i));
- BitInit *Bit2 = cast<BitInit>(B2->getBit(i));
- if (Bit1->getValue() != Bit2->getValue())
- return false;
- }
- return true;
-}
-
-// Return the size of the register operand
-static inline unsigned int getRegOperandSize(const Record *RegRec) {
- if (RegRec->isSubClassOf("RegisterOperand"))
- RegRec = RegRec->getValueAsDef("RegClass");
- if (RegRec->isSubClassOf("RegisterClass"))
- return RegRec->getValueAsListOfDefs("RegTypes")[0]->getValueAsInt("Size");
-
- llvm_unreachable("Register operand's size not known!");
-}
-
-// Return the size of the memory operand
-static inline unsigned getMemOperandSize(const Record *MemRec) {
- if (MemRec->isSubClassOf("Operand")) {
- StringRef Name =
- MemRec->getValueAsDef("ParserMatchClass")->getValueAsString("Name");
- if (Name == "Mem8")
- return 8;
- if (Name == "Mem16")
- return 16;
- if (Name == "Mem32")
- return 32;
- if (Name == "Mem64")
- return 64;
- if (Name == "Mem80")
- return 80;
- if (Name == "Mem128")
- return 128;
- if (Name == "Mem256")
- return 256;
- if (Name == "Mem512")
- return 512;
- }
-
- llvm_unreachable("Memory operand's size not known!");
-}
-
// Return true if the instruction is defined as a register flavor.
static inline bool hasRegisterFormat(const Record *Inst) {
const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits");
@@ -279,22 +240,6 @@ static inline bool isNOREXRegClass(const Record *Op) {
return Op->getName().contains("_NOREX");
}
-static inline bool isRegisterOperand(const Record *Rec) {
- return Rec->isSubClassOf("RegisterClass") ||
- Rec->isSubClassOf("RegisterOperand") ||
- Rec->isSubClassOf("PointerLikeRegClass");
-}
-
-static inline bool isMemoryOperand(const Record *Rec) {
- return Rec->isSubClassOf("Operand") &&
- Rec->getValueAsString("OperandType") == "OPERAND_MEMORY";
-}
-
-static inline bool isImmediateOperand(const Record *Rec) {
- return Rec->isSubClassOf("Operand") &&
- Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE";
-}
-
// Get the alternative instruction pointed by "FoldGenRegForm" field.
static inline const CodeGenInstruction *
getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records,
@@ -312,61 +257,59 @@ getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records,
// matches the EVEX instruction of this object.
class IsMatch {
const CodeGenInstruction *MemInst;
+ unsigned Variant;
public:
- IsMatch(const CodeGenInstruction *Inst, const RecordKeeper &Records)
- : MemInst(Inst) {}
+ IsMatch(const CodeGenInstruction *Inst, unsigned V)
+ : MemInst(Inst), Variant(V) {}
bool operator()(const CodeGenInstruction *RegInst) {
- Record *MemRec = MemInst->TheDef;
- Record *RegRec = RegInst->TheDef;
+ X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
+ X86Disassembler::RecognizableInstrBase MemRI(*MemInst);
+ const Record *RegRec = RegInst->TheDef;
+ const Record *MemRec = MemInst->TheDef;
+
+ // EVEX_B means different things for memory and register forms.
+ if (RegRI.HasEVEX_B != 0 || MemRI.HasEVEX_B != 0)
+ return false;
+
+ // Instruction's format - The register form's "Form" field should be
+ // the opposite of the memory form's "Form" field.
+ if (!areOppositeForms(RegRI.Form, MemRI.Form))
+ return false;
+
+    // X86 encoding is crazy, e.g.
+ //
+ // f3 0f c7 30 vmxon (%rax)
+ // f3 0f c7 f0 senduipi %rax
+ //
+    // These two instructions have similar encoding fields but are unrelated.
+ if (X86Disassembler::getMnemonic(MemInst, Variant) !=
+ X86Disassembler::getMnemonic(RegInst, Variant))
+ return false;
    // Return false if at least one of the encoding fields of the two
    // instructions does not match.
- if (RegRec->getValueAsDef("OpEnc") != MemRec->getValueAsDef("OpEnc") ||
- !equalBitsInits(RegRec->getValueAsBitsInit("Opcode"),
- MemRec->getValueAsBitsInit("Opcode")) ||
- // VEX/EVEX fields
- RegRec->getValueAsDef("OpPrefix") !=
- MemRec->getValueAsDef("OpPrefix") ||
- RegRec->getValueAsDef("OpMap") != MemRec->getValueAsDef("OpMap") ||
- RegRec->getValueAsDef("OpSize") != MemRec->getValueAsDef("OpSize") ||
- RegRec->getValueAsDef("AdSize") != MemRec->getValueAsDef("AdSize") ||
- RegRec->getValueAsBit("hasVEX_4V") !=
- MemRec->getValueAsBit("hasVEX_4V") ||
- RegRec->getValueAsBit("hasEVEX_K") !=
- MemRec->getValueAsBit("hasEVEX_K") ||
- RegRec->getValueAsBit("hasEVEX_Z") !=
- MemRec->getValueAsBit("hasEVEX_Z") ||
- // EVEX_B means different things for memory and register forms.
- RegRec->getValueAsBit("hasEVEX_B") != 0 ||
- MemRec->getValueAsBit("hasEVEX_B") != 0 ||
+ if (RegRI.Encoding != MemRI.Encoding || RegRI.Opcode != MemRI.Opcode ||
+ RegRI.OpPrefix != MemRI.OpPrefix || RegRI.OpMap != MemRI.OpMap ||
+ RegRI.OpSize != MemRI.OpSize || RegRI.AdSize != MemRI.AdSize ||
+ RegRI.HasREX_W != MemRI.HasREX_W ||
+ RegRI.HasVEX_4V != MemRI.HasVEX_4V ||
+ RegRI.HasVEX_L != MemRI.HasVEX_L ||
+ RegRI.HasVEX_W != MemRI.HasVEX_W ||
+ RegRI.IgnoresVEX_L != MemRI.IgnoresVEX_L ||
+ RegRI.IgnoresVEX_W != MemRI.IgnoresVEX_W ||
+ RegRI.HasEVEX_K != MemRI.HasEVEX_K ||
+ RegRI.HasEVEX_KZ != MemRI.HasEVEX_KZ ||
+ RegRI.HasEVEX_L2 != MemRI.HasEVEX_L2 ||
RegRec->getValueAsBit("hasEVEX_RC") !=
MemRec->getValueAsBit("hasEVEX_RC") ||
- RegRec->getValueAsBit("hasREX_WPrefix") !=
- MemRec->getValueAsBit("hasREX_WPrefix") ||
RegRec->getValueAsBit("hasLockPrefix") !=
MemRec->getValueAsBit("hasLockPrefix") ||
RegRec->getValueAsBit("hasNoTrackPrefix") !=
MemRec->getValueAsBit("hasNoTrackPrefix") ||
- RegRec->getValueAsBit("hasVEX_L") !=
- MemRec->getValueAsBit("hasVEX_L") ||
- RegRec->getValueAsBit("hasEVEX_L2") !=
- MemRec->getValueAsBit("hasEVEX_L2") ||
- RegRec->getValueAsBit("ignoresVEX_L") !=
- MemRec->getValueAsBit("ignoresVEX_L") ||
- RegRec->getValueAsBit("HasVEX_W") !=
- MemRec->getValueAsBit("HasVEX_W") ||
- RegRec->getValueAsBit("IgnoresVEX_W") !=
- MemRec->getValueAsBit("IgnoresVEX_W") ||
RegRec->getValueAsBit("EVEX_W1_VEX_W0") !=
- MemRec->getValueAsBit("EVEX_W1_VEX_W0") ||
- // Instruction's format - The register form's "Form" field should be
- // the opposite of the memory form's "Form" field.
- !areOppositeForms(RegRec->getValueAsBitsInit("FormBits"),
- MemRec->getValueAsBitsInit("FormBits")) ||
- RegRec->getValueAsBit("isAsmParserOnly") !=
- MemRec->getValueAsBit("isAsmParserOnly"))
+ MemRec->getValueAsBit("EVEX_W1_VEX_W0"))
return false;
// Make sure the sizes of the operands of both instructions suit each other.
@@ -419,31 +362,24 @@ public:
private:
  // Return true if the two given forms are the opposite of each other.
- bool areOppositeForms(const BitsInit *RegFormBits,
- const BitsInit *MemFormBits) {
- uint64_t MemFormNum = getValueFromBitsInit(MemFormBits);
- uint64_t RegFormNum = getValueFromBitsInit(RegFormBits);
-
- if ((MemFormNum == X86Local::MRM0m && RegFormNum == X86Local::MRM0r) ||
- (MemFormNum == X86Local::MRM1m && RegFormNum == X86Local::MRM1r) ||
- (MemFormNum == X86Local::MRM2m && RegFormNum == X86Local::MRM2r) ||
- (MemFormNum == X86Local::MRM3m && RegFormNum == X86Local::MRM3r) ||
- (MemFormNum == X86Local::MRM4m && RegFormNum == X86Local::MRM4r) ||
- (MemFormNum == X86Local::MRM5m && RegFormNum == X86Local::MRM5r) ||
- (MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) ||
- (MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) ||
- (MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) ||
- (MemFormNum == X86Local::MRMXmCC && RegFormNum == X86Local::MRMXrCC) ||
- (MemFormNum == X86Local::MRMDestMem &&
- RegFormNum == X86Local::MRMDestReg) ||
- (MemFormNum == X86Local::MRMSrcMem &&
- RegFormNum == X86Local::MRMSrcReg) ||
- (MemFormNum == X86Local::MRMSrcMem4VOp3 &&
- RegFormNum == X86Local::MRMSrcReg4VOp3) ||
- (MemFormNum == X86Local::MRMSrcMemOp4 &&
- RegFormNum == X86Local::MRMSrcRegOp4) ||
- (MemFormNum == X86Local::MRMSrcMemCC &&
- RegFormNum == X86Local::MRMSrcRegCC))
+ bool areOppositeForms(unsigned RegForm, unsigned MemForm) {
+ if ((MemForm == X86Local::MRM0m && RegForm == X86Local::MRM0r) ||
+ (MemForm == X86Local::MRM1m && RegForm == X86Local::MRM1r) ||
+ (MemForm == X86Local::MRM2m && RegForm == X86Local::MRM2r) ||
+ (MemForm == X86Local::MRM3m && RegForm == X86Local::MRM3r) ||
+ (MemForm == X86Local::MRM4m && RegForm == X86Local::MRM4r) ||
+ (MemForm == X86Local::MRM5m && RegForm == X86Local::MRM5r) ||
+ (MemForm == X86Local::MRM6m && RegForm == X86Local::MRM6r) ||
+ (MemForm == X86Local::MRM7m && RegForm == X86Local::MRM7r) ||
+ (MemForm == X86Local::MRMXm && RegForm == X86Local::MRMXr) ||
+ (MemForm == X86Local::MRMXmCC && RegForm == X86Local::MRMXrCC) ||
+ (MemForm == X86Local::MRMDestMem && RegForm == X86Local::MRMDestReg) ||
+ (MemForm == X86Local::MRMSrcMem && RegForm == X86Local::MRMSrcReg) ||
+ (MemForm == X86Local::MRMSrcMem4VOp3 &&
+ RegForm == X86Local::MRMSrcReg4VOp3) ||
+ (MemForm == X86Local::MRMSrcMemOp4 &&
+ RegForm == X86Local::MRMSrcRegOp4) ||
+ (MemForm == X86Local::MRMSrcMemCC && RegForm == X86Local::MRMSrcRegCC))
return true;
return false;
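
The rewritten areOppositeForms keeps the original pairwise comparison; for illustration, the same relation expressed as a lookup table (an alternative shape, not what the patch uses):

    #include <utility>

    static bool areOppositeFormsTable(unsigned RegForm, unsigned MemForm) {
      // Register/memory form pairs taken from the condition above.
      static const std::pair<unsigned, unsigned> RegMem[] = {
          {X86Local::MRM0r, X86Local::MRM0m},
          {X86Local::MRM1r, X86Local::MRM1m},
          {X86Local::MRM2r, X86Local::MRM2m},
          {X86Local::MRM3r, X86Local::MRM3m},
          {X86Local::MRM4r, X86Local::MRM4m},
          {X86Local::MRM5r, X86Local::MRM5m},
          {X86Local::MRM6r, X86Local::MRM6m},
          {X86Local::MRM7r, X86Local::MRM7m},
          {X86Local::MRMXr, X86Local::MRMXm},
          {X86Local::MRMXrCC, X86Local::MRMXmCC},
          {X86Local::MRMDestReg, X86Local::MRMDestMem},
          {X86Local::MRMSrcReg, X86Local::MRMSrcMem},
          {X86Local::MRMSrcReg4VOp3, X86Local::MRMSrcMem4VOp3},
          {X86Local::MRMSrcRegOp4, X86Local::MRMSrcMemOp4},
          {X86Local::MRMSrcRegCC, X86Local::MRMSrcMemCC},
      };
      for (const auto &P : RegMem)
        if (RegForm == P.first && MemForm == P.second)
          return true;
      return false;
    }
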
@@ -535,7 +471,10 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
for (unsigned i = RegOutSize, e = RegInstr->Operands.size(); i < e; i++) {
Record *RegOpRec = RegInstr->Operands[i].Rec;
Record *MemOpRec = MemInstr->Operands[i].Rec;
- if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)) {
+    // PointerLikeRegClass: for instructions like TAILJMPr, TAILJMPr64,
+    // TAILJMPr64_REX.
+ if ((isRegisterOperand(RegOpRec) ||
+ RegOpRec->isSubClassOf("PointerLikeRegClass")) &&
+ isMemoryOperand(MemOpRec)) {
switch (i) {
case 0:
addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
@@ -583,10 +522,9 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
Target.getInstructionsByEnumValue();
for (const CodeGenInstruction *Inst : NumberedInstructions) {
- if (!Inst->TheDef->getNameInit() || !Inst->TheDef->isSubClassOf("X86Inst"))
- continue;
-
const Record *Rec = Inst->TheDef;
+ if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))
+ continue;
// - Do not proceed if the instruction is marked as notMemoryFoldable.
// - Instructions including RST register class operands are not relevant
@@ -611,6 +549,8 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
}
}
+ Record *AsmWriter = Target.getAsmWriter();
+ unsigned Variant = AsmWriter->getValueAsInt("Variant");
// For each memory form instruction, try to find its register form
// instruction.
for (const CodeGenInstruction *MemInst : MemInsts) {
@@ -626,7 +566,7 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
// opcode.
std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;
- auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Records));
+ auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Variant));
if (Match != OpcRegInsts.end()) {
const CodeGenInstruction *RegInst = *Match;
      // If the matched instruction has its "FoldGenRegForm" set, map the
diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
new file mode 100644
index 000000000000..f405e051e355
--- /dev/null
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -0,0 +1,94 @@
+//==- X86MnemonicTables.cpp - Generate mnemonic extraction tables. -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend is responsible for emitting tables that group
+// instructions by their mnemonic name with respect to the AsmWriter variant
+// (e.g. isADD).
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "X86RecognizableInstr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/TableGenBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86MnemonicTablesEmitter {
+ CodeGenTarget Target;
+
+public:
+ X86MnemonicTablesEmitter(RecordKeeper &R) : Target(R) {}
+
+ // Output X86 mnemonic tables.
+ void run(raw_ostream &OS);
+};
+
+void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
+ emitSourceFileHeader("X86 Mnemonic tables", OS);
+ OS << "namespace llvm {\nnamespace X86 {\n\n";
+ Record *AsmWriter = Target.getAsmWriter();
+ unsigned Variant = AsmWriter->getValueAsInt("Variant");
+
+ // Hold all instructions grouped by mnemonic
+ StringMap<SmallVector<const CodeGenInstruction *, 0>> MnemonicToCGInstrMap;
+
+ ArrayRef<const CodeGenInstruction *> NumberedInstructions =
+ Target.getInstructionsByEnumValue();
+ for (const CodeGenInstruction *I : NumberedInstructions) {
+ const Record *Def = I->TheDef;
+ // Filter non-X86 instructions.
+ if (!Def->isSubClassOf("X86Inst"))
+ continue;
+ X86Disassembler::RecognizableInstrBase RI(*I);
+ if (!RI.shouldBeEmitted())
+ continue;
+ if ( // Non-parsable instruction defs contain prefix as part of AsmString
+ Def->getValueAsString("AsmVariantName") == "NonParsable" ||
+ // Skip prefix byte
+ RI.Form == X86Local::PrefixByte)
+ continue;
+ std::string Mnemonic = X86Disassembler::getMnemonic(I, Variant);
+ MnemonicToCGInstrMap[Mnemonic].push_back(I);
+ }
+
+ OS << "#ifdef GET_X86_MNEMONIC_TABLES_H\n";
+ OS << "#undef GET_X86_MNEMONIC_TABLES_H\n\n";
+ for (StringRef Mnemonic : MnemonicToCGInstrMap.keys())
+ OS << "bool is" << Mnemonic << "(unsigned Opcode);\n";
+ OS << "#endif // GET_X86_MNEMONIC_TABLES_H\n\n";
+
+ OS << "#ifdef GET_X86_MNEMONIC_TABLES_CPP\n";
+ OS << "#undef GET_X86_MNEMONIC_TABLES_CPP\n\n";
+ for (StringRef Mnemonic : MnemonicToCGInstrMap.keys()) {
+ OS << "bool is" << Mnemonic << "(unsigned Opcode) {\n";
+ auto Mnemonics = MnemonicToCGInstrMap[Mnemonic];
+ if (Mnemonics.size() == 1) {
+ const CodeGenInstruction *CGI = *Mnemonics.begin();
+ OS << "\treturn Opcode == " << CGI->TheDef->getName() << ";\n}\n\n";
+ } else {
+ OS << "\tswitch (Opcode) {\n";
+ for (const CodeGenInstruction *CGI : Mnemonics) {
+ OS << "\tcase " << CGI->TheDef->getName() << ":\n";
+ }
+ OS << "\t\treturn true;\n\t}\n\treturn false;\n}\n\n";
+ }
+ }
+ OS << "#endif // GET_X86_MNEMONIC_TABLES_CPP\n\n";
+ OS << "} // end namespace X86\n} // end namespace llvm";
+}
+
+} // namespace
+
+namespace llvm {
+void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS) {
+ X86MnemonicTablesEmitter(RK).run(OS);
+}
+} // namespace llvm
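
For orientation, the generated include file is shaped as follows; the mnemonics and opcode names here are illustrative, not actual emitter output:

    #ifdef GET_X86_MNEMONIC_TABLES_H
    #undef GET_X86_MNEMONIC_TABLES_H
    bool isVMXON(unsigned Opcode);
    bool isADD(unsigned Opcode);
    #endif // GET_X86_MNEMONIC_TABLES_H

    #ifdef GET_X86_MNEMONIC_TABLES_CPP
    #undef GET_X86_MNEMONIC_TABLES_CPP
    bool isVMXON(unsigned Opcode) {
      return Opcode == VMXON;      // single instruction for this mnemonic
    }
    bool isADD(unsigned Opcode) {
      switch (Opcode) {            // several instructions share one mnemonic
      case ADD32rr:
      case ADD32rm:
        return true;
      }
      return false;
    }
    #endif // GET_X86_MNEMONIC_TABLES_CPP
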
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index 4023d8f57318..9afde66fe6f3 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -24,6 +24,51 @@
using namespace llvm;
using namespace X86Disassembler;
+std::string X86Disassembler::getMnemonic(const CodeGenInstruction *I,
+                                         unsigned Variant) {
+ std::string AsmString = I->FlattenAsmStringVariants(I->AsmString, Variant);
+ StringRef Mnemonic(AsmString);
+ // Extract a mnemonic assuming it's separated by \t
+ Mnemonic = Mnemonic.take_until([](char C) { return C == '\t'; });
+
+ // Special case: CMOVCC, JCC, SETCC have "${cond}" in mnemonic.
+ // Replace it with "CC" in-place.
+ size_t CondPos = Mnemonic.find("${cond}");
+ if (CondPos != StringRef::npos)
+ Mnemonic = AsmString.replace(CondPos, StringRef::npos, "CC");
+ return Mnemonic.upper();
+}
+
+bool X86Disassembler::isRegisterOperand(const Record *Rec) {
+ return Rec->isSubClassOf("RegisterClass") ||
+ Rec->isSubClassOf("RegisterOperand");
+}
+
+bool X86Disassembler::isMemoryOperand(const Record *Rec) {
+ return Rec->isSubClassOf("Operand") &&
+ Rec->getValueAsString("OperandType") == "OPERAND_MEMORY";
+}
+
+bool X86Disassembler::isImmediateOperand(const Record *Rec) {
+ return Rec->isSubClassOf("Operand") &&
+ Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE";
+}
+
+unsigned X86Disassembler::getRegOperandSize(const Record *RegRec) {
+ if (RegRec->isSubClassOf("RegisterClass"))
+ return RegRec->getValueAsInt("Alignment");
+ if (RegRec->isSubClassOf("RegisterOperand"))
+ return RegRec->getValueAsDef("RegClass")->getValueAsInt("Alignment");
+
+ llvm_unreachable("Register operand's size not known!");
+}
+
+unsigned X86Disassembler::getMemOperandSize(const Record *MemRec) {
+ if (MemRec->isSubClassOf("X86MemOperand"))
+ return MemRec->getValueAsInt("Size");
+
+ llvm_unreachable("Memory operand's size not known!");
+}
+
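
Two worked examples of the extraction above, with illustrative asm strings:

    // "vmxon\t$src":      take_until('\t') yields "vmxon",  upper() -> "VMXON"
    // "set${cond}\t$dst": "${cond}" is found, the tail is replaced with "CC",
    //                     giving "setCC",                   upper() -> "SETCC"
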
/// byteFromBitsInit - Extracts a value at most 8 bits in width from a BitsInit.
/// Useful for switch statements and the like.
///
@@ -61,55 +106,49 @@ static uint8_t byteFromRec(const Record* rec, StringRef name) {
return byteFromBitsInit(*bits);
}
-RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
- const CodeGenInstruction &insn,
- InstrUID uid) {
- UID = uid;
-
- Rec = insn.TheDef;
- Name = std::string(Rec->getName());
- Spec = &tables.specForUID(UID);
-
- if (!Rec->isSubClassOf("X86Inst")) {
- ShouldBeEmitted = false;
- return;
- }
-
+RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) {
+ const Record *Rec = insn.TheDef;
+  assert(Rec->isSubClassOf("X86Inst") && "Not an X86 instruction");
OpPrefix = byteFromRec(Rec, "OpPrefixBits");
- OpMap = byteFromRec(Rec, "OpMapBits");
- Opcode = byteFromRec(Rec, "Opcode");
- Form = byteFromRec(Rec, "FormBits");
+ OpMap = byteFromRec(Rec, "OpMapBits");
+ Opcode = byteFromRec(Rec, "Opcode");
+ Form = byteFromRec(Rec, "FormBits");
Encoding = byteFromRec(Rec, "OpEncBits");
-
- OpSize = byteFromRec(Rec, "OpSizeBits");
- AdSize = byteFromRec(Rec, "AdSizeBits");
- HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix");
- HasVEX_4V = Rec->getValueAsBit("hasVEX_4V");
- HasVEX_W = Rec->getValueAsBit("HasVEX_W");
- IgnoresVEX_W = Rec->getValueAsBit("IgnoresVEX_W");
- IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L");
- HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2");
- HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
- HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z");
- HasEVEX_B = Rec->getValueAsBit("hasEVEX_B");
- IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
- ForceDisassemble = Rec->getValueAsBit("ForceDisassemble");
- CD8_Scale = byteFromRec(Rec, "CD8_Scale");
-
- Name = std::string(Rec->getName());
-
- Operands = &insn.Operands.OperandList;
-
- HasVEX_LPrefix = Rec->getValueAsBit("hasVEX_L");
+ OpSize = byteFromRec(Rec, "OpSizeBits");
+ AdSize = byteFromRec(Rec, "AdSizeBits");
+ HasREX_W = Rec->getValueAsBit("hasREX_W");
+ HasVEX_4V = Rec->getValueAsBit("hasVEX_4V");
+ HasVEX_W = Rec->getValueAsBit("HasVEX_W");
+ IgnoresVEX_W = Rec->getValueAsBit("IgnoresVEX_W");
+ IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L");
+ HasEVEX_L2 = Rec->getValueAsBit("hasEVEX_L2");
+ HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
+ HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z");
+ HasEVEX_B = Rec->getValueAsBit("hasEVEX_B");
+ IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
+ IsAsmParserOnly = Rec->getValueAsBit("isAsmParserOnly");
+ ForceDisassemble = Rec->getValueAsBit("ForceDisassemble");
+ CD8_Scale = byteFromRec(Rec, "CD8_Scale");
+ HasVEX_L = Rec->getValueAsBit("hasVEX_L");
EncodeRC = HasEVEX_B &&
(Form == X86Local::MRMDestReg || Form == X86Local::MRMSrcReg);
+}
+
+bool RecognizableInstrBase::shouldBeEmitted() const {
+ return Form != X86Local::Pseudo && (!IsCodeGenOnly || ForceDisassemble) &&
+ !IsAsmParserOnly;
+}
+RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
+ const CodeGenInstruction &insn,
+ InstrUID uid)
+ : RecognizableInstrBase(insn), Rec(insn.TheDef), Name(Rec->getName().str()),
+ Is32Bit(false), Is64Bit(false), Operands(&insn.Operands.OperandList),
+ UID(uid), Spec(&tables.specForUID(uid)) {
// Check for 64-bit inst which does not require REX
- Is32Bit = false;
- Is64Bit = false;
// FIXME: Is there some better way to check for In64BitMode?
- std::vector<Record*> Predicates = Rec->getValueAsListOfDefs("Predicates");
+ std::vector<Record *> Predicates = Rec->getValueAsListOfDefs("Predicates");
for (unsigned i = 0, e = Predicates.size(); i != e; ++i) {
if (Predicates[i]->getName().contains("Not64Bit") ||
Predicates[i]->getName().contains("In32Bit")) {
@@ -121,29 +160,19 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
break;
}
}
-
- if (Form == X86Local::Pseudo || (IsCodeGenOnly && !ForceDisassemble)) {
- ShouldBeEmitted = false;
- return;
- }
-
- ShouldBeEmitted = true;
}
void RecognizableInstr::processInstr(DisassemblerTables &tables,
const CodeGenInstruction &insn,
- InstrUID uid)
-{
- // Ignore "asm parser only" instructions.
- if (insn.TheDef->getValueAsBit("isAsmParserOnly"))
+ InstrUID uid) {
+ if (!insn.TheDef->isSubClassOf("X86Inst"))
return;
-
RecognizableInstr recogInstr(tables, insn, uid);
- if (recogInstr.shouldBeEmitted()) {
- recogInstr.emitInstructionSpecifier();
- recogInstr.emitDecodePath(tables);
- }
+ if (!recogInstr.shouldBeEmitted())
+ return;
+ recogInstr.emitInstructionSpecifier();
+ recogInstr.emitDecodePath(tables);
}
#define EVEX_KB(n) (HasEVEX_KZ && HasEVEX_B ? n##_KZ_B : \
@@ -155,12 +184,12 @@ InstructionContext RecognizableInstr::insnContext() const {
InstructionContext insnContext;
if (Encoding == X86Local::EVEX) {
- if (HasVEX_LPrefix && HasEVEX_L2Prefix) {
+ if (HasVEX_L && HasEVEX_L2) {
errs() << "Don't support VEX.L if EVEX_L2 is enabled: " << Name << "\n";
llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled");
}
// VEX_L & VEX_W
- if (!EncodeRC && HasVEX_LPrefix && HasVEX_W) {
+ if (!EncodeRC && HasVEX_L && HasVEX_W) {
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE);
else if (OpPrefix == X86Local::XS)
@@ -173,7 +202,7 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Instruction does not use a prefix: " << Name << "\n";
llvm_unreachable("Invalid prefix");
}
- } else if (!EncodeRC && HasVEX_LPrefix) {
+ } else if (!EncodeRC && HasVEX_L) {
// VEX_L
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L_OPSIZE);
@@ -187,7 +216,7 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Instruction does not use a prefix: " << Name << "\n";
llvm_unreachable("Invalid prefix");
}
- } else if (!EncodeRC && HasEVEX_L2Prefix && HasVEX_W) {
+ } else if (!EncodeRC && HasEVEX_L2 && HasVEX_W) {
// EVEX_L2 & VEX_W
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE);
@@ -201,7 +230,7 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Instruction does not use a prefix: " << Name << "\n";
llvm_unreachable("Invalid prefix");
}
- } else if (!EncodeRC && HasEVEX_L2Prefix) {
+ } else if (!EncodeRC && HasEVEX_L2) {
// EVEX_L2
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L2_OPSIZE);
@@ -246,7 +275,7 @@ InstructionContext RecognizableInstr::insnContext() const {
}
/// eof EVEX
} else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) {
- if (HasVEX_LPrefix && HasVEX_W) {
+ if (HasVEX_L && HasVEX_W) {
if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_L_W_OPSIZE;
else if (OpPrefix == X86Local::XS)
@@ -259,20 +288,15 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Instruction does not use a prefix: " << Name << "\n";
llvm_unreachable("Invalid prefix");
}
- } else if (OpPrefix == X86Local::PD && HasVEX_LPrefix)
+ } else if (OpPrefix == X86Local::PD && HasVEX_L)
insnContext = IC_VEX_L_OPSIZE;
else if (OpPrefix == X86Local::PD && HasVEX_W)
insnContext = IC_VEX_W_OPSIZE;
- else if (OpPrefix == X86Local::PD && Is64Bit &&
- AdSize == X86Local::AdSize32)
- insnContext = IC_64BIT_VEX_OPSIZE_ADSIZE;
- else if (OpPrefix == X86Local::PD && Is64Bit)
- insnContext = IC_64BIT_VEX_OPSIZE;
else if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_OPSIZE;
- else if (HasVEX_LPrefix && OpPrefix == X86Local::XS)
+ else if (HasVEX_L && OpPrefix == X86Local::XS)
insnContext = IC_VEX_L_XS;
- else if (HasVEX_LPrefix && OpPrefix == X86Local::XD)
+ else if (HasVEX_L && OpPrefix == X86Local::XD)
insnContext = IC_VEX_L_XD;
else if (HasVEX_W && OpPrefix == X86Local::XS)
insnContext = IC_VEX_W_XS;
@@ -280,7 +304,7 @@ InstructionContext RecognizableInstr::insnContext() const {
insnContext = IC_VEX_W_XD;
else if (HasVEX_W && OpPrefix == X86Local::PS)
insnContext = IC_VEX_W;
- else if (HasVEX_LPrefix && OpPrefix == X86Local::PS)
+ else if (HasVEX_L && OpPrefix == X86Local::PS)
insnContext = IC_VEX_L;
else if (OpPrefix == X86Local::XD)
insnContext = IC_VEX_XD;
@@ -292,10 +316,10 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Instruction does not use a prefix: " << Name << "\n";
llvm_unreachable("Invalid prefix");
}
- } else if (Is64Bit || HasREX_WPrefix || AdSize == X86Local::AdSize64) {
- if (HasREX_WPrefix && (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD))
+ } else if (Is64Bit || HasREX_W || AdSize == X86Local::AdSize64) {
+ if (HasREX_W && (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD))
insnContext = IC_64BIT_REXW_OPSIZE;
- else if (HasREX_WPrefix && AdSize == X86Local::AdSize32)
+ else if (HasREX_W && AdSize == X86Local::AdSize32)
insnContext = IC_64BIT_REXW_ADSIZE;
else if (OpSize == X86Local::OpSize16 && OpPrefix == X86Local::XD)
insnContext = IC_64BIT_XD_OPSIZE;
@@ -309,15 +333,15 @@ InstructionContext RecognizableInstr::insnContext() const {
insnContext = IC_64BIT_OPSIZE;
else if (AdSize == X86Local::AdSize32)
insnContext = IC_64BIT_ADSIZE;
- else if (HasREX_WPrefix && OpPrefix == X86Local::XS)
+ else if (HasREX_W && OpPrefix == X86Local::XS)
insnContext = IC_64BIT_REXW_XS;
- else if (HasREX_WPrefix && OpPrefix == X86Local::XD)
+ else if (HasREX_W && OpPrefix == X86Local::XD)
insnContext = IC_64BIT_REXW_XD;
else if (OpPrefix == X86Local::XD)
insnContext = IC_64BIT_XD;
else if (OpPrefix == X86Local::XS)
insnContext = IC_64BIT_XS;
- else if (HasREX_WPrefix)
+ else if (HasREX_W)
insnContext = IC_64BIT_REXW;
else
insnContext = IC_64BIT;
@@ -392,7 +416,7 @@ void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex,
adjustOperandEncoding(encoding);
Spec->operands[operandIndex].encoding = encoding;
Spec->operands[operandIndex].type =
- typeFromString(std::string(typeName), HasREX_WPrefix, OpSize);
+ typeFromString(std::string(typeName), HasREX_W, OpSize);
++operandIndex;
++physicalOperandIndex;
@@ -835,13 +859,13 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
if (Form == X86Local::AddRegFrm || Form == X86Local::MRMSrcRegCC ||
Form == X86Local::MRMSrcMemCC || Form == X86Local::MRMXrCC ||
Form == X86Local::MRMXmCC || Form == X86Local::AddCCFrm) {
- unsigned Count = Form == X86Local::AddRegFrm ? 8 : 16;
+ uint8_t Count = Form == X86Local::AddRegFrm ? 8 : 16;
assert(((opcodeToSet % Count) == 0) && "ADDREG_FRM opcode not aligned");
uint8_t currentOpcode;
- for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + Count;
- ++currentOpcode)
+ for (currentOpcode = opcodeToSet;
+ currentOpcode < (uint8_t)(opcodeToSet + Count); ++currentOpcode)
tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter,
UID, Is32Bit, OpPrefix == 0,
IgnoresVEX_L || EncodeRC,
@@ -857,9 +881,9 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
#define TYPE(str, type) if (s == str) return type;
OperandType RecognizableInstr::typeFromString(const std::string &s,
- bool hasREX_WPrefix,
+ bool hasREX_W,
uint8_t OpSize) {
- if(hasREX_WPrefix) {
+  if (hasREX_W) {
// For instructions with a REX_W prefix, a declared 32-bit register encoding
// is special.
TYPE("GR32", TYPE_R32)
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index 8f557d9ee5f5..67aba26a142b 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -158,16 +158,8 @@ namespace X86Disassembler {
class DisassemblerTables;
-/// RecognizableInstr - Encapsulates all information required to decode a single
-/// instruction, as extracted from the LLVM instruction tables. Has methods
-/// to interpret the information available in the LLVM tables, and to emit the
-/// instruction into DisassemblerTables.
-class RecognizableInstr {
-private:
- /// The opcode of the instruction, as used in an MCInst
- InstrUID UID;
- /// The record from the .td files corresponding to this instruction
- const Record* Rec;
+/// Extract common fields of a single X86 instruction from a CodeGenInstruction
+struct RecognizableInstrBase {
/// The OpPrefix field from the record
uint8_t OpPrefix;
/// The OpMap field from the record
@@ -183,20 +175,20 @@ private:
uint8_t OpSize;
/// The AdSize field from the record
uint8_t AdSize;
- /// The hasREX_WPrefix field from the record
- bool HasREX_WPrefix;
+ /// The hasREX_W field from the record
+ bool HasREX_W;
/// The hasVEX_4V field from the record
bool HasVEX_4V;
  /// The HasVEX_W field from the record
bool HasVEX_W;
/// The IgnoresVEX_W field from the record
bool IgnoresVEX_W;
- /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
- bool HasVEX_LPrefix;
+ /// The hasVEX_L field from the record
+ bool HasVEX_L;
  /// The ignoresVEX_L field from the record
bool IgnoresVEX_L;
  /// The hasEVEX_L2 field from the record
- bool HasEVEX_L2Prefix;
+ bool HasEVEX_L2;
/// The hasEVEX_K field from the record
bool HasEVEX_K;
/// The hasEVEX_KZ field from the record
@@ -207,27 +199,39 @@ private:
bool EncodeRC;
/// The isCodeGenOnly field from the record
bool IsCodeGenOnly;
+ /// The isAsmParserOnly field from the record
+ bool IsAsmParserOnly;
/// The ForceDisassemble field from the record
bool ForceDisassemble;
// The CD8_Scale field from the record
uint8_t CD8_Scale;
- // Whether the instruction has the predicate "In64BitMode"
- bool Is64Bit;
- // Whether the instruction has the predicate "In32BitMode"
- bool Is32Bit;
+ /// \param insn The CodeGenInstruction to extract information from.
+ RecognizableInstrBase(const CodeGenInstruction &insn);
+ /// \returns true if this instruction should be emitted
+ bool shouldBeEmitted() const;
+};
+/// RecognizableInstr - Encapsulates all information required to decode a single
+/// instruction, as extracted from the LLVM instruction tables. Has methods
+/// to interpret the information available in the LLVM tables, and to emit the
+/// instruction into DisassemblerTables.
+class RecognizableInstr : public RecognizableInstrBase {
+private:
+ /// The record from the .td files corresponding to this instruction
+ const Record* Rec;
/// The instruction name as listed in the tables
std::string Name;
-
- /// Indicates whether the instruction should be emitted into the decode
- /// tables; regardless, it will be emitted into the instruction info table
- bool ShouldBeEmitted;
-
+ // Whether the instruction has the predicate "In32BitMode"
+ bool Is32Bit;
+ // Whether the instruction has the predicate "In64BitMode"
+ bool Is64Bit;
/// The operands of the instruction, as listed in the CodeGenInstruction.
/// They are not one-to-one with operands listed in the MCInst; for example,
/// memory operands expand to 5 operands in the MCInst
const std::vector<CGIOperandList::OperandInfo>* Operands;
+ /// The opcode of the instruction, as used in an MCInst
+ InstrUID UID;
/// The description of the instruction that is emitted into the instruction
/// info table
InstructionSpecifier* Spec;
@@ -243,7 +247,7 @@ private:
///
/// @param s - The string, as extracted by calling Rec->getName()
/// on a CodeGenInstruction::OperandInfo.
- /// @param hasREX_WPrefix - Indicates whether the instruction has a REX.W
+ /// @param hasREX_W - Indicates whether the instruction has a REX.W
/// prefix. If it does, 32-bit register operands stay
/// 32-bit regardless of the operand size.
/// @param OpSize Indicates the operand size of the instruction.
@@ -251,7 +255,7 @@ private:
/// register sizes keep their size.
/// @return - The operand's type.
static OperandType typeFromString(const std::string& s,
- bool hasREX_WPrefix, uint8_t OpSize);
+ bool hasREX_W, uint8_t OpSize);
/// immediateEncodingFromString - Translates an immediate encoding from the
/// string provided in the LLVM tables to an OperandEncoding for use in
@@ -314,19 +318,6 @@ private:
(const std::string&,
uint8_t OpSize));
- /// shouldBeEmitted - Returns the shouldBeEmitted field. Although filter()
- /// filters out many instructions, at various points in decoding we
- /// determine that the instruction should not actually be decodable. In
- /// particular, MMX MOV instructions aren't emitted, but they're only
- /// identified during operand parsing.
- ///
- /// @return - true if at this point we believe the instruction should be
- /// emitted; false if not. This will return false if filter() returns false
- /// once emitInstructionSpecifier() has been called.
- bool shouldBeEmitted() const {
- return ShouldBeEmitted;
- }
-
/// emitInstructionSpecifier - Loads the instruction specifier for the current
/// instruction into a DisassemblerTables.
///
@@ -339,6 +330,7 @@ private:
/// decode information for the current instruction.
void emitDecodePath(DisassemblerTables &tables) const;
+public:
/// Constructor - Initializes a RecognizableInstr with the appropriate fields
/// from a CodeGenInstruction.
///
@@ -348,7 +340,6 @@ private:
RecognizableInstr(DisassemblerTables &tables,
const CodeGenInstruction &insn,
InstrUID uid);
-public:
/// processInstr - Accepts a CodeGenInstruction and loads decode information
/// for it into a DisassemblerTables if appropriate.
///
@@ -362,6 +353,12 @@ public:
InstrUID uid);
};
+std::string getMnemonic(const CodeGenInstruction *I, unsigned Variant);
+bool isRegisterOperand(const Record *Rec);
+bool isMemoryOperand(const Record *Rec);
+bool isImmediateOperand(const Record *Rec);
+unsigned getRegOperandSize(const Record *RegRec);
+unsigned getMemOperandSize(const Record *MemRec);
} // namespace X86Disassembler
} // namespace llvm
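
A minimal sketch of consuming the refactored interface from another backend; Inst and handleVEXInstr are assumed or hypothetical names, not part of the patch:

    // Inst: const CodeGenInstruction *, already known to be an X86Inst def.
    llvm::X86Disassembler::RecognizableInstrBase RI(*Inst);
    if (RI.shouldBeEmitted() && RI.Encoding == X86Local::VEX)
      handleVEXInstr(RI.Opcode, RI.OpMap); // handleVEXInstr is hypothetical
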